From f7c475b8dfc23d461a47dfac5e498f8cc96faea5 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Wed, 24 Feb 2021 11:28:08 +0800
Subject: [PATCH 0001/3804] drm/ttm: Do not add non-system domain BO into swap
 list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A BO is added to the swap list when it is validated into the system
domain. If the BO is later validated into a non-system domain, say the
VRAM domain, it should no longer be on the swap list.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210224032808.150465-1-xinhui.pan@amd.com
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 101a68dc615b6..799ec7a7caa4d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -153,6 +153,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 
 		swap = &ttm_bo_glob.swap_lru[bo->priority];
 		list_move_tail(&bo->swap, swap);
+	} else {
+		list_del_init(&bo->swap);
 	}
 
 	if (bdev->driver->del_from_lru_notify)
-- 
GitLab


From ffe8768fb8f391cb478466778c55e2110525c15c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 15 Apr 2021 16:45:25 +0800
Subject: [PATCH 0002/3804] drm/vc4: remove unused function

Fix the following clang warning:

drivers/gpu/drm/vc4/vc4_vec.c:201:1: warning: unused function
'to_vc4_vec_connector' [-Wunused-function].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/1618476325-112629-1-git-send-email-jiapeng.chong@linux.alibaba.com
---
 drivers/gpu/drm/vc4/vc4_vec.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c
index bd5b8eb58b180..090529d0d5dcd 100644
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -197,12 +197,6 @@ struct vc4_vec_connector {
 	struct drm_encoder *encoder;
 };
 
-static inline struct vc4_vec_connector *
-to_vc4_vec_connector(struct drm_connector *connector)
-{
-	return container_of(connector, struct vc4_vec_connector, base);
-}
-
 enum vc4_vec_tv_mode_id {
 	VC4_VEC_TV_MODE_NTSC,
 	VC4_VEC_TV_MODE_NTSC_J,
-- 
GitLab


From 0e793ba77c18382f08e440260fe72bc6fce2a3cb Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 21 Apr 2021 11:14:02 +0100
Subject: [PATCH 0003/3804] spi: Make of_register_spi_device also set the
 fwnode

Currently, the SPI core doesn't set the struct device fwnode pointer
when it creates a new SPI device. This means when the device is
registered the fwnode is NULL and the check in device_add which sets
the fwnode->dev pointer is skipped. This wasn't previously an issue,
however these two patches:

commit 4731210c09f5 ("gpiolib: Bind gpio_device to a driver to enable
fw_devlink=on by default")
commit ced2af419528 ("gpiolib: Don't probe gpio_device if it's not the
primary device")

Added some code to the GPIO core which relies on using that
fwnode->dev pointer to determine if a driver is bound to the fwnode
and if not bind a stub GPIO driver. This means the GPIO providers
behind SPI will get both the expected driver and this stub driver
causing the stub driver to fail if it attempts to request any pin
configuration. For example on my system:

madera-pinctrl madera-pinctrl: pin gpio5 already requested by madera-pinctrl; cannot claim for gpiochip3
madera-pinctrl madera-pinctrl: pin-4 (gpiochip3) status -22
madera-pinctrl madera-pinctrl: could not request pin 4 (gpio5) from group aif1  on device madera-pinctrl
gpio_stub_drv gpiochip3: Error applying setting, reverse things back
gpio_stub_drv: probe of gpiochip3 failed with error -22

The firmware node on the device created by the GPIO framework is set
through the of_node pointer hence things generally actually work,
however that fwnode->dev is never set, as the check was skipped at
device_add time. This fix appears to match how the I2C subsystem
handles the same situation.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210421101402.8468-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 904a353798b64..862a9bb691298 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2047,6 +2047,7 @@ of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc)
 	/* Store a pointer to the node in the device structure */
 	of_node_get(nc);
 	spi->dev.of_node = nc;
+	spi->dev.fwnode = of_fwnode_handle(nc);
 
 	/* Register the new device */
 	rc = spi_add_device(spi);
-- 
GitLab


From dbaca8e56ea3f23fa215f48c2d46dd03ede06e02 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 20 Apr 2021 19:44:24 +0300
Subject: [PATCH 0004/3804] spi: Allow to have all native CSs in use along with
 GPIOs

The commit 7d93aecdb58d ("spi: Add generic support for unused native cs
with cs-gpios") excludes the valid case of controllers that don't
need to switch a native CS in order to perform the transfer, i.e. when

  0		native
  ...		...
  <n> - 1	native
  <n>		GPIO
  <n> + 1	GPIO
  ...		...

where <n> defines maximum of native CSs supported by the controller.

To allow this, bail out from spi_get_gpio_descs() conditionally, i.e. only
for controllers which are explicitly marked with SPI_MASTER_GPIO_SS.

Fixes: 7d93aecdb58d ("spi: Add generic support for unused native cs with cs-gpios")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210420164425.40287-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 862a9bb691298..2e5dca20c8d00 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2612,8 +2612,9 @@ static int spi_get_gpio_descs(struct spi_controller *ctlr)
 	}
 
 	ctlr->unused_native_cs = ffz(native_cs_mask);
-	if (num_cs_gpios && ctlr->max_native_cs &&
-	    ctlr->unused_native_cs >= ctlr->max_native_cs) {
+
+	if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios &&
+	    ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) {
 		dev_err(dev, "No unused native chip select available\n");
 		return -EINVAL;
 	}
-- 
GitLab


From f60d7270c8a3d2beb1c23ae0da42497afa3584c2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 20 Apr 2021 19:44:25 +0300
Subject: [PATCH 0005/3804] spi: Avoid undefined behaviour when counting unused
 native CSs

ffz(), which has been used to find the first unused native CS,
might cause undefined behaviour when called against ~0U.
To fix that, open code it with ffs(~value) - 1.

Fixes: 7d93aecdb58d ("spi: Add generic support for unused native cs with cs-gpios")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210420164425.40287-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 2e5dca20c8d00..6fe2a9509675f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2611,7 +2611,7 @@ static int spi_get_gpio_descs(struct spi_controller *ctlr)
 		native_cs_mask |= BIT(i);
 	}
 
-	ctlr->unused_native_cs = ffz(native_cs_mask);
+	ctlr->unused_native_cs = ffs(~native_cs_mask) - 1;
 
 	if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios &&
 	    ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) {
-- 
GitLab


From 9fdd04918a452980631ecc499317881c1d120b70 Mon Sep 17 00:00:00 2001
From: Dan Robertson <dan@dlrobertson.com>
Date: Fri, 23 Apr 2021 00:02:13 -0400
Subject: [PATCH 0006/3804] net: ieee802154: fix null deref in parse dev addr

Fix a logic error that could result in a null deref if the user sets
the mode incorrectly for the given addr type.

Signed-off-by: Dan Robertson <dan@dlrobertson.com>
Acked-by: Alexander Aring <aahringo@redhat.com>
Link: https://lore.kernel.org/r/20210423040214.15438-2-dan@dlrobertson.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/ieee802154/nl802154.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 05f6bd89a7dd8..0cf2374c143bd 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -1298,19 +1298,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
 	if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL))
 		return -EINVAL;
 
-	if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
-	    !attrs[NL802154_DEV_ADDR_ATTR_MODE] ||
-	    !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] ||
-	      attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]))
+	if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE])
 		return -EINVAL;
 
 	addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
 	addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
 	switch (addr->mode) {
 	case NL802154_DEV_ADDR_SHORT:
+		if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT])
+			return -EINVAL;
 		addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
 		break;
 	case NL802154_DEV_ADDR_EXTENDED:
+		if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])
+			return -EINVAL;
 		addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
 		break;
 	default:
-- 
GitLab


From 6c9762a78c325107dc37d20ee21002b841679209 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Fri, 23 Apr 2021 15:54:02 +0200
Subject: [PATCH 0007/3804] ASoC: max98088: fix ni clock divider calculation

The ni1/ni2 ratio formula [1] uses pclk, which is the prescaled mclk.
The max98088 datasheet [2] has no such formula, but its table 12 is
identical, so we can assume that the formula is the same for both devices.

While at it, make use of DIV_ROUND_CLOSEST_ULL().

[1] https://datasheets.maximintegrated.com/en/ds/MAX98089.pdf; page 86
[2] https://datasheets.maximintegrated.com/en/ds/MAX98088.pdf; page 82

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Link: https://lore.kernel.org/r/20210423135402.32105-1-m.felsch@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/max98088.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 4be24e7f51c89..f8e49e45ce33f 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -41,6 +41,7 @@ struct max98088_priv {
 	enum max98088_type devtype;
 	struct max98088_pdata *pdata;
 	struct clk *mclk;
+	unsigned char mclk_prescaler;
 	unsigned int sysclk;
 	struct max98088_cdata dai[2];
 	int eq_textcnt;
@@ -998,13 +999,16 @@ static int max98088_dai1_hw_params(struct snd_pcm_substream *substream,
        /* Configure NI when operating as master */
        if (snd_soc_component_read(component, M98088_REG_14_DAI1_FORMAT)
                & M98088_DAI_MAS) {
+               unsigned long pclk;
+
                if (max98088->sysclk == 0) {
                        dev_err(component->dev, "Invalid system clock frequency\n");
                        return -EINVAL;
                }
                ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL)
                                * (unsigned long long int)rate;
-               do_div(ni, (unsigned long long int)max98088->sysclk);
+               pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler);
+               ni = DIV_ROUND_CLOSEST_ULL(ni, pclk);
                snd_soc_component_write(component, M98088_REG_12_DAI1_CLKCFG_HI,
                        (ni >> 8) & 0x7F);
                snd_soc_component_write(component, M98088_REG_13_DAI1_CLKCFG_LO,
@@ -1065,13 +1069,16 @@ static int max98088_dai2_hw_params(struct snd_pcm_substream *substream,
        /* Configure NI when operating as master */
        if (snd_soc_component_read(component, M98088_REG_1C_DAI2_FORMAT)
                & M98088_DAI_MAS) {
+               unsigned long pclk;
+
                if (max98088->sysclk == 0) {
                        dev_err(component->dev, "Invalid system clock frequency\n");
                        return -EINVAL;
                }
                ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL)
                                * (unsigned long long int)rate;
-               do_div(ni, (unsigned long long int)max98088->sysclk);
+               pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler);
+               ni = DIV_ROUND_CLOSEST_ULL(ni, pclk);
                snd_soc_component_write(component, M98088_REG_1A_DAI2_CLKCFG_HI,
                        (ni >> 8) & 0x7F);
                snd_soc_component_write(component, M98088_REG_1B_DAI2_CLKCFG_LO,
@@ -1113,8 +1120,10 @@ static int max98088_dai_set_sysclk(struct snd_soc_dai *dai,
         */
        if ((freq >= 10000000) && (freq < 20000000)) {
                snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x10);
+               max98088->mclk_prescaler = 1;
        } else if ((freq >= 20000000) && (freq < 30000000)) {
                snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x20);
+               max98088->mclk_prescaler = 2;
        } else {
                dev_err(component->dev, "Invalid master clock frequency\n");
                return -EINVAL;
-- 
GitLab


From 366db3ac3cdf97e90695282b959c75d5ea58cf00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 21 Apr 2021 17:02:20 +0200
Subject: [PATCH 0008/3804] arm64: dts: renesas: aistarvision-mipi-adapter-2.1:
 Fix CSI40 ports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the DTS schema by explicitly stating that the input is port@0. This
fixes a schema validation error but has no runtime effect as the default
port number is 0 if not specified.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20210421150221.3202955-2-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
index e7b4a929bb174..2e3d1981cac48 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
+++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts
@@ -33,7 +33,7 @@
 	status = "okay";
 
 	ports {
-		port {
+		port@0 {
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
-- 
GitLab


From 0a96c05995ef1085f9c5e6bf005a04915dd2ec6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Wed, 21 Apr 2021 17:02:21 +0200
Subject: [PATCH 0009/3804] arm64: dts: renesas: Add port@0 node for all CSI-2
 nodes to dtsi
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The port@0 node is mandatory. Add or move its declaration to the CSI-2
nodes declared in dtsi files instead of depending on dts files
adding it when describing the external connection.

This fixes validation warnings for DTB outputs that do not connect all
CSI-2 receivers to transmitters and thus do not declare all port@0 nodes
in dts files.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20210421150221.3202955-3-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 ...hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi |  2 --
 arch/arm64/boot/dts/renesas/r8a774a1.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a774b1.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a774c0.dtsi            |  4 ++++
 arch/arm64/boot/dts/renesas/r8a774e1.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77950.dtsi            |  4 ++++
 arch/arm64/boot/dts/renesas/r8a77951.dtsi            | 12 ++++++++++++
 arch/arm64/boot/dts/renesas/r8a77960.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77961.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77965.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77970.dtsi            |  4 ++++
 arch/arm64/boot/dts/renesas/r8a77980.dtsi            |  8 ++++++++
 arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts       |  2 --
 arch/arm64/boot/dts/renesas/r8a77990.dtsi            |  4 ++++
 arch/arm64/boot/dts/renesas/salvator-common.dtsi     |  3 ---
 15 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
index c62ddb9b2ba56..3771144a2ce49 100644
--- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
+++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi
@@ -14,7 +14,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
@@ -29,7 +28,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
index d64fb8b1b86c3..46f8dbf689048 100644
--- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi
@@ -2573,6 +2573,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2628,6 +2632,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
index 5b05474dc2727..d16a4be5ef77a 100644
--- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi
@@ -2419,6 +2419,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2474,6 +2478,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
index 20fa3caa050e5..1aef34447abd1 100644
--- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi
@@ -1823,6 +1823,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
index 8eb006cbd9af4..1f51237ab0a64 100644
--- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi
@@ -2709,6 +2709,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2764,6 +2768,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
index 25b87da32eebb..b643d3079db1e 100644
--- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi
@@ -192,6 +192,10 @@
 			#address-cells = <1>;
 			#size-cells = <0>;
 
+			port@0 {
+				reg = <0>;
+			};
+
 			port@1 {
 				#address-cells = <1>;
 				#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
index 5c39152e45707..85d66d15465ab 100644
--- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi
@@ -3097,6 +3097,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3152,6 +3156,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -3191,6 +3199,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
index 25d947a81b294..12476e354d746 100644
--- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi
@@ -2761,6 +2761,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2816,6 +2820,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
index ab081f14af9aa..d9804768425a7 100644
--- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi
@@ -2499,6 +2499,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2554,6 +2558,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
index 657b20d3533bd..dcb9df861d749 100644
--- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi
@@ -2575,6 +2575,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -2630,6 +2634,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
index 5a5d5649332a8..e8f6352c3665f 100644
--- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi
@@ -1106,6 +1106,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
index 1ffa4a995a7ab..7b51d464de0ea 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi
@@ -1439,6 +1439,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
@@ -1478,6 +1482,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
index 295d34f1d216d..4715e4a4abe06 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts
@@ -298,8 +298,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2>;
diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
index 5010f23fafcc7..0eaea58f4210d 100644
--- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi
@@ -1970,6 +1970,10 @@
 				#address-cells = <1>;
 				#size-cells = <0>;
 
+				port@0 {
+					reg = <0>;
+				};
+
 				port@1 {
 					#address-cells = <1>;
 					#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
index e18747df219f8..453ffcef24fae 100644
--- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi
+++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi
@@ -349,7 +349,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
 			csi20_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1>;
@@ -364,8 +363,6 @@
 
 	ports {
 		port@0 {
-			reg = <0>;
-
 			csi40_in: endpoint {
 				clock-lanes = <0>;
 				data-lanes = <1 2 3 4>;
-- 
GitLab


From d9cd78edb2e6b7e26747c0ec312be31e7ef196fe Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 22 Apr 2021 12:02:29 +0300
Subject: [PATCH 0010/3804] firmware: arm_scpi: Prevent the ternary sign
 expansion bug

How the type promotion works in ternary expressions is a bit tricky.
The problem is that scpi_clk_get_val() returns longs, "ret" is a int
which holds a negative error code, and le32_to_cpu() is an unsigned int.
We want the negative error code to be cast to a negative long.  But
because le32_to_cpu() is an u32 then "ret" is type promoted to u32 and
becomes a high positive and then it is promoted to long and it is still
a high positive value.

Fix this by getting rid of the ternary.

Link: https://lore.kernel.org/r/YIE7pdqV/h10tEAK@mwanda
Fixes: 8cb7cf56c9fe ("firmware: add support for ARM System Control and Power Interface(SCPI) protocol")
Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
[sudeep.holla: changed to return 0 as clock rate on error]
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scpi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index d0dee37ad5228..4ceba5ef78958 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -552,8 +552,10 @@ static unsigned long scpi_clk_get_val(u16 clk_id)
 
 	ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id,
 				sizeof(le_clk_id), &rate, sizeof(rate));
+	if (ret)
+		return 0;
 
-	return ret ? ret : le32_to_cpu(rate);
+	return le32_to_cpu(rate);
 }
 
 static int scpi_clk_set_val(u16 clk_id, unsigned long rate)
-- 
GitLab


From 03f840c49207e8c125b3df8c29c13137c6675d42 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 27 Apr 2021 11:30:31 +0800
Subject: [PATCH 0011/3804] firmware: arm_scmi: Remove duplicate declaration of
 struct scmi_protocol_handle

struct scmi_protocol_handle is declared twice, let us remove the duplicate
declaration.

Link: https://lore.kernel.org/r/20210427033031.4580-1-wanjiabing@vivo.com
Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
[sudeep.holla: minor updates to the title and the changelog]
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/notify.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/firmware/arm_scmi/notify.h b/drivers/firmware/arm_scmi/notify.h
index ce0324be6c71d..4e9b627edfefa 100644
--- a/drivers/firmware/arm_scmi/notify.h
+++ b/drivers/firmware/arm_scmi/notify.h
@@ -79,8 +79,6 @@ struct scmi_protocol_events {
 
 int scmi_notification_init(struct scmi_handle *handle);
 void scmi_notification_exit(struct scmi_handle *handle);
-
-struct scmi_protocol_handle;
 int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id,
 				  const struct scmi_protocol_handle *ph,
 				  const struct scmi_protocol_events *ee);
-- 
GitLab


From 10f76165d30bf568214e75767f2d8d8682cd4040 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@chromium.org>
Date: Mon, 26 Apr 2021 16:53:25 -0700
Subject: [PATCH 0012/3804] drm/msm: Do not unpin/evict exported dma-buf's

Our initial logic for excluding dma-bufs was not quite right.  In
particular we want msm_gem_get/put_pages() path used for exported
dma-bufs to increment/decrement the pin-count.

Also, in case the importer is vmap'ing the dma-buf, we need to be
sure to update the object's status, because it is now no longer
potentially evictable.

Fixes: 63f17ef83428 ("drm/msm: Support evicting GEM objects to swap")
Signed-off-by: Rob Clark <robdclark@chromium.org>
Link: https://lore.kernel.org/r/20210426235326.1230125-1-robdclark@gmail.com
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.c | 16 +++++++++++++++-
 drivers/gpu/drm/msm/msm_gem.h |  4 ++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b199942266a26..56df86e5f7400 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj)
 	}
 
 	p = get_pages(obj);
+
+	if (!IS_ERR(p)) {
+		msm_obj->pin_count++;
+		update_inactive(msm_obj);
+	}
+
 	msm_gem_unlock(obj);
 	return p;
 }
 
 void msm_gem_put_pages(struct drm_gem_object *obj)
 {
-	/* when we start tracking the pin count, then do something here */
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	msm_gem_lock(obj);
+	msm_obj->pin_count--;
+	GEM_WARN_ON(msm_obj->pin_count < 0);
+	update_inactive(msm_obj);
+	msm_gem_unlock(obj);
 }
 
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
@@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
 			ret = -ENOMEM;
 			goto fail;
 		}
+
+		update_inactive(msm_obj);
 	}
 
 	return msm_obj->vaddr;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index a6480d2c81b2c..03e2cc2a2ce15 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
 /* imported/exported objects are not purgeable: */
 static inline bool is_unpurgeable(struct msm_gem_object *msm_obj)
 {
-	return msm_obj->base.dma_buf && msm_obj->base.import_attach;
+	return msm_obj->base.import_attach || msm_obj->pin_count;
 }
 
 static inline bool is_purgeable(struct msm_gem_object *msm_obj)
@@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object *msm_obj)
 
 static inline bool is_unevictable(struct msm_gem_object *msm_obj)
 {
-	return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr;
+	return is_unpurgeable(msm_obj) || msm_obj->vaddr;
 }
 
 static inline void mark_evictable(struct msm_gem_object *msm_obj)
-- 
GitLab


From 4b95d371fb001185af84d177e69a23d55bd0167a Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Fri, 23 Apr 2021 21:49:26 -0400
Subject: [PATCH 0013/3804] drm/msm: fix LLC not being enabled for mmu500
 targets

mmu500 targets don't have a "cx_mem" region, set llc_mmio to NULL in that
case to avoid the IS_ERR() condition in a6xx_llc_activate().

Fixes: 3d247123b5a1 ("drm/msm/a6xx: Add support for using system cache on MMU500 based targets")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210424014927.1661-1-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index d553f62f4eeb8..b4d8e1b01ee4f 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 {
 	struct device_node *phandle;
 
-	a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
-	if (IS_ERR(a6xx_gpu->llc_mmio))
-		return;
-
 	/*
 	 * There is a different programming path for targets with an mmu500
 	 * attached, so detect if that is the case
@@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
 		of_device_is_compatible(phandle, "arm,mmu-500"));
 	of_node_put(phandle);
 
+	if (a6xx_gpu->have_mmu500)
+		a6xx_gpu->llc_mmio = NULL;
+	else
+		a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+
 	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
 	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
-- 
GitLab


From 08811c057b3e22f7a3df3955c138a59f3b651df0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Sat, 10 Apr 2021 04:19:01 +0300
Subject: [PATCH 0014/3804] drm/msm/dsi: dsi_phy_28nm_8960: fix uninitialized
 variable access

The parent_name initialization was lost in refactoring, restore it now.

Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Link: https://lore.kernel.org/r/20210410011901.1735866-1-dmitry.baryshkov@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index 582b1428f9715..86e40a0d41a3b 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov
 	if (!vco_name)
 		return -ENOMEM;
 
+	parent_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+	if (!parent_name)
+		return -ENOMEM;
+
 	clk_name = devm_kzalloc(dev, 32, GFP_KERNEL);
 	if (!clk_name)
 		return -ENOMEM;
-- 
GitLab


From 094c7f39ba4b5ae7e4c448527834428b79e3baf9 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 12 Apr 2021 03:01:58 +0300
Subject: [PATCH 0015/3804] drm/msm/dsi: fix msm_dsi_phy_get_clk_provider
 return code

msm_dsi_phy_get_clk_provider() always returns two provided clocks, so
return 0 instead of returning incorrect -EINVAL error code.

Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Tested-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210412000158.2049066-1-dmitry.baryshkov@linaro.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index f0a2ddf96a4b9..ff7f2ec420300 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy,
 	if (pixel_clk_provider)
 		*pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk;
 
-	return -EINVAL;
+	return 0;
 }
 
 void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy)
-- 
GitLab


From 774cda6f12d5ad11410c4cda223554c3735ee862 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 27 Apr 2021 22:59:55 +0200
Subject: [PATCH 0016/3804] dt-bindings: nvmem: mediatek: remove duplicate
 mt8192 line

The same patch was accidentally merged twice, resulting in a
duplicate line for the mt8192 SoC.

Fixes: f2674c0c7488 ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC")
Fixes: 2a1405a14c3a ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/devicetree/bindings/nvmem/mtk-efuse.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
index d479ad977e24f..b6791702bcfc9 100644
--- a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt
@@ -9,7 +9,6 @@ Required properties:
 	      "mediatek,mt8173-efuse" or "mediatek,efuse": for MT8173
 	      "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192
 	      "mediatek,mt8516-efuse", "mediatek,efuse": for MT8516
-	      "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192
 - reg: Should contain registers location and length
 
 = Data cells =
-- 
GitLab


From c019d92457826bb7b2091c86f36adb5de08405f9 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 23 Apr 2021 17:09:28 +0200
Subject: [PATCH 0017/3804] openrisc: Fix a memory leak

'setup_find_cpu_node()' takes a reference on the node it returns.
This reference must be decremented when not needed anymore, or there will
be a leak.

Add the missing 'of_node_put(cpu)'.

Note that 'setup_cpuinfo()' that also calls this function already has a
correct 'of_node_put(cpu)' at its end.

Fixes: 9d02a4283e9c ("OpenRISC: Boot code")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/kernel/setup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 2416a9f915330..c6f9e7b9f7cb2 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -278,6 +278,8 @@ void calibrate_delay(void)
 	pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
 		loops_per_jiffy / (500000 / HZ),
 		(loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy);
+
+	of_node_put(cpu);
 }
 
 void __init setup_arch(char **cmdline_p)
-- 
GitLab


From a0695853e5906a9558eef9f79856e07659b7a1e6 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 28 Apr 2021 14:26:31 +0200
Subject: [PATCH 0018/3804] ASoC: stm32: do not request a new clock consummer
 reference

This reverts commit 65d1cce726d4912793d0a84c55ecdb0ef5832130.

There is a problem with clk_hw_get_hw(). Using it pins the clock provider to
itself, making it impossible to remove the module.

Revert commit 65d1cce726d4 ("ASoC: stm32: properly get clk from the
provider") until this gets sorted out.

Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lore.kernel.org/r/20210428122632.46244-2-jbrunet@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/stm/stm32_sai_sub.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c
index c1561237ee24b..3aa1cf2624020 100644
--- a/sound/soc/stm/stm32_sai_sub.c
+++ b/sound/soc/stm/stm32_sai_sub.c
@@ -484,10 +484,7 @@ static int stm32_sai_add_mclk_provider(struct stm32_sai_sub_data *sai)
 		dev_err(dev, "mclk register returned %d\n", ret);
 		return ret;
 	}
-
-	sai->sai_mclk = devm_clk_hw_get_clk(dev, hw, NULL);
-	if (IS_ERR(sai->sai_mclk))
-		return PTR_ERR(sai->sai_mclk);
+	sai->sai_mclk = hw->clk;
 
 	/* register mclk provider */
 	return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
-- 
GitLab


From 97c733654ab4a5ac910216b4b74e605acf3e1cce Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Wed, 28 Apr 2021 14:26:32 +0200
Subject: [PATCH 0019/3804] ASoC: da7219: do not request a new clock consummer
 reference

This reverts commit 12f8127fe9e6154dd4197df97e44f3fd67583071.

There is a problem with clk_hw_get_hw(). Using it pins the clock provider to
itself, making it impossible to remove the module.

Revert commit 12f8127fe9e6 ("ASoC: da7219: properly get clk from the
provider") until this gets sorted out.

Reported-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20210428122632.46244-3-jbrunet@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/da7219.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c
index bd3c523a86171..13009d08b09ac 100644
--- a/sound/soc/codecs/da7219.c
+++ b/sound/soc/codecs/da7219.c
@@ -2181,10 +2181,7 @@ static int da7219_register_dai_clks(struct snd_soc_component *component)
 				 ret);
 			goto err;
 		}
-
-		da7219->dai_clks[i] = devm_clk_hw_get_clk(dev, dai_clk_hw, NULL);
-		if (IS_ERR(da7219->dai_clks[i]))
-			return PTR_ERR(da7219->dai_clks[i]);
+		da7219->dai_clks[i] = dai_clk_hw->clk;
 
 		/* For DT setup onecell data, otherwise create lookup */
 		if (np) {
-- 
GitLab


From 6879e8e759bf9e05eaee85e32ca1a936e6b46da1 Mon Sep 17 00:00:00 2001
From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Date: Wed, 28 Apr 2021 01:53:31 +0530
Subject: [PATCH 0020/3804] ASoC: amd: fix for pcm_read() error

The Python script below triggers a pcm_read() error.

import subprocess

p = subprocess.Popen(["aplay -t raw -D plughw:1,0 /dev/zero"], shell=True)
subprocess.call(["arecord -Dhw:1,0 --dump-hw-params"], shell=True)
subprocess.call(["arecord -Dhw:1,0 -fdat -d1 /dev/null"], shell=True)
p.kill()

The handling of the ACP global external interrupt enable register
causes this issue.
The register was updated wrongly while a stream was still active,
which disabled interrupts for the active stream.
Refactor the code that enables and disables external interrupts.

Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
Link: https://lore.kernel.org/r/1619555017-29858-1-git-send-email-Vijendar.Mukunda@amd.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/raven/acp3x-pcm-dma.c | 10 ----------
 sound/soc/amd/raven/acp3x.h         |  1 +
 sound/soc/amd/raven/pci-acp3x.c     | 15 +++++++++++++++
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c
index 417cda24030cd..2447a1e6e913f 100644
--- a/sound/soc/amd/raven/acp3x-pcm-dma.c
+++ b/sound/soc/amd/raven/acp3x-pcm-dma.c
@@ -237,10 +237,6 @@ static int acp3x_dma_open(struct snd_soc_component *component,
 		return ret;
 	}
 
-	if (!adata->play_stream && !adata->capture_stream &&
-	    !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream)
-		rv_writel(1, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB);
-
 	i2s_data->acp3x_base = adata->acp3x_base;
 	runtime->private_data = i2s_data;
 	return ret;
@@ -367,12 +363,6 @@ static int acp3x_dma_close(struct snd_soc_component *component,
 		}
 	}
 
-	/* Disable ACP irq, when the current stream is being closed and
-	 * another stream is also not active.
-	 */
-	if (!adata->play_stream && !adata->capture_stream &&
-		!adata->i2ssp_play_stream && !adata->i2ssp_capture_stream)
-		rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB);
 	return 0;
 }
 
diff --git a/sound/soc/amd/raven/acp3x.h b/sound/soc/amd/raven/acp3x.h
index 03fe93913e12e..c3f0c8b7545db 100644
--- a/sound/soc/amd/raven/acp3x.h
+++ b/sound/soc/amd/raven/acp3x.h
@@ -77,6 +77,7 @@
 #define ACP_POWER_OFF_IN_PROGRESS	0x03
 
 #define ACP3x_ITER_IRER_SAMP_LEN_MASK	0x38
+#define ACP_EXT_INTR_STAT_CLEAR_MASK 0xFFFFFFFF
 
 struct acp3x_platform_info {
 	u16 play_i2s_instance;
diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c
index d3536fd6a1240..a013a607b3d47 100644
--- a/sound/soc/amd/raven/pci-acp3x.c
+++ b/sound/soc/amd/raven/pci-acp3x.c
@@ -76,6 +76,19 @@ static int acp3x_reset(void __iomem *acp3x_base)
 	return -ETIMEDOUT;
 }
 
+static void acp3x_enable_interrupts(void __iomem *acp_base)
+{
+	rv_writel(0x01, acp_base + mmACP_EXTERNAL_INTR_ENB);
+}
+
+static void acp3x_disable_interrupts(void __iomem *acp_base)
+{
+	rv_writel(ACP_EXT_INTR_STAT_CLEAR_MASK, acp_base +
+		  mmACP_EXTERNAL_INTR_STAT);
+	rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_CNTL);
+	rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_ENB);
+}
+
 static int acp3x_init(struct acp3x_dev_data *adata)
 {
 	void __iomem *acp3x_base = adata->acp3x_base;
@@ -93,6 +106,7 @@ static int acp3x_init(struct acp3x_dev_data *adata)
 		pr_err("ACP3x reset failed\n");
 		return ret;
 	}
+	acp3x_enable_interrupts(acp3x_base);
 	return 0;
 }
 
@@ -100,6 +114,7 @@ static int acp3x_deinit(void __iomem *acp3x_base)
 {
 	int ret;
 
+	acp3x_disable_interrupts(acp3x_base);
 	/* Reset */
 	ret = acp3x_reset(acp3x_base);
 	if (ret) {
-- 
GitLab


From c7299fea67696db5bd09d924d1f1080d894f92ef Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Mon, 26 Apr 2021 16:56:38 -0700
Subject: [PATCH 0021/3804] spi: Fix spi device unregister flow

When an SPI device is unregistered, the spi->controller->cleanup() is
called in the device's release callback. That's wrong for a couple of
reasons:

1. spi_dev_put() can be called before spi_add_device() is called. And
   it's spi_add_device() that calls spi_setup(). This will cause cleanup()
   to get called without the spi device ever being set up.

2. There's no guarantee that the controller's driver would be present by
   the time the spi device's release function gets called.

3. It also causes "sleeping in atomic context" stack dump[1] when device
   link deletion code does a put_device() on the spi device.

Fix these issues by simply moving the cleanup from the device release
callback to the actual spi_unregister_device() function.

[1] - https://lore.kernel.org/lkml/CAHp75Vc=FCGcUyS0v6fnxme2YJ+qD+Y-hQDQLa2JhWNON9VmsQ@mail.gmail.com/

Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210426235638.1285530-1-saravanak@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ba425b9c77007..f9885c0965637 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -47,10 +47,6 @@ static void spidev_release(struct device *dev)
 {
 	struct spi_device	*spi = to_spi_device(dev);
 
-	/* spi controllers may cleanup for released devices */
-	if (spi->controller->cleanup)
-		spi->controller->cleanup(spi);
-
 	spi_controller_put(spi->controller);
 	kfree(spi->driver_override);
 	kfree(spi);
@@ -558,6 +554,12 @@ static int spi_dev_check(struct device *dev, void *data)
 	return 0;
 }
 
+static void spi_cleanup(struct spi_device *spi)
+{
+	if (spi->controller->cleanup)
+		spi->controller->cleanup(spi);
+}
+
 /**
  * spi_add_device - Add spi_device allocated with spi_alloc_device
  * @spi: spi_device to register
@@ -622,11 +624,13 @@ int spi_add_device(struct spi_device *spi)
 
 	/* Device may be bound to an active driver when this returns */
 	status = device_add(&spi->dev);
-	if (status < 0)
+	if (status < 0) {
 		dev_err(dev, "can't add %s, status %d\n",
 				dev_name(&spi->dev), status);
-	else
+		spi_cleanup(spi);
+	} else {
 		dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev));
+	}
 
 done:
 	mutex_unlock(&spi_add_lock);
@@ -710,6 +714,8 @@ void spi_unregister_device(struct spi_device *spi)
 	if (!spi)
 		return;
 
+	spi_cleanup(spi);
+
 	if (spi->dev.of_node) {
 		of_node_clear_flag(spi->dev.of_node, OF_POPULATED);
 		of_node_put(spi->dev.of_node);
-- 
GitLab


From 41f48a29ebd5ce944e412f491f1876b5abeff1d6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 27 Apr 2021 16:38:42 +0200
Subject: [PATCH 0022/3804] spi: altera: Make SPI_ALTERA_CORE invisible

The SPI_ALTERA_CORE config symbol controls compilation of the Altera SPI
Controller core code.  It is already selected by all of its users, so
there is no reason to make it visible, unless compile-testing.

Fixes: b0c3d9354de1f87e ("spi: altera: separate core code from platform code")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/f0cb8e66baba4506db6f42fca74dc51b76883507.1619534253.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 8b161ec4943bf..f4481fe48bf06 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -65,7 +65,7 @@ config SPI_ALTERA
 	  This is the driver for the Altera SPI Controller.
 
 config SPI_ALTERA_CORE
-	tristate "Altera SPI Controller core code"
+	tristate "Altera SPI Controller core code" if COMPILE_TEST
 	select REGMAP
 	help
 	  "The core code for the Altera SPI Controller"
-- 
GitLab


From adbd914dcde0b03bfc08ffe40b81f31b0457833f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 21 Apr 2021 14:31:50 +0100
Subject: [PATCH 0023/3804] btrfs: zoned: fix silent data loss after failure
 splitting ordered extent

On a zoned filesystem, sometimes we need to split an ordered extent into 3
different ordered extents. The original ordered extent is shortened, at
the front and at the rear, and we create two other new ordered extents to
represent the trimmed parts of the original ordered extent.

After adjusting the original ordered extent, we create an ordered extent
to represent the pre-range, and that may fail with ENOMEM for example.
After that we always try to create the ordered extent for the post-range,
and if that happens to succeed we end up returning success to the caller
as we overwrite the 'ret' variable which contained the previous error.

This means we end up with a file range for which there is no ordered
extent, which results in the range never getting a new file extent item
pointing to the new data location. And since the split operation did
not return an error, writeback does not fail and the inode's mapping is
not flagged with an error, resulting in a subsequent fsync not reporting
an error either.

It's possibly very unlikely to have the creation of the post-range ordered
extent succeed after the creation of the pre-range ordered extent failed,
but it's not impossible.

So fix this by making sure we only create the post-range ordered extent
if there was no error creating the ordered extent for the pre-range.

Fixes: d22002fd37bd97 ("btrfs: zoned: split ordered extent when bio is sent")
CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 07b0b42187913..6c413bb451a3d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
 
 	if (pre)
 		ret = clone_ordered_extent(ordered, 0, pre);
-	if (post)
+	if (ret == 0 && post)
 		ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
 					   post);
 
-- 
GitLab


From ffb7c2e923cb3232454a513dcb5636e73091aa88 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Apr 2021 12:09:21 +0100
Subject: [PATCH 0024/3804] btrfs: do not consider send context as valid when
 trying to flush qgroups

At qgroup.c:try_flush_qgroup() we are asserting that current->journal_info
is either NULL or has the value BTRFS_SEND_TRANS_STUB.

However allowing for BTRFS_SEND_TRANS_STUB makes no sense because:

1) It is misleading, because send operations are read-only and do not
   ever need to reserve qgroup space;

2) We already assert that current->journal_info != BTRFS_SEND_TRANS_STUB
   at transaction.c:start_transaction();

3) On a kernel without CONFIG_BTRFS_ASSERT=y set, it would result in
   a crash if try_flush_qgroup() is ever called in a send context, because
   at transaction.c:start_transaction we cast current->journal_info into
   a struct btrfs_trans_handle pointer and then dereference it.

So just do not allow a send context at try_flush_qgroup() and update the
comment about it.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 2319c923c9e69..b1caf5acf1e2c 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root)
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	/* Can't hold an open transaction or we run the risk of deadlocking */
-	ASSERT(current->journal_info == NULL ||
-	       current->journal_info == BTRFS_SEND_TRANS_STUB);
-	if (WARN_ON(current->journal_info &&
-		    current->journal_info != BTRFS_SEND_TRANS_STUB))
+	/*
+	 * Can't hold an open transaction or we run the risk of deadlocking,
+	 * and can't either be under the context of a send operation (where
+	 * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
+	 * would result in a crash when starting a transaction and does not
+	 * make sense either (send is a read-only operation).
+	 */
+	ASSERT(current->journal_info == NULL);
+	if (WARN_ON(current->journal_info))
 		return 0;
 
 	/*
-- 
GitLab


From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 27 Apr 2021 11:27:20 +0100
Subject: [PATCH 0025/3804] btrfs: fix race leading to unpersisted data and
 metadata on fsync

When doing a fast fsync on a file, there is a race which can result in the
fsync returning success to user space without logging the inode and without
durably persisting new data.

The following example shows one possible scenario for this:

   $ mkfs.btrfs -f /dev/sdc
   $ mount /dev/sdc /mnt

   $ touch /mnt/bar
   $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz

   # Now we have:
   # file bar == inode 257
   # file baz == inode 258

   $ mv /mnt/baz /mnt/foo

   # Now we have:
   # file bar == inode 257
   # file foo == inode 258

   $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo

   # fsync bar before foo, it is important to trigger the race.
   $ xfs_io -c "fsync" /mnt/bar
   $ xfs_io -c "fsync" /mnt/foo

   # After this:
   # inode 257, file bar, is empty
   # inode 258, file foo, has 1M filled with 0xcd

   <power failure>

   # Replay the log:
   $ mount /dev/sdc /mnt

   # After this point file foo should have 1M filled with 0xcd and not 0xab

The following steps explain how the race happens:

1) Before the first fsync of inode 258, when it has the "baz" name, its
   ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1.
   The inode also has the full sync flag set;

2) After the first fsync, we set inode 258 ->logged_trans to 6, which is
   the generation of the current transaction, and set ->last_log_commit
   to 0, which is the current value of ->last_sub_trans (done at
   btrfs_log_inode()).

   The full sync flag is cleared from the inode during the fsync.

   The log sub transaction that was committed had an ID of 0 and when we
   synced the log, at btrfs_sync_log(), we incremented root->log_transid
   from 0 to 1;

3) During the rename:

   We update inode 258, through btrfs_update_inode(), and that causes its
   ->last_sub_trans to be set to 1 (the current log transaction ID), and
   ->last_log_commit remains with a value of 0.

   After updating inode 258, because we have previously logged the inode
   in the previous fsync, we log again the inode through the call to
   btrfs_log_new_name(). This results in updating the inode's
   ->last_log_commit from 0 to 1 (the current value of its
   ->last_sub_trans).

   The ->last_sub_trans of inode 257 is updated to 1, which is the ID of
   the next log transaction;

4) Then a buffered write against inode 258 is made. This leaves the value
   of ->last_sub_trans as 1 (the ID of the current log transaction, stored
   at root->log_transid);

5) Then an fsync against inode 257 (or any other inode other than 258),
   happens. This results in committing the log transaction with ID 1,
   which results in updating root->last_log_commit to 1 and bumping
   root->log_transid from 1 to 2;

6) Then an fsync against inode 258 starts. We flush delalloc and wait only
   for writeback to complete, since the full sync flag is not set in the
   inode's runtime flags - we do not wait for ordered extents to complete.

   Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the
   ordered extent completes. The call returns true:

     static inline bool btrfs_inode_in_log(...)
     {
         bool ret = false;

         spin_lock(&inode->lock);
         if (inode->logged_trans == generation &&
             inode->last_sub_trans <= inode->last_log_commit &&
             inode->last_sub_trans <= inode->root->last_log_commit)
                 ret = true;
         spin_unlock(&inode->lock);
         return ret;
     }

   generation has a value of 6 (fs_info->generation), ->logged_trans also
   has a value of 6 (set when we logged the inode during the first fsync
   and when logging it during the rename), ->last_sub_trans has a value
   of 1, set during the rename (step 3), ->last_log_commit also has a
   value of 1 (set in step 3) and root->last_log_commit has a value of 1,
   which was set in step 5 when fsyncing inode 257.

   As a consequence we don't log the inode, any new extents and do not
   sync the log, resulting in a data loss if a power failure happens
   after the fsync and before the current transaction commits.
   Also, because we do not log the inode, after a power failure the mtime
   and ctime of the inode do not match those we had before.

   When the ordered extent completes before we call btrfs_inode_in_log(),
   then the call returns false and we log the inode and sync the log,
   since at the end of ordered extent completion we update the inode and
   set ->last_sub_trans to 2 (the value of root->log_transid) and
   ->last_log_commit to 1.

This problem is found after removing the check for the emptiness of the
inode's list of modified extents in the recent commit 209ecbb8585bf6
("btrfs: remove stale comment and logic from btrfs_inode_in_log()"),
added in the 5.13 merge window. However checking the emptiness of the
list is not really the way to solve this problem, and was never intended
to, because while that solves the problem for COW writes, the problem
persists for NOCOW writes because in that case the list is always empty.

In the case of NOCOW writes, even though we wait for the writeback to
complete before returning from btrfs_sync_file(), we end up not logging
the inode, which has a new mtime/ctime, and because we don't sync the log,
we never issue disk barriers (send REQ_PREFLUSH to the device) since that
only happens when we sync the log (when we write super blocks at
btrfs_sync_log()). So effectively, for a NOCOW case, when we return from
btrfs_sync_file() to user space, we are not guaranteeing that the data is
durably persisted on disk.

Also, while the example above uses a rename exchange to show how the
problem happens, it is not the only way to trigger it. An alternative
could be adding a new hard link to inode 258, since that also results
in calling btrfs_log_new_name() and updating the inode in the log.
An example reproducer using the addition of a hard link instead of a
rename operation:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  $ touch /mnt/bar
  $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo

  $ ln /mnt/foo /mnt/foo_link
  $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo

  $ xfs_io -c "fsync" /mnt/bar
  $ xfs_io -c "fsync" /mnt/foo

  <power failure>

  # Replay the log:
  $ mount /dev/sdc /mnt

  # After this point file foo often has 1M filled with 0xab and not 0xcd

The reasons leading to the final fsync of file foo, inode 258, not
persisting the new data are the same as for the previous example with
a rename operation.

So fix by never skipping logging and log syncing when there are still any
ordered extents in flight. To avoid making the conditional if statement
that checks if logging an inode is needed harder to read, place all the
logic into an helper function with separate if statements to make it more
manageable and easier to read.

A test case for fstests will follow soon.

For NOCOW writes, the problem existed before commit b5e6c3e170b770
("btrfs: always wait on ordered extents at fsync time"), introduced in
kernel 4.19, then it went away with that commit since we started to always
wait for ordered extent completion before logging.

The problem came back again once the fast fsync path was changed again to
avoid waiting for ordered extent completion, in commit 487781796d3022
("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10.

However, for COW writes, the race only happens after the recent
commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from
btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW
writes, the bug existed before that commit. So tag 5.10+ as the release
for stable backports.

CC: stable@vger.kernel.org # 5.10+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c     | 35 +++++++++++++++++++++++++----------
 fs/btrfs/tree-log.c |  3 ++-
 2 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 864c08d08a353..3b10d98b4ebb3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 	return ret;
 }
 
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+	struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+	if (btrfs_inode_in_log(inode, fs_info->generation) &&
+	    list_empty(&ctx->ordered_extents))
+		return true;
+
+	/*
+	 * If we are doing a fast fsync we can not bail out if the inode's
+	 * last_trans is <= then the last committed transaction, because we only
+	 * update the last_trans of the inode during ordered extent completion,
+	 * and for a fast fsync we don't wait for that, we only wait for the
+	 * writeback to complete.
+	 */
+	if (inode->last_trans <= fs_info->last_trans_committed &&
+	    (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+	     list_empty(&ctx->ordered_extents)))
+		return true;
+
+	return false;
+}
+
 /*
  * fsync call for both files and directories.  This logs the inode into
  * the tree log instead of forcing full commits whenever possible.
@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	atomic_inc(&root->log_batch);
 
-	/*
-	 * If we are doing a fast fsync we can not bail out if the inode's
-	 * last_trans is <= then the last committed transaction, because we only
-	 * update the last_trans of the inode during ordered extent completion,
-	 * and for a fast fsync we don't wait for that, we only wait for the
-	 * writeback to complete.
-	 */
 	smp_mb();
-	if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
-	    (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
-	     (full_sync || list_empty(&ctx.ordered_extents)))) {
+	if (skip_inode_logging(&ctx)) {
 		/*
 		 * We've had everything committed since the last time we were
 		 * modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c1353b84ae54c..a0fc3a1390ab3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6060,7 +6060,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 	 * (since logging them is pointless, a link count of 0 means they
 	 * will never be accessible).
 	 */
-	if (btrfs_inode_in_log(inode, trans->transid) ||
+	if ((btrfs_inode_in_log(inode, trans->transid) &&
+	     list_empty(&ctx->ordered_extents)) ||
 	    inode->vfs_inode.i_nlink == 0) {
 		ret = BTRFS_NO_LOG_SYNC;
 		goto end_no_trans;
-- 
GitLab


From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Apr 2021 12:08:05 +0100
Subject: [PATCH 0026/3804] btrfs: fix deadlock when cloning inline extents and
 using qgroups

There are a few exceptional cases where cloning an inline extent needs to
copy the inline extent data into a page of the destination inode.

When this happens, we end up starting a transaction while having a dirty
page for the destination inode and while having the range locked in the
destination's inode iotree too. Because when reserving metadata space
for a transaction we may need to flush existing delalloc in case there is
not enough free space, we have a mechanism in place to prevent a deadlock,
which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
cloning inline extent and low on free metadata space").

However when using qgroups, a transaction also reserves metadata qgroup
space, which can also result in flushing delalloc in case there is not
enough available space at the moment. When this happens we deadlock, since
flushing delalloc requires locking the file range in the inode's iotree
and the range was already locked at the very beginning of the clone
operation, before attempting to start the transaction.

When this issue happens, stack traces like the following are reported:

  [72747.556262] task:kworker/u81:9   state:D stack:    0 pid:  225 ppid:     2 flags:0x00004000
  [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142)
  [72747.556271] Call Trace:
  [72747.556273]  __schedule+0x296/0x760
  [72747.556277]  schedule+0x3c/0xa0
  [72747.556279]  io_schedule+0x12/0x40
  [72747.556284]  __lock_page+0x13c/0x280
  [72747.556287]  ? generic_file_readonly_mmap+0x70/0x70
  [72747.556325]  extent_write_cache_pages+0x22a/0x440 [btrfs]
  [72747.556331]  ? __set_page_dirty_nobuffers+0xe7/0x160
  [72747.556358]  ? set_extent_buffer_dirty+0x5e/0x80 [btrfs]
  [72747.556362]  ? update_group_capacity+0x25/0x210
  [72747.556366]  ? cpumask_next_and+0x1a/0x20
  [72747.556391]  extent_writepages+0x44/0xa0 [btrfs]
  [72747.556394]  do_writepages+0x41/0xd0
  [72747.556398]  __writeback_single_inode+0x39/0x2a0
  [72747.556403]  writeback_sb_inodes+0x1ea/0x440
  [72747.556407]  __writeback_inodes_wb+0x5f/0xc0
  [72747.556410]  wb_writeback+0x235/0x2b0
  [72747.556414]  ? get_nr_inodes+0x35/0x50
  [72747.556417]  wb_workfn+0x354/0x490
  [72747.556420]  ? newidle_balance+0x2c5/0x3e0
  [72747.556424]  process_one_work+0x1aa/0x340
  [72747.556426]  worker_thread+0x30/0x390
  [72747.556429]  ? create_worker+0x1a0/0x1a0
  [72747.556432]  kthread+0x116/0x130
  [72747.556435]  ? kthread_park+0x80/0x80
  [72747.556438]  ret_from_fork+0x1f/0x30

  [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
  [72747.566961] Call Trace:
  [72747.566964]  __schedule+0x296/0x760
  [72747.566968]  ? finish_wait+0x80/0x80
  [72747.566970]  schedule+0x3c/0xa0
  [72747.566995]  wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs]
  [72747.566999]  ? finish_wait+0x80/0x80
  [72747.567024]  lock_extent_bits+0x37/0x90 [btrfs]
  [72747.567047]  btrfs_invalidatepage+0x299/0x2c0 [btrfs]
  [72747.567051]  ? find_get_pages_range_tag+0x2cd/0x380
  [72747.567076]  __extent_writepage+0x203/0x320 [btrfs]
  [72747.567102]  extent_write_cache_pages+0x2bb/0x440 [btrfs]
  [72747.567106]  ? update_load_avg+0x7e/0x5f0
  [72747.567109]  ? enqueue_entity+0xf4/0x6f0
  [72747.567134]  extent_writepages+0x44/0xa0 [btrfs]
  [72747.567137]  ? enqueue_task_fair+0x93/0x6f0
  [72747.567140]  do_writepages+0x41/0xd0
  [72747.567144]  __filemap_fdatawrite_range+0xc7/0x100
  [72747.567167]  btrfs_run_delalloc_work+0x17/0x40 [btrfs]
  [72747.567195]  btrfs_work_helper+0xc2/0x300 [btrfs]
  [72747.567200]  process_one_work+0x1aa/0x340
  [72747.567202]  worker_thread+0x30/0x390
  [72747.567205]  ? create_worker+0x1a0/0x1a0
  [72747.567208]  kthread+0x116/0x130
  [72747.567211]  ? kthread_park+0x80/0x80
  [72747.567214]  ret_from_fork+0x1f/0x30

  [72747.569686] task:fsstress        state:D stack:    0 pid:841421 ppid:841417 flags:0x00000000
  [72747.569689] Call Trace:
  [72747.569691]  __schedule+0x296/0x760
  [72747.569694]  schedule+0x3c/0xa0
  [72747.569721]  try_flush_qgroup+0x95/0x140 [btrfs]
  [72747.569725]  ? finish_wait+0x80/0x80
  [72747.569753]  btrfs_qgroup_reserve_data+0x34/0x50 [btrfs]
  [72747.569781]  btrfs_check_data_free_space+0x5f/0xa0 [btrfs]
  [72747.569804]  btrfs_buffered_write+0x1f7/0x7f0 [btrfs]
  [72747.569810]  ? path_lookupat.isra.48+0x97/0x140
  [72747.569833]  btrfs_file_write_iter+0x81/0x410 [btrfs]
  [72747.569836]  ? __kmalloc+0x16a/0x2c0
  [72747.569839]  do_iter_readv_writev+0x160/0x1c0
  [72747.569843]  do_iter_write+0x80/0x1b0
  [72747.569847]  vfs_writev+0x84/0x140
  [72747.569869]  ? btrfs_file_llseek+0x38/0x270 [btrfs]
  [72747.569873]  do_writev+0x65/0x100
  [72747.569876]  do_syscall_64+0x33/0x40
  [72747.569879]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

  [72747.569899] task:fsstress        state:D stack:    0 pid:841424 ppid:841417 flags:0x00004000
  [72747.569903] Call Trace:
  [72747.569906]  __schedule+0x296/0x760
  [72747.569909]  schedule+0x3c/0xa0
  [72747.569936]  try_flush_qgroup+0x95/0x140 [btrfs]
  [72747.569940]  ? finish_wait+0x80/0x80
  [72747.569967]  __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
  [72747.569989]  start_transaction+0x279/0x580 [btrfs]
  [72747.570014]  clone_copy_inline_extent+0x332/0x490 [btrfs]
  [72747.570041]  btrfs_clone+0x5b7/0x7a0 [btrfs]
  [72747.570068]  ? lock_extent_bits+0x64/0x90 [btrfs]
  [72747.570095]  btrfs_clone_files+0xfc/0x150 [btrfs]
  [72747.570122]  btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
  [72747.570126]  do_clone_file_range+0xed/0x200
  [72747.570131]  vfs_clone_file_range+0x37/0x110
  [72747.570134]  ioctl_file_clone+0x7d/0xb0
  [72747.570137]  do_vfs_ioctl+0x138/0x630
  [72747.570140]  __x64_sys_ioctl+0x62/0xc0
  [72747.570143]  do_syscall_64+0x33/0x40
  [72747.570146]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

So fix this by skipping the flush of delalloc for an inode that is
flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
such a special case of cloning an inline extent, when flushing delalloc
during qgroup metadata reservation.

The special cases for cloning inline extents were added in kernel 5.7 by
commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
inline extents"), while having qgroup metadata space reservation flushing
delalloc when low on space was added in kernel 5.9 by commit
c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
-EDQUOT"). So use a "Fixes:" tag for the latter commit to ease stable
kernel backports.

Reported-by: Wang Yugui <wangyugui@e16-tech.com>
Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/
Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT")
CC: stable@vger.kernel.org # 5.9+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h  | 2 +-
 fs/btrfs/inode.c  | 4 ++--
 fs/btrfs/ioctl.c  | 2 +-
 fs/btrfs/qgroup.c | 2 +-
 fs/btrfs/send.c   | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 278e0cbc9a98b..0f5b0b12762bb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_inode *inode, u64 new_size,
 			       u32 min_type);
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
 			       bool in_reclaim_context);
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1a349759efae4..69fcdf8f0b1c2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9691,7 +9691,7 @@ out:
 	return ret;
 }
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
 {
 	struct writeback_control wbc = {
 		.nr_to_write = LONG_MAX,
@@ -9704,7 +9704,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		return -EROFS;
 
-	return start_delalloc_inodes(root, &wbc, true, false);
+	return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
 }
 
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b1328f17607ea..0ba0e4ddaf6b4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1051,7 +1051,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
 	 */
 	btrfs_drew_read_lock(&root->snapshot_lock);
 
-	ret = btrfs_start_delalloc_snapshot(root);
+	ret = btrfs_start_delalloc_snapshot(root, false);
 	if (ret)
 		goto out;
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index b1caf5acf1e2c..3ded812f522cc 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3566,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
 		return 0;
 	}
 
-	ret = btrfs_start_delalloc_snapshot(root);
+	ret = btrfs_start_delalloc_snapshot(root, true);
 	if (ret < 0)
 		goto out;
 	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 55741adf90712..bd69db72acc5e 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
 	int i;
 
 	if (root) {
-		ret = btrfs_start_delalloc_snapshot(root);
+		ret = btrfs_start_delalloc_snapshot(root, false);
 		if (ret)
 			return ret;
 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
 
 	for (i = 0; i < sctx->clone_roots_cnt; i++) {
 		root = sctx->clone_roots[i].root;
-		ret = btrfs_start_delalloc_snapshot(root);
+		ret = btrfs_start_delalloc_snapshot(root, false);
 		if (ret)
 			return ret;
 		btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
-- 
GitLab


From 02ded1314a465a89267be38231d9858206853d80 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Fri, 23 Apr 2021 15:04:20 -0400
Subject: [PATCH 0027/3804] drm/msm: fix minor version to indicate
 MSM_PARAM_SUSPENDS support

Increase the minor version to indicate that MSM_PARAM_SUSPENDS is supported.

Fixes: 3ab1c5cc3939 ("drm/msm: Add param for userspace to query suspend count")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Link: https://lore.kernel.org/r/20210423190420.25217-1-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e1104d2454e2e..fe7d17cd35ecd 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -42,7 +42,7 @@
  * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	6
+#define MSM_VERSION_MINOR	7
 #define MSM_VERSION_PATCHLEVEL	0
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
-- 
GitLab


From 121271f08809e5dc01d15d3e529988ac5d740af6 Mon Sep 17 00:00:00 2001
From: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Date: Wed, 28 Apr 2021 23:38:01 -0600
Subject: [PATCH 0028/3804] spi: spi-zynq-qspi: Fix kernel-doc warning

Fix kernel-doc warning.

Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Link: https://lore.kernel.org/r/20210429053802.17650-2-amit.kumar-mahapatra@xilinx.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 5d8a5ee62fa23..1acde9e249731 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -367,7 +367,7 @@ static int zynq_qspi_config_op(struct zynq_qspi *xqspi, struct spi_device *spi)
 }
 
 /**
- * zynq_qspi_setup - Configure the QSPI controller
+ * zynq_qspi_setup_op - Configure the QSPI controller
  * @spi:	Pointer to the spi_device structure
  *
  * Sets the operational mode of QSPI controller for the next QSPI transfer, baud
-- 
GitLab


From 6d5ff8e632a4f2389c331e5554cd1c2a9a28c7aa Mon Sep 17 00:00:00 2001
From: Karen Dombroski <karen.dombroski@marsbioimaging.com>
Date: Wed, 28 Apr 2021 23:38:02 -0600
Subject: [PATCH 0029/3804] spi: spi-zynq-qspi: Fix stack violation bug

When the number of bytes for the op is greater than one, the read could
run off the end of the function stack and cause a crash.

This patch restores the behaviour of safely reading out of the original
opcode location.

Signed-off-by: Karen Dombroski <karen.dombroski@marsbioimaging.com>
Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
Link: https://lore.kernel.org/r/20210429053802.17650-3-amit.kumar-mahapatra@xilinx.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 1acde9e249731..5a3d81c31d040 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -528,18 +528,17 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem,
 	struct zynq_qspi *xqspi = spi_controller_get_devdata(mem->spi->master);
 	int err = 0, i;
 	u8 *tmpbuf;
-	u8 opcode = op->cmd.opcode;
 
 	dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n",
-		opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
 		op->dummy.buswidth, op->data.buswidth);
 
 	zynq_qspi_chipselect(mem->spi, true);
 	zynq_qspi_config_op(xqspi, mem->spi);
 
-	if (op->cmd.nbytes) {
+	if (op->cmd.opcode) {
 		reinit_completion(&xqspi->data_completion);
-		xqspi->txbuf = &opcode;
+		xqspi->txbuf = (u8 *)&op->cmd.opcode;
 		xqspi->rxbuf = NULL;
 		xqspi->tx_bytes = op->cmd.nbytes;
 		xqspi->rx_bytes = op->cmd.nbytes;
-- 
GitLab


From f9c82a4ea89c384d49ce03768ba88d049ed3f1f0 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:08 +0200
Subject: [PATCH 0030/3804] Increase size of ucounts to atomic_long_t

RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK use unsigned long to store their
counters. As a preparation for moving rlimits based on ucounts, we need
to increase the size of the variable to long.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/257aa5fb1a7d81cf0f4c34f39ada2320c4284771.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  4 ++--
 kernel/ucount.c                | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index f6c5f784be5ab..c242c10906c50 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -88,7 +88,7 @@ struct user_namespace {
 	struct ctl_table_header *sysctls;
 #endif
 	struct ucounts		*ucounts;
-	int ucount_max[UCOUNT_COUNTS];
+	long ucount_max[UCOUNT_COUNTS];
 } __randomize_layout;
 
 struct ucounts {
@@ -96,7 +96,7 @@ struct ucounts {
 	struct user_namespace *ns;
 	kuid_t uid;
 	int count;
-	atomic_t ucount[UCOUNT_COUNTS];
+	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
 extern struct user_namespace init_user_ns;
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 11b1596e2542a..04c561751af1e 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -175,14 +175,14 @@ static void put_ucounts(struct ucounts *ucounts)
 	kfree(ucounts);
 }
 
-static inline bool atomic_inc_below(atomic_t *v, int u)
+static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
 {
-	int c, old;
-	c = atomic_read(v);
+	long c, old;
+	c = atomic_long_read(v);
 	for (;;) {
 		if (unlikely(c >= u))
 			return false;
-		old = atomic_cmpxchg(v, c, c+1);
+		old = atomic_long_cmpxchg(v, c, c+1);
 		if (likely(old == c))
 			return true;
 		c = old;
@@ -196,17 +196,17 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 	struct user_namespace *tns;
 	ucounts = get_ucounts(ns, uid);
 	for (iter = ucounts; iter; iter = tns->ucounts) {
-		int max;
+		long max;
 		tns = iter->ns;
 		max = READ_ONCE(tns->ucount_max[type]);
-		if (!atomic_inc_below(&iter->ucount[type], max))
+		if (!atomic_long_inc_below(&iter->ucount[type], max))
 			goto fail;
 	}
 	return ucounts;
 fail:
 	bad = iter;
 	for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
-		atomic_dec(&iter->ucount[type]);
+		atomic_long_dec(&iter->ucount[type]);
 
 	put_ucounts(ucounts);
 	return NULL;
@@ -216,7 +216,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 {
 	struct ucounts *iter;
 	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
-		int dec = atomic_dec_if_positive(&iter->ucount[type]);
+		long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
 		WARN_ON_ONCE(dec < 0);
 	}
 	put_ucounts(ucounts);
-- 
GitLab


From 905ae01c4ae2ae3df05bb141801b1db4b7d83c61 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:09 +0200
Subject: [PATCH 0031/3804] Add a reference to ucounts for each cred

For RLIMIT_NPROC and some other rlimits the user_struct that holds the
global limit is kept alive for the lifetime of a process by keeping it
in struct cred. Adding a pointer to ucounts in struct cred makes it
possible to track RLIMIT_NPROC not only per user in the system, but
also per user in the user_namespace.

Updating ucounts may require memory allocation which may fail. So, we
cannot change cred.ucounts in the commit_creds() because this function
cannot fail and it should always return 0. For this reason, we modify
cred.ucounts before calling the commit_creds().

Changelog

v6:
* Fix null-ptr-deref in is_ucounts_overlimit() detected by trinity. This
  error was caused by the fact that cred_alloc_blank() left the ucounts
  pointer empty.

Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/b37aaef28d8b9b0d757e07ba6dd27281bbe39259.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exec.c                      |  4 ++++
 include/linux/cred.h           |  2 ++
 include/linux/user_namespace.h |  4 ++++
 kernel/cred.c                  | 40 ++++++++++++++++++++++++++++++++++
 kernel/fork.c                  |  6 +++++
 kernel/sys.c                   | 12 ++++++++++
 kernel/ucount.c                | 40 +++++++++++++++++++++++++++++++---
 kernel/user_namespace.c        |  3 +++
 8 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 18594f11c31fe..d7c4187ca023e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1360,6 +1360,10 @@ int begin_new_exec(struct linux_binprm * bprm)
 	WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
 	flush_signal_handlers(me, 0);
 
+	retval = set_cred_ucounts(bprm->cred);
+	if (retval < 0)
+		goto out_unlock;
+
 	/*
 	 * install the new credentials for this executable
 	 */
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4c63505036977..66436e6550328 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -144,6 +144,7 @@ struct cred {
 #endif
 	struct user_struct *user;	/* real user ID subscription */
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
+	struct ucounts *ucounts;
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
 	/* RCU deletion */
 	union {
@@ -170,6 +171,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
 extern int set_create_files_as(struct cred *, struct inode *);
 extern int cred_fscmp(const struct cred *, const struct cred *);
 extern void __init cred_init(void);
+extern int set_cred_ucounts(struct cred *);
 
 /*
  * check for validity of credentials
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index c242c10906c50..7919b80d57ed0 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -100,11 +100,15 @@ struct ucounts {
 };
 
 extern struct user_namespace init_user_ns;
+extern struct ucounts init_ucounts;
 
 bool setup_userns_sysctls(struct user_namespace *ns);
 void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
+struct ucounts *get_ucounts(struct ucounts *ucounts);
+void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
 
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c6516..58a8a9e24347d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -60,6 +60,7 @@ struct cred init_cred = {
 	.user			= INIT_USER,
 	.user_ns		= &init_user_ns,
 	.group_info		= &init_groups,
+	.ucounts		= &init_ucounts,
 };
 
 static inline void set_cred_subscribers(struct cred *cred, int n)
@@ -119,6 +120,8 @@ static void put_cred_rcu(struct rcu_head *rcu)
 	if (cred->group_info)
 		put_group_info(cred->group_info);
 	free_uid(cred->user);
+	if (cred->ucounts)
+		put_ucounts(cred->ucounts);
 	put_user_ns(cred->user_ns);
 	kmem_cache_free(cred_jar, cred);
 }
@@ -222,6 +225,7 @@ struct cred *cred_alloc_blank(void)
 #ifdef CONFIG_DEBUG_CREDENTIALS
 	new->magic = CRED_MAGIC;
 #endif
+	new->ucounts = get_ucounts(&init_ucounts);
 
 	if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
@@ -284,6 +288,11 @@ struct cred *prepare_creds(void)
 
 	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
+
+	new->ucounts = get_ucounts(new->ucounts);
+	if (!new->ucounts)
+		goto error;
+
 	validate_creds(new);
 	return new;
 
@@ -363,6 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		ret = create_user_ns(new);
 		if (ret < 0)
 			goto error_put;
+		if (set_cred_ucounts(new) < 0)
+			goto error_put;
 	}
 
 #ifdef CONFIG_KEYS
@@ -653,6 +664,31 @@ int cred_fscmp(const struct cred *a, const struct cred *b)
 }
 EXPORT_SYMBOL(cred_fscmp);
 
+int set_cred_ucounts(struct cred *new)
+{
+	struct task_struct *task = current;
+	const struct cred *old = task->real_cred;
+	struct ucounts *old_ucounts = new->ucounts;
+
+	if (new->user == old->user && new->user_ns == old->user_ns)
+		return 0;
+
+	/*
+	 * This optimization is needed because alloc_ucounts() uses locks
+	 * for table lookups.
+	 */
+	if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid))
+		return 0;
+
+	if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid)))
+		return -EAGAIN;
+
+	if (old_ucounts)
+		put_ucounts(old_ucounts);
+
+	return 0;
+}
+
 /*
  * initialise the credentials stuff
  */
@@ -719,6 +755,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
 		goto error;
 
+	new->ucounts = get_ucounts(new->ucounts);
+	if (!new->ucounts)
+		goto error;
+
 	put_cred(old);
 	validate_creds(new);
 	return new;
diff --git a/kernel/fork.c b/kernel/fork.c
index 426cd0c51f9eb..321a5e31d817e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2995,6 +2995,12 @@ int ksys_unshare(unsigned long unshare_flags)
 	if (err)
 		goto bad_unshare_cleanup_cred;
 
+	if (new_cred) {
+		err = set_cred_ucounts(new_cred);
+		if (err)
+			goto bad_unshare_cleanup_cred;
+	}
+
 	if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 		if (do_sysvsem) {
 			/*
diff --git a/kernel/sys.c b/kernel/sys.c
index 2e2e3f378d97f..cabfc5b861754 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -552,6 +552,10 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
@@ -610,6 +614,10 @@ long __sys_setuid(uid_t uid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
@@ -685,6 +693,10 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	if (retval < 0)
 		goto error;
 
+	retval = set_cred_ucounts(new);
+	if (retval < 0)
+		goto error;
+
 	return commit_creds(new);
 
 error:
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 04c561751af1e..50cc1dfb7d28a 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -8,6 +8,12 @@
 #include <linux/kmemleak.h>
 #include <linux/user_namespace.h>
 
+struct ucounts init_ucounts = {
+	.ns    = &init_user_ns,
+	.uid   = GLOBAL_ROOT_UID,
+	.count = 1,
+};
+
 #define UCOUNTS_HASHTABLE_BITS 10
 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
 static DEFINE_SPINLOCK(ucounts_lock);
@@ -125,7 +131,15 @@ static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struc
 	return NULL;
 }
 
-static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
+static void hlist_add_ucounts(struct ucounts *ucounts)
+{
+	struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
+	spin_lock_irq(&ucounts_lock);
+	hlist_add_head(&ucounts->node, hashent);
+	spin_unlock_irq(&ucounts_lock);
+}
+
+struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
 	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
 	struct ucounts *ucounts, *new;
@@ -160,7 +174,26 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid)
 	return ucounts;
 }
 
-static void put_ucounts(struct ucounts *ucounts)
+struct ucounts *get_ucounts(struct ucounts *ucounts)
+{
+	unsigned long flags;
+
+	if (!ucounts)
+		return NULL;
+
+	spin_lock_irqsave(&ucounts_lock, flags);
+	if (ucounts->count == INT_MAX) {
+		WARN_ONCE(1, "ucounts: counter has reached its maximum value");
+		ucounts = NULL;
+	} else {
+		ucounts->count += 1;
+	}
+	spin_unlock_irqrestore(&ucounts_lock, flags);
+
+	return ucounts;
+}
+
+void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
@@ -194,7 +227,7 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 {
 	struct ucounts *ucounts, *iter, *bad;
 	struct user_namespace *tns;
-	ucounts = get_ucounts(ns, uid);
+	ucounts = alloc_ucounts(ns, uid);
 	for (iter = ucounts; iter; iter = tns->ucounts) {
 		long max;
 		tns = iter->ns;
@@ -237,6 +270,7 @@ static __init int user_namespace_sysctl_init(void)
 	BUG_ON(!user_header);
 	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 #endif
+	hlist_add_ucounts(&init_ucounts);
 	return 0;
 }
 subsys_initcall(user_namespace_sysctl_init);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 9a4b980d695b8..f1b7b4b8ffa25 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1340,6 +1340,9 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns)
 	put_user_ns(cred->user_ns);
 	set_cred_user_ns(cred, get_user_ns(user_ns));
 
+	if (set_cred_ucounts(cred) < 0)
+		return -EINVAL;
+
 	return 0;
 }
 
-- 
GitLab


From b6c336528926ef73b0f70260f2636de2c3b94c14 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:10 +0200
Subject: [PATCH 0032/3804] Use atomic_t for ucounts reference counting

The current implementation of the ucounts reference counter requires the
use of spin_lock. We're going to use get_ucounts() in more
performance-critical areas, such as the handling of RLIMIT_SIGPENDING.

Now we need to use spin_lock only if we want to change the hashtable.

v10:
* Always try to put ucounts in case we cannot increase ucounts->count.
  This covers the case where all consumers return ucounts at once.

v9:
* Use a negative value to check that the ucounts->count is close to
  overflow.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/94d1dbecab060a6b116b0a2d1accd8ca1bbb4f5f.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  4 +--
 kernel/ucount.c                | 53 ++++++++++++----------------------
 2 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 7919b80d57ed0..80b5bf12feaeb 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -95,7 +95,7 @@ struct ucounts {
 	struct hlist_node node;
 	struct user_namespace *ns;
 	kuid_t uid;
-	int count;
+	atomic_t count;
 	atomic_long_t ucount[UCOUNT_COUNTS];
 };
 
@@ -107,7 +107,7 @@ void retire_userns_sysctls(struct user_namespace *ns);
 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type);
 void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
-struct ucounts *get_ucounts(struct ucounts *ucounts);
+struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
 #ifdef CONFIG_USER_NS
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 50cc1dfb7d28a..365865f368ecd 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -11,7 +11,7 @@
 struct ucounts init_ucounts = {
 	.ns    = &init_user_ns,
 	.uid   = GLOBAL_ROOT_UID,
-	.count = 1,
+	.count = ATOMIC_INIT(1),
 };
 
 #define UCOUNTS_HASHTABLE_BITS 10
@@ -139,6 +139,15 @@ static void hlist_add_ucounts(struct ucounts *ucounts)
 	spin_unlock_irq(&ucounts_lock);
 }
 
+struct ucounts *get_ucounts(struct ucounts *ucounts)
+{
+	if (ucounts && atomic_add_negative(1, &ucounts->count)) {
+		put_ucounts(ucounts);
+		ucounts = NULL;
+	}
+	return ucounts;
+}
+
 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 {
 	struct hlist_head *hashent = ucounts_hashentry(ns, uid);
@@ -155,7 +164,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 
 		new->ns = ns;
 		new->uid = uid;
-		new->count = 0;
+		atomic_set(&new->count, 1);
 
 		spin_lock_irq(&ucounts_lock);
 		ucounts = find_ucounts(ns, uid, hashent);
@@ -163,33 +172,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 			kfree(new);
 		} else {
 			hlist_add_head(&new->node, hashent);
-			ucounts = new;
+			spin_unlock_irq(&ucounts_lock);
+			return new;
 		}
 	}
-	if (ucounts->count == INT_MAX)
-		ucounts = NULL;
-	else
-		ucounts->count += 1;
 	spin_unlock_irq(&ucounts_lock);
-	return ucounts;
-}
-
-struct ucounts *get_ucounts(struct ucounts *ucounts)
-{
-	unsigned long flags;
-
-	if (!ucounts)
-		return NULL;
-
-	spin_lock_irqsave(&ucounts_lock, flags);
-	if (ucounts->count == INT_MAX) {
-		WARN_ONCE(1, "ucounts: counter has reached its maximum value");
-		ucounts = NULL;
-	} else {
-		ucounts->count += 1;
-	}
-	spin_unlock_irqrestore(&ucounts_lock, flags);
-
+	ucounts = get_ucounts(ucounts);
 	return ucounts;
 }
 
@@ -197,15 +185,12 @@ void put_ucounts(struct ucounts *ucounts)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&ucounts_lock, flags);
-	ucounts->count -= 1;
-	if (!ucounts->count)
+	if (atomic_dec_and_test(&ucounts->count)) {
+		spin_lock_irqsave(&ucounts_lock, flags);
 		hlist_del_init(&ucounts->node);
-	else
-		ucounts = NULL;
-	spin_unlock_irqrestore(&ucounts_lock, flags);
-
-	kfree(ucounts);
+		spin_unlock_irqrestore(&ucounts_lock, flags);
+		kfree(ucounts);
+	}
 }
 
 static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
-- 
GitLab


From 21d1c5e386bc751f1953b371d72cd5b7d9c9e270 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:11 +0200
Subject: [PATCH 0033/3804] Reimplement RLIMIT_NPROC on top of ucounts

The rlimit counter is tied to the uid in the user_namespace. This allows
rlimit values to be specified in a userns even if they are already
globally exceeded by the user. However, the limits of the ancestor
user_namespaces still cannot be exceeded.

To illustrate the impact of rlimits, let's say there is a program that
does not fork. Some service-A wants to run this program as user X in
multiple containers. Since the program never forks, the service wants to
set RLIMIT_NPROC=1.

service-A
 \- program (uid=1000, container1, rlimit_nproc=1)
 \- program (uid=1000, container2, rlimit_nproc=1)

Service-A sets RLIMIT_NPROC=1 and runs the program in container1.
When service-A tries to run the program with RLIMIT_NPROC=1 in
container2, it fails since user X already has one running process.

We cannot use the existing inc_ucount() / dec_ucount() because they do
not allow us to exceed the maximum for the counter. Some rlimits can be
exceeded by root or by a user with the appropriate capability.

Changelog

v11:
* Change inc_rlimit_ucounts() which now returns top value of ucounts.
* Drop inc_rlimit_ucounts_and_test() because the return code of
  inc_rlimit_ucounts() can be checked.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/c5286a8aa16d2d698c222f7532f3d735c82bc6bc.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/exec.c                      |  2 +-
 include/linux/cred.h           |  2 ++
 include/linux/sched/user.h     |  1 -
 include/linux/user_namespace.h | 12 ++++++++++
 kernel/cred.c                  | 10 ++++----
 kernel/exit.c                  |  2 +-
 kernel/fork.c                  |  9 +++----
 kernel/sys.c                   |  2 +-
 kernel/ucount.c                | 44 ++++++++++++++++++++++++++++++++++
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  3 ++-
 11 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index d7c4187ca023e..f2bcdbeb3afb7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1878,7 +1878,7 @@ static int do_execveat_common(int fd, struct filename *filename,
 	 * whether NPROC limit is still exceeded.
 	 */
 	if ((current->flags & PF_NPROC_EXCEEDED) &&
-	    atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) {
+	    is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
 		retval = -EAGAIN;
 		goto out_ret;
 	}
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 66436e6550328..5ca1e8a1d0354 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -372,6 +372,7 @@ static inline void put_cred(const struct cred *_cred)
 
 #define task_uid(task)		(task_cred_xxx((task), uid))
 #define task_euid(task)		(task_cred_xxx((task), euid))
+#define task_ucounts(task)	(task_cred_xxx((task), ucounts))
 
 #define current_cred_xxx(xxx)			\
 ({						\
@@ -388,6 +389,7 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
+#define current_ucounts()	(current_cred_xxx(ucounts))
 
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index a8ec3b6093fcb..d33d867ad6c12 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t processes;	/* How many processes does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 80b5bf12feaeb..4a97acc359903 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -50,9 +50,12 @@ enum ucount_type {
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
 #endif
+	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_COUNTS,
 };
 
+#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC
+
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
@@ -110,6 +113,15 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid);
 struct ucounts * __must_check get_ucounts(struct ucounts *ucounts);
 void put_ucounts(struct ucounts *ucounts);
 
+static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type)
+{
+	return atomic_long_read(&ucounts->ucount[type]);
+}
+
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/cred.c b/kernel/cred.c
index 58a8a9e24347d..dcfa30b337c5a 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -360,7 +360,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		kdebug("share_creds(%p{%d,%d})",
 		       p->cred, atomic_read(&p->cred->usage),
 		       read_cred_subscribers(p->cred));
-		atomic_inc(&p->cred->user->processes);
+		inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 		return 0;
 	}
 
@@ -395,8 +395,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	}
 #endif
 
-	atomic_inc(&new->user->processes);
 	p->cred = p->real_cred = get_cred(new);
+	inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	alter_cred_subscribers(new, 2);
 	validate_creds(new);
 	return 0;
@@ -496,12 +496,12 @@ int commit_creds(struct cred *new)
 	 * in set_user().
 	 */
 	alter_cred_subscribers(new, 2);
-	if (new->user != old->user)
-		atomic_inc(&new->user->processes);
+	if (new->user != old->user || new->user_ns != old->user_ns)
+		inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	rcu_assign_pointer(task->real_cred, new);
 	rcu_assign_pointer(task->cred, new);
 	if (new->user != old->user)
-		atomic_dec(&old->user->processes);
+		dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	alter_cred_subscribers(old, -2);
 
 	/* send notifications */
diff --git a/kernel/exit.c b/kernel/exit.c
index 04029e35e69af..61c0fe902b508 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -188,7 +188,7 @@ repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
 	rcu_read_lock();
-	atomic_dec(&__task_cred(p)->user->processes);
+	dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	rcu_read_unlock();
 
 	cgroup_release(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 321a5e31d817e..ed7dfb07178d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -819,9 +819,11 @@ void __init fork_init(void)
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
 
-	for (i = 0; i < UCOUNT_COUNTS; i++)
+	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
+
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
 			  NULL, free_vm_stack_cache);
@@ -1978,8 +1980,7 @@ static __latent_entropy struct task_struct *copy_process(
 	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 #endif
 	retval = -EAGAIN;
-	if (atomic_read(&p->real_cred->user->processes) >=
-			task_rlimit(p, RLIMIT_NPROC)) {
+	if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
 		if (p->real_cred->user != INIT_USER &&
 		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
 			goto bad_fork_free;
@@ -2382,7 +2383,7 @@ bad_fork_cleanup_threadgroup_lock:
 #endif
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
-	atomic_dec(&p->cred->user->processes);
+	dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
 	exit_creds(p);
 bad_fork_free:
 	p->state = TASK_DEAD;
diff --git a/kernel/sys.c b/kernel/sys.c
index cabfc5b861754..00266a65a0006 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -473,7 +473,7 @@ static int set_user(struct cred *new)
 	 * for programs doing set*uid()+execve() by harmlessly deferring the
 	 * failure to the execve() stage.
 	 */
-	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
+	if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
 			new_user != INIT_USER)
 		current->flags |= PF_NPROC_EXCEEDED;
 	else
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 365865f368ecd..6caa56f7dec85 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
@@ -240,6 +241,48 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 	put_ucounts(ucounts);
 }
 
+long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
+{
+	struct ucounts *iter;
+	long ret = 0;
+
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		long max = READ_ONCE(iter->ns->ucount_max[type]);
+		long new = atomic_long_add_return(v, &iter->ucount[type]);
+		if (new < 0 || new > max)
+			ret = LONG_MAX;
+		else if (iter == ucounts)
+			ret = new;
+	}
+	return ret;
+}
+
+bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
+{
+	struct ucounts *iter;
+	long new;
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		long dec = atomic_long_add_return(-v, &iter->ucount[type]);
+		WARN_ON_ONCE(dec < 0);
+		if (iter == ucounts)
+			new = dec;
+	}
+	return (new == 0);
+}
+
+bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
+{
+	struct ucounts *iter;
+	if (get_ucounts_value(ucounts, type) > max)
+		return true;
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		max = READ_ONCE(iter->ns->ucount_max[type]);
+		if (get_ucounts_value(iter, type) > max)
+			return true;
+	}
+	return false;
+}
+
 static __init int user_namespace_sysctl_init(void)
 {
 #ifdef CONFIG_SYSCTL
@@ -256,6 +299,7 @@ static __init int user_namespace_sysctl_init(void)
 	BUG_ON(!setup_userns_sysctls(&init_user_ns));
 #endif
 	hlist_add_ucounts(&init_ucounts);
+	inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
 	return 0;
 }
 subsys_initcall(user_namespace_sysctl_init);
diff --git a/kernel/user.c b/kernel/user.c
index a2478cddf536e..7f5ff498207a7 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.processes	= ATOMIC_INIT(1),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index f1b7b4b8ffa25..e6577c8350720 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -119,9 +119,10 @@ int create_user_ns(struct cred *new)
 	ns->owner = owner;
 	ns->group = group;
 	INIT_WORK(&ns->work, free_user_ns);
-	for (i = 0; i < UCOUNT_COUNTS; i++) {
+	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
 		ns->ucount_max[i] = INT_MAX;
 	}
+	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
-- 
GitLab


From 6e52a9f0532f912af37bab4caf18b57d1b9845f4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:12 +0200
Subject: [PATCH 0034/3804] Reimplement RLIMIT_MSGQUEUE on top of ucounts

The rlimit counter is tied to the uid in the user_namespace. This allows
rlimit values to be specified in a userns even if they are already
globally exceeded by the user. However, the limits of the ancestor
user_namespaces still cannot be exceeded.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/2531f42f7884bbfee56a978040b3e0d25cdf6cde.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/user.h     |  4 ----
 include/linux/user_namespace.h |  1 +
 ipc/mqueue.c                   | 40 ++++++++++++++++++----------------
 kernel/fork.c                  |  1 +
 kernel/ucount.c                |  1 +
 kernel/user_namespace.c        |  1 +
 6 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index d33d867ad6c12..8a34446681aa6 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,10 +18,6 @@ struct user_struct {
 #endif
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
-#endif
-#ifdef CONFIG_POSIX_MQUEUE
-	/* protected by mq_lock	*/
-	unsigned long mq_bytes;	/* How many bytes can be allocated to mqueue? */
 #endif
 	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 4a97acc359903..5eeb86b00e686 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -51,6 +51,7 @@ enum ucount_type {
 	UCOUNT_INOTIFY_WATCHES,
 #endif
 	UCOUNT_RLIMIT_NPROC,
+	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8031464ed4ae2..461fcf8c873dd 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -144,7 +144,7 @@ struct mqueue_inode_info {
 	struct pid *notify_owner;
 	u32 notify_self_exec_id;
 	struct user_namespace *notify_user_ns;
-	struct user_struct *user;	/* user who created, for accounting */
+	struct ucounts *ucounts;	/* user who created, for accounting */
 	struct sock *notify_sock;
 	struct sk_buff *notify_cookie;
 
@@ -292,7 +292,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		struct ipc_namespace *ipc_ns, umode_t mode,
 		struct mq_attr *attr)
 {
-	struct user_struct *u = current_user();
 	struct inode *inode;
 	int ret = -ENOMEM;
 
@@ -321,7 +320,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		info->notify_owner = NULL;
 		info->notify_user_ns = NULL;
 		info->qsize = 0;
-		info->user = NULL;	/* set when all is ok */
+		info->ucounts = NULL;	/* set when all is ok */
 		info->msg_tree = RB_ROOT;
 		info->msg_tree_rightmost = NULL;
 		info->node_cache = NULL;
@@ -371,19 +370,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		if (mq_bytes + mq_treesize < mq_bytes)
 			goto out_inode;
 		mq_bytes += mq_treesize;
-		spin_lock(&mq_lock);
-		if (u->mq_bytes + mq_bytes < u->mq_bytes ||
-		    u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
+		info->ucounts = get_ucounts(current_ucounts());
+		if (info->ucounts) {
+			long msgqueue;
+
+			spin_lock(&mq_lock);
+			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
+				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
+				spin_unlock(&mq_lock);
+				put_ucounts(info->ucounts);
+				info->ucounts = NULL;
+				/* mqueue_evict_inode() releases info->messages */
+				ret = -EMFILE;
+				goto out_inode;
+			}
 			spin_unlock(&mq_lock);
-			/* mqueue_evict_inode() releases info->messages */
-			ret = -EMFILE;
-			goto out_inode;
 		}
-		u->mq_bytes += mq_bytes;
-		spin_unlock(&mq_lock);
-
-		/* all is ok */
-		info->user = get_uid(u);
 	} else if (S_ISDIR(mode)) {
 		inc_nlink(inode);
 		/* Some things misbehave if size == 0 on a directory */
@@ -497,7 +500,6 @@ static void mqueue_free_inode(struct inode *inode)
 static void mqueue_evict_inode(struct inode *inode)
 {
 	struct mqueue_inode_info *info;
-	struct user_struct *user;
 	struct ipc_namespace *ipc_ns;
 	struct msg_msg *msg, *nmsg;
 	LIST_HEAD(tmp_msg);
@@ -520,8 +522,7 @@ static void mqueue_evict_inode(struct inode *inode)
 		free_msg(msg);
 	}
 
-	user = info->user;
-	if (user) {
+	if (info->ucounts) {
 		unsigned long mq_bytes, mq_treesize;
 
 		/* Total amount of bytes accounted for the mqueue */
@@ -533,7 +534,7 @@ static void mqueue_evict_inode(struct inode *inode)
 					  info->attr.mq_msgsize);
 
 		spin_lock(&mq_lock);
-		user->mq_bytes -= mq_bytes;
+		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
 		/*
 		 * get_ns_from_inode() ensures that the
 		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
@@ -543,7 +544,8 @@ static void mqueue_evict_inode(struct inode *inode)
 		if (ipc_ns)
 			ipc_ns->mq_queues_count--;
 		spin_unlock(&mq_lock);
-		free_uid(user);
+		put_ucounts(info->ucounts);
+		info->ucounts = NULL;
 	}
 	if (ipc_ns)
 		put_ipc_ns(ipc_ns);
diff --git a/kernel/fork.c b/kernel/fork.c
index ed7dfb07178d3..a9c5097dfc860 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -823,6 +823,7 @@ void __init fork_init(void)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 6caa56f7dec85..6e6f936a5963a 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ },
 	{ }
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e6577c8350720..7eccc4f84549f 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -123,6 +123,7 @@ int create_user_ns(struct cred *new)
 		ns->ucount_max[i] = INT_MAX;
 	}
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
+	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
-- 
GitLab


From d64696905554e919321e31afc210606653b8f6a4 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:13 +0200
Subject: [PATCH 0035/3804] Reimplement RLIMIT_SIGPENDING on top of ucounts

The rlimit counter is tied to the uid in the user_namespace. This allows
rlimit values to be specified in a userns even if they are already
globally exceeded by the user. However, the limits of the ancestor
user_namespaces still cannot be exceeded.

Changelog

v11:
* Revert most of the changes to fix performance issues.

v10:
* Fix memory leak on get_ucounts failure.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/df9d7764dddd50f28616b7840de74ec0f81711a8.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/proc/array.c                |  2 +-
 include/linux/sched/user.h     |  1 -
 include/linux/signal_types.h   |  4 +++-
 include/linux/user_namespace.h |  1 +
 kernel/fork.c                  |  1 +
 kernel/signal.c                | 25 +++++++++++++------------
 kernel/ucount.c                |  1 +
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  1 +
 9 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index bb87e4d89cd8f..74b0ea4b7e385 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p)
 		collect_sigign_sigcatch(p, &ignored, &caught);
 		num_threads = get_nr_threads(p);
 		rcu_read_lock();  /* FIXME: is this correct? */
-		qsize = atomic_read(&__task_cred(p)->user->sigpending);
+		qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING);
 		rcu_read_unlock();
 		qlim = task_rlimit(p, RLIMIT_SIGPENDING);
 		unlock_task_sighand(p, &flags);
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8a34446681aa6..8ba9cec4fb99b 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -12,7 +12,6 @@
  */
 struct user_struct {
 	refcount_t __count;	/* reference count */
-	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_FANOTIFY
 	atomic_t fanotify_listeners;
 #endif
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 68e06c75c5b22..34cb28b8f16ca 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -13,6 +13,8 @@ typedef struct kernel_siginfo {
 	__SIGINFO;
 } kernel_siginfo_t;
 
+struct ucounts;
+
 /*
  * Real Time signals may be queued.
  */
@@ -21,7 +23,7 @@ struct sigqueue {
 	struct list_head list;
 	int flags;
 	kernel_siginfo_t info;
-	struct user_struct *user;
+	struct ucounts *ucounts;
 };
 
 /* flags values. */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 5eeb86b00e686..58f4179864726 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -52,6 +52,7 @@ enum ucount_type {
 #endif
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
+	UCOUNT_RLIMIT_SIGPENDING,
 	UCOUNT_COUNTS,
 };
 
diff --git a/kernel/fork.c b/kernel/fork.c
index a9c5097dfc860..03119926b27dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -824,6 +824,7 @@ void __init fork_init(void)
 
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/signal.c b/kernel/signal.c
index f2718350bf4b5..9a6dab712123e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -413,8 +413,8 @@ static struct sigqueue *
 __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
 {
 	struct sigqueue *q = NULL;
-	struct user_struct *user;
-	int sigpending;
+	struct ucounts *ucounts = NULL;
+	long sigpending;
 
 	/*
 	 * Protect access to @t credentials. This can go away when all
@@ -425,27 +425,26 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
 	 * changes from/to zero.
 	 */
 	rcu_read_lock();
-	user = __task_cred(t)->user;
-	sigpending = atomic_inc_return(&user->sigpending);
+	ucounts = task_ucounts(t);
+	sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
 	if (sigpending == 1)
-		get_uid(user);
+		ucounts = get_ucounts(ucounts);
 	rcu_read_unlock();
 
-	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
+	if (override_rlimit || (sigpending < LONG_MAX && sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 		q = kmem_cache_alloc(sigqueue_cachep, flags);
 	} else {
 		print_dropped_signal(sig);
 	}
 
 	if (unlikely(q == NULL)) {
-		if (atomic_dec_and_test(&user->sigpending))
-			free_uid(user);
+		if (ucounts && dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
+			put_ucounts(ucounts);
 	} else {
 		INIT_LIST_HEAD(&q->list);
 		q->flags = 0;
-		q->user = user;
+		q->ucounts = ucounts;
 	}
-
 	return q;
 }
 
@@ -453,8 +452,10 @@ static void __sigqueue_free(struct sigqueue *q)
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
-	if (atomic_dec_and_test(&q->user->sigpending))
-		free_uid(q->user);
+	if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
+		put_ucounts(q->ucounts);
+		q->ucounts = NULL;
+	}
 	kmem_cache_free(sigqueue_cachep, q);
 }
 
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 6e6f936a5963a..8ce62da6a62c5 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -80,6 +80,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
 #endif
+	{ },
 	{ },
 	{ },
 	{ }
diff --git a/kernel/user.c b/kernel/user.c
index 7f5ff498207a7..6737327f83beb 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
 	.ratelimit	= RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 7eccc4f84549f..822eacee45885 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -124,6 +124,7 @@ int create_user_ns(struct cred *new)
 	}
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
+	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
-- 
GitLab


From d7c9e99aee48e6bc0b427f3e3c658a6aba15001e Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:14 +0200
Subject: [PATCH 0036/3804] Reimplement RLIMIT_MEMLOCK on top of ucounts

The rlimit counter is tied to the uid in the user_namespace. This allows
rlimit values to be specified in a userns even if they are already
globally exceeded by the user. However, the limits of the ancestor
user_namespaces still cannot be exceeded.

Changelog

v11:
* Fix issue found by lkp robot.

v8:
* Fix issues found by lkp-tests project.

v7:
* Keep only ucounts for RLIMIT_MEMLOCK checks instead of struct cred.

v6:
* Fix bug in hugetlb_file_setup() detected by trinity.

Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/970d50c70c71bfd4496e0e8d2a0a32feebebb350.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/hugetlbfs/inode.c           | 16 ++++++++--------
 include/linux/hugetlb.h        |  4 ++--
 include/linux/mm.h             |  4 ++--
 include/linux/sched/user.h     |  1 -
 include/linux/shmem_fs.h       |  2 +-
 include/linux/user_namespace.h |  1 +
 ipc/shm.c                      | 26 +++++++++++++-------------
 kernel/fork.c                  |  1 +
 kernel/ucount.c                |  1 +
 kernel/user.c                  |  1 -
 kernel/user_namespace.c        |  1 +
 mm/memfd.c                     |  4 ++--
 mm/mlock.c                     | 22 ++++++++++++++--------
 mm/mmap.c                      |  4 ++--
 mm/shmem.c                     | 10 +++++-----
 15 files changed, 53 insertions(+), 45 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 701c82c361383..be519fc9559a3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1443,7 +1443,7 @@ static int get_hstate_idx(int page_size_log)
  * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
  */
 struct file *hugetlb_file_setup(const char *name, size_t size,
-				vm_flags_t acctflag, struct user_struct **user,
+				vm_flags_t acctflag, struct ucounts **ucounts,
 				int creat_flags, int page_size_log)
 {
 	struct inode *inode;
@@ -1455,20 +1455,20 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	if (hstate_idx < 0)
 		return ERR_PTR(-ENODEV);
 
-	*user = NULL;
+	*ucounts = NULL;
 	mnt = hugetlbfs_vfsmount[hstate_idx];
 	if (!mnt)
 		return ERR_PTR(-ENOENT);
 
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
-		*user = current_user();
-		if (user_shm_lock(size, *user)) {
+		*ucounts = current_ucounts();
+		if (user_shm_lock(size, *ucounts)) {
 			task_lock(current);
 			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
 				current->comm, current->pid);
 			task_unlock(current);
 		} else {
-			*user = NULL;
+			*ucounts = NULL;
 			return ERR_PTR(-EPERM);
 		}
 	}
@@ -1495,9 +1495,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 
 	iput(inode);
 out:
-	if (*user) {
-		user_shm_unlock(size, *user);
-		*user = NULL;
+	if (*ucounts) {
+		user_shm_unlock(size, *ucounts);
+		*ucounts = NULL;
 	}
 	return file;
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index cccd1aab69dd1..96d63dbdec65c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -434,7 +434,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
-				struct user_struct **user, int creat_flags,
+				struct ucounts **ucounts, int creat_flags,
 				int page_size_log);
 
 static inline bool is_file_hugepages(struct file *file)
@@ -454,7 +454,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
 #define is_file_hugepages(file)			false
 static inline struct file *
 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
-		struct user_struct **user, int creat_flags,
+		struct ucounts **ucounts, int creat_flags,
 		int page_size_log)
 {
 	return ERR_PTR(-ENOSYS);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8ba434287387b..3b4e24738ce45 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1670,8 +1670,8 @@ extern bool can_do_mlock(void);
 #else
 static inline bool can_do_mlock(void) { return false; }
 #endif
-extern int user_shm_lock(size_t, struct user_struct *);
-extern void user_shm_unlock(size_t, struct user_struct *);
+extern int user_shm_lock(size_t, struct ucounts *);
+extern void user_shm_unlock(size_t, struct ucounts *);
 
 /*
  * Parameter block passed down to zap_pte_range in exceptional cases.
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 8ba9cec4fb99b..82bd2532da6bc 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -18,7 +18,6 @@ struct user_struct {
 #ifdef CONFIG_EPOLL
 	atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
 #endif
-	unsigned long locked_shm; /* How many pages of mlocked shm ? */
 	unsigned long unix_inflight;	/* How many files in flight in unix sockets */
 	atomic_long_t pipe_bufs;  /* how many pages are allocated in pipe buffers */
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index d82b6f3965885..aa77dcd1646fb 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
 extern int shmem_zero_setup(struct vm_area_struct *);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
-extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
+extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
 #ifdef CONFIG_SHMEM
 extern const struct address_space_operations shmem_aops;
 static inline bool shmem_mapping(struct address_space *mapping)
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 58f4179864726..2a3177b9b8bfb 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -53,6 +53,7 @@ enum ucount_type {
 	UCOUNT_RLIMIT_NPROC,
 	UCOUNT_RLIMIT_MSGQUEUE,
 	UCOUNT_RLIMIT_SIGPENDING,
+	UCOUNT_RLIMIT_MEMLOCK,
 	UCOUNT_COUNTS,
 };
 
diff --git a/ipc/shm.c b/ipc/shm.c
index febd88daba8c6..003234fbbd176 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -60,7 +60,7 @@ struct shmid_kernel /* private to the kernel */
 	time64_t		shm_ctim;
 	struct pid		*shm_cprid;
 	struct pid		*shm_lprid;
-	struct user_struct	*mlock_user;
+	struct ucounts		*mlock_ucounts;
 
 	/* The task created the shm object.  NULL if the task is dead. */
 	struct task_struct	*shm_creator;
@@ -286,10 +286,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_rmid(ns, shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
-		shmem_lock(shm_file, 0, shp->mlock_user);
-	else if (shp->mlock_user)
+		shmem_lock(shm_file, 0, shp->mlock_ucounts);
+	else if (shp->mlock_ucounts)
 		user_shm_unlock(i_size_read(file_inode(shm_file)),
-				shp->mlock_user);
+				shp->mlock_ucounts);
 	fput(shm_file);
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
@@ -625,7 +625,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 
 	shp->shm_perm.key = key;
 	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
-	shp->mlock_user = NULL;
+	shp->mlock_ucounts = NULL;
 
 	shp->shm_perm.security = NULL;
 	error = security_shm_alloc(&shp->shm_perm);
@@ -650,7 +650,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, hugesize, acctflag,
-				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
+				  &shp->mlock_ucounts, HUGETLB_SHMFS_INODE,
 				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 	} else {
 		/*
@@ -698,8 +698,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 no_id:
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
-	if (is_file_hugepages(file) && shp->mlock_user)
-		user_shm_unlock(size, shp->mlock_user);
+	if (is_file_hugepages(file) && shp->mlock_ucounts)
+		user_shm_unlock(size, shp->mlock_ucounts);
 	fput(file);
 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
@@ -1105,12 +1105,12 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
 		goto out_unlock0;
 
 	if (cmd == SHM_LOCK) {
-		struct user_struct *user = current_user();
+		struct ucounts *ucounts = current_ucounts();
 
-		err = shmem_lock(shm_file, 1, user);
+		err = shmem_lock(shm_file, 1, ucounts);
 		if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
 			shp->shm_perm.mode |= SHM_LOCKED;
-			shp->mlock_user = user;
+			shp->mlock_ucounts = ucounts;
 		}
 		goto out_unlock0;
 	}
@@ -1118,9 +1118,9 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
 	/* SHM_UNLOCK */
 	if (!(shp->shm_perm.mode & SHM_LOCKED))
 		goto out_unlock0;
-	shmem_lock(shm_file, 0, shp->mlock_user);
+	shmem_lock(shm_file, 0, shp->mlock_ucounts);
 	shp->shm_perm.mode &= ~SHM_LOCKED;
-	shp->mlock_user = NULL;
+	shp->mlock_ucounts = NULL;
 	get_file(shm_file);
 	ipc_unlock_object(&shp->shm_perm);
 	rcu_read_unlock();
diff --git a/kernel/fork.c b/kernel/fork.c
index 03119926b27dd..610fd4de60d76 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -825,6 +825,7 @@ void __init fork_init(void)
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
 	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
+	init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK);
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 8ce62da6a62c5..d316bac3e5200 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -83,6 +83,7 @@ static struct ctl_table user_table[] = {
 	{ },
 	{ },
 	{ },
+	{ },
 	{ }
 };
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/user.c b/kernel/user.c
index 6737327f83beb..c82399c1618a6 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 /* root_user.__count is 1, for init task cred */
 struct user_struct root_user = {
 	.__count	= REFCOUNT_INIT(1),
-	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
 	.ratelimit	= RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0),
 };
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 822eacee45885..892da1360862a 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -125,6 +125,7 @@ int create_user_ns(struct cred *new)
 	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
 	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
 	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
+	ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK);
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
diff --git a/mm/memfd.c b/mm/memfd.c
index 2647c898990c8..081dd33e6a61b 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -297,9 +297,9 @@ SYSCALL_DEFINE2(memfd_create,
 	}
 
 	if (flags & MFD_HUGETLB) {
-		struct user_struct *user = NULL;
+		struct ucounts *ucounts = NULL;
 
-		file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user,
+		file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts,
 					HUGETLB_ANONHUGE_INODE,
 					(flags >> MFD_HUGE_SHIFT) &
 					MFD_HUGE_MASK);
diff --git a/mm/mlock.c b/mm/mlock.c
index f8f8cc32d03d0..dd411aabf695b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -817,9 +817,10 @@ SYSCALL_DEFINE0(munlockall)
  */
 static DEFINE_SPINLOCK(shmlock_user_lock);
 
-int user_shm_lock(size_t size, struct user_struct *user)
+int user_shm_lock(size_t size, struct ucounts *ucounts)
 {
 	unsigned long lock_limit, locked;
+	long memlock;
 	int allowed = 0;
 
 	locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -828,21 +829,26 @@ int user_shm_lock(size_t size, struct user_struct *user)
 		allowed = 1;
 	lock_limit >>= PAGE_SHIFT;
 	spin_lock(&shmlock_user_lock);
-	if (!allowed &&
-	    locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
+	memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+
+	if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
+		goto out;
+	}
+	if (!get_ucounts(ucounts)) {
+		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
 		goto out;
-	get_uid(user);
-	user->locked_shm += locked;
+	}
 	allowed = 1;
 out:
 	spin_unlock(&shmlock_user_lock);
 	return allowed;
 }
 
-void user_shm_unlock(size_t size, struct user_struct *user)
+void user_shm_unlock(size_t size, struct ucounts *ucounts)
 {
 	spin_lock(&shmlock_user_lock);
-	user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
 	spin_unlock(&shmlock_user_lock);
-	free_uid(user);
+	put_ucounts(ucounts);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f287599a7a30..99f97d200aa4d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1605,7 +1605,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out_fput;
 		}
 	} else if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
+		struct ucounts *ucounts = NULL;
 		struct hstate *hs;
 
 		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
@@ -1621,7 +1621,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 		 */
 		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
 				VM_NORESERVE,
-				&user, HUGETLB_ANONHUGE_INODE,
+				&ucounts, HUGETLB_ANONHUGE_INODE,
 				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
 		if (IS_ERR(file))
 			return PTR_ERR(file);
diff --git a/mm/shmem.c b/mm/shmem.c
index b2db4ed0fbc7c..7ee6d27222e9e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2227,7 +2227,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 }
 #endif
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
 {
 	struct inode *inode = file_inode(file);
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -2239,13 +2239,13 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	 * no serialization needed when called from shm_destroy().
 	 */
 	if (lock && !(info->flags & VM_LOCKED)) {
-		if (!user_shm_lock(inode->i_size, user))
+		if (!user_shm_lock(inode->i_size, ucounts))
 			goto out_nomem;
 		info->flags |= VM_LOCKED;
 		mapping_set_unevictable(file->f_mapping);
 	}
-	if (!lock && (info->flags & VM_LOCKED) && user) {
-		user_shm_unlock(inode->i_size, user);
+	if (!lock && (info->flags & VM_LOCKED) && ucounts) {
+		user_shm_unlock(inode->i_size, ucounts);
 		info->flags &= ~VM_LOCKED;
 		mapping_clear_unevictable(file->f_mapping);
 	}
@@ -4093,7 +4093,7 @@ int shmem_unuse(unsigned int type, bool frontswap,
 	return 0;
 }
 
-int shmem_lock(struct file *file, int lock, struct user_struct *user)
+int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
 {
 	return 0;
 }
-- 
GitLab


From e4aebf06695c32d49f1007f9d252f97b5b2998a7 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:15 +0200
Subject: [PATCH 0037/3804] kselftests: Add test to check for rlimit changes in
 different user namespaces

The testcase runs a few instances of the program with RLIMIT_NPROC=1 as
user uid=60000, in different user namespaces.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/28cafdcdd4abd8494b34a27f1970b666b30de8bf.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 tools/testing/selftests/Makefile              |   1 +
 tools/testing/selftests/rlimits/.gitignore    |   2 +
 tools/testing/selftests/rlimits/Makefile      |   6 +
 tools/testing/selftests/rlimits/config        |   1 +
 .../selftests/rlimits/rlimits-per-userns.c    | 161 ++++++++++++++++++
 5 files changed, 171 insertions(+)
 create mode 100644 tools/testing/selftests/rlimits/.gitignore
 create mode 100644 tools/testing/selftests/rlimits/Makefile
 create mode 100644 tools/testing/selftests/rlimits/config
 create mode 100644 tools/testing/selftests/rlimits/rlimits-per-userns.c

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 6c575cf34a71f..a4ea1481bd9ac 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -48,6 +48,7 @@ TARGETS += proc
 TARGETS += pstore
 TARGETS += ptrace
 TARGETS += openat2
+TARGETS += rlimits
 TARGETS += rseq
 TARGETS += rtc
 TARGETS += seccomp
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 0000000000000..091021f255b34
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 0000000000000..03aadb4062121
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 0000000000000..416bd53ce9828
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 0000000000000..26dc949e93eae
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user   = 60000;
+static uid_t group  = 60000;
+
+static void setrlimit_nproc(rlim_t n)
+{
+	pid_t pid = getpid();
+	struct rlimit limit = {
+		.rlim_cur = n,
+		.rlim_max = n
+	};
+
+	warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+
+	if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+	pid_t pid = fork();
+
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+
+	if (pid > 0)
+		return pid;
+
+	pid = getpid();
+
+	warnx("(pid=%d): New process starting ...", pid);
+
+	if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+		err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+	signal(SIGUSR1, SIG_DFL);
+
+	warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+	if (setgid(group) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+	if (setuid(user) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+	warnx("(pid=%d): Service running ...", pid);
+
+	warnx("(pid=%d): Unshare user namespace", pid);
+	if (unshare(CLONE_NEWUSER) < 0)
+		err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+	char *const argv[] = { "service", NULL };
+	char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+	warnx("(pid=%d): Executing real service ...", pid);
+
+	execve(service_prog, argv, envp);
+	err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+	size_t i;
+	pid_t child[NR_CHILDS];
+	int wstatus[NR_CHILDS];
+	int childs = NR_CHILDS;
+	pid_t pid;
+
+	if (getenv("I_AM_SERVICE")) {
+		pause();
+		exit(EXIT_SUCCESS);
+	}
+
+	service_prog = argv[0];
+	pid = getpid();
+
+	warnx("(pid=%d) Starting testcase", pid);
+
+	/*
+	 * This rlimit is not a problem for root because it can be exceeded.
+	 */
+	setrlimit_nproc(1);
+
+	for (i = 0; i < NR_CHILDS; i++) {
+		child[i] = fork_child();
+		wstatus[i] = 0;
+		usleep(250000);
+	}
+
+	while (1) {
+		for (i = 0; i < NR_CHILDS; i++) {
+			if (child[i] <= 0)
+				continue;
+
+			errno = 0;
+			pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+			if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+				continue;
+
+			if (ret < 0 && errno != ECHILD)
+				warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+			child[i] *= -1;
+			childs -= 1;
+		}
+
+		if (!childs)
+			break;
+
+		usleep(250000);
+
+		for (i = 0; i < NR_CHILDS; i++) {
+			if (child[i] <= 0)
+				continue;
+			kill(child[i], SIGUSR1);
+		}
+	}
+
+	for (i = 0; i < NR_CHILDS; i++) {
+		if (WIFEXITED(wstatus[i]))
+			warnx("(pid=%d): pid %d exited, status=%d",
+				pid, -child[i], WEXITSTATUS(wstatus[i]));
+		else if (WIFSIGNALED(wstatus[i]))
+			warnx("(pid=%d): pid %d killed by signal %d",
+				pid, -child[i], WTERMSIG(wstatus[i]));
+
+		if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+			continue;
+
+		warnx("(pid=%d): Test failed", pid);
+		exit(EXIT_FAILURE);
+	}
+
+	warnx("(pid=%d): Test passed", pid);
+	exit(EXIT_SUCCESS);
+}
-- 
GitLab


From c1ada3dc7219b02b3467aa906c2f5f8b098578d1 Mon Sep 17 00:00:00 2001
From: Alexey Gladkov <legion@kernel.org>
Date: Thu, 22 Apr 2021 14:27:16 +0200
Subject: [PATCH 0038/3804] ucounts: Set ucount_max to the largest positive
 value the type can hold

ns->ucount_max[] holds signed long values, whose maximum is smaller than
the maximum of an rlimit (unsigned long). We have to protect ucount_max[]
from overflow and only store the largest value that it can hold.

On 32bit using "long" instead of "unsigned long" to hold the counts has
the downside that RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK are limited to 2GiB
instead of 4GiB. I don't think anyone cares but it should be mentioned
in case someone does.

The RLIMIT_NPROC and RLIMIT_SIGPENDING used atomic_t so their maximum
hasn't changed.

Signed-off-by: Alexey Gladkov <legion@kernel.org>
Link: https://lkml.kernel.org/r/1825a5dfa18bc5a570e79feb05e2bd07fd57e7e3.1619094428.git.legion@kernel.org
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/user_namespace.h | 6 ++++++
 kernel/fork.c                  | 8 ++++----
 kernel/user_namespace.c        | 8 ++++----
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 2a3177b9b8bfb..61794ae32fa8f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -125,6 +125,12 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 
+static inline void set_rlimit_ucount_max(struct user_namespace *ns,
+		enum ucount_type type, unsigned long max)
+{
+	ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX;
+}
+
 #ifdef CONFIG_USER_NS
 
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
diff --git a/kernel/fork.c b/kernel/fork.c
index 610fd4de60d76..c41820481b2e1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -822,10 +822,10 @@ void __init fork_init(void)
 	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++)
 		init_user_ns.ucount_max[i] = max_threads/2;
 
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING);
-	init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK);
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, task_rlimit(&init_task, RLIMIT_NPROC));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, task_rlimit(&init_task, RLIMIT_MSGQUEUE));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, task_rlimit(&init_task, RLIMIT_SIGPENDING));
+	set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, task_rlimit(&init_task, RLIMIT_MEMLOCK));
 
 #ifdef CONFIG_VMAP_STACK
 	cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 892da1360862a..d4a545bbab7f5 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -122,10 +122,10 @@ int create_user_ns(struct cred *new)
 	for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) {
 		ns->ucount_max[i] = INT_MAX;
 	}
-	ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC);
-	ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE);
-	ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING);
-	ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK);
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING));
+	set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK));
 	ns->ucounts = ucounts;
 
 	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
-- 
GitLab


From f928ef685db5d9b82c1c1e24e229c167426c5a1f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 13:00:26 -0500
Subject: [PATCH 0039/3804] ucounts: Silence warning in dec_rlimit_ucounts

Dan Carpenter <dan.carpenter@oracle.com> wrote:
>
> url:    https://github.com/0day-ci/linux/commits/legion-kernel-org/Count-rlimits-in-each-user-namespace/20210427-162857
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
> config: arc-randconfig-m031-20210426 (attached as .config)
> compiler: arceb-elf-gcc (GCC) 9.3.0
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
>
> smatch warnings:
> kernel/ucount.c:270 dec_rlimit_ucounts() error: uninitialized symbol 'new'.
>
> vim +/new +270 kernel/ucount.c
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  260  bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  261  {
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  262   struct ucounts *iter;
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  263   long new;
>                                                 ^^^^^^^^
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  264   for (iter = ucounts; iter; iter = iter->ns->ucounts) {
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  265    long dec = atomic_long_add_return(-v, &iter->ucount[type]);
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  266    WARN_ON_ONCE(dec < 0);
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  267    if (iter == ucounts)
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  268     new = dec;
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  269   }
> 176ec2b092cc22 Alexey Gladkov 2021-04-22 @270   return (new == 0);
>                                                         ^^^^^^^^
> I don't know if this is a bug or not, but I can definitely tell why the
> static checker complains about it.
>
> 176ec2b092cc22 Alexey Gladkov 2021-04-22  271  }

In the only two cases that care about the return value of
dec_rlimit_ucounts the code first tests to see that ucounts is not
NULL.  In those cases it is guaranteed at least one iteration of the
loop will execute guaranteeing the variable new will be initialized.

Initialize new to -1 so that the return value is well defined even
when the loop does not execute and the static checker is silenced.

Link: https://lkml.kernel.org/r/m1tunny77w.fsf@fess.ebiederm.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 kernel/ucount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/ucount.c b/kernel/ucount.c
index d316bac3e5200..df84a2a639269 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -263,7 +263,7 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 {
 	struct ucounts *iter;
-	long new;
+	long new = -1; /* Silence compiler warning */
 	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 		long dec = atomic_long_add_return(-v, &iter->ucount[type]);
 		WARN_ON_ONCE(dec < 0);
-- 
GitLab


From ff76d506030daeeeb967be8b8a189bf7aee8e7a8 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Thu, 29 Apr 2021 16:12:26 +1200
Subject: [PATCH 0040/3804] KVM: x86/mmu: Avoid unnecessary page table
 allocation in kvm_tdp_mmu_map()

In kvm_tdp_mmu_map(), while iterating TDP MMU page table entries, it is
possible that an SPTE has already been frozen by another thread whose
update is not done yet, for instance, when another thread is still in
the middle of zapping a large page.  In this case, the
!is_shadow_present_pte() check for the old SPTE in tdp_mmu_for_each_pte()
may evaluate to true, and allocating a new page table is unnecessary
since tdp_mmu_set_spte_atomic() will later return false and the page
table will need to be freed.  Add an is_removed_spte() check before
allocating a new page table to avoid this.

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20210429041226.50279-1-kai.huang@intel.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 88f69a6cc4922..3c8284841bed4 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1009,6 +1009,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
+			/*
+			 * If SPTE has been frozen by another thread, just
+			 * give up and retry, avoiding unnecessary page table
+			 * allocation and free.
+			 */
+			if (is_removed_spte(iter.old_spte))
+				break;
+
 			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
 			child_pt = sp->spt;
 
-- 
GitLab


From 1699f65c8b658d434fe92563c906cd1a136c9cb6 Mon Sep 17 00:00:00 2001
From: "Shahin, Md Shahadat Hossain" <shahinmd@amazon.de>
Date: Fri, 30 Apr 2021 11:52:31 +0000
Subject: [PATCH 0041/3804] kvm/x86: Fix 'lpages' kvm stat for TDP MMU

Large pages not being created properly may result in increased memory
access time. The 'lpages' kvm stat used to keep track of the current
number of large pages in the system, but with TDP MMU enabled the stat
is not showing the correct number.

This patch extends the lpages counter to cover the TDP case.

Signed-off-by: Md Shahadat Hossain Shahin <shahinmd@amazon.de>
Cc: Bartosz Szczepanek <bsz@amazon.de>
Message-Id: <1619783551459.35424@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 3c8284841bed4..c743894fe0b7c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 
 	trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
 
+	if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+		if (is_large_pte(old_spte))
+			atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+		else
+			atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+	}
+
 	/*
 	 * The only times a SPTE should be changed from a non-present to
 	 * non-present state is when an MMIO entry is installed/modified/
-- 
GitLab


From d981dd15498b188636ec5a7d8ad485e650f63d8d Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Wed, 28 Apr 2021 19:08:02 +0800
Subject: [PATCH 0042/3804] KVM: LAPIC: Accurately guarantee busy wait for
 timer to expire when using hv_timer

Commit ee66e453db13d (KVM: lapic: Busy wait for timer to expire when
using hv_timer) tries to set ktime->expired_tscdeadline by checking
ktime->hv_timer_in_use since lapic timer oneshot/periodic modes which
are emulated by vmx preemption timer also get advanced, they leverage
the same vmx preemption timer logic with tsc-deadline mode. However,
ktime->hv_timer_in_use is cleared before apic_timer_expired() handling,
let's delay this clearing in preemption-disabled region.

Fixes: ee66e453db13d ("KVM: lapic: Busy wait for timer to expire when using hv_timer")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1619608082-4187-1-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 152591f9243ab..c0ebef560bd14 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
 	WARN_ON(rcuwait_active(&vcpu->wait));
-	cancel_hv_timer(apic);
 	apic_timer_expired(apic, false);
+	cancel_hv_timer(apic);
 
 	if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
 		advance_periodic_target_expiration(apic);
-- 
GitLab


From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001
From: Benjamin Segall <bsegall@google.com>
Date: Thu, 29 Apr 2021 16:22:34 +0000
Subject: [PATCH 0043/3804] kvm: exit halt polling on need_resched() as well

single_task_running() is usually more general than need_resched()
but CFS_BANDWIDTH throttling will use resched_task() when there
is just one task to get the task to block. This was causing
long-need_resched warnings and was likely allowing VMs to
overrun their quota when halt polling.

Signed-off-by: Ben Segall <bsegall@google.com>
Signed-off-by: Venkatesh Srinivas <venkateshs@chromium.org>
Message-Id: <20210429162233.116849-1-venkateshs@chromium.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Jim Mattson <jmattson@google.com>
---
 virt/kvm/kvm_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2799c6660ccea..b9f12da6af0ea 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 				goto out;
 			}
 			poll_end = cur = ktime_get();
-		} while (single_task_running() && ktime_before(cur, stop));
+		} while (single_task_running() && !need_resched() &&
+			 ktime_before(cur, stop));
 	}
 
 	prepare_to_rcuwait(&vcpu->wait);
-- 
GitLab


From deee59bacb2402c20e6b1b6800f9a5127367eb2a Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:42 +0300
Subject: [PATCH 0044/3804] KVM: nSVM: fix a typo in svm_leave_nested

When forcibly leaving the nested mode, we should switch to vmcb01

Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 540d43ba2cf46..3321220f3deb7 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -886,7 +886,7 @@ void svm_leave_nested(struct vcpu_svm *svm)
 		svm->nested.nested_run_pending = 0;
 		leave_guest_mode(vcpu);
 
-		svm_switch_vmcb(svm, &svm->nested.vmcb02);
+		svm_switch_vmcb(svm, &svm->vmcb01);
 
 		nested_svm_uninit_mmu_context(vcpu);
 		vmcb_mark_all_dirty(svm->vmcb);
-- 
GitLab


From c74ad08f3333db2e44d3346b863f6d10d35e37dd Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:43 +0300
Subject: [PATCH 0045/3804] KVM: nSVM: fix few bugs in the vmcb02 caching logic

* Define and use an invalid GPA (all ones) for init value of last
  and current nested vmcb physical addresses.

* Reset the current vmcb12 gpa to the invalid value when leaving
  the nested mode, similar to what is done on nested vmexit.

* Reset the last seen vmcb12 address when disabling the nested SVM,
  as it relies on vmcb02 fields which are freed at that point.

Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/nested.c       | 11 +++++++++++
 arch/x86/kvm/svm/svm.c          |  4 ++--
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cbbcee0a84f92..848956bb3cf1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3321220f3deb7..a88c64e004c3d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -872,6 +872,15 @@ void svm_free_nested(struct vcpu_svm *svm)
 	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
 	svm->nested.vmcb02.ptr = NULL;
 
+	/*
+	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
+	 * some vmcb12 fields are not loaded if they are marked clean
+	 * in the vmcb12, since in this case they are up to date already.
+	 *
+	 * When the vmcb02 is freed, this optimization becomes invalid.
+	 */
+	svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
 	svm->nested.initialized = false;
 }
 
@@ -884,6 +893,8 @@ void svm_leave_nested(struct vcpu_svm *svm)
 
 	if (is_guest_mode(vcpu)) {
 		svm->nested.nested_run_pending = 0;
+		svm->nested.vmcb12_gpa = INVALID_GPA;
+
 		leave_guest_mode(vcpu);
 
 		svm_switch_vmcb(svm, &svm->vmcb01);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9790c73f2a325..be5cf612ab1fe 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1235,8 +1235,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 	svm->current_vmcb->asid_generation = 0;
 	svm->asid = 0;
 
-	svm->nested.vmcb12_gpa = 0;
-	svm->nested.last_vmcb12_gpa = 0;
+	svm->nested.vmcb12_gpa = INVALID_GPA;
+	svm->nested.last_vmcb12_gpa = INVALID_GPA;
 	vcpu->arch.hflags = 0;
 
 	if (!kvm_pause_in_guest(vcpu->kvm)) {
-- 
GitLab


From 9d290e16432cacd448475d38dec2753b75b9665f Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 3 May 2021 15:54:44 +0300
Subject: [PATCH 0046/3804] KVM: nSVM: leave the guest mode prior to loading a
 nested state

This allows KVM to load the nested state more than
once without warnings.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210503125446.1353307-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a88c64e004c3d..32400cba608d4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1309,12 +1309,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	 * L2 registers if needed are moved from the current VMCB to VMCB02.
 	 */
 
+	if (is_guest_mode(vcpu))
+		svm_leave_nested(svm);
+	else
+		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
 	svm->nested.nested_run_pending =
 		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
-	if (svm->current_vmcb == &svm->vmcb01)
-		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
 	svm->vmcb01.ptr->save.es = save->es;
 	svm->vmcb01.ptr->save.cs = save->cs;
-- 
GitLab


From 7f6231a39117c2781beead59d6ae4923c2703147 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Mon, 3 May 2021 16:24:46 +1200
Subject: [PATCH 0047/3804] KVM: x86/mmu: Fix kdoc of __handle_changed_spte

The kdoc comment for __handle_changed_spte() should name the function
itself, rather than handle_changed_spte().  Fix the typo.

Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20210503042446.154695-1-kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index c743894fe0b7c..95eeb5ac6a8a7 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 }
 
 /**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
  * @kvm: kvm instance
  * @as_id: the address space of the paging structure the SPTE was a part of
  * @gfn: the base GFN that was mapped by the SPTE
-- 
GitLab


From 8899a5fc7da516460f841189a28aac0b52b554fd Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Apr 2021 18:03:03 +0100
Subject: [PATCH 0048/3804] KVM: x86: Fix potential fput on a null
 source_kvm_file

The fget can potentially return null, so the fput on the error return
path can cause a null pointer dereference. Fix this by checking for
a null source_kvm_file before doing a fput.

Addresses-Coverity: ("Dereference null return")
Fixes: 54526d1fd593 ("KVM: x86: Support KVM VMs sharing SEV context")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Message-Id: <20210430170303.131924-1-colin.king@canonical.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1356ee095cd55..8b11c711a0e40 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1764,7 +1764,8 @@ e_mirror_unlock:
 e_source_unlock:
 	mutex_unlock(&source_kvm->lock);
 e_source_put:
-	fput(source_kvm_file);
+	if (source_kvm_file)
+		fput(source_kvm_file);
 	return ret;
 }
 
-- 
GitLab


From 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Fri, 30 Apr 2021 19:59:51 +0800
Subject: [PATCH 0049/3804] btrfs: fix unmountable seed device after fstrim

The following test case reproduces an issue of wrongly freeing in-use
blocks on the readonly seed device when fstrim is called on the rw sprout
device, as shown below.

Create a seed device and add a sprout device to it:

  $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0
  $ btrfstune -S 1 /dev/loop0
  $ mount /dev/loop0 /btrfs
  $ btrfs dev add -f /dev/loop1 /btrfs
  BTRFS info (device loop0): relocating block group 290455552 flags system
  BTRFS info (device loop0): relocating block group 1048576 flags system
  BTRFS info (device loop0): disk added /dev/loop1
  $ umount /btrfs

Mount the sprout device and run fstrim:

  $ mount /dev/loop1 /btrfs
  $ fstrim /btrfs
  $ umount /btrfs

Now try to mount the seed device, and it fails:

  $ mount /dev/loop0 /btrfs
  mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error.

Block 5292032 is missing on the readonly seed device:

 $ dmesg -kt | tail
 <snip>
 BTRFS error (device loop0): bad tree block start, want 5292032 have 0
 BTRFS warning (device loop0): couldn't read-tree root
 BTRFS error (device loop0): open_ctree failed

From the dump-tree of the seed device (taken before the fstrim), block
5292032 belonged to the block group starting at 5242880:

  $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP
  <snip>
  item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24
  	block group used 114688 chunk_objectid 256 flags METADATA
  <snip>

From the dump-tree of the sprout device (taken before the fstrim),
fstrim used block-group 5242880 to find the related free space to free:

  $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP
  <snip>
  item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24
  	block group used 32768 chunk_objectid 256 flags METADATA
  <snip>

BPF kernel tracing the fstrim command finds the missing block 5292032
within the range of the discarded blocks as below:

  kprobe:btrfs_discard_extent {
  	printf("freeing start %llu end %llu num_bytes %llu:\n",
  		arg1, arg1+arg2, arg2);
  }

  freeing start 5259264 end 5406720 num_bytes 147456
  <snip>

Fix this by avoiding the discard command to the readonly seed device.

Reported-by: Chris Murphy <lists@colorremedies.com>
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 7a28314189b4a..f1d15b68994a0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
 		stripe = bbio->stripes;
 		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
 			u64 bytes;
+			struct btrfs_device *device = stripe->dev;
 
-			if (!stripe->dev->bdev) {
+			if (!device->bdev) {
 				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
 				continue;
 			}
 
+			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+				continue;
+
 			ret = do_discard_extent(stripe, &bytes);
 			if (!ret) {
 				discarded_bytes += bytes;
-- 
GitLab


From 784daf2b9628f2d0117f1f0b578cfe5ab6634919 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 30 Apr 2021 15:34:17 +0200
Subject: [PATCH 0050/3804] btrfs: zoned: sanity check zone type

The fstests test case generic/475 creates a dm-linear device that gets
changed to a dm-error device. This leads to errors in loading the block
group's zone information when running on a zoned file system, ultimately
resulting in a list corruption. When running on a kernel with list
debugging enabled this leads to the following crash.

 BTRFS: error (device dm-2) in cleanup_transaction:1953: errno=-5 IO failure
 kernel BUG at lib/list_debug.c:54!
 invalid opcode: 0000 [#1] SMP PTI
 CPU: 1 PID: 2433 Comm: umount Tainted: G        W         5.12.0+ #1018
 RIP: 0010:__list_del_entry_valid.cold+0x1d/0x47
 RSP: 0018:ffffc90001473df0 EFLAGS: 00010296
 RAX: 0000000000000054 RBX: ffff8881038fd000 RCX: ffffc90001473c90
 RDX: 0000000100001a31 RSI: 0000000000000003 RDI: 0000000000000003
 RBP: ffff888308871108 R08: 0000000000000003 R09: 0000000000000001
 R10: 3961373532383838 R11: 6666666620736177 R12: ffff888308871000
 R13: ffff8881038fd088 R14: ffff8881038fdc78 R15: dead000000000100
 FS:  00007f353c9b1540(0000) GS:ffff888627d00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f353cc2c710 CR3: 000000018e13c000 CR4: 00000000000006a0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  btrfs_free_block_groups+0xc9/0x310 [btrfs]
  close_ctree+0x2ee/0x31a [btrfs]
  ? call_rcu+0x8f/0x270
  ? mutex_lock+0x1c/0x40
  generic_shutdown_super+0x67/0x100
  kill_anon_super+0x14/0x30
  btrfs_kill_super+0x12/0x20 [btrfs]
  deactivate_locked_super+0x31/0x90
  cleanup_mnt+0x13e/0x1b0
  task_work_run+0x63/0xb0
  exit_to_user_mode_loop+0xd9/0xe0
  exit_to_user_mode_prepare+0x3e/0x60
  syscall_exit_to_user_mode+0x1d/0x50
  entry_SYSCALL_64_after_hwframe+0x44/0xae

As dm-error has no support for zones, btrfs will run its zone emulation
mode on this device. The zone emulation mode emulates conventional zones,
so bail out if the zone bitmap that gets populated on mount sees the zone
as sequential while we're thinking it's a conventional zone when creating
a block group.

Note: this scenario is unlikely in a real-world application and can only
happen by this (ab)use of device-mapper targets.

CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 70b23a0d03b10..304ce64c70a44 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 			goto out;
 		}
 
+		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+			ret = -EIO;
+			goto out;
+		}
+
 		switch (zone.cond) {
 		case BLK_ZONE_COND_OFFLINE:
 		case BLK_ZONE_COND_READONLY:
-- 
GitLab


From 77364faf21b4105ee5adbb4844fdfb461334d249 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 30 Apr 2021 11:06:55 -0700
Subject: [PATCH 0051/3804] btrfs: initialize return variable in
 cleanup_free_space_cache_v1

Static analysis reports this problem

  free-space-cache.c:3965:2: warning: Undefined or garbage value returned
    return ret;
    ^~~~~~~~~~

ret is set in the node handling loop.  Treat doing nothing as a success
and initialize ret to 0, although it's unlikely the loop would be
skipped. We always have block groups, but as it could lead to
transaction abort in the caller it's better to be safe.

CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Tom Rix <trix@redhat.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/free-space-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e54466fc101f7..4806295116d88 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_block_group *block_group;
 	struct rb_node *node;
-	int ret;
+	int ret = 0;
 
 	btrfs_info(fs_info, "cleaning free space cache v1");
 
-- 
GitLab


From 0a269a008f837e76ce285679ab3005059fadc2a6 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:40 +0200
Subject: [PATCH 0052/3804] x86/kvm: Fix pr_info() for async PF setup/teardown

'pr_fmt' already has 'kvm-guest: ' so 'KVM' prefix is redundant.
"Unregister pv shared memory" is very ambiguous, it's hard to
say which particular PV feature it relates to.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d307c22e5c188..dc440bb692223 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -345,7 +345,7 @@ static void kvm_guest_cpu_init(void)
 
 		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
 		__this_cpu_write(apf_reason.enabled, 1);
-		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+		pr_info("setup async PF for cpu %d\n", smp_processor_id());
 	}
 
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,7 +371,7 @@ static void kvm_pv_disable_apf(void)
 	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
 	__this_cpu_write(apf_reason.enabled, 0);
 
-	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+	pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
 static void kvm_pv_guest_cpu_reboot(void *unused)
-- 
GitLab


From 9f015b3765bf593b3ed5d3b588e409dc0ffa9f85 Mon Sep 17 00:00:00 2001
From: Rijo Thomas <Rijo-john.Thomas@amd.com>
Date: Wed, 14 Apr 2021 23:08:27 +0530
Subject: [PATCH 0053/3804] tee: amdtee: unload TA only when its refcount
 becomes 0

The same Trusted Application (TA) can be loaded in multiple TEE contexts.

If it is a single instance TA, the TA should not get unloaded from AMD
Secure Processor, while it is still in use in another TEE context.

Therefore, reference count the TA and unload it when the count becomes zero.

Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver")
Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
Acked-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/amdtee/amdtee_private.h | 13 ++++
 drivers/tee/amdtee/call.c           | 94 ++++++++++++++++++++++++++---
 drivers/tee/amdtee/core.c           | 15 +++--
 3 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h
index 337c8d82f74eb..6d0f7062bb870 100644
--- a/drivers/tee/amdtee/amdtee_private.h
+++ b/drivers/tee/amdtee/amdtee_private.h
@@ -21,6 +21,7 @@
 #define TEEC_SUCCESS			0x00000000
 #define TEEC_ERROR_GENERIC		0xFFFF0000
 #define TEEC_ERROR_BAD_PARAMETERS	0xFFFF0006
+#define TEEC_ERROR_OUT_OF_MEMORY	0xFFFF000C
 #define TEEC_ERROR_COMMUNICATION	0xFFFF000E
 
 #define TEEC_ORIGIN_COMMS		0x00000002
@@ -93,6 +94,18 @@ struct amdtee_shm_data {
 	u32     buf_id;
 };
 
+/**
+ * struct amdtee_ta_data - Keeps track of all TAs loaded in AMD Secure
+ *			   Processor
+ * @ta_handle:	Handle to TA loaded in TEE
+ * @refcount:	Reference count for the loaded TA
+ */
+struct amdtee_ta_data {
+	struct list_head list_node;
+	u32 ta_handle;
+	u32 refcount;
+};
+
 #define LOWER_TWO_BYTE_MASK	0x0000FFFF
 
 /**
diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c
index 096dd4d92d39c..07f36ac834c88 100644
--- a/drivers/tee/amdtee/call.c
+++ b/drivers/tee/amdtee/call.c
@@ -121,15 +121,69 @@ static int amd_params_to_tee_params(struct tee_param *tee, u32 count,
 	return ret;
 }
 
+static DEFINE_MUTEX(ta_refcount_mutex);
+static struct list_head ta_list = LIST_HEAD_INIT(ta_list);
+
+static u32 get_ta_refcount(u32 ta_handle)
+{
+	struct amdtee_ta_data *ta_data;
+	u32 count = 0;
+
+	/* Caller must hold a mutex */
+	list_for_each_entry(ta_data, &ta_list, list_node)
+		if (ta_data->ta_handle == ta_handle)
+			return ++ta_data->refcount;
+
+	ta_data = kzalloc(sizeof(*ta_data), GFP_KERNEL);
+	if (ta_data) {
+		ta_data->ta_handle = ta_handle;
+		ta_data->refcount = 1;
+		count = ta_data->refcount;
+		list_add(&ta_data->list_node, &ta_list);
+	}
+
+	return count;
+}
+
+static u32 put_ta_refcount(u32 ta_handle)
+{
+	struct amdtee_ta_data *ta_data;
+	u32 count = 0;
+
+	/* Caller must hold a mutex */
+	list_for_each_entry(ta_data, &ta_list, list_node)
+		if (ta_data->ta_handle == ta_handle) {
+			count = --ta_data->refcount;
+			if (count == 0) {
+				list_del(&ta_data->list_node);
+				kfree(ta_data);
+				break;
+			}
+		}
+
+	return count;
+}
+
 int handle_unload_ta(u32 ta_handle)
 {
 	struct tee_cmd_unload_ta cmd = {0};
-	u32 status;
+	u32 status, count;
 	int ret;
 
 	if (!ta_handle)
 		return -EINVAL;
 
+	mutex_lock(&ta_refcount_mutex);
+
+	count = put_ta_refcount(ta_handle);
+
+	if (count) {
+		pr_debug("unload ta: not unloading %u count %u\n",
+			 ta_handle, count);
+		ret = -EBUSY;
+		goto unlock;
+	}
+
 	cmd.ta_handle = ta_handle;
 
 	ret = psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, (void *)&cmd,
@@ -137,8 +191,12 @@ int handle_unload_ta(u32 ta_handle)
 	if (!ret && status != 0) {
 		pr_err("unload ta: status = 0x%x\n", status);
 		ret = -EBUSY;
+	} else {
+		pr_debug("unloaded ta handle %u\n", ta_handle);
 	}
 
+unlock:
+	mutex_unlock(&ta_refcount_mutex);
 	return ret;
 }
 
@@ -340,7 +398,8 @@ int handle_open_session(struct tee_ioctl_open_session_arg *arg, u32 *info,
 
 int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
 {
-	struct tee_cmd_load_ta cmd = {0};
+	struct tee_cmd_unload_ta unload_cmd = {};
+	struct tee_cmd_load_ta load_cmd = {};
 	phys_addr_t blob;
 	int ret;
 
@@ -353,21 +412,36 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg)
 		return -EINVAL;
 	}
 
-	cmd.hi_addr = upper_32_bits(blob);
-	cmd.low_addr = lower_32_bits(blob);
-	cmd.size = size;
+	load_cmd.hi_addr = upper_32_bits(blob);
+	load_cmd.low_addr = lower_32_bits(blob);
+	load_cmd.size = size;
 
-	ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&cmd,
-				  sizeof(cmd), &arg->ret);
+	mutex_lock(&ta_refcount_mutex);
+
+	ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&load_cmd,
+				  sizeof(load_cmd), &arg->ret);
 	if (ret) {
 		arg->ret_origin = TEEC_ORIGIN_COMMS;
 		arg->ret = TEEC_ERROR_COMMUNICATION;
-	} else {
-		set_session_id(cmd.ta_handle, 0, &arg->session);
+	} else if (arg->ret == TEEC_SUCCESS) {
+		ret = get_ta_refcount(load_cmd.ta_handle);
+		if (!ret) {
+			arg->ret_origin = TEEC_ORIGIN_COMMS;
+			arg->ret = TEEC_ERROR_OUT_OF_MEMORY;
+
+			/* Unload the TA on error */
+			unload_cmd.ta_handle = load_cmd.ta_handle;
+			psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA,
+					    (void *)&unload_cmd,
+					    sizeof(unload_cmd), &ret);
+		} else {
+			set_session_id(load_cmd.ta_handle, 0, &arg->session);
+		}
 	}
+	mutex_unlock(&ta_refcount_mutex);
 
 	pr_debug("load TA: TA handle = 0x%x, RO = 0x%x, ret = 0x%x\n",
-		 cmd.ta_handle, arg->ret_origin, arg->ret);
+		 load_cmd.ta_handle, arg->ret_origin, arg->ret);
 
 	return 0;
 }
diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c
index 8a6a8f30bb427..da6b88e80dc07 100644
--- a/drivers/tee/amdtee/core.c
+++ b/drivers/tee/amdtee/core.c
@@ -59,10 +59,9 @@ static void release_session(struct amdtee_session *sess)
 			continue;
 
 		handle_close_session(sess->ta_handle, sess->session_info[i]);
+		handle_unload_ta(sess->ta_handle);
 	}
 
-	/* Unload Trusted Application once all sessions are closed */
-	handle_unload_ta(sess->ta_handle);
 	kfree(sess);
 }
 
@@ -224,8 +223,6 @@ static void destroy_session(struct kref *ref)
 	struct amdtee_session *sess = container_of(ref, struct amdtee_session,
 						   refcount);
 
-	/* Unload the TA from TEE */
-	handle_unload_ta(sess->ta_handle);
 	mutex_lock(&session_list_mutex);
 	list_del(&sess->list_node);
 	mutex_unlock(&session_list_mutex);
@@ -238,7 +235,7 @@ int amdtee_open_session(struct tee_context *ctx,
 {
 	struct amdtee_context_data *ctxdata = ctx->data;
 	struct amdtee_session *sess = NULL;
-	u32 session_info;
+	u32 session_info, ta_handle;
 	size_t ta_size;
 	int rc, i;
 	void *ta;
@@ -259,11 +256,14 @@ int amdtee_open_session(struct tee_context *ctx,
 	if (arg->ret != TEEC_SUCCESS)
 		goto out;
 
+	ta_handle = get_ta_handle(arg->session);
+
 	mutex_lock(&session_list_mutex);
 	sess = alloc_session(ctxdata, arg->session);
 	mutex_unlock(&session_list_mutex);
 
 	if (!sess) {
+		handle_unload_ta(ta_handle);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -277,6 +277,7 @@ int amdtee_open_session(struct tee_context *ctx,
 
 	if (i >= TEE_NUM_SESSIONS) {
 		pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS);
+		handle_unload_ta(ta_handle);
 		kref_put(&sess->refcount, destroy_session);
 		rc = -ENOMEM;
 		goto out;
@@ -289,12 +290,13 @@ int amdtee_open_session(struct tee_context *ctx,
 		spin_lock(&sess->lock);
 		clear_bit(i, sess->sess_mask);
 		spin_unlock(&sess->lock);
+		handle_unload_ta(ta_handle);
 		kref_put(&sess->refcount, destroy_session);
 		goto out;
 	}
 
 	sess->session_info[i] = session_info;
-	set_session_id(sess->ta_handle, i, &arg->session);
+	set_session_id(ta_handle, i, &arg->session);
 out:
 	free_pages((u64)ta, get_order(ta_size));
 	return rc;
@@ -329,6 +331,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session)
 
 	/* Close the session */
 	handle_close_session(ta_handle, session_info);
+	handle_unload_ta(ta_handle);
 
 	kref_put(&sess->refcount, destroy_session);
 
-- 
GitLab


From 6a01268687c8d00e59dff341c519a337de980d2e Mon Sep 17 00:00:00 2001
From: Benjamin Moody <bmoody@member.fsf.org>
Date: Sun, 7 Feb 2021 13:47:04 -0500
Subject: [PATCH 0054/3804] HID: semitek: new driver for GK6X series keyboards

A number of USB keyboards, using the Semitek firmware, are capable of
handling arbitrary N-key rollover, but due to a buggy report
descriptor, keys beyond the sixth cannot be detected by the generic
HID driver.

There are numerous hardware variants sold by several vendors, mostly
using generic names like "GK61" for the 61-key version.  These
keyboards are sometimes known collectively as the "GK6X" series.

The keyboard has three USB interfaces.  Interface 0 uses the standard
HID boot protocol, limited to eight modifier keys and six normal keys;
interface 2 uses a custom report format that permits any number of
keys.  If more than six keys are pressed simultaneously, the first six
are reported via interface 0 while subsequent keys are reported via
interface 2.

(Interface 1 uses a custom protocol for reprogramming the keyboard;
this can be controlled through userspace tools and is not of concern
for the present driver.)

The report descriptor for interface 2, however, is incorrect (for
report ID 0x04, the input field is marked as "array" rather than
"variable".)  The descriptor appears to be correct in other respects,
so we simply replace the incorrect byte before parsing the descriptor.

Signed-off-by: Benjamin Moody <bmoody@member.fsf.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig       | 15 +++++++++++++++
 drivers/hid/Makefile      |  1 +
 drivers/hid/hid-ids.h     |  3 +++
 drivers/hid/hid-semitek.c | 40 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)
 create mode 100644 drivers/hid/hid-semitek.c

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 4bf263c2d61a4..5756203af068d 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -922,6 +922,21 @@ config HID_SAMSUNG
 	help
 	Support for Samsung InfraRed remote control or keyboards.
 
+config HID_SEMITEK
+	tristate "Semitek USB keyboards"
+	depends on HID
+	help
+	Support for Semitek USB keyboards that are not fully compliant
+	with the HID standard.
+
+	There are many variants, including:
+	- GK61, GK64, GK68, GK84, GK96, etc.
+	- SK61, SK64, SK68, SK84, SK96, etc.
+	- Dierya DK61/DK66
+	- Tronsmart TK09R
+	- Woo-dy
+	- X-Bows Nature/Knight
+
 config HID_SONY
 	tristate "Sony PS2/3/4 accessories"
 	depends on USB_HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index 193431ec4db84..1ea1a7c0b20fe 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -106,6 +106,7 @@ obj-$(CONFIG_HID_ROCCAT)	+= hid-roccat.o hid-roccat-common.o \
 obj-$(CONFIG_HID_RMI)		+= hid-rmi.o
 obj-$(CONFIG_HID_SAITEK)	+= hid-saitek.o
 obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
+obj-$(CONFIG_HID_SEMITEK)	+= hid-semitek.o
 obj-$(CONFIG_HID_SMARTJOYPLUS)	+= hid-sjoy.o
 obj-$(CONFIG_HID_SONY)		+= hid-sony.o
 obj-$(CONFIG_HID_SPEEDLINK)	+= hid-speedlink.o
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 84b8da3e7d09a..e0d8dab18a7db 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1060,6 +1060,9 @@
 #define USB_DEVICE_ID_SEMICO_USB_KEYKOARD	0x0023
 #define USB_DEVICE_ID_SEMICO_USB_KEYKOARD2	0x0027
 
+#define USB_VENDOR_ID_SEMITEK	0x1ea7
+#define USB_DEVICE_ID_SEMITEK_KEYBOARD	0x0907
+
 #define USB_VENDOR_ID_SENNHEISER	0x1395
 #define USB_DEVICE_ID_SENNHEISER_BTD500USB	0x002c
 
diff --git a/drivers/hid/hid-semitek.c b/drivers/hid/hid-semitek.c
new file mode 100644
index 0000000000000..ba6607d5e0510
--- /dev/null
+++ b/drivers/hid/hid-semitek.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  HID driver for Semitek keyboards
+ *
+ *  Copyright (c) 2021 Benjamin Moody
+ */
+
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/module.h>
+
+#include "hid-ids.h"
+
+static __u8 *semitek_report_fixup(struct hid_device *hdev, __u8 *rdesc,
+                                  unsigned int *rsize)
+{
+	/* In the report descriptor for interface 2, fix the incorrect
+	   description of report ID 0x04 (the report contains a
+	   bitmask, not an array of keycodes.) */
+	if (*rsize == 0xcb && rdesc[0x83] == 0x81 && rdesc[0x84] == 0x00) {
+		hid_info(hdev, "fixing up Semitek report descriptor\n");
+		rdesc[0x84] = 0x02;
+	}
+	return rdesc;
+}
+
+static const struct hid_device_id semitek_devices[] = {
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SEMITEK, USB_DEVICE_ID_SEMITEK_KEYBOARD) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, semitek_devices);
+
+static struct hid_driver semitek_driver = {
+	.name = "semitek",
+	.id_table = semitek_devices,
+	.report_fixup = semitek_report_fixup,
+};
+module_hid_driver(semitek_driver);
+
+MODULE_LICENSE("GPL");
-- 
GitLab


From 4bfb2c72b2bfca8684c2f5c25a3119bad016a9d3 Mon Sep 17 00:00:00 2001
From: Luke D Jones <luke@ljones.dev>
Date: Fri, 19 Feb 2021 10:38:46 +1300
Subject: [PATCH 0055/3804] HID: asus: Filter keyboard EC for old ROG keyboard

Older ROG keyboards emit a similar stream of bytes to the new
N-Key keyboards and require filtering to prevent a lot of
unmapped key warnings showing. As all the ROG keyboards use
QUIRK_USE_KBD_BACKLIGHT this is now used to branch to filtering
in asus_raw_event.

Signed-off-by: Luke D Jones <luke@ljones.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-asus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 2ab22b9259418..1ed1c05c3d542 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -335,7 +335,7 @@ static int asus_raw_event(struct hid_device *hdev,
 	if (drvdata->quirks & QUIRK_MEDION_E1239T)
 		return asus_e1239t_event(drvdata, data, size);
 
-	if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD) {
+	if (drvdata->quirks & QUIRK_USE_KBD_BACKLIGHT) {
 		/*
 		 * Skip these report ID, the device emits a continuous stream associated
 		 * with the AURA mode it is in which looks like an 'echo'.
-- 
GitLab


From 25bdbfbb2d8331a67824dd03d0087e9c98835f3a Mon Sep 17 00:00:00 2001
From: Nirenjan Krishnan <nirenjan@gmail.com>
Date: Mon, 29 Mar 2021 09:10:02 -0700
Subject: [PATCH 0056/3804] HID: quirks: Set INCREMENT_USAGE_ON_DUPLICATE for
 Saitek X65

The Saitek X65 joystick has a pair of axes that were used as mouse
pointer controls by the Windows driver. The corresponding usage page is
the Game Controls page, which is not recognized by the generic HID
driver, and therefore, both axes get mapped to ABS_MISC. The quirk makes
the second axis get mapped to ABS_MISC+1, and therefore made available
separately.

Signed-off-by: Nirenjan Krishnan <nirenjan@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h    | 1 +
 drivers/hid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index e0d8dab18a7db..7601ec19ab289 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1051,6 +1051,7 @@
 #define USB_DEVICE_ID_SAITEK_X52	0x075c
 #define USB_DEVICE_ID_SAITEK_X52_2	0x0255
 #define USB_DEVICE_ID_SAITEK_X52_PRO	0x0762
+#define USB_DEVICE_ID_SAITEK_X65	0x0b6a
 
 #define USB_VENDOR_ID_SAMSUNG		0x0419
 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE	0x0001
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 3dd6f15f2a67f..152c6aab11b51 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -158,6 +158,7 @@ static const struct hid_device_id hid_quirks[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_2), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_PRO), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X65), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD2), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB), HID_QUIRK_NOGET },
-- 
GitLab


From ed1ab6ff213a701d4a635883c63e0d6fcbbab27d Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 2 Apr 2021 09:40:41 +0000
Subject: [PATCH 0057/3804] HID: thrustmaster: fix return value check in
 thrustmaster_probe()

Fix the return value check, which tests the wrong variable
in thrustmaster_probe().

Fixes: c49c33637802 ("HID: support for initialization of some Thrustmaster wheels")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-thrustmaster.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c
index 2e452c6e8ef40..f643b1cb112d5 100644
--- a/drivers/hid/hid-thrustmaster.c
+++ b/drivers/hid/hid-thrustmaster.c
@@ -312,7 +312,7 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i
 	}
 
 	tm_wheel->change_request = kzalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL);
-	if (!tm_wheel->model_request) {
+	if (!tm_wheel->change_request) {
 		ret = -ENOMEM;
 		goto error5;
 	}
-- 
GitLab


From ed80bdc4571fae177c44eba0997a0d551fc21e15 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 4 Apr 2021 17:40:54 +0200
Subject: [PATCH 0058/3804] HID: quirks: Add HID_QUIRK_NO_INIT_REPORTS quirk
 for Dell K15A keyboard-dock

Just like the K12A the Dell K15A keyboard-dock has problems with
get_feature requests. This sometimes leads to several
"failed to fetch feature 8" messages getting logged, after which the
touchpad may or may not work.

Just like on the K12A, these errors are triggered by undocking and docking
the tablet.

There also seem to be other problems when undocking and then docking again
in quick succession. It seems that in this case the keyboard-controller
still retains some power from capacitors and does not go through a
power-on-reset, leaving it in a confused state; symptoms of this are:

1. The USB-ids changing to 048d:8910
2. Failure to read the HID descriptors on the second (mouse) USB intf.
3. The touchpad freezing after a while

These problems can all be cleared by undocking the keyboard and waiting
a full minute before redocking it. Unfortunately there is nothing we can
do about this in the kernel.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h    | 1 +
 drivers/hid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 7601ec19ab289..c6a6c8f547954 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1165,6 +1165,7 @@
 #define USB_DEVICE_ID_SYNAPTICS_DELL_K12A	0x2819
 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012	0x2968
 #define USB_DEVICE_ID_SYNAPTICS_TP_V103	0x5710
+#define USB_DEVICE_ID_SYNAPTICS_DELL_K15A	0x6e21
 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002	0x73f4
 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003	0x73f5
 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5	0x81a7
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index 152c6aab11b51..fc6173a91af58 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -177,6 +177,7 @@ static const struct hid_device_id hid_quirks[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_QUAD_HD), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_TP_V103), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K12A), HID_QUIRK_NO_INIT_REPORTS },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K15A), HID_QUIRK_NO_INIT_REPORTS },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOPMAX, USB_DEVICE_ID_TOPMAX_COBRAPAD), HID_QUIRK_BADPAD },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS), HID_QUIRK_MULTI_INPUT },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8882), HID_QUIRK_NOGET },
-- 
GitLab


From 9858c74c29e12be5886280725e781cb735b2aca6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= <mat.jonczyk@o2.pl>
Date: Tue, 6 Apr 2021 20:25:38 +0200
Subject: [PATCH 0059/3804] HID: a4tech: use A4_2WHEEL_MOUSE_HACK_B8 for A4TECH
 NB-95
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This mouse has a horizontal wheel that requires special handling.
Without this patch, the horizontal wheel acts like a vertical wheel.

In the output of `hidrd-convert` for this mouse, there is a
`Usage (B8h)` field. It corresponds to a byte in packets sent by the
device that specifies which wheel generated an input event.

The name "A4TECH" is spelled in all capitals on the company website.

Signed-off-by: Mateusz Jończyk <mat.jonczyk@o2.pl>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig      | 4 ++--
 drivers/hid/hid-a4tech.c | 2 ++
 drivers/hid/hid-ids.h    | 1 +
 drivers/hid/hid-quirks.c | 1 +
 4 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 5756203af068d..160554903ef96 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -93,11 +93,11 @@ menu "Special HID drivers"
 	depends on HID
 
 config HID_A4TECH
-	tristate "A4 tech mice"
+	tristate "A4TECH mice"
 	depends on HID
 	default !EXPERT
 	help
-	Support for A4 tech X5 and WOP-35 / Trust 450L mice.
+	Support for some A4TECH mice with two scroll wheels.
 
 config HID_ACCUTOUCH
 	tristate "Accutouch touch device"
diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c
index 3a8c4a5971f70..2cbc32dda7f74 100644
--- a/drivers/hid/hid-a4tech.c
+++ b/drivers/hid/hid-a4tech.c
@@ -147,6 +147,8 @@ static const struct hid_device_id a4_devices[] = {
 		.driver_data = A4_2WHEEL_MOUSE_HACK_B8 },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649),
 		.driver_data = A4_2WHEEL_MOUSE_HACK_B8 },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_NB_95),
+		.driver_data = A4_2WHEEL_MOUSE_HACK_B8 },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, a4_devices);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index c6a6c8f547954..933b0ed9d3ed7 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -26,6 +26,7 @@
 #define USB_DEVICE_ID_A4TECH_WCP32PU	0x0006
 #define USB_DEVICE_ID_A4TECH_X5_005D	0x000a
 #define USB_DEVICE_ID_A4TECH_RP_649	0x001a
+#define USB_DEVICE_ID_A4TECH_NB_95	0x022b
 
 #define USB_VENDOR_ID_AASHIMA		0x06d6
 #define USB_DEVICE_ID_AASHIMA_GAMEPAD	0x0025
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index fc6173a91af58..d2933f2ffec58 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -213,6 +213,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_NB_95) },
 #endif
 #if IS_ENABLED(CONFIG_HID_ACCUTOUCH)
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) },
-- 
GitLab


From 7b229b13d78d112e2c5d4a60a3c6f602289959fa Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 10 Apr 2021 19:56:05 -0700
Subject: [PATCH 0060/3804] HID: hid-input: add mapping for emoji picker key

HUTRR101 added a new usage code for a key that is supposed to invoke and
dismiss an emoji picker widget to assist users to locate and enter emojis.

This patch adds a new key definition KEY_EMOJI_PICKER and maps 0x0c/0x0d9
usage code to this new keycode. Additionally hid-debug is adjusted to
recognize this new usage code as well.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c                | 1 +
 drivers/hid/hid-input.c                | 3 +++
 include/uapi/linux/input-event-codes.h | 1 +
 3 files changed, 5 insertions(+)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 59f8d716d78f5..75a59e41724de 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -930,6 +930,7 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_APPSELECT] = "AppSelect",
 	[KEY_SCREENSAVER] = "ScreenSaver",
 	[KEY_VOICECOMMAND] = "VoiceCommand",
+	[KEY_EMOJI_PICKER] = "EmojiPicker",
 	[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
 	[KEY_BRIGHTNESS_MAX] = "BrightnessMax",
 	[KEY_BRIGHTNESS_AUTO] = "BrightnessAuto",
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 18f5e28d475cd..abbfa91e73e43 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -964,6 +964,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 
 		case 0x0cd: map_key_clear(KEY_PLAYPAUSE);	break;
 		case 0x0cf: map_key_clear(KEY_VOICECOMMAND);	break;
+
+		case 0x0d9: map_key_clear(KEY_EMOJI_PICKER);	break;
+
 		case 0x0e0: map_abs_clear(ABS_VOLUME);		break;
 		case 0x0e2: map_key_clear(KEY_MUTE);		break;
 		case 0x0e5: map_key_clear(KEY_BASSBOOST);	break;
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index ee93428ced9a1..225ec87d4f228 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -611,6 +611,7 @@
 #define KEY_VOICECOMMAND		0x246	/* Listening Voice Command */
 #define KEY_ASSISTANT		0x247	/* AL Context-aware desktop assistant */
 #define KEY_KBD_LAYOUT_NEXT	0x248	/* AC Next Keyboard Layout Select */
+#define KEY_EMOJI_PICKER	0x249	/* Show/hide emoji picker (HUTRR101) */
 
 #define KEY_BRIGHTNESS_MIN		0x250	/* Set Brightness to Minimum */
 #define KEY_BRIGHTNESS_MAX		0x251	/* Set Brightness to Maximum */
-- 
GitLab


From 0f0fb3d27e5ba51e40f2af4288efeaf3d293ef1a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 10 Apr 2021 19:56:06 -0700
Subject: [PATCH 0061/3804] HID: hid-debug: recognize KEY_ASSISTANT and
 KEY_KBD_LAYOUT_NEXT

Add missing descriptions for KEY_ASSISTANT and KEY_KBD_LAYOUT_NEXT.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 75a59e41724de..a311fb87b02a1 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -930,6 +930,8 @@ static const char *keys[KEY_MAX + 1] = {
 	[KEY_APPSELECT] = "AppSelect",
 	[KEY_SCREENSAVER] = "ScreenSaver",
 	[KEY_VOICECOMMAND] = "VoiceCommand",
+	[KEY_ASSISTANT] = "Assistant",
+	[KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext",
 	[KEY_EMOJI_PICKER] = "EmojiPicker",
 	[KEY_BRIGHTNESS_MIN] = "BrightnessMin",
 	[KEY_BRIGHTNESS_MAX] = "BrightnessMax",
-- 
GitLab


From b0d713c60c75cdd04bf8ad8cfb046c8530709de3 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 11 Apr 2021 13:34:02 +0200
Subject: [PATCH 0062/3804] HID: surface-hid: Fix integer endian conversion

We want to convert from 16 bit (unsigned) little endian values contained
in a packed struct to CPU native endian values here, not the other way
around. So replace cpu_to_le16() with get_unaligned_le16(), using the
latter instead of le16_to_cpu() to acknowledge that we are reading from
a packed struct.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: b05ff1002a5c ("HID: Add support for Surface Aggregator Module HID transport")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/surface-hid/surface_hid_core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/surface-hid/surface_hid_core.c b/drivers/hid/surface-hid/surface_hid_core.c
index 7b27ec3922322..5571e74abe91b 100644
--- a/drivers/hid/surface-hid/surface_hid_core.c
+++ b/drivers/hid/surface-hid/surface_hid_core.c
@@ -168,9 +168,9 @@ int surface_hid_device_add(struct surface_hid_device *shid)
 
 	shid->hid->dev.parent = shid->dev;
 	shid->hid->bus = BUS_HOST;
-	shid->hid->vendor = cpu_to_le16(shid->attrs.vendor);
-	shid->hid->product = cpu_to_le16(shid->attrs.product);
-	shid->hid->version = cpu_to_le16(shid->hid_desc.hid_version);
+	shid->hid->vendor = get_unaligned_le16(&shid->attrs.vendor);
+	shid->hid->product = get_unaligned_le16(&shid->attrs.product);
+	shid->hid->version = get_unaligned_le16(&shid->hid_desc.hid_version);
 	shid->hid->country = shid->hid_desc.country_code;
 
 	snprintf(shid->hid->name, sizeof(shid->hid->name), "Microsoft Surface %04X:%04X",
-- 
GitLab


From b45ef5db7bf268f6851bb5395d60301338374abc Mon Sep 17 00:00:00 2001
From: Michael Zaidman <michael.zaidman@gmail.com>
Date: Tue, 13 Apr 2021 18:12:00 +0300
Subject: [PATCH 0063/3804] HID: ft260: check data size in ft260_smbus_write()

The SMbus block transaction limits the number of bytes transferred to 32,
but nothing prevents a user from specifying via ioctl a larger data size
than the ft260 can handle in a single transfer.

i2cdev_ioctl_smbus()
   --> i2c_smbus_xfer
       --> __i2c_smbus_xfer
           --> ft260_smbus_xfer
               --> ft260_smbus_write

This patch adds data size checking in the ft260_smbus_write().

Fixes: 98189a0adfa0 ("HID: ft260: add usb hid to i2c host bridge driver")
Signed-off-by: Michael Zaidman <michael.zaidman@gmail.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ft260.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index a5751607ce24a..7a9ba984a75ac 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -201,7 +201,7 @@ struct ft260_i2c_write_request_report {
 	u8 address;		/* 7-bit I2C address */
 	u8 flag;		/* I2C transaction condition */
 	u8 length;		/* data payload length */
-	u8 data[60];		/* data payload */
+	u8 data[FT260_WR_DATA_MAX]; /* data payload */
 } __packed;
 
 struct ft260_i2c_read_request_report {
@@ -429,6 +429,9 @@ static int ft260_smbus_write(struct ft260_device *dev, u8 addr, u8 cmd,
 	struct ft260_i2c_write_request_report *rep =
 		(struct ft260_i2c_write_request_report *)dev->write_buf;
 
+	if (data_len >= sizeof(rep->data))
+		return -EINVAL;
+
 	rep->address = addr;
 	rep->data[0] = cmd;
 	rep->length = data_len + 1;
-- 
GitLab


From edb032033da0dc850f6e7740fa1023c73195bc89 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 15 Apr 2021 11:52:31 -0700
Subject: [PATCH 0064/3804] HID: hid-sensor-hub: Return error for
 hid_set_field() failure

In the function sensor_hub_set_feature(), return error when hid_set_field()
fails.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sensor-hub.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 95cf88f3bafb9..6abd3e2a9094c 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -209,16 +209,21 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 	buffer_size = buffer_size / sizeof(__s32);
 	if (buffer_size) {
 		for (i = 0; i < buffer_size; ++i) {
-			hid_set_field(report->field[field_index], i,
-				      (__force __s32)cpu_to_le32(*buf32));
+			ret = hid_set_field(report->field[field_index], i,
+					    (__force __s32)cpu_to_le32(*buf32));
+			if (ret)
+				goto done_proc;
+
 			++buf32;
 		}
 	}
 	if (remaining_bytes) {
 		value = 0;
 		memcpy(&value, (u8 *)buf32, remaining_bytes);
-		hid_set_field(report->field[field_index], i,
-			      (__force __s32)cpu_to_le32(value));
+		ret = hid_set_field(report->field[field_index], i,
+				    (__force __s32)cpu_to_le32(value));
+		if (ret)
+			goto done_proc;
 	}
 	hid_hw_request(hsdev->hdev, report, HID_REQ_SET_REPORT);
 	hid_hw_wait(hsdev->hdev);
-- 
GitLab


From c980512b4512adf2c6f9edb948ce19423b23124d Mon Sep 17 00:00:00 2001
From: Luke D Jones <luke@ljones.dev>
Date: Sun, 18 Apr 2021 21:12:29 +1200
Subject: [PATCH 0065/3804] HID: asus: filter G713/G733 key event to prevent
 shutdown

The G713 and G733 both emit an unexpected keycode on some key
presses such as Fn+Pause. The device in this case is emitting
two events on key down, and 3 on key up, the third key up event
is report ID 0x02 and is unfiltered, causing an incorrect event.

This patch filters out the single problematic event.

Signed-off-by: Luke D Jones <luke@ljones.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-asus.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 1ed1c05c3d542..60606c11bdaf0 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -355,6 +355,16 @@ static int asus_raw_event(struct hid_device *hdev,
 				return -1;
 			}
 		}
+		if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD) {
+			/*
+			 * G713 and G733 send these codes on some keypresses, depending on
+			 * the key pressed it can trigger a shutdown event if not caught.
+			*/
+			if(data[0] == 0x02 && data[1] == 0x30) {
+				return -1;
+			}
+		}
+
 	}
 
 	return 0;
-- 
GitLab


From 3b2520076822f15621509a6da3bc4a8636cd33b4 Mon Sep 17 00:00:00 2001
From: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>
Date: Thu, 29 Apr 2021 11:50:39 -0700
Subject: [PATCH 0066/3804] HID: quirks: Add quirk for Lenovo optical mouse

The Lenovo optical mouse with vendor id of 0x17ef and product id of
0x600e experiences disconnecting issues every 55 seconds:

[38565.706242] usb 1-1.4: Product: Lenovo Optical Mouse
[38565.728603] input: Lenovo Optical Mouse as /devices/platform/scb/fd500000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/usb1/1-1/1-1.4/1-1.4:1.0/0003:17EF:600E.029A/input/input665
[38565.755949] hid-generic 0003:17EF:600E.029A: input,hidraw1: USB HID v1.11 Mouse [Lenovo Optical Mouse] on usb-0000:01:00.0-1.4/input0
[38619.360692] usb 1-1.4: USB disconnect, device number 48
[38620.864990] usb 1-1.4: new low-speed USB device number 49 using xhci_hcd
[38620.984011] usb 1-1.4: New USB device found, idVendor=17ef,idProduct=600e, bcdDevice= 1.00
[38620.998117] usb 1-1.4: New USB device strings: Mfr=0, Product=2,SerialNumber=0

This adds HID_QUIRK_ALWAYS_POLL for this device in order to work properly.

Signed-off-by: Saeed Mirzamohammadi <saeed.mirzamohammadi@oracle.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h    | 1 +
 drivers/hid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 933b0ed9d3ed7..fad61ac349b7b 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -752,6 +752,7 @@
 #define USB_DEVICE_ID_LENOVO_X1_COVER	0x6085
 #define USB_DEVICE_ID_LENOVO_X1_TAB	0x60a3
 #define USB_DEVICE_ID_LENOVO_X1_TAB3	0x60b5
+#define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E	0x600e
 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D	0x608d
 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019	0x6019
 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E	0x602e
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index d2933f2ffec58..51b39bda9a9d2 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -110,6 +110,7 @@ static const struct hid_device_id hid_quirks[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E), HID_QUIRK_ALWAYS_POLL },
-- 
GitLab


From 670a23111e720dd50b07c25437b480f1bdfecc78 Mon Sep 17 00:00:00 2001
From: Hamza Mahfooz <someguy@effective-light.com>
Date: Wed, 28 Apr 2021 20:05:14 -0400
Subject: [PATCH 0067/3804] HID: remove the unnecessary redefinition of a macro

USB_VENDOR_ID_CORSAIR is defined twice in the same file with the same
value.

Signed-off-by: Hamza Mahfooz <someguy@effective-light.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ids.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index fad61ac349b7b..b84a0a11e05bf 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -300,8 +300,6 @@
 
 #define USB_VENDOR_ID_CORSAIR		0x1b1c
 #define USB_DEVICE_ID_CORSAIR_K90	0x1b02
-
-#define USB_VENDOR_ID_CORSAIR           0x1b1c
 #define USB_DEVICE_ID_CORSAIR_K70R      0x1b09
 #define USB_DEVICE_ID_CORSAIR_K95RGB    0x1b11
 #define USB_DEVICE_ID_CORSAIR_M65RGB    0x1b12
-- 
GitLab


From a2353e3b26012ff43bcdf81d37a3eaddd7ecdbf3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 8 Mar 2021 18:42:03 +0100
Subject: [PATCH 0068/3804] HID: multitouch: require Finger field to mark Win8
 reports as MT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This effectively changes collection_is_mt from
  contact ID in report->field
to
  (device is Win8 => collection is finger) && contact ID in report->field

Some devices erroneously report Pen for fingers, and Win8 stylus-on-touchscreen
devices report contact ID, but mark the accompanying touchscreen device's
collection correctly

Cc: stable@vger.kernel.org
Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-multitouch.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 9d9f3e1bd5f41..55dcb8536286b 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -604,9 +604,13 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td,
 		if (!(HID_MAIN_ITEM_VARIABLE & field->flags))
 			continue;
 
-		for (n = 0; n < field->report_count; n++) {
-			if (field->usage[n].hid == HID_DG_CONTACTID)
-				rdata->is_mt_collection = true;
+		if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) {
+			for (n = 0; n < field->report_count; n++) {
+				if (field->usage[n].hid == HID_DG_CONTACTID) {
+					rdata->is_mt_collection = true;
+					break;
+				}
+			}
 		}
 	}
 
-- 
GitLab


From bc8b796f618c3ccb0a2a8ed1e96c00a1a7849415 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?=
 <nabijaczleweli@nabijaczleweli.xyz>
Date: Mon, 8 Mar 2021 18:42:08 +0100
Subject: [PATCH 0069/3804] HID: multitouch: set Stylus suffix for
 Stylus-application devices, too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This re-adds the suffix to Win8 stylus-on-touchscreen devices,
now that they aren't erroneously marked as MT

Signed-off-by: Ahelenia Ziemiańska <nabijaczleweli@nabijaczleweli.xyz>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-multitouch.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 55dcb8536286b..eed81bdc2e869 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1580,13 +1580,13 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi)
 		/* we do not set suffix = "Touchscreen" */
 		hi->input->name = hdev->name;
 		break;
-	case HID_DG_STYLUS:
-		/* force BTN_STYLUS to allow tablet matching in udev */
-		__set_bit(BTN_STYLUS, hi->input->keybit);
-		break;
 	case HID_VD_ASUS_CUSTOM_MEDIA_KEYS:
 		suffix = "Custom Media Keys";
 		break;
+	case HID_DG_STYLUS:
+		/* force BTN_STYLUS to allow tablet matching in udev */
+		__set_bit(BTN_STYLUS, hi->input->keybit);
+		fallthrough;
 	case HID_DG_PEN:
 		suffix = "Stylus";
 		break;
-- 
GitLab


From 48e33befe61a7d407753c53d1a06fc8d6b5dab80 Mon Sep 17 00:00:00 2001
From: Mark Bolhuis <mark@bolhuis.dev>
Date: Mon, 3 May 2021 17:39:38 +0100
Subject: [PATCH 0070/3804] HID: Add BUS_VIRTUAL to hid_connect logging

Add BUS_VIRTUAL to hid_connect logging since it's a valid hid bus type and it
should not print <UNKNOWN>

Signed-off-by: Mark Bolhuis <mark@bolhuis.dev>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 0ae9f6df59d10..265cbe592374c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2005,6 +2005,9 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 	case BUS_I2C:
 		bus = "I2C";
 		break;
+	case BUS_VIRTUAL:
+		bus = "VIRTUAL";
+		break;
 	default:
 		bus = "<UNKNOWN>";
 	}
-- 
GitLab


From 682ae59ca2876f83396ccc5674235da99beed06c Mon Sep 17 00:00:00 2001
From: Shuming Fan <shumingf@realtek.com>
Date: Tue, 4 May 2021 18:04:24 +0800
Subject: [PATCH 0071/3804] ASoC: rt711-sdca: fix the function number of SDCA
 control for feature unit 0x1E

The function number should be FUNC_NUM_MIC_ARRAY(0x2) for the feature unit 0x1E.

Fixes: ca5118c0c00f6 ('ASoC: rt711-sdca: change capture switch controls')
Signed-off-by: Shuming Fan <shumingf@realtek.com>
Link: https://lore.kernel.org/r/20210504100424.8760-1-shumingf@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt711-sdca.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c
index cc36739f7fcfb..24a084e0b48a1 100644
--- a/sound/soc/codecs/rt711-sdca.c
+++ b/sound/soc/codecs/rt711-sdca.c
@@ -683,13 +683,13 @@ static int rt711_sdca_set_fu1e_capture_ctl(struct rt711_sdca_priv *rt711)
 	ch_r = (rt711->fu1e_dapm_mute || rt711->fu1e_mixer_r_mute) ? 0x01 : 0x00;
 
 	err = regmap_write(rt711->regmap,
-			SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E,
+			SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E,
 			RT711_SDCA_CTL_FU_MUTE, CH_L), ch_l);
 	if (err < 0)
 		return err;
 
 	err = regmap_write(rt711->regmap,
-			SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E,
+			SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E,
 			RT711_SDCA_CTL_FU_MUTE, CH_R), ch_r);
 	if (err < 0)
 		return err;
-- 
GitLab


From 6be388f4a35d2ce5ef7dbf635a8964a5da7f799f Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Sun, 25 Apr 2021 23:03:53 +0530
Subject: [PATCH 0072/3804] HID: usbhid: fix info leak in hid_submit_ctrl

In hid_submit_ctrl(), the way of calculating the report length doesn't
take into account that report->size can be zero. When running the
syzkaller reproducer, a report of size 0 causes hid_submit_ctrl() to
calculate transfer_buffer_length as 16384. When this urb is passed to
the usb core layer, KMSAN reports an info leak of 16384 bytes.

To fix this, first modify hid_report_len() to account for the zero
report size case by using DIV_ROUND_UP for the division. Then, call it
from hid_submit_ctrl().

Reported-by: syzbot+7c2bb71996f95a82524c@syzkaller.appspotmail.com
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-core.c | 2 +-
 include/linux/hid.h           | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 86257ce6d6198..4e9077363c962 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -374,7 +374,7 @@ static int hid_submit_ctrl(struct hid_device *hid)
 	raw_report = usbhid->ctrl[usbhid->ctrltail].raw_report;
 	dir = usbhid->ctrl[usbhid->ctrltail].dir;
 
-	len = ((report->size - 1) >> 3) + 1 + (report->id > 0);
+	len = hid_report_len(report);
 	if (dir == USB_DIR_OUT) {
 		usbhid->urbctrl->pipe = usb_sndctrlpipe(hid_to_usb_dev(hid), 0);
 		usbhid->urbctrl->transfer_buffer_length = len;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 271021e20a3f8..10e922cee4ebb 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1167,8 +1167,7 @@ static inline void hid_hw_wait(struct hid_device *hdev)
  */
 static inline u32 hid_report_len(struct hid_report *report)
 {
-	/* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */
-	return ((report->size - 1) >> 3) + 1 + (report->id > 0);
+	return DIV_ROUND_UP(report->size, 8) + (report->id > 0);
 }
 
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
-- 
GitLab


From 9683e5775c75097c46bd24e65411b16ac6c6cbb3 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 4 May 2021 16:49:10 -0700
Subject: [PATCH 0073/3804] libbpf: Add NULL check to add_dummy_ksym_var

Avoids a segv if btf isn't present. Seen on the call path
__bpf_object__open calling bpf_object__collect_externs.

Fixes: 5bd022ec01f0 (libbpf: Support extern kernel function)
Suggested-by: Stanislav Fomichev <sdf@google.com>
Suggested-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210504234910.976501-1-irogers@google.com
---
 tools/lib/bpf/libbpf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e2a3cf4378140..c41d9b2b59ace 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3216,6 +3216,9 @@ static int add_dummy_ksym_var(struct btf *btf)
 	const struct btf_var_secinfo *vs;
 	const struct btf_type *sec;
 
+	if (!btf)
+		return 0;
+
 	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
 					    BTF_KIND_DATASEC);
 	if (sec_btf_id < 0)
-- 
GitLab


From 3b80d106e110d39d3f678954d3b55078669cf07e Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 6 May 2021 14:43:49 +0200
Subject: [PATCH 0074/3804] samples/bpf: Consider frame size in tx_only of
 xdpsock sample

Fix the tx_only micro-benchmark in xdpsock to take frame size into
consideration. It was hardcoded to the default value of frame_size
which is 4K. Changing this on the command line to 2K made half of the
packets illegal as they were outside the umem and were therefore
discarded by the kernel.

Fixes: 46738f73ea4f ("samples/bpf: add use of need_wakeup flag in xdpsock")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210506124349.6666-1-magnus.karlsson@gmail.com
---
 samples/bpf/xdpsock_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index aa696854be787..53e300f860bb4 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -1255,7 +1255,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 	for (i = 0; i < batch_size; i++) {
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
 								  idx + i);
-		tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+		tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
 		tx_desc->len = PKT_SIZE;
 	}
 
-- 
GitLab


From d9aa6571b28ba0022de1e48801ff03a1854c7ef2 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Wed, 21 Apr 2021 16:37:35 -0700
Subject: [PATCH 0075/3804] drm/msm/dp: check sink_count before update
 is_connected status

Link status is different from display connected status in the case
of something like an Apple dongle where the type-c plug can be
connected, and therefore the link is connected, but no sink is
connected until an HDMI cable is plugged into the dongle.
The sink_count of DPCD of dongle will increase to 1 once an HDMI
cable is plugged into the dongle so that display connected status
will become true. This check also applies at pm_resume.

Changes in v4:
-- none

Fixes: 94e58e2d06e3 ("drm/msm/dp: reset dp controller only at boot up and pm_resume")
Reported-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Tested-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets")
Link: https://lore.kernel.org/r/1619048258-8717-2-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_display.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 5a39da6e1eaf2..0ba71c7a8dd4d 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -586,10 +586,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data)
 	mutex_lock(&dp->event_mutex);
 
 	state = dp->hpd_state;
-	if (state == ST_CONNECT_PENDING) {
-		dp_display_enable(dp, 0);
+	if (state == ST_CONNECT_PENDING)
 		dp->hpd_state = ST_CONNECTED;
-	}
 
 	mutex_unlock(&dp->event_mutex);
 
@@ -669,10 +667,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data
 	mutex_lock(&dp->event_mutex);
 
 	state =  dp->hpd_state;
-	if (state == ST_DISCONNECT_PENDING) {
-		dp_display_disable(dp, 0);
+	if (state == ST_DISCONNECT_PENDING)
 		dp->hpd_state = ST_DISCONNECTED;
-	}
 
 	mutex_unlock(&dp->event_mutex);
 
@@ -1272,7 +1268,12 @@ static int dp_pm_resume(struct device *dev)
 
 	status = dp_catalog_link_is_connected(dp->catalog);
 
-	if (status)
+	/*
+	 * can not declared display is connected unless
+	 * HDMI cable is plugged in and sink_count of
+	 * dongle become 1
+	 */
+	if (status && dp->link->sink_count)
 		dp->dp_display.is_connected = true;
 	else
 		dp->dp_display.is_connected = false;
-- 
GitLab


From f2f46b878777e0d3f885c7ddad48f477b4dea247 Mon Sep 17 00:00:00 2001
From: Kuogee Hsieh <khsieh@codeaurora.org>
Date: Wed, 21 Apr 2021 16:37:36 -0700
Subject: [PATCH 0076/3804] drm/msm/dp: initialize audio_comp when audio starts

Initialize audio_comp when audio starts and wait for audio_comp at
dp_display_disable(). This will take care of both dongle unplugged
and display off (suspend) cases.

Changes in v2:
-- add dp_display_signal_audio_start()

Changes in v3:
-- restore dp_display_handle_plugged_change() at dp_hpd_unplug_handle().

Changes in v4:
-- none

Signed-off-by: Kuogee Hsieh <khsieh@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Tested-by: Stephen Boyd <swboyd@chromium.org>
Fixes: c703d5789590 ("drm/msm/dp: trigger unplug event in msm_dp_display_disable")
Link: https://lore.kernel.org/r/1619048258-8717-3-git-send-email-khsieh@codeaurora.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dp/dp_audio.c   |  1 +
 drivers/gpu/drm/msm/dp/dp_display.c | 11 +++++++++--
 drivers/gpu/drm/msm/dp/dp_display.h |  1 +
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c
index 82a8673ab8daf..d7e4a39a904e2 100644
--- a/drivers/gpu/drm/msm/dp/dp_audio.c
+++ b/drivers/gpu/drm/msm/dp/dp_audio.c
@@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev,
 	dp_audio_setup_acr(audio);
 	dp_audio_safe_to_exit_level(audio);
 	dp_audio_enable(audio, true);
+	dp_display_signal_audio_start(dp_display);
 	dp_display->audio_enabled = true;
 
 end:
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index 0ba71c7a8dd4d..1784e119269b7 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event)
 	return 0;
 }
 
+void dp_display_signal_audio_start(struct msm_dp *dp_display)
+{
+	struct dp_display_private *dp;
+
+	dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+	reinit_completion(&dp->audio_comp);
+}
+
 void dp_display_signal_audio_complete(struct msm_dp *dp_display)
 {
 	struct dp_display_private *dp;
@@ -649,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
 	dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND);
 
 	/* signal the disconnect event early to ensure proper teardown */
-	reinit_completion(&dp->audio_comp);
 	dp_display_handle_plugged_change(g_dp_display, false);
 
 	dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK |
@@ -894,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data)
 	/* wait only if audio was enabled */
 	if (dp_display->audio_enabled) {
 		/* signal the disconnect event */
-		reinit_completion(&dp->audio_comp);
 		dp_display_handle_plugged_change(dp_display, false);
 		if (!wait_for_completion_timeout(&dp->audio_comp,
 				HZ * 5))
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h
index 6092ba1ed85ed..5173c89eedf7e 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display,
 int dp_display_request_irq(struct msm_dp *dp_display);
 bool dp_display_check_video_test(struct msm_dp *dp_display);
 int dp_display_get_test_bpp(struct msm_dp *dp_display);
+void dp_display_signal_audio_start(struct msm_dp *dp_display);
 void dp_display_signal_audio_complete(struct msm_dp *dp_display);
 
 #endif /* _DP_DISPLAY_H_ */
-- 
GitLab


From 31379397dcc364a59ce764fabb131b645c43e340 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 5 May 2021 15:25:29 +0200
Subject: [PATCH 0077/3804] bpf: Forbid trampoline attach for functions with
 variable arguments

We currently can't allow attaching to functions with variable arguments.
The problem is that we should save all the registers for arguments,
which is probably doable, but if caller uses more than 6 arguments,
we need stack data, which will be wrong, because of the extra stack
frame we do in bpf trampoline, so we could crash.

Also, malformed trampoline code is currently generated for such
functions, as described in:

  https://lore.kernel.org/bpf/20210429212834.82621-1-jolsa@kernel.org/

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210505132529.401047-1-jolsa@kernel.org
---
 kernel/bpf/btf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 0600ed325fa0b..f982a9f0dbc46 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 	m->ret_size = ret;
 
 	for (i = 0; i < nargs; i++) {
+		if (i == nargs - 1 && args[i].type == 0) {
+			bpf_log(log,
+				"The function %s with variable args is unsupported.\n",
+				tname);
+			return -EINVAL;
+		}
 		ret = __get_type_size(btf, args[i].type, &t);
 		if (ret < 0) {
 			bpf_log(log,
@@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 				tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
 			return -EINVAL;
 		}
+		if (ret == 0) {
+			bpf_log(log,
+				"The function %s has malformed void argument.\n",
+				tname);
+			return -EINVAL;
+		}
 		m->arg_size[i] = ret;
 	}
 	m->nr_args = nargs;
-- 
GitLab


From 8822702f6e4c8917c83ba79e0ebf2c8c218910d4 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Fri, 7 May 2021 10:44:52 +0800
Subject: [PATCH 0078/3804] ALSA: hda/realtek: reset eapd coeff to default
 value for alc287

An Ubuntu user reported an audio bug on the Lenovo Yoga Slim 7 14IIL05
with a dual-boot (Windows + Linux) setup: if he rebooted into Linux
from Windows, the speaker didn't work well and had crackling noise;
if he powered off the machine first after Windows, the speaker worked
well.

Before rebooting or shutting down, Windows changes the codec eapd
coeff value, but the BIOS doesn't re-initialize it, so when booting
into Linux from Windows the eapd coeff value is not correct. To fix
it, set that coeff register to the codec default value in the alsa
driver.

BugLink: http://bugs.launchpad.net/bugs/1925057
Suggested-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20210507024452.8300-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6d58f24c9702f..a5f3e78ec04e7 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -395,7 +395,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
-	case 0x10ec0287:
 	case 0x10ec0288:
 	case 0x10ec0285:
 	case 0x10ec0298:
@@ -406,6 +405,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0275:
 		alc_update_coef_idx(codec, 0xe, 0, 1<<0);
 		break;
+	case 0x10ec0287:
+		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
+		alc_write_coef_idx(codec, 0x8, 0x4ab7);
+		break;
 	case 0x10ec0293:
 		alc_update_coef_idx(codec, 0xa, 1<<13, 0);
 		break;
-- 
GitLab


From 8b79feffeca28c5459458fe78676b081e87c93a4 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:41 +0200
Subject: [PATCH 0079/3804] x86/kvm: Teardown PV features on boot CPU as well

Various PV features (Async PF, PV EOI, steal time) work through memory
shared with hypervisor and when we restore from hibernation we must
properly teardown all these features to make sure hypervisor doesn't
write to stale locations after we jump to the previously hibernated kernel
(which can try to place anything there). For secondary CPUs the job is
already done by kvm_cpu_down_prepare(); register syscore ops to do
the same for the boot CPU.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 56 ++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dc440bb692223..9d5f96321c7f9 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -451,6 +452,25 @@ static void __init sev_map_percpu_data(void)
 	}
 }
 
+static void kvm_guest_cpu_offline(void)
+{
+	kvm_disable_steal_time();
+	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+	kvm_pv_disable_apf();
+	apf_task_wake_all();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	kvm_guest_cpu_init();
+	local_irq_restore(flags);
+	return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,32 +655,34 @@ static void __init kvm_smp_prepare_boot_cpu(void)
 	kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-	kvm_disable_steal_time();
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	apf_task_wake_all();
-}
+	unsigned long flags;
 
-static int kvm_cpu_online(unsigned int cpu)
-{
-	local_irq_disable();
-	kvm_guest_cpu_init();
-	local_irq_enable();
+	local_irq_save(flags);
+	kvm_guest_cpu_offline();
+	local_irq_restore(flags);
 	return 0;
 }
 
-static int kvm_cpu_down_prepare(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
 {
-	local_irq_disable();
 	kvm_guest_cpu_offline();
-	local_irq_enable();
+
 	return 0;
 }
 
-#endif
+static void kvm_resume(void)
+{
+	kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+	.suspend	= kvm_suspend,
+	.resume		= kvm_resume,
+};
 
 static void __init kvm_guest_init(void)
 {
@@ -704,6 +726,8 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+	register_syscore_ops(&kvm_syscore_ops);
+
 	/*
 	 * Hard lockup detection is enabled by default. Disable it, as guests
 	 * can get false positives too easily, for example if the host is
-- 
GitLab


From c02027b5742b5aa804ef08a4a9db433295533046 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:42 +0200
Subject: [PATCH 0080/3804] x86/kvm: Disable kvmclock on all CPUs on shutdown

Currently, we disable kvmclock from machine_shutdown() hook and this
only happens for boot CPU. We need to disable it for all CPUs to
guard against memory corruption e.g. on restore from hibernate.

Note, writing '0' to kvmclock MSR doesn't clear memory location, it
just prevents hypervisor from updating the location so for the short
while after write and while CPU is still alive, the clock remains usable
and correct so we don't need to switch to some other clocksource.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_para.h | 4 ++--
 arch/x86/kernel/kvm.c           | 1 +
 arch/x86/kernel/kvmclock.c      | 5 +----
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 3381198525126..9c56e0defd453 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
-extern void kvmclock_init(void);
-
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 }
 
 #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 unsigned int kvm_arch_para_hints(void);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9d5f96321c7f9..25dd126a33250 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -459,6 +459,7 @@ static void kvm_guest_cpu_offline(void)
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
 	apf_task_wake_all();
+	kvmclock_disable();
 }
 
 static int kvm_cpu_online(unsigned int cpu)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d37ed4e1d0338..081686df6cd8a 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -220,11 +220,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
 }
 #endif
 
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_shutdown();
 }
 
 static void __init kvmclock_init_mem(void)
@@ -351,7 +349,6 @@ void __init kvmclock_init(void)
 #endif
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-	machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC_CORE
 	machine_ops.crash_shutdown  = kvm_crash_shutdown;
 #endif
-- 
GitLab


From 3d6b84132d2a57b5a74100f6923a8feb679ac2ce Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:43 +0200
Subject: [PATCH 0081/3804] x86/kvm: Disable all PV features on crash

Crash shutdown handler only disables kvmclock and steal time, other PV
features remain active so we risk corrupting memory or getting some
side-effects in kdump kernel. Move crash handler to kvm.c and unify
with CPU offline.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_para.h |  6 -----
 arch/x86/kernel/kvm.c           | 44 ++++++++++++++++++++++++---------
 arch/x86/kernel/kvmclock.c      | 21 ----------------
 3 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 9c56e0defd453..69299878b200a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
 bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
 	return 0;
 }
 
-static inline void kvm_disable_steal_time(void)
-{
-	return;
-}
-
 static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
 	return false;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 25dd126a33250..8eb91dc0f5a87 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -38,6 +38,7 @@
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
+#include <asm/reboot.h>
 #include <asm/svm.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void)
 	pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
+static void kvm_disable_steal_time(void)
+{
+	if (!has_steal_clock)
+		return;
+
+	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
 static void kvm_pv_guest_cpu_reboot(void *unused)
 {
 	/*
@@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu)
 	return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-	if (!has_steal_clock)
-		return;
-
-	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
 	early_set_memory_decrypted((unsigned long) ptr, size);
@@ -452,13 +453,14 @@ static void __init sev_map_percpu_data(void)
 	}
 }
 
-static void kvm_guest_cpu_offline(void)
+static void kvm_guest_cpu_offline(bool shutdown)
 {
 	kvm_disable_steal_time();
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
-	apf_task_wake_all();
+	if (!shutdown)
+		apf_task_wake_all();
 	kvmclock_disable();
 }
 
@@ -661,7 +663,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	kvm_guest_cpu_offline();
+	kvm_guest_cpu_offline(false);
 	local_irq_restore(flags);
 	return 0;
 }
@@ -670,7 +672,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 
 static int kvm_suspend(void)
 {
-	kvm_guest_cpu_offline();
+	kvm_guest_cpu_offline(false);
 
 	return 0;
 }
@@ -685,6 +687,20 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume		= kvm_resume,
 };
 
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+	kvm_guest_cpu_offline(true);
+	native_machine_crash_shutdown(regs);
+}
+#endif
+
 static void __init kvm_guest_init(void)
 {
 	int i;
@@ -727,6 +743,10 @@ static void __init kvm_guest_init(void)
 	kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+	machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
 	register_syscore_ops(&kvm_syscore_ops);
 
 	/*
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 081686df6cd8a..ad273e5861c1b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -203,23 +202,6 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-	native_write_msr(msr_kvm_system_time, 0, 0);
-	kvm_disable_steal_time();
-	native_machine_crash_shutdown(regs);
-}
-#endif
-
 void kvmclock_disable(void)
 {
 	native_write_msr(msr_kvm_system_time, 0, 0);
@@ -349,9 +331,6 @@ void __init kvmclock_init(void)
 #endif
 	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
 	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-#ifdef CONFIG_KEXEC_CORE
-	machine_ops.crash_shutdown  = kvm_crash_shutdown;
-#endif
 	kvm_get_preset_lpj();
 
 	/*
-- 
GitLab


From 384fc672f528d3b84eacd9a86ecf35df3363b8ba Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 14 Apr 2021 14:35:44 +0200
Subject: [PATCH 0082/3804] x86/kvm: Unify kvm_pv_guest_cpu_reboot() with
 kvm_guest_cpu_offline()

Simplify the code by making PV features shutdown happen in one place.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210414123544.1060604-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 42 +++++++++++++++++-------------------------
 1 file changed, 17 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8eb91dc0f5a87..a26643dc6bd63 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -384,31 +384,6 @@ static void kvm_disable_steal_time(void)
 	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
-static void kvm_pv_guest_cpu_reboot(void *unused)
-{
-	/*
-	 * We disable PV EOI before we load a new kernel by kexec,
-	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
-	 * New kernel can re-enable when it boots.
-	 */
-	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-	kvm_pv_disable_apf();
-	kvm_disable_steal_time();
-}
-
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-				unsigned long code, void *unused)
-{
-	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block kvm_pv_reboot_nb = {
-	.notifier_call = kvm_pv_reboot_notify,
-};
-
 static u64 kvm_steal_clock(int cpu)
 {
 	u64 steal;
@@ -687,6 +662,23 @@ static struct syscore_ops kvm_syscore_ops = {
 	.resume		= kvm_resume,
 };
 
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+				unsigned long code, void *unused)
+{
+	if (code == SYS_RESTART)
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kvm_pv_reboot_nb = {
+	.notifier_call = kvm_pv_reboot_notify,
+};
+
 /*
  * After a PV feature is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
-- 
GitLab


From 46a63924b05f335b0765ad13dae4d2d7569f25c9 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Mon, 3 May 2021 14:00:58 +0200
Subject: [PATCH 0083/3804] doc/kvm: Fix wrong entry for KVM_CAP_X86_MSR_FILTER

The capability that exposes new ioctl KVM_X86_SET_MSR_FILTER to
userspace is specified incorrectly as the ioctl itself (instead of
KVM_CAP_X86_MSR_FILTER). This patch fixes it.

Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering")
Reviewed-by: Alexander Graf <graf@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <20210503120059.9283-1-sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 22d0775621496..7fcb2fd38f42e 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
 4.126 KVM_X86_SET_MSR_FILTER
 ----------------------------
 
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
 :Architectures: x86
 :Type: vm ioctl
 :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
-- 
GitLab


From f5c7e8425f18fdb9bdb7d13340651d7876890329 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 3 May 2021 17:08:51 +0200
Subject: [PATCH 0084/3804] KVM: nVMX: Always make an attempt to map eVMCS
 after migration

When enlightened VMCS is in use and nested state is migrated with
vmx_get_nested_state()/vmx_set_nested_state() KVM can't map evmcs
page right away: evmcs gpa is not in 'struct kvm_vmx_nested_state_hdr'
and we can't read it from VP assist page because userspace may decide
to restore HV_X64_MSR_VP_ASSIST_PAGE after restoring nested state
(and QEMU, for example, does exactly that). To make sure eVMCS is
mapped, vmx_set_nested_state() raises KVM_REQ_GET_NESTED_STATE_PAGES
request.

Commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES
on nested vmexit") added KVM_REQ_GET_NESTED_STATE_PAGES clearing to
nested_vmx_vmexit() to make sure MSR permission bitmap is not switched
when an immediate exit from L2 to L1 happens right after migration (caused
by a pending event, for example). Unfortunately, in the exact same
situation we still need to have eVMCS mapped so
nested_sync_vmcs12_to_shadow() reflects changes in VMCS12 to eVMCS.

As a band-aid, restore nested_get_evmcs_page() when clearing
KVM_REQ_GET_NESTED_STATE_PAGES in nested_vmx_vmexit(). The 'fix' is far
from being ideal as we can't easily propagate possible failures and even if
we could, this is most likely already too late to do so. The whole
'KVM_REQ_GET_NESTED_STATE_PAGES' idea for mapping eVMCS after migration
seems to be fragile as we diverge too much from the 'native' path when
vmptr loading happens on vmx_set_nested_state().

Fixes: f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210503150854.1144255-2-vkuznets@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bced766378232..6058a65a6ede6 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
 		if (evmptrld_status == EVMPTRLD_VMFAIL ||
-		    evmptrld_status == EVMPTRLD_ERROR) {
-			pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
-					     __func__);
-			vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-			vcpu->run->internal.suberror =
-				KVM_INTERNAL_ERROR_EMULATION;
-			vcpu->run->internal.ndata = 0;
+		    evmptrld_status == EVMPTRLD_ERROR)
 			return false;
-		}
 	}
 
 	return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
-	if (!nested_get_evmcs_page(vcpu))
+	if (!nested_get_evmcs_page(vcpu)) {
+		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+				     __func__);
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror =
+			KVM_INTERNAL_ERROR_EMULATION;
+		vcpu->run->internal.ndata = 0;
+
 		return false;
+	}
 
 	if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
 		return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	/* Similarly, triple faults in L2 should never escape. */
 	WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
 
-	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+	if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+		/*
+		 * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+		 * Enlightened VMCS after migration and we still need to
+		 * do that when something is forcing L2->L1 exit prior to
+		 * the first L2 run.
+		 */
+		(void)nested_get_evmcs_page(vcpu);
+	}
 
 	/* Service the TLB flush request for L2 before switching to L1. */
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
-- 
GitLab


From 32d1b3ab588c1231dbfa9eb08819c50529ce77d7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 5 May 2021 17:18:21 +0200
Subject: [PATCH 0085/3804] KVM: selftests: evmcs_test: Check that VMLAUNCH
 with bogus EVMPTR is causing #UD

'run->exit_reason == KVM_EXIT_SHUTDOWN' check is not ideal as we may be
getting some unexpected exception. Directly check for #UD instead.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210505151823.1341678-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index ca22ee6d19cbd..b01f64ac6ce30 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -19,6 +19,14 @@
 
 #define VCPU_ID		5
 
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+	ud_count++;
+	regs->rip += 3; /* VMLAUNCH */
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
@@ -71,11 +79,11 @@ void guest_code(struct vmx_pages *vmx_pages)
 	if (vmx_pages)
 		l1_guest_code(vmx_pages);
 
-	GUEST_DONE();
-
 	/* Try enlightened vmptrld with an incorrect GPA */
 	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
 	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(ud_count == 1);
+	GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
@@ -109,6 +117,10 @@ int main(int argc, char *argv[])
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -124,7 +136,7 @@ int main(int argc, char *argv[])
 		case UCALL_SYNC:
 			break;
 		case UCALL_DONE:
-			goto part1_done;
+			goto done;
 		default:
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
@@ -156,10 +168,6 @@ int main(int argc, char *argv[])
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
 	}
 
-part1_done:
-	_vcpu_run(vm, VCPU_ID);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-		    "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
+done:
 	kvm_vm_free(vm);
 }
-- 
GitLab


From c9ecafaf0113a305f5085ceb9c7a4b64ca70eae9 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 5 May 2021 17:18:22 +0200
Subject: [PATCH 0086/3804] KVM: selftests: evmcs_test: Check that VMCS12 is
 always properly synced to eVMCS after restore

Add a test for the regression, introduced by commit f2c7ef3ba955
("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit"). When
L2->L1 exit is forced immediately after restoring nested state,
KVM_REQ_GET_NESTED_STATE_PAGES request is cleared and VMCS12 changes
(e.g. fresh RIP) are not reflected to eVMCS. The subsequent nested
vCPU run gets broken.

Utilize NMI injection to do the job.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210505151823.1341678-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 66 +++++++++++++++----
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index b01f64ac6ce30..63096cea26c61 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -18,30 +18,52 @@
 #include "vmx.h"
 
 #define VCPU_ID		5
+#define NMI_VECTOR	2
 
 static int ud_count;
 
+void enable_x2apic(void)
+{
+	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
+
 static void guest_ud_handler(struct ex_regs *regs)
 {
 	ud_count++;
 	regs->rip += 3; /* VMLAUNCH */
 }
 
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
 
 	GUEST_SYNC(8);
 
+	/* Forced exit to L1 upon restore */
+	GUEST_SYNC(9);
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+	enable_x2apic();
+
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -63,21 +85,22 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
 	current_evmcs->revision_id = EVMCS_VERSION;
 	GUEST_SYNC(6);
 
+	current_evmcs->pin_based_vm_exec_control |=
+		PIN_BASED_NMI_EXITING;
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-	GUEST_SYNC(9);
+
+	/*
+	 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
+	 * up-to-date (RIP points where it should and not at the beginning
+	 * of l2_guest_code(). GUEST_SYNC(9) checkes that.
+	 */
 	GUEST_ASSERT(!vmresume());
-	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
-	GUEST_SYNC(10);
-}
 
-void guest_code(struct vmx_pages *vmx_pages)
-{
-	GUEST_SYNC(1);
-	GUEST_SYNC(2);
+	GUEST_SYNC(10);
 
-	if (vmx_pages)
-		l1_guest_code(vmx_pages);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+	GUEST_SYNC(11);
 
 	/* Try enlightened vmptrld with an incorrect GPA */
 	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
@@ -86,6 +109,18 @@ void guest_code(struct vmx_pages *vmx_pages)
 	GUEST_DONE();
 }
 
+void inject_nmi(struct kvm_vm *vm)
+{
+	struct kvm_vcpu_events events;
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+
+	events.nmi.pending = 1;
+	events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+	vcpu_events_set(vm, VCPU_ID, &events);
+}
+
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva = 0;
@@ -120,6 +155,9 @@ int main(int argc, char *argv[])
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
 	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+	vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+
+	pr_info("Running L1 which uses EVMCS to run L2\n");
 
 	for (stage = 1;; stage++) {
 		_vcpu_run(vm, VCPU_ID);
@@ -166,6 +204,12 @@ int main(int argc, char *argv[])
 		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
 			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+
+		/* Force immediate L2->L1 exit before resuming */
+		if (stage == 8) {
+			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+			inject_nmi(vm);
+		}
 	}
 
 done:
-- 
GitLab


From 70f094f4f01dc4d6f78ac6407f85627293a6553c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 3 May 2021 17:08:52 +0200
Subject: [PATCH 0087/3804] KVM: nVMX: Properly pad 'struct
 kvm_vmx_nested_state_hdr'

Eliminate the probably unwanted hole in 'struct kvm_vmx_nested_state_hdr':

Pre-patch:
struct kvm_vmx_nested_state_hdr {
        __u64                      vmxon_pa;             /*     0     8 */
        __u64                      vmcs12_pa;            /*     8     8 */
        struct {
                __u16              flags;                /*    16     2 */
        } smm;                                           /*    16     2 */

        /* XXX 2 bytes hole, try to pack */

        __u32                      flags;                /*    20     4 */
        __u64                      preemption_timer_deadline; /*    24     8 */
};

Post-patch:
struct kvm_vmx_nested_state_hdr {
        __u64                      vmxon_pa;             /*     0     8 */
        __u64                      vmcs12_pa;            /*     8     8 */
        struct {
                __u16              flags;                /*    16     2 */
        } smm;                                           /*    16     2 */
        __u16                      pad;                  /*    18     2 */
        __u32                      flags;                /*    20     4 */
        __u64                      preemption_timer_deadline; /*    24     8 */
};

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210503150854.1144255-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82b..0662f644aad9d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
-- 
GitLab


From 5f443e424efab56baa8021da04878f88eb0815d4 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 10 Dec 2020 17:23:17 -0800
Subject: [PATCH 0088/3804] selftests: kvm: remove reassignment of non-absolute
 variables

Clang's integrated assembler does not allow symbols with non-absolute
values to be reassigned. Modify the interrupt entry loop macro to be
compatible with IAS by using a label and an offset.

Cc: Jian Cai <caij2003@gmail.com>
Signed-off-by: Bill Wendling <morbo@google.com>
References: https://lore.kernel.org/lkml/20200714233024.1789985-1-caij2003@gmail.com/
Message-Id: <20201211012317.3722214-1-morbo@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/handlers.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
index aaf7bc7d2ce18..7629819734afd 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -54,9 +54,9 @@ idt_handlers:
 	.align 8
 
 	/* Fetch current address and append it to idt_handlers. */
-	current_handler = .
+666 :
 .pushsection .rodata
-.quad current_handler
+	.quad 666b
 .popsection
 
 	.if ! \has_error
-- 
GitLab


From aca352886ebdd675b5131ed4c83bf5477eee5d72 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Mon, 3 May 2021 14:21:11 +0200
Subject: [PATCH 0089/3804] KVM: x86: Hoist input checks in
 kvm_add_msr_filter()

In ioctl KVM_X86_SET_MSR_FILTER, input from user space is validated
after a memdup_user(). For invalid inputs we'd memdup and then call
kfree unnecessarily. Hoist input validation to avoid kfree altogether.

Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <20210503122111.13775-1-sidcha@amazon.de>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cebdaa1e3cf5b..102f116d9bb40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5468,14 +5468,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 			      struct kvm_msr_filter_range *user_range)
 {
-	struct msr_bitmap_range range;
 	unsigned long *bitmap = NULL;
 	size_t bitmap_size;
-	int r;
 
 	if (!user_range->nmsrs)
 		return 0;
 
+	if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+		return -EINVAL;
+
+	if (!user_range->flags)
+		return -EINVAL;
+
 	bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
 	if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
 		return -EINVAL;
@@ -5484,31 +5488,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 	if (IS_ERR(bitmap))
 		return PTR_ERR(bitmap);
 
-	range = (struct msr_bitmap_range) {
+	msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
 		.flags = user_range->flags,
 		.base = user_range->base,
 		.nmsrs = user_range->nmsrs,
 		.bitmap = bitmap,
 	};
 
-	if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	if (!range.flags) {
-		r = -EINVAL;
-		goto err;
-	}
-
-	/* Everything ok, add this range identifier. */
-	msr_filter->ranges[msr_filter->count] = range;
 	msr_filter->count++;
-
 	return 0;
-err:
-	kfree(bitmap);
-	return r;
 }
 
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
-- 
GitLab


From 063ab16c14db5a2ef52d54d0475b7fed19c982d7 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 4 May 2021 17:39:35 +0300
Subject: [PATCH 0090/3804] KVM: nSVM: always restore the L1's GIF on migration

While usually the L1's GIF is set while L2 runs, and usually
migration nested state is loaded after a vCPU reset which
also sets L1's GIF to true, this is not guaranteed.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210504143936.1644378-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 32400cba608d4..b331446f67f3b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1314,6 +1314,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	else
 		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
+	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
 	svm->nested.nested_run_pending =
 		!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
-- 
GitLab


From 809c79137a192d7e881a517f803ebbf96305f066 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 4 May 2021 17:39:36 +0300
Subject: [PATCH 0091/3804] KVM: nSVM: remove a warning about vmcb01 VM exit
 reason

While in most cases, when returning to use the VMCB01,
the exit reason stored in it will be SVM_EXIT_VMRUN,
on first VM exit after a nested migration this field
can contain anything since the VM entry did happen
before the migration.

Remove this warning to avoid the false positive.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210504143936.1644378-3-mlevitsk@redhat.com>
Fixes: 9a7de6ecc3ed ("KVM: nSVM: If VMRUN is single-stepped, queue the #DB intercept in nested_svm_vmexit()")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b331446f67f3b..5e8d8443154e8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
 	svm_switch_vmcb(svm, &svm->vmcb01);
-	WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
 
 	/*
 	 * On vmexit the  GIF is set to false and
-- 
GitLab


From 8aec21c04caa2000f91cf8822ae0811e4b0c3971 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:20 -0700
Subject: [PATCH 0092/3804] KVM: VMX: Do not advertise RDPID if ENABLE_RDTSCP
 control is unsupported

Clear KVM's RDPID capability if the ENABLE_RDTSCP secondary exec control is
unsupported.  Despite being enumerated in a separate CPUID flag, RDPID is
bundled under the same VMCS control as RDTSCP and will #UD in VMX non-root
if ENABLE_RDTSCP is not enabled.

Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-2-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbe0cdade38a5..46573b8626385 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7377,9 +7377,11 @@ static __init void vmx_set_cpu_caps(void)
 	if (!cpu_has_vmx_xsaves())
 		kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
-	/* CPUID 0x80000001 */
-	if (!cpu_has_vmx_rdtscp())
+	/* CPUID 0x80000001 and 0x7 (RDPID) */
+	if (!cpu_has_vmx_rdtscp()) {
 		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+	}
 
 	if (cpu_has_vmx_waitpkg())
 		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
-- 
GitLab


From 85d0011264da24be08ae907d7f29983a597ca9b1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:21 -0700
Subject: [PATCH 0093/3804] KVM: x86: Emulate RDPID only if RDTSCP is supported

Do not advertise emulation support for RDPID if RDTSCP is unsupported.
RDPID emulation subtly relies on MSR_TSC_AUX to exist in hardware, as
both vmx_get_msr() and svm_get_msr() will return an error if the MSR is
unsupported, i.e. ctxt->ops->get_msr() will fail and the emulator will
inject a #UD.

Note, RDPID emulation also relies on RDTSCP being enabled in the guest,
but this is a KVM bug and will eventually be fixed.

Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-3-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 19606a3418889..c0e8c5e921898 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -637,7 +637,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 	case 7:
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		entry->eax = 0;
-		entry->ecx = F(RDPID);
+		if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			entry->ecx = F(RDPID);
 		++array->nent;
 	default:
 		break;
-- 
GitLab


From 3b195ac9260235624b1c18f7bdaef184479c1d41 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:22 -0700
Subject: [PATCH 0094/3804] KVM: SVM: Inject #UD on RDTSCP when it should be
 disabled in the guest

Intercept RDTSCP to inject #UD if RDTSCP is disabled in the guest.

Note, SVM does not support intercepting RDPID.  Unlike VMX's
ENABLE_RDTSCP control, RDTSCP interception does not apply to RDPID.  This
is a benign virtualization hole as the host kernel (incorrectly) sets
MSR_TSC_AUX if RDTSCP is supported, and KVM loads the guest's MSR_TSC_AUX
into hardware if RDTSCP is supported in the host, i.e. KVM will not leak
the host's MSR_TSC_AUX to the guest.

But, when the kernel bug is fixed, KVM will start leaking the host's
MSR_TSC_AUX if RDPID is supported in hardware, but RDTSCP isn't available
for whatever reason.  This leak will be remedied in a future commit.

Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-4-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index be5cf612ab1fe..ebcb5849d69b1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1100,7 +1100,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	return svm->vmcb->control.tsc_offset;
 }
 
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+					      struct vcpu_svm *svm)
 {
 	/*
 	 * Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1115,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
 		else
 			svm_clr_intercept(svm, INTERCEPT_INVPCID);
 	}
+
+	if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+		if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+			svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+		else
+			svm_set_intercept(svm, INTERCEPT_RDTSCP);
+	}
 }
 
 static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1248,7 +1257,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 		svm_clr_intercept(svm, INTERCEPT_PAUSE);
 	}
 
-	svm_check_invpcid(svm);
+	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/*
 	 * If the host supports V_SPEC_CTRL then disable the interception
@@ -3084,6 +3093,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_STGI]				= stgi_interception,
 	[SVM_EXIT_CLGI]				= clgi_interception,
 	[SVM_EXIT_SKINIT]			= skinit_interception,
+	[SVM_EXIT_RDTSCP]			= kvm_handle_invalid_op,
 	[SVM_EXIT_WBINVD]                       = kvm_emulate_wbinvd,
 	[SVM_EXIT_MONITOR]			= kvm_emulate_monitor,
 	[SVM_EXIT_MWAIT]			= kvm_emulate_mwait,
@@ -4007,8 +4017,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
 			     guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
 
-	/* Check again if INVPCID interception if required */
-	svm_check_invpcid(svm);
+	svm_recalc_instruction_intercepts(vcpu, svm);
 
 	/* For sev guests, the memory encryption bit is not reserved in CR3.  */
 	if (sev_guest(vcpu->kvm)) {
-- 
GitLab


From 2183de4161b90bd3851ccd3910c87b2c9adfc6ed Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:23 -0700
Subject: [PATCH 0095/3804] KVM: x86: Move RDPID emulation intercept to its own
 enum

Add a dedicated intercept enum for RDPID instead of piggybacking RDTSCP.
Unlike VMX's ENABLE_RDTSCP, RDPID is not bound to SVM's RDTSCP intercept.

Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-5-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 2 +-
 arch/x86/kvm/kvm_emulate.h | 1 +
 arch/x86/kvm/vmx/vmx.c     | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 77e1c89a95a7f..8a0ccdb560766 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
  * from the register case of group9.
  */
 static const struct gprefix pfx_0f_c7_7 = {
-	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+	N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
 };
 
 
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 0d359115429ad..f016838faedd6 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -468,6 +468,7 @@ enum x86_intercept {
 	x86_intercept_clgi,
 	x86_intercept_skinit,
 	x86_intercept_rdtscp,
+	x86_intercept_rdpid,
 	x86_intercept_icebp,
 	x86_intercept_wbinvd,
 	x86_intercept_monitor,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 46573b8626385..4a625c7482756 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7437,8 +7437,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
 	/*
 	 * RDPID causes #UD if disabled through secondary execution controls.
 	 * Because it is marked as EmulateOnUD, we need to intercept it here.
+	 * Note, RDPID is hidden behind ENABLE_RDTSCP.
 	 */
-	case x86_intercept_rdtscp:
+	case x86_intercept_rdpid:
 		if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
 			exception->vector = UD_VECTOR;
 			exception->error_code_valid = false;
-- 
GitLab


From 5104d7ffcf24749939bea7fdb5378d186473f890 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:24 -0700
Subject: [PATCH 0096/3804] KVM: VMX: Disable preemption when probing user
 return MSRs

Disable preemption when probing a user return MSR via RDMSR/WRMSR.  If
the MSR holds a different value per logical CPU, the WRMSR could corrupt
the host's value if KVM is preempted between the RDMSR and WRMSR, and
then rescheduled on a different CPU.

Opportunistically land the helper in common x86, SVM will use the helper
in a future commit.

Fixes: 4be534102624 ("KVM: VMX: Initialize vmx->guest_msrs[] right after allocation")
Cc: stable@vger.kernel.org
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-6-seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c          |  5 +----
 arch/x86/kvm/x86.c              | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 848956bb3cf1d..e8dcbc632cf8e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long icr, int op_64_bit);
 
 void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4a625c7482756..11ff9c3d95d53 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6914,12 +6914,9 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
 		u32 index = vmx_uret_msrs_list[i];
-		u32 data_low, data_high;
 		int j = vmx->nr_uret_msrs;
 
-		if (rdmsr_safe(index, &data_low, &data_high) < 0)
-			continue;
-		if (wrmsr_safe(index, data_low, data_high) < 0)
+		if (kvm_probe_user_return_msr(index))
 			continue;
 
 		vmx->guest_uret_msrs[j].slot = i;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 102f116d9bb40..bd90c73c37b4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -339,6 +339,22 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
+int kvm_probe_user_return_msr(u32 msr)
+{
+	u64 val;
+	int ret;
+
+	preempt_disable();
+	ret = rdmsrl_safe(msr, &val);
+	if (ret)
+		goto out;
+	ret = wrmsrl_safe(msr, val);
+out:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
+
 void kvm_define_user_return_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-- 
GitLab


From 0caa0a77c2f6fcd0830cdcd018db1af98fe35e28 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:25 -0700
Subject: [PATCH 0097/3804] KVM: SVM: Probe and load MSR_TSC_AUX regardless of
 RDTSCP support in host

Probe MSR_TSC_AUX whether or not RDTSCP is supported in the host, and
if probing succeeds, load the guest's MSR_TSC_AUX into hardware prior to
VMRUN.  Because SVM doesn't support interception of RDPID, RDPID cannot
be disallowed in the guest (without resorting to binary translation).
Leaving the host's MSR_TSC_AUX in hardware would leak the host's value to
the guest if RDTSCP is not supported.

Note, there is also a kernel bug that prevents leaking the host's value.
The host kernel initializes MSR_TSC_AUX if and only if RDTSCP is
supported, even though the vDSO usage consumes MSR_TSC_AUX via RDPID.
I.e. if RDTSCP is not supported, there is no host value to leak.  But,
if/when the host kernel bug is fixed, KVM would start leaking MSR_TSC_AUX
in the case where hardware supports RDPID but RDTSCP is unavailable for
whatever reason.

Probing MSR_TSC_AUX will also allow consolidating the probe and define
logic in common x86, and will make it simpler to condition the existence
of MSR_TSC_AUX (from the guest's perspective) on RDTSCP *or* RDPID.

Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ebcb5849d69b1..13e4dd128177d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
  * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
  * defer the restoration of TSC_AUX until the CPU returns to userspace.
  */
-#define TSC_AUX_URET_SLOT	0
+static int tsc_aux_uret_slot __read_mostly = -1;
 
 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
@@ -959,8 +959,10 @@ static __init int svm_hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_RDTSCP))
-		kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+	if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) {
+		tsc_aux_uret_slot = 0;
+		kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX);
+	}
 
 	/* Check for pause filtering support */
 	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1454,8 +1456,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (static_cpu_has(X86_FEATURE_RDTSCP))
-		kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+	if (likely(tsc_aux_uret_slot >= 0))
+		kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
 	svm->guest_state_loaded = true;
 }
@@ -2664,7 +2666,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+		if (tsc_aux_uret_slot < 0)
 			return 1;
 		if (!msr_info->host_initiated &&
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
@@ -2885,7 +2887,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 		break;
 	case MSR_TSC_AUX:
-		if (!boot_cpu_has(X86_FEATURE_RDTSCP))
+		if (tsc_aux_uret_slot < 0)
 			return 1;
 
 		if (!msr->host_initiated &&
@@ -2908,7 +2910,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		 * guest via direct_access_msrs, and switch it via user return.
 		 */
 		preempt_disable();
-		r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+		r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
 		preempt_enable();
 		if (r)
 			return 1;
-- 
GitLab


From 36fa06f9ff39f23e03cd8206dc6bbb7711c23be6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:26 -0700
Subject: [PATCH 0098/3804] KVM: x86: Add support for RDPID without RDTSCP

Allow userspace to enable RDPID for a guest without also enabling RDTSCP.
Aside from checking for RDPID support in the obvious flows, VMX also needs
to set ENABLE_RDTSCP=1 when RDPID is exposed.

For the record, there is no known scenario where enabling RDPID without
RDTSCP is desirable.  But, both AMD and Intel architectures allow for the
condition, i.e. this is purely to make KVM more architecturally accurate.

Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID")
Cc: stable@vger.kernel.org
Reported-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c |  6 ++++--
 arch/x86/kvm/vmx/vmx.c | 27 +++++++++++++++++++++++----
 arch/x86/kvm/x86.c     |  3 ++-
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 13e4dd128177d..0ba0a00f8dc6a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2669,7 +2669,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (tsc_aux_uret_slot < 0)
 			return 1;
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		msr_info->data = svm->tsc_aux;
 		break;
@@ -2891,7 +2892,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 			return 1;
 
 		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 
 		/*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 11ff9c3d95d53..b304e372aab3c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1788,7 +1788,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	if (update_transition_efer(vmx))
 		vmx_setup_uret_msr(vmx, MSR_EFER);
 
-	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
+	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)  ||
+	    guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
 		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
 
 	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
@@ -1994,7 +1995,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
@@ -2314,7 +2316,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	case MSR_TSC_AUX:
 		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		/* Check reserved bit, higher 32 bits should be zero */
 		if ((data >> 32) != 0)
@@ -4368,7 +4371,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 						  xsaves_enabled, false);
 	}
 
-	vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+	/*
+	 * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+	 * feature is exposed to the guest.  This creates a virtualization hole
+	 * if both are supported in hardware but only one is exposed to the
+	 * guest, but letting the guest execute RDTSCP or RDPID when either one
+	 * is advertised is preferable to emulating the advertised instruction
+	 * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+	 */
+	if (cpu_has_vmx_rdtscp()) {
+		bool rdpid_or_rdtscp_enabled =
+			guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+			guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+		vmx_adjust_secondary_exec_control(vmx, &exec_control,
+						  SECONDARY_EXEC_ENABLE_RDTSCP,
+						  rdpid_or_rdtscp_enabled, false);
+	}
 	vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
 	vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd90c73c37b4d..0856636efc44c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5941,7 +5941,8 @@ static void kvm_init_msr_list(void)
 				continue;
 			break;
 		case MSR_TSC_AUX:
-			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+			if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+			    !kvm_cpu_cap_has(X86_FEATURE_RDPID))
 				continue;
 			break;
 		case MSR_IA32_UMWAIT_CONTROL:
-- 
GitLab


From b6194b94a2ca4affce5aab1bbf773a977ad73671 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:27 -0700
Subject: [PATCH 0099/3804] KVM: VMX: Configure list of user return MSRs at
 module init

Configure the list of user return MSRs that are actually supported at
module init instead of reprobing the list of possible MSRs every time a
vCPU is created.  Curating the list on a per-vCPU basis is pointless; KVM
is completely hosed if the set of supported MSRs changes after module init,
or if the set of MSRs differs per physical CPU.

The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and
create corner cases that _should_ be impossible, but theoretically exist
in KVM, e.g. advertising RDTSCP to userspace without actually being able to
virtualize RDTSCP if probing MSR_TSC_AUX fails.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 61 ++++++++++++++++++++++++++++--------------
 arch/x86/kvm/vmx/vmx.h | 10 ++++++-
 2 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b304e372aab3c..887db1af13125 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -461,7 +461,7 @@ static unsigned long host_idt_base;
  * support this emulation, IA32_STAR must always be included in
  * vmx_uret_msrs_list[], even in i386 builds.
  */
-static const u32 vmx_uret_msrs_list[] = {
+static u32 vmx_uret_msrs_list[] = {
 #ifdef CONFIG_X86_64
 	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
 #endif
@@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = {
 	MSR_IA32_TSX_CTRL,
 };
 
+/*
+ * Number of user return MSRs that are actually supported in hardware.
+ * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
+ */
+static int vmx_nr_uret_msrs;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	for (i = 0; i < vmx->nr_uret_msrs; ++i)
+	/*
+	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
+	 * but is ordered differently.  The MSR is matched against the list of
+	 * supported uret MSRs using "slot", but the index that is returned is
+	 * the index into guest_uret_msrs.
+	 */
+	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
 		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
 			return i;
+	}
 	return -1;
 }
 
@@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			goto free_vpid;
 	}
 
-	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-		u32 index = vmx_uret_msrs_list[i];
-		int j = vmx->nr_uret_msrs;
+	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+		vmx->guest_uret_msrs[i].data = 0;
 
-		if (kvm_probe_user_return_msr(index))
-			continue;
-
-		vmx->guest_uret_msrs[j].slot = i;
-		vmx->guest_uret_msrs[j].data = 0;
-		switch (index) {
+		switch (vmx_uret_msrs_list[i]) {
 		case MSR_IA32_TSX_CTRL:
 			/*
 			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
@@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			 * host so that TSX remains always disabled.
 			 */
 			if (boot_cpu_has(X86_FEATURE_RTM))
-				vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+				vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
 			else
-				vmx->guest_uret_msrs[j].mask = 0;
+				vmx->guest_uret_msrs[i].mask = 0;
 			break;
 		default:
-			vmx->guest_uret_msrs[j].mask = -1ull;
+			vmx->guest_uret_msrs[i].mask = -1ull;
 			break;
 		}
-		++vmx->nr_uret_msrs;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
+static __init void vmx_setup_user_return_msrs(void)
+{
+	u32 msr;
+	int i;
+
+	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
+		msr = vmx_uret_msrs_list[i];
+
+		if (kvm_probe_user_return_msr(msr))
+			continue;
+
+		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
+		vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+	}
+}
+
 static __init int hardware_setup(void)
 {
 	unsigned long host_bndcfgs;
 	struct desc_ptr dt;
-	int r, i, ept_lpage_level;
+	int r, ept_lpage_level;
 
 	store_idt(&dt);
 	host_idt_base = dt.address;
 
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-		kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+	vmx_setup_user_return_msrs();
 
 	if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
 		return -EIO;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 008cb87ff088c..d71ed8b425c52 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -245,8 +245,16 @@ struct vcpu_vmx {
 	u32                   idt_vectoring_info;
 	ulong                 rflags;
 
+	/*
+	 * User return MSRs are always emulated when enabled in the guest, but
+	 * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+	 * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+	 * be loaded into hardware if those conditions aren't met.
+	 * nr_active_uret_msrs tracks the number of MSRs that need to be loaded
+	 * into hardware when running the guest.  guest_uret_msrs[] is resorted
+	 * whenever the number of "active" uret MSRs is modified.
+	 */
 	struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-	int                   nr_uret_msrs;
 	int                   nr_active_uret_msrs;
 	bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
-- 
GitLab


From ee9d22e08d1341692a43926e5e1d84c90a5dac1d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:28 -0700
Subject: [PATCH 0100/3804] KVM: VMX: Use flag to indicate "active" uret MSRs
 instead of sorting list

Explicitly flag a uret MSR as needing to be loaded into hardware instead of
resorting the list of "active" MSRs and tracking how many MSRs in total
need to be loaded.  The only benefit to sorting the list is that the loop
to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate
over all supported uret MSRs, only those that are active.  But that is a
pointless optimization, as the most common case, running a 64-bit guest,
will load the vast majority of MSRs.  Not to mention that a single WRMSR is
far more expensive than iterating over the list.

Providing a stable list order obviates the need to track a given MSR's
"slot" in the per-CPU list of user return MSRs; all lists simply use the
same ordering.  Future patches will take advantage of the stable order to
further simplify the related code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++--------------------
 arch/x86/kvm/vmx/vmx.h |  2 +-
 2 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 887db1af13125..adefedac0e3b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -458,8 +458,9 @@ static unsigned long host_idt_base;
  * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
  * will emulate SYSCALL in legacy mode if the vendor string in guest
  * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, IA32_STAR must always be included in
- * vmx_uret_msrs_list[], even in i386 builds.
+ * support this emulation, MSR_STAR is included in the list for i386,
+ * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+ * into hardware and is here purely for emulation purposes.
  */
 static u32 vmx_uret_msrs_list[] = {
 #ifdef CONFIG_X86_64
@@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr)
 	return r;
 }
 
-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
+static inline int __vmx_find_uret_msr(u32 msr)
 {
 	int i;
 
-	/*
-	 * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list,
-	 * but is ordered differently.  The MSR is matched against the list of
-	 * supported uret MSRs using "slot", but the index that is returned is
-	 * the index into guest_uret_msrs.
-	 */
 	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
-		if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
+		if (vmx_uret_msrs_list[i] == msr)
 			return i;
 	}
 	return -1;
@@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	i = __vmx_find_uret_msr(vmx, msr);
+	i = __vmx_find_uret_msr(msr);
 	if (i >= 0)
 		return &vmx->guest_uret_msrs[i];
 	return NULL;
@@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 				  struct vmx_uret_msr *msr, u64 data)
 {
+	unsigned int slot = msr - vmx->guest_uret_msrs;
 	int ret = 0;
 
 	u64 old_msr_data = msr->data;
 	msr->data = data;
-	if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+	if (msr->load_into_hardware) {
 		preempt_disable();
-		ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+		ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
 		preempt_enable();
 		if (ret)
 			msr->data = old_msr_data;
@@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
 		return false;
 	}
 
-	i = __vmx_find_uret_msr(vmx, MSR_EFER);
+	i = __vmx_find_uret_msr(MSR_EFER);
 	if (i < 0)
 		return false;
 
@@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (!vmx->guest_uret_msrs_loaded) {
 		vmx->guest_uret_msrs_loaded = true;
-		for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
-			kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+		for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+			if (!vmx->guest_uret_msrs[i].load_into_hardware)
+				continue;
+
+			kvm_set_user_return_msr(i,
 						vmx->guest_uret_msrs[i].data,
 						vmx->guest_uret_msrs[i].mask);
-
+		}
 	}
 
     	if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 	vmx_clear_hlt(vcpu);
 }
 
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+			       bool load_into_hardware)
 {
-	struct vmx_uret_msr tmp;
-	int from, to;
+	struct vmx_uret_msr *uret_msr;
 
-	from = __vmx_find_uret_msr(vmx, msr);
-	if (from < 0)
+	uret_msr = vmx_find_uret_msr(vmx, msr);
+	if (!uret_msr)
 		return;
-	to = vmx->nr_active_uret_msrs++;
 
-	tmp = vmx->guest_uret_msrs[to];
-	vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
-	vmx->guest_uret_msrs[from] = tmp;
+	uret_msr->load_into_hardware = load_into_hardware;
 }
 
 /*
@@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
  */
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
-	vmx->guest_uret_msrs_loaded = false;
-	vmx->nr_active_uret_msrs = 0;
 #ifdef CONFIG_X86_64
+	bool load_syscall_msrs;
+
 	/*
 	 * The SYSCALL MSRs are only needed on long mode guests, and only
 	 * when EFER.SCE is set.
 	 */
-	if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
-		vmx_setup_uret_msr(vmx, MSR_STAR);
-		vmx_setup_uret_msr(vmx, MSR_LSTAR);
-		vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
-	}
+	load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+			    (vmx->vcpu.arch.efer & EFER_SCE);
+
+	vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+	vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
 #endif
-	if (update_transition_efer(vmx))
-		vmx_setup_uret_msr(vmx, MSR_EFER);
+	vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
 
-	if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)  ||
-	    guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID))
-		vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+	vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
+
+	/*
+	 * The set of MSRs to load may have changed, reload MSRs before the
+	 * next VM-Enter.
+	 */
+	vmx->guest_uret_msrs_loaded = false;
 }
 
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index d71ed8b425c52..16e4e457ba23c 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -36,7 +36,7 @@ struct vmx_msrs {
 };
 
 struct vmx_uret_msr {
-	unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+	bool load_into_hardware;
 	u64 data;
 	u64 mask;
 };
-- 
GitLab


From 8ea8b8d6f869425e21f34e60bdbe7e47e6c9d6b9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:29 -0700
Subject: [PATCH 0101/3804] KVM: VMX: Use common x86's uret MSR list as the one
 true list

Drop VMX's global list of user return MSRs now that VMX doesn't resort said
list to isolate "active" MSRs, i.e. now that VMX's list and x86's list have
the same MSRs in the same order.

In addition to eliminating the redundant list, this will also allow moving
more of the list management into common x86.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c          | 97 ++++++++++++++-------------------
 arch/x86/kvm/x86.c              | 12 ++++
 3 files changed, 53 insertions(+), 57 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e8dcbc632cf8e..6b15f27f49d01 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long icr, int op_64_bit);
 
 void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_find_user_return_msr(u32 msr);
 int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index adefedac0e3b9..39506704be966 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -454,26 +454,7 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 
 static unsigned long host_idt_base;
 
-/*
- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
- * will emulate SYSCALL in legacy mode if the vendor string in guest
- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, MSR_STAR is included in the list for i386,
- * but is never loaded into hardware.  MSR_CSTAR is also never loaded
- * into hardware and is here purely for emulation purposes.
- */
-static u32 vmx_uret_msrs_list[] = {
-#ifdef CONFIG_X86_64
-	MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
-	MSR_EFER, MSR_TSC_AUX, MSR_STAR,
-	MSR_IA32_TSX_CTRL,
-};
-
-/*
- * Number of user return MSRs that are actually supported in hardware.
- * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs.
- */
+/* Number of user return MSRs that are actually supported in hardware. */
 static int vmx_nr_uret_msrs;
 
 #if IS_ENABLED(CONFIG_HYPERV)
@@ -703,22 +684,11 @@ static bool is_valid_passthrough_msr(u32 msr)
 	return r;
 }
 
-static inline int __vmx_find_uret_msr(u32 msr)
-{
-	int i;
-
-	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
-		if (vmx_uret_msrs_list[i] == msr)
-			return i;
-	}
-	return -1;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
 
-	i = __vmx_find_uret_msr(msr);
+	i = kvm_find_user_return_msr(msr);
 	if (i >= 0)
 		return &vmx->guest_uret_msrs[i];
 	return NULL;
@@ -1086,7 +1056,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
 		return false;
 	}
 
-	i = __vmx_find_uret_msr(MSR_EFER);
+	i = kvm_find_user_return_msr(MSR_EFER);
 	if (i < 0)
 		return false;
 
@@ -6922,6 +6892,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
 static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
+	struct vmx_uret_msr *tsx_ctrl;
 	struct vcpu_vmx *vmx;
 	int i, cpu, err;
 
@@ -6946,29 +6917,25 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 
 	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
 		vmx->guest_uret_msrs[i].data = 0;
-
-		switch (vmx_uret_msrs_list[i]) {
-		case MSR_IA32_TSX_CTRL:
-			/*
-			 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
-			 * interception.  Keep the host value unchanged to avoid
-			 * changing CPUID bits under the host kernel's feet.
-			 *
-			 * hle=0, rtm=0, tsx_ctrl=1 can be found with some
-			 * combinations of new kernel and old userspace.  If
-			 * those guests run on a tsx=off host, do allow guests
-			 * to use TSX_CTRL, but do not change the value on the
-			 * host so that TSX remains always disabled.
-			 */
-			if (boot_cpu_has(X86_FEATURE_RTM))
-				vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-			else
-				vmx->guest_uret_msrs[i].mask = 0;
-			break;
-		default:
-			vmx->guest_uret_msrs[i].mask = -1ull;
-			break;
-		}
+		vmx->guest_uret_msrs[i].mask = -1ull;
+	}
+	tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+	if (tsx_ctrl) {
+		/*
+		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+		 * Keep the host value unchanged to avoid changing CPUID bits
+		 * under the host kernel's feet.
+		 *
+		 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations
+		 * of new kernel and old userspace.  If those guests run on a
+		 * tsx=off host, do allow guests to use TSX_CTRL, but do not
+		 * change the value on the host so that TSX remains always
+		 * disabled.
+		 */
+		if (boot_cpu_has(X86_FEATURE_RTM))
+			vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+		else
+			vmx->guest_uret_msrs[i].mask = 0;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7829,6 +7796,22 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 static __init void vmx_setup_user_return_msrs(void)
 {
+
+	/*
+	 * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+	 * will emulate SYSCALL in legacy mode if the vendor string in guest
+	 * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
+	 * support this emulation, MSR_STAR is included in the list for i386,
+	 * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+	 * into hardware and is here purely for emulation purposes.
+	 */
+	const u32 vmx_uret_msrs_list[] = {
+	#ifdef CONFIG_X86_64
+		MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+	#endif
+		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+		MSR_IA32_TSX_CTRL,
+	};
 	u32 msr;
 	int i;
 
@@ -7841,7 +7824,7 @@ static __init void vmx_setup_user_return_msrs(void)
 			continue;
 
 		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
-		vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr;
+		vmx_nr_uret_msrs++;
 	}
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0856636efc44c..d514031ed25f8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -364,6 +364,18 @@ void kvm_define_user_return_msr(unsigned slot, u32 msr)
 }
 EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
 
+int kvm_find_user_return_msr(u32 msr)
+{
+	int i;
+
+	for (i = 0; i < user_return_msrs_global.nr; ++i) {
+		if (user_return_msrs_global.msrs[i] == msr)
+			return i;
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
+
 static void kvm_user_return_msr_cpu_online(void)
 {
 	unsigned int cpu = smp_processor_id();
-- 
GitLab


From 5e17c624010a82bbcca9b955155781927eb6532a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:30 -0700
Subject: [PATCH 0102/3804] KVM: VMX: Disable loading of TSX_CTRL MSR the more
 conventional way

Tag TSX_CTRL as not needing to be loaded when RTM isn't supported in the
host.  Crushing the write mask to '0' has the same effect, but requires
more mental gymnastics to understand.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-12-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 39506704be966..bd2187b4cc530 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1771,7 +1771,13 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
 			   guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true);
+	/*
+	 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+	 * kernel and old userspace.  If those guests run on a tsx=off host, do
+	 * allow guests to use TSX_CTRL, but don't change the value in hardware
+	 * so that TSX remains always disabled.
+	 */
+	vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
@@ -6919,23 +6925,15 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 		vmx->guest_uret_msrs[i].data = 0;
 		vmx->guest_uret_msrs[i].mask = -1ull;
 	}
-	tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
-	if (tsx_ctrl) {
+	if (boot_cpu_has(X86_FEATURE_RTM)) {
 		/*
 		 * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
 		 * Keep the host value unchanged to avoid changing CPUID bits
 		 * under the host kernel's feet.
-		 *
-		 * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations
-		 * of new kernel and old userspace.  If those guests run on a
-		 * tsx=off host, do allow guests to use TSX_CTRL, but do not
-		 * change the value on the host so that TSX remains always
-		 * disabled.
 		 */
-		if (boot_cpu_has(X86_FEATURE_RTM))
+		tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+		if (tsx_ctrl)
 			vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-		else
-			vmx->guest_uret_msrs[i].mask = 0;
 	}
 
 	err = alloc_loaded_vmcs(&vmx->vmcs01);
-- 
GitLab


From 9cc39a5a43c05f8eda206bf9e144119820ecf5c8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:31 -0700
Subject: [PATCH 0103/3804] KVM: x86: Export the number of uret MSRs to vendor
 modules

Split out and export the number of configured user return MSRs so that
VMX can iterate over the set of MSRs without having to do its own tracking.
Keep the list itself internal to x86 so that vendor code still has to go
through the "official" APIs to add/modify entries.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 29 +++++++++++++----------------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6b15f27f49d01..22505e74c3dac 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1418,6 +1418,7 @@ struct kvm_arch_async_pf {
 	bool direct_map;
 };
 
+extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern struct kvm_x86_ops kvm_x86_ops;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d514031ed25f8..5e1deed8ea5d2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
  */
 #define KVM_MAX_NR_USER_RETURN_MSRS 16
 
-struct kvm_user_return_msrs_global {
-	int nr;
-	u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
 struct kvm_user_return_msrs {
 	struct user_return_notifier urn;
 	bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
 	} values[KVM_MAX_NR_USER_RETURN_MSRS];
 };
 
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
 static struct kvm_user_return_msrs __percpu *user_return_msrs;
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,10 +327,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 		user_return_notifier_unregister(urn);
 	}
 	local_irq_restore(flags);
-	for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+	for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
 		values = &msrs->values[slot];
 		if (values->host != values->curr) {
-			wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+			wrmsrl(kvm_uret_msrs_list[slot], values->host);
 			values->curr = values->host;
 		}
 	}
@@ -358,9 +355,9 @@ EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
 void kvm_define_user_return_msr(unsigned slot, u32 msr)
 {
 	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-	user_return_msrs_global.msrs[slot] = msr;
-	if (slot >= user_return_msrs_global.nr)
-		user_return_msrs_global.nr = slot + 1;
+	kvm_uret_msrs_list[slot] = msr;
+	if (slot >= kvm_nr_uret_msrs)
+		kvm_nr_uret_msrs = slot + 1;
 }
 EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
 
@@ -368,8 +365,8 @@ int kvm_find_user_return_msr(u32 msr)
 {
 	int i;
 
-	for (i = 0; i < user_return_msrs_global.nr; ++i) {
-		if (user_return_msrs_global.msrs[i] == msr)
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		if (kvm_uret_msrs_list[i] == msr)
 			return i;
 	}
 	return -1;
@@ -383,8 +380,8 @@ static void kvm_user_return_msr_cpu_online(void)
 	u64 value;
 	int i;
 
-	for (i = 0; i < user_return_msrs_global.nr; ++i) {
-		rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+		rdmsrl_safe(kvm_uret_msrs_list[i], &value);
 		msrs->values[i].host = value;
 		msrs->values[i].curr = value;
 	}
@@ -399,7 +396,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
 	value = (value & mask) | (msrs->values[slot].host & ~mask);
 	if (value == msrs->values[slot].curr)
 		return 0;
-	err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+	err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
 	if (err)
 		return 1;
 
-- 
GitLab


From e5fda4bbadb053e3b5164476146cf43092785c0b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:32 -0700
Subject: [PATCH 0104/3804] KVM: x86: Move uret MSR slot management to common
 x86

Now that SVM and VMX both probe MSRs before "defining" user return slots
for them, consolidate the code for probe+define into common x86 and
eliminate the odd behavior of having the vendor code define the slot for
a given MSR.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +--
 arch/x86/kvm/svm/svm.c          |  5 +----
 arch/x86/kvm/vmx/vmx.c          | 19 ++++---------------
 arch/x86/kvm/x86.c              | 19 +++++++++++--------
 4 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 22505e74c3dac..8155eaac22a7e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1777,9 +1777,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);
 
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
-int kvm_probe_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0ba0a00f8dc6a..d13b72bfb736b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -959,10 +959,7 @@ static __init int svm_hardware_setup(void)
 		kvm_tsc_scaling_ratio_frac_bits = 32;
 	}
 
-	if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) {
-		tsc_aux_uret_slot = 0;
-		kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX);
-	}
+	tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
 
 	/* Check for pause filtering support */
 	if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bd2187b4cc530..042823b492730 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -454,9 +454,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 
 static unsigned long host_idt_base;
 
-/* Number of user return MSRs that are actually supported in hardware. */
-static int vmx_nr_uret_msrs;
-
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -1218,7 +1215,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 	 */
 	if (!vmx->guest_uret_msrs_loaded) {
 		vmx->guest_uret_msrs_loaded = true;
-		for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+		for (i = 0; i < kvm_nr_uret_msrs; ++i) {
 			if (!vmx->guest_uret_msrs[i].load_into_hardware)
 				continue;
 
@@ -6921,7 +6918,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 			goto free_vpid;
 	}
 
-	for (i = 0; i < vmx_nr_uret_msrs; ++i) {
+	for (i = 0; i < kvm_nr_uret_msrs; ++i) {
 		vmx->guest_uret_msrs[i].data = 0;
 		vmx->guest_uret_msrs[i].mask = -1ull;
 	}
@@ -7810,20 +7807,12 @@ static __init void vmx_setup_user_return_msrs(void)
 		MSR_EFER, MSR_TSC_AUX, MSR_STAR,
 		MSR_IA32_TSX_CTRL,
 	};
-	u32 msr;
 	int i;
 
 	BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
 
-	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-		msr = vmx_uret_msrs_list[i];
-
-		if (kvm_probe_user_return_msr(msr))
-			continue;
-
-		kvm_define_user_return_msr(vmx_nr_uret_msrs, msr);
-		vmx_nr_uret_msrs++;
-	}
+	for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+		kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
 }
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5e1deed8ea5d2..0a0eebf35dd53 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -336,7 +336,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
-int kvm_probe_user_return_msr(u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
 {
 	u64 val;
 	int ret;
@@ -350,16 +350,18 @@ out:
 	preempt_enable();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr);
 
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+int kvm_add_user_return_msr(u32 msr)
 {
-	BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-	kvm_uret_msrs_list[slot] = msr;
-	if (slot >= kvm_nr_uret_msrs)
-		kvm_nr_uret_msrs = slot + 1;
+	BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+	if (kvm_probe_user_return_msr(msr))
+		return -1;
+
+	kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+	return kvm_nr_uret_msrs++;
 }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
 
 int kvm_find_user_return_msr(u32 msr)
 {
@@ -8132,6 +8134,7 @@ int kvm_arch_init(void *opaque)
 		printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
 		goto out_free_x86_emulator_cache;
 	}
+	kvm_nr_uret_msrs = 0;
 
 	r = kvm_mmu_module_init();
 	if (r)
-- 
GitLab


From 61a05d444d2ca8d40add453a5f7058fbb1b57eca Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:33 -0700
Subject: [PATCH 0105/3804] KVM: x86: Tie Intel and AMD behavior for
 MSR_TSC_AUX to guest CPU model

Squish the Intel and AMD emulation of MSR_TSC_AUX together and tie it to
the guest CPU model instead of the host CPU behavior.  While not strictly
necessary to avoid guest breakage, emulating cross-vendor "architecture"
will provide consistent behavior for the guest, e.g. WRMSR fault behavior
won't change if the vCPU is migrated to a host with divergent behavior.

Note, the "new" kvm_is_supported_user_return_msr() checks do not add new
functionality on either SVM or VMX.  On SVM, the equivalent was
"tsc_aux_uret_slot < 0", and on VMX the check was buried in the
vmx_find_uret_msr() call at the find_uret_msr label.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++++
 arch/x86/kvm/svm/svm.c          | 24 ----------------------
 arch/x86/kvm/vmx/vmx.c          | 15 --------------
 arch/x86/kvm/x86.c              | 36 +++++++++++++++++++++++++++++++++
 4 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8155eaac22a7e..f85480b1d215f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1781,6 +1781,11 @@ int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+	return kvm_find_user_return_msr(msr) >= 0;
+}
+
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d13b72bfb736b..0922d8e173e61 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2663,12 +2663,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
 		break;
 	case MSR_TSC_AUX:
-		if (tsc_aux_uret_slot < 0)
-			return 1;
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
 		msr_info->data = svm->tsc_aux;
 		break;
 	/*
@@ -2885,24 +2879,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
 		break;
 	case MSR_TSC_AUX:
-		if (tsc_aux_uret_slot < 0)
-			return 1;
-
-		if (!msr->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-
-		/*
-		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
-		 * incomplete and conflicting architectural behavior.  Current
-		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
-		 * reserved and always read as zeros.  Emulate AMD CPU behavior
-		 * to avoid explosions if the vCPU is migrated from an AMD host
-		 * to an Intel host.
-		 */
-		data = (u32)data;
-
 		/*
 		 * TSC_AUX is usually changed only during boot and never read
 		 * directly.  Intercept TSC_AUX instead of exposing it to the
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 042823b492730..61f7e5221bf3a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1981,12 +1981,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
 		break;
@@ -2302,15 +2296,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
-	case MSR_TSC_AUX:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
-		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
-			return 1;
-		/* Check reserved bit, higher 32 bits should be zero */
-		if ((data >> 32) != 0)
-			return 1;
-		goto find_uret_msr;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
 			return 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a0eebf35dd53..4efd2978ec089 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1642,6 +1642,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 		 * invokes 64-bit SYSENTER.
 		 */
 		data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+		break;
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+
+		/*
+		 * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+		 * incomplete and conflicting architectural behavior.  Current
+		 * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+		 * reserved and always read as zeros.  Enforce Intel's reserved
+		 * bits check if and only if the guest CPU is Intel, and clear
+		 * the bits in all other cases.  This ensures cross-vendor
+		 * migration will provide consistent behavior for the guest.
+		 */
+		if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+			return 1;
+
+		data = (u32)data;
+		break;
 	}
 
 	msr.data = data;
@@ -1678,6 +1702,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 	if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
 		return KVM_MSR_RET_FILTERED;
 
+	switch (index) {
+	case MSR_TSC_AUX:
+		if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+			return 1;
+
+		if (!host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+			return 1;
+		break;
+	}
+
 	msr.index = index;
 	msr.host_initiated = host_initiated;
 
-- 
GitLab


From 78bba966ee3cdbbfc585d8e39237378fba50a142 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 4 May 2021 10:17:34 -0700
Subject: [PATCH 0106/3804] KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX
 probing failed

If probing MSR_TSC_AUX failed, hide RDTSCP and RDPID, and WARN if either
feature was reported as supported.  In theory, such a scenario should
never happen as both Intel and AMD state that MSR_TSC_AUX is available if
RDTSCP or RDPID is supported.  But, KVM injects #GP on MSR_TSC_AUX
accesses if probing failed, faults on WRMSR(MSR_TSC_AUX) may be fatal to
the guest (because they happen during early CPU bringup), and KVM itself
has effectively misreported RDPID support in the past.

Note, this also has the happy side effect of omitting MSR_TSC_AUX from
the list of MSRs that are exposed to userspace if probing the MSR fails.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210504171734.1434054-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c0e8c5e921898..f42e9491a6c8c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
 		F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
 		F(PMM) | F(PMM_EN)
 	);
+
+	/*
+	 * Hide RDTSCP and RDPID if either feature is reported as supported but
+	 * probing MSR_TSC_AUX failed.  This is purely a sanity check and
+	 * should never happen, but the guest will likely crash if RDTSCP or
+	 * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+	 * the past.  For example, the sanity check may fire if this instance of
+	 * KVM is running as L1 on top of an older, broken KVM.
+	 */
+	if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+		     kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+		     !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+		kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 
-- 
GitLab


From 34114136f725cbd0c83e7b5a0c8a977976cd82f7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 5 May 2021 22:15:09 +1000
Subject: [PATCH 0107/3804] KVM: PPC: Book3S HV: Fix conversion to gfn-based
 MMU notifier callbacks

Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier
callbacks") causes unmap_gfn_range and age_gfn callbacks to only work
on the first gfn in the range. It also makes the aging callbacks call
into both radix and hash aging functions for radix guests. Fix this.

Add warnings for the single-gfn calls that have been converted to range
callbacks, in case they ever receive ranges greater than 1.

Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks")
Reported-by: Bharata B Rao <bharata@linux.ibm.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20210505121509.1470207-1-npiggin@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/include/asm/kvm_book3s.h  |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c    | 46 ++++++++++++++++++--------
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  5 ++-
 3 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a6e9a5585e618..e6b53c6e21e32 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
 				      unsigned int lpid);
 extern int kvmppc_radix_init(void);
 extern void kvmppc_radix_exit(void);
-extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn);
 extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			  unsigned long gfn);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index b7bd9ca040b85..2d9193cd73be4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
 	}
 }
 
-static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 			    unsigned long gfn)
 {
 	unsigned long i;
@@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		unlock_rmap(rmapp);
 		__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
 	}
-	return false;
 }
 
 bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			kvm_unmap_rmapp(kvm, range->slot, gfn);
+	}
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
@@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_age_radix(kvm, range->slot, range->start);
+	gfn_t gfn;
+	bool ret = false;
 
-	return kvm_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm)) {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_radix(kvm, range->slot, gfn);
+	} else {
+		for (gfn = range->start; gfn < range->end; gfn++)
+			ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+	}
+
+	return ret;
 }
 
 static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
@@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	if (kvm_is_radix(kvm))
-		kvm_test_age_radix(kvm, range->slot, range->start);
+	WARN_ON(range->start + 1 != range->end);
 
-	return kvm_test_age_rmapp(kvm, range->slot, range->start);
+	if (kvm_is_radix(kvm))
+		return kvm_test_age_radix(kvm, range->slot, range->start);
+	else
+		return kvm_test_age_rmapp(kvm, range->slot, range->start);
 }
 
 bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 {
+	WARN_ON(range->start + 1 != range->end);
+
 	if (kvm_is_radix(kvm))
-		return kvm_unmap_radix(kvm, range->slot, range->start);
+		kvm_unmap_radix(kvm, range->slot, range->start);
+	else
+		kvm_unmap_rmapp(kvm, range->slot, range->start);
 
-	return kvm_unmap_rmapp(kvm, range->slot, range->start);
+	return false;
 }
 
 static int vcpus_running(struct kvm *kvm)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index ec4f58fa9f5a2..d909c069363e0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
 }
 
 /* Called with kvm->mmu_lock held */
-bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 		     unsigned long gfn)
 {
 	pte_t *ptep;
@@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 
 	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
 		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
-		return false;
+		return;
 	}
 
 	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
 	if (ptep && pte_present(*ptep))
 		kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 				 kvm->arch.lpid);
-	return false;
 }
 
 /* Called with kvm->mmu_lock held */
-- 
GitLab


From e8ea85fb280ec55674bca88ea7cd85f60d19567f Mon Sep 17 00:00:00 2001
From: Chenyi Qiang <chenyi.qiang@intel.com>
Date: Tue, 2 Feb 2021 17:04:32 +0800
Subject: [PATCH 0108/3804] KVM: X86: Add support for the emulation of
 DR6_BUS_LOCK bit

Bus lock debug exception introduces a new bit DR6_BUS_LOCK (bit 11 of
DR6) to indicate that bus lock #DB exception is generated. The set/clear
of DR6_BUS_LOCK is similar to the DR6_RTM. The processor clears
DR6_BUS_LOCK when the exception is generated. For all other #DB, the
processor sets this bit to 1. Software #DB handler should set this bit
before returning to the interrupted task.

In VMM, to avoid breaking the CPUs without bus lock #DB exception
support, activate the DR6_BUS_LOCK conditionally in DR6_FIXED_1 bits.
When intercepting the #DB exception caused by bus locks, bit 11 of the
exit qualification is set to identify it. The VMM should emulate the
exception by clearing the bit 11 of the guest DR6.

Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Message-Id: <20210202090433.13441-3-chenyi.qiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 ++-
 arch/x86/kvm/x86.c              | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f85480b1d215f..8836b30962178 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -200,6 +200,7 @@ enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS	4
 
+#define DR6_BUS_LOCK   (1 << 11)
 #define DR6_BD		(1 << 13)
 #define DR6_BS		(1 << 14)
 #define DR6_BT		(1 << 15)
@@ -213,7 +214,7 @@ enum x86_intercept_stage;
  * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
  */
 #define DR6_ACTIVE_LOW	0xffff0ff0
-#define DR6_VOLATILE	0x0001e00f
+#define DR6_VOLATILE	0x0001e80f
 #define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 
 #define DR7_BP_EN_MASK	0x000000ff
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4efd2978ec089..9b1a7040eae31 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1176,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
 		fixed |= DR6_RTM;
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		fixed |= DR6_BUS_LOCK;
 	return fixed;
 }
 
-- 
GitLab


From 76ea438b4afcd9ee8da3387e9af4625eaccff58f Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 6 May 2021 06:30:04 -0400
Subject: [PATCH 0109/3804] KVM: X86: Expose bus lock debug exception to guest

Bus lock debug exception is an ability to notify the kernel by an #DB
trap after the instruction acquires a bus lock and is executed when
CPL>0. This allows the kernel to enforce user application throttling or
mitigations.

Existence of bus lock debug exception is enumerated via
CPUID.(EAX=7,ECX=0).ECX[24]. Software can enable these exceptions by
setting bit 2 of the MSR_IA32_DEBUGCTL. Expose the CPUID to guest and
emulate the MSR handling when guest enables it.

Support for this feature was originally developed by Xiaoyao Li and
Chenyi Qiang, but code has since changed enough that this patch has
nothing in common with theirs, except for this commit message.

Co-developed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
Message-Id: <20210202090433.13441-4-chenyi.qiang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c            | 2 +-
 arch/x86/kvm/vmx/capabilities.h | 3 +++
 arch/x86/kvm/vmx/vmx.c          | 3 +++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f42e9491a6c8c..9a48f138832d4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
 		F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 		F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
 		F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
-		F(SGX_LC)
+		F(SGX_LC) | F(BUS_LOCK_DETECT)
 	);
 	/* Set LA57 based on hardware capability. */
 	if (cpuid_ecx(7) & F(LA57))
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d1d77985e889f..8dee8a5fbc17f 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void)
 {
 	u64 debugctl = 0;
 
+	if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 	if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
 		debugctl |= DEBUGCTLMSR_LBR_MASK;
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 61f7e5221bf3a..f2fd447eed459 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2014,6 +2014,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
 	if (!intel_pmu_lbr_is_enabled(vcpu))
 		debugctl &= ~DEBUGCTLMSR_LBR_MASK;
 
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+		debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
 	return debugctl;
 }
 
-- 
GitLab


From 03ca4589fabcc66b27e4cb8f8e95d64cf43badd0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 5 May 2021 13:42:21 -0700
Subject: [PATCH 0110/3804] KVM: x86: Prevent KVM SVM from loading on kernels
 with 5-level paging

Disallow loading KVM SVM if 5-level paging is supported.  In theory, NPT
for L1 should simply work, but there are unknowns with respect to how the
guest's MAXPHYADDR will be handled by hardware.

Nested NPT is more problematic, as running an L1 VMM that is using
2-level page tables requires stacking single-entry PDP and PML4 tables in
KVM's NPT for L2, as there are no equivalent entries in L1's NPT to
shadow.  Barring hardware magic, for 5-level paging, KVM would need to stack
another layer to handle PML5.

Opportunistically rename the lm_root pointer, which is used for the
aforementioned stacking when shadowing 2-level L1 NPT, to pml4_root to
call out that it's specifically for PML4.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210505204221.1934471-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 20 ++++++++++----------
 arch/x86/kvm/svm/svm.c          |  5 +++++
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8836b30962178..55efbacfc2445 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -409,7 +409,7 @@ struct kvm_mmu {
 	u32 pkru_mask;
 
 	u64 *pae_root;
-	u64 *lm_root;
+	u64 *pml4_root;
 
 	/*
 	 * check zero bits on shadow page table entries, these
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4b3ee244ebe05..0144c40d09c76 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
 		pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
-		if (WARN_ON_ONCE(!mmu->lm_root)) {
+		if (WARN_ON_ONCE(!mmu->pml4_root)) {
 			r = -EIO;
 			goto out_unlock;
 		}
 
-		mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+		mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
 	}
 
 	for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	}
 
 	if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-		mmu->root_hpa = __pa(mmu->lm_root);
+		mmu->root_hpa = __pa(mmu->pml4_root);
 	else
 		mmu->root_hpa = __pa(mmu->pae_root);
 
@@ -3350,7 +3350,7 @@ out_unlock:
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	u64 *lm_root, *pae_root;
+	u64 *pml4_root, *pae_root;
 
 	/*
 	 * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
 		return -EIO;
 
-	if (mmu->pae_root && mmu->lm_root)
+	if (mmu->pae_root && mmu->pml4_root)
 		return 0;
 
 	/*
 	 * The special roots should always be allocated in concert.  Yell and
 	 * bail if KVM ends up in a state where only one of the roots is valid.
 	 */
-	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+	if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
 		return -EIO;
 
 	/*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 	if (!pae_root)
 		return -ENOMEM;
 
-	lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!lm_root) {
+	pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+	if (!pml4_root) {
 		free_page((unsigned long)pae_root);
 		return -ENOMEM;
 	}
 
 	mmu->pae_root = pae_root;
-	mmu->lm_root = lm_root;
+	mmu->pml4_root = pml4_root;
 
 	return 0;
 }
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
 	if (!tdp_enabled && mmu->pae_root)
 		set_memory_encrypted((unsigned long)mmu->pae_root, 1);
 	free_page((unsigned long)mmu->pae_root);
-	free_page((unsigned long)mmu->lm_root);
+	free_page((unsigned long)mmu->pml4_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0922d8e173e61..8124f51e9488f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -447,6 +447,11 @@ static int has_svm(void)
 		return 0;
 	}
 
+	if (pgtable_l5_enabled()) {
+		pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+		return 0;
+	}
+
 	return 1;
 }
 
-- 
GitLab


From 594b27e677b35f9734b1969d175ebc6146741109 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 5 May 2021 23:48:17 +0200
Subject: [PATCH 0111/3804] KVM: x86: Cancel pvclock_gtod_work on module
 removal

Nothing prevents the following:

  pvclock_gtod_notify()
    queue_work(system_long_wq, &pvclock_gtod_work);
  ...
  remove_module(kvm);
  ...
  work_queue_run()
    pvclock_gtod_work()	<- UAF

Ditto for any other operation on that workqueue list head which touches
pvclock_gtod_work after module removal.

Cancel the work in kvm_arch_exit() to prevent that.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Message-Id: <87czu4onry.ffs@nanos.tec.linutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b1a7040eae31..259139a145cbb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8224,6 +8224,7 @@ void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
 	kvm_mmu_module_exit();
-- 
GitLab


From 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 6 May 2021 15:21:37 +0200
Subject: [PATCH 0112/3804] KVM: x86: Prevent deadlock against tk_core.seq

syzbot reported a possible deadlock in pvclock_gtod_notify():

CPU 0  		  	   	    	    CPU 1
write_seqcount_begin(&tk_core.seq);
  pvclock_gtod_notify()			    spin_lock(&pool->lock);
    queue_work(..., &pvclock_gtod_work)	    ktime_get()
     spin_lock(&pool->lock);		      do {
     						seq = read_seqcount_begin(tk_core.seq)
						...
				              } while (read_seqcount_retry(&tk_core.seq, seq));

While this is unlikely to happen, it's possible.

Delegate queue_work() to irq_work() which postpones it until the
tk_core.seq write held region is left and interrupts are reenabled.

Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes")
Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Message-Id: <87h7jgm1zy.ffs@nanos.tec.linutronix.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 259139a145cbb..5bd550eaf6833 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8094,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+	queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
 /*
  * Notification about pvclock gtod data update.
  */
@@ -8105,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
 	update_pvclock_gtod(tk);
 
-	/* disable master clock if host does not trust, or does not
-	 * use, TSC based clocksource.
+	/*
+	 * Disable master clock if host does not trust, or does not use,
+	 * TSC based clocksource. Delegate queue_work() to irq_work as
+	 * this is invoked with tk_core.seq write held.
 	 */
 	if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
 	    atomic_read(&kvm_guest_has_master_clock) != 0)
-		queue_work(system_long_wq, &pvclock_gtod_work);
-
+		irq_work_queue(&pvclock_irq_work);
 	return 0;
 }
 
@@ -8224,6 +8237,7 @@ void kvm_arch_exit(void)
 	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+	irq_work_sync(&pvclock_irq_work);
 	cancel_work_sync(&pvclock_gtod_work);
 #endif
 	kvm_x86_ops.hardware_enable = NULL;
-- 
GitLab


From b26990987ffce0525abbd84b36595869cfdbbfe6 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Thu, 6 May 2021 16:03:52 +0200
Subject: [PATCH 0113/3804] tools/kvm_stat: Fix documentation typo

Makes the dash in front of option '-z' disappear in the generated
man-page.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Message-Id: <20210506140352.4178789-1-raspl@linux.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index feaf46451e838..3a9f2037bd23f 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -111,7 +111,7 @@ OPTIONS
 --tracepoints::
         retrieve statistics from tracepoints
 
-*z*::
+-z::
 --skip-zero-records::
         omit records with all zeros in logging mode
 
-- 
GitLab


From 258785ef08b323bddd844b4926a32c2b2045a1b0 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 6 May 2021 15:24:43 +0000
Subject: [PATCH 0114/3804] kvm: Cap halt polling at kvm->max_halt_poll_ns

When growing halt-polling, there is no check that the poll time exceeds
the per-VM limit. It's possible for vcpu->halt_poll_ns to grow past
kvm->max_halt_poll_ns and stay there until a halt which takes longer
than kvm->halt_poll_ns.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: Venkatesh Srinivas <venkateshs@chromium.org>
Message-Id: <20210506152442.4010298-1-venkateshs@chromium.org>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b9f12da6af0ea..6b4feb92dc797 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	if (val < grow_start)
 		val = grow_start;
 
-	if (val > halt_poll_ns)
-		val = halt_poll_ns;
+	if (val > vcpu->kvm->max_halt_poll_ns)
+		val = vcpu->kvm->max_halt_poll_ns;
 
 	vcpu->halt_poll_ns = val;
 out:
-- 
GitLab


From 368340a3c7d9a207bfe544721d464b7109be8eae Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 May 2021 16:15:42 -0700
Subject: [PATCH 0115/3804] KVM: SVM: Invert user pointer casting in SEV
 {en,de}crypt helpers

Invert the user pointer params for SEV's helpers for encrypting and
decrypting guest memory so that they take a pointer and cast to an
unsigned long as necessary, as opposed to doing the opposite.  Tagging a
non-pointer as __user is confusing and weird since a cast of some form
needs to occur to actually access the user data.  This also fixes Sparse
warnings triggered by directly consuming the unsigned longs, which are
"noderef" due to the __user tag.

Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Ashish Kalra <ashish.kalra@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210506231542.2331138-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 8b11c711a0e40..eb241c1a4add3 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 }
 
 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
-				  unsigned long __user dst_uaddr,
+				  void __user *dst_uaddr,
 				  unsigned long dst_paddr,
 				  int size, int *err)
 {
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 
 	if (tpage) {
 		offset = paddr & 15;
-		if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
-				 page_address(tpage) + offset, size))
+		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
 			ret = -EFAULT;
 	}
 
@@ -800,9 +799,9 @@ e_free:
 }
 
 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
-				  unsigned long __user vaddr,
+				  void __user *vaddr,
 				  unsigned long dst_paddr,
-				  unsigned long __user dst_vaddr,
+				  void __user *dst_vaddr,
 				  int size, int *error)
 {
 	struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	int ret, len = size;
 
 	/* If source buffer is not aligned then use an intermediate buffer */
-	if (!IS_ALIGNED(vaddr, 16)) {
+	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
 		src_tpage = alloc_page(GFP_KERNEL);
 		if (!src_tpage)
 			return -ENOMEM;
 
-		if (copy_from_user(page_address(src_tpage),
-				(void __user *)(uintptr_t)vaddr, size)) {
+		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
 			__free_page(src_tpage);
 			return -EFAULT;
 		}
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 	 *   - copy the source buffer in an intermediate buffer
 	 *   - use the intermediate buffer as source buffer
 	 */
-	if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
 		int dst_offset;
 
 		dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 			       page_address(src_tpage), size);
 		else {
 			if (copy_from_user(page_address(dst_tpage) + dst_offset,
-					   (void __user *)(uintptr_t)vaddr, size)) {
+					   vaddr, size)) {
 				ret = -EFAULT;
 				goto e_free;
 			}
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 		if (dec)
 			ret = __sev_dbg_decrypt_user(kvm,
 						     __sme_page_pa(src_p[0]) + s_off,
-						     dst_vaddr,
+						     (void __user *)dst_vaddr,
 						     __sme_page_pa(dst_p[0]) + d_off,
 						     len, &argp->error);
 		else
 			ret = __sev_dbg_encrypt_user(kvm,
 						     __sme_page_pa(src_p[0]) + s_off,
-						     vaddr,
+						     (void __user *)vaddr,
 						     __sme_page_pa(dst_p[0]) + d_off,
-						     dst_vaddr,
+						     (void __user *)dst_vaddr,
 						     len, &argp->error);
 
 		sev_unpin_memory(kvm, src_p, n);
-- 
GitLab


From ce7ea0cfdc2e9ff31d12da31c3226deddb9644f5 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Thu, 6 May 2021 15:14:41 -0500
Subject: [PATCH 0116/3804] KVM: SVM: Move GHCB unmapping to fix RCU warning

When an SEV-ES guest is running, the GHCB is unmapped as part of the
vCPU run support. However, kvm_vcpu_unmap() triggers an RCU dereference
warning with CONFIG_PROVE_LOCKING=y because the SRCU lock is released
before invoking the vCPU run support.

Move the GHCB unmapping into the prepare_guest_switch callback, which is
invoked while still holding the SRCU lock, eliminating the RCU dereference
warning.

Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT")
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <b2f9b79d15166f2c3e4375c0d9bc3268b7696455.1620332081.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 5 +----
 arch/x86/kvm/svm/svm.c | 3 +++
 arch/x86/kvm/svm/svm.h | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index eb241c1a4add3..5bc887e9a9860 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2197,7 +2197,7 @@ vmgexit_err:
 	return -EINVAL;
 }
 
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
 	if (!svm->ghcb)
 		return;
@@ -2233,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 	int asid = sev_get_asid(svm->vcpu.kvm);
 
-	/* Perform any SEV-ES pre-run actions */
-	pre_sev_es_run(svm);
-
 	/* Assign the asid allocated with this SEV guest */
 	svm->asid = asid;
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8124f51e9488f..4dd9b7856e5b1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1437,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
 
+	if (sev_es_guest(vcpu->kvm))
+		sev_es_unmap_ghcb(svm);
+
 	if (svm->guest_state_loaded)
 		return;
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251d..e44567ceb8655 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
 
-- 
GitLab


From db8e712e06874e37a1fdb9bb011618811fc96dbd Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 7 May 2021 12:09:03 +0300
Subject: [PATCH 0117/3804] bus: ti-sysc: Fix missing quirk flags for sata

Naresh Kamboju <naresh.kamboju@linaro.org> reported that Beaglebone-X15
does not detect sata drives any longer after dra7 was flipped to boot with
device tree data only. Turns out we are now missing the sata related quirk
flags in ti-sysc that we used to have earlier.

Fixes: 98feab31ac49 ("ARM: OMAP2+: Drop legacy platform data for dra7 sata")
Fixes: 21206c8f2cb5 ("ARM: OMAP2+: Drop legacy platform data for omap5 sata")
Link: https://lore.kernel.org/regressions/CA+G9fYtTN6ug3eBAW3wMcDeESUo+ebj7L5HBe5_fj4uqDExFQg@mail.gmail.com/
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 8880259b41ae3..b3e7a6e8de716 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1459,6 +1459,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("tptc", 0, 0, -ENODEV, -ENODEV, 0x40007c00, 0xffffffff,
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
+	SYSC_QUIRK("sata", 0, 0xfc, 0x1100, -ENODEV, 0x5e412000, 0xffffffff,
+		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, 0x14, 0x50700100, 0xffffffff,
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, -ENODEV, 0x50700101, 0xffffffff,
@@ -1524,7 +1526,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("prcm", 0, 0, -ENODEV, -ENODEV, 0x40000400, 0xffffffff, 0),
 	SYSC_QUIRK("rfbi", 0x4832a800, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0),
 	SYSC_QUIRK("rfbi", 0x58002000, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0),
-	SYSC_QUIRK("sata", 0, 0xfc, 0x1100, -ENODEV, 0x5e412000, 0xffffffff, 0),
 	SYSC_QUIRK("scm", 0, 0, 0x10, -ENODEV, 0x40000900, 0xffffffff, 0),
 	SYSC_QUIRK("scm", 0, 0, -ENODEV, -ENODEV, 0x4e8b0100, 0xffffffff, 0),
 	SYSC_QUIRK("scm", 0, 0, -ENODEV, -ENODEV, 0x4f000100, 0xffffffff, 0),
-- 
GitLab


From ca66a6770bd9d6d99e469debd1c7363ac455daf9 Mon Sep 17 00:00:00 2001
From: Johnny Chuang <johnny.chuang.emc@gmail.com>
Date: Tue, 13 Apr 2021 09:20:50 +0800
Subject: [PATCH 0118/3804] HID: i2c-hid: Skip ELAN power-on command after
 reset

For ELAN touchscreen, we found our boot code of IC was not flexible enough
to receive and handle this command.
Once the FW main code of our controller has crashed for some reason,
the controller cannot be enumerated successfully and recognized
by the system host. Therefore, it loses touch functionality.

Add a quirk to skip sending the power-on command after reset.
It affects ELAN touchscreens and touchpads in HID-over-I2C projects.

Fixes: 43b7029f475e ("HID: i2c-hid: Send power-on command after reset").

Cc: stable@vger.kernel.org
Signed-off-by: Johnny Chuang <johnny.chuang.emc@gmail.com>
Reviewed-by: Harry Cutts <hcutts@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Tested-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/i2c-hid/i2c-hid-core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 9993133989a58..ce91b1e57876d 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -45,6 +45,7 @@
 #define I2C_HID_QUIRK_BOGUS_IRQ			BIT(4)
 #define I2C_HID_QUIRK_RESET_ON_RESUME		BIT(5)
 #define I2C_HID_QUIRK_BAD_INPUT_SIZE		BIT(6)
+#define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET	BIT(7)
 
 
 /* flags */
@@ -178,6 +179,11 @@ static const struct i2c_hid_quirks {
 		 I2C_HID_QUIRK_RESET_ON_RESUME },
 	{ USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720,
 		I2C_HID_QUIRK_BAD_INPUT_SIZE },
+	/*
+	 * Sending the wakeup after reset actually breaks ELAN touchscreen controller
+	 */
+	{ USB_VENDOR_ID_ELAN, HID_ANY_ID,
+		 I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET },
 	{ 0, 0 }
 };
 
@@ -461,7 +467,8 @@ static int i2c_hid_hwreset(struct i2c_client *client)
 	}
 
 	/* At least some SIS devices need this after reset */
-	ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+	if (!(ihid->quirks & I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET))
+		ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
 
 out_unlock:
 	mutex_unlock(&ihid->reset_lock);
-- 
GitLab


From 698ab77aebffe08b312fbcdddeb0e8bd08b78717 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:12 -0400
Subject: [PATCH 0119/3804] dax: Add an enum for specifying dax wakeup mode

Dan mentioned that he is not very fond of passing around a boolean true/false
to specify if only next waiter should be woken up or all waiters should be
woken up. He instead prefers that we introduce an enum and make it very
explicit at the callsite itself, which makes the code easier to read.

This patch should not introduce any change of behavior.

Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-2-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index b3d27fdc67752..5ecee51c44ee7 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
 	struct exceptional_entry_key key;
 };
 
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ */
+enum dax_wake_mode {
+	WAKE_ALL,
+	WAKE_NEXT,
+};
+
 static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
 		void *entry, struct exceptional_entry_key *key)
 {
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
  * The important information it's conveying is whether the entry at
  * this index used to be a PMD entry.
  */
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+			   enum dax_wake_mode mode)
 {
 	struct exceptional_entry_key key;
 	wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
 	 * must be in the waitqueue and the following check will see them.
 	 */
 	if (waitqueue_active(wq))
-		__wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+		__wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
 }
 
 /*
@@ -268,7 +279,7 @@ static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
 	if (entry && !dax_is_conflict(entry))
-		dax_wake_entry(xas, entry, false);
+		dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 	old = xas_store(xas, entry);
 	xas_unlock_irq(xas);
 	BUG_ON(!dax_is_locked(old));
-	dax_wake_entry(xas, entry, false);
+	dax_wake_entry(xas, entry, WAKE_NEXT);
 }
 
 /*
@@ -524,7 +535,7 @@ retry:
 
 		dax_disassociate_entry(entry, mapping, false);
 		xas_store(xas, NULL);	/* undo the PMD join */
-		dax_wake_entry(xas, entry, true);
+		dax_wake_entry(xas, entry, WAKE_ALL);
 		mapping->nrexceptional--;
 		entry = NULL;
 		xas_set(xas, index);
@@ -937,7 +948,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	xas_lock_irq(xas);
 	xas_store(xas, entry);
 	xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
-	dax_wake_entry(xas, entry, false);
+	dax_wake_entry(xas, entry, WAKE_NEXT);
 
 	trace_dax_writeback_one(mapping->host, index, count);
 	return ret;
-- 
GitLab


From 4c3d043d271d4d629aa2328796cdfc96b37d3b3c Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:13 -0400
Subject: [PATCH 0120/3804] dax: Add a wakeup mode parameter to
 put_unlocked_entry()

As of now put_unlocked_entry() always wakes up next waiter. In next
patches we want to wake up all waiters at one callsite. Hence, add a
parameter to the function.

This patch does not introduce any change of behavior.

Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-3-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5ecee51c44ee7..56eb1c759ca5c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -275,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
 	finish_wait(wq, &ewait.wait);
 }
 
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+			       enum dax_wake_mode mode)
 {
-	/* If we were the only waiter woken, wake the next one */
 	if (entry && !dax_is_conflict(entry))
-		dax_wake_entry(xas, entry, WAKE_NEXT);
+		dax_wake_entry(xas, entry, mode);
 }
 
 /*
@@ -633,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
 			entry = get_unlocked_entry(&xas, 0);
 		if (entry)
 			page = dax_busy_page(entry);
-		put_unlocked_entry(&xas, entry);
+		put_unlocked_entry(&xas, entry, WAKE_NEXT);
 		if (page)
 			break;
 		if (++scanned % XA_CHECK_SCHED)
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	mapping->nrexceptional--;
 	ret = 1;
 out:
-	put_unlocked_entry(&xas, entry);
+	put_unlocked_entry(&xas, entry, WAKE_NEXT);
 	xas_unlock_irq(&xas);
 	return ret;
 }
@@ -954,7 +954,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
 	return ret;
 
  put_unlocked:
-	put_unlocked_entry(xas, entry);
+	put_unlocked_entry(xas, entry, WAKE_NEXT);
 	return ret;
 }
 
@@ -1695,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry || dax_is_conflict(entry) ||
 	    (order == 0 && !dax_is_pte_entry(entry))) {
-		put_unlocked_entry(&xas, entry);
+		put_unlocked_entry(&xas, entry, WAKE_NEXT);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
 						      VM_FAULT_NOPAGE);
-- 
GitLab


From 237388320deffde7c2d65ed8fc9eef670dc979b3 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 28 Apr 2021 15:03:14 -0400
Subject: [PATCH 0121/3804] dax: Wake up all waiters after invalidating dax
 entry

I am seeing missed wakeups which ultimately lead to a deadlock when I am
using virtiofs with DAX enabled and running "make -j". I had to mount
virtiofs as rootfs and also reduce the dax window size to 256M to reproduce
the problem consistently.

So here is the problem. put_unlocked_entry() wakes up waiters only
if entry is not null as well as !dax_is_conflict(entry). But if I
call multiple instances of invalidate_inode_pages2() in parallel,
then I can run into a situation where there are waiters on
this index but nobody will wake these waiters.

invalidate_inode_pages2()
  invalidate_inode_pages2_range()
    invalidate_exceptional_entry2()
      dax_invalidate_mapping_entry_sync()
        __dax_invalidate_entry() {
                xas_lock_irq(&xas);
                entry = get_unlocked_entry(&xas, 0);
                ...
                ...
                dax_disassociate_entry(entry, mapping, trunc);
                xas_store(&xas, NULL);
                ...
                ...
                put_unlocked_entry(&xas, entry);
                xas_unlock_irq(&xas);
        }

Say a fault is in progress and it has locked the entry at offset, say, "0x1c".
Now say three instances of invalidate_inode_pages2() are in progress
(A, B, C) and they all try to invalidate the entry at offset "0x1c". Given
the dax entry is locked, all three instances A, B, C will wait in the wait queue.

When dax fault finishes, say A is woken up. It will store NULL entry
at index "0x1c" and wake up B. When B comes along it will find "entry=0"
at page offset 0x1c and it will call put_unlocked_entry(&xas, 0). And
this means put_unlocked_entry() will not wake up next waiter, given
the current code. And that means C continues to wait and is not woken
up.

This patch fixes the issue by waking up all waiters when a dax entry
has been invalidated. This seems to fix the deadlock I am facing
and I can make forward progress.

Reported-by: Sergio Lopez <slp@redhat.com>
Fixes: ac401cc78242 ("dax: New fault locking")
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Link: https://lore.kernel.org/r/20210428190314.1865312-4-vgoyal@redhat.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 56eb1c759ca5c..df5485b4bddf1 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
 	mapping->nrexceptional--;
 	ret = 1;
 out:
-	put_unlocked_entry(&xas, entry, WAKE_NEXT);
+	put_unlocked_entry(&xas, entry, WAKE_ALL);
 	xas_unlock_irq(&xas);
 	return ret;
 }
-- 
GitLab


From 285c0faddcebdf360412fc9ef9cde63cf98da7f6 Mon Sep 17 00:00:00 2001
From: Bharat Jauhari <bjauhari@habana.ai>
Date: Thu, 25 Mar 2021 18:15:40 +0200
Subject: [PATCH 0122/3804] habanalabs: expose ASIC specific PLL index

Currently the user cannot interpret the PLL information based on index
because it is exposed as a plain integer.

This commit exposes ASIC-specific PLL indexes and maps them to generic
FW-compatible indexes.

Signed-off-by: Bharat Jauhari <bjauhari@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 34 +++++++-----
 drivers/misc/habanalabs/common/habanalabs.h  | 16 +++---
 drivers/misc/habanalabs/common/sysfs.c       |  4 +-
 drivers/misc/habanalabs/gaudi/gaudi.c        | 55 +++++++-------------
 drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c  | 12 ++---
 drivers/misc/habanalabs/goya/goya.c          | 47 +++++++----------
 drivers/misc/habanalabs/goya/goya_hwmgr.c    | 40 +++++++-------
 include/uapi/misc/habanalabs.h               | 33 ++++++++++++
 8 files changed, 127 insertions(+), 114 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 832dd5c5bb065..7cf82da67dabf 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
-int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
 						enum pll_index *pll_index)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u8 pll_byte, pll_bit_off;
 	bool dynamic_pll;
-
-	if (input_pll_index >= PLL_MAX) {
-		dev_err(hdev->dev, "PLL index %d is out of range\n",
-							input_pll_index);
-		return -EINVAL;
-	}
+	int fw_pll_idx;
 
 	dynamic_pll = prop->fw_security_status_valid &&
 		(prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN);
@@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
 	if (!dynamic_pll) {
 		/*
 		 * in case we are working with legacy FW (each asic has unique
-		 * PLL numbering) extract the legacy numbering
+		 * PLL numbering) use the driver based index as they are
+		 * aligned with fw legacy numbering
 		 */
-		*pll_index = hdev->legacy_pll_map[input_pll_index];
+		*pll_index = input_pll_index;
 		return 0;
 	}
 
+	/* retrieve a FW compatible PLL index based on
+	 * ASIC specific user request
+	 */
+	fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index);
+	if (fw_pll_idx < 0) {
+		dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n",
+			input_pll_index, fw_pll_idx);
+		return -EINVAL;
+	}
+
 	/* PLL map is a u8 array */
-	pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3];
-	pll_bit_off = input_pll_index & 0x7;
+	pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3];
+	pll_bit_off = fw_pll_idx & 0x7;
 
 	if (!(pll_byte & BIT(pll_bit_off))) {
 		dev_err(hdev->dev, "PLL index %d is not supported\n",
-							input_pll_index);
+			fw_pll_idx);
 		return -EINVAL;
 	}
 
-	*pll_index = input_pll_index;
+	*pll_index = fw_pll_idx;
 
 	return 0;
 }
 
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
 		u16 *pll_freq_arr)
 {
 	struct cpucp_packet pkt;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 44e89da30b4a7..91291a8e201ee 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -930,6 +930,9 @@ enum div_select_defs {
  *                         driver is ready to receive asynchronous events. This
  *                         function should be called during the first init and
  *                         after every hard-reset of the device
+ * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
+ * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to
+ *                         generic f/w compatible PLL Indexes
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -1054,6 +1057,7 @@ struct hl_asic_funcs {
 			u32 block_id, u32 block_size);
 	void (*enable_events_from_fw)(struct hl_device *hdev);
 	void (*get_msi_info)(u32 *table);
+	int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
 };
 
 
@@ -1950,8 +1954,6 @@ struct hl_mmu_funcs {
  * @aggregated_cs_counters: aggregated cs counters among all contexts
  * @mmu_priv: device-specific MMU data.
  * @mmu_func: device-related MMU functions.
- * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and
- *                  static (asic specific) PLL indexes.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2071,8 +2073,6 @@ struct hl_device {
 	struct hl_mmu_priv		mmu_priv;
 	struct hl_mmu_funcs		mmu_func[MMU_NUM_PGT_LOCATIONS];
 
-	enum pll_index			*legacy_pll_map;
-
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
@@ -2387,9 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
-int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index,
+int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index,
 						enum pll_index *pll_index);
-int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index,
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index,
 		u16 *pll_freq_arr);
 int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
@@ -2411,9 +2411,9 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 int hl_pci_init(struct hl_device *hdev);
 void hl_pci_fini(struct hl_device *hdev);
 
-long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 								bool curr);
-void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
 								u64 freq);
 int hl_get_temperature(struct hl_device *hdev,
 		       int sensor_index, u32 attr, long *value);
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 9fa61573a89de..c9f649b31e3a9 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -9,7 +9,7 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
 								bool curr)
 {
 	struct cpucp_packet pkt;
@@ -44,7 +44,7 @@ long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index,
 	return (long) result;
 }
 
-void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index,
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
 								u64 freq)
 {
 	struct cpucp_packet pkt;
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index b751652f80a8c..81155f06c126e 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -105,36 +105,6 @@
 
 #define GAUDI_PLL_MAX 10
 
-/*
- * this enum kept here for compatibility with old FW (in which each asic has
- * unique PLL numbering
- */
-enum gaudi_pll_index {
-	GAUDI_CPU_PLL = 0,
-	GAUDI_PCI_PLL,
-	GAUDI_SRAM_PLL,
-	GAUDI_HBM_PLL,
-	GAUDI_NIC_PLL,
-	GAUDI_DMA_PLL,
-	GAUDI_MESH_PLL,
-	GAUDI_MME_PLL,
-	GAUDI_TPC_PLL,
-	GAUDI_IF_PLL,
-};
-
-static enum pll_index gaudi_pll_map[PLL_MAX] = {
-	[CPU_PLL] = GAUDI_CPU_PLL,
-	[PCI_PLL] = GAUDI_PCI_PLL,
-	[SRAM_PLL] = GAUDI_SRAM_PLL,
-	[HBM_PLL] = GAUDI_HBM_PLL,
-	[NIC_PLL] = GAUDI_NIC_PLL,
-	[DMA_PLL] = GAUDI_DMA_PLL,
-	[MESH_PLL] = GAUDI_MESH_PLL,
-	[MME_PLL] = GAUDI_MME_PLL,
-	[TPC_PLL] = GAUDI_TPC_PLL,
-	[IF_PLL] = GAUDI_IF_PLL,
-};
-
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
 		"gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
 		"gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -810,7 +780,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 			freq = 0;
 		}
 	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr);
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
 
 		if (rc)
 			return rc;
@@ -1652,9 +1622,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
 
 	hdev->asic_specific = gaudi;
 
-	/* store legacy PLL map */
-	hdev->legacy_pll_map = gaudi_pll_map;
-
 	/* Create DMA pool for small allocations */
 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
@@ -8783,6 +8750,23 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev)
 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
 }
 
+static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
+{
+	switch (pll_idx) {
+	case HL_GAUDI_CPU_PLL: return CPU_PLL;
+	case HL_GAUDI_PCI_PLL: return PCI_PLL;
+	case HL_GAUDI_NIC_PLL: return NIC_PLL;
+	case HL_GAUDI_DMA_PLL: return DMA_PLL;
+	case HL_GAUDI_MESH_PLL: return MESH_PLL;
+	case HL_GAUDI_MME_PLL: return MME_PLL;
+	case HL_GAUDI_TPC_PLL: return TPC_PLL;
+	case HL_GAUDI_IF_PLL: return IF_PLL;
+	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
+	case HL_GAUDI_HBM_PLL: return HBM_PLL;
+	default: return -EINVAL;
+	}
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
 	.early_init = gaudi_early_init,
 	.early_fini = gaudi_early_fini,
@@ -8866,7 +8850,8 @@ static const struct hl_asic_funcs gaudi_funcs = {
 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
 	.get_hw_block_id = gaudi_get_hw_block_id,
 	.hw_block_mmap = gaudi_block_mmap,
-	.enable_events_from_fw = gaudi_enable_events_from_fw
+	.enable_events_from_fw = gaudi_enable_events_from_fw,
+	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx
 };
 
 /**
diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
index 8c49da4bcbd58..9b60eadd4c355 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c
@@ -13,7 +13,7 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 	struct gaudi_device *gaudi = hdev->asic_specific;
 
 	if (freq == PLL_LAST)
-		hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value);
+		hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
 }
 
 int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
@@ -23,7 +23,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
@@ -33,7 +33,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev,
@@ -57,7 +57,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
 
 	gaudi->max_freq_value = value;
 
@@ -85,7 +85,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 
 	gaudi->max_freq_value = value * 1000 * 1000;
 
-	hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value);
+	hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
 
 fail:
 	return count;
@@ -100,7 +100,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
 
 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e27338f4aad2f..e0ad2a269779b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -118,30 +118,6 @@
 #define IS_MME_IDLE(mme_arch_sts) \
 	(((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
 
-/*
- * this enum kept here for compatibility with old FW (in which each asic has
- * unique PLL numbering
- */
-enum goya_pll_index {
-	GOYA_CPU_PLL = 0,
-	GOYA_IC_PLL,
-	GOYA_MC_PLL,
-	GOYA_MME_PLL,
-	GOYA_PCI_PLL,
-	GOYA_EMMC_PLL,
-	GOYA_TPC_PLL,
-};
-
-static enum pll_index goya_pll_map[PLL_MAX] = {
-	[CPU_PLL] = GOYA_CPU_PLL,
-	[IC_PLL] = GOYA_IC_PLL,
-	[MC_PLL] = GOYA_MC_PLL,
-	[MME_PLL] = GOYA_MME_PLL,
-	[PCI_PLL] = GOYA_PCI_PLL,
-	[EMMC_PLL] = GOYA_EMMC_PLL,
-	[TPC_PLL] = GOYA_TPC_PLL,
-};
-
 static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
 		"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
 		"goya cq 4", "goya cpu eq"
@@ -775,7 +751,8 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
 			freq = 0;
 		}
 	} else {
-		rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr);
+		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL,
+				pll_freq_arr);
 
 		if (rc)
 			return;
@@ -897,9 +874,6 @@ static int goya_sw_init(struct hl_device *hdev)
 
 	hdev->asic_specific = goya;
 
-	/* store legacy PLL map */
-	hdev->legacy_pll_map = goya_pll_map;
-
 	/* Create DMA pool for small allocations */
 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
 			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
@@ -5512,6 +5486,20 @@ static void goya_enable_events_from_fw(struct hl_device *hdev)
 			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
 }
 
+static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
+{
+	switch (pll_idx) {
+	case HL_GOYA_CPU_PLL: return CPU_PLL;
+	case HL_GOYA_PCI_PLL: return PCI_PLL;
+	case HL_GOYA_MME_PLL: return MME_PLL;
+	case HL_GOYA_TPC_PLL: return TPC_PLL;
+	case HL_GOYA_IC_PLL: return IC_PLL;
+	case HL_GOYA_MC_PLL: return MC_PLL;
+	case HL_GOYA_EMMC_PLL: return EMMC_PLL;
+	default: return -EINVAL;
+	}
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -5595,7 +5583,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.ack_protection_bits_errors = goya_ack_protection_bits_errors,
 	.get_hw_block_id = goya_get_hw_block_id,
 	.hw_block_mmap = goya_block_mmap,
-	.enable_events_from_fw = goya_enable_events_from_fw
+	.enable_events_from_fw = goya_enable_events_from_fw,
+	.map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx
 };
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index 3acb36a1a902e..7d007125727ff 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -13,19 +13,19 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 
 	switch (freq) {
 	case PLL_HIGH:
-		hl_set_frequency(hdev, MME_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, TPC_PLL, hdev->high_pll);
-		hl_set_frequency(hdev, IC_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll);
 		break;
 	case PLL_LOW:
-		hl_set_frequency(hdev, MME_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, TPC_PLL, GOYA_PLL_FREQ_LOW);
-		hl_set_frequency(hdev, IC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW);
 		break;
 	case PLL_LAST:
-		hl_set_frequency(hdev, MME_PLL, goya->mme_clk);
-		hl_set_frequency(hdev, TPC_PLL, goya->tpc_clk);
-		hl_set_frequency(hdev, IC_PLL, goya->ic_clk);
+		hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk);
+		hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk);
+		hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk);
 		break;
 	default:
 		dev_err(hdev->dev, "unknown frequency setting\n");
@@ -39,7 +39,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
 
 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
@@ -49,7 +49,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 
 	*max_clk = (value / 1000 / 1000);
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
 
 	if (value < 0) {
 		dev_err(hdev->dev,
@@ -72,7 +72,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -105,7 +105,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, MME_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_MME_PLL, value);
 	goya->mme_clk = value;
 
 fail:
@@ -121,7 +121,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, TPC_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -154,7 +154,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, TPC_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value);
 	goya->tpc_clk = value;
 
 fail:
@@ -170,7 +170,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, IC_PLL, false);
+	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false);
 
 	if (value < 0)
 		return value;
@@ -203,7 +203,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr,
 		goto fail;
 	}
 
-	hl_set_frequency(hdev, IC_PLL, value);
+	hl_set_frequency(hdev, HL_GOYA_IC_PLL, value);
 	goya->ic_clk = value;
 
 fail:
@@ -219,7 +219,7 @@ static ssize_t mme_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, MME_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -236,7 +236,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, TPC_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true);
 
 	if (value < 0)
 		return value;
@@ -253,7 +253,7 @@ static ssize_t ic_clk_curr_show(struct device *dev,
 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;
 
-	value = hl_get_frequency(hdev, IC_PLL, true);
+	value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true);
 
 	if (value < 0)
 		return value;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d3e017b5f0dba..6d2d34c9f375f 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -239,6 +239,39 @@ enum gaudi_engine_id {
 	GAUDI_ENGINE_ID_SIZE
 };
 
+/*
+ * ASIC specific PLL index
+ *
+ * Used to retrieve in frequency info of different IPs via
+ * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be
+ * used as an index in struct hl_pll_frequency_info
+ */
+
+enum hl_goya_pll_index {
+	HL_GOYA_CPU_PLL = 0,
+	HL_GOYA_IC_PLL,
+	HL_GOYA_MC_PLL,
+	HL_GOYA_MME_PLL,
+	HL_GOYA_PCI_PLL,
+	HL_GOYA_EMMC_PLL,
+	HL_GOYA_TPC_PLL,
+	HL_GOYA_PLL_MAX
+};
+
+enum hl_gaudi_pll_index {
+	HL_GAUDI_CPU_PLL = 0,
+	HL_GAUDI_PCI_PLL,
+	HL_GAUDI_SRAM_PLL,
+	HL_GAUDI_HBM_PLL,
+	HL_GAUDI_NIC_PLL,
+	HL_GAUDI_DMA_PLL,
+	HL_GAUDI_MESH_PLL,
+	HL_GAUDI_MME_PLL,
+	HL_GAUDI_TPC_PLL,
+	HL_GAUDI_IF_PLL,
+	HL_GAUDI_PLL_MAX
+};
+
 enum hl_device_status {
 	HL_DEVICE_STATUS_OPERATIONAL,
 	HL_DEVICE_STATUS_IN_RESET,
-- 
GitLab


From 001d5f66c156f2c30b6bf85346de09de8db49b59 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Sun, 11 Apr 2021 21:06:05 +0300
Subject: [PATCH 0123/3804] habanalabs: skip reading f/w errors on bad status

If we read all FF from the boot status register, then something is
totally wrong and there is no point in reading specific errors.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 7cf82da67dabf..fff29f057b6d3 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 	if (rc) {
 		dev_err(hdev->dev, "Failed to read preboot version\n");
 		detect_cpu_boot_status(hdev, status);
-		fw_read_errors(hdev, boot_err0_reg,
-				cpu_security_boot_status_reg);
+
+		/* If we read all FF, then something is totally wrong, no point
+		 * of reading specific errors
+		 */
+		if (status != -1)
+			fw_read_errors(hdev, boot_err0_reg,
+					cpu_security_boot_status_reg);
 		return -EIO;
 	}
 
-- 
GitLab


From b5fd82a7af198db04408e218f64dc3d4178d585a Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Apr 2021 09:38:22 +0300
Subject: [PATCH 0124/3804] habanalabs: change error level of security not
 ready

This error indicates a problem in the security initialization inside
the f/w so we need to stop the device loading because it won't be
usable.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index fff29f057b6d3..377a7ca886feb 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 	}
 
 	if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) {
-		dev_warn(hdev->dev,
+		dev_err(hdev->dev,
 			"Device boot warning - security not ready\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY;
+		err_exists = true;
 	}
 
 	if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) {
-- 
GitLab


From 27a9e35daad080f3770401a1a11eda2f9f7732dd Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Mon, 12 Apr 2021 09:52:05 +0300
Subject: [PATCH 0125/3804] habanalabs: ignore f/w status error

In case firmware has a bug and erroneously reports a status error
(e.g. device unusable) during boot, allow the user to tell the driver
to continue the boot regardless of the error status.

This will be done via a kernel module parameter which exposes a mask.
The user that loads the driver can decide exactly which status errors
to ignore and which to take into account. The bitmask follows the
defines in hl_boot_if.h.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c    | 3 ++-
 drivers/misc/habanalabs/common/habanalabs.h     | 7 +++++++
 drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++++++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 377a7ca886feb..0713b2c12d54f 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 		err_exists = true;
 	}
 
-	if (err_exists)
+	if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) &
+				lower_32_bits(hdev->boot_error_status_mask)))
 		return -EIO;
 
 	return 0;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 91291a8e201ee..6579f8767abda 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1962,6 +1962,12 @@ struct hl_mmu_funcs {
  * @clock_gating_mask: is clock gating enabled. bitmask that represents the
  *                     different engines. See debugfs-driver-habanalabs for
  *                     details.
+ * @boot_error_status_mask: contains a mask of the device boot error status.
+ *                          Each bit represents a different error, according to
+ *                          the defines in hl_boot_if.h. If the bit is cleared,
+ *                          the error will be ignored by the driver during
+ *                          device initialization. Mainly used to debug and
+ *                          workaround firmware bugs
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @card_type: Various ASICs have several card types. This indicates the card
@@ -2077,6 +2083,7 @@ struct hl_device {
 	u64				timeout_jiffies;
 	u64				max_power;
 	u64				clock_gating_mask;
+	u64				boot_error_status_mask;
 	atomic_t			in_reset;
 	enum hl_pll_frequency		curr_pll_profile;
 	enum cpucp_card_types		card_type;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 7135f1e038641..64d1530db9854 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock);
 static int timeout_locked = 30;
 static int reset_on_lockup = 1;
 static int memory_scrub = 1;
+static ulong boot_error_status_mask = ULONG_MAX;
 
 module_param(timeout_locked, int, 0444);
 MODULE_PARM_DESC(timeout_locked,
@@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444);
 MODULE_PARM_DESC(memory_scrub,
 	"Scrub device memory in various states (0 = no, 1 = yes, default yes)");
 
+module_param(boot_error_status_mask, ulong, 0444);
+MODULE_PARM_DESC(boot_error_status_mask,
+	"Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
+
 #define PCI_VENDOR_ID_HABANALABS	0x1da3
 
 #define PCI_IDS_GOYA			0x0001
@@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	hdev->major = hl_major;
 	hdev->reset_on_lockup = reset_on_lockup;
 	hdev->memory_scrub = memory_scrub;
+	hdev->boot_error_status_mask = boot_error_status_mask;
+
 	hdev->pldm = 0;
 
 	set_driver_behavior_per_device(hdev);
-- 
GitLab


From 24a107097fbd8fb6a48a0dcb31e64c1de6831a1d Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 27 Apr 2021 17:49:25 +0300
Subject: [PATCH 0126/3804] habanalabs: wait for interrupt wrong timeout
 calculation

The wait-for-interrupt timeout calculation is wrong, hence a timeout
occurs when the user waits on an interrupt with certain timeout values.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/command_submission.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index ff8791a651fd1..af3c497defb10 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -2017,7 +2017,7 @@ wait_again:
 		if (completion_value >= target_value) {
 			*status = CS_WAIT_STATUS_COMPLETED;
 		} else {
-			timeout -= jiffies_to_usecs(completion_rc);
+			timeout = completion_rc;
 			goto wait_again;
 		}
 	} else {
-- 
GitLab


From 115726c5d312b462c9d9931ea42becdfa838a076 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 26 Apr 2021 06:43:46 -0700
Subject: [PATCH 0127/3804] habanalabs/gaudi: Fix a potential use after free in
 gaudi_memset_device_memory

Our code analyzer reported a use-after-free (UAF).

In gaudi_memset_device_memory(), cb is obtained via
hl_cb_kernel_create() with a refcount of 2.
If hl_cs_allocate_job() fails, execution jumps to the release_cb
label. One reference to cb is dropped by hl_cb_put(cb), and cb could be
freed if another thread also drops a reference. cb is then dereferenced
via cb->id, which is a potential UAF.

This patch adds a variable 'id' that stores the value of cb->id before
hl_cb_put(cb) is called, to avoid the potential UAF.

Fixes: 423815bf02e25 ("habanalabs/gaudi: remove PCI access to SM block")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 81155f06c126e..9e4a6bb3acd11 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5579,6 +5579,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 	struct hl_cs_job *job;
 	u32 cb_size, ctl, err_cause;
 	struct hl_cb *cb;
+	u64 id;
 	int rc;
 
 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
@@ -5645,8 +5646,9 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
 	}
 
 release_cb:
+	id = cb->id;
 	hl_cb_put(cb);
-	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT);
 
 	return rc;
 }
-- 
GitLab


From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 7 May 2021 21:06:38 +0100
Subject: [PATCH 0128/3804] io_uring: fix link timeout refs

WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
Call Trace:
 __refcount_sub_and_test include/linux/refcount.h:283 [inline]
 __refcount_dec_and_test include/linux/refcount.h:315 [inline]
 refcount_dec_and_test include/linux/refcount.h:333 [inline]
 io_put_req fs/io_uring.c:2140 [inline]
 io_queue_linked_timeout fs/io_uring.c:6300 [inline]
 __io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354
 io_submit_sqe fs/io_uring.c:6534 [inline]
 io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660
 __do_sys_io_uring_enter fs/io_uring.c:9240 [inline]
 __se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182

io_link_timeout_fn() should put only one reference of the linked timeout
request, however in case of racing with the master request's completion
first io_req_complete() puts one and then io_put_req_deferred() is
called.

Cc: stable@vger.kernel.org # 5.12+
Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state")
Reported-by: syzbot+a2910119328ce8e7996f@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.1620417627.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57c..9ac5e278a91e6 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	if (prev) {
 		io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
 		io_put_req_deferred(prev, 1);
+		io_put_req_deferred(req, 1);
 	} else {
 		io_req_complete_post(req, -ETIME, 0);
 	}
-	io_put_req_deferred(req, 1);
 	return HRTIMER_NORESTART;
 }
 
-- 
GitLab


From c1b55029493879f5bd585ff79f326e71f0bc05e3 Mon Sep 17 00:00:00 2001
From: Daniel Cordova A <danesc87@gmail.com>
Date: Fri, 7 May 2021 12:31:16 -0500
Subject: [PATCH 0129/3804] ALSA: hda: fixup headset for ASUS GU502 laptop

The GU502 requires a few steps to make headset I/O work properly:
pincfg, verbs to unmute the headphone out, and a callback to toggle
output between speakers and headphones based on jack detection.

Signed-off-by: Daniel Cordova A <danesc87@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210507173116.12043-1-danesc87@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 62 +++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a5f3e78ec04e7..b4b71609dff11 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6254,6 +6254,35 @@ static void alc294_fixup_gx502_hp(struct hda_codec *codec,
 	}
 }
 
+static void alc294_gu502_toggle_output(struct hda_codec *codec,
+				       struct hda_jack_callback *cb)
+{
+	/* Windows sets 0x10 to 0x8420 for Node 0x20 which is
+	 * responsible from changes between speakers and headphones
+	 */
+	if (snd_hda_jack_detect_state(codec, 0x21) == HDA_JACK_PRESENT)
+		alc_write_coef_idx(codec, 0x10, 0x8420);
+	else
+		alc_write_coef_idx(codec, 0x10, 0x0a20);
+}
+
+static void alc294_fixup_gu502_hp(struct hda_codec *codec,
+				  const struct hda_fixup *fix, int action)
+{
+	if (!is_jack_detectable(codec, 0x21))
+		return;
+
+	switch (action) {
+	case HDA_FIXUP_ACT_PRE_PROBE:
+		snd_hda_jack_detect_enable_callback(codec, 0x21,
+				alc294_gu502_toggle_output);
+		break;
+	case HDA_FIXUP_ACT_INIT:
+		alc294_gu502_toggle_output(codec, NULL);
+		break;
+	}
+}
+
 static void  alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec,
 			      const struct hda_fixup *fix, int action)
 {
@@ -6471,6 +6500,9 @@ enum {
 	ALC294_FIXUP_ASUS_GX502_HP,
 	ALC294_FIXUP_ASUS_GX502_PINS,
 	ALC294_FIXUP_ASUS_GX502_VERBS,
+	ALC294_FIXUP_ASUS_GU502_HP,
+	ALC294_FIXUP_ASUS_GU502_PINS,
+	ALC294_FIXUP_ASUS_GU502_VERBS,
 	ALC285_FIXUP_HP_GPIO_LED,
 	ALC285_FIXUP_HP_MUTE_LED,
 	ALC236_FIXUP_HP_GPIO_LED,
@@ -7712,6 +7744,35 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc294_fixup_gx502_hp,
 	},
+	[ALC294_FIXUP_ASUS_GU502_PINS] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x19, 0x01a11050 }, /* rear HP mic */
+			{ 0x1a, 0x01a11830 }, /* rear external mic */
+			{ 0x21, 0x012110f0 }, /* rear HP out */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_GU502_VERBS
+	},
+	[ALC294_FIXUP_ASUS_GU502_VERBS] = {
+		.type = HDA_FIXUP_VERBS,
+		.v.verbs = (const struct hda_verb[]) {
+			/* set 0x15 to HP-OUT ctrl */
+			{ 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+			/* unmute the 0x15 amp */
+			{ 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 },
+			/* set 0x1b to HP-OUT */
+			{ 0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC294_FIXUP_ASUS_GU502_HP
+	},
+	[ALC294_FIXUP_ASUS_GU502_HP] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc294_fixup_gu502_hp,
+	},
 	[ALC294_FIXUP_ASUS_COEF_1B] = {
 		.type = HDA_FIXUP_VERBS,
 		.v.verbs = (const struct hda_verb[]) {
@@ -8256,6 +8317,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
 	SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
+	SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
 	SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401),
 	SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2),
-- 
GitLab


From 4eff124347191d1548eb4e14e20e77513dcbd0fe Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Sun, 9 May 2021 12:11:02 +0300
Subject: [PATCH 0130/3804] openrisc: mm/init.c: remove unused memblock_region
 variable in map_ram()

Kernel test robot reports:

cppcheck possible warnings: (new ones prefixed by >>, may not real problems)

>> arch/openrisc/mm/init.c:125:10: warning: Uninitialized variable: region [uninitvar]
            region->base, region->base + region->size);
            ^

Replace usage of memblock_region fields with 'start' and 'end' variables
that are initialized in for_each_mem_range() and remove the declaration of
region.

Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/mm/init.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index bf9b2310fc936..f3fa02b8838af 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -75,7 +75,6 @@ static void __init map_ram(void)
 	/* These mark extents of read-only kernel pages...
 	 * ...from vmlinux.lds.S
 	 */
-	struct memblock_region *region;
 
 	v = PAGE_OFFSET;
 
@@ -121,7 +120,7 @@ static void __init map_ram(void)
 		}
 
 		printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-		       region->base, region->base + region->size);
+		       start, end);
 	}
 }
 
-- 
GitLab


From 371dcaee1ade4b1eefd541ae6ee048b5ce15b37c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Sun, 9 May 2021 12:11:03 +0300
Subject: [PATCH 0131/3804] openrisc: mm/init.c: remove unused variable 'end'
 in paging_init()

A build with W=1 enabled produces the following warning:

  CC      arch/openrisc/mm/init.o
arch/openrisc/mm/init.c: In function 'paging_init':
arch/openrisc/mm/init.c:131:16: warning: variable 'end' set but not used [-Wunused-but-set-variable]
  131 |  unsigned long end;
      |                ^~~

Remove the unused variable 'end'.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/mm/init.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index f3fa02b8838af..6e38ec96cab89 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -128,7 +128,6 @@ void __init paging_init(void)
 {
 	extern void tlb_init(void);
 
-	unsigned long end;
 	int i;
 
 	printk(KERN_INFO "Setting up paging and PTEs.\n");
@@ -144,8 +143,6 @@ void __init paging_init(void)
 	 */
 	current_pgd[smp_processor_id()] = init_mm.pgd;
 
-	end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
-
 	map_ram();
 
 	zone_sizes_init();
-- 
GitLab


From e759959fe3b8313c81d6200be44cb8a644d845ea Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:34 -0500
Subject: [PATCH 0132/3804] x86/sev-es: Rename sev-es.{ch} to sev.{ch}

SEV-SNP builds upon the SEV-ES functionality while adding new hardware
protection. Version 2 of the GHCB specification adds new NAE events that
are SEV-SNP specific. Rename the sev-es.{ch} to sev.{ch} so that all
SEV* functionality can be consolidated in one place.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-2-brijesh.singh@amd.com
---
 arch/x86/boot/compressed/Makefile                 | 6 +++---
 arch/x86/boot/compressed/{sev-es.c => sev.c}      | 4 ++--
 arch/x86/include/asm/{sev-es.h => sev.h}          | 0
 arch/x86/kernel/Makefile                          | 6 +++---
 arch/x86/kernel/head64.c                          | 2 +-
 arch/x86/kernel/nmi.c                             | 2 +-
 arch/x86/kernel/{sev-es-shared.c => sev-shared.c} | 0
 arch/x86/kernel/{sev-es.c => sev.c}               | 4 ++--
 arch/x86/mm/extable.c                             | 2 +-
 arch/x86/platform/efi/efi_64.c                    | 2 +-
 arch/x86/realmode/init.c                          | 2 +-
 11 files changed, 15 insertions(+), 15 deletions(-)
 rename arch/x86/boot/compressed/{sev-es.c => sev.c} (98%)
 rename arch/x86/include/asm/{sev-es.h => sev.h} (100%)
 rename arch/x86/kernel/{sev-es-shared.c => sev-shared.c} (100%)
 rename arch/x86/kernel/{sev-es.c => sev.c} (99%)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6e5522aebbbd4..2a2975236c9e3 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -48,10 +48,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
 KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
 KBUILD_CFLAGS += $(CLANG_FLAGS)
 
-# sev-es.c indirectly inludes inat-table.h which is generated during
+# sev.c indirectly inludes inat-table.h which is generated during
 # compilation and stored in $(objtree). Add the directory to the includes so
 # that the compiler finds it even with out-of-tree builds (make O=/some/path).
-CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
+CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
 
 KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
@@ -93,7 +93,7 @@ ifdef CONFIG_X86_64
 	vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
 	vmlinux-objs-y += $(obj)/mem_encrypt.o
 	vmlinux-objs-y += $(obj)/pgtable_64.o
-	vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
+	vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
 endif
 
 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c
similarity index 98%
rename from arch/x86/boot/compressed/sev-es.c
rename to arch/x86/boot/compressed/sev.c
index 82041bd380e56..670e998fe9306 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -13,7 +13,7 @@
 #include "misc.h"
 
 #include <asm/pgtable_types.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 #include <asm/trapnr.h>
 #include <asm/trap_pf.h>
 #include <asm/msr-index.h>
@@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 #include "../../lib/insn.c"
 
 /* Include code for early handlers */
-#include "../../kernel/sev-es-shared.c"
+#include "../../kernel/sev-shared.c"
 
 static bool early_setup_sev_es(void)
 {
diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h
similarity index 100%
rename from arch/x86/include/asm/sev-es.h
rename to arch/x86/include/asm/sev.h
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0704c2a94272c..0f66682ac02a6 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg
 CFLAGS_REMOVE_ftrace.o = -pg
 CFLAGS_REMOVE_early_printk.o = -pg
 CFLAGS_REMOVE_head64.o = -pg
-CFLAGS_REMOVE_sev-es.o = -pg
+CFLAGS_REMOVE_sev.o = -pg
 endif
 
 KASAN_SANITIZE_head$(BITS).o				:= n
@@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o				:= n
 KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
 KASAN_SANITIZE_stacktrace.o				:= n
 KASAN_SANITIZE_paravirt.o				:= n
-KASAN_SANITIZE_sev-es.o					:= n
+KASAN_SANITIZE_sev.o					:= n
 
 # With some compiler versions the generated code results in boot hangs, caused
 # by several compilation units. To be safe, disable all instrumentation.
@@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
 obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
 obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
-obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev-es.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 18be44163a50f..de01903c37355 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,7 +39,7 @@
 #include <asm/realmode.h>
 #include <asm/extable.h>
 #include <asm/trapnr.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 /*
  * Manage page tables very early on.
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 2ef961cf4cfc5..4bce802d25fb1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,7 +33,7 @@
 #include <asm/reboot.h>
 #include <asm/cache.h>
 #include <asm/nospec-branch.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/nmi.h>
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c
similarity index 100%
rename from arch/x86/kernel/sev-es-shared.c
rename to arch/x86/kernel/sev-shared.c
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c
similarity index 99%
rename from arch/x86/kernel/sev-es.c
rename to arch/x86/kernel/sev.c
index 73873b0078380..9578c82832aa2 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev.c
@@ -22,7 +22,7 @@
 
 #include <asm/cpu_entry_area.h>
 #include <asm/stacktrace.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 #include <asm/insn-eval.h>
 #include <asm/fpu/internal.h>
 #include <asm/processor.h>
@@ -459,7 +459,7 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 }
 
 /* Include code shared with pre-decompression boot stage */
-#include "sev-es-shared.c"
+#include "sev-shared.c"
 
 void noinstr __sev_es_nmi_complete(void)
 {
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index b93d6cd08a7ff..121921b2927cb 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,7 +5,7 @@
 #include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index df7b5477fc4f2..7515e78ef8983 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -47,7 +47,7 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 #include <asm/pgalloc.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 /*
  * We allocate runtime services regions top-down, starting from -4G, i.e.
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 1be71ef5e4c4e..2e1c1bec0f9e7 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,7 +9,7 @@
 #include <asm/realmode.h>
 #include <asm/tlbflush.h>
 #include <asm/crash.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
-- 
GitLab


From b81fc74d53d1248de6db3136dd6b29e5d5528021 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:35 -0500
Subject: [PATCH 0133/3804] x86/sev: Move GHCB MSR protocol and NAE definitions
 in a common header

The guest and the hypervisor contain separate macros to get and set
the GHCB MSR protocol and NAE event fields. Consolidate the GHCB
protocol definitions and helper macros in one place.

Leave the supported protocol version defines in separate files so that
the guest and the hypervisor keep the flexibility to support different
GHCB versions in the same release.

There is no functional change intended.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-3-brijesh.singh@amd.com
---
 arch/x86/include/asm/sev-common.h | 62 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/sev.h        | 30 ++-------------
 arch/x86/kernel/sev-shared.c      | 20 +++++-----
 arch/x86/kvm/svm/svm.h            | 38 ++-----------------
 4 files changed, 80 insertions(+), 70 deletions(-)
 create mode 100644 arch/x86/include/asm/sev-common.h

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
new file mode 100644
index 0000000000000..629c3df243f03
--- /dev/null
+++ b/arch/x86/include/asm/sev-common.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header common between the guest and the hypervisor.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __ASM_X86_SEV_COMMON_H
+#define __ASM_X86_SEV_COMMON_H
+
+#define GHCB_MSR_INFO_POS		0
+#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP		0x001
+#define GHCB_MSR_SEV_INFO_REQ		0x002
+#define GHCB_MSR_VER_MAX_POS		48
+#define GHCB_MSR_VER_MAX_MASK		0xffff
+#define GHCB_MSR_VER_MIN_POS		32
+#define GHCB_MSR_VER_MIN_MASK		0xffff
+#define GHCB_MSR_CBIT_POS		24
+#define GHCB_MSR_CBIT_MASK		0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)				\
+	((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |	\
+	 (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |	\
+	 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |	\
+	 GHCB_MSR_SEV_INFO_RESP)
+#define GHCB_MSR_INFO(v)		((v) & 0xfffUL)
+#define GHCB_MSR_PROTO_MAX(v)		(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
+#define GHCB_MSR_PROTO_MIN(v)		(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+
+#define GHCB_MSR_CPUID_REQ		0x004
+#define GHCB_MSR_CPUID_RESP		0x005
+#define GHCB_MSR_CPUID_FUNC_POS		32
+#define GHCB_MSR_CPUID_FUNC_MASK	0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS	32
+#define GHCB_MSR_CPUID_VALUE_MASK	0xffffffff
+#define GHCB_MSR_CPUID_REG_POS		30
+#define GHCB_MSR_CPUID_REG_MASK		0x3
+#define GHCB_CPUID_REQ_EAX		0
+#define GHCB_CPUID_REQ_EBX		1
+#define GHCB_CPUID_REQ_ECX		2
+#define GHCB_CPUID_REQ_EDX		3
+#define GHCB_CPUID_REQ(fn, reg)		\
+		(GHCB_MSR_CPUID_REQ | \
+		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
+		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
+
+#define GHCB_MSR_TERM_REQ		0x100
+#define GHCB_MSR_TERM_REASON_SET_POS	12
+#define GHCB_MSR_TERM_REASON_SET_MASK	0xf
+#define GHCB_MSR_TERM_REASON_POS	16
+#define GHCB_MSR_TERM_REASON_MASK	0xff
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val)						  \
+	(((((u64)reason_set) &  GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \
+	((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS))
+
+#define GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
+#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
+
+#define GHCB_RESP_CODE(v)		((v) & GHCB_MSR_INFO_MASK)
+
+#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index cf1d957c70919..fa5cd05d3b5be 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -10,34 +10,12 @@
 
 #include <linux/types.h>
 #include <asm/insn.h>
+#include <asm/sev-common.h>
 
-#define GHCB_SEV_INFO		0x001UL
-#define GHCB_SEV_INFO_REQ	0x002UL
-#define		GHCB_INFO(v)		((v) & 0xfffUL)
-#define		GHCB_PROTO_MAX(v)	(((v) >> 48) & 0xffffUL)
-#define		GHCB_PROTO_MIN(v)	(((v) >> 32) & 0xffffUL)
-#define		GHCB_PROTO_OUR		0x0001UL
-#define GHCB_SEV_CPUID_REQ	0x004UL
-#define		GHCB_CPUID_REQ_EAX	0
-#define		GHCB_CPUID_REQ_EBX	1
-#define		GHCB_CPUID_REQ_ECX	2
-#define		GHCB_CPUID_REQ_EDX	3
-#define		GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
-					(((unsigned long)reg & 3) << 30) | \
-					(((unsigned long)fn) << 32))
+#define GHCB_PROTO_OUR		0x0001UL
+#define GHCB_PROTOCOL_MAX	1ULL
+#define GHCB_DEFAULT_USAGE	0ULL
 
-#define	GHCB_PROTOCOL_MAX	0x0001UL
-#define GHCB_DEFAULT_USAGE	0x0000UL
-
-#define GHCB_SEV_CPUID_RESP	0x005UL
-#define GHCB_SEV_TERMINATE	0x100UL
-#define		GHCB_SEV_TERMINATE_REASON(reason_set, reason_val)	\
-			(((((u64)reason_set) &  0x7) << 12) |		\
-			 ((((u64)reason_val) & 0xff) << 16))
-#define		GHCB_SEV_ES_REASON_GENERAL_REQUEST	0
-#define		GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED	1
-
-#define	GHCB_SEV_GHCB_RESP_CODE(v)	((v) & 0xfff)
 #define	VMGEXIT()			{ asm volatile("rep; vmmcall\n\r"); }
 
 enum es_result {
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 0aa9f13efd572..6ec8b3bfd76eb 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void)
 
 static void __noreturn sev_es_terminate(unsigned int reason)
 {
-	u64 val = GHCB_SEV_TERMINATE;
+	u64 val = GHCB_MSR_TERM_REQ;
 
 	/*
 	 * Tell the hypervisor what went wrong - only reason-set 0 is
 	 * currently supported.
 	 */
-	val |= GHCB_SEV_TERMINATE_REASON(0, reason);
+	val |= GHCB_SEV_TERM_REASON(0, reason);
 
 	/* Request Guest Termination from Hypvervisor */
 	sev_es_wr_ghcb_msr(val);
@@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void)
 	u64 val;
 
 	/* Do the GHCB protocol version negotiation */
-	sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
+	sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
 
-	if (GHCB_INFO(val) != GHCB_SEV_INFO)
+	if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
 		return false;
 
-	if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-	    GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
+	if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+	    GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
 		return false;
 
 	return true;
@@ -153,28 +153,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->ax = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->bx = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->cx = val >> 32;
 
 	sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
 	VMGEXIT();
 	val = sev_es_rd_ghcb_msr();
-	if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
+	if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
 		goto fail;
 	regs->dx = val >> 32;
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 84b3133c2251d..42f8a7b9048fb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -20,6 +20,7 @@
 #include <linux/bits.h>
 
 #include <asm/svm.h>
+#include <asm/sev-common.h>
 
 #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
 
@@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
 
 /* sev.c */
 
-#define GHCB_VERSION_MAX		1ULL
-#define GHCB_VERSION_MIN		1ULL
-
-#define GHCB_MSR_INFO_POS		0
-#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
-
-#define GHCB_MSR_SEV_INFO_RESP		0x001
-#define GHCB_MSR_SEV_INFO_REQ		0x002
-#define GHCB_MSR_VER_MAX_POS		48
-#define GHCB_MSR_VER_MAX_MASK		0xffff
-#define GHCB_MSR_VER_MIN_POS		32
-#define GHCB_MSR_VER_MIN_MASK		0xffff
-#define GHCB_MSR_CBIT_POS		24
-#define GHCB_MSR_CBIT_MASK		0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)				\
-	((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |	\
-	 (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |	\
-	 (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |	\
-	 GHCB_MSR_SEV_INFO_RESP)
-
-#define GHCB_MSR_CPUID_REQ		0x004
-#define GHCB_MSR_CPUID_RESP		0x005
-#define GHCB_MSR_CPUID_FUNC_POS		32
-#define GHCB_MSR_CPUID_FUNC_MASK	0xffffffff
-#define GHCB_MSR_CPUID_VALUE_POS	32
-#define GHCB_MSR_CPUID_VALUE_MASK	0xffffffff
-#define GHCB_MSR_CPUID_REG_POS		30
-#define GHCB_MSR_CPUID_REG_MASK		0x3
-
-#define GHCB_MSR_TERM_REQ		0x100
-#define GHCB_MSR_TERM_REASON_SET_POS	12
-#define GHCB_MSR_TERM_REASON_SET_MASK	0xf
-#define GHCB_MSR_TERM_REASON_POS	16
-#define GHCB_MSR_TERM_REASON_MASK	0xff
+#define GHCB_VERSION_MAX	1ULL
+#define GHCB_VERSION_MIN	1ULL
+
 
 extern unsigned int max_sev_asid;
 
-- 
GitLab


From 059e5c321a65657877924256ea8ad9c0df257b45 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Tue, 27 Apr 2021 06:16:36 -0500
Subject: [PATCH 0134/3804] x86/msr: Rename MSR_K8_SYSCFG to MSR_AMD64_SYSCFG

The SYSCFG MSR continued being updated beyond the K8 family; drop the K8
name from it.

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lkml.kernel.org/r/20210427111636.1207-4-brijesh.singh@amd.com
---
 Documentation/virt/kvm/amd-memory-encryption.rst | 2 +-
 Documentation/x86/amd-memory-encryption.rst      | 6 +++---
 arch/x86/include/asm/msr-index.h                 | 6 +++---
 arch/x86/kernel/cpu/amd.c                        | 4 ++--
 arch/x86/kernel/cpu/mtrr/cleanup.c               | 2 +-
 arch/x86/kernel/cpu/mtrr/generic.c               | 4 ++--
 arch/x86/kernel/mmconf-fam10h_64.c               | 2 +-
 arch/x86/kvm/svm/svm.c                           | 4 ++--
 arch/x86/kvm/x86.c                               | 2 +-
 arch/x86/mm/mem_encrypt_identity.c               | 6 +++---
 arch/x86/pci/amd_bus.c                           | 2 +-
 arch/x86/realmode/rm/trampoline_64.S             | 4 ++--
 drivers/edac/amd64_edac.c                        | 2 +-
 tools/arch/x86/include/asm/msr-index.h           | 6 +++---
 14 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
index 5ec8a1902e15a..5c081c8c7164a 100644
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -22,7 +22,7 @@ to SEV::
 		  [ecx]:
 			Bits[31:0]  Number of encrypted guests supported simultaneously
 
-If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015
 (MSR_K7_HWCR) can be used to determine if it can be enabled::
 
 	0xc001_0010:
diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst
index c48d452d07189..a1940ebe7be50 100644
--- a/Documentation/x86/amd-memory-encryption.rst
+++ b/Documentation/x86/amd-memory-encryption.rst
@@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME::
 			   system physical addresses, not guest physical
 			   addresses)
 
-If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to
+If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to
 determine if SME is enabled and/or to enable memory encryption::
 
 	0xc0010010:
@@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as follows:
 	  The CPU supports SME (determined through CPUID instruction).
 
 	- Enabled:
-	  Supported and bit 23 of MSR_K8_SYSCFG is set.
+	  Supported and bit 23 of MSR_AMD64_SYSCFG is set.
 
 	- Active:
 	  Supported, Enabled and the Linux kernel is actively applying
@@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows:
 SME can also be enabled and activated in the BIOS. If SME is enabled and
 activated in the BIOS, then all memory accesses will be encrypted and it will
 not be necessary to activate the Linux memory encryption support.  If the BIOS
-merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate
+merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate
 memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or
 by supplying mem_encrypt=on on the kernel command line.  However, if BIOS does
 not enable SME, then Linux will not be able to activate memory encryption, even
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 742d89a00721d..211ba3375ee96 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -537,9 +537,9 @@
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG		0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab4..0adb0341cd7c5 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
 	 */
 	if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
 		/* Check if memory encryption is enabled */
-		rdmsrl(MSR_K8_SYSCFG, msr);
-		if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		rdmsrl(MSR_AMD64_SYSCFG, msr);
+		if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 			goto clear_all;
 
 		/*
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 0c3b372318b70..b5f43049fa5f7 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void)
 	if (boot_cpu_data.x86 < 0xf)
 		return 0;
 	/* In case some hypervisor doesn't pass SYSCFG through: */
-	if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+	if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0)
 		return 0;
 	/*
 	 * Memory between 4GB and top of mem is forced WB by this magic bit.
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b90f3f437765c..558108296f3cf 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void)
 	      (boot_cpu_data.x86 >= 0x0f)))
 		return;
 
-	rdmsr(MSR_K8_SYSCFG, lo, hi);
+	rdmsr(MSR_AMD64_SYSCFG, lo, hi);
 	if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
 		pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
 		       " not cleared by BIOS, clearing this bit\n",
 		       smp_processor_id());
 		lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
-		mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
+		mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi);
 	}
 }
 
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index b5cb49e57df85..c94dec6a18345 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void)
 		return;
 
 	/* SYS_CFG */
-	address = MSR_K8_SYSCFG;
+	address = MSR_AMD64_SYSCFG;
 	rdmsrl(address, val);
 
 	/* TOP_MEM2 is not enabled? */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b649f92287a2e..433e8e4fb3a65 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -858,8 +858,8 @@ static __init void svm_adjust_mmio_mask(void)
 		return;
 
 	/* If memory encryption is not enabled, use existing mask */
-	rdmsrl(MSR_K8_SYSCFG, msr);
-	if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+	rdmsrl(MSR_AMD64_SYSCFG, msr);
+	if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 		return;
 
 	enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6eda2834fc05e..853c40e893352 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3402,7 +3402,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_LASTBRANCHTOIP:
 	case MSR_IA32_LASTINTFROMIP:
 	case MSR_IA32_LASTINTTOIP:
-	case MSR_K8_SYSCFG:
+	case MSR_AMD64_SYSCFG:
 	case MSR_K8_TSEG_ADDR:
 	case MSR_K8_TSEG_MASK:
 	case MSR_VM_HSAVE_PA:
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 04aba7e80a362..a9639f663d25f 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp)
 		/*
 		 * No SME if Hypervisor bit is set. This check is here to
 		 * prevent a guest from trying to enable SME. For running as a
-		 * KVM guest the MSR_K8_SYSCFG will be sufficient, but there
+		 * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there
 		 * might be other hypervisors which emulate that MSR as non-zero
 		 * or even pass it through to the guest.
 		 * A malicious hypervisor can still trick a guest into this
@@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp)
 			return;
 
 		/* For SME, check the SYSCFG MSR */
-		msr = __rdmsr(MSR_K8_SYSCFG);
-		if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+		msr = __rdmsr(MSR_AMD64_SYSCFG);
+		if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
 			return;
 	} else {
 		/* SEV state cannot be controlled by a command line option */
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index ae744b6a07856..dd40d3fea74e4 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -284,7 +284,7 @@ static int __init early_root_info_init(void)
 
 	/* need to take out [4G, TOM2) for RAM*/
 	/* SYS_CFG */
-	address = MSR_K8_SYSCFG;
+	address = MSR_AMD64_SYSCFG;
 	rdmsrl(address, val);
 	/* TOP_MEM2 is enabled? */
 	if (val & (1<<21)) {
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index 84c5d1b33d100..cc8391f86cdb6 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -123,9 +123,9 @@ SYM_CODE_START(startup_32)
 	 */
 	btl	$TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
 	jnc	.Ldone
-	movl	$MSR_K8_SYSCFG, %ecx
+	movl	$MSR_AMD64_SYSCFG, %ecx
 	rdmsr
-	bts	$MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+	bts	$MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax
 	jc	.Ldone
 
 	/*
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9fa4dfc6ebee6..f0d8f60acee10 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt)
 	edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
 
 	/* Check first whether TOP_MEM2 is enabled: */
-	rdmsrl(MSR_K8_SYSCFG, msr_val);
+	rdmsrl(MSR_AMD64_SYSCFG, msr_val);
 	if (msr_val & BIT(21)) {
 		rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
 		edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 45029354e0a8b..c60b09e7602f4 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -533,9 +533,9 @@
 /* K8 MSRs */
 #define MSR_K8_TOP_MEM1			0xc001001a
 #define MSR_K8_TOP_MEM2			0xc001001d
-#define MSR_K8_SYSCFG			0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG		0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
 #define MSR_K8_INT_PENDING_MSG		0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK		0x18000000
-- 
GitLab


From 970655aa9b42461f8394e4457307005bdeee14d9 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 23 Apr 2021 07:40:38 +0200
Subject: [PATCH 0135/3804] xen/gntdev: fix gntdev_mmap() error exit path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert")
introduced an error in gntdev_mmap(): in case the call of
mmu_interval_notifier_insert_locked() fails, the exit path should not
call mmu_interval_notifier_remove(), as this might result in NULL
dereferences.

One reason for failure is e.g. a signal pending for the running
process.

Fixes: d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert")
Cc: stable@vger.kernel.org
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Luca Fancellu <luca.fancellu@arm.com>
Link: https://lore.kernel.org/r/20210423054038.26696-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/gntdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f01d58c7a042e..a3e7be96527d7 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 		err = mmu_interval_notifier_insert_locked(
 			&map->notifier, vma->vm_mm, vma->vm_start,
 			vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
-		if (err)
+		if (err) {
+			map->vma = NULL;
 			goto out_unlock_put;
+		}
 	}
 	mutex_unlock(&priv->lock);
 
-- 
GitLab


From dbc03e81586fc33e4945263fd6e09e22eb4b980f Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 10:19:13 +0800
Subject: [PATCH 0136/3804] xen/unpopulated-alloc: fix error return code in
 fill_list()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Fixes: a4574f63edc6 ("mm/memremap_pages: convert to 'struct range'")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20210508021913.1727-1-thunder.leizhen@huawei.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/unpopulated-alloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index e64e6befc63b7..87e6b7db892f5 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages)
 	}
 
 	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
-	if (!pgmap)
+	if (!pgmap) {
+		ret = -ENOMEM;
 		goto err_pgmap;
+	}
 
 	pgmap->type = MEMORY_DEVICE_GENERIC;
 	pgmap->range = (struct range) {
-- 
GitLab


From c5a80540e425a5f9a82b0f3163e3b6a4331f33bc Mon Sep 17 00:00:00 2001
From: Dominik Andreas Schorpp <dominik.a.schorpp@ids.de>
Date: Thu, 22 Apr 2021 09:58:52 +0200
Subject: [PATCH 0137/3804] USB: serial: ftdi_sio: add IDs for IDS GmbH
 Products

Add the IDS GmbH Vendor ID and the Product IDs for SI31A (2xRS232)
and CM31A (LoRaWAN Modem).

Signed-off-by: Dominik Andreas Schorpp <dominik.a.schorpp@ids.de>
Signed-off-by: Juergen Borleis <jbe@pengutronix.de>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c     | 3 +++
 drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 6f2659e59b2ee..369ef140df78a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1034,6 +1034,9 @@ static const struct usb_device_id id_table_combined[] = {
 	/* Sienna devices */
 	{ USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) },
 	{ USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) },
+	/* IDS GmbH devices */
+	{ USB_DEVICE(IDS_VID, IDS_SI31A_PID) },
+	{ USB_DEVICE(IDS_VID, IDS_CM31A_PID) },
 	/* U-Blox devices */
 	{ USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) },
 	{ USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 3d47c6d72256e..d854e04a4286e 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -1567,6 +1567,13 @@
 #define UNJO_VID			0x22B7
 #define UNJO_ISODEBUG_V1_PID		0x150D
 
+/*
+ * IDS GmbH
+ */
+#define IDS_VID				0x2CAF
+#define IDS_SI31A_PID			0x13A2
+#define IDS_CM31A_PID			0x13A3
+
 /*
  * U-Blox products (http://www.u-blox.com).
  */
-- 
GitLab


From e467714f822b5d167a7fb03d34af91b5b6af1827 Mon Sep 17 00:00:00 2001
From: Daniele Palmas <dnlplm@gmail.com>
Date: Wed, 28 Apr 2021 09:26:34 +0200
Subject: [PATCH 0138/3804] USB: serial: option: add Telit LE910-S1
 compositions 0x7010, 0x7011

Add support for the following Telit LE910-S1 compositions:

0x7010: rndis, tty, tty, tty
0x7011: ecm, tty, tty, tty

Signed-off-by: Daniele Palmas <dnlplm@gmail.com>
Link: https://lore.kernel.org/r/20210428072634.5091-1-dnlplm@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 3e79a543d3e77..7608584ef4fe7 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1240,6 +1240,10 @@ static const struct usb_device_id option_ids[] = {
 	  .driver_info = NCTRL(0) | RSVD(1) },
 	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff),	/* Telit LN940 (MBIM) */
 	  .driver_info = NCTRL(0) },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff),	/* Telit LE910-S1 (RNDIS) */
+	  .driver_info = NCTRL(2) },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff),	/* Telit LE910-S1 (ECM) */
+	  .driver_info = NCTRL(2) },
 	{ USB_DEVICE(TELIT_VENDOR_ID, 0x9010),				/* Telit SBL FN980 flashing device */
 	  .driver_info = NCTRL(0) | ZLP },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
-- 
GitLab


From 89b1a3d811e6f8065d6ae8a25e7682329b4a31e2 Mon Sep 17 00:00:00 2001
From: Sean MacLennan <seanm@seanm.ca>
Date: Sat, 1 May 2021 20:40:45 -0400
Subject: [PATCH 0139/3804] USB: serial: ti_usb_3410_5052: add startech.com
 device id

This adds support for the Startech.com generic serial to USB converter.
It seems to be a bone stock TI_3410. I have been using this patch for
years.

Signed-off-by: Sean MacLennan <seanm@seanm.ca>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ti_usb_3410_5052.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index caa46ac23db90..310db5abea9d8 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -37,6 +37,7 @@
 /* Vendor and product ids */
 #define TI_VENDOR_ID			0x0451
 #define IBM_VENDOR_ID			0x04b3
+#define STARTECH_VENDOR_ID		0x14b0
 #define TI_3410_PRODUCT_ID		0x3410
 #define IBM_4543_PRODUCT_ID		0x4543
 #define IBM_454B_PRODUCT_ID		0x454b
@@ -370,6 +371,7 @@ static const struct usb_device_id ti_id_table_3410[] = {
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) },
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) },
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) },
+	{ USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
@@ -408,6 +410,7 @@ static const struct usb_device_id ti_id_table_combined[] = {
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) },
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) },
 	{ USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) },
+	{ USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
-- 
GitLab


From f8e8c1b2f782e7391e8a1c25648ce756e2a7d481 Mon Sep 17 00:00:00 2001
From: Zolton Jheng <s6668c2t@gmail.com>
Date: Mon, 10 May 2021 10:32:00 +0800
Subject: [PATCH 0140/3804] USB: serial: pl2303: add device id for ADLINK
 ND-6530 GC

This adds the device id for the ADLINK ND-6530 which is a PL2303GC based
device.

Signed-off-by: Zolton Jheng <s6668c2t@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/pl2303.c | 1 +
 drivers/usb/serial/pl2303.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index fd773d252691b..940050c314822 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -113,6 +113,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) },
 	{ USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) },
 	{ USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530_PRODUCT_ID) },
+	{ USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) },
 	{ USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) },
 	{ USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) },
 	{ }					/* Terminating entry */
diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h
index 0f681ddbfd288..6097ee8fccb25 100644
--- a/drivers/usb/serial/pl2303.h
+++ b/drivers/usb/serial/pl2303.h
@@ -158,6 +158,7 @@
 /* ADLINK ND-6530 RS232,RS485 and RS422 adapter */
 #define ADLINK_VENDOR_ID		0x0b63
 #define ADLINK_ND6530_PRODUCT_ID	0x6530
+#define ADLINK_ND6530GC_PRODUCT_ID	0x653a
 
 /* SMART USB Serial Adapter */
 #define SMART_VENDOR_ID	0x0b8c
-- 
GitLab


From 0c6c2d3615efb7c292573f2e6c886929a2b2da6c Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 28 Apr 2021 13:12:31 +0100
Subject: [PATCH 0141/3804] arm64: Generate cpucaps.h

The arm64 code allocates an internal constant to every CPU feature it can
detect, distinct from the public hwcap numbers we use to expose some
features to userspace. Currently this is maintained manually, which is an
irritating source of conflicts when working on new features. To avoid this,
replace the header with a simple text file listing the names we've assigned
and sort it to minimise conflicts.

As part of doing this we also do the Kbuild hookup required to hook up
an arch tools directory and to generate header files in there.

This will result in a renumbering and reordering of the existing constants,
since they are all internal only the values should not be important. The
reordering will impact the order in which some steps in enumeration handle
features but the algorithm is not intended to depend on this and I haven't
seen any issues when testing. Due to the UAO cpucap having been removed in
the past we end up with ARM64_NCAPS being 1 smaller than it was before.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210428121231.11219-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile              |  3 ++
 arch/arm64/include/asm/Kbuild    |  2 +
 arch/arm64/include/asm/cpucaps.h | 74 --------------------------------
 arch/arm64/tools/Makefile        | 22 ++++++++++
 arch/arm64/tools/cpucaps         | 65 ++++++++++++++++++++++++++++
 arch/arm64/tools/gen-cpucaps.awk | 40 +++++++++++++++++
 6 files changed, 132 insertions(+), 74 deletions(-)
 delete mode 100644 arch/arm64/include/asm/cpucaps.h
 create mode 100644 arch/arm64/tools/Makefile
 create mode 100644 arch/arm64/tools/cpucaps
 create mode 100755 arch/arm64/tools/gen-cpucaps.awk

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 7ef44478560df..b52481f0605d8 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -175,6 +175,9 @@ vdso_install:
 	$(if $(CONFIG_COMPAT_VDSO), \
 		$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@)
 
+archprepare:
+	$(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+
 # We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 07ac208edc894..26889dbfe904d 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -5,3 +5,5 @@ generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += set_memory.h
 generic-y += user.h
+
+generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
deleted file mode 100644
index b0c5eda0498f2..0000000000000
--- a/arch/arm64/include/asm/cpucaps.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
-#ifndef __ASM_CPUCAPS_H
-#define __ASM_CPUCAPS_H
-
-#define ARM64_WORKAROUND_CLEAN_CACHE		0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE	1
-#define ARM64_WORKAROUND_845719			2
-#define ARM64_HAS_SYSREG_GIC_CPUIF		3
-#define ARM64_HAS_PAN				4
-#define ARM64_HAS_LSE_ATOMICS			5
-#define ARM64_WORKAROUND_CAVIUM_23154		6
-#define ARM64_WORKAROUND_834220			7
-#define ARM64_HAS_NO_HW_PREFETCH		8
-#define ARM64_HAS_VIRT_HOST_EXTN		11
-#define ARM64_WORKAROUND_CAVIUM_27456		12
-#define ARM64_HAS_32BIT_EL0			13
-#define ARM64_SPECTRE_V3A			14
-#define ARM64_HAS_CNP				15
-#define ARM64_HAS_NO_FPSIMD			16
-#define ARM64_WORKAROUND_REPEAT_TLBI		17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003	18
-#define ARM64_WORKAROUND_858921			19
-#define ARM64_WORKAROUND_CAVIUM_30115		20
-#define ARM64_HAS_DCPOP				21
-#define ARM64_SVE				22
-#define ARM64_UNMAP_KERNEL_AT_EL0		23
-#define ARM64_SPECTRE_V2			24
-#define ARM64_HAS_RAS_EXTN			25
-#define ARM64_WORKAROUND_843419			26
-#define ARM64_HAS_CACHE_IDC			27
-#define ARM64_HAS_CACHE_DIC			28
-#define ARM64_HW_DBM				29
-#define ARM64_SPECTRE_V4			30
-#define ARM64_MISMATCHED_CACHE_TYPE		31
-#define ARM64_HAS_STAGE2_FWB			32
-#define ARM64_HAS_CRC32				33
-#define ARM64_SSBS				34
-#define ARM64_WORKAROUND_1418040		35
-#define ARM64_HAS_SB				36
-#define ARM64_WORKAROUND_SPECULATIVE_AT		37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH		38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
-#define ARM64_HAS_GENERIC_AUTH_ARCH		40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		41
-#define ARM64_HAS_IRQ_PRIO_MASKING		42
-#define ARM64_HAS_DCPODP			43
-#define ARM64_WORKAROUND_1463225		44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM	45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM	46
-#define ARM64_WORKAROUND_1542419		47
-#define ARM64_HAS_E0PD				48
-#define ARM64_HAS_RNG				49
-#define ARM64_HAS_AMU_EXTN			50
-#define ARM64_HAS_ADDRESS_AUTH			51
-#define ARM64_HAS_GENERIC_AUTH			52
-#define ARM64_HAS_32BIT_EL1			53
-#define ARM64_BTI				54
-#define ARM64_HAS_ARMv8_4_TTL			55
-#define ARM64_HAS_TLB_RANGE			56
-#define ARM64_MTE				57
-#define ARM64_WORKAROUND_1508412		58
-#define ARM64_HAS_LDAPR				59
-#define ARM64_KVM_PROTECTED_MODE		60
-#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP	61
-#define ARM64_HAS_EPAN				62
-
-#define ARM64_NCAPS				63
-
-#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile
new file mode 100644
index 0000000000000..932b4fe5c7684
--- /dev/null
+++ b/arch/arm64/tools/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen := arch/$(ARCH)/include/generated
+kapi := $(gen)/asm
+
+kapi-hdrs-y := $(kapi)/cpucaps.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+
+PHONY += kapi
+
+kapi:   $(kapi-hdrs-y) $(gen-y)
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+quiet_cmd_gen_cpucaps = GEN     $@
+      cmd_gen_cpucaps = mkdir -p $(dir $@) && \
+                     $(AWK) -f $(filter-out $(PHONY),$^) > $@
+
+$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
+	$(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
new file mode 100644
index 0000000000000..21fbdda7086e2
--- /dev/null
+++ b/arch/arm64/tools/cpucaps
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Internal CPU capabilities constants, keep this list sorted
+
+BTI
+HAS_32BIT_EL0
+HAS_32BIT_EL1
+HAS_ADDRESS_AUTH
+HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_IMP_DEF
+HAS_AMU_EXTN
+HAS_ARMv8_4_TTL
+HAS_CACHE_DIC
+HAS_CACHE_IDC
+HAS_CNP
+HAS_CRC32
+HAS_DCPODP
+HAS_DCPOP
+HAS_E0PD
+HAS_EPAN
+HAS_GENERIC_AUTH
+HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_IMP_DEF
+HAS_IRQ_PRIO_MASKING
+HAS_LDAPR
+HAS_LSE_ATOMICS
+HAS_NO_FPSIMD
+HAS_NO_HW_PREFETCH
+HAS_PAN
+HAS_RAS_EXTN
+HAS_RNG
+HAS_SB
+HAS_STAGE2_FWB
+HAS_SYSREG_GIC_CPUIF
+HAS_TLB_RANGE
+HAS_VIRT_HOST_EXTN
+HW_DBM
+KVM_PROTECTED_MODE
+MISMATCHED_CACHE_TYPE
+MTE
+SPECTRE_V2
+SPECTRE_V3A
+SPECTRE_V4
+SSBS
+SVE
+UNMAP_KERNEL_AT_EL0
+WORKAROUND_834220
+WORKAROUND_843419
+WORKAROUND_845719
+WORKAROUND_858921
+WORKAROUND_1418040
+WORKAROUND_1463225
+WORKAROUND_1508412
+WORKAROUND_1542419
+WORKAROUND_CAVIUM_23154
+WORKAROUND_CAVIUM_27456
+WORKAROUND_CAVIUM_30115
+WORKAROUND_CAVIUM_TX2_219_PRFM
+WORKAROUND_CAVIUM_TX2_219_TVM
+WORKAROUND_CLEAN_CACHE
+WORKAROUND_DEVICE_LOAD_ACQUIRE
+WORKAROUND_NVIDIA_CARMEL_CNP
+WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_REPEAT_TLBI
+WORKAROUND_SPECULATIVE_AT
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
new file mode 100755
index 0000000000000..18737a1ce0448
--- /dev/null
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -0,0 +1,40 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-cpucaps.awk: arm64 cpucaps header generator
+#
+# Usage: awk -f gen-cpucaps.awk cpucaps.txt
+
+# Log an error and terminate
+function fatal(msg) {
+	print "Error at line " NR ": " msg > "/dev/stderr"
+	exit 1
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+BEGIN {
+	print "#ifndef __ASM_CPUCAPS_H"
+	print "#define __ASM_CPUCAPS_H"
+	print ""
+	print "/* Generated file - do not edit */"
+	cap_num = 0
+	print ""
+}
+
+/^[vA-Z0-9_]+$/ {
+	printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+	next
+}
+
+END {
+	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+	print ""
+	print "#endif"
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+	fatal("unhandled statement")
+}
-- 
GitLab


From a1bed090fc56e6e24517d96bc076595544fb5317 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 7 May 2021 17:25:42 +0100
Subject: [PATCH 0142/3804] kselftest/arm64: Add missing stddef.h include to
 BTI tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Explicitly include stddef.h when building the BTI tests so that we have
a definition of NULL; with at least some toolchains this is not done
implicitly by anything else:

test.c: In function ‘start’:
test.c:214:25: error: ‘NULL’ undeclared (first use in this function)
  214 |  sigaction(SIGILL, &sa, NULL);
      |                         ^~~~
test.c:20:1: note: ‘NULL’ is defined in header ‘<stddef.h>’; did you forget to ‘#include <stddef.h>’?

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210507162542.23149-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 tools/testing/selftests/arm64/bti/test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
index 656b04976ccc6..67b77ab83c20e 100644
--- a/tools/testing/selftests/arm64/bti/test.c
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -6,6 +6,7 @@
 
 #include "system.h"
 
+#include <stddef.h>
 #include <linux/errno.h>
 #include <linux/auxvec.h>
 #include <linux/signal.h>
-- 
GitLab


From e90812c47b958407b54d05780dc483fdc1b57a93 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:11 +0200
Subject: [PATCH 0143/3804] staging: media: rkvdec: fix pm_runtime_get_sync()
 usage count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/rkvdec/rkvdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c
index d821661d30f38..8c17615f3a7ab 100644
--- a/drivers/staging/media/rkvdec/rkvdec.c
+++ b/drivers/staging/media/rkvdec/rkvdec.c
@@ -658,7 +658,7 @@ static void rkvdec_device_run(void *priv)
 	if (WARN_ON(!desc))
 		return;
 
-	ret = pm_runtime_get_sync(rkvdec->dev);
+	ret = pm_runtime_resume_and_get(rkvdec->dev);
 	if (ret < 0) {
 		rkvdec_job_finish_no_pm(ctx, VB2_BUF_STATE_ERROR);
 		return;
-- 
GitLab


From 4cba5473c5ce0f1389d316c5dc6f83a0259df5eb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Apr 2021 10:39:47 +0200
Subject: [PATCH 0144/3804] media: venus: Rework error fail recover logic

The Venus code has a sort of watchdog that attempts to recover
from IP errors, implemented as a delayed work job, which
calls venus_sys_error_handler().

Right now, it has several issues:

1. It assumes that PM runtime resume never fails

2. It internally runs two while() loops that also assume that
   PM runtime will never fail to go idle:

	while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
		msleep(10);

...

	while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
		usleep_range(1000, 1500);

3. It uses an OR to merge all return codes and then report to the user

4. If the hardware never recovers, it keeps running on every 10ms,
   flooding the syslog with 2 messages (so, up to 200 messages
   per second).

Rework the code, in order to prevent that, by:

1. check the return code from PM runtime resume;
2. don't let the while() loops run forever;
3. store the failed event;
4. use warn ratelimited when it fails to recover.

Fixes: af2c3834c8ca ("[media] media: venus: adding core part and helper functions")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/core.c | 60 +++++++++++++++++++-----
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 54bac7ec14c50..91b15842c5558 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -78,22 +78,32 @@ static const struct hfi_core_ops venus_core_ops = {
 	.event_notify = venus_event_notify,
 };
 
+#define RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS 10
+
 static void venus_sys_error_handler(struct work_struct *work)
 {
 	struct venus_core *core =
 			container_of(work, struct venus_core, work.work);
-	int ret = 0;
-
-	pm_runtime_get_sync(core->dev);
+	int ret, i, max_attempts = RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS;
+	const char *err_msg = "";
+	bool failed = false;
+
+	ret = pm_runtime_get_sync(core->dev);
+	if (ret < 0) {
+		err_msg = "resume runtime PM";
+		max_attempts = 0;
+		failed = true;
+	}
 
 	hfi_core_deinit(core, true);
 
-	dev_warn(core->dev, "system error has occurred, starting recovery!\n");
-
 	mutex_lock(&core->lock);
 
-	while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
+	for (i = 0; i < max_attempts; i++) {
+		if (!pm_runtime_active(core->dev_dec) && !pm_runtime_active(core->dev_enc))
+			break;
 		msleep(10);
+	}
 
 	venus_shutdown(core);
 
@@ -101,31 +111,55 @@ static void venus_sys_error_handler(struct work_struct *work)
 
 	pm_runtime_put_sync(core->dev);
 
-	while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
+	for (i = 0; i < max_attempts; i++) {
+		if (!core->pmdomains[0] || !pm_runtime_active(core->pmdomains[0]))
+			break;
 		usleep_range(1000, 1500);
+	}
 
 	hfi_reinit(core);
 
-	pm_runtime_get_sync(core->dev);
+	ret = pm_runtime_get_sync(core->dev);
+	if (ret < 0) {
+		err_msg = "resume runtime PM";
+		failed = true;
+	}
 
-	ret |= venus_boot(core);
-	ret |= hfi_core_resume(core, true);
+	ret = venus_boot(core);
+	if (ret && !failed) {
+		err_msg = "boot Venus";
+		failed = true;
+	}
+
+	ret = hfi_core_resume(core, true);
+	if (ret && !failed) {
+		err_msg = "resume HFI";
+		failed = true;
+	}
 
 	enable_irq(core->irq);
 
 	mutex_unlock(&core->lock);
 
-	ret |= hfi_core_init(core);
+	ret = hfi_core_init(core);
+	if (ret && !failed) {
+		err_msg = "init HFI";
+		failed = true;
+	}
 
 	pm_runtime_put_sync(core->dev);
 
-	if (ret) {
+	if (failed) {
 		disable_irq_nosync(core->irq);
-		dev_warn(core->dev, "recovery failed (%d)\n", ret);
+		dev_warn_ratelimited(core->dev,
+				     "System error has occurred, recovery failed to %s\n",
+				     err_msg);
 		schedule_delayed_work(&core->work, msecs_to_jiffies(10));
 		return;
 	}
 
+	dev_warn(core->dev, "system error has occurred (recovered)\n");
+
 	mutex_lock(&core->lock);
 	core->sys_error = false;
 	mutex_unlock(&core->lock);
-- 
GitLab


From 747bad54a677d8633ec14b39dfbeb859c821d7f2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 28 Apr 2021 09:38:56 +0200
Subject: [PATCH 0145/3804] media: s5p_cec: decrement usage count if disabled

There's a bug at s5p_cec_adap_enable(): if called to
disable the device, it should call pm_runtime_put()
instead of pm_runtime_disable(), as the goal here is to
decrement the usage_count and not to disable PM runtime.

Reported-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Fixes: 1bcbf6f4b6b0 ("[media] cec: s5p-cec: Add s5p-cec driver")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/platform/s5p/s5p_cec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c
index 2a3e7ffefe0a2..3c7c4c3c798c1 100644
--- a/drivers/media/cec/platform/s5p/s5p_cec.c
+++ b/drivers/media/cec/platform/s5p/s5p_cec.c
@@ -51,7 +51,7 @@ static int s5p_cec_adap_enable(struct cec_adapter *adap, bool enable)
 	} else {
 		s5p_cec_mask_tx_interrupts(cec);
 		s5p_cec_mask_rx_interrupts(cec);
-		pm_runtime_disable(cec->dev);
+		pm_runtime_put(cec->dev);
 	}
 
 	return 0;
-- 
GitLab


From 6005a8e955e4e451e4bf6000affaab566d4cab5e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Apr 2021 08:36:00 +0200
Subject: [PATCH 0146/3804] media: i2c: ccs-core: return the right error code
 at suspend

If pm_runtime resume logic fails, return the error code
provided by it, instead of -EAGAIN, as, depending on what
caused it to fail, it may not be something that would be
recovered.

Fixes: cbba45d43631 ("[media] smiapp: Use runtime PM")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ccs/ccs-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index 9dc3f45da3dcd..b05f409014b2f 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -3093,7 +3093,7 @@ static int __maybe_unused ccs_suspend(struct device *dev)
 	if (rval < 0) {
 		pm_runtime_put_noidle(dev);
 
-		return -EAGAIN;
+		return rval;
 	}
 
 	if (sensor->streaming)
-- 
GitLab


From da3a1858c3a37c09446e1470c48352897d59d11b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:11 +0200
Subject: [PATCH 0147/3804] media: i2c: ccs-core: fix pm_runtime_get_sync()
 usage count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.

There is a bug at ccs_pm_get_init(): when this function returns
an error, the stream is not started, and RPM usage_count
should not be incremented. However, if the calls to
v4l2_ctrl_handler_setup() return errors, it will be kept
incremented.

At ccs_suspend() the best is to replace it by the new
pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter automatically,
in the case of errors.

Fixes: 96e3a6b92f23 ("media: smiapp: Avoid maintaining power state information")
Cc: stable@vger.kernel.org
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ccs/ccs-core.c | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index b05f409014b2f..4a848ac2d2cd2 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -1880,21 +1880,33 @@ static int ccs_pm_get_init(struct ccs_sensor *sensor)
 	struct i2c_client *client = v4l2_get_subdevdata(&sensor->src->sd);
 	int rval;
 
+	/*
+	 * It can't use pm_runtime_resume_and_get() here, as the driver
+	 * relies at the returned value to detect if the device was already
+	 * active or not.
+	 */
 	rval = pm_runtime_get_sync(&client->dev);
-	if (rval < 0) {
-		pm_runtime_put_noidle(&client->dev);
+	if (rval < 0)
+		goto error;
 
-		return rval;
-	} else if (!rval) {
-		rval = v4l2_ctrl_handler_setup(&sensor->pixel_array->
-					       ctrl_handler);
-		if (rval)
-			return rval;
+	/* Device was already active, so don't set controls */
+	if (rval == 1)
+		return 0;
 
-		return v4l2_ctrl_handler_setup(&sensor->src->ctrl_handler);
-	}
+	/* Restore V4L2 controls to the previously suspended device */
+	rval = v4l2_ctrl_handler_setup(&sensor->pixel_array->ctrl_handler);
+	if (rval)
+		goto error;
 
+	rval = v4l2_ctrl_handler_setup(&sensor->src->ctrl_handler);
+	if (rval)
+		goto error;
+
+	/* Keep PM runtime usage_count incremented on success */
 	return 0;
+error:
+	pm_runtime_put(&client->dev);
+	return rval;
 }
 
 static int ccs_set_stream(struct v4l2_subdev *subdev, int enable)
-- 
GitLab


From 62c90446868b439929cb04395f04a709a64ae04b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:13 +0200
Subject: [PATCH 0148/3804] media: i2c: imx334: fix the pm runtime get logic

The PM runtime get logic is currently broken, as it checks if
ret is zero instead of checking if it is an error code,
as reported by Dan Carpenter.

While here, use pm_runtime_resume_and_get(), added by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to automatically handle the dev->power.usage_count
decrement on errors. As a bonus, this function always returns
zero on success.

It should also be noted that, on a failure of pm_runtime_get_sync(),
the old error path could trigger a spurious runtime_suspend(), as it
called pm_runtime_put() instead of pm_runtime_put_noidle().

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Daniele Alessandrelli <daniele.alessandrelli@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx334.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/i2c/imx334.c b/drivers/media/i2c/imx334.c
index 047aa7658d217..23f28606e570f 100644
--- a/drivers/media/i2c/imx334.c
+++ b/drivers/media/i2c/imx334.c
@@ -717,9 +717,9 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(imx334->dev);
-		if (ret)
-			goto error_power_off;
+		ret = pm_runtime_resume_and_get(imx334->dev);
+		if (ret < 0)
+			goto error_unlock;
 
 		ret = imx334_start_streaming(imx334);
 		if (ret)
@@ -737,6 +737,7 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable)
 
 error_power_off:
 	pm_runtime_put(imx334->dev);
+error_unlock:
 	mutex_unlock(&imx334->mutex);
 
 	return ret;
-- 
GitLab


From e6695c89b3d4595f60c9fe40e0938e085d15dd20 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 27 Apr 2021 11:43:54 +0200
Subject: [PATCH 0149/3804] media: exynos-gsc: don't resume at remove time

Calling pm_runtime_get_sync() at driver's removal time is not
needed, as this will resume PM runtime. Also, the PM runtime
code at pm_runtime_disable() already calls it, if it detects
the need.

So, change the logic in order to disable PM runtime earlier.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/exynos-gsc/gsc-core.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c
index 9f41c2e7097a6..f49f3322f835a 100644
--- a/drivers/media/platform/exynos-gsc/gsc-core.c
+++ b/drivers/media/platform/exynos-gsc/gsc-core.c
@@ -1210,18 +1210,19 @@ static int gsc_remove(struct platform_device *pdev)
 	struct gsc_dev *gsc = platform_get_drvdata(pdev);
 	int i;
 
-	pm_runtime_get_sync(&pdev->dev);
-
 	gsc_unregister_m2m_device(gsc);
 	v4l2_device_unregister(&gsc->v4l2_dev);
 
 	vb2_dma_contig_clear_max_seg_size(&pdev->dev);
-	for (i = 0; i < gsc->num_clocks; i++)
-		clk_disable_unprepare(gsc->clock[i]);
 
-	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		for (i = 0; i < gsc->num_clocks; i++)
+			clk_disable_unprepare(gsc->clock[i]);
+
+	pm_runtime_set_suspended(&pdev->dev);
+
 	dev_dbg(&pdev->dev, "%s driver unloaded\n", pdev->name);
 	return 0;
 }
-- 
GitLab


From dd97908ee35096356fb4111bb77d5f94bcfe337d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 16:47:42 +0200
Subject: [PATCH 0150/3804] media: atmel: properly get pm_runtime

There are several issues in the way the atmel driver handles
pm_runtime_get_sync():

- it doesn't check return codes;
- it doesn't properly decrement the usage_count on all places;
- it starts streaming even if pm_runtime_get_sync() fails.
- while it tries to get pm_runtime at the clock enable logic,
  it doesn't check whether the operation succeeded.

Replace all occurrences of it to use the new kAPI:
pm_runtime_resume_and_get(), which ensures that, if the
return code is not negative, the usage_count was incremented.

With that, add additional checks when this is called, in order
to ensure that errors will be properly addressed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 30 ++++++++++++++-----
 drivers/media/platform/atmel/atmel-isi.c      | 19 +++++++++---
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index fe3ec8d0eaee3..ce8e1351fa532 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -294,9 +294,13 @@ static int isc_wait_clk_stable(struct clk_hw *hw)
 static int isc_clk_prepare(struct clk_hw *hw)
 {
 	struct isc_clk *isc_clk = to_isc_clk(hw);
+	int ret;
 
-	if (isc_clk->id == ISC_ISPCK)
-		pm_runtime_get_sync(isc_clk->dev);
+	if (isc_clk->id == ISC_ISPCK) {
+		ret = pm_runtime_resume_and_get(isc_clk->dev);
+		if (ret < 0)
+			return ret;
+	}
 
 	return isc_wait_clk_stable(hw);
 }
@@ -353,9 +357,13 @@ static int isc_clk_is_enabled(struct clk_hw *hw)
 {
 	struct isc_clk *isc_clk = to_isc_clk(hw);
 	u32 status;
+	int ret;
 
-	if (isc_clk->id == ISC_ISPCK)
-		pm_runtime_get_sync(isc_clk->dev);
+	if (isc_clk->id == ISC_ISPCK) {
+		ret = pm_runtime_resume_and_get(isc_clk->dev);
+		if (ret < 0)
+			return 0;
+	}
 
 	regmap_read(isc_clk->regmap, ISC_CLKSR, &status);
 
@@ -807,7 +815,12 @@ static int isc_start_streaming(struct vb2_queue *vq, unsigned int count)
 		goto err_start_stream;
 	}
 
-	pm_runtime_get_sync(isc->dev);
+	ret = pm_runtime_resume_and_get(isc->dev);
+	if (ret < 0) {
+		v4l2_err(&isc->v4l2_dev, "RPM resume failed in subdev %d\n",
+			 ret);
+		goto err_pm_get;
+	}
 
 	ret = isc_configure(isc);
 	if (unlikely(ret))
@@ -838,7 +851,7 @@ static int isc_start_streaming(struct vb2_queue *vq, unsigned int count)
 
 err_configure:
 	pm_runtime_put_sync(isc->dev);
-
+err_pm_get:
 	v4l2_subdev_call(isc->current_subdev->sd, video, s_stream, 0);
 
 err_start_stream:
@@ -1809,6 +1822,7 @@ static void isc_awb_work(struct work_struct *w)
 	u32 baysel;
 	unsigned long flags;
 	u32 min, max;
+	int ret;
 
 	/* streaming is not active anymore */
 	if (isc->stop)
@@ -1831,7 +1845,9 @@ static void isc_awb_work(struct work_struct *w)
 	ctrls->hist_id = hist_id;
 	baysel = isc->config.sd_format->cfa_baycfg << ISC_HIS_CFG_BAYSEL_SHIFT;
 
-	pm_runtime_get_sync(isc->dev);
+	ret = pm_runtime_resume_and_get(isc->dev);
+	if (ret < 0)
+		return;
 
 	/*
 	 * only update if we have all the required histograms and controls
diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c
index e392b3efe3633..5b1dd358f2e63 100644
--- a/drivers/media/platform/atmel/atmel-isi.c
+++ b/drivers/media/platform/atmel/atmel-isi.c
@@ -422,7 +422,9 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count)
 	struct frame_buffer *buf, *node;
 	int ret;
 
-	pm_runtime_get_sync(isi->dev);
+	ret = pm_runtime_resume_and_get(isi->dev);
+	if (ret < 0)
+		return ret;
 
 	/* Enable stream on the sub device */
 	ret = v4l2_subdev_call(isi->entity.subdev, video, s_stream, 1);
@@ -782,9 +784,10 @@ static int isi_enum_frameintervals(struct file *file, void *fh,
 	return 0;
 }
 
-static void isi_camera_set_bus_param(struct atmel_isi *isi)
+static int isi_camera_set_bus_param(struct atmel_isi *isi)
 {
 	u32 cfg1 = 0;
+	int ret;
 
 	/* set bus param for ISI */
 	if (isi->pdata.hsync_act_low)
@@ -801,12 +804,16 @@ static void isi_camera_set_bus_param(struct atmel_isi *isi)
 	cfg1 |= ISI_CFG1_THMASK_BEATS_16;
 
 	/* Enable PM and peripheral clock before operate isi registers */
-	pm_runtime_get_sync(isi->dev);
+	ret = pm_runtime_resume_and_get(isi->dev);
+	if (ret < 0)
+		return ret;
 
 	isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS);
 	isi_writel(isi, ISI_CFG1, cfg1);
 
 	pm_runtime_put(isi->dev);
+
+	return 0;
 }
 
 /* -----------------------------------------------------------------------*/
@@ -1085,7 +1092,11 @@ static int isi_graph_notify_complete(struct v4l2_async_notifier *notifier)
 		dev_err(isi->dev, "No supported mediabus format found\n");
 		return ret;
 	}
-	isi_camera_set_bus_param(isi);
+	ret = isi_camera_set_bus_param(isi);
+	if (ret) {
+		dev_err(isi->dev, "Can't wake up device\n");
+		return ret;
+	}
 
 	ret = isi_set_default_fmt(isi);
 	if (ret) {
-- 
GitLab


From 892bb6ecead9b834ba7ad1d07513e9eba1baa3a4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 28 Apr 2021 08:27:55 +0200
Subject: [PATCH 0151/3804] media: hantro: do a PM resume earlier

The device_run() first enables the clock and then
tries to resume PM runtime, checking for errors.

Well, if for some reason the pm_runtime can not resume,
it would be better to detect it beforehand.

So, change the order inside device_run().

Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Fixes: 775fec69008d ("media: add Rockchip VPU JPEG encoder driver")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c | 33 +++++++++++++++--------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 595e82a827287..eea2009fa17bd 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -56,16 +56,12 @@ dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts)
 	return hantro_get_dec_buf_addr(ctx, buf);
 }
 
-static void hantro_job_finish(struct hantro_dev *vpu,
-			      struct hantro_ctx *ctx,
-			      enum vb2_buffer_state result)
+static void hantro_job_finish_no_pm(struct hantro_dev *vpu,
+				    struct hantro_ctx *ctx,
+				    enum vb2_buffer_state result)
 {
 	struct vb2_v4l2_buffer *src, *dst;
 
-	pm_runtime_mark_last_busy(vpu->dev);
-	pm_runtime_put_autosuspend(vpu->dev);
-	clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks);
-
 	src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
 	dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
 
@@ -81,6 +77,18 @@ static void hantro_job_finish(struct hantro_dev *vpu,
 					 result);
 }
 
+static void hantro_job_finish(struct hantro_dev *vpu,
+			      struct hantro_ctx *ctx,
+			      enum vb2_buffer_state result)
+{
+	pm_runtime_mark_last_busy(vpu->dev);
+	pm_runtime_put_autosuspend(vpu->dev);
+
+	clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks);
+
+	hantro_job_finish_no_pm(vpu, ctx, result);
+}
+
 void hantro_irq_done(struct hantro_dev *vpu,
 		     enum vb2_buffer_state result)
 {
@@ -152,12 +160,15 @@ static void device_run(void *priv)
 	src = hantro_get_src_buf(ctx);
 	dst = hantro_get_dst_buf(ctx);
 
+	ret = pm_runtime_get_sync(ctx->dev->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(ctx->dev->dev);
+		goto err_cancel_job;
+	}
+
 	ret = clk_bulk_enable(ctx->dev->variant->num_clocks, ctx->dev->clocks);
 	if (ret)
 		goto err_cancel_job;
-	ret = pm_runtime_get_sync(ctx->dev->dev);
-	if (ret < 0)
-		goto err_cancel_job;
 
 	v4l2_m2m_buf_copy_metadata(src, dst, true);
 
@@ -165,7 +176,7 @@ static void device_run(void *priv)
 	return;
 
 err_cancel_job:
-	hantro_job_finish(ctx->dev, ctx, VB2_BUF_STATE_ERROR);
+	hantro_job_finish_no_pm(ctx->dev, ctx, VB2_BUF_STATE_ERROR);
 }
 
 static struct v4l2_m2m_ops vpu_m2m_ops = {
-- 
GitLab


From e7c617cab7a522fba5b20f9033ee98565b6f3546 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 16:54:25 +0200
Subject: [PATCH 0152/3804] media: marvell-ccic: fix some issues when getting
 pm_runtime

Calling pm_runtime_get_sync() is error-prone, since even when it
returns an error, pm_runtime_put*() still has to be called.
So, use pm_runtime_resume_and_get() instead.

While here, ensure that the error condition will be checked
during clock enable and media open() calls.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/marvell-ccic/mcam-core.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
index 141bf5d97a044..ea87110d90738 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.c
+++ b/drivers/media/platform/marvell-ccic/mcam-core.c
@@ -918,6 +918,7 @@ static int mclk_enable(struct clk_hw *hw)
 	struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw);
 	int mclk_src;
 	int mclk_div;
+	int ret;
 
 	/*
 	 * Clock the sensor appropriately.  Controller clock should
@@ -931,7 +932,9 @@ static int mclk_enable(struct clk_hw *hw)
 		mclk_div = 2;
 	}
 
-	pm_runtime_get_sync(cam->dev);
+	ret = pm_runtime_resume_and_get(cam->dev);
+	if (ret < 0)
+		return ret;
 	clk_enable(cam->clk[0]);
 	mcam_reg_write(cam, REG_CLKCTRL, (mclk_src << 29) | mclk_div);
 	mcam_ctlr_power_up(cam);
@@ -1611,7 +1614,9 @@ static int mcam_v4l_open(struct file *filp)
 		ret = sensor_call(cam, core, s_power, 1);
 		if (ret)
 			goto out;
-		pm_runtime_get_sync(cam->dev);
+		ret = pm_runtime_resume_and_get(cam->dev);
+		if (ret < 0)
+			goto out;
 		__mcam_cam_reset(cam);
 		mcam_set_config_needed(cam, 1);
 	}
-- 
GitLab


From d07bb9702cf5f5ccf3fb661e6cab54bbc33cd23f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 16:57:16 +0200
Subject: [PATCH 0153/3804] media: mtk-mdp: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

While here, fix the return condition of mtk_mdp_m2m_start_streaming(),
as it doesn't make any sense to return 0 if the PM runtime failed
to resume.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
index ace4528cdc5ef..f14779e7596e5 100644
--- a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
+++ b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c
@@ -391,12 +391,12 @@ static int mtk_mdp_m2m_start_streaming(struct vb2_queue *q, unsigned int count)
 	struct mtk_mdp_ctx *ctx = q->drv_priv;
 	int ret;
 
-	ret = pm_runtime_get_sync(&ctx->mdp_dev->pdev->dev);
+	ret = pm_runtime_resume_and_get(&ctx->mdp_dev->pdev->dev);
 	if (ret < 0)
-		mtk_mdp_dbg(1, "[%d] pm_runtime_get_sync failed:%d",
+		mtk_mdp_dbg(1, "[%d] pm_runtime_resume_and_get failed:%d",
 			    ctx->id, ret);
 
-	return 0;
+	return ret;
 }
 
 static void *mtk_mdp_m2m_buf_remove(struct mtk_mdp_ctx *ctx,
-- 
GitLab


From fa9f443f7c962d072d150472e2bb77de39817a9a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 16:59:34 +0200
Subject: [PATCH 0154/3804] media: rcar_fdp1: simplify error check logic at
 fdp1_open()

Avoid some code duplication by moving the common error path
logic to a shared label at the end of fdp1_open().

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar_fdp1.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c
index 01c1fbb97bf67..d26413fa52057 100644
--- a/drivers/media/platform/rcar_fdp1.c
+++ b/drivers/media/platform/rcar_fdp1.c
@@ -2117,9 +2117,7 @@ static int fdp1_open(struct file *file)
 
 	if (ctx->hdl.error) {
 		ret = ctx->hdl.error;
-		v4l2_ctrl_handler_free(&ctx->hdl);
-		kfree(ctx);
-		goto done;
+		goto error_ctx;
 	}
 
 	ctx->fh.ctrl_handler = &ctx->hdl;
@@ -2133,10 +2131,7 @@ static int fdp1_open(struct file *file)
 
 	if (IS_ERR(ctx->fh.m2m_ctx)) {
 		ret = PTR_ERR(ctx->fh.m2m_ctx);
-
-		v4l2_ctrl_handler_free(&ctx->hdl);
-		kfree(ctx);
-		goto done;
+		goto error_ctx;
 	}
 
 	/* Perform any power management required */
@@ -2147,6 +2142,12 @@ static int fdp1_open(struct file *file)
 	dprintk(fdp1, "Created instance: %p, m2m_ctx: %p\n",
 		ctx, ctx->fh.m2m_ctx);
 
+	mutex_unlock(&fdp1->dev_mutex);
+	return 0;
+
+error_ctx:
+	v4l2_ctrl_handler_free(&ctx->hdl);
+	kfree(ctx);
 done:
 	mutex_unlock(&fdp1->dev_mutex);
 	return ret;
-- 
GitLab


From 45e75a8c6fa455a5909ac04db76a4b15d6bb8368 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 16:59:34 +0200
Subject: [PATCH 0155/3804] media: rcar_fdp1: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

Also, right now, the driver ignores any failure when
trying to do PM resume. So, add proper error handling
to the code.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar_fdp1.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c
index d26413fa52057..89aac60066d91 100644
--- a/drivers/media/platform/rcar_fdp1.c
+++ b/drivers/media/platform/rcar_fdp1.c
@@ -2135,7 +2135,9 @@ static int fdp1_open(struct file *file)
 	}
 
 	/* Perform any power management required */
-	pm_runtime_get_sync(fdp1->dev);
+	ret = pm_runtime_resume_and_get(fdp1->dev);
+	if (ret < 0)
+		goto error_pm;
 
 	v4l2_fh_add(&ctx->fh);
 
@@ -2145,6 +2147,8 @@ static int fdp1_open(struct file *file)
 	mutex_unlock(&fdp1->dev_mutex);
 	return 0;
 
+error_pm:
+       v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
 error_ctx:
 	v4l2_ctrl_handler_free(&ctx->hdl);
 	kfree(ctx);
@@ -2352,7 +2356,9 @@ static int fdp1_probe(struct platform_device *pdev)
 
 	/* Power up the cells to read HW */
 	pm_runtime_enable(&pdev->dev);
-	pm_runtime_get_sync(fdp1->dev);
+	ret = pm_runtime_resume_and_get(fdp1->dev);
+	if (ret < 0)
+		goto disable_pm;
 
 	hw_version = fdp1_read(fdp1, FD1_IP_INTDATA);
 	switch (hw_version) {
@@ -2381,6 +2387,9 @@ static int fdp1_probe(struct platform_device *pdev)
 
 	return 0;
 
+disable_pm:
+	pm_runtime_disable(fdp1->dev);
+
 release_m2m:
 	v4l2_m2m_release(fdp1->m2m_dev);
 
-- 
GitLab


From 220955ec3c84505ec6a75bea494ec61f5295ef7a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:03:23 +0200
Subject: [PATCH 0156/3804] media: renesas-ceu: Properly check for PM errors

Right now, the driver just assumes that PM runtime resume
worked, but it may fail.

Well, the pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.

So, using it is tricky. Let's replace it by the new
pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
and return an error if something bad happens.

This should ensure that the PM runtime usage_count will be
properly decremented if an error happens at open time.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Jacopo Mondi <jacopo@jmondi.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/renesas-ceu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/renesas-ceu.c b/drivers/media/platform/renesas-ceu.c
index cd137101d41ea..17f01b6e3fe0f 100644
--- a/drivers/media/platform/renesas-ceu.c
+++ b/drivers/media/platform/renesas-ceu.c
@@ -1099,10 +1099,10 @@ static int ceu_open(struct file *file)
 
 	mutex_lock(&ceudev->mlock);
 	/* Causes soft-reset and sensor power on on first open */
-	pm_runtime_get_sync(ceudev->dev);
+	ret = pm_runtime_resume_and_get(ceudev->dev);
 	mutex_unlock(&ceudev->mlock);
 
-	return 0;
+	return ret;
 }
 
 static int ceu_release(struct file *file)
-- 
GitLab


From fdc34e82c0f968ac4c157bd3d8c299ebc24c9c63 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:04:23 +0200
Subject: [PATCH 0157/3804] media: s5p: fix pm_runtime_get_sync() usage count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

While here, check for a PM runtime resume error at
s5p_cec_adap_enable().

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/platform/s5p/s5p_cec.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c
index 3c7c4c3c798c1..028a09a7531ef 100644
--- a/drivers/media/cec/platform/s5p/s5p_cec.c
+++ b/drivers/media/cec/platform/s5p/s5p_cec.c
@@ -35,10 +35,13 @@ MODULE_PARM_DESC(debug, "debug level (0-2)");
 
 static int s5p_cec_adap_enable(struct cec_adapter *adap, bool enable)
 {
+	int ret;
 	struct s5p_cec_dev *cec = cec_get_drvdata(adap);
 
 	if (enable) {
-		pm_runtime_get_sync(cec->dev);
+		ret = pm_runtime_resume_and_get(cec->dev);
+		if (ret < 0)
+			return ret;
 
 		s5p_cec_reset(cec);
 
-- 
GitLab


From c41e02493334985cca1a22efd5ca962ce3abb061 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:05:27 +0200
Subject: [PATCH 0158/3804] media: am437x: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

While here, ensure that the driver will check if PM runtime
resumed at vpfe_initialize_device().

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/am437x/am437x-vpfe.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
index 6cdc77dda0e49..1c9cb9e05fdf6 100644
--- a/drivers/media/platform/am437x/am437x-vpfe.c
+++ b/drivers/media/platform/am437x/am437x-vpfe.c
@@ -1021,7 +1021,9 @@ static int vpfe_initialize_device(struct vpfe_device *vpfe)
 	if (ret)
 		return ret;
 
-	pm_runtime_get_sync(vpfe->pdev);
+	ret = pm_runtime_resume_and_get(vpfe->pdev);
+	if (ret < 0)
+		return ret;
 
 	vpfe_config_enable(&vpfe->ccdc, 1);
 
@@ -2443,7 +2445,11 @@ static int vpfe_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 
 	/* for now just enable it here instead of waiting for the open */
-	pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
+	if (ret < 0) {
+		vpfe_err(vpfe, "Unable to resume device.\n");
+		goto probe_out_v4l2_unregister;
+	}
 
 	vpfe_ccdc_config_defaults(ccdc);
 
@@ -2530,6 +2536,11 @@ static int vpfe_suspend(struct device *dev)
 
 	/* only do full suspend if streaming has started */
 	if (vb2_start_streaming_called(&vpfe->buffer_queue)) {
+		/*
+		 * ignore RPM resume errors here, as it is already too late.
+		 * A check like that should happen earlier, either at
+		 * open() or just before start streaming.
+		 */
 		pm_runtime_get_sync(dev);
 		vpfe_config_enable(ccdc, 1);
 
-- 
GitLab


From 6e8b1526db164c9d4b9dacfb9bc48e365d7c4860 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:07:41 +0200
Subject: [PATCH 0159/3804] media: sh_vou: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

While here, check for a PM runtime resume error at open time.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sh_vou.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/sh_vou.c b/drivers/media/platform/sh_vou.c
index 4ac48441f22c4..ca4310e26c49e 100644
--- a/drivers/media/platform/sh_vou.c
+++ b/drivers/media/platform/sh_vou.c
@@ -1133,7 +1133,11 @@ static int sh_vou_open(struct file *file)
 	if (v4l2_fh_is_singular_file(file) &&
 	    vou_dev->status == SH_VOU_INITIALISING) {
 		/* First open */
-		pm_runtime_get_sync(vou_dev->v4l2_dev.dev);
+		err = pm_runtime_resume_and_get(vou_dev->v4l2_dev.dev);
+		if (err < 0) {
+			v4l2_fh_release(file);
+			goto done_open;
+		}
 		err = sh_vou_hw_init(vou_dev);
 		if (err < 0) {
 			pm_runtime_put(vou_dev->v4l2_dev.dev);
-- 
GitLab


From 908711f542c17fe61e5d653da1beb8e5ab5c7b50 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:09 +0200
Subject: [PATCH 0160/3804] media: mtk-vcodec: fix PM runtime get logic

Currently, the driver just assumes that PM runtime logic
succeeded in resuming the device.

That may not be the case, as pm_runtime_get_sync()
can fail (but keeping the usage count incremented).

Change the code to use pm_runtime_resume_and_get(),
and let it return the error code.

This way, if mtk_vcodec_dec_pw_on() fails, the logic
under fops_vcodec_open() will do the right thing and
return an error, instead of just assuming that the
device is ready to be used.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 4 +++-
 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c  | 8 +++++---
 drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h  | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
index 147dfef1638d2..f87dc47d9e638 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c
@@ -126,7 +126,9 @@ static int fops_vcodec_open(struct file *file)
 	mtk_vcodec_dec_set_default_params(ctx);
 
 	if (v4l2_fh_is_singular(&ctx->fh)) {
-		mtk_vcodec_dec_pw_on(&dev->pm);
+		ret = mtk_vcodec_dec_pw_on(&dev->pm);
+		if (ret < 0)
+			goto err_load_fw;
 		/*
 		 * Does nothing if firmware was already loaded.
 		 */
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
index ddee7046ce422..6038db96f71c3 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c
@@ -88,13 +88,15 @@ void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev)
 	put_device(dev->pm.larbvdec);
 }
 
-void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm)
+int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm)
 {
 	int ret;
 
-	ret = pm_runtime_get_sync(pm->dev);
+	ret = pm_runtime_resume_and_get(pm->dev);
 	if (ret)
-		mtk_v4l2_err("pm_runtime_get_sync fail %d", ret);
+		mtk_v4l2_err("pm_runtime_resume_and_get fail %d", ret);
+
+	return ret;
 }
 
 void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm)
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h
index 872d8bf8cfaf3..280aeaefdb651 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h
@@ -12,7 +12,7 @@
 int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *dev);
 void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev);
 
-void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm);
+int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm);
 void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm);
 void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm);
 void mtk_vcodec_dec_clock_off(struct mtk_vcodec_pm *pm);
-- 
GitLab


From 10343de268d10cf07b092b8b525e12ad558ead77 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:10 +0200
Subject: [PATCH 0161/3804] media: s5p-jpeg: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

As a plus, pm_runtime_resume_and_get() doesn't return
positive numbers, so the return code validation can
be removed.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/s5p-jpeg/jpeg-core.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c
index 026111505f5a5..d402e456f27df 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c
@@ -2566,11 +2566,8 @@ static void s5p_jpeg_buf_queue(struct vb2_buffer *vb)
 static int s5p_jpeg_start_streaming(struct vb2_queue *q, unsigned int count)
 {
 	struct s5p_jpeg_ctx *ctx = vb2_get_drv_priv(q);
-	int ret;
-
-	ret = pm_runtime_get_sync(ctx->jpeg->dev);
 
-	return ret > 0 ? 0 : ret;
+	return pm_runtime_resume_and_get(ctx->jpeg->dev);
 }
 
 static void s5p_jpeg_stop_streaming(struct vb2_queue *q)
-- 
GitLab


From baa450f08d691a40fcc29ba8ce40e02613736ac7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:10 +0200
Subject: [PATCH 0162/3804] media: sti/delta: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/delta/delta-v4l2.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c
index c691b3d81549d..064a00a3084a0 100644
--- a/drivers/media/platform/sti/delta/delta-v4l2.c
+++ b/drivers/media/platform/sti/delta/delta-v4l2.c
@@ -954,10 +954,8 @@ static void delta_run_work(struct work_struct *work)
 	/* enable the hardware */
 	if (!dec->pm) {
 		ret = delta_get_sync(ctx);
-		if (ret) {
-			delta_put_autosuspend(ctx);
+		if (ret)
 			goto err;
-		}
 	}
 
 	/* decode this access unit */
@@ -1277,9 +1275,9 @@ int delta_get_sync(struct delta_ctx *ctx)
 	int ret = 0;
 
 	/* enable the hardware */
-	ret = pm_runtime_get_sync(delta->dev);
+	ret = pm_runtime_resume_and_get(delta->dev);
 	if (ret < 0) {
-		dev_err(delta->dev, "%s pm_runtime_get_sync failed (%d)\n",
+		dev_err(delta->dev, "%s pm_runtime_resume_and_get failed (%d)\n",
 			__func__, ret);
 		return ret;
 	}
-- 
GitLab


From 9c298f82d8392f799a0595f50076afa1d91e9092 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:10 +0200
Subject: [PATCH 0163/3804] media: sunxi: fix pm_runtime_get_sync() usage count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c
index 3f81dd17755cb..fbcca59a0517c 100644
--- a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c
+++ b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c
@@ -494,7 +494,7 @@ static int rotate_start_streaming(struct vb2_queue *vq, unsigned int count)
 		struct device *dev = ctx->dev->dev;
 		int ret;
 
-		ret = pm_runtime_get_sync(dev);
+		ret = pm_runtime_resume_and_get(dev);
 		if (ret < 0) {
 			dev_err(dev, "Failed to enable module\n");
 
-- 
GitLab


From c44eac5b72e23c31eefc0e10a71d9650036b8341 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 0164/3804] media: sti/bdisp: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.

The bdisp_start_streaming() doesn't take it into account, which
would unbalance PM usage counter at bdisp_stop_streaming().

The logic at bdisp_probe() is correct, but it is best to use
the same call throughout the driver.

So, replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
index 060ca85f64d5d..85288da9d2ae6 100644
--- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
@@ -499,7 +499,7 @@ static int bdisp_start_streaming(struct vb2_queue *q, unsigned int count)
 {
 	struct bdisp_ctx *ctx = q->drv_priv;
 	struct vb2_v4l2_buffer *buf;
-	int ret = pm_runtime_get_sync(ctx->bdisp_dev->dev);
+	int ret = pm_runtime_resume_and_get(ctx->bdisp_dev->dev);
 
 	if (ret < 0) {
 		dev_err(ctx->bdisp_dev->dev, "failed to set runtime PM\n");
@@ -1364,10 +1364,10 @@ static int bdisp_probe(struct platform_device *pdev)
 
 	/* Power management */
 	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
 		dev_err(dev, "failed to set PM\n");
-		goto err_pm;
+		goto err_remove;
 	}
 
 	/* Filters */
@@ -1395,6 +1395,7 @@ err_filter:
 	bdisp_hw_free_filters(bdisp->dev);
 err_pm:
 	pm_runtime_put(dev);
+err_remove:
 	bdisp_debugfs_remove(bdisp);
 	v4l2_device_unregister(&bdisp->v4l2_dev);
 err_clk:
-- 
GitLab


From 59f96244af9403ddf4810ec5c0fbe8920857634e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:17 +0200
Subject: [PATCH 0165/3804] media: exynos4-is: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.

In some places, this is ok, but in others the usage count
ended up being unbalanced on failures.

Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

As a bonus, this function always returns zero on success. So,
some code can be simplified.

Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/exynos4-is/fimc-capture.c   |  6 ++----
 drivers/media/platform/exynos4-is/fimc-is.c        |  4 ++--
 drivers/media/platform/exynos4-is/fimc-isp-video.c |  3 +--
 drivers/media/platform/exynos4-is/fimc-isp.c       |  7 +++----
 drivers/media/platform/exynos4-is/fimc-lite.c      |  5 +++--
 drivers/media/platform/exynos4-is/fimc-m2m.c       |  5 +----
 drivers/media/platform/exynos4-is/media-dev.c      |  9 +++------
 drivers/media/platform/exynos4-is/mipi-csis.c      | 10 ++++------
 8 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c
index 13c838d3f9473..0da36443173c1 100644
--- a/drivers/media/platform/exynos4-is/fimc-capture.c
+++ b/drivers/media/platform/exynos4-is/fimc-capture.c
@@ -478,11 +478,9 @@ static int fimc_capture_open(struct file *file)
 		goto unlock;
 
 	set_bit(ST_CAPT_BUSY, &fimc->state);
-	ret = pm_runtime_get_sync(&fimc->pdev->dev);
-	if (ret < 0) {
-		pm_runtime_put_sync(&fimc->pdev->dev);
+	ret = pm_runtime_resume_and_get(&fimc->pdev->dev);
+	if (ret < 0)
 		goto unlock;
-	}
 
 	ret = v4l2_fh_open(file);
 	if (ret) {
diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c
index 972d9601d2360..1b24f5bfc4af4 100644
--- a/drivers/media/platform/exynos4-is/fimc-is.c
+++ b/drivers/media/platform/exynos4-is/fimc-is.c
@@ -828,9 +828,9 @@ static int fimc_is_probe(struct platform_device *pdev)
 			goto err_irq;
 	}
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
-		goto err_pm;
+		goto err_irq;
 
 	vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32));
 
diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c
index 612b9872afc87..8d9dc597deaaf 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp-video.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c
@@ -275,7 +275,7 @@ static int isp_video_open(struct file *file)
 	if (ret < 0)
 		goto unlock;
 
-	ret = pm_runtime_get_sync(&isp->pdev->dev);
+	ret = pm_runtime_resume_and_get(&isp->pdev->dev);
 	if (ret < 0)
 		goto rel_fh;
 
@@ -293,7 +293,6 @@ static int isp_video_open(struct file *file)
 	if (!ret)
 		goto unlock;
 rel_fh:
-	pm_runtime_put_noidle(&isp->pdev->dev);
 	v4l2_fh_release(file);
 unlock:
 	mutex_unlock(&isp->video_lock);
diff --git a/drivers/media/platform/exynos4-is/fimc-isp.c b/drivers/media/platform/exynos4-is/fimc-isp.c
index a77c49b185115..74b49d30901ed 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp.c
@@ -304,11 +304,10 @@ static int fimc_isp_subdev_s_power(struct v4l2_subdev *sd, int on)
 	pr_debug("on: %d\n", on);
 
 	if (on) {
-		ret = pm_runtime_get_sync(&is->pdev->dev);
-		if (ret < 0) {
-			pm_runtime_put(&is->pdev->dev);
+		ret = pm_runtime_resume_and_get(&is->pdev->dev);
+		if (ret < 0)
 			return ret;
-		}
+
 		set_bit(IS_ST_PWR_ON, &is->state);
 
 		ret = fimc_is_start_firmware(is);
diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c
index fe20af3a7178a..4d8b18078ff37 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite.c
+++ b/drivers/media/platform/exynos4-is/fimc-lite.c
@@ -469,9 +469,9 @@ static int fimc_lite_open(struct file *file)
 	}
 
 	set_bit(ST_FLITE_IN_USE, &fimc->state);
-	ret = pm_runtime_get_sync(&fimc->pdev->dev);
+	ret = pm_runtime_resume_and_get(&fimc->pdev->dev);
 	if (ret < 0)
-		goto err_pm;
+		goto err_in_use;
 
 	ret = v4l2_fh_open(file);
 	if (ret < 0)
@@ -499,6 +499,7 @@ static int fimc_lite_open(struct file *file)
 	v4l2_fh_release(file);
 err_pm:
 	pm_runtime_put_sync(&fimc->pdev->dev);
+err_in_use:
 	clear_bit(ST_FLITE_IN_USE, &fimc->state);
 unlock:
 	mutex_unlock(&fimc->lock);
diff --git a/drivers/media/platform/exynos4-is/fimc-m2m.c b/drivers/media/platform/exynos4-is/fimc-m2m.c
index c9704a147e5cf..df8e2aa454d8f 100644
--- a/drivers/media/platform/exynos4-is/fimc-m2m.c
+++ b/drivers/media/platform/exynos4-is/fimc-m2m.c
@@ -73,17 +73,14 @@ static void fimc_m2m_shutdown(struct fimc_ctx *ctx)
 static int start_streaming(struct vb2_queue *q, unsigned int count)
 {
 	struct fimc_ctx *ctx = q->drv_priv;
-	int ret;
 
-	ret = pm_runtime_get_sync(&ctx->fimc_dev->pdev->dev);
-	return ret > 0 ? 0 : ret;
+	return pm_runtime_resume_and_get(&ctx->fimc_dev->pdev->dev);
 }
 
 static void stop_streaming(struct vb2_queue *q)
 {
 	struct fimc_ctx *ctx = q->drv_priv;
 
-
 	fimc_m2m_shutdown(ctx);
 	fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR);
 	pm_runtime_put(&ctx->fimc_dev->pdev->dev);
diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
index 13d192ba4aa6e..e025178db06c3 100644
--- a/drivers/media/platform/exynos4-is/media-dev.c
+++ b/drivers/media/platform/exynos4-is/media-dev.c
@@ -512,11 +512,9 @@ static int fimc_md_register_sensor_entities(struct fimc_md *fmd)
 	if (!fmd->pmf)
 		return -ENXIO;
 
-	ret = pm_runtime_get_sync(fmd->pmf);
-	if (ret < 0) {
-		pm_runtime_put(fmd->pmf);
+	ret = pm_runtime_resume_and_get(fmd->pmf);
+	if (ret < 0)
 		return ret;
-	}
 
 	fmd->num_sensors = 0;
 
@@ -1291,8 +1289,7 @@ static int cam_clk_prepare(struct clk_hw *hw)
 	if (camclk->fmd->pmf == NULL)
 		return -ENODEV;
 
-	ret = pm_runtime_get_sync(camclk->fmd->pmf);
-	return ret < 0 ? ret : 0;
+	return pm_runtime_resume_and_get(camclk->fmd->pmf);
 }
 
 static void cam_clk_unprepare(struct clk_hw *hw)
diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c
index 1aac167abb175..ebf39c8568943 100644
--- a/drivers/media/platform/exynos4-is/mipi-csis.c
+++ b/drivers/media/platform/exynos4-is/mipi-csis.c
@@ -494,7 +494,7 @@ static int s5pcsis_s_power(struct v4l2_subdev *sd, int on)
 	struct device *dev = &state->pdev->dev;
 
 	if (on)
-		return pm_runtime_get_sync(dev);
+		return pm_runtime_resume_and_get(dev);
 
 	return pm_runtime_put_sync(dev);
 }
@@ -509,11 +509,9 @@ static int s5pcsis_s_stream(struct v4l2_subdev *sd, int enable)
 
 	if (enable) {
 		s5pcsis_clear_counters(state);
-		ret = pm_runtime_get_sync(&state->pdev->dev);
-		if (ret && ret != 1) {
-			pm_runtime_put_noidle(&state->pdev->dev);
+		ret = pm_runtime_resume_and_get(&state->pdev->dev);
+		if (ret < 0)
 			return ret;
-		}
 	}
 
 	mutex_lock(&state->lock);
@@ -535,7 +533,7 @@ unlock:
 	if (!enable)
 		pm_runtime_put(&state->pdev->dev);
 
-	return ret == 1 ? 0 : ret;
+	return ret;
 }
 
 static int s5pcsis_enum_mbus_code(struct v4l2_subdev *sd,
-- 
GitLab


From 59087b66ea6730c130c57d23bd9fd139b78c1ba5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:18 +0200
Subject: [PATCH 0166/3804] media: exynos-gsc: fix pm_runtime_get_sync() usage
 count

The pm_runtime_get_sync() internally increments the
dev->power.usage_count without decrementing it, even on errors.
Replace it by the new pm_runtime_resume_and_get(), introduced by:
commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
in order to properly decrement the usage counter, avoiding
a potential PM usage counter leak.

As a bonus, as pm_runtime_resume_and_get() always returns 0 on
success, the logic can be simplified.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/exynos-gsc/gsc-m2m.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c
index 27a3c92c73bce..f1cf847d1cc2d 100644
--- a/drivers/media/platform/exynos-gsc/gsc-m2m.c
+++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c
@@ -56,10 +56,8 @@ static void __gsc_m2m_job_abort(struct gsc_ctx *ctx)
 static int gsc_m2m_start_streaming(struct vb2_queue *q, unsigned int count)
 {
 	struct gsc_ctx *ctx = q->drv_priv;
-	int ret;
 
-	ret = pm_runtime_get_sync(&ctx->gsc_dev->pdev->dev);
-	return ret > 0 ? 0 : ret;
+	return pm_runtime_resume_and_get(&ctx->gsc_dev->pdev->dev);
 }
 
 static void __gsc_m2m_cleanup_queue(struct gsc_ctx *ctx)
-- 
GitLab


From 9148cded3a0246d55e62187e219466c0c7986925 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Fri, 9 Apr 2021 14:24:21 +0200
Subject: [PATCH 0167/3804] media: staging: media: hantro: Align line break to
 the open parenthesis in file hantro_hw.h

Aligns line break with the remaining function arguments
to the open parenthesis. Issue found by checkpatch.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_hw.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 83b3e42b63a3b..0e34ae545f661 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -219,7 +219,7 @@ hantro_h264_mv_size(unsigned int width, unsigned int height)
 void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
 void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
-	const struct v4l2_ctrl_mpeg2_quantization *ctrl);
+				  const struct v4l2_ctrl_mpeg2_quantization *ctrl);
 int hantro_mpeg2_dec_init(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx);
 
-- 
GitLab


From d637c5dbbfee4c0cbd2f507b627e5b29823f49da Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Fri, 9 Apr 2021 14:24:25 +0200
Subject: [PATCH 0168/3804] media: staging: media: hantro: Align line break to
 the open parenthesis in file hantro_mpeg2.c

Aligns line break with the remaining function arguments
to the open parenthesis. Issue found by checkpatch.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_mpeg2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/hantro/hantro_mpeg2.c b/drivers/staging/media/hantro/hantro_mpeg2.c
index 1d334e6fcd063..53a99a9988d51 100644
--- a/drivers/staging/media/hantro/hantro_mpeg2.c
+++ b/drivers/staging/media/hantro/hantro_mpeg2.c
@@ -19,7 +19,7 @@ static const u8 zigzag[64] = {
 };
 
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
-	const struct v4l2_ctrl_mpeg2_quantization *ctrl)
+				  const struct v4l2_ctrl_mpeg2_quantization *ctrl)
 {
 	int i, n;
 
-- 
GitLab


From d58f75de9b958ff9d996da942ccf79d7526bfde8 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Fri, 9 Apr 2021 21:01:08 +0200
Subject: [PATCH 0169/3804] media: staging: media: omap4iss: Align line break
 to the open parenthesis in file iss_video.c

Aligns line break with the remaining function arguments
to the open parenthesis. Issue found by checkpatch.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/omap4iss/iss_video.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
index 930f638f51eba..d0da083deed53 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c
@@ -399,7 +399,7 @@ static void iss_video_buf_queue(struct vb2_buffer *vb)
 
 		if (start)
 			omap4iss_pipeline_set_stream(pipe,
-						ISS_PIPELINE_STREAM_SINGLESHOT);
+						     ISS_PIPELINE_STREAM_SINGLESHOT);
 	}
 }
 
@@ -960,7 +960,7 @@ iss_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type)
 		unsigned long flags;
 
 		ret = omap4iss_pipeline_set_stream(pipe,
-					      ISS_PIPELINE_STREAM_CONTINUOUS);
+						   ISS_PIPELINE_STREAM_CONTINUOUS);
 		if (ret < 0)
 			goto err_omap4iss_set_stream;
 		spin_lock_irqsave(&video->qlock, flags);
-- 
GitLab


From 047d39c4a1bc197ec038008e941fa30d08d1d885 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Mon, 12 Apr 2021 04:35:56 +0200
Subject: [PATCH 0170/3804] media: staging: media: atomisp: Removed a
 superfluous else clause

Fixed a coding style issue.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/i2c/atomisp-ov2722.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
index 1209492c1826a..912eadaffc442 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
@@ -774,11 +774,11 @@ static int ov2722_s_power(struct v4l2_subdev *sd, int on)
 
 	if (on == 0)
 		return power_down(sd);
-	else {
-		ret = power_up(sd);
-		if (!ret)
-			return ov2722_init(sd);
-	}
+
+	ret = power_up(sd);
+	if (!ret)
+		return ov2722_init(sd);
+
 	return ret;
 }
 
-- 
GitLab


From 94dfa800dda45a0849aa493c05800b2a3557a6ee Mon Sep 17 00:00:00 2001
From: Beatriz Martins de Carvalho <martinsdecarvalhobeatriz@gmail.com>
Date: Mon, 12 Apr 2021 15:43:01 +0200
Subject: [PATCH 0171/3804] media: staging: media: atomisp: i2c: align line
 break to match with open parenthesis

Aligns line break with the remaining function arguments
to the open parenthesis.
Issue found by checkpatch in file atomisp-gc2235.c

Signed-off-by: Beatriz Martins de Carvalho <martinsdecarvalhobeatriz@gmail.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/i2c/atomisp-gc2235.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
index 78147ffb60996..6ba4a8adff7c2 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
@@ -171,8 +171,8 @@ static int __gc2235_buf_reg_array(struct i2c_client *client,
 }
 
 static int __gc2235_write_reg_is_consecutive(struct i2c_client *client,
-	struct gc2235_write_ctrl *ctrl,
-	const struct gc2235_reg *next)
+					     struct gc2235_write_ctrl *ctrl,
+					     const struct gc2235_reg *next)
 {
 	if (ctrl->index == 0)
 		return 1;
-- 
GitLab


From a21baa418c5b6a011f02d18d2214c28d6f3a4a47 Mon Sep 17 00:00:00 2001
From: Mitali Borkar <mitaliborkar810@gmail.com>
Date: Tue, 13 Apr 2021 07:15:29 +0200
Subject: [PATCH 0172/3804] media: staging: media: intel-ipu3: remove
 unnecessary blank line

Removed an unnecessary blank line to meet linux kernel coding style.
Reported by checkpatch.pl

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/include/intel-ipu3.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index 9b644fb23dded..3fb7fc547effb 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -74,7 +74,6 @@ struct ipu3_uapi_grid_config {
 	(IPU3_UAPI_AWB_MAX_SETS * \
 	 (IPU3_UAPI_AWB_SET_SIZE + IPU3_UAPI_AWB_SPARE_FOR_BUBBLES))
 
-
 /**
  * struct ipu3_uapi_awb_raw_buffer - AWB raw buffer
  *
-- 
GitLab


From 25074ea239ac92321e75009e001049886f91d850 Mon Sep 17 00:00:00 2001
From: Mitali Borkar <mitaliborkar810@gmail.com>
Date: Tue, 13 Apr 2021 07:15:46 +0200
Subject: [PATCH 0173/3804] media: staging: media: intel-ipu3: reduce length of
 line

Reduced the length of lines exceeding 100 characters by moving the
trailing comments from those lines to the line above each. This makes
the code neater and meets the Linux kernel coding style.
Reported by checkpatch.

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/include/intel-ipu3.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index 3fb7fc547effb..ce068b2bea735 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -9,8 +9,10 @@
 /* from /drivers/staging/media/ipu3/include/videodev2.h */
 
 /* Vendor specific - used for IPU3 camera sub-system */
-#define V4L2_META_FMT_IPU3_PARAMS	v4l2_fourcc('i', 'p', '3', 'p') /* IPU3 processing parameters */
-#define V4L2_META_FMT_IPU3_STAT_3A	v4l2_fourcc('i', 'p', '3', 's') /* IPU3 3A statistics */
+/* IPU3 processing parameters */
+#define V4L2_META_FMT_IPU3_PARAMS	v4l2_fourcc('i', 'p', '3', 'p')
+/* IPU3 3A statistics */
+#define V4L2_META_FMT_IPU3_STAT_3A	v4l2_fourcc('i', 'p', '3', 's')
 
 /* from include/uapi/linux/v4l2-controls.h */
 #define V4L2_CID_INTEL_IPU3_BASE	(V4L2_CID_USER_BASE + 0x10c0)
-- 
GitLab


From 17daf473e2a48ca34b434f69c00bd2fd6fa39a4d Mon Sep 17 00:00:00 2001
From: Mitali Borkar <mitaliborkar810@gmail.com>
Date: Tue, 13 Apr 2021 07:16:22 +0200
Subject: [PATCH 0174/3804] media: staging: media: intel-ipu3: remove space
 before tabs

Removed unnecessary space before tabs to adhere to the Linux kernel
coding style.
Reported by checkpatch.

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/include/intel-ipu3.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index ce068b2bea735..1f4a2fd7c9f5a 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -631,7 +631,7 @@ struct ipu3_uapi_bnr_static_config_wb_gains_thr_config {
  * @cg:	Gain coefficient for threshold calculation, [0, 31], default 8.
  * @ci:	Intensity coefficient for threshold calculation. range [0, 0x1f]
  *	default 6.
- * 	format: u3.2 (3 most significant bits represent whole number,
+ *	format: u3.2 (3 most significant bits represent whole number,
  *	2 least significant bits represent the fractional part
  *	with each count representing 0.25)
  *	e.g. 6 in binary format is 00110, that translates to 1.5
-- 
GitLab


From 72e03872410842e6c0de27b7243fe90af5254bc0 Mon Sep 17 00:00:00 2001
From: Mitali Borkar <mitaliborkar810@gmail.com>
Date: Tue, 13 Apr 2021 17:29:17 +0200
Subject: [PATCH 0175/3804] media: staging: media: intel-ipu3: line should not
 end with '['

Fixed the issue of line should not end with '[' by moving argument
from next line to line ending with '[' and made it under 80
characters.
Reported by checkpatch.

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/include/intel-ipu3.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index 1f4a2fd7c9f5a..fa3d6ee5adf29 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -245,8 +245,8 @@ struct ipu3_uapi_ae_ccm {
  */
 struct ipu3_uapi_ae_config {
 	struct ipu3_uapi_ae_grid_config grid_cfg __attribute__((aligned(32)));
-	struct ipu3_uapi_ae_weight_elem weights[
-			IPU3_UAPI_AE_WEIGHTS] __attribute__((aligned(32)));
+	struct ipu3_uapi_ae_weight_elem weights[IPU3_UAPI_AE_WEIGHTS]
+						__attribute__((aligned(32)));
 	struct ipu3_uapi_ae_ccm ae_ccm __attribute__((aligned(32)));
 } __packed;
 
-- 
GitLab


From 7900bdc25a019159911d5ee38f83b78ac6639589 Mon Sep 17 00:00:00 2001
From: Mitali Borkar <mitaliborkar810@gmail.com>
Date: Tue, 13 Apr 2021 21:50:16 +0200
Subject: [PATCH 0176/3804] media: staging: media: zoran: add spaces around
 '<<' operator

Added spaces around '<<' operator to improve readability and meet linux
kernel coding style.
Reported by checkpatch

Signed-off-by: Mitali Borkar <mitaliborkar810@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zr36057.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/media/zoran/zr36057.h b/drivers/staging/media/zoran/zr36057.h
index 71b651add35ae..a2a75fd9f5354 100644
--- a/drivers/staging/media/zoran/zr36057.h
+++ b/drivers/staging/media/zoran/zr36057.h
@@ -30,13 +30,13 @@
 #define ZR36057_VFESPFR_HOR_DCM          14
 #define ZR36057_VFESPFR_VER_DCM          8
 #define ZR36057_VFESPFR_DISP_MODE        6
-#define ZR36057_VFESPFR_YUV422          (0<<3)
-#define ZR36057_VFESPFR_RGB888          (1<<3)
-#define ZR36057_VFESPFR_RGB565          (2<<3)
-#define ZR36057_VFESPFR_RGB555          (3<<3)
-#define ZR36057_VFESPFR_ERR_DIF          (1<<2)
-#define ZR36057_VFESPFR_PACK24          (1<<1)
-#define ZR36057_VFESPFR_LITTLE_ENDIAN    (1<<0)
+#define ZR36057_VFESPFR_YUV422          (0 << 3)
+#define ZR36057_VFESPFR_RGB888          (1 << 3)
+#define ZR36057_VFESPFR_RGB565          (2 << 3)
+#define ZR36057_VFESPFR_RGB555          (3 << 3)
+#define ZR36057_VFESPFR_ERR_DIF          (1 << 2)
+#define ZR36057_VFESPFR_PACK24          (1 << 1)
+#define ZR36057_VFESPFR_LITTLE_ENDIAN    (1 << 0)
 
 #define ZR36057_VDTR            0x00c	/* Video Display "Top" Register */
 
-- 
GitLab


From 451c34dd69b80857fa50e33581db22143afa8890 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Tue, 13 Apr 2021 20:03:13 +0200
Subject: [PATCH 0177/3804] media: staging: media: atomisp: Minor code style
 changes

Fixed line continuation and parenthesis alignment issues.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/i2c/atomisp-ov2722.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
index 912eadaffc442..90a985ee25fa8 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
@@ -49,8 +49,8 @@ static int ov2722_read_reg(struct i2c_client *client,
 		return -ENODEV;
 	}
 
-	if (data_length != OV2722_8BIT && data_length != OV2722_16BIT
-	    && data_length != OV2722_32BIT) {
+	if (data_length != OV2722_8BIT && data_length != OV2722_16BIT &&
+	    data_length != OV2722_32BIT) {
 		dev_err(&client->dev, "%s error, invalid data length\n",
 			__func__);
 		return -EINVAL;
@@ -212,8 +212,8 @@ static int __ov2722_buf_reg_array(struct i2c_client *client,
 }
 
 static int __ov2722_write_reg_is_consecutive(struct i2c_client *client,
-	struct ov2722_write_ctrl *ctrl,
-	const struct ov2722_reg *next)
+					     struct ov2722_write_ctrl *ctrl,
+					     const struct ov2722_reg *next)
 {
 	if (ctrl->index == 0)
 		return 1;
-- 
GitLab


From d7c89be51d17d629e7c550388a81858af09ad343 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Wed, 14 Apr 2021 15:25:37 +0200
Subject: [PATCH 0178/3804] media: staging: media: omap4iss: Remove unused
 macro function

Remove unused macro function "v4l2_dev_to_iss_device(dev)".

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/omap4iss/iss.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/media/omap4iss/iss.h b/drivers/staging/media/omap4iss/iss.h
index b88f9529683c1..3f587e0007295 100644
--- a/drivers/staging/media/omap4iss/iss.h
+++ b/drivers/staging/media/omap4iss/iss.h
@@ -119,9 +119,6 @@ struct iss_device {
 	unsigned int isp_subclk_resources;
 };
 
-#define v4l2_dev_to_iss_device(dev) \
-	container_of(dev, struct iss_device, v4l2_dev)
-
 int omap4iss_get_external_info(struct iss_pipeline *pipe,
 			       struct media_link *link);
 
-- 
GitLab


From bbbcba0267e2faff32c62905219f1d3b81f75d30 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Wed, 14 Apr 2021 16:06:02 +0200
Subject: [PATCH 0179/3804] media: staging: media: atomisp: pci: Correct
 identation in block of conditional statements in file atomisp_v4l2.c

Correct indentation in block of conditional statements.
The function "v4l2_device_unregister_subdev()" depends on
the results of the macro function "list_for_each_entry_safe()".

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
index 0295e2e32d797..6d853f480e1c8 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
@@ -1178,7 +1178,7 @@ static void atomisp_unregister_entities(struct atomisp_device *isp)
 		atomisp_mipi_csi2_unregister_entities(&isp->csi2_port[i]);
 
 	list_for_each_entry_safe(sd, next, &isp->v4l2_dev.subdevs, list)
-	v4l2_device_unregister_subdev(sd);
+		v4l2_device_unregister_subdev(sd);
 
 	v4l2_device_unregister(&isp->v4l2_dev);
 	media_device_unregister(&isp->media_dev);
-- 
GitLab


From 848802da8d0443befd155926ff4184e6ebffb5c0 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Wed, 14 Apr 2021 16:06:06 +0200
Subject: [PATCH 0180/3804] media: staging: media: atomisp: pci: Correct
 identation in block of conditional statements in file atomisp_acc.c

Correct indentation in block of conditional statements.
The conditional statement depends on the results of the
macro function "list_for_each_entry()".

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_acc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c
index f638d0bd09fe6..5e5faa4b34456 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_acc.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c
@@ -77,8 +77,8 @@ acc_get_fw(struct atomisp_sub_device *asd, unsigned int handle)
 	struct atomisp_acc_fw *acc_fw;
 
 	list_for_each_entry(acc_fw, &asd->acc.fw, list)
-	if (acc_fw->handle == handle)
-		return acc_fw;
+		if (acc_fw->handle == handle)
+			return acc_fw;
 
 	return NULL;
 }
-- 
GitLab


From 14bc5eb80bda1a3e3c8c2a0eab3064eaba949f3a Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Wed, 14 Apr 2021 23:16:45 +0200
Subject: [PATCH 0181/3804] media: staging: media: atomisp: pci: Format
 comments according to coding-style in file atomisp_acc.c

Format all comments according to the coding-style.
Issue detected by checkpatch.pl.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_acc.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c
index 5e5faa4b34456..9a1751895ab03 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_acc.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c
@@ -464,9 +464,11 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd)
 			continue;
 
 		for (i = 0; i < ARRAY_SIZE(acc_flag_to_pipe); i++) {
-			/* QoS (ACC pipe) acceleration stages are currently
-			 * allowed only in continuous mode. Skip them for
-			 * all other modes. */
+			/*
+			 * QoS (ACC pipe) acceleration stages are
+			 * currently allowed only in continuous mode.
+			 * Skip them for all other modes.
+			 */
 			if (!continuous &&
 			    acc_flag_to_pipe[i].flag ==
 			    ATOMISP_ACC_FW_LOAD_FL_ACC)
-- 
GitLab


From 73edc4da40635774100d0eb9ca2e6476e3b2b470 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Wed, 14 Apr 2021 23:16:48 +0200
Subject: [PATCH 0182/3804] media: staging: media: atomisp: pci: Format
 comments according to coding-style in file atomisp_cmd.h

Format all comments according to the coding-style.
Issue detected by checkpatch.pl.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../staging/media/atomisp/pci/atomisp_cmd.h   | 161 +++++-------------
 1 file changed, 43 insertions(+), 118 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.h b/drivers/staging/media/atomisp/pci/atomisp_cmd.h
index 412baeb919449..e8bdd264d31b2 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.h
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.h
@@ -49,9 +49,7 @@ struct ia_css_frame;
 /* FIXME: check if can go */
 extern int atomisp_punit_hpll_freq;
 
-/*
- * Helper function
- */
+/* Helper function */
 void dump_sp_dmem(struct atomisp_device *isp, unsigned int addr,
 		  unsigned int size);
 struct camera_mipi_info *atomisp_to_sensor_mipi_info(struct v4l2_subdev *sd);
@@ -65,9 +63,7 @@ bool atomisp_buffers_queued(struct atomisp_sub_device *asd);
 /* ISP2401 */
 bool atomisp_buffers_queued_pipe(struct atomisp_video_pipe *pipe);
 
-/*
- * Interrupt functions
- */
+/* Interrupt functions */
 void atomisp_msi_irq_init(struct atomisp_device *isp);
 void atomisp_msi_irq_uninit(struct atomisp_device *isp);
 void atomisp_wdt_work(struct work_struct *work);
@@ -82,15 +78,10 @@ int atomisp_get_frame_pgnr(struct atomisp_device *isp,
 			   const struct ia_css_frame *frame, u32 *p_pgnr);
 void atomisp_delayed_init_work(struct work_struct *work);
 
-/*
- * Get internal fmt according to V4L2 fmt
- */
-
+/* Get internal fmt according to V4L2 fmt */
 bool atomisp_is_viewfinder_support(struct atomisp_device *isp);
 
-/*
- * ISP features control function
- */
+/* ISP features control function */
 
 /*
  * Function to set sensor runmode by user when
@@ -105,9 +96,7 @@ int atomisp_set_sensor_runmode(struct atomisp_sub_device *asd,
 int atomisp_gdc_cac(struct atomisp_sub_device *asd, int flag,
 		    __s32 *value);
 
-/*
- * Function to enable/disable low light mode (including ANR)
- */
+/* Function to enable/disable low light mode (including ANR) */
 int atomisp_low_light(struct atomisp_sub_device *asd, int flag,
 		      __s32 *value);
 
@@ -120,91 +109,63 @@ int atomisp_xnr(struct atomisp_sub_device *asd, int flag, int *arg);
 int atomisp_formats(struct atomisp_sub_device *asd, int flag,
 		    struct atomisp_formats_config *config);
 
-/*
- * Function to configure noise reduction
- */
+/* Function to configure noise reduction */
 int atomisp_nr(struct atomisp_sub_device *asd, int flag,
 	       struct atomisp_nr_config *config);
 
-/*
- * Function to configure temporal noise reduction (TNR)
- */
+/* Function to configure temporal noise reduction (TNR) */
 int atomisp_tnr(struct atomisp_sub_device *asd, int flag,
 		struct atomisp_tnr_config *config);
 
-/*
- * Function to configure black level compensation
- */
+/* Function to configure black level compensation */
 int atomisp_black_level(struct atomisp_sub_device *asd, int flag,
 			struct atomisp_ob_config *config);
 
-/*
- * Function to configure edge enhancement
- */
+/* Function to configure edge enhancement */
 int atomisp_ee(struct atomisp_sub_device *asd, int flag,
 	       struct atomisp_ee_config *config);
 
-/*
- * Function to update Gamma table for gamma, brightness and contrast config
- */
+/* Function to update Gamma table for gamma, brightness and contrast config */
 int atomisp_gamma(struct atomisp_sub_device *asd, int flag,
 		  struct atomisp_gamma_table *config);
-/*
- * Function to update Ctc table for Chroma Enhancement
- */
+
+/* Function to update Ctc table for Chroma Enhancement */
 int atomisp_ctc(struct atomisp_sub_device *asd, int flag,
 		struct atomisp_ctc_table *config);
 
-/*
- * Function to update gamma correction parameters
- */
+/* Function to update gamma correction parameters */
 int atomisp_gamma_correction(struct atomisp_sub_device *asd, int flag,
 			     struct atomisp_gc_config *config);
 
-/*
- * Function to update Gdc table for gdc
- */
+/* Function to update Gdc table for gdc */
 int atomisp_gdc_cac_table(struct atomisp_sub_device *asd, int flag,
 			  struct atomisp_morph_table *config);
 
-/*
- * Function to update table for macc
- */
+/* Function to update table for macc */
 int atomisp_macc_table(struct atomisp_sub_device *asd, int flag,
 		       struct atomisp_macc_config *config);
-/*
- * Function to get DIS statistics.
- */
+
+/* Function to get DIS statistics. */
 int atomisp_get_dis_stat(struct atomisp_sub_device *asd,
 			 struct atomisp_dis_statistics *stats);
 
-/*
- * Function to get DVS2 BQ resolution settings
- */
+/* Function to get DVS2 BQ resolution settings */
 int atomisp_get_dvs2_bq_resolutions(struct atomisp_sub_device *asd,
 				    struct atomisp_dvs2_bq_resolutions *bq_res);
 
-/*
- * Function to set the DIS coefficients.
- */
+/* Function to set the DIS coefficients. */
 int atomisp_set_dis_coefs(struct atomisp_sub_device *asd,
 			  struct atomisp_dis_coefficients *coefs);
 
-/*
- * Function to set the DIS motion vector.
- */
+/* Function to set the DIS motion vector. */
 int atomisp_set_dis_vector(struct atomisp_sub_device *asd,
 			   struct atomisp_dis_vector *vector);
 
-/*
- * Function to set/get 3A stat from isp
- */
+/* Function to set/get 3A stat from isp */
 int atomisp_3a_stat(struct atomisp_sub_device *asd, int flag,
 		    struct atomisp_3a_statistics *config);
 
-/*
- * Function to get metadata from isp
- */
+/* Function to get metadata from isp */
 int atomisp_get_metadata(struct atomisp_sub_device *asd, int flag,
 			 struct atomisp_metadata *config);
 
@@ -213,84 +174,59 @@ int atomisp_get_metadata_by_type(struct atomisp_sub_device *asd, int flag,
 
 int atomisp_set_parameters(struct video_device *vdev,
 			   struct atomisp_parameters *arg);
-/*
- * Function to set/get isp parameters to isp
- */
+
+/* Function to set/get isp parameters to isp */
 int atomisp_param(struct atomisp_sub_device *asd, int flag,
 		  struct atomisp_parm *config);
 
-/*
- * Function to configure color effect of the image
- */
+/* Function to configure color effect of the image */
 int atomisp_color_effect(struct atomisp_sub_device *asd, int flag,
 			 __s32 *effect);
 
-/*
- * Function to configure bad pixel correction
- */
+/* Function to configure bad pixel correction */
 int atomisp_bad_pixel(struct atomisp_sub_device *asd, int flag,
 		      __s32 *value);
 
-/*
- * Function to configure bad pixel correction params
- */
+/* Function to configure bad pixel correction params */
 int atomisp_bad_pixel_param(struct atomisp_sub_device *asd, int flag,
 			    struct atomisp_dp_config *config);
 
-/*
- * Function to enable/disable video image stablization
- */
+/* Function to enable/disable video image stablization */
 int atomisp_video_stable(struct atomisp_sub_device *asd, int flag,
 			 __s32 *value);
 
-/*
- * Function to configure fixed pattern noise
- */
+/* Function to configure fixed pattern noise */
 int atomisp_fixed_pattern(struct atomisp_sub_device *asd, int flag,
 			  __s32 *value);
 
-/*
- * Function to configure fixed pattern noise table
- */
+/* Function to configure fixed pattern noise table */
 int atomisp_fixed_pattern_table(struct atomisp_sub_device *asd,
 				struct v4l2_framebuffer *config);
 
-/*
- * Function to configure false color correction
- */
+/* Function to configure false color correction */
 int atomisp_false_color(struct atomisp_sub_device *asd, int flag,
 			__s32 *value);
 
-/*
- * Function to configure false color correction params
- */
+/* Function to configure false color correction params */
 int atomisp_false_color_param(struct atomisp_sub_device *asd, int flag,
 			      struct atomisp_de_config *config);
 
-/*
- * Function to configure white balance params
- */
+/* Function to configure white balance params */
 int atomisp_white_balance_param(struct atomisp_sub_device *asd, int flag,
 				struct atomisp_wb_config *config);
 
 int atomisp_3a_config_param(struct atomisp_sub_device *asd, int flag,
 			    struct atomisp_3a_config *config);
 
-/*
- * Function to setup digital zoom
- */
+/* Function to setup digital zoom */
 int atomisp_digital_zoom(struct atomisp_sub_device *asd, int flag,
 			 __s32 *value);
 
-/*
- * Function  set camera_prefiles.xml current sensor pixel array size
- */
+/* Function  set camera_prefiles.xml current sensor pixel array size */
 int atomisp_set_array_res(struct atomisp_sub_device *asd,
 			  struct atomisp_resolution  *config);
 
-/*
- * Function to calculate real zoom region for every pipe
- */
+/* Function to calculate real zoom region for every pipe */
 int atomisp_calculate_real_zoom_region(struct atomisp_sub_device *asd,
 				       struct ia_css_dz_config   *dz_config,
 				       enum ia_css_pipe_id css_pipe_id);
@@ -371,9 +307,7 @@ void atomisp_css_flush(struct atomisp_device *isp);
 int atomisp_source_pad_to_stream_id(struct atomisp_sub_device *asd,
 				    uint16_t source_pad);
 
-/*
- * Events. Only one event has to be exported for now.
- */
+/* Events. Only one event has to be exported for now. */
 void atomisp_eof_event(struct atomisp_sub_device *asd, uint8_t exp_id);
 
 enum mipi_port_id __get_mipi_port(struct atomisp_device *isp,
@@ -389,34 +323,25 @@ void atomisp_free_css_parameters(struct atomisp_css_params *css_param);
 void atomisp_handle_parameter_and_buffer(struct atomisp_video_pipe *pipe);
 
 void atomisp_flush_params_queue(struct atomisp_video_pipe *asd);
-/*
- * Function to do Raw Buffer related operation, after enable Lock Unlock Raw Buffer
- */
+
+/* Function to do Raw Buffer related operation, after enable Lock Unlock Raw Buffer */
 int atomisp_exp_id_unlock(struct atomisp_sub_device *asd, int *exp_id);
 int atomisp_exp_id_capture(struct atomisp_sub_device *asd, int *exp_id);
 
-/*
- * Function to update Raw Buffer bitmap
- */
+/* Function to update Raw Buffer bitmap */
 int atomisp_set_raw_buffer_bitmap(struct atomisp_sub_device *asd, int exp_id);
 void atomisp_init_raw_buffer_bitmap(struct atomisp_sub_device *asd);
 
-/*
- * Function to enable/disable zoom for capture pipe
- */
+/* Function to enable/disable zoom for capture pipe */
 int atomisp_enable_dz_capt_pipe(struct atomisp_sub_device *asd,
 				unsigned int *enable);
 
-/*
- * Function to get metadata type bu pipe id
- */
+/* Function to get metadata type bu pipe id */
 enum atomisp_metadata_type
 atomisp_get_metadata_type(struct atomisp_sub_device *asd,
 			  enum ia_css_pipe_id pipe_id);
 
-/*
- * Function for HAL to inject a fake event to wake up poll thread
- */
+/* Function for HAL to inject a fake event to wake up poll thread */
 int atomisp_inject_a_fake_event(struct atomisp_sub_device *asd, int *event);
 
 /*
-- 
GitLab


From bc7c9993a0d836aa88aca2969dcb9b22031924d3 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Sun, 7 Feb 2021 18:02:33 +1100
Subject: [PATCH 0183/3804] m68k: Drop -fno-strength-reduce from KBUILD_CFLAGS

This workaround became redundant either when the driver in question was
removed (in Linux v2.6.23) or when the compiler flag became a no-op
(in GCC v4.2). Linux has required GCC v4.6 or later since v4.19.

Link: https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=efa1cdf01850b28c2f6f7035ebd4420259494615
References: commit 565bae6a4a8f ("[SCSI] 53c7xx: kill driver")
References: commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Link: https://lore.kernel.org/r/baa95d7235921dff23bed6320518f3fa90396603.1612681353.git.fthain@telegraphics.com.au
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile
index 82620f14124d2..c54055a3d2845 100644
--- a/arch/m68k/Makefile
+++ b/arch/m68k/Makefile
@@ -66,8 +66,7 @@ KBUILD_CFLAGS += $(cpuflags-y)
 KBUILD_CFLAGS += -pipe -ffreestanding
 
 ifdef CONFIG_MMU
-# without -fno-strength-reduce the 53c7xx.c driver fails ;-(
-KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2
+KBUILD_CFLAGS += -ffixed-a2
 else
 # we can use a m68k-linux-gcc toolchain with these in place
 KBUILD_CPPFLAGS += -DUTS_SYSNAME=\"uClinux\"
-- 
GitLab


From eeff86b6d18ccd7ef1e663dd428b93f5887d02b5 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 10 May 2021 11:08:35 +0800
Subject: [PATCH 0184/3804] m68k: dma: Remove unnecessary include of
 asm/cacheflush.h

In commit ca15ca406f660 ("mm: remove unneeded includes of
<asm/pgalloc.h>"), asm/cacheflush.h was included at line 18,
independent of the macro. The include here is unnecessary. Remove it.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210510030836.11834-1-wanjiabing@vivo.com
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/kernel/dma.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 1c1b875fadc18..2e192a5df949b 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -34,9 +34,6 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot)
 	return prot;
 }
 #else
-
-#include <asm/cacheflush.h>
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
-- 
GitLab


From f279b49f13bd2151bbe402a1d812c1e3646c4bbb Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 01:28:40 -0700
Subject: [PATCH 0185/3804] x86/boot: Modernize genimage script; hdimage+EFI
 support

The image generation scripts in arch/x86/boot are pretty out of date,
except for the isoimage target. Update and clean up the
genimage.sh script, and make it support an arbitrary number of
initramfs files in the image.

Add a "hdimage" target, which can be booted by either BIOS or
EFI (if the kernel is compiled with the EFI stub.) For EFI to be able
to pass the command line to the kernel, we need the EFI shell, but the
firmware builtin EFI shell, if it even exists, is pretty much always
the last resort boot option, so search for OVMF or EDK2 and explicitly
include a copy of the EFI shell.

To make this all work, use bash features in the script.  Furthermore,
this version of the script makes use of some mtools features,
especially mpartition, that might not exist in very old versions of
mtools, but given all the other dependencies of this script this
doesn't seem such a big deal.

Finally, put a volume label ("LINUX_BOOT") on all generated images.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510082840.628372-1-hpa@zytor.com
---
 arch/x86/Makefile            |   5 +-
 arch/x86/boot/.gitignore     |   1 +
 arch/x86/boot/Makefile       |  44 ++---
 arch/x86/boot/genimage.sh    | 303 +++++++++++++++++++++++++----------
 arch/x86/boot/mtools.conf.in |   3 +
 5 files changed, 252 insertions(+), 104 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c77c5d8a7b3eb..d42764c60a0a4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -256,7 +256,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/
 
 boot := arch/x86/boot
 
-BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage
+BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage
 
 PHONY += bzImage $(BOOT_TARGETS)
 
@@ -314,8 +314,9 @@ define archhelp
   echo  '  fdimage		- Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
   echo  '  fdimage144		- Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
   echo  '  fdimage288		- Create 2.8MB boot floppy image (arch/x86/boot/fdimage)'
+  echo  '  hdimage		- Create a BIOS/EFI hard disk image (arch/x86/boot/hdimage)'
   echo  '  isoimage		- Create a boot CD-ROM image (arch/x86/boot/image.iso)'
-  echo  '			  bzdisk/fdimage*/isoimage also accept:'
+  echo  '			  bzdisk/fdimage*/hdimage/isoimage also accept:'
   echo  '			  FDARGS="..."  arguments for the booted kernel'
   echo  '                  	  FDINITRD=file initrd for the booted kernel'
   echo  ''
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 9cc7f1357b9b8..1189be057ebd6 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -11,3 +11,4 @@ setup.elf
 fdimage
 mtools.conf
 image.iso
+hdimage
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index fe605205b4ce2..dfbc26a8e9241 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -29,7 +29,7 @@ KCOV_INSTRUMENT		:= n
 SVGA_MODE	:= -DSVGA_MODE=NORMAL_VGA
 
 targets		:= vmlinux.bin setup.bin setup.elf bzImage
-targets		+= fdimage fdimage144 fdimage288 image.iso mtools.conf
+targets		+= fdimage fdimage144 fdimage288 image.iso hdimage
 subdir-		:= compressed
 
 setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
@@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE
 	$(Q)$(MAKE) $(build)=$(obj)/compressed $@
 
 # Set this if you want to pass append arguments to the
-# bzdisk/fdimage/isoimage kernel
+# bzdisk/fdimage/hdimage/isoimage kernel
 FDARGS =
-# Set this if you want an initrd included with the
-# bzdisk/fdimage/isoimage kernel
+# Set this if you want one or more initrds included in the image
 FDINITRD =
 
-image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,)
+imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh
 
 $(obj)/mtools.conf: $(src)/mtools.conf.in
 	sed -e 's|@OBJ@|$(obj)|g' < $< > $@
 
+targets += mtools.conf
+
+# genimage.sh requires bash, but it also has a bunch of other
+# external dependencies.
 quiet_cmd_genimage = GENIMAGE $3
-cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
-			$(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD)
+cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \
+		$(obj)/mtools.conf '$(FDARGS)' $(FDINITRD)
 
-PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install
+PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install
 
 # This requires write access to /dev/fd0
-bzdisk: $(obj)/bzImage $(obj)/mtools.conf
+# All images require syslinux to be installed; hdimage also requires
+# EDK2/OVMF if the kernel is compiled with the EFI stub.
+bzdisk: $(imgdeps)
 	$(call cmd,genimage,bzdisk,/dev/fd0)
 
-# These require being root or having syslinux 2.02 or higher installed
-fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf
+fdimage fdimage144: $(imgdeps)
 	$(call cmd,genimage,fdimage144,$(obj)/fdimage)
 	@$(kecho) 'Kernel: $(obj)/fdimage is ready'
 
-fdimage288: $(obj)/bzImage $(obj)/mtools.conf
+fdimage288: $(imgdeps)
 	$(call cmd,genimage,fdimage288,$(obj)/fdimage)
 	@$(kecho) 'Kernel: $(obj)/fdimage is ready'
 
-isoimage: $(obj)/bzImage
+hdimage: $(imgdeps)
+	$(call cmd,genimage,hdimage,$(obj)/hdimage)
+	@$(kecho) 'Kernel: $(obj)/hdimage is ready'
+
+isoimage: $(imgdeps)
 	$(call cmd,genimage,isoimage,$(obj)/image.iso)
 	@$(kecho) 'Kernel: $(obj)/image.iso is ready'
 
-bzlilo:
-	if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
-	if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
-	cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz
-	cp System.map $(INSTALL_PATH)/
-	if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
-
 install:
-	sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh \
+		$(KERNELRELEASE) $(obj)/bzImage \
 		System.map "$(INSTALL_PATH)"
diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh
index 6a10d52a41452..0673fdfc1a11a 100644
--- a/arch/x86/boot/genimage.sh
+++ b/arch/x86/boot/genimage.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 #
 # This file is subject to the terms and conditions of the GNU General Public
 # License.  See the file "COPYING" in the main directory of this archive
@@ -8,15 +8,24 @@
 #
 # Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others
 #
-# "make fdimage/fdimage144/fdimage288/isoimage" script for x86 architecture
+# "make fdimage/fdimage144/fdimage288/hdimage/isoimage"
+# script for x86 architecture
 #
 # Arguments:
-#   $1 - fdimage format
-#   $2 - target image file
-#   $3 - kernel bzImage file
-#   $4 - mtool configuration file
-#   $5 - kernel cmdline
-#   $6 - inird image file
+#   $1  - fdimage format
+#   $2  - target image file
+#   $3  - kernel bzImage file
+#   $4  - mtools configuration file
+#   $5  - kernel cmdline
+#   $6+ - initrd image file(s)
+#
+# This script requires:
+#   bash
+#   syslinux
+#   mtools (for fdimage* and hdimage)
+#   edk2/OVMF (for hdimage)
+#
+# Otherwise try to stick to POSIX shell commands...
 #
 
 # Use "make V=1" to debug this script
@@ -26,105 +35,237 @@ case "${KBUILD_VERBOSE}" in
         ;;
 esac
 
-verify () {
-	if [ ! -f "$1" ]; then
-		echo ""                                                   1>&2
-		echo " *** Missing file: $1"                              1>&2
-		echo ""                                                   1>&2
-		exit 1
+# Exit the top-level shell with an error
+topshell=$$
+trap 'exit 1' USR1
+die() {
+	echo ""        1>&2
+	echo " *** $*" 1>&2
+	echo ""        1>&2
+	kill -USR1 $topshell
+}
+
+# Verify the existence and readability of a file
+verify() {
+	if [ ! -f "$1" -o ! -r "$1" ]; then
+		die "Missing file: $1"
 	fi
 }
 
+diskfmt="$1"
+FIMAGE="$2"
+FBZIMAGE="$3"
+MTOOLSRC="$4"
+KCMDLINE="$5"
+shift 5				# Remaining arguments = initrd files
+
+export MTOOLSRC
 
-export MTOOLSRC=$4
-FIMAGE=$2
-FBZIMAGE=$3
-KCMDLINE=$5
-FDINITRD=$6
+# common options for dd
+dd='dd iflag=fullblock'
 
 # Make sure the files actually exist
 verify "$FBZIMAGE"
 
-genbzdisk() {
-	verify "$MTOOLSRC"
-	mformat a:
-	syslinux $FIMAGE
-	echo "$KCMDLINE" | mcopy - a:syslinux.cfg
-	if [ -f "$FDINITRD" ] ; then
-		mcopy "$FDINITRD" a:initrd.img
+declare -a FDINITRDS
+irdpfx=' initrd='
+initrdopts_syslinux=''
+initrdopts_efi=''
+for f in "$@"; do
+	if [ -f "$f" -a -r "$f" ]; then
+	    FDINITRDS=("${FDINITRDS[@]}" "$f")
+	    fname="$(basename "$f")"
+	    initrdopts_syslinux="${initrdopts_syslinux}${irdpfx}${fname}"
+	    irdpfx=,
+	    initrdopts_efi="${initrdopts_efi} initrd=${fname}"
 	fi
-	mcopy $FBZIMAGE a:linux
+done
+
+# Read a $3-byte littleendian unsigned value at offset $2 from file $1
+le() {
+	local n=0
+	local m=1
+	for b in $(od -A n -v -j $2 -N $3 -t u1 "$1"); do
+		n=$((n + b*m))
+		m=$((m * 256))
+	done
+	echo $n
 }
 
-genfdimage144() {
-	verify "$MTOOLSRC"
-	dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
-	mformat v:
-	syslinux $FIMAGE
-	echo "$KCMDLINE" | mcopy - v:syslinux.cfg
-	if [ -f "$FDINITRD" ] ; then
-		mcopy "$FDINITRD" v:initrd.img
-	fi
-	mcopy $FBZIMAGE v:linux
+# Get the EFI architecture name such that boot{name}.efi is the default
+# boot file name. Returns false with no output if the file is not an
+# EFI image or otherwise unknown.
+efiarch() {
+	[ -f "$1" ] || return
+	[ $(le "$1" 0 2) -eq 23117 ] || return		# MZ magic
+	peoffs=$(le "$1" 60 4)				# PE header offset
+	[ $peoffs -ge 64 ] || return
+	[ $(le "$1" $peoffs 4) -eq 17744 ] || return	# PE magic
+	case $(le "$1" $((peoffs+4+20)) 2) in		# PE type
+		267)	;;				# PE32
+		523)	;;				# PE32+
+		*) return 1 ;;				# Invalid
+	esac
+	[ $(le "$1" $((peoffs+4+20+68)) 2) -eq 10 ] || return # EFI app
+	case $(le "$1" $((peoffs+4)) 2) in		# Machine type
+		 332)	echo i386	;;
+		 450)	echo arm	;;
+		 512)	echo ia64	;;
+		20530)	echo riscv32	;;
+		20580)	echo riscv64	;;
+		20776)	echo riscv128	;;
+		34404)	echo x64	;;
+		43620)	echo aa64	;;
+	esac
 }
 
-genfdimage288() {
-	verify "$MTOOLSRC"
-	dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
-	mformat w:
-	syslinux $FIMAGE
-	echo "$KCMDLINE" | mcopy - W:syslinux.cfg
-	if [ -f "$FDINITRD" ] ; then
-		mcopy "$FDINITRD" w:initrd.img
-	fi
-	mcopy $FBZIMAGE w:linux
+# Get the combined sizes in bytes of the files given, counting sparse
+# files as full length, and padding each file to a 4K block size
+filesizes() {
+	local t=0
+	local s
+	for s in $(ls -lnL "$@" 2>/dev/null | awk '/^-/{ print $5; }'); do
+		t=$((t + ((s+4095)/4096)*4096))
+	done
+	echo $t
 }
 
-geniso() {
-	tmp_dir=`dirname $FIMAGE`/isoimage
-	rm -rf $tmp_dir
-	mkdir $tmp_dir
-	for i in lib lib64 share ; do
-		for j in syslinux ISOLINUX ; do
-			if [ -f /usr/$i/$j/isolinux.bin ] ; then
-				isolinux=/usr/$i/$j/isolinux.bin
-			fi
+# Expand directory names which should be in /usr/share into a list
+# of possible alternatives
+sharedirs() {
+	local dir file
+	for dir in /usr/share /usr/lib64 /usr/lib; do
+		for file; do
+			echo "$dir/$file"
+			echo "$dir/${file^^}"
 		done
-		for j in syslinux syslinux/modules/bios ; do
-			if [ -f /usr/$i/$j/ldlinux.c32 ]; then
-				ldlinux=/usr/$i/$j/ldlinux.c32
-			fi
+	done
+}
+efidirs() {
+	local dir file
+	for dir in /usr/share /boot /usr/lib64 /usr/lib; do
+		for file; do
+			echo "$dir/$file"
+			echo "$dir/${file^^}"
 		done
-		if [ -n "$isolinux" -a -n "$ldlinux" ] ; then
-			break
+	done
+}
+
+findsyslinux() {
+	local f="$(find -L $(sharedirs syslinux isolinux) \
+		    -name "$1" -readable -type f -print -quit 2>/dev/null)"
+	if [ ! -f "$f" ]; then
+		die "Need a $1 file, please install syslinux/isolinux."
+	fi
+	echo "$f"
+	return 0
+}
+
+findovmf() {
+	local arch="$1"
+	shift
+	local -a names=(-false)
+	local name f
+	for name; do
+		names=("${names[@]}" -or -iname "$name")
+	done
+	for f in $(find -L $(efidirs edk2 ovmf) \
+			\( "${names[@]}" \) -readable -type f \
+			-print 2>/dev/null); do
+		if [ "$(efiarch "$f")" = "$arch" ]; then
+			echo "$f"
+			return 0
 		fi
 	done
-	if [ -z "$isolinux" ] ; then
-		echo 'Need an isolinux.bin file, please install syslinux/isolinux.'
-		exit 1
+	die "Need a $1 file for $arch, please install EDK2/OVMF."
+}
+
+do_mcopy() {
+	if [ ${#FDINITRDS[@]} -gt 0 ]; then
+		mcopy "${FDINITRDS[@]}" "$1"
+	fi
+	if [ -n "$efishell" ]; then
+		mmd "$1"EFI "$1"EFI/Boot
+		mcopy "$efishell" "$1"EFI/Boot/boot${kefiarch}.efi
 	fi
-	if [ -z "$ldlinux" ] ; then
-		echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.'
-		exit 1
+	if [ -n "$kefiarch" ]; then
+		echo linux "$KCMDLINE$initrdopts_efi" | \
+			mcopy - "$1"startup.nsh
 	fi
-	cp $isolinux $tmp_dir
-	cp $ldlinux $tmp_dir
-	cp $FBZIMAGE $tmp_dir/linux
-	echo "$KCMDLINE" > $tmp_dir/isolinux.cfg
-	if [ -f "$FDINITRD" ] ; then
-		cp "$FDINITRD" $tmp_dir/initrd.img
+	echo default linux "$KCMDLINE$initrdopts_syslinux" | \
+		mcopy - "$1"syslinux.cfg
+	mcopy "$FBZIMAGE" "$1"linux
+}
+
+genbzdisk() {
+	verify "$MTOOLSRC"
+	mformat -v 'LINUX_BOOT' a:
+	syslinux "$FIMAGE"
+	do_mcopy a:
+}
+
+genfdimage144() {
+	verify "$MTOOLSRC"
+	$dd if=/dev/zero of="$FIMAGE" bs=1024 count=1440 2>/dev/null
+	mformat -v 'LINUX_BOOT' v:
+	syslinux "$FIMAGE"
+	do_mcopy v:
+}
+
+genfdimage288() {
+	verify "$MTOOLSRC"
+	$dd if=/dev/zero of="$FIMAGE" bs=1024 count=2880 2>/dev/null
+	mformat -v 'LINUX_BOOT' w:
+	syslinux "$FIMAGE"
+	do_mcopy w:
+}
+
+genhdimage() {
+	verify "$MTOOLSRC"
+	mbr="$(findsyslinux mbr.bin)"
+	kefiarch="$(efiarch "$FBZIMAGE")"
+	if [ -n "$kefiarch" ]; then
+		# The efishell provides command line handling
+		efishell="$(findovmf $kefiarch shell.efi shell${kefiarch}.efi)"
+		ptype='-T 0xef'	# EFI system partition, no GPT
 	fi
-	genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \
-		-b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \
-		-boot-info-table $tmp_dir
-	isohybrid $FIMAGE 2>/dev/null || true
-	rm -rf $tmp_dir
+	sizes=$(filesizes "$FBZIMAGE" "${FDINITRDS[@]}" "$efishell")
+	# Allow 1% + 1 MiB for filesystem and partition table overhead,
+	# syslinux, and config files
+	megs=$(((sizes + sizes/100 + 2*1024*1024 - 1)/(1024*1024)))
+	$dd if=/dev/zero of="$FIMAGE" bs=$((1024*1024)) count=$megs 2>/dev/null
+	mpartition -I -c -s 32 -h 64 -t $megs $ptype -b 512 -a h:
+	$dd if="$mbr" of="$FIMAGE" bs=440 count=1 conv=notrunc 2>/dev/null
+	mformat -v 'LINUX_BOOT' -s 32 -h 64 -t $megs h:
+	syslinux --offset $((512*512)) "$FIMAGE"
+	do_mcopy h:
+}
+
+geniso() {
+	tmp_dir="$(dirname "$FIMAGE")/isoimage"
+	rm -rf "$tmp_dir"
+	mkdir "$tmp_dir"
+	isolinux=$(findsyslinux isolinux.bin)
+	ldlinux=$(findsyslinux  ldlinux.c32)
+	cp "$isolinux" "$ldlinux" "$tmp_dir"
+	cp "$FBZIMAGE" "$tmp_dir"/linux
+	echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg
+	cp "${FDINITRDS[@]}" "$tmp_dir"/
+	genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \
+		    -quiet -o "$FIMAGE" -b isolinux.bin \
+		    -c boot.cat -no-emul-boot -boot-load-size 4 \
+		    -boot-info-table "$tmp_dir"
+	isohybrid "$FIMAGE" 2>/dev/null || true
+	rm -rf "$tmp_dir"
 }
 
-case $1 in
+rm -f "$FIMAGE"
+
+case "$diskfmt" in
 	bzdisk)     genbzdisk;;
 	fdimage144) genfdimage144;;
 	fdimage288) genfdimage288;;
+	hdimage)    genhdimage;;
 	isoimage)   geniso;;
-	*)          echo 'Unknown image format'; exit 1;
+	*)          die "Unknown image format: $diskfmt";;
 esac
diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in
index efd6d2490c1d5..9e2662d013641 100644
--- a/arch/x86/boot/mtools.conf.in
+++ b/arch/x86/boot/mtools.conf.in
@@ -14,4 +14,7 @@ drive v:
 drive w:
   file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
 
+# Hard disk
+drive h:
+  file="@OBJ@/hdimage" partition=1 mformat_only
 
-- 
GitLab


From be5bb8021c9731f5593de6419ae35d3f16a3e497 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 02:09:38 -0700
Subject: [PATCH 0186/3804] x86/asm: Have the __ASM_FORM macros handle commas
 in arguments

The __ASM_FORM macros are really useful, but using them to define
instructions via .byte directives breaks because of the necessary
commas. Change the macros to handle commas correctly.

[ mingo: Removed stray whitespaces & aligned the definitions vertically. ]

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510090940.924953-2-hpa@zytor.com
---
 arch/x86/include/asm/asm.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 0603c7423aca2..93aad0b638065 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -3,25 +3,24 @@
 #define _ASM_X86_ASM_H
 
 #ifdef __ASSEMBLY__
-# define __ASM_FORM(x)	x
-# define __ASM_FORM_RAW(x)     x
-# define __ASM_FORM_COMMA(x) x,
+# define __ASM_FORM(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...)	x,## __VA_ARGS__,
 #else
 #include <linux/stringify.h>
-
-# define __ASM_FORM(x)	" " __stringify(x) " "
-# define __ASM_FORM_RAW(x)     __stringify(x)
-# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
+# define __ASM_FORM(x, ...)		" " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...)		    __stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...)	" " __stringify(x,##__VA_ARGS__) ","
 #endif
 
 #ifndef __x86_64__
 /* 32 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(a)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a)
+# define __ASM_SEL(a,b)		__ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(a)
 #else
 /* 64 bit */
-# define __ASM_SEL(a,b) __ASM_FORM(b)
-# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b)
+# define __ASM_SEL(a,b)		__ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(b)
 #endif
 
 #define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
-- 
GitLab


From d88be187a6e6f3a97dfa7ddc500bb9ca191b3772 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 02:09:39 -0700
Subject: [PATCH 0187/3804] x86/asm: Add _ASM_BYTES() macro for a .byte ...
 opcode sequence

Make it easy to create a sequence of bytes that can be used in either
assembly proper or in a C asm() statement.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510090940.924953-3-hpa@zytor.com
---
 arch/x86/include/asm/asm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 93aad0b638065..507a37a460276 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -13,6 +13,8 @@
 # define __ASM_FORM_COMMA(x, ...)	" " __stringify(x,##__VA_ARGS__) ","
 #endif
 
+#define _ASM_BYTES(x, ...)	__ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
 #ifndef __x86_64__
 /* 32 bit */
 # define __ASM_SEL(a,b)		__ASM_FORM(a)
-- 
GitLab


From eef23e72b78b36924aea8be5ec7c54e628c442ef Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 02:09:40 -0700
Subject: [PATCH 0188/3804] x86/asm: Use _ASM_BYTES() in <asm/nops.h>

Use the new generalized _ASM_BYTES() macro from <asm/asm.h> instead of
the "home grown" _ASM_MK_NOP() in <asm/nops.h>.

Add <asm/asm.h> and update <asm/nops.h> in the tools directory...

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510090940.924953-4-hpa@zytor.com
---
 arch/x86/include/asm/nops.h       |  24 ++--
 tools/arch/x86/include/asm/asm.h  | 189 ++++++++++++++++++++++++++++++
 tools/arch/x86/include/asm/nops.h |  24 ++--
 3 files changed, 209 insertions(+), 28 deletions(-)
 create mode 100644 tools/arch/x86/include/asm/asm.h

diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index c1e5e818ba160..c5573eaa5bb98 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
+#include <asm/asm.h>
+
 /*
  * Define nops for use with alternative() and for tracing.
  */
@@ -57,20 +59,14 @@
 
 #endif /* CONFIG_64BIT */
 
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
 #define ASM_NOP_MAX 8
 
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
new file mode 100644
index 0000000000000..507a37a460276
--- /dev/null
+++ b/tools/arch/x86/include/asm/asm.h
@@ -0,0 +1,189 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ASM_H
+#define _ASM_X86_ASM_H
+
+#ifdef __ASSEMBLY__
+# define __ASM_FORM(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_RAW(x, ...)		x,## __VA_ARGS__
+# define __ASM_FORM_COMMA(x, ...)	x,## __VA_ARGS__,
+#else
+#include <linux/stringify.h>
+# define __ASM_FORM(x, ...)		" " __stringify(x,##__VA_ARGS__) " "
+# define __ASM_FORM_RAW(x, ...)		    __stringify(x,##__VA_ARGS__)
+# define __ASM_FORM_COMMA(x, ...)	" " __stringify(x,##__VA_ARGS__) ","
+#endif
+
+#define _ASM_BYTES(x, ...)	__ASM_FORM(.byte x,##__VA_ARGS__ ;)
+
+#ifndef __x86_64__
+/* 32 bit */
+# define __ASM_SEL(a,b)		__ASM_FORM(a)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(a)
+#else
+/* 64 bit */
+# define __ASM_SEL(a,b)		__ASM_FORM(b)
+# define __ASM_SEL_RAW(a,b)	__ASM_FORM_RAW(b)
+#endif
+
+#define __ASM_SIZE(inst, ...)	__ASM_SEL(inst##l##__VA_ARGS__, \
+					  inst##q##__VA_ARGS__)
+#define __ASM_REG(reg)         __ASM_SEL_RAW(e##reg, r##reg)
+
+#define _ASM_PTR	__ASM_SEL(.long, .quad)
+#define _ASM_ALIGN	__ASM_SEL(.balign 4, .balign 8)
+
+#define _ASM_MOV	__ASM_SIZE(mov)
+#define _ASM_INC	__ASM_SIZE(inc)
+#define _ASM_DEC	__ASM_SIZE(dec)
+#define _ASM_ADD	__ASM_SIZE(add)
+#define _ASM_SUB	__ASM_SIZE(sub)
+#define _ASM_XADD	__ASM_SIZE(xadd)
+#define _ASM_MUL	__ASM_SIZE(mul)
+
+#define _ASM_AX		__ASM_REG(ax)
+#define _ASM_BX		__ASM_REG(bx)
+#define _ASM_CX		__ASM_REG(cx)
+#define _ASM_DX		__ASM_REG(dx)
+#define _ASM_SP		__ASM_REG(sp)
+#define _ASM_BP		__ASM_REG(bp)
+#define _ASM_SI		__ASM_REG(si)
+#define _ASM_DI		__ASM_REG(di)
+
+#ifndef __x86_64__
+/* 32 bit */
+
+#define _ASM_ARG1	_ASM_AX
+#define _ASM_ARG2	_ASM_DX
+#define _ASM_ARG3	_ASM_CX
+
+#define _ASM_ARG1L	eax
+#define _ASM_ARG2L	edx
+#define _ASM_ARG3L	ecx
+
+#define _ASM_ARG1W	ax
+#define _ASM_ARG2W	dx
+#define _ASM_ARG3W	cx
+
+#define _ASM_ARG1B	al
+#define _ASM_ARG2B	dl
+#define _ASM_ARG3B	cl
+
+#else
+/* 64 bit */
+
+#define _ASM_ARG1	_ASM_DI
+#define _ASM_ARG2	_ASM_SI
+#define _ASM_ARG3	_ASM_DX
+#define _ASM_ARG4	_ASM_CX
+#define _ASM_ARG5	r8
+#define _ASM_ARG6	r9
+
+#define _ASM_ARG1Q	rdi
+#define _ASM_ARG2Q	rsi
+#define _ASM_ARG3Q	rdx
+#define _ASM_ARG4Q	rcx
+#define _ASM_ARG5Q	r8
+#define _ASM_ARG6Q	r9
+
+#define _ASM_ARG1L	edi
+#define _ASM_ARG2L	esi
+#define _ASM_ARG3L	edx
+#define _ASM_ARG4L	ecx
+#define _ASM_ARG5L	r8d
+#define _ASM_ARG6L	r9d
+
+#define _ASM_ARG1W	di
+#define _ASM_ARG2W	si
+#define _ASM_ARG3W	dx
+#define _ASM_ARG4W	cx
+#define _ASM_ARG5W	r8w
+#define _ASM_ARG6W	r9w
+
+#define _ASM_ARG1B	dil
+#define _ASM_ARG2B	sil
+#define _ASM_ARG3B	dl
+#define _ASM_ARG4B	cl
+#define _ASM_ARG5B	r8b
+#define _ASM_ARG6B	r9b
+
+#endif
+
+/*
+ * Macros to generate condition code outputs from inline assembly,
+ * The output operand must be type "bool".
+ */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
+# define CC_OUT(c) "=@cc" #c
+#else
+# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
+# define CC_OUT(c) [_cc_ ## c] "=qm"
+#endif
+
+/* Exception table entry */
+#ifdef __ASSEMBLY__
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
+	.pushsection "__ex_table","a" ;				\
+	.balign 4 ;						\
+	.long (from) - . ;					\
+	.long (to) - . ;					\
+	.long (handler) - . ;					\
+	.popsection
+
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+# ifdef CONFIG_KPROBES
+#  define _ASM_NOKPROBE(entry)					\
+	.pushsection "_kprobe_blacklist","aw" ;			\
+	_ASM_ALIGN ;						\
+	_ASM_PTR (entry);					\
+	.popsection
+# else
+#  define _ASM_NOKPROBE(entry)
+# endif
+
+#else /* ! __ASSEMBLY__ */
+# define _EXPAND_EXTABLE_HANDLE(x) #x
+# define _ASM_EXTABLE_HANDLE(from, to, handler)			\
+	" .pushsection \"__ex_table\",\"a\"\n"			\
+	" .balign 4\n"						\
+	" .long (" #from ") - .\n"				\
+	" .long (" #to ") - .\n"				\
+	" .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n"	\
+	" .popsection\n"
+
+# define _ASM_EXTABLE(from, to)					\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_default)
+
+# define _ASM_EXTABLE_UA(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+
+# define _ASM_EXTABLE_CPY(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
+# define _ASM_EXTABLE_FAULT(from, to)				\
+	_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
+
+/* For C file, we already have NOKPROBE_SYMBOL macro */
+
+/*
+ * This output constraint should be used for any inline asm which has a "call"
+ * instruction.  Otherwise the asm may be inserted before the frame pointer
+ * gets set up by the containing function.  If you forget to do this, objtool
+ * may print a "call without frame pointer save/setup" warning.
+ */
+register unsigned long current_stack_pointer asm(_ASM_SP);
+#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_ASM_H */
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index c1e5e818ba160..c5573eaa5bb98 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_NOPS_H
 #define _ASM_X86_NOPS_H
 
+#include <asm/asm.h>
+
 /*
  * Define nops for use with alternative() and for tracing.
  */
@@ -57,20 +59,14 @@
 
 #endif /* CONFIG_64BIT */
 
-#ifdef __ASSEMBLY__
-#define _ASM_MK_NOP(x) .byte x
-#else
-#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
-#endif
-
-#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
-#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
-#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
-#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
-#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
-#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
-#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
-#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1)
+#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2)
+#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3)
+#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4)
+#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5)
+#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6)
+#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
+#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
 
 #define ASM_NOP_MAX 8
 
-- 
GitLab


From e5af36b2adb858e982d78d41d7363d05d951a19a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 21 Apr 2021 19:40:56 +0200
Subject: [PATCH 0189/3804] cpufreq: intel_pstate: Use HWP if enabled by
 platform firmware

It turns out that there are systems where HWP is enabled during
initialization by the platform firmware (BIOS), but HWP EPP support
is not advertised.

After commit 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP
if EPP is not supported") intel_pstate refuses to use HWP on those
systems, but the fallback PERF_CTL interface does not work on them
either because of enabled HWP, and once enabled, HWP cannot be
disabled.  Consequently, the users of those systems cannot control
CPU performance scaling.

Address this issue by making intel_pstate use HWP unconditionally if
it is enabled already when the driver starts.

Fixes: 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported")
Reported-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Tested-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: 5.9+ <stable@vger.kernel.org> # 5.9+
---
 drivers/cpufreq/intel_pstate.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index f0401064d7aa5..0e69dffd5a767 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
 	{}
 };
 
+static bool intel_pstate_hwp_is_enabled(void)
+{
+	u64 value;
+
+	rdmsrl(MSR_PM_ENABLE, value);
+	return !!(value & 0x1);
+}
+
 static int __init intel_pstate_init(void)
 {
 	const struct x86_cpu_id *id;
@@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void)
 		 * Avoid enabling HWP for processors without EPP support,
 		 * because that means incomplete HWP implementation which is a
 		 * corner case and supporting it is generally problematic.
+		 *
+		 * If HWP is enabled already, though, there is no choice but to
+		 * deal with it.
 		 */
-		if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+		if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) ||
+		    intel_pstate_hwp_is_enabled()) {
 			hwp_active++;
 			hwp_mode_bdw = id->driver_data;
 			intel_pstate.attr = hwp_cpufreq_attrs;
-- 
GitLab


From d4335d058f8430a0ce2b43dab9531f3a3cf9fe2c Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 10 May 2021 11:38:44 +0100
Subject: [PATCH 0190/3804] ASoC: codecs: lpass-rx-macro: add missing
 MODULE_DEVICE_TABLE

Fix module loading by adding missing MODULE_DEVICE_TABLE.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20210510103844.1532-1-srinivas.kandagatla@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/lpass-rx-macro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c
index 4f1b569d7c472..e074c7908c232 100644
--- a/sound/soc/codecs/lpass-rx-macro.c
+++ b/sound/soc/codecs/lpass-rx-macro.c
@@ -3579,6 +3579,7 @@ static const struct of_device_id rx_macro_dt_match[] = {
 	{ .compatible = "qcom,sm8250-lpass-rx-macro" },
 	{ }
 };
+MODULE_DEVICE_TABLE(of, rx_macro_dt_match);
 
 static struct platform_driver rx_macro_driver = {
 	.driver = {
-- 
GitLab


From 14c0c423746fe7232a093a68809a4bc6233eed60 Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Sat, 8 May 2021 11:15:12 +0800
Subject: [PATCH 0191/3804] ASoC: codecs: lpass-tx-macro: add missing
 MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20210508031512.53783-1-cuibixuan@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/lpass-tx-macro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index 4eede9ad57bfd..3d3a6e31551b7 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -1846,6 +1846,7 @@ static const struct of_device_id tx_macro_dt_match[] = {
 	{ .compatible = "qcom,sm8250-lpass-tx-macro" },
 	{ }
 };
+MODULE_DEVICE_TABLE(of, tx_macro_dt_match);
 static struct platform_driver tx_macro_driver = {
 	.driver = {
 		.name = "tx_macro",
-- 
GitLab


From b23584d6ce0212b9ad6cb7be19a7123461ed9e09 Mon Sep 17 00:00:00 2001
From: Shengjiu Wang <shengjiu.wang@nxp.com>
Date: Sat, 8 May 2021 18:46:47 +0800
Subject: [PATCH 0192/3804] ASoC: ak5558: Correct the dai name for ak5552

Correct the dai name for ak5552. The name should be "ak5552-aif".

Fixes: d8c5c82e4e5b ("ASoC: ak5558: Add support for ak5552")
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Link: https://lore.kernel.org/r/1620470807-12056-1-git-send-email-shengjiu.wang@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/ak5558.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c
index 34aed80db0eb0..37d4600b6f2c2 100644
--- a/sound/soc/codecs/ak5558.c
+++ b/sound/soc/codecs/ak5558.c
@@ -307,7 +307,7 @@ static struct snd_soc_dai_driver ak5558_dai = {
 };
 
 static struct snd_soc_dai_driver ak5552_dai = {
-	.name = "ak5558-aif",
+	.name = "ak5552-aif",
 	.capture = {
 		.stream_name = "Capture",
 		.channels_min = 1,
-- 
GitLab


From 680ec0549a055eb464dce6ffb4bfb736ef87236e Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 21:12:27 +0200
Subject: [PATCH 0193/3804] spi: spi-fsl-dspi: Fix a resource leak in an error
 handling path

'dspi_request_dma()' should be undone by a 'dspi_release_dma()' call in the
error handling path of the probe function, as already done in the remove
function

Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Link: https://lore.kernel.org/r/d51caaac747277a1099ba8dea07acd85435b857e.1620587472.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 0287366874882..fb45e6af66381 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1375,11 +1375,13 @@ poll_mode:
 	ret = spi_register_controller(ctlr);
 	if (ret != 0) {
 		dev_err(&pdev->dev, "Problem registering DSPI ctlr\n");
-		goto out_free_irq;
+		goto out_release_dma;
 	}
 
 	return ret;
 
+out_release_dma:
+	dspi_release_dma(dspi);
 out_free_irq:
 	if (dspi->irq)
 		free_irq(dspi->irq, dspi);
-- 
GitLab


From dc5fa590273890a8541ce6e999d606bfb2d73797 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Sat, 8 May 2021 14:02:14 +0800
Subject: [PATCH 0194/3804] spi: take the SPI IO-mutex in the spi_set_cs_timing
 method

This patch takes the io_mutex to prevent an unprotected HW
register modification in the set_cs_timing callback.

Fixes: 4cea6b8cc34e ("spi: add power control when set_cs_timing")
Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Link: https://lore.kernel.org/r/20210508060214.1485-1-leilk.liu@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f9885c0965637..a565e7d6bf3ba 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3457,9 +3457,12 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
 
 	if (spi->controller->set_cs_timing &&
 	    !(spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) {
+		mutex_lock(&spi->controller->io_mutex);
+
 		if (spi->controller->auto_runtime_pm) {
 			status = pm_runtime_get_sync(parent);
 			if (status < 0) {
+				mutex_unlock(&spi->controller->io_mutex);
 				pm_runtime_put_noidle(parent);
 				dev_err(&spi->controller->dev, "Failed to power device: %d\n",
 					status);
@@ -3470,11 +3473,13 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
 								hold, inactive);
 			pm_runtime_mark_last_busy(parent);
 			pm_runtime_put_autosuspend(parent);
-			return status;
 		} else {
-			return spi->controller->set_cs_timing(spi, setup, hold,
+			status = spi->controller->set_cs_timing(spi, setup, hold,
 							      inactive);
 		}
+
+		mutex_unlock(&spi->controller->io_mutex);
+		return status;
 	}
 
 	if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) ||
-- 
GitLab


From a3bc4ffeedf4693262fe7c6d133dcfcacd3d18c2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:48:26 -0300
Subject: [PATCH 0195/3804] tools headers UAPI: Update tools's copy of drm.h
 headers

Picking the changes from:

  b603e810f740e76b ("drm/uapi: document kernel capabilities")

Doesn't result in any tooling changes:

  $ tools/perf/trace/beauty/drm_ioctl.sh  > before
  $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h
  $ tools/perf/trace/beauty/drm_ioctl.sh  > after
  $ diff -u before after

Silencing these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h'
  diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h

Cc: Simon Ser <contact@emersion.fr>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/drm.h | 125 +++++++++++++++++++++++++++++++++--
 1 file changed, 121 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 0827037c54847..67b94bc3c8852 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -625,30 +625,147 @@ struct drm_gem_open {
 	__u64 size;
 };
 
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
 #define DRM_CAP_DUMB_BUFFER		0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a CRTC index in the high bits of
+ * &drm_wait_vblank_request.type.
+ *
+ * Starting kernel version 2.6.39, this capability is always set to 1.
+ */
 #define DRM_CAP_VBLANK_HIGH_CRTC	0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFERRED_DEPTH	0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFER_SHADOW	0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors. See
+ * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ */
 #define DRM_CAP_PRIME			0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ */
 #define  DRM_PRIME_CAP_IMPORT		0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ */
 #define  DRM_PRIME_CAP_EXPORT		0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting kernel version 4.15, this capability is always set to 1.
+ */
 #define DRM_CAP_TIMESTAMP_MONOTONIC	0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ */
 #define DRM_CAP_ASYNC_PAGE_FLIP		0x7
-/*
- * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight
- * combination for the hardware cursor. The intention is that a hardware
- * agnostic userspace can query a cursor plane size to use.
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware agnostic userspace can query a cursor plane size to use.
  *
  * Note that the cross-driver contract is to merely return a valid size;
  * drivers are free to attach another meaning on top, eg. i915 returns the
  * maximum plane size.
  */
 #define DRM_CAP_CURSOR_WIDTH		0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
 #define DRM_CAP_CURSOR_HEIGHT		0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
 #define DRM_CAP_ADDFB2_MODIFIERS	0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
 #define DRM_CAP_PAGE_FLIP_TARGET	0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting kernel version 4.12, this capability is always set to 1.
+ */
 #define DRM_CAP_CRTC_IN_VBLANK_EVENT	0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ		0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ_TIMELINE	0x14
 
 /* DRM_IOCTL_GET_CAP ioctl argument type */
-- 
GitLab


From 0fdee797d60d71e5a6fd59aa573d84a858e715dd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:51:17 -0300
Subject: [PATCH 0196/3804] tools headers UAPI: Sync drm/i915_drm.h with the
 kernel sources

To pick the changes in:

  b5b6f6a610127b17 ("drm/i915/gem: Drop legacy execbuffer support (v2)")

That don't result in any change in tooling as this is just adding a
comment.

Only silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h'
  diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/i915_drm.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 1987e2ea79a3b..ddc47bbf48b6d 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object {
 	__u64 offset;
 };
 
+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
 struct drm_i915_gem_execbuffer {
 	/**
 	 * List of buffers to be validated with their relocations to be
-- 
GitLab


From b3172585b13d7171c32cfabdf938eca7fdfe9b31 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 3 May 2021 11:53:37 -0300
Subject: [PATCH 0197/3804] tools arch x86: Sync the msr-index.h copy with the
 kernel sources

To pick up the changes from these csets:

  d0946a882e622022 ("perf/x86/intel: Hybrid PMU support for perf capabilities")

That cause no changes to tooling as it isn't adding any new MSR, just
some capabilities for a pre-existing one:

  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before
  $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h
  $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after
  $ diff -u before after
  $

Just silences this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h'
  diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h

Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/msr-index.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 45029354e0a8b..742d89a00721d 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -185,6 +185,9 @@
 #define MSR_PEBS_DATA_CFG		0x000003f2
 #define MSR_IA32_DS_AREA		0x00000600
 #define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define PERF_CAP_METRICS_IDX		15
+#define PERF_CAP_PT_IDX			16
+
 #define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
 
 #define MSR_IA32_RTIT_CTL		0x00000570
@@ -265,6 +268,7 @@
 #define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT		1
 #define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT	(1UL <<  2)
 #define DEBUGCTLMSR_TR			(1UL <<  6)
 #define DEBUGCTLMSR_BTS			(1UL <<  7)
 #define DEBUGCTLMSR_BTINT		(1UL <<  8)
-- 
GitLab


From e8c1167606c63fd8f9934d0b6ce80281463a4945 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 2 Apr 2021 18:40:20 +0900
Subject: [PATCH 0198/3804] perf record: Disallow -c and -F option at the same
 time

It's confusing which one is effective when both options are given.
The current code happens to use -c in this case but users might not be
aware of it.  We can change it to complain about that instead of relying
on the implicit priority.

Before:

  $ perf record -c 111111 -F 99 true
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.031 MB perf.data (8 samples) ]

  $ perf evlist -F
  cycles: sample_period=111111
  $

After:
  $ perf record -c 111111 -F 99 true
  cannot set frequency and period at the same time
  $

So this change can break existing usages, but I think it's rare to have
both options and it'd be better changing them.

Suggested-by: Alexey Alexandrov <aalexand@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210402094020.28164-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/record.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index f99852d54b147..43e5b563dee89 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate)
 static int record_opts__config_freq(struct record_opts *opts)
 {
 	bool user_freq = opts->user_freq != UINT_MAX;
+	bool user_interval = opts->user_interval != ULLONG_MAX;
 	unsigned int max_rate;
 
-	if (opts->user_interval != ULLONG_MAX)
+	if (user_interval && user_freq) {
+		pr_err("cannot set frequency and period at the same time\n");
+		return -1;
+	}
+
+	if (user_interval)
 		opts->default_interval = opts->user_interval;
 	if (user_freq)
 		opts->freq = opts->user_freq;
-- 
GitLab


From 7aa3c9eabdf76017679e975e2ffd50cde3c010b8 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 6 May 2021 15:56:40 -0700
Subject: [PATCH 0199/3804] perf jevents: Silence warning for ArchStd files

JSON files in the level 1 directory are used for ArchStd events (see
preprocess_arch_std_files), as such they shouldn't be warned about.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210506225640.1461000-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index ed4f0bd72e5a3..7422b0ea87901 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb,
 			mapfile = strdup(fpath);
 			return 0;
 		}
-
-		pr_info("%s: Ignoring file %s\n", prog, fpath);
+		if (is_json_file(bname))
+			pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath);
+		else
+			pr_info("%s: Ignoring file %s\n", prog, fpath);
 		return 0;
 	}
 
-- 
GitLab


From a11c9a6e472457cf9eeafb585fc5c912f51d1b23 Mon Sep 17 00:00:00 2001
From: Dmitry Koshelev <karaghiozis@gmail.com>
Date: Thu, 6 May 2021 13:11:49 +0000
Subject: [PATCH 0200/3804] perf session: Fix swapping of cpu_map and
 stat_config records

'type' field in perf_record_cpu_map_data struct is 16-bit
wide and so should be swapped using bswap_16().

'nr' field in perf_record_stat_config struct should be
swapped before being used for size calculation.

Signed-off-by: Dmitry Koshelev <karaghiozis@gmail.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210506131244.13328-1-karaghiozis@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index a12cf4f0e97a7..106b3d60881a5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event,
 	struct perf_record_record_cpu_map *mask;
 	unsigned i;
 
-	data->type = bswap_64(data->type);
+	data->type = bswap_16(data->type);
 
 	switch (data->type) {
 	case PERF_CPU_MAP__CPUS:
@@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event,
 {
 	u64 size;
 
-	size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+	size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
 	size += 1; /* nr item itself */
 	mem_bswap_64(&event->stat_config.nr, size);
 }
-- 
GitLab


From ad1237c30d975535a669746496cbed136aa5a045 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 8 May 2021 22:50:20 +0200
Subject: [PATCH 0201/3804] perf tools: Fix dynamic libbpf link

Justin reported broken build with LIBBPF_DYNAMIC=1.

When linking libbpf dynamically we need to use perf's
hashmap object, because it's not exported in libbpf.so
(only in libbpf.a).

Following build is now passing:

  $ make LIBBPF_DYNAMIC=1
    BUILD:   Doing 'make -j8' parallel build
    ...
  $ ldd perf | grep libbpf
        libbpf.so.0 => /lib64/libbpf.so.0 (0x00007fa7630db000)

Fixes: eee19501926d ("perf tools: Grab a copy of libbpf's hashmap")
Reported-by: Justin M. Forbes <jforbes@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210508205020.617984-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 1 +
 tools/perf/util/Build      | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0d6619064a838..406a9519145e5 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -540,6 +540,7 @@ ifndef NO_LIBELF
       ifdef LIBBPF_DYNAMIC
         ifeq ($(feature-libbpf), 1)
           EXTLIBS += -lbpf
+          $(call detected,CONFIG_LIBBPF_DYNAMIC)
         else
           dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
         endif
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8c0d9f368ebcf..b64bdc1a7026d 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
 perf-$(CONFIG_LIBELF) += probe-event.o
 
+ifdef CONFIG_LIBBPF_DYNAMIC
+  hashmap := 1
+endif
 ifndef CONFIG_LIBBPF
+  hashmap := 1
+endif
+
+ifdef hashmap
 perf-y += hashmap.o
 endif
 
-- 
GitLab


From 0d943d5fde6070c2661a99618ea95b99655589ad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:39:02 -0300
Subject: [PATCH 0202/3804] tools headers UAPI: Sync linux/kvm.h with the
 kernel sources

To pick the changes in:

  15fb7de1a7f5af0d ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command")
  3bf725699bf62494 ("KVM: arm64: Add support for the KVM PTP service")
  4cfdd47d6d95aca4 ("KVM: SVM: Add KVM_SEV SEND_START command")
  54526d1fd59338fd ("KVM: x86: Support KVM VMs sharing SEV context")
  5569e2e7a650dfff ("KVM: SVM: Add support for KVM_SEV_SEND_CANCEL command")
  8b13c36493d8cb56 ("KVM: introduce KVM_CAP_SET_GUEST_DEBUG2")
  af43cbbf954b50ca ("KVM: SVM: Add support for KVM_SEV_RECEIVE_START command")
  d3d1af85e2c75bb5 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command")
  fe7e948837f312d8 ("KVM: x86: Add capability to grant VM access to privileged SGX attribute")

That don't cause any change in tooling as it doesn't introduce any new
ioctl.

  $ grep kvm tools/perf/trace/beauty/*.sh
  tools/perf/trace/beauty/kvm_ioctl.sh:printf "static const char *kvm_ioctl_cmds[] = {\n"
  tools/perf/trace/beauty/kvm_ioctl.sh:egrep $regex ${header_dir}/kvm.h	| \
  $
  $ tools/perf/trace/beauty/kvm_ioctl.sh > before
  $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h
  $ tools/perf/trace/beauty/kvm_ioctl.sh > after
  $ diff -u before after
  $

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h'
  diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h

Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Jianyong Wu <jianyong.wu@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Nathan Tempelman <natet@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Steve Rutherford <srutherford@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/kvm.h | 45 ++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f6afee209620d..3fd9a7e9d90cd 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING 192
 #define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
+#define KVM_CAP_SET_GUEST_DEBUG2 195
+#define KVM_CAP_SGX_ATTRIBUTE 196
+#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
+#define KVM_CAP_PTP_KVM 198
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1671,6 +1675,8 @@ enum sev_cmd_id {
 	KVM_SEV_CERT_EXPORT,
 	/* Attestation report */
 	KVM_SEV_GET_ATTESTATION_REPORT,
+	/* Guest Migration Extension */
+	KVM_SEV_SEND_CANCEL,
 
 	KVM_SEV_NR_MAX,
 };
@@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report {
 	__u32 len;
 };
 
+struct kvm_sev_send_start {
+	__u32 policy;
+	__u64 pdh_cert_uaddr;
+	__u32 pdh_cert_len;
+	__u64 plat_certs_uaddr;
+	__u32 plat_certs_len;
+	__u64 amd_certs_uaddr;
+	__u32 amd_certs_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+	__u32 handle;
+	__u32 policy;
+	__u64 pdh_uaddr;
+	__u32 pdh_len;
+	__u64 session_uaddr;
+	__u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+	__u64 hdr_uaddr;
+	__u32 hdr_len;
+	__u64 guest_uaddr;
+	__u32 guest_len;
+	__u64 trans_uaddr;
+	__u32 trans_len;
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
-- 
GitLab


From b35629bc2fd59691504debda99c320cf966c8e3a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:45:25 -0300
Subject: [PATCH 0203/3804] tools headers kvm: Sync kvm headers with the kernel
 sources

To pick the changes in:

  3c0c2ad1ae75963c ("KVM: VMX: Add basic handling of VM-Exit from SGX enclave")

None of them trigger any changes in tooling, this time this is just to silence
these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h'
  diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/vmx.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index b8e650a985e35..946d761adbd3d 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -27,6 +27,7 @@
 
 
 #define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE	0x08000000
 
 #define EXIT_REASON_EXCEPTION_NMI       0
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
-- 
GitLab


From a00b7e39d6b56e6f49cdd51a9ebf92627a19d877 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 7 May 2021 17:54:35 +0900
Subject: [PATCH 0204/3804] perf tools: Fix a build error on arm64 with clang

Since clang's -Wmissing-field-initializers warns if a data
structure is initialized with a single NULL as below,

 ----
 tools/perf $ make CC=clang LLVM=1
 ...
 arch/arm64/util/kvm-stat.c:74:9: error: missing field 'ops' initializer [-Werror,-Wmissing-field-initializers]
         { NULL },
                ^
 1 error generated.
 ----

add another field initializer explicitly, the same as the other
archs' kvm-stat.c code does.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Link: http://lore.kernel.org/lkml/162037767540.94840.15758657049033010518.stgit@devnote2
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/util/kvm-stat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c
index 2303256b7d05e..73d18e0ed6f6a 100644
--- a/tools/perf/arch/arm64/util/kvm-stat.c
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
 		.name	= "vmexit",
 		.ops	= &exit_events,
 	},
-	{ NULL },
+	{ NULL, NULL },
 };
 
 const char * const kvm_skip_events[] = {
-- 
GitLab


From f8bcb061ea013a9b39a071b9dd9f6ea0aa2caf72 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 09:55:30 -0300
Subject: [PATCH 0205/3804] tools headers UAPI: Sync files changed by landlock,
 quotactl_path and mount_setattr new syscalls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To pick the changes in these csets:

  a49f4f81cb48925e ("arch: Wire up Landlock syscalls")
  2a1867219c7b27f9 ("fs: add mount_setattr()")
  fa8b90070a80bb1a ("quota: wire up quotactl_path")

That silences these perf build warnings and adds support for those new
syscalls in tools such as 'perf trace'.

For instance, this is now possible:

  # ~acme/bin/perf trace -v -e landlock*
  event qualifier tracepoint filter: (common_pid != 129365 && common_pid != 3502) && (id == 444 || id == 445 || id == 446)
  ^C#

That is the filter expression attached to the raw_syscalls:sys_{enter,exit}
tracepoints.

  $ grep landlock tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
  444	common	landlock_create_ruleset	sys_landlock_create_ruleset
  445	common	landlock_add_rule	sys_landlock_add_rule
  446	common	landlock_restrict_self	sys_landlock_restrict_self
  $

This addresses these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
  diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: James Morris <jamorris@linux.microsoft.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mickaël Salaün <mic@linux.microsoft.com>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h             | 11 ++++++++++-
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl |  5 +++++
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  |  4 ++++
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     |  4 ++++
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   |  4 ++++
 5 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index ce58cff99b665..6de5a7fc066b8 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
+#define __NR_quotactl_path 443
+__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+
+#define __NR_landlock_create_ruleset 444
+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
+#define __NR_landlock_add_rule 445
+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+#define __NR_landlock_restrict_self 446
+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
 
 #undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 447
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 91649690b52f1..9974f5f8e49bc 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -356,3 +356,8 @@
 439	n64	faccessat2			sys_faccessat2
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
+442	n64	mount_setattr			sys_mount_setattr
+443	n64	quotactl_path			sys_quotactl_path
+444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
+445	n64	landlock_add_rule		sys_landlock_add_rule
+446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 0b2480cf3e479..2e68fbb57cc66 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,3 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
+443	common	quotactl_path			sys_quotactl_path
+444	common	landlock_create_ruleset		sys_landlock_create_ruleset
+445	common	landlock_add_rule		sys_landlock_add_rule
+446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 3abef2144dac7..7e4a2aba366df 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,3 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
+443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
+445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
+446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 7bf01cbe582f0..ecd551b08d052 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,6 +364,10 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
+443	common	quotactl_path		sys_quotactl_path
+444	common	landlock_create_ruleset	sys_landlock_create_ruleset
+445	common	landlock_add_rule	sys_landlock_add_rule
+446	common	landlock_restrict_self	sys_landlock_restrict_self
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
GitLab


From 5a80ee4219a52194f0e815bbceec40eb32c523ec Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:06:40 -0300
Subject: [PATCH 0206/3804] tools headers UAPI: Sync linux/prctl.h with the
 kernel sources

To pick a new prctl introduced in:

  201698626fbca1cf ("arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)")

That results in

  $ grep prctl tools/perf/trace/beauty/*.sh
  tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_options[] = {\n"
  tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
  tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_set_mm_options[] = {\n"
  tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | \
  tools/perf/trace/beauty/x86_arch_prctl.sh:prctl_arch_header=${x86_header_dir}/prctl.h
  tools/perf/trace/beauty/x86_arch_prctl.sh:	printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry
  tools/perf/trace/beauty/x86_arch_prctl.sh:	printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx
  tools/perf/trace/beauty/x86_arch_prctl.sh:	egrep -q $regex ${prctl_arch_header} && \
  tools/perf/trace/beauty/x86_arch_prctl.sh:	(egrep $regex ${prctl_arch_header} | \
  $ tools/perf/trace/beauty/prctl_option.sh > before
  $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h
  $ tools/perf/trace/beauty/prctl_option.sh > after
  $ diff -u before after
  --- before	2021-05-09 10:06:10.064559675 -0300
  +++ after	2021-05-09 10:06:21.319791396 -0300
  @@ -54,6 +54,8 @@
   	[57] = "SET_IO_FLUSHER",
   	[58] = "GET_IO_FLUSHER",
   	[59] = "SET_SYSCALL_USER_DISPATCH",
  +	[60] = "PAC_SET_ENABLED_KEYS",
  +	[61] = "PAC_GET_ENABLED_KEYS",
   };
   static const char *prctl_set_mm_options[] = {
   	[1] = "START_CODE",
  $

Now users can do:

  # perf trace -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS"
^C#
  # trace -v -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS"
  New filter for syscalls:sys_enter_prctl: (option==0x3d) && (common_pid != 5519 && common_pid != 3404)
^C#

And also when prctl appears in a session, its options will be
translated to the string.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Peter Collingbourne <pcc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/prctl.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 667f1aed091c2..18a9f59dc067f 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -255,4 +255,8 @@ struct prctl_mm_map {
 # define SYSCALL_DISPATCH_FILTER_ALLOW	0
 # define SYSCALL_DISPATCH_FILTER_BLOCK	1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS		60
+#define PR_PAC_GET_ENABLED_KEYS		61
+
 #endif /* _LINUX_PRCTL_H */
-- 
GitLab


From fb24e308b6310541e70d11a3f19dc40742974b95 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:19:37 -0300
Subject: [PATCH 0207/3804] tools arch: Update arch/x86/lib/mem{cpy,set}_64.S
 copies used in 'perf bench mem memcpy'

To bring in the change made in this cset:

 5e21a3ecad1500e3 ("x86/alternative: Merge include files")

This just silences these perf tools build warnings, no change in the tools:

  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
  diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
  Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
  diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

Cc: Borislav Petkov <bp@suse.de>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/lib/memcpy_64.S                         | 2 +-
 tools/arch/x86/lib/memset_64.S                         | 2 +-
 tools/include/asm/{alternative-asm.h => alternative.h} | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename tools/include/asm/{alternative-asm.h => alternative.h} (100%)

diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 1e299ac73c869..1cc9da6e29c79 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 .pushsection .noinstr.text, "ax"
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 0bfd26e4ca9e9..9827ae267f96e 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -3,7 +3,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 /*
diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative.h
similarity index 100%
rename from tools/include/asm/alternative-asm.h
rename to tools/include/asm/alternative.h
-- 
GitLab


From 3916329309eace19e8c32bc821064a119474c309 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:21:33 -0300
Subject: [PATCH 0208/3804] tools include UAPI powerpc: Sync errno.h with the
 kernel headers

To pick the change in:

  7de21e679e6a789f ("powerpc: fix EDEADLOCK redefinition error in uapi/asm/errno.h")

That will make the errno number -> string tables pick up this change on powerpc.

Silencing this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/errno.h' differs from latest version at 'arch/powerpc/include/uapi/asm/errno.h'
  diff -u tools/arch/powerpc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Tony Ambardar <tony.ambardar@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/powerpc/include/uapi/asm/errno.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h
index cc79856896a19..4ba87de32be00 100644
--- a/tools/arch/powerpc/include/uapi/asm/errno.h
+++ b/tools/arch/powerpc/include/uapi/asm/errno.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_POWERPC_ERRNO_H
 #define _ASM_POWERPC_ERRNO_H
 
+#undef	EDEADLOCK
 #include <asm-generic/errno.h>
 
 #undef	EDEADLOCK
-- 
GitLab


From 6faf64f5248166ecaf50107e883c383e0b66bb70 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:24:29 -0300
Subject: [PATCH 0209/3804] tools headers cpufeatures: Sync with the kernel
 sources

To pick the changes from:

  4e6292114c741221 ("x86/paravirt: Add new features for paravirt patching")
  a161545ab53b174c ("x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit")
  a89dfde3dc3c2dbf ("x86: Remove dynamic NOP selection")
  b8921dccf3b25798 ("x86/cpufeatures: Add SGX1 and SGX2 sub-features")
  f21d4d3b97a86035 ("x86/cpufeatures: Enumerate #DB for bus lock detection")
  f333374e108e7e4c ("x86/cpufeatures: Add the Virtual SPEC_CTRL feature")

This only causes these perf files to be rebuilt:

  CC       /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o
  CC       /tmp/build/perf/bench/mem-memset-x86-64-asm.o

And addresses this perf build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Cc: Babu Moger <babu.moger@amd.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index cc96e26d69f7a..ac37830ae9412 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@
 
 /* CPU types for specific tunings: */
 #define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7			( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7			( 3*32+ 5) "" Athlon */
 #define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -236,6 +236,8 @@
 #define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK		( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT		( 8*32+21) /* "" PV vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -290,6 +292,8 @@
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL	(11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT	(11*32+ 6) /* #AC for split lock */
 #define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1		(11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2		(11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
@@ -336,6 +340,7 @@
 #define X86_FEATURE_AVIC		(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
+#define X86_FEATURE_V_SPEC_CTRL		(15*32+20) /* Virtual SPEC_CTRL */
 #define X86_FEATURE_SVME_ADDR_CHK	(15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -354,6 +359,7 @@
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT	(16*32+24) /* Bus Lock detect */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
@@ -374,6 +380,7 @@
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU		(18*32+15) /* "" This part has CPUs of more than one type */
 #define X86_FEATURE_TSXLDTRK		(18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG		(18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR		(18*32+19) /* Intel ARCH LBR */
-- 
GitLab


From 71d7924b3e8acaca6a3b0fc3261170031ada3b70 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sun, 9 May 2021 10:29:10 -0300
Subject: [PATCH 0210/3804] tools headers UAPI: Sync perf_event.h with the
 kernel sources

To pick up the changes in:

  2b26f0aa004995f4 ("perf: Support only inheriting events if cloned with CLONE_THREAD")
  2e498d0a74e5b88a ("perf: Add support for event removal on exec")
  547b60988e631f74 ("perf: aux: Add flags for the buffer format")
  55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE")
  7dde51767ca5339e ("perf: aux: Add CoreSight PMU buffer formats")
  97ba62b278674293 ("perf: Add support for SIGTRAP on perf events")
  d0d1dd628527c77d ("perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event")

Also change the expected sizeof(struct perf_event_attr) from 120 to 128 due to
fields being added for the SIGTRAP changes.

Addressing this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Marco Elver <elver@google.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h   | 26 ++++++++++++++++++++-----
 tools/perf/tests/attr/base-record       |  2 +-
 tools/perf/tests/attr/base-stat         |  2 +-
 tools/perf/tests/attr/system-wide-dummy |  2 +-
 4 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 14332f4cf8167..bf8143505c49d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -127,6 +127,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
@@ -326,6 +327,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
 #define PERF_ATTR_SIZE_VER6	120	/* add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7	128	/* add: sig_data */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -404,7 +406,10 @@ struct perf_event_attr {
 				cgroup         :  1, /* include cgroup events */
 				text_poke      :  1, /* include text poke events */
 				build_id       :  1, /* use build id in mmap2 events */
-				__reserved_1   : 29;
+				inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
+				remove_on_exec :  1, /* event is removed from task on exec */
+				sigtrap        :  1, /* send synchronous SIGTRAP on event */
+				__reserved_1   : 26;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -456,6 +461,12 @@ struct perf_event_attr {
 	__u16	__reserved_2;
 	__u32	aux_sample_size;
 	__u32	__reserved_3;
+
+	/*
+	 * User provided data if sigtrap=1, passed back to user via
+	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 */
+	__u64	sig_data;
 };
 
 /*
@@ -1171,10 +1182,15 @@ enum perf_callchain_context {
 /**
  * PERF_RECORD_AUX::flags bits
  */
-#define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL		0x04	/* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION		0x08	/* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED			0x01	/* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE			0x02	/* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL			0x04	/* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION			0x08	/* sample collided with another */
+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK	0xff00	/* PMU specific trace format type */
+
+/* CoreSight PMU AUX buffer formats */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT	0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW		0x0100 /* Raw format of the source */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 645009c08b3cb..4a7b8deef3fdd 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0|1
-size=120
+size=128
 config=0
 sample_period=*
 sample_type=263
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index b0f42c34882e8..4081644565306 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0
-size=120
+size=128
 config=0
 sample_period=0
 sample_type=65536
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
index eba723cc0d380..86a15dd359d93 100644
--- a/tools/perf/tests/attr/system-wide-dummy
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -7,7 +7,7 @@ cpu=*
 pid=-1
 flags=8
 type=1
-size=120
+size=128
 config=9
 sample_period=4000
 sample_type=455
-- 
GitLab


From 29038ae2ae566d9441e81cda3539db17c20bf06a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 10 May 2021 14:02:17 +0200
Subject: [PATCH 0211/3804] Revert "Revert "ACPI: scan: Turn off unused power
 resources during initialization""

Revert commit 5db91e9cb5b3 ("Revert "ACPI: scan: Turn off unused
power resources during initialization"") which was not necessary.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h | 1 +
 drivers/acpi/power.c    | 2 +-
 drivers/acpi/scan.c     | 2 ++
 drivers/acpi/sleep.h    | 1 -
 4 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index e6a5d997241c4..9fcefcdc1dbe0 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -139,6 +139,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
 int acpi_power_on_resources(struct acpi_device *device, int state);
 int acpi_power_transition(struct acpi_device *device, int state);
+void acpi_turn_off_unused_power_resources(void);
 
 /* --------------------------------------------------------------------------
                               Device Power Management
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 7e69931be828c..bacae6d178ff5 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -996,6 +996,7 @@ void acpi_resume_power_resources(void)
 
 	mutex_unlock(&power_resource_list_lock);
 }
+#endif
 
 void acpi_turn_off_unused_power_resources(void)
 {
@@ -1016,4 +1017,3 @@ void acpi_turn_off_unused_power_resources(void)
 
 	mutex_unlock(&power_resource_list_lock);
 }
-#endif
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a184529d8fa40..1584c9e463bdf 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2360,6 +2360,8 @@ int __init acpi_scan_init(void)
 		}
 	}
 
+	acpi_turn_off_unused_power_resources();
+
 	acpi_scan_initialized = true;
 
  out:
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h
index 1856f76ac83f7..7fe41ee489d61 100644
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
-extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
-- 
GitLab


From 67823d9dadd4dddee4b6bd075f6852b6ade5604a Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Tue, 27 Apr 2021 14:07:11 +0100
Subject: [PATCH 0212/3804] regulator: Add a routine to set the current limit
 for QCOM PMIC VBUS

Add hooks to regulator_get_current_limit_regmap() and
regulator_set_current_limit_regmap() with an accompanying map of amperages.

This lets us use the existing helper functions to map requested current
settings to register bit-map/indices.

This change is required to elevate the default 2 Amps set by the bootloader
to 3 Amps or indeed to constrain the value lower as the system design may
dictate.

The valid range is 500 mA to 3 A in increments of 500 mA.

Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Link: https://lore.kernel.org/r/20210427130712.2005456-2-bryan.odonoghue@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom_usb_vbus-regulator.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/regulator/qcom_usb_vbus-regulator.c b/drivers/regulator/qcom_usb_vbus-regulator.c
index 457788b505720..2e627c2b6c512 100644
--- a/drivers/regulator/qcom_usb_vbus-regulator.c
+++ b/drivers/regulator/qcom_usb_vbus-regulator.c
@@ -16,13 +16,21 @@
 
 #define CMD_OTG				0x40
 #define OTG_EN				BIT(0)
+#define OTG_CURRENT_LIMIT_CFG		0x52
+#define OTG_CURRENT_LIMIT_MASK		GENMASK(2, 0)
 #define OTG_CFG				0x53
 #define OTG_EN_SRC_CFG			BIT(1)
 
+static const unsigned int curr_table[] = {
+	500000, 1000000, 1500000, 2000000, 2500000, 3000000,
+};
+
 static const struct regulator_ops qcom_usb_vbus_reg_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
+	.get_current_limit = regulator_get_current_limit_regmap,
+	.set_current_limit = regulator_set_current_limit_regmap,
 };
 
 static struct regulator_desc qcom_usb_vbus_rdesc = {
@@ -30,6 +38,8 @@ static struct regulator_desc qcom_usb_vbus_rdesc = {
 	.ops = &qcom_usb_vbus_reg_ops,
 	.owner = THIS_MODULE,
 	.type = REGULATOR_VOLTAGE,
+	.curr_table = curr_table,
+	.n_current_limits = ARRAY_SIZE(curr_table),
 };
 
 static int qcom_usb_vbus_regulator_probe(struct platform_device *pdev)
@@ -61,6 +71,8 @@ static int qcom_usb_vbus_regulator_probe(struct platform_device *pdev)
 
 	qcom_usb_vbus_rdesc.enable_reg = base + CMD_OTG;
 	qcom_usb_vbus_rdesc.enable_mask = OTG_EN;
+	qcom_usb_vbus_rdesc.csel_reg = base + OTG_CURRENT_LIMIT_CFG;
+	qcom_usb_vbus_rdesc.csel_mask = OTG_CURRENT_LIMIT_MASK;
 	config.dev = dev;
 	config.init_data = init_data;
 	config.of_node = dev->of_node;
-- 
GitLab


From 8c816d56a2a4e757bb121d1af4c04f47ac0572d3 Mon Sep 17 00:00:00 2001
From: Bartosz Dudziak <bartosz.dudziak@snejp.pl>
Date: Sun, 2 May 2021 13:53:04 +0200
Subject: [PATCH 0213/3804] regulator: qcom_smd: Add PM8226 regulator support

Add support for PM8226 regulator which is commonly used with MSM8226 SoCs.

Signed-off-by: Bartosz Dudziak <bartosz.dudziak@snejp.pl>
Link: https://lore.kernel.org/r/20210502115304.8570-2-bartosz.dudziak@snejp.pl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom_smd-regulator.c | 83 ++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index bb944ee5fe3b1..05df7b00e3b17 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -251,6 +251,50 @@ static const struct regulator_desc pma8084_switch = {
 	.ops = &rpm_switch_ops,
 };
 
+static const struct regulator_desc pm8226_hfsmps = {
+	.linear_ranges = (struct linear_range[]) {
+		REGULATOR_LINEAR_RANGE(375000,   0,  95, 12500),
+		REGULATOR_LINEAR_RANGE(1575000, 96, 158, 25000),
+	},
+	.n_linear_ranges = 2,
+	.n_voltages = 159,
+	.ops = &rpm_smps_ldo_ops,
+};
+
+static const struct regulator_desc pm8226_ftsmps = {
+	.linear_ranges = (struct linear_range[]) {
+		REGULATOR_LINEAR_RANGE(350000,    0, 184,  5000),
+		REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
+	},
+	.n_linear_ranges = 2,
+	.n_voltages = 262,
+	.ops = &rpm_smps_ldo_ops,
+};
+
+static const struct regulator_desc pm8226_pldo = {
+	.linear_ranges = (struct linear_range[]) {
+		REGULATOR_LINEAR_RANGE(750000,    0,  63, 12500),
+		REGULATOR_LINEAR_RANGE(1550000,  64, 126, 25000),
+		REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
+	},
+	.n_linear_ranges = 3,
+	.n_voltages = 164,
+	.ops = &rpm_smps_ldo_ops,
+};
+
+static const struct regulator_desc pm8226_nldo = {
+	.linear_ranges = (struct linear_range[]) {
+		REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500),
+	},
+	.n_linear_ranges = 1,
+	.n_voltages = 64,
+	.ops = &rpm_smps_ldo_ops,
+};
+
+static const struct regulator_desc pm8226_switch = {
+	.ops = &rpm_switch_ops,
+};
+
 static const struct regulator_desc pm8x41_hfsmps = {
 	.linear_ranges = (struct linear_range[]) {
 		REGULATOR_LINEAR_RANGE( 375000,  0,  95, 12500),
@@ -746,6 +790,44 @@ static const struct rpm_regulator_data rpm_pm8916_regulators[] = {
 	{}
 };
 
+static const struct rpm_regulator_data rpm_pm8226_regulators[] = {
+	{ "s1", QCOM_SMD_RPM_SMPA, 1, &pm8226_hfsmps, "vdd_s1" },
+	{ "s2", QCOM_SMD_RPM_SMPA, 2, &pm8226_ftsmps, "vdd_s2" },
+	{ "s3", QCOM_SMD_RPM_SMPA, 3, &pm8226_hfsmps, "vdd_s3" },
+	{ "s4", QCOM_SMD_RPM_SMPA, 4, &pm8226_hfsmps, "vdd_s4" },
+	{ "s5", QCOM_SMD_RPM_SMPA, 5, &pm8226_hfsmps, "vdd_s5" },
+	{ "l1", QCOM_SMD_RPM_LDOA, 1, &pm8226_nldo, "vdd_l1_l2_l4_l5" },
+	{ "l2", QCOM_SMD_RPM_LDOA, 2, &pm8226_nldo, "vdd_l1_l2_l4_l5" },
+	{ "l3", QCOM_SMD_RPM_LDOA, 3, &pm8226_nldo, "vdd_l3_l24_l26" },
+	{ "l4", QCOM_SMD_RPM_LDOA, 4, &pm8226_nldo, "vdd_l1_l2_l4_l5" },
+	{ "l5", QCOM_SMD_RPM_LDOA, 5, &pm8226_nldo, "vdd_l1_l2_l4_l5" },
+	{ "l6", QCOM_SMD_RPM_LDOA, 6, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" },
+	{ "l7", QCOM_SMD_RPM_LDOA, 7, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" },
+	{ "l8", QCOM_SMD_RPM_LDOA, 8, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" },
+	{ "l9", QCOM_SMD_RPM_LDOA, 9, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" },
+	{ "l10", QCOM_SMD_RPM_LDOA, 10, &pm8226_pldo, "vdd_l10_l11_l13" },
+	{ "l11", QCOM_SMD_RPM_LDOA, 11, &pm8226_pldo, "vdd_l10_l11_l13" },
+	{ "l12", QCOM_SMD_RPM_LDOA, 12, &pm8226_pldo, "vdd_l12_l14" },
+	{ "l13", QCOM_SMD_RPM_LDOA, 13, &pm8226_pldo, "vdd_l10_l11_l13" },
+	{ "l14", QCOM_SMD_RPM_LDOA, 14, &pm8226_pldo, "vdd_l12_l14" },
+	{ "l15", QCOM_SMD_RPM_LDOA, 15, &pm8226_pldo, "vdd_l15_l16_l17_l18" },
+	{ "l16", QCOM_SMD_RPM_LDOA, 16, &pm8226_pldo, "vdd_l15_l16_l17_l18" },
+	{ "l17", QCOM_SMD_RPM_LDOA, 17, &pm8226_pldo, "vdd_l15_l16_l17_l18" },
+	{ "l18", QCOM_SMD_RPM_LDOA, 18, &pm8226_pldo, "vdd_l15_l16_l17_l18" },
+	{ "l19", QCOM_SMD_RPM_LDOA, 19, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "l20", QCOM_SMD_RPM_LDOA, 20, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "l21", QCOM_SMD_RPM_LDOA, 21, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "l22", QCOM_SMD_RPM_LDOA, 22, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "l23", QCOM_SMD_RPM_LDOA, 23, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "l24", QCOM_SMD_RPM_LDOA, 24, &pm8226_nldo, "vdd_l3_l24_l26" },
+	{ "l25", QCOM_SMD_RPM_LDOA, 25, &pm8226_pldo, "vdd_l25" },
+	{ "l26", QCOM_SMD_RPM_LDOA, 26, &pm8226_nldo, "vdd_l3_l24_l26" },
+	{ "l27", QCOM_SMD_RPM_LDOA, 27, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" },
+	{ "l28", QCOM_SMD_RPM_LDOA, 28, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" },
+	{ "lvs1", QCOM_SMD_RPM_VSA, 1, &pm8226_switch, "vdd_lvs1" },
+	{}
+};
+
 static const struct rpm_regulator_data rpm_pm8941_regulators[] = {
 	{ "s1", QCOM_SMD_RPM_SMPA, 1, &pm8x41_hfsmps, "vdd_s1" },
 	{ "s2", QCOM_SMD_RPM_SMPA, 2, &pm8x41_hfsmps, "vdd_s2" },
@@ -1092,6 +1174,7 @@ static const struct of_device_id rpm_of_match[] = {
 	{ .compatible = "qcom,rpm-mp5496-regulators", .data = &rpm_mp5496_regulators },
 	{ .compatible = "qcom,rpm-pm8841-regulators", .data = &rpm_pm8841_regulators },
 	{ .compatible = "qcom,rpm-pm8916-regulators", .data = &rpm_pm8916_regulators },
+	{ .compatible = "qcom,rpm-pm8226-regulators", .data = &rpm_pm8226_regulators },
 	{ .compatible = "qcom,rpm-pm8941-regulators", .data = &rpm_pm8941_regulators },
 	{ .compatible = "qcom,rpm-pm8950-regulators", .data = &rpm_pm8950_regulators },
 	{ .compatible = "qcom,rpm-pm8953-regulators", .data = &rpm_pm8953_regulators },
-- 
GitLab


From 00c8b0b1e6e1314bb57aab6438fbc2803c637d9d Mon Sep 17 00:00:00 2001
From: Bartosz Dudziak <bartosz.dudziak@snejp.pl>
Date: Sun, 2 May 2021 13:53:03 +0200
Subject: [PATCH 0214/3804] regulator: qcom: Document PM8226 smd regulator

Document the PM8226 SMD-RPM regulator entry.

Signed-off-by: Bartosz Dudziak <bartosz.dudziak@snejp.pl>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210502115304.8570-1-bartosz.dudziak@snejp.pl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/qcom,smd-rpm-regulator.yaml           | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
index a35c6cb9bf972..83b53579f4635 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml
@@ -24,6 +24,10 @@ description:
 
   For mp5496, s2
 
+  For pm8226, s1, s2, s3, s4, s5, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,
+  l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, l25,
+  l26, l27, l28, lvs1
+
   For pm8841, s1, s2, s3, s4, s5, s6, s7, s8
 
   For pm8916, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11,
@@ -68,6 +72,7 @@ properties:
   compatible:
     enum:
       - qcom,rpm-mp5496-regulators
+      - qcom,rpm-pm8226-regulators
       - qcom,rpm-pm8841-regulators
       - qcom,rpm-pm8916-regulators
       - qcom,rpm-pm8941-regulators
-- 
GitLab


From 4446e6f3bd5c97c312833b445d0eb2ea638c7e98 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 30 Apr 2021 16:55:55 +0800
Subject: [PATCH 0215/3804] regulator: hi6421v600: Remove unneeded *pmic from
 struct hi6421_spmi_reg_info

Use rdev->regmap instead of pmic->regmap.
With this change, hi6421_spmi_regulator_disable can be removed and
regulator_disable_regmap used instead.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210430085555.1127994-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421v600-regulator.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index f6a14e9c3cbfe..feddb0b5d4f1c 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -18,7 +18,6 @@
 
 struct hi6421_spmi_reg_info {
 	struct regulator_desc	desc;
-	struct hi6421_spmi_pmic *pmic;
 	u8			eco_mode_mask;
 	u32			eco_uA;
 
@@ -98,13 +97,12 @@ static const unsigned int ldo34_voltages[] = {
 static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev)
 {
 	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	struct hi6421_spmi_pmic *pmic = sreg->pmic;
 	int ret;
 
 	/* cannot enable more than one regulator at one time */
 	mutex_lock(&sreg->enable_mutex);
 
-	ret = regmap_update_bits(pmic->regmap, rdev->desc->enable_reg,
+	ret = regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
 				 rdev->desc->enable_mask,
 				 rdev->desc->enable_mask);
 
@@ -116,22 +114,12 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev)
 	return ret;
 }
 
-static int hi6421_spmi_regulator_disable(struct regulator_dev *rdev)
-{
-	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	struct hi6421_spmi_pmic *pmic = sreg->pmic;
-
-	return regmap_update_bits(pmic->regmap, rdev->desc->enable_reg,
-				  rdev->desc->enable_mask, 0);
-}
-
 static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev)
 {
 	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	struct hi6421_spmi_pmic *pmic = sreg->pmic;
 	u32 reg_val;
 
-	regmap_read(pmic->regmap, rdev->desc->enable_reg, &reg_val);
+	regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val);
 
 	if (reg_val & sreg->eco_mode_mask)
 		return REGULATOR_MODE_IDLE;
@@ -143,7 +131,6 @@ static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev,
 					  unsigned int mode)
 {
 	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	struct hi6421_spmi_pmic *pmic = sreg->pmic;
 	u32 val;
 
 	switch (mode) {
@@ -157,7 +144,7 @@ static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev,
 		return -EINVAL;
 	}
 
-	return regmap_update_bits(pmic->regmap, rdev->desc->enable_reg,
+	return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
 				  sreg->eco_mode_mask, val);
 }
 
@@ -177,7 +164,7 @@ hi6421_spmi_regulator_get_optimum_mode(struct regulator_dev *rdev,
 static const struct regulator_ops hi6421_spmi_ldo_rops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.enable = hi6421_spmi_regulator_enable,
-	.disable = hi6421_spmi_regulator_disable,
+	.disable = regulator_disable_regmap,
 	.list_voltage = regulator_list_voltage_table,
 	.map_voltage = regulator_map_voltage_iterate,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -258,7 +245,6 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 	if (!sreg)
 		return -ENOMEM;
 
-	sreg->pmic = pmic;
 	mutex_init(&sreg->enable_mutex);
 
 	for (i = 0; i < ARRAY_SIZE(regulator_info); i++) {
-- 
GitLab


From 66fe740317c82b0caa68ed8d756536d4ff7e910c Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Mon, 10 May 2021 14:58:20 +0800
Subject: [PATCH 0216/3804] spi: ppc4xx: include <linux/io.h> instead of
 <asm/io.h>

Include the more general linux/io.h instead of asm/io.h
as checkpatch suggests.

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620629903-15493-2-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ppc4xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index d8ee363fb7145..9e3974551204d 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -34,7 +34,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/dcr.h>
 #include <asm/dcr-regs.h>
 
-- 
GitLab


From 856a9260e17129303102a7d4a5f71b7a8739e5b9 Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Mon, 10 May 2021 14:58:21 +0800
Subject: [PATCH 0217/3804] spi: omap-100k: Clean the value of 'status' is not
 used

An error code is assigned to 'status' before breaking out of the
list_for_each_entry() loop, but that value is never used because 'status'
is overwritten right after the loop, as shown below:

  list_for_each_entry(t, &m->transfers, transfer_list) {
  	if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) {
  		status = -EINVAL;
  		break;
  	}
  	...
  }

  status = omap1_spi100k_setup_transfer(spi, NULL);

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620629903-15493-3-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap-100k.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index 7062f29022539..dc9b86b648ac0 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -296,7 +296,6 @@ static int omap1_spi100k_transfer_one_message(struct spi_master *master,
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
 		if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) {
-			status = -EINVAL;
 			break;
 		}
 		status = omap1_spi100k_setup_transfer(spi, t);
@@ -315,7 +314,6 @@ static int omap1_spi100k_transfer_one_message(struct spi_master *master,
 			m->actual_length += count;
 
 			if (count != t->len) {
-				status = -EIO;
 				break;
 			}
 		}
-- 
GitLab


From db56d03049524114696aa7158560d8f0e064c487 Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Mon, 10 May 2021 14:58:22 +0800
Subject: [PATCH 0218/3804] spi: delete repeated words in comments

Drop repeated words in spi-bcm2835aux.c
{are}

Drop repeated words in spi-dw-mmio.c
{the}

Drop repeated words in spi-geni-qcom.c
{our}

Drop repeated words in spi-pl022.c
{on}

Drop repeated words in spi-ppc4xx.c
{the}

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620629903-15493-4-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835aux.c | 2 +-
 drivers/spi/spi-dw-mmio.c    | 2 +-
 drivers/spi/spi-geni-qcom.c  | 4 ++--
 drivers/spi/spi-pl022.c      | 4 ++--
 drivers/spi/spi-ppc4xx.c     | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 75589ac6e95f9..37eab100a7d8a 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -384,7 +384,7 @@ static int bcm2835aux_spi_transfer_one(struct spi_master *master,
 	bs->pending = 0;
 
 	/* Calculate the estimated time in us the transfer runs.  Note that
-	 * there are are 2 idle clocks cycles after each chunk getting
+	 * there are 2 idle clocks cycles after each chunk getting
 	 * transferred - in our case the chunk size is 3 bytes, so we
 	 * approximate this by 9 cycles/byte.  This is used to find the number
 	 * of Hz per byte per polling limit.  E.g., we can transfer 1 byte in
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index 17c06039a74d1..3379720cfcb8d 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -56,7 +56,7 @@ struct dw_spi_mscc {
 /*
  * The Designware SPI controller (referred to as master in the documentation)
  * automatically deasserts chip select when the tx fifo is empty. The chip
- * selects then needs to be either driven as GPIOs or, for the first 4 using the
+ * selects then needs to be either driven as GPIOs or, for the first 4 using
  * the SPI boot controller registers. the final chip select is an OR gate
  * between the Designware SPI controller and the SPI boot controller.
  */
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 3d0d8ddd57720..b3861fb88711a 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -639,8 +639,8 @@ static irqreturn_t geni_spi_isr(int irq, void *data)
 		complete(&mas->abort_done);
 
 	/*
-	 * It's safe or a good idea to Ack all of our our interrupts at the
-	 * end of the function. Specifically:
+	 * It's safe or a good idea to Ack all of our interrupts at the end
+	 * of the function. Specifically:
 	 * - M_CMD_DONE_EN / M_RX_FIFO_LAST_EN: Edge triggered interrupts and
 	 *   clearing Acks. Clearing at the end relies on nobody else having
 	 *   started a new transfer yet or else we could be clearing _their_
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 0c9e3f270f05a..feebda66f56eb 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -288,7 +288,7 @@
 #define SPI_POLLING_TIMEOUT 1000
 
 /*
- * The type of reading going on on this chip
+ * The type of reading going on this chip
  */
 enum ssp_reading {
 	READING_NULL,
@@ -298,7 +298,7 @@ enum ssp_reading {
 };
 
 /*
- * The type of writing going on on this chip
+ * The type of writing going on this chip
  */
 enum ssp_writing {
 	WRITING_NULL,
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 9e3974551204d..76874a7cca9b5 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -326,7 +326,7 @@ static void spi_ppc4xx_enable(struct ppc4xx_spi *hw)
 {
 	/*
 	 * On all 4xx PPC's the SPI bus is shared/multiplexed with
-	 * the 2nd I2C bus. We need to enable the the SPI bus before
+	 * the 2nd I2C bus. We need to enable the SPI bus before
 	 * using it.
 	 */
 
-- 
GitLab


From 9e37a3ab0627011fb63875e9a93094b6fc8ddf48 Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Mon, 10 May 2021 14:58:23 +0800
Subject: [PATCH 0219/3804] spi: spi-loopback-test: Fix 'tx_buf' might be
 'rx_buf'

In function 'spi_test_run_iter': the check guarding the 'rx_buf' offset
tested 'tx_buf'. Test 'rx_buf' instead.

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620629903-15493-5-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-loopback-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index f1cf2232f0b5e..4d4f77a186a98 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -875,7 +875,7 @@ static int spi_test_run_iter(struct spi_device *spi,
 		test.transfers[i].len = len;
 		if (test.transfers[i].tx_buf)
 			test.transfers[i].tx_buf += tx_off;
-		if (test.transfers[i].tx_buf)
+		if (test.transfers[i].rx_buf)
 			test.transfers[i].rx_buf += rx_off;
 	}
 
-- 
GitLab


From f2eed8caa336e31d672804a8725dadba0415f19d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:28 +0300
Subject: [PATCH 0220/3804] spi: pxa2xx: Use one point of return when ->probe()
 fails

When we can't allocate SPI controller, jump to the error path rather than
return locally.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5e59ba075bc7a..2f5618883ac3c 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1705,8 +1705,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 	if (!controller) {
 		dev_err(&pdev->dev, "cannot alloc spi_controller\n");
-		pxa_ssp_free(ssp);
-		return -ENOMEM;
+		status = -ENOMEM;
+		goto out_error_controller_alloc;
 	}
 	drv_data = spi_controller_get_devdata(controller);
 	drv_data->controller = controller;
-- 
GitLab


From 9e43c9a8d5de4810ea9688519d55b5e46784d84a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:29 +0300
Subject: [PATCH 0221/3804] spi: pxa2xx: Utilize MMIO and physical base from
 struct ssp_device

We have a duplication of MMIO and physical base addresses in
the struct driver_data, get rid of it and reuse members from
struct ssp_device instead.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c |  4 ++--
 drivers/spi/spi-pxa2xx.c     |  4 +---
 drivers/spi/spi-pxa2xx.h     | 14 ++++----------
 3 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 37567bc7a5232..3b27f356a18f3 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -94,14 +94,14 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 	cfg.direction = dir;
 
 	if (dir == DMA_MEM_TO_DEV) {
-		cfg.dst_addr = drv_data->ssdr_physical;
+		cfg.dst_addr = drv_data->ssp->phys_base + SSDR;
 		cfg.dst_addr_width = width;
 		cfg.dst_maxburst = chip->dma_burst_size;
 
 		sgt = &xfer->tx_sg;
 		chan = drv_data->controller->dma_tx;
 	} else {
-		cfg.src_addr = drv_data->ssdr_physical;
+		cfg.src_addr = drv_data->ssp->phys_base + SSDR;
 		cfg.src_addr_width = width;
 		cfg.src_maxburst = chip->dma_burst_size;
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 2f5618883ac3c..d89db682179d0 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -325,7 +325,7 @@ static void lpss_ssp_setup(struct driver_data *drv_data)
 	u32 value;
 
 	config = lpss_get_config(drv_data);
-	drv_data->lpss_base = drv_data->ioaddr + config->offset;
+	drv_data->lpss_base = drv_data->ssp->mmio_base + config->offset;
 
 	/* Enable software chip select control */
 	value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
@@ -1733,8 +1733,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 	drv_data->ssp_type = ssp->type;
 
-	drv_data->ioaddr = ssp->mmio_base;
-	drv_data->ssdr_physical = ssp->phys_base + SSDR;
 	if (pxa25x_ssp_comp(drv_data)) {
 		switch (drv_data->ssp_type) {
 		case QUARK_X1000_SSP:
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 1400472bc986c..ad9980ebefa65 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -33,10 +33,6 @@ struct driver_data {
 	/* PXA hookup */
 	struct pxa2xx_spi_controller *controller_info;
 
-	/* SSP register addresses */
-	void __iomem *ioaddr;
-	phys_addr_t ssdr_physical;
-
 	/* SSP masks*/
 	u32 dma_cr1;
 	u32 int_cr1;
@@ -87,16 +83,14 @@ struct chip_data {
 	void (*cs_control)(u32 command);
 };
 
-static inline u32 pxa2xx_spi_read(const struct driver_data *drv_data,
-				  unsigned reg)
+static inline u32 pxa2xx_spi_read(const struct driver_data *drv_data, u32 reg)
 {
-	return __raw_readl(drv_data->ioaddr + reg);
+	return pxa_ssp_read_reg(drv_data->ssp, reg);
 }
 
-static  inline void pxa2xx_spi_write(const struct driver_data *drv_data,
-				     unsigned reg, u32 val)
+static inline void pxa2xx_spi_write(const struct driver_data *drv_data, u32 reg, u32 val)
 {
-	__raw_writel(val, drv_data->ioaddr + reg);
+	pxa_ssp_write_reg(drv_data->ssp, reg, val);
 }
 
 #define DMA_ALIGNMENT		8
-- 
GitLab


From c3dce24c40cc7cd07deca5b81b763eae66f30856 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:30 +0300
Subject: [PATCH 0222/3804] spi: pxa2xx: Utilize struct device from struct
 ssp_device

We have a duplication of struct device in the struct driver_data,
get rid of it and reuse member from struct ssp_device instead.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c | 12 +++++-------
 drivers/spi/spi-pxa2xx-pci.c |  1 +
 drivers/spi/spi-pxa2xx.c     | 12 +++++-------
 drivers/spi/spi-pxa2xx.h     |  4 ----
 4 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 3b27f356a18f3..2e4a49567146c 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -111,7 +111,7 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 
 	ret = dmaengine_slave_config(chan, &cfg);
 	if (ret) {
-		dev_warn(&drv_data->pdev->dev, "DMA slave config failed\n");
+		dev_warn(drv_data->ssp->dev, "DMA slave config failed\n");
 		return NULL;
 	}
 
@@ -125,7 +125,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
 
 	status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr;
 	if (status & SSSR_ROR) {
-		dev_err(&drv_data->pdev->dev, "FIFO overrun\n");
+		dev_err(drv_data->ssp->dev, "FIFO overrun\n");
 
 		dmaengine_terminate_async(drv_data->controller->dma_rx);
 		dmaengine_terminate_async(drv_data->controller->dma_tx);
@@ -145,16 +145,14 @@ int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
 
 	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV, xfer);
 	if (!tx_desc) {
-		dev_err(&drv_data->pdev->dev,
-			"failed to get DMA TX descriptor\n");
+		dev_err(drv_data->ssp->dev, "failed to get DMA TX descriptor\n");
 		err = -EBUSY;
 		goto err_tx;
 	}
 
 	rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM, xfer);
 	if (!rx_desc) {
-		dev_err(&drv_data->pdev->dev,
-			"failed to get DMA RX descriptor\n");
+		dev_err(drv_data->ssp->dev, "failed to get DMA RX descriptor\n");
 		err = -EBUSY;
 		goto err_rx;
 	}
@@ -191,8 +189,8 @@ void pxa2xx_spi_dma_stop(struct driver_data *drv_data)
 int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 {
 	struct pxa2xx_spi_controller *pdata = drv_data->controller_info;
-	struct device *dev = &drv_data->pdev->dev;
 	struct spi_controller *controller = drv_data->controller;
+	struct device *dev = drv_data->ssp->dev;
 	dma_cap_mask_t mask;
 
 	dma_cap_zero(mask);
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 1833f5876e9fa..f588fad77fc09 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -239,6 +239,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1;
 
 	ssp = &spi_pdata.ssp;
+	ssp->dev = &dev->dev;
 	ssp->phys_base = pci_resource_start(dev, 0);
 	ssp->mmio_base = pcim_iomap_table(dev)[0];
 	ssp->port_id = (c->port_id >= 0) ? c->port_id : dev->devfn;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index d89db682179d0..0f3f7d7259374 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -628,7 +628,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg)
 	pxa2xx_spi_flush(drv_data);
 	pxa2xx_spi_off(drv_data);
 
-	dev_err(&drv_data->pdev->dev, "%s\n", msg);
+	dev_err(drv_data->ssp->dev, "%s\n", msg);
 
 	drv_data->controller->cur_msg->status = -EIO;
 	spi_finalize_current_transfer(drv_data->controller);
@@ -731,8 +731,7 @@ static void handle_bad_msg(struct driver_data *drv_data)
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
 
-	dev_err(&drv_data->pdev->dev,
-		"bad message state in interrupt handler\n");
+	dev_err(drv_data->ssp->dev, "bad message state in interrupt handler\n");
 }
 
 static irqreturn_t ssp_int(int irq, void *dev_id)
@@ -748,7 +747,7 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 	 * the IRQ was not for us (we shouldn't be RPM suspended when the
 	 * interrupt is enabled).
 	 */
-	if (pm_runtime_suspended(&drv_data->pdev->dev))
+	if (pm_runtime_suspended(drv_data->ssp->dev))
 		return IRQ_NONE;
 
 	/*
@@ -1158,7 +1157,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
 	pxa2xx_spi_flush(drv_data);
 	pxa2xx_spi_off(drv_data);
 
-	dev_dbg(&drv_data->pdev->dev, "transfer aborted\n");
+	dev_dbg(drv_data->ssp->dev, "transfer aborted\n");
 
 	drv_data->controller->cur_msg->status = -EINTR;
 	spi_finalize_current_transfer(drv_data->controller);
@@ -1645,7 +1644,7 @@ static int pxa2xx_spi_fw_translate_cs(struct spi_controller *controller,
 {
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
-	if (has_acpi_companion(&drv_data->pdev->dev)) {
+	if (has_acpi_companion(drv_data->ssp->dev)) {
 		switch (drv_data->ssp_type) {
 		/*
 		 * For Atoms the ACPI DeviceSelection used by the Windows
@@ -1711,7 +1710,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	drv_data = spi_controller_get_devdata(controller);
 	drv_data->controller = controller;
 	drv_data->controller_info = platform_info;
-	drv_data->pdev = pdev;
 	drv_data->ssp = ssp;
 
 	controller->dev.of_node = pdev->dev.of_node;
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index ad9980ebefa65..6724d7e056ce6 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -12,7 +12,6 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
-#include <linux/platform_device.h>
 #include <linux/pxa2xx_ssp.h>
 #include <linux/scatterlist.h>
 #include <linux/sizes.h>
@@ -20,9 +19,6 @@
 #include <linux/spi/pxa2xx_spi.h>
 
 struct driver_data {
-	/* Driver model hookup */
-	struct platform_device *pdev;
-
 	/* SSP Info */
 	struct ssp_device *ssp;
 
-- 
GitLab


From 0e4768713e71dd224633fd7e00ad358bc48f433a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:31 +0300
Subject: [PATCH 0223/3804] spi: pxa2xx: Replace header inclusions by forward
 declarations

When a data structure is only referred to by pointer, the compiler may not
need to see the contents of the data type. Thus, we may replace header
inclusions with the respective forward declarations. Because of the above,
add the missing headers as well.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c   |  4 ++--
 drivers/spi/spi-pxa2xx-pci.c   |  1 +
 drivers/spi/spi-pxa2xx.c       |  2 ++
 drivers/spi/spi-pxa2xx.h       | 18 ++++++++++--------
 include/linux/spi/pxa2xx_spi.h |  2 ++
 5 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 2e4a49567146c..e00dbadd39ecb 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -9,11 +9,11 @@
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
-#include <linux/pxa2xx_ssp.h>
 #include <linux/scatterlist.h>
 #include <linux/sizes.h>
-#include <linux/spi/spi.h>
+
 #include <linux/spi/pxa2xx_spi.h>
+#include <linux/spi/spi.h>
 
 #include "spi-pxa2xx.h"
 
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index f588fad77fc09..a259be12d3265 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
+
 #include <linux/spi/pxa2xx_spi.h>
 
 #include <linux/dmaengine.h>
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 0f3f7d7259374..1d4c7f4217ede 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -9,6 +9,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/device.h>
+#include <linux/dmaengine.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gpio/consumer.h>
@@ -25,6 +26,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
 #include <linux/slab.h>
+
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 6724d7e056ce6..739e264feaa69 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -7,16 +7,18 @@
 #ifndef SPI_PXA2XX_H
 #define SPI_PXA2XX_H
 
-#include <linux/atomic.h>
-#include <linux/dmaengine.h>
-#include <linux/errno.h>
-#include <linux/io.h>
 #include <linux/interrupt.h>
-#include <linux/pxa2xx_ssp.h>
-#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/types.h>
 #include <linux/sizes.h>
-#include <linux/spi/spi.h>
-#include <linux/spi/pxa2xx_spi.h>
+
+#include <linux/pxa2xx_ssp.h>
+
+struct gpio_desc;
+struct pxa2xx_spi_controller;
+struct spi_controller;
+struct spi_device;
+struct spi_transfer;
 
 struct driver_data {
 	/* SSP Info */
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 31f00c7f4f59d..1e0e2f136319f 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -5,6 +5,8 @@
 #ifndef __linux_pxa2xx_spi_h
 #define __linux_pxa2xx_spi_h
 
+#include <linux/types.h>
+
 #include <linux/pxa2xx_ssp.h>
 
 #define PXA2XX_CS_ASSERT (0x01)
-- 
GitLab


From 5edc24901f4d469f8fc943004f73655933e89dbf Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:32 +0300
Subject: [PATCH 0224/3804] spi: pxa2xx: Unify ifdeffery used in the headers

The two headers have quite different ifdeffery to prevent multiple inclusion.
Unify them with the pattern that in particular reflects their location.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h     | 6 +++---
 include/linux/spi/pxa2xx_spi.h | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 7f73b26ed22e4..14b049840faff 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -11,8 +11,8 @@
  *       PXA3xx     SSP1, SSP2, SSP3, SSP4
  */
 
-#ifndef __LINUX_SSP_H
-#define __LINUX_SSP_H
+#ifndef __LINUX_PXA2XX_SSP_H
+#define __LINUX_PXA2XX_SSP_H
 
 #include <linux/bits.h>
 #include <linux/compiler_types.h>
@@ -270,4 +270,4 @@ static inline struct ssp_device *pxa_ssp_request_of(const struct device_node *n,
 static inline void pxa_ssp_free(struct ssp_device *ssp) {}
 #endif
 
-#endif
+#endif	/* __LINUX_PXA2XX_SSP_H */
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 1e0e2f136319f..12ef04d0896d2 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -2,8 +2,8 @@
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
  */
-#ifndef __linux_pxa2xx_spi_h
-#define __linux_pxa2xx_spi_h
+#ifndef __LINUX_SPI_PXA2XX_SPI_H
+#define __LINUX_SPI_PXA2XX_SPI_H
 
 #include <linux/types.h>
 
@@ -51,4 +51,5 @@ struct pxa2xx_spi_chip {
 extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info);
 
 #endif
-#endif
+
+#endif	/* __LINUX_SPI_PXA2XX_SPI_H */
-- 
GitLab


From 1beb37b0e3f98708bfb37778049764b4500756da Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Apr 2021 21:24:33 +0300
Subject: [PATCH 0225/3804] spi: pxa2xx: Group Intel Quark specific definitions

DDS_RATE is an Intel Quark specific definition. Move it next to the rest
of the Intel Quark related definitions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210423182441.50272-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pxa2xx_ssp.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 14b049840faff..1b6c1a0922bd2 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -38,7 +38,6 @@ struct device_node;
 #define SSDR		(0x10)  /* SSP Data Write/Data Read Register */
 
 #define SSTO		(0x28)  /* SSP Time Out Register */
-#define DDS_RATE	(0x28)  /* SSP DDS Clock Rate Register (Intel Quark) */
 #define SSPSP		(0x2C)  /* SSP Programmable Serial Protocol */
 #define SSTSA		(0x30)  /* SSP Tx Timeslot Active */
 #define SSRSA		(0x34)  /* SSP Rx Timeslot Active */
@@ -105,6 +104,9 @@ struct device_node;
 #define CE4100_SSCR1_RFT	GENMASK(11, 10)	/* Receive FIFO Threshold (mask) */
 #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..4] */
 
+/* Intel Quark X1000 */
+#define DDS_RATE		0x28		 /* SSP DDS Clock Rate Register */
+
 /* QUARK_X1000 SSCR0 bit definition */
 #define QUARK_X1000_SSCR0_DSS		GENMASK(4, 0)	/* Data Size Select (mask) */
 #define QUARK_X1000_SSCR0_DataSize(x)	((x) - 1)	/* Data Size Select [4..32] */
-- 
GitLab


From 026a1dc1af52742c5897e64a3431445371a71871 Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Thu, 6 May 2021 15:08:08 +0800
Subject: [PATCH 0226/3804] spi: spi-topcliff-pch: Fix potential double free in
 pch_spi_process_messages()

pch_spi_set_tx() frees data->pkt_tx_buff on failure of kzalloc() for
data->pkt_rx_buff, but its caller, pch_spi_process_messages(), will
free data->pkt_tx_buff again. Set data->pkt_tx_buff to NULL after
kfree() to avoid double free.

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620284888-65215-1-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-topcliff-pch.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index b8870784fc6ef..8c4615b763398 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -580,8 +580,10 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw)
 	data->pkt_tx_buff = kzalloc(size, GFP_KERNEL);
 	if (data->pkt_tx_buff != NULL) {
 		data->pkt_rx_buff = kzalloc(size, GFP_KERNEL);
-		if (!data->pkt_rx_buff)
+		if (!data->pkt_rx_buff) {
 			kfree(data->pkt_tx_buff);
+			data->pkt_tx_buff = NULL;
+		}
 	}
 
 	if (!data->pkt_rx_buff) {
-- 
GitLab


From 029d32a892a860017d33ff8d9598259731e776ad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 6 May 2021 13:52:59 +0200
Subject: [PATCH 0227/3804] spi: dw-apb-ssi: Integrate Renesas RZ/N1 SPI
 controller

Originally, the Renesas RZ/N1 SPI Controller DT bindings were not
integrated in the main DT bindings for the Synopsys DesignWare
Synchronous Serial Interface, but in its own file, as the RZ/N1
controller has additional registers for software CS control and DMA.

As so far DMA is not supported on RZ/N1, and json-schema can handle any
possible differences fine, integrate the RZ/N1 compatible values in the
main DT bindings for the Synopsys DW SSI.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/aef15aa119ed02487ded4691141678bc1040c3b4.1620301936.git.geert+renesas@glider.be
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/renesas,rzn1-spi.txt      | 11 -----------
 .../devicetree/bindings/spi/snps,dw-apb-ssi.yaml      |  6 ++++++
 2 files changed, 6 insertions(+), 11 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt

diff --git a/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt b/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt
deleted file mode 100644
index fb1a6728638d3..0000000000000
--- a/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-Renesas RZ/N1 SPI Controller
-
-This controller is based on the Synopsys DW Synchronous Serial Interface and
-inherits all properties defined in snps,dw-apb-ssi.txt except for the
-compatible property.
-
-Required properties:
-- compatible : The device specific string followed by the generic RZ/N1 string.
-   Therefore it must be one of:
-   "renesas,r9a06g032-spi", "renesas,rzn1-spi"
-   "renesas,r9a06g033-spi", "renesas,rzn1-spi"
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
index 4825157cd92e8..ca91201a99269 100644
--- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml
@@ -67,6 +67,12 @@ properties:
         const: baikal,bt1-sys-ssi
       - description: Canaan Kendryte K210 SoS SPI Controller
         const: canaan,k210-spi
+      - description: Renesas RZ/N1 SPI Controller
+        items:
+          - enum:
+              - renesas,r9a06g032-spi # RZ/N1D
+              - renesas,r9a06g033-spi # RZ/N1S
+          - const: renesas,rzn1-spi   # RZ/N1
 
   reg:
     minItems: 1
-- 
GitLab


From e7a1a3abea373e41ba7dfe0fbc93cb79b6a3a529 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 29 Apr 2021 19:20:48 +0800
Subject: [PATCH 0228/3804] spi: omap-100k: Fix the length judgment problem

word_len should be checked in the omap1_spi100k_setup_transfer
function to see if it exceeds 32.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1619695248-39045-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap-100k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index 7062f29022539..f104470605b38 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -241,7 +241,7 @@ static int omap1_spi100k_setup_transfer(struct spi_device *spi,
 	else
 		word_len = spi->bits_per_word;
 
-	if (spi->bits_per_word > 32)
+	if (word_len > 32)
 		return -EINVAL;
 	cs->word_len = word_len;
 
-- 
GitLab


From 86b1d8ecb5f1f271a660ce0b882658447f85904a Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 7 May 2021 17:07:59 +0200
Subject: [PATCH 0229/3804] spi: tegra114: Fix an error message

'ret' is known to be 0 here.
No error code is available, so just remove it from the error message.

Fixes: f333a331ad ("spi/tegra114: add spi driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/e2593974c9484b7055177ad0c9237c8e343946be.1620399829.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra114.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index a2e5907276e7f..5131141bbf0d4 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -1071,8 +1071,7 @@ static int tegra_spi_transfer_one_message(struct spi_master *master,
 		ret = wait_for_completion_timeout(&tspi->xfer_completion,
 						SPI_DMA_TIMEOUT);
 		if (WARN_ON(ret == 0)) {
-			dev_err(tspi->dev,
-				"spi transfer timeout, err %d\n", ret);
+			dev_err(tspi->dev, "spi transfer timeout\n");
 			if (tspi->is_curr_dma_xfer &&
 			    (tspi->cur_direction & DATA_DIR_TX))
 				dmaengine_terminate_all(tspi->tx_dma_chan);
-- 
GitLab


From 665a990fdbea66a4d2af0287420f8266631be2ab Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 7 May 2021 18:26:39 +0200
Subject: [PATCH 0230/3804] spi: tegra210-quad: Fix an error message

'ret' is known to be 0 here.
No error code is available, so just remove it from the error message.

Fixes: 921fc1838fb0 ("spi: tegra210-quad: Add support for Tegra210 QSPI controller")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/b990c1bb5830196142c3d70e3e3c6c0245a7e75f.1620404705.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra210-quad.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c
index 2f806f4b2c34e..2354ca1e38581 100644
--- a/drivers/spi/spi-tegra210-quad.c
+++ b/drivers/spi/spi-tegra210-quad.c
@@ -1028,7 +1028,7 @@ static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi
 		ret = wait_for_completion_timeout(&tqspi->xfer_completion,
 						  QSPI_DMA_TIMEOUT);
 		if (WARN_ON(ret == 0)) {
-			dev_err(tqspi->dev, "transfer timeout: %d\n", ret);
+			dev_err(tqspi->dev, "transfer timeout\n");
 			if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX))
 				dmaengine_terminate_all(tqspi->tx_dma_chan);
 			if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_RX))
-- 
GitLab


From 14b6cff54edaca5740068e9ed070152727ed7718 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 22 Apr 2021 17:26:19 +0200
Subject: [PATCH 0231/3804] staging: rtl8723bs: avoid bogus gcc warning

gcc gets confused by some of the type casts and produces an
apparently senseless warning about an out-of-bound memcpy to
an unrelated array in the same structure:

drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c: In function 'rtw_cfg80211_ap_set_encryption':
cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=]
In file included from drivers/staging/rtl8723bs/include/drv_types.h:32,
                 from drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c:10:
drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [184, 4264] into destination object 'dot11AuthAlgrthm' of size 4
   98 |         u32   dot11AuthAlgrthm;         /*  802.11 auth, could be open, shared, 8021x and authswitch */
      |               ^~~~~~~~~~~~~~~~
cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=]
drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [264, 4344] into destination object 'dot11AuthAlgrthm' of size 4

This is a known gcc bug, and the patch here is only a workaround,
but the approach of using a temporary variable to hold a pointer
to the key also improves readability in addition to avoiding the
warning, so overall this should still help.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99673
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210422152648.2891996-1-arnd@kernel.org
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 23 +++++++++++--------
 .../staging/rtl8723bs/os_dep/ioctl_linux.c    | 21 +++++++++--------
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index c1dac6eec59f5..a6d731e959a28 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -527,6 +527,9 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct security_priv *psecuritypriv =  &(padapter->securitypriv);
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	char *grpkey = padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey;
+	char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey;
+	char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey;
 
 	param->u.crypt.err = 0;
 	param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
@@ -609,7 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 		{
 			if (strcmp(param->u.crypt.alg, "WEP") == 0)
 			{
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 				if (param->u.crypt.key_len == 13)
@@ -622,12 +625,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 				/* set mic key */
-				memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-				memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+				memcpy(txkey, &(param->u.crypt.key[16]), 8);
+				memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 				psecuritypriv->busetkipkey = true;
 
@@ -636,7 +639,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 			}
 			else
 			{
@@ -713,7 +716,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 			{
 				if (strcmp(param->u.crypt.alg, "WEP") == 0)
 				{
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 					if (param->u.crypt.key_len == 13)
@@ -725,12 +728,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 				{
 					psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 					/* set mic key */
-					memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-					memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+					memcpy(txkey, &(param->u.crypt.key[16]), 8);
+					memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 					psecuritypriv->busetkipkey = true;
 
@@ -739,7 +742,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa
 				{
 					psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 				}
 				else
 				{
diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index e98e5388d5c7b..5088c3731b6df 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -2963,6 +2963,9 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct security_priv *psecuritypriv = &(padapter->securitypriv);
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey;
+	char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey;
+	char *grpkey = psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey;
 
 	param->u.crypt.err = 0;
 	param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0';
@@ -3064,7 +3067,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 	if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /*  group key */
 		if (param->u.crypt.set_tx == 1) {
 			if (strcmp(param->u.crypt.alg, "WEP") == 0) {
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 				if (param->u.crypt.key_len == 13)
@@ -3073,11 +3076,11 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 			} else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
 				psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 				/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 				/* set mic key */
-				memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
+				memcpy(txkey, &(param->u.crypt.key[16]), 8);
 				memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
 
 				psecuritypriv->busetkipkey = true;
@@ -3086,7 +3089,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 			else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
 				psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-				memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+				memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 			} else {
 				psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
 			}
@@ -3142,7 +3145,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 
 			} else { /* group key??? */
 				if (strcmp(param->u.crypt.alg, "WEP") == 0) {
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					psecuritypriv->dot118021XGrpPrivacy = _WEP40_;
 					if (param->u.crypt.key_len == 13)
@@ -3150,19 +3153,19 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param,
 				} else if (strcmp(param->u.crypt.alg, "TKIP") == 0) {
 					psecuritypriv->dot118021XGrpPrivacy = _TKIP_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 
 					/* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */
 					/* set mic key */
-					memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8);
-					memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8);
+					memcpy(txkey, &(param->u.crypt.key[16]), 8);
+					memcpy(rxkey, &(param->u.crypt.key[24]), 8);
 
 					psecuritypriv->busetkipkey = true;
 
 				} else if (strcmp(param->u.crypt.alg, "CCMP") == 0) {
 					psecuritypriv->dot118021XGrpPrivacy = _AES_;
 
-					memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
+					memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len));
 				} else {
 					psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_;
 				}
-- 
GitLab


From cabb1bb60e88ccaaa122ba01862403cd44e8e8f8 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 26 Apr 2021 19:55:58 +0200
Subject: [PATCH 0232/3804] mmc: meson-gx: make replace WARN_ONCE with
 dev_warn_once about scatterlist offset alignment

Some drivers like ath10k can sometimes give an sg buffer with an offset whose alignment
is not compatible with the Amlogic DMA descriptor engine requirements.

Simply replace with dev_warn_once() to inform user this should be fixed to avoid
degraded performance.

This should be ultimately fixed in ath10k, but since it's only a performance issue
the warning should be removed.

Fixes: 79ed05e329c3 ("mmc: meson-gx: add support for descriptor chain mode")
Cc: stable@vger.kernel.org
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20210426175559.3110575-1-narmstrong@baylibre.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index b8b771b643cc8..1c61f0f24c09b 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -258,7 +258,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	for_each_sg(data->sg, sg, data->sg_len, i) {
 		/* check for 8 byte alignment */
 		if (sg->offset % 8) {
-			WARN_ONCE(1, "unaligned scatterlist buffer\n");
+			dev_warn_once(mmc_dev(mmc),
+				      "unaligned sg offset %u, disabling descriptor DMA for transfer\n",
+				      sg->offset);
 			return;
 		}
 	}
-- 
GitLab


From 9b81354d7ebc1fd17f666a168dcabf27dae290bd Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 26 Apr 2021 19:55:59 +0200
Subject: [PATCH 0233/3804] mmc: meson-gx: also check SD_IO_RW_EXTENDED for
 scatterlist size alignment

The brcmfmac driver can generate a scatterlist from a skb with each packet
not aligned to the block size. This is not supported by the Amlogic descriptor
DMA engine, where each descriptor must match a multiple of the block size.

The sg list is valid, since the sum of the sg buffers is a multiple of the
block size, but we must discard those when in SD_IO_RW_EXTENDED mode since
SDIO block mode can be used under the hood even with data->blocks == 1.

Those transfers are very rare, thus can be replaced by a bounce buffer
without real performance loss.

Fixes: 7412dee9f1fd ("mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode")
Cc: stable@vger.kernel.org
Reported-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/20210426175559.3110575-2-narmstrong@baylibre.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 1c61f0f24c09b..016a6106151a5 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -236,7 +236,8 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc,
 	if (host->dram_access_quirk)
 		return;
 
-	if (data->blocks > 1) {
+	/* SD_IO_RW_EXTENDED (CMD53) can also use block mode under the hood */
+	if (data->blocks > 1 || mrq->cmd->opcode == SD_IO_RW_EXTENDED) {
 		/*
 		 * In block mode DMA descriptor format, "length" field indicates
 		 * number of blocks and there is no way to pass DMA size that
-- 
GitLab


From a1149a6c06ee094a6e62886b0c0e8e66967a728a Mon Sep 17 00:00:00 2001
From: Daniel Beer <dlbeer@gmail.com>
Date: Sat, 24 Apr 2021 20:16:52 +1200
Subject: [PATCH 0234/3804] mmc: sdhci-pci-gli: increase 1.8V regulator wait

Inserting an SD-card on an Intel NUC10i3FNK4 (which contains a GL9755)
results in the message:

    mmc0: 1.8V regulator output did not become stable

Following this message, some cards work (sometimes), but most cards fail
with EILSEQ. This behaviour is observed on Debian 10 running kernel
4.19.188, but also with 5.8.18 and 5.11.15.

The driver currently waits 5ms after switching on the 1.8V regulator for
it to become stable. Increasing this to 10ms gets rid of the warning
about stability, but most cards still fail. Increasing it to 20ms gets
some cards working (a 32GB Samsung micro SD works, a 128GB ADATA
doesn't). At 50ms, the ADATA works most of the time, and at 100ms both
cards work reliably.

Signed-off-by: Daniel Beer <dlbeer@gmail.com>
Acked-by: Ben Chuang <benchuanggli@gmail.com>
Fixes: e51df6ce668a ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210424081652.GA16047@nyquist.nev
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-gli.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 592d79082f58c..061618aa247f5 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -627,8 +627,13 @@ static void sdhci_gli_voltage_switch(struct sdhci_host *host)
 	 *
 	 * Wait 5ms after set 1.8V signal enable in Host Control 2 register
 	 * to ensure 1.8V signal enable bit is set by GL9750/GL9755.
+	 *
+	 * ...however, the controller in the NUC10i3FNK4 (a 9755) requires
+	 * slightly longer than 5ms before the control register reports that
+	 * 1.8V is ready, and far longer still before the card will actually
+	 * work reliably.
 	 */
-	usleep_range(5000, 5500);
+	usleep_range(100000, 110000);
 }
 
 static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask)
-- 
GitLab


From 18abf874367456540846319574864e6ff32752e2 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Mon, 26 Apr 2021 11:26:22 +0200
Subject: [PATCH 0235/3804] cdc-wdm: untangle a circular dependency between
 callback and softint

We have a cycle of callbacks scheduling works which submit
URBs with those callbacks. This needs to be blocked, stopped
and unblocked to untangle the circle.

Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/r/20210426092622.20433-1-oneukum@suse.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 508b1c3f8b731..d1e4a7379bebd 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -321,12 +321,23 @@ exit:
 
 }
 
-static void kill_urbs(struct wdm_device *desc)
+static void poison_urbs(struct wdm_device *desc)
 {
 	/* the order here is essential */
-	usb_kill_urb(desc->command);
-	usb_kill_urb(desc->validity);
-	usb_kill_urb(desc->response);
+	usb_poison_urb(desc->command);
+	usb_poison_urb(desc->validity);
+	usb_poison_urb(desc->response);
+}
+
+static void unpoison_urbs(struct wdm_device *desc)
+{
+	/*
+	 *  the order here is not essential
+	 *  it is symmetrical just to be nice
+	 */
+	usb_unpoison_urb(desc->response);
+	usb_unpoison_urb(desc->validity);
+	usb_unpoison_urb(desc->command);
 }
 
 static void free_urbs(struct wdm_device *desc)
@@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file)
 	if (!desc->count) {
 		if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
 			dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
-			kill_urbs(desc);
+			poison_urbs(desc);
 			spin_lock_irq(&desc->iuspin);
 			desc->resp_count = 0;
 			spin_unlock_irq(&desc->iuspin);
 			desc->manage_power(desc->intf, 0);
+			unpoison_urbs(desc);
 		} else {
 			/* must avoid dev_printk here as desc->intf is invalid */
 			pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__);
@@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
-	kill_urbs(desc);
 	mutex_unlock(&desc->wlock);
 	mutex_unlock(&desc->rlock);
 
@@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message)
 		set_bit(WDM_SUSPENDING, &desc->flags);
 		spin_unlock_irq(&desc->iuspin);
 		/* callback submits work - order is essential */
-		kill_urbs(desc);
+		poison_urbs(desc);
 		cancel_work_sync(&desc->rxwork);
 		cancel_work_sync(&desc->service_outs_intr);
+		unpoison_urbs(desc);
 	}
 	if (!PMSG_IS_AUTO(message)) {
 		mutex_unlock(&desc->wlock);
@@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf)
 	wake_up_all(&desc->wait);
 	mutex_lock(&desc->rlock);
 	mutex_lock(&desc->wlock);
-	kill_urbs(desc);
+	poison_urbs(desc);
 	cancel_work_sync(&desc->rxwork);
 	cancel_work_sync(&desc->service_outs_intr);
 	return 0;
@@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf)
 	struct wdm_device *desc = wdm_find_device(intf);
 	int rv;
 
+	unpoison_urbs(desc);
 	clear_bit(WDM_OVERFLOW, &desc->flags);
 	clear_bit(WDM_RESETTING, &desc->flags);
 	rv = recover_from_urb_loss(desc);
-- 
GitLab


From 04357fafea9c7ed34525eb9680c760245c3bb958 Mon Sep 17 00:00:00 2001
From: Ferry Toth <ftoth@exalondelft.nl>
Date: Sun, 25 Apr 2021 17:09:47 +0200
Subject: [PATCH 0236/3804] usb: dwc3: pci: Enable usb2-gadget-lpm-disable for
 Intel Merrifield

On Intel Merrifield LPM is causing host to reset port after a timeout.
By disabling LPM entirely this is prevented.

Fixes: 066c09593454 ("usb: dwc3: pci: Enable extcon driver for Intel Merrifield")
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Ferry Toth <ftoth@exalondelft.nl>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210425150947.5862-1-ftoth@exalondelft.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index e7b932dcbf820..1e51460938b83 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -123,6 +123,7 @@ static const struct property_entry dwc3_pci_mrfld_properties[] = {
 	PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"),
 	PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"),
 	PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
+	PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"),
 	PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
 	{}
 };
-- 
GitLab


From 9cbc7eb17cdf6d1adaa2aebfe0079077d31d39a9 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 26 Apr 2021 14:08:40 -0700
Subject: [PATCH 0237/3804] usb: dwc3: core: Add missing GHWPARAMS9 doc

Add missing documentation for struct dwc3_hwparams new field hwparams9
to avoid kernel doc build warning.

Fixes: 16710380d3aa ("usb: dwc3: Capture new capability register GHWPARAMS9")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/f4c491f7614e623755fafe640b7e690e7c5634e2.1619471127.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index b1e875c58f20f..3859d8cad3cb4 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -850,6 +850,7 @@ struct dwc3_trb {
  * @hwparams6: GHWPARAMS6
  * @hwparams7: GHWPARAMS7
  * @hwparams8: GHWPARAMS8
+ * @hwparams9: GHWPARAMS9
  */
 struct dwc3_hwparams {
 	u32	hwparams0;
-- 
GitLab


From 6c05cdbb9ef1de0264cac9135f6e90dad1e8763f Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sun, 25 Apr 2021 12:32:53 -0300
Subject: [PATCH 0238/3804] usb: Restore the reference to ch9.h

Keep the textual reference to ch9.h as it was prior to commit
caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location").

As linux/usb/ch9.h does not contain comments anymore, explain
that drivers/usb/common/common.c includes such header and provides
declarations of a few utility routines for manipulating the data types
from ch9.h. Also mention that drivers/usb/common/debug.c contains
some functions for creating debug output.

Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location")
Reported-by: Alan Stern <stern@rowland.harvard.edu>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20210425153253.2542816-1-festevam@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/usb/usb.rst | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst
index 543e70434da22..820e867af45ab 100644
--- a/Documentation/driver-api/usb/usb.rst
+++ b/Documentation/driver-api/usb/usb.rst
@@ -109,16 +109,19 @@ well as to make sure they aren't relying on some HCD-specific behavior.
 USB-Standard Types
 ==================
 
-In ``drivers/usb/common/common.c`` and ``drivers/usb/common/debug.c`` you
-will find the USB data types defined in chapter 9 of the USB specification.
-These data types are used throughout USB, and in APIs including this host
-side API, gadget APIs, usb character devices and debugfs interfaces.
+In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined
+in chapter 9 of the USB specification. These data types are used throughout
+USB, and in APIs including this host side API, gadget APIs, usb character
+devices and debugfs interfaces. That file is itself included by
+``include/linux/usb/ch9.h``, which also contains declarations of a few
+utility routines for manipulating these data types; the implementations
+are in ``drivers/usb/common/common.c``.
 
 .. kernel-doc:: drivers/usb/common/common.c
    :export:
 
-.. kernel-doc:: drivers/usb/common/debug.c
-   :export:
+In addition, some functions useful for creating debugging output are
+defined in ``drivers/usb/common/debug.c``.
 
 Host-Side Data Types and Macros
 ===============================
-- 
GitLab


From d1d90dd27254c44d087ad3f8b5b3e4fff0571f45 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Wed, 28 Apr 2021 02:01:10 -0700
Subject: [PATCH 0239/3804] usb: dwc3: gadget: Enable suspend events

commit 72704f876f50 ("dwc3: gadget: Implement the suspend entry event
handler") introduced (nearly 5 years ago!) an interrupt handler for
U3/L1-L2 suspend events.  The problem is that these events aren't
currently enabled in the DEVTEN register so the handler is never
even invoked.  Fix this simply by enabling the corresponding bit
in dwc3_gadget_enable_irq() using the same revision check as found
in the handler.

Fixes: 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210428090111.3370-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dd80e5ca8c78b..cab3a91840689 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2323,6 +2323,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc)
 	if (DWC3_VER_IS_PRIOR(DWC3, 250A))
 		reg |= DWC3_DEVTEN_ULSTCNGEN;
 
+	/* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */
+	if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
+		reg |= DWC3_DEVTEN_EOPFEN;
+
 	dwc3_writel(dwc->regs, DWC3_DEVTEN, reg);
 }
 
-- 
GitLab


From 6f26ebb79a84bcad211cb2d8a2ef74dfc427322d Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Wed, 28 Apr 2021 02:01:11 -0700
Subject: [PATCH 0240/3804] usb: dwc3: gadget: Rename EOPF event macros to
 Suspend

The device event corresponding to End of Periodic Frame is only
found on older IP revisions (2.10a and prior, according to a
cursory SNPS databook search).  On revisions 2.30a and newer,
including DWC3.1, the same event value and corresponding DEVTEN
bit were repurposed to indicate that the link has gone into
suspend state (U3 or L2/L1).

EOPF events had never been enabled before in this driver, and
going forward we expect current and future DWC3-based devices
won't be using such old DWC3 IP revisions either.
Hence rather than keeping the deprecated EOPF macro names let's
rename them to indicate their usage for suspend events.

Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210428090111.3370-2-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.h   | 6 +++---
 drivers/usb/dwc3/debug.h  | 8 ++++----
 drivers/usb/dwc3/gadget.c | 4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 3859d8cad3cb4..c5d5760cdf53e 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -57,7 +57,7 @@
 #define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE	3
 #define DWC3_DEVICE_EVENT_WAKEUP		4
 #define DWC3_DEVICE_EVENT_HIBER_REQ		5
-#define DWC3_DEVICE_EVENT_EOPF			6
+#define DWC3_DEVICE_EVENT_SUSPEND		6
 #define DWC3_DEVICE_EVENT_SOF			7
 #define DWC3_DEVICE_EVENT_ERRATIC_ERROR		9
 #define DWC3_DEVICE_EVENT_CMD_CMPL		10
@@ -460,7 +460,7 @@
 #define DWC3_DEVTEN_CMDCMPLTEN		BIT(10)
 #define DWC3_DEVTEN_ERRTICERREN		BIT(9)
 #define DWC3_DEVTEN_SOFEN		BIT(7)
-#define DWC3_DEVTEN_EOPFEN		BIT(6)
+#define DWC3_DEVTEN_U3L2L1SUSPEN	BIT(6)
 #define DWC3_DEVTEN_HIBERNATIONREQEVTEN	BIT(5)
 #define DWC3_DEVTEN_WKUPEVTEN		BIT(4)
 #define DWC3_DEVTEN_ULSTCNGEN		BIT(3)
@@ -1375,7 +1375,7 @@ struct dwc3_event_depevt {
  *	3	- ULStChng
  *	4	- WkUpEvt
  *	5	- Reserved
- *	6	- EOPF
+ *	6	- Suspend (EOPF on revisions 2.10a and prior)
  *	7	- SOF
  *	8	- Reserved
  *	9	- ErrticErr
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index db231de46bb35..d0ac89c5b3172 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -221,8 +221,8 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size,
 		snprintf(str, size, "WakeUp [%s]",
 				dwc3_gadget_link_string(state));
 		break;
-	case DWC3_DEVICE_EVENT_EOPF:
-		snprintf(str, size, "End-Of-Frame [%s]",
+	case DWC3_DEVICE_EVENT_SUSPEND:
+		snprintf(str, size, "Suspend [%s]",
 				dwc3_gadget_link_string(state));
 		break;
 	case DWC3_DEVICE_EVENT_SOF:
@@ -353,8 +353,8 @@ static inline const char *dwc3_gadget_event_type_string(u8 event)
 		return "Wake-Up";
 	case DWC3_DEVICE_EVENT_HIBER_REQ:
 		return "Hibernation";
-	case DWC3_DEVICE_EVENT_EOPF:
-		return "End of Periodic Frame";
+	case DWC3_DEVICE_EVENT_SUSPEND:
+		return "Suspend";
 	case DWC3_DEVICE_EVENT_SOF:
 		return "Start of Frame";
 	case DWC3_DEVICE_EVENT_ERRATIC_ERROR:
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index cab3a91840689..6eab78f8a1a78 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2325,7 +2325,7 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc)
 
 	/* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */
 	if (!DWC3_VER_IS_PRIOR(DWC3, 230A))
-		reg |= DWC3_DEVTEN_EOPFEN;
+		reg |= DWC3_DEVTEN_U3L2L1SUSPEN;
 
 	dwc3_writel(dwc->regs, DWC3_DEVTEN, reg);
 }
@@ -3744,7 +3744,7 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc,
 	case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE:
 		dwc3_gadget_linksts_change_interrupt(dwc, event->event_info);
 		break;
-	case DWC3_DEVICE_EVENT_EOPF:
+	case DWC3_DEVICE_EVENT_SUSPEND:
 		/* It changed to be suspend event for version 2.30a and above */
 		if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) {
 			/*
-- 
GitLab


From 75a41ce46bae6cbe7d3bb2584eb844291d642874 Mon Sep 17 00:00:00 2001
From: Phil Elwell <phil@raspberrypi.com>
Date: Thu, 6 May 2021 12:22:00 +0100
Subject: [PATCH 0241/3804] usb: dwc2: Fix gadget DMA unmap direction

The dwc2 gadget support maps and unmaps DMA buffers as necessary. When
mapping and unmapping it uses the direction of the endpoint to select
the direction of the DMA transfer, but this fails for Control OUT
transfers because the unmap occurs after the endpoint direction has
been reversed for the status phase.

A possible solution would be to unmap the buffer before the direction
is changed, but a safer, less invasive fix is to remember the buffer
direction independently of the endpoint direction.

Fixes: fe0b94abcdf6 ("usb: dwc2: gadget: manage ep0 state in software")
Acked-by: Minas Harutyunyan <Minas.Harutyunyan@synopsys.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Phil Elwell <phil@raspberrypi.com>
Link: https://lore.kernel.org/r/20210506112200.2893922-1-phil@raspberrypi.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/core.h   | 2 ++
 drivers/usb/dwc2/gadget.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index da5ac4a4595b6..ab6b815e0089c 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -113,6 +113,7 @@ struct dwc2_hsotg_req;
  * @debugfs: File entry for debugfs file for this endpoint.
  * @dir_in: Set to true if this endpoint is of the IN direction, which
  *          means that it is sending data to the Host.
+ * @map_dir: Set to the value of dir_in when the DMA buffer is mapped.
  * @index: The index for the endpoint registers.
  * @mc: Multi Count - number of transactions per microframe
  * @interval: Interval for periodic endpoints, in frames or microframes.
@@ -162,6 +163,7 @@ struct dwc2_hsotg_ep {
 	unsigned short		fifo_index;
 
 	unsigned char           dir_in;
+	unsigned char           map_dir;
 	unsigned char           index;
 	unsigned char           mc;
 	u16                     interval;
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index e6bb1bdb27603..184964174dc0c 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -422,7 +422,7 @@ static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg,
 {
 	struct usb_request *req = &hs_req->req;
 
-	usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->dir_in);
+	usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->map_dir);
 }
 
 /*
@@ -1242,6 +1242,7 @@ static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg,
 {
 	int ret;
 
+	hs_ep->map_dir = hs_ep->dir_in;
 	ret = usb_gadget_map_request(&hsotg->gadget, req, hs_ep->dir_in);
 	if (ret)
 		goto dma_error;
-- 
GitLab


From bb9c74a5bd1462499fe5ccb1e3c5ac40dcfa9139 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Sat, 1 May 2021 02:35:58 -0700
Subject: [PATCH 0242/3804] usb: dwc3: gadget: Free gadget structure only after
 freeing endpoints

As part of commit e81a7018d93a ("usb: dwc3: allocate gadget structure
dynamically") the dwc3_gadget_release() was added which will free
the dwc->gadget structure upon the device's removal when
usb_del_gadget_udc() is called in dwc3_gadget_exit().

However, simply freeing the gadget results in a dangling pointer
situation: the endpoints created in dwc3_gadget_init_endpoints()
have their dep->endpoint.ep_list members chained off the list_head
anchored at dwc->gadget->ep_list.  Thus when dwc->gadget is freed,
the first dwc3_ep in the list now has a dangling prev pointer and
likewise for the next pointer of the dwc3_ep at the tail of the list.
The dwc3_gadget_free_endpoints() that follows will result in a
use-after-free when it calls list_del().

This was caught by enabling KASAN and performing a driver unbind.
The recent commit 568262bf5492 ("usb: dwc3: core: Add shutdown
callback for dwc3") also exposes this as a panic during shutdown.

There are a few possibilities to fix this.  One could be to perform
a list_del() of the gadget->ep_list itself which removes it from
the rest of the dwc3_ep chain.

Another approach is what this patch does, by splitting up the
usb_del_gadget_udc() call into its separate "del" and "put"
components.  This allows dwc3_gadget_free_endpoints() to be
called before the gadget is finally freed with usb_put_gadget().

Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
Reviewed-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210501093558.7375-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 6eab78f8a1a78..dd1342403bb24 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4062,8 +4062,9 @@ err0:
 
 void dwc3_gadget_exit(struct dwc3 *dwc)
 {
-	usb_del_gadget_udc(dwc->gadget);
+	usb_del_gadget(dwc->gadget);
 	dwc3_gadget_free_endpoints(dwc);
+	usb_put_gadget(dwc->gadget);
 	dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
 			  dwc->bounce_addr);
 	kfree(dwc->setup_buf);
-- 
GitLab


From 18ffa988dbae69cc6e9949cddd9606f6fe533894 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Fri, 7 May 2021 10:55:19 -0700
Subject: [PATCH 0243/3804] usb: dwc3: gadget: Return success always for kick
 transfer in ep queue

If an error is received when issuing a start or update transfer
command, the error handler will stop all active requests (including
the current USB request), and call dwc3_gadget_giveback() to notify
function drivers of the requests which have been stopped.  Avoid
returning an error for kick transfer during EP queue, to remove
duplicate cleanup operations on the request being queued.

Fixes: 8d99087c2db8 ("usb: dwc3: gadget: Properly handle failed kick_transfer")
cc: stable@vger.kernel.org
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1620410119-24971-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index dd1342403bb24..49ca5da5e2794 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1684,7 +1684,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 		}
 	}
 
-	return __dwc3_gadget_kick_transfer(dep);
+	__dwc3_gadget_kick_transfer(dep);
+
+	return 0;
 }
 
 static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request,
-- 
GitLab


From b96992081fde19806b5beb5b25f9327820ead77b Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Fri, 30 Apr 2021 14:57:16 +0800
Subject: [PATCH 0244/3804] usb: dwc3: imx8mp: detect dwc3 core node via
 compatible string

The new schema for USB controller DT nodes requires names with the prefix
"^usb(@.*)?". The DT changed the node name, but the counterpart change in
the driver was missed. Fix it by switching to the compatible string, as
the dwc3 core compatible string remains "snps,dwc3" in all DTs.

Fixes: d1689cd3c0f4 ("arm64: dts: imx8mp: Use the correct name for child node "snps, dwc3"")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1619765836-20387-1-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-imx8mp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c
index b13cfab89d532..e9fced6f7a7c9 100644
--- a/drivers/usb/dwc3/dwc3-imx8mp.c
+++ b/drivers/usb/dwc3/dwc3-imx8mp.c
@@ -165,7 +165,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto disable_rpm;
 
-	dwc3_np = of_get_child_by_name(node, "dwc3");
+	dwc3_np = of_get_compatible_child(node, "snps,dwc3");
 	if (!dwc3_np) {
 		dev_err(dev, "failed to find dwc3 core child\n");
 		goto disable_rpm;
-- 
GitLab


From 0b2b149e918f6dddb4ea53615551bf7bc131f875 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 09:53:10 +0800
Subject: [PATCH 0245/3804] usb: dwc3: imx8mp: fix error return code in
 dwc3_imx8mp_probe()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Fixes: 6dd2565989b4 ("usb: dwc3: add imx8mp dwc3 glue layer driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210508015310.1627-1-thunder.leizhen@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-imx8mp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c
index e9fced6f7a7c9..756faa46d33a7 100644
--- a/drivers/usb/dwc3/dwc3-imx8mp.c
+++ b/drivers/usb/dwc3/dwc3-imx8mp.c
@@ -167,6 +167,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev)
 
 	dwc3_np = of_get_compatible_child(node, "snps,dwc3");
 	if (!dwc3_np) {
+		err = -ENODEV;
 		dev_err(dev, "failed to find dwc3 core child\n");
 		goto disable_rpm;
 	}
-- 
GitLab


From e89baeba4f64bab679618b3330cdcda5929fb8d5 Mon Sep 17 00:00:00 2001
From: Matthijs Kooijman <matthijs@stdin.nl>
Date: Mon, 3 May 2021 20:05:38 +0200
Subject: [PATCH 0246/3804] usb: dwc2: Remove obsolete MODULE_ constants from
 platform.c

Originally, the core and platform drivers were separate modules, so each
had its own module info. Since commit 2d1165a4b95e (usb: dwc2: remove
dwc2_platform.ko) platform.c is included in the core module, which now
contains duplicate module info (from core.c and platform.c).

Due to the linking order and modinfo implementation, running `modinfo`
on the resulting dwc2.ko shows just the info from platform.c, rather
than that from core.c, suggesting that I am the author of the entire
dwc2 module. Since platform.c is just a minor part of the entire module,
this removes its module info in favor of the info from core.c.

Acked-by: Minas Harutyunyan <Minas.Harutyunyan@synopsys.com>
Signed-off-by: Matthijs Kooijman <matthijs@stdin.nl>
Link: https://lore.kernel.org/r/20210503180538.64423-1-matthijs@stdin.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/platform.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 3024785d84cb8..520a0beef77ca 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -776,7 +776,3 @@ static struct platform_driver dwc2_platform_driver = {
 };
 
 module_platform_driver(dwc2_platform_driver);
-
-MODULE_DESCRIPTION("DESIGNWARE HS OTG Platform Glue");
-MODULE_AUTHOR("Matthijs Kooijman <matthijs@stdin.nl>");
-MODULE_LICENSE("Dual BSD/GPL");
-- 
GitLab


From 2e2b8d15adc2f6ab2d4aa0550e241b9742a436a0 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 4 May 2021 01:18:49 +0800
Subject: [PATCH 0247/3804] usb: typec: tcpm: Fix wrong handling in
 GET_SINK_CAP

After receiving a Sink Capabilities Message in the GET_SINK_CAP AMS, it is
incorrect to call tcpm_pd_handle_state because the Message is expected
and the current state is not one of the Ready states. This incorrect
operation ends in a Soft Reset, which is definitely wrong. Simply
transitioning to the Ready state is enough to finish the AMS.

Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503171849.2605302-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index c4fdc00a3bc8f..68e04e397e924 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2390,7 +2390,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 		port->nr_sink_caps = cnt;
 		port->sink_cap_done = true;
 		if (port->ams == GET_SINK_CAPABILITIES)
-			tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0);
+			tcpm_set_state(port, ready_state(port), 0);
 		/* Unexpected Sink Capabilities */
 		else
 			tcpm_pd_handle_msg(port,
-- 
GitLab


From 8edb79af88efc6e49e735f9baf61d9f0748b881f Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 7 Apr 2021 11:49:27 +0800
Subject: [PATCH 0248/3804] iio: light: gp2ap002: Fix rumtime PM imbalance on
 error

When devm_request_threaded_irq() fails, we should decrease the
runtime PM counter to keep the counter balanced. But when
iio_device_register() fails, we need not decrease it because
we have already decreased it before.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Fixes: 97d642e23037 ("iio: light: Add a driver for Sharp GP2AP002x00F")
Link: https://lore.kernel.org/r/20210407034927.16882-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/gp2ap002.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c
index d048ae257c519..f960be7d40016 100644
--- a/drivers/iio/light/gp2ap002.c
+++ b/drivers/iio/light/gp2ap002.c
@@ -582,7 +582,7 @@ static int gp2ap002_probe(struct i2c_client *client,
 					"gp2ap002", indio_dev);
 	if (ret) {
 		dev_err(dev, "unable to request IRQ\n");
-		goto out_disable_vio;
+		goto out_put_pm;
 	}
 	gp2ap002->irq = client->irq;
 
@@ -612,8 +612,9 @@ static int gp2ap002_probe(struct i2c_client *client,
 
 	return 0;
 
-out_disable_pm:
+out_put_pm:
 	pm_runtime_put_noidle(dev);
+out_disable_pm:
 	pm_runtime_disable(dev);
 out_disable_vio:
 	regulator_disable(gp2ap002->vio);
-- 
GitLab


From a2fa9242e89f27696515699fe0f0296bf1ac1815 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Mon, 12 Apr 2021 13:32:02 +0800
Subject: [PATCH 0249/3804] iio: proximity: pulsedlight: Fix rumtime PM
 imbalance on error

When lidar_write_control() fails, a pairing PM usage counter
decrement is needed to keep the counter balanced.

Fixes: 4ac4e086fd8c5 ("iio: pulsedlight-lidar-lite: add runtime PM")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210412053204.4889-1-dinghao.liu@zju.edu.cn
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index c685f10b5ae48..cc206bfa09c78 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -160,6 +160,7 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg)
 	ret = lidar_write_control(data, LIDAR_REG_CONTROL_ACQUIRE);
 	if (ret < 0) {
 		dev_err(&client->dev, "cannot send start measurement command");
+		pm_runtime_put_noidle(&client->dev);
 		return ret;
 	}
 
-- 
GitLab


From 7061803522ee7876df1ca18cdd1e1551f761352d Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Wed, 14 Apr 2021 11:49:55 +0300
Subject: [PATCH 0250/3804] iio: hid-sensors: select IIO_TRIGGERED_BUFFER under
 HID_SENSOR_IIO_TRIGGER

During commit 067fda1c065ff ("iio: hid-sensors: move triggered buffer
setup into hid_sensor_setup_trigger"), the
iio_triggered_buffer_{setup,cleanup}() functions got moved under the
hid-sensor-trigger module.

The above change works fine, if any of the sensors get built. However, when
only the common hid-sensor-trigger module gets built (and none of the
drivers), then the IIO_TRIGGERED_BUFFER symbol isn't selected/enforced.

Previously, each driver would enforce/select the IIO_TRIGGERED_BUFFER
symbol. With this change the HID_SENSOR_IIO_TRIGGER (for the
hid-sensor-trigger module) will enforce that IIO_TRIGGERED_BUFFER gets
selected.

All HID sensor drivers select the HID_SENSOR_IIO_TRIGGER symbol. So, this
change removes the IIO_TRIGGERED_BUFFER enforcement from each driver.

Fixes: 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger")
Reported-by: Thomas Deutschmann <whissi@gentoo.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lore.kernel.org/r/20210414084955.260117-1-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/Kconfig              | 1 -
 drivers/iio/common/hid-sensors/Kconfig | 1 +
 drivers/iio/gyro/Kconfig               | 1 -
 drivers/iio/humidity/Kconfig           | 1 -
 drivers/iio/light/Kconfig              | 2 --
 drivers/iio/magnetometer/Kconfig       | 1 -
 drivers/iio/orientation/Kconfig        | 2 --
 drivers/iio/pressure/Kconfig           | 1 -
 drivers/iio/temperature/Kconfig        | 1 -
 9 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index cceda3cecbcf4..8b1723635cce5 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -229,7 +229,6 @@ config DMARD10
 config HID_SENSOR_ACCEL_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Accelerometers 3D"
diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig
index 24d4925673363..2a3dd3b907bee 100644
--- a/drivers/iio/common/hid-sensors/Kconfig
+++ b/drivers/iio/common/hid-sensors/Kconfig
@@ -19,6 +19,7 @@ config HID_SENSOR_IIO_TRIGGER
 	tristate "Common module (trigger) for all HID Sensor IIO drivers"
 	depends on HID_SENSOR_HUB && HID_SENSOR_IIO_COMMON && IIO_BUFFER
 	select IIO_TRIGGER
+	select IIO_TRIGGERED_BUFFER
 	help
 	  Say yes here to build trigger support for HID sensors.
 	  Triggers will be send if all requested attributes were read.
diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig
index 5824f2edf9758..20b5ac7ab66af 100644
--- a/drivers/iio/gyro/Kconfig
+++ b/drivers/iio/gyro/Kconfig
@@ -111,7 +111,6 @@ config FXAS21002C_SPI
 config HID_SENSOR_GYRO_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Gyroscope 3D"
diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig
index 6549fcf6db698..2de5494e7c225 100644
--- a/drivers/iio/humidity/Kconfig
+++ b/drivers/iio/humidity/Kconfig
@@ -52,7 +52,6 @@ config HID_SENSOR_HUMIDITY
 	tristate "HID Environmental humidity sensor"
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	help
diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index 33ad4dd0b5c7b..917f9becf9c75 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -256,7 +256,6 @@ config ISL29125
 config HID_SENSOR_ALS
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID ALS"
@@ -270,7 +269,6 @@ config HID_SENSOR_ALS
 config HID_SENSOR_PROX
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID PROX"
diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig
index 5d4ffd66032e9..74ad5701c6c29 100644
--- a/drivers/iio/magnetometer/Kconfig
+++ b/drivers/iio/magnetometer/Kconfig
@@ -95,7 +95,6 @@ config MAG3110
 config HID_SENSOR_MAGNETOMETER_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Magenetometer 3D"
diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig
index a505583cc2fda..396cbbb867f4c 100644
--- a/drivers/iio/orientation/Kconfig
+++ b/drivers/iio/orientation/Kconfig
@@ -9,7 +9,6 @@ menu "Inclinometer sensors"
 config HID_SENSOR_INCLINOMETER_3D
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Inclinometer 3D"
@@ -20,7 +19,6 @@ config HID_SENSOR_INCLINOMETER_3D
 config HID_SENSOR_DEVICE_ROTATION
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID Device Rotation"
diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig
index 689b978db4f95..fc0d3cfca4186 100644
--- a/drivers/iio/pressure/Kconfig
+++ b/drivers/iio/pressure/Kconfig
@@ -79,7 +79,6 @@ config DPS310
 config HID_SENSOR_PRESS
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	tristate "HID PRESS"
diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig
index f1f2a1499c9e2..4df60082c1fa8 100644
--- a/drivers/iio/temperature/Kconfig
+++ b/drivers/iio/temperature/Kconfig
@@ -45,7 +45,6 @@ config HID_SENSOR_TEMP
 	tristate "HID Environmental temperature sensor"
 	depends on HID_SENSOR_HUB
 	select IIO_BUFFER
-	select IIO_TRIGGERED_BUFFER
 	select HID_SENSOR_IIO_COMMON
 	select HID_SENSOR_IIO_TRIGGER
 	help
-- 
GitLab


From f73c730774d88a14d7b60feee6d0e13570f99499 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 23 Apr 2021 05:09:59 +0300
Subject: [PATCH 0251/3804] iio: gyro: mpu3050: Fix reported temperature value

The raw temperature value is a 16-bit signed integer. The sign casting
is missing in the code, which results in a wrong temperature reported
by userspace tools, fix it.

Cc: stable@vger.kernel.org
Fixes: 3904b28efb2c ("iio: gyro: Add driver for the MPU-3050 gyroscope")
Datasheet: https://www.cdiweb.com/datasheets/invensense/mpu-3000a.pdf
Tested-by: Maxim Schwalm <maxim.schwalm@gmail.com> # Asus TF700T
Tested-by: Svyatoslav Ryhel <clamor95@gmail.com> # Asus TF201
Reported-by: Svyatoslav Ryhel <clamor95@gmail.com>
Reviewed-by: Andy Shevchenko <Andy.Shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Jean-Baptiste Maneyrol <jmaneyrol@invensense.com>
Link: https://lore.kernel.org/r/20210423020959.5023-1-digetx@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/mpu3050-core.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c
index ac90be03332af..f17a935195352 100644
--- a/drivers/iio/gyro/mpu3050-core.c
+++ b/drivers/iio/gyro/mpu3050-core.c
@@ -272,7 +272,16 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_OFFSET:
 		switch (chan->type) {
 		case IIO_TEMP:
-			/* The temperature scaling is (x+23000)/280 Celsius */
+			/*
+			 * The temperature scaling is (x+23000)/280 Celsius
+			 * for the "best fit straight line" temperature range
+			 * of -30C..85C.  The 23000 includes room temperature
+			 * offset of +35C, 280 is the precision scale and x is
+			 * the 16-bit signed integer reported by hardware.
+			 *
+			 * Temperature value itself represents temperature of
+			 * the sensor die.
+			 */
 			*val = 23000;
 			return IIO_VAL_INT;
 		default:
@@ -329,7 +338,7 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev,
 				goto out_read_raw_unlock;
 			}
 
-			*val = be16_to_cpu(raw_val);
+			*val = (s16)be16_to_cpu(raw_val);
 			ret = IIO_VAL_INT;
 
 			goto out_read_raw_unlock;
-- 
GitLab


From 901f84de0e16bde10a72d7eb2f2eb73fcde8fa1a Mon Sep 17 00:00:00 2001
From: Tomasz Duszynski <tomasz.duszynski@octakon.com>
Date: Fri, 23 Apr 2021 10:02:44 +0200
Subject: [PATCH 0252/3804] iio: core: fix ioctl handlers removal

Currently ioctl handlers are removed twice. For the first time during
iio_device_unregister() then later on inside
iio_device_unregister_eventset() and iio_buffers_free_sysfs_and_mask().
Double free leads to kernel panic.

Fix this by not touching ioctl handlers list directly but rather
letting code responsible for registration call the matching cleanup
routine itself.

Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev")
Signed-off-by: Tomasz Duszynski <tomasz.duszynski@octakon.com>
Acked-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Cc: <Stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210423080244.2790-1-tomasz.duszynski@octakon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-core.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index d92c58a94fe4f..9e59f5da3d280 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1926,9 +1926,6 @@ EXPORT_SYMBOL(__iio_device_register);
  **/
 void iio_device_unregister(struct iio_dev *indio_dev)
 {
-	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
-	struct iio_ioctl_handler *h, *t;
-
 	cdev_device_del(&indio_dev->chrdev, &indio_dev->dev);
 
 	mutex_lock(&indio_dev->info_exist_lock);
@@ -1939,9 +1936,6 @@ void iio_device_unregister(struct iio_dev *indio_dev)
 
 	indio_dev->info = NULL;
 
-	list_for_each_entry_safe(h, t, &iio_dev_opaque->ioctl_handlers, entry)
-		list_del(&h->entry);
-
 	iio_device_wakeup_eventset(indio_dev);
 	iio_buffer_wakeup_poll(indio_dev);
 
-- 
GitLab


From af0670b0bf1b116fd729b1b1011cf814bc34e12e Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 3 May 2021 17:43:50 +0300
Subject: [PATCH 0253/3804] iio: core: return ENODEV if ioctl is unknown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the ioctl() mechanism was introduced in IIO core to centralize the
registration of all ioctls in one place via commit 8dedcc3eee3ac ("iio:
core: centralize ioctl() calls to the main chardev"), the return code was
changed from ENODEV to EINVAL, when the ioctl code isn't known.

This was done by accident.

This change reverts back to the old behavior, where if the ioctl() code
isn't known, ENODEV is returned (vs EINVAL).

This was brought into perspective by this patch:
  https://lore.kernel.org/linux-iio/20210428150815.136150-1-paul@crapouillou.net/

Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev")
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Tested-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9e59f5da3d280..59efb36db2c7c 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1778,7 +1778,6 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (!indio_dev->info)
 		goto out_unlock;
 
-	ret = -EINVAL;
 	list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) {
 		ret = h->ioctl(indio_dev, filp, cmd, arg);
 		if (ret != IIO_IOCTL_UNHANDLED)
@@ -1786,7 +1785,7 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	}
 
 	if (ret == IIO_IOCTL_UNHANDLED)
-		ret = -EINVAL;
+		ret = -ENODEV;
 
 out_unlock:
 	mutex_unlock(&indio_dev->info_exist_lock);
-- 
GitLab


From af0e1871d79cfbb91f732d2c6fa7558e45c31038 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 7 May 2021 19:30:41 +0100
Subject: [PATCH 0254/3804] iio: tsl2583: Fix division by a zero lux_val

The lux_val returned from tsl2583_get_lux can potentially be zero,
so check for this to avoid a division by zero and an overflowed
gain_trim_val.

Fixes clang scan-build warning:

drivers/iio/light/tsl2583.c:345:40: warning: Either the
condition 'lux_val<0' is redundant or there is division
by zero at line 345. [zerodivcond]

Fixes: ac4f6eee8fe8 ("staging: iio: TAOS tsl258x: Device driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl2583.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c
index 0f787bfc88fc4..c9d8f07a6fcdd 100644
--- a/drivers/iio/light/tsl2583.c
+++ b/drivers/iio/light/tsl2583.c
@@ -341,6 +341,14 @@ static int tsl2583_als_calibrate(struct iio_dev *indio_dev)
 		return lux_val;
 	}
 
+	/* Avoid division by zero of lux_value later on */
+	if (lux_val == 0) {
+		dev_err(&chip->client->dev,
+			"%s: lux_val of 0 will produce out of range trim_value\n",
+			__func__);
+		return -ENODATA;
+	}
+
 	gain_trim_val = (unsigned int)(((chip->als_settings.als_cal_target)
 			* chip->als_settings.als_gain_trim) / lux_val);
 	if ((gain_trim_val < 250) || (gain_trim_val > 4000)) {
-- 
GitLab


From b9a0866a5bdf6a4643a52872ada6be6184c6f4f2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Wed, 5 May 2021 01:23:37 +0300
Subject: [PATCH 0255/3804] usb: typec: ucsi: Put fwnode in any case during
 ->probe()

device_for_each_child_node() bumps the reference count of the node it returns.
We have to balance it whenever we return to the caller.

Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface")
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210504222337.3151726-1-andy.shevchenko@gmail.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 282c3c825c136..0e1cec346e0f8 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -999,6 +999,7 @@ static const struct typec_operations ucsi_ops = {
 	.pr_set = ucsi_pr_swap
 };
 
+/* Caller must call fwnode_handle_put() after use */
 static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con)
 {
 	struct fwnode_handle *fwnode;
@@ -1033,7 +1034,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
 	command |= UCSI_CONNECTOR_NUMBER(con->num);
 	ret = ucsi_send_command(ucsi, command, &con->cap, sizeof(con->cap));
 	if (ret < 0)
-		goto out;
+		goto out_unlock;
 
 	if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DRP)
 		cap->data = TYPEC_PORT_DRD;
@@ -1151,6 +1152,8 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
 	trace_ucsi_register_port(con->num, &con->status);
 
 out:
+	fwnode_handle_put(cap->fwnode);
+out_unlock:
 	mutex_unlock(&con->lock);
 	return ret;
 }
-- 
GitLab


From e17b02d4970913233d543c79c9c66e72cac05bdd Mon Sep 17 00:00:00 2001
From: Marcel Hamer <marcel@solidxs.se>
Date: Tue, 27 Apr 2021 14:21:18 +0200
Subject: [PATCH 0256/3804] usb: dwc3: omap: improve extcon initialization

When extcon is used in combination with dwc3, it is assumed that the dwc3
registers are untouched and as such are only configured if VBUS is valid
or ID is tied to ground.

In case VBUS is not valid or ID is floating, the registers are not
configured as such during driver initialization, causing a wrong
default state during boot.

If the registers are not in a default state, for instance because they
were touched by a boot loader, this can cause a kernel error.

Signed-off-by: Marcel Hamer <marcel@solidxs.se>
Link: https://lore.kernel.org/r/20210427122118.1948340-1-marcel@solidxs.se
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-omap.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 3db17806e92e7..e196673f5c647 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -437,8 +437,13 @@ static int dwc3_omap_extcon_register(struct dwc3_omap *omap)
 
 		if (extcon_get_state(edev, EXTCON_USB) == true)
 			dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID);
+		else
+			dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF);
+
 		if (extcon_get_state(edev, EXTCON_USB_HOST) == true)
 			dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND);
+		else
+			dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT);
 
 		omap->edev = edev;
 	}
-- 
GitLab


From f75297853470627c4ee4e2b80eed40af7441c96b Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Thu, 6 May 2021 20:20:20 +0800
Subject: [PATCH 0257/3804] docs: usb: function: Modify path name

The original path does not exist, so change it to
"Documentation/ABI/testing/configfs-usb-gadget"

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Link: https://lore.kernel.org/r/20210506122020.7117-1-jj251510319013@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/usb/gadget_configfs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst
index 158e48dab586d..e4566ffb223f2 100644
--- a/Documentation/usb/gadget_configfs.rst
+++ b/Documentation/usb/gadget_configfs.rst
@@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.::
 Each function provides its specific set of attributes, with either read-only
 or read-write access. Where applicable they need to be written to as
 appropriate.
-Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
+Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information.
 
 4. Associating the functions with their configurations
 ------------------------------------------------------
-- 
GitLab


From a60a34366e0d09ca002c966dd7c43a68c28b1f82 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 6 May 2021 22:39:10 +0200
Subject: [PATCH 0258/3804] usb: fotg210-hcd: Fix an error message

'retval' is known to be -ENODEV here.
This is a hard-coded default error code which is not useful in the error
message. Moreover, another error message is printed at the end of the
error handling path. The corresponding error code (-ENOMEM) is more
informative.

So simplify the first error message.

While at it, also remove the useless initialization of 'retval'.

Fixes: 7d50195f6c50 ("usb: host: Faraday fotg210-hcd driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/94531bcff98e46d4f9c20183a90b7f47f699126c.1620333419.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/fotg210-hcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 6cac642520fc8..9c2eda0918e13 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -5568,7 +5568,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 	struct usb_hcd *hcd;
 	struct resource *res;
 	int irq;
-	int retval = -ENODEV;
+	int retval;
 	struct fotg210_hcd *fotg210;
 
 	if (usb_disabled())
@@ -5588,7 +5588,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev)
 	hcd = usb_create_hcd(&fotg210_fotg210_hc_driver, dev,
 			dev_name(dev));
 	if (!hcd) {
-		dev_err(dev, "failed to create hcd with err %d\n", retval);
+		dev_err(dev, "failed to create hcd\n");
 		retval = -ENOMEM;
 		goto fail_create_hcd;
 	}
-- 
GitLab


From be1c2bb3ba5a39c20b1d54e01ffbcb2b1ca7e46c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 27 Apr 2021 09:00:28 +0100
Subject: [PATCH 0259/3804] ARM: PXA: Fix cplds irqdesc allocation when using
 legacy mode

The Mainstone PXA platform uses CONFIG_SPARSE_IRQ, and thus we
cannot rely on the irq descriptors to be readily allocated
before creating the irqdomain in legacy mode. The kernel then
complains loudly about not being able to associate the interrupt
in the domain -- can't blame it.

Fix it by allocating the irqdescs upfront in the legacy case.

Fixes: b68761da0111 ("ARM: PXA: Kill use of irq_create_strict_mappings()")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210426223942.GA213931@roeck-us.net
---
 arch/arm/mach-pxa/pxa_cplds_irqs.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c
index ec0d9b094744d..bddfc7cd5d40f 100644
--- a/arch/arm/mach-pxa/pxa_cplds_irqs.c
+++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c
@@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev)
 		return fpga->irq;
 
 	base_irq = platform_get_irq(pdev, 1);
-	if (base_irq < 0)
+	if (base_irq < 0) {
 		base_irq = 0;
+	} else {
+		ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0);
+		if (ret < 0)
+			return ret;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	fpga->base = devm_ioremap_resource(&pdev->dev, res);
-- 
GitLab


From 5b44955dc19808fa209444ccb192343050e95ab0 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 13 Apr 2021 14:21:58 +0200
Subject: [PATCH 0260/3804] irqchip/apple-aic: APPLE_AIC should depend on
 ARCH_APPLE

The Apple Interrupt Controller is only present on Apple Silicon SoCs.
Hence add a dependency on ARCH_APPLE, to prevent asking the user about
this driver when configuring a kernel without Apple Silicon SoC support.

Drop the default, as ARCH_APPLE already selects APPLE_AIC.

Fixes: 76cde26394114f6a ("irqchip/apple-aic: Add support for the Apple Interrupt Controller")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/f37e8daea37d50651d2164b0b3dad90780188548.1618316398.git.geert+renesas@glider.be
---
 drivers/irqchip/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index b90e825df7e14..62543a4eccc08 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -596,7 +596,7 @@ config IRQ_IDT3243X
 config APPLE_AIC
 	bool "Apple Interrupt Controller (AIC)"
 	depends on ARM64
-	default ARCH_APPLE
+	depends on ARCH_APPLE || COMPILE_TEST
 	help
 	  Support for the Apple Interrupt Controller found on Apple Silicon SoCs,
 	  such as the M1.
-- 
GitLab


From 726c945ab2ebd104631b6105ab455a5bc604a3f1 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 27 Apr 2021 12:42:19 +0800
Subject: [PATCH 0261/3804] hwmon: (corsair-psu) Remove unneeded semicolons

Fix the following coccicheck warning:

./drivers/hwmon/corsair-psu.c:379:2-3: Unneeded semicolon

Remove unneeded semicolons.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210427044219.7799-1-wanjiabing@vivo.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/corsair-psu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
index 3a5807e4a2efb..02298b86b57b6 100644
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p
 		return 0444;
 	default:
 		return 0;
-	};
+	}
 }
 
 static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr,
@@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv
 		break;
 	default:
 		break;
-	};
+	}
 
 	return res;
 }
-- 
GitLab


From 5216dff22dc2bbbbe6f00335f9fd2879670e753b Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Thu, 29 Apr 2021 10:13:36 -0500
Subject: [PATCH 0262/3804] hwmon: (occ) Fix poll rate limiting

The poll rate limiter time was initialized at zero. This breaks the
comparison in time_after if jiffies is large. Switch to storing the
next update time rather than the previous time, and initialize the
time when the device is probed.

Fixes: c10e753d43eb ("hwmon (occ): Add sensor types and versions")
Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20210429151336.18980-1-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/occ/common.c | 5 +++--
 drivers/hwmon/occ/common.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c
index f1ac153d0b568..967532afb1c01 100644
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ)
 		return rc;
 
 	/* limit the maximum rate of polling the OCC */
-	if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) {
+	if (time_after(jiffies, occ->next_update)) {
 		rc = occ_poll(occ);
-		occ->last_update = jiffies;
+		occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
 	} else {
 		rc = occ->last_error;
 	}
@@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name)
 		return rc;
 	}
 
+	occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
 	occ_parse_poll_response(occ);
 
 	rc = occ_setup_sensor_attrs(occ);
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h
index 67e6968b8978e..e6df719770e81 100644
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -99,7 +99,7 @@ struct occ {
 	u8 poll_cmd_data;		/* to perform OCC poll command */
 	int (*send_cmd)(struct occ *occ, u8 *cmd);
 
-	unsigned long last_update;
+	unsigned long next_update;
 	struct mutex lock;		/* lock OCC access */
 
 	struct device *hwmon;
-- 
GitLab


From 2d101db3e5be3bbee6001d4227705cec70ecb82e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=A1clav=20Kubern=C3=A1t?= <kubernat@cesnet.cz>
Date: Thu, 29 Apr 2021 09:53:38 +0200
Subject: [PATCH 0263/3804] hwmon: (pmbus/fsp-3y) Fix FSP-3Y YH-5151E
 non-compliant vout encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I didn't properly test the driver for YH-5151E, so it was completely
broken. Firstly, the log/real mapping was incorrect in one case.
Secondly, PMBus specifies that output voltages should be in the linear16
encoding. However, the YH-5151E is non-compliant and uses linear11.
YM-2151E isn't affected by this. Fix this by converting the values
inside the read functions. linear16 gets the exponent from the VOUT_MODE
command. The device doesn't support it, so I have to manually supply the
value for it.

Both supported devices have now been tested to report correct vout
values.

Fixes: 1734b4135a62 ("hwmon: Add driver for fsp-3y PSUs and PDUs")
Signed-off-by: Václav Kubernát <kubernat@cesnet.cz>
Link: https://lore.kernel.org/r/20210429075337.110502-1-kubernat@cesnet.cz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/fsp-3y.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c
index b177987286ae0..e248424752545 100644
--- a/drivers/hwmon/pmbus/fsp-3y.c
+++ b/drivers/hwmon/pmbus/fsp-3y.c
@@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip)
 		case YH5151E_PAGE_12V_LOG:
 			return YH5151E_PAGE_12V_REAL;
 		case YH5151E_PAGE_5V_LOG:
-			return YH5151E_PAGE_5V_LOG;
+			return YH5151E_PAGE_5V_REAL;
 		case YH5151E_PAGE_3V3_LOG:
 			return YH5151E_PAGE_3V3_REAL;
 		}
@@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log)
 
 static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 {
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct fsp3y_data *data = to_fsp3y_data(info);
 	int rv;
 
+	/*
+	 * YH5151-E outputs vout in linear11. The conversion is done when
+	 * reading. Here, we have to inject pmbus_core with the correct
+	 * exponent (it is -6).
+	 */
+	if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE)
+		return 0x1A;
+
 	rv = set_page(client, page);
 	if (rv < 0)
 		return rv;
@@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg)
 {
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct fsp3y_data *data = to_fsp3y_data(info);
 	int rv;
 
 	/*
@@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase,
 	if (rv < 0)
 		return rv;
 
-	return i2c_smbus_read_word_data(client, reg);
+	rv = i2c_smbus_read_word_data(client, reg);
+	if (rv < 0)
+		return rv;
+
+	/*
+	 * YH-5151E is non-compliant and outputs output voltages in linear11
+	 * instead of linear16.
+	 */
+	if (data->chip == yh5151e && reg == PMBUS_READ_VOUT)
+		rv = sign_extend32(rv, 10) & 0xffff;
+
+	return rv;
 }
 
 static struct pmbus_driver_info fsp3y_info[] = {
-- 
GitLab


From 1f4642b72be79757f050924a9b9673b6a02034bc Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Mon, 3 May 2021 00:46:11 -0700
Subject: [PATCH 0264/3804] usb: typec: ucsi: Retrieve all the PDOs instead of
 just the first 4

commit 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects
in PD mode") introduced retrieval of the PDOs when connected to a
PD-capable source. But only the first 4 PDOs are received since
that is the maximum number that can be fetched at a time given the
MESSAGE_IN length limitation (16 bytes). However, as per the PD spec
a connected source may advertise up to a maximum of 7 PDOs.

If such a source is connected it's possible the PPM could have
negotiated a power contract with one of the PDOs at index greater
than 4, and would be reflected in the request data object's (RDO)
object position field. This would result in an out-of-bounds access
when the rdo_index() is used to index into the src_pdos array in
ucsi_psy_get_voltage_now().

With the help of the UBSAN -fsanitize=array-bounds checker enabled
this exact issue is revealed when connecting to a PD source adapter
that advertise 5 PDOs and the PPM enters a contract having selected
the 5th one.

[  151.545106][   T70] Unexpected kernel BRK exception at EL1
[  151.545112][   T70] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP
...
[  151.545499][   T70] pc : ucsi_psy_get_prop+0x208/0x20c
[  151.545507][   T70] lr : power_supply_show_property+0xc0/0x328
...
[  151.545542][   T70] Call trace:
[  151.545544][   T70]  ucsi_psy_get_prop+0x208/0x20c
[  151.545546][   T70]  power_supply_uevent+0x1a4/0x2f0
[  151.545550][   T70]  dev_uevent+0x200/0x384
[  151.545555][   T70]  kobject_uevent_env+0x1d4/0x7e8
[  151.545557][   T70]  power_supply_changed_work+0x174/0x31c
[  151.545562][   T70]  process_one_work+0x244/0x6f0
[  151.545564][   T70]  worker_thread+0x3e0/0xa64

We can resolve this by instead retrieving and storing up to the
maximum of 7 PDOs in the con->src_pdos array. This would involve
two calls to the GET_PDOS command.

Fixes: 992a60ed0d5e ("usb: typec: ucsi: register with power_supply class")
Fixes: 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode")
Cc: stable@vger.kernel.org
Reported-and-tested-by: Subbaraman Narayanamurthy <subbaram@codeaurora.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210503074611.30973-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 41 +++++++++++++++++++++++++++--------
 drivers/usb/typec/ucsi/ucsi.h |  6 +++--
 2 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 0e1cec346e0f8..1d8b7df59ff49 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -495,7 +495,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient)
 	}
 }
 
-static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner)
+static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner,
+			 u32 *pdos, int offset, int num_pdos)
 {
 	struct ucsi *ucsi = con->ucsi;
 	u64 command;
@@ -503,17 +504,39 @@ static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner)
 
 	command = UCSI_COMMAND(UCSI_GET_PDOS) | UCSI_CONNECTOR_NUMBER(con->num);
 	command |= UCSI_GET_PDOS_PARTNER_PDO(is_partner);
-	command |= UCSI_GET_PDOS_NUM_PDOS(UCSI_MAX_PDOS - 1);
+	command |= UCSI_GET_PDOS_PDO_OFFSET(offset);
+	command |= UCSI_GET_PDOS_NUM_PDOS(num_pdos - 1);
 	command |= UCSI_GET_PDOS_SRC_PDOS;
-	ret = ucsi_send_command(ucsi, command, con->src_pdos,
-			       sizeof(con->src_pdos));
-	if (ret < 0) {
+	ret = ucsi_send_command(ucsi, command, pdos + offset,
+				num_pdos * sizeof(u32));
+	if (ret < 0)
 		dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret);
+	if (ret == 0 && offset == 0)
+		dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n");
+
+	return ret;
+}
+
+static void ucsi_get_src_pdos(struct ucsi_connector *con, int is_partner)
+{
+	int ret;
+
+	/* UCSI max payload means only getting at most 4 PDOs at a time */
+	ret = ucsi_get_pdos(con, 1, con->src_pdos, 0, UCSI_MAX_PDOS);
+	if (ret < 0)
 		return;
-	}
+
 	con->num_pdos = ret / sizeof(u32); /* number of bytes to 32-bit PDOs */
-	if (ret == 0)
-		dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n");
+	if (con->num_pdos < UCSI_MAX_PDOS)
+		return;
+
+	/* get the remaining PDOs, if any */
+	ret = ucsi_get_pdos(con, 1, con->src_pdos, UCSI_MAX_PDOS,
+			    PDO_MAX_OBJECTS - UCSI_MAX_PDOS);
+	if (ret < 0)
+		return;
+
+	con->num_pdos += ret / sizeof(u32);
 }
 
 static void ucsi_pwr_opmode_change(struct ucsi_connector *con)
@@ -522,7 +545,7 @@ static void ucsi_pwr_opmode_change(struct ucsi_connector *con)
 	case UCSI_CONSTAT_PWR_OPMODE_PD:
 		con->rdo = con->status.request_data_obj;
 		typec_set_pwr_opmode(con->port, TYPEC_PWR_MODE_PD);
-		ucsi_get_pdos(con, 1);
+		ucsi_get_src_pdos(con, 1);
 		break;
 	case UCSI_CONSTAT_PWR_OPMODE_TYPEC1_5:
 		con->rdo = 0;
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 3920e20a9e9ef..cee666790907e 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -8,6 +8,7 @@
 #include <linux/power_supply.h>
 #include <linux/types.h>
 #include <linux/usb/typec.h>
+#include <linux/usb/pd.h>
 #include <linux/usb/role.h>
 
 /* -------------------------------------------------------------------------- */
@@ -134,7 +135,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num);
 
 /* GET_PDOS command bits */
 #define UCSI_GET_PDOS_PARTNER_PDO(_r_)		((u64)(_r_) << 23)
+#define UCSI_GET_PDOS_PDO_OFFSET(_r_)		((u64)(_r_) << 24)
 #define UCSI_GET_PDOS_NUM_PDOS(_r_)		((u64)(_r_) << 32)
+#define UCSI_MAX_PDOS				(4)
 #define UCSI_GET_PDOS_SRC_PDOS			((u64)1 << 34)
 
 /* -------------------------------------------------------------------------- */
@@ -302,7 +305,6 @@ struct ucsi {
 
 #define UCSI_MAX_SVID		5
 #define UCSI_MAX_ALTMODES	(UCSI_MAX_SVID * 6)
-#define UCSI_MAX_PDOS		(4)
 
 #define UCSI_TYPEC_VSAFE5V	5000
 #define UCSI_TYPEC_1_5_CURRENT	1500
@@ -330,7 +332,7 @@ struct ucsi_connector {
 	struct power_supply *psy;
 	struct power_supply_desc psy_desc;
 	u32 rdo;
-	u32 src_pdos[UCSI_MAX_PDOS];
+	u32 src_pdos[PDO_MAX_OBJECTS];
 	int num_pdos;
 
 	struct usb_role_switch *usb_role_sw;
-- 
GitLab


From c34e85fa69b9f4568f19da3af06c3870dd8fcc50 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Fri, 7 May 2021 14:22:59 +0800
Subject: [PATCH 0265/3804] usb: typec: tcpm: Send DISCOVER_IDENTITY from
 dedicated work

In the current design, DISCOVER_IDENTITY is queued to the VDM state
machine immediately in the Ready states and is never retried if it fails
in the AMS. Move the process to a delayed work so that when it fails for
some reason (e.g. Sink Tx No Go), it can be retried by queueing the work
again. Also fix a problem where vdm_state is not set to a proper state
if it is blocked by the Collision Avoidance mechanism.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210507062300.1945009-2-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 85 ++++++++++++++++++++++++++++++-----
 1 file changed, 75 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 68e04e397e924..ae1e84252d38f 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -259,6 +259,7 @@ enum frs_typec_current {
 #define ALTMODE_DISCOVERY_MAX	(SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX)
 
 #define GET_SINK_CAP_RETRY_MS	100
+#define SEND_DISCOVER_RETRY_MS	100
 
 struct pd_mode_data {
 	int svid_index;		/* current SVID index		*/
@@ -366,6 +367,8 @@ struct tcpm_port {
 	struct kthread_work vdm_state_machine;
 	struct hrtimer enable_frs_timer;
 	struct kthread_work enable_frs;
+	struct hrtimer send_discover_timer;
+	struct kthread_work send_discover_work;
 	bool state_machine_running;
 	bool vdm_sm_running;
 
@@ -1178,6 +1181,16 @@ static void mod_enable_frs_delayed_work(struct tcpm_port *port, unsigned int del
 	}
 }
 
+static void mod_send_discover_delayed_work(struct tcpm_port *port, unsigned int delay_ms)
+{
+	if (delay_ms) {
+		hrtimer_start(&port->send_discover_timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL);
+	} else {
+		hrtimer_cancel(&port->send_discover_timer);
+		kthread_queue_work(port->wq, &port->send_discover_work);
+	}
+}
+
 static void tcpm_set_state(struct tcpm_port *port, enum tcpm_state state,
 			   unsigned int delay_ms)
 {
@@ -1855,6 +1868,9 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 				res = tcpm_ams_start(port, DISCOVER_IDENTITY);
 				if (res == 0)
 					port->send_discover = false;
+				else if (res == -EAGAIN)
+					mod_send_discover_delayed_work(port,
+								       SEND_DISCOVER_RETRY_MS);
 				break;
 			case CMD_DISCOVER_SVID:
 				res = tcpm_ams_start(port, DISCOVER_SVIDS);
@@ -1880,6 +1896,7 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 			}
 
 			if (res < 0) {
+				port->vdm_state = VDM_STATE_ERR_BUSY;
 				port->vdm_sm_running = false;
 				return;
 			}
@@ -3682,14 +3699,6 @@ static inline enum tcpm_state unattached_state(struct tcpm_port *port)
 	return SNK_UNATTACHED;
 }
 
-static void tcpm_check_send_discover(struct tcpm_port *port)
-{
-	if ((port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) &&
-	    port->send_discover && port->pd_capable)
-		tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
-	port->send_discover = false;
-}
-
 static void tcpm_swap_complete(struct tcpm_port *port, int result)
 {
 	if (port->swap_pending) {
@@ -3926,7 +3935,18 @@ static void run_state_machine(struct tcpm_port *port)
 			break;
 		}
 
-		tcpm_check_send_discover(port);
+		/*
+		 * 6.4.4.3.1 Discover Identity
+		 * "The Discover Identity Command Shall only be sent to SOP when there is an
+		 * Explicit Contract."
+		 * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+		 * port->explicit_contract to decide whether to send the command.
+		 */
+		if (port->explicit_contract)
+			mod_send_discover_delayed_work(port, 0);
+		else
+			port->send_discover = false;
+
 		/*
 		 * 6.3.5
 		 * Sending ping messages is not necessary if
@@ -4194,7 +4214,18 @@ static void run_state_machine(struct tcpm_port *port)
 			break;
 		}
 
-		tcpm_check_send_discover(port);
+		/*
+		 * 6.4.4.3.1 Discover Identity
+		 * "The Discover Identity Command Shall only be sent to SOP when there is an
+		 * Explicit Contract."
+		 * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using
+		 * port->explicit_contract.
+		 */
+		if (port->explicit_contract)
+			mod_send_discover_delayed_work(port, 0);
+		else
+			port->send_discover = false;
+
 		power_supply_changed(port->psy);
 		break;
 
@@ -5288,6 +5319,29 @@ unlock:
 	mutex_unlock(&port->lock);
 }
 
+static void tcpm_send_discover_work(struct kthread_work *work)
+{
+	struct tcpm_port *port = container_of(work, struct tcpm_port, send_discover_work);
+
+	mutex_lock(&port->lock);
+	/* No need to send DISCOVER_IDENTITY anymore */
+	if (!port->send_discover)
+		goto unlock;
+
+	/* Retry if the port is not idle */
+	if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) {
+		mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS);
+		goto unlock;
+	}
+
+	/* Only send the Message if the port is host for PD rev2.0 */
+	if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20)
+		tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0);
+
+unlock:
+	mutex_unlock(&port->lock);
+}
+
 static int tcpm_dr_set(struct typec_port *p, enum typec_data_role data)
 {
 	struct tcpm_port *port = typec_get_drvdata(p);
@@ -6093,6 +6147,14 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer)
+{
+	struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer);
+
+	kthread_queue_work(port->wq, &port->send_discover_work);
+	return HRTIMER_NORESTART;
+}
+
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 {
 	struct tcpm_port *port;
@@ -6123,12 +6185,15 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
 	kthread_init_work(&port->vdm_state_machine, vdm_state_machine_work);
 	kthread_init_work(&port->event_work, tcpm_pd_event_handler);
 	kthread_init_work(&port->enable_frs, tcpm_enable_frs_work);
+	kthread_init_work(&port->send_discover_work, tcpm_send_discover_work);
 	hrtimer_init(&port->state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->state_machine_timer.function = state_machine_timer_handler;
 	hrtimer_init(&port->vdm_state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->vdm_state_machine_timer.function = vdm_state_machine_timer_handler;
 	hrtimer_init(&port->enable_frs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	port->enable_frs_timer.function = enable_frs_timer_handler;
+	hrtimer_init(&port->send_discover_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	port->send_discover_timer.function = send_discover_timer_handler;
 
 	spin_lock_init(&port->pd_event_lock);
 
-- 
GitLab


From f1fbd950b59b67bc5c202216c8e1c6ca8c99a3b4 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Fri, 7 May 2021 14:23:00 +0800
Subject: [PATCH 0266/3804] usb: typec: tcpm: Fix wrong handling for
 Not_Supported in VDM AMS

Not_Supported Message is acceptable in VDM AMS. Redirect the VDM state
machine to VDM_STATE_DONE when receiving Not_Supported and finish the
VDM AMS.

Also, after the loop in vdm_state_machine_work, add more conditions of
VDM states to clear the vdm_sm_running flag because those are all
stopping states when leaving the loop.

In addition, finish the VDM AMS if the port partner responds BUSY.

Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling")
Fixes: 8d3a0578ad1a ("usb: typec: tcpm: Respond Wait if VDM state machine is running")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210507062300.1945009-3-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index ae1e84252d38f..db567e6fde924 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1897,7 +1897,6 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 
 			if (res < 0) {
 				port->vdm_state = VDM_STATE_ERR_BUSY;
-				port->vdm_sm_running = false;
 				return;
 			}
 		}
@@ -1913,6 +1912,7 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 		port->vdo_data[0] = port->vdo_retry;
 		port->vdo_count = 1;
 		port->vdm_state = VDM_STATE_READY;
+		tcpm_ams_finish(port);
 		break;
 	case VDM_STATE_BUSY:
 		port->vdm_state = VDM_STATE_ERR_TMOUT;
@@ -1978,7 +1978,7 @@ static void vdm_state_machine_work(struct kthread_work *work)
 		 port->vdm_state != VDM_STATE_BUSY &&
 		 port->vdm_state != VDM_STATE_SEND_MESSAGE);
 
-	if (port->vdm_state == VDM_STATE_ERR_TMOUT)
+	if (port->vdm_state < VDM_STATE_READY)
 		port->vdm_sm_running = false;
 
 	mutex_unlock(&port->lock);
@@ -2569,6 +2569,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 			port->sink_cap_done = true;
 			tcpm_set_state(port, ready_state(port), 0);
 			break;
+		case SRC_READY:
+		case SNK_READY:
+			if (port->vdm_state > VDM_STATE_READY) {
+				port->vdm_state = VDM_STATE_DONE;
+				if (tcpm_vdm_ams(port))
+					tcpm_ams_finish(port);
+				mod_vdm_delayed_work(port, 0);
+				break;
+			}
+			fallthrough;
 		default:
 			tcpm_pd_handle_state(port,
 					     port->pwr_role == TYPEC_SOURCE ?
-- 
GitLab


From d9ff1096a840dddea3d5cfa2149ff7da9f499fb2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 4 May 2021 22:26:29 +0200
Subject: [PATCH 0267/3804] usb: musb: Fix an error message

'ret' is known to be 0 here.
Initialize 'ret' with the expected error code before using it.

Fixes: 0990366bab3c ("usb: musb: Add support for MediaTek musb controller")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/69f514dc7134e3c917cad208e73cc650cb9e2bd6.1620159879.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/mediatek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c
index eebeadd269461..6b92d037d8fc8 100644
--- a/drivers/usb/musb/mediatek.c
+++ b/drivers/usb/musb/mediatek.c
@@ -518,8 +518,8 @@ static int mtk_musb_probe(struct platform_device *pdev)
 
 	glue->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
 	if (IS_ERR(glue->xceiv)) {
-		dev_err(dev, "fail to getting usb-phy %d\n", ret);
 		ret = PTR_ERR(glue->xceiv);
+		dev_err(dev, "fail to getting usb-phy %d\n", ret);
 		goto err_unregister_usb_phy;
 	}
 
-- 
GitLab


From 1c4841ccbd2b185587010d6178aac11953f61d4c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 26 Apr 2021 16:09:19 -0700
Subject: [PATCH 0268/3804] dmaengine: idxd: add engine 'struct device' missing
 bus type assignment

The engine 'struct device' setup is missing the bus type assignment. Set
the bus to dsa_bus_type.

Fixes: 75b911309060 ("dmaengine: idxd: fix engine conf_dev lifetime")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/161947841562.984844.17505646725993659651.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 2a926bef87f2a..ec7305f86bf74 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -242,6 +242,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
 		engine->idxd = idxd;
 		device_initialize(&engine->conf_dev);
 		engine->conf_dev.parent = &idxd->conf_dev;
+		engine->conf_dev.bus = &dsa_bus_type;
 		engine->conf_dev.type = &idxd_engine_device_type;
 		rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
 		if (rc < 0) {
-- 
GitLab


From 077cdb355b3d8ee0f258856962e6dac06e744401 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Mon, 26 Apr 2021 16:32:24 -0700
Subject: [PATCH 0269/3804] dmaengine: idxd: add missing dsa driver unregister

idxd_unregister_driver() has never been called for the idxd driver upon
removal. Fix this by calling it on module removal.

Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/161947994449.1053102.13189942817915448216.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index ec7305f86bf74..6201f52f13f5d 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -788,6 +788,7 @@ module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
+	idxd_unregister_driver();
 	pci_unregister_driver(&idxd_pci_driver);
 	idxd_cdev_remove();
 	idxd_unregister_bus_type();
-- 
GitLab


From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 5 May 2021 17:44:22 -0700
Subject: [PATCH 0270/3804] usb: typec: tcpm: Don't block probing of consumers
 of "connector" nodes

fw_devlink expects DT device nodes with "compatible" property to have
struct devices created for them. Since the connector node might not be
populated as a device, mark it as such so that fw_devlink knows not to
wait on this fwnode being populated as a struct device.

Without this patch, USB functionality can be broken on some boards.

Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c           | 3 ++-
 drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++
 include/linux/fwnode.h        | 1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4a8bf8cda52bc..628e33939acae 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode)
 	fwnode_links_purge_consumers(fwnode);
 }
 
-static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
+void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
 {
 	struct fwnode_handle *child;
 
@@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
 	fwnode_for_each_available_child_node(fwnode, child)
 		fw_devlink_purge_absent_suppliers(child);
 }
+EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers);
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index c4fdc00a3bc8f..bffa342d4e386 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5754,6 +5754,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
 	if (!fwnode)
 		return -EINVAL;
 
+	/*
+	 * This fwnode has a "compatible" property, but is never populated as a
+	 * struct device. Instead we simply parse it to read the properties.
+	 * This it breaks fw_devlink=on. To maintain backward compatibility
+	 * with existing DT files, we work around this by deleting any
+	 * fwnode_links to/from this fwnode.
+	 */
+	fw_devlink_purge_absent_suppliers(fwnode);
+
 	/* USB data support is optional */
 	ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
 	if (ret == 0) {
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index ed4e67a7ff1c4..59828516ebaf1 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void);
 extern bool fw_devlink_is_strict(void);
 int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup);
 void fwnode_links_purge(struct fwnode_handle *fwnode);
+void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode);
 
 #endif
-- 
GitLab


From b577750e4157050ed6de5ca9083893027b8ece33 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 3 May 2021 12:06:03 +0200
Subject: [PATCH 0271/3804] MAINTAINERS: Add Matthew Bobrowski as a reviewer

Matthew has been helping with fanotify for some time already and he'd
like to do more, so let's add him as a reviewer.

CC: Matthew Bobrowski <repnop@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..e15e155ff10eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6938,6 +6938,7 @@ F:	net/core/failover.c
 FANOTIFY
 M:	Jan Kara <jack@suse.cz>
 R:	Amir Goldstein <amir73il@gmail.com>
+R:	Matthew Bobrowski <repnop@google.com>
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
 F:	fs/notify/fanotify/
-- 
GitLab


From 8c721cb0f742f9a01f2f1985b274b544f89904f4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 28 Apr 2021 10:44:19 +0200
Subject: [PATCH 0272/3804] quota: Use 'hlist_for_each_entry' to simplify code

Use 'hlist_for_each_entry' instead of hand writing it.
This saves a few lines of code.

Link: https://lore.kernel.org/r/f82d3e33964dcbd2aac19866735e0a8381c8a735.1619599407.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4f13734637660..22d904bde6ab9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -288,14 +288,12 @@ static inline void remove_dquot_hash(struct dquot *dquot)
 static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
 				struct kqid qid)
 {
-	struct hlist_node *node;
 	struct dquot *dquot;
 
-	hlist_for_each (node, dquot_hash+hashent) {
-		dquot = hlist_entry(node, struct dquot, dq_hash);
+	hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash)
 		if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid))
 			return dquot;
-	}
+
 	return NULL;
 }
 
-- 
GitLab


From 8370e5b093080c03cf89f7ebf0bef6984545429e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 13:01:36 +0300
Subject: [PATCH 0273/3804] hwmon: (ltc2992) Put fwnode in error case during
 ->probe()

In each iteration fwnode_for_each_available_child_node() bumps the
reference count of the loop variable and drops it on the next iteration.

Since in the error case the loop is exited early, we have to drop the
reference count ourselves. Do it for the child fwnode in the error cases
during ->probe().

Fixes: b0bd407e94b0 ("hwmon: (ltc2992) Add support")
Cc: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210510100136.3303142-1-andy.shevchenko@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ltc2992.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 4382105bf1420..2a4bed0ab226b 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st)
 
 	fwnode_for_each_available_child_node(fwnode, child) {
 		ret = fwnode_property_read_u32(child, "reg", &addr);
-		if (ret < 0)
+		if (ret < 0) {
+			fwnode_handle_put(child);
 			return ret;
+		}
 
-		if (addr > 1)
+		if (addr > 1) {
+			fwnode_handle_put(child);
 			return -EINVAL;
+		}
 
 		ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val);
 		if (!ret)
-- 
GitLab


From e84749a78dc82bc545f12ce009e3dbcc2c5a8a91 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 10 May 2021 17:06:59 +0200
Subject: [PATCH 0274/3804] ALSA: usb-audio: Validate MS endpoint descriptors

snd_usbmidi_get_ms_info() may access beyond the border when a
malformed descriptor is passed.  This patch adds the sanity checks of
the given MS endpoint descriptors, and skips invalid ones.

Reported-by: syzbot+6bb23a5d5548b93c94aa@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210510150659.17710-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/midi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index a10ac75969a8f..649eb8d1ab7dd 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1956,8 +1956,12 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi,
 		ms_ep = find_usb_ms_endpoint_descriptor(hostep);
 		if (!ms_ep)
 			continue;
+		if (ms_ep->bLength <= sizeof(*ms_ep))
+			continue;
 		if (ms_ep->bNumEmbMIDIJack > 0x10)
 			continue;
+		if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack)
+			continue;
 		if (usb_endpoint_dir_out(ep)) {
 			if (endpoints[epidx].out_ep) {
 				if (++epidx >= MIDI_MAX_ENDPOINTS) {
-- 
GitLab


From 7ee06ddc4038f936b0d4459d37a7d4d844fb03db Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 7 May 2021 11:38:10 -0400
Subject: [PATCH 0275/3804] dm snapshot: fix a crash when an origin has no
 snapshots

If an origin target has no snapshots, o->split_boundary is set to 0.
This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split().

Fix this by initializing chunk_size, and in turn split_boundary, to
rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits
into "unsigned" type.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: stable@vger.kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index a2acb014c13ae..2a51ddd840b41 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -855,12 +855,11 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 static uint32_t __minimum_chunk_size(struct origin *o)
 {
 	struct dm_snapshot *snap;
-	unsigned chunk_size = 0;
+	unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
 
 	if (o)
 		list_for_each_entry(snap, &o->snapshots, list)
-			chunk_size = min_not_zero(chunk_size,
-						  snap->store->chunk_size);
+			chunk_size = min(chunk_size, snap->store->chunk_size);
 
 	return (uint32_t) chunk_size;
 }
-- 
GitLab


From 63c8af5687f6b1b70e9458cac1ffb25e86db1695 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 10 May 2021 08:48:06 +0900
Subject: [PATCH 0276/3804] block: uapi: fix comment about block device ioctl

Fix the comment mentioning ioctl command range used for zoned block
devices to reflect the range of commands actually implemented.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Link: https://lore.kernel.org/r/20210509234806.3000-1-damien.lemoal@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index f44eb0a04afdd..4c32e97dcdf00 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
  */
 
-- 
GitLab


From 17866bc6b2ae1c3075c9fe7bcbeb8ea50eb4c3fc Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 11:00:56 +0800
Subject: [PATCH 0277/3804] dmaengine: fsl-dpaa2-qdma: Fix error return code in
 two functions

Return a negative error code from the error handling paths instead of 0,
as is done elsewhere in these functions.

Fixes: 7fdf9b05c73b ("dmaengine: fsl-dpaa2-qdma: Add NXP dpaa2 qDMA controller driver for Layerscape SoCs")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210508030056.2027-1-thunder.leizhen@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
index 4ec909e0b8106..4ae057922ef1f 100644
--- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
+++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
@@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
 	}
 
 	if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) {
+		err = -EINVAL;
 		dev_err(dev, "DPDMAI major version mismatch\n"
 			     "Found %u.%u, supported version is %u.%u\n",
 				priv->dpdmai_attr.version.major,
@@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev)
 	}
 
 	if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) {
+		err = -EINVAL;
 		dev_err(dev, "DPDMAI minor version mismatch\n"
 			     "Found %u.%u, supported version is %u.%u\n",
 				priv->dpdmai_attr.version.major,
@@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv)
 		ppriv->store =
 			dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev);
 		if (!ppriv->store) {
+			err = -ENOMEM;
 			dev_err(dev, "dpaa2_io_store_create() failed\n");
 			goto err_store;
 		}
-- 
GitLab


From 4ad5dd2d7876d79507a20f026507d1a93b8fff10 Mon Sep 17 00:00:00 2001
From: Bumyong Lee <bumyong.lee@samsung.com>
Date: Fri, 7 May 2021 15:36:47 +0900
Subject: [PATCH 0278/3804] dmaengine: pl330: fix wrong usage of spinlock flags
 in dma_cyclc

The flags variable, which is an input parameter of pl330_prep_dma_cyclic(),
should not be used by the spin_lock_irq[save/restore] functions. Use a
separate local variable for the saved IRQ state instead.

Signed-off-by: Jongho Park <jongho7.park@samsung.com>
Signed-off-by: Bumyong Lee <bumyong.lee@samsung.com>
Signed-off-by: Chanho Park <chanho61.park@samsung.com>
Link: https://lore.kernel.org/r/20210507063647.111209-1-chanho61.park@samsung.com
Fixes: f6f2421c0a1c ("dmaengine: pl330: Merge dma_pl330_dmac and pl330_dmac structs")
Cc: stable@vger.kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/pl330.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index fd8d2bc3be9f5..110de8a600588 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 	for (i = 0; i < len / period_len; i++) {
 		desc = pl330_get_desc(pch);
 		if (!desc) {
+			unsigned long iflags;
+
 			dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
 				__func__, __LINE__);
 
 			if (!first)
 				return NULL;
 
-			spin_lock_irqsave(&pl330->pool_lock, flags);
+			spin_lock_irqsave(&pl330->pool_lock, iflags);
 
 			while (!list_empty(&first->node)) {
 				desc = list_entry(first->node.next,
@@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
 
 			list_move_tail(&first->node, &pl330->desc_pool);
 
-			spin_unlock_irqrestore(&pl330->pool_lock, flags);
+			spin_unlock_irqrestore(&pl330->pool_lock, iflags);
 
 			return NULL;
 		}
-- 
GitLab


From 538ea65a9fd1194352a41313bff876b74b5d90c5 Mon Sep 17 00:00:00 2001
From: Quanyang Wang <quanyang.wang@windriver.com>
Date: Fri, 30 Apr 2021 14:40:41 +0800
Subject: [PATCH 0279/3804] dmaengine: xilinx: dpdma: initialize registers
 before request_irq

In some scenarios (kdump), the dpdma hardware irqs have already been
enabled when request_irq is called in the probe function, and then the
dpdma irq handler xilinx_dpdma_irq_handler is invoked to access
xdev->chan[i]. But at this moment xdev->chan[i] hasn't been initialized.

We should ensure that the dpdma controller is in a consistent and
clean state before further initialization. So add dpdma_hw_init()
to do this.

Furthermore, in xilinx_dpdma_disable_irq, disable all interrupts
instead of only the error interrupts.

This patch is to fix the kdump kernel crash as below:

[    3.696128] Unable to handle kernel NULL pointer dereference at virtual address 000000000000012c
[    3.696710] xilinx-zynqmp-dpdma fd4c0000.dma-controller: Xilinx DPDMA engine is probed
[    3.704900] Mem abort info:
[    3.704902]   ESR = 0x96000005
[    3.704905]   EC = 0x25: DABT (current EL), IL = 32 bits
[    3.704907]   SET = 0, FnV = 0
[    3.704912]   EA = 0, S1PTW = 0
[    3.713800] ahci-ceva fd0c0000.ahci: supply ahci not found, using dummy regulator
[    3.715585] Data abort info:
[    3.715587]   ISV = 0, ISS = 0x00000005
[    3.715589]   CM = 0, WnR = 0
[    3.715592] [000000000000012c] user address but active_mm is swapper
[    3.715596] Internal error: Oops: 96000005 [#1] SMP
[    3.715599] Modules linked in:
[    3.715608] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-12170-g60894882155f-dirty #77
[    3.723937] Hardware name: ZynqMP ZCU102 Rev1.0 (DT)
[    3.723942] pstate: 80000085 (Nzcv daIf -PAN -UAO -TCO BTYPE=--)
[    3.723956] pc : xilinx_dpdma_irq_handler+0x418/0x560
[    3.793049] lr : xilinx_dpdma_irq_handler+0x3d8/0x560
[    3.798089] sp : ffffffc01186bdf0
[    3.801388] x29: ffffffc01186bdf0 x28: ffffffc011836f28
[    3.806692] x27: ffffff8023e0ac80 x26: 0000000000000080
[    3.811996] x25: 0000000008000408 x24: 0000000000000003
[    3.817300] x23: ffffffc01186be70 x22: ffffffc011291740
[    3.822604] x21: 0000000000000000 x20: 0000000008000408
[    3.827908] x19: 0000000000000000 x18: 0000000000000010
[    3.833212] x17: 0000000000000000 x16: 0000000000000000
[    3.838516] x15: 0000000000000000 x14: ffffffc011291740
[    3.843820] x13: ffffffc02eb4d000 x12: 0000000034d4d91d
[    3.849124] x11: 0000000000000040 x10: ffffffc0112d2d48
[    3.854428] x9 : ffffffc0112d2d40 x8 : ffffff8021c00268
[    3.859732] x7 : 0000000000000000 x6 : ffffffc011836000
[    3.865036] x5 : 0000000000000003 x4 : 0000000000000000
[    3.870340] x3 : 0000000000000001 x2 : 0000000000000000
[    3.875644] x1 : 0000000000000000 x0 : 000000000000012c
[    3.880948] Call trace:
[    3.883382]  xilinx_dpdma_irq_handler+0x418/0x560
[    3.888079]  __handle_irq_event_percpu+0x5c/0x178
[    3.892774]  handle_irq_event_percpu+0x34/0x98
[    3.897210]  handle_irq_event+0x44/0xb8
[    3.901030]  handle_fasteoi_irq+0xd0/0x190
[    3.905117]  generic_handle_irq+0x30/0x48
[    3.909111]  __handle_domain_irq+0x64/0xc0
[    3.913192]  gic_handle_irq+0x78/0xa0
[    3.916846]  el1_irq+0xc4/0x180
[    3.919982]  cpuidle_enter_state+0x134/0x2f8
[    3.924243]  cpuidle_enter+0x38/0x50
[    3.927810]  call_cpuidle+0x1c/0x40
[    3.931290]  do_idle+0x20c/0x270
[    3.934502]  cpu_startup_entry+0x28/0x58
[    3.938410]  rest_init+0xbc/0xcc
[    3.941631]  arch_call_rest_init+0x10/0x1c
[    3.945718]  start_kernel+0x51c/0x558

Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver")
Signed-off-by: Quanyang Wang <quanyang.wang@windriver.com>
Link: https://lore.kernel.org/r/20210430064041.4058180-1-quanyang.wang@windriver.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dpdma.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index 70b29bd079c9f..ff7dfb3fdeb47 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -1459,7 +1459,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
  */
 static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
 {
-	dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL);
+	dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
 	dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
 }
 
@@ -1596,6 +1596,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
 	return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
 }
 
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+	unsigned int i;
+	void __iomem *reg;
+
+	/* Disable all interrupts */
+	xilinx_dpdma_disable_irq(xdev);
+
+	/* Stop all channels */
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+		reg = xdev->reg + XILINX_DPDMA_CH_BASE
+				+ XILINX_DPDMA_CH_OFFSET * i;
+		dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+	}
+
+	/* Clear the interrupt status registers */
+	dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
 static int xilinx_dpdma_probe(struct platform_device *pdev)
 {
 	struct xilinx_dpdma_device *xdev;
@@ -1622,6 +1642,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev)
 	if (IS_ERR(xdev->reg))
 		return PTR_ERR(xdev->reg);
 
+	dpdma_hw_init(xdev);
+
 	xdev->irq = platform_get_irq(pdev, 0);
 	if (xdev->irq < 0) {
 		dev_err(xdev->dev, "failed to get platform irq\n");
-- 
GitLab


From 56a8d3fd1f342d10ee7b27e9ac0f4d00b5fbb91c Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:34 +0200
Subject: [PATCH 0280/3804] mtd: rawnand: cs553x: Fix external use of SW
 Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-2-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/cs553x_nand.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 6edf78c16fc8b..df40927e56788 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/iopoll.h>
@@ -240,6 +241,15 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat,
 	return 0;
 }
 
+static int cs553x_ecc_correct(struct nand_chip *chip,
+			      unsigned char *buf,
+			      unsigned char *read_ecc,
+			      unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 static struct cs553x_nand_controller *controllers[4];
 
 static int cs553x_attach_chip(struct nand_chip *chip)
@@ -251,7 +261,7 @@ static int cs553x_attach_chip(struct nand_chip *chip)
 	chip->ecc.bytes = 3;
 	chip->ecc.hwctl  = cs_enable_hwecc;
 	chip->ecc.calculate = cs_calculate_ecc;
-	chip->ecc.correct  = rawnand_sw_hamming_correct;
+	chip->ecc.correct  = cs553x_ecc_correct;
 	chip->ecc.strength = 1;
 
 	return 0;
-- 
GitLab


From ad9ffdce453934cdc22fac0a0268119bd630260f Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:35 +0200
Subject: [PATCH 0281/3804] mtd: rawnand: fsmc: Fix external use of SW Hamming
 ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-3-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/fsmc_nand.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index bf695255b43a2..a3e66155ae405 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -25,6 +25,7 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
@@ -432,6 +433,15 @@ static int fsmc_read_hwecc_ecc1(struct nand_chip *chip, const u8 *data,
 	return 0;
 }
 
+static int fsmc_correct_ecc1(struct nand_chip *chip,
+			     unsigned char *buf,
+			     unsigned char *read_ecc,
+			     unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /* Count the number of 0's in buff upto a max of max_bits */
 static int count_written_bits(u8 *buff, int size, int max_bits)
 {
@@ -917,7 +927,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
 	case NAND_ECC_ENGINE_TYPE_ON_HOST:
 		dev_info(host->dev, "Using 1-bit HW ECC scheme\n");
 		nand->ecc.calculate = fsmc_read_hwecc_ecc1;
-		nand->ecc.correct = rawnand_sw_hamming_correct;
+		nand->ecc.correct = fsmc_correct_ecc1;
 		nand->ecc.hwctl = fsmc_enable_hwecc;
 		nand->ecc.bytes = 3;
 		nand->ecc.strength = 1;
-- 
GitLab


From c4b7d7c480d607e4f52d310d9d16b194868d0917 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:36 +0200
Subject: [PATCH 0282/3804] mtd: rawnand: lpc32xx_slc: Fix external use of SW
 Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Cc: Vladimir Zapolskiy <vz@mleia.com>
Reported-by: Trevor Woerner <twoerner@gmail.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Tested-by: Trevor Woerner <twoerner@gmail.com>
Acked-by: Vladimir Zapolskiy <vz@mleia.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-4-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/lpc32xx_slc.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 6b7269cfb7d83..d7dfc6fd85ca7 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -27,6 +27,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/mtd/lpc32xx_slc.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 
 #define LPC32XX_MODNAME		"lpc32xx-nand"
 
@@ -344,6 +345,18 @@ static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip,
 	return 0;
 }
 
+/*
+ * Corrects the data
+ */
+static int lpc32xx_nand_ecc_correct(struct nand_chip *chip,
+				    unsigned char *buf,
+				    unsigned char *read_ecc,
+				    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /*
  * Read a single byte from NAND device
  */
@@ -802,7 +815,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip)
 	chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome;
 	chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome;
 	chip->ecc.calculate = lpc32xx_nand_ecc_calculate;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = lpc32xx_nand_ecc_correct;
 	chip->ecc.hwctl = lpc32xx_nand_ecc_enable;
 
 	/*
-- 
GitLab


From 3e09c0252501829b14b10f14e1982aaab77d0b80 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:37 +0200
Subject: [PATCH 0283/3804] mtd: rawnand: ndfc: Fix external use of SW Hamming
 ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-5-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/ndfc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 338d6b1a189eb..98d5a94c3a242 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -22,6 +22,7 @@
 #include <linux/mtd/ndfc.h>
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <asm/io.h>
@@ -100,6 +101,15 @@ static int ndfc_calculate_ecc(struct nand_chip *chip,
 	return 0;
 }
 
+static int ndfc_correct_ecc(struct nand_chip *chip,
+			    unsigned char *buf,
+			    unsigned char *read_ecc,
+			    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 /*
  * Speedups for buffer read/write/verify
  *
@@ -145,7 +155,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	chip->controller = &ndfc->ndfc_control;
 	chip->legacy.read_buf = ndfc_read_buf;
 	chip->legacy.write_buf = ndfc_write_buf;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = ndfc_correct_ecc;
 	chip->ecc.hwctl = ndfc_enable_hwecc;
 	chip->ecc.calculate = ndfc_calculate_ecc;
 	chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST;
-- 
GitLab


From 46fcb57e6b7283533ebf8ba17a6bd30fa88bdc9f Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:38 +0200
Subject: [PATCH 0284/3804] mtd: rawnand: sharpsl: Fix external use of SW
 Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-6-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/sharpsl.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 5612ee628425b..2f1fe464e6637 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/sharpsl.h>
@@ -96,6 +97,15 @@ static int sharpsl_nand_calculate_ecc(struct nand_chip *chip,
 	return readb(sharpsl->io + ECCCNTR) != 0;
 }
 
+static int sharpsl_nand_correct_ecc(struct nand_chip *chip,
+				    unsigned char *buf,
+				    unsigned char *read_ecc,
+				    unsigned char *calc_ecc)
+{
+	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				      chip->ecc.size, false);
+}
+
 static int sharpsl_attach_chip(struct nand_chip *chip)
 {
 	if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST)
@@ -106,7 +116,7 @@ static int sharpsl_attach_chip(struct nand_chip *chip)
 	chip->ecc.strength = 1;
 	chip->ecc.hwctl = sharpsl_nand_enable_hwecc;
 	chip->ecc.calculate = sharpsl_nand_calculate_ecc;
-	chip->ecc.correct = rawnand_sw_hamming_correct;
+	chip->ecc.correct = sharpsl_nand_correct_ecc;
 
 	return 0;
 }
-- 
GitLab


From 6a4c5ada577467a5f79e06f2c5e69c09983c22fb Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:39 +0200
Subject: [PATCH 0285/3804] mtd: rawnand: tmio: Fix external use of SW Hamming
 ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-7-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/tmio_nand.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index de8e919d0ebe6..6d93dd31969b2 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -34,6 +34,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/slab.h>
@@ -292,11 +293,12 @@ static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int r0, r1;
 
 	/* assume ecc.size = 512 and ecc.bytes = 6 */
-	r0 = rawnand_sw_hamming_correct(chip, buf, read_ecc, calc_ecc);
+	r0 = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+				    chip->ecc.size, false);
 	if (r0 < 0)
 		return r0;
-	r1 = rawnand_sw_hamming_correct(chip, buf + 256, read_ecc + 3,
-					calc_ecc + 3);
+	r1 = ecc_sw_hamming_correct(buf + 256, read_ecc + 3, calc_ecc + 3,
+				    chip->ecc.size, false);
 	if (r1 < 0)
 		return r1;
 	return r0 + r1;
-- 
GitLab


From 3d227a0b0ce319edbff6fd0d8af4d66689e477cc Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Tue, 13 Apr 2021 18:18:40 +0200
Subject: [PATCH 0286/3804] mtd: rawnand: txx9ndfmc: Fix external use of SW
 Hamming ECC helper

Since the Hamming software ECC engine has been updated to become a
proper and independent ECC engine, it is now mandatory to either
initialize the engine before using any one of its functions or use one
of the bare helpers which only perform the calculations. As there is no
actual need for a proper ECC initialization, let's just use the bare
helper instead of the rawnand one.

Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions")
Cc: stable@vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-8-miquel.raynal@bootlin.com
---
 drivers/mtd/nand/raw/txx9ndfmc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 1a9449e53bf9d..b8894ac27073c 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
+#include <linux/mtd/nand-ecc-sw-hamming.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/io.h>
@@ -193,8 +194,8 @@ static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int stat;
 
 	for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) {
-		stat = rawnand_sw_hamming_correct(chip, buf, read_ecc,
-						  calc_ecc);
+		stat = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc,
+					      chip->ecc.size, false);
 		if (stat < 0)
 			return stat;
 		corrected += stat;
-- 
GitLab


From 562b4e91d3b221f737f84ff78ee7d348c8a6891f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Sat, 8 May 2021 19:32:14 +0200
Subject: [PATCH 0287/3804] mtd: parsers: ofpart: fix parsing subpartitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ofpart was recently patched to not scan random partition nodes as
subpartitions. That change unfortunately broke scanning valid
subpartitions like:

partitions {
	compatible = "fixed-partitions";
	#address-cells = <1>;
	#size-cells = <1>;

	partition@0 {
		compatible = "fixed-partitions";
		label = "bootloader";
		reg = <0x0 0x100000>;

		partition@0 {
			label = "config";
			reg = <0x80000 0x80000>;
		};
	};
};

Fix that regression by adding 1 more code path. We actually need 3
conditional blocks to support 3 possible cases. This change also makes
code easier to understand & follow.

Reported-by: David Bauer <mail@david-bauer.net>
Fixes: 2d751203aacf ("mtd: parsers: ofpart: limit parsing of deprecated DT syntax")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Tested-by: Andrew Cameron <apcameron@softhome.net>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210508173214.28365-1-zajec5@gmail.com
---
 drivers/mtd/parsers/ofpart_core.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c
index 0fd8d2a0db973..192190c42fc84 100644
--- a/drivers/mtd/parsers/ofpart_core.c
+++ b/drivers/mtd/parsers/ofpart_core.c
@@ -57,20 +57,22 @@ static int parse_fixed_partitions(struct mtd_info *master,
 	if (!mtd_node)
 		return 0;
 
-	ofpart_node = of_get_child_by_name(mtd_node, "partitions");
-	if (!ofpart_node && !master->parent) {
-		/*
-		 * We might get here even when ofpart isn't used at all (e.g.,
-		 * when using another parser), so don't be louder than
-		 * KERN_DEBUG
-		 */
-		pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
-			 master->name, mtd_node);
+	if (!master->parent) { /* Master */
+		ofpart_node = of_get_child_by_name(mtd_node, "partitions");
+		if (!ofpart_node) {
+			/*
+			 * We might get here even when ofpart isn't used at all (e.g.,
+			 * when using another parser), so don't be louder than
+			 * KERN_DEBUG
+			 */
+			pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n",
+				master->name, mtd_node);
+			ofpart_node = mtd_node;
+			dedicated = false;
+		}
+	} else { /* Partition */
 		ofpart_node = mtd_node;
-		dedicated = false;
 	}
-	if (!ofpart_node)
-		return 0;
 
 	of_id = of_match_node(parse_ofpart_match_table, ofpart_node);
 	if (dedicated && !of_id) {
-- 
GitLab


From 5311221304fa60e357aada75efdf2f2da8c30a57 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 6 May 2021 19:49:39 +0800
Subject: [PATCH 0288/3804] dt-bindings: phy: cadence-torrent: update reference
 file of docs

In commit fd7abc3c5b87 ("phy: cadence-torrent: Use a common header
file for Cadence SERDES"), phy-cadence-torrent.h was renamed to
phy-cadence.h. Fix the reference in the Documentation accordingly.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210506114940.22215-1-wanjiabing@vivo.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
index 01dcd14e7b2ad..320a232c7208c 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
@@ -118,7 +118,7 @@ patternProperties:
         description:
           Specifies the Spread Spectrum Clocking mode used. It can be NO_SSC,
           EXTERNAL_SSC or INTERNAL_SSC.
-          Refer include/dt-bindings/phy/phy-cadence-torrent.h for the constants to be used.
+          Refer include/dt-bindings/phy/phy-cadence.h for the constants to be used.
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 1, 2]
         default: 0
-- 
GitLab


From 0c8bd174f0fc131bc9dfab35cd8784f59045da87 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 May 2021 09:23:09 +0200
Subject: [PATCH 0289/3804] ACPI: scan: Fix a memory leak in an error handling
 path

If 'acpi_device_set_name()' fails, we must free
'acpi_device_bus_id->bus_id' or there is a (potential) memory leak.

Fixes: eb50aaf960e3 ("ACPI: scan: Use unique number for instance_no")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/scan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index a22778e880c22..651a431e2bbf1 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device,
 
 		result = acpi_device_set_name(device, acpi_device_bus_id);
 		if (result) {
+			kfree_const(acpi_device_bus_id->bus_id);
 			kfree(acpi_device_bus_id);
 			goto err_unlock;
 		}
-- 
GitLab


From a568814a55a0e82bbc7c7b51333d0c38e8fb5520 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 9 May 2021 14:39:21 +0300
Subject: [PATCH 0290/3804] RDMA/siw: Properly check send and receive CQ
 pointers

Checking a pointer returned by container_of() against NULL is incorrect
by definition, as it points to some offset from NULL.

Replace that check with a proper NULL check of the SIW QP attributes.

Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Link: https://lore.kernel.org/r/a7535a82925f6f4c1f062abaa294f3ae6e54bdd2.1620560310.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_verbs.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index d2313efb26db8..917c8a919f387 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -300,7 +300,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	struct siw_ucontext *uctx =
 		rdma_udata_to_drv_context(udata, struct siw_ucontext,
 					  base_ucontext);
-	struct siw_cq *scq = NULL, *rcq = NULL;
 	unsigned long flags;
 	int num_sqe, num_rqe, rv = 0;
 	size_t length;
@@ -343,10 +342,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		rv = -EINVAL;
 		goto err_out;
 	}
-	scq = to_siw_cq(attrs->send_cq);
-	rcq = to_siw_cq(attrs->recv_cq);
 
-	if (!scq || (!rcq && !attrs->srq)) {
+	if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) {
 		siw_dbg(base_dev, "send CQ or receive CQ invalid\n");
 		rv = -EINVAL;
 		goto err_out;
@@ -401,8 +398,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 		}
 	}
 	qp->pd = pd;
-	qp->scq = scq;
-	qp->rcq = rcq;
+	qp->scq = to_siw_cq(attrs->send_cq);
+	qp->rcq = to_siw_cq(attrs->recv_cq);
 
 	if (attrs->srq) {
 		/*
-- 
GitLab


From a3d83276d98886879b5bf7b30b7c29882754e4df Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 9 May 2021 14:41:38 +0300
Subject: [PATCH 0291/3804] RDMA/siw: Release xarray entry

The xarray entry is allocated in siw_qp_add(), but its release was
missed in case a zero-sized SQ was discovered.

Fixes: 661f385961f0 ("RDMA/siw: Fix handling of zero-sized Read and Receive Queues.")
Link: https://lore.kernel.org/r/f070b59d5a1114d5a4e830346755c2b3f141cde5.1620560472.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 917c8a919f387..3f175f220a229 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -375,7 +375,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd,
 	else {
 		/* Zero sized SQ is not supported */
 		rv = -EINVAL;
-		goto err_out;
+		goto err_out_xa;
 	}
 	if (num_rqe)
 		num_rqe = roundup_pow_of_two(num_rqe);
-- 
GitLab


From 54d87913f147a983589923c7f651f97de9af5be1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 10 May 2021 17:46:00 +0300
Subject: [PATCH 0292/3804] RDMA/core: Prevent divide-by-zero error triggered
 by the user

The user_entry_size is supplied by the user and later used as a
denominator to calculate the number of entries. A zero supplied by the
user will trigger the following divide-by-zero error:

 divide error: 0000 [#1] SMP KASAN PTI
 CPU: 4 PID: 497 Comm: c_repro Not tainted 5.13.0-rc1+ #281
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE+0x1b1/0x510
 Code: 87 59 03 00 00 e8 9f ab 1e ff 48 8d bd a8 00 00 00 e8 d3 70 41 ff 44 0f b7 b5 a8 00 00 00 e8 86 ab 1e ff 31 d2 4c 89 f0 31 ff <49> f7 f5 48 89 d6 48 89 54 24 10 48 89 04 24 e8 1b ad 1e ff 48 8b
 RSP: 0018:ffff88810416f828 EFLAGS: 00010246
 RAX: 0000000000000008 RBX: 1ffff1102082df09 RCX: ffffffff82183f3d
 RDX: 0000000000000000 RSI: ffff888105f2da00 RDI: 0000000000000000
 RBP: ffff88810416fa98 R08: 0000000000000001 R09: ffffed102082df5f
 R10: ffff88810416faf7 R11: ffffed102082df5e R12: 0000000000000000
 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88810416faf0
 FS:  00007f5715efa740(0000) GS:ffff88811a700000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000020000840 CR3: 000000010c2e0001 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  ? ib_uverbs_handler_UVERBS_METHOD_INFO_HANDLES+0x4b0/0x4b0
  ib_uverbs_cmd_verbs+0x1546/0x1940
  ib_uverbs_ioctl+0x186/0x240
  __x64_sys_ioctl+0x38a/0x1220
  do_syscall_64+0x3f/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space")
Link: https://lore.kernel.org/r/b971cc70a8b240a8b5eda33c99fa0558a0071be2.1620657876.git.leonro@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_std_types_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index 9ec6971056fa8..a03021d94e110 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -331,6 +331,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)(
 	if (ret)
 		return ret;
 
+	if (!user_entry_size)
+		return -EINVAL;
+
 	max_entries = uverbs_attr_ptr_get_array_size(
 		attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES,
 		user_entry_size);
-- 
GitLab


From c745253e2a691a40c66790defe85c104a887e14a Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 5 May 2021 14:09:15 +0300
Subject: [PATCH 0293/3804] PM: runtime: Fix unpaired parent child_count for
 force_resume

As pm_runtime_need_not_resume() relies also on usage_count, it can return
a different value in pm_runtime_force_suspend() compared to when called in
pm_runtime_force_resume(). Different return values can happen if anything
calls PM runtime functions in between, and causes the parent child_count
to increase on every resume.

So far I've seen the issue only for omapdrm that does complicated things
with PM runtime calls during system suspend for legacy reasons:

omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
pm_runtime_force_suspend() for 58000000.dss, !pm_runtime_need_not_resume()
 __update_runtime_status()
system suspended
pm_runtime_force_resume() for 58000000.dss, pm_runtime_need_not_resume()
 pm_runtime_enable() only called because of pm_runtime_need_not_resume()
omap_atomic_commit_tail() for omapdrm.0
 dispc_runtime_get()
  wakes up 58000000.dss as it's the dispc parent
   dispc_runtime_resume()
    rpm_resume() increases parent child_count
 dispc_runtime_put() won't idle, PM runtime suspend blocked
...
rpm_suspend for 58000000.dss but parent child_count is now unbalanced

Let's fix the issue by adding a flag for needs_force_resume and use it in
pm_runtime_force_resume() instead of pm_runtime_need_not_resume().

Additionally omapdrm system suspend could be simplified later on to avoid
lots of unnecessary PM runtime calls and the complexity it adds. The
driver can just use internal functions that are shared between the PM
runtime and system suspend related functions.

Fixes: 4918e1f87c5f ("PM / runtime: Rework pm_runtime_force_suspend/resume()")
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Cc: 4.16+ <stable@vger.kernel.org> # 4.16+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 10 +++++++---
 include/linux/pm.h           |  1 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 1fc1a992f90ca..b570848d23e0e 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev)
 	dev->power.request_pending = false;
 	dev->power.request = RPM_REQ_NONE;
 	dev->power.deferred_resume = false;
+	dev->power.needs_force_resume = 0;
 	INIT_WORK(&dev->power.work, pm_runtime_work);
 
 	dev->power.timer_expires = 0;
@@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev)
 	 * its parent, but set its status to RPM_SUSPENDED anyway in case this
 	 * function will be called again for it in the meantime.
 	 */
-	if (pm_runtime_need_not_resume(dev))
+	if (pm_runtime_need_not_resume(dev)) {
 		pm_runtime_set_suspended(dev);
-	else
+	} else {
 		__update_runtime_status(dev, RPM_SUSPENDED);
+		dev->power.needs_force_resume = 1;
+	}
 
 	return 0;
 
@@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev)
 	int (*callback)(struct device *);
 	int ret = 0;
 
-	if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
+	if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
 		goto out;
 
 	/*
@@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev)
 
 	pm_runtime_mark_last_busy(dev);
 out:
+	dev->power.needs_force_resume = 0;
 	pm_runtime_enable(dev);
 	return ret;
 }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index c9657408fee1a..1d8209c09686c 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -601,6 +601,7 @@ struct dev_pm_info {
 	unsigned int		idle_notification:1;
 	unsigned int		request_pending:1;
 	unsigned int		deferred_resume:1;
+	unsigned int		needs_force_resume:1;
 	unsigned int		runtime_auto:1;
 	bool			ignore_children:1;
 	unsigned int		no_callbacks:1;
-- 
GitLab


From 37a8024d265564eba680575df6421f19db21dfce Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 7 May 2021 11:59:05 -0700
Subject: [PATCH 0294/3804] arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup

A valid implementation choice for the ChooseRandomNonExcludedTag()
pseudocode function used by IRG is to behave in the same way as with
GCR_EL1.RRND=0. This would mean that RGSR_EL1.SEED is used as an LFSR
which must have a non-zero value in order for IRG to properly produce
pseudorandom numbers. However, RGSR_EL1 is reset to an UNKNOWN value
on soft reset and thus may reset to 0. Therefore we must initialize
RGSR_EL1.SEED to a non-zero value in order to ensure that IRG behaves
as expected.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration")
Cc: <stable@vger.kernel.org> # 5.10
Link: https://linux-review.googlesource.com/id/I2b089b6c7d6f17ee37e2f0db7df5ad5bcc04526c
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210507185905.1745402-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/proc.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 0a48191534ff6..97d7bcd8d4f26 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup)
 	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
 	msr_s	SYS_GCR_EL1, x10
 
+	/*
+	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+	 * RGSR_EL1.SEED must be non-zero for IRG to produce
+	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+	 * must initialize it.
+	 */
+	mrs	x10, CNTVCT_EL0
+	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
+	csinc	x10, x10, xzr, ne
+	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
+	msr_s	SYS_RGSR_EL1, x10
+
 	/* clear any pending tag check faults in TFSR*_EL1 */
 	msr_s	SYS_TFSR_EL1, xzr
 	msr_s	SYS_TFSRE0_EL1, xzr
-- 
GitLab


From f79f7a2d96769d2a3e663a3e673066be77c30cc3 Mon Sep 17 00:00:00 2001
From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Date: Mon, 22 Mar 2021 17:58:19 +0530
Subject: [PATCH 0295/3804] arc: Fix typos/spellos

s/commiting/committing/
s/defintion/definition/
s/gaurantees/guarantees/
s/interrpted/interrupted/
s/interrutps/interrupts/
s/succeded/succeeded/
s/unconditonally/unconditionally/

Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile              | 2 +-
 arch/arc/include/asm/cmpxchg.h | 4 ++--
 arch/arc/kernel/process.c      | 8 ++++----
 arch/arc/kernel/signal.c       | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 4392c9c189c4d..e47adc97a89bf 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -31,7 +31,7 @@ endif
 
 
 ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
 # We had a customer reported bug where some code built in kernel was NOT using
 # any kernel headers, and missing the r25 global register
 # Can't do unconditionally because of recursive include issues
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 9b87e162e539b..dfeffa25499bf 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
  *
  * Technically the lock is also needed for UP (boils down to irq save/restore)
  * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
  * Other way around, xchg is one instruction anyways, so can't be interrupted
  * as such
  */
@@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 /*
  * "atomic" variant of xchg()
  * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
+ * Since xchg() doesn't always do that, it would seem that following definition
  * is incorrect. But here's the rationale:
  *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
  *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index d838d0d576964..3793876f42d9b 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
 	int ret;
 
 	/*
-	 * This is only for old cores lacking LLOCK/SCOND, which by defintion
+	 * This is only for old cores lacking LLOCK/SCOND, which by definition
 	 * can't possibly be SMP. Thus doesn't need to be SMP safe.
 	 * And this also helps reduce the overhead for serializing in
 	 * the UP case
 	 */
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
 
-	/* Z indicates to userspace if operation succeded */
+	/* Z indicates to userspace if operation succeeded */
 	regs->status32 &= ~STATUS_Z_MASK;
 
 	ret = access_ok(uaddr, sizeof(*uaddr));
@@ -107,7 +107,7 @@ fail:
 
 void arch_cpu_idle(void)
 {
-	/* Re-enable interrupts <= default irq priority before commiting SLEEP */
+	/* Re-enable interrupts <= default irq priority before committing SLEEP */
 	const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
 
 	__asm__ __volatile__(
@@ -120,7 +120,7 @@ void arch_cpu_idle(void)
 
 void arch_cpu_idle(void)
 {
-	/* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+	/* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
 	__asm__ __volatile__("sleep 0x3	\n");
 }
 
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index fdbe06c98895e..b3ccb9e5ffe42 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -259,7 +259,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 		regs->r2 = (unsigned long)&sf->uc;
 
 		/*
-		 * small optim to avoid unconditonally calling do_sigaltstack
+		 * small optim to avoid unconditionally calling do_sigaltstack
 		 * in sigreturn path, now that we only have rt_sigreturn
 		 */
 		magic = MAGIC_SIGALTSTK;
@@ -391,7 +391,7 @@ void do_signal(struct pt_regs *regs)
 void do_notify_resume(struct pt_regs *regs)
 {
 	/*
-	 * ASM glue gaurantees that this is only called when returning to
+	 * ASM glue guarantees that this is only called when returning to
 	 * user mode
 	 */
 	if (test_thread_flag(TIF_NOTIFY_RESUME))
-- 
GitLab


From 8e97bf39fa0361af3e64739b3766992b9dafa11d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 21 Apr 2021 22:16:53 -0700
Subject: [PATCH 0296/3804] ARC: kgdb: add 'fallthrough' to prevent a warning

Use the 'fallthrough' macro to document that this switch case
does indeed fall through to the next case.

../arch/arc/kernel/kgdb.c: In function 'kgdb_arch_handle_exception':
../arch/arc/kernel/kgdb.c:141:6: warning: this statement may fall through [-Wimplicit-fallthrough=]
  141 |   if (kgdb_hex2long(&ptr, &addr))
      |      ^
../arch/arc/kernel/kgdb.c:144:2: note: here
  144 |  case 'D':
      |  ^~~~

Cc: linux-snps-arc@lists.infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/kgdb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c
index ecfbc42d3a40f..345a0000554cb 100644
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
 		ptr = &remcomInBuffer[1];
 		if (kgdb_hex2long(&ptr, &addr))
 			regs->ret = addr;
+		fallthrough;
 
 	case 'D':
 	case 'k':
-- 
GitLab


From 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 23 Apr 2021 12:16:25 -0700
Subject: [PATCH 0297/3804] ARC: entry: fix off-by-one error in syscall number
 validation

We have NR_syscalls syscalls from [0 .. NR_syscalls-1].
However the check for invalid syscall number is "> NR_syscalls" as
opposed to >=. This off-by-one error erroneously allows "NR_syscalls"
to be treated as valid syscall causing out-of-bounds access into
syscall-call table ensuing a crash (holes within syscall table have an
invalid-entry handler but this is beyond the array implementing the
table).

This problem showed up on v5.6 kernel when testing glibc 2.33 (v5.10
kernel capable, including faccessat2 syscall 439). The v5.6 kernel has
NR_syscalls=439 (0 to 438). Due to the bug, 439 passed by glibc was
not handled as -ENOSYS but processed leading to a crash.

Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/48
Reported-by: Shahab Vahedi <shahab@synopsys.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/entry.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1743506081da6..2cb8dfe866b66 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -177,7 +177,7 @@ tracesys:
 
 	; Do the Sys Call as we normally would.
 	; Validate the Sys Call number
-	cmp     r8,  NR_syscalls
+	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     tracesys_exit
 
@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
 	;============ Normal syscall case
 
 	; syscall num shd not exceed the total system calls avail
-	cmp     r8,  NR_syscalls
+	cmp     r8,  NR_syscalls - 1
 	mov.hi  r0, -ENOSYS
 	bhi     .Lret_from_system_call
 
-- 
GitLab


From c5f756d8c6265ebb1736a7787231f010a3b782e5 Mon Sep 17 00:00:00 2001
From: Vladimir Isaev <isaev@synopsys.com>
Date: Tue, 27 Apr 2021 15:12:37 +0300
Subject: [PATCH 0298/3804] ARC: mm: PAE: use 40-bit physical page mask

32-bit PAGE_MASK can not be used as a mask for physical addresses
when PAE is enabled. PAGE_MASK_PHYS must be used for physical
addresses instead of PAGE_MASK.

Without this, init gets SIGSEGV if pte_modify was called:

| potentially unexpected fatal signal 11.
| Path: /bin/busybox
| CPU: 0 PID: 1 Comm: init Not tainted 5.12.0-rc5-00003-g1e43c377a79f-dirty
| Insn could not be fetched
|     @No matching VMA found
|  ECR: 0x00040000 EFA: 0x00000000 ERET: 0x00000000
| STAT: 0x80080082 [IE U     ]   BTA: 0x00000000
|  SP: 0x5f9ffe44  FP: 0x00000000 BLK: 0xaf3d4
| LPS: 0x000d093e LPE: 0x000d0950 LPC: 0x00000000
| r00: 0x00000002 r01: 0x5f9fff14 r02: 0x5f9fff20
| ...
| Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/page.h      | 12 ++++++++++++
 arch/arc/include/asm/pgtable.h   | 12 +++---------
 arch/arc/include/uapi/asm/page.h |  1 -
 arch/arc/mm/ioremap.c            |  5 +++--
 arch/arc/mm/tlb.c                |  2 +-
 5 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h
index ad9b7fe4dba36..4a9d33372fe2b 100644
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,6 +7,18 @@
 
 #include <uapi/asm/page.h>
 
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	40
+#define PAGE_MASK_PHYS			(0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS	32
+#define PAGE_MASK_PHYS			PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
 #ifndef __ASSEMBLY__
 
 #define clear_page(paddr)		memset((paddr), 0, PAGE_SIZE)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 163641726a2b9..5878846f00cfe 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -107,8 +107,8 @@
 #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
 
 /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK	(PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
+#define _PAGE_CHG_MASK	(PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+							   _PAGE_SPECIAL)
 /* More Abbrevaited helpers */
 #define PAGE_U_NONE     __pgprot(___DEF)
 #define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
@@ -132,13 +132,7 @@
 #define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
 
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#else
-#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#endif
+#define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK_PHYS | _PAGE_CACHEABLE)
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h
index 2a97e2718a219..2a4ad619abfba 100644
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
 
 #define PAGE_MASK	(~(PAGE_SIZE-1))
 
-
 #endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index fac4adc902044..95c649fbc95af 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
 void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 			   unsigned long flags)
 {
+	unsigned int off;
 	unsigned long vaddr;
 	struct vm_struct *area;
-	phys_addr_t off, end;
+	phys_addr_t end;
 	pgprot_t prot = __pgprot(flags);
 
 	/* Don't allow wraparound, zero size */
@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
 
 	/* Mappings have to be page-aligned */
 	off = paddr & ~PAGE_MASK;
-	paddr &= PAGE_MASK;
+	paddr &= PAGE_MASK_PHYS;
 	size = PAGE_ALIGN(end + 1) - paddr;
 
 	/*
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index 9bb3c24f36770..9c7c682472896 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
 		      pte_t *ptep)
 {
 	unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
-	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+	phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
 	struct page *page = pfn_to_page(pte_pfn(*ptep));
 
 	create_tlb(vma, vaddr, ptep);
-- 
GitLab


From 1d5e4640e5df15252398c1b621f6bd432f2d7f17 Mon Sep 17 00:00:00 2001
From: Vladimir Isaev <isaev@synopsys.com>
Date: Tue, 27 Apr 2021 15:13:54 +0300
Subject: [PATCH 0299/3804] ARC: mm: Use max_high_pfn as a HIGHMEM zone border

Commit 4af22ded0ecf ("arc: fix memory initialization for systems
with two memory banks") fixed highmem, but for the PAE case it causes
bug messages:

| BUG: Bad page state in process swapper  pfn:80000
| page:(ptrval) refcount:0 mapcount:1 mapping:00000000 index:0x0 pfn:0x80000 flags: 0x0()
| raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000
| raw: 00000000
| page dumped because: nonzero mapcount
| Modules linked in:
| CPU: 0 PID: 0 Comm: swapper Not tainted 5.12.0-rc5-00003-g1e43c377a79f #1

This is because the fix expects highmem to be always less than
lowmem and uses min_low_pfn as an upper zone border for highmem.

max_high_pfn should be ok for both highmem and highmem+PAE cases.

Fixes: 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks")
Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: stable@vger.kernel.org  #5.8 onwards
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/init.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 33832e36bdb7d..e2ed355438c96 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
 	min_high_pfn = PFN_DOWN(high_mem_start);
 	max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
 
-	max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+	/*
+	 * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+	 * For HIGHMEM without PAE max_high_pfn should be less than
+	 * min_low_pfn to guarantee that these two regions don't overlap.
+	 * For PAE case highmem is greater than lowmem, so it is natural
+	 * to use max_high_pfn.
+	 *
+	 * In both cases, holes should be handled by pfn_valid().
+	 */
+	max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
 
 	high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
 
-- 
GitLab


From bf9e262fcfa6350269f00a95658f701f2595db13 Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Sat, 8 May 2021 11:07:33 +0800
Subject: [PATCH 0300/3804] docs/zh_CN: Remove obsolete translation file

This translation file was replaced by
   Documentation/translations/zh_CN/admin-guide/security-bugs.rst
which was created in commit 2d153571003b ("docs/zh_CN: Add
zh_CN/admin-guide/security-bugs.rst").
This is a translation left over from history. Remove it.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210508030741.82655-1-wanjiabing@vivo.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/SecurityBugs | 50 -------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 Documentation/translations/zh_CN/SecurityBugs

diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs
deleted file mode 100644
index 2d0fffd122cee..0000000000000
--- a/Documentation/translations/zh_CN/SecurityBugs
+++ /dev/null
@@ -1,50 +0,0 @@
-Chinese translated version of Documentation/admin-guide/security-bugs.rst
-
-If you have any comment or update to the content, please contact the
-original document maintainer directly.  However, if you have a problem
-communicating in English you can also ask the Chinese maintainer for
-help.  Contact the Chinese maintainer if this translation is outdated
-or if there is a problem with the translation.
-
-Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
----------------------------------------------------------------------
-Documentation/admin-guide/security-bugs.rst 的中文翻译
-
-如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
-交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-译存在问题，请联系中文版维护者。
-
-中文版维护者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版翻译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版校译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-
-
-以下为正文
----------------------------------------------------------------------
-Linux内核开发者认为安全非常重要。因此，我们想要知道当一个有关于
-安全的漏洞被发现的时候，并且它可能会被尽快的修复或者公开。请把这个安全
-漏洞报告给Linux内核安全团队。
-
-1) 联系
-
-linux内核安全团队可以通过email<security@kernel.org>来联系。这是
-一组独立的安全工作人员，可以帮助改善漏洞报告并且公布和取消一个修复。安
-全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
-当遇到任何漏洞，所能提供的信息越多就越能诊断和修复。如果你不清楚什么
-是有帮助的信息，那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任
-何攻击性的代码都是非常有用的，未经报告者的同意不会被取消，除非它已经
-被公布于众。
-
-2) 公开
-
-Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直
-到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理
-解，解决方案没有通过测试或者供应商协调，可以合理地延迟公开。然而，我们
-期望这些延迟尽可能的短些，是可数的几天，而不是几个星期或者几个月。公开
-日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很
-短（特殊的，它已经被公众所知道）到几个星期。作为一个基本的默认政策，我
-们所期望通知公众的日期是7天的安排。
-
-3) 保密协议
-
-Linux内核安全团队不是一个正式的团体，因此不能加入任何的保密协议。
-- 
GitLab


From 9e255e2b9afe948fb795cbaa854acc3904d4212c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 6 May 2021 16:19:07 -0700
Subject: [PATCH 0301/3804] Documentation: drop optional BOMs

A few of the Documentation .rst files begin with a Unicode
byte order mark (BOM). The BOM may signify endianess for
16-bit or 32-bit encodings or indicate that the text stream
is indeed Unicode. We don't need it for either of those uses.
It may also interfere with (confuse) some software.

Since we don't need it and its use is optional, just delete
the uses of it in Documentation/.

https://en.wikipedia.org/wiki/Byte_order_mark

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210506231907.14359-1-rdunlap@infradead.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/block/data-integrity.rst                 | 2 +-
 Documentation/process/kernel-enforcement-statement.rst | 2 +-
 Documentation/security/tpm/xen-tpmfront.rst            | 2 +-
 Documentation/timers/no_hz.rst                         | 2 +-
 Documentation/usb/mtouchusb.rst                        | 2 +-
 Documentation/usb/usb-serial.rst                       | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst
index 4f2452a95c434..07a97aa266685 100644
--- a/Documentation/block/data-integrity.rst
+++ b/Documentation/block/data-integrity.rst
@@ -1,4 +1,4 @@
-﻿==============
+==============
 Data Integrity
 ==============
 
diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst
index e5a1be4760476..dc2d813b2e793 100644
--- a/Documentation/process/kernel-enforcement-statement.rst
+++ b/Documentation/process/kernel-enforcement-statement.rst
@@ -1,4 +1,4 @@
-﻿.. _process_statement_kernel:
+.. _process_statement_kernel:
 
 Linux Kernel Enforcement Statement
 ----------------------------------
diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst
index 00d5b1db227d4..31c67522f2ade 100644
--- a/Documentation/security/tpm/xen-tpmfront.rst
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -1,4 +1,4 @@
-﻿=============================
+=============================
 Virtual TPM interface for Xen
 =============================
 
diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst
index c4c70e1aada3c..6cadad7c3aad4 100644
--- a/Documentation/timers/no_hz.rst
+++ b/Documentation/timers/no_hz.rst
@@ -1,4 +1,4 @@
-﻿======================================
+======================================
 NO_HZ: Reducing Scheduling-Clock Ticks
 ======================================
 
diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst
index d1111b74bf759..5ae1f74fe74b6 100644
--- a/Documentation/usb/mtouchusb.rst
+++ b/Documentation/usb/mtouchusb.rst
@@ -1,4 +1,4 @@
-﻿================
+================
 mtouchusb driver
 ================
 
diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst
index 8fa7dbd3da9a4..69586aeb60bb4 100644
--- a/Documentation/usb/usb-serial.rst
+++ b/Documentation/usb/usb-serial.rst
@@ -1,4 +1,4 @@
-﻿==========
+==========
 USB serial
 ==========
 
-- 
GitLab


From 8ab78863e9eff11910e1ac8bcf478060c29b379e Mon Sep 17 00:00:00 2001
From: Jeimon <jjjinmeng.zhou@gmail.com>
Date: Sat, 8 May 2021 11:52:30 +0800
Subject: [PATCH 0302/3804] net/nfc/rawsock.c: fix a permission check bug

The function rawsock_create() calls a privileged function sk_alloc(), which requires a ns-aware check to check net->user_ns, i.e., ns_capable(). However, the original code checks the init_user_ns using capable(). So we replace the capable() with ns_capable().

Signed-off-by: Jeimon <jjjinmeng.zhou@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/nfc/rawsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 9c7eb8455ba8e..5f1d438a0a23f 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -329,7 +329,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
 		return -ESOCKTNOSUPPORT;
 
 	if (sock->type == SOCK_RAW) {
-		if (!capable(CAP_NET_RAW))
+		if (!ns_capable(net->user_ns, CAP_NET_RAW))
 			return -EPERM;
 		sock->ops = &rawsock_raw_ops;
 	} else {
-- 
GitLab


From ddb6e00f8413e885ff826e32521cff7924661de0 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 8 May 2021 07:38:22 +0200
Subject: [PATCH 0303/3804] net: netcp: Fix an error message

'ret' is known to be 0 here.
The expected error code is stored in 'tx_pipe->dma_queue', so use it
instead.

While at it, switch from %d to %pe which is more user friendly.

Fixes: 84640e27f230 ("net: netcp: Add Keystone NetCP core ethernet driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/netcp_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 9030e619e5436..97942b0e38975 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1350,8 +1350,8 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe)
 	tx_pipe->dma_queue = knav_queue_open(name, tx_pipe->dma_queue_id,
 					     KNAV_QUEUE_SHARED);
 	if (IS_ERR(tx_pipe->dma_queue)) {
-		dev_err(dev, "Could not open DMA queue for channel \"%s\": %d\n",
-			name, ret);
+		dev_err(dev, "Could not open DMA queue for channel \"%s\": %pe\n",
+			name, tx_pipe->dma_queue);
 		ret = PTR_ERR(tx_pipe->dma_queue);
 		goto err;
 	}
-- 
GitLab


From 0d3ae948741ac6d80e39ab27b45297367ee477de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 14 Apr 2021 10:05:17 -0700
Subject: [PATCH 0304/3804] sh: Remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes this annoying warning:

arch/sh/kernel/traps.c: In function ‘nmi_trap_handler’:
arch/sh/kernel/traps.c:183:15: warning: unused variable ‘cpu’ [-Wunused-variable]
  183 |  unsigned int cpu = smp_processor_id();

Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210414170517.1205430-1-eric.dumazet@gmail.com
---
 arch/sh/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c
index f5beecdac6938..e76b221570999 100644
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { }
 
 BUILD_TRAP_HANDLER(nmi)
 {
-	unsigned int cpu = smp_processor_id();
 	TRAP_HANDLER_DECL;
 
 	arch_ftrace_nmi_enter();
-- 
GitLab


From a269333fa5c0c8e53c92b5a28a6076a28cde3e83 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 8 May 2021 16:30:35 +0300
Subject: [PATCH 0305/3804] net: dsa: fix a crash if ->get_sset_count() fails

If ds->ops->get_sset_count() fails then "count" is a negative error
code such as -EOPNOTSUPP.  Because "i" is an unsigned int, the negative
error code is type promoted to a very high value and the loop will
corrupt memory until the system crashes.

Fix this by checking for error codes and changing the type of "i" to
just int.

Fixes: badf3ada60ab ("net: dsa: Provide CPU port statistics to master netdev")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/master.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/dsa/master.c b/net/dsa/master.c
index 052a977914a6d..63adbc21a735a 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 	struct dsa_switch *ds = cpu_dp->ds;
 	int port = cpu_dp->index;
 	int len = ETH_GSTRING_LEN;
-	int mcount = 0, count;
-	unsigned int i;
+	int mcount = 0, count, i;
 	uint8_t pfx[4];
 	uint8_t *ndata;
 
@@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 		 */
 		ds->ops->get_strings(ds, port, stringset, ndata);
 		count = ds->ops->get_sset_count(ds, port, stringset);
+		if (count < 0)
+			return;
 		for (i = 0; i < count; i++) {
 			memmove(ndata + (i * len + sizeof(pfx)),
 				ndata + i * len, len - sizeof(pfx));
-- 
GitLab


From db825feefc6868896fed5e361787ba3bee2fd906 Mon Sep 17 00:00:00 2001
From: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Date: Sun, 9 May 2021 09:43:18 +0300
Subject: [PATCH 0306/3804] net/mlx4: Fix EEPROM dump support

Fix SFP and QSFP* EEPROM queries by setting i2c_address, offset and page
number correctly. For SFP set the following params:
- I2C address for offsets 0-255 is 0x50. For 256-511 - 0x51.
- Page number is zero.
- Offset is 0-255.

At the same time, QSFP* parameters are different:
- I2C address is always 0x50.
- Page number is not limited to zero.
- Offset is 0-255 for page zero and 128-255 for others.

To set parameters accordingly to cable used, implement function to query
module ID and implement respective helper functions to set parameters
correctly.

Fixes: 135dd9594f12 ("net/mlx4_en: ethtool, Remove unsupported SFP EEPROM high pages query")
Signed-off-by: Vladyslav Tarasiuk <vladyslavt@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx4/en_ethtool.c   |   4 +-
 drivers/net/ethernet/mellanox/mlx4/port.c     | 107 +++++++++++++++++-
 2 files changed, 104 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 1434df66fcf2e..3616b77caa0ad 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -2027,8 +2027,6 @@ static int mlx4_en_set_tunable(struct net_device *dev,
 	return ret;
 }
 
-#define MLX4_EEPROM_PAGE_LEN 256
-
 static int mlx4_en_get_module_info(struct net_device *dev,
 				   struct ethtool_modinfo *modinfo)
 {
@@ -2063,7 +2061,7 @@ static int mlx4_en_get_module_info(struct net_device *dev,
 		break;
 	case MLX4_MODULE_ID_SFP:
 		modinfo->type = ETH_MODULE_SFF_8472;
-		modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
 		break;
 	default:
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index ba6ac31a339dc..256a06b3c096b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1973,6 +1973,7 @@ EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave);
 #define I2C_ADDR_LOW  0x50
 #define I2C_ADDR_HIGH 0x51
 #define I2C_PAGE_SIZE 256
+#define I2C_HIGH_PAGE_SIZE 128
 
 /* Module Info Data */
 struct mlx4_cable_info {
@@ -2026,6 +2027,88 @@ static inline const char *cable_info_mad_err_str(u16 mad_status)
 	return "Unknown Error";
 }
 
+static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id)
+{
+	struct mlx4_cmd_mailbox *inbox, *outbox;
+	struct mlx4_mad_ifc *inmad, *outmad;
+	struct mlx4_cable_info *cable_info;
+	int ret;
+
+	inbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(inbox))
+		return PTR_ERR(inbox);
+
+	outbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(outbox)) {
+		mlx4_free_cmd_mailbox(dev, inbox);
+		return PTR_ERR(outbox);
+	}
+
+	inmad = (struct mlx4_mad_ifc *)(inbox->buf);
+	outmad = (struct mlx4_mad_ifc *)(outbox->buf);
+
+	inmad->method = 0x1; /* Get */
+	inmad->class_version = 0x1;
+	inmad->mgmt_class = 0x1;
+	inmad->base_version = 0x1;
+	inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */
+
+	cable_info = (struct mlx4_cable_info *)inmad->data;
+	cable_info->dev_mem_address = 0;
+	cable_info->page_num = 0;
+	cable_info->i2c_addr = I2C_ADDR_LOW;
+	cable_info->size = cpu_to_be16(1);
+
+	ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
+	if (ret)
+		goto out;
+
+	if (be16_to_cpu(outmad->status)) {
+		/* Mad returned with bad status */
+		ret = be16_to_cpu(outmad->status);
+		mlx4_warn(dev,
+			  "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n",
+			  0xFF60, port, I2C_ADDR_LOW, 0, 1, ret,
+			  cable_info_mad_err_str(ret));
+		ret = -ret;
+		goto out;
+	}
+	cable_info = (struct mlx4_cable_info *)outmad->data;
+	*module_id = cable_info->data[0];
+out:
+	mlx4_free_cmd_mailbox(dev, inbox);
+	mlx4_free_cmd_mailbox(dev, outbox);
+	return ret;
+}
+
+static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+	*i2c_addr = I2C_ADDR_LOW;
+	*page_num = 0;
+
+	if (*offset < I2C_PAGE_SIZE)
+		return;
+
+	*i2c_addr = I2C_ADDR_HIGH;
+	*offset -= I2C_PAGE_SIZE;
+}
+
+static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset)
+{
+	/* Offsets 0-255 belong to page 0.
+	 * Offsets 256-639 belong to pages 01, 02, 03.
+	 * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2
+	 */
+	if (*offset < I2C_PAGE_SIZE)
+		*page_num = 0;
+	else
+		*page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE;
+	*i2c_addr = I2C_ADDR_LOW;
+	*offset -= *page_num * I2C_HIGH_PAGE_SIZE;
+}
+
 /**
  * mlx4_get_module_info - Read cable module eeprom data
  * @dev: mlx4_dev.
@@ -2045,12 +2128,30 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
 	struct mlx4_cmd_mailbox *inbox, *outbox;
 	struct mlx4_mad_ifc *inmad, *outmad;
 	struct mlx4_cable_info *cable_info;
-	u16 i2c_addr;
+	u8 module_id, i2c_addr, page_num;
 	int ret;
 
 	if (size > MODULE_INFO_MAX_READ)
 		size = MODULE_INFO_MAX_READ;
 
+	ret = mlx4_get_module_id(dev, port, &module_id);
+	if (ret)
+		return ret;
+
+	switch (module_id) {
+	case MLX4_MODULE_ID_SFP:
+		mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		break;
+	case MLX4_MODULE_ID_QSFP:
+	case MLX4_MODULE_ID_QSFP_PLUS:
+	case MLX4_MODULE_ID_QSFP28:
+		mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset);
+		break;
+	default:
+		mlx4_err(dev, "Module ID not recognized: %#x\n", module_id);
+		return -EINVAL;
+	}
+
 	inbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(inbox))
 		return PTR_ERR(inbox);
@@ -2076,11 +2177,9 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port,
 		 */
 		size -= offset + size - I2C_PAGE_SIZE;
 
-	i2c_addr = I2C_ADDR_LOW;
-
 	cable_info = (struct mlx4_cable_info *)inmad->data;
 	cable_info->dev_mem_address = cpu_to_be16(offset);
-	cable_info->page_num = 0;
+	cable_info->page_num = page_num;
 	cable_info->i2c_addr = i2c_addr;
 	cable_info->size = cpu_to_be16(size);
 
-- 
GitLab


From b94cbc909f1d80378a1f541968309e5c1178c98b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 9 May 2021 22:33:38 +0300
Subject: [PATCH 0307/3804] net: dsa: fix error code getting shifted with 4 in
 dsa_slave_get_sset_count

DSA implements a bunch of 'standardized' ethtool statistics counters,
namely tx_packets, tx_bytes, rx_packets, rx_bytes. So whatever the
hardware driver returns in .get_sset_count(), we need to add 4 to that.

That is ok, except that .get_sset_count() can return a negative error
code, for example:

b53_get_sset_count
-> phy_ethtool_get_sset_count
   -> return -EIO

-EIO is -5, and with 4 added to it, it becomes -1, aka -EPERM. One can
imagine that certain error codes may even become positive, although
based on code inspection I did not see instances of that.

Check the error code first, if it is negative return it as-is.

Based on a similar patch for dsa_master_get_strings from Dan Carpenter:
https://patchwork.kernel.org/project/netdevbpf/patch/YJaSe3RPgn7gKxZv@mwanda/

Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8c0f3c6ab3654..d4756b9201089 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -776,13 +776,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
 	struct dsa_switch *ds = dp->ds;
 
 	if (sset == ETH_SS_STATS) {
-		int count;
+		int count = 0;
 
-		count = 4;
-		if (ds->ops->get_sset_count)
-			count += ds->ops->get_sset_count(ds, dp->index, sset);
+		if (ds->ops->get_sset_count) {
+			count = ds->ops->get_sset_count(ds, dp->index, sset);
+			if (count < 0)
+				return count;
+		}
 
-		return count;
+		return count + 4;
 	} else if (sset ==  ETH_SS_TEST) {
 		return net_selftest_get_count();
 	}
-- 
GitLab


From 3058e01d31bbdbe50e02cafece2b22817a6a0eae Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Mon, 10 May 2021 09:57:38 +0700
Subject: [PATCH 0308/3804] tipc: make node link identity publish thread safe

The use of the node address and node link identity is not thread safe,
meaning that two publications may be published with the same values; as a
result, one of them will fail because it already exists in the name table.
To avoid this, we have to use the node address and node link identity
values taken under the protection of the node item's write lock.

Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()")
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 8217905348f48..81af92954c6c2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n)
 	write_unlock_bh(&n->lock);
 
 	if (flags & TIPC_NOTIFY_NODE_DOWN)
-		tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
+		tipc_publ_notify(net, publ_list, sk.node, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_NODE_UP)
-		tipc_named_node_up(net, n->addr, n->capabilities);
+		tipc_named_node_up(net, sk.node, n->capabilities);
 
 	if (flags & TIPC_NOTIFY_LINK_UP) {
-		tipc_mon_peer_up(net, n->addr, bearer_id);
-		tipc_nametbl_publish(net, &ua, &sk, n->link_id);
+		tipc_mon_peer_up(net, sk.node, bearer_id);
+		tipc_nametbl_publish(net, &ua, &sk, sk.ref);
 	}
 	if (flags & TIPC_NOTIFY_LINK_DOWN) {
-		tipc_mon_peer_down(net, n->addr, bearer_id);
-		tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
+		tipc_mon_peer_down(net, sk.node, bearer_id);
+		tipc_nametbl_withdraw(net, &ua, &sk, sk.ref);
 	}
 }
 
-- 
GitLab


From 297c4de6f780b63b6d2af75a730720483bf1904a Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 10 May 2021 13:07:08 +0200
Subject: [PATCH 0309/3804] net: dsa: felix: re-enable TAS guard band mode

Commit 316bcffe4479 ("net: dsa: felix: disable always guard band bit for
TAS config") disabled the guard band and broke 802.3Qbv compliance.

There are two issues here:
 (1) Without the guard band the end of the scheduling window could be
     overrun by a frame in transit.
 (2) Frames that don't fit into a configured window will still be sent.

The reason for both issues is that the switch will schedule the _start_
of a frame transmission inside the predefined window without taking the
length of the frame into account. Thus, we'll need the guard band which
will close the gate early, so that a complete frame can still be sent.
Revert the commit and add a note.

For a lengthy discussion see [1].

[1] https://lore.kernel.org/netdev/c7618025da6723418c56a54fe4683bd7@walle.cc/

Fixes: 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 2473bebe48e6e..f966a253d1c77 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1227,12 +1227,17 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 	if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX)
 		return -ERANGE;
 
-	/* Set port num and disable ALWAYS_GUARD_BAND_SCH_Q, which means set
-	 * guard band to be implemented for nonschedule queues to schedule
-	 * queues transition.
+	/* Enable guard band. The switch will schedule frames without taking
+	 * their length into account. Thus we'll always need to enable the
+	 * guard band which reserves the time of a maximum sized frame at the
+	 * end of the time window.
+	 *
+	 * Although the ALWAYS_GUARD_BAND_SCH_Q bit is global for all ports, we
+	 * need to set PORT_NUM, because subsequent writes to PARAM_CFG_REG_n
+	 * operate on the port number.
 	 */
-	ocelot_rmw(ocelot,
-		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port),
+	ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) |
+		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
 		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M |
 		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
 		   QSYS_TAS_PARAM_CFG_CTRL);
-- 
GitLab


From a00593737f8bac2c9e97b696e7ff84a4446653e8 Mon Sep 17 00:00:00 2001
From: Subbaraman Narayanamurthy <subbaram@codeaurora.org>
Date: Thu, 22 Apr 2021 11:36:10 -0700
Subject: [PATCH 0310/3804] interconnect: qcom: bcm-voter: add a missing
 of_node_put()

Add a missing of_node_put() in of_bcm_voter_get() to avoid the
reference leak.

Signed-off-by: Subbaraman Narayanamurthy <subbaram@codeaurora.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Link: https://lore.kernel.org/r/1619116570-13308-1-git-send-email-subbaram@codeaurora.org
Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support")
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/bcm-voter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c
index d1591a28b7438..547f4c2593f41 100644
--- a/drivers/interconnect/qcom/bcm-voter.c
+++ b/drivers/interconnect/qcom/bcm-voter.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
  */
 
 #include <asm/div64.h>
@@ -205,6 +205,7 @@ struct bcm_voter *of_bcm_voter_get(struct device *dev, const char *name)
 	}
 	mutex_unlock(&bcm_voter_lock);
 
+	of_node_put(node);
 	return voter;
 }
 EXPORT_SYMBOL_GPL(of_bcm_voter_get);
-- 
GitLab


From 1fd86e280d8b21762901e43d42d66dbfe8b8e0d3 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 11:44:33 +0800
Subject: [PATCH 0311/3804] interconnect: qcom: Add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1620704673-104205-1-git-send-email-zou_wei@huawei.com
Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support")
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/bcm-voter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c
index 547f4c2593f41..8f385f9c2dd38 100644
--- a/drivers/interconnect/qcom/bcm-voter.c
+++ b/drivers/interconnect/qcom/bcm-voter.c
@@ -363,6 +363,7 @@ static const struct of_device_id bcm_voter_of_match[] = {
 	{ .compatible = "qcom,bcm-voter" },
 	{ }
 };
+MODULE_DEVICE_TABLE(of, bcm_voter_of_match);
 
 static struct platform_driver qcom_icc_bcm_voter_driver = {
 	.probe = qcom_icc_bcm_voter_probe,
-- 
GitLab


From 07adc0225484fc199e3dc15ec889f75f498c4fca Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Mon, 12 Apr 2021 13:49:07 +0800
Subject: [PATCH 0312/3804] usb: cdns3: Fix runtime PM imbalance on error

When cdns3_gadget_start() fails, a pairing PM usage counter
decrement is needed to keep the counter balanced.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Link: https://lore.kernel.org/r/20210412054908.7975-1-dinghao.liu@zju.edu.cn
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdns3-gadget.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index 9b1bd417cec03..a8b7b50abf645 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -3268,8 +3268,10 @@ static int __cdns3_gadget_init(struct cdns *cdns)
 	pm_runtime_get_sync(cdns->dev);
 
 	ret = cdns3_gadget_start(cdns);
-	if (ret)
+	if (ret) {
+		pm_runtime_put_sync(cdns->dev);
 		return ret;
+	}
 
 	/*
 	 * Because interrupt line can be shared with other components in
-- 
GitLab


From 3b414d1b0107fa51ad6063de9752d4b2a8063980 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Tue, 20 Apr 2021 06:28:13 +0200
Subject: [PATCH 0313/3804] usb: cdnsp: Fix lack of removing request from
 pending list.

This patch fixes the request not being removed from ep->pending_list when
the stop endpoint command fails. Even after this command fails, the driver
must remove the request from ep->pending_list.
Without this fix, the driver can get stuck in the cdnsp_gadget_ep_disable
function in the loop:
        while (!list_empty(&pep->pending_list)) {
                preq = next_request(&pep->pending_list);
                cdnsp_ep_dequeue(pep, preq);
        }

Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20210420042813.34917-1-pawell@gli-login.cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdnsp-gadget.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c
index 56707b6b0f57c..c083985e387b2 100644
--- a/drivers/usb/cdns3/cdnsp-gadget.c
+++ b/drivers/usb/cdns3/cdnsp-gadget.c
@@ -422,17 +422,17 @@ unmap:
 int cdnsp_ep_dequeue(struct cdnsp_ep *pep, struct cdnsp_request *preq)
 {
 	struct cdnsp_device *pdev = pep->pdev;
-	int ret;
+	int ret_stop = 0;
+	int ret_rem;
 
 	trace_cdnsp_request_dequeue(preq);
 
-	if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING) {
-		ret = cdnsp_cmd_stop_ep(pdev, pep);
-		if (ret)
-			return ret;
-	}
+	if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING)
+		ret_stop = cdnsp_cmd_stop_ep(pdev, pep);
+
+	ret_rem = cdnsp_remove_request(pdev, preq, pep);
 
-	return cdnsp_remove_request(pdev, preq, pep);
+	return ret_rem ? ret_rem : ret_stop;
 }
 
 static void cdnsp_zero_in_ctx(struct cdnsp_device *pdev)
-- 
GitLab


From bb4031b8af804244a7e4349d38f6624f68664bd6 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Mon, 26 Apr 2021 09:56:18 +0300
Subject: [PATCH 0314/3804] clk: Skip clk provider registration when np is NULL

commit 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added")
revealed that clk/bcm/clk-raspberrypi.c driver calls
devm_of_clk_add_hw_provider(), with a NULL dev->of_node, which resulted in a
NULL pointer dereference in of_clk_add_hw_provider() when calling
fwnode_dev_initialized().

Returning 0 reduces the number of if conditions in driver code and is
consistent with the CONFIG_OF=n inline stub that returns 0 when CONFIG_OF
is disabled. The downside is that drivers may register clkdev lookups
when they don't need to and waste some memory.

Fixes: 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added")
Fixes: 3c9ea42802a1 ("clk: Mark fwnodes when their clock provider is added/removed")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Saravana Kannan <saravanak@google.com>
Reviewed-by: Nicolas Saenz Julienne <nsaenz@kernel.org>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Link: https://lore.kernel.org/r/20210426065618.588144-1-tudor.ambarus@microchip.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/clk.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index e2ec1b7452439..65508eb89ec99 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -4540,6 +4540,9 @@ int of_clk_add_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
+	if (!np)
+		return 0;
+
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
@@ -4579,6 +4582,9 @@ int of_clk_add_hw_provider(struct device_node *np,
 	struct of_clk_provider *cp;
 	int ret;
 
+	if (!np)
+		return 0;
+
 	cp = kzalloc(sizeof(*cp), GFP_KERNEL);
 	if (!cp)
 		return -ENOMEM;
@@ -4676,6 +4682,9 @@ void of_clk_del_provider(struct device_node *np)
 {
 	struct of_clk_provider *cp;
 
+	if (!np)
+		return;
+
 	mutex_lock(&of_clk_mutex);
 	list_for_each_entry(cp, &of_clk_providers, link) {
 		if (cp->node == np) {
-- 
GitLab


From 049c4e13714ecbca567b4d5f6d563f05d431c80e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 10 May 2021 13:10:44 +0000
Subject: [PATCH 0315/3804] bpf: Fix alu32 const subreg bound tracking on
 bitwise operations

Fix a bug in the verifier's scalar32_min_max_*() functions which leads to
incorrect tracking of 32 bit bounds for the simulation of and/or/xor bitops.
When both the src & dst subreg is a known constant, then the assumption is
that scalar_min_max_*() will take care to update bounds correctly. However,
this is not the case, for example, consider a register R2 which has a tnum
of 0xffffffff00000000, meaning, lower 32 bits are known constant and in this
case of value 0x00000001. R2 is then and'ed with a register R3 which is a
64 bit known constant, here, 0x100000002.

What can be seen in line '10:' is that 32 bit bounds reach an invalid state
where {u,s}32_min_value > {u,s}32_max_value. The reason is scalar32_min_max_*()
delegates 32 bit bounds updates to scalar_min_max_*(), however, that really
only takes place when both the 64 bit src & dst register is a known constant.
Given scalar32_min_max_*() is intended to be designed as closely as possible
to scalar_min_max_*(), update the 32 bit bounds in this situation through
__mark_reg32_known() which will set all {u,s}32_{min,max}_value to the correct
constant, which is 0x00000000 after the fix (given 0x00000001 & 0x00000002 in
32 bit space). This is possible given var32_off already holds the final value
as dst_reg->var_off is updated before calling scalar32_min_max_*().

Before fix, invalid tracking of R2:

  [...]
  9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0
  9: (5f) r2 &= r3
  10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=1,s32_max_value=0,u32_min_value=1,u32_max_value=0) R3_w=inv4294967298 R10=fp0
  [...]

After fix, correct tracking of R2:

  [...]
  9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0
  9: (5f) r2 &= r3
  10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=0,s32_max_value=0,u32_min_value=0,u32_max_value=0) R3_w=inv4294967298 R10=fp0
  [...]

Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking")
Fixes: 2921c90d4718 ("bpf: Fix a verifier failure with xor")
Reported-by: Manfred Paul (@_manfp)
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 757476c91c984..9352a1b7de2dd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 	s32 smin_val = src_reg->s32_min_value;
 	u32 umax_val = src_reg->u32_max_value;
 
-	/* Assuming scalar64_min_max_and will be called so its safe
-	 * to skip updating register for known 32-bit case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
@@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 		dst_reg->s32_min_value = dst_reg->u32_min_value;
 		dst_reg->s32_max_value = dst_reg->u32_max_value;
 	}
-
 }
 
 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
@@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
 	s32 smin_val = src_reg->s32_min_value;
 	u32 umin_val = src_reg->u32_min_value;
 
-	/* Assuming scalar64_min_max_or will be called so it is safe
-	 * to skip updating register for known case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get our maximum from the var_off, and our minimum is the
 	 * maximum of the operands' minima
@@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 	s32 smin_val = src_reg->s32_min_value;
 
-	/* Assuming scalar64_min_max_xor will be called so it is safe
-	 * to skip updating register for known case.
-	 */
-	if (src_known && dst_known)
+	if (src_known && dst_known) {
+		__mark_reg32_known(dst_reg, var32_off.value);
 		return;
+	}
 
 	/* We get both minimum and maximum from the var32_off. */
 	dst_reg->u32_min_value = var32_off.value;
-- 
GitLab


From 2515dd6ce8e545b0b2eece84920048ef9ed846c4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Mon, 19 Apr 2021 16:17:41 -0700
Subject: [PATCH 0316/3804] stack: Replace "o" output with "r" input constraint

"o" isn't a common asm() constraint to use; it triggers an assertion in
assert-enabled builds of LLVM that it's not recognized when targeting
aarch64 (though it appears to fall back to "m"). It's fixed in LLVM 13 now,
but there isn't really a good reason to use "o" in particular here. To
avoid causing build issues for those using assert-enabled builds of earlier
LLVM versions, the constraint needs changing.

Instead, if the point is to retain the __builtin_alloca(), make ptr appear
to "escape" via being an input to an empty inline asm block. This is
preferable anyways, since otherwise this looks like a dead store.

While the use of "r" was considered in

  https://lore.kernel.org/lkml/202104011447.2E7F543@keescook/

it was only tested as an output (which looks like a dead store, and wasn't
sufficient).

Use "r" as an input constraint instead, which behaves correctly across
compilers and architectures.

Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://reviews.llvm.org/D100412
Link: https://bugs.llvm.org/show_bug.cgi?id=49956
Link: https://lore.kernel.org/r/20210419231741.4084415-1-keescook@chromium.org
---
 include/linux/randomize_kstack.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
index fd80fab663a96..bebc911161b6f 100644
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size);
 		u32 offset = raw_cpu_read(kstack_offset);		\
 		u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset));	\
 		/* Keep allocation even after "ptr" loses scope. */	\
-		asm volatile("" : "=o"(*ptr) :: "memory");		\
+		asm volatile("" :: "r"(ptr) : "memory");		\
 	}								\
 } while (0)
 
-- 
GitLab


From cc2520909c2df9ad51d642bf09b3da26a9f56393 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Wed, 5 May 2021 19:33:35 +0200
Subject: [PATCH 0317/3804] MAINTAINERS: Update my e-mail

Old e-mail address doesn't work anymore, update it to new one.

Link: https://lore.kernel.org/r/20210505173335.1483575-1-jernej.skrabec@gmail.com
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
---
 .mailmap    |  1 +
 MAINTAINERS | 10 +++++-----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.mailmap b/.mailmap
index 2d93232ed72b8..ca235ef4755f3 100644
--- a/.mailmap
+++ b/.mailmap
@@ -159,6 +159,7 @@ Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
 Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
 Jens Axboe <axboe@suse.de>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
+Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 7fdc513392f45..2e9063d018d0e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1572,7 +1572,7 @@ F:	drivers/clk/sunxi/
 ARM/Allwinner sunXi SoC support
 M:	Maxime Ripard <mripard@kernel.org>
 M:	Chen-Yu Tsai <wens@csie.org>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
@@ -5003,7 +5003,7 @@ S:	Maintained
 F:	drivers/net/fddi/defza.*
 
 DEINTERLACE DRIVERS FOR ALLWINNER H3
-M:	Jernej Skrabec <jernej.skrabec@siol.net>
+M:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
@@ -5527,7 +5527,7 @@ F:	include/linux/power/smartreflex.h
 DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE
 M:	Maxime Ripard <mripard@kernel.org>
 M:	Chen-Yu Tsai <wens@csie.org>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	dri-devel@lists.freedesktop.org
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
@@ -5903,7 +5903,7 @@ M:	Andrzej Hajda <a.hajda@samsung.com>
 M:	Neil Armstrong <narmstrong@baylibre.com>
 R:	Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
 R:	Jonas Karlman <jonas@kwiboo.se>
-R:	Jernej Skrabec <jernej.skrabec@siol.net>
+R:	Jernej Skrabec <jernej.skrabec@gmail.com>
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/bridge/
@@ -15490,7 +15490,7 @@ F:	include/uapi/linux/rose.h
 F:	net/rose/
 
 ROTATION DRIVER FOR ALLWINNER A83T
-M:	Jernej Skrabec <jernej.skrabec@siol.net>
+M:	Jernej Skrabec <jernej.skrabec@gmail.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
-- 
GitLab


From ab77fe8935c57d1339d3df64957f32e87f0d5ef3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:27 +0300
Subject: [PATCH 0318/3804] spi: pxa2xx: Introduce int_stop_and_reset() helper

Currently the same few lines are repeated three times in the code.
Deduplicate them with the newly introduced int_stop_and_reset() helper.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 1d4c7f4217ede..5572eec683819 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -620,13 +620,20 @@ static void reset_sccr1(struct driver_data *drv_data)
 	pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg);
 }
 
-static void int_error_stop(struct driver_data *drv_data, const char *msg)
+static void int_stop_and_reset(struct driver_data *drv_data)
 {
-	/* Stop and reset SSP */
+	/* Clear and disable interrupts */
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	reset_sccr1(drv_data);
-	if (!pxa25x_ssp_comp(drv_data))
-		pxa2xx_spi_write(drv_data, SSTO, 0);
+	if (pxa25x_ssp_comp(drv_data))
+		return;
+
+	pxa2xx_spi_write(drv_data, SSTO, 0);
+}
+
+static void int_error_stop(struct driver_data *drv_data, const char *msg)
+{
+	int_stop_and_reset(drv_data);
 	pxa2xx_spi_flush(drv_data);
 	pxa2xx_spi_off(drv_data);
 
@@ -638,11 +645,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg)
 
 static void int_transfer_complete(struct driver_data *drv_data)
 {
-	/* Clear and disable interrupts */
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	reset_sccr1(drv_data);
-	if (!pxa25x_ssp_comp(drv_data))
-		pxa2xx_spi_write(drv_data, SSTO, 0);
+	int_stop_and_reset(drv_data);
 
 	spi_finalize_current_transfer(drv_data->controller);
 }
@@ -1151,11 +1154,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
 {
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
-	/* Stop and reset SSP */
-	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	reset_sccr1(drv_data);
-	if (!pxa25x_ssp_comp(drv_data))
-		pxa2xx_spi_write(drv_data, SSTO, 0);
+	int_stop_and_reset(drv_data);
 	pxa2xx_spi_flush(drv_data);
 	pxa2xx_spi_off(drv_data);
 
-- 
GitLab


From 4761d2e7e51cfbe6fdb4e95903d407927f519f50 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:28 +0300
Subject: [PATCH 0319/3804] spi: pxa2xx: Reuse int_error_stop() in
 pxa2xx_spi_slave_abort()

It appears that pxa2xx_spi_slave_abort() almost repeats the functionality
of int_error_stop(). Reuse int_error_stop() in pxa2xx_spi_slave_abort().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-9-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5572eec683819..087c84e605b94 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -631,7 +631,7 @@ static void int_stop_and_reset(struct driver_data *drv_data)
 	pxa2xx_spi_write(drv_data, SSTO, 0);
 }
 
-static void int_error_stop(struct driver_data *drv_data, const char *msg)
+static void int_error_stop(struct driver_data *drv_data, const char *msg, int err)
 {
 	int_stop_and_reset(drv_data);
 	pxa2xx_spi_flush(drv_data);
@@ -639,7 +639,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg)
 
 	dev_err(drv_data->ssp->dev, "%s\n", msg);
 
-	drv_data->controller->cur_msg->status = -EIO;
+	drv_data->controller->cur_msg->status = err;
 	spi_finalize_current_transfer(drv_data->controller);
 }
 
@@ -658,12 +658,12 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 	u32 irq_status = pxa2xx_spi_read(drv_data, SSSR) & irq_mask;
 
 	if (irq_status & SSSR_ROR) {
-		int_error_stop(drv_data, "interrupt_transfer: fifo overrun");
+		int_error_stop(drv_data, "interrupt_transfer: fifo overrun", -EIO);
 		return IRQ_HANDLED;
 	}
 
 	if (irq_status & SSSR_TUR) {
-		int_error_stop(drv_data, "interrupt_transfer: fifo underrun");
+		int_error_stop(drv_data, "interrupt_transfer: fifo underrun", -EIO);
 		return IRQ_HANDLED;
 	}
 
@@ -1154,14 +1154,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
 {
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
-	int_stop_and_reset(drv_data);
-	pxa2xx_spi_flush(drv_data);
-	pxa2xx_spi_off(drv_data);
-
-	dev_dbg(drv_data->ssp->dev, "transfer aborted\n");
-
-	drv_data->controller->cur_msg->status = -EINTR;
-	spi_finalize_current_transfer(drv_data->controller);
+	int_error_stop(drv_data, "transfer aborted", -EINTR);
 
 	return 0;
 }
-- 
GitLab


From 0c8ccd8b267fc735e4621774ce62728f27d42863 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:29 +0300
Subject: [PATCH 0320/3804] spi: pxa2xx: Use pxa_ssp_enable()/pxa_ssp_disable()
 in the driver

There are a few places that repeat the logic of pxa_ssp_enable() and
pxa_ssp_disable(). Use them instead of the open-coded variants.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-10-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c |  4 +---
 drivers/spi/spi-pxa2xx.c     | 36 ++++++++++++++++++------------------
 include/linux/pxa2xx_ssp.h   | 16 ++++++++++++++++
 sound/soc/pxa/pxa-ssp.c      | 16 ----------------
 4 files changed, 35 insertions(+), 37 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index e00dbadd39ecb..5ca01ad7f4604 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -50,9 +50,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 
 		if (error) {
 			/* In case we got an error we disable the SSP now */
-			pxa2xx_spi_write(drv_data, SSCR0,
-					 pxa2xx_spi_read(drv_data, SSCR0)
-					 & ~SSCR0_SSE);
+			pxa_ssp_disable(drv_data->ssp);
 			msg->status = -EIO;
 		}
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 087c84e605b94..a27f51f5db65f 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -286,13 +286,11 @@ static u32 pxa2xx_configure_sscr0(const struct driver_data *drv_data,
 	case QUARK_X1000_SSP:
 		return clk_div
 			| QUARK_X1000_SSCR0_Motorola
-			| QUARK_X1000_SSCR0_DataSize(bits > 32 ? 8 : bits)
-			| SSCR0_SSE;
+			| QUARK_X1000_SSCR0_DataSize(bits > 32 ? 8 : bits);
 	default:
 		return clk_div
 			| SSCR0_Motorola
 			| SSCR0_DataSize(bits > 16 ? bits - 16 : bits)
-			| SSCR0_SSE
 			| (bits > 16 ? SSCR0_EDSS : 0);
 	}
 }
@@ -498,8 +496,7 @@ static void pxa2xx_spi_off(struct driver_data *drv_data)
 	if (is_mmp2_ssp(drv_data))
 		return;
 
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa_ssp_disable(drv_data->ssp);
 }
 
 static int null_writer(struct driver_data *drv_data)
@@ -1098,25 +1095,26 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	    (pxa2xx_spi_read(drv_data, DDS_RATE) != chip->dds_rate))
 		pxa2xx_spi_write(drv_data, DDS_RATE, chip->dds_rate);
 
+	/* Stop the SSP */
+	if (!is_mmp2_ssp(drv_data))
+		pxa_ssp_disable(drv_data->ssp);
+
+	if (!pxa25x_ssp_comp(drv_data))
+		pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
+
 	/* see if we need to reload the config registers */
 	if ((pxa2xx_spi_read(drv_data, SSCR0) != cr0)
 	    || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask)
 	    != (cr1 & change_mask)) {
-		/* stop the SSP, and update the other bits */
-		if (!is_mmp2_ssp(drv_data))
-			pxa2xx_spi_write(drv_data, SSCR0, cr0 & ~SSCR0_SSE);
-		if (!pxa25x_ssp_comp(drv_data))
-			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 		/* first set CR1 without interrupt and service enables */
 		pxa2xx_spi_write(drv_data, SSCR1, cr1 & change_mask);
-		/* restart the SSP */
+		/* Update the other bits */
 		pxa2xx_spi_write(drv_data, SSCR0, cr0);
-
-	} else {
-		if (!pxa25x_ssp_comp(drv_data))
-			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 	}
 
+	/* Restart the SSP */
+	pxa_ssp_enable(drv_data->ssp);
+
 	if (is_mmp2_ssp(drv_data)) {
 		u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR)
 					& SSSR_TFL_MASK) >> 8;
@@ -1786,8 +1784,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		controller->min_speed_hz =
 			DIV_ROUND_UP(controller->max_speed_hz, 512);
 
+	pxa_ssp_disable(ssp);
+
 	/* Load default SSP configuration */
-	pxa2xx_spi_write(drv_data, SSCR0, 0);
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
 		tmp = QUARK_X1000_SSCR1_RxTresh(RX_THRESH_QUARK_X1000_DFLT) |
@@ -1928,7 +1927,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 	spi_unregister_controller(drv_data->controller);
 
 	/* Disable the SSP at the peripheral and SOC level */
-	pxa2xx_spi_write(drv_data, SSCR0, 0);
+	pxa_ssp_disable(ssp);
 	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
@@ -1957,7 +1956,8 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	status = spi_controller_suspend(drv_data->controller);
 	if (status != 0)
 		return status;
-	pxa2xx_spi_write(drv_data, SSCR0, 0);
+
+	pxa_ssp_disable(ssp);
 
 	if (!pm_runtime_suspended(dev))
 		clk_disable_unprepare(ssp->clk);
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 1b6c1a0922bd2..fdfbe17e15f46 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -254,6 +254,22 @@ static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg)
 	return __raw_readl(dev->mmio_base + reg);
 }
 
+static inline void pxa_ssp_enable(struct ssp_device *ssp)
+{
+	u32 sscr0;
+
+	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) | SSCR0_SSE;
+	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
+}
+
+static inline void pxa_ssp_disable(struct ssp_device *ssp)
+{
+	u32 sscr0;
+
+	sscr0 = pxa_ssp_read_reg(ssp, SSCR0) & ~SSCR0_SSE;
+	pxa_ssp_write_reg(ssp, SSCR0, sscr0);
+}
+
 #if IS_ENABLED(CONFIG_PXA_SSP)
 struct ssp_device *pxa_ssp_request(int port, const char *label);
 void pxa_ssp_free(struct ssp_device *);
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index b941adcbb8f95..939e7e28486aa 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -61,22 +61,6 @@ static void dump_registers(struct ssp_device *ssp)
 		 pxa_ssp_read_reg(ssp, SSACD));
 }
 
-static void pxa_ssp_enable(struct ssp_device *ssp)
-{
-	uint32_t sscr0;
-
-	sscr0 = __raw_readl(ssp->mmio_base + SSCR0) | SSCR0_SSE;
-	__raw_writel(sscr0, ssp->mmio_base + SSCR0);
-}
-
-static void pxa_ssp_disable(struct ssp_device *ssp)
-{
-	uint32_t sscr0;
-
-	sscr0 = __raw_readl(ssp->mmio_base + SSCR0) & ~SSCR0_SSE;
-	__raw_writel(sscr0, ssp->mmio_base + SSCR0);
-}
-
 static void pxa_ssp_set_dma_params(struct ssp_device *ssp, int width4,
 			int out, struct snd_dmaengine_dai_dma_data *dma)
 {
-- 
GitLab


From 1bed378c6b9116c51ae59b970cf3d9b4e9e62ced Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:30 +0300
Subject: [PATCH 0321/3804] spi: pxa2xx: Extract pxa2xx_spi_update() helper

There are a few places that repeat the logic of "update if changed".
Extract pxa2xx_spi_update() helper to deduplicate that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-11-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index a27f51f5db65f..54eaa048651fa 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -200,6 +200,12 @@ static bool is_mmp2_ssp(const struct driver_data *drv_data)
 	return drv_data->ssp_type == MMP2_SSP;
 }
 
+static void pxa2xx_spi_update(const struct driver_data *drv_data, u32 reg, u32 mask, u32 value)
+{
+	if ((pxa2xx_spi_read(drv_data, reg) & mask) != value)
+		pxa2xx_spi_write(drv_data, reg, value & mask);
+}
+
 static u32 pxa2xx_spi_get_ssrc1_change_mask(const struct driver_data *drv_data)
 {
 	switch (drv_data->ssp_type) {
@@ -1081,19 +1087,12 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 			dma_mapped ? "DMA" : "PIO");
 
 	if (is_lpss_ssp(drv_data)) {
-		if ((pxa2xx_spi_read(drv_data, SSIRF) & 0xff)
-		    != chip->lpss_rx_threshold)
-			pxa2xx_spi_write(drv_data, SSIRF,
-					 chip->lpss_rx_threshold);
-		if ((pxa2xx_spi_read(drv_data, SSITF) & 0xffff)
-		    != chip->lpss_tx_threshold)
-			pxa2xx_spi_write(drv_data, SSITF,
-					 chip->lpss_tx_threshold);
+		pxa2xx_spi_update(drv_data, SSIRF, GENMASK(7, 0), chip->lpss_rx_threshold);
+		pxa2xx_spi_update(drv_data, SSITF, GENMASK(15, 0), chip->lpss_tx_threshold);
 	}
 
-	if (is_quark_x1000_ssp(drv_data) &&
-	    (pxa2xx_spi_read(drv_data, DDS_RATE) != chip->dds_rate))
-		pxa2xx_spi_write(drv_data, DDS_RATE, chip->dds_rate);
+	if (is_quark_x1000_ssp(drv_data))
+		pxa2xx_spi_update(drv_data, DDS_RATE, GENMASK(23, 0), chip->dds_rate);
 
 	/* Stop the SSP */
 	if (!is_mmp2_ssp(drv_data))
@@ -1102,15 +1101,11 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 
+	/* first set CR1 without interrupt and service enables */
+	pxa2xx_spi_update(drv_data, SSCR1, change_mask, cr1);
+
 	/* see if we need to reload the config registers */
-	if ((pxa2xx_spi_read(drv_data, SSCR0) != cr0)
-	    || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask)
-	    != (cr1 & change_mask)) {
-		/* first set CR1 without interrupt and service enables */
-		pxa2xx_spi_write(drv_data, SSCR1, cr1 & change_mask);
-		/* Update the other bits */
-		pxa2xx_spi_write(drv_data, SSCR0, cr0);
-	}
+	pxa2xx_spi_update(drv_data, SSCR0, GENMASK(31, 0), cr0);
 
 	/* Restart the SSP */
 	pxa_ssp_enable(drv_data->ssp);
-- 
GitLab


From 42c80cd439a938569a86f6ae135d38c1cda5569b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:31 +0300
Subject: [PATCH 0322/3804] spi: pxa2xx: Extract clear_SSCR1_bits() helper

There are a few places that repeat the logic of "clear some bits in SSCR1".
Extract clear_SSCR1_bits() helper to deduplicate that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-12-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c | 4 +---
 drivers/spi/spi-pxa2xx.c     | 7 ++-----
 drivers/spi/spi-pxa2xx.h     | 5 +++++
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 5ca01ad7f4604..e581027e99f9f 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -41,9 +41,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 		}
 
 		/* Clear status & disable interrupts */
-		pxa2xx_spi_write(drv_data, SSCR1,
-				 pxa2xx_spi_read(drv_data, SSCR1)
-				 & ~drv_data->dma_cr1);
+		clear_SSCR1_bits(drv_data, drv_data->dma_cr1);
 		write_SSSR_CS(drv_data, drv_data->clear_sr);
 		if (!pxa25x_ssp_comp(drv_data))
 			pxa2xx_spi_write(drv_data, SSTO, 0);
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 54eaa048651fa..3a4ad16614f7a 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -733,8 +733,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 static void handle_bad_msg(struct driver_data *drv_data)
 {
 	pxa2xx_spi_off(drv_data);
-	pxa2xx_spi_write(drv_data, SSCR1,
-			 pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1);
+	clear_SSCR1_bits(drv_data, drv_data->int_cr1);
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
@@ -1161,9 +1160,7 @@ static void pxa2xx_spi_handle_err(struct spi_controller *controller,
 	pxa2xx_spi_off(drv_data);
 	/* Clear and disable interrupts and service requests */
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
-	pxa2xx_spi_write(drv_data, SSCR1,
-			 pxa2xx_spi_read(drv_data, SSCR1)
-			 & ~(drv_data->int_cr1 | drv_data->dma_cr1));
+	clear_SSCR1_bits(drv_data, drv_data->int_cr1 | drv_data->dma_cr1);
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 739e264feaa69..ed63f7165cd8e 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -105,6 +105,11 @@ static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
 	}
 }
 
+static inline void clear_SSCR1_bits(const struct driver_data *drv_data, u32 bits)
+{
+	pxa2xx_spi_write(drv_data, SSCR1, pxa2xx_spi_read(drv_data, SSCR1) & ~bits);
+}
+
 static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
 {
 	if (drv_data->ssp_type == CE4100_SSP ||
-- 
GitLab


From 6d380132eaea536bef641f21847c8a7987e96ad8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:32 +0300
Subject: [PATCH 0323/3804] spi: pxa2xx: Extract read_SSSR_bits() helper

There are a few places that repeat the logic of "read some bits from SSSR".
Extract read_SSSR_bits() helper to deduplicate that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-13-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c |  9 +++------
 drivers/spi/spi-pxa2xx.c     | 26 +++++++++++---------------
 drivers/spi/spi-pxa2xx.h     |  7 ++++++-
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index e581027e99f9f..f022d82dcb1bf 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -34,11 +34,8 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 		 * might not know about the error yet. So we re-check the
 		 * ROR bit here before we clear the status register.
 		 */
-		if (!error) {
-			u32 status = pxa2xx_spi_read(drv_data, SSSR)
-				     & drv_data->mask_sr;
-			error = status & SSSR_ROR;
-		}
+		if (!error)
+			error = read_SSSR_bits(drv_data, drv_data->mask_sr) & SSSR_ROR;
 
 		/* Clear status & disable interrupts */
 		clear_SSCR1_bits(drv_data, drv_data->dma_cr1);
@@ -119,7 +116,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
 {
 	u32 status;
 
-	status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr;
+	status = read_SSSR_bits(drv_data, drv_data->mask_sr);
 	if (status & SSSR_ROR) {
 		dev_err(drv_data->ssp->dev, "FIFO overrun\n");
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 3a4ad16614f7a..af3f01de8f5b1 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -247,7 +247,7 @@ static bool pxa2xx_spi_txfifo_full(const struct driver_data *drv_data)
 		break;
 	}
 
-	return (pxa2xx_spi_read(drv_data, SSSR) & mask) == mask;
+	return read_SSSR_bits(drv_data, mask) == mask;
 }
 
 static void pxa2xx_spi_clear_rx_thre(const struct driver_data *drv_data,
@@ -488,7 +488,7 @@ int pxa2xx_spi_flush(struct driver_data *drv_data)
 	unsigned long limit = loops_per_jiffy << 1;
 
 	do {
-		while (pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE)
+		while (read_SSSR_bits(drv_data, SSSR_RNE))
 			pxa2xx_spi_read(drv_data, SSDR);
 	} while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_BSY) && --limit);
 	write_SSSR_CS(drv_data, SSSR_ROR);
@@ -523,8 +523,7 @@ static int null_reader(struct driver_data *drv_data)
 {
 	u8 n_bytes = drv_data->n_bytes;
 
-	while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE)
-	       && (drv_data->rx < drv_data->rx_end)) {
+	while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) {
 		pxa2xx_spi_read(drv_data, SSDR);
 		drv_data->rx += n_bytes;
 	}
@@ -546,8 +545,7 @@ static int u8_writer(struct driver_data *drv_data)
 
 static int u8_reader(struct driver_data *drv_data)
 {
-	while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE)
-	       && (drv_data->rx < drv_data->rx_end)) {
+	while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) {
 		*(u8 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR);
 		++drv_data->rx;
 	}
@@ -569,8 +567,7 @@ static int u16_writer(struct driver_data *drv_data)
 
 static int u16_reader(struct driver_data *drv_data)
 {
-	while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE)
-	       && (drv_data->rx < drv_data->rx_end)) {
+	while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) {
 		*(u16 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR);
 		drv_data->rx += 2;
 	}
@@ -592,8 +589,7 @@ static int u32_writer(struct driver_data *drv_data)
 
 static int u32_reader(struct driver_data *drv_data)
 {
-	while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE)
-	       && (drv_data->rx < drv_data->rx_end)) {
+	while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) {
 		*(u32 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR);
 		drv_data->rx += 4;
 	}
@@ -655,10 +651,11 @@ static void int_transfer_complete(struct driver_data *drv_data)
 
 static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 {
-	u32 irq_mask = (pxa2xx_spi_read(drv_data, SSCR1) & SSCR1_TIE) ?
-		       drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS;
+	u32 irq_status;
 
-	u32 irq_status = pxa2xx_spi_read(drv_data, SSSR) & irq_mask;
+	irq_status = read_SSSR_bits(drv_data, drv_data->mask_sr);
+	if (!(pxa2xx_spi_read(drv_data, SSCR1) & SSCR1_TIE))
+		irq_status &= ~SSSR_TFS;
 
 	if (irq_status & SSSR_ROR) {
 		int_error_stop(drv_data, "interrupt_transfer: fifo overrun", -EIO);
@@ -1110,8 +1107,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	pxa_ssp_enable(drv_data->ssp);
 
 	if (is_mmp2_ssp(drv_data)) {
-		u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR)
-					& SSSR_TFL_MASK) >> 8;
+		u8 tx_level = read_SSSR_bits(drv_data, SSSR_TFL_MASK) >> 8;
 
 		if (tx_level) {
 			/* On MMP2, flipping SSE doesn't to empty TXFIFO. */
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index ed63f7165cd8e..d2cb40f97c4b7 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -110,11 +110,16 @@ static inline void clear_SSCR1_bits(const struct driver_data *drv_data, u32 bits
 	pxa2xx_spi_write(drv_data, SSCR1, pxa2xx_spi_read(drv_data, SSCR1) & ~bits);
 }
 
+static inline u32 read_SSSR_bits(const struct driver_data *drv_data, u32 bits)
+{
+	return pxa2xx_spi_read(drv_data, SSSR) & bits;
+}
+
 static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
 {
 	if (drv_data->ssp_type == CE4100_SSP ||
 	    drv_data->ssp_type == QUARK_X1000_SSP)
-		val |= pxa2xx_spi_read(drv_data, SSSR) & SSSR_ALT_FRM_MASK;
+		val |= read_SSSR_bits(drv_data, SSSR_ALT_FRM_MASK);
 
 	pxa2xx_spi_write(drv_data, SSSR, val);
 }
-- 
GitLab


From eca32c3974c0664f88fed90b327f473bd18a4809 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:33 +0300
Subject: [PATCH 0324/3804] spi: pxa2xx: Constify struct driver_data parameter

In a couple of functions the contents of struct driver_data are not altered,
hence we may constify the respective function parameter.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-14-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index d2cb40f97c4b7..5c6a5e0f249e9 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -93,7 +93,7 @@ static inline void pxa2xx_spi_write(const struct driver_data *drv_data, u32 reg,
 
 #define DMA_ALIGNMENT		8
 
-static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
+static inline int pxa25x_ssp_comp(const struct driver_data *drv_data)
 {
 	switch (drv_data->ssp_type) {
 	case PXA25x_SSP:
@@ -115,7 +115,7 @@ static inline u32 read_SSSR_bits(const struct driver_data *drv_data, u32 bits)
 	return pxa2xx_spi_read(drv_data, SSSR) & bits;
 }
 
-static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val)
+static inline void write_SSSR_CS(const struct driver_data *drv_data, u32 val)
 {
 	if (drv_data->ssp_type == CE4100_SSP ||
 	    drv_data->ssp_type == QUARK_X1000_SSP)
-- 
GitLab


From 3fdb59cf10b020b32b9f1dfc78611320623dcb3e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 15:41:34 +0300
Subject: [PATCH 0325/3804] spi: pxa2xx: Introduce special type for Merrifield
 SPIs

Intel Merrifield SPI is actually closer to PXA3xx. It has an extended FIFO
(32 bytes) and additional registers to get or set FIFO thresholds.

Introduce a new type for Intel Merrifield SPI host controllers and handle
the bigger FIFO size.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510124134.24638-15-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c |  2 +-
 drivers/spi/spi-pxa2xx.c     | 32 +++++++++++++++++++++++++++++---
 include/linux/pxa2xx_ssp.h   | 16 ++++++++++++++++
 3 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index a259be12d3265..dce9ade9a4dfb 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -179,7 +179,7 @@ static struct pxa_spi_info spi_info_configs[] = {
 		.rx_param = &bsw2_rx_param,
 	},
 	[PORT_MRFLD] = {
-		.type = PXA27x_SSP,
+		.type = MRFLD_SSP,
 		.max_clk_rate = 25000000,
 		.setup = mrfld_spi_setup,
 	},
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index af3f01de8f5b1..5985b39e2dd60 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -200,6 +200,11 @@ static bool is_mmp2_ssp(const struct driver_data *drv_data)
 	return drv_data->ssp_type == MMP2_SSP;
 }
 
+static bool is_mrfld_ssp(const struct driver_data *drv_data)
+{
+	return drv_data->ssp_type == MRFLD_SSP;
+}
+
 static void pxa2xx_spi_update(const struct driver_data *drv_data, u32 reg, u32 mask, u32 value)
 {
 	if ((pxa2xx_spi_read(drv_data, reg) & mask) != value)
@@ -1087,6 +1092,15 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 		pxa2xx_spi_update(drv_data, SSITF, GENMASK(15, 0), chip->lpss_tx_threshold);
 	}
 
+	if (is_mrfld_ssp(drv_data)) {
+		u32 thresh = 0;
+
+		thresh |= SFIFOTT_RxThresh(chip->lpss_rx_threshold);
+		thresh |= SFIFOTT_TxThresh(chip->lpss_tx_threshold);
+
+		pxa2xx_spi_update(drv_data, SFIFOTT, 0xffffffff, thresh);
+	}
+
 	if (is_quark_x1000_ssp(drv_data))
 		pxa2xx_spi_update(drv_data, DDS_RATE, GENMASK(23, 0), chip->dds_rate);
 
@@ -1253,6 +1267,11 @@ static int setup(struct spi_device *spi)
 		tx_hi_thres = 0;
 		rx_thres = RX_THRESH_QUARK_X1000_DFLT;
 		break;
+	case MRFLD_SSP:
+		tx_thres = TX_THRESH_MRFLD_DFLT;
+		tx_hi_thres = 0;
+		rx_thres = RX_THRESH_MRFLD_DFLT;
+		break;
 	case CE4100_SSP:
 		tx_thres = TX_THRESH_CE4100_DFLT;
 		tx_hi_thres = 0;
@@ -1328,9 +1347,16 @@ static int setup(struct spi_device *spi)
 		chip->cr1 |= SSCR1_SPH;
 	}
 
-	chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
-	chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres)
-				| SSITF_TxHiThresh(tx_hi_thres);
+	if (is_lpss_ssp(drv_data)) {
+		chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres);
+		chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres) |
+					  SSITF_TxHiThresh(tx_hi_thres);
+	}
+
+	if (is_mrfld_ssp(drv_data)) {
+		chip->lpss_rx_threshold = rx_thres;
+		chip->lpss_tx_threshold = tx_thres;
+	}
 
 	/* set dma burst and threshold outside of chip_info path so that if
 	 * chip_info goes away after setting chip->enable_dma, the
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index fdfbe17e15f46..2b21bc1f3c732 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -183,6 +183,21 @@ struct device_node;
 #define SSACD_ACPS(x)		((x) << 4)	/* Audio clock PLL select */
 #define SSACD_SCDX8		BIT(7)		/* SYSCLK division ratio select */
 
+/* Intel Merrifield SSP */
+#define SFIFOL			0x68		/* FIFO level */
+#define SFIFOTT			0x6c		/* FIFO trigger threshold */
+
+#define RX_THRESH_MRFLD_DFLT	16
+#define TX_THRESH_MRFLD_DFLT	16
+
+#define SFIFOL_TFL_MASK		GENMASK(15, 0)	/* Transmit FIFO Level mask */
+#define SFIFOL_RFL_MASK		GENMASK(31, 16)	/* Receive FIFO Level mask */
+
+#define SFIFOTT_TFT		GENMASK(15, 0)	/* Transmit FIFO Threshold (mask) */
+#define SFIFOTT_TxThresh(x)	(((x) - 1) << 0)	/* TX FIFO trigger threshold / level */
+#define SFIFOTT_RFT		GENMASK(31, 16)	/* Receive FIFO Threshold (mask) */
+#define SFIFOTT_RxThresh(x)	(((x) - 1) << 16)	/* RX FIFO trigger threshold / level */
+
 /* LPSS SSP */
 #define SSITF			0x44		/* TX FIFO trigger level */
 #define SSITF_TxHiThresh(x)	(((x) - 1) << 0)
@@ -205,6 +220,7 @@ enum pxa_ssp_type {
 	MMP2_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
+	MRFLD_SSP,
 	QUARK_X1000_SSP,
 	LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */
 	LPSS_BYT_SSP,
-- 
GitLab


From 1b55767dfdd93c42712e67e986ac14f0c4debd0c Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Tue, 11 May 2021 00:25:05 +0800
Subject: [PATCH 0326/3804] erofs: fix broken illustration in documentation

The illustration was accidentally broken by the ReST conversion.
(checked by 'make SPHINXDIRS="filesystems" htmldocs')

Link: https://lore.kernel.org/r/20210510162506.28637-1-xiang@kernel.org
Fixes: e66d8631ddb3 ("docs: filesystems: convert erofs.txt to ReST")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 Documentation/filesystems/erofs.rst | 119 ++++++++++++++--------------
 1 file changed, 59 insertions(+), 60 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index bf145171c2bf8..869b183ff2158 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -113,31 +113,31 @@ may not. All metadatas can be now observed in two different spaces (views):
 
     ::
 
-				    |-> aligned with 8B
-					    |-> followed closely
-	+ meta_blkaddr blocks                                      |-> another slot
-	_____________________________________________________________________
-	|  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
-	|________|_______|(optional)|(optional)|__(optional)_|_____|__________
-		|-> aligned with the inode slot size
-		    .                   .
-		    .                         .
-		.                              .
-		.                                    .
-	    .                                         .
-	    .                                              .
-	.____________________________________________________|-> aligned with 4B
-	| xattr_ibody_header | shared xattrs | inline xattrs |
-	|____________________|_______________|_______________|
-	|->    12 bytes    <-|->x * 4 bytes<-|               .
-			    .                .                 .
-			.                      .                   .
-		.                           .                     .
-	    ._______________________________.______________________.
-	    | id | id | id | id |  ... | id | ent | ... | ent| ... |
-	    |____|____|____|____|______|____|_____|_____|____|_____|
-					    |-> aligned with 4B
-							|-> aligned with 4B
+                                 |-> aligned with 8B
+                                            |-> followed closely
+     + meta_blkaddr blocks                                      |-> another slot
+       _____________________________________________________________________
+     |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
+     |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+              |-> aligned with the inode slot size
+                   .                   .
+                 .                         .
+               .                              .
+             .                                    .
+           .                                         .
+         .                                              .
+       .____________________________________________________|-> aligned with 4B
+       | xattr_ibody_header | shared xattrs | inline xattrs |
+       |____________________|_______________|_______________|
+       |->    12 bytes    <-|->x * 4 bytes<-|               .
+                           .                .                 .
+                     .                      .                   .
+                .                           .                     .
+            ._______________________________.______________________.
+            | id | id | id | id |  ... | id | ent | ... | ent| ... |
+            |____|____|____|____|______|____|_____|_____|____|_____|
+                                            |-> aligned with 4B
+                                                        |-> aligned with 4B
 
     Inode could be 32 or 64 bytes, which can be distinguished from a common
     field which all inode versions have -- i_format::
@@ -175,13 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views):
     Each share xattr can also be directly found by the following formula:
          xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
 
-    ::
+::
 
-			    |-> aligned by  4 bytes
-	+ xattr_blkaddr blocks                     |-> aligned with 4 bytes
-	_________________________________________________________________________
-	|  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
-	|________|_____________|_____________|_____|______________|_______________
+                           |-> aligned by  4 bytes
+    + xattr_blkaddr blocks                     |-> aligned with 4 bytes
+     _________________________________________________________________________
+    |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
+    |________|_____________|_____________|_____|______________|_______________
 
 Directories
 -----------
@@ -193,19 +193,18 @@ algorithm (could refer to the related source code).
 
 ::
 
-		    ___________________________
-		    /                           |
-		/              ______________|________________
-		/              /              | nameoff1       | nameoffN-1
-    ____________.______________._______________v________________v__________
-    | dirent | dirent | ... | dirent | filename | filename | ... | filename |
-    |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
-	\                           ^
-	\                          |                           * could have
-	\                         |                             trailing '\0'
-	    \________________________| nameoff0
-
-				Directory block
+                  ___________________________
+                 /                           |
+                /              ______________|________________
+               /              /              | nameoff1       | nameoffN-1
+  ____________.______________._______________v________________v__________
+ | dirent | dirent | ... | dirent | filename | filename | ... | filename |
+ |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+      \                           ^
+       \                          |                           * could have
+        \                         |                             trailing '\0'
+         \________________________| nameoff0
+                             Directory block
 
 Note that apart from the offset of the first filename, nameoff0 also indicates
 the total number of directory entries in this block since it is no need to
@@ -216,22 +215,22 @@ Compression
 Currently, EROFS supports 4KB fixed-sized output transparent file compression,
 as illustrated below::
 
-	    |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
-	    clusterofs                      clusterofs            clusterofs
-	    |                               |                     |   logical data
-    _________v_______________________________v_____________________v_______________
-    ... |    .        |             |        .    |             |  .          | ...
-    ____|____.________|_____________|________.____|_____________|__.__________|____
-	|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
-	    size          size          size          size          size
-	    .                             .                .                   .
-	    .                       .               .                  .
-		.                  .              .                .
-	_______._____________._____________._____________._____________________
-	    ... |             |             |             | ... physical data
-	_______|_____________|_____________|_____________|_____________________
-		|-> cluster <-|-> cluster <-|-> cluster <-|
-		    size          size          size
+          |<-    variable-sized extent    ->|<-       VLE         ->|
+        clusterofs                        clusterofs              clusterofs
+          |                                 |                       |
+ _________v_________________________________v_______________________v________
+ ... |    .         |              |        .     |              |  .   ...
+ ____|____._________|______________|________.___ _|______________|__.________
+     |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-|
+          size           size           size           size   .             .
+           .                            .                .              .
+            .                       .               .               .
+             .                   .              .               .
+       _______.______________.______________.______________._________________
+          ... |              |              |              | ...
+       _______|______________|______________|______________|_________________
+              |-> pcluster <-|-> pcluster <-|-> pcluster <-|
+                    size           size           size
 
 Currently each on-disk physical cluster can contain 4KB (un)compressed data
 at most. For each logical cluster, there is a corresponding on-disk index to
-- 
GitLab


From 46f2e04484aee056c97f79162da83ac7d2d621bb Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Tue, 11 May 2021 16:44:14 +0800
Subject: [PATCH 0327/3804] erofs: update documentation about data compression

Add more description about (NON)HEAD lclusters, and the new big
pcluster feature.

Link: https://lore.kernel.org/r/20210511084414.21305-1-xiang@kernel.org
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 Documentation/filesystems/erofs.rst | 68 +++++++++++++++++++++--------
 1 file changed, 49 insertions(+), 19 deletions(-)

diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
index 869b183ff2158..832839fcf4c3b 100644
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -50,8 +50,8 @@ Here is the main features of EROFS:
 
  - Support POSIX.1e ACLs by using xattrs;
 
- - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
+ - Support transparent data compression as an option:
+   LZ4 algorithm with the fixed-sized output compression for high performance.
 
 The following git tree provides the file system user-space tools under
 development (ex, formatting tool mkfs.erofs):
@@ -210,10 +210,21 @@ Note that apart from the offset of the first filename, nameoff0 also indicates
 the total number of directory entries in this block since it is no need to
 introduce another on-disk field at all.
 
-Compression
------------
-Currently, EROFS supports 4KB fixed-sized output transparent file compression,
-as illustrated below::
+Data compression
+----------------
+EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
+compressed data blocks from variable-sized input in contrast to other existing
+fixed-sized input solutions. Relatively higher compression ratios can be gotten
+by using fixed-sized output compression since nowadays popular data compression
+algorithms are mostly LZ77-based and such fixed-sized output approach can be
+benefited from the historical dictionary (aka. sliding window).
+
+In details, original (uncompressed) data is turned into several variable-sized
+extents and in the meanwhile, compressed into physical clusters (pclusters).
+In order to record each variable-sized extent, logical clusters (lclusters) are
+introduced as the basic unit of compress indexes to indicate whether a new
+extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now
+fixed in block size, as illustrated below::
 
           |<-    variable-sized extent    ->|<-       VLE         ->|
         clusterofs                        clusterofs              clusterofs
@@ -222,18 +233,37 @@ as illustrated below::
  ... |    .         |              |        .     |              |  .   ...
  ____|____._________|______________|________.___ _|______________|__.________
      |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-|
-          size           size           size           size   .             .
-           .                            .                .              .
-            .                       .               .               .
-             .                   .              .               .
-       _______.______________.______________.______________._________________
+          (HEAD)        (NONHEAD)       (HEAD)        (NONHEAD)    .
+           .             CBLKCNT            .                    .
+            .                               .                  .
+             .                              .                .
+       _______._____________________________.______________._________________
           ... |              |              |              | ...
        _______|______________|______________|______________|_________________
-              |-> pcluster <-|-> pcluster <-|-> pcluster <-|
-                    size           size           size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+              |->      big pcluster       <-|-> pcluster <-|
+
+A physical cluster can be seen as a container of physical compressed blocks
+which contains compressed data. Previously, only lcluster-sized (4KB) pclusters
+were supported. After big pcluster feature is introduced (available since
+Linux v5.13), pcluster can be a multiple of lcluster size.
+
+For each HEAD lcluster, clusterofs is recorded to indicate where a new extent
+starts and blkaddr is used to seek the compressed data. For each NONHEAD
+lcluster, delta0 and delta1 are available instead of blkaddr to indicate the
+distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is
+also a HEAD lcluster except that its data is uncompressed. See the comments
+around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+
+If big pcluster is enabled, pcluster size in lclusters needs to be recorded as
+well. Let the delta0 of the first NONHEAD lcluster store the compressed block
+count with a special flag as a new called CBLKCNT NONHEAD lcluster. It's easy
+to understand its delta0 is constantly 1, as illustrated below::
+
+   __________________________________________________________
+  | HEAD |  NONHEAD  | NONHEAD | ... | NONHEAD | HEAD | HEAD |
+  |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_|
+     |<----- a big pcluster (with CBLKCNT) ------>|<--  -->|
+           a lcluster-sized pcluster (without CBLKCNT) ^
+
+If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT,
+but it's easy to know the size of such pcluster is 1 lcluster as well.
-- 
GitLab


From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:42 +0300
Subject: [PATCH 0328/3804] spi: Switch to signed types for *_native_cs SPI
 controller fields

While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid
undefined behaviour when counting unused native CSs") missed the case
when all CSs are GPIOs and thus unused_native_cs will be evaluated to
-1 in unsigned representation. This will falsely trigger a condition
in the spi_get_gpio_descs().

Switch to signed types for *_native_cs SPI controller fields to fix the above.

Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 360a3bc767ca0..74239d65c7fd1 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -644,8 +644,8 @@ struct spi_controller {
 	int			*cs_gpios;
 	struct gpio_desc	**cs_gpiods;
 	bool			use_gpio_descriptors;
-	u8			unused_native_cs;
-	u8			max_native_cs;
+	s8			unused_native_cs;
+	s8			max_native_cs;
 
 	/* statistics */
 	struct spi_statistics	statistics;
-- 
GitLab


From d019f38a1af3c6015cde6a47951a3ec43beeed80 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 11:53:18 +0800
Subject: [PATCH 0329/3804] regulator: uniphier: Add missing
 MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1620705198-104566-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/uniphier-regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/uniphier-regulator.c b/drivers/regulator/uniphier-regulator.c
index 2e02e26b516c4..e75b0973e3256 100644
--- a/drivers/regulator/uniphier-regulator.c
+++ b/drivers/regulator/uniphier-regulator.c
@@ -201,6 +201,7 @@ static const struct of_device_id uniphier_regulator_match[] = {
 	},
 	{ /* Sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, uniphier_regulator_match);
 
 static struct platform_driver uniphier_regulator_driver = {
 	.probe = uniphier_regulator_probe,
-- 
GitLab


From 86b8bff7e3ac6775113639d88db7448a8b47f0c1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:11:20 +0300
Subject: [PATCH 0330/3804] spi: Convert to use predefined time multipliers

We have a lot of hard coded values in nanoseconds or other units.
Use predefined constants to make it more clear.

While at it, add or amend comments in the corresponding functions.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131120.49253-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index f9885c0965637..407420977a739 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1118,10 +1118,20 @@ static int spi_transfer_wait(struct spi_controller *ctlr,
 		if (!speed_hz)
 			speed_hz = 100000;
 
-		ms = 8LL * 1000LL * xfer->len;
+		/*
+		 * For each byte we wait for 8 cycles of the SPI clock.
+		 * Since speed is defined in Hz and we want milliseconds,
+		 * use respective multiplier, but before the division,
+		 * otherwise we may get 0 for short transfers.
+		 */
+		ms = 8LL * MSEC_PER_SEC * xfer->len;
 		do_div(ms, speed_hz);
-		ms += ms + 200; /* some tolerance */
 
+		/*
+		 * Increase it twice and add 200 ms tolerance, use
+		 * predefined maximum in case of overflow.
+		 */
+		ms += ms + 200;
 		if (ms > UINT_MAX)
 			ms = UINT_MAX;
 
@@ -1144,10 +1154,10 @@ static void _spi_transfer_delay_ns(u32 ns)
 {
 	if (!ns)
 		return;
-	if (ns <= 1000) {
+	if (ns <= NSEC_PER_USEC) {
 		ndelay(ns);
 	} else {
-		u32 us = DIV_ROUND_UP(ns, 1000);
+		u32 us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
 
 		if (us <= 10)
 			udelay(us);
@@ -1167,21 +1177,25 @@ int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer)
 
 	switch (unit) {
 	case SPI_DELAY_UNIT_USECS:
-		delay *= 1000;
+		delay *= NSEC_PER_USEC;
 		break;
-	case SPI_DELAY_UNIT_NSECS: /* nothing to do here */
+	case SPI_DELAY_UNIT_NSECS:
+		/* Nothing to do here */
 		break;
 	case SPI_DELAY_UNIT_SCK:
 		/* clock cycles need to be obtained from spi_transfer */
 		if (!xfer)
 			return -EINVAL;
-		/* if there is no effective speed know, then approximate
-		 * by underestimating with half the requested hz
+		/*
+		 * If there is unknown effective speed, approximate it
+		 * by underestimating with half of the requested hz.
 		 */
 		hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2;
 		if (!hz)
 			return -EINVAL;
-		delay *= DIV_ROUND_UP(1000000000, hz);
+
+		/* Convert delay to nanoseconds */
+		delay *= DIV_ROUND_UP(NSEC_PER_SEC, hz);
 		break;
 	default:
 		return -EINVAL;
@@ -1213,6 +1227,7 @@ EXPORT_SYMBOL_GPL(spi_delay_exec);
 static void _spi_transfer_cs_change_delay(struct spi_message *msg,
 					  struct spi_transfer *xfer)
 {
+	u32 default_delay_ns = 10 * NSEC_PER_USEC;
 	u32 delay = xfer->cs_change_delay.value;
 	u32 unit = xfer->cs_change_delay.unit;
 	int ret;
@@ -1220,16 +1235,16 @@ static void _spi_transfer_cs_change_delay(struct spi_message *msg,
 	/* return early on "fast" mode - for everything but USECS */
 	if (!delay) {
 		if (unit == SPI_DELAY_UNIT_USECS)
-			_spi_transfer_delay_ns(10000);
+			_spi_transfer_delay_ns(default_delay_ns);
 		return;
 	}
 
 	ret = spi_delay_exec(&xfer->cs_change_delay, xfer);
 	if (ret) {
 		dev_err_once(&msg->spi->dev,
-			     "Use of unsupported delay unit %i, using default of 10us\n",
-			     unit);
-		_spi_transfer_delay_ns(10000);
+			     "Use of unsupported delay unit %i, using default of %luus\n",
+			     unit, default_delay_ns / NSEC_PER_USEC);
+		_spi_transfer_delay_ns(default_delay_ns);
 	}
 }
 
-- 
GitLab


From 532259bfd1c12d561215c32b94cd9bb7c997bc6f Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 15:08:42 +0800
Subject: [PATCH 0331/3804] spi: altera: Remove redundant dev_err call in
 dfl_spi_altera_probe()

There is already an error message within devm_ioremap_resource(),
so remove the dev_err() call to avoid a redundant error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1620716922-108572-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-altera-dfl.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c
index 3e32e4fe58950..39a3e1a032e04 100644
--- a/drivers/spi/spi-altera-dfl.c
+++ b/drivers/spi/spi-altera-dfl.c
@@ -148,10 +148,8 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev)
 
 	base = devm_ioremap_resource(dev, &dfl_dev->mmio_res);
 
-	if (IS_ERR(base)) {
-		dev_err(dev, "%s get mem resource fail!\n", __func__);
+	if (IS_ERR(base))
 		return PTR_ERR(base);
-	}
 
 	config_spi_master(base, master);
 	dev_dbg(dev, "%s cs %u bpm 0x%x mode 0x%x\n", __func__,
-- 
GitLab


From 91e02557f377b6837d4f82b14229d92cae231001 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 11 May 2021 11:05:00 +0200
Subject: [PATCH 0332/3804] ALSA: usb-audio: Fix potential out-of-bounce access
 in MIDI EP parser

The recently introduced MIDI endpoint parser code accesses the
field without size validation, hence it might lead to
out-of-bounds access.  Add sanity checks for the descriptor
sizes.

Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names")
Link: https://lore.kernel.org/r/20210511090500.2637-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/midi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 649eb8d1ab7dd..2c01649c70f61 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1750,7 +1750,7 @@ static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor(
 		struct usb_midi_in_jack_descriptor *injd =
 				(struct usb_midi_in_jack_descriptor *)extra;
 
-		if (injd->bLength > 4 &&
+		if (injd->bLength >= sizeof(*injd) &&
 		    injd->bDescriptorType == USB_DT_CS_INTERFACE &&
 		    injd->bDescriptorSubtype == UAC_MIDI_IN_JACK &&
 				injd->bJackID == jack_id)
@@ -1773,7 +1773,7 @@ static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor(
 		struct usb_midi_out_jack_descriptor *outjd =
 				(struct usb_midi_out_jack_descriptor *)extra;
 
-		if (outjd->bLength > 4 &&
+		if (outjd->bLength >= sizeof(*outjd) &&
 		    outjd->bDescriptorType == USB_DT_CS_INTERFACE &&
 		    outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK &&
 				outjd->bJackID == jack_id)
@@ -1820,7 +1820,8 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi,
 			outjd = find_usb_out_jack_descriptor(hostif, jack_id);
 			if (outjd) {
 				sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins);
-				iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
+				if (outjd->bLength >= sz)
+					iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t));
 			}
 		} else {
 			/* and out jacks connect to ins */
-- 
GitLab


From 4b81ccebaeee885ab1aa1438133f2991e3a2b6ea Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Tue, 27 Apr 2021 10:12:12 -0300
Subject: [PATCH 0333/3804] bpf, ringbuf: Deny reserve of buffers larger than
 ringbuf

A BPF program might try to reserve a buffer larger than the ringbuf size.
If the consumer pointer is way ahead of the producer, that would be
successfully reserved, allowing the BPF program to read or write out of
the ringbuf allocated area.

Reported-by: Ryota Shiga (Flatt Security)
Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/ringbuf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index f25b719ac7868..b86d80c9cd59b 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -315,6 +315,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 		return NULL;
 
 	len = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
+	if (len > rb->mask + 1)
+		return NULL;
+
 	cons_pos = smp_load_acquire(&rb->consumer_pos);
 
 	if (in_nmi()) {
-- 
GitLab


From 04ea3086c4d73da7009de1e84962a904139af219 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 4 May 2021 16:38:00 -0700
Subject: [PATCH 0334/3804] bpf: Prevent writable memory-mapping of read-only
 ringbuf pages

Only the very first page of BPF ringbuf that contains consumer position
counter is supposed to be mapped as writeable by user-space. Producer
position is read-only and can be modified only by the kernel code. BPF ringbuf
data pages are read-only as well and are not meant to be modified by
user-code to maintain integrity of per-record headers.

This patch allows mapping only the consumer position page as writeable and
restricts everything else to be read-only. remap_vmalloc_range()
internally adds VM_DONTEXPAND, so all the established memory mappings can't be
extended, which prevents any future violations through mremap()'ing.

Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it")
Reported-by: Ryota Shiga (Flatt Security)
Reported-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/ringbuf.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index b86d80c9cd59b..84b3b35fc0d05 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
 	return -ENOTSUPP;
 }
 
-static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb)
-{
-	size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT;
-
-	/* consumer page + producer page + 2 x data pages */
-	return RINGBUF_POS_PAGES + 2 * data_pages;
-}
-
 static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 {
 	struct bpf_ringbuf_map *rb_map;
-	size_t mmap_sz;
 
 	rb_map = container_of(map, struct bpf_ringbuf_map, map);
-	mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT;
-
-	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz)
-		return -EINVAL;
 
+	if (vma->vm_flags & VM_WRITE) {
+		/* allow writable mapping for the consumer_pos only */
+		if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE)
+			return -EPERM;
+	} else {
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+	/* remap_vmalloc_range() checks size and offset constraints */
 	return remap_vmalloc_range(vma, rb_map->rb,
 				   vma->vm_pgoff + RINGBUF_PGOFF);
 }
-- 
GitLab


From ff67dbd554b2aaa22be933eced32610ff90209dd Mon Sep 17 00:00:00 2001
From: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Date: Wed, 28 Apr 2021 13:06:36 +0800
Subject: [PATCH 0335/3804] platform/x86: ideapad-laptop: fix a NULL pointer
 dereference

The third parameter of dytc_cql_command should not be NULL since it will
be dereferenced immediately.

Fixes: ff36b0d953dc4 ("platform/x86: ideapad-laptop: rework and create new ACPI helpers")
Signed-off-by: Qiu Wenbo <qiuwenbo@kylinos.com.cn>
Acked-by: Ike Panhc <ike.pan@canonical.com>
Link: https://lore.kernel.org/r/20210428050636.8003-1-qiuwenbo@kylinos.com.cn
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/ideapad-laptop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 6cb5ad4be231d..8f871151f0ccb 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -809,6 +809,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
 {
 	struct ideapad_dytc_priv *dytc = container_of(pprof, struct ideapad_dytc_priv, pprof);
 	struct ideapad_private *priv = dytc->priv;
+	unsigned long output;
 	int err;
 
 	err = mutex_lock_interruptible(&dytc->mutex);
@@ -829,7 +830,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
 
 		/* Determine if we are in CQL mode. This alters the commands we do */
 		err = dytc_cql_command(priv, DYTC_SET_COMMAND(DYTC_FUNCTION_MMC, perfmode, 1),
-				       NULL);
+				       &output);
 		if (err)
 			goto unlock;
 	}
-- 
GitLab


From b09aaa3f2c0edeeed670cd29961a0e35bddc78cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= <pobrn@protonmail.com>
Date: Fri, 7 May 2021 23:53:44 +0000
Subject: [PATCH 0336/3804] platform/x86: ideapad-laptop: fix method name typo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

"smbc" should be "sbmc". `eval_smbc()` incorrectly called
the SMBC ACPI method instead of SBMC. This resulted in
partial loss of functionality. Rectify that by calling
the correct ACPI method (SBMC), and also rename
methods and constants.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=212985
Fixes: 0b765671cb80 ("platform/x86: ideapad-laptop: group and separate (un)related constants into enums")
Fixes: ff36b0d953dc ("platform/x86: ideapad-laptop: rework and create new ACPI helpers")
Cc: stable@vger.kernel.org # 5.12
Signed-off-by: Barnabás Pőcze <pobrn@protonmail.com>
Link: https://lore.kernel.org/r/20210507235333.286505-1-pobrn@protonmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/ideapad-laptop.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 8f871151f0ccb..3878172909219 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -57,8 +57,8 @@ enum {
 };
 
 enum {
-	SMBC_CONSERVATION_ON  = 3,
-	SMBC_CONSERVATION_OFF = 5,
+	SBMC_CONSERVATION_ON  = 3,
+	SBMC_CONSERVATION_OFF = 5,
 };
 
 enum {
@@ -182,9 +182,9 @@ static int eval_gbmd(acpi_handle handle, unsigned long *res)
 	return eval_int(handle, "GBMD", res);
 }
 
-static int exec_smbc(acpi_handle handle, unsigned long arg)
+static int exec_sbmc(acpi_handle handle, unsigned long arg)
 {
-	return exec_simple_method(handle, "SMBC", arg);
+	return exec_simple_method(handle, "SBMC", arg);
 }
 
 static int eval_hals(acpi_handle handle, unsigned long *res)
@@ -477,7 +477,7 @@ static ssize_t conservation_mode_store(struct device *dev,
 	if (err)
 		return err;
 
-	err = exec_smbc(priv->adev->handle, state ? SMBC_CONSERVATION_ON : SMBC_CONSERVATION_OFF);
+	err = exec_sbmc(priv->adev->handle, state ? SBMC_CONSERVATION_ON : SBMC_CONSERVATION_OFF);
 	if (err)
 		return err;
 
-- 
GitLab


From 79d341e26ebcdbc622348aaaab6f8f89b6fdb25f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 30 Apr 2021 14:07:35 +0800
Subject: [PATCH 0337/3804] platform/x86: hp_accel: Avoid invoking _INI to
 speed up resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hp_accel can take almost two seconds to resume on some HP laptops.

The bottleneck is evaluating _INI, which only needs to run once.

Resolve the issue by only invoking _INI when it's necessary. Namely, on
probe and on hibernation restore.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Acked-by: Éric Piel <eric.piel@trempplin-utc.net>
Link: https://lore.kernel.org/r/20210430060736.590321-1-kai.heng.feng@canonical.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/misc/lis3lv02d/lis3lv02d.h |  1 +
 drivers/platform/x86/hp_accel.c    | 22 +++++++++++++++++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
index c394c0b08519a..7ac788fae1b86 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.h
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -271,6 +271,7 @@ struct lis3lv02d {
 	int			regs_size;
 	u8                      *reg_cache;
 	bool			regs_stored;
+	bool			init_required;
 	u8                      odr_mask;  /* ODR bit mask */
 	u8			whoami;    /* indicates measurement precision */
 	s16 (*read_data) (struct lis3lv02d *lis3, int reg);
diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 799cbe2ffcf36..8c0867bda8280 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c
@@ -88,6 +88,9 @@ MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids);
 static int lis3lv02d_acpi_init(struct lis3lv02d *lis3)
 {
 	struct acpi_device *dev = lis3->bus_priv;
+	if (!lis3->init_required)
+		return 0;
+
 	if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI,
 				 NULL, NULL) != AE_OK)
 		return -EINVAL;
@@ -356,6 +359,7 @@ static int lis3lv02d_add(struct acpi_device *device)
 	}
 
 	/* call the core layer do its init */
+	lis3_dev.init_required = true;
 	ret = lis3lv02d_init_device(&lis3_dev);
 	if (ret)
 		return ret;
@@ -403,11 +407,27 @@ static int lis3lv02d_suspend(struct device *dev)
 
 static int lis3lv02d_resume(struct device *dev)
 {
+	lis3_dev.init_required = false;
+	lis3lv02d_poweron(&lis3_dev);
+	return 0;
+}
+
+static int lis3lv02d_restore(struct device *dev)
+{
+	lis3_dev.init_required = true;
 	lis3lv02d_poweron(&lis3_dev);
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(hp_accel_pm, lis3lv02d_suspend, lis3lv02d_resume);
+static const struct dev_pm_ops hp_accel_pm = {
+	.suspend = lis3lv02d_suspend,
+	.resume = lis3lv02d_resume,
+	.freeze = lis3lv02d_suspend,
+	.thaw = lis3lv02d_resume,
+	.poweroff = lis3lv02d_suspend,
+	.restore = lis3lv02d_restore,
+};
+
 #define HP_ACCEL_PM (&hp_accel_pm)
 #else
 #define HP_ACCEL_PM NULL
-- 
GitLab


From a5c936add6a23c15c6ae538ab7a12f80751fdf0f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Wed, 21 Apr 2021 13:20:31 +0800
Subject: [PATCH 0338/3804] drm/i915/dp: Use slow and wide link training for
 everything
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Screen flickers on Innolux eDP 1.3 panel when clock rate 540000 is in use.

According to the panel vendor, although clock rate 540000 is advertised,
the max clock rate the panel really supports is 270000.

Ville Syrjälä mentioned that fast and narrow also breaks some eDP 1.4
panels, so use slow and wide training for all panels to resolve the
issue.

A user also confirmed that the new strategy doesn't introduce any
regression on XPS 9380.

v2:
 - Use slow and wide for everything.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3384
References: https://gitlab.freedesktop.org/drm/intel/-/issues/272
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421052054.1434718-1-kai.heng.feng@canonical.com
(cherry picked from commit acca7762eb71bc05a8f28d29320d193150051f79)
Fixes: 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure")
Cc: <stable@vger.kernel.org> # v5.12+
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 59 +++----------------------
 1 file changed, 5 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 6a2dee8cef1f1..1e026177ed1ba 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
 	return -EINVAL;
 }
 
-/* Optimize link config in order: max bpp, min lanes, min clock */
-static int
-intel_dp_compute_link_config_fast(struct intel_dp *intel_dp,
-				  struct intel_crtc_state *pipe_config,
-				  const struct link_config_limits *limits)
-{
-	const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
-	int bpp, clock, lane_count;
-	int mode_rate, link_clock, link_avail;
-
-	for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
-		int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp);
-
-		mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
-						   output_bpp);
-
-		for (lane_count = limits->min_lane_count;
-		     lane_count <= limits->max_lane_count;
-		     lane_count <<= 1) {
-			for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
-				link_clock = intel_dp->common_rates[clock];
-				link_avail = intel_dp_max_data_rate(link_clock,
-								    lane_count);
-
-				if (mode_rate <= link_avail) {
-					pipe_config->lane_count = lane_count;
-					pipe_config->pipe_bpp = bpp;
-					pipe_config->port_clock = link_clock;
-
-					return 0;
-				}
-			}
-		}
-	}
-
-	return -EINVAL;
-}
-
 static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
 {
 	int i, num_bpc;
@@ -1382,22 +1344,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
 	    intel_dp_can_bigjoiner(intel_dp))
 		pipe_config->bigjoiner = true;
 
-	if (intel_dp_is_edp(intel_dp))
-		/*
-		 * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4
-		 * section A.1: "It is recommended that the minimum number of
-		 * lanes be used, using the minimum link rate allowed for that
-		 * lane configuration."
-		 *
-		 * Note that we fall back to the max clock and lane count for eDP
-		 * panels that fail with the fast optimal settings (see
-		 * intel_dp->use_max_params), in which case the fast vs. wide
-		 * choice doesn't matter.
-		 */
-		ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits);
-	else
-		/* Optimize for slow and wide. */
-		ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
+	/*
+	 * Optimize for slow and wide for everything, because there are some
+	 * eDP 1.3 and 1.4 panels don't work well with fast and narrow.
+	 */
+	ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
 
 	/* enable compression if the mode doesn't fit available BW */
 	drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en);
-- 
GitLab


From 9b8a233bc294dd71d3c7d30692a78ab32f246a0f Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Fri, 30 Apr 2021 21:30:55 +0530
Subject: [PATCH 0339/3804] btrfs: handle transaction start error in
 btrfs_fileattr_set

Add error handling in btrfs_fileattr_set in case of an error while
starting a transaction. This fixes btrfs/232, which otherwise used to
fail with the signature below on Power.

  btrfs/232 [ 1119.474650] run fstests btrfs/232 at 2021-04-21 02:21:22
  <...>
  [ 1366.638585] BUG: Unable to handle kernel data access on read at 0xffffffffffffff86
  [ 1366.638768] Faulting instruction address: 0xc0000000009a5c88
  cpu 0x0: Vector: 380 (Data SLB Access) at [c000000014f177b0]
      pc: c0000000009a5c88: btrfs_update_root_times+0x58/0xc0
      lr: c0000000009a5c84: btrfs_update_root_times+0x54/0xc0
      <...>
      pid   = 24881, comm = fsstress
	   btrfs_update_inode+0xa0/0x140
	   btrfs_fileattr_set+0x5d0/0x6f0
	   vfs_fileattr_set+0x2a8/0x390
	   do_vfs_ioctl+0x1290/0x1ac0
	   sys_ioctl+0x6c/0x120
	   system_call_exception+0x3d4/0x410
	   system_call_common+0xec/0x278

Fixes: 97fc29775487 ("btrfs: convert to fileattr")
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ee1dbabb5d3c4..98ecb70466bf3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
 	if (!fa->flags_valid) {
 		/* 1 item for the inode */
 		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
 		goto update_flags;
 	}
 
-- 
GitLab


From f2be77fee648ddd6d0d259d3527344ba0120e314 Mon Sep 17 00:00:00 2001
From: Elia Devito <eliadevito@gmail.com>
Date: Tue, 11 May 2021 14:46:49 +0200
Subject: [PATCH 0340/3804] ALSA: hda/realtek: Add fixup for HP Spectre x360
 15-df0xxx

Fixup to enable all 4 speakers on HP Spectre x360 15-df0xxx and probably
on similar models.

A 0x14 pin config override is required to enable all speakers, and the
alc285-speaker2-to-dac1 fixup is required to enable volume adjustment.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=189331
Signed-off-by: Elia Devito <eliadevito@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210511124651.4802-1-eliadevito@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b4b71609dff11..3e269de840799 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6542,6 +6542,7 @@ enum {
 	ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST,
 	ALC295_FIXUP_ASUS_DACS,
 	ALC295_FIXUP_HP_OMEN,
+	ALC285_FIXUP_HP_SPECTRE_X360,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8099,6 +8100,15 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED,
 	},
+	[ALC285_FIXUP_HP_SPECTRE_X360] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x14, 0x90170110 }, /* enable top speaker */
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8259,6 +8269,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
 	SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+	SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360),
 	SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
@@ -8665,6 +8676,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"},
 	{.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
 	{.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
+	{.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
-- 
GitLab


From efed9a3337e341bd0989161b97453b52567bc59d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 10 May 2021 17:05:35 -0700
Subject: [PATCH 0341/3804] kyber: fix out of bounds access when preempted

__blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and
passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx
for the current CPU again and uses that to get the corresponding Kyber
context in the passed hctx. However, the thread may be preempted between
the two calls to blk_mq_get_ctx(), and the ctx returned the second time
may no longer correspond to the passed hctx. This "works" accidentally
most of the time, but it can cause us to read garbage if the second ctx
came from an hctx with more ctx's than the first one (i.e., if
ctx->index_hw[hctx->type] > hctx->nr_ctx).

This manifested as this UBSAN array index out of bounds error reported
by Jakub:

UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9
index 13106 is out of range for type 'long unsigned int [128]'
Call Trace:
 dump_stack+0xa4/0xe5
 ubsan_epilogue+0x5/0x40
 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34
 queued_spin_lock_slowpath+0x476/0x480
 do_raw_spin_lock+0x1c2/0x1d0
 kyber_bio_merge+0x112/0x180
 blk_mq_submit_bio+0x1f5/0x1100
 submit_bio_noacct+0x7b0/0x870
 submit_bio+0xc2/0x3a0
 btrfs_map_bio+0x4f0/0x9d0
 btrfs_submit_data_bio+0x24e/0x310
 submit_one_bio+0x7f/0xb0
 submit_extent_page+0xc4/0x440
 __extent_writepage_io+0x2b8/0x5e0
 __extent_writepage+0x28d/0x6e0
 extent_write_cache_pages+0x4d7/0x7a0
 extent_writepages+0xa2/0x110
 do_writepages+0x8f/0x180
 __writeback_single_inode+0x99/0x7f0
 writeback_sb_inodes+0x34e/0x790
 __writeback_inodes_wb+0x9e/0x120
 wb_writeback+0x4d2/0x660
 wb_workfn+0x64d/0xa10
 process_one_work+0x53a/0xa80
 worker_thread+0x69/0x5b0
 kthread+0x20b/0x240
 ret_from_fork+0x1f/0x30

Only Kyber uses the hctx, so fix it by passing the request_queue to
->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can
map the queues itself to avoid the mismatch.

Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging")
Reported-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c      | 3 +--
 block/blk-mq-sched.c     | 8 +++++---
 block/kyber-iosched.c    | 5 +++--
 block/mq-deadline.c      | 3 +--
 include/linux/elevator.h | 2 +-
 5 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0270cd7ca1658..59b2499d3f8be 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2263,10 +2263,9 @@ static void bfq_remove_request(struct request_queue *q,
 
 }
 
-static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	/*
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 42a365b1b9c0e..996a4b2f73aa9 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
 	struct elevator_queue *e = q->elevator;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+	struct blk_mq_ctx *ctx;
+	struct blk_mq_hw_ctx *hctx;
 	bool ret = false;
 	enum hctx_type type;
 
 	if (e && e->type->ops.bio_merge)
-		return e->type->ops.bio_merge(hctx, bio, nr_segs);
+		return e->type->ops.bio_merge(q, bio, nr_segs);
 
+	ctx = blk_mq_get_ctx(q);
+	hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	type = hctx->type;
 	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
 	    list_empty_careful(&ctx->rq_lists[type]))
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index 8969e122f0811..81e3279ecd574 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
 	}
 }
 
-static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
+	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
 	struct kyber_hctx_data *khd = hctx->sched_data;
-	struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
 	struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
 	unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
 	struct list_head *rq_list = &kcq->rq_list[sched_domain];
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 04aded71ead27..8eea2cbf2bf4a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
 	return ELEVATOR_NO_MERGE;
 }
 
-static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
-	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct request *free = NULL;
 	bool ret;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1fe8e105b83bf..dcb2f9022c1df 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
 	void (*depth_updated)(struct blk_mq_hw_ctx *);
 
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
+	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
 	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
 	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
 	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-- 
GitLab


From 0919a3acc0c87049a7d787c4b8b9e64bd7c59eb3 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 11 May 2021 10:17:07 +0900
Subject: [PATCH 0342/3804] ASoC: simple-card: add simple_parse_node()

Original commit 59c35c44a9cf89a83a9 ("ASoC: simple-card: add
simple_parse_node()") was reverted, and this is a remake version of it.

Parsing dai/tdm/clk is common to both the CPU and Codec nodes.
This patch creates simple_parse_node() for it and shares the code.

Reported-by: "kernelci.org bot" <bot@kernelci.org>
Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2")
Fixes: 59c35c44a9cf89a83a9 ("ASoC: simple-card: add simple_parse_node()")
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/87h7jaax2k.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/generic/simple-card.c | 107 ++++++++++++++++----------------
 1 file changed, 53 insertions(+), 54 deletions(-)

diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index a1373be4558f3..57ab89be1b4b7 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -93,12 +93,11 @@ static void simple_parse_convert(struct device *dev,
 }
 
 static void simple_parse_mclk_fs(struct device_node *top,
-				 struct device_node *cpu,
-				 struct device_node *codec,
+				 struct device_node *np,
 				 struct simple_dai_props *props,
 				 char *prefix)
 {
-	struct device_node *node = of_get_parent(cpu);
+	struct device_node *node = of_get_parent(np);
 	char prop[128];
 
 	snprintf(prop, sizeof(prop), "%smclk-fs", PREFIX);
@@ -106,12 +105,50 @@ static void simple_parse_mclk_fs(struct device_node *top,
 
 	snprintf(prop, sizeof(prop), "%smclk-fs", prefix);
 	of_property_read_u32(node,	prop, &props->mclk_fs);
-	of_property_read_u32(cpu,	prop, &props->mclk_fs);
-	of_property_read_u32(codec,	prop, &props->mclk_fs);
+	of_property_read_u32(np,	prop, &props->mclk_fs);
 
 	of_node_put(node);
 }
 
+static int simple_parse_node(struct asoc_simple_priv *priv,
+			     struct device_node *np,
+			     struct link_info *li,
+			     char *prefix,
+			     int *cpu)
+{
+	struct device *dev = simple_priv_to_dev(priv);
+	struct device_node *top = dev->of_node;
+	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
+	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
+	struct snd_soc_dai_link_component *dlc;
+	struct asoc_simple_dai *dai;
+	int ret;
+
+	if (cpu) {
+		dlc = asoc_link_to_cpu(dai_link, 0);
+		dai = simple_props_to_dai_cpu(dai_props, 0);
+	} else {
+		dlc = asoc_link_to_codec(dai_link, 0);
+		dai = simple_props_to_dai_codec(dai_props, 0);
+	}
+
+	simple_parse_mclk_fs(top, np, dai_props, prefix);
+
+	ret = asoc_simple_parse_dai(np, dlc, cpu);
+	if (ret)
+		return ret;
+
+	ret = asoc_simple_parse_clk(dev, np, dai, dlc);
+	if (ret)
+		return ret;
+
+	ret = asoc_simple_parse_tdm(np, dai);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 				   struct device_node *np,
 				   struct device_node *codec,
@@ -121,10 +158,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	struct device *dev = simple_priv_to_dev(priv);
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
 	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
-	struct asoc_simple_dai *dai;
-	struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
-	struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
-	struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
 	struct device_node *top = dev->of_node;
 	struct device_node *node = of_get_parent(np);
 	char *prefix = "";
@@ -132,13 +165,13 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 
 	dev_dbg(dev, "link_of DPCM (%pOF)\n", np);
 
-	li->link++;
-
 	/* For single DAI link & old style of DT node */
 	if (is_top)
 		prefix = PREFIX;
 
 	if (li->cpu) {
+		struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
+		struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
 		int is_single_links = 0;
 
 		/* Codec is dummy */
@@ -147,13 +180,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		dai_link->dynamic		= 1;
 		dai_link->dpcm_merged_format	= 1;
 
-		dai = simple_props_to_dai_cpu(dai_props, 0);
-
-		ret = asoc_simple_parse_dai(np, cpus, &is_single_links);
-		if (ret)
-			goto out_put_node;
-
-		ret = asoc_simple_parse_clk(dev, np, dai, cpus);
+		ret = simple_parse_node(priv, np, li, prefix, &is_single_links);
 		if (ret < 0)
 			goto out_put_node;
 
@@ -166,6 +193,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		asoc_simple_canonicalize_cpu(cpus, is_single_links);
 		asoc_simple_canonicalize_platform(platforms, cpus);
 	} else {
+		struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
 		struct snd_soc_codec_conf *cconf;
 
 		/* CPU is dummy */
@@ -174,14 +202,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		dai_link->no_pcm		= 1;
 		dai_link->be_hw_params_fixup	= asoc_simple_be_hw_params_fixup;
 
-		dai	= simple_props_to_dai_codec(dai_props, 0);
 		cconf	= simple_props_to_codec_conf(dai_props, 0);
 
-		ret = asoc_simple_parse_dai(np, codecs, NULL);
-		if (ret < 0)
-			goto out_put_node;
-
-		ret = asoc_simple_parse_clk(dev, np, dai, codecs);
+		ret = simple_parse_node(priv, np, li, prefix, NULL);
 		if (ret < 0)
 			goto out_put_node;
 
@@ -201,11 +224,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	}
 
 	simple_parse_convert(dev, np, &dai_props->adata);
-	simple_parse_mclk_fs(top, np, codec, dai_props, prefix);
-
-	ret = asoc_simple_parse_tdm(np, dai);
-	if (ret)
-		goto out_put_node;
 
 	ret = asoc_simple_parse_daifmt(dev, node, codec,
 				       prefix, &dai_link->dai_fmt);
@@ -218,6 +236,8 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	dai_link->init			= asoc_simple_dai_init;
 
 out_put_node:
+	li->link++;
+
 	of_node_put(node);
 	return ret;
 }
@@ -230,13 +250,9 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 {
 	struct device *dev = simple_priv_to_dev(priv);
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
-	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
-	struct asoc_simple_dai *cpu_dai	= simple_props_to_dai_cpu(dai_props, 0);
-	struct asoc_simple_dai *codec_dai = simple_props_to_dai_codec(dai_props, 0);
 	struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
 	struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
 	struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
-	struct device_node *top = dev->of_node;
 	struct device_node *cpu = NULL;
 	struct device_node *node = NULL;
 	struct device_node *plat = NULL;
@@ -246,7 +262,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 
 	cpu  = np;
 	node = of_get_parent(np);
-	li->link++;
 
 	dev_dbg(dev, "link_of (%pOF)\n", node);
 
@@ -262,13 +277,11 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 	if (ret < 0)
 		goto dai_link_of_err;
 
-	simple_parse_mclk_fs(top, cpu, codec, dai_props, prefix);
-
-	ret = asoc_simple_parse_dai(cpu, cpus, &single_cpu);
+	ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu);
 	if (ret < 0)
 		goto dai_link_of_err;
 
-	ret = asoc_simple_parse_dai(codec, codecs, NULL);
+	ret = simple_parse_node(priv, codec, li, prefix, NULL);
 	if (ret < 0)
 		goto dai_link_of_err;
 
@@ -276,22 +289,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 	if (ret < 0)
 		goto dai_link_of_err;
 
-	ret = asoc_simple_parse_tdm(cpu, cpu_dai);
-	if (ret < 0)
-		goto dai_link_of_err;
-
-	ret = asoc_simple_parse_tdm(codec, codec_dai);
-	if (ret < 0)
-		goto dai_link_of_err;
-
-	ret = asoc_simple_parse_clk(dev, cpu, cpu_dai, cpus);
-	if (ret < 0)
-		goto dai_link_of_err;
-
-	ret = asoc_simple_parse_clk(dev, codec, codec_dai, codecs);
-	if (ret < 0)
-		goto dai_link_of_err;
-
 	ret = asoc_simple_set_dailink_name(dev, dai_link,
 					   "%s-%s",
 					   cpus->dai_name,
@@ -309,6 +306,8 @@ dai_link_of_err:
 	of_node_put(plat);
 	of_node_put(node);
 
+	li->link++;
+
 	return ret;
 }
 
-- 
GitLab


From 6ad76b573bb63ef229cf60386cc38c6e7c7625d7 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 11 May 2021 10:17:47 +0900
Subject: [PATCH 0343/3804] ASoC: simple-card: add simple_link_init()

Original commit 434392271afcff350fe ("ASoC: simple-card: add
simple_link_init()") was rejected, and this is a remake version of it.

This patch adds simple_link_init() and shares the dai_link setting code.

Reported-by: "kernelci.org bot" <bot@kernelci.org>
Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2")
Fixes: 434392271afcff350fe ("ASoC: simple-card: add simple_link_init()")
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/87fsyuax1g.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/generic/simple-card.c | 61 ++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 57ab89be1b4b7..0015f534d42d9 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -149,6 +149,27 @@ static int simple_parse_node(struct asoc_simple_priv *priv,
 	return 0;
 }
 
+static int simple_link_init(struct asoc_simple_priv *priv,
+			    struct device_node *node,
+			    struct device_node *codec,
+			    struct link_info *li,
+			    char *prefix, char *name)
+{
+	struct device *dev = simple_priv_to_dev(priv);
+	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
+	int ret;
+
+	ret = asoc_simple_parse_daifmt(dev, node, codec,
+				       prefix, &dai_link->dai_fmt);
+	if (ret < 0)
+		return 0;
+
+	dai_link->init			= asoc_simple_dai_init;
+	dai_link->ops			= &simple_ops;
+
+	return asoc_simple_set_dailink_name(dev, dai_link, name);
+}
+
 static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 				   struct device_node *np,
 				   struct device_node *codec,
@@ -161,6 +182,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	struct device_node *top = dev->of_node;
 	struct device_node *node = of_get_parent(np);
 	char *prefix = "";
+	char dai_name[64];
 	int ret;
 
 	dev_dbg(dev, "link_of DPCM (%pOF)\n", np);
@@ -184,11 +206,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		if (ret < 0)
 			goto out_put_node;
 
-		ret = asoc_simple_set_dailink_name(dev, dai_link,
-						   "fe.%s",
-						   cpus->dai_name);
-		if (ret < 0)
-			goto out_put_node;
+		snprintf(dai_name, sizeof(dai_name), "fe.%s", cpus->dai_name);
 
 		asoc_simple_canonicalize_cpu(cpus, is_single_links);
 		asoc_simple_canonicalize_platform(platforms, cpus);
@@ -208,11 +226,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		if (ret < 0)
 			goto out_put_node;
 
-		ret = asoc_simple_set_dailink_name(dev, dai_link,
-						   "be.%s",
-						   codecs->dai_name);
-		if (ret < 0)
-			goto out_put_node;
+		snprintf(dai_name, sizeof(dai_name), "be.%s", codecs->dai_name);
 
 		/* check "prefix" from top node */
 		snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node,
@@ -225,15 +239,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 
 	simple_parse_convert(dev, np, &dai_props->adata);
 
-	ret = asoc_simple_parse_daifmt(dev, node, codec,
-				       prefix, &dai_link->dai_fmt);
-	if (ret < 0)
-		goto out_put_node;
-
 	snd_soc_dai_link_set_capabilities(dai_link);
 
-	dai_link->ops			= &simple_ops;
-	dai_link->init			= asoc_simple_dai_init;
+	ret = simple_link_init(priv, node, codec, li, prefix, dai_name);
 
 out_put_node:
 	li->link++;
@@ -256,6 +264,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 	struct device_node *cpu = NULL;
 	struct device_node *node = NULL;
 	struct device_node *plat = NULL;
+	char dai_name[64];
 	char prop[128];
 	char *prefix = "";
 	int ret, single_cpu = 0;
@@ -272,11 +281,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 	snprintf(prop, sizeof(prop), "%splat", prefix);
 	plat = of_get_child_by_name(node, prop);
 
-	ret = asoc_simple_parse_daifmt(dev, node, codec,
-				       prefix, &dai_link->dai_fmt);
-	if (ret < 0)
-		goto dai_link_of_err;
-
 	ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu);
 	if (ret < 0)
 		goto dai_link_of_err;
@@ -289,19 +293,14 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv,
 	if (ret < 0)
 		goto dai_link_of_err;
 
-	ret = asoc_simple_set_dailink_name(dev, dai_link,
-					   "%s-%s",
-					   cpus->dai_name,
-					   codecs->dai_name);
-	if (ret < 0)
-		goto dai_link_of_err;
-
-	dai_link->ops = &simple_ops;
-	dai_link->init = asoc_simple_dai_init;
+	snprintf(dai_name, sizeof(dai_name),
+		 "%s-%s", cpus->dai_name, codecs->dai_name);
 
 	asoc_simple_canonicalize_cpu(cpus, single_cpu);
 	asoc_simple_canonicalize_platform(platforms, cpus);
 
+	ret = simple_link_init(priv, node, codec, li, prefix, dai_name);
+
 dai_link_of_err:
 	of_node_put(plat);
 	of_node_put(node);
-- 
GitLab


From 28c268d3acdd4cbcd2ac320b85609e77f84e74a7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 8 May 2021 17:01:45 +0200
Subject: [PATCH 0344/3804] ASoC: Intel: bytcr_rt5640: Add quirk for the Glavey
 TM800A550L tablet

Add a quirk for the Glavey TM800A550L tablet. This BYTCR tablet has no CHAN
package in its ACPI tables and uses SSP0-AIF1 rather than SSP0-AIF2, which
is the default for BYTCR devices.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20210508150146.28403-1-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index df2f5d55e8ffe..b42fa292d408a 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -574,6 +574,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
 					BYT_RT5640_SSP0_AIF1 |
 					BYT_RT5640_MCLK_EN),
 	},
+	{	/* Glavey TM800A550L */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
+			/* Above strings are too generic, also match on BIOS version */
+			DMI_MATCH(DMI_BIOS_VERSION, "ZY-8-BI-PX4S70VTR400-X423B-005-D"),
+		},
+		.driver_data = (void *)(BYTCR_INPUT_DEFAULTS |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
 	{
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
-- 
GitLab


From f0353e1f53f92f7b3da91e6669f5d58ee222ebe8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 8 May 2021 17:01:46 +0200
Subject: [PATCH 0345/3804] ASoC: Intel: bytcr_rt5640: Add quirk for the Lenovo
 Miix 3-830 tablet

The Lenovo Miix 3-830 tablet has only 1 speaker, has an internal analog
mic on IN1 and uses JD2 for jack-detect. Add a quirk to automatically
apply these settings on Lenovo Miix 3-830 tablets.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210508150146.28403-2-hdegoede@redhat.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/bytcr_rt5640.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index b42fa292d408a..22dbd9d93c1ef 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -663,6 +663,20 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
 					BYT_RT5640_MONO_SPEAKER |
 					BYT_RT5640_MCLK_EN),
 	},
+	{	/* Lenovo Miix 3-830 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 3-830"),
+		},
+		.driver_data = (void *)(BYT_RT5640_IN1_MAP |
+					BYT_RT5640_JD_SRC_JD2_IN4N |
+					BYT_RT5640_OVCD_TH_2000UA |
+					BYT_RT5640_OVCD_SF_0P75 |
+					BYT_RT5640_MONO_SPEAKER |
+					BYT_RT5640_DIFF_MIC |
+					BYT_RT5640_SSP0_AIF1 |
+					BYT_RT5640_MCLK_EN),
+	},
 	{	/* Linx Linx7 tablet */
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LINX"),
-- 
GitLab


From 40b82c2d9a78593201a3a62dc9239d6405334561 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:11 +0300
Subject: [PATCH 0346/3804] spi: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

While at it, fix format specifier and drop explicit casting.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 407420977a739..956dce3aafcad 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3441,8 +3441,8 @@ int spi_setup(struct spi_device *spi)
 		spi_set_thread_rt(spi->controller);
 	}
 
-	dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s%u bits/w, %u Hz max --> %d\n",
-			(int) (spi->mode & (SPI_CPOL | SPI_CPHA)),
+	dev_dbg(&spi->dev, "setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d\n",
+			spi->mode & SPI_MODE_X_MASK,
 			(spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
 			(spi->mode & SPI_LSB_FIRST) ? "lsb, " : "",
 			(spi->mode & SPI_3WIRE) ? "3wire, " : "",
-- 
GitLab


From dd507b5ec7ba44ab51e1a8404d04e815a91b472f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:12 +0300
Subject: [PATCH 0347/3804] spi: spidev: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spidev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index f56e0e975a469..24e9469ea35bb 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -59,7 +59,7 @@ static DECLARE_BITMAP(minors, N_SPI_MINORS);
  *
  * REVISIT should changing those flags be privileged?
  */
-#define SPI_MODE_MASK		(SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \
+#define SPI_MODE_MASK		(SPI_MODE_X_MASK | SPI_CS_HIGH \
 				| SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \
 				| SPI_NO_CS | SPI_READY | SPI_TX_DUAL \
 				| SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL \
-- 
GitLab


From 56f47edf33fb55ab9381f61d60cf34c7578f3d75 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:13 +0300
Subject: [PATCH 0348/3804] spi: npcm-pspi: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-npcm-pspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c
index 56d10c4511db4..1668a347e003d 100644
--- a/drivers/spi/spi-npcm-pspi.c
+++ b/drivers/spi/spi-npcm-pspi.c
@@ -105,7 +105,7 @@ static void npcm_pspi_set_mode(struct spi_device *spi)
 	u16 regtemp;
 	u16 mode_val;
 
-	switch (spi->mode & (SPI_CPOL | SPI_CPHA)) {
+	switch (spi->mode & SPI_MODE_X_MASK) {
 	case SPI_MODE_0:
 		mode_val = 0;
 		break;
-- 
GitLab


From a2f2db6b2a8708f6ac592a362e34fb330f874cea Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:14 +0300
Subject: [PATCH 0349/3804] spi: oc-tiny: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-oc-tiny.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index f3843f0ff2607..38c14c4e4e212 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -86,7 +86,7 @@ static int tiny_spi_setup(struct spi_device *spi)
 		hw->speed_hz = spi->max_speed_hz;
 		hw->baud = tiny_spi_baud(spi, hw->speed_hz);
 	}
-	hw->mode = spi->mode & (SPI_CPOL | SPI_CPHA);
+	hw->mode = spi->mode & SPI_MODE_X_MASK;
 	return 0;
 }
 
-- 
GitLab


From fdb217a38808e041f6eca8c550f1b5981e401a45 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:15 +0300
Subject: [PATCH 0350/3804] spi: omap-uwire: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap-uwire.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 71402f71ddd85..ceb479f5c88fe 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -330,7 +330,7 @@ static int uwire_setup_transfer(struct spi_device *spi, struct spi_transfer *t)
 	if (spi->mode & SPI_CPOL)
 		flags |= UWIRE_CLK_INVERTED;
 
-	switch (spi->mode & (SPI_CPOL | SPI_CPHA)) {
+	switch (spi->mode & SPI_MODE_X_MASK) {
 	case SPI_MODE_0:
 	case SPI_MODE_3:
 		flags |= UWIRE_WRITE_FALLING_EDGE | UWIRE_READ_RISING_EDGE;
-- 
GitLab


From 4ccf05579b9d0f15443a0edc860e2be7472ccfc1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:16 +0300
Subject: [PATCH 0351/3804] spi: ppc4xx: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ppc4xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 76874a7cca9b5..59d201acbb394 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -235,7 +235,7 @@ static int spi_ppc4xx_setup(struct spi_device *spi)
 	 */
 	cs->mode = SPI_PPC4XX_MODE_SPE;
 
-	switch (spi->mode & (SPI_CPHA | SPI_CPOL)) {
+	switch (spi->mode & SPI_MODE_X_MASK) {
 	case SPI_MODE_0:
 		cs->mode |= SPI_CLK_MODE0;
 		break;
-- 
GitLab


From 038b9de42269f33aca3e3741214c863a4e9328d0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 16:12:17 +0300
Subject: [PATCH 0352/3804] spi: uniphier: Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510131217.49357-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-uniphier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 6a9ef8ee3cc90..8900e51e1a1cc 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -142,7 +142,7 @@ static void uniphier_spi_set_mode(struct spi_device *spi)
 	 * FSTRT    start frame timing
 	 *          0: rising edge of clock, 1: falling edge of clock
 	 */
-	switch (spi->mode & (SPI_CPOL | SPI_CPHA)) {
+	switch (spi->mode & SPI_MODE_X_MASK) {
 	case SPI_MODE_0:
 		/* CKPHS=1, CKINIT=0, CKDLY=1, FSTRT=0 */
 		val1 = SSI_CKS_CKPHS | SSI_CKS_CKDLY;
-- 
GitLab


From f8090ffc91ffd788a73d4e6b5ca3107c94d9ec27 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 11 May 2021 10:17:57 +0900
Subject: [PATCH 0353/3804] ASoC: audio-graph: tidyup graph_dai_link_of_dpcm()

Declare local variables only in the scope that actually uses them.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/87eeeeax16.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/generic/audio-graph-card.c | 30 +++++++++++++---------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
index 2c8a2fcb7922a..0159a4576e9c1 100644
--- a/sound/soc/generic/audio-graph-card.c
+++ b/sound/soc/generic/audio-graph-card.c
@@ -276,24 +276,19 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 				  struct link_info *li)
 {
 	struct device *dev = simple_priv_to_dev(priv);
-	struct snd_soc_card *card = simple_priv_to_card(priv);
 	struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
 	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
 	struct device_node *top = dev->of_node;
 	struct device_node *ep = li->cpu ? cpu_ep : codec_ep;
-	struct device_node *port;
-	struct device_node *ports;
-	struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
-	struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
 	char dai_name[64];
 	int ret;
 
-	port	= of_get_parent(ep);
-	ports	= of_get_parent(port);
-
 	dev_dbg(dev, "link_of DPCM (%pOF)\n", ep);
 
 	if (li->cpu) {
+		struct snd_soc_card *card = simple_priv_to_card(priv);
+		struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
+
 		/* Codec is dummy */
 
 		/* FE settings */
@@ -302,7 +297,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 
 		ret = graph_parse_node(priv, cpu_ep, li, 1);
 		if (ret)
-			goto out_put_node;
+			return ret;
 
 		snprintf(dai_name, sizeof(dai_name),
 			 "fe.%pOFP.%s", cpus->of_node, cpus->dai_name);
@@ -319,7 +314,10 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		if (card->component_chaining && !soc_component_is_pcm(cpus))
 			dai_link->no_pcm = 1;
 	} else {
-		struct snd_soc_codec_conf *cconf;
+		struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0);
+		struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
+		struct device_node *port;
+		struct device_node *ports;
 
 		/* CPU is dummy */
 
@@ -327,22 +325,25 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		dai_link->no_pcm		= 1;
 		dai_link->be_hw_params_fixup	= asoc_simple_be_hw_params_fixup;
 
-		cconf	= simple_props_to_codec_conf(dai_props, 0);
-
 		ret = graph_parse_node(priv, codec_ep, li, 0);
 		if (ret < 0)
-			goto out_put_node;
+			return ret;
 
 		snprintf(dai_name, sizeof(dai_name),
 			 "be.%pOFP.%s", codecs->of_node, codecs->dai_name);
 
 		/* check "prefix" from top node */
+		port = of_get_parent(ep);
+		ports = of_get_parent(port);
 		snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node,
 					      "prefix");
 		if (of_node_name_eq(ports, "ports"))
 			snd_soc_of_parse_node_prefix(ports, cconf, codecs->of_node, "prefix");
 		snd_soc_of_parse_node_prefix(port, cconf, codecs->of_node,
 					     "prefix");
+
+		of_node_put(ports);
+		of_node_put(port);
 	}
 
 	graph_parse_convert(dev, ep, &dai_props->adata);
@@ -351,11 +352,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 
 	ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name);
 
-out_put_node:
 	li->link++;
 
-	of_node_put(ports);
-	of_node_put(port);
 	return ret;
 }
 
-- 
GitLab


From 582f3503f96543f3afbaaaa085755fd167a0f71e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 11 May 2021 10:18:48 +0900
Subject: [PATCH 0354/3804] ASoC: audio-graph: tidyup graph_parse_node()

audio-graph uses cpus->dai_name / codecs->dai_name to create
dailink->name.
In graph_parse_node(), xxx->dai_name is obtained by
snd_soc_get_dai_name(), but it might be removed right afterwards by
asoc_simple_canonicalize_cpu().

The order should be
	*1) call snd_soc_get_dai_name()
	 2) create dailink name
	*3) call asoc_simple_canonicalize_cpu()

The steps marked with * are implemented in graph_parse_node().
This patch removes 3) from graph_parse_node() and lets its callers
perform it after the dailink name has been created.

Reported-by: "kernelci.org bot" <bot@kernelci.org>
Fixes: 8859f809c7d5813 ("ASoC: audio-graph: add graph_parse_node()")
Fixes: e51237b8d305225 ("ASoC: audio-graph: add graph_link_init()")
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Michael Walle <michael@walle.cc>
Link: https://lore.kernel.org/r/87cztyawzr.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/generic/audio-graph-card.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
index 0159a4576e9c1..5e71382467e88 100644
--- a/sound/soc/generic/audio-graph-card.c
+++ b/sound/soc/generic/audio-graph-card.c
@@ -209,7 +209,7 @@ static void graph_parse_mclk_fs(struct device_node *top,
 static int graph_parse_node(struct asoc_simple_priv *priv,
 			    struct device_node *ep,
 			    struct link_info *li,
-			    int is_cpu)
+			    int *cpu)
 {
 	struct device *dev = simple_priv_to_dev(priv);
 	struct device_node *top = dev->of_node;
@@ -217,9 +217,9 @@ static int graph_parse_node(struct asoc_simple_priv *priv,
 	struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link);
 	struct snd_soc_dai_link_component *dlc;
 	struct asoc_simple_dai *dai;
-	int ret, single = 0;
+	int ret;
 
-	if (is_cpu) {
+	if (cpu) {
 		dlc = asoc_link_to_cpu(dai_link, 0);
 		dai = simple_props_to_dai_cpu(dai_props, 0);
 	} else {
@@ -229,7 +229,7 @@ static int graph_parse_node(struct asoc_simple_priv *priv,
 
 	graph_parse_mclk_fs(top, ep, dai_props);
 
-	ret = asoc_simple_parse_dai(ep, dlc, &single);
+	ret = asoc_simple_parse_dai(ep, dlc, cpu);
 	if (ret < 0)
 		return ret;
 
@@ -241,9 +241,6 @@ static int graph_parse_node(struct asoc_simple_priv *priv,
 	if (ret < 0)
 		return ret;
 
-	if (is_cpu)
-		asoc_simple_canonicalize_cpu(dlc, single);
-
 	return 0;
 }
 
@@ -288,6 +285,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 	if (li->cpu) {
 		struct snd_soc_card *card = simple_priv_to_card(priv);
 		struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
+		int is_single_links = 0;
 
 		/* Codec is dummy */
 
@@ -295,7 +293,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		dai_link->dynamic		= 1;
 		dai_link->dpcm_merged_format	= 1;
 
-		ret = graph_parse_node(priv, cpu_ep, li, 1);
+		ret = graph_parse_node(priv, cpu_ep, li, &is_single_links);
 		if (ret)
 			return ret;
 
@@ -313,6 +311,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		 */
 		if (card->component_chaining && !soc_component_is_pcm(cpus))
 			dai_link->no_pcm = 1;
+
+		asoc_simple_canonicalize_cpu(cpus, is_single_links);
 	} else {
 		struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0);
 		struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
@@ -325,7 +325,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
 		dai_link->no_pcm		= 1;
 		dai_link->be_hw_params_fixup	= asoc_simple_be_hw_params_fixup;
 
-		ret = graph_parse_node(priv, codec_ep, li, 0);
+		ret = graph_parse_node(priv, codec_ep, li, NULL);
 		if (ret < 0)
 			return ret;
 
@@ -367,20 +367,23 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv,
 	struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
 	struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
 	char dai_name[64];
-	int ret;
+	int ret, is_single_links = 0;
 
 	dev_dbg(dev, "link_of (%pOF)\n", cpu_ep);
 
-	ret = graph_parse_node(priv, cpu_ep, li, 1);
+	ret = graph_parse_node(priv, cpu_ep, li, &is_single_links);
 	if (ret < 0)
 		return ret;
 
-	ret = graph_parse_node(priv, codec_ep, li, 0);
+	ret = graph_parse_node(priv, codec_ep, li, NULL);
 	if (ret < 0)
 		return ret;
 
 	snprintf(dai_name, sizeof(dai_name),
 		 "%s-%s", cpus->dai_name, codecs->dai_name);
+
+	asoc_simple_canonicalize_cpu(cpus, is_single_links);
+
 	ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name);
 	if (ret < 0)
 		return ret;
-- 
GitLab


From 0fad605fb0bdc00d8ad78696300ff2fbdee6e048 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 11 May 2021 14:28:55 +0100
Subject: [PATCH 0355/3804] ASoC: cs42l42: Regmap must use_single_read/write

cs42l42 does not support standard burst transfers so the use_single_read
and use_single_write flags must be set in the regmap config.

Because of this bug, the patch:

commit 0a0eb567e1d4 ("ASoC: cs42l42: Minor error paths fixups")

broke cs42l42 probe() because without the use_single_* flags it causes
regmap to issue a burst read.

However, the missing use_single_* could cause problems anyway because the
regmap cache can attempt burst transfers if these flags are not set.

Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec")
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511132855.27159-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs42l42.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c
index bf982e145e945..77473c226f9ec 100644
--- a/sound/soc/codecs/cs42l42.c
+++ b/sound/soc/codecs/cs42l42.c
@@ -399,6 +399,9 @@ static const struct regmap_config cs42l42_regmap = {
 	.reg_defaults = cs42l42_reg_defaults,
 	.num_reg_defaults = ARRAY_SIZE(cs42l42_reg_defaults),
 	.cache_type = REGCACHE_RBTREE,
+
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false);
-- 
GitLab


From 29dd19e3ac7b2a8671ebeac02859232ce0e34f58 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:03:21 +0200
Subject: [PATCH 0356/3804] media: exynos4-is: remove a now unused integer

The usage of pm_runtime_resume_and_get() removed the need of a
temporary integer. So, drop it.

Fixes: 59f96244af94 ("media: exynos4-is: fix pm_runtime_get_sync() usage count")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/exynos4-is/media-dev.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
index e025178db06c3..3b8a24bb724c8 100644
--- a/drivers/media/platform/exynos4-is/media-dev.c
+++ b/drivers/media/platform/exynos4-is/media-dev.c
@@ -1284,7 +1284,6 @@ static DEVICE_ATTR(subdev_conf_mode, S_IWUSR | S_IRUGO,
 static int cam_clk_prepare(struct clk_hw *hw)
 {
 	struct cam_clk *camclk = to_cam_clk(hw);
-	int ret;
 
 	if (camclk->fmd->pmf == NULL)
 		return -ENODEV;
-- 
GitLab


From 2ee4c8a268764e751ee44dfffa76c813cfc27aee Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 11 May 2021 16:00:03 +0100
Subject: [PATCH 0357/3804] MAINTAINERS: Add Krzysztof as PCI host/endpoint
 controllers reviewer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Krzysztof has been carrying out PCI patches review for a long time and
he has been instrumental in driving PCI host/endpoint controller drivers
improvements.

Make his role official.

Link: https://lore.kernel.org/r/20210511150003.1592-1-lorenzo.pieralisi@arm.com
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Krzysztof Wilczyński <kw@linux.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..9755bf97658d6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14110,6 +14110,7 @@ F:	drivers/pci/controller/pci-v3-semi.c
 PCI ENDPOINT SUBSYSTEM
 M:	Kishon Vijay Abraham I <kishon@ti.com>
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+R:	Krzysztof Wilczyński <kw@linux.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 F:	Documentation/PCI/endpoint/*
@@ -14158,6 +14159,7 @@ F:	drivers/pci/controller/pci-xgene-msi.c
 PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS
 M:	Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 R:	Rob Herring <robh@kernel.org>
+R:	Krzysztof Wilczyński <kw@linux.com>
 L:	linux-pci@vger.kernel.org
 S:	Supported
 Q:	http://patchwork.ozlabs.org/project/linux-pci/list/
-- 
GitLab


From 5e1f689913a4498e3081093670ef9d85b2c60920 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 29 Apr 2021 14:18:53 +0200
Subject: [PATCH 0358/3804] nvme-multipath: fix double initialization of ANA
 state

nvme_init_identify and thus nvme_mpath_init can be called multiple
times and thus must not overwrite potentially initialized or in-use
fields.  Split out a helper for the basic initialization when the
controller is initialized and make sure the init_identify path does
not blindly change in-use data structures.

Fixes: 0d0b660f214d ("nvme: add ANA support")
Reported-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/host/core.c      |  3 +-
 drivers/nvme/host/multipath.c | 55 ++++++++++++++++++-----------------
 drivers/nvme/host/nvme.h      |  8 +++--
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 522c9b229f80e..762125f2905f7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
 	}
 
-	ret = nvme_mpath_init(ctrl, id);
+	ret = nvme_mpath_init_identify(ctrl, id);
 	if (ret < 0)
 		goto out_free;
 
@@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		min(default_ps_max_latency_us, (unsigned long)S32_MAX));
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+	nvme_mpath_init_ctrl(ctrl);
 
 	return 0;
 out_free_name:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 0551796517e61..deb14562c96ae 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	put_disk(head->disk);
 }
 
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
 {
-	int error;
+	mutex_init(&ctrl->ana_lock);
+	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+}
+
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
+	size_t ana_log_size;
+	int error = 0;
 
 	/* check if multipath is enabled and we have the capability */
 	if (!multipath || !ctrl->subsys ||
@@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
 	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
 
-	mutex_init(&ctrl->ana_lock);
-	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
-	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
-		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
-	ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
-
-	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
+		ctrl->max_namespaces * sizeof(__le32);
+	if (ana_log_size > max_transfer_size) {
 		dev_err(ctrl->device,
-			"ANA log page size (%zd) larger than MDTS (%d).\n",
-			ctrl->ana_log_size,
-			ctrl->max_hw_sectors << SECTOR_SHIFT);
+			"ANA log page size (%zd) larger than MDTS (%zd).\n",
+			ana_log_size, max_transfer_size);
 		dev_err(ctrl->device, "disabling ANA support.\n");
-		return 0;
+		goto out_uninit;
 	}
-
-	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
-	if (!ctrl->ana_log_buf) {
-		error = -ENOMEM;
-		goto out;
+	if (ana_log_size > ctrl->ana_log_size) {
+		nvme_mpath_stop(ctrl);
+		kfree(ctrl->ana_log_buf);
+		ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+		if (!ctrl->ana_log_buf)
+			return -ENOMEM;
 	}
-
+	ctrl->ana_log_size = ana_log_size;
 	error = nvme_read_ana_log(ctrl);
 	if (error)
-		goto out_free_ana_log_buf;
+		goto out_uninit;
 	return 0;
-out_free_ana_log_buf:
-	kfree(ctrl->ana_log_buf);
-	ctrl->ana_log_buf = NULL;
-out:
+
+out_uninit:
+	nvme_mpath_uninit(ctrl);
 	return error;
 }
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 05f31a2c64bb2..0015860ec12bf 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
 void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
 bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
@@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 static inline void nvme_trace_bio_complete(struct request *req)
 {
 }
-static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
+{
+}
+static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
 		struct nvme_id_ctrl *id)
 {
 	if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
-- 
GitLab


From 608a969046e6e0567d05a166be66c77d2dd8220b Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 6 May 2021 18:51:35 -0700
Subject: [PATCH 0359/3804] nvmet: fix inline bio check for bdev-ns

When handling rw commands, for the inline bio case we only consider the
transfer size. This works well when req->sg_cnt fits into the
req->inline_bvec, but it will result in the warning in
__bio_add_page() when req->sg_cnt > NVMET_MAX_INLINE_BIOVEC.

Consider an I/O of size 32768 whose first page is not aligned to the
page boundary; the I/O is then split in the following manner :-

[ 2206.256140] nvmet: sg->length 3440 sg->offset 656
[ 2206.256144] nvmet: sg->length 4096 sg->offset 0
[ 2206.256148] nvmet: sg->length 4096 sg->offset 0
[ 2206.256152] nvmet: sg->length 4096 sg->offset 0
[ 2206.256155] nvmet: sg->length 4096 sg->offset 0
[ 2206.256159] nvmet: sg->length 4096 sg->offset 0
[ 2206.256163] nvmet: sg->length 4096 sg->offset 0
[ 2206.256166] nvmet: sg->length 4096 sg->offset 0
[ 2206.256170] nvmet: sg->length 656 sg->offset 0

Now req->transfer_len == NVMET_MAX_INLINE_DATA_LEN, i.e. 32768, but
req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC, which is (8).
This will result in the following warning message :-

nvmet_bdev_execute_rw()
	bio_add_page()
		__bio_add_page()
			WARN_ON_ONCE(bio_full(bio, len));

This scenario is very hard to reproduce: it only occurs on the
nvme-loop transport, with rw commands issued through the passthru IOCTL
interface from a host application whose data buffer is allocated with
malloc() rather than posix_memalign().

Fixes: 73383adfad24 ("nvmet: don't split large I/Os unconditionally")
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-bdev.c | 2 +-
 drivers/nvme/target/nvmet.h       | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 9a8b3726a37c4..429263ca9b978 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 
 	sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
 
-	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->b.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 5566ed403576e..d69a409515d65 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
 	return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
 }
 
+static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
+{
+	return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN &&
+	       req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
+}
+
 #endif /* _NVMET_H */
-- 
GitLab


From ab96de5def854d8fc51280b6a20597e64b14ac31 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 6 May 2021 18:51:36 -0700
Subject: [PATCH 0360/3804] nvmet: fix inline bio check for passthru

When handling passthru commands, for inline bio allocation we only
consider the transfer size. This works well when req->sg_cnt fits into
the req->inline_bvec, but it will result in an early return from
bio_add_hw_page() when req->sg_cnt > NVMET_MAX_INLINE_BIOVEC.

Consider an I/O of size 32768 whose first buffer is not aligned to the
page boundary; the I/O is then split in the following manner :-

[ 2206.256140] nvmet: sg->length 3440 sg->offset 656
[ 2206.256144] nvmet: sg->length 4096 sg->offset 0
[ 2206.256148] nvmet: sg->length 4096 sg->offset 0
[ 2206.256152] nvmet: sg->length 4096 sg->offset 0
[ 2206.256155] nvmet: sg->length 4096 sg->offset 0
[ 2206.256159] nvmet: sg->length 4096 sg->offset 0
[ 2206.256163] nvmet: sg->length 4096 sg->offset 0
[ 2206.256166] nvmet: sg->length 4096 sg->offset 0
[ 2206.256170] nvmet: sg->length 656 sg->offset 0

Now req->transfer_len == NVMET_MAX_INLINE_DATA_LEN, i.e. 32768, but
req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC, which is (8).
This will result in an early return in the following code path :-

nvmet_passthru_map_sg()
	bio_add_pc_page()
		bio_add_hw_page()
			if (bio_full(bio, len))
				return 0;

Use previously introduced helper nvmet_use_inline_bvec() to consider
req->sg_cnt when using inline bio. This only affects nvme-loop
transport.

Fixes: dab3902b19a0 ("nvmet: use inline bio for passthru fast path")
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/passthru.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 2798944899b73..39b1473f7204e 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
 	if (req->sg_cnt > BIO_MAX_VECS)
 		return -EINVAL;
 
-	if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+	if (nvmet_use_inline_bvec(req)) {
 		bio = &req->p.inline_bio;
 		bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
 	} else {
-- 
GitLab


From 8cc365f9559b86802afc0208389f5c8d46b4ad61 Mon Sep 17 00:00:00 2001
From: Michal Kalderon <michal.kalderon@marvell.com>
Date: Thu, 6 May 2021 10:08:19 +0300
Subject: [PATCH 0361/3804] nvmet-rdma: Fix NULL deref when SEND is completed
 with error

When running some traffic and taking down the link on peer, a
retry counter exceeded error is received. This leads to
nvmet_rdma_error_comp which tried accessing the cq_context to
obtain the queue. The cq_context is no longer valid after the
fix to use shared CQ mechanism and should be obtained similar
to how it is obtained in other functions from the wc->qp.

[ 905.786331] nvmet_rdma: SEND for CQE 0x00000000e3337f90 failed with status transport retry counter exceeded (12).
[ 905.832048] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048
[ 905.839919] PGD 0 P4D 0
[ 905.842464] Oops: 0000 1 SMP NOPTI
[ 905.846144] CPU: 13 PID: 1557 Comm: kworker/13:1H Kdump: loaded Tainted: G OE --------- - - 4.18.0-304.el8.x86_64 #1
[ 905.872135] RIP: 0010:nvmet_rdma_error_comp+0x5/0x1b [nvmet_rdma]
[ 905.878259] Code: 19 4f c0 e8 89 b3 a5 f6 e9 5b e0 ff ff 0f b7 75 14 4c 89 ea 48 c7 c7 08 1a 4f c0 e8 71 b3 a5 f6 e9 4b e0 ff ff 0f 1f 44 00 00 <48> 8b 47 48 48 85 c0 74 08 48 89 c7 e9 98 bf 49 00 e9 c3 e3 ff ff
[ 905.897135] RSP: 0018:ffffab601c45fe28 EFLAGS: 00010246
[ 905.902387] RAX: 0000000000000065 RBX: ffff9e729ea2f800 RCX: 0000000000000000
[ 905.909558] RDX: 0000000000000000 RSI: ffff9e72df9567c8 RDI: 0000000000000000
[ 905.916731] RBP: ffff9e729ea2b400 R08: 000000000000074d R09: 0000000000000074
[ 905.923903] R10: 0000000000000000 R11: ffffab601c45fcc0 R12: 0000000000000010
[ 905.931074] R13: 0000000000000000 R14: 0000000000000010 R15: ffff9e729ea2f400
[ 905.938247] FS: 0000000000000000(0000) GS:ffff9e72df940000(0000) knlGS:0000000000000000
[ 905.938249] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 905.950067] nvmet_rdma: SEND for CQE 0x00000000c7356cca failed with status transport retry counter exceeded (12).
[ 905.961855] CR2: 0000000000000048 CR3: 000000678d010004 CR4: 00000000007706e0
[ 905.961855] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 905.961856] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 905.961857] PKRU: 55555554
[ 906.010315] Call Trace:
[ 906.012778] __ib_process_cq+0x89/0x170 [ib_core]
[ 906.017509] ib_cq_poll_work+0x26/0x80 [ib_core]
[ 906.022152] process_one_work+0x1a7/0x360
[ 906.026182] ? create_worker+0x1a0/0x1a0
[ 906.030123] worker_thread+0x30/0x390
[ 906.033802] ? create_worker+0x1a0/0x1a0
[ 906.037744] kthread+0x116/0x130
[ 906.040988] ? kthread_flush_work_fn+0x10/0x10
[ 906.045456] ret_from_fork+0x1f/0x40

Fixes: ca0f1a8055be2 ("nvmet-rdma: use new shared CQ mechanism")
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 6c1f3ab7649c7..7d607f435e366 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 
 	nvmet_rdma_release_rsp(rsp);
 
@@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
-	struct nvmet_rdma_queue *queue = cq->cq_context;
+	struct nvmet_rdma_queue *queue = wc->qp->qp_context;
 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
 	u16 status;
 
-- 
GitLab


From 3651aaacd10b2f8cee3780c490fc2df55bd4f543 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:36 -0700
Subject: [PATCH 0362/3804] nvmet: demote discovery cmd parse err msg to debug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Host can send invalid commands and flood the target with error messages
for the discovery controller. Demote the error message from pr_err() to
pr_debug() in nvmet_parse_discovery_cmd().

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/discovery.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 4845d12e374ac..fc3645fc2c249 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
 		req->execute = nvmet_execute_disc_identify;
 		return 0;
 	default:
-		pr_err("unhandled cmd %d\n", cmd->common.opcode);
+		pr_debug("unhandled cmd %d\n", cmd->common.opcode);
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
-- 
GitLab


From 4c2dab2bf5ace0ddc07ca7f04a7ba32fc3b23492 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:37 -0700
Subject: [PATCH 0363/3804] nvmet: use helper to remove the duplicate code

Use the helper nvmet_report_invalid_opcode() to report invalid opcode
so we can remove the duplicate code.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index e7a367cf6d367..dcd49a72f2f3c 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 	case nvme_admin_keep_alive:
 		req->execute = nvmet_execute_keep_alive;
 		return 0;
+	default:
+		return nvmet_report_invalid_opcode(req);
 	}
-
-	pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-	       req->sq->qid);
-	req->error_loc = offsetof(struct nvme_common_command, opcode);
-	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 }
-- 
GitLab


From 7a4ffd20ec6d31dfde2cc5608851e5109ffed7c9 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Mon, 10 May 2021 12:15:38 -0700
Subject: [PATCH 0364/3804] nvmet: demote fabrics cmd parse err msg to debug

Host can send invalid commands and flood the target with error messages.
Demote the error message from pr_err() to pr_debug() in
nvmet_parse_fabrics_cmd() and nvmet_parse_connect_cmd().

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fabrics-cmd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 1420a8e3e0b10..7d0f3523fdab2 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
 		req->execute = nvmet_execute_prop_get;
 		break;
 	default:
-		pr_err("received unknown capsule type 0x%x\n",
+		pr_debug("received unknown capsule type 0x%x\n",
 			cmd->fabrics.fctype);
 		req->error_loc = offsetof(struct nvmf_common_command, fctype);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
@@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
 	struct nvme_command *cmd = req->cmd;
 
 	if (!nvme_is_fabrics(cmd)) {
-		pr_err("invalid command 0x%x on unconnected queue.\n",
+		pr_debug("invalid command 0x%x on unconnected queue.\n",
 			cmd->fabrics.opcode);
 		req->error_loc = offsetof(struct nvme_common_command, opcode);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 	}
 	if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-		pr_err("invalid capsule type 0x%x on unconnected queue.\n",
+		pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
 			cmd->fabrics.fctype);
 		req->error_loc = offsetof(struct nvmf_common_command, fctype);
 		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
-- 
GitLab


From 918d9c77791cc8267b5b5ab556c868dfa57e0d93 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:28 +0200
Subject: [PATCH 0365/3804] docs: cdrom-standard.rst: get rid of uneeded UTF-8
 chars

This file was converted from a LaTeX one. The conversion used
some UTF-8 characters at the literal blocks. Replace them
by normal ASCII characters.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/79c3f482da17ea48d69b6e6ad1b7fb102b9dd7bf.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/cdrom/cdrom-standard.rst | 30 +++++++++++++-------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 70500b189cc84..5845960ca3821 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general
 *struct file_operations*::
 
 	struct file_operations cdrom_fops = {
-		NULL,			/∗ lseek ∗/
-		block _read ,		/∗ read—general block-dev read ∗/
-		block _write,		/∗ write—general block-dev write ∗/
-		NULL,			/∗ readdir ∗/
-		NULL,			/∗ select ∗/
-		cdrom_ioctl,		/∗ ioctl ∗/
-		NULL,			/∗ mmap ∗/
-		cdrom_open,		/∗ open ∗/
-		cdrom_release,		/∗ release ∗/
-		NULL,			/∗ fsync ∗/
-		NULL,			/∗ fasync ∗/
-		NULL			/∗ revalidate ∗/
+		NULL,			/* lseek */
+		block _read ,		/* read--general block-dev read */
+		block _write,		/* write--general block-dev write */
+		NULL,			/* readdir */
+		NULL,			/* select */
+		cdrom_ioctl,		/* ioctl */
+		NULL,			/* mmap */
+		cdrom_open,		/* open */
+		cdrom_release,		/* release */
+		NULL,			/* fsync */
+		NULL,			/* fasync */
+		NULL			/* revalidate */
 	};
 
 Every active CD-ROM device shares this *struct*. The routines
@@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with
 `cdrom.c`, currently contains the following fields::
 
   struct cdrom_device_info {
-	const struct cdrom_device_ops * ops; 	/* device operations for this major */
+	const struct cdrom_device_ops * ops;	/* device operations for this major */
 	struct list_head list;			/* linked list of all device_info */
 	struct gendisk * disk;			/* matching block layer disk */
 	void *  handle;				/* driver-dependent data */
 
-	int mask; 				/* mask of capability: disables them */
+	int mask;				/* mask of capability: disables them */
 	int speed;				/* maximum speed for reading data */
 	int capacity;				/* number of discs in a jukebox */
 
@@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*.
 
 In the file `cdrom.c` you will encounter many constructions of the type::
 
-	if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+	if (cdo->capability & ~cdi->mask & CDC _<capability>) ...
 
 There is no *ioctl* to set the mask... The reason is that
 I think it is better to control the **behavior** rather than the
-- 
GitLab


From 8d3926c09e043448d4d26896b8225943f12d0933 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:29 +0200
Subject: [PATCH 0366/3804] docs: ABI: remove a meaningless UTF-8 character
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those two files have this character:
	- U+00ac ('¬'): NOT SIGN

at the end of the first line, apparently for no reason. Drop them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6cd3f0b47568fecb7889fd18d1d744c3aaf73866.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/obsolete/sysfs-kernel-fadump_registered  | 2 +-
 Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
index 0360be39c98e9..dae880b1a5d5d 100644
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.
 
 What:		/sys/kernel/fadump_registered
 Date:		Feb 2012
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
index 6ce0b129ab12d..ca2396edb5f10 100644
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.
 
 What:		/sys/kernel/fadump_release_mem
 Date:		Feb 2012
-- 
GitLab


From 6f3bceba03b4f18e0b83261e2fb761e0ad5da625 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:30 +0200
Subject: [PATCH 0367/3804] docs: ABI: remove some spurious characters

The KernelVersion tag contains some spurious UTF-8 characters
for no reason. Drop them.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6d774ad6cb3795a177309503a39f8f1b5e309d64.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-module | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index a485434d2a0fb..88bddf192ceb7 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -37,13 +37,13 @@ Description:	Maximum time allowed for periodic transfers per microframe (μs)
 
 What:		/sys/module/*/{coresize,initsize}
 Date:		Jan 2012
-KernelVersion:»·3.3
+KernelVersion:	3.3
 Contact:	Kay Sievers <kay.sievers@vrfy.org>
 Description:	Module size in bytes.
 
 What:		/sys/module/*/taint
 Date:		Jan 2012
-KernelVersion:»·3.3
+KernelVersion:	3.3
 Contact:	Kay Sievers <kay.sievers@vrfy.org>
 Description:	Module taint flags:
 			==  =====================
-- 
GitLab


From d1f2722d5357d7a5138b1be8bd64946f0a14c81e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:31 +0200
Subject: [PATCH 0368/3804] docs: hwmon: tmp103.rst: fix bad usage of UTF-8
 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While UTF-8 characters can be used at the Linux documentation,
the best is to use them only when ASCII doesn't offer a good replacement.
So, replace the occurrences of the following UTF-8 characters:

	- U+2013 ('–'): EN DASH

In this specific case, EN DASH was used instead of a minus
sign. So, replace it by a single hyphen.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/73b3c7c1eef5c12ddc941624d23689313bd56529.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/hwmon/tmp103.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst
index e195a7d14309a..b3ef81475cf8b 100644
--- a/Documentation/hwmon/tmp103.rst
+++ b/Documentation/hwmon/tmp103.rst
@@ -21,10 +21,10 @@ Description
 The TMP103 is a digital output temperature sensor in a four-ball
 wafer chip-scale package (WCSP). The TMP103 is capable of reading
 temperatures to a resolution of 1°C. The TMP103 is specified for
-operation over a temperature range of –40°C to +125°C.
+operation over a temperature range of -40°C to +125°C.
 
 Resolution: 8 Bits
-Accuracy: ±1°C Typ (–10°C to +100°C)
+Accuracy: ±1°C Typ (-10°C to +100°C)
 
 The driver provides the common sysfs-interface for temperatures (see
 Documentation/hwmon/sysfs-interface.rst under Temperatures).
-- 
GitLab


From 5e716ec68b4a75a84e28c0efa68db613deb64981 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 11 May 2021 17:01:32 +0200
Subject: [PATCH 0369/3804] docs: networking: device_drivers: fix bad usage of
 UTF-8 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probably because the original file was pre-processed by some
tool, both i40e.rst and iavf.rst files are using this character:

	- U+2013 ('–'): EN DASH

meaning a hyphen when calling a command line application, which
is obviously wrong. So, replace them by a hyphen, ensuring
that it will be properly displayed as literals when building
the documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/95eb2a48d0ca3528780ce0dfce64359977fa8cb3.1620744606.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../networking/device_drivers/ethernet/intel/i40e.rst         | 4 ++--
 .../networking/device_drivers/ethernet/intel/iavf.rst         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 8a9b18573688d..2d3f6bd969a2b 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the
 driver. To re-enable ATR, the sideband can be disabled with the ethtool -K
 option. For example::
 
-  ethtool –K [adapter] ntuple [off|on]
+  ethtool -K [adapter] ntuple [off|on]
 
 If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a
 TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is
@@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as `ifstat` or `sar -n DEV [interval] [number of samples]`
 
 2. Enable HW TC offload on interface::
 
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
index 52e037b11c979..25330b7b5168d 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]``
 
 NOTE:
   Setting up channels via ethtool (ethtool -L) is not supported when the
-- 
GitLab


From 7240cd200541543008a7ce4fcaf2ba5a5556128f Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Tue, 11 May 2021 09:49:37 -0400
Subject: [PATCH 0370/3804] Remove link to nonexistent rocket driver docs

The rocket driver and its documentation were removed in commit
3b00b6af7a5b ("tty: rocket, remove the driver"), but the corresponding
entry in index.rst was not removed.

Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Fixes: 3b00b6af7a5b ("tty: rocket, remove the driver")
Link: https://lore.kernel.org/r/20210511134937.2442291-1-desmondcheongzx@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/serial/index.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst
index 21351b8c95a4d..8f7d7af3b90b1 100644
--- a/Documentation/driver-api/serial/index.rst
+++ b/Documentation/driver-api/serial/index.rst
@@ -19,7 +19,6 @@ Serial drivers
 
     moxa-smartio
     n_gsm
-    rocket
     serial-iso7816
     serial-rs485
 
-- 
GitLab


From 965a7d72e798eb7af0aa67210e37cf7ecd1c9cad Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:42 +0200
Subject: [PATCH 0371/3804] mac80211: assure all fragments are encrypted

Do not mix plaintext and encrypted fragments in protected Wi-Fi
networks. This fixes CVE-2020-26147.

Previously, an attacker was able to first forward a legitimate encrypted
fragment towards a victim, followed by a plaintext fragment. The
encrypted and plaintext fragment would then be reassembled. For further
details see Section 6.3 and Appendix D in the paper "Fragment and Forge:
Breaking Wi-Fi Through Frame Aggregation and Fragmentation".

Because of this change there are now two equivalent conditions in the
code to determine if a received fragment requires sequential PNs, so we
also move this test to a separate function to make the code easier to
maintain.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.30c4394bb835.I5acfdb552cc1d20c339c262315950b3eac491397@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 62047e93e217b..65fc674e27cc0 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2194,6 +2194,16 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 	return NULL;
 }
 
+static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
+{
+	return rx->key &&
+		(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+		 rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
+		ieee80211_has_protected(fc);
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
@@ -2238,12 +2248,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		/* This is the first fragment of a new frame. */
 		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
 						 rx->seqno_idx, &(rx->skb));
-		if (rx->key &&
-		    (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
-		     rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
-		    ieee80211_has_protected(fc)) {
+		if (requires_sequential_pn(rx, fc)) {
 			int queue = rx->security_idx;
 
 			/* Store CCMP/GCMP PN so that we can verify that the
@@ -2285,11 +2290,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
 		int queue;
 
-		if (!rx->key ||
-		    (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
-		     rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
+		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
-- 
GitLab


From 94034c40ab4a3fcf581fbc7f8fdf4e29943c4a24 Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:43 +0200
Subject: [PATCH 0372/3804] mac80211: prevent mixed key and fragment cache
 attacks

Simultaneously prevent mixed key attacks (CVE-2020-24587) and fragment
cache attacks (CVE-2020-24586). This is accomplished by assigning a
unique color to every key (per interface) and using this to track which
key was used to decrypt a fragment. When reassembling frames, it is
now checked whether all fragments were decrypted using the same key.

To assure that fragment cache attacks are also prevented, the ID that is
assigned to keys is unique even over (re)associations and (re)connects.
This means fragments separated by a (re)association or (re)connect will
not be reassembled. Because mac80211 now also prevents the reassembly of
mixed encrypted and plaintext fragments, all cache attacks are prevented.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.3f8290e59823.I622a67769ed39257327a362cfc09c812320eb979@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/key.c         | 7 +++++++
 net/mac80211/key.h         | 2 ++
 net/mac80211/rx.c          | 6 ++++++
 4 files changed, 16 insertions(+)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8fcbaa1eedf3e..874ffe7819e53 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -97,6 +97,7 @@ struct ieee80211_fragment_entry {
 	u8 rx_queue;
 	bool check_sequential_pn; /* needed for CCMP/GCMP */
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+	unsigned int key_color;
 };
 
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 56c068cb49c4d..f695fc80088bc 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -799,6 +799,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
 		       struct ieee80211_sub_if_data *sdata,
 		       struct sta_info *sta)
 {
+	static atomic_t key_color = ATOMIC_INIT(0);
 	struct ieee80211_key *old_key;
 	int idx = key->conf.keyidx;
 	bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
@@ -850,6 +851,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
 	key->sdata = sdata;
 	key->sta = sta;
 
+	/*
+	 * Assign a unique ID to every key so we can easily prevent mixed
+	 * key and fragment cache attacks.
+	 */
+	key->color = atomic_inc_return(&key_color);
+
 	increment_tailroom_need_count(sdata);
 
 	ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 7ad72e9b4991d..1e326c89d7217 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -128,6 +128,8 @@ struct ieee80211_key {
 	} debugfs;
 #endif
 
+	unsigned int color;
+
 	/*
 	 * key config, must be last because it contains key
 	 * material as variable length member
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 65fc674e27cc0..531232b91bc49 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2255,6 +2255,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			 * next fragment has a sequential PN value.
 			 */
 			entry->check_sequential_pn = true;
+			entry->key_color = rx->key->color;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
 			       IEEE80211_CCMP_PN_LEN);
@@ -2292,6 +2293,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
 		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
+
+		/* Prevent mixed key and fragment cache attacks */
+		if (entry->key_color != rx->key->color)
+			return RX_DROP_UNUSABLE;
+
 		memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
 		for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
 			pn[i]++;
-- 
GitLab


From a1d5ff5651ea592c67054233b14b30bf4452999c Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:44 +0200
Subject: [PATCH 0373/3804] mac80211: properly handle A-MSDUs that start with
 an RFC 1042 header

Properly parse A-MSDUs whose first 6 bytes happen to equal an RFC 1042
header. This can occur in practice when the destination MAC address
equals AA:AA:03:00:00:00. More importantly, this simplifies the next
patch to mitigate A-MSDU injection attacks.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.0b2b886492f0.I23dd5d685fe16d3b0ec8106e8f01b59f499dffed@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 ++--
 net/mac80211/rx.c      | 2 +-
 net/wireless/util.c    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5224f885a99a1..58c2cd417e89a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5760,7 +5760,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  */
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 				  const u8 *addr, enum nl80211_iftype iftype,
-				  u8 data_offset);
+				  u8 data_offset, bool is_amsdu);
 
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
@@ -5772,7 +5772,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 					 enum nl80211_iftype iftype)
 {
-	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0);
+	return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false);
 }
 
 /**
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 531232b91bc49..f14d32a5001dd 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2682,7 +2682,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
 	if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
 					  rx->sdata->vif.addr,
 					  rx->sdata->vif.type,
-					  data_offset))
+					  data_offset, true))
 		return RX_DROP_UNUSABLE;
 
 	ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 382c5262d997d..39966a873e401 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -542,7 +542,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
 int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 				  const u8 *addr, enum nl80211_iftype iftype,
-				  u8 data_offset)
+				  u8 data_offset, bool is_amsdu)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 	struct {
@@ -629,7 +629,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
 	skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
 	tmp.h_proto = payload.proto;
 
-	if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
+	if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
 		    tmp.h_proto != htons(ETH_P_AARP) &&
 		    tmp.h_proto != htons(ETH_P_IPX)) ||
 		   ether_addr_equal(payload.hdr, bridge_tunnel_header)))
-- 
GitLab


From 2b8a1fee3488c602aca8bea004a087e60806a5cf Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Tue, 11 May 2021 20:02:45 +0200
Subject: [PATCH 0374/3804] cfg80211: mitigate A-MSDU aggregation attacks

Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the
destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP)
header, and if so dropping the complete A-MSDU frame. This mitigates
known attacks, although new (unknown) aggregation-based attacks may
remain possible.

This defense works because in A-MSDU aggregation injection attacks, a
normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means
the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042
header. In other words, the destination MAC address of the first A-MSDU
subframe contains the start of an RFC1042 header during an aggregation
attack. We can detect this and thereby prevent this specific attack.
For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi
Through Frame Aggregation and Fragmentation".

Note that for kernel 4.9 and above this patch depends on "mac80211:
properly handle A-MSDUs that start with an RFC 1042 header". Otherwise
this patch has no impact and attacks will remain possible.

Cc: stable@vger.kernel.org
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Link: https://lore.kernel.org/r/20210511200110.25d93176ddaf.I9e265b597f2cd23eb44573f35b625947b386a9de@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 39966a873e401..7ec021a610aeb 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 		remaining = skb->len - offset;
 		if (subframe_len > remaining)
 			goto purge;
+		/* mitigate A-MSDU aggregation injection attacks */
+		if (ether_addr_equal(eth.h_dest, rfc1042_header))
+			goto purge;
 
 		offset += sizeof(struct ethhdr);
 		last = remaining <= subframe_len + padding;
-- 
GitLab


From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:46 +0200
Subject: [PATCH 0375/3804] mac80211: drop A-MSDUs on old ciphers

With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs
since A-MSDUs are only supported if we know that they are, and
the only practical way for that is HT support which doesn't
support old ciphers.

However, we would normally accept them anyway. Since we check
the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in
the QoS header is not protected in TKIP (or WEP), this enables
attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs
completely with old ciphers.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a37bf212ae7db723587aa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f14d32a5001dd..8a72d48ad6e05 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -6,7 +6,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <linux/jiffies.h>
@@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	if (is_multicast_ether_addr(hdr->addr1))
 		return RX_DROP_UNUSABLE;
 
+	if (rx->key) {
+		/*
+		 * We should not receive A-MSDUs on pre-HT connections,
+		 * and HT connections cannot use old ciphers. Thus drop
+		 * them, as in those cases we couldn't even have SPP
+		 * A-MSDUs or such.
+		 */
+		switch (rx->key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_WEP40:
+		case WLAN_CIPHER_SUITE_WEP104:
+		case WLAN_CIPHER_SUITE_TKIP:
+			return RX_DROP_UNUSABLE;
+		default:
+			break;
+		}
+	}
+
 	return __ieee80211_rx_h_amsdu(rx, 0);
 }
 
-- 
GitLab


From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:47 +0200
Subject: [PATCH 0376/3804] mac80211: add fragment cache to sta_info

Prior patches protected against fragmentation cache attacks
by coloring keys, but this shows that it can lead to issues
when multiple stations use the same sequence number. Add a
fragment cache to struct sta_info (in addition to the one in
the interface) to separate fragments for different stations
properly.

This then automatically clears most of the fragment cache when a
station disconnects (or reassociates) from an AP, or when client
interfaces disconnect from the network, etc.

On the way, also fix the comment there since this brings us in line
with the recommendation in 802.11-2016 ("An AP should support ...").
Additionally, remove a useless condition (since there's no problem
purging an already empty list).

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f6600d83de7ec9a3a45bcd52@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 26 ++++--------------------
 net/mac80211/iface.c       | 11 +++-------
 net/mac80211/rx.c          | 41 ++++++++++++++++++++++++++++----------
 net/mac80211/sta_info.c    |  6 +++++-
 net/mac80211/sta_info.h    | 32 ++++++++++++++++++++++++++++-
 5 files changed, 73 insertions(+), 43 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 874ffe7819e53..4c714375bad0f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
 #define IEEE80211_ENCRYPT_HEADROOM 8
 #define IEEE80211_ENCRYPT_TAILROOM 18
 
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
 /* power level hasn't been configured (or set to automatic) */
 #define IEEE80211_UNSET_POWER_LEVEL	INT_MIN
 
@@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
 
 #define IEEE80211_MAX_NAN_INSTANCE_ID 255
 
-struct ieee80211_fragment_entry {
-	struct sk_buff_head skb_list;
-	unsigned long first_frag_time;
-	u16 seq;
-	u16 extra_len;
-	u16 last_frag;
-	u8 rx_queue;
-	bool check_sequential_pn; /* needed for CCMP/GCMP */
-	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
-	unsigned int key_color;
-};
-
-
 struct ieee80211_bss {
 	u32 device_ts_beacon, device_ts_presp;
 
@@ -903,9 +884,7 @@ struct ieee80211_sub_if_data {
 
 	char name[IFNAMSIZ];
 
-	/* Fragment table for host-based reassembly */
-	struct ieee80211_fragment_entry	fragments[IEEE80211_FRAGMENT_MAX];
-	unsigned int fragment_next;
+	struct ieee80211_fragment_cache frags;
 
 	/* TID bitmap for NoAck policy */
 	u16 noack_map;
@@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
 #define debug_noinline
 #endif
 
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
 #endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7032a2b59249c..2e2f73a4aa734 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (c) 2016        Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
  */
 static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
 {
-	int i;
-
 	/* free extra data */
 	ieee80211_free_keys(sdata, false);
 
 	ieee80211_debugfs_remove_netdev(sdata);
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		__skb_queue_purge(&sdata->fragments[i].skb_list);
-	sdata->fragment_next = 0;
+	ieee80211_destroy_frag_cache(&sdata->frags);
 
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		ieee80211_mesh_teardown_sdata(sdata);
@@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	sdata->wdev.wiphy = local->hw.wiphy;
 	sdata->local = local;
 
-	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
-		skb_queue_head_init(&sdata->fragments[i].skb_list);
+	ieee80211_init_frag_cache(&sdata->frags);
 
 	INIT_LIST_HEAD(&sdata->key_list);
 
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8a72d48ad6e05..7212a1bebd0c4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	return result;
 }
 
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+		skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+		__skb_queue_purge(&cache->entries[i].skb_list);
+}
+
 static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
 			 unsigned int frag, unsigned int seq, int rx_queue,
 			 struct sk_buff **skb)
 {
 	struct ieee80211_fragment_entry *entry;
 
-	entry = &sdata->fragments[sdata->fragment_next++];
-	if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
-		sdata->fragment_next = 0;
+	entry = &cache->entries[cache->next++];
+	if (cache->next >= IEEE80211_FRAGMENT_MAX)
+		cache->next = 0;
 
-	if (!skb_queue_empty(&entry->skb_list))
-		__skb_queue_purge(&entry->skb_list);
+	__skb_queue_purge(&entry->skb_list);
 
 	__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
 	*skb = NULL;
@@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
 }
 
 static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
 			  unsigned int frag, unsigned int seq,
 			  int rx_queue, struct ieee80211_hdr *hdr)
 {
 	struct ieee80211_fragment_entry *entry;
 	int i, idx;
 
-	idx = sdata->fragment_next;
+	idx = cache->next;
 	for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
 		struct ieee80211_hdr *f_hdr;
 		struct sk_buff *f_skb;
@@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
 		if (idx < 0)
 			idx = IEEE80211_FRAGMENT_MAX - 1;
 
-		entry = &sdata->fragments[idx];
+		entry = &cache->entries[idx];
 		if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
 		    entry->rx_queue != rx_queue ||
 		    entry->last_frag + 1 != frag)
@@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 {
+	struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
 	struct ieee80211_hdr *hdr;
 	u16 sc;
 	__le16 fc;
@@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		goto out_no_led;
 	}
 
+	if (rx->sta)
+		cache = &rx->sta->frags;
+
 	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
 		goto out;
 
@@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
 	if (frag == 0) {
 		/* This is the first fragment of a new frame. */
-		entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+		entry = ieee80211_reassemble_add(cache, frag, seq,
 						 rx->seqno_idx, &(rx->skb));
 		if (requires_sequential_pn(rx, fc)) {
 			int queue = rx->security_idx;
@@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	/* This is a fragment for a frame that should already be pending in
 	 * fragment cache. Add this fragment to the end of the pending entry.
 	 */
-	entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+	entry = ieee80211_reassemble_find(cache, frag, seq,
 					  rx->seqno_idx, hdr);
 	if (!entry) {
 		I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ec6973ee88ef4..f2fb69da9b6e1 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
  */
 
 #include <linux/module.h>
@@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 
 	u64_stats_init(&sta->rx_stats.syncp);
 
+	ieee80211_init_frag_cache(&sta->frags);
+
 	sta->sta_state = IEEE80211_STA_NONE;
 
 	/* Mark TID as unreserved */
@@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
 
 	ieee80211_sta_debugfs_remove(sta);
 
+	ieee80211_destroy_frag_cache(&sta->frags);
+
 	cleanup_single_sta(sta);
 }
 
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b9d0c7cc583..5c56d29a619e6 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,7 +3,7 @@
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright(c) 2020 Intel Corporation
+ * Copyright(c) 2020-2021 Intel Corporation
  */
 
 #ifndef STA_INFO_H
@@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats {
 	u64 msdu[IEEE80211_NUM_TIDS + 1];
 };
 
+/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+	struct sk_buff_head skb_list;
+	unsigned long first_frag_time;
+	u16 seq;
+	u16 extra_len;
+	u16 last_frag;
+	u8 rx_queue;
+	bool check_sequential_pn; /* needed for CCMP/GCMP */
+	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+	unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+	struct ieee80211_fragment_entry	entries[IEEE80211_FRAGMENT_MAX];
+	unsigned int next;
+};
+
 /*
  * The bandwidth threshold below which the per-station CoDel parameters will be
  * scaled to be more lenient (to prevent starvation of slow stations). This
@@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats {
  * @status_stats.last_ack_signal: last ACK signal
  * @status_stats.ack_signal_filled: last ACK signal validity
  * @status_stats.avg_ack_signal: average ACK signal
+ * @frags: fragment cache
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -639,6 +667,8 @@ struct sta_info {
 
 	struct cfg80211_chan_def tdls_chandef;
 
+	struct ieee80211_fragment_cache frags;
+
 	/* keep last! */
 	struct ieee80211_sta sta;
 };
-- 
GitLab


From bf30ca922a0c0176007e074b0acc77ed345e9990 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:48 +0200
Subject: [PATCH 0377/3804] mac80211: check defrag PN against current frame

As pointed out by Mathy Vanhoef, we implement the RX PN check
on fragmented frames incorrectly - we check against the last
received PN prior to the new frame, rather than to the one in
this frame itself.

Prior patches addressed the security issue here, but in order
to be able to reason better about the code, fix it to really
compare against the current frame's PN, not the last stored
one.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.bfbc340ff071.Id0b690e581da7d03d76df90bb0e3fd55930bc8a0@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 11 +++++++++--
 net/mac80211/rx.c          |  5 ++---
 net/mac80211/wpa.c         | 13 +++++++++----
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4c714375bad0f..214404a558fb6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -223,8 +223,15 @@ struct ieee80211_rx_data {
 	 */
 	int security_idx;
 
-	u32 tkip_iv32;
-	u16 tkip_iv16;
+	union {
+		struct {
+			u32 iv32;
+			u16 iv16;
+		} tkip;
+		struct {
+			u8 pn[IEEE80211_CCMP_PN_LEN];
+		} ccm_gcm;
+	};
 };
 
 struct ieee80211_csa_settings {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7212a1bebd0c4..b619c47e1d120 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2308,7 +2308,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	if (entry->check_sequential_pn) {
 		int i;
 		u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
-		int queue;
 
 		if (!requires_sequential_pn(rx, fc))
 			return RX_DROP_UNUSABLE;
@@ -2323,8 +2322,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			if (pn[i])
 				break;
 		}
-		queue = rx->security_idx;
-		rpn = rx->key->u.ccmp.rx_pn[queue];
+
+		rpn = rx->ccm_gcm.pn;
 		if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 91bf32af55e9a..bca47fad5a162 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -3,6 +3,7 @@
  * Copyright 2002-2004, Instant802 Networks, Inc.
  * Copyright 2008, Jouni Malinen <j@w1.fi>
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2020-2021 Intel Corporation
  */
 
 #include <linux/netdevice.h>
@@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
 
 update_iv:
 	/* update IV in key information to be able to detect replays */
-	rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
-	rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+	rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+	rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
 
 	return RX_CONTINUE;
 
@@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
 					  key, skb->data + hdrlen,
 					  skb->len - hdrlen, rx->sta->sta.addr,
 					  hdr->addr1, hwaccel, rx->security_idx,
-					  &rx->tkip_iv32,
-					  &rx->tkip_iv16);
+					  &rx->tkip.iv32,
+					  &rx->tkip.iv16);
 	if (res != TKIP_DECRYPT_OK)
 		return RX_DROP_UNUSABLE;
 
@@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
 		}
 
 		memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+		if (unlikely(ieee80211_is_frag(hdr)))
+			memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
 	}
 
 	/* Remove CCMP header and MIC */
@@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
 		}
 
 		memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+		if (unlikely(ieee80211_is_frag(hdr)))
+			memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
 	}
 
 	/* Remove GCMP header and MIC */
-- 
GitLab


From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:49 +0200
Subject: [PATCH 0378/3804] mac80211: prevent attacks on TKIP/WEP as well

Similar to the issues fixed in previous patches, TKIP and WEP
should be protected even if for TKIP we have the Michael MIC
protecting it, and WEP is broken anyway.

However, this also somewhat protects potential other algorithms
that drivers might implement.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae050e015f6c92859dbe27@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c       | 12 ++++++++++++
 net/mac80211/sta_info.h |  3 ++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b619c47e1d120..4454ec47283f8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			 * next fragment has a sequential PN value.
 			 */
 			entry->check_sequential_pn = true;
+			entry->is_protected = true;
 			entry->key_color = rx->key->color;
 			memcpy(entry->last_pn,
 			       rx->key->u.ccmp.rx_pn[queue],
@@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
 			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
 				     IEEE80211_GCMP_PN_LEN);
+		} else if (rx->key && ieee80211_has_protected(fc)) {
+			entry->is_protected = true;
+			entry->key_color = rx->key->color;
 		}
 		return RX_QUEUED;
 	}
@@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 		if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+	} else if (entry->is_protected &&
+		   (!rx->key || !ieee80211_has_protected(fc) ||
+		    rx->key->color != entry->key_color)) {
+		/* Drop this as a mixed key or fragment cache attack, even
+		 * if for TKIP Michael MIC should protect us, and WEP is a
+		 * lost cause anyway.
+		 */
+		return RX_DROP_UNUSABLE;
 	}
 
 	skb_pull(rx->skb, ieee80211_hdrlen(fc));
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 5c56d29a619e6..0333072ebd982 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -455,7 +455,8 @@ struct ieee80211_fragment_entry {
 	u16 extra_len;
 	u16 last_frag;
 	u8 rx_queue;
-	bool check_sequential_pn; /* needed for CCMP/GCMP */
+	u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+	   is_protected:1;
 	u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
 	unsigned int key_color;
 };
-- 
GitLab


From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 11 May 2021 20:02:50 +0200
Subject: [PATCH 0379/3804] mac80211: do not accept/forward invalid EAPOL
 frames

EAPOL frames are used for authentication and key management between the
AP and each individual STA associated in the BSS. Those frames are not
supposed to be sent by one associated STA to another associated STA
(either unicast or broadcast/multicast).

Similarly, in 802.11 they're supposed to be sent to the authenticator
(AP) address.

Since it is possible for unexpected EAPOL frames to result in misbehavior
in supplicant implementations, it is better for the AP to not allow such
cases to be forwarded to other clients either directly, or indirectly if
the AP interface is part of a bridge.

Accept EAPOL (control port) frames only if they're transmitted to the
own address, or, due to interoperability concerns, to the PAE group
address.

Disable forwarding of EAPOL (or well, the configured control port
protocol) frames back to wireless medium in all cases. Previously, these
frames were accepted from fully authenticated and authorized stations
and also from unauthenticated stations for one of the cases.

Additionally, to avoid forwarding by the bridge, rewrite the PAE group
address case to the local MAC address.

Cc: stable@vger.kernel.org
Co-developed-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660de65ba3c8aca75b1d10f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4454ec47283f8..22a925899a9ec 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
 	struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
 
 	/*
-	 * Allow EAPOL frames to us/the PAE group address regardless
-	 * of whether the frame was encrypted or not.
+	 * Allow EAPOL frames to us/the PAE group address regardless of
+	 * whether the frame was encrypted or not, and always disallow
+	 * all other destination addresses for them.
 	 */
-	if (ehdr->h_proto == rx->sdata->control_port_protocol &&
-	    (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
-	     ether_addr_equal(ehdr->h_dest, pae_group_addr)))
-		return true;
+	if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+		return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+		       ether_addr_equal(ehdr->h_dest, pae_group_addr);
 
 	if (ieee80211_802_1x_port_control(rx) ||
 	    ieee80211_drop_unencrypted(rx, fc))
@@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
 		cfg80211_rx_control_port(dev, skb, noencrypt);
 		dev_kfree_skb(skb);
 	} else {
+		struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
 		memset(skb->cb, 0, sizeof(skb->cb));
 
+		/*
+		 * 802.1X over 802.11 requires that the authenticator address
+		 * be used for EAPOL frames. However, 802.1X allows the use of
+		 * the PAE group address instead. If the interface is part of
+		 * a bridge and we pass the frame with the PAE group address,
+		 * then the bridge will forward it to the network (even if the
+		 * client was not associated yet), which isn't supposed to
+		 * happen.
+		 * To avoid that, rewrite the destination address to our own
+		 * address, so that the authenticator (e.g. hostapd) will see
+		 * the frame, but bridge won't forward it anywhere else. Note
+		 * that due to earlier filtering, the only other address can
+		 * be the PAE group address.
+		 */
+		if (unlikely(skb->protocol == sdata->control_port_protocol &&
+			     !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+			ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
 		/* deliver to local stack */
 		if (rx->list)
 			list_add_tail(&skb->list, rx->list);
@@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
 	if ((sdata->vif.type == NL80211_IFTYPE_AP ||
 	     sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
 	    !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+	    ehdr->h_proto != rx->sdata->control_port_protocol &&
 	    (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
 		if (is_multicast_ether_addr(ehdr->h_dest) &&
 		    ieee80211_vif_get_num_mcast_if(sdata) != 0) {
-- 
GitLab


From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:51 +0200
Subject: [PATCH 0380/3804] mac80211: extend protection against mixed key and
 fragment cache attacks

For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is
done by the hardware, and the Protected bit in the Frame Control field
is cleared in the lower level driver before the frame is passed to
mac80211. In such cases, the condition for ieee80211_has_protected() is
not met in ieee80211_rx_h_defragment() of mac80211 and the new security
validation steps are not executed.

Extend mac80211 to cover the case where the Protected bit has been
cleared, but the frame is indicated as having been decrypted by the
hardware. This extends protection against mixed key and fragment cache
attack for additional drivers/chips. This fixes CVE-2020-24586 and
CVE-2020-24587 for such cases.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67075fcb5deb50eb7408aa@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 22a925899a9ec..1bb43edd47b6c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	unsigned int frag, seq;
 	struct ieee80211_fragment_entry *entry;
 	struct sk_buff *skb;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 
 	hdr = (struct ieee80211_hdr *)rx->skb->data;
 	fc = hdr->frame_control;
@@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 				     sizeof(rx->key->u.gcmp.rx_pn[queue]));
 			BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
 				     IEEE80211_GCMP_PN_LEN);
-		} else if (rx->key && ieee80211_has_protected(fc)) {
+		} else if (rx->key &&
+			   (ieee80211_has_protected(fc) ||
+			    (status->flag & RX_FLAG_DECRYPTED))) {
 			entry->is_protected = true;
 			entry->key_color = rx->key->color;
 		}
@@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 			return RX_DROP_UNUSABLE;
 		memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
 	} else if (entry->is_protected &&
-		   (!rx->key || !ieee80211_has_protected(fc) ||
+		   (!rx->key ||
+		    (!ieee80211_has_protected(fc) &&
+		     !(status->flag & RX_FLAG_DECRYPTED)) ||
 		    rx->key->color != entry->key_color)) {
 		/* Drop this as a mixed key or fragment cache attack, even
 		 * if for TKIP Michael MIC should protect us, and WEP is a
 		 * lost cause anyway.
 		 */
 		return RX_DROP_UNUSABLE;
+	} else if (entry->is_protected && rx->key &&
+		   entry->key_color != rx->key->color &&
+		   (status->flag & RX_FLAG_DECRYPTED)) {
+		return RX_DROP_UNUSABLE;
 	}
 
 	skb_pull(rx->skb, ieee80211_hdrlen(fc));
-- 
GitLab


From a1166b2653db2f3de7338b9fb8a0f6e924b904ee Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:52 +0200
Subject: [PATCH 0381/3804] ath10k: add CCMP PN replay protection for
 fragmented frames for PCIe

PN replay check for not fragmented frames is finished in the firmware,
but this was not done for fragmented frames when ath10k is used with
QCA6174/QCA6377 PCIe. mac80211 has the function
ieee80211_rx_h_defragment() for PN replay check for fragmented frames,
but this does not get checked with QCA6174 due to the
ieee80211_has_protected() condition not matching the cleared Protected
bit case.

Validate the PN of received fragmented frames within ath10k when CCMP is
used and drop the fragment if the PN is not correct (incremented by
exactly one from the previous fragment). This applies only for
QCA6174/QCA6377 PCIe.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.9ba2664866a4.I756e47b67e210dba69966d989c4711ffc02dc6bc@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt.h    |  1 +
 drivers/net/wireless/ath/ath10k/htt_rx.c | 99 +++++++++++++++++++++++-
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 956157946106c..dbc8aef82a65f 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -845,6 +845,7 @@ enum htt_security_types {
 
 #define ATH10K_HTT_TXRX_PEER_SECURITY_MAX 2
 #define ATH10K_TXRX_NUM_EXT_TIDS 19
+#define ATH10K_TXRX_NON_QOS_TID 16
 
 enum htt_security_flags {
 #define HTT_SECURITY_TYPE_MASK 0x7F
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 1a08156d5011d..f1e5bce8b14f4 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1746,16 +1746,87 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu)
 	msdu->ip_summed = ath10k_htt_rx_get_csum_state(msdu);
 }
 
+static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb,
+				  u16 offset,
+				  enum htt_rx_mpdu_encrypt_type enctype)
+{
+	struct ieee80211_hdr *hdr;
+	u64 pn = 0;
+	u8 *ehdr;
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	ehdr = skb->data + offset + ieee80211_hdrlen(hdr->frame_control);
+
+	if (enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) {
+		pn = ehdr[0];
+		pn |= (u64)ehdr[1] << 8;
+		pn |= (u64)ehdr[4] << 16;
+		pn |= (u64)ehdr[5] << 24;
+		pn |= (u64)ehdr[6] << 32;
+		pn |= (u64)ehdr[7] << 40;
+	}
+	return pn;
+}
+
+static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
+					  struct sk_buff *skb,
+					  u16 peer_id,
+					  u16 offset,
+					  enum htt_rx_mpdu_encrypt_type enctype)
+{
+	struct ath10k_peer *peer;
+	union htt_rx_pn_t *last_pn, new_pn = {0};
+	struct ieee80211_hdr *hdr;
+	bool more_frags;
+	u8 tid, frag_number;
+	u32 seq;
+
+	peer = ath10k_peer_find_by_id(ar, peer_id);
+	if (!peer) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid peer for frag pn check\n");
+		return false;
+	}
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tid = ieee80211_get_tid(hdr);
+	else
+		tid = ATH10K_TXRX_NON_QOS_TID;
+
+	last_pn = &peer->frag_tids_last_pn[tid];
+	new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype);
+	more_frags = ieee80211_has_morefrags(hdr->frame_control);
+	frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG;
+	seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4;
+
+	if (frag_number == 0) {
+		last_pn->pn48 = new_pn.pn48;
+		peer->frag_tids_seq[tid] = seq;
+	} else {
+		if (seq != peer->frag_tids_seq[tid])
+			return false;
+
+		if (new_pn.pn48 != last_pn->pn48 + 1)
+			return false;
+
+		last_pn->pn48 = new_pn.pn48;
+	}
+
+	return true;
+}
+
 static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 				 struct sk_buff_head *amsdu,
 				 struct ieee80211_rx_status *status,
 				 bool fill_crypt_header,
 				 u8 *rx_hdr,
-				 enum ath10k_pkt_rx_err *err)
+				 enum ath10k_pkt_rx_err *err,
+				 u16 peer_id,
+				 bool frag)
 {
 	struct sk_buff *first;
 	struct sk_buff *last;
-	struct sk_buff *msdu;
+	struct sk_buff *msdu, *temp;
 	struct htt_rx_desc *rxd;
 	struct ieee80211_hdr *hdr;
 	enum htt_rx_mpdu_encrypt_type enctype;
@@ -1768,6 +1839,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	bool is_decrypted;
 	bool is_mgmt;
 	u32 attention;
+	bool frag_pn_check = true;
 
 	if (skb_queue_empty(amsdu))
 		return;
@@ -1866,6 +1938,24 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	}
 
 	skb_queue_walk(amsdu, msdu) {
+		if (frag && !fill_crypt_header && is_decrypted &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2)
+			frag_pn_check = ath10k_htt_rx_h_frag_pn_check(ar,
+								      msdu,
+								      peer_id,
+								      0,
+								      enctype);
+
+		if (!frag_pn_check) {
+			/* Discard the fragment with invalid PN */
+			temp = msdu->prev;
+			__skb_unlink(msdu, amsdu);
+			dev_kfree_skb_any(msdu);
+			msdu = temp;
+			frag_pn_check = true;
+			continue;
+		}
+
 		ath10k_htt_rx_h_csum_offload(msdu);
 		ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype,
 					is_decrypted);
@@ -2071,7 +2161,8 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 		ath10k_htt_rx_h_unchain(ar, &amsdu, &drop_cnt, &unchain_cnt);
 
 	ath10k_htt_rx_h_filter(ar, &amsdu, rx_status, &drop_cnt_filter);
-	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err);
+	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err, 0,
+			     false);
 	msdus_to_queue = skb_queue_len(&amsdu);
 	ath10k_htt_rx_h_enqueue(ar, &amsdu, rx_status);
 
@@ -3027,7 +3118,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
 			ath10k_htt_rx_h_filter(ar, &amsdu, status, NULL);
 			ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false, NULL,
-					     NULL);
+					     NULL, peer_id, frag);
 			ath10k_htt_rx_h_enqueue(ar, &amsdu, status);
 			break;
 		case -EAGAIN:
-- 
GitLab


From 65c415a144ad8132b6a6d97d4a1919ffc728e2d1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:53 +0200
Subject: [PATCH 0382/3804] ath10k: drop fragments with multicast DA for PCIe

Fragmentation is not used with multicast frames. Discard unexpected
fragments with multicast DA. This fixes CVE-2020-26145.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.5a0bd289bda8.Idd6ebea20038fb1cfee6de924aa595e5647c9eae@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index f1e5bce8b14f4..cb04848ed5cb6 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1768,6 +1768,16 @@ static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb,
 	return pn;
 }
 
+static bool ath10k_htt_rx_h_frag_multicast_check(struct ath10k *ar,
+						 struct sk_buff *skb,
+						 u16 offset)
+{
+	struct ieee80211_hdr *hdr;
+
+	hdr = (struct ieee80211_hdr *)(skb->data + offset);
+	return !is_multicast_ether_addr(hdr->addr1);
+}
+
 static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar,
 					  struct sk_buff *skb,
 					  u16 peer_id,
@@ -1839,7 +1849,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 	bool is_decrypted;
 	bool is_mgmt;
 	u32 attention;
-	bool frag_pn_check = true;
+	bool frag_pn_check = true, multicast_check = true;
 
 	if (skb_queue_empty(amsdu))
 		return;
@@ -1946,13 +1956,20 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 								      0,
 								      enctype);
 
-		if (!frag_pn_check) {
-			/* Discard the fragment with invalid PN */
+		if (frag)
+			multicast_check = ath10k_htt_rx_h_frag_multicast_check(ar,
+									       msdu,
+									       0);
+
+		if (!frag_pn_check || !multicast_check) {
+			/* Discard the fragment with invalid PN or multicast DA
+			 */
 			temp = msdu->prev;
 			__skb_unlink(msdu, amsdu);
 			dev_kfree_skb_any(msdu);
 			msdu = temp;
 			frag_pn_check = true;
+			multicast_check = true;
 			continue;
 		}
 
-- 
GitLab


From 40e7462dad6f3d06efdb17d26539e61ab6e34db1 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:54 +0200
Subject: [PATCH 0383/3804] ath10k: drop fragments with multicast DA for SDIO

Fragmentation is not used with multicast frames. Discard unexpected
fragments with multicast DA. This fixes CVE-2020-26145.

Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.9ca6ca7945a9.I1e18b514590af17c155bda86699bc3a971a8dcf4@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index cb04848ed5cb6..b1d93ff5215a6 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2617,6 +2617,13 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt,
 	rx_desc = (struct htt_hl_rx_desc *)(skb->data + tot_hdr_len);
 	rx_desc_info = __le32_to_cpu(rx_desc->info);
 
+	hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len);
+
+	if (is_multicast_ether_addr(hdr->addr1)) {
+		/* Discard the fragment with multicast DA */
+		goto err;
+	}
+
 	if (!MS(rx_desc_info, HTT_RX_DESC_HL_INFO_ENCRYPTED)) {
 		spin_unlock_bh(&ar->data_lock);
 		return ath10k_htt_rx_proc_rx_ind_hl(htt, &resp->rx_ind_hl, skb,
@@ -2624,8 +2631,6 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt,
 						    HTT_RX_NON_TKIP_MIC);
 	}
 
-	hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len);
-
 	if (ieee80211_has_retry(hdr->frame_control))
 		goto err;
 
-- 
GitLab


From 079a108feba474b4b32bd3471db03e11f2f83b81 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:55 +0200
Subject: [PATCH 0384/3804] ath10k: drop MPDU which has discard flag set by
 firmware for SDIO

When the discard flag is set by the firmware for an MPDU, it should be
dropped. This allows a mitigation for CVE-2020-24588 to be implemented
in the firmware.

Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.11968c725b5c.Idd166365ebea2771c0c0a38c78b5060750f90e17@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c  |  5 +++++
 drivers/net/wireless/ath/ath10k/rx_desc.h | 14 +++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index b1d93ff5215a6..12451ab66a191 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2312,6 +2312,11 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt,
 	fw_desc = &rx->fw_desc;
 	rx_desc_len = fw_desc->len;
 
+	if (fw_desc->u.bits.discard) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "htt discard mpdu\n");
+		goto err;
+	}
+
 	/* I have not yet seen any case where num_mpdu_ranges > 1.
 	 * qcacld does not seem handle that case either, so we introduce the
 	 * same limitiation here as well.
diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h
index f2b6bf8f0d60d..705b6295e4663 100644
--- a/drivers/net/wireless/ath/ath10k/rx_desc.h
+++ b/drivers/net/wireless/ath/ath10k/rx_desc.h
@@ -1282,7 +1282,19 @@ struct fw_rx_desc_base {
 #define FW_RX_DESC_UDP              (1 << 6)
 
 struct fw_rx_desc_hl {
-	u8 info0;
+	union {
+		struct {
+		u8 discard:1,
+		   forward:1,
+		   any_err:1,
+		   dup_err:1,
+		   reserved:1,
+		   inspect:1,
+		   extension:2;
+		} bits;
+		u8 info0;
+	} u;
+
 	u8 version;
 	u8 len;
 	u8 flags;
-- 
GitLab


From 0dc267b13f3a7e8424a898815dd357211b737330 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 11 May 2021 20:02:56 +0200
Subject: [PATCH 0385/3804] ath10k: Fix TKIP Michael MIC verification for PCIe

TKIP Michael MIC was not verified properly for PCIe cases since the
validation steps in ieee80211_rx_h_michael_mic_verify() in mac80211 did
not get fully executed due to unexpected flag values in
ieee80211_rx_status.

Fix this by setting the flags property to meet mac80211 expectations for
performing Michael MIC validation there. This fixes CVE-2020-26141. It
does the same as ath10k_htt_rx_proc_rx_ind_hl() does for SDIO, which
already passes the MIC verification test case. This applies only to
QCA6174/QCA9377 PCIe.

Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1

Cc: stable@vger.kernel.org
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.c3f1d42c6746.I795593fcaae941c471425b8c7d5f7bb185d29142@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 12451ab66a191..87196f9bbdea3 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -1974,6 +1974,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 		}
 
 		ath10k_htt_rx_h_csum_offload(msdu);
+
+		if (frag && !fill_crypt_header &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+			status->flag &= ~RX_FLAG_MMIC_STRIPPED;
+
 		ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype,
 					is_decrypted);
 
@@ -1991,6 +1996,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar,
 
 		hdr = (void *)msdu->data;
 		hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+
+		if (frag && !fill_crypt_header &&
+		    enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA)
+			status->flag &= ~RX_FLAG_IV_STRIPPED &
+					~RX_FLAG_MMIC_STRIPPED;
 	}
 }
 
-- 
GitLab


From 62a8ff67eba52dae9b107e1fb8827054ed00a265 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:57 +0200
Subject: [PATCH 0386/3804] ath10k: Validate first subframe of A-MSDU before
 processing the list

In certain scenarios a normal MSDU can be received as an A-MSDU when
the A-MSDU present bit of a QoS header gets flipped during reception.
Since this bit is unauthenticated, the hardware crypto engine can pass
the frame to the driver without any error indication.

This could result in processing unintended subframes collected in the
A-MSDU list. Hence, validate A-MSDU list by checking if the first frame
has a valid subframe header.

When a non-aggregated MSDU is misinterpreted as an A-MSDU, the DA field
of the first subframe header overlaps the LLC/SNAP header of the normal
MSDU. To avoid processing such frames, add a validation that filters out
A-MSDUs whose first subframe header DA matches the LLC/SNAP header
pattern.

Tested-on: QCA9984 hw1.0 PCI 10.4-3.10-00047

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.e6f5eb7b9847.I38a77ae26096862527a5eab73caebd7346af8b66@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/htt_rx.c | 61 ++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 87196f9bbdea3..7ffb5d5b2a70e 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -2108,14 +2108,62 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar,
 	ath10k_unchain_msdu(amsdu, unchain_cnt);
 }
 
+static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar,
+					 struct sk_buff_head *amsdu)
+{
+	u8 *subframe_hdr;
+	struct sk_buff *first;
+	bool is_first, is_last;
+	struct htt_rx_desc *rxd;
+	struct ieee80211_hdr *hdr;
+	size_t hdr_len, crypto_len;
+	enum htt_rx_mpdu_encrypt_type enctype;
+	int bytes_aligned = ar->hw_params.decap_align_bytes;
+
+	first = skb_peek(amsdu);
+
+	rxd = (void *)first->data - sizeof(*rxd);
+	hdr = (void *)rxd->rx_hdr_status;
+
+	is_first = !!(rxd->msdu_end.common.info0 &
+		      __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU));
+	is_last = !!(rxd->msdu_end.common.info0 &
+		     __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU));
+
+	/* Return in case of non-aggregated msdu */
+	if (is_first && is_last)
+		return true;
+
+	/* First msdu flag is not set for the first msdu of the list */
+	if (!is_first)
+		return false;
+
+	enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0),
+		     RX_MPDU_START_INFO0_ENCRYPT_TYPE);
+
+	hdr_len = ieee80211_hdrlen(hdr->frame_control);
+	crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype);
+
+	subframe_hdr = (u8 *)hdr + round_up(hdr_len, bytes_aligned) +
+		       crypto_len;
+
+	/* Validate if the amsdu has a proper first subframe.
+	 * There are chances a single msdu can be received as amsdu when
+	 * the unauthenticated amsdu flag of a QoS header
+	 * gets flipped in non-SPP AMSDU's, in such cases the first
+	 * subframe has llc/snap header in place of a valid da.
+	 * return false if the da matches rfc1042 pattern
+	 */
+	if (ether_addr_equal(subframe_hdr, rfc1042_header))
+		return false;
+
+	return true;
+}
+
 static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 					struct sk_buff_head *amsdu,
 					struct ieee80211_rx_status *rx_status)
 {
-	/* FIXME: It might be a good idea to do some fuzzy-testing to drop
-	 * invalid/dangerous frames.
-	 */
-
 	if (!rx_status->freq) {
 		ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring frame(s)!\n");
 		return false;
@@ -2126,6 +2174,11 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar,
 		return false;
 	}
 
+	if (!ath10k_htt_rx_validate_amsdu(ar, amsdu)) {
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid amsdu received\n");
+		return false;
+	}
+
 	return true;
 }
 
-- 
GitLab


From c3944a5621026c176001493d48ee66ff94e1a39a Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:58 +0200
Subject: [PATCH 0387/3804] ath11k: Clear the fragment cache during key install

Currently the fragment cache set up during peer assoc is
cleared only during peer delete. If a key reinstallation
happens with the same peer, old fragments cached before the
key installation could be combined with fragments received
after it. This might be exploited to mix fragments of
different data, resulting in a well-formed but unintended
reassembled packet being passed up the stack.

Hence flush the fragment cache on every key installation to prevent
potential attacks (CVE-2020-24587).

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.218dc777836f.I9af6fc76215a35936c4152552018afb5079c5d8c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 18 ++++++++++++++++++
 drivers/net/wireless/ath/ath11k/dp_rx.h |  1 +
 drivers/net/wireless/ath/ath11k/mac.c   |  6 ++++++
 3 files changed, 25 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 1d9aa1bb6b6e9..3382f8bfcb48d 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -852,6 +852,24 @@ static void ath11k_dp_rx_frags_cleanup(struct dp_rx_tid *rx_tid, bool rel_link_d
 	__skb_queue_purge(&rx_tid->rx_frags);
 }
 
+void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer)
+{
+	struct dp_rx_tid *rx_tid;
+	int i;
+
+	lockdep_assert_held(&ar->ab->base_lock);
+
+	for (i = 0; i <= IEEE80211_NUM_TIDS; i++) {
+		rx_tid = &peer->rx_tid[i];
+
+		spin_unlock_bh(&ar->ab->base_lock);
+		del_timer_sync(&rx_tid->frag_timer);
+		spin_lock_bh(&ar->ab->base_lock);
+
+		ath11k_dp_rx_frags_cleanup(rx_tid, true);
+	}
+}
+
 void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer)
 {
 	struct dp_rx_tid *rx_tid;
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.h b/drivers/net/wireless/ath/ath11k/dp_rx.h
index bf399312b5ff5..623da3bf9dc81 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.h
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.h
@@ -49,6 +49,7 @@ int ath11k_dp_peer_rx_pn_replay_config(struct ath11k_vif *arvif,
 				       const u8 *peer_addr,
 				       enum set_key_cmd key_cmd,
 				       struct ieee80211_key_conf *key);
+void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer);
 void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer);
 void ath11k_peer_rx_tid_delete(struct ath11k *ar,
 			       struct ath11k_peer *peer, u8 tid);
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 4df425dd31a26..9d0ff150ec30f 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -2779,6 +2779,12 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	 */
 	spin_lock_bh(&ab->base_lock);
 	peer = ath11k_peer_find(ab, arvif->vdev_id, peer_addr);
+
+	/* flush the fragments cache during key (re)install to
+	 * ensure all frags in the new frag list belong to the same key.
+	 */
+	if (peer && cmd == SET_KEY)
+		ath11k_peer_frags_flush(ar, peer);
 	spin_unlock_bh(&ab->base_lock);
 
 	if (!peer) {
-- 
GitLab


From 210f563b097997ce917e82feab356b298bfd12b0 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Tue, 11 May 2021 20:02:59 +0200
Subject: [PATCH 0388/3804] ath11k: Drop multicast fragments

Fragmentation is used only with unicast frames. Drop multicast fragments
to avoid any undesired behavior.

Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2

Cc: stable@vger.kernel.org
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20210511200110.1d53bfd20a8b.Ibb63283051bb5e2c45951932c6e1f351d5a73dc3@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 3382f8bfcb48d..603d2f93ac18f 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -260,6 +260,16 @@ static void ath11k_dp_rxdesc_set_msdu_len(struct ath11k_base *ab,
 	ab->hw_params.hw_ops->rx_desc_set_msdu_len(desc, len);
 }
 
+static bool ath11k_dp_rx_h_attn_is_mcbc(struct ath11k_base *ab,
+					struct hal_rx_desc *desc)
+{
+	struct rx_attention *attn = ath11k_dp_rx_get_attention(ab, desc);
+
+	return ath11k_dp_rx_h_msdu_end_first_msdu(ab, desc) &&
+		(!!FIELD_GET(RX_ATTENTION_INFO1_MCAST_BCAST,
+		 __le32_to_cpu(attn->info1)));
+}
+
 static void ath11k_dp_service_mon_ring(struct timer_list *t)
 {
 	struct ath11k_base *ab = from_timer(ab, t, mon_reap_timer);
@@ -3468,6 +3478,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	u8 tid;
 	int ret = 0;
 	bool more_frags;
+	bool is_mcbc;
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
 	peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(ar->ab, rx_desc);
@@ -3475,6 +3486,11 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar,
 	seqno = ath11k_dp_rx_h_mpdu_start_seq_no(ar->ab, rx_desc);
 	frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, msdu);
 	more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(ar->ab, msdu);
+	is_mcbc = ath11k_dp_rx_h_attn_is_mcbc(ar->ab, rx_desc);
+
+	/* Multicast/Broadcast fragments are not expected */
+	if (is_mcbc)
+		return -EINVAL;
 
 	if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(ar->ab, rx_desc) ||
 	    !ath11k_dp_rx_h_mpdu_start_fc_valid(ar->ab, rx_desc) ||
-- 
GitLab


From 875d598db60ac81e768fdfd2c589f6209038488b Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 11 May 2021 18:34:13 +0200
Subject: [PATCH 0389/3804] MAINTAINERS: Update address for Emma Anholt

Reviewed-by: Emma Anholt <emma@anholt.net>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 MAINTAINERS | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..38a1e3bf5af08 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5639,7 +5639,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/sun4i/sun8i*
 
 DRM DRIVER FOR ARM PL111 CLCD
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/pl111/
@@ -5719,7 +5719,7 @@ T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	drivers/gpu/drm/tiny/gm12u320.c
 
 DRM DRIVER FOR HX8357D PANELS
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Maintained
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/display/himax,hx8357d.txt
@@ -6177,7 +6177,7 @@ F:	Documentation/devicetree/bindings/display/ti/
 F:	drivers/gpu/drm/omapdrm/
 
 DRM DRIVERS FOR V3D
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 S:	Supported
 T:	git git://anongit.freedesktop.org/drm/drm-misc
 F:	Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -6185,7 +6185,7 @@ F:	drivers/gpu/drm/v3d/
 F:	include/uapi/drm/v3d_drm.h
 
 DRM DRIVERS FOR VC4
-M:	Eric Anholt <eric@anholt.net>
+M:	Emma Anholt <emma@anholt.net>
 M:	Maxime Ripard <mripard@kernel.org>
 S:	Supported
 T:	git git://github.com/anholt/linux
-- 
GitLab


From e09784a8a751e539dffc94d43bc917b0ac1e934a Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Tue, 11 May 2021 03:45:16 +0200
Subject: [PATCH 0390/3804] alarmtimer: Check RTC features instead of ops

RTC drivers used to leave .set_alarm() NULL in order to signal the RTC
device doesn't support alarms. The drivers are now clearing the
RTC_FEATURE_ALARM bit for that purpose in order to keep the rtc_class_ops
structure const. So now, .set_alarm() is set unconditionally and this
possibly causes the alarmtimer code to select an RTC device that doesn't
support alarms.

Test RTC_FEATURE_ALARM instead of relying on ops->set_alarm to determine
whether alarms are available.

Fixes: 7ae41220ef58 ("rtc: introduce features bitfield")
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210511014516.563031-1-alexandre.belloni@bootlin.com
---
 kernel/time/alarmtimer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index bea9d08b16988..5897828b9d7ed 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -92,7 +92,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 	if (rtcdev)
 		return -EBUSY;
 
-	if (!rtc->ops->set_alarm)
+	if (!test_bit(RTC_FEATURE_ALARM, rtc->features))
 		return -1;
 	if (!device_may_wakeup(rtc->dev.parent))
 		return -1;
-- 
GitLab


From 0bd50826a40e012a35c58ed3576b3873643e7a7d Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 6 May 2021 15:08:24 +0800
Subject: [PATCH 0391/3804] leds: Fix reference file name of documentation

In commit 56b01acc1c79a ("dt-bindings: gpio: fairchild,74hc595:
Convert to json-schema"), gpio-74x164.txt was deleted and replaced
by fairchild,74hc595.yaml. Fix the reference file name.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Link: https://lore.kernel.org/r/20210506070824.10965-1-wanjiabing@vivo.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/leds/leds-bcm6328.txt | 4 ++--
 Documentation/devicetree/bindings/leds/leds-bcm6358.txt | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
index ccebce597f372..a555d94084b7f 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
@@ -4,8 +4,8 @@ This controller is present on BCM6318, BCM6328, BCM6362 and BCM63268.
 In these SoCs it's possible to control LEDs both as GPIOs or by hardware.
 However, on some devices there are Serial LEDs (LEDs connected to a 74x164
 controller), which can either be controlled by software (exporting the 74x164
-as spi-gpio. See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or
-by hardware using this driver.
+as spi-gpio. See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml),
+or by hardware using this driver.
 Some of these Serial LEDs are hardware controlled (e.g. ethernet LEDs) and
 exporting the 74x164 as spi-gpio prevents those LEDs to be hardware
 controlled, so the only chance to keep them working is by using this driver.
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
index da5708e7b43b9..6e51c6b91ee54 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
@@ -3,7 +3,7 @@ LEDs connected to Broadcom BCM6358 controller
 This controller is present on BCM6358 and BCM6368.
 In these SoCs there are Serial LEDs (LEDs connected to a 74x164 controller),
 which can either be controlled by software (exporting the 74x164 as spi-gpio.
-See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or
+See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), or
 by hardware using this driver.
 
 Required properties:
-- 
GitLab


From 67f29896fdc83298eed5a6576ff8f9873f709228 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 11 May 2021 10:26:03 +0300
Subject: [PATCH 0392/3804] RDMA/rxe: Clear all QP fields if creation failed

rxe_qp_do_cleanup() relies on valid pointer values in QP for the properly
created ones, but in case rxe_qp_from_init() failed it was filled with
garbage and caused the following error.

  refcount_t: underflow; use-after-free.
  WARNING: CPU: 1 PID: 12560 at lib/refcount.c:28 refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28
  Modules linked in:
  CPU: 1 PID: 12560 Comm: syz-executor.4 Not tainted 5.12.0-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28
  Code: e9 db fe ff ff 48 89 df e8 2c c2 ea fd e9 8a fe ff ff e8 72 6a a7 fd 48 c7 c7 e0 b2 c1 89 c6 05 dc 3a e6 09 01 e8 ee 74 fb 04 <0f> 0b e9 af fe ff ff 0f 1f 84 00 00 00 00 00 41 56 41 55 41 54 55
  RSP: 0018:ffffc900097ceba8 EFLAGS: 00010286
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000040000 RSI: ffffffff815bb075 RDI: fffff520012f9d67
  RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000
  R10: ffffffff815b4eae R11: 0000000000000000 R12: ffff8880322a4800
  R13: ffff8880322a4940 R14: ffff888033044e00 R15: 0000000000000000
  FS:  00007f6eb2be3700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fdbe5d41000 CR3: 000000001d181000 CR4: 00000000001506e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   __refcount_sub_and_test include/linux/refcount.h:283 [inline]
   __refcount_dec_and_test include/linux/refcount.h:315 [inline]
   refcount_dec_and_test include/linux/refcount.h:333 [inline]
   kref_put include/linux/kref.h:64 [inline]
   rxe_qp_do_cleanup+0x96f/0xaf0 drivers/infiniband/sw/rxe/rxe_qp.c:805
   execute_in_process_context+0x37/0x150 kernel/workqueue.c:3327
   rxe_elem_release+0x9f/0x180 drivers/infiniband/sw/rxe/rxe_pool.c:391
   kref_put include/linux/kref.h:65 [inline]
   rxe_create_qp+0x2cd/0x310 drivers/infiniband/sw/rxe/rxe_verbs.c:425
   _ib_create_qp drivers/infiniband/core/core_priv.h:331 [inline]
   ib_create_named_qp+0x2ad/0x1370 drivers/infiniband/core/verbs.c:1231
   ib_create_qp include/rdma/ib_verbs.h:3644 [inline]
   create_mad_qp+0x177/0x2d0 drivers/infiniband/core/mad.c:2920
   ib_mad_port_open drivers/infiniband/core/mad.c:3001 [inline]
   ib_mad_init_device+0xd6f/0x1400 drivers/infiniband/core/mad.c:3092
   add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:717
   enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1331
   ib_register_device drivers/infiniband/core/device.c:1413 [inline]
   ib_register_device+0x7c7/0xa50 drivers/infiniband/core/device.c:1365
   rxe_register_device+0x3d5/0x4a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1147
   rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247
   rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:503
   rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline]
   rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250
   nldev_newlink+0x30e/0x550 drivers/infiniband/core/nldev.c:1555
   rdma_nl_rcv_msg+0x36d/0x690 drivers/infiniband/core/netlink.c:195
   rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline]
   rdma_nl_rcv+0x2ee/0x430 drivers/infiniband/core/netlink.c:259
   netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
   netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338
   netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927
   sock_sendmsg_nosec net/socket.c:654 [inline]
   sock_sendmsg+0xcf/0x120 net/socket.c:674
   ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350
   ___sys_sendmsg+0xf3/0x170 net/socket.c:2404
   __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433
   do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/7bf8d548764d406dbbbaf4b574960ebfd5af8387.1620717918.git.leonro@nvidia.com
Reported-by: syzbot+36a7f280de4e11c6f04e@syzkaller.appspotmail.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 34ae957a315ca..b0f350d674fdb 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -242,6 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 	if (err) {
 		vfree(qp->sq.queue->buf);
 		kfree(qp->sq.queue);
+		qp->sq.queue = NULL;
 		return err;
 	}
 
@@ -295,6 +296,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 		if (err) {
 			vfree(qp->rq.queue->buf);
 			kfree(qp->rq.queue);
+			qp->rq.queue = NULL;
 			return err;
 		}
 	}
@@ -355,6 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 err2:
 	rxe_queue_cleanup(qp->sq.queue);
 err1:
+	qp->pd = NULL;
+	qp->rcq = NULL;
+	qp->scq = NULL;
+	qp->srq = NULL;
+
 	if (srq)
 		rxe_drop_ref(srq);
 	rxe_drop_ref(scq);
-- 
GitLab


From b24abcff918a5cbf44b0c982bd3477a93e8e4911 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 11 May 2021 22:35:16 +0200
Subject: [PATCH 0393/3804] bpf, kconfig: Add consolidated menu entry for bpf
 with core options

Right now, all core BPF related options are scattered in different Kconfig
locations mainly due to historic reasons. Moving forward, let's add a proper
subsystem entry under ...

  General setup  --->
    BPF subsystem  --->

... in order to have all knobs in a single location and thus ease BPF related
configuration. Networking related bits such as sockmap are out of scope for
the general setup and therefore better suited to remain in net/Kconfig.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/f23f58765a4d59244ebd8037da7b6a6b2fb58446.1620765074.git.daniel@iogearbox.net
---
 init/Kconfig       | 41 +-----------------------
 kernel/bpf/Kconfig | 78 ++++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig        | 27 ----------------
 3 files changed, 79 insertions(+), 67 deletions(-)
 create mode 100644 kernel/bpf/Kconfig

diff --git a/init/Kconfig b/init/Kconfig
index ca559ccdaa324..2282a6842dc68 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -439,6 +439,7 @@ config AUDITSYSCALL
 
 source "kernel/irq/Kconfig"
 source "kernel/time/Kconfig"
+source "kernel/bpf/Kconfig"
 source "kernel/Kconfig.preempt"
 
 menu "CPU/Task time and stats accounting"
@@ -1705,46 +1706,6 @@ config KALLSYMS_BASE_RELATIVE
 
 # syscall, maps, verifier
 
-config BPF_LSM
-	bool "LSM Instrumentation with BPF"
-	depends on BPF_EVENTS
-	depends on BPF_SYSCALL
-	depends on SECURITY
-	depends on BPF_JIT
-	help
-	  Enables instrumentation of the security hooks with eBPF programs for
-	  implementing dynamic MAC and Audit Policies.
-
-	  If you are unsure how to answer this question, answer N.
-
-config BPF_SYSCALL
-	bool "Enable bpf() system call"
-	select BPF
-	select IRQ_WORK
-	select TASKS_TRACE_RCU
-	select BINARY_PRINTF
-	select NET_SOCK_MSG if INET
-	default n
-	help
-	  Enable the bpf() system call that allows to manipulate eBPF
-	  programs and maps via file descriptors.
-
-config ARCH_WANT_DEFAULT_BPF_JIT
-	bool
-
-config BPF_JIT_ALWAYS_ON
-	bool "Permanently enable BPF JIT and remove BPF interpreter"
-	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
-	help
-	  Enables BPF JIT and removes BPF interpreter to avoid
-	  speculative execution of BPF instructions by the interpreter
-
-config BPF_JIT_DEFAULT_ON
-	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
-	depends on HAVE_EBPF_JIT && BPF_JIT
-
-source "kernel/bpf/preload/Kconfig"
-
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	depends on MMU
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
new file mode 100644
index 0000000000000..b4edaefc62555
--- /dev/null
+++ b/kernel/bpf/Kconfig
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# BPF interpreter that, for example, classic socket filters depend on.
+config BPF
+	bool
+
+# Used by archs to tell that they support BPF JIT compiler plus which
+# flavour. Only one of the two can be selected for a specific arch since
+# eBPF JIT supersedes the cBPF JIT.
+
+# Classic BPF JIT (cBPF)
+config HAVE_CBPF_JIT
+	bool
+
+# Extended BPF JIT (eBPF)
+config HAVE_EBPF_JIT
+	bool
+
+# Used by archs to tell that they want the BPF JIT compiler enabled by
+# default for kernels that were compiled with BPF JIT support.
+config ARCH_WANT_DEFAULT_BPF_JIT
+	bool
+
+menu "BPF subsystem"
+
+config BPF_SYSCALL
+	bool "Enable bpf() system call"
+	select BPF
+	select IRQ_WORK
+	select TASKS_TRACE_RCU
+	select BINARY_PRINTF
+	select NET_SOCK_MSG if INET
+	default n
+	help
+	  Enable the bpf() system call that allows to manipulate BPF programs
+	  and maps via file descriptors.
+
+config BPF_JIT
+	bool "Enable BPF Just In Time compiler"
+	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
+	depends on MODULES
+	help
+	  BPF programs are normally handled by a BPF interpreter. This option
+	  allows the kernel to generate native code when a program is loaded
+	  into the kernel. This will significantly speed-up processing of BPF
+	  programs.
+
+	  Note, an admin should enable this feature changing:
+	  /proc/sys/net/core/bpf_jit_enable
+	  /proc/sys/net/core/bpf_jit_harden   (optional)
+	  /proc/sys/net/core/bpf_jit_kallsyms (optional)
+
+config BPF_JIT_ALWAYS_ON
+	bool "Permanently enable BPF JIT and remove BPF interpreter"
+	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
+	help
+	  Enables BPF JIT and removes BPF interpreter to avoid speculative
+	  execution of BPF instructions by the interpreter.
+
+config BPF_JIT_DEFAULT_ON
+	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+	depends on HAVE_EBPF_JIT && BPF_JIT
+
+source "kernel/bpf/preload/Kconfig"
+
+config BPF_LSM
+	bool "Enable BPF LSM Instrumentation"
+	depends on BPF_EVENTS
+	depends on BPF_SYSCALL
+	depends on SECURITY
+	depends on BPF_JIT
+	help
+	  Enables instrumentation of the security hooks with BPF programs for
+	  implementing dynamic MAC and Audit Policies.
+
+	  If you are unsure how to answer this question, answer N.
+
+endmenu # "BPF subsystem"
diff --git a/net/Kconfig b/net/Kconfig
index f5ee7c65e6b4b..c7392c449b254 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -302,21 +302,6 @@ config BQL
 	select DQL
 	default y
 
-config BPF_JIT
-	bool "enable BPF Just In Time compiler"
-	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
-	depends on MODULES
-	help
-	  Berkeley Packet Filter filtering capabilities are normally handled
-	  by an interpreter. This option allows kernel to generate a native
-	  code when filter is loaded in memory. This should speedup
-	  packet sniffing (libpcap/tcpdump).
-
-	  Note, admin should enable this feature changing:
-	  /proc/sys/net/core/bpf_jit_enable
-	  /proc/sys/net/core/bpf_jit_harden   (optional)
-	  /proc/sys/net/core/bpf_jit_kallsyms (optional)
-
 config BPF_STREAM_PARSER
 	bool "enable BPF STREAM_PARSER"
 	depends on INET
@@ -470,15 +455,3 @@ config ETHTOOL_NETLINK
 	  e.g. notification messages.
 
 endif   # if NET
-
-# Used by archs to tell that they support BPF JIT compiler plus which flavour.
-# Only one of the two can be selected for a specific arch since eBPF JIT supersedes
-# the cBPF JIT.
-
-# Classic BPF JIT (cBPF)
-config HAVE_CBPF_JIT
-	bool
-
-# Extended BPF JIT (eBPF)
-config HAVE_EBPF_JIT
-	bool
-- 
GitLab


From 08389d888287c3823f80b0216766b71e17f0aba5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 11 May 2021 22:35:17 +0200
Subject: [PATCH 0394/3804] bpf: Add kconfig knob for disabling unpriv bpf by
 default

Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2.

This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.

We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.

Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net
---
 Documentation/admin-guide/sysctl/kernel.rst | 17 +++++++++---
 kernel/bpf/Kconfig                          | 10 +++++++
 kernel/bpf/syscall.c                        |  3 ++-
 kernel/sysctl.c                             | 29 +++++++++++++++++----
 4 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e9..24ab20d7a50ad 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled
 =========================
 
 Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
-once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return
-``-EPERM``.
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF``
+will return ``-EPERM``. Once set to 1, this can't be cleared from the
+running kernel anymore.
 
-Once set, this can't be cleared.
+Writing 2 to this entry will also disable unprivileged calls to ``bpf()``,
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
 
+If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+= =============================================================
+0 Unprivileged calls to ``bpf()`` are enabled
+1 Unprivileged calls to ``bpf()`` are disabled without recovery
+2 Unprivileged calls to ``bpf()`` are disabled
+= =============================================================
 
 watchdog
 ========
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index b4edaefc62555..26b591e23f16a 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -61,6 +61,16 @@ config BPF_JIT_DEFAULT_ON
 	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
 	depends on HAVE_EBPF_JIT && BPF_JIT
 
+config BPF_UNPRIV_DEFAULT_OFF
+	bool "Disable unprivileged BPF by default"
+	depends on BPF_SYSCALL
+	help
+	  Disables unprivileged BPF by default by setting the corresponding
+	  /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+	  still reenable it by setting it to 0 later on, or permanently
+	  disable it by setting it to 1 (from which no other transition to
+	  0 is possible anymore).
+
 source "kernel/bpf/preload/Kconfig"
 
 config BPF_LSM
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 941ca06d9dfa1..ea04b0deb5ce4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock);
 static DEFINE_IDR(link_idr);
 static DEFINE_SPINLOCK(link_idr_lock);
 
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+	IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
 
 static const struct bpf_map_ops * const bpf_map_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f91d327273c1b..6df7c81f7cdd1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write,
 	mutex_unlock(&bpf_stats_enabled_mutex);
 	return ret;
 }
-#endif
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+			      void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, unpriv_enable = *(int *)table->data;
+	bool locked_state = unpriv_enable == 1;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &unpriv_enable;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (locked_state && unpriv_enable != 1)
+			return -EPERM;
+		*(int *)table->data = unpriv_enable;
+	}
+	return ret;
+}
+#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
 
 /*
  * /proc/sys support
@@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = {
 		.data		= &sysctl_unprivileged_bpf_disabled,
 		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
 		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
+		.proc_handler	= bpf_unpriv_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
 	},
 	{
 		.procname	= "bpf_stats_enabled",
-- 
GitLab


From 35e3815fa8102fab4dee75f3547472c66581125d Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 29 Apr 2021 13:47:12 +0200
Subject: [PATCH 0395/3804] bpf: Add deny list of btf ids check for tracing
 programs

The recursion check in __bpf_prog_enter and __bpf_prog_exit
leaves some (not inlined) functions unprotected:

In __bpf_prog_enter:
  - migrate_disable is called before prog->active is checked

In __bpf_prog_exit:
  - migrate_enable,rcu_read_unlock_strict are called after
    prog->active is decreased

When attaching a trampoline to them, we get a panic like:

  traps: PANIC: double fault, error_code: 0x0
  double fault: 0000 [#1] SMP PTI
  RIP: 0010:__bpf_prog_enter+0x4/0x50
  ...
  Call Trace:
   <IRQ>
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   __bpf_prog_enter+0x9/0x50
   bpf_trampoline_6442466513_0+0x18/0x1000
   migrate_disable+0x5/0x50
   ...

Fix this by adding a deny list of BTF ids for tracing
programs and checking the BTF id during program verification.
Add the above functions to this list.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210429114712.43783-1-jolsa@kernel.org
---
 kernel/bpf/verifier.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9352a1b7de2dd..c58598ef4b5b5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13196,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 	return 0;
 }
 
+BTF_SET_START(btf_id_deny)
+BTF_ID_UNUSED
+#ifdef CONFIG_SMP
+BTF_ID(func, migrate_disable)
+BTF_ID(func, migrate_enable)
+#endif
+#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
+BTF_ID(func, rcu_read_unlock_strict)
+#endif
+BTF_SET_END(btf_id_deny)
+
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog;
@@ -13255,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		ret = bpf_lsm_verify_prog(&env->log, prog);
 		if (ret < 0)
 			return ret;
+	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
+		   btf_id_set_contains(&btf_id_deny, btf_id)) {
+		return -EINVAL;
 	}
 
 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
-- 
GitLab


From e2d5b2bb769fa5f500760caba76436ba3a10a895 Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Tue, 11 May 2021 10:10:54 +0200
Subject: [PATCH 0396/3804] bpf: Fix nested bpf_bprintf_prepare with more
 per-cpu buffers

The bpf_seq_printf, bpf_trace_printk and bpf_snprintf helpers share one
per-cpu buffer that they use to store temporary data (arguments to
bprintf). They "get" that buffer with try_get_fmt_tmp_buf and "put" it
by the end of their scope with bpf_bprintf_cleanup.

If one of these helpers gets called within the scope of one of these
helpers, for example: a first bpf program gets called, uses
bpf_trace_printk which calls raw_spin_lock_irqsave which is traced by
another bpf program that calls bpf_snprintf, then the second "get"
fails. Essentially, these helpers are not re-entrant. They would return
-EBUSY and print a warning message once.

This patch triples the number of bprintf buffers to allow three levels
of nesting. This is very similar to what was done for tracepoints in
"9594dc3c7e7 bpf: fix nested bpf tracepoints with per-cpu data"

Fixes: d9c9e4db186a ("bpf: Factorize bpf_trace_printk and bpf_seq_printf")
Reported-by: syzbot+63122d0bc347f18c1884@syzkaller.appspotmail.com
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210511081054.2125874-1-revest@chromium.org
---
 kernel/bpf/helpers.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 544773970dbc6..ef658a9ea5c93 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
  */
 #define MAX_PRINTF_BUF_LEN	512
 
-struct bpf_printf_buf {
-	char tmp_buf[MAX_PRINTF_BUF_LEN];
+/* Support executing three nested bprintf helper calls on a given CPU */
+struct bpf_bprintf_buffers {
+	char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
 };
-static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
-static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
+static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
 
 static int try_get_fmt_tmp_buf(char **tmp_buf)
 {
-	struct bpf_printf_buf *bufs;
-	int used;
+	struct bpf_bprintf_buffers *bufs;
+	int nest_level;
 
 	preempt_disable();
-	used = this_cpu_inc_return(bpf_printf_buf_used);
-	if (WARN_ON_ONCE(used > 1)) {
-		this_cpu_dec(bpf_printf_buf_used);
+	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
+	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 		return -EBUSY;
 	}
-	bufs = this_cpu_ptr(&bpf_printf_buf);
-	*tmp_buf = bufs->tmp_buf;
+	bufs = this_cpu_ptr(&bpf_bprintf_bufs);
+	*tmp_buf = bufs->tmp_bufs[nest_level - 1];
 
 	return 0;
 }
 
 void bpf_bprintf_cleanup(void)
 {
-	if (this_cpu_read(bpf_printf_buf_used)) {
-		this_cpu_dec(bpf_printf_buf_used);
+	if (this_cpu_read(bpf_bprintf_nest_level)) {
+		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 	}
 }
-- 
GitLab


From 67e7ec0bd4535fc6e6d3f5d174f80e10a8a80c6e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@kernel.org>
Date: Sat, 8 May 2021 12:22:12 -0300
Subject: [PATCH 0397/3804] libbpf: Provide GELF_ST_VISIBILITY() define for
 older libelf

Where that macro isn't available.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/YJaspEh0qZr4LYOc@kernel.org
---
 tools/lib/bpf/libbpf_internal.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index ee426226928f1..acbcf6c7bdf82 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -41,6 +41,11 @@
 #define ELF_C_READ_MMAP ELF_C_READ
 #endif
 
+/* Older libelf all end up in this expression, for both 32 and 64 bit */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
 #define BTF_INFO_ENC(kind, kind_flag, vlen) \
 	((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
-- 
GitLab


From 096eccdef0b32f47e9354231ddc3aaaf9527d51c Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Wed, 5 May 2021 08:59:25 +0000
Subject: [PATCH 0398/3804] selftests/bpf: Rewrite test_tc_redirect.sh as
 prog_tests/tc_redirect.c

As discussed in [0], this ports test_tc_redirect.sh to the test_progs
framework and removes the old test.

This makes it more in line with rest of the tests and makes it possible
to run this test case with vmtest.sh and under the bpf CI.

The upcoming skb_change_head() helper fix in [0] is depending on it and
extending the test case to redirect a packet from L3 device to veth.

  [0] https://lore.kernel.org/bpf/20210427135550.807355-1-joamaki@gmail.com

Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210505085925.783985-1-joamaki@gmail.com
---
 tools/testing/selftests/bpf/network_helpers.c |   2 +-
 tools/testing/selftests/bpf/network_helpers.h |   1 +
 .../selftests/bpf/prog_tests/tc_redirect.c    | 589 ++++++++++++++++++
 .../selftests/bpf/progs/test_tc_neigh.c       |  33 +-
 .../selftests/bpf/progs/test_tc_neigh_fib.c   |   9 +-
 .../selftests/bpf/progs/test_tc_peer.c        |  33 +-
 .../testing/selftests/bpf/test_tc_redirect.sh | 216 -------
 7 files changed, 617 insertions(+), 266 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_redirect.c
 delete mode 100755 tools/testing/selftests/bpf/test_tc_redirect.sh

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 12ee40284da02..2060bc122c530 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-static int settimeo(int fd, int timeout_ms)
+int settimeo(int fd, int timeout_ms)
 {
 	struct timeval timeout = { .tv_sec = 3 };
 
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 7205f8afdba11..5e0d51c07b632 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,6 +33,7 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
+int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 0000000000000..95ef9fcd31d8b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_PORT 9004
+
+#define IP6_SRC "::1:dead:beef:cafe"
+#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-c 3 -w 10 -q"
+
+#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
+#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
+#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
+
+#define TIMEOUT_MILLIS 10000
+
+#define MAX_PROC_MODS 128
+#define MAX_PROC_VALUE_LEN 16
+
+#define log_err(MSG, ...) \
+	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+struct proc_mod {
+	char path[PATH_MAX];
+	char oldval[MAX_PROC_VALUE_LEN];
+	int oldlen;
+};
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+static int root_netns_fd = -1;
+static int num_proc_mods;
+static struct proc_mod proc_mods[MAX_PROC_MODS];
+
+/**
+ * modify_proc() - Modify entry in /proc
+ *
+ * Modifies an entry in /proc and saves the original value for later
+ * restoration with restore_proc(). Returns 0 on success, -1 on failure.
+ */
+static int modify_proc(const char *path, const char *newval)
+{
+	struct proc_mod *mod;
+	FILE *f;
+
+	if (num_proc_mods + 1 > MAX_PROC_MODS)
+		return -1;
+
+	f = fopen(path, "r+");
+	if (!f)
+		return -1;
+
+	mod = &proc_mods[num_proc_mods++];
+	/* Unlike strncpy(), snprintf() always NUL-terminates mod->path */
+	snprintf(mod->path, sizeof(mod->path), "%s", path);
+
+	/* NOTE(review): oldlen is never set here, yet restore_proc() uses it */
+	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
+		log_err("reading from %s failed", path);
+		goto fail;
+	}
+	rewind(f);
+	if (fwrite(newval, strlen(newval), 1, f) != 1) {
+		log_err("writing to %s failed", path);
+		goto fail;
+	}
+
+	fclose(f);
+	return 0;
+
+fail:
+	fclose(f);
+	num_proc_mods--;
+	return -1;
+}
+
+/**
+ * restore_proc() - Restore all /proc modifications
+ */
+static void restore_proc(void)
+{
+	int i;
+
+	for (i = 0; i < num_proc_mods; i++) {
+		struct proc_mod *mod = &proc_mods[i];
+		FILE *f;
+
+		f = fopen(mod->path, "w");
+		if (!f) {
+			log_err("fopen of %s failed", mod->path);
+			continue;
+		}
+
+		if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
+			log_err("fwrite to %s failed", mod->path);
+
+		fclose(f);
+	}
+	num_proc_mods = 0;
+}
+
+/**
+ * setns_by_name() - Set network namespace by name
+ */
+static int setns_by_name(const char *name)
+{
+	int nsfd;
+	char nspath[PATH_MAX];
+	int err;
+
+	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+	if (nsfd < 0)
+		return nsfd;
+
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
+
+	return err;
+}
+
+/**
+ * setns_root() - Set network namespace to original (root) namespace
+ *
+ * Not expected to ever fail, so error not returned, but failure logged
+ * and test marked as failed.
+ */
+static void setns_root(void)
+{
+	ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+}
+
+static int netns_setup_namespaces(const char *verb)
+{
+	const char * const *ns = namespaces;
+	char cmd[128];
+
+	while (*ns) {
+		snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
+		if (!ASSERT_OK(system(cmd), cmd))
+			return -1;
+		ns++;
+	}
+	return 0;
+}
+
+struct netns_setup_result {
+	int ifindex_veth_src_fwd;
+	int ifindex_veth_dst_fwd;
+};
+
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+	char path[PATH_MAX];
+	FILE *f;
+	int ret;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+	if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+/* Read /sys/class/net/<name>/ifindex; returns the ifindex, or -1 on error */
+static int get_ifindex(const char *name)
+{
+	char path[PATH_MAX];
+	char buf[32] = {};
+	FILE *f;
+	int ret;
+
+	snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
+	f = fopen(path, "r");
+	if (!ASSERT_OK_PTR(f, path))
+		return -1;
+
+	/* Read one byte less than buf holds so atoi() sees a terminated string */
+	ret = fread(buf, 1, sizeof(buf) - 1, f);
+	fclose(f);
+	if (!ASSERT_GT(ret, 0, "fread ifindex"))
+		return -1;
+	return atoi(buf);
+}
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto fail;				\
+	})
+
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
+	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
+
+	SYS("ip link add veth_src type veth peer name veth_src_fwd");
+	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+		goto fail;
+	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
+		goto fail;
+
+	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
+	if (result->ifindex_veth_src_fwd < 0)
+		goto fail;
+	result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
+	if (result->ifindex_veth_dst_fwd < 0)
+		goto fail;
+
+	SYS("ip link set veth_src netns " NS_SRC);
+	SYS("ip link set veth_src_fwd netns " NS_FWD);
+	SYS("ip link set veth_dst_fwd netns " NS_FWD);
+	SYS("ip link set veth_dst netns " NS_DST);
+
+	/** setup in 'src' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+		goto fail;
+
+	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
+	SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad");
+	SYS("ip link set dev veth_src up");
+
+	SYS("ip route add " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip route add " IP4_NET "/16 dev veth_src scope global");
+	SYS("ip route add " IP6_DST "/128 dev veth_src scope global");
+
+	SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s",
+	    veth_src_fwd_addr);
+	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
+	    veth_src_fwd_addr);
+
+	/** setup in 'fwd' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto fail;
+
+	/* The fwd netns automatically gets a v6 LL address / routes, but also
+	 * needs v4 one in order to start ARP probing. IP4_NET route is added
+	 * to the endpoints so that the ARP processing will reply.
+	 */
+	SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd");
+	SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
+	SYS("ip link set dev veth_src_fwd up");
+	SYS("ip link set dev veth_dst_fwd up");
+
+	SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
+	SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
+	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
+	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+
+	/** setup in 'dst' namespace */
+	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+		goto fail;
+
+	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
+	SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad");
+	SYS("ip link set dev veth_dst up");
+
+	SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global");
+	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
+	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
+
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
+	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
+	    veth_dst_fwd_addr);
+
+	setns_root();
+	return 0;
+fail:
+	setns_root();
+	return -1;
+}
+
+/* Attach the pinned TC programs to both NS_FWD veths; 0 on success, else -1 */
+static int netns_load_bpf(void)
+{
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		return -1;
+
+	SYS("tc qdisc add dev veth_src_fwd clsact");
+	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+	    DST_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	setns_root();
+	return 0;
+fail:
+	setns_root();
+	return -1;
+}
+
+static int netns_unload_bpf(void)
+{
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto fail;
+	SYS("tc qdisc delete dev veth_src_fwd clsact");
+	SYS("tc qdisc delete dev veth_dst_fwd clsact");
+
+	setns_root();
+	return 0;
+fail:
+	setns_root();
+	return -1;
+}
+
+
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+	int listen_fd = -1, accept_fd = -1, client_fd = -1;
+	char buf[] = "testing testing";
+	int n;
+
+	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+		return;
+
+	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+	if (!ASSERT_GE(listen_fd, 0, "listen"))
+		goto done;
+
+	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+		goto done;
+
+	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+	if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (!ASSERT_GE(accept_fd, 0, "accept"))
+		goto done;
+
+	if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+		goto done;
+
+	n = write(client_fd, buf, sizeof(buf));
+	if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+		goto done;
+
+	n = read(accept_fd, buf, sizeof(buf));
+	ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+	setns_root();
+	if (listen_fd >= 0)
+		close(listen_fd);
+	if (accept_fd >= 0)
+		close(accept_fd);
+	if (client_fd >= 0)
+		close(client_fd);
+}
+
+static int test_ping(int family, const char *addr)
+{
+	const char *ping = family == AF_INET6 ? "ping6" : "ping";
+
+	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+	return 0;
+fail:
+	return -1;
+}
+
+static void test_connectivity(void)
+{
+	test_tcp(AF_INET, IP4_DST, IP4_PORT);
+	test_ping(AF_INET, IP4_DST);
+	test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+	test_ping(AF_INET6, IP6_DST);
+}
+
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+	struct test_tc_neigh_fib *skel;
+	int err;
+
+	skel = test_tc_neigh_fib__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+		return;
+
+	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
+		test_tc_neigh_fib__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	/* bpf_fib_lookup() checks if forwarding is enabled */
+	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+		goto done;
+
+	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
+		goto done;
+
+	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
+	if (!ASSERT_OK(err, "set ipv6.forwarding"))
+		goto done;
+	setns_root();
+
+	test_connectivity();
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_neigh_fib__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+	restore_proc();
+}
+
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+	struct test_tc_neigh *skel;
+	int err;
+
+	skel = test_tc_neigh__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_neigh__load(skel);
+	if (!ASSERT_OK(err, "test_tc_neigh__load")) {
+		test_tc_neigh__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	test_connectivity();
+
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_neigh__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+}
+
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel;
+	int err;
+
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		return;
+
+	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load")) {
+		test_tc_peer__destroy(skel);
+		return;
+	}
+
+	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto done;
+
+	err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto done;
+
+	if (netns_load_bpf())
+		goto done;
+
+	test_connectivity();
+
+done:
+	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
+	test_tc_peer__destroy(skel);
+	netns_unload_bpf();
+	setns_root();
+}
+
+void test_tc_redirect(void)
+{
+	struct netns_setup_result setup_result;
+
+	root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+		return;
+
+	if (netns_setup_namespaces("add"))
+		goto done;
+
+	if (netns_setup_links_and_routes(&setup_result))
+		goto done;
+
+	if (test__start_subtest("tc_redirect_peer"))
+		test_tc_redirect_peer(&setup_result);
+
+	if (test__start_subtest("tc_redirect_neigh"))
+		test_tc_redirect_neigh(&setup_result);
+
+	if (test__start_subtest("tc_redirect_neigh_fib"))
+		test_tc_redirect_neigh_fib(&setup_result);
+
+done:
+	close(root_netns_fd);
+	netns_setup_namespaces("delete");
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index b985ac4e7a814..90f64a85998fa 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,17 +33,8 @@
 				 a.s6_addr32[3] == b.s6_addr32[3])
 #endif
 
-enum {
-	dev_src,
-	dev_dst,
-};
-
-struct bpf_map_def SEC("maps") ifindex_map = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(int),
-	.value_size	= sizeof(int),
-	.max_entries	= 2,
-};
+static volatile const __u32 IFINDEX_SRC;
+static volatile const __u32 IFINDEX_DST;
 
 static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
 					    __be32 addr)
@@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
 	return v6_equal(ip6h->daddr, addr);
 }
 
-static __always_inline int get_dev_ifindex(int which)
-{
-	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
-	return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
 	void *data = ctx_ptr(skb->data);
@@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
 	return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
 }
 
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
@@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
+	return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
 	bool redirect = false;
@@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
 	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
 		return TC_ACT_SHOT;
 
-	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
+	return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
 }
 
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index d82ed3457030f..f7ab69cf018e5 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
 	return 0;
 }
 
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
 	void *data = ctx_ptr(skb->data);
@@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
 /* these are identical, but keep them separate for compatibility with the
  * section names expected by test_tc_redirect.sh
  */
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fc84a7685aa2c..72c72950c3bbe 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -8,38 +8,25 @@
 
 #include <bpf/bpf_helpers.h>
 
-enum {
-	dev_src,
-	dev_dst,
-};
+static volatile const __u32 IFINDEX_SRC;
+static volatile const __u32 IFINDEX_DST;
 
-struct bpf_map_def SEC("maps") ifindex_map = {
-	.type		= BPF_MAP_TYPE_ARRAY,
-	.key_size	= sizeof(int),
-	.value_size	= sizeof(int),
-	.max_entries	= 2,
-};
-
-static __always_inline int get_dev_ifindex(int which)
-{
-	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
-
-	return ifindex ? *ifindex : 0;
-}
-
-SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+SEC("classifier/chk_egress")
+int tc_chk(struct __sk_buff *skb)
 {
 	return TC_ACT_SHOT;
 }
 
-SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+SEC("classifier/dst_ingress")
+int tc_dst(struct __sk_buff *skb)
 {
-	return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+	return bpf_redirect_peer(IFINDEX_SRC, 0);
 }
 
-SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+SEC("classifier/src_ingress")
+int tc_src(struct __sk_buff *skb)
 {
-	return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
 char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
deleted file mode 100755
index 8868aa1ca9021..0000000000000
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
-# between src and dst. The netns fwd has veth links to each src and dst. The
-# client is in src and server in dst. The test installs a TC BPF program to each
-# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
-# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
-# switch from ingress side; it also installs a checker prog on the egress side
-# to drop unexpected traffic.
-
-if [[ $EUID -ne 0 ]]; then
-	echo "This script must be run as root"
-	echo "FAIL"
-	exit 1
-fi
-
-# check that needed tools are present
-command -v nc >/dev/null 2>&1 || \
-	{ echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
-	{ echo >&2 "dd is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
-	{ echo >&2 "timeout is not available"; exit 1; }
-command -v ping >/dev/null 2>&1 || \
-	{ echo >&2 "ping is not available"; exit 1; }
-if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
-command -v perl >/dev/null 2>&1 || \
-	{ echo >&2 "perl is not available"; exit 1; }
-command -v jq >/dev/null 2>&1 || \
-	{ echo >&2 "jq is not available"; exit 1; }
-command -v bpftool >/dev/null 2>&1 || \
-	{ echo >&2 "bpftool is not available"; exit 1; }
-
-readonly GREEN='\033[0;92m'
-readonly RED='\033[0;31m'
-readonly NC='\033[0m' # No Color
-
-readonly PING_ARG="-c 3 -w 10 -q"
-
-readonly TIMEOUT=10
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP4_SRC="172.16.1.100"
-readonly IP4_DST="172.16.2.100"
-
-readonly IP6_SRC="::1:dead:beef:cafe"
-readonly IP6_DST="::2:dead:beef:cafe"
-
-readonly IP4_SLL="169.254.0.1"
-readonly IP4_DLL="169.254.0.2"
-readonly IP4_NET="169.254.0.0"
-
-netns_cleanup()
-{
-	ip netns del ${NS_SRC}
-	ip netns del ${NS_FWD}
-	ip netns del ${NS_DST}
-}
-
-netns_setup()
-{
-	ip netns add "${NS_SRC}"
-	ip netns add "${NS_FWD}"
-	ip netns add "${NS_DST}"
-
-	ip link add veth_src type veth peer name veth_src_fwd
-	ip link add veth_dst type veth peer name veth_dst_fwd
-
-	ip link set veth_src netns ${NS_SRC}
-	ip link set veth_src_fwd netns ${NS_FWD}
-
-	ip link set veth_dst netns ${NS_DST}
-	ip link set veth_dst_fwd netns ${NS_FWD}
-
-	ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
-	ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
-
-	# The fwd netns automatically get a v6 LL address / routes, but also
-	# needs v4 one in order to start ARP probing. IP4_NET route is added
-	# to the endpoints so that the ARP processing will reply.
-
-	ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
-	ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
-
-	ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
-	ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
-
-	ip -netns ${NS_SRC} link set dev veth_src up
-	ip -netns ${NS_FWD} link set dev veth_src_fwd up
-
-	ip -netns ${NS_DST} link set dev veth_dst up
-	ip -netns ${NS_FWD} link set dev veth_dst_fwd up
-
-	ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
-	ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
-	ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
-
-	ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
-	ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
-
-	ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
-	ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
-	ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
-
-	ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
-	ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
-
-	fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
-	fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
-
-	ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
-	ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
-
-	ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
-	ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
-}
-
-netns_test_connectivity()
-{
-	set +e
-
-	ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
-	ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
-
-	TEST="TCPv4 connectivity test"
-	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="TCPv6 connectivity test"
-	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="ICMPv4 connectivity test"
-	ip netns exec ${NS_SRC} ping  $PING_ARG ${IP4_DST}
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	TEST="ICMPv6 connectivity test"
-	ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
-	if [ $? -ne 0 ]; then
-		echo -e "${TEST}: ${RED}FAIL${NC}"
-		exit 1
-	fi
-	echo -e "${TEST}: ${GREEN}PASS${NC}"
-
-	set -e
-}
-
-hex_mem_str()
-{
-	perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
-}
-
-netns_setup_bpf()
-{
-	local obj=$1
-	local use_forwarding=${2:-0}
-
-	ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
-	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
-	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress  bpf da obj $obj sec chk_egress
-
-	ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
-	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
-	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj $obj sec chk_egress
-
-	if [ "$use_forwarding" -eq "1" ]; then
-		# bpf_fib_lookup() checks if forwarding is enabled
-		ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
-		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
-		ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
-		return 0
-	fi
-
-	veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
-	veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
-
-	progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
-	for prog in $progs; do
-		map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
-		if [ ! -z "$map" ]; then
-			bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
-			bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
-		fi
-	done
-}
-
-trap netns_cleanup EXIT
-set -e
-
-netns_setup
-netns_setup_bpf test_tc_neigh.o
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_neigh_fib.o 1
-netns_test_connectivity
-netns_cleanup
-netns_setup
-netns_setup_bpf test_tc_peer.o
-netns_test_connectivity
-- 
GitLab


From 569c484f9995f489f2b80dd134269fe07d2b900d Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 7 May 2021 17:50:11 -0700
Subject: [PATCH 0399/3804] bpf: Limit static tcp-cc functions in the .BTF_ids
 list to x86
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During the discussion in [0], it was pointed out that static functions
on ppc64 are prefixed with ".". For example, 'readelf -s vmlinux.ppc' shows:

  89326: c000000001383280    24 NOTYPE  LOCAL  DEFAULT   31 cubictcp_init
  89327: c000000000c97c50   168 FUNC    LOCAL  DEFAULT    2 .cubictcp_init

The one with FUNC type is ".cubictcp_init" instead of "cubictcp_init".
The "." seems to be done by arch/powerpc/include/asm/ppc_asm.h.

As a result, pahole cannot generate the BTF for these tcp-cc kernel
functions, because pahole only captures symbols of FUNC type and
"cubictcp_init" is not one. This in turn broke the kernel build on ppc64.

This behavior is only reported in ppc64 so far. I tried arm64, s390,
and sparc64 and did not observe this "." prefix and NOTYPE behavior.

Since the kfunc call is only supported in the x86_64 and x86_32 JIT,
this patch limits those tcp-cc functions to x86 only to avoid unnecessary
compilation issue in other ARCHs. In the future, we can examine if it
is better to change all those functions from static to extern.

  [0] https://lore.kernel.org/bpf/4e051459-8532-7b61-c815-f3435767f8a0@kernel.org/

Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Michal Suchánek <msuchanek@suse.de>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: https://lore.kernel.org/bpf/20210508005011.3863757-1-kafai@fb.com
---
 net/ipv4/bpf_tcp_ca.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index dff4f0eb96b0f..9e41eff4a6858 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
+#ifdef CONFIG_X86
 #ifdef CONFIG_DYNAMIC_FTRACE
 #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
 BTF_ID(func, cubictcp_init)
@@ -213,6 +214,7 @@ BTF_ID(func, bbr_min_tso_segs)
 BTF_ID(func, bbr_set_state)
 #endif
 #endif  /* CONFIG_DYNAMIC_FTRACE */
+#endif	/* CONFIG_X86 */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
 static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
-- 
GitLab


From 349c4d6c75d74b62d8e39913b40bd06117b85e4a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Sun, 9 May 2021 21:53:03 -0700
Subject: [PATCH 0400/3804] f2fs: avoid null pointer access when handling IPU
 error

 Unable to handle kernel NULL pointer dereference at virtual address 000000000000001a
 pc : f2fs_inplace_write_data+0x144/0x208
 lr : f2fs_inplace_write_data+0x134/0x208
 Call trace:
  f2fs_inplace_write_data+0x144/0x208
  f2fs_do_write_data_page+0x270/0x770
  f2fs_write_single_data_page+0x47c/0x830
  __f2fs_write_data_pages+0x444/0x98c
  f2fs_write_data_pages.llvm.16514453770497736882+0x2c/0x38
  do_writepages+0x58/0x118
  __writeback_single_inode+0x44/0x300
  writeback_sb_inodes+0x4b8/0x9c8
  wb_writeback+0x148/0x42c
  wb_do_writeback+0xc8/0x390
  wb_workfn+0xb0/0x2f4
  process_one_work+0x1fc/0x444
  worker_thread+0x268/0x4b4
  kthread+0x13c/0x158
  ret_from_fork+0x10/0x18

Fixes: 955772787667 ("f2fs: drop inplace IO if fs status is abnormal")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c605415840b59..51dc79fad4fe2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
 
 	return err;
 drop_bio:
-	if (fio->bio) {
+	if (fio->bio && *(fio->bio)) {
 		struct bio *bio = *(fio->bio);
 
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
-		fio->bio = NULL;
+		*(fio->bio) = NULL;
 	}
 	return err;
 }
-- 
GitLab


From a753103909a7e3d22147505d944da3d20759e1a5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 6 May 2021 12:11:14 -0700
Subject: [PATCH 0401/3804] f2fs: support iflag change given the mask

In f2fs_fileattr_set(),

	if (!fa->flags_valid)
		mask &= FS_COMMON_FL;

In this case, we can set supported flags by mask only instead of BUG_ON.

/* Flags shared betwen flags/xflags */
	(FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \
	 FS_NODUMP_FL |	FS_NOATIME_FL | FS_DAX_FL | \
	 FS_PROJINHERIT_FL)

Fixes: 9b1bb01c8ae7 ("f2fs: convert to fileattr")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 44a4650aea7b7..ceb575f99048c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	u32 masked_flags = fi->i_flags & mask;
 
-	f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
+	/* mask can be shrunk by flags_valid selector */
+	iflags &= mask;
 
 	/* Is it quota file? Do not allow user to mess with it */
 	if (IS_NOQUOTA(inode))
-- 
GitLab


From a12cc5b423d4f36dc1a1ea3911e49cf9dff43898 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 6 May 2021 17:00:43 +0800
Subject: [PATCH 0402/3804] f2fs: compress: fix to free compress page correctly

In error path of f2fs_write_compressed_pages(), it needs to call
f2fs_compress_free_page() to release temporary page.

Fixes: 5e6bbde95982 ("f2fs: introduce mempool for {,de}compress intermediate page allocation")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 53b13787eb2c8..2acaefa100364 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1372,7 +1372,8 @@ out_destroy_crypt:
 	for (i = 0; i < cc->nr_cpages; i++) {
 		if (!cc->cpages[i])
 			continue;
-		f2fs_put_page(cc->cpages[i], 1);
+		f2fs_compress_free_page(cc->cpages[i]);
+		cc->cpages[i] = NULL;
 	}
 out_put_cic:
 	kmem_cache_free(cic_entry_slab, cic);
-- 
GitLab


From a949dc5f2c5cfe0c910b664650f45371254c0744 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 10 May 2021 17:30:31 +0800
Subject: [PATCH 0403/3804] f2fs: compress: fix race condition of overwrite vs
 truncate

The pos_fsstress testcase triggers a panic as below:

------------[ cut here ]------------
kernel BUG at fs/f2fs/compress.c:1082!
invalid opcode: 0000 [#1] SMP PTI
CPU: 4 PID: 2753477 Comm: kworker/u16:2 Tainted: G           OE     5.12.0-rc1-custom #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
Workqueue: writeback wb_workfn (flush-252:16)
RIP: 0010:prepare_compress_overwrite+0x4c0/0x760 [f2fs]
Call Trace:
 f2fs_prepare_compress_overwrite+0x5f/0x80 [f2fs]
 f2fs_write_cache_pages+0x468/0x8a0 [f2fs]
 f2fs_write_data_pages+0x2a4/0x2f0 [f2fs]
 do_writepages+0x38/0xc0
 __writeback_single_inode+0x44/0x2a0
 writeback_sb_inodes+0x223/0x4d0
 __writeback_inodes_wb+0x56/0xf0
 wb_writeback+0x1dd/0x290
 wb_workfn+0x309/0x500
 process_one_work+0x220/0x3c0
 worker_thread+0x53/0x420
 kthread+0x12f/0x150
 ret_from_fork+0x22/0x30

The root cause is truncate() may race with overwrite as below,
so that one reference count left in page can not guarantee the
page attaching in mapping tree all the time, after truncation,
later find_lock_page() may return NULL pointer.

- prepare_compress_overwrite
 - f2fs_pagecache_get_page
 - unlock_page
					- f2fs_setattr
					 - truncate_setsize
					  - truncate_inode_page
					   - delete_from_page_cache
 - find_lock_page

Fix this by avoiding referencing updated page.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 2acaefa100364..79348bc56e351 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
 	f2fs_drop_rpages(cc, len, true);
 }
 
-static void f2fs_put_rpages_mapping(struct address_space *mapping,
-				pgoff_t start, int len)
-{
-	int i;
-
-	for (i = 0; i < len; i++) {
-		struct page *page = find_get_page(mapping, start + i);
-
-		put_page(page);
-		put_page(page);
-	}
-}
-
 static void f2fs_put_rpages_wbc(struct compress_ctx *cc,
 		struct writeback_control *wbc, bool redirty, int unlock)
 {
@@ -1036,7 +1023,7 @@ retry:
 		}
 
 		if (PageUptodate(page))
-			unlock_page(page);
+			f2fs_put_page(page, 1);
 		else
 			f2fs_compress_ctx_add_page(cc, page);
 	}
@@ -1046,32 +1033,34 @@ retry:
 
 		ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
 					&last_block_in_bio, false, true);
+		f2fs_put_rpages(cc);
 		f2fs_destroy_compress_ctx(cc);
 		if (ret)
-			goto release_pages;
+			goto out;
 		if (bio)
 			f2fs_submit_bio(sbi, bio, DATA);
 
 		ret = f2fs_init_compress_ctx(cc);
 		if (ret)
-			goto release_pages;
+			goto out;
 	}
 
 	for (i = 0; i < cc->cluster_size; i++) {
 		f2fs_bug_on(sbi, cc->rpages[i]);
 
 		page = find_lock_page(mapping, start_idx + i);
-		f2fs_bug_on(sbi, !page);
+		if (!page) {
+			/* page can be truncated */
+			goto release_and_retry;
+		}
 
 		f2fs_wait_on_page_writeback(page, DATA, true, true);
-
 		f2fs_compress_ctx_add_page(cc, page);
-		f2fs_put_page(page, 0);
 
 		if (!PageUptodate(page)) {
+release_and_retry:
+			f2fs_put_rpages(cc);
 			f2fs_unlock_rpages(cc, i + 1);
-			f2fs_put_rpages_mapping(mapping, start_idx,
-					cc->cluster_size);
 			f2fs_destroy_compress_ctx(cc);
 			goto retry;
 		}
@@ -1103,10 +1092,10 @@ retry:
 	}
 
 unlock_pages:
+	f2fs_put_rpages(cc);
 	f2fs_unlock_rpages(cc, i);
-release_pages:
-	f2fs_put_rpages_mapping(mapping, start_idx, i);
 	f2fs_destroy_compress_ctx(cc);
+out:
 	return ret;
 }
 
-- 
GitLab


From 8bfbfb0ddd706b1ce2e89259ecc45f192c0ec2bf Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Mon, 10 May 2021 17:30:32 +0800
Subject: [PATCH 0404/3804] f2fs: compress: fix to assign cc.cluster_idx
 correctly

In f2fs_destroy_compress_ctx(), after f2fs_destroy_compress_ctx(),
cc.cluster_idx will be cleared w/ NULL_CLUSTER, f2fs_cluster_blocks()
may check wrong cluster metadata, fix it.

Fixes: 4c8ff7095bef ("f2fs: support data compression")
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 17 +++++++++--------
 fs/f2fs/data.c     |  6 +++---
 fs/f2fs/f2fs.h     |  2 +-
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 79348bc56e351..925a5ca3744a9 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -145,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
 	return cc->rpages ? 0 : -ENOMEM;
 }
 
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
 {
 	page_array_free(cc->inode, cc->rpages, cc->cluster_size);
 	cc->rpages = NULL;
 	cc->nr_rpages = 0;
 	cc->nr_cpages = 0;
-	cc->cluster_idx = NULL_CLUSTER;
+	if (!reuse)
+		cc->cluster_idx = NULL_CLUSTER;
 }
 
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
@@ -1034,7 +1035,7 @@ retry:
 		ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
 					&last_block_in_bio, false, true);
 		f2fs_put_rpages(cc);
-		f2fs_destroy_compress_ctx(cc);
+		f2fs_destroy_compress_ctx(cc, true);
 		if (ret)
 			goto out;
 		if (bio)
@@ -1061,7 +1062,7 @@ retry:
 release_and_retry:
 			f2fs_put_rpages(cc);
 			f2fs_unlock_rpages(cc, i + 1);
-			f2fs_destroy_compress_ctx(cc);
+			f2fs_destroy_compress_ctx(cc, true);
 			goto retry;
 		}
 	}
@@ -1094,7 +1095,7 @@ release_and_retry:
 unlock_pages:
 	f2fs_put_rpages(cc);
 	f2fs_unlock_rpages(cc, i);
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, true);
 out:
 	return ret;
 }
@@ -1130,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
 		set_cluster_dirty(&cc);
 
 	f2fs_put_rpages_wbc(&cc, NULL, false, 1);
-	f2fs_destroy_compress_ctx(&cc);
+	f2fs_destroy_compress_ctx(&cc, false);
 
 	return first_index;
 }
@@ -1350,7 +1351,7 @@ unlock_continue:
 	f2fs_put_rpages(cc);
 	page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
 	cc->cpages = NULL;
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, false);
 	return 0;
 
 out_destroy_crypt:
@@ -1512,7 +1513,7 @@ write:
 	err = f2fs_write_raw_pages(cc, submitted, wbc, io_type);
 	f2fs_put_rpages_wbc(cc, wbc, false, 0);
 destroy_out:
-	f2fs_destroy_compress_ctx(cc);
+	f2fs_destroy_compress_ctx(cc, false);
 	return err;
 }
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 96f1a354f89fd..33e56ae84e358 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
 							max_nr_pages,
 							&last_block_in_bio,
 							rac != NULL, false);
-				f2fs_destroy_compress_ctx(&cc);
+				f2fs_destroy_compress_ctx(&cc, false);
 				if (ret)
 					goto set_error_page;
 			}
@@ -2332,7 +2332,7 @@ next_page:
 							max_nr_pages,
 							&last_block_in_bio,
 							rac != NULL, false);
-				f2fs_destroy_compress_ctx(&cc);
+				f2fs_destroy_compress_ctx(&cc, false);
 			}
 		}
 #endif
@@ -3033,7 +3033,7 @@ next:
 		}
 	}
 	if (f2fs_compressed_file(inode))
-		f2fs_destroy_compress_ctx(&cc);
+		f2fs_destroy_compress_ctx(&cc, false);
 #endif
 	if (retry) {
 		index = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 044878866ca34..c83d90125ebd9 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
 void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
 void f2fs_put_page_dic(struct page *page);
 int f2fs_init_compress_ctx(struct compress_ctx *cc);
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
 void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
 int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
-- 
GitLab


From 576f9eacc680d2b1f37e8010cff62f7b227ea769 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Mon, 10 May 2021 14:55:09 +0800
Subject: [PATCH 0405/3804] net: stmmac: Fix MAC WoL not working if PHY does
 not support WoL

Both get and set WoL will check device_can_wakeup(), if MAC supports PMT, it
will set device wakeup capability. After commit 1d8e5b0f3f2c ("net: stmmac:
Support WOL with phy"), device wakeup capability will be overwritten in
stmmac_init_phy() according to phy's Wol feature. If phy doesn't support WoL,
then MAC will lose wakeup capability. To fix this issue, only overwrite device
wakeup capability when MAC doesn't support PMT.

For STMMAC, the driver now checks the MAC's WoL capability if the MAC supports
PMT; if it does not, the driver checks the PHY's WoL capability.

Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy")
Reviewed-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 345b4c6d1fd40..fea3bf07ae892 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1196,7 +1196,6 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
  */
 static int stmmac_init_phy(struct net_device *dev)
 {
-	struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
 	struct stmmac_priv *priv = netdev_priv(dev);
 	struct device_node *node;
 	int ret;
@@ -1222,8 +1221,12 @@ static int stmmac_init_phy(struct net_device *dev)
 		ret = phylink_connect_phy(priv->phylink, phydev);
 	}
 
-	phylink_ethtool_get_wol(priv->phylink, &wol);
-	device_set_wakeup_capable(priv->device, !!wol.supported);
+	if (!priv->plat->pmt) {
+		struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+
+		phylink_ethtool_get_wol(priv->phylink, &wol);
+		device_set_wakeup_capable(priv->device, !!wol.supported);
+	}
 
 	return ret;
 }
-- 
GitLab


From 29249eac5225429b898f278230a6ca2baa1ae154 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 11 May 2021 19:13:51 +0200
Subject: [PATCH 0406/3804] mptcp: fix data stream corruption

Maxim reported several issues when forcing a TCP transparent proxy
to use the MPTCP protocol for the inbound connections. He also
provided a clean reproducer.

The problem boils down to 'mptcp_frag_can_collapse_to()' assuming
that only MPTCP will use the given page_frag.

If others - e.g. the plain TCP protocol - allocate page fragments,
we can end-up re-using already allocated memory for mptcp_data_frag.

Fix the issue ensuring that the to-be-expanded data fragment is
located at the current page frag end.

v1 -> v2:
 - added missing fixes tag (Mat)

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/178
Reported-and-tested-by: Maxim Galaganov <max@internet.ru>
Fixes: 18b683bff89d ("mptcp: queue data for mptcp level retransmission")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 29a2d690d8d59..2d21a4793d9d0 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -879,12 +879,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
 	       !mpext->frozen;
 }
 
+/* we can append data to the given data frag if:
+ * - there is space available in the backing page_frag
+ * - the data frag tail matches the current page_frag free offset
+ * - the data frag end sequence number matches the current write seq
+ */
 static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
 				       const struct page_frag *pfrag,
 				       const struct mptcp_data_frag *df)
 {
 	return df && pfrag->page == df->page &&
 		pfrag->size - pfrag->offset > 0 &&
+		pfrag->offset == (df->offset + df->data_len) &&
 		df->data_seq + df->data_len == msk->write_seq;
 }
 
-- 
GitLab


From bcbda3fc616272686208f9c4d5f6dccb65360bd8 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 11 May 2021 11:11:32 -0700
Subject: [PATCH 0407/3804] ionic: fix ptp support config breakage

When IONIC=y and PTP_1588_CLOCK=m were set in the .config file
the driver link failed with undefined references.

We add the dependency
	depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
to clear this up.

If PTP_1588_CLOCK=m, the depends limits IONIC to =m (or disabled).
If PTP_1588_CLOCK is disabled, IONIC can be any of y/m/n.

Fixes: 61db421da31b ("ionic: link in the new hw timestamp code")
Reported-by: kernel test robot <lkp@intel.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig
index 5f8b0bb3af6e3..202973a82712e 100644
--- a/drivers/net/ethernet/pensando/Kconfig
+++ b/drivers/net/ethernet/pensando/Kconfig
@@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO
 config IONIC
 	tristate "Pensando Ethernet IONIC Support"
 	depends on 64BIT && PCI
+	depends on PTP_1588_CLOCK || !PTP_1588_CLOCK
 	select NET_DEVLINK
 	select DIMLIB
 	help
-- 
GitLab


From 440c3247cba3d9433ac435d371dd7927d68772a7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Tue, 11 May 2021 14:42:04 -0500
Subject: [PATCH 0408/3804] net: ipa: memory region array is variable size

IPA configuration data includes an array of memory region
descriptors.  That was a fixed-size array at one time, but
at some point we started defining it such that it was only
as big as required for a given platform.  The actual number
of entries in the array is recorded in the configuration data
along with the array.

A loop in ipa_mem_config() still assumes the array has entries
for all defined memory region IDs.  As a result, this loop can
go past the end of the actual array and attempt to write
"canary" values based on nonsensical data.

Fix this, by stashing the number of entries in the array, and
using that rather than IPA_MEM_COUNT in the initialization loop
found in ipa_mem_config().

The only remaining use of IPA_MEM_COUNT is in a validation check
to ensure configuration data doesn't have too many entries.
That's fine for now.

Fixes: 3128aae8c439a ("net: ipa: redefine struct ipa_mem_data")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa.h     | 2 ++
 drivers/net/ipa/ipa_mem.c | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index e7ff376cb5b7d..744406832a774 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -58,6 +58,7 @@ enum ipa_flag {
  * @mem_virt:		Virtual address of IPA-local memory space
  * @mem_offset:		Offset from @mem_virt used for access to IPA memory
  * @mem_size:		Total size (bytes) of memory at @mem_virt
+ * @mem_count:		Number of entries in the mem array
  * @mem:		Array of IPA-local memory region descriptors
  * @imem_iova:		I/O virtual address of IPA region in IMEM
  * @imem_size:		Size of IMEM region
@@ -103,6 +104,7 @@ struct ipa {
 	void *mem_virt;
 	u32 mem_offset;
 	u32 mem_size;
+	u32 mem_count;
 	const struct ipa_mem *mem;
 
 	unsigned long imem_iova;
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index c5c3b1b7e67d5..1624125e7459f 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -180,7 +180,7 @@ int ipa_mem_config(struct ipa *ipa)
 	 * for the region, write "canary" values in the space prior to
 	 * the region's base address.
 	 */
-	for (mem_id = 0; mem_id < IPA_MEM_COUNT; mem_id++) {
+	for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) {
 		const struct ipa_mem *mem = &ipa->mem[mem_id];
 		u16 canary_count;
 		__le32 *canary;
@@ -487,6 +487,7 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	ipa->mem_size = resource_size(res);
 
 	/* The ipa->mem[] array is indexed by enum ipa_mem_id values */
+	ipa->mem_count = mem_data->local_count;
 	ipa->mem = mem_data->local;
 
 	ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size);
-- 
GitLab


From a78339698ab1f43435fbe67fcd6de8f4f6eb9eec Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 May 2021 14:49:45 +0000
Subject: [PATCH 0409/3804] powerpc/interrupts: Fix kuep_unlock() call

Same as kuap_user_restore(), kuep_unlock() has to be called when
really returning to user, that is in interrupt_exit_user_prepare(),
not in interrupt_exit_prepare().

Fixes: b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b831e54a2579db24fbef836ed415588ce2b3e825.1620312573.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/interrupt.h | 2 --
 arch/powerpc/kernel/interrupt.c      | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 44cde2e129b88..c77e8f57ff062 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
  */
 static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-	if (user_mode(regs))
-		kuep_unlock();
 }
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index e4559f8914eb7..ed6cebcb78475 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -427,6 +427,7 @@ again:
 
 	/* Restore user access locks last */
 	kuap_user_restore(regs);
+	kuep_unlock();
 
 	return ret;
 }
-- 
GitLab


From 5d510ed78bcfcbbd3b3891cbe79cd7543bce1d05 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 May 2021 11:56:31 +0000
Subject: [PATCH 0410/3804] powerpc/syscall: Calling kuap_save_and_lock() is
 wrong

kuap_save_and_lock() is only for interrupts inside kernel.

System calls only come from user, so calling kuap_save_and_lock()
is wrong.

Fixes: c16728835eec ("powerpc/32: Manage KUAP in C")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/332773775cf24a422105dee2d383fb8f04589045.1620302182.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/interrupt.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index ed6cebcb78475..e0938ba298f2a 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
 	syscall_fn f;
 
 	kuep_lock();
-#ifdef CONFIG_PPC32
-	kuap_save_and_lock(regs);
-#endif
 
 	regs->orig_gpr3 = r3;
 
-- 
GitLab


From 2c8c89b95831f46a2fb31a8d0fef4601694023ce Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:52 +1000
Subject: [PATCH 0411/3804] powerpc/pseries: Fix hcall tracing recursion in pv
 queued spinlocks

The paravirt queued spinlock slow path adds itself to the queue then
calls pv_wait to wait for the lock to become free. This is implemented
by calling H_CONFER to donate cycles.

When hcall tracing is enabled, this H_CONFER call can lead to a spin
lock being taken in the tracing code, which will result in the lock
being taken again, which will also go to the slow path because it queues
behind itself and so won't ever make progress.

An example trace of a deadlock:

  __pv_queued_spin_lock_slowpath
  trace_clock_global
  ring_buffer_lock_reserve
  trace_event_buffer_lock_reserve
  trace_event_buffer_reserve
  trace_event_raw_event_hcall_exit
  __trace_hcall_exit
  plpar_hcall_norets_trace
  __pv_queued_spin_lock_slowpath
  trace_clock_global
  ring_buffer_lock_reserve
  trace_event_buffer_lock_reserve
  trace_event_buffer_reserve
  trace_event_raw_event_rcu_dyntick
  rcu_irq_exit
  irq_exit
  __do_irq
  call_do_irq
  do_IRQ
  hardware_interrupt_common_virt

Fix this by introducing plpar_hcall_norets_notrace(), and using that to
make SPLPAR virtual processor dispatching hcalls by the paravirt
spinlock code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/hvcall.h       |  3 +++
 arch/powerpc/include/asm/paravirt.h     | 22 +++++++++++++++++++---
 arch/powerpc/platforms/pseries/hvCall.S | 10 ++++++++++
 arch/powerpc/platforms/pseries/lpar.c   |  3 +--
 4 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4430509060185..e3b29eda8074c 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -448,6 +448,9 @@
  */
 long plpar_hcall_norets(unsigned long opcode, ...);
 
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
 /**
  * plpar_hcall: - Make a pseries hypervisor call
  * @opcode: The hypervisor call to make.
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index 5d1726bb28e79..bcb7b5f917be6 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
 	return be32_to_cpu(yield_count);
 }
 
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
 static inline void yield_to_preempted(int cpu, u32 yield_count)
 {
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+	plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
 }
 
 static inline void prod_cpu(int cpu)
 {
-	plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+	plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
 }
 
 static inline void yield_to_any(void)
 {
-	plpar_hcall_norets(H_CONFER, -1, 0);
+	plpar_hcall_norets_notrace(H_CONFER, -1, 0);
 }
 #else
 static inline bool is_shared_processor(void)
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 2136e42833af3..8a2b8d64265bc 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1);						\
 #define HCALL_BRANCH(LABEL)
 #endif
 
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+	HVSC				/* invoke the hypervisor */
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+	blr				/* return r3 = status */
+
 _GLOBAL_TOC(plpar_hcall_norets)
 	HMT_MEDIUM
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 1f3152ad72132..b619568a4d04a 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1830,8 +1830,7 @@ void hcall_tracepoint_unregfunc(void)
 
 /*
  * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * recursion.
  */
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
-- 
GitLab


From a3f1a39a5643d5c5ed3eee4edd933e0ebfeeed6e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:53 +1000
Subject: [PATCH 0412/3804] powerpc/pseries: Don't trace hcall tracing wrapper

This doesn't seem very useful to trace before the recursion check, even
if the ftrace code has any recursion checks of its own. Be on the safe
side and don't trace the hcall trace wrappers.

Reported-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-3-npiggin@gmail.com
---
 arch/powerpc/platforms/pseries/lpar.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b619568a4d04a..d79d7410c3204 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1835,7 +1835,7 @@ void hcall_tracepoint_unregfunc(void)
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
 
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
 	unsigned long flags;
 	unsigned int *depth;
@@ -1863,7 +1863,7 @@ out:
 	local_irq_restore(flags);
 }
 
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 {
 	unsigned long flags;
 	unsigned int *depth;
-- 
GitLab


From 7058f4b13edd9dd2cb3c5b4fe340d8307dbe0208 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:54 +1000
Subject: [PATCH 0413/3804] powerpc/pseries: use notrace hcall variant for
 H_CEDE idle

Rather than special-case H_CEDE in the hcall trace wrappers, make the
idle H_CEDE call use plpar_hcall_norets_notrace().

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-4-npiggin@gmail.com
---
 arch/powerpc/include/asm/plpar_wrappers.h |  6 +++++-
 arch/powerpc/platforms/pseries/lpar.c     | 10 ----------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index ece84a430701f..83e0f701ebc67 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
 
 static inline long cede_processor(void)
 {
-	return plpar_hcall_norets(H_CEDE);
+	/*
+	 * We cannot call tracepoints inside RCU idle regions which
+	 * means we must not trace H_CEDE.
+	 */
+	return plpar_hcall_norets_notrace(H_CEDE);
 }
 
 static inline long extended_cede_processor(unsigned long latency_hint)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index d79d7410c3204..ad1cec80019bb 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1840,13 +1840,6 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 	unsigned long flags;
 	unsigned int *depth;
 
-	/*
-	 * We cannot call tracepoints inside RCU idle regions which
-	 * means we must not trace H_CEDE.
-	 */
-	if (opcode == H_CEDE)
-		return;
-
 	local_irq_save(flags);
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
@@ -1868,9 +1861,6 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 	unsigned long flags;
 	unsigned int *depth;
 
-	if (opcode == H_CEDE)
-		return;
-
 	local_irq_save(flags);
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
-- 
GitLab


From 4f242fc5f2e24412b89e934dad025b10293b2712 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 8 May 2021 20:14:55 +1000
Subject: [PATCH 0414/3804] powerpc/pseries: warn if recursing into the hcall
 tracing code

The hcall tracing code has a recursion check built in, which skips
tracing if we are already tracing an hcall.

However if the tracing code has problems with recursion, this check
may not catch all cases because the tracing code could be invoked from
a different tracepoint first, then make an hcall that gets traced,
then recurse.

Add an explicit warning if recursion is detected here, which might help
to notice tracing code making hcalls. Really the core trace code should
have its own recursion checking and warnings though.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210508101455.1578318-5-npiggin@gmail.com
---
 arch/powerpc/platforms/pseries/lpar.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index ad1cec80019bb..dab356e3ff87c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1829,8 +1829,14 @@ void hcall_tracepoint_unregfunc(void)
 #endif
 
 /*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
  */
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
@@ -1844,7 +1850,7 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (WARN_ON_ONCE(*depth))
 		goto out;
 
 	(*depth)++;
@@ -1865,7 +1871,7 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 
 	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (*depth) /* Don't warn again on the way out */
 		goto out;
 
 	(*depth)++;
-- 
GitLab


From 7315e457d6bc342d06ba0b7ee498221c5237a547 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 09:25:32 +0000
Subject: [PATCH 0415/3804] powerpc/uaccess: Fix __get_user() with
 CONFIG_CC_HAS_ASM_GOTO_OUTPUT

Building kernel mainline with GCC 11 leads to following failure
when starting 'init':

  init[1]: bad frame in sys_sigreturn: 7ff5a900 nip 001083cc lr 001083c4
  Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

This is an issue due to a segfault happening in
__unsafe_restore_general_regs() in a loop copying registers from user
to kernel:

  10:	7d 09 03 a6 	mtctr   r8
  14:	80 ca 00 00 	lwz     r6,0(r10)
  18:	80 ea 00 04 	lwz     r7,4(r10)
  1c:	90 c9 00 08 	stw     r6,8(r9)
  20:	90 e9 00 0c 	stw     r7,12(r9)
  24:	39 0a 00 08 	addi    r8,r10,8
  28:	39 29 00 08 	addi    r9,r9,8
  2c:	81 4a 00 08 	lwz     r10,8(r10)  <== r10 is clobbered here
  30:	81 6a 00 0c 	lwz     r11,12(r10)
  34:	91 49 00 08 	stw     r10,8(r9)
  38:	91 69 00 0c 	stw     r11,12(r9)
  3c:	39 48 00 08 	addi    r10,r8,8
  40:	39 29 00 08 	addi    r9,r9,8
  44:	42 00 ff d0 	bdnz    14 <__unsafe_restore_general_regs+0x14>

As shown above, this is due to r10 being re-used by GCC. This didn't
happen with CLANG.

This is fixed by tagging 'x' output as an earlyclobber operand in
__get_user_asm2_goto().

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/cf0a050d124d4f426cdc7a74009d17b01d8d8969.1620465917.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/uaccess.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index a09e4240c5b16..22c79ab400060 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -157,7 +157,7 @@ do {								\
 		"2:	lwz%X1 %L0, %L1\n"			\
 		EX_TABLE(1b, %l2)				\
 		EX_TABLE(2b, %l2)				\
-		: "=r" (x)					\
+		: "=&r" (x)					\
 		: "m" (*addr)					\
 		:						\
 		: label)
-- 
GitLab


From bc581dbab26edf0b6acc98c76943b4a0c7d672a2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 09:25:44 +0000
Subject: [PATCH 0416/3804] powerpc/signal: Fix possible build failure with
 unsafe_copy_fpr_{to/from}_user

When neither CONFIG_VSX nor CONFIG_PPC_FPU_REGS are selected,
unsafe_copy_fpr_to_user() and unsafe_copy_fpr_from_user() are
doing nothing.

Then, unless the 'label' operand is used elsewhere, GCC complains
about it being defined but not used.

To fix that, add an impossible 'goto label'.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/cadc0a328bc8e6c5bf133193e7547d5c10ae7895.1620465920.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/kernel/signal.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index f4aafa337c2ed..1f07317964e49 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
 
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
 
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
-- 
GitLab


From 63970f3c37e75997ed86dbdfdc83df35f2152bb1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 8 May 2021 06:36:21 +0000
Subject: [PATCH 0417/3804] powerpc/legacy_serial: Fix UBSAN:
 array-index-out-of-bounds

UBSAN complains when a pointer is calculated with invalid
'legacy_serial_console' index, although the index is verified
before dereferencing the pointer.

Fix it by checking 'legacy_serial_console' validity before
calculating pointers.

Fixes: 0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()")
Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210511010712.750096-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/legacy_serial.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 8b2c1a8553a0e..cfc03e016ff2d 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
 
 static int __init ioremap_legacy_serial_console(void)
 {
-	struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
-	struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+	struct plat_serial8250_port *port;
+	struct legacy_serial_info *info;
 	void __iomem *vaddr;
 
 	if (legacy_serial_console < 0)
 		return 0;
 
+	info = &legacy_serial_infos[legacy_serial_console];
+	port = &legacy_serial_ports[legacy_serial_console];
+
 	if (!info->early_addr)
 		return 0;
 
-- 
GitLab


From da3bb206c9ceb0736d9e2897ea697acabad35833 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 11 May 2021 20:54:59 +1000
Subject: [PATCH 0418/3804] KVM: PPC: Book3S HV: Fix kvm_unmap_gfn_range_hv()
 for Hash MMU

Commit 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based
MMU notifier callbacks") fixed kvm_unmap_gfn_range_hv() by adding a for
loop over each gfn in the range.

But for the Hash MMU it repeatedly calls kvm_unmap_rmapp() with the
first gfn of the range, rather than iterating through the range.

This exhibits as strange guest behaviour, sometimes crashing in firmware,
or booting and then guest userspace crashing unexpectedly.

Fix it by passing the iterator, gfn, to kvm_unmap_rmapp().

Fixes: 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210511105459.800788-1-mpe@ellerman.id.au
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2d9193cd73be4..c63e263312a4f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
 			kvm_unmap_radix(kvm, range->slot, gfn);
 	} else {
 		for (gfn = range->start; gfn < range->end; gfn++)
-			kvm_unmap_rmapp(kvm, range->slot, range->start);
+			kvm_unmap_rmapp(kvm, range->slot, gfn);
 	}
 
 	return false;
-- 
GitLab


From e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 11 May 2021 21:38:36 -0400
Subject: [PATCH 0419/3804] blk-iocost: fix weight updates of inner active
 iocgs

When the weight of an active iocg is updated, weight_updated() is called
which in turn calls __propagate_weights() to update the active and inuse
weights so that the effective hierarchical weights are updated accordingly.

The current implementation is incorrect for inner active nodes. For an
active leaf iocg, inuse can be any value between 1 and active and the
difference represents how much the iocg is donating. When weight is updated,
as long as inuse is clamped between 1 and the new weight, we're alright and
this is what __propagate_weights() currently implements.

However, that's not how an active inner node's inuse is set. An inner node's
inuse is solely determined by the ratio between the sums of inuse's and
active's of its children - ie. they're results of propagating the leaves'
active and inuse weights upwards. __propagate_weights() incorrectly applies
the same clamping as for a leaf when an active inner node's weight is
updated. Consider a hierarchy which looks like the following with saturating
workloads in AA and BB.

     R
   /   \
  A     B
  |     |
 AA     BB

1. For both A and B, active=100, inuse=100, hwa=0.5, hwi=0.5.

2. echo 200 > A/io.weight

3. __propagate_weights() updates A's active to 200 and leaves inuse at 100 as
   it's already between 1 and the new active, making A:active=200,
   A:inuse=100. As R's active_sum is updated along with A's active,
   A:hwa=2/3, B:hwa=1/3. However, because the inuses didn't change, the
   hwi's remain unchanged at 0.5.

4. The weight of A is now twice that of B but AA and BB still have the same
   hwi of 0.5 and thus are doing the same amount of IOs.

Fix it by making __propagate_weights() always calculate the inuse of an
active inner iocg based on the ratio of child_inuse_sum to child_active_sum.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Dan Schatzberg <dschatzberg@fb.com>
Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Cc: stable@vger.kernel.org # v5.4+
Link: https://lore.kernel.org/r/YJsxnLZV1MnBcqjj@slm.duckdns.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-iocost.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index e0c4baa018578..c2d6bc88d3f15 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
 
 	lockdep_assert_held(&ioc->lock);
 
-	inuse = clamp_t(u32, inuse, 1, active);
+	/*
+	 * For an active leaf node, its inuse shouldn't be zero or exceed
+	 * @active. An active internal node's inuse is solely determined by the
+	 * inuse to active ratio of its children regardless of @inuse.
+	 */
+	if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
+		inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
+					   iocg->child_active_sum);
+	} else {
+		inuse = clamp_t(u32, inuse, 1, active);
+	}
 
 	iocg->last_inuse = iocg->inuse;
 	if (save)
@@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
 		/* update the level sums */
 		parent->child_active_sum += (s32)(active - child->active);
 		parent->child_inuse_sum += (s32)(inuse - child->inuse);
-		/* apply the udpates */
+		/* apply the updates */
 		child->active = active;
 		child->inuse = inuse;
 
-- 
GitLab


From ca298241bc229303ff683db7265a2c625a9c00fe Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 11 May 2021 14:38:47 -0700
Subject: [PATCH 0420/3804] f2fs: avoid swapon failure by giving a warning
 first

The final solution can be migrating blocks to form a section-aligned file
internally. Meanwhile, let's ask users to do that when preparing the swap
file initially like:
1) create()
2) ioctl(F2FS_IOC_SET_PIN_FILE)
3) fallocate()

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 36e4d95891ed ("f2fs: check if swapfile is section-alligned")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 33e56ae84e358..41e260680b27c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode)
 	block_t pblock;
 	unsigned long nr_pblocks;
 	unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+	unsigned int not_aligned = 0;
 	int ret = 0;
 
 	cur_lblock = 0;
@@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode)
 
 		if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
 			nr_pblocks & (blocks_per_sec - 1)) {
-			f2fs_err(sbi, "Swapfile does not align to section");
-			ret = -EINVAL;
-			goto out;
+			if (f2fs_is_pinned_file(inode)) {
+				f2fs_err(sbi, "Swapfile does not align to section");
+				ret = -EINVAL;
+				goto out;
+			}
+			not_aligned++;
 		}
 
 		cur_lblock += nr_pblocks;
 	}
+	if (not_aligned)
+		f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+			"\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+			not_aligned);
 out:
 	return ret;
 }
@@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	int nr_extents = 0;
 	unsigned long nr_pblocks;
 	unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+	unsigned int not_aligned = 0;
 	int ret = 0;
 
 	/*
@@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 
 		if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
 				nr_pblocks & (blocks_per_sec - 1)) {
-			f2fs_err(sbi, "Swapfile does not align to section");
-			ret = -EINVAL;
-			goto out;
+			if (f2fs_is_pinned_file(inode)) {
+				f2fs_err(sbi, "Swapfile does not align to section");
+				ret = -EINVAL;
+				goto out;
+			}
+			not_aligned++;
 		}
 
 		if (cur_lblock + nr_pblocks >= sis->max)
@@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	sis->max = cur_lblock;
 	sis->pages = cur_lblock - 1;
 	sis->highest_bit = cur_lblock - 1;
+
+	if (not_aligned)
+		f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+			"\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+			not_aligned);
 out:
 	return ret;
 }
-- 
GitLab


From 2b17c400aeb44daf041627722581ade527bb3c1d Mon Sep 17 00:00:00 2001
From: Norbert Slusarek <nslusarek@gmx.net>
Date: Wed, 12 May 2021 00:43:54 +0200
Subject: [PATCH 0421/3804] can: isotp: prevent race between isotp_bind() and
 isotp_setsockopt()

A race condition was found in isotp_setsockopt() which allows socket
options to be changed after the socket was bound.
For the specific case of SF_BROADCAST support, this might lead to a possible
use-after-free because can_rx_unregister() is not called.

Checking for the flag under the socket lock in isotp_bind() and taking
the lock in isotp_setsockopt() fixes the issue.

Fixes: 921ca574cd38 ("can: isotp: add SF_BROADCAST support for functional addressing")
Link: https://lore.kernel.org/r/trinity-e6ae9efa-9afb-4326-84c0-f3609b9b8168-1620773528307@3c-app-gmx-bs06
Reported-by: Norbert Slusarek <nslusarek@gmx.net>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Norbert Slusarek <nslusarek@gmx.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 49 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 16 deletions(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index 9f94ad3caee92..253b24417c8e5 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1062,27 +1062,31 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (len < ISOTP_MIN_NAMELEN)
 		return -EINVAL;
 
+	if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
+		return -EADDRNOTAVAIL;
+
+	if (!addr->can_ifindex)
+		return -ENODEV;
+
+	lock_sock(sk);
+
 	/* do not register frame reception for functional addressing */
 	if (so->opt.flags & CAN_ISOTP_SF_BROADCAST)
 		do_rx_reg = 0;
 
 	/* do not validate rx address for functional addressing */
 	if (do_rx_reg) {
-		if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id)
-			return -EADDRNOTAVAIL;
+		if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
 
-		if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
-			return -EADDRNOTAVAIL;
+		if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
 	}
 
-	if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG))
-		return -EADDRNOTAVAIL;
-
-	if (!addr->can_ifindex)
-		return -ENODEV;
-
-	lock_sock(sk);
-
 	if (so->bound && addr->can_ifindex == so->ifindex &&
 	    addr->can_addr.tp.rx_id == so->rxid &&
 	    addr->can_addr.tp.tx_id == so->txid)
@@ -1164,16 +1168,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
 	return ISOTP_MIN_NAMELEN;
 }
 
-static int isotp_setsockopt(struct socket *sock, int level, int optname,
+static int isotp_setsockopt_locked(struct socket *sock, int level, int optname,
 			    sockptr_t optval, unsigned int optlen)
 {
 	struct sock *sk = sock->sk;
 	struct isotp_sock *so = isotp_sk(sk);
 	int ret = 0;
 
-	if (level != SOL_CAN_ISOTP)
-		return -EINVAL;
-
 	if (so->bound)
 		return -EISCONN;
 
@@ -1248,6 +1249,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname,
 	return ret;
 }
 
+static int isotp_setsockopt(struct socket *sock, int level, int optname,
+			    sockptr_t optval, unsigned int optlen)
+
+{
+	struct sock *sk = sock->sk;
+	int ret;
+
+	if (level != SOL_CAN_ISOTP)
+		return -EINVAL;
+
+	lock_sock(sk);
+	ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen);
+	release_sock(sk);
+	return ret;
+}
+
 static int isotp_getsockopt(struct socket *sock, int level, int optname,
 			    char __user *optval, int __user *optlen)
 {
-- 
GitLab


From 02dbb7246c5bbbbe1607ebdc546ba5c454a664b1 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
Date: Tue, 11 May 2021 20:46:09 +0530
Subject: [PATCH 0422/3804] sched/fair: Fix clearing of has_idle_cores flag in
 select_idle_cpu()

In commit:

  9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")

in select_idle_cpu(), we check if an idle core is present in the LLC
of the target CPU via the flag "has_idle_cores". We look for the idle
core in select_idle_core(). If select_idle_core() isn't able to find
an idle core/CPU, we need to unset the has_idle_cores flag in the LLC
of the target to prevent other CPUs from going down this route.

However, the current code is unsetting it in the LLC of the current
CPU instead of the target CPU. This patch fixes this issue.

Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()")
Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Link: https://lore.kernel.org/r/1620746169-13996-1-git-send-email-ego@linux.vnet.ibm.com
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20aa234ffe04c..3248e24a90b0f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 	}
 
 	if (has_idle_core)
-		set_idle_cores(this, false);
+		set_idle_cores(target, false);
 
 	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		time = cpu_clock(this) - time;
-- 
GitLab


From 440e906702410f59ae5397ec9e3b639edb53f80e Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 10 May 2021 15:48:49 -0700
Subject: [PATCH 0423/3804] perf/x86/intel/uncore: Drop unnecessary NULL checks
 after container_of()

The parameter passed to the pmu_enable() and pmu_disable() functions can not be
NULL because it is dereferenced by the caller.

That means the result of container_of() on that parameter can also never be NULL.
The existing NULL checks are therefore unnecessary and misleading. Remove them.

This change was made automatically with the following Coccinelle script.

  @@
  type t;
  identifier v;
  statement s;
  @@

  <+...
  (
    t v = container_of(...);
  |
    v = container_of(...);
  )
    ...
    when != v
  - if (\( !v \| v == NULL \) ) s
  ...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510224849.2349861-1-linux@roeck-us.net
---
 arch/x86/events/intel/uncore.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index df7b07d7fdcb5..9bf4dbbc26e22 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -801,8 +801,6 @@ static void uncore_pmu_enable(struct pmu *pmu)
 	struct intel_uncore_box *box;
 
 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
-	if (!uncore_pmu)
-		return;
 
 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 	if (!box)
@@ -818,8 +816,6 @@ static void uncore_pmu_disable(struct pmu *pmu)
 	struct intel_uncore_box *box;
 
 	uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
-	if (!uncore_pmu)
-		return;
 
 	box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
 	if (!box)
-- 
GitLab


From 6627eb25e40cc8d135d3f8d5391851d18ac497d7 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:10 -0700
Subject: [PATCH 0424/3804] x86/entry: Unify definitions from <asm/calling.h>
 and <asm/ptrace-abi.h>

The register offsets in <asm/ptrace-abi.h> are duplicated in
entry/calling.h, but are formatted differently and therefore not
compatible. Use the version from <asm/ptrace-abi.h> consistently.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-2-hpa@zytor.com
---
 arch/x86/entry/calling.h  | 36 +-----------------------------------
 arch/x86/kernel/head_64.S |  6 +++---
 2 files changed, 4 insertions(+), 38 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 07a9331d55e73..7436d4a74ecbf 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,6 +6,7 @@
 #include <asm/percpu.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/ptrace-abi.h>
 
 /*
 
@@ -62,41 +63,6 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-/* The layout forms the "struct pt_regs" on the stack: */
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
-#define R15		0*8
-#define R14		1*8
-#define R13		2*8
-#define R12		3*8
-#define RBP		4*8
-#define RBX		5*8
-/* These regs are callee-clobbered. Always saved on kernel entry. */
-#define R11		6*8
-#define R10		7*8
-#define R9		8*8
-#define R8		9*8
-#define RAX		10*8
-#define RCX		11*8
-#define RDX		12*8
-#define RSI		13*8
-#define RDI		14*8
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
-#define ORIG_RAX	15*8
-/* Return frame for iretq */
-#define RIP		16*8
-#define CS		17*8
-#define EFLAGS		18*8
-#define RSP		19*8
-#define SS		20*8
-
-#define SIZEOF_PTREGS	21*8
-
 .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
 	.if \save_ret
 	pushq	%rsi		/* pt_regs->si */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 04bddaaba8e25..d8b3ebd2bb85f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -62,7 +62,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 */
 
 	/* Set up the stack for verify_cpu(), similar to initial_stack below */
-	leaq	(__end_init_task - SIZEOF_PTREGS)(%rip), %rsp
+	leaq	(__end_init_task - FRAME_SIZE)(%rip), %rsp
 
 	leaq	_text(%rip), %rdi
 	pushq	%rsi
@@ -343,10 +343,10 @@ SYM_DATA(initial_vc_handler,	.quad handle_vc_boot_ghcb)
 #endif
 
 /*
- * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
+ * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
  * reliably detect the end of the stack.
  */
-SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS)
+SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
 	__FINITDATA
 
 	__INIT
-- 
GitLab


From 3e5e7f7736b05d5fdf2cc4e0ba4f2d8bc42c630d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:11 -0700
Subject: [PATCH 0425/3804] x86/entry: Reverse arguments to do_syscall_64()

Reverse the order of arguments to do_syscall_64() so that the first
argument is the pt_regs pointer. This is not only consistent with
*all* other entry points from assembly, but it actually makes the
compiled code slightly better.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-3-hpa@zytor.com
---
 arch/x86/entry/common.c        | 2 +-
 arch/x86/entry/entry_64.S      | 4 ++--
 arch/x86/include/asm/syscall.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7b2542b13ebd9..00da0f5420de8 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -36,7 +36,7 @@
 #include <asm/irq_stack.h>
 
 #ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
 {
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a16a5294d55f6..1d9db15fdc692 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -107,8 +107,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
 	/* IRQs are off. */
-	movq	%rax, %rdi
-	movq	%rsp, %rsi
+	movq	%rsp, %rdi
+	movq	%rax, %rsi
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 	/*
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 7cbf733d11afd..4e20054d7533e 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -160,7 +160,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(unsigned long nr, struct pt_regs *regs);
+void do_syscall_64(struct pt_regs *regs, unsigned long nr);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);
 
-- 
GitLab


From dce0aa3b2ef28900cc4c779c59a870f1b4bdadee Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:12 -0700
Subject: [PATCH 0426/3804] x86/syscall: Unconditionally prototype
 {ia32,x32}_sys_call_table[]

Even if these APIs are disabled, and the arrays therefore do not
exist, having the prototypes allows us to use IS_ENABLED() rather than
using #ifdefs.

If something ends up trying to actually *use* these arrays a linker
error will ensue.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-4-hpa@zytor.com
---
 arch/x86/include/asm/syscall.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e20054d7533e..f6593cafdbd93 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -21,13 +21,12 @@ extern const sys_call_ptr_t sys_call_table[];
 
 #if defined(CONFIG_X86_32)
 #define ia32_sys_call_table sys_call_table
-#endif
-
-#if defined(CONFIG_IA32_EMULATION)
+#else
+/*
+ * These may not exist, but still put the prototypes in so we
+ * can use IS_ENABLED().
+ */
 extern const sys_call_ptr_t ia32_sys_call_table[];
-#endif
-
-#ifdef CONFIG_X86_X32_ABI
 extern const sys_call_ptr_t x32_sys_call_table[];
 #endif
 
-- 
GitLab


From 6de4ac1d03f75248974a398110b15af0bfe65a11 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:13 -0700
Subject: [PATCH 0427/3804] x86/syscall: Maximize MSR_SYSCALL_MASK

It is better to clear as many flags as possible when we do a system
call entry, as opposed to the other way around. The fewer flags we
keep, the lesser the possible interference between the kernel and user
space.

The flags changed are:

 - CF, PF, AF, ZF, SF, OF: these are arithmetic flags which affect
   branches, possibly speculatively. They should be cleared for the same
   reasons we now clear all GPRs on entry.

 - RF: suppresses a code breakpoint on the subsequent instruction. It is
   probably impossible to enter the kernel with RF set, but if it is
   somehow not, it would break a kernel debugger setting a breakpoint on
   the entry point. Either way, user space should not be able to control
   kernel behavior here.

 - ID: this flag has no direct effect (it is a scratch bit only.)
   However, there is no reason to retain the user space value in the
   kernel, and the standard should be to clear unless needed, not the
   other way around.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-5-hpa@zytor.com
---
 arch/x86/kernel/cpu/common.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a1b756c49a93a..6cf6975746616 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1773,10 +1773,16 @@ void syscall_init(void)
 	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
-	/* Flags to clear on syscall */
+	/*
+	 * Flags to clear on syscall; clear as much as possible
+	 * to minimize user space-kernel interference.
+	 */
 	wrmsrl(MSR_SYSCALL_MASK,
-	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|
-	       X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
+	       X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
+	       X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF|
+	       X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF|
+	       X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF|
+	       X86_EFLAGS_AC|X86_EFLAGS_ID);
 }
 
 #else	/* CONFIG_X86_64 */
-- 
GitLab


From 29e9758966f47004bd7245e6adadcb708386f36a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:14 -0700
Subject: [PATCH 0428/3804] x86/entry: Split PUSH_AND_CLEAR_REGS into two
 submacros

PUSH_AND_CLEAR_REGS, as the name implies, performs two functions:
pushing registers and clearing registers. They don't necessarily have
to be performed in immediate sequence, although all current users
do. Split it into two macros for the case where that isn't desired;
the FRED enabling patchset will eventually make use of this.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-6-hpa@zytor.com
---
 arch/x86/entry/calling.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 7436d4a74ecbf..a4c061fb7c6ea 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
+.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0
 	.if \save_ret
 	pushq	%rsi		/* pt_regs->si */
 	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
@@ -90,7 +90,9 @@ For 32-bit we have the following conventions - kernel is built with
 	.if \save_ret
 	pushq	%rsi		/* return address on top of stack */
 	.endif
+.endm
 
+.macro CLEAR_REGS
 	/*
 	 * Sanitize registers of values that a speculation attack might
 	 * otherwise want to exploit. The lower registers are likely clobbered
@@ -112,6 +114,11 @@ For 32-bit we have the following conventions - kernel is built with
 
 .endm
 
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
+	PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret
+	CLEAR_REGS
+.endm
+
 .macro POP_REGS pop_rdi=1 skip_r11rcx=0
 	popq %r15
 	popq %r14
-- 
GitLab


From 9ddcb87b9218dec760e8d8a780bc8ad514c3d36a Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 10 May 2021 11:53:15 -0700
Subject: [PATCH 0429/3804] x86/regs: Syscall_get_nr() returns -1 for a
 non-system call

syscall_get_nr() is defined to return -1 for a non-system call or a
ptrace/seccomp restart; not just any arbitrary number. See comment in
<asm-generic/syscall.h> for the official definition of this function.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510185316.3307264-7-hpa@zytor.com
---
 arch/x86/kernel/ptrace.c | 2 +-
 arch/x86/kernel/signal.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 87a4143aa7d7c..4c208ea3bd9f3 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -911,7 +911,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 		 * syscall with TS_COMPAT still set.
 		 */
 		regs->orig_ax = value;
-		if (syscall_get_nr(child, regs) >= 0)
+		if (syscall_get_nr(child, regs) != -1)
 			child->thread_info.status |= TS_I386_REGS_POKED;
 		break;
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index a06cb107c0e88..e12779a2714dc 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -713,7 +713,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 		save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL);
 
 	/* Are we from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
+	if (syscall_get_nr(current, regs) != -1) {
 		/* If so, check system call restarting.. */
 		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
@@ -793,7 +793,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
 	}
 
 	/* Did we come from a system call? */
-	if (syscall_get_nr(current, regs) >= 0) {
+	if (syscall_get_nr(current, regs) != -1) {
 		/* Restart the system call - no handlers present */
 		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
-- 
GitLab


From 2b8ca1a907d5fffc85fb648bbace28ddf3420825 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 10 May 2021 18:15:22 +0200
Subject: [PATCH 0430/3804] sched/core: Remove the pointless BUG_ON(!task) from
 wake_up_q()

container_of() can never return NULL - so don't check for it pointlessly.

[ mingo: Twiddled the changelog. ]

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210510161522.GA32644@redhat.com
---
 kernel/sched/core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095f..61d1d85bb93da 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -585,7 +585,6 @@ void wake_up_q(struct wake_q_head *head)
 		struct task_struct *task;
 
 		task = container_of(node, struct task_struct, wake_q);
-		BUG_ON(!task);
 		/* Task can safely be re-inserted now: */
 		node = node->next;
 		task->wake_q.next = NULL;
-- 
GitLab


From e5e678e4fea26d73444f4427cbbaeab4fa79ecee Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Thu, 22 Apr 2021 13:02:36 -0400
Subject: [PATCH 0431/3804] sched,fair: Skip newidle_balance if a wakeup is
 pending

The try_to_wake_up function has an optimization where it can queue
a task for wakeup on its previous CPU, if the task is still in the
middle of going to sleep inside schedule().

Once schedule() re-enables IRQs, the task will be woken up with an
IPI, and placed back on the runqueue.

If we have such a wakeup pending, there is no need to search other
CPUs for runnable tasks. Just skip (or bail out early from) newidle
balancing, and run the just woken up task.

For a memcache like workload test, this reduces total CPU use by
about 2%, proportionally split between user and system time,
and p99 and p95 application response time by 10% on average.
The schedstats run_delay number shows a similar improvement.

Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lkml.kernel.org/r/20210422130236.0bb353df@imladris.surriel.com
---
 kernel/sched/fair.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3248e24a90b0f..d10c6cc4609c0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10592,6 +10592,14 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	u64 curr_cost = 0;
 
 	update_misfit_status(NULL, this_rq);
+
+	/*
+	 * There is a task waiting to run. No need to search for one.
+	 * Return 0; the task will be enqueued when switching to idle.
+	 */
+	if (this_rq->ttwu_pending)
+		return 0;
+
 	/*
 	 * We must set idle_stamp _before_ calling idle_balance(), such that we
 	 * measure the duration of idle_balance() as idle time.
@@ -10657,7 +10665,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 		 * Stop searching for tasks to pull if there are
 		 * now runnable tasks on this rq.
 		 */
-		if (pulled_task || this_rq->nr_running > 0)
+		if (pulled_task || this_rq->nr_running > 0 ||
+		    this_rq->ttwu_pending)
 			break;
 	}
 	rcu_read_unlock();
-- 
GitLab


From 8d4c97c105ca0735b0d972d1025cb150a7008451 Mon Sep 17 00:00:00 2001
From: Pierre Gondois <Pierre.Gondois@arm.com>
Date: Tue, 4 May 2021 10:07:42 +0100
Subject: [PATCH 0432/3804] sched/fair: Only compute base_energy_pd if
 necessary

find_energy_efficient_cpu() searches for the most energy-efficient CPU
to place a task on. To do so, the energy of each performance domain
(pd) is computed w/ and w/o the task placed on it.

The energy of a pd w/o the task (base_energy_pd) is computed prior to
knowing whether a CPU is available in the pd.

Move the base_energy_pd computation after looping through the CPUs
of a pd and only compute it if at least one CPU is available.

Suggested-by: Xuewen Yan <xuewen.yan@unisoc.com>
Signed-off-by: Pierre Gondois <Pierre.Gondois@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Vincent Donnefort <vincent.donnefort@arm.com>
Link: https://lkml.kernel.org/r/20210504090743.9688-2-Pierre.Gondois@arm.com
---
 kernel/sched/fair.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d10c6cc4609c0..b229d0c238066 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6687,13 +6687,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
 	for (; pd; pd = pd->next) {
 		unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+		bool compute_prev_delta = false;
 		unsigned long base_energy_pd;
 		int max_spare_cap_cpu = -1;
 
-		/* Compute the 'base' energy of the pd, without @p */
-		base_energy_pd = compute_energy(p, -1, pd);
-		base_energy += base_energy_pd;
-
 		for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
 			if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 				continue;
@@ -6714,25 +6711,35 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			if (!fits_capacity(util, cpu_cap))
 				continue;
 
-			/* Always use prev_cpu as a candidate. */
 			if (cpu == prev_cpu) {
-				prev_delta = compute_energy(p, prev_cpu, pd);
-				prev_delta -= base_energy_pd;
-				best_delta = min(best_delta, prev_delta);
-			}
-
-			/*
-			 * Find the CPU with the maximum spare capacity in
-			 * the performance domain
-			 */
-			if (spare_cap > max_spare_cap) {
+				/* Always use prev_cpu as a candidate. */
+				compute_prev_delta = true;
+			} else if (spare_cap > max_spare_cap) {
+				/*
+				 * Find the CPU with the maximum spare capacity
+				 * in the performance domain.
+				 */
 				max_spare_cap = spare_cap;
 				max_spare_cap_cpu = cpu;
 			}
 		}
 
-		/* Evaluate the energy impact of using this CPU. */
-		if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+		if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+			continue;
+
+		/* Compute the 'base' energy of the pd, without @p */
+		base_energy_pd = compute_energy(p, -1, pd);
+		base_energy += base_energy_pd;
+
+		/* Evaluate the energy impact of using prev_cpu. */
+		if (compute_prev_delta) {
+			prev_delta = compute_energy(p, prev_cpu, pd);
+			prev_delta -= base_energy_pd;
+			best_delta = min(best_delta, prev_delta);
+		}
+
+		/* Evaluate the energy impact of using max_spare_cap_cpu. */
+		if (max_spare_cap_cpu >= 0) {
 			cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
 			cur_delta -= base_energy_pd;
 			if (cur_delta < best_delta) {
-- 
GitLab


From 619e090c8e409e09bd3e8edcd5a73d83f689890c Mon Sep 17 00:00:00 2001
From: Pierre Gondois <Pierre.Gondois@arm.com>
Date: Tue, 4 May 2021 10:07:43 +0100
Subject: [PATCH 0433/3804] sched/fair: Fix negative energy delta in
 find_energy_efficient_cpu()

find_energy_efficient_cpu() (feec()) searches the best energy CPU
to place a task on. To do so, compute_energy() estimates the energy
impact of placing the task on a CPU, based on CPU and task utilization
signals.

Utilization signals can be concurrently updated while evaluating a
performance domain (pd). In some cases, this leads to having a
'negative delta', i.e. placing the task in the pd is seen as an
energy gain. Thus, any further energy comparison is biased.

In case of a 'negative delta', return prev_cpu since:
1. a 'negative delta' happens in less than 0.5% of feec() calls,
   on a Juno with 6 CPUs (4 little, 2 big)
2. it is unlikely to have two consecutive 'negative delta' for
   a task, so if the first call fails, feec() will correctly
   place the task in the next feec() call
3. EAS current behavior tends to select prev_cpu if the task
   doesn't raise the OPP of its current pd. prev_cpu is EAS's
   generic decision
4. prev_cpu should be preferred to returning an error code.
   In the latter case, select_idle_sibling() would do the placement,
   selecting a big (and not energy efficient) CPU. As 3., the task
   would potentially reside on the big CPU for a long time

Reported-by: Xuewen Yan <xuewen.yan@unisoc.com>
Suggested-by: Xuewen Yan <xuewen.yan@unisoc.com>
Signed-off-by: Pierre Gondois <Pierre.Gondois@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Vincent Donnefort <vincent.donnefort@arm.com>
Link: https://lkml.kernel.org/r/20210504090743.9688-3-Pierre.Gondois@arm.com
---
 kernel/sched/fair.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b229d0c238066..c209f68aad612 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6661,15 +6661,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 {
 	unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
 	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int cpu, best_energy_cpu = prev_cpu, target = -1;
 	unsigned long cpu_cap, util, base_energy = 0;
-	int cpu, best_energy_cpu = prev_cpu;
 	struct sched_domain *sd;
 	struct perf_domain *pd;
 
 	rcu_read_lock();
 	pd = rcu_dereference(rd->pd);
 	if (!pd || READ_ONCE(rd->overutilized))
-		goto fail;
+		goto unlock;
 
 	/*
 	 * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6679,7 +6679,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 		sd = sd->parent;
 	if (!sd)
-		goto fail;
+		goto unlock;
+
+	target = prev_cpu;
 
 	sync_entity_load_avg(&p->se);
 	if (!task_util_est(p))
@@ -6734,6 +6736,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		/* Evaluate the energy impact of using prev_cpu. */
 		if (compute_prev_delta) {
 			prev_delta = compute_energy(p, prev_cpu, pd);
+			if (prev_delta < base_energy_pd)
+				goto unlock;
 			prev_delta -= base_energy_pd;
 			best_delta = min(best_delta, prev_delta);
 		}
@@ -6741,6 +6745,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		/* Evaluate the energy impact of using max_spare_cap_cpu. */
 		if (max_spare_cap_cpu >= 0) {
 			cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+			if (cur_delta < base_energy_pd)
+				goto unlock;
 			cur_delta -= base_energy_pd;
 			if (cur_delta < best_delta) {
 				best_delta = cur_delta;
@@ -6748,25 +6754,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			}
 		}
 	}
-unlock:
 	rcu_read_unlock();
 
 	/*
 	 * Pick the best CPU if prev_cpu cannot be used, or if it saves at
 	 * least 6% of the energy used by prev_cpu.
 	 */
-	if (prev_delta == ULONG_MAX)
-		return best_energy_cpu;
+	if ((prev_delta == ULONG_MAX) ||
+	    (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+		target = best_energy_cpu;
 
-	if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
-		return best_energy_cpu;
-
-	return prev_cpu;
+	return target;
 
-fail:
+unlock:
 	rcu_read_unlock();
 
-	return -1;
+	return target;
 }
 
 /*
-- 
GitLab


From 4b7a08a0b6e4e910a6feee438d76e426381df0cb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:48 +0200
Subject: [PATCH 0434/3804] delayacct: Use sched_clock()

Like all scheduler statistics, use sched_clock() based time.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Link: https://lkml.kernel.org/r/20210505111525.001031466@infradead.org
---
 kernel/delayacct.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 27725754ac991..3fe7cd52b4593 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -7,9 +7,9 @@
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/sched/cputime.h>
+#include <linux/sched/clock.h>
 #include <linux/slab.h>
 #include <linux/taskstats.h>
-#include <linux/time.h>
 #include <linux/sysctl.h>
 #include <linux/delayacct.h>
 #include <linux/module.h>
@@ -42,10 +42,9 @@ void __delayacct_tsk_init(struct task_struct *tsk)
  * Finish delay accounting for a statistic using its timestamps (@start),
  * accumalator (@total) and @count
  */
-static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
-			  u32 *count)
+static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count)
 {
-	s64 ns = ktime_get_ns() - *start;
+	s64 ns = local_clock() - *start;
 	unsigned long flags;
 
 	if (ns > 0) {
@@ -58,7 +57,7 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total,
 
 void __delayacct_blkio_start(void)
 {
-	current->delays->blkio_start = ktime_get_ns();
+	current->delays->blkio_start = local_clock();
 }
 
 /*
@@ -151,21 +150,20 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk)
 
 void __delayacct_freepages_start(void)
 {
-	current->delays->freepages_start = ktime_get_ns();
+	current->delays->freepages_start = local_clock();
 }
 
 void __delayacct_freepages_end(void)
 {
-	delayacct_end(
-		&current->delays->lock,
-		&current->delays->freepages_start,
-		&current->delays->freepages_delay,
-		&current->delays->freepages_count);
+	delayacct_end(&current->delays->lock,
+		      &current->delays->freepages_start,
+		      &current->delays->freepages_delay,
+		      &current->delays->freepages_count);
 }
 
 void __delayacct_thrashing_start(void)
 {
-	current->delays->thrashing_start = ktime_get_ns();
+	current->delays->thrashing_start = local_clock();
 }
 
 void __delayacct_thrashing_end(void)
-- 
GitLab


From 4e29fb709885eda5f0d1fa3418e6ead01a64e46d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:45 +0200
Subject: [PATCH 0435/3804] sched: Rename sched_info_{queued,dequeued}

For consistency, rename {queued,dequeued} to {enqueue,dequeue}.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Link: https://lkml.kernel.org/r/20210505111525.061402904@infradead.org
---
 kernel/sched/core.c  |  4 ++--
 kernel/sched/stats.h | 20 ++++++++++----------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 61d1d85bb93da..660120d0a2ce1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1595,7 +1595,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 		update_rq_clock(rq);
 
 	if (!(flags & ENQUEUE_RESTORE)) {
-		sched_info_queued(rq, p);
+		sched_info_enqueue(rq, p);
 		psi_enqueue(p, flags & ENQUEUE_WAKEUP);
 	}
 
@@ -1609,7 +1609,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 		update_rq_clock(rq);
 
 	if (!(flags & DEQUEUE_SAVE)) {
-		sched_info_dequeued(rq, p);
+		sched_info_dequeue(rq, p);
 		psi_dequeue(p, flags & DEQUEUE_SLEEP);
 	}
 
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index dc218e9f45585..ee7da12a70569 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -25,7 +25,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 }
 
 static inline void
-rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
+rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 {
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
@@ -42,7 +42,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
 
 #else /* !CONFIG_SCHEDSTATS: */
 static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
-static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { }
+static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
 # define   schedstat_enabled()		0
 # define __schedstat_inc(var)		do { } while (0)
@@ -161,7 +161,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
  * from dequeue_task() to account for possible rq->clock skew across CPUs. The
  * delta taken on each CPU would annul the skew.
  */
-static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
+static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
 	unsigned long long now = rq_clock(rq), delta = 0;
 
@@ -172,7 +172,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
 	sched_info_reset_dequeued(t);
 	t->sched_info.run_delay += delta;
 
-	rq_sched_info_dequeued(rq, delta);
+	rq_sched_info_dequeue(rq, delta);
 }
 
 /*
@@ -197,9 +197,9 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 /*
  * This function is only called from enqueue_task(), but also only updates
  * the timestamp if it is already not set.  It's assumed that
- * sched_info_dequeued() will clear that stamp when appropriate.
+ * sched_info_dequeue() will clear that stamp when appropriate.
  */
-static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
+static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
 {
 	if (sched_info_on()) {
 		if (!t->sched_info.last_queued)
@@ -212,7 +212,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
  * due, typically, to expiring its time slice (this may also be called when
  * switching to the idle task).  Now we can calculate how long we ran.
  * Also, if the process is still in the TASK_RUNNING state, call
- * sched_info_queued() to mark that it has now again started waiting on
+ * sched_info_enqueue() to mark that it has now again started waiting on
  * the runqueue.
  */
 static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
@@ -222,7 +222,7 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
 	rq_sched_info_depart(rq, delta);
 
 	if (t->state == TASK_RUNNING)
-		sched_info_queued(rq, t);
+		sched_info_enqueue(rq, t);
 }
 
 /*
@@ -253,9 +253,9 @@ sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *n
 }
 
 #else /* !CONFIG_SCHED_INFO: */
-# define sched_info_queued(rq, t)	do { } while (0)
+# define sched_info_enqueue(rq, t)	do { } while (0)
 # define sched_info_reset_dequeued(t)	do { } while (0)
-# define sched_info_dequeued(rq, t)	do { } while (0)
+# define sched_info_dequeue(rq, t)	do { } while (0)
 # define sched_info_depart(rq, t)	do { } while (0)
 # define sched_info_arrive(rq, next)	do { } while (0)
 # define sched_info_switch(rq, t, next)	do { } while (0)
-- 
GitLab


From c5895d3f06cbb80ccb311f1dcb37074651030cb6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:42 +0200
Subject: [PATCH 0436/3804] sched: Simplify sched_info_on()

The situation around sched_info is somewhat complicated, it is used by
sched_stats and delayacct and, indirectly, kvm.

If SCHEDSTATS=Y (but disabled by default) sched_info_on() is
unconditionally true -- this is the case for all distro kernel configs
I checked.

If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then
sched_info_on() can return false when delayacct is disabled,
presumably because there would be no other users left -- except that
kvm still is one.

Instead of complicating matters further by accurately accounting
sched_stat and kvm state, simply unconditionally enable when
SCHED_INFO=Y, matching the common distro case.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210505111525.121458839@infradead.org
---
 include/linux/sched/stat.h | 10 ++--------
 kernel/delayacct.c         |  1 -
 kernel/sched/stats.h       | 37 ++++++++++---------------------------
 3 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 568286411b43a..939c3ec9e1b90 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -3,6 +3,7 @@
 #define _LINUX_SCHED_STAT_H
 
 #include <linux/percpu.h>
+#include <linux/kconfig.h>
 
 /*
  * Various counters maintained by the scheduler and fork(),
@@ -23,14 +24,7 @@ extern unsigned long nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
 {
-#ifdef CONFIG_SCHEDSTATS
-	return 1;
-#elif defined(CONFIG_TASK_DELAY_ACCT)
-	extern int delayacct_on;
-	return delayacct_on;
-#else
-	return 0;
-#endif
+	return IS_ENABLED(CONFIG_SCHED_INFO);
 }
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3fe7cd52b4593..3a0b910386d15 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -15,7 +15,6 @@
 #include <linux/module.h>
 
 int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
-EXPORT_SYMBOL_GPL(delayacct_on);
 struct kmem_cache *delayacct_cache;
 
 static int __init delayacct_setup_disable(char *str)
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index ee7da12a70569..33ffd41935bab 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -150,11 +150,6 @@ static inline void psi_sched_switch(struct task_struct *prev,
 #endif /* CONFIG_PSI */
 
 #ifdef CONFIG_SCHED_INFO
-static inline void sched_info_reset_dequeued(struct task_struct *t)
-{
-	t->sched_info.last_queued = 0;
-}
-
 /*
  * We are interested in knowing how long it was from the *first* time a
  * task was queued to the time that it finally hit a CPU, we call this routine
@@ -163,13 +158,12 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
  */
 static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long now = rq_clock(rq), delta = 0;
+	unsigned long long delta = 0;
 
-	if (sched_info_on()) {
-		if (t->sched_info.last_queued)
-			delta = now - t->sched_info.last_queued;
+	if (t->sched_info.last_queued) {
+		delta = rq_clock(rq) - t->sched_info.last_queued;
+		t->sched_info.last_queued = 0;
 	}
-	sched_info_reset_dequeued(t);
 	t->sched_info.run_delay += delta;
 
 	rq_sched_info_dequeue(rq, delta);
@@ -184,9 +178,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 {
 	unsigned long long now = rq_clock(rq), delta = 0;
 
-	if (t->sched_info.last_queued)
+	if (t->sched_info.last_queued) {
 		delta = now - t->sched_info.last_queued;
-	sched_info_reset_dequeued(t);
+		t->sched_info.last_queued = 0;
+	}
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
@@ -201,10 +196,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
  */
 static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t)
 {
-	if (sched_info_on()) {
-		if (!t->sched_info.last_queued)
-			t->sched_info.last_queued = rq_clock(rq);
-	}
+	if (!t->sched_info.last_queued)
+		t->sched_info.last_queued = rq_clock(rq);
 }
 
 /*
@@ -231,7 +224,7 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
  * the idle task.)  We are only called when prev != next.
  */
 static inline void
-__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
+sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
 {
 	/*
 	 * prev now departs the CPU.  It's not interesting to record
@@ -245,18 +238,8 @@ __sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct
 		sched_info_arrive(rq, next);
 }
 
-static inline void
-sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
-{
-	if (sched_info_on())
-		__sched_info_switch(rq, prev, next);
-}
-
 #else /* !CONFIG_SCHED_INFO: */
 # define sched_info_enqueue(rq, t)	do { } while (0)
-# define sched_info_reset_dequeued(t)	do { } while (0)
 # define sched_info_dequeue(rq, t)	do { } while (0)
-# define sched_info_depart(rq, t)	do { } while (0)
-# define sched_info_arrive(rq, next)	do { } while (0)
 # define sched_info_switch(rq, t, next)	do { } while (0)
 #endif /* CONFIG_SCHED_INFO */
-- 
GitLab


From 63b3f96e1a989846a5a521d4fbef4bc86406929d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:39 +0200
Subject: [PATCH 0437/3804] kvm: Select SCHED_INFO instead of TASK_DELAY_ACCT

AFAICT KVM only relies on SCHED_INFO. Nothing uses the p->delays data
that belongs to TASK_DELAY_ACCT.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Link: https://lkml.kernel.org/r/20210505111525.187225172@infradead.org
---
 arch/arm64/kvm/Kconfig | 5 +----
 arch/x86/kvm/Kconfig   | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 3964acf5451ea..a4eba0908bfac 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -20,8 +20,6 @@ if VIRTUALIZATION
 menuconfig KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on OF
-	# for TASKSTATS/TASK_DELAY_ACCT:
-	depends on NET && MULTIUSER
 	select MMU_NOTIFIER
 	select PREEMPT_NOTIFIERS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -38,8 +36,7 @@ menuconfig KVM
 	select IRQ_BYPASS_MANAGER
 	select HAVE_KVM_IRQ_BYPASS
 	select HAVE_KVM_VCPU_RUN_PID_CHANGE
-	select TASKSTATS
-	select TASK_DELAY_ACCT
+	select SCHED_INFO
 	help
 	  Support hosting virtualized guest machines.
 
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f6b93a35ce145..fb8efb387aff5 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,8 +22,6 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
-	# for TASKSTATS/TASK_DELAY_ACCT:
-	depends on NET && MULTIUSER
 	depends on X86_LOCAL_APIC
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
@@ -36,8 +34,7 @@ config KVM
 	select KVM_ASYNC_PF
 	select USER_RETURN_NOTIFIER
 	select KVM_MMIO
-	select TASKSTATS
-	select TASK_DELAY_ACCT
+	select SCHED_INFO
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
-- 
GitLab


From eee4d9fee2544389e5ce5697ed92db67c86d7a9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:36 +0200
Subject: [PATCH 0438/3804] delayacct: Add static_branch in scheduler hooks

Cheaper when delayacct is disabled.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/20210505111525.248028369@infradead.org
---
 include/linux/delayacct.h | 8 ++++++++
 kernel/delayacct.c        | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 21651f9467515..57fefa54b53a0 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -58,8 +58,10 @@ struct task_delay_info {
 
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/jump_label.h>
 
 #ifdef CONFIG_TASK_DELAY_ACCT
+DECLARE_STATIC_KEY_TRUE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
@@ -114,6 +116,9 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
+	if (!static_branch_likely(&delayacct_key))
+		return;
+
 	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
 	if (current->delays)
 		__delayacct_blkio_start();
@@ -121,6 +126,9 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
+	if (!static_branch_likely(&delayacct_key))
+		return;
+
 	if (p->delays)
 		__delayacct_blkio_end(p);
 	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3a0b910386d15..63012fd39ae23 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -14,6 +14,7 @@
 #include <linux/delayacct.h>
 #include <linux/module.h>
 
+DEFINE_STATIC_KEY_TRUE(delayacct_key);
 int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
 struct kmem_cache *delayacct_cache;
 
@@ -28,6 +29,8 @@ void delayacct_init(void)
 {
 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
 	delayacct_tsk_init(&init_task);
+	if (!delayacct_on)
+		static_branch_disable(&delayacct_key);
 }
 
 void __delayacct_tsk_init(struct task_struct *tsk)
-- 
GitLab


From e4042ad492357fa995921376462b04a025dd53b6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 4 May 2021 22:43:32 +0200
Subject: [PATCH 0439/3804] delayacct: Default disabled

Assuming this stuff isn't actually used much; disable it by default
and avoid allocating and tracking the task_delay_info structure.

taskstats is changed to still report the regular sched and sched_info
and only skip the missing task_delay_info fields instead of not
reporting anything.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Link: https://lkml.kernel.org/r/20210505111525.308018373@infradead.org
---
 Documentation/accounting/delay-accounting.rst |  8 ++++----
 .../admin-guide/kernel-parameters.txt         |  2 +-
 include/linux/delayacct.h                     | 16 ++++------------
 kernel/delayacct.c                            | 19 +++++++++++--------
 4 files changed, 20 insertions(+), 25 deletions(-)

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index 7cc7f5852da0f..f20b282d6de08 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -69,13 +69,13 @@ Compile the kernel with::
 	CONFIG_TASK_DELAY_ACCT=y
 	CONFIG_TASKSTATS=y
 
-Delay accounting is enabled by default at boot up.
-To disable, add::
+Delay accounting is disabled by default at boot up.
+To enable, add::
 
-   nodelayacct
+   delayacct
 
 to the kernel boot options. The rest of the instructions
-below assume this has not been done.
+below assume this has been done.
 
 After the system has booted up, use a utility
 similar to  getdelays.c to access the delays
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc463..ef5048c127a3c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3244,7 +3244,7 @@
 
 	noclflush	[BUGS=X86] Don't use the CLFLUSH instruction
 
-	nodelayacct	[KNL] Disable per-task delay accounting
+	delayacct	[KNL] Enable per-task delay accounting
 
 	nodsp		[SH] Disable hardware DSP at boot time.
 
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 57fefa54b53a0..225c8e01a1117 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -61,7 +61,7 @@ struct task_delay_info {
 #include <linux/jump_label.h>
 
 #ifdef CONFIG_TASK_DELAY_ACCT
-DECLARE_STATIC_KEY_TRUE(delayacct_key);
+DECLARE_STATIC_KEY_FALSE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
@@ -69,7 +69,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
 extern void __delayacct_blkio_end(struct task_struct *);
-extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
+extern int delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
@@ -116,7 +116,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk)
 
 static inline void delayacct_blkio_start(void)
 {
-	if (!static_branch_likely(&delayacct_key))
+	if (!static_branch_unlikely(&delayacct_key))
 		return;
 
 	delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
@@ -126,7 +126,7 @@ static inline void delayacct_blkio_start(void)
 
 static inline void delayacct_blkio_end(struct task_struct *p)
 {
-	if (!static_branch_likely(&delayacct_key))
+	if (!static_branch_unlikely(&delayacct_key))
 		return;
 
 	if (p->delays)
@@ -134,14 +134,6 @@ static inline void delayacct_blkio_end(struct task_struct *p)
 	delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
 }
 
-static inline int delayacct_add_tsk(struct taskstats *d,
-					struct task_struct *tsk)
-{
-	if (!delayacct_on || !tsk->delays)
-		return 0;
-	return __delayacct_add_tsk(d, tsk);
-}
-
 static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk)
 {
 	if (tsk->delays)
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 63012fd39ae23..3f086903b2954 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -14,23 +14,23 @@
 #include <linux/delayacct.h>
 #include <linux/module.h>
 
-DEFINE_STATIC_KEY_TRUE(delayacct_key);
-int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
+DEFINE_STATIC_KEY_FALSE(delayacct_key);
+int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
 struct kmem_cache *delayacct_cache;
 
-static int __init delayacct_setup_disable(char *str)
+static int __init delayacct_setup_enable(char *str)
 {
-	delayacct_on = 0;
+	delayacct_on = 1;
 	return 1;
 }
-__setup("nodelayacct", delayacct_setup_disable);
+__setup("delayacct", delayacct_setup_enable);
 
 void delayacct_init(void)
 {
 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
 	delayacct_tsk_init(&init_task);
-	if (!delayacct_on)
-		static_branch_disable(&delayacct_key);
+	if (delayacct_on)
+		static_branch_enable(&delayacct_key);
 }
 
 void __delayacct_tsk_init(struct task_struct *tsk)
@@ -83,7 +83,7 @@ void __delayacct_blkio_end(struct task_struct *p)
 	delayacct_end(&delays->lock, &delays->blkio_start, total, count);
 }
 
-int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 {
 	u64 utime, stime, stimescaled, utimescaled;
 	unsigned long long t2, t3;
@@ -118,6 +118,9 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->cpu_run_virtual_total =
 		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
 
+	if (!tsk->delays)
+		return 0;
+
 	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
 
 	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
-- 
GitLab


From 0cd7c741f01de13dc1eecf22557593b3514639bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 10 May 2021 14:01:00 +0200
Subject: [PATCH 0440/3804] delayacct: Add sysctl to enable at runtime

Just like sched_schedstats, allow runtime enabling (and disabling) of
delayacct. This is useful if one forgot to add the delayacct boot time
option.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/YJkhebGJAywaZowX@hirez.programming.kicks-ass.net
---
 Documentation/accounting/delay-accounting.rst |  6 ++--
 include/linux/delayacct.h                     |  4 +++
 kernel/delayacct.c                            | 36 +++++++++++++++++--
 kernel/sysctl.c                               | 12 +++++++
 4 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst
index f20b282d6de08..1b8b46deeb299 100644
--- a/Documentation/accounting/delay-accounting.rst
+++ b/Documentation/accounting/delay-accounting.rst
@@ -74,8 +74,10 @@ To enable, add::
 
    delayacct
 
-to the kernel boot options. The rest of the instructions
-below assume this has been done.
+to the kernel boot options. The rest of the instructions below assume this has
+been done. Alternatively, use sysctl kernel.task_delayacct to switch the state
+at runtime. Note however that only tasks started after enabling it will have
+delayacct information.
 
 After the system has booted up, use a utility
 similar to  getdelays.c to access the delays
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 225c8e01a1117..af7e6eb502837 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -65,6 +65,10 @@ DECLARE_STATIC_KEY_FALSE(delayacct_key);
 extern int delayacct_on;	/* Delay accounting turned on/off */
 extern struct kmem_cache *delayacct_cache;
 extern void delayacct_init(void);
+
+extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+			    size_t *lenp, loff_t *ppos);
+
 extern void __delayacct_tsk_init(struct task_struct *);
 extern void __delayacct_tsk_exit(struct task_struct *);
 extern void __delayacct_blkio_start(void);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3f086903b2954..51530d5b15a8a 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -18,6 +18,17 @@ DEFINE_STATIC_KEY_FALSE(delayacct_key);
 int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
 struct kmem_cache *delayacct_cache;
 
+static void set_delayacct(bool enabled)
+{
+	if (enabled) {
+		static_branch_enable(&delayacct_key);
+		delayacct_on = 1;
+	} else {
+		delayacct_on = 0;
+		static_branch_disable(&delayacct_key);
+	}
+}
+
 static int __init delayacct_setup_enable(char *str)
 {
 	delayacct_on = 1;
@@ -29,9 +40,30 @@ void delayacct_init(void)
 {
 	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
 	delayacct_tsk_init(&init_task);
-	if (delayacct_on)
-		static_branch_enable(&delayacct_key);
+	set_delayacct(delayacct_on);
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
+		     size_t *lenp, loff_t *ppos)
+{
+	int state = delayacct_on;
+	struct ctl_table t;
+	int err;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_delayacct(state);
+	return err;
 }
+#endif
 
 void __delayacct_tsk_init(struct task_struct *tsk)
 {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14edf84cc571f..0afbfc83157a4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -71,6 +71,7 @@
 #include <linux/coredump.h>
 #include <linux/latencytop.h>
 #include <linux/pid.h>
+#include <linux/delayacct.h>
 
 #include "../lib/kstrtox.h"
 
@@ -1727,6 +1728,17 @@ static struct ctl_table kern_table[] = {
 		.extra2		= SYSCTL_ONE,
 	},
 #endif /* CONFIG_SCHEDSTATS */
+#ifdef CONFIG_TASK_DELAY_ACCT
+	{
+		.procname	= "task_delayacct",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_delayacct,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_TASK_DELAY_ACCT */
 #ifdef CONFIG_NUMA_BALANCING
 	{
 		.procname	= "numa_balancing",
-- 
GitLab


From 9099a14708ce1dfecb6002605594a0daa319b555 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:35 -0500
Subject: [PATCH 0441/3804] sched/fair: Add a few assertions

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.015639083@infradead.org
---
 kernel/sched/fair.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c209f68aad612..6bdbb7bb0d66d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6288,6 +6288,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 		task_util = uclamp_task_util(p);
 	}
 
+	/*
+	 * per-cpu select_idle_mask usage
+	 */
+	lockdep_assert_irqs_disabled();
+
 	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
 	    asym_fits_capacity(task_util, target))
 		return target;
@@ -6781,8 +6786,6 @@ unlock:
  * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
  *
  * Returns the target CPU number.
- *
- * preempt must be disabled.
  */
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
@@ -6795,6 +6798,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	/* SD_flags and WF_flags share the first nibble */
 	int sd_flag = wake_flags & 0xF;
 
+	/*
+	 * required for stable ->cpus_allowed
+	 */
+	lockdep_assert_held(&p->pi_lock);
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
 
-- 
GitLab


From 39d371b7c0c299d489041884d005aacc4bba8c15 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 2 Mar 2021 12:13:13 +0100
Subject: [PATCH 0442/3804] sched: Provide raw_spin_rq_*lock*() helpers

In preparation for playing games with rq->lock, add some rq_lock
wrappers.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.075967879@infradead.org
---
 kernel/sched/core.c  | 15 +++++++++++++
 kernel/sched/sched.h | 50 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 660120d0a2ce1..5568018222d9c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -184,6 +184,21 @@ int sysctl_sched_rt_runtime = 950000;
  *
  */
 
+void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
+{
+	raw_spin_lock_nested(rq_lockp(rq), subclass);
+}
+
+bool raw_spin_rq_trylock(struct rq *rq)
+{
+	return raw_spin_trylock(rq_lockp(rq));
+}
+
+void raw_spin_rq_unlock(struct rq *rq)
+{
+	raw_spin_unlock(rq_lockp(rq));
+}
+
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a189bec137291..f654587106355 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1113,6 +1113,56 @@ static inline bool is_migration_disabled(struct task_struct *p)
 #endif
 }
 
+static inline raw_spinlock_t *rq_lockp(struct rq *rq)
+{
+	return &rq->lock;
+}
+
+static inline void lockdep_assert_rq_held(struct rq *rq)
+{
+	lockdep_assert_held(rq_lockp(rq));
+}
+
+extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
+extern bool raw_spin_rq_trylock(struct rq *rq);
+extern void raw_spin_rq_unlock(struct rq *rq);
+
+static inline void raw_spin_rq_lock(struct rq *rq)
+{
+	raw_spin_rq_lock_nested(rq, 0);
+}
+
+static inline void raw_spin_rq_lock_irq(struct rq *rq)
+{
+	local_irq_disable();
+	raw_spin_rq_lock(rq);
+}
+
+static inline void raw_spin_rq_unlock_irq(struct rq *rq)
+{
+	raw_spin_rq_unlock(rq);
+	local_irq_enable();
+}
+
+static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
+{
+	unsigned long flags;
+	local_irq_save(flags);
+	raw_spin_rq_lock(rq);
+	return flags;
+}
+
+static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
+{
+	raw_spin_rq_unlock(rq);
+	local_irq_restore(flags);
+}
+
+#define raw_spin_rq_lock_irqsave(rq, flags)	\
+do {						\
+	flags = _raw_spin_rq_lock_irqsave(rq);	\
+} while (0)
+
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
 
-- 
GitLab


From 5cb9eaa3d274f75539077a28cf01e3563195fa53 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:31 -0500
Subject: [PATCH 0443/3804] sched: Wrap rq::lock access

In preparation for playing games with rq->lock, abstract the thing
using an accessor.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.136465446@infradead.org
---
 kernel/sched/core.c     |  70 +++++++++++++--------------
 kernel/sched/cpuacct.c  |  12 ++---
 kernel/sched/deadline.c |  22 ++++-----
 kernel/sched/debug.c    |   4 +-
 kernel/sched/fair.c     |  35 +++++++-------
 kernel/sched/idle.c     |   4 +-
 kernel/sched/pelt.h     |   2 +-
 kernel/sched/rt.c       |  16 +++---
 kernel/sched/sched.h    | 105 ++++++++++++++++++++--------------------
 kernel/sched/topology.c |   4 +-
 10 files changed, 136 insertions(+), 138 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5568018222d9c..5e6f5f5750a32 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -211,12 +211,12 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 
 	for (;;) {
 		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
+		raw_spin_rq_lock(rq);
 		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
 			rq_pin_lock(rq, rf);
 			return rq;
 		}
-		raw_spin_unlock(&rq->lock);
+		raw_spin_rq_unlock(rq);
 
 		while (unlikely(task_on_rq_migrating(p)))
 			cpu_relax();
@@ -235,7 +235,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	for (;;) {
 		raw_spin_lock_irqsave(&p->pi_lock, rf->flags);
 		rq = task_rq(p);
-		raw_spin_lock(&rq->lock);
+		raw_spin_rq_lock(rq);
 		/*
 		 *	move_queued_task()		task_rq_lock()
 		 *
@@ -257,7 +257,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 			rq_pin_lock(rq, rf);
 			return rq;
 		}
-		raw_spin_unlock(&rq->lock);
+		raw_spin_rq_unlock(rq);
 		raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 
 		while (unlikely(task_on_rq_migrating(p)))
@@ -327,7 +327,7 @@ void update_rq_clock(struct rq *rq)
 {
 	s64 delta;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	if (rq->clock_update_flags & RQCF_ACT_SKIP)
 		return;
@@ -625,7 +625,7 @@ void resched_curr(struct rq *rq)
 	struct task_struct *curr = rq->curr;
 	int cpu;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	if (test_tsk_need_resched(curr))
 		return;
@@ -649,10 +649,10 @@ void resched_cpu(int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_rq_lock_irqsave(rq, flags);
 	if (cpu_online(cpu) || cpu == smp_processor_id())
 		resched_curr(rq);
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_rq_unlock_irqrestore(rq, flags);
 }
 
 #ifdef CONFIG_SMP
@@ -1151,7 +1151,7 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
 	struct uclamp_se *uc_se = &p->uclamp[clamp_id];
 	struct uclamp_bucket *bucket;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	/* Update task effective clamp */
 	p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id);
@@ -1191,7 +1191,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
 	unsigned int bkt_clamp;
 	unsigned int rq_clamp;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	/*
 	 * If sched_uclamp_used was enabled after task @p was enqueued,
@@ -1864,7 +1864,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
 static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 				   struct task_struct *p, int new_cpu)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	deactivate_task(rq, p, DEQUEUE_NOCLOCK);
 	set_task_cpu(p, new_cpu);
@@ -2038,7 +2038,7 @@ int push_cpu_stop(void *arg)
 	struct task_struct *p = arg;
 
 	raw_spin_lock_irq(&p->pi_lock);
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 
 	if (task_rq(p) != rq)
 		goto out_unlock;
@@ -2068,7 +2068,7 @@ int push_cpu_stop(void *arg)
 
 out_unlock:
 	rq->push_busy = false;
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 	raw_spin_unlock_irq(&p->pi_lock);
 
 	put_task_struct(p);
@@ -2121,7 +2121,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
 		 * Because __kthread_bind() calls this on blocked tasks without
 		 * holding rq->lock.
 		 */
-		lockdep_assert_held(&rq->lock);
+		lockdep_assert_rq_held(rq);
 		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
 	}
 	if (running)
@@ -2462,7 +2462,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 * task_rq_lock().
 	 */
 	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
-				      lockdep_is_held(&task_rq(p)->lock)));
+				      lockdep_is_held(rq_lockp(task_rq(p)))));
 #endif
 	/*
 	 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
@@ -3004,7 +3004,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
 {
 	int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	if (p->sched_contributes_to_load)
 		rq->nr_uninterruptible--;
@@ -4015,7 +4015,7 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
 	void (*func)(struct rq *rq);
 	struct callback_head *next;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	while (head) {
 		func = (void (*)(struct rq *))head->func;
@@ -4038,7 +4038,7 @@ static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
 {
 	struct callback_head *head = rq->balance_callback;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	if (head)
 		rq->balance_callback = NULL;
 
@@ -4055,9 +4055,9 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
 	unsigned long flags;
 
 	if (unlikely(head)) {
-		raw_spin_lock_irqsave(&rq->lock, flags);
+		raw_spin_rq_lock_irqsave(rq, flags);
 		do_balance_callbacks(rq, head);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
+		raw_spin_rq_unlock_irqrestore(rq, flags);
 	}
 }
 
@@ -4088,10 +4088,10 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf
 	 * do an early lockdep release here:
 	 */
 	rq_unpin_lock(rq, rf);
-	spin_release(&rq->lock.dep_map, _THIS_IP_);
+	spin_release(&rq_lockp(rq)->dep_map, _THIS_IP_);
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
-	rq->lock.owner = next;
+	rq_lockp(rq)->owner = next;
 #endif
 }
 
@@ -4102,9 +4102,9 @@ static inline void finish_lock_switch(struct rq *rq)
 	 * fix up the runqueue lock - which gets 'carried over' from
 	 * prev into current:
 	 */
-	spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+	spin_acquire(&rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
 	__balance_callbacks(rq);
-	raw_spin_unlock_irq(&rq->lock);
+	raw_spin_rq_unlock_irq(rq);
 }
 
 /*
@@ -5164,7 +5164,7 @@ static void __sched notrace __schedule(bool preempt)
 
 		rq_unpin_lock(rq, &rf);
 		__balance_callbacks(rq);
-		raw_spin_unlock_irq(&rq->lock);
+		raw_spin_rq_unlock_irq(rq);
 	}
 }
 
@@ -5706,7 +5706,7 @@ out_unlock:
 
 	rq_unpin_lock(rq, &rf);
 	__balance_callbacks(rq);
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 
 	preempt_enable();
 }
@@ -7456,7 +7456,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	__sched_fork(0, idle);
 
 	raw_spin_lock_irqsave(&idle->pi_lock, flags);
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
@@ -7494,7 +7494,7 @@ void init_idle(struct task_struct *idle, int cpu)
 #ifdef CONFIG_SMP
 	idle->on_cpu = 1;
 #endif
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 	raw_spin_unlock_irqrestore(&idle->pi_lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
@@ -7660,7 +7660,7 @@ static void balance_push(struct rq *rq)
 {
 	struct task_struct *push_task = rq->curr;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
 	/*
@@ -7698,9 +7698,9 @@ static void balance_push(struct rq *rq)
 		 */
 		if (!rq->nr_running && !rq_has_pinned_tasks(rq) &&
 		    rcuwait_active(&rq->hotplug_wait)) {
-			raw_spin_unlock(&rq->lock);
+			raw_spin_rq_unlock(rq);
 			rcuwait_wake_up(&rq->hotplug_wait);
-			raw_spin_lock(&rq->lock);
+			raw_spin_rq_lock(rq);
 		}
 		return;
 	}
@@ -7710,7 +7710,7 @@ static void balance_push(struct rq *rq)
 	 * Temporarily drop rq->lock such that we can wake-up the stop task.
 	 * Both preemption and IRQs are still disabled.
 	 */
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 	stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task,
 			    this_cpu_ptr(&push_work));
 	/*
@@ -7718,7 +7718,7 @@ static void balance_push(struct rq *rq)
 	 * schedule(). The next pick is obviously going to be the stop task
 	 * which kthread_is_per_cpu() and will push this task away.
 	 */
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 }
 
 static void balance_push_set(int cpu, bool on)
@@ -8008,7 +8008,7 @@ static void dump_rq_tasks(struct rq *rq, const char *loglvl)
 	struct task_struct *g, *p;
 	int cpu = cpu_of(rq);
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running);
 	for_each_process_thread(g, p) {
@@ -8181,7 +8181,7 @@ void __init sched_init(void)
 		struct rq *rq;
 
 		rq = cpu_rq(i);
-		raw_spin_lock_init(&rq->lock);
+		raw_spin_lock_init(&rq->__lock);
 		rq->nr_running = 0;
 		rq->calc_load_active = 0;
 		rq->calc_load_update = jiffies + LOAD_FREQ;
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 104a1bade14f8..893eece65bfda 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -112,7 +112,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
 	/*
 	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
 	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+	raw_spin_rq_lock_irq(cpu_rq(cpu));
 #endif
 
 	if (index == CPUACCT_STAT_NSTATS) {
@@ -126,7 +126,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
 	}
 
 #ifndef CONFIG_64BIT
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+	raw_spin_rq_unlock_irq(cpu_rq(cpu));
 #endif
 
 	return data;
@@ -141,14 +141,14 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 	/*
 	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
 	 */
-	raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+	raw_spin_rq_lock_irq(cpu_rq(cpu));
 #endif
 
 	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
 		cpuusage->usages[i] = val;
 
 #ifndef CONFIG_64BIT
-	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+	raw_spin_rq_unlock_irq(cpu_rq(cpu));
 #endif
 }
 
@@ -253,13 +253,13 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V)
 			 * Take rq->lock to make 64-bit read safe on 32-bit
 			 * platforms.
 			 */
-			raw_spin_lock_irq(&cpu_rq(cpu)->lock);
+			raw_spin_rq_lock_irq(cpu_rq(cpu));
 #endif
 
 			seq_printf(m, " %llu", cpuusage->usages[index]);
 
 #ifndef CONFIG_64BIT
-			raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
+			raw_spin_rq_unlock_irq(cpu_rq(cpu));
 #endif
 		}
 		seq_puts(m, "\n");
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 9a2989749b8d1..6e99b8b37c8c8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -157,7 +157,7 @@ void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->running_bw;
 
-	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
 	dl_rq->running_bw += dl_bw;
 	SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
 	SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
@@ -170,7 +170,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->running_bw;
 
-	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
 	dl_rq->running_bw -= dl_bw;
 	SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
 	if (dl_rq->running_bw > old)
@@ -184,7 +184,7 @@ void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->this_bw;
 
-	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
 	dl_rq->this_bw += dl_bw;
 	SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
 }
@@ -194,7 +194,7 @@ void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
 {
 	u64 old = dl_rq->this_bw;
 
-	lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock);
+	lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
 	dl_rq->this_bw -= dl_bw;
 	SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
 	if (dl_rq->this_bw > old)
@@ -987,7 +987,7 @@ static int start_dl_timer(struct task_struct *p)
 	ktime_t now, act;
 	s64 delta;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	/*
 	 * We want the timer to fire at the deadline, but considering
@@ -1097,9 +1097,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 		 * If the runqueue is no longer available, migrate the
 		 * task elsewhere. This necessarily changes rq.
 		 */
-		lockdep_unpin_lock(&rq->lock, rf.cookie);
+		lockdep_unpin_lock(rq_lockp(rq), rf.cookie);
 		rq = dl_task_offline_migration(rq, p);
-		rf.cookie = lockdep_pin_lock(&rq->lock);
+		rf.cookie = lockdep_pin_lock(rq_lockp(rq));
 		update_rq_clock(rq);
 
 		/*
@@ -1731,7 +1731,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 	 * from try_to_wake_up(). Hence, p->pi_lock is locked, but
 	 * rq->lock is not... So, lock it
 	 */
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 	if (p->dl.dl_non_contending) {
 		sub_running_bw(&p->dl, &rq->dl);
 		p->dl.dl_non_contending = 0;
@@ -1746,7 +1746,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 			put_task_struct(p);
 	}
 	sub_rq_bw(&p->dl, &rq->dl);
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 }
 
 static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
@@ -2291,10 +2291,10 @@ skip:
 		double_unlock_balance(this_rq, src_rq);
 
 		if (push_task) {
-			raw_spin_unlock(&this_rq->lock);
+			raw_spin_rq_unlock(this_rq);
 			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
 					    push_task, &src_rq->push_work);
-			raw_spin_lock(&this_rq->lock);
+			raw_spin_rq_lock(this_rq);
 		}
 	}
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9c882f20803e0..3bdee5fd7d292 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -576,7 +576,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_rq_lock_irqsave(rq, flags);
 	if (rb_first_cached(&cfs_rq->tasks_timeline))
 		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
 	last = __pick_last_entity(cfs_rq);
@@ -584,7 +584,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 		max_vruntime = last->vruntime;
 	min_vruntime = cfs_rq->min_vruntime;
 	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_rq_unlock_irqrestore(rq, flags);
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
 			SPLIT_NS(MIN_vruntime));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6bdbb7bb0d66d..e50bd75067d58 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1107,7 +1107,7 @@ struct numa_group {
 static struct numa_group *deref_task_numa_group(struct task_struct *p)
 {
 	return rcu_dereference_check(p->numa_group, p == current ||
-		(lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+		(lockdep_is_held(rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
 }
 
 static struct numa_group *deref_curr_numa_group(struct task_struct *p)
@@ -5328,7 +5328,7 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
 {
 	struct task_group *tg;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -5347,7 +5347,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 {
 	struct task_group *tg;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -6891,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 		 * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
 		 * rq->lock and can modify state directly.
 		 */
-		lockdep_assert_held(&task_rq(p)->lock);
+		lockdep_assert_rq_held(task_rq(p));
 		detach_entity_cfs_rq(&p->se);
 
 	} else {
@@ -7518,7 +7518,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 {
 	s64 delta;
 
-	lockdep_assert_held(&env->src_rq->lock);
+	lockdep_assert_rq_held(env->src_rq);
 
 	if (p->sched_class != &fair_sched_class)
 		return 0;
@@ -7616,7 +7616,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
 	int tsk_cache_hot;
 
-	lockdep_assert_held(&env->src_rq->lock);
+	lockdep_assert_rq_held(env->src_rq);
 
 	/*
 	 * We do not migrate tasks that are:
@@ -7705,7 +7705,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  */
 static void detach_task(struct task_struct *p, struct lb_env *env)
 {
-	lockdep_assert_held(&env->src_rq->lock);
+	lockdep_assert_rq_held(env->src_rq);
 
 	deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
 	set_task_cpu(p, env->dst_cpu);
@@ -7721,7 +7721,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
 {
 	struct task_struct *p;
 
-	lockdep_assert_held(&env->src_rq->lock);
+	lockdep_assert_rq_held(env->src_rq);
 
 	list_for_each_entry_reverse(p,
 			&env->src_rq->cfs_tasks, se.group_node) {
@@ -7757,7 +7757,7 @@ static int detach_tasks(struct lb_env *env)
 	struct task_struct *p;
 	int detached = 0;
 
-	lockdep_assert_held(&env->src_rq->lock);
+	lockdep_assert_rq_held(env->src_rq);
 
 	/*
 	 * Source run queue has been emptied by another CPU, clear
@@ -7887,7 +7887,7 @@ next:
  */
 static void attach_task(struct rq *rq, struct task_struct *p)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	BUG_ON(task_rq(p) != rq);
 	activate_task(rq, p, ENQUEUE_NOCLOCK);
@@ -9798,7 +9798,7 @@ more_balance:
 		if (need_active_balance(&env)) {
 			unsigned long flags;
 
-			raw_spin_lock_irqsave(&busiest->lock, flags);
+			raw_spin_rq_lock_irqsave(busiest, flags);
 
 			/*
 			 * Don't kick the active_load_balance_cpu_stop,
@@ -9806,8 +9806,7 @@ more_balance:
 			 * moved to this_cpu:
 			 */
 			if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
-				raw_spin_unlock_irqrestore(&busiest->lock,
-							    flags);
+				raw_spin_rq_unlock_irqrestore(busiest, flags);
 				goto out_one_pinned;
 			}
 
@@ -9824,7 +9823,7 @@ more_balance:
 				busiest->push_cpu = this_cpu;
 				active_balance = 1;
 			}
-			raw_spin_unlock_irqrestore(&busiest->lock, flags);
+			raw_spin_rq_unlock_irqrestore(busiest, flags);
 
 			if (active_balance) {
 				stop_one_cpu_nowait(cpu_of(busiest),
@@ -10649,7 +10648,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 		goto out;
 	}
 
-	raw_spin_unlock(&this_rq->lock);
+	raw_spin_rq_unlock(this_rq);
 
 	update_blocked_averages(this_cpu);
 	rcu_read_lock();
@@ -10688,7 +10687,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 	}
 	rcu_read_unlock();
 
-	raw_spin_lock(&this_rq->lock);
+	raw_spin_rq_lock(this_rq);
 
 	if (curr_cost > this_rq->max_idle_balance_cost)
 		this_rq->max_idle_balance_cost = curr_cost;
@@ -11175,9 +11174,9 @@ void unregister_fair_sched_group(struct task_group *tg)
 
 		rq = cpu_rq(cpu);
 
-		raw_spin_lock_irqsave(&rq->lock, flags);
+		raw_spin_rq_lock_irqsave(rq, flags);
 		list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
+		raw_spin_rq_unlock_irqrestore(rq, flags);
 	}
 }
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 7ca3d3d86c2a5..0194768ea9e7b 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -455,10 +455,10 @@ struct task_struct *pick_next_task_idle(struct rq *rq)
 static void
 dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
 {
-	raw_spin_unlock_irq(&rq->lock);
+	raw_spin_rq_unlock_irq(rq);
 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
 	dump_stack();
-	raw_spin_lock_irq(&rq->lock);
+	raw_spin_rq_lock_irq(rq);
 }
 
 /*
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 1462846d244e3..9ed6d8c414ad6 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -141,7 +141,7 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq)
 
 static inline u64 rq_clock_pelt(struct rq *rq)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	assert_clock_updated(rq);
 
 	return rq->clock_pelt - rq->lost_idle_time;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c286e5ba3c942..b3d39c3d3ab34 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -888,7 +888,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 		if (skip)
 			continue;
 
-		raw_spin_lock(&rq->lock);
+		raw_spin_rq_lock(rq);
 		update_rq_clock(rq);
 
 		if (rt_rq->rt_time) {
@@ -926,7 +926,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
-		raw_spin_unlock(&rq->lock);
+		raw_spin_rq_unlock(rq);
 	}
 
 	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
@@ -1894,10 +1894,10 @@ retry:
 		 */
 		push_task = get_push_task(rq);
 		if (push_task) {
-			raw_spin_unlock(&rq->lock);
+			raw_spin_rq_unlock(rq);
 			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
 					    push_task, &rq->push_work);
-			raw_spin_lock(&rq->lock);
+			raw_spin_rq_lock(rq);
 		}
 
 		return 0;
@@ -2122,10 +2122,10 @@ void rto_push_irq_work_func(struct irq_work *work)
 	 * When it gets updated, a check is made if a push is possible.
 	 */
 	if (has_pushable_tasks(rq)) {
-		raw_spin_lock(&rq->lock);
+		raw_spin_rq_lock(rq);
 		while (push_rt_task(rq, true))
 			;
-		raw_spin_unlock(&rq->lock);
+		raw_spin_rq_unlock(rq);
 	}
 
 	raw_spin_lock(&rd->rto_lock);
@@ -2243,10 +2243,10 @@ skip:
 		double_unlock_balance(this_rq, src_rq);
 
 		if (push_task) {
-			raw_spin_unlock(&this_rq->lock);
+			raw_spin_rq_unlock(this_rq);
 			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
 					    push_task, &src_rq->push_work);
-			raw_spin_lock(&this_rq->lock);
+			raw_spin_rq_lock(this_rq);
 		}
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f654587106355..dbabf282c039a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -905,7 +905,7 @@ DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
  */
 struct rq {
 	/* runqueue lock: */
-	raw_spinlock_t		lock;
+	raw_spinlock_t		__lock;
 
 	/*
 	 * nr_running and cpu_load should be in the same cacheline because
@@ -1115,7 +1115,7 @@ static inline bool is_migration_disabled(struct task_struct *p)
 
 static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 {
-	return &rq->lock;
+	return &rq->__lock;
 }
 
 static inline void lockdep_assert_rq_held(struct rq *rq)
@@ -1229,7 +1229,7 @@ static inline void assert_clock_updated(struct rq *rq)
 
 static inline u64 rq_clock(struct rq *rq)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	assert_clock_updated(rq);
 
 	return rq->clock;
@@ -1237,7 +1237,7 @@ static inline u64 rq_clock(struct rq *rq)
 
 static inline u64 rq_clock_task(struct rq *rq)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	assert_clock_updated(rq);
 
 	return rq->clock_task;
@@ -1263,7 +1263,7 @@ static inline u64 rq_clock_thermal(struct rq *rq)
 
 static inline void rq_clock_skip_update(struct rq *rq)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	rq->clock_update_flags |= RQCF_REQ_SKIP;
 }
 
@@ -1273,7 +1273,7 @@ static inline void rq_clock_skip_update(struct rq *rq)
  */
 static inline void rq_clock_cancel_skipupdate(struct rq *rq)
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 	rq->clock_update_flags &= ~RQCF_REQ_SKIP;
 }
 
@@ -1304,7 +1304,7 @@ extern struct callback_head balance_push_callback;
  */
 static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 {
-	rf->cookie = lockdep_pin_lock(&rq->lock);
+	rf->cookie = lockdep_pin_lock(rq_lockp(rq));
 
 #ifdef CONFIG_SCHED_DEBUG
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
@@ -1322,12 +1322,12 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
 		rf->clock_update_flags = RQCF_UPDATED;
 #endif
 
-	lockdep_unpin_lock(&rq->lock, rf->cookie);
+	lockdep_unpin_lock(rq_lockp(rq), rf->cookie);
 }
 
 static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
 {
-	lockdep_repin_lock(&rq->lock, rf->cookie);
+	lockdep_repin_lock(rq_lockp(rq), rf->cookie);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -1348,7 +1348,7 @@ static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
 	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 }
 
 static inline void
@@ -1357,7 +1357,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 	__releases(p->pi_lock)
 {
 	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 	raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
 }
 
@@ -1365,7 +1365,7 @@ static inline void
 rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
 	__acquires(rq->lock)
 {
-	raw_spin_lock_irqsave(&rq->lock, rf->flags);
+	raw_spin_rq_lock_irqsave(rq, rf->flags);
 	rq_pin_lock(rq, rf);
 }
 
@@ -1373,7 +1373,7 @@ static inline void
 rq_lock_irq(struct rq *rq, struct rq_flags *rf)
 	__acquires(rq->lock)
 {
-	raw_spin_lock_irq(&rq->lock);
+	raw_spin_rq_lock_irq(rq);
 	rq_pin_lock(rq, rf);
 }
 
@@ -1381,7 +1381,7 @@ static inline void
 rq_lock(struct rq *rq, struct rq_flags *rf)
 	__acquires(rq->lock)
 {
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 	rq_pin_lock(rq, rf);
 }
 
@@ -1389,7 +1389,7 @@ static inline void
 rq_relock(struct rq *rq, struct rq_flags *rf)
 	__acquires(rq->lock)
 {
-	raw_spin_lock(&rq->lock);
+	raw_spin_rq_lock(rq);
 	rq_repin_lock(rq, rf);
 }
 
@@ -1398,7 +1398,7 @@ rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
 	rq_unpin_lock(rq, rf);
-	raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
+	raw_spin_rq_unlock_irqrestore(rq, rf->flags);
 }
 
 static inline void
@@ -1406,7 +1406,7 @@ rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
 	rq_unpin_lock(rq, rf);
-	raw_spin_unlock_irq(&rq->lock);
+	raw_spin_rq_unlock_irq(rq);
 }
 
 static inline void
@@ -1414,7 +1414,7 @@ rq_unlock(struct rq *rq, struct rq_flags *rf)
 	__releases(rq->lock)
 {
 	rq_unpin_lock(rq, rf);
-	raw_spin_unlock(&rq->lock);
+	raw_spin_rq_unlock(rq);
 }
 
 static inline struct rq *
@@ -1479,7 +1479,7 @@ queue_balance_callback(struct rq *rq,
 		       struct callback_head *head,
 		       void (*func)(struct rq *rq))
 {
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
 		return;
@@ -2019,7 +2019,7 @@ static inline struct task_struct *get_push_task(struct rq *rq)
 {
 	struct task_struct *p = rq->curr;
 
-	lockdep_assert_held(&rq->lock);
+	lockdep_assert_rq_held(rq);
 
 	if (rq->push_busy)
 		return NULL;
@@ -2249,7 +2249,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
-	raw_spin_unlock(&this_rq->lock);
+	raw_spin_rq_unlock(this_rq);
 	double_rq_lock(this_rq, busiest);
 
 	return 1;
@@ -2268,20 +2268,22 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
-	int ret = 0;
-
-	if (unlikely(!raw_spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			raw_spin_unlock(&this_rq->lock);
-			raw_spin_lock(&busiest->lock);
-			raw_spin_lock_nested(&this_rq->lock,
-					      SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			raw_spin_lock_nested(&busiest->lock,
-					      SINGLE_DEPTH_NESTING);
+	if (rq_lockp(this_rq) == rq_lockp(busiest))
+		return 0;
+
+	if (likely(raw_spin_rq_trylock(busiest)))
+		return 0;
+
+	if (rq_lockp(busiest) >= rq_lockp(this_rq)) {
+		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
+		return 0;
 	}
-	return ret;
+
+	raw_spin_rq_unlock(this_rq);
+	raw_spin_rq_lock(busiest);
+	raw_spin_rq_lock_nested(this_rq, SINGLE_DEPTH_NESTING);
+
+	return 1;
 }
 
 #endif /* CONFIG_PREEMPTION */
@@ -2291,11 +2293,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
  */
 static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 {
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work well under rq->lock */
-		raw_spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
+	lockdep_assert_irqs_disabled();
 
 	return _double_lock_balance(this_rq, busiest);
 }
@@ -2303,8 +2301,9 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
-	raw_spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+	if (rq_lockp(this_rq) != rq_lockp(busiest))
+		raw_spin_rq_unlock(busiest);
+	lock_set_subclass(&rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
 }
 
 static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
@@ -2345,16 +2344,16 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
 	__acquires(rq2->lock)
 {
 	BUG_ON(!irqs_disabled());
-	if (rq1 == rq2) {
-		raw_spin_lock(&rq1->lock);
+	if (rq_lockp(rq1) == rq_lockp(rq2)) {
+		raw_spin_rq_lock(rq1);
 		__acquire(rq2->lock);	/* Fake it out ;) */
 	} else {
-		if (rq1 < rq2) {
-			raw_spin_lock(&rq1->lock);
-			raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
+		if (rq_lockp(rq1) < rq_lockp(rq2)) {
+			raw_spin_rq_lock(rq1);
+			raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
 		} else {
-			raw_spin_lock(&rq2->lock);
-			raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
+			raw_spin_rq_lock(rq2);
+			raw_spin_rq_lock_nested(rq1, SINGLE_DEPTH_NESTING);
 		}
 	}
 }
@@ -2369,9 +2368,9 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
-	raw_spin_unlock(&rq1->lock);
-	if (rq1 != rq2)
-		raw_spin_unlock(&rq2->lock);
+	raw_spin_rq_unlock(rq1);
+	if (rq_lockp(rq1) != rq_lockp(rq2))
+		raw_spin_rq_unlock(rq2);
 	else
 		__release(rq2->lock);
 }
@@ -2394,7 +2393,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
 {
 	BUG_ON(!irqs_disabled());
 	BUG_ON(rq1 != rq2);
-	raw_spin_lock(&rq1->lock);
+	raw_spin_rq_lock(rq1);
 	__acquire(rq2->lock);	/* Fake it out ;) */
 }
 
@@ -2409,7 +2408,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__releases(rq2->lock)
 {
 	BUG_ON(rq1 != rq2);
-	raw_spin_unlock(&rq1->lock);
+	raw_spin_rq_unlock(rq1);
 	__release(rq2->lock);
 }
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 55a0a243e8718..053115b55f89f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -467,7 +467,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	struct root_domain *old_rd = NULL;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&rq->lock, flags);
+	raw_spin_rq_lock_irqsave(rq, flags);
 
 	if (rq->rd) {
 		old_rd = rq->rd;
@@ -493,7 +493,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
 		set_rq_online(rq);
 
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
+	raw_spin_rq_unlock_irqrestore(rq, flags);
 
 	if (old_rd)
 		call_rcu(&old_rd->rcu, free_rootdomain);
-- 
GitLab


From d66f1b06b5b438cd20ba3664b8eef1f9c79e84bf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 2 Mar 2021 12:16:48 +0100
Subject: [PATCH 0444/3804] sched: Prepare for Core-wide rq->lock

When switching on core-sched, CPUs need to agree which lock to use for
their RQ.

The new rule will be that rq->core_enabled will be toggled while
holding all rq->__locks that belong to a core. This means we need to
double check the rq->core_enabled value after each lock acquire and
retry if it changed.

This also has implications for those sites that take multiple RQ
locks: they need to be careful that the second lock doesn't end up
being the same lock as the first.

Verify the lock pointer after acquiring the first lock, because if
they're on the same core, holding any of the rq->__lock instances will
pin the core state.

While there, change the rq->__lock order to CPU number instead of rq
address; this greatly simplifies the next patch.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/YJUNY0dmrJMD/BIm@hirez.programming.kicks-ass.net
---
 kernel/sched/core.c  | 48 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/sched.h | 48 ++++++++++++++++----------------------------
 2 files changed, 63 insertions(+), 33 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e6f5f5750a32..8bd2f12810e30 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -186,12 +186,37 @@ int sysctl_sched_rt_runtime = 950000;
 
 void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
 {
-	raw_spin_lock_nested(rq_lockp(rq), subclass);
+	raw_spinlock_t *lock;
+
+	if (sched_core_disabled()) {
+		raw_spin_lock_nested(&rq->__lock, subclass);
+		return;
+	}
+
+	for (;;) {
+		lock = rq_lockp(rq);
+		raw_spin_lock_nested(lock, subclass);
+		if (likely(lock == rq_lockp(rq)))
+			return;
+		raw_spin_unlock(lock);
+	}
 }
 
 bool raw_spin_rq_trylock(struct rq *rq)
 {
-	return raw_spin_trylock(rq_lockp(rq));
+	raw_spinlock_t *lock;
+	bool ret;
+
+	if (sched_core_disabled())
+		return raw_spin_trylock(&rq->__lock);
+
+	for (;;) {
+		lock = rq_lockp(rq);
+		ret = raw_spin_trylock(lock);
+		if (!ret || (likely(lock == rq_lockp(rq))))
+			return ret;
+		raw_spin_unlock(lock);
+	}
 }
 
 void raw_spin_rq_unlock(struct rq *rq)
@@ -199,6 +224,25 @@ void raw_spin_rq_unlock(struct rq *rq)
 	raw_spin_unlock(rq_lockp(rq));
 }
 
+#ifdef CONFIG_SMP
+/*
+ * double_rq_lock - safely lock two runqueues
+ */
+void double_rq_lock(struct rq *rq1, struct rq *rq2)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (rq_order_less(rq2, rq1))
+		swap(rq1, rq2);
+
+	raw_spin_rq_lock(rq1);
+	if (rq_lockp(rq1) == rq_lockp(rq2))
+		return;
+
+	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
+}
+#endif
+
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dbabf282c039a..f8bd5c8fc90aa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1113,6 +1113,11 @@ static inline bool is_migration_disabled(struct task_struct *p)
 #endif
 }
 
+static inline bool sched_core_disabled(void)
+{
+	return true;
+}
+
 static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 {
 	return &rq->__lock;
@@ -2231,10 +2236,17 @@ unsigned long arch_scale_freq_capacity(int cpu)
 }
 #endif
 
+
 #ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPTION
 
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
+static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
+{
+	return rq1->cpu < rq2->cpu;
+}
+
+extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
+#ifdef CONFIG_PREEMPTION
 
 /*
  * fair double_lock_balance: Safely acquires both rq->locks in a fair
@@ -2274,14 +2286,13 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	if (likely(raw_spin_rq_trylock(busiest)))
 		return 0;
 
-	if (rq_lockp(busiest) >= rq_lockp(this_rq)) {
+	if (rq_order_less(this_rq, busiest)) {
 		raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
 		return 0;
 	}
 
 	raw_spin_rq_unlock(this_rq);
-	raw_spin_rq_lock(busiest);
-	raw_spin_rq_lock_nested(this_rq, SINGLE_DEPTH_NESTING);
+	double_rq_lock(this_rq, busiest);
 
 	return 1;
 }
@@ -2333,31 +2344,6 @@ static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
 	raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
-/*
- * double_rq_lock - safely lock two runqueues
- *
- * Note this does not disable interrupts like task_rq_lock,
- * you need to do so manually before calling.
- */
-static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
-	__acquires(rq1->lock)
-	__acquires(rq2->lock)
-{
-	BUG_ON(!irqs_disabled());
-	if (rq_lockp(rq1) == rq_lockp(rq2)) {
-		raw_spin_rq_lock(rq1);
-		__acquire(rq2->lock);	/* Fake it out ;) */
-	} else {
-		if (rq_lockp(rq1) < rq_lockp(rq2)) {
-			raw_spin_rq_lock(rq1);
-			raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
-		} else {
-			raw_spin_rq_lock(rq2);
-			raw_spin_rq_lock_nested(rq1, SINGLE_DEPTH_NESTING);
-		}
-	}
-}
-
 /*
  * double_rq_unlock - safely unlock two runqueues
  *
@@ -2368,11 +2354,11 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
-	raw_spin_rq_unlock(rq1);
 	if (rq_lockp(rq1) != rq_lockp(rq2))
 		raw_spin_rq_unlock(rq2);
 	else
 		__release(rq2->lock);
+	raw_spin_rq_unlock(rq1);
 }
 
 extern void set_rq_online (struct rq *rq);
-- 
GitLab


From 9edeaea1bc452372718837ed2ba775811baf1ba1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:34 -0500
Subject: [PATCH 0445/3804] sched: Core-wide rq->lock

Introduce the basic infrastructure to have a core wide rq->lock.

This relies on the rq->__lock order being in increasing CPU number
(inside a core). It is also constrained to SMT8 per lockdep (and
SMT256 per preempt_count).

Luckily SMT8 is the max supported SMT count for Linux (Mips, Sparc and
Power are known to have this).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/YJUNfzSgptjX7tG6@hirez.programming.kicks-ass.net
---
 kernel/Kconfig.preempt |   6 ++
 kernel/sched/core.c    | 164 ++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h   |  58 +++++++++++++++
 3 files changed, 224 insertions(+), 4 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 4160173016605..ea1e3331c0ba3 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -99,3 +99,9 @@ config PREEMPT_DYNAMIC
 
 	  Interesting if you want the same pre-built kernel should be used for
 	  both Server and Desktop workloads.
+
+config SCHED_CORE
+	bool "Core Scheduling for SMT"
+	default y
+	depends on SCHED_SMT
+
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8bd2f12810e30..384b79363a393 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -84,6 +84,108 @@ unsigned int sysctl_sched_rt_period = 1000000;
 
 __read_mostly int scheduler_running;
 
+#ifdef CONFIG_SCHED_CORE
+
+DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
+
+/*
+ * Magic required such that:
+ *
+ *	raw_spin_rq_lock(rq);
+ *	...
+ *	raw_spin_rq_unlock(rq);
+ *
+ * ends up locking and unlocking the _same_ lock, and all CPUs
+ * always agree on what rq has what lock.
+ *
+ * XXX entirely possible to selectively enable cores, don't bother for now.
+ */
+
+static DEFINE_MUTEX(sched_core_mutex);
+static int sched_core_count;
+static struct cpumask sched_core_mask;
+
+static void __sched_core_flip(bool enabled)
+{
+	int cpu, t, i;
+
+	cpus_read_lock();
+
+	/*
+	 * Toggle the online cores, one by one.
+	 */
+	cpumask_copy(&sched_core_mask, cpu_online_mask);
+	for_each_cpu(cpu, &sched_core_mask) {
+		const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+
+		i = 0;
+		local_irq_disable();
+		for_each_cpu(t, smt_mask) {
+			/* supports up to SMT8 */
+			raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+		}
+
+		for_each_cpu(t, smt_mask)
+			cpu_rq(t)->core_enabled = enabled;
+
+		for_each_cpu(t, smt_mask)
+			raw_spin_unlock(&cpu_rq(t)->__lock);
+		local_irq_enable();
+
+		cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
+	}
+
+	/*
+	 * Toggle the offline CPUs.
+	 */
+	cpumask_copy(&sched_core_mask, cpu_possible_mask);
+	cpumask_andnot(&sched_core_mask, &sched_core_mask, cpu_online_mask);
+
+	for_each_cpu(cpu, &sched_core_mask)
+		cpu_rq(cpu)->core_enabled = enabled;
+
+	cpus_read_unlock();
+}
+
+static void __sched_core_enable(void)
+{
+	// XXX verify there are no cookie tasks (yet)
+
+	static_branch_enable(&__sched_core_enabled);
+	/*
+	 * Ensure all previous instances of raw_spin_rq_*lock() have finished
+	 * and future ones will observe !sched_core_disabled().
+	 */
+	synchronize_rcu();
+	__sched_core_flip(true);
+}
+
+static void __sched_core_disable(void)
+{
+	// XXX verify there are no cookie tasks (left)
+
+	__sched_core_flip(false);
+	static_branch_disable(&__sched_core_enabled);
+}
+
+void sched_core_get(void)
+{
+	mutex_lock(&sched_core_mutex);
+	if (!sched_core_count++)
+		__sched_core_enable();
+	mutex_unlock(&sched_core_mutex);
+}
+
+void sched_core_put(void)
+{
+	mutex_lock(&sched_core_mutex);
+	if (!--sched_core_count)
+		__sched_core_disable();
+	mutex_unlock(&sched_core_mutex);
+}
+
+#endif /* CONFIG_SCHED_CORE */
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -188,16 +290,23 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
 {
 	raw_spinlock_t *lock;
 
+	/* Matches synchronize_rcu() in __sched_core_enable() */
+	preempt_disable();
 	if (sched_core_disabled()) {
 		raw_spin_lock_nested(&rq->__lock, subclass);
+		/* preempt_count *MUST* be > 1 */
+		preempt_enable_no_resched();
 		return;
 	}
 
 	for (;;) {
 		lock = rq_lockp(rq);
 		raw_spin_lock_nested(lock, subclass);
-		if (likely(lock == rq_lockp(rq)))
+		if (likely(lock == rq_lockp(rq))) {
+			/* preempt_count *MUST* be > 1 */
+			preempt_enable_no_resched();
 			return;
+		}
 		raw_spin_unlock(lock);
 	}
 }
@@ -207,14 +316,21 @@ bool raw_spin_rq_trylock(struct rq *rq)
 	raw_spinlock_t *lock;
 	bool ret;
 
-	if (sched_core_disabled())
-		return raw_spin_trylock(&rq->__lock);
+	/* Matches synchronize_rcu() in __sched_core_enable() */
+	preempt_disable();
+	if (sched_core_disabled()) {
+		ret = raw_spin_trylock(&rq->__lock);
+		preempt_enable();
+		return ret;
+	}
 
 	for (;;) {
 		lock = rq_lockp(rq);
 		ret = raw_spin_trylock(lock);
-		if (!ret || (likely(lock == rq_lockp(rq))))
+		if (!ret || (likely(lock == rq_lockp(rq)))) {
+			preempt_enable();
 			return ret;
+		}
 		raw_spin_unlock(lock);
 	}
 }
@@ -5041,6 +5157,40 @@ restart:
 	BUG();
 }
 
+#ifdef CONFIG_SCHED_CORE
+
+static inline void sched_core_cpu_starting(unsigned int cpu)
+{
+	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+	struct rq *rq, *core_rq = NULL;
+	int i;
+
+	core_rq = cpu_rq(cpu)->core;
+
+	if (!core_rq) {
+		for_each_cpu(i, smt_mask) {
+			rq = cpu_rq(i);
+			if (rq->core && rq->core == rq)
+				core_rq = rq;
+		}
+
+		if (!core_rq)
+			core_rq = cpu_rq(cpu);
+
+		for_each_cpu(i, smt_mask) {
+			rq = cpu_rq(i);
+
+			WARN_ON_ONCE(rq->core && rq->core != core_rq);
+			rq->core = core_rq;
+		}
+	}
+}
+#else /* !CONFIG_SCHED_CORE */
+
+static inline void sched_core_cpu_starting(unsigned int cpu) {}
+
+#endif /* CONFIG_SCHED_CORE */
+
 /*
  * __schedule() is the main scheduler function.
  *
@@ -8006,6 +8156,7 @@ static void sched_rq_cpu_starting(unsigned int cpu)
 
 int sched_cpu_starting(unsigned int cpu)
 {
+	sched_core_cpu_starting(cpu);
 	sched_rq_cpu_starting(cpu);
 	sched_tick_start(cpu);
 	return 0;
@@ -8290,6 +8441,11 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
 		hrtick_rq_init(rq);
 		atomic_set(&rq->nr_iowait, 0);
+
+#ifdef CONFIG_SCHED_CORE
+		rq->core = NULL;
+		rq->core_enabled = 0;
+#endif
 	}
 
 	set_load_weight(&init_task, false);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f8bd5c8fc90aa..29418b8c05dd2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1075,6 +1075,12 @@ struct rq {
 #endif
 	unsigned int		push_busy;
 	struct cpu_stop_work	push_work;
+
+#ifdef CONFIG_SCHED_CORE
+	/* per rq */
+	struct rq		*core;
+	unsigned int		core_enabled;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1113,6 +1119,35 @@ static inline bool is_migration_disabled(struct task_struct *p)
 #endif
 }
 
+#ifdef CONFIG_SCHED_CORE
+
+DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+	return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
+}
+
+static inline bool sched_core_disabled(void)
+{
+	return !static_branch_unlikely(&__sched_core_enabled);
+}
+
+static inline raw_spinlock_t *rq_lockp(struct rq *rq)
+{
+	if (sched_core_enabled(rq))
+		return &rq->core->__lock;
+
+	return &rq->__lock;
+}
+
+#else /* !CONFIG_SCHED_CORE */
+
+static inline bool sched_core_enabled(struct rq *rq)
+{
+	return false;
+}
+
 static inline bool sched_core_disabled(void)
 {
 	return true;
@@ -1123,6 +1158,8 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
+#endif /* CONFIG_SCHED_CORE */
+
 static inline void lockdep_assert_rq_held(struct rq *rq)
 {
 	lockdep_assert_held(rq_lockp(rq));
@@ -2241,6 +2278,27 @@ unsigned long arch_scale_freq_capacity(int cpu)
 
 static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
 {
+#ifdef CONFIG_SCHED_CORE
+	/*
+	 * In order to not have {0,2},{1,3} turn into an AB-BA,
+	 * order by core-id first and cpu-id second.
+	 *
+	 * Notably:
+	 *
+	 *	double_rq_lock(0,3); will take core-0, core-1 lock
+	 *	double_rq_lock(1,2); will take core-1, core-0 lock
+	 *
+	 * when only cpu-id is considered.
+	 */
+	if (rq1->core->cpu < rq2->core->cpu)
+		return true;
+	if (rq1->core->cpu > rq2->core->cpu)
+		return false;
+
+	/*
+	 * __sched_core_flip() relies on SMT having cpu-id lock order.
+	 */
+#endif
 	return rq1->cpu < rq2->cpu;
 }
 
-- 
GitLab


From 9ef7e7e33bcdb57be1afb28884053c28b5f05240 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 3 Mar 2021 16:45:41 +0100
Subject: [PATCH 0446/3804] sched: Optimize rq_lockp() usage

rq_lockp() includes a static_branch(), which is asm-goto, which is
asm volatile which defeats regular CSE. This means that:

	if (!static_branch(&foo))
		return simple;

	if (static_branch(&foo) && cond)
		return complex;

Doesn't fold and we get horrible code. Introduce __rq_lockp(), which
omits the static_branch().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.316696988@infradead.org
---
 kernel/sched/core.c     | 16 ++++++++--------
 kernel/sched/deadline.c |  4 ++--
 kernel/sched/fair.c     |  2 +-
 kernel/sched/sched.h    | 33 +++++++++++++++++++++++++--------
 4 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 384b79363a393..42c1c88741c02 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -300,9 +300,9 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
 	}
 
 	for (;;) {
-		lock = rq_lockp(rq);
+		lock = __rq_lockp(rq);
 		raw_spin_lock_nested(lock, subclass);
-		if (likely(lock == rq_lockp(rq))) {
+		if (likely(lock == __rq_lockp(rq))) {
 			/* preempt_count *MUST* be > 1 */
 			preempt_enable_no_resched();
 			return;
@@ -325,9 +325,9 @@ bool raw_spin_rq_trylock(struct rq *rq)
 	}
 
 	for (;;) {
-		lock = rq_lockp(rq);
+		lock = __rq_lockp(rq);
 		ret = raw_spin_trylock(lock);
-		if (!ret || (likely(lock == rq_lockp(rq)))) {
+		if (!ret || (likely(lock == __rq_lockp(rq)))) {
 			preempt_enable();
 			return ret;
 		}
@@ -352,7 +352,7 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2)
 		swap(rq1, rq2);
 
 	raw_spin_rq_lock(rq1);
-	if (rq_lockp(rq1) == rq_lockp(rq2))
+	if (__rq_lockp(rq1) == __rq_lockp(rq2))
 		return;
 
 	raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING);
@@ -2622,7 +2622,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 * task_rq_lock().
 	 */
 	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
-				      lockdep_is_held(rq_lockp(task_rq(p)))));
+				      lockdep_is_held(__rq_lockp(task_rq(p)))));
 #endif
 	/*
 	 * Clearly, migrating tasks to offline CPUs is a fairly daft thing.
@@ -4248,7 +4248,7 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf
 	 * do an early lockdep release here:
 	 */
 	rq_unpin_lock(rq, rf);
-	spin_release(&rq_lockp(rq)->dep_map, _THIS_IP_);
+	spin_release(&__rq_lockp(rq)->dep_map, _THIS_IP_);
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	rq_lockp(rq)->owner = next;
@@ -4262,7 +4262,7 @@ static inline void finish_lock_switch(struct rq *rq)
 	 * fix up the runqueue lock - which gets 'carried over' from
 	 * prev into current:
 	 */
-	spin_acquire(&rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
+	spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_);
 	__balance_callbacks(rq);
 	raw_spin_rq_unlock_irq(rq);
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6e99b8b37c8c8..fb0eb9a19fc7c 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1097,9 +1097,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 		 * If the runqueue is no longer available, migrate the
 		 * task elsewhere. This necessarily changes rq.
 		 */
-		lockdep_unpin_lock(rq_lockp(rq), rf.cookie);
+		lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
 		rq = dl_task_offline_migration(rq, p);
-		rf.cookie = lockdep_pin_lock(rq_lockp(rq));
+		rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
 		update_rq_clock(rq);
 
 		/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e50bd75067d58..18960d00708a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1107,7 +1107,7 @@ struct numa_group {
 static struct numa_group *deref_task_numa_group(struct task_struct *p)
 {
 	return rcu_dereference_check(p->numa_group, p == current ||
-		(lockdep_is_held(rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
+		(lockdep_is_held(__rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
 }
 
 static struct numa_group *deref_curr_numa_group(struct task_struct *p)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 29418b8c05dd2..ca30af37b9060 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1133,6 +1133,10 @@ static inline bool sched_core_disabled(void)
 	return !static_branch_unlikely(&__sched_core_enabled);
 }
 
+/*
+ * Be careful with this function; not for general use. The return value isn't
+ * stable unless you actually hold a relevant rq->__lock.
+ */
 static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 {
 	if (sched_core_enabled(rq))
@@ -1141,6 +1145,14 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
+static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
+{
+	if (rq->core_enabled)
+		return &rq->core->__lock;
+
+	return &rq->__lock;
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enabled(struct rq *rq)
@@ -1158,11 +1170,16 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
+static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
+{
+	return &rq->__lock;
+}
+
 #endif /* CONFIG_SCHED_CORE */
 
 static inline void lockdep_assert_rq_held(struct rq *rq)
 {
-	lockdep_assert_held(rq_lockp(rq));
+	lockdep_assert_held(__rq_lockp(rq));
 }
 
 extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
@@ -1346,7 +1363,7 @@ extern struct callback_head balance_push_callback;
  */
 static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
 {
-	rf->cookie = lockdep_pin_lock(rq_lockp(rq));
+	rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
 
 #ifdef CONFIG_SCHED_DEBUG
 	rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
@@ -1364,12 +1381,12 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
 		rf->clock_update_flags = RQCF_UPDATED;
 #endif
 
-	lockdep_unpin_lock(rq_lockp(rq), rf->cookie);
+	lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
 }
 
 static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
 {
-	lockdep_repin_lock(rq_lockp(rq), rf->cookie);
+	lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -2338,7 +2355,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	__acquires(busiest->lock)
 	__acquires(this_rq->lock)
 {
-	if (rq_lockp(this_rq) == rq_lockp(busiest))
+	if (__rq_lockp(this_rq) == __rq_lockp(busiest))
 		return 0;
 
 	if (likely(raw_spin_rq_trylock(busiest)))
@@ -2370,9 +2387,9 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
 static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
 	__releases(busiest->lock)
 {
-	if (rq_lockp(this_rq) != rq_lockp(busiest))
+	if (__rq_lockp(this_rq) != __rq_lockp(busiest))
 		raw_spin_rq_unlock(busiest);
-	lock_set_subclass(&rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
+	lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
 }
 
 static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
@@ -2412,7 +2429,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 	__releases(rq1->lock)
 	__releases(rq2->lock)
 {
-	if (rq_lockp(rq1) != rq_lockp(rq2))
+	if (__rq_lockp(rq1) != __rq_lockp(rq2))
 		raw_spin_rq_unlock(rq2);
 	else
 		__release(rq2->lock);
-- 
GitLab


From 875feb41fd20f6bd6054c9e79a5bcd9da6d8d2b2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 29 Mar 2021 10:08:58 +0200
Subject: [PATCH 0447/3804] sched: Allow sched_core_put() from atomic context

Stuff the meat of sched_core_put() into a work such that we can use
sched_core_put() from atomic context.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.377455632@infradead.org
---
 kernel/sched/core.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 42c1c88741c02..85147bea9d93d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -102,7 +102,7 @@ DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
  */
 
 static DEFINE_MUTEX(sched_core_mutex);
-static int sched_core_count;
+static atomic_t sched_core_count;
 static struct cpumask sched_core_mask;
 
 static void __sched_core_flip(bool enabled)
@@ -170,18 +170,39 @@ static void __sched_core_disable(void)
 
 void sched_core_get(void)
 {
+	if (atomic_inc_not_zero(&sched_core_count))
+		return;
+
 	mutex_lock(&sched_core_mutex);
-	if (!sched_core_count++)
+	if (!atomic_read(&sched_core_count))
 		__sched_core_enable();
+
+	smp_mb__before_atomic();
+	atomic_inc(&sched_core_count);
 	mutex_unlock(&sched_core_mutex);
 }
 
-void sched_core_put(void)
+static void __sched_core_put(struct work_struct *work)
 {
-	mutex_lock(&sched_core_mutex);
-	if (!--sched_core_count)
+	if (atomic_dec_and_mutex_lock(&sched_core_count, &sched_core_mutex)) {
 		__sched_core_disable();
-	mutex_unlock(&sched_core_mutex);
+		mutex_unlock(&sched_core_mutex);
+	}
+}
+
+void sched_core_put(void)
+{
+	static DECLARE_WORK(_work, __sched_core_put);
+
+	/*
+	 * "There can be only one"
+	 *
+	 * Either this is the last one, or we don't actually need to do any
+	 * 'work'. If it is the last *again*, we rely on
+	 * WORK_STRUCT_PENDING_BIT.
+	 */
+	if (!atomic_add_unless(&sched_core_count, -1, 1))
+		schedule_work(&_work);
 }
 
 #endif /* CONFIG_SCHED_CORE */
-- 
GitLab


From 21f56ffe4482e501b9e83737612493eeaac21f5a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:32 -0500
Subject: [PATCH 0448/3804] sched: Introduce sched_class::pick_task()

Because sched_class::pick_next_task() also implies
sched_class::set_next_task() (and possibly put_prev_task() and
newidle_balance) it is not state invariant. This makes it unsuitable
for remote task selection.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[Vineeth: folded fixes]
Signed-off-by: Vineeth Remanan Pillai <viremana@linux.microsoft.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.437092775@infradead.org
---
 kernel/sched/deadline.c  | 16 ++++++++++++++--
 kernel/sched/fair.c      | 40 +++++++++++++++++++++++++++++++++++++---
 kernel/sched/idle.c      |  8 ++++++++
 kernel/sched/rt.c        | 15 +++++++++++++--
 kernel/sched/sched.h     |  3 +++
 kernel/sched/stop_task.c | 14 ++++++++++++--
 6 files changed, 87 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fb0eb9a19fc7c..3829c5a1b9366 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1852,7 +1852,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
 	return rb_entry(left, struct sched_dl_entity, rb_node);
 }
 
-static struct task_struct *pick_next_task_dl(struct rq *rq)
+static struct task_struct *pick_task_dl(struct rq *rq)
 {
 	struct sched_dl_entity *dl_se;
 	struct dl_rq *dl_rq = &rq->dl;
@@ -1864,7 +1864,18 @@ static struct task_struct *pick_next_task_dl(struct rq *rq)
 	dl_se = pick_next_dl_entity(rq, dl_rq);
 	BUG_ON(!dl_se);
 	p = dl_task_of(dl_se);
-	set_next_task_dl(rq, p, true);
+
+	return p;
+}
+
+static struct task_struct *pick_next_task_dl(struct rq *rq)
+{
+	struct task_struct *p;
+
+	p = pick_task_dl(rq);
+	if (p)
+		set_next_task_dl(rq, p, true);
+
 	return p;
 }
 
@@ -2539,6 +2550,7 @@ DEFINE_SCHED_CLASS(dl) = {
 
 #ifdef CONFIG_SMP
 	.balance		= balance_dl,
+	.pick_task		= pick_task_dl,
 	.select_task_rq		= select_task_rq_dl,
 	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed       = set_cpus_allowed_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 18960d00708a4..51d72ab5b5ae3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4419,6 +4419,8 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 static void
 set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	clear_buddies(cfs_rq, se);
+
 	/* 'current' is not kept within the tree. */
 	if (se->on_rq) {
 		/*
@@ -4478,7 +4480,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	 * Avoid running the skip buddy, if running something else can
 	 * be done without getting too unfair.
 	 */
-	if (cfs_rq->skip == se) {
+	if (cfs_rq->skip && cfs_rq->skip == se) {
 		struct sched_entity *second;
 
 		if (se == curr) {
@@ -4505,8 +4507,6 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		se = cfs_rq->last;
 	}
 
-	clear_buddies(cfs_rq, se);
-
 	return se;
 }
 
@@ -7095,6 +7095,39 @@ preempt:
 		set_last_buddy(se);
 }
 
+#ifdef CONFIG_SMP
+static struct task_struct *pick_task_fair(struct rq *rq)
+{
+	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+
+again:
+	cfs_rq = &rq->cfs;
+	if (!cfs_rq->nr_running)
+		return NULL;
+
+	do {
+		struct sched_entity *curr = cfs_rq->curr;
+
+		/* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+		if (curr) {
+			if (curr->on_rq)
+				update_curr(cfs_rq);
+			else
+				curr = NULL;
+
+			if (unlikely(check_cfs_rq_runtime(cfs_rq)))
+				goto again;
+		}
+
+		se = pick_next_entity(cfs_rq, curr);
+		cfs_rq = group_cfs_rq(se);
+	} while (cfs_rq);
+
+	return task_of(se);
+}
+#endif
+
 struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -11298,6 +11331,7 @@ DEFINE_SCHED_CLASS(fair) = {
 
 #ifdef CONFIG_SMP
 	.balance		= balance_fair,
+	.pick_task		= pick_task_fair,
 	.select_task_rq		= select_task_rq_fair,
 	.migrate_task_rq	= migrate_task_rq_fair,
 
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 0194768ea9e7b..43646e7876d91 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -439,6 +439,13 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
 	schedstat_inc(rq->sched_goidle);
 }
 
+#ifdef CONFIG_SMP
+static struct task_struct *pick_task_idle(struct rq *rq)
+{
+	return rq->idle;
+}
+#endif
+
 struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	struct task_struct *next = rq->idle;
@@ -506,6 +513,7 @@ DEFINE_SCHED_CLASS(idle) = {
 
 #ifdef CONFIG_SMP
 	.balance		= balance_idle,
+	.pick_task		= pick_task_idle,
 	.select_task_rq		= select_task_rq_idle,
 	.set_cpus_allowed	= set_cpus_allowed_common,
 #endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b3d39c3d3ab34..a5254471371c2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1626,7 +1626,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	return rt_task_of(rt_se);
 }
 
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *pick_task_rt(struct rq *rq)
 {
 	struct task_struct *p;
 
@@ -1634,7 +1634,17 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 		return NULL;
 
 	p = _pick_next_task_rt(rq);
-	set_next_task_rt(rq, p, true);
+
+	return p;
+}
+
+static struct task_struct *pick_next_task_rt(struct rq *rq)
+{
+	struct task_struct *p = pick_task_rt(rq);
+
+	if (p)
+		set_next_task_rt(rq, p, true);
+
 	return p;
 }
 
@@ -2483,6 +2493,7 @@ DEFINE_SCHED_CLASS(rt) = {
 
 #ifdef CONFIG_SMP
 	.balance		= balance_rt,
+	.pick_task		= pick_task_rt,
 	.select_task_rq		= select_task_rq_rt,
 	.set_cpus_allowed       = set_cpus_allowed_common,
 	.rq_online              = rq_online_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ca30af37b9060..fa990cd259ceb 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1953,6 +1953,9 @@ struct sched_class {
 #ifdef CONFIG_SMP
 	int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 	int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
+
+	struct task_struct * (*pick_task)(struct rq *rq);
+
 	void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
 
 	void (*task_woken)(struct rq *this_rq, struct task_struct *task);
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 55f39125c0e1c..f988ebe3febb9 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -34,15 +34,24 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop, bool fir
 	stop->se.exec_start = rq_clock_task(rq);
 }
 
-static struct task_struct *pick_next_task_stop(struct rq *rq)
+static struct task_struct *pick_task_stop(struct rq *rq)
 {
 	if (!sched_stop_runnable(rq))
 		return NULL;
 
-	set_next_task_stop(rq, rq->stop, true);
 	return rq->stop;
 }
 
+static struct task_struct *pick_next_task_stop(struct rq *rq)
+{
+	struct task_struct *p = pick_task_stop(rq);
+
+	if (p)
+		set_next_task_stop(rq, p, true);
+
+	return p;
+}
+
 static void
 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
@@ -123,6 +132,7 @@ DEFINE_SCHED_CLASS(stop) = {
 
 #ifdef CONFIG_SMP
 	.balance		= balance_stop,
+	.pick_task		= pick_task_stop,
 	.select_task_rq		= select_task_rq_stop,
 	.set_cpus_allowed	= set_cpus_allowed_common,
 #endif
-- 
GitLab


From 8a311c740b53324ec584e0e3bb7077d56b123c28 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:36 -0500
Subject: [PATCH 0449/3804] sched: Basic tracking of matching tasks

Introduce task_struct::core_cookie as an opaque identifier for core
scheduling. When enabled, core scheduling will only allow matching
tasks to be on the core, where idle matches everything.

When task_struct::core_cookie is set (and core scheduling is enabled)
these tasks are indexed in a second RB-tree, first on cookie value
then on scheduling function, such that matching task selection always
finds the most eligible match.

NOTE: *shudder* at the overhead...

NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative
is per class tracking of cookies and that just duplicates a lot of
stuff for no raisin (the 2nd copy lives in the rt-mutex PI code).

[Joel: folded fixes]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.496975854@infradead.org
---
 include/linux/sched.h |   8 ++-
 kernel/sched/core.c   | 152 ++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/fair.c   |  46 -------------
 kernel/sched/sched.h  |  55 +++++++++++++++
 4 files changed, 210 insertions(+), 51 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517b..45eedccf86aae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -700,10 +700,16 @@ struct task_struct {
 	const struct sched_class	*sched_class;
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
+	struct sched_dl_entity		dl;
+
+#ifdef CONFIG_SCHED_CORE
+	struct rb_node			core_node;
+	unsigned long			core_cookie;
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	struct task_group		*sched_task_group;
 #endif
-	struct sched_dl_entity		dl;
 
 #ifdef CONFIG_UCLAMP_TASK
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 85147bea9d93d..c057d471c025d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -88,6 +88,133 @@ __read_mostly int scheduler_running;
 
 DEFINE_STATIC_KEY_FALSE(__sched_core_enabled);
 
+/* kernel prio, less is more */
+static inline int __task_prio(struct task_struct *p)
+{
+	if (p->sched_class == &stop_sched_class) /* trumps deadline */
+		return -2;
+
+	if (rt_prio(p->prio)) /* includes deadline */
+		return p->prio; /* [-1, 99] */
+
+	if (p->sched_class == &idle_sched_class)
+		return MAX_RT_PRIO + NICE_WIDTH; /* 140 */
+
+	return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */
+}
+
+/*
+ * l(a,b)
+ * le(a,b) := !l(b,a)
+ * g(a,b)  := l(b,a)
+ * ge(a,b) := !l(a,b)
+ */
+
+/* real prio, less is less */
+static inline bool prio_less(struct task_struct *a, struct task_struct *b)
+{
+
+	int pa = __task_prio(a), pb = __task_prio(b);
+
+	if (-pa < -pb)
+		return true;
+
+	if (-pb < -pa)
+		return false;
+
+	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
+		return !dl_time_before(a->dl.deadline, b->dl.deadline);
+
+	if (pa == MAX_RT_PRIO + MAX_NICE)  { /* fair */
+		u64 vruntime = b->se.vruntime;
+
+		/*
+		 * Normalize the vruntime if tasks are in different cpus.
+		 */
+		if (task_cpu(a) != task_cpu(b)) {
+			vruntime -= task_cfs_rq(b)->min_vruntime;
+			vruntime += task_cfs_rq(a)->min_vruntime;
+		}
+
+		return !((s64)(a->se.vruntime - vruntime) <= 0);
+	}
+
+	return false;
+}
+
+static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b)
+{
+	if (a->core_cookie < b->core_cookie)
+		return true;
+
+	if (a->core_cookie > b->core_cookie)
+		return false;
+
+	/* flip prio, so high prio is leftmost */
+	if (prio_less(b, a))
+		return true;
+
+	return false;
+}
+
+#define __node_2_sc(node) rb_entry((node), struct task_struct, core_node)
+
+static inline bool rb_sched_core_less(struct rb_node *a, const struct rb_node *b)
+{
+	return __sched_core_less(__node_2_sc(a), __node_2_sc(b));
+}
+
+static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
+{
+	const struct task_struct *p = __node_2_sc(node);
+	unsigned long cookie = (unsigned long)key;
+
+	if (cookie < p->core_cookie)
+		return -1;
+
+	if (cookie > p->core_cookie)
+		return 1;
+
+	return 0;
+}
+
+static void sched_core_enqueue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
+}
+
+static void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+{
+	rq->core->core_task_seq++;
+
+	if (!p->core_cookie)
+		return;
+
+	rb_erase(&p->core_node, &rq->core_tree);
+}
+
+/*
+ * Find left-most (aka, highest priority) task matching @cookie.
+ */
+static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
+{
+	struct rb_node *node;
+
+	node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp);
+	/*
+	 * The idle task always matches any cookie!
+	 */
+	if (!node)
+		return idle_sched_class.pick_task(rq);
+
+	return __node_2_sc(node);
+}
+
 /*
  * Magic required such that:
  *
@@ -147,10 +274,16 @@ static void __sched_core_flip(bool enabled)
 	cpus_read_unlock();
 }
 
-static void __sched_core_enable(void)
+static void sched_core_assert_empty(void)
 {
-	// XXX verify there are no cookie tasks (yet)
+	int cpu;
 
+	for_each_possible_cpu(cpu)
+		WARN_ON_ONCE(!RB_EMPTY_ROOT(&cpu_rq(cpu)->core_tree));
+}
+
+static void __sched_core_enable(void)
+{
 	static_branch_enable(&__sched_core_enabled);
 	/*
 	 * Ensure all previous instances of raw_spin_rq_*lock() have finished
@@ -158,12 +291,12 @@ static void __sched_core_enable(void)
 	 */
 	synchronize_rcu();
 	__sched_core_flip(true);
+	sched_core_assert_empty();
 }
 
 static void __sched_core_disable(void)
 {
-	// XXX verify there are no cookie tasks (left)
-
+	sched_core_assert_empty();
 	__sched_core_flip(false);
 	static_branch_disable(&__sched_core_enabled);
 }
@@ -205,6 +338,11 @@ void sched_core_put(void)
 		schedule_work(&_work);
 }
 
+#else /* !CONFIG_SCHED_CORE */
+
+static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
+static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+
 #endif /* CONFIG_SCHED_CORE */
 
 /*
@@ -1797,10 +1935,16 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 
 	uclamp_rq_inc(rq, p);
 	p->sched_class->enqueue_task(rq, p, flags);
+
+	if (sched_core_enabled(rq))
+		sched_core_enqueue(rq, p);
 }
 
 static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
+	if (sched_core_enabled(rq))
+		sched_core_dequeue(rq, p);
+
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 51d72ab5b5ae3..08be7a2eb05b4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -268,33 +268,11 @@ const struct sched_class fair_sched_class;
  */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	SCHED_WARN_ON(!entity_is_task(se));
-	return container_of(se, struct task_struct, se);
-}
 
 /* Walk up scheduling entities hierarchy */
 #define for_each_sched_entity(se) \
 		for (; se; se = se->parent)
 
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-	return p->se.cfs_rq;
-}
-
-/* runqueue on which this entity is (to be) queued */
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-	return se->cfs_rq;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-	return grp->my_q;
-}
-
 static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
 {
 	if (!path)
@@ -455,33 +433,9 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 
 #else	/* !CONFIG_FAIR_GROUP_SCHED */
 
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-	return container_of(se, struct task_struct, se);
-}
-
 #define for_each_sched_entity(se) \
 		for (; se; se = NULL)
 
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-	return &task_rq(p)->cfs;
-}
-
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-	struct task_struct *p = task_of(se);
-	struct rq *rq = task_rq(p);
-
-	return &rq->cfs;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-	return NULL;
-}
-
 static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
 {
 	if (path)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fa990cd259ceb..e43a2176d88f3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1080,6 +1080,10 @@ struct rq {
 	/* per rq */
 	struct rq		*core;
 	unsigned int		core_enabled;
+	struct rb_root		core_tree;
+
+	/* shared state */
+	unsigned int		core_task_seq;
 #endif
 };
 
@@ -1243,6 +1247,57 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 #define raw_rq()		raw_cpu_ptr(&runqueues)
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	SCHED_WARN_ON(!entity_is_task(se));
+	return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+	return p->se.cfs_rq;
+}
+
+/* runqueue on which this entity is (to be) queued */
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+	return se->cfs_rq;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+	return grp->my_q;
+}
+
+#else
+
+static inline struct task_struct *task_of(struct sched_entity *se)
+{
+	return container_of(se, struct task_struct, se);
+}
+
+static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
+{
+	return &task_rq(p)->cfs;
+}
+
+static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
+{
+	struct task_struct *p = task_of(se);
+	struct rq *rq = task_rq(p);
+
+	return &rq->cfs;
+}
+
+/* runqueue "owned" by this group */
+static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
+{
+	return NULL;
+}
+#endif
+
 extern void update_rq_clock(struct rq *rq);
 
 static inline u64 __rq_clock_broken(struct rq *rq)
-- 
GitLab


From 539f65125d20aacab54d02d77f10a839f45b09dc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:37 -0500
Subject: [PATCH 0450/3804] sched: Add core wide task selection and scheduling

Instead of only selecting a local task, select a task for all SMT
siblings for every reschedule on the core (irrespective of which
logical CPU does the reschedule).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.557559654@infradead.org
---
 kernel/sched/core.c  | 301 ++++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h |   6 +-
 2 files changed, 305 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c057d471c025d..db763f42a4b0f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5282,7 +5282,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
  * Pick up the highest-prio task:
  */
 static inline struct task_struct *
-pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+__pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	const struct sched_class *class;
 	struct task_struct *p;
@@ -5323,6 +5323,294 @@ restart:
 }
 
 #ifdef CONFIG_SCHED_CORE
+static inline bool is_task_rq_idle(struct task_struct *t)
+{
+	return (task_rq(t)->idle == t);
+}
+
+static inline bool cookie_equals(struct task_struct *a, unsigned long cookie)
+{
+	return is_task_rq_idle(a) || (a->core_cookie == cookie);
+}
+
+static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
+{
+	if (is_task_rq_idle(a) || is_task_rq_idle(b))
+		return true;
+
+	return a->core_cookie == b->core_cookie;
+}
+
+// XXX fairness/fwd progress conditions
+/*
+ * Returns
+ * - NULL if there is no runnable task for this class.
+ * - the highest priority task for this runqueue if it matches
+ *   rq->core->core_cookie or its priority is greater than max.
+ * - Else returns idle_task.
+ */
+static struct task_struct *
+pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max)
+{
+	struct task_struct *class_pick, *cookie_pick;
+	unsigned long cookie = rq->core->core_cookie;
+
+	class_pick = class->pick_task(rq);
+	if (!class_pick)
+		return NULL;
+
+	if (!cookie) {
+		/*
+		 * If class_pick is tagged, return it only if it has
+		 * higher priority than max.
+		 */
+		if (max && class_pick->core_cookie &&
+		    prio_less(class_pick, max))
+			return idle_sched_class.pick_task(rq);
+
+		return class_pick;
+	}
+
+	/*
+	 * If class_pick is idle or matches cookie, return early.
+	 */
+	if (cookie_equals(class_pick, cookie))
+		return class_pick;
+
+	cookie_pick = sched_core_find(rq, cookie);
+
+	/*
+	 * If class > max && class > cookie, it is the highest priority task on
+	 * the core (so far) and it must be selected, otherwise we must go with
+	 * the cookie pick in order to satisfy the constraint.
+	 */
+	if (prio_less(cookie_pick, class_pick) &&
+	    (!max || prio_less(max, class_pick)))
+		return class_pick;
+
+	return cookie_pick;
+}
+
+static struct task_struct *
+pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+	struct task_struct *next, *max = NULL;
+	const struct sched_class *class;
+	const struct cpumask *smt_mask;
+	bool need_sync;
+	int i, j, cpu;
+
+	if (!sched_core_enabled(rq))
+		return __pick_next_task(rq, prev, rf);
+
+	cpu = cpu_of(rq);
+
+	/* Stopper task is switching into idle, no need core-wide selection. */
+	if (cpu_is_offline(cpu)) {
+		/*
+		 * Reset core_pick so that we don't enter the fastpath when
+		 * coming online. core_pick would already be migrated to
+		 * another cpu during offline.
+		 */
+		rq->core_pick = NULL;
+		return __pick_next_task(rq, prev, rf);
+	}
+
+	/*
+	 * If there were no {en,de}queues since we picked (IOW, the task
+	 * pointers are all still valid), and we haven't scheduled the last
+	 * pick yet, do so now.
+	 *
+	 * rq->core_pick can be NULL if no selection was made for a CPU because
+	 * it was either offline or went offline during a sibling's core-wide
+	 * selection. In this case, do a core-wide selection.
+	 */
+	if (rq->core->core_pick_seq == rq->core->core_task_seq &&
+	    rq->core->core_pick_seq != rq->core_sched_seq &&
+	    rq->core_pick) {
+		WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
+
+		next = rq->core_pick;
+		if (next != prev) {
+			put_prev_task(rq, prev);
+			set_next_task(rq, next);
+		}
+
+		rq->core_pick = NULL;
+		return next;
+	}
+
+	put_prev_task_balance(rq, prev, rf);
+
+	smt_mask = cpu_smt_mask(cpu);
+
+	/*
+	 * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
+	 *
+	 * @task_seq guards the task state ({en,de}queues)
+	 * @pick_seq is the @task_seq we did a selection on
+	 * @sched_seq is the @pick_seq we scheduled
+	 *
+	 * However, preemptions can cause multiple picks on the same task set.
+	 * 'Fix' this by also increasing @task_seq for every pick.
+	 */
+	rq->core->core_task_seq++;
+	need_sync = !!rq->core->core_cookie;
+
+	/* reset state */
+	rq->core->core_cookie = 0UL;
+	for_each_cpu(i, smt_mask) {
+		struct rq *rq_i = cpu_rq(i);
+
+		rq_i->core_pick = NULL;
+
+		if (rq_i->core_forceidle) {
+			need_sync = true;
+			rq_i->core_forceidle = false;
+		}
+
+		if (i != cpu)
+			update_rq_clock(rq_i);
+	}
+
+	/*
+	 * Try and select tasks for each sibling in decending sched_class
+	 * order.
+	 */
+	for_each_class(class) {
+again:
+		for_each_cpu_wrap(i, smt_mask, cpu) {
+			struct rq *rq_i = cpu_rq(i);
+			struct task_struct *p;
+
+			if (rq_i->core_pick)
+				continue;
+
+			/*
+			 * If this sibling doesn't yet have a suitable task to
+			 * run; ask for the most elegible task, given the
+			 * highest priority task already selected for this
+			 * core.
+			 */
+			p = pick_task(rq_i, class, max);
+			if (!p) {
+				/*
+				 * If there weren't no cookies; we don't need to
+				 * bother with the other siblings.
+				 * If the rest of the core is not running a tagged
+				 * task, i.e.  need_sync == 0, and the current CPU
+				 * which called into the schedule() loop does not
+				 * have any tasks for this class, skip selecting for
+				 * other siblings since there's no point. We don't skip
+				 * for RT/DL because that could make CFS force-idle RT.
+				 */
+				if (i == cpu && !need_sync && class == &fair_sched_class)
+					goto next_class;
+
+				continue;
+			}
+
+			/*
+			 * Optimize the 'normal' case where there aren't any
+			 * cookies and we don't need to sync up.
+			 */
+			if (i == cpu && !need_sync && !p->core_cookie) {
+				next = p;
+				goto done;
+			}
+
+			rq_i->core_pick = p;
+
+			/*
+			 * If this new candidate is of higher priority than the
+			 * previous; and they're incompatible; we need to wipe
+			 * the slate and start over. pick_task makes sure that
+			 * p's priority is more than max if it doesn't match
+			 * max's cookie.
+			 *
+			 * NOTE: this is a linear max-filter and is thus bounded
+			 * in execution time.
+			 */
+			if (!max || !cookie_match(max, p)) {
+				struct task_struct *old_max = max;
+
+				rq->core->core_cookie = p->core_cookie;
+				max = p;
+
+				if (old_max) {
+					for_each_cpu(j, smt_mask) {
+						if (j == i)
+							continue;
+
+						cpu_rq(j)->core_pick = NULL;
+					}
+					goto again;
+				} else {
+					/*
+					 * Once we select a task for a cpu, we
+					 * should not be doing an unconstrained
+					 * pick because it might starve a task
+					 * on a forced idle cpu.
+					 */
+					need_sync = true;
+				}
+
+			}
+		}
+next_class:;
+	}
+
+	rq->core->core_pick_seq = rq->core->core_task_seq;
+	next = rq->core_pick;
+	rq->core_sched_seq = rq->core->core_pick_seq;
+
+	/* Something should have been selected for current CPU */
+	WARN_ON_ONCE(!next);
+
+	/*
+	 * Reschedule siblings
+	 *
+	 * NOTE: L1TF -- at this point we're no longer running the old task and
+	 * sending an IPI (below) ensures the sibling will no longer be running
+	 * their task. This ensures there is no inter-sibling overlap between
+	 * non-matching user state.
+	 */
+	for_each_cpu(i, smt_mask) {
+		struct rq *rq_i = cpu_rq(i);
+
+		/*
+		 * An online sibling might have gone offline before a task
+		 * could be picked for it, or it might be offline but later
+		 * happen to come online, but its too late and nothing was
+		 * picked for it.  That's Ok - it will pick tasks for itself,
+		 * so ignore it.
+		 */
+		if (!rq_i->core_pick)
+			continue;
+
+		if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running)
+			rq_i->core_forceidle = true;
+
+		if (i == cpu) {
+			rq_i->core_pick = NULL;
+			continue;
+		}
+
+		/* Did we break L1TF mitigation requirements? */
+		WARN_ON_ONCE(!cookie_match(next, rq_i->core_pick));
+
+		if (rq_i->curr == rq_i->core_pick) {
+			rq_i->core_pick = NULL;
+			continue;
+		}
+
+		resched_curr(rq_i);
+	}
+
+done:
+	set_next_task(rq, next);
+	return next;
+}
 
 static inline void sched_core_cpu_starting(unsigned int cpu)
 {
@@ -5354,6 +5642,12 @@ static inline void sched_core_cpu_starting(unsigned int cpu)
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
 
+static struct task_struct *
+pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+	return __pick_next_task(rq, prev, rf);
+}
+
 #endif /* CONFIG_SCHED_CORE */
 
 /*
@@ -8609,7 +8903,12 @@ void __init sched_init(void)
 
 #ifdef CONFIG_SCHED_CORE
 		rq->core = NULL;
+		rq->core_pick = NULL;
 		rq->core_enabled = 0;
+		rq->core_tree = RB_ROOT;
+		rq->core_forceidle = false;
+
+		rq->core_cookie = 0UL;
 #endif
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e43a2176d88f3..dd44a3127e9ca 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1079,11 +1079,16 @@ struct rq {
 #ifdef CONFIG_SCHED_CORE
 	/* per rq */
 	struct rq		*core;
+	struct task_struct	*core_pick;
 	unsigned int		core_enabled;
+	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
+	unsigned char		core_forceidle;
 
 	/* shared state */
 	unsigned int		core_task_seq;
+	unsigned int		core_pick_seq;
+	unsigned long		core_cookie;
 #endif
 };
 
@@ -2060,7 +2065,6 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 
 static inline void set_next_task(struct rq *rq, struct task_struct *next)
 {
-	WARN_ON_ONCE(rq->curr != next);
 	next->sched_class->set_next_task(rq, next, false);
 }
 
-- 
GitLab


From 8039e96fcc1de30d5bcaf05da9ca2de46a800826 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Tue, 17 Nov 2020 18:19:38 -0500
Subject: [PATCH 0451/3804] sched/fair: Fix forced idle sibling starvation
 corner case

If there is only one long running local task and the sibling is
forced idle, the forced idle sibling might not get a chance to run
until a schedule event happens on any cpu in the core.

So we check for this condition during a tick to see if a sibling
is starved and then give it a chance to schedule.

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.617407840@infradead.org
---
 kernel/sched/core.c  | 15 ++++++++-------
 kernel/sched/fair.c  | 40 ++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  2 +-
 3 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index db763f42a4b0f..f5e1e6f96411b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5459,16 +5459,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 	/* reset state */
 	rq->core->core_cookie = 0UL;
+	if (rq->core->core_forceidle) {
+		need_sync = true;
+		rq->core->core_forceidle = false;
+	}
 	for_each_cpu(i, smt_mask) {
 		struct rq *rq_i = cpu_rq(i);
 
 		rq_i->core_pick = NULL;
 
-		if (rq_i->core_forceidle) {
-			need_sync = true;
-			rq_i->core_forceidle = false;
-		}
-
 		if (i != cpu)
 			update_rq_clock(rq_i);
 	}
@@ -5588,8 +5587,10 @@ next_class:;
 		if (!rq_i->core_pick)
 			continue;
 
-		if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running)
-			rq_i->core_forceidle = true;
+		if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running &&
+		    !rq_i->core->core_forceidle) {
+			rq_i->core->core_forceidle = true;
+		}
 
 		if (i == cpu) {
 			rq_i->core_pick = NULL;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 08be7a2eb05b4..4d1ecab41e804 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10767,6 +10767,44 @@ static void rq_offline_fair(struct rq *rq)
 
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_SCHED_CORE
+static inline bool
+__entity_slice_used(struct sched_entity *se, int min_nr_tasks)
+{
+	u64 slice = sched_slice(cfs_rq_of(se), se);
+	u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+
+	return (rtime * min_nr_tasks > slice);
+}
+
+#define MIN_NR_TASKS_DURING_FORCEIDLE	2
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
+{
+	if (!sched_core_enabled(rq))
+		return;
+
+	/*
+	 * If runqueue has only one task which used up its slice and
+	 * if the sibling is forced idle, then trigger schedule to
+	 * give forced idle task a chance.
+	 *
+	 * sched_slice() considers only this active rq and it gets the
+	 * whole slice. But during force idle, we have siblings acting
+	 * like a single runqueue and hence we need to consider runnable
+	 * tasks on this cpu and the forced idle cpu. Ideally, we should
+	 * go through the forced idle rq, but that would be a perf hit.
+	 * We can assume that the forced idle cpu has at least
+	 * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check
+	 * if we need to give up the cpu.
+	 */
+	if (rq->core->core_forceidle && rq->cfs.nr_running == 1 &&
+	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
+		resched_curr(rq);
+}
+#else
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
+#endif
+
 /*
  * scheduler tick hitting a task of our scheduling class.
  *
@@ -10790,6 +10828,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
 	update_misfit_status(curr, rq);
 	update_overutilized_status(task_rq(curr));
+
+	task_tick_core(rq, curr);
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index dd44a3127e9ca..db555143380d3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1083,12 +1083,12 @@ struct rq {
 	unsigned int		core_enabled;
 	unsigned int		core_sched_seq;
 	struct rb_root		core_tree;
-	unsigned char		core_forceidle;
 
 	/* shared state */
 	unsigned int		core_task_seq;
 	unsigned int		core_pick_seq;
 	unsigned long		core_cookie;
+	unsigned char		core_forceidle;
 #endif
 };
 
-- 
GitLab


From 7afbba119f0da09824d723f8081608ea1f74ff57 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 17 Nov 2020 18:19:42 -0500
Subject: [PATCH 0452/3804] sched: Fix priority inversion of cookied task with
 sibling

The rationale is as follows. In the core-wide pick logic, even if
need_sync == false, we need to go look at other CPUs (non-local CPUs)
to see if they could be running RT.

Say the RQs in a particular core look like this:

Let CFS1 and CFS2 be 2 tagged CFS tasks.
Let RT1 be an untagged RT task.

	rq0		rq1
	CFS1 (tagged)	RT1 (no tag)
	CFS2 (tagged)

Say schedule() runs on rq0. Now, it will enter the above loop and
pick_task(RT) will return NULL for 'p'. It will enter the above if()
block and see that need_sync == false and will skip RT entirely.

The end result of the selection will be (say prio(CFS1) > prio(CFS2)):

	rq0             rq1
	CFS1            IDLE

When it should have selected:

	rq0             rq1
	IDLE            RT

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.678425748@infradead.org
---
 kernel/sched/core.c | 65 ++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 39 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f5e1e6f96411b..e506d9de16fcc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5443,6 +5443,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	put_prev_task_balance(rq, prev, rf);
 
 	smt_mask = cpu_smt_mask(cpu);
+	need_sync = !!rq->core->core_cookie;
+
+	/* reset state */
+	rq->core->core_cookie = 0UL;
+	if (rq->core->core_forceidle) {
+		need_sync = true;
+		fi_before = true;
+		rq->core->core_forceidle = false;
+	}
 
 	/*
 	 * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
@@ -5455,14 +5464,25 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * 'Fix' this by also increasing @task_seq for every pick.
 	 */
 	rq->core->core_task_seq++;
-	need_sync = !!rq->core->core_cookie;
 
-	/* reset state */
-	rq->core->core_cookie = 0UL;
-	if (rq->core->core_forceidle) {
+	/*
+	 * Optimize for common case where this CPU has no cookies
+	 * and there are no cookied tasks running on siblings.
+	 */
+	if (!need_sync) {
+		for_each_class(class) {
+			next = class->pick_task(rq);
+			if (next)
+				break;
+		}
+
+		if (!next->core_cookie) {
+			rq->core_pick = NULL;
+			goto done;
+		}
 		need_sync = true;
-		rq->core->core_forceidle = false;
 	}
+
 	for_each_cpu(i, smt_mask) {
 		struct rq *rq_i = cpu_rq(i);
 
@@ -5492,31 +5512,8 @@ again:
 			 * core.
 			 */
 			p = pick_task(rq_i, class, max);
-			if (!p) {
-				/*
-				 * If there weren't no cookies; we don't need to
-				 * bother with the other siblings.
-				 * If the rest of the core is not running a tagged
-				 * task, i.e.  need_sync == 0, and the current CPU
-				 * which called into the schedule() loop does not
-				 * have any tasks for this class, skip selecting for
-				 * other siblings since there's no point. We don't skip
-				 * for RT/DL because that could make CFS force-idle RT.
-				 */
-				if (i == cpu && !need_sync && class == &fair_sched_class)
-					goto next_class;
-
+			if (!p)
 				continue;
-			}
-
-			/*
-			 * Optimize the 'normal' case where there aren't any
-			 * cookies and we don't need to sync up.
-			 */
-			if (i == cpu && !need_sync && !p->core_cookie) {
-				next = p;
-				goto done;
-			}
 
 			rq_i->core_pick = p;
 
@@ -5544,19 +5541,9 @@ again:
 						cpu_rq(j)->core_pick = NULL;
 					}
 					goto again;
-				} else {
-					/*
-					 * Once we select a task for a cpu, we
-					 * should not be doing an unconstrained
-					 * pick because it might starve a task
-					 * on a forced idle cpu.
-					 */
-					need_sync = true;
 				}
-
 			}
 		}
-next_class:;
 	}
 
 	rq->core->core_pick_seq = rq->core->core_task_seq;
-- 
GitLab


From c6047c2e3af68dae23ad884249e0d42ff28d2d1b Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 17 Nov 2020 18:19:39 -0500
Subject: [PATCH 0453/3804] sched/fair: Snapshot the min_vruntime of CPUs on
 force idle

During force-idle, we end up doing cross-cpu comparison of vruntimes
during pick_next_task. If we simply compare (vruntime-min_vruntime)
across CPUs, and if the CPUs only have 1 task each, we will always
end up comparing 0 with 0 and pick just one of the tasks all the time.
This starves the task that was not picked. To fix this, take a snapshot
of the min_vruntime when entering force idle and use it for comparison.
This min_vruntime snapshot will only be used for cross-CPU vruntime
comparison, and nothing else.

A note about the min_vruntime snapshot and force idling:

During selection:

  When we're not fi, we need to update snapshot.
  when we're fi and we were not fi, we must update snapshot.
  When we're fi and we were already fi, we must not update snapshot.

Which gives:

  fib     fi      update
  0       0       1
  0       1       1
  1       0       1
  1       1       0

Where:

  fi:  force-idled now
  fib: force-idled before

So the min_vruntime snapshot needs to be updated when: !(fib && fi).

Also, the cfs_prio_less() function needs to be aware of whether the
core is in force idle or not, since it will use this information to
know whether to advance a cfs_rq's min_vruntime_fi in the hierarchy.
So pass this information along via pick_task() -> prio_less().

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.738542617@infradead.org
---
 kernel/sched/core.c  | 59 +++++++++++++++++++---------------
 kernel/sched/fair.c  | 75 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  8 +++++
 3 files changed, 117 insertions(+), 25 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e506d9de16fcc..e45c1d21b3714 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -111,7 +111,7 @@ static inline int __task_prio(struct task_struct *p)
  */
 
 /* real prio, less is less */
-static inline bool prio_less(struct task_struct *a, struct task_struct *b)
+static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
 {
 
 	int pa = __task_prio(a), pb = __task_prio(b);
@@ -125,19 +125,8 @@ static inline bool prio_less(struct task_struct *a, struct task_struct *b)
 	if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
 		return !dl_time_before(a->dl.deadline, b->dl.deadline);
 
-	if (pa == MAX_RT_PRIO + MAX_NICE)  { /* fair */
-		u64 vruntime = b->se.vruntime;
-
-		/*
-		 * Normalize the vruntime if tasks are in different cpus.
-		 */
-		if (task_cpu(a) != task_cpu(b)) {
-			vruntime -= task_cfs_rq(b)->min_vruntime;
-			vruntime += task_cfs_rq(a)->min_vruntime;
-		}
-
-		return !((s64)(a->se.vruntime - vruntime) <= 0);
-	}
+	if (pa == MAX_RT_PRIO + MAX_NICE)	/* fair */
+		return cfs_prio_less(a, b, in_fi);
 
 	return false;
 }
@@ -151,7 +140,7 @@ static inline bool __sched_core_less(struct task_struct *a, struct task_struct *
 		return false;
 
 	/* flip prio, so high prio is leftmost */
-	if (prio_less(b, a))
+	if (prio_less(b, a, task_rq(a)->core->core_forceidle))
 		return true;
 
 	return false;
@@ -5350,7 +5339,7 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
  * - Else returns idle_task.
  */
 static struct task_struct *
-pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max)
+pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max, bool in_fi)
 {
 	struct task_struct *class_pick, *cookie_pick;
 	unsigned long cookie = rq->core->core_cookie;
@@ -5365,7 +5354,7 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
 		 * higher priority than max.
 		 */
 		if (max && class_pick->core_cookie &&
-		    prio_less(class_pick, max))
+		    prio_less(class_pick, max, in_fi))
 			return idle_sched_class.pick_task(rq);
 
 		return class_pick;
@@ -5384,19 +5373,22 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma
 	 * the core (so far) and it must be selected, otherwise we must go with
 	 * the cookie pick in order to satisfy the constraint.
 	 */
-	if (prio_less(cookie_pick, class_pick) &&
-	    (!max || prio_less(max, class_pick)))
+	if (prio_less(cookie_pick, class_pick, in_fi) &&
+	    (!max || prio_less(max, class_pick, in_fi)))
 		return class_pick;
 
 	return cookie_pick;
 }
 
+extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
+
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct task_struct *next, *max = NULL;
 	const struct sched_class *class;
 	const struct cpumask *smt_mask;
+	bool fi_before = false;
 	bool need_sync;
 	int i, j, cpu;
 
@@ -5478,9 +5470,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 		if (!next->core_cookie) {
 			rq->core_pick = NULL;
+			/*
+			 * For robustness, update the min_vruntime_fi for
+			 * unconstrained picks as well.
+			 */
+			WARN_ON_ONCE(fi_before);
+			task_vruntime_update(rq, next, false);
 			goto done;
 		}
-		need_sync = true;
 	}
 
 	for_each_cpu(i, smt_mask) {
@@ -5511,11 +5508,16 @@ again:
 			 * highest priority task already selected for this
 			 * core.
 			 */
-			p = pick_task(rq_i, class, max);
+			p = pick_task(rq_i, class, max, fi_before);
 			if (!p)
 				continue;
 
 			rq_i->core_pick = p;
+			if (rq_i->idle == p && rq_i->nr_running) {
+				rq->core->core_forceidle = true;
+				if (!fi_before)
+					rq->core->core_forceidle_seq++;
+			}
 
 			/*
 			 * If this new candidate is of higher priority than the
@@ -5534,6 +5536,7 @@ again:
 				max = p;
 
 				if (old_max) {
+					rq->core->core_forceidle = false;
 					for_each_cpu(j, smt_mask) {
 						if (j == i)
 							continue;
@@ -5574,10 +5577,16 @@ again:
 		if (!rq_i->core_pick)
 			continue;
 
-		if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running &&
-		    !rq_i->core->core_forceidle) {
-			rq_i->core->core_forceidle = true;
-		}
+		/*
+		 * Update for new !FI->FI transitions, or if continuing to be in !FI:
+		 * fi_before     fi      update?
+		 *  0            0       1
+		 *  0            1       1
+		 *  1            0       1
+		 *  1            1       0
+		 */
+		if (!(fi_before && rq->core->core_forceidle))
+			task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle);
 
 		if (i == cpu) {
 			rq_i->core_pick = NULL;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d1ecab41e804..5948dc17b9ccb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10801,6 +10801,81 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
 	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
 		resched_curr(rq);
 }
+
+/*
+ * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed.
+ */
+static void se_fi_update(struct sched_entity *se, unsigned int fi_seq, bool forceidle)
+{
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+		if (forceidle) {
+			if (cfs_rq->forceidle_seq == fi_seq)
+				break;
+			cfs_rq->forceidle_seq = fi_seq;
+		}
+
+		cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime;
+	}
+}
+
+void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi)
+{
+	struct sched_entity *se = &p->se;
+
+	if (p->sched_class != &fair_sched_class)
+		return;
+
+	se_fi_update(se, rq->core->core_forceidle_seq, in_fi);
+}
+
+bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi)
+{
+	struct rq *rq = task_rq(a);
+	struct sched_entity *sea = &a->se;
+	struct sched_entity *seb = &b->se;
+	struct cfs_rq *cfs_rqa;
+	struct cfs_rq *cfs_rqb;
+	s64 delta;
+
+	SCHED_WARN_ON(task_rq(b)->core != rq->core);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/*
+	 * Find an se in the hierarchy for tasks a and b, such that the se's
+	 * are immediate siblings.
+	 */
+	while (sea->cfs_rq->tg != seb->cfs_rq->tg) {
+		int sea_depth = sea->depth;
+		int seb_depth = seb->depth;
+
+		if (sea_depth >= seb_depth)
+			sea = parent_entity(sea);
+		if (sea_depth <= seb_depth)
+			seb = parent_entity(seb);
+	}
+
+	se_fi_update(sea, rq->core->core_forceidle_seq, in_fi);
+	se_fi_update(seb, rq->core->core_forceidle_seq, in_fi);
+
+	cfs_rqa = sea->cfs_rq;
+	cfs_rqb = seb->cfs_rq;
+#else
+	cfs_rqa = &task_rq(a)->cfs;
+	cfs_rqb = &task_rq(b)->cfs;
+#endif
+
+	/*
+	 * Find delta after normalizing se's vruntime with its cfs_rq's
+	 * min_vruntime_fi, which would have been updated in prior calls
+	 * to se_fi_update().
+	 */
+	delta = (s64)(sea->vruntime - seb->vruntime) +
+		(s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+
+	return delta > 0;
+}
 #else
 static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {}
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db555143380d3..4a898abc60ce2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,11 @@ struct cfs_rq {
 
 	u64			exec_clock;
 	u64			min_vruntime;
+#ifdef CONFIG_SCHED_CORE
+	unsigned int		forceidle_seq;
+	u64			min_vruntime_fi;
+#endif
+
 #ifndef CONFIG_64BIT
 	u64			min_vruntime_copy;
 #endif
@@ -1089,6 +1094,7 @@ struct rq {
 	unsigned int		core_pick_seq;
 	unsigned long		core_cookie;
 	unsigned char		core_forceidle;
+	unsigned int		core_forceidle_seq;
 #endif
 };
 
@@ -1162,6 +1168,8 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
+bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enabled(struct rq *rq)
-- 
GitLab


From d2dfa17bc7de67e99685c4d6557837bf801a102c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 17 Nov 2020 18:19:43 -0500
Subject: [PATCH 0454/3804] sched: Trivial forced-newidle balancer

When a sibling is forced-idle to match the core-cookie; search for
matching tasks to fill the core.

rcu_read_unlock() can incur an infrequent deadlock in
sched_core_balance(). Fix this by using the RCU-sched flavor instead.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.800048269@infradead.org
---
 include/linux/sched.h |   1 +
 kernel/sched/core.c   | 130 +++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/idle.c   |   1 +
 kernel/sched/sched.h  |   6 ++
 4 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 45eedccf86aae..9b822e3832123 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -705,6 +705,7 @@ struct task_struct {
 #ifdef CONFIG_SCHED_CORE
 	struct rb_node			core_node;
 	unsigned long			core_cookie;
+	unsigned int			core_occupation;
 #endif
 
 #ifdef CONFIG_CGROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e45c1d21b3714..b4988887510fd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -204,6 +204,21 @@ static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie)
 	return __node_2_sc(node);
 }
 
+static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie)
+{
+	struct rb_node *node = &p->core_node;
+
+	node = rb_next(node);
+	if (!node)
+		return NULL;
+
+	p = container_of(node, struct task_struct, core_node);
+	if (p->core_cookie != cookie)
+		return NULL;
+
+	return p;
+}
+
 /*
  * Magic required such that:
  *
@@ -5389,8 +5404,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	const struct sched_class *class;
 	const struct cpumask *smt_mask;
 	bool fi_before = false;
+	int i, j, cpu, occ = 0;
 	bool need_sync;
-	int i, j, cpu;
 
 	if (!sched_core_enabled(rq))
 		return __pick_next_task(rq, prev, rf);
@@ -5512,6 +5527,9 @@ again:
 			if (!p)
 				continue;
 
+			if (!is_task_rq_idle(p))
+				occ++;
+
 			rq_i->core_pick = p;
 			if (rq_i->idle == p && rq_i->nr_running) {
 				rq->core->core_forceidle = true;
@@ -5543,6 +5561,7 @@ again:
 
 						cpu_rq(j)->core_pick = NULL;
 					}
+					occ = 1;
 					goto again;
 				}
 			}
@@ -5588,6 +5607,8 @@ again:
 		if (!(fi_before && rq->core->core_forceidle))
 			task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle);
 
+		rq_i->core_pick->core_occupation = occ;
+
 		if (i == cpu) {
 			rq_i->core_pick = NULL;
 			continue;
@@ -5609,6 +5630,113 @@ done:
 	return next;
 }
 
+static bool try_steal_cookie(int this, int that)
+{
+	struct rq *dst = cpu_rq(this), *src = cpu_rq(that);
+	struct task_struct *p;
+	unsigned long cookie;
+	bool success = false;
+
+	local_irq_disable();
+	double_rq_lock(dst, src);
+
+	cookie = dst->core->core_cookie;
+	if (!cookie)
+		goto unlock;
+
+	if (dst->curr != dst->idle)
+		goto unlock;
+
+	p = sched_core_find(src, cookie);
+	if (p == src->idle)
+		goto unlock;
+
+	do {
+		if (p == src->core_pick || p == src->curr)
+			goto next;
+
+		if (!cpumask_test_cpu(this, &p->cpus_mask))
+			goto next;
+
+		if (p->core_occupation > dst->idle->core_occupation)
+			goto next;
+
+		p->on_rq = TASK_ON_RQ_MIGRATING;
+		deactivate_task(src, p, 0);
+		set_task_cpu(p, this);
+		activate_task(dst, p, 0);
+		p->on_rq = TASK_ON_RQ_QUEUED;
+
+		resched_curr(dst);
+
+		success = true;
+		break;
+
+next:
+		p = sched_core_next(p, cookie);
+	} while (p);
+
+unlock:
+	double_rq_unlock(dst, src);
+	local_irq_enable();
+
+	return success;
+}
+
+static bool steal_cookie_task(int cpu, struct sched_domain *sd)
+{
+	int i;
+
+	for_each_cpu_wrap(i, sched_domain_span(sd), cpu) {
+		if (i == cpu)
+			continue;
+
+		if (need_resched())
+			break;
+
+		if (try_steal_cookie(cpu, i))
+			return true;
+	}
+
+	return false;
+}
+
+static void sched_core_balance(struct rq *rq)
+{
+	struct sched_domain *sd;
+	int cpu = cpu_of(rq);
+
+	preempt_disable();
+	rcu_read_lock();
+	raw_spin_rq_unlock_irq(rq);
+	for_each_domain(cpu, sd) {
+		if (need_resched())
+			break;
+
+		if (steal_cookie_task(cpu, sd))
+			break;
+	}
+	raw_spin_rq_lock_irq(rq);
+	rcu_read_unlock();
+	preempt_enable();
+}
+
+static DEFINE_PER_CPU(struct callback_head, core_balance_head);
+
+void queue_core_balance(struct rq *rq)
+{
+	if (!sched_core_enabled(rq))
+		return;
+
+	if (!rq->core->core_cookie)
+		return;
+
+	if (!rq->nr_running) /* not forced idle */
+		return;
+
+	queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
+}
+
 static inline void sched_core_cpu_starting(unsigned int cpu)
 {
 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 43646e7876d91..912b47aa99d82 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -437,6 +437,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir
 {
 	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
+	queue_core_balance(rq);
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4a898abc60ce2..91ca1fee9fec2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1170,6 +1170,8 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 
 bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
 
+extern void queue_core_balance(struct rq *rq);
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enabled(struct rq *rq)
@@ -1192,6 +1194,10 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 	return &rq->__lock;
 }
 
+static inline void queue_core_balance(struct rq *rq)
+{
+}
+
 #endif /* CONFIG_SCHED_CORE */
 
 static inline void lockdep_assert_rq_held(struct rq *rq)
-- 
GitLab


From 97886d9dcd86820bdbc1fa73b455982809cbc8c2 Mon Sep 17 00:00:00 2001
From: Aubrey Li <aubrey.li@linux.intel.com>
Date: Wed, 24 Mar 2021 17:40:13 -0400
Subject: [PATCH 0455/3804] sched: Migration changes for core scheduling

 - Don't migrate if there is a cookie mismatch
     Load balance tries to move task from busiest CPU to the
     destination CPU. When core scheduling is enabled, if the
     task's cookie does not match with the destination CPU's
     core cookie, this task may be skipped by this CPU. This
     mitigates the forced idle time on the destination CPU.

 - Select cookie matched idle CPU
     In the fast path of task wakeup, select the first cookie matched
     idle CPU instead of the first idle CPU.

 - Find cookie matched idlest CPU
     In the slow path of task wakeup, find the idlest CPU whose core
     cookie matches with task's cookie

Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.860083871@infradead.org
---
 kernel/sched/fair.c  | 29 ++++++++++++++----
 kernel/sched/sched.h | 73 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5948dc17b9ccb..2635e10484213 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5889,11 +5889,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+		struct rq *rq = cpu_rq(i);
+
+		if (!sched_core_cookie_match(rq, p))
+			continue;
+
 		if (sched_idle_cpu(i))
 			return i;
 
 		if (available_idle_cpu(i)) {
-			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
 			if (idle && idle->exit_latency < min_exit_latency) {
 				/*
@@ -5979,9 +5983,10 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 	return new_cpu;
 }
 
-static inline int __select_idle_cpu(int cpu)
+static inline int __select_idle_cpu(int cpu, struct task_struct *p)
 {
-	if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+	if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
+	    sched_cpu_cookie_match(cpu_rq(cpu), p))
 		return cpu;
 
 	return -1;
@@ -6051,7 +6056,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 	int cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
-		return __select_idle_cpu(core);
+		return __select_idle_cpu(core, p);
 
 	for_each_cpu(cpu, cpu_smt_mask(core)) {
 		if (!available_idle_cpu(cpu)) {
@@ -6107,7 +6112,7 @@ static inline bool test_idle_cores(int cpu, bool def)
 
 static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
 {
-	return __select_idle_cpu(core);
+	return __select_idle_cpu(core, p);
 }
 
 static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
@@ -6164,7 +6169,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 		} else {
 			if (!--nr)
 				return -1;
-			idle_cpu = __select_idle_cpu(cpu);
+			idle_cpu = __select_idle_cpu(cpu, p);
 			if ((unsigned int)idle_cpu < nr_cpumask_bits)
 				break;
 		}
@@ -7527,6 +7532,14 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 
 	if (sysctl_sched_migration_cost == -1)
 		return 1;
+
+	/*
+	 * Don't migrate task if the task's cookie does not match
+	 * with the destination CPU's core cookie.
+	 */
+	if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+		return 1;
+
 	if (sysctl_sched_migration_cost == 0)
 		return 0;
 
@@ -8857,6 +8870,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 					p->cpus_ptr))
 			continue;
 
+		/* Skip over this group if no cookie matched */
+		if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
+			continue;
+
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_span(group));
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 91ca1fee9fec2..3878386a0a024 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1134,7 +1134,9 @@ static inline bool is_migration_disabled(struct task_struct *p)
 #endif
 }
 
+struct sched_group;
 #ifdef CONFIG_SCHED_CORE
+static inline struct cpumask *sched_group_span(struct sched_group *sg);
 
 DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
 
@@ -1170,6 +1172,61 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 
 bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
 
+/*
+ * Helpers to check if the CPU's core cookie matches with the task's cookie
+ * when core scheduling is enabled.
+ * A special case is that the task's cookie always matches with CPU's core
+ * cookie if the CPU is in an idle core.
+ */
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+	/* Ignore cookie match if core scheduler is not enabled on the CPU. */
+	if (!sched_core_enabled(rq))
+		return true;
+
+	return rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+	bool idle_core = true;
+	int cpu;
+
+	/* Ignore cookie match if core scheduler is not enabled on the CPU. */
+	if (!sched_core_enabled(rq))
+		return true;
+
+	for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
+		if (!available_idle_cpu(cpu)) {
+			idle_core = false;
+			break;
+		}
+	}
+
+	/*
+	 * A CPU in an idle core is always the best choice for tasks with
+	 * cookies.
+	 */
+	return idle_core || rq->core->core_cookie == p->core_cookie;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+					    struct task_struct *p,
+					    struct sched_group *group)
+{
+	int cpu;
+
+	/* Ignore cookie match if core scheduler is not enabled on the CPU. */
+	if (!sched_core_enabled(rq))
+		return true;
+
+	for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
+		if (sched_core_cookie_match(rq, p)) /* NOTE(review): ignores 'cpu' - confirm */
+			return true;
+	}
+	return false;
+}
+
 extern void queue_core_balance(struct rq *rq);
 
 #else /* !CONFIG_SCHED_CORE */
@@ -1198,6 +1255,22 @@ static inline void queue_core_balance(struct rq *rq)
 {
 }
 
+static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
+{
+	return true;
+}
+
+static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
+{
+	return true;
+}
+
+static inline bool sched_group_cookie_match(struct rq *rq,
+					    struct task_struct *p,
+					    struct sched_group *group)
+{
+	return true;
+}
 #endif /* CONFIG_SCHED_CORE */
 
 static inline void lockdep_assert_rq_held(struct rq *rq)
-- 
GitLab


From 6e33cad0af49336952e5541464bd02f5b5fd433e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 26 Mar 2021 18:55:06 +0100
Subject: [PATCH 0456/3804] sched: Trivial core scheduling cookie management

In order to not have to use pid_struct, create a new, smaller,
structure to manage task cookies for core scheduling.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.919768100@infradead.org
---
 include/linux/sched.h     |   6 +++
 kernel/fork.c             |   1 +
 kernel/sched/Makefile     |   1 +
 kernel/sched/core.c       |   7 +--
 kernel/sched/core_sched.c | 109 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h      |  16 ++++++
 6 files changed, 137 insertions(+), 3 deletions(-)
 create mode 100644 kernel/sched/core_sched.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9b822e3832123..eab3f7c4251bf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2179,4 +2179,10 @@ int sched_trace_rq_nr_running(struct rq *rq);
 
 const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
+#ifdef CONFIG_SCHED_CORE
+extern void sched_core_free(struct task_struct *tsk);
+#else
+static inline void sched_core_free(struct task_struct *tsk) { }
+#endif
+
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index dc06afd725cbd..d16c60c9daca0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -742,6 +742,7 @@ void __put_task_struct(struct task_struct *tsk)
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
+	sched_core_free(tsk);
 
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 5fc9c9b70862f..978fcfca5871d 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
 obj-$(CONFIG_MEMBARRIER) += membarrier.o
 obj-$(CONFIG_CPU_ISOLATION) += isolation.o
 obj-$(CONFIG_PSI) += psi.o
+obj-$(CONFIG_SCHED_CORE) += core_sched.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b4988887510fd..55b2d9399e12a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -167,7 +167,7 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node)
 	return 0;
 }
 
-static void sched_core_enqueue(struct rq *rq, struct task_struct *p)
+void sched_core_enqueue(struct rq *rq, struct task_struct *p)
 {
 	rq->core->core_task_seq++;
 
@@ -177,14 +177,15 @@ static void sched_core_enqueue(struct rq *rq, struct task_struct *p)
 	rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
 }
 
-static void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+void sched_core_dequeue(struct rq *rq, struct task_struct *p)
 {
 	rq->core->core_task_seq++;
 
-	if (!p->core_cookie)
+	if (!sched_core_enqueued(p))
 		return;
 
 	rb_erase(&p->core_node, &rq->core_tree);
+	RB_CLEAR_NODE(&p->core_node);
 }
 
 /*
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
new file mode 100644
index 0000000000000..8d0869a9eb8c3
--- /dev/null
+++ b/kernel/sched/core_sched.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "sched.h"
+
+/*
+ * A simple wrapper around refcount. An allocated sched_core_cookie's
+ * address is used to compute the cookie of the task.
+ */
+struct sched_core_cookie {
+	refcount_t refcnt;
+};
+
+unsigned long sched_core_alloc_cookie(void)
+{
+	struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL);
+	if (!ck)
+		return 0;
+
+	refcount_set(&ck->refcnt, 1);
+	sched_core_get();
+
+	return (unsigned long)ck;
+}
+
+void sched_core_put_cookie(unsigned long cookie)
+{
+	struct sched_core_cookie *ptr = (void *)cookie;
+
+	if (ptr && refcount_dec_and_test(&ptr->refcnt)) {
+		kfree(ptr);
+		sched_core_put();
+	}
+}
+
+unsigned long sched_core_get_cookie(unsigned long cookie)
+{
+	struct sched_core_cookie *ptr = (void *)cookie;
+
+	if (ptr)
+		refcount_inc(&ptr->refcnt);
+
+	return cookie;
+}
+
+/*
+ * sched_core_update_cookie - replace the cookie on a task
+ * @p: the task to update
+ * @cookie: the new cookie
+ *
+ * Effectively exchange the task cookie; caller is responsible for lifetimes on
+ * both ends.
+ *
+ * Returns: the old cookie
+ */
+unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie)
+{
+	unsigned long old_cookie;
+	struct rq_flags rf;
+	struct rq *rq;
+	bool enqueued;
+
+	rq = task_rq_lock(p, &rf);
+
+	/*
+	 * Since creating a cookie implies sched_core_get(), and we cannot set
+	 * a cookie until after we've created it, similarly, we cannot destroy
+	 * a cookie until after we've removed it, we must have core scheduling
+	 * enabled here.
+	 */
+	SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq));
+
+	enqueued = sched_core_enqueued(p);
+	if (enqueued)
+		sched_core_dequeue(rq, p);
+
+	old_cookie = p->core_cookie;
+	p->core_cookie = cookie;
+
+	if (enqueued)
+		sched_core_enqueue(rq, p);
+
+	/*
+	 * If task is currently running, it may not be compatible anymore after
+	 * the cookie change, so enter the scheduler on its CPU to schedule it
+	 * away.
+	 */
+	if (task_running(rq, p))
+		resched_curr(rq);
+
+	task_rq_unlock(rq, p, &rf);
+
+	return old_cookie;
+}
+
+static unsigned long sched_core_clone_cookie(struct task_struct *p)
+{
+	unsigned long cookie, flags;
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	cookie = sched_core_get_cookie(p->core_cookie);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+	return cookie;
+}
+
+void sched_core_free(struct task_struct *p)
+{
+	sched_core_put_cookie(p->core_cookie);
+}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3878386a0a024..904c52b560d16 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1229,6 +1229,22 @@ static inline bool sched_group_cookie_match(struct rq *rq,
 
 extern void queue_core_balance(struct rq *rq);
 
+static inline bool sched_core_enqueued(struct task_struct *p)
+{
+	return !RB_EMPTY_NODE(&p->core_node);
+}
+
+extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
+extern void sched_core_dequeue(struct rq *rq, struct task_struct *p);
+
+extern void sched_core_get(void);
+extern void sched_core_put(void);
+
+extern unsigned long sched_core_alloc_cookie(void);
+extern void sched_core_put_cookie(unsigned long cookie);
+extern unsigned long sched_core_get_cookie(unsigned long cookie);
+extern unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie);
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enabled(struct rq *rq)
-- 
GitLab


From 85dd3f61203c5cfa72b308ff327b5fbf3fc1ce5e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 29 Mar 2021 15:18:35 +0200
Subject: [PATCH 0457/3804] sched: Inherit task cookie on fork()

Note that sched_core_fork() is called from under tasklist_lock, and
not from sched_fork() earlier. This avoids a few races later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.980003687@infradead.org
---
 include/linux/sched.h     | 2 ++
 kernel/fork.c             | 3 +++
 kernel/sched/core_sched.c | 6 ++++++
 3 files changed, 11 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eab3f7c4251bf..fba47e52e482b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2181,8 +2181,10 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
+extern void sched_core_fork(struct task_struct *p);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
+static inline void sched_core_fork(struct task_struct *p) { }
 #endif
 
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index d16c60c9daca0..e7fd928fcafe9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2251,6 +2251,8 @@ static __latent_entropy struct task_struct *copy_process(
 
 	klp_copy_process(p);
 
+	sched_core_fork(p);
+
 	spin_lock(&current->sighand->siglock);
 
 	/*
@@ -2338,6 +2340,7 @@ static __latent_entropy struct task_struct *copy_process(
 	return p;
 
 bad_fork_cancel_cgroup:
+	sched_core_free(p);
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	cgroup_cancel_fork(p, args);
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index 8d0869a9eb8c3..dcbbeaefaaa3a 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -103,6 +103,12 @@ static unsigned long sched_core_clone_cookie(struct task_struct *p)
 	return cookie;
 }
 
+void sched_core_fork(struct task_struct *p)
+{
+	RB_CLEAR_NODE(&p->core_node);
+	p->core_cookie = sched_core_clone_cookie(current);
+}
+
 void sched_core_free(struct task_struct *p)
 {
 	sched_core_put_cookie(p->core_cookie);
-- 
GitLab


From 7ac592aa35a684ff1858fb9ec282886b9e3575ac Mon Sep 17 00:00:00 2001
From: Chris Hyser <chris.hyser@oracle.com>
Date: Wed, 24 Mar 2021 17:40:15 -0400
Subject: [PATCH 0458/3804] sched: prctl() core-scheduling interface

This patch provides support for setting and copying core scheduling
'task cookies' between threads (PID), processes (TGID), and process
groups (PGID).

The value of core scheduling isn't that tasks don't share a core,
'nosmt' can do that. The value lies in exploiting all the sharing
opportunities that exist to recover possible lost performance and that
requires a degree of flexibility in the API.

From a security perspective (and there are others), the thread,
process and process group distinction is an existent hierarchal
categorization of tasks that reflects many of the security concerns
about 'data sharing'. For example, protecting against cache-snooping
by a thread that can just read the memory directly isn't all that
useful.

With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a
mechanism to create and share cookies. CREATE/SHARE_TO specify a
target pid with enum pidtype used to specify the scope of the targeted
tasks. For example, PIDTYPE_TGID will share the cookie with the
process and all of its threads as typically desired in a security
scenario.

API:

  prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL)
  prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL)

where 'tgtpid/srcpid == 0' implies the current process and pidtype is
kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}.

For return values, EINVAL, ENOMEM are what they say. ESRCH means the
tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission
access to tgtpid/srcpid. ENODEV indicates your machine lacks SMT.

[peterz: complete rewrite]
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123309.039845339@infradead.org
---
 include/linux/sched.h            |   2 +
 include/uapi/linux/prctl.h       |   8 +++
 kernel/sched/core_sched.c        | 114 +++++++++++++++++++++++++++++++
 kernel/sys.c                     |   5 ++
 tools/include/uapi/linux/prctl.h |   8 +++
 5 files changed, 137 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fba47e52e482b..c7e7d50e2fdca 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2182,6 +2182,8 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd);
 #ifdef CONFIG_SCHED_CORE
 extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
+extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+				unsigned long uaddr);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 18a9f59dc067f..967d9c55323d1 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -259,4 +259,12 @@ struct prctl_mm_map {
 #define PR_PAC_SET_ENABLED_KEYS		60
 #define PR_PAC_GET_ENABLED_KEYS		61
 
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE			62
+# define PR_SCHED_CORE_GET		0
+# define PR_SCHED_CORE_CREATE		1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO		2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX		4
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index dcbbeaefaaa3a..9a80e9a474c07 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
+#include <linux/prctl.h>
 #include "sched.h"
 
 /*
@@ -113,3 +114,116 @@ void sched_core_free(struct task_struct *p)
 {
 	sched_core_put_cookie(p->core_cookie);
 }
+
+static void __sched_core_set(struct task_struct *p, unsigned long cookie)
+{
+	cookie = sched_core_get_cookie(cookie);
+	cookie = sched_core_update_cookie(p, cookie);
+	sched_core_put_cookie(cookie);
+}
+
+/* Called from prctl interface: PR_SCHED_CORE */
+int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
+			 unsigned long uaddr)
+{
+	unsigned long cookie = 0, id = 0;
+	struct task_struct *task, *p;
+	struct pid *grp;
+	int err = 0;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -ENODEV;
+
+	if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 ||
+	    (cmd != PR_SCHED_CORE_GET && uaddr))
+		return -EINVAL;
+
+	rcu_read_lock();
+	if (pid == 0) {
+		task = current;
+	} else {
+		task = find_task_by_vpid(pid);
+		if (!task) {
+			rcu_read_unlock();
+			return -ESRCH;
+		}
+	}
+	get_task_struct(task);
+	rcu_read_unlock();
+
+	/*
+	 * Check if this process has the right to modify the specified
+	 * process. Use the regular "ptrace_may_access()" checks.
+	 */
+	if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	switch (cmd) {
+	case PR_SCHED_CORE_GET:
+		if (type != PIDTYPE_PID || uaddr & 7) {
+			err = -EINVAL;
+			goto out;
+		}
+		cookie = sched_core_clone_cookie(task);
+		if (cookie) {
+			/* XXX improve ? */
+			ptr_to_hashval((void *)cookie, &id);
+		}
+		err = put_user(id, (u64 __user *)uaddr);
+		goto out;
+
+	case PR_SCHED_CORE_CREATE:
+		cookie = sched_core_alloc_cookie();
+		if (!cookie) {
+			err = -ENOMEM;
+			goto out;
+		}
+		break;
+
+	case PR_SCHED_CORE_SHARE_TO:
+		cookie = sched_core_clone_cookie(current);
+		break;
+
+	case PR_SCHED_CORE_SHARE_FROM:
+		if (type != PIDTYPE_PID) {
+			err = -EINVAL;
+			goto out;
+		}
+		cookie = sched_core_clone_cookie(task);
+		__sched_core_set(current, cookie);
+		goto out;
+
+	default:
+		err = -EINVAL;
+		goto out;
+	};
+
+	if (type == PIDTYPE_PID) {
+		__sched_core_set(task, cookie);
+		goto out;
+	}
+
+	read_lock(&tasklist_lock);
+	grp = task_pid_type(task, type);
+
+	do_each_pid_thread(grp, type, p) {
+		if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) {
+			err = -EPERM;
+			goto out_tasklist;
+		}
+	} while_each_pid_thread(grp, type, p);
+
+	do_each_pid_thread(grp, type, p) {
+		__sched_core_set(p, cookie);
+	} while_each_pid_thread(grp, type, p);
+out_tasklist:
+	read_unlock(&tasklist_lock);
+
+out:
+	sched_core_put_cookie(cookie);
+	put_task_struct(task);
+	return err;
+}
+
diff --git a/kernel/sys.c b/kernel/sys.c
index 3a583a29815fa..9de46a4bf4921 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2550,6 +2550,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		error = set_syscall_user_dispatch(arg2, arg3, arg4,
 						  (char __user *) arg5);
 		break;
+#ifdef CONFIG_SCHED_CORE
+	case PR_SCHED_CORE:
+		error = sched_core_share_pid(arg2, arg3, arg4, arg5);
+		break;
+#endif
 	default:
 		error = -EINVAL;
 		break;
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 18a9f59dc067f..967d9c55323d1 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -259,4 +259,12 @@ struct prctl_mm_map {
 #define PR_PAC_SET_ENABLED_KEYS		60
 #define PR_PAC_GET_ENABLED_KEYS		61
 
+/* Request the scheduler to share a core */
+#define PR_SCHED_CORE			62
+# define PR_SCHED_CORE_GET		0
+# define PR_SCHED_CORE_CREATE		1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO		2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX		4
+
 #endif /* _LINUX_PRCTL_H */
-- 
GitLab


From 9f26990074931bbf797373e53104216059b300b1 Mon Sep 17 00:00:00 2001
From: Chris Hyser <chris.hyser@oracle.com>
Date: Wed, 24 Mar 2021 17:40:16 -0400
Subject: [PATCH 0459/3804] kselftest: Add test for core sched prctl interface

Provides a selftest and examples of using the interface.

[peterz: updated to not use sched_debug]
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123309.100860030@infradead.org
---
 tools/testing/selftests/sched/.gitignore      |   1 +
 tools/testing/selftests/sched/Makefile        |  14 +
 tools/testing/selftests/sched/config          |   1 +
 tools/testing/selftests/sched/cs_prctl_test.c | 338 ++++++++++++++++++
 4 files changed, 354 insertions(+)
 create mode 100644 tools/testing/selftests/sched/.gitignore
 create mode 100644 tools/testing/selftests/sched/Makefile
 create mode 100644 tools/testing/selftests/sched/config
 create mode 100644 tools/testing/selftests/sched/cs_prctl_test.c

diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
new file mode 100644
index 0000000000000..6996d4654d924
--- /dev/null
+++ b/tools/testing/selftests/sched/.gitignore
@@ -0,0 +1 @@
+cs_prctl_test
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
new file mode 100644
index 0000000000000..10c72f14fea9d
--- /dev/null
+++ b/tools/testing/selftests/sched/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/  -Wl,-rpath=./ \
+	  $(CLANG_FLAGS)
+LDLIBS += -lpthread
+
+TEST_GEN_FILES := cs_prctl_test
+TEST_PROGS := cs_prctl_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
new file mode 100644
index 0000000000000..e8b09aa7c0c4c
--- /dev/null
+++ b/tools/testing/selftests/sched/config
@@ -0,0 +1 @@
+CONFIG_SCHED_DEBUG=y
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
new file mode 100644
index 0000000000000..63fe6521c56d9
--- /dev/null
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -0,0 +1,338 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Use the core scheduling prctl() to test core scheduling cookies control.
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ * Author: Chris Hyser <chris.hyser@oracle.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#define _GNU_SOURCE
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+#endif
+
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE			62
+# define PR_SCHED_CORE_GET		0
+# define PR_SCHED_CORE_CREATE		1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO		2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM	3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX		4
+#endif
+
+#define MAX_PROCESSES 128
+#define MAX_THREADS   128
+
+static const char USAGE[] = "cs_prctl_test [options]\n"
+"    options:\n"
+"	-P  : number of processes to create.\n"
+"	-T  : number of threads per process to create.\n"
+"	-d  : delay time to keep tasks alive.\n"
+"	-k  : keep tasks alive until keypress.\n";
+
+enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
+static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+		  unsigned long arg5)
+{
+	int res;
+
+	res = prctl(option, arg2, arg3, arg4, arg5);
+	printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3,
+	       (long)arg4, arg5);
+	return res;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+static void __handle_error(char *fn, int ln, char *msg)
+{
+	printf("(%s:%d) - ", fn, ln);
+	perror(msg);
+	exit(EXIT_FAILURE);
+}
+
+static void handle_usage(int rc, char *msg)
+{
+	puts(USAGE);
+	puts(msg);
+	putchar('\n');
+	exit(rc);
+}
+
+static unsigned long get_cs_cookie(int pid)
+{
+	unsigned long long cookie;
+	int ret;
+
+	ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID,
+		    (unsigned long)&cookie);
+	if (ret) {
+		printf("Not a core sched system\n");
+		return -1UL;
+	}
+
+	return cookie;
+}
+
+struct child_args {
+	int num_threads;
+	int pfd[2];
+	int cpid;
+	int thr_tids[MAX_THREADS];
+};
+
+static int child_func_thread(void __attribute__((unused))*arg)
+{
+	while (1)
+		usleep(20000);
+	return 0;
+}
+
+static void create_threads(int num_threads, int thr_tids[])
+{
+	void *child_stack;
+	pid_t tid;
+	int i;
+
+	for (i = 0; i < num_threads; ++i) {
+		child_stack = malloc(STACK_SIZE);
+		if (!child_stack)
+			handle_error("child stack allocate");
+
+		tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL);
+		if (tid == -1)
+			handle_error("clone thread");
+		thr_tids[i] = tid;
+	}
+}
+
+static int child_func_process(void *arg)
+{
+	struct child_args *ca = (struct child_args *)arg;
+
+	close(ca->pfd[0]);
+
+	create_threads(ca->num_threads, ca->thr_tids);
+
+	write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads);
+	close(ca->pfd[1]);
+
+	while (1)
+		usleep(20000);
+	return 0;
+}
+
+static unsigned char child_func_process_stack[STACK_SIZE];
+
+void create_processes(int num_processes, int num_threads, struct child_args proc[])
+{
+	pid_t cpid;
+	int i;
+
+	for (i = 0; i < num_processes; ++i) {
+		proc[i].num_threads = num_threads;
+
+		if (pipe(proc[i].pfd) == -1)
+			handle_error("pipe() failed");
+
+		cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE,
+			     SIGCHLD, &proc[i]);
+		proc[i].cpid = cpid;
+		close(proc[i].pfd[1]);
+	}
+
+	for (i = 0; i < num_processes; ++i) {
+		read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads);
+		close(proc[i].pfd[0]);
+	}
+}
+
+void disp_processes(int num_processes, struct child_args proc[])
+{
+	int i, j;
+
+	printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0),
+	       get_cs_cookie(getpid()));
+
+	for (i = 0; i < num_processes; ++i) {
+		printf("    tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid,
+		       getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid));
+		for (j = 0; j < proc[i].num_threads; ++j) {
+			printf("        tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j],
+			       proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j]));
+		}
+	}
+	puts("\n");
+}
+
+static int errors;
+
+#define validate(v) _validate(__LINE__, v, #v)
+void _validate(int line, int val, char *msg)
+{
+	if (!val) {
+		++errors;
+		printf("(%d) FAILED: %s\n", line, msg);
+	} else {
+		printf("(%d) PASSED: %s\n", line, msg);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct child_args procs[MAX_PROCESSES];
+
+	int keypress = 0;
+	int num_processes = 2;
+	int num_threads = 3;
+	int delay = 0;
+	int res = 0;
+	int pidx;
+	int pid;
+	int opt;
+
+	while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) {
+		switch (opt) {
+		case 'P':
+			num_processes = (int)strtol(optarg, NULL, 10);
+			break;
+		case 'T':
+			num_threads = (int)strtoul(optarg, NULL, 10);
+			break;
+		case 'd':
+			delay = (int)strtol(optarg, NULL, 10);
+			break;
+		case 'k':
+			keypress = 1;
+			break;
+		case 'h':
+			printf(USAGE);
+			exit(EXIT_SUCCESS);
+		default:
+			handle_usage(20, "unknown option");
+		}
+	}
+
+	if (num_processes < 1 || num_processes > MAX_PROCESSES)
+		handle_usage(1, "Bad processes value");
+
+	if (num_threads < 1 || num_threads > MAX_THREADS)
+		handle_usage(2, "Bad thread value");
+
+	if (keypress)
+		delay = -1;
+
+	srand(time(NULL));
+
+	/* put into separate process group */
+	if (setpgid(0, 0) != 0)
+		handle_error("process group");
+
+	printf("\n## Create a thread/process/process group hiearchy\n");
+	create_processes(num_processes, num_threads, procs);
+	disp_processes(num_processes, procs);
+	validate(get_cs_cookie(0) == 0);
+
+	printf("\n## Set a cookie on entire process group\n");
+	if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0)
+		handle_error("core_sched create failed -- PGID");
+	disp_processes(num_processes, procs);
+
+	validate(get_cs_cookie(0) != 0);
+
+	/* get a random process pid */
+	pidx = rand() % num_processes;
+	pid = procs[pidx].cpid;
+
+	validate(get_cs_cookie(0) == get_cs_cookie(pid));
+	validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+	printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid);
+	if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0)
+		handle_error("core_sched create failed -- TGID");
+	disp_processes(num_processes, procs);
+
+	validate(get_cs_cookie(0) != get_cs_cookie(pid));
+	validate(get_cs_cookie(pid) != 0);
+	validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+	printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n",
+	       getpid(), pid);
+	if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0)
+		handle_error("core_sched share to itself failed -- PID");
+	disp_processes(num_processes, procs);
+
+	validate(get_cs_cookie(0) == get_cs_cookie(pid));
+	validate(get_cs_cookie(pid) != 0);
+	validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+	printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n",
+	       procs[pidx].thr_tids[0], getpid());
+	if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0],
+		   PIDTYPE_PID, 0) < 0)
+		handle_error("core_sched share from thread failed -- PID");
+	disp_processes(num_processes, procs);
+
+	validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+	validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+	printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid());
+	if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0)
+		handle_error("core_sched share to self failed -- PGID");
+	disp_processes(num_processes, procs);
+
+	validate(get_cs_cookie(0) == get_cs_cookie(pid));
+	validate(get_cs_cookie(pid) != 0);
+	validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+	if (errors) {
+		printf("TESTS FAILED. errors: %d\n", errors);
+		res = 10;
+	} else {
+		printf("SUCCESS !!!\n");
+	}
+
+	if (keypress)
+		getchar();
+	else
+		sleep(delay);
+
+	for (pidx = 0; pidx < num_processes; ++pidx)
+		kill(procs[pidx].cpid, 15);
+
+	return res;
+}
-- 
GitLab


From 64e1f5872a8c3d80bce4686b4ab5dbc6e6bd30c5 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 6 May 2021 22:07:26 +0300
Subject: [PATCH 0460/3804] x86/alternatives: Make the x86nops[] symbol static

Sparse says:

  arch/x86/kernel/alternative.c:78:21: warning: symbol 'x86nops' was not declared. Should it be static?

Since x86nops[] is not used outside this file, Sparse is right and it can be made static.

Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506190726.15575-1-paskripkin@gmail.com
---
 arch/x86/kernel/alternative.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51744955..75c752b0628c1 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -75,7 +75,7 @@ do {									\
 	}								\
 } while (0)
 
-const unsigned char x86nops[] =
+static const unsigned char x86nops[] =
 {
 	BYTES_NOP1,
 	BYTES_NOP2,
-- 
GitLab


From 1bc67873d401e6c2e6e30be7fef21337db07a042 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 12 May 2021 11:33:10 +0200
Subject: [PATCH 0461/3804] x86/asm: Simplify __smp_mb() definition

Drop the bitness ifdeffery in favor of using _ASM_SP,
which is the helper macro for the rSP register specification
for 32 and 64 bit depending on the build.

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512093310.5635-1-bp@alien8.de
---
 arch/x86/include/asm/barrier.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 4819d5e5a3353..3ba772a69cc8b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -54,11 +54,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define dma_rmb()	barrier()
 #define dma_wmb()	barrier()
 
-#ifdef CONFIG_X86_32
-#define __smp_mb()	asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc")
-#else
-#define __smp_mb()	asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc")
-#endif
+#define __smp_mb()	asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
+
 #define __smp_rmb()	dma_rmb()
 #define __smp_wmb()	barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-- 
GitLab


From c6c82e0cd8125d30f2f1b29205c7e1a2f1a6785b Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Tue, 11 May 2021 21:56:29 +0200
Subject: [PATCH 0462/3804] vfio-ccw: Check initialized flag in cp_init()

We have a really nice flag in the channel_program struct that
indicates if it had been initialized by cp_init(), and use it
as a guard in the other cp accessor routines, but not for a
duplicate call into cp_init(). The possibility of this occurring
is low, because that flow is protected by the private->io_mutex
and FSM CP_PROCESSING state. But then why bother checking it
in (for example) cp_prefetch()?

Let's just be consistent and check for that in cp_init() too.

Fixes: 71189f263f8a3 ("vfio-ccw: make it safe to access channel programs")
Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-Id: <20210511195631.3995081-2-farman@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_cp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index b9febc581b1f4..8d1b2771c1aa0 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -638,6 +638,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
 	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
 	int ret;
 
+	/* this is an error in the caller */
+	if (cp->initialized)
+		return -EBUSY;
+
 	/*
 	 * We only support prefetching the channel program. We assume all channel
 	 * programs executed by supported guests likewise support prefetching.
-- 
GitLab


From 6c02ac4c9211edabe17bda437ac97e578756f31b Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Tue, 11 May 2021 21:56:30 +0200
Subject: [PATCH 0463/3804] vfio-ccw: Reset FSM state to IDLE inside FSM

When an I/O request is made, the fsm_io_request() routine
moves the FSM state from IDLE to CP_PROCESSING, and then
fsm_io_helper() moves it to CP_PENDING if the START SUBCHANNEL
received a cc0. Yet, the error case to go from CP_PROCESSING
back to IDLE is done after the FSM call returns.

Let's move this up into the FSM proper, to provide some
better symmetry when unwinding in this case.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Acked-by: Matthew Rosato <mjrosato@linux.ibm.com>
Message-Id: <20210511195631.3995081-3-farman@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_fsm.c | 1 +
 drivers/s390/cio/vfio_ccw_ops.c | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index 23e61aa638e4e..e435a9cd92dac 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -318,6 +318,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 	}
 
 err_out:
+	private->state = VFIO_CCW_STATE_IDLE;
 	trace_vfio_ccw_fsm_io_request(scsw->cmd.fctl, schid,
 				      io_region->ret_code, errstr);
 }
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 491a64c61fff1..c57d2a7f09197 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -279,8 +279,6 @@ static ssize_t vfio_ccw_mdev_write_io_region(struct vfio_ccw_private *private,
 	}
 
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ);
-	if (region->ret_code != 0)
-		private->state = VFIO_CCW_STATE_IDLE;
 	ret = (region->ret_code != 0) ? region->ret_code : count;
 
 out_unlock:
-- 
GitLab


From 2af7a834a435460d546f0cf0a8b8e4d259f1d910 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Tue, 11 May 2021 21:56:31 +0200
Subject: [PATCH 0464/3804] vfio-ccw: Serialize FSM IDLE state with I/O
 completion

Today, the stacked call to vfio_ccw_sch_io_todo() does three things:

  1) Update a solicited IRB with CP information, and release the CP
     if the interrupt was the end of a START operation.
  2) Copy the IRB data into the io_region, under the protection of
     the io_mutex
  3) Reset the vfio-ccw FSM state to IDLE to acknowledge that
     vfio-ccw can accept more work.

The trouble is that step 3 is (A) invoked for both solicited and
unsolicited interrupts, and (B) sitting after the mutex for step 2.
This second piece becomes a problem if it processes an interrupt
for a CLEAR SUBCHANNEL while another thread initiates a START,
thus allowing the CP and FSM states to get out of sync. That is:

    CPU 1                           CPU 2
    fsm_do_clear()
    fsm_irq()
                                    fsm_io_request()
    vfio_ccw_sch_io_todo()
                                    fsm_io_helper()

Since the FSM state and CP should be kept in sync, let's make a
note when the CP is released, and rely on that as an indication
that the FSM should also be reset at the end of this routine and
open up the device for more work.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Acked-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Message-Id: <20210511195631.3995081-4-farman@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 8c625b530035f..9b61e9b131ade 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -86,6 +86,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 	struct vfio_ccw_private *private;
 	struct irb *irb;
 	bool is_final;
+	bool cp_is_finished = false;
 
 	private = container_of(work, struct vfio_ccw_private, io_work);
 	irb = &private->irb;
@@ -94,14 +95,21 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 		     (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
 	if (scsw_is_solicited(&irb->scsw)) {
 		cp_update_scsw(&private->cp, &irb->scsw);
-		if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING)
+		if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) {
 			cp_free(&private->cp);
+			cp_is_finished = true;
+		}
 	}
 	mutex_lock(&private->io_mutex);
 	memcpy(private->io_region->irb_area, irb, sizeof(*irb));
 	mutex_unlock(&private->io_mutex);
 
-	if (private->mdev && is_final)
+	/*
+	 * Reset to IDLE only if processing of a channel program
+	 * has finished. Do not overwrite a possible processing
+	 * state if the final interrupt was for HSCH or CSCH.
+	 */
+	if (private->mdev && cp_is_finished)
 		private->state = VFIO_CCW_STATE_IDLE;
 
 	if (private->io_trigger)
-- 
GitLab


From f1a0a376ca0c4ef1fc3d24e3e502acbb5b795674 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 12 May 2021 10:46:36 +0100
Subject: [PATCH 0465/3804] sched/core: Initialize the idle task with
 preemption disabled

As pointed out by commit

  de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread")

init_idle() can and will be invoked more than once on the same idle
task. At boot time, it is invoked for the boot CPU thread by
sched_init(). Then smp_init() creates the threads for all the secondary
CPUs and invokes init_idle() on them.

As the hotplug machinery brings the secondaries to life, it will issue
calls to idle_thread_get(), which itself invokes init_idle() yet again.
In this case it's invoked twice more per secondary: at _cpu_up(), and at
bringup_cpu().

Given smp_init() already initializes the idle tasks for all *possible*
CPUs, no further initialization should be required. Now, removing
init_idle() from idle_thread_get() exposes some interesting expectations
with regards to the idle task's preempt_count: the secondary startup always
issues a preempt_disable(), requiring some reset of the preempt count to 0
between hot-unplug and hotplug, which is currently served by
idle_thread_get() -> init_idle().

Given the idle task is supposed to have preemption disabled once and never
see it re-enabled, it seems that what we actually want is to initialize its
preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove
init_idle() from idle_thread_get().

Secondary startups were patched via coccinelle:

  @begone@
  @@

  -preempt_disable();
  ...
  cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.com
---
 arch/alpha/kernel/smp.c          | 1 -
 arch/arc/kernel/smp.c            | 1 -
 arch/arm/kernel/smp.c            | 1 -
 arch/arm64/include/asm/preempt.h | 2 +-
 arch/arm64/kernel/smp.c          | 1 -
 arch/csky/kernel/smp.c           | 1 -
 arch/ia64/kernel/smpboot.c       | 1 -
 arch/mips/kernel/smp.c           | 1 -
 arch/openrisc/kernel/smp.c       | 2 --
 arch/parisc/kernel/smp.c         | 1 -
 arch/powerpc/kernel/smp.c        | 1 -
 arch/riscv/kernel/smpboot.c      | 1 -
 arch/s390/include/asm/preempt.h  | 4 ++--
 arch/s390/kernel/smp.c           | 1 -
 arch/sh/kernel/smp.c             | 2 --
 arch/sparc/kernel/smp_32.c       | 1 -
 arch/sparc/kernel/smp_64.c       | 3 ---
 arch/x86/include/asm/preempt.h   | 2 +-
 arch/x86/kernel/smpboot.c        | 1 -
 arch/xtensa/kernel/smp.c         | 1 -
 include/asm-generic/preempt.h    | 2 +-
 init/main.c                      | 6 +-----
 kernel/fork.c                    | 2 +-
 kernel/sched/core.c              | 2 +-
 kernel/smpboot.c                 | 1 -
 25 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index f4dd9f3f30010..4b2575f936d46 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -166,7 +166,6 @@ smp_callin(void)
 	DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
 	      cpuid, current, current->active_mm));
 
-	preempt_disable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index 52906d3145371..db0e104d68355 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -189,7 +189,6 @@ void start_kernel_secondary(void)
 	pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
 
 	local_irq_enable();
-	preempt_disable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 74679240a9d8e..c7bb168b0d97c 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -432,7 +432,6 @@ asmlinkage void secondary_start_kernel(void)
 #endif
 	pr_debug("CPU%u: Booted secondary processor\n", cpu);
 
-	preempt_disable();
 	trace_hardirqs_off();
 
 	/*
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
index 80e946b2abee2..e83f0982b99c1 100644
--- a/arch/arm64/include/asm/preempt.h
+++ b/arch/arm64/include/asm/preempt.h
@@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc)
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+	task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b077..6671000a8b7d7 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -224,7 +224,6 @@ asmlinkage notrace void secondary_start_kernel(void)
 		init_gic_priority_masking();
 
 	rcu_cpu_starting(cpu);
-	preempt_disable();
 	trace_hardirqs_off();
 
 	/*
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index 0f9f5eef93386..e2993539af8ef 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -281,7 +281,6 @@ void csky_start_secondary(void)
 	pr_info("CPU%u Online: %s...\n", cpu, __func__);
 
 	local_irq_enable();
-	preempt_disable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 49b4885809399..d10f780c13b9e 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -441,7 +441,6 @@ start_secondary (void *unused)
 #endif
 	efi_map_pal_code();
 	cpu_init();
-	preempt_disable();
 	smp_callin();
 
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index ef86fbad85460..d542fb7af3ba2 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -348,7 +348,6 @@ asmlinkage void start_secondary(void)
 	 */
 
 	calibrate_delay();
-	preempt_disable();
 	cpu = smp_processor_id();
 	cpu_data[cpu].udelay_val = loops_per_jiffy;
 
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index 48e1092a64de3..415e209732a3d 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -145,8 +145,6 @@ asmlinkage __init void secondary_start_kernel(void)
 	set_cpu_online(cpu, true);
 
 	local_irq_enable();
-
-	preempt_disable();
 	/*
 	 * OK, it's off to the idle thread for us
 	 */
diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 10227f667c8a6..1405b603b91b6 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -302,7 +302,6 @@ void __init smp_callin(unsigned long pdce_proc)
 #endif
 
 	smp_cpu_init(slave_id);
-	preempt_disable();
 
 	flush_cache_all_local(); /* start with known state */
 	flush_tlb_all_local(NULL);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 2e05c783440a3..6c6e4d934d867 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1547,7 +1547,6 @@ void start_secondary(void *unused)
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	rcu_cpu_starting(cpu);
-	preempt_disable();
 	cpu_callin_map[cpu] = 1;
 
 	if (smp_ops->setup_cpu)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 9a408e2942acf..bd82375db51a6 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -180,7 +180,6 @@ asmlinkage __visible void smp_callin(void)
 	 * Disable preemption before enabling interrupts, so we don't try to
 	 * schedule a CPU that hasn't actually started yet.
 	 */
-	preempt_disable();
 	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index b49e0492842cc..23ff51be7e29c 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -32,7 +32,7 @@ static inline void preempt_count_set(int pc)
 #define init_task_preempt_count(p)	do { } while (0)
 
 #define init_idle_preempt_count(p, cpu)	do { \
-	S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+	S390_lowcore.preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
@@ -91,7 +91,7 @@ static inline void preempt_count_set(int pc)
 #define init_task_preempt_count(p)	do { } while (0)
 
 #define init_idle_preempt_count(p, cpu)	do { \
-	S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+	S390_lowcore.preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static inline void set_preempt_need_resched(void)
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2fec2b80d35d2..111909aeb8d21 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -878,7 +878,6 @@ static void smp_init_secondary(void)
 	restore_access_regs(S390_lowcore.access_regs_save_area);
 	cpu_init();
 	rcu_cpu_starting(cpu);
-	preempt_disable();
 	init_cpu_timer();
 	vtime_init();
 	vdso_getcpu_init();
diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c
index 372acdc9033eb..65924d9ec2459 100644
--- a/arch/sh/kernel/smp.c
+++ b/arch/sh/kernel/smp.c
@@ -186,8 +186,6 @@ asmlinkage void start_secondary(void)
 
 	per_cpu_trap_init();
 
-	preempt_disable();
-
 	notify_cpu_starting(cpu);
 
 	local_irq_enable();
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 50c127ab46d5b..22b148e5a5f88 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg)
 	 */
 	arch_cpu_pre_starting(arg);
 
-	preempt_disable();
 	cpu = smp_processor_id();
 
 	notify_cpu_starting(cpu);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index e38d8bf454e86..ae5faa1d989d2 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -138,9 +138,6 @@ void smp_callin(void)
 
 	set_cpu_online(cpuid, true);
 
-	/* idle thread is expected to have preempt disabled */
-	preempt_disable();
-
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index f8cb8af4de5ce..fe5efbcba8240 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
 #define init_task_preempt_count(p) do { } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
+	per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
 } while (0)
 
 /*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0ad5214f598a9..0936f5ba32229 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -236,7 +236,6 @@ static void notrace start_secondary(void *unused)
 	cpu_init();
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
-	preempt_disable();
 	smp_callin();
 
 	enable_start_cpu0 = 0;
diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c
index cd85a7a2722ba..1254da07ead1f 100644
--- a/arch/xtensa/kernel/smp.c
+++ b/arch/xtensa/kernel/smp.c
@@ -145,7 +145,6 @@ void secondary_start_kernel(void)
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	enter_lazy_tlb(mm, current);
 
-	preempt_disable();
 	trace_hardirqs_off();
 
 	calibrate_delay();
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index d683f5e6d7913..b4d43a4af5f79 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc)
 } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+	task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
 } while (0)
 
 static __always_inline void set_preempt_need_resched(void)
diff --git a/init/main.c b/init/main.c
index eb01e121d2f15..7b027d9c5c89b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -941,11 +941,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 	 * time - but meanwhile we still have a functioning scheduler.
 	 */
 	sched_init();
-	/*
-	 * Disable preemption - early bootup scheduling is extremely
-	 * fragile until we cpu_idle() for the first time.
-	 */
-	preempt_disable();
+
 	if (WARN(!irqs_disabled(),
 		 "Interrupts were enabled *very* early, fixing it\n"))
 		local_irq_disable();
diff --git a/kernel/fork.c b/kernel/fork.c
index e7fd928fcafe9..ace4631b5b547 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2412,7 +2412,7 @@ static inline void init_idle_pids(struct task_struct *idle)
 	}
 }
 
-struct task_struct *fork_idle(int cpu)
+struct task_struct * __init fork_idle(int cpu)
 {
 	struct task_struct *task;
 	struct kernel_clone_args args = {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 55b2d9399e12a..9d00f4958bde7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8227,7 +8227,7 @@ void show_state_filter(unsigned long state_filter)
  * NOTE: this function does not set the idle thread's NEED_RESCHED
  * flag, to make booting more robust.
  */
-void init_idle(struct task_struct *idle, int cpu)
+void __init init_idle(struct task_struct *idle, int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index f25208e8df836..e4163042c4d66 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -33,7 +33,6 @@ struct task_struct *idle_thread_get(unsigned int cpu)
 
 	if (!tsk)
 		return ERR_PTR(-ENOMEM);
-	init_idle(tsk, cpu);
 	return tsk;
 }
 
-- 
GitLab


From 1e948b1752b58c9c570989ab29ceef5b38fdccda Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 12 May 2021 11:17:47 +0800
Subject: [PATCH 0466/3804] gpio: cadence: Add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-cadence.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c
index a4d3239d25944..4ab3fcd9b9ba6 100644
--- a/drivers/gpio/gpio-cadence.c
+++ b/drivers/gpio/gpio-cadence.c
@@ -278,6 +278,7 @@ static const struct of_device_id cdns_of_ids[] = {
 	{ .compatible = "cdns,gpio-r1p02" },
 	{ /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, cdns_of_ids);
 
 static struct platform_driver cdns_gpio_driver = {
 	.driver = {
-- 
GitLab


From a0579474effff6a139768b300d8439c2327b3848 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 22:46:30 +0300
Subject: [PATCH 0467/3804] gpio: xilinx: Correct kernel doc for xgpio_probe()

Kernel doc validator complains:

.../gpio-xilinx.c:556: warning: expecting prototype for xgpio_of_probe(). Prototype was for xgpio_probe() instead

Correct as suggested by changing the name of the function in the doc.

Fixes: 749564ffd52d ("gpio/xilinx: Convert the driver to platform device interface")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Neeli Srinivas <sneeli@xilinx.com>
Reviewed-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-xilinx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c
index b411d3156e0b6..136557e7dd3ce 100644
--- a/drivers/gpio/gpio-xilinx.c
+++ b/drivers/gpio/gpio-xilinx.c
@@ -542,7 +542,7 @@ static void xgpio_irqhandler(struct irq_desc *desc)
 }
 
 /**
- * xgpio_of_probe - Probe method for the GPIO device.
+ * xgpio_probe - Probe method for the GPIO device.
  * @pdev: pointer to the platform device
  *
  * Return:
-- 
GitLab


From bdbe871ef0caa660e16461a2a94579d9f9ef7ba4 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 7 May 2021 11:34:11 +0100
Subject: [PATCH 0468/3804] gpio: tegra186: Don't set parent IRQ affinity

When hotplugging CPUs on Tegra186 and Tegra194 errors such as the
following are seen ...

 IRQ63: set affinity failed(-22).
 IRQ65: set affinity failed(-22).
 IRQ66: set affinity failed(-22).
 IRQ67: set affinity failed(-22).

Looking at the /proc/interrupts the above are all interrupts associated
with GPIOs. The reason why these error messages occur is because there
is no 'parent_data' associated with any of the GPIO interrupts and so
tegra186_irq_set_affinity() simply returns -EINVAL.

To understand why there is no 'parent_data' it is first necessary to
understand that in addition to the GPIO interrupts being routed to the
interrupt controller (GIC), the interrupts for some GPIOs are also
routed to the Tegra Power Management Controller (PMC) to wake up the
system from low power states. In order to configure GPIO events as
wake events in the PMC, the PMC is configured as IRQ parent domain
for the GPIO IRQ domain. Originally the GIC was the IRQ parent domain
of the PMC and although this was working, this started causing issues
once commit 64a267e9a41c ("irqchip/gic: Configure SGIs as standard
interrupts") was added, because technically, the GIC is not a parent
of the PMC. Commit c351ab7bf2a5 ("soc/tegra: pmc: Don't create fake
interrupt hierarchy levels") fixed this by severing the IRQ domain
hierarchy for the Tegra GPIOs and hence, there may be no IRQ parent
domain for the GPIOs.

The GPIO controllers on Tegra186 and Tegra194 have either one or six
interrupt lines to the interrupt controller. For GPIO controllers with
six interrupts, the mapping of the GPIO interrupt to the controller
interrupt is configurable within the GPIO controller. Currently a
default mapping is used, however, it could be possible to use the
set affinity callback for the Tegra186 GPIO driver to do something a
bit more interesting. Currently, because interrupts for all GPIOs
have the same mapping and any attempts to configure the affinity for
a given GPIO can conflict with another that shares the same IRQ, for
now it is simpler to just remove set affinity support and this avoids
the above warnings being seen.

Cc: <stable@vger.kernel.org>
Fixes: c4e1f7d92cd6 ("gpio: tegra186: Set affinity callback to parent")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-tegra186.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c
index 1bd9e44df7184..05974b760796b 100644
--- a/drivers/gpio/gpio-tegra186.c
+++ b/drivers/gpio/gpio-tegra186.c
@@ -444,16 +444,6 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on)
 	return 0;
 }
 
-static int tegra186_irq_set_affinity(struct irq_data *data,
-				     const struct cpumask *dest,
-				     bool force)
-{
-	if (data->parent_data)
-		return irq_chip_set_affinity_parent(data, dest, force);
-
-	return -EINVAL;
-}
-
 static void tegra186_gpio_irq(struct irq_desc *desc)
 {
 	struct tegra_gpio *gpio = irq_desc_get_handler_data(desc);
@@ -700,7 +690,6 @@ static int tegra186_gpio_probe(struct platform_device *pdev)
 	gpio->intc.irq_unmask = tegra186_irq_unmask;
 	gpio->intc.irq_set_type = tegra186_irq_set_type;
 	gpio->intc.irq_set_wake = tegra186_irq_set_wake;
-	gpio->intc.irq_set_affinity = tegra186_irq_set_affinity;
 
 	irq = &gpio->gpio.irq;
 	irq->chip = &gpio->intc;
-- 
GitLab


From 47c1131633ef6210add63b8b5704497023a3462a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 12 May 2021 08:09:08 +0900
Subject: [PATCH 0469/3804] ASoC: soc-dai.h: Align the word of comment for
 SND_SOC_DAIFMT_CBC_CFC

Let's use "consumer" instead of "follower".

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/8735usc1gr.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 1358a0ceb4d01..0bc29c4516e76 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -81,7 +81,7 @@ struct snd_compr_stream;
 #define SND_SOC_DAIFMT_CBP_CFP		(1 << 12) /* codec clk provider & frame provider */
 #define SND_SOC_DAIFMT_CBC_CFP		(2 << 12) /* codec clk consumer & frame provider */
 #define SND_SOC_DAIFMT_CBP_CFC		(3 << 12) /* codec clk provider & frame consumer */
-#define SND_SOC_DAIFMT_CBC_CFC		(4 << 12) /* codec clk consumer & frame follower */
+#define SND_SOC_DAIFMT_CBC_CFC		(4 << 12) /* codec clk consumer & frame consumer */
 
 /* previous definitions kept for backwards-compatibility, do not use in new contributions */
 #define SND_SOC_DAIFMT_CBM_CFM		SND_SOC_DAIFMT_CBP_CFP
-- 
GitLab


From e072b2671606c77538d6a4dd5dda80b508cb4816 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 12 May 2021 11:12:25 +0800
Subject: [PATCH 0470/3804] ASoC: sti-sas: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1620789145-14936-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/sti-sas.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/sti-sas.c b/sound/soc/codecs/sti-sas.c
index ffdf7e5595153..82a24e330065f 100644
--- a/sound/soc/codecs/sti-sas.c
+++ b/sound/soc/codecs/sti-sas.c
@@ -408,6 +408,7 @@ static const struct of_device_id sti_sas_dev_match[] = {
 	},
 	{},
 };
+MODULE_DEVICE_TABLE(of, sti_sas_dev_match);
 
 static int sti_sas_driver_probe(struct platform_device *pdev)
 {
-- 
GitLab


From 96f685974609d4c315669ef33d55dbc43996491e Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 11 May 2021 18:57:14 +0100
Subject: [PATCH 0471/3804] ASoC: cs53l30: Add missing regmap use_single config

This device requires single register transactions, this will
definitely cause problems with the new device ID parsing which
uses regmap_bulk_read but might also show up in the cache sync
sometimes. Add the missing flags to the regmap_config.

Fixes: 4fc81bc88ad9 ("ASoC: cs53l30: Minor error paths fixups")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511175718.15416-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs53l30.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c
index 3d67cbf9eaaa2..abe0cc0bc03a9 100644
--- a/sound/soc/codecs/cs53l30.c
+++ b/sound/soc/codecs/cs53l30.c
@@ -912,6 +912,9 @@ static struct regmap_config cs53l30_regmap = {
 	.writeable_reg = cs53l30_writeable_register,
 	.readable_reg = cs53l30_readable_register,
 	.cache_type = REGCACHE_RBTREE,
+
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int cs53l30_i2c_probe(struct i2c_client *client,
-- 
GitLab


From 27fb585169024440c1b358da35499fa578d803cd Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 11 May 2021 18:57:15 +0100
Subject: [PATCH 0472/3804] ASoC: cs42l73: Add missing regmap use_single config

This device requires single register transactions, this will
definitely cause problems with the new device ID parsing which uses
regmap_bulk_read but might also show up in the cache sync sometimes.
Add the missing flags to the regmap_config.

Fixes: 26495252fe0d ("ASoC: cs42l73: Minor error paths fixups")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511175718.15416-2-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs42l73.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c
index c3f974ec78e58..e92bacaab53fc 100644
--- a/sound/soc/codecs/cs42l73.c
+++ b/sound/soc/codecs/cs42l73.c
@@ -1268,6 +1268,9 @@ static const struct regmap_config cs42l73_regmap = {
 	.volatile_reg = cs42l73_volatile_register,
 	.readable_reg = cs42l73_readable_register,
 	.cache_type = REGCACHE_RBTREE,
+
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int cs42l73_i2c_probe(struct i2c_client *i2c_client,
-- 
GitLab


From 2a682f821941e28fb9ceaa1dd03ccfaea0448101 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 11 May 2021 18:57:16 +0100
Subject: [PATCH 0473/3804] ASoC: cs35l34: Add missing regmap use_single config

This device requires single register transactions, this will
definitely cause problems with the new device ID parsing which uses
regmap_bulk_read but might also show up in the cache sync sometimes.
Add the missing flags to the regmap_config.

Fixes: 8cb9b001635c ("ASoC: cs35l34: Minor error paths fixups")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511175718.15416-3-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l34.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/cs35l34.c b/sound/soc/codecs/cs35l34.c
index 110ee2d063581..3d3c3c34dfe27 100644
--- a/sound/soc/codecs/cs35l34.c
+++ b/sound/soc/codecs/cs35l34.c
@@ -800,6 +800,9 @@ static struct regmap_config cs35l34_regmap = {
 	.readable_reg = cs35l34_readable_register,
 	.precious_reg = cs35l34_precious_register,
 	.cache_type = REGCACHE_RBTREE,
+
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int cs35l34_handle_of_data(struct i2c_client *i2c_client,
-- 
GitLab


From b1078e9869531af4f968ba1b9edad51264943bb8 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 11 May 2021 18:57:17 +0100
Subject: [PATCH 0474/3804] ASoC: cs35l32: Add missing regmap use_single config

This device requires single register transactions, this will
definitely cause problems with the new device ID parsing which uses
regmap_bulk_read but might also show up in the cache sync sometimes.
Add the missing flags to the regmap_config.

Fixes: 283160f1419d ("ASoC: cs35l32: Minor error paths fixups")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511175718.15416-4-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c
index f4067230ac425..88e79b9f52edc 100644
--- a/sound/soc/codecs/cs35l32.c
+++ b/sound/soc/codecs/cs35l32.c
@@ -261,6 +261,9 @@ static const struct regmap_config cs35l32_regmap = {
 	.readable_reg = cs35l32_readable_register,
 	.precious_reg = cs35l32_precious_register,
 	.cache_type = REGCACHE_RBTREE,
+
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int cs35l32_handle_of_data(struct i2c_client *i2c_client,
-- 
GitLab


From 0e49a4de4564b3659a34b0b775d43b6b635b17fa Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Tue, 11 May 2021 18:57:18 +0100
Subject: [PATCH 0475/3804] ASoC: cs42l56: Minor tidy up of error paths

Fixup a needlessly initialised variable and an unchecked return
value.

Reported-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210511175718.15416-5-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs42l56.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c
index c44a5cdb796ec..7cdffdf6b8cf0 100644
--- a/sound/soc/codecs/cs42l56.c
+++ b/sound/soc/codecs/cs42l56.c
@@ -1175,7 +1175,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client,
 	struct cs42l56_platform_data *pdata =
 		dev_get_platdata(&i2c_client->dev);
 	int ret, i;
-	unsigned int devid = 0;
+	unsigned int devid;
 	unsigned int alpha_rev, metal_rev;
 	unsigned int reg;
 
@@ -1245,6 +1245,11 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client,
 	}
 
 	ret = regmap_read(cs42l56->regmap, CS42L56_CHIP_ID_1, &reg);
+	if (ret) {
+		dev_err(&i2c_client->dev, "Failed to read chip ID: %d\n", ret);
+		return ret;
+	}
+
 	devid = reg & CS42L56_CHIP_ID_MASK;
 	if (devid != CS42L56_DEVID) {
 		dev_err(&i2c_client->dev,
-- 
GitLab


From 3d681804efcb6e5d8089a433402e19179347d7ae Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 12 May 2021 15:58:24 +0800
Subject: [PATCH 0476/3804] regulator: cros-ec: Fix error code in dev_err
 message

Show proper error code instead of 0.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210512075824.620580-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/cros-ec-regulator.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/cros-ec-regulator.c b/drivers/regulator/cros-ec-regulator.c
index eb3fc1db4edc8..c4754f3cf2337 100644
--- a/drivers/regulator/cros-ec-regulator.c
+++ b/drivers/regulator/cros-ec-regulator.c
@@ -225,8 +225,9 @@ static int cros_ec_regulator_probe(struct platform_device *pdev)
 
 	drvdata->dev = devm_regulator_register(dev, &drvdata->desc, &cfg);
 	if (IS_ERR(drvdata->dev)) {
+		ret = PTR_ERR(drvdata->dev);
 		dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret);
-		return PTR_ERR(drvdata->dev);
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, drvdata);
-- 
GitLab


From 7907cad7d07e0055789ec0c534452f19dfe1fc80 Mon Sep 17 00:00:00 2001
From: Chunyan Zhang <chunyan.zhang@unisoc.com>
Date: Wed, 12 May 2021 17:35:34 +0800
Subject: [PATCH 0477/3804] spi: sprd: Add missing MODULE_DEVICE_TABLE

MODULE_DEVICE_TABLE is used to extract the device information out of the
driver and build a table at compile time. With this macro in place, the
kernel can find the driver, if available, when the device is plugged in,
and then load that driver and initialize the device.

Signed-off-by: Chunyan Zhang <chunyan.zhang@unisoc.com>
Link: https://lore.kernel.org/r/20210512093534.243040-1-zhang.lyra@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sprd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index b41a75749b498..28e70db9bbba8 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -1068,6 +1068,7 @@ static const struct of_device_id sprd_spi_of_match[] = {
 	{ .compatible = "sprd,sc9860-spi", },
 	{ /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, sprd_spi_of_match);
 
 static struct platform_driver sprd_spi_driver = {
 	.driver = {
-- 
GitLab


From 6b69546912a57ff8c31061f98e56383cc0beffd3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 17:09:12 +0300
Subject: [PATCH 0478/3804] spi: Assume GPIO CS active high in ACPI case

Currently GPIO CS handling, when descriptors are in use, doesn't
take into consideration that in ACPI case the default polarity
is Active High and can't be altered. Instead we have to use the
per-chip definition provided by SPISerialBus() resource.

Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors")
Cc: Liguang Zhang <zhangliguang@linux.alibaba.com>
Cc: Jay Fang <f.fangjian@huawei.com>
Cc: Sven Van Asbroeck <thesven73@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Xin Hao <xhao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20210511140912.30757-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a565e7d6bf3ba..98048af04abf9 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -820,15 +820,29 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 
 	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
 		if (!(spi->mode & SPI_NO_CS)) {
-			if (spi->cs_gpiod)
-				/* polarity handled by gpiolib */
-				gpiod_set_value_cansleep(spi->cs_gpiod, activate);
-			else
+			if (spi->cs_gpiod) {
+				/*
+				 * Historically ACPI has no means of the GPIO polarity and
+				 * thus the SPISerialBus() resource defines it on the per-chip
+				 * basis. In order to avoid a chain of negations, the GPIO
+				 * polarity is considered being Active High. Even for the cases
+				 * when _DSD() is involved (in the updated versions of ACPI)
+				 * the GPIO CS polarity must be defined Active High to avoid
+				 * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
+				 * into account.
+				 */
+				if (has_acpi_companion(&spi->dev))
+					gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
+				else
+					/* Polarity handled by GPIO library */
+					gpiod_set_value_cansleep(spi->cs_gpiod, activate);
+			} else {
 				/*
 				 * invert the enable line, as active low is
 				 * default for SPI.
 				 */
 				gpio_set_value_cansleep(spi->cs_gpio, !enable);
+			}
 		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
-- 
GitLab


From 2ca4dcc4909d787ee153272f7efc2bff3b498720 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Tue, 11 May 2021 16:30:15 +0200
Subject: [PATCH 0479/3804] fs/mount_setattr: tighten permission checks

We currently don't have any filesystems that support idmapped mounts
which are mountable inside a user namespace. That was a deliberate
decision for now as a userns root can just mount the filesystem
themselves. So enforce this restriction explicitly until there's a real
use-case for this. This way we can notice it and will have a chance to
adapt and audit our translation helpers and fstests appropriately if we
need to support such filesystems.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
CC: linux-fsdevel@vger.kernel.org
Suggested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 fs/namespace.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index f63337828e1c4..c3f1a78ba3697 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3855,8 +3855,12 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
 	if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
 		return -EINVAL;
 
+	/* Don't yet support filesystem mountable in user namespaces. */
+	if (m->mnt_sb->s_user_ns != &init_user_ns)
+		return -EINVAL;
+
 	/* We're not controlling the superblock. */
-	if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
 	/* Mount has already been visible in the filesystem hierarchy. */
-- 
GitLab


From 25cf0d8aa2a3440ed32bf1f8df1310d6baf3f1e8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:53 +0200
Subject: [PATCH 0480/3804] objtool: Rewrite hashtable sizing

Currently objtool has 5 hashtables and sizes them 16 or 20 bits
depending on the --vmlinux argument.

However, a single size doesn't really work well for the 5 tables,
which among them, cover 3 different uses. Also, while vmlinux is
larger, there is still a very wide difference between a defconfig and
allyesconfig build, which again isn't optimally covered by a single
size.

Another aspect is the cost of elf_hash_init(), which for large tables
dominates the runtime for small input files. It turns out that all it
does is assign NULL, something that is required when using malloc().
However, when we allocate memory using mmap(), we're guaranteed to get
zero filled pages.

Therefore, rewrite the whole thing to:

 1) use more dynamic sized tables, depending on the input file,
 2) avoid the need for elf_hash_init() entirely by using mmap().

This speeds up a regular kernel build (100s to 98s for
x86_64-defconfig), and potentially dramatically speeds up vmlinux
processing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.452881700@infradead.org
---
 tools/objtool/elf.c                 | 113 +++++++++++++++++-----------
 tools/objtool/include/objtool/elf.h |  17 +++--
 2 files changed, 83 insertions(+), 47 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d08f5f3670f88..a8a0ee21f71a3 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -9,6 +9,7 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -27,21 +28,27 @@ static inline u32 str_hash(const char *str)
 	return jhash(str, strlen(str), 0);
 }
 
-static inline int elf_hash_bits(void)
-{
-	return vmlinux ? ELF_HASH_BITS : 16;
-}
+#define __elf_table(name)	(elf->name##_hash)
+#define __elf_bits(name)	(elf->name##_bits)
 
-#define elf_hash_add(hashtable, node, key) \
-	hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+#define elf_hash_add(name, node, key) \
+	hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))])
 
-static void elf_hash_init(struct hlist_head *table)
-{
-	__hash_init(table, 1U << elf_hash_bits());
-}
+#define elf_hash_for_each_possible(name, obj, member, key) \
+	hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member)
 
-#define elf_hash_for_each_possible(name, obj, member, key)			\
-	hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+#define elf_alloc_hash(name, size) \
+({ \
+	__elf_bits(name) = max(10, ilog2(size)); \
+	__elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \
+				 PROT_READ|PROT_WRITE, \
+				 MAP_PRIVATE|MAP_ANON, -1, 0); \
+	if (__elf_table(name) == (void *)-1L) { \
+		WARN("mmap fail " #name); \
+		__elf_table(name) = NULL; \
+	} \
+	__elf_table(name); \
+})
 
 static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
 {
@@ -80,9 +87,10 @@ struct section *find_section_by_name(const struct elf *elf, const char *name)
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
+	elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) {
 		if (!strcmp(sec->name, name))
 			return sec;
+	}
 
 	return NULL;
 }
@@ -92,9 +100,10 @@ static struct section *find_section_by_index(struct elf *elf,
 {
 	struct section *sec;
 
-	elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
+	elf_hash_for_each_possible(section, sec, hash, idx) {
 		if (sec->idx == idx)
 			return sec;
+	}
 
 	return NULL;
 }
@@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+	elf_hash_for_each_possible(symbol, sym, hash, idx) {
 		if (sym->idx == idx)
 			return sym;
+	}
 
 	return NULL;
 }
@@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
 {
 	struct symbol *sym;
 
-	elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+	elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) {
 		if (!strcmp(sym->name, name))
 			return sym;
+	}
 
 	return NULL;
 }
@@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se
 	sec = sec->reloc;
 
 	for_offset_range(o, offset, offset + len) {
-		elf_hash_for_each_possible(elf->reloc_hash, reloc, hash,
-				       sec_offset_hash(sec, o)) {
+		elf_hash_for_each_possible(reloc, reloc, hash,
+					   sec_offset_hash(sec, o)) {
 			if (reloc->sec != sec)
 				continue;
 
@@ -228,6 +239,10 @@ static int read_sections(struct elf *elf)
 		return -1;
 	}
 
+	if (!elf_alloc_hash(section, sections_nr) ||
+	    !elf_alloc_hash(section_name, sections_nr))
+		return -1;
+
 	for (i = 0; i < sections_nr; i++) {
 		sec = malloc(sizeof(*sec));
 		if (!sec) {
@@ -274,12 +289,14 @@ static int read_sections(struct elf *elf)
 		sec->len = sec->sh.sh_size;
 
 		list_add_tail(&sec->list, &elf->sections);
-		elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-		elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+		elf_hash_add(section, &sec->hash, sec->idx);
+		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+		printf("section_bits: %d\n", elf->section_bits);
+	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
 	if (elf_nextscn(elf->elf, s)) {
@@ -308,8 +325,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	else
 		entry = &sym->sec->symbol_list;
 	list_add(&sym->list, entry);
-	elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
-	elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+	elf_hash_add(symbol, &sym->hash, sym->idx);
+	elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
 
 	/*
 	 * Don't store empty STT_NOTYPE symbols in the rbtree.  They
@@ -329,19 +346,25 @@ static int read_symbols(struct elf *elf)
 	Elf32_Word shndx;
 
 	symtab = find_section_by_name(elf, ".symtab");
-	if (!symtab) {
+	if (symtab) {
+		symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+		if (symtab_shndx)
+			shndx_data = symtab_shndx->data;
+
+		symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+	} else {
 		/*
 		 * A missing symbol table is actually possible if it's an empty
-		 * .o file.  This can happen for thunk_64.o.
+		 * .o file. This can happen for thunk_64.o. Make sure to at
+		 * least allocate the symbol hash tables so we can do symbol
+		 * lookups without crashing.
 		 */
-		return 0;
+		symbols_nr = 0;
 	}
 
-	symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
-	if (symtab_shndx)
-		shndx_data = symtab_shndx->data;
-
-	symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+	if (!elf_alloc_hash(symbol, symbols_nr) ||
+	    !elf_alloc_hash(symbol_name, symbols_nr))
+		return -1;
 
 	for (i = 0; i < symbols_nr; i++) {
 		sym = malloc(sizeof(*sym));
@@ -389,8 +412,10 @@ static int read_symbols(struct elf *elf)
 		elf_add_symbol(elf, sym);
 	}
 
-	if (stats)
+	if (stats) {
 		printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+		printf("symbol_bits: %d\n", elf->symbol_bits);
+	}
 
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
@@ -479,7 +504,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
 	reloc->addend = addend;
 
 	list_add_tail(&reloc->list, &sec->reloc->reloc_list);
-	elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+	elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 	sec->reloc->changed = true;
 
@@ -556,6 +581,15 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
+	sec = find_section_by_name(elf, ".text");
+	if (!sec) {
+		WARN("no .text");
+		return -1;
+	}
+
+	if (!elf_alloc_hash(reloc, sec->len / 16))
+		return -1;
+
 	list_for_each_entry(sec, &elf->sections, list) {
 		if ((sec->sh.sh_type != SHT_RELA) &&
 		    (sec->sh.sh_type != SHT_REL))
@@ -600,7 +634,7 @@ static int read_relocs(struct elf *elf)
 			}
 
 			list_add_tail(&reloc->list, &sec->reloc_list);
-			elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+			elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
 			nr_reloc++;
 		}
@@ -611,6 +645,7 @@ static int read_relocs(struct elf *elf)
 	if (stats) {
 		printf("max_reloc: %lu\n", max_reloc);
 		printf("tot_reloc: %lu\n", tot_reloc);
+		printf("reloc_bits: %d\n", elf->reloc_bits);
 	}
 
 	return 0;
@@ -632,12 +667,6 @@ struct elf *elf_open_read(const char *name, int flags)
 
 	INIT_LIST_HEAD(&elf->sections);
 
-	elf_hash_init(elf->symbol_hash);
-	elf_hash_init(elf->symbol_name_hash);
-	elf_hash_init(elf->section_hash);
-	elf_hash_init(elf->section_name_hash);
-	elf_hash_init(elf->reloc_hash);
-
 	elf->fd = open(name, flags);
 	if (elf->fd == -1) {
 		fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -850,8 +879,8 @@ struct section *elf_create_section(struct elf *elf, const char *name,
 		return NULL;
 
 	list_add_tail(&sec->list, &elf->sections);
-	elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
-	elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+	elf_hash_add(section, &sec->hash, sec->idx);
+	elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
 
 	elf->changed = true;
 
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 45e5ede363b07..90082751f851d 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -84,11 +84,18 @@ struct elf {
 	bool changed;
 	char *name;
 	struct list_head sections;
-	DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
-	DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS);
+
+	int symbol_bits;
+	int symbol_name_bits;
+	int section_bits;
+	int section_name_bits;
+	int reloc_bits;
+
+	struct hlist_head *symbol_hash;
+	struct hlist_head *symbol_name_hash;
+	struct hlist_head *section_hash;
+	struct hlist_head *section_name_hash;
+	struct hlist_head *reloc_hash;
 };
 
 #define OFFSET_STRIDE_BITS	4
-- 
GitLab


From 80870e6ece78ce67b91398db88fb6b92a178f574 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:54 +0200
Subject: [PATCH 0481/3804] x86, objtool: Dont exclude arch/x86/realmode/

Specifically, init.c uses jump_labels.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.516200011@infradead.org
---
 arch/x86/realmode/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile
index 6b1f3a4eeb44e..a0b491ae2de8d 100644
--- a/arch/x86/realmode/Makefile
+++ b/arch/x86/realmode/Makefile
@@ -10,7 +10,6 @@
 # Sanitizer runtimes are unavailable and cannot be linked here.
 KASAN_SANITIZE			:= n
 KCSAN_SANITIZE			:= n
-OBJECT_FILES_NON_STANDARD	:= y
 
 subdir- := rm
 
-- 
GitLab


From 8bfafcdccb52e770695b12530b1f800fe98b16b1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:55 +0200
Subject: [PATCH 0482/3804] jump_label, x86: Strip ASM jump_label support

In preparation for variable size jump_label support; remove all ASM
bits, which are currently unused.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.599716762@infradead.org
---
 arch/x86/include/asm/jump_label.h | 36 -------------------------------
 1 file changed, 36 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 610a05374c02f..01de21e2d9679 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -47,42 +47,6 @@ l_yes:
 	return true;
 }
 
-#else	/* __ASSEMBLY__ */
-
-.macro STATIC_JUMP_IF_TRUE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.else
-	.byte		BYTES_NOP5
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key - .
-	.popsection
-.endm
-
-.macro STATIC_JUMP_IF_FALSE target, key, def
-.Lstatic_jump_\@:
-	.if \def
-	.byte		BYTES_NOP5
-	.else
-	/* Equivalent to "jmp.d32 \target" */
-	.byte		0xe9
-	.long		\target - .Lstatic_jump_after_\@
-.Lstatic_jump_after_\@:
-	.endif
-	.pushsection __jump_table, "aw"
-	_ASM_ALIGN
-	.long		.Lstatic_jump_\@ - ., \target - .
-	_ASM_PTR	\key + 1 - .
-	.popsection
-.endm
-
 #endif	/* __ASSEMBLY__ */
 
 #endif
-- 
GitLab


From e1aa35c4c4bc71e44dabc9d7d167b807edd7b439 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:56 +0200
Subject: [PATCH 0483/3804] jump_label, x86: Factor out the __jump_table
 generation

Both arch_static_branch() and arch_static_branch_jump() have the same
blurb to generate the __jump_table entry, share it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.663132781@infradead.org
---
 arch/x86/include/asm/jump_label.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 01de21e2d9679..dfdc2b1c17ddd 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,15 +14,19 @@
 #include <linux/stringify.h>
 #include <linux/types.h>
 
+#define JUMP_TABLE_ENTRY				\
+	".pushsection __jump_table,  \"aw\" \n\t"	\
+	_ASM_ALIGN "\n\t"				\
+	".long 1b - . \n\t"				\
+	".long %l[l_yes] - . \n\t"			\
+	_ASM_PTR "%c0 + %c1 - .\n\t"			\
+	".popsection \n\t"
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
 		".byte " __stringify(BYTES_NOP5) "\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
@@ -33,13 +37,9 @@ l_yes:
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
-		"2:\n\t"
-		".pushsection __jump_table,  \"aw\" \n\t"
-		_ASM_ALIGN "\n\t"
-		".long 1b - ., %l[l_yes] - . \n\t"
-		_ASM_PTR "%c0 + %c1 - .\n\t"
-		".popsection \n\t"
+		".byte 0xe9 \n\t"
+		".long %l[l_yes] - (. + 4) \n\t"
+		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
 	return false;
-- 
GitLab


From f9510fa9caaf8229381d5f86ba0774bf1a6ca39b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:57 +0200
Subject: [PATCH 0484/3804] jump_label, x86: Improve error when we fail
 expected text

There is only a single usage site left, remove the function and extend
the print to include more information, like the expected text and the
patch type.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.726939027@infradead.org
---
 arch/x86/kernel/jump_label.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 6a2eb62c85e62..638d3b9be0ad2 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,37 +16,32 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
-static void bug_at(const void *ip, int line)
-{
-	/*
-	 * The location is not an op that we were expecting.
-	 * Something went wrong. Crash the box, as something could be
-	 * corrupting the kernel.
-	 */
-	pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line);
-	BUG();
-}
-
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
 	const void *expect, *code;
 	const void *addr, *dest;
-	int line;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
 	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
 
-	if (type == JUMP_LABEL_JMP) {
-		expect = x86_nops[5]; line = __LINE__;
-	} else {
-		expect = code; line = __LINE__;
-	}
+	if (type == JUMP_LABEL_JMP)
+		expect = x86_nops[5];
+	else
+		expect = code;
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE))
-		bug_at(addr, line);
+	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+		/*
+		 * The location is not an op that we were expecting.
+		 * Something went wrong. Crash the box, as something could be
+		 * corrupting the kernel.
+		 */
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
+				addr, addr, addr, expect, type);
+		BUG();
+	}
 
 	if (type == JUMP_LABEL_NOP)
 		code = x86_nops[5];
-- 
GitLab


From fa5e5dc39669b4427830c546ede8709323b8276c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:58 +0200
Subject: [PATCH 0485/3804] jump_label, x86: Introduce jump_entry_size()

This allows architectures to have variable sized jumps.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.786777050@infradead.org
---
 arch/x86/include/asm/jump_label.h | 4 ++--
 arch/x86/kernel/jump_label.c      | 7 +++++++
 include/linux/jump_label.h        | 9 +++++++++
 kernel/jump_label.c               | 2 +-
 4 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index dfdc2b1c17ddd..d85802a006296 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -4,8 +4,6 @@
 
 #define HAVE_JUMP_LABEL_BATCH
 
-#define JUMP_LABEL_NOP_SIZE 5
-
 #include <asm/asm.h>
 #include <asm/nops.h>
 
@@ -47,6 +45,8 @@ l_yes:
 	return true;
 }
 
+extern int arch_jump_entry_size(struct jump_entry *entry);
+
 #endif	/* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 638d3b9be0ad2..a29eecc14c94d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,6 +16,13 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
 
+#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
+
+int arch_jump_entry_size(struct jump_entry *entry)
+{
+	return JMP32_INSN_SIZE;
+}
+
 static const void *
 __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
 {
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 05f5554d860f5..8c45f58292ac4 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -176,6 +176,15 @@ static inline void jump_entry_set_init(struct jump_entry *entry)
 	entry->key |= 2;
 }
 
+static inline int jump_entry_size(struct jump_entry *entry)
+{
+#ifdef JUMP_LABEL_NOP_SIZE
+	return JUMP_LABEL_NOP_SIZE;
+#else
+	return arch_jump_entry_size(entry);
+#endif
+}
+
 #endif
 #endif
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index ba39fbb1f8e73..521cafcfcb69b 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -309,7 +309,7 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit);
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
 	if (jump_entry_code(entry) <= (unsigned long)end &&
-	    jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+	    jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start)
 		return 1;
 
 	return 0;
-- 
GitLab


From 001951bea748d3f675e1778f42b17290a8c551bf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:33:59 +0200
Subject: [PATCH 0486/3804] jump_label, x86: Add variable length patching
 support

This allows the patching to emit 2 byte JMP/NOP instructions in
addition to the 5 byte JMP/NOP we already did. This allows for more
compact code.

This code is not yet used, as we don't emit shorter code at compile
time yet.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.846870383@infradead.org
---
 arch/x86/kernel/jump_label.c | 53 ++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index a29eecc14c94d..190d810fa435d 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -23,44 +23,63 @@ int arch_jump_entry_size(struct jump_entry *entry)
 	return JMP32_INSN_SIZE;
 }
 
-static const void *
-__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type)
+struct jump_label_patch {
+	const void *code;
+	int size;
+};
+
+static struct jump_label_patch
+__jump_label_patch(struct jump_entry *entry, enum jump_label_type type)
 {
-	const void *expect, *code;
+	const void *expect, *code, *nop;
 	const void *addr, *dest;
+	int size;
 
 	addr = (void *)jump_entry_code(entry);
 	dest = (void *)jump_entry_target(entry);
 
-	code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+	size = arch_jump_entry_size(entry);
+	switch (size) {
+	case JMP8_INSN_SIZE:
+		code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	case JMP32_INSN_SIZE:
+		code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest);
+		nop = x86_nops[size];
+		break;
+
+	default: BUG();
+	}
 
 	if (type == JUMP_LABEL_JMP)
-		expect = x86_nops[5];
+		expect = nop;
 	else
 		expect = code;
 
-	if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) {
+	if (memcmp(addr, expect, size)) {
 		/*
 		 * The location is not an op that we were expecting.
 		 * Something went wrong. Crash the box, as something could be
 		 * corrupting the kernel.
 		 */
-		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n",
-				addr, addr, addr, expect, type);
+		pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n",
+				addr, addr, addr, expect, size, type);
 		BUG();
 	}
 
 	if (type == JUMP_LABEL_NOP)
-		code = x86_nops[5];
+		code = nop;
 
-	return code;
+	return (struct jump_label_patch){.code = code, .size = size};
 }
 
 static inline void __jump_label_transform(struct jump_entry *entry,
 					  enum jump_label_type type,
 					  int init)
 {
-	const void *opcode = __jump_label_set_jump_code(entry, type);
+	const struct jump_label_patch jlp = __jump_label_patch(entry, type);
 
 	/*
 	 * As long as only a single processor is running and the code is still
@@ -74,12 +93,11 @@ static inline void __jump_label_transform(struct jump_entry *entry,
 	 * always nop being the 'currently valid' instruction
 	 */
 	if (init || system_state == SYSTEM_BOOTING) {
-		text_poke_early((void *)jump_entry_code(entry), opcode,
-				JUMP_LABEL_NOP_SIZE);
+		text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size);
 		return;
 	}
 
-	text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 }
 
 static void __ref jump_label_transform(struct jump_entry *entry,
@@ -100,7 +118,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
 bool arch_jump_label_transform_queue(struct jump_entry *entry,
 				     enum jump_label_type type)
 {
-	const void *opcode;
+	struct jump_label_patch jlp;
 
 	if (system_state == SYSTEM_BOOTING) {
 		/*
@@ -111,9 +129,8 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 	}
 
 	mutex_lock(&text_mutex);
-	opcode = __jump_label_set_jump_code(entry, type);
-	text_poke_queue((void *)jump_entry_code(entry),
-			opcode, JUMP_LABEL_NOP_SIZE, NULL);
+	jlp = __jump_label_patch(entry, type);
+	text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 	mutex_unlock(&text_mutex);
 	return true;
 }
-- 
GitLab


From 5af0ea293d78c8b8f0b87ae2b13f7ac584057bc3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:00 +0200
Subject: [PATCH 0487/3804] jump_label: Free jump_entry::key bit1 for build use

Have jump_label_init() set jump_entry::key bit1 to either 0 or 1
unconditionally. This makes it available for build-time games.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.906893264@infradead.org
---
 include/linux/jump_label.h |  7 +++++--
 kernel/jump_label.c        | 10 ++++++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 8c45f58292ac4..48b9b2a82767d 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -171,9 +171,12 @@ static inline bool jump_entry_is_init(const struct jump_entry *entry)
 	return (unsigned long)entry->key & 2UL;
 }
 
-static inline void jump_entry_set_init(struct jump_entry *entry)
+static inline void jump_entry_set_init(struct jump_entry *entry, bool set)
 {
-	entry->key |= 2;
+	if (set)
+		entry->key |= 2;
+	else
+		entry->key &= ~2;
 }
 
 static inline int jump_entry_size(struct jump_entry *entry)
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 521cafcfcb69b..bdb0681bece82 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -483,13 +483,14 @@ void __init jump_label_init(void)
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
 		/* rewrite NOPs */
 		if (jump_label_type(iter) == JUMP_LABEL_NOP)
 			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 
-		if (init_section_contains((void *)jump_entry_code(iter), 1))
-			jump_entry_set_init(iter);
+		in_init = init_section_contains((void *)jump_entry_code(iter), 1);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
@@ -634,9 +635,10 @@ static int jump_label_add_module(struct module *mod)
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
+		bool in_init;
 
-		if (within_module_init(jump_entry_code(iter), mod))
-			jump_entry_set_init(iter);
+		in_init = within_module_init(jump_entry_code(iter), mod);
+		jump_entry_set_init(iter, in_init);
 
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
-- 
GitLab


From e7bf1ba97afdde75b0ef43e4bdb718bf843613f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:01 +0200
Subject: [PATCH 0488/3804] jump_label, x86: Emit short JMP

Now that we can patch short JMP/NOP, allow the compiler/assembler to
emit short JMP instructions.

There is no way to have the assembler emit short NOPs based on the
potential displacement, so leave those long for now.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194157.967034497@infradead.org
---
 arch/x86/include/asm/jump_label.h | 3 +--
 arch/x86/kernel/jump_label.c      | 8 +++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index d85802a006296..ef819e33cfc16 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -35,8 +35,7 @@ l_yes:
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-		".byte 0xe9 \n\t"
-		".long %l[l_yes] - (. + 4) \n\t"
+		"jmp %l[l_yes]\n\t"
 		JUMP_TABLE_ENTRY
 		: :  "i" (key), "i" (branch) : : l_yes);
 
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 190d810fa435d..a762dc1c615eb 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -15,12 +15,18 @@
 #include <asm/kprobes.h>
 #include <asm/alternative.h>
 #include <asm/text-patching.h>
+#include <asm/insn.h>
 
 #define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
 
 int arch_jump_entry_size(struct jump_entry *entry)
 {
-	return JMP32_INSN_SIZE;
+	struct insn insn = {};
+
+	insn_decode_kernel(&insn, (void *)jump_entry_code(entry));
+	BUG_ON(insn.length != 2 && insn.length != 5);
+
+	return insn.length;
 }
 
 struct jump_label_patch {
-- 
GitLab


From cbf82a3dc241aea82b941a872ed5c52f6af527ea Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:02 +0200
Subject: [PATCH 0489/3804] objtool: Decode jump_entry::key addend

Teach objtool about the low bits in the struct static_key pointer.

That is, the low two bits of @key in:

  struct jump_entry {
	s32 code;
	s32 target;
	long key;
  }

as found in the __jump_table section. Since @key has a relocation to
the variable (to be resolved by the linker), the low two bits will be
reflected in the relocation's addend.

As such, find the reloc and store the addend, such that we can access
these bits.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.028024143@infradead.org
---
 tools/objtool/arch/x86/include/arch/special.h |  1 +
 tools/objtool/include/objtool/special.h       |  1 +
 tools/objtool/special.c                       | 14 ++++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index 14271cca0c740..f2918f789a0a3 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -9,6 +9,7 @@
 #define JUMP_ENTRY_SIZE		16
 #define JUMP_ORIG_OFFSET	0
 #define JUMP_NEW_OFFSET		4
+#define JUMP_KEY_OFFSET		8
 
 #define ALT_ENTRY_SIZE		12
 #define ALT_ORIG_OFFSET		0
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 8a09f4e9d480e..dc4721e190023 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -27,6 +27,7 @@ struct special_alt {
 	unsigned long new_off;
 
 	unsigned int orig_len, new_len; /* group only */
+	u8 key_addend;
 };
 
 int special_get_alts(struct elf *elf, struct list_head *alts);
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 07b21cfabf5c0..bc925cf19e2de 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -23,6 +23,7 @@ struct special_entry {
 	unsigned char size, orig, new;
 	unsigned char orig_len, new_len; /* group only */
 	unsigned char feature; /* ALTERNATIVE macro CPU feature */
+	unsigned char key; /* jump_label key */
 };
 
 struct special_entry entries[] = {
@@ -42,6 +43,7 @@ struct special_entry entries[] = {
 		.size = JUMP_ENTRY_SIZE,
 		.orig = JUMP_ORIG_OFFSET,
 		.new = JUMP_NEW_OFFSET,
+		.key = JUMP_KEY_OFFSET,
 	},
 	{
 		.sec = "__ex_table",
@@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 			alt->new_off -= 0x7ffffff0;
 	}
 
+	if (entry->key) {
+		struct reloc *key_reloc;
+
+		key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
+		if (!key_reloc) {
+			WARN_FUNC("can't find key reloc",
+				  sec, offset + entry->key);
+			return -1;
+		}
+		alt->key_addend = key_reloc->addend;
+	}
+
 	return 0;
 }
 
-- 
GitLab


From 6d37b83c5d79ef5996cc49c3e3ac3d8ecd8c7050 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:03 +0200
Subject: [PATCH 0490/3804] objtool: Rewrite jump_label instructions

When a jump_entry::key has bit1 set, rewrite the instruction to be a
NOP. This allows the compiler/assembler to emit JMP (and thus decide
on which encoding to use).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.091028792@infradead.org
---
 tools/objtool/check.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 9ed1a4cd00dc0..98cf87f2c5019 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1234,6 +1234,20 @@ static int handle_jump_alt(struct objtool_file *file,
 		return -1;
 	}
 
+	if (special_alt->key_addend & 2) {
+		struct reloc *reloc = insn_reloc(file, orig_insn);
+
+		if (reloc) {
+			reloc->type = R_NONE;
+			elf_write_reloc(file->elf, reloc);
+		}
+		elf_write_insn(file->elf, orig_insn->sec,
+			       orig_insn->offset, orig_insn->len,
+			       arch_nop_insn(orig_insn->len));
+		orig_insn->type = INSN_NOP;
+		return 0;
+	}
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }
-- 
GitLab


From e2d9494beff21a26438eb611c260b8a6c2dc4dbf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:04 +0200
Subject: [PATCH 0491/3804] objtool: Provide stats for jump_labels

Add objtool --stats to count the jump_label sites it encounters.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.153101906@infradead.org
---
 tools/objtool/check.c                   | 22 ++++++++++++++++++++--
 tools/objtool/include/objtool/objtool.h |  3 +++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 98cf87f2c5019..2c6a93edf27ec 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,8 +1225,15 @@ static int handle_jump_alt(struct objtool_file *file,
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
-	if (orig_insn->type == INSN_NOP)
+	if (orig_insn->type == INSN_NOP) {
+do_nop:
+		if (orig_insn->len == 2)
+			file->jl_nop_short++;
+		else
+			file->jl_nop_long++;
+
 		return 0;
+	}
 
 	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
 		WARN_FUNC("unsupported instruction at jump label",
@@ -1245,9 +1252,14 @@ static int handle_jump_alt(struct objtool_file *file,
 			       orig_insn->offset, orig_insn->len,
 			       arch_nop_insn(orig_insn->len));
 		orig_insn->type = INSN_NOP;
-		return 0;
+		goto do_nop;
 	}
 
+	if (orig_insn->len == 2)
+		file->jl_short++;
+	else
+		file->jl_long++;
+
 	*new_insn = list_next_entry(orig_insn, list);
 	return 0;
 }
@@ -1328,6 +1340,12 @@ static int add_special_section_alts(struct objtool_file *file)
 		free(special_alt);
 	}
 
+	if (stats) {
+		printf("jl\\\tNOP\tJMP\n");
+		printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short);
+		printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
+	}
+
 out:
 	return ret;
 }
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e4084afb2304b..24fa83634de4d 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -22,6 +22,9 @@ struct objtool_file {
 	struct list_head static_call_list;
 	struct list_head mcount_loc_list;
 	bool ignore_unreachables, c_file, hints, rodata;
+
+	unsigned long jl_short, jl_long;
+	unsigned long jl_nop_short, jl_nop_long;
 };
 
 struct objtool_file *objtool_open_read(const char *_objname);
-- 
GitLab


From ab3257042c26d0cd44793c741e2f89bf38b21fe8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 May 2021 21:34:05 +0200
Subject: [PATCH 0492/3804] jump_label, x86: Allow short NOPs

Now that objtool is able to rewrite jump_label instructions, have the
compiler emit a JMP, such that it can decide on the optimal encoding,
and set jump_entry::key bit1 to indicate that objtool should rewrite
the instruction to a matching NOP.

For x86_64-allyesconfig this gives:

  jl\     NOP     JMP
  short:  22997   124
  long:   30874   90

IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210506194158.216763632@infradead.org
---
 arch/x86/include/asm/jump_label.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index ef819e33cfc16..0449b125d27f3 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -20,6 +20,22 @@
 	_ASM_PTR "%c0 + %c1 - .\n\t"			\
 	".popsection \n\t"
 
+#ifdef CONFIG_STACK_VALIDATION
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+	asm_volatile_goto("1:"
+		"jmp %l[l_yes] # objtool NOPs this \n\t"
+		JUMP_TABLE_ENTRY
+		: :  "i" (key), "i" (2 | branch) : : l_yes);
+
+	return false;
+l_yes:
+	return true;
+}
+
+#else
+
 static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
@@ -32,6 +48,8 @@ l_yes:
 	return true;
 }
 
+#endif /* STACK_VALIDATION */
+
 static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
 {
 	asm_volatile_goto("1:"
-- 
GitLab


From 7ea96eefb0097d243af62fc672be9f17b10338b3 Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Wed, 12 May 2021 11:43:52 +0200
Subject: [PATCH 0493/3804] block, bfq: avoid circular stable merges

BFQ may merge a new bfq_queue, stably, with the last bfq_queue
created. In particular, BFQ first waits a little bit for some I/O to
flow inside the new queue, say Q2, if this is needed to understand
whether it is better or worse to merge Q2 with the last queue created,
say Q1. This delayed stable merge is performed by assigning
bic->stable_merge_bfqq = Q1, for the bic associated with Q2.

Yet, while waiting for some I/O to flow in Q2, a non-stable queue
merge of Q2 with Q1 may happen, causing the bic previously associated
with Q2 to be associated with exactly Q1 (bic->bfqq = Q1). After that,
Q2 and Q1 may happen to be split, and, in the split, Q1 may happen to
be recycled as a non-shared bfq_queue. In that case, Q1 may then
happen to undergo a stable merge with the bfq_queue pointed by
bic->stable_merge_bfqq. Yet bic->stable_merge_bfqq still points to
Q1. So Q1 would be merged with itself.

This commit fixes this error by intercepting this situation, and
canceling the schedule of the stable merge.

Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues")
Signed-off-by: Pietro Pedroni <pedroni.pietro.96@gmail.com>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Link: https://lore.kernel.org/r/20210512094352.85545-2-paolo.valente@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 59b2499d3f8be..acd1f881273e0 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
 	return bic->bfqq[is_sync];
 }
 
+static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+
 void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
 {
+	/*
+	 * If bfqq != NULL, then a non-stable queue merge between
+	 * bic->bfqq and bfqq is happening here. This causes troubles
+	 * in the following case: bic->bfqq has also been scheduled
+	 * for a possible stable merge with bic->stable_merge_bfqq,
+	 * and bic->stable_merge_bfqq == bfqq happens to
+	 * hold. Troubles occur because bfqq may then undergo a split,
+	 * thereby becoming eligible for a stable merge. Yet, if
+	 * bic->stable_merge_bfqq points exactly to bfqq, then bfqq
+	 * would be stably merged with itself. To avoid this anomaly,
+	 * we cancel the stable merge if
+	 * bic->stable_merge_bfqq == bfqq.
+	 */
 	bic->bfqq[is_sync] = bfqq;
+
+	if (bfqq && bic->stable_merge_bfqq == bfqq) {
+		/*
+		 * Actually, these same instructions are executed also
+		 * in bfq_setup_cooperator, in case of abort or actual
+		 * execution of a stable merge. We could avoid
+		 * repeating these instructions there too, but if we
+		 * did so, we would nest even more complexity in this
+		 * function.
+		 */
+		bfq_put_stable_ref(bic->stable_merge_bfqq);
+
+		bic->stable_merge_bfqq = NULL;
+	}
 }
 
 struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
@@ -2630,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
 static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
 					     struct bfq_queue *bfqq);
 
-static void bfq_put_stable_ref(struct bfq_queue *bfqq);
-
 /*
  * Attempt to schedule a merge of bfqq with the currently in-service
  * queue or with a close queue among the scheduled queues.  Return
-- 
GitLab


From 190515f610946db025cdedebde93958b725fb583 Mon Sep 17 00:00:00 2001
From: Lin Feng <linf@wangsu.com>
Date: Wed, 12 May 2021 18:01:24 +0800
Subject: [PATCH 0494/3804] blkdev.h: remove unused codes blk_account_rq

The last users of blk_account_rq went away with commit a1ce35fa49852db
("block: remove dead elevator code"), so it no longer has any callers and
can be safely removed.

Signed-off-by: Lin Feng <linf@wangsu.com>
Link: https://lore.kernel.org/r/20210512100124.173769-1-linf@wangsu.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b91ba6207365b..26c3e368656f0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -677,11 +677,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
 
-static inline bool blk_account_rq(struct request *rq)
-{
-	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
-}
-
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
-- 
GitLab


From 2404b8747019184002823dba7d2f0ecf89d802b7 Mon Sep 17 00:00:00 2001
From: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Date: Tue, 11 May 2021 23:31:42 +0530
Subject: [PATCH 0495/3804] ACPI: PM: Add ACPI ID of Alder Lake Fan

Add a new unique fan ACPI device ID for Alder Lake to
support it in acpi_dev_pm_attach() function.

Fixes: 38748bcb940e ("ACPI: DPTF: Support Alder Lake")
Signed-off-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Acked-by: Zhang Rui <rui.zhang@intel.com>
Cc: 5.10+ <stable@vger.kernel.org> # 5.10+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 096153761ebc3..58876248b1921 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1310,6 +1310,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
 		{"PNP0C0B", }, /* Generic ACPI fan */
 		{"INT3404", }, /* Fan */
 		{"INTC1044", }, /* Fan for Tiger Lake generation */
+		{"INTC1048", }, /* Fan for Alder Lake generation */
 		{}
 	};
 	struct acpi_device *adev = ACPI_COMPANION(dev);
-- 
GitLab


From f395183f9544ba2f56b25938d6ea7042bd873521 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 12 May 2021 07:38:00 -0700
Subject: [PATCH 0496/3804] f2fs: return EINVAL for hole cases in swap file

This tries to fix xfstests/generic/495.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 41e260680b27c..009a09fb9d88c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3896,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 		/* hole */
 		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
 			f2fs_err(sbi, "Swapfile has holes\n");
-			ret = -ENOENT;
+			ret = -EINVAL;
 			goto out;
 		}
 
@@ -4052,7 +4052,7 @@ out:
 	return ret;
 bad_bmap:
 	f2fs_err(sbi, "Swapfile has holes\n");
-	return -ENOENT;
+	return -EINVAL;
 }
 
 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
-- 
GitLab


From 79ebe9110fa458d58f1fceb078e2068d7ad37390 Mon Sep 17 00:00:00 2001
From: Sun Ke <sunke32@huawei.com>
Date: Wed, 12 May 2021 19:43:30 +0800
Subject: [PATCH 0497/3804] nbd: Fix NULL pointer in flush_workqueue

If /dev/nbdX is opened first, config_refs will be 1 while the
pointers in nbd_device are still NULL. Disconnecting /dev/nbdX
then dereferences a NULL recv_workq. The protection by
config_refs in nbd_genl_disconnect is useless.

[  656.366194] BUG: kernel NULL pointer dereference, address: 0000000000000020
[  656.368943] #PF: supervisor write access in kernel mode
[  656.369844] #PF: error_code(0x0002) - not-present page
[  656.370717] PGD 10cc87067 P4D 10cc87067 PUD 1074b4067 PMD 0
[  656.371693] Oops: 0002 [#1] SMP
[  656.372242] CPU: 5 PID: 7977 Comm: nbd-client Not tainted 5.11.0-rc5-00040-g76c057c84d28 #1
[  656.373661] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
[  656.375904] RIP: 0010:mutex_lock+0x29/0x60
[  656.376627] Code: 00 0f 1f 44 00 00 55 48 89 fd 48 83 05 6f d7 fe 08 01 e8 7a c3 ff ff 48 83 05 6a d7 fe 08 01 31 c0 65 48 8b 14 25 00 6d 01 00 <f0> 48 0f b1 55 d
[  656.378934] RSP: 0018:ffffc900005eb9b0 EFLAGS: 00010246
[  656.379350] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[  656.379915] RDX: ffff888104cf2600 RSI: ffffffffaae8f452 RDI: 0000000000000020
[  656.380473] RBP: 0000000000000020 R08: 0000000000000000 R09: ffff88813bd6b318
[  656.381039] R10: 00000000000000c7 R11: fefefefefefefeff R12: ffff888102710b40
[  656.381599] R13: ffffc900005eb9e0 R14: ffffffffb2930680 R15: ffff88810770ef00
[  656.382166] FS:  00007fdf117ebb40(0000) GS:ffff88813bd40000(0000) knlGS:0000000000000000
[  656.382806] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  656.383261] CR2: 0000000000000020 CR3: 0000000100c84000 CR4: 00000000000006e0
[  656.383819] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  656.384370] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  656.384927] Call Trace:
[  656.385111]  flush_workqueue+0x92/0x6c0
[  656.385395]  nbd_disconnect_and_put+0x81/0xd0
[  656.385716]  nbd_genl_disconnect+0x125/0x2a0
[  656.386034]  genl_family_rcv_msg_doit.isra.0+0x102/0x1b0
[  656.386422]  genl_rcv_msg+0xfc/0x2b0
[  656.386685]  ? nbd_ioctl+0x490/0x490
[  656.386954]  ? genl_family_rcv_msg_doit.isra.0+0x1b0/0x1b0
[  656.387354]  netlink_rcv_skb+0x62/0x180
[  656.387638]  genl_rcv+0x34/0x60
[  656.387874]  netlink_unicast+0x26d/0x590
[  656.388162]  netlink_sendmsg+0x398/0x6c0
[  656.388451]  ? netlink_rcv_skb+0x180/0x180
[  656.388750]  ____sys_sendmsg+0x1da/0x320
[  656.389038]  ? ____sys_recvmsg+0x130/0x220
[  656.389334]  ___sys_sendmsg+0x8e/0xf0
[  656.389605]  ? ___sys_recvmsg+0xa2/0xf0
[  656.389889]  ? handle_mm_fault+0x1671/0x21d0
[  656.390201]  __sys_sendmsg+0x6d/0xe0
[  656.390464]  __x64_sys_sendmsg+0x23/0x30
[  656.390751]  do_syscall_64+0x45/0x70
[  656.391017]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

To fix it, just add if (nbd->recv_workq) to nbd_disconnect_and_put().

Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs")
Signed-off-by: Sun Ke <sunke32@huawei.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20210512114331.1233964-2-sunke32@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 4ff71b579cfcc..974da561b8e5e 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
 	 * config ref and try to destroy the workqueue from inside the work
 	 * queue.
 	 */
-	flush_workqueue(nbd->recv_workq);
+	if (nbd->recv_workq)
+		flush_workqueue(nbd->recv_workq);
 	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
 			       &nbd->config->runtime_flags))
 		nbd_config_put(nbd);
-- 
GitLab


From bedf78c4cbbbb65e42ede5ca2bd21887ef5b7060 Mon Sep 17 00:00:00 2001
From: Sun Ke <sunke32@huawei.com>
Date: Wed, 12 May 2021 19:43:31 +0800
Subject: [PATCH 0498/3804] nbd: share nbd_put and return by goto put_nbd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the following two statements by the statement “goto put_nbd;”

	nbd_put(nbd);
	return 0;

Signed-off-by: Sun Ke <sunke32@huawei.com>
Suggested-by: Markus Elfring <Markus.Elfring@web.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Link: https://lore.kernel.org/r/20210512114331.1233964-3-sunke32@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 974da561b8e5e..45d2c28c8fc83 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2015,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 	mutex_unlock(&nbd_index_mutex);
-	if (!refcount_inc_not_zero(&nbd->config_refs)) {
-		nbd_put(nbd);
-		return 0;
-	}
+	if (!refcount_inc_not_zero(&nbd->config_refs))
+		goto put_nbd;
 	nbd_disconnect_and_put(nbd);
 	nbd_config_put(nbd);
+put_nbd:
 	nbd_put(nbd);
 	return 0;
 }
-- 
GitLab


From f8c8871f5eff3981eeb13421aca2c1cfda4a5204 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Tue, 11 May 2021 17:13:33 -0400
Subject: [PATCH 0499/3804] regulator: fan53555: fix TCS4525 voltage calculation

The TCS4525 has 128 voltage steps. With the calculation set to 127 the
most significant bit is disregarded which leads to a miscalculation of
the voltage by about 200 mV.

Fix the calculation to end deadlock on the rk3566-quartz64 which uses
this as the cpu regulator.

Fixes: 914df8faa7d6 ("regulator: fan53555: Add TCS4525 DCDC support")
Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Link: https://lore.kernel.org/r/20210511211335.2935163-2-pgwipeout@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index f3918f03aaf3d..26f06f685b1b6 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -55,7 +55,6 @@
 
 #define FAN53555_NVOLTAGES	64	/* Numbers of voltages */
 #define FAN53526_NVOLTAGES	128
-#define TCS4525_NVOLTAGES	127	/* Numbers of voltages */
 
 #define TCS_VSEL_NSEL_MASK	0x7f
 #define TCS_VSEL0_MODE		(1 << 7)
@@ -376,7 +375,7 @@ static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di)
 	/* Init voltage range and step */
 	di->vsel_min = 600000;
 	di->vsel_step = 6250;
-	di->vsel_count = TCS4525_NVOLTAGES;
+	di->vsel_count = FAN53526_NVOLTAGES;
 
 	return 0;
 }
-- 
GitLab


From d4db69eba290732357f03ba0a14350b81f778290 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Tue, 11 May 2021 17:13:33 -0400
Subject: [PATCH 0500/3804] regulator: fan53555: fix TCS4525 voltage calculation

The TCS4525 has 128 voltage steps. With the calculation set to 127 the
most significant bit is disregarded which leads to a miscalculation of
the voltage by about 200 mV.

Fix the calculation to end deadlock on the rk3566-quartz64 which uses
this as the cpu regulator.

Fixes: 914df8faa7d6 ("regulator: fan53555: Add TCS4525 DCDC support")
Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Link: https://lore.kernel.org/r/20210511211335.2935163-2-pgwipeout@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index f3918f03aaf3d..26f06f685b1b6 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -55,7 +55,6 @@
 
 #define FAN53555_NVOLTAGES	64	/* Numbers of voltages */
 #define FAN53526_NVOLTAGES	128
-#define TCS4525_NVOLTAGES	127	/* Numbers of voltages */
 
 #define TCS_VSEL_NSEL_MASK	0x7f
 #define TCS_VSEL0_MODE		(1 << 7)
@@ -376,7 +375,7 @@ static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di)
 	/* Init voltage range and step */
 	di->vsel_min = 600000;
 	di->vsel_step = 6250;
-	di->vsel_count = TCS4525_NVOLTAGES;
+	di->vsel_count = FAN53526_NVOLTAGES;
 
 	return 0;
 }
-- 
GitLab


From f9028dcdf589f4ab528372088623aa4e8d324df2 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Tue, 11 May 2021 17:13:34 -0400
Subject: [PATCH 0501/3804] regulator: fan53555: only bind tcs4525 to correct
 chip id

The tcs4525 regulator has a chip id of <12>.
Only allow the driver to bind to the correct chip id for safety, in
accordance with the other supported devices.

Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Link: https://lore.kernel.org/r/20210511211335.2935163-3-pgwipeout@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 26f06f685b1b6..16f28f9df6a1b 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -89,6 +89,10 @@ enum {
 	FAN53555_CHIP_ID_08 = 8,
 };
 
+enum {
+	TCS4525_CHIP_ID_12 = 12,
+};
+
 /* IC mask revision */
 enum {
 	FAN53555_CHIP_REV_00 = 0x3,
@@ -368,14 +372,21 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di)
 
 static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di)
 {
-	di->slew_reg = TCS4525_TIME;
-	di->slew_mask = TCS_SLEW_MASK;
-	di->slew_shift = TCS_SLEW_MASK;
+	switch (di->chip_id) {
+	case TCS4525_CHIP_ID_12:
+		di->slew_reg = TCS4525_TIME;
+		di->slew_mask = TCS_SLEW_MASK;
+		di->slew_shift = TCS_SLEW_MASK;
 
-	/* Init voltage range and step */
-	di->vsel_min = 600000;
-	di->vsel_step = 6250;
-	di->vsel_count = FAN53526_NVOLTAGES;
+		/* Init voltage range and step */
+		di->vsel_min = 600000;
+		di->vsel_step = 6250;
+		di->vsel_count = FAN53526_NVOLTAGES;
+		break;
+	default:
+		dev_err(di->dev, "Chip ID %d not supported!\n", di->chip_id);
+		return -EINVAL;
+	}
 
 	return 0;
 }
-- 
GitLab


From b3cc8ec04f50d9c860534fe4e3617a8d10ed9ea9 Mon Sep 17 00:00:00 2001
From: Peter Geis <pgwipeout@gmail.com>
Date: Tue, 11 May 2021 17:13:35 -0400
Subject: [PATCH 0502/3804] regulator: fan53555: fix tcs4525 function names

The tcs4525 is based off the fan53526.
Rename the tcs4525 functions to align with this.

Signed-off-by: Peter Geis <pgwipeout@gmail.com>
Link: https://lore.kernel.org/r/20210511211335.2935163-4-pgwipeout@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 16f28f9df6a1b..2695be617373c 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -67,7 +67,7 @@ enum fan53555_vendor {
 	FAN53526_VENDOR_FAIRCHILD = 0,
 	FAN53555_VENDOR_FAIRCHILD,
 	FAN53555_VENDOR_SILERGY,
-	FAN53555_VENDOR_TCS,
+	FAN53526_VENDOR_TCS,
 };
 
 enum {
@@ -233,7 +233,7 @@ static int fan53555_set_ramp(struct regulator_dev *rdev, int ramp)
 		slew_rate_t = slew_rates;
 		slew_rate_n = ARRAY_SIZE(slew_rates);
 		break;
-	case FAN53555_VENDOR_TCS:
+	case FAN53526_VENDOR_TCS:
 		slew_rate_t = tcs_slew_rates;
 		slew_rate_n = ARRAY_SIZE(tcs_slew_rates);
 		break;
@@ -370,7 +370,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di)
 	return 0;
 }
 
-static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di)
+static int fan53526_voltages_setup_tcs(struct fan53555_device_info *di)
 {
 	switch (di->chip_id) {
 	case TCS4525_CHIP_ID_12:
@@ -420,7 +420,7 @@ static int fan53555_device_setup(struct fan53555_device_info *di,
 			return -EINVAL;
 		}
 		break;
-	case FAN53555_VENDOR_TCS:
+	case FAN53526_VENDOR_TCS:
 		switch (pdata->sleep_vsel_id) {
 		case FAN53555_VSEL_ID_0:
 			di->sleep_reg = TCS4525_VSEL0;
@@ -459,7 +459,7 @@ static int fan53555_device_setup(struct fan53555_device_info *di,
 		di->mode_reg = di->vol_reg;
 		di->mode_mask = VSEL_MODE;
 		break;
-	case FAN53555_VENDOR_TCS:
+	case FAN53526_VENDOR_TCS:
 		di->mode_reg = TCS4525_COMMAND;
 
 		switch (pdata->sleep_vsel_id) {
@@ -487,8 +487,8 @@ static int fan53555_device_setup(struct fan53555_device_info *di,
 	case FAN53555_VENDOR_SILERGY:
 		ret = fan53555_voltages_setup_silergy(di);
 		break;
-	case FAN53555_VENDOR_TCS:
-		ret = fan53555_voltages_setup_tcs(di);
+	case FAN53526_VENDOR_TCS:
+		ret = fan53526_voltages_setup_tcs(di);
 		break;
 	default:
 		dev_err(di->dev, "vendor %d not supported!\n", di->vendor);
@@ -563,7 +563,7 @@ static const struct of_device_id __maybe_unused fan53555_dt_ids[] = {
 		.data = (void *)FAN53555_VENDOR_SILERGY,
 	}, {
 		.compatible = "tcs,tcs4525",
-		.data = (void *)FAN53555_VENDOR_TCS
+		.data = (void *)FAN53526_VENDOR_TCS
 	},
 	{ }
 };
@@ -671,7 +671,7 @@ static const struct i2c_device_id fan53555_id[] = {
 		.driver_data = FAN53555_VENDOR_SILERGY
 	}, {
 		.name = "tcs4525",
-		.driver_data = FAN53555_VENDOR_TCS
+		.driver_data = FAN53526_VENDOR_TCS
 	},
 	{ },
 };
-- 
GitLab


From dbb5afad100a828c97e012c6106566d99f041db6 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 12 May 2021 15:33:08 +0200
Subject: [PATCH 0503/3804] ptrace: make ptrace() fail if the tracee changed
 its pid unexpectedly

Suppose we have 2 threads, the group-leader L and a sub-thread T,
both parked in ptrace_stop(). Debugger tries to resume both threads
and does

	ptrace(PTRACE_CONT, T);
	ptrace(PTRACE_CONT, L);

If the sub-thread T execs in between, the 2nd PTRACE_CONT does not
resume the old leader L, it resumes the post-exec thread T which was
actually now stopped in PTRACE_EVENT_EXEC. In this case the
PTRACE_EVENT_EXEC event is lost, and the tracer can't know that the
tracee changed its pid.

This patch makes ptrace() fail in this case until debugger does wait()
and consumes PTRACE_EVENT_EXEC which reports old_pid. This affects all
ptrace requests except the "asynchronous" PTRACE_INTERRUPT/KILL.

The patch doesn't add the new PTRACE_ option to not complicate the API,
and I _hope_ this won't cause any noticeable regression:

	- If debugger uses PTRACE_O_TRACEEXEC and the thread did an exec
	  and the tracer does a ptrace request without having consumed
	  the exec event, it's 100% sure that the thread the ptracer
	  thinks it is targeting does not exist anymore, or isn't the
	  same as the one it thinks it is targeting.

	- To some degree this patch adds nothing new. In the scenario
	  above ptrace(L) can fail with -ESRCH if it is called after the
	  execing sub-thread wakes the leader up and before it "steals"
	  the leader's pid.

Test-case:

	#include <stdio.h>
	#include <unistd.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <errno.h>
	#include <pthread.h>
	#include <assert.h>

	void *tf(void *arg)
	{
		execve("/usr/bin/true", NULL, NULL);
		assert(0);

		return NULL;
	}

	int main(void)
	{
		int leader = fork();
		if (!leader) {
			kill(getpid(), SIGSTOP);

			pthread_t th;
			pthread_create(&th, NULL, tf, NULL);
			for (;;)
				pause();

			return 0;
		}

		waitpid(leader, NULL, WSTOPPED);

		ptrace(PTRACE_SEIZE, leader, 0,
				PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC);
		waitpid(leader, NULL, 0);

		ptrace(PTRACE_CONT, leader, 0,0);
		waitpid(leader, NULL, 0);

		int status, thread = waitpid(-1, &status, 0);
		assert(thread > 0 && thread != leader);
		assert(status == 0x80137f);

		ptrace(PTRACE_CONT, thread, 0,0);
		/*
		 * waitid() because waitpid(leader, &status, WNOWAIT) does not
		 * report status. Why ????
		 *
		 * Why WEXITED? because we have another kernel problem connected
		 * to mt-exec.
		 */
		siginfo_t info;
		assert(waitid(P_PID, leader, &info, WSTOPPED|WEXITED|WNOWAIT) == 0);
		assert(info.si_pid == leader && info.si_status == 0x0405);

		/* OK, it sleeps in ptrace(PTRACE_EVENT_EXEC == 0x04) */
		assert(ptrace(PTRACE_CONT, leader, 0,0) == -1);
		assert(errno == ESRCH);

		assert(leader == waitpid(leader, &status, WNOHANG));
		assert(status == 0x04057f);

		assert(ptrace(PTRACE_CONT, leader, 0,0) == 0);

		return 0;
	}

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Simon Marchi <simon.marchi@efficios.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: Pedro Alves <palves@redhat.com>
Acked-by: Simon Marchi <simon.marchi@efficios.com>
Acked-by: Jan Kratochvil <jan.kratochvil@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/ptrace.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 76f09456ec4bc..2997ca600d186 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child)
 	spin_unlock(&child->sighand->siglock);
 }
 
+static bool looks_like_a_spurious_pid(struct task_struct *task)
+{
+	if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+		return false;
+
+	if (task_pid_vnr(task) == task->ptrace_message)
+		return false;
+	/*
+	 * The tracee changed its pid but the PTRACE_EVENT_EXEC event
+	 * was not wait()'ed, most probably debugger targets the old
+	 * leader which was destroyed in de_thread().
+	 */
+	return true;
+}
+
 /* Ensure that nothing can wake it up, even SIGKILL */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
@@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 		return ret;
 
 	spin_lock_irq(&task->sighand->siglock);
-	if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+	    !__fatal_signal_pending(task)) {
 		task->state = __TASK_TRACED;
 		ret = true;
 	}
-- 
GitLab


From 85428beac80dbcace5b146b218697c73e367dcf5 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Wed, 12 May 2021 16:50:05 +0200
Subject: [PATCH 0504/3804] nvmet: reset ns->file when open fails

Reset the ns->file value to NULL also in the error case in
nvmet_file_ns_enable().

The ns->file variable either points to a file object or contains the
error code after the filp_open() call. This can lead to the following
problem:

When the user first sets up an invalid file backend and tries to enable
the ns, it will fail. Then the user switches over to a bdev backend
and successfully enables the ns. The first received I/O will crash the
system because the I/O backend is chosen based on the ns->file value:

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	[...]

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);

	return nvmet_bdev_parse_io_cmd(req);
}

Reported-by: Enzo Matsumiya <ematsumiya@suse.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-file.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 715d4376c9979..7fdbdc496597d 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
 
 	ns->file = filp_open(ns->device_path, flags, 0);
 	if (IS_ERR(ns->file)) {
-		pr_err("failed to open file %s: (%ld)\n",
-				ns->device_path, PTR_ERR(ns->file));
-		return PTR_ERR(ns->file);
+		ret = PTR_ERR(ns->file);
+		pr_err("failed to open file %s: (%d)\n",
+			ns->device_path, ret);
+		ns->file = NULL;
+		return ret;
 	}
 
 	ret = nvmet_file_ns_revalidate(ns);
-- 
GitLab


From 4819d16d91145966ce03818a95169df1fd56b299 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 21 Apr 2021 18:33:58 +0300
Subject: [PATCH 0505/3804] drm/i915: Avoid div-by-zero on gen2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Gen2 tiles are 2KiB in size so i915_gem_object_get_tile_row_size()
can in fact return <4KiB, which leads to div-by-zero here.
Avoid that.

Not sure i915_gem_object_get_tile_row_size() is entirely
sane anyway since it doesn't account for the different tile
layouts on i8xx/i915...

I'm not able to hit this before commit 6846895fde05 ("drm/i915:
Replace PIN_NONFAULT with calls to PIN_NOEVICT") and it looks
like I also need to run a recent version of Mesa. With those in
place xonotic trips on this quite easily on my 85x.

Cc: stable@vger.kernel.org
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-2-ville.syrjala@linux.intel.com
(cherry picked from commit ed52c62d386f764194e0184fdb905d5f24194cae)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 23f6b00e08e21..f6fe5cb014382 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
 	struct i915_ggtt_view view;
 
 	if (i915_gem_object_is_tiled(obj))
-		chunk = roundup(chunk, tile_row_pages(obj));
+		chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
 
 	view.type = I915_GGTT_VIEW_PARTIAL;
 	view.partial.offset = rounddown(page_offset, chunk);
-- 
GitLab


From 04d019961fd15de92874575536310243a0d4c5c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 21 Apr 2021 18:33:59 +0300
Subject: [PATCH 0506/3804] drm/i915: Read C0DRB3/C1DRB3 as 16 bits again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We've defined C0DRB3/C1DRB3 as 16 bit registers, so access them
as such.

Fixes: 1c8242c3a4b2 ("drm/i915: Use unchecked writes for setting up the fences")
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-3-ville.syrjala@linux.intel.com
(cherry picked from commit f765a5b48c667bdada5e49d5e0f23f8c0687b21b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
index e72b7a0dc316e..8a322594210c4 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
 		 * banks of memory are paired and unswizzled on the
 		 * uneven portion, so leave that as unknown.
 		 */
-		if (intel_uncore_read(uncore, C0DRB3) ==
-		    intel_uncore_read(uncore, C1DRB3)) {
+		if (intel_uncore_read16(uncore, C0DRB3) ==
+		    intel_uncore_read16(uncore, C1DRB3)) {
 			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
 			swizzle_y = I915_BIT_6_SWIZZLE_9;
 		}
-- 
GitLab


From ea995218dddba171fecd05496c69617c5ef3c5b8 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Mon, 26 Apr 2021 05:43:40 -0700
Subject: [PATCH 0507/3804] drm/i915/gt: Fix a double free in
 gen8_preallocate_top_level_pdp

Our code analyzer reported a double free bug.

In gen8_preallocate_top_level_pdp, pde and pde->pt.base are allocated
via alloc_pd(vm) with one reference. If pin_pt_dma() failed, pde->pt.base
is freed by i915_gem_object_put() with a reference dropped. Then free_pd
calls free_px() defined in intel_ppgtt.c, which calls i915_gem_object_put()
to put pde->pt.base again.

As pde->pt.base is protected by a refcount, the second put will not
actually free pde->pt.base. But maybe it is better to remove the first put?

Fixes: 82adf901138cc ("drm/i915/gt: Shrink i915_page_directory's slab bucket")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210426124340.4238-1-lyl2019@mail.ustc.edu.cn
(cherry picked from commit ac69496fe65cca0611d5917b7d232730ff605bc7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 176c19633412f..74bf6fc8461fe 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -641,7 +641,6 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
 
 		err = pin_pt_dma(vm, pde->pt.base);
 		if (err) {
-			i915_gem_object_put(pde->pt.base);
 			free_pd(vm, pde);
 			return err;
 		}
-- 
GitLab


From 402be8a101190969fc7ff122d07e262df86e132b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= <marcheu@chromium.org>
Date: Thu, 29 Apr 2021 03:10:21 +0000
Subject: [PATCH 0508/3804] drm/i915: Fix crash in auto_retire
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The retire logic uses the 2 lower bits of the pointer to the retire
function to store flags. However, the auto_retire function is not
guaranteed to be aligned to a multiple of 4, which causes crashes as
we jump to the wrong address, for example like this:

2021-04-24T18:03:53.804300Z WARNING kernel: [  516.876901] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
2021-04-24T18:03:53.804310Z WARNING kernel: [  516.876906] CPU: 7 PID: 146 Comm: kworker/u16:6 Tainted: G     U            5.4.105-13595-g3cd84167b2df #1
2021-04-24T18:03:53.804311Z WARNING kernel: [  516.876907] Hardware name: Google Volteer2/Volteer2, BIOS Google_Volteer2.13672.76.0 02/22/2021
2021-04-24T18:03:53.804312Z WARNING kernel: [  516.876911] Workqueue: events_unbound active_work
2021-04-24T18:03:53.804313Z WARNING kernel: [  516.876914] RIP: 0010:auto_retire+0x1/0x20
2021-04-24T18:03:53.804314Z WARNING kernel: [  516.876916] Code: e8 01 f2 ff ff eb 02 31 db 48 89 d8 5b 5d c3 0f 1f 44 00 00 55 48 89 e5 f0 ff 87 c8 00 00 00 0f 88 ab 47 4a 00 31 c0 5d c3 0f <1f> 44 00 00 55 48 89 e5 f0 ff 8f c8 00 00 00 0f 88 9a 47 4a 00 74
2021-04-24T18:03:53.804319Z WARNING kernel: [  516.876918] RSP: 0018:ffff9b4d809fbe38 EFLAGS: 00010286
2021-04-24T18:03:53.804320Z WARNING kernel: [  516.876919] RAX: 0000000000000007 RBX: ffff927915079600 RCX: 0000000000000007
2021-04-24T18:03:53.804320Z WARNING kernel: [  516.876921] RDX: ffff9b4d809fbe40 RSI: 0000000000000286 RDI: ffff927915079600
2021-04-24T18:03:53.804321Z WARNING kernel: [  516.876922] RBP: ffff9b4d809fbe68 R08: 8080808080808080 R09: fefefefefefefeff
2021-04-24T18:03:53.804321Z WARNING kernel: [  516.876924] R10: 0000000000000010 R11: ffffffff92e44bd8 R12: ffff9279150796a0
2021-04-24T18:03:53.804322Z WARNING kernel: [  516.876925] R13: ffff92791c368180 R14: ffff927915079640 R15: 000000001c867605
2021-04-24T18:03:53.804323Z WARNING kernel: [  516.876926] FS:  0000000000000000(0000) GS:ffff92791ffc0000(0000) knlGS:0000000000000000
2021-04-24T18:03:53.804323Z WARNING kernel: [  516.876928] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
2021-04-24T18:03:53.804324Z WARNING kernel: [  516.876929] CR2: 0000239514955000 CR3: 00000007f82da001 CR4: 0000000000760ee0
2021-04-24T18:03:53.804325Z WARNING kernel: [  516.876930] PKRU: 55555554
2021-04-24T18:03:53.804325Z WARNING kernel: [  516.876931] Call Trace:
2021-04-24T18:03:53.804326Z WARNING kernel: [  516.876935]  __active_retire+0x77/0xcf
2021-04-24T18:03:53.804326Z WARNING kernel: [  516.876939]  process_one_work+0x1da/0x394
2021-04-24T18:03:53.804327Z WARNING kernel: [  516.876941]  worker_thread+0x216/0x375
2021-04-24T18:03:53.804327Z WARNING kernel: [  516.876944]  kthread+0x147/0x156
2021-04-24T18:03:53.804335Z WARNING kernel: [  516.876946]  ? pr_cont_work+0x58/0x58
2021-04-24T18:03:53.804335Z WARNING kernel: [  516.876948]  ? kthread_blkcg+0x2e/0x2e
2021-04-24T18:03:53.804336Z WARNING kernel: [  516.876950]  ret_from_fork+0x1f/0x40
2021-04-24T18:03:53.804336Z WARNING kernel: [  516.876952] Modules linked in: cdc_mbim cdc_ncm cdc_wdm xt_cgroup rfcomm cmac algif_hash algif_skcipher af_alg xt_MASQUERADE uinput snd_soc_rt5682_sdw snd_soc_rt5682 snd_soc_max98373_sdw snd_soc_max98373 snd_soc_rl6231 regmap_sdw snd_soc_sof_sdw snd_soc_hdac_hdmi snd_soc_dmic snd_hda_codec_hdmi snd_sof_pci snd_sof_intel_hda_common intel_ipu6_psys snd_sof_xtensa_dsp soundwire_intel soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda snd_sof snd_soc_hdac_hda snd_soc_acpi_intel_match snd_soc_acpi snd_hda_ext_core soundwire_bus snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core intel_ipu6_isys videobuf2_dma_contig videobuf2_v4l2 videobuf2_common videobuf2_memops mei_hdcp intel_ipu6 ov2740 ov8856 at24 sx9310 dw9768 v4l2_fwnode cros_ec_typec intel_pmc_mux roles acpi_als typec fuse iio_trig_sysfs cros_ec_light_prox cros_ec_lid_angle cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer cros_ec_sensors_ring kfifo_buf industrialio cros_ec_sensorhub
2021-04-24T18:03:53.804337Z WARNING kernel: [  516.876972]  cdc_ether usbnet iwlmvm lzo_rle lzo_compress iwl7000_mac80211 iwlwifi zram cfg80211 r8152 mii btusb btrtl btintel btbcm bluetooth ecdh_generic ecc joydev
2021-04-24T18:03:53.804337Z EMERG kernel: [  516.879169] gsmi: Log Shutdown Reason 0x03

This change fixes this by aligning the function.

Signed-off-by: Stéphane Marchesin <marcheu@chromium.org>
Fixes: 229007e02d69 ("drm/i915: Wrap i915_active in a simple kreffed struct")
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210429031021.1218091-1-marcheu@chromium.org
(cherry picked from commit ca419f407b43cc89942ebc297c7a63d94abbcae4)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_active.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index cf9a3d384971f..aa573b078ae75 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -1156,7 +1156,8 @@ static int auto_active(struct i915_active *ref)
 	return 0;
 }
 
-static void auto_retire(struct i915_active *ref)
+__i915_active_call static void
+auto_retire(struct i915_active *ref)
 {
 	i915_active_put(ref);
 }
-- 
GitLab


From a915fe5e9601c632417ef5261af70788d7d23a8a Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Date: Thu, 29 Apr 2021 09:35:29 +0100
Subject: [PATCH 0509/3804] drm/i915/overlay: Fix active retire callback
 alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

__i915_active_call annotation is required on the retire callback to ensure
correct function alignment.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Fixes: a21ce8ad12d2 ("drm/i915/overlay: Switch to using i915_active tracking")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210429083530.849546-1-tvrtko.ursulin@linux.intel.com
(cherry picked from commit d8e44e4dd221ee283ea60a6fb87bca08807aa0ab)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_overlay.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index e5dadde422f74..bbaf05515e883 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -383,7 +383,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
 		i830_overlay_clock_gating(dev_priv, true);
 }
 
-static void
+__i915_active_call static void
 intel_overlay_last_flip_retire(struct i915_active *active)
 {
 	struct intel_overlay *overlay =
-- 
GitLab


From e4527420ed087f99c6aa2ac22c6d3458c7dc1a94 Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Tue, 11 May 2021 17:39:30 +0530
Subject: [PATCH 0510/3804] drm/i915: Use correct downstream caps for check
 Src-Ctl mode for PCON

Fix the typo in DPCD caps used for checking SRC CTL mode of
HDMI2.1 PCON

v2: Corrected Fixes tag (Jani Nikula).
v3: Rebased.

Fixes: 04b6603d13be ("drm/i915/display: Configure HDMI2.1 Pcon for FRL only if Src-Ctl mode is available")

Cc: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Cc: Uma Shankar <uma.shankar@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Sean Paul <seanpaul@chromium.org>

Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Swati Sharma <swati2.sharma@intel.com>
Signed-off-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210511120930.12218-1-ankit.k.nautiyal@intel.com
(cherry picked from commit 88a9c5485c48ab60c89612a17fc89f4162bbdb9d)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 1e026177ed1ba..642c60f3d9b18 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2111,7 +2111,7 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp)
 	 * -PCON supports SRC_CTL_MODE (VESA DP2.0-HDMI2.1 PCON Spec Draft-1 Sec-7)
 	 * -sink is HDMI2.1
 	 */
-	if (!(intel_dp->dpcd[2] & DP_PCON_SOURCE_CTL_MODE) ||
+	if (!(intel_dp->downstream_ports[2] & DP_PCON_SOURCE_CTL_MODE) ||
 	    !intel_dp_is_hdmi_2_1_sink(intel_dp) ||
 	    intel_dp->frl.is_trained)
 		return;
-- 
GitLab


From cc00c1988801dc71f63bb7bad019e85046865095 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 12 May 2021 19:51:31 +0200
Subject: [PATCH 0511/3804] sched: Fix leftover comment typos

A few more snuck in. Also capitalize 'CPU' while at it.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched_clock.h | 2 +-
 kernel/sched/core.c         | 4 ++--
 kernel/sched/fair.c         | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index 528718e4ed528..835ee87ed7922 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -14,7 +14,7 @@
  * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
  *			clocks.
  * @read_sched_clock:	Current clock source (or dummy source when suspended).
- * @mult:		Multipler for scaled math conversion.
+ * @mult:		Multiplier for scaled math conversion.
  * @shift:		Shift value for scaled math conversion.
  *
  * Care must be taken when updating this structure; it is read by
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9d00f4958bde7..ac8882da5daf0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5506,7 +5506,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	}
 
 	/*
-	 * Try and select tasks for each sibling in decending sched_class
+	 * Try and select tasks for each sibling in descending sched_class
 	 * order.
 	 */
 	for_each_class(class) {
@@ -5520,7 +5520,7 @@ again:
 
 			/*
 			 * If this sibling doesn't yet have a suitable task to
-			 * run; ask for the most elegible task, given the
+			 * run; ask for the most eligible task, given the
 			 * highest priority task already selected for this
 			 * core.
 			 */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2635e10484213..161b92aa1c797 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10808,11 +10808,11 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
 	 * sched_slice() considers only this active rq and it gets the
 	 * whole slice. But during force idle, we have siblings acting
 	 * like a single runqueue and hence we need to consider runnable
-	 * tasks on this cpu and the forced idle cpu. Ideally, we should
+	 * tasks on this CPU and the forced idle CPU. Ideally, we should
 	 * go through the forced idle rq, but that would be a perf hit.
-	 * We can assume that the forced idle cpu has atleast
+	 * We can assume that the forced idle CPU has at least
 	 * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check
-	 * if we need to give up the cpu.
+	 * if we need to give up the CPU.
 	 */
 	if (rq->core->core_forceidle && rq->cfs.nr_running == 1 &&
 	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
-- 
GitLab


From c43426334b3169b6c9e6855483aa7384ff09fd33 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 12 May 2021 19:58:31 +0200
Subject: [PATCH 0512/3804] x86: Fix leftover comment typos

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/hyperv/hv_init.c             | 2 +-
 arch/x86/include/asm/sgx.h            | 2 +-
 arch/x86/include/asm/stackprotector.h | 2 +-
 arch/x86/kernel/kprobes/core.c        | 2 +-
 arch/x86/kvm/mmu/mmu.c                | 2 +-
 arch/x86/kvm/mmu/tdp_mmu.c            | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index bb0ae4b5c00f1..256ad0e34dd23 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -623,7 +623,7 @@ bool hv_query_ext_cap(u64 cap_query)
 	 * output parameter to the hypercall below and so it should be
 	 * compatible with 'virt_to_phys'. Which means, it's address should be
 	 * directly mapped. Use 'static' to keep it compatible; stack variables
-	 * can be virtually mapped, making them imcompatible with
+	 * can be virtually mapped, making them incompatible with
 	 * 'virt_to_phys'.
 	 * Hypercall input/output addresses should also be 8-byte aligned.
 	 */
diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 9c31e0ebc55b1..05f3e21f01a74 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -13,7 +13,7 @@
 /*
  * This file contains both data structures defined by SGX architecture and Linux
  * defined software data structures and functions.  The two should not be mixed
- * together for better readibility.  The architectural definitions come first.
+ * together for better readability.  The architectural definitions come first.
  */
 
 /* The SGX specific CPUID function. */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index b6ffe58c70fab..24a8d6c4fb185 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -11,7 +11,7 @@
  * The same segment is shared by percpu area and stack canary.  On
  * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
  * base of percpu area.  The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the approproate
+ * fixed_percpu_data which contains stack_canary at the appropriate
  * offset.  On x86_32, the stack canary is just a regular percpu
  * variable.
  *
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d3d65545cb8b7..7c4d0736a9987 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -674,7 +674,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
 			break;
 
 		if (insn->addr_bytes != sizeof(unsigned long))
-			return -EOPNOTSUPP;	/* Don't support differnt size */
+			return -EOPNOTSUPP;	/* Don't support different size */
 		if (X86_MODRM_MOD(opcode) != 3)
 			return -EOPNOTSUPP;	/* TODO: support memory addressing */
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0144c40d09c76..5e60b00e8e500 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2374,7 +2374,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 	 * page is available, while the caller may end up allocating as many as
 	 * four pages, e.g. for PAE roots or for 5-level paging.  Temporarily
 	 * exceeding the (arbitrary by default) limit will not harm the host,
-	 * being too agressive may unnecessarily kill the guest, and getting an
+	 * being too aggressive may unnecessarily kill the guest, and getting an
 	 * exact count is far more trouble than it's worth, especially in the
 	 * page fault paths.
 	 */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 95eeb5ac6a8a7..e09cb1f978009 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1017,7 +1017,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
 			/*
-			 * If SPTE has been forzen by another thread, just
+			 * If SPTE has been frozen by another thread, just
 			 * give up and retry, avoiding unnecessary page table
 			 * allocation and free.
 			 */
-- 
GitLab


From 93d0955e6cf562d02aae37f5f8d98d9d9d16e0d4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 12 May 2021 20:04:28 +0200
Subject: [PATCH 0513/3804] locking: Fix comment typos

A few snuck through.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep_types.h |  2 +-
 kernel/futex.c                | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 2ec9ff5a7fff0..3e726ace5c621 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -52,7 +52,7 @@ enum lockdep_lock_type {
  * NR_LOCKDEP_CACHING_CLASSES ... Number of classes
  * cached in the instance of lockdep_map
  *
- * Currently main class (subclass == 0) and signle depth subclass
+ * Currently main class (subclass == 0) and single depth subclass
  * are cached in lockdep_map. This optimization is mainly targeting
  * on rq->lock. double_rq_lock() acquires this highly competitive with
  * single depth.
diff --git a/kernel/futex.c b/kernel/futex.c
index 4938a00bc7857..2f386f0129001 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1874,7 +1874,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 	 * If the caller intends to requeue more than 1 waiter to pifutex,
 	 * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
 	 * as we have means to handle the possible fault.  If not, don't set
-	 * the bit unecessarily as it will force the subsequent unlock to enter
+	 * the bit unnecessarily as it will force the subsequent unlock to enter
 	 * the kernel.
 	 */
 	top_waiter = futex_top_waiter(hb1, key1);
@@ -2103,7 +2103,7 @@ retry_private:
 			continue;
 
 		/*
-		 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
+		 * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
 		 * be paired with each other and no other futex ops.
 		 *
 		 * We should never be requeueing a futex_q with a pi_state,
@@ -2318,7 +2318,7 @@ retry:
 }
 
 /*
- * PI futexes can not be requeued and must remove themself from the
+ * PI futexes can not be requeued and must remove themselves from the
  * hash bucket. The hash bucket lock (i.e. lock_ptr) is held.
  */
 static void unqueue_me_pi(struct futex_q *q)
@@ -2903,7 +2903,7 @@ no_block:
 	 */
 	res = fixup_owner(uaddr, &q, !ret);
 	/*
-	 * If fixup_owner() returned an error, proprogate that.  If it acquired
+	 * If fixup_owner() returned an error, propagate that.  If it acquired
 	 * the lock, clear our -ETIMEDOUT or -EINTR.
 	 */
 	if (res)
@@ -3280,7 +3280,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
 		 */
 		res = fixup_owner(uaddr2, &q, !ret);
 		/*
-		 * If fixup_owner() returned an error, proprogate that.  If it
+		 * If fixup_owner() returned an error, propagate that.  If it
 		 * acquired the lock, clear -ETIMEDOUT or -EINTR.
 		 */
 		if (res)
@@ -3678,7 +3678,7 @@ void futex_exec_release(struct task_struct *tsk)
 {
 	/*
 	 * The state handling is done for consistency, but in the case of
-	 * exec() there is no way to prevent futher damage as the PID stays
+	 * exec() there is no way to prevent further damage as the PID stays
 	 * the same. But for the unlikely and arguably buggy case that a
 	 * futex is held on exec(), this provides at least as much state
 	 * consistency protection which is possible.
-- 
GitLab


From ca0760e7d79e2bb9c342e6b3f925b1ef01c6303e Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Thu, 6 May 2021 20:30:51 +0800
Subject: [PATCH 0514/3804] Compiler Attributes: Add continue in comment

Add "continue;" for switch/case block according to Doc[1]

[1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/linux/compiler_attributes.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index c043b8d2b17bf..183ddd5fd0724 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -199,6 +199,7 @@
  * must end with any of these keywords:
  *   break;
  *   fallthrough;
+ *   continue;
  *   goto <label>;
  *   return [expression];
  *
-- 
GitLab


From c7d84e7ff5a651d186a6ec41361c4f07acc2fb9c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Tue, 27 Apr 2021 10:53:27 -0300
Subject: [PATCH 0515/3804] selftests: futex: Correctly include headers dirs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building selftests, the build system will install uapi linux
headers at usr/include in kernel source's root directory. When building
with a different output folder, the headers will be installed at
kselftests/usr/include.

Add both paths so we can build the tests using up-to-date headers.

Currently, this rarely happens, since it's rare to find a build system
with an outdated futex header, but it does happen when testing new
futex operations.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210427135328.11013-2-andrealmeid@collabora.com
---
 tools/testing/selftests/futex/functional/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec752..1d2b3b2a5b86b 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
+INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
+	-I$(KBUILD_OUTPUT)/kselftest/usr/include
 CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
 LDLIBS := -lpthread -lrt
 
-- 
GitLab


From f4addd54b1617067f735ad194a3580a2db7b8bf5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Tue, 27 Apr 2021 10:53:28 -0300
Subject: [PATCH 0516/3804] selftests: futex: Expand timeout test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Improve futex timeout testing by checking all the operations that
support timeouts and their available modes.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210427135328.11013-3-andrealmeid@collabora.com
---
 .../futex/functional/futex_wait_timeout.c     | 126 +++++++++++++++---
 1 file changed, 110 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3f..1f8f6daaf1e70 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,21 +11,18 @@
  *
  * HISTORY
  *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *      2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
  *
  *****************************************************************************/
 
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <pthread.h>
 #include "futextest.h"
 #include "logging.h"
 
 #define TEST_NAME "futex-wait-timeout"
 
 static long timeout_ns = 100000;	/* 100us default timeout */
+static futex_t futex_pi;
 
 void usage(char *prog)
 {
@@ -37,11 +34,67 @@ void usage(char *prog)
 	       VQUIET, VCRITICAL, VINFO);
 }
 
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ */
+void *get_pi_lock(void *arg)
+{
+	int ret;
+	volatile futex_t lock = 0;
+
+	ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+	if (ret != 0)
+		error("futex_lock_pi failed\n", ret);
+
+	/* Blocks forever */
+	ret = futex_wait(&lock, 0, NULL, 0);
+	error("futex_wait failed\n", ret);
+
+	return NULL;
+}
+
+/*
+ * Check if the function returned the expected error
+ */
+static void test_timeout(int res, int *ret, char *test_name, int err)
+{
+	if (!res || errno != err) {
+		ksft_test_result_fail("%s returned %d\n", test_name,
+				      res < 0 ? errno : res);
+		*ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("%s succeeds\n", test_name);
+	}
+}
+
+/*
+ * Calculate absolute timeout and correct overflow
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+				 long timeout_ns)
+{
+	if (clock_gettime(clockid, to)) {
+		error("clock_gettime failed\n", errno);
+		return errno;
+	}
+
+	to->tv_nsec += timeout_ns;
+
+	if (to->tv_nsec >= 1000000000) {
+		to->tv_sec++;
+		to->tv_nsec -= 1000000000;
+	}
+
+	return 0;
+}
+
 int main(int argc, char *argv[])
 {
 	futex_t f1 = FUTEX_INITIALIZER;
-	struct timespec to;
 	int res, ret = RET_PASS;
+	struct timespec to;
+	pthread_t thread;
 	int c;
 
 	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
@@ -65,22 +118,63 @@ int main(int argc, char *argv[])
 	}
 
 	ksft_print_header();
-	ksft_set_plan(1);
+	ksft_set_plan(7);
 	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
 	       basename(argv[0]));
 	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
 
-	/* initialize timeout */
+	pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+	/* initialize relative timeout */
 	to.tv_sec = 0;
 	to.tv_nsec = timeout_ns;
 
-	info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
-	res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
-	if (!res || errno != ETIMEDOUT) {
-		fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
-		ret = RET_FAIL;
-	}
+	res = futex_wait(&f1, f1, &to, 0);
+	test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+
+	/* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+	if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+	test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+
+	/* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+	if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+	test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+
+	/* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+	if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+	test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+	/* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+	if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+	test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+	/*
+	 * FUTEX_LOCK_PI with CLOCK_REALTIME
+	 * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+	 * clock, but requires the caller to not set CLOCK_REALTIME flag.
+	 *
+	 * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+	 * interpreted as a realtime clock, and (unless you mess your machine's
+	 * time or your time machine) the monotonic clock value is always
+	 * smaller than realtime and the syscall will timeout immediately.
+	 */
+	if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_lock_pi(&futex_pi, &to, 0, 0);
+	test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+
+	/* Test operations that don't support FUTEX_CLOCK_REALTIME */
+	res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+	test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
 
-	print_result(TEST_NAME, ret);
+	ksft_print_cnts();
 	return ret;
 }
-- 
GitLab


From 46c7405df7de8deb97229eacebcee96d61415f3f Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 12 May 2021 19:42:10 +0200
Subject: [PATCH 0517/3804] objtool: Fix elf_create_undef_symbol() endianness

Currently x86 cross-compilation fails on big endian system with:

  x86_64-cross-ld: init/main.o: invalid string offset 488112128 >= 6229 for section `.strtab'

Mark new ELF data in elf_create_undef_symbol() as symbol, so that libelf
does endianness handling correctly.

Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: https://lore.kernel.org/r/patch-1.thread-6c9df9.git-d39264656387.your-ad-here.call-01620841104-ext-2554@work.hours
---
 tools/objtool/elf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index d08f5f3670f88..743c2e9d0f564 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -762,6 +762,7 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
 	data->d_buf = &sym->sym;
 	data->d_size = sizeof(sym->sym);
 	data->d_align = 1;
+	data->d_type = ELF_T_SYM;
 
 	sym->idx = symtab->len / sizeof(sym->sym);
 
-- 
GitLab


From f66c05d6baf36069c01a02f869bebb75586f2318 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 12 May 2021 19:42:13 +0200
Subject: [PATCH 0518/3804] objtool/x86: Fix elf_add_alternative() endianness

Currently x86 kernel cross-compiled on big endian system fails at boot with:

  kernel BUG at arch/x86/kernel/alternative.c:258!

The corresponding bug condition looks like the following:

  BUG_ON(feature >= (NCAPINTS + NBUGINTS) * 32);

Fix that by converting alternative feature/cpuid to target endianness.

Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: https://lore.kernel.org/r/patch-2.thread-6c9df9.git-6c9df9a8098d.your-ad-here.call-01620841104-ext-2554@work.hours
---
 tools/objtool/arch/x86/decode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index cedf3ede75455..24295d39713b2 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
 #include <objtool/elf.h>
 #include <objtool/arch.h>
 #include <objtool/warn.h>
+#include <objtool/endianness.h>
 #include <arch/elf.h>
 
 static int is_x86_64(const struct elf *elf)
@@ -725,7 +726,7 @@ static int elf_add_alternative(struct elf *elf,
 		return -1;
 	}
 
-	alt->cpuid = cpuid;
+	alt->cpuid = bswap_if_needed(cpuid);
 	alt->instrlen = orig_len;
 	alt->replacementlen = repl_len;
 
-- 
GitLab


From 01aee8fd7fb23049e2b52abadbe1f7b5e94a52d2 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Apr 2021 23:02:25 +0300
Subject: [PATCH 0519/3804] sched: Make nr_running() return 32-bit value

Creating 2**32 tasks is impossible due to futex pid limits and wasteful
anyway. Nobody has done it.

Bring nr_running() into 32-bit world to save on REX prefixes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210422200228.1423391-1-adobriyan@gmail.com
---
 fs/proc/loadavg.c          | 2 +-
 fs/proc/stat.c             | 2 +-
 include/linux/sched/stat.h | 2 +-
 kernel/sched/core.c        | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 8468baee951d2..f32878d9a39f3 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -16,7 +16,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
 
 	get_avenrun(avnrun, FIXED_1/200, 0);
 
-	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %u/%d %d\n",
 		LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
 		LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
 		LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index f25e8531fd279..941605de7f9a8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -200,7 +200,7 @@ static int show_stat(struct seq_file *p, void *v)
 		"\nctxt %llu\n"
 		"btime %llu\n"
 		"processes %lu\n"
-		"procs_running %lu\n"
+		"procs_running %u\n"
 		"procs_blocked %lu\n",
 		nr_context_switches(),
 		(unsigned long long)boottime.tv_sec,
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 939c3ec9e1b90..73606b3de394e 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -17,7 +17,7 @@ extern unsigned long total_forks;
 extern int nr_threads;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
-extern unsigned long nr_running(void);
+extern unsigned int nr_running(void);
 extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ac8882da5daf0..2c6cdb059c64c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4692,9 +4692,9 @@ context_switch(struct rq *rq, struct task_struct *prev,
  * externally visible scheduler statistics: current number of runnable
  * threads, total number of context switches performed since bootup.
  */
-unsigned long nr_running(void)
+unsigned int nr_running(void)
 {
-	unsigned long i, sum = 0;
+	unsigned int i, sum = 0;
 
 	for_each_online_cpu(i)
 		sum += cpu_rq(i)->nr_running;
-- 
GitLab


From 9745516841a55c77163a5d549bce1374d776df54 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Apr 2021 23:02:26 +0300
Subject: [PATCH 0520/3804] sched: Make nr_iowait() return 32-bit value

Creating 2**32 tasks to wait in D-state is impossible and wasteful.

Return "unsigned int" and save on REX prefixes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210422200228.1423391-2-adobriyan@gmail.com
---
 fs/proc/stat.c             | 2 +-
 include/linux/sched/stat.h | 2 +-
 kernel/sched/core.c        | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 941605de7f9a8..6561a06ef9059 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -201,7 +201,7 @@ static int show_stat(struct seq_file *p, void *v)
 		"btime %llu\n"
 		"processes %lu\n"
 		"procs_running %u\n"
-		"procs_blocked %lu\n",
+		"procs_blocked %u\n",
 		nr_context_switches(),
 		(unsigned long long)boottime.tv_sec,
 		total_forks,
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 73606b3de394e..81d9b539e3b77 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -19,7 +19,7 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned int nr_running(void);
 extern bool single_task_running(void);
-extern unsigned long nr_iowait(void);
+extern unsigned int nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2c6cdb059c64c..fadf2bf1e86f6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4774,9 +4774,9 @@ unsigned long nr_iowait_cpu(int cpu)
  * Task CPU affinities can make all that even more 'interesting'.
  */
 
-unsigned long nr_iowait(void)
+unsigned int nr_iowait(void)
 {
-	unsigned long i, sum = 0;
+	unsigned int i, sum = 0;
 
 	for_each_possible_cpu(i)
 		sum += nr_iowait_cpu(i);
-- 
GitLab


From 8fc2858e572ce761bffcade81a42ac72005e76f9 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Apr 2021 23:02:27 +0300
Subject: [PATCH 0521/3804] sched: Make nr_iowait_cpu() return 32-bit value

Runqueue ->nr_iowait counters are 32-bit anyway.

Propagate 32-bitness into other code, but don't try too hard.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210422200228.1423391-3-adobriyan@gmail.com
---
 drivers/cpuidle/governors/menu.c | 6 +++---
 include/linux/sched/stat.h       | 2 +-
 kernel/sched/core.c              | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index c3aa8d6ccee33..2e5670446991f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -117,7 +117,7 @@ struct menu_device {
 	int		interval_ptr;
 };
 
-static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
+static inline int which_bucket(u64 duration_ns, unsigned int nr_iowaiters)
 {
 	int bucket = 0;
 
@@ -150,7 +150,7 @@ static inline int which_bucket(u64 duration_ns, unsigned long nr_iowaiters)
  * to be, the higher this multiplier, and thus the higher
  * the barrier to go to an expensive C state.
  */
-static inline int performance_multiplier(unsigned long nr_iowaiters)
+static inline int performance_multiplier(unsigned int nr_iowaiters)
 {
 	/* for IO wait tasks (per cpu!) we add 10x each */
 	return 1 + 10 * nr_iowaiters;
@@ -270,7 +270,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	unsigned int predicted_us;
 	u64 predicted_ns;
 	u64 interactivity_req;
-	unsigned long nr_iowaiters;
+	unsigned int nr_iowaiters;
 	ktime_t delta, delta_tick;
 	int i, idx;
 
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 81d9b539e3b77..0108a38bb64d7 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -20,7 +20,7 @@ extern int nr_processes(void);
 extern unsigned int nr_running(void);
 extern bool single_task_running(void);
 extern unsigned int nr_iowait(void);
-extern unsigned long nr_iowait_cpu(int cpu);
+extern unsigned int nr_iowait_cpu(int cpu);
 
 static inline int sched_info_on(void)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fadf2bf1e86f6..24fd795e4b8c2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4739,7 +4739,7 @@ unsigned long long nr_context_switches(void)
  * it does become runnable.
  */
 
-unsigned long nr_iowait_cpu(int cpu)
+unsigned int nr_iowait_cpu(int cpu)
 {
 	return atomic_read(&cpu_rq(cpu)->nr_iowait);
 }
-- 
GitLab


From e6fe3f422be128b7d65de607f6ae67bedc55f0ca Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 22 Apr 2021 23:02:28 +0300
Subject: [PATCH 0522/3804] sched: Make multiple runqueue task counters 32-bit

Make:

	struct dl_rq::dl_nr_migratory
	struct dl_rq::dl_nr_running

	struct rt_rq::rt_nr_boosted
	struct rt_rq::rt_nr_migratory
	struct rt_rq::rt_nr_total

	struct rq::nr_uninterruptible

32-bit.

If total number of tasks can't exceed 2**32 (and less due to futex pid
limits), then per-runqueue counters can't as well.

This patchset has been sponsored by REX Prefix Eradication Society.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210422200228.1423391-4-adobriyan@gmail.com
---
 kernel/sched/loadavg.c |  2 +-
 kernel/sched/sched.h   | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 1c79896f1bc09..954b229868d98 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -81,7 +81,7 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
 	long nr_active, delta = 0;
 
 	nr_active = this_rq->nr_running - adjust;
-	nr_active += (long)this_rq->nr_uninterruptible;
+	nr_active += (int)this_rq->nr_uninterruptible;
 
 	if (nr_active != this_rq->calc_load_active) {
 		delta = nr_active - this_rq->calc_load_active;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 904c52b560d16..8f0194cee0baf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -636,8 +636,8 @@ struct rt_rq {
 	} highest_prio;
 #endif
 #ifdef CONFIG_SMP
-	unsigned long		rt_nr_migratory;
-	unsigned long		rt_nr_total;
+	unsigned int		rt_nr_migratory;
+	unsigned int		rt_nr_total;
 	int			overloaded;
 	struct plist_head	pushable_tasks;
 
@@ -651,7 +651,7 @@ struct rt_rq {
 	raw_spinlock_t		rt_runtime_lock;
 
 #ifdef CONFIG_RT_GROUP_SCHED
-	unsigned long		rt_nr_boosted;
+	unsigned int		rt_nr_boosted;
 
 	struct rq		*rq;
 	struct task_group	*tg;
@@ -668,7 +668,7 @@ struct dl_rq {
 	/* runqueue is an rbtree, ordered by deadline */
 	struct rb_root_cached	root;
 
-	unsigned long		dl_nr_running;
+	unsigned int		dl_nr_running;
 
 #ifdef CONFIG_SMP
 	/*
@@ -682,7 +682,7 @@ struct dl_rq {
 		u64		next;
 	} earliest_dl;
 
-	unsigned long		dl_nr_migratory;
+	unsigned int		dl_nr_migratory;
 	int			overloaded;
 
 	/*
@@ -960,7 +960,7 @@ struct rq {
 	 * one CPU and if it got migrated afterwards it may decrease
 	 * it on another CPU. Always updated under the runqueue lock:
 	 */
-	unsigned long		nr_uninterruptible;
+	unsigned int		nr_uninterruptible;
 
 	struct task_struct __rcu	*curr;
 	struct task_struct	*idle;
-- 
GitLab


From 83a775d5f9bfda95b1c295f95a3a041a40c7f321 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 30 Apr 2021 12:37:24 +0100
Subject: [PATCH 0523/3804] KEYS: trusted: Fix memory leak on object td

Two error return paths are neglecting to free allocated object td,
causing a memory leak. Fix this by returning via the error return
path that securely kfree's td.

Fixes clang scan-build warning:
security/keys/trusted-keys/trusted_tpm1.c:496:10: warning: Potential
memory leak [unix.Malloc]

Cc: stable@vger.kernel.org
Fixes: 5df16caada3f ("KEYS: trusted: Fix incorrect handling of tpm_get_random()")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_tpm1.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c
index 4693945508019..aa108bea6739b 100644
--- a/security/keys/trusted-keys/trusted_tpm1.c
+++ b/security/keys/trusted-keys/trusted_tpm1.c
@@ -493,10 +493,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
 
 	ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	if (ret != TPM_NONCE_SIZE)
-		return -EIO;
+	if (ret != TPM_NONCE_SIZE) {
+		ret = -EIO;
+		goto out;
+	}
 
 	ordinal = htonl(TPM_ORD_SEAL);
 	datsize = htonl(datalen);
-- 
GitLab


From b3ad7855b7ae3bed4242894d07bdb7f186652dbe Mon Sep 17 00:00:00 2001
From: Ben Boeckel <mathstuf@gmail.com>
Date: Thu, 29 Apr 2021 15:21:56 -0400
Subject: [PATCH 0524/3804] trusted-keys: match tpm_get_ops on all return paths

The `tpm_get_ops` call at the beginning of the function is not paired
with a `tpm_put_ops` on this return path.

Cc: stable@vger.kernel.org
Fixes: f2219745250f ("security: keys: trusted: use ASN.1 TPM2 key format for the blobs")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ben Boeckel <mathstuf@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 security/keys/trusted-keys/trusted_tpm2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 617fabd4d913b..0165da386289c 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -336,9 +336,9 @@ out:
 			rc = -EPERM;
 	}
 	if (blob_len < 0)
-		return blob_len;
-
-	payload->blob_len = blob_len;
+		rc = blob_len;
+	else
+		payload->blob_len = blob_len;
 
 	tpm_put_ops(chip);
 	return rc;
-- 
GitLab


From e630af7dfb450d1c00c30077314acf33032ff9e4 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 10 May 2021 15:28:30 +0300
Subject: [PATCH 0525/3804] tpm, tpm_tis: Extend locality handling to TPM2 in
 tpm_tis_gen_interrupt()

The earlier fix (linked) only partially fixed the locality handling bug
in tpm_tis_gen_interrupt(), i.e. only for TPM 1.x.

Extend the locality handling to cover TPM2.

Cc: Hans de Goede <hdegoede@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-integrity/20210220125534.20707-1-jarkko@kernel.org/
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()")
Reported-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Tested-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
---
 drivers/char/tpm/tpm_tis_core.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index a2e0395cbe618..6fa150a3b75e0 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -709,16 +709,14 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
 	cap_t cap;
 	int ret;
 
-	/* TPM 2.0 */
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-
-	/* TPM 1.2 */
 	ret = request_locality(chip, 0);
 	if (ret < 0)
 		return ret;
 
-	ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+	else
+		ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
 
 	release_locality(chip, 0);
 
-- 
GitLab


From 8a2d296aaebadd68d9c1f6908667df1d1c84c051 Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Mon, 10 May 2021 15:28:31 +0300
Subject: [PATCH 0526/3804] tpm, tpm_tis: Reserve locality in tpm_tis_resume()

Reserve locality in tpm_tis_resume(), as it could be unset after waking
up from a sleep state.

Cc: stable@vger.kernel.org
Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()")
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 6fa150a3b75e0..55b9d3965ae1b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1125,12 +1125,20 @@ int tpm_tis_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	/* TPM 1.2 requires self-test on resume. This function actually returns
+	/*
+	 * TPM 1.2 requires self-test on resume. This function actually returns
 	 * an error code but for unknown reason it isn't handled.
 	 */
-	if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
+	if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+		ret = request_locality(chip, 0);
+		if (ret < 0)
+			return ret;
+
 		tpm1_do_selftest(chip);
 
+		release_locality(chip, 0);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_tis_resume);
-- 
GitLab


From 1df83992d977355177810c2b711afc30546c81ce Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 12 May 2021 21:39:26 +0800
Subject: [PATCH 0527/3804] tpm: fix error return code in
 tpm2_get_cc_attrs_tbl()

If the total number of commands queried through TPM2_CAP_COMMANDS is
different from that queried through TPM2_CC_GET_CAPABILITY, it indicates
an unknown error. In this case, an appropriate error code -EFAULT should
be returned. However, we currently do not explicitly assign this error
code to 'rc'. As a result, 0 was incorrectly returned.

Cc: stable@vger.kernel.org
Fixes: 58472f5cd4f6 ("tpm: validate TPM 2.0 commands")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm2-cmd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index eff1f12d981ab..c84d239512197 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -656,6 +656,7 @@ int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
 
 	if (nr_commands !=
 	    be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) {
+		rc = -EFAULT;
 		tpm_buf_destroy(&buf);
 		goto out;
 	}
-- 
GitLab


From 681865a03d3ec6ac3dda147044ed2a1a0f49f7bf Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Mon, 19 Apr 2021 19:27:25 +0800
Subject: [PATCH 0528/3804] libnvdimm: Remove duplicate struct declaration

struct device is already declared on line 133, so the second
declaration is unnecessary. Remove it.

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Link: https://lore.kernel.org/r/20210419112725.42145-1-wanjiabing@vivo.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/libnvdimm.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 01f251b6e36c5..89b69e645ac74 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -141,7 +141,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
 
 struct nvdimm_bus;
 struct module;
-struct device;
 struct nd_blk_region;
 struct nd_blk_region_desc {
 	int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
-- 
GitLab


From 7ddb4cc2b885c740523e6ea54a1f4434acfa3368 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 20 Apr 2021 15:47:47 +0800
Subject: [PATCH 0529/3804] tools/testing/nvdimm: Make symbol
 '__nfit_test_ioremap' static

The sparse tool complains as follows:

tools/testing/nvdimm/test/iomap.c:65:14: warning:
 symbol '__nfit_test_ioremap' was not declared. Should it be static?

This symbol is not used outside of iomap.c, so this
commit marks it static.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1618904867-25275-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/iomap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c62d372d426fb..ed563bdd88f39 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -62,7 +62,7 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
 }
 EXPORT_SYMBOL(get_nfit_res);
 
-void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
 		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
 {
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
-- 
GitLab


From 3dd4fe4b4dfa34e7487edfe159ef787ba397cfa9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 21 Apr 2021 00:05:28 -0700
Subject: [PATCH 0530/3804] MAINTAINERS: Move nvdimm mailing list

After seeing some users have subscription management trouble, more spam
than other Linux development lists, and considering some of the benefits
of kernel.org hosted lists, nvdimm and persistent memory development is
moving to nvdimm@lists.linux.dev.

The old list will remain up until v5.14-rc1 and be shut down thereafter.

Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Link: https://lore.kernel.org/r/161898872871.3406469.4054282559340528393.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 Documentation/ABI/obsolete/sysfs-class-dax    |  2 +-
 Documentation/ABI/removed/sysfs-bus-nfit      |  2 +-
 Documentation/ABI/testing/sysfs-bus-nfit      | 40 +++++++++----------
 Documentation/ABI/testing/sysfs-bus-papr-pmem |  4 +-
 Documentation/driver-api/nvdimm/nvdimm.rst    |  2 +-
 MAINTAINERS                                   | 14 +++----
 6 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax
index 0faf1354cd054..5bcce27458e30 100644
--- a/Documentation/ABI/obsolete/sysfs-class-dax
+++ b/Documentation/ABI/obsolete/sysfs-class-dax
@@ -1,7 +1,7 @@
 What:           /sys/class/dax/
 Date:           May, 2016
 KernelVersion:  v4.7
-Contact:        linux-nvdimm@lists.01.org
+Contact:        nvdimm@lists.linux.dev
 Description:	Device DAX is the device-centric analogue of Filesystem
 		DAX (CONFIG_FS_DAX).  It allows memory ranges to be
 		allocated and mapped without need of an intervening file
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit
index ae8c1ca538287..277437005def7 100644
--- a/Documentation/ABI/removed/sysfs-bus-nfit
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -1,7 +1,7 @@
 What:		/sys/bus/nd/devices/regionX/nfit/ecc_unit_size
 Date:		Aug, 2017
 KernelVersion:	v4.14 (Removed v4.18)
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Size of a write request to a DIMM that will not incur a
 		read-modify-write cycle at the memory controller.
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit
index 63ef0b9ecce70..e7282d184a747 100644
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -5,7 +5,7 @@ Interface Table (NFIT)' section in the ACPI specification
 What:		/sys/bus/nd/devices/nmemX/nfit/serial
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Serial number of the NVDIMM (non-volatile dual in-line
 		memory module), assigned by the module vendor.
@@ -14,7 +14,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/handle
 Date:		Apr, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The address (given by the _ADR object) of the device on its
 		parent bus of the NVDIMM device containing the NVDIMM region.
@@ -23,7 +23,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/device
 Date:		Apr, 2015
 KernelVersion:	v4.1
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Device id for the NVDIMM, assigned by the module vendor.
 
@@ -31,7 +31,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/rev_id
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Revision of the NVDIMM, assigned by the module vendor.
 
@@ -39,7 +39,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/phys_id
 Date:		Apr, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Handle (i.e., instance number) for the SMBIOS (system
 		management BIOS) Memory Device structure describing the NVDIMM
@@ -49,7 +49,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/flags
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The flags in the NFIT memory device sub-structure indicate
 		the state of the data on the nvdimm relative to its energy
@@ -68,7 +68,7 @@ What:		/sys/bus/nd/devices/nmemX/nfit/format1
 What:		/sys/bus/nd/devices/nmemX/nfit/formats
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The interface codes indicate support for persistent memory
 		mapped directly into system physical address space and / or a
@@ -84,7 +84,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/vendor
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Vendor id of the NVDIMM.
 
@@ -92,7 +92,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/dsm_mask
 Date:		May, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The bitmask indicates the supported device specific control
 		functions relative to the NVDIMM command family supported by the
@@ -102,7 +102,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/family
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Displays the NVDIMM family command sets. Values
 		0, 1, 2 and 3 correspond to NVDIMM_FAMILY_INTEL,
@@ -118,7 +118,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/id
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) ACPI specification 6.2 section 5.2.25.9, defines an
 		identifier for an NVDIMM, which refelects the id attribute.
@@ -127,7 +127,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_vendor
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system vendor id of the NVDIMM non-volatile memory
 		subsystem controller.
@@ -136,7 +136,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_rev_id
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system revision id of the NVDIMM non-volatile memory subsystem
 		controller, assigned by the non-volatile memory subsystem
@@ -146,7 +146,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/nfit/subsystem_device
 Date:		Apr, 2016
 KernelVersion:	v4.7
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) Sub-system device id for the NVDIMM non-volatile memory
 		subsystem controller, assigned by the non-volatile memory
@@ -156,7 +156,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/revision
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) ACPI NFIT table revision number.
 
@@ -164,7 +164,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/scrub
 Date:		Sep, 2016
 KernelVersion:	v4.9
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) This shows the number of full Address Range Scrubs (ARS)
 		that have been completed since driver load time. Userspace can
@@ -177,7 +177,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/hw_error_scrub
 Date:		Sep, 2016
 KernelVersion:	v4.9
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) Provides a way to toggle the behavior between just adding
 		the address (cache line) where the MCE happened to the poison
@@ -196,7 +196,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/dsm_mask
 Date:		Jun, 2017
 KernelVersion:	v4.13
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) The bitmask indicates the supported bus specific control
 		functions. See the section named 'NVDIMM Root Device _DSMs' in
@@ -205,7 +205,7 @@ Description:
 What:		/sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle
 Date:		Apr, 2020
 KernelVersion:	v5.8
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RW) The Intel platform implementation of firmware activate
 		support exposes an option let the platform force idle devices in
@@ -225,7 +225,7 @@ Description:
 What:		/sys/bus/nd/devices/regionX/nfit/range_index
 Date:		Jun, 2015
 KernelVersion:	v4.2
-Contact:	linux-nvdimm@lists.01.org
+Contact:	nvdimm@lists.linux.dev
 Description:
 		(RO) A unique number provided by the BIOS to identify an address
 		range. Used by NVDIMM Region Mapping Structure to uniquely refer
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem
index 8316c33862a04..92e2db0e2d3de 100644
--- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -1,7 +1,7 @@
 What:		/sys/bus/nd/devices/nmemX/papr/flags
 Date:		Apr, 2020
 KernelVersion:	v5.8
-Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
 Description:
 		(RO) Report flags indicating various states of a
 		papr-pmem NVDIMM device. Each flag maps to a one or
@@ -36,7 +36,7 @@ Description:
 What:		/sys/bus/nd/devices/nmemX/papr/perf_stats
 Date:		May, 2020
 KernelVersion:	v5.9
-Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
 Description:
 		(RO) Report various performance stats related to papr-scm NVDIMM
 		device.  Each stat is reported on a new line with each line
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst
index ef6d59e0978e7..1d8302b89bd47 100644
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -4,7 +4,7 @@ LIBNVDIMM: Non-Volatile Devices
 
 libnvdimm - kernel / libndctl - userspace helper library
 
-linux-nvdimm@lists.01.org
+nvdimm@lists.linux.dev
 
 Version 13
 
diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..6b5d489022ea8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5237,7 +5237,7 @@ DEVICE DIRECT ACCESS (DAX)
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 F:	drivers/dax/
 
@@ -7006,7 +7006,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 R:	Matthew Wilcox <willy@infradead.org>
 R:	Jan Kara <jack@suse.cz>
 L:	linux-fsdevel@vger.kernel.org
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 F:	fs/dax.c
 F:	include/linux/dax.h
@@ -10378,7 +10378,7 @@ LIBNVDIMM BLK: MMIO-APERTURE DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10389,7 +10389,7 @@ LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10399,7 +10399,7 @@ LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
 M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10407,7 +10407,7 @@ F:	drivers/nvdimm/pmem*
 
 LIBNVDIMM: DEVICETREE BINDINGS
 M:	Oliver O'Halloran <oohall@gmail.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -10418,7 +10418,7 @@ M:	Dan Williams <dan.j.williams@intel.com>
 M:	Vishal Verma <vishal.l.verma@intel.com>
 M:	Dave Jiang <dave.jiang@intel.com>
 M:	Ira Weiny <ira.weiny@intel.com>
-L:	linux-nvdimm@lists.01.org
+L:	nvdimm@lists.linux.dev
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 P:	Documentation/nvdimm/maintainer-entry-profile.rst
-- 
GitLab


From e9cfd259c6d386f6235395a13bd4f357a979b2d0 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 7 May 2021 00:33:50 -0700
Subject: [PATCH 0531/3804] ACPI: NFIT: Fix support for variable 'SPA'
 structure size

ACPI 6.4 introduced the "SpaLocationCookie" to the NFIT "System Physical
Address (SPA) Range Structure". The presence of that new field is
indicated by the ACPI_NFIT_LOCATION_COOKIE_VALID flag. Pre-ACPI-6.4
firmware implementations omit the flag and maintain the original size of
the structure.

Update the implementation to check that flag to determine the size
rather than the ACPI 6.4 compliant definition of 'struct
acpi_nfit_system_address' from the Linux ACPICA definitions.

Update the test infrastructure for the new expectations as well, i.e.
continue to emulate the ACPI 6.3 definition of that structure.

Without this fix the kernel fails to validate 'SPA' structures and this
leads to a crash in nfit_get_smbios_id() since that routine assumes that
SPAs are valid if it finds valid SMBIOS tables.

    BUG: unable to handle page fault for address: ffffffffffffffa8
    [..]
    Call Trace:
     skx_get_nvdimm_info+0x56/0x130 [skx_edac]
     skx_get_dimm_config+0x1f5/0x213 [skx_edac]
     skx_register_mci+0x132/0x1c0 [skx_edac]

Cc: Bob Moore <robert.moore@intel.com>
Cc: Erik Kaneda <erik.kaneda@intel.com>
Fixes: cf16b05c607b ("ACPICA: ACPI 6.4: NFIT: add Location Cookie field")
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/162037273007.1195827.10907249070709169329.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c         | 15 +++++++++---
 tools/testing/nvdimm/test/nfit.c | 42 +++++++++++++++++++-------------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 958aaac869e8d..23d9a09d70604 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -686,6 +686,13 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
 	return -1;
 }
 
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+	if (spa->flags & ACPI_NFIT_LOCATION_COOKIE_VALID)
+		return sizeof(*spa);
+	return sizeof(*spa) - 8;
+}
+
 static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_table_prev *prev,
 		struct acpi_nfit_system_address *spa)
@@ -693,22 +700,22 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
 
-	if (spa->header.length != sizeof(*spa))
+	if (spa->header.length != sizeof_spa(spa))
 		return false;
 
 	list_for_each_entry(nfit_spa, &prev->spas, list) {
-		if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+		if (memcmp(nfit_spa->spa, spa, sizeof_spa(spa)) == 0) {
 			list_move_tail(&nfit_spa->list, &acpi_desc->spas);
 			return true;
 		}
 	}
 
-	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+	nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof_spa(spa),
 			GFP_KERNEL);
 	if (!nfit_spa)
 		return false;
 	INIT_LIST_HEAD(&nfit_spa->list);
-	memcpy(nfit_spa->spa, spa, sizeof(*spa));
+	memcpy(nfit_spa->spa, spa, sizeof_spa(spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
 	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 9b185bf82da87..54f367cbadaee 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1871,9 +1871,16 @@ static void smart_init(struct nfit_test *t)
 	}
 }
 
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+	/* until spa location cookie support is added... */
+	return sizeof(*spa) - 8;
+}
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+	struct acpi_nfit_system_address *spa = NULL;
+	size_t nfit_size = sizeof_spa(spa) * NUM_SPA
 			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
 			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
 			+ offsetof(struct acpi_nfit_control_region,
@@ -1937,7 +1944,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
 
 static int nfit_test1_alloc(struct nfit_test *t)
 {
-	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+	struct acpi_nfit_system_address *spa = NULL;
+	size_t nfit_size = sizeof_spa(spa) * 2
 		+ sizeof(struct acpi_nfit_memory_map) * 2
 		+ offsetof(struct acpi_nfit_control_region, window_size) * 2;
 	int i;
@@ -2000,7 +2008,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	 */
 	spa = nfit_buf;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
@@ -2014,7 +2022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 1+1;
 	spa->address = t->spa_set_dma[1];
@@ -2024,7 +2032,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa2 (dcr0) dimm0 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 2+1;
 	spa->address = t->dcr_dma[0];
@@ -2034,7 +2042,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa3 (dcr1) dimm1 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 3+1;
 	spa->address = t->dcr_dma[1];
@@ -2044,7 +2052,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa4 (dcr2) dimm2 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 4+1;
 	spa->address = t->dcr_dma[2];
@@ -2054,7 +2062,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa5 (dcr3) dimm3 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 5+1;
 	spa->address = t->dcr_dma[3];
@@ -2064,7 +2072,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa6 (bdw for dcr0) dimm0 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 6+1;
 	spa->address = t->dimm_dma[0];
@@ -2074,7 +2082,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa7 (bdw for dcr1) dimm1 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 7+1;
 	spa->address = t->dimm_dma[1];
@@ -2084,7 +2092,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa8 (bdw for dcr2) dimm2 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 8+1;
 	spa->address = t->dimm_dma[2];
@@ -2094,7 +2102,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	/* spa9 (bdw for dcr3) dimm3 */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 9+1;
 	spa->address = t->dimm_dma[3];
@@ -2581,7 +2589,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* spa10 (dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 		spa->range_index = 10+1;
 		spa->address = t->dcr_dma[4];
@@ -2595,7 +2603,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 		spa->range_index = 11+1;
 		spa->address = t->spa_set_dma[2];
@@ -2605,7 +2613,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		/* spa12 (bdw for dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-		spa->header.length = sizeof(*spa);
+		spa->header.length = sizeof_spa(spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 		spa->range_index = 12+1;
 		spa->address = t->dimm_dma[4];
@@ -2739,7 +2747,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	/* spa0 (flat range with no bdw aliasing) */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
@@ -2749,7 +2757,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	/* virtual cd region */
 	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-	spa->header.length = sizeof(*spa);
+	spa->header.length = sizeof_spa(spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
 	spa->range_index = 0;
 	spa->address = t->spa_set_dma[1];
-- 
GitLab


From a554e740b66a83c7560b30e6b50bece37555ced3 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 22 Apr 2021 12:04:42 -0700
Subject: [PATCH 0532/3804] x86/boot/compressed: Enable -Wundef

A discussion around -Wundef showed that there were still a few boolean
Kconfigs where #if was used rather than #ifdef to guard different code.
Kconfig leaves disabled boolean configs undefined, which can result in
-Wundef warnings.

arch/x86/boot/compressed/Makefile resets the CFLAGS used for this
directory, and doesn't re-enable -Wundef as the top level Makefile does.
If re-added, with RANDOMIZE_BASE and X86_NEED_RELOCS disabled, the
following warnings are visible.

  arch/x86/boot/compressed/misc.h:82:5: warning: 'CONFIG_RANDOMIZE_BASE'
  is not defined, evaluates to 0 [-Wundef]
      ^
  arch/x86/boot/compressed/misc.c:175:5: warning: 'CONFIG_X86_NEED_RELOCS'
  is not defined, evaluates to 0 [-Wundef]
      ^

Simply fix these and re-enable this warning for this directory.

Suggested-by: Nathan Chancellor <nathan@kernel.org>
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20210422190450.3903999-1-ndesaulniers@google.com
---
 arch/x86/boot/compressed/Makefile | 1 +
 arch/x86/boot/compressed/misc.c   | 2 +-
 arch/x86/boot/compressed/misc.h   | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 2a2975236c9e3..431bf7f846c3c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
 
 KBUILD_CFLAGS := -m$(BITS) -O2
 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+KBUILD_CFLAGS += -Wundef
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 cflags-$(CONFIG_X86_32) := -march=i386
 cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index dde042f64ccaa..743f13ea25c12 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -172,7 +172,7 @@ void __puthex(unsigned long value)
 	}
 }
 
-#if CONFIG_X86_NEED_RELOCS
+#ifdef CONFIG_X86_NEED_RELOCS
 static void handle_relocations(void *output, unsigned long output_len,
 			       unsigned long virt_addr)
 {
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index e5612f035498c..31139256859fc 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -79,7 +79,7 @@ struct mem_vector {
 	u64 size;
 };
 
-#if CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_RANDOMIZE_BASE
 /* kaslr.c */
 void choose_random_location(unsigned long input,
 			    unsigned long input_size,
-- 
GitLab


From 098116e7e640ba677d9e345cbee83d253c13d556 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 11 May 2021 10:35:21 +0200
Subject: [PATCH 0533/3804] net: really orphan skbs tied to closing sk

If the owning socket is shutting down - e.g. the sock reference
count already dropped to 0 and only sk_wmem_alloc is keeping
the sock alive, skb_orphan_partial() becomes a no-op.

When forwarding packets over veth with GRO enabled, the above
causes refcount errors.

This change addresses the issue with a plain skb_orphan() call
in the critical scenario.

Fixes: 9adc89af724f ("net: let skb_orphan_partial wake-up waiters.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 4 +++-
 net/core/sock.c    | 8 ++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 42bc5e1a627f4..0e962d8bc73b1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2231,13 +2231,15 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	sk_mem_charge(sk, skb->truesize);
 }
 
-static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
+static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
 {
 	if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
 		skb_orphan(skb);
 		skb->destructor = sock_efree;
 		skb->sk = sk;
+		return true;
 	}
+	return false;
 }
 
 void sk_reset_timer(struct sock *sk, struct timer_list *timer,
diff --git a/net/core/sock.c b/net/core/sock.c
index c761c4a0b66b1..958614ea16edb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2132,10 +2132,10 @@ void skb_orphan_partial(struct sk_buff *skb)
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (can_skb_orphan_partial(skb))
-		skb_set_owner_sk_safe(skb, skb->sk);
-	else
-		skb_orphan(skb);
+	if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
+		return;
+
+	skb_orphan(skb);
 }
 EXPORT_SYMBOL(skb_orphan_partial);
 
-- 
GitLab


From aa473d6ceb821d7c568c64cca7fff3e86ba9d789 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 11 May 2021 19:10:50 -0400
Subject: [PATCH 0534/3804] bnxt_en: Fix and improve .ndo_features_check().

Jakub Kicinski pointed out that we need to handle ipv6 extension headers
and to explicitly check for supported tunnel types in
.ndo_features_check().

For ipv6 extension headers, the hardware supports up to 2 ext. headers
and each must be <= 64 bytes.  For tunneled packets, the supported
packets are UDP with supported VXLAN and Geneve ports, GRE, and IPIP.

v3: More improvements based on Alexander Duyck's valuable feedback -
    Remove the jump label in bnxt_features_check() and restructure it
    so that the TCP/UDP check is consolidated in bnxt_exthdr_check().

v2: Add missing step to check inner ipv6 header for UDP and GRE tunnels.
    Check TCP/UDP next header after skipping ipv6 ext headers for
    non-tunneled packets and for inner ipv6.
    (Both feedback from Alexander Duyck)

Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Fixes: 1698d600b361 ("bnxt_en: Implement .ndo_features_check().")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 126 ++++++++++++++++++----
 1 file changed, 107 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2985844634c8b..46be4046ee51e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10785,37 +10785,125 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+static bool bnxt_exthdr_check(struct bnxt *bp, struct sk_buff *skb, int nw_off,
+			      u8 **nextp)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + nw_off);
+	int hdr_count = 0;
+	u8 *nexthdr;
+	int start;
+
+	/* Check that there are at most 2 IPv6 extension headers, no
+	 * fragment header, and each is <= 64 bytes.
+	 */
+	start = nw_off + sizeof(*ip6h);
+	nexthdr = &ip6h->nexthdr;
+	while (ipv6_ext_hdr(*nexthdr)) {
+		struct ipv6_opt_hdr *hp;
+		int hdrlen;
+
+		if (hdr_count >= 3 || *nexthdr == NEXTHDR_NONE ||
+		    *nexthdr == NEXTHDR_FRAGMENT)
+			return false;
+		hp = __skb_header_pointer(NULL, start, sizeof(*hp), skb->data,
+					  skb_headlen(skb), NULL);
+		if (!hp)
+			return false;
+		if (*nexthdr == NEXTHDR_AUTH)
+			hdrlen = ipv6_authlen(hp);
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		if (hdrlen > 64)
+			return false;
+		nexthdr = &hp->nexthdr;
+		start += hdrlen;
+		hdr_count++;
+	}
+	if (nextp) {
+		/* Caller will check inner protocol */
+		if (skb->encapsulation) {
+			*nextp = nexthdr;
+			return true;
+		}
+		*nextp = NULL;
+	}
+	/* Only support TCP/UDP for non-tunneled ipv6 and inner ipv6 */
+	return *nexthdr == IPPROTO_TCP || *nexthdr == IPPROTO_UDP;
+}
+
+/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
+static bool bnxt_udp_tunl_check(struct bnxt *bp, struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	__be16 udp_port = uh->dest;
+
+	if (udp_port != bp->vxlan_port && udp_port != bp->nge_port)
+		return false;
+	if (skb->inner_protocol_type == ENCAP_TYPE_ETHER) {
+		struct ethhdr *eh = inner_eth_hdr(skb);
+
+		switch (eh->h_proto) {
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			return bnxt_exthdr_check(bp, skb,
+						 skb_inner_network_offset(skb),
+						 NULL);
+		}
+	}
+	return false;
+}
+
+static bool bnxt_tunl_check(struct bnxt *bp, struct sk_buff *skb, u8 l4_proto)
+{
+	switch (l4_proto) {
+	case IPPROTO_UDP:
+		return bnxt_udp_tunl_check(bp, skb);
+	case IPPROTO_IPIP:
+		return true;
+	case IPPROTO_GRE: {
+		switch (skb->inner_protocol) {
+		default:
+			return false;
+		case htons(ETH_P_IP):
+			return true;
+		case htons(ETH_P_IPV6):
+			fallthrough;
+		}
+	}
+	case IPPROTO_IPV6:
+		/* Check ext headers of inner ipv6 */
+		return bnxt_exthdr_check(bp, skb, skb_inner_network_offset(skb),
+					 NULL);
+	}
+	return false;
+}
+
 static netdev_features_t bnxt_features_check(struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
 {
-	struct bnxt *bp;
-	__be16 udp_port;
-	u8 l4_proto = 0;
+	struct bnxt *bp = netdev_priv(dev);
+	u8 *l4_proto;
 
 	features = vlan_features_check(skb, features);
-	if (!skb->encapsulation)
-		return features;
-
 	switch (vlan_get_protocol(skb)) {
 	case htons(ETH_P_IP):
-		l4_proto = ip_hdr(skb)->protocol;
+		if (!skb->encapsulation)
+			return features;
+		l4_proto = &ip_hdr(skb)->protocol;
+		if (bnxt_tunl_check(bp, skb, *l4_proto))
+			return features;
 		break;
 	case htons(ETH_P_IPV6):
-		l4_proto = ipv6_hdr(skb)->nexthdr;
+		if (!bnxt_exthdr_check(bp, skb, skb_network_offset(skb),
+				       &l4_proto))
+			break;
+		if (!l4_proto || bnxt_tunl_check(bp, skb, *l4_proto))
+			return features;
 		break;
-	default:
-		return features;
 	}
-
-	if (l4_proto != IPPROTO_UDP)
-		return features;
-
-	bp = netdev_priv(dev);
-	/* For UDP, we can only handle 1 Vxlan port and 1 Geneve port. */
-	udp_port = udp_hdr(skb)->dest;
-	if (udp_port == bp->vxlan_port || udp_port == bp->nge_port)
-		return features;
 	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
-- 
GitLab


From 171c3b151118a2fe0fc1e2a9d1b5a1570cfe82d2 Mon Sep 17 00:00:00 2001
From: Richard Sanger <rsanger@wand.net.nz>
Date: Wed, 12 May 2021 13:31:22 +1200
Subject: [PATCH 0535/3804] net: packetmmap: fix only tx timestamp on request

The packetmmap tx ring should only return timestamps if requested via
setsockopt PACKET_TIMESTAMP, as documented. This allows compatibility
with non-timestamp aware user-space code which checks
tp_status == TP_STATUS_AVAILABLE; not expecting additional timestamp
flags to be set in tp_status.

Fixes: b9c32fb27170 ("packet: if hw/sw ts enabled in rx/tx ring, report which ts we got")
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Signed-off-by: Richard Sanger <rsanger@wand.net.nz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ba96db1880eae..ae906eb4b269e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -422,7 +422,8 @@ static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
 	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
 		return TP_STATUS_TS_RAW_HARDWARE;
 
-	if (ktime_to_timespec64_cond(skb->tstamp, ts))
+	if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
+	    ktime_to_timespec64_cond(skb->tstamp, ts))
 		return TP_STATUS_TS_SOFTWARE;
 
 	return 0;
@@ -2340,7 +2341,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
 
-	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+	/* Always timestamp; prefer an existing software timestamp taken
+	 * closer to the time of capture.
+	 */
+	ts_status = tpacket_get_timestamp(skb, &ts,
+					  po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
+	if (!ts_status)
 		ktime_get_real_ts64(&ts);
 
 	status |= ts_status;
-- 
GitLab


From 619fee9eb13b5d29e4267cb394645608088c28a8 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Wed, 12 May 2021 10:43:59 +0800
Subject: [PATCH 0536/3804] net: fec: fix the potential memory leak in
 fec_enet_init()

If the memory allocation for cbd_base fails, the memory already
allocated for the queues should be freed; otherwise it is leaked.

And if the memory allocation for the queues fails, the function can
return the error directly.

Fixes: 59d0f7465644 ("net: fec: init multi queue date structure")
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index f2065f9d02e62..a2ada39c22d7e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3290,7 +3290,9 @@ static int fec_enet_init(struct net_device *ndev)
 		return ret;
 	}
 
-	fec_enet_alloc_queue(ndev);
+	ret = fec_enet_alloc_queue(ndev);
+	if (ret)
+		return ret;
 
 	bd_size = (fep->total_tx_ring_size + fep->total_rx_ring_size) * dsize;
 
@@ -3298,7 +3300,8 @@ static int fec_enet_init(struct net_device *ndev)
 	cbd_base = dmam_alloc_coherent(&fep->pdev->dev, bd_size, &bd_dma,
 				       GFP_KERNEL);
 	if (!cbd_base) {
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto free_queue_mem;
 	}
 
 	/* Get the Ethernet address */
@@ -3376,6 +3379,10 @@ static int fec_enet_init(struct net_device *ndev)
 		fec_enet_update_ethtool_stats(ndev);
 
 	return 0;
+
+free_queue_mem:
+	fec_enet_free_queue(ndev);
+	return ret;
 }
 
 #ifdef CONFIG_OF
-- 
GitLab


From 052fcc4531824c38f8e0ad88213c1be102a0b124 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Wed, 12 May 2021 10:44:00 +0800
Subject: [PATCH 0537/3804] net: fec: add defer probe for of_get_mac_address

If the MAC address is read from the nvmem efuse by calling
of_get_mac_address(), but the nvmem efuse is registered later than the
driver, the call returns -EPROBE_DEFER. Modify the driver to support
deferred probing when reading the MAC address from the nvmem efuse.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index a2ada39c22d7e..ad82cffc6f3f5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1662,7 +1662,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 }
 
 /* ------------------------------------------------------------------------- */
-static void fec_get_mac(struct net_device *ndev)
+static int fec_get_mac(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	unsigned char *iap, tmpaddr[ETH_ALEN];
@@ -1685,6 +1685,8 @@ static void fec_get_mac(struct net_device *ndev)
 			ret = of_get_mac_address(np, tmpaddr);
 			if (!ret)
 				iap = tmpaddr;
+			else if (ret == -EPROBE_DEFER)
+				return ret;
 		}
 	}
 
@@ -1723,7 +1725,7 @@ static void fec_get_mac(struct net_device *ndev)
 		eth_hw_addr_random(ndev);
 		dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n",
 			 ndev->dev_addr);
-		return;
+		return 0;
 	}
 
 	memcpy(ndev->dev_addr, iap, ETH_ALEN);
@@ -1731,6 +1733,8 @@ static void fec_get_mac(struct net_device *ndev)
 	/* Adjust MAC if using macaddr */
 	if (iap == macaddr)
 		 ndev->dev_addr[ETH_ALEN-1] = macaddr[ETH_ALEN-1] + fep->dev_id;
+
+	return 0;
 }
 
 /* ------------------------------------------------------------------------- */
@@ -3305,7 +3309,10 @@ static int fec_enet_init(struct net_device *ndev)
 	}
 
 	/* Get the Ethernet address */
-	fec_get_mac(ndev);
+	ret = fec_get_mac(ndev);
+	if (ret)
+		goto free_queue_mem;
+
 	/* make sure MAC we just acquired is programmed into the hw */
 	fec_set_mac_address(ndev, NULL);
 
-- 
GitLab


From e5cc361e21648b75f935f9571d4003aaee480214 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 May 2021 13:11:43 +0300
Subject: [PATCH 0538/3804] octeontx2-pf: fix a buffer overflow in
 otx2_set_rxfh_context()

This function is called from ethtool_set_rxfh() and "*rss_context"
comes from the user.  Add some bounds checking to prevent memory
corruption.

Fixes: 81a4362016e7 ("octeontx2-pf: Add RSS multi group support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
index f4962a97a0757..9d9a2e438acfc 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c
@@ -786,6 +786,10 @@ static int otx2_set_rxfh_context(struct net_device *dev, const u32 *indir,
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
+	if (*rss_context != ETH_RXFH_CONTEXT_ALLOC &&
+	    *rss_context >= MAX_RSS_GROUPS)
+		return -EINVAL;
+
 	rss = &pfvf->hw.rss_info;
 
 	if (!rss->enable) {
-- 
GitLab


From 9c1bb37f8cad5e2ee1933fa1da9a6baa7876a8e4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 12 May 2021 13:15:29 +0200
Subject: [PATCH 0539/3804] ptp: ocp: Fix a resource leak in an error handling
 path

If an error occurs after a successful 'pci_ioremap_bar()' call, it must be
undone by a corresponding 'pci_iounmap()' call, as already done in the
remove function.

Fixes: a7e1abad13f3 ("ptp: Add clock driver for the OpenCompute TimeCard.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_ocp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index 530e5f90095e6..0d1034e3ed0f2 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -324,7 +324,7 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (!bp->base) {
 		dev_err(&pdev->dev, "io_remap bar0\n");
 		err = -ENOMEM;
-		goto out;
+		goto out_release_regions;
 	}
 	bp->reg = bp->base + OCP_REGISTER_OFFSET;
 	bp->tod = bp->base + TOD_REGISTER_OFFSET;
@@ -347,6 +347,8 @@ ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return 0;
 
 out:
+	pci_iounmap(pdev, bp->base);
+out_release_regions:
 	pci_release_regions(pdev);
 out_disable:
 	pci_disable_device(pdev);
-- 
GitLab


From ca14f9597f4fdb3679453aec7bb2807f0b8b7363 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:00:46 -0400
Subject: [PATCH 0540/3804] =?UTF-8?q?MAINTAINERS:=20nfc:=20drop=20Cl=C3=A9?=
 =?UTF-8?q?ment=20Perrochaud=20from=20NXP-NCI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Emails to Clément Perrochaud bounce with permanent error "user does not
exist", so remove Clément Perrochaud from NXP-NCI driver maintainers
entry.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..ec723b48769e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13205,7 +13205,6 @@ F:	Documentation/devicetree/bindings/sound/tfa9879.txt
 F:	sound/soc/codecs/tfa9879*
 
 NXP-NCI NFC DRIVER
-M:	Clément Perrochaud <clement.perrochaud@effinnov.com>
 R:	Charles Gorand <charles.gorand@effinnov.com>
 L:	linux-nfc@lists.01.org (moderated for non-subscribers)
 S:	Supported
-- 
GitLab


From 8aa5713d8b2ce1ea67bdf212eb61bfcff3c52202 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:43:18 -0400
Subject: [PATCH 0541/3804] MAINTAINERS: nfc: add Krzysztof Kozlowski as
 maintainer

The NFC subsystem is orphaned.  I am happy to spend some cycles to
review the patches, send pull requests and in general keep the NFC
subsystem running.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ec723b48769e0..7020293a1347d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12896,8 +12896,9 @@ F:	include/uapi/linux/nexthop.h
 F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
+M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 L:	netdev@vger.kernel.org
-S:	Orphan
+S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/
 F:	drivers/nfc/
 F:	include/linux/platform_data/nfcmrvl.h
-- 
GitLab


From 4a64541f2cebef54ea8d9f53ac5067328b8e02d8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 12 May 2021 10:43:19 -0400
Subject: [PATCH 0542/3804] MAINTAINERS: nfc: include linux-nfc mailing list

Keep all NFC related patches in existing linux-nfc@lists.01.org mailing
list.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 7020293a1347d..1d834bebf469c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12897,6 +12897,7 @@ F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
 M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
+L:	linux-nfc@lists.01.org (moderated for non-subscribers)
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/
-- 
GitLab


From 832ce924b1a14e139e184a6da9f5a69a5e47b256 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 May 2021 13:02:48 +0300
Subject: [PATCH 0543/3804] chelsio/chtls: unlock on error in
 chtls_pt_recvmsg()

This error path needs to release some memory and call release_sock(sk);
before returning.

Fixes: 6919a8264a32 ("Crypto/chtls: add/delete TLS header in driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
index 188d871f6b8cd..c320cc8ca68d6 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c
@@ -1564,8 +1564,10 @@ found_ok_skb:
 			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
 					sizeof(thdr->type), &thdr->type);
 
-			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA)
-				return -EIO;
+			if (cerr && thdr->type != TLS_RECORD_TYPE_DATA) {
+				copied = -EIO;
+				break;
+			}
 			/*  don't send tls header, skip copy */
 			goto skip_copy;
 		}
-- 
GitLab


From 4792f9dd12936ec35deced665ae3a4ca8fe98729 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Wed, 12 May 2021 23:32:39 +0200
Subject: [PATCH 0544/3804] clang-format: Update with the latest for_each macro
 list

Re-run the shell fragment that generated the original list.

Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 .clang-format | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/.clang-format b/.clang-format
index c24b147cac018..15d4eaabc6b53 100644
--- a/.clang-format
+++ b/.clang-format
@@ -109,8 +109,8 @@ ForEachMacros:
   - 'css_for_each_child'
   - 'css_for_each_descendant_post'
   - 'css_for_each_descendant_pre'
-  - 'cxl_for_each_cmd'
   - 'device_for_each_child_node'
+  - 'displayid_iter_for_each'
   - 'dma_fence_chain_for_each'
   - 'do_for_each_ftrace_op'
   - 'drm_atomic_crtc_for_each_plane'
@@ -136,6 +136,7 @@ ForEachMacros:
   - 'drm_mm_for_each_node_in_range'
   - 'drm_mm_for_each_node_safe'
   - 'flow_action_for_each'
+  - 'for_each_acpi_dev_match'
   - 'for_each_active_dev_scope'
   - 'for_each_active_drhd_unit'
   - 'for_each_active_iommu'
@@ -171,7 +172,6 @@ ForEachMacros:
   - 'for_each_dapm_widgets'
   - 'for_each_dev_addr'
   - 'for_each_dev_scope'
-  - 'for_each_displayid_db'
   - 'for_each_dma_cap_mask'
   - 'for_each_dpcm_be'
   - 'for_each_dpcm_be_rollback'
@@ -179,6 +179,7 @@ ForEachMacros:
   - 'for_each_dpcm_fe'
   - 'for_each_drhd_unit'
   - 'for_each_dss_dev'
+  - 'for_each_dtpm_table'
   - 'for_each_efi_memory_desc'
   - 'for_each_efi_memory_desc_in_map'
   - 'for_each_element'
@@ -215,6 +216,7 @@ ForEachMacros:
   - 'for_each_migratetype_order'
   - 'for_each_msi_entry'
   - 'for_each_msi_entry_safe'
+  - 'for_each_msi_vector'
   - 'for_each_net'
   - 'for_each_net_continue_reverse'
   - 'for_each_netdev'
@@ -270,6 +272,12 @@ ForEachMacros:
   - 'for_each_prime_number_from'
   - 'for_each_process'
   - 'for_each_process_thread'
+  - 'for_each_prop_codec_conf'
+  - 'for_each_prop_dai_codec'
+  - 'for_each_prop_dai_cpu'
+  - 'for_each_prop_dlc_codecs'
+  - 'for_each_prop_dlc_cpus'
+  - 'for_each_prop_dlc_platforms'
   - 'for_each_property_of_node'
   - 'for_each_registered_fb'
   - 'for_each_requested_gpio'
@@ -430,6 +438,7 @@ ForEachMacros:
   - 'queue_for_each_hw_ctx'
   - 'radix_tree_for_each_slot'
   - 'radix_tree_for_each_tagged'
+  - 'rb_for_each'
   - 'rbtree_postorder_for_each_entry_safe'
   - 'rdma_for_each_block'
   - 'rdma_for_each_port'
-- 
GitLab


From 3b5169c2eb81e822445469a077223f8eb0729a59 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 12 May 2021 15:48:09 -0700
Subject: [PATCH 0545/3804] hwmon: (adm9240) Fix writes into inX_max attributes

When converting the driver to use the devm_hwmon_device_register_with_info
API, the wrong register was selected when writing into inX_max attributes.
Fix it.

Fixes: 124b7e34a5a6 ("hwmon: (adm9240) Convert to devm_hwmon_device_register_with_info API")
Reported-by: Chris Packham <Chris.Packham@alliedtelesis.co.nz>
Tested-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adm9240.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index 5677263bcf0de..483cd757abd33 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -485,7 +485,7 @@ static int adm9240_in_write(struct device *dev, u32 attr, int channel, long val)
 		reg = ADM9240_REG_IN_MIN(channel);
 		break;
 	case hwmon_in_max:
-		reg = ADM9240_REG_IN(channel);
+		reg = ADM9240_REG_IN_MAX(channel);
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
GitLab


From 9e9da02a68d4b7feaa10022fd1135d9b3f2f72d7 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 11 May 2021 16:16:33 +0300
Subject: [PATCH 0546/3804] percpu_ref: Don't opencode percpu_ref_is_dying

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 lib/percpu-refcount.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index a1071cdefb5aa..af9302141bcf6 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -275,7 +275,7 @@ static void __percpu_ref_switch_mode(struct percpu_ref *ref,
 	wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch,
 			    percpu_ref_switch_lock);
 
-	if (data->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+	if (data->force_atomic || percpu_ref_is_dying(ref))
 		__percpu_ref_switch_to_atomic(ref, confirm_switch);
 	else
 		__percpu_ref_switch_to_percpu(ref);
@@ -385,7 +385,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
+	WARN_ONCE(percpu_ref_is_dying(ref),
 		  "%s called more than once on %ps!", __func__,
 		  ref->data->release);
 
@@ -465,7 +465,7 @@ void percpu_ref_resurrect(struct percpu_ref *ref)
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
+	WARN_ON_ONCE(!percpu_ref_is_dying(ref));
 	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));
 
 	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
-- 
GitLab


From 0e4a4a08cd78efcaddbc2e4c5ed86b5a5cb8a15e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Vok=C3=A1=C4=8D?= <michal.vokac@ysoft.com>
Date: Tue, 13 Apr 2021 16:45:57 +0200
Subject: [PATCH 0547/3804] ARM: dts: imx6dl-yapp4: Fix RGMII connection to
 QCA8334 switch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The FEC does not have a PHY so it should not have a phy-handle. It is
connected to the switch at RGMII level so we need a fixed-link sub-node
on both ends.

This was not a problem until the qca8k.c driver was converted to PHYLINK
by commit b3591c2a3661 ("net: dsa: qca8k: Switch to PHYLINK instead of
PHYLIB"). That commit revealed the FEC configuration was not correct.

Fixes: 87489ec3a77f ("ARM: dts: imx: Add Y Soft IOTA Draco, Hydra and Ursa boards")
Cc: stable@vger.kernel.org
Signed-off-by: Michal Vokáč <michal.vokac@ysoft.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
index 7d2c72562c735..9148a01ed6d9f 100644
--- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
+++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi
@@ -105,9 +105,13 @@
 	phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
 	phy-reset-duration = <20>;
 	phy-supply = <&sw2_reg>;
-	phy-handle = <&ethphy0>;
 	status = "okay";
 
+	fixed-link {
+		speed = <1000>;
+		full-duplex;
+	};
+
 	mdio {
 		#address-cells = <1>;
 		#size-cells = <0>;
-- 
GitLab


From c547addba7096debac4f99cdfe869a32a81081e2 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 11 May 2021 16:17:37 +0300
Subject: [PATCH 0548/3804] MAINTAINERS: Add lib/percpu* as part of percpu
 entry

Without this patch, get_maintainer.pl on a patch which modified
lib/percpu-refcount.c produces:

Jens Axboe <axboe@kernel.dk> (commit_signer:2/5=40%)
Ming Lei <ming.lei@redhat.com> (commit_signer:2/5=40%,authored:2/5=40%,added_lines:99/114=87%,removed_lines:34/43=79%)
"Paul E. McKenney" <paulmck@kernel.org> (commit_signer:1/5=20%,authored:1/5=20%,added_lines:9/114=8%,removed_lines:3/43=7%)
Tejun Heo <tj@kernel.org> (commit_signer:1/5=20%)
Andrew Morton <akpm@linux-foundation.org> (commit_signer:1/5=20%)
Nikolay Borisov <nborisov@suse.com> (authored:1/5=20%,removed_lines:3/43=7%)
Joe Perches <joe@perches.com> (authored:1/5=20%,removed_lines:3/43=7%)
linux-kernel@vger.kernel.org (open list)

Whereas with the patch applied it now (properly) prints:

Dennis Zhou <dennis@kernel.org> (maintainer:PER-CPU MEMORY ALLOCATOR)
Tejun Heo <tj@kernel.org> (maintainer:PER-CPU MEMORY ALLOCATOR)
Christoph Lameter <cl@linux.com> (maintainer:PER-CPU MEMORY ALLOCATOR)
linux-kernel@vger.kernel.org (open list)

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
[Dennis: updated list to linux-mm@kvack.org]
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..9599e313d7f76 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14317,10 +14317,12 @@ PER-CPU MEMORY ALLOCATOR
 M:	Dennis Zhou <dennis@kernel.org>
 M:	Tejun Heo <tj@kernel.org>
 M:	Christoph Lameter <cl@linux.com>
+L:	linux-mm@kvack.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git
 F:	arch/*/include/asm/percpu.h
 F:	include/linux/percpu*.h
+F:	lib/percpu*.c
 F:	mm/percpu*.c
 
 PER-TASK DELAY ACCOUNTING
-- 
GitLab


From da096fbccd52803db3edd9dd0c5ae4079d31c456 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 4 May 2021 13:59:09 +0100
Subject: [PATCH 0549/3804] soundwire: qcom: fix handling of
 qcom,ports-block-pack-mode

Support for "qcom,ports-block-pack-mode" was added at a later stage
to support a variant of Qualcomm SoundWire controllers available
on Apps processor. However the older versions of the SoundWire
controller which are embedded in WCD Codecs do not need this property.

So returning on error for those cases will break boards like DragonBoard
DB845c and Lenovo Yoga C630.

This patch fixes error handling on this property considering older usecases.

Fixes: a5943e4fb14e ("soundwire: qcom: check of_property_read status")
Reported-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Amit Pundir <amit.pundir@linaro.org>
Link: https://lore.kernel.org/r/20210504125909.16108-1-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/qcom.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 2827085a323b8..0ef79d60e88e6 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -1150,8 +1150,16 @@ static int qcom_swrm_get_port_config(struct qcom_swrm_ctrl *ctrl)
 
 	ret = of_property_read_u8_array(np, "qcom,ports-block-pack-mode",
 					bp_mode, nports);
-	if (ret)
-		return ret;
+	if (ret) {
+		u32 version;
+
+		ctrl->reg_read(ctrl, SWRM_COMP_HW_VERSION, &version);
+
+		if (version <= 0x01030000)
+			memset(bp_mode, SWR_INVALID_PARAM, QCOM_SDW_MAX_PORTS);
+		else
+			return ret;
+	}
 
 	memset(hstart, SWR_INVALID_PARAM, QCOM_SDW_MAX_PORTS);
 	of_property_read_u8_array(np, "qcom,ports-hstart", hstart, nports);
-- 
GitLab


From 8967b27a6c1c19251989c7ab33c058d16e4a5f53 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Mon, 26 Apr 2021 12:23:21 +0200
Subject: [PATCH 0550/3804] ARM: dts: imx6q-dhcom: Add PU,VDD1P1,VDD2P5
 regulators

Per schematic, both PU and SOC regulator are supplied from LTC3676 SW1
via VDDSOC_IN rail, add the PU input. Both VDD1P1, VDD2P5 are supplied
from LTC3676 SW2 via VDDHIGH_IN rail, add both inputs.

While no instability or problems are currently observed, the regulators
should be fully described in DT and that description should fully match
the hardware, else this might lead to unforeseen issues later. Fix this.

Fixes: 52c7a088badd ("ARM: dts: imx6q: Add support for the DHCOM iMX6 SoM and PDK2")
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: Ludwig Zenz <lzenz@dh-electronics.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: stable@vger.kernel.org
Reviewed-by: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6q-dhcom-som.dtsi | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
index 236fc205c3890..d0768ae429faa 100644
--- a/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
+++ b/arch/arm/boot/dts/imx6q-dhcom-som.dtsi
@@ -406,6 +406,18 @@
 	vin-supply = <&sw1_reg>;
 };
 
+&reg_pu {
+	vin-supply = <&sw1_reg>;
+};
+
+&reg_vdd1p1 {
+	vin-supply = <&sw2_reg>;
+};
+
+&reg_vdd2p5 {
+	vin-supply = <&sw2_reg>;
+};
+
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_uart1>;
-- 
GitLab


From 0852b6ca941ef3ff75076e85738877bd3271e1cd Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Mon, 10 May 2021 14:47:15 +0800
Subject: [PATCH 0551/3804] erofs: fix 1 lcluster-sized pcluster for big
 pcluster

If the 1st NONHEAD lcluster of a pcluster isn't of the CBLKCNT lcluster
type but of the HEAD or PLAIN type instead, its pclustersize _must_ be
1 lcluster (since its uncompressed size < 2 lclusters), as illustrated
below:

       HEAD     HEAD / PLAIN    lcluster type
   ____________ ____________
  |_:__________|_________:__|   file data (uncompressed)
   .                .
  .____________.
  |____________|                pcluster data (compressed)

Such an on-disk case was explained before [1] but was not handled
properly in the runtime implementation.

It can be observed if manually generating 1 lcluster-sized pcluster
with 2 lclusters (thus CBLKCNT doesn't exist.) Let's fix it now.

[1] https://lore.kernel.org/r/20210407043927.10623-1-xiang@kernel.org

Link: https://lore.kernel.org/r/20210510064715.29123-1-xiang@kernel.org
Fixes: cec6e93beadf ("erofs: support parsing big pcluster compress indexes")
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 fs/erofs/zmap.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index e62d813756f28..efaf32596b97f 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -450,14 +450,31 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
 	lcn = m->lcn + 1;
 	if (m->compressedlcs)
 		goto out;
-	if (lcn == initial_lcn)
-		goto err_bonus_cblkcnt;
 
 	err = z_erofs_load_cluster_from_disk(m, lcn);
 	if (err)
 		return err;
 
+	/*
+	 * If the 1st NONHEAD lcluster has already been handled initially w/o
+	 * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+	 * an internal implemenatation error is detected.
+	 *
+	 * The following code can also handle it properly anyway, but let's
+	 * BUG_ON in the debugging mode only for developers to notice that.
+	 */
+	DBG_BUGON(lcn == initial_lcn &&
+		  m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
 	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		/*
+		 * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+		 * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+		 */
+		m->compressedlcs = 1;
+		break;
 	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
 		if (m->delta[0] != 1)
 			goto err_bonus_cblkcnt;
-- 
GitLab


From 3743d55b289c203d8f77b7cd47c24926b9d186ae Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Sun, 25 Apr 2021 15:34:51 +0800
Subject: [PATCH 0552/3804] x86, sched: Fix the AMD CPPC maximum performance
 value on certain AMD Ryzen generations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some AMD Ryzen generations use a different calculation method for maximum
performance. 255 is not valid for all ASICs; some specific generations should
use 166 as the maximum performance. Otherwise, an incorrect frequency value is
reported, like below:

  ~ → lscpu | grep MHz
  CPU MHz:                         3400.000
  CPU max MHz:                     7228.3198
  CPU min MHz:                     2200.0000

[ mingo: Tidied up whitespace use. ]
[ Alexander Monakov <amonakov@ispras.ru>: fix 225 -> 255 typo. ]

Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems")
Fixes: 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies")
Reported-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
Fixed-by: Alexander Monakov <amonakov@ispras.ru>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210425073451.2557394-1-ray.huang@amd.com
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211791
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/kernel/cpu/amd.c        | 16 ++++++++++++++++
 arch/x86/kernel/smpboot.c        |  2 +-
 drivers/cpufreq/acpi-cpufreq.c   |  6 +++++-
 4 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 154321d29050f..556b2b17c3e2f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow);
 
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
+extern u32 amd_get_highest_perf(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
+static inline u32 amd_get_highest_perf(void)		{ return 0; }
 #endif
 
 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab4..6d7b3b3ea80b1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr)
 		break;
 	}
 }
+
+u32 amd_get_highest_perf(void)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+			       (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+		return 166;
+
+	if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+			       (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+		return 166;
+
+	return 255;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0ad5214f598a9..7770245cc7fa7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void)
 		return false;
 	}
 
-	highest_perf = perf_caps.highest_perf;
+	highest_perf = amd_get_highest_perf();
 	nominal_perf = perf_caps.nominal_perf;
 
 	if (!highest_perf || !nominal_perf) {
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index d1bbc16fba4b4..7e7450453714d 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned int cpu)
 		return 0;
 	}
 
-	highest_perf = perf_caps.highest_perf;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		highest_perf = amd_get_highest_perf();
+	else
+		highest_perf = perf_caps.highest_perf;
+
 	nominal_perf = perf_caps.nominal_perf;
 
 	if (!highest_perf || !nominal_perf) {
-- 
GitLab


From 915a2bc3c6b71e9802b89c5c981b2d5367e1ae3f Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Mon, 19 Apr 2021 00:26:59 -0400
Subject: [PATCH 0553/3804] sched/isolation: Reconcile rcu_nocbs= and
 nohz_full=

We have a mismatch between RCU and isolation -- in relation to what is
considered the maximum valid CPU number.

This matters because nohz_full= and rcu_nocbs= are joined at the hip; in
fact the former will enforce the latter.  So we don't want a CPU mask to
be valid for one and denied for the other.

The difference 1st appeared as of v4.15; further details are below.

As it is confusing to anyone who isn't looking at the code regularly, a
reminder is in order; three values exist here:

  CONFIG_NR_CPUS  - compiled in maximum cap on number of CPUs supported.
  nr_cpu_ids      - possible # of CPUs (typically reflects what ACPI says)
  cpus_present    - actual number of present/detected/installed CPUs.

For this example, I'll refer to NR_CPUS=64 from "make defconfig" and
nr_cpu_ids=6 for ACPI reporting on a board that could run a six core,
and present=4 for a quad that is physically in the socket.  From dmesg:

 smpboot: Allowing 6 CPUs, 2 hotplug CPUs
 setup_percpu: NR_CPUS:64 nr_cpumask_bits:64 nr_cpu_ids:6 nr_node_ids:1
 rcu: 	RCU restricting CPUs from NR_CPUS=64 to nr_cpu_ids=6.
 smp: Brought up 1 node, 4 CPUs

And from userspace, see:

   paul@trash:/sys/devices/system/cpu$ cat present
   0-3
   paul@trash:/sys/devices/system/cpu$ cat possible
   0-5
   paul@trash:/sys/devices/system/cpu$ cat kernel_max
   63

Everything is fine if we boot 5x5 for rcu/nohz:

  Command line: BOOT_IMAGE=/boot/bzImage nohz_full=2-5 rcu_nocbs=2-5 root=/dev/sda1 ro
  NO_HZ: Full dynticks CPUs: 2-5.
  rcu: 	Offload RCU callbacks from CPUs: 2-5.

..even though there is no CPU 4 or 5.  Both RCU and nohz_full are OK.
Now we push that > 6 but less than NR_CPU and with 15x15 we get:

  Command line: BOOT_IMAGE=/boot/bzImage rcu_nocbs=2-15 nohz_full=2-15 root=/dev/sda1 ro
  rcu: 	Note: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.
  rcu: 	Offload RCU callbacks from CPUs: 2-5.

These are both functionally equivalent, as we are only changing flags on
phantom CPUs that don't exist, but note the kernel interpretation changes.
And worse, it only changes for one of the two - which is the problem.

RCU doesn't care if you want to restrict the flags on phantom CPUs but
clearly nohz_full does after this change from v4.15.

 edb9382175c3: ("sched/isolation: Move isolcpus= handling to the housekeeping code")

 -       if (cpulist_parse(str, non_housekeeping_mask) < 0) {
 -               pr_warn("Housekeeping: Incorrect nohz_full cpumask\n");
 +       err = cpulist_parse(str, non_housekeeping_mask);
 +       if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) {
 +               pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");

To be clear, the sanity check on "possible" (nr_cpu_ids) is new here.

The goal was reasonable ; not wanting housekeeping to land on a
not-possible CPU, but note two things:

  1) this is an exclusion list, not an inclusion list; we are tracking
     non_housekeeping CPUs; not ones who are explicitly assigned housekeeping

  2) we went one further in 9219565aa890 ("sched/isolation: Require a present CPU in housekeeping mask")
     - ensuring that housekeeping was sanity checking against present and not just possible CPUs.

To be clear, this means the check added in v4.15 is doubly redundant.
And more importantly, overly strict/restrictive.

We care now, because the bitmap boot arg parsing now knows that a value
of "N" is NR_CPUS; the size of the bitmap, but the bitmap code doesn't
know anything about the subtleties of our max/possible/present CPU
specifics as outlined above.

So drop the check added in v4.15 (edb9382175c3) and make RCU and
nohz_full both in alignment again on NR_CPUS so "N" works for both,
and then they can fall back to nr_cpu_ids internally just as before.

  Command line: BOOT_IMAGE=/boot/bzImage nohz_full=2-N rcu_nocbs=2-N root=/dev/sda1 ro
  NO_HZ: Full dynticks CPUs: 2-5.
  rcu: 	Offload RCU callbacks from CPUs: 2-5.

As shown above, with this change, RCU and nohz_full are in sync, even
with the use of the "N" placeholder.  Same result is achieved with "15".

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20210419042659.1134916-1-paul.gortmaker@windriver.com
---
 kernel/sched/isolation.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 5a6ea03f9882d..7f06eaf128188 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -81,11 +81,9 @@ static int __init housekeeping_setup(char *str, enum hk_flags flags)
 {
 	cpumask_var_t non_housekeeping_mask;
 	cpumask_var_t tmp;
-	int err;
 
 	alloc_bootmem_cpumask_var(&non_housekeeping_mask);
-	err = cpulist_parse(str, non_housekeeping_mask);
-	if (err < 0 || cpumask_last(non_housekeeping_mask) >= nr_cpu_ids) {
+	if (cpulist_parse(str, non_housekeeping_mask) < 0) {
 		pr_warn("Housekeeping: nohz_full= or isolcpus= incorrect CPU range\n");
 		free_bootmem_cpumask_var(non_housekeeping_mask);
 		return 0;
-- 
GitLab


From f105dfec0a951cd0d5bfbfe9dc067ea69f71ad5c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 13 May 2021 01:29:15 +0200
Subject: [PATCH 0554/3804] tick/nohz: Evaluate the CPU expression after the
 static key

When tick_nohz_full_cpu() is called with smp_processor_id(), the latter
is unconditionally evaluated whether the static key is on or off. It is
not necessary in the off-case though, so make sure the cpu expression
is executed at the last moment.

Illustrate with the following test function:

	int tick_nohz_test(void)
	{
		return tick_nohz_full_cpu(smp_processor_id());
	}

The resulting code before was:

	mov    %gs:0x7eea92d1(%rip),%eax   # smp_processor_id() fetch
	nopl   0x0(%rax,%rax,1)
	xor    %eax,%eax
	retq
	cmpb   $0x0,0x29d393a(%rip)        # <tick_nohz_full_running>
	je     tick_nohz_test+0x29         # jump to below eax clear
	mov    %eax,%eax
	bt     %rax,0x29d3936(%rip)        # <tick_nohz_full_mask>
	setb   %al
	movzbl %al,%eax
	retq
	xor    %eax,%eax
	retq

Now it becomes:

	nopl   0x0(%rax,%rax,1)
	xor    %eax,%eax
	retq
	cmpb   $0x0,0x29d3871(%rip)        # <tick_nohz_full_running>
	je     tick_nohz_test+0x29         # jump to below eax clear
	mov    %gs:0x7eea91f0(%rip),%eax   # smp_processor_id() fetch, after static key
	mov    %eax,%eax
	bt     %rax,0x29d3866(%rip)        # <tick_nohz_full_mask>
	setb   %al
	movzbl %al,%eax
	retq
	xor    %eax,%eax
	retq

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210512232924.150322-2-frederic@kernel.org
---
 include/linux/tick.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff7..2258984a0e8a7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -185,13 +185,17 @@ static inline bool tick_nohz_full_enabled(void)
 	return tick_nohz_full_running;
 }
 
-static inline bool tick_nohz_full_cpu(int cpu)
-{
-	if (!tick_nohz_full_enabled())
-		return false;
-
-	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
-}
+/*
+ * Check if a CPU is part of the nohz_full subset. Arrange for evaluating
+ * the cpu expression (typically smp_processor_id()) _after_ the static
+ * key.
+ */
+#define tick_nohz_full_cpu(_cpu) ({					\
+	bool __ret = false;						\
+	if (tick_nohz_full_enabled())					\
+		__ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask);	\
+	__ret;								\
+})
 
 static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
 {
-- 
GitLab


From a5183862e76fdc25f36b39c2489b816a5c66e2e5 Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Thu, 13 May 2021 01:29:16 +0200
Subject: [PATCH 0555/3804] tick/nohz: Conditionally restart tick on idle exit

In nohz_full mode, switching from idle to a task will unconditionally
issue a tick restart. If the task is alone in the runqueue or is the
highest priority, the tick will fire once then eventually stop. But that
alone is still undesired noise.

Therefore, only restart the tick on idle exit when it's strictly
necessary.

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-3-frederic@kernel.org
---
 kernel/time/tick-sched.c | 42 ++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 828b091501ca4..05c1ce1034d6c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -926,22 +926,28 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	tick_nohz_restart(ts, now);
 }
 
-static void tick_nohz_full_update_tick(struct tick_sched *ts)
+static void __tick_nohz_full_update_tick(struct tick_sched *ts,
+					 ktime_t now)
 {
 #ifdef CONFIG_NO_HZ_FULL
 	int cpu = smp_processor_id();
 
-	if (!tick_nohz_full_cpu(cpu))
+	if (can_stop_full_tick(cpu, ts))
+		tick_nohz_stop_sched_tick(ts, cpu);
+	else if (ts->tick_stopped)
+		tick_nohz_restart_sched_tick(ts, now);
+#endif
+}
+
+static void tick_nohz_full_update_tick(struct tick_sched *ts)
+{
+	if (!tick_nohz_full_cpu(smp_processor_id()))
 		return;
 
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick(cpu, ts))
-		tick_nohz_stop_sched_tick(ts, cpu);
-	else if (ts->tick_stopped)
-		tick_nohz_restart_sched_tick(ts, ktime_get());
-#endif
+	__tick_nohz_full_update_tick(ts, ktime_get());
 }
 
 static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
@@ -1209,18 +1215,24 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 #endif
 }
 
-static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
+void tick_nohz_idle_restart_tick(void)
 {
-	tick_nohz_restart_sched_tick(ts, now);
-	tick_nohz_account_idle_ticks(ts);
+	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->tick_stopped) {
+		tick_nohz_restart_sched_tick(ts, ktime_get());
+		tick_nohz_account_idle_ticks(ts);
+	}
 }
 
-void tick_nohz_idle_restart_tick(void)
+static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
 {
-	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	if (tick_nohz_full_cpu(smp_processor_id()))
+		__tick_nohz_full_update_tick(ts, now);
+	else
+		tick_nohz_restart_sched_tick(ts, now);
 
-	if (ts->tick_stopped)
-		__tick_nohz_idle_restart_tick(ts, ktime_get());
+	tick_nohz_account_idle_ticks(ts);
 }
 
 /**
@@ -1252,7 +1264,7 @@ void tick_nohz_idle_exit(void)
 		tick_nohz_stop_idle(ts, now);
 
 	if (tick_stopped)
-		__tick_nohz_idle_restart_tick(ts, now);
+		tick_nohz_idle_update_tick(ts, now);
 
 	local_irq_enable();
 }
-- 
GitLab


From 3f624314b3f7c580aa5844a8930befd71e2a287c Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 13 May 2021 01:29:17 +0200
Subject: [PATCH 0556/3804] tick/nohz: Remove superflous check for
 CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

The vtime_accounting_enabled_this_cpu() early check already makes what
follows dead code in the case of CONFIG_VIRT_CPU_ACCOUNTING_NATIVE.
No need to keep the ifdeferry around.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-4-frederic@kernel.org
---
 kernel/time/tick-sched.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 05c1ce1034d6c..1afa7595d1e00 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1196,7 +1196,6 @@ unsigned long tick_nohz_get_idle_calls(void)
 
 static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 {
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	unsigned long ticks;
 
 	if (vtime_accounting_enabled_this_cpu())
@@ -1212,7 +1211,6 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
 	 */
 	if (ticks && ticks < LONG_MAX)
 		account_idle_ticks(ticks);
-#endif
 }
 
 void tick_nohz_idle_restart_tick(void)
-- 
GitLab


From 96c9b90396f9ab6caf13b4ebf00095818ac53b7f Mon Sep 17 00:00:00 2001
From: Yunfeng Ye <yeyunfeng@huawei.com>
Date: Thu, 13 May 2021 01:29:18 +0200
Subject: [PATCH 0557/3804] tick/nohz: Update idle_exittime on actual idle exit

The idle_exittime field of tick_sched is used to record the time when
the idle state was left. But currently idle_exittime is updated in
tick_nohz_restart_sched_tick(), which is not always called from the idle
state when nohz_full is configured:

  tick_irq_exit
    tick_nohz_irq_exit
      tick_nohz_full_update_tick
        tick_nohz_restart_sched_tick
          ts->idle_exittime = now;

It's thus overwritten by mistake on nohz_full tick restart. Move the
update to the appropriate idle exit path instead.

Signed-off-by: Yunfeng Ye <yeyunfeng@huawei.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-5-frederic@kernel.org
---
 kernel/time/tick-sched.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 1afa7595d1e00..89ec0abcd62b3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -921,8 +921,6 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	 * Cancel the scheduled timer and restore the tick
 	 */
 	ts->tick_stopped  = 0;
-	ts->idle_exittime = now;
-
 	tick_nohz_restart(ts, now);
 }
 
@@ -1194,10 +1192,13 @@ unsigned long tick_nohz_get_idle_calls(void)
 	return ts->idle_calls;
 }
 
-static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
+static void tick_nohz_account_idle_time(struct tick_sched *ts,
+					ktime_t now)
 {
 	unsigned long ticks;
 
+	ts->idle_exittime = now;
+
 	if (vtime_accounting_enabled_this_cpu())
 		return;
 	/*
@@ -1218,8 +1219,9 @@ void tick_nohz_idle_restart_tick(void)
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->tick_stopped) {
-		tick_nohz_restart_sched_tick(ts, ktime_get());
-		tick_nohz_account_idle_ticks(ts);
+		ktime_t now = ktime_get();
+		tick_nohz_restart_sched_tick(ts, now);
+		tick_nohz_account_idle_time(ts, now);
 	}
 }
 
@@ -1230,7 +1232,7 @@ static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
 	else
 		tick_nohz_restart_sched_tick(ts, now);
 
-	tick_nohz_account_idle_ticks(ts);
+	tick_nohz_account_idle_time(ts, now);
 }
 
 /**
-- 
GitLab


From 176b8906c399a170886ea4bad5b24763c6713d61 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 13 May 2021 01:29:19 +0200
Subject: [PATCH 0558/3804] tick/nohz: Update nohz_full Kconfig help

CONFIG_NO_HZ_FULL behaves just like CONFIG_NO_HZ_IDLE by default.
Reassure distros about it.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-6-frederic@kernel.org
---
 kernel/time/Kconfig | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 83e158d016bad..7df71ef0e1fd9 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -117,13 +117,14 @@ config NO_HZ_FULL
 	 the task mostly runs in userspace and has few kernel activity.
 
 	 You need to fill up the nohz_full boot parameter with the
-	 desired range of dynticks CPUs.
+	 desired range of dynticks CPUs to use it. This is implemented at
+	 the expense of some overhead in user <-> kernel transitions:
+	 syscalls, exceptions and interrupts.
 
-	 This is implemented at the expense of some overhead in user <-> kernel
-	 transitions: syscalls, exceptions and interrupts. Even when it's
-	 dynamically off.
+	 By default, without passing the nohz_full parameter, this behaves just
+	 like NO_HZ_IDLE.
 
-	 Say N.
+	 If you're a distro say Y.
 
 endchoice
 
-- 
GitLab


From 29721b859217b946bfc001c1644745ed4d7c26cb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 13 May 2021 01:29:20 +0200
Subject: [PATCH 0559/3804] tick/nohz: Only wake up a single target cpu when
 kicking a task

When adding a tick dependency to a task, it's necessary to
wake up the CPU where the task resides to reevaluate tick
dependencies on that CPU.

However the current code wakes up all nohz_full CPUs, which
is unnecessary.

Switch to waking up a single CPU, by using ordering of writes
to task->cpu and task->tick_dep_mask.

[ mingo: Minor readability edit. ]

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-7-frederic@kernel.org
---
 kernel/time/tick-sched.c | 40 +++++++++++++++++++++++++++-------------
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 89ec0abcd62b3..b90ca6635ea4c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -322,6 +322,31 @@ void tick_nohz_full_kick_cpu(int cpu)
 	irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
 }
 
+static void tick_nohz_kick_task(struct task_struct *tsk)
+{
+	int cpu = task_cpu(tsk);
+
+	/*
+	 * If the task concurrently migrates to another CPU,
+	 * we guarantee it sees the new tick dependency upon
+	 * schedule.
+	 *
+	 *
+	 * set_task_cpu(p, cpu);
+	 *   STORE p->cpu = @cpu
+	 * __schedule() (switch to task 'p')
+	 *   LOCK rq->lock
+	 *   smp_mb__after_spin_lock()          STORE p->tick_dep_mask
+	 *   tick_nohz_task_switch()            smp_mb() (atomic_fetch_or())
+	 *      LOAD p->tick_dep_mask           LOAD p->cpu
+	 */
+
+	preempt_disable();
+	if (cpu_online(cpu))
+		tick_nohz_full_kick_cpu(cpu);
+	preempt_enable();
+}
+
 /*
  * Kick all full dynticks CPUs in order to force these to re-evaluate
  * their dependency on the tick and restart it if necessary.
@@ -404,19 +429,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
  */
 void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
 {
-	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
-		if (tsk == current) {
-			preempt_disable();
-			tick_nohz_full_kick();
-			preempt_enable();
-		} else {
-			/*
-			 * Some future tick_nohz_full_kick_task()
-			 * should optimize this.
-			 */
-			tick_nohz_full_kick_all();
-		}
-	}
+	if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
+		tick_nohz_kick_task(tsk);
 }
 EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
 
-- 
GitLab


From 1e4ca26d367ae71743e25068e5cd8750ef3f5f7d Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 13 May 2021 01:29:21 +0200
Subject: [PATCH 0560/3804] tick/nohz: Change signal tick dependency to wake up
 CPUs of member tasks

Rather than waking up all nohz_full CPUs on the system, only wake up
the target CPUs of member threads of the signal.

Reduces interruptions to nohz_full CPUs.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-8-frederic@kernel.org
---
 include/linux/tick.h           |  8 ++++----
 kernel/time/posix-cpu-timers.c |  4 ++--
 kernel/time/tick-sched.c       | 15 +++++++++++++--
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 2258984a0e8a7..0bb80a7f05b95 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -211,7 +211,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
 				   enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
 				     enum tick_dep_bits bit);
-extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
+extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
 				     enum tick_dep_bits bit);
 extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
 				       enum tick_dep_bits bit);
@@ -256,11 +256,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
 	if (tick_nohz_full_enabled())
 		tick_nohz_dep_clear_task(tsk, bit);
 }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
 				       enum tick_dep_bits bit)
 {
 	if (tick_nohz_full_enabled())
-		tick_nohz_dep_set_signal(signal, bit);
+		tick_nohz_dep_set_signal(tsk, bit);
 }
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit)
@@ -288,7 +288,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
 				     enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_task(struct task_struct *tsk,
 				       enum tick_dep_bits bit) { }
-static inline void tick_dep_set_signal(struct signal_struct *signal,
+static inline void tick_dep_set_signal(struct task_struct *tsk,
 				       enum tick_dep_bits bit) { }
 static inline void tick_dep_clear_signal(struct signal_struct *signal,
 					 enum tick_dep_bits bit) { }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3bb96a8b49c9b..29a5e54e6e105 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
 		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
 	else
-		tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
+		tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 /*
@@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 	if (*newval < *nextevt)
 		*nextevt = *newval;
 
-	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
+	tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
 static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index b90ca6635ea4c..acbe6722cf756 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -444,9 +444,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
  * Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
  * per process timers.
  */
-void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
+void tick_nohz_dep_set_signal(struct task_struct *tsk,
+			      enum tick_dep_bits bit)
 {
-	tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
+	int prev;
+	struct signal_struct *sig = tsk->signal;
+
+	prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
+	if (!prev) {
+		struct task_struct *t;
+
+		lockdep_assert_held(&tsk->sighand->siglock);
+		__for_each_thread(sig, t)
+			tick_nohz_kick_task(t);
+	}
 }
 
 void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
-- 
GitLab


From a1dfb6311c7739e21e160bc4c5575a1b21b48c87 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 13 May 2021 01:29:22 +0200
Subject: [PATCH 0561/3804] tick/nohz: Kick only _queued_ task whose tick
 dependency is updated

When the tick dependency of a task is updated, we want it to acknowledge
the new state and restart the tick if needed. If the task is not
running, we don't need to kick it because it will observe the new
dependency upon scheduling in. But if the task is running, we may need
to send an IPI to it so that it gets notified.

Unfortunately we don't have the means to check if a task is running
in a race free way. Checking p->on_cpu in a synchronized way against
p->tick_dep_mask would imply adding a full barrier between
prepare_task_switch() and tick_nohz_task_switch(), which we want to
avoid in this fast-path.

Therefore we blindly fire an IPI to the task's CPU.

Meanwhile we can check if the task is queued on the CPU rq because
p->on_rq is always set to TASK_ON_RQ_QUEUED _before_ schedule() and its
full barrier that precedes tick_nohz_task_switch(). And if the task is
queued on a nohz_full CPU, it also has fair chances to be running as the
isolation constraints prescribe running single tasks on full dynticks
CPUs.

So use this as a trick to check if we can spare an IPI toward a
non-running task.

NOTE: For the ordering to be correct, it is assumed that we never
deactivate a task while it is running, the only exception being the task
deactivating itself while scheduling out.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512232924.150322-9-frederic@kernel.org
---
 include/linux/sched.h    |  2 ++
 kernel/sched/core.c      |  5 +++++
 kernel/time/tick-sched.c | 19 +++++++++++++++++--
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517b..3341ae2e82319 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2011,6 +2011,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 #endif /* CONFIG_SMP */
 
+extern bool sched_task_on_rq(struct task_struct *p);
+
 /*
  * In order to reduce various lock holder preemption latencies provide an
  * interface to see if a vCPU is currently running or not.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095f..78e480f7881a7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1590,6 +1590,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
 static inline void init_uclamp(void) { }
 #endif /* CONFIG_UCLAMP_TASK */
 
+bool sched_task_on_rq(struct task_struct *p)
+{
+	return task_on_rq_queued(p);
+}
+
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (!(flags & ENQUEUE_NOCLOCK))
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index acbe6722cf756..197a3bd882ad7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -324,14 +324,28 @@ void tick_nohz_full_kick_cpu(int cpu)
 
 static void tick_nohz_kick_task(struct task_struct *tsk)
 {
-	int cpu = task_cpu(tsk);
+	int cpu;
+
+	/*
+	 * If the task is not running, run_posix_cpu_timers()
+	 * has nothing to elapse, IPI can then be spared.
+	 *
+	 * activate_task()                      STORE p->tick_dep_mask
+	 *   STORE p->on_rq
+	 * __schedule() (switch to task 'p')    smp_mb() (atomic_fetch_or())
+	 *   LOCK rq->lock                      LOAD p->on_rq
+	 *   smp_mb__after_spin_lock()
+	 *   tick_nohz_task_switch()
+	 *     LOAD p->tick_dep_mask
+	 */
+	if (!sched_task_on_rq(tsk))
+		return;
 
 	/*
 	 * If the task concurrently migrates to another CPU,
 	 * we guarantee it sees the new tick dependency upon
 	 * schedule.
 	 *
-	 *
 	 * set_task_cpu(p, cpu);
 	 *   STORE p->cpu = @cpu
 	 * __schedule() (switch to task 'p')
@@ -340,6 +354,7 @@ static void tick_nohz_kick_task(struct task_struct *tsk)
 	 *   tick_nohz_task_switch()            smp_mb() (atomic_fetch_or())
 	 *      LOAD p->tick_dep_mask           LOAD p->cpu
 	 */
+	cpu = task_cpu(tsk);
 
 	preempt_disable();
 	if (cpu_online(cpu))
-- 
GitLab


From 0fdcccfafcffac70b452b3127cc3d981f0117655 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 13 May 2021 01:29:23 +0200
Subject: [PATCH 0562/3804] tick/nohz: Call tick_nohz_task_switch() with
 interrupts disabled

Call tick_nohz_task_switch() slightly earlier after the context switch
to benefit from disabled IRQs. This way the function doesn't need to
disable them once more.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210512232924.150322-10-frederic@kernel.org
---
 kernel/sched/core.c      | 2 +-
 kernel/time/tick-sched.c | 7 +------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78e480f7881a7..8f86ac28877e5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4212,6 +4212,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
 	finish_task(prev);
+	tick_nohz_task_switch();
 	finish_lock_switch(rq);
 	finish_arch_post_lock_switch();
 	kcov_finish_switch(current);
@@ -4257,7 +4258,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		put_task_struct_rcu_user(prev);
 	}
 
-	tick_nohz_task_switch();
 	return rq;
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 197a3bd882ad7..6ea619d644faf 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -487,13 +487,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bi
  */
 void __tick_nohz_task_switch(void)
 {
-	unsigned long flags;
 	struct tick_sched *ts;
 
-	local_irq_save(flags);
-
 	if (!tick_nohz_full_cpu(smp_processor_id()))
-		goto out;
+		return;
 
 	ts = this_cpu_ptr(&tick_cpu_sched);
 
@@ -502,8 +499,6 @@ void __tick_nohz_task_switch(void)
 		    atomic_read(&current->signal->tick_dep_mask))
 			tick_nohz_full_kick();
 	}
-out:
-	local_irq_restore(flags);
 }
 
 /* Get the boot-time nohz CPU list from the kernel parameters. */
-- 
GitLab


From b813511135e8b84fa741afdfbab4937919100bef Mon Sep 17 00:00:00 2001
From: Abhijeet Rao <abhijeet.rao@intel.com>
Date: Wed, 12 May 2021 11:08:12 +0300
Subject: [PATCH 0563/3804] xhci-pci: Allow host runtime PM as default for
 Intel Alder Lake xHCI

In the same way as Intel Tiger Lake TCSS (Type-C Subsystem) the Alder Lake
TCSS xHCI needs to be runtime suspended whenever possible to allow the
TCSS hardware block to enter D3cold and thus save energy.

Cc: stable@vger.kernel.org
Signed-off-by: Abhijeet Rao <abhijeet.rao@intel.com>
Signed-off-by: Nikunj A. Dadhania <nikunj.dadhania@intel.com>
Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-2-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 5bbccc9a0179f..a858add8436c5 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -57,6 +57,7 @@
 #define PCI_DEVICE_ID_INTEL_CML_XHCI			0xa3af
 #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI		0x9a13
 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI		0x1138
+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI		0x461e
 
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4			0x43b9
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3			0x43ba
@@ -243,7 +244,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	     pdev->device == PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_XHCI ||
 	     pdev->device == PCI_DEVICE_ID_INTEL_ICE_LAKE_XHCI ||
 	     pdev->device == PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI ||
-	     pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI))
+	     pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI))
 		xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
 
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
-- 
GitLab


From 9b6a126ae58d9edfdde2d5f2e87f7615ea5e0155 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 12 May 2021 11:08:13 +0300
Subject: [PATCH 0564/3804] xhci: Fix giving back cancelled URBs even if halted
 endpoint can't reset

Commit 9ebf30007858 ("xhci: Fix halted endpoint at stop endpoint command
completion") in 5.12 changes how cancelled URBs are given back.

To cancel a URB xhci driver needs to stop the endpoint first.
To clear a halted endpoint xhci driver needs to reset the endpoint.

In rare cases when an endpoint halt (error) races with an endpoint stop we
need to clear the reset before removing, and giving back the cancelled URB.

The above change in 5.12 takes care of this, but it also relies on the
reset endpoint completion handler to give back the cancelled URBs.

There are cases when driver refuses to queue reset endpoint commands,
for example when a link suddenly goes to an inactive error state.
In this case the cancelled URB is never given back.

Fix this by giving back the URB in the stop endpoint if queuing a reset
endpoint command fails.

Fixes: 9ebf30007858 ("xhci: Fix halted endpoint at stop endpoint command completion")
CC: <stable@vger.kernel.org> # 5.12
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-3-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 05c38dd3ee361..a8e4189277da8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -862,7 +862,7 @@ done:
 	return ret;
 }
 
-static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
+static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 				struct xhci_virt_ep *ep, unsigned int stream_id,
 				struct xhci_td *td,
 				enum xhci_ep_reset_type reset_type)
@@ -875,7 +875,7 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 	 * Device will be reset soon to recover the link so don't do anything
 	 */
 	if (ep->vdev->flags & VDEV_PORT_ERROR)
-		return;
+		return -ENODEV;
 
 	/* add td to cancelled list and let reset ep handler take care of it */
 	if (reset_type == EP_HARD_RESET) {
@@ -888,16 +888,18 @@ static void xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
 
 	if (ep->ep_state & EP_HALTED) {
 		xhci_dbg(xhci, "Reset ep command already pending\n");
-		return;
+		return 0;
 	}
 
 	err = xhci_reset_halted_ep(xhci, slot_id, ep->ep_index, reset_type);
 	if (err)
-		return;
+		return err;
 
 	ep->ep_state |= EP_HALTED;
 
 	xhci_ring_cmd_db(xhci);
+
+	return 0;
 }
 
 /*
@@ -1014,6 +1016,7 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 	struct xhci_td *td = NULL;
 	enum xhci_ep_reset_type reset_type;
 	struct xhci_command *command;
+	int err;
 
 	if (unlikely(TRB_TO_SUSPEND_PORT(le32_to_cpu(trb->generic.field[3])))) {
 		if (!xhci->devs[slot_id])
@@ -1058,7 +1061,10 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 					td->status = -EPROTO;
 			}
 			/* reset ep, reset handler cleans up cancelled tds */
-			xhci_handle_halted_endpoint(xhci, ep, 0, td, reset_type);
+			err = xhci_handle_halted_endpoint(xhci, ep, 0, td,
+							  reset_type);
+			if (err)
+				break;
 			xhci_stop_watchdog_timer_in_irq(xhci, ep);
 			return;
 		case EP_STATE_RUNNING:
-- 
GitLab


From dda32c00c9a0fa103b5d54ef72c477b7aa993679 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 12 May 2021 11:08:14 +0300
Subject: [PATCH 0565/3804] xhci: Do not use GFP_KERNEL in (potentially) atomic
 context

'xhci_urb_enqueue()' is passed a 'mem_flags' argument, because "URBs may be
submitted in interrupt context" (see comment related to 'usb_submit_urb()'
in 'drivers/usb/core/urb.c')

So this flag should be used in all the calling chain.
Up to now, 'xhci_check_maxpacket()' which is only called from
'xhci_urb_enqueue()', uses GFP_KERNEL.

Be safe and pass the mem_flags to this function as well.

Fixes: ddba5cd0aeff ("xhci: Use command structures when queuing commands on the command ring")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-4-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index ca9385d22f68d..27283654ca080 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1514,7 +1514,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci,
  * we need to issue an evaluate context command and wait on it.
  */
 static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
-		unsigned int ep_index, struct urb *urb)
+		unsigned int ep_index, struct urb *urb, gfp_t mem_flags)
 {
 	struct xhci_container_ctx *out_ctx;
 	struct xhci_input_control_ctx *ctrl_ctx;
@@ -1545,7 +1545,7 @@ static int xhci_check_maxpacket(struct xhci_hcd *xhci, unsigned int slot_id,
 		 * changes max packet sizes.
 		 */
 
-		command = xhci_alloc_command(xhci, true, GFP_KERNEL);
+		command = xhci_alloc_command(xhci, true, mem_flags);
 		if (!command)
 			return -ENOMEM;
 
@@ -1639,7 +1639,7 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag
 		 */
 		if (urb->dev->speed == USB_SPEED_FULL) {
 			ret = xhci_check_maxpacket(xhci, slot_id,
-					ep_index, urb);
+					ep_index, urb, mem_flags);
 			if (ret < 0) {
 				xhci_urb_free_priv(urb_priv);
 				urb->hcpriv = NULL;
-- 
GitLab


From ca09b1bea63ab83f4cca3a2ae8bc4f597ec28851 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Wed, 12 May 2021 11:08:15 +0300
Subject: [PATCH 0566/3804] usb: xhci: Increase timeout for HC halt

On some devices (specifically the SC8180x based Surface Pro X with
QCOM04A6) HC halt / xhci_halt() times out during boot. Manually binding
the xhci-hcd driver at some point later does not exhibit this behavior.
To work around this, double XHCI_MAX_HALT_USEC, which also resolves this
issue.

Cc: <stable@vger.kernel.org>
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-5-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ext-caps.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h
index fa59b242cd515..e8af0a125f84b 100644
--- a/drivers/usb/host/xhci-ext-caps.h
+++ b/drivers/usb/host/xhci-ext-caps.h
@@ -7,8 +7,9 @@
  * Author: Sarah Sharp
  * Some code borrowed from the Linux EHCI driver.
  */
-/* Up to 16 ms to halt an HC */
-#define XHCI_MAX_HALT_USEC	(16*1000)
+
+/* HC should halt within 16 ms, but use 32 ms as some hosts take longer */
+#define XHCI_MAX_HALT_USEC	(32 * 1000)
 /* HC not running - set to 1 when run/stop bit is cleared. */
 #define XHCI_STS_HALT		(1<<0)
 
-- 
GitLab


From 3c128781d8da463761495aaf8898c9ecb4e71528 Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep.singh@amd.com>
Date: Wed, 12 May 2021 11:08:16 +0300
Subject: [PATCH 0567/3804] xhci: Add reset resume quirk for AMD xhci
 controller.

One of the AMD xhci controllers requires a reset on resume.
Occasionally the AMD xhci controller does not respond to
the Stop Endpoint command.
Once the issue happens the controller goes into a bad state,
and in that case the controller needs to be reset.

Cc: <stable@vger.kernel.org>
Signed-off-by: Sandeep Singh <sandeep.singh@amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210512080816.866037-6-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a858add8436c5..7bc18cf8042cc 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -167,8 +167,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	    (pdev->device == 0x15e0 || pdev->device == 0x15e1))
 		xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND;
 
-	if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5)
+	if (pdev->vendor == PCI_VENDOR_ID_AMD && pdev->device == 0x15e5) {
 		xhci->quirks |= XHCI_DISABLE_SPARSE;
+		xhci->quirks |= XHCI_RESET_ON_RESUME;
+	}
 
 	if (pdev->vendor == PCI_VENDOR_ID_AMD)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
-- 
GitLab


From 12701ce524bc9b7c6345a2425208501fd2c62aad Mon Sep 17 00:00:00 2001
From: Badhri Jagan Sridharan <badhri@google.com>
Date: Mon, 10 May 2021 14:17:56 -0700
Subject: [PATCH 0568/3804] usb: typec: tcpm: Fix SINK_DISCOVERY current limit
 for Rp-default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a regression introduced by 1373fefc6243 ("usb: typec: tcpm:
Allow slow charging loops to comply to pSnkStby")

When Source advertises Rp-default, tcpm would request 500mA when in
SINK_DISCOVERY, Type-C spec advises the sink to follow BC1.2 current
limits when Rp-default is advertised.
[12750.503381] Requesting mux state 1, usb-role 2, orientation 1
[12750.503837] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS]
[12751.003891] state change SNK_STARTUP -> SNK_DISCOVERY
[12751.003900] Setting voltage/current limit 5000 mV 500 mA

This patch restores the behavior where the tcpm would request 0mA when
Rp-default is advertised by the source.
[   73.174252] Requesting mux state 1, usb-role 2, orientation 1
[   73.174749] state change SNK_ATTACHED -> SNK_STARTUP [rev3 NONE_AMS]
[   73.674800] state change SNK_STARTUP -> SNK_DISCOVERY
[   73.674808] Setting voltage/current limit 5000 mV 0 mA

During SNK_DISCOVERY, cap the current limit to PD_P_SNK_STDBY_MW / 5 only
for the slow_charger_loop case.

Fixes: 1373fefc6243 ("usb: typec: tcpm: Allow slow charging loops to comply to pSnkStby")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Badhri Jagan Sridharan <badhri@google.com>
Link: https://lore.kernel.org/r/20210510211756.3346954-1-badhri@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index db567e6fde924..72e4d63a23669 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4085,7 +4085,7 @@ static void run_state_machine(struct tcpm_port *port)
 		if (port->vbus_present) {
 			u32 current_lim = tcpm_get_current_limit(port);
 
-			if (port->slow_charger_loop || (current_lim > PD_P_SNK_STDBY_MW / 5))
+			if (port->slow_charger_loop && (current_lim > PD_P_SNK_STDBY_MW / 5))
 				current_lim = PD_P_SNK_STDBY_MW / 5;
 			tcpm_set_current_limit(port, current_lim, 5000);
 			tcpm_set_charge(port, true);
-- 
GitLab


From 975f94c7d6c306b833628baa9aec3f79db1eb3a1 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Wed, 12 May 2021 10:07:38 +0800
Subject: [PATCH 0569/3804] usb: core: hub: fix race condition about TRSMRCY of
 resume

This may happen if the port changes to resume status exactly
when usb_port_resume() gets the port status; it still needs to provide
the TRSMRCY time before accessing the device.

CC: <stable@vger.kernel.org>
Reported-by: Tianping Fang <tianping.fang@mediatek.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/20210512020738.52961-1-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b2bc4b7c42895..fc7d6cdacf16b 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3642,9 +3642,6 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 		 * sequence.
 		 */
 		status = hub_port_status(hub, port1, &portstatus, &portchange);
-
-		/* TRSMRCY = 10 msec */
-		msleep(10);
 	}
 
  SuspendCleared:
@@ -3659,6 +3656,9 @@ int usb_port_resume(struct usb_device *udev, pm_message_t msg)
 				usb_clear_port_feature(hub->hdev, port1,
 						USB_PORT_FEAT_C_SUSPEND);
 		}
+
+		/* TRSMRCY = 10 msec */
+		msleep(10);
 	}
 
 	if (udev->persist_enabled)
-- 
GitLab


From e181811bd04d874fe48bbfa1165a82068b58144d Mon Sep 17 00:00:00 2001
From: Hou Pu <houpu.main@gmail.com>
Date: Thu, 13 May 2021 21:04:10 +0800
Subject: [PATCH 0570/3804] nvmet: use new ana_log_size instead the old one

The new ana_log_size should be used instead of the old one.
Otherwise a kernel NULL pointer dereference will happen, as shown below:

[   38.957849][   T69] BUG: kernel NULL pointer dereference, address: 000000000000003c
[   38.975550][   T69] #PF: supervisor write access in kernel mode
[   38.975955][   T69] #PF: error_code(0x0002) - not-present page
[   38.976905][   T69] PGD 0 P4D 0
[   38.979388][   T69] Oops: 0002 [#1] SMP NOPTI
[   38.980488][   T69] CPU: 0 PID: 69 Comm: kworker/0:2 Not tainted 5.12.0+ #54
[   38.981254][   T69] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
[   38.982502][   T69] Workqueue: events nvme_loop_execute_work
[   38.985219][   T69] RIP: 0010:memcpy_orig+0x68/0x10f
[   38.986203][   T69] Code: 83 c2 20 eb 44 48 01 d6 48 01 d7 48 83 ea 20 0f 1f 00 48 83 ea 20 4c 8b 46 f8 4c 8b 4e f0 4c 8b 56 e8 4c 8b 5e e0 48 8d 76 e0 <4c> 89 47 f8 4c 89 4f f0 4c 89 57 e8 4c 89 5f e0 48 8d 7f e0 73 d2
[   38.987677][   T69] RSP: 0018:ffffc900001b7d48 EFLAGS: 00000287
[   38.987996][   T69] RAX: 0000000000000020 RBX: 0000000000000024 RCX: 0000000000000010
[   38.988327][   T69] RDX: ffffffffffffffe4 RSI: ffff8881084bc004 RDI: 0000000000000044
[   38.988620][   T69] RBP: 0000000000000024 R08: 0000000100000000 R09: 0000000000000000
[   38.988991][   T69] R10: 0000000100000000 R11: 0000000000000001 R12: 0000000000000024
[   38.989289][   T69] R13: ffff8881084bc000 R14: 0000000000000000 R15: 0000000000000024
[   38.989845][   T69] FS:  0000000000000000(0000) GS:ffff888237c00000(0000) knlGS:0000000000000000
[   38.990234][   T69] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   38.990490][   T69] CR2: 000000000000003c CR3: 00000001085b2000 CR4: 00000000000006f0
[   38.991105][   T69] Call Trace:
[   38.994157][   T69]  sg_copy_buffer+0xb8/0xf0
[   38.995357][   T69]  nvmet_copy_to_sgl+0x48/0x6d
[   38.995565][   T69]  nvmet_execute_get_log_page_ana+0xd4/0x1cb
[   38.995792][   T69]  nvmet_execute_get_log_page+0xc9/0x146
[   38.995992][   T69]  nvme_loop_execute_work+0x3e/0x44
[   38.996181][   T69]  process_one_work+0x1c3/0x3c0
[   38.996393][   T69]  worker_thread+0x44/0x3d0
[   38.996600][   T69]  ? cancel_delayed_work+0x90/0x90
[   38.996804][   T69]  kthread+0xf7/0x130
[   38.996961][   T69]  ? kthread_create_worker_on_cpu+0x70/0x70
[   38.997171][   T69]  ret_from_fork+0x22/0x30
[   38.997705][   T69] Modules linked in:
[   38.998741][   T69] CR2: 000000000000003c
[   39.000104][   T69] ---[ end trace e719927b609d0fa0 ]---

Fixes: 5e1f689913a4 ("nvme-multipath: fix double initialization of ANA state")
Signed-off-by: Hou Pu <houpu.main@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index deb14562c96ae..f81871c7128a0 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -817,7 +817,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	if (ana_log_size > ctrl->ana_log_size) {
 		nvme_mpath_stop(ctrl);
 		kfree(ctrl->ana_log_buf);
-		ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+		ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
 		if (!ctrl->ana_log_buf)
 			return -ENOMEM;
 	}
-- 
GitLab


From 5d31950a483381b5444494dfb7fa5ed764193b92 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Sun, 9 May 2021 17:49:26 -0500
Subject: [PATCH 0571/3804] drm/radeon/ni_dpm: Fix booting bug

Create new structure NISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
NISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct NISLANDS_SMC_SWSTATE doesn't allow for code that accesses
the first element of initialState.levels and ACPIState.levels
arrays:

drivers/gpu/drm/radeon/ni_dpm.c:
1690         table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
1691                 cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl);
...
1903:   table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
1904:   table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);

because such element cannot exist without previously allocating
any dynamic memory for it (which never actually happens).

That's why struct NISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct NISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 434fb1e7444a ("drm/radeon/nislands_smc.h: Replace one-element
array with flexible-array member in struct NISLANDS_SMC_SWSTATE"), the
size of dpmLevels in struct NISLANDS_SMC_STATETABLE should be fixed to
be NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Bug: https://lore.kernel.org/dri-devel/3eedbe78-1fbd-4763-a7f3-ac5665e76a4a@xenosoft.de/
Fixes: 434fb1e7444a ("drm/radeon/nislands_smc.h: Replace one-element array with flexible-array member in struct NISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Link: https://lore.kernel.org/dri-devel/9bb5fcbd-daf5-1669-b3e7-b8624b3c36f9@xenosoft.de/
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/ni_dpm.c       | 144 +++++++++++++-------------
 drivers/gpu/drm/radeon/nislands_smc.h |  34 +++---
 2 files changed, 94 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index dd5ef64937230..769f666335ac4 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -1687,102 +1687,102 @@ static int ni_populate_smc_initial_state(struct radeon_device *rdev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl_2);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(ni_pi->clock_registers.mpll_ss2);
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
 		cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum_2);
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		NISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = ni_get_std_voltage_value(rdev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			ni_populate_std_voltage_value(rdev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		ni_populate_voltage_value(rdev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
-	ni_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+	ni_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
+	table->initialState.level.aT = cpu_to_be32(reg);
 
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
 
 	if (pi->boot_in_gen2)
-		table->initialState.levels[0].gen2PCIE = 1;
+		table->initialState.level.gen2PCIE = 1;
 	else
-		table->initialState.levels[0].gen2PCIE = 0;
+		table->initialState.level.gen2PCIE = 0;
 
 	if (pi->mem_gddr5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			cypress_get_strobe_mode_settings(rdev,
 							 initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -1813,43 +1813,43 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 	if (pi->acpi_vddc) {
 		ret = ni_populate_voltage_value(rdev,
 						&eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = ni_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				ni_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
 
 		if (pi->pcie_gen2) {
 			if (pi->acpi_pcie_gen2)
-				table->ACPIState.levels[0].gen2PCIE = 1;
+				table->ACPIState.level.gen2PCIE = 1;
 			else
-				table->ACPIState.levels[0].gen2PCIE = 0;
+				table->ACPIState.level.gen2PCIE = 0;
 		} else {
-			table->ACPIState.levels[0].gen2PCIE = 0;
+			table->ACPIState.level.gen2PCIE = 0;
 		}
 	} else {
 		ret = ni_populate_voltage_value(rdev,
 						&eg_pi->vddc_voltage_table,
 						pi->min_vddc_in_table,
-						&table->ACPIState.levels[0].vddc);
+						&table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = ni_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc,
+						       &table->ACPIState.level.vddc,
 						       &std_vddc);
 			if (!ret)
 				ni_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = 0;
+		table->ACPIState.level.gen2PCIE = 0;
 	}
 
 	if (eg_pi->acpi_vddci) {
@@ -1857,7 +1857,7 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 			ni_populate_voltage_value(rdev,
 						  &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 
@@ -1900,37 +1900,37 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
+	table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	ni_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+	ni_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 1;
+		table->ACPIState.level.ACIndex = 1;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -1980,7 +1980,9 @@ static int ni_init_smc_table(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	table->ULVState = table->initialState;
 
diff --git a/drivers/gpu/drm/radeon/nislands_smc.h b/drivers/gpu/drm/radeon/nislands_smc.h
index 7395cb6b3cac6..42f3bab0f9ee6 100644
--- a/drivers/gpu/drm/radeon/nislands_smc.h
+++ b/drivers/gpu/drm/radeon/nislands_smc.h
@@ -143,6 +143,14 @@ struct NISLANDS_SMC_SWSTATE
 
 typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE;
 
+struct NISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	NISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define NISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define NISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define NISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -160,19 +168,19 @@ typedef struct NISLANDS_SMC_VOLTAGEMASKTABLE NISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct NISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    PP_NIslands_DPM2Parameters          dpm2Params;
-    NISLANDS_SMC_SWSTATE                initialState;
-    NISLANDS_SMC_SWSTATE                ACPIState;
-    NISLANDS_SMC_SWSTATE                ULVState;
-    NISLANDS_SMC_SWSTATE                driverState;
-    NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t                             thermalProtectType;
+	uint8_t                             systemFlags;
+	uint8_t                             maxVDDCIndexInPPTable;
+	uint8_t                             extraFlags;
+	uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+	uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+	NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
+	PP_NIslands_DPM2Parameters          dpm2Params;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  initialState;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  ACPIState;
+	struct NISLANDS_SMC_SWSTATE_SINGLE  ULVState;
+	NISLANDS_SMC_SWSTATE                driverState;
+	NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct NISLANDS_SMC_STATETABLE NISLANDS_SMC_STATETABLE;
-- 
GitLab


From 1ddeedaa28e14c4e40c95e3d8026d69eef47eaba Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Sun, 9 May 2021 17:55:25 -0500
Subject: [PATCH 0572/3804] drm/radeon/si_dpm: Fix SMU power state load

Create new structure SISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
SISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct SISLANDS_SMC_SWSTATE doesn't allow for code that accesses
the first element of initialState.levels and ACPIState.levels
arrays:

4353         table->initialState.levels[0].mclk.vDLL_CNTL =
4354                 cpu_to_be32(si_pi->clock_registers.dll_cntl);
...
4555         table->ACPIState.levels[0].mclk.vDLL_CNTL =
4556                 cpu_to_be32(dll_cntl);

because such an element cannot exist without previously allocating
any dynamic memory for it (which never actually happens).

That's why struct SISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct SISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 96e27e8d919e ("drm/radeon/si_dpm: Replace one-element array
with flexible-array in struct SISLANDS_SMC_SWSTATE"), the size of
dpmLevels in struct SISLANDS_SMC_STATETABLE should be fixed to be
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1583
Fixes: 96e27e8d919e ("drm/radeon/si_dpm: Replace one-element array with flexible-array in struct SISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Reported-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/si_dpm.c       | 174 +++++++++++++-------------
 drivers/gpu/drm/radeon/sislands_smc.h |  34 +++--
 2 files changed, 109 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 9186095518047..2c54c0d7ca5be 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -4350,70 +4350,70 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(si_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
 
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
 
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		SISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = si_get_std_voltage_value(rdev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			si_populate_std_voltage_value(rdev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		si_populate_voltage_value(rdev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
 	if (si_pi->vddc_phase_shed_control)
 		si_populate_phase_shedding_value(rdev,
@@ -4421,43 +4421,43 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
 						 initial_state->performance_levels[0].vddc,
 						 initial_state->performance_levels[0].sclk,
 						 initial_state->performance_levels[0].mclk,
-						 &table->initialState.levels[0].vddc);
+						 &table->initialState.level.vddc);
 
-	si_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+	si_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
+	table->initialState.level.aT = cpu_to_be32(reg);
 
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
 
-	table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+	table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
 
 	if (pi->mem_gddr5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			si_get_strobe_mode_settings(rdev,
 						    initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-	table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -4488,18 +4488,18 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 
 	if (pi->acpi_vddc) {
 		ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				si_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+		table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
 
 		if (si_pi->vddc_phase_shed_control) {
 			si_populate_phase_shedding_value(rdev,
@@ -4507,23 +4507,23 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 							 pi->acpi_vddc,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 		}
 	} else {
 		ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-						pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+						pi->min_vddc_in_table, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(rdev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 
 			if (!ret)
 				si_populate_std_voltage_value(rdev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
+		table->ACPIState.level.gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
 										    si_pi->sys_pcie_mask,
 										    si_pi->boot_pcie_gen,
 										    RADEON_PCIE_GEN1);
@@ -4534,14 +4534,14 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 							 pi->min_vddc_in_table,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 	}
 
 	if (pi->acpi_vddc) {
 		if (eg_pi->acpi_vddci)
 			si_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 	mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -4552,59 +4552,59 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vDLL_CNTL =
+	table->ACPIState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(dll_cntl);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(mpll_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(mpll_func_cntl_1);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(mpll_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_SS =
+	table->ACPIState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+	table->ACPIState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	si_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+	si_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 0;
+		table->ACPIState.level.ACIndex = 0;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
 
 static int si_populate_ulv_state(struct radeon_device *rdev,
-				 SISLANDS_SMC_SWSTATE *state)
+				 struct SISLANDS_SMC_SWSTATE_SINGLE *state)
 {
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct si_power_info *si_pi = si_get_pi(rdev);
@@ -4613,19 +4613,19 @@ static int si_populate_ulv_state(struct radeon_device *rdev,
 	int ret;
 
 	ret = si_convert_power_level_to_smc(rdev, &ulv->pl,
-					    &state->levels[0]);
+					    &state->level);
 	if (!ret) {
 		if (eg_pi->sclk_deep_sleep) {
 			if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
 			else
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
 		}
 		if (ulv->one_pcie_lane_in_ulv)
 			state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-		state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-		state->levels[0].ACIndex = 1;
-		state->levels[0].std_vddc = state->levels[0].vddc;
+		state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+		state->level.ACIndex = 1;
+		state->level.std_vddc = state->level.vddc;
 		state->levelCount = 1;
 
 		state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -4725,7 +4725,9 @@ static int si_init_smc_table(struct radeon_device *rdev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	ret = si_do_program_memory_timing_parameters(rdev, radeon_boot_state,
 						     SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5275,8 +5277,8 @@ static int si_upload_ulv_state(struct radeon_device *rdev)
 	if (ulv->supported && ulv->pl.vddc) {
 		u32 address = si_pi->state_table_start +
 			offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-		SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-		u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+		struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+		u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
 
 		memset(smc_state, 0, state_size);
 
diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h
index fbd6589bdab92..4ea1cb2e45a3c 100644
--- a/drivers/gpu/drm/radeon/sislands_smc.h
+++ b/drivers/gpu/drm/radeon/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
 
 typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
 
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct SISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t					thermalProtectType;
+	uint8_t					systemFlags;
+	uint8_t					maxVDDCIndexInPPTable;
+	uint8_t					extraFlags;
+	uint32_t				lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+	SISLANDS_SMC_VOLTAGEMASKTABLE		voltageMaskTable;
+	SISLANDS_SMC_VOLTAGEMASKTABLE		phaseMaskTable;
+	PP_SIslands_DPM2Parameters		dpm2Params;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	initialState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE      ACPIState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE      ULVState;
+	SISLANDS_SMC_SWSTATE			driverState;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL	dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
-- 
GitLab


From 939baec9e895e75149327c01b775f46c21e12be5 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 10 May 2021 15:46:18 -0500
Subject: [PATCH 0573/3804] drm/amd/pm: Fix out-of-bounds bug

Create new structure SISLANDS_SMC_SWSTATE_SINGLE, as initialState.levels
and ACPIState.levels are never actually used as flexible arrays. Those
arrays can be used as simple objects of type
SISLANDS_SMC_HW_PERFORMANCE_LEVEL, instead.

Currently, the code fails because flexible array _levels_ in
struct SISLANDS_SMC_SWSTATE doesn't allow for code that accesses
the first element of initialState.levels and ACPIState.levels
arrays:

drivers/gpu/drm/amd/pm/powerplay/si_dpm.c:
4820: table->initialState.levels[0].mclk.vDLL_CNTL =
4821:         cpu_to_be32(si_pi->clock_registers.dll_cntl);
...
5021: table->ACPIState.levels[0].mclk.vDLL_CNTL =
5022:         cpu_to_be32(dll_cntl);

because such an element cannot be accessed without previously allocating
enough dynamic memory for it to exist (which never actually happens).
So, there is an out-of-bounds bug in this case.

That's why struct SISLANDS_SMC_SWSTATE should only be used as type
for object driverState and new struct SISLANDS_SMC_SWSTATE_SINGLE is
created as type for objects initialState, ACPIState and ULVState.

Also, with the change from one-element array to flexible-array member
in commit 0e1aa13ca3ff ("drm/amd/pm: Replace one-element array with
flexible-array in struct SISLANDS_SMC_SWSTATE"), the size of
dpmLevels in struct SISLANDS_SMC_STATETABLE should be fixed to be
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE instead of
SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1.

Fixes: 0e1aa13ca3ff ("drm/amd/pm: Replace one-element array with flexible-array in struct SISLANDS_SMC_SWSTATE")
Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/powerplay/si_dpm.c     | 174 +++++++++---------
 .../gpu/drm/amd/pm/powerplay/sislands_smc.h   |  34 ++--
 2 files changed, 109 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
index 26a5321e621bf..15c0b8af376f8 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
@@ -4817,70 +4817,70 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
 	u32 reg;
 	int ret;
 
-	table->initialState.levels[0].mclk.vDLL_CNTL =
+	table->initialState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(si_pi->clock_registers.dll_cntl);
-	table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-	table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-	table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-	table->initialState.levels[0].mclk.vMPLL_SS =
+	table->initialState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->initialState.levels[0].mclk.vMPLL_SS2 =
+	table->initialState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->initialState.levels[0].mclk.mclk_value =
+	table->initialState.level.mclk.mclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].mclk);
 
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-	table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-	table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+	table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
 		cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
 
-	table->initialState.levels[0].sclk.sclk_value =
+	table->initialState.level.sclk.sclk_value =
 		cpu_to_be32(initial_state->performance_levels[0].sclk);
 
-	table->initialState.levels[0].arbRefreshState =
+	table->initialState.level.arbRefreshState =
 		SISLANDS_INITIAL_STATE_ARB_INDEX;
 
-	table->initialState.levels[0].ACIndex = 0;
+	table->initialState.level.ACIndex = 0;
 
 	ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
 					initial_state->performance_levels[0].vddc,
-					&table->initialState.levels[0].vddc);
+					&table->initialState.level.vddc);
 
 	if (!ret) {
 		u16 std_vddc;
 
 		ret = si_get_std_voltage_value(adev,
-					       &table->initialState.levels[0].vddc,
+					       &table->initialState.level.vddc,
 					       &std_vddc);
 		if (!ret)
 			si_populate_std_voltage_value(adev, std_vddc,
-						      table->initialState.levels[0].vddc.index,
-						      &table->initialState.levels[0].std_vddc);
+						      table->initialState.level.vddc.index,
+						      &table->initialState.level.std_vddc);
 	}
 
 	if (eg_pi->vddci_control)
 		si_populate_voltage_value(adev,
 					  &eg_pi->vddci_voltage_table,
 					  initial_state->performance_levels[0].vddci,
-					  &table->initialState.levels[0].vddci);
+					  &table->initialState.level.vddci);
 
 	if (si_pi->vddc_phase_shed_control)
 		si_populate_phase_shedding_value(adev,
@@ -4888,41 +4888,41 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
 						 initial_state->performance_levels[0].vddc,
 						 initial_state->performance_levels[0].sclk,
 						 initial_state->performance_levels[0].mclk,
-						 &table->initialState.levels[0].vddc);
+						 &table->initialState.level.vddc);
 
-	si_populate_initial_mvdd_value(adev, &table->initialState.levels[0].mvdd);
+	si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd);
 
 	reg = CG_R(0xffff) | CG_L(0);
-	table->initialState.levels[0].aT = cpu_to_be32(reg);
-	table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
-	table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+	table->initialState.level.aT = cpu_to_be32(reg);
+	table->initialState.level.bSP = cpu_to_be32(pi->dsp);
+	table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
 
 	if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
-		table->initialState.levels[0].strobeMode =
+		table->initialState.level.strobeMode =
 			si_get_strobe_mode_settings(adev,
 						    initial_state->performance_levels[0].mclk);
 
 		if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-			table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+			table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
 		else
-			table->initialState.levels[0].mcFlags =  0;
+			table->initialState.level.mcFlags =  0;
 	}
 
 	table->initialState.levelCount = 1;
 
 	table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
 
-	table->initialState.levels[0].dpm2.MaxPS = 0;
-	table->initialState.levels[0].dpm2.NearTDPDec = 0;
-	table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-	table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-	table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->initialState.level.dpm2.MaxPS = 0;
+	table->initialState.level.dpm2.NearTDPDec = 0;
+	table->initialState.level.dpm2.AboveSafeInc = 0;
+	table->initialState.level.dpm2.BelowSafeInc = 0;
+	table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
@@ -4953,18 +4953,18 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 
 	if (pi->acpi_vddc) {
 		ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-						pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+						pi->acpi_vddc, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(adev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 			if (!ret)
 				si_populate_std_voltage_value(adev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+		table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
 
 		if (si_pi->vddc_phase_shed_control) {
 			si_populate_phase_shedding_value(adev,
@@ -4972,23 +4972,23 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 							 pi->acpi_vddc,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 		}
 	} else {
 		ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-						pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+						pi->min_vddc_in_table, &table->ACPIState.level.vddc);
 		if (!ret) {
 			u16 std_vddc;
 
 			ret = si_get_std_voltage_value(adev,
-						       &table->ACPIState.levels[0].vddc, &std_vddc);
+						       &table->ACPIState.level.vddc, &std_vddc);
 
 			if (!ret)
 				si_populate_std_voltage_value(adev, std_vddc,
-							      table->ACPIState.levels[0].vddc.index,
-							      &table->ACPIState.levels[0].std_vddc);
+							      table->ACPIState.level.vddc.index,
+							      &table->ACPIState.level.std_vddc);
 		}
-		table->ACPIState.levels[0].gen2PCIE =
+		table->ACPIState.level.gen2PCIE =
 			(u8)amdgpu_get_pcie_gen_support(adev,
 							si_pi->sys_pcie_mask,
 							si_pi->boot_pcie_gen,
@@ -5000,14 +5000,14 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 							 pi->min_vddc_in_table,
 							 0,
 							 0,
-							 &table->ACPIState.levels[0].vddc);
+							 &table->ACPIState.level.vddc);
 	}
 
 	if (pi->acpi_vddc) {
 		if (eg_pi->acpi_vddci)
 			si_populate_voltage_value(adev, &eg_pi->vddci_voltage_table,
 						  eg_pi->acpi_vddci,
-						  &table->ACPIState.levels[0].vddci);
+						  &table->ACPIState.level.vddci);
 	}
 
 	mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -5018,59 +5018,59 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
 	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
 	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
 
-	table->ACPIState.levels[0].mclk.vDLL_CNTL =
+	table->ACPIState.level.mclk.vDLL_CNTL =
 		cpu_to_be32(dll_cntl);
-	table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+	table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
 		cpu_to_be32(mclk_pwrmgt_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
 		cpu_to_be32(mpll_ad_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
 		cpu_to_be32(mpll_dq_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
 		cpu_to_be32(mpll_func_cntl);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
 		cpu_to_be32(mpll_func_cntl_1);
-	table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
 		cpu_to_be32(mpll_func_cntl_2);
-	table->ACPIState.levels[0].mclk.vMPLL_SS =
+	table->ACPIState.level.mclk.vMPLL_SS =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-	table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+	table->ACPIState.level.mclk.vMPLL_SS2 =
 		cpu_to_be32(si_pi->clock_registers.mpll_ss2);
 
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
 		cpu_to_be32(spll_func_cntl);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
 		cpu_to_be32(spll_func_cntl_2);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
 		cpu_to_be32(spll_func_cntl_3);
-	table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+	table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
 		cpu_to_be32(spll_func_cntl_4);
 
-	table->ACPIState.levels[0].mclk.mclk_value = 0;
-	table->ACPIState.levels[0].sclk.sclk_value = 0;
+	table->ACPIState.level.mclk.mclk_value = 0;
+	table->ACPIState.level.sclk.sclk_value = 0;
 
-	si_populate_mvdd_value(adev, 0, &table->ACPIState.levels[0].mvdd);
+	si_populate_mvdd_value(adev, 0, &table->ACPIState.level.mvdd);
 
 	if (eg_pi->dynamic_ac_timing)
-		table->ACPIState.levels[0].ACIndex = 0;
+		table->ACPIState.level.ACIndex = 0;
 
-	table->ACPIState.levels[0].dpm2.MaxPS = 0;
-	table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-	table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-	table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+	table->ACPIState.level.dpm2.MaxPS = 0;
+	table->ACPIState.level.dpm2.NearTDPDec = 0;
+	table->ACPIState.level.dpm2.AboveSafeInc = 0;
+	table->ACPIState.level.dpm2.BelowSafeInc = 0;
+	table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
 
 	reg = MIN_POWER_MASK | MAX_POWER_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
 
 	reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-	table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+	table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
 
 	return 0;
 }
 
 static int si_populate_ulv_state(struct amdgpu_device *adev,
-				 SISLANDS_SMC_SWSTATE *state)
+				 struct SISLANDS_SMC_SWSTATE_SINGLE *state)
 {
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
 	struct si_power_info *si_pi = si_get_pi(adev);
@@ -5079,19 +5079,19 @@ static int si_populate_ulv_state(struct amdgpu_device *adev,
 	int ret;
 
 	ret = si_convert_power_level_to_smc(adev, &ulv->pl,
-					    &state->levels[0]);
+					    &state->level);
 	if (!ret) {
 		if (eg_pi->sclk_deep_sleep) {
 			if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
 			else
-				state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+				state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
 		}
 		if (ulv->one_pcie_lane_in_ulv)
 			state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-		state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-		state->levels[0].ACIndex = 1;
-		state->levels[0].std_vddc = state->levels[0].vddc;
+		state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+		state->level.ACIndex = 1;
+		state->level.std_vddc = state->level.vddc;
 		state->levelCount = 1;
 
 		state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -5190,7 +5190,9 @@ static int si_init_smc_table(struct amdgpu_device *adev)
 	if (ret)
 		return ret;
 
-	table->driverState = table->initialState;
+	table->driverState.flags = table->initialState.flags;
+	table->driverState.levelCount = table->initialState.levelCount;
+	table->driverState.levels[0] = table->initialState.level;
 
 	ret = si_do_program_memory_timing_parameters(adev, amdgpu_boot_state,
 						     SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5737,8 +5739,8 @@ static int si_upload_ulv_state(struct amdgpu_device *adev)
 	if (ulv->supported && ulv->pl.vddc) {
 		u32 address = si_pi->state_table_start +
 			offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-		SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-		u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+		struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+		u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
 
 		memset(smc_state, 0, state_size);
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
index 0f7554052c906..c7dc117a688cb 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
 
 typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
 
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+	uint8_t                             flags;
+	uint8_t                             levelCount;
+	uint8_t                             padding2;
+	uint8_t                             padding3;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
 #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
 #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
 #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
 
 struct SISLANDS_SMC_STATETABLE
 {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+	uint8_t					thermalProtectType;
+	uint8_t					systemFlags;
+	uint8_t					maxVDDCIndexInPPTable;
+	uint8_t					extraFlags;
+	uint32_t				lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+	SISLANDS_SMC_VOLTAGEMASKTABLE		voltageMaskTable;
+	SISLANDS_SMC_VOLTAGEMASKTABLE		phaseMaskTable;
+	PP_SIslands_DPM2Parameters		dpm2Params;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	initialState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	ACPIState;
+	struct SISLANDS_SMC_SWSTATE_SINGLE	ULVState;
+	SISLANDS_SMC_SWSTATE			driverState;
+	SISLANDS_SMC_HW_PERFORMANCE_LEVEL	dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
 };
 
 typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
-- 
GitLab


From fe1c97d008f86f672f0e9265f180c22451ca3b9f Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@gatech.edu>
Date: Mon, 10 May 2021 05:30:39 -0400
Subject: [PATCH 0574/3804] drm/amd/display: Initialize attribute for hdcp_srm
 sysfs file

It is stored in dynamically allocated memory, so sysfs_bin_attr_init() must
be called to initialize it. (Note: "initialization" only sets the .attr.key
member in this struct; it does not change the value of any other members.)

Otherwise, when CONFIG_DEBUG_LOCK_ALLOC=y this message appears during boot:

    BUG: key ffff9248900cd148 has not been registered!

Fixes: 9037246bb2da ("drm/amd/display: Add sysfs interface for set/get srm")
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1586
Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Signed-off-by: David Ward <david.ward@gatech.edu>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 616f5b1ea3a88..666796a0067c3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -650,6 +650,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
 
 	/* File created at /sys/class/drm/card0/device/hdcp_srm*/
 	hdcp_work[0].attr = data_attr;
+	sysfs_bin_attr_init(&hdcp_work[0].attr);
 
 	if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr))
 		DRM_WARN("Failed to create device file hdcp_srm");
-- 
GitLab


From 83a0b8639185f40ab7fc9dd291a057150eb9d238 Mon Sep 17 00:00:00 2001
From: Likun GAO <Likun.Gao@amd.com>
Date: Thu, 29 Apr 2021 14:08:13 +0800
Subject: [PATCH 0575/3804] drm/amdgpu: add judgement when add ip blocks (v2)

Decide whether to add a software IP block according to the harvest info.

v2: fix indentation (Alex)

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    | 15 +++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 28 +++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h |  1 +
 drivers/gpu/drm/amd/amdgpu/nv.c               |  8 +++++-
 drivers/gpu/drm/amd/include/amd_shared.h      |  6 ++++
 6 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index dc3a69296321b..264176a01e16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1006,6 +1006,7 @@ struct amdgpu_device {
 	struct amdgpu_df                df;
 
 	struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
+	uint32_t		        harvest_ip_mask;
 	int				num_ip_blocks;
 	struct mutex	mn_lock;
 	DECLARE_HASHTABLE(mn_hash, 7);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 7d3b546151475..8b2a37bf2adf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1683,6 +1683,19 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 	if (!ip_block_version)
 		return -EINVAL;
 
+	switch (ip_block_version->type) {
+	case AMD_IP_BLOCK_TYPE_VCN:
+		if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+			return 0;
+		break;
+	case AMD_IP_BLOCK_TYPE_JPEG:
+		if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+			return 0;
+		break;
+	default:
+		break;
+	}
+
 	DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
 		  ip_block_version->funcs->name);
 
@@ -3111,7 +3124,6 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
-
 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
@@ -3276,6 +3288,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->vm_manager.vm_pte_funcs = NULL;
 	adev->vm_manager.vm_pte_num_scheds = 0;
 	adev->gmc.gmc_funcs = NULL;
+	adev->harvest_ip_mask = 0x0;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b2dbcb4df0208..e1b6f58917599 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -373,6 +373,34 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
 	return -EINVAL;
 }
 
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+{
+	struct binary_header *bhdr;
+	struct harvest_table *harvest_info;
+	int i;
+
+	bhdr = (struct binary_header *)adev->mman.discovery_bin;
+	harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
+			le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
+
+	for (i = 0; i < 32; i++) {
+		if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
+			break;
+
+		switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
+		case VCN_HWID:
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+			break;
+		case DMU_HWID:
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
 {
 	struct binary_header *bhdr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 8f6183801cb34..1b1ae21b10375 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -29,6 +29,7 @@
 
 void amdgpu_discovery_fini(struct amdgpu_device *adev);
 int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
 int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
                                     int *major, int *minor, int *revision);
 int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index d54af7f8801bf..428413c860c75 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -635,6 +635,8 @@ static int nv_reg_base_init(struct amdgpu_device *adev)
 			goto legacy_init;
 		}
 
+		amdgpu_discovery_harvest_ip(adev);
+
 		return 0;
 	}
 
@@ -777,7 +779,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
-
 		if (adev->enable_mes)
 			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
 		break;
@@ -1149,6 +1150,11 @@ static int nv_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+		adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN |
+				    AMD_PG_SUPPORT_VCN_DPG |
+				    AMD_PG_SUPPORT_JPEG);
+
 	if (amdgpu_sriov_vf(adev)) {
 		amdgpu_virt_init_setting(adev);
 		xgpu_nv_mailbox_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 43ed6291b2b89..9ab706cd07ff4 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -216,6 +216,12 @@ enum PP_FEATURE_MASK {
 	PP_GFX_DCS_MASK = 0x80000,
 };
 
+enum amd_harvest_ip_mask {
+    AMD_HARVEST_IP_VCN_MASK = 0x1,
+    AMD_HARVEST_IP_JPEG_MASK = 0x2,
+    AMD_HARVEST_IP_DMU_MASK = 0x4,
+};
+
 enum DC_FEATURE_MASK {
 	DC_FBC_MASK = 0x1,
 	DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
-- 
GitLab


From 5c1a376823c408efd7de30fc300e687c78627f27 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Fri, 7 May 2021 13:56:46 +0800
Subject: [PATCH 0576/3804] drm/amdgpu: update the method for harvest IP for
 specific SKU

Update the method of disabling the VCN IP for specific SKUs of navi1x
ASICs: whether to add the related IP block is now decided in
amdgpu_device_ip_block_add().

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nv.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 428413c860c75..d290ca0b06da8 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -623,6 +623,16 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
 	.funcs = &nv_common_ip_funcs,
 };
 
+static bool nv_is_headless_sku(struct pci_dev *pdev)
+{
+	if ((pdev->device == 0x731E &&
+	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
+	    (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
+	    (pdev->device == 0x7360 && pdev->revision == 0xC7))
+		return true;
+	return false;
+}
+
 static int nv_reg_base_init(struct amdgpu_device *adev)
 {
 	int r;
@@ -636,6 +646,10 @@ static int nv_reg_base_init(struct amdgpu_device *adev)
 		}
 
 		amdgpu_discovery_harvest_ip(adev);
+		if (nv_is_headless_sku(adev->pdev)) {
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+			adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+		}
 
 		return 0;
 	}
@@ -673,16 +687,6 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
 	adev->virt.ops = &xgpu_nv_virt_ops;
 }
 
-static bool nv_is_headless_sku(struct pci_dev *pdev)
-{
-	if ((pdev->device == 0x731E &&
-	    (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
-	    (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
-	    (pdev->device == 0x7360 && pdev->revision == 0xC7))
-		return true;
-	return false;
-}
-
 int nv_set_ip_blocks(struct amdgpu_device *adev)
 {
 	int r;
@@ -730,8 +734,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
 		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-		if (!nv_is_headless_sku(adev->pdev))
-			amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		if (adev->enable_mes)
 			amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
@@ -754,8 +757,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
 		    !amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-		if (!nv_is_headless_sku(adev->pdev))
-		        amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+		amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
 		break;
-- 
GitLab


From 227545b9a08c68778ddd89428f99c351fc9315ac Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 30 Apr 2021 12:56:56 +0800
Subject: [PATCH 0577/3804] drm/radeon/dpm: Disable sclk switching on Oland
 when two 4K 60Hz monitors are connected

Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
another one is 4K 30Hz (pixelclock 297MHz).

The issue is gone after setting "power_dpm_force_performance_level" to
"high". Following the indication, we found that the issue occurs when
sclk is too low.

So resolve the issue by disabling sclk switching when there are two
monitors that require a high pixelclock (> 297MHz).

v2:
 - Only apply the fix to Oland.
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon.h    | 1 +
 drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++++
 drivers/gpu/drm/radeon/si_dpm.c    | 3 +++
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 42281fce552e6..56ed5634cebef 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1549,6 +1549,7 @@ struct radeon_dpm {
 	void                    *priv;
 	u32			new_active_crtcs;
 	int			new_active_crtc_count;
+	int			high_pixelclock_count;
 	u32			current_active_crtcs;
 	int			current_active_crtc_count;
 	bool single_display;
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 0c1950f4e146f..3861c0b98fcf3 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 	struct drm_device *ddev = rdev->ddev;
 	struct drm_crtc *crtc;
 	struct radeon_crtc *radeon_crtc;
+	struct radeon_connector *radeon_connector;
 
 	if (!rdev->pm.dpm_enabled)
 		return;
@@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 	/* update active crtc counts */
 	rdev->pm.dpm.new_active_crtcs = 0;
 	rdev->pm.dpm.new_active_crtc_count = 0;
+	rdev->pm.dpm.high_pixelclock_count = 0;
 	if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
 		list_for_each_entry(crtc,
 				    &ddev->mode_config.crtc_list, head) {
@@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
 			if (crtc->enabled) {
 				rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id);
 				rdev->pm.dpm.new_active_crtc_count++;
+				if (!radeon_crtc->connector)
+					continue;
+
+				radeon_connector = to_radeon_connector(radeon_crtc->connector);
+				if (radeon_connector->pixelclock_for_modeset > 297000)
+					rdev->pm.dpm.high_pixelclock_count++;
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 2c54c0d7ca5be..3add39c1a6897 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
 		    (rdev->pdev->device == 0x6605)) {
 			max_sclk = 75000;
 		}
+
+		if (rdev->pm.dpm.high_pixelclock_count > 1)
+			disable_sclk_switching = true;
 	}
 
 	if (rps->vce_active) {
-- 
GitLab


From 3666f83a11293fd3cbeb3c9e0c3c53a33a48c28b Mon Sep 17 00:00:00 2001
From: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Date: Mon, 3 May 2021 12:34:10 +0530
Subject: [PATCH 0578/3804] drm/amdgpu: set vcn mgcg flag for picasso

enable vcn mgcg flag for picasso.

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index d80e12b80c7e5..8e1b9a40839fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1401,7 +1401,8 @@ static int soc15_common_early_init(void *handle)
 				AMD_CG_SUPPORT_MC_MGCG |
 				AMD_CG_SUPPORT_MC_LS |
 				AMD_CG_SUPPORT_SDMA_MGCG |
-				AMD_CG_SUPPORT_SDMA_LS;
+				AMD_CG_SUPPORT_SDMA_LS |
+				AMD_CG_SUPPORT_VCN_MGCG;
 
 			adev->pg_flags = AMD_PG_SUPPORT_SDMA |
 				AMD_PG_SUPPORT_MMHUB |
-- 
GitLab


From 5c1efb5f7682e2072ca5ce12cd616d432604ecc0 Mon Sep 17 00:00:00 2001
From: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Date: Mon, 3 May 2021 23:57:31 +0530
Subject: [PATCH 0579/3804] drm/amdgpu: update vcn1.0 Non-DPG suspend sequence

update suspend register settings in Non-DPG mode.

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 51a773a37a354..0c1beefa3e498 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1119,10 +1119,10 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
 		UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
 	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
 
-	/* put VCPU into reset */
-	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
-		~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+	/* stall UMC channel */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+		UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+		~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
 		UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
@@ -1141,6 +1141,11 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
 		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
 		~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
 
+	/* put VCPU into reset */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+		~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
 	WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
 
 	vcn_v1_0_enable_clock_gating(adev);
-- 
GitLab


From 3ddb4ce1e6e3bd112778ab93bbd9092f23a878ec Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Apr 2021 11:55:14 +0100
Subject: [PATCH 0580/3804] serial: tegra: Fix a mask operation that is always
 true

Currently the expression lsr | UART_LSR_TEMT is always true and
this seems suspect. I believe the intent was to mask lsr with UART_LSR_TEMT
to check that bit, so the expression should be using the & operator
instead. Fix this.

Fixes: b9c2470fb150 ("serial: tegra: flush the RX fifo on frame error")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210426105514.23268-1-colin.king@canonical.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial-tegra.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
index bbae072a125db..222032792d6c2 100644
--- a/drivers/tty/serial/serial-tegra.c
+++ b/drivers/tty/serial/serial-tegra.c
@@ -338,7 +338,7 @@ static void tegra_uart_fifo_reset(struct tegra_uart_port *tup, u8 fcr_bits)
 
 	do {
 		lsr = tegra_uart_read(tup, UART_LSR);
-		if ((lsr | UART_LSR_TEMT) && !(lsr & UART_LSR_DR))
+		if ((lsr & UART_LSR_TEMT) && !(lsr & UART_LSR_DR))
 			break;
 		udelay(1);
 	} while (--tmout);
-- 
GitLab


From 5e722b217ad3cf41f5504db80a68062df82b5242 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Fri, 7 May 2021 13:57:19 +0200
Subject: [PATCH 0581/3804] serial: core: fix suspicious security_locked_down()
 call

The commit that added this check did so in a very strange way - first
security_locked_down() is called, its value stored into retval, and if
it's nonzero, then an additional check is made for (change_irq ||
change_port), and if this is true, the function returns. However, if
the goto exit branch is not taken, the code keeps the retval value and
continues executing the function. Then, depending on whether
uport->ops->verify_port is set, the retval value may or may not be reset
to zero and eventually the error value from security_locked_down() may
abort the function a few lines below.

I will go out on a limb and assume that this isn't the intended behavior
and that an error value from security_locked_down() was supposed to
abort the function only in case (change_irq || change_port) is true.

Note that security_locked_down() should be called last in any series of
checks, since the SELinux implementation of this hook will do a check
against the policy and generate an audit record in case of denial. If
the operation was to carry on after calling security_locked_down(), then
the SELinux denial record would be bogus.

See commit 59438b46471a ("security,lockdown,selinux: implement SELinux
lockdown") for how SELinux implements this hook.

Fixes: 794edf30ee6c ("lockdown: Lock down TIOCSSERIAL")
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210507115719.140799-1-omosnace@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial_core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 87f7127b57e6b..18ff85a83f806 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -863,9 +863,11 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
 		goto check_and_exit;
 	}
 
-	retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
-	if (retval && (change_irq || change_port))
-		goto exit;
+	if (change_irq || change_port) {
+		retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
+		if (retval)
+			goto exit;
+	}
 
 	/*
 	 * Ask the low level driver to verify the settings.
-- 
GitLab


From 2ea2e019c190ee3973ef7bcaf829d8762e56e635 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 10 May 2021 14:07:55 +0200
Subject: [PATCH 0582/3804] serial: sh-sci: Fix off-by-one error in FIFO
 threshold register setting

The Receive FIFO Data Count Trigger field (RTRG[6:0]) in the Receive
FIFO Data Count Trigger Register (HSRTRGR) of HSCIF can only hold values
ranging from 0-127.  As the FIFO size is equal to 128 on HSCIF, the user
can write an out-of-range value, touching reserved bits.

Fix this by limiting the trigger value to the FIFO size minus one.
Reverse the order of the checks, to avoid rx_trig becoming zero if the
FIFO size is one.

Note that this change has no impact on other SCIF variants, as their
maximum supported trigger value is lower than the FIFO size anyway, and
the code below takes care of enforcing these limits.

Fixes: a380ed461f66d1b8 ("serial: sh-sci: implement FIFO threshold register setting")
Reported-by: Linh Phung <linh.phung.jy@renesas.com>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/5eff320aef92ffb33d00e57979fd3603bbb4a70f.1620648218.git.geert+renesas@glider.be
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index ef37fdf37612f..4baf1316ea729 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1023,10 +1023,10 @@ static int scif_set_rtrg(struct uart_port *port, int rx_trig)
 {
 	unsigned int bits;
 
+	if (rx_trig >= port->fifosize)
+		rx_trig = port->fifosize - 1;
 	if (rx_trig < 1)
 		rx_trig = 1;
-	if (rx_trig >= port->fifosize)
-		rx_trig = port->fifosize;
 
 	/* HSCIF can be set to an arbitrary level. */
 	if (sci_getreg(port, HSRTRGR)->size) {
-- 
GitLab


From 3c35d2a960c0077a4cb09bf4989f45d289332ea0 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Wed, 12 May 2021 23:04:13 +0200
Subject: [PATCH 0583/3804] serial: 8250_dw: Add device HID for new AMD UART
 controller

Add device HID AMDI0022 to the AMD UART controller driver match table
and create a platform device for it. This controller can be found on
Microsoft Surface Laptop 4 devices and seems similar enough that we can
just copy the existing AMDI0020 entries.

Cc: <stable@vger.kernel.org> # 5.10+
Tested-by: Sachi King <nakato@nakato.io>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # for 8250_dw part
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210512210413.1982933-1-luzmaximilian@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpi_apd.c           | 1 +
 drivers/tty/serial/8250/8250_dw.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 0ec5b3f691127..6e02448d15d95 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -226,6 +226,7 @@ static const struct acpi_device_id acpi_apd_device_ids[] = {
 	{ "AMDI0010", APD_ADDR(wt_i2c_desc) },
 	{ "AMD0020", APD_ADDR(cz_uart_desc) },
 	{ "AMDI0020", APD_ADDR(cz_uart_desc) },
+	{ "AMDI0022", APD_ADDR(cz_uart_desc) },
 	{ "AMD0030", },
 	{ "AMD0040", APD_ADDR(fch_misc_desc)},
 	{ "HYGO0010", APD_ADDR(wt_i2c_desc) },
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index 9e204f9b799a1..a3a0154da567d 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -714,6 +714,7 @@ static const struct acpi_device_id dw8250_acpi_match[] = {
 	{ "APMC0D08", 0},
 	{ "AMD0020", 0 },
 	{ "AMDI0020", 0 },
+	{ "AMDI0022", 0 },
 	{ "BRCM2032", 0 },
 	{ "HISI0031", 0 },
 	{ },
-- 
GitLab


From 3e42d1de020805ff3f7d854e1cff742d14e158f5 Mon Sep 17 00:00:00 2001
From: Carlos Bilbao <bilbao@vt.edu>
Date: Thu, 13 May 2021 09:31:10 -0400
Subject: [PATCH 0584/3804] docs: typo fixes in Documentation/ABI/

Fix the following typos in the Documentation/ABI/ directory:

- In file obsolete/sysfs-cpuidle, change "obselete" for "obsolete".

- In file removed/sysfs-kernel-uids, change "propotional" for "proportional".

- In directory stable/, fix the following words: "associtated" for "associated",
  "hexidecimal" for "hexadecimal", "vlue" for "value", "cuased" for "caused" and
  "wrtie" for "write". This updates a total of four files.

- In directory testing/, fix the following words: "subystem" for "subsystem",
  "isochrnous" for "isochronous", "Desctiptors" for "Descriptors", "picutre" for
  "picture", "caputre" for "capture", "occured" for "occurred", "connnected" for
  "connected", "agressively" for "aggressively", "manufacturee" for
  "manufacturer", "transcation" for "transaction", "malformatted" for
  "incorrectly formatted", "internel" for "internal", "writtento" for
  "written to", "specificed" for "specified", "beyound" for "beyond" and
  "Symetric" for "Symmetric". This updates a total of eleven files.

Signed-off-by: Carlos Bilbao <bilbao@vt.edu>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/5710038.lOV4Wx5bFT@iron-maiden
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/obsolete/sysfs-cpuidle            |  2 +-
 Documentation/ABI/removed/sysfs-kernel-uids         |  2 +-
 Documentation/ABI/stable/sysfs-bus-vmbus            |  2 +-
 Documentation/ABI/stable/sysfs-bus-xen-backend      |  2 +-
 Documentation/ABI/stable/sysfs-driver-dma-idxd      |  2 +-
 Documentation/ABI/stable/sysfs-driver-mlxreg-io     |  4 ++--
 Documentation/ABI/testing/configfs-iio              |  2 +-
 Documentation/ABI/testing/configfs-most             |  8 ++++----
 Documentation/ABI/testing/configfs-usb-gadget       |  2 +-
 Documentation/ABI/testing/configfs-usb-gadget-uvc   |  4 ++--
 Documentation/ABI/testing/debugfs-driver-genwqe     |  2 +-
 Documentation/ABI/testing/debugfs-driver-habanalabs |  2 +-
 Documentation/ABI/testing/sysfs-bus-fsi             |  2 +-
 Documentation/ABI/testing/sysfs-bus-pci             |  4 ++--
 Documentation/ABI/testing/sysfs-devices-system-cpu  | 10 +++++-----
 Documentation/ABI/testing/sysfs-driver-ufs          |  4 ++--
 Documentation/ABI/testing/sysfs-fs-f2fs             |  2 +-
 17 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/Documentation/ABI/obsolete/sysfs-cpuidle b/Documentation/ABI/obsolete/sysfs-cpuidle
index e398fb5e542f6..972cc11d34344 100644
--- a/Documentation/ABI/obsolete/sysfs-cpuidle
+++ b/Documentation/ABI/obsolete/sysfs-cpuidle
@@ -6,4 +6,4 @@ Description:
 	with the update that cpuidle governor can be changed at runtime in default,
 	both current_governor and current_governor_ro co-exist under
 	/sys/devices/system/cpu/cpuidle/ file, it's duplicate so make
-	current_governor_ro obselete.
+	current_governor_ro obsolete.
diff --git a/Documentation/ABI/removed/sysfs-kernel-uids b/Documentation/ABI/removed/sysfs-kernel-uids
index dc4463f190a7d..85a90b86ce1ea 100644
--- a/Documentation/ABI/removed/sysfs-kernel-uids
+++ b/Documentation/ABI/removed/sysfs-kernel-uids
@@ -5,7 +5,7 @@ Contact:	Dhaval Giani <dhaval@linux.vnet.ibm.com>
 Description:
 		The /sys/kernel/uids/<uid>/cpu_shares tunable is used
 		to set the cpu bandwidth a user is allowed. This is a
-		propotional value. What that means is that if there
+		proportional value. What that means is that if there
 		are two users logged in, each with an equal number of
 		shares, then they will get equal CPU bandwidth. Another
 		example would be, if User A has shares = 1024 and user
diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 42599d9fa161f..3066feae1d8d2 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -61,7 +61,7 @@ Date:		September. 2017
 KernelVersion:	4.14
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Directory for per-channel information
-		NN is the VMBUS relid associtated with the channel.
+		NN is the VMBUS relid associated with the channel.
 
 What:		/sys/bus/vmbus/devices/<UUID>/channels/<N>/cpu
 Date:		September. 2017
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
index e8b60bd766f76..480a89edfa05b 100644
--- a/Documentation/ABI/stable/sysfs-bus-xen-backend
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -19,7 +19,7 @@ Date:		April 2011
 KernelVersion:	3.0
 Contact:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 Description:
-                The major:minor number (in hexidecimal) of the
+                The major:minor number (in hexadecimal) of the
                 physical device providing the storage for this backend
                 block device.
 
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 55285c136cf06..d431e2d00472c 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -173,7 +173,7 @@ What:           /sys/bus/dsa/devices/wq<m>.<n>/priority
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
-Description:    The priority value of this work queue, it is a vlue relative to
+Description:    The priority value of this work queue, it is a value relative to
 		other work queue in the same group to control quality of service
 		for dispatching work from multiple workqueues in the same group.
 
diff --git a/Documentation/ABI/stable/sysfs-driver-mlxreg-io b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
index fd9a8045bb0c5..b2553df2e786a 100644
--- a/Documentation/ABI/stable/sysfs-driver-mlxreg-io
+++ b/Documentation/ABI/stable/sysfs-driver-mlxreg-io
@@ -137,7 +137,7 @@ Contact:	Vadim Pasternak <vadimpmellanox.com>
 Description:	These files show the system reset cause, as following:
 		COMEX thermal shutdown; wathchdog power off or reset was derived
 		by one of the next components: COMEX, switch board or by Small Form
-		Factor mezzanine, reset requested from ASIC, reset cuased by BIOS
+		Factor mezzanine, reset requested from ASIC, reset caused by BIOS
 		reload. Value 1 in file means this is reset cause, 0 - otherwise.
 		Only one of the above causes could be 1 at the same time, representing
 		only last reset cause.
@@ -183,7 +183,7 @@ What:		/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/vpd_wp
 Date:		January 2020
 KernelVersion:	5.6
 Contact:	Vadim Pasternak <vadimpmellanox.com>
-Description:	This file allows to overwrite system VPD hardware wrtie
+Description:	This file allows to overwrite system VPD hardware write
 		protection when attribute is set 1.
 
 		The file is read/write.
diff --git a/Documentation/ABI/testing/configfs-iio b/Documentation/ABI/testing/configfs-iio
index aebda53ec0f77..1637fcb50f568 100644
--- a/Documentation/ABI/testing/configfs-iio
+++ b/Documentation/ABI/testing/configfs-iio
@@ -31,4 +31,4 @@ Date:		April 2016
 KernelVersion:	4.7
 Description:
 		Dummy IIO devices directory. Creating a directory here will result
-		in creating a dummy IIO device in the IIO subystem.
+		in creating a dummy IIO device in the IIO subsystem.
diff --git a/Documentation/ABI/testing/configfs-most b/Documentation/ABI/testing/configfs-most
index bc6b8bd18da49..0a4b8649aa5a0 100644
--- a/Documentation/ABI/testing/configfs-most
+++ b/Documentation/ABI/testing/configfs-most
@@ -20,7 +20,7 @@ Description:
 
 		subbuffer_size
 				configure the sub-buffer size for this channel
-				(needed for synchronous and isochrnous data)
+				(needed for synchronous and isochronous data)
 
 
 		num_buffers
@@ -75,7 +75,7 @@ Description:
 
 		subbuffer_size
 				configure the sub-buffer size for this channel
-				(needed for synchronous and isochrnous data)
+				(needed for synchronous and isochronous data)
 
 
 		num_buffers
@@ -130,7 +130,7 @@ Description:
 
 		subbuffer_size
 				configure the sub-buffer size for this channel
-				(needed for synchronous and isochrnous data)
+				(needed for synchronous and isochronous data)
 
 
 		num_buffers
@@ -196,7 +196,7 @@ Description:
 
 		subbuffer_size
 				configure the sub-buffer size for this channel
-				(needed for synchronous and isochrnous data)
+				(needed for synchronous and isochronous data)
 
 
 		num_buffers
diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
index dc351e9af80ad..b7943aa7e997e 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -137,7 +137,7 @@ Description:
 		This group contains "OS String" extension handling attributes.
 
 		=============	===============================================
-		use		flag turning "OS Desctiptors" support on/off
+		use		flag turning "OS Descriptors" support on/off
 		b_vendor_code	one-byte value used for custom per-device and
 				per-interface requests
 		qw_sign		an identifier to be reported as "OS String"
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index ac5e11af79a81..889ed45be4ca6 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -170,7 +170,7 @@ Description:	Default color matching descriptors
 		bMatrixCoefficients	  matrix used to compute luma and
 					  chroma values from the color primaries
 		bTransferCharacteristics  optoelectronic transfer
-					  characteristic of the source picutre,
+					  characteristic of the source picture,
 					  also called the gamma function
 		bColorPrimaries		  color primaries and the reference
 					  white
@@ -311,7 +311,7 @@ Description:	Specific streaming header descriptors
 					a hardware trigger interrupt event
 		bTriggerSupport		flag specifying if hardware
 					triggering is supported
-		bStillCaptureMethod	method of still image caputre
+		bStillCaptureMethod	method of still image capture
 					supported
 		bTerminalLink		id of the output terminal to which
 					the video endpoint of this interface
diff --git a/Documentation/ABI/testing/debugfs-driver-genwqe b/Documentation/ABI/testing/debugfs-driver-genwqe
index 1c2f25674e8c3..b45b016545d89 100644
--- a/Documentation/ABI/testing/debugfs-driver-genwqe
+++ b/Documentation/ABI/testing/debugfs-driver-genwqe
@@ -31,7 +31,7 @@ What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_regs
 Date:           Oct 2013
 Contact:        haver@linux.vnet.ibm.com
 Description:    Dump of the error registers before the last reset of
-                the card occured.
+                the card occurred.
                 Only available for PF.
 
 What:           /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid0
diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index c78fc9282876f..e89c6351503ca 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -153,7 +153,7 @@ KernelVersion:  5.1
 Contact:        ogabbay@kernel.org
 Description:    Triggers an I2C transaction that is generated by the device's
                 CPU. Writing to this file generates a write transaction while
-                reading from the file generates a read transcation
+                reading from the file generates a read transaction
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
 Date:           Jan 2019
diff --git a/Documentation/ABI/testing/sysfs-bus-fsi b/Documentation/ABI/testing/sysfs-bus-fsi
index d148214181a1b..76e0caa0c2b3f 100644
--- a/Documentation/ABI/testing/sysfs-bus-fsi
+++ b/Documentation/ABI/testing/sysfs-bus-fsi
@@ -12,7 +12,7 @@ KernelVersion:  4.12
 Contact:        linux-fsi@lists.ozlabs.org
 Description:
 		Sends an FSI BREAK command on a master's communication
-		link to any connnected slaves.  A BREAK resets connected
+		link to any connected slaves.  A BREAK resets connected
 		device's logic and preps it to receive further commands
 		from the master.
 
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index ef00fada2efbf..793cbb76cd250 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -139,8 +139,8 @@ Description:
 		binary file containing the Vital Product Data for the
 		device.  It should follow the VPD format defined in
 		PCI Specification 2.1 or 2.2, but users should consider
-		that some devices may have malformatted data.  If the
-		underlying VPD has a writable section then the
+		that some devices may have incorrectly formatted data.  
+		If the underlying VPD has a writable section then the
 		corresponding section of this file will be writable.
 
 What:		/sys/bus/pci/devices/.../virtfnN
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index fe13baa53c59b..160b10c029c05 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -50,7 +50,7 @@ Description:	Dynamic addition and removal of CPU's.  This is not hotplug
 		architecture specific.
 
 		release: writes to this file dynamically remove a CPU from
-		the system.  Information writtento the file to remove CPU's
+		the system.  Information written to the file to remove CPU's
 		is architecture specific.
 
 What:		/sys/devices/system/cpu/cpu#/node
@@ -97,7 +97,7 @@ Description:	CPU topology files that describe a logical CPU's relationship
 		corresponds to a physical socket number, but the actual value
 		is architecture and platform dependent.
 
-		thread_siblings: internel kernel map of cpu#'s hardware
+		thread_siblings: internal kernel map of cpu#'s hardware
 		threads within the same core as cpu#
 
 		thread_siblings_list: human-readable list of cpu#'s hardware
@@ -280,7 +280,7 @@ Description:	Disable L3 cache indices
 		on a processor with this functionality will return the currently
 		disabled index for that node. There is one L3 structure per
 		node, or per internal node on MCM machines. Writing a valid
-		index to one of these files will cause the specificed cache
+		index to one of these files will cause the specified cache
 		index to be disabled.
 
 		All AMD processors with L3 caches provide this functionality.
@@ -295,7 +295,7 @@ Description:	Processor frequency boosting control
 
 		This switch controls the boost setting for the whole system.
 		Boosting allows the CPU and the firmware to run at a frequency
-		beyound it's nominal limit.
+		beyond it's nominal limit.
 
 		More details can be found in
 		Documentation/admin-guide/pm/cpufreq.rst
@@ -532,7 +532,7 @@ What:		/sys/devices/system/cpu/smt
 		/sys/devices/system/cpu/smt/control
 Date:		June 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
-Description:	Control Symetric Multi Threading (SMT)
+Description:	Control Symmetric Multi Threading (SMT)
 
 		active:  Tells whether SMT is active (enabled and siblings online)
 
diff --git a/Documentation/ABI/testing/sysfs-driver-ufs b/Documentation/ABI/testing/sysfs-driver-ufs
index d1bc23cb6a9d8..eaac6898f0c03 100644
--- a/Documentation/ABI/testing/sysfs-driver-ufs
+++ b/Documentation/ABI/testing/sysfs-driver-ufs
@@ -168,7 +168,7 @@ Description:	This file shows the manufacturing date in BCD format.
 What:		/sys/bus/platform/drivers/ufshcd/*/device_descriptor/manufacturer_id
 Date:		February 2018
 Contact:	Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
-Description:	This file shows the manufacturee ID. This is one of the
+Description:	This file shows the manufacturer ID. This is one of the
 		UFS device descriptor parameters. The full information about
 		the descriptor could be found at UFS specifications 2.1.
 
@@ -521,7 +521,7 @@ Description:	This file shows maximum VCC, VCCQ and VCCQ2 value for
 What:		/sys/bus/platform/drivers/ufshcd/*/string_descriptors/manufacturer_name
 Date:		February 2018
 Contact:	Stanislav Nijnikov <stanislav.nijnikov@wdc.com>
-Description:	This file contains a device manufactureer name string.
+Description:	This file contains a device manufacturer name string.
 		The full information about the descriptor could be found at
 		UFS specifications 2.1.
 
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 4849b8e84e422..5d9ae27bd4620 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -238,7 +238,7 @@ Description:	Shows current reserved blocks in system, it may be temporarily
 What:		/sys/fs/f2fs/<disk>/gc_urgent
 Date:		August 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	Do background GC agressively when set. When gc_urgent = 1,
+Description:	Do background GC aggressively when set. When gc_urgent = 1,
 		background thread starts to do GC by given gc_urgent_sleep_time
 		interval. When gc_urgent = 2, F2FS will lower the bar of
 		checking idle in order to process outstanding discard commands
-- 
GitLab


From a73b6a3b4109ce2ed01dbc51a6c1551a6431b53c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 28 Apr 2021 15:25:34 -0700
Subject: [PATCH 0585/3804] ics932s401: fix broken handling of errors when word
 reading fails

In commit b05ae01fdb89, someone tried to make the driver handle i2c read
errors by simply zeroing out the register contents, but for some reason
left unaltered the code that sets the cached register value to the
function call return value.

The original patch was authored by a member of the Underhanded
Mangle-happy Nerds, I'm not terribly surprised.  I don't have the
hardware anymore so I can't test this, but it seems like a pretty
obvious API usage fix to me...

Fixes: b05ae01fdb89 ("misc/ics932s401: Add a missing check to i2c_smbus_read_word_data")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20210428222534.GJ3122264@magnolia
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ics932s401.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 2bdf560ee681b..0f9ea75b0b189 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -134,7 +134,7 @@ static struct ics932s401_data *ics932s401_update_device(struct device *dev)
 	for (i = 0; i < NUM_MIRRORED_REGS; i++) {
 		temp = i2c_smbus_read_word_data(client, regs_to_copy[i]);
 		if (temp < 0)
-			data->regs[regs_to_copy[i]] = 0;
+			temp = 0;
 		data->regs[regs_to_copy[i]] = temp >> 8;
 	}
 
-- 
GitLab


From 6a3239a738d86c5e9b5aad17fefe2c2bfd6ced83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 28 Apr 2021 09:49:31 +0200
Subject: [PATCH 0586/3804] Revert "crypto: cavium/nitrox - add an error
 message to explain the failure of pci_request_mem_regions"

This reverts commit 9fcddaf2e28d779cb946d23838ba6d50f299aa80 as it was
submitted under a fake name and we can not knowingly accept anonymous
contributions to the repository.

This commit was part of a submission "test" to the Linux kernel
community by some "researchers" at umn.edu.  As outlined at:
	https://www-users.cs.umn.edu/%7Ekjlu/papers/full-disclosure.pdf
it was done so as an attempt to submit a known-buggy patch to see if it
could get by our review.  However, the submission turned out to actually
be correct, and not have a bug in it as the author did not understand
how the PCI driver model works at all, and so the submission was
accepted.

As this change is of useless consequence, there is no loss of
functionality in reverting it.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: linux-crypto@vger.kernel.org
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Email: Herbert Xu <herbert@gondor.apana.org.au>
Link: https://lore.kernel.org/r/YIkTi9a3nnL50wMq@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/cavium/nitrox/nitrox_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index facc8e6bc5801..d385daf2c71c3 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -442,7 +442,6 @@ static int nitrox_probe(struct pci_dev *pdev,
 	err = pci_request_mem_regions(pdev, nitrox_driver_name);
 	if (err) {
 		pci_disable_device(pdev);
-		dev_err(&pdev->dev, "Failed to request mem regions!\n");
 		return err;
 	}
 	pci_set_master(pdev);
-- 
GitLab


From 3e465fc3846734e9489273d889f19cc17b4cf4bd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:30 +0200
Subject: [PATCH 0587/3804] Revert "media: rcar_drif: fix a memory disclosure"

This reverts commit d39083234c60519724c6ed59509a2129fd2aed41.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct or not.

Upon review, it was determined that this commit is not needed at all as
the media core already prevents memory disclosure on this codepath, so
just drop the extra memset happening here.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Fixes: d39083234c60 ("media: rcar_drif: fix a memory disclosure")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-4-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/rcar_drif.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c
index 83bd9a412a560..1e3b68a8743af 100644
--- a/drivers/media/platform/rcar_drif.c
+++ b/drivers/media/platform/rcar_drif.c
@@ -915,7 +915,6 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv,
 {
 	struct rcar_drif_sdr *sdr = video_drvdata(file);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	f->fmt.sdr.pixelformat = sdr->fmt->pixelformat;
 	f->fmt.sdr.buffersize = sdr->fmt->buffersize;
 
-- 
GitLab


From 99ae3417672a6d4a3bf68d4fc43d7c6ca074d477 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:31 +0200
Subject: [PATCH 0588/3804] Revert "hwmon: (lm80) fix a missing check of bus
 read in lm80 probe"

This reverts commit 9aa3aa15f4c2f74f47afd6c5db4b420fadf3f315.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct or not.

Upon review, it was determined that this commit is not needed at all so
just revert it.  Also, the call to lm80_init_client() was not properly
handled, so if error handling is needed in the lm80_probe() function,
then it should be done properly, not half-baked like the commit being
reverted here did.

Cc: Kangjie Lu <kjlu@umn.edu>
Fixes: 9aa3aa15f4c2 ("hwmon: (lm80) fix a missing check of bus read in lm80 probe")
Cc: stable <stable@vger.kernel.org>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-5-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwmon/lm80.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index ac4adb44b224d..97ab491d2922c 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -596,7 +596,6 @@ static int lm80_probe(struct i2c_client *client)
 	struct device *dev = &client->dev;
 	struct device *hwmon_dev;
 	struct lm80_data *data;
-	int rv;
 
 	data = devm_kzalloc(dev, sizeof(struct lm80_data), GFP_KERNEL);
 	if (!data)
@@ -609,14 +608,8 @@ static int lm80_probe(struct i2c_client *client)
 	lm80_init_client(client);
 
 	/* A few vars need to be filled upon startup */
-	rv = lm80_read_value(client, LM80_REG_FAN_MIN(1));
-	if (rv < 0)
-		return rv;
-	data->fan[f_min][0] = rv;
-	rv = lm80_read_value(client, LM80_REG_FAN_MIN(2));
-	if (rv < 0)
-		return rv;
-	data->fan[f_min][1] = rv;
+	data->fan[f_min][0] = lm80_read_value(client, LM80_REG_FAN_MIN(1));
+	data->fan[f_min][1] = lm80_read_value(client, LM80_REG_FAN_MIN(2));
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
 							   data, lm80_groups);
-- 
GitLab


From 754f39158441f4c0d7a8255209dd9a939f08ce80 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:32 +0200
Subject: [PATCH 0589/3804] Revert "serial: mvebu-uart: Fix to avoid a
 potential NULL pointer dereference"

This reverts commit 32f47179833b63de72427131169809065db6745e.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to not be needed at all as the
change was useless because this function can only be called when
of_match_device matched on something.  So it should be reverted.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: stable <stable@vger.kernel.org>
Fixes: 32f47179833b ("serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference")
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-6-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/mvebu-uart.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index e0c00a1b07639..51b0ecabf2ec9 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -818,9 +818,6 @@ static int mvebu_uart_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (!match)
-		return -ENODEV;
-
 	/* Assume that all UART ports have a DT alias or none has */
 	id = of_alias_get_id(pdev->dev.of_node, "serial");
 	if (!pdev->dev.of_node || id < 0)
-- 
GitLab


From fd013265e5b5576a74a033920d6c571e08d7c423 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:33 +0200
Subject: [PATCH 0590/3804] Revert "media: usb: gspca: add a missed check for
 goto_low_power"

This reverts commit 5b711870bec4dc9a6d705d41e127e73944fa3650.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to do nothing useful as a user
can do nothing with this information and if an error did happen, the
code would continue on as before.  Because of this, just revert it.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-7-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/cpia1.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c
index a4f7431486f31..d93d384286c16 100644
--- a/drivers/media/usb/gspca/cpia1.c
+++ b/drivers/media/usb/gspca/cpia1.c
@@ -1424,7 +1424,6 @@ static int sd_config(struct gspca_dev *gspca_dev,
 {
 	struct sd *sd = (struct sd *) gspca_dev;
 	struct cam *cam;
-	int ret;
 
 	sd->mainsFreq = FREQ_DEF == V4L2_CID_POWER_LINE_FREQUENCY_60HZ;
 	reset_camera_params(gspca_dev);
@@ -1436,10 +1435,7 @@ static int sd_config(struct gspca_dev *gspca_dev,
 	cam->cam_mode = mode;
 	cam->nmodes = ARRAY_SIZE(mode);
 
-	ret = goto_low_power(gspca_dev);
-	if (ret)
-		gspca_err(gspca_dev, "Cannot go to low power mode: %d\n",
-			  ret);
+	goto_low_power(gspca_dev);
 	/* Check the firmware version. */
 	sd->params.version.firmwareVersion = 0;
 	get_version_information(gspca_dev);
-- 
GitLab


From 4b059ce1f4b368208c2310925f49be77f15e527b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:34 +0200
Subject: [PATCH 0591/3804] Revert "ALSA: sb: fix a missing check of
 snd_ctl_add"

This reverts commit beae77170c60aa786f3e4599c18ead2854d8694d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It is safe to ignore this error as the
mixer element is optional, and the driver is very legacy.

Cc: Aditya Pakki <pakki001@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-8-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb16_main.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c
index 38dc1fde25f3c..aa48705310231 100644
--- a/sound/isa/sb/sb16_main.c
+++ b/sound/isa/sb/sb16_main.c
@@ -846,14 +846,10 @@ int snd_sb16dsp_pcm(struct snd_sb *chip, int device)
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &snd_sb16_playback_ops);
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_sb16_capture_ops);
 
-	if (chip->dma16 >= 0 && chip->dma8 != chip->dma16) {
-		err = snd_ctl_add(card, snd_ctl_new1(
-					&snd_sb16_dma_control, chip));
-		if (err)
-			return err;
-	} else {
+	if (chip->dma16 >= 0 && chip->dma8 != chip->dma16)
+		snd_ctl_add(card, snd_ctl_new1(&snd_sb16_dma_control, chip));
+	else
 		pcm->info_flags = SNDRV_PCM_INFO_HALF_DUPLEX;
-	}
 
 	snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_DEV,
 				       card->dev, 64*1024, 128*1024);
-- 
GitLab


From 8d1beda5f11953ffe135a5213287f0b25b4da41b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:35 +0200
Subject: [PATCH 0592/3804] Revert "leds: lp5523: fix a missing check of return
 value of lp55xx_read"

This reverts commit 248b57015f35c94d4eae2fdd8c6febf5cd703900.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit does not properly unwind if there is an error
condition so it needs to be reverted at this point in time.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: stable <stable@vger.kernel.org>
Fixes: 248b57015f35 ("leds: lp5523: fix a missing check of return value of lp55xx_read")
Link: https://lore.kernel.org/r/20210503115736.2104747-9-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/leds/leds-lp5523.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index fc433e63b1dc0..5036d7d5f3d48 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -305,9 +305,7 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip)
 
 	/* Let the programs run for couple of ms and check the engine status */
 	usleep_range(3000, 6000);
-	ret = lp55xx_read(chip, LP5523_REG_STATUS, &status);
-	if (ret)
-		return ret;
+	lp55xx_read(chip, LP5523_REG_STATUS, &status);
 	status &= LP5523_ENG_STATUS_MASK;
 
 	if (status != LP5523_ENG_STATUS_MASK) {
-- 
GitLab


From 6647f7a06eb030a2384ec71f0bb2e78854afabfe Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:56:36 +0200
Subject: [PATCH 0593/3804] leds: lp5523: check return value of lp5xx_read and
 jump to cleanup code

Check return value of lp55xx_read and if non-zero, jump to code at end of
the function, causing lp5523_stop_all_engines to be executed before
returning the error value up the call chain. This fixes the original
commit (248b57015f35) which was reverted due to the University of Minnesota
problems.

Cc: stable <stable@vger.kernel.org>
Acked-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-10-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/leds/leds-lp5523.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index 5036d7d5f3d48..b1590cb4a1887 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -305,7 +305,9 @@ static int lp5523_init_program_engine(struct lp55xx_chip *chip)
 
 	/* Let the programs run for couple of ms and check the engine status */
 	usleep_range(3000, 6000);
-	lp55xx_read(chip, LP5523_REG_STATUS, &status);
+	ret = lp55xx_read(chip, LP5523_REG_STATUS, &status);
+	if (ret)
+		goto out;
 	status &= LP5523_ENG_STATUS_MASK;
 
 	if (status != LP5523_ENG_STATUS_MASK) {
-- 
GitLab


From b0a85abbe92e1a6f3e8580a4590fa7245de7090b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:37 +0200
Subject: [PATCH 0594/3804] Revert "serial: max310x: pass return value of
 spi_register_driver"

This reverts commit 51f689cc11333944c7a457f25ec75fcb41e99410.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This change did not properly unwind from the error condition, so it was
not correct.

Cc: Kangjie Lu <kjlu@umn.edu>
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-11-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index 8534d6e45a1d7..a3ba0e6520a14 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1518,10 +1518,10 @@ static int __init max310x_uart_init(void)
 		return ret;
 
 #ifdef CONFIG_SPI_MASTER
-	ret = spi_register_driver(&max310x_spi_driver);
+	spi_register_driver(&max310x_spi_driver);
 #endif
 
-	return ret;
+	return 0;
 }
 module_init(max310x_uart_init);
 
-- 
GitLab


From 3890e3dea315f1a257d1b940a2a4e2fa16a7b095 Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:56:38 +0200
Subject: [PATCH 0595/3804] serial: max310x: unregister uart driver in case of
 failure and abort

The macro "spi_register_driver" invokes the function
"__spi_register_driver()" which has a return type of int and can fail,
returning a negative value in such a case. This is currently ignored and
the init() function yields success even if the spi driver failed to
register.

Fix this by collecting the return value of "__spi_register_driver()" and
also unregister the uart driver in case of failure.

Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-12-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index a3ba0e6520a14..3cbc757d7be76 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1518,10 +1518,12 @@ static int __init max310x_uart_init(void)
 		return ret;
 
 #ifdef CONFIG_SPI_MASTER
-	spi_register_driver(&max310x_spi_driver);
+	ret = spi_register_driver(&max310x_spi_driver);
+	if (ret)
+		uart_unregister_driver(&max310x_uart);
 #endif
 
-	return 0;
+	return ret;
 }
 module_init(max310x_uart_init);
 
-- 
GitLab


From 68c5634c4a7278672a3bed00eb5646884257c413 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:39 +0200
Subject: [PATCH 0596/3804] Revert "rtlwifi: fix a potential NULL pointer
 dereference"

This reverts commit 765976285a8c8db3f0eb7f033829a899d0c2786e.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit is not correct, it should not have used unlikely() and is
not propagating the error properly to the calling function, so it should
be reverted at this point in time.  Also, if the check failed, the
work queue was still assumed to be allocated, so further accesses would
have continued to fail, meaning this patch does nothing to solve the
root issues at all.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Bryan Brattlof <hello@bryanbrattlof.com>
Fixes: 765976285a8c ("rtlwifi: fix a potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-13-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 2a7ee90a3f549..4136d7c63254c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -452,11 +452,6 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 	/* <2> work queue */
 	rtlpriv->works.hw = hw;
 	rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
-	if (unlikely(!rtlpriv->works.rtl_wq)) {
-		pr_err("Failed to allocate work queue\n");
-		return;
-	}
-
 	INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq,
 			  rtl_watchdog_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
-- 
GitLab


From 30b0e0ee9d02b97b68705c46b41444786effc40c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:40 +0200
Subject: [PATCH 0597/3804] net: rtlwifi: properly check for alloc_workqueue()
 failure

If alloc_workqueue() fails, properly catch this and propagate the error
to the calling functions, so that the device initialization will
properly error out.

Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Bryan Brattlof <hello@bryanbrattlof.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-14-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index 4136d7c63254c..ffd150ec181fa 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -440,9 +440,14 @@ static void rtl_watchdog_wq_callback(struct work_struct *work);
 static void rtl_fwevt_wq_callback(struct work_struct *work);
 static void rtl_c2hcmd_wq_callback(struct work_struct *work);
 
-static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
+static int _rtl_init_deferred_work(struct ieee80211_hw *hw)
 {
 	struct rtl_priv *rtlpriv = rtl_priv(hw);
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
+	if (!wq)
+		return -ENOMEM;
 
 	/* <1> timer */
 	timer_setup(&rtlpriv->works.watchdog_timer,
@@ -451,7 +456,8 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 		    rtl_easy_concurrent_retrytimer_callback, 0);
 	/* <2> work queue */
 	rtlpriv->works.hw = hw;
-	rtlpriv->works.rtl_wq = alloc_workqueue("%s", 0, 0, rtlpriv->cfg->name);
+	rtlpriv->works.rtl_wq = wq;
+
 	INIT_DELAYED_WORK(&rtlpriv->works.watchdog_wq,
 			  rtl_watchdog_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.ips_nic_off_wq,
@@ -461,6 +467,7 @@ static void _rtl_init_deferred_work(struct ieee80211_hw *hw)
 			  rtl_swlps_rfon_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.fwevt_wq, rtl_fwevt_wq_callback);
 	INIT_DELAYED_WORK(&rtlpriv->works.c2hcmd_wq, rtl_c2hcmd_wq_callback);
+	return 0;
 }
 
 void rtl_deinit_deferred_work(struct ieee80211_hw *hw, bool ips_wq)
@@ -559,9 +566,7 @@ int rtl_init_core(struct ieee80211_hw *hw)
 	rtlmac->link_state = MAC80211_NOLINK;
 
 	/* <6> init deferred work */
-	_rtl_init_deferred_work(hw);
-
-	return 0;
+	return _rtl_init_deferred_work(hw);
 }
 EXPORT_SYMBOL_GPL(rtl_init_core);
 
-- 
GitLab


From 5f94eaa4ee23e80841fa359a372f84cfe25daee1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:41 +0200
Subject: [PATCH 0598/3804] Revert "net: fujitsu: fix a potential NULL pointer
 dereference"

This reverts commit 9f4d6358e11bbc7b839f9419636188e4151fb6e4.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change does not change any behavior as the caller of this
function only checks for "== -1" as an error condition so this error is
not handled properly.  Remove this change and it will be fixed up
properly in a later commit.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Reviewed-by: Dominik Brodowski <linux@dominikbrodowski.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-15-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index a7b7a4aace791..dc90c61fc8275 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -547,11 +547,6 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 	return -1;
 
     base = ioremap(link->resource[2]->start, resource_size(link->resource[2]));
-    if (!base) {
-	    pcmcia_release_window(link, link->resource[2]);
-	    return -ENOMEM;
-    }
-
     pcmcia_map_mem_page(link, link->resource[2], 0);
 
     /*
-- 
GitLab


From 52202be1cd996cde6e8969a128dc27ee45a7cb5e Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:42 +0200
Subject: [PATCH 0599/3804] net: fujitsu: fix potential null-ptr-deref

In fmvj18x_get_hwinfo(), if ioremap fails there will be NULL pointer
deref. To fix this, check the return value of ioremap and return -1
to the caller in case of failure.

Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-16-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index dc90c61fc8275..b0c0504950d81 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -547,6 +547,11 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id)
 	return -1;
 
     base = ioremap(link->resource[2]->start, resource_size(link->resource[2]));
+    if (!base) {
+	pcmcia_release_window(link, link->resource[2]);
+	return -1;
+    }
+
     pcmcia_map_mem_page(link, link->resource[2], 0);
 
     /*
-- 
GitLab


From 5369ead83f5aff223b6418c99cb1fe9a8f007363 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:43 +0200
Subject: [PATCH 0600/3804] Revert "net/smc: fix a NULL pointer dereference"

This reverts commit e183d4e414b64711baf7a04e214b61969ca08dfa.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit causes a memory leak and does not properly fix the
issue it claims to fix.  I will send a follow-on patch to resolve this
properly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Ursula Braun <ubraun@linux.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-17-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/smc/smc_ism.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9c6e95882553e..6558cf7643a7f 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -417,11 +417,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	init_waitqueue_head(&smcd->lgrs_deleted);
 	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
 						 WQ_MEM_RECLAIM, name);
-	if (!smcd->event_wq) {
-		kfree(smcd->conn);
-		kfree(smcd);
-		return NULL;
-	}
 	return smcd;
 }
 EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-- 
GitLab


From bbeb18f27a44ce6adb00d2316968bc59dc640b9b Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:44 +0200
Subject: [PATCH 0601/3804] net/smc: properly handle workqueue allocation
 failure

In smcd_alloc_dev(), if alloc_ordered_workqueue() fails, properly catch
it, clean up and return NULL to let the caller know there was a failure.
Move the call to alloc_ordered_workqueue higher in the function in order
to abort earlier without needing to unwind the call to device_initialize().

Cc: Ursula Braun <ubraun@linux.ibm.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-18-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/smc/smc_ism.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 6558cf7643a7f..94b31f2551bc9 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -402,6 +402,14 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 		return NULL;
 	}
 
+	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
+						 WQ_MEM_RECLAIM, name);
+	if (!smcd->event_wq) {
+		kfree(smcd->conn);
+		kfree(smcd);
+		return NULL;
+	}
+
 	smcd->dev.parent = parent;
 	smcd->dev.release = smcd_release;
 	device_initialize(&smcd->dev);
@@ -415,8 +423,6 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	INIT_LIST_HEAD(&smcd->vlan);
 	INIT_LIST_HEAD(&smcd->lgr_list);
 	init_waitqueue_head(&smcd->lgrs_deleted);
-	smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)",
-						 WQ_MEM_RECLAIM, name);
 	return smcd;
 }
 EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-- 
GitLab


From 4df07045fcfd684379a394d0f2aa0cc4067bda2a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:45 +0200
Subject: [PATCH 0602/3804] Revert "net: caif: replace BUG_ON with recovery
 code"

This reverts commit c5dea815834c7d2e9fc633785455bc428b7a1956.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change here was pointless as dev can never be NULL in this
function so the claim in the changelog that this "fixes" anything is
incorrect (also the developer forgot about panic_on_warn).  A follow-up
change will resolve this issue properly.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-19-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/caif/caif_serial.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index da6fffb4d5a8e..a7f51eb58915d 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -269,9 +269,7 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ser_device *ser;
 
-	if (WARN_ON(!dev))
-		return -EINVAL;
-
+	BUG_ON(dev == NULL);
 	ser = netdev_priv(dev);
 
 	/* Send flow off once, on high water mark */
-- 
GitLab


From 65a67792e3416f7c5d7daa47d99334cbb19a7449 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Mon, 3 May 2021 13:56:46 +0200
Subject: [PATCH 0603/3804] net: caif: remove BUG_ON(dev == NULL) in caif_xmit

The condition dev == NULL is impossible in caif_xmit(), so the check
is removed.

Explanation:
The static caif_xmit() is only called upon via a function pointer
`ndo_start_xmit` defined in include/linux/netdevice.h:
```
struct net_device_ops {
    ...
    netdev_tx_t     (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev);
    ...
}
```

The exhaustive list of call points is:
```
drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
    dev->netdev_ops->ndo_start_xmit(skb, dev);
    ^                                    ^

drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
    struct opa_vnic_adapter *adapter = opa_vnic_priv(netdev);
			     ^                       ^
    return adapter->rn_ops->ndo_start_xmit(skb, netdev); // adapter would crash first
	   ^                                    ^

drivers/usb/gadget/function/f_ncm.c
    ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev);
	      ^                                   ^

include/linux/netdevice.h
static inline netdev_tx_t __netdev_start_xmit(...
{
    return ops->ndo_start_xmit(skb, dev);
				    ^
}

    const struct net_device_ops *ops = dev->netdev_ops;
				       ^
    rc = __netdev_start_xmit(ops, skb, dev, more);
				       ^
```

In each of the enumerated scenarios, it is impossible for the NULL-valued dev to
reach the caif_xmit() without crashing the kernel earlier, therefore `BUG_ON(dev ==
NULL)` is rather useless, hence the removal.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-20-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/caif/caif_serial.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index a7f51eb58915d..d17482395a4da 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -269,7 +269,6 @@ static netdev_tx_t caif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ser_device *ser;
 
-	BUG_ON(dev == NULL);
 	ser = netdev_priv(dev);
 
 	/* Send flow off once, on high water mark */
-- 
GitLab


From bee1b0511844c8c79fccf1f2b13472393b6b91f7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:47 +0200
Subject: [PATCH 0604/3804] Revert "net: stmicro: fix a missing check of
 clk_prepare"

This reverts commit f86a3b83833e7cfe558ca4d70b64ebc48903efec.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit causes a memory leak when it is trying to claim it
is properly handling errors.  Revert this change and fix it up properly
in a follow-on commit.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Fixes: f86a3b83833e ("net: stmicro: fix a missing check of clk_prepare")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-21-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 527077c98ebce..fc68e90acbeac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -50,9 +50,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 		gmac->clk_enabled = 1;
 	} else {
 		clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE);
-		ret = clk_prepare(gmac->tx_clk);
-		if (ret)
-			return ret;
+		clk_prepare(gmac->tx_clk);
 	}
 
 	return 0;
-- 
GitLab


From 4573472315f0fa461330545ff2aa2f6da0b1ae76 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Thu, 13 May 2021 15:07:41 +0300
Subject: [PATCH 0605/3804] iio: adc: ad7124: Fix missbalanced regulator enable
 / disable on error.

If the devm_regulator_get() call succeeded but not the regulator_enable()
then regulator_disable() would be called on a regulator that was not
enabled.

Fix this by moving regulator enabling / disabling over to
devm_ management via devm_add_action_or_reset.

Alexandru's sign-off here because he pulled Jonathan's patch into
a larger set which Jonathan then applied.

Fixes: b3af341bbd96 ("iio: adc: Add ad7124 support")
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7124.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 9d3952b4674f5..437116a07cf16 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -850,6 +850,11 @@ static int ad7124_setup(struct ad7124_state *st)
 	return ret;
 }
 
+static void ad7124_reg_disable(void *r)
+{
+	regulator_disable(r);
+}
+
 static int ad7124_probe(struct spi_device *spi)
 {
 	const struct ad7124_chip_info *info;
@@ -895,17 +900,20 @@ static int ad7124_probe(struct spi_device *spi)
 		ret = regulator_enable(st->vref[i]);
 		if (ret)
 			return ret;
+
+		ret = devm_add_action_or_reset(&spi->dev, ad7124_reg_disable,
+					       st->vref[i]);
+		if (ret)
+			return ret;
 	}
 
 	st->mclk = devm_clk_get(&spi->dev, "mclk");
-	if (IS_ERR(st->mclk)) {
-		ret = PTR_ERR(st->mclk);
-		goto error_regulator_disable;
-	}
+	if (IS_ERR(st->mclk))
+		return PTR_ERR(st->mclk);
 
 	ret = clk_prepare_enable(st->mclk);
 	if (ret < 0)
-		goto error_regulator_disable;
+		return ret;
 
 	ret = ad7124_soft_reset(st);
 	if (ret < 0)
@@ -935,11 +943,6 @@ error_remove_trigger:
 	ad_sd_cleanup_buffer_and_trigger(indio_dev);
 error_clk_disable_unprepare:
 	clk_disable_unprepare(st->mclk);
-error_regulator_disable:
-	for (i = ARRAY_SIZE(st->vref) - 1; i >= 0; i--) {
-		if (!IS_ERR_OR_NULL(st->vref[i]))
-			regulator_disable(st->vref[i]);
-	}
 
 	return ret;
 }
@@ -948,17 +951,11 @@ static int ad7124_remove(struct spi_device *spi)
 {
 	struct iio_dev *indio_dev = spi_get_drvdata(spi);
 	struct ad7124_state *st = iio_priv(indio_dev);
-	int i;
 
 	iio_device_unregister(indio_dev);
 	ad_sd_cleanup_buffer_and_trigger(indio_dev);
 	clk_disable_unprepare(st->mclk);
 
-	for (i = ARRAY_SIZE(st->vref) - 1; i >= 0; i--) {
-		if (!IS_ERR_OR_NULL(st->vref[i]))
-			regulator_disable(st->vref[i]);
-	}
-
 	return 0;
 }
 
-- 
GitLab


From f2a772c51206b0c3f262e4f6a3812c89a650191b Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Thu, 13 May 2021 15:07:42 +0300
Subject: [PATCH 0606/3804] iio: adc: ad7124: Fix potential overflow due to non
 sequential channel numbers

Channel numbering must start at 0 and then not have any holes, or
it is possible to overflow the available storage.  Note this bug was
introduced as part of a fix to ensure we didn't rely on the ordering
of child nodes.  So we need to support arbitrary ordering but they all
need to be there somewhere.

Note I hit this when using qemu to test the rest of this series.
Arguably this isn't the best fix, but it is probably the most minimal
option for backporting etc.

Alexandru's sign-off is here because he carried this patch in a larger
set that Jonathan then applied.

Fixes: d7857e4ee1ba6 ("iio: adc: ad7124: Fix DT channel configuration")
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7124.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 437116a07cf16..a27db78ea13ee 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -771,6 +771,13 @@ static int ad7124_of_parse_channel_config(struct iio_dev *indio_dev,
 		if (ret)
 			goto err;
 
+		if (channel >= indio_dev->num_channels) {
+			dev_err(indio_dev->dev.parent,
+				"Channel index >= number of channels\n");
+			ret = -EINVAL;
+			goto err;
+		}
+
 		ret = of_property_read_u32_array(child, "diff-channels",
 						 ain, 2);
 		if (ret)
-- 
GitLab


From 0c32a96d000f260b5ebfabb4145a86ae1cd71847 Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:56:48 +0200
Subject: [PATCH 0607/3804] net: stmicro: handle clk_prepare() failure during
 init

In case clk_prepare() fails, capture and propagate the error code up the
stack. If regulator_enable() was called earlier, properly unwind it by
calling regulator_disable().

Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-22-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index fc68e90acbeac..fc3b0acc8f99f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -30,7 +30,7 @@ struct sunxi_priv_data {
 static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 {
 	struct sunxi_priv_data *gmac = priv;
-	int ret;
+	int ret = 0;
 
 	if (gmac->regulator) {
 		ret = regulator_enable(gmac->regulator);
@@ -50,10 +50,12 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
 		gmac->clk_enabled = 1;
 	} else {
 		clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE);
-		clk_prepare(gmac->tx_clk);
+		ret = clk_prepare(gmac->tx_clk);
+		if (ret && gmac->regulator)
+			regulator_disable(gmac->regulator);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void sun7i_gmac_exit(struct platform_device *pdev, void *priv)
-- 
GitLab


From 7930742d6a0ff091c85b92ef4e076432d8d8cb79 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:49 +0200
Subject: [PATCH 0608/3804] Revert "niu: fix missing checks of
 niu_pci_eeprom_read"

This reverts commit 26fd962bde0b15e54234fe762d86bc0349df1de4.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The change here was incorrect.  While it is nice to check if
niu_pci_eeprom_read() succeeded or not when using the data, any error
that might have happened was not propagated upwards properly, causing
the kernel to assume that these reads were successful, which results in
invalid data in the buffer that was to contain the successfully read
data.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Shannon Nelson <shannon.lee.nelson@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Fixes: 26fd962bde0b ("niu: fix missing checks of niu_pci_eeprom_read")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-23-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/niu.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 707ccdd03b19e..d70cdea756d1a 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -8097,8 +8097,6 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 		start += 3;
 
 		prop_len = niu_pci_eeprom_read(np, start + 4);
-		if (prop_len < 0)
-			return prop_len;
 		err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64);
 		if (err < 0)
 			return err;
@@ -8143,12 +8141,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 			netif_printk(np, probe, KERN_DEBUG, np->dev,
 				     "VPD_SCAN: Reading in property [%s] len[%d]\n",
 				     namebuf, prop_len);
-			for (i = 0; i < prop_len; i++) {
-				err = niu_pci_eeprom_read(np, off + i);
-				if (err >= 0)
-					*prop_buf = err;
-				++prop_buf;
-			}
+			for (i = 0; i < prop_len; i++)
+				*prop_buf++ = niu_pci_eeprom_read(np, off + i);
 		}
 
 		start += len;
-- 
GitLab


From e6e337708c22f80824b82d4af645f20715730ad0 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Mon, 3 May 2021 13:56:50 +0200
Subject: [PATCH 0609/3804] ethernet: sun: niu: fix missing checks of
 niu_pci_eeprom_read()

niu_pci_eeprom_read() may fail, so add checks to its return value and
propagate the error up the callstack.

An examination of the callstack up to niu_pci_eeprom_read shows that:

niu_pci_eeprom_read() // returns int
    niu_pci_vpd_scan_props() // returns int
        niu_pci_vpd_fetch() // returns *void*
            niu_get_invariants() // returns int

Since niu_pci_vpd_fetch() returns void, which breaks the bubbling up,
change its return type to int so that the error is propagated upwards.

Signed-off-by: Du Cheng <ducheng2@gmail.com>
Cc: Shannon Nelson <shannon.lee.nelson@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-24-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sun/niu.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index d70cdea756d1a..74e748662ec01 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -8097,6 +8097,8 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 		start += 3;
 
 		prop_len = niu_pci_eeprom_read(np, start + 4);
+		if (prop_len < 0)
+			return prop_len;
 		err = niu_pci_vpd_get_propname(np, start + 5, namebuf, 64);
 		if (err < 0)
 			return err;
@@ -8141,8 +8143,12 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 			netif_printk(np, probe, KERN_DEBUG, np->dev,
 				     "VPD_SCAN: Reading in property [%s] len[%d]\n",
 				     namebuf, prop_len);
-			for (i = 0; i < prop_len; i++)
-				*prop_buf++ = niu_pci_eeprom_read(np, off + i);
+			for (i = 0; i < prop_len; i++) {
+				err =  niu_pci_eeprom_read(np, off + i);
+				if (err < 0)
+					return err;
+				*prop_buf++ = err;
+			}
 		}
 
 		start += len;
@@ -8152,14 +8158,14 @@ static int niu_pci_vpd_scan_props(struct niu *np, u32 start, u32 end)
 }
 
 /* ESPC_PIO_EN_ENABLE must be set */
-static void niu_pci_vpd_fetch(struct niu *np, u32 start)
+static int niu_pci_vpd_fetch(struct niu *np, u32 start)
 {
 	u32 offset;
 	int err;
 
 	err = niu_pci_eeprom_read16_swp(np, start + 1);
 	if (err < 0)
-		return;
+		return err;
 
 	offset = err + 3;
 
@@ -8168,12 +8174,14 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start)
 		u32 end;
 
 		err = niu_pci_eeprom_read(np, here);
+		if (err < 0)
+			return err;
 		if (err != 0x90)
-			return;
+			return -EINVAL;
 
 		err = niu_pci_eeprom_read16_swp(np, here + 1);
 		if (err < 0)
-			return;
+			return err;
 
 		here = start + offset + 3;
 		end = start + offset + err;
@@ -8181,9 +8189,12 @@ static void niu_pci_vpd_fetch(struct niu *np, u32 start)
 		offset += err;
 
 		err = niu_pci_vpd_scan_props(np, here, end);
-		if (err < 0 || err == 1)
-			return;
+		if (err < 0)
+			return err;
+		if (err == 1)
+			return -EINVAL;
 	}
+	return 0;
 }
 
 /* ESPC_PIO_EN_ENABLE must be set */
@@ -9274,8 +9285,11 @@ static int niu_get_invariants(struct niu *np)
 		offset = niu_pci_vpd_offset(np);
 		netif_printk(np, probe, KERN_DEBUG, np->dev,
 			     "%s() VPD offset [%08x]\n", __func__, offset);
-		if (offset)
-			niu_pci_vpd_fetch(np, offset);
+		if (offset) {
+			err = niu_pci_vpd_fetch(np, offset);
+			if (err < 0)
+				return err;
+		}
 		nw64(ESPC_PIO_EN, 0);
 
 		if (np->flags & NIU_FLAGS_VPD_VALID) {
-- 
GitLab


From b95b57dfe7a142bf2446548eb7f49340fd73e78b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:51 +0200
Subject: [PATCH 0610/3804] Revert "qlcnic: Avoid potential NULL pointer
 dereference"

This reverts commit 5bf7295fe34a5251b1d241b9736af4697b590670.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit does not properly detect if an error happens because the
logic after this loop will not detect that there was a failed
allocation.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Fixes: 5bf7295fe34a ("qlcnic: Avoid potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-25-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index d8a3ecaed3fc6..985cf8cb2ec04 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1047,8 +1047,6 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 
 	for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
 		skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
-		if (!skb)
-			break;
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 		adapter->ahw->diag_cnt = 0;
-- 
GitLab


From 84460f01cba382553199bc1361f69a872d5abed4 Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:56:52 +0200
Subject: [PATCH 0611/3804] qlcnic: Add null check after calling
 netdev_alloc_skb

The function qlcnic_do_lb_test() currently calls netdev_alloc_skb()
without checking afterwards that the allocation succeeded. Fix this by
checking if the skb is NULL and returning an error in such a case.
Breaking out of the loop if the skb is NULL is not correct as no error
would be reported to the caller and no message would be printed for the
user.

Cc: David S. Miller <davem@davemloft.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-26-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 985cf8cb2ec04..d8f0863b39342 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -1047,6 +1047,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 
 	for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) {
 		skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE);
+		if (!skb)
+			goto error;
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 		adapter->ahw->diag_cnt = 0;
@@ -1070,6 +1072,7 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 			cnt++;
 	}
 	if (cnt != i) {
+error:
 		dev_err(&adapter->pdev->dev,
 			"LB Test: failed, TX[%d], RX[%d]\n", i, cnt);
 		if (mode != QLCNIC_ILB_MODE)
-- 
GitLab


From 257343d3ed557f11d580d0b7c515dc154f64a42b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:53 +0200
Subject: [PATCH 0612/3804] Revert "gdrom: fix a memory leak bug"

This reverts commit 093c48213ee37c3c3ff1cf5ac1aa2a9d8bc66017.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Because of this, all submissions from this group must be reverted from
the kernel tree and will need to be re-reviewed again to determine if
they actually are a valid fix.  Until that work is complete, remove this
change to ensure that no problems are being introduced into the
codebase.

Cc: Wenwen Wang <wang6495@umn.edu>
Cc: Peter Rosin <peda@axentia.se>
Cc: Jens Axboe <axboe@kernel.dk>
Fixes: 093c48213ee3 ("gdrom: fix a memory leak bug")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-27-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 742b4a0932e3d..7f681320c7d3f 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -862,7 +862,6 @@ static void __exit exit_gdrom(void)
 	platform_device_unregister(pd);
 	platform_driver_unregister(&gdrom_driver);
 	kfree(gd.toc);
-	kfree(gd.cd_info);
 }
 
 module_init(init_gdrom);
-- 
GitLab


From d03d1021da6fe7f46efe9f2a7335564e7c9db5ab Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:56:54 +0200
Subject: [PATCH 0613/3804] cdrom: gdrom: deallocate struct gdrom_unit fields
 in remove_gdrom

The fields, "toc" and "cd_info", of "struct gdrom_unit gd" are allocated
in "probe_gdrom()". Prevent a memory leak by making sure "gd.cd_info" is
deallocated in the "remove_gdrom()" function.

Also prevent double free of the field "gd.toc" by moving it from the
module's exit function to "remove_gdrom()". This is because, in
"probe_gdrom()", the function makes sure to deallocate "gd.toc" in case
of any errors, so the exit function invoked later would again free
"gd.toc".

The patch also maintains consistency by deallocating the above mentioned
fields in "remove_gdrom()" along with another memory allocated field
"gd.disk".

Suggested-by: Jens Axboe <axboe@kernel.dk>
Cc: Peter Rosin <peda@axentia.se>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-28-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 7f681320c7d3f..6c4f6139f8530 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -830,6 +830,8 @@ static int remove_gdrom(struct platform_device *devptr)
 	if (gdrom_major)
 		unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
 	unregister_cdrom(gd.cd_info);
+	kfree(gd.cd_info);
+	kfree(gd.toc);
 
 	return 0;
 }
@@ -861,7 +863,6 @@ static void __exit exit_gdrom(void)
 {
 	platform_device_unregister(pd);
 	platform_driver_unregister(&gdrom_driver);
-	kfree(gd.toc);
 }
 
 module_init(init_gdrom);
-- 
GitLab


From 566f53238da74801b48e985788e5f7c9159e5940 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:55 +0200
Subject: [PATCH 0614/3804] Revert "char: hpet: fix a missing check of ioremap"

This reverts commit 13bd14a41ce3105d5b1f3cd8b4d1e249d17b6d9b.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While this is technically correct, it is only fixing ONE of these errors
in this function, so the patch is not fully correct.  I'll leave this
revert and provide a fix for this later that resolves this same
"problem" everywhere in this function.

Cc: Kangjie Lu <kjlu@umn.edu>
Link: https://lore.kernel.org/r/20210503115736.2104747-29-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/hpet.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index ed3b7dab678db..6f13def6c1727 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -969,8 +969,6 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 	if (ACPI_SUCCESS(status)) {
 		hdp->hd_phys_address = addr.address.minimum;
 		hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length);
-		if (!hdp->hd_address)
-			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
-- 
GitLab


From b11701c933112d49b808dee01cb7ff854ba6a77a Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:56:56 +0200
Subject: [PATCH 0615/3804] char: hpet: add checks after calling ioremap

The function hpet_resources() calls ioremap() two times, but in both
cases it does not check if ioremap() returned a null pointer. Fix this
by adding null pointer checks and returning an appropriate error.

Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-30-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/hpet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c
index 6f13def6c1727..8b55085650ad0 100644
--- a/drivers/char/hpet.c
+++ b/drivers/char/hpet.c
@@ -969,6 +969,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 	if (ACPI_SUCCESS(status)) {
 		hdp->hd_phys_address = addr.address.minimum;
 		hdp->hd_address = ioremap(addr.address.minimum, addr.address.address_length);
+		if (!hdp->hd_address)
+			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
@@ -982,6 +984,8 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
 		hdp->hd_phys_address = fixmem32->address;
 		hdp->hd_address = ioremap(fixmem32->address,
 						HPET_RANGE_SIZE);
+		if (!hdp->hd_address)
+			return AE_ERROR;
 
 		if (hpet_is_known(hdp)) {
 			iounmap(hdp->hd_address);
-- 
GitLab


From 4d427b408c4c2ff1676966c72119a3a559f8e39b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:57 +0200
Subject: [PATCH 0616/3804] Revert "scsi: ufs: fix a missing check of
 devm_reset_control_get"

This reverts commit 63a06181d7ce169d09843645c50fea1901bc9f0a.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit is incorrect, it does not properly clean up on the
error path, so I'll keep the revert and fix it up properly with a
follow-on patch.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Fixes: 63a06181d7ce ("scsi: ufs: fix a missing check of devm_reset_control_get")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-31-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs-hisi.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 0aa58131e7915..7d1e07a9d9dde 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -468,10 +468,6 @@ static int ufs_hisi_init_common(struct ufs_hba *hba)
 	ufshcd_set_variant(hba, host);
 
 	host->rst  = devm_reset_control_get(dev, "rst");
-	if (IS_ERR(host->rst)) {
-		dev_err(dev, "%s: failed to get reset control\n", __func__);
-		return PTR_ERR(host->rst);
-	}
 
 	ufs_hisi_set_pm_lvl(hba);
 
-- 
GitLab


From 2f4a784f40f8d337d6590e2e93f46429052e15ac Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:56:58 +0200
Subject: [PATCH 0617/3804] scsi: ufs: handle cleanup correctly on
 devm_reset_control_get error

Move ufshcd_set_variant call in ufs_hisi_init_common to common error
section at end of the function, and then jump to this from the error
checking statements for both devm_reset_control_get and
ufs_hisi_get_resource. This fixes the original commit (63a06181d7ce)
which was reverted due to the University of Minnesota problems.

Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-32-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/ufs/ufs-hisi.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 7d1e07a9d9dde..d0626773eb386 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -467,17 +467,24 @@ static int ufs_hisi_init_common(struct ufs_hba *hba)
 	host->hba = hba;
 	ufshcd_set_variant(hba, host);
 
-	host->rst  = devm_reset_control_get(dev, "rst");
+	host->rst = devm_reset_control_get(dev, "rst");
+	if (IS_ERR(host->rst)) {
+		dev_err(dev, "%s: failed to get reset control\n", __func__);
+		err = PTR_ERR(host->rst);
+		goto error;
+	}
 
 	ufs_hisi_set_pm_lvl(hba);
 
 	err = ufs_hisi_get_resource(host);
-	if (err) {
-		ufshcd_set_variant(hba, NULL);
-		return err;
-	}
+	if (err)
+		goto error;
 
 	return 0;
+
+error:
+	ufshcd_set_variant(hba, NULL);
+	return err;
 }
 
 static int ufs_hi3660_init(struct ufs_hba *hba)
-- 
GitLab


From 1dacca7fa1ebea47d38d20cd2df37094805d2649 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:56:59 +0200
Subject: [PATCH 0618/3804] Revert "ALSA: gus: add a check of the status of
 snd_ctl_add"

This reverts commit 0f25e000cb4398081748e54f62a902098aa79ec1.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit did nothing if there was an error, except to print
out a message, which is pointless.  So remove the commit as it gives a
"false sense of doing something".

Cc: Kangjie Lu <kjlu@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-33-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/gus/gus_main.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/sound/isa/gus/gus_main.c b/sound/isa/gus/gus_main.c
index afc088f0377ce..b7518122a10d6 100644
--- a/sound/isa/gus/gus_main.c
+++ b/sound/isa/gus/gus_main.c
@@ -77,17 +77,8 @@ static const struct snd_kcontrol_new snd_gus_joystick_control = {
 
 static void snd_gus_init_control(struct snd_gus_card *gus)
 {
-	int ret;
-
-	if (!gus->ace_flag) {
-		ret =
-			snd_ctl_add(gus->card,
-					snd_ctl_new1(&snd_gus_joystick_control,
-						gus));
-		if (ret)
-			snd_printk(KERN_ERR "gus: snd_ctl_add failed: %d\n",
-					ret);
-	}
+	if (!gus->ace_flag)
+		snd_ctl_add(gus->card, snd_ctl_new1(&snd_gus_joystick_control, gus));
 }
 
 /*
-- 
GitLab


From 94f88309f201821073f57ae6005caefa61bf7b7e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:01 +0200
Subject: [PATCH 0619/3804] Revert "ALSA: sb8: add a check for request_region"

This reverts commit dcd0feac9bab901d5739de51b3f69840851f8919.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit message for this change was incorrect as the code
path can never result in a NULL dereference, alluding to the fact that
whatever tool was used to "find this" is broken.  It's just an optional
resource reservation, so removing this check is fine.

Cc: Kangjie Lu <kjlu@umn.edu>
Acked-by: Takashi Iwai <tiwai@suse.de>
Fixes: dcd0feac9bab ("ALSA: sb8: add a check for request_region")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-35-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb8.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index 6c9d534ce8b61..95290ffe5c6e7 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -95,10 +95,6 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
 
 	/* block the 0x388 port to avoid PnP conflicts */
 	acard->fm_res = request_region(0x388, 4, "SoundBlaster FM");
-	if (!acard->fm_res) {
-		err = -EBUSY;
-		goto _err;
-	}
 
 	if (port[dev] != SNDRV_AUTO_PORT) {
 		if ((err = snd_sbdsp_create(card, port[dev], irq[dev],
-- 
GitLab


From a28591f61b60fac820c6de59826ffa710e5e314e Mon Sep 17 00:00:00 2001
From: Atul Gopinathan <atulgopinathan@gmail.com>
Date: Mon, 3 May 2021 13:57:02 +0200
Subject: [PATCH 0620/3804] ALSA: sb8: Add a comment note regarding an unused
 pointer

The field "fm_res" of "struct snd_sb8" is never used/dereferenced
throughout the sb8.c code. Therefore there is no need for any null value
check after the "request_region()".

Add a comment to let developers know about this and to prevent any
"NULL check" patches on this part of the code.

Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Atul Gopinathan <atulgopinathan@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-36-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/isa/sb/sb8.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c
index 95290ffe5c6e7..ed3a87ebe3f41 100644
--- a/sound/isa/sb/sb8.c
+++ b/sound/isa/sb/sb8.c
@@ -93,7 +93,11 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev)
 	acard = card->private_data;
 	card->private_free = snd_sb8_free;
 
-	/* block the 0x388 port to avoid PnP conflicts */
+	/*
+	 * Block the 0x388 port to avoid PnP conflicts.
+	 * No need to check this value after request_region,
+	 * as we never do anything with it.
+	 */
 	acard->fm_res = request_region(0x388, 4, "SoundBlaster FM");
 
 	if (port[dev] != SNDRV_AUTO_PORT) {
-- 
GitLab


From 4667a6fc1777ce071504bab570d3599107f4790f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:03 +0200
Subject: [PATCH 0621/3804] Revert "ALSA: usx2y: Fix potential NULL pointer
 dereference"

This reverts commit a2c6433ee5a35a8de6d563f6512a26f87835ea0f.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original patch was incorrect, and would leak memory if the error
path the patch added was hit.

Cc: Aditya Pakki <pakki001@umn.edu>
Reviewed-by: Takashi Iwai <tiwai@suse.de>
Link: https://lore.kernel.org/r/20210503115736.2104747-37-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/usx2y/usb_stream.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c
index 091c071b270af..6bba17bf689ac 100644
--- a/sound/usb/usx2y/usb_stream.c
+++ b/sound/usb/usx2y/usb_stream.c
@@ -91,12 +91,7 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize,
 
 	for (u = 0; u < USB_STREAM_NURBS; ++u) {
 		sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
-		if (!sk->inurb[u])
-			return -ENOMEM;
-
 		sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
-		if (!sk->outurb[u])
-			return -ENOMEM;
 	}
 
 	if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) ||
-- 
GitLab


From 58c0cc2d90f1e37c4eb63ae7f164c83830833f78 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:05 +0200
Subject: [PATCH 0622/3804] Revert "video: hgafb: fix potential NULL pointer
 dereference"

This reverts commit ec7f6aad57ad29e4e66cc2e18e1e1599ddb02542.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This patch "looks" correct, but the driver keeps on running and will
fail horribly right afterward if this error condition ever trips.

So points for trying to resolve an issue, but a huge NEGATIVE value for
providing a "fake" fix for the problem as nothing actually got resolved
at all.  I'll go fix this up properly...

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Ferenc Bakonyi <fero@drama.obuda.kando.hu>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Fixes: ec7f6aad57ad ("video: hgafb: fix potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-39-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/hgafb.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c
index 8bbac7182ad32..fca29f219f8b1 100644
--- a/drivers/video/fbdev/hgafb.c
+++ b/drivers/video/fbdev/hgafb.c
@@ -285,8 +285,6 @@ static int hga_card_detect(void)
 	hga_vram_len  = 0x08000;
 
 	hga_vram = ioremap(0xb0000, hga_vram_len);
-	if (!hga_vram)
-		goto error;
 
 	if (request_region(0x3b0, 12, "hgafb"))
 		release_io_ports = 1;
-- 
GitLab


From dc13cac4862cc68ec74348a80b6942532b7735fa Mon Sep 17 00:00:00 2001
From: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Date: Mon, 3 May 2021 13:57:06 +0200
Subject: [PATCH 0623/3804] video: hgafb: fix potential NULL pointer
 dereference

The return of ioremap is not checked, and can lead to a NULL being
assigned to hga_vram, potentially leading to a NULL pointer
dereference.

The fix adds code to deal with this case in the error label and
changes how the hgafb_probe handles the return of hga_card_detect.

Cc: Ferenc Bakonyi <fero@drama.obuda.kando.hu>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-40-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/hgafb.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c
index fca29f219f8b1..cc8e62ae93f6a 100644
--- a/drivers/video/fbdev/hgafb.c
+++ b/drivers/video/fbdev/hgafb.c
@@ -285,6 +285,8 @@ static int hga_card_detect(void)
 	hga_vram_len  = 0x08000;
 
 	hga_vram = ioremap(0xb0000, hga_vram_len);
+	if (!hga_vram)
+		return -ENOMEM;
 
 	if (request_region(0x3b0, 12, "hgafb"))
 		release_io_ports = 1;
@@ -344,13 +346,18 @@ static int hga_card_detect(void)
 			hga_type_name = "Hercules";
 			break;
 	}
-	return 1;
+	return 0;
 error:
 	if (release_io_ports)
 		release_region(0x3b0, 12);
 	if (release_io_port)
 		release_region(0x3bf, 1);
-	return 0;
+
+	iounmap(hga_vram);
+
+	pr_err("hgafb: HGA card not detected.\n");
+
+	return -EINVAL;
 }
 
 /**
@@ -548,13 +555,11 @@ static const struct fb_ops hgafb_ops = {
 static int hgafb_probe(struct platform_device *pdev)
 {
 	struct fb_info *info;
+	int ret;
 
-	if (! hga_card_detect()) {
-		printk(KERN_INFO "hgafb: HGA card not detected.\n");
-		if (hga_vram)
-			iounmap(hga_vram);
-		return -EINVAL;
-	}
+	ret = hga_card_detect();
+	if (!ret)
+		return ret;
 
 	printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n",
 		hga_type_name, hga_vram_len/1024);
-- 
GitLab


From abd7bca23bd4247124265152d00ffd4b2b0d6877 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:07 +0200
Subject: [PATCH 0624/3804] Revert "isdn: mISDNinfineon: fix potential NULL
 pointer dereference"

This reverts commit d721fe99f6ada070ae8fc0ec3e01ce5a42def0d9.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit was incorrect: it should never have used
"unlikely()", and if it ever does trigger, resources are left grabbed.

Given there are no users for this code around, I'll just revert this and
leave it "as is" as the odds that ioremap() will ever fail here are
horrendously low.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-41-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index a16c7a2a7f3d0..fa9c491f9c38b 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -697,11 +697,8 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->addr.start, (ulong)hw->addr.size);
 			return err;
 		}
-		if (hw->ci->addr_mode == AM_MEMIO) {
+		if (hw->ci->addr_mode == AM_MEMIO)
 			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
-			if (unlikely(!hw->addr.p))
-				return -ENOMEM;
-		}
 		hw->addr.mode = hw->ci->addr_mode;
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",
-- 
GitLab


From e32fe6d90f44922ccbb94016cfc3c238359e3e39 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Thu, 13 May 2021 15:07:43 +0300
Subject: [PATCH 0625/3804] iio: adc: ad7192: Avoid disabling a clock that was
 never enabled.

Found by inspection.

If the internal clock source is being used, the driver doesn't
call clk_prepare_enable() and as such we should not call
clk_disable_unprepare()

Use the same condition to protect the disable path as is used
on the enable one.  Note this will all get simplified when
the driver moves over to a full devm_ flow, but that would make
backporting the fix harder.

Fix obviously predates move out of staging, but backporting will
become more complex (and is unlikely to happen), hence that patch
is given in the fixes tag.

Alexandru's sign off is here because he added this patch into
a larger series that Jonathan then applied.

Fixes: b581f748cce0 ("staging: iio: adc: ad7192: move out of staging")
Cc: Alexandru Tachici <alexandru.tachici@analog.com>
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7192.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index 2ed580521d815..d3be67aa05225 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -1014,7 +1014,9 @@ static int ad7192_probe(struct spi_device *spi)
 	return 0;
 
 error_disable_clk:
-	clk_disable_unprepare(st->mclk);
+	if (st->clock_sel == AD7192_CLK_EXT_MCLK1_2 ||
+	    st->clock_sel == AD7192_CLK_EXT_MCLK2)
+		clk_disable_unprepare(st->mclk);
 error_remove_trigger:
 	ad_sd_cleanup_buffer_and_trigger(indio_dev);
 error_disable_dvdd:
@@ -1031,7 +1033,9 @@ static int ad7192_remove(struct spi_device *spi)
 	struct ad7192_state *st = iio_priv(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	clk_disable_unprepare(st->mclk);
+	if (st->clock_sel == AD7192_CLK_EXT_MCLK1_2 ||
+	    st->clock_sel == AD7192_CLK_EXT_MCLK2)
+		clk_disable_unprepare(st->mclk);
 	ad_sd_cleanup_buffer_and_trigger(indio_dev);
 
 	regulator_disable(st->dvdd);
-- 
GitLab


From b0f27fca5a6c7652e265aae6a4452ce2f2ed64da Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Thu, 13 May 2021 15:07:44 +0300
Subject: [PATCH 0626/3804] iio: adc: ad7192: handle regulator voltage error
 first

This change fixes a corner-case, where for a zero regulator value, the
driver would exit early, initializing the driver only partially.
The driver would be in an unknown state.

This change reworks the code to check the regulator_get_voltage() return
value for negative (error) first, and return early. This is the more
common idiom.

Also, this change is removing the 'voltage_uv' variable and using the 'ret'
value directly. The only place where 'voltage_uv' is being used is to
compute the internal reference voltage, and the type of this variable is
'int' (same as for 'ret'). Using only 'ret' avoids having to assign it on
the error path.

Fixes: ab0afa65bbc7 ("staging: iio: adc: ad7192: fail probe on get_voltage")
Cc: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7192.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index d3be67aa05225..1141cc13a1249 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -912,7 +912,7 @@ static int ad7192_probe(struct spi_device *spi)
 {
 	struct ad7192_state *st;
 	struct iio_dev *indio_dev;
-	int ret, voltage_uv = 0;
+	int ret;
 
 	if (!spi->irq) {
 		dev_err(&spi->dev, "no IRQ?\n");
@@ -949,15 +949,12 @@ static int ad7192_probe(struct spi_device *spi)
 		goto error_disable_avdd;
 	}
 
-	voltage_uv = regulator_get_voltage(st->avdd);
-
-	if (voltage_uv > 0) {
-		st->int_vref_mv = voltage_uv / 1000;
-	} else {
-		ret = voltage_uv;
+	ret = regulator_get_voltage(st->avdd);
+	if (ret < 0) {
 		dev_err(&spi->dev, "Device tree error, reference voltage undefined\n");
 		goto error_disable_avdd;
 	}
+	st->int_vref_mv = ret / 1000;
 
 	spi_set_drvdata(spi, indio_dev);
 	st->chip_info = of_device_get_match_data(&spi->dev);
-- 
GitLab


From 04f5b9f539ce314f758d919a14dc7a669f3b7838 Mon Sep 17 00:00:00 2001
From: Lucas Stankus <lucas.p.stankus@gmail.com>
Date: Tue, 11 May 2021 17:54:18 -0300
Subject: [PATCH 0627/3804] staging: iio: cdc: ad7746: avoid overwrite of
 num_channels

AD7745 devices don't have the CIN2 pins and therefore can't handle related
channels. Forcing the number of AD7746 channels may lead to enabling more
channels than what the hardware actually supports.
Avoid num_channels being overwritten after first assignment.

Signed-off-by: Lucas Stankus <lucas.p.stankus@gmail.com>
Fixes: 83e416f458d53 ("staging: iio: adc: Replace, rewrite ad7745 from scratch.")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: <Stable@vger.kernel.org>
---
 drivers/staging/iio/cdc/ad7746.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/iio/cdc/ad7746.c b/drivers/staging/iio/cdc/ad7746.c
index dfd71e99e872e..eab534dc4bcc0 100644
--- a/drivers/staging/iio/cdc/ad7746.c
+++ b/drivers/staging/iio/cdc/ad7746.c
@@ -700,7 +700,6 @@ static int ad7746_probe(struct i2c_client *client,
 		indio_dev->num_channels = ARRAY_SIZE(ad7746_channels);
 	else
 		indio_dev->num_channels =  ARRAY_SIZE(ad7746_channels) - 2;
-	indio_dev->num_channels = ARRAY_SIZE(ad7746_channels);
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
 	if (pdata) {
-- 
GitLab


From 1e886090cefe26113122a7d59a36a9aec492fef5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 20 Apr 2021 14:06:38 +0200
Subject: [PATCH 0628/3804] docs: admin-guide: update description for
 kernel.hotplug sysctl

It's been a few releases since this defaulted to /sbin/hotplug. Update
the text, and include pointers to the two CONFIG_UEVENT_HELPER{,_PATH}
config knobs whose help text could provide more info, but also hint
that the user probably doesn't need to care at all.

Fixes: 7934779a69f1 ("Driver-Core: disable /sbin/hotplug by default")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/r/20210420120638.1104016-1-linux@rasmusvillemoes.dk
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/sysctl/kernel.rst | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e9..c24f57f2c7827 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -333,7 +333,12 @@ hotplug
 =======
 
 Path for the hotplug policy agent.
-Default value is "``/sbin/hotplug``".
+Default value is ``CONFIG_UEVENT_HELPER_PATH``, which in turn defaults
+to the empty string.
+
+This file only exists when ``CONFIG_UEVENT_HELPER`` is enabled. Most
+modern systems rely exclusively on the netlink-based uevent source and
+don't need this.
 
 
 hung_task_all_cpu_backtrace
-- 
GitLab


From 2c5ff2caa4f8164e93a9bf035af9a2ad87cad9f2 Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Thu, 6 May 2021 20:20:20 +0800
Subject: [PATCH 0629/3804] docs: usb: function: Modify path name

The original path does not exist, so change it to
"Documentation/ABI/testing/configfs-usb-gadget"

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Link: https://lore.kernel.org/r/20210506122020.7117-1-jj251510319013@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/usb/gadget_configfs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst
index 158e48dab586d..e4566ffb223f2 100644
--- a/Documentation/usb/gadget_configfs.rst
+++ b/Documentation/usb/gadget_configfs.rst
@@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.::
 Each function provides its specific set of attributes, with either read-only
 or read-write access. Where applicable they need to be written to as
 appropriate.
-Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
+Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information.
 
 4. Associating the functions with their configurations
 ------------------------------------------------------
-- 
GitLab


From c446f0d4702d316e1c6bf621f70e79678d28830a Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:08 +0200
Subject: [PATCH 0630/3804] isdn: mISDNinfineon: check/cleanup ioremap failure
 correctly in setup_io

Move hw->cfg.mode and hw->addr.mode assignments from hw->ci->cfg_mode
and hw->ci->addr_mode respectively, to be before the subsequent checks
for memory IO mode (and possible ioremap calls in this case).

Also introduce ioremap error checks at both locations. This allows
resources to be properly freed on ioremap failure, as when the caller
of setup_io then subsequently calls release_io via its error path,
release_io can now correctly determine the mode as it has been set
before the ioremap call.

Finally, refactor release_io function so that it will call
release_mem_region in the memory IO case, regardless of whether or not
hw->cfg.p/hw->addr.p are NULL. This means resources are then properly
released on failure.

This properly implements the original reverted commit (d721fe99f6ad)
from the University of Minnesota, whilst also implementing the ioremap
check for the hw->ci->cfg_mode if block as well.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-42-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/mISDNinfineon.c | 24 ++++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/mISDNinfineon.c b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
index fa9c491f9c38b..88d592bafdb02 100644
--- a/drivers/isdn/hardware/mISDN/mISDNinfineon.c
+++ b/drivers/isdn/hardware/mISDN/mISDNinfineon.c
@@ -630,17 +630,19 @@ static void
 release_io(struct inf_hw *hw)
 {
 	if (hw->cfg.mode) {
-		if (hw->cfg.p) {
+		if (hw->cfg.mode == AM_MEMIO) {
 			release_mem_region(hw->cfg.start, hw->cfg.size);
-			iounmap(hw->cfg.p);
+			if (hw->cfg.p)
+				iounmap(hw->cfg.p);
 		} else
 			release_region(hw->cfg.start, hw->cfg.size);
 		hw->cfg.mode = AM_NONE;
 	}
 	if (hw->addr.mode) {
-		if (hw->addr.p) {
+		if (hw->addr.mode == AM_MEMIO) {
 			release_mem_region(hw->addr.start, hw->addr.size);
-			iounmap(hw->addr.p);
+			if (hw->addr.p)
+				iounmap(hw->addr.p);
 		} else
 			release_region(hw->addr.start, hw->addr.size);
 		hw->addr.mode = AM_NONE;
@@ -670,9 +672,12 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->cfg.start, (ulong)hw->cfg.size);
 			return err;
 		}
-		if (hw->ci->cfg_mode == AM_MEMIO)
-			hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size);
 		hw->cfg.mode = hw->ci->cfg_mode;
+		if (hw->ci->cfg_mode == AM_MEMIO) {
+			hw->cfg.p = ioremap(hw->cfg.start, hw->cfg.size);
+			if (!hw->cfg.p)
+				return -ENOMEM;
+		}
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO cfg %lx (%lu bytes) mode%d\n",
 				  hw->name, (ulong)hw->cfg.start,
@@ -697,9 +702,12 @@ setup_io(struct inf_hw *hw)
 				(ulong)hw->addr.start, (ulong)hw->addr.size);
 			return err;
 		}
-		if (hw->ci->addr_mode == AM_MEMIO)
-			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
 		hw->addr.mode = hw->ci->addr_mode;
+		if (hw->ci->addr_mode == AM_MEMIO) {
+			hw->addr.p = ioremap(hw->addr.start, hw->addr.size);
+			if (!hw->addr.p)
+				return -ENOMEM;
+		}
 		if (debug & DEBUG_HW)
 			pr_notice("%s: IO addr %lx (%lu bytes) mode%d\n",
 				  hw->name, (ulong)hw->addr.start,
-- 
GitLab


From efba106f89fc6848726716c101f4c84e88720a9c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:09 +0200
Subject: [PATCH 0631/3804] Revert "ath6kl: return error code in
 ath6kl_wmi_set_roam_lrssi_cmd()"

This reverts commit fc6a6521556c8250e356ddc6a3f2391aa62dc976.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The change being reverted does NOTHING as the caller to this function
does not even look at the return value of the call.  So the "claim" that
this fixed an issue is not true.  It will be fixed up properly in a
future patch by propagating the error up the stack correctly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-43-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath6kl/wmi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index b137e7f343979..aca9732ec1eef 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -776,8 +776,10 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi)
 	cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR;
 	cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS;
 
-	return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
+	ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
 			    NO_SYNC_WMIFLAG);
+
+	return 0;
 }
 
 int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid)
-- 
GitLab


From 54433367840b46a1555c8ed36c4c0cfc5dbf1358 Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:57:10 +0200
Subject: [PATCH 0632/3804] ath6kl: return error code in
 ath6kl_wmi_set_roam_lrssi_cmd()

Propagate error code from failure of ath6kl_wmi_cmd_send() to the
caller.

Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-44-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath6kl/debug.c | 5 ++++-
 drivers/net/wireless/ath/ath6kl/wmi.c   | 4 +---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index 7506cea46f589..433a047f3747b 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -1027,14 +1027,17 @@ static ssize_t ath6kl_lrssi_roam_write(struct file *file,
 {
 	struct ath6kl *ar = file->private_data;
 	unsigned long lrssi_roam_threshold;
+	int ret;
 
 	if (kstrtoul_from_user(user_buf, count, 0, &lrssi_roam_threshold))
 		return -EINVAL;
 
 	ar->lrssi_roam_threshold = lrssi_roam_threshold;
 
-	ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold);
+	ret = ath6kl_wmi_set_roam_lrssi_cmd(ar->wmi, ar->lrssi_roam_threshold);
 
+	if (ret)
+		return ret;
 	return count;
 }
 
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index aca9732ec1eef..b137e7f343979 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -776,10 +776,8 @@ int ath6kl_wmi_set_roam_lrssi_cmd(struct wmi *wmi, u8 lrssi)
 	cmd->info.params.roam_rssi_floor = DEF_LRSSI_ROAM_FLOOR;
 	cmd->roam_ctrl = WMI_SET_LRSSI_SCAN_PARAMS;
 
-	ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
+	return ath6kl_wmi_cmd_send(wmi, 0, skb, WMI_SET_ROAM_CTRL_CMDID,
 			    NO_SYNC_WMIFLAG);
-
-	return 0;
 }
 
 int ath6kl_wmi_force_roam_cmd(struct wmi *wmi, const u8 *bssid)
-- 
GitLab


From 5e68b86c7b7c059c0f0ec4bf8adabe63f84a61eb Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:11 +0200
Subject: [PATCH 0633/3804] Revert "rapidio: fix a NULL pointer dereference
 when create_workqueue() fails"

This reverts commit 23015b22e47c5409620b1726a677d69e5cd032ba.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit has a memory leak on the error path here, it does
not clean up everything properly.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 23015b22e47c ("rapidio: fix a NULL pointer dereference when create_workqueue() fails")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-45-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rapidio/rio_cm.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index 50ec53d67a4c0..e6c16f04f2b45 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2138,14 +2138,6 @@ static int riocm_add_mport(struct device *dev,
 	mutex_init(&cm->rx_lock);
 	riocm_rx_fill(cm, RIOCM_RX_RING_SIZE);
 	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
-	if (!cm->rx_wq) {
-		riocm_error("failed to allocate IBMBOX_%d on %s",
-			    cmbox, mport->name);
-		rio_release_outb_mbox(mport, cmbox);
-		kfree(cm);
-		return -ENOMEM;
-	}
-
 	INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
 
 	cm->tx_slot = 0;
-- 
GitLab


From 69ce3ae36dcb03cdf416b0862a45369ddbf50fdf Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 3 May 2021 13:57:12 +0200
Subject: [PATCH 0634/3804] rapidio: handle create_workqueue() failure

In case create_workqueue() fails, release all resources and return -ENOMEM
to caller to avoid potential NULL pointer deref later. Move up the
create_workequeue() call to return early and avoid unwinding the call to
riocm_rx_fill().

Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-46-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rapidio/rio_cm.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index e6c16f04f2b45..db4c265287ae6 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -2127,6 +2127,14 @@ static int riocm_add_mport(struct device *dev,
 		return -ENODEV;
 	}
 
+	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
+	if (!cm->rx_wq) {
+		rio_release_inb_mbox(mport, cmbox);
+		rio_release_outb_mbox(mport, cmbox);
+		kfree(cm);
+		return -ENOMEM;
+	}
+
 	/*
 	 * Allocate and register inbound messaging buffers to be ready
 	 * to receive channel and system management requests
@@ -2137,7 +2145,6 @@ static int riocm_add_mport(struct device *dev,
 	cm->rx_slots = RIOCM_RX_RING_SIZE;
 	mutex_init(&cm->rx_lock);
 	riocm_rx_fill(cm, RIOCM_RX_RING_SIZE);
-	cm->rx_wq = create_workqueue(DRV_NAME "/rxq");
 	INIT_WORK(&cm->rx_work, rio_ibmsg_handler);
 
 	cm->tx_slot = 0;
-- 
GitLab


From 36a2c87f7ed9e305d05b9a5c044cc6c494771504 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:13 +0200
Subject: [PATCH 0635/3804] Revert "isdn: mISDN: Fix potential NULL pointer
 dereference of kzalloc"

This reverts commit 38d22659803a033b1b66cd2624c33570c0dde77d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While it looks like the original change is correct, it is not, as none
of the setup actually happens, and the error value is not propagated
upwards.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-47-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/hfcsusb.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 70061991915a5..4bb470d3963df 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -249,9 +249,6 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	int i;
 
 	phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC);
-	if (!phi)
-		return;
-
 	phi->dch.ch.protocol = hw->protocol;
 	phi->dch.ch.Flags = dch->Flags;
 	phi->dch.state = dch->state;
-- 
GitLab


From 5265db2ccc735e2783b790d6c19fb5cee8c025ed Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:14 +0200
Subject: [PATCH 0636/3804] isdn: mISDN: correctly handle ph_info allocation
 failure in hfcsusb_ph_info

Modify return type of hfcsusb_ph_info to int, so that we can pass error
value up the call stack when allocation of ph_info fails. Also change
three of four call sites to actually account for the memory failure.
The fourth, in ph_state_nt, is infeasible to change as it is in turn
called by ph_state which is used as a function pointer argument to
mISDN_initdchannel, which would necessitate changing its signature
and updating all the places where it is used (too many).

Fixes original flawed commit (38d22659803a) from the University of
Minnesota.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-48-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/isdn/hardware/mISDN/hfcsusb.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 4bb470d3963df..cd5642cef01fd 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -46,7 +46,7 @@ static void hfcsusb_start_endpoint(struct hfcsusb *hw, int channel);
 static void hfcsusb_stop_endpoint(struct hfcsusb *hw, int channel);
 static int  hfcsusb_setup_bch(struct bchannel *bch, int protocol);
 static void deactivate_bchannel(struct bchannel *bch);
-static void hfcsusb_ph_info(struct hfcsusb *hw);
+static int  hfcsusb_ph_info(struct hfcsusb *hw);
 
 /* start next background transfer for control channel */
 static void
@@ -241,7 +241,7 @@ hfcusb_l2l1B(struct mISDNchannel *ch, struct sk_buff *skb)
  * send full D/B channel status information
  * as MPH_INFORMATION_IND
  */
-static void
+static int
 hfcsusb_ph_info(struct hfcsusb *hw)
 {
 	struct ph_info *phi;
@@ -249,6 +249,9 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	int i;
 
 	phi = kzalloc(struct_size(phi, bch, dch->dev.nrbchan), GFP_ATOMIC);
+	if (!phi)
+		return -ENOMEM;
+
 	phi->dch.ch.protocol = hw->protocol;
 	phi->dch.ch.Flags = dch->Flags;
 	phi->dch.state = dch->state;
@@ -260,6 +263,8 @@ hfcsusb_ph_info(struct hfcsusb *hw)
 	_queue_data(&dch->dev.D, MPH_INFORMATION_IND, MISDN_ID_ANY,
 		    struct_size(phi, bch, dch->dev.nrbchan), phi, GFP_ATOMIC);
 	kfree(phi);
+
+	return 0;
 }
 
 /*
@@ -344,8 +349,7 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
 			ret = l1_event(dch->l1, hh->prim);
 		break;
 	case MPH_INFORMATION_REQ:
-		hfcsusb_ph_info(hw);
-		ret = 0;
+		ret = hfcsusb_ph_info(hw);
 		break;
 	}
 
@@ -400,8 +404,7 @@ hfc_l1callback(struct dchannel *dch, u_int cmd)
 			       hw->name, __func__, cmd);
 		return -1;
 	}
-	hfcsusb_ph_info(hw);
-	return 0;
+	return hfcsusb_ph_info(hw);
 }
 
 static int
@@ -743,8 +746,7 @@ hfcsusb_setup_bch(struct bchannel *bch, int protocol)
 			handle_led(hw, (bch->nr == 1) ? LED_B1_OFF :
 				   LED_B2_OFF);
 	}
-	hfcsusb_ph_info(hw);
-	return 0;
+	return hfcsusb_ph_info(hw);
 }
 
 static void
-- 
GitLab


From e1436df2f2550bc89d832ffd456373fdf5d5b5d7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:15 +0200
Subject: [PATCH 0637/3804] Revert "ecryptfs: replace BUG_ON with error
 handling code"

This reverts commit 2c2a7552dd6465e8fde6bc9cccf8d66ed1c1eb72.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to ensure if they were
correct or not.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit log for this change was incorrect, no "error
handling code" was added, things will blow up just as badly as before if
any of these cases ever were true.  As this BUG_ON() never fired, and
most of these checks are "obviously" never going to be true, let's just
revert to the original code for now until this gets unwound to be done
correctly in the future.

Cc: Aditya Pakki <pakki001@umn.edu>
Fixes: 2c2a7552dd64 ("ecryptfs: replace BUG_ON with error handling code")
Cc: stable <stable@vger.kernel.org>
Acked-by: Tyler Hicks <code@tyhicks.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-49-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ecryptfs/crypto.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 345f8061e3b4a..b1aa993784f77 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -296,10 +296,8 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	struct extent_crypt_result ecr;
 	int rc = 0;
 
-	if (!crypt_stat || !crypt_stat->tfm
-	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
-		return -EINVAL;
-
+	BUG_ON(!crypt_stat || !crypt_stat->tfm
+	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
 				crypt_stat->key_size);
-- 
GitLab


From c6052f09c14bf0ecdd582662e022eb716f9b8022 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:16 +0200
Subject: [PATCH 0638/3804] fs: ecryptfs: remove BUG_ON from crypt_scatterlist

crypt_stat memory itself is allocated when inode is created, in
ecryptfs_alloc_inode, which returns NULL on failure and is handled
by callers, which would prevent us getting to this point. It then
calls ecryptfs_init_crypt_stat which allocates crypt_stat->tfm
checking for and likewise handling allocation failure. Finally,
crypt_stat->flags has ECRYPTFS_STRUCT_INITIALIZED merged into it
in ecryptfs_init_crypt_stat as well.

Simply put, the conditions that the BUG_ON checks for will never
be triggered, as to even get to this function, the relevant conditions
will have already been fulfilled (or the inode allocation would fail in
the first place and thus no call to this function or those above it).

Cc: Tyler Hicks <code@tyhicks.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-50-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ecryptfs/crypto.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index b1aa993784f77..e3f5d7f3c8a0a 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -296,8 +296,6 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
 	struct extent_crypt_result ecr;
 	int rc = 0;
 
-	BUG_ON(!crypt_stat || !crypt_stat->tfm
-	       || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
 	if (unlikely(ecryptfs_verbosity > 0)) {
 		ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n",
 				crypt_stat->key_size);
-- 
GitLab


From 43ed0fcf613a87dd0221ec72d1ade4d6544f2ffc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:17 +0200
Subject: [PATCH 0639/3804] Revert "dmaengine: qcom_hidma: Check for driver
 register failure"

This reverts commit a474b3f0428d6b02a538aa10b3c3b722751cb382.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original change is NOT correct, as it does not correctly unwind from
the resources that were allocated before the call to
platform_driver_register().

Cc: Aditya Pakki <pakki001@umn.edu>
Acked-By: Vinod Koul <vkoul@kernel.org>
Acked-By: Sinan Kaya <okaya@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-51-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/qcom/hidma_mgmt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index 806ca02c52d71..fe87b01f7a4eb 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -418,8 +418,9 @@ static int __init hidma_mgmt_init(void)
 		hidma_mgmt_of_populate_channels(child);
 	}
 #endif
-	return platform_driver_register(&hidma_mgmt_driver);
+	platform_driver_register(&hidma_mgmt_driver);
 
+	return 0;
 }
 module_init(hidma_mgmt_init);
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 4df2a8b0ad634d98a67e540a4e18a60f943e7d9f Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:18 +0200
Subject: [PATCH 0640/3804] dmaengine: qcom_hidma: comment
 platform_driver_register call

Place a comment in hidma_mgmt_init explaining why success must
currently be assumed, due to the cleanup issue that would need to
be considered were this module ever to be unloadable or were this
platform_driver_register call ever to fail.

Acked-By: Vinod Koul <vkoul@kernel.org>
Acked-By: Sinan Kaya <okaya@kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-52-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/qcom/hidma_mgmt.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index fe87b01f7a4eb..62026607f3f8b 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -418,6 +418,20 @@ static int __init hidma_mgmt_init(void)
 		hidma_mgmt_of_populate_channels(child);
 	}
 #endif
+	/*
+	 * We do not check for return value here, as it is assumed that
+	 * platform_driver_register must not fail. The reason for this is that
+	 * the (potential) hidma_mgmt_of_populate_channels calls above are not
+	 * cleaned up if it does fail, and to do this work is quite
+	 * complicated. In particular, various calls of of_address_to_resource,
+	 * of_irq_to_resource, platform_device_register_full, of_dma_configure,
+	 * and of_msi_configure which then call other functions and so on, must
+	 * be cleaned up - this is not a trivial exercise.
+	 *
+	 * Currently, this module is not intended to be unloaded, and there is
+	 * no module_exit function defined which does the needed cleanup. For
+	 * this reason, we have to assume success here.
+	 */
 	platform_driver_register(&hidma_mgmt_driver);
 
 	return 0;
-- 
GitLab


From 46651077765c80a0d6f87f3469129a72e49ce91b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:19 +0200
Subject: [PATCH 0641/3804] Revert "libertas: add checks for the return value
 of sysfs_create_group"

This reverts commit 434256833d8eb988cb7f3b8a41699e2fe48d9332.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit was incorrect, the error needs to be propagated back
to the caller AND if the second group call fails, the first needs to be
removed.  There are much better ways to solve this, the driver should
NOT be calling sysfs_create_group() on its own as it is racing userspace
and losing.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-53-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/libertas/mesh.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index f5b78257d5518..c611e6668b218 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -805,12 +805,7 @@ static void lbs_persist_config_init(struct net_device *dev)
 {
 	int ret;
 	ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group);
-	if (ret)
-		pr_err("failed to create boot_opts_group.\n");
-
 	ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group);
-	if (ret)
-		pr_err("failed to create mesh_ie_group.\n");
 }
 
 static void lbs_persist_config_remove(struct net_device *dev)
-- 
GitLab


From 7e79b38fe9a403b065ac5915465f620a8fb3de84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:20 +0200
Subject: [PATCH 0642/3804] libertas: register sysfs groups properly

The libertas driver was trying to register sysfs groups "by hand" which
causes them to be created _after_ the device is initialized and
announced to userspace, which causes races and can prevent userspace
tools from seeing the sysfs files correctly.

Fix this up by using the built-in sysfs_groups pointers in struct
net_device which were created for this very reason, fixing the race
condition, and properly allowing for any error that might have occurred
to be handled properly.

Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-54-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/marvell/libertas/mesh.c | 28 +++-----------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index c611e6668b218..c68814841583f 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -801,19 +801,6 @@ static const struct attribute_group mesh_ie_group = {
 	.attrs = mesh_ie_attrs,
 };
 
-static void lbs_persist_config_init(struct net_device *dev)
-{
-	int ret;
-	ret = sysfs_create_group(&(dev->dev.kobj), &boot_opts_group);
-	ret = sysfs_create_group(&(dev->dev.kobj), &mesh_ie_group);
-}
-
-static void lbs_persist_config_remove(struct net_device *dev)
-{
-	sysfs_remove_group(&(dev->dev.kobj), &boot_opts_group);
-	sysfs_remove_group(&(dev->dev.kobj), &mesh_ie_group);
-}
-
 
 /***************************************************************************
  * Initializing and starting, stopping mesh
@@ -1009,6 +996,10 @@ static int lbs_add_mesh(struct lbs_private *priv)
 	SET_NETDEV_DEV(priv->mesh_dev, priv->dev->dev.parent);
 
 	mesh_dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
+	mesh_dev->sysfs_groups[0] = &lbs_mesh_attr_group;
+	mesh_dev->sysfs_groups[1] = &boot_opts_group;
+	mesh_dev->sysfs_groups[2] = &mesh_ie_group;
+
 	/* Register virtual mesh interface */
 	ret = register_netdev(mesh_dev);
 	if (ret) {
@@ -1016,19 +1007,10 @@ static int lbs_add_mesh(struct lbs_private *priv)
 		goto err_free_netdev;
 	}
 
-	ret = sysfs_create_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group);
-	if (ret)
-		goto err_unregister;
-
-	lbs_persist_config_init(mesh_dev);
-
 	/* Everything successful */
 	ret = 0;
 	goto done;
 
-err_unregister:
-	unregister_netdev(mesh_dev);
-
 err_free_netdev:
 	free_netdev(mesh_dev);
 
@@ -1049,8 +1031,6 @@ void lbs_remove_mesh(struct lbs_private *priv)
 
 	netif_stop_queue(mesh_dev);
 	netif_carrier_off(mesh_dev);
-	sysfs_remove_group(&(mesh_dev->dev.kobj), &lbs_mesh_attr_group);
-	lbs_persist_config_remove(mesh_dev);
 	unregister_netdev(mesh_dev);
 	priv->mesh_dev = NULL;
 	kfree(mesh_dev->ieee80211_ptr);
-- 
GitLab


From 1e0ce84215dbfd6065872e5d3755352da34f198b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:21 +0200
Subject: [PATCH 0643/3804] Revert "ASoC: rt5645: fix a NULL pointer
 dereference"

This reverts commit 51dd97d1df5fb9ac58b9b358e63e67b530f6ae21.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Lots of things seem to be still allocated here and must be properly
cleaned up if an error happens here.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-55-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5645.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 9408ee63cb268..7cb90975009ae 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3431,9 +3431,6 @@ static int rt5645_probe(struct snd_soc_component *component)
 		RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s),
 		GFP_KERNEL);
 
-	if (!rt5645->eq_param)
-		return -ENOMEM;
-
 	return 0;
 }
 
-- 
GitLab


From 5e70b8e22b64eed13d5bbebcb5911dae65bf8c6b Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 3 May 2021 13:57:22 +0200
Subject: [PATCH 0644/3804] ASoC: rt5645: add error checking to rt5645_probe
 function

Check for return value from various snd_soc_dapm_* calls, as many of
them can return errors and this should be handled. Also, reintroduce
the allocation failure check for rt5645->eq_param as well. Make all
areas where return values are checked lead to the end of the function
in the case of an error. Finally, introduce a comment explaining how
resources here are actually eventually cleaned up by the caller.

Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210503115736.2104747-56-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5645.c | 48 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 7cb90975009ae..438fa18bcb55d 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3388,30 +3388,44 @@ static int rt5645_probe(struct snd_soc_component *component)
 {
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
+	int ret = 0;
 
 	rt5645->component = component;
 
 	switch (rt5645->codec_type) {
 	case CODEC_TYPE_RT5645:
-		snd_soc_dapm_new_controls(dapm,
+		ret = snd_soc_dapm_new_controls(dapm,
 			rt5645_specific_dapm_widgets,
 			ARRAY_SIZE(rt5645_specific_dapm_widgets));
-		snd_soc_dapm_add_routes(dapm,
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_add_routes(dapm,
 			rt5645_specific_dapm_routes,
 			ARRAY_SIZE(rt5645_specific_dapm_routes));
+		if (ret < 0)
+			goto exit;
+
 		if (rt5645->v_id < 3) {
-			snd_soc_dapm_add_routes(dapm,
+			ret = snd_soc_dapm_add_routes(dapm,
 				rt5645_old_dapm_routes,
 				ARRAY_SIZE(rt5645_old_dapm_routes));
+			if (ret < 0)
+				goto exit;
 		}
 		break;
 	case CODEC_TYPE_RT5650:
-		snd_soc_dapm_new_controls(dapm,
+		ret = snd_soc_dapm_new_controls(dapm,
 			rt5650_specific_dapm_widgets,
 			ARRAY_SIZE(rt5650_specific_dapm_widgets));
-		snd_soc_dapm_add_routes(dapm,
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_add_routes(dapm,
 			rt5650_specific_dapm_routes,
 			ARRAY_SIZE(rt5650_specific_dapm_routes));
+		if (ret < 0)
+			goto exit;
 		break;
 	}
 
@@ -3419,9 +3433,17 @@ static int rt5645_probe(struct snd_soc_component *component)
 
 	/* for JD function */
 	if (rt5645->pdata.jd_mode) {
-		snd_soc_dapm_force_enable_pin(dapm, "JD Power");
-		snd_soc_dapm_force_enable_pin(dapm, "LDO2");
-		snd_soc_dapm_sync(dapm);
+		ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power");
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2");
+		if (ret < 0)
+			goto exit;
+
+		ret = snd_soc_dapm_sync(dapm);
+		if (ret < 0)
+			goto exit;
 	}
 
 	if (rt5645->pdata.long_name)
@@ -3431,7 +3453,15 @@ static int rt5645_probe(struct snd_soc_component *component)
 		RT5645_HWEQ_NUM, sizeof(struct rt5645_eq_param_s),
 		GFP_KERNEL);
 
-	return 0;
+	if (!rt5645->eq_param)
+		ret = -ENOMEM;
+exit:
+	/*
+	 * If there was an error above, everything will be cleaned up by the
+	 * caller if we return an error here.  This will be done with a later
+	 * call to rt5645_remove().
+	 */
+	return ret;
 }
 
 static void rt5645_remove(struct snd_soc_component *component)
-- 
GitLab


From fdda0dd2686ecd1f2e616c9e0366ea71b40c485d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:23 +0200
Subject: [PATCH 0645/3804] Revert "ASoC: cs43130: fix a NULL pointer
 dereference"

This reverts commit a2be42f18d409213bb7e7a736e3ef6ba005115bb.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original patch here is not correct, sysfs files that were created
are not unwound.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-57-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/cs43130.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index 80bc7c10ed757..c2b6f0ae6d57d 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -2319,8 +2319,6 @@ static int cs43130_probe(struct snd_soc_component *component)
 			return ret;
 
 		cs43130->wq = create_singlethread_workqueue("cs43130_hp");
-		if (!cs43130->wq)
-			return -ENOMEM;
 		INIT_WORK(&cs43130->work, cs43130_imp_meas);
 	}
 
-- 
GitLab


From 2da441a6491d93eff8ffff523837fd621dc80389 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:24 +0200
Subject: [PATCH 0646/3804] ASoC: cs43130: handle errors in cs43130_probe()
 properly

cs43130_probe() does not do any valid error checking of things it
initializes, OR what it does, it does not unwind properly if there are
errors.

Fix this up by moving the sysfs files to an attribute group so the
driver core will correctly add/remove them all at once and handle errors
with them, and correctly check for creating a new workqueue and
unwinding if that fails.

Cc: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-58-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/cs43130.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index c2b6f0ae6d57d..80cd3ea0c1577 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -1735,6 +1735,14 @@ static DEVICE_ATTR(hpload_dc_r, 0444, cs43130_show_dc_r, NULL);
 static DEVICE_ATTR(hpload_ac_l, 0444, cs43130_show_ac_l, NULL);
 static DEVICE_ATTR(hpload_ac_r, 0444, cs43130_show_ac_r, NULL);
 
+static struct attribute *hpload_attrs[] = {
+	&dev_attr_hpload_dc_l.attr,
+	&dev_attr_hpload_dc_r.attr,
+	&dev_attr_hpload_ac_l.attr,
+	&dev_attr_hpload_ac_r.attr,
+};
+ATTRIBUTE_GROUPS(hpload);
+
 static struct reg_sequence hp_en_cal_seq[] = {
 	{CS43130_INT_MASK_4, CS43130_INT_MASK_ALL},
 	{CS43130_HP_MEAS_LOAD_1, 0},
@@ -2302,23 +2310,15 @@ static int cs43130_probe(struct snd_soc_component *component)
 
 	cs43130->hpload_done = false;
 	if (cs43130->dc_meas) {
-		ret = device_create_file(component->dev, &dev_attr_hpload_dc_l);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_dc_r);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_ac_l);
-		if (ret < 0)
-			return ret;
-
-		ret = device_create_file(component->dev, &dev_attr_hpload_ac_r);
-		if (ret < 0)
+		ret = sysfs_create_groups(&component->dev->kobj, hpload_groups);
+		if (ret)
 			return ret;
 
 		cs43130->wq = create_singlethread_workqueue("cs43130_hp");
+		if (!cs43130->wq) {
+			sysfs_remove_groups(&component->dev->kobj, hpload_groups);
+			return -ENOMEM;
+		}
 		INIT_WORK(&cs43130->work, cs43130_imp_meas);
 	}
 
-- 
GitLab


From 47e4ff06fa7f5ba4860543a2913bbd0c164640aa Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:25 +0200
Subject: [PATCH 0647/3804] Revert "media: dvb: Add check on sp8870_readreg"

This reverts commit 467a37fba93f2b4fe3ab597ff6a517b22b566882.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

This commit is not properly checking for an error at all, so if a
read succeeds from this device, it will error out.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Sean Young <sean@mess.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-59-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/sp8870.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index 655db8272268d..ee893a2f22614 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -280,9 +280,7 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe)
 	sp8870_writereg(state, 0xc05, reg0xc05);
 
 	// read status reg in order to clear pending irqs
-	err = sp8870_readreg(state, 0x200);
-	if (err)
-		return err;
+	sp8870_readreg(state, 0x200);
 
 	// system controller start
 	sp8870_microcontroller_start(state);
-- 
GitLab


From c6d822c56e7fd29e6fa1b1bb91b98f6a1e942b3c Mon Sep 17 00:00:00 2001
From: Alaa Emad <alaaemadhossney.ae@gmail.com>
Date: Mon, 3 May 2021 13:57:26 +0200
Subject: [PATCH 0648/3804] media: dvb: Add check on sp8870_readreg return

The function sp8870_readreg returns a negative value when i2c_transfer
fails so properly check for this and return the error if it happens.

Cc: Sean Young <sean@mess.org>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Alaa Emad <alaaemadhossney.ae@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-60-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/dvb-frontends/sp8870.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index ee893a2f22614..9767159aeb9b2 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -280,7 +280,9 @@ static int sp8870_set_frontend_parameters(struct dvb_frontend *fe)
 	sp8870_writereg(state, 0xc05, reg0xc05);
 
 	// read status reg in order to clear pending irqs
-	sp8870_readreg(state, 0x200);
+	err = sp8870_readreg(state, 0x200);
+	if (err < 0)
+		return err;
 
 	// system controller start
 	sp8870_microcontroller_start(state);
-- 
GitLab


From d8c3be2fb2079d0cb4cd29d6aba58dbe54771e42 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:27 +0200
Subject: [PATCH 0649/3804] Revert "media: gspca: mt9m111: Check write_bridge
 for timeout"

This reverts commit 656025850074f5c1ba2e05be37bda57ba2b8d491.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Different error values should never be ORed together, as nothing sane
can be expected to come out of the result.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-61-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
index bfa3b381d8a26..50481dc928d0f 100644
--- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
+++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
@@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = {
 int mt9m111_probe(struct sd *sd)
 {
 	u8 data[2] = {0x00, 0x00};
-	int i, rc = 0;
+	int i;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -213,18 +213,16 @@ int mt9m111_probe(struct sd *sd)
 	/* Do the preinit */
 	for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) {
 		if (preinit_mt9m111[i][0] == BRIDGE) {
-			rc |= m5602_write_bridge(sd,
+			m5602_write_bridge(sd,
 				preinit_mt9m111[i][1],
 				preinit_mt9m111[i][2]);
 		} else {
 			data[0] = preinit_mt9m111[i][2];
 			data[1] = preinit_mt9m111[i][3];
-			rc |= m5602_write_sensor(sd,
+			m5602_write_sensor(sd,
 				preinit_mt9m111[i][1], data, 2);
 		}
 	}
-	if (rc < 0)
-		return rc;
 
 	if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2))
 		return -ENODEV;
-- 
GitLab


From af44068c581c028fd9897ca75a10fa310d8fc449 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 13 May 2021 16:18:19 +0100
Subject: [PATCH 0650/3804] arm64: tools: Add __ASM_CPUCAPS_H to the endif in
 cpucaps.h

Anshuman suggested this.

Suggested-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210513151819.12526-1-broonie@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/tools/gen-cpucaps.awk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk
index 18737a1ce0448..00c9e72a200a5 100755
--- a/arch/arm64/tools/gen-cpucaps.awk
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -31,7 +31,7 @@ BEGIN {
 END {
 	printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
 	print ""
-	print "#endif"
+	print "#endif /* __ASM_CPUCAPS_H */"
 }
 
 # Any lines not handled by previous rules are unexpected
-- 
GitLab


From e932f5b458eee63d013578ea128b9ff8ef5f5496 Mon Sep 17 00:00:00 2001
From: Alaa Emad <alaaemadhossney.ae@gmail.com>
Date: Mon, 3 May 2021 13:57:28 +0200
Subject: [PATCH 0651/3804] media: gspca: mt9m111: Check write_bridge for
 timeout

If m5602_write_bridge times out, it will return a negative error value.
So properly check for this and handle the error correctly instead of
just ignoring it.

Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Alaa Emad <alaaemadhossney.ae@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-62-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_mt9m111.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
index 50481dc928d0f..bf1af6ed9131e 100644
--- a/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
+++ b/drivers/media/usb/gspca/m5602/m5602_mt9m111.c
@@ -195,7 +195,7 @@ static const struct v4l2_ctrl_config mt9m111_greenbal_cfg = {
 int mt9m111_probe(struct sd *sd)
 {
 	u8 data[2] = {0x00, 0x00};
-	int i;
+	int i, err;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -213,15 +213,17 @@ int mt9m111_probe(struct sd *sd)
 	/* Do the preinit */
 	for (i = 0; i < ARRAY_SIZE(preinit_mt9m111); i++) {
 		if (preinit_mt9m111[i][0] == BRIDGE) {
-			m5602_write_bridge(sd,
-				preinit_mt9m111[i][1],
-				preinit_mt9m111[i][2]);
+			err = m5602_write_bridge(sd,
+					preinit_mt9m111[i][1],
+					preinit_mt9m111[i][2]);
 		} else {
 			data[0] = preinit_mt9m111[i][2];
 			data[1] = preinit_mt9m111[i][3];
-			m5602_write_sensor(sd,
-				preinit_mt9m111[i][1], data, 2);
+			err = m5602_write_sensor(sd,
+					preinit_mt9m111[i][1], data, 2);
 		}
+		if (err < 0)
+			return err;
 	}
 
 	if (m5602_read_sensor(sd, MT9M111_SC_CHIPVER, data, 2))
-- 
GitLab


From 8e23e83c752b54e98102627a1cc09281ad71a299 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:29 +0200
Subject: [PATCH 0652/3804] Revert "media: gspca: Check the return value of
 write_bridge for timeout"

This reverts commit a21a0eb56b4e8fe4a330243af8030f890cde2283.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

Different error values should never be ORed together, as nothing sane
can be expected to come out of the result.

Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-63-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_po1030.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c
index d680b777f097f..7bdbb8065146b 100644
--- a/drivers/media/usb/gspca/m5602/m5602_po1030.c
+++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c
@@ -154,7 +154,6 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = {
 
 int po1030_probe(struct sd *sd)
 {
-	int rc = 0;
 	u8 dev_id_h = 0, i;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
@@ -174,14 +173,11 @@ int po1030_probe(struct sd *sd)
 	for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) {
 		u8 data = preinit_po1030[i][2];
 		if (preinit_po1030[i][0] == SENSOR)
-			rc |= m5602_write_sensor(sd,
+			m5602_write_sensor(sd,
 				preinit_po1030[i][1], &data, 1);
 		else
-			rc |= m5602_write_bridge(sd, preinit_po1030[i][1],
-						data);
+			m5602_write_bridge(sd, preinit_po1030[i][1], data);
 	}
-	if (rc < 0)
-		return rc;
 
 	if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1))
 		return -ENODEV;
-- 
GitLab


From dacb408ca6f0e34df22b40d8dd5fae7f8e777d84 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:30 +0200
Subject: [PATCH 0653/3804] media: gspca: properly check for errors in
 po1030_probe()

If m5602_write_sensor() or m5602_write_bridge() fail, do not continue to
initialize the device but return the error to the calling function.

Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-64-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/usb/gspca/m5602/m5602_po1030.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/gspca/m5602/m5602_po1030.c b/drivers/media/usb/gspca/m5602/m5602_po1030.c
index 7bdbb8065146b..8fd99ceee4b67 100644
--- a/drivers/media/usb/gspca/m5602/m5602_po1030.c
+++ b/drivers/media/usb/gspca/m5602/m5602_po1030.c
@@ -155,6 +155,7 @@ static const struct v4l2_ctrl_config po1030_greenbal_cfg = {
 int po1030_probe(struct sd *sd)
 {
 	u8 dev_id_h = 0, i;
+	int err;
 	struct gspca_dev *gspca_dev = (struct gspca_dev *)sd;
 
 	if (force_sensor) {
@@ -173,10 +174,13 @@ int po1030_probe(struct sd *sd)
 	for (i = 0; i < ARRAY_SIZE(preinit_po1030); i++) {
 		u8 data = preinit_po1030[i][2];
 		if (preinit_po1030[i][0] == SENSOR)
-			m5602_write_sensor(sd,
-				preinit_po1030[i][1], &data, 1);
+			err = m5602_write_sensor(sd, preinit_po1030[i][1],
+						 &data, 1);
 		else
-			m5602_write_bridge(sd, preinit_po1030[i][1], data);
+			err = m5602_write_bridge(sd, preinit_po1030[i][1],
+						 data);
+		if (err < 0)
+			return err;
 	}
 
 	if (m5602_read_sensor(sd, PO1030_DEVID_H, &dev_id_h, 1))
-- 
GitLab


From 4fd798a5a89114c1892574c50f2aebd49bc5b4f5 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:31 +0200
Subject: [PATCH 0654/3804] Revert "net: liquidio: fix a NULL pointer
 dereference"

This reverts commit fe543b2f174f34a7a751aa08b334fe6b105c4569.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

While the original commit does keep the immediate "NULL dereference"
from happening, it does not properly propagate the error back to the
callers, AND it does not fix this same identical issue in the
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c for some reason.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: David S. Miller <davem@davemloft.net>
Link: https://lore.kernel.org/r/20210503115736.2104747-65-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 7c5af4beedc6d..6fa5700686481 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1166,11 +1166,6 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
-	if (!sc) {
-		netif_info(lio, rx_err, lio->netdev,
-			   "Failed to allocate octeon_soft_command\n");
-		return;
-	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
-- 
GitLab


From dbc97bfd3918ed9268bfc174cae8a7d6b3d51aad Mon Sep 17 00:00:00 2001
From: Tom Seewald <tseewald@gmail.com>
Date: Mon, 3 May 2021 13:57:32 +0200
Subject: [PATCH 0655/3804] net: liquidio: Add missing null pointer checks

The functions send_rx_ctrl_cmd() in both liquidio/lio_main.c and
liquidio/lio_vf_main.c do not check if the call to
octeon_alloc_soft_command() fails and returns a null pointer. Both
functions also return void so errors are not propagated back to the
caller.

Fix these issues by updating both instances of send_rx_ctrl_cmd() to
return an integer rather than void, and have them return -ENOMEM if an
allocation failure occurs. Also update all callers of send_rx_ctrl_cmd()
so that they now check the return value.

Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Tom Seewald <tseewald@gmail.com>
Link: https://lore.kernel.org/r/20210503115736.2104747-66-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../net/ethernet/cavium/liquidio/lio_main.c   | 28 +++++++++++++------
 .../ethernet/cavium/liquidio/lio_vf_main.c    | 27 +++++++++++++-----
 2 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 6fa5700686481..591229b96257e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1153,7 +1153,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
  * @lio: per-network private data
  * @start_stop: whether to start or stop
  */
-static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_soft_command *sc;
 	union octnet_cmd *ncmd;
@@ -1161,11 +1161,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	int retval;
 
 	if (oct->props[lio->ifidx].rx_on == start_stop)
-		return;
+		return 0;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command struct\n");
+		return -ENOMEM;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
@@ -1187,18 +1192,19 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	if (retval == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
 		octeon_free_soft_command(oct, sc);
-		return;
 	} else {
 		/* Sleep on a wait queue till the cond flag indicates that the
 		 * response arrived or timed-out.
 		 */
 		retval = wait_for_sc_completion_timeout(oct, sc, 0);
 		if (retval)
-			return;
+			return retval;
 
 		oct->props[lio->ifidx].rx_on = start_stop;
 		WRITE_ONCE(sc->caller_is_done, true);
 	}
+
+	return retval;
 }
 
 /**
@@ -1773,6 +1779,7 @@ static int liquidio_open(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	if (oct->props[lio->ifidx].napi_enabled == 0) {
 		tasklet_disable(&oct_priv->droq_tasklet);
@@ -1808,7 +1815,9 @@ static int liquidio_open(struct net_device *netdev)
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
 	/* tell Octeon to start forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 1);
+	ret = send_rx_ctrl_cmd(lio, 1);
+	if (ret)
+		return ret;
 
 	/* start periodical statistics fetch */
 	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
@@ -1819,7 +1828,7 @@ static int liquidio_open(struct net_device *netdev)
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
 		 netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -1833,6 +1842,7 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
 
@@ -1849,7 +1859,9 @@ static int liquidio_stop(struct net_device *netdev)
 	lio->link_changes++;
 
 	/* Tell Octeon that nic interface is down. */
-	send_rx_ctrl_cmd(lio, 0);
+	ret = send_rx_ctrl_cmd(lio, 0);
+	if (ret)
+		return ret;
 
 	if (OCTEON_CN23XX_PF(oct)) {
 		if (!oct->msix_on)
@@ -1884,7 +1896,7 @@ static int liquidio_stop(struct net_device *netdev)
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 516f166ceff8c..ffddb3126a323 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -595,7 +595,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
  * @lio: per-network private data
  * @start_stop: whether to start or stop
  */
-static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+static int send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
 	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
 	struct octeon_soft_command *sc;
@@ -603,11 +603,16 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 	int retval;
 
 	if (oct->props[lio->ifidx].rx_on == start_stop)
-		return;
+		return 0;
 
 	sc = (struct octeon_soft_command *)
 		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
 					  16, 0);
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate octeon_soft_command struct\n");
+		return -ENOMEM;
+	}
 
 	ncmd = (union octnet_cmd *)sc->virtdptr;
 
@@ -635,11 +640,13 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 		 */
 		retval = wait_for_sc_completion_timeout(oct, sc, 0);
 		if (retval)
-			return;
+			return retval;
 
 		oct->props[lio->ifidx].rx_on = start_stop;
 		WRITE_ONCE(sc->caller_is_done, true);
 	}
+
+	return retval;
 }
 
 /**
@@ -906,6 +913,7 @@ static int liquidio_open(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	if (!oct->props[lio->ifidx].napi_enabled) {
 		tasklet_disable(&oct_priv->droq_tasklet);
@@ -932,11 +940,13 @@ static int liquidio_open(struct net_device *netdev)
 					(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
 
 	/* tell Octeon to start forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 1);
+	ret = send_rx_ctrl_cmd(lio, 1);
+	if (ret)
+		return ret;
 
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
@@ -950,9 +960,12 @@ static int liquidio_stop(struct net_device *netdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 	struct napi_struct *napi, *n;
+	int ret = 0;
 
 	/* tell Octeon to stop forwarding packets to host */
-	send_rx_ctrl_cmd(lio, 0);
+	ret = send_rx_ctrl_cmd(lio, 0);
+	if (ret)
+		return ret;
 
 	netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
 	/* Inform that netif carrier is down */
@@ -986,7 +999,7 @@ static int liquidio_stop(struct net_device *netdev)
 
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
-	return 0;
+	return ret;
 }
 
 /**
-- 
GitLab


From ed04fe8a0e87d7b5ea17d47f4ac9ec962b24814a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:33 +0200
Subject: [PATCH 0656/3804] Revert "video: imsttfb: fix potential NULL pointer
 dereferences"

This reverts commit 1d84353d205a953e2381044953b7fa31c8c9702d.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit here, while technically correct, did not fully
handle all of the reported issues that the commit stated it was fixing,
so revert it until it can be "fixed" fully.

Note, ioremap() probably will never fail for old hardware like this, and
if anyone actually used this hardware (a PowerMac era PCI display card),
they would not be using fbdev anymore.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Aditya Pakki <pakki001@umn.edu>
Cc: Finn Thain <fthain@telegraphics.com.au>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Fixes: 1d84353d205a ("video: imsttfb: fix potential NULL pointer dereferences")
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-67-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/imsttfb.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
index 3ac053b884958..e04411701ec85 100644
--- a/drivers/video/fbdev/imsttfb.c
+++ b/drivers/video/fbdev/imsttfb.c
@@ -1512,11 +1512,6 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	info->fix.smem_start = addr;
 	info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ?
 					    0x400000 : 0x800000);
-	if (!info->screen_base) {
-		release_mem_region(addr, size);
-		framebuffer_release(info);
-		return -ENOMEM;
-	}
 	info->fix.mmio_start = addr + 0x800000;
 	par->dc_regs = ioremap(addr + 0x800000, 0x1000);
 	par->cmap_regs_phys = addr + 0x840000;
-- 
GitLab


From 13b7c0390a5d3840e1e2cda8f44a310fdbb982de Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:34 +0200
Subject: [PATCH 0657/3804] video: imsttfb: check for ioremap() failures

We should check whether ioremap() fails in imsttfb_probe() and, if it
does, properly unwind the resources allocated there.

Ideally if anyone cares about this driver (it's for a PowerMac era PCI
display card), they wouldn't even be using fbdev anymore.  Or the devm_*
apis could be used, but that's just extra work for diminishing
returns...

Cc: Finn Thain <fthain@telegraphics.com.au>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-68-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/imsttfb.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/imsttfb.c b/drivers/video/fbdev/imsttfb.c
index e04411701ec85..16f272a508112 100644
--- a/drivers/video/fbdev/imsttfb.c
+++ b/drivers/video/fbdev/imsttfb.c
@@ -1469,6 +1469,7 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct imstt_par *par;
 	struct fb_info *info;
 	struct device_node *dp;
+	int ret = -ENOMEM;
 	
 	dp = pci_device_to_OF_node(pdev);
 	if(dp)
@@ -1504,23 +1505,37 @@ static int imsttfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		default:
 			printk(KERN_INFO "imsttfb: Device 0x%x unknown, "
 					 "contact maintainer.\n", pdev->device);
-			release_mem_region(addr, size);
-			framebuffer_release(info);
-			return -ENODEV;
+			ret = -ENODEV;
+			goto error;
 	}
 
 	info->fix.smem_start = addr;
 	info->screen_base = (__u8 *)ioremap(addr, par->ramdac == IBM ?
 					    0x400000 : 0x800000);
+	if (!info->screen_base)
+		goto error;
 	info->fix.mmio_start = addr + 0x800000;
 	par->dc_regs = ioremap(addr + 0x800000, 0x1000);
+	if (!par->dc_regs)
+		goto error;
 	par->cmap_regs_phys = addr + 0x840000;
 	par->cmap_regs = (__u8 *)ioremap(addr + 0x840000, 0x1000);
+	if (!par->cmap_regs)
+		goto error;
 	info->pseudo_palette = par->palette;
 	init_imstt(info);
 
 	pci_set_drvdata(pdev, info);
 	return 0;
+
+error:
+	if (par->dc_regs)
+		iounmap(par->dc_regs);
+	if (info->screen_base)
+		iounmap(info->screen_base);
+	release_mem_region(addr, size);
+	framebuffer_release(info);
+	return ret;
 }
 
 static void imsttfb_remove(struct pci_dev *pdev)
-- 
GitLab


From 30a350947692f794796f563029d29764497f2887 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:35 +0200
Subject: [PATCH 0658/3804] Revert "brcmfmac: add a check for the status of
 usb_register"

This reverts commit 42daad3343be4a4e1ee03e30a5f5cc731dadfef5.

Because of recent interactions with developers from @umn.edu, all
commits from them have been recently re-reviewed to determine whether
they were correct.

Upon review, this commit was found to be incorrect for the reasons
below, so it must be reverted.  It will be fixed up "correctly" in a
later kernel change.

The original commit here did nothing to actually help if usb_register()
failed, so it gives a "false sense of security" when there is none.  The
correct solution is to correctly unwind from this error.

Cc: Kangjie Lu <kjlu@umn.edu>
Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-69-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 586f4dfc638b9..d2a803fc8ac63 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1586,10 +1586,6 @@ void brcmf_usb_exit(void)
 
 void brcmf_usb_register(void)
 {
-	int ret;
-
 	brcmf_dbg(USB, "Enter\n");
-	ret = usb_register(&brcmf_usbdrvr);
-	if (ret)
-		brcmf_err("usb_register failed %d\n", ret);
+	usb_register(&brcmf_usbdrvr);
 }
-- 
GitLab


From 419b4a142a7ece36cebcd434f8ce2af59ef94b85 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 3 May 2021 13:57:36 +0200
Subject: [PATCH 0659/3804] brcmfmac: properly check for bus register errors

The brcmfmac driver ignores any errors on initialization with the
different busses by deferring the initialization to a workqueue and
ignoring all possible errors that might happen.  Fix up all of this by
only allowing the module to load if all bus registering worked properly.

Cc: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210503115736.2104747-70-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../broadcom/brcm80211/brcmfmac/bcmsdh.c      |  8 +---
 .../broadcom/brcm80211/brcmfmac/bus.h         | 19 ++++++++-
 .../broadcom/brcm80211/brcmfmac/core.c        | 42 ++++++++-----------
 .../broadcom/brcm80211/brcmfmac/pcie.c        |  9 +---
 .../broadcom/brcm80211/brcmfmac/pcie.h        |  5 ---
 .../broadcom/brcm80211/brcmfmac/usb.c         |  4 +-
 6 files changed, 41 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index ce8c102df7b3e..633d0ab190314 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -1217,13 +1217,9 @@ static struct sdio_driver brcmf_sdmmc_driver = {
 	},
 };
 
-void brcmf_sdio_register(void)
+int brcmf_sdio_register(void)
 {
-	int ret;
-
-	ret = sdio_register_driver(&brcmf_sdmmc_driver);
-	if (ret)
-		brcmf_err("sdio_register_driver failed: %d\n", ret);
+	return sdio_register_driver(&brcmf_sdmmc_driver);
 }
 
 void brcmf_sdio_exit(void)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
index 08f9d47f2e5ca..3f5da3bb6aa59 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bus.h
@@ -275,11 +275,26 @@ void brcmf_bus_add_txhdrlen(struct device *dev, uint len);
 
 #ifdef CONFIG_BRCMFMAC_SDIO
 void brcmf_sdio_exit(void);
-void brcmf_sdio_register(void);
+int brcmf_sdio_register(void);
+#else
+static inline void brcmf_sdio_exit(void) { }
+static inline int brcmf_sdio_register(void) { return 0; }
 #endif
+
 #ifdef CONFIG_BRCMFMAC_USB
 void brcmf_usb_exit(void);
-void brcmf_usb_register(void);
+int brcmf_usb_register(void);
+#else
+static inline void brcmf_usb_exit(void) { }
+static inline int brcmf_usb_register(void) { return 0; }
+#endif
+
+#ifdef CONFIG_BRCMFMAC_PCIE
+void brcmf_pcie_exit(void);
+int brcmf_pcie_register(void);
+#else
+static inline void brcmf_pcie_exit(void) { }
+static inline int brcmf_pcie_register(void) { return 0; }
 #endif
 
 #endif /* BRCMFMAC_BUS_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 838b09b23abff..cee1682d23335 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -1518,40 +1518,34 @@ void brcmf_bus_change_state(struct brcmf_bus *bus, enum brcmf_bus_state state)
 	}
 }
 
-static void brcmf_driver_register(struct work_struct *work)
-{
-#ifdef CONFIG_BRCMFMAC_SDIO
-	brcmf_sdio_register();
-#endif
-#ifdef CONFIG_BRCMFMAC_USB
-	brcmf_usb_register();
-#endif
-#ifdef CONFIG_BRCMFMAC_PCIE
-	brcmf_pcie_register();
-#endif
-}
-static DECLARE_WORK(brcmf_driver_work, brcmf_driver_register);
-
 int __init brcmf_core_init(void)
 {
-	if (!schedule_work(&brcmf_driver_work))
-		return -EBUSY;
+	int err;
 
+	err = brcmf_sdio_register();
+	if (err)
+		return err;
+
+	err = brcmf_usb_register();
+	if (err)
+		goto error_usb_register;
+
+	err = brcmf_pcie_register();
+	if (err)
+		goto error_pcie_register;
 	return 0;
+
+error_pcie_register:
+	brcmf_usb_exit();
+error_usb_register:
+	brcmf_sdio_exit();
+	return err;
 }
 
 void __exit brcmf_core_exit(void)
 {
-	cancel_work_sync(&brcmf_driver_work);
-
-#ifdef CONFIG_BRCMFMAC_SDIO
 	brcmf_sdio_exit();
-#endif
-#ifdef CONFIG_BRCMFMAC_USB
 	brcmf_usb_exit();
-#endif
-#ifdef CONFIG_BRCMFMAC_PCIE
 	brcmf_pcie_exit();
-#endif
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
index ad79e3b7e74a3..143a705b5cb3a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c
@@ -2140,15 +2140,10 @@ static struct pci_driver brcmf_pciedrvr = {
 };
 
 
-void brcmf_pcie_register(void)
+int brcmf_pcie_register(void)
 {
-	int err;
-
 	brcmf_dbg(PCIE, "Enter\n");
-	err = pci_register_driver(&brcmf_pciedrvr);
-	if (err)
-		brcmf_err(NULL, "PCIE driver registration failed, err=%d\n",
-			  err);
+	return pci_register_driver(&brcmf_pciedrvr);
 }
 
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
index d026401d20010..8e6c227e8315c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.h
@@ -11,9 +11,4 @@ struct brcmf_pciedev {
 	struct brcmf_pciedev_info *devinfo;
 };
 
-
-void brcmf_pcie_exit(void);
-void brcmf_pcie_register(void);
-
-
 #endif /* BRCMFMAC_PCIE_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index d2a803fc8ac63..9fb68c2dc7e39 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1584,8 +1584,8 @@ void brcmf_usb_exit(void)
 	usb_deregister(&brcmf_usbdrvr);
 }
 
-void brcmf_usb_register(void)
+int brcmf_usb_register(void)
 {
 	brcmf_dbg(USB, "Enter\n");
-	usb_register(&brcmf_usbdrvr);
+	return usb_register(&brcmf_usbdrvr);
 }
-- 
GitLab


From 9183f01b5e6e32eb3f17b5f3f8d5ad5ac9786c49 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 6 May 2021 16:00:47 +0200
Subject: [PATCH 0660/3804] cdrom: gdrom: initialize global variable at init
 time

As Peter points out, if we were to disconnect and then reconnect this
driver from a device, the "global" state of the device would contain odd
values and could cause problems.  Fix this up by just initializing the
whole thing to 0 at probe() time.

Ideally this would be a per-device variable, but given the age and the
total lack of users of it, that would require a lot of s/./->/g changes
for really no good reason.

Reported-by: Peter Rosin <peda@axentia.se>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Peter Rosin <peda@axentia.se>
Link: https://lore.kernel.org/r/YJP2j6AU82MqEY2M@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cdrom/gdrom.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 6c4f6139f8530..c6d8c0f597224 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -744,6 +744,13 @@ static const struct blk_mq_ops gdrom_mq_ops = {
 static int probe_gdrom(struct platform_device *devptr)
 {
 	int err;
+
+	/*
+	 * Ensure our "one" device is initialized properly in case of previous
+	 * usages of it
+	 */
+	memset(&gd, 0, sizeof(gd));
+
 	/* Start the device */
 	if (gdrom_execute_diagnostic() != 1) {
 		pr_warn("ATA Probe for GDROM failed\n");
@@ -847,7 +854,7 @@ static struct platform_driver gdrom_driver = {
 static int __init init_gdrom(void)
 {
 	int rc;
-	gd.toc = NULL;
+
 	rc = platform_driver_register(&gdrom_driver);
 	if (rc)
 		return rc;
-- 
GitLab


From d4d0ad57b3865795c4cde2fb5094c594c2e8f469 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:41 +0200
Subject: [PATCH 0661/3804] vgacon: Record video mode changes with VT_RESIZEX

Fix an issue with VGA console font size changes made after the initial
video text mode has been changed with a user tool like `svgatextmode'
calling the VT_RESIZEX ioctl.  As it stands in that case the original
screen geometry continues being used to validate further VT resizing.

Consequently when the video adapter is firstly reprogrammed from the
original say 80x25 text mode using a 9x16 character cell (720x400 pixel
resolution) to say 80x37 text mode and the same character cell (720x592
pixel resolution), and secondly the CRTC character cell updated to 9x8
(by loading a suitable font with the KD_FONT_OP_SET request of the
KDFONTOP ioctl), the VT geometry does not get further updated from 80x37
and only the upper half of the screen is used for the VT, with the lower
half showing rubbish corresponding to whatever happens to be there in
the video memory that maps to that part of the screen.  Of course the
proportions change according to text mode geometries and font sizes
chosen.

Address the problem then, by updating the text mode geometry defaults
rather than checking against them whenever the VT is resized via a user
ioctl.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: e400b6ec4ede ("vt/vgacon: Check if screen resize request comes from userspace")
Cc: stable@vger.kernel.org # v2.6.24+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/video/console/vgacon.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 962c12be97741..511e7d06b1485 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -1089,12 +1089,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 	if ((width << 1) * height > vga_vram_size)
 		return -EINVAL;
 
+	if (user) {
+		/*
+		 * Ho ho!  Someone (svgatextmode, eh?) may have reprogrammed
+		 * the video mode!  Set the new defaults then and go away.
+		 */
+		screen_info.orig_video_cols = width;
+		screen_info.orig_video_lines = height;
+		vga_default_font_height = c->vc_font.height;
+		return 0;
+	}
 	if (width % 2 || width > screen_info.orig_video_cols ||
 	    height > (screen_info.orig_video_lines * vga_default_font_height)/
 	    c->vc_font.height)
-		/* let svgatextmode tinker with video timings and
-		   return success */
-		return (user) ? 0 : -EINVAL;
+		return -EINVAL;
 
 	if (con_is_visible(c) && !vga_is_gfx) /* who knows */
 		vgacon_doresize(c, width, height);
-- 
GitLab


From a90c275eb144c1b755f04769e1f29d832d6daeaf Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:45 +0200
Subject: [PATCH 0662/3804] vt_ioctl: Revert VT_RESIZEX parameter handling
 removal

Revert the removal of code handling extra VT_RESIZEX ioctl's parameters
beyond those that VT_RESIZE supports, fixing a functional regression
causing `svgatextmode' not to resize the VT anymore.

As a consequence of the reverted change when the video adapter is
reprogrammed from the original say 80x25 text mode using a 9x16
character cell (720x400 pixel resolution) to say 80x37 text mode and the
same character cell (720x592 pixel resolution), the VT geometry does not
get updated and only the upper two thirds of the screen are used for the
VT, and the lower part remains blank.  The proportions change according
to the text mode geometries chosen.

Revert the change verbatim then, bringing back previous VT resizing.

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 988d0763361b ("vt_ioctl: make VT_RESIZEX behave like VT_RESIZE")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt_ioctl.c | 57 ++++++++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 10 deletions(-)

diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 89aeaf3c1bca6..95d10197566ba 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -671,21 +671,58 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
 	if (copy_from_user(&v, cs, sizeof(struct vt_consize)))
 		return -EFAULT;
 
-	if (v.v_vlin)
-		pr_info_once("\"struct vt_consize\"->v_vlin is ignored. Please report if you need this.\n");
-	if (v.v_clin)
-		pr_info_once("\"struct vt_consize\"->v_clin is ignored. Please report if you need this.\n");
+	/* FIXME: Should check the copies properly */
+	if (!v.v_vlin)
+		v.v_vlin = vc->vc_scan_lines;
+
+	if (v.v_clin) {
+		int rows = v.v_vlin / v.v_clin;
+		if (v.v_rows != rows) {
+			if (v.v_rows) /* Parameters don't add up */
+				return -EINVAL;
+			v.v_rows = rows;
+		}
+	}
+
+	if (v.v_vcol && v.v_ccol) {
+		int cols = v.v_vcol / v.v_ccol;
+		if (v.v_cols != cols) {
+			if (v.v_cols)
+				return -EINVAL;
+			v.v_cols = cols;
+		}
+	}
+
+	if (v.v_clin > 32)
+		return -EINVAL;
 
-	console_lock();
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
-		vc = vc_cons[i].d;
+		struct vc_data *vcp;
 
-		if (vc) {
-			vc->vc_resize_user = 1;
-			vc_resize(vc, v.v_cols, v.v_rows);
+		if (!vc_cons[i].d)
+			continue;
+		console_lock();
+		vcp = vc_cons[i].d;
+		if (vcp) {
+			int ret;
+			int save_scan_lines = vcp->vc_scan_lines;
+			int save_font_height = vcp->vc_font.height;
+
+			if (v.v_vlin)
+				vcp->vc_scan_lines = v.v_vlin;
+			if (v.v_clin)
+				vcp->vc_font.height = v.v_clin;
+			vcp->vc_resize_user = 1;
+			ret = vc_resize(vcp, v.v_cols, v.v_rows);
+			if (ret) {
+				vcp->vc_scan_lines = save_scan_lines;
+				vcp->vc_font.height = save_font_height;
+				console_unlock();
+				return ret;
+			}
 		}
+		console_unlock();
 	}
-	console_unlock();
 
 	return 0;
 }
-- 
GitLab


From 860dafa902595fb5f1d23bbcce1215188c3341e6 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Thu, 13 May 2021 11:51:50 +0200
Subject: [PATCH 0663/3804] vt: Fix character height handling with VT_RESIZEX

Restore the original intent of the VT_RESIZEX ioctl's `v_clin' parameter
which is the number of pixel rows per character (cell) rather than the
height of the font used.

For framebuffer devices the two values are always the same, because the
former is inferred from the latter one.  For VGA used as a true text
mode device these two parameters are independent from each other: the
number of pixel rows per character is set in the CRT controller, while
font height is in fact hardwired to 32 pixel rows and fonts of heights
below that value are handled by padding their data with blanks when
loaded to hardware for use by the character generator.  One can change
the setting in the CRT controller and it will update the screen contents
accordingly regardless of the font loaded.

The `v_clin' parameter is used by the `vgacon' driver to set the height
of the character cell and then the cursor position within.  Make the
parameter explicit then, by defining a new `vc_cell_height' struct
member of `vc_data', set it instead of `vc_font.height' from `v_clin' in
the VT_RESIZEX ioctl, and then use it throughout the `vgacon' driver
except where actual font data is accessed which as noted above is
independent from the CRTC setting.

This way the framebuffer console driver is free to ignore the `v_clin'
parameter as irrelevant, as it always should have, avoiding any issues
that attempts to give the parameter a meaning there could have caused,
such as the one that led to commit 988d0763361b ("vt_ioctl: make
VT_RESIZEX behave like VT_RESIZE"):

 "syzbot is reporting UAF/OOB read at bit_putcs()/soft_cursor() [1][2],
  for vt_resizex() from ioctl(VT_RESIZEX) allows setting font height
  larger than actual font height calculated by con_font_set() from
  ioctl(PIO_FONT). Since fbcon_set_font() from con_font_set() allocates
  minimal amount of memory based on actual font height calculated by
  con_font_set(), use of vt_resizex() can cause UAF/OOB read for font
  data."

The problem first appeared around Linux 2.5.66 which predates our repo
history, but the origin could be identified with the old MIPS/Linux repo
also at: <git://git.kernel.org/pub/scm/linux/kernel/git/ralf/linux.git>
as commit 9736a3546de7 ("Merge with Linux 2.5.66."), where VT_RESIZEX
code in `vt_ioctl' was updated as follows:

 		if (clin)
-			video_font_height = clin;
+			vc->vc_font.height = clin;

making the parameter apply to framebuffer devices as well, perhaps due
to the use of "font" in the name of the original `video_font_height'
variable.  Use "cell" in the new struct member then to avoid ambiguity.

References:

[1] https://syzkaller.appspot.com/bug?id=32577e96d88447ded2d3b76d71254fb855245837
[2] https://syzkaller.appspot.com/bug?id=6b8355d27b2b94fb5cedf4655e3a59162d9e48e3

Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org # v2.6.12+
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt_ioctl.c      |  6 ++---
 drivers/video/console/vgacon.c | 44 +++++++++++++++++-----------------
 include/linux/console_struct.h |  1 +
 3 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c
index 95d10197566ba..0e0cd9e9e589e 100644
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -706,17 +706,17 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
 		if (vcp) {
 			int ret;
 			int save_scan_lines = vcp->vc_scan_lines;
-			int save_font_height = vcp->vc_font.height;
+			int save_cell_height = vcp->vc_cell_height;
 
 			if (v.v_vlin)
 				vcp->vc_scan_lines = v.v_vlin;
 			if (v.v_clin)
-				vcp->vc_font.height = v.v_clin;
+				vcp->vc_cell_height = v.v_clin;
 			vcp->vc_resize_user = 1;
 			ret = vc_resize(vcp, v.v_cols, v.v_rows);
 			if (ret) {
 				vcp->vc_scan_lines = save_scan_lines;
-				vcp->vc_font.height = save_font_height;
+				vcp->vc_cell_height = save_cell_height;
 				console_unlock();
 				return ret;
 			}
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 511e7d06b1485..631eb918f8e14 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -383,7 +383,7 @@ static void vgacon_init(struct vc_data *c, int init)
 		vc_resize(c, vga_video_num_columns, vga_video_num_lines);
 
 	c->vc_scan_lines = vga_scan_lines;
-	c->vc_font.height = vga_video_font_height;
+	c->vc_font.height = c->vc_cell_height = vga_video_font_height;
 	c->vc_complement_mask = 0x7700;
 	if (vga_512_chars)
 		c->vc_hi_font_mask = 0x0800;
@@ -518,32 +518,32 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 		switch (CUR_SIZE(c->vc_cursor_type)) {
 		case CUR_UNDERLINE:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 2 : 3),
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_TWO_THIRDS:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height / 3,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height / 3,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_LOWER_THIRD:
 			vgacon_set_cursor_size(c->state.x,
-					       (c->vc_font.height * 2) / 3,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       (c->vc_cell_height * 2) / 3,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_LOWER_HALF:
 			vgacon_set_cursor_size(c->state.x,
-					       c->vc_font.height / 2,
-					       c->vc_font.height -
-					       (c->vc_font.height <
+					       c->vc_cell_height / 2,
+					       c->vc_cell_height -
+					       (c->vc_cell_height <
 						10 ? 1 : 2));
 			break;
 		case CUR_NONE:
@@ -554,7 +554,7 @@ static void vgacon_cursor(struct vc_data *c, int mode)
 			break;
 		default:
 			vgacon_set_cursor_size(c->state.x, 1,
-					       c->vc_font.height);
+					       c->vc_cell_height);
 			break;
 		}
 		break;
@@ -565,13 +565,13 @@ static int vgacon_doresize(struct vc_data *c,
 		unsigned int width, unsigned int height)
 {
 	unsigned long flags;
-	unsigned int scanlines = height * c->vc_font.height;
+	unsigned int scanlines = height * c->vc_cell_height;
 	u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan;
 
 	raw_spin_lock_irqsave(&vga_lock, flags);
 
 	vgacon_xres = width * VGA_FONTWIDTH;
-	vgacon_yres = height * c->vc_font.height;
+	vgacon_yres = height * c->vc_cell_height;
 	if (vga_video_type >= VIDEO_TYPE_VGAC) {
 		outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
 		max_scan = inb_p(vga_video_port_val);
@@ -626,9 +626,9 @@ static int vgacon_doresize(struct vc_data *c,
 static int vgacon_switch(struct vc_data *c)
 {
 	int x = c->vc_cols * VGA_FONTWIDTH;
-	int y = c->vc_rows * c->vc_font.height;
+	int y = c->vc_rows * c->vc_cell_height;
 	int rows = screen_info.orig_video_lines * vga_default_font_height/
-		c->vc_font.height;
+		c->vc_cell_height;
 	/*
 	 * We need to save screen size here as it's the only way
 	 * we can spot the screen has been resized and we need to
@@ -1041,7 +1041,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight)
 				cursor_size_lastto = 0;
 				c->vc_sw->con_cursor(c, CM_DRAW);
 			}
-			c->vc_font.height = fontheight;
+			c->vc_font.height = c->vc_cell_height = fontheight;
 			vc_resize(c, 0, rows);	/* Adjust console size */
 		}
 	}
@@ -1096,12 +1096,12 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
 		 */
 		screen_info.orig_video_cols = width;
 		screen_info.orig_video_lines = height;
-		vga_default_font_height = c->vc_font.height;
+		vga_default_font_height = c->vc_cell_height;
 		return 0;
 	}
 	if (width % 2 || width > screen_info.orig_video_cols ||
 	    height > (screen_info.orig_video_lines * vga_default_font_height)/
-	    c->vc_font.height)
+	    c->vc_cell_height)
 		return -EINVAL;
 
 	if (con_is_visible(c) && !vga_is_gfx) /* who knows */
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 153734816b49c..d5b9c8d40c18e 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -101,6 +101,7 @@ struct vc_data {
 	unsigned int	vc_rows;
 	unsigned int	vc_size_row;		/* Bytes per row */
 	unsigned int	vc_scan_lines;		/* # of scan lines */
+	unsigned int	vc_cell_height;		/* CRTC character cell height */
 	unsigned long	vc_origin;		/* [!] Start of real screen */
 	unsigned long	vc_scr_end;		/* [!] End of real screen */
 	unsigned long	vc_visible_origin;	/* [!] Top of visible window */
-- 
GitLab


From eb01f5353bdaa59600b29d864819056a0e3de24d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 13 May 2021 12:23:24 -0400
Subject: [PATCH 0664/3804] tracing: Handle %.*s in trace_check_vprintf()

If a trace event uses the %.*s notation, trace_check_vprintf() will
fail and warn about bad processing of strings, because it does not
take into account the length field when processing the star (*) part.
Have it handle this case as well.

Link: https://lore.kernel.org/linux-nfs/238C0E2D-C2A4-4578-ADD2-C565B3B99842@oracle.com/

Reported-by: Chuck Lever III <chuck.lever@oracle.com>
Fixes: 9a6944fee68e2 ("tracing: Add a verifier to check string pointers for trace events")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 560e4c8d3825b..a21ef9cd2aae2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3704,6 +3704,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 		goto print;
 
 	while (*p) {
+		bool star = false;
+		int len = 0;
+
 		j = 0;
 
 		/* We only care about %s and variants */
@@ -3725,13 +3728,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 				/* Need to test cases like %08.*s */
 				for (j = 1; p[i+j]; j++) {
 					if (isdigit(p[i+j]) ||
-					    p[i+j] == '*' ||
 					    p[i+j] == '.')
 						continue;
+					if (p[i+j] == '*') {
+						star = true;
+						continue;
+					}
 					break;
 				}
 				if (p[i+j] == 's')
 					break;
+				star = false;
 			}
 			j = 0;
 		}
@@ -3744,6 +3751,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 		iter->fmt[i] = '\0';
 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
 
+		if (star)
+			len = va_arg(ap, int);
+
 		/* The ap now points to the string data of the %s */
 		str = va_arg(ap, const char *);
 
@@ -3762,8 +3772,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 			int ret;
 
 			/* Try to safely read the string */
-			ret = strncpy_from_kernel_nofault(iter->fmt, str,
-							  iter->fmt_size);
+			if (star) {
+				if (len + 1 > iter->fmt_size)
+					len = iter->fmt_size - 1;
+				if (len < 0)
+					len = 0;
+				ret = copy_from_kernel_nofault(iter->fmt, str, len);
+				iter->fmt[len] = 0;
+				star = false;
+			} else {
+				ret = strncpy_from_kernel_nofault(iter->fmt, str,
+								  iter->fmt_size);
+			}
 			if (ret < 0)
 				trace_seq_printf(&iter->seq, "(0x%px)", str);
 			else
@@ -3775,7 +3795,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
 			strncpy(iter->fmt, p + i, j + 1);
 			iter->fmt[j+1] = '\0';
 		}
-		trace_seq_printf(&iter->seq, iter->fmt, str);
+		if (star)
+			trace_seq_printf(&iter->seq, iter->fmt, len, str);
+		else
+			trace_seq_printf(&iter->seq, iter->fmt, str);
 
 		p += i + j + 1;
 	}
-- 
GitLab


From ced081a436d21a7d34d4d42acb85058f9cf423f2 Mon Sep 17 00:00:00 2001
From: Luca Stefani <luca.stefani.ge1@gmail.com>
Date: Thu, 6 May 2021 21:37:25 +0200
Subject: [PATCH 0665/3804] binder: Return EFAULT if we fail
 BINDER_ENABLE_ONEWAY_SPAM_DETECTION

All the other ioctl paths return EFAULT in case the
copy_from_user/copy_to_user call fails, make oneway spam detection
follow the same paradigm.

Fixes: a7dc1e6f99df ("binder: tell userspace to dump current backtrace when detected oneway spamming")
Acked-by: Todd Kjos <tkjos@google.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Luca Stefani <luca.stefani.ge1@gmail.com>
Link: https://lore.kernel.org/r/20210506193726.45118-1-luca.stefani.ge1@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 61d34e1dc59c5..bcec598b89f23 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -4918,7 +4918,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		uint32_t enable;
 
 		if (copy_from_user(&enable, ubuf, sizeof(enable))) {
-			ret = -EINVAL;
+			ret = -EFAULT;
 			goto err;
 		}
 		binder_inner_proc_lock(proc);
-- 
GitLab


From c699a0db2d62e3bbb7f0bf35c87edbc8d23e3062 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 10 May 2021 14:49:05 -0400
Subject: [PATCH 0666/3804] dm snapshot: fix crash with transient storage and
 zero chunk size

The following commands will crash the kernel:

modprobe brd rd_size=1048576
dmsetup create o --table "0 `blockdev --getsize /dev/ram0` snapshot-origin /dev/ram0"
dmsetup create s --table "0 `blockdev --getsize /dev/ram0` snapshot /dev/ram0 /dev/ram1 N 0"

The reason is that when we test for zero chunk size, we jump to the label
bad_read_metadata without setting the "r" variable. The function
snapshot_ctr destroys all the structures and then exits with "r == 0". The
kernel then crashes because it falsely believes that snapshot_ctr
succeeded.

In order to fix the bug, we set the variable "r" to -EINVAL.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 2a51ddd840b41..b8e4d31124eaa 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1408,6 +1408,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	if (!s->store->chunk_size) {
 		ti->error = "Chunk size not set";
+		r = -EINVAL;
 		goto bad_read_metadata;
 	}
 
-- 
GitLab


From 640d1eaff2c09e382a23bd831094ebbfaa16fef5 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Tue, 4 May 2021 16:22:34 -0600
Subject: [PATCH 0667/3804] dyndbg: avoid calling dyndbg_emit_prefix when it
 has no work

Wrap function in a static-inline one, which checks flags to avoid
calling the function unnecessarily.

And hoist its output-buffer initialization to the grand-caller, which
is already allocating the buffer on the stack, and can trivially
initialize it too.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20210504222235.1033685-2-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dynamic_debug.h |  5 +++++
 lib/dynamic_debug.c           | 19 ++++++++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index a57ee75342cf8..dce631e678dd6 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -32,6 +32,11 @@ struct _ddebug {
 #define _DPRINTK_FLAGS_INCL_FUNCNAME	(1<<2)
 #define _DPRINTK_FLAGS_INCL_LINENO	(1<<3)
 #define _DPRINTK_FLAGS_INCL_TID		(1<<4)
+
+#define _DPRINTK_FLAGS_INCL_ANY		\
+	(_DPRINTK_FLAGS_INCL_MODNAME | _DPRINTK_FLAGS_INCL_FUNCNAME |\
+	 _DPRINTK_FLAGS_INCL_LINENO  | _DPRINTK_FLAGS_INCL_TID)
+
 #if defined DEBUG
 #define _DPRINTK_FLAGS_DEFAULT _DPRINTK_FLAGS_PRINT
 #else
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 921d0a654243c..3989204033212 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -586,13 +586,11 @@ static int remaining(int wrote)
 	return 0;
 }
 
-static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
+static char *__dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 {
 	int pos_after_tid;
 	int pos = 0;
 
-	*buf = '\0';
-
 	if (desc->flags & _DPRINTK_FLAGS_INCL_TID) {
 		if (in_interrupt())
 			pos += snprintf(buf + pos, remaining(pos), "<intr> ");
@@ -618,11 +616,18 @@ static char *dynamic_emit_prefix(const struct _ddebug *desc, char *buf)
 	return buf;
 }
 
+static inline char *dynamic_emit_prefix(struct _ddebug *desc, char *buf)
+{
+	if (unlikely(desc->flags & _DPRINTK_FLAGS_INCL_ANY))
+		return __dynamic_emit_prefix(desc, buf);
+	return buf;
+}
+
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...)
 {
 	va_list args;
 	struct va_format vaf;
-	char buf[PREFIX_SIZE];
+	char buf[PREFIX_SIZE] = "";
 
 	BUG_ON(!descriptor);
 	BUG_ON(!fmt);
@@ -655,7 +660,7 @@ void __dynamic_dev_dbg(struct _ddebug *descriptor,
 	if (!dev) {
 		printk(KERN_DEBUG "(NULL device *): %pV", &vaf);
 	} else {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, dev, "%s%s %s: %pV",
 				dynamic_emit_prefix(descriptor, buf),
@@ -684,7 +689,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (dev && dev->dev.parent) {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, dev->dev.parent,
 				"%s%s %s %s%s: %pV",
@@ -720,7 +725,7 @@ void __dynamic_ibdev_dbg(struct _ddebug *descriptor,
 	vaf.va = &args;
 
 	if (ibdev && ibdev->dev.parent) {
-		char buf[PREFIX_SIZE];
+		char buf[PREFIX_SIZE] = "";
 
 		dev_printk_emit(LOGLEVEL_DEBUG, ibdev->dev.parent,
 				"%s%s %s %s: %pV",
-- 
GitLab


From a3626bcf5fafad0ded410b269e21f37bdaf2baf4 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Tue, 4 May 2021 16:22:35 -0600
Subject: [PATCH 0668/3804] dyndbg: drop uninformative vpr_info

Remove a vpr_info which I added in 2012, when I knew even less than now.
In 2020, a simpler pr_fmt stripped it of context, and any remaining value.

no functional change.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20210504222235.1033685-3-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/dynamic_debug.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 3989204033212..641767b0dce29 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -920,7 +920,6 @@ static const struct seq_operations ddebug_proc_seqops = {
 
 static int ddebug_proc_open(struct inode *inode, struct file *file)
 {
-	vpr_info("called\n");
 	return seq_open_private(file, &ddebug_proc_seqops,
 				sizeof(struct ddebug_iter));
 }
-- 
GitLab


From dbae70d452a0858d62915166d93650c98fe6639c Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 12 May 2021 08:28:43 -0400
Subject: [PATCH 0669/3804] dm integrity: revert to not using discard filler
 when recalculating

Revert the commit 7a5b96b4784454ba258e83dc7469ddbacd3aaac3 ("dm integrity:
use discard support when recalculating").

There's a bug that when we write some data beyond the current recalculate
boundary, the checksum will be rewritten with the discard filler later.
And the data will no longer have integrity protection. There's no easy
fix for this case.

Also, another problematic case is if dm-integrity is used to detect
bitrot (random device errors, bit flips, etc); dm-integrity should
detect that even for unused sectors. With commit 7a5b96b4784 it can
happen that such change is undetected (because discard filler is not a
valid checksum).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Acked-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-integrity.c | 57 +++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 781942aeddd15..6d00e619a141f 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2689,30 +2689,26 @@ next_chunk:
 	if (unlikely(dm_integrity_failed(ic)))
 		goto err;
 
-	if (!ic->discard) {
-		io_req.bi_op = REQ_OP_READ;
-		io_req.bi_op_flags = 0;
-		io_req.mem.type = DM_IO_VMA;
-		io_req.mem.ptr.addr = ic->recalc_buffer;
-		io_req.notify.fn = NULL;
-		io_req.client = ic->io;
-		io_loc.bdev = ic->dev->bdev;
-		io_loc.sector = get_data_sector(ic, area, offset);
-		io_loc.count = n_sectors;
+	io_req.bi_op = REQ_OP_READ;
+	io_req.bi_op_flags = 0;
+	io_req.mem.type = DM_IO_VMA;
+	io_req.mem.ptr.addr = ic->recalc_buffer;
+	io_req.notify.fn = NULL;
+	io_req.client = ic->io;
+	io_loc.bdev = ic->dev->bdev;
+	io_loc.sector = get_data_sector(ic, area, offset);
+	io_loc.count = n_sectors;
 
-		r = dm_io(&io_req, 1, &io_loc, NULL);
-		if (unlikely(r)) {
-			dm_integrity_io_error(ic, "reading data", r);
-			goto err;
-		}
+	r = dm_io(&io_req, 1, &io_loc, NULL);
+	if (unlikely(r)) {
+		dm_integrity_io_error(ic, "reading data", r);
+		goto err;
+	}
 
-		t = ic->recalc_tags;
-		for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
-			integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
-			t += ic->tag_size;
-		}
-	} else {
-		t = ic->recalc_tags + (n_sectors >> ic->sb->log2_sectors_per_block) * ic->tag_size;
+	t = ic->recalc_tags;
+	for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
+		integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
+		t += ic->tag_size;
 	}
 
 	metadata_block = get_metadata_sector_and_offset(ic, area, offset, &metadata_offset);
@@ -4368,13 +4364,11 @@ try_smaller_buffer:
 			goto bad;
 		}
 		INIT_WORK(&ic->recalc_work, integrity_recalc);
-		if (!ic->discard) {
-			ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
-			if (!ic->recalc_buffer) {
-				ti->error = "Cannot allocate buffer for recalculating";
-				r = -ENOMEM;
-				goto bad;
-			}
+		ic->recalc_buffer = vmalloc(RECALC_SECTORS << SECTOR_SHIFT);
+		if (!ic->recalc_buffer) {
+			ti->error = "Cannot allocate buffer for recalculating";
+			r = -ENOMEM;
+			goto bad;
 		}
 		ic->recalc_tags = kvmalloc_array(RECALC_SECTORS >> ic->sb->log2_sectors_per_block,
 						 ic->tag_size, GFP_KERNEL);
@@ -4383,9 +4377,6 @@ try_smaller_buffer:
 			r = -ENOMEM;
 			goto bad;
 		}
-		if (ic->discard)
-			memset(ic->recalc_tags, DISCARD_FILLER,
-			       (RECALC_SECTORS >> ic->sb->log2_sectors_per_block) * ic->tag_size);
 	} else {
 		if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
 			ti->error = "Recalculate can only be specified with internal_hash";
@@ -4579,7 +4570,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
 
 static struct target_type integrity_target = {
 	.name			= "integrity",
-	.version		= {1, 9, 0},
+	.version		= {1, 10, 0},
 	.module			= THIS_MODULE,
 	.features		= DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
 	.ctr			= dm_integrity_ctr,
-- 
GitLab


From bc8f3d4647a99468d7733039b6bc9234b6e91df4 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 11 May 2021 11:41:00 -0400
Subject: [PATCH 0670/3804] dm integrity: fix sparse warnings

Use the types __le* instead of __u* to fix sparse warnings.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-integrity.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 6d00e619a141f..20f2510db1f67 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -66,14 +66,14 @@ struct superblock {
 	__u8 magic[8];
 	__u8 version;
 	__u8 log2_interleave_sectors;
-	__u16 integrity_tag_size;
-	__u32 journal_sections;
-	__u64 provided_data_sectors;	/* userspace uses this value */
-	__u32 flags;
+	__le16 integrity_tag_size;
+	__le32 journal_sections;
+	__le64 provided_data_sectors;	/* userspace uses this value */
+	__le32 flags;
 	__u8 log2_sectors_per_block;
 	__u8 log2_blocks_per_bitmap_bit;
 	__u8 pad[2];
-	__u64 recalc_sector;
+	__le64 recalc_sector;
 	__u8 pad2[8];
 	__u8 salt[SALT_SIZE];
 };
@@ -86,16 +86,16 @@ struct superblock {
 
 #define	JOURNAL_ENTRY_ROUNDUP		8
 
-typedef __u64 commit_id_t;
+typedef __le64 commit_id_t;
 #define JOURNAL_MAC_PER_SECTOR		8
 
 struct journal_entry {
 	union {
 		struct {
-			__u32 sector_lo;
-			__u32 sector_hi;
+			__le32 sector_lo;
+			__le32 sector_hi;
 		} s;
-		__u64 sector;
+		__le64 sector;
 	} u;
 	commit_id_t last_bytes[];
 	/* __u8 tag[0]; */
@@ -806,7 +806,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result
 	}
 
 	if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_HMAC)) {
-		uint64_t section_le;
+		__le64 section_le;
 
 		r = crypto_shash_update(desc, (__u8 *)&ic->sb->salt, SALT_SIZE);
 		if (unlikely(r < 0)) {
@@ -1640,7 +1640,7 @@ static void integrity_end_io(struct bio *bio)
 static void integrity_sector_checksum(struct dm_integrity_c *ic, sector_t sector,
 				      const char *data, char *result)
 {
-	__u64 sector_le = cpu_to_le64(sector);
+	__le64 sector_le = cpu_to_le64(sector);
 	SHASH_DESC_ON_STACK(req, ic->internal_hash);
 	int r;
 	unsigned digest_size;
@@ -3822,7 +3822,7 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
 			for (i = 0; i < ic->journal_sections; i++) {
 				struct scatterlist sg;
 				struct skcipher_request *section_req;
-				__u32 section_le = cpu_to_le32(i);
+				__le32 section_le = cpu_to_le32(i);
 
 				memset(crypt_iv, 0x00, ivsize);
 				memset(crypt_data, 0x00, crypt_len);
-- 
GitLab


From 27b57bb76a897be80494ee11ee4e85326d19383d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 13 May 2021 21:40:38 +0200
Subject: [PATCH 0671/3804] Revert "Revert "ALSA: usx2y: Fix potential NULL
 pointer dereference""

This reverts commit 4667a6fc1777ce071504bab570d3599107f4790f.

Takashi writes:
	I have already started working on the bigger cleanup of this driver
	code based on 5.13-rc1, so could you drop this revert?

I missed our previous discussion about this, my fault for applying it.

Reported-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/usx2y/usb_stream.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/usb/usx2y/usb_stream.c b/sound/usb/usx2y/usb_stream.c
index 6bba17bf689ac..091c071b270af 100644
--- a/sound/usb/usx2y/usb_stream.c
+++ b/sound/usb/usx2y/usb_stream.c
@@ -91,7 +91,12 @@ static int init_urbs(struct usb_stream_kernel *sk, unsigned use_packsize,
 
 	for (u = 0; u < USB_STREAM_NURBS; ++u) {
 		sk->inurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+		if (!sk->inurb[u])
+			return -ENOMEM;
+
 		sk->outurb[u] = usb_alloc_urb(sk->n_o_ps, GFP_KERNEL);
+		if (!sk->outurb[u])
+			return -ENOMEM;
 	}
 
 	if (init_pipe_urbs(sk, use_packsize, sk->inurb, indata, dev, in_pipe) ||
-- 
GitLab


From a93a0a15876d2a077a3bc260b387d2457a051f24 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 13 May 2021 09:44:49 +0200
Subject: [PATCH 0672/3804] net: mdio: thunder: Fix a double free issue in the
 .remove function

'bus->mii_bus' has been allocated with 'devm_mdiobus_alloc_size()' in the
probe function. So it must not be freed explicitly or there will be a
double free.

Remove the incorrect 'mdiobus_free' in the remove function.

Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-thunder.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/mdio/mdio-thunder.c b/drivers/net/mdio/mdio-thunder.c
index cb1761693b690..822d2cdd2f359 100644
--- a/drivers/net/mdio/mdio-thunder.c
+++ b/drivers/net/mdio/mdio-thunder.c
@@ -126,7 +126,6 @@ static void thunder_mdiobus_pci_remove(struct pci_dev *pdev)
 			continue;
 
 		mdiobus_unregister(bus->mii_bus);
-		mdiobus_free(bus->mii_bus);
 		oct_mdio_writeq(0, bus->register_base + SMI_EN);
 	}
 	pci_release_regions(pdev);
-- 
GitLab


From e1d027dd97e1e750669cdc0d3b016a4f54e473eb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 13 May 2021 09:24:55 +0200
Subject: [PATCH 0673/3804] net: mdio: octeon: Fix some double free issues

'bus->mii_bus' has been allocated with 'devm_mdiobus_alloc_size()' in the
probe function. So it must not be freed explicitly or there will be a
double free.

Remove the incorrect 'mdiobus_free' in the error handling path of the
probe function and in remove function.

Suggested-By: Andrew Lunn <andrew@lunn.ch>
Fixes: 35d2aeac9810 ("phy: mdio-octeon: Use devm_mdiobus_alloc_size()")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mdio/mdio-octeon.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c
index 8ce99c4888e10..e096e68ac667b 100644
--- a/drivers/net/mdio/mdio-octeon.c
+++ b/drivers/net/mdio/mdio-octeon.c
@@ -71,7 +71,6 @@ static int octeon_mdiobus_probe(struct platform_device *pdev)
 
 	return 0;
 fail_register:
-	mdiobus_free(bus->mii_bus);
 	smi_en.u64 = 0;
 	oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
 	return err;
@@ -85,7 +84,6 @@ static int octeon_mdiobus_remove(struct platform_device *pdev)
 	bus = platform_get_drvdata(pdev);
 
 	mdiobus_unregister(bus->mii_bus);
-	mdiobus_free(bus->mii_bus);
 	smi_en.u64 = 0;
 	oct_mdio_writeq(smi_en.u64, bus->register_base + SMI_EN);
 	return 0;
-- 
GitLab


From 65e302a9bd57b62872040d57eea1201562a7cbb2 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Thu, 13 May 2021 15:11:51 +0530
Subject: [PATCH 0674/3804] cxgb4/ch_ktls: Clear resources when pf4 device is
 removed

This patch maintains the list of active tids and clears all the active
connection resources when a DETACH notification comes.

Fixes: a8c16e8ed624f ("crypto/chcr: move nic TLS functionality to drivers/net")
Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cxgb4_main.c   |  2 +-
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 80 ++++++++++++++++++-
 .../chelsio/inline_crypto/ch_ktls/chcr_ktls.h |  2 +
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 6264bc66a4fc9..421bd9b88028d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -6480,9 +6480,9 @@ static void cxgb4_ktls_dev_del(struct net_device *netdev,
 
 	adap->uld[CXGB4_ULD_KTLS].tlsdev_ops->tls_dev_del(netdev, tls_ctx,
 							  direction);
-	cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
 
 out_unlock:
+	cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
 	mutex_unlock(&uld_mutex);
 }
 
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
index ef3f1e92632f3..59683f79959ce 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
@@ -59,6 +59,7 @@ static int chcr_get_nfrags_to_send(struct sk_buff *skb, u32 start, u32 len)
 }
 
 static int chcr_init_tcb_fields(struct chcr_ktls_info *tx_info);
+static void clear_conn_resources(struct chcr_ktls_info *tx_info);
 /*
  * chcr_ktls_save_keys: calculate and save crypto keys.
  * @tx_info - driver specific tls info.
@@ -364,10 +365,14 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
 				chcr_get_ktls_tx_context(tls_ctx);
 	struct chcr_ktls_info *tx_info = tx_ctx->chcr_info;
 	struct ch_ktls_port_stats_debug *port_stats;
+	struct chcr_ktls_uld_ctx *u_ctx;
 
 	if (!tx_info)
 		return;
 
+	u_ctx = tx_info->adap->uld[CXGB4_ULD_KTLS].handle;
+	if (u_ctx && u_ctx->detach)
+		return;
 	/* clear l2t entry */
 	if (tx_info->l2te)
 		cxgb4_l2t_release(tx_info->l2te);
@@ -384,6 +389,8 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
 	if (tx_info->tid != -1) {
 		cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
 				 tx_info->tid, tx_info->ip_family);
+
+		xa_erase(&u_ctx->tid_list, tx_info->tid);
 	}
 
 	port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
@@ -411,6 +418,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct ch_ktls_port_stats_debug *port_stats;
 	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_uld_ctx *u_ctx;
 	struct chcr_ktls_info *tx_info;
 	struct dst_entry *dst;
 	struct adapter *adap;
@@ -425,6 +433,7 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	adap = pi->adapter;
 	port_stats = &adap->ch_ktls_stats.ktls_port[pi->port_id];
 	atomic64_inc(&port_stats->ktls_tx_connection_open);
+	u_ctx = adap->uld[CXGB4_ULD_KTLS].handle;
 
 	if (direction == TLS_OFFLOAD_CTX_DIR_RX) {
 		pr_err("not expecting for RX direction\n");
@@ -434,6 +443,9 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	if (tx_ctx->chcr_info)
 		goto out;
 
+	if (u_ctx && u_ctx->detach)
+		goto out;
+
 	tx_info = kvzalloc(sizeof(*tx_info), GFP_KERNEL);
 	if (!tx_info)
 		goto out;
@@ -569,6 +581,8 @@ free_tid:
 	cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
 			 tx_info->tid, tx_info->ip_family);
 
+	xa_erase(&u_ctx->tid_list, tx_info->tid);
+
 put_module:
 	/* release module refcount */
 	module_put(THIS_MODULE);
@@ -633,8 +647,12 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
 {
 	const struct cpl_act_open_rpl *p = (void *)input;
 	struct chcr_ktls_info *tx_info = NULL;
+	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_uld_ctx *u_ctx;
 	unsigned int atid, tid, status;
+	struct tls_context *tls_ctx;
 	struct tid_info *t;
+	int ret = 0;
 
 	tid = GET_TID(p);
 	status = AOPEN_STATUS_G(ntohl(p->atid_status));
@@ -666,14 +684,29 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
 	if (!status) {
 		tx_info->tid = tid;
 		cxgb4_insert_tid(t, tx_info, tx_info->tid, tx_info->ip_family);
+		/* Adding tid */
+		tls_ctx = tls_get_ctx(tx_info->sk);
+		tx_ctx = chcr_get_ktls_tx_context(tls_ctx);
+		u_ctx = adap->uld[CXGB4_ULD_KTLS].handle;
+		if (u_ctx) {
+			ret = xa_insert_bh(&u_ctx->tid_list, tid, tx_ctx,
+					   GFP_NOWAIT);
+			if (ret < 0) {
+				pr_err("%s: Failed to allocate tid XA entry = %d\n",
+				       __func__, tx_info->tid);
+				tx_info->open_state = CH_KTLS_OPEN_FAILURE;
+				goto out;
+			}
+		}
 		tx_info->open_state = CH_KTLS_OPEN_SUCCESS;
 	} else {
 		tx_info->open_state = CH_KTLS_OPEN_FAILURE;
 	}
+out:
 	spin_unlock(&tx_info->lock);
 
 	complete(&tx_info->completion);
-	return 0;
+	return ret;
 }
 
 /*
@@ -2090,6 +2123,8 @@ static void *chcr_ktls_uld_add(const struct cxgb4_lld_info *lldi)
 		goto out;
 	}
 	u_ctx->lldi = *lldi;
+	u_ctx->detach = false;
+	xa_init_flags(&u_ctx->tid_list, XA_FLAGS_LOCK_BH);
 out:
 	return u_ctx;
 }
@@ -2123,6 +2158,45 @@ static int chcr_ktls_uld_rx_handler(void *handle, const __be64 *rsp,
 	return 0;
 }
 
+static void clear_conn_resources(struct chcr_ktls_info *tx_info)
+{
+	/* clear l2t entry */
+	if (tx_info->l2te)
+		cxgb4_l2t_release(tx_info->l2te);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	/* clear clip entry */
+	if (tx_info->ip_family == AF_INET6)
+		cxgb4_clip_release(tx_info->netdev, (const u32 *)
+				   &tx_info->sk->sk_v6_rcv_saddr,
+				   1);
+#endif
+
+	/* clear tid */
+	if (tx_info->tid != -1)
+		cxgb4_remove_tid(&tx_info->adap->tids, tx_info->tx_chan,
+				 tx_info->tid, tx_info->ip_family);
+}
+
+static void ch_ktls_reset_all_conn(struct chcr_ktls_uld_ctx *u_ctx)
+{
+	struct ch_ktls_port_stats_debug *port_stats;
+	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
+	struct chcr_ktls_info *tx_info;
+	unsigned long index;
+
+	xa_for_each(&u_ctx->tid_list, index, tx_ctx) {
+		tx_info = tx_ctx->chcr_info;
+		clear_conn_resources(tx_info);
+		port_stats = &tx_info->adap->ch_ktls_stats.ktls_port[tx_info->port_id];
+		atomic64_inc(&port_stats->ktls_tx_connection_close);
+		kvfree(tx_info);
+		tx_ctx->chcr_info = NULL;
+		/* release module refcount */
+		module_put(THIS_MODULE);
+	}
+}
+
 static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state)
 {
 	struct chcr_ktls_uld_ctx *u_ctx = handle;
@@ -2139,7 +2213,10 @@ static int chcr_ktls_uld_state_change(void *handle, enum cxgb4_state new_state)
 	case CXGB4_STATE_DETACH:
 		pr_info("%s: Down\n", pci_name(u_ctx->lldi.pdev));
 		mutex_lock(&dev_mutex);
+		u_ctx->detach = true;
 		list_del(&u_ctx->entry);
+		ch_ktls_reset_all_conn(u_ctx);
+		xa_destroy(&u_ctx->tid_list);
 		mutex_unlock(&dev_mutex);
 		break;
 	default:
@@ -2178,6 +2255,7 @@ static void __exit chcr_ktls_exit(void)
 		adap = pci_get_drvdata(u_ctx->lldi.pdev);
 		memset(&adap->ch_ktls_stats, 0, sizeof(adap->ch_ktls_stats));
 		list_del(&u_ctx->entry);
+		xa_destroy(&u_ctx->tid_list);
 		kfree(u_ctx);
 	}
 	mutex_unlock(&dev_mutex);
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
index 18b3b1f024156..10572dc55365a 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
+++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.h
@@ -75,6 +75,8 @@ struct chcr_ktls_ofld_ctx_tx {
 struct chcr_ktls_uld_ctx {
 	struct list_head entry;
 	struct cxgb4_lld_info lldi;
+	struct xarray tid_list;
+	bool detach;
 };
 
 static inline struct chcr_ktls_ofld_ctx_tx *
-- 
GitLab


From c7d8302478ae645c2e9b59f2cf125641875b7dc2 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 13 May 2021 12:46:21 +0000
Subject: [PATCH 0675/3804] net: korina: Fix return value check in
 korina_probe()

In case of error, the function devm_platform_ioremap_resource_byname()
returns ERR_PTR() and never returns NULL. The NULL test in the return
value check should be replaced with IS_ERR().

Fixes: b4cd249a8cc0 ("net: korina: Use devres functions")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/korina.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 6f987a7ffcb36..b30a45725374b 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1315,23 +1315,23 @@ static int korina_probe(struct platform_device *pdev)
 	lp->tx_irq = platform_get_irq_byname(pdev, "tx");
 
 	p = devm_platform_ioremap_resource_byname(pdev, "emac");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->eth_regs = p;
 
 	p = devm_platform_ioremap_resource_byname(pdev, "dma_rx");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Rx DMA registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->rx_dma_regs = p;
 
 	p = devm_platform_ioremap_resource_byname(pdev, "dma_tx");
-	if (!p) {
+	if (IS_ERR(p)) {
 		printk(KERN_ERR DRV_NAME ": cannot remap Tx DMA registers\n");
-		return -ENOMEM;
+		return PTR_ERR(p);
 	}
 	lp->tx_dma_regs = p;
 
-- 
GitLab


From e4df1b0c24350a0f00229ff895a91f1072bd850d Mon Sep 17 00:00:00 2001
From: Tao Liu <thomas.liu@ucloud.cn>
Date: Thu, 13 May 2021 21:08:00 +0800
Subject: [PATCH 0676/3804] openvswitch: meter: fix race when getting now_ms.

We have observed meters working unexpectedly if traffic is 3+Gbit/s
with multiple connections.

now_ms is not protected by meter->lock, so we may get a negative
long_delta_ms when another cpu updated meter->used, then:
    delta_ms = (u32)long_delta_ms;
which will be a large value.

    band->bucket += delta_ms * band->rate;
then we get a wrong band->bucket.

OpenVswitch userspace datapath has fixed the same issue[1] some
time ago, and we port the implementation to kernel datapath.

[1] https://patchwork.ozlabs.org/project/openvswitch/patch/20191025114436.9746-1-i.maximets@ovn.org/

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Signed-off-by: Tao Liu <thomas.liu@ucloud.cn>
Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Reviewed-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 96b524ceabca4..896b8f5bc8853 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -611,6 +611,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 	spin_lock(&meter->lock);
 
 	long_delta_ms = (now_ms - meter->used); /* ms */
+	if (long_delta_ms < 0) {
+		/* This condition means that we have several threads fighting
+		 * for a meter lock, and the one who received the packets a
+		 * bit later wins. Assuming that all racing threads received
+		 * packets at the same time to avoid overflow.
+		 */
+		long_delta_ms = 0;
+	}
 
 	/* Make sure delta_ms will not be too large, so that bucket will not
 	 * wrap around below.
-- 
GitLab


From d6f67afbdf9df5301641b2ef7ac4030abab3e067 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Mon, 10 May 2021 22:39:38 +0900
Subject: [PATCH 0677/3804] btrfs: return 0 for dev_extent_hole_check_zoned
 hole_start in case of error

Commit 7000babddac6 ("btrfs: assign proper values to a bool variable in
dev_extent_hole_check_zoned") assigned false to the hole_size parameter
of dev_extent_hole_check_zoned().

The hole_size parameter is not boolean; it holds the size of the found
hole, so assign 0 to it instead.

Fixes: 7000babddac6 ("btrfs: assign proper values to a bool variable in dev_extent_hole_check_zoned")
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 77cdb75acc15f..bc53939fef48f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1458,7 +1458,7 @@ static bool dev_extent_hole_check_zoned(struct btrfs_device *device,
 		/* Given hole range was invalid (outside of device) */
 		if (ret == -ERANGE) {
 			*hole_start += *hole_size;
-			*hole_size = false;
+			*hole_size = 0;
 			return true;
 		}
 
-- 
GitLab


From 71795ee590111e3636cc3c148289dfa9fa0a5fc3 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 29 Apr 2021 10:51:34 -0400
Subject: [PATCH 0678/3804] btrfs: avoid RCU stalls while running delayed iputs

Generally a delayed iput is added when we might do the final iput, so
usually we'll end up sleeping while processing the delayed iputs
naturally.  However there's no guarantee of this, especially for small
files.  In production we noticed 5 instances of RCU stalls while testing
a kernel release overnight across 1000 machines, so this is relatively
common:

  host count: 5
  rcu: INFO: rcu_sched self-detected stall on CPU
  rcu: ....: (20998 ticks this GP) idle=59e/1/0x4000000000000002 softirq=12333372/12333372 fqs=3208
   	(t=21031 jiffies g=27810193 q=41075) NMI backtrace for cpu 1
  CPU: 1 PID: 1713 Comm: btrfs-cleaner Kdump: loaded Not tainted 5.6.13-0_fbk12_rc1_5520_gec92bffc1ec9 #1
  Call Trace:
    <IRQ> dump_stack+0x50/0x70
    nmi_cpu_backtrace.cold.6+0x30/0x65
    ? lapic_can_unplug_cpu.cold.30+0x40/0x40
    nmi_trigger_cpumask_backtrace+0xba/0xca
    rcu_dump_cpu_stacks+0x99/0xc7
    rcu_sched_clock_irq.cold.90+0x1b2/0x3a3
    ? trigger_load_balance+0x5c/0x200
    ? tick_sched_do_timer+0x60/0x60
    ? tick_sched_do_timer+0x60/0x60
    update_process_times+0x24/0x50
    tick_sched_timer+0x37/0x70
    __hrtimer_run_queues+0xfe/0x270
    hrtimer_interrupt+0xf4/0x210
    smp_apic_timer_interrupt+0x5e/0x120
    apic_timer_interrupt+0xf/0x20 </IRQ>
   RIP: 0010:queued_spin_lock_slowpath+0x17d/0x1b0
   RSP: 0018:ffffc9000da5fe48 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
   RAX: 0000000000000000 RBX: ffff889fa81d0cd8 RCX: 0000000000000029
   RDX: ffff889fff86c0c0 RSI: 0000000000080000 RDI: ffff88bfc2da7200
   RBP: ffff888f2dcdd768 R08: 0000000001040000 R09: 0000000000000000
   R10: 0000000000000001 R11: ffffffff82a55560 R12: ffff88bfc2da7200
   R13: 0000000000000000 R14: ffff88bff6c2a360 R15: ffffffff814bd870
   ? kzalloc.constprop.57+0x30/0x30
   list_lru_add+0x5a/0x100
   inode_lru_list_add+0x20/0x40
   iput+0x1c1/0x1f0
   run_delayed_iput_locked+0x46/0x90
   btrfs_run_delayed_iputs+0x3f/0x60
   cleaner_kthread+0xf2/0x120
   kthread+0x10b/0x130

Fix this by adding a cond_resched_lock() to the loop processing delayed
iputs so we can avoid this sort of stall.

CC: stable@vger.kernel.org # 4.9+
Reviewed-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 69fcdf8f0b1c2..095e452f59f0f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3246,6 +3246,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 		inode = list_first_entry(&fs_info->delayed_iputs,
 				struct btrfs_inode, delayed_iput);
 		run_delayed_iput_locked(fs_info, inode);
+		cond_resched_lock(&fs_info->delayed_iput_lock);
 	}
 	spin_unlock(&fs_info->delayed_iput_lock);
 }
-- 
GitLab


From 15c7745c9a0078edad1f7df5a6bb7b80bc8cca23 Mon Sep 17 00:00:00 2001
From: Boris Burkov <boris@bur.io>
Date: Tue, 6 Apr 2021 15:31:18 -0700
Subject: [PATCH 0679/3804] btrfs: return whole extents in fiemap

  `xfs_io -c 'fiemap <off> <len>' <file>`

can give surprising results on btrfs that differ from xfs.

btrfs prints out extents trimmed to fit the user input. If the user's
fiemap request has an offset, then rather than returning each whole
extent which intersects that range, we also trim the start extent to not
have start < off.

Documentation in filesystems/fiemap.txt and the xfs_io man page suggests
that returning the whole extent is expected.

Some cases which all yield the same fiemap in xfs, but not btrfs:
  dd if=/dev/zero of=$f bs=4k count=1
  sudo xfs_io -c 'fiemap 0 1024' $f
    0: [0..7]: 26624..26631
  sudo xfs_io -c 'fiemap 2048 1024' $f
    0: [4..7]: 26628..26631
  sudo xfs_io -c 'fiemap 2048 4096' $f
    0: [4..7]: 26628..26631
  sudo xfs_io -c 'fiemap 3584 512' $f
    0: [7..7]: 26631..26631
  sudo xfs_io -c 'fiemap 4091 5' $f
    0: [7..6]: 26631..26630

I believe this is a consequence of the logic for merging contiguous
extents represented by separate extent items. That logic needs to track
the last offset as it loops through the extent items, which happens to
pick up the start offset on the first iteration, and trim off the
beginning of the full extent. To fix it, start `off` at 0 rather than
`start` so that we keep the iteration/merging intact without cutting off
the start of the extent.

after the fix, all the above commands give:

  0: [0..7]: 26624..26631

The merging logic is exercised by fstest generic/483, and I have written
a new fstest for checking we don't have backwards or zero-length fiemaps
for cases like those above.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Boris Burkov <boris@bur.io>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f2d1bb2343779..360d997c72263 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5210,7 +5210,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		  u64 start, u64 len)
 {
 	int ret = 0;
-	u64 off = start;
+	u64 off;
 	u64 max = start + len;
 	u32 flags = 0;
 	u32 found_type;
@@ -5245,6 +5245,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		goto out_free_ulist;
 	}
 
+	/*
+	 * We can't initialize that to 'start' as this could miss extents due
+	 * to extent item merging
+	 */
+	off = 0;
 	start = round_down(start, btrfs_inode_sectorsize(inode));
 	len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
 
-- 
GitLab


From 54a40fc3a1da21b52dbf19f72fdc27a2ec740760 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 12 May 2021 16:27:16 +0100
Subject: [PATCH 0680/3804] btrfs: fix removed dentries still existing after
 log is synced

When we move one inode from one directory to another and both the inode
and its previous parent directory were logged before, we are not supposed
to have the dentry for the old parent if we have a power failure after the
log is synced. Only the new dentry is supposed to exist.

Generally this works correctly, however there is a scenario where this is
not currently working, because the old parent of the file/directory that
was moved is not authoritative for a range that includes the dir index and
dir item keys of the old dentry. This case is better explained with the
following example and reproducer:

  # The test requires a very specific layout of keys and items in the
  # fs/subvolume btree to trigger the bug. So we want to make sure that
  # on whatever platform we are, we have the same leaf/node size.
  #
  # Currently in btrfs the node/leaf size can not be smaller than the page
  # size (but it can be greater than the page size). So use the largest
  # supported node/leaf size (64K).

  $ mkfs.btrfs -f -n 65536 /dev/sdc
  $ mount /dev/sdc /mnt

  # "testdir" is inode 257.
  $ mkdir /mnt/testdir
  $ chmod 755 /mnt/testdir

  # Create several empty files to have the directory "testdir" with its
  # items spread over several leaves (7 in this case).
  $ for ((i = 1; i <= 1200; i++)); do
       echo -n > /mnt/testdir/file$i
    done

  # Create our test directory "dira", inode number 1458, which gets all
  # its items in leaf 7.
  #
  # The BTRFS_DIR_ITEM_KEY item for inode 257 ("testdir") that points to
  # the entry named "dira" is in leaf 2, while the BTRFS_DIR_INDEX_KEY
  # item that points to that entry is in leaf 3.
  #
  # For this particular filesystem node size (64K), file count and file
  # names, we end up with the directory entry items from inode 257 in
  # leaves 2 and 3, as previously mentioned - what matters for triggering
  # the bug exercised by this test case is that those items are not placed
  # in leaf 1, they must be placed in a leaf different from the one
  # containing the inode item for inode 257.
  #
  # The corresponding BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY items for
  # the parent inode (257) are the following:
  #
  #    item 460 key (257 DIR_ITEM 3724298081) itemoff 48344 itemsize 34
  #         location key (1458 INODE_ITEM 0) type DIR
  #         transid 6 data_len 0 name_len 4
  #         name: dira
  #
  # and:
  #
  #    item 771 key (257 DIR_INDEX 1202) itemoff 36673 itemsize 34
  #         location key (1458 INODE_ITEM 0) type DIR
  #         transid 6 data_len 0 name_len 4
  #         name: dira

  $ mkdir /mnt/testdir/dira

  # Make sure everything done so far is durably persisted.
  $ sync

  # Now do a change to inode 257 ("testdir") that does not result in
  # COWing leaves 2 and 3 - the leaves that contain the directory items
  # pointing to inode 1458 (directory "dira").
  #
  # Changing permissions, the owner/group, updating or adding a xattr,
  # etc, will not change (COW) leaves 2 and 3. So for the sake of
  # simplicity change the permissions of inode 257, which results in
  # updating its inode item and therefore change (COW) only leaf 1.

  $ chmod 700 /mnt/testdir

  # Now fsync directory inode 257.
  #
  # Since only the first leaf was changed/COWed, we log the inode item of
  # inode 257 and only the dentries found in the first leaf, all have a
  # key type of BTRFS_DIR_ITEM_KEY, and no keys of type
  # BTRFS_DIR_INDEX_KEY, because they sort after the former type and none
  # exist in the first leaf.
  #
  # We also log 3 items that represent ranges for dir items and dir
  # indexes for which the log is authoritative:
  #
  # 1) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
  #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset
  #    in the range [0, 2285968570] (the offset here is the crc32c of the
  #    dentry's name). The value 2285968570 corresponds to the offset of
  #    the first key of leaf 2 (which is of type BTRFS_DIR_ITEM_KEY);
  #
  # 2) a key of type BTRFS_DIR_LOG_ITEM_KEY, which indicates the log is
  #    authoritative for all BTRFS_DIR_ITEM_KEY keys that have an offset
  #    in the range [4293818216, (u64)-1] (the offset here is the crc32c
  #    of the dentry's name). The value 4293818216 corresponds to the
  #    offset of the highest key of type BTRFS_DIR_ITEM_KEY plus 1
  #    (4293818215 + 1), which is located in leaf 2;
  #
  # 3) a key of type BTRFS_DIR_LOG_INDEX_KEY, with an offset of 1203,
  #    which indicates the log is authoritative for all keys of type
  #    BTRFS_DIR_INDEX_KEY that have an offset in the range
  #    [1203, (u64)-1]. The value 1203 corresponds to the offset of the
  #    last key of type BTRFS_DIR_INDEX_KEY plus 1 (1202 + 1), which is
  #    located in leaf 3;
  #
  # Also, because "testdir" is a directory and inode 1458 ("dira") is a
  # child directory, we log inode 1458 too.

  $ xfs_io -c "fsync" /mnt/testdir

  # Now move "dira", inode 1458, to be a child of the root directory
  # (inode 256).
  #
  # Because this inode was previously logged, when "testdir" was fsynced,
  # the log is updated so that the old inode reference, referring to inode
  # 257 as the parent, is deleted and the new inode reference, referring
  # to inode 256 as the parent, is added to the log.

  $ mv /mnt/testdir/dira /mnt

  # Now change some file and fsync it. This guarantees the log changes
  # made by the previous move/rename operation are persisted. We do not
  # need to do any special modification to the file, just any change to
  # any file and sync the log.

  $ xfs_io -c "pwrite -S 0xab 0 64K" -c "fsync" /mnt/testdir/file1

  # Simulate a power failure and then mount again the filesystem to
  # replay the log tree. We want to verify that we are able to mount the
  # filesystem, meaning log replay was successful, and that directory
  # inode 1458 ("dira") only has inode 256 (the filesystem's root) as
  # its parent (and no longer a child of inode 257).
  #
  # It used to happen that during log replay we would end up having
  # inode 1458 (directory "dira") with 2 hard links, being a child of
  # inode 257 ("testdir") and inode 256 (the filesystem's root). This
  # resulted in the tree checker detecting the issue and causing the
  # mount operation to fail (with -EIO).
  #
  # This happened because in the log we have the new name/parent for
  # inode 1458, which results in adding the new dentry with inode 256
  # as the parent, but the previous dentry, under inode 257 was never
  # removed - this is because the ranges for dir items and dir indexes
  # of inode 257 for which the log is authoritative do not include the
  # old dir item and dir index for the dentry of inode 257 referring to
  # inode 1458:
  #
  # - for dir items, the log is authoritative for the ranges
  #   [0, 2285968570] and [4293818216, (u64)-1]. The dir item at inode 257
  #   pointing to inode 1458 has a key of (257 DIR_ITEM 3724298081), as
  #   previously mentioned, so the dir item is not deleted when the log
  #   replay procedure processes the authoritative ranges, as 3724298081
  #   is outside both ranges;
  #
  # - for dir indexes, the log is authoritative for the range
  #   [1203, (u64)-1], and the dir index item of inode 257 pointing to
  #   inode 1458 has a key of (257 DIR_INDEX 1202), as previously
  #   mentioned, so the dir index item is not deleted when the log
  #   replay procedure processes the authoritative range.

  <power failure>

  $ mount /dev/sdc /mnt
  mount: /mnt: can't read superblock on /dev/sdc.

  $ dmesg
  (...)
  [87849.840509] BTRFS info (device sdc): start tree-log replay
  [87849.875719] BTRFS critical (device sdc): corrupt leaf: root=5 block=30539776 slot=554 ino=1458, invalid nlink: has 2 expect no more than 1 for dir
  [87849.878084] BTRFS info (device sdc): leaf 30539776 gen 7 total ptrs 557 free space 2092 owner 5
  [87849.879516] BTRFS info (device sdc): refs 1 lock_owner 0 current 2099108
  [87849.880613] 	item 0 key (1181 1 0) itemoff 65275 itemsize 160
  [87849.881544] 		inode generation 6 size 0 mode 100644
  [87849.882692] 	item 1 key (1181 12 257) itemoff 65258 itemsize 17
  (...)
  [87850.562549] 	item 556 key (1458 12 257) itemoff 16017 itemsize 14
  [87850.563349] BTRFS error (device dm-0): block=30539776 write time tree block corruption detected
  [87850.564386] ------------[ cut here ]------------
  [87850.564920] WARNING: CPU: 3 PID: 2099108 at fs/btrfs/disk-io.c:465 csum_one_extent_buffer+0xed/0x100 [btrfs]
  [87850.566129] Modules linked in: btrfs dm_zero dm_snapshot (...)
  [87850.573789] CPU: 3 PID: 2099108 Comm: mount Not tainted 5.12.0-rc8-btrfs-next-86 #1
  (...)
  [87850.587481] Call Trace:
  [87850.587768]  btree_csum_one_bio+0x244/0x2b0 [btrfs]
  [87850.588354]  ? btrfs_bio_fits_in_stripe+0xd8/0x110 [btrfs]
  [87850.589003]  btrfs_submit_metadata_bio+0xb7/0x100 [btrfs]
  [87850.589654]  submit_one_bio+0x61/0x70 [btrfs]
  [87850.590248]  submit_extent_page+0x91/0x2f0 [btrfs]
  [87850.590842]  write_one_eb+0x175/0x440 [btrfs]
  [87850.591370]  ? find_extent_buffer_nolock+0x1c0/0x1c0 [btrfs]
  [87850.592036]  btree_write_cache_pages+0x1e6/0x610 [btrfs]
  [87850.592665]  ? free_debug_processing+0x1d5/0x240
  [87850.593209]  do_writepages+0x43/0xf0
  [87850.593798]  ? __filemap_fdatawrite_range+0xa4/0x100
  [87850.594391]  __filemap_fdatawrite_range+0xc5/0x100
  [87850.595196]  btrfs_write_marked_extents+0x68/0x160 [btrfs]
  [87850.596202]  btrfs_write_and_wait_transaction.isra.0+0x4d/0xd0 [btrfs]
  [87850.597377]  btrfs_commit_transaction+0x794/0xca0 [btrfs]
  [87850.598455]  ? _raw_spin_unlock_irqrestore+0x32/0x60
  [87850.599305]  ? kmem_cache_free+0x15a/0x3d0
  [87850.600029]  btrfs_recover_log_trees+0x346/0x380 [btrfs]
  [87850.601021]  ? replay_one_extent+0x7d0/0x7d0 [btrfs]
  [87850.601988]  open_ctree+0x13c9/0x1698 [btrfs]
  [87850.602846]  btrfs_mount_root.cold+0x13/0xed [btrfs]
  [87850.603771]  ? kmem_cache_alloc_trace+0x7c9/0x930
  [87850.604576]  ? vfs_parse_fs_string+0x5d/0xb0
  [87850.605293]  ? kfree+0x276/0x3f0
  [87850.605857]  legacy_get_tree+0x30/0x50
  [87850.606540]  vfs_get_tree+0x28/0xc0
  [87850.607163]  fc_mount+0xe/0x40
  [87850.607695]  vfs_kern_mount.part.0+0x71/0x90
  [87850.608440]  btrfs_mount+0x13b/0x3e0 [btrfs]
  (...)
  [87850.629477] ---[ end trace 68802022b99a1ea0 ]---
  [87850.630849] BTRFS: error (device sdc) in btrfs_commit_transaction:2381: errno=-5 IO failure (Error while writing out transaction)
  [87850.632422] BTRFS warning (device sdc): Skipping commit of aborted transaction.
  [87850.633416] BTRFS: error (device sdc) in cleanup_transaction:1978: errno=-5 IO failure
  [87850.634553] BTRFS: error (device sdc) in btrfs_replay_log:2431: errno=-5 IO failure (Failed to recover log tree)
  [87850.637529] BTRFS error (device sdc): open_ctree failed

In this example the inode we moved was a directory, so it was easy to
detect the problem because directories can only have one hard link and
the tree checker immediately detects that. If the moved inode was a file,
then the log replay would succeed and we would end up having both the
new hard link (/mnt/foo) and the old hard link (/mnt/testdir/foo) present,
but only the new one should be present.

Fix this by forcing re-logging of the old parent directory when logging
the new name during a rename operation. This ensures we end up with a log
that is authoritative for a range covering the keys for the old dentry,
therefore causing the old dentry to be deleted when replaying the log.

A test case for fstests will follow up soon.

Fixes: 64d6b281ba4db0 ("btrfs: remove unnecessary check_parent_dirs_for_sync()")
CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a0fc3a1390ab3..fd6b1f13112ed 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6462,6 +6462,24 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	    (!old_dir || old_dir->logged_trans < trans->transid))
 		return;
 
+	/*
+	 * If we are doing a rename (old_dir is not NULL) from a directory that
+	 * was previously logged, make sure the next log attempt on the directory
+	 * is not skipped and logs the inode again. This is because the log may
+	 * not currently be authoritative for a range including the old
+	 * BTRFS_DIR_ITEM_KEY and BTRFS_DIR_INDEX_KEY keys, so we want to make
+	 * sure after a log replay we do not end up with both the new and old
+	 * dentries around (in case the inode is a directory we would have a
+	 * directory with two hard links and 2 inode references for different
+	 * parents). The next log attempt of old_dir will happen at
+	 * btrfs_log_all_parents(), called through btrfs_log_inode_parent()
+	 * below, because we have previously set inode->last_unlink_trans to the
+	 * current transaction ID, either here or at btrfs_record_unlink_dir() in
+	 * case inode is a directory.
+	 */
+	if (old_dir)
+		old_dir->logged_trans = 0;
+
 	btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
 	ctx.logging_new_name = true;
 	/*
-- 
GitLab


From c07531c01d8284aedaf95708ea90e76d11af0e21 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 10 May 2021 14:50:24 +0300
Subject: [PATCH 0681/3804] netfilter: flowtable: Remove redundant hw refresh
 bit

Offloading conns could fail for multiple reasons and a hw refresh bit is
set to try to reoffload it in next sw packet.
However, in some cases, now or in the future, the hw refresh bit may
not be set even though a refresh could succeed.
Remove the hw refresh bit and do offload refresh if requested.
There won't be a new work entry if a work is already pending
anyway as there is the hw pending bit.

Fixes: 8b3646d6e0c4 ("net/sched: act_ct: Support refreshing the flow table entries")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h | 1 -
 net/netfilter/nf_flow_table_core.c    | 3 +--
 net/netfilter/nf_flow_table_offload.c | 7 ++++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 51d8eb99764dc..48ef7460ff304 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -157,7 +157,6 @@ enum nf_flow_flags {
 	NF_FLOW_HW,
 	NF_FLOW_HW_DYING,
 	NF_FLOW_HW_DEAD,
-	NF_FLOW_HW_REFRESH,
 	NF_FLOW_HW_PENDING,
 };
 
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 39c02d1aeedfa..1d02650dd715a 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -306,8 +306,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
 {
 	flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
 
-	if (likely(!nf_flowtable_hw_offload(flow_table) ||
-		   !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags)))
+	if (likely(!nf_flowtable_hw_offload(flow_table)))
 		return;
 
 	nf_flow_offload_add(flow_table, flow);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2af7bdb384077..528b2f1726844 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -902,10 +902,11 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
 
 	err = flow_offload_rule_add(offload, flow_rule);
 	if (err < 0)
-		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
-	else
-		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
+		goto out;
+
+	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
 
+out:
 	nf_flow_offload_destroy(flow_rule);
 }
 
-- 
GitLab


From f0b3d338064e1fe7531f0d2977e35f3b334abfb4 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Mon, 10 May 2021 07:58:22 +0200
Subject: [PATCH 0682/3804] netfilter: nft_set_pipapo_avx2: Add
 irq_fpu_usable() check, fallback to non-AVX2 version

Arturo reported this backtrace:

[709732.358791] WARNING: CPU: 3 PID: 456 at arch/x86/kernel/fpu/core.c:128 kernel_fpu_begin_mask+0xae/0xe0
[709732.358793] Modules linked in: binfmt_misc nft_nat nft_chain_nat nf_nat nft_counter nft_ct nf_tables nf_conntrack_netlink nfnetlink 8021q garp stp mrp llc vrf intel_rapl_msr intel_rapl_common skx_edac nfit libnvdimm ipmi_ssif x86_pkg_temp_thermal intel_powerclamp coretemp crc32_pclmul mgag200 ghash_clmulni_intel drm_kms_helper cec aesni_intel drm libaes crypto_simd cryptd glue_helper mei_me dell_smbios iTCO_wdt evdev intel_pmc_bxt iTCO_vendor_support dcdbas pcspkr rapl dell_wmi_descriptor wmi_bmof sg i2c_algo_bit watchdog mei acpi_ipmi ipmi_si button nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ipmi_devintf ipmi_msghandler ip_tables x_tables autofs4 ext4 crc16 mbcache jbd2 dm_mod raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor sd_mod t10_pi crc_t10dif crct10dif_generic raid6_pq libcrc32c crc32c_generic raid1 raid0 multipath linear md_mod ahci libahci tg3 libata xhci_pci libphy xhci_hcd ptp usbcore crct10dif_pclmul crct10dif_common bnxt_en crc32c_intel scsi_mod
[709732.358941]  pps_core i2c_i801 lpc_ich i2c_smbus wmi usb_common
[709732.358957] CPU: 3 PID: 456 Comm: jbd2/dm-0-8 Not tainted 5.10.0-0.bpo.5-amd64 #1 Debian 5.10.24-1~bpo10+1
[709732.358959] Hardware name: Dell Inc. PowerEdge R440/04JN2K, BIOS 2.9.3 09/23/2020
[709732.358964] RIP: 0010:kernel_fpu_begin_mask+0xae/0xe0
[709732.358969] Code: ae 54 24 04 83 e3 01 75 38 48 8b 44 24 08 65 48 33 04 25 28 00 00 00 75 33 48 83 c4 10 5b c3 65 8a 05 5e 21 5e 76 84 c0 74 92 <0f> 0b eb 8e f0 80 4f 01 40 48 81 c7 00 14 00 00 e8 dd fb ff ff eb
[709732.358972] RSP: 0018:ffffbb9700304740 EFLAGS: 00010202
[709732.358976] RAX: 0000000000000001 RBX: 0000000000000003 RCX: 0000000000000001
[709732.358979] RDX: ffffbb9700304970 RSI: ffff922fe1952e00 RDI: 0000000000000003
[709732.358981] RBP: ffffbb9700304970 R08: ffff922fc868a600 R09: ffff922fc711e462
[709732.358984] R10: 000000000000005f R11: ffff922ff0b27180 R12: ffffbb9700304960
[709732.358987] R13: ffffbb9700304b08 R14: ffff922fc664b6c8 R15: ffff922fc664b660
[709732.358990] FS:  0000000000000000(0000) GS:ffff92371fec0000(0000) knlGS:0000000000000000
[709732.358993] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[709732.358996] CR2: 0000557a6655bdd0 CR3: 000000026020a001 CR4: 00000000007706e0
[709732.358999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[709732.359001] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[709732.359003] PKRU: 55555554
[709732.359005] Call Trace:
[709732.359009]  <IRQ>
[709732.359035]  nft_pipapo_avx2_lookup+0x4c/0x1cba [nf_tables]
[709732.359046]  ? sched_clock+0x5/0x10
[709732.359054]  ? sched_clock_cpu+0xc/0xb0
[709732.359061]  ? record_times+0x16/0x80
[709732.359068]  ? plist_add+0xc1/0x100
[709732.359073]  ? psi_group_change+0x47/0x230
[709732.359079]  ? skb_clone+0x4d/0xb0
[709732.359085]  ? enqueue_task_rt+0x22b/0x310
[709732.359098]  ? bnxt_start_xmit+0x1e8/0xaf0 [bnxt_en]
[709732.359102]  ? packet_rcv+0x40/0x4a0
[709732.359121]  nft_lookup_eval+0x59/0x160 [nf_tables]
[709732.359133]  nft_do_chain+0x350/0x500 [nf_tables]
[709732.359152]  ? nft_lookup_eval+0x59/0x160 [nf_tables]
[709732.359163]  ? nft_do_chain+0x364/0x500 [nf_tables]
[709732.359172]  ? fib4_rule_action+0x6d/0x80
[709732.359178]  ? fib_rules_lookup+0x107/0x250
[709732.359184]  nft_nat_do_chain+0x8a/0xf2 [nft_chain_nat]
[709732.359193]  nf_nat_inet_fn+0xea/0x210 [nf_nat]
[709732.359202]  nf_nat_ipv4_out+0x14/0xa0 [nf_nat]
[709732.359207]  nf_hook_slow+0x44/0xc0
[709732.359214]  ip_output+0xd2/0x100
[709732.359221]  ? __ip_finish_output+0x210/0x210
[709732.359226]  ip_forward+0x37d/0x4a0
[709732.359232]  ? ip4_key_hashfn+0xb0/0xb0
[709732.359238]  ip_sublist_rcv_finish+0x4f/0x60
[709732.359243]  ip_sublist_rcv+0x196/0x220
[709732.359250]  ? ip_rcv_finish_core.isra.22+0x400/0x400
[709732.359255]  ip_list_rcv+0x137/0x160
[709732.359264]  __netif_receive_skb_list_core+0x29b/0x2c0
[709732.359272]  netif_receive_skb_list_internal+0x1a6/0x2d0
[709732.359280]  gro_normal_list.part.156+0x19/0x40
[709732.359286]  napi_complete_done+0x67/0x170
[709732.359298]  bnxt_poll+0x105/0x190 [bnxt_en]
[709732.359304]  ? irqentry_exit+0x29/0x30
[709732.359309]  ? asm_common_interrupt+0x1e/0x40
[709732.359315]  net_rx_action+0x144/0x3c0
[709732.359322]  __do_softirq+0xd5/0x29c
[709732.359329]  asm_call_irq_on_stack+0xf/0x20
[709732.359332]  </IRQ>
[709732.359339]  do_softirq_own_stack+0x37/0x40
[709732.359346]  irq_exit_rcu+0x9d/0xa0
[709732.359353]  common_interrupt+0x78/0x130
[709732.359358]  asm_common_interrupt+0x1e/0x40
[709732.359366] RIP: 0010:crc_41+0x0/0x1e [crc32c_intel]
[709732.359370] Code: ff ff f2 4d 0f 38 f1 93 a8 fe ff ff f2 4c 0f 38 f1 81 b0 fe ff ff f2 4c 0f 38 f1 8a b0 fe ff ff f2 4d 0f 38 f1 93 b0 fe ff ff <f2> 4c 0f 38 f1 81 b8 fe ff ff f2 4c 0f 38 f1 8a b8 fe ff ff f2 4d
[709732.359373] RSP: 0018:ffffbb97008dfcd0 EFLAGS: 00000246
[709732.359377] RAX: 000000000000002a RBX: 0000000000000400 RCX: ffff922fc591dd50
[709732.359379] RDX: ffff922fc591dea0 RSI: 0000000000000a14 RDI: ffffffffc00dddc0
[709732.359382] RBP: 0000000000001000 R08: 000000000342d8c3 R09: 0000000000000000
[709732.359384] R10: 0000000000000000 R11: ffff922fc591dff0 R12: ffffbb97008dfe58
[709732.359386] R13: 000000000000000a R14: ffff922fd2b91e80 R15: ffff922fef83fe38
[709732.359395]  ? crc_43+0x1e/0x1e [crc32c_intel]
[709732.359403]  ? crc32c_pcl_intel_update+0x97/0xb0 [crc32c_intel]
[709732.359419]  ? jbd2_journal_commit_transaction+0xaec/0x1a30 [jbd2]
[709732.359425]  ? irq_exit_rcu+0x3e/0xa0
[709732.359447]  ? kjournald2+0xbd/0x270 [jbd2]
[709732.359454]  ? finish_wait+0x80/0x80
[709732.359470]  ? commit_timeout+0x10/0x10 [jbd2]
[709732.359476]  ? kthread+0x116/0x130
[709732.359481]  ? kthread_park+0x80/0x80
[709732.359488]  ? ret_from_fork+0x1f/0x30
[709732.359494] ---[ end trace 081a19978e5f09f5 ]---

that is, nft_pipapo_avx2_lookup() uses the FPU running from a softirq
that interrupted a kthread, also using the FPU.

That's exactly the reason why irq_fpu_usable() is there: use it, and
if we can't use the FPU, fall back to the non-AVX2 version of the
lookup operation, i.e. nft_pipapo_lookup().

Reported-by: Arturo Borrero Gonzalez <arturo@netfilter.org>
Cc: <stable@vger.kernel.org> # 5.6.x
Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_pipapo.c      | 4 ++--
 net/netfilter/nft_set_pipapo.h      | 2 ++
 net/netfilter/nft_set_pipapo_avx2.c | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 528a2d7ca9918..dce866d93feed 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -408,8 +408,8 @@ int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
  *
  * Return: true on match, false otherwise.
  */
-static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
-			      const u32 *key, const struct nft_set_ext **ext)
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext)
 {
 	struct nft_pipapo *priv = nft_set_priv(set);
 	unsigned long *res_map, *fill_map;
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 25a75591583eb..d84afb8fa79a1 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -178,6 +178,8 @@ struct nft_pipapo_elem {
 
 int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst,
 		  union nft_pipapo_map_bucket *mt, bool match_only);
+bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
+		       const u32 *key, const struct nft_set_ext **ext);
 
 /**
  * pipapo_and_field_buckets_4bit() - Intersect 4-bit buckets
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index d65ae0e23028d..eabdb8d552eef 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1131,6 +1131,9 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 	bool map_index;
 	int i, ret = 0;
 
+	if (unlikely(!irq_fpu_usable()))
+		return nft_pipapo_lookup(net, set, key, ext);
+
 	m = rcu_dereference(priv->match);
 
 	/* This also protects access to all data related to scratch maps */
-- 
GitLab


From 41f45fb045bcc20e71eb705b361356e715682162 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Thu, 13 May 2021 13:41:41 +0200
Subject: [PATCH 0683/3804] x86/asm: Make <asm/asm.h> valid on cross-builds as
 well

Stephen Rothwell reported that the objtool cross-build breaks on
non-x86 hosts:

  > tools/arch/x86/include/asm/asm.h:185:24: error: invalid register name for 'current_stack_pointer'
  >   185 | register unsigned long current_stack_pointer asm(_ASM_SP);
  >       |                        ^~~~~~~~~~~~~~~~~~~~~

The PowerPC host obviously doesn't know much about x86 register names.

Protect the kernel-specific bits of <asm/asm.h>, so that it can be
included by tooling and cross-built.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/asm.h       | 4 ++++
 tools/arch/x86/include/asm/asm.h | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 507a37a460276..3ad3da9a7d974 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -120,6 +120,8 @@
 # define CC_OUT(c) [_cc_ ## c] "=qm"
 #endif
 
+#ifdef __KERNEL__
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
@@ -186,4 +188,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif /* __ASSEMBLY__ */
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_ASM_H */
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index 507a37a460276..3ad3da9a7d974 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -120,6 +120,8 @@
 # define CC_OUT(c) [_cc_ ## c] "=qm"
 #endif
 
+#ifdef __KERNEL__
+
 /* Exception table entry */
 #ifdef __ASSEMBLY__
 # define _ASM_EXTABLE_HANDLE(from, to, handler)			\
@@ -186,4 +188,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
 #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
 #endif /* __ASSEMBLY__ */
 
+#endif /* __KERNEL__ */
+
 #endif /* _ASM_X86_ASM_H */
-- 
GitLab


From d46f61b20b060f03b58fde170ee618f17dc6f99d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 13 May 2021 16:16:47 +0200
Subject: [PATCH 0684/3804] jump_label/x86: Remove unused JUMP_LABEL_NOP_SIZE

JUMP_LABEL_NOP_SIZE is now unused, remove it.

Fixes: 001951bea748 ("jump_label, x86: Add variable length patching support")
Reported-by: Miroslav Benes <mbenes@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/YJ00zxsvocDV5vLU@hirez.programming.kicks-ass.net
---
 arch/x86/kernel/jump_label.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index a762dc1c615eb..674906fad43b1 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -17,8 +17,6 @@
 #include <asm/text-patching.h>
 #include <asm/insn.h>
 
-#define JUMP_LABEL_NOP_SIZE	JMP32_INSN_SIZE
-
 int arch_jump_entry_size(struct jump_entry *entry)
 {
 	struct insn insn = {};
-- 
GitLab


From 48001d26c19f02c33795829ec9fc71a0d8d42413 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 13 May 2021 16:15:50 +0200
Subject: [PATCH 0685/3804] objtool: Reflow handle_jump_alt()

Miroslav figured the code flow in handle_jump_alt() was sub-optimal
with that goto. Reflow the code to make it clearer.

Reported-by: Miroslav Benes <mbenes@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/YJ00lgslY+IpA/rL@hirez.programming.kicks-ass.net
---
 tools/objtool/check.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2c6a93edf27ec..e5947fbb9e7a6 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1225,17 +1225,9 @@ static int handle_jump_alt(struct objtool_file *file,
 			   struct instruction *orig_insn,
 			   struct instruction **new_insn)
 {
-	if (orig_insn->type == INSN_NOP) {
-do_nop:
-		if (orig_insn->len == 2)
-			file->jl_nop_short++;
-		else
-			file->jl_nop_long++;
+	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
+	    orig_insn->type != INSN_NOP) {
 
-		return 0;
-	}
-
-	if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
 		WARN_FUNC("unsupported instruction at jump label",
 			  orig_insn->sec, orig_insn->offset);
 		return -1;
@@ -1252,7 +1244,15 @@ do_nop:
 			       orig_insn->offset, orig_insn->len,
 			       arch_nop_insn(orig_insn->len));
 		orig_insn->type = INSN_NOP;
-		goto do_nop;
+	}
+
+	if (orig_insn->type == INSN_NOP) {
+		if (orig_insn->len == 2)
+			file->jl_nop_short++;
+		else
+			file->jl_nop_long++;
+
+		return 0;
 	}
 
 	if (orig_insn->len == 2)
-- 
GitLab


From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 May 2021 14:49:58 +1000
Subject: [PATCH 0686/3804] powerpc/64s: Fix crashes when toggling stf barrier

The STF (store-to-load forwarding) barrier mitigation can be
enabled/disabled at runtime via a debugfs file (stf_barrier), which
causes the kernel to patch itself to enable/disable the relevant
mitigations.

However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:

  User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0)
  segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1
  code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018
  code: 2c030006 41810154 3860ffb6 e9210098 <e94d8ff0> 7d295279 39400000 40820a3c

Shows that we returned to userspace without restoring the user r13
value, due to executing the partially patched STF exit code.

Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.

Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit")
Cc: stable@vger.kernel.org # v4.17+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e139..10083add8b336 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
 #include <asm/cputable.h>
 #include <asm/code-patching.h>
 #include <asm/page.h>
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
 		                                           : "unknown");
 }
 
+static int __do_stf_barrier_fixups(void *data)
+{
+	enum stf_barrier_type *types = data;
+
+	do_stf_entry_barrier_fixups(*types);
+	do_stf_exit_barrier_fixups(*types);
+
+	return 0;
+}
 
 void do_stf_barrier_fixups(enum stf_barrier_type types)
 {
-	do_stf_entry_barrier_fixups(types);
-	do_stf_exit_barrier_fixups(types);
+	/*
+	 * The call to the fallback entry flush, and the fallback/sync-ori exit
+	 * flush can not be safely patched in/out while other CPUs are executing
+	 * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+	 * spin in the stop machine core with interrupts hard disabled.
+	 */
+	stop_machine(__do_stf_barrier_fixups, &types, NULL);
 }
 
 void do_uaccess_flush_fixups(enum l1d_flush_type types)
-- 
GitLab


From aec86b052df6541cc97c5fca44e5934cbea4963b Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 May 2021 14:49:59 +1000
Subject: [PATCH 0687/3804] powerpc/64s: Fix crashes when toggling entry flush
 barrier

The entry flush mitigation can be enabled/disabled at runtime via a
debugfs file (entry_flush), which causes the kernel to patch itself to
enable/disable the relevant mitigations.

However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:

  sleeper[15639]: segfault (11) at c000000000004c20 nip c000000000004c20 lr c000000000004c20

Shows that we returned to userspace with a corrupted LR that points into
the kernel, due to executing the partially patched call to the fallback
entry flush (ie. we missed the LR restore).

Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.

Fixes: f79643787e0a ("powerpc/64s: flush L1D on kernel entry")
Cc: stable@vger.kernel.org # v5.10+
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-2-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 10083add8b336..0aefa6a4a259b 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -299,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
 						: "unknown");
 }
 
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
 {
+	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
 	unsigned int instrs[3], *dest;
 	long *start, *end;
 	int i;
@@ -369,6 +370,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
 							: "ori type" :
 		(types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
 						: "unknown");
+
+	return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+	/*
+	 * The call to the fallback flush can not be safely patched in/out while
+	 * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+	 * CPU while all other CPUs spin in the stop machine core with interrupts
+	 * hard disabled.
+	 */
+	stop_machine(__do_entry_flush_fixups, &types, NULL);
 }
 
 void do_rfi_flush_fixups(enum l1d_flush_type types)
-- 
GitLab


From 49b39ec248af863781a13aa6d81c5f69a2928094 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 May 2021 00:07:59 +1000
Subject: [PATCH 0688/3804] powerpc/64s: Fix entry flush patching w/strict RWX
 & hash

The entry flush mitigation can be enabled/disabled at runtime. When this
happens it results in the kernel patching its own instructions to
enable/disable the mitigation sequence.

With strict kernel RWX enabled instruction patching happens via a
secondary mapping of the kernel text, so that we don't have to make the
primary mapping writable. With the hash MMU this leads to a hash fault,
which causes us to execute the exception entry which contains the entry
flush mitigation.

This means we end up executing the entry flush in a semi-patched state,
ie. after we have patched the first instruction but before we patch the
second or third instruction of the sequence.

On machines with updated firmware the entry flush is a series of special
nops, and it's safe to execute in a semi-patched state.

However when using the fallback flush the sequence is mflr/branch/mtlr,
and so it's not safe to execute if we have patched out the mflr but not
the other two instructions. Doing so leads to us corrupting LR, leading
to an oops, for example:

  # echo 0 > /sys/kernel/debug/powerpc/entry_flush
  kernel tried to execute exec-protected page (c000000002971000) - exploit attempt? (uid: 0)
  BUG: Unable to handle kernel instruction fetch
  Faulting instruction address: 0xc000000002971000
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  CPU: 0 PID: 2215 Comm: bash Not tainted 5.13.0-rc1-00010-gda3bb206c9ce #1
  NIP:  c000000002971000 LR: c000000002971000 CTR: c000000000120c40
  REGS: c000000013243840 TRAP: 0400   Not tainted  (5.13.0-rc1-00010-gda3bb206c9ce)
  MSR:  8000000010009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48428482  XER: 00000000
  ...
  NIP  0xc000000002971000
  LR   0xc000000002971000
  Call Trace:
    do_patch_instruction+0xc4/0x340 (unreliable)
    do_entry_flush_fixups+0x100/0x3b0
    entry_flush_set+0x50/0xe0
    simple_attr_write+0x160/0x1a0
    full_proxy_write+0x8c/0x110
    vfs_write+0xf0/0x340
    ksys_write+0x84/0x140
    system_call_exception+0x164/0x2d0
    system_call_common+0xec/0x278

The simplest fix is to change the order in which we patch the
instructions, so that the sequence is always safe to execute. For the
non-fallback flushes it doesn't matter what order we patch in.

Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 59 ++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 0aefa6a4a259b..5d12e37fa8bfd 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -325,6 +325,31 @@ static int __do_entry_flush_fixups(void *data)
 	if (types & L1D_FLUSH_MTTRIG)
 		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
 
+	/*
+	 * If we're patching in or out the fallback flush we need to be careful about the
+	 * order in which we patch instructions. That's because it's possible we could
+	 * take a page fault after patching one instruction, so the sequence of
+	 * instructions must be safe even in a half patched state.
+	 *
+	 * To make that work, when patching in the fallback flush we patch in this order:
+	 *  - the mflr		(dest)
+	 *  - the mtlr		(dest + 2)
+	 *  - the branch	(dest + 1)
+	 *
+	 * That ensures the sequence is safe to execute at any point. In contrast if we
+	 * patch the mtlr last, it's possible we could return from the branch and not
+	 * restore LR, leading to a crash later.
+	 *
+	 * When patching out the fallback flush (either with nops or another flush type),
+	 * we patch in this order:
+	 *  - the branch	(dest + 1)
+	 *  - the mtlr		(dest + 2)
+	 *  - the mflr		(dest)
+	 *
+	 * Note we are protected by stop_machine() from other CPUs executing the code in a
+	 * semi-patched state.
+	 */
+
 	start = PTRRELOC(&__start___entry_flush_fixup);
 	end = PTRRELOC(&__stop___entry_flush_fixup);
 	for (i = 0; start < end; start++, i++) {
@@ -332,15 +357,16 @@ static int __do_entry_flush_fixups(void *data)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
 			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 	start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -350,15 +376,16 @@ static int __do_entry_flush_fixups(void *data)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
 			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 
-- 
GitLab


From 5b48ba2fbd77bc68feebd336ffad5ff166782bde Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 14 May 2021 00:08:00 +1000
Subject: [PATCH 0689/3804] powerpc/64s: Fix stf mitigation patching w/strict
 RWX & hash

The stf entry barrier fallback is unsafe to execute in a semi-patched
state, which can happen when enabling/disabling the mitigation with
strict kernel RWX enabled and using the hash MMU.

See the previous commit for more details.

Fix it by changing the order in which we patch the instructions.

Note the stf barrier fallback is only used on Power6 or earlier.

Fixes: bd573a81312f ("powerpc/mm/64s: Allow STRICT_KERNEL_RWX again")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210513140800.1391706-2-mpe@ellerman.id.au
---
 arch/powerpc/lib/feature-fixups.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 5d12e37fa8bfd..fe26f2fa0f3f8 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -150,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types & STF_BARRIER_FALLBACK)
+		// See comment in do_entry_flush_fixups() RE order of patching
+		if (types & STF_BARRIER_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
 			patch_branch((struct ppc_inst *)(dest + 1),
-				     (unsigned long)&stf_barrier_fallback,
-				     BRANCH_SET_LINK);
-		else
-			patch_instruction((struct ppc_inst *)(dest + 1),
-					  ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+				     (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+		} else {
+			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
 	}
 
 	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
-- 
GitLab


From 4ec5feec1ad029bdf7d49bc50ccc0c195eeabe93 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 3 May 2021 21:17:08 +1000
Subject: [PATCH 0690/3804] powerpc/64s: Make NMI record implicitly soft-masked
 code as irqs disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scv support introduced the notion of code that implicitly soft-masks
irqs due to the instruction addresses. This is required because scv
enters the kernel with MSR[EE]=1.

If a NMI (including soft-NMI) interrupt hits when we are implicitly
soft-masked then its regs->softe does not reflect this because it is
derived from the explicit soft mask state (paca->irq_soft_mask). This
makes arch_irq_disabled_regs(regs) return false.

This can trigger a warning in the soft-NMI watchdog code (shown below).
Fix it by having NMI interrupts set regs->softe to disabled in case of
interrupting an implicit soft-masked region.

  ------------[ cut here ]------------
  WARNING: CPU: 41 PID: 1103 at arch/powerpc/kernel/watchdog.c:259 soft_nmi_interrupt+0x3e4/0x5f0
  CPU: 41 PID: 1103 Comm: (spawn) Not tainted
  NIP:  c000000000039534 LR: c000000000039234 CTR: c000000000009a00
  REGS: c000007fffbcf940 TRAP: 0700   Not tainted
  MSR:  9000000000021033 <SF,HV,ME,IR,DR,RI,LE>  CR: 22042482  XER: 200400ad
  CFAR: c000000000039260 IRQMASK: 3
  GPR00: c000000000039204 c000007fffbcfbe0 c000000001d6c300 0000000000000003
  GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
  GPR08: 0000007ffd4e0000 0000000000000000 c000007ffffceb00 7265677368657265
  GPR12: 9000000000009033 c000007ffffceb00 00000f7075bf4480 000000000000002a
  GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
  GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
  GPR24: 0000000000000000 0000000000000029 c000000001dae058 0000000000000029
  GPR28: 0000000000000000 0000000000000800 0000000000000009 c000007fffbcfd60
  NIP [c000000000039534] soft_nmi_interrupt+0x3e4/0x5f0
  LR [c000000000039234] soft_nmi_interrupt+0xe4/0x5f0
  Call Trace:
  [c000007fffbcfbe0] [c000000000039204] soft_nmi_interrupt+0xb4/0x5f0 (unreliable)
  [c000007fffbcfcf0] [c00000000000c0e8] soft_nmi_common+0x138/0x1c4
  --- interrupt: 900 at end_real_trampolines+0x0/0x1000
  NIP:  c000000000003000 LR: 00007ca426adb03c CTR: 900000000280f033
  REGS: c000007fffbcfd60 TRAP: 0900
  MSR:  9000000000009033 <SF,HV,EE,ME,IR,DR,RI,LE>  CR: 44042482  XER: 200400ad
  CFAR: 00007ca426946020 IRQMASK: 0
  GPR00: 00000000000000ad 00007ffffa45d050 00007ca426b07f00 0000000000000035
  GPR04: 00007ffffa45d078 0000000000000000 0000000000000008 0000000000000020
  GPR08: 0000000000000000 0000000000100000 0000000010000000 00007ffffa45d110
  GPR12: 0000000000000001 00007ca426d4e680 00000f7075bf4480 000000000000002a
  GPR16: 00000f705745a528 00007ffffa45ddd8 00000f70574d0008 0000000000000000
  GPR20: 00000f7075c58d70 00000f7057459c38 0000000000000001 0000000000000040
  GPR24: 0000000000000000 00000f7057473f68 0000000000000003 000000000000041b
  GPR28: 00007ffffa45d4c4 0000000000000035 0000000000000000 00000f7057473f68
  NIP [c000000000003000] end_real_trampolines+0x0/0x1000
  LR [00007ca426adb03c] 0x7ca426adb03c
  --- interrupt: 900
  Instruction dump:
  60000000 60000000 60420000 38600001 482b3ae5 60000000 e93f0138 a36d0008
  7daa6b78 71290001 7f7907b4 4082fd34 <0fe00000> 4bfffd2c 60420000 ea6100a8
  ---[ end trace dc75f67d819779da ]---

Fixes: 118178e62e2e ("powerpc: move NMI entry/exit code into wrapper")
Reported-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210503111708.758261-1-npiggin@gmail.com
---
 arch/powerpc/include/asm/interrupt.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index c77e8f57ff062..59f704408d65d 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -220,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
 	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+				regs->nip < (unsigned long)__end_interrupts) {
+		// Kernel code running below __end_interrupts is
+		// implicitly soft-masked.
+		regs->softe = IRQS_ALL_DISABLED;
+	}
+
 	/* Don't do any per-CPU operations until interrupt state is fixed */
 
 	if (nmi_disables_ftrace(regs)) {
-- 
GitLab


From c6ac667b07996929835b512de0e9a988977e6abc Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 14 May 2021 14:40:08 +1000
Subject: [PATCH 0691/3804] powerpc/64e/interrupt: Fix nvgprs being clobbered

Some interrupt handlers have an "extra" that saves 1 or 2
registers (r14, r15) in the paca save area and makes them available to
use by the handler.

The change to always save nvgprs in exception handlers led to some
interrupt handlers saving those scratch r14 / r15 registers into the
interrupt frame's GPR saves, which get restored on interrupt exit.

Fix this by always reloading those scratch registers from paca before
the EXCEPTION_COMMON that saves nvgprs.

Fixes: 4228b2c3d20e ("powerpc/64e/interrupt: always save nvgprs on interrupt")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210514044008.1955783-1-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64e.S | 38 ++++++++++++++++++----------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 7c3654b0d0f47..f1ae710274bc9 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -340,6 +340,12 @@ ret_from_mc_except:
 	andi.	r10,r10,IRQS_DISABLED;	/* yes -> go out of line */ \
 	bne	masked_interrupt_book3e_##n
 
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
 #define PROLOG_ADDITION_2REGS_GEN(n)					    \
 	std	r14,PACA_EXGEN+EX_R14(r13);				    \
 	std	r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x300)
 	b	storage_fault_common
 
@@ -544,6 +554,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	li	r15,0
 	mr	r14,r10
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x400)
 	b	storage_fault_common
 
@@ -557,6 +571,10 @@ __end_interrupts:
 				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
 	EXCEPTION_COMMON(0x600)
 	b	alignment_more	/* no room, go out of line */
 
@@ -565,10 +583,10 @@ __end_interrupts:
 	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
 				PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
-	EXCEPTION_COMMON(0x700)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXGEN+EX_R14(r13)
+	EXCEPTION_COMMON(0x700)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	program_check_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	 * normal exception
 	 */
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_CRIT(0xd00)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXCRIT+EX_R14(r13)
 	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	EXCEPTION_COMMON_CRIT(0xd00)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
 	 * normal exception
 	 */
 	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_DBG(0xd08)
 	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
 	ld	r14,PACA_EXDBG+EX_R14(r13)
 	ld	r15,PACA_EXDBG+EX_R15(r13)
+	EXCEPTION_COMMON_DBG(0xd08)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
  * original values stashed away in the PACA
  */
 storage_fault_common:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
 	bl	do_page_fault
 	b	interrupt_return
 
@@ -944,11 +958,7 @@ storage_fault_common:
  * continues here.
  */
 alignment_more:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
 	bl	alignment_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
-- 
GitLab


From eb8500b874cf295971a6a2a04e14eb0854197a3c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 30 Apr 2021 05:23:43 -0700
Subject: [PATCH 0692/3804] thermal/drivers/intel: Initialize RW trip to
 THERMAL_TEMP_INVALID

After commit 81ad4276b505 ("Thermal: Ignore invalid trip points") all
user_space governor notifications via RW trip points are broken in Intel
thermal drivers. That commit marks trip points with a value of 0 during
the call to thermal_zone_device_register() as invalid. RW trip points can
be 0 as user space will set the correct trip temperature later.

During driver init, x86_package_temp and all int340x drivers set the RW
trip temperature to 0. This results in all these trips being marked as
invalid by the thermal core.

To fix this, initialize RW trips to THERMAL_TEMP_INVALID instead of 0.

Cc: <stable@vger.kernel.org>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210430122343.1789899-1-srinivas.pandruvada@linux.intel.com
---
 drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c | 4 ++++
 drivers/thermal/intel/x86_pkg_temp_thermal.c                 | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
index d1248ba943a4e..62c0aa5d07837 100644
--- a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -237,6 +237,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
 	if (ACPI_FAILURE(status))
 		trip_cnt = 0;
 	else {
+		int i;
+
 		int34x_thermal_zone->aux_trips =
 			kcalloc(trip_cnt,
 				sizeof(*int34x_thermal_zone->aux_trips),
@@ -247,6 +249,8 @@ struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
 		}
 		trip_mask = BIT(trip_cnt) - 1;
 		int34x_thermal_zone->aux_trip_nr = trip_cnt;
+		for (i = 0; i < trip_cnt; ++i)
+			int34x_thermal_zone->aux_trips[i] = THERMAL_TEMP_INVALID;
 	}
 
 	trip_cnt = int340x_thermal_read_trips(int34x_thermal_zone);
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
index 295742e839602..4d8edc61a78b2 100644
--- a/drivers/thermal/intel/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -166,7 +166,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd,
 	if (thres_reg_value)
 		*temp = zonedev->tj_max - thres_reg_value * 1000;
 	else
-		*temp = 0;
+		*temp = THERMAL_TEMP_INVALID;
 	pr_debug("sys_get_trip_temp %d\n", *temp);
 
 	return 0;
-- 
GitLab


From 1b6604896e78969baffc1b6cc6bc175f95929ac4 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:48 +0900
Subject: [PATCH 0693/3804] ALSA: dice: fix stream format at middle sampling
 rate for Alesis iO 26

Alesis iO 26 FireWire has two pairs of digital optical interfaces. It
delivers PCM frames from the interfaces in the second isochronous packet
stream. Although both of the interfaces are available at 44.1/48.0 kHz,
only the first one is available at 88.2/96.0 kHz. This reduces the
number of PCM samples to 4 in the Multi Bit Linear Audio data channels
of data blocks in the second isochronous packet stream.

This commit fixes hardcoded stream formats.

Cc: <stable@vger.kernel.org>
Fixes: 28b208f600a3 ("ALSA: dice: add parameters of stream formats for models produced by Alesis")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-2-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-alesis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/firewire/dice/dice-alesis.c b/sound/firewire/dice/dice-alesis.c
index 0916864511d50..27c13b9cc9efd 100644
--- a/sound/firewire/dice/dice-alesis.c
+++ b/sound/firewire/dice/dice-alesis.c
@@ -16,7 +16,7 @@ alesis_io14_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 static const unsigned int
 alesis_io26_tx_pcm_chs[MAX_STREAMS][SND_DICE_RATE_MODE_COUNT] = {
 	{10, 10, 4},	/* Tx0 = Analog + S/PDIF. */
-	{16, 8, 0},	/* Tx1 = ADAT1 + ADAT2. */
+	{16, 4, 0},	/* Tx1 = ADAT1 + ADAT2 (available at low rate). */
 };
 
 int snd_dice_detect_alesis_formats(struct snd_dice *dice)
-- 
GitLab


From 0edabdfe89581669609eaac5f6a8d0ae6fe95e7f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:49 +0900
Subject: [PATCH 0694/3804] ALSA: bebob/oxfw: fix Kconfig entry for Mackie d.2
 Pro

Mackie d.2 has an extension card for IEEE 1394 communication, which uses
BridgeCo DM1000 ASIC. On the other hand, Mackie d.4 Pro has built-in
function for IEEE 1394 communication by Oxford Semiconductor OXFW971,
according to schematic diagram available in Mackie website. Although I
misunderstood that Mackie d.2 Pro would be also a model with OXFW971,
it's wrong. Mackie d.2 Pro is a model which includes the extension card
as factory settings.

This commit fixes entries in Kconfig and comment in ALSA OXFW driver.

Cc: <stable@vger.kernel.org>
Fixes: fd6f4b0dc167 ("ALSA: bebob: Add skelton for BeBoB based devices")
Fixes: ec4dba5053e1 ("ALSA: oxfw: Add support for Behringer/Mackie devices")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-3-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/Kconfig       | 4 ++--
 sound/firewire/bebob/bebob.c | 2 +-
 sound/firewire/oxfw/oxfw.c   | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/Kconfig b/sound/firewire/Kconfig
index 25778765cbfe9..9897bd26a4388 100644
--- a/sound/firewire/Kconfig
+++ b/sound/firewire/Kconfig
@@ -38,7 +38,7 @@ config SND_OXFW
 	   * Mackie(Loud) Onyx 1640i (former model)
 	   * Mackie(Loud) Onyx Satellite
 	   * Mackie(Loud) Tapco Link.Firewire
-	   * Mackie(Loud) d.2 pro/d.4 pro
+	   * Mackie(Loud) d.4 pro
 	   * Mackie(Loud) U.420/U.420d
 	   * TASCAM FireOne
 	   * Stanton Controllers & Systems 1 Deck/Mixer
@@ -84,7 +84,7 @@ config SND_BEBOB
 	  * PreSonus FIREBOX/FIREPOD/FP10/Inspire1394
 	  * BridgeCo RDAudio1/Audio5
 	  * Mackie Onyx 1220/1620/1640 (FireWire I/O Card)
-	  * Mackie d.2 (FireWire Option)
+	  * Mackie d.2 (FireWire Option) and d.2 Pro
 	  * Stanton FinalScratch 2 (ScratchAmp)
 	  * Tascam IF-FW/DM
 	  * Behringer XENIX UFX 1204/1604
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c
index 2c8e3392a4903..daeecfa8b9aac 100644
--- a/sound/firewire/bebob/bebob.c
+++ b/sound/firewire/bebob/bebob.c
@@ -387,7 +387,7 @@ static const struct ieee1394_device_id bebob_id_table[] = {
 	SND_BEBOB_DEV_ENTRY(VEN_BRIDGECO, 0x00010049, &spec_normal),
 	/* Mackie, Onyx 1220/1620/1640 (Firewire I/O Card) */
 	SND_BEBOB_DEV_ENTRY(VEN_MACKIE2, 0x00010065, &spec_normal),
-	/* Mackie, d.2 (Firewire Option) */
+	// Mackie, d.2 (Firewire option card) and d.2 Pro (the card is built-in).
 	SND_BEBOB_DEV_ENTRY(VEN_MACKIE1, 0x00010067, &spec_normal),
 	/* Stanton, ScratchAmp */
 	SND_BEBOB_DEV_ENTRY(VEN_STANTON, 0x00000001, &spec_normal),
diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index 1f1e3236efb8e..9eea25c46dc7e 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c
@@ -355,7 +355,6 @@ static const struct ieee1394_device_id oxfw_id_table[] = {
 	 *  Onyx-i series (former models):	0x081216
 	 *  Mackie Onyx Satellite:		0x00200f
 	 *  Tapco LINK.firewire 4x6:		0x000460
-	 *  d.2 pro:				Unknown
 	 *  d.4 pro:				Unknown
 	 *  U.420:				Unknown
 	 *  U.420d:				Unknown
-- 
GitLab


From 395f41e2cdac63e7581fb9574e5ac0f02556e34a Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:50 +0900
Subject: [PATCH 0695/3804] ALSA: firewire-lib: fix check for the size of
 isochronous packet payload

The check for the size of the isochronous packet payload only considers
the size of the IR context payload and ignores the size of the CIP header.

Cc: <stable@vger.kernel.org>
Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-4-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 4e2f2bb7879fb..b53971bf4b90a 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -633,18 +633,24 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 			       unsigned int *syt, unsigned int index)
 {
 	const __be32 *cip_header;
+	unsigned int cip_header_size;
 	int err;
 
 	*payload_length = be32_to_cpu(ctx_header[0]) >> ISO_DATA_LENGTH_SHIFT;
-	if (*payload_length > s->ctx_data.tx.ctx_header_size +
-					s->ctx_data.tx.max_ctx_payload_length) {
+
+	if (!(s->flags & CIP_NO_HEADER))
+		cip_header_size = 8;
+	else
+		cip_header_size = 0;
+
+	if (*payload_length > cip_header_size + s->ctx_data.tx.max_ctx_payload_length) {
 		dev_err(&s->unit->device,
 			"Detect jumbo payload: %04x %04x\n",
-			*payload_length, s->ctx_data.tx.max_ctx_payload_length);
+			*payload_length, cip_header_size + s->ctx_data.tx.max_ctx_payload_length);
 		return -EIO;
 	}
 
-	if (!(s->flags & CIP_NO_HEADER)) {
+	if (cip_header_size > 0) {
 		cip_header = ctx_header + 2;
 		err = check_cip_header(s, cip_header, *payload_length,
 				       data_blocks, data_block_counter, syt);
-- 
GitLab


From 1be4f21d9984fa9835fae5411a29465dc5aece6f Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:51 +0900
Subject: [PATCH 0696/3804] ALSA: firewire-lib: fix calculation for size of IR
 context payload

The quadlets for the CIP header are handled as a part of the IR context
header, thus they are not part of the IR context payload. However, the
current calculation includes the quadlets in the IR context payload.

Cc: <stable@vger.kernel.org>
Fixes: f11453c7cc01 ("ALSA: firewire-lib: use 16 bytes IR context header to separate CIP header")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-5-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index b53971bf4b90a..73aff017dc9a3 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -1071,23 +1071,22 @@ static int amdtp_stream_start(struct amdtp_stream *s, int channel, int speed,
 		s->data_block_counter = 0;
 	}
 
-	/* initialize packet buffer */
+	// initialize packet buffer.
+	max_ctx_payload_size = amdtp_stream_get_max_payload(s);
 	if (s->direction == AMDTP_IN_STREAM) {
 		dir = DMA_FROM_DEVICE;
 		type = FW_ISO_CONTEXT_RECEIVE;
-		if (!(s->flags & CIP_NO_HEADER))
+		if (!(s->flags & CIP_NO_HEADER)) {
+			max_ctx_payload_size -= 8;
 			ctx_header_size = IR_CTX_HEADER_SIZE_CIP;
-		else
+		} else {
 			ctx_header_size = IR_CTX_HEADER_SIZE_NO_CIP;
-
-		max_ctx_payload_size = amdtp_stream_get_max_payload(s) -
-				       ctx_header_size;
+		}
 	} else {
 		dir = DMA_TO_DEVICE;
 		type = FW_ISO_CONTEXT_TRANSMIT;
 		ctx_header_size = 0;	// No effect for IT context.
 
-		max_ctx_payload_size = amdtp_stream_get_max_payload(s);
 		if (!(s->flags & CIP_NO_HEADER))
 			max_ctx_payload_size -= IT_PKT_HEADER_SIZE_CIP;
 	}
-- 
GitLab


From 814b43127f4ac69332e809152e30773941438aff Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Thu, 13 May 2021 21:56:52 +0900
Subject: [PATCH 0697/3804] ALSA: firewire-lib: fix amdtp_packet tracepoints
 event for packet_index field

The snd_firewire_lib:amdtp_packet tracepoint event includes the index of
the packet processed in context handling. However, in IR context, it is
not calculated as expected.

Cc: <stable@vger.kernel.org>
Fixes: 753e717986c2 ("ALSA: firewire-lib: use packet descriptor for IR context")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210513125652.110249-6-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream-trace.h |  6 +++---
 sound/firewire/amdtp-stream.c       | 15 +++++++++------
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/sound/firewire/amdtp-stream-trace.h b/sound/firewire/amdtp-stream-trace.h
index 26e7cb555d3c5..aa53c13b89d34 100644
--- a/sound/firewire/amdtp-stream-trace.h
+++ b/sound/firewire/amdtp-stream-trace.h
@@ -14,8 +14,8 @@
 #include <linux/tracepoint.h>
 
 TRACE_EVENT(amdtp_packet,
-	TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int index),
-	TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, index),
+	TP_PROTO(const struct amdtp_stream *s, u32 cycles, const __be32 *cip_header, unsigned int payload_length, unsigned int data_blocks, unsigned int data_block_counter, unsigned int packet_index, unsigned int index),
+	TP_ARGS(s, cycles, cip_header, payload_length, data_blocks, data_block_counter, packet_index, index),
 	TP_STRUCT__entry(
 		__field(unsigned int, second)
 		__field(unsigned int, cycle)
@@ -48,7 +48,7 @@ TRACE_EVENT(amdtp_packet,
 		__entry->payload_quadlets = payload_length / sizeof(__be32);
 		__entry->data_blocks = data_blocks;
 		__entry->data_block_counter = data_block_counter,
-		__entry->packet_index = s->packet_index;
+		__entry->packet_index = packet_index;
 		__entry->irq = !!in_interrupt();
 		__entry->index = index;
 	),
diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index 73aff017dc9a3..e0faa6601966c 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -526,7 +526,7 @@ static void build_it_pkt_header(struct amdtp_stream *s, unsigned int cycle,
 	}
 
 	trace_amdtp_packet(s, cycle, cip_header, payload_length, data_blocks,
-			   data_block_counter, index);
+			   data_block_counter, s->packet_index, index);
 }
 
 static int check_cip_header(struct amdtp_stream *s, const __be32 *buf,
@@ -630,7 +630,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 			       unsigned int *payload_length,
 			       unsigned int *data_blocks,
 			       unsigned int *data_block_counter,
-			       unsigned int *syt, unsigned int index)
+			       unsigned int *syt, unsigned int packet_index, unsigned int index)
 {
 	const __be32 *cip_header;
 	unsigned int cip_header_size;
@@ -668,7 +668,7 @@ static int parse_ir_ctx_header(struct amdtp_stream *s, unsigned int cycle,
 	}
 
 	trace_amdtp_packet(s, cycle, cip_header, *payload_length, *data_blocks,
-			   *data_block_counter, index);
+			   *data_block_counter, packet_index, index);
 
 	return err;
 }
@@ -707,12 +707,13 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 				     unsigned int packets)
 {
 	unsigned int dbc = s->data_block_counter;
+	unsigned int packet_index = s->packet_index;
+	unsigned int queue_size = s->queue_size;
 	int i;
 	int err;
 
 	for (i = 0; i < packets; ++i) {
 		struct pkt_desc *desc = descs + i;
-		unsigned int index = (s->packet_index + i) % s->queue_size;
 		unsigned int cycle;
 		unsigned int payload_length;
 		unsigned int data_blocks;
@@ -721,7 +722,7 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 		cycle = compute_cycle_count(ctx_header[1]);
 
 		err = parse_ir_ctx_header(s, cycle, ctx_header, &payload_length,
-					  &data_blocks, &dbc, &syt, i);
+					  &data_blocks, &dbc, &syt, packet_index, i);
 		if (err < 0)
 			return err;
 
@@ -729,13 +730,15 @@ static int generate_device_pkt_descs(struct amdtp_stream *s,
 		desc->syt = syt;
 		desc->data_blocks = data_blocks;
 		desc->data_block_counter = dbc;
-		desc->ctx_payload = s->buffer.packets[index].buffer;
+		desc->ctx_payload = s->buffer.packets[packet_index].buffer;
 
 		if (!(s->flags & CIP_DBC_IS_END_EVENT))
 			dbc = (dbc + desc->data_blocks) & 0xff;
 
 		ctx_header +=
 			s->ctx_data.tx.ctx_header_size / sizeof(*ctx_header);
+
+		packet_index = (packet_index + 1) % queue_size;
 	}
 
 	s->data_block_counter = dbc;
-- 
GitLab


From 1d5cfca286178ce81fb0c8a5f5777ef123cd69e4 Mon Sep 17 00:00:00 2001
From: PeiSen Hou <pshou@realtek.com>
Date: Fri, 14 May 2021 12:50:48 +0200
Subject: [PATCH 0698/3804] ALSA: hda/realtek: Add some CLOVE SSIDs of ALC293

Fix "use as headset mic, without its own jack detect" problem.

Signed-off-by: PeiSen Hou <pshou@realtek.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/d0746eaf29f248a5acc30313e3ba4f99@realtek.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3e269de840799..552e2cb73291e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8385,12 +8385,19 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f5, "Clevo NH55EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x50f6, "Clevo NH55DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x5101, "Clevo S510WU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f2, "Clevo NH79EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8408,9 +8415,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x951d, "Clevo N950T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0x9600, "Clevo N960K[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x961d, "Clevo N960S[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL53RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL5XNU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xb022, "Clevo NH77D[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc018, "Clevo NP50D[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
 	SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
-- 
GitLab


From 7c0303ff7e67b637c47d8afee533ca9e2a02359b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 26 Apr 2021 02:57:31 +0900
Subject: [PATCH 0699/3804] crypto: arm - generate *.S by Perl at build time
 instead of shipping them

Generate *.S by Perl like arch/{mips,x86}/crypto/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/Makefile                |    8 +-
 arch/arm/crypto/poly1305-core.S_shipped | 1158 ----------
 arch/arm/crypto/sha256-core.S_shipped   | 2816 -----------------------
 arch/arm/crypto/sha512-core.S_shipped   | 1869 ---------------
 4 files changed, 3 insertions(+), 5848 deletions(-)
 delete mode 100644 arch/arm/crypto/poly1305-core.S_shipped
 delete mode 100644 arch/arm/crypto/sha256-core.S_shipped
 delete mode 100644 arch/arm/crypto/sha512-core.S_shipped

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8f26c454ea12e..51f160c61740f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -45,19 +45,17 @@ poly1305-arm-y := poly1305-core.o poly1305-glue.o
 nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
 curve25519-neon-y := curve25519-core.o curve25519-glue.o
 
-ifdef REGENERATE_ARM_CRYPTO
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+$(obj)/poly1305-core.S: $(src)/poly1305-armv4.pl
 	$(call cmd,perl)
 
-$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+$(obj)/sha256-core.S: $(src)/sha256-armv4.pl
 	$(call cmd,perl)
 
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
+$(obj)/sha512-core.S: $(src)/sha512-armv4.pl
 	$(call cmd,perl)
-endif
 
 clean-files += poly1305-core.S sha256-core.S sha512-core.S
 
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
deleted file mode 100644
index 37b71d9902932..0000000000000
--- a/arch/arm/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,1158 +0,0 @@
-#ifndef	__KERNEL__
-# include "arm_arch.h"
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
-# define poly1305_init   poly1305_init_arm
-# define poly1305_blocks poly1305_blocks_arm
-# define poly1305_emit   poly1305_emit_arm
-.globl	poly1305_blocks_neon
-#endif
-
-#if defined(__thumb2__)
-.syntax	unified
-.thumb
-#else
-.code	32
-#endif
-
-.text
-
-.globl	poly1305_emit
-.globl	poly1305_blocks
-.globl	poly1305_init
-.type	poly1305_init,%function
-.align	5
-poly1305_init:
-.Lpoly1305_init:
-	stmdb	sp!,{r4-r11}
-
-	eor	r3,r3,r3
-	cmp	r1,#0
-	str	r3,[r0,#0]		@ zero hash value
-	str	r3,[r0,#4]
-	str	r3,[r0,#8]
-	str	r3,[r0,#12]
-	str	r3,[r0,#16]
-	str	r3,[r0,#36]		@ clear is_base2_26
-	add	r0,r0,#20
-
-#ifdef	__thumb2__
-	it	eq
-#endif
-	moveq	r0,#0
-	beq	.Lno_key
-
-#if	__ARM_MAX_ARCH__>=7
-	mov	r3,#-1
-	str	r3,[r0,#28]		@ impossible key power value
-# ifndef __KERNEL__
-	adr	r11,.Lpoly1305_init
-	ldr	r12,.LOPENSSL_armcap
-# endif
-#endif
-	ldrb	r4,[r1,#0]
-	mov	r10,#0x0fffffff
-	ldrb	r5,[r1,#1]
-	and	r3,r10,#-4		@ 0x0ffffffc
-	ldrb	r6,[r1,#2]
-	ldrb	r7,[r1,#3]
-	orr	r4,r4,r5,lsl#8
-	ldrb	r5,[r1,#4]
-	orr	r4,r4,r6,lsl#16
-	ldrb	r6,[r1,#5]
-	orr	r4,r4,r7,lsl#24
-	ldrb	r7,[r1,#6]
-	and	r4,r4,r10
-
-#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-# if !defined(_WIN32)
-	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
-# endif
-# if defined(__APPLE__) || defined(_WIN32)
-	ldr	r12,[r12]
-# endif
-#endif
-	ldrb	r8,[r1,#7]
-	orr	r5,r5,r6,lsl#8
-	ldrb	r6,[r1,#8]
-	orr	r5,r5,r7,lsl#16
-	ldrb	r7,[r1,#9]
-	orr	r5,r5,r8,lsl#24
-	ldrb	r8,[r1,#10]
-	and	r5,r5,r3
-
-#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-	tst	r12,#ARMV7_NEON		@ check for NEON
-# ifdef	__thumb2__
-	adr	r9,.Lpoly1305_blocks_neon
-	adr	r11,.Lpoly1305_blocks
-	it	ne
-	movne	r11,r9
-	adr	r12,.Lpoly1305_emit
-	orr	r11,r11,#1		@ thumb-ify addresses
-	orr	r12,r12,#1
-# else
-	add	r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
-	ite	eq
-	addeq	r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
-	addne	r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
-# endif
-#endif
-	ldrb	r9,[r1,#11]
-	orr	r6,r6,r7,lsl#8
-	ldrb	r7,[r1,#12]
-	orr	r6,r6,r8,lsl#16
-	ldrb	r8,[r1,#13]
-	orr	r6,r6,r9,lsl#24
-	ldrb	r9,[r1,#14]
-	and	r6,r6,r3
-
-	ldrb	r10,[r1,#15]
-	orr	r7,r7,r8,lsl#8
-	str	r4,[r0,#0]
-	orr	r7,r7,r9,lsl#16
-	str	r5,[r0,#4]
-	orr	r7,r7,r10,lsl#24
-	str	r6,[r0,#8]
-	and	r7,r7,r3
-	str	r7,[r0,#12]
-#if	__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-	stmia	r2,{r11,r12}		@ fill functions table
-	mov	r0,#1
-#else
-	mov	r0,#0
-#endif
-.Lno_key:
-	ldmia	sp!,{r4-r11}
-#if	__ARM_ARCH__>=5
-	bx	lr				@ bx	lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	poly1305_init,.-poly1305_init
-.type	poly1305_blocks,%function
-.align	5
-poly1305_blocks:
-.Lpoly1305_blocks:
-	stmdb	sp!,{r3-r11,lr}
-
-	ands	r2,r2,#-16
-	beq	.Lno_data
-
-	add	r2,r2,r1		@ end pointer
-	sub	sp,sp,#32
-
-#if __ARM_ARCH__<7
-	ldmia	r0,{r4-r12}		@ load context
-	add	r0,r0,#20
-	str	r2,[sp,#16]		@ offload stuff
-	str	r0,[sp,#12]
-#else
-	ldr	lr,[r0,#36]		@ is_base2_26
-	ldmia	r0!,{r4-r8}		@ load hash value
-	str	r2,[sp,#16]		@ offload stuff
-	str	r0,[sp,#12]
-
-	adds	r9,r4,r5,lsl#26	@ base 2^26 -> base 2^32
-	mov	r10,r5,lsr#6
-	adcs	r10,r10,r6,lsl#20
-	mov	r11,r6,lsr#12
-	adcs	r11,r11,r7,lsl#14
-	mov	r12,r7,lsr#18
-	adcs	r12,r12,r8,lsl#8
-	mov	r2,#0
-	teq	lr,#0
-	str	r2,[r0,#16]		@ clear is_base2_26
-	adc	r2,r2,r8,lsr#24
-
-	itttt	ne
-	movne	r4,r9			@ choose between radixes
-	movne	r5,r10
-	movne	r6,r11
-	movne	r7,r12
-	ldmia	r0,{r9-r12}		@ load key
-	it	ne
-	movne	r8,r2
-#endif
-
-	mov	lr,r1
-	cmp	r3,#0
-	str	r10,[sp,#20]
-	str	r11,[sp,#24]
-	str	r12,[sp,#28]
-	b	.Loop
-
-.align	4
-.Loop:
-#if __ARM_ARCH__<7
-	ldrb	r0,[lr],#16		@ load input
-# ifdef	__thumb2__
-	it	hi
-# endif
-	addhi	r8,r8,#1		@ 1<<128
-	ldrb	r1,[lr,#-15]
-	ldrb	r2,[lr,#-14]
-	ldrb	r3,[lr,#-13]
-	orr	r1,r0,r1,lsl#8
-	ldrb	r0,[lr,#-12]
-	orr	r2,r1,r2,lsl#16
-	ldrb	r1,[lr,#-11]
-	orr	r3,r2,r3,lsl#24
-	ldrb	r2,[lr,#-10]
-	adds	r4,r4,r3		@ accumulate input
-
-	ldrb	r3,[lr,#-9]
-	orr	r1,r0,r1,lsl#8
-	ldrb	r0,[lr,#-8]
-	orr	r2,r1,r2,lsl#16
-	ldrb	r1,[lr,#-7]
-	orr	r3,r2,r3,lsl#24
-	ldrb	r2,[lr,#-6]
-	adcs	r5,r5,r3
-
-	ldrb	r3,[lr,#-5]
-	orr	r1,r0,r1,lsl#8
-	ldrb	r0,[lr,#-4]
-	orr	r2,r1,r2,lsl#16
-	ldrb	r1,[lr,#-3]
-	orr	r3,r2,r3,lsl#24
-	ldrb	r2,[lr,#-2]
-	adcs	r6,r6,r3
-
-	ldrb	r3,[lr,#-1]
-	orr	r1,r0,r1,lsl#8
-	str	lr,[sp,#8]		@ offload input pointer
-	orr	r2,r1,r2,lsl#16
-	add	r10,r10,r10,lsr#2
-	orr	r3,r2,r3,lsl#24
-#else
-	ldr	r0,[lr],#16		@ load input
-	it	hi
-	addhi	r8,r8,#1		@ padbit
-	ldr	r1,[lr,#-12]
-	ldr	r2,[lr,#-8]
-	ldr	r3,[lr,#-4]
-# ifdef	__ARMEB__
-	rev	r0,r0
-	rev	r1,r1
-	rev	r2,r2
-	rev	r3,r3
-# endif
-	adds	r4,r4,r0		@ accumulate input
-	str	lr,[sp,#8]		@ offload input pointer
-	adcs	r5,r5,r1
-	add	r10,r10,r10,lsr#2
-	adcs	r6,r6,r2
-#endif
-	add	r11,r11,r11,lsr#2
-	adcs	r7,r7,r3
-	add	r12,r12,r12,lsr#2
-
-	umull	r2,r3,r5,r9
-	 adc	r8,r8,#0
-	umull	r0,r1,r4,r9
-	umlal	r2,r3,r8,r10
-	umlal	r0,r1,r7,r10
-	ldr	r10,[sp,#20]		@ reload r10
-	umlal	r2,r3,r6,r12
-	umlal	r0,r1,r5,r12
-	umlal	r2,r3,r7,r11
-	umlal	r0,r1,r6,r11
-	umlal	r2,r3,r4,r10
-	str	r0,[sp,#0]		@ future r4
-	 mul	r0,r11,r8
-	ldr	r11,[sp,#24]		@ reload r11
-	adds	r2,r2,r1		@ d1+=d0>>32
-	 eor	r1,r1,r1
-	adc	lr,r3,#0		@ future r6
-	str	r2,[sp,#4]		@ future r5
-
-	mul	r2,r12,r8
-	eor	r3,r3,r3
-	umlal	r0,r1,r7,r12
-	ldr	r12,[sp,#28]		@ reload r12
-	umlal	r2,r3,r7,r9
-	umlal	r0,r1,r6,r9
-	umlal	r2,r3,r6,r10
-	umlal	r0,r1,r5,r10
-	umlal	r2,r3,r5,r11
-	umlal	r0,r1,r4,r11
-	umlal	r2,r3,r4,r12
-	ldr	r4,[sp,#0]
-	mul	r8,r9,r8
-	ldr	r5,[sp,#4]
-
-	adds	r6,lr,r0		@ d2+=d1>>32
-	ldr	lr,[sp,#8]		@ reload input pointer
-	adc	r1,r1,#0
-	adds	r7,r2,r1		@ d3+=d2>>32
-	ldr	r0,[sp,#16]		@ reload end pointer
-	adc	r3,r3,#0
-	add	r8,r8,r3		@ h4+=d3>>32
-
-	and	r1,r8,#-4
-	and	r8,r8,#3
-	add	r1,r1,r1,lsr#2		@ *=5
-	adds	r4,r4,r1
-	adcs	r5,r5,#0
-	adcs	r6,r6,#0
-	adcs	r7,r7,#0
-	adc	r8,r8,#0
-
-	cmp	r0,lr			@ done yet?
-	bhi	.Loop
-
-	ldr	r0,[sp,#12]
-	add	sp,sp,#32
-	stmdb	r0,{r4-r8}		@ store the result
-
-.Lno_data:
-#if	__ARM_ARCH__>=5
-	ldmia	sp!,{r3-r11,pc}
-#else
-	ldmia	sp!,{r3-r11,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	poly1305_blocks,.-poly1305_blocks
-.type	poly1305_emit,%function
-.align	5
-poly1305_emit:
-.Lpoly1305_emit:
-	stmdb	sp!,{r4-r11}
-
-	ldmia	r0,{r3-r7}
-
-#if __ARM_ARCH__>=7
-	ldr	ip,[r0,#36]		@ is_base2_26
-
-	adds	r8,r3,r4,lsl#26	@ base 2^26 -> base 2^32
-	mov	r9,r4,lsr#6
-	adcs	r9,r9,r5,lsl#20
-	mov	r10,r5,lsr#12
-	adcs	r10,r10,r6,lsl#14
-	mov	r11,r6,lsr#18
-	adcs	r11,r11,r7,lsl#8
-	mov	r0,#0
-	adc	r0,r0,r7,lsr#24
-
-	tst	ip,ip
-	itttt	ne
-	movne	r3,r8
-	movne	r4,r9
-	movne	r5,r10
-	movne	r6,r11
-	it	ne
-	movne	r7,r0
-#endif
-
-	adds	r8,r3,#5		@ compare to modulus
-	adcs	r9,r4,#0
-	adcs	r10,r5,#0
-	adcs	r11,r6,#0
-	adc	r0,r7,#0
-	tst	r0,#4			@ did it carry/borrow?
-
-#ifdef	__thumb2__
-	it	ne
-#endif
-	movne	r3,r8
-	ldr	r8,[r2,#0]
-#ifdef	__thumb2__
-	it	ne
-#endif
-	movne	r4,r9
-	ldr	r9,[r2,#4]
-#ifdef	__thumb2__
-	it	ne
-#endif
-	movne	r5,r10
-	ldr	r10,[r2,#8]
-#ifdef	__thumb2__
-	it	ne
-#endif
-	movne	r6,r11
-	ldr	r11,[r2,#12]
-
-	adds	r3,r3,r8
-	adcs	r4,r4,r9
-	adcs	r5,r5,r10
-	adc	r6,r6,r11
-
-#if __ARM_ARCH__>=7
-# ifdef __ARMEB__
-	rev	r3,r3
-	rev	r4,r4
-	rev	r5,r5
-	rev	r6,r6
-# endif
-	str	r3,[r1,#0]
-	str	r4,[r1,#4]
-	str	r5,[r1,#8]
-	str	r6,[r1,#12]
-#else
-	strb	r3,[r1,#0]
-	mov	r3,r3,lsr#8
-	strb	r4,[r1,#4]
-	mov	r4,r4,lsr#8
-	strb	r5,[r1,#8]
-	mov	r5,r5,lsr#8
-	strb	r6,[r1,#12]
-	mov	r6,r6,lsr#8
-
-	strb	r3,[r1,#1]
-	mov	r3,r3,lsr#8
-	strb	r4,[r1,#5]
-	mov	r4,r4,lsr#8
-	strb	r5,[r1,#9]
-	mov	r5,r5,lsr#8
-	strb	r6,[r1,#13]
-	mov	r6,r6,lsr#8
-
-	strb	r3,[r1,#2]
-	mov	r3,r3,lsr#8
-	strb	r4,[r1,#6]
-	mov	r4,r4,lsr#8
-	strb	r5,[r1,#10]
-	mov	r5,r5,lsr#8
-	strb	r6,[r1,#14]
-	mov	r6,r6,lsr#8
-
-	strb	r3,[r1,#3]
-	strb	r4,[r1,#7]
-	strb	r5,[r1,#11]
-	strb	r6,[r1,#15]
-#endif
-	ldmia	sp!,{r4-r11}
-#if	__ARM_ARCH__>=5
-	bx	lr				@ bx	lr
-#else
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	poly1305_emit,.-poly1305_emit
-#if	__ARM_MAX_ARCH__>=7
-.fpu	neon
-
-.type	poly1305_init_neon,%function
-.align	5
-poly1305_init_neon:
-.Lpoly1305_init_neon:
-	ldr	r3,[r0,#48]		@ first table element
-	cmp	r3,#-1			@ is value impossible?
-	bne	.Lno_init_neon
-
-	ldr	r4,[r0,#20]		@ load key base 2^32
-	ldr	r5,[r0,#24]
-	ldr	r6,[r0,#28]
-	ldr	r7,[r0,#32]
-
-	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
-	mov	r3,r4,lsr#26
-	mov	r4,r5,lsr#20
-	orr	r3,r3,r5,lsl#6
-	mov	r5,r6,lsr#14
-	orr	r4,r4,r6,lsl#12
-	mov	r6,r7,lsr#8
-	orr	r5,r5,r7,lsl#18
-	and	r3,r3,#0x03ffffff
-	and	r4,r4,#0x03ffffff
-	and	r5,r5,#0x03ffffff
-
-	vdup.32	d0,r2			@ r^1 in both lanes
-	add	r2,r3,r3,lsl#2		@ *5
-	vdup.32	d1,r3
-	add	r3,r4,r4,lsl#2
-	vdup.32	d2,r2
-	vdup.32	d3,r4
-	add	r4,r5,r5,lsl#2
-	vdup.32	d4,r3
-	vdup.32	d5,r5
-	add	r5,r6,r6,lsl#2
-	vdup.32	d6,r4
-	vdup.32	d7,r6
-	vdup.32	d8,r5
-
-	mov	r5,#2		@ counter
-
-.Lsquare_neon:
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
-	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
-	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
-	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
-	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
-
-	vmull.u32	q5,d0,d0[1]
-	vmull.u32	q6,d1,d0[1]
-	vmull.u32	q7,d3,d0[1]
-	vmull.u32	q8,d5,d0[1]
-	vmull.u32	q9,d7,d0[1]
-
-	vmlal.u32	q5,d7,d2[1]
-	vmlal.u32	q6,d0,d1[1]
-	vmlal.u32	q7,d1,d1[1]
-	vmlal.u32	q8,d3,d1[1]
-	vmlal.u32	q9,d5,d1[1]
-
-	vmlal.u32	q5,d5,d4[1]
-	vmlal.u32	q6,d7,d4[1]
-	vmlal.u32	q8,d1,d3[1]
-	vmlal.u32	q7,d0,d3[1]
-	vmlal.u32	q9,d3,d3[1]
-
-	vmlal.u32	q5,d3,d6[1]
-	vmlal.u32	q8,d0,d5[1]
-	vmlal.u32	q6,d5,d6[1]
-	vmlal.u32	q7,d7,d6[1]
-	vmlal.u32	q9,d1,d5[1]
-
-	vmlal.u32	q8,d7,d8[1]
-	vmlal.u32	q5,d1,d8[1]
-	vmlal.u32	q6,d3,d8[1]
-	vmlal.u32	q7,d5,d8[1]
-	vmlal.u32	q9,d0,d7[1]
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
-	@ and P. Schwabe
-	@
-	@ H0>>+H1>>+H2>>+H3>>+H4
-	@ H3>>+H4>>*5+H0>>+H1
-	@
-	@ Trivia.
-	@
-	@ Result of multiplication of n-bit number by m-bit number is
-	@ n+m bits wide. However! Even though 2^n is a n+1-bit number,
-	@ m-bit number multiplied by 2^n is still n+m bits wide.
-	@
-	@ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
-	@ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
-	@ one is n+1 bits wide.
-	@
-	@ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
-	@ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
-	@ can be 27. However! In cases when their width exceeds 26 bits
-	@ they are limited by 2^26+2^6. This in turn means that *sum*
-	@ of the products with these values can still be viewed as sum
-	@ of 52-bit numbers as long as the amount of addends is not a
-	@ power of 2. For example,
-	@
-	@ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
-	@
-	@ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
-	@ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
-	@ 8 * (2^52) or 2^55. However, the value is then multiplied by
-	@ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
-	@ which is less than 32 * (2^52) or 2^57. And when processing
-	@ data we are looking at triple as many addends...
-	@
-	@ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
-	@ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
-	@ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
-	@ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
-	@ instruction accepts 2x32-bit input and writes 2x64-bit result.
-	@ This means that result of reduction have to be compressed upon
-	@ loop wrap-around. This can be done in the process of reduction
-	@ to minimize amount of instructions [as well as amount of
-	@ 128-bit instructions, which benefits low-end processors], but
-	@ one has to watch for H2 (which is narrower than H0) and 5*H4
-	@ not being wider than 58 bits, so that result of right shift
-	@ by 26 bits fits in 32 bits. This is also useful on x86,
-	@ because it allows to use paddd in place for paddq, which
-	@ benefits Atom, where paddq is ridiculously slow.
-
-	vshr.u64	q15,q8,#26
-	vmovn.i64	d16,q8
-	 vshr.u64	q4,q5,#26
-	 vmovn.i64	d10,q5
-	vadd.i64	q9,q9,q15		@ h3 -> h4
-	vbic.i32	d16,#0xfc000000	@ &=0x03ffffff
-	 vadd.i64	q6,q6,q4		@ h0 -> h1
-	 vbic.i32	d10,#0xfc000000
-
-	vshrn.u64	d30,q9,#26
-	vmovn.i64	d18,q9
-	 vshr.u64	q4,q6,#26
-	 vmovn.i64	d12,q6
-	 vadd.i64	q7,q7,q4		@ h1 -> h2
-	vbic.i32	d18,#0xfc000000
-	 vbic.i32	d12,#0xfc000000
-
-	vadd.i32	d10,d10,d30
-	vshl.u32	d30,d30,#2
-	 vshrn.u64	d8,q7,#26
-	 vmovn.i64	d14,q7
-	vadd.i32	d10,d10,d30	@ h4 -> h0
-	 vadd.i32	d16,d16,d8	@ h2 -> h3
-	 vbic.i32	d14,#0xfc000000
-
-	vshr.u32	d30,d10,#26
-	vbic.i32	d10,#0xfc000000
-	 vshr.u32	d8,d16,#26
-	 vbic.i32	d16,#0xfc000000
-	vadd.i32	d12,d12,d30	@ h0 -> h1
-	 vadd.i32	d18,d18,d8	@ h3 -> h4
-
-	subs		r5,r5,#1
-	beq		.Lsquare_break_neon
-
-	add		r6,r0,#(48+0*9*4)
-	add		r7,r0,#(48+1*9*4)
-
-	vtrn.32		d0,d10		@ r^2:r^1
-	vtrn.32		d3,d14
-	vtrn.32		d5,d16
-	vtrn.32		d1,d12
-	vtrn.32		d7,d18
-
-	vshl.u32	d4,d3,#2		@ *5
-	vshl.u32	d6,d5,#2
-	vshl.u32	d2,d1,#2
-	vshl.u32	d8,d7,#2
-	vadd.i32	d4,d4,d3
-	vadd.i32	d2,d2,d1
-	vadd.i32	d6,d6,d5
-	vadd.i32	d8,d8,d7
-
-	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
-	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
-	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
-	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
-	vst1.32		{d8[0]},[r6,:32]
-	vst1.32		{d8[1]},[r7,:32]
-
-	b		.Lsquare_neon
-
-.align	4
-.Lsquare_break_neon:
-	add		r6,r0,#(48+2*4*9)
-	add		r7,r0,#(48+3*4*9)
-
-	vmov		d0,d10		@ r^4:r^3
-	vshl.u32	d2,d12,#2		@ *5
-	vmov		d1,d12
-	vshl.u32	d4,d14,#2
-	vmov		d3,d14
-	vshl.u32	d6,d16,#2
-	vmov		d5,d16
-	vshl.u32	d8,d18,#2
-	vmov		d7,d18
-	vadd.i32	d2,d2,d12
-	vadd.i32	d4,d4,d14
-	vadd.i32	d6,d6,d16
-	vadd.i32	d8,d8,d18
-
-	vst4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!
-	vst4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!
-	vst4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
-	vst4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
-	vst1.32		{d8[0]},[r6]
-	vst1.32		{d8[1]},[r7]
-
-.Lno_init_neon:
-	bx	lr				@ bx	lr
-.size	poly1305_init_neon,.-poly1305_init_neon
-
-.type	poly1305_blocks_neon,%function
-.align	5
-poly1305_blocks_neon:
-.Lpoly1305_blocks_neon:
-	ldr	ip,[r0,#36]		@ is_base2_26
-
-	cmp	r2,#64
-	blo	.Lpoly1305_blocks
-
-	stmdb	sp!,{r4-r7}
-	vstmdb	sp!,{d8-d15}		@ ABI specification says so
-
-	tst	ip,ip			@ is_base2_26?
-	bne	.Lbase2_26_neon
-
-	stmdb	sp!,{r1-r3,lr}
-	bl	.Lpoly1305_init_neon
-
-	ldr	r4,[r0,#0]		@ load hash value base 2^32
-	ldr	r5,[r0,#4]
-	ldr	r6,[r0,#8]
-	ldr	r7,[r0,#12]
-	ldr	ip,[r0,#16]
-
-	and	r2,r4,#0x03ffffff	@ base 2^32 -> base 2^26
-	mov	r3,r4,lsr#26
-	 veor	d10,d10,d10
-	mov	r4,r5,lsr#20
-	orr	r3,r3,r5,lsl#6
-	 veor	d12,d12,d12
-	mov	r5,r6,lsr#14
-	orr	r4,r4,r6,lsl#12
-	 veor	d14,d14,d14
-	mov	r6,r7,lsr#8
-	orr	r5,r5,r7,lsl#18
-	 veor	d16,d16,d16
-	and	r3,r3,#0x03ffffff
-	orr	r6,r6,ip,lsl#24
-	 veor	d18,d18,d18
-	and	r4,r4,#0x03ffffff
-	mov	r1,#1
-	and	r5,r5,#0x03ffffff
-	str	r1,[r0,#36]		@ set is_base2_26
-
-	vmov.32	d10[0],r2
-	vmov.32	d12[0],r3
-	vmov.32	d14[0],r4
-	vmov.32	d16[0],r5
-	vmov.32	d18[0],r6
-	adr	r5,.Lzeros
-
-	ldmia	sp!,{r1-r3,lr}
-	b	.Lhash_loaded
-
-.align	4
-.Lbase2_26_neon:
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ load hash value
-
-	veor		d10,d10,d10
-	veor		d12,d12,d12
-	veor		d14,d14,d14
-	veor		d16,d16,d16
-	veor		d18,d18,d18
-	vld4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
-	adr		r5,.Lzeros
-	vld1.32		{d18[0]},[r0]
-	sub		r0,r0,#16		@ rewind
-
-.Lhash_loaded:
-	add		r4,r1,#32
-	mov		r3,r3,lsl#24
-	tst		r2,#31
-	beq		.Leven
-
-	vld4.32		{d20[0],d22[0],d24[0],d26[0]},[r1]!
-	vmov.32		d28[0],r3
-	sub		r2,r2,#16
-	add		r4,r1,#32
-
-# ifdef	__ARMEB__
-	vrev32.8	q10,q10
-	vrev32.8	q13,q13
-	vrev32.8	q11,q11
-	vrev32.8	q12,q12
-# endif
-	vsri.u32	d28,d26,#8	@ base 2^32 -> base 2^26
-	vshl.u32	d26,d26,#18
-
-	vsri.u32	d26,d24,#14
-	vshl.u32	d24,d24,#12
-	vadd.i32	d29,d28,d18	@ add hash value and move to #hi
-
-	vbic.i32	d26,#0xfc000000
-	vsri.u32	d24,d22,#20
-	vshl.u32	d22,d22,#6
-
-	vbic.i32	d24,#0xfc000000
-	vsri.u32	d22,d20,#26
-	vadd.i32	d27,d26,d16
-
-	vbic.i32	d20,#0xfc000000
-	vbic.i32	d22,#0xfc000000
-	vadd.i32	d25,d24,d14
-
-	vadd.i32	d21,d20,d10
-	vadd.i32	d23,d22,d12
-
-	mov		r7,r5
-	add		r6,r0,#48
-
-	cmp		r2,r2
-	b		.Long_tail
-
-.align	4
-.Leven:
-	subs		r2,r2,#64
-	it		lo
-	movlo		r4,r5
-
-	vmov.i32	q14,#1<<24		@ padbit, yes, always
-	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
-	add		r1,r1,#64
-	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
-	add		r4,r4,#64
-	itt		hi
-	addhi		r7,r0,#(48+1*9*4)
-	addhi		r6,r0,#(48+3*9*4)
-
-# ifdef	__ARMEB__
-	vrev32.8	q10,q10
-	vrev32.8	q13,q13
-	vrev32.8	q11,q11
-	vrev32.8	q12,q12
-# endif
-	vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
-	vshl.u32	q13,q13,#18
-
-	vsri.u32	q13,q12,#14
-	vshl.u32	q12,q12,#12
-
-	vbic.i32	q13,#0xfc000000
-	vsri.u32	q12,q11,#20
-	vshl.u32	q11,q11,#6
-
-	vbic.i32	q12,#0xfc000000
-	vsri.u32	q11,q10,#26
-
-	vbic.i32	q10,#0xfc000000
-	vbic.i32	q11,#0xfc000000
-
-	bls		.Lskip_loop
-
-	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^2
-	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
-	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
-	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
-	b		.Loop_neon
-
-.align	5
-.Loop_neon:
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
-	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
-	@   ___________________/
-	@ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
-	@ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
-	@   ___________________/ ____________________/
-	@
-	@ Note that we start with inp[2:3]*r^2. This is because it
-	@ doesn't depend on reduction in previous iteration.
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ d4 = h4*r0 + h3*r1   + h2*r2   + h1*r3   + h0*r4
-	@ d3 = h3*r0 + h2*r1   + h1*r2   + h0*r3   + h4*5*r4
-	@ d2 = h2*r0 + h1*r1   + h0*r2   + h4*5*r3 + h3*5*r4
-	@ d1 = h1*r0 + h0*r1   + h4*5*r2 + h3*5*r3 + h2*5*r4
-	@ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ inp[2:3]*r^2
-
-	vadd.i32	d24,d24,d14	@ accumulate inp[0:1]
-	vmull.u32	q7,d25,d0[1]
-	vadd.i32	d20,d20,d10
-	vmull.u32	q5,d21,d0[1]
-	vadd.i32	d26,d26,d16
-	vmull.u32	q8,d27,d0[1]
-	vmlal.u32	q7,d23,d1[1]
-	vadd.i32	d22,d22,d12
-	vmull.u32	q6,d23,d0[1]
-
-	vadd.i32	d28,d28,d18
-	vmull.u32	q9,d29,d0[1]
-	subs		r2,r2,#64
-	vmlal.u32	q5,d29,d2[1]
-	it		lo
-	movlo		r4,r5
-	vmlal.u32	q8,d25,d1[1]
-	vld1.32		d8[1],[r7,:32]
-	vmlal.u32	q6,d21,d1[1]
-	vmlal.u32	q9,d27,d1[1]
-
-	vmlal.u32	q5,d27,d4[1]
-	vmlal.u32	q8,d23,d3[1]
-	vmlal.u32	q9,d25,d3[1]
-	vmlal.u32	q6,d29,d4[1]
-	vmlal.u32	q7,d21,d3[1]
-
-	vmlal.u32	q8,d21,d5[1]
-	vmlal.u32	q5,d25,d6[1]
-	vmlal.u32	q9,d23,d5[1]
-	vmlal.u32	q6,d27,d6[1]
-	vmlal.u32	q7,d29,d6[1]
-
-	vmlal.u32	q8,d29,d8[1]
-	vmlal.u32	q5,d23,d8[1]
-	vmlal.u32	q9,d21,d7[1]
-	vmlal.u32	q6,d25,d8[1]
-	vmlal.u32	q7,d27,d8[1]
-
-	vld4.32		{d21,d23,d25,d27},[r4]	@ inp[2:3] (or 0)
-	add		r4,r4,#64
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ (hash+inp[0:1])*r^4 and accumulate
-
-	vmlal.u32	q8,d26,d0[0]
-	vmlal.u32	q5,d20,d0[0]
-	vmlal.u32	q9,d28,d0[0]
-	vmlal.u32	q6,d22,d0[0]
-	vmlal.u32	q7,d24,d0[0]
-	vld1.32		d8[0],[r6,:32]
-
-	vmlal.u32	q8,d24,d1[0]
-	vmlal.u32	q5,d28,d2[0]
-	vmlal.u32	q9,d26,d1[0]
-	vmlal.u32	q6,d20,d1[0]
-	vmlal.u32	q7,d22,d1[0]
-
-	vmlal.u32	q8,d22,d3[0]
-	vmlal.u32	q5,d26,d4[0]
-	vmlal.u32	q9,d24,d3[0]
-	vmlal.u32	q6,d28,d4[0]
-	vmlal.u32	q7,d20,d3[0]
-
-	vmlal.u32	q8,d20,d5[0]
-	vmlal.u32	q5,d24,d6[0]
-	vmlal.u32	q9,d22,d5[0]
-	vmlal.u32	q6,d26,d6[0]
-	vmlal.u32	q8,d28,d8[0]
-
-	vmlal.u32	q7,d28,d6[0]
-	vmlal.u32	q5,d22,d8[0]
-	vmlal.u32	q9,d20,d7[0]
-	vmov.i32	q14,#1<<24		@ padbit, yes, always
-	vmlal.u32	q6,d24,d8[0]
-	vmlal.u32	q7,d26,d8[0]
-
-	vld4.32		{d20,d22,d24,d26},[r1]	@ inp[0:1]
-	add		r1,r1,#64
-# ifdef	__ARMEB__
-	vrev32.8	q10,q10
-	vrev32.8	q11,q11
-	vrev32.8	q12,q12
-	vrev32.8	q13,q13
-# endif
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ lazy reduction interleaved with base 2^32 -> base 2^26 of
-	@ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
-
-	vshr.u64	q15,q8,#26
-	vmovn.i64	d16,q8
-	 vshr.u64	q4,q5,#26
-	 vmovn.i64	d10,q5
-	vadd.i64	q9,q9,q15		@ h3 -> h4
-	vbic.i32	d16,#0xfc000000
-	  vsri.u32	q14,q13,#8		@ base 2^32 -> base 2^26
-	 vadd.i64	q6,q6,q4		@ h0 -> h1
-	  vshl.u32	q13,q13,#18
-	 vbic.i32	d10,#0xfc000000
-
-	vshrn.u64	d30,q9,#26
-	vmovn.i64	d18,q9
-	 vshr.u64	q4,q6,#26
-	 vmovn.i64	d12,q6
-	 vadd.i64	q7,q7,q4		@ h1 -> h2
-	  vsri.u32	q13,q12,#14
-	vbic.i32	d18,#0xfc000000
-	  vshl.u32	q12,q12,#12
-	 vbic.i32	d12,#0xfc000000
-
-	vadd.i32	d10,d10,d30
-	vshl.u32	d30,d30,#2
-	  vbic.i32	q13,#0xfc000000
-	 vshrn.u64	d8,q7,#26
-	 vmovn.i64	d14,q7
-	vaddl.u32	q5,d10,d30	@ h4 -> h0 [widen for a sec]
-	  vsri.u32	q12,q11,#20
-	 vadd.i32	d16,d16,d8	@ h2 -> h3
-	  vshl.u32	q11,q11,#6
-	 vbic.i32	d14,#0xfc000000
-	  vbic.i32	q12,#0xfc000000
-
-	vshrn.u64	d30,q5,#26		@ re-narrow
-	vmovn.i64	d10,q5
-	  vsri.u32	q11,q10,#26
-	  vbic.i32	q10,#0xfc000000
-	 vshr.u32	d8,d16,#26
-	 vbic.i32	d16,#0xfc000000
-	vbic.i32	d10,#0xfc000000
-	vadd.i32	d12,d12,d30	@ h0 -> h1
-	 vadd.i32	d18,d18,d8	@ h3 -> h4
-	  vbic.i32	q11,#0xfc000000
-
-	bhi		.Loop_neon
-
-.Lskip_loop:
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
-
-	add		r7,r0,#(48+0*9*4)
-	add		r6,r0,#(48+1*9*4)
-	adds		r2,r2,#32
-	it		ne
-	movne		r2,#0
-	bne		.Long_tail
-
-	vadd.i32	d25,d24,d14	@ add hash value and move to #hi
-	vadd.i32	d21,d20,d10
-	vadd.i32	d27,d26,d16
-	vadd.i32	d23,d22,d12
-	vadd.i32	d29,d28,d18
-
-.Long_tail:
-	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^1
-	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^2
-
-	vadd.i32	d24,d24,d14	@ can be redundant
-	vmull.u32	q7,d25,d0
-	vadd.i32	d20,d20,d10
-	vmull.u32	q5,d21,d0
-	vadd.i32	d26,d26,d16
-	vmull.u32	q8,d27,d0
-	vadd.i32	d22,d22,d12
-	vmull.u32	q6,d23,d0
-	vadd.i32	d28,d28,d18
-	vmull.u32	q9,d29,d0
-
-	vmlal.u32	q5,d29,d2
-	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
-	vmlal.u32	q8,d25,d1
-	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
-	vmlal.u32	q6,d21,d1
-	vmlal.u32	q9,d27,d1
-	vmlal.u32	q7,d23,d1
-
-	vmlal.u32	q8,d23,d3
-	vld1.32		d8[1],[r7,:32]
-	vmlal.u32	q5,d27,d4
-	vld1.32		d8[0],[r6,:32]
-	vmlal.u32	q9,d25,d3
-	vmlal.u32	q6,d29,d4
-	vmlal.u32	q7,d21,d3
-
-	vmlal.u32	q8,d21,d5
-	 it		ne
-	 addne		r7,r0,#(48+2*9*4)
-	vmlal.u32	q5,d25,d6
-	 it		ne
-	 addne		r6,r0,#(48+3*9*4)
-	vmlal.u32	q9,d23,d5
-	vmlal.u32	q6,d27,d6
-	vmlal.u32	q7,d29,d6
-
-	vmlal.u32	q8,d29,d8
-	 vorn		q0,q0,q0	@ all-ones, can be redundant
-	vmlal.u32	q5,d23,d8
-	 vshr.u64	q0,q0,#38
-	vmlal.u32	q9,d21,d7
-	vmlal.u32	q6,d25,d8
-	vmlal.u32	q7,d27,d8
-
-	beq		.Lshort_tail
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ (hash+inp[0:1])*r^4:r^3 and accumulate
-
-	vld4.32		{d0[1],d1[1],d2[1],d3[1]},[r7]!	@ load r^3
-	vld4.32		{d0[0],d1[0],d2[0],d3[0]},[r6]!	@ load r^4
-
-	vmlal.u32	q7,d24,d0
-	vmlal.u32	q5,d20,d0
-	vmlal.u32	q8,d26,d0
-	vmlal.u32	q6,d22,d0
-	vmlal.u32	q9,d28,d0
-
-	vmlal.u32	q5,d28,d2
-	vld4.32		{d4[1],d5[1],d6[1],d7[1]},[r7]!
-	vmlal.u32	q8,d24,d1
-	vld4.32		{d4[0],d5[0],d6[0],d7[0]},[r6]!
-	vmlal.u32	q6,d20,d1
-	vmlal.u32	q9,d26,d1
-	vmlal.u32	q7,d22,d1
-
-	vmlal.u32	q8,d22,d3
-	vld1.32		d8[1],[r7,:32]
-	vmlal.u32	q5,d26,d4
-	vld1.32		d8[0],[r6,:32]
-	vmlal.u32	q9,d24,d3
-	vmlal.u32	q6,d28,d4
-	vmlal.u32	q7,d20,d3
-
-	vmlal.u32	q8,d20,d5
-	vmlal.u32	q5,d24,d6
-	vmlal.u32	q9,d22,d5
-	vmlal.u32	q6,d26,d6
-	vmlal.u32	q7,d28,d6
-
-	vmlal.u32	q8,d28,d8
-	 vorn		q0,q0,q0	@ all-ones
-	vmlal.u32	q5,d22,d8
-	 vshr.u64	q0,q0,#38
-	vmlal.u32	q9,d20,d7
-	vmlal.u32	q6,d24,d8
-	vmlal.u32	q7,d26,d8
-
-.Lshort_tail:
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ horizontal addition
-
-	vadd.i64	d16,d16,d17
-	vadd.i64	d10,d10,d11
-	vadd.i64	d18,d18,d19
-	vadd.i64	d12,d12,d13
-	vadd.i64	d14,d14,d15
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ lazy reduction, but without narrowing
-
-	vshr.u64	q15,q8,#26
-	vand.i64	q8,q8,q0
-	 vshr.u64	q4,q5,#26
-	 vand.i64	q5,q5,q0
-	vadd.i64	q9,q9,q15		@ h3 -> h4
-	 vadd.i64	q6,q6,q4		@ h0 -> h1
-
-	vshr.u64	q15,q9,#26
-	vand.i64	q9,q9,q0
-	 vshr.u64	q4,q6,#26
-	 vand.i64	q6,q6,q0
-	 vadd.i64	q7,q7,q4		@ h1 -> h2
-
-	vadd.i64	q5,q5,q15
-	vshl.u64	q15,q15,#2
-	 vshr.u64	q4,q7,#26
-	 vand.i64	q7,q7,q0
-	vadd.i64	q5,q5,q15		@ h4 -> h0
-	 vadd.i64	q8,q8,q4		@ h2 -> h3
-
-	vshr.u64	q15,q5,#26
-	vand.i64	q5,q5,q0
-	 vshr.u64	q4,q8,#26
-	 vand.i64	q8,q8,q0
-	vadd.i64	q6,q6,q15		@ h0 -> h1
-	 vadd.i64	q9,q9,q4		@ h3 -> h4
-
-	cmp		r2,#0
-	bne		.Leven
-
-	@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-	@ store hash value
-
-	vst4.32		{d10[0],d12[0],d14[0],d16[0]},[r0]!
-	vst1.32		{d18[0]},[r0]
-
-	vldmia	sp!,{d8-d15}			@ epilogue
-	ldmia	sp!,{r4-r7}
-	bx	lr					@ bx	lr
-.size	poly1305_blocks_neon,.-poly1305_blocks_neon
-
-.align	5
-.Lzeros:
-.long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-#ifndef	__KERNEL__
-.LOPENSSL_armcap:
-# ifdef	_WIN32
-.word	OPENSSL_armcap_P
-# else
-.word	OPENSSL_armcap_P-.Lpoly1305_init
-# endif
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
-#endif
-.asciz	"Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
-.align	2
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
deleted file mode 100644
index 6363014a50d79..0000000000000
--- a/arch/arm/crypto/sha256-core.S_shipped
+++ /dev/null
@@ -1,2816 +0,0 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.
-
-@ ====================================================================
-@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see https://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ SHA256 block procedure for ARMv4. May 2007.
-
-@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
-@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
-@ byte [on single-issue Xscale PXA250 core].
-
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
-@ Cortex A8 core and ~20 cycles per processed byte.
-
-@ February 2011.
-@
-@ Profiler-assisted and platform-specific optimization resulted in 16%
-@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
-
-@ September 2013.
-@
-@ Add NEON implementation. On Cortex A8 it was measured to process one
-@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
-@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
-@ code (meaning that latter performs sub-optimally, nothing was done
-@ about it).
-
-@ May 2014.
-@
-@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ 7
-#endif
-
-.text
-#if __ARM_ARCH__<7
-.code	32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
-.code   32
-# endif
-#endif
-
-.type	K256,%object
-.align	5
-K256:
-.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-.size	K256,.-K256
-.word	0				@ terminator
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha256_block_data_order
-#endif
-.align	5
-
-.global	sha256_block_data_order
-.type	sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
-#if __ARM_ARCH__<7
-	sub	r3,pc,#8		@ sha256_block_data_order
-#else
-	adr	r3,.Lsha256_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-	ldr	r12,.LOPENSSL_armcap
-	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-	tst	r12,#ARMV8_SHA256
-	bne	.LARMv8
-	tst	r12,#ARMV7_NEON
-	bne	.LNEON
-#endif
-	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
-	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
-	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
-	sub	r14,r3,#256+32	@ K256
-	sub	sp,sp,#16*4		@ alloca(X[16])
-.Loop:
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r5,r6		@ magic
-	eor	r12,r12,r12
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 0
-# if 0==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r8,r8,ror#5
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 0
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 0==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r8,r8,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r11,r11,r2			@ h+=X[i]
-	str	r2,[sp,#0*4]
-	eor	r2,r9,r10
-	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r8
-	add	r11,r11,r12			@ h+=K256[i]
-	eor	r2,r2,r10			@ Ch(e,f,g)
-	eor	r0,r4,r4,ror#11
-	add	r11,r11,r2			@ h+=Ch(e,f,g)
-#if 0==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 0<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r4,r5			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
-	eor	r12,r4,r5			@ a^b, b^c in next round
-	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r4,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r7,r7,r11			@ d+=h
-	eor	r3,r3,r5			@ Maj(a,b,c)
-	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 1
-# if 1==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r7,r7,ror#5
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 1
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 1==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r7,r7,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r10,r10,r2			@ h+=X[i]
-	str	r2,[sp,#1*4]
-	eor	r2,r8,r9
-	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r7
-	add	r10,r10,r3			@ h+=K256[i]
-	eor	r2,r2,r9			@ Ch(e,f,g)
-	eor	r0,r11,r11,ror#11
-	add	r10,r10,r2			@ h+=Ch(e,f,g)
-#if 1==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 1<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r11,r4			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
-	eor	r3,r11,r4			@ a^b, b^c in next round
-	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r11,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r6,r6,r10			@ d+=h
-	eor	r12,r12,r4			@ Maj(a,b,c)
-	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 2
-# if 2==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 2
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 2==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r6,r6,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r9,r9,r2			@ h+=X[i]
-	str	r2,[sp,#2*4]
-	eor	r2,r7,r8
-	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r6
-	add	r9,r9,r12			@ h+=K256[i]
-	eor	r2,r2,r8			@ Ch(e,f,g)
-	eor	r0,r10,r10,ror#11
-	add	r9,r9,r2			@ h+=Ch(e,f,g)
-#if 2==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 2<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r10,r11			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
-	eor	r12,r10,r11			@ a^b, b^c in next round
-	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r10,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r5,r5,r9			@ d+=h
-	eor	r3,r3,r11			@ Maj(a,b,c)
-	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 3
-# if 3==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r5,r5,ror#5
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 3
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 3==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r5,r5,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r8,r8,r2			@ h+=X[i]
-	str	r2,[sp,#3*4]
-	eor	r2,r6,r7
-	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r5
-	add	r8,r8,r3			@ h+=K256[i]
-	eor	r2,r2,r7			@ Ch(e,f,g)
-	eor	r0,r9,r9,ror#11
-	add	r8,r8,r2			@ h+=Ch(e,f,g)
-#if 3==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 3<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r9,r10			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
-	eor	r3,r9,r10			@ a^b, b^c in next round
-	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r9,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r4,r4,r8			@ d+=h
-	eor	r12,r12,r10			@ Maj(a,b,c)
-	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 4
-# if 4==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r4,r4,ror#5
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 4
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 4==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r4,r4,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r7,r7,r2			@ h+=X[i]
-	str	r2,[sp,#4*4]
-	eor	r2,r5,r6
-	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r4
-	add	r7,r7,r12			@ h+=K256[i]
-	eor	r2,r2,r6			@ Ch(e,f,g)
-	eor	r0,r8,r8,ror#11
-	add	r7,r7,r2			@ h+=Ch(e,f,g)
-#if 4==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 4<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r8,r9			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
-	eor	r12,r8,r9			@ a^b, b^c in next round
-	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r8,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r11,r11,r7			@ d+=h
-	eor	r3,r3,r9			@ Maj(a,b,c)
-	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 5
-# if 5==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r11,r11,ror#5
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 5
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 5==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r11,r11,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r6,r6,r2			@ h+=X[i]
-	str	r2,[sp,#5*4]
-	eor	r2,r4,r5
-	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r11
-	add	r6,r6,r3			@ h+=K256[i]
-	eor	r2,r2,r5			@ Ch(e,f,g)
-	eor	r0,r7,r7,ror#11
-	add	r6,r6,r2			@ h+=Ch(e,f,g)
-#if 5==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 5<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r7,r8			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
-	eor	r3,r7,r8			@ a^b, b^c in next round
-	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r7,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r10,r10,r6			@ d+=h
-	eor	r12,r12,r8			@ Maj(a,b,c)
-	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 6
-# if 6==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 6
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 6==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r10,r10,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r5,r5,r2			@ h+=X[i]
-	str	r2,[sp,#6*4]
-	eor	r2,r11,r4
-	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r10
-	add	r5,r5,r12			@ h+=K256[i]
-	eor	r2,r2,r4			@ Ch(e,f,g)
-	eor	r0,r6,r6,ror#11
-	add	r5,r5,r2			@ h+=Ch(e,f,g)
-#if 6==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 6<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r6,r7			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
-	eor	r12,r6,r7			@ a^b, b^c in next round
-	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r6,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r9,r9,r5			@ d+=h
-	eor	r3,r3,r7			@ Maj(a,b,c)
-	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 7
-# if 7==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r9,r9,ror#5
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 7
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 7==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r9,r9,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r4,r4,r2			@ h+=X[i]
-	str	r2,[sp,#7*4]
-	eor	r2,r10,r11
-	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r9
-	add	r4,r4,r3			@ h+=K256[i]
-	eor	r2,r2,r11			@ Ch(e,f,g)
-	eor	r0,r5,r5,ror#11
-	add	r4,r4,r2			@ h+=Ch(e,f,g)
-#if 7==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 7<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r5,r6			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
-	eor	r3,r5,r6			@ a^b, b^c in next round
-	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r5,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r8,r8,r4			@ d+=h
-	eor	r12,r12,r6			@ Maj(a,b,c)
-	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 8
-# if 8==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r8,r8,ror#5
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 8
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 8==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r8,r8,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r11,r11,r2			@ h+=X[i]
-	str	r2,[sp,#8*4]
-	eor	r2,r9,r10
-	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r8
-	add	r11,r11,r12			@ h+=K256[i]
-	eor	r2,r2,r10			@ Ch(e,f,g)
-	eor	r0,r4,r4,ror#11
-	add	r11,r11,r2			@ h+=Ch(e,f,g)
-#if 8==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 8<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r4,r5			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
-	eor	r12,r4,r5			@ a^b, b^c in next round
-	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r4,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r7,r7,r11			@ d+=h
-	eor	r3,r3,r5			@ Maj(a,b,c)
-	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 9
-# if 9==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r7,r7,ror#5
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 9
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 9==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r7,r7,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r10,r10,r2			@ h+=X[i]
-	str	r2,[sp,#9*4]
-	eor	r2,r8,r9
-	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r7
-	add	r10,r10,r3			@ h+=K256[i]
-	eor	r2,r2,r9			@ Ch(e,f,g)
-	eor	r0,r11,r11,ror#11
-	add	r10,r10,r2			@ h+=Ch(e,f,g)
-#if 9==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 9<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r11,r4			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
-	eor	r3,r11,r4			@ a^b, b^c in next round
-	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r11,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r6,r6,r10			@ d+=h
-	eor	r12,r12,r4			@ Maj(a,b,c)
-	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 10
-# if 10==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 10
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 10==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r6,r6,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r9,r9,r2			@ h+=X[i]
-	str	r2,[sp,#10*4]
-	eor	r2,r7,r8
-	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r6
-	add	r9,r9,r12			@ h+=K256[i]
-	eor	r2,r2,r8			@ Ch(e,f,g)
-	eor	r0,r10,r10,ror#11
-	add	r9,r9,r2			@ h+=Ch(e,f,g)
-#if 10==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 10<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r10,r11			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
-	eor	r12,r10,r11			@ a^b, b^c in next round
-	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r10,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r5,r5,r9			@ d+=h
-	eor	r3,r3,r11			@ Maj(a,b,c)
-	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 11
-# if 11==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r5,r5,ror#5
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 11
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 11==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r5,r5,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r8,r8,r2			@ h+=X[i]
-	str	r2,[sp,#11*4]
-	eor	r2,r6,r7
-	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r5
-	add	r8,r8,r3			@ h+=K256[i]
-	eor	r2,r2,r7			@ Ch(e,f,g)
-	eor	r0,r9,r9,ror#11
-	add	r8,r8,r2			@ h+=Ch(e,f,g)
-#if 11==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 11<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r9,r10			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
-	eor	r3,r9,r10			@ a^b, b^c in next round
-	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r9,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r4,r4,r8			@ d+=h
-	eor	r12,r12,r10			@ Maj(a,b,c)
-	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 12
-# if 12==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r4,r4,ror#5
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 12
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 12==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r4,r4,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r7,r7,r2			@ h+=X[i]
-	str	r2,[sp,#12*4]
-	eor	r2,r5,r6
-	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r4
-	add	r7,r7,r12			@ h+=K256[i]
-	eor	r2,r2,r6			@ Ch(e,f,g)
-	eor	r0,r8,r8,ror#11
-	add	r7,r7,r2			@ h+=Ch(e,f,g)
-#if 12==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 12<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r8,r9			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
-	eor	r12,r8,r9			@ a^b, b^c in next round
-	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r8,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r11,r11,r7			@ d+=h
-	eor	r3,r3,r9			@ Maj(a,b,c)
-	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 13
-# if 13==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r11,r11,ror#5
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 13
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 13==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r11,r11,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r6,r6,r2			@ h+=X[i]
-	str	r2,[sp,#13*4]
-	eor	r2,r4,r5
-	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r11
-	add	r6,r6,r3			@ h+=K256[i]
-	eor	r2,r2,r5			@ Ch(e,f,g)
-	eor	r0,r7,r7,ror#11
-	add	r6,r6,r2			@ h+=Ch(e,f,g)
-#if 13==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 13<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r7,r8			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
-	eor	r3,r7,r8			@ a^b, b^c in next round
-	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r7,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r10,r10,r6			@ d+=h
-	eor	r12,r12,r8			@ Maj(a,b,c)
-	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 14
-# if 14==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 14
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	ldrb	r12,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r12,lsl#8
-	ldrb	r12,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 14==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r10,r10,ror#5
-	orr	r2,r2,r12,lsl#24
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-#endif
-	ldr	r12,[r14],#4			@ *K256++
-	add	r5,r5,r2			@ h+=X[i]
-	str	r2,[sp,#14*4]
-	eor	r2,r11,r4
-	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r10
-	add	r5,r5,r12			@ h+=K256[i]
-	eor	r2,r2,r4			@ Ch(e,f,g)
-	eor	r0,r6,r6,ror#11
-	add	r5,r5,r2			@ h+=Ch(e,f,g)
-#if 14==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 14<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r6,r7			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
-	eor	r12,r6,r7			@ a^b, b^c in next round
-	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r6,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r9,r9,r5			@ d+=h
-	eor	r3,r3,r7			@ Maj(a,b,c)
-	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	@ ldr	r2,[r1],#4			@ 15
-# if 15==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r9,r9,ror#5
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-# ifndef __ARMEB__
-	rev	r2,r2
-# endif
-#else
-	@ ldrb	r2,[r1,#3]			@ 15
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	ldrb	r3,[r1,#2]
-	ldrb	r0,[r1,#1]
-	orr	r2,r2,r3,lsl#8
-	ldrb	r3,[r1],#4
-	orr	r2,r2,r0,lsl#16
-# if 15==15
-	str	r1,[sp,#17*4]			@ make room for r1
-# endif
-	eor	r0,r9,r9,ror#5
-	orr	r2,r2,r3,lsl#24
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-#endif
-	ldr	r3,[r14],#4			@ *K256++
-	add	r4,r4,r2			@ h+=X[i]
-	str	r2,[sp,#15*4]
-	eor	r2,r10,r11
-	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r9
-	add	r4,r4,r3			@ h+=K256[i]
-	eor	r2,r2,r11			@ Ch(e,f,g)
-	eor	r0,r5,r5,ror#11
-	add	r4,r4,r2			@ h+=Ch(e,f,g)
-#if 15==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 15<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r5,r6			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
-	eor	r3,r5,r6			@ a^b, b^c in next round
-	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r5,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r8,r8,r4			@ d+=h
-	eor	r12,r12,r6			@ Maj(a,b,c)
-	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
-.Lrounds_16_xx:
-	@ ldr	r2,[sp,#1*4]		@ 16
-	@ ldr	r1,[sp,#14*4]
-	mov	r0,r2,ror#7
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#0*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#9*4]
-
-	add	r12,r12,r0
-	eor	r0,r8,r8,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r11,r11,r2			@ h+=X[i]
-	str	r2,[sp,#0*4]
-	eor	r2,r9,r10
-	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r8
-	add	r11,r11,r12			@ h+=K256[i]
-	eor	r2,r2,r10			@ Ch(e,f,g)
-	eor	r0,r4,r4,ror#11
-	add	r11,r11,r2			@ h+=Ch(e,f,g)
-#if 16==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 16<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r4,r5			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
-	eor	r12,r4,r5			@ a^b, b^c in next round
-	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r4,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r7,r7,r11			@ d+=h
-	eor	r3,r3,r5			@ Maj(a,b,c)
-	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#2*4]		@ 17
-	@ ldr	r1,[sp,#15*4]
-	mov	r0,r2,ror#7
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#1*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#10*4]
-
-	add	r3,r3,r0
-	eor	r0,r7,r7,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r10,r10,r2			@ h+=X[i]
-	str	r2,[sp,#1*4]
-	eor	r2,r8,r9
-	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r7
-	add	r10,r10,r3			@ h+=K256[i]
-	eor	r2,r2,r9			@ Ch(e,f,g)
-	eor	r0,r11,r11,ror#11
-	add	r10,r10,r2			@ h+=Ch(e,f,g)
-#if 17==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 17<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r11,r4			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
-	eor	r3,r11,r4			@ a^b, b^c in next round
-	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r11,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r6,r6,r10			@ d+=h
-	eor	r12,r12,r4			@ Maj(a,b,c)
-	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#3*4]		@ 18
-	@ ldr	r1,[sp,#0*4]
-	mov	r0,r2,ror#7
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#2*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#11*4]
-
-	add	r12,r12,r0
-	eor	r0,r6,r6,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r9,r9,r2			@ h+=X[i]
-	str	r2,[sp,#2*4]
-	eor	r2,r7,r8
-	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r6
-	add	r9,r9,r12			@ h+=K256[i]
-	eor	r2,r2,r8			@ Ch(e,f,g)
-	eor	r0,r10,r10,ror#11
-	add	r9,r9,r2			@ h+=Ch(e,f,g)
-#if 18==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 18<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r10,r11			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
-	eor	r12,r10,r11			@ a^b, b^c in next round
-	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r10,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r5,r5,r9			@ d+=h
-	eor	r3,r3,r11			@ Maj(a,b,c)
-	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#4*4]		@ 19
-	@ ldr	r1,[sp,#1*4]
-	mov	r0,r2,ror#7
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#3*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#12*4]
-
-	add	r3,r3,r0
-	eor	r0,r5,r5,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r8,r8,r2			@ h+=X[i]
-	str	r2,[sp,#3*4]
-	eor	r2,r6,r7
-	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r5
-	add	r8,r8,r3			@ h+=K256[i]
-	eor	r2,r2,r7			@ Ch(e,f,g)
-	eor	r0,r9,r9,ror#11
-	add	r8,r8,r2			@ h+=Ch(e,f,g)
-#if 19==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 19<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r9,r10			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
-	eor	r3,r9,r10			@ a^b, b^c in next round
-	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r9,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r4,r4,r8			@ d+=h
-	eor	r12,r12,r10			@ Maj(a,b,c)
-	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#5*4]		@ 20
-	@ ldr	r1,[sp,#2*4]
-	mov	r0,r2,ror#7
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#4*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#13*4]
-
-	add	r12,r12,r0
-	eor	r0,r4,r4,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r7,r7,r2			@ h+=X[i]
-	str	r2,[sp,#4*4]
-	eor	r2,r5,r6
-	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r4
-	add	r7,r7,r12			@ h+=K256[i]
-	eor	r2,r2,r6			@ Ch(e,f,g)
-	eor	r0,r8,r8,ror#11
-	add	r7,r7,r2			@ h+=Ch(e,f,g)
-#if 20==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 20<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r8,r9			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
-	eor	r12,r8,r9			@ a^b, b^c in next round
-	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r8,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r11,r11,r7			@ d+=h
-	eor	r3,r3,r9			@ Maj(a,b,c)
-	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#6*4]		@ 21
-	@ ldr	r1,[sp,#3*4]
-	mov	r0,r2,ror#7
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#5*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#14*4]
-
-	add	r3,r3,r0
-	eor	r0,r11,r11,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r6,r6,r2			@ h+=X[i]
-	str	r2,[sp,#5*4]
-	eor	r2,r4,r5
-	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r11
-	add	r6,r6,r3			@ h+=K256[i]
-	eor	r2,r2,r5			@ Ch(e,f,g)
-	eor	r0,r7,r7,ror#11
-	add	r6,r6,r2			@ h+=Ch(e,f,g)
-#if 21==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 21<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r7,r8			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
-	eor	r3,r7,r8			@ a^b, b^c in next round
-	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r7,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r10,r10,r6			@ d+=h
-	eor	r12,r12,r8			@ Maj(a,b,c)
-	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#7*4]		@ 22
-	@ ldr	r1,[sp,#4*4]
-	mov	r0,r2,ror#7
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#6*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#15*4]
-
-	add	r12,r12,r0
-	eor	r0,r10,r10,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r5,r5,r2			@ h+=X[i]
-	str	r2,[sp,#6*4]
-	eor	r2,r11,r4
-	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r10
-	add	r5,r5,r12			@ h+=K256[i]
-	eor	r2,r2,r4			@ Ch(e,f,g)
-	eor	r0,r6,r6,ror#11
-	add	r5,r5,r2			@ h+=Ch(e,f,g)
-#if 22==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 22<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r6,r7			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
-	eor	r12,r6,r7			@ a^b, b^c in next round
-	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r6,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r9,r9,r5			@ d+=h
-	eor	r3,r3,r7			@ Maj(a,b,c)
-	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#8*4]		@ 23
-	@ ldr	r1,[sp,#5*4]
-	mov	r0,r2,ror#7
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#7*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#0*4]
-
-	add	r3,r3,r0
-	eor	r0,r9,r9,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r4,r4,r2			@ h+=X[i]
-	str	r2,[sp,#7*4]
-	eor	r2,r10,r11
-	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r9
-	add	r4,r4,r3			@ h+=K256[i]
-	eor	r2,r2,r11			@ Ch(e,f,g)
-	eor	r0,r5,r5,ror#11
-	add	r4,r4,r2			@ h+=Ch(e,f,g)
-#if 23==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 23<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r5,r6			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
-	eor	r3,r5,r6			@ a^b, b^c in next round
-	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r5,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r8,r8,r4			@ d+=h
-	eor	r12,r12,r6			@ Maj(a,b,c)
-	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#9*4]		@ 24
-	@ ldr	r1,[sp,#6*4]
-	mov	r0,r2,ror#7
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#8*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#1*4]
-
-	add	r12,r12,r0
-	eor	r0,r8,r8,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r8,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r11,r11,r2			@ h+=X[i]
-	str	r2,[sp,#8*4]
-	eor	r2,r9,r10
-	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r8
-	add	r11,r11,r12			@ h+=K256[i]
-	eor	r2,r2,r10			@ Ch(e,f,g)
-	eor	r0,r4,r4,ror#11
-	add	r11,r11,r2			@ h+=Ch(e,f,g)
-#if 24==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 24<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r4,r5			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
-	eor	r12,r4,r5			@ a^b, b^c in next round
-	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r4,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r7,r7,r11			@ d+=h
-	eor	r3,r3,r5			@ Maj(a,b,c)
-	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#10*4]		@ 25
-	@ ldr	r1,[sp,#7*4]
-	mov	r0,r2,ror#7
-	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#9*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#2*4]
-
-	add	r3,r3,r0
-	eor	r0,r7,r7,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r7,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r10,r10,r2			@ h+=X[i]
-	str	r2,[sp,#9*4]
-	eor	r2,r8,r9
-	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r7
-	add	r10,r10,r3			@ h+=K256[i]
-	eor	r2,r2,r9			@ Ch(e,f,g)
-	eor	r0,r11,r11,ror#11
-	add	r10,r10,r2			@ h+=Ch(e,f,g)
-#if 25==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 25<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r11,r4			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
-	eor	r3,r11,r4			@ a^b, b^c in next round
-	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r11,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r6,r6,r10			@ d+=h
-	eor	r12,r12,r4			@ Maj(a,b,c)
-	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#11*4]		@ 26
-	@ ldr	r1,[sp,#8*4]
-	mov	r0,r2,ror#7
-	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#10*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#3*4]
-
-	add	r12,r12,r0
-	eor	r0,r6,r6,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r6,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r9,r9,r2			@ h+=X[i]
-	str	r2,[sp,#10*4]
-	eor	r2,r7,r8
-	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r6
-	add	r9,r9,r12			@ h+=K256[i]
-	eor	r2,r2,r8			@ Ch(e,f,g)
-	eor	r0,r10,r10,ror#11
-	add	r9,r9,r2			@ h+=Ch(e,f,g)
-#if 26==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 26<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r10,r11			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
-	eor	r12,r10,r11			@ a^b, b^c in next round
-	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r10,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r5,r5,r9			@ d+=h
-	eor	r3,r3,r11			@ Maj(a,b,c)
-	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#12*4]		@ 27
-	@ ldr	r1,[sp,#9*4]
-	mov	r0,r2,ror#7
-	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#11*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#4*4]
-
-	add	r3,r3,r0
-	eor	r0,r5,r5,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r5,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r8,r8,r2			@ h+=X[i]
-	str	r2,[sp,#11*4]
-	eor	r2,r6,r7
-	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r5
-	add	r8,r8,r3			@ h+=K256[i]
-	eor	r2,r2,r7			@ Ch(e,f,g)
-	eor	r0,r9,r9,ror#11
-	add	r8,r8,r2			@ h+=Ch(e,f,g)
-#if 27==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 27<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r9,r10			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
-	eor	r3,r9,r10			@ a^b, b^c in next round
-	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r9,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r4,r4,r8			@ d+=h
-	eor	r12,r12,r10			@ Maj(a,b,c)
-	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#13*4]		@ 28
-	@ ldr	r1,[sp,#10*4]
-	mov	r0,r2,ror#7
-	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#12*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#5*4]
-
-	add	r12,r12,r0
-	eor	r0,r4,r4,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r4,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r7,r7,r2			@ h+=X[i]
-	str	r2,[sp,#12*4]
-	eor	r2,r5,r6
-	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r4
-	add	r7,r7,r12			@ h+=K256[i]
-	eor	r2,r2,r6			@ Ch(e,f,g)
-	eor	r0,r8,r8,ror#11
-	add	r7,r7,r2			@ h+=Ch(e,f,g)
-#if 28==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 28<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r8,r9			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
-	eor	r12,r8,r9			@ a^b, b^c in next round
-	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r8,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r11,r11,r7			@ d+=h
-	eor	r3,r3,r9			@ Maj(a,b,c)
-	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#14*4]		@ 29
-	@ ldr	r1,[sp,#11*4]
-	mov	r0,r2,ror#7
-	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#13*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#6*4]
-
-	add	r3,r3,r0
-	eor	r0,r11,r11,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r11,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r6,r6,r2			@ h+=X[i]
-	str	r2,[sp,#13*4]
-	eor	r2,r4,r5
-	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r11
-	add	r6,r6,r3			@ h+=K256[i]
-	eor	r2,r2,r5			@ Ch(e,f,g)
-	eor	r0,r7,r7,ror#11
-	add	r6,r6,r2			@ h+=Ch(e,f,g)
-#if 29==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 29<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r7,r8			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
-	eor	r3,r7,r8			@ a^b, b^c in next round
-	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r7,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r10,r10,r6			@ d+=h
-	eor	r12,r12,r8			@ Maj(a,b,c)
-	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#15*4]		@ 30
-	@ ldr	r1,[sp,#12*4]
-	mov	r0,r2,ror#7
-	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
-	mov	r12,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r12,r12,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#14*4]
-	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#7*4]
-
-	add	r12,r12,r0
-	eor	r0,r10,r10,ror#5	@ from BODY_00_15
-	add	r2,r2,r12
-	eor	r0,r0,r10,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r12,[r14],#4			@ *K256++
-	add	r5,r5,r2			@ h+=X[i]
-	str	r2,[sp,#14*4]
-	eor	r2,r11,r4
-	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r10
-	add	r5,r5,r12			@ h+=K256[i]
-	eor	r2,r2,r4			@ Ch(e,f,g)
-	eor	r0,r6,r6,ror#11
-	add	r5,r5,r2			@ h+=Ch(e,f,g)
-#if 30==31
-	and	r12,r12,#0xff
-	cmp	r12,#0xf2			@ done?
-#endif
-#if 30<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r12,r6,r7			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
-	eor	r12,r6,r7			@ a^b, b^c in next round
-	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r6,ror#20	@ Sigma0(a)
-	and	r3,r3,r12			@ (b^c)&=(a^b)
-	add	r9,r9,r5			@ d+=h
-	eor	r3,r3,r7			@ Maj(a,b,c)
-	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
-	@ ldr	r2,[sp,#0*4]		@ 31
-	@ ldr	r1,[sp,#13*4]
-	mov	r0,r2,ror#7
-	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
-	mov	r3,r1,ror#17
-	eor	r0,r0,r2,ror#18
-	eor	r3,r3,r1,ror#19
-	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
-	ldr	r2,[sp,#15*4]
-	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
-	ldr	r1,[sp,#8*4]
-
-	add	r3,r3,r0
-	eor	r0,r9,r9,ror#5	@ from BODY_00_15
-	add	r2,r2,r3
-	eor	r0,r0,r9,ror#19	@ Sigma1(e)
-	add	r2,r2,r1			@ X[i]
-	ldr	r3,[r14],#4			@ *K256++
-	add	r4,r4,r2			@ h+=X[i]
-	str	r2,[sp,#15*4]
-	eor	r2,r10,r11
-	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
-	and	r2,r2,r9
-	add	r4,r4,r3			@ h+=K256[i]
-	eor	r2,r2,r11			@ Ch(e,f,g)
-	eor	r0,r5,r5,ror#11
-	add	r4,r4,r2			@ h+=Ch(e,f,g)
-#if 31==31
-	and	r3,r3,#0xff
-	cmp	r3,#0xf2			@ done?
-#endif
-#if 31<15
-# if __ARM_ARCH__>=7
-	ldr	r2,[r1],#4			@ prefetch
-# else
-	ldrb	r2,[r1,#3]
-# endif
-	eor	r3,r5,r6			@ a^b, b^c in next round
-#else
-	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
-	eor	r3,r5,r6			@ a^b, b^c in next round
-	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
-#endif
-	eor	r0,r0,r5,ror#20	@ Sigma0(a)
-	and	r12,r12,r3			@ (b^c)&=(a^b)
-	add	r8,r8,r4			@ d+=h
-	eor	r12,r12,r6			@ Maj(a,b,c)
-	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
-	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
-	ite	eq			@ Thumb2 thing, sanity check in ARM
-#endif
-	ldreq	r3,[sp,#16*4]		@ pull ctx
-	bne	.Lrounds_16_xx
-
-	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
-	ldr	r0,[r3,#0]
-	ldr	r2,[r3,#4]
-	ldr	r12,[r3,#8]
-	add	r4,r4,r0
-	ldr	r0,[r3,#12]
-	add	r5,r5,r2
-	ldr	r2,[r3,#16]
-	add	r6,r6,r12
-	ldr	r12,[r3,#20]
-	add	r7,r7,r0
-	ldr	r0,[r3,#24]
-	add	r8,r8,r2
-	ldr	r2,[r3,#28]
-	add	r9,r9,r12
-	ldr	r1,[sp,#17*4]		@ pull inp
-	ldr	r12,[sp,#18*4]		@ pull inp+len
-	add	r10,r10,r0
-	add	r11,r11,r2
-	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
-	cmp	r1,r12
-	sub	r14,r14,#256	@ rewind Ktbl
-	bne	.Loop
-
-	add	sp,sp,#19*4	@ destroy frame
-#if __ARM_ARCH__>=5
-	ldmia	sp!,{r4-r11,pc}
-#else
-	ldmia	sp!,{r4-r11,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	sha256_block_data_order,.-sha256_block_data_order
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.global	sha256_block_data_order_neon
-.type	sha256_block_data_order_neon,%function
-.align	4
-sha256_block_data_order_neon:
-.LNEON:
-	stmdb	sp!,{r4-r12,lr}
-
-	sub	r11,sp,#16*4+16
-	adr	r14,.Lsha256_block_data_order
-	sub	r14,r14,#.Lsha256_block_data_order-K256
-	bic	r11,r11,#15		@ align for 128-bit stores
-	mov	r12,sp
-	mov	sp,r11			@ alloca
-	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
-
-	vld1.8		{q0},[r1]!
-	vld1.8		{q1},[r1]!
-	vld1.8		{q2},[r1]!
-	vld1.8		{q3},[r1]!
-	vld1.32		{q8},[r14,:128]!
-	vld1.32		{q9},[r14,:128]!
-	vld1.32		{q10},[r14,:128]!
-	vld1.32		{q11},[r14,:128]!
-	vrev32.8	q0,q0		@ yes, even on
-	str		r0,[sp,#64]
-	vrev32.8	q1,q1		@ big-endian
-	str		r1,[sp,#68]
-	mov		r1,sp
-	vrev32.8	q2,q2
-	str		r2,[sp,#72]
-	vrev32.8	q3,q3
-	str		r12,[sp,#76]		@ save original sp
-	vadd.i32	q8,q8,q0
-	vadd.i32	q9,q9,q1
-	vst1.32		{q8},[r1,:128]!
-	vadd.i32	q10,q10,q2
-	vst1.32		{q9},[r1,:128]!
-	vadd.i32	q11,q11,q3
-	vst1.32		{q10},[r1,:128]!
-	vst1.32		{q11},[r1,:128]!
-
-	ldmia		r0,{r4-r11}
-	sub		r1,r1,#64
-	ldr		r2,[sp,#0]
-	eor		r12,r12,r12
-	eor		r3,r5,r6
-	b		.L_00_48
-
-.align	4
-.L_00_48:
-	vext.8	q8,q0,q1,#4
-	add	r11,r11,r2
-	eor	r2,r9,r10
-	eor	r0,r8,r8,ror#5
-	vext.8	q9,q2,q3,#4
-	add	r4,r4,r12
-	and	r2,r2,r8
-	eor	r12,r0,r8,ror#19
-	vshr.u32	q10,q8,#7
-	eor	r0,r4,r4,ror#11
-	eor	r2,r2,r10
-	vadd.i32	q0,q0,q9
-	add	r11,r11,r12,ror#6
-	eor	r12,r4,r5
-	vshr.u32	q9,q8,#3
-	eor	r0,r0,r4,ror#20
-	add	r11,r11,r2
-	vsli.32	q10,q8,#25
-	ldr	r2,[sp,#4]
-	and	r3,r3,r12
-	vshr.u32	q11,q8,#18
-	add	r7,r7,r11
-	add	r11,r11,r0,ror#2
-	eor	r3,r3,r5
-	veor	q9,q9,q10
-	add	r10,r10,r2
-	vsli.32	q11,q8,#14
-	eor	r2,r8,r9
-	eor	r0,r7,r7,ror#5
-	vshr.u32	d24,d7,#17
-	add	r11,r11,r3
-	and	r2,r2,r7
-	veor	q9,q9,q11
-	eor	r3,r0,r7,ror#19
-	eor	r0,r11,r11,ror#11
-	vsli.32	d24,d7,#15
-	eor	r2,r2,r9
-	add	r10,r10,r3,ror#6
-	vshr.u32	d25,d7,#10
-	eor	r3,r11,r4
-	eor	r0,r0,r11,ror#20
-	vadd.i32	q0,q0,q9
-	add	r10,r10,r2
-	ldr	r2,[sp,#8]
-	veor	d25,d25,d24
-	and	r12,r12,r3
-	add	r6,r6,r10
-	vshr.u32	d24,d7,#19
-	add	r10,r10,r0,ror#2
-	eor	r12,r12,r4
-	vsli.32	d24,d7,#13
-	add	r9,r9,r2
-	eor	r2,r7,r8
-	veor	d25,d25,d24
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12
-	vadd.i32	d0,d0,d25
-	and	r2,r2,r6
-	eor	r12,r0,r6,ror#19
-	vshr.u32	d24,d0,#17
-	eor	r0,r10,r10,ror#11
-	eor	r2,r2,r8
-	vsli.32	d24,d0,#15
-	add	r9,r9,r12,ror#6
-	eor	r12,r10,r11
-	vshr.u32	d25,d0,#10
-	eor	r0,r0,r10,ror#20
-	add	r9,r9,r2
-	veor	d25,d25,d24
-	ldr	r2,[sp,#12]
-	and	r3,r3,r12
-	vshr.u32	d24,d0,#19
-	add	r5,r5,r9
-	add	r9,r9,r0,ror#2
-	eor	r3,r3,r11
-	vld1.32	{q8},[r14,:128]!
-	add	r8,r8,r2
-	vsli.32	d24,d0,#13
-	eor	r2,r6,r7
-	eor	r0,r5,r5,ror#5
-	veor	d25,d25,d24
-	add	r9,r9,r3
-	and	r2,r2,r5
-	vadd.i32	d1,d1,d25
-	eor	r3,r0,r5,ror#19
-	eor	r0,r9,r9,ror#11
-	vadd.i32	q8,q8,q0
-	eor	r2,r2,r7
-	add	r8,r8,r3,ror#6
-	eor	r3,r9,r10
-	eor	r0,r0,r9,ror#20
-	add	r8,r8,r2
-	ldr	r2,[sp,#16]
-	and	r12,r12,r3
-	add	r4,r4,r8
-	vst1.32	{q8},[r1,:128]!
-	add	r8,r8,r0,ror#2
-	eor	r12,r12,r10
-	vext.8	q8,q1,q2,#4
-	add	r7,r7,r2
-	eor	r2,r5,r6
-	eor	r0,r4,r4,ror#5
-	vext.8	q9,q3,q0,#4
-	add	r8,r8,r12
-	and	r2,r2,r4
-	eor	r12,r0,r4,ror#19
-	vshr.u32	q10,q8,#7
-	eor	r0,r8,r8,ror#11
-	eor	r2,r2,r6
-	vadd.i32	q1,q1,q9
-	add	r7,r7,r12,ror#6
-	eor	r12,r8,r9
-	vshr.u32	q9,q8,#3
-	eor	r0,r0,r8,ror#20
-	add	r7,r7,r2
-	vsli.32	q10,q8,#25
-	ldr	r2,[sp,#20]
-	and	r3,r3,r12
-	vshr.u32	q11,q8,#18
-	add	r11,r11,r7
-	add	r7,r7,r0,ror#2
-	eor	r3,r3,r9
-	veor	q9,q9,q10
-	add	r6,r6,r2
-	vsli.32	q11,q8,#14
-	eor	r2,r4,r5
-	eor	r0,r11,r11,ror#5
-	vshr.u32	d24,d1,#17
-	add	r7,r7,r3
-	and	r2,r2,r11
-	veor	q9,q9,q11
-	eor	r3,r0,r11,ror#19
-	eor	r0,r7,r7,ror#11
-	vsli.32	d24,d1,#15
-	eor	r2,r2,r5
-	add	r6,r6,r3,ror#6
-	vshr.u32	d25,d1,#10
-	eor	r3,r7,r8
-	eor	r0,r0,r7,ror#20
-	vadd.i32	q1,q1,q9
-	add	r6,r6,r2
-	ldr	r2,[sp,#24]
-	veor	d25,d25,d24
-	and	r12,r12,r3
-	add	r10,r10,r6
-	vshr.u32	d24,d1,#19
-	add	r6,r6,r0,ror#2
-	eor	r12,r12,r8
-	vsli.32	d24,d1,#13
-	add	r5,r5,r2
-	eor	r2,r11,r4
-	veor	d25,d25,d24
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12
-	vadd.i32	d2,d2,d25
-	and	r2,r2,r10
-	eor	r12,r0,r10,ror#19
-	vshr.u32	d24,d2,#17
-	eor	r0,r6,r6,ror#11
-	eor	r2,r2,r4
-	vsli.32	d24,d2,#15
-	add	r5,r5,r12,ror#6
-	eor	r12,r6,r7
-	vshr.u32	d25,d2,#10
-	eor	r0,r0,r6,ror#20
-	add	r5,r5,r2
-	veor	d25,d25,d24
-	ldr	r2,[sp,#28]
-	and	r3,r3,r12
-	vshr.u32	d24,d2,#19
-	add	r9,r9,r5
-	add	r5,r5,r0,ror#2
-	eor	r3,r3,r7
-	vld1.32	{q8},[r14,:128]!
-	add	r4,r4,r2
-	vsli.32	d24,d2,#13
-	eor	r2,r10,r11
-	eor	r0,r9,r9,ror#5
-	veor	d25,d25,d24
-	add	r5,r5,r3
-	and	r2,r2,r9
-	vadd.i32	d3,d3,d25
-	eor	r3,r0,r9,ror#19
-	eor	r0,r5,r5,ror#11
-	vadd.i32	q8,q8,q1
-	eor	r2,r2,r11
-	add	r4,r4,r3,ror#6
-	eor	r3,r5,r6
-	eor	r0,r0,r5,ror#20
-	add	r4,r4,r2
-	ldr	r2,[sp,#32]
-	and	r12,r12,r3
-	add	r8,r8,r4
-	vst1.32	{q8},[r1,:128]!
-	add	r4,r4,r0,ror#2
-	eor	r12,r12,r6
-	vext.8	q8,q2,q3,#4
-	add	r11,r11,r2
-	eor	r2,r9,r10
-	eor	r0,r8,r8,ror#5
-	vext.8	q9,q0,q1,#4
-	add	r4,r4,r12
-	and	r2,r2,r8
-	eor	r12,r0,r8,ror#19
-	vshr.u32	q10,q8,#7
-	eor	r0,r4,r4,ror#11
-	eor	r2,r2,r10
-	vadd.i32	q2,q2,q9
-	add	r11,r11,r12,ror#6
-	eor	r12,r4,r5
-	vshr.u32	q9,q8,#3
-	eor	r0,r0,r4,ror#20
-	add	r11,r11,r2
-	vsli.32	q10,q8,#25
-	ldr	r2,[sp,#36]
-	and	r3,r3,r12
-	vshr.u32	q11,q8,#18
-	add	r7,r7,r11
-	add	r11,r11,r0,ror#2
-	eor	r3,r3,r5
-	veor	q9,q9,q10
-	add	r10,r10,r2
-	vsli.32	q11,q8,#14
-	eor	r2,r8,r9
-	eor	r0,r7,r7,ror#5
-	vshr.u32	d24,d3,#17
-	add	r11,r11,r3
-	and	r2,r2,r7
-	veor	q9,q9,q11
-	eor	r3,r0,r7,ror#19
-	eor	r0,r11,r11,ror#11
-	vsli.32	d24,d3,#15
-	eor	r2,r2,r9
-	add	r10,r10,r3,ror#6
-	vshr.u32	d25,d3,#10
-	eor	r3,r11,r4
-	eor	r0,r0,r11,ror#20
-	vadd.i32	q2,q2,q9
-	add	r10,r10,r2
-	ldr	r2,[sp,#40]
-	veor	d25,d25,d24
-	and	r12,r12,r3
-	add	r6,r6,r10
-	vshr.u32	d24,d3,#19
-	add	r10,r10,r0,ror#2
-	eor	r12,r12,r4
-	vsli.32	d24,d3,#13
-	add	r9,r9,r2
-	eor	r2,r7,r8
-	veor	d25,d25,d24
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12
-	vadd.i32	d4,d4,d25
-	and	r2,r2,r6
-	eor	r12,r0,r6,ror#19
-	vshr.u32	d24,d4,#17
-	eor	r0,r10,r10,ror#11
-	eor	r2,r2,r8
-	vsli.32	d24,d4,#15
-	add	r9,r9,r12,ror#6
-	eor	r12,r10,r11
-	vshr.u32	d25,d4,#10
-	eor	r0,r0,r10,ror#20
-	add	r9,r9,r2
-	veor	d25,d25,d24
-	ldr	r2,[sp,#44]
-	and	r3,r3,r12
-	vshr.u32	d24,d4,#19
-	add	r5,r5,r9
-	add	r9,r9,r0,ror#2
-	eor	r3,r3,r11
-	vld1.32	{q8},[r14,:128]!
-	add	r8,r8,r2
-	vsli.32	d24,d4,#13
-	eor	r2,r6,r7
-	eor	r0,r5,r5,ror#5
-	veor	d25,d25,d24
-	add	r9,r9,r3
-	and	r2,r2,r5
-	vadd.i32	d5,d5,d25
-	eor	r3,r0,r5,ror#19
-	eor	r0,r9,r9,ror#11
-	vadd.i32	q8,q8,q2
-	eor	r2,r2,r7
-	add	r8,r8,r3,ror#6
-	eor	r3,r9,r10
-	eor	r0,r0,r9,ror#20
-	add	r8,r8,r2
-	ldr	r2,[sp,#48]
-	and	r12,r12,r3
-	add	r4,r4,r8
-	vst1.32	{q8},[r1,:128]!
-	add	r8,r8,r0,ror#2
-	eor	r12,r12,r10
-	vext.8	q8,q3,q0,#4
-	add	r7,r7,r2
-	eor	r2,r5,r6
-	eor	r0,r4,r4,ror#5
-	vext.8	q9,q1,q2,#4
-	add	r8,r8,r12
-	and	r2,r2,r4
-	eor	r12,r0,r4,ror#19
-	vshr.u32	q10,q8,#7
-	eor	r0,r8,r8,ror#11
-	eor	r2,r2,r6
-	vadd.i32	q3,q3,q9
-	add	r7,r7,r12,ror#6
-	eor	r12,r8,r9
-	vshr.u32	q9,q8,#3
-	eor	r0,r0,r8,ror#20
-	add	r7,r7,r2
-	vsli.32	q10,q8,#25
-	ldr	r2,[sp,#52]
-	and	r3,r3,r12
-	vshr.u32	q11,q8,#18
-	add	r11,r11,r7
-	add	r7,r7,r0,ror#2
-	eor	r3,r3,r9
-	veor	q9,q9,q10
-	add	r6,r6,r2
-	vsli.32	q11,q8,#14
-	eor	r2,r4,r5
-	eor	r0,r11,r11,ror#5
-	vshr.u32	d24,d5,#17
-	add	r7,r7,r3
-	and	r2,r2,r11
-	veor	q9,q9,q11
-	eor	r3,r0,r11,ror#19
-	eor	r0,r7,r7,ror#11
-	vsli.32	d24,d5,#15
-	eor	r2,r2,r5
-	add	r6,r6,r3,ror#6
-	vshr.u32	d25,d5,#10
-	eor	r3,r7,r8
-	eor	r0,r0,r7,ror#20
-	vadd.i32	q3,q3,q9
-	add	r6,r6,r2
-	ldr	r2,[sp,#56]
-	veor	d25,d25,d24
-	and	r12,r12,r3
-	add	r10,r10,r6
-	vshr.u32	d24,d5,#19
-	add	r6,r6,r0,ror#2
-	eor	r12,r12,r8
-	vsli.32	d24,d5,#13
-	add	r5,r5,r2
-	eor	r2,r11,r4
-	veor	d25,d25,d24
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12
-	vadd.i32	d6,d6,d25
-	and	r2,r2,r10
-	eor	r12,r0,r10,ror#19
-	vshr.u32	d24,d6,#17
-	eor	r0,r6,r6,ror#11
-	eor	r2,r2,r4
-	vsli.32	d24,d6,#15
-	add	r5,r5,r12,ror#6
-	eor	r12,r6,r7
-	vshr.u32	d25,d6,#10
-	eor	r0,r0,r6,ror#20
-	add	r5,r5,r2
-	veor	d25,d25,d24
-	ldr	r2,[sp,#60]
-	and	r3,r3,r12
-	vshr.u32	d24,d6,#19
-	add	r9,r9,r5
-	add	r5,r5,r0,ror#2
-	eor	r3,r3,r7
-	vld1.32	{q8},[r14,:128]!
-	add	r4,r4,r2
-	vsli.32	d24,d6,#13
-	eor	r2,r10,r11
-	eor	r0,r9,r9,ror#5
-	veor	d25,d25,d24
-	add	r5,r5,r3
-	and	r2,r2,r9
-	vadd.i32	d7,d7,d25
-	eor	r3,r0,r9,ror#19
-	eor	r0,r5,r5,ror#11
-	vadd.i32	q8,q8,q3
-	eor	r2,r2,r11
-	add	r4,r4,r3,ror#6
-	eor	r3,r5,r6
-	eor	r0,r0,r5,ror#20
-	add	r4,r4,r2
-	ldr	r2,[r14]
-	and	r12,r12,r3
-	add	r8,r8,r4
-	vst1.32	{q8},[r1,:128]!
-	add	r4,r4,r0,ror#2
-	eor	r12,r12,r6
-	teq	r2,#0				@ check for K256 terminator
-	ldr	r2,[sp,#0]
-	sub	r1,r1,#64
-	bne	.L_00_48
-
-	ldr		r1,[sp,#68]
-	ldr		r0,[sp,#72]
-	sub		r14,r14,#256	@ rewind r14
-	teq		r1,r0
-	it		eq
-	subeq		r1,r1,#64		@ avoid SEGV
-	vld1.8		{q0},[r1]!		@ load next input block
-	vld1.8		{q1},[r1]!
-	vld1.8		{q2},[r1]!
-	vld1.8		{q3},[r1]!
-	it		ne
-	strne		r1,[sp,#68]
-	mov		r1,sp
-	add	r11,r11,r2
-	eor	r2,r9,r10
-	eor	r0,r8,r8,ror#5
-	add	r4,r4,r12
-	vld1.32	{q8},[r14,:128]!
-	and	r2,r2,r8
-	eor	r12,r0,r8,ror#19
-	eor	r0,r4,r4,ror#11
-	eor	r2,r2,r10
-	vrev32.8	q0,q0
-	add	r11,r11,r12,ror#6
-	eor	r12,r4,r5
-	eor	r0,r0,r4,ror#20
-	add	r11,r11,r2
-	vadd.i32	q8,q8,q0
-	ldr	r2,[sp,#4]
-	and	r3,r3,r12
-	add	r7,r7,r11
-	add	r11,r11,r0,ror#2
-	eor	r3,r3,r5
-	add	r10,r10,r2
-	eor	r2,r8,r9
-	eor	r0,r7,r7,ror#5
-	add	r11,r11,r3
-	and	r2,r2,r7
-	eor	r3,r0,r7,ror#19
-	eor	r0,r11,r11,ror#11
-	eor	r2,r2,r9
-	add	r10,r10,r3,ror#6
-	eor	r3,r11,r4
-	eor	r0,r0,r11,ror#20
-	add	r10,r10,r2
-	ldr	r2,[sp,#8]
-	and	r12,r12,r3
-	add	r6,r6,r10
-	add	r10,r10,r0,ror#2
-	eor	r12,r12,r4
-	add	r9,r9,r2
-	eor	r2,r7,r8
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12
-	and	r2,r2,r6
-	eor	r12,r0,r6,ror#19
-	eor	r0,r10,r10,ror#11
-	eor	r2,r2,r8
-	add	r9,r9,r12,ror#6
-	eor	r12,r10,r11
-	eor	r0,r0,r10,ror#20
-	add	r9,r9,r2
-	ldr	r2,[sp,#12]
-	and	r3,r3,r12
-	add	r5,r5,r9
-	add	r9,r9,r0,ror#2
-	eor	r3,r3,r11
-	add	r8,r8,r2
-	eor	r2,r6,r7
-	eor	r0,r5,r5,ror#5
-	add	r9,r9,r3
-	and	r2,r2,r5
-	eor	r3,r0,r5,ror#19
-	eor	r0,r9,r9,ror#11
-	eor	r2,r2,r7
-	add	r8,r8,r3,ror#6
-	eor	r3,r9,r10
-	eor	r0,r0,r9,ror#20
-	add	r8,r8,r2
-	ldr	r2,[sp,#16]
-	and	r12,r12,r3
-	add	r4,r4,r8
-	add	r8,r8,r0,ror#2
-	eor	r12,r12,r10
-	vst1.32	{q8},[r1,:128]!
-	add	r7,r7,r2
-	eor	r2,r5,r6
-	eor	r0,r4,r4,ror#5
-	add	r8,r8,r12
-	vld1.32	{q8},[r14,:128]!
-	and	r2,r2,r4
-	eor	r12,r0,r4,ror#19
-	eor	r0,r8,r8,ror#11
-	eor	r2,r2,r6
-	vrev32.8	q1,q1
-	add	r7,r7,r12,ror#6
-	eor	r12,r8,r9
-	eor	r0,r0,r8,ror#20
-	add	r7,r7,r2
-	vadd.i32	q8,q8,q1
-	ldr	r2,[sp,#20]
-	and	r3,r3,r12
-	add	r11,r11,r7
-	add	r7,r7,r0,ror#2
-	eor	r3,r3,r9
-	add	r6,r6,r2
-	eor	r2,r4,r5
-	eor	r0,r11,r11,ror#5
-	add	r7,r7,r3
-	and	r2,r2,r11
-	eor	r3,r0,r11,ror#19
-	eor	r0,r7,r7,ror#11
-	eor	r2,r2,r5
-	add	r6,r6,r3,ror#6
-	eor	r3,r7,r8
-	eor	r0,r0,r7,ror#20
-	add	r6,r6,r2
-	ldr	r2,[sp,#24]
-	and	r12,r12,r3
-	add	r10,r10,r6
-	add	r6,r6,r0,ror#2
-	eor	r12,r12,r8
-	add	r5,r5,r2
-	eor	r2,r11,r4
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12
-	and	r2,r2,r10
-	eor	r12,r0,r10,ror#19
-	eor	r0,r6,r6,ror#11
-	eor	r2,r2,r4
-	add	r5,r5,r12,ror#6
-	eor	r12,r6,r7
-	eor	r0,r0,r6,ror#20
-	add	r5,r5,r2
-	ldr	r2,[sp,#28]
-	and	r3,r3,r12
-	add	r9,r9,r5
-	add	r5,r5,r0,ror#2
-	eor	r3,r3,r7
-	add	r4,r4,r2
-	eor	r2,r10,r11
-	eor	r0,r9,r9,ror#5
-	add	r5,r5,r3
-	and	r2,r2,r9
-	eor	r3,r0,r9,ror#19
-	eor	r0,r5,r5,ror#11
-	eor	r2,r2,r11
-	add	r4,r4,r3,ror#6
-	eor	r3,r5,r6
-	eor	r0,r0,r5,ror#20
-	add	r4,r4,r2
-	ldr	r2,[sp,#32]
-	and	r12,r12,r3
-	add	r8,r8,r4
-	add	r4,r4,r0,ror#2
-	eor	r12,r12,r6
-	vst1.32	{q8},[r1,:128]!
-	add	r11,r11,r2
-	eor	r2,r9,r10
-	eor	r0,r8,r8,ror#5
-	add	r4,r4,r12
-	vld1.32	{q8},[r14,:128]!
-	and	r2,r2,r8
-	eor	r12,r0,r8,ror#19
-	eor	r0,r4,r4,ror#11
-	eor	r2,r2,r10
-	vrev32.8	q2,q2
-	add	r11,r11,r12,ror#6
-	eor	r12,r4,r5
-	eor	r0,r0,r4,ror#20
-	add	r11,r11,r2
-	vadd.i32	q8,q8,q2
-	ldr	r2,[sp,#36]
-	and	r3,r3,r12
-	add	r7,r7,r11
-	add	r11,r11,r0,ror#2
-	eor	r3,r3,r5
-	add	r10,r10,r2
-	eor	r2,r8,r9
-	eor	r0,r7,r7,ror#5
-	add	r11,r11,r3
-	and	r2,r2,r7
-	eor	r3,r0,r7,ror#19
-	eor	r0,r11,r11,ror#11
-	eor	r2,r2,r9
-	add	r10,r10,r3,ror#6
-	eor	r3,r11,r4
-	eor	r0,r0,r11,ror#20
-	add	r10,r10,r2
-	ldr	r2,[sp,#40]
-	and	r12,r12,r3
-	add	r6,r6,r10
-	add	r10,r10,r0,ror#2
-	eor	r12,r12,r4
-	add	r9,r9,r2
-	eor	r2,r7,r8
-	eor	r0,r6,r6,ror#5
-	add	r10,r10,r12
-	and	r2,r2,r6
-	eor	r12,r0,r6,ror#19
-	eor	r0,r10,r10,ror#11
-	eor	r2,r2,r8
-	add	r9,r9,r12,ror#6
-	eor	r12,r10,r11
-	eor	r0,r0,r10,ror#20
-	add	r9,r9,r2
-	ldr	r2,[sp,#44]
-	and	r3,r3,r12
-	add	r5,r5,r9
-	add	r9,r9,r0,ror#2
-	eor	r3,r3,r11
-	add	r8,r8,r2
-	eor	r2,r6,r7
-	eor	r0,r5,r5,ror#5
-	add	r9,r9,r3
-	and	r2,r2,r5
-	eor	r3,r0,r5,ror#19
-	eor	r0,r9,r9,ror#11
-	eor	r2,r2,r7
-	add	r8,r8,r3,ror#6
-	eor	r3,r9,r10
-	eor	r0,r0,r9,ror#20
-	add	r8,r8,r2
-	ldr	r2,[sp,#48]
-	and	r12,r12,r3
-	add	r4,r4,r8
-	add	r8,r8,r0,ror#2
-	eor	r12,r12,r10
-	vst1.32	{q8},[r1,:128]!
-	add	r7,r7,r2
-	eor	r2,r5,r6
-	eor	r0,r4,r4,ror#5
-	add	r8,r8,r12
-	vld1.32	{q8},[r14,:128]!
-	and	r2,r2,r4
-	eor	r12,r0,r4,ror#19
-	eor	r0,r8,r8,ror#11
-	eor	r2,r2,r6
-	vrev32.8	q3,q3
-	add	r7,r7,r12,ror#6
-	eor	r12,r8,r9
-	eor	r0,r0,r8,ror#20
-	add	r7,r7,r2
-	vadd.i32	q8,q8,q3
-	ldr	r2,[sp,#52]
-	and	r3,r3,r12
-	add	r11,r11,r7
-	add	r7,r7,r0,ror#2
-	eor	r3,r3,r9
-	add	r6,r6,r2
-	eor	r2,r4,r5
-	eor	r0,r11,r11,ror#5
-	add	r7,r7,r3
-	and	r2,r2,r11
-	eor	r3,r0,r11,ror#19
-	eor	r0,r7,r7,ror#11
-	eor	r2,r2,r5
-	add	r6,r6,r3,ror#6
-	eor	r3,r7,r8
-	eor	r0,r0,r7,ror#20
-	add	r6,r6,r2
-	ldr	r2,[sp,#56]
-	and	r12,r12,r3
-	add	r10,r10,r6
-	add	r6,r6,r0,ror#2
-	eor	r12,r12,r8
-	add	r5,r5,r2
-	eor	r2,r11,r4
-	eor	r0,r10,r10,ror#5
-	add	r6,r6,r12
-	and	r2,r2,r10
-	eor	r12,r0,r10,ror#19
-	eor	r0,r6,r6,ror#11
-	eor	r2,r2,r4
-	add	r5,r5,r12,ror#6
-	eor	r12,r6,r7
-	eor	r0,r0,r6,ror#20
-	add	r5,r5,r2
-	ldr	r2,[sp,#60]
-	and	r3,r3,r12
-	add	r9,r9,r5
-	add	r5,r5,r0,ror#2
-	eor	r3,r3,r7
-	add	r4,r4,r2
-	eor	r2,r10,r11
-	eor	r0,r9,r9,ror#5
-	add	r5,r5,r3
-	and	r2,r2,r9
-	eor	r3,r0,r9,ror#19
-	eor	r0,r5,r5,ror#11
-	eor	r2,r2,r11
-	add	r4,r4,r3,ror#6
-	eor	r3,r5,r6
-	eor	r0,r0,r5,ror#20
-	add	r4,r4,r2
-	ldr	r2,[sp,#64]
-	and	r12,r12,r3
-	add	r8,r8,r4
-	add	r4,r4,r0,ror#2
-	eor	r12,r12,r6
-	vst1.32	{q8},[r1,:128]!
-	ldr	r0,[r2,#0]
-	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
-	ldr	r12,[r2,#4]
-	ldr	r3,[r2,#8]
-	ldr	r1,[r2,#12]
-	add	r4,r4,r0			@ accumulate
-	ldr	r0,[r2,#16]
-	add	r5,r5,r12
-	ldr	r12,[r2,#20]
-	add	r6,r6,r3
-	ldr	r3,[r2,#24]
-	add	r7,r7,r1
-	ldr	r1,[r2,#28]
-	add	r8,r8,r0
-	str	r4,[r2],#4
-	add	r9,r9,r12
-	str	r5,[r2],#4
-	add	r10,r10,r3
-	str	r6,[r2],#4
-	add	r11,r11,r1
-	str	r7,[r2],#4
-	stmia	r2,{r8-r11}
-
-	ittte	ne
-	movne	r1,sp
-	ldrne	r2,[sp,#0]
-	eorne	r12,r12,r12
-	ldreq	sp,[sp,#76]			@ restore original sp
-	itt	ne
-	eorne	r3,r5,r6
-	bne	.L_00_48
-
-	ldmia	sp!,{r4-r12,pc}
-.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-
-# ifdef __thumb2__
-#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
-# else
-#  define INST(a,b,c,d)	.byte	a,b,c,d
-# endif
-
-.type	sha256_block_data_order_armv8,%function
-.align	5
-sha256_block_data_order_armv8:
-.LARMv8:
-	vld1.32	{q0,q1},[r0]
-# ifdef __thumb2__
-	adr	r3,.LARMv8
-	sub	r3,r3,#.LARMv8-K256
-# else
-	adrl	r3,K256
-# endif
-	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
-
-.Loop_v8:
-	vld1.8		{q8-q9},[r1]!
-	vld1.8		{q10-q11},[r1]!
-	vld1.32		{q12},[r3]!
-	vrev32.8	q8,q8
-	vrev32.8	q9,q9
-	vrev32.8	q10,q10
-	vrev32.8	q11,q11
-	vmov		q14,q0	@ offload
-	vmov		q15,q1
-	teq		r1,r2
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q8
-	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q9
-	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q10
-	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q11
-	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q8
-	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q9
-	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q10
-	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q11
-	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q8
-	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q9
-	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q10
-	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q11
-	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
-	vld1.32		{q13},[r3]!
-	vadd.i32	q12,q12,q8
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-
-	vld1.32		{q12},[r3]!
-	vadd.i32	q13,q13,q9
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-
-	vld1.32		{q13},[r3]
-	vadd.i32	q12,q12,q10
-	sub		r3,r3,#256-16	@ rewind
-	vmov		q2,q0
-	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
-	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
-
-	vadd.i32	q13,q13,q11
-	vmov		q2,q0
-	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
-	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
-
-	vadd.i32	q0,q0,q14
-	vadd.i32	q1,q1,q15
-	it		ne
-	bne		.Loop_v8
-
-	vst1.32		{q0,q1},[r0]
-
-	bx	lr		@ bx lr
-.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
-#endif
-.asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align	2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm   OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
deleted file mode 100644
index 03014624f2ab5..0000000000000
--- a/arch/arm/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1869 +0,0 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.
-
-@ ====================================================================
-@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see https://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ SHA512 block procedure for ARMv4. September 2007.
-
-@ This code is ~4.5 (four and a half) times faster than code generated
-@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
-@ Xscale PXA250 core].
-@
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
-@ Cortex A8 core and ~40 cycles per processed byte.
-
-@ February 2011.
-@
-@ Profiler-assisted and platform-specific optimization resulted in 7%
-@ improvement on Coxtex A8 core and ~38 cycles per byte.
-
-@ March 2011.
-@
-@ Add NEON implementation. On Cortex A8 it was measured to process
-@ one byte in 23.3 cycles or ~60% faster than integer-only code.
-
-@ August 2012.
-@
-@ Improve NEON performance by 12% on Snapdragon S4. In absolute
-@ terms it's 22.6 cycles per byte, which is disappointing result.
-@ Technical writers asserted that 3-way S4 pipeline can sustain
-@ multiple NEON instructions per cycle, but dual NEON issue could
-@ not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
-@ for further details. On side note Cortex-A15 processes one byte in
-@ 16 cycles.
-
-@ Byte order [in]dependence. =========================================
-@
-@ Originally caller was expected to maintain specific *dword* order in
-@ h[0-7], namely with most significant dword at *lower* address, which
-@ was reflected in below two parameters as 0 and 4. Now caller is
-@ expected to maintain native byte order for whole 64-bit values.
-#ifndef __KERNEL__
-# include "arm_arch.h"
-# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
-# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ 7
-# define VFP_ABI_PUSH
-# define VFP_ABI_POP
-#endif
-
-#ifdef __ARMEL__
-# define LO 0
-# define HI 4
-# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
-#else
-# define HI 0
-# define LO 4
-# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
-#endif
-
-.text
-#if __ARM_ARCH__<7
-.code	32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
-.code   32
-# endif
-#endif
-
-.type	K512,%object
-.align	5
-K512:
-WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
-WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
-WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
-WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
-WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
-WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
-WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
-WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
-WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
-WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
-WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
-WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
-WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
-WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
-WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
-WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
-WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
-WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
-WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
-WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
-WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
-WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
-WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
-WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
-WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
-WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
-WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
-WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
-WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
-WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
-WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
-WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
-WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
-WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
-WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
-WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
-WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
-WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
-WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
-WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
-.size	K512,.-K512
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha512_block_data_order
-.skip	32-4
-#else
-.skip	32
-#endif
-
-.global	sha512_block_data_order
-.type	sha512_block_data_order,%function
-sha512_block_data_order:
-.Lsha512_block_data_order:
-#if __ARM_ARCH__<7
-	sub	r3,pc,#8		@ sha512_block_data_order
-#else
-	adr	r3,.Lsha512_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-	ldr	r12,.LOPENSSL_armcap
-	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-	tst	r12,#1
-	bne	.LNEON
-#endif
-	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
-	stmdb	sp!,{r4-r12,lr}
-	sub	r14,r3,#672		@ K512
-	sub	sp,sp,#9*8
-
-	ldr	r7,[r0,#32+LO]
-	ldr	r8,[r0,#32+HI]
-	ldr	r9, [r0,#48+LO]
-	ldr	r10, [r0,#48+HI]
-	ldr	r11, [r0,#56+LO]
-	ldr	r12, [r0,#56+HI]
-.Loop:
-	str	r9, [sp,#48+0]
-	str	r10, [sp,#48+4]
-	str	r11, [sp,#56+0]
-	str	r12, [sp,#56+4]
-	ldr	r5,[r0,#0+LO]
-	ldr	r6,[r0,#0+HI]
-	ldr	r3,[r0,#8+LO]
-	ldr	r4,[r0,#8+HI]
-	ldr	r9, [r0,#16+LO]
-	ldr	r10, [r0,#16+HI]
-	ldr	r11, [r0,#24+LO]
-	ldr	r12, [r0,#24+HI]
-	str	r3,[sp,#8+0]
-	str	r4,[sp,#8+4]
-	str	r9, [sp,#16+0]
-	str	r10, [sp,#16+4]
-	str	r11, [sp,#24+0]
-	str	r12, [sp,#24+4]
-	ldr	r3,[r0,#40+LO]
-	ldr	r4,[r0,#40+HI]
-	str	r3,[sp,#40+0]
-	str	r4,[sp,#40+4]
-
-.L00_15:
-#if __ARM_ARCH__<7
-	ldrb	r3,[r1,#7]
-	ldrb	r9, [r1,#6]
-	ldrb	r10, [r1,#5]
-	ldrb	r11, [r1,#4]
-	ldrb	r4,[r1,#3]
-	ldrb	r12, [r1,#2]
-	orr	r3,r3,r9,lsl#8
-	ldrb	r9, [r1,#1]
-	orr	r3,r3,r10,lsl#16
-	ldrb	r10, [r1],#8
-	orr	r3,r3,r11,lsl#24
-	orr	r4,r4,r12,lsl#8
-	orr	r4,r4,r9,lsl#16
-	orr	r4,r4,r10,lsl#24
-#else
-	ldr	r3,[r1,#4]
-	ldr	r4,[r1],#8
-#ifdef __ARMEL__
-	rev	r3,r3
-	rev	r4,r4
-#endif
-#endif
-	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
-	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
-	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
-	mov	r9,r7,lsr#14
-	str	r3,[sp,#64+0]
-	mov	r10,r8,lsr#14
-	str	r4,[sp,#64+4]
-	eor	r9,r9,r8,lsl#18
-	ldr	r11,[sp,#56+0]	@ h.lo
-	eor	r10,r10,r7,lsl#18
-	ldr	r12,[sp,#56+4]	@ h.hi
-	eor	r9,r9,r7,lsr#18
-	eor	r10,r10,r8,lsr#18
-	eor	r9,r9,r8,lsl#14
-	eor	r10,r10,r7,lsl#14
-	eor	r9,r9,r8,lsr#9
-	eor	r10,r10,r7,lsr#9
-	eor	r9,r9,r7,lsl#23
-	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
-	adds	r3,r3,r9
-	ldr	r9,[sp,#40+0]	@ f.lo
-	adc	r4,r4,r10		@ T += Sigma1(e)
-	ldr	r10,[sp,#40+4]	@ f.hi
-	adds	r3,r3,r11
-	ldr	r11,[sp,#48+0]	@ g.lo
-	adc	r4,r4,r12		@ T += h
-	ldr	r12,[sp,#48+4]	@ g.hi
-
-	eor	r9,r9,r11
-	str	r7,[sp,#32+0]
-	eor	r10,r10,r12
-	str	r8,[sp,#32+4]
-	and	r9,r9,r7
-	str	r5,[sp,#0+0]
-	and	r10,r10,r8
-	str	r6,[sp,#0+4]
-	eor	r9,r9,r11
-	ldr	r11,[r14,#LO]	@ K[i].lo
-	eor	r10,r10,r12		@ Ch(e,f,g)
-	ldr	r12,[r14,#HI]	@ K[i].hi
-
-	adds	r3,r3,r9
-	ldr	r7,[sp,#24+0]	@ d.lo
-	adc	r4,r4,r10		@ T += Ch(e,f,g)
-	ldr	r8,[sp,#24+4]	@ d.hi
-	adds	r3,r3,r11
-	and	r9,r11,#0xff
-	adc	r4,r4,r12		@ T += K[i]
-	adds	r7,r7,r3
-	ldr	r11,[sp,#8+0]	@ b.lo
-	adc	r8,r8,r4		@ d += T
-	teq	r9,#148
-
-	ldr	r12,[sp,#16+0]	@ c.lo
-#if __ARM_ARCH__>=7
-	it	eq			@ Thumb2 thing, sanity check in ARM
-#endif
-	orreq	r14,r14,#1
-	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
-	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
-	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
-	mov	r9,r5,lsr#28
-	mov	r10,r6,lsr#28
-	eor	r9,r9,r6,lsl#4
-	eor	r10,r10,r5,lsl#4
-	eor	r9,r9,r6,lsr#2
-	eor	r10,r10,r5,lsr#2
-	eor	r9,r9,r5,lsl#30
-	eor	r10,r10,r6,lsl#30
-	eor	r9,r9,r6,lsr#7
-	eor	r10,r10,r5,lsr#7
-	eor	r9,r9,r5,lsl#25
-	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
-	adds	r3,r3,r9
-	and	r9,r5,r11
-	adc	r4,r4,r10		@ T += Sigma0(a)
-
-	ldr	r10,[sp,#8+4]	@ b.hi
-	orr	r5,r5,r11
-	ldr	r11,[sp,#16+4]	@ c.hi
-	and	r5,r5,r12
-	and	r12,r6,r10
-	orr	r6,r6,r10
-	orr	r5,r5,r9		@ Maj(a,b,c).lo
-	and	r6,r6,r11
-	adds	r5,r5,r3
-	orr	r6,r6,r12		@ Maj(a,b,c).hi
-	sub	sp,sp,#8
-	adc	r6,r6,r4		@ h += T
-	tst	r14,#1
-	add	r14,r14,#8
-	tst	r14,#1
-	beq	.L00_15
-	ldr	r9,[sp,#184+0]
-	ldr	r10,[sp,#184+4]
-	bic	r14,r14,#1
-.L16_79:
-	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
-	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
-	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
-	mov	r3,r9,lsr#1
-	ldr	r11,[sp,#80+0]
-	mov	r4,r10,lsr#1
-	ldr	r12,[sp,#80+4]
-	eor	r3,r3,r10,lsl#31
-	eor	r4,r4,r9,lsl#31
-	eor	r3,r3,r9,lsr#8
-	eor	r4,r4,r10,lsr#8
-	eor	r3,r3,r10,lsl#24
-	eor	r4,r4,r9,lsl#24
-	eor	r3,r3,r9,lsr#7
-	eor	r4,r4,r10,lsr#7
-	eor	r3,r3,r10,lsl#25
-
-	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
-	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
-	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
-	mov	r9,r11,lsr#19
-	mov	r10,r12,lsr#19
-	eor	r9,r9,r12,lsl#13
-	eor	r10,r10,r11,lsl#13
-	eor	r9,r9,r12,lsr#29
-	eor	r10,r10,r11,lsr#29
-	eor	r9,r9,r11,lsl#3
-	eor	r10,r10,r12,lsl#3
-	eor	r9,r9,r11,lsr#6
-	eor	r10,r10,r12,lsr#6
-	ldr	r11,[sp,#120+0]
-	eor	r9,r9,r12,lsl#26
-
-	ldr	r12,[sp,#120+4]
-	adds	r3,r3,r9
-	ldr	r9,[sp,#192+0]
-	adc	r4,r4,r10
-
-	ldr	r10,[sp,#192+4]
-	adds	r3,r3,r11
-	adc	r4,r4,r12
-	adds	r3,r3,r9
-	adc	r4,r4,r10
-	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
-	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
-	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
-	mov	r9,r7,lsr#14
-	str	r3,[sp,#64+0]
-	mov	r10,r8,lsr#14
-	str	r4,[sp,#64+4]
-	eor	r9,r9,r8,lsl#18
-	ldr	r11,[sp,#56+0]	@ h.lo
-	eor	r10,r10,r7,lsl#18
-	ldr	r12,[sp,#56+4]	@ h.hi
-	eor	r9,r9,r7,lsr#18
-	eor	r10,r10,r8,lsr#18
-	eor	r9,r9,r8,lsl#14
-	eor	r10,r10,r7,lsl#14
-	eor	r9,r9,r8,lsr#9
-	eor	r10,r10,r7,lsr#9
-	eor	r9,r9,r7,lsl#23
-	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
-	adds	r3,r3,r9
-	ldr	r9,[sp,#40+0]	@ f.lo
-	adc	r4,r4,r10		@ T += Sigma1(e)
-	ldr	r10,[sp,#40+4]	@ f.hi
-	adds	r3,r3,r11
-	ldr	r11,[sp,#48+0]	@ g.lo
-	adc	r4,r4,r12		@ T += h
-	ldr	r12,[sp,#48+4]	@ g.hi
-
-	eor	r9,r9,r11
-	str	r7,[sp,#32+0]
-	eor	r10,r10,r12
-	str	r8,[sp,#32+4]
-	and	r9,r9,r7
-	str	r5,[sp,#0+0]
-	and	r10,r10,r8
-	str	r6,[sp,#0+4]
-	eor	r9,r9,r11
-	ldr	r11,[r14,#LO]	@ K[i].lo
-	eor	r10,r10,r12		@ Ch(e,f,g)
-	ldr	r12,[r14,#HI]	@ K[i].hi
-
-	adds	r3,r3,r9
-	ldr	r7,[sp,#24+0]	@ d.lo
-	adc	r4,r4,r10		@ T += Ch(e,f,g)
-	ldr	r8,[sp,#24+4]	@ d.hi
-	adds	r3,r3,r11
-	and	r9,r11,#0xff
-	adc	r4,r4,r12		@ T += K[i]
-	adds	r7,r7,r3
-	ldr	r11,[sp,#8+0]	@ b.lo
-	adc	r8,r8,r4		@ d += T
-	teq	r9,#23
-
-	ldr	r12,[sp,#16+0]	@ c.lo
-#if __ARM_ARCH__>=7
-	it	eq			@ Thumb2 thing, sanity check in ARM
-#endif
-	orreq	r14,r14,#1
-	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
-	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
-	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
-	mov	r9,r5,lsr#28
-	mov	r10,r6,lsr#28
-	eor	r9,r9,r6,lsl#4
-	eor	r10,r10,r5,lsl#4
-	eor	r9,r9,r6,lsr#2
-	eor	r10,r10,r5,lsr#2
-	eor	r9,r9,r5,lsl#30
-	eor	r10,r10,r6,lsl#30
-	eor	r9,r9,r6,lsr#7
-	eor	r10,r10,r5,lsr#7
-	eor	r9,r9,r5,lsl#25
-	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
-	adds	r3,r3,r9
-	and	r9,r5,r11
-	adc	r4,r4,r10		@ T += Sigma0(a)
-
-	ldr	r10,[sp,#8+4]	@ b.hi
-	orr	r5,r5,r11
-	ldr	r11,[sp,#16+4]	@ c.hi
-	and	r5,r5,r12
-	and	r12,r6,r10
-	orr	r6,r6,r10
-	orr	r5,r5,r9		@ Maj(a,b,c).lo
-	and	r6,r6,r11
-	adds	r5,r5,r3
-	orr	r6,r6,r12		@ Maj(a,b,c).hi
-	sub	sp,sp,#8
-	adc	r6,r6,r4		@ h += T
-	tst	r14,#1
-	add	r14,r14,#8
-#if __ARM_ARCH__>=7
-	ittt	eq			@ Thumb2 thing, sanity check in ARM
-#endif
-	ldreq	r9,[sp,#184+0]
-	ldreq	r10,[sp,#184+4]
-	beq	.L16_79
-	bic	r14,r14,#1
-
-	ldr	r3,[sp,#8+0]
-	ldr	r4,[sp,#8+4]
-	ldr	r9, [r0,#0+LO]
-	ldr	r10, [r0,#0+HI]
-	ldr	r11, [r0,#8+LO]
-	ldr	r12, [r0,#8+HI]
-	adds	r9,r5,r9
-	str	r9, [r0,#0+LO]
-	adc	r10,r6,r10
-	str	r10, [r0,#0+HI]
-	adds	r11,r3,r11
-	str	r11, [r0,#8+LO]
-	adc	r12,r4,r12
-	str	r12, [r0,#8+HI]
-
-	ldr	r5,[sp,#16+0]
-	ldr	r6,[sp,#16+4]
-	ldr	r3,[sp,#24+0]
-	ldr	r4,[sp,#24+4]
-	ldr	r9, [r0,#16+LO]
-	ldr	r10, [r0,#16+HI]
-	ldr	r11, [r0,#24+LO]
-	ldr	r12, [r0,#24+HI]
-	adds	r9,r5,r9
-	str	r9, [r0,#16+LO]
-	adc	r10,r6,r10
-	str	r10, [r0,#16+HI]
-	adds	r11,r3,r11
-	str	r11, [r0,#24+LO]
-	adc	r12,r4,r12
-	str	r12, [r0,#24+HI]
-
-	ldr	r3,[sp,#40+0]
-	ldr	r4,[sp,#40+4]
-	ldr	r9, [r0,#32+LO]
-	ldr	r10, [r0,#32+HI]
-	ldr	r11, [r0,#40+LO]
-	ldr	r12, [r0,#40+HI]
-	adds	r7,r7,r9
-	str	r7,[r0,#32+LO]
-	adc	r8,r8,r10
-	str	r8,[r0,#32+HI]
-	adds	r11,r3,r11
-	str	r11, [r0,#40+LO]
-	adc	r12,r4,r12
-	str	r12, [r0,#40+HI]
-
-	ldr	r5,[sp,#48+0]
-	ldr	r6,[sp,#48+4]
-	ldr	r3,[sp,#56+0]
-	ldr	r4,[sp,#56+4]
-	ldr	r9, [r0,#48+LO]
-	ldr	r10, [r0,#48+HI]
-	ldr	r11, [r0,#56+LO]
-	ldr	r12, [r0,#56+HI]
-	adds	r9,r5,r9
-	str	r9, [r0,#48+LO]
-	adc	r10,r6,r10
-	str	r10, [r0,#48+HI]
-	adds	r11,r3,r11
-	str	r11, [r0,#56+LO]
-	adc	r12,r4,r12
-	str	r12, [r0,#56+HI]
-
-	add	sp,sp,#640
-	sub	r14,r14,#640
-
-	teq	r1,r2
-	bne	.Loop
-
-	add	sp,sp,#8*9		@ destroy frame
-#if __ARM_ARCH__>=5
-	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
-.size	sha512_block_data_order,.-sha512_block_data_order
-#if __ARM_MAX_ARCH__>=7
-.arch	armv7-a
-.fpu	neon
-
-.global	sha512_block_data_order_neon
-.type	sha512_block_data_order_neon,%function
-.align	4
-sha512_block_data_order_neon:
-.LNEON:
-	dmb				@ errata #451034 on early Cortex A8
-	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
-	VFP_ABI_PUSH
-	adr	r3,.Lsha512_block_data_order
-	sub	r3,r3,.Lsha512_block_data_order-K512
-	vldmia	r0,{d16-d23}		@ load context
-.Loop_neon:
-	vshr.u64	d24,d20,#14	@ 0
-#if 0<16
-	vld1.64		{d0},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d20,#18
-#if 0>0
-	 vadd.i64	d16,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d20,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d20,#50
-	vsli.64		d25,d20,#46
-	vmov		d29,d20
-	vsli.64		d26,d20,#23
-#if 0<16 && defined(__ARMEL__)
-	vrev64.8	d0,d0
-#endif
-	veor		d25,d24
-	vbsl		d29,d21,d22		@ Ch(e,f,g)
-	vshr.u64	d24,d16,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d23
-	vshr.u64	d25,d16,#34
-	vsli.64		d24,d16,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d16,#39
-	vadd.i64	d28,d0
-	vsli.64		d25,d16,#30
-	veor		d30,d16,d17
-	vsli.64		d26,d16,#25
-	veor		d23,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d18,d17		@ Maj(a,b,c)
-	veor		d23,d26			@ Sigma0(a)
-	vadd.i64	d19,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d23,d30
-	vshr.u64	d24,d19,#14	@ 1
-#if 1<16
-	vld1.64		{d1},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d19,#18
-#if 1>0
-	 vadd.i64	d23,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d19,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d19,#50
-	vsli.64		d25,d19,#46
-	vmov		d29,d19
-	vsli.64		d26,d19,#23
-#if 1<16 && defined(__ARMEL__)
-	vrev64.8	d1,d1
-#endif
-	veor		d25,d24
-	vbsl		d29,d20,d21		@ Ch(e,f,g)
-	vshr.u64	d24,d23,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d22
-	vshr.u64	d25,d23,#34
-	vsli.64		d24,d23,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d23,#39
-	vadd.i64	d28,d1
-	vsli.64		d25,d23,#30
-	veor		d30,d23,d16
-	vsli.64		d26,d23,#25
-	veor		d22,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d17,d16		@ Maj(a,b,c)
-	veor		d22,d26			@ Sigma0(a)
-	vadd.i64	d18,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d22,d30
-	vshr.u64	d24,d18,#14	@ 2
-#if 2<16
-	vld1.64		{d2},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d18,#18
-#if 2>0
-	 vadd.i64	d22,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d18,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d18,#50
-	vsli.64		d25,d18,#46
-	vmov		d29,d18
-	vsli.64		d26,d18,#23
-#if 2<16 && defined(__ARMEL__)
-	vrev64.8	d2,d2
-#endif
-	veor		d25,d24
-	vbsl		d29,d19,d20		@ Ch(e,f,g)
-	vshr.u64	d24,d22,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d21
-	vshr.u64	d25,d22,#34
-	vsli.64		d24,d22,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d22,#39
-	vadd.i64	d28,d2
-	vsli.64		d25,d22,#30
-	veor		d30,d22,d23
-	vsli.64		d26,d22,#25
-	veor		d21,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d16,d23		@ Maj(a,b,c)
-	veor		d21,d26			@ Sigma0(a)
-	vadd.i64	d17,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d21,d30
-	vshr.u64	d24,d17,#14	@ 3
-#if 3<16
-	vld1.64		{d3},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d17,#18
-#if 3>0
-	 vadd.i64	d21,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d17,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d17,#50
-	vsli.64		d25,d17,#46
-	vmov		d29,d17
-	vsli.64		d26,d17,#23
-#if 3<16 && defined(__ARMEL__)
-	vrev64.8	d3,d3
-#endif
-	veor		d25,d24
-	vbsl		d29,d18,d19		@ Ch(e,f,g)
-	vshr.u64	d24,d21,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d20
-	vshr.u64	d25,d21,#34
-	vsli.64		d24,d21,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d21,#39
-	vadd.i64	d28,d3
-	vsli.64		d25,d21,#30
-	veor		d30,d21,d22
-	vsli.64		d26,d21,#25
-	veor		d20,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d23,d22		@ Maj(a,b,c)
-	veor		d20,d26			@ Sigma0(a)
-	vadd.i64	d16,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d20,d30
-	vshr.u64	d24,d16,#14	@ 4
-#if 4<16
-	vld1.64		{d4},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d16,#18
-#if 4>0
-	 vadd.i64	d20,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d16,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d16,#50
-	vsli.64		d25,d16,#46
-	vmov		d29,d16
-	vsli.64		d26,d16,#23
-#if 4<16 && defined(__ARMEL__)
-	vrev64.8	d4,d4
-#endif
-	veor		d25,d24
-	vbsl		d29,d17,d18		@ Ch(e,f,g)
-	vshr.u64	d24,d20,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d19
-	vshr.u64	d25,d20,#34
-	vsli.64		d24,d20,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d20,#39
-	vadd.i64	d28,d4
-	vsli.64		d25,d20,#30
-	veor		d30,d20,d21
-	vsli.64		d26,d20,#25
-	veor		d19,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d22,d21		@ Maj(a,b,c)
-	veor		d19,d26			@ Sigma0(a)
-	vadd.i64	d23,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d19,d30
-	vshr.u64	d24,d23,#14	@ 5
-#if 5<16
-	vld1.64		{d5},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d23,#18
-#if 5>0
-	 vadd.i64	d19,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d23,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d23,#50
-	vsli.64		d25,d23,#46
-	vmov		d29,d23
-	vsli.64		d26,d23,#23
-#if 5<16 && defined(__ARMEL__)
-	vrev64.8	d5,d5
-#endif
-	veor		d25,d24
-	vbsl		d29,d16,d17		@ Ch(e,f,g)
-	vshr.u64	d24,d19,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d18
-	vshr.u64	d25,d19,#34
-	vsli.64		d24,d19,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d19,#39
-	vadd.i64	d28,d5
-	vsli.64		d25,d19,#30
-	veor		d30,d19,d20
-	vsli.64		d26,d19,#25
-	veor		d18,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d21,d20		@ Maj(a,b,c)
-	veor		d18,d26			@ Sigma0(a)
-	vadd.i64	d22,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d18,d30
-	vshr.u64	d24,d22,#14	@ 6
-#if 6<16
-	vld1.64		{d6},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d22,#18
-#if 6>0
-	 vadd.i64	d18,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d22,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d22,#50
-	vsli.64		d25,d22,#46
-	vmov		d29,d22
-	vsli.64		d26,d22,#23
-#if 6<16 && defined(__ARMEL__)
-	vrev64.8	d6,d6
-#endif
-	veor		d25,d24
-	vbsl		d29,d23,d16		@ Ch(e,f,g)
-	vshr.u64	d24,d18,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d17
-	vshr.u64	d25,d18,#34
-	vsli.64		d24,d18,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d18,#39
-	vadd.i64	d28,d6
-	vsli.64		d25,d18,#30
-	veor		d30,d18,d19
-	vsli.64		d26,d18,#25
-	veor		d17,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d20,d19		@ Maj(a,b,c)
-	veor		d17,d26			@ Sigma0(a)
-	vadd.i64	d21,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d17,d30
-	vshr.u64	d24,d21,#14	@ 7
-#if 7<16
-	vld1.64		{d7},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d21,#18
-#if 7>0
-	 vadd.i64	d17,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d21,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d21,#50
-	vsli.64		d25,d21,#46
-	vmov		d29,d21
-	vsli.64		d26,d21,#23
-#if 7<16 && defined(__ARMEL__)
-	vrev64.8	d7,d7
-#endif
-	veor		d25,d24
-	vbsl		d29,d22,d23		@ Ch(e,f,g)
-	vshr.u64	d24,d17,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d16
-	vshr.u64	d25,d17,#34
-	vsli.64		d24,d17,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d17,#39
-	vadd.i64	d28,d7
-	vsli.64		d25,d17,#30
-	veor		d30,d17,d18
-	vsli.64		d26,d17,#25
-	veor		d16,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d19,d18		@ Maj(a,b,c)
-	veor		d16,d26			@ Sigma0(a)
-	vadd.i64	d20,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d16,d30
-	vshr.u64	d24,d20,#14	@ 8
-#if 8<16
-	vld1.64		{d8},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d20,#18
-#if 8>0
-	 vadd.i64	d16,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d20,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d20,#50
-	vsli.64		d25,d20,#46
-	vmov		d29,d20
-	vsli.64		d26,d20,#23
-#if 8<16 && defined(__ARMEL__)
-	vrev64.8	d8,d8
-#endif
-	veor		d25,d24
-	vbsl		d29,d21,d22		@ Ch(e,f,g)
-	vshr.u64	d24,d16,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d23
-	vshr.u64	d25,d16,#34
-	vsli.64		d24,d16,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d16,#39
-	vadd.i64	d28,d8
-	vsli.64		d25,d16,#30
-	veor		d30,d16,d17
-	vsli.64		d26,d16,#25
-	veor		d23,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d18,d17		@ Maj(a,b,c)
-	veor		d23,d26			@ Sigma0(a)
-	vadd.i64	d19,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d23,d30
-	vshr.u64	d24,d19,#14	@ 9
-#if 9<16
-	vld1.64		{d9},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d19,#18
-#if 9>0
-	 vadd.i64	d23,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d19,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d19,#50
-	vsli.64		d25,d19,#46
-	vmov		d29,d19
-	vsli.64		d26,d19,#23
-#if 9<16 && defined(__ARMEL__)
-	vrev64.8	d9,d9
-#endif
-	veor		d25,d24
-	vbsl		d29,d20,d21		@ Ch(e,f,g)
-	vshr.u64	d24,d23,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d22
-	vshr.u64	d25,d23,#34
-	vsli.64		d24,d23,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d23,#39
-	vadd.i64	d28,d9
-	vsli.64		d25,d23,#30
-	veor		d30,d23,d16
-	vsli.64		d26,d23,#25
-	veor		d22,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d17,d16		@ Maj(a,b,c)
-	veor		d22,d26			@ Sigma0(a)
-	vadd.i64	d18,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d22,d30
-	vshr.u64	d24,d18,#14	@ 10
-#if 10<16
-	vld1.64		{d10},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d18,#18
-#if 10>0
-	 vadd.i64	d22,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d18,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d18,#50
-	vsli.64		d25,d18,#46
-	vmov		d29,d18
-	vsli.64		d26,d18,#23
-#if 10<16 && defined(__ARMEL__)
-	vrev64.8	d10,d10
-#endif
-	veor		d25,d24
-	vbsl		d29,d19,d20		@ Ch(e,f,g)
-	vshr.u64	d24,d22,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d21
-	vshr.u64	d25,d22,#34
-	vsli.64		d24,d22,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d22,#39
-	vadd.i64	d28,d10
-	vsli.64		d25,d22,#30
-	veor		d30,d22,d23
-	vsli.64		d26,d22,#25
-	veor		d21,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d16,d23		@ Maj(a,b,c)
-	veor		d21,d26			@ Sigma0(a)
-	vadd.i64	d17,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d21,d30
-	vshr.u64	d24,d17,#14	@ 11
-#if 11<16
-	vld1.64		{d11},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d17,#18
-#if 11>0
-	 vadd.i64	d21,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d17,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d17,#50
-	vsli.64		d25,d17,#46
-	vmov		d29,d17
-	vsli.64		d26,d17,#23
-#if 11<16 && defined(__ARMEL__)
-	vrev64.8	d11,d11
-#endif
-	veor		d25,d24
-	vbsl		d29,d18,d19		@ Ch(e,f,g)
-	vshr.u64	d24,d21,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d20
-	vshr.u64	d25,d21,#34
-	vsli.64		d24,d21,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d21,#39
-	vadd.i64	d28,d11
-	vsli.64		d25,d21,#30
-	veor		d30,d21,d22
-	vsli.64		d26,d21,#25
-	veor		d20,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d23,d22		@ Maj(a,b,c)
-	veor		d20,d26			@ Sigma0(a)
-	vadd.i64	d16,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d20,d30
-	vshr.u64	d24,d16,#14	@ 12
-#if 12<16
-	vld1.64		{d12},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d16,#18
-#if 12>0
-	 vadd.i64	d20,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d16,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d16,#50
-	vsli.64		d25,d16,#46
-	vmov		d29,d16
-	vsli.64		d26,d16,#23
-#if 12<16 && defined(__ARMEL__)
-	vrev64.8	d12,d12
-#endif
-	veor		d25,d24
-	vbsl		d29,d17,d18		@ Ch(e,f,g)
-	vshr.u64	d24,d20,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d19
-	vshr.u64	d25,d20,#34
-	vsli.64		d24,d20,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d20,#39
-	vadd.i64	d28,d12
-	vsli.64		d25,d20,#30
-	veor		d30,d20,d21
-	vsli.64		d26,d20,#25
-	veor		d19,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d22,d21		@ Maj(a,b,c)
-	veor		d19,d26			@ Sigma0(a)
-	vadd.i64	d23,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d19,d30
-	vshr.u64	d24,d23,#14	@ 13
-#if 13<16
-	vld1.64		{d13},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d23,#18
-#if 13>0
-	 vadd.i64	d19,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d23,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d23,#50
-	vsli.64		d25,d23,#46
-	vmov		d29,d23
-	vsli.64		d26,d23,#23
-#if 13<16 && defined(__ARMEL__)
-	vrev64.8	d13,d13
-#endif
-	veor		d25,d24
-	vbsl		d29,d16,d17		@ Ch(e,f,g)
-	vshr.u64	d24,d19,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d18
-	vshr.u64	d25,d19,#34
-	vsli.64		d24,d19,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d19,#39
-	vadd.i64	d28,d13
-	vsli.64		d25,d19,#30
-	veor		d30,d19,d20
-	vsli.64		d26,d19,#25
-	veor		d18,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d21,d20		@ Maj(a,b,c)
-	veor		d18,d26			@ Sigma0(a)
-	vadd.i64	d22,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d18,d30
-	vshr.u64	d24,d22,#14	@ 14
-#if 14<16
-	vld1.64		{d14},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d22,#18
-#if 14>0
-	 vadd.i64	d18,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d22,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d22,#50
-	vsli.64		d25,d22,#46
-	vmov		d29,d22
-	vsli.64		d26,d22,#23
-#if 14<16 && defined(__ARMEL__)
-	vrev64.8	d14,d14
-#endif
-	veor		d25,d24
-	vbsl		d29,d23,d16		@ Ch(e,f,g)
-	vshr.u64	d24,d18,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d17
-	vshr.u64	d25,d18,#34
-	vsli.64		d24,d18,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d18,#39
-	vadd.i64	d28,d14
-	vsli.64		d25,d18,#30
-	veor		d30,d18,d19
-	vsli.64		d26,d18,#25
-	veor		d17,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d20,d19		@ Maj(a,b,c)
-	veor		d17,d26			@ Sigma0(a)
-	vadd.i64	d21,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d17,d30
-	vshr.u64	d24,d21,#14	@ 15
-#if 15<16
-	vld1.64		{d15},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d21,#18
-#if 15>0
-	 vadd.i64	d17,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d21,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d21,#50
-	vsli.64		d25,d21,#46
-	vmov		d29,d21
-	vsli.64		d26,d21,#23
-#if 15<16 && defined(__ARMEL__)
-	vrev64.8	d15,d15
-#endif
-	veor		d25,d24
-	vbsl		d29,d22,d23		@ Ch(e,f,g)
-	vshr.u64	d24,d17,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d16
-	vshr.u64	d25,d17,#34
-	vsli.64		d24,d17,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d17,#39
-	vadd.i64	d28,d15
-	vsli.64		d25,d17,#30
-	veor		d30,d17,d18
-	vsli.64		d26,d17,#25
-	veor		d16,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d19,d18		@ Maj(a,b,c)
-	veor		d16,d26			@ Sigma0(a)
-	vadd.i64	d20,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d16,d30
-	mov		r12,#4
-.L16_79_neon:
-	subs		r12,#1
-	vshr.u64	q12,q7,#19
-	vshr.u64	q13,q7,#61
-	 vadd.i64	d16,d30			@ h+=Maj from the past
-	vshr.u64	q15,q7,#6
-	vsli.64		q12,q7,#45
-	vext.8		q14,q0,q1,#8	@ X[i+1]
-	vsli.64		q13,q7,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q0,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q4,q5,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d20,#14		@ from NEON_00_15
-	vadd.i64	q0,q14
-	vshr.u64	d25,d20,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d20,#41		@ from NEON_00_15
-	vadd.i64	q0,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d20,#50
-	vsli.64		d25,d20,#46
-	vmov		d29,d20
-	vsli.64		d26,d20,#23
-#if 16<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d21,d22		@ Ch(e,f,g)
-	vshr.u64	d24,d16,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d23
-	vshr.u64	d25,d16,#34
-	vsli.64		d24,d16,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d16,#39
-	vadd.i64	d28,d0
-	vsli.64		d25,d16,#30
-	veor		d30,d16,d17
-	vsli.64		d26,d16,#25
-	veor		d23,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d18,d17		@ Maj(a,b,c)
-	veor		d23,d26			@ Sigma0(a)
-	vadd.i64	d19,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d23,d30
-	vshr.u64	d24,d19,#14	@ 17
-#if 17<16
-	vld1.64		{d1},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d19,#18
-#if 17>0
-	 vadd.i64	d23,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d19,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d19,#50
-	vsli.64		d25,d19,#46
-	vmov		d29,d19
-	vsli.64		d26,d19,#23
-#if 17<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d20,d21		@ Ch(e,f,g)
-	vshr.u64	d24,d23,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d22
-	vshr.u64	d25,d23,#34
-	vsli.64		d24,d23,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d23,#39
-	vadd.i64	d28,d1
-	vsli.64		d25,d23,#30
-	veor		d30,d23,d16
-	vsli.64		d26,d23,#25
-	veor		d22,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d17,d16		@ Maj(a,b,c)
-	veor		d22,d26			@ Sigma0(a)
-	vadd.i64	d18,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d22,d30
-	vshr.u64	q12,q0,#19
-	vshr.u64	q13,q0,#61
-	 vadd.i64	d22,d30			@ h+=Maj from the past
-	vshr.u64	q15,q0,#6
-	vsli.64		q12,q0,#45
-	vext.8		q14,q1,q2,#8	@ X[i+1]
-	vsli.64		q13,q0,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q1,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q5,q6,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d18,#14		@ from NEON_00_15
-	vadd.i64	q1,q14
-	vshr.u64	d25,d18,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d18,#41		@ from NEON_00_15
-	vadd.i64	q1,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d18,#50
-	vsli.64		d25,d18,#46
-	vmov		d29,d18
-	vsli.64		d26,d18,#23
-#if 18<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d19,d20		@ Ch(e,f,g)
-	vshr.u64	d24,d22,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d21
-	vshr.u64	d25,d22,#34
-	vsli.64		d24,d22,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d22,#39
-	vadd.i64	d28,d2
-	vsli.64		d25,d22,#30
-	veor		d30,d22,d23
-	vsli.64		d26,d22,#25
-	veor		d21,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d16,d23		@ Maj(a,b,c)
-	veor		d21,d26			@ Sigma0(a)
-	vadd.i64	d17,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d21,d30
-	vshr.u64	d24,d17,#14	@ 19
-#if 19<16
-	vld1.64		{d3},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d17,#18
-#if 19>0
-	 vadd.i64	d21,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d17,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d17,#50
-	vsli.64		d25,d17,#46
-	vmov		d29,d17
-	vsli.64		d26,d17,#23
-#if 19<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d18,d19		@ Ch(e,f,g)
-	vshr.u64	d24,d21,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d20
-	vshr.u64	d25,d21,#34
-	vsli.64		d24,d21,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d21,#39
-	vadd.i64	d28,d3
-	vsli.64		d25,d21,#30
-	veor		d30,d21,d22
-	vsli.64		d26,d21,#25
-	veor		d20,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d23,d22		@ Maj(a,b,c)
-	veor		d20,d26			@ Sigma0(a)
-	vadd.i64	d16,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d20,d30
-	vshr.u64	q12,q1,#19
-	vshr.u64	q13,q1,#61
-	 vadd.i64	d20,d30			@ h+=Maj from the past
-	vshr.u64	q15,q1,#6
-	vsli.64		q12,q1,#45
-	vext.8		q14,q2,q3,#8	@ X[i+1]
-	vsli.64		q13,q1,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q2,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q6,q7,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d16,#14		@ from NEON_00_15
-	vadd.i64	q2,q14
-	vshr.u64	d25,d16,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d16,#41		@ from NEON_00_15
-	vadd.i64	q2,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d16,#50
-	vsli.64		d25,d16,#46
-	vmov		d29,d16
-	vsli.64		d26,d16,#23
-#if 20<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d17,d18		@ Ch(e,f,g)
-	vshr.u64	d24,d20,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d19
-	vshr.u64	d25,d20,#34
-	vsli.64		d24,d20,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d20,#39
-	vadd.i64	d28,d4
-	vsli.64		d25,d20,#30
-	veor		d30,d20,d21
-	vsli.64		d26,d20,#25
-	veor		d19,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d22,d21		@ Maj(a,b,c)
-	veor		d19,d26			@ Sigma0(a)
-	vadd.i64	d23,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d19,d30
-	vshr.u64	d24,d23,#14	@ 21
-#if 21<16
-	vld1.64		{d5},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d23,#18
-#if 21>0
-	 vadd.i64	d19,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d23,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d23,#50
-	vsli.64		d25,d23,#46
-	vmov		d29,d23
-	vsli.64		d26,d23,#23
-#if 21<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d16,d17		@ Ch(e,f,g)
-	vshr.u64	d24,d19,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d18
-	vshr.u64	d25,d19,#34
-	vsli.64		d24,d19,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d19,#39
-	vadd.i64	d28,d5
-	vsli.64		d25,d19,#30
-	veor		d30,d19,d20
-	vsli.64		d26,d19,#25
-	veor		d18,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d21,d20		@ Maj(a,b,c)
-	veor		d18,d26			@ Sigma0(a)
-	vadd.i64	d22,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d18,d30
-	vshr.u64	q12,q2,#19
-	vshr.u64	q13,q2,#61
-	 vadd.i64	d18,d30			@ h+=Maj from the past
-	vshr.u64	q15,q2,#6
-	vsli.64		q12,q2,#45
-	vext.8		q14,q3,q4,#8	@ X[i+1]
-	vsli.64		q13,q2,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q3,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q7,q0,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d22,#14		@ from NEON_00_15
-	vadd.i64	q3,q14
-	vshr.u64	d25,d22,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d22,#41		@ from NEON_00_15
-	vadd.i64	q3,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d22,#50
-	vsli.64		d25,d22,#46
-	vmov		d29,d22
-	vsli.64		d26,d22,#23
-#if 22<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d23,d16		@ Ch(e,f,g)
-	vshr.u64	d24,d18,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d17
-	vshr.u64	d25,d18,#34
-	vsli.64		d24,d18,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d18,#39
-	vadd.i64	d28,d6
-	vsli.64		d25,d18,#30
-	veor		d30,d18,d19
-	vsli.64		d26,d18,#25
-	veor		d17,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d20,d19		@ Maj(a,b,c)
-	veor		d17,d26			@ Sigma0(a)
-	vadd.i64	d21,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d17,d30
-	vshr.u64	d24,d21,#14	@ 23
-#if 23<16
-	vld1.64		{d7},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d21,#18
-#if 23>0
-	 vadd.i64	d17,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d21,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d21,#50
-	vsli.64		d25,d21,#46
-	vmov		d29,d21
-	vsli.64		d26,d21,#23
-#if 23<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d22,d23		@ Ch(e,f,g)
-	vshr.u64	d24,d17,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d16
-	vshr.u64	d25,d17,#34
-	vsli.64		d24,d17,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d17,#39
-	vadd.i64	d28,d7
-	vsli.64		d25,d17,#30
-	veor		d30,d17,d18
-	vsli.64		d26,d17,#25
-	veor		d16,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d19,d18		@ Maj(a,b,c)
-	veor		d16,d26			@ Sigma0(a)
-	vadd.i64	d20,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d16,d30
-	vshr.u64	q12,q3,#19
-	vshr.u64	q13,q3,#61
-	 vadd.i64	d16,d30			@ h+=Maj from the past
-	vshr.u64	q15,q3,#6
-	vsli.64		q12,q3,#45
-	vext.8		q14,q4,q5,#8	@ X[i+1]
-	vsli.64		q13,q3,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q4,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q0,q1,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d20,#14		@ from NEON_00_15
-	vadd.i64	q4,q14
-	vshr.u64	d25,d20,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d20,#41		@ from NEON_00_15
-	vadd.i64	q4,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d20,#50
-	vsli.64		d25,d20,#46
-	vmov		d29,d20
-	vsli.64		d26,d20,#23
-#if 24<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d21,d22		@ Ch(e,f,g)
-	vshr.u64	d24,d16,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d23
-	vshr.u64	d25,d16,#34
-	vsli.64		d24,d16,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d16,#39
-	vadd.i64	d28,d8
-	vsli.64		d25,d16,#30
-	veor		d30,d16,d17
-	vsli.64		d26,d16,#25
-	veor		d23,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d18,d17		@ Maj(a,b,c)
-	veor		d23,d26			@ Sigma0(a)
-	vadd.i64	d19,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d23,d30
-	vshr.u64	d24,d19,#14	@ 25
-#if 25<16
-	vld1.64		{d9},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d19,#18
-#if 25>0
-	 vadd.i64	d23,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d19,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d19,#50
-	vsli.64		d25,d19,#46
-	vmov		d29,d19
-	vsli.64		d26,d19,#23
-#if 25<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d20,d21		@ Ch(e,f,g)
-	vshr.u64	d24,d23,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d22
-	vshr.u64	d25,d23,#34
-	vsli.64		d24,d23,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d23,#39
-	vadd.i64	d28,d9
-	vsli.64		d25,d23,#30
-	veor		d30,d23,d16
-	vsli.64		d26,d23,#25
-	veor		d22,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d17,d16		@ Maj(a,b,c)
-	veor		d22,d26			@ Sigma0(a)
-	vadd.i64	d18,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d22,d30
-	vshr.u64	q12,q4,#19
-	vshr.u64	q13,q4,#61
-	 vadd.i64	d22,d30			@ h+=Maj from the past
-	vshr.u64	q15,q4,#6
-	vsli.64		q12,q4,#45
-	vext.8		q14,q5,q6,#8	@ X[i+1]
-	vsli.64		q13,q4,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q5,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q1,q2,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d18,#14		@ from NEON_00_15
-	vadd.i64	q5,q14
-	vshr.u64	d25,d18,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d18,#41		@ from NEON_00_15
-	vadd.i64	q5,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d18,#50
-	vsli.64		d25,d18,#46
-	vmov		d29,d18
-	vsli.64		d26,d18,#23
-#if 26<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d19,d20		@ Ch(e,f,g)
-	vshr.u64	d24,d22,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d21
-	vshr.u64	d25,d22,#34
-	vsli.64		d24,d22,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d22,#39
-	vadd.i64	d28,d10
-	vsli.64		d25,d22,#30
-	veor		d30,d22,d23
-	vsli.64		d26,d22,#25
-	veor		d21,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d16,d23		@ Maj(a,b,c)
-	veor		d21,d26			@ Sigma0(a)
-	vadd.i64	d17,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d21,d30
-	vshr.u64	d24,d17,#14	@ 27
-#if 27<16
-	vld1.64		{d11},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d17,#18
-#if 27>0
-	 vadd.i64	d21,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d17,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d17,#50
-	vsli.64		d25,d17,#46
-	vmov		d29,d17
-	vsli.64		d26,d17,#23
-#if 27<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d18,d19		@ Ch(e,f,g)
-	vshr.u64	d24,d21,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d20
-	vshr.u64	d25,d21,#34
-	vsli.64		d24,d21,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d21,#39
-	vadd.i64	d28,d11
-	vsli.64		d25,d21,#30
-	veor		d30,d21,d22
-	vsli.64		d26,d21,#25
-	veor		d20,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d23,d22		@ Maj(a,b,c)
-	veor		d20,d26			@ Sigma0(a)
-	vadd.i64	d16,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d20,d30
-	vshr.u64	q12,q5,#19
-	vshr.u64	q13,q5,#61
-	 vadd.i64	d20,d30			@ h+=Maj from the past
-	vshr.u64	q15,q5,#6
-	vsli.64		q12,q5,#45
-	vext.8		q14,q6,q7,#8	@ X[i+1]
-	vsli.64		q13,q5,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q6,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q2,q3,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d16,#14		@ from NEON_00_15
-	vadd.i64	q6,q14
-	vshr.u64	d25,d16,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d16,#41		@ from NEON_00_15
-	vadd.i64	q6,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d16,#50
-	vsli.64		d25,d16,#46
-	vmov		d29,d16
-	vsli.64		d26,d16,#23
-#if 28<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d17,d18		@ Ch(e,f,g)
-	vshr.u64	d24,d20,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d19
-	vshr.u64	d25,d20,#34
-	vsli.64		d24,d20,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d20,#39
-	vadd.i64	d28,d12
-	vsli.64		d25,d20,#30
-	veor		d30,d20,d21
-	vsli.64		d26,d20,#25
-	veor		d19,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d22,d21		@ Maj(a,b,c)
-	veor		d19,d26			@ Sigma0(a)
-	vadd.i64	d23,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d19,d30
-	vshr.u64	d24,d23,#14	@ 29
-#if 29<16
-	vld1.64		{d13},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d23,#18
-#if 29>0
-	 vadd.i64	d19,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d23,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d23,#50
-	vsli.64		d25,d23,#46
-	vmov		d29,d23
-	vsli.64		d26,d23,#23
-#if 29<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d16,d17		@ Ch(e,f,g)
-	vshr.u64	d24,d19,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d18
-	vshr.u64	d25,d19,#34
-	vsli.64		d24,d19,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d19,#39
-	vadd.i64	d28,d13
-	vsli.64		d25,d19,#30
-	veor		d30,d19,d20
-	vsli.64		d26,d19,#25
-	veor		d18,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d21,d20		@ Maj(a,b,c)
-	veor		d18,d26			@ Sigma0(a)
-	vadd.i64	d22,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d18,d30
-	vshr.u64	q12,q6,#19
-	vshr.u64	q13,q6,#61
-	 vadd.i64	d18,d30			@ h+=Maj from the past
-	vshr.u64	q15,q6,#6
-	vsli.64		q12,q6,#45
-	vext.8		q14,q7,q0,#8	@ X[i+1]
-	vsli.64		q13,q6,#3
-	veor		q15,q12
-	vshr.u64	q12,q14,#1
-	veor		q15,q13				@ sigma1(X[i+14])
-	vshr.u64	q13,q14,#8
-	vadd.i64	q7,q15
-	vshr.u64	q15,q14,#7
-	vsli.64		q12,q14,#63
-	vsli.64		q13,q14,#56
-	vext.8		q14,q3,q4,#8	@ X[i+9]
-	veor		q15,q12
-	vshr.u64	d24,d22,#14		@ from NEON_00_15
-	vadd.i64	q7,q14
-	vshr.u64	d25,d22,#18		@ from NEON_00_15
-	veor		q15,q13				@ sigma0(X[i+1])
-	vshr.u64	d26,d22,#41		@ from NEON_00_15
-	vadd.i64	q7,q15
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d22,#50
-	vsli.64		d25,d22,#46
-	vmov		d29,d22
-	vsli.64		d26,d22,#23
-#if 30<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d23,d16		@ Ch(e,f,g)
-	vshr.u64	d24,d18,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d17
-	vshr.u64	d25,d18,#34
-	vsli.64		d24,d18,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d18,#39
-	vadd.i64	d28,d14
-	vsli.64		d25,d18,#30
-	veor		d30,d18,d19
-	vsli.64		d26,d18,#25
-	veor		d17,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d20,d19		@ Maj(a,b,c)
-	veor		d17,d26			@ Sigma0(a)
-	vadd.i64	d21,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d17,d30
-	vshr.u64	d24,d21,#14	@ 31
-#if 31<16
-	vld1.64		{d15},[r1]!	@ handles unaligned
-#endif
-	vshr.u64	d25,d21,#18
-#if 31>0
-	 vadd.i64	d17,d30			@ h+=Maj from the past
-#endif
-	vshr.u64	d26,d21,#41
-	vld1.64		{d28},[r3,:64]!	@ K[i++]
-	vsli.64		d24,d21,#50
-	vsli.64		d25,d21,#46
-	vmov		d29,d21
-	vsli.64		d26,d21,#23
-#if 31<16 && defined(__ARMEL__)
-	vrev64.8	,
-#endif
-	veor		d25,d24
-	vbsl		d29,d22,d23		@ Ch(e,f,g)
-	vshr.u64	d24,d17,#28
-	veor		d26,d25			@ Sigma1(e)
-	vadd.i64	d27,d29,d16
-	vshr.u64	d25,d17,#34
-	vsli.64		d24,d17,#36
-	vadd.i64	d27,d26
-	vshr.u64	d26,d17,#39
-	vadd.i64	d28,d15
-	vsli.64		d25,d17,#30
-	veor		d30,d17,d18
-	vsli.64		d26,d17,#25
-	veor		d16,d24,d25
-	vadd.i64	d27,d28
-	vbsl		d30,d19,d18		@ Maj(a,b,c)
-	veor		d16,d26			@ Sigma0(a)
-	vadd.i64	d20,d27
-	vadd.i64	d30,d27
-	@ vadd.i64	d16,d30
-	bne		.L16_79_neon
-
-	 vadd.i64	d16,d30		@ h+=Maj from the past
-	vldmia		r0,{d24-d31}	@ load context to temp
-	vadd.i64	q8,q12		@ vectorized accumulate
-	vadd.i64	q9,q13
-	vadd.i64	q10,q14
-	vadd.i64	q11,q15
-	vstmia		r0,{d16-d23}	@ save context
-	teq		r1,r2
-	sub		r3,#640	@ rewind K512
-	bne		.Loop_neon
-
-	VFP_ABI_POP
-	bx	lr				@ .word	0xe12fff1e
-.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
-#endif
-.asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
-.align	2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm	OPENSSL_armcap_P,4,4
-#endif
-- 
GitLab


From 8116138cbfcee80b1bf9b57073278dcd86b44656 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 26 Apr 2021 02:57:32 +0900
Subject: [PATCH 0700/3804] crypto: arm - use a pattern rule for generating *.S
 files

Unify similar build rules.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/Makefile | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 51f160c61740f..eafa898ba6a73 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -48,13 +48,7 @@ curve25519-neon-y := curve25519-core.o curve25519-glue.o
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
 
-$(obj)/poly1305-core.S: $(src)/poly1305-armv4.pl
-	$(call cmd,perl)
-
-$(obj)/sha256-core.S: $(src)/sha256-armv4.pl
-	$(call cmd,perl)
-
-$(obj)/sha512-core.S: $(src)/sha512-armv4.pl
+$(obj)/%-core.S: $(src)/%-armv4.pl
 	$(call cmd,perl)
 
 clean-files += poly1305-core.S sha256-core.S sha512-core.S
-- 
GitLab


From 12dd461ebd1941afe821539419685ff9dea3a31d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 26 Apr 2021 02:57:33 +0900
Subject: [PATCH 0701/3804] crypto: arm64 - generate *.S by Perl at build time
 instead of shipping them

Generate *.S by Perl like arch/{mips,x86}/crypto/Makefile.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Makefile                |    9 +-
 arch/arm64/crypto/poly1305-core.S_shipped |  835 ---------
 arch/arm64/crypto/sha256-core.S_shipped   | 2069 ---------------------
 arch/arm64/crypto/sha512-core.S_shipped   | 1093 -----------
 4 files changed, 3 insertions(+), 4003 deletions(-)
 delete mode 100644 arch/arm64/crypto/poly1305-core.S_shipped
 delete mode 100644 arch/arm64/crypto/sha256-core.S_shipped
 delete mode 100644 arch/arm64/crypto/sha512-core.S_shipped

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index d0901e610df3b..592e52a08c623 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -68,19 +68,16 @@ CFLAGS_aes-glue-ce.o	:= -DUSE_V8_CRYPTO_EXTENSIONS
 $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-ifdef REGENERATE_ARM64_CRYPTO
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+$(obj)/poly1305-core.S: $(src)/poly1305-armv8.pl
 	$(call cmd,perlasm)
 
-$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha256-core.S: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
 
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha512-core.S: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
 
-endif
-
 clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
deleted file mode 100644
index fb2822abf63aa..0000000000000
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,835 +0,0 @@
-#ifndef __KERNEL__
-# include "arm_arch.h"
-.extern	OPENSSL_armcap_P
-#endif
-
-.text
-
-// forward "declarations" are required for Apple
-.globl	poly1305_blocks
-.globl	poly1305_emit
-
-.globl	poly1305_init
-.type	poly1305_init,%function
-.align	5
-poly1305_init:
-	cmp	x1,xzr
-	stp	xzr,xzr,[x0]		// zero hash value
-	stp	xzr,xzr,[x0,#16]	// [along with is_base2_26]
-
-	csel	x0,xzr,x0,eq
-	b.eq	.Lno_key
-
-#ifndef	__KERNEL__
-	adrp	x17,OPENSSL_armcap_P
-	ldr	w17,[x17,#:lo12:OPENSSL_armcap_P]
-#endif
-
-	ldp	x7,x8,[x1]		// load key
-	mov	x9,#0xfffffffc0fffffff
-	movk	x9,#0x0fff,lsl#48
-#ifdef	__AARCH64EB__
-	rev	x7,x7			// flip bytes
-	rev	x8,x8
-#endif
-	and	x7,x7,x9		// &=0ffffffc0fffffff
-	and	x9,x9,#-4
-	and	x8,x8,x9		// &=0ffffffc0ffffffc
-	mov	w9,#-1
-	stp	x7,x8,[x0,#32]	// save key value
-	str	w9,[x0,#48]	// impossible key power value
-
-#ifndef	__KERNEL__
-	tst	w17,#ARMV7_NEON
-
-	adr	x12,.Lpoly1305_blocks
-	adr	x7,.Lpoly1305_blocks_neon
-	adr	x13,.Lpoly1305_emit
-
-	csel	x12,x12,x7,eq
-
-# ifdef	__ILP32__
-	stp	w12,w13,[x2]
-# else
-	stp	x12,x13,[x2]
-# endif
-#endif
-	mov	x0,#1
-.Lno_key:
-	ret
-.size	poly1305_init,.-poly1305_init
-
-.type	poly1305_blocks,%function
-.align	5
-poly1305_blocks:
-.Lpoly1305_blocks:
-	ands	x2,x2,#-16
-	b.eq	.Lno_data
-
-	ldp	x4,x5,[x0]		// load hash value
-	ldp	x6,x17,[x0,#16]	// [along with is_base2_26]
-	ldp	x7,x8,[x0,#32]	// load key value
-
-#ifdef	__AARCH64EB__
-	lsr	x12,x4,#32
-	mov	w13,w4
-	lsr	x14,x5,#32
-	mov	w15,w5
-	lsr	x16,x6,#32
-#else
-	mov	w12,w4
-	lsr	x13,x4,#32
-	mov	w14,w5
-	lsr	x15,x5,#32
-	mov	w16,w6
-#endif
-
-	add	x12,x12,x13,lsl#26	// base 2^26 -> base 2^64
-	lsr	x13,x14,#12
-	adds	x12,x12,x14,lsl#52
-	add	x13,x13,x15,lsl#14
-	adc	x13,x13,xzr
-	lsr	x14,x16,#24
-	adds	x13,x13,x16,lsl#40
-	adc	x14,x14,xzr
-
-	cmp	x17,#0			// is_base2_26?
-	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
-	csel	x4,x4,x12,eq		// choose between radixes
-	csel	x5,x5,x13,eq
-	csel	x6,x6,x14,eq
-
-.Loop:
-	ldp	x10,x11,[x1],#16	// load input
-	sub	x2,x2,#16
-#ifdef	__AARCH64EB__
-	rev	x10,x10
-	rev	x11,x11
-#endif
-	adds	x4,x4,x10		// accumulate input
-	adcs	x5,x5,x11
-
-	mul	x12,x4,x7		// h0*r0
-	adc	x6,x6,x3
-	umulh	x13,x4,x7
-
-	mul	x10,x5,x9		// h1*5*r1
-	umulh	x11,x5,x9
-
-	adds	x12,x12,x10
-	mul	x10,x4,x8		// h0*r1
-	adc	x13,x13,x11
-	umulh	x14,x4,x8
-
-	adds	x13,x13,x10
-	mul	x10,x5,x7		// h1*r0
-	adc	x14,x14,xzr
-	umulh	x11,x5,x7
-
-	adds	x13,x13,x10
-	mul	x10,x6,x9		// h2*5*r1
-	adc	x14,x14,x11
-	mul	x11,x6,x7		// h2*r0
-
-	adds	x13,x13,x10
-	adc	x14,x14,x11
-
-	and	x10,x14,#-4		// final reduction
-	and	x6,x14,#3
-	add	x10,x10,x14,lsr#2
-	adds	x4,x12,x10
-	adcs	x5,x13,xzr
-	adc	x6,x6,xzr
-
-	cbnz	x2,.Loop
-
-	stp	x4,x5,[x0]		// store hash value
-	stp	x6,xzr,[x0,#16]	// [and clear is_base2_26]
-
-.Lno_data:
-	ret
-.size	poly1305_blocks,.-poly1305_blocks
-
-.type	poly1305_emit,%function
-.align	5
-poly1305_emit:
-.Lpoly1305_emit:
-	ldp	x4,x5,[x0]		// load hash base 2^64
-	ldp	x6,x7,[x0,#16]	// [along with is_base2_26]
-	ldp	x10,x11,[x2]	// load nonce
-
-#ifdef	__AARCH64EB__
-	lsr	x12,x4,#32
-	mov	w13,w4
-	lsr	x14,x5,#32
-	mov	w15,w5
-	lsr	x16,x6,#32
-#else
-	mov	w12,w4
-	lsr	x13,x4,#32
-	mov	w14,w5
-	lsr	x15,x5,#32
-	mov	w16,w6
-#endif
-
-	add	x12,x12,x13,lsl#26	// base 2^26 -> base 2^64
-	lsr	x13,x14,#12
-	adds	x12,x12,x14,lsl#52
-	add	x13,x13,x15,lsl#14
-	adc	x13,x13,xzr
-	lsr	x14,x16,#24
-	adds	x13,x13,x16,lsl#40
-	adc	x14,x14,xzr
-
-	cmp	x7,#0			// is_base2_26?
-	csel	x4,x4,x12,eq		// choose between radixes
-	csel	x5,x5,x13,eq
-	csel	x6,x6,x14,eq
-
-	adds	x12,x4,#5		// compare to modulus
-	adcs	x13,x5,xzr
-	adc	x14,x6,xzr
-
-	tst	x14,#-4			// see if it's carried/borrowed
-
-	csel	x4,x4,x12,eq
-	csel	x5,x5,x13,eq
-
-#ifdef	__AARCH64EB__
-	ror	x10,x10,#32		// flip nonce words
-	ror	x11,x11,#32
-#endif
-	adds	x4,x4,x10		// accumulate nonce
-	adc	x5,x5,x11
-#ifdef	__AARCH64EB__
-	rev	x4,x4			// flip output bytes
-	rev	x5,x5
-#endif
-	stp	x4,x5,[x1]		// write result
-
-	ret
-.size	poly1305_emit,.-poly1305_emit
-.type	poly1305_mult,%function
-.align	5
-poly1305_mult:
-	mul	x12,x4,x7		// h0*r0
-	umulh	x13,x4,x7
-
-	mul	x10,x5,x9		// h1*5*r1
-	umulh	x11,x5,x9
-
-	adds	x12,x12,x10
-	mul	x10,x4,x8		// h0*r1
-	adc	x13,x13,x11
-	umulh	x14,x4,x8
-
-	adds	x13,x13,x10
-	mul	x10,x5,x7		// h1*r0
-	adc	x14,x14,xzr
-	umulh	x11,x5,x7
-
-	adds	x13,x13,x10
-	mul	x10,x6,x9		// h2*5*r1
-	adc	x14,x14,x11
-	mul	x11,x6,x7		// h2*r0
-
-	adds	x13,x13,x10
-	adc	x14,x14,x11
-
-	and	x10,x14,#-4		// final reduction
-	and	x6,x14,#3
-	add	x10,x10,x14,lsr#2
-	adds	x4,x12,x10
-	adcs	x5,x13,xzr
-	adc	x6,x6,xzr
-
-	ret
-.size	poly1305_mult,.-poly1305_mult
-
-.type	poly1305_splat,%function
-.align	4
-poly1305_splat:
-	and	x12,x4,#0x03ffffff	// base 2^64 -> base 2^26
-	ubfx	x13,x4,#26,#26
-	extr	x14,x5,x4,#52
-	and	x14,x14,#0x03ffffff
-	ubfx	x15,x5,#14,#26
-	extr	x16,x6,x5,#40
-
-	str	w12,[x0,#16*0]	// r0
-	add	w12,w13,w13,lsl#2	// r1*5
-	str	w13,[x0,#16*1]	// r1
-	add	w13,w14,w14,lsl#2	// r2*5
-	str	w12,[x0,#16*2]	// s1
-	str	w14,[x0,#16*3]	// r2
-	add	w14,w15,w15,lsl#2	// r3*5
-	str	w13,[x0,#16*4]	// s2
-	str	w15,[x0,#16*5]	// r3
-	add	w15,w16,w16,lsl#2	// r4*5
-	str	w14,[x0,#16*6]	// s3
-	str	w16,[x0,#16*7]	// r4
-	str	w15,[x0,#16*8]	// s4
-
-	ret
-.size	poly1305_splat,.-poly1305_splat
-
-#ifdef	__KERNEL__
-.globl	poly1305_blocks_neon
-#endif
-.type	poly1305_blocks_neon,%function
-.align	5
-poly1305_blocks_neon:
-.Lpoly1305_blocks_neon:
-	ldr	x17,[x0,#24]
-	cmp	x2,#128
-	b.lo	.Lpoly1305_blocks
-
-	.inst	0xd503233f		// paciasp
-	stp	x29,x30,[sp,#-80]!
-	add	x29,sp,#0
-
-	stp	d8,d9,[sp,#16]		// meet ABI requirements
-	stp	d10,d11,[sp,#32]
-	stp	d12,d13,[sp,#48]
-	stp	d14,d15,[sp,#64]
-
-	cbz	x17,.Lbase2_64_neon
-
-	ldp	w10,w11,[x0]		// load hash value base 2^26
-	ldp	w12,w13,[x0,#8]
-	ldr	w14,[x0,#16]
-
-	tst	x2,#31
-	b.eq	.Leven_neon
-
-	ldp	x7,x8,[x0,#32]	// load key value
-
-	add	x4,x10,x11,lsl#26	// base 2^26 -> base 2^64
-	lsr	x5,x12,#12
-	adds	x4,x4,x12,lsl#52
-	add	x5,x5,x13,lsl#14
-	adc	x5,x5,xzr
-	lsr	x6,x14,#24
-	adds	x5,x5,x14,lsl#40
-	adc	x14,x6,xzr		// can be partially reduced...
-
-	ldp	x12,x13,[x1],#16	// load input
-	sub	x2,x2,#16
-	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
-
-#ifdef	__AARCH64EB__
-	rev	x12,x12
-	rev	x13,x13
-#endif
-	adds	x4,x4,x12		// accumulate input
-	adcs	x5,x5,x13
-	adc	x6,x6,x3
-
-	bl	poly1305_mult
-
-	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
-	ubfx	x11,x4,#26,#26
-	extr	x12,x5,x4,#52
-	and	x12,x12,#0x03ffffff
-	ubfx	x13,x5,#14,#26
-	extr	x14,x6,x5,#40
-
-	b	.Leven_neon
-
-.align	4
-.Lbase2_64_neon:
-	ldp	x7,x8,[x0,#32]	// load key value
-
-	ldp	x4,x5,[x0]		// load hash value base 2^64
-	ldr	x6,[x0,#16]
-
-	tst	x2,#31
-	b.eq	.Linit_neon
-
-	ldp	x12,x13,[x1],#16	// load input
-	sub	x2,x2,#16
-	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
-#ifdef	__AARCH64EB__
-	rev	x12,x12
-	rev	x13,x13
-#endif
-	adds	x4,x4,x12		// accumulate input
-	adcs	x5,x5,x13
-	adc	x6,x6,x3
-
-	bl	poly1305_mult
-
-.Linit_neon:
-	ldr	w17,[x0,#48]		// first table element
-	and	x10,x4,#0x03ffffff	// base 2^64 -> base 2^26
-	ubfx	x11,x4,#26,#26
-	extr	x12,x5,x4,#52
-	and	x12,x12,#0x03ffffff
-	ubfx	x13,x5,#14,#26
-	extr	x14,x6,x5,#40
-
-	cmp	w17,#-1			// is value impossible?
-	b.ne	.Leven_neon
-
-	fmov	d24,x10
-	fmov	d25,x11
-	fmov	d26,x12
-	fmov	d27,x13
-	fmov	d28,x14
-
-	////////////////////////////////// initialize r^n table
-	mov	x4,x7			// r^1
-	add	x9,x8,x8,lsr#2	// s1 = r1 + (r1 >> 2)
-	mov	x5,x8
-	mov	x6,xzr
-	add	x0,x0,#48+12
-	bl	poly1305_splat
-
-	bl	poly1305_mult		// r^2
-	sub	x0,x0,#4
-	bl	poly1305_splat
-
-	bl	poly1305_mult		// r^3
-	sub	x0,x0,#4
-	bl	poly1305_splat
-
-	bl	poly1305_mult		// r^4
-	sub	x0,x0,#4
-	bl	poly1305_splat
-	sub	x0,x0,#48		// restore original x0
-	b	.Ldo_neon
-
-.align	4
-.Leven_neon:
-	fmov	d24,x10
-	fmov	d25,x11
-	fmov	d26,x12
-	fmov	d27,x13
-	fmov	d28,x14
-
-.Ldo_neon:
-	ldp	x8,x12,[x1,#32]	// inp[2:3]
-	subs	x2,x2,#64
-	ldp	x9,x13,[x1,#48]
-	add	x16,x1,#96
-	adr	x17,.Lzeros
-
-	lsl	x3,x3,#24
-	add	x15,x0,#48
-
-#ifdef	__AARCH64EB__
-	rev	x8,x8
-	rev	x12,x12
-	rev	x9,x9
-	rev	x13,x13
-#endif
-	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
-	and	x5,x9,#0x03ffffff
-	ubfx	x6,x8,#26,#26
-	ubfx	x7,x9,#26,#26
-	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
-	extr	x8,x12,x8,#52
-	extr	x9,x13,x9,#52
-	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
-	fmov	d14,x4
-	and	x8,x8,#0x03ffffff
-	and	x9,x9,#0x03ffffff
-	ubfx	x10,x12,#14,#26
-	ubfx	x11,x13,#14,#26
-	add	x12,x3,x12,lsr#40
-	add	x13,x3,x13,lsr#40
-	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
-	fmov	d15,x6
-	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
-	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
-	fmov	d16,x8
-	fmov	d17,x10
-	fmov	d18,x12
-
-	ldp	x8,x12,[x1],#16	// inp[0:1]
-	ldp	x9,x13,[x1],#48
-
-	ld1	{v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
-	ld1	{v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
-	ld1	{v8.4s},[x15]
-
-#ifdef	__AARCH64EB__
-	rev	x8,x8
-	rev	x12,x12
-	rev	x9,x9
-	rev	x13,x13
-#endif
-	and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
-	and	x5,x9,#0x03ffffff
-	ubfx	x6,x8,#26,#26
-	ubfx	x7,x9,#26,#26
-	add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
-	extr	x8,x12,x8,#52
-	extr	x9,x13,x9,#52
-	add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
-	fmov	d9,x4
-	and	x8,x8,#0x03ffffff
-	and	x9,x9,#0x03ffffff
-	ubfx	x10,x12,#14,#26
-	ubfx	x11,x13,#14,#26
-	add	x12,x3,x12,lsr#40
-	add	x13,x3,x13,lsr#40
-	add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
-	fmov	d10,x6
-	add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
-	add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
-	movi	v31.2d,#-1
-	fmov	d11,x8
-	fmov	d12,x10
-	fmov	d13,x12
-	ushr	v31.2d,v31.2d,#38
-
-	b.ls	.Lskip_loop
-
-.align	4
-.Loop_neon:
-	////////////////////////////////////////////////////////////////
-	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
-	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
-	//   ___________________/
-	// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
-	// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
-	//   ___________________/ ____________________/
-	//
-	// Note that we start with inp[2:3]*r^2. This is because it
-	// doesn't depend on reduction in previous iteration.
-	////////////////////////////////////////////////////////////////
-	// d4 = h0*r4 + h1*r3   + h2*r2   + h3*r1   + h4*r0
-	// d3 = h0*r3 + h1*r2   + h2*r1   + h3*r0   + h4*5*r4
-	// d2 = h0*r2 + h1*r1   + h2*r0   + h3*5*r4 + h4*5*r3
-	// d1 = h0*r1 + h1*r0   + h2*5*r4 + h3*5*r3 + h4*5*r2
-	// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
-
-	subs	x2,x2,#64
-	umull	v23.2d,v14.2s,v7.s[2]
-	csel	x16,x17,x16,lo
-	umull	v22.2d,v14.2s,v5.s[2]
-	umull	v21.2d,v14.2s,v3.s[2]
-	 ldp	x8,x12,[x16],#16	// inp[2:3] (or zero)
-	umull	v20.2d,v14.2s,v1.s[2]
-	 ldp	x9,x13,[x16],#48
-	umull	v19.2d,v14.2s,v0.s[2]
-#ifdef	__AARCH64EB__
-	 rev	x8,x8
-	 rev	x12,x12
-	 rev	x9,x9
-	 rev	x13,x13
-#endif
-
-	umlal	v23.2d,v15.2s,v5.s[2]
-	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
-	umlal	v22.2d,v15.2s,v3.s[2]
-	 and	x5,x9,#0x03ffffff
-	umlal	v21.2d,v15.2s,v1.s[2]
-	 ubfx	x6,x8,#26,#26
-	umlal	v20.2d,v15.2s,v0.s[2]
-	 ubfx	x7,x9,#26,#26
-	umlal	v19.2d,v15.2s,v8.s[2]
-	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
-
-	umlal	v23.2d,v16.2s,v3.s[2]
-	 extr	x8,x12,x8,#52
-	umlal	v22.2d,v16.2s,v1.s[2]
-	 extr	x9,x13,x9,#52
-	umlal	v21.2d,v16.2s,v0.s[2]
-	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
-	umlal	v20.2d,v16.2s,v8.s[2]
-	 fmov	d14,x4
-	umlal	v19.2d,v16.2s,v6.s[2]
-	 and	x8,x8,#0x03ffffff
-
-	umlal	v23.2d,v17.2s,v1.s[2]
-	 and	x9,x9,#0x03ffffff
-	umlal	v22.2d,v17.2s,v0.s[2]
-	 ubfx	x10,x12,#14,#26
-	umlal	v21.2d,v17.2s,v8.s[2]
-	 ubfx	x11,x13,#14,#26
-	umlal	v20.2d,v17.2s,v6.s[2]
-	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
-	umlal	v19.2d,v17.2s,v4.s[2]
-	 fmov	d15,x6
-
-	add	v11.2s,v11.2s,v26.2s
-	 add	x12,x3,x12,lsr#40
-	umlal	v23.2d,v18.2s,v0.s[2]
-	 add	x13,x3,x13,lsr#40
-	umlal	v22.2d,v18.2s,v8.s[2]
-	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
-	umlal	v21.2d,v18.2s,v6.s[2]
-	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
-	umlal	v20.2d,v18.2s,v4.s[2]
-	 fmov	d16,x8
-	umlal	v19.2d,v18.2s,v2.s[2]
-	 fmov	d17,x10
-
-	////////////////////////////////////////////////////////////////
-	// (hash+inp[0:1])*r^4 and accumulate
-
-	add	v9.2s,v9.2s,v24.2s
-	 fmov	d18,x12
-	umlal	v22.2d,v11.2s,v1.s[0]
-	 ldp	x8,x12,[x1],#16	// inp[0:1]
-	umlal	v19.2d,v11.2s,v6.s[0]
-	 ldp	x9,x13,[x1],#48
-	umlal	v23.2d,v11.2s,v3.s[0]
-	umlal	v20.2d,v11.2s,v8.s[0]
-	umlal	v21.2d,v11.2s,v0.s[0]
-#ifdef	__AARCH64EB__
-	 rev	x8,x8
-	 rev	x12,x12
-	 rev	x9,x9
-	 rev	x13,x13
-#endif
-
-	add	v10.2s,v10.2s,v25.2s
-	umlal	v22.2d,v9.2s,v5.s[0]
-	umlal	v23.2d,v9.2s,v7.s[0]
-	 and	x4,x8,#0x03ffffff	// base 2^64 -> base 2^26
-	umlal	v21.2d,v9.2s,v3.s[0]
-	 and	x5,x9,#0x03ffffff
-	umlal	v19.2d,v9.2s,v0.s[0]
-	 ubfx	x6,x8,#26,#26
-	umlal	v20.2d,v9.2s,v1.s[0]
-	 ubfx	x7,x9,#26,#26
-
-	add	v12.2s,v12.2s,v27.2s
-	 add	x4,x4,x5,lsl#32		// bfi	x4,x5,#32,#32
-	umlal	v22.2d,v10.2s,v3.s[0]
-	 extr	x8,x12,x8,#52
-	umlal	v23.2d,v10.2s,v5.s[0]
-	 extr	x9,x13,x9,#52
-	umlal	v19.2d,v10.2s,v8.s[0]
-	 add	x6,x6,x7,lsl#32		// bfi	x6,x7,#32,#32
-	umlal	v21.2d,v10.2s,v1.s[0]
-	 fmov	d9,x4
-	umlal	v20.2d,v10.2s,v0.s[0]
-	 and	x8,x8,#0x03ffffff
-
-	add	v13.2s,v13.2s,v28.2s
-	 and	x9,x9,#0x03ffffff
-	umlal	v22.2d,v12.2s,v0.s[0]
-	 ubfx	x10,x12,#14,#26
-	umlal	v19.2d,v12.2s,v4.s[0]
-	 ubfx	x11,x13,#14,#26
-	umlal	v23.2d,v12.2s,v1.s[0]
-	 add	x8,x8,x9,lsl#32		// bfi	x8,x9,#32,#32
-	umlal	v20.2d,v12.2s,v6.s[0]
-	 fmov	d10,x6
-	umlal	v21.2d,v12.2s,v8.s[0]
-	 add	x12,x3,x12,lsr#40
-
-	umlal	v22.2d,v13.2s,v8.s[0]
-	 add	x13,x3,x13,lsr#40
-	umlal	v19.2d,v13.2s,v2.s[0]
-	 add	x10,x10,x11,lsl#32	// bfi	x10,x11,#32,#32
-	umlal	v23.2d,v13.2s,v0.s[0]
-	 add	x12,x12,x13,lsl#32	// bfi	x12,x13,#32,#32
-	umlal	v20.2d,v13.2s,v4.s[0]
-	 fmov	d11,x8
-	umlal	v21.2d,v13.2s,v6.s[0]
-	 fmov	d12,x10
-	 fmov	d13,x12
-
-	/////////////////////////////////////////////////////////////////
-	// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
-	// and P. Schwabe
-	//
-	// [see discussion in poly1305-armv4 module]
-
-	ushr	v29.2d,v22.2d,#26
-	xtn	v27.2s,v22.2d
-	 ushr	v30.2d,v19.2d,#26
-	 and	v19.16b,v19.16b,v31.16b
-	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
-	bic	v27.2s,#0xfc,lsl#24	// &=0x03ffffff
-	 add	v20.2d,v20.2d,v30.2d	// h0 -> h1
-
-	ushr	v29.2d,v23.2d,#26
-	xtn	v28.2s,v23.2d
-	 ushr	v30.2d,v20.2d,#26
-	 xtn	v25.2s,v20.2d
-	bic	v28.2s,#0xfc,lsl#24
-	 add	v21.2d,v21.2d,v30.2d	// h1 -> h2
-
-	add	v19.2d,v19.2d,v29.2d
-	shl	v29.2d,v29.2d,#2
-	 shrn	v30.2s,v21.2d,#26
-	 xtn	v26.2s,v21.2d
-	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
-	 bic	v25.2s,#0xfc,lsl#24
-	 add	v27.2s,v27.2s,v30.2s		// h2 -> h3
-	 bic	v26.2s,#0xfc,lsl#24
-
-	shrn	v29.2s,v19.2d,#26
-	xtn	v24.2s,v19.2d
-	 ushr	v30.2s,v27.2s,#26
-	 bic	v27.2s,#0xfc,lsl#24
-	 bic	v24.2s,#0xfc,lsl#24
-	add	v25.2s,v25.2s,v29.2s		// h0 -> h1
-	 add	v28.2s,v28.2s,v30.2s		// h3 -> h4
-
-	b.hi	.Loop_neon
-
-.Lskip_loop:
-	dup	v16.2d,v16.d[0]
-	add	v11.2s,v11.2s,v26.2s
-
-	////////////////////////////////////////////////////////////////
-	// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
-
-	adds	x2,x2,#32
-	b.ne	.Long_tail
-
-	dup	v16.2d,v11.d[0]
-	add	v14.2s,v9.2s,v24.2s
-	add	v17.2s,v12.2s,v27.2s
-	add	v15.2s,v10.2s,v25.2s
-	add	v18.2s,v13.2s,v28.2s
-
-.Long_tail:
-	dup	v14.2d,v14.d[0]
-	umull2	v19.2d,v16.4s,v6.4s
-	umull2	v22.2d,v16.4s,v1.4s
-	umull2	v23.2d,v16.4s,v3.4s
-	umull2	v21.2d,v16.4s,v0.4s
-	umull2	v20.2d,v16.4s,v8.4s
-
-	dup	v15.2d,v15.d[0]
-	umlal2	v19.2d,v14.4s,v0.4s
-	umlal2	v21.2d,v14.4s,v3.4s
-	umlal2	v22.2d,v14.4s,v5.4s
-	umlal2	v23.2d,v14.4s,v7.4s
-	umlal2	v20.2d,v14.4s,v1.4s
-
-	dup	v17.2d,v17.d[0]
-	umlal2	v19.2d,v15.4s,v8.4s
-	umlal2	v22.2d,v15.4s,v3.4s
-	umlal2	v21.2d,v15.4s,v1.4s
-	umlal2	v23.2d,v15.4s,v5.4s
-	umlal2	v20.2d,v15.4s,v0.4s
-
-	dup	v18.2d,v18.d[0]
-	umlal2	v22.2d,v17.4s,v0.4s
-	umlal2	v23.2d,v17.4s,v1.4s
-	umlal2	v19.2d,v17.4s,v4.4s
-	umlal2	v20.2d,v17.4s,v6.4s
-	umlal2	v21.2d,v17.4s,v8.4s
-
-	umlal2	v22.2d,v18.4s,v8.4s
-	umlal2	v19.2d,v18.4s,v2.4s
-	umlal2	v23.2d,v18.4s,v0.4s
-	umlal2	v20.2d,v18.4s,v4.4s
-	umlal2	v21.2d,v18.4s,v6.4s
-
-	b.eq	.Lshort_tail
-
-	////////////////////////////////////////////////////////////////
-	// (hash+inp[0:1])*r^4:r^3 and accumulate
-
-	add	v9.2s,v9.2s,v24.2s
-	umlal	v22.2d,v11.2s,v1.2s
-	umlal	v19.2d,v11.2s,v6.2s
-	umlal	v23.2d,v11.2s,v3.2s
-	umlal	v20.2d,v11.2s,v8.2s
-	umlal	v21.2d,v11.2s,v0.2s
-
-	add	v10.2s,v10.2s,v25.2s
-	umlal	v22.2d,v9.2s,v5.2s
-	umlal	v19.2d,v9.2s,v0.2s
-	umlal	v23.2d,v9.2s,v7.2s
-	umlal	v20.2d,v9.2s,v1.2s
-	umlal	v21.2d,v9.2s,v3.2s
-
-	add	v12.2s,v12.2s,v27.2s
-	umlal	v22.2d,v10.2s,v3.2s
-	umlal	v19.2d,v10.2s,v8.2s
-	umlal	v23.2d,v10.2s,v5.2s
-	umlal	v20.2d,v10.2s,v0.2s
-	umlal	v21.2d,v10.2s,v1.2s
-
-	add	v13.2s,v13.2s,v28.2s
-	umlal	v22.2d,v12.2s,v0.2s
-	umlal	v19.2d,v12.2s,v4.2s
-	umlal	v23.2d,v12.2s,v1.2s
-	umlal	v20.2d,v12.2s,v6.2s
-	umlal	v21.2d,v12.2s,v8.2s
-
-	umlal	v22.2d,v13.2s,v8.2s
-	umlal	v19.2d,v13.2s,v2.2s
-	umlal	v23.2d,v13.2s,v0.2s
-	umlal	v20.2d,v13.2s,v4.2s
-	umlal	v21.2d,v13.2s,v6.2s
-
-.Lshort_tail:
-	////////////////////////////////////////////////////////////////
-	// horizontal add
-
-	addp	v22.2d,v22.2d,v22.2d
-	 ldp	d8,d9,[sp,#16]		// meet ABI requirements
-	addp	v19.2d,v19.2d,v19.2d
-	 ldp	d10,d11,[sp,#32]
-	addp	v23.2d,v23.2d,v23.2d
-	 ldp	d12,d13,[sp,#48]
-	addp	v20.2d,v20.2d,v20.2d
-	 ldp	d14,d15,[sp,#64]
-	addp	v21.2d,v21.2d,v21.2d
-	 ldr	x30,[sp,#8]
-
-	////////////////////////////////////////////////////////////////
-	// lazy reduction, but without narrowing
-
-	ushr	v29.2d,v22.2d,#26
-	and	v22.16b,v22.16b,v31.16b
-	 ushr	v30.2d,v19.2d,#26
-	 and	v19.16b,v19.16b,v31.16b
-
-	add	v23.2d,v23.2d,v29.2d	// h3 -> h4
-	 add	v20.2d,v20.2d,v30.2d	// h0 -> h1
-
-	ushr	v29.2d,v23.2d,#26
-	and	v23.16b,v23.16b,v31.16b
-	 ushr	v30.2d,v20.2d,#26
-	 and	v20.16b,v20.16b,v31.16b
-	 add	v21.2d,v21.2d,v30.2d	// h1 -> h2
-
-	add	v19.2d,v19.2d,v29.2d
-	shl	v29.2d,v29.2d,#2
-	 ushr	v30.2d,v21.2d,#26
-	 and	v21.16b,v21.16b,v31.16b
-	add	v19.2d,v19.2d,v29.2d	// h4 -> h0
-	 add	v22.2d,v22.2d,v30.2d	// h2 -> h3
-
-	ushr	v29.2d,v19.2d,#26
-	and	v19.16b,v19.16b,v31.16b
-	 ushr	v30.2d,v22.2d,#26
-	 and	v22.16b,v22.16b,v31.16b
-	add	v20.2d,v20.2d,v29.2d	// h0 -> h1
-	 add	v23.2d,v23.2d,v30.2d	// h3 -> h4
-
-	////////////////////////////////////////////////////////////////
-	// write the result, can be partially reduced
-
-	st4	{v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
-	mov	x4,#1
-	st1	{v23.s}[0],[x0]
-	str	x4,[x0,#8]		// set is_base2_26
-
-	ldr	x29,[sp],#80
-	 .inst	0xd50323bf		// autiasp
-	ret
-.size	poly1305_blocks_neon,.-poly1305_blocks_neon
-
-.align	5
-.Lzeros:
-.long	0,0,0,0,0,0,0,0
-.asciz	"Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
-.align	2
-#if !defined(__KERNEL__) && !defined(_WIN64)
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
deleted file mode 100644
index 7c7ce2e3bad6b..0000000000000
--- a/arch/arm64/crypto/sha256-core.S_shipped
+++ /dev/null
@@ -1,2069 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License").  You may not use
-// this file except in compliance with the License.  You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-//		SHA256-hw	SHA256(*)	SHA512
-// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
-// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
-// Denver	2.01		10.5 (+26%)	6.70 (+8%)
-// X-Gene			20.0 (+100%)	12.8 (+300%(***))
-// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
-//
-// (*)	Software SHA256 results are of lesser relevance, presented
-//	mostly for informational purposes.
-// (**)	The result is a trade-off: it's possible to improve it by
-//	10% (or by 1 cycle per round), but at the cost of 20% loss
-//	on Cortex-A53 (or by 4 cycles per round).
-// (***)	Super-impressive coefficients over gcc-generated code are
-//	indication of some compiler "pathology", most notably code
-//	generated with -mgeneral-regs-only is significanty faster
-//	and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef	__KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern	OPENSSL_armcap_P
-.globl	sha256_block_data_order
-.type	sha256_block_data_order,%function
-.align	6
-sha256_block_data_order:
-#ifndef	__KERNEL__
-# ifdef	__ILP32__
-	ldrsw	x16,.LOPENSSL_armcap_P
-# else
-	ldr	x16,.LOPENSSL_armcap_P
-# endif
-	adr	x17,.LOPENSSL_armcap_P
-	add	x16,x16,x17
-	ldr	w16,[x16]
-	tst	w16,#ARMV8_SHA256
-	b.ne	.Lv8_entry
-	tst	w16,#ARMV7_NEON
-	b.ne	.Lneon_entry
-#endif
-	stp	x29,x30,[sp,#-128]!
-	add	x29,sp,#0
-
-	stp	x19,x20,[sp,#16]
-	stp	x21,x22,[sp,#32]
-	stp	x23,x24,[sp,#48]
-	stp	x25,x26,[sp,#64]
-	stp	x27,x28,[sp,#80]
-	sub	sp,sp,#4*4
-
-	ldp	w20,w21,[x0]				// load context
-	ldp	w22,w23,[x0,#2*4]
-	ldp	w24,w25,[x0,#4*4]
-	add	x2,x1,x2,lsl#6	// end of input
-	ldp	w26,w27,[x0,#6*4]
-	adr	x30,.LK256
-	stp	x0,x2,[x29,#96]
-
-.Loop:
-	ldp	w3,w4,[x1],#2*4
-	ldr	w19,[x30],#4			// *K++
-	eor	w28,w21,w22				// magic seed
-	str	x1,[x29,#112]
-#ifndef	__AARCH64EB__
-	rev	w3,w3			// 0
-#endif
-	ror	w16,w24,#6
-	add	w27,w27,w19			// h+=K[i]
-	eor	w6,w24,w24,ror#14
-	and	w17,w25,w24
-	bic	w19,w26,w24
-	add	w27,w27,w3			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w20,w21			// a^b, b^c in next round
-	eor	w16,w16,w6,ror#11	// Sigma1(e)
-	ror	w6,w20,#2
-	add	w27,w27,w17			// h+=Ch(e,f,g)
-	eor	w17,w20,w20,ror#9
-	add	w27,w27,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w23,w23,w27			// d+=h
-	eor	w28,w28,w21			// Maj(a,b,c)
-	eor	w17,w6,w17,ror#13	// Sigma0(a)
-	add	w27,w27,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w27,w27,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w4,w4			// 1
-#endif
-	ldp	w5,w6,[x1],#2*4
-	add	w27,w27,w17			// h+=Sigma0(a)
-	ror	w16,w23,#6
-	add	w26,w26,w28			// h+=K[i]
-	eor	w7,w23,w23,ror#14
-	and	w17,w24,w23
-	bic	w28,w25,w23
-	add	w26,w26,w4			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w27,w20			// a^b, b^c in next round
-	eor	w16,w16,w7,ror#11	// Sigma1(e)
-	ror	w7,w27,#2
-	add	w26,w26,w17			// h+=Ch(e,f,g)
-	eor	w17,w27,w27,ror#9
-	add	w26,w26,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w22,w22,w26			// d+=h
-	eor	w19,w19,w20			// Maj(a,b,c)
-	eor	w17,w7,w17,ror#13	// Sigma0(a)
-	add	w26,w26,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w26,w26,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w5,w5			// 2
-#endif
-	add	w26,w26,w17			// h+=Sigma0(a)
-	ror	w16,w22,#6
-	add	w25,w25,w19			// h+=K[i]
-	eor	w8,w22,w22,ror#14
-	and	w17,w23,w22
-	bic	w19,w24,w22
-	add	w25,w25,w5			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w26,w27			// a^b, b^c in next round
-	eor	w16,w16,w8,ror#11	// Sigma1(e)
-	ror	w8,w26,#2
-	add	w25,w25,w17			// h+=Ch(e,f,g)
-	eor	w17,w26,w26,ror#9
-	add	w25,w25,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w21,w21,w25			// d+=h
-	eor	w28,w28,w27			// Maj(a,b,c)
-	eor	w17,w8,w17,ror#13	// Sigma0(a)
-	add	w25,w25,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w25,w25,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w6,w6			// 3
-#endif
-	ldp	w7,w8,[x1],#2*4
-	add	w25,w25,w17			// h+=Sigma0(a)
-	ror	w16,w21,#6
-	add	w24,w24,w28			// h+=K[i]
-	eor	w9,w21,w21,ror#14
-	and	w17,w22,w21
-	bic	w28,w23,w21
-	add	w24,w24,w6			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w25,w26			// a^b, b^c in next round
-	eor	w16,w16,w9,ror#11	// Sigma1(e)
-	ror	w9,w25,#2
-	add	w24,w24,w17			// h+=Ch(e,f,g)
-	eor	w17,w25,w25,ror#9
-	add	w24,w24,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w20,w20,w24			// d+=h
-	eor	w19,w19,w26			// Maj(a,b,c)
-	eor	w17,w9,w17,ror#13	// Sigma0(a)
-	add	w24,w24,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w24,w24,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w7,w7			// 4
-#endif
-	add	w24,w24,w17			// h+=Sigma0(a)
-	ror	w16,w20,#6
-	add	w23,w23,w19			// h+=K[i]
-	eor	w10,w20,w20,ror#14
-	and	w17,w21,w20
-	bic	w19,w22,w20
-	add	w23,w23,w7			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w24,w25			// a^b, b^c in next round
-	eor	w16,w16,w10,ror#11	// Sigma1(e)
-	ror	w10,w24,#2
-	add	w23,w23,w17			// h+=Ch(e,f,g)
-	eor	w17,w24,w24,ror#9
-	add	w23,w23,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w27,w27,w23			// d+=h
-	eor	w28,w28,w25			// Maj(a,b,c)
-	eor	w17,w10,w17,ror#13	// Sigma0(a)
-	add	w23,w23,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w23,w23,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w8,w8			// 5
-#endif
-	ldp	w9,w10,[x1],#2*4
-	add	w23,w23,w17			// h+=Sigma0(a)
-	ror	w16,w27,#6
-	add	w22,w22,w28			// h+=K[i]
-	eor	w11,w27,w27,ror#14
-	and	w17,w20,w27
-	bic	w28,w21,w27
-	add	w22,w22,w8			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w23,w24			// a^b, b^c in next round
-	eor	w16,w16,w11,ror#11	// Sigma1(e)
-	ror	w11,w23,#2
-	add	w22,w22,w17			// h+=Ch(e,f,g)
-	eor	w17,w23,w23,ror#9
-	add	w22,w22,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w26,w26,w22			// d+=h
-	eor	w19,w19,w24			// Maj(a,b,c)
-	eor	w17,w11,w17,ror#13	// Sigma0(a)
-	add	w22,w22,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w22,w22,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w9,w9			// 6
-#endif
-	add	w22,w22,w17			// h+=Sigma0(a)
-	ror	w16,w26,#6
-	add	w21,w21,w19			// h+=K[i]
-	eor	w12,w26,w26,ror#14
-	and	w17,w27,w26
-	bic	w19,w20,w26
-	add	w21,w21,w9			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w22,w23			// a^b, b^c in next round
-	eor	w16,w16,w12,ror#11	// Sigma1(e)
-	ror	w12,w22,#2
-	add	w21,w21,w17			// h+=Ch(e,f,g)
-	eor	w17,w22,w22,ror#9
-	add	w21,w21,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w25,w25,w21			// d+=h
-	eor	w28,w28,w23			// Maj(a,b,c)
-	eor	w17,w12,w17,ror#13	// Sigma0(a)
-	add	w21,w21,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w21,w21,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w10,w10			// 7
-#endif
-	ldp	w11,w12,[x1],#2*4
-	add	w21,w21,w17			// h+=Sigma0(a)
-	ror	w16,w25,#6
-	add	w20,w20,w28			// h+=K[i]
-	eor	w13,w25,w25,ror#14
-	and	w17,w26,w25
-	bic	w28,w27,w25
-	add	w20,w20,w10			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w21,w22			// a^b, b^c in next round
-	eor	w16,w16,w13,ror#11	// Sigma1(e)
-	ror	w13,w21,#2
-	add	w20,w20,w17			// h+=Ch(e,f,g)
-	eor	w17,w21,w21,ror#9
-	add	w20,w20,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w24,w24,w20			// d+=h
-	eor	w19,w19,w22			// Maj(a,b,c)
-	eor	w17,w13,w17,ror#13	// Sigma0(a)
-	add	w20,w20,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w20,w20,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w11,w11			// 8
-#endif
-	add	w20,w20,w17			// h+=Sigma0(a)
-	ror	w16,w24,#6
-	add	w27,w27,w19			// h+=K[i]
-	eor	w14,w24,w24,ror#14
-	and	w17,w25,w24
-	bic	w19,w26,w24
-	add	w27,w27,w11			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w20,w21			// a^b, b^c in next round
-	eor	w16,w16,w14,ror#11	// Sigma1(e)
-	ror	w14,w20,#2
-	add	w27,w27,w17			// h+=Ch(e,f,g)
-	eor	w17,w20,w20,ror#9
-	add	w27,w27,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w23,w23,w27			// d+=h
-	eor	w28,w28,w21			// Maj(a,b,c)
-	eor	w17,w14,w17,ror#13	// Sigma0(a)
-	add	w27,w27,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w27,w27,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w12,w12			// 9
-#endif
-	ldp	w13,w14,[x1],#2*4
-	add	w27,w27,w17			// h+=Sigma0(a)
-	ror	w16,w23,#6
-	add	w26,w26,w28			// h+=K[i]
-	eor	w15,w23,w23,ror#14
-	and	w17,w24,w23
-	bic	w28,w25,w23
-	add	w26,w26,w12			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w27,w20			// a^b, b^c in next round
-	eor	w16,w16,w15,ror#11	// Sigma1(e)
-	ror	w15,w27,#2
-	add	w26,w26,w17			// h+=Ch(e,f,g)
-	eor	w17,w27,w27,ror#9
-	add	w26,w26,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w22,w22,w26			// d+=h
-	eor	w19,w19,w20			// Maj(a,b,c)
-	eor	w17,w15,w17,ror#13	// Sigma0(a)
-	add	w26,w26,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w26,w26,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w13,w13			// 10
-#endif
-	add	w26,w26,w17			// h+=Sigma0(a)
-	ror	w16,w22,#6
-	add	w25,w25,w19			// h+=K[i]
-	eor	w0,w22,w22,ror#14
-	and	w17,w23,w22
-	bic	w19,w24,w22
-	add	w25,w25,w13			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w26,w27			// a^b, b^c in next round
-	eor	w16,w16,w0,ror#11	// Sigma1(e)
-	ror	w0,w26,#2
-	add	w25,w25,w17			// h+=Ch(e,f,g)
-	eor	w17,w26,w26,ror#9
-	add	w25,w25,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w21,w21,w25			// d+=h
-	eor	w28,w28,w27			// Maj(a,b,c)
-	eor	w17,w0,w17,ror#13	// Sigma0(a)
-	add	w25,w25,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w25,w25,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w14,w14			// 11
-#endif
-	ldp	w15,w0,[x1],#2*4
-	add	w25,w25,w17			// h+=Sigma0(a)
-	str	w6,[sp,#12]
-	ror	w16,w21,#6
-	add	w24,w24,w28			// h+=K[i]
-	eor	w6,w21,w21,ror#14
-	and	w17,w22,w21
-	bic	w28,w23,w21
-	add	w24,w24,w14			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w25,w26			// a^b, b^c in next round
-	eor	w16,w16,w6,ror#11	// Sigma1(e)
-	ror	w6,w25,#2
-	add	w24,w24,w17			// h+=Ch(e,f,g)
-	eor	w17,w25,w25,ror#9
-	add	w24,w24,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w20,w20,w24			// d+=h
-	eor	w19,w19,w26			// Maj(a,b,c)
-	eor	w17,w6,w17,ror#13	// Sigma0(a)
-	add	w24,w24,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w24,w24,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w15,w15			// 12
-#endif
-	add	w24,w24,w17			// h+=Sigma0(a)
-	str	w7,[sp,#0]
-	ror	w16,w20,#6
-	add	w23,w23,w19			// h+=K[i]
-	eor	w7,w20,w20,ror#14
-	and	w17,w21,w20
-	bic	w19,w22,w20
-	add	w23,w23,w15			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w24,w25			// a^b, b^c in next round
-	eor	w16,w16,w7,ror#11	// Sigma1(e)
-	ror	w7,w24,#2
-	add	w23,w23,w17			// h+=Ch(e,f,g)
-	eor	w17,w24,w24,ror#9
-	add	w23,w23,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w27,w27,w23			// d+=h
-	eor	w28,w28,w25			// Maj(a,b,c)
-	eor	w17,w7,w17,ror#13	// Sigma0(a)
-	add	w23,w23,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w23,w23,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w0,w0			// 13
-#endif
-	ldp	w1,w2,[x1]
-	add	w23,w23,w17			// h+=Sigma0(a)
-	str	w8,[sp,#4]
-	ror	w16,w27,#6
-	add	w22,w22,w28			// h+=K[i]
-	eor	w8,w27,w27,ror#14
-	and	w17,w20,w27
-	bic	w28,w21,w27
-	add	w22,w22,w0			// h+=X[i]
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w23,w24			// a^b, b^c in next round
-	eor	w16,w16,w8,ror#11	// Sigma1(e)
-	ror	w8,w23,#2
-	add	w22,w22,w17			// h+=Ch(e,f,g)
-	eor	w17,w23,w23,ror#9
-	add	w22,w22,w16			// h+=Sigma1(e)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	add	w26,w26,w22			// d+=h
-	eor	w19,w19,w24			// Maj(a,b,c)
-	eor	w17,w8,w17,ror#13	// Sigma0(a)
-	add	w22,w22,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	//add	w22,w22,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w1,w1			// 14
-#endif
-	ldr	w6,[sp,#12]
-	add	w22,w22,w17			// h+=Sigma0(a)
-	str	w9,[sp,#8]
-	ror	w16,w26,#6
-	add	w21,w21,w19			// h+=K[i]
-	eor	w9,w26,w26,ror#14
-	and	w17,w27,w26
-	bic	w19,w20,w26
-	add	w21,w21,w1			// h+=X[i]
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w22,w23			// a^b, b^c in next round
-	eor	w16,w16,w9,ror#11	// Sigma1(e)
-	ror	w9,w22,#2
-	add	w21,w21,w17			// h+=Ch(e,f,g)
-	eor	w17,w22,w22,ror#9
-	add	w21,w21,w16			// h+=Sigma1(e)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	add	w25,w25,w21			// d+=h
-	eor	w28,w28,w23			// Maj(a,b,c)
-	eor	w17,w9,w17,ror#13	// Sigma0(a)
-	add	w21,w21,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	//add	w21,w21,w17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	w2,w2			// 15
-#endif
-	ldr	w7,[sp,#0]
-	add	w21,w21,w17			// h+=Sigma0(a)
-	str	w10,[sp,#12]
-	ror	w16,w25,#6
-	add	w20,w20,w28			// h+=K[i]
-	ror	w9,w4,#7
-	and	w17,w26,w25
-	ror	w8,w1,#17
-	bic	w28,w27,w25
-	ror	w10,w21,#2
-	add	w20,w20,w2			// h+=X[i]
-	eor	w16,w16,w25,ror#11
-	eor	w9,w9,w4,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w21,w22			// a^b, b^c in next round
-	eor	w16,w16,w25,ror#25	// Sigma1(e)
-	eor	w10,w10,w21,ror#13
-	add	w20,w20,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w8,w8,w1,ror#19
-	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
-	add	w20,w20,w16			// h+=Sigma1(e)
-	eor	w19,w19,w22			// Maj(a,b,c)
-	eor	w17,w10,w21,ror#22	// Sigma0(a)
-	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
-	add	w3,w3,w12
-	add	w24,w24,w20			// d+=h
-	add	w20,w20,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w3,w3,w9
-	add	w20,w20,w17			// h+=Sigma0(a)
-	add	w3,w3,w8
-.Loop_16_xx:
-	ldr	w8,[sp,#4]
-	str	w11,[sp,#0]
-	ror	w16,w24,#6
-	add	w27,w27,w19			// h+=K[i]
-	ror	w10,w5,#7
-	and	w17,w25,w24
-	ror	w9,w2,#17
-	bic	w19,w26,w24
-	ror	w11,w20,#2
-	add	w27,w27,w3			// h+=X[i]
-	eor	w16,w16,w24,ror#11
-	eor	w10,w10,w5,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w20,w21			// a^b, b^c in next round
-	eor	w16,w16,w24,ror#25	// Sigma1(e)
-	eor	w11,w11,w20,ror#13
-	add	w27,w27,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w9,w9,w2,ror#19
-	eor	w10,w10,w5,lsr#3	// sigma0(X[i+1])
-	add	w27,w27,w16			// h+=Sigma1(e)
-	eor	w28,w28,w21			// Maj(a,b,c)
-	eor	w17,w11,w20,ror#22	// Sigma0(a)
-	eor	w9,w9,w2,lsr#10	// sigma1(X[i+14])
-	add	w4,w4,w13
-	add	w23,w23,w27			// d+=h
-	add	w27,w27,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w4,w4,w10
-	add	w27,w27,w17			// h+=Sigma0(a)
-	add	w4,w4,w9
-	ldr	w9,[sp,#8]
-	str	w12,[sp,#4]
-	ror	w16,w23,#6
-	add	w26,w26,w28			// h+=K[i]
-	ror	w11,w6,#7
-	and	w17,w24,w23
-	ror	w10,w3,#17
-	bic	w28,w25,w23
-	ror	w12,w27,#2
-	add	w26,w26,w4			// h+=X[i]
-	eor	w16,w16,w23,ror#11
-	eor	w11,w11,w6,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w27,w20			// a^b, b^c in next round
-	eor	w16,w16,w23,ror#25	// Sigma1(e)
-	eor	w12,w12,w27,ror#13
-	add	w26,w26,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w10,w10,w3,ror#19
-	eor	w11,w11,w6,lsr#3	// sigma0(X[i+1])
-	add	w26,w26,w16			// h+=Sigma1(e)
-	eor	w19,w19,w20			// Maj(a,b,c)
-	eor	w17,w12,w27,ror#22	// Sigma0(a)
-	eor	w10,w10,w3,lsr#10	// sigma1(X[i+14])
-	add	w5,w5,w14
-	add	w22,w22,w26			// d+=h
-	add	w26,w26,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w5,w5,w11
-	add	w26,w26,w17			// h+=Sigma0(a)
-	add	w5,w5,w10
-	ldr	w10,[sp,#12]
-	str	w13,[sp,#8]
-	ror	w16,w22,#6
-	add	w25,w25,w19			// h+=K[i]
-	ror	w12,w7,#7
-	and	w17,w23,w22
-	ror	w11,w4,#17
-	bic	w19,w24,w22
-	ror	w13,w26,#2
-	add	w25,w25,w5			// h+=X[i]
-	eor	w16,w16,w22,ror#11
-	eor	w12,w12,w7,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w26,w27			// a^b, b^c in next round
-	eor	w16,w16,w22,ror#25	// Sigma1(e)
-	eor	w13,w13,w26,ror#13
-	add	w25,w25,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w11,w11,w4,ror#19
-	eor	w12,w12,w7,lsr#3	// sigma0(X[i+1])
-	add	w25,w25,w16			// h+=Sigma1(e)
-	eor	w28,w28,w27			// Maj(a,b,c)
-	eor	w17,w13,w26,ror#22	// Sigma0(a)
-	eor	w11,w11,w4,lsr#10	// sigma1(X[i+14])
-	add	w6,w6,w15
-	add	w21,w21,w25			// d+=h
-	add	w25,w25,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w6,w6,w12
-	add	w25,w25,w17			// h+=Sigma0(a)
-	add	w6,w6,w11
-	ldr	w11,[sp,#0]
-	str	w14,[sp,#12]
-	ror	w16,w21,#6
-	add	w24,w24,w28			// h+=K[i]
-	ror	w13,w8,#7
-	and	w17,w22,w21
-	ror	w12,w5,#17
-	bic	w28,w23,w21
-	ror	w14,w25,#2
-	add	w24,w24,w6			// h+=X[i]
-	eor	w16,w16,w21,ror#11
-	eor	w13,w13,w8,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w25,w26			// a^b, b^c in next round
-	eor	w16,w16,w21,ror#25	// Sigma1(e)
-	eor	w14,w14,w25,ror#13
-	add	w24,w24,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w12,w12,w5,ror#19
-	eor	w13,w13,w8,lsr#3	// sigma0(X[i+1])
-	add	w24,w24,w16			// h+=Sigma1(e)
-	eor	w19,w19,w26			// Maj(a,b,c)
-	eor	w17,w14,w25,ror#22	// Sigma0(a)
-	eor	w12,w12,w5,lsr#10	// sigma1(X[i+14])
-	add	w7,w7,w0
-	add	w20,w20,w24			// d+=h
-	add	w24,w24,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w7,w7,w13
-	add	w24,w24,w17			// h+=Sigma0(a)
-	add	w7,w7,w12
-	ldr	w12,[sp,#4]
-	str	w15,[sp,#0]
-	ror	w16,w20,#6
-	add	w23,w23,w19			// h+=K[i]
-	ror	w14,w9,#7
-	and	w17,w21,w20
-	ror	w13,w6,#17
-	bic	w19,w22,w20
-	ror	w15,w24,#2
-	add	w23,w23,w7			// h+=X[i]
-	eor	w16,w16,w20,ror#11
-	eor	w14,w14,w9,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w24,w25			// a^b, b^c in next round
-	eor	w16,w16,w20,ror#25	// Sigma1(e)
-	eor	w15,w15,w24,ror#13
-	add	w23,w23,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w13,w13,w6,ror#19
-	eor	w14,w14,w9,lsr#3	// sigma0(X[i+1])
-	add	w23,w23,w16			// h+=Sigma1(e)
-	eor	w28,w28,w25			// Maj(a,b,c)
-	eor	w17,w15,w24,ror#22	// Sigma0(a)
-	eor	w13,w13,w6,lsr#10	// sigma1(X[i+14])
-	add	w8,w8,w1
-	add	w27,w27,w23			// d+=h
-	add	w23,w23,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w8,w8,w14
-	add	w23,w23,w17			// h+=Sigma0(a)
-	add	w8,w8,w13
-	ldr	w13,[sp,#8]
-	str	w0,[sp,#4]
-	ror	w16,w27,#6
-	add	w22,w22,w28			// h+=K[i]
-	ror	w15,w10,#7
-	and	w17,w20,w27
-	ror	w14,w7,#17
-	bic	w28,w21,w27
-	ror	w0,w23,#2
-	add	w22,w22,w8			// h+=X[i]
-	eor	w16,w16,w27,ror#11
-	eor	w15,w15,w10,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w23,w24			// a^b, b^c in next round
-	eor	w16,w16,w27,ror#25	// Sigma1(e)
-	eor	w0,w0,w23,ror#13
-	add	w22,w22,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w14,w14,w7,ror#19
-	eor	w15,w15,w10,lsr#3	// sigma0(X[i+1])
-	add	w22,w22,w16			// h+=Sigma1(e)
-	eor	w19,w19,w24			// Maj(a,b,c)
-	eor	w17,w0,w23,ror#22	// Sigma0(a)
-	eor	w14,w14,w7,lsr#10	// sigma1(X[i+14])
-	add	w9,w9,w2
-	add	w26,w26,w22			// d+=h
-	add	w22,w22,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w9,w9,w15
-	add	w22,w22,w17			// h+=Sigma0(a)
-	add	w9,w9,w14
-	ldr	w14,[sp,#12]
-	str	w1,[sp,#8]
-	ror	w16,w26,#6
-	add	w21,w21,w19			// h+=K[i]
-	ror	w0,w11,#7
-	and	w17,w27,w26
-	ror	w15,w8,#17
-	bic	w19,w20,w26
-	ror	w1,w22,#2
-	add	w21,w21,w9			// h+=X[i]
-	eor	w16,w16,w26,ror#11
-	eor	w0,w0,w11,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w22,w23			// a^b, b^c in next round
-	eor	w16,w16,w26,ror#25	// Sigma1(e)
-	eor	w1,w1,w22,ror#13
-	add	w21,w21,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w15,w15,w8,ror#19
-	eor	w0,w0,w11,lsr#3	// sigma0(X[i+1])
-	add	w21,w21,w16			// h+=Sigma1(e)
-	eor	w28,w28,w23			// Maj(a,b,c)
-	eor	w17,w1,w22,ror#22	// Sigma0(a)
-	eor	w15,w15,w8,lsr#10	// sigma1(X[i+14])
-	add	w10,w10,w3
-	add	w25,w25,w21			// d+=h
-	add	w21,w21,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w10,w10,w0
-	add	w21,w21,w17			// h+=Sigma0(a)
-	add	w10,w10,w15
-	ldr	w15,[sp,#0]
-	str	w2,[sp,#12]
-	ror	w16,w25,#6
-	add	w20,w20,w28			// h+=K[i]
-	ror	w1,w12,#7
-	and	w17,w26,w25
-	ror	w0,w9,#17
-	bic	w28,w27,w25
-	ror	w2,w21,#2
-	add	w20,w20,w10			// h+=X[i]
-	eor	w16,w16,w25,ror#11
-	eor	w1,w1,w12,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w21,w22			// a^b, b^c in next round
-	eor	w16,w16,w25,ror#25	// Sigma1(e)
-	eor	w2,w2,w21,ror#13
-	add	w20,w20,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w0,w0,w9,ror#19
-	eor	w1,w1,w12,lsr#3	// sigma0(X[i+1])
-	add	w20,w20,w16			// h+=Sigma1(e)
-	eor	w19,w19,w22			// Maj(a,b,c)
-	eor	w17,w2,w21,ror#22	// Sigma0(a)
-	eor	w0,w0,w9,lsr#10	// sigma1(X[i+14])
-	add	w11,w11,w4
-	add	w24,w24,w20			// d+=h
-	add	w20,w20,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w11,w11,w1
-	add	w20,w20,w17			// h+=Sigma0(a)
-	add	w11,w11,w0
-	ldr	w0,[sp,#4]
-	str	w3,[sp,#0]
-	ror	w16,w24,#6
-	add	w27,w27,w19			// h+=K[i]
-	ror	w2,w13,#7
-	and	w17,w25,w24
-	ror	w1,w10,#17
-	bic	w19,w26,w24
-	ror	w3,w20,#2
-	add	w27,w27,w11			// h+=X[i]
-	eor	w16,w16,w24,ror#11
-	eor	w2,w2,w13,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w20,w21			// a^b, b^c in next round
-	eor	w16,w16,w24,ror#25	// Sigma1(e)
-	eor	w3,w3,w20,ror#13
-	add	w27,w27,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w1,w1,w10,ror#19
-	eor	w2,w2,w13,lsr#3	// sigma0(X[i+1])
-	add	w27,w27,w16			// h+=Sigma1(e)
-	eor	w28,w28,w21			// Maj(a,b,c)
-	eor	w17,w3,w20,ror#22	// Sigma0(a)
-	eor	w1,w1,w10,lsr#10	// sigma1(X[i+14])
-	add	w12,w12,w5
-	add	w23,w23,w27			// d+=h
-	add	w27,w27,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w12,w12,w2
-	add	w27,w27,w17			// h+=Sigma0(a)
-	add	w12,w12,w1
-	ldr	w1,[sp,#8]
-	str	w4,[sp,#4]
-	ror	w16,w23,#6
-	add	w26,w26,w28			// h+=K[i]
-	ror	w3,w14,#7
-	and	w17,w24,w23
-	ror	w2,w11,#17
-	bic	w28,w25,w23
-	ror	w4,w27,#2
-	add	w26,w26,w12			// h+=X[i]
-	eor	w16,w16,w23,ror#11
-	eor	w3,w3,w14,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w27,w20			// a^b, b^c in next round
-	eor	w16,w16,w23,ror#25	// Sigma1(e)
-	eor	w4,w4,w27,ror#13
-	add	w26,w26,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w2,w2,w11,ror#19
-	eor	w3,w3,w14,lsr#3	// sigma0(X[i+1])
-	add	w26,w26,w16			// h+=Sigma1(e)
-	eor	w19,w19,w20			// Maj(a,b,c)
-	eor	w17,w4,w27,ror#22	// Sigma0(a)
-	eor	w2,w2,w11,lsr#10	// sigma1(X[i+14])
-	add	w13,w13,w6
-	add	w22,w22,w26			// d+=h
-	add	w26,w26,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w13,w13,w3
-	add	w26,w26,w17			// h+=Sigma0(a)
-	add	w13,w13,w2
-	ldr	w2,[sp,#12]
-	str	w5,[sp,#8]
-	ror	w16,w22,#6
-	add	w25,w25,w19			// h+=K[i]
-	ror	w4,w15,#7
-	and	w17,w23,w22
-	ror	w3,w12,#17
-	bic	w19,w24,w22
-	ror	w5,w26,#2
-	add	w25,w25,w13			// h+=X[i]
-	eor	w16,w16,w22,ror#11
-	eor	w4,w4,w15,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w26,w27			// a^b, b^c in next round
-	eor	w16,w16,w22,ror#25	// Sigma1(e)
-	eor	w5,w5,w26,ror#13
-	add	w25,w25,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w3,w3,w12,ror#19
-	eor	w4,w4,w15,lsr#3	// sigma0(X[i+1])
-	add	w25,w25,w16			// h+=Sigma1(e)
-	eor	w28,w28,w27			// Maj(a,b,c)
-	eor	w17,w5,w26,ror#22	// Sigma0(a)
-	eor	w3,w3,w12,lsr#10	// sigma1(X[i+14])
-	add	w14,w14,w7
-	add	w21,w21,w25			// d+=h
-	add	w25,w25,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w14,w14,w4
-	add	w25,w25,w17			// h+=Sigma0(a)
-	add	w14,w14,w3
-	ldr	w3,[sp,#0]
-	str	w6,[sp,#12]
-	ror	w16,w21,#6
-	add	w24,w24,w28			// h+=K[i]
-	ror	w5,w0,#7
-	and	w17,w22,w21
-	ror	w4,w13,#17
-	bic	w28,w23,w21
-	ror	w6,w25,#2
-	add	w24,w24,w14			// h+=X[i]
-	eor	w16,w16,w21,ror#11
-	eor	w5,w5,w0,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w25,w26			// a^b, b^c in next round
-	eor	w16,w16,w21,ror#25	// Sigma1(e)
-	eor	w6,w6,w25,ror#13
-	add	w24,w24,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w4,w4,w13,ror#19
-	eor	w5,w5,w0,lsr#3	// sigma0(X[i+1])
-	add	w24,w24,w16			// h+=Sigma1(e)
-	eor	w19,w19,w26			// Maj(a,b,c)
-	eor	w17,w6,w25,ror#22	// Sigma0(a)
-	eor	w4,w4,w13,lsr#10	// sigma1(X[i+14])
-	add	w15,w15,w8
-	add	w20,w20,w24			// d+=h
-	add	w24,w24,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w15,w15,w5
-	add	w24,w24,w17			// h+=Sigma0(a)
-	add	w15,w15,w4
-	ldr	w4,[sp,#4]
-	str	w7,[sp,#0]
-	ror	w16,w20,#6
-	add	w23,w23,w19			// h+=K[i]
-	ror	w6,w1,#7
-	and	w17,w21,w20
-	ror	w5,w14,#17
-	bic	w19,w22,w20
-	ror	w7,w24,#2
-	add	w23,w23,w15			// h+=X[i]
-	eor	w16,w16,w20,ror#11
-	eor	w6,w6,w1,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w24,w25			// a^b, b^c in next round
-	eor	w16,w16,w20,ror#25	// Sigma1(e)
-	eor	w7,w7,w24,ror#13
-	add	w23,w23,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w5,w5,w14,ror#19
-	eor	w6,w6,w1,lsr#3	// sigma0(X[i+1])
-	add	w23,w23,w16			// h+=Sigma1(e)
-	eor	w28,w28,w25			// Maj(a,b,c)
-	eor	w17,w7,w24,ror#22	// Sigma0(a)
-	eor	w5,w5,w14,lsr#10	// sigma1(X[i+14])
-	add	w0,w0,w9
-	add	w27,w27,w23			// d+=h
-	add	w23,w23,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w0,w0,w6
-	add	w23,w23,w17			// h+=Sigma0(a)
-	add	w0,w0,w5
-	ldr	w5,[sp,#8]
-	str	w8,[sp,#4]
-	ror	w16,w27,#6
-	add	w22,w22,w28			// h+=K[i]
-	ror	w7,w2,#7
-	and	w17,w20,w27
-	ror	w6,w15,#17
-	bic	w28,w21,w27
-	ror	w8,w23,#2
-	add	w22,w22,w0			// h+=X[i]
-	eor	w16,w16,w27,ror#11
-	eor	w7,w7,w2,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w23,w24			// a^b, b^c in next round
-	eor	w16,w16,w27,ror#25	// Sigma1(e)
-	eor	w8,w8,w23,ror#13
-	add	w22,w22,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w6,w6,w15,ror#19
-	eor	w7,w7,w2,lsr#3	// sigma0(X[i+1])
-	add	w22,w22,w16			// h+=Sigma1(e)
-	eor	w19,w19,w24			// Maj(a,b,c)
-	eor	w17,w8,w23,ror#22	// Sigma0(a)
-	eor	w6,w6,w15,lsr#10	// sigma1(X[i+14])
-	add	w1,w1,w10
-	add	w26,w26,w22			// d+=h
-	add	w22,w22,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w1,w1,w7
-	add	w22,w22,w17			// h+=Sigma0(a)
-	add	w1,w1,w6
-	ldr	w6,[sp,#12]
-	str	w9,[sp,#8]
-	ror	w16,w26,#6
-	add	w21,w21,w19			// h+=K[i]
-	ror	w8,w3,#7
-	and	w17,w27,w26
-	ror	w7,w0,#17
-	bic	w19,w20,w26
-	ror	w9,w22,#2
-	add	w21,w21,w1			// h+=X[i]
-	eor	w16,w16,w26,ror#11
-	eor	w8,w8,w3,ror#18
-	orr	w17,w17,w19			// Ch(e,f,g)
-	eor	w19,w22,w23			// a^b, b^c in next round
-	eor	w16,w16,w26,ror#25	// Sigma1(e)
-	eor	w9,w9,w22,ror#13
-	add	w21,w21,w17			// h+=Ch(e,f,g)
-	and	w28,w28,w19			// (b^c)&=(a^b)
-	eor	w7,w7,w0,ror#19
-	eor	w8,w8,w3,lsr#3	// sigma0(X[i+1])
-	add	w21,w21,w16			// h+=Sigma1(e)
-	eor	w28,w28,w23			// Maj(a,b,c)
-	eor	w17,w9,w22,ror#22	// Sigma0(a)
-	eor	w7,w7,w0,lsr#10	// sigma1(X[i+14])
-	add	w2,w2,w11
-	add	w25,w25,w21			// d+=h
-	add	w21,w21,w28			// h+=Maj(a,b,c)
-	ldr	w28,[x30],#4		// *K++, w19 in next round
-	add	w2,w2,w8
-	add	w21,w21,w17			// h+=Sigma0(a)
-	add	w2,w2,w7
-	ldr	w7,[sp,#0]
-	str	w10,[sp,#12]
-	ror	w16,w25,#6
-	add	w20,w20,w28			// h+=K[i]
-	ror	w9,w4,#7
-	and	w17,w26,w25
-	ror	w8,w1,#17
-	bic	w28,w27,w25
-	ror	w10,w21,#2
-	add	w20,w20,w2			// h+=X[i]
-	eor	w16,w16,w25,ror#11
-	eor	w9,w9,w4,ror#18
-	orr	w17,w17,w28			// Ch(e,f,g)
-	eor	w28,w21,w22			// a^b, b^c in next round
-	eor	w16,w16,w25,ror#25	// Sigma1(e)
-	eor	w10,w10,w21,ror#13
-	add	w20,w20,w17			// h+=Ch(e,f,g)
-	and	w19,w19,w28			// (b^c)&=(a^b)
-	eor	w8,w8,w1,ror#19
-	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
-	add	w20,w20,w16			// h+=Sigma1(e)
-	eor	w19,w19,w22			// Maj(a,b,c)
-	eor	w17,w10,w21,ror#22	// Sigma0(a)
-	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
-	add	w3,w3,w12
-	add	w24,w24,w20			// d+=h
-	add	w20,w20,w19			// h+=Maj(a,b,c)
-	ldr	w19,[x30],#4		// *K++, w28 in next round
-	add	w3,w3,w9
-	add	w20,w20,w17			// h+=Sigma0(a)
-	add	w3,w3,w8
-	cbnz	w19,.Loop_16_xx
-
-	ldp	x0,x2,[x29,#96]
-	ldr	x1,[x29,#112]
-	sub	x30,x30,#260		// rewind
-
-	ldp	w3,w4,[x0]
-	ldp	w5,w6,[x0,#2*4]
-	add	x1,x1,#14*4			// advance input pointer
-	ldp	w7,w8,[x0,#4*4]
-	add	w20,w20,w3
-	ldp	w9,w10,[x0,#6*4]
-	add	w21,w21,w4
-	add	w22,w22,w5
-	add	w23,w23,w6
-	stp	w20,w21,[x0]
-	add	w24,w24,w7
-	add	w25,w25,w8
-	stp	w22,w23,[x0,#2*4]
-	add	w26,w26,w9
-	add	w27,w27,w10
-	cmp	x1,x2
-	stp	w24,w25,[x0,#4*4]
-	stp	w26,w27,[x0,#6*4]
-	b.ne	.Loop
-
-	ldp	x19,x20,[x29,#16]
-	add	sp,sp,#4*4
-	ldp	x21,x22,[x29,#32]
-	ldp	x23,x24,[x29,#48]
-	ldp	x25,x26,[x29,#64]
-	ldp	x27,x28,[x29,#80]
-	ldp	x29,x30,[sp],#128
-	ret
-.size	sha256_block_data_order,.-sha256_block_data_order
-
-.align	6
-.type	.LK256,%object
-.LK256:
-	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-	.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-	.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-	.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-	.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-	.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-	.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-	.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-	.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-	.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-	.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-	.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-	.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-	.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-	.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-	.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-	.long	0	//terminator
-.size	.LK256,.-.LK256
-#ifndef	__KERNEL__
-.align	3
-.LOPENSSL_armcap_P:
-# ifdef	__ILP32__
-	.long	OPENSSL_armcap_P-.
-# else
-	.quad	OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz	"SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align	2
-#ifndef	__KERNEL__
-.type	sha256_block_armv8,%function
-.align	6
-sha256_block_armv8:
-.Lv8_entry:
-	stp		x29,x30,[sp,#-16]!
-	add		x29,sp,#0
-
-	ld1		{v0.4s,v1.4s},[x0]
-	adr		x3,.LK256
-
-.Loop_hw:
-	ld1		{v4.16b-v7.16b},[x1],#64
-	sub		x2,x2,#1
-	ld1		{v16.4s},[x3],#16
-	rev32		v4.16b,v4.16b
-	rev32		v5.16b,v5.16b
-	rev32		v6.16b,v6.16b
-	rev32		v7.16b,v7.16b
-	orr		v18.16b,v0.16b,v0.16b		// offload
-	orr		v19.16b,v1.16b,v1.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v4.4s
-	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v5.4s
-	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v6.4s
-	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v7.4s
-	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v4.4s
-	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v5.4s
-	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v6.4s
-	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v7.4s
-	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v4.4s
-	.inst	0x5e2828a4	//sha256su0 v4.16b,v5.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v5.4s
-	.inst	0x5e2828c5	//sha256su0 v5.16b,v6.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v6.4s
-	.inst	0x5e2828e6	//sha256su0 v6.16b,v7.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-	.inst	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v7.4s
-	.inst	0x5e282887	//sha256su0 v7.16b,v4.16b
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-	.inst	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
-	ld1		{v17.4s},[x3],#16
-	add		v16.4s,v16.4s,v4.4s
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-
-	ld1		{v16.4s},[x3],#16
-	add		v17.4s,v17.4s,v5.4s
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-
-	ld1		{v17.4s},[x3]
-	add		v16.4s,v16.4s,v6.4s
-	sub		x3,x3,#64*4-16	// rewind
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
-	.inst	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
-
-	add		v17.4s,v17.4s,v7.4s
-	orr		v2.16b,v0.16b,v0.16b
-	.inst	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
-	.inst	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
-
-	add		v0.4s,v0.4s,v18.4s
-	add		v1.4s,v1.4s,v19.4s
-
-	cbnz		x2,.Loop_hw
-
-	st1		{v0.4s,v1.4s},[x0]
-
-	ldr		x29,[sp],#16
-	ret
-.size	sha256_block_armv8,.-sha256_block_armv8
-#endif
-#ifdef	__KERNEL__
-.globl	sha256_block_neon
-#endif
-.type	sha256_block_neon,%function
-.align	4
-sha256_block_neon:
-.Lneon_entry:
-	stp	x29, x30, [sp, #-16]!
-	mov	x29, sp
-	sub	sp,sp,#16*4
-
-	adr	x16,.LK256
-	add	x2,x1,x2,lsl#6	// len to point at the end of inp
-
-	ld1	{v0.16b},[x1], #16
-	ld1	{v1.16b},[x1], #16
-	ld1	{v2.16b},[x1], #16
-	ld1	{v3.16b},[x1], #16
-	ld1	{v4.4s},[x16], #16
-	ld1	{v5.4s},[x16], #16
-	ld1	{v6.4s},[x16], #16
-	ld1	{v7.4s},[x16], #16
-	rev32	v0.16b,v0.16b		// yes, even on
-	rev32	v1.16b,v1.16b		// big-endian
-	rev32	v2.16b,v2.16b
-	rev32	v3.16b,v3.16b
-	mov	x17,sp
-	add	v4.4s,v4.4s,v0.4s
-	add	v5.4s,v5.4s,v1.4s
-	add	v6.4s,v6.4s,v2.4s
-	st1	{v4.4s-v5.4s},[x17], #32
-	add	v7.4s,v7.4s,v3.4s
-	st1	{v6.4s-v7.4s},[x17]
-	sub	x17,x17,#32
-
-	ldp	w3,w4,[x0]
-	ldp	w5,w6,[x0,#8]
-	ldp	w7,w8,[x0,#16]
-	ldp	w9,w10,[x0,#24]
-	ldr	w12,[sp,#0]
-	mov	w13,wzr
-	eor	w14,w4,w5
-	mov	w15,wzr
-	b	.L_00_48
-
-.align	4
-.L_00_48:
-	ext	v4.16b,v0.16b,v1.16b,#4
-	add	w10,w10,w12
-	add	w3,w3,w15
-	and	w12,w8,w7
-	bic	w15,w9,w7
-	ext	v7.16b,v2.16b,v3.16b,#4
-	eor	w11,w7,w7,ror#5
-	add	w3,w3,w13
-	mov	d19,v3.d[1]
-	orr	w12,w12,w15
-	eor	w11,w11,w7,ror#19
-	ushr	v6.4s,v4.4s,#7
-	eor	w15,w3,w3,ror#11
-	ushr	v5.4s,v4.4s,#3
-	add	w10,w10,w12
-	add	v0.4s,v0.4s,v7.4s
-	ror	w11,w11,#6
-	sli	v6.4s,v4.4s,#25
-	eor	w13,w3,w4
-	eor	w15,w15,w3,ror#20
-	ushr	v7.4s,v4.4s,#18
-	add	w10,w10,w11
-	ldr	w12,[sp,#4]
-	and	w14,w14,w13
-	eor	v5.16b,v5.16b,v6.16b
-	ror	w15,w15,#2
-	add	w6,w6,w10
-	sli	v7.4s,v4.4s,#14
-	eor	w14,w14,w4
-	ushr	v16.4s,v19.4s,#17
-	add	w9,w9,w12
-	add	w10,w10,w15
-	and	w12,w7,w6
-	eor	v5.16b,v5.16b,v7.16b
-	bic	w15,w8,w6
-	eor	w11,w6,w6,ror#5
-	sli	v16.4s,v19.4s,#15
-	add	w10,w10,w14
-	orr	w12,w12,w15
-	ushr	v17.4s,v19.4s,#10
-	eor	w11,w11,w6,ror#19
-	eor	w15,w10,w10,ror#11
-	ushr	v7.4s,v19.4s,#19
-	add	w9,w9,w12
-	ror	w11,w11,#6
-	add	v0.4s,v0.4s,v5.4s
-	eor	w14,w10,w3
-	eor	w15,w15,w10,ror#20
-	sli	v7.4s,v19.4s,#13
-	add	w9,w9,w11
-	ldr	w12,[sp,#8]
-	and	w13,w13,w14
-	eor	v17.16b,v17.16b,v16.16b
-	ror	w15,w15,#2
-	add	w5,w5,w9
-	eor	w13,w13,w3
-	eor	v17.16b,v17.16b,v7.16b
-	add	w8,w8,w12
-	add	w9,w9,w15
-	and	w12,w6,w5
-	add	v0.4s,v0.4s,v17.4s
-	bic	w15,w7,w5
-	eor	w11,w5,w5,ror#5
-	add	w9,w9,w13
-	ushr	v18.4s,v0.4s,#17
-	orr	w12,w12,w15
-	ushr	v19.4s,v0.4s,#10
-	eor	w11,w11,w5,ror#19
-	eor	w15,w9,w9,ror#11
-	sli	v18.4s,v0.4s,#15
-	add	w8,w8,w12
-	ushr	v17.4s,v0.4s,#19
-	ror	w11,w11,#6
-	eor	w13,w9,w10
-	eor	v19.16b,v19.16b,v18.16b
-	eor	w15,w15,w9,ror#20
-	add	w8,w8,w11
-	sli	v17.4s,v0.4s,#13
-	ldr	w12,[sp,#12]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	ld1	{v4.4s},[x16], #16
-	add	w4,w4,w8
-	eor	v19.16b,v19.16b,v17.16b
-	eor	w14,w14,w10
-	eor	v17.16b,v17.16b,v17.16b
-	add	w7,w7,w12
-	add	w8,w8,w15
-	and	w12,w5,w4
-	mov	v17.d[1],v19.d[0]
-	bic	w15,w6,w4
-	eor	w11,w4,w4,ror#5
-	add	w8,w8,w14
-	add	v0.4s,v0.4s,v17.4s
-	orr	w12,w12,w15
-	eor	w11,w11,w4,ror#19
-	eor	w15,w8,w8,ror#11
-	add	v4.4s,v4.4s,v0.4s
-	add	w7,w7,w12
-	ror	w11,w11,#6
-	eor	w14,w8,w9
-	eor	w15,w15,w8,ror#20
-	add	w7,w7,w11
-	ldr	w12,[sp,#16]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w3,w3,w7
-	eor	w13,w13,w9
-	st1	{v4.4s},[x17], #16
-	ext	v4.16b,v1.16b,v2.16b,#4
-	add	w6,w6,w12
-	add	w7,w7,w15
-	and	w12,w4,w3
-	bic	w15,w5,w3
-	ext	v7.16b,v3.16b,v0.16b,#4
-	eor	w11,w3,w3,ror#5
-	add	w7,w7,w13
-	mov	d19,v0.d[1]
-	orr	w12,w12,w15
-	eor	w11,w11,w3,ror#19
-	ushr	v6.4s,v4.4s,#7
-	eor	w15,w7,w7,ror#11
-	ushr	v5.4s,v4.4s,#3
-	add	w6,w6,w12
-	add	v1.4s,v1.4s,v7.4s
-	ror	w11,w11,#6
-	sli	v6.4s,v4.4s,#25
-	eor	w13,w7,w8
-	eor	w15,w15,w7,ror#20
-	ushr	v7.4s,v4.4s,#18
-	add	w6,w6,w11
-	ldr	w12,[sp,#20]
-	and	w14,w14,w13
-	eor	v5.16b,v5.16b,v6.16b
-	ror	w15,w15,#2
-	add	w10,w10,w6
-	sli	v7.4s,v4.4s,#14
-	eor	w14,w14,w8
-	ushr	v16.4s,v19.4s,#17
-	add	w5,w5,w12
-	add	w6,w6,w15
-	and	w12,w3,w10
-	eor	v5.16b,v5.16b,v7.16b
-	bic	w15,w4,w10
-	eor	w11,w10,w10,ror#5
-	sli	v16.4s,v19.4s,#15
-	add	w6,w6,w14
-	orr	w12,w12,w15
-	ushr	v17.4s,v19.4s,#10
-	eor	w11,w11,w10,ror#19
-	eor	w15,w6,w6,ror#11
-	ushr	v7.4s,v19.4s,#19
-	add	w5,w5,w12
-	ror	w11,w11,#6
-	add	v1.4s,v1.4s,v5.4s
-	eor	w14,w6,w7
-	eor	w15,w15,w6,ror#20
-	sli	v7.4s,v19.4s,#13
-	add	w5,w5,w11
-	ldr	w12,[sp,#24]
-	and	w13,w13,w14
-	eor	v17.16b,v17.16b,v16.16b
-	ror	w15,w15,#2
-	add	w9,w9,w5
-	eor	w13,w13,w7
-	eor	v17.16b,v17.16b,v7.16b
-	add	w4,w4,w12
-	add	w5,w5,w15
-	and	w12,w10,w9
-	add	v1.4s,v1.4s,v17.4s
-	bic	w15,w3,w9
-	eor	w11,w9,w9,ror#5
-	add	w5,w5,w13
-	ushr	v18.4s,v1.4s,#17
-	orr	w12,w12,w15
-	ushr	v19.4s,v1.4s,#10
-	eor	w11,w11,w9,ror#19
-	eor	w15,w5,w5,ror#11
-	sli	v18.4s,v1.4s,#15
-	add	w4,w4,w12
-	ushr	v17.4s,v1.4s,#19
-	ror	w11,w11,#6
-	eor	w13,w5,w6
-	eor	v19.16b,v19.16b,v18.16b
-	eor	w15,w15,w5,ror#20
-	add	w4,w4,w11
-	sli	v17.4s,v1.4s,#13
-	ldr	w12,[sp,#28]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	ld1	{v4.4s},[x16], #16
-	add	w8,w8,w4
-	eor	v19.16b,v19.16b,v17.16b
-	eor	w14,w14,w6
-	eor	v17.16b,v17.16b,v17.16b
-	add	w3,w3,w12
-	add	w4,w4,w15
-	and	w12,w9,w8
-	mov	v17.d[1],v19.d[0]
-	bic	w15,w10,w8
-	eor	w11,w8,w8,ror#5
-	add	w4,w4,w14
-	add	v1.4s,v1.4s,v17.4s
-	orr	w12,w12,w15
-	eor	w11,w11,w8,ror#19
-	eor	w15,w4,w4,ror#11
-	add	v4.4s,v4.4s,v1.4s
-	add	w3,w3,w12
-	ror	w11,w11,#6
-	eor	w14,w4,w5
-	eor	w15,w15,w4,ror#20
-	add	w3,w3,w11
-	ldr	w12,[sp,#32]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w7,w7,w3
-	eor	w13,w13,w5
-	st1	{v4.4s},[x17], #16
-	ext	v4.16b,v2.16b,v3.16b,#4
-	add	w10,w10,w12
-	add	w3,w3,w15
-	and	w12,w8,w7
-	bic	w15,w9,w7
-	ext	v7.16b,v0.16b,v1.16b,#4
-	eor	w11,w7,w7,ror#5
-	add	w3,w3,w13
-	mov	d19,v1.d[1]
-	orr	w12,w12,w15
-	eor	w11,w11,w7,ror#19
-	ushr	v6.4s,v4.4s,#7
-	eor	w15,w3,w3,ror#11
-	ushr	v5.4s,v4.4s,#3
-	add	w10,w10,w12
-	add	v2.4s,v2.4s,v7.4s
-	ror	w11,w11,#6
-	sli	v6.4s,v4.4s,#25
-	eor	w13,w3,w4
-	eor	w15,w15,w3,ror#20
-	ushr	v7.4s,v4.4s,#18
-	add	w10,w10,w11
-	ldr	w12,[sp,#36]
-	and	w14,w14,w13
-	eor	v5.16b,v5.16b,v6.16b
-	ror	w15,w15,#2
-	add	w6,w6,w10
-	sli	v7.4s,v4.4s,#14
-	eor	w14,w14,w4
-	ushr	v16.4s,v19.4s,#17
-	add	w9,w9,w12
-	add	w10,w10,w15
-	and	w12,w7,w6
-	eor	v5.16b,v5.16b,v7.16b
-	bic	w15,w8,w6
-	eor	w11,w6,w6,ror#5
-	sli	v16.4s,v19.4s,#15
-	add	w10,w10,w14
-	orr	w12,w12,w15
-	ushr	v17.4s,v19.4s,#10
-	eor	w11,w11,w6,ror#19
-	eor	w15,w10,w10,ror#11
-	ushr	v7.4s,v19.4s,#19
-	add	w9,w9,w12
-	ror	w11,w11,#6
-	add	v2.4s,v2.4s,v5.4s
-	eor	w14,w10,w3
-	eor	w15,w15,w10,ror#20
-	sli	v7.4s,v19.4s,#13
-	add	w9,w9,w11
-	ldr	w12,[sp,#40]
-	and	w13,w13,w14
-	eor	v17.16b,v17.16b,v16.16b
-	ror	w15,w15,#2
-	add	w5,w5,w9
-	eor	w13,w13,w3
-	eor	v17.16b,v17.16b,v7.16b
-	add	w8,w8,w12
-	add	w9,w9,w15
-	and	w12,w6,w5
-	add	v2.4s,v2.4s,v17.4s
-	bic	w15,w7,w5
-	eor	w11,w5,w5,ror#5
-	add	w9,w9,w13
-	ushr	v18.4s,v2.4s,#17
-	orr	w12,w12,w15
-	ushr	v19.4s,v2.4s,#10
-	eor	w11,w11,w5,ror#19
-	eor	w15,w9,w9,ror#11
-	sli	v18.4s,v2.4s,#15
-	add	w8,w8,w12
-	ushr	v17.4s,v2.4s,#19
-	ror	w11,w11,#6
-	eor	w13,w9,w10
-	eor	v19.16b,v19.16b,v18.16b
-	eor	w15,w15,w9,ror#20
-	add	w8,w8,w11
-	sli	v17.4s,v2.4s,#13
-	ldr	w12,[sp,#44]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	ld1	{v4.4s},[x16], #16
-	add	w4,w4,w8
-	eor	v19.16b,v19.16b,v17.16b
-	eor	w14,w14,w10
-	eor	v17.16b,v17.16b,v17.16b
-	add	w7,w7,w12
-	add	w8,w8,w15
-	and	w12,w5,w4
-	mov	v17.d[1],v19.d[0]
-	bic	w15,w6,w4
-	eor	w11,w4,w4,ror#5
-	add	w8,w8,w14
-	add	v2.4s,v2.4s,v17.4s
-	orr	w12,w12,w15
-	eor	w11,w11,w4,ror#19
-	eor	w15,w8,w8,ror#11
-	add	v4.4s,v4.4s,v2.4s
-	add	w7,w7,w12
-	ror	w11,w11,#6
-	eor	w14,w8,w9
-	eor	w15,w15,w8,ror#20
-	add	w7,w7,w11
-	ldr	w12,[sp,#48]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w3,w3,w7
-	eor	w13,w13,w9
-	st1	{v4.4s},[x17], #16
-	ext	v4.16b,v3.16b,v0.16b,#4
-	add	w6,w6,w12
-	add	w7,w7,w15
-	and	w12,w4,w3
-	bic	w15,w5,w3
-	ext	v7.16b,v1.16b,v2.16b,#4
-	eor	w11,w3,w3,ror#5
-	add	w7,w7,w13
-	mov	d19,v2.d[1]
-	orr	w12,w12,w15
-	eor	w11,w11,w3,ror#19
-	ushr	v6.4s,v4.4s,#7
-	eor	w15,w7,w7,ror#11
-	ushr	v5.4s,v4.4s,#3
-	add	w6,w6,w12
-	add	v3.4s,v3.4s,v7.4s
-	ror	w11,w11,#6
-	sli	v6.4s,v4.4s,#25
-	eor	w13,w7,w8
-	eor	w15,w15,w7,ror#20
-	ushr	v7.4s,v4.4s,#18
-	add	w6,w6,w11
-	ldr	w12,[sp,#52]
-	and	w14,w14,w13
-	eor	v5.16b,v5.16b,v6.16b
-	ror	w15,w15,#2
-	add	w10,w10,w6
-	sli	v7.4s,v4.4s,#14
-	eor	w14,w14,w8
-	ushr	v16.4s,v19.4s,#17
-	add	w5,w5,w12
-	add	w6,w6,w15
-	and	w12,w3,w10
-	eor	v5.16b,v5.16b,v7.16b
-	bic	w15,w4,w10
-	eor	w11,w10,w10,ror#5
-	sli	v16.4s,v19.4s,#15
-	add	w6,w6,w14
-	orr	w12,w12,w15
-	ushr	v17.4s,v19.4s,#10
-	eor	w11,w11,w10,ror#19
-	eor	w15,w6,w6,ror#11
-	ushr	v7.4s,v19.4s,#19
-	add	w5,w5,w12
-	ror	w11,w11,#6
-	add	v3.4s,v3.4s,v5.4s
-	eor	w14,w6,w7
-	eor	w15,w15,w6,ror#20
-	sli	v7.4s,v19.4s,#13
-	add	w5,w5,w11
-	ldr	w12,[sp,#56]
-	and	w13,w13,w14
-	eor	v17.16b,v17.16b,v16.16b
-	ror	w15,w15,#2
-	add	w9,w9,w5
-	eor	w13,w13,w7
-	eor	v17.16b,v17.16b,v7.16b
-	add	w4,w4,w12
-	add	w5,w5,w15
-	and	w12,w10,w9
-	add	v3.4s,v3.4s,v17.4s
-	bic	w15,w3,w9
-	eor	w11,w9,w9,ror#5
-	add	w5,w5,w13
-	ushr	v18.4s,v3.4s,#17
-	orr	w12,w12,w15
-	ushr	v19.4s,v3.4s,#10
-	eor	w11,w11,w9,ror#19
-	eor	w15,w5,w5,ror#11
-	sli	v18.4s,v3.4s,#15
-	add	w4,w4,w12
-	ushr	v17.4s,v3.4s,#19
-	ror	w11,w11,#6
-	eor	w13,w5,w6
-	eor	v19.16b,v19.16b,v18.16b
-	eor	w15,w15,w5,ror#20
-	add	w4,w4,w11
-	sli	v17.4s,v3.4s,#13
-	ldr	w12,[sp,#60]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	ld1	{v4.4s},[x16], #16
-	add	w8,w8,w4
-	eor	v19.16b,v19.16b,v17.16b
-	eor	w14,w14,w6
-	eor	v17.16b,v17.16b,v17.16b
-	add	w3,w3,w12
-	add	w4,w4,w15
-	and	w12,w9,w8
-	mov	v17.d[1],v19.d[0]
-	bic	w15,w10,w8
-	eor	w11,w8,w8,ror#5
-	add	w4,w4,w14
-	add	v3.4s,v3.4s,v17.4s
-	orr	w12,w12,w15
-	eor	w11,w11,w8,ror#19
-	eor	w15,w4,w4,ror#11
-	add	v4.4s,v4.4s,v3.4s
-	add	w3,w3,w12
-	ror	w11,w11,#6
-	eor	w14,w4,w5
-	eor	w15,w15,w4,ror#20
-	add	w3,w3,w11
-	ldr	w12,[x16]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w7,w7,w3
-	eor	w13,w13,w5
-	st1	{v4.4s},[x17], #16
-	cmp	w12,#0				// check for K256 terminator
-	ldr	w12,[sp,#0]
-	sub	x17,x17,#64
-	bne	.L_00_48
-
-	sub	x16,x16,#256		// rewind x16
-	cmp	x1,x2
-	mov	x17, #64
-	csel	x17, x17, xzr, eq
-	sub	x1,x1,x17			// avoid SEGV
-	mov	x17,sp
-	add	w10,w10,w12
-	add	w3,w3,w15
-	and	w12,w8,w7
-	ld1	{v0.16b},[x1],#16
-	bic	w15,w9,w7
-	eor	w11,w7,w7,ror#5
-	ld1	{v4.4s},[x16],#16
-	add	w3,w3,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w7,ror#19
-	eor	w15,w3,w3,ror#11
-	rev32	v0.16b,v0.16b
-	add	w10,w10,w12
-	ror	w11,w11,#6
-	eor	w13,w3,w4
-	eor	w15,w15,w3,ror#20
-	add	v4.4s,v4.4s,v0.4s
-	add	w10,w10,w11
-	ldr	w12,[sp,#4]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w6,w6,w10
-	eor	w14,w14,w4
-	add	w9,w9,w12
-	add	w10,w10,w15
-	and	w12,w7,w6
-	bic	w15,w8,w6
-	eor	w11,w6,w6,ror#5
-	add	w10,w10,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w6,ror#19
-	eor	w15,w10,w10,ror#11
-	add	w9,w9,w12
-	ror	w11,w11,#6
-	eor	w14,w10,w3
-	eor	w15,w15,w10,ror#20
-	add	w9,w9,w11
-	ldr	w12,[sp,#8]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w5,w5,w9
-	eor	w13,w13,w3
-	add	w8,w8,w12
-	add	w9,w9,w15
-	and	w12,w6,w5
-	bic	w15,w7,w5
-	eor	w11,w5,w5,ror#5
-	add	w9,w9,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w5,ror#19
-	eor	w15,w9,w9,ror#11
-	add	w8,w8,w12
-	ror	w11,w11,#6
-	eor	w13,w9,w10
-	eor	w15,w15,w9,ror#20
-	add	w8,w8,w11
-	ldr	w12,[sp,#12]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w4,w4,w8
-	eor	w14,w14,w10
-	add	w7,w7,w12
-	add	w8,w8,w15
-	and	w12,w5,w4
-	bic	w15,w6,w4
-	eor	w11,w4,w4,ror#5
-	add	w8,w8,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w4,ror#19
-	eor	w15,w8,w8,ror#11
-	add	w7,w7,w12
-	ror	w11,w11,#6
-	eor	w14,w8,w9
-	eor	w15,w15,w8,ror#20
-	add	w7,w7,w11
-	ldr	w12,[sp,#16]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w3,w3,w7
-	eor	w13,w13,w9
-	st1	{v4.4s},[x17], #16
-	add	w6,w6,w12
-	add	w7,w7,w15
-	and	w12,w4,w3
-	ld1	{v1.16b},[x1],#16
-	bic	w15,w5,w3
-	eor	w11,w3,w3,ror#5
-	ld1	{v4.4s},[x16],#16
-	add	w7,w7,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w3,ror#19
-	eor	w15,w7,w7,ror#11
-	rev32	v1.16b,v1.16b
-	add	w6,w6,w12
-	ror	w11,w11,#6
-	eor	w13,w7,w8
-	eor	w15,w15,w7,ror#20
-	add	v4.4s,v4.4s,v1.4s
-	add	w6,w6,w11
-	ldr	w12,[sp,#20]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w10,w10,w6
-	eor	w14,w14,w8
-	add	w5,w5,w12
-	add	w6,w6,w15
-	and	w12,w3,w10
-	bic	w15,w4,w10
-	eor	w11,w10,w10,ror#5
-	add	w6,w6,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w10,ror#19
-	eor	w15,w6,w6,ror#11
-	add	w5,w5,w12
-	ror	w11,w11,#6
-	eor	w14,w6,w7
-	eor	w15,w15,w6,ror#20
-	add	w5,w5,w11
-	ldr	w12,[sp,#24]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w9,w9,w5
-	eor	w13,w13,w7
-	add	w4,w4,w12
-	add	w5,w5,w15
-	and	w12,w10,w9
-	bic	w15,w3,w9
-	eor	w11,w9,w9,ror#5
-	add	w5,w5,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w9,ror#19
-	eor	w15,w5,w5,ror#11
-	add	w4,w4,w12
-	ror	w11,w11,#6
-	eor	w13,w5,w6
-	eor	w15,w15,w5,ror#20
-	add	w4,w4,w11
-	ldr	w12,[sp,#28]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w8,w8,w4
-	eor	w14,w14,w6
-	add	w3,w3,w12
-	add	w4,w4,w15
-	and	w12,w9,w8
-	bic	w15,w10,w8
-	eor	w11,w8,w8,ror#5
-	add	w4,w4,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w8,ror#19
-	eor	w15,w4,w4,ror#11
-	add	w3,w3,w12
-	ror	w11,w11,#6
-	eor	w14,w4,w5
-	eor	w15,w15,w4,ror#20
-	add	w3,w3,w11
-	ldr	w12,[sp,#32]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w7,w7,w3
-	eor	w13,w13,w5
-	st1	{v4.4s},[x17], #16
-	add	w10,w10,w12
-	add	w3,w3,w15
-	and	w12,w8,w7
-	ld1	{v2.16b},[x1],#16
-	bic	w15,w9,w7
-	eor	w11,w7,w7,ror#5
-	ld1	{v4.4s},[x16],#16
-	add	w3,w3,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w7,ror#19
-	eor	w15,w3,w3,ror#11
-	rev32	v2.16b,v2.16b
-	add	w10,w10,w12
-	ror	w11,w11,#6
-	eor	w13,w3,w4
-	eor	w15,w15,w3,ror#20
-	add	v4.4s,v4.4s,v2.4s
-	add	w10,w10,w11
-	ldr	w12,[sp,#36]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w6,w6,w10
-	eor	w14,w14,w4
-	add	w9,w9,w12
-	add	w10,w10,w15
-	and	w12,w7,w6
-	bic	w15,w8,w6
-	eor	w11,w6,w6,ror#5
-	add	w10,w10,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w6,ror#19
-	eor	w15,w10,w10,ror#11
-	add	w9,w9,w12
-	ror	w11,w11,#6
-	eor	w14,w10,w3
-	eor	w15,w15,w10,ror#20
-	add	w9,w9,w11
-	ldr	w12,[sp,#40]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w5,w5,w9
-	eor	w13,w13,w3
-	add	w8,w8,w12
-	add	w9,w9,w15
-	and	w12,w6,w5
-	bic	w15,w7,w5
-	eor	w11,w5,w5,ror#5
-	add	w9,w9,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w5,ror#19
-	eor	w15,w9,w9,ror#11
-	add	w8,w8,w12
-	ror	w11,w11,#6
-	eor	w13,w9,w10
-	eor	w15,w15,w9,ror#20
-	add	w8,w8,w11
-	ldr	w12,[sp,#44]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w4,w4,w8
-	eor	w14,w14,w10
-	add	w7,w7,w12
-	add	w8,w8,w15
-	and	w12,w5,w4
-	bic	w15,w6,w4
-	eor	w11,w4,w4,ror#5
-	add	w8,w8,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w4,ror#19
-	eor	w15,w8,w8,ror#11
-	add	w7,w7,w12
-	ror	w11,w11,#6
-	eor	w14,w8,w9
-	eor	w15,w15,w8,ror#20
-	add	w7,w7,w11
-	ldr	w12,[sp,#48]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w3,w3,w7
-	eor	w13,w13,w9
-	st1	{v4.4s},[x17], #16
-	add	w6,w6,w12
-	add	w7,w7,w15
-	and	w12,w4,w3
-	ld1	{v3.16b},[x1],#16
-	bic	w15,w5,w3
-	eor	w11,w3,w3,ror#5
-	ld1	{v4.4s},[x16],#16
-	add	w7,w7,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w3,ror#19
-	eor	w15,w7,w7,ror#11
-	rev32	v3.16b,v3.16b
-	add	w6,w6,w12
-	ror	w11,w11,#6
-	eor	w13,w7,w8
-	eor	w15,w15,w7,ror#20
-	add	v4.4s,v4.4s,v3.4s
-	add	w6,w6,w11
-	ldr	w12,[sp,#52]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w10,w10,w6
-	eor	w14,w14,w8
-	add	w5,w5,w12
-	add	w6,w6,w15
-	and	w12,w3,w10
-	bic	w15,w4,w10
-	eor	w11,w10,w10,ror#5
-	add	w6,w6,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w10,ror#19
-	eor	w15,w6,w6,ror#11
-	add	w5,w5,w12
-	ror	w11,w11,#6
-	eor	w14,w6,w7
-	eor	w15,w15,w6,ror#20
-	add	w5,w5,w11
-	ldr	w12,[sp,#56]
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w9,w9,w5
-	eor	w13,w13,w7
-	add	w4,w4,w12
-	add	w5,w5,w15
-	and	w12,w10,w9
-	bic	w15,w3,w9
-	eor	w11,w9,w9,ror#5
-	add	w5,w5,w13
-	orr	w12,w12,w15
-	eor	w11,w11,w9,ror#19
-	eor	w15,w5,w5,ror#11
-	add	w4,w4,w12
-	ror	w11,w11,#6
-	eor	w13,w5,w6
-	eor	w15,w15,w5,ror#20
-	add	w4,w4,w11
-	ldr	w12,[sp,#60]
-	and	w14,w14,w13
-	ror	w15,w15,#2
-	add	w8,w8,w4
-	eor	w14,w14,w6
-	add	w3,w3,w12
-	add	w4,w4,w15
-	and	w12,w9,w8
-	bic	w15,w10,w8
-	eor	w11,w8,w8,ror#5
-	add	w4,w4,w14
-	orr	w12,w12,w15
-	eor	w11,w11,w8,ror#19
-	eor	w15,w4,w4,ror#11
-	add	w3,w3,w12
-	ror	w11,w11,#6
-	eor	w14,w4,w5
-	eor	w15,w15,w4,ror#20
-	add	w3,w3,w11
-	and	w13,w13,w14
-	ror	w15,w15,#2
-	add	w7,w7,w3
-	eor	w13,w13,w5
-	st1	{v4.4s},[x17], #16
-	add	w3,w3,w15			// h+=Sigma0(a) from the past
-	ldp	w11,w12,[x0,#0]
-	add	w3,w3,w13			// h+=Maj(a,b,c) from the past
-	ldp	w13,w14,[x0,#8]
-	add	w3,w3,w11			// accumulate
-	add	w4,w4,w12
-	ldp	w11,w12,[x0,#16]
-	add	w5,w5,w13
-	add	w6,w6,w14
-	ldp	w13,w14,[x0,#24]
-	add	w7,w7,w11
-	add	w8,w8,w12
-	 ldr	w12,[sp,#0]
-	stp	w3,w4,[x0,#0]
-	add	w9,w9,w13
-	 mov	w13,wzr
-	stp	w5,w6,[x0,#8]
-	add	w10,w10,w14
-	stp	w7,w8,[x0,#16]
-	 eor	w14,w4,w5
-	stp	w9,w10,[x0,#24]
-	 mov	w15,wzr
-	 mov	x17,sp
-	b.ne	.L_00_48
-
-	ldr	x29,[x29]
-	add	sp,sp,#16*4+16
-	ret
-.size	sha256_block_neon,.-sha256_block_neon
-#ifndef	__KERNEL__
-.comm	OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
deleted file mode 100644
index e063a61067201..0000000000000
--- a/arch/arm64/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1093 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License").  You may not use
-// this file except in compliance with the License.  You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-//		SHA256-hw	SHA256(*)	SHA512
-// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
-// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
-// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
-// Denver	2.01		10.5 (+26%)	6.70 (+8%)
-// X-Gene			20.0 (+100%)	12.8 (+300%(***))
-// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
-//
-// (*)	Software SHA256 results are of lesser relevance, presented
-//	mostly for informational purposes.
-// (**)	The result is a trade-off: it's possible to improve it by
-//	10% (or by 1 cycle per round), but at the cost of 20% loss
-//	on Cortex-A53 (or by 4 cycles per round).
-// (***)	Super-impressive coefficients over gcc-generated code are
-//	indication of some compiler "pathology", most notably code
-//	generated with -mgeneral-regs-only is significanty faster
-//	and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef	__KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern	OPENSSL_armcap_P
-.globl	sha512_block_data_order
-.type	sha512_block_data_order,%function
-.align	6
-sha512_block_data_order:
-	stp	x29,x30,[sp,#-128]!
-	add	x29,sp,#0
-
-	stp	x19,x20,[sp,#16]
-	stp	x21,x22,[sp,#32]
-	stp	x23,x24,[sp,#48]
-	stp	x25,x26,[sp,#64]
-	stp	x27,x28,[sp,#80]
-	sub	sp,sp,#4*8
-
-	ldp	x20,x21,[x0]				// load context
-	ldp	x22,x23,[x0,#2*8]
-	ldp	x24,x25,[x0,#4*8]
-	add	x2,x1,x2,lsl#7	// end of input
-	ldp	x26,x27,[x0,#6*8]
-	adr	x30,.LK512
-	stp	x0,x2,[x29,#96]
-
-.Loop:
-	ldp	x3,x4,[x1],#2*8
-	ldr	x19,[x30],#8			// *K++
-	eor	x28,x21,x22				// magic seed
-	str	x1,[x29,#112]
-#ifndef	__AARCH64EB__
-	rev	x3,x3			// 0
-#endif
-	ror	x16,x24,#14
-	add	x27,x27,x19			// h+=K[i]
-	eor	x6,x24,x24,ror#23
-	and	x17,x25,x24
-	bic	x19,x26,x24
-	add	x27,x27,x3			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x20,x21			// a^b, b^c in next round
-	eor	x16,x16,x6,ror#18	// Sigma1(e)
-	ror	x6,x20,#28
-	add	x27,x27,x17			// h+=Ch(e,f,g)
-	eor	x17,x20,x20,ror#5
-	add	x27,x27,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x23,x23,x27			// d+=h
-	eor	x28,x28,x21			// Maj(a,b,c)
-	eor	x17,x6,x17,ror#34	// Sigma0(a)
-	add	x27,x27,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x27,x27,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x4,x4			// 1
-#endif
-	ldp	x5,x6,[x1],#2*8
-	add	x27,x27,x17			// h+=Sigma0(a)
-	ror	x16,x23,#14
-	add	x26,x26,x28			// h+=K[i]
-	eor	x7,x23,x23,ror#23
-	and	x17,x24,x23
-	bic	x28,x25,x23
-	add	x26,x26,x4			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x27,x20			// a^b, b^c in next round
-	eor	x16,x16,x7,ror#18	// Sigma1(e)
-	ror	x7,x27,#28
-	add	x26,x26,x17			// h+=Ch(e,f,g)
-	eor	x17,x27,x27,ror#5
-	add	x26,x26,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x22,x22,x26			// d+=h
-	eor	x19,x19,x20			// Maj(a,b,c)
-	eor	x17,x7,x17,ror#34	// Sigma0(a)
-	add	x26,x26,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x26,x26,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x5,x5			// 2
-#endif
-	add	x26,x26,x17			// h+=Sigma0(a)
-	ror	x16,x22,#14
-	add	x25,x25,x19			// h+=K[i]
-	eor	x8,x22,x22,ror#23
-	and	x17,x23,x22
-	bic	x19,x24,x22
-	add	x25,x25,x5			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x26,x27			// a^b, b^c in next round
-	eor	x16,x16,x8,ror#18	// Sigma1(e)
-	ror	x8,x26,#28
-	add	x25,x25,x17			// h+=Ch(e,f,g)
-	eor	x17,x26,x26,ror#5
-	add	x25,x25,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x21,x21,x25			// d+=h
-	eor	x28,x28,x27			// Maj(a,b,c)
-	eor	x17,x8,x17,ror#34	// Sigma0(a)
-	add	x25,x25,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x25,x25,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x6,x6			// 3
-#endif
-	ldp	x7,x8,[x1],#2*8
-	add	x25,x25,x17			// h+=Sigma0(a)
-	ror	x16,x21,#14
-	add	x24,x24,x28			// h+=K[i]
-	eor	x9,x21,x21,ror#23
-	and	x17,x22,x21
-	bic	x28,x23,x21
-	add	x24,x24,x6			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x25,x26			// a^b, b^c in next round
-	eor	x16,x16,x9,ror#18	// Sigma1(e)
-	ror	x9,x25,#28
-	add	x24,x24,x17			// h+=Ch(e,f,g)
-	eor	x17,x25,x25,ror#5
-	add	x24,x24,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x20,x20,x24			// d+=h
-	eor	x19,x19,x26			// Maj(a,b,c)
-	eor	x17,x9,x17,ror#34	// Sigma0(a)
-	add	x24,x24,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x24,x24,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x7,x7			// 4
-#endif
-	add	x24,x24,x17			// h+=Sigma0(a)
-	ror	x16,x20,#14
-	add	x23,x23,x19			// h+=K[i]
-	eor	x10,x20,x20,ror#23
-	and	x17,x21,x20
-	bic	x19,x22,x20
-	add	x23,x23,x7			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x24,x25			// a^b, b^c in next round
-	eor	x16,x16,x10,ror#18	// Sigma1(e)
-	ror	x10,x24,#28
-	add	x23,x23,x17			// h+=Ch(e,f,g)
-	eor	x17,x24,x24,ror#5
-	add	x23,x23,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x27,x27,x23			// d+=h
-	eor	x28,x28,x25			// Maj(a,b,c)
-	eor	x17,x10,x17,ror#34	// Sigma0(a)
-	add	x23,x23,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x23,x23,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x8,x8			// 5
-#endif
-	ldp	x9,x10,[x1],#2*8
-	add	x23,x23,x17			// h+=Sigma0(a)
-	ror	x16,x27,#14
-	add	x22,x22,x28			// h+=K[i]
-	eor	x11,x27,x27,ror#23
-	and	x17,x20,x27
-	bic	x28,x21,x27
-	add	x22,x22,x8			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x23,x24			// a^b, b^c in next round
-	eor	x16,x16,x11,ror#18	// Sigma1(e)
-	ror	x11,x23,#28
-	add	x22,x22,x17			// h+=Ch(e,f,g)
-	eor	x17,x23,x23,ror#5
-	add	x22,x22,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x26,x26,x22			// d+=h
-	eor	x19,x19,x24			// Maj(a,b,c)
-	eor	x17,x11,x17,ror#34	// Sigma0(a)
-	add	x22,x22,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x22,x22,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x9,x9			// 6
-#endif
-	add	x22,x22,x17			// h+=Sigma0(a)
-	ror	x16,x26,#14
-	add	x21,x21,x19			// h+=K[i]
-	eor	x12,x26,x26,ror#23
-	and	x17,x27,x26
-	bic	x19,x20,x26
-	add	x21,x21,x9			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x22,x23			// a^b, b^c in next round
-	eor	x16,x16,x12,ror#18	// Sigma1(e)
-	ror	x12,x22,#28
-	add	x21,x21,x17			// h+=Ch(e,f,g)
-	eor	x17,x22,x22,ror#5
-	add	x21,x21,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x25,x25,x21			// d+=h
-	eor	x28,x28,x23			// Maj(a,b,c)
-	eor	x17,x12,x17,ror#34	// Sigma0(a)
-	add	x21,x21,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x21,x21,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x10,x10			// 7
-#endif
-	ldp	x11,x12,[x1],#2*8
-	add	x21,x21,x17			// h+=Sigma0(a)
-	ror	x16,x25,#14
-	add	x20,x20,x28			// h+=K[i]
-	eor	x13,x25,x25,ror#23
-	and	x17,x26,x25
-	bic	x28,x27,x25
-	add	x20,x20,x10			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x21,x22			// a^b, b^c in next round
-	eor	x16,x16,x13,ror#18	// Sigma1(e)
-	ror	x13,x21,#28
-	add	x20,x20,x17			// h+=Ch(e,f,g)
-	eor	x17,x21,x21,ror#5
-	add	x20,x20,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x24,x24,x20			// d+=h
-	eor	x19,x19,x22			// Maj(a,b,c)
-	eor	x17,x13,x17,ror#34	// Sigma0(a)
-	add	x20,x20,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x20,x20,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x11,x11			// 8
-#endif
-	add	x20,x20,x17			// h+=Sigma0(a)
-	ror	x16,x24,#14
-	add	x27,x27,x19			// h+=K[i]
-	eor	x14,x24,x24,ror#23
-	and	x17,x25,x24
-	bic	x19,x26,x24
-	add	x27,x27,x11			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x20,x21			// a^b, b^c in next round
-	eor	x16,x16,x14,ror#18	// Sigma1(e)
-	ror	x14,x20,#28
-	add	x27,x27,x17			// h+=Ch(e,f,g)
-	eor	x17,x20,x20,ror#5
-	add	x27,x27,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x23,x23,x27			// d+=h
-	eor	x28,x28,x21			// Maj(a,b,c)
-	eor	x17,x14,x17,ror#34	// Sigma0(a)
-	add	x27,x27,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x27,x27,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x12,x12			// 9
-#endif
-	ldp	x13,x14,[x1],#2*8
-	add	x27,x27,x17			// h+=Sigma0(a)
-	ror	x16,x23,#14
-	add	x26,x26,x28			// h+=K[i]
-	eor	x15,x23,x23,ror#23
-	and	x17,x24,x23
-	bic	x28,x25,x23
-	add	x26,x26,x12			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x27,x20			// a^b, b^c in next round
-	eor	x16,x16,x15,ror#18	// Sigma1(e)
-	ror	x15,x27,#28
-	add	x26,x26,x17			// h+=Ch(e,f,g)
-	eor	x17,x27,x27,ror#5
-	add	x26,x26,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x22,x22,x26			// d+=h
-	eor	x19,x19,x20			// Maj(a,b,c)
-	eor	x17,x15,x17,ror#34	// Sigma0(a)
-	add	x26,x26,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x26,x26,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x13,x13			// 10
-#endif
-	add	x26,x26,x17			// h+=Sigma0(a)
-	ror	x16,x22,#14
-	add	x25,x25,x19			// h+=K[i]
-	eor	x0,x22,x22,ror#23
-	and	x17,x23,x22
-	bic	x19,x24,x22
-	add	x25,x25,x13			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x26,x27			// a^b, b^c in next round
-	eor	x16,x16,x0,ror#18	// Sigma1(e)
-	ror	x0,x26,#28
-	add	x25,x25,x17			// h+=Ch(e,f,g)
-	eor	x17,x26,x26,ror#5
-	add	x25,x25,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x21,x21,x25			// d+=h
-	eor	x28,x28,x27			// Maj(a,b,c)
-	eor	x17,x0,x17,ror#34	// Sigma0(a)
-	add	x25,x25,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x25,x25,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x14,x14			// 11
-#endif
-	ldp	x15,x0,[x1],#2*8
-	add	x25,x25,x17			// h+=Sigma0(a)
-	str	x6,[sp,#24]
-	ror	x16,x21,#14
-	add	x24,x24,x28			// h+=K[i]
-	eor	x6,x21,x21,ror#23
-	and	x17,x22,x21
-	bic	x28,x23,x21
-	add	x24,x24,x14			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x25,x26			// a^b, b^c in next round
-	eor	x16,x16,x6,ror#18	// Sigma1(e)
-	ror	x6,x25,#28
-	add	x24,x24,x17			// h+=Ch(e,f,g)
-	eor	x17,x25,x25,ror#5
-	add	x24,x24,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x20,x20,x24			// d+=h
-	eor	x19,x19,x26			// Maj(a,b,c)
-	eor	x17,x6,x17,ror#34	// Sigma0(a)
-	add	x24,x24,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x24,x24,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x15,x15			// 12
-#endif
-	add	x24,x24,x17			// h+=Sigma0(a)
-	str	x7,[sp,#0]
-	ror	x16,x20,#14
-	add	x23,x23,x19			// h+=K[i]
-	eor	x7,x20,x20,ror#23
-	and	x17,x21,x20
-	bic	x19,x22,x20
-	add	x23,x23,x15			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x24,x25			// a^b, b^c in next round
-	eor	x16,x16,x7,ror#18	// Sigma1(e)
-	ror	x7,x24,#28
-	add	x23,x23,x17			// h+=Ch(e,f,g)
-	eor	x17,x24,x24,ror#5
-	add	x23,x23,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x27,x27,x23			// d+=h
-	eor	x28,x28,x25			// Maj(a,b,c)
-	eor	x17,x7,x17,ror#34	// Sigma0(a)
-	add	x23,x23,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x23,x23,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x0,x0			// 13
-#endif
-	ldp	x1,x2,[x1]
-	add	x23,x23,x17			// h+=Sigma0(a)
-	str	x8,[sp,#8]
-	ror	x16,x27,#14
-	add	x22,x22,x28			// h+=K[i]
-	eor	x8,x27,x27,ror#23
-	and	x17,x20,x27
-	bic	x28,x21,x27
-	add	x22,x22,x0			// h+=X[i]
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x23,x24			// a^b, b^c in next round
-	eor	x16,x16,x8,ror#18	// Sigma1(e)
-	ror	x8,x23,#28
-	add	x22,x22,x17			// h+=Ch(e,f,g)
-	eor	x17,x23,x23,ror#5
-	add	x22,x22,x16			// h+=Sigma1(e)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	add	x26,x26,x22			// d+=h
-	eor	x19,x19,x24			// Maj(a,b,c)
-	eor	x17,x8,x17,ror#34	// Sigma0(a)
-	add	x22,x22,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	//add	x22,x22,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x1,x1			// 14
-#endif
-	ldr	x6,[sp,#24]
-	add	x22,x22,x17			// h+=Sigma0(a)
-	str	x9,[sp,#16]
-	ror	x16,x26,#14
-	add	x21,x21,x19			// h+=K[i]
-	eor	x9,x26,x26,ror#23
-	and	x17,x27,x26
-	bic	x19,x20,x26
-	add	x21,x21,x1			// h+=X[i]
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x22,x23			// a^b, b^c in next round
-	eor	x16,x16,x9,ror#18	// Sigma1(e)
-	ror	x9,x22,#28
-	add	x21,x21,x17			// h+=Ch(e,f,g)
-	eor	x17,x22,x22,ror#5
-	add	x21,x21,x16			// h+=Sigma1(e)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	add	x25,x25,x21			// d+=h
-	eor	x28,x28,x23			// Maj(a,b,c)
-	eor	x17,x9,x17,ror#34	// Sigma0(a)
-	add	x21,x21,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	//add	x21,x21,x17			// h+=Sigma0(a)
-#ifndef	__AARCH64EB__
-	rev	x2,x2			// 15
-#endif
-	ldr	x7,[sp,#0]
-	add	x21,x21,x17			// h+=Sigma0(a)
-	str	x10,[sp,#24]
-	ror	x16,x25,#14
-	add	x20,x20,x28			// h+=K[i]
-	ror	x9,x4,#1
-	and	x17,x26,x25
-	ror	x8,x1,#19
-	bic	x28,x27,x25
-	ror	x10,x21,#28
-	add	x20,x20,x2			// h+=X[i]
-	eor	x16,x16,x25,ror#18
-	eor	x9,x9,x4,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x21,x22			// a^b, b^c in next round
-	eor	x16,x16,x25,ror#41	// Sigma1(e)
-	eor	x10,x10,x21,ror#34
-	add	x20,x20,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x8,x8,x1,ror#61
-	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
-	add	x20,x20,x16			// h+=Sigma1(e)
-	eor	x19,x19,x22			// Maj(a,b,c)
-	eor	x17,x10,x21,ror#39	// Sigma0(a)
-	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
-	add	x3,x3,x12
-	add	x24,x24,x20			// d+=h
-	add	x20,x20,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x3,x3,x9
-	add	x20,x20,x17			// h+=Sigma0(a)
-	add	x3,x3,x8
-.Loop_16_xx:
-	ldr	x8,[sp,#8]
-	str	x11,[sp,#0]
-	ror	x16,x24,#14
-	add	x27,x27,x19			// h+=K[i]
-	ror	x10,x5,#1
-	and	x17,x25,x24
-	ror	x9,x2,#19
-	bic	x19,x26,x24
-	ror	x11,x20,#28
-	add	x27,x27,x3			// h+=X[i]
-	eor	x16,x16,x24,ror#18
-	eor	x10,x10,x5,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x20,x21			// a^b, b^c in next round
-	eor	x16,x16,x24,ror#41	// Sigma1(e)
-	eor	x11,x11,x20,ror#34
-	add	x27,x27,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x9,x9,x2,ror#61
-	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
-	add	x27,x27,x16			// h+=Sigma1(e)
-	eor	x28,x28,x21			// Maj(a,b,c)
-	eor	x17,x11,x20,ror#39	// Sigma0(a)
-	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
-	add	x4,x4,x13
-	add	x23,x23,x27			// d+=h
-	add	x27,x27,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x4,x4,x10
-	add	x27,x27,x17			// h+=Sigma0(a)
-	add	x4,x4,x9
-	ldr	x9,[sp,#16]
-	str	x12,[sp,#8]
-	ror	x16,x23,#14
-	add	x26,x26,x28			// h+=K[i]
-	ror	x11,x6,#1
-	and	x17,x24,x23
-	ror	x10,x3,#19
-	bic	x28,x25,x23
-	ror	x12,x27,#28
-	add	x26,x26,x4			// h+=X[i]
-	eor	x16,x16,x23,ror#18
-	eor	x11,x11,x6,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x27,x20			// a^b, b^c in next round
-	eor	x16,x16,x23,ror#41	// Sigma1(e)
-	eor	x12,x12,x27,ror#34
-	add	x26,x26,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x10,x10,x3,ror#61
-	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
-	add	x26,x26,x16			// h+=Sigma1(e)
-	eor	x19,x19,x20			// Maj(a,b,c)
-	eor	x17,x12,x27,ror#39	// Sigma0(a)
-	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
-	add	x5,x5,x14
-	add	x22,x22,x26			// d+=h
-	add	x26,x26,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x5,x5,x11
-	add	x26,x26,x17			// h+=Sigma0(a)
-	add	x5,x5,x10
-	ldr	x10,[sp,#24]
-	str	x13,[sp,#16]
-	ror	x16,x22,#14
-	add	x25,x25,x19			// h+=K[i]
-	ror	x12,x7,#1
-	and	x17,x23,x22
-	ror	x11,x4,#19
-	bic	x19,x24,x22
-	ror	x13,x26,#28
-	add	x25,x25,x5			// h+=X[i]
-	eor	x16,x16,x22,ror#18
-	eor	x12,x12,x7,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x26,x27			// a^b, b^c in next round
-	eor	x16,x16,x22,ror#41	// Sigma1(e)
-	eor	x13,x13,x26,ror#34
-	add	x25,x25,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x11,x11,x4,ror#61
-	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
-	add	x25,x25,x16			// h+=Sigma1(e)
-	eor	x28,x28,x27			// Maj(a,b,c)
-	eor	x17,x13,x26,ror#39	// Sigma0(a)
-	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
-	add	x6,x6,x15
-	add	x21,x21,x25			// d+=h
-	add	x25,x25,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x6,x6,x12
-	add	x25,x25,x17			// h+=Sigma0(a)
-	add	x6,x6,x11
-	ldr	x11,[sp,#0]
-	str	x14,[sp,#24]
-	ror	x16,x21,#14
-	add	x24,x24,x28			// h+=K[i]
-	ror	x13,x8,#1
-	and	x17,x22,x21
-	ror	x12,x5,#19
-	bic	x28,x23,x21
-	ror	x14,x25,#28
-	add	x24,x24,x6			// h+=X[i]
-	eor	x16,x16,x21,ror#18
-	eor	x13,x13,x8,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x25,x26			// a^b, b^c in next round
-	eor	x16,x16,x21,ror#41	// Sigma1(e)
-	eor	x14,x14,x25,ror#34
-	add	x24,x24,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x12,x12,x5,ror#61
-	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
-	add	x24,x24,x16			// h+=Sigma1(e)
-	eor	x19,x19,x26			// Maj(a,b,c)
-	eor	x17,x14,x25,ror#39	// Sigma0(a)
-	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
-	add	x7,x7,x0
-	add	x20,x20,x24			// d+=h
-	add	x24,x24,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x7,x7,x13
-	add	x24,x24,x17			// h+=Sigma0(a)
-	add	x7,x7,x12
-	ldr	x12,[sp,#8]
-	str	x15,[sp,#0]
-	ror	x16,x20,#14
-	add	x23,x23,x19			// h+=K[i]
-	ror	x14,x9,#1
-	and	x17,x21,x20
-	ror	x13,x6,#19
-	bic	x19,x22,x20
-	ror	x15,x24,#28
-	add	x23,x23,x7			// h+=X[i]
-	eor	x16,x16,x20,ror#18
-	eor	x14,x14,x9,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x24,x25			// a^b, b^c in next round
-	eor	x16,x16,x20,ror#41	// Sigma1(e)
-	eor	x15,x15,x24,ror#34
-	add	x23,x23,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x13,x13,x6,ror#61
-	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
-	add	x23,x23,x16			// h+=Sigma1(e)
-	eor	x28,x28,x25			// Maj(a,b,c)
-	eor	x17,x15,x24,ror#39	// Sigma0(a)
-	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
-	add	x8,x8,x1
-	add	x27,x27,x23			// d+=h
-	add	x23,x23,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x8,x8,x14
-	add	x23,x23,x17			// h+=Sigma0(a)
-	add	x8,x8,x13
-	ldr	x13,[sp,#16]
-	str	x0,[sp,#8]
-	ror	x16,x27,#14
-	add	x22,x22,x28			// h+=K[i]
-	ror	x15,x10,#1
-	and	x17,x20,x27
-	ror	x14,x7,#19
-	bic	x28,x21,x27
-	ror	x0,x23,#28
-	add	x22,x22,x8			// h+=X[i]
-	eor	x16,x16,x27,ror#18
-	eor	x15,x15,x10,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x23,x24			// a^b, b^c in next round
-	eor	x16,x16,x27,ror#41	// Sigma1(e)
-	eor	x0,x0,x23,ror#34
-	add	x22,x22,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x14,x14,x7,ror#61
-	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
-	add	x22,x22,x16			// h+=Sigma1(e)
-	eor	x19,x19,x24			// Maj(a,b,c)
-	eor	x17,x0,x23,ror#39	// Sigma0(a)
-	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
-	add	x9,x9,x2
-	add	x26,x26,x22			// d+=h
-	add	x22,x22,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x9,x9,x15
-	add	x22,x22,x17			// h+=Sigma0(a)
-	add	x9,x9,x14
-	ldr	x14,[sp,#24]
-	str	x1,[sp,#16]
-	ror	x16,x26,#14
-	add	x21,x21,x19			// h+=K[i]
-	ror	x0,x11,#1
-	and	x17,x27,x26
-	ror	x15,x8,#19
-	bic	x19,x20,x26
-	ror	x1,x22,#28
-	add	x21,x21,x9			// h+=X[i]
-	eor	x16,x16,x26,ror#18
-	eor	x0,x0,x11,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x22,x23			// a^b, b^c in next round
-	eor	x16,x16,x26,ror#41	// Sigma1(e)
-	eor	x1,x1,x22,ror#34
-	add	x21,x21,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x15,x15,x8,ror#61
-	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
-	add	x21,x21,x16			// h+=Sigma1(e)
-	eor	x28,x28,x23			// Maj(a,b,c)
-	eor	x17,x1,x22,ror#39	// Sigma0(a)
-	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
-	add	x10,x10,x3
-	add	x25,x25,x21			// d+=h
-	add	x21,x21,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x10,x10,x0
-	add	x21,x21,x17			// h+=Sigma0(a)
-	add	x10,x10,x15
-	ldr	x15,[sp,#0]
-	str	x2,[sp,#24]
-	ror	x16,x25,#14
-	add	x20,x20,x28			// h+=K[i]
-	ror	x1,x12,#1
-	and	x17,x26,x25
-	ror	x0,x9,#19
-	bic	x28,x27,x25
-	ror	x2,x21,#28
-	add	x20,x20,x10			// h+=X[i]
-	eor	x16,x16,x25,ror#18
-	eor	x1,x1,x12,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x21,x22			// a^b, b^c in next round
-	eor	x16,x16,x25,ror#41	// Sigma1(e)
-	eor	x2,x2,x21,ror#34
-	add	x20,x20,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x0,x0,x9,ror#61
-	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
-	add	x20,x20,x16			// h+=Sigma1(e)
-	eor	x19,x19,x22			// Maj(a,b,c)
-	eor	x17,x2,x21,ror#39	// Sigma0(a)
-	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
-	add	x11,x11,x4
-	add	x24,x24,x20			// d+=h
-	add	x20,x20,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x11,x11,x1
-	add	x20,x20,x17			// h+=Sigma0(a)
-	add	x11,x11,x0
-	ldr	x0,[sp,#8]
-	str	x3,[sp,#0]
-	ror	x16,x24,#14
-	add	x27,x27,x19			// h+=K[i]
-	ror	x2,x13,#1
-	and	x17,x25,x24
-	ror	x1,x10,#19
-	bic	x19,x26,x24
-	ror	x3,x20,#28
-	add	x27,x27,x11			// h+=X[i]
-	eor	x16,x16,x24,ror#18
-	eor	x2,x2,x13,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x20,x21			// a^b, b^c in next round
-	eor	x16,x16,x24,ror#41	// Sigma1(e)
-	eor	x3,x3,x20,ror#34
-	add	x27,x27,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x1,x1,x10,ror#61
-	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
-	add	x27,x27,x16			// h+=Sigma1(e)
-	eor	x28,x28,x21			// Maj(a,b,c)
-	eor	x17,x3,x20,ror#39	// Sigma0(a)
-	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
-	add	x12,x12,x5
-	add	x23,x23,x27			// d+=h
-	add	x27,x27,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x12,x12,x2
-	add	x27,x27,x17			// h+=Sigma0(a)
-	add	x12,x12,x1
-	ldr	x1,[sp,#16]
-	str	x4,[sp,#8]
-	ror	x16,x23,#14
-	add	x26,x26,x28			// h+=K[i]
-	ror	x3,x14,#1
-	and	x17,x24,x23
-	ror	x2,x11,#19
-	bic	x28,x25,x23
-	ror	x4,x27,#28
-	add	x26,x26,x12			// h+=X[i]
-	eor	x16,x16,x23,ror#18
-	eor	x3,x3,x14,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x27,x20			// a^b, b^c in next round
-	eor	x16,x16,x23,ror#41	// Sigma1(e)
-	eor	x4,x4,x27,ror#34
-	add	x26,x26,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x2,x2,x11,ror#61
-	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
-	add	x26,x26,x16			// h+=Sigma1(e)
-	eor	x19,x19,x20			// Maj(a,b,c)
-	eor	x17,x4,x27,ror#39	// Sigma0(a)
-	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
-	add	x13,x13,x6
-	add	x22,x22,x26			// d+=h
-	add	x26,x26,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x13,x13,x3
-	add	x26,x26,x17			// h+=Sigma0(a)
-	add	x13,x13,x2
-	ldr	x2,[sp,#24]
-	str	x5,[sp,#16]
-	ror	x16,x22,#14
-	add	x25,x25,x19			// h+=K[i]
-	ror	x4,x15,#1
-	and	x17,x23,x22
-	ror	x3,x12,#19
-	bic	x19,x24,x22
-	ror	x5,x26,#28
-	add	x25,x25,x13			// h+=X[i]
-	eor	x16,x16,x22,ror#18
-	eor	x4,x4,x15,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x26,x27			// a^b, b^c in next round
-	eor	x16,x16,x22,ror#41	// Sigma1(e)
-	eor	x5,x5,x26,ror#34
-	add	x25,x25,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x3,x3,x12,ror#61
-	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
-	add	x25,x25,x16			// h+=Sigma1(e)
-	eor	x28,x28,x27			// Maj(a,b,c)
-	eor	x17,x5,x26,ror#39	// Sigma0(a)
-	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
-	add	x14,x14,x7
-	add	x21,x21,x25			// d+=h
-	add	x25,x25,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x14,x14,x4
-	add	x25,x25,x17			// h+=Sigma0(a)
-	add	x14,x14,x3
-	ldr	x3,[sp,#0]
-	str	x6,[sp,#24]
-	ror	x16,x21,#14
-	add	x24,x24,x28			// h+=K[i]
-	ror	x5,x0,#1
-	and	x17,x22,x21
-	ror	x4,x13,#19
-	bic	x28,x23,x21
-	ror	x6,x25,#28
-	add	x24,x24,x14			// h+=X[i]
-	eor	x16,x16,x21,ror#18
-	eor	x5,x5,x0,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x25,x26			// a^b, b^c in next round
-	eor	x16,x16,x21,ror#41	// Sigma1(e)
-	eor	x6,x6,x25,ror#34
-	add	x24,x24,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x4,x4,x13,ror#61
-	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
-	add	x24,x24,x16			// h+=Sigma1(e)
-	eor	x19,x19,x26			// Maj(a,b,c)
-	eor	x17,x6,x25,ror#39	// Sigma0(a)
-	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
-	add	x15,x15,x8
-	add	x20,x20,x24			// d+=h
-	add	x24,x24,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x15,x15,x5
-	add	x24,x24,x17			// h+=Sigma0(a)
-	add	x15,x15,x4
-	ldr	x4,[sp,#8]
-	str	x7,[sp,#0]
-	ror	x16,x20,#14
-	add	x23,x23,x19			// h+=K[i]
-	ror	x6,x1,#1
-	and	x17,x21,x20
-	ror	x5,x14,#19
-	bic	x19,x22,x20
-	ror	x7,x24,#28
-	add	x23,x23,x15			// h+=X[i]
-	eor	x16,x16,x20,ror#18
-	eor	x6,x6,x1,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x24,x25			// a^b, b^c in next round
-	eor	x16,x16,x20,ror#41	// Sigma1(e)
-	eor	x7,x7,x24,ror#34
-	add	x23,x23,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x5,x5,x14,ror#61
-	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
-	add	x23,x23,x16			// h+=Sigma1(e)
-	eor	x28,x28,x25			// Maj(a,b,c)
-	eor	x17,x7,x24,ror#39	// Sigma0(a)
-	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
-	add	x0,x0,x9
-	add	x27,x27,x23			// d+=h
-	add	x23,x23,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x0,x0,x6
-	add	x23,x23,x17			// h+=Sigma0(a)
-	add	x0,x0,x5
-	ldr	x5,[sp,#16]
-	str	x8,[sp,#8]
-	ror	x16,x27,#14
-	add	x22,x22,x28			// h+=K[i]
-	ror	x7,x2,#1
-	and	x17,x20,x27
-	ror	x6,x15,#19
-	bic	x28,x21,x27
-	ror	x8,x23,#28
-	add	x22,x22,x0			// h+=X[i]
-	eor	x16,x16,x27,ror#18
-	eor	x7,x7,x2,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x23,x24			// a^b, b^c in next round
-	eor	x16,x16,x27,ror#41	// Sigma1(e)
-	eor	x8,x8,x23,ror#34
-	add	x22,x22,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x6,x6,x15,ror#61
-	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
-	add	x22,x22,x16			// h+=Sigma1(e)
-	eor	x19,x19,x24			// Maj(a,b,c)
-	eor	x17,x8,x23,ror#39	// Sigma0(a)
-	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
-	add	x1,x1,x10
-	add	x26,x26,x22			// d+=h
-	add	x22,x22,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x1,x1,x7
-	add	x22,x22,x17			// h+=Sigma0(a)
-	add	x1,x1,x6
-	ldr	x6,[sp,#24]
-	str	x9,[sp,#16]
-	ror	x16,x26,#14
-	add	x21,x21,x19			// h+=K[i]
-	ror	x8,x3,#1
-	and	x17,x27,x26
-	ror	x7,x0,#19
-	bic	x19,x20,x26
-	ror	x9,x22,#28
-	add	x21,x21,x1			// h+=X[i]
-	eor	x16,x16,x26,ror#18
-	eor	x8,x8,x3,ror#8
-	orr	x17,x17,x19			// Ch(e,f,g)
-	eor	x19,x22,x23			// a^b, b^c in next round
-	eor	x16,x16,x26,ror#41	// Sigma1(e)
-	eor	x9,x9,x22,ror#34
-	add	x21,x21,x17			// h+=Ch(e,f,g)
-	and	x28,x28,x19			// (b^c)&=(a^b)
-	eor	x7,x7,x0,ror#61
-	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
-	add	x21,x21,x16			// h+=Sigma1(e)
-	eor	x28,x28,x23			// Maj(a,b,c)
-	eor	x17,x9,x22,ror#39	// Sigma0(a)
-	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
-	add	x2,x2,x11
-	add	x25,x25,x21			// d+=h
-	add	x21,x21,x28			// h+=Maj(a,b,c)
-	ldr	x28,[x30],#8		// *K++, x19 in next round
-	add	x2,x2,x8
-	add	x21,x21,x17			// h+=Sigma0(a)
-	add	x2,x2,x7
-	ldr	x7,[sp,#0]
-	str	x10,[sp,#24]
-	ror	x16,x25,#14
-	add	x20,x20,x28			// h+=K[i]
-	ror	x9,x4,#1
-	and	x17,x26,x25
-	ror	x8,x1,#19
-	bic	x28,x27,x25
-	ror	x10,x21,#28
-	add	x20,x20,x2			// h+=X[i]
-	eor	x16,x16,x25,ror#18
-	eor	x9,x9,x4,ror#8
-	orr	x17,x17,x28			// Ch(e,f,g)
-	eor	x28,x21,x22			// a^b, b^c in next round
-	eor	x16,x16,x25,ror#41	// Sigma1(e)
-	eor	x10,x10,x21,ror#34
-	add	x20,x20,x17			// h+=Ch(e,f,g)
-	and	x19,x19,x28			// (b^c)&=(a^b)
-	eor	x8,x8,x1,ror#61
-	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
-	add	x20,x20,x16			// h+=Sigma1(e)
-	eor	x19,x19,x22			// Maj(a,b,c)
-	eor	x17,x10,x21,ror#39	// Sigma0(a)
-	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
-	add	x3,x3,x12
-	add	x24,x24,x20			// d+=h
-	add	x20,x20,x19			// h+=Maj(a,b,c)
-	ldr	x19,[x30],#8		// *K++, x28 in next round
-	add	x3,x3,x9
-	add	x20,x20,x17			// h+=Sigma0(a)
-	add	x3,x3,x8
-	cbnz	x19,.Loop_16_xx
-
-	ldp	x0,x2,[x29,#96]
-	ldr	x1,[x29,#112]
-	sub	x30,x30,#648		// rewind
-
-	ldp	x3,x4,[x0]
-	ldp	x5,x6,[x0,#2*8]
-	add	x1,x1,#14*8			// advance input pointer
-	ldp	x7,x8,[x0,#4*8]
-	add	x20,x20,x3
-	ldp	x9,x10,[x0,#6*8]
-	add	x21,x21,x4
-	add	x22,x22,x5
-	add	x23,x23,x6
-	stp	x20,x21,[x0]
-	add	x24,x24,x7
-	add	x25,x25,x8
-	stp	x22,x23,[x0,#2*8]
-	add	x26,x26,x9
-	add	x27,x27,x10
-	cmp	x1,x2
-	stp	x24,x25,[x0,#4*8]
-	stp	x26,x27,[x0,#6*8]
-	b.ne	.Loop
-
-	ldp	x19,x20,[x29,#16]
-	add	sp,sp,#4*8
-	ldp	x21,x22,[x29,#32]
-	ldp	x23,x24,[x29,#48]
-	ldp	x25,x26,[x29,#64]
-	ldp	x27,x28,[x29,#80]
-	ldp	x29,x30,[sp],#128
-	ret
-.size	sha512_block_data_order,.-sha512_block_data_order
-
-.align	6
-.type	.LK512,%object
-.LK512:
-	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
-	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
-	.quad	0x3956c25bf348b538,0x59f111f1b605d019
-	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
-	.quad	0xd807aa98a3030242,0x12835b0145706fbe
-	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
-	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
-	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
-	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
-	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
-	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
-	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
-	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
-	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
-	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
-	.quad	0x06ca6351e003826f,0x142929670a0e6e70
-	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
-	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
-	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
-	.quad	0x81c2c92e47edaee6,0x92722c851482353b
-	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
-	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
-	.quad	0xd192e819d6ef5218,0xd69906245565a910
-	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
-	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
-	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
-	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
-	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
-	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
-	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
-	.quad	0x90befffa23631e28,0xa4506cebde82bde9
-	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
-	.quad	0xca273eceea26619c,0xd186b8c721c0c207
-	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
-	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
-	.quad	0x113f9804bef90dae,0x1b710b35131c471b
-	.quad	0x28db77f523047d84,0x32caab7b40c72493
-	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
-	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
-	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
-	.quad	0	// terminator
-.size	.LK512,.-.LK512
-#ifndef	__KERNEL__
-.align	3
-.LOPENSSL_armcap_P:
-# ifdef	__ILP32__
-	.long	OPENSSL_armcap_P-.
-# else
-	.quad	OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz	"SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align	2
-#ifndef	__KERNEL__
-.comm	OPENSSL_armcap_P,4,4
-#endif
-- 
GitLab


From 2063257d4b2472e8f113527c642e467914bb82d3 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 26 Apr 2021 02:57:34 +0900
Subject: [PATCH 0702/3804] crypto: arm64 - use a pattern rule for generating
 *.S files

Unify similar build rules.

sha256-core.S opts out of it because it is generated from sha512-armv8.pl.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Makefile | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 592e52a08c623..09a805cc32d7c 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -71,13 +71,10 @@ $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) void $(@)
 
-$(obj)/poly1305-core.S: $(src)/poly1305-armv8.pl
+$(obj)/%-core.S: $(src)/%-armv8.pl
 	$(call cmd,perlasm)
 
 $(obj)/sha256-core.S: $(src)/sha512-armv8.pl
 	$(call cmd,perlasm)
 
-$(obj)/sha512-core.S: $(src)/sha512-armv8.pl
-	$(call cmd,perlasm)
-
 clean-files += poly1305-core.S sha256-core.S sha512-core.S
-- 
GitLab


From c8671c7dc7d51125ab9f651697866bf4a9132277 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 26 Apr 2021 10:17:48 +0200
Subject: [PATCH 0703/3804] crypto: ccp - Annotate SEV Firmware file names

Annotate the firmware files CCP might need using MODULE_FIRMWARE().
This will get them included into an initrd when CCP is also included
there. Otherwise the CCP module will not find its firmware when loaded
before the root-fs is mounted.
This can cause problems when the pre-loaded SEV firmware is too old to
support current SEV and SEV-ES virtualization features.

Fixes: e93720606efd ("crypto: ccp - Allow SEV firmware to be chosen based on Family and Model")
Cc: stable@vger.kernel.org # v4.20+
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sev-dev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 3506b2050fb86..91808402e0bf2 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -43,6 +43,10 @@ static int psp_probe_timeout = 5;
 module_param(psp_probe_timeout, int, 0644);
 MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
 
+MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */
+MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */
+MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */
+
 static bool psp_dead;
 static int psp_timeout;
 
-- 
GitLab


From fa8edbb630ae9ef99d4ab570a16f01c3c39d9a86 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 28 Apr 2021 09:33:37 +0200
Subject: [PATCH 0704/3804] crypto: cpt - Use 'hlist_for_each_entry' to
 simplify code

Use 'hlist_for_each_entry' instead of hand writing it.
This saves a few lines of code.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/cpt/cptvf_reqmanager.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index 4fe7898c85615..feb0f76783dda 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -244,11 +244,7 @@ static int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd,
 	memcpy(ent, (void *)cmd, qinfo->cmd_size);
 
 	if (++queue->idx >= queue->qhead->size / 64) {
-		struct hlist_node *node;
-
-		hlist_for_each(node, &queue->chead) {
-			chunk = hlist_entry(node, struct command_chunk,
-					    nextchunk);
+		hlist_for_each_entry(chunk, &queue->chead, nextchunk) {
 			if (chunk == queue->qhead) {
 				continue;
 			} else {
-- 
GitLab


From b7c3635e56d6561436af59b9876faa7cc8389644 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 29 Apr 2021 12:32:53 +0100
Subject: [PATCH 0705/3804] hwrng: amd - remove redundant initialization of
 variable err

The variable err is being initialized with a value that is
never read and it is being updated later with a new value.  The
initialization is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/amd-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index 9959c762da2f8..d8d4ef5214a19 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -126,7 +126,7 @@ static struct hwrng amd_rng = {
 
 static int __init mod_init(void)
 {
-	int err = -ENODEV;
+	int err;
 	struct pci_dev *pdev = NULL;
 	const struct pci_device_id *ent;
 	u32 pmbase;
-- 
GitLab


From a9ca8eacb3204208863b9175baae8ac7ee6b2a64 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:01 -0400
Subject: [PATCH 0706/3804] crypto: qce - Add MAC failed error checking

MAC_FAILED gets set in the status register if authentication fails
for ccm algorithms (during decryption). Add support to catch and flag
this error.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/common.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index dceb9579d87a2..dd76175d5c628 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -419,6 +419,8 @@ int qce_check_status(struct qce_device *qce, u32 *status)
 	 */
 	if (*status & STATUS_ERRORS || !(*status & BIT(OPERATION_DONE_SHIFT)))
 		ret = -ENXIO;
+	else if (*status & BIT(MAC_FAILED_SHIFT))
+		ret = -EBADMSG;
 
 	return ret;
 }
-- 
GitLab


From 6c34e446b2e2b7d26e83c4c391e89d7cf6824093 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:02 -0400
Subject: [PATCH 0707/3804] crypto: qce - Make result dump optional

Qualcomm crypto engine allows for IV registers and status register
to be concatenated to the output. This option is enabled by setting the
RESULTS_DUMP field in the GOPROC register. This is useful for most of the
algorithms to either retrieve status of operation or in case of
authentication algorithms to retrieve the mac. But for ccm
algorithms, the mac is part of the output stream and not retrieved
from the IV registers, thus needing a separate buffer to retrieve it.
Make enabling RESULTS_DUMP field optional so that algorithms can choose
whether or not to enable the option.
Note that in this patch, the enabled algorithms always choose
RESULTS_DUMP to be enabled. But later with the introduction of ccm
algorithms, this changes.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/common.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index dd76175d5c628..7b5bc5a6ae81c 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -88,9 +88,12 @@ static void qce_setup_config(struct qce_device *qce)
 	qce_write(qce, REG_CONFIG, config);
 }
 
-static inline void qce_crypto_go(struct qce_device *qce)
+static inline void qce_crypto_go(struct qce_device *qce, bool result_dump)
 {
-	qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
+	if (result_dump)
+		qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
+	else
+		qce_write(qce, REG_GOPROC, BIT(GO_SHIFT));
 }
 
 #ifdef CONFIG_CRYPTO_DEV_QCE_SHA
@@ -219,7 +222,7 @@ go_proc:
 	config = qce_config_reg(qce, 1);
 	qce_write(qce, REG_CONFIG, config);
 
-	qce_crypto_go(qce);
+	qce_crypto_go(qce, true);
 
 	return 0;
 }
@@ -380,7 +383,7 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req)
 	config = qce_config_reg(qce, 1);
 	qce_write(qce, REG_CONFIG, config);
 
-	qce_crypto_go(qce);
+	qce_crypto_go(qce, true);
 
 	return 0;
 }
-- 
GitLab


From 7ba9cd4e22a0c177a222669fc58ab300903b63e8 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:03 -0400
Subject: [PATCH 0708/3804] crypto: qce - Add mode for rfc4309

rfc4309 is the specification that uses aes ccm algorithms with IPsec
security packets. Add a submode to identify the rfc4309 ccm(aes) algorithm
in the crypto driver.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/common.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h
index 3bc244bcca2d9..b135440bf72b5 100644
--- a/drivers/crypto/qce/common.h
+++ b/drivers/crypto/qce/common.h
@@ -51,9 +51,11 @@
 #define QCE_MODE_CCM			BIT(12)
 #define QCE_MODE_MASK			GENMASK(12, 8)
 
+#define QCE_MODE_CCM_RFC4309		BIT(13)
+
 /* cipher encryption/decryption operations */
-#define QCE_ENCRYPT			BIT(13)
-#define QCE_DECRYPT			BIT(14)
+#define QCE_ENCRYPT			BIT(30)
+#define QCE_DECRYPT			BIT(31)
 
 #define IS_DES(flags)			(flags & QCE_ALG_DES)
 #define IS_3DES(flags)			(flags & QCE_ALG_3DES)
@@ -73,6 +75,7 @@
 #define IS_CTR(mode)			(mode & QCE_MODE_CTR)
 #define IS_XTS(mode)			(mode & QCE_MODE_XTS)
 #define IS_CCM(mode)			(mode & QCE_MODE_CCM)
+#define IS_CCM_RFC4309(mode)		((mode) & QCE_MODE_CCM_RFC4309)
 
 #define IS_ENCRYPT(dir)			(dir & QCE_ENCRYPT)
 #define IS_DECRYPT(dir)			(dir & QCE_DECRYPT)
-- 
GitLab


From 9363efb4181c5e0fbf86bdfa759262aa29f0eb50 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:04 -0400
Subject: [PATCH 0709/3804] crypto: qce - Add support for AEAD algorithms

Introduce support to enable following algorithms in Qualcomm Crypto Engine.

- authenc(hmac(sha1),cbc(des))
- authenc(hmac(sha1),cbc(des3_ede))
- authenc(hmac(sha256),cbc(des))
- authenc(hmac(sha256),cbc(des3_ede))
- authenc(hmac(sha256),cbc(aes))
- ccm(aes)
- rfc4309(ccm(aes))

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig      |  15 +
 drivers/crypto/qce/Makefile |   1 +
 drivers/crypto/qce/aead.c   | 799 ++++++++++++++++++++++++++++++++++++
 drivers/crypto/qce/aead.h   |  53 +++
 drivers/crypto/qce/common.h |   2 +
 drivers/crypto/qce/core.c   |   4 +
 6 files changed, 874 insertions(+)
 create mode 100644 drivers/crypto/qce/aead.c
 create mode 100644 drivers/crypto/qce/aead.h

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9a4c275a13350..1fe5b7eafc02c 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -627,6 +627,12 @@ config CRYPTO_DEV_QCE_SHA
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 
+config CRYPTO_DEV_QCE_AEAD
+	bool
+	depends on CRYPTO_DEV_QCE
+	select CRYPTO_AUTHENC
+	select CRYPTO_LIB_DES
+
 choice
 	prompt "Algorithms enabled for QCE acceleration"
 	default CRYPTO_DEV_QCE_ENABLE_ALL
@@ -647,6 +653,7 @@ choice
 		bool "All supported algorithms"
 		select CRYPTO_DEV_QCE_SKCIPHER
 		select CRYPTO_DEV_QCE_SHA
+		select CRYPTO_DEV_QCE_AEAD
 		help
 		  Enable all supported algorithms:
 			- AES (CBC, CTR, ECB, XTS)
@@ -672,6 +679,14 @@ choice
 			- SHA1, HMAC-SHA1
 			- SHA256, HMAC-SHA256
 
+	config CRYPTO_DEV_QCE_ENABLE_AEAD
+		bool "AEAD algorithms only"
+		select CRYPTO_DEV_QCE_AEAD
+		help
+		  Enable AEAD algorithms only:
+			- authenc()
+			- ccm(aes)
+			- rfc4309(ccm(aes))
 endchoice
 
 config CRYPTO_DEV_QCE_SW_MAX_LEN
diff --git a/drivers/crypto/qce/Makefile b/drivers/crypto/qce/Makefile
index 14ade8a7d6644..2cf8984e1b851 100644
--- a/drivers/crypto/qce/Makefile
+++ b/drivers/crypto/qce/Makefile
@@ -6,3 +6,4 @@ qcrypto-objs := core.o \
 
 qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o
 qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o
+qcrypto-$(CONFIG_CRYPTO_DEV_QCE_AEAD) += aead.o
diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
new file mode 100644
index 0000000000000..ef66ae21eae36
--- /dev/null
+++ b/drivers/crypto/qce/aead.c
@@ -0,0 +1,799 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2021, Linaro Limited. All rights reserved.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <crypto/gcm.h>
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/des.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <crypto/scatterwalk.h>
+#include "aead.h"
+
+#define CCM_NONCE_ADATA_SHIFT		6
+#define CCM_NONCE_AUTHSIZE_SHIFT	3
+#define MAX_CCM_ADATA_HEADER_LEN        6
+
+static LIST_HEAD(aead_algs);
+
+/* DMA completion: unmap, free per-request buffers, emit/verify the tag, complete the request. */
+static void qce_aead_done(void *data)
+{
+	struct crypto_async_request *async_req = data;
+	struct aead_request *req = aead_request_cast(async_req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct qce_result_dump *result_buf = qce->dma.result_buf;
+	enum dma_data_direction dir_src, dir_dst;
+	bool diff_dst;
+	int error;
+	u32 status;
+	unsigned int totallen;
+	unsigned char tag[SHA256_DIGEST_SIZE] = {0};
+	int ret = 0;
+
+	diff_dst = (req->src != req->dst) ? true : false;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	error = qce_dma_terminate_all(&qce->dma);
+	if (error)
+		dev_dbg(qce->dev, "aead dma termination error (%d)\n", error);
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+
+	if (IS_CCM(rctx->flags)) {
+		if (req->assoclen) {
+			sg_free_table(&rctx->src_tbl);
+			/* AAD bounce buffer from qce_aead_ccm_prepare_buf_assoclen() */
+			kfree(rctx->adata);
+			if (diff_dst)
+				sg_free_table(&rctx->dst_tbl);
+		} else if (!(IS_DECRYPT(rctx->flags) && !diff_dst)) {
+			sg_free_table(&rctx->dst_tbl);
+		}
+	} else {
+		sg_free_table(&rctx->dst_tbl);
+	}
+
+	error = qce_check_status(qce, &status);
+	if (error < 0 && (error != -EBADMSG))
+		dev_err(qce->dev, "aead operation error (%x)\n", status);
+
+	if (IS_ENCRYPT(rctx->flags)) {
+		totallen = req->cryptlen + req->assoclen;
+		if (IS_CCM(rctx->flags))
+			scatterwalk_map_and_copy(rctx->ccmresult_buf, req->dst,
+						 totallen, ctx->authsize, 1);
+		else
+			scatterwalk_map_and_copy(result_buf->auth_iv, req->dst,
+						 totallen, ctx->authsize, 1);
+	} else if (!IS_CCM(rctx->flags)) {
+		totallen = req->cryptlen + req->assoclen - ctx->authsize;
+		scatterwalk_map_and_copy(tag, req->src, totallen, ctx->authsize, 0);
+		ret = memcmp(result_buf->auth_iv, tag, ctx->authsize);
+		if (ret) {
+			pr_err("Bad message error\n");
+			error = -EBADMSG;
+		}
+	}
+
+	qce->async_req_done(qce, error);
+}
+
+static struct scatterlist *
+qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+
+	sg_init_one(&rctx->result_sg, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
+	return qce_sgtable_add(tbl, &rctx->result_sg, QCE_RESULT_BUF_SZ);
+}
+
+static struct scatterlist *
+qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+
+	sg_init_one(&rctx->result_sg, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE);
+	return qce_sgtable_add(tbl, &rctx->result_sg, QCE_BAM_BURST_SIZE);
+}
+
+static struct scatterlist *
+qce_aead_prepare_dst_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct scatterlist *sg, *msg_sg, __sg[2];
+	gfp_t gfp;
+	unsigned int assoclen = req->assoclen;
+	unsigned int totallen;
+	int ret;
+
+	totallen = rctx->cryptlen + assoclen;
+	rctx->dst_nents = sg_nents_for_len(req->dst, totallen);
+	if (rctx->dst_nents < 0) {
+		dev_err(qce->dev, "Invalid numbers of dst SG.\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (IS_CCM(rctx->flags))
+		rctx->dst_nents += 2;
+	else
+		rctx->dst_nents += 1;
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+						GFP_KERNEL : GFP_ATOMIC;
+	ret = sg_alloc_table(&rctx->dst_tbl, rctx->dst_nents, gfp);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (IS_CCM(rctx->flags) && assoclen) {
+		/* Get the dst buffer */
+		msg_sg = scatterwalk_ffwd(__sg, req->dst, assoclen);
+
+		sg = qce_sgtable_add(&rctx->dst_tbl, &rctx->adata_sg,
+				     rctx->assoclen);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
+			goto dst_tbl_free;
+		}
+		/* dst buffer */
+		sg = qce_sgtable_add(&rctx->dst_tbl, msg_sg, rctx->cryptlen);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
+			goto dst_tbl_free;
+		}
+		totallen = rctx->cryptlen + rctx->assoclen;
+	} else {
+		if (totallen) {
+			sg = qce_sgtable_add(&rctx->dst_tbl, req->dst, totallen);
+			if (IS_ERR(sg))
+				goto dst_tbl_free;
+		}
+	}
+	if (IS_CCM(rctx->flags))
+		sg = qce_aead_prepare_ccm_result_buf(&rctx->dst_tbl, req);
+	else
+		sg = qce_aead_prepare_result_buf(&rctx->dst_tbl, req);
+
+	if (IS_ERR(sg))
+		goto dst_tbl_free;
+
+	sg_mark_end(sg);
+	rctx->dst_sg = rctx->dst_tbl.sgl;
+	rctx->dst_nents = sg_nents_for_len(rctx->dst_sg, totallen) + 1;
+
+	return sg;
+
+dst_tbl_free:
+	sg_free_table(&rctx->dst_tbl);
+	return sg;
+}
+
+/*
+ * Set up the CCM src/dst scatterlists for a request with associated data: the
+ * AAD is copied behind its length header into rctx->adata. Returns 0 or -errno.
+ */
+static int
+qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req)
+{
+	struct scatterlist *sg, *msg_sg, __sg[2];
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int assoclen = rctx->assoclen;
+	unsigned int adata_header_len, cryptlen, totallen;
+	gfp_t gfp;
+	bool diff_dst;
+	int ret;
+
+	if (IS_DECRYPT(rctx->flags))
+		cryptlen = rctx->cryptlen + ctx->authsize;
+	else
+		cryptlen = rctx->cryptlen;
+	totallen = cryptlen + req->assoclen;
+
+	/* Get the msg */
+	msg_sg = scatterwalk_ffwd(__sg, req->src, req->assoclen);
+
+	rctx->adata = kzalloc(ALIGN(assoclen, 16) + MAX_CCM_ADATA_HEADER_LEN, GFP_ATOMIC);
+	if (!rctx->adata)
+		return -ENOMEM;
+
+	/*
+	 * Format associated data (RFC3610 and NIST 800-38C)
+	 * Even though specification allows for AAD to be up to 2^64 - 1 bytes,
+	 * the assoclen field in aead_request is unsigned int and thus limits
+	 * the AAD to be up to 2^32 - 1 bytes. So we handle only two scenarios
+	 * while forming the header for AAD.
+	 */
+	if (assoclen < 0xff00) {
+		adata_header_len = 2;
+		*(__be16 *)rctx->adata = cpu_to_be16(assoclen);
+	} else {
+		adata_header_len = 6;
+		*(__be16 *)rctx->adata = cpu_to_be16(0xfffe);
+		*(__be32 *)(rctx->adata + 2) = cpu_to_be32(assoclen);
+	}
+
+	/* Copy the associated data */
+	if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, assoclen),
+			      rctx->adata + adata_header_len,
+			      assoclen) != assoclen) {
+		ret = -EINVAL;
+		goto err_free_adata;
+	}
+
+	/* Pad associated data to block size */
+	rctx->assoclen = ALIGN(assoclen + adata_header_len, 16);
+
+	diff_dst = (req->src != req->dst) ? true : false;
+	rctx->src_nents = sg_nents_for_len(req->src, totallen) + (diff_dst ? 1 : 2);
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC;
+	ret = sg_alloc_table(&rctx->src_tbl, rctx->src_nents, gfp);
+	if (ret)
+		goto err_free_adata;
+
+	/* Associated Data */
+	sg_init_one(&rctx->adata_sg, rctx->adata, rctx->assoclen);
+	sg = qce_sgtable_add(&rctx->src_tbl, &rctx->adata_sg, rctx->assoclen);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto err_free;
+	}
+	/* src msg */
+	sg = qce_sgtable_add(&rctx->src_tbl, msg_sg, cryptlen);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto err_free;
+	}
+	/*
+	 * For decrypt, when src and dst buffers are same, there is already space
+	 * in the buffer for padded 0's which is output in lieu of
+	 * the MAC that is input. So skip the below.
+	 */
+	if (!diff_dst && !IS_DECRYPT(rctx->flags)) {
+		sg = qce_aead_prepare_ccm_result_buf(&rctx->src_tbl, req);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
+			goto err_free;
+		}
+	}
+	sg_mark_end(sg);
+	rctx->src_sg = rctx->src_tbl.sgl;
+	totallen = cryptlen + rctx->assoclen;
+	rctx->src_nents = sg_nents_for_len(rctx->src_sg, totallen);
+
+	if (diff_dst) {
+		sg = qce_aead_prepare_dst_buf(req);
+		if (IS_ERR(sg)) {
+			/* Propagate the failure; ret is still 0 here. */
+			ret = PTR_ERR(sg);
+			goto err_free;
+		}
+	} else {
+		rctx->dst_nents = rctx->src_nents + (IS_ENCRYPT(rctx->flags) ? 1 : 0);
+		rctx->dst_sg = rctx->src_sg;
+	}
+
+	return 0;
+err_free:
+	sg_free_table(&rctx->src_tbl);
+err_free_adata:
+	kfree(rctx->adata);
+	return ret;
+}
+
+static int qce_aead_prepare_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct scatterlist *sg;
+	bool diff_dst = (req->src != req->dst) ? true : false;
+	unsigned int totallen;
+
+	totallen = rctx->cryptlen + rctx->assoclen;
+
+	sg = qce_aead_prepare_dst_buf(req);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+	if (diff_dst) {
+		rctx->src_nents = sg_nents_for_len(req->src, totallen);
+		if (rctx->src_nents < 0) {
+			dev_err(qce->dev, "Invalid numbers of src SG.\n");
+			return -EINVAL;
+		}
+		rctx->src_sg = req->src;
+	} else {
+		rctx->src_nents = rctx->dst_nents - 1;
+		rctx->src_sg = rctx->dst_sg;
+	}
+	return 0;
+}
+
+static int qce_aead_ccm_prepare_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct scatterlist *sg;
+	bool diff_dst = (req->src != req->dst) ? true : false;
+	unsigned int cryptlen;
+
+	if (rctx->assoclen)
+		return qce_aead_ccm_prepare_buf_assoclen(req);
+
+	if (IS_ENCRYPT(rctx->flags))
+		return qce_aead_prepare_buf(req);
+
+	cryptlen = rctx->cryptlen + ctx->authsize;
+	if (diff_dst) {
+		rctx->src_nents = sg_nents_for_len(req->src, cryptlen);
+		rctx->src_sg = req->src;
+		sg = qce_aead_prepare_dst_buf(req);
+		if (IS_ERR(sg))
+			return PTR_ERR(sg);
+	} else {
+		rctx->src_nents = sg_nents_for_len(req->src, cryptlen);
+		rctx->src_sg = req->src;
+		rctx->dst_nents = rctx->src_nents;
+		rctx->dst_sg = rctx->src_sg;
+	}
+
+	return 0;
+}
+
+static int qce_aead_create_ccm_nonce(struct qce_aead_reqctx *rctx, struct qce_aead_ctx *ctx)
+{
+	unsigned int msglen_size, ivsize;
+	u8 msg_len[4];
+	int i;
+
+	if (!rctx || !rctx->iv)
+		return -EINVAL;
+
+	msglen_size = rctx->iv[0] + 1;
+
+	/* Verify that msg len size is valid */
+	if (msglen_size < 2 || msglen_size > 8)
+		return -EINVAL;
+
+	ivsize = rctx->ivsize;
+
+	/*
+	 * Clear the msglen bytes in IV.
+	 * Else the h/w engine and nonce will use any stray value pending there.
+	 */
+	if (!IS_CCM_RFC4309(rctx->flags)) {
+		for (i = 0; i < msglen_size; i++)
+			rctx->iv[ivsize - i - 1] = 0;
+	}
+
+	/*
+	 * The crypto framework encodes cryptlen as unsigned int. Thus, even though
+	 * spec allows for upto 8 bytes to encode msg_len only 4 bytes are needed.
+	 */
+	if (msglen_size > 4)
+		msglen_size = 4;
+
+	memcpy(&msg_len[0], &rctx->cryptlen, 4);
+
+	memcpy(&rctx->ccm_nonce[0], rctx->iv, rctx->ivsize);
+	if (rctx->assoclen)
+		rctx->ccm_nonce[0] |= 1 << CCM_NONCE_ADATA_SHIFT;
+	rctx->ccm_nonce[0] |= ((ctx->authsize - 2) / 2) <<
+				CCM_NONCE_AUTHSIZE_SHIFT;
+	for (i = 0; i < msglen_size; i++)
+		rctx->ccm_nonce[QCE_MAX_NONCE - i - 1] = msg_len[i];
+
+	return 0;
+}
+
+/* Prepare, DMA-map and start one AEAD request; returns 0 or a negative errno. */
+static int
+qce_aead_async_req_handle(struct crypto_async_request *async_req)
+{
+	struct aead_request *req = aead_request_cast(async_req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	enum dma_data_direction dir_src, dir_dst;
+	bool diff_dst;
+	int dst_nents, src_nents, ret;
+
+	if (IS_CCM_RFC4309(rctx->flags)) {
+		memset(rctx->ccm_rfc4309_iv, 0, QCE_MAX_IV_SIZE);
+		rctx->ccm_rfc4309_iv[0] = 3;
+		memcpy(&rctx->ccm_rfc4309_iv[1], ctx->ccm4309_salt, QCE_CCM4309_SALT_SIZE);
+		memcpy(&rctx->ccm_rfc4309_iv[4], req->iv, 8);
+		rctx->iv = rctx->ccm_rfc4309_iv;
+		rctx->ivsize = AES_BLOCK_SIZE;
+		rctx->assoclen = req->assoclen - 8;
+	} else {
+		rctx->iv = req->iv;
+		rctx->ivsize = crypto_aead_ivsize(tfm);
+		rctx->assoclen = req->assoclen;
+	}
+
+	diff_dst = (req->src != req->dst) ? true : false;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	if (IS_CCM(rctx->flags)) {
+		ret = qce_aead_create_ccm_nonce(rctx, ctx);
+		if (ret)
+			return ret;
+		ret = qce_aead_ccm_prepare_buf(req);
+	} else {
+		ret = qce_aead_prepare_buf(req);
+	}
+	if (ret)
+		return ret;
+
+	/* dma_map_sg() reports failure by returning 0, never a negative value. */
+	ret = -EIO;
+	dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+	if (dst_nents <= 0)
+		goto error_free;
+
+	if (diff_dst) {
+		src_nents = dma_map_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+		if (src_nents <= 0)
+			goto error_unmap_dst;
+	} else if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)) {
+		src_nents = dst_nents;
+	} else {
+		src_nents = dst_nents - 1;
+	}
+
+	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents, rctx->dst_sg, dst_nents,
+			       qce_aead_done, async_req);
+	if (ret)
+		goto error_unmap_src;
+
+	qce_dma_issue_pending(&qce->dma);
+	ret = qce_start(async_req, tmpl->crypto_alg_type);
+	if (ret)
+		goto error_terminate;
+
+	return 0;
+
+error_terminate:
+	qce_dma_terminate_all(&qce->dma);
+error_unmap_src:
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+error_unmap_dst:
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+error_free:
+	if (IS_CCM(rctx->flags) && rctx->assoclen) {
+		sg_free_table(&rctx->src_tbl);
+		kfree(rctx->adata);
+		if (diff_dst)
+			sg_free_table(&rctx->dst_tbl);
+	} else if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags) && !diff_dst)) {
+		/* In-place CCM decrypt without AAD never allocated dst_tbl. */
+		sg_free_table(&rctx->dst_tbl);
+	}
+	return ret;
+}
+
+static int qce_aead_crypt(struct aead_request *req, int encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(tfm);
+	unsigned int blocksize = crypto_aead_blocksize(tfm);
+
+	rctx->flags  = tmpl->alg_flags;
+	rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
+
+	if (encrypt)
+		rctx->cryptlen = req->cryptlen;
+	else
+		rctx->cryptlen = req->cryptlen - ctx->authsize;
+
+	/* CE does not handle 0 length messages */
+	if (!rctx->cryptlen) {
+		if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)))
+			return -EINVAL;
+	}
+
+	/*
+	 * CBC algorithms require message lengths to be
+	 * multiples of block size.
+	 */
+	if (IS_CBC(rctx->flags) && !IS_ALIGNED(rctx->cryptlen, blocksize))
+		return -EINVAL;
+
+	/* RFC4309 supported AAD size 16 bytes/20 bytes */
+	if (IS_CCM_RFC4309(rctx->flags))
+		if (crypto_ipsec_check_assoclen(req->assoclen))
+			return -EINVAL;
+
+	return tmpl->qce->async_req_enqueue(tmpl->qce, &req->base);
+}
+
+static int qce_aead_encrypt(struct aead_request *req)
+{
+	return qce_aead_crypt(req, 1);
+}
+
+static int qce_aead_decrypt(struct aead_request *req)
+{
+	return qce_aead_crypt(req, 0);
+}
+
+static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+
+	if (IS_CCM_RFC4309(flags)) {
+		if (keylen < QCE_CCM4309_SALT_SIZE)
+			return -EINVAL;
+		keylen -= QCE_CCM4309_SALT_SIZE;
+		memcpy(ctx->ccm4309_salt, key + keylen, QCE_CCM4309_SALT_SIZE);
+	}
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	ctx->enc_keylen = keylen;
+	ctx->auth_keylen = keylen;
+
+	memcpy(ctx->enc_key, key, keylen);
+	memcpy(ctx->auth_key, key, keylen);
+
+	return 0;
+}
+
+static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct crypto_authenc_keys authenc_keys;
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+	u32 _key[6];
+	int err;
+
+	err = crypto_authenc_extractkeys(&authenc_keys, key, keylen);
+	if (err)
+		return err;
+
+	if (authenc_keys.enckeylen > QCE_MAX_KEY_SIZE ||
+	    authenc_keys.authkeylen > QCE_MAX_KEY_SIZE)
+		return -EINVAL;
+
+	if (IS_DES(flags)) {
+		err = verify_aead_des_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen);
+		if (err)
+			return err;
+	} else if (IS_3DES(flags)) {
+		err = verify_aead_des3_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen);
+		if (err)
+			return err;
+		/*
+		 * The crypto engine does not support any two keys
+		 * being the same for triple des algorithms. The
+		 * verify_skcipher_des3_key does not check for all the
+		 * below conditions. Return -EINVAL in case any two keys
+		 * are the same. Revisit to see if a fallback cipher
+		 * is needed to handle this condition.
+		 */
+		memcpy(_key, authenc_keys.enckey, DES3_EDE_KEY_SIZE);
+		if (!((_key[0] ^ _key[2]) | (_key[1] ^ _key[3])) ||
+		    !((_key[2] ^ _key[4]) | (_key[3] ^ _key[5])) ||
+		    !((_key[0] ^ _key[4]) | (_key[1] ^ _key[5])))
+			return -EINVAL;
+	} else if (IS_AES(flags)) {
+		/* No random key sizes */
+		if (authenc_keys.enckeylen != AES_KEYSIZE_128 &&
+		    authenc_keys.enckeylen != AES_KEYSIZE_256)
+			return -EINVAL;
+	}
+
+	ctx->enc_keylen = authenc_keys.enckeylen;
+	ctx->auth_keylen = authenc_keys.authkeylen;
+
+	memcpy(ctx->enc_key, authenc_keys.enckey, authenc_keys.enckeylen);
+
+	memset(ctx->auth_key, 0, sizeof(ctx->auth_key));
+	memcpy(ctx->auth_key, authenc_keys.authkey, authenc_keys.authkeylen);
+
+	return 0;
+}
+
+static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+
+	if (IS_CCM(flags)) {
+		if (authsize < 4 || authsize > 16 || authsize % 2)
+			return -EINVAL;
+		if (IS_CCM_RFC4309(flags) && (authsize < 8 || authsize % 4))
+			return -EINVAL;
+	}
+	ctx->authsize = authsize;
+	return 0;
+}
+
+static int qce_aead_init(struct crypto_aead *tfm)
+{
+	crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx));
+	return 0;
+}
+
+struct qce_aead_def {
+	unsigned long flags;
+	const char *name;
+	const char *drv_name;
+	unsigned int blocksize;
+	unsigned int chunksize;
+	unsigned int ivsize;
+	unsigned int maxauthsize;
+};
+
+static const struct qce_aead_def aead_def[] = {
+	{
+		.flags          = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC,
+		.name           = "authenc(hmac(sha1),cbc(des))",
+		.drv_name       = "authenc-hmac-sha1-cbc-des-qce",
+		.blocksize      = DES_BLOCK_SIZE,
+		.ivsize         = DES_BLOCK_SIZE,
+		.maxauthsize	= SHA1_DIGEST_SIZE,
+	},
+	{
+		.flags          = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC,
+		.name           = "authenc(hmac(sha1),cbc(des3_ede))",
+		.drv_name       = "authenc-hmac-sha1-cbc-3des-qce",
+		.blocksize      = DES3_EDE_BLOCK_SIZE,
+		.ivsize         = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize	= SHA1_DIGEST_SIZE,
+	},
+	{
+		.flags          = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+		.name           = "authenc(hmac(sha256),cbc(des))",
+		.drv_name       = "authenc-hmac-sha256-cbc-des-qce",
+		.blocksize      = DES_BLOCK_SIZE,
+		.ivsize         = DES_BLOCK_SIZE,
+		.maxauthsize	= SHA256_DIGEST_SIZE,
+	},
+	{
+		.flags          = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+		.name           = "authenc(hmac(sha256),cbc(des3_ede))",
+		.drv_name       = "authenc-hmac-sha256-cbc-3des-qce",
+		.blocksize      = DES3_EDE_BLOCK_SIZE,
+		.ivsize         = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize	= SHA256_DIGEST_SIZE,
+	},
+	{
+		.flags          =  QCE_ALG_AES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+		.name           = "authenc(hmac(sha256),cbc(aes))",
+		.drv_name       = "authenc-hmac-sha256-cbc-aes-qce",
+		.blocksize      = AES_BLOCK_SIZE,
+		.ivsize         = AES_BLOCK_SIZE,
+		.maxauthsize	= SHA256_DIGEST_SIZE,
+	},
+	{
+		.flags          =  QCE_ALG_AES | QCE_MODE_CCM,
+		.name           = "ccm(aes)",
+		.drv_name       = "ccm-aes-qce",
+		.blocksize	= 1,
+		.ivsize         = AES_BLOCK_SIZE,
+		.maxauthsize	= AES_BLOCK_SIZE,
+	},
+	{
+		.flags          =  QCE_ALG_AES | QCE_MODE_CCM | QCE_MODE_CCM_RFC4309,
+		.name           = "rfc4309(ccm(aes))",
+		.drv_name       = "rfc4309-ccm-aes-qce",
+		.blocksize	= 1,
+		.ivsize         = 8,
+		.maxauthsize	= AES_BLOCK_SIZE,
+	},
+};
+
+/* Build one AEAD template from @def, register it and queue it on aead_algs. */
+static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl;
+	struct aead_alg *alg;
+	int ret;
+
+	tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+	if (!tmpl)
+		return -ENOMEM;
+
+	alg = &tmpl->alg.aead;
+
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", def->drv_name);
+
+	alg->base.cra_blocksize		= def->blocksize;
+	alg->chunksize			= def->chunksize;
+	alg->ivsize			= def->ivsize;
+	alg->maxauthsize		= def->maxauthsize;
+	if (IS_CCM(def->flags))
+		alg->setkey		= qce_aead_ccm_setkey;
+	else
+		alg->setkey		= qce_aead_setkey;
+	alg->setauthsize		= qce_aead_setauthsize;
+	alg->encrypt			= qce_aead_encrypt;
+	alg->decrypt			= qce_aead_decrypt;
+	alg->init			= qce_aead_init;
+
+	alg->base.cra_priority		= 300;
+	alg->base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
+					  CRYPTO_ALG_KERN_DRIVER_ONLY;
+	alg->base.cra_ctxsize		= sizeof(struct qce_aead_ctx);
+	alg->base.cra_alignmask		= 0;
+	alg->base.cra_module		= THIS_MODULE;
+
+	INIT_LIST_HEAD(&tmpl->entry);
+	tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_AEAD;
+	tmpl->alg_flags = def->flags;
+	tmpl->qce = qce;
+
+	ret = crypto_register_aead(alg);
+	if (ret) {
+		/* alg lives inside tmpl: log its name before freeing. */
+		dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name);
+		kfree(tmpl);
+		return ret;
+	}
+
+	list_add_tail(&tmpl->entry, &aead_algs);
+	dev_dbg(qce->dev, "%s is registered\n", alg->base.cra_name);
+	return 0;
+}
+
+static void qce_aead_unregister(struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl, *n;
+
+	list_for_each_entry_safe(tmpl, n, &aead_algs, entry) {
+		crypto_unregister_aead(&tmpl->alg.aead);
+		list_del(&tmpl->entry);
+		kfree(tmpl);
+	}
+}
+
+static int qce_aead_register(struct qce_device *qce)
+{
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(aead_def); i++) {
+		ret = qce_aead_register_one(&aead_def[i], qce);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	qce_aead_unregister(qce);
+	return ret;
+}
+
+const struct qce_algo_ops aead_ops = {
+	.type = CRYPTO_ALG_TYPE_AEAD,
+	.register_algs = qce_aead_register,
+	.unregister_algs = qce_aead_unregister,
+	.async_req_handle = qce_aead_async_req_handle,
+};
diff --git a/drivers/crypto/qce/aead.h b/drivers/crypto/qce/aead.h
new file mode 100644
index 0000000000000..3d1f2039930b6
--- /dev/null
+++ b/drivers/crypto/qce/aead.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, Linaro Limited. All rights reserved.
+ */
+
+#ifndef _AEAD_H_
+#define _AEAD_H_
+
+#include "common.h"
+#include "core.h"
+
+#define QCE_MAX_KEY_SIZE		64
+#define QCE_CCM4309_SALT_SIZE		3
+
+struct qce_aead_ctx {
+	u8 enc_key[QCE_MAX_KEY_SIZE];
+	u8 auth_key[QCE_MAX_KEY_SIZE];
+	u8 ccm4309_salt[QCE_CCM4309_SALT_SIZE];
+	unsigned int enc_keylen;
+	unsigned int auth_keylen;
+	unsigned int authsize;
+};
+
+struct qce_aead_reqctx {
+	unsigned long flags;
+	u8 *iv;
+	unsigned int ivsize;
+	int src_nents;
+	int dst_nents;
+	struct scatterlist result_sg;
+	struct scatterlist adata_sg;
+	struct sg_table dst_tbl;
+	struct sg_table src_tbl;
+	struct scatterlist *dst_sg;
+	struct scatterlist *src_sg;
+	unsigned int cryptlen;
+	unsigned int assoclen;
+	unsigned char *adata;
+	u8 ccm_nonce[QCE_MAX_NONCE];
+	u8 ccmresult_buf[QCE_BAM_BURST_SIZE];
+	u8 ccm_rfc4309_iv[QCE_MAX_IV_SIZE];
+};
+
+static inline struct qce_alg_template *to_aead_tmpl(struct crypto_aead *tfm)
+{
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+
+	return container_of(alg, struct qce_alg_template, alg.aead);
+}
+
+extern const struct qce_algo_ops aead_ops;
+
+#endif /* _AEAD_H_ */
diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h
index b135440bf72b5..02e63ad9f2455 100644
--- a/drivers/crypto/qce/common.h
+++ b/drivers/crypto/qce/common.h
@@ -11,6 +11,7 @@
 #include <crypto/aes.h>
 #include <crypto/hash.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/aead.h>
 
 /* xts du size */
 #define QCE_SECTOR_SIZE			512
@@ -88,6 +89,7 @@ struct qce_alg_template {
 	union {
 		struct skcipher_alg skcipher;
 		struct ahash_alg ahash;
+		struct aead_alg aead;
 	} alg;
 	struct qce_device *qce;
 	const u8 *hash_zero;
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index 80b75085c2659..d3780be44a763 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -17,6 +17,7 @@
 #include "core.h"
 #include "cipher.h"
 #include "sha.h"
+#include "aead.h"
 
 #define QCE_MAJOR_VERSION5	0x05
 #define QCE_QUEUE_LENGTH	1
@@ -28,6 +29,9 @@ static const struct qce_algo_ops *qce_ops[] = {
 #ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 	&ahash_ops,
 #endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+	&aead_ops,
+#endif
 };
 
 static void qce_unregister_algs(struct qce_device *qce)
-- 
GitLab


From e5d6181d35b257c13841f774f5ad36b0cb2d82aa Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:05 -0400
Subject: [PATCH 0710/3804] crypto: qce - Clean up qce_auth_cfg

Remove various redundant checks in qce_auth_cfg. Also allow qce_auth_cfg
to take auth_size as a parameter, which is a required setting for ccm(aes)
algorithms.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/common.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index 7b5bc5a6ae81c..7b3d6caec1b21 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -97,11 +97,11 @@ static inline void qce_crypto_go(struct qce_device *qce, bool result_dump)
 }
 
 #ifdef CONFIG_CRYPTO_DEV_QCE_SHA
-static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
+static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
 {
 	u32 cfg = 0;
 
-	if (IS_AES(flags) && (IS_CCM(flags) || IS_CMAC(flags)))
+	if (IS_CCM(flags) || IS_CMAC(flags))
 		cfg |= AUTH_ALG_AES << AUTH_ALG_SHIFT;
 	else
 		cfg |= AUTH_ALG_SHA << AUTH_ALG_SHIFT;
@@ -119,15 +119,16 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
 		cfg |= AUTH_SIZE_SHA256 << AUTH_SIZE_SHIFT;
 	else if (IS_CMAC(flags))
 		cfg |= AUTH_SIZE_ENUM_16_BYTES << AUTH_SIZE_SHIFT;
+	else if (IS_CCM(flags))
+		cfg |= (auth_size - 1) << AUTH_SIZE_SHIFT;
 
 	if (IS_SHA1(flags) || IS_SHA256(flags))
 		cfg |= AUTH_MODE_HASH << AUTH_MODE_SHIFT;
-	else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags) ||
-		 IS_CBC(flags) || IS_CTR(flags))
+	else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags))
 		cfg |= AUTH_MODE_HMAC << AUTH_MODE_SHIFT;
-	else if (IS_AES(flags) && IS_CCM(flags))
+	else if (IS_CCM(flags))
 		cfg |= AUTH_MODE_CCM << AUTH_MODE_SHIFT;
-	else if (IS_AES(flags) && IS_CMAC(flags))
+	else if (IS_CMAC(flags))
 		cfg |= AUTH_MODE_CMAC << AUTH_MODE_SHIFT;
 
 	if (IS_SHA(flags) || IS_SHA_HMAC(flags))
@@ -136,10 +137,6 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
 	if (IS_CCM(flags))
 		cfg |= QCE_MAX_NONCE_WORDS << AUTH_NONCE_NUM_WORDS_SHIFT;
 
-	if (IS_CBC(flags) || IS_CTR(flags) || IS_CCM(flags) ||
-	    IS_CMAC(flags))
-		cfg |= BIT(AUTH_LAST_SHIFT) | BIT(AUTH_FIRST_SHIFT);
-
 	return cfg;
 }
 
@@ -171,7 +168,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
 		qce_clear_array(qce, REG_AUTH_KEY0, 16);
 		qce_clear_array(qce, REG_AUTH_BYTECNT0, 4);
 
-		auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen);
+		auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen, digestsize);
 	}
 
 	if (IS_SHA_HMAC(rctx->flags) || IS_CMAC(rctx->flags)) {
@@ -199,7 +196,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
 		qce_write_array(qce, REG_AUTH_BYTECNT0,
 				(u32 *)rctx->byte_count, 2);
 
-	auth_cfg = qce_auth_cfg(rctx->flags, 0);
+	auth_cfg = qce_auth_cfg(rctx->flags, 0, digestsize);
 
 	if (rctx->last_blk)
 		auth_cfg |= BIT(AUTH_LAST_SHIFT);
-- 
GitLab


From db0018a8b615e256c90a63d2d5698f2144dde222 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:06 -0400
Subject: [PATCH 0711/3804] crypto: qce - Add support for AEAD algorithms

Add register programming sequence for enabling AEAD
algorithms on the Qualcomm crypto engine.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/common.c | 162 +++++++++++++++++++++++++++++++++++-
 1 file changed, 160 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index 7b3d6caec1b21..7c612ba5068f7 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -15,6 +15,7 @@
 #include "core.h"
 #include "regs-v5.h"
 #include "sha.h"
+#include "aead.h"
 
 static inline u32 qce_read(struct qce_device *qce, u32 offset)
 {
@@ -96,7 +97,7 @@ static inline void qce_crypto_go(struct qce_device *qce, bool result_dump)
 		qce_write(qce, REG_GOPROC, BIT(GO_SHIFT));
 }
 
-#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
+#if defined(CONFIG_CRYPTO_DEV_QCE_SHA) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
 static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
 {
 	u32 cfg = 0;
@@ -139,7 +140,9 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
 
 	return cfg;
 }
+#endif
 
+#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
@@ -225,7 +228,7 @@ go_proc:
 }
 #endif
 
-#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
+#if defined(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
 static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
 {
 	u32 cfg = 0;
@@ -271,7 +274,9 @@ static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
 
 	return cfg;
 }
+#endif
 
+#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
 static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize)
 {
 	u8 swap[QCE_AES_IV_LENGTH];
@@ -386,6 +391,155 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req)
 }
 #endif
 
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+static const u32 std_iv_sha1[SHA256_DIGEST_SIZE / sizeof(u32)] = {
+	SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, 0, 0, 0
+};
+
+static const u32 std_iv_sha256[SHA256_DIGEST_SIZE / sizeof(u32)] = {
+	SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+	SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7
+};
+
+static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int len)
+{
+	u32 *d = dst;
+	const u8 *s = src;
+	unsigned int n;
+
+	n = len / sizeof(u32);
+	for (; n > 0; n--) {
+		*d = be32_to_cpup((const __be32 *)s);
+		s += sizeof(u32);
+		d++;
+	}
+	return DIV_ROUND_UP(len, sizeof(u32));
+}
+
+static int qce_setup_regs_aead(struct crypto_async_request *async_req)
+{
+	struct aead_request *req = aead_request_cast(async_req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	u32 enckey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0};
+	u32 enciv[QCE_MAX_IV_SIZE / sizeof(u32)] = {0};
+	u32 authkey[QCE_SHA_HMAC_KEY_SIZE / sizeof(u32)] = {0};
+	u32 authiv[SHA256_DIGEST_SIZE / sizeof(u32)] = {0};
+	u32 authnonce[QCE_MAX_NONCE / sizeof(u32)] = {0};
+	unsigned int enc_keylen = ctx->enc_keylen;
+	unsigned int auth_keylen = ctx->auth_keylen;
+	unsigned int enc_ivsize = rctx->ivsize;
+	unsigned int auth_ivsize = 0;
+	unsigned int enckey_words, enciv_words;
+	unsigned int authkey_words, authiv_words, authnonce_words;
+	unsigned long flags = rctx->flags;
+	u32 encr_cfg, auth_cfg, config, totallen;
+	u32 iv_last_word;
+
+	qce_setup_config(qce);
+
+	/* Write encryption key */
+	enckey_words = qce_be32_to_cpu_array(enckey, ctx->enc_key, enc_keylen);
+	qce_write_array(qce, REG_ENCR_KEY0, enckey, enckey_words);
+
+	/* Write encryption iv */
+	enciv_words = qce_be32_to_cpu_array(enciv, rctx->iv, enc_ivsize);
+	qce_write_array(qce, REG_CNTR0_IV0, enciv, enciv_words);
+
+	if (IS_CCM(rctx->flags)) {
+		iv_last_word = enciv[enciv_words - 1];
+		qce_write(qce, REG_CNTR3_IV3, iv_last_word + 1);
+		qce_write_array(qce, REG_ENCR_CCM_INT_CNTR0, (u32 *)enciv, enciv_words);
+		qce_write(qce, REG_CNTR_MASK, ~0);
+		qce_write(qce, REG_CNTR_MASK0, ~0);
+		qce_write(qce, REG_CNTR_MASK1, ~0);
+		qce_write(qce, REG_CNTR_MASK2, ~0);
+	}
+
+	/* Clear authentication IV and KEY registers of previous values */
+	qce_clear_array(qce, REG_AUTH_IV0, 16);
+	qce_clear_array(qce, REG_AUTH_KEY0, 16);
+
+	/* Clear byte count */
+	qce_clear_array(qce, REG_AUTH_BYTECNT0, 4);
+
+	/* Write authentication key */
+	authkey_words = qce_be32_to_cpu_array(authkey, ctx->auth_key, auth_keylen);
+	qce_write_array(qce, REG_AUTH_KEY0, (u32 *)authkey, authkey_words);
+
+	/* Write initial authentication IV only for HMAC algorithms */
+	if (IS_SHA_HMAC(rctx->flags)) {
+		/* Write default authentication iv */
+		if (IS_SHA1_HMAC(rctx->flags)) {
+			auth_ivsize = SHA1_DIGEST_SIZE;
+			memcpy(authiv, std_iv_sha1, auth_ivsize);
+		} else if (IS_SHA256_HMAC(rctx->flags)) {
+			auth_ivsize = SHA256_DIGEST_SIZE;
+			memcpy(authiv, std_iv_sha256, auth_ivsize);
+		}
+		authiv_words = auth_ivsize / sizeof(u32);
+		qce_write_array(qce, REG_AUTH_IV0, (u32 *)authiv, authiv_words);
+	} else if (IS_CCM(rctx->flags)) {
+		/* Write nonce for CCM algorithms */
+		authnonce_words = qce_be32_to_cpu_array(authnonce, rctx->ccm_nonce, QCE_MAX_NONCE);
+		qce_write_array(qce, REG_AUTH_INFO_NONCE0, authnonce, authnonce_words);
+	}
+
+	/* Set up ENCR_SEG_CFG */
+	encr_cfg = qce_encr_cfg(flags, enc_keylen);
+	if (IS_ENCRYPT(flags))
+		encr_cfg |= BIT(ENCODE_SHIFT);
+	qce_write(qce, REG_ENCR_SEG_CFG, encr_cfg);
+
+	/* Set up AUTH_SEG_CFG */
+	auth_cfg = qce_auth_cfg(rctx->flags, auth_keylen, ctx->authsize);
+	auth_cfg |= BIT(AUTH_LAST_SHIFT);
+	auth_cfg |= BIT(AUTH_FIRST_SHIFT);
+	if (IS_ENCRYPT(flags)) {
+		if (IS_CCM(rctx->flags))
+			auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT;
+		else
+			auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT;
+	} else {
+		if (IS_CCM(rctx->flags))
+			auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT;
+		else
+			auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT;
+	}
+	qce_write(qce, REG_AUTH_SEG_CFG, auth_cfg);
+
+	totallen = rctx->cryptlen + rctx->assoclen;
+
+	/* Set the encryption size and start offset */
+	if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
+		qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen + ctx->authsize);
+	else
+		qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen);
+	qce_write(qce, REG_ENCR_SEG_START, rctx->assoclen & 0xffff);
+
+	/* Set the authentication size and start offset */
+	qce_write(qce, REG_AUTH_SEG_SIZE, totallen);
+	qce_write(qce, REG_AUTH_SEG_START, 0);
+
+	/* Write total length */
+	if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
+		qce_write(qce, REG_SEG_SIZE, totallen + ctx->authsize);
+	else
+		qce_write(qce, REG_SEG_SIZE, totallen);
+
+	/* get little endianness */
+	config = qce_config_reg(qce, 1);
+	qce_write(qce, REG_CONFIG, config);
+
+	/* Start the process */
+	qce_crypto_go(qce, !IS_CCM(flags));
+
+	return 0;
+}
+#endif
+
 int qce_start(struct crypto_async_request *async_req, u32 type)
 {
 	switch (type) {
@@ -396,6 +550,10 @@ int qce_start(struct crypto_async_request *async_req, u32 type)
 #ifdef CONFIG_CRYPTO_DEV_QCE_SHA
 	case CRYPTO_ALG_TYPE_AHASH:
 		return qce_setup_regs_ahash(async_req);
+#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+	case CRYPTO_ALG_TYPE_AEAD:
+		return qce_setup_regs_aead(async_req);
 #endif
 	default:
 		return -EINVAL;
-- 
GitLab


From b51dcf05c1e96caccda769f3a60042d77f1a3a7d Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 29 Apr 2021 11:07:07 -0400
Subject: [PATCH 0712/3804] crypto: qce - Schedule fallback aead algorithm

Qualcomm crypto engine does not handle the following scenarios and
will issue an abort. In such cases, pass on the transformation to
a fallback algorithm.

- DES3 algorithms with all three keys same.
- AES192 algorithms.
- 0 length messages.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/aead.c | 64 ++++++++++++++++++++++++++++++++-------
 drivers/crypto/qce/aead.h |  3 ++
 2 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
index ef66ae21eae36..6d06a19b48e49 100644
--- a/drivers/crypto/qce/aead.c
+++ b/drivers/crypto/qce/aead.c
@@ -512,7 +512,23 @@ static int qce_aead_crypt(struct aead_request *req, int encrypt)
 	/* CE does not handle 0 length messages */
 	if (!rctx->cryptlen) {
 		if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)))
-			return -EINVAL;
+			ctx->need_fallback = true;
+	}
+
+	/* If fallback is needed, schedule and exit */
+	if (ctx->need_fallback) {
+		/* Reset need_fallback in case the same ctx is used for another transaction */
+		ctx->need_fallback = false;
+
+		aead_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+		aead_request_set_callback(&rctx->fallback_req, req->base.flags,
+					  req->base.complete, req->base.data);
+		aead_request_set_crypt(&rctx->fallback_req, req->src,
+				       req->dst, req->cryptlen, req->iv);
+		aead_request_set_ad(&rctx->fallback_req, req->assoclen);
+
+		return encrypt ? crypto_aead_encrypt(&rctx->fallback_req) :
+				 crypto_aead_decrypt(&rctx->fallback_req);
 	}
 
 	/*
@@ -553,7 +569,7 @@ static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
 		memcpy(ctx->ccm4309_salt, key + keylen, QCE_CCM4309_SALT_SIZE);
 	}
 
-	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256)
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256 && keylen != AES_KEYSIZE_192)
 		return -EINVAL;
 
 	ctx->enc_keylen = keylen;
@@ -562,7 +578,12 @@ static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
 	memcpy(ctx->enc_key, key, keylen);
 	memcpy(ctx->auth_key, key, keylen);
 
-	return 0;
+	if (keylen == AES_KEYSIZE_192)
+		ctx->need_fallback = true;
+
+	return IS_CCM_RFC4309(flags) ?
+		crypto_aead_setkey(ctx->fallback, key, keylen + QCE_CCM4309_SALT_SIZE) :
+		crypto_aead_setkey(ctx->fallback, key, keylen);
 }
 
 static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
@@ -593,20 +614,21 @@ static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int
 		 * The crypto engine does not support any two keys
 		 * being the same for triple des algorithms. The
 		 * verify_skcipher_des3_key does not check for all the
-		 * below conditions. Return -EINVAL in case any two keys
-		 * are the same. Revisit to see if a fallback cipher
-		 * is needed to handle this condition.
+		 * below conditions. Schedule fallback in this case.
 		 */
 		memcpy(_key, authenc_keys.enckey, DES3_EDE_KEY_SIZE);
 		if (!((_key[0] ^ _key[2]) | (_key[1] ^ _key[3])) ||
 		    !((_key[2] ^ _key[4]) | (_key[3] ^ _key[5])) ||
 		    !((_key[0] ^ _key[4]) | (_key[1] ^ _key[5])))
-			return -EINVAL;
+			ctx->need_fallback = true;
 	} else if (IS_AES(flags)) {
 		/* No random key sizes */
 		if (authenc_keys.enckeylen != AES_KEYSIZE_128 &&
+		    authenc_keys.enckeylen != AES_KEYSIZE_192 &&
 		    authenc_keys.enckeylen != AES_KEYSIZE_256)
 			return -EINVAL;
+		if (authenc_keys.enckeylen == AES_KEYSIZE_192)
+			ctx->need_fallback = true;
 	}
 
 	ctx->enc_keylen = authenc_keys.enckeylen;
@@ -617,7 +639,7 @@ static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int
 	memset(ctx->auth_key, 0, sizeof(ctx->auth_key));
 	memcpy(ctx->auth_key, authenc_keys.authkey, authenc_keys.authkeylen);
 
-	return 0;
+	return crypto_aead_setkey(ctx->fallback, key, keylen);
 }
 
 static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
@@ -632,15 +654,33 @@ static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 			return -EINVAL;
 	}
 	ctx->authsize = authsize;
-	return 0;
+
+	return crypto_aead_setauthsize(ctx->fallback, authsize);
 }
 
 static int qce_aead_init(struct crypto_aead *tfm)
 {
-	crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx));
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	ctx->need_fallback = false;
+	ctx->fallback = crypto_alloc_aead(crypto_tfm_alg_name(&tfm->base),
+					  0, CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(ctx->fallback))
+		return PTR_ERR(ctx->fallback);
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx) +
+				crypto_aead_reqsize(ctx->fallback));
 	return 0;
 }
 
+static void qce_aead_exit(struct crypto_aead *tfm)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	crypto_free_aead(ctx->fallback);
+}
+
 struct qce_aead_def {
 	unsigned long flags;
 	const char *name;
@@ -738,11 +778,13 @@ static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_devi
 	alg->encrypt			= qce_aead_encrypt;
 	alg->decrypt			= qce_aead_decrypt;
 	alg->init			= qce_aead_init;
+	alg->exit			= qce_aead_exit;
 
 	alg->base.cra_priority		= 300;
 	alg->base.cra_flags		= CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_ALLOCATES_MEMORY |
-					  CRYPTO_ALG_KERN_DRIVER_ONLY;
+					  CRYPTO_ALG_KERN_DRIVER_ONLY |
+					  CRYPTO_ALG_NEED_FALLBACK;
 	alg->base.cra_ctxsize		= sizeof(struct qce_aead_ctx);
 	alg->base.cra_alignmask		= 0;
 	alg->base.cra_module		= THIS_MODULE;
diff --git a/drivers/crypto/qce/aead.h b/drivers/crypto/qce/aead.h
index 3d1f2039930b6..efb8477cc0887 100644
--- a/drivers/crypto/qce/aead.h
+++ b/drivers/crypto/qce/aead.h
@@ -19,6 +19,8 @@ struct qce_aead_ctx {
 	unsigned int enc_keylen;
 	unsigned int auth_keylen;
 	unsigned int authsize;
+	bool need_fallback;
+	struct crypto_aead *fallback;
 };
 
 struct qce_aead_reqctx {
@@ -39,6 +41,7 @@ struct qce_aead_reqctx {
 	u8 ccm_nonce[QCE_MAX_NONCE];
 	u8 ccmresult_buf[QCE_BAM_BURST_SIZE];
 	u8 ccm_rfc4309_iv[QCE_MAX_IV_SIZE];
+	struct aead_request fallback_req;
 };
 
 static inline struct qce_alg_template *to_aead_tmpl(struct crypto_aead *tfm)
-- 
GitLab


From 0cdbabf8bb7a6147f5adf37dbc251e92a1bbc2c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= <l.stelmach@samsung.com>
Date: Wed, 5 May 2021 20:29:14 +0200
Subject: [PATCH 0713/3804] hwrng: exynos - Fix runtime PM imbalance on error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pm_runtime_resume_and_get() wraps pm_runtime_get_sync() and, if the
latter fails, decrements the runtime PM usage counter so that the
counter stays balanced.

Signed-off-by: Łukasz Stelmach <l.stelmach@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/exynos-trng.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 8e1fe3f8dd2df..c8db62bc5ff72 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -132,7 +132,7 @@ static int exynos_trng_probe(struct platform_device *pdev)
 		return PTR_ERR(trng->mem);
 
 	pm_runtime_enable(&pdev->dev);
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Could not get runtime PM.\n");
 		goto err_pm_get;
@@ -165,7 +165,7 @@ err_register:
 	clk_disable_unprepare(trng->clk);
 
 err_clock:
-	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_put_noidle(&pdev->dev);
 
 err_pm_get:
 	pm_runtime_disable(&pdev->dev);
-- 
GitLab


From 9395c58fdddd79cdd3882132cdd04e8ac7ad525f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:08 +0000
Subject: [PATCH 0714/3804] crypto: ixp4xx - dma_unmap the correct address

Testing ixp4xx_crypto with CONFIG_DMA_API_DEBUG leads to the following error:
DMA-API: platform ixp4xx_crypto.0: device driver tries to free DMA memory it has not allocated [device address=0x0000000000000000] [size=24 bytes]

This is due to dma_unmap using the wrong address.

Fixes: 0d44dc59b2b4 ("crypto: ixp4xx - Fix handling of chained sg buffers")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 0616e369522e9..ed3deaa5ed2b8 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -330,7 +330,7 @@ static void free_buf_chain(struct device *dev, struct buffer_desc *buf,
 
 		buf1 = buf->next;
 		phys1 = buf->phys_next;
-		dma_unmap_single(dev, buf->phys_next, buf->buf_len, buf->dir);
+		dma_unmap_single(dev, buf->phys_addr, buf->buf_len, buf->dir);
 		dma_pool_free(buffer_pool, buf, phys);
 		buf = buf1;
 		phys = phys1;
-- 
GitLab


From e8acf011f2e7e21a7e2fae47cbaa06598e533d40 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:09 +0000
Subject: [PATCH 0715/3804] crypto: ixp4xx - update IV after requests

Crypto selftests fail on ixp4xx since it does not update the IV after
skcipher requests.

Fixes: 81bef0150074 ("crypto: ixp4xx - Hardware crypto support for IXP4xx CPUs")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index ed3deaa5ed2b8..f577ee4afd06f 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -149,6 +149,8 @@ struct crypt_ctl {
 struct ablk_ctx {
 	struct buffer_desc *src;
 	struct buffer_desc *dst;
+	u8 iv[MAX_IVLEN];
+	bool encrypt;
 };
 
 struct aead_ctx {
@@ -381,6 +383,20 @@ static void one_packet(dma_addr_t phys)
 	case CTL_FLAG_PERFORM_ABLK: {
 		struct skcipher_request *req = crypt->data.ablk_req;
 		struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
+		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+		unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+		unsigned int offset;
+
+		if (ivsize > 0) {
+			offset = req->cryptlen - ivsize;
+			if (req_ctx->encrypt) {
+				scatterwalk_map_and_copy(req->iv, req->dst,
+							 offset, ivsize, 0);
+			} else {
+				memcpy(req->iv, req_ctx->iv, ivsize);
+				memzero_explicit(req_ctx->iv, ivsize);
+			}
+		}
 
 		if (req_ctx->dst) {
 			free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
@@ -876,6 +892,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 	struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
 	struct buffer_desc src_hook;
 	struct device *dev = &pdev->dev;
+	unsigned int offset;
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 				GFP_KERNEL : GFP_ATOMIC;
 
@@ -885,6 +902,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 		return -EAGAIN;
 
 	dir = encrypt ? &ctx->encrypt : &ctx->decrypt;
+	req_ctx->encrypt = encrypt;
 
 	crypt = get_crypt_desc();
 	if (!crypt)
@@ -900,6 +918,10 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 
 	BUG_ON(ivsize && !req->iv);
 	memcpy(crypt->iv, req->iv, ivsize);
+	if (ivsize > 0 && !encrypt) {
+		offset = req->cryptlen - ivsize;
+		scatterwalk_map_and_copy(req_ctx->iv, req->src, offset, ivsize, 0);
+	}
 	if (req->src != req->dst) {
 		struct buffer_desc dst_hook;
 		crypt->mode |= NPE_OP_NOT_IN_PLACE;
-- 
GitLab


From dfb098d692eac2a11a7051dfe87be98cd90da67d Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:10 +0000
Subject: [PATCH 0716/3804] crypto: ixp4xx - fallback when having more than one
 SG

Testing ixp4xx_crypto leads to:
alg: skcipher: ecb(des)-ixp4xx encryption overran dst buffer on test vector 0, cfg="two even aligned splits"

The HW always overwrites the destination when sg_nents() > 1.
The problem seems to be that the HW always writes areq->cryptlen bytes
to the last SG.
A comment in the driver's code gives a clue that multiple SGs were not
planned: "This was never tested by Intel for more than one dst buffer, I think".
So let's add a fallback for this situation.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig         |  5 +++
 drivers/crypto/ixp4xx_crypto.c | 56 ++++++++++++++++++++++++++++++++--
 2 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1fe5b7eafc02c..1d5b342e6b424 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -325,6 +325,11 @@ config CRYPTO_DEV_TALITOS2
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
+	select CRYPTO_AES
+	select CRYPTO_DES
+	select CRYPTO_ECB
+	select CRYPTO_CBC
+	select CRYPTO_CTR
 	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index f577ee4afd06f..8bbf2ead6e791 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -151,6 +151,7 @@ struct ablk_ctx {
 	struct buffer_desc *dst;
 	u8 iv[MAX_IVLEN];
 	bool encrypt;
+	struct skcipher_request fallback_req;   // keep at the end
 };
 
 struct aead_ctx {
@@ -186,6 +187,7 @@ struct ixp_ctx {
 	unsigned salted;
 	atomic_t configuring;
 	struct completion completion;
+	struct crypto_skcipher *fallback_tfm;
 };
 
 struct ixp_alg {
@@ -590,7 +592,23 @@ static int init_tfm(struct crypto_tfm *tfm)
 
 static int init_tfm_ablk(struct crypto_skcipher *tfm)
 {
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx));
+	struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm);
+	struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm);
+	const char *name = crypto_tfm_alg_name(ctfm);
+
+	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback_tfm)) {
+		pr_err("ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(ctx->fallback_tfm));
+		return PTR_ERR(ctx->fallback_tfm);
+	}
+
+	pr_info("Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&tfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(ctx->fallback_tfm))
+		 );
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx) + crypto_skcipher_reqsize(ctx->fallback_tfm));
 	return init_tfm(crypto_skcipher_tfm(tfm));
 }
 
@@ -609,6 +627,10 @@ static void exit_tfm(struct crypto_tfm *tfm)
 
 static void exit_tfm_ablk(struct crypto_skcipher *tfm)
 {
+	struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm);
+	struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm);
+
+	crypto_free_skcipher(ctx->fallback_tfm);
 	exit_tfm(crypto_skcipher_tfm(tfm));
 }
 
@@ -854,7 +876,12 @@ static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key,
 out:
 	if (!atomic_dec_and_test(&ctx->configuring))
 		wait_for_completion(&ctx->completion);
-	return ret;
+	if (ret)
+		return ret;
+	crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_skcipher_setkey(ctx->fallback_tfm, key, key_len);
 }
 
 static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -880,6 +907,25 @@ static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	return ablk_setkey(tfm, key, key_len);
 }
 
+static int ixp4xx_cipher_fallback(struct skcipher_request *areq, int encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct ixp_ctx *op = crypto_skcipher_ctx(tfm);
+	struct ablk_ctx *rctx = skcipher_request_ctx(areq);
+	int err;
+
+	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+				      areq->base.complete, areq->base.data);
+	skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (encrypt)
+		err = crypto_skcipher_encrypt(&rctx->fallback_req);
+	else
+		err = crypto_skcipher_decrypt(&rctx->fallback_req);
+	return err;
+}
+
 static int ablk_perform(struct skcipher_request *req, int encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -896,6 +942,9 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 	gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
 				GFP_KERNEL : GFP_ATOMIC;
 
+	if (sg_nents(req->src) > 1 || sg_nents(req->dst) > 1)
+		return ixp4xx_cipher_fallback(req, encrypt);
+
 	if (qmgr_stat_full(SEND_QID))
 		return -EAGAIN;
 	if (atomic_read(&ctx->configuring))
@@ -1422,7 +1471,8 @@ static int __init ixp_module_init(void)
 		/* block ciphers */
 		cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
 				      CRYPTO_ALG_ASYNC |
-				      CRYPTO_ALG_ALLOCATES_MEMORY;
+				      CRYPTO_ALG_ALLOCATES_MEMORY |
+				      CRYPTO_ALG_NEED_FALLBACK;
 		if (!cra->setkey)
 			cra->setkey = ablk_setkey;
 		if (!cra->encrypt)
-- 
GitLab


From 3557084ef47ba79f84325c575cb9a4887c484d36 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:11 +0000
Subject: [PATCH 0717/3804] crypto: ixp4xx - convert unsigned to unsigned int

Fixes all issues reported by checkpatch about "unsigned"; let's convert
them to unsigned int.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 8bbf2ead6e791..17de9e60adadb 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -136,7 +136,7 @@ struct crypt_ctl {
 	u32 crypto_ctx;		/* NPE Crypto Param structure address */
 
 	/* Used by Host: 4*4 bytes*/
-	unsigned ctl_flags;
+	unsigned int ctl_flags;
 	union {
 		struct skcipher_request *ablk_req;
 		struct aead_request *aead_req;
@@ -184,7 +184,7 @@ struct ixp_ctx {
 	u8 enckey[MAX_KEYLEN];
 	u8 salt[MAX_IVLEN];
 	u8 nonce[CTR_RFC3686_NONCE_SIZE];
-	unsigned salted;
+	unsigned int salted;
 	atomic_t configuring;
 	struct completion completion;
 	struct crypto_skcipher *fallback_tfm;
@@ -695,8 +695,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
 	return 0;
 }
 
-static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned authsize,
-		const u8 *key, int key_len, unsigned digest_len)
+static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize,
+		      const u8 *key, int key_len, unsigned int digest_len)
 {
 	u32 itarget, otarget, npe_ctx_addr;
 	unsigned char *cinfo;
@@ -823,12 +823,12 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 }
 
 static struct buffer_desc *chainup_buffers(struct device *dev,
-		struct scatterlist *sg,	unsigned nbytes,
+		struct scatterlist *sg,	unsigned int nbytes,
 		struct buffer_desc *buf, gfp_t flags,
 		enum dma_data_direction dir)
 {
 	for (; nbytes > 0; sg = sg_next(sg)) {
-		unsigned len = min(nbytes, sg->length);
+		unsigned int len = min(nbytes, sg->length);
 		struct buffer_desc *next_buf;
 		dma_addr_t next_buf_phys;
 		void *ptr;
@@ -930,7 +930,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	unsigned ivsize = crypto_skcipher_ivsize(tfm);
+	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	struct ix_sa_dir *dir;
 	struct crypt_ctl *crypt;
 	unsigned int nbytes = req->cryptlen;
@@ -1045,8 +1045,8 @@ static int aead_perform(struct aead_request *req, int encrypt,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
-	unsigned ivsize = crypto_aead_ivsize(tfm);
-	unsigned authsize = crypto_aead_authsize(tfm);
+	unsigned int ivsize = crypto_aead_ivsize(tfm);
+	unsigned int authsize = crypto_aead_authsize(tfm);
 	struct ix_sa_dir *dir;
 	struct crypt_ctl *crypt;
 	unsigned int cryptlen;
@@ -1157,7 +1157,7 @@ free_buf_src:
 static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
-	unsigned digest_len = crypto_aead_maxauthsize(tfm);
+	unsigned int digest_len = crypto_aead_maxauthsize(tfm);
 	int ret;
 
 	if (!ctx->enckey_len && !ctx->authkey_len)
-- 
GitLab


From f5b82be62ddd7d9be7dbb624b47aec6240c62a38 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:12 +0000
Subject: [PATCH 0718/3804] crypto: ixp4xx - convert all printk to dev_xxx

Convert all old printk to dev_xxx.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 17de9e60adadb..486a388c909f1 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -452,7 +452,7 @@ static int init_ixp_crypto(struct device *dev)
 
 	if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH |
 				IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) {
-		printk(KERN_ERR "ixp_crypto: No HW crypto available\n");
+		dev_err(dev, "ixp_crypto: No HW crypto available\n");
 		return ret;
 	}
 	npe_c = npe_request(NPE_ID);
@@ -475,8 +475,7 @@ static int init_ixp_crypto(struct device *dev)
 
 	switch ((msg[1]>>16) & 0xff) {
 	case 3:
-		printk(KERN_WARNING "Firmware of %s lacks AES support\n",
-				npe_name(npe_c));
+		dev_warn(dev, "Firmware of %s lacks AES support\n", npe_name(npe_c));
 		support_aes = 0;
 		break;
 	case 4:
@@ -484,8 +483,7 @@ static int init_ixp_crypto(struct device *dev)
 		support_aes = 1;
 		break;
 	default:
-		printk(KERN_ERR "Firmware of %s lacks crypto support\n",
-			npe_name(npe_c));
+		dev_err(dev, "Firmware of %s lacks crypto support\n", npe_name(npe_c));
 		ret = -ENODEV;
 		goto npe_release;
 	}
@@ -521,7 +519,7 @@ static int init_ixp_crypto(struct device *dev)
 	return 0;
 
 npe_error:
-	printk(KERN_ERR "%s not responding\n", npe_name(npe_c));
+	dev_err(dev, "%s not responding\n", npe_name(npe_c));
 	ret = -EIO;
 err:
 	dma_pool_destroy(ctx_pool);
@@ -1487,7 +1485,7 @@ static int __init ixp_module_init(void)
 		cra->base.cra_alignmask = 3;
 		cra->base.cra_priority = 300;
 		if (crypto_register_skcipher(cra))
-			printk(KERN_ERR "Failed to register '%s'\n",
+			dev_err(&pdev->dev, "Failed to register '%s'\n",
 				cra->base.cra_name);
 		else
 			ixp4xx_algos[i].registered = 1;
@@ -1520,7 +1518,7 @@ static int __init ixp_module_init(void)
 		cra->base.cra_priority = 300;
 
 		if (crypto_register_aead(cra))
-			printk(KERN_ERR "Failed to register '%s'\n",
+			dev_err(&pdev->dev, "Failed to register '%s'\n",
 				cra->base.cra_driver_name);
 		else
 			ixp4xx_aeads[i].registered = 1;
-- 
GitLab


From 39e39cfb2dc7325714e8f93b77c4acacd5c1ac2e Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:13 +0000
Subject: [PATCH 0719/3804] crypto: ixp4xx - whitespace fixes

Fixes all whitespace issues reported by checkpatch

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 43 +++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 486a388c909f1..5b8ffa4db45d7 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -213,6 +213,7 @@ static const struct ix_hash_algo hash_alg_md5 = {
 	.icv		= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
 			  "\xFE\xDC\xBA\x98\x76\x54\x32\x10",
 };
+
 static const struct ix_hash_algo hash_alg_sha1 = {
 	.cfgword	= 0x00000005,
 	.icv		= "\x67\x45\x23\x01\xEF\xCD\xAB\x89\x98\xBA"
@@ -244,12 +245,12 @@ static inline struct crypt_ctl *crypt_phys2virt(dma_addr_t phys)
 
 static inline u32 cipher_cfg_enc(struct crypto_tfm *tfm)
 {
-	return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_enc;
+	return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_enc;
 }
 
 static inline u32 cipher_cfg_dec(struct crypto_tfm *tfm)
 {
-	return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_dec;
+	return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_dec;
 }
 
 static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm)
@@ -260,6 +261,7 @@ static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm)
 static int setup_crypt_desc(void)
 {
 	struct device *dev = &pdev->dev;
+
 	BUILD_BUG_ON(sizeof(struct crypt_ctl) != 64);
 	crypt_virt = dma_alloc_coherent(dev,
 					NPE_QLEN * sizeof(struct crypt_ctl),
@@ -290,7 +292,7 @@ static struct crypt_ctl *get_crypt_desc(void)
 			idx = 0;
 		crypt_virt[i].ctl_flags = CTL_FLAG_USED;
 		spin_unlock_irqrestore(&desc_lock, flags);
-		return crypt_virt +i;
+		return crypt_virt + i;
 	} else {
 		spin_unlock_irqrestore(&desc_lock, flags);
 		return NULL;
@@ -318,7 +320,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
 			idx = NPE_QLEN;
 		crypt_virt[i].ctl_flags = CTL_FLAG_USED;
 		spin_unlock_irqrestore(&emerg_lock, flags);
-		return crypt_virt +i;
+		return crypt_virt + i;
 	} else {
 		spin_unlock_irqrestore(&emerg_lock, flags);
 		return NULL;
@@ -417,7 +419,7 @@ static void one_packet(dma_addr_t phys)
 		break;
 	case CTL_FLAG_GEN_REVAES:
 		ctx = crypto_tfm_ctx(crypt->data.tfm);
-		*(u32*)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
+		*(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
 		if (atomic_dec_and_test(&ctx->configuring))
 			complete(&ctx->completion);
 		break;
@@ -436,8 +438,9 @@ static void crypto_done_action(unsigned long arg)
 {
 	int i;
 
-	for(i=0; i<4; i++) {
+	for (i = 0; i < 4; i++) {
 		dma_addr_t phys = qmgr_get_entry(RECV_QID);
+
 		if (!phys)
 			return;
 		one_packet(phys);
@@ -473,7 +476,7 @@ static int init_ixp_crypto(struct device *dev)
 			goto npe_error;
 	}
 
-	switch ((msg[1]>>16) & 0xff) {
+	switch ((msg[1] >> 16) & 0xff) {
 	case 3:
 		dev_warn(dev, "Firmware of %s lacks AES support\n", npe_name(npe_c));
 		support_aes = 0;
@@ -619,6 +622,7 @@ static int init_tfm_aead(struct crypto_aead *tfm)
 static void exit_tfm(struct crypto_tfm *tfm)
 {
 	struct ixp_ctx *ctx = crypto_tfm_ctx(tfm);
+
 	free_sa_dir(&ctx->encrypt);
 	free_sa_dir(&ctx->decrypt);
 }
@@ -709,11 +713,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize
 	algo = ix_hash(tfm);
 
 	/* write cfg word to cryptinfo */
-	cfgword = algo->cfgword | ( authsize << 6); /* (authsize/4) << 8 */
+	cfgword = algo->cfgword | (authsize << 6); /* (authsize/4) << 8 */
 #ifndef __ARMEB__
 	cfgword ^= 0xAA000000; /* change the "byte swap" flags */
 #endif
-	*(u32*)cinfo = cpu_to_be32(cfgword);
+	*(u32 *)cinfo = cpu_to_be32(cfgword);
 	cinfo += sizeof(cfgword);
 
 	/* write ICV to cryptinfo */
@@ -750,7 +754,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
 	if (!crypt) {
 		return -EAGAIN;
 	}
-	*(u32*)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
+	*(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
 
 	crypt->data.tfm = tfm;
 	crypt->crypt_offs = 0;
@@ -802,21 +806,21 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 			return err;
 	}
 	/* write cfg word to cryptinfo */
-	*(u32*)cinfo = cpu_to_be32(cipher_cfg);
+	*(u32 *)cinfo = cpu_to_be32(cipher_cfg);
 	cinfo += sizeof(cipher_cfg);
 
 	/* write cipher key to cryptinfo */
 	memcpy(cinfo, key, key_len);
 	/* NPE wants keylen set to DES3_EDE_KEY_SIZE even for single DES */
 	if (key_len < DES3_EDE_KEY_SIZE && !(cipher_cfg & MOD_AES)) {
-		memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE -key_len);
+		memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE - key_len);
 		key_len = DES3_EDE_KEY_SIZE;
 	}
 	dir->npe_ctx_idx = sizeof(cipher_cfg) + key_len;
 	dir->npe_mode |= NPE_OP_CRYPT_ENABLE;
-	if ((cipher_cfg & MOD_AES) && !encrypt) {
+	if ((cipher_cfg & MOD_AES) && !encrypt)
 		return gen_rev_aes_key(tfm);
-	}
+
 	return 0;
 }
 
@@ -971,6 +975,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 	}
 	if (req->src != req->dst) {
 		struct buffer_desc dst_hook;
+
 		crypt->mode |= NPE_OP_NOT_IN_PLACE;
 		/* This was never tested by Intel
 		 * for more than one dst buffer, I think. */
@@ -1025,7 +1030,7 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req)
 	int ret;
 
 	/* set up counter block */
-        memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE);
+	memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE);
 	memcpy(iv + CTR_RFC3686_NONCE_SIZE, info, CTR_RFC3686_IV_SIZE);
 
 	/* initialize counter portion of counter block */
@@ -1067,7 +1072,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	} else {
 		dir = &ctx->decrypt;
 		/* req->cryptlen includes the authsize when decrypting */
-		cryptlen = req->cryptlen -authsize;
+		cryptlen = req->cryptlen - authsize;
 		eff_cryptlen -= authsize;
 	}
 	crypt = get_crypt_desc();
@@ -1188,7 +1193,7 @@ static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 {
 	int max = crypto_aead_maxauthsize(tfm) >> 2;
 
-	if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3))
+	if ((authsize >> 2) < 1 || (authsize >> 2) > max || (authsize & 3))
 		return -EINVAL;
 	return aead_setup(tfm, authsize);
 }
@@ -1453,7 +1458,7 @@ static int __init ixp_module_init(void)
 		platform_device_unregister(pdev);
 		return err;
 	}
-	for (i=0; i< num; i++) {
+	for (i = 0; i < num; i++) {
 		struct skcipher_alg *cra = &ixp4xx_algos[i].crypto;
 
 		if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
@@ -1536,7 +1541,7 @@ static void __exit ixp_module_exit(void)
 			crypto_unregister_aead(&ixp4xx_aeads[i].crypto);
 	}
 
-	for (i=0; i< num; i++) {
+	for (i = 0; i < num; i++) {
 		if (ixp4xx_algos[i].registered)
 			crypto_unregister_skcipher(&ixp4xx_algos[i].crypto);
 	}
-- 
GitLab


From 87d11a5e9621d2dd9edaee007b339e3afbfcf2ee Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:14 +0000
Subject: [PATCH 0720/3804] crypto: ixp4xx - Do not initialize static to NULL

This patch fixes all checkpatch reports about static initialization.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 5b8ffa4db45d7..954696a398750 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -221,10 +221,10 @@ static const struct ix_hash_algo hash_alg_sha1 = {
 };
 
 static struct npe *npe_c;
-static struct dma_pool *buffer_pool = NULL;
-static struct dma_pool *ctx_pool = NULL;
+static struct dma_pool *buffer_pool;
+static struct dma_pool *ctx_pool;
 
-static struct crypt_ctl *crypt_virt = NULL;
+static struct crypt_ctl *crypt_virt;
 static dma_addr_t crypt_phys;
 
 static int support_aes = 1;
@@ -275,7 +275,7 @@ static DEFINE_SPINLOCK(desc_lock);
 static struct crypt_ctl *get_crypt_desc(void)
 {
 	int i;
-	static int idx = 0;
+	static int idx;
 	unsigned long flags;
 
 	spin_lock_irqsave(&desc_lock, flags);
-- 
GitLab


From ffb017e9ac66d3e4f368f556d13da79f80611997 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:15 +0000
Subject: [PATCH 0721/3804] crypto: ixp4xx - remove brackets from single
 statement

Fixes all single-statement issues reported by checkpatch.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 47 +++++++++++++++-------------------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 954696a398750..03ae9c3a8d97f 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -378,9 +378,9 @@ static void one_packet(dma_addr_t phys)
 
 		free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 		free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
-		if (req_ctx->hmac_virt) {
+		if (req_ctx->hmac_virt)
 			finish_scattered_hmac(crypt);
-		}
+
 		req->base.complete(&req->base, failed);
 		break;
 	}
@@ -402,9 +402,9 @@ static void one_packet(dma_addr_t phys)
 			}
 		}
 
-		if (req_ctx->dst) {
+		if (req_ctx->dst)
 			free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
-		}
+
 		free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 		req->base.complete(&req->base, failed);
 		break;
@@ -497,14 +497,14 @@ static int init_ixp_crypto(struct device *dev)
 	buffer_pool = dma_pool_create("buffer", dev,
 			sizeof(struct buffer_desc), 32, 0);
 	ret = -ENOMEM;
-	if (!buffer_pool) {
+	if (!buffer_pool)
 		goto err;
-	}
+
 	ctx_pool = dma_pool_create("context", dev,
 			NPE_CTX_LEN, 16, 0);
-	if (!ctx_pool) {
+	if (!ctx_pool)
 		goto err;
-	}
+
 	ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0,
 				 "ixp_crypto:out", NULL);
 	if (ret)
@@ -545,11 +545,10 @@ static void release_ixp_crypto(struct device *dev)
 
 	npe_release(npe_c);
 
-	if (crypt_virt) {
+	if (crypt_virt)
 		dma_free_coherent(dev,
 			NPE_QLEN * sizeof(struct crypt_ctl),
 			crypt_virt, crypt_phys);
-	}
 }
 
 static void reset_sa_dir(struct ix_sa_dir *dir)
@@ -562,9 +561,9 @@ static void reset_sa_dir(struct ix_sa_dir *dir)
 static int init_sa_dir(struct ix_sa_dir *dir)
 {
 	dir->npe_ctx = dma_pool_alloc(ctx_pool, GFP_KERNEL, &dir->npe_ctx_phys);
-	if (!dir->npe_ctx) {
+	if (!dir->npe_ctx)
 		return -ENOMEM;
-	}
+
 	reset_sa_dir(dir);
 	return 0;
 }
@@ -585,9 +584,9 @@ static int init_tfm(struct crypto_tfm *tfm)
 	if (ret)
 		return ret;
 	ret = init_sa_dir(&ctx->decrypt);
-	if (ret) {
+	if (ret)
 		free_sa_dir(&ctx->encrypt);
-	}
+
 	return ret;
 }
 
@@ -669,9 +668,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
 
 	memcpy(pad, key, key_len);
 	memset(pad + key_len, 0, HMAC_PAD_BLOCKLEN - key_len);
-	for (i = 0; i < HMAC_PAD_BLOCKLEN; i++) {
+	for (i = 0; i < HMAC_PAD_BLOCKLEN; i++)
 		pad[i] ^= xpad;
-	}
 
 	crypt->data.tfm = tfm;
 	crypt->regist_ptr = pad;
@@ -751,9 +749,9 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
 	struct ix_sa_dir *dir = &ctx->decrypt;
 
 	crypt = get_crypt_desc_emerg();
-	if (!crypt) {
+	if (!crypt)
 		return -EAGAIN;
-	}
+
 	*(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
 
 	crypt->data.tfm = tfm;
@@ -1004,9 +1002,9 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 free_buf_src:
 	free_buf_chain(dev, req_ctx->src, crypt->src_buf);
 free_buf_dest:
-	if (req->src != req->dst) {
+	if (req->src != req->dst)
 		free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
-	}
+
 	crypt->ctl_flags = CTL_FLAG_UNUSED;
 	return -ENOMEM;
 }
@@ -1462,14 +1460,11 @@ static int __init ixp_module_init(void)
 		struct skcipher_alg *cra = &ixp4xx_algos[i].crypto;
 
 		if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-			"%s"IXP_POSTFIX, cra->base.cra_name) >=
-			CRYPTO_MAX_ALG_NAME)
-		{
+			     "%s"IXP_POSTFIX, cra->base.cra_name) >=
+			     CRYPTO_MAX_ALG_NAME)
 			continue;
-		}
-		if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES)) {
+		if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES))
 			continue;
-		}
 
 		/* block ciphers */
 		cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
-- 
GitLab


From c5e070311fab7aa8398f67b97d2a452d9eb1a112 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:16 +0000
Subject: [PATCH 0722/3804] crypto: ixp4xx - Correct functions alignment

This patch fixes all alignment issues reported by checkpatch.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 65 ++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 30 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 03ae9c3a8d97f..b38650b0fea10 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -354,8 +354,8 @@ static void finish_scattered_hmac(struct crypt_ctl *crypt)
 	int decryptlen = req->assoclen + req->cryptlen - authsize;
 
 	if (req_ctx->encrypt) {
-		scatterwalk_map_and_copy(req_ctx->hmac_virt,
-			req->dst, decryptlen, authsize, 1);
+		scatterwalk_map_and_copy(req_ctx->hmac_virt, req->dst,
+					 decryptlen, authsize, 1);
 	}
 	dma_pool_free(buffer_pool, req_ctx->hmac_virt, crypt->icv_rev_aes);
 }
@@ -412,7 +412,7 @@ static void one_packet(dma_addr_t phys)
 	case CTL_FLAG_GEN_ICV:
 		ctx = crypto_tfm_ctx(crypt->data.tfm);
 		dma_pool_free(ctx_pool, crypt->regist_ptr,
-				crypt->regist_buf->phys_addr);
+			      crypt->regist_buf->phys_addr);
 		dma_pool_free(buffer_pool, crypt->regist_buf, crypt->src_buf);
 		if (atomic_dec_and_test(&ctx->configuring))
 			complete(&ctx->completion);
@@ -494,14 +494,13 @@ static int init_ixp_crypto(struct device *dev)
 	 * so assure it is large enough
 	 */
 	BUILD_BUG_ON(SHA1_DIGEST_SIZE > sizeof(struct buffer_desc));
-	buffer_pool = dma_pool_create("buffer", dev,
-			sizeof(struct buffer_desc), 32, 0);
+	buffer_pool = dma_pool_create("buffer", dev, sizeof(struct buffer_desc),
+				      32, 0);
 	ret = -ENOMEM;
 	if (!buffer_pool)
 		goto err;
 
-	ctx_pool = dma_pool_create("context", dev,
-			NPE_CTX_LEN, 16, 0);
+	ctx_pool = dma_pool_create("context", dev, NPE_CTX_LEN, 16, 0);
 	if (!ctx_pool)
 		goto err;
 
@@ -546,9 +545,8 @@ static void release_ixp_crypto(struct device *dev)
 	npe_release(npe_c);
 
 	if (crypt_virt)
-		dma_free_coherent(dev,
-			NPE_QLEN * sizeof(struct crypt_ctl),
-			crypt_virt, crypt_phys);
+		dma_free_coherent(dev, NPE_QLEN * sizeof(struct crypt_ctl),
+				  crypt_virt, crypt_phys);
 }
 
 static void reset_sa_dir(struct ix_sa_dir *dir)
@@ -641,7 +639,8 @@ static void exit_tfm_aead(struct crypto_aead *tfm)
 }
 
 static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
-		int init_len, u32 ctx_addr, const u8 *key, int key_len)
+			      int init_len, u32 ctx_addr, const u8 *key,
+			      int key_len)
 {
 	struct ixp_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct crypt_ctl *crypt;
@@ -735,11 +734,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize
 		dir->npe_mode |= NPE_OP_HASH_VERIFY;
 
 	ret = register_chain_var(tfm, HMAC_OPAD_VALUE, otarget,
-			init_len, npe_ctx_addr, key, key_len);
+				 init_len, npe_ctx_addr, key, key_len);
 	if (ret)
 		return ret;
 	return register_chain_var(tfm, HMAC_IPAD_VALUE, itarget,
-			init_len, npe_ctx_addr, key, key_len);
+				  init_len, npe_ctx_addr, key, key_len);
 }
 
 static int gen_rev_aes_key(struct crypto_tfm *tfm)
@@ -770,8 +769,8 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
 	return 0;
 }
 
-static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
-		const u8 *key, int key_len)
+static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key,
+			int key_len)
 {
 	u8 *cinfo;
 	u32 cipher_cfg;
@@ -791,9 +790,15 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
 	}
 	if (cipher_cfg & MOD_AES) {
 		switch (key_len) {
-		case 16: keylen_cfg = MOD_AES128; break;
-		case 24: keylen_cfg = MOD_AES192; break;
-		case 32: keylen_cfg = MOD_AES256; break;
+		case 16:
+			keylen_cfg = MOD_AES128;
+			break;
+		case 24:
+			keylen_cfg = MOD_AES192;
+			break;
+		case 32:
+			keylen_cfg = MOD_AES256;
+			break;
 		default:
 			return -EINVAL;
 		}
@@ -855,7 +860,7 @@ static struct buffer_desc *chainup_buffers(struct device *dev,
 }
 
 static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key,
-			unsigned int key_len)
+		       unsigned int key_len)
 {
 	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
@@ -892,7 +897,7 @@ static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 }
 
 static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
-		unsigned int key_len)
+			       unsigned int key_len)
 {
 	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
 
@@ -901,7 +906,7 @@ static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		return -EINVAL;
 
 	memcpy(ctx->nonce, key + (key_len - CTR_RFC3686_NONCE_SIZE),
-			CTR_RFC3686_NONCE_SIZE);
+	       CTR_RFC3686_NONCE_SIZE);
 
 	key_len -= CTR_RFC3686_NONCE_SIZE;
 	return ablk_setkey(tfm, key, key_len);
@@ -979,7 +984,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 		 * for more than one dst buffer, I think. */
 		req_ctx->dst = NULL;
 		if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook,
-					flags, DMA_FROM_DEVICE))
+				     flags, DMA_FROM_DEVICE))
 			goto free_buf_dest;
 		src_direction = DMA_TO_DEVICE;
 		req_ctx->dst = dst_hook.next;
@@ -988,8 +993,8 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 		req_ctx->dst = NULL;
 	}
 	req_ctx->src = NULL;
-	if (!chainup_buffers(dev, req->src, nbytes, &src_hook,
-				flags, src_direction))
+	if (!chainup_buffers(dev, req->src, nbytes, &src_hook, flags,
+			     src_direction))
 		goto free_buf_src;
 
 	req_ctx->src = src_hook.next;
@@ -1042,7 +1047,7 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req)
 }
 
 static int aead_perform(struct aead_request *req, int encrypt,
-		int cryptoffset, int eff_cryptlen, u8 *iv)
+			int cryptoffset, int eff_cryptlen, u8 *iv)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
@@ -1130,12 +1135,12 @@ static int aead_perform(struct aead_request *req, int encrypt,
 		/* The 12 hmac bytes are scattered,
 		 * we need to copy them into a safe buffer */
 		req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
-				&crypt->icv_rev_aes);
+						    &crypt->icv_rev_aes);
 		if (unlikely(!req_ctx->hmac_virt))
 			goto free_buf_dst;
 		if (!encrypt) {
 			scatterwalk_map_and_copy(req_ctx->hmac_virt,
-				req->src, cryptlen, authsize, 0);
+						 req->src, cryptlen, authsize, 0);
 		}
 		req_ctx->encrypt = encrypt;
 	} else {
@@ -1176,11 +1181,11 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 	if (ret)
 		goto out;
 	ret = setup_auth(&tfm->base, 0, authsize, ctx->authkey,
-			ctx->authkey_len, digest_len);
+			 ctx->authkey_len, digest_len);
 	if (ret)
 		goto out;
 	ret = setup_auth(&tfm->base, 1, authsize,  ctx->authkey,
-			ctx->authkey_len, digest_len);
+			 ctx->authkey_len, digest_len);
 out:
 	if (!atomic_dec_and_test(&ctx->configuring))
 		wait_for_completion(&ctx->completion);
@@ -1197,7 +1202,7 @@ static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
 }
 
 static int aead_setkey(struct crypto_aead *tfm, const u8 *key,
-			unsigned int keylen)
+		       unsigned int keylen)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
 	struct crypto_authenc_keys keys;
-- 
GitLab


From 9ca04a51a7e0b08b0e402ddc65acba00678a91d7 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:17 +0000
Subject: [PATCH 0723/3804] MAINTAINERS: add ixp4xx_crypto to the right arch
 list

drivers/crypto/ixp4xx_crypto.c is missing in the IXP4XX arch file list.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..75885258fae39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1972,6 +1972,7 @@ F:	Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt
 F:	Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
 F:	arch/arm/mach-ixp4xx/
 F:	drivers/clocksource/timer-ixp4xx.c
+F:	drivers/crypto/ixp4xx_crypto.c
 F:	drivers/gpio/gpio-ixp4xx.c
 F:	drivers/irqchip/irq-ixp4xx.c
 F:	include/linux/irqchip/irq-ixp4xx.h
-- 
GitLab


From 653fdbbf2d2006322b73dfa50add020625947a60 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 5 May 2021 20:26:18 +0000
Subject: [PATCH 0724/3804] MAINTAINERS: add myself as maintainer of
 ixp4xx_crypto

No maintainer exists for ixp4xx_crypto. Since I have access to a board
with it, I propose to maintain it.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 75885258fae39..6df5a401ff92f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9236,6 +9236,12 @@ F:	Documentation/admin-guide/media/ipu3_rcb.svg
 F:	Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
 F:	drivers/staging/media/ipu3/
 
+INTEL IXP4XX CRYPTO SUPPORT
+M:	Corentin Labbe <clabbe@baylibre.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/ixp4xx_crypto.c
+
 INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
 M:	Krzysztof Halasa <khalasa@piap.pl>
 S:	Maintained
-- 
GitLab


From 3c995c4c7575b7b248d16e765fe05c01795fcd14 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 7 May 2021 17:56:57 +0800
Subject: [PATCH 0725/3804] crypto: cavium/nitrox - Remove redundant
 initialization of 'sg'

Pointer 'sg' is being initialized, however this value is never
read as 'sg' is assigned the same value in for_each_sg().
Remove the redundant assignment.

Cleans up clang warning:

drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:161:22: warning: Value
stored to 'sg' during its initialization is never read
[clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index df95ba26b4141..bc35d4cc41b68 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -159,7 +159,7 @@ static int dma_map_inbufs(struct nitrox_softreq *sr,
 			  struct se_crypto_request *req)
 {
 	struct device *dev = DEV(sr->ndev);
-	struct scatterlist *sg = req->src;
+	struct scatterlist *sg;
 	int i, nents, ret = 0;
 
 	nents = dma_map_sg(dev, req->src, sg_nents(req->src),
-- 
GitLab


From eb9e492f5c06fe197550e68973f88cba6e14274a Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 7 May 2021 17:58:07 +0800
Subject: [PATCH 0726/3804] crypto: cavium/nitrox - Fix kernel-doc

Fix function name in nitrox_reqmgr.c kernel-doc comment
to remove a warning.

drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:382: warning: expecting
prototype for nitrox_se_request(). Prototype was for
nitrox_process_se_request() instead

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index bc35d4cc41b68..4434c92d6229f 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -369,7 +369,7 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr)
 }
 
 /**
- * nitrox_se_request - Send request to SE core
+ * nitrox_process_se_request - Send request to SE core
  * @ndev: NITROX device
  * @req: Crypto request
  *
-- 
GitLab


From 06676aa1f455c74e3ad1624cea3acb9ed2ef71ae Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Sat, 8 May 2021 11:14:55 +0800
Subject: [PATCH 0727/3804] crypto: nx - add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-842-pseries.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index cc8dd3072b8b7..8ee547ee378ec 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -1069,6 +1069,7 @@ static const struct vio_device_id nx842_vio_driver_ids[] = {
 	{"ibm,compression-v1", "ibm,compression"},
 	{"", ""},
 };
+MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids);
 
 static struct vio_driver nx842_vio_driver = {
 	.name = KBUILD_MODNAME,
-- 
GitLab


From b01360384009ab066940b45f34880991ea7ccbfb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 15:00:49 +0800
Subject: [PATCH 0728/3804] crypto: ux500 - Fix error return code in
 hash_hw_final()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 8a63b1994c50 ("crypto: ux500 - Add driver for HASH hardware")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ux500/hash/hash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index ecb7412e84e3e..51a6e1a424349 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1011,6 +1011,7 @@ static int hash_hw_final(struct ahash_request *req)
 			goto out;
 		}
 	} else if (req->nbytes == 0 && ctx->keylen > 0) {
+		ret = -EPERM;
 		dev_err(device_data->dev, "%s: Empty message with keylength > 0, NOT supported\n",
 			__func__);
 		goto out;
-- 
GitLab


From 156ed0215ef365604f2382d5164c36d3a1cfd98f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20=C3=85gren?= <martin.agren@gmail.com>
Date: Thu, 22 Apr 2021 21:22:40 +0200
Subject: [PATCH 0729/3804] uio/uio_pci_generic: fix return value changed in
 refactoring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit ef84928cff58 ("uio/uio_pci_generic: use device-managed function
equivalents") was able to simplify various error paths thanks to no
longer having to clean up on the way out. Some error paths were dropped,
others were simplified. In one of those simplifications, the return
value was accidentally changed from -ENODEV to -ENOMEM. Restore the old
return value.

Fixes: ef84928cff58 ("uio/uio_pci_generic: use device-managed function equivalents")
Cc: stable <stable@vger.kernel.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Martin Ågren <martin.agren@gmail.com>
Link: https://lore.kernel.org/r/20210422192240.1136373-1-martin.agren@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_pci_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/uio/uio_pci_generic.c b/drivers/uio/uio_pci_generic.c
index c7d681fef198d..3bb0b00754679 100644
--- a/drivers/uio/uio_pci_generic.c
+++ b/drivers/uio/uio_pci_generic.c
@@ -82,7 +82,7 @@ static int probe(struct pci_dev *pdev,
 	}
 
 	if (pdev->irq && !pci_intx_mask_supported(pdev))
-		return -ENOMEM;
+		return -ENODEV;
 
 	gdev = devm_kzalloc(&pdev->dev, sizeof(struct uio_pci_generic_dev), GFP_KERNEL);
 	if (!gdev)
-- 
GitLab


From 3ee098f96b8b6c1a98f7f97915f8873164e6af9d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 09:13:03 +0200
Subject: [PATCH 0730/3804] uio_hv_generic: Fix a memory leak in error handling
 paths

If 'vmbus_establish_gpadl()' fails, the (recv|send)_gpadl will not be
updated and 'hv_uio_cleanup()' in the error handling path will not be
able to free the corresponding buffer.

In such a case, we need to free the buffer explicitly.

Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/4fdaff557deef6f0475d02ba7922ddbaa1ab08a6.1620544055.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_hv_generic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 0330ba99730e2..eebc399f2cc7d 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -296,8 +296,10 @@ hv_uio_probe(struct hv_device *dev,
 
 	ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
 				    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
-	if (ret)
+	if (ret) {
+		vfree(pdata->recv_buf);
 		goto fail_close;
+	}
 
 	/* put Global Physical Address Label in name */
 	snprintf(pdata->recv_name, sizeof(pdata->recv_name),
@@ -316,8 +318,10 @@ hv_uio_probe(struct hv_device *dev,
 
 	ret = vmbus_establish_gpadl(channel, pdata->send_buf,
 				    SEND_BUFFER_SIZE, &pdata->send_gpadl);
-	if (ret)
+	if (ret) {
+		vfree(pdata->send_buf);
 		goto fail_close;
+	}
 
 	snprintf(pdata->send_name, sizeof(pdata->send_name),
 		 "send:%u", pdata->send_gpadl);
-- 
GitLab


From 0b0226be3a52dadd965644bc52a807961c2c26df Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 May 2021 09:13:12 +0200
Subject: [PATCH 0731/3804] uio_hv_generic: Fix another memory leak in error
 handling paths

Memory allocated by 'vmbus_alloc_ring()' at the beginning of the probe
function is never freed in the error handling path.

Add the missing 'vmbus_free_ring()' call.

Note that it is already freed in the .remove function.

Fixes: cdfa835c6e5e ("uio_hv_generic: defer opening vmbus until first use")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/0d86027b8eeed8e6360bc3d52bcdb328ff9bdca1.1620544055.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_hv_generic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index eebc399f2cc7d..652fe25475878 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -291,7 +291,7 @@ hv_uio_probe(struct hv_device *dev,
 	pdata->recv_buf = vzalloc(RECV_BUFFER_SIZE);
 	if (pdata->recv_buf == NULL) {
 		ret = -ENOMEM;
-		goto fail_close;
+		goto fail_free_ring;
 	}
 
 	ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
@@ -351,6 +351,8 @@ hv_uio_probe(struct hv_device *dev,
 
 fail_close:
 	hv_uio_cleanup(dev, pdata);
+fail_free_ring:
+	vmbus_free_ring(dev->channel);
 
 	return ret;
 }
-- 
GitLab


From 2962484dfef8dbb7f9059822bc26ce8a04d0e47c Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Tue, 20 Apr 2021 21:30:50 +0800
Subject: [PATCH 0732/3804] misc: eeprom: at24: check suspend status before
 disable regulator

cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") disables
regulator in runtime suspend. If runtime suspend is called before
regulator disable, it will result in unbalanced regulator disabling.

Fixes: cd5676db0574 ("misc: eeprom: at24: support pm_runtime control")
Cc: stable <stable@vger.kernel.org>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Link: https://lore.kernel.org/r/20210420133050.377209-1-hsinyi@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/eeprom/at24.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 926408b41270c..7a6f01ace78ac 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -763,7 +763,8 @@ static int at24_probe(struct i2c_client *client)
 	at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
 	if (IS_ERR(at24->nvmem)) {
 		pm_runtime_disable(dev);
-		regulator_disable(at24->vcc_reg);
+		if (!pm_runtime_status_suspended(dev))
+			regulator_disable(at24->vcc_reg);
 		return PTR_ERR(at24->nvmem);
 	}
 
@@ -774,7 +775,8 @@ static int at24_probe(struct i2c_client *client)
 	err = at24_read(at24, 0, &test_byte, 1);
 	if (err) {
 		pm_runtime_disable(dev);
-		regulator_disable(at24->vcc_reg);
+		if (!pm_runtime_status_suspended(dev))
+			regulator_disable(at24->vcc_reg);
 		return -ENODEV;
 	}
 
-- 
GitLab


From 447c19f3b5074409c794b350b10306e1da1ef4ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:02:50 +0100
Subject: [PATCH 0733/3804] io_uring: fix ltout double free on completion race

Always remove the linked timeout in io_link_timeout_fn() from the master
request's link list. Otherwise we may get a use-after-free: first
io_link_timeout_fn() puts the linked timeout in the fail path, and then
the timeout is found and put again when the master request is freed.

Cc: stable@vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223@syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.1620990121.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ac5e278a91e6..599102cc6dfc2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6354,10 +6354,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	 * We don't expect the list to be empty, that will only happen if we
 	 * race with the completion of the linked work.
 	 */
-	if (prev && req_ref_inc_not_zero(prev))
+	if (prev) {
 		io_remove_next_linked(prev);
-	else
-		prev = NULL;
+		if (!req_ref_inc_not_zero(prev))
+			prev = NULL;
+	}
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
 	if (prev) {
-- 
GitLab


From 2d74d0421e5afc1e7be7167ffb7eb8b2cf32343a Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:05:46 +0100
Subject: [PATCH 0734/3804] io_uring: further remove sqpoll limits on opcodes

There are three types of requests that are still left disabled for sqpoll,
namely epoll ctl, statx, and resource updates. Since the SQPOLL task now
closely mimics a userspace thread, remove the restrictions.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/909b52d70c45636d8d7897582474ea5aab5eed34.1620990306.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 599102cc6dfc2..29ec5b28c73dc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4035,7 +4035,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #if defined(CONFIG_EPOLL)
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
-	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 
 	req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4150,7 +4150,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
 
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (sqe->ioprio || sqe->buf_index)
 		return -EINVAL;
@@ -5827,8 +5827,6 @@ done:
 static int io_rsrc_update_prep(struct io_kiocb *req,
 				const struct io_uring_sqe *sqe)
 {
-	if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
-		return -EINVAL;
 	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
 		return -EINVAL;
 	if (sqe->ioprio || sqe->rw_flags)
-- 
GitLab


From 489809e2e22b3dedc0737163d97eb2b574137b42 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Fri, 14 May 2021 12:06:44 +0100
Subject: [PATCH 0735/3804] io_uring: increase max number of reg buffers

Since recent changes, instead of storing a large array of struct
io_mapped_ubuf we store pointers to them, which is 4 times slimmer, so
we need not worry so much about restricting the max number of registered
buffer slots. Increase the limit 4 times.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/d3dee1da37f46da416aa96a16bf9e5094e10584d.1620990371.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29ec5b28c73dc..e481ac8a757ad 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,8 @@
 #define IORING_MAX_RESTRICTIONS	(IORING_RESTRICTION_LAST + \
 				 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define IORING_MAX_REG_BUFFERS	(1U << 14)
+
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
 				IOSQE_BUFFER_SELECT)
@@ -8389,7 +8391,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	if (ctx->user_bufs)
 		return -EBUSY;
-	if (!nr_args || nr_args > UIO_MAXIOV)
+	if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
 		return -EINVAL;
 	ret = io_rsrc_node_switch_start(ctx);
 	if (ret)
-- 
GitLab


From 7c2fc79250cafa1a29befeb60163028ec4720814 Mon Sep 17 00:00:00 2001
From: Chen Li <chenli@uniontech.com>
Date: Tue, 27 Apr 2021 15:17:45 +0800
Subject: [PATCH 0736/3804] phy: usb: Fix misuse of IS_ENABLED

While IS_ENABLED() is perfectly fine for CONFIG_* symbols, it is not
for other symbols such as __BIG_ENDIAN that is provided directly by
the compiler.

Switch to use CONFIG_CPU_BIG_ENDIAN instead of __BIG_ENDIAN.

Signed-off-by: Chen Li <chenli@uniontech.com>
Reviewed-by: Al Cooper <alcooperx@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Fixes: 94583a41047e ("phy: usb: Restructure in preparation for adding 7216 USB support")
Link: https://lore.kernel.org/r/87czuggpra.wl-chenli@uniontech.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb-init.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h
index 899b9eb43fad6..a39f30fa2e991 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.h
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.h
@@ -78,7 +78,7 @@ static inline u32 brcm_usb_readl(void __iomem *addr)
 	 * Other architectures (e.g., ARM) either do not support big endian, or
 	 * else leave I/O in little endian mode.
 	 */
-	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(__BIG_ENDIAN))
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 		return __raw_readl(addr);
 	else
 		return readl_relaxed(addr);
@@ -87,7 +87,7 @@ static inline u32 brcm_usb_readl(void __iomem *addr)
 static inline void brcm_usb_writel(u32 val, void __iomem *addr)
 {
 	/* See brcmnand_readl() comments */
-	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(__BIG_ENDIAN))
+	if (IS_ENABLED(CONFIG_MIPS) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
 		__raw_writel(val, addr);
 	else
 		writel_relaxed(val, addr);
-- 
GitLab


From 27e7db56cf3dffd302bd7ddfacb1d405cf671a2a Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 5 May 2021 09:47:34 -0700
Subject: [PATCH 0737/3804] spi: Don't have controller clean up spi device
 before driver unbind

When a spi device is unregistered and triggers a driver unbind, the
driver might need to access the spi device. So, don't have the
controller clean up the spi device before the driver is unbound. Clean
up the spi device after the driver is unbound.

Fixes: c7299fea6769 ("spi: Fix spi device unregister flow")
Reported-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Tested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210505164734.175546-1-saravanak@google.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 98048af04abf9..e353b7a9e54eb 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -714,8 +714,6 @@ void spi_unregister_device(struct spi_device *spi)
 	if (!spi)
 		return;
 
-	spi_cleanup(spi);
-
 	if (spi->dev.of_node) {
 		of_node_clear_flag(spi->dev.of_node, OF_POPULATED);
 		of_node_put(spi->dev.of_node);
@@ -723,7 +721,9 @@ void spi_unregister_device(struct spi_device *spi)
 	if (ACPI_COMPANION(&spi->dev))
 		acpi_device_clear_enumerated(ACPI_COMPANION(&spi->dev));
 	device_remove_software_node(&spi->dev);
-	device_unregister(&spi->dev);
+	device_del(&spi->dev);
+	spi_cleanup(spi);
+	put_device(&spi->dev);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_device);
 
-- 
GitLab


From ea030ca688193462b8d612c1628c37129aa30072 Mon Sep 17 00:00:00 2001
From: Lucas Tanure <tanureal@opensource.cirrus.com>
Date: Wed, 12 May 2021 14:52:22 +0100
Subject: [PATCH 0738/3804] regmap-i2c: Set regmap max raw r/w from quirks

Set regmap raw read/write from i2c quirks max read/write
so regmap_raw_read/write can split the access into chunks

Signed-off-by: Lucas Tanure <tanureal@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210512135222.223203-1-tanureal@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-i2c.c | 45 +++++++++++++++++++++++++++-----
 drivers/base/regmap/regmap.c     |  2 ++
 include/linux/regmap.h           |  2 ++
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index 62b95a9212ae1..980e5ce6a3a35 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -306,33 +306,64 @@ static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = {
 static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
 					const struct regmap_config *config)
 {
+	const struct i2c_adapter_quirks *quirks;
+	const struct regmap_bus *bus = NULL;
+	struct regmap_bus *ret_bus;
+	u16 max_read = 0, max_write = 0;
+
 	if (i2c_check_functionality(i2c->adapter, I2C_FUNC_I2C))
-		return &regmap_i2c;
+		bus = &regmap_i2c;
 	else if (config->val_bits == 8 && config->reg_bits == 8 &&
 		 i2c_check_functionality(i2c->adapter,
 					 I2C_FUNC_SMBUS_I2C_BLOCK))
-		return &regmap_i2c_smbus_i2c_block;
+		bus = &regmap_i2c_smbus_i2c_block;
 	else if (config->val_bits == 8 && config->reg_bits == 16 &&
 		i2c_check_functionality(i2c->adapter,
 					I2C_FUNC_SMBUS_I2C_BLOCK))
-		return &regmap_i2c_smbus_i2c_block_reg16;
+		bus = &regmap_i2c_smbus_i2c_block_reg16;
 	else if (config->val_bits == 16 && config->reg_bits == 8 &&
 		 i2c_check_functionality(i2c->adapter,
 					 I2C_FUNC_SMBUS_WORD_DATA))
 		switch (regmap_get_val_endian(&i2c->dev, NULL, config)) {
 		case REGMAP_ENDIAN_LITTLE:
-			return &regmap_smbus_word;
+			bus = &regmap_smbus_word;
+			break;
 		case REGMAP_ENDIAN_BIG:
-			return &regmap_smbus_word_swapped;
+			bus = &regmap_smbus_word_swapped;
+			break;
 		default:		/* everything else is not supported */
 			break;
 		}
 	else if (config->val_bits == 8 && config->reg_bits == 8 &&
 		 i2c_check_functionality(i2c->adapter,
 					 I2C_FUNC_SMBUS_BYTE_DATA))
-		return &regmap_smbus_byte;
+		bus = &regmap_smbus_byte;
+
+	if (!bus)
+		return ERR_PTR(-ENOTSUPP);
+
+	quirks = i2c->adapter->quirks;
+	if (quirks) {
+		if (quirks->max_read_len &&
+		    (bus->max_raw_read == 0 || bus->max_raw_read > quirks->max_read_len))
+			max_read = quirks->max_read_len;
+
+		if (quirks->max_write_len &&
+		    (bus->max_raw_write == 0 || bus->max_raw_write > quirks->max_write_len))
+			max_write = quirks->max_write_len;
+
+		if (max_read || max_write) {
+			ret_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
+			if (!ret_bus)
+				return ERR_PTR(-ENOMEM);
+			ret_bus->free_on_exit = true;
+			ret_bus->max_raw_read = max_read;
+			ret_bus->max_raw_write = max_write;
+			bus = ret_bus;
+		}
+	}
 
-	return ERR_PTR(-ENOTSUPP);
+	return bus;
 }
 
 struct regmap *__regmap_init_i2c(struct i2c_client *i2c,
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 297e95be25b3b..0d185ec018a5c 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1496,6 +1496,8 @@ void regmap_exit(struct regmap *map)
 		mutex_destroy(&map->mutex);
 	kfree_const(map->name);
 	kfree(map->patch);
+	if (map->bus && map->bus->free_on_exit)
+		kfree(map->bus);
 	kfree(map);
 }
 EXPORT_SYMBOL_GPL(regmap_exit);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f87a11a5cc4a7..8c16e6fa0f665 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -502,6 +502,7 @@ typedef void (*regmap_hw_free_context)(void *context);
  *     DEFAULT, BIG is assumed.
  * @max_raw_read: Max raw read size that can be used on the bus.
  * @max_raw_write: Max raw write size that can be used on the bus.
+ * @free_on_exit: kfree this on exit of regmap
  */
 struct regmap_bus {
 	bool fast_io;
@@ -519,6 +520,7 @@ struct regmap_bus {
 	enum regmap_endian val_format_endian_default;
 	size_t max_raw_read;
 	size_t max_raw_write;
+	bool free_on_exit;
 };
 
 /*
-- 
GitLab


From 28188cc461f6cf8b7d28de4f6df52014cc1d5e39 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 13 May 2021 09:39:04 -0700
Subject: [PATCH 0739/3804] x86/cpu: Fix core name for Sapphire Rapids

Sapphire Rapids uses Golden Cove, not Willow Cove.

Fixes: 53375a5a218e ("x86/cpu: Resort and comment Intel models")
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210513163904.3083274-1-ak@linux.intel.com
---
 arch/x86/include/asm/intel-family.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 955b06d6325af..27158436f322d 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -102,7 +102,8 @@
 
 #define INTEL_FAM6_TIGERLAKE_L		0x8C	/* Willow Cove */
 #define INTEL_FAM6_TIGERLAKE		0x8D	/* Willow Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Willow Cove */
+
+#define INTEL_FAM6_SAPPHIRERAPIDS_X	0x8F	/* Golden Cove */
 
 #define INTEL_FAM6_ALDERLAKE		0x97	/* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L		0x9A	/* Golden Cove / Gracemont */
-- 
GitLab


From 3486d2c9be652a31033363bdd50391b0c8a8fe21 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 13 May 2021 09:32:46 +0200
Subject: [PATCH 0740/3804] clocksource/drivers/hyper-v: Re-enable
 VDSO_CLOCKMODE_HVCLOCK on X86

Mohammed reports (https://bugzilla.kernel.org/show_bug.cgi?id=213029)
the commit e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO
differences inline") broke vDSO on x86. The problem appears to be that
VDSO_CLOCKMODE_HVCLOCK is an enum value in 'enum vdso_clock_mode' and
'#ifdef VDSO_CLOCKMODE_HVCLOCK' branch evaluates to false (it is not
a define).

Use a dedicated HAVE_VDSO_CLOCKMODE_HVCLOCK define instead.

Fixes: e4ab4658f1cf ("clocksource/drivers/hyper-v: Handle vDSO differences inline")
Reported-by: Mohammed Gamal <mgamal@redhat.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20210513073246.1715070-1-vkuznets@redhat.com
---
 arch/x86/include/asm/vdso/clocksource.h | 2 ++
 drivers/clocksource/hyperv_timer.c      | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h
index 119ac8612d893..136e5e57cfe11 100644
--- a/arch/x86/include/asm/vdso/clocksource.h
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -7,4 +7,6 @@
 	VDSO_CLOCKMODE_PVCLOCK,	\
 	VDSO_CLOCKMODE_HVCLOCK
 
+#define HAVE_VDSO_CLOCKMODE_HVCLOCK
+
 #endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 977fd05ac35f6..d6ece7bbce894 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -419,7 +419,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
 	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
 }
 
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
 static int hv_cs_enable(struct clocksource *cs)
 {
 	vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
@@ -435,7 +435,7 @@ static struct clocksource hyperv_cs_tsc = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 	.suspend= suspend_hv_clock_tsc,
 	.resume	= resume_hv_clock_tsc,
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
 	.enable = hv_cs_enable,
 	.vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
 #else
-- 
GitLab


From cb6f6b3384d7825d2a43f2256c5200e3b3956fc8 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@xilinx.com>
Date: Wed, 12 May 2021 13:18:21 -0700
Subject: [PATCH 0741/3804] xen/arm: move xen_swiotlb_detect to
 arm/swiotlb-xen.h

Move xen_swiotlb_detect to a static inline function to make it available
to !CONFIG_XEN builds.

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20210512201823.1963-1-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm/xen/mm.c             | 12 ------------
 include/xen/arm/swiotlb-xen.h | 15 ++++++++++++++-
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f8f07469d2591..223b1151fd7de 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -135,18 +135,6 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 	return;
 }
 
-int xen_swiotlb_detect(void)
-{
-	if (!xen_domain())
-		return 0;
-	if (xen_feature(XENFEAT_direct_mapped))
-		return 1;
-	/* legacy case */
-	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
-		return 1;
-	return 0;
-}
-
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
diff --git a/include/xen/arm/swiotlb-xen.h b/include/xen/arm/swiotlb-xen.h
index 2994fe6031a09..33336ab58afcf 100644
--- a/include/xen/arm/swiotlb-xen.h
+++ b/include/xen/arm/swiotlb-xen.h
@@ -2,6 +2,19 @@
 #ifndef _ASM_ARM_SWIOTLB_XEN_H
 #define _ASM_ARM_SWIOTLB_XEN_H
 
-extern int xen_swiotlb_detect(void);
+#include <xen/features.h>
+#include <xen/xen.h>
+
+static inline int xen_swiotlb_detect(void)
+{
+	if (!xen_domain())
+		return 0;
+	if (xen_feature(XENFEAT_direct_mapped))
+		return 1;
+	/* legacy case */
+	if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
+		return 1;
+	return 0;
+}
 
 #endif /* _ASM_ARM_SWIOTLB_XEN_H */
-- 
GitLab


From 687842ec50342b716953f5847a49dd337cb6de8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 12 May 2021 13:18:22 -0700
Subject: [PATCH 0742/3804] arm64: do not set SWIOTLB_NO_FORCE when swiotlb is
 required

Although SWIOTLB_NO_FORCE is meant to allow later calls to swiotlb_init,
today dma_direct_map_page returns an error if SWIOTLB_NO_FORCE is set.

For now, without a larger overhaul of SWIOTLB_NO_FORCE, the best we can
do is to avoid setting SWIOTLB_NO_FORCE in mem_init when we know that it
is going to be required later (e.g. Xen requires it).

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
CC: catalin.marinas@arm.com
CC: will@kernel.org
CC: linux-arm-kernel@lists.infradead.org
Fixes: 2726bf3ff252 ("swiotlb: Make SWIOTLB_NO_FORCE perform no allocation")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512201823.1963-2-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm64/mm/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 16a2b2b1c54d4..e55409caaee34 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -43,6 +43,7 @@
 #include <linux/sizes.h>
 #include <asm/tlb.h>
 #include <asm/alternative.h>
+#include <asm/xen/swiotlb-xen.h>
 
 /*
  * We need to be able to catch inadvertent references to memstart_addr
@@ -482,7 +483,7 @@ void __init mem_init(void)
 	if (swiotlb_force == SWIOTLB_FORCE ||
 	    max_pfn > PFN_DOWN(arm64_dma_phys_limit))
 		swiotlb_init(1);
-	else
+	else if (!xen_swiotlb_detect())
 		swiotlb_force = SWIOTLB_NO_FORCE;
 
 	set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
-- 
GitLab


From 97729b653de52ba98e08732dd8855586e37a3a31 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@xilinx.com>
Date: Wed, 12 May 2021 13:18:23 -0700
Subject: [PATCH 0743/3804] xen/swiotlb: check if the swiotlb has already been
 initialized

xen_swiotlb_init calls swiotlb_late_init_with_tbl, which fails with
-ENOMEM if the swiotlb has already been initialized.

Add an explicit check io_tlb_default_mem != NULL at the beginning of
xen_swiotlb_init. If the swiotlb is already initialized print a warning
and return -EEXIST.

On x86, the error propagates.

On ARM, we don't actually need a special swiotlb buffer (yet), any
buffer would do. So ignore the error and continue.

CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210512201823.1963-3-sstabellini@kernel.org
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/arm/xen/mm.c         | 8 +++++++-
 drivers/xen/swiotlb-xen.c | 5 +++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 223b1151fd7de..a7e54a087b802 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -138,9 +138,15 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
+	int rc;
+
 	if (!xen_swiotlb_detect())
 		return 0;
-	xen_swiotlb_init();
+
+	rc = xen_swiotlb_init();
+	/* we can work with the default swiotlb */
+	if (rc < 0 && rc != -EEXIST)
+		return rc;
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4c89afc0df628..24d11861ac7d8 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -164,6 +164,11 @@ int __ref xen_swiotlb_init(void)
 	int rc = -ENOMEM;
 	char *start;
 
+	if (io_tlb_default_mem != NULL) {
+		pr_warn("swiotlb buffer already initialized\n");
+		return -EEXIST;
+	}
+
 retry:
 	m_ret = XEN_SWIOTLB_ENOMEM;
 	order = get_order(bytes);
-- 
GitLab


From 03f26d8f11403295de445b6e4e0e57ac57755791 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 14 May 2021 10:20:52 +0800
Subject: [PATCH 0744/3804] blk-mq: plug request for shared sbitmap

In case of shared sbitmap, requests are no longer held in the plug list
since commit 32bc15afed04 ("blk-mq: Facilitate a shared sbitmap per
tagset"). This makes request merging from the flush plug list and batched
submission impossible, and so causes a performance regression.

Yanhui reports a performance regression when running a sequential IO
test (libaio, 16 jobs, 8 depth for each job) in a VM, where the VM disk
is emulated with an image stored on xfs/megaraid_sas.

Fix the issue by restoring the original behavior, which allows requests
to be held in the plug list.

Cc: Yanhui Ma <yama@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: kashyap.desai@broadcom.com
Fixes: 32bc15afed04 ("blk-mq: Facilitate a shared sbitmap per tagset")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514022052.1047665-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 466676bc2f0be..28ef0248efba3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2232,8 +2232,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		/* Bypass scheduler for flush requests */
 		blk_insert_flush(rq);
 		blk_mq_run_hw_queue(data.hctx, true);
-	} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
-				!blk_queue_nonrot(q))) {
+	} else if (plug && (q->nr_hw_queues == 1 ||
+		   blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
+		   q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
 		/*
 		 * Use plugging if we have a ->commit_rqs() hook as well, as
 		 * we know the driver uses bd->last in a smart fashion.
-- 
GitLab


From 630ef623ed26c18a457cdc070cf24014e50129c2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 10:15:29 -0700
Subject: [PATCH 0745/3804] blk-mq: Swap two calls in blk_mq_exit_queue()

If a tag set is shared across request queues (e.g. SCSI LUNs) then the
block layer core keeps track of the number of active request queues in
tags->active_queues. blk_mq_tag_busy() and blk_mq_tag_idle() update that
atomic counter if the hctx flag BLK_MQ_F_TAG_QUEUE_SHARED is set. Make
sure that blk_mq_exit_queue() calls blk_mq_tag_idle() before that flag is
cleared by blk_mq_del_queue_tag_set().

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Fixes: 0d2602ca30e4 ("blk-mq: improve support for shared tags maps")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210513171529.7977-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 28ef0248efba3..c86c01bfecdbe 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3286,10 +3286,12 @@ EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 /* tags can _not_ be used after returning from blk_mq_exit_queue */
 void blk_mq_exit_queue(struct request_queue *q)
 {
-	struct blk_mq_tag_set	*set = q->tag_set;
+	struct blk_mq_tag_set *set = q->tag_set;
 
-	blk_mq_del_queue_tag_set(q);
+	/* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
+	/* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
+	blk_mq_del_queue_tag_set(q);
 }
 
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
-- 
GitLab


From 4bc2082311311892742deb2ce04bc335f85ee27a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 10:17:08 -0700
Subject: [PATCH 0746/3804] block/partitions/efi.c: Fix the efi_partition()
 kernel-doc header

Fix the following kernel-doc warning:

block/partitions/efi.c:685: warning: wrong kernel-doc identifier on line:
 * efi_partition(struct parsed_partitions *state)

Cc: Alexander Viro <viro@math.psu.edu>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20210513171708.8391-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/partitions/efi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index b64bfdd4326c9..e2716792ecc13 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out)
 }
 
 /**
- * efi_partition(struct parsed_partitions *state)
+ * efi_partition - scan for GPT partitions
  * @state: disk parsed partitions
  *
  * Description: called from check.c, if the disk contains GPT
-- 
GitLab


From 588a513d34257fdde95a9f0df0202e31998e85c6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 14 May 2021 10:50:01 +0100
Subject: [PATCH 0747/3804] arm64: Fix race condition on PG_dcache_clean in
 __sync_icache_dcache()

To ensure that instructions are observable in a new mapping, the arm64
set_pte_at() implementation cleans the D-cache and invalidates the
I-cache to the PoU. As an optimisation, this is only done on executable
mappings and the PG_dcache_clean page flag is set to avoid future cache
maintenance on the same page.

When two different processes map the same page (e.g. private executable
file or shared mapping) there's a potential race on checking and setting
PG_dcache_clean via set_pte_at() -> __sync_icache_dcache(). While on the
fault paths the page is locked (PG_locked), mprotect() does not take the
page lock. The result is that one process may see the PG_dcache_clean
flag set but the I/D cache maintenance not yet performed.

Avoid test_and_set_bit(PG_dcache_clean) in favour of separate test_bit()
and set_bit(). In the rare event of a race, the cache maintenance is
done twice.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: <stable@vger.kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: Steven Price <steven.price@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210514095001.13236-1-catalin.marinas@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/flush.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index ac485163a4a76..6d44c028d1c9e 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte)
 {
 	struct page *page = pte_page(pte);
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+	if (!test_bit(PG_dcache_clean, &page->flags)) {
 		sync_icache_aliases(page_address(page), page_size(page));
+		set_bit(PG_dcache_clean, &page->flags);
+	}
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 
-- 
GitLab


From 03b30cc38dd3c3521dafb1cb2ac4ecd8470bbf0a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Thu, 29 Apr 2021 18:21:21 +0800
Subject: [PATCH 0748/3804] hv_balloon: Remove redundant assignment to
 region_start

Variable region_start is set to pg_start but this value is never
read as it is overwritten later on, hence it is a redundant
assignment and can be removed.

Cleans up the following clang-analyzer warning:

drivers/hv/hv_balloon.c:1013:3: warning: Value stored to 'region_start'
is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619691681-86256-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/hv_balloon.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 58af84e30144b..7f11ea07d698f 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1010,7 +1010,6 @@ static void hot_add_req(struct work_struct *dummy)
 		 * that need to be hot-added while ensuring the alignment
 		 * and size requirements of Linux as it relates to hot-add.
 		 */
-		region_start = pg_start;
 		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
 		if (pfn_cnt % HA_CHUNK)
 			region_size += HA_CHUNK;
-- 
GitLab


From adae1e931acd8b430d31141a283ea06d4b705417 Mon Sep 17 00:00:00 2001
From: Andres Beltran <lkmlabelt@gmail.com>
Date: Thu, 8 Apr 2021 18:14:39 +0200
Subject: [PATCH 0749/3804] Drivers: hv: vmbus: Copy packets sent by Hyper-V
 out of the ring buffer

Pointers to ring-buffer packets sent by Hyper-V are used within the
guest VM. Hyper-V can send packets with erroneous values or modify
packet fields after they are processed by the guest. To defend
against these scenarios, return a copy of the incoming VMBus packet
after validating its length and offset fields in hv_pkt_iter_first().
In this way, the packet can no longer be modified by the host.

Signed-off-by: Andres Beltran <lkmlabelt@gmail.com>
Co-developed-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20210408161439.341988-1-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel.c              |  9 ++--
 drivers/hv/hv_fcopy.c             |  1 +
 drivers/hv/hv_kvp.c               |  1 +
 drivers/hv/hyperv_vmbus.h         |  2 +-
 drivers/hv/ring_buffer.c          | 82 ++++++++++++++++++++++++++-----
 drivers/net/hyperv/hyperv_net.h   |  7 +++
 drivers/net/hyperv/netvsc.c       |  2 +
 drivers/net/hyperv/rndis_filter.c |  2 +
 drivers/scsi/storvsc_drv.c        | 10 ++++
 include/linux/hyperv.h            | 48 +++++++++++++++---
 net/vmw_vsock/hyperv_transport.c  |  4 +-
 11 files changed, 143 insertions(+), 25 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index c2635e913a92f..bfbca4eeb7733 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -662,12 +662,15 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
 	newchannel->onchannel_callback = onchannelcallback;
 	newchannel->channel_callback_context = context;
 
-	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+	if (!newchannel->max_pkt_size)
+		newchannel->max_pkt_size = VMBUS_DEFAULT_MAX_PKT_SIZE;
+
+	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages, 0);
 	if (err)
 		goto error_clean_ring;
 
-	err = hv_ringbuffer_init(&newchannel->inbound,
-				 &page[send_pages], recv_pages);
+	err = hv_ringbuffer_init(&newchannel->inbound, &page[send_pages],
+				 recv_pages, newchannel->max_pkt_size);
 	if (err)
 		goto error_clean_ring;
 
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 59ce85e00a028..660036da74495 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -349,6 +349,7 @@ int hv_fcopy_init(struct hv_util_service *srv)
 {
 	recv_buffer = srv->recv_buffer;
 	fcopy_transaction.recv_channel = srv->channel;
+	fcopy_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;
 
 	/*
 	 * When this driver loads, the user level daemon that
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index b49962d312cef..c698592b83e42 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -757,6 +757,7 @@ hv_kvp_init(struct hv_util_service *srv)
 {
 	recv_buffer = srv->recv_buffer;
 	kvp_transaction.recv_channel = srv->channel;
+	kvp_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 4;
 
 	/*
 	 * When this driver loads, the user level daemon that
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 9416e09ebd58c..42f3d9d123a12 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -174,7 +174,7 @@ extern int hv_synic_cleanup(unsigned int cpu);
 void hv_ringbuffer_pre_init(struct vmbus_channel *channel);
 
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		       struct page *pages, u32 pagecnt);
+		       struct page *pages, u32 pagecnt, u32 max_pkt_size);
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 374f8afbf8a58..e621f8d9b436e 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -181,7 +181,7 @@ void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
 
 /* Initialize the ring buffer. */
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		       struct page *pages, u32 page_cnt)
+		       struct page *pages, u32 page_cnt, u32 max_pkt_size)
 {
 	int i;
 	struct page **pages_wraparound;
@@ -223,6 +223,14 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 		sizeof(struct hv_ring_buffer);
 	ring_info->priv_read_index = 0;
 
+	/* Initialize buffer that holds copies of incoming packets */
+	if (max_pkt_size) {
+		ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL);
+		if (!ring_info->pkt_buffer)
+			return -ENOMEM;
+		ring_info->pkt_buffer_size = max_pkt_size;
+	}
+
 	spin_lock_init(&ring_info->ring_lock);
 
 	return 0;
@@ -235,6 +243,9 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 	vunmap(ring_info->ring_buffer);
 	ring_info->ring_buffer = NULL;
 	mutex_unlock(&ring_info->ring_buffer_mutex);
+
+	kfree(ring_info->pkt_buffer);
+	ring_info->pkt_buffer_size = 0;
 }
 
 /* Write to the ring buffer. */
@@ -375,7 +386,7 @@ int hv_ringbuffer_read(struct vmbus_channel *channel,
 	memcpy(buffer, (const char *)desc + offset, packetlen);
 
 	/* Advance ring index to next packet descriptor */
-	__hv_pkt_iter_next(channel, desc);
+	__hv_pkt_iter_next(channel, desc, true);
 
 	/* Notify host of update */
 	hv_pkt_iter_close(channel);
@@ -401,6 +412,22 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 		return (rbi->ring_datasize - priv_read_loc) + write_loc;
 }
 
+/*
+ * Get first vmbus packet without copying it out of the ring buffer
+ */
+struct vmpacket_descriptor *hv_pkt_iter_first_raw(struct vmbus_channel *channel)
+{
+	struct hv_ring_buffer_info *rbi = &channel->inbound;
+
+	hv_debug_delay_test(channel, MESSAGE_DELAY);
+
+	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+		return NULL;
+
+	return (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
+}
+EXPORT_SYMBOL_GPL(hv_pkt_iter_first_raw);
+
 /*
  * Get first vmbus packet from ring buffer after read_index
  *
@@ -409,17 +436,49 @@ static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
 struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
-	struct vmpacket_descriptor *desc;
+	struct vmpacket_descriptor *desc, *desc_copy;
+	u32 bytes_avail, pkt_len, pkt_offset;
 
-	hv_debug_delay_test(channel, MESSAGE_DELAY);
-	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
+	desc = hv_pkt_iter_first_raw(channel);
+	if (!desc)
 		return NULL;
 
-	desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
-	if (desc)
-		prefetch((char *)desc + (desc->len8 << 3));
+	bytes_avail = min(rbi->pkt_buffer_size, hv_pkt_iter_avail(rbi));
+
+	/*
+	 * Ensure the compiler does not use references to incoming Hyper-V values (which
+	 * could change at any moment) when reading local variables later in the code
+	 */
+	pkt_len = READ_ONCE(desc->len8) << 3;
+	pkt_offset = READ_ONCE(desc->offset8) << 3;
+
+	/*
+	 * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and
+	 * rbi->pkt_buffer_size
+	 */
+	if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail)
+		pkt_len = bytes_avail;
+
+	/*
+	 * If pkt_offset is invalid, arbitrarily set it to
+	 * the size of vmpacket_descriptor
+	 */
+	if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len)
+		pkt_offset = sizeof(struct vmpacket_descriptor);
+
+	/* Copy the Hyper-V packet out of the ring buffer */
+	desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer;
+	memcpy(desc_copy, desc, pkt_len);
+
+	/*
+	 * Hyper-V could still change len8 and offset8 after the earlier read.
+	 * Ensure that desc_copy has legal values for len8 and offset8 that
+	 * are consistent with the copy we just made
+	 */
+	desc_copy->len8 = pkt_len >> 3;
+	desc_copy->offset8 = pkt_offset >> 3;
 
-	return desc;
+	return desc_copy;
 }
 EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
 
@@ -431,7 +490,8 @@ EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
  */
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-		   const struct vmpacket_descriptor *desc)
+		   const struct vmpacket_descriptor *desc,
+		   bool copy)
 {
 	struct hv_ring_buffer_info *rbi = &channel->inbound;
 	u32 packetlen = desc->len8 << 3;
@@ -444,7 +504,7 @@ __hv_pkt_iter_next(struct vmbus_channel *channel,
 		rbi->priv_read_index -= dsize;
 
 	/* more data? */
-	return hv_pkt_iter_first(channel);
+	return copy ? hv_pkt_iter_first(channel) : hv_pkt_iter_first_raw(channel);
 }
 EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
 
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 442c520ab8f30..b11aa68b44ec7 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -895,9 +895,16 @@ static inline u32 netvsc_rqstor_size(unsigned long ringbytes)
 		ringbytes / NETVSC_MIN_IN_MSG_SIZE;
 }
 
+/* XFER PAGE packets can specify a maximum of 375 ranges for NDIS >= 6.0
+ * and a maximum of 64 ranges for NDIS < 6.0 with no RSC; with RSC, this
+ * limit is raised to 562 (= NVSP_RSC_MAX).
+ */
+#define NETVSC_MAX_XFER_PAGE_RANGES NVSP_RSC_MAX
 #define NETVSC_XFER_HEADER_SIZE(rng_cnt) \
 		(offsetof(struct vmtransfer_page_packet_header, ranges) + \
 		(rng_cnt) * sizeof(struct vmtransfer_page_range))
+#define NETVSC_MAX_PKT_SIZE (NETVSC_XFER_HEADER_SIZE(NETVSC_MAX_XFER_PAGE_RANGES) + \
+		sizeof(struct nvsp_message) + (sizeof(u32) * VRSS_SEND_TAB_SIZE))
 
 struct multi_send_data {
 	struct sk_buff *skb; /* skb containing the pkt */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 9d07c9ce4be28..067077138e529 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1650,6 +1650,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 
 	/* Open the channel */
 	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
 	ret = vmbus_open(device->channel, netvsc_ring_bytes,
 			 netvsc_ring_bytes,  NULL, 0,
 			 netvsc_channel_cb, net_device->chan_table);
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index c0e89e107d575..d7ff9ddcbae28 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1260,6 +1260,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	nvchan->channel = new_sc;
 
 	new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
+	new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
+
 	ret = vmbus_open(new_sc, netvsc_ring_bytes,
 			 netvsc_ring_bytes, NULL, 0,
 			 netvsc_channel_cb, nvchan);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index e6718a74e5dae..07149fa72b683 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -406,6 +406,14 @@ static void storvsc_on_channel_callback(void *context);
 #define STORVSC_IDE_MAX_TARGETS				1
 #define STORVSC_IDE_MAX_CHANNELS			1
 
+/*
+ * Upper bound on the size of a storvsc packet. vmscsi_size_delta is not
+ * included in the calculation because it is set after STORVSC_MAX_PKT_SIZE
+ * is used in storvsc_connect_to_vsp
+ */
+#define STORVSC_MAX_PKT_SIZE (sizeof(struct vmpacket_descriptor) +\
+			      sizeof(struct vstor_packet))
+
 struct storvsc_cmd_request {
 	struct scsi_cmnd *cmd;
 
@@ -701,6 +709,7 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
 		return;
 
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
+	new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 
 	/*
 	 * The size of vmbus_requestor is an upper bound on the number of requests
@@ -1294,6 +1303,7 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
+	device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 	/*
 	 * The size of vmbus_requestor is an upper bound on the number of requests
 	 * that can be in-progress at any one time across all channels.
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d1e59dbef1ddf..3932446f215f2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -181,6 +181,10 @@ struct hv_ring_buffer_info {
 	 * being freed while the ring buffer is being accessed.
 	 */
 	struct mutex ring_buffer_mutex;
+
+	/* Buffer that holds a copy of an incoming host packet */
+	void *pkt_buffer;
+	u32 pkt_buffer_size;
 };
 
 
@@ -799,6 +803,8 @@ struct vmbus_device {
 	bool allowed_in_isolated;
 };
 
+#define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
+
 struct vmbus_channel {
 	struct list_head listentry;
 
@@ -1021,6 +1027,9 @@ struct vmbus_channel {
 	/* request/transaction ids for VMBus */
 	struct vmbus_requestor requestor;
 	u32 rqstor_size;
+
+	/* The max size of a packet on this channel */
+	u32 max_pkt_size;
 };
 
 u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
@@ -1662,32 +1671,55 @@ static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc)
 }
 
 
+struct vmpacket_descriptor *
+hv_pkt_iter_first_raw(struct vmbus_channel *channel);
+
 struct vmpacket_descriptor *
 hv_pkt_iter_first(struct vmbus_channel *channel);
 
 struct vmpacket_descriptor *
 __hv_pkt_iter_next(struct vmbus_channel *channel,
-		   const struct vmpacket_descriptor *pkt);
+		   const struct vmpacket_descriptor *pkt,
+		   bool copy);
 
 void hv_pkt_iter_close(struct vmbus_channel *channel);
 
-/*
- * Get next packet descriptor from iterator
- * If at end of list, return NULL and update host.
- */
 static inline struct vmpacket_descriptor *
-hv_pkt_iter_next(struct vmbus_channel *channel,
-		 const struct vmpacket_descriptor *pkt)
+hv_pkt_iter_next_pkt(struct vmbus_channel *channel,
+		     const struct vmpacket_descriptor *pkt,
+		     bool copy)
 {
 	struct vmpacket_descriptor *nxt;
 
-	nxt = __hv_pkt_iter_next(channel, pkt);
+	nxt = __hv_pkt_iter_next(channel, pkt, copy);
 	if (!nxt)
 		hv_pkt_iter_close(channel);
 
 	return nxt;
 }
 
+/*
+ * Get next packet descriptor without copying it out of the ring buffer
+ * If at end of list, return NULL and update host.
+ */
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next_raw(struct vmbus_channel *channel,
+		     const struct vmpacket_descriptor *pkt)
+{
+	return hv_pkt_iter_next_pkt(channel, pkt, false);
+}
+
+/*
+ * Get next packet descriptor from iterator
+ * If at end of list, return NULL and update host.
+ */
+static inline struct vmpacket_descriptor *
+hv_pkt_iter_next(struct vmbus_channel *channel,
+		 const struct vmpacket_descriptor *pkt)
+{
+	return hv_pkt_iter_next_pkt(channel, pkt, true);
+}
+
 #define foreach_vmbus_pkt(pkt, channel) \
 	for (pkt = hv_pkt_iter_first(channel); pkt; \
 	    pkt = hv_pkt_iter_next(channel, pkt))
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index cc3bae2659e79..19189cf30a72f 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -596,7 +596,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 		return -EOPNOTSUPP;
 
 	if (need_refill) {
-		hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
+		hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
 		ret = hvs_update_recv_data(hvs);
 		if (ret)
 			return ret;
@@ -610,7 +610,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 
 	hvs->recv_data_len -= to_read;
 	if (hvs->recv_data_len == 0) {
-		hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
+		hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, hvs->recv_desc);
 		if (hvs->recv_desc) {
 			ret = hvs_update_recv_data(hvs);
 			if (ret)
-- 
GitLab


From bf5fd8cae3c8f0d1e6f71a076e0ce2bd17645d0b Mon Sep 17 00:00:00 2001
From: "Andrea Parri (Microsoft)" <parri.andrea@gmail.com>
Date: Mon, 10 May 2021 23:08:41 +0200
Subject: [PATCH 0750/3804] scsi: storvsc: Use blk_mq_unique_tag() to generate
 requestIDs

Use blk_mq_unique_tag() to generate requestIDs for StorVSC, avoiding
all issues with allocating enough entries in the VMbus requestor.

Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20210510210841.370472-1-parri.andrea@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel.c              | 14 ++---
 drivers/hv/ring_buffer.c          | 13 +++--
 drivers/net/hyperv/netvsc.c       |  8 ++-
 drivers/net/hyperv/rndis_filter.c |  2 +
 drivers/scsi/storvsc_drv.c        | 94 +++++++++++++++++++++----------
 include/linux/hyperv.h            | 13 ++++-
 6 files changed, 95 insertions(+), 49 deletions(-)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index bfbca4eeb7733..f3761c73b0742 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -1189,15 +1189,14 @@ EXPORT_SYMBOL_GPL(vmbus_recvpacket_raw);
  * vmbus_next_request_id - Returns a new request id. It is also
  * the index at which the guest memory address is stored.
  * Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @rqst_add: Guest memory address to be stored in the array
  */
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr)
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
 {
+	struct vmbus_requestor *rqstor = &channel->requestor;
 	unsigned long flags;
 	u64 current_id;
-	const struct vmbus_channel *channel =
-		container_of(rqstor, const struct vmbus_channel, requestor);
 
 	/* Check rqstor has been initialized */
 	if (!channel->rqstor_size)
@@ -1231,16 +1230,15 @@ EXPORT_SYMBOL_GPL(vmbus_next_request_id);
 /*
  * vmbus_request_addr - Returns the memory address stored at @trans_id
  * in @rqstor. Uses a spin lock to avoid race conditions.
- * @rqstor: Pointer to the requestor struct
+ * @channel: Pointer to the VMbus channel struct
  * @trans_id: Request id sent back from Hyper-V. Becomes the requestor's
  * next request id.
  */
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id)
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id)
 {
+	struct vmbus_requestor *rqstor = &channel->requestor;
 	unsigned long flags;
 	u64 req_addr;
-	const struct vmbus_channel *channel =
-		container_of(rqstor, const struct vmbus_channel, requestor);
 
 	/* Check rqstor has been initialized */
 	if (!channel->rqstor_size)
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index e621f8d9b436e..2aee356840a2b 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -312,10 +312,12 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	 */
 
 	if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
-		rqst_id = vmbus_next_request_id(&channel->requestor, requestid);
-		if (rqst_id == VMBUS_RQST_ERROR) {
-			spin_unlock_irqrestore(&outring_info->ring_lock, flags);
-			return -EAGAIN;
+		if (channel->next_request_id_callback != NULL) {
+			rqst_id = channel->next_request_id_callback(channel, requestid);
+			if (rqst_id == VMBUS_RQST_ERROR) {
+				spin_unlock_irqrestore(&outring_info->ring_lock, flags);
+				return -EAGAIN;
+			}
 		}
 	}
 	desc = hv_get_ring_buffer(outring_info) + old_write;
@@ -343,7 +345,8 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	if (channel->rescind) {
 		if (rqst_id != VMBUS_NO_RQSTOR) {
 			/* Reclaim request ID to avoid leak of IDs */
-			vmbus_request_addr(&channel->requestor, rqst_id);
+			if (channel->request_addr_callback != NULL)
+				channel->request_addr_callback(channel, rqst_id);
 		}
 		return -ENODEV;
 	}
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 067077138e529..7bd9354128534 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -757,7 +757,7 @@ static void netvsc_send_tx_complete(struct net_device *ndev,
 	int queue_sends;
 	u64 cmd_rqst;
 
-	cmd_rqst = vmbus_request_addr(&channel->requestor, (u64)desc->trans_id);
+	cmd_rqst = channel->request_addr_callback(channel, (u64)desc->trans_id);
 	if (cmd_rqst == VMBUS_RQST_ERROR) {
 		netdev_err(ndev, "Incorrect transaction id\n");
 		return;
@@ -817,8 +817,8 @@ static void netvsc_send_completion(struct net_device *ndev,
 
 	/* First check if this is a VMBUS completion without data payload */
 	if (!msglen) {
-		cmd_rqst = vmbus_request_addr(&incoming_channel->requestor,
-					      (u64)desc->trans_id);
+		cmd_rqst = incoming_channel->request_addr_callback(incoming_channel,
+								   (u64)desc->trans_id);
 		if (cmd_rqst == VMBUS_RQST_ERROR) {
 			netdev_err(ndev, "Invalid transaction id\n");
 			return;
@@ -1649,6 +1649,8 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
 		       netvsc_poll, NAPI_POLL_WEIGHT);
 
 	/* Open the channel */
+	device->channel->next_request_id_callback = vmbus_next_request_id;
+	device->channel->request_addr_callback = vmbus_request_addr;
 	device->channel->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
 	device->channel->max_pkt_size = NETVSC_MAX_PKT_SIZE;
 
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index d7ff9ddcbae28..983bf362466ad 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1259,6 +1259,8 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
 	/* Set the channel before opening.*/
 	nvchan->channel = new_sc;
 
+	new_sc->next_request_id_callback = vmbus_next_request_id;
+	new_sc->request_addr_callback = vmbus_request_addr;
 	new_sc->rqstor_size = netvsc_rqstor_size(netvsc_ring_bytes);
 	new_sc->max_pkt_size = NETVSC_MAX_PKT_SIZE;
 
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 07149fa72b683..4037539293207 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -696,6 +696,23 @@ old_is_alloced:
 	spin_unlock_irqrestore(&stor_device->lock, flags);
 }
 
+static u64 storvsc_next_request_id(struct vmbus_channel *channel, u64 rqst_addr)
+{
+	struct storvsc_cmd_request *request =
+		(struct storvsc_cmd_request *)(unsigned long)rqst_addr;
+
+	if (rqst_addr == VMBUS_RQST_INIT)
+		return VMBUS_RQST_INIT;
+	if (rqst_addr == VMBUS_RQST_RESET)
+		return VMBUS_RQST_RESET;
+
+	/*
+	 * Cannot return an ID of 0, which is reserved for an unsolicited
+	 * message from Hyper-V.
+	 */
+	return (u64)blk_mq_unique_tag(request->cmd->request) + 1;
+}
+
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -711,11 +728,7 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 	new_sc->max_pkt_size = STORVSC_MAX_PKT_SIZE;
 
-	/*
-	 * The size of vmbus_requestor is an upper bound on the number of requests
-	 * that can be in-progress at any one time across all channels.
-	 */
-	new_sc->rqstor_size = scsi_driver.can_queue;
+	new_sc->next_request_id_callback = storvsc_next_request_id;
 
 	ret = vmbus_open(new_sc,
 			 storvsc_ringbuffer_size,
@@ -782,7 +795,7 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 			       stor_device->vmscsi_size_delta),
-			       (unsigned long)request,
+			       VMBUS_RQST_INIT,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
@@ -851,7 +864,7 @@ static int storvsc_execute_vstor_op(struct hv_device *device,
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 			       stor_device->vmscsi_size_delta),
-			       (unsigned long)request,
+			       VMBUS_RQST_INIT,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
@@ -1253,6 +1266,7 @@ static void storvsc_on_channel_callback(void *context)
 	const struct vmpacket_descriptor *desc;
 	struct hv_device *device;
 	struct storvsc_device *stor_device;
+	struct Scsi_Host *shost;
 
 	if (channel->primary_channel != NULL)
 		device = channel->primary_channel->device_obj;
@@ -1263,20 +1277,12 @@ static void storvsc_on_channel_callback(void *context)
 	if (!stor_device)
 		return;
 
-	foreach_vmbus_pkt(desc, channel) {
-		void *packet = hv_pkt_data(desc);
-		struct storvsc_cmd_request *request;
-		u64 cmd_rqst;
-
-		cmd_rqst = vmbus_request_addr(&channel->requestor,
-					      desc->trans_id);
-		if (cmd_rqst == VMBUS_RQST_ERROR) {
-			dev_err(&device->device,
-				"Incorrect transaction id\n");
-			continue;
-		}
+	shost = stor_device->host;
 
-		request = (struct storvsc_cmd_request *)(unsigned long)cmd_rqst;
+	foreach_vmbus_pkt(desc, channel) {
+		struct vstor_packet *packet = hv_pkt_data(desc);
+		struct storvsc_cmd_request *request = NULL;
+		u64 rqst_id = desc->trans_id;
 
 		if (hv_pkt_datalen(desc) < sizeof(struct vstor_packet) -
 				stor_device->vmscsi_size_delta) {
@@ -1284,14 +1290,44 @@ static void storvsc_on_channel_callback(void *context)
 			continue;
 		}
 
-		if (request == &stor_device->init_request ||
-		    request == &stor_device->reset_request) {
-			memcpy(&request->vstor_packet, packet,
-			       (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
-			complete(&request->wait_event);
+		if (rqst_id == VMBUS_RQST_INIT) {
+			request = &stor_device->init_request;
+		} else if (rqst_id == VMBUS_RQST_RESET) {
+			request = &stor_device->reset_request;
 		} else {
+			/* Hyper-V can send an unsolicited message with ID of 0 */
+			if (rqst_id == 0) {
+				/*
+				 * storvsc_on_receive() looks at the vstor_packet in the message
+				 * from the ring buffer.  If the operation in the vstor_packet is
+				 * COMPLETE_IO, then we call storvsc_on_io_completion(), and
+				 * dereference the guest memory address.  Make sure we don't call
+				 * storvsc_on_io_completion() with a guest memory address that is
+				 * zero if Hyper-V were to construct and send such a bogus packet.
+				 */
+				if (packet->operation == VSTOR_OPERATION_COMPLETE_IO) {
+					dev_err(&device->device, "Invalid packet with ID of 0\n");
+					continue;
+				}
+			} else {
+				struct scsi_cmnd *scmnd;
+
+				/* Transaction 'rqst_id' corresponds to tag 'rqst_id - 1' */
+				scmnd = scsi_host_find_tag(shost, rqst_id - 1);
+				if (scmnd == NULL) {
+					dev_err(&device->device, "Incorrect transaction ID\n");
+					continue;
+				}
+				request = (struct storvsc_cmd_request *)scsi_cmd_priv(scmnd);
+			}
+
 			storvsc_on_receive(stor_device, packet, request);
+			continue;
 		}
+
+		memcpy(&request->vstor_packet, packet,
+		       (sizeof(struct vstor_packet) - stor_device->vmscsi_size_delta));
+		complete(&request->wait_event);
 	}
 }
 
@@ -1304,11 +1340,7 @@ static int storvsc_connect_to_vsp(struct hv_device *device, u32 ring_size,
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
 	device->channel->max_pkt_size = STORVSC_MAX_PKT_SIZE;
-	/*
-	 * The size of vmbus_requestor is an upper bound on the number of requests
-	 * that can be in-progress at any one time across all channels.
-	 */
-	device->channel->rqstor_size = scsi_driver.can_queue;
+	device->channel->next_request_id_callback = storvsc_next_request_id;
 
 	ret = vmbus_open(device->channel,
 			 ring_size,
@@ -1634,7 +1666,7 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
 	ret = vmbus_sendpacket(device->channel, vstor_packet,
 			       (sizeof(struct vstor_packet) -
 				stor_device->vmscsi_size_delta),
-			       (unsigned long)&stor_device->reset_request,
+			       VMBUS_RQST_RESET,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret != 0)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 3932446f215f2..2e859d2f96094 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -794,7 +794,11 @@ struct vmbus_requestor {
 
 #define VMBUS_NO_RQSTOR U64_MAX
 #define VMBUS_RQST_ERROR (U64_MAX - 1)
+/* NetVSC-specific */
 #define VMBUS_RQST_ID_NO_RESPONSE (U64_MAX - 2)
+/* StorVSC-specific */
+#define VMBUS_RQST_INIT (U64_MAX - 2)
+#define VMBUS_RQST_RESET (U64_MAX - 3)
 
 struct vmbus_device {
 	u16  dev_type;
@@ -1024,6 +1028,11 @@ struct vmbus_channel {
 	u32 fuzz_testing_interrupt_delay;
 	u32 fuzz_testing_message_delay;
 
+	/* callback to generate a request ID from a request address */
+	u64 (*next_request_id_callback)(struct vmbus_channel *channel, u64 rqst_addr);
+	/* callback to retrieve a request address from a request ID */
+	u64 (*request_addr_callback)(struct vmbus_channel *channel, u64 rqst_id);
+
 	/* request/transaction ids for VMBus */
 	struct vmbus_requestor requestor;
 	u32 rqstor_size;
@@ -1032,8 +1041,8 @@ struct vmbus_channel {
 	u32 max_pkt_size;
 };
 
-u64 vmbus_next_request_id(struct vmbus_requestor *rqstor, u64 rqst_addr);
-u64 vmbus_request_addr(struct vmbus_requestor *rqstor, u64 trans_id);
+u64 vmbus_next_request_id(struct vmbus_channel *channel, u64 rqst_addr);
+u64 vmbus_request_addr(struct vmbus_channel *channel, u64 trans_id);
 
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 {
-- 
GitLab


From df61cd9393845383adc4ea2410f2a91e1d1972b6 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Fri, 23 Apr 2021 11:31:20 +0300
Subject: [PATCH 0751/3804] arm64: dts: ti: k3-am654-base-board: remove ov5640

AM654 EVM boards are not shipped with an OV5640 sensor module; it is a
separate purchase. The OV5640 module is also just one of the possible
sensors or capture boards you can connect.

However, for some reason, OV5640 has been added to the board dts file,
making it cumbersome to use other sensors.

Remove the OV5640 from the dts file so that it is easy to use other
sensors via DT overlays.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Acked-by: Pratyush Yadav <p.yadav@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20210423083120.73476-1-tomi.valkeinen@ideasonboard.com
---
 .../arm64/boot/dts/ti/k3-am654-base-board.dts | 31 -------------------
 1 file changed, 31 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
index 9e87fb313a541..eddb2ffb93ca6 100644
--- a/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
+++ b/arch/arm64/boot/dts/ti/k3-am654-base-board.dts
@@ -85,12 +85,6 @@
 			gpios = <&wkup_gpio0 27 GPIO_ACTIVE_LOW>;
 		};
 	};
-
-	clk_ov5640_fixed: clock {
-		compatible = "fixed-clock";
-		#clock-cells = <0>;
-		clock-frequency = <24000000>;
-	};
 };
 
 &wkup_pmx0 {
@@ -287,23 +281,6 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&main_i2c1_pins_default>;
 	clock-frequency = <400000>;
-
-	ov5640: camera@3c {
-		compatible = "ovti,ov5640";
-		reg = <0x3c>;
-
-		clocks = <&clk_ov5640_fixed>;
-		clock-names = "xclk";
-
-		port {
-			csi2_cam0: endpoint {
-				remote-endpoint = <&csi2_phy0>;
-				clock-lanes = <0>;
-				data-lanes = <1 2>;
-			};
-		};
-	};
-
 };
 
 &main_i2c2 {
@@ -496,14 +473,6 @@
 	};
 };
 
-&csi2_0 {
-	csi2_phy0: endpoint {
-		remote-endpoint = <&csi2_cam0>;
-		clock-lanes = <0>;
-		data-lanes = <1 2>;
-	};
-};
-
 &mcu_cpsw {
 	pinctrl-names = "default";
 	pinctrl-0 = <&mcu_cpsw_pins_default &mcu_mdio_pins_default>;
-- 
GitLab


From 52ae30f55a2a40cff549fac95de82f25403bd387 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Mon, 10 May 2021 23:36:01 +0530
Subject: [PATCH 0752/3804] arm64: dts: ti: j7200-main: Mark Main NAVSS as
 dma-coherent

Traffic through main NAVSS interconnect is coherent wrt ARM caches on
J7200 SoC.  Add missing dma-coherent property to main_navss node.

Also add dma-ranges to be consistent with mcu_navss node
and with AM65/J721e main_navss and mcu_navss nodes.

Fixes: d361ed88455fe ("arm64: dts: ti: Add support for J7200 SoC")
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20210510180601.19458-1-vigneshr@ti.com
---
 arch/arm64/boot/dts/ti/k3-j7200-main.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
index f86c493a44f1c..a6826f1888ef0 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
@@ -85,6 +85,8 @@
 		#size-cells = <2>;
 		ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
 		ti,sci-dev-id = <199>;
+		dma-coherent;
+		dma-ranges;
 
 		main_navss_intr: interrupt-controller1 {
 			compatible = "ti,sci-intr";
-- 
GitLab


From a0812885fa7a1074c8003484b8176ffe28d5df68 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 10 May 2021 09:50:30 -0500
Subject: [PATCH 0753/3804] arm64: dts: ti: k3-*: Rename the TI-SCI clocks node
 name

We currently use clocks as the node name for the node representing
TI-SCI clock nodes. This is better renamed to being clock-controller
as that is a better representative of the system controller function
as a clock controller for the SoC.

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Link: https://lore.kernel.org/r/20210510145033.7426-2-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am64-main.dtsi        | 2 +-
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi      | 2 +-
 arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 2 +-
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index b2bcbf23eefda..e1216073e3df2 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -148,7 +148,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index ed42f13e76634..2ae1f9214b8a7 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -23,7 +23,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 5e74e43822c3f..9dba2df3569fa 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -23,7 +23,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index d56e3475aee79..b83801feeb10f 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -23,7 +23,7 @@
 			#power-domain-cells = <2>;
 		};
 
-		k3_clks: clocks {
+		k3_clks: clock-controller {
 			compatible = "ti,k2g-sci-clk";
 			#clock-cells = <2>;
 		};
-- 
GitLab


From 830454bbd628330c3779c3de637b709dae790da0 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 10 May 2021 09:50:31 -0500
Subject: [PATCH 0754/3804] arm64: dts: ti: k3-am65-wakeup: Add debug region to
 TI-SCI node

Let's add the TISCI debug region to TI-SCI region in line with TI-SCI
documentation[1]. While at it, let's rename the node to indicate the
address usage.

[1] http://downloads.ti.com/tisci/esd/latest/4_trace/trace.html

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Link: https://lore.kernel.org/r/20210510145033.7426-3-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index 2ae1f9214b8a7..444842a2d556d 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -6,7 +6,7 @@
  */
 
 &cbass_wakeup {
-	dmsc: dmsc {
+	dmsc: dmsc@44083000 {
 		compatible = "ti,am654-sci";
 		ti,host-id = <12>;
 		#address-cells = <1>;
@@ -18,6 +18,9 @@
 		mboxes= <&secure_proxy_main 11>,
 			<&secure_proxy_main 13>;
 
+		reg-names = "debug_messages";
+		reg = <0x44083000 0x1000>;
+
 		k3_pds: power-controller {
 			compatible = "ti,sci-pm-domain";
 			#power-domain-cells = <2>;
-- 
GitLab


From 421c06b8761abd7d953148f5b955b4149df9846e Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 10 May 2021 09:50:32 -0500
Subject: [PATCH 0755/3804] arm64: dts: ti: k3-am65-wakeup: Drop un-necessary
 properties from dmsc node

The DMSC node doesn't require any of "#address-cells", "#size-cells"
or "ranges" property as the child nodes are representations of SoC's
system controller itself, so align it with the bindings.

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Link: https://lore.kernel.org/r/20210510145033.7426-4-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index 444842a2d556d..80d4df775f439 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -9,9 +9,6 @@
 	dmsc: dmsc@44083000 {
 		compatible = "ti,am654-sci";
 		ti,host-id = <12>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		ranges;
 
 		mbox-names = "rx", "tx";
 
-- 
GitLab


From 9d3c9378f96a95f15881ee3373d2c2f773273fc2 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 10 May 2021 09:50:33 -0500
Subject: [PATCH 0756/3804] arm64: dts: ti: k3-*: Rename the TI-SCI node

Let's rename the node name of TI-SCI node to be system-controller as it
is a better standardized name for the function that TI-SCI plays in the
SoC.

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Link: https://lore.kernel.org/r/20210510145033.7426-5-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am64-main.dtsi        | 2 +-
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi      | 2 +-
 arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 2 +-
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index e1216073e3df2..dc52178d9b64a 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -134,7 +134,7 @@
 		};
 	};
 
-	dmsc: dmsc@44043000 {
+	dmsc: system-controller@44043000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 		mbox-names = "rx", "tx";
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index 80d4df775f439..822f4cff1db42 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -6,7 +6,7 @@
  */
 
 &cbass_wakeup {
-	dmsc: dmsc@44083000 {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,am654-sci";
 		ti,host-id = <12>;
 
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 9dba2df3569fa..65f3fabda114c 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
  */
 
 &cbass_mcu_wakeup {
-	dmsc: dmsc@44083000 {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index b83801feeb10f..94e821467eaae 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -6,7 +6,7 @@
  */
 
 &cbass_mcu_wakeup {
-	dmsc: dmsc@44083000 {
+	dmsc: system-controller@44083000 {
 		compatible = "ti,k2g-sci";
 		ti,host-id = <12>;
 
-- 
GitLab


From 9ecdb6d6b11434494af4bad11b03f0dcda1eebbd Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 10 May 2021 09:54:29 -0500
Subject: [PATCH 0757/3804] arm64: dts: ti: k3-am65|j721e|am64: Map the dma /
 navigator subsystem via explicit ranges

Instead of using an empty ranges property, let's explicitly map the address
range that is mapped onto the dma / navigator subsystems (navss/dmss).

This is also exposed via the dtbs_check with dt-schema newer than
2021.03 version by throwing out following:
arch/arm64/boot/dts/ti/k3-am654-base-board.dt.yaml: bus@100000: main-navss:
{'type': 'object'} is not allowed for
{'compatible': ['simple-mfd'], '#address-cells': [[2]], .....

This has already been correctly done for J7200, however was missed for
other k3 SoCs. Fix that oversight.

Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Acked-by: Vignesh Raghavendra <vigneshr@ti.com>
Link: https://lore.kernel.org/r/20210510145429.8752-1-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am64-main.dtsi        | 4 ++--
 arch/arm64/boot/dts/ti/k3-am65-main.dtsi        | 4 ++--
 arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi         | 4 ++--
 arch/arm64/boot/dts/ti/k3-j721e-main.dtsi       | 4 ++--
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index dc52178d9b64a..d5dc05d4cba63 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -42,12 +42,12 @@
 		};
 	};
 
-	dmss: dmss {
+	dmss: bus@48000000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
 		dma-ranges;
-		ranges;
+		ranges = <0x00 0x48000000 0x00 0x48000000 0x00 0x06400000>;
 
 		ti,sci-dev-id = <25>;
 
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index cb340d1b401f3..e160f22a15186 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -444,11 +444,11 @@
 		ti,interrupt-ranges = <0 392 32>;
 	};
 
-	main-navss {
+	main_navss: bus@30800000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x0 0x30800000 0x0 0x30800000 0x0 0xbc00000>;
 		dma-coherent;
 		dma-ranges;
 
diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 0388c02c22037..f5b8ef2f5f773 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -116,11 +116,11 @@
 		};
 	};
 
-	mcu-navss {
+	mcu_navss: bus@28380000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
 		dma-coherent;
 		dma-ranges;
 
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index c2aa45a3ac795..1a7b1cf7f794e 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -87,11 +87,11 @@
 		ti,interrupt-ranges = <8 392 56>;
 	};
 
-	main-navss {
+	main_navss: bus@30000000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x30000000 0x00 0x30000000 0x00 0x0c400000>;
 		dma-coherent;
 		dma-ranges;
 
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index 94e821467eaae..c85f98b819876 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -249,11 +249,11 @@
 		};
 	};
 
-	mcu-navss {
+	mcu_navss: bus@28380000 {
 		compatible = "simple-mfd";
 		#address-cells = <2>;
 		#size-cells = <2>;
-		ranges;
+		ranges = <0x00 0x28380000 0x00 0x28380000 0x00 0x03880000>;
 		dma-coherent;
 		dma-ranges;
 
-- 
GitLab


From cab12badfc99f93c1dccf192dd150f94b687a27c Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 11 May 2021 14:48:21 -0500
Subject: [PATCH 0758/3804] arm64: dts: ti: k3*: Introduce reg definition for
 interrupt routers

Interrupt routers are memory mapped peripherals, that are organized
in our dts bus hierarchy to closely represent the actual hardware
behavior.

However, without explicitly calling out the reg property, using
2021.03+ dt-schema package, this exposes the following problem with
dtbs_check:

/arch/arm64/boot/dts/ti/k3-am654-base-board.dt.yaml: bus@100000:
interrupt-controller0: {'type': 'object'} is not allowed for
{'compatible': ['ti,sci-intr'], .....

Even though we don't use interrupt router directly via memory mapped
registers and have to use it via the system controller, the hardware
block is memory mapped, so describe the base address in device tree.

This is a valid, comprehensive description of hardware and permitted
by the existing ti,sci-intr schema.

Reviewed-by: Tero Kristo <kristo@kernel.org>
Reviewed-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Link: https://lore.kernel.org/r/20210511194821.13919-1-nm@ti.com
---
 arch/arm64/boot/dts/ti/k3-am64-main.dtsi        | 3 ++-
 arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi         | 3 ++-
 arch/arm64/boot/dts/ti/k3-am65-main.dtsi        | 6 ++++--
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi      | 3 ++-
 arch/arm64/boot/dts/ti/k3-j7200-main.dtsi       | 6 ++++--
 arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi | 3 ++-
 arch/arm64/boot/dts/ti/k3-j721e-main.dtsi       | 6 ++++--
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 3 ++-
 8 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
index d5dc05d4cba63..ca59d1f711f8a 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-main.dtsi
@@ -373,8 +373,9 @@
 		clocks = <&k3_clks 145 0>;
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
index 99e94dee1bd45..deb19ae5e168a 100644
--- a/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am64-mcu.dtsi
@@ -74,8 +74,9 @@
 		clocks = <&k3_clks 148 0>;
 	};
 
-	mcu_gpio_intr: interrupt-controller1 {
+	mcu_gpio_intr: interrupt-controller@4210000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x04210000 0x00 0x200>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index e160f22a15186..6cd3131eb9ff9 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -433,8 +433,9 @@
 		#phy-cells = <0>;
 	};
 
-	intr_main_gpio: interrupt-controller0 {
+	intr_main_gpio: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x0 0x00a00000 0x0 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -454,8 +455,9 @@
 
 		ti,sci-dev-id = <118>;
 
-		intr_main_navss: interrupt-controller1 {
+		intr_main_navss: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x0 0x310e0000 0x0 0x2000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index 822f4cff1db42..7cb864b4d74a8 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -69,8 +69,9 @@
 		power-domains = <&k3_pds 115 TI_SCI_PD_EXCLUSIVE>;
 	};
 
-	intr_wkup_gpio: interrupt-controller2 {
+	intr_wkup_gpio: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x42200000 0x200>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
index a6826f1888ef0..19fea8adbcff4 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi
@@ -68,8 +68,9 @@
 		};
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -88,8 +89,9 @@
 		dma-coherent;
 		dma-ranges;
 
-		main_navss_intr: interrupt-controller1 {
+		main_navss_intr: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x00 0x310e0000 0x00 0x4000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
index 65f3fabda114c..5663fe3ea4660 100644
--- a/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j7200-mcu-wakeup.dtsi
@@ -96,8 +96,9 @@
 		clock-names = "fclk";
 	};
 
-	wkup_gpio_intr: interrupt-controller2 {
+	wkup_gpio_intr: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x42200000 0x00 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index 1a7b1cf7f794e..3bcafe4c1742e 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -76,8 +76,9 @@
 		};
 	};
 
-	main_gpio_intr: interrupt-controller0 {
+	main_gpio_intr: interrupt-controller@a00000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x00a00000 0x00 0x800>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
@@ -97,8 +98,9 @@
 
 		ti,sci-dev-id = <199>;
 
-		main_navss_intr: interrupt-controller1 {
+		main_navss_intr: interrupt-controller@310e0000 {
 			compatible = "ti,sci-intr";
+			reg = <0x0 0x310e0000 0x0 0x4000>;
 			ti,intr-trigger-type = <4>;
 			interrupt-controller;
 			interrupt-parent = <&gic500>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index c85f98b819876..5e825e4d0306d 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -96,8 +96,9 @@
 		clock-names = "fclk";
 	};
 
-	wkup_gpio_intr: interrupt-controller2 {
+	wkup_gpio_intr: interrupt-controller@42200000 {
 		compatible = "ti,sci-intr";
+		reg = <0x00 0x42200000 0x00 0x400>;
 		ti,intr-trigger-type = <1>;
 		interrupt-controller;
 		interrupt-parent = <&gic500>;
-- 
GitLab


From 75016891357a628d2b8acc09e2b9b2576c18d318 Mon Sep 17 00:00:00 2001
From: Hoang Le <hoang.h.le@dektech.com.au>
Date: Fri, 14 May 2021 08:23:03 +0700
Subject: [PATCH 0759/3804] Revert "net:tipc: Fix a double free in
 tipc_sk_mcast_rcv"

This reverts commit 6bf24dc0cc0cc43b29ba344b66d78590e687e046.
Above fix is not correct and caused memory leak issue.

Fixes: 6bf24dc0cc0c ("net:tipc: Fix a double free in tipc_sk_mcast_rcv")
Acked-by: Jon Maloy <jmaloy@redhat.com>
Acked-by: Tung Nguyen <tung.q.nguyen@dektech.com.au>
Signed-off-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 58935cd0d068a..53af72824c9ce 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1262,7 +1262,10 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 		spin_lock_bh(&inputq->lock);
 		if (skb_peek(arrvq) == skb) {
 			skb_queue_splice_tail_init(&tmpq, inputq);
-			__skb_dequeue(arrvq);
+			/* Decrease the skb's refcnt as increasing in the
+			 * function tipc_skb_peek
+			 */
+			kfree_skb(__skb_dequeue(arrvq));
 		}
 		spin_unlock_bh(&inputq->lock);
 		__skb_queue_purge(&tmpq);
-- 
GitLab


From 974271e5ed45cfe4daddbeb16224a2156918530e Mon Sep 17 00:00:00 2001
From: Jim Ma <majinjing3@gmail.com>
Date: Fri, 14 May 2021 11:11:02 +0800
Subject: [PATCH 0760/3804] tls splice: check SPLICE_F_NONBLOCK instead of
 MSG_DONTWAIT

In tls_sw_splice_read, checking MSG_* flags is inappropriate; the
SPLICE_* flags should be used instead. Update tls_wait_data to accept
a nonblock argument instead of flags, for both recvmsg and splice.

Fixes: c46234ebb4d1 ("tls: RX path for ktls")
Signed-off-by: Jim Ma <majinjing3@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1dcb34dfd56b3..694de024d0ee6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -37,6 +37,7 @@
 
 #include <linux/sched/signal.h>
 #include <linux/module.h>
+#include <linux/splice.h>
 #include <crypto/aead.h>
 
 #include <net/strparser.h>
@@ -1281,7 +1282,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 }
 
 static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
-				     int flags, long timeo, int *err)
+				     bool nonblock, long timeo, int *err)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1306,7 +1307,7 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
 		if (sock_flag(sk, SOCK_DONE))
 			return NULL;
 
-		if ((flags & MSG_DONTWAIT) || !timeo) {
+		if (nonblock || !timeo) {
 			*err = -EAGAIN;
 			return NULL;
 		}
@@ -1786,7 +1787,7 @@ int tls_sw_recvmsg(struct sock *sk,
 		bool async_capable;
 		bool async = false;
 
-		skb = tls_wait_data(sk, psock, flags, timeo, &err);
+		skb = tls_wait_data(sk, psock, flags & MSG_DONTWAIT, timeo, &err);
 		if (!skb) {
 			if (psock) {
 				int ret = sk_msg_recvmsg(sk, psock, msg, len,
@@ -1990,9 +1991,9 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
 	lock_sock(sk);
 
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
 
-	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
+	skb = tls_wait_data(sk, NULL, flags & SPLICE_F_NONBLOCK, timeo, &err);
 	if (!skb)
 		goto splice_read_end;
 
-- 
GitLab


From a90c57f2cedd52a511f739fb55e6244e22e1a2fb Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:16:59 +0800
Subject: [PATCH 0761/3804] net: sched: fix packet stuck problem for lockless
 qdisc

Lockless qdisc has below concurrent problem:
    cpu0                 cpu1
     .                     .
q->enqueue                 .
     .                     .
qdisc_run_begin()          .
     .                     .
dequeue_skb()              .
     .                     .
sch_direct_xmit()          .
     .                     .
     .                q->enqueue
     .             qdisc_run_begin()
     .            return and do nothing
     .                     .
qdisc_run_end()            .

cpu1 enqueue a skb without calling __qdisc_run() because cpu0
has not released the lock yet and spin_trylock() return false
for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb
enqueued by cpu1 when calling dequeue_skb() because cpu1 may
enqueue the skb after cpu0 calling dequeue_skb() and before
cpu0 calling qdisc_run_end().

Lockless qdisc has below another concurrent problem when
tx_action is involved:

cpu0(serving tx_action)     cpu1             cpu2
          .                   .                .
          .              q->enqueue            .
          .            qdisc_run_begin()       .
          .              dequeue_skb()         .
          .                   .            q->enqueue
          .                   .                .
          .             sch_direct_xmit()      .
          .                   .         qdisc_run_begin()
          .                   .       return and do nothing
          .                   .                .
 clear __QDISC_STATE_SCHED    .                .
 qdisc_run_begin()            .                .
 return and do nothing        .                .
          .                   .                .
          .            qdisc_run_end()         .

This patch fixes the above data race by:
1. If the first spin_trylock() return false and STATE_MISSED is
   not set, set STATE_MISSED and retry another spin_trylock() in
   case other CPU may not see STATE_MISSED after it releases the
   lock.
2. reschedule if STATE_MISSED is set after the lock is released
   at the end of qdisc_run_end().

For tx_action case, STATE_MISSED is also set when cpu1 is at the
end of qdisc_run_end(), so tx_action will be rescheduled again
to dequeue the skb enqueued by cpu2.

Clear STATE_MISSED before retrying a dequeuing when dequeuing
returns NULL in order to reduce the overhead of the second
spin_trylock() and __netif_schedule() calling.

Also clear the STATE_MISSED before calling __netif_schedule()
at the end of qdisc_run_end() to avoid doing another round of
dequeuing in the pfifo_fast_dequeue().

The performance impact of this patch, tested using pktgen and
dummy netdev with pfifo_fast qdisc attached:

 threads  without+this_patch   with+this_patch      delta
    1        2.61Mpps            2.60Mpps           -0.3%
    2        3.97Mpps            3.82Mpps           -3.7%
    4        5.62Mpps            5.59Mpps           -0.5%
    8        2.78Mpps            2.77Mpps           -0.3%
   16        2.22Mpps            2.22Mpps           -0.0%

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Tested-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 35 ++++++++++++++++++++++++++++++++++-
 net/sched/sch_generic.c   | 19 +++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f7a6e14491fb6..1e625519ae968 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,7 @@ struct qdisc_rate_table {
 enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
+	__QDISC_STATE_MISSED,
 };
 
 struct qdisc_size_table {
@@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
+		if (spin_trylock(&qdisc->seqlock))
+			goto nolock_empty;
+
+		/* If the MISSED flag is set, it means other thread has
+		 * set the MISSED flag before second spin_trylock(), so
+		 * we can return false here to avoid multi cpus doing
+		 * the set_bit() and second spin_trylock() concurrently.
+		 */
+		if (test_bit(__QDISC_STATE_MISSED, &qdisc->state))
+			return false;
+
+		/* Set the MISSED flag before the second spin_trylock(),
+		 * if the second spin_trylock() return false, it means
+		 * other cpu holding the lock will do dequeuing for us
+		 * or it will see the MISSED flag set after releasing
+		 * lock and reschedule the net_tx_action() to do the
+		 * dequeuing.
+		 */
+		set_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+		/* Retry again in case other CPU may not see the new flag
+		 * after it releases the lock at the end of qdisc_run_end().
+		 */
 		if (!spin_trylock(&qdisc->seqlock))
 			return false;
+
+nolock_empty:
 		WRITE_ONCE(qdisc->empty, false);
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
@@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
 	write_seqcount_end(&qdisc->running);
-	if (qdisc->flags & TCQ_F_NOLOCK)
+	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
+
+		if (unlikely(test_bit(__QDISC_STATE_MISSED,
+				      &qdisc->state))) {
+			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+			__netif_schedule(qdisc);
+		}
+	}
 }
 
 static inline bool qdisc_may_bulk(const struct Qdisc *qdisc)
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 44991ea726fc7..795d986e70308 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 {
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 	struct sk_buff *skb = NULL;
+	bool need_retry = true;
 	int band;
 
+retry:
 	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
 		struct skb_array *q = band2list(priv, band);
 
@@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 	}
 	if (likely(skb)) {
 		qdisc_update_stats_at_dequeue(qdisc, skb);
+	} else if (need_retry &&
+		   test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
+		/* Delay clearing the STATE_MISSED here to reduce
+		 * the overhead of the second spin_trylock() in
+		 * qdisc_run_begin() and __netif_schedule() calling
+		 * in qdisc_run_end().
+		 */
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+
+		/* Make sure dequeuing happens after clearing
+		 * STATE_MISSED.
+		 */
+		smp_mb__after_atomic();
+
+		need_retry = false;
+
+		goto retry;
 	} else {
 		WRITE_ONCE(qdisc->empty, true);
 	}
-- 
GitLab


From 102b55ee92f9fda4dde7a45d2b20538e6e3e3d1e Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:17:00 +0800
Subject: [PATCH 0762/3804] net: sched: fix tx action rescheduling issue during
 deactivation

Currently qdisc_run() checks the STATE_DEACTIVATED of lockless
qdisc before calling __qdisc_run(), which ultimately clears the
STATE_MISSED when all the skbs are dequeued. If STATE_DEACTIVATED
is set before clearing STATE_MISSED, there may be rescheduling
of net_tx_action() at the end of qdisc_run_end(), see below:

CPU0(net_tx_action)  CPU1(__dev_xmit_skb)  CPU2(dev_deactivate)
          .                   .                     .
          .            set STATE_MISSED             .
          .           __netif_schedule()            .
          .                   .           set STATE_DEACTIVATED
          .                   .                qdisc_reset()
          .                   .                     .
          .<---------------   .              synchronize_net()
clear __QDISC_STATE_SCHED  |  .                     .
          .                |  .                     .
          .                |  .            some_qdisc_is_busy()
          .                |  .               return *false*
          .                |  .                     .
  test STATE_DEACTIVATED   |  .                     .
__qdisc_run() *not* called |  .                     .
          .                |  .                     .
   test STATE_MISSED       |  .                     .
 __netif_schedule()--------|  .                     .
          .                   .                     .
          .                   .                     .

__qdisc_run() is not called by net_tx_action() on CPU0 because
CPU2 has set the STATE_DEACTIVATED flag during dev_deactivate().
Since STATE_MISSED is only cleared in __qdisc_run(), it stays set
and __netif_schedule() is called at the end of qdisc_run_end(),
causing the tx action rescheduling problem.

qdisc_run() called by net_tx_action() runs in the softirq context,
which should have the same semantics as the qdisc_run() called by
__dev_xmit_skb() protected by rcu_read_lock_bh(). And since there is
a synchronize_net() between the STATE_DEACTIVATED flag being set and
qdisc_reset()/some_qdisc_is_busy() in dev_deactivate(), we can safely
bail out for the deactivated lockless qdisc in net_tx_action(), and
qdisc_reset() will reset all skbs not dequeued yet.

So add the rcu_read_lock() explicitly to protect the qdisc_run()
and do the STATE_DEACTIVATED checking in net_tx_action() before
calling qdisc_run_begin(). Another option is to do the checking in
the qdisc_run_end(), but it will add unnecessary overhead for
non-tx_action case, because __dev_queue_xmit() will not see qdisc
with STATE_DEACTIVATED after synchronize_net(), the qdisc with
STATE_DEACTIVATED can only be seen by net_tx_action() because of
__netif_schedule().

The STATE_DEACTIVATED checking in qdisc_run() is to avoid a race
between net_tx_action() and qdisc_reset(), see:
commit d518d2ed8640 ("net/sched: fix race between deactivation
and dequeue for NOLOCK qdisc"). As the bailout added above for
the deactivated lockless qdisc in net_tx_action() provides better
protection against the race without calling qdisc_run() at all,
remove the STATE_DEACTIVATED checking in qdisc_run().

After qdisc_reset(), there is no skb in qdisc to be dequeued, so
clear the STATE_MISSED in dev_reset_queue() too.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
V8: Clearing STATE_MISSED before calling __netif_schedule() has
    avoided the endless rescheduling problem, but there may still
    be an unnecessary rescheduling, so adjust the commit log.
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  7 +------
 net/core/dev.c          | 26 ++++++++++++++++++++++----
 net/sched/sch_generic.c |  4 +++-
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index f5c1bee0cd6ac..6d7b12cba0158 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -128,12 +128,7 @@ void __qdisc_run(struct Qdisc *q);
 static inline void qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q)) {
-		/* NOLOCK qdisc must check 'state' under the qdisc seqlock
-		 * to avoid racing with dev_qdisc_reset()
-		 */
-		if (!(q->flags & TCQ_F_NOLOCK) ||
-		    likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-			__qdisc_run(q);
+		__qdisc_run(q);
 		qdisc_run_end(q);
 	}
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 222b1d322c969..d596cd7463534 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5025,25 +5025,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
 		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
+		rcu_read_lock();
+
 		while (head) {
 			struct Qdisc *q = head;
 			spinlock_t *root_lock = NULL;
 
 			head = head->next_sched;
 
-			if (!(q->flags & TCQ_F_NOLOCK)) {
-				root_lock = qdisc_lock(q);
-				spin_lock(root_lock);
-			}
 			/* We need to make sure head->next_sched is read
 			 * before clearing __QDISC_STATE_SCHED
 			 */
 			smp_mb__before_atomic();
+
+			if (!(q->flags & TCQ_F_NOLOCK)) {
+				root_lock = qdisc_lock(q);
+				spin_lock(root_lock);
+			} else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+						     &q->state))) {
+				/* There is a synchronize_net() between
+				 * STATE_DEACTIVATED flag being set and
+				 * qdisc_reset()/some_qdisc_is_busy() in
+				 * dev_deactivate(), so we can safely bail out
+				 * early here to avoid data race between
+				 * qdisc_deactivate() and some_qdisc_is_busy()
+				 * for lockless qdisc.
+				 */
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
+				continue;
+			}
+
 			clear_bit(__QDISC_STATE_SCHED, &q->state);
 			qdisc_run(q);
 			if (root_lock)
 				spin_unlock(root_lock);
 		}
+
+		rcu_read_unlock();
 	}
 
 	xfrm_dev_backlog(sd);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 795d986e70308..d86c4cca2cab9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1177,8 +1177,10 @@ static void dev_reset_queue(struct net_device *dev,
 	qdisc_reset(qdisc);
 
 	spin_unlock_bh(qdisc_lock(qdisc));
-	if (nolock)
+	if (nolock) {
+		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
+	}
 }
 
 static bool some_qdisc_is_busy(struct net_device *dev)
-- 
GitLab


From dcad9ee9e0663d74a89b25b987f9c7be86432812 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 14 May 2021 11:17:01 +0800
Subject: [PATCH 0763/3804] net: sched: fix tx action reschedule issue with
 stopped queue

The netdev queue might be stopped when the byte queue limit has
been reached or the tx hw ring is full, yet net_tx_action() may
still be rescheduled if STATE_MISSED is set, which consumes cpu
unnecessarily without dequeuing and transmitting any skb because
the netdev queue is stopped, see qdisc_run_end().

This patch fixes it by checking the netdev queue state before
calling qdisc_run() and clearing STATE_MISSED if the netdev queue
is stopped during qdisc_run(); net_tx_action() is rescheduled
again when the netdev queue is restarted, see netif_tx_wake_queue().

As there is a time window between the netif_xmit_frozen_or_stopped()
check and the STATE_MISSED clearing, during which STATE_MISSED
may be set by net_tx_action() scheduled by netif_tx_wake_queue(),
set STATE_MISSED again if the netdev queue is restarted.

Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking")
Reported-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c          |  3 ++-
 net/sched/sch_generic.c | 27 ++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index d596cd7463534..ef8cf7619bafa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3853,7 +3853,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 	if (q->flags & TCQ_F_NOLOCK) {
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-		qdisc_run(q);
+		if (likely(!netif_xmit_frozen_or_stopped(txq)))
+			qdisc_run(q);
 
 		if (unlikely(to_free))
 			kfree_skb_list(to_free);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d86c4cca2cab9..fc8b56bcabf39 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,6 +35,25 @@
 const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
 EXPORT_SYMBOL(default_qdisc_ops);
 
+static void qdisc_maybe_clear_missed(struct Qdisc *q,
+				     const struct netdev_queue *txq)
+{
+	clear_bit(__QDISC_STATE_MISSED, &q->state);
+
+	/* Make sure the below netif_xmit_frozen_or_stopped()
+	 * checking happens after clearing STATE_MISSED.
+	 */
+	smp_mb__after_atomic();
+
+	/* Checking netif_xmit_frozen_or_stopped() again to
+	 * make sure STATE_MISSED is set if the STATE_MISSED
+	 * set by netif_tx_wake_queue()'s rescheduling of
+	 * net_tx_action() is cleared by the above clear_bit().
+	 */
+	if (!netif_xmit_frozen_or_stopped(txq))
+		set_bit(__QDISC_STATE_MISSED, &q->state);
+}
+
 /* Main transmission queue. */
 
 /* Modifications to data participating in scheduling must be protected with
@@ -74,6 +93,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 			}
 		} else {
 			skb = SKB_XOFF_MAGIC;
+			qdisc_maybe_clear_missed(q, txq);
 		}
 	}
 
@@ -242,6 +262,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			}
 		} else {
 			skb = NULL;
+			qdisc_maybe_clear_missed(q, txq);
 		}
 		if (lock)
 			spin_unlock(lock);
@@ -251,8 +272,10 @@ validate:
 	*validate = true;
 
 	if ((q->flags & TCQ_F_ONETXQUEUE) &&
-	    netif_xmit_frozen_or_stopped(txq))
+	    netif_xmit_frozen_or_stopped(txq)) {
+		qdisc_maybe_clear_missed(q, txq);
 		return skb;
+	}
 
 	skb = qdisc_dequeue_skb_bad_txq(q);
 	if (unlikely(skb)) {
@@ -311,6 +334,8 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
 		if (!netif_xmit_frozen_or_stopped(txq))
 			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+		else
+			qdisc_maybe_clear_missed(q, txq);
 
 		HARD_TX_UNLOCK(dev, txq);
 	} else {
-- 
GitLab


From b81ac7841d511d68989534eff5550269e1bf896d Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Fri, 14 May 2021 14:41:01 +0000
Subject: [PATCH 0764/3804] net: cdc_eem: fix URL to CDC EEM 1.0 spec

The old URL is no longer accessible.

Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_eem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index 0eeec80bec311..2e60bc1b9a6b0 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -26,7 +26,7 @@
  * for transport over USB using a simpler USB device model than the
  * previous CDC "Ethernet Control Model" (ECM, or "CDC Ethernet").
  *
- * For details, see www.usb.org/developers/devclass_docs/CDC_EEM10.pdf
+ * For details, see https://usb.org/sites/default/files/CDC_EEM10.pdf
  *
  * This version has been tested with GIGAntIC WuaoW SIM Smart Card on 2.6.24,
  * 2.6.27 and 2.6.30rc2 kernel.
-- 
GitLab


From c625b80b9d00f3546722cd77527f9697c8c4c911 Mon Sep 17 00:00:00 2001
From: Peter Wang <peter.wang@mediatek.com>
Date: Wed, 12 May 2021 18:01:45 +0800
Subject: [PATCH 0765/3804] scsi: ufs: ufs-mediatek: Fix power down spec
 violation

As per spec, e.g. JESD220E chapter 7.2, while powering off the UFS device,
RST_N signal should be between VSS(Ground) and VCCQ/VCCQ2. The power down
sequence after fixing:

Power down:

 1. Assert RST_N low

 2. Turn-off VCC

 3. Turn-off VCCQ/VCCQ2

Link: https://lore.kernel.org/r/1620813706-25331-1-git-send-email-peter.wang@mediatek.com
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-mediatek.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index a981f261b3043..aee3cfc7142a4 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -922,6 +922,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm)
 static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 {
 	int err;
+	struct arm_smccc_res res;
 
 	if (ufshcd_is_link_hibern8(hba)) {
 		err = ufs_mtk_link_set_lpm(hba);
@@ -941,6 +942,9 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 			goto fail;
 	}
 
+	if (ufshcd_is_link_off(hba))
+		ufs_mtk_device_reset_ctrl(0, res);
+
 	return 0;
 fail:
 	/*
-- 
GitLab


From 56f396146af278135c0ff958c79b5ee1bd22453d Mon Sep 17 00:00:00 2001
From: Matt Wang <wwentao@vmware.com>
Date: Tue, 11 May 2021 03:04:37 +0000
Subject: [PATCH 0766/3804] scsi: BusLogic: Fix 64-bit system enumeration error
 for Buslogic

Commit 391e2f25601e ("[SCSI] BusLogic: Port driver to 64-bit")
introduced a serious issue for 64-bit systems.  With this commit,
64-bit kernel will enumerate 8*15 non-existing disks.  This is caused
by the broken CCB structure.  The change from u32 data to void *data
increased CCB length on 64-bit system, which introduced an extra 4
byte offset of the CDB.  This leads to incorrect response to INQUIRY
commands during enumeration.

Fix disk enumeration failure by reverting the portion of the commit
above which switched the data pointer from u32 to void.

Link: https://lore.kernel.org/r/C325637F-1166-4340-8F0F-3BCCD59D4D54@vmware.com
Acked-by: Khalid Aziz <khalid@gonehiking.org>
Signed-off-by: Matt Wang <wwentao@vmware.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/BusLogic.c | 6 +++---
 drivers/scsi/BusLogic.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index 3ee46a843cb5f..adddcd5899416 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2926,11 +2926,11 @@ static int blogic_qcmd_lck(struct scsi_cmnd *command,
 		ccb->opcode = BLOGIC_INITIATOR_CCB_SG;
 		ccb->datalen = count * sizeof(struct blogic_sg_seg);
 		if (blogic_multimaster_type(adapter))
-			ccb->data = (void *)((unsigned int) ccb->dma_handle +
+			ccb->data = (unsigned int) ccb->dma_handle +
 					((unsigned long) &ccb->sglist -
-					(unsigned long) ccb));
+					(unsigned long) ccb);
 		else
-			ccb->data = ccb->sglist;
+			ccb->data = virt_to_32bit_virt(ccb->sglist);
 
 		scsi_for_each_sg(command, sg, count, i) {
 			ccb->sglist[i].segbytes = sg_dma_len(sg);
diff --git a/drivers/scsi/BusLogic.h b/drivers/scsi/BusLogic.h
index a8e4a19788a77..7d1ec10f24305 100644
--- a/drivers/scsi/BusLogic.h
+++ b/drivers/scsi/BusLogic.h
@@ -806,7 +806,7 @@ struct blogic_ccb {
 	unsigned char cdblen;				/* Byte 2 */
 	unsigned char sense_datalen;			/* Byte 3 */
 	u32 datalen;					/* Bytes 4-7 */
-	void *data;					/* Bytes 8-11 */
+	u32 data;					/* Bytes 8-11 */
 	unsigned char:8;				/* Byte 12 */
 	unsigned char:8;				/* Byte 13 */
 	enum blogic_adapter_status adapter_status;	/* Byte 14 */
-- 
GitLab


From d0b2b70eb12e9ffaf95e11b16b230a4e015a536c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 13 May 2021 09:49:12 -0700
Subject: [PATCH 0767/3804] scsi: ufs: core: Increase the usable queue depth

With the current implementation of the UFS driver active_queues is 1
instead of 0 if all UFS request queues are idle. That causes
hctx_may_queue() to divide the queue depth by 2 when queueing a request and
hence reduces the usable queue depth.

The shared tag set code in the block layer keeps track of the number of
active request queues. blk_mq_tag_busy() is called before a request is
queued onto a hwq and blk_mq_tag_idle() is called some time after the hwq
became idle. blk_mq_tag_idle() is called from inside blk_mq_timeout_work().
Hence, blk_mq_tag_idle() is only called if a timer is associated with each
request that is submitted to a request queue that shares a tag set with
another request queue.

Add a blk_mq_start_request() call in ufshcd_exec_dev_cmd(). This doubles
the queue depth on my test setup from 16 to 32.

In addition to increasing the usable queue depth, also fix the
documentation of the 'timeout' parameter in the header above
ufshcd_exec_dev_cmd().

Link: https://lore.kernel.org/r/20210513164912.5683-1-bvanassche@acm.org
Fixes: 7252a3603015 ("scsi: ufs: Avoid busy-waiting by eliminating tag conflicts")
Cc: Can Guo <cang@codeaurora.org>
Cc: Alim Akhtar <alim.akhtar@samsung.com>
Cc: Avri Altman <avri.altman@wdc.com>
Cc: Stanley Chu <stanley.chu@mediatek.com>
Cc: Bean Huo <beanhuo@micron.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Can Guo <cang@codeaurora.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 3eb54937f1d8a..72fd41bfbd54b 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2842,7 +2842,7 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
  * ufshcd_exec_dev_cmd - API for sending device management requests
  * @hba: UFS hba
  * @cmd_type: specifies the type (NOP, Query...)
- * @timeout: time in seconds
+ * @timeout: timeout in milliseconds
  *
  * NOTE: Since there is only one available tag for device management commands,
  * it is expected you hold the hba->dev_cmd.lock mutex.
@@ -2872,6 +2872,9 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	}
 	tag = req->tag;
 	WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag));
+	/* Set the timeout such that the SCSI error handler is not activated. */
+	req->timeout = msecs_to_jiffies(2 * timeout);
+	blk_mq_start_request(req);
 
 	init_completion(&wait);
 	lrbp = &hba->lrb[tag];
-- 
GitLab


From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 May 2021 17:27:04 -0700
Subject: [PATCH 0768/3804] mm/hugetlb: fix F_SEAL_FUTURE_WRITE

Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2.

Hugh reported an issue with F_SEAL_FUTURE_WRITE not being applied
correctly to hugetlbfs, which I can easily verify using the memfd_test
program.  It seems that the program is hardly ever run with hugetlbfs
pages (it uses shmem by default).

Meanwhile I found another, probably even more severe, issue: hugetlb
fork won't wr-protect child cow pages, so the child can potentially write
to the parent's private pages.  Patch 2 addresses that.

After this series applied, "memfd_test hugetlbfs" should start to pass.

This patch (of 2):

F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day.
There is a test program for that and it fails constantly.

$ ./memfd_test hugetlbfs
memfd-hugetlb: CREATE
memfd-hugetlb: BASIC
memfd-hugetlb: SEAL-WRITE
memfd-hugetlb: SEAL-FUTURE-WRITE
mmap() didn't fail as expected
Aborted (core dumped)

I think it's probably because no one is really running the hugetlbfs test.

Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap(), as we
do in shmem_mmap().  Generalize a helper for that.

Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com
Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reported-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c |  5 +++++
 include/linux/mm.h   | 32 ++++++++++++++++++++++++++++++++
 mm/shmem.c           | 22 ++++------------------
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a2a42335e8fd2..9d9e0097c1d38 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(file);
+	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 	loff_t len, vma_len;
 	int ret;
 	struct hstate *h = hstate_file(file);
@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
 	vma->vm_ops = &hugetlb_vm_ops;
 
+	ret = seal_check_future_write(info->seals, vma);
+	if (ret)
+		return ret;
+
 	/*
 	 * page based offset in vm_pgoff could be sufficiently large to
 	 * overflow a loff_t when converted to byte offset.  This can
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 322ec61d0da79..c274f75efcf97 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object);
 static inline void mem_dump_obj(void *object) {}
 #endif
 
+/**
+ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * @seals: the seals to check
+ * @vma: the vma to operate on
+ *
+ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+ * the vma flags.  Return 0 if check pass, or <0 for errors.
+ */
+static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+{
+	if (seals & F_SEAL_FUTURE_WRITE) {
+		/*
+		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+		 * "future write" seal active.
+		 */
+		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+			return -EPERM;
+
+		/*
+		 * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+		 * MAP_SHARED and read-only, take care to not allow mprotect to
+		 * revert protections on such mappings. Do this only for shared
+		 * mappings. For private mappings, don't need to mask
+		 * VM_MAYWRITE as we still want them to be COW-writable.
+		 */
+		if (vma->vm_flags & VM_SHARED)
+			vma->vm_flags &= ~(VM_MAYWRITE);
+	}
+
+	return 0;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/shmem.c b/mm/shmem.c
index a08cedefbfaa6..eb131b9fb1909 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2258,25 +2258,11 @@ out_nomem:
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+	int ret;
 
-	if (info->seals & F_SEAL_FUTURE_WRITE) {
-		/*
-		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
-		 * "future write" seal active.
-		 */
-		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
-			return -EPERM;
-
-		/*
-		 * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
-		 * MAP_SHARED and read-only, take care to not allow mprotect to
-		 * revert protections on such mappings. Do this only for shared
-		 * mappings. For private mappings, don't need to mask
-		 * VM_MAYWRITE as we still want them to be COW-writable.
-		 */
-		if (vma->vm_flags & VM_SHARED)
-			vma->vm_flags &= ~(VM_MAYWRITE);
-	}
+	ret = seal_check_future_write(info->seals, vma);
+	if (ret)
+		return ret;
 
 	/* arm64 - allow memory tagging on RAM-based files */
 	vma->vm_flags |= VM_MTE_ALLOWED;
-- 
GitLab


From 84894e1c42e9f25c17f2888e0c0e1505cb727538 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 May 2021 17:27:07 -0700
Subject: [PATCH 0769/3804] mm/hugetlb: fix cow where page writtable in child

When reworking early cow of pinned hugetlb pages, we moved huge_ptep_get()
upwards but overlooked a side effect: huge_ptep_get() used to fetch the
pte after wr-protection.  After moving it upwards, we need an explicit
wr-protect of the child pte or we will keep the write bit set in the child
process, which could cause data corruption where the child can write to
the original page directly.

This issue can also be exposed by "memfd_test hugetlbfs" kselftest.

Link: https://lkml.kernel.org/r/20210503234356.9097-3-peterx@redhat.com
Fixes: 4eae4efa2c299 ("hugetlb: do early cow when page pinned on src mm")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3db405dea3dc9..95918f410c0f8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4056,6 +4056,7 @@ again:
 				 * See Documentation/vm/mmu_notifier.rst
 				 */
 				huge_ptep_set_wrprotect(src, addr, src_pte);
+				entry = huge_pte_wrprotect(entry);
 			}
 
 			page_dup_rmap(ptepage, true);
-- 
GitLab


From afe0c26d1968fe3bbef6a45df945bfeff774ca75 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 14 May 2021 17:27:10 -0700
Subject: [PATCH 0770/3804] mm, slub: move slub_debug static key enabling
 outside slab_mutex

Paul E.  McKenney reported [1] that commit 1f0723a4c0df ("mm, slub: enable
slub_debug static key when creating cache with explicit debug flags")
results in the lockdep complaint:

 ======================================================
 WARNING: possible circular locking dependency detected
 5.12.0+ #15 Not tainted
 ------------------------------------------------------
 rcu_torture_sta/109 is trying to acquire lock:
 ffffffff96063cd0 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0x9/0x20

 but task is already holding lock:
 ffffffff96173c28 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_create_usercopy+0x2d/0x250

 which lock already depends on the new lock.

 the existing dependency chain (in reverse order) is:

 -> #1 (slab_mutex){+.+.}-{3:3}:
        lock_acquire+0xb9/0x3a0
        __mutex_lock+0x8d/0x920
        slub_cpu_dead+0x15/0xf0
        cpuhp_invoke_callback+0x17a/0x7c0
        cpuhp_invoke_callback_range+0x3b/0x80
        _cpu_down+0xdf/0x2a0
        cpu_down+0x2c/0x50
        device_offline+0x82/0xb0
        remove_cpu+0x1a/0x30
        torture_offline+0x80/0x140
        torture_onoff+0x147/0x260
        kthread+0x10a/0x140
        ret_from_fork+0x22/0x30

 -> #0 (cpu_hotplug_lock){++++}-{0:0}:
        check_prev_add+0x8f/0xbf0
        __lock_acquire+0x13f0/0x1d80
        lock_acquire+0xb9/0x3a0
        cpus_read_lock+0x21/0xa0
        static_key_enable+0x9/0x20
        __kmem_cache_create+0x38d/0x430
        kmem_cache_create_usercopy+0x146/0x250
        kmem_cache_create+0xd/0x10
        rcu_torture_stats+0x79/0x280
        kthread+0x10a/0x140
        ret_from_fork+0x22/0x30

 other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(slab_mutex);
                                lock(cpu_hotplug_lock);
                                lock(slab_mutex);
   lock(cpu_hotplug_lock);

  *** DEADLOCK ***

 1 lock held by rcu_torture_sta/109:
  #0: ffffffff96173c28 (slab_mutex){+.+.}-{3:3}, at: kmem_cache_create_usercopy+0x2d/0x250

 stack backtrace:
 CPU: 3 PID: 109 Comm: rcu_torture_sta Not tainted 5.12.0+ #15
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014
 Call Trace:
  dump_stack+0x6d/0x89
  check_noncircular+0xfe/0x110
  ? lock_is_held_type+0x98/0x110
  check_prev_add+0x8f/0xbf0
  __lock_acquire+0x13f0/0x1d80
  lock_acquire+0xb9/0x3a0
  ? static_key_enable+0x9/0x20
  ? mark_held_locks+0x49/0x70
  cpus_read_lock+0x21/0xa0
  ? static_key_enable+0x9/0x20
  static_key_enable+0x9/0x20
  __kmem_cache_create+0x38d/0x430
  kmem_cache_create_usercopy+0x146/0x250
  ? rcu_torture_stats_print+0xd0/0xd0
  kmem_cache_create+0xd/0x10
  rcu_torture_stats+0x79/0x280
  ? rcu_torture_stats_print+0xd0/0xd0
  kthread+0x10a/0x140
  ? kthread_park+0x80/0x80
  ret_from_fork+0x22/0x30

This is because there's one order of locking from the hotplug callbacks:

lock(cpu_hotplug_lock); // from hotplug machinery itself
lock(slab_mutex); // in e.g. slab_mem_going_offline_callback()

And commit 1f0723a4c0df made the reverse sequence possible:
lock(slab_mutex); // in kmem_cache_create_usercopy()
lock(cpu_hotplug_lock); // kmem_cache_open() -> static_key_enable()

The simplest fix is to move static_key_enable() to a place before slab_mutex is
taken. That means kmem_cache_create_usercopy() in mm/slab_common.c which is not
ideal for SLUB-specific code, but the #ifdef CONFIG_SLUB_DEBUG makes it
at least self-contained and obvious.

[1] https://lore.kernel.org/lkml/20210502171827.GA3670492@paulmck-ThinkPad-P17-Gen-1/

Link: https://lkml.kernel.org/r/20210504120019.26791-1-vbabka@suse.cz
Fixes: 1f0723a4c0df ("mm, slub: enable slub_debug static key when creating cache with explicit debug flags")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 10 ++++++++++
 mm/slub.c        |  9 ---------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index f8833d3e5d47e..a4a571428c511 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -318,6 +318,16 @@ kmem_cache_create_usercopy(const char *name,
 	const char *cache_name;
 	int err;
 
+#ifdef CONFIG_SLUB_DEBUG
+	/*
+	 * If no slub_debug was enabled globally, the static key is not yet
+	 * enabled by setup_slub_debug(). Enable it if the cache is being
+	 * created with any of the debugging flags passed explicitly.
+	 */
+	if (flags & SLAB_DEBUG_FLAGS)
+		static_branch_enable(&slub_debug_enabled);
+#endif
+
 	mutex_lock(&slab_mutex);
 
 	err = kmem_cache_sanity_check(name, size);
diff --git a/mm/slub.c b/mm/slub.c
index feda53ae62ba4..438fa8d4c970d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3828,15 +3828,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 {
-#ifdef CONFIG_SLUB_DEBUG
-	/*
-	 * If no slub_debug was enabled globally, the static key is not yet
-	 * enabled by setup_slub_debug(). Enable it if the cache is being
-	 * created with any of the debugging flags passed explicitly.
-	 */
-	if (flags & SLAB_DEBUG_FLAGS)
-		static_branch_enable(&slub_debug_enabled);
-#endif
 	s->flags = kmem_cache_flags(s->size, flags, s->name);
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
 	s->random = get_random_long();
-- 
GitLab


From eb1f065f90cdcdcc704e9e2dc678931317c69a99 Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Fri, 14 May 2021 17:27:13 -0700
Subject: [PATCH 0771/3804] kernel/resource: fix return code check in
 __request_free_mem_region

Splitting an earlier version of a patch that allowed calling
__request_region() while holding the resource lock into a series of
patches required changing the return code for the newly introduced
__request_region_locked().

Unfortunately this change was not carried through to a subsequent commit
56fd94919b8b ("kernel/resource: fix locking in request_free_mem_region")
in the series.  This resulted in a use-after-free due to freeing the
struct resource without properly releasing it.  Fix this by correcting the
return code check so that the struct is not freed if the request to add it
was successful.

Link: https://lkml.kernel.org/r/20210512073528.22334-1-apopple@nvidia.com
Fixes: 56fd94919b8b ("kernel/resource: fix locking in request_free_mem_region")
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Muchun Song <smuchun@gmail.com>
Cc: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 028a5ab18818f..ca9f5198a01ff 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1805,7 +1805,7 @@ static struct resource *__request_free_mem_region(struct device *dev,
 				REGION_DISJOINT)
 			continue;
 
-		if (!__request_region_locked(res, &iomem_resource, addr, size,
+		if (__request_region_locked(res, &iomem_resource, addr, size,
 						name, 0))
 			break;
 
-- 
GitLab


From d6e621de1fceb3b098ebf435ef7ea91ec4838a1a Mon Sep 17 00:00:00 2001
From: Phillip Lougher <phillip@squashfs.org.uk>
Date: Fri, 14 May 2021 17:27:16 -0700
Subject: [PATCH 0772/3804] squashfs: fix divide error in calculate_skip()

Syzbot has reported a "divide error" which has been identified as being
caused by a corrupted file_size value within the file inode.  This value
has been corrupted to a much larger value than expected.

Calculate_skip() is passed i_size_read(inode) >> msblk->block_log.  Due to
the file_size value corruption this overflows the int argument/variable in
that function, leading to the divide error.

This patch changes the function to use u64.  This will accommodate any
unexpectedly large values due to corruption.

The value returned from calculate_skip() is clamped to be never more than
SQUASHFS_CACHED_BLKS - 1, or 7.  So file_size corruption does not lead to
an unexpectedly large return result here.

Link: https://lkml.kernel.org/r/20210507152618.9447-1-phillip@squashfs.org.uk
Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
Reported-by: <syzbot+e8f781243ce16ac2f962@syzkaller.appspotmail.com>
Reported-by: <syzbot+7b98870d4fec9447b951@syzkaller.appspotmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/squashfs/file.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 7b1128398976e..89d492916deaf 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -211,11 +211,11 @@ failure:
  * If the skip factor is limited in this way then the file will use multiple
  * slots.
  */
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
 {
-	int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+	u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
 		 * SQUASHFS_META_INDEXES);
-	return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+	return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
 }
 
 
-- 
GitLab


From 7ed9d238c7dbb1fdb63ad96a6184985151b0171c Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Fri, 14 May 2021 17:27:19 -0700
Subject: [PATCH 0773/3804] userfaultfd: release page in error path to avoid
 BUG_ON

Consider the following sequence of events:

1. Userspace issues a UFFD ioctl, which ends up calling into
   shmem_mfill_atomic_pte(). We successfully account the blocks, we
   shmem_alloc_page(), but then the copy_from_user() fails. We return
   -ENOENT. We don't release the page we allocated.
2. Our caller detects this error code, tries the copy_from_user() after
   dropping the mmap_lock, and retries, calling back into
   shmem_mfill_atomic_pte().
3. Meanwhile, let's say another process filled up the tmpfs being used.
4. So shmem_mfill_atomic_pte() fails to account blocks this time, and
   immediately returns - without releasing the page.

This triggers a BUG_ON in our caller, which asserts that the page
should always be consumed, unless -ENOENT is returned.

To fix this, detect if we have such a "dangling" page when accounting
fails, and if so, release it before returning.

Link: https://lkml.kernel.org/r/20210428230858.348400-1-axelrasmussen@google.com
Fixes: cb658a453b93 ("userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY")
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Reported-by: Hugh Dickins <hughd@google.com>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index eb131b9fb1909..5d46611cba8dc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2361,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	pgoff_t offset, max_off;
 
 	ret = -ENOMEM;
-	if (!shmem_inode_acct_block(inode, 1))
+	if (!shmem_inode_acct_block(inode, 1)) {
+		/*
+		 * We may have got a page, returned -ENOENT triggering a retry,
+		 * and now we find ourselves with -ENOMEM. Release the page, to
+		 * avoid a BUG_ON in our caller.
+		 */
+		if (unlikely(*pagep)) {
+			put_page(*pagep);
+			*pagep = NULL;
+		}
 		goto out;
+	}
 
 	if (!*pagep) {
 		page = shmem_alloc_page(gfp, info, pgoff);
-- 
GitLab


From 628622904b8d229591134e44efd6608a7541eb89 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 14 May 2021 17:27:22 -0700
Subject: [PATCH 0774/3804] ksm: revert "use GET_KSM_PAGE_NOLOCK to get ksm
 page in remove_rmap_item_from_tree()"

This reverts commit 3e96b6a2e9ad929a3230a22f4d64a74671a0720b.  General
Protection Fault in rmap_walk_ksm() under memory pressure:
remove_rmap_item_from_tree() needs to take page lock, of course.

Link: https://lkml.kernel.org/r/alpine.LSU.2.11.2105092253500.1127@eggly.anvils
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/ksm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 6bbe314c52603..2f3aaeb34a42e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -776,11 +776,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
 		struct page *page;
 
 		stable_node = rmap_item->head;
-		page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
+		page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
 		if (!page)
 			goto out;
 
 		hlist_del(&rmap_item->hlist);
+		unlock_page(page);
 		put_page(page);
 
 		if (!hlist_empty(&stable_node->hlist))
-- 
GitLab


From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 May 2021 17:27:24 -0700
Subject: [PATCH 0775/3804] mm: fix struct page layout on 32-bit systems

32-bit architectures which expect 8-byte alignment for 8-byte integers and
need 64-bit DMA addresses (arm, mips, ppc) had their struct page
inadvertently expanded in 2019.  When the dma_addr_t was added, it forced
the alignment of the union to 8 bytes, which inserted a 4 byte gap between
'flags' and the union.

Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long.  We always
store the low bits in the first word to prevent the PageTail bit from
being inadvertently set on a big endian platform.  If that happened,
get_user_pages_fast() racing against a page which was freed and
reallocated to the page_pool could dereference a bogus compound_head(),
which would be hard to trace back to this cause.

Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Matteo Croce <mcroce@linux.microsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  4 ++--
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 12 +++++++-----
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a88946..5aacc1c10a45a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
 		};
 		struct {	/* page_pool used by netstack */
 			/**
-			 * @dma_addr: might require a 64-bit value even on
+			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
 			 */
-			dma_addr_t dma_addr;
+			unsigned long dma_addr[2];
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 6d517a37c18bf..b4b6de909c934 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr[0];
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+	return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+	page->dma_addr[0] = addr;
+	if (sizeof(dma_addr_t) > sizeof(unsigned long))
+		page->dma_addr[1] = upper_32_bits(addr);
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9ec1aa9640ade..3c4c4c7a04022 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
 					  struct page *page,
 					  unsigned int dma_sync_size)
 {
+	dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
 	dma_sync_size = min(dma_sync_size, pool->p.max_len);
-	dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+	dma_sync_single_range_for_device(pool->p.dev, dma_addr,
 					 pool->p.offset, dma_sync_size,
 					 pool->p.dma_dir);
 }
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 	if (dma_mapping_error(pool->p.dev, dma))
 		return false;
 
-	page->dma_addr = dma;
+	page_pool_set_dma_addr(page, dma);
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
 		 */
 		goto skip_dma_unmap;
 
-	dma = page->dma_addr;
+	dma = page_pool_get_dma_addr(page);
 
-	/* When page is unmapped, it cannot be returned our pool */
+	/* When page is unmapped, it cannot be returned to our pool */
 	dma_unmap_page_attrs(pool->p.dev, dma,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC);
-	page->dma_addr = 0;
+	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
-- 
GitLab


From f649dc0e0d7b509c75570ee403723660f5b72ec7 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Fri, 14 May 2021 17:27:27 -0700
Subject: [PATCH 0776/3804] kasan: fix unit tests with
 CONFIG_UBSAN_LOCAL_BOUNDS enabled

These tests deliberately access these arrays out of bounds, which will
cause the dynamic local bounds checks inserted by
CONFIG_UBSAN_LOCAL_BOUNDS to fail and panic the kernel.  To avoid this
problem, access the arrays via volatile pointers, which will prevent the
compiler from being able to determine the array bounds.

These accesses use volatile pointers to char (char *volatile) rather than
the more conventional pointers to volatile char (volatile char *) because
we want to prevent the compiler from making inferences about the pointer
itself (i.e.  its array bounds), not the data that it refers to.

Link: https://lkml.kernel.org/r/20210507025915.1464056-1-pcc@google.com
Link: https://linux-review.googlesource.com/id/I90b1713fbfa1bf68ff895aef099ea77b98a7c3b9
Signed-off-by: Peter Collingbourne <pcc@google.com>
Tested-by: Alexander Potapenko <glider@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: George Popescu <georgepope@android.com>
Cc: Elena Petrova <lenaptr@google.com>
Cc: Evgenii Stepanov <eugenis@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kasan.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index dc05cfc2d12f0..cacbbbdef768d 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -654,8 +654,20 @@ static char global_array[10];
 
 static void kasan_global_oob(struct kunit *test)
 {
-	volatile int i = 3;
-	char *p = &global_array[ARRAY_SIZE(global_array) + i];
+	/*
+	 * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
+	 * from failing here and panicing the kernel, access the array via a
+	 * volatile pointer, which will prevent the compiler from being able to
+	 * determine the array bounds.
+	 *
+	 * This access uses a volatile pointer to char (char *volatile) rather
+	 * than the more conventional pointer to volatile char (volatile char *)
+	 * because we want to prevent the compiler from making inferences about
+	 * the pointer itself (i.e. its array bounds), not the data that it
+	 * refers to.
+	 */
+	char *volatile array = global_array;
+	char *p = &array[ARRAY_SIZE(global_array) + 3];
 
 	/* Only generic mode instruments globals. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -703,8 +715,9 @@ static void ksize_uaf(struct kunit *test)
 static void kasan_stack_oob(struct kunit *test)
 {
 	char stack_array[10];
-	volatile int i = OOB_TAG_OFF;
-	char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
+	/* See comment in kasan_global_oob. */
+	char *volatile array = stack_array;
+	char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
 
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
 
@@ -715,7 +728,9 @@ static void kasan_alloca_oob_left(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
-	char *p = alloca_array - 1;
+	/* See comment in kasan_global_oob. */
+	char *volatile array = alloca_array;
+	char *p = array - 1;
 
 	/* Only generic mode instruments dynamic allocas. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -728,7 +743,9 @@ static void kasan_alloca_oob_right(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
-	char *p = alloca_array + i;
+	/* See comment in kasan_global_oob. */
+	char *volatile array = alloca_array;
+	char *p = array + i;
 
 	/* Only generic mode instruments dynamic allocas. */
 	KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
-- 
GitLab


From 076171a67789ad0107de44c2964f2e46a7d0d7b8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 14 May 2021 17:27:30 -0700
Subject: [PATCH 0777/3804] mm/filemap: fix readahead return types

A readahead request will not allocate more memory than can be represented
by a size_t, even on systems that have HIGHMEM available.  Change the
length functions from returning an loff_t to a size_t.

Link: https://lkml.kernel.org/r/20210510201201.1558972-1-willy@infradead.org
Fixes: 32c0a6bcaa1f57 ("btrfs: add and use readahead_batch_length")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/iomap/buffered-io.c  | 4 ++--
 include/linux/pagemap.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f2cd2034a87bb..9023717c5188b 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 {
 	struct inode *inode = rac->mapping->host;
 	loff_t pos = readahead_pos(rac);
-	loff_t length = readahead_length(rac);
+	size_t length = readahead_length(rac);
 	struct iomap_readpage_ctx ctx = {
 		.rac	= rac,
 	};
@@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
 	trace_iomap_readahead(inode, readahead_count(rac));
 
 	while (length > 0) {
-		loff_t ret = iomap_apply(inode, pos, length, 0, ops,
+		ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
 				&ctx, iomap_readahead_actor);
 		if (ret <= 0) {
 			WARN_ON_ONCE(ret == 0);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a4bd41128bf31..e89df447fae32 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac)
  * readahead_length - The number of bytes in this readahead request.
  * @rac: The readahead request.
  */
-static inline loff_t readahead_length(struct readahead_control *rac)
+static inline size_t readahead_length(struct readahead_control *rac)
 {
-	return (loff_t)rac->_nr_pages * PAGE_SIZE;
+	return rac->_nr_pages * PAGE_SIZE;
 }
 
 /**
@@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac)
  * readahead_batch_length - The number of bytes in the current batch.
  * @rac: The readahead request.
  */
-static inline loff_t readahead_batch_length(struct readahead_control *rac)
+static inline size_t readahead_batch_length(struct readahead_control *rac)
 {
 	return rac->_batch_count * PAGE_SIZE;
 }
-- 
GitLab


From c3187cf32216313fb316084efac4dab3a8459b1d Mon Sep 17 00:00:00 2001
From: Jouni Roivas <jouni.roivas@tuxera.com>
Date: Fri, 14 May 2021 17:27:33 -0700
Subject: [PATCH 0778/3804] hfsplus: prevent corruption in shrinking truncate

I believe there are some issues introduced by commit 31651c607151
("hfsplus: avoid deadlock on file truncation").

HFS+ has extent records which always contain 8 extents.  In case the
first extent record in catalog file gets full, new ones are allocated from
extents overflow file.

In case a shrinking truncate lands in the middle of an extent record which
is located in the extents overflow file, the logic in hfsplus_file_truncate()
was changed so that the call to hfs_brec_remove() is not guarded any more.

Right action would be just freeing the extents that exceed the new size
inside extent record by calling hfsplus_free_extents(), and then check if
the whole extent record should be removed.  However since the guard
(blk_cnt > start) is now after the call to hfs_brec_remove(), this has
unfortunate effect that the last matching extent record is removed
unconditionally.

To reproduce this issue, create a file which has at least 10 extents, and
then perform shrinking truncate into middle of the last extent record, so
that the number of remaining extents is not under or divisible by 8.  This
causes the last extent record (8 extents) to be removed totally instead of
truncating into middle of it.  Thus this causes corruption, and lost data.

Fix for this is simply checking if the new truncated end is below the
start of this extent record, making it safe to remove the full extent
record.  However call to hfs_brec_remove() can't be moved to its previous
place since we're dropping ->tree_lock and it can cause a race condition
and the cached info being invalidated possibly corrupting the node data.

Another issue is related to this one.  When entering into the block
(blk_cnt > start) we are not holding the ->tree_lock.  We break out from
the loop not holding the lock, but hfs_find_exit() does unlock it.  Not
sure if it's possible for someone else to take the lock under our feet,
but it can cause hard to debug errors and premature unlocking.  Even if
there's no real risk of it, the locking should still always be kept in
balance.  Thus taking the lock now just before the check.

Link: https://lkml.kernel.org/r/20210429165139.3082828-1-jouni.roivas@tuxera.com
Fixes: 31651c607151f ("hfsplus: avoid deadlock on file truncation")
Signed-off-by: Jouni Roivas <jouni.roivas@tuxera.com>
Reviewed-by: Anton Altaparmakov <anton@tuxera.com>
Cc: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/extents.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index a930ddd156819..7054a542689f9 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode *inode)
 		res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
 		if (res)
 			break;
-		hfs_brec_remove(&fd);
 
-		mutex_unlock(&fd.tree->tree_lock);
 		start = hip->cached_start;
+		if (blk_cnt <= start)
+			hfs_brec_remove(&fd);
+		mutex_unlock(&fd.tree->tree_lock);
 		hfsplus_free_extents(sb, hip->cached_extents,
 				     alloc_cnt - start, alloc_cnt - blk_cnt);
 		hfsplus_dump_extent(hip->cached_extents);
+		mutex_lock(&fd.tree->tree_lock);
 		if (blk_cnt > start) {
 			hip->extent_state |= HFSPLUS_EXT_DIRTY;
 			break;
@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode *inode)
 		alloc_cnt = start;
 		hip->cached_start = hip->cached_blocks = 0;
 		hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
-		mutex_lock(&fd.tree->tree_lock);
 	}
 	hfs_find_exit(&fd);
 
-- 
GitLab


From f4d3f25aced3b493e57fd4109e2bc86f0831b23e Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 14 May 2021 17:27:36 -0700
Subject: [PATCH 0779/3804] docs: admin-guide: update description for
 kernel.modprobe sysctl

When I added CONFIG_MODPROBE_PATH, I neglected to update Documentation/.
It's still true that this defaults to /sbin/modprobe, but now via a level
of indirection.  So document that the kernel might have been built with
something other than /sbin/modprobe as the initial value.

Link: https://lkml.kernel.org/r/20210420125324.1246826-1-linux@rasmusvillemoes.dk
Fixes: 17652f4240f7a ("modules: add CONFIG_MODPROBE_PATH")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/sysctl/kernel.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e9..7ca8df5451d45 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -483,10 +483,11 @@ modprobe
 ========
 
 The full path to the usermode helper for autoloading kernel modules,
-by default "/sbin/modprobe".  This binary is executed when the kernel
-requests a module.  For example, if userspace passes an unknown
-filesystem type to mount(), then the kernel will automatically request
-the corresponding filesystem module by executing this usermode helper.
+by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to
+"/sbin/modprobe".  This binary is executed when the kernel requests a
+module.  For example, if userspace passes an unknown filesystem type
+to mount(), then the kernel will automatically request the
+corresponding filesystem module by executing this usermode helper.
 This usermode helper should insert the needed module into the kernel.
 
 This sysctl only affects module autoloading.  It has no effect on the
-- 
GitLab


From 86d0c164272536c732853e19391de5159f860701 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 14 May 2021 17:27:39 -0700
Subject: [PATCH 0780/3804] mm/ioremap: fix iomap_max_page_shift

iomap_max_page_shift is expected to contain a page shift, so it can't be a
'bool'; it has to be an 'unsigned int'.

And fix the default values: P4D_SHIFT is when huge iomap is allowed.

However, on some architectures (eg: powerpc book3s/64), P4D_SHIFT is not a
constant so it can't be used to initialise a static variable.  So,
initialise iomap_max_page_shift with a maximum shift supported by the
architecture, it is gated by P4D_SHIFT in vmap_try_huge_p4d() anyway.

Link: https://lkml.kernel.org/r/ad2d366015794a9f21320dcbdd0a8eb98979e9df.1620898113.git.christophe.leroy@csgroup.eu
Fixes: bbc180a5adb0 ("mm: HUGE_VMAP arch support cleanup")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/ioremap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/ioremap.c b/mm/ioremap.c
index d1dcc7e744acf..8ee0136f8cb08 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,16 +16,16 @@
 #include "pgalloc-track.h"
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
+static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1;
 
 static int __init set_nohugeiomap(char *str)
 {
-	iomap_max_page_shift = P4D_SHIFT;
+	iomap_max_page_shift = PAGE_SHIFT;
 	return 0;
 }
 early_param("nohugeiomap", set_nohugeiomap);
 #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static const bool iomap_max_page_shift = PAGE_SHIFT;
+static const unsigned int iomap_max_page_shift = PAGE_SHIFT;
 #endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 int ioremap_page_range(unsigned long addr,
-- 
GitLab


From 73578af92a0fae6609b955fcc9113e50e413c80f Mon Sep 17 00:00:00 2001
From: Javed Hasan <jhasan@marvell.com>
Date: Wed, 12 May 2021 00:25:33 -0700
Subject: [PATCH 0781/3804] scsi: qedf: Add pointer checks in
 qedf_update_link_speed()

The following trace was observed:

 [   14.042059] Call Trace:
 [   14.042061]  <IRQ>
 [   14.042068]  qedf_link_update+0x144/0x1f0 [qedf]
 [   14.042117]  qed_link_update+0x5c/0x80 [qed]
 [   14.042135]  qed_mcp_handle_link_change+0x2d2/0x410 [qed]
 [   14.042155]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042170]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042186]  ? qed_rd+0x13/0x40 [qed]
 [   14.042205]  qed_mcp_handle_events+0x437/0x690 [qed]
 [   14.042221]  ? qed_set_ptt+0x70/0x80 [qed]
 [   14.042239]  qed_int_sp_dpc+0x3a6/0x3e0 [qed]
 [   14.042245]  tasklet_action_common.isra.14+0x5a/0x100
 [   14.042250]  __do_softirq+0xe4/0x2f8
 [   14.042253]  irq_exit+0xf7/0x100
 [   14.042255]  do_IRQ+0x7f/0xd0
 [   14.042257]  common_interrupt+0xf/0xf
 [   14.042259]  </IRQ>

API qedf_link_update() is getting called from QED but by that time
shost_data is not initialised. This results in a NULL pointer dereference
when we try to dereference shost_data while updating supported_speeds.

Add a NULL pointer check before dereferencing shost_data.

Link: https://lore.kernel.org/r/20210512072533.23618-1-jhasan@marvell.com
Fixes: 61d8658b4a43 ("scsi: qedf: Add QLogic FastLinQ offload FCoE driver framework.")
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 69f7784233f93..7562311518824 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -536,7 +536,9 @@ static void qedf_update_link_speed(struct qedf_ctx *qedf,
 	if (linkmode_intersects(link->supported_caps, sup_caps))
 		lport->link_supported_speeds |= FC_PORTSPEED_20GBIT;
 
-	fc_host_supported_speeds(lport->host) = lport->link_supported_speeds;
+	if (lport->host && lport->host->shost_data)
+		fc_host_supported_speeds(lport->host) =
+			lport->link_supported_speeds;
 }
 
 static void qedf_bw_update(void *dev)
-- 
GitLab


From 8b549c18ae81dbc36fb11e4aa08b8378c599ca95 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Apr 2021 14:45:43 +0200
Subject: [PATCH 0782/3804] openrisc: Define memory barrier mb

This came up in the discussion of the requirements of qspinlock on an
architecture.  OpenRISC uses qspinlock, but it was noticed that the
memory barrier was not defined.

Peter defined it in the mail thread writing:

    As near as I can tell this should do. The arch spec only lists
    this one instruction and the text makes it sound like a completion
    barrier.

This is correct so applying this patch.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
[shorne@gmail.com:Turned the mail into a patch]
Signed-off-by: Stafford Horne <shorne@gmail.com>
---
 arch/openrisc/include/asm/barrier.h | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 arch/openrisc/include/asm/barrier.h

diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h
new file mode 100644
index 0000000000000..7538294721bed
--- /dev/null
+++ b/arch/openrisc/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#define mb() asm volatile ("l.msync" ::: "memory")
+
+#include <asm-generic/barrier.h>
+
+#endif /* __ASM_BARRIER_H */
-- 
GitLab


From e44b49f623c77bee7451f1a82ccfb969c1028ae2 Mon Sep 17 00:00:00 2001
From: Zhu Lingshan <lingshan.zhu@intel.com>
Date: Sat, 8 May 2021 15:11:52 +0800
Subject: [PATCH 0783/3804] Revert "irqbypass: do not start cons/prod when
 failed connect"

This reverts commit a979a6aa009f3c99689432e0cdb5402a4463fb88.

The reverted commit may cause VM freeze on arm64 with GICv4,
where stopping a consumer is implemented by suspending the VM.
Should the connect fail, the VM will not be resumed, which
is a bit of a problem.

It also erroneously calls the producer destructor unconditionally,
which is unexpected.

Reported-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Suggested-by: Marc Zyngier <maz@kernel.org>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Zhu Lingshan <lingshan.zhu@intel.com>
[maz: tags and cc-stable, commit message update]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Fixes: a979a6aa009f ("irqbypass: do not start cons/prod when failed connect")
Link: https://lore.kernel.org/r/3a2c66d6-6ca0-8478-d24b-61e8e3241b20@hisilicon.com
Link: https://lore.kernel.org/r/20210508071152.722425-1-lingshan.zhu@intel.com
Cc: stable@vger.kernel.org
---
 virt/lib/irqbypass.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c
index c9bb3957f58a7..28fda42e471bb 100644
--- a/virt/lib/irqbypass.c
+++ b/virt/lib/irqbypass.c
@@ -40,21 +40,17 @@ static int __connect(struct irq_bypass_producer *prod,
 	if (prod->add_consumer)
 		ret = prod->add_consumer(prod, cons);
 
-	if (ret)
-		goto err_add_consumer;
-
-	ret = cons->add_producer(cons, prod);
-	if (ret)
-		goto err_add_producer;
+	if (!ret) {
+		ret = cons->add_producer(cons, prod);
+		if (ret && prod->del_consumer)
+			prod->del_consumer(prod, cons);
+	}
 
 	if (cons->start)
 		cons->start(cons);
 	if (prod->start)
 		prod->start(prod);
-err_add_producer:
-	if (prod->del_consumer)
-		prod->del_consumer(prod, cons);
-err_add_consumer:
+
 	return ret;
 }
 
-- 
GitLab


From fcb8283920b135bca2916133e2383a501ad57eaa Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Tue, 27 Apr 2021 06:33:57 +0800
Subject: [PATCH 0784/3804] KVM: arm64: Fix boolreturn.cocci warnings

arch/arm64/kvm/mmu.c:1114:9-10: WARNING: return of 0/1 in function 'kvm_age_gfn' with return type bool
arch/arm64/kvm/mmu.c:1084:9-10: WARNING: return of 0/1 in function 'kvm_set_spte_gfn' with return type bool
arch/arm64/kvm/mmu.c:1127:9-10: WARNING: return of 0/1 in function 'kvm_test_age_gfn' with return type bool
arch/arm64/kvm/mmu.c:1070:9-10: WARNING: return of 0/1 in function 'kvm_unmap_gfn_range' with return type bool

 Return statements in functions returning bool should use
 true/false instead of 1/0.
Generated by: scripts/coccinelle/misc/boolreturn.cocci

Fixes: cd4c71835228 ("KVM: arm64: Convert to the gfn-based MMU notifier callbacks")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210426223357.GA45871@cd4295a34ed8
---
 arch/arm64/kvm/mmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c5d1f3c87dbdb..c10207fed2f36 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1156,13 +1156,13 @@ out_unlock:
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	__unmap_stage2_range(&kvm->arch.mmu, range->start << PAGE_SHIFT,
 			     (range->end - range->start) << PAGE_SHIFT,
 			     range->may_block);
 
-	return 0;
+	return false;
 }
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1170,7 +1170,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	kvm_pfn_t pfn = pte_pfn(range->pte);
 
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	WARN_ON(range->end - range->start != 1);
 
@@ -1190,7 +1190,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 			       PAGE_SIZE, __pfn_to_phys(pfn),
 			       KVM_PGTABLE_PROT_R, NULL);
 
-	return 0;
+	return false;
 }
 
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
@@ -1200,7 +1200,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	pte_t pte;
 
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
 
@@ -1213,7 +1213,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	if (!kvm->arch.mmu.pgt)
-		return 0;
+		return false;
 
 	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
 					   range->start << PAGE_SHIFT);
-- 
GitLab


From eaa9b88dae64254a87d3d83b77afa71ee992f502 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Fri, 14 May 2021 08:56:39 +0000
Subject: [PATCH 0785/3804] KVM: arm64: Mark pkvm_pgtable_mm_ops static

It is not used outside of setup.c, mark it static.

Fixes: f320bc742bc2 ("KVM: arm64: Prepare the creation of s1 mappings at EL2")

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210514085640.3917886-2-qperret@google.com
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 7488f53b0aa2f..a3d3a275344e9 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -17,7 +17,6 @@
 #include <nvhe/trap_handler.h>
 
 struct hyp_pool hpool;
-struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 unsigned long hyp_nr_cpus;
 
 #define hyp_percpu_size ((unsigned long)__per_cpu_end - \
@@ -27,6 +26,7 @@ static void *vmemmap_base;
 static void *hyp_pgt_base;
 static void *host_s2_mem_pgt_base;
 static void *host_s2_dev_pgt_base;
+static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
 {
-- 
GitLab


From 3fdc15fe8c6445175d61f0fac111d2ee9354e385 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Fri, 14 May 2021 08:56:40 +0000
Subject: [PATCH 0786/3804] KVM: arm64: Mark the host stage-2 memory pools
 static

The host stage-2 memory pools are not used outside of mem_protect.c,
mark them static.

Fixes: 1025c8c0c6ac ("KVM: arm64: Wrap the host with a stage 2")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210514085640.3917886-3-qperret@google.com
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index e342f7f4f4fb7..4b60c0056c041 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,8 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-struct hyp_pool host_s2_mem;
-struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_mem;
+static struct hyp_pool host_s2_dev;
 
 /*
  * Copies of the host's CPU features registers holding sanitized values.
-- 
GitLab


From f5e30680616ab09e690b153b7a68ff7dd13e6579 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 6 May 2021 14:31:42 +0100
Subject: [PATCH 0787/3804] KVM: arm64: Move __adjust_pc out of line

In order to make it easy to call __adjust_pc() from the EL1 code
(in the case of nVHE), rename it to __kvm_adjust_pc() and move
it out of line.

No expected functional change.

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Tested-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org # 5.11
---
 arch/arm64/include/asm/kvm_asm.h           |  2 ++
 arch/arm64/kvm/hyp/exception.c             | 18 +++++++++++++++++-
 arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 18 ------------------
 arch/arm64/kvm/hyp/nvhe/switch.c           |  3 +--
 arch/arm64/kvm/hyp/vhe/switch.c            |  3 +--
 5 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cf8df032b9c30..d5b11037401dd 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -201,6 +201,8 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
+extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu);
+
 extern u64 __vgic_v3_get_gic_config(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 73629094f9030..0812a496725f6 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -296,7 +296,7 @@ static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
 	*vcpu_pc(vcpu) = vect_offset;
 }
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu)
+static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_el1_is_32bit(vcpu)) {
 		switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) {
@@ -329,3 +329,19 @@ void kvm_inject_exception(struct kvm_vcpu *vcpu)
 		}
 	}
 }
+
+/*
+ * Adjust the guest PC on entry, depending on flags provided by EL1
+ * for the purpose of emulation (MMIO, sysreg) or exception injection.
+ */
+void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
+		kvm_inject_exception(vcpu);
+		vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
+				      KVM_ARM64_EXCEPT_MASK);
+	} else 	if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
+		kvm_skip_instr(vcpu);
+		vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
+	}
+}
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
index 61716359035d6..4fdfeabefeb43 100644
--- a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
+++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h
@@ -13,8 +13,6 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
 
-void kvm_inject_exception(struct kvm_vcpu *vcpu);
-
 static inline void kvm_skip_instr(struct kvm_vcpu *vcpu)
 {
 	if (vcpu_mode_is_32bit(vcpu)) {
@@ -43,22 +41,6 @@ static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu)
 	write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
 }
 
-/*
- * Adjust the guest PC on entry, depending on flags provided by EL1
- * for the purpose of emulation (MMIO, sysreg) or exception injection.
- */
-static inline void __adjust_pc(struct kvm_vcpu *vcpu)
-{
-	if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) {
-		kvm_inject_exception(vcpu);
-		vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION |
-				      KVM_ARM64_EXCEPT_MASK);
-	} else 	if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) {
-		kvm_skip_instr(vcpu);
-		vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC;
-	}
-}
-
 /*
  * Skip an instruction while host sysregs are live.
  * Assumes host is always 64-bit.
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index e9f6ea704d07d..f7af9688c1f72 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -4,7 +4,6 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 #include <hyp/sysreg-sr.h>
 
@@ -201,7 +200,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 	 */
 	__debug_save_host_buffers_nvhe(vcpu);
 
-	__adjust_pc(vcpu);
+	__kvm_adjust_pc(vcpu);
 
 	/*
 	 * We must restore the 32-bit state before the sysregs, thanks
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 7b8f7db5c1ed6..b3229924d2431 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -4,7 +4,6 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
-#include <hyp/adjust_pc.h>
 #include <hyp/switch.h>
 
 #include <linux/arm-smccc.h>
@@ -132,7 +131,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 	__load_guest_stage2(vcpu->arch.hw_mmu);
 	__activate_traps(vcpu);
 
-	__adjust_pc(vcpu);
+	__kvm_adjust_pc(vcpu);
 
 	sysreg_restore_guest_state_vhe(guest_ctxt);
 	__debug_switch_to_guest(vcpu);
-- 
GitLab


From 26778aaa134a9aefdf5dbaad904054d7be9d656d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 6 May 2021 15:20:12 +0100
Subject: [PATCH 0788/3804] KVM: arm64: Commit pending PC adjustments before
 returning to userspace

KVM currently updates PC (and the corresponding exception state)
using a two phase approach: first by setting a set of flags,
then by converting these flags into a state update when the vcpu
is about to enter the guest.

However, this creates a disconnect with userspace if the vcpu thread
returns there with any exception/PC flag set. In this case, the exposed
context is wrong, as userspace doesn't have access to these flags
(they aren't architectural). It also means that these flags are
preserved across a reset, which isn't expected.

To solve this problem, force an explicit synchronisation of the
exception state on vcpu exit to userspace. As an optimisation
for nVHE systems, only perform this when there is something pending.

Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Tested-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org # 5.11
---
 arch/arm64/include/asm/kvm_asm.h   |  1 +
 arch/arm64/kvm/arm.c               | 11 +++++++++++
 arch/arm64/kvm/hyp/exception.c     |  4 ++--
 arch/arm64/kvm/hyp/nvhe/hyp-main.c |  8 ++++++++
 4 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index d5b11037401dd..5e9b33cbac513 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -63,6 +63,7 @@
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
 #define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp			20
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			21
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1cb39c0803a44..1126eae274000 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -897,6 +897,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	kvm_sigset_deactivate(vcpu);
 
+	/*
+	 * In the unlikely event that we are returning to userspace
+	 * with pending exceptions or PC adjustment, commit these
+	 * adjustments in order to give userspace a consistent view of
+	 * the vcpu state. Note that this relies on __kvm_adjust_pc()
+	 * being preempt-safe on VHE.
+	 */
+	if (unlikely(vcpu->arch.flags & (KVM_ARM64_PENDING_EXCEPTION |
+					 KVM_ARM64_INCREMENT_PC)))
+		kvm_call_hyp(__kvm_adjust_pc, vcpu);
+
 	vcpu_put(vcpu);
 	return ret;
 }
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 0812a496725f6..11541b94b328f 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -331,8 +331,8 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Adjust the guest PC on entry, depending on flags provided by EL1
- * for the purpose of emulation (MMIO, sysreg) or exception injection.
+ * Adjust the guest PC (and potentially exception state) depending on
+ * flags provided by the emulation code.
  */
 void __kvm_adjust_pc(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index f36420a804745..1632f001f4ed6 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -28,6 +28,13 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 	cpu_reg(host_ctxt, 1) =  __kvm_vcpu_run(kern_hyp_va(vcpu));
 }
 
+static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+
+	__kvm_adjust_pc(kern_hyp_va(vcpu));
+}
+
 static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
 {
 	__kvm_flush_vm_context();
@@ -170,6 +177,7 @@ typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_vcpu_run),
+	HANDLE_FUNC(__kvm_adjust_pc),
 	HANDLE_FUNC(__kvm_flush_vm_context),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
-- 
GitLab


From cb853ded1d25e5b026ce115dbcde69e3d7e2e831 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 14 May 2021 09:05:41 +0100
Subject: [PATCH 0789/3804] KVM: arm64: Fix debug register indexing

Commit 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on
reset") flipped the register number to 0 for all the debug registers
in the sysreg table, hereby indicating that these registers live
in a separate shadow structure.

However, the author of this patch failed to realise that all the
accessors are using that particular index instead of the register
encoding, resulting in all the registers hitting index 0. Not quite
a valid implementation of the architecture...

Address the issue by fixing all the accessors to use the CRm field
of the encoding, which contains the debug register index.

Fixes: 03fdfb2690099 ("KVM: arm64: Don't write junk to sysregs on reset")
Reported-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
---
 arch/arm64/kvm/sys_regs.c | 42 +++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 76ea2800c33e8..1a7968ad078c6 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -399,14 +399,14 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -414,7 +414,7 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
 static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -424,7 +424,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -434,21 +434,21 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static void reset_bvr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm] = rd->val;
 }
 
 static bool trap_bcr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -456,7 +456,7 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
 static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -467,7 +467,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -477,22 +477,22 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static void reset_bcr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm] = rd->val;
 }
 
 static bool trap_wvr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write,
-		vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]);
+	trace_trap_reg(__func__, rd->CRm, p->is_write,
+		vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm]);
 
 	return true;
 }
@@ -500,7 +500,7 @@ static bool trap_wvr(struct kvm_vcpu *vcpu,
 static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -510,7 +510,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -520,21 +520,21 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static void reset_wvr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_wvr[rd->CRm] = rd->val;
 }
 
 static bool trap_wcr(struct kvm_vcpu *vcpu,
 		     struct sys_reg_params *p,
 		     const struct sys_reg_desc *rd)
 {
-	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (p->is_write)
 		reg_to_dbg(vcpu, p, rd, dbg_reg);
 	else
 		dbg_to_reg(vcpu, p, rd, dbg_reg);
 
-	trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg);
+	trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
 
 	return true;
 }
@@ -542,7 +542,7 @@ static bool trap_wcr(struct kvm_vcpu *vcpu,
 static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -552,7 +552,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	const struct kvm_one_reg *reg, void __user *uaddr)
 {
-	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
+	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm];
 
 	if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
@@ -562,7 +562,7 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 static void reset_wcr(struct kvm_vcpu *vcpu,
 		      const struct sys_reg_desc *rd)
 {
-	vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val;
+	vcpu->arch.vcpu_debug_state.dbg_wcr[rd->CRm] = rd->val;
 }
 
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
-- 
GitLab


From d874e6c06952382897d35bf4094193cd44ae91bd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 23 Apr 2021 16:27:09 +0200
Subject: [PATCH 0790/3804] mt76: mt7921: fix possible AOOB issue in
 mt7921_mcu_tx_rate_report

Fix possible array out of bound access in mt7921_mcu_tx_rate_report.
Remove unnecessary variable in mt7921_mcu_tx_rate_report.

Fixes: 1c099ab44727c ("mt76: mt7921: add MCU support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/91a1e8f6b6a3e6a929de560ed68132f6eb421720.1619187875.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 5f3d56d570a5e..67dc4b4cc0945 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -402,20 +402,22 @@ static void
 mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
 			  u16 wlan_idx)
 {
-	struct mt7921_mcu_wlan_info_event *wtbl_info =
-		(struct mt7921_mcu_wlan_info_event *)(skb->data);
-	struct rate_info rate = {};
-	u8 curr_idx = wtbl_info->rate_info.rate_idx;
-	u16 curr = le16_to_cpu(wtbl_info->rate_info.rate[curr_idx]);
-	struct mt7921_mcu_peer_cap peer = wtbl_info->peer_cap;
+	struct mt7921_mcu_wlan_info_event *wtbl_info;
 	struct mt76_phy *mphy = &dev->mphy;
 	struct mt7921_sta_stats *stats;
+	struct rate_info rate = {};
 	struct mt7921_sta *msta;
 	struct mt76_wcid *wcid;
+	u8 idx;
 
 	if (wlan_idx >= MT76_N_WCIDS)
 		return;
 
+	wtbl_info = (struct mt7921_mcu_wlan_info_event *)skb->data;
+	idx = wtbl_info->rate_info.rate_idx;
+	if (idx >= ARRAY_SIZE(wtbl_info->rate_info.rate))
+		return;
+
 	rcu_read_lock();
 
 	wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
@@ -426,7 +428,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
 	stats = &msta->stats;
 
 	/* current rate */
-	mt7921_mcu_tx_rate_parse(mphy, &peer, &rate, curr);
+	mt7921_mcu_tx_rate_parse(mphy, &wtbl_info->peer_cap, &rate,
+				 le16_to_cpu(wtbl_info->rate_info.rate[idx]));
 	stats->tx_rate = rate;
 out:
 	rcu_read_unlock();
-- 
GitLab


From 6061fcf4820a2036189d0a6215f75a913f0fb5a1 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 23 Apr 2021 16:27:11 +0200
Subject: [PATCH 0791/3804] mt76: connac: do not schedule mac_work if the
 device is not running

Do not run ieee80211_queue_delayed_work for mac work if the workqueue is
not initialized yet

Fixes: b1bd7bb8121d ("mt76: connac: unschedule mac_work before going to sleep")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/5bfd36a75799c37b12fcb54d17e77fbc2c7a0558.1619187875.git.lorenzo@kernel.org
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 5 +++--
 drivers/net/wireless/mediatek/mt76/mt7921/mac.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f81a17d560088..e2dcfee6be81e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1912,8 +1912,9 @@ void mt7615_pm_wake_work(struct work_struct *work)
 			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
 		mt76_queue_tx_cleanup(dev, dev->mt76.q_mcu[MT_MCUQ_WM], false);
-		ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-					     MT7615_WATCHDOG_TIME);
+		if (test_bit(MT76_STATE_RUNNING, &mphy->state))
+			ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+						     MT7615_WATCHDOG_TIME);
 	}
 
 	ieee80211_wake_queues(mphy->hw);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index 214bd18597929..decf2d5f0ce3a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -1404,8 +1404,9 @@ void mt7921_pm_wake_work(struct work_struct *work)
 			napi_schedule(&dev->mt76.napi[i]);
 		mt76_connac_pm_dequeue_skbs(mphy, &dev->pm);
 		mt7921_tx_cleanup(dev);
-		ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
-					     MT7921_WATCHDOG_TIME);
+		if (test_bit(MT76_STATE_RUNNING, &mphy->state))
+			ieee80211_queue_delayed_work(mphy->hw, &mphy->mac_work,
+						     MT7921_WATCHDOG_TIME);
 	}
 
 	ieee80211_wake_queues(mphy->hw);
-- 
GitLab


From d6245712add0af27f64d66793bf9c00f882e2d15 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 12:02:10 +0200
Subject: [PATCH 0792/3804] mt76: connac: fix HT A-MPDU setting field in
 STA_REC_PHY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MT7921 firmware needs this to enable tx A-MPDU properly

Reported-by: Jayden Kuo (郭育辰) <Jayden.Kuo@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210507100211.15709-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index fe0ab5e5ff815..619561606f96d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -721,6 +721,10 @@ void mt76_connac_mcu_sta_tlv(struct mt76_phy *mphy, struct sk_buff *skb,
 	phy->phy_type = mt76_connac_get_phy_mode_v2(mphy, vif, band, sta);
 	phy->basic_rate = cpu_to_le16((u16)vif->bss_conf.basic_rates);
 	phy->rcpi = rcpi;
+	phy->ampdu = FIELD_PREP(IEEE80211_HT_AMPDU_PARM_FACTOR,
+				sta->ht_cap.ampdu_factor) |
+		     FIELD_PREP(IEEE80211_HT_AMPDU_PARM_DENSITY,
+				sta->ht_cap.ampdu_density);
 
 	tlv = mt76_connac_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra_info));
 	ra_info = (struct sta_rec_ra_info *)tlv;
-- 
GitLab


From 94bb18b03d43f32e9440e8e350b7f533137c40f6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 7 May 2021 12:02:11 +0200
Subject: [PATCH 0793/3804] mt76: mt7921: fix max aggregation subframes setting

The hardware can only handle 64 subframes in rx direction and 128 for tx.
Improves throughput with APs that can handle more than that

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210507100211.15709-2-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt7921/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
index fe28bf4050c41..1763ea0614ce2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c
@@ -76,8 +76,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw)
 	struct wiphy *wiphy = hw->wiphy;
 
 	hw->queues = 4;
-	hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
-	hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	hw->max_rx_aggregation_subframes = 64;
+	hw->max_tx_aggregation_subframes = 128;
 
 	hw->radiotap_timestamp.units_pos =
 		IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US;
-- 
GitLab


From 509559c35bcd23d5a046624b225cb3e99a9f1481 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 7 May 2021 18:50:19 +0200
Subject: [PATCH 0794/3804] mt76: mt76x0e: fix device hang during
 suspend/resume

Similar to usb device, re-initialize mt76x0e device after resume in order
to fix mt7630e hang during suspend/resume

Reported-by: Luca Trombin <luca.trombin@gmail.com>
Fixes: c2a4d9fbabfb9 ("mt76x0: inital split between pci and usb")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/4812f9611624b34053c1592fd9c175b67d4ffcb4.1620406022.git.lorenzo@kernel.org
---
 .../net/wireless/mediatek/mt76/mt76x0/pci.c   | 81 ++++++++++++++++++-
 1 file changed, 77 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 5847f943e8da7..b795e7245c075 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -87,7 +87,7 @@ static const struct ieee80211_ops mt76x0e_ops = {
 	.reconfig_complete = mt76x02_reconfig_complete,
 };
 
-static int mt76x0e_register_device(struct mt76x02_dev *dev)
+static int mt76x0e_init_hardware(struct mt76x02_dev *dev, bool resume)
 {
 	int err;
 
@@ -100,9 +100,11 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev)
 	if (err < 0)
 		return err;
 
-	err = mt76x02_dma_init(dev);
-	if (err < 0)
-		return err;
+	if (!resume) {
+		err = mt76x02_dma_init(dev);
+		if (err < 0)
+			return err;
+	}
 
 	err = mt76x0_init_hardware(dev);
 	if (err < 0)
@@ -123,6 +125,17 @@ static int mt76x0e_register_device(struct mt76x02_dev *dev)
 	mt76_clear(dev, 0x110, BIT(9));
 	mt76_set(dev, MT_MAX_LEN_CFG, BIT(13));
 
+	return 0;
+}
+
+static int mt76x0e_register_device(struct mt76x02_dev *dev)
+{
+	int err;
+
+	err = mt76x0e_init_hardware(dev, false);
+	if (err < 0)
+		return err;
+
 	err = mt76x0_register_device(dev);
 	if (err < 0)
 		return err;
@@ -167,6 +180,8 @@ mt76x0e_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		return ret;
 
+	mt76_pci_disable_aspm(pdev);
+
 	mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt76x0e_ops,
 				 &drv_ops);
 	if (!mdev)
@@ -220,6 +235,60 @@ mt76x0e_remove(struct pci_dev *pdev)
 	mt76_free_device(mdev);
 }
 
+#ifdef CONFIG_PM
+static int mt76x0e_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev);
+	struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
+	int i;
+
+	mt76_worker_disable(&mdev->tx_worker);
+	for (i = 0; i < ARRAY_SIZE(mdev->phy.q_tx); i++)
+		mt76_queue_tx_cleanup(dev, mdev->phy.q_tx[i], true);
+	for (i = 0; i < ARRAY_SIZE(mdev->q_mcu); i++)
+		mt76_queue_tx_cleanup(dev, mdev->q_mcu[i], true);
+	napi_disable(&mdev->tx_napi);
+
+	mt76_for_each_q_rx(mdev, i)
+		napi_disable(&mdev->napi[i]);
+
+	mt76x02_dma_disable(dev);
+	mt76x02_mcu_cleanup(dev);
+	mt76x0_chip_onoff(dev, false, false);
+
+	pci_enable_wake(pdev, pci_choose_state(pdev, state), true);
+	pci_save_state(pdev);
+
+	return pci_set_power_state(pdev, pci_choose_state(pdev, state));
+}
+
+static int mt76x0e_resume(struct pci_dev *pdev)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev);
+	struct mt76x02_dev *dev = container_of(mdev, struct mt76x02_dev, mt76);
+	int err, i;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	mt76_worker_enable(&mdev->tx_worker);
+
+	mt76_for_each_q_rx(mdev, i) {
+		mt76_queue_rx_reset(dev, i);
+		napi_enable(&mdev->napi[i]);
+		napi_schedule(&mdev->napi[i]);
+	}
+
+	napi_enable(&mdev->tx_napi);
+	napi_schedule(&mdev->tx_napi);
+
+	return mt76x0e_init_hardware(dev, true);
+}
+#endif /* CONFIG_PM */
+
 static const struct pci_device_id mt76x0e_device_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7610) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_MEDIATEK, 0x7630) },
@@ -237,6 +306,10 @@ static struct pci_driver mt76x0e_driver = {
 	.id_table	= mt76x0e_device_table,
 	.probe		= mt76x0e_probe,
 	.remove		= mt76x0e_remove,
+#ifdef CONFIG_PM
+	.suspend	= mt76x0e_suspend,
+	.resume		= mt76x0e_resume,
+#endif /* CONFIG_PM */
 };
 
 module_pci_driver(mt76x0e_driver);
-- 
GitLab


From 2c2bdd2372afcfcf24fe11c65ebe3361b7e1cd9f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 13 May 2021 09:03:03 +0200
Subject: [PATCH 0795/3804] mt76: validate rx A-MSDU subframes

Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the
destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP)
header, and if so dropping the complete A-MSDU frame. This mitigates
known attacks, although new (unknown) aggregation-based attacks may
remain possible.

This defense works because in A-MSDU aggregation injection attacks, a
normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means
the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042
header. In other words, the destination MAC address of the first A-MSDU
subframe contains the start of an RFC1042 header during an aggregation
attack. We can detect this and thereby prevent this specific attack.
For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi
Through Frame Aggregation and Fragmentation".

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210513070303.20253-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 977acab0360a3..03fe628375576 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -514,10 +514,36 @@ EXPORT_SYMBOL_GPL(mt76_free_device);
 static void mt76_rx_release_amsdu(struct mt76_phy *phy, enum mt76_rxq_id q)
 {
 	struct sk_buff *skb = phy->rx_amsdu[q].head;
+	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_dev *dev = phy->dev;
 
 	phy->rx_amsdu[q].head = NULL;
 	phy->rx_amsdu[q].tail = NULL;
+
+	/*
+	 * Validate if the amsdu has a proper first subframe.
+	 * A single MSDU can be parsed as A-MSDU when the unauthenticated A-MSDU
+	 * flag of the QoS header gets flipped. In such cases, the first
+	 * subframe has a LLC/SNAP header in the location of the destination
+	 * address.
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		int offset = 0;
+
+		if (!(status->flag & RX_FLAG_8023)) {
+			offset = ieee80211_get_hdrlen_from_skb(skb);
+
+			if ((status->flag &
+			     (RX_FLAG_DECRYPTED | RX_FLAG_IV_STRIPPED)) ==
+			    RX_FLAG_DECRYPTED)
+				offset += 8;
+		}
+
+		if (ether_addr_equal(skb->data + offset, rfc1042_header)) {
+			dev_kfree_skb(skb);
+			return;
+		}
+	}
 	__skb_queue_tail(&dev->rx_skb[q], skb);
 }
 
-- 
GitLab


From ffb324e6f874121f7dce5bdae5e05d02baae7269 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 15 May 2021 03:00:37 +0000
Subject: [PATCH 0796/3804] tty: vt: always invoke vc->vc_sw->con_resize
 callback

syzbot is reporting OOB write at vga16fb_imageblit() [1], for
resize_screen() from ioctl(VT_RESIZE) returns 0 without checking whether
requested rows/columns fit the amount of memory reserved for the graphical
screen if current mode is KD_GRAPHICS.

----------
  #include <sys/types.h>
  #include <sys/stat.h>
  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <linux/kd.h>
  #include <linux/vt.h>

  int main(int argc, char *argv[])
  {
        const int fd = open("/dev/char/4:1", O_RDWR);
        struct vt_sizes vt = { 0x4100, 2 };

        ioctl(fd, KDSETMODE, KD_GRAPHICS);
        ioctl(fd, VT_RESIZE, &vt);
        ioctl(fd, KDSETMODE, KD_TEXT);
        return 0;
  }
----------

Allow framebuffer drivers to return -EINVAL, by moving vc->vc_mode !=
KD_GRAPHICS check from resize_screen() to fbcon_resize().

Link: https://syzkaller.appspot.com/bug?extid=1f29e126cf461c4de3b3 [1]
Reported-by: syzbot <syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+1f29e126cf461c4de3b3@syzkaller.appspotmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/vt/vt.c              | 2 +-
 drivers/video/fbdev/core/fbcon.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 01645e87b3d5c..fa1548d4f94be 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1171,7 +1171,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
 	/* Resizes the resolution of the display adapater */
 	int err = 0;
 
-	if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
+	if (vc->vc_sw->con_resize)
 		err = vc->vc_sw->con_resize(vc, width, height, user);
 
 	return err;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 3406067985b1f..22bb3892f6bd1 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2019,7 +2019,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
 			return -EINVAL;
 
 		pr_debug("resize now %ix%i\n", var.xres, var.yres);
-		if (con_is_visible(vc)) {
+		if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) {
 			var.activate = FB_ACTIVATE_NOW |
 				FB_ACTIVATE_FORCE;
 			fb_set_var(info, &var);
-- 
GitLab


From 5cb289bf2d7c34ca1abd794ce116c4f19185a1d4 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Fri, 14 May 2021 17:09:52 +0800
Subject: [PATCH 0797/3804] scsi: qla2xxx: Fix error return code in
 qla82xx_write_flash_dword()

Fix to return a negative error code from the error handling case instead of
0 as done elsewhere in this function.

Link: https://lore.kernel.org/r/20210514090952.6715-1-thunder.leizhen@huawei.com
Fixes: a9083016a531 ("[SCSI] qla2xxx: Add ISP82XX support.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_nx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 0677295957bc5..615e44af1ca60 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -1063,7 +1063,8 @@ qla82xx_write_flash_dword(struct qla_hw_data *ha, uint32_t flashaddr,
 		return ret;
 	}
 
-	if (qla82xx_flash_set_write_enable(ha))
+	ret = qla82xx_flash_set_write_enable(ha);
+	if (ret < 0)
 		goto done_write;
 
 	qla82xx_wr_32(ha, QLA82XX_ROMUSB_ROM_WDATA, data);
-- 
GitLab


From d1acd81bd6eb685aa9fef25624fb36d297f6404e Mon Sep 17 00:00:00 2001
From: Ajish Koshy <ajish.koshy@microchip.com>
Date: Wed, 5 May 2021 17:31:03 +0530
Subject: [PATCH 0798/3804] scsi: pm80xx: Fix drives missing during
 rmmod/insmod loop

When driver is loaded after rmmod some drives are not showing up during
discovery.

SATA drives are directly attached to the controller connected phys.  During
device discovery, the IDENTIFY command (qc timeout (cmd 0xec)) is timing out
during revalidation. This will trigger abort from host side and controller
successfully aborts the command and returns success. Post this successful
abort response ATA library decides to mark the disk as NODEV.

To overcome this, inside pm8001_scan_start() after the phy_start() call,
wait for the phy start response and then sleep for a few hundred
milliseconds before triggering the next phy start. This delay gives the
controller state machine sufficient time to accept the next phy start.

Link: https://lore.kernel.org/r/20210505120103.24497-1-ajish.koshy@microchip.com
Signed-off-by: Ajish Koshy <ajish.koshy@microchip.com>
Signed-off-by: Viswas G <viswas.g@microchip.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_hwi.c  | 10 ++++++----
 drivers/scsi/pm8001/pm8001_init.c |  2 +-
 drivers/scsi/pm8001/pm8001_sas.c  |  7 ++++++-
 drivers/scsi/pm8001/pm80xx_hwi.c  | 12 ++++++------
 4 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index ecd06d2d7e816..71aa6af083406 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -3765,11 +3765,13 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case HW_EVENT_PHY_START_STATUS:
 		pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_START_STATUS status = %x\n",
 			   status);
-		if (status == 0) {
+		if (status == 0)
 			phy->phy_state = 1;
-			if (pm8001_ha->flags == PM8001F_RUN_TIME &&
-					phy->enable_completion != NULL)
-				complete(phy->enable_completion);
+
+		if (pm8001_ha->flags == PM8001F_RUN_TIME &&
+				phy->enable_completion != NULL) {
+			complete(phy->enable_completion);
+			phy->enable_completion = NULL;
 		}
 		break;
 	case HW_EVENT_SAS_PHY_UP:
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 390c33df03578..af09bd282cb94 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -1151,8 +1151,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
 		goto err_out_shost;
 	}
 	list_add_tail(&pm8001_ha->list, &hba_list);
-	scsi_scan_host(pm8001_ha->shost);
 	pm8001_ha->flags = PM8001F_RUN_TIME;
+	scsi_scan_host(pm8001_ha->shost);
 	return 0;
 
 err_out_shost:
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index d28af413b93a2..335cf37e6cb94 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -264,12 +264,17 @@ void pm8001_scan_start(struct Scsi_Host *shost)
 	int i;
 	struct pm8001_hba_info *pm8001_ha;
 	struct sas_ha_struct *sha = SHOST_TO_SAS_HA(shost);
+	DECLARE_COMPLETION_ONSTACK(completion);
 	pm8001_ha = sha->lldd_ha;
 	/* SAS_RE_INITIALIZATION not available in SPCv/ve */
 	if (pm8001_ha->chip_id == chip_8001)
 		PM8001_CHIP_DISP->sas_re_init_req(pm8001_ha);
-	for (i = 0; i < pm8001_ha->chip->n_phy; ++i)
+	for (i = 0; i < pm8001_ha->chip->n_phy; ++i) {
+		pm8001_ha->phy[i].enable_completion = &completion;
 		PM8001_CHIP_DISP->phy_start_req(pm8001_ha, i);
+		wait_for_completion(&completion);
+		msleep(300);
+	}
 }
 
 int pm8001_scan_finished(struct Scsi_Host *shost, unsigned long time)
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 4e980830f9f59..700530e969ac0 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3487,13 +3487,13 @@ static int mpi_phy_start_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	pm8001_dbg(pm8001_ha, INIT,
 		   "phy start resp status:0x%x, phyid:0x%x\n",
 		   status, phy_id);
-	if (status == 0) {
+	if (status == 0)
 		phy->phy_state = PHY_LINK_DOWN;
-		if (pm8001_ha->flags == PM8001F_RUN_TIME &&
-				phy->enable_completion != NULL) {
-			complete(phy->enable_completion);
-			phy->enable_completion = NULL;
-		}
+
+	if (pm8001_ha->flags == PM8001F_RUN_TIME &&
+			phy->enable_completion != NULL) {
+		complete(phy->enable_completion);
+		phy->enable_completion = NULL;
 	}
 	return 0;
 
-- 
GitLab


From fbb80d5ad400a12ec67214a0e7e9f9497dc9e615 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 20:54:28 +0800
Subject: [PATCH 0799/3804] irqchip: Remove redundant error printing

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210511125428.6108-2-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-mvebu-icu.c  | 4 +---
 drivers/irqchip/irq-mvebu-sei.c  | 4 +---
 drivers/irqchip/irq-stm32-exti.c | 4 +---
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c
index 91adf771f1859..090bc3f4f7d88 100644
--- a/drivers/irqchip/irq-mvebu-icu.c
+++ b/drivers/irqchip/irq-mvebu-icu.c
@@ -359,10 +359,8 @@ static int mvebu_icu_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	icu->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(icu->base)) {
-		dev_err(&pdev->dev, "Failed to map icu base address.\n");
+	if (IS_ERR(icu->base))
 		return PTR_ERR(icu->base);
-	}
 
 	/*
 	 * Legacy bindings: ICU is one node with one MSI parent: force manually
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 18832ccc8ff87..3a7b7a7f20ca7 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -384,10 +384,8 @@ static int mvebu_sei_probe(struct platform_device *pdev)
 
 	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sei->base = devm_ioremap_resource(sei->dev, sei->res);
-	if (IS_ERR(sei->base)) {
-		dev_err(sei->dev, "Failed to remap SEI resource\n");
+	if (IS_ERR(sei->base))
 		return PTR_ERR(sei->base);
-	}
 
 	/* Retrieve the SEI capabilities with the interrupt ranges */
 	sei->caps = of_device_get_match_data(&pdev->dev);
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index b9db90c4aa566..4704f2ee5797a 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -892,10 +892,8 @@ static int stm32_exti_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	host_data->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(host_data->base)) {
-		dev_err(dev, "Unable to map registers\n");
+	if (IS_ERR(host_data->base))
 		return PTR_ERR(host_data->base);
-	}
 
 	for (i = 0; i < drv_data->bank_nr; i++)
 		stm32_exti_chip_init(host_data, i, np);
-- 
GitLab


From c1f0616124c455c5c762b6f123e40bba5df759e6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 16 May 2021 18:17:55 +0200
Subject: [PATCH 0800/3804] ALSA: intel8x0: Don't update period unless prepared

The interrupt handler of intel8x0 calls snd_intel8x0_update() whenever
the hardware sets the corresponding status bit for each stream.  This
works fine for most cases as long as the hardware behaves properly.
But when the hardware gives a wrong bit set, this leads to a zero-
division Oops, and reportedly, this seems to be what happened on a VM.

For fixing the crash, this patch adds an internal flag indicating that
the stream is ready to be updated, and checks it (as well as the
suspended flag) to ignore such spurious updates.

Cc: <stable@vger.kernel.org>
Reported-and-tested-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Link: https://lore.kernel.org/r/s5h5yzi7uh0.wl-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/intel8x0.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index 35903d1a1cbd2..5b124c4ad5725 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -331,6 +331,7 @@ struct ichdev {
 	unsigned int ali_slot;			/* ALI DMA slot */
 	struct ac97_pcm *pcm;
 	int pcm_open_flag;
+	unsigned int prepared:1;
 	unsigned int suspended: 1;
 };
 
@@ -691,6 +692,9 @@ static inline void snd_intel8x0_update(struct intel8x0 *chip, struct ichdev *ich
 	int status, civ, i, step;
 	int ack = 0;
 
+	if (!ichdev->prepared || ichdev->suspended)
+		return;
+
 	spin_lock_irqsave(&chip->reg_lock, flags);
 	status = igetbyte(chip, port + ichdev->roff_sr);
 	civ = igetbyte(chip, port + ICH_REG_OFF_CIV);
@@ -881,6 +885,7 @@ static int snd_intel8x0_hw_params(struct snd_pcm_substream *substream,
 	if (ichdev->pcm_open_flag) {
 		snd_ac97_pcm_close(ichdev->pcm);
 		ichdev->pcm_open_flag = 0;
+		ichdev->prepared = 0;
 	}
 	err = snd_ac97_pcm_open(ichdev->pcm, params_rate(hw_params),
 				params_channels(hw_params),
@@ -902,6 +907,7 @@ static int snd_intel8x0_hw_free(struct snd_pcm_substream *substream)
 	if (ichdev->pcm_open_flag) {
 		snd_ac97_pcm_close(ichdev->pcm);
 		ichdev->pcm_open_flag = 0;
+		ichdev->prepared = 0;
 	}
 	return 0;
 }
@@ -976,6 +982,7 @@ static int snd_intel8x0_pcm_prepare(struct snd_pcm_substream *substream)
 			ichdev->pos_shift = (runtime->sample_bits > 16) ? 2 : 1;
 	}
 	snd_intel8x0_setup_periods(chip, ichdev);
+	ichdev->prepared = 1;
 	return 0;
 }
 
-- 
GitLab


From d07f6ca923ea0927a1024dfccafc5b53b61cfecc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 16 May 2021 15:27:44 -0700
Subject: [PATCH 0801/3804] Linux 5.13-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 53d09c414635c..0ed7e061c8e9e 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
GitLab


From 35d470b5fbc9f82feb77b56bb0d5d0b5cd73e9da Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Thu, 13 May 2021 17:45:46 +0200
Subject: [PATCH 0802/3804] hwmon: (dell-smm-hwmon) Fix index values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When support for up to 10 temp sensors and for disabling automatic BIOS
fan control was added, no one updated the index values used for
disallowing fan support and fan type calls.
Fix those values.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Reviewed-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20210513154546.12430-1-W_Armin@gmx.de
Fixes: 1bb46a20e73b ("hwmon: (dell-smm) Support up to 10 temp sensors")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/dell-smm-hwmon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index 2970892bed829..f2221ca0aa7be 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -838,10 +838,10 @@ static struct attribute *i8k_attrs[] = {
 static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
 			      int index)
 {
-	if (disallow_fan_support && index >= 8)
+	if (disallow_fan_support && index >= 20)
 		return 0;
 	if (disallow_fan_type_call &&
-	    (index == 9 || index == 12 || index == 15))
+	    (index == 21 || index == 25 || index == 28))
 		return 0;
 	if (index >= 0 && index <= 1 &&
 	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
-- 
GitLab


From c2a338c9395eb843a9a11a2385f4b00cd0978494 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?V=C3=A1clav=20Kubern=C3=A1t?= <kubernat@cesnet.cz>
Date: Thu, 13 May 2021 22:11:10 +0200
Subject: [PATCH 0803/3804] hwmon: (pmbus/fsp-3y) Fix FSP-3Y YH-5151E VOUT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After testing new YH-5151E devices, we found out that not all YH-5151E
work the same. The newly tested devices actually report vout correctly
in linear16 (even though they're still YH-5151E). We suspect that it is
because these new devices have a different firmware version, but that is
unconfirmed. The version cannot be queried through PMBus.

The compliant versions of YH-5151E report VOUT_MODE normally, so we turn
on the linear11 workaround only if VOUT_MODE doesn't report anything.

Signed-off-by: Václav Kubernát <kubernat@cesnet.cz>
Link: https://lore.kernel.org/r/20210513201110.313523-1-kubernat@cesnet.cz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/fsp-3y.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c
index e248424752545..aec294cc72d1f 100644
--- a/drivers/hwmon/pmbus/fsp-3y.c
+++ b/drivers/hwmon/pmbus/fsp-3y.c
@@ -37,6 +37,8 @@ struct fsp3y_data {
 	struct pmbus_driver_info info;
 	int chip;
 	int page;
+
+	bool vout_linear_11;
 };
 
 #define to_fsp3y_data(x) container_of(x, struct fsp3y_data, info)
@@ -108,11 +110,9 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 	int rv;
 
 	/*
-	 * YH5151-E outputs vout in linear11. The conversion is done when
-	 * reading. Here, we have to inject pmbus_core with the correct
-	 * exponent (it is -6).
+	 * Inject an exponent for non-compliant YH5151-E.
 	 */
-	if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE)
+	if (data->vout_linear_11 && reg == PMBUS_VOUT_MODE)
 		return 0x1A;
 
 	rv = set_page(client, page);
@@ -161,10 +161,9 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase,
 		return rv;
 
 	/*
-	 * YH-5151E is non-compliant and outputs output voltages in linear11
-	 * instead of linear16.
+	 * Handle YH-5151E non-compliant linear11 vout voltage.
 	 */
-	if (data->chip == yh5151e && reg == PMBUS_READ_VOUT)
+	if (data->vout_linear_11 && reg == PMBUS_READ_VOUT)
 		rv = sign_extend32(rv, 10) & 0xffff;
 
 	return rv;
@@ -256,6 +255,25 @@ static int fsp3y_probe(struct i2c_client *client)
 
 	data->info = fsp3y_info[data->chip];
 
+	/*
+	 * YH-5151E sometimes reports vout in linear11 and sometimes in
+	 * linear16. This depends on the exact individual piece of hardware. One
+	 * YH-5151E can use linear16 and another might use linear11 instead.
+	 *
+	 * The format can be recognized by reading VOUT_MODE - if it doesn't
+	 * report a valid exponent, then vout uses linear11. Otherwise, the
+	 * device is compliant and uses linear16.
+	 */
+	data->vout_linear_11 = false;
+	if (data->chip == yh5151e) {
+		rv = i2c_smbus_read_byte_data(client, PMBUS_VOUT_MODE);
+		if (rv < 0)
+			return rv;
+
+		if (rv == 0xFF)
+			data->vout_linear_11 = true;
+	}
+
 	return pmbus_do_probe(client, &data->info);
 }
 
-- 
GitLab


From 2a29db088c7ae7121801a0d7a60740ed2d18c4f3 Mon Sep 17 00:00:00 2001
From: Grant Peltier <grantpeltier93@gmail.com>
Date: Fri, 14 May 2021 16:19:55 -0500
Subject: [PATCH 0804/3804] hwmon: (pmbus/isl68137) remove READ_TEMPERATURE_3
 for RAA228228

The initial version of the RAA228228 datasheet claimed that the device
supported READ_TEMPERATURE_3 but not READ_TEMPERATURE_1. It has since been
discovered that the datasheet was incorrect. The RAA228228 does support
READ_TEMPERATURE_1 but does not support READ_TEMPERATURE_3.

Signed-off-by: Grant Peltier <grantpeltier93@gmail.com>
Fixes: 51fb91ed5a6f ("hwmon: (pmbus/isl68137) remove READ_TEMPERATURE_1 telemetry for RAA228228")
Link: https://lore.kernel.org/r/20210514211954.GA24646@raspberrypi
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/isl68137.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c
index 40597a9e799f5..1a8caff1ac5f6 100644
--- a/drivers/hwmon/pmbus/isl68137.c
+++ b/drivers/hwmon/pmbus/isl68137.c
@@ -244,8 +244,8 @@ static int isl68137_probe(struct i2c_client *client)
 		info->read_word_data = raa_dmpvr2_read_word_data;
 		break;
 	case raa_dmpvr2_2rail_nontc:
-		info->func[0] &= ~PMBUS_HAVE_TEMP;
-		info->func[1] &= ~PMBUS_HAVE_TEMP;
+		info->func[0] &= ~PMBUS_HAVE_TEMP3;
+		info->func[1] &= ~PMBUS_HAVE_TEMP3;
 		fallthrough;
 	case raa_dmpvr2_2rail:
 		info->pages = 2;
-- 
GitLab


From 9d5e8492eee017ffdaa9f0957e91d39d83163197 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 9 May 2021 16:22:54 -0700
Subject: [PATCH 0805/3804] xfs: adjust rt allocation minlen when extszhint >
 rtextsize

xfs_bmap_rtalloc doesn't handle realtime extent files with extent size
hints larger than the rt volume's extent size properly, because
xfs_bmap_extsize_align can adjust the offset/length parameters to try to
fit the extent size hint.

Under these conditions, minlen has to be large enough so that any
allocation returned by xfs_rtallocate_extent will be large enough to
cover at least one of the blocks that the caller asked for.  If the
allocation is too short, bmapi_write will return no mapping for the
requested range, which causes ENOSPC errors in other parts of the
filesystem.

Therefore, adjust minlen upwards to fix this.  This can be found by
running generic/263 (g/127 or g/522) with a realtime extent size hint
that's larger than the rt volume extent size.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 83 +++++++++++++++++++++++++++++-------------
 1 file changed, 57 insertions(+), 26 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a5e9d7d34023f..c9381bf4f04bf 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -71,18 +71,23 @@ xfs_zero_extent(
 #ifdef CONFIG_XFS_RT
 int
 xfs_bmap_rtalloc(
-	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
+	struct xfs_bmalloca	*ap)
 {
-	int		error;		/* error return value */
-	xfs_mount_t	*mp;		/* mount point structure */
-	xfs_extlen_t	prod = 0;	/* product factor for allocators */
-	xfs_extlen_t	mod = 0;	/* product factor for allocators */
-	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
-	xfs_extlen_t	align;		/* minimum allocation alignment */
-	xfs_rtblock_t	rtb;
-
-	mp = ap->ip->i_mount;
+	struct xfs_mount	*mp = ap->ip->i_mount;
+	xfs_fileoff_t		orig_offset = ap->offset;
+	xfs_rtblock_t		rtb;
+	xfs_extlen_t		prod = 0;  /* product factor for allocators */
+	xfs_extlen_t		mod = 0;   /* product factor for allocators */
+	xfs_extlen_t		ralen = 0; /* realtime allocation length */
+	xfs_extlen_t		align;     /* minimum allocation alignment */
+	xfs_extlen_t		orig_length = ap->length;
+	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
+	xfs_extlen_t		raminlen;
+	bool			rtlocked = false;
+	int			error;
+
 	align = xfs_get_extsz_hint(ap->ip);
+retry:
 	prod = align / mp->m_sb.sb_rextsize;
 	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
 					align, 1, ap->eof, 0,
@@ -92,6 +97,15 @@ xfs_bmap_rtalloc(
 	ASSERT(ap->length);
 	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
 
+	/*
+	 * If we shifted the file offset downward to satisfy an extent size
+	 * hint, increase minlen by that amount so that the allocator won't
+	 * give us an allocation that's too short to cover at least one of the
+	 * blocks that the caller asked for.
+	 */
+	if (ap->offset != orig_offset)
+		minlen += orig_offset - ap->offset;
+
 	/*
 	 * If the offset & length are not perfectly aligned
 	 * then kill prod, it will just get us in trouble.
@@ -116,10 +130,13 @@ xfs_bmap_rtalloc(
 	/*
 	 * Lock out modifications to both the RT bitmap and summary inodes
 	 */
-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
-	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
-	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
-	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+	if (!rtlocked) {
+		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
+		xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
+		xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+		rtlocked = true;
+	}
 
 	/*
 	 * If it's an allocation to an empty file at offset 0,
@@ -144,30 +161,44 @@ xfs_bmap_rtalloc(
 	do_div(ap->blkno, mp->m_sb.sb_rextsize);
 	rtb = ap->blkno;
 	ap->length = ralen;
-	error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
-				&ralen, ap->wasdel, prod, &rtb);
+	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
+	error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
+			&ralen, ap->wasdel, prod, &rtb);
 	if (error)
 		return error;
 
-	ap->blkno = rtb;
-	if (ap->blkno != NULLFSBLOCK) {
-		ap->blkno *= mp->m_sb.sb_rextsize;
-		ralen *= mp->m_sb.sb_rextsize;
-		ap->length = ralen;
-		ap->ip->i_nblocks += ralen;
+	if (rtb != NULLRTBLOCK) {
+		ap->blkno = rtb * mp->m_sb.sb_rextsize;
+		ap->length = ralen * mp->m_sb.sb_rextsize;
+		ap->ip->i_nblocks += ap->length;
 		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
 		if (ap->wasdel)
-			ap->ip->i_delayed_blks -= ralen;
+			ap->ip->i_delayed_blks -= ap->length;
 		/*
 		 * Adjust the disk quota also. This was reserved
 		 * earlier.
 		 */
 		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
 			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
-					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
-	} else {
-		ap->length = 0;
+					XFS_TRANS_DQ_RTBCOUNT, ap->length);
+		return 0;
+	}
+
+	if (align > mp->m_sb.sb_rextsize) {
+		/*
+		 * We previously enlarged the request length to try to satisfy
+		 * an extent size hint.  The allocator didn't return anything,
+		 * so reset the parameters to the original values and try again
+		 * without alignment criteria.
+		 */
+		ap->offset = orig_offset;
+		ap->length = orig_length;
+		minlen = align = mp->m_sb.sb_rextsize;
+		goto retry;
 	}
+
+	ap->blkno = NULLFSBLOCK;
+	ap->length = 0;
 	return 0;
 }
 #endif /* CONFIG_XFS_RT */
-- 
GitLab


From 9e3927f6373da54cb17e17f4bd700907e1123d2f Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Fri, 14 May 2021 18:59:44 +0800
Subject: [PATCH 0806/3804] usb: chipidea: udc: assign interrupt number to USB
 gadget structure

Chipidea also needs to synchronize the interrupt before unbinding the
udc when the gadget driver is removed; otherwise setup irq handling may
happen during unbind. See the dump below, generated from an Android
function switch stress test:

[ 4703.503056] android_work: sent uevent USB_STATE=CONNECTED
[ 4703.514642] android_work: sent uevent USB_STATE=DISCONNECTED
[ 4703.651339] android_work: sent uevent USB_STATE=CONNECTED
[ 4703.661806] init: Control message: Processed ctl.stop for 'adbd' from pid: 561 (system_server)
[ 4703.673469] init: processing action (init.svc.adbd=stopped) from (/system/etc/init/hw/init.usb.configfs.rc:14)
[ 4703.676451] Unable to handle kernel read from unreadable memory at virtual address 0000000000000090
[ 4703.676454] Mem abort info:
[ 4703.676458]   ESR = 0x96000004
[ 4703.676461]   EC = 0x25: DABT (current EL), IL = 32 bits
[ 4703.676464]   SET = 0, FnV = 0
[ 4703.676466]   EA = 0, S1PTW = 0
[ 4703.676468] Data abort info:
[ 4703.676471]   ISV = 0, ISS = 0x00000004
[ 4703.676473]   CM = 0, WnR = 0
[ 4703.676478] user pgtable: 4k pages, 48-bit VAs, pgdp=000000004a867000
[ 4703.676481] [0000000000000090] pgd=0000000000000000, p4d=0000000000000000
[ 4703.676503] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[ 4703.758297] Modules linked in: synaptics_dsx_i2c moal(O) mlan(O)
[ 4703.764327] CPU: 0 PID: 235 Comm: lmkd Tainted: G        W  O      5.10.9-00001-g3f5fd8487c38-dirty #63
[ 4703.773720] Hardware name: NXP i.MX8MNano EVK board (DT)
[ 4703.779033] pstate: 60400085 (nZCv daIf +PAN -UAO -TCO BTYPE=--)
[ 4703.785046] pc : _raw_write_unlock_bh+0xc0/0x2c8
[ 4703.789667] lr : android_setup+0x4c/0x168
[ 4703.793676] sp : ffff80001256bd80
[ 4703.796989] x29: ffff80001256bd80 x28: 00000000000000a8
[ 4703.802304] x27: ffff800012470000 x26: ffff80006d923000
[ 4703.807616] x25: ffff800012471000 x24: ffff00000b091140
[ 4703.812929] x23: ffff0000077dbd38 x22: ffff0000077da490
[ 4703.818242] x21: ffff80001256be30 x20: 0000000000000000
[ 4703.823554] x19: 0000000000000080 x18: ffff800012561048
[ 4703.828867] x17: 0000000000000000 x16: 0000000000000039
[ 4703.834180] x15: ffff8000106ad258 x14: ffff80001194c277
[ 4703.839493] x13: 0000000000003934 x12: 0000000000000000
[ 4703.844805] x11: 0000000000000000 x10: 0000000000000001
[ 4703.850117] x9 : 0000000000000000 x8 : 0000000000000090
[ 4703.855429] x7 : 6f72646e61203a70 x6 : ffff8000124f2450
[ 4703.860742] x5 : ffffffffffffffff x4 : 0000000000000009
[ 4703.866054] x3 : ffff8000108a290c x2 : ffff00007fb3a9c8
[ 4703.871367] x1 : 0000000000000000 x0 : 0000000000000090
[ 4703.876681] Call trace:
[ 4703.879129]  _raw_write_unlock_bh+0xc0/0x2c8
[ 4703.883397]  android_setup+0x4c/0x168
[ 4703.887059]  udc_irq+0x824/0xa9c
[ 4703.890287]  ci_irq+0x124/0x148
[ 4703.893429]  __handle_irq_event_percpu+0x84/0x268
[ 4703.898131]  handle_irq_event+0x64/0x14c
[ 4703.902054]  handle_fasteoi_irq+0x110/0x210
[ 4703.906236]  __handle_domain_irq+0x8c/0xd4
[ 4703.910332]  gic_handle_irq+0x6c/0x124
[ 4703.914081]  el1_irq+0xdc/0x1c0
[ 4703.917221]  _raw_spin_unlock_irq+0x20/0x54
[ 4703.921405]  finish_task_switch+0x84/0x224
[ 4703.925502]  __schedule+0x4a4/0x734
[ 4703.928990]  schedule+0xa0/0xe8
[ 4703.932132]  do_notify_resume+0x150/0x184
[ 4703.936140]  work_pending+0xc/0x40c
[ 4703.939633] Code: d5384613 521b0a69 d5184609 f9800111 (885ffd01)
[ 4703.945732] ---[ end trace ba5c1875ae49d53c ]---
[ 4703.950350] Kernel panic - not syncing: Oops: Fatal exception in interrupt
[ 4703.957223] SMP: stopping secondary CPUs
[ 4703.961151] Kernel Offset: disabled
[ 4703.964638] CPU features: 0x0240002,2000200c
[ 4703.968905] Memory Limit: none
[ 4703.971963] Rebooting in 5 seconds..

Tested-by: faqiang.zhu <faqiang.zhu@nxp.com>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1620989984-7653-1-git-send-email-jun.li@nxp.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/chipidea/udc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index c16d900cdaee3..393f216b91615 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -2061,6 +2061,7 @@ static int udc_start(struct ci_hdrc *ci)
 	ci->gadget.name         = ci->platdata->name;
 	ci->gadget.otg_caps	= otg_caps;
 	ci->gadget.sg_supported = 1;
+	ci->gadget.irq		= ci->irq;
 
 	if (ci->platdata->flags & CI_HDRC_REQUIRES_ALIGNED_DMA)
 		ci->gadget.quirk_avoids_skb_reserve = 1;
-- 
GitLab


From c6de37dd5e48b883db032aa4dc0547a4858b9f20 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 21 Apr 2021 11:58:48 -0700
Subject: [PATCH 0807/3804] tools build: Fix quiet cmd indentation

The tools quiet cmd output has mismatched indentation (and an extra space
character between the cmd name and target name) compared to the rest of
the kbuild output:

  HOSTCC  scripts/insert-sys-cert
  LD       /srv/code/tools/objtool/arch/x86/objtool-in.o
  LD       /srv/code/tools/objtool/libsubcmd-in.o
  AR       /srv/code/tools/objtool/libsubcmd.a
  HOSTLD  scripts/genksyms/genksyms
  CC      scripts/mod/empty.o
  HOSTCC  scripts/mod/mk_elfconfig
  CC      scripts/mod/devicetable-offsets.s
  MKELF   scripts/mod/elfconfig.h
  HOSTCC  scripts/mod/modpost.o
  HOSTCC  scripts/mod/file2alias.o
  HOSTCC  scripts/mod/sumversion.o
  LD       /srv/code/tools/objtool/objtool-in.o
  LINK     /srv/code/tools/objtool/objtool
  HOSTLD  scripts/mod/modpost
  CC      kernel/bounds.s

Adjust to match the rest of kbuild.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 tools/build/Makefile.build     | 22 +++++++++++-----------
 tools/scripts/Makefile.include | 30 +++++++++++++++---------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index cd72016c3cfa7..715092fc6a239 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -51,39 +51,39 @@ subdir-obj-y :=
 build-file := $(dir)/Build
 -include $(build-file)
 
-quiet_cmd_flex  = FLEX     $@
-quiet_cmd_bison = BISON    $@
+quiet_cmd_flex  = FLEX    $@
+quiet_cmd_bison = BISON   $@
 
 # Create directory unless it exists
-quiet_cmd_mkdir = MKDIR    $(dir $@)
+quiet_cmd_mkdir = MKDIR   $(dir $@)
       cmd_mkdir = mkdir -p $(dir $@)
      rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
 
 # Compile command
-quiet_cmd_cc_o_c = CC       $@
+quiet_cmd_cc_o_c = CC      $@
       cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
 
-quiet_cmd_host_cc_o_c = HOSTCC   $@
+quiet_cmd_host_cc_o_c = HOSTCC  $@
       cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
 
-quiet_cmd_cxx_o_c = CXX      $@
+quiet_cmd_cxx_o_c = CXX     $@
       cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
 
-quiet_cmd_cpp_i_c = CPP      $@
+quiet_cmd_cpp_i_c = CPP     $@
       cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
 
-quiet_cmd_cc_s_c = AS       $@
+quiet_cmd_cc_s_c = AS      $@
       cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
 
-quiet_cmd_gen = GEN      $@
+quiet_cmd_gen = GEN     $@
 
 # Link agregate command
 # If there's nothing to link, create empty $@ object.
-quiet_cmd_ld_multi = LD       $@
+quiet_cmd_ld_multi = LD      $@
       cmd_ld_multi = $(if $(strip $(obj-y)),\
                      $(LD) -r -o $@  $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
 
-quiet_cmd_host_ld_multi = HOSTLD   $@
+quiet_cmd_host_ld_multi = HOSTLD  $@
       cmd_host_ld_multi = $(if $(strip $(obj-y)),\
                           $(HOSTLD) -r -o $@  $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
 
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index f9271f3ea9129..071312f5eb928 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -131,29 +131,29 @@ QUIET_SUBDIR1  =
 
 ifneq ($(silent),1)
   ifneq ($(V),1)
-	QUIET_CC       = @echo '  CC       '$@;
-	QUIET_CC_FPIC  = @echo '  CC FPIC  '$@;
-	QUIET_CLANG    = @echo '  CLANG    '$@;
-	QUIET_AR       = @echo '  AR       '$@;
-	QUIET_LINK     = @echo '  LINK     '$@;
-	QUIET_MKDIR    = @echo '  MKDIR    '$@;
-	QUIET_GEN      = @echo '  GEN      '$@;
+	QUIET_CC       = @echo '  CC      '$@;
+	QUIET_CC_FPIC  = @echo '  CC FPIC '$@;
+	QUIET_CLANG    = @echo '  CLANG   '$@;
+	QUIET_AR       = @echo '  AR      '$@;
+	QUIET_LINK     = @echo '  LINK    '$@;
+	QUIET_MKDIR    = @echo '  MKDIR   '$@;
+	QUIET_GEN      = @echo '  GEN     '$@;
 	QUIET_SUBDIR0  = +@subdir=
 	QUIET_SUBDIR1  = ;$(NO_SUBDIR) \
-			  echo '  SUBDIR   '$$subdir; \
+			  echo '  SUBDIR  '$$subdir; \
 			 $(MAKE) $(PRINT_DIR) -C $$subdir
-	QUIET_FLEX     = @echo '  FLEX     '$@;
-	QUIET_BISON    = @echo '  BISON    '$@;
-	QUIET_GENSKEL  = @echo '  GEN-SKEL '$@;
+	QUIET_FLEX     = @echo '  FLEX    '$@;
+	QUIET_BISON    = @echo '  BISON   '$@;
+	QUIET_GENSKEL  = @echo '  GENSKEL '$@;
 
 	descend = \
-		+@echo	       '  DESCEND  '$(1); \
+		+@echo	       '  DESCEND '$(1); \
 		mkdir -p $(OUTPUT)$(1) && \
 		$(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
 
-	QUIET_CLEAN    = @printf '  CLEAN    %s\n' $1;
-	QUIET_INSTALL  = @printf '  INSTALL  %s\n' $1;
-	QUIET_UNINST   = @printf '  UNINST   %s\n' $1;
+	QUIET_CLEAN    = @printf '  CLEAN   %s\n' $1;
+	QUIET_INSTALL  = @printf '  INSTALL %s\n' $1;
+	QUIET_UNINST   = @printf '  UNINST  %s\n' $1;
   endif
 endif
 
-- 
GitLab


From 98a499a11ecdd8cb91d03dd5c034aaf7422f2deb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Thu, 13 May 2021 18:24:02 +0200
Subject: [PATCH 0808/3804] scripts/jobserver-exec: Fix a typo ("envirnoment")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/jobserver-exec | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec
index 48d141e3ec56f..8762887a970ce 100755
--- a/scripts/jobserver-exec
+++ b/scripts/jobserver-exec
@@ -10,7 +10,7 @@ from __future__ import print_function
 import os, sys, errno
 import subprocess
 
-# Extract and prepare jobserver file descriptors from envirnoment.
+# Extract and prepare jobserver file descriptors from environment.
 claim = 0
 jobs = b""
 try:
-- 
GitLab


From c93db682cfb213501881072a9200a48ce1dc3c3f Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Sat, 15 May 2021 12:11:13 +0200
Subject: [PATCH 0809/3804] kbuild: dummy-tools: adjust to stricter
 stackprotector check

Commit 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular
percpu variable") modified the stackprotector check on 32-bit x86 to check
if gcc supports using %fs as canary. Adjust dummy-tools gcc script to pass
this new test by returning "%fs" rather than "%gs" if it detects
-mstack-protector-guard-reg=fs on command line.

Fixes: 3fb0fdb3bbe7 ("x86/stackprotector/32: Make the canary into a regular percpu variable")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/dummy-tools/gcc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc
index f6d543725f1ed..b2483149bbe55 100755
--- a/scripts/dummy-tools/gcc
+++ b/scripts/dummy-tools/gcc
@@ -76,7 +76,11 @@ fi
 if arg_contain -S "$@"; then
 	# For scripts/gcc-x86-*-has-stack-protector.sh
 	if arg_contain -fstack-protector "$@"; then
-		echo "%gs"
+		if arg_contain -mstack-protector-guard-reg=fs "$@"; then
+			echo "%fs"
+		else
+			echo "%gs"
+		fi
 		exit 0
 	fi
 
-- 
GitLab


From 4236a26a6b998c8c4fdc0117b8848a38789c48ae Mon Sep 17 00:00:00 2001
From: wenhuizhang <wenhui@gwmail.gwu.edu>
Date: Thu, 13 May 2021 12:55:16 -0400
Subject: [PATCH 0810/3804] cifs: remove deadstore in
 cifs_close_all_deferred_files()

Dead store detected by Lukas Bulwahn's CodeChecker Tool (ELISA group).

The declaration at line 741 (struct cifsInodeInfo *cinode;) and the
assignment at line 747 (cinode = CIFS_I(d_inode(cfile->dentry));)
can be deleted.

cinode on filesystem should not be deleted when files are closed,
they are representations of some data fields on a physical disk,
thus no further action is required.
The virtual inode on vfs will be handled by vfs automatically,
and the denotation is inode, which is different from the cinode.

Signed-off-by: wenhuizhang <wenhui@gwmail.gwu.edu>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/misc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 524dbdfb7184d..801a5300f765d 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -738,13 +738,11 @@ void
 cifs_close_all_deferred_files(struct cifs_tcon *tcon)
 {
 	struct cifsFileInfo *cfile;
-	struct cifsInodeInfo *cinode;
 	struct list_head *tmp;
 
 	spin_lock(&tcon->open_file_lock);
 	list_for_each(tmp, &tcon->openFileList) {
 		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-		cinode = CIFS_I(d_inode(cfile->dentry));
 		if (delayed_work_pending(&cfile->deferred))
 			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
-- 
GitLab


From 055f23b74b20f2824ce33047b4cf2e2aa856bf3b Mon Sep 17 00:00:00 2001
From: Jessica Yu <jeyu@kernel.org>
Date: Wed, 12 May 2021 15:45:46 +0200
Subject: [PATCH 0811/3804] module: check for exit sections in
 layout_sections() instead of module_init_section()

Previously, when CONFIG_MODULE_UNLOAD=n, the module loader just does not
attempt to load exit sections since it never expects that any code in those
sections will ever execute. However, dynamic code patching (alternatives,
jump_label and static_call) can have sites in __exit code, even if __exit is
never executed. Therefore __exit must be present at runtime, at least for as
long as __init code is.

Commit 33121347fb1c ("module: treat exit sections the same as init
sections when !CONFIG_MODULE_UNLOAD") solves the requirements of
jump_labels and static_calls by putting the exit sections in the init
region of the module so that they are at least present at init, and
discarded afterwards. It does this by including a check for exit
sections in module_init_section(), so that it also returns true for exit
sections, and the module loader will automatically sort them in the init
region of the module.

However, the solution there was not completely arch-independent. ARM is
a special case where it supplies its own module_{init, exit}_section()
functions. Instead of pushing the exit section checks into
module_init_section(), just implement the exit section check in
layout_sections(), so that we don't have to touch arch-dependent code.

Fixes: 33121347fb1c ("module: treat exit sections the same as init sections when !CONFIG_MODULE_UNLOAD")
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 kernel/module.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index b5dd92e35b02a..7e78dfabca97f 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2401,6 +2401,15 @@ static long get_offset(struct module *mod, unsigned int *size,
 	return ret;
 }
 
+static bool module_init_layout_section(const char *sname)
+{
+#ifndef CONFIG_MODULE_UNLOAD
+	if (module_exit_section(sname))
+		return true;
+#endif
+	return module_init_section(sname);
+}
+
 /*
  * Lay out the SHF_ALLOC sections in a way not dissimilar to how ld
  * might -- code, read-only data, read-write data, small data.  Tally
@@ -2435,7 +2444,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
 			if ((s->sh_flags & masks[m][0]) != masks[m][0]
 			    || (s->sh_flags & masks[m][1])
 			    || s->sh_entsize != ~0UL
-			    || module_init_section(sname))
+			    || module_init_layout_section(sname))
 				continue;
 			s->sh_entsize = get_offset(mod, &mod->core_layout.size, s, i);
 			pr_debug("\t%s\n", sname);
@@ -2468,7 +2477,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
 			if ((s->sh_flags & masks[m][0]) != masks[m][0]
 			    || (s->sh_flags & masks[m][1])
 			    || s->sh_entsize != ~0UL
-			    || !module_init_section(sname))
+			    || !module_init_layout_section(sname))
 				continue;
 			s->sh_entsize = (get_offset(mod, &mod->init_layout.size, s, i)
 					 | INIT_OFFSET_MASK);
@@ -2807,11 +2816,7 @@ void * __weak module_alloc(unsigned long size)
 
 bool __weak module_init_section(const char *name)
 {
-#ifndef CONFIG_MODULE_UNLOAD
-	return strstarts(name, ".init") || module_exit_section(name);
-#else
 	return strstarts(name, ".init");
-#endif
 }
 
 bool __weak module_exit_section(const char *name)
-- 
GitLab


From 145e06b58f8625becc61792a0554726314297a85 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 13 May 2021 16:39:02 +0800
Subject: [PATCH 0812/3804] drm/i915/gvt: Move mdev attribute groups into kvmgt
 module

As the kvmgt module contains all handling for VFIO/mdev, leaving the mdev
attribute groups in the gvt module caused a dependency issue. Although they
were kept there for possible use by other hypervisors, that never turned out
to be needed. So move all mdev handling into the kvmgt module completely to
resolve the dependency issue.

With this fix, no config workaround is required. So revert previous workaround
commits: adaeb718d46f ("vfio/gvt: fix DRM_I915_GVT dependency on VFIO_MDEV")
and 07e543f4f9d1 ("vfio/gvt: Make DRM_I915_GVT depend on VFIO_MDEV").

Reviewed-by: Colin Xu <colin.xu@intel.com>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20210513083902.2822350-1-zhenyuw@linux.intel.com
---
 drivers/gpu/drm/i915/Kconfig         |   1 -
 drivers/gpu/drm/i915/gvt/gvt.c       | 124 +--------------------------
 drivers/gpu/drm/i915/gvt/gvt.h       |   3 -
 drivers/gpu/drm/i915/gvt/hypercall.h |   2 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c     | 122 ++++++++++++++++++++++++--
 drivers/gpu/drm/i915/gvt/mpt.h       |   4 +-
 6 files changed, 118 insertions(+), 138 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 69f57ca9c68d7..93f4d059fc89f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -102,7 +102,6 @@ config DRM_I915_GVT
 	bool "Enable Intel GVT-g graphics virtualization host support"
 	depends on DRM_I915
 	depends on 64BIT
-	depends on VFIO_MDEV=y || VFIO_MDEV=DRM_I915
 	default n
 	help
 	  Choose this option if you want to enable Intel GVT-g graphics
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index e7c2babcee8b7..cbac409f6c8a5 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -46,118 +46,6 @@ static const char * const supported_hypervisors[] = {
 	[INTEL_GVT_HYPERVISOR_KVM] = "KVM",
 };
 
-static struct intel_vgpu_type *
-intel_gvt_find_vgpu_type(struct intel_gvt *gvt, unsigned int type_group_id)
-{
-	if (WARN_ON(type_group_id >= gvt->num_types))
-		return NULL;
-	return &gvt->types[type_group_id];
-}
-
-static ssize_t available_instances_show(struct mdev_type *mtype,
-					struct mdev_type_attribute *attr,
-					char *buf)
-{
-	struct intel_vgpu_type *type;
-	unsigned int num = 0;
-	void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
-	type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
-	if (!type)
-		num = 0;
-	else
-		num = type->avail_instance;
-
-	return sprintf(buf, "%u\n", num);
-}
-
-static ssize_t device_api_show(struct mdev_type *mtype,
-			       struct mdev_type_attribute *attr, char *buf)
-{
-	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-}
-
-static ssize_t description_show(struct mdev_type *mtype,
-				struct mdev_type_attribute *attr, char *buf)
-{
-	struct intel_vgpu_type *type;
-	void *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
-	type = intel_gvt_find_vgpu_type(gvt, mtype_get_type_group_id(mtype));
-	if (!type)
-		return 0;
-
-	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-		       "fence: %d\nresolution: %s\n"
-		       "weight: %d\n",
-		       BYTES_TO_MB(type->low_gm_size),
-		       BYTES_TO_MB(type->high_gm_size),
-		       type->fence, vgpu_edid_str(type->resolution),
-		       type->weight);
-}
-
-static MDEV_TYPE_ATTR_RO(available_instances);
-static MDEV_TYPE_ATTR_RO(device_api);
-static MDEV_TYPE_ATTR_RO(description);
-
-static struct attribute *gvt_type_attrs[] = {
-	&mdev_type_attr_available_instances.attr,
-	&mdev_type_attr_device_api.attr,
-	&mdev_type_attr_description.attr,
-	NULL,
-};
-
-static struct attribute_group *gvt_vgpu_type_groups[] = {
-	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
-};
-
-static bool intel_get_gvt_attrs(struct attribute_group ***intel_vgpu_type_groups)
-{
-	*intel_vgpu_type_groups = gvt_vgpu_type_groups;
-	return true;
-}
-
-static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
-{
-	int i, j;
-	struct intel_vgpu_type *type;
-	struct attribute_group *group;
-
-	for (i = 0; i < gvt->num_types; i++) {
-		type = &gvt->types[i];
-
-		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
-		if (WARN_ON(!group))
-			goto unwind;
-
-		group->name = type->name;
-		group->attrs = gvt_type_attrs;
-		gvt_vgpu_type_groups[i] = group;
-	}
-
-	return 0;
-
-unwind:
-	for (j = 0; j < i; j++) {
-		group = gvt_vgpu_type_groups[j];
-		kfree(group);
-	}
-
-	return -ENOMEM;
-}
-
-static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
-{
-	int i;
-	struct attribute_group *group;
-
-	for (i = 0; i < gvt->num_types; i++) {
-		group = gvt_vgpu_type_groups[i];
-		gvt_vgpu_type_groups[i] = NULL;
-		kfree(group);
-	}
-}
-
 static const struct intel_gvt_ops intel_gvt_ops = {
 	.emulate_cfg_read = intel_vgpu_emulate_cfg_read,
 	.emulate_cfg_write = intel_vgpu_emulate_cfg_write,
@@ -169,8 +57,6 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_reset = intel_gvt_reset_vgpu,
 	.vgpu_activate = intel_gvt_activate_vgpu,
 	.vgpu_deactivate = intel_gvt_deactivate_vgpu,
-	.gvt_find_vgpu_type = intel_gvt_find_vgpu_type,
-	.get_gvt_attrs = intel_get_gvt_attrs,
 	.vgpu_query_plane = intel_vgpu_query_plane,
 	.vgpu_get_dmabuf = intel_vgpu_get_dmabuf,
 	.write_protect_handler = intel_vgpu_page_track_handler,
@@ -274,7 +160,6 @@ void intel_gvt_clean_device(struct drm_i915_private *i915)
 		return;
 
 	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
-	intel_gvt_cleanup_vgpu_type_groups(gvt);
 	intel_gvt_clean_vgpu_types(gvt);
 
 	intel_gvt_debugfs_clean(gvt);
@@ -363,12 +248,6 @@ int intel_gvt_init_device(struct drm_i915_private *i915)
 	if (ret)
 		goto out_clean_thread;
 
-	ret = intel_gvt_init_vgpu_type_groups(gvt);
-	if (ret) {
-		gvt_err("failed to init vgpu type groups: %d\n", ret);
-		goto out_clean_types;
-	}
-
 	vgpu = intel_gvt_create_idle_vgpu(gvt);
 	if (IS_ERR(vgpu)) {
 		ret = PTR_ERR(vgpu);
@@ -454,7 +333,8 @@ EXPORT_SYMBOL_GPL(intel_gvt_register_hypervisor);
 void
 intel_gvt_unregister_hypervisor(void)
 {
-	intel_gvt_hypervisor_host_exit(intel_gvt_host.dev);
+	void *gvt = (void *)kdev_to_i915(intel_gvt_host.dev)->gvt;
+	intel_gvt_hypervisor_host_exit(intel_gvt_host.dev, gvt);
 	module_put(THIS_MODULE);
 }
 EXPORT_SYMBOL_GPL(intel_gvt_unregister_hypervisor);
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 88ab360fcb31a..0c0615602343a 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -574,9 +574,6 @@ struct intel_gvt_ops {
 	void (*vgpu_reset)(struct intel_vgpu *);
 	void (*vgpu_activate)(struct intel_vgpu *);
 	void (*vgpu_deactivate)(struct intel_vgpu *);
-	struct intel_vgpu_type *(*gvt_find_vgpu_type)(
-		struct intel_gvt *gvt, unsigned int type_group_id);
-	bool (*get_gvt_attrs)(struct attribute_group ***intel_vgpu_type_groups);
 	int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *);
 	int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int);
 	int (*write_protect_handler)(struct intel_vgpu *, u64, void *,
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index b79da5124f831..f33e3cbd0439d 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -49,7 +49,7 @@ enum hypervisor_type {
 struct intel_gvt_mpt {
 	enum hypervisor_type type;
 	int (*host_init)(struct device *dev, void *gvt, const void *ops);
-	void (*host_exit)(struct device *dev);
+	void (*host_exit)(struct device *dev, void *gvt);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
 	void (*detach_vgpu)(void *vgpu);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 65ff43cfc0f71..48b4d4cf805d1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -144,6 +144,104 @@ static inline bool handle_valid(unsigned long handle)
 	return !!(handle & ~0xff);
 }
 
+static ssize_t available_instances_show(struct mdev_type *mtype,
+					struct mdev_type_attribute *attr,
+					char *buf)
+{
+	struct intel_vgpu_type *type;
+	unsigned int num = 0;
+	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
+
+	type = &gvt->types[mtype_get_type_group_id(mtype)];
+	if (!type)
+		num = 0;
+	else
+		num = type->avail_instance;
+
+	return sprintf(buf, "%u\n", num);
+}
+
+static ssize_t device_api_show(struct mdev_type *mtype,
+			       struct mdev_type_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
+}
+
+static ssize_t description_show(struct mdev_type *mtype,
+				struct mdev_type_attribute *attr, char *buf)
+{
+	struct intel_vgpu_type *type;
+	struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
+
+	type = &gvt->types[mtype_get_type_group_id(mtype)];
+	if (!type)
+		return 0;
+
+	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
+		       "fence: %d\nresolution: %s\n"
+		       "weight: %d\n",
+		       BYTES_TO_MB(type->low_gm_size),
+		       BYTES_TO_MB(type->high_gm_size),
+		       type->fence, vgpu_edid_str(type->resolution),
+		       type->weight);
+}
+
+static MDEV_TYPE_ATTR_RO(available_instances);
+static MDEV_TYPE_ATTR_RO(device_api);
+static MDEV_TYPE_ATTR_RO(description);
+
+static struct attribute *gvt_type_attrs[] = {
+	&mdev_type_attr_available_instances.attr,
+	&mdev_type_attr_device_api.attr,
+	&mdev_type_attr_description.attr,
+	NULL,
+};
+
+static struct attribute_group *gvt_vgpu_type_groups[] = {
+	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
+};
+
+static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i, j;
+	struct intel_vgpu_type *type;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		type = &gvt->types[i];
+
+		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
+		if (!group)
+			goto unwind;
+
+		group->name = type->name;
+		group->attrs = gvt_type_attrs;
+		gvt_vgpu_type_groups[i] = group;
+	}
+
+	return 0;
+
+unwind:
+	for (j = 0; j < i; j++) {
+		group = gvt_vgpu_type_groups[j];
+		kfree(group);
+	}
+
+	return -ENOMEM;
+}
+
+static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
+{
+	int i;
+	struct attribute_group *group;
+
+	for (i = 0; i < gvt->num_types; i++) {
+		group = gvt_vgpu_type_groups[i];
+		gvt_vgpu_type_groups[i] = NULL;
+		kfree(group);
+	}
+}
+
 static int kvmgt_guest_init(struct mdev_device *mdev);
 static void intel_vgpu_release_work(struct work_struct *work);
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
@@ -694,14 +792,13 @@ static int intel_vgpu_create(struct mdev_device *mdev)
 	struct intel_vgpu *vgpu = NULL;
 	struct intel_vgpu_type *type;
 	struct device *pdev;
-	void *gvt;
+	struct intel_gvt *gvt;
 	int ret;
 
 	pdev = mdev_parent_dev(mdev);
 	gvt = kdev_to_i915(pdev)->gvt;
 
-	type = intel_gvt_ops->gvt_find_vgpu_type(gvt,
-						 mdev_get_type_group_id(mdev));
+	type = &gvt->types[mdev_get_type_group_id(mdev)];
 	if (!type) {
 		ret = -EINVAL;
 		goto out;
@@ -1667,19 +1764,26 @@ static struct mdev_parent_ops intel_vgpu_ops = {
 
 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
 {
-	struct attribute_group **kvm_vgpu_type_groups;
+	int ret;
+
+	ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt);
+	if (ret)
+		return ret;
 
 	intel_gvt_ops = ops;
-	if (!intel_gvt_ops->get_gvt_attrs(&kvm_vgpu_type_groups))
-		return -EFAULT;
-	intel_vgpu_ops.supported_type_groups = kvm_vgpu_type_groups;
+	intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups;
 
-	return mdev_register_device(dev, &intel_vgpu_ops);
+	ret = mdev_register_device(dev, &intel_vgpu_ops);
+	if (ret)
+		intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
+
+	return ret;
 }
 
-static void kvmgt_host_exit(struct device *dev)
+static void kvmgt_host_exit(struct device *dev, void *gvt)
 {
 	mdev_unregister_device(dev);
+	intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt);
 }
 
 static int kvmgt_page_track_add(unsigned long handle, u64 gfn)
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 550a456e936fc..e6c5a792a49a5 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -63,13 +63,13 @@ static inline int intel_gvt_hypervisor_host_init(struct device *dev,
 /**
  * intel_gvt_hypervisor_host_exit - exit GVT-g host side
  */
-static inline void intel_gvt_hypervisor_host_exit(struct device *dev)
+static inline void intel_gvt_hypervisor_host_exit(struct device *dev, void *gvt)
 {
 	/* optional to provide */
 	if (!intel_gvt_host.mpt->host_exit)
 		return;
 
-	intel_gvt_host.mpt->host_exit(dev);
+	intel_gvt_host.mpt->host_exit(dev, gvt);
 }
 
 /**
-- 
GitLab


From 20bc8c1e972f29afcac85e524e430c11a6df5f58 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:55 +0300
Subject: [PATCH 0813/3804] lib/vsprintf: Allow to override ISO 8601 date and
 time separator

ISO 8601 defines 'T' as the separator between date and time. However,
some ABIs print date and time with a ' ' (space) separator instead.

Add a flavour to the %pt specifier to override the default separator.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-1-andriy.shevchenko@linux.intel.com
---
 Documentation/core-api/printk-formats.rst |  7 ++++++-
 lib/test_printf.c                         |  5 +++++
 lib/vsprintf.c                            | 22 +++++++++++++++++-----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 9be6de402cb9b..7b6bfd8a78d35 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -514,9 +514,10 @@ Time and date
 ::
 
 	%pt[RT]			YYYY-mm-ddTHH:MM:SS
+	%pt[RT]s		YYYY-mm-dd HH:MM:SS
 	%pt[RT]d		YYYY-mm-dd
 	%pt[RT]t		HH:MM:SS
-	%pt[RT][dt][r]
+	%pt[RT][dt][r][s]
 
 For printing date and time as represented by::
 
@@ -528,6 +529,10 @@ in human readable format.
 By default year will be incremented by 1900 and month by 1.
 Use %pt[RT]r (raw) to suppress this behaviour.
 
+The %pt[RT]s (space) will override ISO 8601 separator by using ' ' (space)
+instead of 'T' (Capital T) between date and time. It won't have any effect
+when date or time is omitted.
+
 Passed by reference.
 
 struct clk
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 27b964ec723da..69b04b531492d 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -528,6 +528,11 @@ time_and_date(void)
 	test("0119-00-04T15:32:23", "%ptTr", &t);
 	test("15:32:23|2019-01-04", "%ptTt|%ptTd", &t, &t);
 	test("15:32:23|0119-00-04", "%ptTtr|%ptTdr", &t, &t);
+
+	test("2019-01-04 15:32:23", "%ptTs", &t);
+	test("0119-00-04 15:32:23", "%ptTsr", &t);
+	test("15:32:23|2019-01-04", "%ptTts|%ptTds", &t, &t);
+	test("15:32:23|0119-00-04", "%ptTtrs|%ptTdrs", &t, &t);
 }
 
 static void __init
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6c56c62fd9a50..8d5142ae742e7 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1798,7 +1798,8 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm,
 	      struct printf_spec spec, const char *fmt)
 {
 	bool have_t = true, have_d = true;
-	bool raw = false;
+	bool raw = false, iso8601_separator = true;
+	bool found = true;
 	int count = 2;
 
 	if (check_pointer(&buf, end, tm, spec))
@@ -1815,14 +1816,25 @@ char *rtc_str(char *buf, char *end, const struct rtc_time *tm,
 		break;
 	}
 
-	raw = fmt[count] == 'r';
+	do {
+		switch (fmt[count++]) {
+		case 'r':
+			raw = true;
+			break;
+		case 's':
+			iso8601_separator = false;
+			break;
+		default:
+			found = false;
+			break;
+		}
+	} while (found);
 
 	if (have_d)
 		buf = date_str(buf, end, tm, raw);
 	if (have_d && have_t) {
-		/* Respect ISO 8601 */
 		if (buf < end)
-			*buf = 'T';
+			*buf = iso8601_separator ? 'T' : ' ';
 		buf++;
 	}
 	if (have_t)
@@ -2261,7 +2273,7 @@ early_param("no_hash_pointers", no_hash_pointers_enable);
  * - 'd[234]' For a dentry name (optionally 2-4 last components)
  * - 'D[234]' Same as 'd' but for a struct file
  * - 'g' For block_device name (gendisk + partition number)
- * - 't[RT][dt][r]' For time and date as represented by:
+ * - 't[RT][dt][r][s]' For time and date as represented by:
  *      R    struct rtc_time
  *      T    time64_t
  * - 'C' For a clock, it prints the name (Common Clock Framework) or address
-- 
GitLab


From 126ac4d67d97fdeef52b6249702266eb94a05d9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:56 +0300
Subject: [PATCH 0814/3804] kdb: Switch to use %ptTs

Use %ptTs instead of open-coded variant to print contents
of time64_t type in human readable form.

Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: kgdb-bugreport@lists.sourceforge.net
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-2-andriy.shevchenko@linux.intel.com
---
 kernel/debug/kdb/kdb_main.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1baa96a2ecb8f..622410c45da16 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2488,7 +2488,6 @@ static void kdb_sysinfo(struct sysinfo *val)
 static int kdb_summary(int argc, const char **argv)
 {
 	time64_t now;
-	struct tm tm;
 	struct sysinfo val;
 
 	if (argc)
@@ -2502,13 +2501,7 @@ static int kdb_summary(int argc, const char **argv)
 	kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
 
 	now = __ktime_get_real_seconds();
-	time64_to_tm(now, 0, &tm);
-	kdb_printf("date       %04ld-%02d-%02d %02d:%02d:%02d "
-		   "tz_minuteswest %d\n",
-		1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
-		tm.tm_hour, tm.tm_min, tm.tm_sec,
-		sys_tz.tz_minuteswest);
-
+	kdb_printf("date       %ptTs tz_minuteswest %d\n", &now, sys_tz.tz_minuteswest);
 	kdb_sysinfo(&val);
 	kdb_printf("uptime     ");
 	if (val.uptime > (24*60*60)) {
-- 
GitLab


From 776797f1bd1caef34c4ca6dd362fa6376b880e10 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:57 +0300
Subject: [PATCH 0815/3804] nilfs2: Switch to use %ptTs

Use %ptTs instead of open coded variant to print contents
of time64_t type in human readable form.

Use sysfs_emit() at the same time in the changed functions.

Cc: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: linux-nilfs@vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-3-andriy.shevchenko@linux.intel.com
---
 fs/nilfs2/sysfs.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 303d71430bdd1..4e10423f04481 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -19,19 +19,6 @@
 /* /sys/fs/<nilfs>/ */
 static struct kset *nilfs_kset;
 
-#define NILFS_SHOW_TIME(time_t_val, buf) ({ \
-		struct tm res; \
-		int count = 0; \
-		time64_to_tm(time_t_val, 0, &res); \
-		res.tm_year += 1900; \
-		res.tm_mon += 1; \
-		count = scnprintf(buf, PAGE_SIZE, \
-				    "%ld-%.2d-%.2d %.2d:%.2d:%.2d\n", \
-				    res.tm_year, res.tm_mon, res.tm_mday, \
-				    res.tm_hour, res.tm_min, res.tm_sec);\
-		count; \
-})
-
 #define NILFS_DEV_INT_GROUP_OPS(name, parent_name) \
 static ssize_t nilfs_##name##_attr_show(struct kobject *kobj, \
 					struct attribute *attr, char *buf) \
@@ -576,7 +563,7 @@ nilfs_segctor_last_seg_write_time_show(struct nilfs_segctor_attr *attr,
 	ctime = nilfs->ns_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return NILFS_SHOW_TIME(ctime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &ctime);
 }
 
 static ssize_t
@@ -604,7 +591,7 @@ nilfs_segctor_last_nongc_write_time_show(struct nilfs_segctor_attr *attr,
 	nongc_ctime = nilfs->ns_nongc_ctime;
 	up_read(&nilfs->ns_segctor_sem);
 
-	return NILFS_SHOW_TIME(nongc_ctime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &nongc_ctime);
 }
 
 static ssize_t
@@ -724,7 +711,7 @@ nilfs_superblock_sb_write_time_show(struct nilfs_superblock_attr *attr,
 	sbwtime = nilfs->ns_sbwtime;
 	up_read(&nilfs->ns_sem);
 
-	return NILFS_SHOW_TIME(sbwtime, buf);
+	return sysfs_emit(buf, "%ptTs\n", &sbwtime);
 }
 
 static ssize_t
-- 
GitLab


From 2f9e0f8c7e173e312e1d98b50fd8dc890245831a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 11 May 2021 18:39:58 +0300
Subject: [PATCH 0816/3804] usb: host: xhci-tegra: Switch to use %ptTs

Use %ptTs instead of open coded variant to print contents
of time64_t type in human readable form.

Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Jonathan Hunter <jonathanh@nvidia.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210511153958.34527-4-andriy.shevchenko@linux.intel.com
---
 drivers/usb/host/xhci-tegra.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 50bb91b6a4b8d..c7387677a26ac 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -917,7 +917,6 @@ static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 	struct xhci_op_regs __iomem *op;
 	unsigned long timeout;
 	time64_t timestamp;
-	struct tm time;
 	u64 address;
 	u32 value;
 	int err;
@@ -1014,11 +1013,8 @@ static int tegra_xusb_load_firmware(struct tegra_xusb *tegra)
 	}
 
 	timestamp = le32_to_cpu(header->fwimg_created_time);
-	time64_to_tm(timestamp, 0, &time);
 
-	dev_info(dev, "Firmware timestamp: %ld-%02d-%02d %02d:%02d:%02d UTC\n",
-		 time.tm_year + 1900, time.tm_mon + 1, time.tm_mday,
-		 time.tm_hour, time.tm_min, time.tm_sec);
+	dev_info(dev, "Firmware timestamp: %ptTs UTC\n", &timestamp);
 
 	return 0;
 }
-- 
GitLab


From 73a395c46704304b96bc5e2ee19be31124025c0c Mon Sep 17 00:00:00 2001
From: "Pavel Machek (CIP)" <pavel@denx.de>
Date: Tue, 6 Apr 2021 13:54:14 +0200
Subject: [PATCH 0817/3804] drm/tegra: sor: Do not leak runtime PM reference

It's theoretically possible for the runtime PM reference to leak if the
code fails anywhere between the pm_runtime_resume_and_get() and
pm_runtime_put() calls, so make sure to release the runtime PM reference
in that case.

In practice this will never happen because none of the functions will
fail on Tegra, but it's better for the code to be pedantic in case these
assumptions ever become wrong.

Signed-off-by: Pavel Machek (CIP) <pavel@denx.de>
[treding@nvidia.com: add commit message]
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 7b88261f57bb6..67a80dae1c00c 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3125,21 +3125,21 @@ static int tegra_sor_init(struct host1x_client *client)
 		if (err < 0) {
 			dev_err(sor->dev, "failed to acquire SOR reset: %d\n",
 				err);
-			return err;
+			goto rpm_put;
 		}
 
 		err = reset_control_assert(sor->rst);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to assert SOR reset: %d\n",
 				err);
-			return err;
+			goto rpm_put;
 		}
 	}
 
 	err = clk_prepare_enable(sor->clk);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable clock: %d\n", err);
-		return err;
+		goto rpm_put;
 	}
 
 	usleep_range(1000, 3000);
@@ -3150,7 +3150,7 @@ static int tegra_sor_init(struct host1x_client *client)
 			dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
 				err);
 			clk_disable_unprepare(sor->clk);
-			return err;
+			goto rpm_put;
 		}
 
 		reset_control_release(sor->rst);
@@ -3171,6 +3171,12 @@ static int tegra_sor_init(struct host1x_client *client)
 	}
 
 	return 0;
+
+rpm_put:
+	if (sor->rst)
+		pm_runtime_put(sor->dev);
+
+	return err;
 }
 
 static int tegra_sor_exit(struct host1x_client *client)
-- 
GitLab


From 0cfe5a6e758fb20be8ad3e8f10cb087cc8033eeb Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 1 Apr 2021 17:41:04 +0200
Subject: [PATCH 0818/3804] gpu: host1x: Split up client initialization and
 registration

In some cases we may need to initialize the host1x client first before
registering it. This commit adds a new helper that will do nothing but
the initialization of the data structure.

At the same time, the initialization is removed from the registration
function. Note, however, that for simplicity we explicitly initialize
the client when the host1x_client_register() function is called, as
opposed to the low-level __host1x_client_register() function. This
allows existing callers to remain unchanged.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/bus.c | 30 ++++++++++++++++++++++++------
 include/linux/host1x.h   | 30 ++++++++++++++++++++++++------
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 46f69c532b6b7..218e3718fd68c 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -735,6 +735,29 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 }
 EXPORT_SYMBOL(host1x_driver_unregister);
 
+/**
+ * __host1x_client_init() - initialize a host1x client
+ * @client: host1x client
+ * @key: lock class key for the client-specific mutex
+ */
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
+{
+	INIT_LIST_HEAD(&client->list);
+	__mutex_init(&client->lock, "host1x client lock", key);
+	client->usecount = 0;
+}
+EXPORT_SYMBOL(__host1x_client_init);
+
+/**
+ * host1x_client_exit() - uninitialize a host1x client
+ * @client: host1x client
+ */
+void host1x_client_exit(struct host1x_client *client)
+{
+	mutex_destroy(&client->lock);
+}
+EXPORT_SYMBOL(host1x_client_exit);
+
 /**
  * __host1x_client_register() - register a host1x client
  * @client: host1x client
@@ -747,16 +770,11 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  * device and call host1x_device_init(), which will in turn call each client's
  * &host1x_client_ops.init implementation.
  */
-int __host1x_client_register(struct host1x_client *client,
-			     struct lock_class_key *key)
+int __host1x_client_register(struct host1x_client *client)
 {
 	struct host1x *host1x;
 	int err;
 
-	INIT_LIST_HEAD(&client->list);
-	__mutex_init(&client->lock, "host1x client lock", key);
-	client->usecount = 0;
-
 	mutex_lock(&devices_lock);
 
 	list_for_each_entry(host1x, &devices, list) {
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 232e1bd507a7e..9b0487c885719 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -332,12 +332,30 @@ static inline struct host1x_device *to_host1x_device(struct device *dev)
 int host1x_device_init(struct host1x_device *device);
 int host1x_device_exit(struct host1x_device *device);
 
-int __host1x_client_register(struct host1x_client *client,
-			     struct lock_class_key *key);
-#define host1x_client_register(class) \
-	({ \
-		static struct lock_class_key __key; \
-		__host1x_client_register(class, &__key); \
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key);
+void host1x_client_exit(struct host1x_client *client);
+
+#define host1x_client_init(client)			\
+	({						\
+		static struct lock_class_key __key;	\
+		__host1x_client_init(client, &__key);	\
+	})
+
+int __host1x_client_register(struct host1x_client *client);
+
+/*
+ * Note that this wrapper calls __host1x_client_init() for compatibility
+ * with existing callers. Callers that want to separately initialize and
+ * register a host1x client must first initialize using either of the
+ * __host1x_client_init() or host1x_client_init() functions and then use
+ * the low-level __host1x_client_register() function to avoid the client
+ * getting reinitialized.
+ */
+#define host1x_client_register(client)			\
+	({						\
+		static struct lock_class_key __key;	\
+		__host1x_client_init(client, &__key);	\
+		__host1x_client_register(client);	\
 	})
 
 int host1x_client_unregister(struct host1x_client *client);
-- 
GitLab


From 5dea42759bcef74b0802ea64b904409bc37f9045 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 1 Apr 2021 17:41:05 +0200
Subject: [PATCH 0819/3804] drm/tegra: sor: Fully initialize SOR before
 registration

Before registering the SOR host1x client, make sure that it is fully
initialized. This avoids a potential race condition between the SOR's
probe and the host1x device initialization in cases where the SOR is
the final sub-device to register to a host1x instance.

Reported-by: Jonathan Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 67a80dae1c00c..32c83f2e386ca 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3922,17 +3922,10 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, sor);
 	pm_runtime_enable(&pdev->dev);
 
-	INIT_LIST_HEAD(&sor->client.list);
+	host1x_client_init(&sor->client);
 	sor->client.ops = &sor_client_ops;
 	sor->client.dev = &pdev->dev;
 
-	err = host1x_client_register(&sor->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
-			err);
-		goto rpm_disable;
-	}
-
 	/*
 	 * On Tegra210 and earlier, provide our own implementation for the
 	 * pad output clock.
@@ -3944,13 +3937,13 @@ static int tegra_sor_probe(struct platform_device *pdev)
 				      sor->index);
 		if (!name) {
 			err = -ENOMEM;
-			goto unregister;
+			goto uninit;
 		}
 
 		err = host1x_client_resume(&sor->client);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to resume: %d\n", err);
-			goto unregister;
+			goto uninit;
 		}
 
 		sor->clk_pad = tegra_clk_sor_pad_register(sor, name);
@@ -3961,14 +3954,20 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		err = PTR_ERR(sor->clk_pad);
 		dev_err(sor->dev, "failed to register SOR pad clock: %d\n",
 			err);
-		goto unregister;
+		goto uninit;
+	}
+
+	err = __host1x_client_register(&sor->client);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
+			err);
+		goto uninit;
 	}
 
 	return 0;
 
-unregister:
-	host1x_client_unregister(&sor->client);
-rpm_disable:
+uninit:
+	host1x_client_exit(&sor->client);
 	pm_runtime_disable(&pdev->dev);
 remove:
 	tegra_output_remove(&sor->output);
-- 
GitLab


From dc9a91d279b721aef7c4f1a2e2e33631d388446f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 15 Apr 2021 08:29:14 -0700
Subject: [PATCH 0820/3804] drm/tegra: Fix shift overflow in
 tegra_shared_plane_atomic_update

Clang warns:

drivers/gpu/drm/tegra/hub.c:513:11: warning: shift count >= width of
type [-Wshift-count-overflow]
                base |= BIT(39);
                        ^~~~~~~

BIT is unsigned long, which is 32-bit on ARCH=arm, hence the overflow
warning. Switch to BIT_ULL, which is 64-bit and will not overflow.

Fixes: 7b6f846785f4 ("drm/tegra: Support sector layout on Tegra194")
Link: https://github.com/ClangBuiltLinux/linux/issues/1351
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/hub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 79bff8b48271a..bfae8a02f55b8 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -510,7 +510,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	 * dGPU sector layout.
 	 */
 	if (tegra_plane_state->tiling.sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU)
-		base |= BIT(39);
+		base |= BIT_ULL(39);
 #endif
 
 	tegra_plane_writel(p, tegra_plane_state->format, DC_WIN_COLOR_DEPTH);
-- 
GitLab


From b80bfc59c60d8a006fdd7a33352732911ee51397 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 5 Apr 2021 20:15:24 +0200
Subject: [PATCH 0821/3804] drm/exynos: correct exynos_drm_fimd kerneldoc

Correct the kerneldoc of fimd_shadow_protect_win() to fix W=1 warnings:

  drivers/gpu/drm/exynos/exynos_drm_fimd.c:734: warning:
    expecting prototype for shadow_protect_win(). Prototype was for fimd_shadow_protect_win() instead

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fimd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 49a2e0c539187..ae576122873e0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -723,7 +723,7 @@ static void fimd_win_set_colkey(struct fimd_context *ctx, unsigned int win)
 }
 
 /**
- * shadow_protect_win() - disable updating values from shadow registers at vsync
+ * fimd_shadow_protect_win() - disable updating values from shadow registers at vsync
  *
  * @ctx: local driver data
  * @win: window to protect registers for
-- 
GitLab


From 04562956fd41fb22645e47a00cd5cbd601ce4bdd Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 17:40:04 +0800
Subject: [PATCH 0822/3804] drm/exynos: Remove redundant error printing in
 exynos_dsi_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 44e402b7cdfb6..2d2fe5ab26e70 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1786,10 +1786,8 @@ static int exynos_dsi_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->reg_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(dsi->reg_base)) {
-		dev_err(dev, "failed to remap io region\n");
+	if (IS_ERR(dsi->reg_base))
 		return PTR_ERR(dsi->reg_base);
-	}
 
 	dsi->phy = devm_phy_get(dev, "dsim");
 	if (IS_ERR(dsi->phy)) {
-- 
GitLab


From a470c5665b3b918c31bcc912234862803b10ba00 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 19:27:33 +0800
Subject: [PATCH 0823/3804] drm/exynos/decon5433: Remove redundant error
 printing in exynos5433_decon_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index b9a4b7670a899..197b97341cad2 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -815,10 +815,8 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ctx->addr = devm_ioremap_resource(dev, res);
-	if (IS_ERR(ctx->addr)) {
-		dev_err(dev, "ioremap failed\n");
+	if (IS_ERR(ctx->addr))
 		return PTR_ERR(ctx->addr);
-	}
 
 	ret = decon_conf_irq(ctx, "vsync", decon_irq_handler, 0);
 	if (ret < 0)
-- 
GitLab


From 5b9fedb31e476693c90d8ee040e7d4c51b3e7cc4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 17 May 2021 14:39:56 +0200
Subject: [PATCH 0824/3804] quota: Disable quotactl_path syscall

In commit fa8b90070a80 ("quota: wire up quotactl_path") we have wired up
the new quotactl_path syscall. However, some people in the LWN discussion
have objected that the path-based syscall is missing the dirfd and flags
arguments, which are mostly standard for contemporary path-based syscalls.
Indeed they have a point, and after a discussion with Christian Brauner and
Sascha Hauer I've decided to disable the syscall for now and update its
API. Since there is no userspace currently using that syscall and it
hasn't been released in any major release, we should be fine.

CC: Christian Brauner <christian.brauner@ubuntu.com>
CC: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein
Signed-off-by: Jan Kara <jack@suse.cz>
---
 arch/alpha/kernel/syscalls/syscall.tbl      | 2 +-
 arch/arm/tools/syscall.tbl                  | 2 +-
 arch/arm64/include/asm/unistd32.h           | 3 +--
 arch/ia64/kernel/syscalls/syscall.tbl       | 2 +-
 arch/m68k/kernel/syscalls/syscall.tbl       | 2 +-
 arch/microblaze/kernel/syscalls/syscall.tbl | 2 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_n64.tbl   | 2 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 2 +-
 arch/parisc/kernel/syscalls/syscall.tbl     | 2 +-
 arch/powerpc/kernel/syscalls/syscall.tbl    | 2 +-
 arch/s390/kernel/syscalls/syscall.tbl       | 2 +-
 arch/sh/kernel/syscalls/syscall.tbl         | 2 +-
 arch/sparc/kernel/syscalls/syscall.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_32.tbl      | 2 +-
 arch/x86/entry/syscalls/syscall_64.tbl      | 2 +-
 arch/xtensa/kernel/syscalls/syscall.tbl     | 2 +-
 17 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 5622578742fdd..3000a2e8ee217 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -482,7 +482,7 @@
 550	common	process_madvise			sys_process_madvise
 551	common	epoll_pwait2			sys_epoll_pwait2
 552	common	mount_setattr			sys_mount_setattr
-553	common	quotactl_path			sys_quotactl_path
+# 553 reserved for quotactl_path
 554	common	landlock_create_ruleset		sys_landlock_create_ruleset
 555	common	landlock_add_rule		sys_landlock_add_rule
 556	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index c7679d7db98b0..28e03b5fec004 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -456,7 +456,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 7859749d6628a..5dab69d2c22bf 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
 #define __NR_landlock_add_rule 445
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 1ee8e736a48e3..bb11fe4c875af 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -363,7 +363,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 0dd019dc21362..79c2d24c89dda 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -442,7 +442,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 2ac716984ca28..b11395a20c203 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -448,7 +448,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 5e00966572510..9220909526f9b 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -381,7 +381,7 @@
 440	n32	process_madvise			sys_process_madvise
 441	n32	epoll_pwait2			compat_sys_epoll_pwait2
 442	n32	mount_setattr			sys_mount_setattr
-443	n32	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n32	landlock_add_rule		sys_landlock_add_rule
 446	n32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index 9974f5f8e49bc..9cd1c34f31b50 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-443	n64	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 39d6e71e57b60..d560c467a8c69 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -430,7 +430,7 @@
 440	o32	process_madvise			sys_process_madvise
 441	o32	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	o32	mount_setattr			sys_mount_setattr
-443	o32	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	o32	landlock_create_ruleset		sys_landlock_create_ruleset
 445	o32	landlock_add_rule		sys_landlock_add_rule
 446	o32	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 5ac80b83d745e..aabc37f8cae3a 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -440,7 +440,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 2e68fbb57cc66..8f052ff4058ce 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 7e4a2aba366df..0690263df1dd0 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index f47a0dc554455..0b91499ebdcfc 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index b9e1c0e735b72..e34cc30ef22ce 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -488,7 +488,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 28a1423ce32ee..4bbc267fb36bb 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -447,7 +447,7 @@
 440	i386	process_madvise		sys_process_madvise
 441	i386	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	i386	mount_setattr		sys_mount_setattr
-443	i386	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	i386	landlock_create_ruleset	sys_landlock_create_ruleset
 445	i386	landlock_add_rule	sys_landlock_add_rule
 446	i386	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d052..ce18119ea0d0f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 9d76d433d3d67..fd2f30227d961 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -413,7 +413,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
-- 
GitLab


From 7a274727702cc07d27cdebd36d1d5132abeea12f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 17 May 2021 12:43:34 +0100
Subject: [PATCH 0825/3804] io_uring: don't modify req->poll for rw

__io_queue_proc() is used by both poll and apoll, so we should not
access req->poll directly, but instead select the right struct
io_poll_iocb depending on the use case.

Reported-and-tested-by: syzbot+a84b8783366ecb1c65d0@syzkaller.appspotmail.com
Fixes: ea6a693d862d ("io_uring: disable multishot poll for double poll add cases")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/4a6a1de31142d8e0250fe2dfd4c8923d82a5bbfc.1621251795.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e481ac8a757ad..89ec10471b30b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5019,10 +5019,10 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		 * Can't handle multishot for double wait for now, turn it
 		 * into one-shot mode.
 		 */
-		if (!(req->poll.events & EPOLLONESHOT))
-			req->poll.events |= EPOLLONESHOT;
+		if (!(poll_one->events & EPOLLONESHOT))
+			poll_one->events |= EPOLLONESHOT;
 		/* double add on the same waitqueue head, ignore */
-		if (poll->head == head)
+		if (poll_one->head == head)
 			return;
 		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 		if (!poll) {
-- 
GitLab


From 6416954ca75baed71640bf3828625bf165fb9b5e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 14 May 2021 10:03:40 +0100
Subject: [PATCH 0826/3804] btrfs: release path before starting transaction
 when cloning inline extent

When cloning an inline extent there are a few cases, such as when we have
an implicit hole at file offset 0, where we start a transaction while
holding a read lock on a leaf. Starting the transaction results in a call
to sb_start_intwrite(), which results in doing a read lock on a percpu
semaphore. Lockdep doesn't like this and complains about it:

  [46.580704] ======================================================
  [46.580752] WARNING: possible circular locking dependency detected
  [46.580799] 5.13.0-rc1 #28 Not tainted
  [46.580832] ------------------------------------------------------
  [46.580877] cloner/3835 is trying to acquire lock:
  [46.580918] c00000001301d638 (sb_internal#2){.+.+}-{0:0}, at: clone_copy_inline_extent+0xe4/0x5a0
  [46.581167]
  [46.581167] but task is already holding lock:
  [46.581217] c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0
  [46.581293]
  [46.581293] which lock already depends on the new lock.
  [46.581293]
  [46.581351]
  [46.581351] the existing dependency chain (in reverse order) is:
  [46.581410]
  [46.581410] -> #1 (btrfs-tree-00){++++}-{3:3}:
  [46.581464]        down_read_nested+0x68/0x200
  [46.581536]        __btrfs_tree_read_lock+0x70/0x1d0
  [46.581577]        btrfs_read_lock_root_node+0x88/0x200
  [46.581623]        btrfs_search_slot+0x298/0xb70
  [46.581665]        btrfs_set_inode_index+0xfc/0x260
  [46.581708]        btrfs_new_inode+0x26c/0x950
  [46.581749]        btrfs_create+0xf4/0x2b0
  [46.581782]        lookup_open.isra.57+0x55c/0x6a0
  [46.581855]        path_openat+0x418/0xd20
  [46.581888]        do_filp_open+0x9c/0x130
  [46.581920]        do_sys_openat2+0x2ec/0x430
  [46.581961]        do_sys_open+0x90/0xc0
  [46.581993]        system_call_exception+0x3d4/0x410
  [46.582037]        system_call_common+0xec/0x278
  [46.582078]
  [46.582078] -> #0 (sb_internal#2){.+.+}-{0:0}:
  [46.582135]        __lock_acquire+0x1e90/0x2c50
  [46.582176]        lock_acquire+0x2b4/0x5b0
  [46.582263]        start_transaction+0x3cc/0x950
  [46.582308]        clone_copy_inline_extent+0xe4/0x5a0
  [46.582353]        btrfs_clone+0x5fc/0x880
  [46.582388]        btrfs_clone_files+0xd8/0x1c0
  [46.582434]        btrfs_remap_file_range+0x3d8/0x590
  [46.582481]        do_clone_file_range+0x10c/0x270
  [46.582558]        vfs_clone_file_range+0x1b0/0x310
  [46.582605]        ioctl_file_clone+0x90/0x130
  [46.582651]        do_vfs_ioctl+0x874/0x1ac0
  [46.582697]        sys_ioctl+0x6c/0x120
  [46.582733]        system_call_exception+0x3d4/0x410
  [46.582777]        system_call_common+0xec/0x278
  [46.582822]
  [46.582822] other info that might help us debug this:
  [46.582822]
  [46.582888]  Possible unsafe locking scenario:
  [46.582888]
  [46.582942]        CPU0                    CPU1
  [46.582984]        ----                    ----
  [46.583028]   lock(btrfs-tree-00);
  [46.583062]                                lock(sb_internal#2);
  [46.583119]                                lock(btrfs-tree-00);
  [46.583174]   lock(sb_internal#2);
  [46.583212]
  [46.583212]  *** DEADLOCK ***
  [46.583212]
  [46.583266] 6 locks held by cloner/3835:
  [46.583299]  #0: c00000001301d448 (sb_writers#12){.+.+}-{0:0}, at: ioctl_file_clone+0x90/0x130
  [46.583382]  #1: c00000000f6d3768 (&sb->s_type->i_mutex_key#15){+.+.}-{3:3}, at: lock_two_nondirectories+0x58/0xc0
  [46.583477]  #2: c00000000f6d72a8 (&sb->s_type->i_mutex_key#15/4){+.+.}-{3:3}, at: lock_two_nondirectories+0x9c/0xc0
  [46.583574]  #3: c00000000f6d7138 (&ei->i_mmap_lock){+.+.}-{3:3}, at: btrfs_remap_file_range+0xd0/0x590
  [46.583657]  #4: c00000000f6d35f8 (&ei->i_mmap_lock/1){+.+.}-{3:3}, at: btrfs_remap_file_range+0xe0/0x590
  [46.583743]  #5: c000000007fa2550 (btrfs-tree-00){++++}-{3:3}, at: __btrfs_tree_read_lock+0x70/0x1d0
  [46.583828]
  [46.583828] stack backtrace:
  [46.583872] CPU: 1 PID: 3835 Comm: cloner Not tainted 5.13.0-rc1 #28
  [46.583931] Call Trace:
  [46.583955] [c0000000167c7200] [c000000000c1ee78] dump_stack+0xec/0x144 (unreliable)
  [46.584052] [c0000000167c7240] [c000000000274058] print_circular_bug.isra.32+0x3a8/0x400
  [46.584123] [c0000000167c72e0] [c0000000002741f4] check_noncircular+0x144/0x190
  [46.584191] [c0000000167c73b0] [c000000000278fc0] __lock_acquire+0x1e90/0x2c50
  [46.584259] [c0000000167c74f0] [c00000000027aa94] lock_acquire+0x2b4/0x5b0
  [46.584317] [c0000000167c75e0] [c000000000a0d6cc] start_transaction+0x3cc/0x950
  [46.584388] [c0000000167c7690] [c000000000af47a4] clone_copy_inline_extent+0xe4/0x5a0
  [46.584457] [c0000000167c77c0] [c000000000af525c] btrfs_clone+0x5fc/0x880
  [46.584514] [c0000000167c7990] [c000000000af5698] btrfs_clone_files+0xd8/0x1c0
  [46.584583] [c0000000167c7a00] [c000000000af5b58] btrfs_remap_file_range+0x3d8/0x590
  [46.584652] [c0000000167c7ae0] [c0000000005d81dc] do_clone_file_range+0x10c/0x270
  [46.584722] [c0000000167c7b40] [c0000000005d84f0] vfs_clone_file_range+0x1b0/0x310
  [46.584793] [c0000000167c7bb0] [c00000000058bf80] ioctl_file_clone+0x90/0x130
  [46.584861] [c0000000167c7c10] [c00000000058c894] do_vfs_ioctl+0x874/0x1ac0
  [46.584922] [c0000000167c7d10] [c00000000058db4c] sys_ioctl+0x6c/0x120
  [46.584978] [c0000000167c7d60] [c0000000000364a4] system_call_exception+0x3d4/0x410
  [46.585046] [c0000000167c7e10] [c00000000000d45c] system_call_common+0xec/0x278
  [46.585114] --- interrupt: c00 at 0x7ffff7e22990
  [46.585160] NIP:  00007ffff7e22990 LR: 00000001000010ec CTR: 0000000000000000
  [46.585224] REGS: c0000000167c7e80 TRAP: 0c00   Not tainted  (5.13.0-rc1)
  [46.585280] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28000244  XER: 00000000
  [46.585374] IRQMASK: 0
  [46.585374] GPR00: 0000000000000036 00007fffffffdec0 00007ffff7f17100 0000000000000004
  [46.585374] GPR04: 000000008020940d 00007fffffffdf40 0000000000000000 0000000000000000
  [46.585374] GPR08: 0000000000000004 0000000000000000 0000000000000000 0000000000000000
  [46.585374] GPR12: 0000000000000000 00007ffff7ffa940 0000000000000000 0000000000000000
  [46.585374] GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
  [46.585374] GPR20: 0000000000000000 000000009123683e 00007fffffffdf40 0000000000000000
  [46.585374] GPR24: 0000000000000000 0000000000000000 0000000000000000 0000000000000004
  [46.585374] GPR28: 0000000100030260 0000000100030280 0000000000000003 000000000000005f
  [46.585919] NIP [00007ffff7e22990] 0x7ffff7e22990
  [46.585964] LR [00000001000010ec] 0x1000010ec
  [46.586010] --- interrupt: c00

This should be a false positive, as both locks are acquired in read mode.
Nevertheless, we don't need to hold a leaf locked when we start the
transaction, so just release the leaf (path) before starting it.

Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/linux-btrfs/20210513214404.xks77p566fglzgum@riteshh-domain/
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index f4ec06b53aa03..06682128d8fae 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -285,6 +285,11 @@ copy_inline_extent:
 	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
+		/*
+		 * Release path before starting a new transaction so we don't
+		 * hold locks that would confuse lockdep.
+		 */
+		btrfs_release_path(path);
 		/*
 		 * No transaction here means we copied the inline extent into a
 		 * page of the destination inode.
-- 
GitLab


From 91df99a6eb50d5a1bc70fff4a09a0b7ae6aab96d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 14 May 2021 10:56:16 -0400
Subject: [PATCH 0827/3804] btrfs: do not BUG_ON in link_to_fixup_dir

While doing error injection testing I got the following panic

  kernel BUG at fs/btrfs/tree-log.c:1862!
  invalid opcode: 0000 [#1] SMP NOPTI
  CPU: 1 PID: 7836 Comm: mount Not tainted 5.13.0-rc1+ #305
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
  RIP: 0010:link_to_fixup_dir+0xd5/0xe0
  RSP: 0018:ffffb5800180fa30 EFLAGS: 00010216
  RAX: fffffffffffffffb RBX: 00000000fffffffb RCX: ffff8f595287faf0
  RDX: ffffb5800180fa37 RSI: ffff8f5954978800 RDI: 0000000000000000
  RBP: ffff8f5953af9450 R08: 0000000000000019 R09: 0000000000000001
  R10: 000151f408682970 R11: 0000000120021001 R12: ffff8f5954978800
  R13: ffff8f595287faf0 R14: ffff8f5953c77dd0 R15: 0000000000000065
  FS:  00007fc5284c8c40(0000) GS:ffff8f59bbd00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fc5287f47c0 CR3: 000000011275e002 CR4: 0000000000370ee0
  Call Trace:
   replay_one_buffer+0x409/0x470
   ? btree_read_extent_buffer_pages+0xd0/0x110
   walk_up_log_tree+0x157/0x1e0
   walk_log_tree+0xa6/0x1d0
   btrfs_recover_log_trees+0x1da/0x360
   ? replay_one_extent+0x7b0/0x7b0
   open_ctree+0x1486/0x1720
   btrfs_mount_root.cold+0x12/0xea
   ? __kmalloc_track_caller+0x12f/0x240
   legacy_get_tree+0x24/0x40
   vfs_get_tree+0x22/0xb0
   vfs_kern_mount.part.0+0x71/0xb0
   btrfs_mount+0x10d/0x380
   ? vfs_parse_fs_string+0x4d/0x90
   legacy_get_tree+0x24/0x40
   vfs_get_tree+0x22/0xb0
   path_mount+0x433/0xa10
   __x64_sys_mount+0xe3/0x120
   do_syscall_64+0x3d/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

We can get -EIO or any number of legitimate errors from
btrfs_search_slot(), panicking here is not the appropriate response.  The
error path for this code handles errors properly, simply return the
error.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fd6b1f13112ed..c17d6b827b42e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1858,8 +1858,6 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
 		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 	} else if (ret == -EEXIST) {
 		ret = 0;
-	} else {
-		BUG(); /* Logic Error */
 	}
 	iput(inode);
 
-- 
GitLab


From b433d090ac63eae4d3182cfc274dbacb0c4ee0ec Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Tue, 11 May 2021 12:00:54 -0700
Subject: [PATCH 0828/3804] MAINTAINERS: ARM/Amlogic SoCs: add Neil as primary
 maintainer

Add Neil as primary maintainer for the Amlogic family of Arm SoCs.  I
will now act as co-maintainer.

Neil is already doing lots of the reviewing, testing and behind the
scenes support for users of the upstream kernel on these SoCs, so this
is just to formalize the current state of affairs.

Thanks Neil for all of your efforts, and keep up the great work!

Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20210511190054.26300-1-khilman@baylibre.com
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1162b0917630a..a75eb1514957b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1622,8 +1622,8 @@ F:	Documentation/devicetree/bindings/sound/amlogic*
 F:	sound/soc/meson/
 
 ARM/Amlogic Meson SoC support
+M:	Neil Armstrong <narmstrong@baylibre.com>
 M:	Kevin Hilman <khilman@baylibre.com>
-R:	Neil Armstrong <narmstrong@baylibre.com>
 R:	Jerome Brunet <jbrunet@baylibre.com>
 R:	Martin Blumenstingl <martin.blumenstingl@googlemail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-- 
GitLab


From dbc557fa5ff866f46c7e29c790f3a9b64e49ef3f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 9 Apr 2021 18:34:56 +0300
Subject: [PATCH 0829/3804] ata: Replace inclusion of kernel.h by bits.h in the
 header

ata.h uses BIT() macro, hence bits.h must be included. Otherwise
there is no need to have kernel.h included, I do not see any
direct users of it in ata.h. Hence replace inclusion of kernel.h.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210409153456.87798-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/ata.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 6e67aded28f8c..1b44f40c7700b 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -13,7 +13,7 @@
 #ifndef __LINUX_ATA_H__
 #define __LINUX_ATA_H__
 
-#include <linux/kernel.h>
+#include <linux/bits.h>
 #include <linux/string.h>
 #include <linux/types.h>
 #include <asm/byteorder.h>
-- 
GitLab


From c9efa49290ce3aa8692054b5110f8123819d4874 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 23:33:57 +0200
Subject: [PATCH 0830/3804] sata: fsl: fix DPRINTK format string

Printing an __iomem pointer as %x produces a warning:

drivers/ata/sata_fsl.c: In function 'fsl_sata_set_irq_coalescing':
drivers/ata/sata_fsl.c:316:17: error: format '%x' expects argument of type 'unsigned int', but argument 2 has type 'void *' [-Werror=format=]
  316 |         DPRINTK("ICC register status: (hcr base: 0x%x) = 0x%x\n",
      |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  317 |                         hcr_base, ioread32(hcr_base + ICC));
      |                         ~~~~~~~~
      |                         |
      |                         void *

It's not clear why that pointer should be printed here, but if we do,
then using %p is the way to avoid the warnings.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210514213402.691436-1-arnd@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_fsl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index d55ee244d6931..e5838b23c9e0a 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -313,7 +313,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host,
 
 	DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n",
 			intr_coalescing_count, intr_coalescing_ticks);
-	DPRINTK("ICC register status: (hcr base: 0x%x) = 0x%x\n",
+	DPRINTK("ICC register status: (hcr base: %p) = 0x%x\n",
 			hcr_base, ioread32(hcr_base + ICC));
 }
 
-- 
GitLab


From 14fe1e858ca21cf3298b818cc27cb610dc7dac3e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 16:01:01 +0200
Subject: [PATCH 0831/3804] sata: nv: fix debug format string mismatch

Turning on debugging in this driver reveals a type mismatch:

In file included from include/linux/kernel.h:17,
                 from drivers/ata/sata_nv.c:23:
drivers/ata/sata_nv.c: In function 'nv_swncq_sdbfis':
drivers/ata/sata_nv.c:2121:10: error: format '%x' expects argument of type 'unsigned int', but argument 3 has type 'u64' {aka 'long long unsigned int'} [-Werror=format=]
 2121 |  DPRINTK("id 0x%x QC: qc_active 0x%x,"
      |          ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
 2124 |   ap->print_id, ap->qc_active, pp->qc_active,
      |                 ~~~~~~~~~~~~~
      |                   |
      |                   u64 {aka long long unsigned int}
include/linux/printk.h:142:10: note: in definition of macro 'no_printk'
  142 |   printk(fmt, ##__VA_ARGS__);  \
      |          ^~~
drivers/ata/sata_nv.c:2121:2: note: in expansion of macro 'DPRINTK'
 2121 |  DPRINTK("id 0x%x QC: qc_active 0x%x,"
      |  ^~~~~~~
drivers/ata/sata_nv.c:2121:36: note: format string is defined here
 2121 |  DPRINTK("id 0x%x QC: qc_active 0x%x,"
      |                                   ~^
      |                                    |
      |                                    unsigned int
      |                                   %llx

Use the correct format string for the u64 type.

Fixes: e3ed89396441 ("libata: bump ->qc_active to a 64-bit type")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20210514140105.3080580-1-arnd@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_nv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 20190f66ced98..de45045566692 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -2118,7 +2118,7 @@ static int nv_swncq_sdbfis(struct ata_port *ap)
 		 */
 		lack_dhfis = 1;
 
-	DPRINTK("id 0x%x QC: qc_active 0x%x,"
+	DPRINTK("id 0x%x QC: qc_active 0x%llx,"
 		"SWNCQ:qc_active 0x%X defer_bits %X "
 		"dhfis 0x%X dmafis 0x%X last_issue_tag %x\n",
 		ap->print_id, ap->qc_active, pp->qc_active,
-- 
GitLab


From 6863b4d7bf19a54e23fc5838b7e66d954444289d Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 11 May 2021 08:48:27 +0300
Subject: [PATCH 0832/3804] RDMA/mlx5: Verify that DM operation is reasonable

Fix the complaint from smatch by verifying that the user requested DM
operation is not greater than 31.

drivers/infiniband/hw/mlx5/dm.c:220 mlx5_ib_handler_MLX5_IB_METHOD_DM_MAP_OP_ADDR()
error: undefined (user controlled) shift '(((1))) << op'

Fixes: cea85fa5dbc2 ("RDMA/mlx5: Add support in MEMIC operations")
Link: https://lore.kernel.org/r/458b1d7710c3cf01360c8771893f483665569786.1620711734.git.leonro@nvidia.com
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/dm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 094bf85589db5..001d766cf291c 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -217,6 +217,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DM_MAP_OP_ADDR)(
 	if (err)
 		return err;
 
+	if (op >= BITS_PER_TYPE(u32))
+		return -EOPNOTSUPP;
+
 	if (!(MLX5_CAP_DEV_MEM(dev->mdev, memic_operations) & BIT(op)))
 		return -EOPNOTSUPP;
 
-- 
GitLab


From 97f30d324ce6645a4de4ffb71e4ae9b8ca36ff04 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 11 May 2021 08:48:29 +0300
Subject: [PATCH 0833/3804] RDMA/mlx5: Recover from fatal event in dual port
 mode

When there is a fatal event on the slave port, the device is marked as
not active. We need to mark it as active again when the slave is
recovered to regain full functionality.

Fixes: d69a24e03659 ("IB/mlx5: Move IB event processing onto a workqueue")
Link: https://lore.kernel.org/r/8906754455bb23019ef223c725d2c0d38acfb80b.1620711734.git.leonro@nvidia.com
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6d1dd09a43881..644d5d0ac5442 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4419,6 +4419,7 @@ static int mlx5r_mp_probe(struct auxiliary_device *adev,
 
 		if (bound) {
 			rdma_roce_rescan_device(&dev->ib_dev);
+			mpi->ibdev->ib_active = true;
 			break;
 		}
 	}
-- 
GitLab


From dc07628bd2bbc1da768e265192c28ebd301f509d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 11 May 2021 08:48:31 +0300
Subject: [PATCH 0834/3804] RDMA/rxe: Return CQE error if invalid lkey was
 supplied

RXE is missing update of WQE status in LOCAL_WRITE failures.  This caused
the following kernel panic if someone sent an atomic operation with an
explicitly wrong lkey.

[leonro@vm ~]$ mkt test
test_atomic_invalid_lkey (tests.test_atomic.AtomicTest) ...
 WARNING: CPU: 5 PID: 263 at drivers/infiniband/sw/rxe/rxe_comp.c:740 rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
 Modules linked in: crc32_generic rdma_rxe ip6_udp_tunnel udp_tunnel rdma_ucm rdma_cm ib_umad ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core mlx5_core ptp pps_core
 CPU: 5 PID: 263 Comm: python3 Not tainted 5.13.0-rc1+ #2936
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:rxe_completer+0x1a6d/0x2e30 [rdma_rxe]
 Code: 03 0f 8e 65 0e 00 00 3b 93 10 06 00 00 0f 84 82 0a 00 00 4c 89 ff 4c 89 44 24 38 e8 2d 74 a9 e1 4c 8b 44 24 38 e9 1c f5 ff ff <0f> 0b e9 0c e8 ff ff b8 05 00 00 00 41 bf 05 00 00 00 e9 ab e7 ff
 RSP: 0018:ffff8880158af090 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: ffff888016a78000 RCX: ffffffffa0cf1652
 RDX: 1ffff9200004b442 RSI: 0000000000000004 RDI: ffffc9000025a210
 RBP: dffffc0000000000 R08: 00000000ffffffea R09: ffff88801617740b
 R10: ffffed1002c2ee81 R11: 0000000000000007 R12: ffff88800f3b63e8
 R13: ffff888016a78008 R14: ffffc9000025a180 R15: 000000000000000c
 FS:  00007f88b622a740(0000) GS:ffff88806d540000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f88b5a1fa10 CR3: 000000000d848004 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_rcv+0xb11/0x1df0 [rdma_rxe]
  rxe_loopback+0x157/0x1e0 [rdma_rxe]
  rxe_responder+0x5532/0x7620 [rdma_rxe]
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_rcv+0x9c8/0x1df0 [rdma_rxe]
  rxe_loopback+0x157/0x1e0 [rdma_rxe]
  rxe_requester+0x1efd/0x58c0 [rdma_rxe]
  rxe_do_task+0x130/0x230 [rdma_rxe]
  rxe_post_send+0x998/0x1860 [rdma_rxe]
  ib_uverbs_post_send+0xd5f/0x1220 [ib_uverbs]
  ib_uverbs_write+0x847/0xc80 [ib_uverbs]
  vfs_write+0x1c5/0x840
  ksys_write+0x176/0x1d0
  do_syscall_64+0x3f/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/11e7b553f3a6f5371c6bb3f57c494bb52b88af99.1620711734.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Acked-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 2af26737d32dc..a6712e373eed6 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -346,13 +346,15 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, payload_addr(pkt),
 			payload_size(pkt), to_mr_obj, NULL);
-	if (ret)
+	if (ret) {
+		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
+	}
 
 	if (wqe->dma.resid == 0 && (pkt->mask & RXE_END_MASK))
 		return COMPST_COMP_ACK;
-	else
-		return COMPST_UPDATE_COMP;
+
+	return COMPST_UPDATE_COMP;
 }
 
 static inline enum comp_state do_atomic(struct rxe_qp *qp,
@@ -366,10 +368,12 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
 	ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
 			&wqe->dma, &atomic_orig,
 			sizeof(u64), to_mr_obj, NULL);
-	if (ret)
+	if (ret) {
+		wqe->status = IB_WC_LOC_PROT_ERR;
 		return COMPST_ERROR;
-	else
-		return COMPST_COMP_ACK;
+	}
+
+	return COMPST_COMP_ACK;
 }
 
 static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-- 
GitLab


From e86bdb24375a810ea7993d64ed406a803db71225 Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Fri, 14 May 2021 20:12:44 +0530
Subject: [PATCH 0835/3804] scripts: kernel-doc: reduce repeated regex
 expressions into variables

There are some regex expressions in the kernel-doc script, which are used
repeatedly in the script.

Reduce such expressions into variables, which can be used everywhere.

A quick manual check found that no errors and warnings were added/removed
in this process.

Suggested-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Link: https://lore.kernel.org/r/20210514144244.25341-1-yashsri421@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/kernel-doc | 71 ++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 37 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 4840e748fca8b..7c4a6a507ac41 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -406,6 +406,8 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
+my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
 
 my %parameterdescs;
 my %parameterdesc_start_lines;
@@ -694,7 +696,7 @@ sub output_function_man(%) {
 	    $post = ");";
 	}
 	$type = $args{'parametertypes'}{$parameter};
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print ".BI \"" . $parenth . $1 . "\" " . " \") (" . $2 . ")" . $post . "\"\n";
 	} else {
@@ -974,7 +976,7 @@ sub output_function_rst(%) {
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
+	if ($type =~ m/$function_pointer/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
@@ -1211,7 +1213,9 @@ sub dump_struct($$) {
     my $members;
     my $type = qr{struct|union};
     # For capturing struct/union definition body, i.e. "{members*}qualifiers*"
-    my $definition_body = qr{\{(.*)\}(?:\s*(?:__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned|__attribute__\s*\(\([a-z0-9,_\s\(\)]*\)\)))*};
+    my $qualifiers = qr{$attribute|__packed|__aligned|____cacheline_aligned_in_smp|____cacheline_aligned};
+    my $definition_body = qr{\{(.*)\}\s*$qualifiers*};
+    my $struct_members = qr{($type)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;};
 
     if ($x =~ /($type)\s+(\w+)\s*$definition_body/) {
 	$decl_type = $1;
@@ -1235,27 +1239,27 @@ sub dump_struct($$) {
 	# strip comments:
 	$members =~ s/\/\*.*?\*\///gos;
 	# strip attributes
-	$members =~ s/\s*__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)/ /gi;
+	$members =~ s/\s*$attribute/ /gi;
 	$members =~ s/\s*__aligned\s*\([^;]*\)/ /gos;
 	$members =~ s/\s*__packed\s*/ /gos;
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
 	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
 	$members =~ s/\s*____cacheline_aligned/ /gos;
 
+	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
 	$members =~ s/__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)/DECLARE_BITMAP($1, __ETHTOOL_LINK_MODE_MASK_NBITS)/gos;
-	$members =~ s/DECLARE_BITMAP\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
+	$members =~ s/DECLARE_BITMAP\s*\($args,\s*$args\)/unsigned long $1\[BITS_TO_LONGS($2)\]/gos;
 	# replace DECLARE_HASHTABLE
-	$members =~ s/DECLARE_HASHTABLE\s*\(([^,)]+),\s*([^,)]+)\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
+	$members =~ s/DECLARE_HASHTABLE\s*\($args,\s*$args\)/unsigned long $1\[1 << (($2) - 1)\]/gos;
 	# replace DECLARE_KFIFO
-	$members =~ s/DECLARE_KFIFO\s*\(([^,)]+),\s*([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
+	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
-	$members =~ s/DECLARE_KFIFO_PTR\s*\(([^,)]+),\s*([^,)]+)\)/$2 \*$1/gos;
-
+	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-	while ($members =~ m/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/) {
+	while ($members =~ m/$struct_members/) {
 		my $newmember;
 		my $maintype = $1;
 		my $ids = $4;
@@ -1315,7 +1319,7 @@ sub dump_struct($$) {
 				}
 			}
 		}
-		$members =~ s/(struct|union)([^\{\};]+)\{([^\{\}]*)\}([^\{\}\;]*)\;/$newmember/;
+		$members =~ s/$struct_members/$newmember/;
 	}
 
 	# Ignore other nested elements, like enums
@@ -1555,8 +1559,9 @@ sub create_parameterlist($$$$) {
     my $param;
 
     # temporarily replace commas inside function pointer definition
-    while ($args =~ /(\([^\),]+),/) {
-	$args =~ s/(\([^\),]+),/$1#/g;
+    my $arg_expr = qr{\([^\),]+};
+    while ($args =~ /$arg_expr,/) {
+	$args =~ s/($arg_expr),/$1#/g;
     }
 
     foreach my $arg (split($splitter, $args)) {
@@ -1707,7 +1712,7 @@ sub check_sections($$$$$) {
 		foreach $px (0 .. $#prms) {
 			$prm_clean = $prms[$px];
 			$prm_clean =~ s/\[.*\]//;
-			$prm_clean =~ s/__attribute__\s*\(\([a-z,_\*\s\(\)]*\)\)//i;
+			$prm_clean =~ s/$attribute//i;
 			# ignore array size in a parameter string;
 			# however, the original param string may contain
 			# spaces, e.g.:  addr[6 + 2]
@@ -1809,8 +1814,14 @@ sub dump_function($$) {
     # - parport_register_device (function pointer parameters)
     # - atomic_set (macro)
     # - pci_match_device, __copy_to_user (long return type)
-
-    if ($define && $prototype =~ m/^()([a-zA-Z0-9_~:]+)\s+/) {
+    my $name = qr{[a-zA-Z0-9_~:]+};
+    my $prototype_end1 = qr{[^\(]*};
+    my $prototype_end2 = qr{[^\{]*};
+    my $prototype_end = qr{\(($prototype_end1|$prototype_end2)\)};
+    my $type1 = qr{[\w\s]+};
+    my $type2 = qr{$type1\*+};
+
+    if ($define && $prototype =~ m/^()($name)\s+/) {
         # This is an object-like macro, it has no return type and no parameter
         # list.
         # Function-like macros are not allowed to have spaces between
@@ -1818,23 +1829,9 @@ sub dump_function($$) {
         $return_type = $1;
         $declaration_name = $2;
         $noret = 1;
-    } elsif ($prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\(]*)\)/ ||
-	$prototype =~ m/^()([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+)\s+([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s+\w+\s+\w+\s*\*+)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/ ||
-	$prototype =~ m/^(\w+\s+\w+\s*\*+\s*\w+\s*\*+\s*)\s*([a-zA-Z0-9_~:]+)\s*\(([^\{]*)\)/)  {
+    } elsif ($prototype =~ m/^()($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type1)\s+($name)\s*$prototype_end/ ||
+	$prototype =~ m/^($type2+)\s*($name)\s*$prototype_end/)  {
 	$return_type = $1;
 	$declaration_name = $2;
 	my $args = $3;
@@ -2111,12 +2108,12 @@ sub process_name($$) {
     } elsif (/$doc_decl/o) {
 	$identifier = $1;
 	my $is_kernel_comment = 0;
-	my $decl_start = qr{\s*\*};
+	my $decl_start = qr{$doc_com};
 	# test for pointer declaration type, foo * bar() - desc
 	my $fn_type = qr{\w+\s*\*\s*}; 
 	my $parenthesis = qr{\(\w*\)};
 	my $decl_end = qr{[-:].*};
-	if (/^$decl_start\s*([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
+	if (/^$decl_start([\w\s]+?)$parenthesis?\s*$decl_end?$/) {
 	    $identifier = $1;
 	}
 	if ($identifier =~ m/^(struct|union|enum|typedef)\b\s*(\S*)/) {
@@ -2126,8 +2123,8 @@ sub process_name($$) {
 	}
 	# Look for foo() or static void foo() - description; or misspelt
 	# identifier
-	elsif (/^$decl_start\s*$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
-	    /^$decl_start\s*$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
+	elsif (/^$decl_start$fn_type?(\w+)\s*$parenthesis?\s*$decl_end?$/ ||
+	    /^$decl_start$fn_type?(\w+.*)$parenthesis?\s*$decl_end$/) {
 	    $identifier = $1;
 	    $decl_type = 'function';
 	    $identifier =~ s/^define\s+//;
-- 
GitLab


From 867e6d38f367c5414c076f94c451da2f664b9c7e Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:34:20 +0800
Subject: [PATCH 0836/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/index.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/index.rst
and link it to zh_CN/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/224959c4cdcd4c6554035145d5cedcd244887552.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst    |  2 +-
 .../translations/zh_CN/maintainer/index.rst   | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/index.rst

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index d56d6b7092e62..95c3f313cea1a 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -72,11 +72,11 @@ TODOlist:
    dev-tools/index
    doc-guide/index
    kernel-hacking/index
+   maintainer/index
 
 TODOList:
 
 * trace/index
-* maintainer/index
 * fault-injection/index
 * livepatch/index
 * rust/index
diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
new file mode 100644
index 0000000000000..4ce27c12f370b
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -0,0 +1,19 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/index.rst
+
+==============
+内核维护者手册
+==============
+
+本文档是内核维护者手册的首页。
+本手册还需要大量完善！请自由提出（和编写）本手册的补充内容。
+*译注：指英文原版*
+
+TODOList:
+
+-   configure-git
+-   rebasing-and-merging
+-   pull-requests
+-   maintainer-entry-profile
+-   modifying-patches
-- 
GitLab


From 6ba8a96f4dbab7118d4c019bb30a41d74b2bda13 Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:34:32 +0800
Subject: [PATCH 0837/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/configure-git.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/configure-git.rst
and link it to zh_CN/maintainer/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/dcf6addd89eba3777b9b59d8b506fa162fbcd828.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/maintainer/configure-git.rst        | 62 +++++++++++++++++++
 .../translations/zh_CN/maintainer/index.rst   |  6 +-
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/configure-git.rst

diff --git a/Documentation/translations/zh_CN/maintainer/configure-git.rst b/Documentation/translations/zh_CN/maintainer/configure-git.rst
new file mode 100644
index 0000000000000..a45ea736f73bb
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/configure-git.rst
@@ -0,0 +1,62 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/configure-git.rst
+
+:译者:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _configuregit_zh:
+
+Git配置
+=======
+
+本章讲述了维护者级别的git配置。
+
+Documentation/maintainer/pull-requests.rst 中使用的标记分支应使用开发人员的
+GPG公钥进行签名。可以通过将 ``-u`` 标志传递给 ``git tag`` 来创建签名标记。
+但是，由于 *通常* 对同一项目使用同一个密钥，因此可以设置::
+
+	git config user.signingkey "keyname"
+
+或者手动编辑你的 ``.git/config`` 或 ``~/.gitconfig`` 文件::
+
+	[user]
+		name = Jane Developer
+		email = jd@domain.org
+		signingkey = jd@domain.org
+
+你可能需要告诉 ``git`` 去使用 ``gpg2``::
+
+	[gpg]
+		program = /path/to/gpg2
+
+你可能也需要告诉 ``gpg`` 去使用哪个 ``tty`` （添加到你的shell rc文件中）::
+
+	export GPG_TTY=$(tty)
+
+
+创建链接到lore.kernel.org的提交
+-------------------------------
+
+http://lore.kernel.org 网站是所有涉及或影响内核开发的邮件列表的总存档。在这里
+存储补丁存档是推荐的做法，当维护人员将补丁应用到子系统树时，最好提供一个指向
+lore存档链接的标签，以便浏览提交历史的人可以找到某个更改背后的相关讨论和基本
+原理。链接标签如下所示：
+
+	Link: https://lore.kernel.org/r/<message-id>
+
+通过在git中添加以下钩子，可以将此配置为在发布 ``git am`` 时自动执行：
+
+.. code-block:: none
+
+	$ git config am.messageid true
+	$ cat >.git/hooks/applypatch-msg <<'EOF'
+	#!/bin/sh
+	. git-sh-setup
+	perl -pi -e 's|^Message-Id:\s*<?([^>]+)>?$|Link: https://lore.kernel.org/r/$1|g;' "$1"
+	test -x "$GIT_DIR/hooks/commit-msg" &&
+		exec "$GIT_DIR/hooks/commit-msg" ${1+"$@"}
+	:
+	EOF
+	$ chmod a+x .git/hooks/applypatch-msg
diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
index 4ce27c12f370b..e263315f5e7ac 100644
--- a/Documentation/translations/zh_CN/maintainer/index.rst
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -10,9 +10,13 @@
 本手册还需要大量完善！请自由提出（和编写）本手册的补充内容。
 *译注：指英文原版*
 
+.. toctree::
+   :maxdepth: 2
+
+   configure-git
+
 TODOList:
 
--   configure-git
 -   rebasing-and-merging
 -   pull-requests
 -   maintainer-entry-profile
-- 
GitLab


From b7198943af1709790e7a125ff911c529c12ccc3f Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:34:46 +0800
Subject: [PATCH 0838/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/rebasing-and-merging.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst
and link it to zh_CN/maintainer/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/040f93d0f773a0c9c8d6637701ba269d816a6385.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/maintainer/index.rst   |   2 +-
 .../zh_CN/maintainer/rebasing-and-merging.rst | 165 ++++++++++++++++++
 2 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst

diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
index e263315f5e7ac..21ab7cebf78b3 100644
--- a/Documentation/translations/zh_CN/maintainer/index.rst
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -14,10 +14,10 @@
    :maxdepth: 2
 
    configure-git
+   rebasing-and-merging
 
 TODOList:
 
--   rebasing-and-merging
 -   pull-requests
 -   maintainer-entry-profile
 -   modifying-patches
diff --git a/Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst b/Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst
new file mode 100644
index 0000000000000..83b7dabfe88b8
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/rebasing-and-merging.rst
@@ -0,0 +1,165 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/rebasing-and-merging.rst
+
+:译者:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+==========
+变基与合并
+==========
+
+一般来说，维护子系统需要熟悉Git源代码管理系统。Git是一个功能强大的工具，有
+很多功能；就像这类工具常出现的情况一样，使用这些功能的方法有对有错。本文档
+特别介绍了变基与合并的用法。维护者经常在错误使用这些工具时遇到麻烦，但避免
+问题实际上并不那么困难。
+
+总的来说，需要注意的一点是：与许多其他项目不同，内核社区并不害怕在其开发历史
+中看到合并提交。事实上，考虑到该项目的规模，避免合并几乎是不可能的。维护者会
+在希望避免合并时遇到一些问题，而过于频繁的合并也会带来另一些问题。
+
+变基
+====
+
+“变基（Rebase）”是更改存储库中一系列提交的历史记录的过程。有两种不同类型的操作
+都被称为变基，因为这两种操作都使用 ``git rebase`` 命令，但它们之间存在显著
+差异：
+
+ - 更改一系列补丁的父提交（起始提交）。例如，变基操作可以将基于上一内核版本
+   的一个补丁集重建到当前版本上。在下面的讨论中，我们将此操作称为“变根”。
+
+ - 通过修复（或删除）损坏的提交、添加补丁、向提交的变更日志添加标记或更改提交
+   的应用顺序，来更改一系列补丁的历史。在下文中，这种类型的操作称为“历史
+   修改”。
+
+术语“变基”将用于指代上述两种操作。如果使用得当，变基可以产生更清晰、更整洁的
+开发历史；如果使用不当，它可能会模糊历史并引入错误。
+
+以下一些经验法则可以帮助开发者避免最糟糕的变基风险：
+
+ - 已经发布到你私人系统之外世界的历史通常不应更改。其他人可能会拉取你的树
+   的副本，然后基于它进行工作；修改你的树会给他们带来麻烦。如果工作需要变基，
+   这通常是表明它还没有准备好提交到公共存储库的信号。
+
+   但是，总有例外。有些树（linux-next是一个典型的例子）由于它们的需要经常
+   变基，开发人员知道不要基于它们来工作。开发人员有时会公开一个不稳定的分支，
+   供其他人或自动测试服务进行测试。如果您确实以这种方式公开了一个可能不稳定
+   的分支，请确保潜在使用者知道不要基于它来工作。
+
+ - 不要在包含由他人创建的历史的分支上变基。如果你从别的开发者的仓库拉取了变更，
+   那你现在就成了他们历史记录的保管人。你不应该改变它，除了少数例外情况。例如
+   树中有问题的提交必须显式恢复（即通过另一个补丁修复），而不是通过修改历史而
+   消失。
+
+ - 没有合理理由，不要对树变根。仅为了切换到更新的基或避免与上游储存库的合并
+   通常不是合理理由。
+
+ - 如果你必须对储存库进行变根，请不要随机选取一个提交作为新基。在发布节点之间
+   内核通常处于一个相对不稳定的状态；基于其中某点进行开发会增加遇到意外错误的
+   几率。当一系列补丁必须移动到新基时，请选择移动到一个稳定节点（例如-rc版本
+   节点）。
+
+ - 请知悉对补丁系列进行变根（或做明显的历史修改）会改变它们的开发环境，且很
+   可能使做过的大部分测试失效。一般来说，变基后的补丁系列应当像新代码一样对
+   待，并重新测试。
+
+合并窗口麻烦的一个常见原因是，Linus收到了一个明显在拉取请求发送之前不久才变根
+（通常是变根到随机的提交上）的补丁系列。这样一个系列被充分测试的可能性相对较
+低，拉取请求被接受的几率也同样较低。
+
+相反，如果变基仅限于私有树、提交基于一个通用的起点、且经过充分测试，则引起
+麻烦的可能性就很低。
+
+合并
+====
+
+内核开发过程中，合并是一个很常见的操作；5.1版本开发周期中有超过1126个合并
+——差不多占了整体的9%。内核开发工作积累在100多个不同的子系统树中，每个
+子系统树都可能包含多个主题分支；每个分支通常独立于其他分支进行开发。因此
+在任何给定分支进入上游储存库之前，至少需要一次合并。
+
+许多项目要求拉取请求中的分支基于当前主干，这样历史记录中就不会出现合并提交。
+内核并不是这样；任何为了避免合并而重新对分支变基都很可能导致麻烦。
+
+子系统维护人员发现他们必须进行两种类型的合并：从较低层级的子系统树和从其他
+子系统树（同级树或主线）进行合并。这两种情况下要遵循的最佳实践是不同的。
+
+合并较低层级树
+--------------
+
+较大的子系统往往有多个级别的维护人员，较低级别的维护人员向较高级别发送拉取
+请求。合并这样的请求执行几乎肯定会生成一个合并提交；这也是应该的。实际上，
+子系统维护人员可能希望在极少数快进合并情况下使用 ``--no-ff`` 标志来强制添加
+合并提交，以便记录合并的原因。 **任何** 类型的合并的变更日志必须说明
+*为什么* 合并。对于较低级别的树，“为什么”通常是对该拉取所带来的变化的总结。
+
+各级维护人员都应在他们的拉取请求上使用经签名的标签，上游维护人员应在拉取分支
+时验证标签。不这样做会威胁整个开发过程的安全。
+
+根据上面列出的规则，一旦您将其他人的历史记录合并到树中，您就不得对该分支进行
+变基，即使您能够这样做。
+
+合并同级树或上游树
+------------------
+
+虽然来自下游的合并是常见且不起眼的，但当需要将一个分支推向上游时，其中来自
+其他树的合并往往是一个危险信号。这种合并需要仔细考虑并加以充分证明，否则后续
+的拉取请求很可能会被拒绝。
+
+想要将主分支合并到存储库中是很自然的；这种类型的合并通常被称为“反向合并”
+。反向合并有助于确保与并行的开发没有冲突，并且通常会给人一种温暖、舒服的
+感觉，即处于最新。但这种诱惑几乎总是应该避免的。
+
+为什么呢？反向合并将搅乱你自己分支的开发历史。它们会大大增加你遇到来自社区
+其他地方的错误的机会，且使你很难确保你所管理的工作稳定并准备好合入上游。
+频繁的合并还可以掩盖树中开发过程中的问题；它们会隐藏与其他树的交互，而这些
+交互不应该（经常）发生在管理良好的分支中。
+
+也就是说，偶尔需要进行反向合并；当这种情况发生时，一定要在提交信息中记录
+*为什么* 。同样，在一个众所周知的稳定点进行合并，而不是随机提交。即使这样，
+你也不应该反向合并一棵比你的直接上游树更高层级的树；如果确实需要更高级别的
+反向合并，应首先在上游树进行。
+
+导致合并相关问题最常见的原因之一是：在发送拉取请求之前维护者合并上游以解决
+合并冲突。同样，这种诱惑很容易理解，但绝对应该避免。对于最终拉取请求来说
+尤其如此：Linus坚信他更愿意看到合并冲突，而不是不必要的反向合并。看到冲突
+可以让他了解潜在的问题所在。他做过很多合并（在5.1版本开发周期中是382次），
+而且在解决冲突方面也很在行——通常比参与的开发人员要强。
+
+那么，当他们的子系统分支和主线之间发生冲突时，维护人员应该怎么做呢？最重要
+的一步是在拉取请求中提示Linus会发生冲突；如果啥都没说则表明您的分支可以正常
+合入。对于特别困难的冲突，创建并推送一个 *独立* 分支来展示你将如何解决问题。
+在拉取请求中提到该分支，但是请求本身应该针对未合并的分支。
+
+即使不存在已知冲突，在发送拉取请求之前进行合并测试也是个好主意。它可能会提醒
+您一些在linux-next树中没有发现的问题，并帮助您准确地理解您正在要求上游做什么。
+
+合并上游树或另一个子系统树的另一个原因是解决依赖关系。这些依赖性问题有时确实
+会发生，而且有时与另一棵树交叉合并是解决这些问题的最佳方法；同样，在这种情况
+下，合并提交应该解释为什么要进行合并。花点时间把它做好；会有人阅读这些变更
+日志。
+
+然而依赖性问题通常表明需要改变方法。合并另一个子系统树以解决依赖性风险会带来
+其他缺陷，几乎永远不应这样做。如果该子系统树无法被合到上游，那么它的任何问题
+也都会阻碍你的树合并。更可取的选择包括与维护人员达成一致意见，在其中一个树中
+同时进行两组更改；或者创建一个主题分支专门处理可以合并到两个树中的先决条件提交。
+如果依赖关系与主要的基础结构更改相关，正确的解决方案可能是将依赖提交保留一个
+开发周期，以便这些更改有时间在主线上稳定。
+
+最后
+====
+
+在开发周期的开头合并主线是比较常见的，可以获取树中其他地方的更改和修复。同样，
+这样的合并应该选择一个众所周知的发布点，而不是一些随机点。如果在合并窗口期间
+上游分支已完全清空到主线中，则可以使用以下命令向前拉取它::
+
+  git merge v5.2-rc1^0
+
+“^0”使Git执行快进合并（在这种情况下这应该可以），从而避免多余的虚假合并提交。
+
+上面列出的就是指导方针了。总是会有一些情况需要不同的解决方案，这些指导原则
+不应阻止开发人员在需要时做正确的事情。但是，我们应该时刻考虑是否真的出现了
+这样的需求，并准备好解释为什么需要做一些不寻常的事情。
-- 
GitLab


From 989cfaecbd2c06800d13f49206498602b1e769a8 Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:35:05 +0800
Subject: [PATCH 0839/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/pull-requests.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/pull-requests.rst
and link it to zh_CN/maintainer/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/4774d1b7976678ce97c4356bd71509df0cec1ffc.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/maintainer/index.rst   |   2 +-
 .../zh_CN/maintainer/pull-requests.rst        | 148 ++++++++++++++++++
 2 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/pull-requests.rst

diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
index 21ab7cebf78b3..1a1222b787158 100644
--- a/Documentation/translations/zh_CN/maintainer/index.rst
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -15,9 +15,9 @@
 
    configure-git
    rebasing-and-merging
+   pull-requests
 
 TODOList:
 
--   pull-requests
 -   maintainer-entry-profile
 -   modifying-patches
diff --git a/Documentation/translations/zh_CN/maintainer/pull-requests.rst b/Documentation/translations/zh_CN/maintainer/pull-requests.rst
new file mode 100644
index 0000000000000..f46d6f3f2498b
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/pull-requests.rst
@@ -0,0 +1,148 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/pull-requests.rst
+
+:译者:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _pullrequests_zh:
+
+如何创建拉取请求
+================
+
+本章描述维护人员如何创建并向其他维护人员提交拉取请求。这对将更改从一个维护者
+树转移到另一个维护者树非常有用。
+
+本文档由Tobin C. Harding（当时他尚不是一名经验丰富的维护人员）编写，内容主要
+来自Greg Kroah Hartman和Linus Torvalds在LKML上的评论。Jonathan Corbet和Mauro
+Carvalho Chehab提出了一些建议和修改。错误不可避免，如有问题，请找Tobin C.
+Harding <me@tobin.cc>。
+
+原始邮件线程::
+
+	http://lkml.kernel.org/r/20171114110500.GA21175@kroah.com
+
+
+创建分支
+--------
+
+首先，您需要将希望包含在拉取请求里的所有更改都放在单独分支中。通常您将基于某开发
+人员树的一个分支，一般是打算向其发送拉取请求的开发人员。
+
+为了创建拉取请求，您必须首先标记刚刚创建的分支。建议您选择一个有意义的标记名，
+以即使过了一段时间您和他人仍能理解的方式。在名称中包含源子系统和目标内核版本
+的指示也是一个好的做法。
+
+Greg提供了以下内容。对于一个含有drivers/char中混杂事项、将应用于4.15-rc1内核的
+拉取请求，可以命名为 ``char-misc-4.15-rc1`` 。如果要在 ``char-misc-next`` 分支
+上打上此标记，您可以使用以下命令::
+
+	git tag -s char-misc-4.15-rc1 char-misc-next
+
+这将在 ``char-misc-next`` 分支的最后一个提交上创建一个名为 ``char-misc-4.15-rc1``
+的标记，并用您的gpg密钥签名（参见 Documentation/maintainer/configure-git.rst ）。
+
+Linus只接受基于签名过的标记的拉取请求。其他维护者可能会有所不同。
+
+当您运行上述命令时 ``git`` 会打开编辑器要求你描述一下这个标记。在本例中您需要
+描述拉取请求，所以请概述一下包含的内容，为什么要合并，是否完成任何测试。所有
+这些信息都将留在标记中，然后在维护者合并拉取请求时保留在合并提交中。所以把它
+写好，它将永远留在内核中。
+
+正如Linus所说::
+
+	不管怎么样，至少对我来说，重要的是 *信息* 。我需要知道我在拉取什么、
+	为什么我要拉取。我也希望将此消息用于合并消息，因此它不仅应该对我有
+	意义，也应该可以成为一个有意义的历史记录。
+
+	注意，如果拉取请求有一些不寻常的地方，请详细说明。如果你修改了并非
+	由你维护的文件，请解释 **为什么** 。我总会在差异中看到的，如果你不
+	提的话，我只会觉得分外可疑。当你在合并窗口后给我发新东西的时候，
+	（甚至是比较重大的错误修复），不仅需要解释做了什么、为什么这么做，
+	还请解释一下 **时间问题** 。为什么错过了合并窗口……
+
+	我会看你写在拉取请求邮件和签名标记里面的内容，所以根据你的工作流，
+	你可以在签名标记里面描述工作内容（也会自动放进拉取请求邮件），也
+	可以只在标记里面放个占位符，稍后在你实际发给我拉取请求时描述工作内容。
+
+	是的，我会编辑这些消息。部分因为我需要做一些琐碎的格式调整（整体缩进、
+	括号等），也因为此消息可能对我有意义（描述了冲突或一些个人问题）而对
+	合并提交信息上下文没啥意义，因此我需要尽力让它有意义起来。我也会
+	修复一些拼写和语法错误，特别是非母语者（母语者也是;^）。但我也会删掉
+	或增加一些内容。
+
+			Linus
+
+Greg给出了一个拉取请求的例子::
+
+	Char/Misc patches for 4.15-rc1
+
+	Here is the big char/misc patch set for the 4.15-rc1 merge window.
+	Contained in here is the normal set of new functions added to all
+	of these crazy drivers, as well as the following brand new
+	subsystems:
+		- time_travel_controller: Finally a set of drivers for the
+		  latest time travel bus architecture that provides i/o to
+		  the CPU before it asked for it, allowing uninterrupted
+		  processing
+		- relativity_shifters: due to the affect that the
+		  time_travel_controllers have on the overall system, there
+		  was a need for a new set of relativity shifter drivers to
+		  accommodate the newly formed black holes that would
+		  threaten to suck CPUs into them.  This subsystem handles
+		  this in a way to successfully neutralize the problems.
+		  There is a Kconfig option to force these to be enabled
+		  when needed, so problems should not occur.
+
+	All of these patches have been successfully tested in the latest
+	linux-next releases, and the original problems that it found have
+	all been resolved (apologies to anyone living near Canberra for the
+	lack of the Kconfig options in the earlier versions of the
+	linux-next tree creations.)
+
+	Signed-off-by: Your-name-here <your_email@domain>
+
+
+此标记消息格式就像一个git提交。顶部有一行“总结标题”， 一定要在下面sign-off。
+
+现在您已经有了一个本地签名标记，您需要将它推送到可以被拉取的位置::
+
+	git push origin char-misc-4.15-rc1
+
+
+创建拉取请求
+------------
+
+最后要做的是创建拉取请求消息。可以使用 ``git request-pull`` 命令让 ``git``
+为你做这件事，但它需要确定你想拉取什么，以及拉取针对的基础（显示正确的拉取
+更改和变更状态）。以下命令将生成一个拉取请求::
+
+	git request-pull master git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git/ char-misc-4.15-rc1
+
+引用Greg的话::
+
+	此命令要求git比较从“char-misc-4.15-rc1”标记位置到“master”分支头（上述
+	例子中指向了我从Linus的树分叉的地方，通常是-rc发布）的差异，并去使用
+	git:// 协议拉取。如果你希望使用 https:// 协议，也可以用在这里（但是请
+	注意，部分人由于防火墙问题没法用https协议拉取）。
+
+	如果char-misc-4.15-rc1标记没有出现在我要求拉取的仓库中，git会提醒
+	它不在那里，所以记得推送到公开地方。
+
+	“git request-pull”会包含git树的地址和需要拉取的特定标记，以及标记
+	描述全文（详尽描述标记）。同时它也会创建此拉取请求的差异状态和单个
+	提交的缩短日志。
+
+Linus回复说他倾向于 ``git://`` 协议。其他维护者可能有不同的偏好。另外，请注意
+如果你创建的拉取请求没有签名标记， ``https://`` 可能是更好的选择。完整的讨论
+请看原邮件。
+
+
+提交拉取请求
+------------
+
+拉取请求的提交方式与普通补丁相同。向维护人员发送内联电子邮件并抄送LKML以及
+任何必要特定子系统的列表。对Linus的拉取请求通常有如下主题行::
+
+	[GIT PULL] <subsystem> changes for v4.15-rc1
-- 
GitLab


From 91643aba949347ab46b870524dd9348781fa8b1d Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:35:33 +0800
Subject: [PATCH 0840/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/maintainer-entry-profile.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst
and link it to zh_CN/maintainer/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/e5b0838317cbc2f8fb3a9480f4604b6f099db975.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/maintainer/index.rst   |  2 +-
 .../maintainer/maintainer-entry-profile.rst   | 92 +++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst

diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
index 1a1222b787158..18a820741f523 100644
--- a/Documentation/translations/zh_CN/maintainer/index.rst
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -16,8 +16,8 @@
    configure-git
    rebasing-and-merging
    pull-requests
+   maintainer-entry-profile
 
 TODOList:
 
--   maintainer-entry-profile
 -   modifying-patches
diff --git a/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst b/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst
new file mode 100644
index 0000000000000..a1ee99c4786ea
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/maintainer-entry-profile.rst
@@ -0,0 +1,92 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/maintainer-entry-profile.rst
+
+:译者:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _maintainerentryprofile_zh:
+
+维护者条目概要
+==============
+
+维护人员条目概要补充了顶层过程文档（提交补丁，提交驱动程序……），增加了子系
+统/设备驱动程序本地习惯以及有关补丁提交生命周期的相关内容。贡献者使用此文档
+来调整他们的期望和避免常见错误；维护人员可以使用这些信息超越子系统层面查看
+是否有机会汇聚到通用实践中。
+
+
+总览
+----
+
+提供了子系统如何操作的介绍。MAINTAINERS文件告诉了贡献者应发送某文件的补丁到哪，
+但它没有传达其他子系统的本地基础设施和机制以协助开发。
+
+请考虑以下问题：
+
+- 当补丁被本地树接纳或合并到上游时是否有通知？
+- 子系统是否使用patchwork实例？Patchwork状态变更是否有通知？
+- 是否有任何机器人或CI基础设施监视列表，或子系统是否使用自动测试反馈以便把
+  控接纳补丁？
+- 被拉入-next的Git分支是哪个？
+- 贡献者应针对哪个分支提交？
+- 是否链接到其他维护者条目概要？例如一个设备驱动可能指向其父子系统的条目。
+  这使得贡献者意识到某维护者可能对提交链中其他维护者负有的义务。
+
+
+提交检查单补遗
+--------------
+
+列出强制性和咨询性标准，超出通用标准“提交检查表”，以便维护者检查一个补丁是否
+足够健康。例如：“通过checkpatch.pl，没有错误、没有警告。通过单元测试详见某处”。
+
+提交检查单补遗还可以包括有关硬件规格状态的详细信息。例如，子系统接受补丁之前
+是否需要考虑在某个修订版上发布的规范。
+
+
+开发周期的关键日期
+------------------
+
+提交者常常会误以为补丁可以在合并窗口关闭之前的任何时间发送，且下一个-rc1时仍
+可以。事实上，大多数补丁都需要在下一个合并窗口打开之前提前进入linux-next中。
+向提交者澄清关键日期（以-rc发布周为标志）以明确什么时候补丁会被考虑合并以及
+何时需要等待下一个-rc。
+
+至少需要讲明：
+
+- 最后一个可以提交新功能的-rc：
+  针对下一个合并窗口的新功能提交应该在此点之前首次发布以供考虑。在此时间点
+  之后提交的补丁应该明确他们的目标为下下个合并窗口，或者给出应加快进度被接受
+  的充足理由。通常新特性贡献者的提交应出现在-rc5之前。
+
+- 最后合并-rc：合并决策的最后期限。
+  向贡献者指出尚未接受的补丁集需要等待下下个合并窗口。当然，维护者没有义务
+  接受所有给定的补丁集，但是如果审阅在此时间点尚未结束，那么希望贡献者应该
+  等待并在下一个合并窗口重新提交。
+
+可选项：
+
+- 开发基线分支的首个-rc，列在概述部分，视为已为新提交做好准备。
+
+
+审阅节奏
+--------
+
+贡献者最担心的问题之一是：补丁集已发布却未收到反馈，应在多久后发送提醒。除了
+指定在重新提交之前要等待多长时间，还可以指示更新的首选样式；例如，重新发送
+整个系列，或私下发送提醒邮件。本节也可以列出本区域的代码审阅方式，以及获取
+不能直接从维护者那里得到的反馈的方法。
+
+
+现有概要
+--------
+
+这里列出了现有的维护人员条目概要；我们可能会想要在不久的将来做一些不同的事情。
+
+.. toctree::
+   :maxdepth: 1
+
+   ../doc-guide/maintainer-profile
+   ../../../nvdimm/maintainer-entry-profile
+   ../../../riscv/patch-acceptance
-- 
GitLab


From 55e0990231a980459393729e253c26759172744a Mon Sep 17 00:00:00 2001
From: Wu XiangCheng <bobwxc@email.cn>
Date: Mon, 17 May 2021 18:35:47 +0800
Subject: [PATCH 0841/3804] docs/zh_CN: Add translation
 zh_CN/maintainer/modifying-patches.rst

Add a new translation
  Documentation/translations/zh_CN/maintainer/modifying-patches.rst
and link it to zh_CN/maintainer/index.rst

Signed-off-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/35e6878cb634db61c573fc7fdc69ef4c5d8ae31c.1621243426.git.bobwxc@email.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/maintainer/index.rst   |  4 +-
 .../zh_CN/maintainer/modifying-patches.rst    | 51 +++++++++++++++++++
 2 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/maintainer/modifying-patches.rst

diff --git a/Documentation/translations/zh_CN/maintainer/index.rst b/Documentation/translations/zh_CN/maintainer/index.rst
index 18a820741f523..eb75ccea9a213 100644
--- a/Documentation/translations/zh_CN/maintainer/index.rst
+++ b/Documentation/translations/zh_CN/maintainer/index.rst
@@ -17,7 +17,5 @@
    rebasing-and-merging
    pull-requests
    maintainer-entry-profile
+   modifying-patches
 
-TODOList:
-
--   modifying-patches
diff --git a/Documentation/translations/zh_CN/maintainer/modifying-patches.rst b/Documentation/translations/zh_CN/maintainer/modifying-patches.rst
new file mode 100644
index 0000000000000..7f6326137f6b1
--- /dev/null
+++ b/Documentation/translations/zh_CN/maintainer/modifying-patches.rst
@@ -0,0 +1,51 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/maintainer/modifying-patches.rst
+
+:译者:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _modifyingpatches_zh:
+
+修改补丁
+========
+
+如果你是子系统或者分支的维护者，由于代码在你的和提交者的树中并不完全相同，
+有时你需要稍微修改一下收到的补丁以合并它们。
+
+如果你严格遵守开发者来源证书的规则（c），你应该要求提交者重做，但这完全是会
+适得其反的时间、精力浪费。规则（b）允许你调整代码，但这样修改提交者的代码并
+让他背书你的错误是非常不礼貌的。为解决此问题，建议在你之前最后一个
+Signed-off-by标签和你的之间添加一行，以指示更改的性质。这没有强制性要求，最
+好在描述前面加上你的邮件和/或姓名，用方括号括住整行，以明显指出你对最后一刻
+的更改负责。例如::
+
+        Signed-off-by: Random J Developer <random@developer.example.org>
+        [lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
+        Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org>
+
+如果您维护着一个稳定的分支，并希望同时明确贡献、跟踪更改、合并修复，并保护
+提交者免受责难，这种做法尤其有用。请注意，在任何情况下都不得更改作者的身份
+（From头），因为它会在变更日志中显示。
+
+向后移植（back-port）人员特别要注意：为了便于跟踪，请在提交消息的顶部（即主题行
+之后）插入补丁的来源，这是一种常见而有用的做法。例如，我们可以在3.x稳定版本
+中看到以下内容::
+
+        Date:   Tue Oct 7 07:26:38 2014 -0400
+
+        libata: Un-break ATA blacklist
+
+        commit 1c40279960bcd7d52dbdf1d466b20d24b99176c8 upstream.
+
+下面是一个旧的内核在某补丁被向后移植后会出现的::
+
+        Date:   Tue May 13 22:12:27 2008 +0200
+
+        wireless, airo: waitbusy() won't delay
+
+        [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a]
+
+不管什么格式，这些信息都为人们跟踪你的树，以及试图解决你树中的错误的人提供了
+有价值的帮助。
-- 
GitLab


From c2b1063e8feb2115537addce10f36c0c82d11d9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 2 Apr 2021 08:23:25 +0200
Subject: [PATCH 0842/3804] genirq: Add a IRQF_NO_DEBUG flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The whole call to note_interrupt() can be avoided or return early when
interrupts would be marked accordingly. For IPI handlers which always
return HANDLED the whole procedure is pretty pointless to begin with.

Add a IRQF_NO_DEBUG flag and mark the interrupt accordingly if supplied
when the interrupt is requested.

When noirqdebug is set on the kernel commandline, then the interrupt is
marked unconditionally so that there is only one condition in the hotpath
to evaluate.

 [ clg: Add changelog ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/7a8ad02f-63a8-c1aa-fdd1-39d973593d02@kaod.org
---
 include/linux/interrupt.h |  3 +++
 include/linux/irq.h       |  2 ++
 kernel/irq/chip.c         |  2 +-
 kernel/irq/handle.c       |  2 +-
 kernel/irq/manage.c       |  5 +++++
 kernel/irq/settings.h     | 12 ++++++++++++
 6 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 4777850a6dc7c..a52109c3f3a47 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -64,6 +64,8 @@
  * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it.
  *                Users will enable it explicitly by enable_irq() or enable_nmi()
  *                later.
+ * IRQF_NO_DEBUG - Exclude from runaway detection for IPI and similar handlers,
+ *		   depends on IRQF_PERCPU.
  */
 #define IRQF_SHARED		0x00000080
 #define IRQF_PROBE_SHARED	0x00000100
@@ -78,6 +80,7 @@
 #define IRQF_EARLY_RESUME	0x00020000
 #define IRQF_COND_SUSPEND	0x00040000
 #define IRQF_NO_AUTOEN		0x00080000
+#define IRQF_NO_DEBUG		0x00100000
 
 #define IRQF_TIMER		(__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD)
 
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 31b347c9f8dd0..8e9a9ae471a6e 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -72,6 +72,7 @@ enum irqchip_irq_state;
  *				  mechanism and from core side polling.
  * IRQ_DISABLE_UNLAZY		- Disable lazy irq disable
  * IRQ_HIDDEN			- Don't show up in /proc/interrupts
+ * IRQ_NO_DEBUG			- Exclude from note_interrupt() debugging
  */
 enum {
 	IRQ_TYPE_NONE		= 0x00000000,
@@ -99,6 +100,7 @@ enum {
 	IRQ_IS_POLLED		= (1 << 18),
 	IRQ_DISABLE_UNLAZY	= (1 << 19),
 	IRQ_HIDDEN		= (1 << 20),
+	IRQ_NO_DEBUG		= (1 << 21),
 };
 
 #define IRQF_MODIFY_MASK	\
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 8cc8e57132870..7f04c7d8296e2 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -481,7 +481,7 @@ void handle_nested_irq(unsigned int irq)
 	for_each_action_of_desc(desc, action)
 		action_ret |= action->thread_fn(action->irq, action->dev_id);
 
-	if (!noirqdebug)
+	if (!irq_settings_no_debug(desc))
 		note_interrupt(desc, action_ret);
 
 	raw_spin_lock_irq(&desc->lock);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 762a928e18f92..221d80c31e94c 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -197,7 +197,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
 
 	add_interrupt_randomness(desc->irq_data.irq, flags);
 
-	if (!noirqdebug)
+	if (!irq_settings_no_debug(desc))
 		note_interrupt(desc, retval);
 	return retval;
 }
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4c14356543d93..7bdd09e7d5f04 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1686,8 +1686,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (new->flags & IRQF_PERCPU) {
 			irqd_set(&desc->irq_data, IRQD_PER_CPU);
 			irq_settings_set_per_cpu(desc);
+			if (new->flags & IRQF_NO_DEBUG)
+				irq_settings_set_no_debug(desc);
 		}
 
+		if (noirqdebug)
+			irq_settings_set_no_debug(desc);
+
 		if (new->flags & IRQF_ONESHOT)
 			desc->istate |= IRQS_ONESHOT;
 
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 403378b9947b9..7b7efb1a114bd 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -18,6 +18,7 @@ enum {
 	_IRQ_IS_POLLED		= IRQ_IS_POLLED,
 	_IRQ_DISABLE_UNLAZY	= IRQ_DISABLE_UNLAZY,
 	_IRQ_HIDDEN		= IRQ_HIDDEN,
+	_IRQ_NO_DEBUG		= IRQ_NO_DEBUG,
 	_IRQF_MODIFY_MASK	= IRQF_MODIFY_MASK,
 };
 
@@ -33,6 +34,7 @@ enum {
 #define IRQ_IS_POLLED		GOT_YOU_MORON
 #define IRQ_DISABLE_UNLAZY	GOT_YOU_MORON
 #define IRQ_HIDDEN		GOT_YOU_MORON
+#define IRQ_NO_DEBUG		GOT_YOU_MORON
 #undef IRQF_MODIFY_MASK
 #define IRQF_MODIFY_MASK	GOT_YOU_MORON
 
@@ -174,3 +176,13 @@ static inline bool irq_settings_is_hidden(struct irq_desc *desc)
 {
 	return desc->status_use_accessors & _IRQ_HIDDEN;
 }
+
+static inline void irq_settings_set_no_debug(struct irq_desc *desc)
+{
+	desc->status_use_accessors |= _IRQ_NO_DEBUG;
+}
+
+static inline bool irq_settings_no_debug(struct irq_desc *desc)
+{
+	return desc->status_use_accessors & _IRQ_NO_DEBUG;
+}
-- 
GitLab


From 94c1fbd487b33a28ea7f0da076a2cba6d0f410cf Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 17 May 2021 16:46:13 +0800
Subject: [PATCH 0843/3804] docs/zh_CN: add core-api kernel-api.rst translation

This patch translates Documentation/core-api/kernel-api.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/d79e2d1f37bae52ce6fce0efb2fa4a32a89165fb.1621239725.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   5 +-
 .../zh_CN/core-api/kernel-api.rst             | 369 ++++++++++++++++++
 2 files changed, 373 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/kernel-api.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index f1fa71e45c772..60e1566d57eb4 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -19,9 +19,12 @@
 来的大量 kerneldoc 信息；有朝一日，若有人有动力的话，应当把它们拆分
 出来。
 
-Todolist:
+.. toctree::
+   :maxdepth: 1
 
    kernel-api
+
+Todolist:
    workqueue
    printk-basics
    printk-formats
diff --git a/Documentation/translations/zh_CN/core-api/kernel-api.rst b/Documentation/translations/zh_CN/core-api/kernel-api.rst
new file mode 100644
index 0000000000000..d6f815ec265b8
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/kernel-api.rst
@@ -0,0 +1,369 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/kernel-api.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_kernel-api.rst:
+
+
+============
+Linux内核API
+============
+
+
+列表管理函数
+============
+
+该API在以下内核代码中:
+
+include/linux/list.h
+
+基本的C库函数
+=============
+
+在编写驱动程序时，一般不能使用C库中的例程。部分函数通常很有用，它们在
+下面被列出。这些函数的行为可能会与ANSI定义的略有不同，这些偏差会在文中
+注明。
+
+字符串转换
+----------
+
+该API在以下内核代码中:
+
+lib/vsprintf.c
+
+include/linux/kernel.h
+
+include/linux/kernel.h
+
+lib/kstrtox.c
+
+lib/string_helpers.c
+
+字符串处理
+----------
+
+该API在以下内核代码中:
+
+lib/string.c
+
+include/linux/string.h
+
+mm/util.c
+
+基本的内核库函数
+================
+
+Linux内核提供了很多实用的基本函数。
+
+位运算
+------
+
+该API在以下内核代码中:
+
+include/asm-generic/bitops/instrumented-atomic.h
+
+include/asm-generic/bitops/instrumented-non-atomic.h
+
+include/asm-generic/bitops/instrumented-lock.h
+
+位图运算
+--------
+
+该API在以下内核代码中:
+
+lib/bitmap.c
+
+include/linux/bitmap.h
+
+include/linux/bitmap.h
+
+include/linux/bitmap.h
+
+lib/bitmap.c
+
+lib/bitmap.c
+
+include/linux/bitmap.h
+
+命令行解析
+----------
+
+该API在以下内核代码中:
+
+lib/cmdline.c
+
+排序
+----
+
+该API在以下内核代码中:
+
+lib/sort.c
+
+lib/list_sort.c
+
+文本检索
+--------
+
+该API在以下内核代码中:
+
+lib/textsearch.c
+
+lib/textsearch.c
+
+include/linux/textsearch.h
+
+Linux中的CRC和数学函数
+======================
+
+
+CRC函数
+-------
+
+*译注：CRC，Cyclic Redundancy Check，循环冗余校验*
+
+该API在以下内核代码中:
+
+lib/crc4.c
+
+lib/crc7.c
+
+lib/crc8.c
+
+lib/crc16.c
+
+lib/crc32.c
+
+lib/crc-ccitt.c
+
+lib/crc-itu-t.c
+
+基数为2的对数和幂函数
+---------------------
+
+该API在以下内核代码中:
+
+include/linux/log2.h
+
+整数幂函数
+----------
+
+该API在以下内核代码中:
+
+lib/math/int_pow.c
+
+lib/math/int_sqrt.c
+
+除法函数
+--------
+
+该API在以下内核代码中:
+
+include/asm-generic/div64.h
+
+include/linux/math64.h
+
+lib/math/div64.c
+
+lib/math/gcd.c
+
+UUID/GUID
+---------
+
+该API在以下内核代码中:
+
+lib/uuid.c
+
+内核IPC设备
+===========
+
+IPC实用程序
+-----------
+
+该API在以下内核代码中:
+
+ipc/util.c
+
+FIFO 缓冲区
+===========
+
+kfifo接口
+---------
+
+该API在以下内核代码中:
+
+include/linux/kfifo.h
+
+转发接口支持
+============
+
+转发接口支持旨在为工具和设备提供一种有效的机制，将大量数据从内核空间
+转发到用户空间。
+
+转发接口
+--------
+
+该API在以下内核代码中:
+
+kernel/relay.c
+
+kernel/relay.c
+
+模块支持
+========
+
+模块加载
+--------
+
+该API在以下内核代码中:
+
+kernel/kmod.c
+
+模块接口支持
+------------
+
+更多信息请参考文件kernel/module.c。
+
+硬件接口
+========
+
+
+该API在以下内核代码中:
+
+kernel/dma.c
+
+资源管理
+--------
+
+该API在以下内核代码中:
+
+kernel/resource.c
+
+kernel/resource.c
+
+MTRR处理
+--------
+
+该API在以下内核代码中:
+
+arch/x86/kernel/cpu/mtrr/mtrr.c
+
+安全框架
+========
+
+该API在以下内核代码中:
+
+security/security.c
+
+security/inode.c
+
+审计接口
+========
+
+该API在以下内核代码中:
+
+kernel/audit.c
+
+kernel/auditsc.c
+
+kernel/auditfilter.c
+
+核算框架
+========
+
+该API在以下内核代码中:
+
+kernel/acct.c
+
+块设备
+======
+
+该API在以下内核代码中:
+
+block/blk-core.c
+
+block/blk-core.c
+
+block/blk-map.c
+
+block/blk-sysfs.c
+
+block/blk-settings.c
+
+block/blk-exec.c
+
+block/blk-flush.c
+
+block/blk-lib.c
+
+block/blk-integrity.c
+
+kernel/trace/blktrace.c
+
+block/genhd.c
+
+block/genhd.c
+
+字符设备
+========
+
+该API在以下内核代码中:
+
+fs/char_dev.c
+
+时钟框架
+========
+
+时钟框架定义了编程接口，以支持系统时钟树的软件管理。该框架广泛用于系统级芯片（SOC）平
+台，以支持电源管理和各种可能需要自定义时钟速率的设备。请注意，这些 “时钟”与计时或实
+时时钟(RTC)无关，它们都有单独的框架。这些 :c:type:`struct clk <clk>` 实例可用于管理
+各种时钟信号，例如一个96MHz的时钟信号，该信号可被用于总线或外设的数据交换，或以
+其他方式触发系统硬件中的同步状态机转换。
+
+通过明确的软件时钟门控来支持电源管理：未使用的时钟被禁用，因此系统不会因为改变不在使用
+中的晶体管的状态而浪费电源。在某些系统中，这可能是由硬件时钟门控支持的，其中时钟被门控
+而不在软件中被禁用。芯片的部分，在供电但没有时钟的情况下，可能会保留其最后的状态。这种
+低功耗状态通常被称为 *保留模式* 。这种模式仍然会产生漏电流，特别是在电路几何结构较细的情
+况下，但对于CMOS电路来说，电能主要是随着时钟翻转而被消耗的。
+
+电源感知驱动程序只有在其管理的设备处于活动使用状态时才会启用时钟。此外，系统睡眠状态通
+常根据哪些时钟域处于活动状态而有所不同：“待机”状态可能允许从多个活动域中唤醒，而
+"mem"（暂停到RAM）状态可能需要更全面地关闭来自高速PLL和振荡器的时钟，从而限制了可能
+的唤醒事件源的数量。驱动器的暂停方法可能需要注意目标睡眠状态的系统特定时钟约束。
+
+一些平台支持可编程时钟发生器。这些可以被各种外部芯片使用，如其他CPU、多媒体编解码器以
+及对接口时钟有严格要求的设备。
+
+该API在以下内核代码中:
+
+include/linux/clk.h
+
+同步原语
+========
+
+读-复制-更新（RCU）
+-------------------
+
+该API在以下内核代码中:
+
+include/linux/rcupdate.h
+
+kernel/rcu/tree.c
+
+kernel/rcu/tree_exp.h
+
+kernel/rcu/update.c
+
+include/linux/srcu.h
+
+kernel/rcu/srcutree.c
+
+include/linux/rculist_bl.h
+
+include/linux/rculist.h
+
+include/linux/rculist_nulls.h
+
+include/linux/rcu_sync.h
+
+kernel/rcu/sync.c
-- 
GitLab


From 7c0066d132d237db6702804aa1fcb18fefcda00b Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 17 May 2021 16:46:14 +0800
Subject: [PATCH 0844/3804] docs/zh_CN: add core-api printk-basics.rst
 translation

This patch translates Documentation/core-api/printk-basics.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Link: https://lore.kernel.org/r/1610df76720b69f3f81bff27403e70e86c0875c5.1621239725.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 +-
 .../zh_CN/core-api/printk-basics.rst          | 110 ++++++++++++++++++
 2 files changed, 111 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/printk-basics.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 60e1566d57eb4..5ecb23e3ba8c5 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -23,10 +23,10 @@
    :maxdepth: 1
 
    kernel-api
+   printk-basics
 
 Todolist:
    workqueue
-   printk-basics
    printk-formats
    symbol-namespaces
 
diff --git a/Documentation/translations/zh_CN/core-api/printk-basics.rst b/Documentation/translations/zh_CN/core-api/printk-basics.rst
new file mode 100644
index 0000000000000..2b20f6303a82a
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/printk-basics.rst
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/printk-basics.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_printk-basics.rst:
+
+
+==================
+使用printk记录消息
+==================
+
+printk()是Linux内核中最广为人知的函数之一。它是我们打印消息的标准工具，通常也是追踪和调试
+的最基本方法。如果你熟悉printf(3)，你就能够知道printk()是基于它的，尽管它在功能上有一些不
+同之处:
+
+  - printk() 消息可以指定日志级别。
+
+  - 格式字符串虽然与C99基本兼容，但并不遵循完全相同的规范。它有一些扩展和一些限制（没
+    有 ``%n`` 或浮点转换指定符）。参见 :ref:`如何正确地获得printk格式指定符 <printk-specifiers>` 。
+
+所有的printk()消息都会被打印到内核日志缓冲区，这是一个通过/dev/kmsg输出到用户空间的环
+形缓冲区。读取它的通常方法是使用 ``dmesg`` 。
+
+printk()的用法通常是这样的::
+
+  printk(KERN_INFO "Message: %s\n", arg);
+
+其中 ``KERN_INFO`` 是日志级别（注意，它与格式字符串连在一起，日志级别不是一个单独的参数）。
+可用的日志级别是：
+
+
++----------------+--------+-----------------------------------------------+
+| 名称           | 字符串 |  别名函数                                     |
++================+========+===============================================+
+| KERN_EMERG     | "0"    | pr_emerg()                                    |
++----------------+--------+-----------------------------------------------+
+| KERN_ALERT     | "1"    | pr_alert()                                    |
++----------------+--------+-----------------------------------------------+
+| KERN_CRIT      | "2"    | pr_crit()                                     |
++----------------+--------+-----------------------------------------------+
+| KERN_ERR       | "3"    | pr_err()                                      |
++----------------+--------+-----------------------------------------------+
+| KERN_WARNING   | "4"    | pr_warn()                                     |
++----------------+--------+-----------------------------------------------+
+| KERN_NOTICE    | "5"    | pr_notice()                                   |
++----------------+--------+-----------------------------------------------+
+| KERN_INFO      | "6"    | pr_info()                                     |
++----------------+--------+-----------------------------------------------+
+| KERN_DEBUG     | "7"    | pr_debug() and pr_devel()  若定义了DEBUG      |
++----------------+--------+-----------------------------------------------+
+| KERN_DEFAULT   | ""     |                                               |
++----------------+--------+-----------------------------------------------+
+| KERN_CONT      | "c"    | pr_cont()                                     |
++----------------+--------+-----------------------------------------------+
+
+
+日志级别指定了一条消息的重要性。内核根据日志级别和当前 *console_loglevel* （一个内核变量）决
+定是否立即显示消息（将其打印到当前控制台）。如果消息的优先级比 *console_loglevel* 高（日志级
+别值较低），消息将被打印到控制台。
+
+如果省略了日志级别，则以 ``KERN_DEFAULT`` 级别打印消息。
+
+你可以用以下方法检查当前的 *console_loglevel* ::
+
+  $ cat /proc/sys/kernel/printk
+  4        4        1        7
+
+结果显示了 *current*, *default*, *minimum* 和 *boot-time-default* 日志级别。
+
+要改变当前的 console_loglevel，只需在 ``/proc/sys/kernel/printk`` 中写入所需的
+级别。例如，要打印所有的消息到控制台上::
+
+  # echo 8 > /proc/sys/kernel/printk
+
+另一种方式，使用 ``dmesg``::
+
+  # dmesg -n 5
+
+设置 console_loglevel 打印 KERN_WARNING (4) 或更严重的消息到控制台。更多消息参
+见 ``dmesg(1)`` 。
+
+作为printk()的替代方案，你可以使用 ``pr_*()`` 别名来记录日志。这个系列的宏在宏名中
+嵌入了日志级别。例如::
+
+  pr_info("Info message no. %d\n", msg_num);
+
+打印 ``KERN_INFO`` 消息。
+
+除了比等效的printk()调用更简洁之外，它们还可以通过pr_fmt()宏为格式字符串使用一个通用
+的定义。例如，在源文件的顶部（在任何  ``#include`` 指令之前）定义这样的内容。::
+
+  #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
+
+会在该文件中的每一条 pr_*() 消息前加上发起该消息的模块和函数名称。
+
+为了调试，还有两个有条件编译的宏：
+pr_debug()和pr_devel()，除非定义了 ``DEBUG`` (或者在pr_debug()的情况下定义了
+``CONFIG_DYNAMIC_DEBUG`` )，否则它们不会被编译。
+
+
+函数接口
+========
+
+该API在以下内核代码中:
+
+kernel/printk/printk.c
+
+include/linux/printk.h
-- 
GitLab


From eb2e708b9727806893e379c091184270c5468a6c Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 17 May 2021 16:46:15 +0800
Subject: [PATCH 0845/3804] docs/zh_CN: add core-api printk-formats.rst
 translation

This patch translates Documentation/core-api/printk-formats.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Link: https://lore.kernel.org/r/b9dea1426e43d8848f1a8b7319c002418aebd3df.1621239725.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 +-
 .../zh_CN/core-api/printk-formats.rst         | 580 ++++++++++++++++++
 2 files changed, 581 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/printk-formats.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 5ecb23e3ba8c5..c2d4614d9e682 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -24,10 +24,10 @@
 
    kernel-api
    printk-basics
+   printk-formats
 
 Todolist:
    workqueue
-   printk-formats
    symbol-namespaces
 
 数据结构和低级实用程序
diff --git a/Documentation/translations/zh_CN/core-api/printk-formats.rst b/Documentation/translations/zh_CN/core-api/printk-formats.rst
new file mode 100644
index 0000000000000..624a090e6ee5a
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/printk-formats.rst
@@ -0,0 +1,580 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/printk-formats.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_printk-formats.rst:
+
+
+==============================
+如何获得正确的printk格式占位符
+==============================
+
+
+
+:作者: Randy Dunlap <rdunlap@infradead.org>
+:作者: Andrew Murray <amurray@mpc-data.co.uk>
+
+
+整数类型
+========
+
+::
+
+	        若变量类型是Type，则使用printk格式占位符。
+	        -------------------------------------------
+		char			%d 或 %x
+		unsigned char		%u 或 %x
+		short int		%d 或 %x
+		unsigned short int	%u 或 %x
+		int			%d 或 %x
+		unsigned int		%u 或 %x
+		long			%ld 或 %lx
+		unsigned long		%lu 或 %lx
+		long long		%lld 或 %llx
+		unsigned long long	%llu 或 %llx
+		size_t			%zu 或 %zx
+		ssize_t			%zd 或 %zx
+		s8			%d 或 %x
+		u8			%u 或 %x
+		s16			%d 或 %x
+		u16			%u 或 %x
+		s32			%d 或 %x
+		u32			%u 或 %x
+		s64			%lld 或 %llx
+		u64			%llu 或 %llx
+
+
+如果 <type> 的大小依赖于配置选项 (例如 sector_t, blkcnt_t) 或其大小依赖于架构
+(例如 tcflag_t)，则使用其可能的最大类型的格式占位符并显式强制转换为它。
+
+例如
+
+::
+
+	printk("test: sector number/total blocks: %llu/%llu\n",
+		(unsigned long long)sector, (unsigned long long)blockcount);
+
+提醒：sizeof()返回类型为size_t。
+
+内核的printf不支持%n。显而易见，浮点格式(%e, %f, %g, %a)也不被识别。使用任何不
+支持的占位符或长度限定符都会导致一个WARN并且终止vsnprintf()执行。
+
+指针类型
+========
+
+一个原始指针值可以用%p打印，它将在打印前对地址进行哈希处理。内核也支持扩展占位符来打印
+不同类型的指针。
+
+一些扩展占位符会打印给定地址上的数据，而不是打印地址本身。在这种情况下，以下错误消息可能
+会被打印出来，而不是无法访问的消息::
+
+	(null)	 data on plain NULL address
+	(efault) data on invalid address
+	(einval) invalid data on a valid address
+
+普通指针
+----------
+
+::
+
+	%p	abcdef12 or 00000000abcdef12
+
+没有指定扩展名的指针（即没有修饰符的%p）被哈希（hash），以防止内核内存布局消息的泄露。这
+样还有一个额外的好处，就是提供一个唯一的标识符。在64位机器上，前32位被清零。当没有足够的
+熵进行散列处理时，内核将打印(ptrval)代替。
+
+如果可能的话，使用专门的修饰符，如%pS或%pB（如下所述），以避免打印一个必须事后解释的非哈
+希地址。如果不可能，而且打印地址的目的是为调试提供更多的消息，使用%p，并在调试过程中
+用 ``no_hash_pointers`` 参数启动内核，这将打印所有未修改的%p地址。如果你 *真的* 想知
+道未修改的地址，请看下面的%px。
+
+如果（也只有在）你将地址作为虚拟文件的内容打印出来，例如在procfs或sysfs中（使用
+seq_printf()，而不是printk()）由用户空间进程读取，使用下面描述的%pK修饰符，不
+要用%p或%px。
+
+
+错误指针
+--------
+
+::
+
+	%pe	-ENOSPC
+
+用于打印错误指针(即IS_ERR()为真的指针)的符号错误名。不知道符号名的错误值会以十进制打印，
+而作为%pe参数传递的非ERR_PTR会被视为普通的%p。
+
+符号/函数指针
+-------------
+
+::
+
+	%pS	versatile_init+0x0/0x110
+	%ps	versatile_init
+	%pSR	versatile_init+0x9/0x110
+		(with __builtin_extract_return_addr() translation)
+	%pB	prev_fn_of_versatile_init+0x88/0x88
+
+
+``S`` 和 ``s`` 占位符用于打印符号格式的指针。它们的结果是符号名称带有(S)或不带有(s)偏移
+量。如果禁用KALLSYMS，则打印符号地址。
+
+``B`` 占位符的结果是带有偏移量的符号名，在打印堆栈回溯时应该使用。占位符将考虑编译器优化
+的影响，当使用尾部调用并使用noreturn GCC属性标记时，可能会发生这种优化。
+
+来自BPF / tracing追踪的探查指针
+----------------------------------
+
+::
+
+	%pks	kernel string
+	%pus	user string
+
+``k`` 和 ``u`` 指定符用于打印来自内核内存(k)或用户内存(u)的先前探测的内存。后面的 ``s`` 指
+定符的结果是打印一个字符串。对于直接在常规的vsnprintf()中使用时，(k)和(u)注释被忽略，但是，当
+在BPF的bpf_trace_printk()中使用时，它会读取它所指向的内存，不会出现错误。
+
+内核指针
+--------
+
+::
+
+	%pK	01234567 or 0123456789abcdef
+
+用于打印应该对非特权用户隐藏的内核指针。%pK的行为取决于kptr_restrict sysctl——详见
+Documentation/admin-guide/sysctl/kernel.rst。
+
+未经修改的地址
+--------------
+
+::
+
+	%px	01234567 or 0123456789abcdef
+
+对于打印指针，当你 *真的* 想打印地址时。在用%px打印指针之前，请考虑你是否泄露了内核内
+存布局的敏感消息。%px在功能上等同于%lx（或%lu）。%px是首选，因为它在grep查找时更唯一。
+如果将来我们需要修改内核处理打印指针的方式，我们将能更好地找到调用点。
+
+在使用%px之前，请考虑使用%p并在调试过程中启用 ``no_hash_pointers`` 内核参数是否足
+够(参见上面的%p描述)。%px的一个有效场景可能是在panic发生之前立即打印消息，这样无论如何
+都可以防止任何敏感消息被利用，使用%px就不需要用no_hash_pointers来重现panic。
+
+指针差异
+--------
+
+::
+
+	%td	2560
+	%tx	a00
+
+为了打印指针的差异，使用ptrdiff_t的%t修饰符。
+
+例如::
+
+	printk("test: difference between pointers: %td\n", ptr2 - ptr1);
+
+结构体资源（Resources）
+-----------------------
+
+::
+
+	%pr	[mem 0x60000000-0x6fffffff flags 0x2200] or
+		[mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
+	%pR	[mem 0x60000000-0x6fffffff pref] or
+		[mem 0x0000000060000000-0x000000006fffffff pref]
+
+用于打印结构体资源。 ``R`` 和 ``r`` 占位符的结果是打印出的资源带有（R）或不带有（r）解码标志
+成员。
+
+通过引用传递。
+
+物理地址类型 phys_addr_t
+------------------------
+
+::
+
+	%pa[p]	0x01234567 or 0x0123456789abcdef
+
+用于打印phys_addr_t类型（以及它的衍生物，如resource_size_t），该类型可以根据构建选项而
+变化，而不考虑CPU数据路径的宽度。
+
+通过引用传递。
+
+DMA地址类型dma_addr_t
+---------------------
+
+::
+
+	%pad	0x01234567 or 0x0123456789abcdef
+
+用于打印dma_addr_t类型，该类型可以根据构建选项而变化，而不考虑CPU数据路径的宽度。
+
+通过引用传递。
+
+原始缓冲区为转义字符串
+----------------------
+
+::
+
+	%*pE[achnops]
+
+用于将原始缓冲区打印成转义字符串。对于以下缓冲区::
+
+		1b 62 20 5c 43 07 22 90 0d 5d
+
+几个例子展示了如何进行转换（不包括两端的引号）。::
+
+		%*pE		"\eb \C\a"\220\r]"
+		%*pEhp		"\x1bb \C\x07"\x90\x0d]"
+		%*pEa		"\e\142\040\\\103\a\042\220\r\135"
+
+转换规则是根据可选的标志组合来应用的(详见 :c:func:`string_escape_mem` 内核文档):
+
+	- a - ESCAPE_ANY
+	- c - ESCAPE_SPECIAL
+	- h - ESCAPE_HEX
+	- n - ESCAPE_NULL
+	- o - ESCAPE_OCTAL
+	- p - ESCAPE_NP
+	- s - ESCAPE_SPACE
+
+默认情况下，使用 ESCAPE_ANY_NP。
+
+ESCAPE_ANY_NP是许多情况下的明智选择，特别是对于打印SSID。
+
+如果字段宽度被省略，那么将只转义1个字节。
+
+原始缓冲区为十六进制字符串
+--------------------------
+
+::
+
+	%*ph	00 01 02  ...  3f
+	%*phC	00:01:02: ... :3f
+	%*phD	00-01-02- ... -3f
+	%*phN	000102 ... 3f
+
+对于打印小的缓冲区（最长64个字节），可以用一定的分隔符作为一个
+十六进制字符串。对于较大的缓冲区，可以考虑使用
+:c:func:`print_hex_dump` 。
+
+MAC/FDDI地址
+------------
+
+::
+
+	%pM	00:01:02:03:04:05
+	%pMR	05:04:03:02:01:00
+	%pMF	00-01-02-03-04-05
+	%pm	000102030405
+	%pmR	050403020100
+
+用于打印以十六进制表示的6字节MAC/FDDI地址。 ``M`` 和 ``m`` 占位符导致打印的
+地址有(M)或没有(m)字节分隔符。默认的字节分隔符是冒号（：）。
+
+对于FDDI地址，可以在 ``M`` 占位符之后使用 ``F`` 说明，以使用破折号(-)分隔符
+代替默认的分隔符。
+
+对于蓝牙地址， ``R`` 占位符应使用在 ``M`` 占位符之后，以使用反转的字节顺序，适
+合于以小尾端顺序的蓝牙地址的肉眼可见的解析。
+
+通过引用传递。
+
+IPv4地址
+--------
+
+::
+
+	%pI4	1.2.3.4
+	%pi4	001.002.003.004
+	%p[Ii]4[hnbl]
+
+用于打印IPv4点分隔的十进制地址。 ``I4`` 和 ``i4`` 占位符的结果是打印的地址
+有(i4)或没有(I4)前导零。
+
+附加的 ``h`` 、 ``n`` 、 ``b`` 和 ``l`` 占位符分别用于指定主机、网络、大
+尾端或小尾端地址。如果没有提供占位符，则使用默认的网络/大尾端顺序。
+
+通过引用传递。
+
+IPv6 地址
+---------
+
+::
+
+	%pI6	0001:0002:0003:0004:0005:0006:0007:0008
+	%pi6	00010002000300040005000600070008
+	%pI6c	1:2:3:4:5:6:7:8
+
+用于打印IPv6网络顺序的16位十六进制地址。 ``I6`` 和 ``i6`` 占位符的结果是
+打印的地址有(I6)或没有(i6)冒号分隔符。始终使用前导零。
+
+额外的 ``c`` 占位符可与 ``I`` 占位符一起使用，以打印压缩的IPv6地址，如
+https://tools.ietf.org/html/rfc5952 所述
+
+通过引用传递。
+
+IPv4/IPv6地址(generic, with port, flowinfo, scope)
+--------------------------------------------------
+
+::
+
+	%pIS	1.2.3.4		or 0001:0002:0003:0004:0005:0006:0007:0008
+	%piS	001.002.003.004	or 00010002000300040005000600070008
+	%pISc	1.2.3.4		or 1:2:3:4:5:6:7:8
+	%pISpc	1.2.3.4:12345	or [1:2:3:4:5:6:7:8]:12345
+	%p[Ii]S[pfschnbl]
+
+用于打印一个IP地址，不需要区分它的类型是AF_INET还是AF_INET6。一个指向有效结构
+体sockaddr的指针，通过 ``IS`` 或 ``iS`` 指定，可以传递给这个格式占位符。
+
+附加的 ``p`` 、  ``f`` 和 ``s`` 占位符用于指定port(IPv4, IPv6)、
+flowinfo (IPv6)和scope(IPv6)。port有一个 ``:`` 前缀，flowinfo是 ``/`` 和
+范围是 ``%`` ，每个后面都跟着实际的值。
+
+对于IPv6地址，如果指定了额外的指定符 ``c`` ，则使用
+https://tools.ietf.org/html/rfc5952 描述的压缩IPv6地址。
+如 https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07
+所建议的，IPv6地址由'['，']'包围，以防止出现额外的占位符 ``p`` ， ``f`` 或 ``s`` 。
+
+对于IPv4地址，也可以使用额外的 ``h`` ， ``n`` ， ``b`` 和 ``l`` 说
+明符，但对于IPv6地址则忽略。
+
+通过引用传递。
+
+更多例子::
+
+	%pISfc		1.2.3.4		or [1:2:3:4:5:6:7:8]/123456789
+	%pISsc		1.2.3.4		or [1:2:3:4:5:6:7:8]%1234567890
+	%pISpfc		1.2.3.4:12345	or [1:2:3:4:5:6:7:8]:12345/123456789
+
+UUID/GUID地址
+-------------
+
+::
+
+	%pUb	00010203-0405-0607-0809-0a0b0c0d0e0f
+	%pUB	00010203-0405-0607-0809-0A0B0C0D0E0F
+	%pUl	03020100-0504-0706-0809-0a0b0c0d0e0f
+	%pUL	03020100-0504-0706-0809-0A0B0C0D0E0F
+
+用于打印16字节的UUID/GUIDs地址。附加的 ``l`` , ``L`` , ``b`` 和 ``B`` 占位符用
+于指定小写(l)或大写(L)十六进制表示法中的小尾端顺序，以及小写(b)或大写(B)十六进制表
+示法中的大尾端顺序。
+
+如果没有使用额外的占位符，则将打印带有小写十六进制表示法的默认大端顺序。
+
+通过引用传递。
+
+目录项（dentry）的名称
+----------------------
+
+::
+
+	%pd{,2,3,4}
+	%pD{,2,3,4}
+
+用于打印dentry名称；如果我们与 :c:func:`d_move` 竞争，名称可能是新旧混合的，但
+不会oops。 %pd dentry比较安全，其相当于我们以前用的%s dentry->d_name.name，%pd<n>打
+印 ``n`` 最后的组件。 %pD对结构文件做同样的事情。
+
+
+通过引用传递。
+
+块设备（block_device）名称
+--------------------------
+
+::
+
+	%pg	sda, sda1 or loop0p1
+
+用于打印block_device指针的名称。
+
+va_format结构体
+---------------
+
+::
+
+	%pV
+
+用于打印结构体va_format。这些结构包含一个格式字符串
+和va_list如下
+
+::
+
+	struct va_format {
+		const char *fmt;
+		va_list *va;
+	};
+
+实现 "递归vsnprintf"。
+
+如果没有一些机制来验证格式字符串和va_list参数的正确性，请不要使用这个功能。
+
+通过引用传递。
+
+设备树节点
+----------
+
+::
+
+	%pOF[fnpPcCF]
+
+
+用于打印设备树节点结构。默认行为相当于%pOFf。
+
+	- f - 设备节点全称
+	- n - 设备节点名
+	- p - 设备节点句柄
+	- P - 设备节点路径规范(名称+@单位)
+	- F - 设备节点标志
+	- c - 主要兼容字符串
+	- C - 全兼容字符串
+
+当使用多个参数时，分隔符是':'。
+
+例如
+
+::
+
+	%pOF	/foo/bar@0			- Node full name
+	%pOFf	/foo/bar@0			- Same as above
+	%pOFfp	/foo/bar@0:10			- Node full name + phandle
+	%pOFfcF	/foo/bar@0:foo,device:--P-	- Node full name +
+	                                          major compatible string +
+						  node flags
+							D - dynamic
+							d - detached
+							P - Populated
+							B - Populated bus
+
+通过引用传递。
+
+Fwnode handles
+--------------
+
+::
+
+	%pfw[fP]
+
+用于打印fwnode_handles的消息。默认情况下是打印完整的节点名称，包括路径。
+这些修饰符在功能上等同于上面的%pOF。
+
+	- f - 节点的全名，包括路径。
+	- P - 节点名称，包括地址（如果有的话）。
+
+例如 (ACPI)
+
+::
+
+	%pfwf	\_SB.PCI0.CIO2.port@1.endpoint@0	- Full node name
+	%pfwP	endpoint@0				- Node name
+
+例如 (OF)
+
+::
+
+	%pfwf	/ocp@68000000/i2c@48072000/camera@10/port/endpoint - Full name
+	%pfwP	endpoint				- Node name
+
+时间和日期
+----------
+
+::
+
+	%pt[RT]			YYYY-mm-ddTHH:MM:SS
+	%pt[RT]d		YYYY-mm-dd
+	%pt[RT]t		HH:MM:SS
+	%pt[RT][dt][r]
+
+用于打印日期和时间::
+
+	R  struct rtc_time structure
+	T  time64_t type
+
+以我们（人类）可读的格式。
+
+默认情况下，年份将加上1900，月份将加上1。使用%pt[RT]r (raw)
+来抑制这种行为。
+
+通过引用传递。
+
+clk结构体
+---------
+
+::
+
+	%pC	pll1
+	%pCn	pll1
+
+用于打印clk结构。%pC 和 %pCn 打印时钟的名称（通用时钟框架）或唯一的32位
+ID（传统时钟框架）。
+
+通过引用传递。
+
+位图及其衍生物，如cpumask和nodemask
+-----------------------------------
+
+::
+
+	%*pb	0779
+	%*pbl	0,3-6,8-10
+
+对于打印位图（bitmap）及其派生的cpumask和nodemask，%*pb输出以字段宽度为位数的位图，
+%*pbl输出以字段宽度为位数的范围列表。
+
+字段宽度用值传递，位图用引用传递。可以使用辅助宏cpumask_pr_args()和
+nodemask_pr_args()来方便打印cpumask和nodemask。
+
+标志位字段，如页标志、gfp_flags
+-------------------------------
+
+::
+
+	%pGp	referenced|uptodate|lru|active|private|node=0|zone=2|lastcpupid=0x1fffff
+	%pGg	GFP_USER|GFP_DMA32|GFP_NOWARN
+	%pGv	read|exec|mayread|maywrite|mayexec|denywrite
+
+将flags位字段打印为构造值的符号常量集合。标志的类型由第三个字符给出。目前支持的
+是[p]age flags， [v]ma_flags(都期望 ``unsigned long *`` )和
+[g]fp_flags(期望 ``gfp_t *`` )。标志名称和打印顺序取决于特定的类型。
+
+注意，这种格式不应该直接用于跟踪点的 :c:func:`TP_printk()` 部分。相反，应使
+用 <trace/events/mmflags.h>中的show_*_flags()函数。
+
+通过引用传递。
+
+网络设备特性
+------------
+
+::
+
+	%pNF	0x000000000000c000
+
+用于打印netdev_features_t。
+
+通过引用传递。
+
+V4L2和DRM FourCC代码(像素格式)
+------------------------------
+
+::
+
+	%p4cc
+
+打印V4L2或DRM使用的FourCC代码，包括格式端序及其十六进制的数值。
+
+通过引用传递。
+
+例如::
+
+	%p4cc	BG12 little-endian (0x32314742)
+	%p4cc	Y10  little-endian (0x20303159)
+	%p4cc	NV12 big-endian (0xb231564e)
+
+谢谢
+====
+
+如果您添加了其他%p扩展，请在可行的情况下，用一个或多个测试用例扩展<lib/test_printf.c>。
+
+谢谢你的合作和关注。
-- 
GitLab


From b345b9ab1d0c7811f96d87cfb87a6cda01624b16 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 17 May 2021 21:29:27 +0800
Subject: [PATCH 0846/3804] docs/zh_CN: add core-api workqueue.rst translation

This patch translates Documentation/core-api/workqueue.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20210517132927.3461185-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 +-
 .../translations/zh_CN/core-api/workqueue.rst | 337 ++++++++++++++++++
 2 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/workqueue.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index c2d4614d9e682..34be9b25cfa1f 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -25,9 +25,9 @@
    kernel-api
    printk-basics
    printk-formats
+   workqueue
 
 Todolist:
-   workqueue
    symbol-namespaces
 
 数据结构和低级实用程序
diff --git a/Documentation/translations/zh_CN/core-api/workqueue.rst b/Documentation/translations/zh_CN/core-api/workqueue.rst
new file mode 100644
index 0000000000000..0b8f730db6c0d
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/workqueue.rst
@@ -0,0 +1,337 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/workqueue.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_workqueue.rst:
+
+
+=========================
+并发管理的工作队列 (cmwq)
+=========================
+
+:日期: September, 2010
+:作者: Tejun Heo <tj@kernel.org>
+:作者: Florian Mickler <florian@mickler.org>
+
+
+简介
+====
+
+在很多情况下，需要一个异步进程的执行环境，工作队列（wq）API是这种情况下
+最常用的机制。
+
+当需要这样一个异步执行上下文时，一个描述将要执行的函数的工作项（work，
+即一个待执行的任务）被放在队列中。一个独立的线程作为异步执行环境。该队
+列被称为workqueue，线程被称为工作者（worker，即执行这一队列的线程）。
+
+当工作队列上有工作项时，工作者会一个接一个地执行与工作项相关的函数。当
+工作队列中没有任何工作项时，工作者就会变得空闲。当一个新的工作项被排入
+队列时，工作者又开始执行。
+
+
+为什么要cmwq?
+=============
+
+在最初的wq实现中，多线程（MT）wq在每个CPU上有一个工作者线程，而单线程
+（ST）wq在全系统有一个工作者线程。一个MT wq需要保持与CPU数量相同的工
+作者数量。这些年来，内核增加了很多MT wq的用户，随着CPU核心数量的不断
+增加，一些系统刚启动就达到了默认的32k PID的饱和空间。
+
+尽管MT wq浪费了大量的资源，但所提供的并发性水平却不能令人满意。这个限
+制在ST和MT wq中都有，只是在MT中没有那么严重。每个wq都保持着自己独立的
+工作者池。一个MT wq只能为每个CPU提供一个执行环境，而一个ST wq则为整个
+系统提供一个。工作项必须竞争这些非常有限的执行上下文，从而导致各种问题，
+包括在单一执行上下文周围容易发生死锁。
+
+(MT wq)所提供的并发性水平和资源使用之间的矛盾也迫使其用户做出不必要的权衡，比
+如libata选择使用ST wq来轮询PIO，并接受一个不必要的限制，即没有两个轮
+询PIO可以同时进行。由于MT wq并没有提供更好的并发性，需要更高层次的并
+发性的用户，如async或fscache，不得不实现他们自己的线程池。
+
+并发管理工作队列（cmwq）是对wq的重新实现，重点是以下目标。
+
+* 保持与原始工作队列API的兼容性。
+
+* 使用由所有wq共享的每CPU统一的工作者池，在不浪费大量资源的情况下按
+  需提供灵活的并发水平。
+
+* 自动调节工作者池和并发水平，使API用户不需要担心这些细节。
+
+
+设计
+====
+
+为了简化函数的异步执行，引入了一个新的抽象概念，即工作项。
+
+一个工作项是一个简单的结构，它持有一个指向将被异步执行的函数的指针。
+每当一个驱动程序或子系统希望一个函数被异步执行时，它必须建立一个指
+向该函数的工作项，并在工作队列中排队等待该工作项。（就是挂到workqueue
+队列里面去）
+
+特定目的线程，称为工作线程（工作者），一个接一个地执行队列中的功能。
+如果没有工作项排队，工作者线程就会闲置。这些工作者线程被管理在所谓
+的工作者池中。
+
+cmwq设计区分了面向用户的工作队列，子系统和驱动程序在上面排队工作，
+以及管理工作者池和处理排队工作项的后端机制。
+
+每个可能的CPU都有两个工作者池，一个用于正常的工作项，另一个用于高
+优先级的工作项，还有一些额外的工作者池，用于服务未绑定工作队列的工
+作项目——这些后备池的数量是动态的。
+
+当他们认为合适的时候，子系统和驱动程序可以通过特殊的
+``workqueue API`` 函数创建和排队工作项。他们可以通过在工作队列上
+设置标志来影响工作项执行方式的某些方面，他们把工作项放在那里。这些
+标志包括诸如CPU定位、并发限制、优先级等等。要获得详细的概述，请参
+考下面的 ``alloc_workqueue()`` 的 API 描述。
+
+当一个工作项被排入一个工作队列时，目标工作池将根据队列参数和工作队
+列属性确定，并被附加到工作池的共享工作列表上。例如，除非特别重写，
+否则一个绑定的工作队列的工作项将被排在与发起线程运行的CPU相关的普
+通或高级工作工作者池的工作项列表中。
+
+对于任何工作者池的实施，管理并发水平（有多少执行上下文处于活动状
+态）是一个重要问题。最低水平是为了节省资源，而饱和水平是指系统被
+充分使用。
+
+每个与实际CPU绑定的worker-pool通过钩住调度器来实现并发管理。每当
+一个活动的工作者被唤醒或睡眠时，工作者池就会得到通知，并跟踪当前可
+运行的工作者的数量。一般来说，工作项不会占用CPU并消耗很多周期。这
+意味着保持足够的并发性以防止工作处理停滞应该是最优的。只要CPU上有
+一个或多个可运行的工作者，工作者池就不会开始执行新的工作，但是，当
+最后一个运行的工作者进入睡眠状态时，它会立即安排一个新的工作者，这
+样CPU就不会在有待处理的工作项目时闲置。这允许在不损失执行带宽的情
+况下使用最少的工作者。
+
+除了kthreads的内存空间外，保留空闲的工作者并没有其他成本，所以cmwq
+在杀死它们之前会保留一段时间的空闲。
+
+对于非绑定的工作队列，后备池的数量是动态的。可以使用
+``apply_workqueue_attrs()`` 为非绑定工作队列分配自定义属性，
+workqueue将自动创建与属性相匹配的后备工作者池。调节并发水平的责任在
+用户身上。也有一个标志可以将绑定的wq标记为忽略并发管理。
+详情请参考API部分。
+
+前进进度的保证依赖于当需要更多的执行上下文时可以创建工作者，这也是
+通过使用救援工作者来保证的。所有可能在处理内存回收的代码路径上使用
+的工作项都需要在wq上排队，wq上保留了一个救援工作者，以便在内存有压
+力的情况下执行。否则，工作者池就有可能出现死锁，等待执行上下文释
+放出来。
+
+
+应用程序编程接口 (API)
+======================
+
+``alloc_workqueue()`` 分配了一个wq。原来的 ``create_*workqueue()``
+函数已被废弃，并计划删除。 ``alloc_workqueue()`` 需要三个
+参数 - ``@name`` , ``@flags`` 和 ``@max_active`` 。
+``@name`` 是wq的名称，如果有的话，也用作救援线程的名称。
+
+一个wq不再管理执行资源，而是作为前进进度保证、刷新(flush)和
+工作项属性的域。 ``@flags`` 和 ``@max_active`` 控制着工作
+项如何被分配执行资源、安排和执行。
+
+
+``flags``
+---------
+
+``WQ_UNBOUND``
+  排队到非绑定wq的工作项由特殊的工作者池提供服务，这些工作者不
+  绑定在任何特定的CPU上。这使得wq表现得像一个简单的执行环境提
+  供者，没有并发管理。非绑定工作者池试图尽快开始执行工作项。非
+  绑定的wq牺牲了局部性，但在以下情况下是有用的。
+
+  * 预计并发水平要求会有很大的波动，使用绑定的wq最终可能会在不
+    同的CPU上产生大量大部分未使用的工作者，因为发起线程在不同
+    的CPU上跳转。
+
+  * 长期运行的CPU密集型工作负载，可以由系统调度器更好地管理。
+
+``WQ_FREEZABLE``
+  一个可冻结的wq参与了系统暂停操作的冻结阶段。wq上的工作项被
+  排空，在解冻之前没有新的工作项开始执行。
+
+``WQ_MEM_RECLAIM``
+  所有可能在内存回收路径中使用的wq都必须设置这个标志。无论内
+  存压力如何，wq都能保证至少有一个执行上下文。
+
+``WQ_HIGHPRI``
+  高优先级wq的工作项目被排到目标cpu的高优先级工作者池中。高
+  优先级的工作者池由具有较高级别的工作者线程提供服务。
+
+  请注意，普通工作者池和高优先级工作者池之间并不相互影响。他
+  们各自维护其独立的工作者池，并在其工作者之间实现并发管理。
+
+``WQ_CPU_INTENSIVE``
+  CPU密集型wq的工作项对并发水平没有贡献。换句话说，可运行的
+  CPU密集型工作项不会阻止同一工作者池中的其他工作项开始执行。
+  这对于那些预计会占用CPU周期的绑定工作项很有用，这样它们的
+  执行就会受到系统调度器的监管。
+
+  尽管CPU密集型工作项不会对并发水平做出贡献，但它们的执行开
+  始仍然受到并发管理的管制，可运行的非CPU密集型工作项会延迟
+  CPU密集型工作项的执行。
+
+  这个标志对于未绑定的wq来说是没有意义的。
+
+请注意，标志 ``WQ_NON_REENTRANT`` 不再存在，因为现在所有的工作
+队列都是不可重入的——任何工作项都保证在任何时间内最多被整个系统的一
+个工作者执行。
+
+
+``max_active``
+--------------
+
+``@max_active`` 决定了每个CPU可以分配给wq的工作项的最大执行上
+下文数量。例如，如果 ``@max_active`` 为16，每个CPU最多可以同
+时执行16个wq的工作项。
+
+目前，对于一个绑定的wq， ``@max_active`` 的最大限制是512，当指
+定为0时使用的默认值是256。对于非绑定的wq，其限制是512和
+4 * ``num_possible_cpus()`` 中的较高值。这些值被选得足够高，所
+以它们不是限制性因素，同时会在失控情况下提供保护。
+
+一个wq的活动工作项的数量通常由wq的用户来调节，更具体地说，是由用
+户在同一时间可以排列多少个工作项来调节。除非有特定的需求来控制活动
+工作项的数量，否则建议指定为"0"。
+
+一些用户依赖于ST wq的严格执行顺序。 ``@max_active`` 为1和 ``WQ_UNBOUND``
+的组合用来实现这种行为。这种wq上的工作项目总是被排到未绑定的工作池
+中，并且在任何时候都只有一个工作项目处于活动状态，从而实现与ST wq相
+同的排序属性。
+
+在目前的实现中，上述配置只保证了特定NUMA节点内的ST行为。相反，
+``alloc_ordered_workqueue()`` 应该被用来实现全系统的ST行为。
+
+
+执行场景示例
+============
+
+下面的示例执行场景试图说明cmwq在不同配置下的行为。
+
+ 工作项w0、w1、w2被排到同一个CPU上的一个绑定的wq q0上。w0
+ 消耗CPU 5ms，然后睡眠10ms，然后在完成之前再次消耗CPU 5ms。
+
+忽略所有其他的任务、工作和处理开销，并假设简单的FIFO调度，
+下面是一个高度简化的原始wq的可能事件序列的版本。::
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 starts and burns CPU
+ 25		w1 sleeps
+ 35		w1 wakes up and finishes
+ 35		w2 starts and burns CPU
+ 40		w2 sleeps
+ 50		w2 wakes up and finishes
+
+对于 ``@max_active`` >= 3 的cmwq, ::
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 10		w2 starts and burns CPU
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+如果 ``@max_active`` == 2, ::
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 starts and burns CPU
+ 10		w1 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 20		w2 starts and burns CPU
+ 25		w2 sleeps
+ 35		w2 wakes up and finishes
+
+现在，我们假设w1和w2被排到了不同的wq q1上，这个wq q1
+有 ``WQ_CPU_INTENSIVE`` 设置::
+
+ TIME IN MSECS	EVENT
+ 0		w0 starts and burns CPU
+ 5		w0 sleeps
+ 5		w1 and w2 start and burn CPU
+ 10		w1 sleeps
+ 15		w2 sleeps
+ 15		w0 wakes up and burns CPU
+ 20		w0 finishes
+ 20		w1 wakes up and finishes
+ 25		w2 wakes up and finishes
+
+
+指南
+====
+
+* 如果一个wq可能处理在内存回收期间使用的工作项目，请不
+  要忘记使用 ``WQ_MEM_RECLAIM`` 。每个设置了
+  ``WQ_MEM_RECLAIM`` 的wq都有一个为其保留的执行环境。
+  如果在内存回收过程中使用的多个工作项之间存在依赖关系，
+  它们应该被排在不同的wq中，每个wq都有 ``WQ_MEM_RECLAIM`` 。
+
+* 除非需要严格排序，否则没有必要使用ST wq。
+
+* 除非有特殊需要，建议使用0作为@max_active。在大多数使用情
+  况下，并发水平通常保持在默认限制之下。
+
+* 一个wq作为前进进度保证（WQ_MEM_RECLAIM）、冲洗（flush）和工
+  作项属性的域。不涉及内存回收的工作项，不需要作为工作项组的一
+  部分被刷新，也不需要任何特殊属性，可以使用系统中的一个wq。使
+  用专用wq和系统wq在执行特性上没有区别。
+
+* 除非工作项预计会消耗大量的CPU周期，否则使用绑定的wq通常是有
+  益的，因为wq操作和工作项执行中的定位水平提高了。
+
+
+调试
+====
+
+因为工作函数是由通用的工作者线程执行的，所以需要一些手段来揭示一些行为不端的工作队列用户。
+
+工作者线程在进程列表中显示为: ::
+
+  root      5671  0.0  0.0      0     0 ?        S    12:07   0:00 [kworker/0:1]
+  root      5672  0.0  0.0      0     0 ?        S    12:07   0:00 [kworker/1:2]
+  root      5673  0.0  0.0      0     0 ?        S    12:12   0:00 [kworker/0:0]
+  root      5674  0.0  0.0      0     0 ?        S    12:13   0:00 [kworker/1:0]
+
+如果kworkers失控了（使用了太多的cpu），有两类可能的问题:
+
+	1. 正在迅速调度的事情
+	2. 一个消耗大量cpu周期的工作项。
+
+第一个可以用追踪的方式进行跟踪: ::
+
+	$ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event
+	$ cat /sys/kernel/debug/tracing/trace_pipe > out.txt
+	(wait a few secs)
+
+如果有什么东西在工作队列上忙着做循环，它就会主导输出，可以用工作项函数确定违规者。
+
+对于第二类问题，应该可以只检查违规工作者线程的堆栈跟踪。 ::
+
+	$ cat /proc/THE_OFFENDING_KWORKER/stack
+
+工作项函数在堆栈追踪中应该是显而易见的。
+
+
+内核内联文档参考
+================
+
+该API在以下内核代码中:
+
+include/linux/workqueue.h
+
+kernel/workqueue.c
-- 
GitLab


From 6586f2d8cec186ef0af7cf6a0738293fea048ed8 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 17 May 2021 21:37:48 +0800
Subject: [PATCH 0847/3804] docs/zh_CN: add core api kobject translation

This patch translates Documentation/core-api/kobject.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210517133748.3461357-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   6 +-
 .../translations/zh_CN/core-api/kobject.rst   | 378 ++++++++++++++++++
 2 files changed, 383 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/kobject.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 34be9b25cfa1f..a1dd792e46f76 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -35,9 +35,13 @@ Todolist:
 
 在整个内核中使用的函数库。
 
-Todolist:
+.. toctree::
+   :maxdepth: 1
 
    kobject
+
+Todolist:
+
    kref
    assoc_array
    xarray
diff --git a/Documentation/translations/zh_CN/core-api/kobject.rst b/Documentation/translations/zh_CN/core-api/kobject.rst
new file mode 100644
index 0000000000000..f0e6a4aeb3722
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/kobject.rst
@@ -0,0 +1,378 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/kobject.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_core_api_kobject.rst:
+
+=======================================================
+关于kobjects、ksets和ktypes的一切你没想过需要了解的东西
+=======================================================
+
+:作者: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+:最后一次更新: December 19, 2007
+
+根据Jon Corbet于2003年10月1日为lwn.net撰写的原创文章改编，网址是：
+https://lwn.net/Articles/51437/
+
+理解驱动模型和建立在其上的kobject抽象的部分的困难在于，没有明显的切入点。
+处理kobjects需要理解一些不同的类型，所有这些类型都会相互引用。为了使事情
+变得更简单，我们将多路并进，从模糊的术语开始，并逐渐增加细节。那么，先来
+了解一些我们将要使用的术语的简明定义吧。
+
+ - 一个kobject是一个kobject结构体类型的对象。Kobjects有一个名字和一个
+   引用计数。一个kobject也有一个父指针（允许对象被排列成层次结构），一个
+   特定的类型，并且，通常在sysfs虚拟文件系统中表示。
+
+  Kobjects本身通常并不引人关注；相反它们常常被嵌入到其他包含真正引人注目
+  的代码的结构体中。
+
+  任何结构体都 **不应该** 有一个以上的kobject嵌入其中。如果有的话，对象的引用计
+  数肯定会被打乱，而且不正确，你的代码就会出现错误。所以不要这样做。
+
+ - ktype是嵌入一个kobject的对象的类型。每个嵌入kobject的结构体都需要一个
+   相应的ktype。ktype控制着kobject在被创建和销毁时的行为。
+
+ - 一个kset是一组kobjects。这些kobjects可以是相同的ktype或者属于不同的
+   ktype。kset是kobjects集合的基本容器类型。Ksets包含它们自己的kobjects，
+   但你可以安全地忽略这个实现细节，因为kset的核心代码会自动处理这个kobject。
+
+ 当你看到一个下面全是其他目录的sysfs目录时，通常这些目录中的每一个都对应
+ 于同一个kset中的一个kobject。
+
+ 我们将研究如何创建和操作所有这些类型。将采取一种自下而上的方法，所以我们
+ 将回到kobjects。
+
+
+嵌入kobjects
+=============
+
+内核代码很少创建孤立的kobject，只有一个主要的例外，下面会解释。相反，
+kobjects被用来控制对一个更大的、特定领域的对象的访问。为此，kobjects会被
+嵌入到其他结构中。如果你习惯于用面向对象的术语来思考问题，那么kobjects可
+以被看作是一个顶级的抽象类，其他的类都是从它派生出来的。一个kobject实现了
+一系列的功能，这些功能本身并不特别有用，但在其他对象中却很好用。C语言不允
+许直接表达继承，所以必须使用其他技术——比如结构体嵌入。
+
+（对于那些熟悉内核链表实现的人来说，这类似于“list_head”结构本身很少有用，
+但总是被嵌入到感兴趣的更大的对象中）。
+
+例如， ``drivers/uio/uio.c`` 中的IO代码有一个结构体，定义了与uio设备相
+关的内存区域::
+
+    struct uio_map {
+            struct kobject kobj;
+            struct uio_mem *mem;
+    };
+
+如果你有一个uio_map结构体，找到其嵌入的kobject只是一个使用kobj成员的问题。
+然而，与kobjects一起工作的代码往往会遇到相反的问题：给定一个结构体kobject
+的指针，指向包含结构体的指针是什么？你必须避免使用一些技巧（比如假设
+kobject在结构的开头），相反，你得使用container_of()宏，其可以在 ``<linux/kernel.h>``
+中找到::
+
+    container_of(ptr, type, member)
+
+其中:
+
+  * ``ptr`` 是一个指向嵌入kobject的指针，
+  * ``type`` 是包含结构体的类型，
+  * ``member`` 是 ``指针`` 所指向的结构体域的名称。
+
+container_of()的返回值是一个指向相应容器类型的指针。因此，例如，一个嵌入到
+uio_map结构 **中** 的kobject结构体的指针kp可以被转换为一个指向 **包含** uio_map
+结构体的指针，方法是::
+
+    struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
+
+为了方便起见，程序员经常定义一个简单的宏，用于将kobject指针 **反推** 到包含
+类型。在早期的 ``drivers/uio/uio.c`` 中正是如此，你可以在这里看到::
+
+    struct uio_map {
+            struct kobject kobj;
+            struct uio_mem *mem;
+    };
+
+    #define to_map(map) container_of(map, struct uio_map, kobj)
+
+其中宏的参数“map”是一个指向有关的kobject结构体的指针。该宏随后被调用::
+
+    struct uio_map *map = to_map(kobj);
+
+
+kobjects的初始化
+================
+
+当然，创建kobject的代码必须初始化该对象。一些内部字段是通过（强制）调用kobject_init()
+来设置的::
+
+    void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
+
+ktype是正确创建kobject的必要条件，因为每个kobject都必须有一个相关的kobj_type。
+在调用kobject_init()后，为了向sysfs注册kobject，必须调用函数kobject_add()::
+
+    int kobject_add(struct kobject *kobj, struct kobject *parent,
+                    const char *fmt, ...);
+
+这将正确设置kobject的父级和kobject的名称。如果该kobject要与一个特定的kset相关
+联，在调用kobject_add()之前必须分配kobj->kset。如果kset与kobject相关联，则
+kobject的父级可以在调用kobject_add()时被设置为NULL，则kobject的父级将是kset
+本身。
+
+由于kobject的名字是在它被添加到内核时设置的，所以kobject的名字不应该被直接操作。
+如果你必须改变kobject的名字，请调用kobject_rename()::
+
+    int kobject_rename(struct kobject *kobj, const char *new_name);
+
+kobject_rename()函数不会执行任何锁定操作，也不会对name进行可靠性检查，所以调用
+者自己检查和串行化操作是明智的选择。
+
+有一个叫kobject_set_name()的函数，但那是历史遗产，正在被删除。如果你的代码需
+要调用这个函数，那么它是不正确的，需要被修复。
+
+要正确访问kobject的名称，请使用函数kobject_name()::
+
+    const char *kobject_name(const struct kobject * kobj);
+
+有一个辅助函数可以同时初始化和添加kobject到内核中，令人惊讶的是，该函数被称为
+kobject_init_and_add()::
+
+    int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
+                             struct kobject *parent, const char *fmt, ...);
+
+参数与上面描述的单个kobject_init()和kobject_add()函数相同。
+
+
+Uevents
+=======
+
+当一个kobject被注册到kobject核心后，你需要向全世界宣布它已经被创建了。这可以通
+过调用kobject_uevent()来实现::
+
+    int kobject_uevent(struct kobject *kobj, enum kobject_action action);
+
+当kobject第一次被添加到内核时，使用 *KOBJ_ADD* 动作。这应该在该kobject的任
+何属性或子对象被正确初始化后进行，因为当这个调用发生时，用户空间会立即开始寻
+找它们。
+
+当kobject从内核中移除时（关于如何做的细节在下面）， **KOBJ_REMOVE** 的uevent
+将由kobject核心自动创建，所以调用者不必担心手动操作。
+
+
+引用计数
+========
+
+kobject的关键功能之一是作为它所嵌入的对象的一个引用计数器。只要对该对象的引用
+存在，该对象（以及支持它的代码）就必须继续存在。用于操作kobject的引用计数的低
+级函数是::
+
+    struct kobject *kobject_get(struct kobject *kobj);
+    void kobject_put(struct kobject *kobj);
+
+对kobject_get()的成功调用将增加kobject的引用计数器值并返回kobject的指针。
+
+当引用被释放时，对kobject_put()的调用将递减引用计数值，并可能释放该对象。请注
+意，kobject_init()将引用计数设置为1，所以设置kobject的代码最终需要kobject_put()
+来释放该引用。
+
+因为kobjects是动态的，所以它们不能以静态方式或在堆栈中声明，而总是以动态方式分
+配。未来版本的内核将包含对静态创建的kobjects的运行时检查，并将警告开发者这种不
+当的使用。
+
+如果你使用struct kobject只是为了给你的结构体提供一个引用计数器，请使用struct kref
+来代替；kobject是多余的。关于如何使用kref结构体的更多信息，请参见Linux内核源代
+码树中的文件Documentation/core-api/kref.rst
+
+
+创建“简单的”kobjects
+====================
+
+有时，开发者想要的只是在sysfs层次结构中创建一个简单的目录，而不必去搞那些复杂
+的ksets、显示和存储函数，以及其他细节。这是一个应该创建单个kobject的例外。要
+创建这样一个条目（即简单的目录），请使用函数::
+
+    struct kobject *kobject_create_and_add(const char *name, struct kobject *parent);
+
+这个函数将创建一个kobject，并将其放在sysfs中指定的父kobject下面的位置。要创
+建与此kobject相关的简单属性，请使用::
+
+    int sysfs_create_file(struct kobject *kobj, const struct attribute *attr);
+
+或者::
+
+    int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp);
+
+这里使用的两种类型的属性，与已经用kobject_create_and_add()创建的kobject，
+都可以是kobj_attribute类型，所以不需要创建特殊的自定义属性。
+
+参见示例模块， ``samples/kobject/kobject-example.c`` ，以了解一个简单的
+kobject和属性的实现。
+
+
+
+ktypes和释放方法
+================
+
+以上讨论中还缺少一件重要的事情，那就是当一个kobject的引用次数达到零的时候
+会发生什么。创建kobject的代码通常不知道何时会发生这种情况；首先，如果它知
+道，那么使用kobject就没有什么意义。当sysfs被引入时，即使是可预测的对象生命
+周期也会变得更加复杂，因为内核的其他部分可以获得在系统中注册的任何kobject
+的引用。
+
+最终的结果是，一个由kobject保护的结构体在其引用计数归零之前不能被释放。引
+用计数不受创建kobject的代码的直接控制。因此，每当它的一个kobjects的最后一
+个引用消失时，必须异步通知该代码。
+
+一旦你通过kobject_add()注册了你的kobject，你绝对不能使用kfree()来直接释
+放它。唯一安全的方法是使用kobject_put()。在kobject_init()之后总是使用
+kobject_put()以避免错误的发生是一个很好的做法。
+
+这个通知是通过kobject的release()方法完成的。通常这样的方法有如下形式::
+
+    void my_object_release(struct kobject *kobj)
+    {
+            struct my_object *mine = container_of(kobj, struct my_object, kobj);
+
+            /* Perform any additional cleanup on this object, then... */
+            kfree(mine);
+    }
+
+有一点很重要：每个kobject都必须有一个release()方法，而且这个kobject必
+须持续存在（处于一致的状态），直到这个方法被调用。如果这些约束条件没有
+得到满足，那么代码就是有缺陷的。注意，如果你忘记提供release()方法，内
+核会警告你。不要试图通过提供一个“空”的释放函数来摆脱这个警告。
+
+如果你的清理函数只需要调用kfree()，那么你必须创建一个包装函数，该函数
+使用container_of()来向上造型到正确的类型（如上面的例子所示），然后在整个
+结构体上调用kfree()。
+
+注意，kobject的名字在release函数中是可用的，但它不能在这个回调中被改
+变。否则，在kobject核心中会出现内存泄漏，这让人很不爽。
+
+有趣的是，release()方法并不存储在kobject本身；相反，它与ktype相关。
+因此，让我们引入结构体kobj_type::
+
+    struct kobj_type {
+            void (*release)(struct kobject *kobj);
+            const struct sysfs_ops *sysfs_ops;
+            struct attribute **default_attrs;
+            const struct attribute_group **default_groups;
+            const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+            const void *(*namespace)(struct kobject *kobj);
+            void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
+    };
+
+这个结构体用来描述一个特定类型的kobject（或者更正确地说，包含对象的
+类型）。每个kobject都需要有一个相关的kobj_type结构；当你调用
+kobject_init()或kobject_init_and_add()时必须指定一个指向该结构的
+指针。
+
+当然，kobj_type结构中的release字段是指向这种类型的kobject的release()
+方法的一个指针。另外两个字段（sysfs_ops 和 default_attrs）控制这种
+类型的对象如何在 sysfs 中被表示；它们超出了本文的范围。
+
+default_attrs 指针是一个默认属性的列表，它将为任何用这个 ktype 注册
+的 kobject 自动创建。
+
+
+ksets
+=====
+
+一个kset仅仅是一个希望相互关联的kobjects的集合。没有限制它们必须是相
+同的ktype，但是如果它们不是相同的，就要非常小心。
+
+一个kset有以下功能:
+
+ - 它像是一个包含一组对象的袋子。一个kset可以被内核用来追踪“所有块
+   设备”或“所有PCI设备驱动”。
+
+ - kset也是sysfs中的一个子目录，与kset相关的kobjects可以在这里显示
+   出来。每个kset都包含一个kobject，它可以被设置为其他kobject的父对象；
+   sysfs层次结构的顶级目录就是以这种方式构建的。
+
+ - Ksets可以支持kobjects的 "热插拔"，并影响uevent事件如何被报告给
+   用户空间。
+
+ 在面向对象的术语中，“kset”是顶级的容器类；ksets包含它们自己的kobject，
+ 但是这个kobject是由kset代码管理的，不应该被任何其他用户所操纵。
+
+ kset在一个标准的内核链表中保存它的子对象。Kobjects通过其kset字段指向其
+ 包含的kset。在几乎所有的情况下，属于一个kset的kobjects在它们的父
+ 对象中都有那个kset（或者，严格地说，它的嵌入kobject）。
+
+ 由于kset中包含一个kobject，它应该总是被动态地创建，而不是静态地
+ 或在堆栈中声明。要创建一个新的kset，请使用::
+
+  struct kset *kset_create_and_add(const char *name,
+                                   const struct kset_uevent_ops *uevent_ops,
+                                   struct kobject *parent_kobj);
+
+当你完成对kset的处理后，调用::
+
+  void kset_unregister(struct kset *k);
+
+来销毁它。这将从sysfs中删除该kset并递减其引用计数值。当引用计数
+为零时，该kset将被释放。因为对该kset的其他引用可能仍然存在，
+释放可能发生在kset_unregister()返回之后。
+
+一个使用kset的例子可以在内核树中的 ``samples/kobject/kset-example.c``
+文件中看到。
+
+如果一个kset希望控制与它相关的kobjects的uevent操作，它可以使用
+结构体kset_uevent_ops来处理它::
+
+  struct kset_uevent_ops {
+          int (* const filter)(struct kset *kset, struct kobject *kobj);
+          const char *(* const name)(struct kset *kset, struct kobject *kobj);
+          int (* const uevent)(struct kset *kset, struct kobject *kobj,
+                        struct kobj_uevent_env *env);
+  };
+
+
+过滤器函数允许kset阻止一个特定kobject的uevent被发送到用户空间。
+如果该函数返回0，该uevent将不会被发射出去。
+
+name函数将被调用以覆盖uevent发送到用户空间的kset的默认名称。默
+认情况下，该名称将与kset本身相同，但这个函数，如果存在，可以覆盖
+该名称。
+
+当uevent即将被发送至用户空间时，uevent函数将被调用，以允许更多
+的环境变量被添加到uevent中。
+
+有人可能会问，鉴于没有提出执行该功能的函数，究竟如何将一个kobject
+添加到一个kset中。答案是这个任务是由kobject_add()处理的。当一个
+kobject被传递给kobject_add()时，它的kset成员应该指向这个kobject
+所属的kset。 kobject_add()将处理剩下的部分。
+
+如果属于一个kset的kobject没有设置父kobject，它将被添加到kset的目
+录中。并非所有的kset成员都必须住在kset目录中。如果在添加kobject
+之前分配了一个明确的父kobject，那么该kobject将被注册到kset中，
+但是被添加到父kobject下面。
+
+
+移除Kobject
+===========
+
+当一个kobject在kobject核心注册成功后，在代码使用完它时，必须将其
+清理掉。要做到这一点，请调用kobject_put()。通过这样做，kobject核
+心会自动清理这个kobject分配的所有内存。如果为这个对象发送了 ``KOBJ_ADD``
+uevent，那么相应的 ``KOBJ_REMOVE`` uevent也将被发送，任何其他的
+sysfs内务将被正确处理。
+
+如果你需要分两次对kobject进行删除（比如说在你要销毁对象时无权睡眠），
+那么调用kobject_del()将从sysfs中取消kobject的注册。这使得kobject
+“不可见”，但它并没有被清理掉，而且该对象的引用计数仍然是一样的。在稍
+后的时间调用kobject_put()来完成与该kobject相关的内存的清理。
+
+kobject_del()可以用来放弃对父对象的引用，如果循环引用被构建的话。
+在某些情况下，一个父对象引用一个子对象是有效的。循环引用必须通过明
+确调用kobject_del()来打断，这样一个释放函数就会被调用，前一个循环
+中的对象会相互释放。
+
+
+示例代码出处
+============
+
+关于正确使用ksets和kobjects的更完整的例子，请参见示例程序
+``samples/kobject/{kobject-example.c,kset-example.c}`` ，如果
+您选择 ``CONFIG_SAMPLE_KOBJECT`` ，它们将被构建为可加载模块。
-- 
GitLab


From 35f1fceaa288ee0954ced2d740b95211aef4cc80 Mon Sep 17 00:00:00 2001
From: Chun-Hung Tseng <henrybear327@gmail.com>
Date: Sun, 16 May 2021 18:59:55 +0800
Subject: [PATCH 0848/3804] Documentation: scheduler: fixed 2 typos in
 sched-nice-design.rst

This patch fixed 2 spelling errors in the documentation.

Signed-off-by: Chun-Hung Tseng <henrybear327@gmail.com>
Link: https://lore.kernel.org/r/20210516105955.120651-1-henrybear327@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/scheduler/sched-nice-design.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/scheduler/sched-nice-design.rst b/Documentation/scheduler/sched-nice-design.rst
index 0571f1b47e644..889bf2b737dc6 100644
--- a/Documentation/scheduler/sched-nice-design.rst
+++ b/Documentation/scheduler/sched-nice-design.rst
@@ -60,7 +60,7 @@ within the constraints of HZ and jiffies and their nasty design level
 coupling to timeslices and granularity it was not really viable.
 
 The second (less frequent but still periodically occurring) complaint
-about Linux's nice level support was its assymetry around the origo
+about Linux's nice level support was its asymmetry around the origin
 (which you can see demonstrated in the picture above), or more
 accurately: the fact that nice level behavior depended on the _absolute_
 nice level as well, while the nice API itself is fundamentally
-- 
GitLab


From b7df21cf1b79ab7026f545e7bf837bd5750ac026 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 8 May 2021 03:57:03 +0800
Subject: [PATCH 0849/3804] tipc: skb_linearize the head skb when reassembling
 msgs

It's not a good idea to append the frag skb to a skb's frag_list if
the frag_list already has skbs from elsewhere, such as this skb was
created by pskb_copy() where the frag_list was cloned (all the skbs
in it were skb_get'ed) and shared by multiple skbs.

However, the new appended frag skb should have been only seen by the
current skb. Otherwise, it will cause use after free crashes as this
appended frag skb are seen by multiple skbs but it only got skb_get
called once.

The same thing happens with a skb updated by pskb_may_pull() with a
skb_cloned skb. Li Shuang has reported quite a few crashes caused
by this when doing testing over macvlan devices:

  [] kernel BUG at net/core/skbuff.c:1970!
  [] Call Trace:
  []  skb_clone+0x4d/0xb0
  []  macvlan_broadcast+0xd8/0x160 [macvlan]
  []  macvlan_process_broadcast+0x148/0x150 [macvlan]
  []  process_one_work+0x1a7/0x360
  []  worker_thread+0x30/0x390

  [] kernel BUG at mm/usercopy.c:102!
  [] Call Trace:
  []  __check_heap_object+0xd3/0x100
  []  __check_object_size+0xff/0x16b
  []  simple_copy_to_iter+0x1c/0x30
  []  __skb_datagram_iter+0x7d/0x310
  []  __skb_datagram_iter+0x2a5/0x310
  []  skb_copy_datagram_iter+0x3b/0x90
  []  tipc_recvmsg+0x14a/0x3a0 [tipc]
  []  ____sys_recvmsg+0x91/0x150
  []  ___sys_recvmsg+0x7b/0xc0

  [] kernel BUG at mm/slub.c:305!
  [] Call Trace:
  []  <IRQ>
  []  kmem_cache_free+0x3ff/0x400
  []  __netif_receive_skb_core+0x12c/0xc40
  []  ? kmem_cache_alloc+0x12e/0x270
  []  netif_receive_skb_internal+0x3d/0xb0
  []  ? get_rx_page_info+0x8e/0xa0 [be2net]
  []  be_poll+0x6ef/0xd00 [be2net]
  []  ? irq_exit+0x4f/0x100
  []  net_rx_action+0x149/0x3b0

  ...

This patch is to fix it by linearizing the head skb if it has frag_list
set in tipc_buf_append(). Note that we choose to do this before calling
skb_unshare(), as __skb_linearize() will avoid skb_copy(). Also, we can
not just drop the frag_list either as the early time.

Fixes: 45c8b7b175ce ("tipc: allow non-linear first fragment buffer")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 3f0a25345a7c0..ce6ab54822d8d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -149,18 +149,13 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
 		if (unlikely(head))
 			goto err;
 		*buf = NULL;
+		if (skb_has_frag_list(frag) && __skb_linearize(frag))
+			goto err;
 		frag = skb_unshare(frag, GFP_ATOMIC);
 		if (unlikely(!frag))
 			goto err;
 		head = *headbuf = frag;
 		TIPC_SKB_CB(head)->tail = NULL;
-		if (skb_is_nonlinear(head)) {
-			skb_walk_frags(head, tail) {
-				TIPC_SKB_CB(head)->tail = tail;
-			}
-		} else {
-			skb_frag_list_init(head);
-		}
 		return 0;
 	}
 
-- 
GitLab


From 31db0dbd72444abe645d90c20ecb84d668f5af5e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 May 2021 17:24:48 +0300
Subject: [PATCH 0850/3804] net: hso: check for allocation failure in
 hso_create_bulk_serial_device()

In current kernels, small allocations never actually fail so this
patch shouldn't affect runtime.

Originally this error handling code was written with the idea that if
the "serial->tiocmget" allocation failed, then we would continue
operating instead of bailing out early.  But in later years we added
an unchecked dereference on the next line.

	serial->tiocmget->serial_state_notification = kzalloc();
        ^^^^^^^^^^^^^^^^^^

Since these allocations are never going to fail in real life, this is
mostly a philosophical debate, but I think bailing out early is the
correct behavior that the user would want.  And generally it's safer to
bail as soon as an error happens.

Fixes: af0de1303c4e ("usb: hso: obey DMA rules in tiocmget")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 3ef4b2841402c..260f850d69eb3 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2618,29 +2618,28 @@ static struct hso_device *hso_create_bulk_serial_device(
 		num_urbs = 2;
 		serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
 					   GFP_KERNEL);
+		if (!serial->tiocmget)
+			goto exit;
 		serial->tiocmget->serial_state_notification
 			= kzalloc(sizeof(struct hso_serial_state_notification),
 					   GFP_KERNEL);
-		/* it isn't going to break our heart if serial->tiocmget
-		 *  allocation fails don't bother checking this.
-		 */
-		if (serial->tiocmget && serial->tiocmget->serial_state_notification) {
-			tiocmget = serial->tiocmget;
-			tiocmget->endp = hso_get_ep(interface,
-						    USB_ENDPOINT_XFER_INT,
-						    USB_DIR_IN);
-			if (!tiocmget->endp) {
-				dev_err(&interface->dev, "Failed to find INT IN ep\n");
-				goto exit;
-			}
-
-			tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (tiocmget->urb) {
-				mutex_init(&tiocmget->mutex);
-				init_waitqueue_head(&tiocmget->waitq);
-			} else
-				hso_free_tiomget(serial);
+		if (!serial->tiocmget->serial_state_notification)
+			goto exit;
+		tiocmget = serial->tiocmget;
+		tiocmget->endp = hso_get_ep(interface,
+					    USB_ENDPOINT_XFER_INT,
+					    USB_DIR_IN);
+		if (!tiocmget->endp) {
+			dev_err(&interface->dev, "Failed to find INT IN ep\n");
+			goto exit;
 		}
+
+		tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
+		if (tiocmget->urb) {
+			mutex_init(&tiocmget->mutex);
+			init_waitqueue_head(&tiocmget->waitq);
+		} else
+			hso_free_tiomget(serial);
 	}
 	else
 		num_urbs = 1;
-- 
GitLab


From e0652f8bb44d6294eeeac06d703185357f25d50b Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Sat, 15 May 2021 07:29:06 +0800
Subject: [PATCH 0851/3804] NFC: nci: fix memory leak in nci_allocate_device

nfcmrvl_disconnect fails to free the hci_dev field in struct nci_dev.
Fix this by freeing hci_dev in nci_free_device.

BUG: memory leak
unreferenced object 0xffff888111ea6800 (size 1024):
  comm "kworker/1:0", pid 19, jiffies 4294942308 (age 13.580s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 60 fd 0c 81 88 ff ff  .........`......
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<000000004bc25d43>] kmalloc include/linux/slab.h:552 [inline]
    [<000000004bc25d43>] kzalloc include/linux/slab.h:682 [inline]
    [<000000004bc25d43>] nci_hci_allocate+0x21/0xd0 net/nfc/nci/hci.c:784
    [<00000000c59cff92>] nci_allocate_device net/nfc/nci/core.c:1170 [inline]
    [<00000000c59cff92>] nci_allocate_device+0x10b/0x160 net/nfc/nci/core.c:1132
    [<00000000006e0a8e>] nfcmrvl_nci_register_dev+0x10a/0x1c0 drivers/nfc/nfcmrvl/main.c:153
    [<000000004da1b57e>] nfcmrvl_probe+0x223/0x290 drivers/nfc/nfcmrvl/usb.c:345
    [<00000000d506aed9>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
    [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554
    [<00000000f5009125>] driver_probe_device+0x84/0x100 drivers/base/dd.c:740
    [<000000000ce658ca>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:846
    [<000000007067d05f>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431
    [<00000000f8e13372>] __device_attach+0x122/0x250 drivers/base/dd.c:914
    [<000000009cf68860>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491
    [<00000000359c965a>] device_add+0x5be/0xc30 drivers/base/core.c:3109
    [<00000000086e4bd3>] usb_set_configuration+0x9d9/0xb90 drivers/usb/core/message.c:2164
    [<00000000ca036872>] usb_generic_driver_probe+0x8c/0xc0 drivers/usb/core/generic.c:238
    [<00000000d40d36f6>] usb_probe_device+0x5c/0x140 drivers/usb/core/driver.c:293
    [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554

Reported-by: syzbot+19bcfc64a8df1318d1c3@syzkaller.appspotmail.com
Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nfc/nci_core.h | 1 +
 net/nfc/nci/core.c         | 1 +
 net/nfc/nci/hci.c          | 5 +++++
 3 files changed, 7 insertions(+)

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index bd76e8e082c01..1df0f8074c9d3 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -298,6 +298,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len,
 		      struct sk_buff **resp);
 
 struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev);
+void nci_hci_deallocate(struct nci_dev *ndev);
 int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event,
 		       const u8 *param, size_t param_len);
 int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate,
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 9a585332ea84a..da7fe9db1b00f 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(nci_allocate_device);
 void nci_free_device(struct nci_dev *ndev)
 {
 	nfc_free_device(ndev->nfc_dev);
+	nci_hci_deallocate(ndev);
 	kfree(ndev);
 }
 EXPORT_SYMBOL(nci_free_device);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 6b275a387a92a..96865142104f4 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -792,3 +792,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev)
 
 	return hdev;
 }
+
+void nci_hci_deallocate(struct nci_dev *ndev)
+{
+	kfree(ndev->hci_dev);
+}
-- 
GitLab


From 28c66b6da4087b8cfe81c2ec0a46eb6116dafda9 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 15 May 2021 15:16:05 +0800
Subject: [PATCH 0852/3804] net: bnx2: Fix error return code in
 bnx2_init_board()

Fix to return -EPERM from the error handling case instead of 0, as done
elsewhere in this function.

Fixes: b6016b767397 ("[BNX2]: New Broadcom gigabit network driver.")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index c0986096c701d..5bace8a93d73b 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8247,9 +8247,9 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 		BNX2_WR(bp, PCI_COMMAND, reg);
 	} else if ((BNX2_CHIP_ID(bp) == BNX2_CHIP_ID_5706_A1) &&
 		!(bp->flags & BNX2_FLAG_PCIX)) {
-
 		dev_err(&pdev->dev,
 			"5706 A1 can only be used in a PCIX bus, aborting\n");
+		rc = -EPERM;
 		goto err_out_unmap;
 	}
 
-- 
GitLab


From ab21494be9dc7d62736c5fcd06be65d49df713ee Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@broadcom.com>
Date: Sat, 15 May 2021 03:25:18 -0400
Subject: [PATCH 0853/3804] bnxt_en: Include new P5 HV definition in VF check.

Otherwise, some of the recently added HyperV VF IDs would not be
recognized as VF devices and they would not initialize properly.

Fixes: 7fbf359bb2c1 ("bnxt_en: Add PCI IDs for Hyper-V VF devices.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Andy Gospodarek <gospo@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 46be4046ee51e..4e57041b4775d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -282,7 +282,8 @@ static bool bnxt_vf_pciid(enum board_idx idx)
 {
 	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF ||
 		idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV ||
-		idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF);
+		idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF ||
+		idx == NETXTREME_E_P5_VF_HV);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
-- 
GitLab


From 702279d2ce4650000bb6302013630304e359dc13 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sat, 15 May 2021 03:25:19 -0400
Subject: [PATCH 0854/3804] bnxt_en: Fix context memory setup for 64K page
 size.

There was a typo in the code that checks for 64K BNXT_PAGE_SHIFT in
bnxt_hwrm_set_pg_attr().  Fix it and make the code more understandable
with a new macro BNXT_SET_CTX_PAGE_ATTR().

Fixes: 1b9394e5a2ad ("bnxt_en: Configure context memory on new devices.")
Reviewed-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c |  9 +--------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 10 ++++++++++
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4e57041b4775d..fcc729d52b174 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6933,17 +6933,10 @@ ctx_err:
 static void bnxt_hwrm_set_pg_attr(struct bnxt_ring_mem_info *rmem, u8 *pg_attr,
 				  __le64 *pg_dir)
 {
-	u8 pg_size = 0;
-
 	if (!rmem->nr_pages)
 		return;
 
-	if (BNXT_PAGE_SHIFT == 13)
-		pg_size = 1 << 4;
-	else if (BNXT_PAGE_SIZE == 16)
-		pg_size = 2 << 4;
-
-	*pg_attr = pg_size;
+	BNXT_SET_CTX_PAGE_ATTR(*pg_attr);
 	if (rmem->depth >= 1) {
 		if (rmem->depth == 2)
 			*pg_attr |= 2;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 98e0cef4532cb..30e47ea343f91 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1457,6 +1457,16 @@ struct bnxt_ctx_pg_info {
 
 #define BNXT_BACKING_STORE_CFG_LEGACY_LEN	256
 
+#define BNXT_SET_CTX_PAGE_ATTR(attr)					\
+do {									\
+	if (BNXT_PAGE_SIZE == 0x2000)					\
+		attr = FUNC_BACKING_STORE_CFG_REQ_SRQ_PG_SIZE_PG_8K;	\
+	else if (BNXT_PAGE_SIZE == 0x10000)				\
+		attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_64K;	\
+	else								\
+		attr = FUNC_BACKING_STORE_CFG_REQ_QPC_PG_SIZE_PG_4K;	\
+} while (0)
+
 struct bnxt_ctx_mem_info {
 	u32	qp_max_entries;
 	u16	qp_min_qp1_entries;
-- 
GitLab


From 9f6f852550d0e1b7735651228116ae9d300f69b3 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Sun, 16 May 2021 07:11:40 +0000
Subject: [PATCH 0855/3804] isdn: mISDN: netjet: Fix crash in nj_probe

'nj_setup' in netjet.c might fail with -EIO and in this case
'card->irq' is initialized and is bigger than zero. A subsequent call to
'nj_release' will free the irq that has not been requested.

Fix this bug by deleting the previous assignment to 'card->irq' and just
keeping the assignment before 'request_irq'.

The kernel log reveals it:

[    3.354615 ] WARNING: CPU: 0 PID: 1 at kernel/irq/manage.c:1826
free_irq+0x100/0x480
[    3.355112 ] Modules linked in:
[    3.355310 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00144-g25a1298726e #13
[    3.355816 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.356552 ] RIP: 0010:free_irq+0x100/0x480
[    3.356820 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18
4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5
ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80
[    3.358012 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082
[    3.358357 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX:
0000000000000000
[    3.358814 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI:
00000000ffffffff
[    3.359272 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09:
0000000000000000
[    3.359732 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12:
0000000000000000
[    3.360195 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15:
ffff888104dc80a8
[    3.360652 ] FS:  0000000000000000(0000) GS:ffff88817bc00000(0000)
knlGS:0000000000000000
[    3.361170 ] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    3.361538 ] CR2: 0000000000000000 CR3: 000000000582e000 CR4:
00000000000006f0
[    3.362003 ] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[    3.362175 ] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[    3.362175 ] Call Trace:
[    3.362175 ]  nj_release+0x51/0x1e0
[    3.362175 ]  nj_probe+0x450/0x950
[    3.362175 ]  ? pci_device_remove+0x110/0x110
[    3.362175 ]  local_pci_probe+0x45/0xa0
[    3.362175 ]  pci_device_probe+0x12b/0x1d0
[    3.362175 ]  really_probe+0x2a9/0x610
[    3.362175 ]  driver_probe_device+0x90/0x1d0
[    3.362175 ]  ? mutex_lock_nested+0x1b/0x20
[    3.362175 ]  device_driver_attach+0x68/0x70
[    3.362175 ]  __driver_attach+0x124/0x1b0
[    3.362175 ]  ? device_driver_attach+0x70/0x70
[    3.362175 ]  bus_for_each_dev+0xbb/0x110
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  driver_attach+0x27/0x30
[    3.362175 ]  bus_add_driver+0x1eb/0x2a0
[    3.362175 ]  driver_register+0xa9/0x180
[    3.362175 ]  __pci_register_driver+0x82/0x90
[    3.362175 ]  ? w6692_init+0x38/0x38
[    3.362175 ]  nj_init+0x36/0x38
[    3.362175 ]  do_one_initcall+0x7f/0x3d0
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.362175 ]  kernel_init_freeable+0x2aa/0x301
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  kernel_init+0x18/0x190
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ret_from_fork+0x1f/0x30
[    3.362175 ] Kernel panic - not syncing: panic_on_warn set ...
[    3.362175 ] CPU: 0 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00144-g25a1298726e #13
[    3.362175 ] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[    3.362175 ] Call Trace:
[    3.362175 ]  dump_stack+0xba/0xf5
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  panic+0x15a/0x3f2
[    3.362175 ]  ? __warn+0xf2/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  __warn+0x108/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  report_bug+0x119/0x1c0
[    3.362175 ]  handle_bug+0x3b/0x80
[    3.362175 ]  exc_invalid_op+0x18/0x70
[    3.362175 ]  asm_exc_invalid_op+0x12/0x20
[    3.362175 ] RIP: 0010:free_irq+0x100/0x480
[    3.362175 ] Code: 6e 08 74 6f 4d 89 f4 e8 5e ac 09 00 4d 8b 74 24 18
4d 85 f6 75 e3 e8 4f ac 09 00 8b 75 c8 48 c7 c7 78 c1 2e 85 e8 e0 cf f5
ff <0f> 0b 48 8b 75 c0 4c 89 ff e8 72 33 0b 03 48 8b 43 40 4c 8b a0 80
[    3.362175 ] RSP: 0000:ffffc90000017b48 EFLAGS: 00010082
[    3.362175 ] RAX: 0000000000000000 RBX: ffff888104dc8000 RCX:
0000000000000000
[    3.362175 ] RDX: ffff8881003c8000 RSI: ffffffff8124a9e6 RDI:
00000000ffffffff
[    3.362175 ] RBP: ffffc90000017b88 R08: 0000000000000000 R09:
0000000000000000
[    3.362175 ] R10: ffffc900000179f0 R11: 0000000000001d04 R12:
0000000000000000
[    3.362175 ] R13: ffff888107dc6000 R14: ffff888107dc6928 R15:
ffff888104dc80a8
[    3.362175 ]  ? vprintk+0x76/0x150
[    3.362175 ]  ? free_irq+0x100/0x480
[    3.362175 ]  nj_release+0x51/0x1e0
[    3.362175 ]  nj_probe+0x450/0x950
[    3.362175 ]  ? pci_device_remove+0x110/0x110
[    3.362175 ]  local_pci_probe+0x45/0xa0
[    3.362175 ]  pci_device_probe+0x12b/0x1d0
[    3.362175 ]  really_probe+0x2a9/0x610
[    3.362175 ]  driver_probe_device+0x90/0x1d0
[    3.362175 ]  ? mutex_lock_nested+0x1b/0x20
[    3.362175 ]  device_driver_attach+0x68/0x70
[    3.362175 ]  __driver_attach+0x124/0x1b0
[    3.362175 ]  ? device_driver_attach+0x70/0x70
[    3.362175 ]  bus_for_each_dev+0xbb/0x110
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  driver_attach+0x27/0x30
[    3.362175 ]  bus_add_driver+0x1eb/0x2a0
[    3.362175 ]  driver_register+0xa9/0x180
[    3.362175 ]  __pci_register_driver+0x82/0x90
[    3.362175 ]  ? w6692_init+0x38/0x38
[    3.362175 ]  nj_init+0x36/0x38
[    3.362175 ]  do_one_initcall+0x7f/0x3d0
[    3.362175 ]  ? rdinit_setup+0x45/0x45
[    3.362175 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.362175 ]  kernel_init_freeable+0x2aa/0x301
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  kernel_init+0x18/0x190
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ? rest_init+0x2c0/0x2c0
[    3.362175 ]  ret_from_fork+0x1f/0x30
[    3.362175 ] Dumping ftrace buffer:
[    3.362175 ]    (ftrace buffer empty)
[    3.362175 ] Kernel Offset: disabled
[    3.362175 ] Rebooting in 1 seconds..

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/hardware/mISDN/netjet.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c
index ee925b58bbcea..2a1ddd47a0968 100644
--- a/drivers/isdn/hardware/mISDN/netjet.c
+++ b/drivers/isdn/hardware/mISDN/netjet.c
@@ -1100,7 +1100,6 @@ nj_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		card->typ = NETJET_S_TJ300;
 
 	card->base = pci_resource_start(pdev, 0);
-	card->irq = pdev->irq;
 	pci_set_drvdata(pdev, card);
 	err = setup_instance(card);
 	if (err)
-- 
GitLab


From 020ef930b826d21c5446fdc9db80fd72a791bc21 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 16 May 2021 14:44:42 +0000
Subject: [PATCH 0856/3804] mld: fix panic in mld_newpack()

mld_newpack() does not allow high-order page allocations;
only order-0 allocations are allowed.
If the headroom size is too large, a kernel panic can occur in skb_put().

Test commands:
    ip netns del A
    ip netns del B
    ip netns add A
    ip netns add B
    ip link add veth0 type veth peer name veth1
    ip link set veth0 netns A
    ip link set veth1 netns B

    ip netns exec A ip link set lo up
    ip netns exec A ip link set veth0 up
    ip netns exec A ip -6 a a 2001:db8:0::1/64 dev veth0
    ip netns exec B ip link set lo up
    ip netns exec B ip link set veth1 up
    ip netns exec B ip -6 a a 2001:db8:0::2/64 dev veth1
    for i in {1..99}
    do
        let A=$i-1
        ip netns exec A ip link add ip6gre$i type ip6gre \
	local 2001:db8:$A::1 remote 2001:db8:$A::2 encaplimit 100
        ip netns exec A ip -6 a a 2001:db8:$i::1/64 dev ip6gre$i
        ip netns exec A ip link set ip6gre$i up

        ip netns exec B ip link add ip6gre$i type ip6gre \
	local 2001:db8:$A::2 remote 2001:db8:$A::1 encaplimit 100
        ip netns exec B ip -6 a a 2001:db8:$i::2/64 dev ip6gre$i
        ip netns exec B ip link set ip6gre$i up
    done

Splat looks like:
kernel BUG at net/core/skbuff.c:110!
invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
CPU: 0 PID: 7 Comm: kworker/0:1 Not tainted 5.12.0+ #891
Workqueue: ipv6_addrconf addrconf_dad_work
RIP: 0010:skb_panic+0x15d/0x15f
Code: 92 fe 4c 8b 4c 24 10 53 8b 4d 70 45 89 e0 48 c7 c7 00 ae 79 83
41 57 41 56 41 55 48 8b 54 24 a6 26 f9 ff <0f> 0b 48 8b 6c 24 20 89
34 24 e8 4a 4e 92 fe 8b 34 24 48 c7 c1 20
RSP: 0018:ffff88810091f820 EFLAGS: 00010282
RAX: 0000000000000089 RBX: ffff8881086e9000 RCX: 0000000000000000
RDX: 0000000000000089 RSI: 0000000000000008 RDI: ffffed1020123efb
RBP: ffff888005f6eac0 R08: ffffed1022fc0031 R09: ffffed1022fc0031
R10: ffff888117e00187 R11: ffffed1022fc0030 R12: 0000000000000028
R13: ffff888008284eb0 R14: 0000000000000ed8 R15: 0000000000000ec0
FS:  0000000000000000(0000) GS:ffff888117c00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8b801c5640 CR3: 0000000033c2c006 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 ? ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 skb_put.cold.104+0x22/0x22
 ip6_mc_hdr.isra.26.constprop.46+0x12a/0x600
 ? rcu_read_lock_sched_held+0x91/0xc0
 mld_newpack+0x398/0x8f0
 ? ip6_mc_hdr.isra.26.constprop.46+0x600/0x600
 ? lock_contended+0xc40/0xc40
 add_grhead.isra.33+0x280/0x380
 add_grec+0x5ca/0xff0
 ? mld_sendpack+0xf40/0xf40
 ? lock_downgrade+0x690/0x690
 mld_send_initial_cr.part.34+0xb9/0x180
 ipv6_mc_dad_complete+0x15d/0x1b0
 addrconf_dad_completed+0x8d2/0xbb0
 ? lock_downgrade+0x690/0x690
 ? addrconf_rs_timer+0x660/0x660
 ? addrconf_dad_work+0x73c/0x10e0
 addrconf_dad_work+0x73c/0x10e0

Allowing high order page allocation could fix this problem.

Fixes: 72e09ad107e7 ("ipv6: avoid high order allocations")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 0d59efb6b49ec..d36ef9d25e73c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1745,10 +1745,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	/* limit our allocations to order-0 page */
-	size = min_t(int, size, SKB_MAX_ORDER(0, 0));
 	skb = sock_alloc_send_skb(sk, size, 1, &err);
-
 	if (!skb)
 		return NULL;
 
-- 
GitLab


From 04c26faa51d1e2fe71cf13c45791f5174c37f986 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 17 May 2021 02:28:58 +0800
Subject: [PATCH 0857/3804] tipc: wait and exit until all work queues are done

On some hosts, a crash can be triggered simply by repeating these
commands several times:

  # modprobe tipc
  # tipc bearer enable media udp name UDP1 localip 127.0.0.1
  # rmmod tipc

  [] BUG: unable to handle kernel paging request at ffffffffc096bb00
  [] Workqueue: events 0xffffffffc096bb00
  [] Call Trace:
  []  ? process_one_work+0x1a7/0x360
  []  ? worker_thread+0x30/0x390
  []  ? create_worker+0x1a0/0x1a0
  []  ? kthread+0x116/0x130
  []  ? kthread_flush_work_fn+0x10/0x10
  []  ? ret_from_fork+0x35/0x40

When removing the TIPC module, the release of the UDP tunnel sock is
deferred to a work queue, as sock_release() can't be done under
rtnl_lock(). If the work item is scheduled to run after the TIPC module
is removed, the kernel will crash, as the work function cleanup_bearer()
no longer exists when the work queue tries to invoke it.

To fix it, this patch introduces a member wq_count in tipc_net to track
the number of scheduled work items, and waits in tipc_exit_net() until
all of them are done before exiting.

Fixes: d0f91938bede ("tipc: add ip/udp media type")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c      | 2 ++
 net/tipc/core.h      | 2 ++
 net/tipc/udp_media.c | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 5cc1f03072150..72f3ac73779bf 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -119,6 +119,8 @@ static void __net_exit tipc_exit_net(struct net *net)
 #ifdef CONFIG_TIPC_CRYPTO
 	tipc_crypto_stop(&tipc_net(net)->crypto_tx);
 #endif
+	while (atomic_read(&tn->wq_count))
+		cond_resched();
 }
 
 static void __net_exit tipc_pernet_pre_exit(struct net *net)
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 03de7b213f553..5741ae488bb56 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -149,6 +149,8 @@ struct tipc_net {
 #endif
 	/* Work item for net finalize */
 	struct tipc_net_work final_work;
+	/* The numbers of work queues in schedule */
+	atomic_t wq_count;
 };
 
 static inline struct tipc_net *tipc_net(struct net *net)
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index e556d2cdc0644..c2bb818704c8f 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -814,6 +814,7 @@ static void cleanup_bearer(struct work_struct *work)
 		kfree_rcu(rcast, rcu);
 	}
 
+	atomic_dec(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
 	dst_cache_destroy(&ub->rcast.dst_cache);
 	udp_tunnel_sock_release(ub->ubsock);
 	synchronize_net();
@@ -834,6 +835,7 @@ static void tipc_udp_disable(struct tipc_bearer *b)
 	RCU_INIT_POINTER(ub->bearer, NULL);
 
 	/* sock_release need to be done outside of rtnl lock */
+	atomic_inc(&tipc_net(sock_net(ub->ubsock->sk))->wq_count);
 	INIT_WORK(&ub->work, cleanup_bearer);
 	schedule_work(&ub->work);
 }
-- 
GitLab


From 4710ccc52e8e504a5617a889843a18cd06f1ab72 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 10 May 2021 15:35:14 -0500
Subject: [PATCH 0858/3804] dt-bindings: media: renesas,drif: Use graph schema

Convert the renesas,drif binding schema to use the graph schema. The
binding referred to video-interfaces.txt, but it doesn't actually use any
properties from it as 'sync-active' is a custom property. As 'sync-active'
is custom, it needs a type definition.

Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Ramesh Shanmugasundaram <rashanmu@gmail.com>
Cc: linux-media@vger.kernel.org
Cc: linux-renesas-soc@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Link: https://lore.kernel.org/r/20210510203514.603471-1-robh@kernel.org
---
 .../bindings/media/renesas,drif.yaml          | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/renesas,drif.yaml b/Documentation/devicetree/bindings/media/renesas,drif.yaml
index f1bdaeab40538..ce505a7c006aa 100644
--- a/Documentation/devicetree/bindings/media/renesas,drif.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,drif.yaml
@@ -99,32 +99,26 @@ properties:
       Indicates that the channel acts as primary among the bonded channels.
 
   port:
-    type: object
+    $ref: /schemas/graph.yaml#/properties/port
+    unevaluatedProperties: false
     description:
-      Child port node corresponding to the data input, in accordance with the
-      video interface bindings defined in
-      Documentation/devicetree/bindings/media/video-interfaces.txt.
-      The port node must contain at least one endpoint.
+      Child port node corresponding to the data input. The port node must
+      contain at least one endpoint.
 
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/graph.yaml#/$defs/endpoint-base
+        unevaluatedProperties: false
 
         properties:
-          remote-endpoint:
-            description:
-              A phandle to the remote tuner endpoint subnode in remote node
-              port.
-
           sync-active:
+            $ref: /schemas/types.yaml#/definitions/uint32
             enum: [0, 1]
             description:
               Indicates sync signal polarity, 0/1 for low/high respectively.
               This property maps to SYNCAC bit in the hardware manual. The
               default is 1 (active high).
 
-        additionalProperties: false
-
 required:
   - compatible
   - reg
-- 
GitLab


From c17611592d9635c443bedc9be901f4463f45c6d5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 10 May 2021 15:45:24 -0500
Subject: [PATCH 0859/3804] dt-bindings: More removals of type references on
 common properties

Users of common properties shouldn't have a type definition as the
common schemas already have one. A few new ones slipped in and
*-names was missed in the last clean-up pass. Drop all the unnecessary
type references in the tree.

A meta-schema update to catch these is pending.

Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Olivier Moysan <olivier.moysan@foss.st.com>
Cc: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Orson Zhai <orsonzhai@gmail.com>
Cc: Baolin Wang <baolin.wang7@gmail.com>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Fabrice Gasnier <fabrice.gasnier@st.com>
Cc: Odelu Kukatla <okukatla@codeaurora.org>
Cc: Alex Elder <elder@kernel.org>
Cc: Shengjiu Wang <shengjiu.wang@nxp.com>
Cc: linux-clk@vger.kernel.org
Cc: alsa-devel@alsa-project.org
Cc: linux-iio@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-input@vger.kernel.org
Cc: linux-pm@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Georgi Djakov <djakov@kernel.org>
Reviewed-by: Luca Ceresoli <luca@lucaceresoli.net>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20210510204524.617390-1-robh@kernel.org
---
 Documentation/devicetree/bindings/clock/idt,versaclock5.yaml    | 2 --
 .../devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml         | 1 -
 Documentation/devicetree/bindings/input/input.yaml              | 1 -
 Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml   | 1 -
 Documentation/devicetree/bindings/net/qcom,ipa.yaml             | 1 -
 .../devicetree/bindings/power/supply/sc2731-charger.yaml        | 2 +-
 Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml          | 2 +-
 7 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
index c268debe5b8d5..28675b0b80f1b 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -60,7 +60,6 @@ properties:
     maxItems: 2
 
   idt,xtal-load-femtofarads:
-    $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 9000
     maximum: 22760
     description: Optional load capacitor for XTAL1 and XTAL2
@@ -84,7 +83,6 @@ patternProperties:
         enum: [ 1800000, 2500000, 3300000 ]
       idt,slew-percent:
         description: The Slew rate control for CMOS single-ended.
-        $ref: /schemas/types.yaml#/definitions/uint32
         enum: [ 80, 85, 90, 100 ]
 
 required:
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
index 6f2398cdc82dd..1e7894e524f9b 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
@@ -102,7 +102,6 @@ patternProperties:
 
       st,adc-channel-names:
         description: List of single-ended channel names.
-        $ref: /schemas/types.yaml#/definitions/string-array
 
       st,filter-order:
         description: |
diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml
index 74244d21d2b3f..d41d8743aad4e 100644
--- a/Documentation/devicetree/bindings/input/input.yaml
+++ b/Documentation/devicetree/bindings/input/input.yaml
@@ -38,6 +38,5 @@ properties:
       Duration in seconds which the key should be kept pressed for device to
       reset automatically. Device with key pressed reset feature can specify
       this property.
-    $ref: /schemas/types.yaml#/definitions/uint32
 
 additionalProperties: true
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
index cb6498108b78a..36c955965d902 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
@@ -92,7 +92,6 @@ properties:
       this interconnect to send RPMh commands.
 
   qcom,bcm-voter-names:
-    $ref: /schemas/types.yaml#/definitions/string-array
     description: |
       Names for each of the qcom,bcm-voters specified.
 
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 7443490d4cc6d..5fe6d3dceb082 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -105,7 +105,6 @@ properties:
       - description: Whether the IPA clock is enabled (if valid)
 
   qcom,smem-state-names:
-    $ref: /schemas/types.yaml#/definitions/string-array
     description: The names of the state bits used for SMP2P output
     items:
       - const: ipa-clock-enabled-valid
diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
index db1aa238cda53..b62c2431f94e7 100644
--- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
@@ -20,7 +20,7 @@ properties:
     maxItems: 1
 
   phys:
-    $ref: /schemas/types.yaml#/definitions/phandle
+    maxItems: 1
     description: phandle to the USB phy
 
   monitored-battery:
diff --git a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
index b4c190bddd84c..61802a11baf4e 100644
--- a/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
+++ b/Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
@@ -49,7 +49,7 @@ properties:
     maxItems: 1
 
   memory-region:
-    $ref: /schemas/types.yaml#/definitions/phandle
+    maxItems: 1
     description:
       phandle to a node describing reserved memory (System RAM memory)
       The M core can't access all the DDR memory space on some platform,
-- 
GitLab


From 3c814519743a919f8b3c236c0565e24709806d66 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 17 May 2021 10:19:54 -0400
Subject: [PATCH 0860/3804] MAINTAINERS: net: remove stale website link

The page at http://www.linuxfoundation.org/en/Net does not contain a
networking subsystem description ("Nothing found").

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1d834bebf469c..c1cb2e38ae2ed 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12709,7 +12709,6 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-W:	http://www.linuxfoundation.org/en/Net
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
@@ -12754,7 +12753,6 @@ M:	"David S. Miller" <davem@davemloft.net>
 M:	Jakub Kicinski <kuba@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-W:	http://www.linuxfoundation.org/en/Net
 Q:	https://patchwork.kernel.org/project/netdevbpf/list/
 B:	mailto:netdev@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
-- 
GitLab


From 35d96e631860226d5dc4de0fad0a415362ec2457 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:13:35 +0200
Subject: [PATCH 0861/3804] bonding: init notify_work earlier to avoid
 uninitialized use

If bond_kobj_init() or later kzalloc() in bond_alloc_slave() fail,
then we call kobject_put() on the slave->kobj. This in turn calls
the release function slave_kobj_release() which will always try to
cancel_delayed_work_sync(&slave->notify_work), which shouldn't be
done on an uninitialized work struct.

Always initialize the work struct earlier to avoid problems here.

Syzbot bisected this down to a completely pointless commit, some
fault injection may have been at work here that caused the alloc
failure in the first place, which may interact badly with bisect.

Reported-by: syzbot+bfda097c12a00c8cae67@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 20bbda1b36e1e..c5a646d06102a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1526,6 +1526,7 @@ static struct slave *bond_alloc_slave(struct bonding *bond,
 
 	slave->bond = bond;
 	slave->dev = slave_dev;
+	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
 
 	if (bond_kobj_init(slave))
 		return NULL;
@@ -1538,7 +1539,6 @@ static struct slave *bond_alloc_slave(struct bonding *bond,
 			return NULL;
 		}
 	}
-	INIT_DELAYED_WORK(&slave->notify_work, bond_netdev_notify_work);
 
 	return slave;
 }
-- 
GitLab


From 444d7be9532dcfda8e0385226c862fd7e986f607 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Mon, 17 May 2021 10:47:06 +0200
Subject: [PATCH 0862/3804] net/smc: remove device from smcd_dev_list after
 failed device_add()

If the device_add() for a smcd_dev fails, there's no cleanup step that
rolls back the earlier list_add(). The device subsequently gets freed,
and we end up with a corrupted list.

Add some error handling that removes the device from the list.

Fixes: c6ba7c9ba43d ("net/smc: add base infrastructure for SMC-D and ISM")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ism.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 9c6e95882553e..d24b96ea0eb5c 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -428,6 +428,8 @@ EXPORT_SYMBOL_GPL(smcd_alloc_dev);
 
 int smcd_register_dev(struct smcd_dev *smcd)
 {
+	int rc;
+
 	mutex_lock(&smcd_dev_list.mutex);
 	if (list_empty(&smcd_dev_list.list)) {
 		u8 *system_eid = NULL;
@@ -447,7 +449,14 @@ int smcd_register_dev(struct smcd_dev *smcd)
 			    dev_name(&smcd->dev), smcd->pnetid,
 			    smcd->pnetid_by_user ? " (user defined)" : "");
 
-	return device_add(&smcd->dev);
+	rc = device_add(&smcd->dev);
+	if (rc) {
+		mutex_lock(&smcd_dev_list.mutex);
+		list_del(&smcd->list);
+		mutex_unlock(&smcd_dev_list.mutex);
+	}
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(smcd_register_dev);
 
-- 
GitLab


From 1d482e666b8e74c7555dbdfbfb77205eeed3ff2d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:38:09 +0200
Subject: [PATCH 0863/3804] netlink: disable IRQs for netlink_lock_table()

Syzbot reports that in mac80211 we have a potential deadlock
between our "local->queue_stop_reason_lock" (spinlock) and
netlink's nl_table_lock (rwlock). This is because there's at
least one situation in which we might try to send a netlink
message with this spinlock held while it is also possible to
take the spinlock from a hardirq context, resulting in the
following deadlock scenario reported by lockdep:

       CPU0                    CPU1
       ----                    ----
  lock(nl_table_lock);
                               local_irq_disable();
                               lock(&local->queue_stop_reason_lock);
                               lock(nl_table_lock);
  <Interrupt>
    lock(&local->queue_stop_reason_lock);

This seems valid, we can take the queue_stop_reason_lock in
any kind of context ("CPU0"), and call ieee80211_report_ack_skb()
with the spinlock held and IRQs disabled ("CPU1") in some
code path (ieee80211_do_stop() via ieee80211_free_txskb()).

Short of disallowing netlink use in scenarios like these
(which would be rather complex in mac80211's case due to
the deep callchain), it seems the only fix for this is to
disable IRQs while nl_table_lock is held to avoid hitting
this scenario; this disallows the "CPU0" portion of the
reported deadlock.

Note that the writer side (netlink_table_grab()) already
disables IRQs for this lock.

Unfortunately though, this seems like a huge hammer, and
maybe the whole netlink table locking should be reworked.

Reported-by: syzbot+69ff9dff50dcfe14ddd4@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3a62f97acf39d..6133e412b948c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -461,11 +461,13 @@ void netlink_table_ungrab(void)
 static inline void
 netlink_lock_table(void)
 {
+	unsigned long flags;
+
 	/* read_lock() synchronizes us to netlink_table_grab */
 
-	read_lock(&nl_table_lock);
+	read_lock_irqsave(&nl_table_lock, flags);
 	atomic_inc(&nl_table_users);
-	read_unlock(&nl_table_lock);
+	read_unlock_irqrestore(&nl_table_lock, flags);
 }
 
 static inline void
-- 
GitLab


From 5aec55b46c6238506cdf0c60cd0e42ab77a1e5e0 Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 17 May 2021 14:08:11 -0700
Subject: [PATCH 0864/3804] gve: Check TX QPL was actually assigned

Correctly check the TX QPL was assigned and unassigned if
other steps in the allocation fail.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_tx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 6938f3a939d64..bb57c42872b48 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -212,10 +212,11 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 	tx->dev = &priv->pdev->dev;
 	if (!tx->raw_addressing) {
 		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
-
+		if (!tx->tx_fifo.qpl)
+			goto abort_with_desc;
 		/* map Tx FIFO */
 		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
-			goto abort_with_desc;
+			goto abort_with_qpl;
 	}
 
 	tx->q_resources =
@@ -236,6 +237,9 @@ static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
 abort_with_fifo:
 	if (!tx->raw_addressing)
 		gve_tx_fifo_release(priv, &tx->tx_fifo);
+abort_with_qpl:
+	if (!tx->raw_addressing)
+		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
 abort_with_desc:
 	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
 	tx->desc = NULL;
-- 
GitLab


From e96b491a0ffa35a8a9607c193fa4d894ca9fb32f Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:12 -0700
Subject: [PATCH 0865/3804] gve: Update mgmt_msix_idx if num_ntfy changes

If we do not get the expected number of vectors from
pci_enable_msix_range, we update priv->num_ntfy_blks but not
priv->mgmt_msix_idx. This patch fixes this so that priv->mgmt_msix_idx
is updated accordingly.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 7302498c6df36..64192942ca53a 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -220,6 +220,7 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
 		int vecs_left = new_num_ntfy_blks % 2;
 
 		priv->num_ntfy_blks = new_num_ntfy_blks;
+		priv->mgmt_msix_idx = priv->num_ntfy_blks;
 		priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues,
 						vecs_per_type);
 		priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues,
-- 
GitLab


From 5218e919c8d06279884aa0baf76778a6817d5b93 Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:13 -0700
Subject: [PATCH 0866/3804] gve: Add NULL pointer checks when freeing irqs.

When freeing notification blocks, we index priv->msix_vectors.
If we failed to allocate priv->msix_vectors (see abort_with_msix_vectors)
this could lead to a NULL pointer dereference if the driver is unloaded.

Fixes: 893ce44df565 ("gve: Add basic driver framework for Compute Engine Virtual NIC")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 64192942ca53a..21a5d058dab44 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -301,20 +301,22 @@ static void gve_free_notify_blocks(struct gve_priv *priv)
 {
 	int i;
 
-	/* Free the irqs */
-	for (i = 0; i < priv->num_ntfy_blks; i++) {
-		struct gve_notify_block *block = &priv->ntfy_blocks[i];
-		int msix_idx = i;
-
-		irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
-				      NULL);
-		free_irq(priv->msix_vectors[msix_idx].vector, block);
+	if (priv->msix_vectors) {
+		/* Free the irqs */
+		for (i = 0; i < priv->num_ntfy_blks; i++) {
+			struct gve_notify_block *block = &priv->ntfy_blocks[i];
+			int msix_idx = i;
+
+			irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+					      NULL);
+			free_irq(priv->msix_vectors[msix_idx].vector, block);
+		}
+		free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	}
 	dma_free_coherent(&priv->pdev->dev,
 			  priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
 			  priv->ntfy_blocks, priv->ntfy_block_bus);
 	priv->ntfy_blocks = NULL;
-	free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
 	pci_disable_msix(priv->pdev);
 	kvfree(priv->msix_vectors);
 	priv->msix_vectors = NULL;
-- 
GitLab


From f81781835f0adfae8d701545386030d223efcd6f Mon Sep 17 00:00:00 2001
From: Catherine Sullivan <csully@google.com>
Date: Mon, 17 May 2021 14:08:14 -0700
Subject: [PATCH 0867/3804] gve: Upgrade memory barrier in poll routine

As currently written, if the driver checks for more work (via
gve_tx_poll or gve_rx_poll) before the device posts work and the
irq doorbell is not unmasked
(via iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, ...)) before the device
attempts to raise an interrupt, an interrupt is lost and this could
potentially lead to the traffic being completely halted. For
example, if a tx queue has already been stopped, the driver won't get
the chance to complete work and egress will be halted.

We need a full memory barrier in the poll
routine to ensure that the irq doorbell is unmasked before the driver
checks for more work.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 21a5d058dab44..bbc423e931223 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -180,7 +180,7 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
 	/* Double check we have no extra work.
 	 * Ensure unmask synchronizes with checking for work.
 	 */
-	dma_rmb();
+	mb();
 	if (block->tx)
 		reschedule |= gve_tx_poll(block, -1);
 	if (block->rx)
-- 
GitLab


From fbd4a28b4fa66faaa7f510c0adc531d37e0a7848 Mon Sep 17 00:00:00 2001
From: David Awogbemila <awogbemila@google.com>
Date: Mon, 17 May 2021 14:08:15 -0700
Subject: [PATCH 0868/3804] gve: Correct SKB queue index validation.

SKBs with skb_get_queue_mapping(skb) == tx_cfg.num_queues should also be
considered invalid.

Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: David Awogbemila <awogbemila@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/google/gve/gve_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index bb57c42872b48..3e04a3973d680 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -593,7 +593,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
 	struct gve_tx_ring *tx;
 	int nsegs;
 
-	WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues,
+	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
 	     "skb queue index out of range");
 	tx = &priv->tx[skb_get_queue_mapping(skb)];
 	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
-- 
GitLab


From 1dde47a66d4fb181830d6fa000e5ea86907b639e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 17 May 2021 12:04:13 +0300
Subject: [PATCH 0869/3804] net: mdiobus: get rid of a BUG_ON()

We spotted a bug recently during a review where a driver was
unregistering a bus that wasn't registered, which would trigger this
BUG_ON().  Let's handle that situation more gracefully, and just print
a warning and return.

Reported-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index dadf75ff3ab93..6045ad3def123 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -607,7 +607,8 @@ void mdiobus_unregister(struct mii_bus *bus)
 	struct mdio_device *mdiodev;
 	int i;
 
-	BUG_ON(bus->state != MDIOBUS_REGISTERED);
+	if (WARN_ON_ONCE(bus->state != MDIOBUS_REGISTERED))
+		return;
 	bus->state = MDIOBUS_UNREGISTERED;
 
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
-- 
GitLab


From f0fb26c456a30d6009faa2c9d44aa22f5bf88c90 Mon Sep 17 00:00:00 2001
From: Chu Lin <linchuyuan@google.com>
Date: Mon, 17 May 2021 22:26:06 +0000
Subject: [PATCH 0870/3804] hwmon/pmbus: (q54sj108a2) The PMBUS_MFR_ID is
 actually 6 chars instead of 5

The PMBUS_MFR_ID block is actually 6 chars for q54sj108a2.
/sys/bus/i2c/drivers/q54sj108a2_test# iotools smbus_read8 $BUS $ADDR 0x99
0x06

Tested: Devices are able to bind to the q54sj108a2 driver successfully.

Signed-off-by: Chu Lin <linchuyuan@google.com>
Link: https://lore.kernel.org/r/20210517222606.3457594-1-linchuyuan@google.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/q54sj108a2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/q54sj108a2.c b/drivers/hwmon/pmbus/q54sj108a2.c
index b6e8b20466f15..fa298b4265a1c 100644
--- a/drivers/hwmon/pmbus/q54sj108a2.c
+++ b/drivers/hwmon/pmbus/q54sj108a2.c
@@ -299,7 +299,7 @@ static int q54sj108a2_probe(struct i2c_client *client)
 		dev_err(&client->dev, "Failed to read Manufacturer ID\n");
 		return ret;
 	}
-	if (ret != 5 || strncmp(buf, "DELTA", 5)) {
+	if (ret != 6 || strncmp(buf, "DELTA", 5)) {
 		buf[ret] = '\0';
 		dev_err(dev, "Unsupported Manufacturer ID '%s'\n", buf);
 		return -ENODEV;
-- 
GitLab


From c67d734975a25ba7b6e8f820c13e0d8eb4a2a77c Mon Sep 17 00:00:00 2001
From: Milian Wolff <milian.wolff@kdab.com>
Date: Thu, 29 Apr 2021 20:57:59 +0200
Subject: [PATCH 0871/3804] perf buildid-list: Initialize zstd_data

Fixes segmentation fault when trying to obtain buildid list (e.g. via
perf-archive) from a zstd-compressed `perf.data` file:

```
    $ perf record -z ls
    ...
    [ perf record: Captured and wrote 0,010 MB perf.data, compressed (original 0,001 MB, ratio is 2,190) ]
    $ memcheck perf buildid-list
    ...
    ==57268== Invalid read of size 4
    ==57268==    at 0x5260D88: ZSTD_decompressStream (in /usr/lib/libzstd.so.1.4.9)
    ==57268==    by 0x4BB51B: zstd_decompress_stream (zstd.c:100)
    ==57268==    by 0x425C6C: perf_session__process_compressed_event (session.c:73)
    ==57268==    by 0x427450: perf_session__process_user_event (session.c:1631)
    ==57268==    by 0x42A609: reader__process_events (session.c:2207)
    ==57268==    by 0x42A609: __perf_session__process_events (session.c:2264)
    ==57268==    by 0x42A609: perf_session__process_events (session.c:2297)
    ==57268==    by 0x343A62: perf_session__list_build_ids (builtin-buildid-list.c:88)
    ==57268==    by 0x343A62: cmd_buildid_list (builtin-buildid-list.c:120)
    ==57268==    by 0x3C7732: run_builtin (perf.c:313)
    ==57268==    by 0x331157: handle_internal_command (perf.c:365)
    ==57268==    by 0x331157: run_argv (perf.c:409)
    ==57268==    by 0x331157: main (perf.c:539)
    ==57268==  Address 0x7470 is not stack'd, malloc'd or (recently) free'd
```

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Link: http://lore.kernel.org/lkml/20210429185759.59870-1-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-buildid-list.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 87f5b1a4a7fa8..833405c27dae2 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -80,6 +80,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	if (!perf_header__has_feat(&session->header, HEADER_BUILD_ID))
 		with_hits = true;
 
+	if (zstd_init(&(session->zstd_data), 0) < 0)
+		pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
+
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
-- 
GitLab


From 3c91e8efaf4838e4c8e465656e9707b5de26f3db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 12 May 2021 12:35:07 -0300
Subject: [PATCH 0872/3804] tools arch kvm: Sync kvm headers with the kernel
 sources

To pick up the changes from:

  70f094f4f01dc4d6 ("KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr'")

That don't entail changes in tooling.

This silences these tools/perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/kvm.h' differs from latest version at 'arch/x86/include/uapi/asm/kvm.h'
  diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/uapi/asm/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a3022c8af82b..0662f644aad9d 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
 		__u16 flags;
 	} smm;
 
+	__u16 pad;
+
 	__u32 flags;
 	__u64 preemption_timer_deadline;
 };
-- 
GitLab


From 963cdcc37e98b8dd2894a4a5d48c9d5fe0ae903b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:33:19 -0600
Subject: [PATCH 0873/3804] tee: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Jens Wiklander <jens.wiklander@linaro.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/tee/tee_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 480d294a23ab0..2b37bc408fc3d 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -452,6 +452,7 @@ static int params_to_user(struct tee_ioctl_param __user *uparams,
 		case TEE_IOCTL_PARAM_ATTR_TYPE_MEMREF_INOUT:
 			if (put_user((u64)p->u.memref.size, &up->b))
 				return -EFAULT;
+			break;
 		default:
 			break;
 		}
-- 
GitLab


From 47ce0b65bfb337a7bb4958b076ef8d2865d6d07c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:34:24 -0600
Subject: [PATCH 0874/3804] atm: fore200e: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a fallthrough pseudo-keyword.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/atm/fore200e.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index b508df2ecadab..fb2be3574c26a 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -420,6 +420,7 @@ fore200e_shutdown(struct fore200e* fore200e)
 	/* XXX shouldn't we *start* by deregistering the device? */
 	atm_dev_deregister(fore200e->atm_dev);
 
+	fallthrough;
     case FORE200E_STATE_BLANK:
 	/* nothing to do for that state */
 	break;
-- 
GitLab


From 18a0e8d0f103af40c82f751fabb0b4cb0bf2f32a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:32:51 -0600
Subject: [PATCH 0875/3804] watchdog: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a fallthrough pseudo-keyword instead of letting the
code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/watchdog/machzwd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/watchdog/machzwd.c b/drivers/watchdog/machzwd.c
index 743377c5b1735..73f2221f6222f 100644
--- a/drivers/watchdog/machzwd.c
+++ b/drivers/watchdog/machzwd.c
@@ -174,6 +174,7 @@ static inline void zf_set_timer(unsigned short new, unsigned char n)
 		fallthrough;
 	case WD2:
 		zf_writeb(COUNTER_2, new > 0xff ? 0xff : new);
+		fallthrough;
 	default:
 		return;
 	}
-- 
GitLab


From fc7980915a8601b9a54c547a74dad0fdffc674a5 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:32:46 -0600
Subject: [PATCH 0876/3804] vxge: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a return statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/net/ethernet/neterion/vxge/vxge-config.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.c b/drivers/net/ethernet/neterion/vxge/vxge-config.c
index 5162b938a1ac0..b47d74743f5a4 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.c
@@ -3784,6 +3784,7 @@ vxge_hw_rts_rth_data0_data1_get(u32 j, u64 *data0, u64 *data1,
 			VXGE_HW_RTS_ACCESS_STEER_DATA1_RTH_ITEM1_ENTRY_EN |
 			VXGE_HW_RTS_ACCESS_STEER_DATA1_RTH_ITEM1_BUCKET_DATA(
 			itable[j]);
+		return;
 	default:
 		return;
 	}
-- 
GitLab


From c3754da3b7391006eaafa41fb28239268655afa5 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:32:14 -0600
Subject: [PATCH 0877/3804] reiserfs: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 fs/reiserfs/namei.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 017db70d0f486..3d7a35d6a18bc 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -132,6 +132,7 @@ int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
 			return IO_ERROR;
 		}
 		PATH_LAST_POSITION(path)--;
+		break;
 
 	case ITEM_FOUND:
 		break;
-- 
GitLab


From ebd0476256bea64fb0146f28a079c9aa9ce670c0 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:31:30 -0600
Subject: [PATCH 0878/3804] nfp: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index b3cabc274121b..3b8e675087dea 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -103,6 +103,7 @@ nfp_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
 	case NFP_PORT_PF_PORT:
 	case NFP_PORT_VF_PORT:
 		nfp_repr_vnic_get_stats64(repr->port, stats);
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 9b8b84c168b6c1e033a21b7a6880550a802ad378 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:31:24 -0600
Subject: [PATCH 0879/3804] netxen_nic: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a goto statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
index 08f9477d2ee84..35ec9aab3dc7b 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
@@ -1685,6 +1685,7 @@ netxen_process_rcv_ring(struct nx_host_sds_ring *sds_ring, int max)
 			break;
 		case NETXEN_NIC_RESPONSE_DESC:
 			netxen_handle_fw_message(desc_cnt, consumer, sds_ring);
+			goto skip;
 		default:
 			goto skip;
 		}
-- 
GitLab


From cc9fd18032efada6433712f52de8d98dfbd00fd2 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:27:50 -0600
Subject: [PATCH 0880/3804] bnxt_en: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of just letting the code
fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2985844634c8b..be150ea01bd1c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2183,6 +2183,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
 	case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
 		bnxt_async_event_process(bp,
 					 (struct hwrm_async_event_cmpl *)txcmp);
+		break;
 
 	default:
 		break;
-- 
GitLab


From d66aea44b0ac7b58d9da8269ebd053f164e4aa54 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:27:00 -0600
Subject: [PATCH 0881/3804] qlcnic: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding break and goto statements instead of
just letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c   | 1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index bdf15d2a64313..af4c516a9e7cd 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -1390,6 +1390,7 @@ static int qlcnic_process_rcv_ring(struct qlcnic_host_sds_ring *sds_ring, int ma
 			break;
 		case QLCNIC_RESPONSE_DESC:
 			qlcnic_handle_fw_message(desc_cnt, consumer, sds_ring);
+			goto skip;
 		default:
 			goto skip;
 		}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 96b947fde646b..8966f1bcda77a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -3455,6 +3455,7 @@ wait_npar:
 			adapter->fw_wait_cnt = 0;
 			return;
 		}
+		break;
 	case QLCNIC_DEV_FAILED:
 		break;
 	default:
-- 
GitLab


From 79121184f8e7c41c0ffe483f402b9d7f89256698 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:25:57 -0600
Subject: [PATCH 0882/3804] ipv4: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of just
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/ipv4/ah4.c           | 1 +
 net/ipv4/esp4.c          | 1 +
 net/ipv4/fib_semantics.c | 1 +
 net/ipv4/ip_vti.c        | 1 +
 net/ipv4/ipcomp.c        | 1 +
 5 files changed, 5 insertions(+)

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 36ed85bf2ad51..fab0958c41bed 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -450,6 +450,7 @@ static int ah4_err(struct sk_buff *skb, u32 info)
 	case ICMP_DEST_UNREACH:
 		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 			return 0;
+		break;
 	case ICMP_REDIRECT:
 		break;
 	default:
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 35803ab7ac804..8e3b445a8c219 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -982,6 +982,7 @@ static int esp4_err(struct sk_buff *skb, u32 info)
 	case ICMP_DEST_UNREACH:
 		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 			return 0;
+		break;
 	case ICMP_REDIRECT:
 		break;
 	default:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a632b66bc13ab..4c0c33e4710da 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1874,6 +1874,7 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
 		    (nh->fib_nh_flags & RTNH_F_DEAD))
 			return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
 						   event_type, &info.info);
+		break;
 	default:
 		break;
 	}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 31c6c6d99d5ec..eb560eecee089 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -351,6 +351,7 @@ static int vti4_err(struct sk_buff *skb, u32 info)
 	case ICMP_DEST_UNREACH:
 		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 			return 0;
+		break;
 	case ICMP_REDIRECT:
 		break;
 	default:
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index b42683212c659..bbb56f5e06dde 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -31,6 +31,7 @@ static int ipcomp4_err(struct sk_buff *skb, u32 info)
 	case ICMP_DEST_UNREACH:
 		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 			return 0;
+		break;
 	case ICMP_REDIRECT:
 		break;
 	default:
-- 
GitLab


From f5e9724c42d94b9acf061cc8225c4778b22186b7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:34:35 -0600
Subject: [PATCH 0883/3804] braille_console: Fix fall-through warnings for
 Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/accessibility/braille/braille_console.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index 9861302cc7dbb..359bead4b2805 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -246,6 +246,7 @@ static int keyboard_notifier_call(struct notifier_block *blk,
 				beep(440);
 		}
 	}
+		break;
 	case KBD_UNBOUND_KEYCODE:
 	case KBD_UNICODE:
 	case KBD_KEYSYM:
-- 
GitLab


From 5ef73b6e652107bbdcf65d10a477cfc027ee9090 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:35:59 -0600
Subject: [PATCH 0884/3804] firewire: core: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a fallthrough pseudo-keyword.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/firewire/core-topology.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c
index ec68ed27b0a5f..b63d55f5ebd33 100644
--- a/drivers/firewire/core-topology.c
+++ b/drivers/firewire/core-topology.c
@@ -58,6 +58,7 @@ static u32 *count_ports(u32 *sid, int *total_port_count, int *child_port_count)
 		case SELFID_PORT_PARENT:
 		case SELFID_PORT_NCONN:
 			(*total_port_count)++;
+			fallthrough;
 		case SELFID_PORT_NONE:
 			break;
 		}
-- 
GitLab


From 3752445d79ee73fc2cb08c35c82890b2ef5c9757 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:36:04 -0600
Subject: [PATCH 0885/3804] hwmon: (corsair-cpro) Fix fall-through warnings for
 Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Guenter Roeck <linux@roeck-us.net>
Marius Zachmann <mail@mariuszachmann.de>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/hwmon/corsair-cpro.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/corsair-cpro.c b/drivers/hwmon/corsair-cpro.c
index 591929ec217a6..fa6aa4fc8b521 100644
--- a/drivers/hwmon/corsair-cpro.c
+++ b/drivers/hwmon/corsair-cpro.c
@@ -310,6 +310,7 @@ static int ccp_write(struct device *dev, enum hwmon_sensor_types type,
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 58e31cf015e68e2696cbced6f2128ec68162ef17 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:36:09 -0600
Subject: [PATCH 0886/3804] hwmon: (max6621) Fix fall-through warnings for
 Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/hwmon/max6621.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/max6621.c b/drivers/hwmon/max6621.c
index 367855d5edaee..7821132e17faa 100644
--- a/drivers/hwmon/max6621.c
+++ b/drivers/hwmon/max6621.c
@@ -156,7 +156,7 @@ max6621_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr,
 		default:
 			break;
 		}
-
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 731d5f441e1c6c1c4f012ac43b644f63e9ae8478 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:36:20 -0600
Subject: [PATCH 0887/3804] ide: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/ide/siimage.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
index c4b20f350b84b..145c0ab3a856d 100644
--- a/drivers/ide/siimage.c
+++ b/drivers/ide/siimage.c
@@ -493,6 +493,7 @@ static int init_chipset_siimage(struct pci_dev *dev)
 	case 0x30:
 		/* Clocking is disabled, attempt to force 133MHz clocking. */
 		sil_iowrite8(dev, tmp & ~0x20, scsc_addr);
+		break;
 	case 0x10:
 		/* On 133Mhz clocking. */
 		break;
-- 
GitLab


From 6518e3fc972ed54772f81e8b89b17be47f6d55f3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:38:15 -0600
Subject: [PATCH 0888/3804] net: netrom: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/netrom/nr_route.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 78da5eab252a0..de0456073dc0b 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -266,6 +266,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
 		fallthrough;
 	case 2:
 		re_sort_routes(nr_node, 0, 1);
+		break;
 	case 1:
 		break;
 	}
@@ -359,6 +360,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n
 					fallthrough;
 				case 1:
 					nr_node->routes[1] = nr_node->routes[2];
+					fallthrough;
 				case 2:
 					break;
 				}
@@ -482,6 +484,7 @@ static int nr_dec_obs(void)
 					fallthrough;
 				case 1:
 					s->routes[1] = s->routes[2];
+					break;
 				case 2:
 					break;
 				}
@@ -529,6 +532,7 @@ void nr_rt_device_down(struct net_device *dev)
 							fallthrough;
 						case 1:
 							t->routes[1] = t->routes[2];
+							break;
 						case 2:
 							break;
 						}
-- 
GitLab


From 5af5a020ddd10afc6caff05b4c941c2b1c17bf1d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:38:20 -0600
Subject: [PATCH 0889/3804] net/packet: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/packet/af_packet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ba96db1880eae..bfd805d6d79d2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1655,6 +1655,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
 	case PACKET_FANOUT_ROLLOVER:
 		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
 			return -EINVAL;
+		break;
 	case PACKET_FANOUT_HASH:
 	case PACKET_FANOUT_LB:
 	case PACKET_FANOUT_CPU:
-- 
GitLab


From 3754fa747dc0fb41cd1fd9a79bcb442dfe6802b3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:38:47 -0600
Subject: [PATCH 0890/3804] rds: Fix fall-through warnings for Clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple break statements instead of
letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/rds/tcp_connect.c | 1 +
 net/rds/threads.c     | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 4e64598176b05..5461d77fff4f4 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -78,6 +78,7 @@ void rds_tcp_state_change(struct sock *sk)
 	case TCP_CLOSE_WAIT:
 	case TCP_CLOSE:
 		rds_conn_path_drop(cp, false);
+		break;
 	default:
 		break;
 	}
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 32dc50f0a3031..1f424cbfcbb47 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -208,6 +208,7 @@ void rds_send_worker(struct work_struct *work)
 		case -ENOMEM:
 			rds_stats_inc(s_send_delayed_retry);
 			queue_delayed_work(rds_wq, &cp->cp_send_w, 2);
+			break;
 		default:
 			break;
 		}
@@ -232,6 +233,7 @@ void rds_recv_worker(struct work_struct *work)
 		case -ENOMEM:
 			rds_stats_inc(s_recv_delayed_retry);
 			queue_delayed_work(rds_wq, &cp->cp_recv_w, 2);
+			break;
 		default:
 			break;
 		}
-- 
GitLab


From 0572b37b27f4f26bfd53f0f10186fa1783b97421 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:39:41 -0600
Subject: [PATCH 0891/3804] sctp: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a couple
of warnings by explicitly adding a break statement and replacing a
comment with a goto statement instead of letting the code fall through
to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/sctp/input.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index d508f6f3dd08a..5ceaf75105bae 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -633,7 +633,7 @@ int sctp_v4_err(struct sk_buff *skb, __u32 info)
 		break;
 	case ICMP_REDIRECT:
 		sctp_icmp_redirect(sk, transport, skb);
-		/* Fall through to out_unlock. */
+		goto out_unlock;
 	default:
 		goto out_unlock;
 	}
@@ -1236,6 +1236,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
 						net, ch, laddr,
 						sctp_hdr(skb)->source,
 						transportp);
+			break;
 		default:
 			break;
 		}
-- 
GitLab


From 1c78ba4924107b06de60f887a0d20d9b3e9bc9e2 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:40:08 -0600
Subject: [PATCH 0892/3804] tipc: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/tipc/link.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1151092594302..bcc426e167259 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -649,6 +649,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
 			break;
 		case LINK_FAILOVER_BEGIN_EVT:
 			l->state = LINK_FAILINGOVER;
+			break;
 		case LINK_FAILURE_EVT:
 		case LINK_RESET_EVT:
 		case LINK_ESTABLISH_EVT:
-- 
GitLab


From 135436a7d2cdd505aacc142f7f57e388b23ba73e Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:41:00 -0600
Subject: [PATCH 0893/3804] xfrm: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/xfrm/xfrm_interface.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 8831f5a9e9923..41de46b5ffa94 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -432,6 +432,7 @@ static int xfrmi4_err(struct sk_buff *skb, u32 info)
 	case ICMP_DEST_UNREACH:
 		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
 			return 0;
+		break;
 	case ICMP_REDIRECT:
 		break;
 	default:
-- 
GitLab


From fea63d54f7a3e74f8ab489a8b82413a29849a594 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 May 2021 12:42:32 -0500
Subject: [PATCH 0894/3804] x86/sev-es: Move sev_es_put_ghcb() in prep for
 follow on patch

Move the location of sev_es_put_ghcb() in preparation for an update to it
in a follow-on patch. This will better highlight the changes being made
to the function.

No functional change.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/8c07662ec17d3d82e5c53841a1d9e766d3bdbab6.1621273353.git.thomas.lendacky@amd.com
---
 arch/x86/kernel/sev.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 9578c82832aa2..45e212675811c 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -221,24 +221,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 	return ghcb;
 }
 
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
-	struct sev_es_runtime_data *data;
-	struct ghcb *ghcb;
-
-	data = this_cpu_read(runtime_data);
-	ghcb = &data->ghcb_page;
-
-	if (state->ghcb) {
-		/* Restore GHCB from Backup */
-		*ghcb = *state->ghcb;
-		data->backup_ghcb_active = false;
-		state->ghcb = NULL;
-	} else {
-		data->ghcb_active = false;
-	}
-}
-
 /* Needed in vc_early_forward_exception */
 void do_early_exception(struct pt_regs *regs, int trapnr);
 
@@ -461,6 +443,24 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"
 
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+	struct sev_es_runtime_data *data;
+	struct ghcb *ghcb;
+
+	data = this_cpu_read(runtime_data);
+	ghcb = &data->ghcb_page;
+
+	if (state->ghcb) {
+		/* Restore GHCB from Backup */
+		*ghcb = *state->ghcb;
+		data->backup_ghcb_active = false;
+		state->ghcb = NULL;
+	} else {
+		data->ghcb_active = false;
+	}
+}
+
 void noinstr __sev_es_nmi_complete(void)
 {
 	struct ghcb_state state;
-- 
GitLab


From a50c5bebc99c525e7fbc059988c6a5ab8680cb76 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Mon, 17 May 2021 12:42:33 -0500
Subject: [PATCH 0895/3804] x86/sev-es: Invalidate the GHCB after completing
 VMGEXIT

Since the VMGEXIT instruction can be issued from userspace, invalidate
the GHCB after performing VMGEXIT processing in the kernel.

Invalidation is only required after userspace is available, so call
vc_ghcb_invalidate() from sev_es_put_ghcb(). Update vc_ghcb_invalidate()
to additionally clear the GHCB exit code so that it is always presented
as 0 when VMGEXIT has been issued by anything else besides the kernel.

Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/5a8130462e4f0057ee1184509cd056eedd78742b.1621273353.git.thomas.lendacky@amd.com
---
 arch/x86/kernel/sev-shared.c | 1 +
 arch/x86/kernel/sev.c        | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 6ec8b3bfd76eb..9f90f460a28cc 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void)
 
 static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
 {
+	ghcb->save.sw_exit_code = 0;
 	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 45e212675811c..4fa111becc93b 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -457,6 +457,11 @@ static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
 		data->backup_ghcb_active = false;
 		state->ghcb = NULL;
 	} else {
+		/*
+		 * Invalidate the GHCB so a VMGEXIT instruction issued
+		 * from userspace won't appear to be valid.
+		 */
+		vc_ghcb_invalidate(ghcb);
 		data->ghcb_active = false;
 	}
 }
-- 
GitLab


From 673c7aa2436bfc857b92417f3e590a297c586dde Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Mon, 19 Apr 2021 18:46:30 +0200
Subject: [PATCH 0896/3804] optee: use export_uuid() to copy client UUID

Prior to this patch optee_open_session() was making assumptions about
the internal format of uuid_t by casting a memory location in a
parameter struct to uuid_t *. Fix this using export_uuid() to get a well
defined binary representation and also add an octets field in struct
optee_msg_param in order to avoid casting.

Fixes: c5b4312bea5d ("tee: optee: Add support for session login client UUID generation")
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/call.c      | 6 ++++--
 drivers/tee/optee/optee_msg.h | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/tee/optee/call.c b/drivers/tee/optee/call.c
index 6132cc8d014c0..6e6eb836e9b62 100644
--- a/drivers/tee/optee/call.c
+++ b/drivers/tee/optee/call.c
@@ -220,6 +220,7 @@ int optee_open_session(struct tee_context *ctx,
 	struct optee_msg_arg *msg_arg;
 	phys_addr_t msg_parg;
 	struct optee_session *sess = NULL;
+	uuid_t client_uuid;
 
 	/* +2 for the meta parameters added below */
 	shm = get_msg_arg(ctx, arg->num_params + 2, &msg_arg, &msg_parg);
@@ -240,10 +241,11 @@ int optee_open_session(struct tee_context *ctx,
 	memcpy(&msg_arg->params[0].u.value, arg->uuid, sizeof(arg->uuid));
 	msg_arg->params[1].u.value.c = arg->clnt_login;
 
-	rc = tee_session_calc_client_uuid((uuid_t *)&msg_arg->params[1].u.value,
-					  arg->clnt_login, arg->clnt_uuid);
+	rc = tee_session_calc_client_uuid(&client_uuid, arg->clnt_login,
+					  arg->clnt_uuid);
 	if (rc)
 		goto out;
+	export_uuid(msg_arg->params[1].u.octets, &client_uuid);
 
 	rc = optee_to_msg_param(msg_arg->params + 2, arg->num_params, param);
 	if (rc)
diff --git a/drivers/tee/optee/optee_msg.h b/drivers/tee/optee/optee_msg.h
index 81ff593ac4ec2..e3d72d09c4848 100644
--- a/drivers/tee/optee/optee_msg.h
+++ b/drivers/tee/optee/optee_msg.h
@@ -9,7 +9,7 @@
 #include <linux/types.h>
 
 /*
- * This file defines the OP-TEE message protocol used to communicate
+ * This file defines the OP-TEE message protocol (ABI) used to communicate
  * with an instance of OP-TEE running in secure world.
  *
  * This file is divided into two sections.
@@ -144,9 +144,10 @@ struct optee_msg_param_value {
  * @tmem:	parameter by temporary memory reference
  * @rmem:	parameter by registered memory reference
  * @value:	parameter by opaque value
+ * @octets:	parameter by octet string
  *
  * @attr & OPTEE_MSG_ATTR_TYPE_MASK indicates if tmem, rmem or value is used in
- * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value,
+ * the union. OPTEE_MSG_ATTR_TYPE_VALUE_* indicates value or octets,
  * OPTEE_MSG_ATTR_TYPE_TMEM_* indicates @tmem and
  * OPTEE_MSG_ATTR_TYPE_RMEM_* indicates @rmem,
  * OPTEE_MSG_ATTR_TYPE_NONE indicates that none of the members are used.
@@ -157,6 +158,7 @@ struct optee_msg_param {
 		struct optee_msg_param_tmem tmem;
 		struct optee_msg_param_rmem rmem;
 		struct optee_msg_param_value value;
+		u8 octets[24];
 	} u;
 };
 
-- 
GitLab


From 3c4e0147c269738a19c7d70cd32395600bcc0714 Mon Sep 17 00:00:00 2001
From: Maciej Falkowski <maciej.falkowski9@gmail.com>
Date: Thu, 1 Apr 2021 18:11:27 +0200
Subject: [PATCH 0897/3804] ARM: OMAP1: Fix use of possibly uninitialized irq
 variable

The current control flow that assigns the IRQ number to the `irq` variable
allows an IRQ with an unspecified value to be requested,
generating a warning when compiling omap1_defconfig with Clang on
linux-next:

arch/arm/mach-omap1/pm.c:656:11: warning: variable 'irq' is used
uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
        else if (cpu_is_omap16xx())
                 ^~~~~~~~~~~~~~~~~
./arch/arm/mach-omap1/include/mach/soc.h:123:30: note: expanded from macro
'cpu_is_omap16xx'
                                        ^~~~~~~~~~~~~
arch/arm/mach-omap1/pm.c:658:18: note: uninitialized use occurs here
        if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
                        ^~~
arch/arm/mach-omap1/pm.c:656:7: note: remove the 'if' if its condition is
always true
        else if (cpu_is_omap16xx())
             ^~~~~~~~~~~~~~~~~~~~~~
arch/arm/mach-omap1/pm.c:611:9: note: initialize the variable 'irq' to
silence this warning
        int irq;
               ^
                = 0
1 warning generated.

The patch provides a default value to the `irq` variable
along with a validity check.

Signed-off-by: Maciej Falkowski <maciej.falkowski9@gmail.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/1324
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/pm.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 2c1e2b32b9b36..a745d64d46995 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -655,9 +655,13 @@ static int __init omap_pm_init(void)
 		irq = INT_7XX_WAKE_UP_REQ;
 	else if (cpu_is_omap16xx())
 		irq = INT_1610_WAKE_UP_REQ;
-	if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
-			NULL))
-		pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+	else
+		irq = -1;
+
+	if (irq >= 0) {
+		if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup", NULL))
+			pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
+	}
 
 	/* Program new power ramp-up time
 	 * (0 for most boards since we don't lower voltage when in deep sleep)
-- 
GitLab


From 7c302314f37b44595f180198fca5ca646bce4a5f Mon Sep 17 00:00:00 2001
From: Maciej Falkowski <maciej.falkowski9@gmail.com>
Date: Thu, 1 Apr 2021 18:20:32 +0200
Subject: [PATCH 0898/3804] ARM: OMAP1: isp1301-omap: Add missing
 gpiod_add_lookup_table function

The gpiod table was added without any usage making it unused
as reported by Clang compilation from omap1_defconfig on linux-next:

arch/arm/mach-omap1/board-h2.c:347:34: warning: unused variable
'isp1301_gpiod_table' [-Wunused-variable]
static struct gpiod_lookup_table isp1301_gpiod_table = {
                                 ^
1 warning generated.

The patch adds the missing gpiod_add_lookup_table() function.

Signed-off-by: Maciej Falkowski <maciej.falkowski9@gmail.com>
Fixes: f3ef38160e3d ("usb: isp1301-omap: Convert to use GPIO descriptors")
Link: https://github.com/ClangBuiltLinux/linux/issues/1325
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-h2.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index c40cf5ef86079..977b0b744c22a 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -320,7 +320,7 @@ static int tps_setup(struct i2c_client *client, void *context)
 {
 	if (!IS_BUILTIN(CONFIG_TPS65010))
 		return -ENOSYS;
-	
+
 	tps65010_config_vregs1(TPS_LDO2_ENABLE | TPS_VLDO2_3_0V |
 				TPS_LDO1_ENABLE | TPS_VLDO1_3_0V);
 
@@ -394,6 +394,8 @@ static void __init h2_init(void)
 	BUG_ON(gpio_request(H2_NAND_RB_GPIO_PIN, "NAND ready") < 0);
 	gpio_direction_input(H2_NAND_RB_GPIO_PIN);
 
+	gpiod_add_lookup_table(&isp1301_gpiod_table);
+
 	omap_cfg_reg(L3_1610_FLASH_CS2B_OE);
 	omap_cfg_reg(M8_1610_FLASH_CS2B_WE);
 
-- 
GitLab


From 040ab72ee10ea88e1883ad143b3e2b77596abc31 Mon Sep 17 00:00:00 2001
From: Yongqiang Liu <liuyongqiang13@huawei.com>
Date: Thu, 1 Apr 2021 13:15:33 +0000
Subject: [PATCH 0899/3804] ARM: OMAP2+: Fix build warning when mmc_omap is not
 built

GCC reports the following warning with W=1:

arch/arm/mach-omap2/board-n8x0.c:325:19: warning:
variable 'index' set but not used [-Wunused-but-set-variable]
325 |  int bit, *openp, index;
    |                   ^~~~~

Fix this by moving the CONFIG_MMC_OMAP check to cover the rest of the code
in n8x0_mmc_callback().

Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/board-n8x0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index 418a61ecb8275..5e86145db0e2a 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -322,6 +322,7 @@ static int n8x0_mmc_get_cover_state(struct device *dev, int slot)
 
 static void n8x0_mmc_callback(void *data, u8 card_mask)
 {
+#ifdef CONFIG_MMC_OMAP
 	int bit, *openp, index;
 
 	if (board_is_n800()) {
@@ -339,7 +340,6 @@ static void n8x0_mmc_callback(void *data, u8 card_mask)
 	else
 		*openp = 0;
 
-#ifdef CONFIG_MMC_OMAP
 	omap_mmc_notify_cover_event(mmc_device, index, *openp);
 #else
 	pr_warn("MMC: notify cover event not available\n");
-- 
GitLab


From 9f079c1bdc9087842dc5ac9d81b1d7f2578e81ce Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 18 May 2021 10:25:10 +0900
Subject: [PATCH 0900/3804] ALSA: dice: disable double_pcm_frames mode for
 M-Audio Profire 610, 2626 and Avid M-Box 3 Pro

The ALSA dice driver detects jumbo payload at high sampling transfer
frequency for the models below:

 * Avid M-Box 3 Pro
 * M-Audio Profire 610
 * M-Audio Profire 2626

Although many DICE-based devices have a quirk at high sampling transfer
frequency, multiplexing twice as many PCM frames into each data block as
IEC 61883-1/6 specifies, the above devices are simply compliant with
IEC 61883-1/6.

This commit disables the mode of double_pcm_frames for the models.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210518012510.37126-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-pcm.c    |  4 ++--
 sound/firewire/dice/dice-stream.c |  2 +-
 sound/firewire/dice/dice.c        | 24 ++++++++++++++++++++++++
 sound/firewire/dice/dice.h        |  3 ++-
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/sound/firewire/dice/dice-pcm.c b/sound/firewire/dice/dice-pcm.c
index af8a90ee40f39..a69ca1111b033 100644
--- a/sound/firewire/dice/dice-pcm.c
+++ b/sound/firewire/dice/dice-pcm.c
@@ -218,7 +218,7 @@ static int pcm_open(struct snd_pcm_substream *substream)
 
 		if (frames_per_period > 0) {
 			// For double_pcm_frame quirk.
-			if (rate > 96000) {
+			if (rate > 96000 && !dice->disable_double_pcm_frames) {
 				frames_per_period *= 2;
 				frames_per_buffer *= 2;
 			}
@@ -273,7 +273,7 @@ static int pcm_hw_params(struct snd_pcm_substream *substream,
 
 		mutex_lock(&dice->mutex);
 		// For double_pcm_frame quirk.
-		if (rate > 96000) {
+		if (rate > 96000 && !dice->disable_double_pcm_frames) {
 			events_per_period /= 2;
 			events_per_buffer /= 2;
 		}
diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c
index 1a14c083e8cea..c4dfe76500c29 100644
--- a/sound/firewire/dice/dice-stream.c
+++ b/sound/firewire/dice/dice-stream.c
@@ -181,7 +181,7 @@ static int keep_resources(struct snd_dice *dice, struct amdtp_stream *stream,
 	// as 'Dual Wire'.
 	// For this quirk, blocking mode is required and PCM buffer size should
 	// be aligned to SYT_INTERVAL.
-	double_pcm_frames = rate > 96000;
+	double_pcm_frames = (rate > 96000 && !dice->disable_double_pcm_frames);
 	if (double_pcm_frames) {
 		rate /= 2;
 		pcm_chs *= 2;
diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 107a81691f0e8..239d164b0eea8 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -21,6 +21,7 @@ MODULE_LICENSE("GPL v2");
 #define OUI_SSL			0x0050c2	// Actually ID reserved by IEEE.
 #define OUI_PRESONUS		0x000a92
 #define OUI_HARMAN		0x000fd7
+#define OUI_AVID		0x00a07e
 
 #define DICE_CATEGORY_ID	0x04
 #define WEISS_CATEGORY_ID	0x00
@@ -222,6 +223,14 @@ static int dice_probe(struct fw_unit *unit,
 				(snd_dice_detect_formats_t)entry->driver_data;
 	}
 
+	// Below models are compliant to IEC 61883-1/6 and have no quirk at high sampling transfer
+	// frequency.
+	// * Avid M-Box 3 Pro
+	// * M-Audio Profire 610
+	// * M-Audio Profire 2626
+	if (entry->vendor_id == OUI_MAUDIO || entry->vendor_id == OUI_AVID)
+		dice->disable_double_pcm_frames = true;
+
 	spin_lock_init(&dice->lock);
 	mutex_init(&dice->mutex);
 	init_completion(&dice->clock_accepted);
@@ -278,7 +287,22 @@ static void dice_bus_reset(struct fw_unit *unit)
 
 #define DICE_INTERFACE	0x000001
 
+#define DICE_DEV_ENTRY_TYPICAL(vendor, model, data) \
+	{ \
+		.match_flags	= IEEE1394_MATCH_VENDOR_ID | \
+				  IEEE1394_MATCH_MODEL_ID | \
+				  IEEE1394_MATCH_SPECIFIER_ID | \
+				  IEEE1394_MATCH_VERSION, \
+		.vendor_id	= (vendor), \
+		.model_id	= (model), \
+		.specifier_id	= (vendor), \
+		.version	= DICE_INTERFACE, \
+		.driver_data = (kernel_ulong_t)(data), \
+	}
+
 static const struct ieee1394_device_id dice_id_table[] = {
+	// Avid M-Box 3 Pro. To match in probe function.
+	DICE_DEV_ENTRY_TYPICAL(OUI_AVID, 0x000004, snd_dice_detect_extension_formats),
 	/* M-Audio Profire 2626 has a different value in version field. */
 	{
 		.match_flags	= IEEE1394_MATCH_VENDOR_ID |
diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h
index adc6f7c844609..3c967d1b3605d 100644
--- a/sound/firewire/dice/dice.h
+++ b/sound/firewire/dice/dice.h
@@ -109,7 +109,8 @@ struct snd_dice {
 	struct fw_iso_resources rx_resources[MAX_STREAMS];
 	struct amdtp_stream tx_stream[MAX_STREAMS];
 	struct amdtp_stream rx_stream[MAX_STREAMS];
-	bool global_enabled;
+	bool global_enabled:1;
+	bool disable_double_pcm_frames:1;
 	struct completion clock_accepted;
 	unsigned int substreams_counter;
 
-- 
GitLab


From 4c6fe8c547e3c9e8c15dabdd23c569ee0df3adb1 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 18 May 2021 10:26:12 +0900
Subject: [PATCH 0901/3804] ALSA: dice: fix stream format for TC Electronic
 Konnekt Live at high sampling transfer frequency

At high sampling transfer frequency, TC Electronic Konnekt Live
transfers/receives 6 audio data frames in multi bit linear audio data
channel of data block in CIP payload. Current hard-coded stream format
is wrong.

Cc: <stable@vger.kernel.org>
Fixes: f1f0f330b1d0 ("ALSA: dice: add parameters of stream formats for models produced by TC Electronic")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210518012612.37268-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice-tcelectronic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/firewire/dice/dice-tcelectronic.c b/sound/firewire/dice/dice-tcelectronic.c
index a8875d24ba2aa..43a3bcb15b3d1 100644
--- a/sound/firewire/dice/dice-tcelectronic.c
+++ b/sound/firewire/dice/dice-tcelectronic.c
@@ -38,8 +38,8 @@ static const struct dice_tc_spec konnekt_24d = {
 };
 
 static const struct dice_tc_spec konnekt_live = {
-	.tx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
-	.rx_pcm_chs = {{16, 16, 16}, {0, 0, 0} },
+	.tx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
+	.rx_pcm_chs = {{16, 16, 6}, {0, 0, 0} },
 	.has_midi = true,
 };
 
-- 
GitLab


From 4d7b324e231366ea772ab10df46be31273ca39af Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 18 May 2021 09:47:23 +0300
Subject: [PATCH 0902/3804] bus: ti-sysc: Fix am335x resume hang for usb otg
 module

On am335x, suspend and resume only works once, and the system hangs if
suspend is attempted again. However, turns out suspend and resume works
fine multiple times if the USB OTG driver for musb controller is loaded.

The issue is caused by the interconnect target module losing context
during suspend, and it needs a restore on resume to be reconfigured again
as debugged earlier by Dave Gerlach <d-gerlach@ti.com>.

There are also other modules that need a restore on resume, like gpmc as
noted by Dave. So let's add a common way to restore an interconnect
target module based on a quirk flag. For now, let's enable the quirk for
am335x otg only to fix the suspend and resume issue.

As gpmc is not causing hangs based on tests with BeagleBone, let's patch
gpmc separately. For gpmc, we also need a hardware reset done before
restore according to Dave.

To reinit the modules, we decouple system suspend from PM runtime. We
replace calls to pm_runtime_force_suspend() and pm_runtime_force_resume()
with direct calls to internal functions and rely on the driver internal
state. There is no point trying to handle complex system suspend and resume
quirks via PM runtime.

This issue should have already been noticed with commit 1819ef2e2d12
("bus: ti-sysc: Use swsup quirks also for am335x musb") when quirk
handling was added for am335x otg for swsup. But the issue went unnoticed
as having musb driver loaded hides the issue, and suspend and resume works
once without the driver loaded.

Fixes: 1819ef2e2d12 ("bus: ti-sysc: Use swsup quirks also for am335x musb")
Suggested-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 53 +++++++++++++++++++++++++--
 include/linux/platform_data/ti-sysc.h |  1 +
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 50a9f34b9e6c6..4ff319863be2d 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1334,6 +1334,34 @@ err_allow_idle:
 	return error;
 }
 
+static int sysc_reinit_module(struct sysc *ddata, bool leave_enabled)
+{
+	struct device *dev = ddata->dev;
+	int error;
+
+	/* Disable target module if it is enabled */
+	if (ddata->enabled) {
+		error = sysc_runtime_suspend(dev);
+		if (error)
+			dev_warn(dev, "reinit suspend failed: %i\n", error);
+	}
+
+	/* Enable target module */
+	error = sysc_runtime_resume(dev);
+	if (error)
+		dev_warn(dev, "reinit resume failed: %i\n", error);
+
+	if (leave_enabled)
+		return error;
+
+	/* Disable target module if no leave_enabled was set */
+	error = sysc_runtime_suspend(dev);
+	if (error)
+		dev_warn(dev, "reinit suspend failed: %i\n", error);
+
+	return error;
+}
+
 static int __maybe_unused sysc_noirq_suspend(struct device *dev)
 {
 	struct sysc *ddata;
@@ -1344,12 +1372,18 @@ static int __maybe_unused sysc_noirq_suspend(struct device *dev)
 	    (SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_NO_IDLE))
 		return 0;
 
-	return pm_runtime_force_suspend(dev);
+	if (!ddata->enabled)
+		return 0;
+
+	ddata->needs_resume = 1;
+
+	return sysc_runtime_suspend(dev);
 }
 
 static int __maybe_unused sysc_noirq_resume(struct device *dev)
 {
 	struct sysc *ddata;
+	int error = 0;
 
 	ddata = dev_get_drvdata(dev);
 
@@ -1357,7 +1391,19 @@ static int __maybe_unused sysc_noirq_resume(struct device *dev)
 	    (SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_NO_IDLE))
 		return 0;
 
-	return pm_runtime_force_resume(dev);
+	if (ddata->cfg.quirks & SYSC_QUIRK_REINIT_ON_RESUME) {
+		error = sysc_reinit_module(ddata, ddata->needs_resume);
+		if (error)
+			dev_warn(dev, "noirq_resume failed: %i\n", error);
+	} else if (ddata->needs_resume) {
+		error = sysc_runtime_resume(dev);
+		if (error)
+			dev_warn(dev, "noirq_resume failed: %i\n", error);
+	}
+
+	ddata->needs_resume = 0;
+
+	return error;
 }
 
 static const struct dev_pm_ops sysc_pm_ops = {
@@ -1468,7 +1514,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
 		   0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -ENODEV, 0x4ea2080d, 0xffffffff,
-		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
+		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY |
+		   SYSC_QUIRK_REINIT_ON_RESUME),
 	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
 		   SYSC_MODULE_QUIRK_WDT),
 	/* PRUSS on am3, am4 and am5 */
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index fafc1beea504a..9837fb011f2fb 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -50,6 +50,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_QUIRK_REINIT_ON_RESUME	BIT(27)
 #define SYSC_QUIRK_GPMC_DEBUG		BIT(26)
 #define SYSC_MODULE_QUIRK_ENA_RESETDONE	BIT(25)
 #define SYSC_MODULE_QUIRK_PRUSS		BIT(24)
-- 
GitLab


From d6177a6556f853785867e2ec6d5b7f4906f0d809 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Thu, 22 Apr 2021 11:42:19 +0200
Subject: [PATCH 0903/3804] iommu/amd: Clear DMA ops when switching domain

Since commit 08a27c1c3ecf ("iommu: Add support to change default domain
of an iommu group") a user can switch a device between IOMMU and direct
DMA through sysfs. This doesn't work for AMD IOMMU at the moment because
dev->dma_ops is not cleared when switching from a DMA to an identity
IOMMU domain. The DMA layer thus attempts to use the dma-iommu ops on an
identity domain, causing an oops:

  # echo 0000:00:05.0 > /sys/bus/pci/drivers/e1000e/unbind
  # echo identity > /sys/bus/pci/devices/0000:00:05.0/iommu_group/type
  # echo 0000:00:05.0 > /sys/bus/pci/drivers/e1000e/bind
   ...
  BUG: kernel NULL pointer dereference, address: 0000000000000028
   ...
   Call Trace:
    iommu_dma_alloc
    e1000e_setup_tx_resources
    e1000e_open

Since iommu_change_dev_def_domain() calls probe_finalize() again, clear
the dma_ops there like Vt-d does.

Fixes: 08a27c1c3ecf ("iommu: Add support to change default domain of an iommu group")
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Link: https://lore.kernel.org/r/20210422094216.2282097-1-jean-philippe@linaro.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 80e8e1916dd17..67da96d5b3c22 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1714,6 +1714,8 @@ static void amd_iommu_probe_finalize(struct device *dev)
 	domain = iommu_get_domain_for_dev(dev);
 	if (domain->type == IOMMU_DOMAIN_DMA)
 		iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
+	else
+		set_dma_ops(dev, NULL);
 }
 
 static void amd_iommu_release_device(struct device *dev)
-- 
GitLab


From a017c567915fd7a017006f8c210e2c6b30ab6fad Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Sat, 1 May 2021 23:59:56 -0700
Subject: [PATCH 0904/3804] iommu/amd: Fix wrong parentheses on page-specific
 invalidations

The logic to determine the mask of page-specific invalidations was
tested in userspace. As the code was copied into the kernel, the
parentheses were mistakenly set in the wrong place, resulting in the
wrong mask.

Fix it.

Cc: Joerg Roedel <joro@8bytes.org>
Cc: Will Deacon <will@kernel.org>
Cc: Jiajun Cao <caojiajun@vmware.com>
Cc: iommu@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org
Fixes: 268aa4548277 ("iommu/amd: Page-specific invalidations for more than one page")
Signed-off-by: Nadav Amit <namit@vmware.com>
Link: https://lore.kernel.org/r/20210502070001.1559127-2-namit@vmware.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 67da96d5b3c22..3ac42bbdefc63 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -884,7 +884,7 @@ static inline u64 build_inv_address(u64 address, size_t size)
 		 * The msb-bit must be clear on the address. Just set all the
 		 * lower bits.
 		 */
-		address |= 1ull << (msb_diff - 1);
+		address |= (1ull << msb_diff) - 1;
 	}
 
 	/* Clear bits 11:0 */
-- 
GitLab


From 382d91fc0f4f1b13f8a0dcbf7145f4f175b71a18 Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Sat, 8 May 2021 11:14:51 +0800
Subject: [PATCH 0905/3804] iommu/virtio: Add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Fixes: fa4afd78ea12 ("iommu/virtio: Build virtio-iommu as module")
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Link: https://lore.kernel.org/r/20210508031451.53493-1-cuibixuan@huawei.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/virtio-iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 7c02481a81b4e..c6e5ee4d9cef8 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1136,6 +1136,7 @@ static struct virtio_device_id id_table[] = {
 	{ VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID },
 	{ 0 },
 };
+MODULE_DEVICE_TABLE(virtio, id_table);
 
 static struct virtio_driver virtio_iommu_drv = {
 	.driver.name		= KBUILD_MODNAME,
-- 
GitLab


From 56c1f0876293888f686e31278d183d4af2cac3c3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 18 May 2021 11:26:31 +0200
Subject: [PATCH 0906/3804] media: sti: fix obj-$(config) targets

The right thing to do is to add a new object to the building
system when a certain config option is selected, and *not*
override them.

So, fix obj-$(config) logic at sti makefiles, using "+=",
instead of ":=".

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/bdisp/Makefile | 2 +-
 drivers/media/platform/sti/delta/Makefile | 2 +-
 drivers/media/platform/sti/hva/Makefile   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/sti/bdisp/Makefile b/drivers/media/platform/sti/bdisp/Makefile
index caf7ccd193eaa..39ade0a347236 100644
--- a/drivers/media/platform/sti/bdisp/Makefile
+++ b/drivers/media/platform/sti/bdisp/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_VIDEO_STI_BDISP) := bdisp.o
+obj-$(CONFIG_VIDEO_STI_BDISP) += bdisp.o
 
 bdisp-objs := bdisp-v4l2.o bdisp-hw.o bdisp-debug.o
diff --git a/drivers/media/platform/sti/delta/Makefile b/drivers/media/platform/sti/delta/Makefile
index 92b37e216f004..32412fa4c6328 100644
--- a/drivers/media/platform/sti/delta/Makefile
+++ b/drivers/media/platform/sti/delta/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_VIDEO_STI_DELTA_DRIVER) := st-delta.o
+obj-$(CONFIG_VIDEO_STI_DELTA_DRIVER) += st-delta.o
 st-delta-y := delta-v4l2.o delta-mem.o delta-ipc.o delta-debug.o
 
 # MJPEG support
diff --git a/drivers/media/platform/sti/hva/Makefile b/drivers/media/platform/sti/hva/Makefile
index 74b41ec52f976..b5a5478bdd016 100644
--- a/drivers/media/platform/sti/hva/Makefile
+++ b/drivers/media/platform/sti/hva/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_VIDEO_STI_HVA) := st-hva.o
+obj-$(CONFIG_VIDEO_STI_HVA) += st-hva.o
 st-hva-y := hva-v4l2.o hva-hw.o hva-mem.o hva-h264.o
 st-hva-$(CONFIG_VIDEO_STI_HVA_DEBUGFS) += hva-debugfs.o
-- 
GitLab


From 76d0fc5e9bc650766a90cc3ffd2a29248df0f020 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 18 May 2021 10:08:37 +0100
Subject: [PATCH 0907/3804] arm64: Fix stale link in the
 arch_counter_enforce_ordering() comment

With infradead.org archives gone, update the link to lore.kernel.org as
these links are deemed stable.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 2175ec0004edb..451e11e5fd23b 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
  * This insanity brought to you by speculative system register reads,
  * out-of-order memory accesses, sequence locks and Thomas Gleixner.
  *
- * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/
  */
 #define arch_counter_enforce_ordering(val) do {				\
 	u64 tmp, _val = (val);						\
-- 
GitLab


From c6a8625fa4c6b0a97860d053271660ccedc3d1b3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 14 May 2021 15:01:16 +0800
Subject: [PATCH 0908/3804] hv_utils: Fix passing zero to 'PTR_ERR' warning

Sparse warns about this:

drivers/hv/hv_util.c:753 hv_timesync_init() warn:
 passing zero to 'PTR_ERR'

Use PTR_ERR_OR_ZERO instead of PTR_ERR to fix this.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210514070116.16800-1-yuehaibing@huawei.com
[ wei: change %ld to %d ]
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/hv_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index e4aefeb330daf..136576cba26f5 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -750,8 +750,8 @@ static int hv_timesync_init(struct hv_util_service *srv)
 	 */
 	hv_ptp_clock = ptp_clock_register(&ptp_hyperv_info, NULL);
 	if (IS_ERR_OR_NULL(hv_ptp_clock)) {
-		pr_err("cannot register PTP clock: %ld\n",
-		       PTR_ERR(hv_ptp_clock));
+		pr_err("cannot register PTP clock: %d\n",
+		       PTR_ERR_OR_ZERO(hv_ptp_clock));
 		hv_ptp_clock = NULL;
 	}
 
-- 
GitLab


From 3317c26a4b413b41364f2c4b83c778c6aba1576d Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu@linux.intel.com>
Date: Fri, 30 Apr 2021 13:22:46 +0800
Subject: [PATCH 0909/3804] perf/x86: Avoid touching LBR_TOS MSR for Arch LBR

The Architecture LBR does not have MSR_LBR_TOS (0x000001c9).
In a guest that should support Architecture LBR, check_msr()
will be a non-related check for the architecture MSR 0x0
(IA32_P5_MC_ADDR) that is also not supported by KVM.

The failure will cause x86_pmu.lbr_nr = 0, thereby preventing
the initialization of the guest Arch LBR. Fix it by avoiding
this extraneous check in intel_pmu_init() for Arch LBR.

Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR")
Signed-off-by: Like Xu <like.xu@linux.intel.com>
[peterz: simpler still]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210430052247.3079672-1-like.xu@linux.intel.com
---
 arch/x86/events/intel/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2521d03de5e02..e28892270c580 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void)
 	 * Check all LBT MSR here.
 	 * Disable LBR access if any LBR MSRs can not be accessed.
 	 */
-	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+	if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
 		x86_pmu.lbr_nr = 0;
 	for (i = 0; i < x86_pmu.lbr_nr; i++) {
 		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
-- 
GitLab


From 488e13a489e9707a7e81e1991fdd1f20c0f04689 Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu@linux.intel.com>
Date: Fri, 30 Apr 2021 13:22:47 +0800
Subject: [PATCH 0910/3804] perf/x86/lbr: Remove cpuc->lbr_xsave allocation
 from atomic context

If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional
might_sleep_if() deep in kmem_cache_alloc() will generate the following
trace, and potentially cause a deadlock when another LBR event is added:

  [] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196
  [] Call Trace:
  []  kmem_cache_alloc+0x36/0x250
  []  intel_pmu_lbr_add+0x152/0x170
  []  x86_pmu_add+0x83/0xd0

Make it symmetric with the release_lbr_buffers() call and mirror the
existing DS buffers.

Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read")
Signed-off-by: Like Xu <like.xu@linux.intel.com>
[peterz: simplified]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com
---
 arch/x86/events/core.c       |  6 ++++--
 arch/x86/events/intel/lbr.c  | 26 ++++++++++++++++++++------
 arch/x86/events/perf_event.h |  6 ++++++
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e509325c2c3d..8f71dd72ef95f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -396,10 +396,12 @@ int x86_reserve_hardware(void)
 	if (!atomic_inc_not_zero(&pmc_refcount)) {
 		mutex_lock(&pmc_reserve_mutex);
 		if (atomic_read(&pmc_refcount) == 0) {
-			if (!reserve_pmc_hardware())
+			if (!reserve_pmc_hardware()) {
 				err = -EBUSY;
-			else
+			} else {
 				reserve_ds_buffers();
+				reserve_lbr_buffers();
+			}
 		}
 		if (!err)
 			atomic_inc(&pmc_refcount);
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 76dbab6ac9fbe..4409d2cccfda5 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel)
 
 void intel_pmu_lbr_add(struct perf_event *event)
 {
-	struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (!x86_pmu.lbr_nr)
@@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event)
 	perf_sched_cb_inc(event->ctx->pmu);
 	if (!cpuc->lbr_users++ && !event->total_time_running)
 		intel_pmu_lbr_reset();
-
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-	    kmem_cache && !cpuc->lbr_xsave &&
-	    (cpuc->lbr_users != cpuc->lbr_pebs_users))
-		cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
 }
 
 void release_lbr_buffers(void)
@@ -722,6 +716,26 @@ void release_lbr_buffers(void)
 	}
 }
 
+void reserve_lbr_buffers(void)
+{
+	struct kmem_cache *kmem_cache;
+	struct cpu_hw_events *cpuc;
+	int cpu;
+
+	if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+		kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+		if (!kmem_cache || cpuc->lbr_xsave)
+			continue;
+
+		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+							cpu_to_node(cpu));
+	}
+}
+
 void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 27fa85e7d4fda..ad87cb36f7c81 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void);
 
 void release_lbr_buffers(void);
 
+void reserve_lbr_buffers(void);
+
 extern struct event_constraint bts_constraint;
 extern struct event_constraint vlbr_constraint;
 
@@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void)
 {
 }
 
+static inline void reserve_lbr_buffers(void)
+{
+}
+
 static inline int intel_pmu_init(void)
 {
 	return 0;
-- 
GitLab


From 89e70d5c583c55088faa2201d397ee30a15704aa Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Wed, 12 May 2021 20:09:37 +0800
Subject: [PATCH 0911/3804] locking/lockdep: Correct calling tracepoints

The commit eb1f00237aca ("lockdep,trace: Expose tracepoints") reverses
tracepoints for lock_contended() and lock_acquired(), thus the ftrace
log shows the wrong locking sequence that "acquired" event is prior to
"contended" event:

  <idle>-0       [001] d.s3 20803.501685: lock_acquire: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501686: lock_acquired: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501689: lock_contended: 0000000008b91ab4 &sg_policy->update_lock
  <idle>-0       [001] d.s3 20803.501690: lock_release: 0000000008b91ab4 &sg_policy->update_lock

This patch fixes calling tracepoints for lock_contended() and
lock_acquired().

Fixes: eb1f00237aca ("lockdep,trace: Expose tracepoints")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210512120937.90211-1-leo.yan@linaro.org
---
 kernel/locking/lockdep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 48d736aa03b24..7641bd4072390 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5736,7 +5736,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_acquired(lock, ip);
+	trace_lock_contended(lock, ip);
 
 	if (unlikely(!lock_stat || !lockdep_enabled()))
 		return;
@@ -5754,7 +5754,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
 	unsigned long flags;
 
-	trace_lock_contended(lock, ip);
+	trace_lock_acquired(lock, ip);
 
 	if (unlikely(!lock_stat || !lockdep_enabled()))
 		return;
-- 
GitLab


From 3a010c493271f04578b133de977e0e5dd2848cea Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang@windriver.com>
Date: Mon, 17 May 2021 11:40:05 +0800
Subject: [PATCH 0912/3804] locking/mutex: clear MUTEX_FLAGS if wait_list is
 empty due to signal

When an interruptible mutex locker is interrupted by a signal
without acquiring the lock, it is removed from the wait queue.
If the mutex isn't contended enough to have a waiter
put into the wait queue again, the WAITER bit that is still
set will force mutex lockers to go into the slowpath to
acquire the lock every time. So if the wait queue is empty,
the WAITER bit needs to be cleared.

Fixes: 040a0a371005 ("mutex: Add support for wound/wait style locks")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210517034005.30828-1-qiang.zhang@windriver.com
---
 kernel/locking/mutex-debug.c |  4 ++--
 kernel/locking/mutex-debug.h |  2 +-
 kernel/locking/mutex.c       | 18 +++++++++++++-----
 kernel/locking/mutex.h       |  4 +---
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index a7276aaf2abc0..db9301591e3fc 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -57,7 +57,7 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 	task->blocked_on = waiter;
 }
 
-void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			 struct task_struct *task)
 {
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
@@ -65,7 +65,7 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
 	task->blocked_on = NULL;
 
-	list_del_init(&waiter->list);
+	INIT_LIST_HEAD(&waiter->list);
 	waiter->task = NULL;
 }
 
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 1edd3f45a4ecb..53e631e1d76da 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -22,7 +22,7 @@ extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
 extern void debug_mutex_add_waiter(struct mutex *lock,
 				   struct mutex_waiter *waiter,
 				   struct task_struct *task);
-extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+extern void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 				struct task_struct *task);
 extern void debug_mutex_unlock(struct mutex *lock);
 extern void debug_mutex_init(struct mutex *lock, const char *name,
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index cb6b112ce1550..013e1b08a1bfb 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -194,7 +194,7 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
  * Add @waiter to a given location in the lock wait_list and set the
  * FLAG_WAITERS flag if it's the first waiter.
  */
-static void __sched
+static void
 __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		   struct list_head *list)
 {
@@ -205,6 +205,16 @@ __mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 		__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
 }
 
+static void
+__mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+{
+	list_del(&waiter->list);
+	if (likely(list_empty(&lock->wait_list)))
+		__mutex_clear_flag(lock, MUTEX_FLAGS);
+
+	debug_mutex_remove_waiter(lock, waiter, current);
+}
+
 /*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
  * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves
@@ -1061,9 +1071,7 @@ acquired:
 			__ww_mutex_check_waiters(lock, ww_ctx);
 	}
 
-	mutex_remove_waiter(lock, &waiter, current);
-	if (likely(list_empty(&lock->wait_list)))
-		__mutex_clear_flag(lock, MUTEX_FLAGS);
+	__mutex_remove_waiter(lock, &waiter);
 
 	debug_mutex_free_waiter(&waiter);
 
@@ -1080,7 +1088,7 @@ skip_wait:
 
 err:
 	__set_current_state(TASK_RUNNING);
-	mutex_remove_waiter(lock, &waiter, current);
+	__mutex_remove_waiter(lock, &waiter);
 err_early_kill:
 	spin_unlock(&lock->wait_lock);
 	debug_mutex_free_waiter(&waiter);
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 1c2287d3fa719..f0c710b1d1927 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -10,12 +10,10 @@
  * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
  */
 
-#define mutex_remove_waiter(lock, waiter, task) \
-		__list_del((waiter)->list.prev, (waiter)->list.next)
-
 #define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
 #define debug_mutex_free_waiter(waiter)			do { } while (0)
 #define debug_mutex_add_waiter(lock, waiter, ti)	do { } while (0)
+#define debug_mutex_remove_waiter(lock, waiter, ti)     do { } while (0)
 #define debug_mutex_unlock(lock)			do { } while (0)
 #define debug_mutex_init(lock, name, key)		do { } while (0)
 
-- 
GitLab


From 90a0ff4ec9c65cae3085d23301933172cea3f38a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 12 May 2021 13:32:37 +0200
Subject: [PATCH 0913/3804] sched,stats: Further simplify sched_info

There's no point doing delta==0 updates.

Suggested-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/stats.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 33ffd41935bab..111072ee96638 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -160,10 +160,11 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
 {
 	unsigned long long delta = 0;
 
-	if (t->sched_info.last_queued) {
-		delta = rq_clock(rq) - t->sched_info.last_queued;
-		t->sched_info.last_queued = 0;
-	}
+	if (!t->sched_info.last_queued)
+		return;
+
+	delta = rq_clock(rq) - t->sched_info.last_queued;
+	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
 
 	rq_sched_info_dequeue(rq, delta);
@@ -176,12 +177,14 @@ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t)
  */
 static void sched_info_arrive(struct rq *rq, struct task_struct *t)
 {
-	unsigned long long now = rq_clock(rq), delta = 0;
+	unsigned long long now, delta = 0;
 
-	if (t->sched_info.last_queued) {
-		delta = now - t->sched_info.last_queued;
-		t->sched_info.last_queued = 0;
-	}
+	if (!t->sched_info.last_queued)
+		return;
+
+	now = rq_clock(rq);
+	delta = now - t->sched_info.last_queued;
+	t->sched_info.last_queued = 0;
 	t->sched_info.run_delay += delta;
 	t->sched_info.last_arrival = now;
 	t->sched_info.pcount++;
-- 
GitLab


From fcb501704554eebfd27e3220b0540997fd2b24a8 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 12 May 2021 12:40:35 +0100
Subject: [PATCH 0914/3804] delayacct: Document task_delayacct sysctl

Update sysctl/kernel.rst.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210512114035.GH3672@suse.de
---
 Documentation/admin-guide/sysctl/kernel.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 1d56a6b73a4e9..ebd2f993d608e 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1087,6 +1087,13 @@ Model available). If your platform happens to meet the
 requirements for EAS but you do not want to use it, change
 this value to 0.
 
+task_delayacct
+===============
+
+Enables/disables task delay accounting (see
+Documentation/accounting/delay-accounting.rst). Enabling this feature incurs
+a small amount of overhead in the scheduler but is useful for debugging
+and performance tuning. It is required by some tools such as iotop.
 
 sched_schedstats
 ================
-- 
GitLab


From 00b89fe0197f0c55a045775c11553c0cdb7082fe Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Mon, 10 May 2021 16:10:23 +0100
Subject: [PATCH 0915/3804] sched: Make the idle task quack like a per-CPU
 kthread

For all intents and purposes, the idle task is a per-CPU kthread. It isn't
created via the same route as other pcpu kthreads however, and as a result
it is missing a few bells and whistles: it fails kthread_is_per_cpu() and
it doesn't have PF_NO_SETAFFINITY set.

Fix the former by giving the idle task a kthread struct along with the
KTHREAD_IS_PER_CPU flag. This requires some extra iffery as init_idle()
can be called more than once on the same idle task.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210510151024.2448573-2-valentin.schneider@arm.com
---
 include/linux/kthread.h |  2 ++
 kernel/kthread.c        | 30 ++++++++++++++++++------------
 kernel/sched/core.c     | 21 +++++++++++++++------
 3 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 2484ed97e72f5..d9133d6db3084 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -33,6 +33,8 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
 					  unsigned int cpu,
 					  const char *namefmt);
 
+void set_kthread_struct(struct task_struct *p);
+
 void kthread_set_per_cpu(struct task_struct *k, int cpu);
 bool kthread_is_per_cpu(struct task_struct *k);
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index fe3f2a40d61e8..3d326833092be 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -68,16 +68,6 @@ enum KTHREAD_BITS {
 	KTHREAD_SHOULD_PARK,
 };
 
-static inline void set_kthread_struct(void *kthread)
-{
-	/*
-	 * We abuse ->set_child_tid to avoid the new member and because it
-	 * can't be wrongly copied by copy_process(). We also rely on fact
-	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
-	 */
-	current->set_child_tid = (__force void __user *)kthread;
-}
-
 static inline struct kthread *to_kthread(struct task_struct *k)
 {
 	WARN_ON(!(k->flags & PF_KTHREAD));
@@ -103,6 +93,22 @@ static inline struct kthread *__to_kthread(struct task_struct *p)
 	return kthread;
 }
 
+void set_kthread_struct(struct task_struct *p)
+{
+	struct kthread *kthread;
+
+	if (__to_kthread(p))
+		return;
+
+	kthread = kzalloc(sizeof(*kthread), GFP_KERNEL);
+	/*
+	 * We abuse ->set_child_tid to avoid the new member and because it
+	 * can't be wrongly copied by copy_process(). We also rely on fact
+	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
+	 */
+	p->set_child_tid = (__force void __user *)kthread;
+}
+
 void free_kthread_struct(struct task_struct *k)
 {
 	struct kthread *kthread;
@@ -272,8 +278,8 @@ static int kthread(void *_create)
 	struct kthread *self;
 	int ret;
 
-	self = kzalloc(sizeof(*self), GFP_KERNEL);
-	set_kthread_struct(self);
+	set_kthread_struct(current);
+	self = to_kthread(current);
 
 	/* If user was SIGKILLed, I release the structure. */
 	done = xchg(&create->done, NULL);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 24fd795e4b8c2..6a5124c4d54f2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8234,12 +8234,25 @@ void __init init_idle(struct task_struct *idle, int cpu)
 
 	__sched_fork(0, idle);
 
+	/*
+	 * The idle task doesn't need the kthread struct to function, but it
+	 * is dressed up as a per-CPU kthread and thus needs to play the part
+	 * if we want to avoid special-casing it in code that deals with per-CPU
+	 * kthreads.
+	 */
+	set_kthread_struct(idle);
+
 	raw_spin_lock_irqsave(&idle->pi_lock, flags);
 	raw_spin_rq_lock(rq);
 
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
-	idle->flags |= PF_IDLE;
+	/*
+	 * PF_KTHREAD should already be set at this point; regardless, make it
+	 * look like a proper per-CPU kthread.
+	 */
+	idle->flags |= PF_IDLE | PF_KTHREAD | PF_NO_SETAFFINITY;
+	kthread_set_per_cpu(idle, cpu);
 
 	scs_task_reset(idle);
 	kasan_unpoison_task_stack(idle);
@@ -8456,12 +8469,8 @@ static void balance_push(struct rq *rq)
 	/*
 	 * Both the cpu-hotplug and stop task are in this case and are
 	 * required to complete the hotplug process.
-	 *
-	 * XXX: the idle task does not match kthread_is_per_cpu() due to
-	 * histerical raisins.
 	 */
-	if (rq->idle == push_task ||
-	    kthread_is_per_cpu(push_task) ||
+	if (kthread_is_per_cpu(push_task) ||
 	    is_migration_disabled(push_task)) {
 
 		/*
-- 
GitLab


From f471fac77b41a2573c7b677ef790bf18a0e64195 Mon Sep 17 00:00:00 2001
From: Alexander Antonov <alexander.antonov@linux.intel.com>
Date: Mon, 26 Apr 2021 16:16:12 +0300
Subject: [PATCH 0916/3804] perf/x86/intel/uncore: Generalize I/O stacks to
 PMON mapping procedure

Currently, I/O stacks to IIO PMON mapping is available on Skylake servers
only, and the code needs to be made more general so that further platforms
can be enabled easily. So, introduce a get_topology() callback in struct
intel_uncore_type, which allows moving the common code into a separate
function and makes the mapping procedure more general.

Signed-off-by: Alexander Antonov <alexander.antonov@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210426131614.16205-2-alexander.antonov@linux.intel.com
---
 arch/x86/events/intel/uncore.h       |  1 +
 arch/x86/events/intel/uncore_snbep.c | 26 ++++++++++++++++++++------
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 2917910029970..187d7287039c8 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -92,6 +92,7 @@ struct intel_uncore_type {
 	/*
 	 * Optional callbacks for managing mapping of Uncore units to PMONs
 	 */
+	int (*get_topology)(struct intel_uncore_type *type);
 	int (*set_mapping)(struct intel_uncore_type *type);
 	void (*cleanup_mapping)(struct intel_uncore_type *type);
 };
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 63f097289a84c..02e36a35cedc9 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -3680,12 +3680,19 @@ static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die)
 }
 
 static umode_t
-skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr,
+			 int die, int zero_bus_pmu)
 {
 	struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj));
 
-	/* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */
-	return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode;
+	return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode;
+}
+
+static umode_t
+skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+	/* Root bus 0x00 is valid only for pmu_idx = 0. */
+	return pmu_iio_mapping_visible(kobj, attr, die, 0);
 }
 
 static ssize_t skx_iio_mapping_show(struct device *dev,
@@ -3770,7 +3777,8 @@ static const struct attribute_group *skx_iio_attr_update[] = {
 	NULL,
 };
 
-static int skx_iio_set_mapping(struct intel_uncore_type *type)
+static int
+pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag)
 {
 	char buf[64];
 	int ret;
@@ -3778,7 +3786,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
 	struct attribute **attrs = NULL;
 	struct dev_ext_attribute *eas = NULL;
 
-	ret = skx_iio_get_topology(type);
+	ret = type->get_topology(type);
 	if (ret < 0)
 		goto clear_attr_update;
 
@@ -3805,7 +3813,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type)
 		eas[die].var = (void *)die;
 		attrs[die] = &eas[die].attr.attr;
 	}
-	skx_iio_mapping_group.attrs = attrs;
+	ag->attrs = attrs;
 
 	return 0;
 err:
@@ -3819,6 +3827,11 @@ clear_attr_update:
 	return ret;
 }
 
+static int skx_iio_set_mapping(struct intel_uncore_type *type)
+{
+	return pmu_iio_set_mapping(type, &skx_iio_mapping_group);
+}
+
 static void skx_iio_cleanup_mapping(struct intel_uncore_type *type)
 {
 	struct attribute **attr = skx_iio_mapping_group.attrs;
@@ -3849,6 +3862,7 @@ static struct intel_uncore_type skx_uncore_iio = {
 	.ops			= &skx_uncore_iio_ops,
 	.format_group		= &skx_uncore_iio_format_group,
 	.attr_update		= skx_iio_attr_update,
+	.get_topology		= skx_iio_get_topology,
 	.set_mapping		= skx_iio_set_mapping,
 	.cleanup_mapping	= skx_iio_cleanup_mapping,
 };
-- 
GitLab


From c1777be3646b48f6638d8339ad270a27659adaa4 Mon Sep 17 00:00:00 2001
From: Alexander Antonov <alexander.antonov@linux.intel.com>
Date: Mon, 26 Apr 2021 16:16:13 +0300
Subject: [PATCH 0917/3804] perf/x86/intel/uncore: Enable I/O stacks to IIO
 PMON mapping on SNR

I/O stacks to PMON mapping on Skylake server relies on topology information
from the CPU_BUS_NO MSR, but this approach is not applicable to SNR and ICX.
Mapping on these platforms can be obtained by reading the SAD_CONTROL_CFG
CSR from the Mesh2IIO device with DID 0x09a2.
The SAD_CONTROL_CFG CSR contains stack IDs in its own notation, which are
statically mapped to IDs in PMON notation.

The map for Snowridge:

Stack Name         | CBDMA/DMI | PCIe Gen 3 | DLB | NIS | QAT
SAD_CONTROL_CFG ID |     0     |      1     |  2  |  3  |  4
PMON ID            |     1     |      4     |  3  |  2  |  0

This patch enables I/O stacks to IIO PMON mapping on Snowridge.
Mapping is exposed through attributes /sys/devices/uncore_iio_<pmu_idx>/dieX,
where dieX is file which holds "Segment:Root Bus" for PCIe root port which
can be monitored by that IIO PMON block. Example for Snowridge:

==> /sys/devices/uncore_iio_0/die0 <==
0000:f3
==> /sys/devices/uncore_iio_1/die0 <==
0000:00
==> /sys/devices/uncore_iio_2/die0 <==
0000:eb
==> /sys/devices/uncore_iio_3/die0 <==
0000:e3
==> /sys/devices/uncore_iio_4/die0 <==
0000:14

Mapping for Icelake server will be enabled in the follow-up patch.

Signed-off-by: Alexander Antonov <alexander.antonov@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210426131614.16205-3-alexander.antonov@linux.intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 96 ++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 02e36a35cedc9..b50c94649554a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -348,6 +348,13 @@
 #define SKX_M2M_PCI_PMON_CTR0		0x200
 #define SKX_M2M_PCI_PMON_BOX_CTL	0x258
 
+/* Memory Map registers device ID */
+#define SNR_ICX_MESH2IIO_MMAP_DID		0x9a2
+#define SNR_ICX_SAD_CONTROL_CFG		0x3f4
+
+/* Getting I/O stack id in SAD_CONTROL_CFG notation */
+#define SAD_CONTROL_STACK_ID(data)		(((data) >> 4) & 0x7)
+
 /* SNR Ubox */
 #define SNR_U_MSR_PMON_CTR0			0x1f98
 #define SNR_U_MSR_PMON_CTL0			0x1f91
@@ -4405,6 +4412,91 @@ static const struct attribute_group snr_uncore_iio_format_group = {
 	.attrs = snr_uncore_iio_formats_attr,
 };
 
+static umode_t
+snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+	/* Root bus 0x00 is valid only for pmu_idx = 1. */
+	return pmu_iio_mapping_visible(kobj, attr, die, 1);
+}
+
+static struct attribute_group snr_iio_mapping_group = {
+	.is_visible	= snr_iio_mapping_visible,
+};
+
+static const struct attribute_group *snr_iio_attr_update[] = {
+	&snr_iio_mapping_group,
+	NULL,
+};
+
+static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping)
+{
+	u32 sad_cfg;
+	int die, stack_id, ret = -EPERM;
+	struct pci_dev *dev = NULL;
+
+	type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology),
+				 GFP_KERNEL);
+	if (!type->topology)
+		return -ENOMEM;
+
+	while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) {
+		ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg);
+		if (ret) {
+			ret = pcibios_err_to_errno(ret);
+			break;
+		}
+
+		die = uncore_pcibus_to_dieid(dev->bus);
+		stack_id = SAD_CONTROL_STACK_ID(sad_cfg);
+		if (die < 0 || stack_id >= type->num_boxes) {
+			ret = -EPERM;
+			break;
+		}
+
+		/* Convert stack id from SAD_CONTROL to PMON notation. */
+		stack_id = sad_pmon_mapping[stack_id];
+
+		((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number;
+		type->topology[die].segment = pci_domain_nr(dev->bus);
+	}
+
+	if (ret) {
+		kfree(type->topology);
+		type->topology = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+enum {
+	SNR_QAT_PMON_ID,
+	SNR_CBDMA_DMI_PMON_ID,
+	SNR_NIS_PMON_ID,
+	SNR_DLB_PMON_ID,
+	SNR_PCIE_GEN3_PMON_ID
+};
+
+static u8 snr_sad_pmon_mapping[] = {
+	SNR_CBDMA_DMI_PMON_ID,
+	SNR_PCIE_GEN3_PMON_ID,
+	SNR_DLB_PMON_ID,
+	SNR_NIS_PMON_ID,
+	SNR_QAT_PMON_ID
+};
+
+static int snr_iio_get_topology(struct intel_uncore_type *type)
+{
+	return sad_cfg_iio_topology(type, snr_sad_pmon_mapping);
+}
+
+static int snr_iio_set_mapping(struct intel_uncore_type *type)
+{
+	return pmu_iio_set_mapping(type, &snr_iio_mapping_group);
+}
+
 static struct intel_uncore_type snr_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
@@ -4418,6 +4510,10 @@ static struct intel_uncore_type snr_uncore_iio = {
 	.msr_offset		= SNR_IIO_MSR_OFFSET,
 	.ops			= &ivbep_uncore_msr_ops,
 	.format_group		= &snr_uncore_iio_format_group,
+	.attr_update		= snr_iio_attr_update,
+	.get_topology		= snr_iio_get_topology,
+	.set_mapping		= snr_iio_set_mapping,
+	.cleanup_mapping	= skx_iio_cleanup_mapping,
 };
 
 static struct intel_uncore_type snr_uncore_irp = {
-- 
GitLab


From 10337e95e04c9bcd15d9bf5b26f194c92c13da56 Mon Sep 17 00:00:00 2001
From: Alexander Antonov <alexander.antonov@linux.intel.com>
Date: Mon, 26 Apr 2021 16:16:14 +0300
Subject: [PATCH 0918/3804] perf/x86/intel/uncore: Enable I/O stacks to IIO
 PMON mapping on ICX

This patch enables I/O stacks to IIO PMON mapping on Icelake server.

Mapping of IDs in SAD_CONTROL_CFG notation to IDs in PMON notation for
Icelake server:

Stack Name         | CBDMA/DMI | PCIe_1 | PCIe_2 | PCIe_3 | PCIe_4 | PCIe_5
SAD_CONTROL_CFG ID |     0     |    1   |    2   |    3   |    4   |    5
PMON ID            |     5     |    0   |    1   |    2   |    3   |    4

I/O stacks to IIO PMON mapping is exposed through attributes
/sys/devices/uncore_iio_<pmu_idx>/dieX, where dieX is file which holds
"Segment:Root Bus" for PCIe root port which can be monitored by that
IIO PMON block. Example for 2-S Icelake server:

==> /sys/devices/uncore_iio_0/die0 <==
0000:16
==> /sys/devices/uncore_iio_0/die1 <==
0000:97
==> /sys/devices/uncore_iio_1/die0 <==
0000:30
==> /sys/devices/uncore_iio_1/die1 <==
0000:b0
==> /sys/devices/uncore_iio_3/die0 <==
0000:4a
==> /sys/devices/uncore_iio_3/die1 <==
0000:c9
==> /sys/devices/uncore_iio_4/die0 <==
0000:64
==> /sys/devices/uncore_iio_4/die1 <==
0000:e2
==> /sys/devices/uncore_iio_5/die0 <==
0000:00
==> /sys/devices/uncore_iio_5/die1 <==
0000:80

Signed-off-by: Alexander Antonov <alexander.antonov@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210426131614.16205-4-alexander.antonov@linux.intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 51 ++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index b50c94649554a..76227627e3b47 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5041,6 +5041,53 @@ static struct event_constraint icx_uncore_iio_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+static umode_t
+icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die)
+{
+	/* Root bus 0x00 is valid only for pmu_idx = 5. */
+	return pmu_iio_mapping_visible(kobj, attr, die, 5);
+}
+
+static struct attribute_group icx_iio_mapping_group = {
+	.is_visible	= icx_iio_mapping_visible,
+};
+
+static const struct attribute_group *icx_iio_attr_update[] = {
+	&icx_iio_mapping_group,
+	NULL,
+};
+
+/*
+ * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON
+ */
+enum {
+	ICX_PCIE1_PMON_ID,
+	ICX_PCIE2_PMON_ID,
+	ICX_PCIE3_PMON_ID,
+	ICX_PCIE4_PMON_ID,
+	ICX_PCIE5_PMON_ID,
+	ICX_CBDMA_DMI_PMON_ID
+};
+
+static u8 icx_sad_pmon_mapping[] = {
+	ICX_CBDMA_DMI_PMON_ID,
+	ICX_PCIE1_PMON_ID,
+	ICX_PCIE2_PMON_ID,
+	ICX_PCIE3_PMON_ID,
+	ICX_PCIE4_PMON_ID,
+	ICX_PCIE5_PMON_ID,
+};
+
+static int icx_iio_get_topology(struct intel_uncore_type *type)
+{
+	return sad_cfg_iio_topology(type, icx_sad_pmon_mapping);
+}
+
+static int icx_iio_set_mapping(struct intel_uncore_type *type)
+{
+	return pmu_iio_set_mapping(type, &icx_iio_mapping_group);
+}
+
 static struct intel_uncore_type icx_uncore_iio = {
 	.name			= "iio",
 	.num_counters		= 4,
@@ -5055,6 +5102,10 @@ static struct intel_uncore_type icx_uncore_iio = {
 	.constraints		= icx_uncore_iio_constraints,
 	.ops			= &skx_uncore_iio_ops,
 	.format_group		= &snr_uncore_iio_format_group,
+	.attr_update		= icx_iio_attr_update,
+	.get_topology		= icx_iio_get_topology,
+	.set_mapping		= icx_iio_set_mapping,
+	.cleanup_mapping	= skx_iio_cleanup_mapping,
 };
 
 static struct intel_uncore_type icx_uncore_irp = {
-- 
GitLab


From b1efd0ff4bd16e8bb8607ba566b03f2024a830bb Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 10 May 2021 23:29:25 +0200
Subject: [PATCH 0919/3804] x86/cpu: Init AP exception handling from
 cpu_init_secondary()

SEV-ES guests require properly setup task register with which the TSS
descriptor in the GDT can be located so that the IST-type #VC exception
handler which they need to function properly, can be executed.

This setup needs to happen before attempting to load microcode in
ucode_cpu_init() on secondary CPUs which can cause such #VC exceptions.

Simplify the machinery by running that exception setup from a new function
cpu_init_secondary() and explicitly call cpu_init_exception_handling() for
the boot CPU before cpu_init(). The latter prepares for fixing and
simplifying the exception/IST setup on the boot CPU.

There should be no functional changes resulting from this patch.

[ tglx: Reworked it so cpu_init_exception_handling() stays separate ]

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lai Jiangshan <laijs@linux.alibaba.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/87k0o6gtvu.ffs@nanos.tec.linutronix.de
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 28 +++++++++++++++-------------
 arch/x86/kernel/smpboot.c        |  3 +--
 arch/x86/kernel/traps.c          |  4 +---
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 556b2b17c3e2f..364d0e42e2800 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -663,6 +663,7 @@ extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void load_percpu_segment(int);
 extern void cpu_init(void);
+extern void cpu_init_secondary(void);
 extern void cpu_init_exception_handling(void);
 extern void cr4_init(void);
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a1b756c49a93a..212e8bc070da6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1938,13 +1938,12 @@ void cpu_init_exception_handling(void)
 
 /*
  * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
+ * initialized (naturally) in the bootstrap process, such as the GDT.  We
+ * reload it nevertheless, this function acts as a 'CPU state barrier',
+ * nothing should get across.
  */
 void cpu_init(void)
 {
-	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
 	struct task_struct *cur = current;
 	int cpu = raw_smp_processor_id();
 
@@ -1957,8 +1956,6 @@ void cpu_init(void)
 	    early_cpu_to_node(cpu) != NUMA_NO_NODE)
 		set_numa_node(early_cpu_to_node(cpu));
 #endif
-	setup_getcpu(cpu);
-
 	pr_debug("Initializing CPU#%d\n", cpu);
 
 	if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
@@ -1970,7 +1967,6 @@ void cpu_init(void)
 	 * and set up the GDT descriptor:
 	 */
 	switch_to_new_gdt(cpu);
-	load_current_idt();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
 		loadsegment(fs, 0);
@@ -1990,12 +1986,6 @@ void cpu_init(void)
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, cur);
 
-	/* Initialize the TSS. */
-	tss_setup_ist(tss);
-	tss_setup_io_bitmap(tss);
-	set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
-
-	load_TR_desc();
 	/*
 	 * sp0 points to the entry trampoline stack regardless of what task
 	 * is running.
@@ -2017,6 +2007,18 @@ void cpu_init(void)
 	load_fixmap_gdt(cpu);
 }
 
+#ifdef CONFIG_SMP
+void cpu_init_secondary(void)
+{
+	/*
+	 * Relies on the BP having set-up the IDT tables, which are loaded
+	 * on this CPU in cpu_init_exception_handling().
+	 */
+	cpu_init_exception_handling();
+	cpu_init();
+}
+#endif
+
 /*
  * The microcode loader calls this upon late microcode load to recheck features,
  * only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7770245cc7fa7..2ed45b036629d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -232,8 +232,7 @@ static void notrace start_secondary(void *unused)
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
-	cpu_init_exception_handling();
-	cpu_init();
+	cpu_init_secondary();
 	rcu_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
 	preempt_disable();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 853ea7a808061..41f7dc4928030 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1162,9 +1162,7 @@ void __init trap_init(void)
 
 	idt_setup_traps();
 
-	/*
-	 * Should be a barrier for any external CPU state:
-	 */
+	cpu_init_exception_handling();
 	cpu_init();
 
 	idt_setup_ist_traps();
-- 
GitLab


From 1dcc917a0eed934c522d93bb05a9a7bb3c54f96c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 7 May 2021 13:02:12 +0200
Subject: [PATCH 0920/3804] x86/idt: Rework IDT setup for boot CPU

A basic IDT setup for the boot CPU has to be done before invoking
cpu_init() because that might trigger #GP when accessing certain MSRs. This
setup cannot install the IST variants on 64-bit because the TSS setup which
is required for ISTs to work happens in cpu_init(). That leaves a
theoretical window where a NMI would invoke the ASM entry point which
relies on IST being enabled on the kernel stack which is undefined
behaviour.

This setup logic has never worked correctly, but on the other hand a NMI
hitting the boot CPU before it has fully set up the IDT would be fatal
anyway. So the small window between the wrong NMI gate and the IST based
NMI gate is not really adding a substantial amount of risk.

But the setup logic is nevertheless more convoluted than necessary. The
recent separation of the TSS setup into a separate function makes it
possible to set up the TSS first, then initialize the IDT with the IST
variants before invoking cpu_init(), and to get rid of the post cpu_init()
IST setup.

Move the invocation of cpu_init_exception_handling() ahead of
idt_setup_traps() and merge the IST setup into the default setup table.

Reported-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Lai Jiangshan <laijs@linux.alibaba.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210507114000.569244755@linutronix.de
---
 arch/x86/include/asm/desc.h |  2 --
 arch/x86/kernel/idt.c       | 40 +++++++++++--------------------------
 arch/x86/kernel/traps.c     |  7 +++----
 3 files changed, 15 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 476082a83d1c1..96021e9bd2022 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -421,10 +421,8 @@ extern bool idt_is_f00f_address(unsigned long address);
 
 #ifdef CONFIG_X86_64
 extern void idt_setup_early_pf(void);
-extern void idt_setup_ist_traps(void);
 #else
 static inline void idt_setup_early_pf(void) { }
-static inline void idt_setup_ist_traps(void) { }
 #endif
 
 extern void idt_invalidate(void *addr);
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d552f177eca0e..6cce6047fa12a 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -35,12 +35,16 @@
 #define SYSG(_vector, _addr)				\
 	G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
 
+#ifdef CONFIG_X86_64
 /*
  * Interrupt gate with interrupt stack. The _ist index is the index in
  * the tss.ist[] array, but for the descriptor it needs to start at 1.
  */
 #define ISTG(_vector, _addr, _ist)			\
 	G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
+#else
+#define ISTG(_vector, _addr, _ist)	INTG(_vector, _addr)
+#endif
 
 /* Task gate */
 #define TSKG(_vector, _gdt)				\
@@ -74,7 +78,7 @@ static const __initconst struct idt_data early_idts[] = {
  */
 static const __initconst struct idt_data def_idts[] = {
 	INTG(X86_TRAP_DE,		asm_exc_divide_error),
-	INTG(X86_TRAP_NMI,		asm_exc_nmi),
+	ISTG(X86_TRAP_NMI,		asm_exc_nmi, IST_INDEX_NMI),
 	INTG(X86_TRAP_BR,		asm_exc_bounds),
 	INTG(X86_TRAP_UD,		asm_exc_invalid_op),
 	INTG(X86_TRAP_NM,		asm_exc_device_not_available),
@@ -91,12 +95,16 @@ static const __initconst struct idt_data def_idts[] = {
 #ifdef CONFIG_X86_32
 	TSKG(X86_TRAP_DF,		GDT_ENTRY_DOUBLEFAULT_TSS),
 #else
-	INTG(X86_TRAP_DF,		asm_exc_double_fault),
+	ISTG(X86_TRAP_DF,		asm_exc_double_fault, IST_INDEX_DF),
 #endif
-	INTG(X86_TRAP_DB,		asm_exc_debug),
+	ISTG(X86_TRAP_DB,		asm_exc_debug, IST_INDEX_DB),
 
 #ifdef CONFIG_X86_MCE
-	INTG(X86_TRAP_MC,		asm_exc_machine_check),
+	ISTG(X86_TRAP_MC,		asm_exc_machine_check, IST_INDEX_MCE),
+#endif
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+	ISTG(X86_TRAP_VC,		asm_exc_vmm_communication, IST_INDEX_VC),
 #endif
 
 	SYSG(X86_TRAP_OF,		asm_exc_overflow),
@@ -221,22 +229,6 @@ static const __initconst struct idt_data early_pf_idts[] = {
 	INTG(X86_TRAP_PF,		asm_exc_page_fault),
 };
 
-/*
- * The exceptions which use Interrupt stacks. They are setup after
- * cpu_init() when the TSS has been initialized.
- */
-static const __initconst struct idt_data ist_idts[] = {
-	ISTG(X86_TRAP_DB,	asm_exc_debug,			IST_INDEX_DB),
-	ISTG(X86_TRAP_NMI,	asm_exc_nmi,			IST_INDEX_NMI),
-	ISTG(X86_TRAP_DF,	asm_exc_double_fault,		IST_INDEX_DF),
-#ifdef CONFIG_X86_MCE
-	ISTG(X86_TRAP_MC,	asm_exc_machine_check,		IST_INDEX_MCE),
-#endif
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-	ISTG(X86_TRAP_VC,	asm_exc_vmm_communication,	IST_INDEX_VC),
-#endif
-};
-
 /**
  * idt_setup_early_pf - Initialize the idt table with early pagefault handler
  *
@@ -254,14 +246,6 @@ void __init idt_setup_early_pf(void)
 	idt_setup_from_table(idt_table, early_pf_idts,
 			     ARRAY_SIZE(early_pf_idts), true);
 }
-
-/**
- * idt_setup_ist_traps - Initialize the idt table with traps using IST
- */
-void __init idt_setup_ist_traps(void)
-{
-	idt_setup_from_table(idt_table, ist_idts, ARRAY_SIZE(ist_idts), true);
-}
 #endif
 
 static void __init idt_map_in_cea(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 41f7dc4928030..ed540e09a399a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1160,10 +1160,9 @@ void __init trap_init(void)
 	/* Init GHCB memory pages when running as an SEV-ES guest */
 	sev_es_init_vc_handling();
 
-	idt_setup_traps();
-
+	/* Initialize TSS before setting up traps so ISTs work */
 	cpu_init_exception_handling();
+	/* Setup traps as cpu_init() might #GP */
+	idt_setup_traps();
 	cpu_init();
-
-	idt_setup_ist_traps();
 }
-- 
GitLab


From cdf112d4c65f83065793b73b49363123517fdb71 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 23:31:14 +0200
Subject: [PATCH 0921/3804] ASoC: fsl: fix SND_SOC_IMX_RPMSG dependency

Kconfig produces a warning with SND_SOC_FSL_RPMSG=y and SND_IMX_SOC=m:

WARNING: unmet direct dependencies detected for SND_SOC_IMX_RPMSG
  Depends on [m]: SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && SND_IMX_SOC [=m] && RPMSG [=y]
  Selected by [y]:
  - SND_SOC_FSL_RPMSG [=y] && SOUND [=y] && !UML && SND [=y] && SND_SOC [=y] && COMMON_CLK [=y] && RPMSG [=y] && SND_IMX_SOC [=m]!=n

Add a dependency to prevent this configuration.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>
Link: https://lore.kernel.org/r/20210514213118.630427-1-arnd@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 0917d65d69213..556c284f49dd3 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -119,6 +119,7 @@ config SND_SOC_FSL_RPMSG
 	tristate "NXP Audio Base On RPMSG support"
 	depends on COMMON_CLK
 	depends on RPMSG
+	depends on SND_IMX_SOC || SND_IMX_SOC = n
 	select SND_SOC_IMX_RPMSG if SND_IMX_SOC != n
 	help
 	  Say Y if you want to add rpmsg audio support for the Freescale CPUs.
-- 
GitLab


From 0b1e552673724832b08d49037cdeeac634a3b319 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 17 May 2021 13:27:21 +0800
Subject: [PATCH 0922/3804] regulator: da9121: Return REGULATOR_MODE_INVALID
 for invalid mode

-EINVAL is not a valid return value for .of_map_mode, return
REGULATOR_MODE_INVALID instead.

Fixes: 65ac97042d4e ("regulator: da9121: add mode support")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Adam Ward <Adam.Ward.opensource@diasemi.com>
Link: https://lore.kernel.org/r/20210517052721.1063375-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/da9121-regulator.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/da9121-regulator.c b/drivers/regulator/da9121-regulator.c
index 08cbf688e14d3..e669250902580 100644
--- a/drivers/regulator/da9121-regulator.c
+++ b/drivers/regulator/da9121-regulator.c
@@ -280,7 +280,7 @@ static unsigned int da9121_map_mode(unsigned int mode)
 	case DA9121_BUCK_MODE_FORCE_PFM:
 		return REGULATOR_MODE_STANDBY;
 	default:
-		return -EINVAL;
+		return REGULATOR_MODE_INVALID;
 	}
 }
 
@@ -317,7 +317,7 @@ static unsigned int da9121_buck_get_mode(struct regulator_dev *rdev)
 {
 	struct da9121 *chip = rdev_get_drvdata(rdev);
 	int id = rdev_get_id(rdev);
-	unsigned int val;
+	unsigned int val, mode;
 	int ret = 0;
 
 	ret = regmap_read(chip->regmap, da9121_mode_field[id].reg, &val);
@@ -326,7 +326,11 @@ static unsigned int da9121_buck_get_mode(struct regulator_dev *rdev)
 		return -EINVAL;
 	}
 
-	return da9121_map_mode(val & da9121_mode_field[id].msk);
+	mode = da9121_map_mode(val & da9121_mode_field[id].msk);
+	if (mode == REGULATOR_MODE_INVALID)
+		return -EINVAL;
+
+	return mode;
 }
 
 static const struct regulator_ops da9121_buck_ops = {
-- 
GitLab


From 34991ee96fd8477479dd15adadceb6b28b30d9b0 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 17 May 2021 18:53:24 +0800
Subject: [PATCH 0923/3804] regulator: fan53880: Fix missing n_voltages setting

Fixes: e6dea51e2d41 ("regulator: fan53880: Add initial support")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Christoph Fritz <chf.fritz@googlemail.com>
Link: https://lore.kernel.org/r/20210517105325.1227393-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53880.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/regulator/fan53880.c b/drivers/regulator/fan53880.c
index e83eb4fb1876a..1684faf82ed25 100644
--- a/drivers/regulator/fan53880.c
+++ b/drivers/regulator/fan53880.c
@@ -51,6 +51,7 @@ static const struct regulator_ops fan53880_ops = {
 		      REGULATOR_LINEAR_RANGE(800000, 0xf, 0x73, 25000),	\
 		},							\
 		.n_linear_ranges = 2,					\
+		.n_voltages =	   0x74,				\
 		.vsel_reg =	   FAN53880_LDO ## _num ## VOUT,	\
 		.vsel_mask =	   0x7f,				\
 		.enable_reg =	   FAN53880_ENABLE,			\
@@ -76,6 +77,7 @@ static const struct regulator_desc fan53880_regulators[] = {
 		      REGULATOR_LINEAR_RANGE(600000, 0x1f, 0xf7, 12500),
 		},
 		.n_linear_ranges = 2,
+		.n_voltages =	   0xf8,
 		.vsel_reg =	   FAN53880_BUCKVOUT,
 		.vsel_mask =	   0x7f,
 		.enable_reg =	   FAN53880_ENABLE,
@@ -95,6 +97,7 @@ static const struct regulator_desc fan53880_regulators[] = {
 		      REGULATOR_LINEAR_RANGE(3000000, 0x4, 0x70, 25000),
 		},
 		.n_linear_ranges = 2,
+		.n_voltages =	   0x71,
 		.vsel_reg =	   FAN53880_BOOSTVOUT,
 		.vsel_mask =	   0x7f,
 		.enable_reg =	   FAN53880_ENABLE_BOOST,
-- 
GitLab


From 0b07154f066ab2c087c342b372be5771145bdc60 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 17 May 2021 17:39:46 +0200
Subject: [PATCH 0924/3804] dt-bindings: spi: spi-mux: rename flash node

The recent conversion of the common MTD properties to YAML now mandates
a particular node name for SPI flash devices.

Reported-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michael Walle <michael@walle.cc>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210517153946.9502-1-michael@walle.cc
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-mux.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-mux.yaml b/Documentation/devicetree/bindings/spi/spi-mux.yaml
index 6c21a132b51ff..c2c007260582a 100644
--- a/Documentation/devicetree/bindings/spi/spi-mux.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-mux.yaml
@@ -72,7 +72,7 @@ examples:
 
             mux-controls = <&mux>;
 
-            spi-flash@0 {
+            flash@0 {
                 compatible = "jedec,spi-nor";
                 reg = <0>;
                 #address-cells = <1>;
-- 
GitLab


From 3799fa23afa4cac347739d5290df44a474a82a82 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 18 May 2021 19:48:43 +0800
Subject: [PATCH 0925/3804] regulator: bd71815: Fix missing include files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include linux/of.h and linux/gpio/consumer.h to fix below errors:
error: implicit declaration of function ‘of_match_ptr’
error: implicit declaration of function ‘devm_gpiod_get_from_of_node’

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210518114843.1495152-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71815-regulator.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c
index a4e8d5e36b409..a079efa800925 100644
--- a/drivers/regulator/bd71815-regulator.c
+++ b/drivers/regulator/bd71815-regulator.c
@@ -13,6 +13,8 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/gpio/consumer.h>
 #include <linux/regulator/driver.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-- 
GitLab


From 7075359c8e0da1b01e34201b09b9ab2fd23b8a7d Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 17 May 2021 18:53:25 +0800
Subject: [PATCH 0926/3804] regulator: fan53880: Convert to use .probe_new

Use the new .probe_new for fan53880.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Christoph Fritz <chf.fritz@googlemail.com>
Link: https://lore.kernel.org/r/20210517105325.1227393-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53880.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/fan53880.c b/drivers/regulator/fan53880.c
index e83eb4fb1876a..d49f10ac222d5 100644
--- a/drivers/regulator/fan53880.c
+++ b/drivers/regulator/fan53880.c
@@ -111,8 +111,7 @@ static const struct regmap_config fan53880_regmap = {
 	.max_register = FAN53880_ENABLE_BOOST,
 };
 
-static int fan53880_i2c_probe(struct i2c_client *i2c,
-			     const struct i2c_device_id *id)
+static int fan53880_i2c_probe(struct i2c_client *i2c)
 {
 	struct regulator_config config = { };
 	struct regulator_dev *rdev;
@@ -174,7 +173,7 @@ static struct i2c_driver fan53880_regulator_driver = {
 		.name = "fan53880",
 		.of_match_table	= of_match_ptr(fan53880_dt_ids),
 	},
-	.probe = fan53880_i2c_probe,
+	.probe_new = fan53880_i2c_probe,
 	.id_table = fan53880_i2c_id,
 };
 module_i2c_driver(fan53880_regulator_driver);
-- 
GitLab


From 94acf80755c8d8dd066d4f5a2afbdf393f0e2afd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:43 +0300
Subject: [PATCH 0927/3804] spi: pxa2xx: Propagate firmware node to the child
 SPI controller device

SPI core may utilize properties and resources provided by the parent device.
Propagate firmware node to the child SPI controller device for that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5985b39e2dd60..38eef90334684 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1718,7 +1718,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	drv_data->controller_info = platform_info;
 	drv_data->ssp = ssp;
 
-	controller->dev.of_node = pdev->dev.of_node;
+	controller->dev.of_node = dev->of_node;
+	controller->dev.fwnode = dev->fwnode;
+
 	/* the spi->mode bits understood by this driver: */
 	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
 
-- 
GitLab


From 778c12e69481d544e6fcfa45b23ae3c5379b5a02 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:44 +0300
Subject: [PATCH 0928/3804] spi: pxa2xx: Switch to use SPI core GPIO
 (descriptor) CS handling

The SPI core already provides GPIO CS handling. Use it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 51 +++-------------------------------------
 drivers/spi/spi-pxa2xx.h |  3 ---
 2 files changed, 3 insertions(+), 51 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 38eef90334684..d19cea4ed9468 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1198,25 +1198,12 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
 static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 		    struct pxa2xx_spi_chip *chip_info)
 {
-	struct driver_data *drv_data =
-		spi_controller_get_devdata(spi->controller);
 	struct gpio_desc *gpiod;
 	int err = 0;
 
 	if (chip == NULL)
 		return 0;
 
-	if (drv_data->cs_gpiods) {
-		gpiod = drv_data->cs_gpiods[spi->chip_select];
-		if (gpiod) {
-			chip->gpiod_cs = gpiod;
-			chip->gpio_cs_inverted = spi->mode & SPI_CS_HIGH;
-			gpiod_set_value(gpiod, chip->gpio_cs_inverted);
-		}
-
-		return 0;
-	}
-
 	if (chip_info == NULL)
 		return 0;
 
@@ -1430,8 +1417,7 @@ static void cleanup(struct spi_device *spi)
 	if (!chip)
 		return;
 
-	if (drv_data->ssp_type != CE4100_SSP && !drv_data->cs_gpiods &&
-	    chip->gpiod_cs)
+	if (drv_data->ssp_type != CE4100_SSP && chip->gpiod_cs)
 		gpiod_put(chip->gpiod_cs);
 
 	kfree(chip);
@@ -1682,7 +1668,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	struct driver_data *drv_data;
 	struct ssp_device *ssp;
 	const struct lpss_config *config;
-	int status, count;
+	int status;
 	u32 tmp;
 
 	platform_info = dev_get_platdata(dev);
@@ -1861,38 +1847,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		}
 	}
 	controller->num_chipselect = platform_info->num_chipselect;
-
-	count = gpiod_count(&pdev->dev, "cs");
-	if (count > 0) {
-		int i;
-
-		controller->num_chipselect = max_t(int, count,
-			controller->num_chipselect);
-
-		drv_data->cs_gpiods = devm_kcalloc(&pdev->dev,
-			controller->num_chipselect, sizeof(struct gpio_desc *),
-			GFP_KERNEL);
-		if (!drv_data->cs_gpiods) {
-			status = -ENOMEM;
-			goto out_error_clock_enabled;
-		}
-
-		for (i = 0; i < controller->num_chipselect; i++) {
-			struct gpio_desc *gpiod;
-
-			gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS);
-			if (IS_ERR(gpiod)) {
-				/* Means use native chip select */
-				if (PTR_ERR(gpiod) == -ENOENT)
-					continue;
-
-				status = PTR_ERR(gpiod);
-				goto out_error_clock_enabled;
-			} else {
-				drv_data->cs_gpiods[i] = gpiod;
-			}
-		}
-	}
+	controller->use_gpio_descriptors = true;
 
 	if (platform_info->is_slave) {
 		drv_data->gpiod_ready = devm_gpiod_get_optional(dev,
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 5c6a5e0f249e9..672dfd4863cc2 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -53,9 +53,6 @@ struct driver_data {
 
 	void __iomem *lpss_base;
 
-	/* GPIOs for chip selects */
-	struct gpio_desc **cs_gpiods;
-
 	/* Optional slave FIFO ready signal */
 	struct gpio_desc *gpiod_ready;
 };
-- 
GitLab


From de6926f307e7ff605f3e37f11a4b3cc20c85c365 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:45 +0300
Subject: [PATCH 0929/3804] spi: pxa2xx: Switch to use SPI core GPIO (legacy)
 CS handling

The SPI core already provides GPIO CS handling. Use it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 59 ++++++++++++++++++----------------------
 drivers/spi/spi-pxa2xx.h | 12 ++++----
 2 files changed, 32 insertions(+), 39 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index d19cea4ed9468..1a0bcd3bac1f8 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -441,11 +441,6 @@ static void cs_assert(struct spi_device *spi)
 		return;
 	}
 
-	if (chip->gpiod_cs) {
-		gpiod_set_value(chip->gpiod_cs, chip->gpio_cs_inverted);
-		return;
-	}
-
 	if (is_lpss_ssp(drv_data))
 		lpss_ssp_cs_control(spi, true);
 }
@@ -471,11 +466,6 @@ static void cs_deassert(struct spi_device *spi)
 		return;
 	}
 
-	if (chip->gpiod_cs) {
-		gpiod_set_value(chip->gpiod_cs, !chip->gpio_cs_inverted);
-		return;
-	}
-
 	if (is_lpss_ssp(drv_data))
 		lpss_ssp_cs_control(spi, false);
 }
@@ -1195,11 +1185,19 @@ static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
 	return 0;
 }
 
+static void cleanup_cs(struct spi_device *spi)
+{
+	if (!gpio_is_valid(spi->cs_gpio))
+		return;
+
+	gpio_free(spi->cs_gpio);
+	spi->cs_gpio = -ENOENT;
+}
+
 static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 		    struct pxa2xx_spi_chip *chip_info)
 {
-	struct gpio_desc *gpiod;
-	int err = 0;
+	struct driver_data *drv_data = spi_controller_get_devdata(spi->controller);
 
 	if (chip == NULL)
 		return 0;
@@ -1207,13 +1205,13 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 	if (chip_info == NULL)
 		return 0;
 
+	if (drv_data->ssp_type == CE4100_SSP)
+		return 0;
+
 	/* NOTE: setup() can be called multiple times, possibly with
 	 * different chip_info, release previously requested GPIO
 	 */
-	if (chip->gpiod_cs) {
-		gpiod_put(chip->gpiod_cs);
-		chip->gpiod_cs = NULL;
-	}
+	cleanup_cs(spi);
 
 	/* If (*cs_control) is provided, ignore GPIO chip select */
 	if (chip_info->cs_control) {
@@ -1222,21 +1220,25 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 	}
 
 	if (gpio_is_valid(chip_info->gpio_cs)) {
-		err = gpio_request(chip_info->gpio_cs, "SPI_CS");
+		int gpio = chip_info->gpio_cs;
+		int err;
+
+		err = gpio_request(gpio, "SPI_CS");
 		if (err) {
-			dev_err(&spi->dev, "failed to request chip select GPIO%d\n",
-				chip_info->gpio_cs);
+			dev_err(&spi->dev, "failed to request chip select GPIO%d\n", gpio);
 			return err;
 		}
 
-		gpiod = gpio_to_desc(chip_info->gpio_cs);
-		chip->gpiod_cs = gpiod;
-		chip->gpio_cs_inverted = spi->mode & SPI_CS_HIGH;
+		err = gpio_direction_output(gpio, !(spi->mode & SPI_CS_HIGH));
+		if (err) {
+			gpio_free(gpio);
+			return err;
+		}
 
-		err = gpiod_direction_output(gpiod, !chip->gpio_cs_inverted);
+		spi->cs_gpio = gpio;
 	}
 
-	return err;
+	return 0;
 }
 
 static int setup(struct spi_device *spi)
@@ -1411,15 +1413,8 @@ static int setup(struct spi_device *spi)
 static void cleanup(struct spi_device *spi)
 {
 	struct chip_data *chip = spi_get_ctldata(spi);
-	struct driver_data *drv_data =
-		spi_controller_get_devdata(spi->controller);
-
-	if (!chip)
-		return;
-
-	if (drv_data->ssp_type != CE4100_SSP && chip->gpiod_cs)
-		gpiod_put(chip->gpiod_cs);
 
+	cleanup_cs(spi);
 	kfree(chip);
 }
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 672dfd4863cc2..a91fe6edb2750 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -62,19 +62,17 @@ struct chip_data {
 	u32 dds_rate;
 	u32 timeout;
 	u8 n_bytes;
+	u8 enable_dma;
 	u32 dma_burst_size;
-	u32 threshold;
 	u32 dma_threshold;
+	u32 threshold;
 	u16 lpss_rx_threshold;
 	u16 lpss_tx_threshold;
-	u8 enable_dma;
-	union {
-		struct gpio_desc *gpiod_cs;
-		unsigned int frm;
-	};
-	int gpio_cs_inverted;
+
 	int (*write)(struct driver_data *drv_data);
 	int (*read)(struct driver_data *drv_data);
+
+	unsigned int frm;
 	void (*cs_control)(u32 command);
 };
 
-- 
GitLab


From ccd60b2030a04b91977d9fee9e0ec6469ac4cd1b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:46 +0300
Subject: [PATCH 0930/3804] spi: pxa2xx: Drop duplicate chip_select in struct
 chip_data

The struct chip_data was introduced in order to keep the parameters
that may be provided on stack during device allocation. There is no
need to duplicate there the parameters that are already carried by
the SPI device itself.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 4 +---
 drivers/spi/spi-pxa2xx.h | 1 -
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 1a0bcd3bac1f8..fb80f6013d54b 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -432,7 +432,7 @@ static void cs_assert(struct spi_device *spi)
 		spi_controller_get_devdata(spi->controller);
 
 	if (drv_data->ssp_type == CE4100_SSP) {
-		pxa2xx_spi_write(drv_data, SSSR, chip->frm);
+		pxa2xx_spi_write(drv_data, SSSR, spi->chip_select);
 		return;
 	}
 
@@ -1303,8 +1303,6 @@ static int setup(struct spi_device *spi)
 				kfree(chip);
 				return -EINVAL;
 			}
-
-			chip->frm = spi->chip_select;
 		}
 		chip->enable_dma = drv_data->controller_info->enable_dma;
 		chip->timeout = TIMOUT_DFLT;
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index a91fe6edb2750..db9de46110ade 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -72,7 +72,6 @@ struct chip_data {
 	int (*write)(struct driver_data *drv_data);
 	int (*read)(struct driver_data *drv_data);
 
-	unsigned int frm;
 	void (*cs_control)(u32 command);
 };
 
-- 
GitLab


From eb743ec600be596553bf4e42c85b0bbc65083791 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:47 +0300
Subject: [PATCH 0931/3804] spi: pxa2xx: Drop unneeded '!= 0' comparisons

In a few places it's redundant to compare against 0.
Drop the unneeded comparisons.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index fb80f6013d54b..f24851b3c0202 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1380,8 +1380,8 @@ static int setup(struct spi_device *spi)
 	}
 
 	chip->cr1 &= ~(SSCR1_SPO | SSCR1_SPH);
-	chip->cr1 |= (((spi->mode & SPI_CPHA) != 0) ? SSCR1_SPH : 0)
-			| (((spi->mode & SPI_CPOL) != 0) ? SSCR1_SPO : 0);
+	chip->cr1 |= ((spi->mode & SPI_CPHA) ? SSCR1_SPH : 0) |
+		     ((spi->mode & SPI_CPOL) ? SSCR1_SPO : 0);
 
 	if (spi->mode & SPI_LOOP)
 		chip->cr1 |= SSCR1_LBM;
@@ -1859,7 +1859,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	/* Register with the SPI framework */
 	platform_set_drvdata(pdev, drv_data);
 	status = spi_register_controller(controller);
-	if (status != 0) {
+	if (status) {
 		dev_err(&pdev->dev, "problem registering spi controller\n");
 		goto out_error_pm_runtime_enabled;
 	}
@@ -1918,7 +1918,7 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	int status;
 
 	status = spi_controller_suspend(drv_data->controller);
-	if (status != 0)
+	if (status)
 		return status;
 
 	pxa_ssp_disable(ssp);
-- 
GitLab


From 684a3ac720c3fd4c09ae5aa3d349861bf12dcff6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:48 +0300
Subject: [PATCH 0932/3804] spi: pxa2xx: Fix printf() specifiers

Instead of explicit casting use the proper specifier in one case,
and fix specifier signedness in another.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index f24851b3c0202..f8264771b3609 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -983,8 +983,8 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 
 		/* warn ... we force this to PIO mode */
 		dev_warn_ratelimited(&spi->dev,
-				     "DMA disabled for transfer length %ld greater than %d\n",
-				     (long)transfer->len, MAX_DMA_LEN);
+				     "DMA disabled for transfer length %u greater than %d\n",
+				     transfer->len, MAX_DMA_LEN);
 	}
 
 	/* Setup the transfer state based on the type of transfer */
@@ -1115,8 +1115,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 
 		if (tx_level) {
 			/* On MMP2, flipping SSE doesn't to empty TXFIFO. */
-			dev_warn(&spi->dev, "%d bytes of garbage in TXFIFO!\n",
-								tx_level);
+			dev_warn(&spi->dev, "%u bytes of garbage in Tx FIFO!\n", tx_level);
 			if (tx_level > transfer->len)
 				tx_level = transfer->len;
 			drv_data->tx += tx_level;
-- 
GitLab


From 8083d6b812cac5e38db9c707b41cd478beed4a0c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:49 +0300
Subject: [PATCH 0933/3804] spi: pxa2xx: Fix style of and typos in the comments
 and messages

Fix style of the comments and messages along with typos in them.

While at it, update Intel Copyright year.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-8-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-dma.c   |  4 +-
 drivers/spi/spi-pxa2xx-pci.c   |  5 ++-
 drivers/spi/spi-pxa2xx.c       | 71 ++++++++++++++++++----------------
 drivers/spi/spi-pxa2xx.h       |  2 +-
 include/linux/pxa2xx_ssp.h     |  9 +++--
 include/linux/spi/pxa2xx_spi.h | 12 ++++--
 6 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index f022d82dcb1bf..be563f0dd03a2 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -2,7 +2,7 @@
 /*
  * PXA2xx SPI DMA engine support.
  *
- * Copyright (C) 2013, Intel Corporation
+ * Copyright (C) 2013, 2021 Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
@@ -26,7 +26,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 	 * It is possible that one CPU is handling ROR interrupt and other
 	 * just gets DMA completion. Calling pump_transfers() twice for the
 	 * same transfer leads to problems thus we prevent concurrent calls
-	 * by using ->dma_running.
+	 * by using dma_running.
 	 */
 	if (atomic_dec_and_test(&drv_data->dma_running)) {
 		/*
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index dce9ade9a4dfb..9c9992d4f5475 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -1,8 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * CE4100's SPI device is more or less the same one as found on PXA
+ * PCI glue driver for SPI PXA2xx compatible controllers.
+ * CE4100's SPI device is more or less the same one as found on PXA.
  *
- * Copyright (C) 2016, Intel Corporation
+ * Copyright (C) 2016, 2021 Intel Corporation
  */
 #include <linux/clk-provider.h>
 #include <linux/module.h>
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index f8264771b3609..94b1585de203b 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
- * Copyright (C) 2013, Intel Corporation
+ * Copyright (C) 2013, 2021 Intel Corporation
  */
 
 #include <linux/acpi.h>
@@ -40,11 +40,11 @@ MODULE_ALIAS("platform:pxa2xx-spi");
 #define TIMOUT_DFLT		1000
 
 /*
- * for testing SSCR1 changes that require SSP restart, basically
- * everything except the service and interrupt enables, the pxa270 developer
+ * For testing SSCR1 changes that require SSP restart, basically
+ * everything except the service and interrupt enables, the PXA270 developer
  * manual says only SSCR1_SCFR, SSCR1_SPH, SSCR1_SPO need to be in this
- * list, but the PXA255 dev man says all bits without really meaning the
- * service and interrupt enables
+ * list, but the PXA255 developer manual says all bits without really meaning
+ * the service and interrupt enables.
  */
 #define SSCR1_CHANGE_MASK (SSCR1_TTELP | SSCR1_TTE | SSCR1_SCFR \
 				| SSCR1_ECRA | SSCR1_ECRB | SSCR1_SCLKDIR \
@@ -653,12 +653,12 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 		irq_status &= ~SSSR_TFS;
 
 	if (irq_status & SSSR_ROR) {
-		int_error_stop(drv_data, "interrupt_transfer: fifo overrun", -EIO);
+		int_error_stop(drv_data, "interrupt_transfer: FIFO overrun", -EIO);
 		return IRQ_HANDLED;
 	}
 
 	if (irq_status & SSSR_TUR) {
-		int_error_stop(drv_data, "interrupt_transfer: fifo underrun", -EIO);
+		int_error_stop(drv_data, "interrupt_transfer: FIFO underrun", -EIO);
 		return IRQ_HANDLED;
 	}
 
@@ -670,7 +670,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 		}
 	}
 
-	/* Drain rx fifo, Fill tx fifo and prevent overruns */
+	/* Drain Rx FIFO, Fill Tx FIFO and prevent overruns */
 	do {
 		if (drv_data->read(drv_data)) {
 			int_transfer_complete(drv_data);
@@ -691,8 +691,8 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
 		sccr1_reg &= ~SSCR1_TIE;
 
 		/*
-		 * PXA25x_SSP has no timeout, set up rx threshould for the
-		 * remaining RX bytes.
+		 * PXA25x_SSP has no timeout, set up Rx threshold for
+		 * the remaining Rx bytes.
 		 */
 		if (pxa25x_ssp_comp(drv_data)) {
 			u32 rx_thre;
@@ -914,7 +914,7 @@ static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 
 	/*
 	 * Calculate the divisor for the SCR (Serial Clock Rate), avoiding
-	 * that the SSP transmission rate can be greater than the device rate
+	 * that the SSP transmission rate can be greater than the device rate.
 	 */
 	if (ssp->type == PXA25x_SSP || ssp->type == CE4100_SSP)
 		return (DIV_ROUND_UP(ssp_clk, 2 * rate) - 1) & 0xff;
@@ -972,7 +972,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	/* Check if we can DMA this transfer */
 	if (transfer->len > MAX_DMA_LEN && chip->enable_dma) {
 
-		/* reject already-mapped transfers; PIO won't always work */
+		/* Reject already-mapped transfers; PIO won't always work */
 		if (message->is_dma_mapped
 				|| transfer->rx_dma || transfer->tx_dma) {
 			dev_err(&spi->dev,
@@ -981,7 +981,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 			return -EINVAL;
 		}
 
-		/* warn ... we force this to PIO mode */
+		/* Warn ... we force this to PIO mode */
 		dev_warn_ratelimited(&spi->dev,
 				     "DMA disabled for transfer length %u greater than %d\n",
 				     transfer->len, MAX_DMA_LEN);
@@ -1026,8 +1026,8 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 					u32_writer : null_writer;
 	}
 	/*
-	 * if bits/word is changed in dma mode, then must check the
-	 * thresholds and burst also
+	 * If bits per word is changed in DMA mode, then must check
+	 * the thresholds and burst also.
 	 */
 	if (chip->enable_dma) {
 		if (pxa2xx_spi_set_dma_burst_and_threshold(chip,
@@ -1101,10 +1101,10 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 
-	/* first set CR1 without interrupt and service enables */
+	/* First set CR1 without interrupt and service enables */
 	pxa2xx_spi_update(drv_data, SSCR1, change_mask, cr1);
 
-	/* see if we need to reload the config registers */
+	/* See if we need to reload the configuration registers */
 	pxa2xx_spi_update(drv_data, SSCR0, GENMASK(31, 0), cr0);
 
 	/* Restart the SSP */
@@ -1114,7 +1114,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 		u8 tx_level = read_SSSR_bits(drv_data, SSSR_TFL_MASK) >> 8;
 
 		if (tx_level) {
-			/* On MMP2, flipping SSE doesn't to empty TXFIFO. */
+			/* On MMP2, flipping SSE doesn't to empty Tx FIFO. */
 			dev_warn(&spi->dev, "%u bytes of garbage in Tx FIFO!\n", tx_level);
 			if (tx_level > transfer->len)
 				tx_level = transfer->len;
@@ -1134,7 +1134,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 
 	/*
 	 * Release the data by enabling service requests and interrupts,
-	 * without changing any mode bits
+	 * without changing any mode bits.
 	 */
 	pxa2xx_spi_write(drv_data, SSCR1, cr1);
 
@@ -1207,12 +1207,13 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 	if (drv_data->ssp_type == CE4100_SSP)
 		return 0;
 
-	/* NOTE: setup() can be called multiple times, possibly with
-	 * different chip_info, release previously requested GPIO
+	/*
+	 * NOTE: setup() can be called multiple times, possibly with
+	 * different chip_info, release previously requested GPIO.
 	 */
 	cleanup_cs(spi);
 
-	/* If (*cs_control) is provided, ignore GPIO chip select */
+	/* If ->cs_control() is provided, ignore GPIO chip select */
 	if (chip_info->cs_control) {
 		chip->cs_control = chip_info->cs_control;
 		return 0;
@@ -1288,7 +1289,7 @@ static int setup(struct spi_device *spi)
 		break;
 	}
 
-	/* Only alloc on first setup */
+	/* Only allocate on the first setup */
 	chip = spi_get_ctldata(spi);
 	if (!chip) {
 		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
@@ -1307,8 +1308,10 @@ static int setup(struct spi_device *spi)
 		chip->timeout = TIMOUT_DFLT;
 	}
 
-	/* protocol drivers may change the chip settings, so...
-	 * if chip_info exists, use it */
+	/*
+	 * Protocol drivers may change the chip settings, so...
+	 * if chip_info exists, use it.
+	 */
 	chip_info = spi->controller_data;
 
 	/* chip_info isn't always needed */
@@ -1344,11 +1347,13 @@ static int setup(struct spi_device *spi)
 		chip->lpss_tx_threshold = tx_thres;
 	}
 
-	/* set dma burst and threshold outside of chip_info path so that if
-	 * chip_info goes away after setting chip->enable_dma, the
-	 * burst and threshold can still respond to changes in bits_per_word */
+	/*
+	 * Set DMA burst and threshold outside of chip_info path so that if
+	 * chip_info goes away after setting chip->enable_dma, the burst and
+	 * threshold can still respond to changes in bits_per_word.
+	 */
 	if (chip->enable_dma) {
-		/* set up legal burst and threshold for dma */
+		/* Set up legal burst and threshold for DMA */
 		if (pxa2xx_spi_set_dma_burst_and_threshold(chip, spi,
 						spi->bits_per_word,
 						&chip->dma_burst_size,
@@ -1677,7 +1682,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		ssp = &platform_info->ssp;
 
 	if (!ssp->mmio_base) {
-		dev_err(&pdev->dev, "failed to get ssp\n");
+		dev_err(&pdev->dev, "failed to get SSP\n");
 		return -ENODEV;
 	}
 
@@ -1699,7 +1704,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	controller->dev.of_node = dev->of_node;
 	controller->dev.fwnode = dev->fwnode;
 
-	/* the spi->mode bits understood by this driver: */
+	/* The spi->mode bits understood by this driver: */
 	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
 
 	controller->bus_num = ssp->port_id;
@@ -1787,7 +1792,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		      QUARK_X1000_SSCR1_TxTresh(TX_THRESH_QUARK_X1000_DFLT);
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
 
-		/* using the Motorola SPI protocol and use 8 bit frame */
+		/* Using the Motorola SPI protocol and use 8 bit frame */
 		tmp = QUARK_X1000_SSCR0_Motorola | QUARK_X1000_SSCR0_DataSize(8);
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
@@ -1859,7 +1864,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, drv_data);
 	status = spi_register_controller(controller);
 	if (status) {
-		dev_err(&pdev->dev, "problem registering spi controller\n");
+		dev_err(&pdev->dev, "problem registering SPI controller\n");
 		goto out_error_pm_runtime_enabled;
 	}
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index db9de46110ade..9a20fb88e50f4 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs
- * Copyright (C) 2013, Intel Corporation
+ * Copyright (C) 2013, 2021 Intel Corporation
  */
 
 #ifndef SPI_PXA2XX_H
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 2b21bc1f3c732..a3fec2de512fc 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- *  Copyright (C) 2003 Russell King, All Rights Reserved.
+ * Copyright (C) 2003 Russell King, All Rights Reserved.
  *
  * This driver supports the following PXA CPU/SSP ports:-
  *
@@ -59,7 +59,7 @@ struct device_node;
 /* PXA27x, PXA3xx */
 #define SSCR0_EDSS	BIT(20)		/* Extended data size select */
 #define SSCR0_NCS	BIT(21)		/* Network clock select */
-#define SSCR0_RIM	BIT(22)		/* Receive FIFO overrrun interrupt mask */
+#define SSCR0_RIM	BIT(22)		/* Receive FIFO overrun interrupt mask */
 #define SSCR0_TUM	BIT(23)		/* Transmit FIFO underrun interrupt mask */
 #define SSCR0_FRDC	GENMASK(26, 24)	/* Frame rate divider control (mask) */
 #define SSCR0_SlotsPerFrm(x) (((x) - 1) << 24)	/* Time slots per frame [1..8] */
@@ -126,7 +126,7 @@ struct device_node;
 #define QUARK_X1000_SSCR1_EFWR	BIT(16)		/* Enable FIFO Write/Read */
 #define QUARK_X1000_SSCR1_STRF	BIT(17)		/* Select FIFO or EFWR */
 
-/* extra bits in PXA255, PXA26x and PXA27x SSP ports */
+/* Extra bits in PXA255, PXA26x and PXA27x SSP ports */
 #define SSCR0_TISSP		(1 << 4)	/* TI Sync Serial Protocol */
 #define SSCR0_PSP		(3 << 4)	/* PSP - Programmable Serial Protocol */
 
@@ -222,7 +222,8 @@ enum pxa_ssp_type {
 	CE4100_SSP,
 	MRFLD_SSP,
 	QUARK_X1000_SSP,
-	LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */
+	/* Keep LPSS types sorted with lpss_platforms[] */
+	LPSS_LPT_SSP,
 	LPSS_BYT_SSP,
 	LPSS_BSW_SSP,
 	LPSS_SPT_SSP,
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index 12ef04d0896d2..eaab121ee5751 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -14,7 +14,10 @@
 
 struct dma_chan;
 
-/* device.platform_data for SSP controller devices */
+/*
+ * The platform data for SSP controller devices
+ * (resides in device.platform_data).
+ */
 struct pxa2xx_spi_controller {
 	u16 num_chipselect;
 	u8 enable_dma;
@@ -30,8 +33,11 @@ struct pxa2xx_spi_controller {
 	struct ssp_device ssp;
 };
 
-/* spi_board_info.controller_data for SPI slave devices,
- * copied to spi_device.platform_data ... mostly for dma tuning
+/*
+ * The controller specific data for SPI slave devices
+ * (resides in spi_board_info.controller_data),
+ * copied to spi_device.platform_data ... mostly for
+ * DMA tuning.
  */
 struct pxa2xx_spi_chip {
 	u8 tx_threshold;
-- 
GitLab


From f96e6c0ef63b981d295547ef624f4da7c820e097 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:50 +0300
Subject: [PATCH 0934/3804] spi: pxa2xx: Update documentation to point out that
 it's outdated

Update documentation by pointing out that it's applicable mostly
for a legacy platform. While at it, add a couple of points with regard
to ACPI, Device Tree, and automatic DMA enablement.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-9-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/spi/pxa2xx.rst | 58 ++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/Documentation/spi/pxa2xx.rst b/Documentation/spi/pxa2xx.rst
index 882d3cc72cc2b..6312968acfe99 100644
--- a/Documentation/spi/pxa2xx.rst
+++ b/Documentation/spi/pxa2xx.rst
@@ -2,43 +2,47 @@
 PXA2xx SPI on SSP driver HOWTO
 ==============================
 
-This a mini howto on the pxa2xx_spi driver.  The driver turns a PXA2xx
-synchronous serial port into a SPI master controller
+This a mini HOWTO on the pxa2xx_spi driver. The driver turns a PXA2xx
+synchronous serial port into an SPI master controller
 (see Documentation/spi/spi-summary.rst). The driver has the following features
 
-- Support for any PXA2xx SSP
+- Support for any PXA2xx and compatible SSP.
 - SSP PIO and SSP DMA data transfers.
 - External and Internal (SSPFRM) chip selects.
 - Per slave device (chip) configuration.
 - Full suspend, freeze, resume support.
 
-The driver is built around a "spi_message" fifo serviced by workqueue and a
-tasklet. The workqueue, "pump_messages", drives message fifo and the tasklet
-(pump_transfer) is responsible for queuing SPI transactions and setting up and
-launching the dma/interrupt driven transfers.
+The driver is built around a &struct spi_message FIFO serviced by kernel
+thread. The kernel thread, spi_pump_messages(), drives message FIFO and
+is responsible for queuing SPI transactions and setting up and launching
+the DMA or interrupt driven transfers.
 
 Declaring PXA2xx Master Controllers
 -----------------------------------
-Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
-"platform device".  The master configuration is passed to the driver via a table
-found in include/linux/spi/pxa2xx_spi.h::
+Typically, for a legacy platform, an SPI master is defined in the
+arch/.../mach-*/board-*.c as a "platform device". The master configuration
+is passed to the driver via a table found in include/linux/spi/pxa2xx_spi.h::
 
   struct pxa2xx_spi_controller {
 	u16 num_chipselect;
 	u8 enable_dma;
+	...
   };
 
 The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of
 slave device (chips) attached to this SPI master.
 
 The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should
-be used.  This caused the driver to acquire two DMA channels: rx_channel and
-tx_channel.  The rx_channel has a higher DMA service priority the tx_channel.
+be used. This caused the driver to acquire two DMA channels: Rx channel and
+Tx channel. The Rx channel has a higher DMA service priority than the Tx channel.
 See the "PXA2xx Developer Manual" section "DMA Controller".
 
+For the new platforms the description of the controller and peripheral devices
+comes from Device Tree or ACPI.
+
 NSSP MASTER SAMPLE
 ------------------
-Below is a sample configuration using the PXA255 NSSP::
+Below is a sample configuration using the PXA255 NSSP for a legacy platform::
 
   static struct resource pxa_spi_nssp_resources[] = {
 	[0] = {
@@ -79,9 +83,10 @@ Below is a sample configuration using the PXA255 NSSP::
 
 Declaring Slave Devices
 -----------------------
-Typically each SPI slave (chip) is defined in the arch/.../mach-*/board-*.c
-using the "spi_board_info" structure found in "linux/spi/spi.h". See
-"Documentation/spi/spi-summary.rst" for additional information.
+Typically, for a legacy platform, each SPI slave (chip) is defined in the
+arch/.../mach-*/board-*.c using the "spi_board_info" structure found in
+"linux/spi/spi.h". See "Documentation/spi/spi-summary.rst" for additional
+information.
 
 Each slave device attached to the PXA must provide slave specific configuration
 information via the structure "pxa2xx_spi_chip" found in
@@ -101,9 +106,9 @@ device. All fields are optional.
   };
 
 The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
-used to configure the SSP hardware fifo.  These fields are critical to the
+used to configure the SSP hardware FIFO. These fields are critical to the
 performance of pxa2xx_spi driver and misconfiguration will result in rx
-fifo overruns (especially in PIO mode transfers). Good default values are::
+FIFO overruns (especially in PIO mode transfers). Good default values are::
 
 	.tx_threshold = 8,
 	.rx_threshold = 8,
@@ -118,7 +123,7 @@ use a value of 8. The driver will determine a reasonable default if
 dma_burst_size == 0.
 
 The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle
-trailing bytes in the SSP receiver fifo.  The correct value for this field is
+trailing bytes in the SSP receiver FIFO. The correct value for this field is
 dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
 slave device.  Please note that the PXA2xx SSP 1 does not support trailing byte
 timeouts and must busy-wait any trailing bytes.
@@ -131,19 +136,19 @@ testing.
 The "pxa2xx_spi_chip.cs_control" field is used to point to a board specific
 function for asserting/deasserting a slave device chip select.  If the field is
 NULL, the pxa2xx_spi master controller driver assumes that the SSP port is
-configured to use SSPFRM instead.
+configured to use GPIO or SSPFRM instead.
 
 NOTE: the SPI driver cannot control the chip select if SSPFRM is used, so the
 chipselect is dropped after each spi_transfer.  Most devices need chip select
-asserted around the complete message.  Use SSPFRM as a GPIO (through cs_control)
+asserted around the complete message. Use SSPFRM as a GPIO (through a descriptor)
 to accommodate these chips.
 
 
 NSSP SLAVE SAMPLE
 -----------------
-The pxa2xx_spi_chip structure is passed to the pxa2xx_spi driver in the
-"spi_board_info.controller_data" field. Below is a sample configuration using
-the PXA255 NSSP.
+For a legacy platform or in some other cases, the pxa2xx_spi_chip structure
+is passed to the pxa2xx_spi driver in the "spi_board_info.controller_data"
+field. Below is a sample configuration using the PXA255 NSSP.
 
 ::
 
@@ -212,7 +217,9 @@ DMA and PIO I/O Support
 -----------------------
 The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
 transfers.  The driver defaults to PIO mode and DMA transfers must be enabled
-by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure.  The DMA
+by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure.
+For the newer platforms, that are known to support DMA, the driver will enable
+it automatically and try it first with a possible fallback to PIO. The DMA
 mode supports both coherent and stream based DMA mappings.
 
 The following logic is used to determine the type of I/O to be used on
@@ -236,5 +243,4 @@ a per "spi_transfer" basis::
 
 THANKS TO
 ---------
-
 David Brownell and others for mentoring the development of this driver.
-- 
GitLab


From 70252440b2b6337d03f2b95bc475fedbea79072f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 May 2021 17:03:51 +0300
Subject: [PATCH 0935/3804] spi: pxa2xx: Use predefined mask when programming
 FIFO thresholds

The predefined mask for threshold modification can be used
in case of Intel Merrifield SPI. Replace open-coded value
with predefined mask when programming FIFO thresholds.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210517140351.901-10-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 94b1585de203b..bdf9a283efc31 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1083,12 +1083,13 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 	}
 
 	if (is_mrfld_ssp(drv_data)) {
+		u32 mask = SFIFOTT_RFT | SFIFOTT_TFT;
 		u32 thresh = 0;
 
 		thresh |= SFIFOTT_RxThresh(chip->lpss_rx_threshold);
 		thresh |= SFIFOTT_TxThresh(chip->lpss_tx_threshold);
 
-		pxa2xx_spi_update(drv_data, SFIFOTT, 0xffffffff, thresh);
+		pxa2xx_spi_update(drv_data, SFIFOTT, mask, thresh);
 	}
 
 	if (is_quark_x1000_ssp(drv_data))
-- 
GitLab


From d7aed20d446d8c87f5e13adf73281056b0064a45 Mon Sep 17 00:00:00 2001
From: Alain Volmat <alain.volmat@foss.st.com>
Date: Wed, 12 May 2021 07:20:42 +0200
Subject: [PATCH 0936/3804] MAINTAINERS: Add Alain Volmat as STM32 SPI
 maintainer

Add Alain Volmat as STM32 SPI maintainer.

Signed-off-by: Alain Volmat <alain.volmat@foss.st.com>
Reviewed-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/1620796842-23546-1-git-send-email-alain.volmat@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1c98272079492..528068e318450 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16954,6 +16954,12 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-stm32*
 
+ST STM32 SPI DRIVER
+M:	Alain Volmat <alain.volmat@foss.st.com>
+L:	linux-spi@vger.kernel.org
+S:	Maintained
+F:	drivers/spi/spi-stm32.c
+
 ST VL53L0X ToF RANGER(I2C) IIO DRIVER
 M:	Song Qiang <songqiang1304521@gmail.com>
 L:	linux-iio@vger.kernel.org
-- 
GitLab


From d37316b72e8bf95a52d1c3e93c823d128c09b521 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Tue, 18 May 2021 16:45:14 +0930
Subject: [PATCH 0937/3804] ARM: npcm: wpcm450: select interrupt controller
 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interrupt controller driver is necessary in order to have a
functioning Linux system on WPCM450. Select it in mach-npcm/Kconfig.

Fixes: ece3fe93e8f4 ("ARM: npcm: Introduce Nuvoton WPCM450 SoC")
Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20210513165627.1767093-1-j.neuschaefer@gmx.net
Link: https://lore.kernel.org/r/20210518071514.604492-1-joel@jms.id.au
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-npcm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig
index 658c8efb4ca14..a71cf1d189ae5 100644
--- a/arch/arm/mach-npcm/Kconfig
+++ b/arch/arm/mach-npcm/Kconfig
@@ -10,6 +10,7 @@ config ARCH_WPCM450
 	bool "Support for WPCM450 BMC (Hermon)"
 	depends on ARCH_MULTI_V5
 	select CPU_ARM926T
+	select WPCM450_AIC
 	select NPCM7XX_TIMER
 	help
 	  General support for WPCM450 BMC (Hermon).
-- 
GitLab


From 1897907cca5aa22cdfcdb7fb8f0644a6add0877d Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 19 Apr 2021 21:49:55 +0000
Subject: [PATCH 0938/3804] Documentation/x86: Add buslock.rst

Add buslock.rst to explain bus lock problem and how to detect and
handle it.

[ tglx: Included it into index.rst and added the missing include ... ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210419214958.4035512-2-fenghua.yu@intel.com
---
 Documentation/x86/buslock.rst | 104 ++++++++++++++++++++++++++++++++++
 Documentation/x86/index.rst   |   1 +
 2 files changed, 105 insertions(+)
 create mode 100644 Documentation/x86/buslock.rst

diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst
new file mode 100644
index 0000000000000..159ff6ba830ee
--- /dev/null
+++ b/Documentation/x86/buslock.rst
@@ -0,0 +1,104 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+===============================
+Bus lock detection and handling
+===============================
+
+:Copyright: |copy| 2021 Intel Corporation
+:Authors: - Fenghua Yu <fenghua.yu@intel.com>
+          - Tony Luck <tony.luck@intel.com>
+
+Problem
+=======
+
+A split lock is any atomic operation whose operand crosses two cache lines.
+Since the operand spans two cache lines and the operation must be atomic,
+the system locks the bus while the CPU accesses the two cache lines.
+
+A bus lock is acquired through either split locked access to writeback (WB)
+memory or any locked access to non-WB memory. This is typically thousands of
+cycles slower than an atomic operation within a cache line. It also disrupts
+performance on other cores and brings the whole system to its knees.
+
+Detection
+=========
+
+Intel processors may support either or both of the following hardware
+mechanisms to detect split locks and bus locks.
+
+#AC exception for split lock detection
+--------------------------------------
+
+Beginning with the Tremont Atom CPU split lock operations may raise an
+Alignment Check (#AC) exception when a split lock operation is attempted.
+
+#DB exception for bus lock detection
+------------------------------------
+
+Some CPUs have the ability to notify the kernel by an #DB trap after a user
+instruction acquires a bus lock and is executed. This allows the kernel to
+terminate the application or to enforce throttling.
+
+Software handling
+=================
+
+The kernel #AC and #DB handlers handle bus lock based on the kernel
+parameter "split_lock_detect". Here is a summary of different options:
+
++------------------+----------------------------+-----------------------+
+|split_lock_detect=|#AC for split lock		|#DB for bus lock	|
++------------------+----------------------------+-----------------------+
+|off	  	   |Do nothing			|Do nothing		|
++------------------+----------------------------+-----------------------+
+|warn		   |Kernel OOPs			|Warn once per task and |
+|(default)	   |Warn once per task and	|continues to run.      |
+|		   |disable future checking	|			|
+|		   |When both features are	|			|
+|		   |supported, warn in #AC	|			|
++------------------+----------------------------+-----------------------+
+|fatal		   |Kernel OOPs			|Send SIGBUS to user.	|
+|		   |Send SIGBUS to user		|			|
+|		   |When both features are	|			|
+|		   |supported, fatal in #AC	|			|
++------------------+----------------------------+-----------------------+
+
+Usages
+======
+
+Detecting and handling bus lock may find usages in various areas:
+
+It is critical for real time system designers who build consolidated real
+time systems. These systems run hard real time code on some cores and run
+"untrusted" user processes on other cores. The hard real time cannot afford
+to have any bus lock from the untrusted processes to hurt real time
+performance. To date the designers have been unable to deploy these
+solutions as they have no way to prevent the "untrusted" user code from
+generating split locks and bus locks that block the hard real time code
+from accessing memory during bus locking.
+
+It's also useful for general computing to prevent guests or user
+applications from slowing down the overall system by executing instructions
+with bus lock.
+
+
+Guidance
+========
+off
+---
+
+Disable checking for split lock and bus lock. This option can be useful if
+there are legacy applications that trigger these events at a low rate so
+that mitigation is not needed.
+
+warn
+----
+
+A warning is emitted when a bus lock is detected, which allows the
+offending application to be identified. This is the default behavior.
+
+fatal
+-----
+
+In this case, the bus lock is not tolerated and the process is killed.
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index 4693e192b447c..0004f5d2283ee 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -29,6 +29,7 @@ x86-specific Documentation
    microcode
    resctrl
    tsx_async_abort
+   buslock
    usb-legacy-support
    i386/index
    x86_64/index
-- 
GitLab


From ef4ae6e4413159d2329a172c12e9274e2cb0a3a8 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 19 Apr 2021 21:49:56 +0000
Subject: [PATCH 0939/3804] x86/bus_lock: Set rate limit for bus lock

A bus lock can be thousands of cycles slower than atomic operation within
one cache line. It also disrupts performance on other cores. Malicious
users can generate multiple bus locks to degrade the whole system
performance.

The current mitigation is to kill the offending process, but for certain
scenarios it's desired to identify and throttle the offending application.

Add a system wide rate limit for bus locks. When the system detects bus
locks at a rate higher than N/sec (where N can be set by the kernel boot
argument in the range [1..1000]) any task triggering a bus lock will be
forced to sleep for at least 20ms until the overall system rate of bus
locks drops below the threshold.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210419214958.4035512-3-fenghua.yu@intel.com
---
 arch/x86/kernel/cpu/intel.c | 42 +++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8adffc17fa8b8..7c23f0397390b 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -10,6 +10,7 @@
 #include <linux/thread_info.h>
 #include <linux/init.h>
 #include <linux/uaccess.h>
+#include <linux/delay.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -41,6 +42,7 @@ enum split_lock_detect_state {
 	sld_off = 0,
 	sld_warn,
 	sld_fatal,
+	sld_ratelimit,
 };
 
 /*
@@ -997,13 +999,30 @@ static const struct {
 	{ "off",	sld_off   },
 	{ "warn",	sld_warn  },
 	{ "fatal",	sld_fatal },
+	{ "ratelimit:", sld_ratelimit },
 };
 
+static struct ratelimit_state bld_ratelimit;
+
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
-	int len = strlen(opt);
+	int len = strlen(opt), ratelimit;
+
+	if (strncmp(arg, opt, len))
+		return false;
+
+	/*
+	 * Min ratelimit is 1 bus lock/sec.
+	 * Max ratelimit is 1000 bus locks/sec.
+	 */
+	if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 &&
+	    ratelimit > 0 && ratelimit <= 1000) {
+		ratelimit_state_init(&bld_ratelimit, HZ, ratelimit);
+		ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE);
+		return true;
+	}
 
-	return len == arglen && !strncmp(arg, opt, len);
+	return len == arglen;
 }
 
 static bool split_lock_verify_msr(bool on)
@@ -1082,6 +1101,15 @@ static void sld_update_msr(bool on)
 
 static void split_lock_init(void)
 {
+	/*
+	 * #DB for bus lock handles ratelimit and #AC for split lock is
+	 * disabled.
+	 */
+	if (sld_state == sld_ratelimit) {
+		split_lock_verify_msr(false);
+		return;
+	}
+
 	if (cpu_model_supports_sld)
 		split_lock_verify_msr(sld_state != sld_off);
 }
@@ -1154,6 +1182,12 @@ void handle_bus_lock(struct pt_regs *regs)
 	switch (sld_state) {
 	case sld_off:
 		break;
+	case sld_ratelimit:
+		/* Enforce no more than bld_ratelimit bus locks/sec. */
+		while (!__ratelimit(&bld_ratelimit))
+			msleep(20);
+		/* Warn on the bus lock. */
+		fallthrough;
 	case sld_warn:
 		pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n",
 				    current->comm, current->pid, regs->ip);
@@ -1259,6 +1293,10 @@ static void sld_state_show(void)
 				" from non-WB" : "");
 		}
 		break;
+	case sld_ratelimit:
+		if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+			pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst);
+		break;
 	}
 }
 
-- 
GitLab


From 9d839c280b64817345c2fa462c0027a9bd742361 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 19 Apr 2021 21:49:57 +0000
Subject: [PATCH 0940/3804] Documentation/admin-guide: Add bus lock ratelimit

Since bus lock rate limit changes the split_lock_detect parameter,
update the documentation for the change.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210419214958.4035512-4-fenghua.yu@intel.com
---
 Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc463..ca94624438b97 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5283,6 +5283,14 @@
 				  exception. Default behavior is by #AC if
 				  both features are enabled in hardware.
 
+			ratelimit:N -
+				  Set system wide rate limit to N bus locks
+				  per second for bus lock detection.
+				  0 < N <= 1000.
+
+				  N/A for split lock detection.
+
+
 			If an #AC exception is hit in the kernel or in
 			firmware (i.e. not while executing in user mode)
 			the kernel will oops in either "warn" or "fatal"
-- 
GitLab


From d28397eaf4c27947a1ffc720d42e8b3a33ae1e2a Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Mon, 19 Apr 2021 21:49:58 +0000
Subject: [PATCH 0941/3804] Documentation/x86: Add ratelimit in buslock.rst

ratelimit is a new command line option for bus lock handling. Add proper
documentation.

[ tglx: Massaged documentation ]

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20210419214958.4035512-5-fenghua.yu@intel.com
---
 Documentation/x86/buslock.rst | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Documentation/x86/buslock.rst b/Documentation/x86/buslock.rst
index 159ff6ba830ee..7c051e714943c 100644
--- a/Documentation/x86/buslock.rst
+++ b/Documentation/x86/buslock.rst
@@ -63,6 +63,11 @@ parameter "split_lock_detect". Here is a summary of different options:
 |		   |When both features are	|			|
 |		   |supported, fatal in #AC	|			|
 +------------------+----------------------------+-----------------------+
+|ratelimit:N	   |Do nothing			|Limit bus lock rate to	|
+|(0 < N <= 1000)   |				|N bus locks per second	|
+|		   |				|system wide and warn on|
+|		   |				|bus locks.		|
++------------------+----------------------------+-----------------------+
 
 Usages
 ======
@@ -102,3 +107,20 @@ fatal
 -----
 
 In this case, the bus lock is not tolerated and the process is killed.
+
+ratelimit
+---------
+
+A system wide bus lock rate limit N is specified where 0 < N <= 1000. This
+allows a bus lock rate up to N bus locks per second. When the bus lock rate
+is exceeded then any task which is caught via the buslock #DB exception is
+throttled by enforced sleeps until the rate goes under the limit again.
+
+This is an effective mitigation in cases where a minimal impact can be
+tolerated, but an eventual Denial of Service attack has to be prevented. It
+allows to identify the offending processes and analyze whether they are
+malicious or just badly written.
+
+Selecting a rate limit of 1000 allows the bus to be locked for up to about
+seven million cycles each second (assuming 7000 cycles for each bus
+lock). On a 2 GHz processor that would be about 0.35% system slowdown.
-- 
GitLab


From 5881fa8dc2de9697a89451f6518e8b3a796c09c6 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Fri, 7 May 2021 14:53:04 +0200
Subject: [PATCH 0942/3804] debugfs: fix security_locked_down() call for
 SELinux

When (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) is zero, then
the SELinux implementation of the locked_down hook might report a denial
even though the operation would actually be allowed.

To fix this, make sure that security_locked_down() is called only when
the return value will be taken into account (i.e. when changing one of
the problematic attributes).

Note: this was introduced by commit 5496197f9b08 ("debugfs: Restrict
debugfs when the kernel is locked down"), but it didn't matter at that
time, as the SELinux support came in later.

Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Link: https://lore.kernel.org/r/20210507125304.144394-1-omosnace@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 1d252164d97b6..8129a430d789d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -45,10 +45,13 @@ static unsigned int debugfs_allow __ro_after_init = DEFAULT_DEBUGFS_ALLOW_BITS;
 static int debugfs_setattr(struct user_namespace *mnt_userns,
 			   struct dentry *dentry, struct iattr *ia)
 {
-	int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+	int ret;
 
-	if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
-		return ret;
+	if (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) {
+		ret = security_locked_down(LOCKDOWN_DEBUGFS);
+		if (ret)
+			return ret;
+	}
 	return simple_setattr(&init_user_ns, dentry, ia);
 }
 
-- 
GitLab


From 889d916b6f8a48b8c9489fffcad3b78eedd01a51 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 11 May 2021 08:48:28 +0300
Subject: [PATCH 0943/3804] RDMA/core: Don't access cm_id after its destruction

restrack should only be attached to a cm_id while the ID has a valid
device pointer. It is set up when the device is first loaded, but not
cleared when the device is removed. There are also two copies of the device
pointer, one private and one in the public API, and these were left out of
sync.

Make everything go to NULL together and manipulate restrack right around
the device assignments.

Found by syzkaller:
BUG: KASAN: wild-memory-access in __list_del include/linux/list.h:112 [inline]
BUG: KASAN: wild-memory-access in __list_del_entry include/linux/list.h:135 [inline]
BUG: KASAN: wild-memory-access in list_del include/linux/list.h:146 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
BUG: KASAN: wild-memory-access in cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
Write of size 8 at addr dead000000000108 by task syz-executor716/334

CPU: 0 PID: 334 Comm: syz-executor716 Not tainted 5.11.0+ #271
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0xbe/0xf9 lib/dump_stack.c:120
 __kasan_report mm/kasan/report.c:400 [inline]
 kasan_report.cold+0x5f/0xd5 mm/kasan/report.c:413
 __list_del include/linux/list.h:112 [inline]
 __list_del_entry include/linux/list.h:135 [inline]
 list_del include/linux/list.h:146 [inline]
 cma_cancel_listens drivers/infiniband/core/cma.c:1767 [inline]
 cma_cancel_operation drivers/infiniband/core/cma.c:1795 [inline]
 cma_cancel_operation+0x1f4/0x4b0 drivers/infiniband/core/cma.c:1783
 _destroy_id+0x29/0x460 drivers/infiniband/core/cma.c:1862
 ucma_close_id+0x36/0x50 drivers/infiniband/core/ucma.c:185
 ucma_destroy_private_ctx+0x58d/0x5b0 drivers/infiniband/core/ucma.c:576
 ucma_close+0x91/0xd0 drivers/infiniband/core/ucma.c:1797
 __fput+0x169/0x540 fs/file_table.c:280
 task_work_run+0xb7/0x100 kernel/task_work.c:140
 exit_task_work include/linux/task_work.h:30 [inline]
 do_exit+0x7da/0x17f0 kernel/exit.c:825
 do_group_exit+0x9e/0x190 kernel/exit.c:922
 __do_sys_exit_group kernel/exit.c:933 [inline]
 __se_sys_exit_group kernel/exit.c:931 [inline]
 __x64_sys_exit_group+0x2d/0x30 kernel/exit.c:931
 do_syscall_64+0x2d/0x40 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 255d0c14b375 ("RDMA/cma: rdma_bind_addr() leaks a cma_dev reference count")
Link: https://lore.kernel.org/r/3352ee288fe34f2b44220457a29bfc0548686363.1620711734.git.leonro@nvidia.com
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2b9ffc21cbc4a..ab148a696c0ce 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -473,6 +473,7 @@ static void cma_release_dev(struct rdma_id_private *id_priv)
 	list_del(&id_priv->list);
 	cma_dev_put(id_priv->cma_dev);
 	id_priv->cma_dev = NULL;
+	id_priv->id.device = NULL;
 	if (id_priv->id.route.addr.dev_addr.sgid_attr) {
 		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
 		id_priv->id.route.addr.dev_addr.sgid_attr = NULL;
@@ -1860,6 +1861,7 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 				iw_destroy_cm_id(id_priv->cm_id.iw);
 		}
 		cma_leave_mc_groups(id_priv);
+		rdma_restrack_del(&id_priv->res);
 		cma_release_dev(id_priv);
 	}
 
@@ -1873,7 +1875,6 @@ static void _destroy_id(struct rdma_id_private *id_priv,
 	kfree(id_priv->id.route.path_rec);
 
 	put_net(id_priv->id.route.addr.dev_addr.net);
-	rdma_restrack_del(&id_priv->res);
 	kfree(id_priv);
 }
 
@@ -3774,7 +3775,7 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 	}
 
 	id_priv->backlog = backlog;
-	if (id->device) {
+	if (id_priv->cma_dev) {
 		if (rdma_cap_ib_cm(id->device, 1)) {
 			ret = cma_ib_listen(id_priv);
 			if (ret)
-- 
GitLab


From 976aac5f882989e4f6c1b3a7224819bf0e801c6a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 16:00:08 +0200
Subject: [PATCH 0944/3804] kcsan: Fix debugfs initcall return type

clang with CONFIG_LTO_CLANG points out that an initcall function should
return an 'int' due to the changes made to the initcall macros in commit
3578ad11f3fb ("init: lto: fix PREL32 relocations"):

kernel/kcsan/debugfs.c:274:15: error: returning 'void' from a function with incompatible result type 'int'
late_initcall(kcsan_debugfs_init);
~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~
include/linux/init.h:292:46: note: expanded from macro 'late_initcall'
 #define late_initcall(fn)               __define_initcall(fn, 7)

Fixes: e36299efe7d7 ("kcsan, debugfs: Move debugfs file creation out of early init")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/kcsan/debugfs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c
index c1dd02f3be8b8..e65de172ccf7c 100644
--- a/kernel/kcsan/debugfs.c
+++ b/kernel/kcsan/debugfs.c
@@ -266,9 +266,10 @@ static const struct file_operations debugfs_ops =
 	.release = single_release
 };
 
-static void __init kcsan_debugfs_init(void)
+static int __init kcsan_debugfs_init(void)
 {
 	debugfs_create_file("kcsan", 0644, NULL, NULL, &debugfs_ops);
+	return 0;
 }
 
 late_initcall(kcsan_debugfs_init);
-- 
GitLab


From 9f460ae31c4435fd022c443a6029352217a16ac1 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 18 May 2021 21:00:27 +0200
Subject: [PATCH 0945/3804] batman-adv: Avoid WARN_ON timing related checks

The soft/batadv interface for a queued OGM can be changed during the time
the OGM was queued for transmission and when the OGM is actually
transmitted by the worker.

But WARN_ON must be used to denote kernel bugs and not to print simple
warnings. A warning can simply be printed using pr_warn.

Reported-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Reported-by: syzbot+c0b807de416427ff3dd1@syzkaller.appspotmail.com
Fixes: ef0a937f7a14 ("batman-adv: consider outgoing interface in OGM sending")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_iv_ogm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 789f257be24f3..fc8be49010b9f 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -409,8 +409,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 	if (WARN_ON(!forw_packet->if_outgoing))
 		return;
 
-	if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+	if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+		pr_warn("%s: soft interface switch for queued OGM\n", __func__);
 		return;
+	}
 
 	if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
 		return;
-- 
GitLab


From be07f056396d6bb40963c45a02951c566ddeef8e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 18 May 2021 10:09:08 +0800
Subject: [PATCH 0946/3804] tipc: simplify the finalize work queue

This patch is to use "struct work_struct" for the finalize work queue
instead of "struct tipc_net_work", as it can get the "net" and "addr"
from tipc_net's other members and there is no need to add extra net
and addr in tipc_net by defining "struct tipc_net_work".

Note that it's safe to get net from tn->bcl as bcl is always released
after the finalize work queue is done.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c     |  4 ++--
 net/tipc/core.h     |  8 +-------
 net/tipc/discover.c |  4 ++--
 net/tipc/link.c     |  5 +++++
 net/tipc/link.h     |  1 +
 net/tipc/net.c      | 15 +++------------
 6 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/net/tipc/core.c b/net/tipc/core.c
index 72f3ac73779bf..3f4542e0f0650 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -60,7 +60,7 @@ static int __net_init tipc_init_net(struct net *net)
 	tn->trial_addr = 0;
 	tn->addr_trial_end = 0;
 	tn->capabilities = TIPC_NODE_CAPABILITIES;
-	INIT_WORK(&tn->final_work.work, tipc_net_finalize_work);
+	INIT_WORK(&tn->work, tipc_net_finalize_work);
 	memset(tn->node_id, 0, sizeof(tn->node_id));
 	memset(tn->node_id_string, 0, sizeof(tn->node_id_string));
 	tn->mon_threshold = TIPC_DEF_MON_THRESHOLD;
@@ -110,7 +110,7 @@ static void __net_exit tipc_exit_net(struct net *net)
 
 	tipc_detach_loopback(net);
 	/* Make sure the tipc_net_finalize_work() finished */
-	cancel_work_sync(&tn->final_work.work);
+	cancel_work_sync(&tn->work);
 	tipc_net_stop(net);
 
 	tipc_bcast_stop(net);
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5741ae488bb56..0a3f7a70a50a1 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -91,12 +91,6 @@ extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
-struct tipc_net_work {
-	struct work_struct work;
-	struct net *net;
-	u32 addr;
-};
-
 struct tipc_net {
 	u8  node_id[NODE_ID_LEN];
 	u32 node_addr;
@@ -148,7 +142,7 @@ struct tipc_net {
 	struct tipc_crypto *crypto_tx;
 #endif
 	/* Work item for net finalize */
-	struct tipc_net_work final_work;
+	struct work_struct work;
 	/* The numbers of work queues in schedule */
 	atomic_t wq_count;
 };
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 5380f605b8514..da69e1abf68ff 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -168,7 +168,7 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
 
 	/* Apply trial address if we just left trial period */
 	if (!trial && !self) {
-		tipc_sched_net_finalize(net, tn->trial_addr);
+		schedule_work(&tn->work);
 		msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
 		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
 	}
@@ -308,7 +308,7 @@ static void tipc_disc_timeout(struct timer_list *t)
 	if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
 		mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
 		spin_unlock_bh(&d->lock);
-		tipc_sched_net_finalize(net, tn->trial_addr);
+		schedule_work(&tn->work);
 		return;
 	}
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1151092594302..c44b4bfaaee6a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -372,6 +372,11 @@ char tipc_link_plane(struct tipc_link *l)
 	return l->net_plane;
 }
 
+struct net *tipc_link_net(struct tipc_link *l)
+{
+	return l->net;
+}
+
 void tipc_link_update_caps(struct tipc_link *l, u16 capabilities)
 {
 	l->peer_caps = capabilities;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index fc07232c9a127..a16f401fdabda 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -156,4 +156,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq);
 bool tipc_link_too_silent(struct tipc_link *l);
+struct net *tipc_link_net(struct tipc_link *l);
 #endif
diff --git a/net/tipc/net.c b/net/tipc/net.c
index a130195af1883..0e95572e56b41 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,6 +41,7 @@
 #include "socket.h"
 #include "node.h"
 #include "bcast.h"
+#include "link.h"
 #include "netlink.h"
 #include "monitor.h"
 
@@ -142,19 +143,9 @@ static void tipc_net_finalize(struct net *net, u32 addr)
 
 void tipc_net_finalize_work(struct work_struct *work)
 {
-	struct tipc_net_work *fwork;
+	struct tipc_net *tn = container_of(work, struct tipc_net, work);
 
-	fwork = container_of(work, struct tipc_net_work, work);
-	tipc_net_finalize(fwork->net, fwork->addr);
-}
-
-void tipc_sched_net_finalize(struct net *net, u32 addr)
-{
-	struct tipc_net *tn = tipc_net(net);
-
-	tn->final_work.net = net;
-	tn->final_work.addr = addr;
-	schedule_work(&tn->final_work.work);
+	tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr);
 }
 
 void tipc_net_stop(struct net *net)
-- 
GitLab


From 33e6b1674f339c5d3be56ec9b4921d1ddd14327d Mon Sep 17 00:00:00 2001
From: Markus Bloechl <markus.bloechl@ipetronik.com>
Date: Tue, 18 May 2021 11:54:11 +0200
Subject: [PATCH 0947/3804] net: lan78xx: advertise tx software timestamping
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

lan78xx already calls skb_tx_timestamp() in its lan78xx_start_xmit().
Override .get_ts_info to also advertise this capability
(SOF_TIMESTAMPING_TX_SOFTWARE) via ethtool.

Signed-off-by: Markus Blöchl <markus.bloechl@ipetronik.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/lan78xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 6acc5e9045181..02bce40a67e5b 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1645,6 +1645,7 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
 	.get_strings	= lan78xx_get_strings,
 	.get_wol	= lan78xx_get_wol,
 	.set_wol	= lan78xx_set_wol,
+	.get_ts_info	= ethtool_op_get_ts_info,
 	.get_eee	= lan78xx_get_eee,
 	.set_eee	= lan78xx_set_eee,
 	.get_pauseparam	= lan78xx_get_pause,
-- 
GitLab


From a710b9ffbebaf713f7dbd4dbd9524907e5d66f33 Mon Sep 17 00:00:00 2001
From: Jiaran Zhang <zhangjiaran@huawei.com>
Date: Tue, 18 May 2021 19:36:00 +0800
Subject: [PATCH 0948/3804] net: hns3: fix incorrect resp_msg issue

In hclge_mbx_handler(), if there are two consecutive mailbox
messages that require resp_msg, the resp_msg is not cleared
after processing the first message, which will cause the resp_msg
data of the second message to be incorrect.

Fix it by clearing the resp_msg before processing every mailbox
message.

Fixes: bb5790b71bad ("net: hns3: refactor mailbox response scheme between PF and VF")
Signed-off-by: Jiaran Zhang <zhangjiaran@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 8e5f9dc8791d2..f1c9f4ada348a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -710,7 +710,6 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 	unsigned int flag;
 	int ret = 0;
 
-	memset(&resp_msg, 0, sizeof(resp_msg));
 	/* handle all the mailbox requests in the queue */
 	while (!hclge_cmd_crq_empty(&hdev->hw)) {
 		if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
@@ -738,6 +737,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 
 		trace_hclge_pf_mbx_get(hdev, req);
 
+		/* clear the resp_msg before processing every mailbox message */
+		memset(&resp_msg, 0, sizeof(resp_msg));
+
 		switch (req->msg.code) {
 		case HCLGE_MBX_MAP_RING_TO_VECTOR:
 			ret = hclge_map_unmap_ring_to_vf_vector(vport, true,
-- 
GitLab


From a289a7e5c1d49b7d47df9913c1cc81fb48fab613 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Tue, 18 May 2021 19:36:01 +0800
Subject: [PATCH 0949/3804] net: hns3: put off calling register_netdev() until
 client initialize complete

Currently, the netdevice is registered before client initialization
completes, so there is a time window in which the netdevice is
available but not yet usable. In this case, if a user tries to change
the channel number or ring param, it may cause hns3_set_rx_cpu_rmap()
to be called twice, triggering a BUG.

[47199.416502] hns3 0000:35:00.0 eth1: set channels: tqp_num=1, rxfh=0
[47199.430340] hns3 0000:35:00.0 eth1: already uninitialized
[47199.438554] hns3 0000:35:00.0: rss changes from 4 to 1
[47199.511854] hns3 0000:35:00.0: Channels changed, rss_size from 4 to 1, tqps from 4 to 1
[47200.163524] ------------[ cut here ]------------
[47200.171674] kernel BUG at lib/cpu_rmap.c:142!
[47200.177847] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP
[47200.185259] Modules linked in: hclge(+) hns3(-) hns3_cae(O) hns_roce_hw_v2 hnae3 vfio_iommu_type1 vfio_pci vfio_virqfd vfio pv680_mii(O) [last unloaded: hclge]
[47200.205912] CPU: 1 PID: 8260 Comm: ethtool Tainted: G           O      5.11.0-rc3+ #1
[47200.215601] Hardware name:  , xxxxxx 02/04/2021
[47200.223052] pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--)
[47200.230188] pc : cpu_rmap_add+0x38/0x40
[47200.237472] lr : irq_cpu_rmap_add+0x84/0x140
[47200.243291] sp : ffff800010e93a30
[47200.247295] x29: ffff800010e93a30 x28: ffff082100584880
[47200.254155] x27: 0000000000000000 x26: 0000000000000000
[47200.260712] x25: 0000000000000000 x24: 0000000000000004
[47200.267241] x23: ffff08209ba03000 x22: ffff08209ba038c0
[47200.273789] x21: 000000000000003f x20: ffff0820e2bc1680
[47200.280400] x19: ffff0820c970ec80 x18: 00000000000000c0
[47200.286944] x17: 0000000000000000 x16: ffffb43debe4a0d0
[47200.293456] x15: fffffc2082990600 x14: dead000000000122
[47200.300059] x13: ffffffffffffffff x12: 000000000000003e
[47200.306606] x11: ffff0820815b8080 x10: ffff53e411988000
[47200.313171] x9 : 0000000000000000 x8 : ffff0820e2bc1700
[47200.319682] x7 : 0000000000000000 x6 : 000000000000003f
[47200.326170] x5 : 0000000000000040 x4 : ffff800010e93a20
[47200.332656] x3 : 0000000000000004 x2 : ffff0820c970ec80
[47200.339168] x1 : ffff0820e2bc1680 x0 : 0000000000000004
[47200.346058] Call trace:
[47200.349324]  cpu_rmap_add+0x38/0x40
[47200.354300]  hns3_set_rx_cpu_rmap+0x6c/0xe0 [hns3]
[47200.362294]  hns3_reset_notify_init_enet+0x1cc/0x340 [hns3]
[47200.370049]  hns3_change_channels+0x40/0xb0 [hns3]
[47200.376770]  hns3_set_channels+0x12c/0x2a0 [hns3]
[47200.383353]  ethtool_set_channels+0x140/0x250
[47200.389772]  dev_ethtool+0x714/0x23d0
[47200.394440]  dev_ioctl+0x4cc/0x640
[47200.399277]  sock_do_ioctl+0x100/0x2a0
[47200.404574]  sock_ioctl+0x28c/0x470
[47200.409079]  __arm64_sys_ioctl+0xb4/0x100
[47200.415217]  el0_svc_common.constprop.0+0x84/0x210
[47200.422088]  do_el0_svc+0x28/0x34
[47200.426387]  el0_svc+0x28/0x70
[47200.431308]  el0_sync_handler+0x1a4/0x1b0
[47200.436477]  el0_sync+0x174/0x180
[47200.441562] Code: 11000405 79000c45 f8247861 d65f03c0 (d4210000)
[47200.448869] ---[ end trace a01efe4ce42e5f34 ]---

The process is like below:
executing hns3_client_init
|
register_netdev()
|                           hns3_set_channels()
|                           |
hns3_set_rx_cpu_rmap()      hns3_reset_notify_uninit_enet()
|                               |
|                            quit without calling function
|                            hns3_free_rx_cpu_rmap for flag
|                            HNS3_NIC_STATE_INITED is unset.
|                           |
|                           hns3_reset_notify_init_enet()
|                               |
set HNS3_NIC_STATE_INITED    call hns3_set_rx_cpu_rmap()-- crash

Fix it by calling register_netdev() at the end of function
hns3_client_init().

Fixes: 08a100689d4b ("net: hns3: re-organize vector handle")
Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 783fdaf8f8d64..c64d18878f641 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4317,12 +4317,6 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	if (ret)
 		goto out_init_phy;
 
-	ret = register_netdev(netdev);
-	if (ret) {
-		dev_err(priv->dev, "probe register netdev fail!\n");
-		goto out_reg_netdev_fail;
-	}
-
 	/* the device can work without cpu rmap, only aRFS needs it */
 	ret = hns3_set_rx_cpu_rmap(netdev);
 	if (ret)
@@ -4355,17 +4349,23 @@ static int hns3_client_init(struct hnae3_handle *handle)
 	if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
 		set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
 
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(priv->dev, "probe register netdev fail!\n");
+		goto out_reg_netdev_fail;
+	}
+
 	if (netif_msg_drv(handle))
 		hns3_info_show(priv);
 
 	return ret;
 
+out_reg_netdev_fail:
+	hns3_dbg_uninit(handle);
 out_client_start:
 	hns3_free_rx_cpu_rmap(netdev);
 	hns3_nic_uninit_irq(priv);
 out_init_irq_fail:
-	unregister_netdev(netdev);
-out_reg_netdev_fail:
 	hns3_uninit_phy(netdev);
 out_init_phy:
 	hns3_uninit_all_ring(priv);
-- 
GitLab


From 73a13d8dbe33e53a12400f2be0f5af169816c67f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 18 May 2021 19:36:02 +0800
Subject: [PATCH 0950/3804] net: hns3: fix user's coalesce configuration lost
 issue

Currently, when adaptive is on, the user's coalesce configuration
may be overwritten by the dynamic one. The reason is that user's
configurations are saved in struct hns3_enet_tqp_vector whose
value may be changed by the dynamic algorithm. To fix it, use
struct hns3_nic_priv instead of struct hns3_enet_tqp_vector to
save and get the user's configuration.

BTW, operations of storing and restoring coalesce info in the reset
process are unnecessary now, so remove them as well.

Fixes: 434776a5fae2 ("net: hns3: add ethtool_ops.set_coalesce support to PF")
Fixes: 7e96adc46633 ("net: hns3: add ethtool_ops.get_coalesce support to PF")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 84 +++++++++----------
 .../ethernet/hisilicon/hns3/hns3_ethtool.c    | 64 +++++---------
 2 files changed, 63 insertions(+), 85 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c64d18878f641..6d6c0ac65bb49 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -264,22 +264,17 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector,
 	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
 	struct hns3_enet_coalesce *tx_coal = &tqp_vector->tx_group.coal;
 	struct hns3_enet_coalesce *rx_coal = &tqp_vector->rx_group.coal;
+	struct hns3_enet_coalesce *ptx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *prx_coal = &priv->rx_coal;
 
-	/* initialize the configuration for interrupt coalescing.
-	 * 1. GL (Interrupt Gap Limiter)
-	 * 2. RL (Interrupt Rate Limiter)
-	 * 3. QL (Interrupt Quantity Limiter)
-	 *
-	 * Default: enable interrupt coalescing self-adaptive and GL
-	 */
-	tx_coal->adapt_enable = 1;
-	rx_coal->adapt_enable = 1;
+	tx_coal->adapt_enable = ptx_coal->adapt_enable;
+	rx_coal->adapt_enable = prx_coal->adapt_enable;
 
-	tx_coal->int_gl = HNS3_INT_GL_50K;
-	rx_coal->int_gl = HNS3_INT_GL_50K;
+	tx_coal->int_gl = ptx_coal->int_gl;
+	rx_coal->int_gl = prx_coal->int_gl;
 
-	rx_coal->flow_level = HNS3_FLOW_LOW;
-	tx_coal->flow_level = HNS3_FLOW_LOW;
+	rx_coal->flow_level = prx_coal->flow_level;
+	tx_coal->flow_level = ptx_coal->flow_level;
 
 	/* device version above V3(include V3), GL can configure 1us
 	 * unit, so uses 1us unit.
@@ -294,8 +289,8 @@ static void hns3_vector_coalesce_init(struct hns3_enet_tqp_vector *tqp_vector,
 		rx_coal->ql_enable = 1;
 		tx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max;
 		rx_coal->int_ql_max = ae_dev->dev_specs.int_ql_max;
-		tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
-		rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+		tx_coal->int_ql = ptx_coal->int_ql;
+		rx_coal->int_ql = prx_coal->int_ql;
 	}
 }
 
@@ -3844,6 +3839,34 @@ map_ring_fail:
 	return ret;
 }
 
+static void hns3_nic_init_coal_cfg(struct hns3_nic_priv *priv)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(priv->ae_handle->pdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
+
+	/* initialize the configuration for interrupt coalescing.
+	 * 1. GL (Interrupt Gap Limiter)
+	 * 2. RL (Interrupt Rate Limiter)
+	 * 3. QL (Interrupt Quantity Limiter)
+	 *
+	 * Default: enable interrupt coalescing self-adaptive and GL
+	 */
+	tx_coal->adapt_enable = 1;
+	rx_coal->adapt_enable = 1;
+
+	tx_coal->int_gl = HNS3_INT_GL_50K;
+	rx_coal->int_gl = HNS3_INT_GL_50K;
+
+	rx_coal->flow_level = HNS3_FLOW_LOW;
+	tx_coal->flow_level = HNS3_FLOW_LOW;
+
+	if (ae_dev->dev_specs.int_ql_max) {
+		tx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+		rx_coal->int_ql = HNS3_INT_QL_DEFAULT_CFG;
+	}
+}
+
 static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
 {
 	struct hnae3_handle *h = priv->ae_handle;
@@ -4295,6 +4318,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
 		goto out_get_ring_cfg;
 	}
 
+	hns3_nic_init_coal_cfg(priv);
+
 	ret = hns3_nic_alloc_vector_data(priv);
 	if (ret) {
 		ret = -ENOMEM;
@@ -4571,31 +4596,6 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 	return 0;
 }
 
-static void hns3_store_coal(struct hns3_nic_priv *priv)
-{
-	/* ethtool only support setting and querying one coal
-	 * configuration for now, so save the vector 0' coal
-	 * configuration here in order to restore it.
-	 */
-	memcpy(&priv->tx_coal, &priv->tqp_vector[0].tx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-	memcpy(&priv->rx_coal, &priv->tqp_vector[0].rx_group.coal,
-	       sizeof(struct hns3_enet_coalesce));
-}
-
-static void hns3_restore_coal(struct hns3_nic_priv *priv)
-{
-	u16 vector_num = priv->vector_num;
-	int i;
-
-	for (i = 0; i < vector_num; i++) {
-		memcpy(&priv->tqp_vector[i].tx_group.coal, &priv->tx_coal,
-		       sizeof(struct hns3_enet_coalesce));
-		memcpy(&priv->tqp_vector[i].rx_group.coal, &priv->rx_coal,
-		       sizeof(struct hns3_enet_coalesce));
-	}
-}
-
 static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 {
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
@@ -4654,8 +4654,6 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	if (ret)
 		goto err_put_ring;
 
-	hns3_restore_coal(priv);
-
 	ret = hns3_nic_init_vector_data(priv);
 	if (ret)
 		goto err_dealloc_vector;
@@ -4721,8 +4719,6 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
 
 	hns3_nic_uninit_vector_data(priv);
 
-	hns3_store_coal(priv);
-
 	hns3_nic_dealloc_vector_data(priv);
 
 	hns3_uninit_all_ring(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index b48faf769b1c9..c1ea403d2b567 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1134,50 +1134,32 @@ static void hns3_get_channels(struct net_device *netdev,
 		h->ae_algo->ops->get_channels(h, ch);
 }
 
-static int hns3_get_coalesce_per_queue(struct net_device *netdev, u32 queue,
-				       struct ethtool_coalesce *cmd)
+static int hns3_get_coalesce(struct net_device *netdev,
+			     struct ethtool_coalesce *cmd)
 {
-	struct hns3_enet_tqp_vector *tx_vector, *rx_vector;
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
 	struct hnae3_handle *h = priv->ae_handle;
-	u16 queue_num = h->kinfo.num_tqps;
 
 	if (hns3_nic_resetting(netdev))
 		return -EBUSY;
 
-	if (queue >= queue_num) {
-		netdev_err(netdev,
-			   "Invalid queue value %u! Queue max id=%u\n",
-			   queue, queue_num - 1);
-		return -EINVAL;
-	}
-
-	tx_vector = priv->ring[queue].tqp_vector;
-	rx_vector = priv->ring[queue_num + queue].tqp_vector;
+	cmd->use_adaptive_tx_coalesce = tx_coal->adapt_enable;
+	cmd->use_adaptive_rx_coalesce = rx_coal->adapt_enable;
 
-	cmd->use_adaptive_tx_coalesce =
-			tx_vector->tx_group.coal.adapt_enable;
-	cmd->use_adaptive_rx_coalesce =
-			rx_vector->rx_group.coal.adapt_enable;
-
-	cmd->tx_coalesce_usecs = tx_vector->tx_group.coal.int_gl;
-	cmd->rx_coalesce_usecs = rx_vector->rx_group.coal.int_gl;
+	cmd->tx_coalesce_usecs = tx_coal->int_gl;
+	cmd->rx_coalesce_usecs = rx_coal->int_gl;
 
 	cmd->tx_coalesce_usecs_high = h->kinfo.int_rl_setting;
 	cmd->rx_coalesce_usecs_high = h->kinfo.int_rl_setting;
 
-	cmd->tx_max_coalesced_frames = tx_vector->tx_group.coal.int_ql;
-	cmd->rx_max_coalesced_frames = rx_vector->rx_group.coal.int_ql;
+	cmd->tx_max_coalesced_frames = tx_coal->int_ql;
+	cmd->rx_max_coalesced_frames = rx_coal->int_ql;
 
 	return 0;
 }
 
-static int hns3_get_coalesce(struct net_device *netdev,
-			     struct ethtool_coalesce *cmd)
-{
-	return hns3_get_coalesce_per_queue(netdev, 0, cmd);
-}
-
 static int hns3_check_gl_coalesce_para(struct net_device *netdev,
 				       struct ethtool_coalesce *cmd)
 {
@@ -1292,19 +1274,7 @@ static int hns3_check_coalesce_para(struct net_device *netdev,
 		return ret;
 	}
 
-	ret = hns3_check_ql_coalesce_param(netdev, cmd);
-	if (ret)
-		return ret;
-
-	if (cmd->use_adaptive_tx_coalesce == 1 ||
-	    cmd->use_adaptive_rx_coalesce == 1) {
-		netdev_info(netdev,
-			    "adaptive-tx=%u and adaptive-rx=%u, tx_usecs or rx_usecs will changed dynamically.\n",
-			    cmd->use_adaptive_tx_coalesce,
-			    cmd->use_adaptive_rx_coalesce);
-	}
-
-	return 0;
+	return hns3_check_ql_coalesce_param(netdev, cmd);
 }
 
 static void hns3_set_coalesce_per_queue(struct net_device *netdev,
@@ -1350,6 +1320,9 @@ static int hns3_set_coalesce(struct net_device *netdev,
 			     struct ethtool_coalesce *cmd)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
+	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	struct hns3_enet_coalesce *tx_coal = &priv->tx_coal;
+	struct hns3_enet_coalesce *rx_coal = &priv->rx_coal;
 	u16 queue_num = h->kinfo.num_tqps;
 	int ret;
 	int i;
@@ -1364,6 +1337,15 @@ static int hns3_set_coalesce(struct net_device *netdev,
 	h->kinfo.int_rl_setting =
 		hns3_rl_round_down(cmd->rx_coalesce_usecs_high);
 
+	tx_coal->adapt_enable = cmd->use_adaptive_tx_coalesce;
+	rx_coal->adapt_enable = cmd->use_adaptive_rx_coalesce;
+
+	tx_coal->int_gl = cmd->tx_coalesce_usecs;
+	rx_coal->int_gl = cmd->rx_coalesce_usecs;
+
+	tx_coal->int_ql = cmd->tx_max_coalesced_frames;
+	rx_coal->int_ql = cmd->rx_max_coalesced_frames;
+
 	for (i = 0; i < queue_num; i++)
 		hns3_set_coalesce_per_queue(netdev, cmd, i);
 
-- 
GitLab


From 9bb5a495424fd4bfa672eb1f31481248562fa156 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Tue, 18 May 2021 19:36:03 +0800
Subject: [PATCH 0951/3804] net: hns3: check the return of skb_checksum_help()

Currently skb_checksum_help()'s return is ignored, but it may
return error when it fails to allocate memory when linearizing.

So add checking for the return value of skb_checksum_help().

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Fixes: 3db084d28dc0 ("net: hns3: Fix for vxlan tx checksum bug")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6d6c0ac65bb49..026558f8e04b9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -841,8 +841,6 @@ static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
 	      l4.udp->dest == htons(4790))))
 		return false;
 
-	skb_checksum_help(skb);
-
 	return true;
 }
 
@@ -919,8 +917,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 			/* the stack computes the IP header already,
 			 * driver calculate l4 checksum when not TSO.
 			 */
-			skb_checksum_help(skb);
-			return 0;
+			return skb_checksum_help(skb);
 		}
 
 		hns3_set_outer_l2l3l4(skb, ol4_proto, ol_type_vlan_len_msec);
@@ -965,7 +962,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 		break;
 	case IPPROTO_UDP:
 		if (hns3_tunnel_csum_bug(skb))
-			break;
+			return skb_checksum_help(skb);
 
 		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4CS_B, 1);
 		hns3_set_field(*type_cs_vlan_tso, HNS3_TXD_L4T_S,
@@ -990,8 +987,7 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto,
 		/* the stack computes the IP header already,
 		 * driver calculate l4 checksum when not TSO.
 		 */
-		skb_checksum_help(skb);
-		return 0;
+		return skb_checksum_help(skb);
 	}
 
 	return 0;
-- 
GitLab


From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 17:06:01 -0500
Subject: [PATCH 0952/3804] siginfo: Move si_trapno inside the union inside
 _si_fault

It turns out that linux uses si_trapno very sparingly, and as such it
can be considered extra information for a very narrow selection of
signals, rather than information that is present with every fault
reported in siginfo.

As such move si_trapno inside the union inside of _si_fault.  This
results in no change in placement, and makes it easier
to extend _si_fault in the future as this reduces the number of
special cases.  In particular with si_trapno included in the union it
is no longer a concern that the union must be pointer aligned on most
architectures because the union follows immediately after si_addr
which is a pointer.

This change results in a difference in siginfo field placement on
sparc and alpha for the fields si_addr_lsb, si_lower, si_upper,
si_pkey, and si_perf.  These architectures do not implement the
signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and
si_perf.  Further these architectures have not yet implemented the
userspace that would use si_perf.

The point of this change is in fact to correct these placement issues
before sparc or alpha grow userspace that cares.  This change was
discussed[1] and the agreement is that this change is currently safe.

[1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com
Acked-by: Marco Elver <elver@google.com>
v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/x86/kernel/signal_compat.c    | 3 +++
 include/linux/compat.h             | 5 ++---
 include/uapi/asm-generic/siginfo.h | 7 ++-----
 kernel/signal.c                    | 1 +
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 0e5d0a7e203b3..a9fcabd8a5e50 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C);
 
+	BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10);
+
 	BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10);
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index f0d2dd35d408b..6af7bef15e949 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -214,12 +214,11 @@ typedef struct compat_siginfo {
 		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */
 		struct {
 			compat_uptr_t _addr;	/* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-			int _trapno;	/* TRAP # which caused the signal */
-#endif
 #define __COMPAT_ADDR_BND_PKEY_PAD  (__alignof__(compat_uptr_t) < sizeof(short) ? \
 				     sizeof(short) : __alignof__(compat_uptr_t))
 			union {
+				/* used on alpha and sparc */
+				int _trapno;	/* TRAP # which caused the signal */
 				/*
 				 * used when si_code=BUS_MCEERR_AR or
 				 * used when si_code=BUS_MCEERR_AO
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index 03d6f6d2c1fe8..e663bf117b461 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -63,9 +63,6 @@ union __sifields {
 	/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */
 	struct {
 		void __user *_addr; /* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-		int _trapno;	/* TRAP # which caused the signal */
-#endif
 #ifdef __ia64__
 		int _imm;		/* immediate value for "break" */
 		unsigned int _flags;	/* see ia64 si_flags */
@@ -75,6 +72,8 @@ union __sifields {
 #define __ADDR_BND_PKEY_PAD  (__alignof__(void *) < sizeof(short) ? \
 			      sizeof(short) : __alignof__(void *))
 		union {
+			/* used on alpha and sparc */
+			int _trapno;	/* TRAP # which caused the signal */
 			/*
 			 * used when si_code=BUS_MCEERR_AR or
 			 * used when si_code=BUS_MCEERR_AO
@@ -150,9 +149,7 @@ typedef struct siginfo {
 #define si_int		_sifields._rt._sigval.sival_int
 #define si_ptr		_sifields._rt._sigval.sival_ptr
 #define si_addr		_sifields._sigfault._addr
-#ifdef __ARCH_SI_TRAPNO
 #define si_trapno	_sifields._sigfault._trapno
-#endif
 #define si_addr_lsb	_sifields._sigfault._addr_lsb
 #define si_lower	_sifields._sigfault._addr_bnd._lower
 #define si_upper	_sifields._sigfault._addr_bnd._upper
diff --git a/kernel/signal.c b/kernel/signal.c
index c3017aa8024a0..65888aec65a0f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4607,6 +4607,7 @@ static inline void siginfo_buildtime_checks(void)
 
 	/* sigfault */
 	CHECK_OFFSET(si_addr);
+	CHECK_OFFSET(si_trapno);
 	CHECK_OFFSET(si_addr_lsb);
 	CHECK_OFFSET(si_lower);
 	CHECK_OFFSET(si_upper);
-- 
GitLab


From 4a24efa16e7db02306fb5db84518bb0a7ada5a46 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Sun, 14 Mar 2021 23:34:27 +0300
Subject: [PATCH 0953/3804] sata_highbank: fix deferred probing

The driver overrides the error codes returned by platform_get_irq() to
-EINVAL, so if it returns -EPROBE_DEFER, the driver would fail the probe
permanently instead of the deferred probing. Switch to propagating the
error code upstream, still checking/overriding IRQ0 as libata regards it
as "no IRQ" (thus polling) anyway...

Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Link: https://lore.kernel.org/r/105b456d-1199-f6e9-ceb7-ffc5ba551d1a@omprussia.ru
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_highbank.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index 64b2ef15ec191..8440203e835ed 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -469,10 +469,12 @@ static int ahci_highbank_probe(struct platform_device *pdev)
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
+	if (irq < 0) {
 		dev_err(dev, "no irq\n");
-		return -EINVAL;
+		return irq;
 	}
+	if (!irq)
+		return -EINVAL;
 
 	hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
 	if (!hpriv) {
-- 
GitLab


From 2d3a62fbae8e5badc2342388f65ab2191c209cc0 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Mon, 15 Mar 2021 14:46:53 +0300
Subject: [PATCH 0954/3804] pata_rb532_cf: fix deferred probing

The driver overrides the error codes returned by platform_get_irq() to
-ENOENT, so if it returns -EPROBE_DEFER, the driver would fail the probe
permanently instead of the deferred probing. Switch to propagating the
error code upstream, still checking/overriding IRQ0 as libata regards it
as "no IRQ" (thus polling) anyway...

Fixes: 9ec36cafe43b ("of/irq: do irq resolution in platform_get_irq")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Link: https://lore.kernel.org/r/771ced55-3efb-21f5-f21c-b99920aae611@omprussia.ru
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_rb532_cf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 479c4b29b8562..303f8c375b3af 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -115,10 +115,12 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0) {
+	if (irq < 0) {
 		dev_err(&pdev->dev, "no IRQ resource found\n");
-		return -ENOENT;
+		return irq;
 	}
+	if (!irq)
+		return -EINVAL;
 
 	gpiod = devm_gpiod_get(&pdev->dev, NULL, GPIOD_IN);
 	if (IS_ERR(gpiod)) {
-- 
GitLab


From 9abcabe3111811aeae0f3a14e159b14248631875 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 Apr 2021 17:29:36 -0500
Subject: [PATCH 0955/3804] signal: Implement SIL_FAULT_TRAPNO

Now that si_trapno is part of the union in _si_fault and available on
all architectures, add SIL_FAULT_TRAPNO and update siginfo_layout to
return SIL_FAULT_TRAPNO when the code assumes si_trapno is valid.

There is room for future changes to reduce when si_trapno is valid but
this is all that is needed to make si_trapno and the other members of
the union in _sigfault mutually exclusive.

Update the code that uses siginfo_layout to deal with SIL_FAULT_TRAPNO
and have the same code ignore si_trapno in all other cases.

v1: https://lkml.kernel.org/r/m1o8dvs7s7.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-6-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-2-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/signalfd.c          |  8 +++-----
 include/linux/signal.h |  1 +
 kernel/signal.c        | 34 ++++++++++++----------------------
 3 files changed, 16 insertions(+), 27 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 040a1142915fc..e87e59581653c 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -123,15 +123,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		 */
 	case SIL_FAULT:
 		new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		new.ssi_addr = (long) kinfo->si_addr;
 		new.ssi_trapno = kinfo->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		new.ssi_addr = (long) kinfo->si_addr;
-#ifdef __ARCH_SI_TRAPNO
-		new.ssi_trapno = kinfo->si_trapno;
-#endif
 		new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
 		break;
 	case SIL_PERF_EVENT:
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 1e98548d7cf68..5160fd45e5cab 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -40,6 +40,7 @@ enum siginfo_layout {
 	SIL_TIMER,
 	SIL_POLL,
 	SIL_FAULT,
+	SIL_FAULT_TRAPNO,
 	SIL_FAULT_MCEERR,
 	SIL_FAULT_BNDERR,
 	SIL_FAULT_PKUERR,
diff --git a/kernel/signal.c b/kernel/signal.c
index 65888aec65a0f..597594ee72de1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1194,6 +1194,7 @@ static inline bool has_si_pid_and_uid(struct kernel_siginfo *info)
 	case SIL_TIMER:
 	case SIL_POLL:
 	case SIL_FAULT:
+	case SIL_FAULT_TRAPNO:
 	case SIL_FAULT_MCEERR:
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
@@ -2527,6 +2528,7 @@ static void hide_si_addr_tag_bits(struct ksignal *ksig)
 {
 	switch (siginfo_layout(ksig->sig, ksig->info.si_code)) {
 	case SIL_FAULT:
+	case SIL_FAULT_TRAPNO:
 	case SIL_FAULT_MCEERR:
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
@@ -3214,6 +3216,10 @@ enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
 #endif
 			else if ((sig == SIGTRAP) && (si_code == TRAP_PERF))
 				layout = SIL_PERF_EVENT;
+#ifdef __ARCH_SI_TRAPNO
+			else if (layout == SIL_FAULT)
+				layout = SIL_FAULT_TRAPNO;
+#endif
 		}
 		else if (si_code <= NSIGPOLL)
 			layout = SIL_POLL;
@@ -3317,30 +3323,22 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
 		break;
 	case SIL_FAULT:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		to->si_addr = ptr_to_compat(from->si_addr);
 		to->si_trapno = from->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_lower = ptr_to_compat(from->si_lower);
 		to->si_upper = ptr_to_compat(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
 		to->si_addr = ptr_to_compat(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_PERF_EVENT:
@@ -3401,30 +3399,22 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
 		break;
 	case SIL_FAULT:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
+		break;
+	case SIL_FAULT_TRAPNO:
+		to->si_addr = compat_ptr(from->si_addr);
 		to->si_trapno = from->si_trapno;
-#endif
 		break;
 	case SIL_FAULT_MCEERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_addr_lsb = from->si_addr_lsb;
 		break;
 	case SIL_FAULT_BNDERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_lower = compat_ptr(from->si_lower);
 		to->si_upper = compat_ptr(from->si_upper);
 		break;
 	case SIL_FAULT_PKUERR:
 		to->si_addr = compat_ptr(from->si_addr);
-#ifdef __ARCH_SI_TRAPNO
-		to->si_trapno = from->si_trapno;
-#endif
 		to->si_pkey = from->si_pkey;
 		break;
 	case SIL_PERF_EVENT:
-- 
GitLab


From af5eeab7e8e8c2f0fad10e4ab8cc8092012a2d5b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 2 May 2021 14:27:24 -0500
Subject: [PATCH 0956/3804] signal: Factor force_sig_perf out of perf_sigtrap

Separate filling in siginfo for TRAP_PERF from deciding that
a signal needs to be sent.

There are enough little details that need to be correct when
properly filling in siginfo_t that it is easy to make mistakes
if filling in the siginfo_t is in the same function with other
logic.  So factor out force_sig_perf to reduce the cognitive
load on reviewers, maintainers and implementors.

v1: https://lkml.kernel.org/r/m17dkjqqxz.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210505141101.11519-10-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-3-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/events/core.c         | 11 ++---------
 kernel/signal.c              | 13 +++++++++++++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3f6a0fcaa10cf..7f4278fa21fef 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -326,6 +326,7 @@ int send_sig_mceerr(int code, void __user *, short, struct task_struct *);
 
 int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper);
 int force_sig_pkuerr(void __user *addr, u32 pkey);
+int force_sig_perf(void __user *addr, u32 type, u64 sig_data);
 
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 928b166d888e3..48ea8863183bf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6394,8 +6394,6 @@ void perf_event_wakeup(struct perf_event *event)
 
 static void perf_sigtrap(struct perf_event *event)
 {
-	struct kernel_siginfo info;
-
 	/*
 	 * We'd expect this to only occur if the irq_work is delayed and either
 	 * ctx->task or current has changed in the meantime. This can be the
@@ -6410,13 +6408,8 @@ static void perf_sigtrap(struct perf_event *event)
 	if (current->flags & PF_EXITING)
 		return;
 
-	clear_siginfo(&info);
-	info.si_signo = SIGTRAP;
-	info.si_code = TRAP_PERF;
-	info.si_errno = event->attr.type;
-	info.si_perf = event->attr.sig_data;
-	info.si_addr = (void __user *)event->pending_addr;
-	force_sig_info(&info);
+	force_sig_perf((void __user *)event->pending_addr,
+		       event->attr.type, event->attr.sig_data);
 }
 
 static void perf_pending_event_disable(struct perf_event *event)
diff --git a/kernel/signal.c b/kernel/signal.c
index 597594ee72de1..3a18d13c39b2e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1763,6 +1763,19 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
 }
 #endif
 
+int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = SIGTRAP;
+	info.si_errno = type;
+	info.si_code  = TRAP_PERF;
+	info.si_addr  = addr;
+	info.si_perf  = sig_data;
+	return force_sig_info(&info);
+}
+
 /* For the crazy architectures that include trap information in
  * the errno field, instead of an actual errno value.
  */
-- 
GitLab


From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 2 May 2021 17:28:31 -0500
Subject: [PATCH 0957/3804] signal: Deliver all of the siginfo perf data in
 _perf

Don't abuse si_errno and deliver all of the perf data in _perf member
of siginfo_t.

Note: The data field in the perf data structures is a u64 to allow a
pointer to be encoded without needing to implement a 32bit and 64bit
version of the same structure.  There already exist 32bit and 64bit
versions of siginfo_t, and the 32bit version can not include a 64bit
member as it only has 32bit alignment.  So unsigned long is used in
siginfo_t instead of a u64 as unsigned long can encode a pointer on
all architectures linux supports.

v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org
v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com
v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/m68k/kernel/signal.c                     |  3 ++-
 arch/x86/kernel/signal_compat.c               |  6 ++++--
 fs/signalfd.c                                 |  3 ++-
 include/linux/compat.h                        |  5 ++++-
 include/uapi/asm-generic/siginfo.h            |  8 +++++--
 include/uapi/linux/perf_event.h               |  2 +-
 include/uapi/linux/signalfd.h                 |  4 ++--
 kernel/signal.c                               | 21 ++++++++++++-------
 .../selftests/perf_events/sigtrap_threads.c   | 14 ++++++-------
 9 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index a4b7ee1df2119..8f215e79e70e6 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12);
 
 	/* _sigfault._perf */
-	BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14);
 
 	/* _sigpoll */
 	BUILD_BUG_ON(offsetof(siginfo_t, si_band)   != 0x0c);
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index a9fcabd8a5e50..06743ec054d2a 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -141,8 +141,10 @@ static inline void signal_compat_build_tests(void)
 	BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20);
 	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14);
 
-	BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18);
-	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18);
+	BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10);
+	BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14);
 
 	CHECK_CSI_OFFSET(_sigpoll);
 	CHECK_CSI_SIZE  (_sigpoll, 2*sizeof(int));
diff --git a/fs/signalfd.c b/fs/signalfd.c
index e87e59581653c..373df2f12415c 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -134,7 +134,8 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		break;
 	case SIL_PERF_EVENT:
 		new.ssi_addr = (long) kinfo->si_addr;
-		new.ssi_perf = kinfo->si_perf;
+		new.ssi_perf_type = kinfo->si_perf_type;
+		new.ssi_perf_data = kinfo->si_perf_data;
 		break;
 	case SIL_CHLD:
 		new.ssi_pid    = kinfo->si_pid;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 6af7bef15e949..a27fffaae121a 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -236,7 +236,10 @@ typedef struct compat_siginfo {
 					u32 _pkey;
 				} _addr_pkey;
 				/* used when si_code=TRAP_PERF */
-				compat_ulong_t _perf;
+				struct {
+					compat_ulong_t _data;
+					u32 _type;
+				} _perf;
 			};
 		} _sigfault;
 
diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h
index e663bf117b461..5a3c221f4c9d3 100644
--- a/include/uapi/asm-generic/siginfo.h
+++ b/include/uapi/asm-generic/siginfo.h
@@ -91,7 +91,10 @@ union __sifields {
 				__u32 _pkey;
 			} _addr_pkey;
 			/* used when si_code=TRAP_PERF */
-			unsigned long _perf;
+			struct {
+				unsigned long _data;
+				__u32 _type;
+			} _perf;
 		};
 	} _sigfault;
 
@@ -154,7 +157,8 @@ typedef struct siginfo {
 #define si_lower	_sifields._sigfault._addr_bnd._lower
 #define si_upper	_sifields._sigfault._addr_bnd._upper
 #define si_pkey		_sifields._sigfault._addr_pkey._pkey
-#define si_perf		_sifields._sigfault._perf
+#define si_perf_data	_sifields._sigfault._perf._data
+#define si_perf_type	_sifields._sigfault._perf._type
 #define si_band		_sifields._sigpoll._band
 #define si_fd		_sifields._sigpoll._fd
 #define si_call_addr	_sifields._sigsys._call_addr
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e54e639248c86..7b14753b3d38a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
 	 */
 	__u64	sig_data;
 };
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index 7e333042c7e3a..e78dddf433fcd 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -39,8 +39,8 @@ struct signalfd_siginfo {
 	__s32 ssi_syscall;
 	__u64 ssi_call_addr;
 	__u32 ssi_arch;
-	__u32 __pad3;
-	__u64 ssi_perf;
+	__u32 ssi_perf_type;
+	__u64 ssi_perf_data;
 
 	/*
 	 * Pad strcture to 128 bytes. Remember to update the
diff --git a/kernel/signal.c b/kernel/signal.c
index 3a18d13c39b2e..dca53515ae3f7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1768,11 +1768,13 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data)
 	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
-	info.si_signo = SIGTRAP;
-	info.si_errno = type;
-	info.si_code  = TRAP_PERF;
-	info.si_addr  = addr;
-	info.si_perf  = sig_data;
+	info.si_signo     = SIGTRAP;
+	info.si_errno     = 0;
+	info.si_code      = TRAP_PERF;
+	info.si_addr      = addr;
+	info.si_perf_data = sig_data;
+	info.si_perf_type = type;
+
 	return force_sig_info(&info);
 }
 
@@ -3356,7 +3358,8 @@ void copy_siginfo_to_external32(struct compat_siginfo *to,
 		break;
 	case SIL_PERF_EVENT:
 		to->si_addr = ptr_to_compat(from->si_addr);
-		to->si_perf = from->si_perf;
+		to->si_perf_data = from->si_perf_data;
+		to->si_perf_type = from->si_perf_type;
 		break;
 	case SIL_CHLD:
 		to->si_pid = from->si_pid;
@@ -3432,7 +3435,8 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to,
 		break;
 	case SIL_PERF_EVENT:
 		to->si_addr = compat_ptr(from->si_addr);
-		to->si_perf = from->si_perf;
+		to->si_perf_data = from->si_perf_data;
+		to->si_perf_type = from->si_perf_type;
 		break;
 	case SIL_CHLD:
 		to->si_pid    = from->si_pid;
@@ -4615,7 +4619,8 @@ static inline void siginfo_buildtime_checks(void)
 	CHECK_OFFSET(si_lower);
 	CHECK_OFFSET(si_upper);
 	CHECK_OFFSET(si_pkey);
-	CHECK_OFFSET(si_perf);
+	CHECK_OFFSET(si_perf_data);
+	CHECK_OFFSET(si_perf_type);
 
 	/* sigpoll */
 	CHECK_OFFSET(si_band);
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
index 78ddf5e116254..8e83cf91513a6 100644
--- a/tools/testing/selftests/perf_events/sigtrap_threads.c
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -43,7 +43,7 @@ static struct {
 	siginfo_t first_siginfo;	/* First observed siginfo_t. */
 } ctx;
 
-/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
 #define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
 
 static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
@@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 
 	/* Check enabled for parent. */
 	ctx.iterate_on = 0;
@@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 
 	/* Check enabled for parent. */
 	ctx.iterate_on = 0;
@@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress)
 	EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
 	EXPECT_EQ(ctx.tids_want_signal, 0);
 	EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
-	EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
-	EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+	EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+	EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on));
 }
 
 TEST_HARNESS_MAIN
-- 
GitLab


From 922e3013046b79b444c87eda5baf43afae1326a8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 3 May 2021 12:52:43 -0500
Subject: [PATCH 0958/3804] signalfd: Remove SIL_PERF_EVENT fields from
 signalfd_siginfo

With the addition of ssi_perf_data and ssi_perf_type struct signalfd_siginfo
is dangerously close to running out of space.  All that remains is just
enough space for two additional 64bit fields.  A practice of adding all
possible siginfo_t fields into struct signalfd_siginfo can not be supported
as adding the missing fields ssi_lower, ssi_upper, and ssi_pkey would
require two 64bit fields and one 32bit field.  In practice the fields
ssi_perf_data and ssi_perf_type can never be used by signalfd as the signal
that generates them always delivers them synchronously to the thread that
triggers them.

Therefore until someone actually needs the fields ssi_perf_data and
ssi_perf_type in signalfd_siginfo remove them.  This leaves a bit more room
for future expansion.

v1: https://lkml.kernel.org/r/20210503203814.25487-12-ebiederm@xmission.com
v2: https://lkml.kernel.org/r/20210505141101.11519-12-ebiederm@xmission.com
Link: https://lkml.kernel.org/r/20210517195748.8880-5-ebiederm@xmission.com
Reviewed-by: Marco Elver <elver@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/signalfd.c                 | 16 ++++++----------
 include/uapi/linux/signalfd.h |  4 +---
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/signalfd.c b/fs/signalfd.c
index 373df2f12415c..167b5889db4bb 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -114,12 +114,13 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		break;
 	case SIL_FAULT_BNDERR:
 	case SIL_FAULT_PKUERR:
+	case SIL_PERF_EVENT:
 		/*
-		 * Fall through to the SIL_FAULT case.  Both SIL_FAULT_BNDERR
-		 * and SIL_FAULT_PKUERR are only generated by faults that
-		 * deliver them synchronously to userspace.  In case someone
-		 * injects one of these signals and signalfd catches it treat
-		 * it as SIL_FAULT.
+		 * Fall through to the SIL_FAULT case.  SIL_FAULT_BNDERR,
+		 * SIL_FAULT_PKUERR, and SIL_PERF_EVENT are only
+		 * generated by faults that deliver them synchronously to
+		 * userspace.  In case someone injects one of these signals
+		 * and signalfd catches it treat it as SIL_FAULT.
 		 */
 	case SIL_FAULT:
 		new.ssi_addr = (long) kinfo->si_addr;
@@ -132,11 +133,6 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		new.ssi_addr = (long) kinfo->si_addr;
 		new.ssi_addr_lsb = (short) kinfo->si_addr_lsb;
 		break;
-	case SIL_PERF_EVENT:
-		new.ssi_addr = (long) kinfo->si_addr;
-		new.ssi_perf_type = kinfo->si_perf_type;
-		new.ssi_perf_data = kinfo->si_perf_data;
-		break;
 	case SIL_CHLD:
 		new.ssi_pid    = kinfo->si_pid;
 		new.ssi_uid    = kinfo->si_uid;
diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h
index e78dddf433fcd..83429a05b698e 100644
--- a/include/uapi/linux/signalfd.h
+++ b/include/uapi/linux/signalfd.h
@@ -39,8 +39,6 @@ struct signalfd_siginfo {
 	__s32 ssi_syscall;
 	__u64 ssi_call_addr;
 	__u32 ssi_arch;
-	__u32 ssi_perf_type;
-	__u64 ssi_perf_data;
 
 	/*
 	 * Pad strcture to 128 bytes. Remember to update the
@@ -51,7 +49,7 @@ struct signalfd_siginfo {
 	 * comes out of a read(2) and we really don't want to have
 	 * a compat on read(2).
 	 */
-	__u8 __pad[16];
+	__u8 __pad[28];
 };
 
 
-- 
GitLab


From 333944c7c3759c546035f1f9b0b4c72bdc5b7878 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Sun, 25 Apr 2021 17:56:24 +0530
Subject: [PATCH 0959/3804] pinctrl: aspeed: Fix minor documentation error

Kernel test robot throws below warning ->

drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c:2705: warning: This comment
starts with '/**', but isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst
drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c:2614: warning: This comment
starts with '/**', but isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst
drivers/pinctrl/aspeed/pinctrl-aspeed.c:111: warning: This comment
starts with '/**', but isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst
drivers/pinctrl/aspeed/pinmux-aspeed.c:24: warning: This comment starts
with '/**', but isn't a kernel-doc comment. Refer
Documentation/doc-guide/kernel-doc.rst

Fix minor documentation error.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Andrew Jeffery <andrew@aj.id.au>
Link: https://lore.kernel.org/r/1619353584-8196-1-git-send-email-jrdr.linux@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 4 ++--
 drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c | 4 ++--
 drivers/pinctrl/aspeed/pinctrl-aspeed.c    | 3 ++-
 drivers/pinctrl/aspeed/pinmux-aspeed.c     | 3 ++-
 4 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c
index 996ebcba4d386..4c0d26606b6cc 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c
@@ -2702,8 +2702,8 @@ static int aspeed_g5_sig_expr_eval(struct aspeed_pinmux_data *ctx,
 }
 
 /**
- * Configure a pin's signal by applying an expression's descriptor state for
- * all descriptors in the expression.
+ * aspeed_g5_sig_expr_set() - Configure a pin's signal by applying an
+ * expression's descriptor state for all descriptors in the expression.
  *
  * @ctx: The pinmux context
  * @expr: The expression associated with the function whose signal is to be
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
index 5c1a109842a76..eeab093a78159 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
@@ -2611,8 +2611,8 @@ static struct aspeed_pin_config aspeed_g6_configs[] = {
 };
 
 /**
- * Configure a pin's signal by applying an expression's descriptor state for
- * all descriptors in the expression.
+ * aspeed_g6_sig_expr_set() - Configure a pin's signal by applying an
+ * expression's descriptor state for all descriptors in the expression.
  *
  * @ctx: The pinmux context
  * @expr: The expression associated with the function whose signal is to be
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
index 9c65d560d48f7..9bbfe5c14b368 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c
@@ -108,7 +108,8 @@ static int aspeed_sig_expr_disable(struct aspeed_pinmux_data *ctx,
 }
 
 /**
- * Disable a signal on a pin by disabling all provided signal expressions.
+ * aspeed_disable_sig() - Disable a signal on a pin by disabling all provided
+ * signal expressions.
  *
  * @ctx: The pinmux context
  * @exprs: The list of signal expressions (from a priority level on a pin)
diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.c b/drivers/pinctrl/aspeed/pinmux-aspeed.c
index 57305ca838a7c..894e2efd3be76 100644
--- a/drivers/pinctrl/aspeed/pinmux-aspeed.c
+++ b/drivers/pinctrl/aspeed/pinmux-aspeed.c
@@ -21,7 +21,8 @@ static inline void aspeed_sig_desc_print_val(
 }
 
 /**
- * Query the enabled or disabled state of a signal descriptor
+ * aspeed_sig_desc_eval() - Query the enabled or disabled state of a signal
+ * descriptor.
  *
  * @desc: The signal descriptor of interest
  * @enabled: True to query the enabled state, false to query disabled state
-- 
GitLab


From 3410fbcd47dc6479af4309febf760ccaa5efb472 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Wed, 12 May 2021 13:52:27 +0300
Subject: [PATCH 0960/3804] {net, RDMA}/mlx5: Fix override of log_max_qp by
 other device

mlx5_core_dev holds a pointer to a static profile, hence when the
log_max_qp of the profile is overridden by some device, it
affects all other mlx5 devices that share the same profile.
Fix it by having a profile instance for every mlx5 device.

Fixes: 883371c453b9 ("net/mlx5: Check FW limitations on log_max_qp before setting it")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c               |  4 +-
 .../net/ethernet/mellanox/mlx5/core/main.c    | 11 +++--
 include/linux/mlx5/driver.h                   | 44 +++++++++----------
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4388afeff2512..9662cd39c7ffc 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -743,10 +743,10 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
 			   MLX5_IB_UMR_OCTOWORD;
 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
-		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+		if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
 		    !dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
 		    mlx5_ib_can_load_pas_with_umr(dev, 0))
-			ent->limit = dev->mdev->profile->mr_cache[i].limit;
+			ent->limit = dev->mdev->profile.mr_cache[i].limit;
 		else
 			ent->limit = 0;
 		spin_lock_irq(&ent->lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c114365eb126f..a1d67bd7fb43b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -503,7 +503,7 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 
 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 {
-	struct mlx5_profile *prof = dev->profile;
+	struct mlx5_profile *prof = &dev->profile;
 	void *set_hca_cap;
 	int err;
 
@@ -524,11 +524,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 		 to_fw_pkey_sz(dev, 128));
 
 	/* Check log_max_qp from HCA caps to set in current profile */
-	if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < profile[prof_sel].log_max_qp) {
+	if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) {
 		mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n",
-			       profile[prof_sel].log_max_qp,
+			       prof->log_max_qp,
 			       MLX5_CAP_GEN_MAX(dev, log_max_qp));
-		profile[prof_sel].log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
+		prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp);
 	}
 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
@@ -1381,8 +1381,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 	struct mlx5_priv *priv = &dev->priv;
 	int err;
 
-	dev->profile = &profile[profile_idx];
-
+	memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
 	INIT_LIST_HEAD(&priv->ctx_list);
 	spin_lock_init(&priv->ctx_lock);
 	mutex_init(&dev->intf_state_mutex);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f8e8d7e906160..020a8f7fdbdd4 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -703,6 +703,27 @@ struct mlx5_hv_vhca;
 #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
 #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
 
+enum {
+	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
+	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
+};
+
+enum {
+	MR_CACHE_LAST_STD_ENTRY = 20,
+	MLX5_IMR_MTT_CACHE_ENTRY,
+	MLX5_IMR_KSM_CACHE_ENTRY,
+	MAX_MR_CACHE_ENTRIES
+};
+
+struct mlx5_profile {
+	u64	mask;
+	u8	log_max_qp;
+	struct {
+		int	size;
+		int	limit;
+	} mr_cache[MAX_MR_CACHE_ENTRIES];
+};
+
 struct mlx5_core_dev {
 	struct device *device;
 	enum mlx5_coredev_type coredev_type;
@@ -731,7 +752,7 @@ struct mlx5_core_dev {
 	struct mutex		intf_state_mutex;
 	unsigned long		intf_state;
 	struct mlx5_priv	priv;
-	struct mlx5_profile	*profile;
+	struct mlx5_profile	profile;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
 	struct mlx5_dm          *dm;
@@ -1083,18 +1104,6 @@ static inline u8 mlx5_mkey_variant(u32 mkey)
 	return mkey & 0xff;
 }
 
-enum {
-	MLX5_PROF_MASK_QP_SIZE		= (u64)1 << 0,
-	MLX5_PROF_MASK_MR_CACHE		= (u64)1 << 1,
-};
-
-enum {
-	MR_CACHE_LAST_STD_ENTRY = 20,
-	MLX5_IMR_MTT_CACHE_ENTRY,
-	MLX5_IMR_KSM_CACHE_ENTRY,
-	MAX_MR_CACHE_ENTRIES
-};
-
 /* Async-atomic event notifier used by mlx5 core to forward FW
  * evetns recived from event queue to mlx5 consumers.
  * Optimise event queue dipatching.
@@ -1148,15 +1157,6 @@ int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
 			    struct ib_device *device,
 			    struct rdma_netdev_alloc_params *params);
 
-struct mlx5_profile {
-	u64	mask;
-	u8	log_max_qp;
-	struct {
-		int	size;
-		int	limit;
-	} mr_cache[MAX_MR_CACHE_ENTRIES];
-};
-
 enum {
 	MLX5_PCI_DEV_IS_VF		= 1 << 0,
 };
-- 
GitLab


From dca59f4a791960ec73fa15803faa0abe0f92ece2 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Mon, 26 Apr 2021 15:16:26 +0300
Subject: [PATCH 0961/3804] net/mlx5e: Fix nullptr in add_vlan_push_action()

The result of dev_get_by_index_rcu() is not checked for NULL and then
gets dereferenced immediately.

Also, the RCU lock must be held by the caller of dev_get_by_index_rcu(),
which isn't satisfied by the call stack.

Fix by handling nullptr return value when iflink device is not found.
Add RCU locking around dev_get_by_index_rcu() to avoid possible adverse
effects while iterating over the net_device's hlist.

It is safe not to increment reference count of the net_device pointer in
case of a successful lookup, because it's already handled by VLAN code
during VLAN device registration (see register_vlan_dev and
netdev_upper_dev_link).

Fixes: 278748a95aa3 ("net/mlx5e: Offload TC e-switch rules with egress VLAN device")
Addresses-Coverity: ("Dereference null return value")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 47a9c49b25fd1..46945d04b5b89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3526,8 +3526,12 @@ static int add_vlan_push_action(struct mlx5e_priv *priv,
 	if (err)
 		return err;
 
-	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
-					dev_get_iflink(vlan_dev));
+	rcu_read_lock();
+	*out_dev = dev_get_by_index_rcu(dev_net(vlan_dev), dev_get_iflink(vlan_dev));
+	rcu_read_unlock();
+	if (!*out_dev)
+		return -ENODEV;
+
 	if (is_vlan_dev(*out_dev))
 		err = add_vlan_push_action(priv, attr, out_dev, action);
 
-- 
GitLab


From 442b3d7b671bcb779ebdad46edd08051eb8b28d9 Mon Sep 17 00:00:00 2001
From: Jianbo Liu <jianbol@nvidia.com>
Date: Fri, 30 Apr 2021 06:58:29 +0000
Subject: [PATCH 0962/3804] net/mlx5: Set reformat action when needed for
 termination rules

For remote mirroring, after the tunnel packets are received, they are
decapsulated and sent to representor, then re-encapsulated and sent
out over another tunnel. So reformat action is set only when the
destination is required to do encapsulation.

Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink")
Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
Reviewed-by: Ariel Levkovich <lariel@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mlx5/core/eswitch_offloads_termtbl.c      | 31 ++++++-------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index a81ece94f5991..e3e7fdd396ad7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -172,19 +172,6 @@ mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw,
 	}
 }
 
-static bool mlx5_eswitch_termtbl_is_encap_reformat(struct mlx5_pkt_reformat *rt)
-{
-	switch (rt->reformat_type) {
-	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
-	case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
-	case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
-	case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void
 mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
 				  struct mlx5_flow_act *dst)
@@ -202,14 +189,6 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
 			memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
 		}
 	}
-
-	if (src->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
-	    mlx5_eswitch_termtbl_is_encap_reformat(src->pkt_reformat)) {
-		src->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-		dst->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-		dst->pkt_reformat = src->pkt_reformat;
-		src->pkt_reformat = NULL;
-	}
 }
 
 static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
@@ -279,6 +258,14 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT)
 			continue;
 
+		if (attr->dests[num_vport_dests].flags & MLX5_ESW_DEST_ENCAP) {
+			term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			term_tbl_act.pkt_reformat = attr->dests[num_vport_dests].pkt_reformat;
+		} else {
+			term_tbl_act.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			term_tbl_act.pkt_reformat = NULL;
+		}
+
 		/* get the terminating table for the action list */
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
 						     &dest[i], attr);
@@ -301,6 +288,8 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		goto revert_changes;
 
 	/* create the FTE */
+	flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+	flow_act->pkt_reformat = NULL;
 	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
 	if (IS_ERR(rule))
 		goto revert_changes;
-- 
GitLab


From fca086617af864efd20289774901221b2df06b39 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 13 May 2021 15:00:53 +0300
Subject: [PATCH 0963/3804] net/mlx5: Fix err prints and return when creating
 termination table

Fix the prints to show the actual error code instead of the result of
IS_ERR(), which would always print 1.
Also return the real error instead of always returning -EOPNOTSUPP.

Fixes: 10caabdaad5a ("net/mlx5e: Use termination table for VLAN push actions")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mlx5/core/eswitch_offloads_termtbl.c      | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index e3e7fdd396ad7..d61bee2d35fec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -65,7 +65,7 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_flow_namespace *root_ns;
-	int err;
+	int err, err2;
 
 	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 	if (!root_ns) {
@@ -83,26 +83,26 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 	ft_attr.autogroup.max_num_groups = 1;
 	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(tt->termtbl)) {
-		esw_warn(dev, "Failed to create termination table (error %d)\n",
-			 IS_ERR(tt->termtbl));
-		return -EOPNOTSUPP;
+		err = PTR_ERR(tt->termtbl);
+		esw_warn(dev, "Failed to create termination table, err %pe\n", tt->termtbl);
+		return err;
 	}
 
 	tt->rule = mlx5_add_flow_rules(tt->termtbl, NULL, flow_act,
 				       &tt->dest, 1);
 	if (IS_ERR(tt->rule)) {
-		esw_warn(dev, "Failed to create termination table rule (error %d)\n",
-			 IS_ERR(tt->rule));
+		err = PTR_ERR(tt->rule);
+		esw_warn(dev, "Failed to create termination table rule, err %pe\n", tt->rule);
 		goto add_flow_err;
 	}
 	return 0;
 
 add_flow_err:
-	err = mlx5_destroy_flow_table(tt->termtbl);
-	if (err)
-		esw_warn(dev, "Failed to destroy termination table\n");
+	err2 = mlx5_destroy_flow_table(tt->termtbl);
+	if (err2)
+		esw_warn(dev, "Failed to destroy termination table, err %d\n", err2);
 
-	return -EOPNOTSUPP;
+	return err;
 }
 
 static struct mlx5_termtbl_handle *
@@ -270,8 +270,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 		tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act,
 						     &dest[i], attr);
 		if (IS_ERR(tt)) {
-			esw_warn(esw->dev, "Failed to get termination table (error %d)\n",
-				 IS_ERR(tt));
+			esw_warn(esw->dev, "Failed to get termination table, err %pe\n", tt);
 			goto revert_changes;
 		}
 		attr->dests[num_vport_dests].termtbl = tt;
-- 
GitLab


From 82041634d96e87b41c600a673f10150d9f21f742 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 7 May 2021 10:08:47 +0300
Subject: [PATCH 0964/3804] net/mlx5: SF, Fix show state inactive when its
 inactivated

When an SF is inactivated and is in the TEARDOWN_REQUEST state, the
driver still reports its state as active. This is incorrect.
Fix it by treating TEARDOWN_REQUEST as an inactive state. When an SF
is still attached to the driver, a user request to reactivate it
returns EINVAL. Inform the user with the more suitable error code EBUSY
and an informative error message.

Fixes: 6a3273217469 ("net/mlx5: SF, Port function state change support")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/sf/devlink.c   | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index a8e73c9ed1eae..1be0487693094 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -136,10 +136,10 @@ static enum devlink_port_fn_state mlx5_sf_to_devlink_state(u8 hw_state)
 	switch (hw_state) {
 	case MLX5_VHCA_STATE_ACTIVE:
 	case MLX5_VHCA_STATE_IN_USE:
-	case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
 		return DEVLINK_PORT_FN_STATE_ACTIVE;
 	case MLX5_VHCA_STATE_INVALID:
 	case MLX5_VHCA_STATE_ALLOCATED:
+	case MLX5_VHCA_STATE_TEARDOWN_REQUEST:
 	default:
 		return DEVLINK_PORT_FN_STATE_INACTIVE;
 	}
@@ -192,14 +192,17 @@ sf_err:
 	return err;
 }
 
-static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
+static int mlx5_sf_activate(struct mlx5_core_dev *dev, struct mlx5_sf *sf,
+			    struct netlink_ext_ack *extack)
 {
 	int err;
 
 	if (mlx5_sf_is_active(sf))
 		return 0;
-	if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED)
-		return -EINVAL;
+	if (sf->hw_state != MLX5_VHCA_STATE_ALLOCATED) {
+		NL_SET_ERR_MSG_MOD(extack, "SF is inactivated but it is still attached");
+		return -EBUSY;
+	}
 
 	err = mlx5_cmd_sf_enable_hca(dev, sf->hw_fn_id);
 	if (err)
@@ -226,7 +229,8 @@ static int mlx5_sf_deactivate(struct mlx5_core_dev *dev, struct mlx5_sf *sf)
 
 static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *table,
 			     struct mlx5_sf *sf,
-			     enum devlink_port_fn_state state)
+			     enum devlink_port_fn_state state,
+			     struct netlink_ext_ack *extack)
 {
 	int err = 0;
 
@@ -234,7 +238,7 @@ static int mlx5_sf_state_set(struct mlx5_core_dev *dev, struct mlx5_sf_table *ta
 	if (state == mlx5_sf_to_devlink_state(sf->hw_state))
 		goto out;
 	if (state == DEVLINK_PORT_FN_STATE_ACTIVE)
-		err = mlx5_sf_activate(dev, sf);
+		err = mlx5_sf_activate(dev, sf, extack);
 	else if (state == DEVLINK_PORT_FN_STATE_INACTIVE)
 		err = mlx5_sf_deactivate(dev, sf);
 	else
@@ -265,7 +269,7 @@ int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_po
 		goto out;
 	}
 
-	err = mlx5_sf_state_set(dev, table, sf, state);
+	err = mlx5_sf_state_set(dev, table, sf, state, extack);
 out:
 	mlx5_sf_table_put(table);
 	return err;
-- 
GitLab


From fe7738eb3ca3631a75844e790f6cb576c0fe7b00 Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Mon, 26 Apr 2021 15:16:26 +0300
Subject: [PATCH 0965/3804] net/mlx5e: Fix nullptr in mlx5e_tc_add_fdb_flow()

The result of __dev_get_by_index() is not checked for NULL; it is then
passed to mlx5e_attach_encap() and gets dereferenced.

Also, in case of a successful lookup, the net_device reference count is
not incremented, which may result in net_device pointer becoming invalid
at any time during mlx5e_attach_encap() execution.

Fix by using dev_get_by_index(), which does proper reference counting on
the net_device pointer. Also, handle nullptr return value when mirred
device is not found.

It's safe to call dev_put() on the mirred net_device pointer, right
after mlx5e_attach_encap() call, because it's not being saved/copied
down the call chain.

Fixes: 3c37745ec614 ("net/mlx5e: Properly deal with encap flows add/del under neigh update")
Addresses-Coverity: ("Dereference null return value")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 46945d04b5b89..882bafba43f28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1322,10 +1322,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct netlink_ext_ack *extack)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct net_device *out_dev, *encap_dev = NULL;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 	struct mlx5_flow_attr *attr = flow->attr;
 	bool vf_tun = false, encap_valid = true;
+	struct net_device *encap_dev = NULL;
 	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5_fc *counter = NULL;
 	struct mlx5e_rep_priv *rpriv;
@@ -1371,16 +1371,22 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	esw_attr = attr->esw_attr;
 
 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
+		struct net_device *out_dev;
 		int mirred_ifindex;
 
 		if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
 			continue;
 
 		mirred_ifindex = parse_attr->mirred_ifindex[out_index];
-		out_dev = __dev_get_by_index(dev_net(priv->netdev),
-					     mirred_ifindex);
+		out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
+		if (!out_dev) {
+			NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
+			err = -ENODEV;
+			goto err_out;
+		}
 		err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
 					 extack, &encap_dev, &encap_valid);
+		dev_put(out_dev);
 		if (err)
 			goto err_out;
 
-- 
GitLab


From 83026d83186bc48bb41ee4872f339b83f31dfc55 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Mon, 3 May 2021 18:01:02 +0300
Subject: [PATCH 0966/3804] net/mlx5e: Fix null deref accessing lag dev

The lag dev may be NULL, so stop processing the event in that case.
In bond_enslave() the active/backup slave is set before the upper dev
is set, so the first event arrives without an upper dev.
After the upper dev is set with bond_master_upper_dev_link() there is
a second event, and in that event we have an upper dev.

Fixes: 7e51891a237f ("net/mlx5e: Use netdev events to set/del egress acl forward-to-vport rule")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 95f2b26a3ee31..9c076aa20306a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -223,6 +223,8 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
 	rpriv = priv->ppriv;
 	fwd_vport_num = rpriv->rep->vport;
 	lag_dev = netdev_master_upper_dev_get(netdev);
+	if (!lag_dev)
+		return;
 
 	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
 		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
-- 
GitLab


From eb96cc15926f4ddde3a28c42feeffdf002451c24 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Sun, 2 May 2021 10:25:50 +0300
Subject: [PATCH 0967/3804] net/mlx5e: Make sure fib dev exists in fib event

For an unreachable route entry the fib dev does not exist.

Fixes: 8914add2c9e5 ("net/mlx5e: Handle FIB events to update tunnel endpoint device")
Reported-by: Dennis Afanasev <dennis.afanasev@stateless.net>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 593503bc4d078..f1fb11680d202 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -1505,7 +1505,7 @@ mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
 
 	fen_info = container_of(info, struct fib_entry_notifier_info, info);
 	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
-	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
+	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
 	    fen_info->dst_len != 32)
 		return NULL;
 
-- 
GitLab


From 77ecd10d0a8aaa6e4871d8c63626e4c9fc5e47db Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Thu, 25 Feb 2021 11:20:00 -0800
Subject: [PATCH 0968/3804] net/mlx5e: reset XPS on error flow if netdev isn't
 registered yet

mlx5e_attach_netdev can be called prior to registering the netdevice:
Example stack:

ipoib_new_child_link ->
ipoib_intf_init->
rdma_init_netdev->
mlx5_rdma_setup_rn->

mlx5e_attach_netdev->
mlx5e_num_channels_changed ->
mlx5e_set_default_xps_cpumasks ->
netif_set_xps_queue ->
__netif_set_xps_queue -> kmalloc

If any later stage fails at any point after mlx5e_num_channels_changed()
returns, XPS allocated maps will never be freed as they
are only freed during netdev unregistration, which will never happen for
yet to be registered netdevs.

Fixes: 3909a12e7913 ("net/mlx5e: Fix configuration of XPS cpumasks and netdev queues in corner cases")
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bca832cdc4cbe..89937b0550706 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5229,6 +5229,11 @@ static void mlx5e_update_features(struct net_device *netdev)
 	rtnl_unlock();
 }
 
+static void mlx5e_reset_channels(struct net_device *netdev)
+{
+	netdev_reset_tc(netdev);
+}
+
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
 	const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
@@ -5283,6 +5288,7 @@ err_cleanup_tx:
 	profile->cleanup_tx(priv);
 
 out:
+	mlx5e_reset_channels(priv->netdev);
 	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
 	cancel_work_sync(&priv->update_stats_work);
 	return err;
@@ -5300,6 +5306,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
 
 	profile->cleanup_rx(priv);
 	profile->cleanup_tx(priv);
+	mlx5e_reset_channels(priv->netdev);
 	cancel_work_sync(&priv->update_stats_work);
 }
 
-- 
GitLab


From 97817fcc684ed01497bd19d0cd4dea699665b9cf Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Tue, 13 Apr 2021 22:43:08 +0300
Subject: [PATCH 0969/3804] net/mlx5e: Fix multipath lag activation

When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route,
lag activation can be missed if a stale (struct lag_mp)->mfi pointer
exists, which was associated with an older multipath route that had been
removed.

Normally, when a route is removed, it triggers mlx5_lag_fib_event(),
which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if
mlx5_lag_check_prereq() condition isn't met, for example when eswitch is
in legacy mode, the fib event is skipped and mfi pointer becomes stale.

Fix by resetting mfi pointer to NULL every time mlx5_lag_mp_init() is
called.

Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 2c41a69202642..fd6196b5e1630 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -307,6 +307,11 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev)
 	struct lag_mp *mp = &ldev->lag_mp;
 	int err;
 
+	/* always clear mfi, as it might become stale when a route delete event
+	 * has been missed
+	 */
+	mp->mfi = NULL;
+
 	if (mp->fib_nb.notifier_call)
 		return 0;
 
@@ -335,4 +340,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
 	unregister_fib_notifier(&init_net, &mp->fib_nb);
 	destroy_workqueue(mp->wq);
 	mp->fib_nb.notifier_call = NULL;
+	mp->mfi = NULL;
 }
-- 
GitLab


From 7d1a3d08c8a6398e7497a98cf3f7b73ea13d9939 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 20 Apr 2021 15:16:16 +0300
Subject: [PATCH 0970/3804] net/mlx5e: Reject mirroring on source port change
 encap rules

Rules with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE dest flag are
translated to destination FT in eswitch. Currently it is not possible to
mirror such rules because firmware doesn't support mixing FT and Vport
destinations in single rule when one of them adds encapsulation. Since the
only use case for MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE destination is
support for tunnel endpoints on VF and trying to offload such rule with
mirror action causes either crash in fs_core or firmware error with
syndrome 0xff6a1d, reject all such rules in mlx5 TC layer.

Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 882bafba43f28..bccdb43a880b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1399,6 +1399,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		esw_attr->dests[out_index].mdev = out_priv->mdev;
 	}
 
+	if (vf_tun && esw_attr->out_count > 1) {
+		NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
 	if (err)
 		goto err_out;
-- 
GitLab


From 5e7923acbd86d0ff29269688d8a9c47ad091dd46 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 21 Apr 2021 14:26:31 +0300
Subject: [PATCH 0971/3804] net/mlx5e: Fix error path of updating netdev queues

Avoid division by zero in the error flow. In the driver, the TC number
can be either 1 or 8. When the TC count is set to 1, the driver zeroes
netdev->num_tc. Hence, it needs to be converted back from 0 to 1 in the
error flow.

Fixes: fa3748775b92 ("net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 89937b0550706..d1b9a4040d60d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2697,7 +2697,7 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 	int err;
 
 	old_num_txqs = netdev->real_num_tx_queues;
-	old_ntc = netdev->num_tc;
+	old_ntc = netdev->num_tc ? : 1;
 
 	nch = priv->channels.params.num_channels;
 	ntc = priv->channels.params.num_tc;
-- 
GitLab


From 7c9f131f366ab414691907fa0407124ea2b2f3bc Mon Sep 17 00:00:00 2001
From: Eli Cohen <elic@nvidia.com>
Date: Thu, 22 Apr 2021 15:48:10 +0300
Subject: [PATCH 0972/3804] {net,vdpa}/mlx5: Configure interface MAC into mpfs
 L2 table

net/mlx5: Expose MPFS configuration API

MPFS is the multi physical function switch that bridges traffic between
the physical port and any physical functions associated with it. The
driver is required to add or remove MAC entries to properly forward
incoming traffic to the correct physical function.

We export the API to control MPFS so that other drivers, such as
mlx5_vdpa are able to add MAC addresses of their network interfaces.

The MAC address of the vdpa interface must be configured into the MPFS L2
address. Failing to do so could cause, in some NIC configurations, failure
to forward packets to the vdpa network device instance.

Fix this by adding calls to update the MPFS table.

CC: <mst@redhat.com>
CC: <jasowang@redhat.com>
CC: <virtualization@lists.linux-foundation.org>
Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices")
Signed-off-by: Eli Cohen <elic@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_fs.c   |  1 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |  1 +
 .../ethernet/mellanox/mlx5/core/lib/mpfs.c    |  3 +++
 .../ethernet/mellanox/mlx5/core/lib/mpfs.h    |  5 +----
 drivers/vdpa/mlx5/net/mlx5_vnet.c             | 19 ++++++++++++++++++-
 include/linux/mlx5/mpfs.h                     | 18 ++++++++++++++++++
 6 files changed, 42 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/mlx5/mpfs.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 0d571a0c76d90..0b75fab41ae8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -35,6 +35,7 @@
 #include <linux/ipv6.h>
 #include <linux/tcp.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
 #include "en.h"
 #include "en_rep.h"
 #include "lib/mpfs.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 570f2280823c0..b88705a3a1a8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/mlx5_ifc.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
+#include <linux/mlx5/mpfs.h>
 #include "esw/acl/lgcy.h"
 #include "esw/legacy.h"
 #include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index fd8449ff9e176..839a01da110f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -33,6 +33,7 @@
 #include <linux/etherdevice.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/mpfs.h>
 #include <linux/mlx5/eswitch.h>
 #include "mlx5_core.h"
 #include "lib/mpfs.h"
@@ -175,6 +176,7 @@ out:
 	mutex_unlock(&mpfs->lock);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_mpfs_add_mac);
 
 int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
 {
@@ -206,3 +208,4 @@ unlock:
 	mutex_unlock(&mpfs->lock);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_mpfs_del_mac);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
index 4a7b2c3203a7e..4a293542a7aa1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h
@@ -84,12 +84,9 @@ struct l2addr_node {
 #ifdef CONFIG_MLX5_MPFS
 int  mlx5_mpfs_init(struct mlx5_core_dev *dev);
 void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev);
-int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
-int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
 #else /* #ifndef CONFIG_MLX5_MPFS */
 static inline int  mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {}
-static inline int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
-static inline int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
 #endif
+
 #endif
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 189e4385df403..dda5dc6f77378 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -15,6 +15,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/mlx5_ifc_vdpa.h>
+#include <linux/mlx5/mpfs.h>
 #include "mlx5_vdpa.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
@@ -1859,11 +1860,16 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb
 static void mlx5_vdpa_free(struct vdpa_device *vdev)
 {
 	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_core_dev *pfmdev;
 	struct mlx5_vdpa_net *ndev;
 
 	ndev = to_mlx5_vdpa_ndev(mvdev);
 
 	free_resources(ndev);
+	if (!is_zero_ether_addr(ndev->config.mac)) {
+		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
+		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
+	}
 	mlx5_vdpa_free_resources(&ndev->mvdev);
 	mutex_destroy(&ndev->reslock);
 }
@@ -1990,6 +1996,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 {
 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
 	struct virtio_net_config *config;
+	struct mlx5_core_dev *pfmdev;
 	struct mlx5_vdpa_dev *mvdev;
 	struct mlx5_vdpa_net *ndev;
 	struct mlx5_core_dev *mdev;
@@ -2023,10 +2030,17 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
 	if (err)
 		goto err_mtu;
 
+	if (!is_zero_ether_addr(config->mac)) {
+		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
+		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
+		if (err)
+			goto err_mtu;
+	}
+
 	mvdev->vdev.dma_dev = mdev->device;
 	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
 	if (err)
-		goto err_mtu;
+		goto err_mpfs;
 
 	err = alloc_resources(ndev);
 	if (err)
@@ -2044,6 +2058,9 @@ err_reg:
 	free_resources(ndev);
 err_res:
 	mlx5_vdpa_free_resources(&ndev->mvdev);
+err_mpfs:
+	if (!is_zero_ether_addr(config->mac))
+		mlx5_mpfs_del_mac(pfmdev, config->mac);
 err_mtu:
 	mutex_destroy(&ndev->reslock);
 	put_device(&mvdev->vdev.dev);
diff --git a/include/linux/mlx5/mpfs.h b/include/linux/mlx5/mpfs.h
new file mode 100644
index 0000000000000..bf700c8d55164
--- /dev/null
+++ b/include/linux/mlx5/mpfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2021 Mellanox Technologies Ltd.
+ */
+
+#ifndef _MLX5_MPFS_
+#define _MLX5_MPFS_
+
+struct mlx5_core_dev;
+
+#ifdef CONFIG_MLX5_MPFS
+int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac);
+int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac);
+#else /* #ifndef CONFIG_MLX5_MPFS */
+static inline int  mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+static inline int  mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; }
+#endif
+
+#endif
-- 
GitLab


From 75e8564e919f369cafb3d2b8fd11ec5af7b37416 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 25 Apr 2021 13:28:10 +0300
Subject: [PATCH 0973/3804] net/mlx5: Don't overwrite HCA capabilities when
 setting MSI-X count

During driver probe of device that has dynamic MSI-X feature enabled,
the following error is printed in some FW flavour (not released yet).

 mlx5_core 0000:06:00.0: firmware version: 4.7.4387
 mlx5_core 0000:06:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link)
 mlx5_core 0000:06:00.0: mlx5_cmd_check:777:(pid 70599): SET_HCA_CAP(0x109) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x0)
 mlx5_core 0000:06:00.0: set_hca_cap:622:(pid 70599): handle_hca_cap failed
 mlx5_core 0000:06:00.0: mlx5_function_setup:1045:(pid 70599): set_hca_cap failed
 mlx5_core 0000:06:00.0: probe_one:1465:(pid 70599): mlx5_init_one failed with error code -22
 mlx5_core: probe of 0000:06:00.0 failed with error -22

In order to make the setting capability of MSI-X future proof, let's
query the current capabilities first.

Fixes: 604774add516 ("net/mlx5: Dynamically assign MSI-X vectors count")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 1f907df5b3a2b..c3373fb1cd7fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -95,9 +95,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs)
 int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 			    int msix_vec_count)
 {
-	int sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	int query_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *hca_cap = NULL, *query_cap = NULL, *cap;
 	int num_vf_msix, min_msix, max_msix;
-	void *hca_cap, *cap;
 	int ret;
 
 	num_vf_msix = MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix);
@@ -116,11 +117,20 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 	if (msix_vec_count > max_msix)
 		return -EOVERFLOW;
 
-	hca_cap = kzalloc(sz, GFP_KERNEL);
-	if (!hca_cap)
-		return -ENOMEM;
+	query_cap = kzalloc(query_sz, GFP_KERNEL);
+	hca_cap = kzalloc(set_sz, GFP_KERNEL);
+	if (!hca_cap || !query_cap) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap);
+	if (ret)
+		goto out;
 
 	cap = MLX5_ADDR_OF(set_hca_cap_in, hca_cap, capability);
+	memcpy(cap, MLX5_ADDR_OF(query_hca_cap_out, query_cap, capability),
+	       MLX5_UN_SZ_BYTES(hca_cap_union));
 	MLX5_SET(cmd_hca_cap, cap, dynamic_msix_table_size, msix_vec_count);
 
 	MLX5_SET(set_hca_cap_in, hca_cap, opcode, MLX5_CMD_OP_SET_HCA_CAP);
@@ -130,7 +140,9 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id,
 	MLX5_SET(set_hca_cap_in, hca_cap, op_mod,
 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1);
 	ret = mlx5_cmd_exec_in(dev, set_hca_cap, hca_cap);
+out:
 	kfree(hca_cap);
+	kfree(query_cap);
 	return ret;
 }
 
-- 
GitLab


From 6ff51ab8aa8fcbcddeeefce8ca705b575805d12b Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Wed, 31 Mar 2021 10:09:02 +0300
Subject: [PATCH 0974/3804] net/mlx5: Set term table as an unmanaged flow table

Termination tables are restricted to have the default miss action and
cannot be set to forward to another table in case of a miss.
If the fs prio of the termination table is not the last one in the
list, fs_core will attempt to attach it to another table.

Set the unmanaged ft flag when creating the termination table ft
and select the tc offload prio for it to prevent fs_core from selecting
the forwarding to next ft miss action and use the default one.

In addition, set the flow that forwards to the termination table to
ignore ft level restrictions since the ft level is not set by fs_core
for unmanaged fts.

Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink")
Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index d61bee2d35fec..b459549058450 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -76,10 +76,11 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev,
 	/* As this is the terminating action then the termination table is the
 	 * same prio as the slow path
 	 */
-	ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION |
+	ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED |
 			MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-	ft_attr.prio = FDB_SLOW_PATH;
+	ft_attr.prio = FDB_TC_OFFLOAD;
 	ft_attr.max_fte = 1;
+	ft_attr.level = 1;
 	ft_attr.autogroup.max_num_groups = 1;
 	tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
 	if (IS_ERR(tt->termtbl)) {
@@ -217,6 +218,7 @@ mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
 	int i;
 
 	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table) ||
+	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level) ||
 	    attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH ||
 	    !mlx5_eswitch_offload_is_uplink_port(esw, spec))
 		return false;
@@ -289,6 +291,7 @@ mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw,
 	/* create the FTE */
 	flow_act->action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
 	flow_act->pkt_reformat = NULL;
+	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 	rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest);
 	if (IS_ERR(rule))
 		goto revert_changes;
-- 
GitLab


From e63052a5dd3ce7979bff727a8f4bb6d6b3d1317b Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 5 May 2021 13:20:26 -0700
Subject: [PATCH 0975/3804] mlx5e: add missing BH locking around
 napi_schedule()

It's not correct to call napi_schedule() in pure process
context. Because we use __raise_softirq_irqoff() we require
callers to be in a context which will eventually lead to
softirq handling (hardirq, bh disabled, etc.).

With code as is users will see:

 NOHZ tick-stop error: Non-RCU local softirq work is pending, handler #08!!!

Fixes: a8dd7ac12fc3 ("net/mlx5e: Generalize RQ activation")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d1b9a4040d60d..ad0f69480b9ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -889,10 +889,13 @@ err_free_rq:
 void mlx5e_activate_rq(struct mlx5e_rq *rq)
 {
 	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-	if (rq->icosq)
+	if (rq->icosq) {
 		mlx5e_trigger_irq(rq->icosq);
-	else
+	} else {
+		local_bh_disable();
 		napi_schedule(rq->cq.napi);
+		local_bh_enable();
+	}
 }
 
 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
-- 
GitLab


From fec356a61aa3d3a66416b4321f1279e09e0f256f Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo40@huawei.com>
Date: Wed, 19 May 2021 13:01:09 +0800
Subject: [PATCH 0976/3804] nvmet: fix memory leak in nvmet_alloc_ctrl()

When creating a ctrl in nvmet_alloc_ctrl(), if the cntlid_min is larger
than the cntlid_max of the subsystem, the code jumps to the
"out_free_changed_ns_list" label, but ctrl->sqs is never freed.
Fix this by jumping to the "out_free_sqs" label instead.

Fixes: 94a39d61f80f ("nvmet: make ctrl-id configurable")
Signed-off-by: Wu Bo <wubo40@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 25cc2ee8de3f1..1853db38b6820 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1372,7 +1372,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 		goto out_free_changed_ns_list;
 
 	if (subsys->cntlid_min > subsys->cntlid_max)
-		goto out_free_changed_ns_list;
+		goto out_free_sqs;
 
 	ret = ida_simple_get(&cntlid_ida,
 			     subsys->cntlid_min, subsys->cntlid_max,
-- 
GitLab


From 03504e3b54cc8118cc26c064e60a0b00c2308708 Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo40@huawei.com>
Date: Wed, 19 May 2021 13:01:10 +0800
Subject: [PATCH 0977/3804] nvme-loop: fix memory leak in
 nvme_loop_create_ctrl()

When creating loop ctrl in nvme_loop_create_ctrl(), if nvme_init_ctrl()
fails, the loop ctrl should be freed before jumping to the "out" label.

Fixes: 3a85a5de29ea ("nvme-loop: add a NVMe loopback host driver")
Signed-off-by: Wu Bo <wubo40@huawei.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 74b3b150e1a57..cb30cb942e1d1 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -590,8 +590,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
 
 	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
 				0 /* no quirks, we're perfect! */);
-	if (ret)
+	if (ret) {
+		kfree(ctrl);
 		goto out;
+	}
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		WARN_ON_ONCE(1);
-- 
GitLab


From 825619b09ad351894d2c6fb6705f5b3711d145c7 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 17 May 2021 14:07:45 -0700
Subject: [PATCH 0978/3804] nvme-tcp: fix possible use-after-completion

Commit db5ad6b7f8cd ("nvme-tcp: try to send request in queue_rq
context") added a second context that may perform a network send.
This means that now RX and TX are not serialized in nvme_tcp_io_work
and can run concurrently.

While there is correct mutual exclusion in the TX path (where
the send_mutex protects the queue socket send activity), RX activity,
and more specifically request completion, may run concurrently.

This means we must guarantee that any request state related to its
lifetime (e.g. bytes sent) is not accessed or mutated once a completion
may possibly have arrived back (and been processed).

The race may trigger when a request completion arrives, is processed
_and_ the request is reused as a fresh new request, exactly in the
(relatively short) window between the last data payload being sent and
the request iov_iter being advanced.

Consider the following race:
1. 16K write request is queued
2. The nvme command and the data is sent to the controller (in-capsule
   or solicited by r2t)
3. After the last payload is sent but before the req.iter is advanced,
   the controller sends back a completion.
4. The completion is processed, the request is completed, and reused
   to transfer a new request (write or read)
5. The new request is queued, and the driver resets the request parameters
   (nvme_tcp_setup_cmd_pdu).
6. Now context in (2) resumes execution and advances the req.iter

==> use-after-completion as this is already a new request.

Fix this by making sure the request is not advanced after the last
data payload send, knowing that a completion may have arrived already.

An alternative solution would have been to delay the request completion
or state change waiting for reference counting on the TX path, but besides
adding atomic operations to the hot-path, it may present challenges in
multi-stage R2T scenarios where a r2t handler needs to be deferred to
an async execution.

Reported-by: Narayan Ayalasomayajula <narayan.ayalasomayajula@wdc.com>
Tested-by: Anil Mishra <anil.mishra@wdc.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Cc: stable@vger.kernel.org # v5.8+
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 0222e23f5936d..b97d2732a80f1 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -943,7 +943,6 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		if (ret <= 0)
 			return ret;
 
-		nvme_tcp_advance_req(req, ret);
 		if (queue->data_digest)
 			nvme_tcp_ddgst_update(queue->snd_hash, page,
 					offset, ret);
@@ -960,6 +959,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 			}
 			return 1;
 		}
+		nvme_tcp_advance_req(req, ret);
 	}
 	return -EAGAIN;
 }
-- 
GitLab


From a0fdd1418007f83565d3f2e04b47923ba93a9b8c Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 17 May 2021 15:36:43 -0700
Subject: [PATCH 0979/3804] nvme-tcp: rerun io_work if req_list is not empty

A possible race condition exists where a request to send data that is
enqueued from nvme_tcp_handle_r2t() will not be observed by
nvme_tcp_send_all() if it happens to be running. The driver relies on
io_work to send the enqueued request when it runs again, but the
concurrently running nvme_tcp_send_all() may not have released the
send_mutex at that time. If no future commands are enqueued to re-kick
the io_work, the request will time out in the SEND_H2C state, resulting
in a timeout error like:

  nvme nvme0: queue 1: timeout request 0x3 type 6

Ensure the io_work continues to run as long as the req_list is not empty.

Fixes: db5ad6b7f8cdd ("nvme-tcp: try to send request in queue_rq context")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b97d2732a80f1..34f4b3402f7c1 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1140,7 +1140,8 @@ static void nvme_tcp_io_work(struct work_struct *w)
 				pending = true;
 			else if (unlikely(result < 0))
 				break;
-		}
+		} else
+			pending = !llist_empty(&queue->req_list);
 
 		result = nvme_tcp_try_recv(queue);
 		if (result > 0)
-- 
GitLab


From a7d139145a6640172516b193abf6d2398620aa14 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 10 May 2021 21:56:35 -0700
Subject: [PATCH 0980/3804] nvme-fc: clear q_live at beginning of association
 teardown

The __nvmf_check_ready() routine used to bounce all filesystem io if the
controller state isn't LIVE.  However, a later patch changed the logic so
that the rejection ends up being based on the Q live check.  The FC
transport has a slightly different sequence from rdma and tcp for
shutting down queues/marking them non-live.  FC marks its queue non-live
after aborting all ios and waiting for their termination, leaving a
rather large window for filesystem io to continue to hit the transport.
Unfortunately this resulted in filesystem I/O or applications seeing I/O
errors.

Change the FC transport to mark the queues non-live at the first sign of
teardown for the association (when I/O is initially terminated).

Fixes: 73a5379937ec ("nvme-fabrics: allow to queue requests for live queues")
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index d9ab9e7871d0f..256e87721a01f 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2461,6 +2461,18 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
 static void
 __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
 {
+	int q;
+
+	/*
+	 * if aborting io, the queues are no longer good, mark them
+	 * all as not live.
+	 */
+	if (ctrl->ctrl.queue_count > 1) {
+		for (q = 1; q < ctrl->ctrl.queue_count; q++)
+			clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[q].flags);
+	}
+	clear_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
+
 	/*
 	 * If io queues are present, stop them and terminate all outstanding
 	 * ios on them. As FC allocates FC exchange for each io, the
-- 
GitLab


From 036867e93ebf4d7e70eba6a8c72db74ee3760bc3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 17 May 2021 09:46:40 +0100
Subject: [PATCH 0981/3804] drm/i915/gem: Pin the L-shape quirked object as
 unshrinkable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When instantiating a tiled object on an L-shaped memory machine, we mark
the object as unshrinkable to prevent the shrinker from trying to swap
out the pages. We have to do this as we do not know the swizzling on the
individual pages, and so the data will be scrambled across swap out/in.

Not only do we need to move the object off the shrinker list, we need to
mark the object with shrink_pin so that the counter is consistent across
calls to madvise.

v2: in the madvise ioctl we need to check if the object is currently
shrinkable/purgeable, not if the object type supports shrinking

Fixes: 0175969e489a ("drm/i915/gem: Use shrinkable status for unknown swizzle quirks")
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3293
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3450
Reported-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: <stable@vger.kernel.org> # v5.12+
Link: https://patchwork.freedesktop.org/patch/msgid/20210517084640.18862-1-matthew.auld@intel.com
(cherry picked from commit 8777d17b68dcfbfbd4d524f444adefae56f41225)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_pages.c |  2 ++
 drivers/gpu/drm/i915/i915_gem.c           | 11 +++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index aed8a37ccdc93..7361971c177dd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -63,6 +63,8 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
 	    i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
 		GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
 		i915_gem_object_set_tiling_quirk(obj);
+		GEM_BUG_ON(!list_empty(&obj->mm.link));
+		atomic_inc(&obj->mm.shrink_pin);
 		shrinkable = false;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b23f58e94cfb7..b3cedd20f3653 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -999,12 +999,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 		obj->mm.madv = args->madv;
 
 	if (i915_gem_object_has_pages(obj)) {
-		struct list_head *list;
+		unsigned long flags;
 
-		if (i915_gem_object_is_shrinkable(obj)) {
-			unsigned long flags;
-
-			spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		spin_lock_irqsave(&i915->mm.obj_lock, flags);
+		if (!list_empty(&obj->mm.link)) {
+			struct list_head *list;
 
 			if (obj->mm.madv != I915_MADV_WILLNEED)
 				list = &i915->mm.purge_list;
@@ -1012,8 +1011,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
 				list = &i915->mm.shrink_list;
 			list_move_tail(&obj->mm.link, list);
 
-			spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 		}
+		spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 	}
 
 	/* if the object is no longer attached, discard its backing storage */
-- 
GitLab


From 1a590a1c8bf46bf80ea12b657ca44c345531ac80 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 19 May 2021 09:50:26 +0800
Subject: [PATCH 0982/3804] iommu/vt-d: Check for allocation failure in
 aux_detach_device()

In current kernels small allocations never fail, but checking for
allocation failure is the correct thing to do.

Fixes: 18abda7a2d55 ("iommu/vt-d: Fix general protection fault in aux_detach_device()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/YJuobKuSn81dOPLd@mwanda
Link: https://lore.kernel.org/r/20210519015027.108468-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 708f430af1c44..9a7b79b5af183 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4606,6 +4606,8 @@ static int auxiliary_link_device(struct dmar_domain *domain,
 
 	if (!sinfo) {
 		sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC);
+		if (!sinfo)
+			return -ENOMEM;
 		sinfo->domain = domain;
 		sinfo->pdev = dev;
 		list_add(&sinfo->link_phys, &info->subdevices);
-- 
GitLab


From 54c80d907400189b09548039be8f3b6e297e8ae3 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 19 May 2021 09:50:27 +0800
Subject: [PATCH 0983/3804] iommu/vt-d: Use user privilege for RID2PASID
 translation

When first-level page tables are used for IOVA translation, we use user
privilege by setting U/S bit in the page table entry. This is to make it
consistent with the second level translation, where the U/S enforcement
is not available. Clear the SRE (Supervisor Request Enable) field in the
pasid table entry of RID2PASID so that requests requesting the supervisor
privilege are blocked and treated as DMA remapping faults.

Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level")
Suggested-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20210512064426.3440915-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20210519015027.108468-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 7 +++++--
 drivers/iommu/intel/pasid.c | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9a7b79b5af183..be35284a20160 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2525,9 +2525,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
 				    struct device *dev,
 				    u32 pasid)
 {
-	int flags = PASID_FLAG_SUPERVISOR_MODE;
 	struct dma_pte *pgd = domain->pgd;
 	int agaw, level;
+	int flags = 0;
 
 	/*
 	 * Skip top levels of page tables for iommu which has
@@ -2543,7 +2543,10 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
 	if (level != 4 && level != 5)
 		return -EINVAL;
 
-	flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+	if (pasid != PASID_RID2PASID)
+		flags |= PASID_FLAG_SUPERVISOR_MODE;
+	if (level == 5)
+		flags |= PASID_FLAG_FL5LP;
 
 	if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED)
 		flags |= PASID_FLAG_PAGE_SNOOP;
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 72646bafc52f4..72dc84821dad2 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -699,7 +699,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	 * Since it is a second level only translation setup, we should
 	 * set SRE bit as well (addresses are expected to be GPAs).
 	 */
-	pasid_set_sre(pte);
+	if (pasid != PASID_RID2PASID)
+		pasid_set_sre(pte);
 	pasid_set_present(pte);
 	pasid_flush_caches(iommu, pte, pasid, did);
 
-- 
GitLab


From 023dfa9602f561952c0e19d74f66614a56d7e57a Mon Sep 17 00:00:00 2001
From: Simon Rettberg <simon.rettberg@rz.uni-freiburg.de>
Date: Mon, 26 Apr 2021 16:11:24 +0200
Subject: [PATCH 0984/3804] drm/i915/gt: Disable HiZ Raw Stall Optimization on
 broken gen7

When resetting CACHE_MODE registers, don't enable HiZ Raw Stall
Optimization on Ivybridge GT1 and Baytrail, as it causes severe glitches
when rendering any kind of 3D accelerated content.
This optimization is disabled on these platforms by default according to
official documentation from 01.org.

Fixes: ef99a60ffd9b ("drm/i915/gt: Clear CACHE_MODE prior to clearing residuals")
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3081
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3404
BugLink: https://gitlab.freedesktop.org/drm/intel/-/issues/3071
Reviewed-by: Manuel Bentele <development@manuel-bentele.de>
Signed-off-by: Simon Rettberg <simon.rettberg@rz.uni-freiburg.de>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
[Rodrigo removed invalid Fixes line]
Link: https://patchwork.freedesktop.org/patch/msgid/20210426161124.2b7fd708@dellnichtsogutkiste
(cherry picked from commit 929b734ad34b717d6a1b8de97f53bb5616040147)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/gen7_renderclear.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
index de575fdb033f5..21f08e53889c3 100644
--- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c
+++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c
@@ -397,7 +397,10 @@ static void emit_batch(struct i915_vma * const vma,
 	gen7_emit_pipeline_invalidate(&cmds);
 	batch_add(&cmds, MI_LOAD_REGISTER_IMM(2));
 	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7));
-	batch_add(&cmds, 0xffff0000);
+	batch_add(&cmds, 0xffff0000 |
+			((IS_IVB_GT1(i915) || IS_VALLEYVIEW(i915)) ?
+			 HIZ_RAW_STALL_OPT_DISABLE :
+			 0));
 	batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1));
 	batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
 	gen7_emit_pipeline_invalidate(&cmds);
-- 
GitLab


From 0a016c35a326c6b2f558ede58ff08da7ef1da1a8 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Thu, 15 Apr 2021 18:54:23 +0200
Subject: [PATCH 0985/3804] media: staging: media: atomisp: pci: Balance braces
 around conditional statements in file atomisp_cmd.c

Balance braces around conditional statements.
Issue detected by checkpatch.pl.
It happens in if-else statements where one of the commands
uses braces around a block of code and the other command
does not since it has just a single line of code.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../staging/media/atomisp/pci/atomisp_cmd.c   | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
index 14abc1ca00e8e..24d8eaccb9c6a 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
@@ -1138,9 +1138,10 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error,
 					asd->frame_status[vb->i] =
 					    ATOMISP_FRAME_STATUS_OK;
 				}
-			} else
+			} else {
 				asd->frame_status[vb->i] =
 				    ATOMISP_FRAME_STATUS_OK;
+			}
 		} else {
 			asd->frame_status[vb->i] = ATOMISP_FRAME_STATUS_OK;
 		}
@@ -4941,9 +4942,9 @@ atomisp_try_fmt_file(struct atomisp_device *isp, struct v4l2_format *f)
 
 	depth = get_pixel_depth(pixelformat);
 
-	if (field == V4L2_FIELD_ANY)
+	if (field == V4L2_FIELD_ANY) {
 		field = V4L2_FIELD_NONE;
-	else if (field != V4L2_FIELD_NONE) {
+	} else if (field != V4L2_FIELD_NONE) {
 		dev_err(isp->dev, "Wrong output field\n");
 		return -EINVAL;
 	}
@@ -6564,17 +6565,17 @@ static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe)
 {
 	struct atomisp_sub_device *asd = pipe->asd;
 
-	if (ATOMISP_USE_YUVPP(asd))
+	if (ATOMISP_USE_YUVPP(asd)) {
 		return IA_CSS_PIPE_ID_YUVPP;
-	else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_SCALER)
+	} else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_SCALER) {
 		return IA_CSS_PIPE_ID_VIDEO;
-	else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_LOWLAT)
+	} else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_LOWLAT) {
 		return IA_CSS_PIPE_ID_CAPTURE;
-	else if (pipe == &asd->video_out_video_capture)
+	} else if (pipe == &asd->video_out_video_capture) {
 		return IA_CSS_PIPE_ID_VIDEO;
-	else if (pipe == &asd->video_out_vf)
+	} else if (pipe == &asd->video_out_vf) {
 		return IA_CSS_PIPE_ID_CAPTURE;
-	else if (pipe == &asd->video_out_preview) {
+	} else if (pipe == &asd->video_out_preview) {
 		if (asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO)
 			return IA_CSS_PIPE_ID_VIDEO;
 		else
-- 
GitLab


From b6465b1d74b8ce6dd585ae96877bb74bc6f86f5e Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Thu, 15 Apr 2021 18:54:27 +0200
Subject: [PATCH 0986/3804] media: staging: media: atomisp: pci: Balance braces
 around conditional statements in file atomisp_compat_css20.c

Balance braces around conditional statements.
Issue detected by checkpatch.pl.
It happens in if-else statements where one of the commands
uses braces around a block of code and the other command
does not since it has just a single line of code.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_compat_css20.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_compat_css20.c b/drivers/staging/media/atomisp/pci/atomisp_compat_css20.c
index ce3165291eec6..f60198bb8a1a9 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_compat_css20.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_compat_css20.c
@@ -2782,9 +2782,9 @@ int atomisp_get_css_frame_info(struct atomisp_sub_device *asd,
 	int stream_index;
 	struct atomisp_device *isp = asd->isp;
 
-	if (ATOMISP_SOC_CAMERA(asd))
+	if (ATOMISP_SOC_CAMERA(asd)) {
 		stream_index = atomisp_source_pad_to_stream_id(asd, source_pad);
-	else {
+	} else {
 		stream_index = (pipe_index == IA_CSS_PIPE_ID_YUVPP) ?
 			       ATOMISP_INPUT_STREAM_VIDEO :
 			       atomisp_source_pad_to_stream_id(asd, source_pad);
-- 
GitLab


From 41d1f1b03909782aa8f0a05db9a15a762679bc17 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Thu, 15 Apr 2021 18:54:34 +0200
Subject: [PATCH 0987/3804] media: staging: media: atomisp: pci: Balance braces
 around conditional statements in file atomisp_subdev.c

Balance braces around conditional statements.
Issue detected by checkpatch.pl.
It happens in if-else statements where one of the commands
uses braces around a block of code and the other command
does not since it has just a single line of code.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_subdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
index 2ef5f44e4b6b6..aeabd07bf518d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
@@ -472,9 +472,9 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
 			 * when dvs is disabled.
 			 */
 			dvs_w = dvs_h = 12;
-		} else
+		} else {
 			dvs_w = dvs_h = 0;
-
+		}
 		atomisp_css_video_set_dis_envelope(isp_sd, dvs_w, dvs_h);
 		atomisp_css_input_set_effective_resolution(isp_sd, stream_id,
 			crop[pad]->width, crop[pad]->height);
-- 
GitLab


From d1ca04c476d6dccb6a02248187b8aab1400ed176 Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Thu, 15 Apr 2021 18:54:38 +0200
Subject: [PATCH 0988/3804] media: staging: media: atomisp: pci: Balance braces
 around conditional statements in file atomisp_v4l2.c

Balance braces around conditional statements.
Issue detected by checkpatch.pl.
It happens in if-else statements where one of the commands
uses braces around a block of code and the other command
does not since it has just a single line of code.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
index 6d853f480e1c8..948769ca6539d 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c
@@ -1500,9 +1500,9 @@ static int init_atomisp_wdts(struct atomisp_device *isp)
 	for (i = 0; i < isp->num_of_streams; i++) {
 		struct atomisp_sub_device *asd = &isp->asd[i];
 
-		if (!IS_ISP2401)
+		if (!IS_ISP2401) {
 			timer_setup(&asd->wdt, atomisp_wdt, 0);
-		else {
+		} else {
 			timer_setup(&asd->video_out_capture.wdt,
 				    atomisp_wdt, 0);
 			timer_setup(&asd->video_out_preview.wdt,
-- 
GitLab


From 049eda0749faa98d074e7f362c3e2d211da2e5ed Mon Sep 17 00:00:00 2001
From: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Date: Thu, 15 Apr 2021 19:18:18 +0200
Subject: [PATCH 0989/3804] media: staging: media: tegra-video: Align line
 break to match with the open parenthesis in file vi.c

Align line break to match with the open parenthesis.
Issue detected by checkpatch.pl.

Signed-off-by: Aline Santana Cordeiro <alinesantanacordeiro@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/tegra-video/vi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c
index df5ca35964706..b712063a7c5d2 100644
--- a/drivers/staging/media/tegra-video/vi.c
+++ b/drivers/staging/media/tegra-video/vi.c
@@ -1812,8 +1812,8 @@ static int tegra_vi_graph_parse_one(struct tegra_vi_channel *chan,
 			continue;
 		}
 
-		tvge = v4l2_async_notifier_add_fwnode_subdev(&chan->notifier,
-				remote, struct tegra_vi_graph_entity);
+		tvge = v4l2_async_notifier_add_fwnode_subdev(&chan->notifier, remote,
+							     struct tegra_vi_graph_entity);
 		if (IS_ERR(tvge)) {
 			ret = PTR_ERR(tvge);
 			dev_err(vi->dev,
-- 
GitLab


From 6ceb557604e85c55bce0585216623c21c7a00453 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Thu, 15 Apr 2021 23:41:41 +0200
Subject: [PATCH 0990/3804] media: staging: media: atomisp: Fix sh_css.c brace
 coding style issues

Fix brace coding style issues.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 224 +++++++++------------
 1 file changed, 90 insertions(+), 134 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 27dd8ce8ba0a4..bb752d47457cb 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -453,15 +453,15 @@ static enum ia_css_frame_format yuv422_copy_formats[] = {
  * by the copy binary given the stream format.
  * */
 static int
-verify_copy_out_frame_format(struct ia_css_pipe *pipe) {
+verify_copy_out_frame_format(struct ia_css_pipe *pipe)
+{
 	enum ia_css_frame_format out_fmt = pipe->output_info[0].format;
 	unsigned int i, found = 0;
 
 	assert(pipe);
 	assert(pipe->stream);
 
-	switch (pipe->stream->config.input_config.format)
-	{
+	switch (pipe->stream->config.input_config.format) {
 	case ATOMISP_INPUT_FORMAT_YUV420_8_LEGACY:
 	case ATOMISP_INPUT_FORMAT_YUV420_8:
 		for (i = 0; i < ARRAY_SIZE(yuv420_copy_formats) && !found; i++)
@@ -528,7 +528,8 @@ ia_css_stream_input_format_bits_per_pixel(struct ia_css_stream *stream)
 
 #if !defined(ISP2401)
 static int
-sh_css_config_input_network(struct ia_css_stream *stream) {
+sh_css_config_input_network(struct ia_css_stream *stream)
+{
 	unsigned int fmt_type;
 	struct ia_css_pipe *pipe = stream->last_pipe;
 	struct ia_css_binary *binary = NULL;
@@ -554,8 +555,7 @@ sh_css_config_input_network(struct ia_css_stream *stream) {
 					stream->config.mode);
 
 	if ((binary && (binary->online || stream->config.continuous)) ||
-	    pipe->config.mode == IA_CSS_PIPE_MODE_COPY)
-	{
+	    pipe->config.mode == IA_CSS_PIPE_MODE_COPY) {
 		err = ia_css_ifmtr_configure(&stream->config,
 					     binary);
 		if (err)
@@ -563,8 +563,7 @@ sh_css_config_input_network(struct ia_css_stream *stream) {
 	}
 
 	if (stream->config.mode == IA_CSS_INPUT_MODE_TPG ||
-	    stream->config.mode == IA_CSS_INPUT_MODE_PRBS)
-	{
+	    stream->config.mode == IA_CSS_INPUT_MODE_PRBS) {
 		unsigned int hblank_cycles = 100,
 		vblank_lines = 6,
 		width,
@@ -723,35 +722,32 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_id(
 	switch (stream_cfg->mode) {
 	case IA_CSS_INPUT_MODE_TPG:
 
-		if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID0) {
+		if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID0)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT0_ID;
-		} else if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID1) {
+		else if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID1)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT1_ID;
-		} else if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID2) {
+		else if (stream_cfg->source.tpg.id == IA_CSS_TPG_ID2)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT2_ID;
-		}
 
 		break;
 	case IA_CSS_INPUT_MODE_PRBS:
 
-		if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID0) {
+		if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID0)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT0_ID;
-		} else if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID1) {
+		else if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID1)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT1_ID;
-		} else if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID2) {
+		else if (stream_cfg->source.prbs.id == IA_CSS_PRBS_ID2)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_PIXELGEN_PORT2_ID;
-		}
 
 		break;
 	case IA_CSS_INPUT_MODE_BUFFERED_SENSOR:
 
-		if (stream_cfg->source.port.port == MIPI_PORT0_ID) {
+		if (stream_cfg->source.port.port == MIPI_PORT0_ID)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT0_ID;
-		} else if (stream_cfg->source.port.port == MIPI_PORT1_ID) {
+		else if (stream_cfg->source.port.port == MIPI_PORT1_ID)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT1_ID;
-		} else if (stream_cfg->source.port.port == MIPI_PORT2_ID) {
+		else if (stream_cfg->source.port.port == MIPI_PORT2_ID)
 			isys_stream_descr->input_port_id = INPUT_SYSTEM_CSI_PORT2_ID;
-		}
 
 		break;
 	default:
@@ -804,15 +800,14 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_attr(
 	rc = true;
 	switch (stream_cfg->mode) {
 	case IA_CSS_INPUT_MODE_TPG:
-		if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_RAMP) {
+		if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_RAMP)
 			isys_stream_descr->tpg_port_attr.mode = PIXELGEN_TPG_MODE_RAMP;
-		} else if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_CHECKERBOARD) {
+		else if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_CHECKERBOARD)
 			isys_stream_descr->tpg_port_attr.mode = PIXELGEN_TPG_MODE_CHBO;
-		} else if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_MONO) {
+		else if (stream_cfg->source.tpg.mode == IA_CSS_TPG_MODE_MONO)
 			isys_stream_descr->tpg_port_attr.mode = PIXELGEN_TPG_MODE_MONO;
-		} else {
+		else
 			rc = false;
-		}
 
 		/*
 		 * TODO
@@ -951,12 +946,12 @@ static bool sh_css_translate_stream_cfg_to_input_system_input_port_resolution(
 	     stream_cfg->mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) &&
 	    stream_cfg->source.port.compression.type != IA_CSS_CSI2_COMPRESSION_TYPE_NONE) {
 		if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
-		    UNCOMPRESSED_BITS_PER_PIXEL_10) {
+		    UNCOMPRESSED_BITS_PER_PIXEL_10)
 			fmt_type = ATOMISP_INPUT_FORMAT_RAW_10;
-		} else if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
-			   UNCOMPRESSED_BITS_PER_PIXEL_12) {
+		else if (stream_cfg->source.port.compression.uncompressed_bits_per_pixel ==
+			   UNCOMPRESSED_BITS_PER_PIXEL_12)
 			fmt_type = ATOMISP_INPUT_FORMAT_RAW_12;
-		} else
+		else
 			return false;
 	}
 
@@ -1045,7 +1040,8 @@ static bool sh_css_translate_binary_info_to_input_system_output_port_attr(
 }
 
 static int
-sh_css_config_input_network(struct ia_css_stream *stream) {
+sh_css_config_input_network(struct ia_css_stream *stream)
+{
 	bool					rc;
 	ia_css_isys_descr_t			isys_stream_descr;
 	unsigned int				sp_thread_id;
@@ -1060,19 +1056,16 @@ sh_css_config_input_network(struct ia_css_stream *stream) {
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 			    "sh_css_config_input_network() enter 0x%p:\n", stream);
 
-	if (stream->config.continuous)
-	{
-		if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_CAPTURE) {
+	if (stream->config.continuous) {
+		if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_CAPTURE)
 			pipe = stream->last_pipe;
-		} else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_YUVPP) {
+		else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_YUVPP)
 			pipe = stream->last_pipe;
-		} else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_PREVIEW) {
+		else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_PREVIEW)
 			pipe = stream->last_pipe->pipe_settings.preview.copy_pipe;
-		} else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_VIDEO) {
+		else if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_VIDEO)
 			pipe = stream->last_pipe->pipe_settings.video.copy_pipe;
-		}
-	} else
-	{
+	} else {
 		pipe = stream->last_pipe;
 		if (stream->last_pipe->config.mode == IA_CSS_PIPE_MODE_CAPTURE) {
 			/*
@@ -1095,8 +1088,7 @@ sh_css_config_input_network(struct ia_css_stream *stream) {
 		if (pipe->pipeline.stages->binary)
 			binary = pipe->pipeline.stages->binary;
 
-	if (binary)
-	{
+	if (binary) {
 		/* this was being done in ifmtr in 2400.
 		 * online and cont bypass the init_in_frameinfo_memory_defaults
 		 * so need to do it here
@@ -1210,11 +1202,10 @@ static inline struct ia_css_pipe *stream_get_target_pipe(
 	struct ia_css_pipe *target_pipe;
 
 	/* get the pipe that consumes the stream */
-	if (stream->config.continuous) {
+	if (stream->config.continuous)
 		target_pipe = stream_get_copy_pipe(stream);
-	} else {
+	else
 		target_pipe = stream_get_last_pipe(stream);
-	}
 
 	return target_pipe;
 }
@@ -1388,7 +1379,8 @@ start_binary(struct ia_css_pipe *pipe,
 /* start the copy function on the SP */
 static int
 start_copy_on_sp(struct ia_css_pipe *pipe,
-		 struct ia_css_frame *out_frame) {
+		 struct ia_css_frame *out_frame)
+{
 	(void)out_frame;
 	assert(pipe);
 	assert(pipe->stream);
@@ -1406,8 +1398,7 @@ start_copy_on_sp(struct ia_css_pipe *pipe,
 	sh_css_sp_start_binary_copy(ia_css_pipe_get_pipe_num(pipe), out_frame, pipe->stream->config.pixels_per_clock == 2);
 
 #if !defined(ISP2401)
-	if (pipe->stream->reconfigure_css_rx)
-	{
+	if (pipe->stream->reconfigure_css_rx) {
 		ia_css_isys_rx_configure(&pipe->stream->csi_rx_config,
 					 pipe->stream->config.mode);
 		pipe->stream->reconfigure_css_rx = false;
@@ -1596,7 +1587,8 @@ ia_css_reset_defaults(struct sh_css *css)
 
 int
 ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
-		     const struct ia_css_fw  *fw) {
+		     const struct ia_css_fw  *fw)
+{
 	int err;
 
 	if (!env)
@@ -1607,16 +1599,14 @@ ia_css_load_firmware(struct device *dev, const struct ia_css_env *env,
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_load_firmware() enter\n");
 
 	/* make sure we initialize my_css */
-	if (my_css.flush != env->cpu_mem_env.flush)
-	{
+	if (my_css.flush != env->cpu_mem_env.flush) {
 		ia_css_reset_defaults(&my_css);
 		my_css.flush = env->cpu_mem_env.flush;
 	}
 
 	ia_css_unload_firmware(); /* in case we are called twice */
 	err = sh_css_load_firmware(dev, fw->data, fw->bytes);
-	if (!err)
-	{
+	if (!err) {
 		err = ia_css_binary_init_infos();
 		if (!err)
 			fw_explicitly_loaded = true;
@@ -1630,7 +1620,8 @@ int
 ia_css_init(struct device *dev, const struct ia_css_env *env,
 	    const struct ia_css_fw  *fw,
 	    u32                 mmu_l1_base,
-	    enum ia_css_irq_type     irq_type) {
+	    enum ia_css_irq_type     irq_type)
+{
 	int err;
 	ia_css_spctrl_cfg spctrl_cfg;
 
@@ -1704,16 +1695,14 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
 	my_css.flush     = flush_func;
 
 	err = ia_css_rmgr_init();
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
 
 	IA_CSS_LOG("init: %d", my_css_save_initialized);
 
-	if (!my_css_save_initialized)
-	{
+	if (!my_css_save_initialized) {
 		my_css_save_initialized = true;
 		my_css_save.mode = sh_css_mode_working;
 		memset(my_css_save.stream_seeds, 0,
@@ -1741,19 +1730,16 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
 	gpio_reg_store(GPIO0_ID, _gpio_block_reg_do_0, 0);
 
 	err = ia_css_refcount_init(REFCOUNT_SIZE);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
 	err = sh_css_params_init();
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
-	if (fw)
-	{
+	if (fw) {
 		ia_css_unload_firmware(); /* in case we already had firmware loaded */
 		err = sh_css_load_firmware(dev, fw->data, fw->bytes);
 		if (err) {
@@ -1774,23 +1760,20 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
 		return -EINVAL;
 
 	err = ia_css_spctrl_load_fw(SP0_ID, &spctrl_cfg);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
 
 #if WITH_PC_MONITORING
-	if (!thread_alive)
-	{
+	if (!thread_alive) {
 		thread_alive++;
 		sh_css_print("PC_MONITORING: %s() -- create thread DISABLED\n",
 			     __func__);
 		spying_thread_create();
 	}
 #endif
-	if (!sh_css_hrt_system_is_idle())
-	{
+	if (!sh_css_hrt_system_is_idle()) {
 		IA_CSS_LEAVE_ERR(-EBUSY);
 		return -EBUSY;
 	}
@@ -1823,7 +1806,8 @@ ia_css_init(struct device *dev, const struct ia_css_env *env,
 }
 
 int
-ia_css_enable_isys_event_queue(bool enable) {
+ia_css_enable_isys_event_queue(bool enable)
+{
 	if (sh_css_sp_is_running())
 		return -EBUSY;
 	sh_css_sp_enable_isys_event_queue(enable);
@@ -1844,7 +1828,8 @@ sh_css_flush(struct ia_css_acc_fw *fw)
  * doing it from stream_create since we could run out of sp threads due to
  * allocation on inactive pipelines. */
 static int
-map_sp_threads(struct ia_css_stream *stream, bool map) {
+map_sp_threads(struct ia_css_stream *stream, bool map)
+{
 	struct ia_css_pipe *main_pipe = NULL;
 	struct ia_css_pipe *copy_pipe = NULL;
 	struct ia_css_pipe *capture_pipe = NULL;
@@ -1856,8 +1841,7 @@ map_sp_threads(struct ia_css_stream *stream, bool map) {
 	IA_CSS_ENTER_PRIVATE("stream = %p, map = %s",
 			     stream, map ? "true" : "false");
 
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -1867,8 +1851,7 @@ map_sp_threads(struct ia_css_stream *stream, bool map) {
 
 	ia_css_pipeline_map(main_pipe->pipe_num, map);
 
-	switch (pipe_id)
-	{
+	switch (pipe_id) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		copy_pipe    = main_pipe->pipe_settings.preview.copy_pipe;
 		capture_pipe = main_pipe->pipe_settings.preview.capture_pipe;
@@ -1887,23 +1870,17 @@ map_sp_threads(struct ia_css_stream *stream, bool map) {
 	}
 
 	if (acc_pipe)
-	{
 		ia_css_pipeline_map(acc_pipe->pipe_num, map);
-	}
 
 	if (capture_pipe)
-	{
 		ia_css_pipeline_map(capture_pipe->pipe_num, map);
-	}
 
 	/* Firmware expects copy pipe to be the last pipe mapped. (if needed) */
 	if (copy_pipe)
-	{
 		ia_css_pipeline_map(copy_pipe->pipe_num, map);
-	}
+
 	/* DH regular multi pipe - not continuous mode: map the next pipes too */
-	if (!stream->config.continuous)
-	{
+	if (!stream->config.continuous) {
 		int i;
 
 		for (i = 1; i < stream->num_pipes; i++)
@@ -1917,7 +1894,8 @@ map_sp_threads(struct ia_css_stream *stream, bool map) {
 /* creates a host pipeline skeleton for all pipes in a stream. Called during
  * stream_create. */
 static int
-create_host_pipeline_structure(struct ia_css_stream *stream) {
+create_host_pipeline_structure(struct ia_css_stream *stream)
+{
 	struct ia_css_pipe *copy_pipe = NULL, *capture_pipe = NULL;
 	struct ia_css_pipe *acc_pipe = NULL;
 	enum ia_css_pipe_id pipe_id;
@@ -1929,24 +1907,21 @@ create_host_pipeline_structure(struct ia_css_stream *stream) {
 	assert(stream);
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
 
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 
 	main_pipe	= stream->last_pipe;
 	assert(main_pipe);
-	if (!main_pipe)
-	{
+	if (!main_pipe) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 
 	pipe_id	= main_pipe->mode;
 
-	switch (pipe_id)
-	{
+	switch (pipe_id) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		copy_pipe    = main_pipe->pipe_settings.preview.copy_pipe;
 		copy_pipe_delay = main_pipe->dvs_frame_delay;
@@ -1986,30 +1961,23 @@ create_host_pipeline_structure(struct ia_css_stream *stream) {
 	}
 
 	if (!(err) && copy_pipe)
-	{
 		err = ia_css_pipeline_create(&copy_pipe->pipeline,
 					     copy_pipe->mode,
 					     copy_pipe->pipe_num,
 					     copy_pipe_delay);
-	}
 
 	if (!(err) && capture_pipe)
-	{
 		err = ia_css_pipeline_create(&capture_pipe->pipeline,
 					     capture_pipe->mode,
 					     capture_pipe->pipe_num,
 					     capture_pipe_delay);
-	}
 
 	if (!(err) && acc_pipe)
-	{
 		err = ia_css_pipeline_create(&acc_pipe->pipeline, acc_pipe->mode,
 					     acc_pipe->pipe_num, main_pipe->dvs_frame_delay);
-	}
 
 	/* DH regular multi pipe - not continuous mode: create the next pipelines too */
-	if (!stream->config.continuous)
-	{
+	if (!stream->config.continuous) {
 		int i;
 
 		for (i = 1; i < stream->num_pipes && 0 == err; i++) {
@@ -2028,7 +1996,8 @@ create_host_pipeline_structure(struct ia_css_stream *stream) {
 /* creates a host pipeline for all pipes in a stream. Called during
  * stream_start. */
 static int
-create_host_pipeline(struct ia_css_stream *stream) {
+create_host_pipeline(struct ia_css_stream *stream)
+{
 	struct ia_css_pipe *copy_pipe = NULL, *capture_pipe = NULL;
 	struct ia_css_pipe *acc_pipe = NULL;
 	enum ia_css_pipe_id pipe_id;
@@ -2037,8 +2006,7 @@ create_host_pipeline(struct ia_css_stream *stream) {
 	unsigned int max_input_width = 0;
 
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -2049,8 +2017,7 @@ create_host_pipeline(struct ia_css_stream *stream) {
 	/* No continuous frame allocation for capture pipe. It uses the
 	 * "main" pipe's frames. */
 	if ((pipe_id == IA_CSS_PIPE_ID_PREVIEW) ||
-	    (pipe_id == IA_CSS_PIPE_ID_VIDEO))
-	{
+	    (pipe_id == IA_CSS_PIPE_ID_VIDEO)) {
 		/* About pipe_id == IA_CSS_PIPE_ID_PREVIEW && stream->config.mode != IA_CSS_INPUT_MODE_MEMORY:
 		 * The original condition pipe_id == IA_CSS_PIPE_ID_PREVIEW is too strong. E.g. in SkyCam (with memory
 		 * based input frames) there is no continuous mode and thus no need for allocated continuous frames
@@ -2068,24 +2035,21 @@ create_host_pipeline(struct ia_css_stream *stream) {
 
 #if !defined(ISP2401)
 	/* old isys: need to allocate_mipi_frames() even in IA_CSS_PIPE_MODE_COPY */
-	if (pipe_id != IA_CSS_PIPE_ID_ACC)
-	{
+	if (pipe_id != IA_CSS_PIPE_ID_ACC) {
 		err = allocate_mipi_frames(main_pipe, &stream->info);
 		if (err)
 			goto ERR;
 	}
 #elif defined(ISP2401)
 	if ((pipe_id != IA_CSS_PIPE_ID_ACC) &&
-	    (main_pipe->config.mode != IA_CSS_PIPE_MODE_COPY))
-	{
+	    (main_pipe->config.mode != IA_CSS_PIPE_MODE_COPY)) {
 		err = allocate_mipi_frames(main_pipe, &stream->info);
 		if (err)
 			goto ERR;
 	}
 #endif
 
-	switch (pipe_id)
-	{
+	switch (pipe_id) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		copy_pipe    = main_pipe->pipe_settings.preview.copy_pipe;
 		capture_pipe = main_pipe->pipe_settings.preview.capture_pipe;
@@ -2135,31 +2099,27 @@ create_host_pipeline(struct ia_css_stream *stream) {
 	if (err)
 		goto ERR;
 
-	if (copy_pipe)
-	{
+	if (copy_pipe) {
 		err = create_host_copy_pipeline(copy_pipe, max_input_width,
 						main_pipe->continuous_frames[0]);
 		if (err)
 			goto ERR;
 	}
 
-	if (capture_pipe)
-	{
+	if (capture_pipe) {
 		err = create_host_capture_pipeline(capture_pipe);
 		if (err)
 			goto ERR;
 	}
 
-	if (acc_pipe)
-	{
+	if (acc_pipe) {
 		err = create_host_acc_pipeline(acc_pipe);
 		if (err)
 			goto ERR;
 	}
 
 	/* DH regular multi pipe - not continuous mode: create the next pipelines too */
-	if (!stream->config.continuous)
-	{
+	if (!stream->config.continuous) {
 		int i;
 
 		for (i = 1; i < stream->num_pipes && 0 == err; i++) {
@@ -2201,10 +2161,10 @@ static const struct ia_css_yuvpp_settings yuvpp = IA_CSS_DEFAULT_YUVPP_SETTINGS;
 static int
 init_pipe_defaults(enum ia_css_pipe_mode mode,
 		   struct ia_css_pipe *pipe,
-		   bool copy_pipe) {
+		   bool copy_pipe)
+{
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("NULL pipe parameter");
 		return -EINVAL;
 	}
@@ -2213,18 +2173,17 @@ init_pipe_defaults(enum ia_css_pipe_mode mode,
 	memcpy(pipe, &default_pipe, sizeof(default_pipe));
 
 	/* TODO: JB should not be needed, but temporary backward reference */
-	switch (mode)
-	{
+	switch (mode) {
 	case IA_CSS_PIPE_MODE_PREVIEW:
 		pipe->mode = IA_CSS_PIPE_ID_PREVIEW;
 		memcpy(&pipe->pipe_settings.preview, &preview, sizeof(preview));
 		break;
 	case IA_CSS_PIPE_MODE_CAPTURE:
-		if (copy_pipe) {
+		if (copy_pipe)
 			pipe->mode = IA_CSS_PIPE_ID_COPY;
-		} else {
+		else
 			pipe->mode = IA_CSS_PIPE_ID_CAPTURE;
-		}
+
 		memcpy(&pipe->pipe_settings.capture, &capture, sizeof(capture));
 		break;
 	case IA_CSS_PIPE_MODE_VIDEO:
@@ -2254,27 +2213,25 @@ pipe_global_init(void)
 	u8 i;
 
 	my_css.pipe_counter = 0;
-	for (i = 0; i < IA_CSS_PIPELINE_NUM_MAX; i++) {
+	for (i = 0; i < IA_CSS_PIPELINE_NUM_MAX; i++)
 		my_css.all_pipes[i] = NULL;
-	}
 }
 
 static int
 pipe_generate_pipe_num(const struct ia_css_pipe *pipe,
-		       unsigned int *pipe_number) {
+		       unsigned int *pipe_number)
+{
 	const u8 INVALID_PIPE_NUM = (uint8_t)~(0);
 	u8 pipe_num = INVALID_PIPE_NUM;
 	u8 i;
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("NULL pipe parameter");
 		return -EINVAL;
 	}
 
 	/* Assign a new pipe_num .... search for empty place */
-	for (i = 0; i < IA_CSS_PIPELINE_NUM_MAX; i++)
-	{
+	for (i = 0; i < IA_CSS_PIPELINE_NUM_MAX; i++) {
 		if (!my_css.all_pipes[i]) {
 			/*position is reserved */
 			my_css.all_pipes[i] = (struct ia_css_pipe *)pipe;
@@ -2282,8 +2239,7 @@ pipe_generate_pipe_num(const struct ia_css_pipe *pipe,
 			break;
 		}
 	}
-	if (pipe_num == INVALID_PIPE_NUM)
-	{
+	if (pipe_num == INVALID_PIPE_NUM) {
 		/* Max number of pipes already allocated */
 		IA_CSS_ERROR("Max number of pipes already created");
 		return -ENOSPC;
-- 
GitLab


From c0633711b00ae5997925f0b691acb7ec900155c4 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Thu, 15 Apr 2021 23:41:42 +0200
Subject: [PATCH 0991/3804] media: staging: media: atomisp: Remove redundant
 assertions in sh_css.c

Remove assert() in places where the condition is already handled.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index bb752d47457cb..01ce6005ead4e 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -413,7 +413,6 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 static void
 sh_css_pipe_free_shading_table(struct ia_css_pipe *pipe)
 {
-	assert(pipe);
 	if (!pipe) {
 		IA_CSS_ERROR("NULL input parameter");
 		return;
@@ -1080,7 +1079,6 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 		}
 	}
 
-	assert(pipe);
 	if (!pipe)
 		return -EINVAL;
 
@@ -1382,8 +1380,6 @@ start_copy_on_sp(struct ia_css_pipe *pipe,
 		 struct ia_css_frame *out_frame)
 {
 	(void)out_frame;
-	assert(pipe);
-	assert(pipe->stream);
 
 	if ((!pipe) || (!pipe->stream))
 		return -EINVAL;
@@ -1837,7 +1833,6 @@ map_sp_threads(struct ia_css_stream *stream, bool map)
 	int err = 0;
 	enum ia_css_pipe_id pipe_id;
 
-	assert(stream);
 	IA_CSS_ENTER_PRIVATE("stream = %p, map = %s",
 			     stream, map ? "true" : "false");
 
@@ -1904,7 +1899,6 @@ create_host_pipeline_structure(struct ia_css_stream *stream)
 	unsigned int copy_pipe_delay = 0,
 	capture_pipe_delay = 0;
 
-	assert(stream);
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
 
 	if (!stream) {
@@ -1913,7 +1907,6 @@ create_host_pipeline_structure(struct ia_css_stream *stream)
 	}
 
 	main_pipe	= stream->last_pipe;
-	assert(main_pipe);
 	if (!main_pipe) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
@@ -2317,7 +2310,6 @@ static void sh_css_pipe_free_acc_binaries(
 	struct ia_css_pipeline *pipeline;
 	struct ia_css_pipeline_stage *stage;
 
-	assert(pipe);
 	if (!pipe) {
 		IA_CSS_ERROR("NULL input pointer");
 		return;
-- 
GitLab


From 7394bf6d3c1e8316484fcf3a9d71daaab489f211 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Mon, 19 Apr 2021 21:25:55 +0200
Subject: [PATCH 0992/3804] media: staging: media: atomisp: Fix the rest of
 sh_css.c brace issues

Fix the remainder of brace coding style issues.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 1134 +++++++++-----------
 1 file changed, 498 insertions(+), 636 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 01ce6005ead4e..ac748da7a7ef5 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -1101,8 +1101,7 @@ sh_css_config_input_network(struct ia_css_stream *stream)
 	/* get the target input terminal */
 	sp_pipeline_input_terminal = &sh_css_sp_group.pipe_io[sp_thread_id].input;
 
-	for (i = 0; i < IA_CSS_STREAM_MAX_ISYS_STREAM_PER_CH; i++)
-	{
+	for (i = 0; i < IA_CSS_STREAM_MAX_ISYS_STREAM_PER_CH; i++) {
 		/* initialization */
 		memset((void *)(&isys_stream_descr), 0, sizeof(ia_css_isys_descr_t));
 		sp_pipeline_input_terminal->context.virtual_input_system_stream[i].valid = 0;
@@ -2156,7 +2155,6 @@ init_pipe_defaults(enum ia_css_pipe_mode mode,
 		   struct ia_css_pipe *pipe,
 		   bool copy_pipe)
 {
-
 	if (!pipe) {
 		IA_CSS_ERROR("NULL pipe parameter");
 		return -EINVAL;
@@ -2258,12 +2256,12 @@ pipe_release_pipe_num(unsigned int pipe_num)
 static int
 create_pipe(enum ia_css_pipe_mode mode,
 	    struct ia_css_pipe **pipe,
-	    bool copy_pipe) {
+	    bool copy_pipe)
+{
 	int err = 0;
 	struct ia_css_pipe *me;
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("NULL pipe parameter");
 		return -EINVAL;
 	}
@@ -2273,15 +2271,13 @@ create_pipe(enum ia_css_pipe_mode mode,
 		return -ENOMEM;
 
 	err = init_pipe_defaults(mode, me, copy_pipe);
-	if (err)
-	{
+	if (err) {
 		kfree(me);
 		return err;
 	}
 
 	err = pipe_generate_pipe_num(me, &me->pipe_num);
-	if (err)
-	{
+	if (err) {
 		kfree(me);
 		return err;
 	}
@@ -2326,26 +2322,24 @@ static void sh_css_pipe_free_acc_binaries(
 }
 
 int
-ia_css_pipe_destroy(struct ia_css_pipe *pipe) {
+ia_css_pipe_destroy(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 
 	IA_CSS_ENTER("pipe = %p", pipe);
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	if (pipe->stream)
-	{
+	if (pipe->stream) {
 		IA_CSS_LOG("ia_css_stream_destroy not called!");
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	switch (pipe->config.mode)
-	{
+	switch (pipe->config.mode) {
 	case IA_CSS_PIPE_MODE_PREVIEW:
 		/* need to take into account that this function is also called
 		   on the internal copy pipe */
@@ -2409,9 +2403,8 @@ ia_css_pipe_destroy(struct ia_css_pipe *pipe) {
 
 	/* Temporarily, not every sh_css_pipe has an acc_extension. */
 	if (pipe->config.acc_extension)
-	{
 		ia_css_pipe_unload_extension(pipe, pipe->config.acc_extension);
-	}
+
 	kfree(pipe);
 	IA_CSS_LEAVE("err = %d", err);
 	return err;
@@ -2441,9 +2434,9 @@ ia_css_uninit(void)
 	ifmtr_set_if_blocking_mode_reset = true;
 #endif
 
-	if (!fw_explicitly_loaded) {
+	if (!fw_explicitly_loaded)
 		ia_css_unload_firmware();
-	}
+
 	ia_css_spctrl_unload_fw(SP0_ID);
 	sh_css_sp_set_sp_running(false);
 	/* check and free any remaining mipi frames */
@@ -2630,7 +2623,8 @@ static int load_copy_binary(
 
 static int
 alloc_continuous_frames(
-    struct ia_css_pipe *pipe, bool init_time) {
+    struct ia_css_pipe *pipe, bool init_time)
+{
 	int err = 0;
 	struct ia_css_frame_info ref_info;
 	enum ia_css_pipe_id pipe_id;
@@ -2640,8 +2634,7 @@ alloc_continuous_frames(
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p, init_time = %d", pipe, init_time);
 
-	if ((!pipe) || (!pipe->stream))
-	{
+	if ((!pipe) || (!pipe->stream)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -2649,26 +2642,22 @@ alloc_continuous_frames(
 	pipe_id = pipe->mode;
 	continuous = pipe->stream->config.continuous;
 
-	if (continuous)
-	{
+	if (continuous) {
 		if (init_time) {
 			num_frames = pipe->stream->config.init_num_cont_raw_buf;
 			pipe->stream->continuous_pipe = pipe;
-		} else
+		} else {
 			num_frames = pipe->stream->config.target_num_cont_raw_buf;
-	} else
-	{
+		}
+	} else {
 		num_frames = NUM_ONLINE_INIT_CONTINUOUS_FRAMES;
 	}
 
-	if (pipe_id == IA_CSS_PIPE_ID_PREVIEW)
-	{
+	if (pipe_id == IA_CSS_PIPE_ID_PREVIEW) {
 		ref_info = pipe->pipe_settings.preview.preview_binary.in_frame_info;
-	} else if (pipe_id == IA_CSS_PIPE_ID_VIDEO)
-	{
+	} else if (pipe_id == IA_CSS_PIPE_ID_VIDEO) {
 		ref_info = pipe->pipe_settings.video.video_binary.in_frame_info;
-	} else
-	{
+	} else {
 		/* should not happen */
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
@@ -2684,8 +2673,7 @@ alloc_continuous_frames(
 #endif
 
 #if !defined(HAS_NO_PACKED_RAW_PIXELS)
-	if (pipe->stream->config.pack_raw_pixels)
-	{
+	if (pipe->stream->config.pack_raw_pixels) {
 		ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 				    "alloc_continuous_frames() IA_CSS_FRAME_FORMAT_RAW_PACKED\n");
 		ref_info.format = IA_CSS_FRAME_FORMAT_RAW_PACKED;
@@ -2714,8 +2702,7 @@ alloc_continuous_frames(
 	else
 		idx = pipe->stream->config.init_num_cont_raw_buf;
 
-	for (i = idx; i < NUM_CONTINUOUS_FRAMES; i++)
-	{
+	for (i = idx; i < NUM_CONTINUOUS_FRAMES; i++) {
 		/* free previous frame */
 		if (pipe->continuous_frames[i]) {
 			ia_css_frame_free(pipe->continuous_frames[i]);
@@ -2745,14 +2732,16 @@ alloc_continuous_frames(
 }
 
 int
-ia_css_alloc_continuous_frame_remain(struct ia_css_stream *stream) {
+ia_css_alloc_continuous_frame_remain(struct ia_css_stream *stream)
+{
 	if (!stream)
 		return -EINVAL;
 	return alloc_continuous_frames(stream->continuous_pipe, false);
 }
 
 static int
-load_preview_binaries(struct ia_css_pipe *pipe) {
+load_preview_binaries(struct ia_css_pipe *pipe)
+{
 	struct ia_css_frame_info prev_in_info,
 		prev_bds_out_info,
 		prev_out_info,
@@ -2860,8 +2849,7 @@ load_preview_binaries(struct ia_css_pipe *pipe) {
 	 * then the preview binary selection is done again.
 	 */
 	if (need_vf_pp &&
-	    (mycs->preview_binary.out_frame_info[0].format != IA_CSS_FRAME_FORMAT_YUV_LINE))
-	{
+	    (mycs->preview_binary.out_frame_info[0].format != IA_CSS_FRAME_FORMAT_YUV_LINE)) {
 		/* Preview step 2 */
 		if (pipe->vf_yuv_ds_input_info.res.width)
 			prev_vf_info = pipe->vf_yuv_ds_input_info;
@@ -2886,8 +2874,7 @@ load_preview_binaries(struct ia_css_pipe *pipe) {
 			return err;
 	}
 
-	if (need_vf_pp)
-	{
+	if (need_vf_pp) {
 		struct ia_css_binary_descr vf_pp_descr;
 
 		/* Viewfinder post-processing */
@@ -2918,8 +2905,7 @@ load_preview_binaries(struct ia_css_pipe *pipe) {
 #endif
 
 	/* Copy */
-	if (need_isp_copy_binary)
-	{
+	if (need_isp_copy_binary) {
 		err = load_copy_binary(pipe,
 				       &mycs->copy_binary,
 				       &mycs->preview_binary);
@@ -2927,8 +2913,7 @@ load_preview_binaries(struct ia_css_pipe *pipe) {
 			return err;
 	}
 
-	if (pipe->shading_table)
-	{
+	if (pipe->shading_table) {
 		ia_css_shading_table_free(pipe->shading_table);
 		pipe->shading_table = NULL;
 	}
@@ -2943,11 +2928,11 @@ ia_css_binary_unload(struct ia_css_binary *binary)
 }
 
 static int
-unload_preview_binaries(struct ia_css_pipe *pipe) {
+unload_preview_binaries(struct ia_css_pipe *pipe)
+{
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW))
-	{
+	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -3000,15 +2985,15 @@ static int add_firmwares(
 		struct ia_css_frame *in = NULL;
 		struct ia_css_frame *vf = NULL;
 
-		if ((fw == last_fw) && (fw->info.isp.sp.enable.out_frame  != 0)) {
+		if ((fw == last_fw) && (fw->info.isp.sp.enable.out_frame  != 0))
 			out[0] = out_frame;
-		}
-		if (fw->info.isp.sp.enable.in_frame != 0) {
+
+		if (fw->info.isp.sp.enable.in_frame != 0)
 			in = in_frame;
-		}
-		if (fw->info.isp.sp.enable.out_frame != 0) {
+
+		if (fw->info.isp.sp.enable.out_frame != 0)
 			vf = vf_frame;
-		}
+
 		ia_css_pipe_get_firmwares_stage_desc(&stage_desc, binary,
 						     out, in, vf, fw, binary_mode);
 		err = ia_css_pipeline_create_and_add_stage(me,
@@ -3222,7 +3207,8 @@ static void sh_css_setup_queues(void)
 
 static int
 init_vf_frameinfo_defaults(struct ia_css_pipe *pipe,
-			   struct ia_css_frame *vf_frame, unsigned int idx) {
+			   struct ia_css_frame *vf_frame, unsigned int idx)
+{
 	int err = 0;
 	unsigned int thread_id;
 	enum sh_css_queue_id queue_id;
@@ -3387,7 +3373,8 @@ ia_css_get_crop_offsets(
 
 static int
 init_in_frameinfo_memory_defaults(struct ia_css_pipe *pipe,
-				  struct ia_css_frame *frame, enum ia_css_frame_format format) {
+				  struct ia_css_frame *frame, enum ia_css_frame_format format)
+{
 	struct ia_css_frame *in_frame;
 	int err = 0;
 	unsigned int thread_id;
@@ -3428,7 +3415,8 @@ init_in_frameinfo_memory_defaults(struct ia_css_pipe *pipe,
 
 static int
 init_out_frameinfo_defaults(struct ia_css_pipe *pipe,
-			    struct ia_css_frame *out_frame, unsigned int idx) {
+			    struct ia_css_frame *out_frame, unsigned int idx)
+{
 	int err = 0;
 	unsigned int thread_id;
 	enum sh_css_queue_id queue_id;
@@ -3659,14 +3647,14 @@ ERR:
 }
 
 static int
-create_host_acc_pipeline(struct ia_css_pipe *pipe) {
+create_host_acc_pipeline(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 	const struct ia_css_fw_info *fw;
 	unsigned int i;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
-	if ((!pipe) || (!pipe->stream))
-	{
+	if ((!pipe) || (!pipe->stream)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -3677,15 +3665,13 @@ create_host_acc_pipeline(struct ia_css_pipe *pipe) {
 		pipe->pipeline.pipe_qos_config = 0;
 
 	fw = pipe->vf_stage;
-	for (i = 0; fw; fw = fw->next)
-	{
+	for (i = 0; fw; fw = fw->next) {
 		err = sh_css_pipeline_add_acc_stage(&pipe->pipeline, fw);
 		if (err)
 			goto ERR;
 	}
 
-	for (i = 0; i < pipe->config.num_acc_stages; i++)
-	{
+	for (i = 0; i < pipe->config.num_acc_stages; i++) {
 		struct ia_css_fw_info *fw = pipe->config.acc_stages[i];
 
 		err = sh_css_pipeline_add_acc_stage(&pipe->pipeline, fw);
@@ -3702,7 +3688,8 @@ ERR:
 
 /* Create stages for preview */
 static int
-create_host_preview_pipeline(struct ia_css_pipe *pipe) {
+create_host_preview_pipeline(struct ia_css_pipe *pipe)
+{
 	struct ia_css_pipeline_stage *copy_stage = NULL;
 	struct ia_css_pipeline_stage *preview_stage = NULL;
 	struct ia_css_pipeline_stage *vf_pp_stage = NULL;
@@ -3722,8 +3709,7 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe) {
 #endif
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
-	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW))
-	{
+	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -3751,16 +3737,14 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe) {
 	/* Construct in_frame info (only in case we have dynamic input */
 	need_in_frameinfo_memory = pipe->stream->config.mode == IA_CSS_INPUT_MODE_MEMORY;
 #endif
-	if (need_in_frameinfo_memory)
-	{
+	if (need_in_frameinfo_memory) {
 		err = init_in_frameinfo_memory_defaults(pipe, &me->in_frame,
 							IA_CSS_FRAME_FORMAT_RAW);
 		if (err)
 			goto ERR;
 
 		in_frame = &me->in_frame;
-	} else
-	{
+	} else {
 		in_frame = NULL;
 	}
 
@@ -3774,8 +3758,7 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe) {
 	if (pipe->pipe_settings.preview.vf_pp_binary.info)
 		vf_pp_binary = &pipe->pipe_settings.preview.vf_pp_binary;
 
-	if (pipe->pipe_settings.preview.copy_binary.info)
-	{
+	if (pipe->pipe_settings.preview.copy_binary.info) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
 						   out_frames, NULL, NULL);
@@ -3790,21 +3773,19 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe) {
 		/* When continuous is enabled, configure in_frame with the
 		 * last pipe, which is the copy pipe.
 		 */
-		if (continuous || !online) {
+		if (continuous || !online)
 			in_frame = pipe->stream->last_pipe->continuous_frames[0];
-		}
+
 #else
 		in_frame = pipe->continuous_frames[0];
 #endif
 	}
 
-	if (vf_pp_binary)
-	{
+	if (vf_pp_binary) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, preview_binary,
 						   out_frames, in_frame, NULL);
-	} else
-	{
+	} else {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, preview_binary,
 						   out_frames, in_frame, NULL);
@@ -3818,14 +3799,12 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe) {
 	preview_stage->args.copy_vf =
 	    preview_binary->info->sp.pipeline.mode == IA_CSS_BINARY_MODE_COPY;
 	preview_stage->args.copy_output = !preview_stage->args.copy_vf;
-	if (preview_stage->args.copy_vf && !preview_stage->args.out_vf_frame)
-	{
+	if (preview_stage->args.copy_vf && !preview_stage->args.out_vf_frame) {
 		/* in case of copy, use the vf frame as output frame */
 		preview_stage->args.out_vf_frame =
 		    preview_stage->args.out_frame[0];
 	}
-	if (vf_pp_binary)
-	{
+	if (vf_pp_binary) {
 		if (preview_binary->info->sp.pipeline.mode == IA_CSS_BINARY_MODE_COPY)
 			in_frame = preview_stage->args.out_vf_frame;
 		else
@@ -3865,7 +3844,8 @@ static void send_raw_frames(struct ia_css_pipe *pipe)
 }
 
 static int
-preview_start(struct ia_css_pipe *pipe) {
+preview_start(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 	struct ia_css_pipe *copy_pipe, *capture_pipe;
 	struct ia_css_pipe *acc_pipe;
@@ -3875,8 +3855,7 @@ preview_start(struct ia_css_pipe *pipe) {
 	const struct ia_css_isp_parameters *params = NULL;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
-	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW))
-	{
+	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_PREVIEW)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -3903,8 +3882,7 @@ preview_start(struct ia_css_pipe *pipe) {
 		ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
 		copy_ovrd = 1 << thread_id;
 
-		if (pipe->stream->cont_capt)
-		{
+		if (pipe->stream->cont_capt) {
 			ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(capture_pipe),
 							 &thread_id);
 			copy_ovrd |= 1 << thread_id;
@@ -3917,8 +3895,7 @@ preview_start(struct ia_css_pipe *pipe) {
 	}
 
 	/* Construct and load the copy pipe */
-	if (pipe->stream->config.continuous)
-	{
+	if (pipe->stream->config.continuous) {
 		sh_css_sp_init_pipeline(&copy_pipe->pipeline,
 					IA_CSS_PIPE_ID_COPY,
 					(uint8_t)ia_css_pipe_get_pipe_num(copy_pipe),
@@ -3939,8 +3916,7 @@ preview_start(struct ia_css_pipe *pipe) {
 	}
 
 	/* Construct and load the capture pipe */
-	if (pipe->stream->cont_capt)
-	{
+	if (pipe->stream->cont_capt) {
 		sh_css_sp_init_pipeline(&capture_pipe->pipeline,
 					IA_CSS_PIPE_ID_CAPTURE,
 					(uint8_t)ia_css_pipe_get_pipe_num(capture_pipe),
@@ -3958,8 +3934,7 @@ preview_start(struct ia_css_pipe *pipe) {
 					params);
 	}
 
-	if (acc_pipe)
-	{
+	if (acc_pipe) {
 		sh_css_sp_init_pipeline(&acc_pipe->pipeline,
 					IA_CSS_PIPE_ID_ACC,
 					(uint8_t)ia_css_pipe_get_pipe_num(acc_pipe),
@@ -3985,7 +3960,8 @@ preview_start(struct ia_css_pipe *pipe) {
 
 int
 ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
-			   const struct ia_css_buffer *buffer) {
+			   const struct ia_css_buffer *buffer)
+{
 	int return_err = 0;
 	unsigned int thread_id;
 	enum sh_css_queue_id queue_id;
@@ -4000,8 +3976,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 
 	IA_CSS_ENTER("pipe=%p, buffer=%p", pipe, buffer);
 
-	if ((!pipe) || (!buffer))
-	{
+	if ((!pipe) || (!buffer)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
@@ -4010,8 +3985,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	/* following code will be enabled when IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME
 	   is removed */
 #if 0
-	if (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME)
-	{
+	if (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME) {
 		bool found_pipe = false;
 
 		for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
@@ -4025,8 +3999,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 		if (!found_pipe)
 			return -EINVAL;
 	}
-	if (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)
-	{
+	if (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME) {
 		bool found_pipe = false;
 
 		for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
@@ -4049,34 +4022,29 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	assert(buf_type < IA_CSS_NUM_DYNAMIC_BUFFER_TYPE);
 	if ((buf_type == IA_CSS_BUFFER_TYPE_INVALID) ||
 	    (buf_type >= IA_CSS_NUM_DYNAMIC_BUFFER_TYPE) ||
-	    (pipe_id >= IA_CSS_PIPE_ID_NUM))
-	{
+	    (pipe_id >= IA_CSS_PIPE_ID_NUM)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
 	ret_err = ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
-	if (!ret_err)
-	{
+	if (!ret_err) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
 	ret_err = ia_css_query_internal_queue_id(buf_type, thread_id, &queue_id);
-	if (!ret_err)
-	{
+	if (!ret_err) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	if ((queue_id <= SH_CSS_INVALID_QUEUE_ID) || (queue_id >= SH_CSS_MAX_NUM_QUEUES))
-	{
+	if ((queue_id <= SH_CSS_INVALID_QUEUE_ID) || (queue_id >= SH_CSS_MAX_NUM_QUEUES)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	if (!sh_css_sp_is_running())
-	{
+	if (!sh_css_sp_is_running()) {
 		IA_CSS_LOG("SP is not running!");
 		IA_CSS_LEAVE_ERR(-EBUSY);
 		/* SP is not running. The queues are not valid */
@@ -4094,24 +4062,21 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	ddr_buffer.cookie_ptr = buffer->driver_cookie;
 	ddr_buffer.timing_data = buffer->timing_data;
 
-	if (buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS)
-	{
+	if (buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS) {
 		if (!buffer->data.stats_3a) {
 			IA_CSS_LEAVE_ERR(-EINVAL);
 			return -EINVAL;
 		}
 		ddr_buffer.kernel_ptr = HOST_ADDRESS(buffer->data.stats_3a);
 		ddr_buffer.payload.s3a = *buffer->data.stats_3a;
-	} else if (buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS)
-	{
+	} else if (buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS) {
 		if (!buffer->data.stats_dvs) {
 			IA_CSS_LEAVE_ERR(-EINVAL);
 			return -EINVAL;
 		}
 		ddr_buffer.kernel_ptr = HOST_ADDRESS(buffer->data.stats_dvs);
 		ddr_buffer.payload.dis = *buffer->data.stats_dvs;
-	} else if (buf_type == IA_CSS_BUFFER_TYPE_METADATA)
-	{
+	} else if (buf_type == IA_CSS_BUFFER_TYPE_METADATA) {
 		if (!buffer->data.metadata) {
 			IA_CSS_LEAVE_ERR(-EINVAL);
 			return -EINVAL;
@@ -4122,8 +4087,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 		   || (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME)
 		   || (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)
 		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME))
-	{
+		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME)) {
 		if (!buffer->data.frame) {
 			IA_CSS_LEAVE_ERR(-EINVAL);
 			return -EINVAL;
@@ -4158,8 +4122,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	assert(h_vbuf);
 	assert(h_vbuf->vptr != 0x0);
 
-	if ((!h_vbuf) || (h_vbuf->vptr == 0x0))
-	{
+	if ((!h_vbuf) || (h_vbuf->vptr == 0x0)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
@@ -4169,8 +4132,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 		   sizeof(struct sh_css_hmm_buffer));
 	if ((buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS)
 	    || (buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS)
-	    || (buf_type == IA_CSS_BUFFER_TYPE_LACE_STATISTICS))
-	{
+	    || (buf_type == IA_CSS_BUFFER_TYPE_LACE_STATISTICS)) {
 		if (!pipeline) {
 			ia_css_rmgr_rel_vbuf(hmm_buffer_pool, &h_vbuf);
 			IA_CSS_LOG("pipeline is empty!");
@@ -4193,8 +4155,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 		   || (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)
 		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME)
 		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_METADATA))
-	{
+		   || (buf_type == IA_CSS_BUFFER_TYPE_METADATA)) {
 		return_err = ia_css_bufq_enqueue_buffer(thread_id,
 							queue_id,
 							(uint32_t)h_vbuf->vptr);
@@ -4208,8 +4169,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 #endif
 	}
 
-	if (!return_err)
-	{
+	if (!return_err) {
 		if (sh_css_hmm_buffer_record_acquire(
 			h_vbuf, buf_type,
 			HOST_ADDRESS(ddr_buffer.kernel_ptr))) {
@@ -4224,8 +4184,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	 * Tell the SP which queues are not empty,
 	 * by sending the software event.
 	 */
-	if (!return_err)
-	{
+	if (!return_err) {
 		if (!sh_css_sp_is_running()) {
 			/* SP is not running. The queues are not valid */
 			IA_CSS_LOG("SP is not running!");
@@ -4237,8 +4196,7 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 				 (uint8_t)thread_id,
 				 queue_id,
 				 0);
-	} else
-	{
+	} else {
 		ia_css_rmgr_rel_vbuf(hmm_buffer_pool, &h_vbuf);
 		IA_CSS_ERROR("buffer not enqueued");
 	}
@@ -4253,7 +4211,8 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	 */
 int
 ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
-			   struct ia_css_buffer *buffer) {
+			   struct ia_css_buffer *buffer)
+{
 	int return_err;
 	enum sh_css_queue_id queue_id;
 	ia_css_ptr ddr_buffer_addr = (ia_css_ptr)0;
@@ -4266,8 +4225,7 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 
 	IA_CSS_ENTER("pipe=%p, buffer=%p", pipe, buffer);
 
-	if ((!pipe) || (!buffer))
-	{
+	if ((!pipe) || (!buffer)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
@@ -4281,27 +4239,23 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 	ddr_buffer.kernel_ptr = 0;
 
 	ret_err = ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
-	if (!ret_err)
-	{
+	if (!ret_err) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
 	ret_err = ia_css_query_internal_queue_id(buf_type, thread_id, &queue_id);
-	if (!ret_err)
-	{
+	if (!ret_err) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	if ((queue_id <= SH_CSS_INVALID_QUEUE_ID) || (queue_id >= SH_CSS_MAX_NUM_QUEUES))
-	{
+	if ((queue_id <= SH_CSS_INVALID_QUEUE_ID) || (queue_id >= SH_CSS_MAX_NUM_QUEUES)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
-	if (!sh_css_sp_is_running())
-	{
+	if (!sh_css_sp_is_running()) {
 		IA_CSS_LOG("SP is not running!");
 		IA_CSS_LEAVE_ERR(-EBUSY);
 		/* SP is not running. The queues are not valid */
@@ -4311,8 +4265,7 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 	return_err = ia_css_bufq_dequeue_buffer(queue_id,
 						(uint32_t *)&ddr_buffer_addr);
 
-	if (!return_err)
-	{
+	if (!return_err) {
 		struct ia_css_frame *frame;
 		struct sh_css_hmm_buffer_record *hmm_buffer_record = NULL;
 
@@ -4454,8 +4407,7 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 	 * Tell the SP which queues are not full,
 	 * by sending the software event.
 	 */
-	if (!return_err)
-	{
+	if (!return_err) {
 		if (!sh_css_sp_is_running()) {
 			IA_CSS_LOG("SP is not running!");
 			IA_CSS_LEAVE_ERR(-EBUSY);
@@ -4504,12 +4456,14 @@ static enum ia_css_event_type convert_event_sp_to_host_domain[] = {
 };
 
 int
-ia_css_dequeue_event(struct ia_css_event *event) {
+ia_css_dequeue_event(struct ia_css_event *event)
+{
 	return ia_css_dequeue_psys_event(event);
 }
 
 int
-ia_css_dequeue_psys_event(struct ia_css_event *event) {
+ia_css_dequeue_psys_event(struct ia_css_event *event)
+{
 	enum ia_css_pipe_id pipe_id = 0;
 	u8 payload[4] = {0, 0, 0, 0};
 	int ret_err;
@@ -4524,11 +4478,9 @@ ia_css_dequeue_psys_event(struct ia_css_event *event) {
 	if (!event)
 		return -EINVAL;
 
+	/* SP is not running. The queues are not valid */
 	if (!sh_css_sp_is_running())
-	{
-		/* SP is not running. The queues are not valid */
 		return -EBUSY;
-	}
 
 	/* dequeue the event (if any) from the psys event queue */
 	ret_err = ia_css_bufq_dequeue_psys_event(payload);
@@ -4555,8 +4507,7 @@ ia_css_dequeue_psys_event(struct ia_css_event *event) {
 	event->timer_code = 0;
 	event->timer_subcode = 0;
 
-	if (event->type == IA_CSS_EVENT_TYPE_TIMER)
-	{
+	if (event->type == IA_CSS_EVENT_TYPE_TIMER) {
 		/* timer event ??? get the 2nd event and decode the data into the event struct */
 		u32 tmp_data;
 		/* 1st event: LSB 16-bit timer data and code */
@@ -4580,37 +4531,32 @@ ia_css_dequeue_psys_event(struct ia_css_event *event) {
 			tmp_data = ((payload[1] & 0xFF) | ((payload[3] & 0xFF) << 8));
 			event->timer_data |= (tmp_data << 16);
 			event->timer_subcode = payload[2];
-		}
+		} else {
 		/* It's a non timer event. So clear first half of the timer event data.
 		* If the second part of the TIMER event is not received, we discard
 		* the first half of the timer data and process the non timer event without
 		* affecting the flow. So the non timer event falls through
 		* the code. */
-		else {
 			event->timer_data = 0;
 			event->timer_code = 0;
 			event->timer_subcode = 0;
 			IA_CSS_ERROR("Missing 2nd timer event. Timer event discarded");
 		}
 	}
-	if (event->type == IA_CSS_EVENT_TYPE_PORT_EOF)
-	{
+	if (event->type == IA_CSS_EVENT_TYPE_PORT_EOF) {
 		event->port = (enum mipi_port_id)payload[1];
 		event->exp_id = payload[3];
-	} else if (event->type == IA_CSS_EVENT_TYPE_FW_WARNING)
-	{
+	} else if (event->type == IA_CSS_EVENT_TYPE_FW_WARNING) {
 		event->fw_warning = (enum ia_css_fw_warning)payload[1];
 		/* exp_id is only available in these warning types */
 		if (event->fw_warning == IA_CSS_FW_WARNING_EXP_ID_LOCKED ||
 		    event->fw_warning == IA_CSS_FW_WARNING_TAG_EXP_ID_FAILED)
 			event->exp_id = payload[3];
-	} else if (event->type == IA_CSS_EVENT_TYPE_FW_ASSERT)
-	{
+	} else if (event->type == IA_CSS_EVENT_TYPE_FW_ASSERT) {
 		event->fw_assert_module_id = payload[1]; /* module */
 		event->fw_assert_line_no = (payload[2] << 8) + payload[3];
 		/* payload[2] is line_no>>8, payload[3] is line_no&0xff */
-	} else if (event->type != IA_CSS_EVENT_TYPE_TIMER)
-	{
+	} else if (event->type != IA_CSS_EVENT_TYPE_TIMER) {
 		/* pipe related events.
 		 * payload[1] contains the pipe_num,
 		 * payload[2] contains the pipe_id. These are different. */
@@ -4660,7 +4606,8 @@ ia_css_dequeue_psys_event(struct ia_css_event *event) {
 }
 
 int
-ia_css_dequeue_isys_event(struct ia_css_event *event) {
+ia_css_dequeue_isys_event(struct ia_css_event *event)
+{
 	u8 payload[4] = {0, 0, 0, 0};
 	int err = 0;
 
@@ -4670,11 +4617,9 @@ ia_css_dequeue_isys_event(struct ia_css_event *event) {
 	if (!event)
 		return -EINVAL;
 
+	/* SP is not running. The queues are not valid */
 	if (!sh_css_sp_is_running())
-	{
-		/* SP is not running. The queues are not valid */
 		return -EBUSY;
-	}
 
 	err = ia_css_bufq_dequeue_isys_event(payload);
 	if (err)
@@ -4707,7 +4652,8 @@ acc_start(struct ia_css_pipe *pipe)
 }
 
 static int
-sh_css_pipe_start(struct ia_css_stream *stream) {
+sh_css_pipe_start(struct ia_css_stream *stream)
+{
 	int err = 0;
 
 	struct ia_css_pipe *pipe;
@@ -4716,22 +4662,19 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
 
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 	pipe = stream->last_pipe;
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
 
 	pipe_id = pipe->mode;
 
-	if (stream->started)
-	{
+	if (stream->started) {
 		IA_CSS_WARNING("Cannot start stream that is already started");
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -4739,8 +4682,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 
 	pipe->stop_requested = false;
 
-	switch (pipe_id)
-	{
+	switch (pipe_id) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		err = preview_start(pipe);
 		break;
@@ -4760,8 +4702,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 		err = -EINVAL;
 	}
 	/* DH regular multi pipe - not continuous mode: start the next pipes too */
-	if (!stream->config.continuous)
-	{
+	if (!stream->config.continuous) {
 		int i;
 
 		for (i = 1; i < stream->num_pipes && 0 == err ; i++) {
@@ -4791,8 +4732,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 			}
 		}
 	}
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
@@ -4802,8 +4742,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 	 * don't use ISP parameters anyway. So this should be okay.
 	 * The SP binary (jpeg) copy does not use any parameters.
 	 */
-	if (!copy_on_sp(pipe))
-	{
+	if (!copy_on_sp(pipe)) {
 		sh_css_invalidate_params(stream);
 		err = sh_css_param_update_isp_params(pipe,
 						     stream->isp_params_configs, true, NULL);
@@ -4817,8 +4756,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 
 	ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
 
-	if (!sh_css_sp_is_running())
-	{
+	if (!sh_css_sp_is_running()) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EBUSY);
 		/* SP is not running. The queues are not valid */
 		return -EBUSY;
@@ -4827,8 +4765,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 				       (uint8_t)thread_id, 0, 0);
 
 	/* DH regular multi pipe - not continuous mode: enqueue event to the next pipes too */
-	if (!stream->config.continuous)
-	{
+	if (!stream->config.continuous) {
 		int i;
 
 		for (i = 1; i < stream->num_pipes; i++) {
@@ -4842,8 +4779,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 	}
 
 	/* in case of continuous capture mode, we also start capture thread and copy thread*/
-	if (pipe->stream->config.continuous)
-	{
+	if (pipe->stream->config.continuous) {
 		struct ia_css_pipe *copy_pipe = NULL;
 
 		if (pipe_id == IA_CSS_PIPE_ID_PREVIEW)
@@ -4862,8 +4798,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 		    IA_CSS_PSYS_SW_EVENT_START_STREAM,
 		    (uint8_t)thread_id, 0,  0);
 	}
-	if (pipe->stream->cont_capt)
-	{
+	if (pipe->stream->cont_capt) {
 		struct ia_css_pipe *capture_pipe = NULL;
 
 		if (pipe_id == IA_CSS_PIPE_ID_PREVIEW)
@@ -4884,8 +4819,7 @@ sh_css_pipe_start(struct ia_css_stream *stream) {
 	}
 
 	/* in case of PREVIEW mode, check whether QOS acc_pipe is available, then start the qos pipe */
-	if (pipe_id == IA_CSS_PIPE_ID_PREVIEW)
-	{
+	if (pipe_id == IA_CSS_PIPE_ID_PREVIEW) {
 		struct ia_css_pipe *acc_pipe = NULL;
 
 		acc_pipe = pipe->pipe_settings.preview.acc_pipe;
@@ -4936,7 +4870,8 @@ sh_css_continuous_is_enabled(uint8_t pipe_num)
 /* ISP2400 */
 int
 ia_css_stream_get_max_buffer_depth(struct ia_css_stream *stream,
-				   int *buffer_depth) {
+				   int *buffer_depth)
+{
 	if (!buffer_depth)
 		return -EINVAL;
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_get_max_buffer_depth() enter: void\n");
@@ -4946,7 +4881,8 @@ ia_css_stream_get_max_buffer_depth(struct ia_css_stream *stream,
 }
 
 int
-ia_css_stream_set_buffer_depth(struct ia_css_stream *stream, int buffer_depth) {
+ia_css_stream_set_buffer_depth(struct ia_css_stream *stream, int buffer_depth)
+{
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_set_buffer_depth() enter: num_frames=%d\n", buffer_depth);
 	(void)stream;
 	if (buffer_depth > NUM_CONTINUOUS_FRAMES || buffer_depth < 1)
@@ -4960,7 +4896,8 @@ ia_css_stream_set_buffer_depth(struct ia_css_stream *stream, int buffer_depth) {
 /* ISP2401 */
 int
 ia_css_stream_get_buffer_depth(struct ia_css_stream *stream,
-			       int *buffer_depth) {
+			       int *buffer_depth)
+{
 	if (!buffer_depth)
 		return -EINVAL;
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_get_buffer_depth() enter: void\n");
@@ -4985,8 +4922,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	int i;
 
 	assert(stream);
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LOG("stream does NOT exist!");
 		err = -EINVAL;
 		goto ERR;
@@ -4994,8 +4930,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 
 	main_pipe = stream->last_pipe;
 	assert(main_pipe);
-	if (!main_pipe)
-	{
+	if (!main_pipe) {
 		IA_CSS_LOG("main_pipe does NOT exist!");
 		err = -EINVAL;
 		goto ERR;
@@ -5008,8 +4943,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	 * Stop all "ia_css_pipe" instances in this target
 	 * "ia_css_stream" instance.
 	 */
-	for (i = 0; i < stream->num_pipes; i++)
-	{
+	for (i = 0; i < stream->num_pipes; i++) {
 		/* send the "stop" request to the "ia_css_pipe" instance */
 		IA_CSS_LOG("Send the stop-request to the pipe: pipe_id=%d",
 			stream->pipes[i]->pipeline.pipe_id);
@@ -5043,8 +4977,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	 *
 	 * We need to stop this "Copy Pipe", as well.
 	 */
-	if (main_pipe->stream->config.continuous)
-	{
+	if (main_pipe->stream->config.continuous) {
 		struct ia_css_pipe *copy_pipe = NULL;
 
 		/* get the reference to "Copy Pipe" */
@@ -5220,8 +5153,7 @@ sh_css_pipe_get_shading_info(struct ia_css_pipe *pipe,
 
 	binary = ia_css_pipe_get_shading_correction_binary(pipe);
 
-	if (binary)
-	{
+	if (binary) {
 		err = ia_css_binary_get_shading_info(binary,
 						     IA_CSS_SHADING_CORRECTION_TYPE_1,
 						     pipe->required_bds_factor,
@@ -5231,8 +5163,7 @@ sh_css_pipe_get_shading_info(struct ia_css_pipe *pipe,
 		/* Other function calls can be added here when other shading correction types will be added
 		 * in the future.
 		 */
-	} else
-	{
+	} else {
 		/* When the pipe does not have a binary which has the shading
 		 * correction, this function does not need to fill the shading
 		 * information. It is not a error case, and then
@@ -5245,7 +5176,8 @@ sh_css_pipe_get_shading_info(struct ia_css_pipe *pipe,
 
 static int
 sh_css_pipe_get_grid_info(struct ia_css_pipe *pipe,
-			  struct ia_css_grid_info *info) {
+			  struct ia_css_grid_info *info)
+{
 	int err = 0;
 	struct ia_css_binary *binary = NULL;
 
@@ -5256,30 +5188,27 @@ sh_css_pipe_get_grid_info(struct ia_css_pipe *pipe,
 
 	binary = ia_css_pipe_get_s3a_binary(pipe);
 
-	if (binary)
-	{
+	if (binary) {
 		err = ia_css_binary_3a_grid_info(binary, info, pipe);
 		if (err)
 			goto ERR;
-	} else
+	} else {
 		memset(&info->s3a_grid, 0, sizeof(info->s3a_grid));
+	}
 
 	binary = ia_css_pipe_get_sdis_binary(pipe);
 
-	if (binary)
-	{
+	if (binary) {
 		ia_css_binary_dvs_grid_info(binary, info, pipe);
 		ia_css_binary_dvs_stat_grid_info(binary, info, pipe);
-	} else
-	{
+	} else {
 		memset(&info->dvs_grid.dvs_grid_info, 0,
 		       sizeof(info->dvs_grid.dvs_grid_info));
 		memset(&info->dvs_grid.dvs_stat_grid_info, 0,
 		       sizeof(info->dvs_grid.dvs_stat_grid_info));
 	}
 
-	if (binary)
-	{
+	if (binary) {
 		/* copy pipe does not have ISP binary*/
 		info->isp_in_width = binary->internal_frame_info.res.width;
 		info->isp_in_height = binary->internal_frame_info.res.height;
@@ -5299,7 +5228,8 @@ ERR :
  */
 static int
 ia_css_pipe_check_format(struct ia_css_pipe *pipe,
-			 enum ia_css_frame_format format) {
+			 enum ia_css_frame_format format)
+{
 	const enum ia_css_frame_format *supported_formats;
 	int number_of_formats;
 	int found = 0;
@@ -5307,8 +5237,7 @@ ia_css_pipe_check_format(struct ia_css_pipe *pipe,
 
 	IA_CSS_ENTER_PRIVATE("");
 
-	if (NULL == pipe || NULL == pipe->pipe_settings.video.video_binary.info)
-	{
+	if (NULL == pipe || NULL == pipe->pipe_settings.video.video_binary.info) {
 		IA_CSS_ERROR("Pipe or binary info is not set");
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
@@ -5317,15 +5246,13 @@ ia_css_pipe_check_format(struct ia_css_pipe *pipe,
 	supported_formats = pipe->pipe_settings.video.video_binary.info->output_formats;
 	number_of_formats = sizeof(pipe->pipe_settings.video.video_binary.info->output_formats) / sizeof(enum ia_css_frame_format);
 
-	for (i = 0; i < number_of_formats && !found; i++)
-	{
+	for (i = 0; i < number_of_formats && !found; i++) {
 		if (supported_formats[i] == format) {
 			found = 1;
 			break;
 		}
 	}
-	if (!found)
-	{
+	if (!found) {
 		IA_CSS_ERROR("Requested format is not supported by binary");
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
@@ -5476,10 +5403,10 @@ static int load_video_binaries(struct ia_css_pipe *pipe)
 					 &mycs->video_binary);
 
 		if (err) {
-			if (video_vf_info) {
-				/* This will do another video binary lookup later for YUV_LINE format*/
+			/* This will do another video binary lookup later for YUV_LINE format*/
+			if (video_vf_info)
 				need_vf_pp = true;
-			} else
+			else
 				return err;
 		} else if (video_vf_info) {
 			/* The first video binary lookup is successful, but we may
@@ -5642,13 +5569,13 @@ static int load_video_binaries(struct ia_css_pipe *pipe)
 }
 
 static int
-unload_video_binaries(struct ia_css_pipe *pipe) {
+unload_video_binaries(struct ia_css_pipe *pipe)
+{
 	unsigned int i;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_VIDEO))
-	{
+	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_VIDEO)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -5798,31 +5725,29 @@ static int
 sh_css_pipe_configure_viewfinder(struct ia_css_pipe *pipe, unsigned int width,
 				 unsigned int height, unsigned int min_width,
 				 enum ia_css_frame_format format,
-				 unsigned int idx) {
+				 unsigned int idx)
+{
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p, width = %d, height = %d, min_width = %d, format = %d, idx = %d\n",
 			     pipe, width, height, min_width, format, idx);
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 
 	err = ia_css_util_check_res(width, height);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
 	if (pipe->vf_output_info[idx].res.width != width ||
 	    pipe->vf_output_info[idx].res.height != height ||
 	    pipe->vf_output_info[idx].format != format)
-	{
 		ia_css_frame_info_init(&pipe->vf_output_info[idx], width, height,
 				       format, min_width);
-	}
+
 	IA_CSS_LEAVE_ERR_PRIVATE(0);
 	return 0;
 }
@@ -6202,7 +6127,8 @@ static int load_primary_binaries(
 }
 
 static int
-allocate_delay_frames(struct ia_css_pipe *pipe) {
+allocate_delay_frames(struct ia_css_pipe *pipe)
+{
 	unsigned int num_delay_frames = 0, i = 0;
 	unsigned int dvs_frame_delay = 0;
 	struct ia_css_frame_info ref_info;
@@ -6212,8 +6138,7 @@ allocate_delay_frames(struct ia_css_pipe *pipe) {
 
 	IA_CSS_ENTER_PRIVATE("");
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("Invalid args - pipe %p", pipe);
 		return -EINVAL;
 	}
@@ -6224,8 +6149,7 @@ allocate_delay_frames(struct ia_css_pipe *pipe) {
 	if (dvs_frame_delay > 0)
 		num_delay_frames = dvs_frame_delay + 1;
 
-	switch (mode)
-	{
+	switch (mode) {
 	case IA_CSS_PIPE_ID_CAPTURE: {
 		struct ia_css_capture_settings *mycs_capture = &pipe->pipe_settings.capture;
 		(void)mycs_capture;
@@ -6277,8 +6201,7 @@ allocate_delay_frames(struct ia_css_pipe *pipe) {
 	ref_info.raw_bit_depth = SH_CSS_REF_BIT_DEPTH;
 
 	assert(num_delay_frames <= MAX_NUM_VIDEO_DELAY_FRAMES);
-	for (i = 0; i < num_delay_frames; i++)
-	{
+	for (i = 0; i < num_delay_frames; i++) {
 		err = ia_css_frame_allocate_from_info(&delay_frames[i],	&ref_info);
 		if (err)
 			return err;
@@ -6288,7 +6211,8 @@ allocate_delay_frames(struct ia_css_pipe *pipe) {
 }
 
 static int load_advanced_binaries(
-    struct ia_css_pipe *pipe) {
+    struct ia_css_pipe *pipe)
+{
 	struct ia_css_frame_info pre_in_info, gdc_in_info,
 			post_in_info, post_out_info,
 			vf_info, *vf_pp_in_info, *pipe_out_info,
@@ -6405,7 +6329,8 @@ static int load_advanced_binaries(
 }
 
 static int load_bayer_isp_binaries(
-    struct ia_css_pipe *pipe) {
+    struct ia_css_pipe *pipe)
+{
 	struct ia_css_frame_info pre_isp_in_info, *pipe_out_info;
 	int err = 0;
 	struct ia_css_binary_descr pre_de_descr;
@@ -6434,7 +6359,8 @@ static int load_bayer_isp_binaries(
 }
 
 static int load_low_light_binaries(
-    struct ia_css_pipe *pipe) {
+    struct ia_css_pipe *pipe)
+{
 	struct ia_css_frame_info pre_in_info, anr_in_info,
 			post_in_info, post_out_info,
 			vf_info, *pipe_vf_out_info, *pipe_out_info,
@@ -6572,7 +6498,8 @@ static bool copy_on_sp(struct ia_css_pipe *pipe)
 }
 
 static int load_capture_binaries(
-    struct ia_css_pipe *pipe) {
+    struct ia_css_pipe *pipe)
+{
 	int err = 0;
 	bool must_be_raw;
 
@@ -6640,13 +6567,13 @@ static int load_capture_binaries(
 }
 
 static int
-unload_capture_binaries(struct ia_css_pipe *pipe) {
+unload_capture_binaries(struct ia_css_pipe *pipe)
+{
 	unsigned int i;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if ((!pipe) || ((pipe->mode != IA_CSS_PIPE_ID_CAPTURE) && (pipe->mode != IA_CSS_PIPE_ID_COPY)))
-	{
+	if ((!pipe) || ((pipe->mode != IA_CSS_PIPE_ID_CAPTURE) && (pipe->mode != IA_CSS_PIPE_ID_COPY))) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -6674,7 +6601,8 @@ unload_capture_binaries(struct ia_css_pipe *pipe) {
 
 static bool
 need_downscaling(const struct ia_css_resolution in_res,
-		    const struct ia_css_resolution out_res) {
+		    const struct ia_css_resolution out_res)
+{
 	if (in_res.width > out_res.width || in_res.height > out_res.height)
 		return true;
 
@@ -6682,7 +6610,8 @@ need_downscaling(const struct ia_css_resolution in_res,
 }
 
 static bool
-need_yuv_scaler_stage(const struct ia_css_pipe *pipe) {
+need_yuv_scaler_stage(const struct ia_css_pipe *pipe)
+{
 	unsigned int i;
 	struct ia_css_resolution in_res, out_res;
 
@@ -6724,7 +6653,8 @@ static int ia_css_pipe_create_cas_scaler_desc_single_output(
     struct ia_css_frame_info *cas_scaler_in_info,
     struct ia_css_frame_info *cas_scaler_out_info,
     struct ia_css_frame_info *cas_scaler_vf_info,
-    struct ia_css_cas_binary_descr *descr) {
+    struct ia_css_cas_binary_descr *descr)
+{
 	unsigned int i;
 	unsigned int hor_ds_factor = 0, ver_ds_factor = 0;
 	int err = 0;
@@ -6842,7 +6772,8 @@ ERR:
 /* FIXME: merge most of this and single output version */
 static int ia_css_pipe_create_cas_scaler_desc(
     struct ia_css_pipe *pipe,
-    struct ia_css_cas_binary_descr *descr) {
+    struct ia_css_cas_binary_descr *descr)
+{
 	struct ia_css_frame_info in_info = IA_CSS_BINARY_DEFAULT_FRAME_INFO;
 	struct ia_css_frame_info *out_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
 	struct ia_css_frame_info *vf_out_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
@@ -6998,7 +6929,8 @@ ERR:
 }
 
 static void ia_css_pipe_destroy_cas_scaler_desc(struct ia_css_cas_binary_descr
-	*descr) {
+	*descr)
+{
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 			    "ia_css_pipe_destroy_cas_scaler_desc() enter:\n");
 	kfree(descr->in_info);
@@ -7016,7 +6948,8 @@ static void ia_css_pipe_destroy_cas_scaler_desc(struct ia_css_cas_binary_descr
 }
 
 static int
-load_yuvpp_binaries(struct ia_css_pipe *pipe) {
+load_yuvpp_binaries(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 	bool need_scaler = false;
 	struct ia_css_frame_info *vf_pp_in_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
@@ -7041,8 +6974,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 
 	mycs = &pipe->pipe_settings.yuvpp;
 
-	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++)
-	{
+	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
 		if (pipe->vf_output_info[i].res.width != 0) {
 			err = ia_css_util_check_vf_out_info(&pipe->output_info[i],
 							    &pipe->vf_output_info[i]);
@@ -7056,8 +6988,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 
 	/* we build up the pipeline starting at the end */
 	/* Capture post-processing */
-	if (need_scaler)
-	{
+	if (need_scaler) {
 		struct ia_css_binary_descr yuv_scaler_descr;
 
 		err = ia_css_pipe_create_cas_scaler_desc(pipe,
@@ -7091,18 +7022,14 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 				goto ERR;
 		}
 		ia_css_pipe_destroy_cas_scaler_desc(&cas_scaler_descr);
-	} else
-	{
+	} else {
 		mycs->num_output = 1;
 	}
 
 	if (need_scaler)
-	{
 		next_binary = &mycs->yuv_scaler_binary[0];
-	} else
-	{
+	else
 		next_binary = NULL;
-	}
 
 #if defined(ISP2401)
 	/*
@@ -7128,8 +7055,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 	need_isp_copy_binary = true;
 #endif /*  ISP2401 */
 
-	if (need_isp_copy_binary)
-	{
+	if (need_isp_copy_binary) {
 		err = load_copy_binary(pipe,
 					&mycs->copy_binary,
 					next_binary);
@@ -7159,8 +7085,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 	}
 
 	/* Viewfinder post-processing */
-	if (need_scaler)
-	{
+	if (need_scaler) {
 		for (i = 0, j = 0; i < mycs->num_yuv_scaler; i++) {
 			if (mycs->is_output_stage[i]) {
 				assert(j < 2);
@@ -7170,19 +7095,17 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 			}
 		}
 		mycs->num_vf_pp = j;
-	} else
-	{
+	} else {
 		vf_pp_in_info[0] =
 		    &mycs->copy_binary.vf_frame_info;
-		for (i = 1; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
+		for (i = 1; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++)
 			vf_pp_in_info[i] = NULL;
-		}
+
 		mycs->num_vf_pp = 1;
 	}
 	mycs->vf_pp_binary = kzalloc(mycs->num_vf_pp * sizeof(struct ia_css_binary),
 					GFP_KERNEL);
-	if (!mycs->vf_pp_binary)
-	{
+	if (!mycs->vf_pp_binary) {
 		err = -ENOMEM;
 		goto ERR;
 	}
@@ -7190,8 +7113,7 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 	{
 		struct ia_css_binary_descr vf_pp_descr;
 
-		for (i = 0; i < mycs->num_vf_pp; i++)
-		{
+		for (i = 0; i < mycs->num_vf_pp; i++) {
 			if (pipe->vf_output_info[i].res.width != 0) {
 				ia_css_pipe_get_vfpp_binarydesc(pipe,
 								&vf_pp_descr, vf_pp_in_info[i], &pipe->vf_output_info[i]);
@@ -7207,34 +7129,31 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe) {
 
 ERR:
 	if (need_scaler)
-	{
 		ia_css_pipe_destroy_cas_scaler_desc(&cas_scaler_descr);
-	}
+
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, "load_yuvpp_binaries() leave, err=%d\n",
 			    err);
 	return err;
 }
 
 static int
-unload_yuvpp_binaries(struct ia_css_pipe *pipe) {
+unload_yuvpp_binaries(struct ia_css_pipe *pipe)
+{
 	unsigned int i;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_YUVPP))
-	{
+	if ((!pipe) || (pipe->mode != IA_CSS_PIPE_ID_YUVPP)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 	ia_css_binary_unload(&pipe->pipe_settings.yuvpp.copy_binary);
 	for (i = 0; i < pipe->pipe_settings.yuvpp.num_yuv_scaler; i++)
-	{
 		ia_css_binary_unload(&pipe->pipe_settings.yuvpp.yuv_scaler_binary[i]);
-	}
+
 	for (i = 0; i < pipe->pipe_settings.yuvpp.num_vf_pp; i++)
-	{
 		ia_css_binary_unload(&pipe->pipe_settings.yuvpp.vf_pp_binary[i]);
-	}
+
 	kfree(pipe->pipe_settings.yuvpp.is_output_stage);
 	pipe->pipe_settings.yuvpp.is_output_stage = NULL;
 	kfree(pipe->pipe_settings.yuvpp.yuv_scaler_binary);
@@ -7284,25 +7203,23 @@ static int yuvpp_start(struct ia_css_pipe *pipe)
 }
 
 static int
-sh_css_pipe_unload_binaries(struct ia_css_pipe *pipe) {
+sh_css_pipe_unload_binaries(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 	/* PIPE_MODE_COPY has no binaries, but has output frames to outside*/
-	if (pipe->config.mode == IA_CSS_PIPE_MODE_COPY)
-	{
+	if (pipe->config.mode == IA_CSS_PIPE_MODE_COPY) {
 		IA_CSS_LEAVE_ERR_PRIVATE(0);
 		return 0;
 	}
 
-	switch (pipe->mode)
-	{
+	switch (pipe->mode) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		err = unload_preview_binaries(pipe);
 		break;
@@ -7323,7 +7240,8 @@ sh_css_pipe_unload_binaries(struct ia_css_pipe *pipe) {
 }
 
 static int
-sh_css_pipe_load_binaries(struct ia_css_pipe *pipe) {
+sh_css_pipe_load_binaries(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 
 	assert(pipe);
@@ -7333,8 +7251,7 @@ sh_css_pipe_load_binaries(struct ia_css_pipe *pipe) {
 	if (pipe->config.mode == IA_CSS_PIPE_MODE_COPY)
 		return err;
 
-	switch (pipe->mode)
-	{
+	switch (pipe->mode) {
 	case IA_CSS_PIPE_ID_PREVIEW:
 		err = load_preview_binaries(pipe);
 		break;
@@ -7353,8 +7270,7 @@ sh_css_pipe_load_binaries(struct ia_css_pipe *pipe) {
 		err = -EINVAL;
 		break;
 	}
-	if (err)
-	{
+	if (err) {
 		if (sh_css_pipe_unload_binaries(pipe)) {
 			/* currently css does not support multiple error returns in a single function,
 			    * using -EINVAL in this case */
@@ -7365,7 +7281,8 @@ sh_css_pipe_load_binaries(struct ia_css_pipe *pipe) {
 }
 
 static int
-create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
+create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
+{
 	struct ia_css_pipeline *me;
 	int err = 0;
 	struct ia_css_pipeline_stage *vf_pp_stage = NULL,
@@ -7392,15 +7309,13 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 #endif
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
-	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_YUVPP))
-	{
+	if ((!pipe) || (!pipe->stream) || (pipe->mode != IA_CSS_PIPE_ID_YUVPP)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 	me = &pipe->pipeline;
 	ia_css_pipeline_clean(me);
-	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++)
-	{
+	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
 		out_frame[i] = NULL;
 		vf_frame[i] = NULL;
 	}
@@ -7428,8 +7343,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 	/* the input frame can come from:
 	    *  a) memory: connect yuvscaler to me->in_frame
 	    *  b) sensor, via copy binary: connect yuvscaler to copy binary later on */
-	if (need_in_frameinfo_memory)
-	{
+	if (need_in_frameinfo_memory) {
 		/* TODO: improve for different input formats. */
 
 		/*
@@ -7478,13 +7392,11 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 		}
 
 		in_frame = &me->in_frame;
-	} else
-	{
+	} else {
 		in_frame = NULL;
 	}
 
-	for (i = 0; i < num_output_stage; i++)
-	{
+	for (i = 0; i < num_output_stage; i++) {
 		assert(i < IA_CSS_PIPE_MAX_OUTPUT_STAGE);
 		if (pipe->output_info[i].res.width != 0) {
 			err = init_out_frameinfo_defaults(pipe, &me->out_frame[i], i);
@@ -7511,8 +7423,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 	yuv_scaler_binary = pipe->pipe_settings.yuvpp.yuv_scaler_binary;
 	need_scaler = need_yuv_scaler_stage(pipe);
 
-	if (pipe->pipe_settings.yuvpp.copy_binary.info)
-	{
+	if (pipe->pipe_settings.yuvpp.copy_binary.info) {
 		struct ia_css_frame *in_frame_local = NULL;
 
 #ifdef ISP2401
@@ -7550,8 +7461,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 		}
 	}
 
-	if (need_scaler)
-	{
+	if (need_scaler) {
 		struct ia_css_frame *tmp_out_frame = NULL;
 		struct ia_css_frame *tmp_vf_frame = NULL;
 		struct ia_css_frame *tmp_in_frame = in_frame;
@@ -7591,8 +7501,7 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 				j++;
 			}
 		}
-	} else if (copy_stage)
-	{
+	} else if (copy_stage) {
 		if (vf_frame[0] && vf_frame[0]->info.res.width != 0) {
 			in_frame = copy_stage->args.out_vf_frame;
 			err = add_vf_pp_stage(pipe, in_frame, vf_frame[0], &vf_pp_binary[0],
@@ -7614,7 +7523,8 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe) {
 static int
 create_host_copy_pipeline(struct ia_css_pipe *pipe,
 			    unsigned int max_input_width,
-			    struct ia_css_frame *out_frame) {
+			    struct ia_css_frame *out_frame)
+{
 	struct ia_css_pipeline *me;
 	int err = 0;
 	struct ia_css_pipeline_stage_desc stage_desc;
@@ -7631,16 +7541,14 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 	out_frame->flash_state = IA_CSS_FRAME_FLASH_STATE_NONE;
 
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8)
-	{
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 		    &out_frame->info,
 		    JPEG_BYTES,
 		    1,
 		    IA_CSS_FRAME_FORMAT_BINARY_8,
 		    0);
-	} else if (out_frame->info.format == IA_CSS_FRAME_FORMAT_RAW)
-	{
+	} else if (out_frame->info.format == IA_CSS_FRAME_FORMAT_RAW) {
 		out_frame->info.raw_bit_depth =
 		ia_css_pipe_util_pipe_input_format_bpp(pipe);
 	}
@@ -7664,7 +7572,8 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 }
 
 static int
-create_host_isyscopy_capture_pipeline(struct ia_css_pipe *pipe) {
+create_host_isyscopy_capture_pipeline(struct ia_css_pipe *pipe)
+{
 	struct ia_css_pipeline *me = &pipe->pipeline;
 	int err = 0;
 	struct ia_css_pipeline_stage_desc stage_desc;
@@ -7708,7 +7617,8 @@ create_host_isyscopy_capture_pipeline(struct ia_css_pipe *pipe) {
 }
 
 static int
-create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
+create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
+{
 	struct ia_css_pipeline *me;
 	int err = 0;
 	enum ia_css_capture_mode mode;
@@ -7772,8 +7682,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 	/* Construct in_frame info (only in case we have dynamic input */
 	need_in_frameinfo_memory = pipe->stream->config.mode == IA_CSS_INPUT_MODE_MEMORY;
 #endif
-	if (need_in_frameinfo_memory)
-	{
+	if (need_in_frameinfo_memory) {
 		err = init_in_frameinfo_memory_defaults(pipe, &me->in_frame,
 							IA_CSS_FRAME_FORMAT_RAW);
 		if (err) {
@@ -7782,22 +7691,19 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 		}
 
 		in_frame = &me->in_frame;
-	} else
-	{
+	} else {
 		in_frame = NULL;
 	}
 
 	err = init_out_frameinfo_defaults(pipe, &me->out_frame[0], 0);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
 	out_frame = &me->out_frame[0];
 
 	/* Construct vf_frame info (only in case we have VF) */
-	if (pipe->enable_viewfinder[IA_CSS_PIPE_OUTPUT_STAGE_0])
-	{
+	if (pipe->enable_viewfinder[IA_CSS_PIPE_OUTPUT_STAGE_0]) {
 		if (mode == IA_CSS_CAPTURE_MODE_RAW || mode == IA_CSS_CAPTURE_MODE_BAYER) {
 			/* These modes don't support viewfinder output */
 			vf_frame = NULL;
@@ -7805,22 +7711,20 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 			init_vf_frameinfo_defaults(pipe, &me->vf_frame[0], 0);
 			vf_frame = &me->vf_frame[0];
 		}
-	} else
-	{
+	} else {
 		vf_frame = NULL;
 	}
 
 	copy_binary       = &pipe->pipe_settings.capture.copy_binary;
 	num_primary_stage = pipe->pipe_settings.capture.num_primary_stage;
-	if ((num_primary_stage == 0) && (mode == IA_CSS_CAPTURE_MODE_PRIMARY))
-	{
+	if ((num_primary_stage == 0) && (mode == IA_CSS_CAPTURE_MODE_PRIMARY)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
+
 	for (i = 0; i < num_primary_stage; i++)
-	{
 		primary_binary[i] = &pipe->pipe_settings.capture.primary_binary[i];
-	}
+
 	vf_pp_binary      = &pipe->pipe_settings.capture.vf_pp_binary;
 	pre_isp_binary    = &pipe->pipe_settings.capture.pre_isp_binary;
 	anr_gdc_binary    = &pipe->pipe_settings.capture.anr_gdc_binary;
@@ -7837,8 +7741,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 	need_yuv_pp = (yuv_scaler_binary && yuv_scaler_binary->info);
 	need_ldc = (capture_ldc_binary && capture_ldc_binary->info);
 
-	if (pipe->pipe_settings.capture.copy_binary.info)
-	{
+	if (pipe->pipe_settings.capture.copy_binary.info) {
 		if (raw) {
 			ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 #if defined(ISP2401)
@@ -7867,13 +7770,11 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
 		}
-	} else if (pipe->stream->config.continuous)
-	{
+	} else if (pipe->stream->config.continuous) {
 		in_frame = pipe->stream->last_pipe->continuous_frames[0];
 	}
 
-	if (mode == IA_CSS_CAPTURE_MODE_PRIMARY)
-	{
+	if (mode == IA_CSS_CAPTURE_MODE_PRIMARY) {
 		struct ia_css_frame *local_in_frame = NULL;
 		struct ia_css_frame *local_out_frame = NULL;
 
@@ -7918,8 +7819,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 		    IA_CSS_BINARY_MODE_COPY;
 		current_stage->args.copy_output = current_stage->args.copy_vf;
 	} else if (mode == IA_CSS_CAPTURE_MODE_ADVANCED ||
-		    mode == IA_CSS_CAPTURE_MODE_LOW_LIGHT)
-	{
+		    mode == IA_CSS_CAPTURE_MODE_LOW_LIGHT) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, pre_isp_binary,
 						    out_frames, in_frame, NULL);
@@ -7955,8 +7855,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
 		}
-	} else if (mode == IA_CSS_CAPTURE_MODE_BAYER)
-	{
+	} else if (mode == IA_CSS_CAPTURE_MODE_BAYER) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, pre_isp_binary,
 						    out_frames, in_frame, NULL);
@@ -7970,8 +7869,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 	}
 
 #ifndef ISP2401
-	if (need_pp && current_stage)
-	{
+	if (need_pp && current_stage) {
 		struct ia_css_frame *local_in_frame = NULL;
 
 		local_in_frame = current_stage->args.out_frame[0];
@@ -7989,8 +7887,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 					    need_yuv_pp ? NULL : out_frame,
 #else
 	/* ldc and capture_pp not supported in same pipeline */
-	if (need_ldc && current_stage)
-	{
+	if (need_ldc && current_stage) {
 		in_frame = current_stage->args.out_frame[0];
 		ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, capture_ldc_binary,
@@ -7998,8 +7895,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 		err = ia_css_pipeline_create_and_add_stage(me,
 			&stage_desc,
 			NULL);
-	} else if (need_pp && current_stage)
-	{
+	} else if (need_pp && current_stage) {
 		in_frame = current_stage->args.out_frame[0];
 		err = add_capture_pp_stage(pipe, me, in_frame, need_yuv_pp ? NULL : out_frame,
 #endif
@@ -8011,8 +7907,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 		}
 	}
 
-	if (need_yuv_pp && current_stage)
-	{
+	if (need_yuv_pp && current_stage) {
 		struct ia_css_frame *tmp_in_frame = current_stage->args.out_frame[0];
 		struct ia_css_frame *tmp_out_frame = NULL;
 
@@ -8044,8 +7939,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 	    * should not be considered as a clean solution. Proper
 	    * investigation should be done to come up with the clean solution.
 	    * */
-	if (mode != IA_CSS_CAPTURE_MODE_RAW && mode != IA_CSS_CAPTURE_MODE_BAYER && current_stage && vf_frame)
-	{
+	if (mode != IA_CSS_CAPTURE_MODE_RAW && mode != IA_CSS_CAPTURE_MODE_BAYER && current_stage && vf_frame) {
 		in_frame = current_stage->args.out_vf_frame;
 		err = add_vf_pp_stage(pipe, in_frame, vf_frame, vf_pp_binary,
 					&current_stage);
@@ -8063,7 +7957,8 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe) {
 }
 
 static int
-create_host_capture_pipeline(struct ia_css_pipe *pipe) {
+create_host_capture_pipeline(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
@@ -8072,8 +7967,7 @@ create_host_capture_pipeline(struct ia_css_pipe *pipe) {
 		err = create_host_isyscopy_capture_pipeline(pipe);
 	else
 		err = create_host_regular_capture_pipeline(pipe);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
@@ -8084,7 +7978,8 @@ create_host_capture_pipeline(struct ia_css_pipe *pipe) {
 }
 
 static int capture_start(
-    struct ia_css_pipe *pipe) {
+    struct ia_css_pipe *pipe)
+{
 	struct ia_css_pipeline *me;
 
 	int err = 0;
@@ -8155,7 +8050,8 @@ static int capture_start(
 static int
 sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 				    struct ia_css_frame_info *info,
-				    unsigned int idx) {
+				    unsigned int idx)
+{
 	assert(pipe);
 	assert(info);
 
@@ -8164,8 +8060,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 	*info = pipe->output_info[idx];
 	if (copy_on_sp(pipe) &&
-	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8)
-	{
+	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
 		ia_css_frame_info_init(
 		    info,
 		    JPEG_BYTES,
@@ -8173,8 +8068,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 		    IA_CSS_FRAME_FORMAT_BINARY_8,
 		    0);
 	} else if (info->format == IA_CSS_FRAME_FORMAT_RAW ||
-		    info->format == IA_CSS_FRAME_FORMAT_RAW_PACKED)
-	{
+		    info->format == IA_CSS_FRAME_FORMAT_RAW_PACKED) {
 		info->raw_bit_depth =
 		ia_css_pipe_util_pipe_input_format_bpp(pipe);
 	}
@@ -8188,7 +8082,8 @@ void
 ia_css_stream_send_input_frame(const struct ia_css_stream *stream,
 				const unsigned short *data,
 				unsigned int width,
-				unsigned int height) {
+				unsigned int height)
+{
 	assert(stream);
 
 	ia_css_inputfifo_send_input_frame(
@@ -8199,7 +8094,8 @@ ia_css_stream_send_input_frame(const struct ia_css_stream *stream,
 }
 
 void
-ia_css_stream_start_input_frame(const struct ia_css_stream *stream) {
+ia_css_stream_start_input_frame(const struct ia_css_stream *stream)
+{
 	assert(stream);
 
 	ia_css_inputfifo_start_frame(
@@ -8213,7 +8109,8 @@ ia_css_stream_send_input_line(const struct ia_css_stream *stream,
 				const unsigned short *data,
 				unsigned int width,
 				const unsigned short *data2,
-				unsigned int width2) {
+				unsigned int width2)
+{
 	assert(stream);
 
 	ia_css_inputfifo_send_line(stream->config.channel_id,
@@ -8224,7 +8121,8 @@ void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
 					enum atomisp_input_format format,
 					const unsigned short *data,
-					unsigned int width) {
+					unsigned int width)
+{
 	assert(stream);
 	if (!data || width == 0)
 		return;
@@ -8233,14 +8131,16 @@ ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
 }
 
 void
-ia_css_stream_end_input_frame(const struct ia_css_stream *stream) {
+ia_css_stream_end_input_frame(const struct ia_css_stream *stream)
+{
 	assert(stream);
 
 	ia_css_inputfifo_end_frame(stream->config.channel_id);
 }
 
 static void
-append_firmware(struct ia_css_fw_info **l, struct ia_css_fw_info *firmware) {
+append_firmware(struct ia_css_fw_info **l, struct ia_css_fw_info *firmware)
+{
 	IA_CSS_ENTER_PRIVATE("l = %p, firmware = %p", l, firmware);
 	if (!l) {
 		IA_CSS_ERROR("NULL fw_info");
@@ -8255,7 +8155,8 @@ append_firmware(struct ia_css_fw_info **l, struct ia_css_fw_info *firmware) {
 }
 
 static void
-remove_firmware(struct ia_css_fw_info **l, struct ia_css_fw_info *firmware) {
+remove_firmware(struct ia_css_fw_info **l, struct ia_css_fw_info *firmware)
+{
 	assert(*l);
 	assert(firmware);
 	(void)l;
@@ -8297,12 +8198,12 @@ static int upload_isp_code(struct ia_css_fw_info *firmware)
 }
 
 static int
-acc_load_extension(struct ia_css_fw_info *firmware) {
+acc_load_extension(struct ia_css_fw_info *firmware)
+{
 	int err;
 	struct ia_css_fw_info *hd = firmware;
 
-	while (hd)
-	{
+	while (hd) {
 		err = upload_isp_code(hd);
 		if (err)
 			return err;
@@ -8316,7 +8217,8 @@ acc_load_extension(struct ia_css_fw_info *firmware) {
 }
 
 static void
-acc_unload_extension(struct ia_css_fw_info *firmware) {
+acc_unload_extension(struct ia_css_fw_info *firmware)
+{
 	struct ia_css_fw_info *hd = firmware;
 	struct ia_css_fw_info *hdn = NULL;
 
@@ -8340,13 +8242,13 @@ acc_unload_extension(struct ia_css_fw_info *firmware) {
 /* Load firmware for extension */
 static int
 ia_css_pipe_load_extension(struct ia_css_pipe *pipe,
-			    struct ia_css_fw_info *firmware) {
+			    struct ia_css_fw_info *firmware)
+{
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("fw = %p pipe = %p", firmware, pipe);
 
-	if ((!firmware) || (!pipe))
-	{
+	if ((!firmware) || (!pipe)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -8364,7 +8266,8 @@ ia_css_pipe_load_extension(struct ia_css_pipe *pipe,
 /* Unload firmware for extension */
 static void
 ia_css_pipe_unload_extension(struct ia_css_pipe *pipe,
-				struct ia_css_fw_info *firmware) {
+				struct ia_css_fw_info *firmware)
+{
 	IA_CSS_ENTER_PRIVATE("fw = %p pipe = %p", firmware, pipe);
 
 	if ((!firmware) || (!pipe)) {
@@ -8383,7 +8286,8 @@ ia_css_pipe_unload_extension(struct ia_css_pipe *pipe,
 }
 
 bool
-ia_css_pipeline_uses_params(struct ia_css_pipeline *me) {
+ia_css_pipeline_uses_params(struct ia_css_pipeline *me)
+{
 	struct ia_css_pipeline_stage *stage;
 
 	assert(me);
@@ -8404,7 +8308,8 @@ ia_css_pipeline_uses_params(struct ia_css_pipeline *me) {
 
 static int
 sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
-				const void *acc_fw) {
+				const void *acc_fw)
+{
 	struct ia_css_fw_info *fw = (struct ia_css_fw_info *)acc_fw;
 	/* In QoS case, load_extension already called, so skipping */
 	int	err = 0;
@@ -8416,8 +8321,7 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
 			    "sh_css_pipeline_add_acc_stage() enter: pipeline=%p, acc_fw=%p\n",
 			    pipeline, acc_fw);
 
-	if (!err)
-	{
+	if (!err) {
 		struct ia_css_pipeline_stage_desc stage_desc;
 
 		ia_css_pipe_get_acc_stage_desc(&stage_desc, NULL, fw);
@@ -8436,7 +8340,8 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
     * Refer to "sh_css_internal.h" for details.
     */
 int ia_css_stream_capture_frame(struct ia_css_stream *stream,
-	unsigned int exp_id) {
+	unsigned int exp_id)
+{
 	struct sh_css_tag_descr tag_descr;
 	u32 encoded_tag_descr;
 	int err;
@@ -8478,7 +8383,8 @@ int ia_css_stream_capture(
     struct ia_css_stream *stream,
     int num_captures,
     unsigned int skip,
-    int offset) {
+    int offset)
+{
 	struct sh_css_tag_descr tag_descr;
 	unsigned int encoded_tag_descr;
 	int return_err;
@@ -8541,8 +8447,9 @@ void ia_css_stream_request_flash(struct ia_css_stream *stream)
 			ia_css_debug_dump_sp_sw_debug_info();
 			ia_css_debug_dump_debug_info(NULL);
 		}
-	} else
+	} else {
 		IA_CSS_LOG("SP is not running!");
+	}
 
 #endif
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
@@ -8550,7 +8457,8 @@ void ia_css_stream_request_flash(struct ia_css_stream *stream)
 }
 
 static void
-sh_css_init_host_sp_control_vars(void) {
+sh_css_init_host_sp_control_vars(void)
+{
 	const struct ia_css_fw_info *fw;
 	unsigned int HIVE_ADDR_ia_css_ispctrl_sp_isp_started;
 
@@ -8634,7 +8542,8 @@ void ia_css_pipe_config_defaults(struct ia_css_pipe_config *pipe_config)
 
 void
 ia_css_pipe_extra_config_defaults(struct ia_css_pipe_extra_config
-				    *extra_config) {
+				    *extra_config)
+{
 	if (!extra_config) {
 		IA_CSS_ERROR("NULL input parameter");
 		return;
@@ -8664,11 +8573,11 @@ void ia_css_stream_config_defaults(struct ia_css_stream_config *stream_config)
 }
 
 static int
-ia_css_acc_pipe_create(struct ia_css_pipe *pipe) {
+ia_css_acc_pipe_create(struct ia_css_pipe *pipe)
+{
 	int err = 0;
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("NULL input parameter");
 		return -EINVAL;
 	}
@@ -8678,9 +8587,7 @@ ia_css_acc_pipe_create(struct ia_css_pipe *pipe) {
 		pipe->config.acc_num_execs = 1;
 
 	if (pipe->config.acc_extension)
-	{
 		err = ia_css_pipe_load_extension(pipe, pipe->config.acc_extension);
-	}
 
 	return err;
 }
@@ -8699,9 +8606,8 @@ int ia_css_pipe_create(const struct ia_css_pipe_config *config,
 
 	err = ia_css_pipe_create_extra(config, NULL, pipe);
 
-	if (err == 0) {
+	if (err == 0)
 		IA_CSS_LOG("pipe created successfully = %p", *pipe);
-	}
 
 	IA_CSS_LEAVE_ERR_PRIVATE(err);
 
@@ -8711,7 +8617,8 @@ int ia_css_pipe_create(const struct ia_css_pipe_config *config,
 int
 ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 			    const struct ia_css_pipe_extra_config *extra_config,
-			    struct ia_css_pipe **pipe) {
+			    struct ia_css_pipe **pipe)
+{
 	int err = -EINVAL;
 	struct ia_css_pipe *internal_pipe = NULL;
 	unsigned int i;
@@ -8719,14 +8626,12 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	IA_CSS_ENTER_PRIVATE("config = %p, extra_config = %p and pipe = %p", config, extra_config, pipe);
 
 	/* do not allow to create more than the maximum limit */
-	if (my_css.pipe_counter >= IA_CSS_PIPELINE_NUM_MAX)
-	{
+	if (my_css.pipe_counter >= IA_CSS_PIPELINE_NUM_MAX) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-ENOSPC);
 		return -EINVAL;
 	}
 
-	if ((!pipe) || (!config))
-	{
+	if ((!pipe) || (!config)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -8735,8 +8640,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	ia_css_debug_dump_pipe_extra_config(extra_config);
 
 	err = create_pipe(config->mode, &internal_pipe, false);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
@@ -8748,8 +8652,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	else
 		ia_css_pipe_extra_config_defaults(&internal_pipe->extra_config);
 
-	if (config->mode == IA_CSS_PIPE_MODE_ACC)
-	{
+	if (config->mode == IA_CSS_PIPE_MODE_ACC) {
 		/* Temporary hack to migrate acceleration to CSS 2.0.
 		    * In the future the code for all pipe types should be
 		    * unified. */
@@ -8776,15 +8679,13 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	    set bayer_ds_out_res equal to IF output resolution(IF may do cropping on
 	    sensor output) or use default decimation factor 1. */
 	if (internal_pipe->extra_config.enable_raw_binning &&
-	    internal_pipe->config.bayer_ds_out_res.width)
-	{
+	    internal_pipe->config.bayer_ds_out_res.width) {
 		/* fill some code here, if no code is needed, please remove it during integration */
 	}
 
 	/* YUV downscaling */
 	if ((internal_pipe->config.vf_pp_in_res.width ||
-		internal_pipe->config.capt_pp_in_res.width))
-	{
+		internal_pipe->config.capt_pp_in_res.width)) {
 		enum ia_css_frame_format format;
 
 		if (internal_pipe->config.vf_pp_in_res.width) {
@@ -8805,8 +8706,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 		}
 	}
 	if (internal_pipe->config.vf_pp_in_res.width &&
-	    internal_pipe->config.mode == IA_CSS_PIPE_MODE_PREVIEW)
-	{
+	    internal_pipe->config.mode == IA_CSS_PIPE_MODE_PREVIEW) {
 		ia_css_frame_info_init(
 		    &internal_pipe->vf_yuv_ds_input_info,
 		    internal_pipe->config.vf_pp_in_res.width,
@@ -8814,8 +8714,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 		    IA_CSS_FRAME_FORMAT_YUV_LINE, 0);
 	}
 	/* handle bayer downscaling output info */
-	if (internal_pipe->config.bayer_ds_out_res.width)
-	{
+	if (internal_pipe->config.bayer_ds_out_res.width) {
 		ia_css_frame_info_init(
 		    &internal_pipe->bds_output_info,
 		    internal_pipe->config.bayer_ds_out_res.width,
@@ -8824,8 +8723,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	}
 
 	/* handle output info, assume always needed */
-	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++)
-	{
+	for (i = 0; i < IA_CSS_PIPE_MAX_OUTPUT_STAGE; i++) {
 		if (internal_pipe->config.output_info[i].res.width) {
 			err = sh_css_pipe_configure_output(
 				    internal_pipe,
@@ -8861,8 +8759,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 			}
 		}
 	}
-	if (internal_pipe->config.acc_extension)
-	{
+	if (internal_pipe->config.acc_extension) {
 		err = ia_css_pipe_load_extension(internal_pipe,
 						    internal_pipe->config.acc_extension);
 		if (err) {
@@ -8882,18 +8779,18 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 
 int
 ia_css_pipe_get_info(const struct ia_css_pipe *pipe,
-			struct ia_css_pipe_info *pipe_info) {
+			struct ia_css_pipe_info *pipe_info)
+{
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
 			    "ia_css_pipe_get_info()\n");
+
 	assert(pipe_info);
-	if (!pipe_info)
-	{
+	if (!pipe_info) {
 		ia_css_debug_dtrace(IA_CSS_DEBUG_ERROR,
 				    "ia_css_pipe_get_info: pipe_info cannot be NULL\n");
 		return -EINVAL;
 	}
-	if (!pipe || !pipe->stream)
-	{
+	if (!pipe || !pipe->stream) {
 		ia_css_debug_dtrace(IA_CSS_DEBUG_ERROR,
 				    "ia_css_pipe_get_info: ia_css_stream_create needs to be called before ia_css_[stream/pipe]_get_info\n");
 		return -EINVAL;
@@ -8921,40 +8818,36 @@ bool ia_css_pipe_has_dvs_stats(struct ia_css_pipe_info *pipe_info)
 int
 ia_css_pipe_override_frame_format(struct ia_css_pipe *pipe,
 				    int pin_index,
-				    enum ia_css_frame_format new_format) {
+				    enum ia_css_frame_format new_format)
+{
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p, pin_index = %d, new_formats = %d", pipe, pin_index, new_format);
 
-	if (!pipe)
-	{
+	if (!pipe) {
 		IA_CSS_ERROR("pipe is not set");
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
-	if (0 != pin_index && 1 != pin_index)
-	{
+	if (0 != pin_index && 1 != pin_index) {
 		IA_CSS_ERROR("pin index is not valid");
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
-	if (new_format != IA_CSS_FRAME_FORMAT_NV12_TILEY)
-	{
+	if (new_format != IA_CSS_FRAME_FORMAT_NV12_TILEY) {
 		IA_CSS_ERROR("new format is not valid");
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
-	} else
-	{
+	} else {
 		err = ia_css_pipe_check_format(pipe, new_format);
 		if (!err) {
-			if (pin_index == 0) {
+			if (pin_index == 0)
 				pipe->output_info[0].format = new_format;
-			} else {
+			else
 				pipe->vf_output_info[0].format = new_format;
-			}
 		}
 	}
 	IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -8964,7 +8857,8 @@ ia_css_pipe_override_frame_format(struct ia_css_pipe *pipe,
 #if !defined(ISP2401)
 /* Configuration of INPUT_SYSTEM_VERSION_2401 is done on SP */
 static int
-ia_css_stream_configure_rx(struct ia_css_stream *stream) {
+ia_css_stream_configure_rx(struct ia_css_stream *stream)
+{
 	struct ia_css_input_port *config;
 
 	assert(stream);
@@ -8993,11 +8887,10 @@ ia_css_stream_configure_rx(struct ia_css_stream *stream) {
 	if (config->compression.type == IA_CSS_CSI2_COMPRESSION_TYPE_NONE)
 		stream->csi_rx_config.comp = MIPI_PREDICTOR_NONE;
 	else
-	{
 		/* not implemented yet, requires extension of the rx_cfg_t
 		    * struct */
 		return -EINVAL;
-	}
+
 	stream->csi_rx_config.is_two_ppc = (stream->config.pixels_per_clock == 2);
 	stream->reconfigure_css_rx = true;
 	return 0;
@@ -9008,7 +8901,8 @@ static struct ia_css_pipe *
 find_pipe(struct ia_css_pipe *pipes[],
 	    unsigned int num_pipes,
 	    enum ia_css_pipe_mode mode,
-	    bool copy_pipe) {
+	    bool copy_pipe)
+{
 	unsigned int i;
 
 	assert(pipes);
@@ -9024,21 +8918,20 @@ find_pipe(struct ia_css_pipe *pipes[],
 }
 
 static int
-ia_css_acc_stream_create(struct ia_css_stream *stream) {
+ia_css_acc_stream_create(struct ia_css_stream *stream)
+{
 	int i;
 	int err = 0;
 
 	assert(stream);
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
 
-	if (!stream)
-	{
+	if (!stream) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
 
-	for (i = 0;  i < stream->num_pipes; i++)
-	{
+	for (i = 0;  i < stream->num_pipes; i++) {
 		struct ia_css_pipe *pipe = stream->pipes[i];
 
 		assert(pipe);
@@ -9052,14 +8945,12 @@ ia_css_acc_stream_create(struct ia_css_stream *stream) {
 
 	/* Map SP threads before doing anything. */
 	err = map_sp_threads(stream, true);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
 
-	for (i = 0;  i < stream->num_pipes; i++)
-	{
+	for (i = 0;  i < stream->num_pipes; i++) {
 		struct ia_css_pipe *pipe = stream->pipes[i];
 
 		assert(pipe);
@@ -9067,8 +8958,7 @@ ia_css_acc_stream_create(struct ia_css_stream *stream) {
 	}
 
 	err = create_host_pipeline_structure(stream);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
 	}
@@ -9082,7 +8972,8 @@ ia_css_acc_stream_create(struct ia_css_stream *stream) {
 
 static int
 metadata_info_init(const struct ia_css_metadata_config *mdc,
-		    struct ia_css_metadata_info *md) {
+		    struct ia_css_metadata_info *md)
+{
 	/* Either both width and height should be set or neither */
 	if ((mdc->resolution.height > 0) ^ (mdc->resolution.width > 0))
 		return -EINVAL;
@@ -9142,7 +9033,8 @@ int
 ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 			int num_pipes,
 			struct ia_css_pipe *pipes[],
-			struct ia_css_stream **stream) {
+			struct ia_css_stream **stream)
+{
 	struct ia_css_pipe *curr_pipe;
 	struct ia_css_stream *curr_stream = NULL;
 	bool spcopyonly;
@@ -9161,8 +9053,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	/* some checks */
 	if (num_pipes == 0 ||
 	    !stream ||
-	    !pipes)
-	{
+	    !pipes) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -9171,8 +9062,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 #if !defined(ISP2401)
 	/* We don't support metadata for JPEG stream, since they both use str2mem */
 	if (stream_config->input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8 &&
-	    stream_config->metadata_config.resolution.height > 0)
-	{
+	    stream_config->metadata_config.resolution.height > 0) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -9180,8 +9070,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 #endif
 
 #ifdef ISP2401
-	if (stream_config->online && stream_config->pack_raw_pixels)
-	{
+	if (stream_config->online && stream_config->pack_raw_pixels) {
 		IA_CSS_LOG("online and pack raw is invalid on input system 2401");
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
@@ -9236,16 +9125,14 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 	/* Currently we only supported metadata up to a certain size. */
 	err = metadata_info_init(&stream_config->metadata_config, &md_info);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
 
 	/* allocate the stream instance */
 	curr_stream = kzalloc(sizeof(struct ia_css_stream), GFP_KERNEL);
-	if (!curr_stream)
-	{
+	if (!curr_stream) {
 		err = -ENOMEM;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -9256,8 +9143,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	/* allocate pipes */
 	curr_stream->num_pipes = num_pipes;
 	curr_stream->pipes = kcalloc(num_pipes, sizeof(struct ia_css_pipe *), GFP_KERNEL);
-	if (!curr_stream->pipes)
-	{
+	if (!curr_stream->pipes) {
 		curr_stream->num_pipes = 0;
 		kfree(curr_stream);
 		curr_stream = NULL;
@@ -9280,8 +9166,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 #endif
 
 #ifdef ISP2401
-	if (curr_stream->config.online)
-	{
+	if (curr_stream->config.online) {
 		curr_stream->config.source.port.num_lanes =
 		    stream_config->source.port.num_lanes;
 		curr_stream->config.mode =  IA_CSS_INPUT_MODE_BUFFERED_SENSOR;
@@ -9299,8 +9184,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 		    curr_stream->config.lock_all);
 
 	/* copy mode specific stuff */
-	switch (curr_stream->config.mode)
-	{
+	switch (curr_stream->config.mode) {
 	case IA_CSS_INPUT_MODE_SENSOR:
 	case IA_CSS_INPUT_MODE_BUFFERED_SENSOR:
 #if !defined(ISP2401)
@@ -9342,14 +9226,12 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	err = aspect_ratio_crop_init(curr_stream,
 					pipes,
 					&aspect_ratio_crop_enabled);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		goto ERR;
 	}
 #endif
-	for (i = 0; i < num_pipes; i++)
-	{
+	for (i = 0; i < num_pipes; i++) {
 		struct ia_css_resolution effective_res;
 
 		curr_pipe = pipes[i];
@@ -9389,9 +9271,8 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 			if (pipes[i]->config.mode != IA_CSS_PIPE_MODE_ACC &&
 			    pipes[i]->config.mode != IA_CSS_PIPE_MODE_COPY) {
 				err = check_pipe_resolutions(pipes[i]);
-				if (err) {
+				if (err)
 					goto ERR;
-				}
 			}
 		}
 	}
@@ -9401,20 +9282,17 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 		goto ERR;
 	IA_CSS_LOG("isp_params_configs: %p", curr_stream->isp_params_configs);
 
-	if (num_pipes == 1 && pipes[0]->config.mode == IA_CSS_PIPE_MODE_ACC)
-	{
+	if (num_pipes == 1 && pipes[0]->config.mode == IA_CSS_PIPE_MODE_ACC) {
 		*stream = curr_stream;
 		err = ia_css_acc_stream_create(curr_stream);
 		goto ERR;
 	}
 	/* sensor binning */
-	if (!spcopyonly)
-	{
+	if (!spcopyonly) {
 		sensor_binning_changed =
 		    sh_css_params_set_binning_factor(curr_stream,
 							curr_stream->config.sensor_binning_factor);
-	} else
-	{
+	} else {
 		sensor_binning_changed = false;
 	}
 
@@ -9425,8 +9303,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	curr_stream->cont_capt = false;
 	/* Temporary hack: we give the preview pipe a reference to the capture
 	    * pipe in continuous capture mode. */
-	if (curr_stream->config.continuous)
-	{
+	if (curr_stream->config.continuous) {
 		/* Search for the preview pipe and create the copy pipe */
 		struct ia_css_pipe *preview_pipe;
 		struct ia_css_pipe *video_pipe;
@@ -9474,9 +9351,9 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 			preview_pipe->pipe_settings.preview.copy_pipe = copy_pipe;
 			copy_pipe->stream = curr_stream;
 		}
-		if (preview_pipe && curr_stream->cont_capt) {
+		if (preview_pipe && curr_stream->cont_capt)
 			preview_pipe->pipe_settings.preview.capture_pipe = capture_pipe;
-		}
+
 		if (video_pipe && !video_pipe->pipe_settings.video.copy_pipe) {
 			err = create_pipe(IA_CSS_PIPE_MODE_CAPTURE, &copy_pipe, true);
 			if (err)
@@ -9485,15 +9362,13 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 			video_pipe->pipe_settings.video.copy_pipe = copy_pipe;
 			copy_pipe->stream = curr_stream;
 		}
-		if (video_pipe && curr_stream->cont_capt) {
+		if (video_pipe && curr_stream->cont_capt)
 			video_pipe->pipe_settings.video.capture_pipe = capture_pipe;
-		}
-		if (preview_pipe && acc_pipe) {
+
+		if (preview_pipe && acc_pipe)
 			preview_pipe->pipe_settings.preview.acc_pipe = acc_pipe;
-		}
 	}
-	for (i = 0; i < num_pipes; i++)
-	{
+	for (i = 0; i < num_pipes; i++) {
 		curr_pipe = pipes[i];
 		/* set current stream */
 		curr_pipe->stream = curr_stream;
@@ -9514,8 +9389,7 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	}
 
 	/* now pipes have been configured, info should be available */
-	for (i = 0; i < num_pipes; i++)
-	{
+	for (i = 0; i < num_pipes; i++) {
 		struct ia_css_pipe_info *pipe_info = NULL;
 
 		curr_pipe = pipes[i];
@@ -9565,22 +9439,19 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 	/* Map SP threads before doing anything. */
 	err = map_sp_threads(curr_stream, true);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LOG("map_sp_threads: return_err=%d", err);
 		goto ERR;
 	}
 
-	for (i = 0; i < num_pipes; i++)
-	{
+	for (i = 0; i < num_pipes; i++) {
 		curr_pipe = pipes[i];
 		ia_css_pipe_map_queue(curr_pipe, true);
 	}
 
 	/* Create host side pipeline objects without stages */
 	err = create_host_pipeline_structure(curr_stream);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LOG("create_host_pipeline_structure: return_err=%d", err);
 		goto ERR;
 	}
@@ -9618,13 +9489,13 @@ ERR:
 }
 
 int
-ia_css_stream_destroy(struct ia_css_stream *stream) {
+ia_css_stream_destroy(struct ia_css_stream *stream)
+{
 	int i;
 	int err = 0;
 
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
-	if (!stream)
-	{
+	if (!stream) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR_PRIVATE(err);
 		return err;
@@ -9633,8 +9504,7 @@ ia_css_stream_destroy(struct ia_css_stream *stream) {
 	ia_css_stream_isp_parameters_uninit(stream);
 
 	if ((stream->last_pipe) &&
-	    ia_css_pipeline_is_mapped(stream->last_pipe->pipe_num))
-	{
+	    ia_css_pipeline_is_mapped(stream->last_pipe->pipe_num)) {
 #if defined(ISP2401)
 		bool free_mpi;
 
@@ -9696,8 +9566,7 @@ ia_css_stream_destroy(struct ia_css_stream *stream) {
 	}
 
 	/* remove references from pipes to stream */
-	for (i = 0; i < stream->num_pipes; i++)
-	{
+	for (i = 0; i < stream->num_pipes; i++) {
 		struct ia_css_pipe *entry = stream->pipes[i];
 
 		assert(entry);
@@ -9726,8 +9595,7 @@ ia_css_stream_destroy(struct ia_css_stream *stream) {
 	/* working mode: take out of the seed list */
 	if (my_css_save.mode == sh_css_mode_working) {
 		for (i = 0; i < MAX_ACTIVE_STREAMS; i++) {
-			if (my_css_save.stream_seeds[i].stream == stream)
-			{
+			if (my_css_save.stream_seeds[i].stream == stream) {
 				IA_CSS_LOG("took out stream %d", i);
 				my_css_save.stream_seeds[i].stream = NULL;
 				break;
@@ -9743,7 +9611,8 @@ ia_css_stream_destroy(struct ia_css_stream *stream) {
 
 int
 ia_css_stream_get_info(const struct ia_css_stream *stream,
-			struct ia_css_stream_info *stream_info) {
+			struct ia_css_stream_info *stream_info)
+{
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_get_info: enter/exit\n");
 	assert(stream);
 	assert(stream_info);
@@ -9759,15 +9628,15 @@ ia_css_stream_get_info(const struct ia_css_stream *stream,
     * The stream handle is used to identify the correct entry in the css_save struct
     */
 int
-ia_css_stream_load(struct ia_css_stream *stream) {
+ia_css_stream_load(struct ia_css_stream *stream)
+{
 	if (!IS_ISP2401) {
 		int i;
 		int err;
 
 		assert(stream);
 		ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,	"ia_css_stream_load() enter,\n");
-		for (i = 0; i < MAX_ACTIVE_STREAMS; i++)
-		{
+		for (i = 0; i < MAX_ACTIVE_STREAMS; i++) {
 			if (my_css_save.stream_seeds[i].stream == stream) {
 				int j;
 
@@ -9806,12 +9675,12 @@ ia_css_stream_load(struct ia_css_stream *stream) {
 }
 
 int
-ia_css_stream_start(struct ia_css_stream *stream) {
+ia_css_stream_start(struct ia_css_stream *stream)
+{
 	int err = 0;
 
 	IA_CSS_ENTER("stream = %p", stream);
-	if ((!stream) || (!stream->last_pipe))
-	{
+	if ((!stream) || (!stream->last_pipe)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
@@ -9821,8 +9690,7 @@ ia_css_stream_start(struct ia_css_stream *stream) {
 
 	/* Create host side pipeline. */
 	err = create_host_pipeline(stream);
-	if (err)
-	{
+	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
@@ -9835,8 +9703,7 @@ ia_css_stream_start(struct ia_css_stream *stream) {
 
 #if !defined(ISP2401)
 	/* Initialize mipi size checks */
-	if (stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR)
-	{
+	if (stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) {
 		unsigned int idx;
 		unsigned int port = (unsigned int)(stream->config.source.port.port);
 
@@ -9847,8 +9714,7 @@ ia_css_stream_start(struct ia_css_stream *stream) {
 	}
 #endif
 
-	if (stream->config.mode != IA_CSS_INPUT_MODE_MEMORY)
-	{
+	if (stream->config.mode != IA_CSS_INPUT_MODE_MEMORY) {
 		err = sh_css_config_input_network(stream);
 		if (err)
 			return err;
@@ -9860,7 +9726,8 @@ ia_css_stream_start(struct ia_css_stream *stream) {
 }
 
 int
-ia_css_stream_stop(struct ia_css_stream *stream) {
+ia_css_stream_stop(struct ia_css_stream *stream)
+{
 	int err = 0;
 
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_stop() enter/exit\n");
@@ -9871,22 +9738,19 @@ ia_css_stream_stop(struct ia_css_stream *stream) {
 
 #if !defined(ISP2401)
 	/* De-initialize mipi size checks */
-	if (stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR)
-	{
+	if (stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) {
 		unsigned int idx;
 		unsigned int port = (unsigned int)(stream->config.source.port.port);
 
-		for (idx = 0; idx < IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT; idx++) {
+		for (idx = 0; idx < IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT; idx++)
 			sh_css_sp_group.config.mipi_sizes_for_check[port][idx] = 0;
-		}
 	}
 #endif
 
-	if (!IS_ISP2401) {
+	if (!IS_ISP2401)
 		err = ia_css_pipeline_request_stop(&stream->last_pipe->pipeline);
-	} else {
+	else
 		err = sh_css_pipes_stop(stream);
-	}
 
 	if (err)
 		return err;
@@ -9899,16 +9763,16 @@ ia_css_stream_stop(struct ia_css_stream *stream) {
 }
 
 bool
-ia_css_stream_has_stopped(struct ia_css_stream *stream) {
+ia_css_stream_has_stopped(struct ia_css_stream *stream)
+{
 	bool stopped;
 
 	assert(stream);
 
-	if (!IS_ISP2401) {
+	if (!IS_ISP2401)
 		stopped = ia_css_pipeline_has_stopped(&stream->last_pipe->pipeline);
-	} else {
+	else
 		stopped = sh_css_pipes_have_stopped(stream);
-	}
 
 	return stopped;
 }
@@ -9919,7 +9783,8 @@ ia_css_stream_has_stopped(struct ia_css_stream *stream) {
     * The stream handle is used to identify the correct entry in the css_save struct
     */
 int
-ia_css_stream_unload(struct ia_css_stream *stream) {
+ia_css_stream_unload(struct ia_css_stream *stream)
+{
 	int i;
 
 	assert(stream);
@@ -9927,8 +9792,7 @@ ia_css_stream_unload(struct ia_css_stream *stream) {
 	/* some checks */
 	assert(stream);
 	for (i = 0; i < MAX_ACTIVE_STREAMS; i++)
-		if (my_css_save.stream_seeds[i].stream == stream)
-		{
+		if (my_css_save.stream_seeds[i].stream == stream) {
 			int j;
 
 			ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
@@ -9948,7 +9812,8 @@ ia_css_stream_unload(struct ia_css_stream *stream) {
 
 int
 ia_css_temp_pipe_to_pipe_id(const struct ia_css_pipe *pipe,
-			    enum ia_css_pipe_id *pipe_id) {
+			    enum ia_css_pipe_id *pipe_id)
+{
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_temp_pipe_to_pipe_id() enter/exit\n");
 	if (pipe)
 		*pipe_id = pipe->mode;
@@ -9959,18 +9824,21 @@ ia_css_temp_pipe_to_pipe_id(const struct ia_css_pipe *pipe,
 }
 
 enum atomisp_input_format
-ia_css_stream_get_format(const struct ia_css_stream *stream) {
+ia_css_stream_get_format(const struct ia_css_stream *stream)
+{
 	return stream->config.input_config.format;
 }
 
 bool
-ia_css_stream_get_two_pixels_per_clock(const struct ia_css_stream *stream) {
+ia_css_stream_get_two_pixels_per_clock(const struct ia_css_stream *stream)
+{
 	return (stream->config.pixels_per_clock == 2);
 }
 
 struct ia_css_binary *
 ia_css_stream_get_shading_correction_binary(const struct ia_css_stream
-	*stream) {
+	*stream)
+{
 	struct ia_css_pipe *pipe;
 
 	assert(stream);
@@ -9988,7 +9856,8 @@ ia_css_stream_get_shading_correction_binary(const struct ia_css_stream
 }
 
 struct ia_css_binary *
-ia_css_stream_get_dvs_binary(const struct ia_css_stream *stream) {
+ia_css_stream_get_dvs_binary(const struct ia_css_stream *stream)
+{
 	int i;
 	struct ia_css_pipe *video_pipe = NULL;
 
@@ -10007,7 +9876,8 @@ ia_css_stream_get_dvs_binary(const struct ia_css_stream *stream) {
 }
 
 struct ia_css_binary *
-ia_css_stream_get_3a_binary(const struct ia_css_stream *stream) {
+ia_css_stream_get_3a_binary(const struct ia_css_stream *stream)
+{
 	struct ia_css_pipe *pipe;
 	struct ia_css_binary *s3a_binary = NULL;
 
@@ -10029,7 +9899,8 @@ ia_css_stream_get_3a_binary(const struct ia_css_stream *stream) {
 
 int
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream,
-					unsigned int output_padded_width) {
+					unsigned int output_padded_width)
+{
 	struct ia_css_pipe *pipe;
 
 	assert(stream);
@@ -10046,7 +9917,8 @@ ia_css_stream_set_output_padded_width(struct ia_css_stream *stream,
 }
 
 static struct ia_css_binary *
-ia_css_pipe_get_shading_correction_binary(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_shading_correction_binary(const struct ia_css_pipe *pipe)
+{
 	struct ia_css_binary *binary = NULL;
 
 	assert(pipe);
@@ -10091,7 +9963,8 @@ ia_css_pipe_get_shading_correction_binary(const struct ia_css_pipe *pipe) {
 }
 
 static struct ia_css_binary *
-ia_css_pipe_get_s3a_binary(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_s3a_binary(const struct ia_css_pipe *pipe)
+{
 	struct ia_css_binary *binary = NULL;
 
 	assert(pipe);
@@ -10114,9 +9987,9 @@ ia_css_pipe_get_s3a_binary(const struct ia_css_pipe *pipe) {
 				}
 			}
 		} else if (pipe->config.default_capture_config.mode ==
-			    IA_CSS_CAPTURE_MODE_BAYER)
+			    IA_CSS_CAPTURE_MODE_BAYER) {
 			binary = (struct ia_css_binary *)&pipe->pipe_settings.capture.pre_isp_binary;
-		else if (pipe->config.default_capture_config.mode ==
+		} else if (pipe->config.default_capture_config.mode ==
 			    IA_CSS_CAPTURE_MODE_ADVANCED ||
 			    pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_LOW_LIGHT) {
 			if (pipe->config.isp_pipe_version == IA_CSS_PIPE_VERSION_1)
@@ -10138,7 +10011,8 @@ ia_css_pipe_get_s3a_binary(const struct ia_css_pipe *pipe) {
 }
 
 static struct ia_css_binary *
-ia_css_pipe_get_sdis_binary(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_sdis_binary(const struct ia_css_pipe *pipe)
+{
 	struct ia_css_binary *binary = NULL;
 
 	assert(pipe);
@@ -10158,14 +10032,16 @@ ia_css_pipe_get_sdis_binary(const struct ia_css_pipe *pipe) {
 }
 
 struct ia_css_pipeline *
-ia_css_pipe_get_pipeline(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_pipeline(const struct ia_css_pipe *pipe)
+{
 	assert(pipe);
 
 	return (struct ia_css_pipeline *)&pipe->pipeline;
 }
 
 unsigned int
-ia_css_pipe_get_pipe_num(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_pipe_num(const struct ia_css_pipe *pipe)
+{
 	assert(pipe);
 
 	/* KW was not sure this function was not returning a value
@@ -10182,7 +10058,8 @@ ia_css_pipe_get_pipe_num(const struct ia_css_pipe *pipe) {
 }
 
 unsigned int
-ia_css_pipe_get_isp_pipe_version(const struct ia_css_pipe *pipe) {
+ia_css_pipe_get_isp_pipe_version(const struct ia_css_pipe *pipe)
+{
 	assert(pipe);
 
 	return (unsigned int)pipe->config.isp_pipe_version;
@@ -10191,7 +10068,8 @@ ia_css_pipe_get_isp_pipe_version(const struct ia_css_pipe *pipe) {
 #define SP_START_TIMEOUT_US 30000000
 
 int
-ia_css_start_sp(void) {
+ia_css_start_sp(void)
+{
 	unsigned long timeout;
 	int err = 0;
 
@@ -10200,13 +10078,11 @@ ia_css_start_sp(void) {
 
 	/* waiting for the SP is completely started */
 	timeout = SP_START_TIMEOUT_US;
-	while ((ia_css_spctrl_get_state(SP0_ID) != IA_CSS_SP_SW_INITIALIZED) && timeout)
-	{
+	while ((ia_css_spctrl_get_state(SP0_ID) != IA_CSS_SP_SW_INITIALIZED) && timeout) {
 		timeout--;
 		udelay(1);
 	}
-	if (timeout == 0)
-	{
+	if (timeout == 0) {
 		IA_CSS_ERROR("timeout during SP initialization");
 		return -EINVAL;
 	}
@@ -10234,14 +10110,14 @@ ia_css_start_sp(void) {
 #define SP_SHUTDOWN_TIMEOUT_US 200000
 
 int
-ia_css_stop_sp(void) {
+ia_css_stop_sp(void)
+{
 	unsigned long timeout;
 	int err = 0;
 
 	IA_CSS_ENTER("void");
 
-	if (!sh_css_sp_is_running())
-	{
+	if (!sh_css_sp_is_running()) {
 		err = -EINVAL;
 		IA_CSS_LEAVE("SP already stopped : return_err=%d", err);
 
@@ -10253,8 +10129,7 @@ ia_css_stop_sp(void) {
 	if (!IS_ISP2401) {
 		sh_css_write_host2sp_command(host2sp_cmd_terminate);
 	} else {
-		if (!sh_css_write_host2sp_command(host2sp_cmd_terminate))
-		{
+		if (!sh_css_write_host2sp_command(host2sp_cmd_terminate)) {
 			IA_CSS_ERROR("Call to 'sh-css_write_host2sp_command()' failed");
 			ia_css_debug_dump_sp_sw_debug_info();
 			ia_css_debug_dump_debug_info(NULL);
@@ -10264,27 +10139,23 @@ ia_css_stop_sp(void) {
 	sh_css_sp_set_sp_running(false);
 
 	timeout = SP_SHUTDOWN_TIMEOUT_US;
-	while (!ia_css_spctrl_is_idle(SP0_ID) && timeout)
-	{
+	while (!ia_css_spctrl_is_idle(SP0_ID) && timeout) {
 		timeout--;
 		udelay(1);
 	}
 	if ((ia_css_spctrl_get_state(SP0_ID) != IA_CSS_SP_SW_TERMINATED))
 		IA_CSS_WARNING("SP has not terminated (SW)");
 
-	if (timeout == 0)
-	{
+	if (timeout == 0) {
 		IA_CSS_WARNING("SP is not idle");
 		ia_css_debug_dump_sp_sw_debug_info();
 	}
 	timeout = SP_SHUTDOWN_TIMEOUT_US;
-	while (!isp_ctrl_getbit(ISP0_ID, ISP_SC_REG, ISP_IDLE_BIT) && timeout)
-	{
+	while (!isp_ctrl_getbit(ISP0_ID, ISP_SC_REG, ISP_IDLE_BIT) && timeout) {
 		timeout--;
 		udelay(1);
 	}
-	if (timeout == 0)
-	{
+	if (timeout == 0) {
 		IA_CSS_WARNING("ISP is not idle");
 		ia_css_debug_dump_sp_sw_debug_info();
 	}
@@ -10299,7 +10170,8 @@ ia_css_stop_sp(void) {
 }
 
 int
-ia_css_update_continuous_frames(struct ia_css_stream *stream) {
+ia_css_update_continuous_frames(struct ia_css_stream *stream)
+{
 	struct ia_css_pipe *pipe;
 	unsigned int i;
 
@@ -10307,8 +10179,7 @@ ia_css_update_continuous_frames(struct ia_css_stream *stream) {
 	    IA_CSS_DEBUG_TRACE,
 	    "sh_css_update_continuous_frames() enter:\n");
 
-	if (!stream)
-	{
+	if (!stream) {
 		ia_css_debug_dtrace(
 		    IA_CSS_DEBUG_TRACE,
 		    "sh_css_update_continuous_frames() leave: invalid stream, return_void\n");
@@ -10319,10 +10190,9 @@ ia_css_update_continuous_frames(struct ia_css_stream *stream) {
 
 	for (i = stream->config.init_num_cont_raw_buf;
 		i < stream->config.target_num_cont_raw_buf; i++)
-	{
 		sh_css_update_host2sp_offline_frame(i,
 						    pipe->continuous_frames[i], pipe->cont_md_buffers[i]);
-	}
+
 	sh_css_update_host2sp_cont_num_raw_frames
 	(stream->config.target_num_cont_raw_buf, true);
 	ia_css_debug_dtrace(
@@ -10448,7 +10318,8 @@ void ia_css_pipe_map_queue(struct ia_css_pipe *pipe, bool map)
 
 #if CONFIG_ON_FRAME_ENQUEUE()
 static int set_config_on_frame_enqueue(struct ia_css_frame_info
-	*info, struct frame_data_wrapper *frame) {
+	*info, struct frame_data_wrapper *frame)
+{
 	frame->config_on_frame_enqueue.padded_width = 0;
 
 	/* currently we support configuration on frame enqueue only on YUV formats */
@@ -10456,11 +10327,11 @@ static int set_config_on_frame_enqueue(struct ia_css_frame_info
 	switch (info->format) {
 	case IA_CSS_FRAME_FORMAT_YUV420:
 	case IA_CSS_FRAME_FORMAT_NV12:
-		if (info->padded_width > info->res.width) {
+		if (info->padded_width > info->res.width)
 			frame->config_on_frame_enqueue.padded_width = info->padded_width;
-		} else if ((info->padded_width < info->res.width) && (info->padded_width > 0)) {
+		else if ((info->padded_width < info->res.width) && (info->padded_width > 0))
 			return -EINVAL;
-		}
+
 		/* nothing to do if width == padded width or padded width is zeroed (the same) */
 		break;
 	default:
@@ -10472,22 +10343,21 @@ static int set_config_on_frame_enqueue(struct ia_css_frame_info
 #endif
 
 int
-ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id) {
+ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id)
+{
 	int ret;
 
 	IA_CSS_ENTER("");
 
 	/* Only continuous streams have a tagger to which we can send the
 	    * unlock message. */
-	if (!stream || !stream->config.continuous)
-	{
+	if (!stream || !stream->config.continuous) {
 		IA_CSS_ERROR("invalid stream pointer");
 		return -EINVAL;
 	}
 
 	if (exp_id > IA_CSS_ISYS_MAX_EXPOSURE_ID ||
-	    exp_id < IA_CSS_ISYS_MIN_EXPOSURE_ID)
-	{
+	    exp_id < IA_CSS_ISYS_MIN_EXPOSURE_ID) {
 		IA_CSS_ERROR("invalid exposure ID: %d\n", exp_id);
 		return -EINVAL;
 	}
@@ -10506,7 +10376,8 @@ ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id) {
     */
 int
 ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
-				bool enable) {
+				bool enable)
+{
 	unsigned int thread_id;
 	struct ia_css_pipeline_stage *stage;
 	int err = 0;
@@ -10514,20 +10385,16 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
 	IA_CSS_ENTER("");
 
 	/* Parameter Check */
-	if (!pipe || !pipe->stream)
-	{
+	if (!pipe || !pipe->stream) {
 		IA_CSS_ERROR("Invalid Pipe.");
 		err = -EINVAL;
-	} else if (!(pipe->config.acc_extension))
-	{
+	} else if (!(pipe->config.acc_extension)) {
 		IA_CSS_ERROR("Invalid Pipe(No Extension Firmware)");
 		err = -EINVAL;
-	} else if (!sh_css_sp_is_running())
-	{
+	} else if (!sh_css_sp_is_running()) {
 		IA_CSS_ERROR("Leaving: queue unavailable.");
 		err = -EBUSY;
-	} else
-	{
+	} else {
 		/* Query the threadid and stage_num for the Extension firmware*/
 		ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
 		err = ia_css_pipeline_get_stage_from_fw(&pipe->pipeline, fw_handle, &stage);
@@ -10555,7 +10422,8 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
     */
 int
 ia_css_pipe_get_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
-				bool *enable) {
+				bool *enable)
+{
 	struct ia_css_pipeline_stage *stage;
 	unsigned int thread_id;
 	int err = 0;
@@ -10563,20 +10431,16 @@ ia_css_pipe_get_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
 	IA_CSS_ENTER("");
 
 	/* Parameter Check */
-	if (!pipe || !pipe->stream)
-	{
+	if (!pipe || !pipe->stream) {
 		IA_CSS_ERROR("Invalid Pipe.");
 		err = -EINVAL;
-	} else if (!(pipe->config.acc_extension))
-	{
+	} else if (!(pipe->config.acc_extension)) {
 		IA_CSS_ERROR("Invalid Pipe (No Extension Firmware).");
 		err = -EINVAL;
-	} else if (!sh_css_sp_is_running())
-	{
+	} else if (!sh_css_sp_is_running()) {
 		IA_CSS_ERROR("Leaving: queue unavailable.");
 		err = -EBUSY;
-	} else
-	{
+	} else {
 		/* Query the threadid and stage_num corresponding to the Extension firmware*/
 		ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
 		err = ia_css_pipeline_get_stage_from_fw(&pipe->pipeline, fw_handle, &stage);
@@ -10596,7 +10460,8 @@ int
 ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 					u32 fw_handle,
 					struct ia_css_isp_param_css_segments *css_seg,
-					struct ia_css_isp_param_isp_segments *isp_seg) {
+					struct ia_css_isp_param_isp_segments *isp_seg)
+{
 	unsigned int HIVE_ADDR_sp_group;
 	static struct sh_css_sp_group sp_group;
 	static struct sh_css_sp_stage sp_stage;
@@ -10614,20 +10479,16 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 	fw = &sh_css_sp_fw;
 
 	/* Parameter Check */
-	if (!pipe || !pipe->stream)
-	{
+	if (!pipe || !pipe->stream) {
 		IA_CSS_ERROR("Invalid Pipe.");
 		err = -EINVAL;
-	} else if (!(pipe->config.acc_extension))
-	{
+	} else if (!(pipe->config.acc_extension)) {
 		IA_CSS_ERROR("Invalid Pipe (No Extension Firmware).");
 		err = -EINVAL;
-	} else if (!sh_css_sp_is_running())
-	{
+	} else if (!sh_css_sp_is_running()) {
 		IA_CSS_ERROR("Leaving: queue unavailable.");
 		err = -EBUSY;
-	} else
-	{
+	} else {
 		/* Query the thread_id and stage_num corresponding to the Extension firmware */
 		ia_css_pipeline_get_sp_thread_id(ia_css_pipe_get_pipe_num(pipe), &thread_id);
 		err = ia_css_pipeline_get_stage_from_fw(&pipe->pipeline, fw_handle, &stage);
@@ -10678,7 +10539,8 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 static int
 aspect_ratio_crop_init(struct ia_css_stream *curr_stream,
 			struct ia_css_pipe *pipes[],
-			bool *do_crop_status) {
+			bool *do_crop_status)
+{
 	int err = 0;
 	int i;
 	struct ia_css_pipe *curr_pipe;
@@ -10687,15 +10549,13 @@ aspect_ratio_crop_init(struct ia_css_stream *curr_stream,
 	if ((!curr_stream) ||
 	    (curr_stream->num_pipes == 0) ||
 	    (!pipes) ||
-	    (!do_crop_status))
-	{
+	    (!do_crop_status)) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
 	}
 
-	for (i = 0; i < curr_stream->num_pipes; i++)
-	{
+	for (i = 0; i < curr_stream->num_pipes; i++) {
 		curr_pipe = pipes[i];
 		pipe_mask |= (1 << curr_pipe->config.mode);
 	}
@@ -10709,7 +10569,8 @@ aspect_ratio_crop_init(struct ia_css_stream *curr_stream,
 }
 
 static bool
-aspect_ratio_crop_check(bool enabled, struct ia_css_pipe *curr_pipe) {
+aspect_ratio_crop_check(bool enabled, struct ia_css_pipe *curr_pipe)
+{
 	bool status = false;
 
 	if ((curr_pipe) && enabled) {
@@ -10724,7 +10585,8 @@ aspect_ratio_crop_check(bool enabled, struct ia_css_pipe *curr_pipe) {
 
 static int
 aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
-		    struct ia_css_resolution *effective_res) {
+		    struct ia_css_resolution *effective_res)
+{
 	int err = 0;
 	struct ia_css_resolution crop_res;
 	struct ia_css_resolution *in_res = NULL;
@@ -10734,8 +10596,7 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 	bool use_capt_pp_in_res = false;
 
 	if ((!curr_pipe) ||
-	    (!effective_res))
-	{
+	    (!effective_res)) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -10743,8 +10604,7 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 
 	if ((curr_pipe->config.mode != IA_CSS_PIPE_MODE_PREVIEW) &&
 	    (curr_pipe->config.mode != IA_CSS_PIPE_MODE_VIDEO) &&
-	    (curr_pipe->config.mode != IA_CSS_PIPE_MODE_CAPTURE))
-	{
+	    (curr_pipe->config.mode != IA_CSS_PIPE_MODE_CAPTURE)) {
 		err = -EINVAL;
 		IA_CSS_LEAVE_ERR(err);
 		return err;
@@ -10765,8 +10625,7 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 	in_res = &curr_pipe->stream->config.input_config.effective_res;
 	out_res = &curr_pipe->output_info[0].res;
 
-	switch (curr_pipe->config.mode)
-	{
+	switch (curr_pipe->config.mode) {
 	case IA_CSS_PIPE_MODE_PREVIEW:
 		if (use_bds_output_info)
 			out_res = &curr_pipe->bds_output_info.res;
@@ -10793,20 +10652,19 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 
 	err = ia_css_frame_find_crop_resolution(in_res, out_res, &crop_res);
 	if (!err)
-	{
 		*effective_res = crop_res;
-	} else
-	{
+	else
 		/* in case of error fallback to default
 		    * effective resolution from driver. */
 		IA_CSS_LOG("ia_css_frame_find_crop_resolution() failed with err(%d)", err);
-	}
+
 	return err;
 }
 #endif
 
 static void
-sh_css_hmm_buffer_record_init(void) {
+sh_css_hmm_buffer_record_init(void)
+{
 	int i;
 
 	for (i = 0; i < MAX_HMM_BUFFER_NUM; i++)
@@ -10814,7 +10672,8 @@ sh_css_hmm_buffer_record_init(void) {
 }
 
 static void
-sh_css_hmm_buffer_record_uninit(void) {
+sh_css_hmm_buffer_record_uninit(void)
+{
 	int i;
 	struct sh_css_hmm_buffer_record *buffer_record = NULL;
 
@@ -10830,7 +10689,8 @@ sh_css_hmm_buffer_record_uninit(void) {
 }
 
 static void
-sh_css_hmm_buffer_record_reset(struct sh_css_hmm_buffer_record *buffer_record) {
+sh_css_hmm_buffer_record_reset(struct sh_css_hmm_buffer_record *buffer_record)
+{
 	assert(buffer_record);
 	buffer_record->in_use = false;
 	buffer_record->type = IA_CSS_BUFFER_TYPE_INVALID;
@@ -10841,7 +10701,8 @@ sh_css_hmm_buffer_record_reset(struct sh_css_hmm_buffer_record *buffer_record) {
 static struct sh_css_hmm_buffer_record
 *sh_css_hmm_buffer_record_acquire(struct ia_css_rmgr_vbuf_handle *h_vbuf,
 				    enum ia_css_buffer_type type,
-				    hrt_address kernel_ptr) {
+				    hrt_address kernel_ptr)
+{
 	int i;
 	struct sh_css_hmm_buffer_record *buffer_record = NULL;
 	struct sh_css_hmm_buffer_record *out_buffer_record = NULL;
@@ -10869,7 +10730,8 @@ static struct sh_css_hmm_buffer_record
 
 static struct sh_css_hmm_buffer_record
 *sh_css_hmm_buffer_record_validate(ia_css_ptr ddr_buffer_addr,
-				    enum ia_css_buffer_type type) {
+				    enum ia_css_buffer_type type)
+{
 	int i;
 	struct sh_css_hmm_buffer_record *buffer_record = NULL;
 	bool found_record = false;
-- 
GitLab


From dfdd8ceb6f748abe4848fe09dedc4e5fc4c5635b Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Mon, 19 Apr 2021 21:25:56 +0200
Subject: [PATCH 0993/3804] media: staging: media: atomisp: Remove all
 redundant assertions in sh_css.c

Remove the remaining assert() calls in places where the asserted
condition is already handled by an explicit check.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index ac748da7a7ef5..8484c83ad29fa 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -4119,9 +4119,6 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	/* TODO: change next to correct pool for optimization */
 	ia_css_rmgr_acq_vbuf(hmm_buffer_pool, &h_vbuf);
 
-	assert(h_vbuf);
-	assert(h_vbuf->vptr != 0x0);
-
 	if ((!h_vbuf) || (h_vbuf->vptr == 0x0)) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
@@ -4921,7 +4918,6 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	enum ia_css_pipe_id main_pipe_id;
 	int i;
 
-	assert(stream);
 	if (!stream) {
 		IA_CSS_LOG("stream does NOT exist!");
 		err = -EINVAL;
@@ -4929,7 +4925,6 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	}
 
 	main_pipe = stream->last_pipe;
-	assert(main_pipe);
 	if (!main_pipe) {
 		IA_CSS_LOG("main_pipe does NOT exist!");
 		err = -EINVAL;
@@ -4987,7 +4982,6 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 			copy_pipe = main_pipe->pipe_settings.video.copy_pipe;
 
 		/* return the error code if "Copy Pipe" does NOT exist */
-		assert(copy_pipe);
 		if (!copy_pipe) {
 			IA_CSS_LOG("Copy Pipe does NOT exist!");
 			err = -EINVAL;
@@ -5022,7 +5016,6 @@ sh_css_pipes_have_stopped(struct ia_css_stream *stream)
 
 	int i;
 
-	assert(stream);
 	if (!stream) {
 		IA_CSS_LOG("stream does NOT exist!");
 		rval = false;
@@ -5030,7 +5023,6 @@ sh_css_pipes_have_stopped(struct ia_css_stream *stream)
 	}
 
 	main_pipe = stream->last_pipe;
-	assert(main_pipe);
 
 	if (!main_pipe) {
 		IA_CSS_LOG("main_pipe does NOT exist!");
@@ -5071,7 +5063,6 @@ sh_css_pipes_have_stopped(struct ia_css_stream *stream)
 			copy_pipe = main_pipe->pipe_settings.video.copy_pipe;
 
 		/* return if "Copy Pipe" does NOT exist */
-		assert(copy_pipe);
 		if (!copy_pipe) {
 			IA_CSS_LOG("Copy Pipe does NOT exist!");
 
@@ -8783,8 +8774,6 @@ ia_css_pipe_get_info(const struct ia_css_pipe *pipe,
 {
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
 			    "ia_css_pipe_get_info()\n");
-
-	assert(pipe_info);
 	if (!pipe_info) {
 		ia_css_debug_dtrace(IA_CSS_DEBUG_ERROR,
 				    "ia_css_pipe_get_info: pipe_info cannot be NULL\n");
@@ -8923,7 +8912,6 @@ ia_css_acc_stream_create(struct ia_css_stream *stream)
 	int i;
 	int err = 0;
 
-	assert(stream);
 	IA_CSS_ENTER_PRIVATE("stream = %p", stream);
 
 	if (!stream) {
@@ -8934,7 +8922,6 @@ ia_css_acc_stream_create(struct ia_css_stream *stream)
 	for (i = 0;  i < stream->num_pipes; i++) {
 		struct ia_css_pipe *pipe = stream->pipes[i];
 
-		assert(pipe);
 		if (!pipe) {
 			IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 			return -EINVAL;
-- 
GitLab


From 9d634547323e7cb7dd11c68075e8128ce68544ae Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Mon, 19 Apr 2021 21:25:57 +0200
Subject: [PATCH 0994/3804] media: staging: media: atomisp: Remove a
 superfluous else clause in sh_css.c

Remove a superfluous else clause in ia_css_pipe_check_format(): the
preceding if branch always returns, so the success path needs no else.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 8484c83ad29fa..4d4f030e0fe03 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -5247,11 +5247,9 @@ ia_css_pipe_check_format(struct ia_css_pipe *pipe,
 		IA_CSS_ERROR("Requested format is not supported by binary");
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
-	} else
-	{
-		IA_CSS_LEAVE_ERR_PRIVATE(0);
-		return 0;
 	}
+	IA_CSS_LEAVE_ERR_PRIVATE(0);
+	return 0;
 }
 
 static int load_video_binaries(struct ia_css_pipe *pipe)
-- 
GitLab


From d4bc34d18201120b247506b4a6ed17af694dfcf7 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Mon, 19 Apr 2021 21:26:00 +0200
Subject: [PATCH 0995/3804] media: staging: media: atomisp: Replace if else
 clause with a ternary

Use the ternary operator for conditional variable assignment in
create_host_video_pipeline().

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 4d4f030e0fe03..346f57147d90a 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -3617,11 +3617,8 @@ static int create_host_video_pipeline(struct ia_css_pipe *pipe)
 		struct ia_css_frame *tmp_out_frame = NULL;
 
 		for (i = 0; i < num_yuv_scaler; i++) {
-			if (is_output_stage[i]) {
-				tmp_out_frame = out_frame;
-			} else {
-				tmp_out_frame = NULL;
-			}
+			tmp_out_frame = is_output_stage[i] ? out_frame : NULL;
+
 			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame, tmp_out_frame,
 						   NULL,
 						   &yuv_scaler_binary[i],
-- 
GitLab


From 7796e455170efa1823457b17a292b1c65bb8c1e0 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Fri, 23 Apr 2021 18:13:48 +0200
Subject: [PATCH 0996/3804] media: staging: media: atomisp: Fix alignment and
 line length issues

Fix alignment style issues and adjacent line length issues in sh_css.c

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 619 +++++++++++----------
 1 file changed, 333 insertions(+), 286 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 346f57147d90a..0482bfa2c490f 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -239,8 +239,8 @@ ia_css_reset_defaults(struct sh_css *css);
 static void
 sh_css_init_host_sp_control_vars(void);
 
-static int set_num_primary_stages(unsigned int *num,
-	enum ia_css_pipe_version version);
+static int
+set_num_primary_stages(unsigned int *num, enum ia_css_pipe_version version);
 
 static bool
 need_capture_pp(const struct ia_css_pipe *pipe);
@@ -2996,9 +2996,8 @@ static int add_firmwares(
 
 		ia_css_pipe_get_firmwares_stage_desc(&stage_desc, binary,
 						     out, in, vf, fw, binary_mode);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			&extra_stage);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   &extra_stage);
 		if (err)
 			return err;
 		if (fw->info.isp.sp.enable.output != 0)
@@ -3106,9 +3105,8 @@ static int add_yuv_scaler_stage(
 		ia_css_pipe_get_generic_stage_desc(&stage_desc,
 						   yuv_scaler_binary, out_frames, in_frame, vf_frame);
 	}
-	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc,
-		pre_vf_pp_stage);
+	err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+						   pre_vf_pp_stage);
 	if (err)
 		return err;
 	in_frame = (*pre_vf_pp_stage)->args.out_frame[0];
@@ -3166,9 +3164,8 @@ static int add_capture_pp_stage(
 		ia_css_pipe_get_generic_stage_desc(&stage_desc,
 						   capture_pp_binary, out_frames, NULL, vf_frame);
 	}
-	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc,
-		capture_pp_stage);
+	err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+						   capture_pp_stage);
 	if (err)
 		return err;
 	err = add_firmwares(me, capture_pp_binary, pipe->output_stage, last_fw,
@@ -3523,9 +3520,8 @@ static int create_host_video_pipeline(struct ia_css_pipe *pipe)
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
 						   out_frames, NULL, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			&copy_stage);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   &copy_stage);
 		if (err)
 			goto ERR;
 		in_frame = me->stages->args.out_frame[0];
@@ -3552,9 +3548,8 @@ static int create_host_video_pipeline(struct ia_css_pipe *pipe)
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, video_binary,
 						   out_frames, in_frame, vf_frame);
 	}
-	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc,
-		&video_stage);
+	err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+						   &video_stage);
 	if (err)
 		goto ERR;
 
@@ -3619,8 +3614,8 @@ static int create_host_video_pipeline(struct ia_css_pipe *pipe)
 		for (i = 0; i < num_yuv_scaler; i++) {
 			tmp_out_frame = is_output_stage[i] ? out_frame : NULL;
 
-			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame, tmp_out_frame,
-						   NULL,
+			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame,
+						   tmp_out_frame, NULL,
 						   &yuv_scaler_binary[i],
 						   &yuv_scaler_stage);
 
@@ -3759,9 +3754,8 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe)
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
 						   out_frames, NULL, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			&copy_stage);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   &copy_stage);
 		if (err)
 			goto ERR;
 		in_frame = me->stages->args.out_frame[0];
@@ -3787,9 +3781,8 @@ create_host_preview_pipeline(struct ia_css_pipe *pipe)
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, preview_binary,
 						   out_frames, in_frame, NULL);
 	}
-	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc,
-		&preview_stage);
+	err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+						   &preview_stage);
 	if (err)
 		goto ERR;
 	/* If we use copy iso preview, the input must be yuv iso raw */
@@ -4122,8 +4115,8 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	}
 
 	hmm_store(h_vbuf->vptr,
-		   (void *)(&ddr_buffer),
-		   sizeof(struct sh_css_hmm_buffer));
+		  (void *)(&ddr_buffer),
+		  sizeof(struct sh_css_hmm_buffer));
 	if ((buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS)
 	    || (buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS)
 	    || (buf_type == IA_CSS_BUFFER_TYPE_LACE_STATISTICS)) {
@@ -4284,8 +4277,8 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 		}
 
 		hmm_load(ddr_buffer_addr,
-			  &ddr_buffer,
-			  sizeof(struct sh_css_hmm_buffer));
+			 &ddr_buffer,
+			 sizeof(struct sh_css_hmm_buffer));
 
 		/* if the kernel_ptr is 0 or an invalid, return an error.
 		 * do not access the buffer via the kernal_ptr.
@@ -4938,7 +4931,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 	for (i = 0; i < stream->num_pipes; i++) {
 		/* send the "stop" request to the "ia_css_pipe" instance */
 		IA_CSS_LOG("Send the stop-request to the pipe: pipe_id=%d",
-			stream->pipes[i]->pipeline.pipe_id);
+			   stream->pipes[i]->pipeline.pipe_id);
 		err = ia_css_pipeline_request_stop(&stream->pipes[i]->pipeline);
 
 		/*
@@ -4987,7 +4980,7 @@ sh_css_pipes_stop(struct ia_css_stream *stream)
 
 		/* send the "stop" request to "Copy Pipe" */
 		IA_CSS_LOG("Send the stop-request to the pipe: pipe_id=%d",
-			copy_pipe->pipeline.pipe_id);
+			   copy_pipe->pipeline.pipe_id);
 		err = ia_css_pipeline_request_stop(&copy_pipe->pipeline);
 	}
 
@@ -5814,7 +5807,7 @@ static bool need_capt_ldc(
 }
 
 static int set_num_primary_stages(unsigned int *num,
-	enum ia_css_pipe_version version)
+				  enum ia_css_pipe_version version)
 {
 	int err = 0;
 
@@ -6014,10 +6007,13 @@ static int load_primary_binaries(
 			capt_pp_in_info = &prim_out_info;
 
 		ia_css_pipe_get_capturepp_binarydesc(pipe,
-							&capture_pp_descr, capt_pp_in_info,
-							&capt_pp_out_info, &vf_info);
+						     &capture_pp_descr,
+						     capt_pp_in_info,
+						     &capt_pp_out_info,
+						     &vf_info);
+
 		err = ia_css_binary_find(&capture_pp_descr,
-					    &mycs->capture_pp_binary);
+					 &mycs->capture_pp_binary);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -6027,11 +6023,12 @@ static int load_primary_binaries(
 			struct ia_css_binary_descr capt_ldc_descr;
 
 			ia_css_pipe_get_ldc_binarydesc(pipe,
-							&capt_ldc_descr, &prim_out_info,
-							&capt_ldc_out_info);
+						       &capt_ldc_descr,
+						       &prim_out_info,
+						       &capt_ldc_out_info);
 
 			err = ia_css_binary_find(&capt_ldc_descr,
-						    &mycs->capture_ldc_binary);
+						 &mycs->capture_ldc_binary);
 			if (err) {
 				IA_CSS_LEAVE_ERR_PRIVATE(err);
 				return err;
@@ -6048,8 +6045,9 @@ static int load_primary_binaries(
 		if (pipe->enable_viewfinder[IA_CSS_PIPE_OUTPUT_STAGE_0] &&
 		    (i == mycs->num_primary_stage - 1))
 			local_vf_info = &vf_info;
-		ia_css_pipe_get_primary_binarydesc(pipe, &prim_descr[i], &prim_in_info,
-						    &prim_out_info, local_vf_info, i);
+		ia_css_pipe_get_primary_binarydesc(pipe, &prim_descr[i],
+						   &prim_in_info, &prim_out_info,
+						   local_vf_info, i);
 		err = ia_css_binary_find(&prim_descr[i], &mycs->primary_binary[i]);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -6101,8 +6099,8 @@ static int load_primary_binaries(
 	/* ISP Copy */
 	if (need_isp_copy_binary) {
 		err = load_copy_binary(pipe,
-					&mycs->copy_binary,
-					&mycs->primary_binary[0]);
+				       &mycs->copy_binary,
+				       &mycs->primary_binary[0]);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -6211,7 +6209,7 @@ static int load_advanced_binaries(
 
 	assert(pipe);
 	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE ||
-		pipe->mode == IA_CSS_PIPE_ID_COPY);
+	       pipe->mode == IA_CSS_PIPE_ID_COPY);
 	if (pipe->pipe_settings.capture.pre_isp_binary.info)
 		return 0;
 	pipe_out_info = &pipe->output_info[0];
@@ -6224,17 +6222,18 @@ static int load_advanced_binaries(
 	need_pp = need_capture_pp(pipe);
 
 	ia_css_frame_info_set_format(&vf_info,
-					IA_CSS_FRAME_FORMAT_YUV_LINE);
+				     IA_CSS_FRAME_FORMAT_YUV_LINE);
 
 	/* we build up the pipeline starting at the end */
 	/* Capture post-processing */
 	if (need_pp) {
 		struct ia_css_binary_descr capture_pp_descr;
 
-		ia_css_pipe_get_capturepp_binarydesc(pipe,
-							&capture_pp_descr, &post_out_info, pipe_out_info, &vf_info);
+		ia_css_pipe_get_capturepp_binarydesc(pipe, &capture_pp_descr,
+						     &post_out_info,
+						     pipe_out_info, &vf_info);
 		err = ia_css_binary_find(&capture_pp_descr,
-					    &pipe->pipe_settings.capture.capture_pp_binary);
+					 &pipe->pipe_settings.capture.capture_pp_binary);
 		if (err)
 			return err;
 	} else {
@@ -6245,10 +6244,11 @@ static int load_advanced_binaries(
 	{
 		struct ia_css_binary_descr post_gdc_descr;
 
-		ia_css_pipe_get_post_gdc_binarydesc(pipe,
-						    &post_gdc_descr, &post_in_info, &post_out_info, &vf_info);
+		ia_css_pipe_get_post_gdc_binarydesc(pipe, &post_gdc_descr,
+						    &post_in_info,
+						    &post_out_info, &vf_info);
 		err = ia_css_binary_find(&post_gdc_descr,
-					    &pipe->pipe_settings.capture.post_isp_binary);
+					 &pipe->pipe_settings.capture.post_isp_binary);
 		if (err)
 			return err;
 	}
@@ -6258,9 +6258,9 @@ static int load_advanced_binaries(
 		struct ia_css_binary_descr gdc_descr;
 
 		ia_css_pipe_get_gdc_binarydesc(pipe, &gdc_descr, &gdc_in_info,
-						&pipe->pipe_settings.capture.post_isp_binary.in_frame_info);
+					       &pipe->pipe_settings.capture.post_isp_binary.in_frame_info);
 		err = ia_css_binary_find(&gdc_descr,
-					    &pipe->pipe_settings.capture.anr_gdc_binary);
+					 &pipe->pipe_settings.capture.anr_gdc_binary);
 		if (err)
 			return err;
 	}
@@ -6272,9 +6272,9 @@ static int load_advanced_binaries(
 		struct ia_css_binary_descr pre_gdc_descr;
 
 		ia_css_pipe_get_pre_gdc_binarydesc(pipe, &pre_gdc_descr, &pre_in_info,
-						    &pipe->pipe_settings.capture.anr_gdc_binary.in_frame_info);
+						   &pipe->pipe_settings.capture.anr_gdc_binary.in_frame_info);
 		err = ia_css_binary_find(&pre_gdc_descr,
-					    &pipe->pipe_settings.capture.pre_isp_binary);
+					 &pipe->pipe_settings.capture.pre_isp_binary);
 		if (err)
 			return err;
 	}
@@ -6296,7 +6296,7 @@ static int load_advanced_binaries(
 		ia_css_pipe_get_vfpp_binarydesc(pipe,
 						&vf_pp_descr, vf_pp_in_info, pipe_vf_out_info);
 		err = ia_css_binary_find(&vf_pp_descr,
-					    &pipe->pipe_settings.capture.vf_pp_binary);
+					 &pipe->pipe_settings.capture.vf_pp_binary);
 		if (err)
 			return err;
 	}
@@ -6308,8 +6308,8 @@ static int load_advanced_binaries(
 #endif
 	if (need_isp_copy)
 		load_copy_binary(pipe,
-				    &pipe->pipe_settings.capture.copy_binary,
-				    &pipe->pipe_settings.capture.pre_isp_binary);
+				 &pipe->pipe_settings.capture.copy_binary,
+				 &pipe->pipe_settings.capture.pre_isp_binary);
 
 	return err;
 }
@@ -6324,7 +6324,7 @@ static int load_bayer_isp_binaries(
 	IA_CSS_ENTER_PRIVATE("");
 	assert(pipe);
 	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE ||
-		pipe->mode == IA_CSS_PIPE_ID_COPY);
+	       pipe->mode == IA_CSS_PIPE_ID_COPY);
 	pipe_out_info = &pipe->output_info[0];
 
 	if (pipe->pipe_settings.capture.pre_isp_binary.info)
@@ -6335,11 +6335,11 @@ static int load_bayer_isp_binaries(
 		return err;
 
 	ia_css_pipe_get_pre_de_binarydesc(pipe, &pre_de_descr,
-					    &pre_isp_in_info,
-					    pipe_out_info);
+					  &pre_isp_in_info,
+					  pipe_out_info);
 
 	err = ia_css_binary_find(&pre_de_descr,
-				    &pipe->pipe_settings.capture.pre_isp_binary);
+				 &pipe->pipe_settings.capture.pre_isp_binary);
 
 	return err;
 }
@@ -6358,7 +6358,7 @@ static int load_low_light_binaries(
 	IA_CSS_ENTER_PRIVATE("");
 	assert(pipe);
 	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE ||
-		pipe->mode == IA_CSS_PIPE_ID_COPY);
+	       pipe->mode == IA_CSS_PIPE_ID_COPY);
 
 	if (pipe->pipe_settings.capture.pre_isp_binary.info)
 		return 0;
@@ -6373,17 +6373,18 @@ static int load_low_light_binaries(
 	need_pp = need_capture_pp(pipe);
 
 	ia_css_frame_info_set_format(&vf_info,
-					IA_CSS_FRAME_FORMAT_YUV_LINE);
+				     IA_CSS_FRAME_FORMAT_YUV_LINE);
 
 	/* we build up the pipeline starting at the end */
 	/* Capture post-processing */
 	if (need_pp) {
 		struct ia_css_binary_descr capture_pp_descr;
 
-		ia_css_pipe_get_capturepp_binarydesc(pipe,
-							&capture_pp_descr, &post_out_info, pipe_out_info, &vf_info);
+		ia_css_pipe_get_capturepp_binarydesc(pipe, &capture_pp_descr,
+						     &post_out_info,
+						     pipe_out_info, &vf_info);
 		err = ia_css_binary_find(&capture_pp_descr,
-					    &pipe->pipe_settings.capture.capture_pp_binary);
+					 &pipe->pipe_settings.capture.capture_pp_binary);
 		if (err)
 			return err;
 	} else {
@@ -6397,7 +6398,7 @@ static int load_low_light_binaries(
 		ia_css_pipe_get_post_anr_binarydesc(pipe,
 						    &post_anr_descr, &post_in_info, &post_out_info, &vf_info);
 		err = ia_css_binary_find(&post_anr_descr,
-					    &pipe->pipe_settings.capture.post_isp_binary);
+					 &pipe->pipe_settings.capture.post_isp_binary);
 		if (err)
 			return err;
 	}
@@ -6407,9 +6408,9 @@ static int load_low_light_binaries(
 		struct ia_css_binary_descr anr_descr;
 
 		ia_css_pipe_get_anr_binarydesc(pipe, &anr_descr, &anr_in_info,
-						&pipe->pipe_settings.capture.post_isp_binary.in_frame_info);
+					       &pipe->pipe_settings.capture.post_isp_binary.in_frame_info);
 		err = ia_css_binary_find(&anr_descr,
-					    &pipe->pipe_settings.capture.anr_gdc_binary);
+					 &pipe->pipe_settings.capture.anr_gdc_binary);
 		if (err)
 			return err;
 	}
@@ -6421,9 +6422,9 @@ static int load_low_light_binaries(
 		struct ia_css_binary_descr pre_anr_descr;
 
 		ia_css_pipe_get_pre_anr_binarydesc(pipe, &pre_anr_descr, &pre_in_info,
-						    &pipe->pipe_settings.capture.anr_gdc_binary.in_frame_info);
+						   &pipe->pipe_settings.capture.anr_gdc_binary.in_frame_info);
 		err = ia_css_binary_find(&pre_anr_descr,
-					    &pipe->pipe_settings.capture.pre_isp_binary);
+					 &pipe->pipe_settings.capture.pre_isp_binary);
 		if (err)
 			return err;
 	}
@@ -6442,10 +6443,10 @@ static int load_low_light_binaries(
 	{
 		struct ia_css_binary_descr vf_pp_descr;
 
-		ia_css_pipe_get_vfpp_binarydesc(pipe,
-						&vf_pp_descr, vf_pp_in_info, pipe_vf_out_info);
+		ia_css_pipe_get_vfpp_binarydesc(pipe, &vf_pp_descr,
+						vf_pp_in_info, pipe_vf_out_info);
 		err = ia_css_binary_find(&vf_pp_descr,
-					    &pipe->pipe_settings.capture.vf_pp_binary);
+					 &pipe->pipe_settings.capture.vf_pp_binary);
 		if (err)
 			return err;
 	}
@@ -6457,8 +6458,8 @@ static int load_low_light_binaries(
 #endif
 	if (need_isp_copy)
 		err = load_copy_binary(pipe,
-					&pipe->pipe_settings.capture.copy_binary,
-					&pipe->pipe_settings.capture.pre_isp_binary);
+				       &pipe->pipe_settings.capture.copy_binary,
+				       &pipe->pipe_settings.capture.pre_isp_binary);
 
 	return err;
 }
@@ -6492,7 +6493,7 @@ static int load_capture_binaries(
 	IA_CSS_ENTER_PRIVATE("");
 	assert(pipe);
 	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE ||
-		pipe->mode == IA_CSS_PIPE_ID_COPY);
+	       pipe->mode == IA_CSS_PIPE_ID_COPY);
 
 	if (pipe->pipe_settings.capture.primary_binary[0].info) {
 		IA_CSS_LEAVE_ERR_PRIVATE(0);
@@ -6587,7 +6588,7 @@ unload_capture_binaries(struct ia_css_pipe *pipe)
 
 static bool
 need_downscaling(const struct ia_css_resolution in_res,
-		    const struct ia_css_resolution out_res)
+		 const struct ia_css_resolution out_res)
 {
 	if (in_res.width > out_res.width || in_res.height > out_res.height)
 		return true;
@@ -6658,9 +6659,9 @@ static int ia_css_pipe_create_cas_scaler_desc_single_output(
 	descr->num_output_stage = 1;
 
 	hor_ds_factor = CEIL_DIV(cas_scaler_in_info->res.width,
-				    cas_scaler_out_info->res.width);
+				 cas_scaler_out_info->res.width);
 	ver_ds_factor = CEIL_DIV(cas_scaler_in_info->res.height,
-				    cas_scaler_out_info->res.height);
+				 cas_scaler_out_info->res.height);
 	/* use the same horizontal and vertical downscaling factor for simplicity */
 	assert(hor_ds_factor == ver_ds_factor);
 
@@ -6670,31 +6671,36 @@ static int ia_css_pipe_create_cas_scaler_desc_single_output(
 		i *= max_scale_factor_per_stage;
 	}
 
-	descr->in_info = kmalloc(descr->num_stage * sizeof(struct ia_css_frame_info),
-				    GFP_KERNEL);
+	descr->in_info = kmalloc(descr->num_stage *
+				 sizeof(struct ia_css_frame_info),
+				 GFP_KERNEL);
 	if (!descr->in_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->internal_out_info = kmalloc(descr->num_stage * sizeof(
-						struct ia_css_frame_info), GFP_KERNEL);
+	descr->internal_out_info = kmalloc(descr->num_stage *
+					   sizeof(struct ia_css_frame_info),
+					   GFP_KERNEL);
 	if (!descr->internal_out_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->out_info = kmalloc(descr->num_stage * sizeof(struct ia_css_frame_info),
-				    GFP_KERNEL);
+	descr->out_info = kmalloc(descr->num_stage *
+				  sizeof(struct ia_css_frame_info),
+				  GFP_KERNEL);
 	if (!descr->out_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->vf_info = kmalloc(descr->num_stage * sizeof(struct ia_css_frame_info),
-				    GFP_KERNEL);
+	descr->vf_info = kmalloc(descr->num_stage *
+				 sizeof(struct ia_css_frame_info),
+				 GFP_KERNEL);
 	if (!descr->vf_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->is_output_stage = kmalloc(descr->num_stage * sizeof(bool), GFP_KERNEL);
+	descr->is_output_stage = kmalloc(descr->num_stage * sizeof(bool),
+					 GFP_KERNEL);
 	if (!descr->is_output_stage) {
 		err = -ENOMEM;
 		goto ERR;
@@ -6738,9 +6744,9 @@ static int ia_css_pipe_create_cas_scaler_desc_single_output(
 				max_scale_factor_per_stage;
 			descr->internal_out_info[i].format = IA_CSS_FRAME_FORMAT_YUV420;
 			ia_css_frame_info_init(&descr->internal_out_info[i],
-						tmp_in_info.res.width / max_scale_factor_per_stage,
-						tmp_in_info.res.height / max_scale_factor_per_stage,
-						IA_CSS_FRAME_FORMAT_YUV420, 0);
+					       tmp_in_info.res.width / max_scale_factor_per_stage,
+					       tmp_in_info.res.height / max_scale_factor_per_stage,
+					       IA_CSS_FRAME_FORMAT_YUV420, 0);
 			descr->out_info[i].res.width = 0;
 			descr->out_info[i].res.height = 0;
 			descr->vf_info[i].res.width = 0;
@@ -6816,30 +6822,35 @@ static int ia_css_pipe_create_cas_scaler_desc(
 	descr->num_stage = num_stages;
 
 	descr->in_info = kmalloc_array(descr->num_stage,
-					sizeof(struct ia_css_frame_info), GFP_KERNEL);
+				       sizeof(struct ia_css_frame_info),
+				       GFP_KERNEL);
 	if (!descr->in_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->internal_out_info = kmalloc(descr->num_stage * sizeof(
-						struct ia_css_frame_info), GFP_KERNEL);
+	descr->internal_out_info = kmalloc(descr->num_stage *
+					   sizeof(struct ia_css_frame_info),
+					   GFP_KERNEL);
 	if (!descr->internal_out_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->out_info = kmalloc(descr->num_stage * sizeof(struct ia_css_frame_info),
-				    GFP_KERNEL);
+	descr->out_info = kmalloc(descr->num_stage *
+				  sizeof(struct ia_css_frame_info),
+				  GFP_KERNEL);
 	if (!descr->out_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->vf_info = kmalloc(descr->num_stage * sizeof(struct ia_css_frame_info),
-				    GFP_KERNEL);
+	descr->vf_info = kmalloc(descr->num_stage *
+				 sizeof(struct ia_css_frame_info),
+				 GFP_KERNEL);
 	if (!descr->vf_info) {
 		err = -ENOMEM;
 		goto ERR;
 	}
-	descr->is_output_stage = kmalloc(descr->num_stage * sizeof(bool), GFP_KERNEL);
+	descr->is_output_stage = kmalloc(descr->num_stage * sizeof(bool),
+					 GFP_KERNEL);
 	if (!descr->is_output_stage) {
 		err = -ENOMEM;
 		goto ERR;
@@ -6849,7 +6860,7 @@ static int ia_css_pipe_create_cas_scaler_desc(
 		if (out_info[i]) {
 			if (i > 0) {
 				assert((out_info[i - 1]->res.width >= out_info[i]->res.width) &&
-					(out_info[i - 1]->res.height >= out_info[i]->res.height));
+				       (out_info[i - 1]->res.height >= out_info[i]->res.height));
 			}
 		}
 	}
@@ -6897,9 +6908,9 @@ static int ia_css_pipe_create_cas_scaler_desc(
 				max_scale_factor_per_stage;
 			descr->internal_out_info[i].format = IA_CSS_FRAME_FORMAT_YUV420;
 			ia_css_frame_info_init(&descr->internal_out_info[i],
-						tmp_in_info.res.width / max_scale_factor_per_stage,
-						tmp_in_info.res.height / max_scale_factor_per_stage,
-						IA_CSS_FRAME_FORMAT_YUV420, 0);
+					       tmp_in_info.res.width / max_scale_factor_per_stage,
+					       tmp_in_info.res.height / max_scale_factor_per_stage,
+					       IA_CSS_FRAME_FORMAT_YUV420, 0);
 			descr->out_info[i].res.width = 0;
 			descr->out_info[i].res.height = 0;
 			descr->vf_info[i].res.width = 0;
@@ -6978,13 +6989,14 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 		struct ia_css_binary_descr yuv_scaler_descr;
 
 		err = ia_css_pipe_create_cas_scaler_desc(pipe,
-			&cas_scaler_descr);
+							 &cas_scaler_descr);
 		if (err)
 			goto ERR;
 		mycs->num_output = cas_scaler_descr.num_output_stage;
 		mycs->num_yuv_scaler = cas_scaler_descr.num_stage;
 		mycs->yuv_scaler_binary = kzalloc(cas_scaler_descr.num_stage *
-						    sizeof(struct ia_css_binary), GFP_KERNEL);
+						  sizeof(struct ia_css_binary),
+						  GFP_KERNEL);
 		if (!mycs->yuv_scaler_binary) {
 			err = -ENOMEM;
 			goto ERR;
@@ -6998,12 +7010,13 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 		for (i = 0; i < cas_scaler_descr.num_stage; i++) {
 			mycs->is_output_stage[i] = cas_scaler_descr.is_output_stage[i];
 			ia_css_pipe_get_yuvscaler_binarydesc(pipe,
-								&yuv_scaler_descr, &cas_scaler_descr.in_info[i],
-								&cas_scaler_descr.out_info[i],
-								&cas_scaler_descr.internal_out_info[i],
-								&cas_scaler_descr.vf_info[i]);
+							     &yuv_scaler_descr,
+							     &cas_scaler_descr.in_info[i],
+							     &cas_scaler_descr.out_info[i],
+							     &cas_scaler_descr.internal_out_info[i],
+							     &cas_scaler_descr.vf_info[i]);
 			err = ia_css_binary_find(&yuv_scaler_descr,
-						    &mycs->yuv_scaler_binary[i]);
+						 &mycs->yuv_scaler_binary[i]);
 			if (err)
 				goto ERR;
 		}
@@ -7043,8 +7056,8 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 
 	if (need_isp_copy_binary) {
 		err = load_copy_binary(pipe,
-					&mycs->copy_binary,
-					next_binary);
+				       &mycs->copy_binary,
+				       next_binary);
 
 		if (err)
 			goto ERR;
@@ -7089,8 +7102,9 @@ load_yuvpp_binaries(struct ia_css_pipe *pipe)
 
 		mycs->num_vf_pp = 1;
 	}
-	mycs->vf_pp_binary = kzalloc(mycs->num_vf_pp * sizeof(struct ia_css_binary),
-					GFP_KERNEL);
+	mycs->vf_pp_binary = kzalloc(mycs->num_vf_pp *
+				     sizeof(struct ia_css_binary),
+				     GFP_KERNEL);
 	if (!mycs->vf_pp_binary) {
 		err = -ENOMEM;
 		goto ERR;
@@ -7419,18 +7433,26 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 #endif
 
 		if (need_scaler) {
-			ia_css_pipe_util_set_output_frames(bin_out_frame, 0, NULL);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-							    bin_out_frame, in_frame_local, NULL);
+			ia_css_pipe_util_set_output_frames(bin_out_frame,
+							   0, NULL);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   copy_binary,
+							   bin_out_frame,
+							   in_frame_local,
+							   NULL);
 		} else {
-			ia_css_pipe_util_set_output_frames(bin_out_frame, 0, out_frame[0]);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-							    bin_out_frame, in_frame_local, NULL);
+			ia_css_pipe_util_set_output_frames(bin_out_frame,
+							   0, out_frame[0]);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   copy_binary,
+							   bin_out_frame,
+							   in_frame_local,
+							   NULL);
 		}
 
 		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			&copy_stage);
+							   &stage_desc,
+							   &copy_stage);
 
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -7462,10 +7484,11 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 				tmp_vf_frame = NULL;
 			}
 
-			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame, tmp_out_frame,
-						    NULL,
-						    &yuv_scaler_binary[i],
-						    &yuv_scaler_stage);
+			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame,
+						   tmp_out_frame,
+						   NULL,
+						   &yuv_scaler_binary[i],
+						   &yuv_scaler_stage);
 
 			if (err) {
 				IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -7476,8 +7499,10 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 			if (pipe->pipe_settings.yuvpp.is_output_stage[i]) {
 				if (tmp_vf_frame && (tmp_vf_frame->info.res.width != 0)) {
 					in_frame = yuv_scaler_stage->args.out_vf_frame;
-					err = add_vf_pp_stage(pipe, in_frame, tmp_vf_frame, &vf_pp_binary[j],
-								&vf_pp_stage);
+					err = add_vf_pp_stage(pipe, in_frame,
+							      tmp_vf_frame,
+							      &vf_pp_binary[j],
+							      &vf_pp_stage);
 
 					if (err) {
 						IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -7490,8 +7515,8 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 	} else if (copy_stage) {
 		if (vf_frame[0] && vf_frame[0]->info.res.width != 0) {
 			in_frame = copy_stage->args.out_vf_frame;
-			err = add_vf_pp_stage(pipe, in_frame, vf_frame[0], &vf_pp_binary[0],
-						&vf_pp_stage);
+			err = add_vf_pp_stage(pipe, in_frame, vf_frame[0],
+					      &vf_pp_binary[0], &vf_pp_stage);
 		}
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
@@ -7499,7 +7524,8 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 		}
 	}
 
-	ia_css_pipeline_finalize_stages(&pipe->pipeline, pipe->stream->config.continuous);
+	ia_css_pipeline_finalize_stages(&pipe->pipeline,
+					pipe->stream->config.continuous);
 
 	IA_CSS_LEAVE_ERR_PRIVATE(0);
 
@@ -7508,8 +7534,8 @@ create_host_yuvpp_pipeline(struct ia_css_pipe *pipe)
 
 static int
 create_host_copy_pipeline(struct ia_css_pipe *pipe,
-			    unsigned int max_input_width,
-			    struct ia_css_frame *out_frame)
+			  unsigned int max_input_width,
+			  struct ia_css_frame *out_frame)
 {
 	struct ia_css_pipeline *me;
 	int err = 0;
@@ -7528,12 +7554,8 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 
 	if (copy_on_sp(pipe) &&
 	    pipe->stream->config.input_config.format == ATOMISP_INPUT_FORMAT_BINARY_8) {
-		ia_css_frame_info_init(
-		    &out_frame->info,
-		    JPEG_BYTES,
-		    1,
-		    IA_CSS_FRAME_FORMAT_BINARY_8,
-		    0);
+		ia_css_frame_info_init(&out_frame->info, JPEG_BYTES, 1,
+				       IA_CSS_FRAME_FORMAT_BINARY_8, 0);
 	} else if (out_frame->info.format == IA_CSS_FRAME_FORMAT_RAW) {
 		out_frame->info.raw_bit_depth =
 		ia_css_pipe_util_pipe_input_format_bpp(pipe);
@@ -7544,12 +7566,12 @@ create_host_copy_pipeline(struct ia_css_pipe *pipe,
 	pipe->mode  = IA_CSS_PIPE_ID_COPY;
 
 	ia_css_pipe_get_sp_func_stage_desc(&stage_desc, out_frame,
-					    IA_CSS_PIPELINE_RAW_COPY, max_input_width);
-	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc,
-		NULL);
+					   IA_CSS_PIPELINE_RAW_COPY,
+					   max_input_width);
+	err = ia_css_pipeline_create_and_add_stage(me, &stage_desc, NULL);
 
-	ia_css_pipeline_finalize_stages(&pipe->pipeline, pipe->stream->config.continuous);
+	ia_css_pipeline_finalize_stages(&pipe->pipeline,
+					pipe->stream->config.continuous);
 
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE,
 			    "create_host_copy_pipeline() leave:\n");
@@ -7588,9 +7610,10 @@ create_host_isyscopy_capture_pipeline(struct ia_css_pipe *pipe)
 	me->pipe_id = IA_CSS_PIPE_ID_CAPTURE;
 	pipe->mode  = IA_CSS_PIPE_ID_CAPTURE;
 	ia_css_pipe_get_sp_func_stage_desc(&stage_desc, out_frame,
-					    IA_CSS_PIPELINE_ISYS_COPY, max_input_width);
+					   IA_CSS_PIPELINE_ISYS_COPY,
+					   max_input_width);
 	err = ia_css_pipeline_create_and_add_stage(me,
-		&stage_desc, &out_stage);
+						   &stage_desc, &out_stage);
 	if (err)
 		return err;
 
@@ -7642,7 +7665,8 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	IA_CSS_ENTER_PRIVATE("");
 	assert(pipe);
 	assert(pipe->stream);
-	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE || pipe->mode == IA_CSS_PIPE_ID_COPY);
+	assert(pipe->mode == IA_CSS_PIPE_ID_CAPTURE ||
+	       pipe->mode == IA_CSS_PIPE_ID_COPY);
 
 	me = &pipe->pipeline;
 	mode = pipe->config.default_capture_config.mode;
@@ -7732,26 +7756,37 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 			ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 #if defined(ISP2401)
 			if (!continuous) {
-				ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-								    out_frames, in_frame, NULL);
+				ia_css_pipe_get_generic_stage_desc(&stage_desc,
+								   copy_binary,
+								   out_frames,
+								   in_frame,
+								   NULL);
 			} else {
 				in_frame = pipe->stream->last_pipe->continuous_frames[0];
-				ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-								    out_frames, in_frame, NULL);
+				ia_css_pipe_get_generic_stage_desc(&stage_desc,
+								   copy_binary,
+								   out_frames,
+								   in_frame,
+								   NULL);
 			}
 #else
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-							    out_frames, NULL, NULL);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   copy_binary,
+							   out_frames,
+							   NULL, NULL);
 #endif
 		} else {
-			ia_css_pipe_util_set_output_frames(out_frames, 0, in_frame);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, copy_binary,
-							    out_frames, NULL, NULL);
+			ia_css_pipe_util_set_output_frames(out_frames, 0,
+							   in_frame);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   copy_binary,
+							   out_frames,
+							   NULL, NULL);
 		}
 
 		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			&current_stage);
+							   &stage_desc,
+							   &current_stage);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7788,11 +7823,14 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 			    * Proper investigation should be done to come up with the clean
 			    * solution.
 			    * */
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, primary_binary[i],
-							    out_frames, local_in_frame, NULL);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   primary_binary[i],
+							   out_frames,
+							   local_in_frame,
+							   NULL);
 			err = ia_css_pipeline_create_and_add_stage(me,
-				&stage_desc,
-				&current_stage);
+								   &stage_desc,
+								   &current_stage);
 			if (err) {
 				IA_CSS_LEAVE_ERR_PRIVATE(err);
 				return err;
@@ -7808,18 +7846,18 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 		    mode == IA_CSS_CAPTURE_MODE_LOW_LIGHT) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, pre_isp_binary,
-						    out_frames, in_frame, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc, NULL);
+						   out_frames, in_frame, NULL);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   NULL);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
 		}
 		ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, anr_gdc_binary,
-						    out_frames, NULL, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc, NULL);
+						   out_frames, NULL, NULL);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   NULL);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7827,16 +7865,21 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 
 		if (need_pp) {
 			ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, post_isp_binary,
-							    out_frames, NULL, NULL);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   post_isp_binary,
+							   out_frames,
+							   NULL, NULL);
 		} else {
-			ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, post_isp_binary,
-							    out_frames, NULL, NULL);
+			ia_css_pipe_util_set_output_frames(out_frames, 0,
+							   out_frame);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   post_isp_binary,
+							   out_frames,
+							   NULL, NULL);
 		}
 
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc, &current_stage);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   &current_stage);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7844,10 +7887,9 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	} else if (mode == IA_CSS_CAPTURE_MODE_BAYER) {
 		ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, pre_isp_binary,
-						    out_frames, in_frame, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			NULL);
+						   out_frames, in_frame, NULL);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   NULL);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7862,31 +7904,34 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 
 		if (need_ldc) {
 			ia_css_pipe_util_set_output_frames(out_frames, 0, NULL);
-			ia_css_pipe_get_generic_stage_desc(&stage_desc, capture_ldc_binary,
-							    out_frames, local_in_frame, NULL);
+			ia_css_pipe_get_generic_stage_desc(&stage_desc,
+							   capture_ldc_binary,
+							   out_frames,
+							   local_in_frame,
+							   NULL);
 			err = ia_css_pipeline_create_and_add_stage(me,
-				&stage_desc,
-				&current_stage);
+								   &stage_desc,
+								   &current_stage);
 			local_in_frame = current_stage->args.out_frame[0];
 		}
 		err = add_capture_pp_stage(pipe, me, local_in_frame,
-					    need_yuv_pp ? NULL : out_frame,
+					   need_yuv_pp ? NULL : out_frame,
 #else
 	/* ldc and capture_pp not supported in same pipeline */
 	if (need_ldc && current_stage) {
 		in_frame = current_stage->args.out_frame[0];
 		ia_css_pipe_util_set_output_frames(out_frames, 0, out_frame);
 		ia_css_pipe_get_generic_stage_desc(&stage_desc, capture_ldc_binary,
-						    out_frames, in_frame, NULL);
-		err = ia_css_pipeline_create_and_add_stage(me,
-			&stage_desc,
-			NULL);
+						   out_frames, in_frame, NULL);
+		err = ia_css_pipeline_create_and_add_stage(me, &stage_desc,
+							   NULL);
 	} else if (need_pp && current_stage) {
 		in_frame = current_stage->args.out_frame[0];
-		err = add_capture_pp_stage(pipe, me, in_frame, need_yuv_pp ? NULL : out_frame,
+		err = add_capture_pp_stage(pipe, me, in_frame,
+					   need_yuv_pp ? NULL : out_frame,
 #endif
-					    capture_pp_binary,
-					    &current_stage);
+					   capture_pp_binary,
+					   &current_stage);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7903,10 +7948,10 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 			else
 				tmp_out_frame = NULL;
 
-			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame, tmp_out_frame,
-						    NULL,
-						    &yuv_scaler_binary[i],
-						    &yuv_scaler_stage);
+			err = add_yuv_scaler_stage(pipe, me, tmp_in_frame,
+						   tmp_out_frame, NULL,
+						   &yuv_scaler_binary[i],
+						   &yuv_scaler_stage);
 			if (err) {
 				IA_CSS_LEAVE_ERR_PRIVATE(err);
 				return err;
@@ -7928,7 +7973,7 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	if (mode != IA_CSS_CAPTURE_MODE_RAW && mode != IA_CSS_CAPTURE_MODE_BAYER && current_stage && vf_frame) {
 		in_frame = current_stage->args.out_vf_frame;
 		err = add_vf_pp_stage(pipe, in_frame, vf_frame, vf_pp_binary,
-					&current_stage);
+				      &current_stage);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			return err;
@@ -7980,7 +8025,7 @@ static int capture_start(
 	me = &pipe->pipeline;
 
 	if ((pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_RAW   ||
-		pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_BAYER) &&
+	     pipe->config.default_capture_config.mode == IA_CSS_CAPTURE_MODE_BAYER) &&
 	    (pipe->config.mode != IA_CSS_PIPE_MODE_COPY)) {
 		if (copy_on_sp(pipe)) {
 			err = start_copy_on_sp(pipe, &me->out_frame[0]);
@@ -8024,7 +8069,7 @@ static int capture_start(
 	if (pipe->config.mode == IA_CSS_PIPE_MODE_COPY &&
 	    pipe->stream->reconfigure_css_rx) {
 		ia_css_isys_rx_configure(&pipe->stream->csi_rx_config,
-					    pipe->stream->config.mode);
+					 pipe->stream->config.mode);
 		pipe->stream->reconfigure_css_rx = false;
 	}
 #endif
@@ -8035,8 +8080,8 @@ static int capture_start(
 
 static int
 sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
-				    struct ia_css_frame_info *info,
-				    unsigned int idx)
+				  struct ia_css_frame_info *info,
+				  unsigned int idx)
 {
 	assert(pipe);
 	assert(info);
@@ -8054,7 +8099,7 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 		    IA_CSS_FRAME_FORMAT_BINARY_8,
 		    0);
 	} else if (info->format == IA_CSS_FRAME_FORMAT_RAW ||
-		    info->format == IA_CSS_FRAME_FORMAT_RAW_PACKED) {
+		   info->format == IA_CSS_FRAME_FORMAT_RAW_PACKED) {
 		info->raw_bit_depth =
 		ia_css_pipe_util_pipe_input_format_bpp(pipe);
 	}
@@ -8066,9 +8111,9 @@ sh_css_pipe_get_output_frame_info(struct ia_css_pipe *pipe,
 
 void
 ia_css_stream_send_input_frame(const struct ia_css_stream *stream,
-				const unsigned short *data,
-				unsigned int width,
-				unsigned int height)
+			       const unsigned short *data,
+			       unsigned int width,
+			       unsigned int height)
 {
 	assert(stream);
 
@@ -8092,22 +8137,22 @@ ia_css_stream_start_input_frame(const struct ia_css_stream *stream)
 
 void
 ia_css_stream_send_input_line(const struct ia_css_stream *stream,
-				const unsigned short *data,
-				unsigned int width,
-				const unsigned short *data2,
-				unsigned int width2)
+			      const unsigned short *data,
+			      unsigned int width,
+			      const unsigned short *data2,
+			      unsigned int width2)
 {
 	assert(stream);
 
 	ia_css_inputfifo_send_line(stream->config.channel_id,
-				    data, width, data2, width2);
+				   data, width, data2, width2);
 }
 
 void
 ia_css_stream_send_input_embedded_line(const struct ia_css_stream *stream,
-					enum atomisp_input_format format,
-					const unsigned short *data,
-					unsigned int width)
+				       enum atomisp_input_format format,
+				       const unsigned short *data,
+				       unsigned int width)
 {
 	assert(stream);
 	if (!data || width == 0)
@@ -8228,7 +8273,7 @@ acc_unload_extension(struct ia_css_fw_info *firmware)
 /* Load firmware for extension */
 static int
 ia_css_pipe_load_extension(struct ia_css_pipe *pipe,
-			    struct ia_css_fw_info *firmware)
+			   struct ia_css_fw_info *firmware)
 {
 	int err = 0;
 
@@ -8252,7 +8297,7 @@ ia_css_pipe_load_extension(struct ia_css_pipe *pipe,
 /* Unload firmware for extension */
 static void
 ia_css_pipe_unload_extension(struct ia_css_pipe *pipe,
-				struct ia_css_fw_info *firmware)
+			     struct ia_css_fw_info *firmware)
 {
 	IA_CSS_ENTER_PRIVATE("fw = %p pipe = %p", firmware, pipe);
 
@@ -8294,7 +8339,7 @@ ia_css_pipeline_uses_params(struct ia_css_pipeline *me)
 
 static int
 sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
-				const void *acc_fw)
+			      const void *acc_fw)
 {
 	struct ia_css_fw_info *fw = (struct ia_css_fw_info *)acc_fw;
 	/* In QoS case, load_extension already called, so skipping */
@@ -8312,8 +8357,8 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
 
 		ia_css_pipe_get_acc_stage_desc(&stage_desc, NULL, fw);
 		err = ia_css_pipeline_create_and_add_stage(pipeline,
-			&stage_desc,
-			NULL);
+							   &stage_desc,
+							   NULL);
 	}
 
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
@@ -8326,7 +8371,7 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
     * Refer to "sh_css_internal.h" for details.
     */
 int ia_css_stream_capture_frame(struct ia_css_stream *stream,
-	unsigned int exp_id)
+				unsigned int exp_id)
 {
 	struct sh_css_tag_descr tag_descr;
 	u32 encoded_tag_descr;
@@ -8485,22 +8530,22 @@ sh_css_init_host_sp_control_vars(void)
 	(void)HIVE_ADDR_host_sp_com;
 
 	sp_dmem_store_uint32(SP0_ID,
-				(unsigned int)sp_address_of(ia_css_ispctrl_sp_isp_started),
-				(uint32_t)(0));
+			     (unsigned int)sp_address_of(ia_css_ispctrl_sp_isp_started),
+			     (uint32_t)(0));
 
 	sp_dmem_store_uint32(SP0_ID,
-				(unsigned int)sp_address_of(host_sp_queues_initialized),
-				(uint32_t)(0));
+			     (unsigned int)sp_address_of(host_sp_queues_initialized),
+			     (uint32_t)(0));
 	sp_dmem_store_uint32(SP0_ID,
-				(unsigned int)sp_address_of(sp_sleep_mode),
-				(uint32_t)(0));
+			     (unsigned int)sp_address_of(sp_sleep_mode),
+			     (uint32_t)(0));
 	sp_dmem_store_uint32(SP0_ID,
-				(unsigned int)sp_address_of(ia_css_dmaproxy_sp_invalidate_tlb),
-				(uint32_t)(false));
+			     (unsigned int)sp_address_of(ia_css_dmaproxy_sp_invalidate_tlb),
+			     (uint32_t)(false));
 #ifndef ISP2401
 	sp_dmem_store_uint32(SP0_ID,
-				(unsigned int)sp_address_of(sp_stop_copy_preview),
-				my_css.stop_copy_preview ? (uint32_t)(1) : (uint32_t)(0));
+			     (unsigned int)sp_address_of(sp_stop_copy_preview),
+			     my_css.stop_copy_preview ? (uint32_t)(1) : (uint32_t)(0));
 #endif
 	store_sp_array_uint(host_sp_com, o, host2sp_cmd_ready);
 
@@ -8527,8 +8572,7 @@ void ia_css_pipe_config_defaults(struct ia_css_pipe_config *pipe_config)
 }
 
 void
-ia_css_pipe_extra_config_defaults(struct ia_css_pipe_extra_config
-				    *extra_config)
+ia_css_pipe_extra_config_defaults(struct ia_css_pipe_extra_config *extra_config)
 {
 	if (!extra_config) {
 		IA_CSS_ERROR("NULL input parameter");
@@ -8602,8 +8646,8 @@ int ia_css_pipe_create(const struct ia_css_pipe_config *config,
 
 int
 ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
-			    const struct ia_css_pipe_extra_config *extra_config,
-			    struct ia_css_pipe **pipe)
+			 const struct ia_css_pipe_extra_config *extra_config,
+			 struct ia_css_pipe **pipe)
 {
 	int err = -EINVAL;
 	struct ia_css_pipe *internal_pipe = NULL;
@@ -8671,7 +8715,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 
 	/* YUV downscaling */
 	if ((internal_pipe->config.vf_pp_in_res.width ||
-		internal_pipe->config.capt_pp_in_res.width)) {
+	     internal_pipe->config.capt_pp_in_res.width)) {
 		enum ia_css_frame_format format;
 
 		if (internal_pipe->config.vf_pp_in_res.width) {
@@ -8747,7 +8791,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 	}
 	if (internal_pipe->config.acc_extension) {
 		err = ia_css_pipe_load_extension(internal_pipe,
-						    internal_pipe->config.acc_extension);
+						 internal_pipe->config.acc_extension);
 		if (err) {
 			IA_CSS_LEAVE_ERR_PRIVATE(err);
 			kvfree(internal_pipe);
@@ -8765,7 +8809,7 @@ ia_css_pipe_create_extra(const struct ia_css_pipe_config *config,
 
 int
 ia_css_pipe_get_info(const struct ia_css_pipe *pipe,
-			struct ia_css_pipe_info *pipe_info)
+		     struct ia_css_pipe_info *pipe_info)
 {
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,
 			    "ia_css_pipe_get_info()\n");
@@ -8801,8 +8845,8 @@ bool ia_css_pipe_has_dvs_stats(struct ia_css_pipe_info *pipe_info)
 
 int
 ia_css_pipe_override_frame_format(struct ia_css_pipe *pipe,
-				    int pin_index,
-				    enum ia_css_frame_format new_format)
+				  int pin_index,
+				  enum ia_css_frame_format new_format)
 {
 	int err = 0;
 
@@ -8882,10 +8926,8 @@ ia_css_stream_configure_rx(struct ia_css_stream *stream)
 #endif
 
 static struct ia_css_pipe *
-find_pipe(struct ia_css_pipe *pipes[],
-	    unsigned int num_pipes,
-	    enum ia_css_pipe_mode mode,
-	    bool copy_pipe)
+find_pipe(struct ia_css_pipe *pipes[], unsigned int num_pipes,
+	  enum ia_css_pipe_mode mode, bool copy_pipe)
 {
 	unsigned int i;
 
@@ -8954,7 +8996,7 @@ ia_css_acc_stream_create(struct ia_css_stream *stream)
 
 static int
 metadata_info_init(const struct ia_css_metadata_config *mdc,
-		    struct ia_css_metadata_info *md)
+		   struct ia_css_metadata_info *md)
 {
 	/* Either both width and height should be set or neither */
 	if ((mdc->resolution.height > 0) ^ (mdc->resolution.width > 0))
@@ -8982,7 +9024,7 @@ static int check_pipe_resolutions(const struct ia_css_pipe *pipe)
 	}
 
 	if (ia_css_util_check_res(pipe->config.input_effective_res.width,
-				    pipe->config.input_effective_res.height) != 0) {
+				  pipe->config.input_effective_res.height) != 0) {
 		IA_CSS_ERROR("effective resolution not supported");
 		err = -EINVAL;
 		goto EXIT;
@@ -8990,7 +9032,7 @@ static int check_pipe_resolutions(const struct ia_css_pipe *pipe)
 	if (!ia_css_util_resolution_is_zero(
 		pipe->stream->config.input_config.input_res)) {
 		if (!ia_css_util_res_leq(pipe->config.input_effective_res,
-					    pipe->stream->config.input_config.input_res)) {
+					 pipe->stream->config.input_config.input_res)) {
 			IA_CSS_ERROR("effective resolution is larger than input resolution");
 			err = -EINVAL;
 			goto EXIT;
@@ -9013,9 +9055,9 @@ EXIT:
 
 int
 ia_css_stream_create(const struct ia_css_stream_config *stream_config,
-			int num_pipes,
-			struct ia_css_pipe *pipes[],
-			struct ia_css_stream **stream)
+		     int num_pipes,
+		     struct ia_css_pipe *pipes[],
+		     struct ia_css_stream **stream)
 {
 	struct ia_css_pipe *curr_pipe;
 	struct ia_css_stream *curr_stream = NULL;
@@ -9176,11 +9218,11 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	case IA_CSS_INPUT_MODE_TPG:
 #if !defined(ISP2401)
 		IA_CSS_LOG("tpg_configuration: x_mask=%d, y_mask=%d, x_delta=%d, y_delta=%d, xy_mask=%d",
-			    curr_stream->config.source.tpg.x_mask,
-			    curr_stream->config.source.tpg.y_mask,
-			    curr_stream->config.source.tpg.x_delta,
-			    curr_stream->config.source.tpg.y_delta,
-			    curr_stream->config.source.tpg.xy_mask);
+			   curr_stream->config.source.tpg.x_mask,
+			   curr_stream->config.source.tpg.y_mask,
+			   curr_stream->config.source.tpg.x_delta,
+			   curr_stream->config.source.tpg.y_delta,
+			   curr_stream->config.source.tpg.xy_mask);
 
 		sh_css_sp_configure_tpg(
 		    curr_stream->config.source.tpg.x_mask,
@@ -9205,9 +9247,8 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	}
 
 #ifdef ISP2401
-	err = aspect_ratio_crop_init(curr_stream,
-					pipes,
-					&aspect_ratio_crop_enabled);
+	err = aspect_ratio_crop_init(curr_stream, pipes,
+				     &aspect_ratio_crop_enabled);
 	if (err) {
 		IA_CSS_LEAVE_ERR(err);
 		goto ERR;
@@ -9244,8 +9285,8 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 			curr_pipe->config.input_effective_res = effective_res;
 		}
 		IA_CSS_LOG("effective_res=%dx%d",
-			    effective_res.width,
-			    effective_res.height);
+			   effective_res.width,
+			   effective_res.height);
 	}
 
 	if (IS_ISP2401) {
@@ -9273,13 +9314,13 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 	if (!spcopyonly) {
 		sensor_binning_changed =
 		    sh_css_params_set_binning_factor(curr_stream,
-							curr_stream->config.sensor_binning_factor);
+						     curr_stream->config.sensor_binning_factor);
 	} else {
 		sensor_binning_changed = false;
 	}
 
 	IA_CSS_LOG("sensor_binning=%d, changed=%d",
-		    curr_stream->config.sensor_binning_factor, sensor_binning_changed);
+		   curr_stream->config.sensor_binning_factor, sensor_binning_changed);
 	/* loop over pipes */
 	IA_CSS_LOG("num_pipes=%d", num_pipes);
 	curr_stream->cont_capt = false;
@@ -9303,17 +9344,18 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 
 		/* Create copy pipe here, since it may not be exposed to the driver */
 		preview_pipe = find_pipe(pipes, num_pipes,
-					    IA_CSS_PIPE_MODE_PREVIEW, false);
+					 IA_CSS_PIPE_MODE_PREVIEW, false);
 		video_pipe = find_pipe(pipes, num_pipes,
-					IA_CSS_PIPE_MODE_VIDEO, false);
-		acc_pipe = find_pipe(pipes, num_pipes,
-					IA_CSS_PIPE_MODE_ACC, false);
+				       IA_CSS_PIPE_MODE_VIDEO, false);
+		acc_pipe = find_pipe(pipes, num_pipes, IA_CSS_PIPE_MODE_ACC,
+				     false);
 		if (acc_pipe && num_pipes == 2 && curr_stream->cont_capt)
 			curr_stream->cont_capt =
 			    false; /* preview + QoS case will not need cont_capt switch */
 		if (curr_stream->cont_capt) {
 			capture_pipe = find_pipe(pipes, num_pipes,
-						    IA_CSS_PIPE_MODE_CAPTURE, false);
+						 IA_CSS_PIPE_MODE_CAPTURE,
+						 false);
 			if (!capture_pipe) {
 				err = -EINVAL;
 				goto ERR;
@@ -9395,10 +9437,12 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 		if (!spcopyonly) {
 			if (!IS_ISP2401)
 				err = sh_css_pipe_get_shading_info(curr_pipe,
-								    &pipe_info->shading_info, NULL);
+								   &pipe_info->shading_info,
+								   NULL);
 			else
 				err = sh_css_pipe_get_shading_info(curr_pipe,
-								    &pipe_info->shading_info, &curr_pipe->config);
+								   &pipe_info->shading_info,
+								   &curr_pipe->config);
 
 			if (err)
 				goto ERR;
@@ -9408,7 +9452,8 @@ ia_css_stream_create(const struct ia_css_stream_config *stream_config,
 				goto ERR;
 			for (j = 0; j < IA_CSS_PIPE_MAX_OUTPUT_STAGE; j++) {
 				sh_css_pipe_get_viewfinder_frame_info(curr_pipe,
-									&pipe_info->vf_output_info[j], j);
+								      &pipe_info->vf_output_info[j],
+								      j);
 				if (err)
 					goto ERR;
 			}
@@ -9593,7 +9638,7 @@ ia_css_stream_destroy(struct ia_css_stream *stream)
 
 int
 ia_css_stream_get_info(const struct ia_css_stream *stream,
-			struct ia_css_stream_info *stream_info)
+		       struct ia_css_stream_info *stream_info)
 {
 	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE, "ia_css_stream_get_info: enter/exit\n");
 	assert(stream);
@@ -9881,7 +9926,7 @@ ia_css_stream_get_3a_binary(const struct ia_css_stream *stream)
 
 int
 ia_css_stream_set_output_padded_width(struct ia_css_stream *stream,
-					unsigned int output_padded_width)
+				      unsigned int output_padded_width)
 {
 	struct ia_css_pipe *pipe;
 
@@ -10358,7 +10403,7 @@ ia_css_unlock_raw_frame(struct ia_css_stream *stream, uint32_t exp_id)
     */
 int
 ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
-				bool enable)
+			      bool enable)
 {
 	unsigned int thread_id;
 	struct ia_css_pipeline_stage *stage;
@@ -10404,7 +10449,7 @@ ia_css_pipe_set_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
     */
 int
 ia_css_pipe_get_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
-				bool *enable)
+			      bool *enable)
 {
 	struct ia_css_pipeline_stage *stage;
 	unsigned int thread_id;
@@ -10440,9 +10485,9 @@ ia_css_pipe_get_qos_ext_state(struct ia_css_pipe *pipe, uint32_t fw_handle,
 /* ISP2401 */
 int
 ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
-					u32 fw_handle,
-					struct ia_css_isp_param_css_segments *css_seg,
-					struct ia_css_isp_param_isp_segments *isp_seg)
+				      u32 fw_handle,
+				      struct ia_css_isp_param_css_segments *css_seg,
+				      struct ia_css_isp_param_isp_segments *isp_seg)
 {
 	unsigned int HIVE_ADDR_sp_group;
 	static struct sh_css_sp_group sp_group;
@@ -10477,7 +10522,7 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 		if (!err) {
 			/* Get the Extension State */
 			enabled = (SH_CSS_QOS_STAGE_IS_ENABLED(&sh_css_sp_group.pipe[thread_id],
-								stage->stage_num)) ? true : false;
+							       stage->stage_num)) ? true : false;
 			/* Update mapped arg only when extension stage is not enabled */
 			if (enabled) {
 				IA_CSS_ERROR("Leaving: cannot update when stage is enabled.");
@@ -10487,13 +10532,14 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 
 				HIVE_ADDR_sp_group = fw->info.sp.group;
 				sp_dmem_load(SP0_ID,
-						(unsigned int)sp_address_of(sp_group),
-						&sp_group, sizeof(struct sh_css_sp_group));
+					     (unsigned int)sp_address_of(sp_group),
+					     &sp_group,
+					     sizeof(struct sh_css_sp_group));
 				hmm_load(sp_group.pipe[thread_id].sp_stage_addr[stage_num],
-					    &sp_stage, sizeof(struct sh_css_sp_stage));
+					 &sp_stage, sizeof(struct sh_css_sp_stage));
 
 				hmm_load(sp_stage.isp_stage_addr,
-					    &isp_stage, sizeof(struct sh_css_isp_stage));
+					 &isp_stage, sizeof(struct sh_css_isp_stage));
 
 				for (mem = 0; mem < N_IA_CSS_ISP_MEMORIES; mem++) {
 					isp_stage.mem_initializers.params[IA_CSS_PARAM_CLASS_PARAM][mem].address =
@@ -10509,7 +10555,8 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 				}
 
 				hmm_store(sp_stage.isp_stage_addr,
-					    &isp_stage, sizeof(struct sh_css_isp_stage));
+					  &isp_stage,
+					  sizeof(struct sh_css_isp_stage));
 			}
 		}
 	}
@@ -10520,8 +10567,8 @@ ia_css_pipe_update_qos_ext_mapped_arg(struct ia_css_pipe *pipe,
 #ifdef ISP2401
 static int
 aspect_ratio_crop_init(struct ia_css_stream *curr_stream,
-			struct ia_css_pipe *pipes[],
-			bool *do_crop_status)
+		       struct ia_css_pipe *pipes[],
+		       bool *do_crop_status)
 {
 	int err = 0;
 	int i;
@@ -10567,7 +10614,7 @@ aspect_ratio_crop_check(bool enabled, struct ia_css_pipe *curr_pipe)
 
 static int
 aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
-		    struct ia_css_resolution *effective_res)
+		  struct ia_css_resolution *effective_res)
 {
 	int err = 0;
 	struct ia_css_resolution crop_res;
@@ -10627,7 +10674,7 @@ aspect_ratio_crop(struct ia_css_pipe *curr_pipe,
 	case IA_CSS_PIPE_MODE_YUVPP:
 	default:
 		IA_CSS_ERROR("aspect ratio cropping invalid args: mode[%d]\n",
-				curr_pipe->config.mode);
+			     curr_pipe->config.mode);
 		assert(0);
 		break;
 	}
@@ -10691,7 +10738,7 @@ static struct sh_css_hmm_buffer_record
 
 	assert(h_vbuf);
 	assert((type > IA_CSS_BUFFER_TYPE_INVALID) &&
-		(type < IA_CSS_NUM_DYNAMIC_BUFFER_TYPE));
+	       (type < IA_CSS_NUM_DYNAMIC_BUFFER_TYPE));
 	assert(kernel_ptr != 0);
 
 	buffer_record = &hmm_buffer_record[0];
-- 
GitLab


From 58a328830e490c5ee512519c2e8172ecc3073445 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Fri, 23 Apr 2021 18:12:29 +0200
Subject: [PATCH 0997/3804] media: staging: media: atomisp: Refactor
 ia_css_stream_load()

Move the support check to the beginning of the function.
Change the logic to avoid multiple nesting blocks.
Move 'err' variable assignment outside of the if statement.
Remove an unnecessary check whether 'j' is zero.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 73 +++++++++++-----------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index 0482bfa2c490f..f1a2ba99f90ee 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -9657,48 +9657,47 @@ ia_css_stream_get_info(const struct ia_css_stream *stream,
 int
 ia_css_stream_load(struct ia_css_stream *stream)
 {
-	if (!IS_ISP2401) {
-		int i;
-		int err;
-
-		assert(stream);
-		ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,	"ia_css_stream_load() enter,\n");
-		for (i = 0; i < MAX_ACTIVE_STREAMS; i++) {
-			if (my_css_save.stream_seeds[i].stream == stream) {
-				int j;
+	int i, j, err;
 
-				for (j = 0; j < my_css_save.stream_seeds[i].num_pipes; j++) {
-					if ((err = ia_css_pipe_create(&my_css_save.stream_seeds[i].pipe_config[j],
-								    &my_css_save.stream_seeds[i].pipes[j])) != 0) {
-						if (j) {
-							int k;
-
-							for (k = 0; k < j; k++)
-								ia_css_pipe_destroy(my_css_save.stream_seeds[i].pipes[k]);
-						}
-						return err;
-					}
-				}
-				err = ia_css_stream_create(&my_css_save.stream_seeds[i].stream_config,
-							my_css_save.stream_seeds[i].num_pipes,
-							my_css_save.stream_seeds[i].pipes,
-							&my_css_save.stream_seeds[i].stream);
-				if (err) {
-					ia_css_stream_destroy(stream);
-					for (j = 0; j < my_css_save.stream_seeds[i].num_pipes; j++)
-						ia_css_pipe_destroy(my_css_save.stream_seeds[i].pipes[j]);
-					return err;
-				}
-				break;
-			}
-		}
-		ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,	"ia_css_stream_load() exit,\n");
-		return 0;
-	} else {
+	if (IS_ISP2401) {
 		/* TODO remove function - DEPRECATED */
 		(void)stream;
 		return -ENOTSUPP;
 	}
+
+	assert(stream);
+	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,	"ia_css_stream_load() enter,\n");
+	for (i = 0; i < MAX_ACTIVE_STREAMS; i++) {
+		if (my_css_save.stream_seeds[i].stream != stream)
+			continue;
+
+		for (j = 0; j < my_css_save.stream_seeds[i].num_pipes; j++) {
+			int k;
+
+			err = ia_css_pipe_create(&my_css_save.stream_seeds[i].pipe_config[j],
+						 &my_css_save.stream_seeds[i].pipes[j]);
+			if (!err)
+				continue;
+
+			for (k = 0; k < j; k++)
+				ia_css_pipe_destroy(my_css_save.stream_seeds[i].pipes[k]);
+			return err;
+		}
+		err = ia_css_stream_create(&my_css_save.stream_seeds[i].stream_config,
+					   my_css_save.stream_seeds[i].num_pipes,
+					   my_css_save.stream_seeds[i].pipes,
+					   &my_css_save.stream_seeds[i].stream);
+		if (!err)
+			break;
+
+		ia_css_stream_destroy(stream);
+		for (j = 0; j < my_css_save.stream_seeds[i].num_pipes; j++)
+			ia_css_pipe_destroy(my_css_save.stream_seeds[i].pipes[j]);
+		return err;
+	}
+
+	ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE,	"ia_css_stream_load() exit,\n");
+	return 0;
 }
 
 int
-- 
GitLab


From 684e025c716568496ec63e892f573a26a4db72d9 Mon Sep 17 00:00:00 2001
From: Martiros Shakhzadyan <vrzh@vrzh.net>
Date: Fri, 23 Apr 2021 18:13:12 +0200
Subject: [PATCH 0998/3804] media: staging: media: atomisp: Fix line split
 style issues

Fix line split issues and resolve adjacent extra parens.

Signed-off-by: Martiros Shakhzadyan <vrzh@vrzh.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/sh_css.c | 91 ++++++++++------------
 1 file changed, 42 insertions(+), 49 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c
index f1a2ba99f90ee..d26b1301eeb7e 100644
--- a/drivers/staging/media/atomisp/pci/sh_css.c
+++ b/drivers/staging/media/atomisp/pci/sh_css.c
@@ -308,8 +308,7 @@ sh_css_pipeline_add_acc_stage(struct ia_css_pipeline *pipeline,
 			      const void *acc_fw);
 
 static int
-alloc_continuous_frames(
-    struct ia_css_pipe *pipe, bool init_time);
+alloc_continuous_frames(struct ia_css_pipe *pipe, bool init_time);
 
 static void
 pipe_global_init(void);
@@ -2622,8 +2621,7 @@ static int load_copy_binary(
 }
 
 static int
-alloc_continuous_frames(
-    struct ia_css_pipe *pipe, bool init_time)
+alloc_continuous_frames(struct ia_css_pipe *pipe, bool init_time)
 {
 	int err = 0;
 	struct ia_css_frame_info ref_info;
@@ -4010,9 +4008,9 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 
 	assert(pipe_id < IA_CSS_PIPE_ID_NUM);
 	assert(buf_type < IA_CSS_NUM_DYNAMIC_BUFFER_TYPE);
-	if ((buf_type == IA_CSS_BUFFER_TYPE_INVALID) ||
-	    (buf_type >= IA_CSS_NUM_DYNAMIC_BUFFER_TYPE) ||
-	    (pipe_id >= IA_CSS_PIPE_ID_NUM)) {
+	if (buf_type == IA_CSS_BUFFER_TYPE_INVALID ||
+	    buf_type >= IA_CSS_NUM_DYNAMIC_BUFFER_TYPE ||
+	    pipe_id >= IA_CSS_PIPE_ID_NUM) {
 		IA_CSS_LEAVE_ERR(-EINVAL);
 		return -EINVAL;
 	}
@@ -4073,11 +4071,11 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 		}
 		ddr_buffer.kernel_ptr = HOST_ADDRESS(buffer->data.metadata);
 		ddr_buffer.payload.metadata = *buffer->data.metadata;
-	} else if ((buf_type == IA_CSS_BUFFER_TYPE_INPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME)) {
+	} else if (buf_type == IA_CSS_BUFFER_TYPE_INPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME) {
 		if (!buffer->data.frame) {
 			IA_CSS_LEAVE_ERR(-EINVAL);
 			return -EINVAL;
@@ -4117,9 +4115,9 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 	hmm_store(h_vbuf->vptr,
 		  (void *)(&ddr_buffer),
 		  sizeof(struct sh_css_hmm_buffer));
-	if ((buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS)
-	    || (buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS)
-	    || (buf_type == IA_CSS_BUFFER_TYPE_LACE_STATISTICS)) {
+	if (buf_type == IA_CSS_BUFFER_TYPE_3A_STATISTICS ||
+	    buf_type == IA_CSS_BUFFER_TYPE_DIS_STATISTICS ||
+	    buf_type == IA_CSS_BUFFER_TYPE_LACE_STATISTICS) {
 		if (!pipeline) {
 			ia_css_rmgr_rel_vbuf(hmm_buffer_pool, &h_vbuf);
 			IA_CSS_LOG("pipeline is empty!");
@@ -4137,18 +4135,18 @@ ia_css_pipe_enqueue_buffer(struct ia_css_pipe *pipe,
 									(uint32_t)h_vbuf->vptr);
 			}
 		}
-	} else if ((buf_type == IA_CSS_BUFFER_TYPE_INPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME)
-		   || (buf_type == IA_CSS_BUFFER_TYPE_METADATA)) {
+	} else if (buf_type == IA_CSS_BUFFER_TYPE_INPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_SEC_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME ||
+		   buf_type == IA_CSS_BUFFER_TYPE_METADATA) {
 		return_err = ia_css_bufq_enqueue_buffer(thread_id,
 							queue_id,
 							(uint32_t)h_vbuf->vptr);
 #if defined(SH_CSS_ENABLE_PER_FRAME_PARAMS)
-		if (!(return_err) &&
-		    (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME)) {
+		if (!return_err &&
+		    buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME) {
 			IA_CSS_LOG("pfp: enqueued OF %d to q %d thread %d",
 				   ddr_buffer.payload.frame.frame_data,
 				   queue_id, thread_id);
@@ -4300,8 +4298,8 @@ ia_css_pipe_dequeue_buffer(struct ia_css_pipe *pipe,
 			buffer->driver_cookie = ddr_buffer.cookie_ptr;
 			buffer->timing_data = ddr_buffer.timing_data;
 
-			if ((buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME) ||
-			    (buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME)) {
+			if (buf_type == IA_CSS_BUFFER_TYPE_OUTPUT_FRAME ||
+			    buf_type == IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME) {
 				buffer->isys_eof_clock_tick.ticks = ddr_buffer.isys_eof_clock_tick;
 			}
 
@@ -6194,8 +6192,7 @@ allocate_delay_frames(struct ia_css_pipe *pipe)
 	return 0;
 }
 
-static int load_advanced_binaries(
-    struct ia_css_pipe *pipe)
+static int load_advanced_binaries(struct ia_css_pipe *pipe)
 {
 	struct ia_css_frame_info pre_in_info, gdc_in_info,
 			post_in_info, post_out_info,
@@ -6314,8 +6311,7 @@ static int load_advanced_binaries(
 	return err;
 }
 
-static int load_bayer_isp_binaries(
-    struct ia_css_pipe *pipe)
+static int load_bayer_isp_binaries(struct ia_css_pipe *pipe)
 {
 	struct ia_css_frame_info pre_isp_in_info, *pipe_out_info;
 	int err = 0;
@@ -6344,8 +6340,7 @@ static int load_bayer_isp_binaries(
 	return err;
 }
 
-static int load_low_light_binaries(
-    struct ia_css_pipe *pipe)
+static int load_low_light_binaries(struct ia_css_pipe *pipe)
 {
 	struct ia_css_frame_info pre_in_info, anr_in_info,
 			post_in_info, post_out_info,
@@ -6484,8 +6479,7 @@ static bool copy_on_sp(struct ia_css_pipe *pipe)
 	return rval;
 }
 
-static int load_capture_binaries(
-    struct ia_css_pipe *pipe)
+static int load_capture_binaries(struct ia_css_pipe *pipe)
 {
 	int err = 0;
 	bool must_be_raw;
@@ -6560,7 +6554,8 @@ unload_capture_binaries(struct ia_css_pipe *pipe)
 
 	IA_CSS_ENTER_PRIVATE("pipe = %p", pipe);
 
-	if ((!pipe) || ((pipe->mode != IA_CSS_PIPE_ID_CAPTURE) && (pipe->mode != IA_CSS_PIPE_ID_COPY))) {
+	if (!pipe || (pipe->mode != IA_CSS_PIPE_ID_CAPTURE &&
+		      pipe->mode != IA_CSS_PIPE_ID_COPY)) {
 		IA_CSS_LEAVE_ERR_PRIVATE(-EINVAL);
 		return -EINVAL;
 	}
@@ -6637,10 +6632,10 @@ need_yuv_scaler_stage(const struct ia_css_pipe *pipe)
 /* which has some hard-coded knowledge which prevents reuse of the function. */
 /* Later, merge this with ia_css_pipe_create_cas_scaler_desc */
 static int ia_css_pipe_create_cas_scaler_desc_single_output(
-    struct ia_css_frame_info *cas_scaler_in_info,
-    struct ia_css_frame_info *cas_scaler_out_info,
-    struct ia_css_frame_info *cas_scaler_vf_info,
-    struct ia_css_cas_binary_descr *descr)
+	    struct ia_css_frame_info *cas_scaler_in_info,
+	    struct ia_css_frame_info *cas_scaler_out_info,
+	    struct ia_css_frame_info *cas_scaler_vf_info,
+	    struct ia_css_cas_binary_descr *descr)
 {
 	unsigned int i;
 	unsigned int hor_ds_factor = 0, ver_ds_factor = 0;
@@ -6762,9 +6757,9 @@ ERR:
 }
 
 /* FIXME: merge most of this and single output version */
-static int ia_css_pipe_create_cas_scaler_desc(
-    struct ia_css_pipe *pipe,
-    struct ia_css_cas_binary_descr *descr)
+static int
+ia_css_pipe_create_cas_scaler_desc(struct ia_css_pipe *pipe,
+				   struct ia_css_cas_binary_descr *descr)
 {
 	struct ia_css_frame_info in_info = IA_CSS_BINARY_DEFAULT_FRAME_INFO;
 	struct ia_css_frame_info *out_info[IA_CSS_PIPE_MAX_OUTPUT_STAGE];
@@ -7970,7 +7965,9 @@ create_host_regular_capture_pipeline(struct ia_css_pipe *pipe)
 	    * should not be considered as a clean solution. Proper
 	    * investigation should be done to come up with the clean solution.
 	    * */
-	if (mode != IA_CSS_CAPTURE_MODE_RAW && mode != IA_CSS_CAPTURE_MODE_BAYER && current_stage && vf_frame) {
+	if (mode != IA_CSS_CAPTURE_MODE_RAW &&
+	    mode != IA_CSS_CAPTURE_MODE_BAYER &&
+	    current_stage && vf_frame) {
 		in_frame = current_stage->args.out_vf_frame;
 		err = add_vf_pp_stage(pipe, in_frame, vf_frame, vf_pp_binary,
 				      &current_stage);
@@ -8008,8 +8005,7 @@ create_host_capture_pipeline(struct ia_css_pipe *pipe)
 	return err;
 }
 
-static int capture_start(
-    struct ia_css_pipe *pipe)
+static int capture_start(struct ia_css_pipe *pipe)
 {
 	struct ia_css_pipeline *me;
 
@@ -8410,11 +8406,8 @@ int ia_css_stream_capture_frame(struct ia_css_stream *stream,
     * @brief Configure the continuous capture.
     * Refer to "sh_css_internal.h" for details.
     */
-int ia_css_stream_capture(
-    struct ia_css_stream *stream,
-    int num_captures,
-    unsigned int skip,
-    int offset)
+int ia_css_stream_capture(struct ia_css_stream *stream, int num_captures,
+			  unsigned int skip, int offset)
 {
 	struct sh_css_tag_descr tag_descr;
 	unsigned int encoded_tag_descr;
-- 
GitLab


From 43692e9e1b8b93d0e26e4a752adc41973863ecb2 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:05:41 +0200
Subject: [PATCH 0999/3804] media: staging: media: atomisp: balance braces
 around if...else block

Balance braces around the if else blocks as per the code style guidelines.
Add braces to branches where they are missing. Resolves checkpatch script
CHECK / WARNING feedback messages.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/i2c/atomisp-gc2235.c  | 4 ++--
 drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
index 6ba4a8adff7c2..e722c639b60d0 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
@@ -658,9 +658,9 @@ static int gc2235_s_power(struct v4l2_subdev *sd, int on)
 {
 	int ret;
 
-	if (on == 0)
+	if (on == 0) {
 		ret = power_down(sd);
-	else {
+	} else {
 		ret = power_up(sd);
 		if (!ret)
 			ret = __gc2235_init(sd);
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index f5de81132177d..465fc44684422 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -568,9 +568,9 @@ static int power_down(struct v4l2_subdev *sd)
 
 static int mt9m114_s_power(struct v4l2_subdev *sd, int power)
 {
-	if (power == 0)
+	if (power == 0) {
 		return power_down(sd);
-	else {
+	} else {
 		if (power_up(sd))
 			return -EINVAL;
 
-- 
GitLab


From 21837c2c27cdc0ab768b64c8f626b6738604e37d Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:06:50 +0200
Subject: [PATCH 1000/3804] media: staging: media: atomisp: remove unnecessary
 braces

According to the coding style guidelines, if...else blocks with single
instructions do not need enclosing braces. This resolves checkpatch
WARNING / CHECK complaints.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/i2c/atomisp-ov2680.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index c90730513438b..f167781e258ac 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -461,11 +461,11 @@ static int ov2680_v_flip(struct v4l2_subdev *sd, s32 value)
 	ret = ov2680_read_reg(client, 1, OV2680_FLIP_REG, &val);
 	if (ret)
 		return ret;
-	if (value) {
+	if (value)
 		val |= OV2680_FLIP_MIRROR_BIT_ENABLE;
-	} else {
+	else
 		val &= ~OV2680_FLIP_MIRROR_BIT_ENABLE;
-	}
+
 	ret = ov2680_write_reg(client, 1,
 			       OV2680_FLIP_REG, val);
 	if (ret)
-- 
GitLab


From f50559f0c9b43b023476664724e8494fbe9de4fc Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:08:45 +0200
Subject: [PATCH 1001/3804] media: staging: media: atomisp: reformat code
 comment blocks

Reformat code comment blocks according to the coding style guidelines.
This resolves different checkpatch script WARNINGs around block comments.

[hverkuil: fixed up one missed '/* text' comment as reported by Fabio]

Suggested-by: Fabio Aiuto <fabioaiuto83@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/atomisp/i2c/atomisp-gc2235.c        | 19 ++++---
 .../atomisp/i2c/atomisp-libmsrlisthelper.c    |  6 ++-
 .../media/atomisp/i2c/atomisp-mt9m114.c       | 49 ++++++++++++-------
 .../media/atomisp/i2c/atomisp-ov2680.c        | 20 +++++---
 drivers/staging/media/atomisp/i2c/mt9m114.h   |  6 ++-
 drivers/staging/media/atomisp/i2c/ov2680.h    | 10 ++--
 6 files changed, 67 insertions(+), 43 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
index e722c639b60d0..38defa0f81513 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
@@ -228,7 +228,7 @@ static int gc2235_g_focal(struct v4l2_subdev *sd, s32 *val)
 
 static int gc2235_g_fnumber(struct v4l2_subdev *sd, s32 *val)
 {
-	/*const f number for imx*/
+	/* const f number for imx */
 	*val = (GC2235_F_NUMBER_DEFAULT_NUM << 16) | GC2235_F_NUMBER_DEM;
 	return 0;
 }
@@ -427,7 +427,8 @@ static long gc2235_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 	return 0;
 }
 
-/* This returns the exposure time being used. This should only be used
+/*
+ * This returns the exposure time being used. This should only be used
  * for filling in EXIF data, not for actual image processing.
  */
 static int gc2235_q_exposure(struct v4l2_subdev *sd, s32 *value)
@@ -746,11 +747,12 @@ static int startup(struct v4l2_subdev *sd)
 	int ret = 0;
 
 	if (is_init == 0) {
-		/* force gc2235 to do a reset in res change, otherwise it
-		* can not output normal after switching res. and it is not
-		* necessary for first time run up after power on, for the sack
-		* of performance
-		*/
+		/*
+		 * force gc2235 to do a reset in res change, otherwise it
+		 * can not output normal after switching res. and it is not
+		 * necessary for first time run up after power on, for the sack
+		 * of performance
+		 */
 		power_down(sd);
 		power_up(sd);
 		gc2235_write_reg_array(client, gc2235_init_settings);
@@ -904,7 +906,8 @@ static int gc2235_s_config(struct v4l2_subdev *sd,
 	    (struct camera_sensor_platform_data *)platform_data;
 
 	mutex_lock(&dev->input_lock);
-	/* power off the module, then power on it in future
+	/*
+	 * power off the module, then power on it in future
 	 * as first power on by board may not fulfill the
 	 * power on sequqence needed by the module
 	 */
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-libmsrlisthelper.c b/drivers/staging/media/atomisp/i2c/atomisp-libmsrlisthelper.c
index b93c80471f229..7a20d918a9d5a 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-libmsrlisthelper.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-libmsrlisthelper.c
@@ -50,14 +50,16 @@ struct tbd_data_record_header {
 static int set_msr_configuration(struct i2c_client *client, uint8_t *bufptr,
 				 unsigned int size)
 {
-	/* The configuration data contains any number of sequences where
+	/*
+	 * The configuration data contains any number of sequences where
 	 * the first byte (that is, uint8_t) that marks the number of bytes
 	 * in the sequence to follow, is indeed followed by the indicated
 	 * number of bytes of actual data to be written to sensor.
 	 * By convention, the first two bytes of actual data should be
 	 * understood as an address in the sensor address space (hibyte
 	 * followed by lobyte) where the remaining data in the sequence
-	 * will be written. */
+	 * will be written.
+	 */
 
 	u8 *ptr = bufptr;
 
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index 465fc44684422..a5f0b4848ddfd 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -475,10 +475,12 @@ static int gpio_ctrl(struct v4l2_subdev *sd, bool flag)
 	if (!dev || !dev->platform_data)
 		return -ENODEV;
 
-	/* Note: current modules wire only one GPIO signal (RESET#),
+	/*
+	 * Note: current modules wire only one GPIO signal (RESET#),
 	 * but the schematic wires up two to the connector.  BIOS
 	 * versions have been unfortunately inconsistent with which
-	 * ACPI index RESET# is on, so hit both */
+	 * ACPI index RESET# is on, so hit both
+	 */
 
 	if (flag) {
 		ret = dev->platform_data->gpio0_ctrl(sd, 0);
@@ -560,7 +562,7 @@ static int power_down(struct v4l2_subdev *sd)
 	if (ret)
 		dev_err(&client->dev, "vprog failed.\n");
 
-	/*according to DS, 20ms is needed after power down*/
+	/* according to DS, 20ms is needed after power down */
 	msleep(20);
 
 	return ret;
@@ -947,7 +949,7 @@ static int mt9m114_g_focal(struct v4l2_subdev *sd, s32 *val)
 
 static int mt9m114_g_fnumber(struct v4l2_subdev *sd, s32 *val)
 {
-	/*const f number for mt9m114*/
+	/* const f number for mt9m114 */
 	*val = (MT9M114_F_NUMBER_DEFAULT_NUM << 16) | MT9M114_F_NUMBER_DEM;
 	return 0;
 }
@@ -1008,8 +1010,10 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 		exposure->gain[1]);
 
 	coarse_integration = exposure->integration_time[0];
-	/* fine_integration = ExposureTime.FineIntegrationTime; */
-	/* FrameLengthLines = ExposureTime.FrameLengthLines; */
+	/*
+	 * fine_integration = ExposureTime.FineIntegrationTime;
+	 * FrameLengthLines = ExposureTime.FrameLengthLines;
+	 */
 	FLines = mt9m114_res[dev->res].lines_per_frame;
 	AnalogGain = exposure->gain[0];
 	DigitalGain = exposure->gain[1];
@@ -1019,8 +1023,8 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 		dev->first_gain = AnalogGain;
 		dev->first_diggain = DigitalGain;
 	}
-	/* DigitalGain = 0x400 * (((u16) DigitalGain) >> 8) +
-	((unsigned int)(0x400 * (((u16) DigitalGain) & 0xFF)) >>8); */
+	/* DigitalGain = 0x400 * (((u16) DigitalGain) >> 8) +		*/
+	/* ((unsigned int)(0x400 * (((u16) DigitalGain) & 0xFF)) >>8);	*/
 
 	/* set frame length */
 	if (FLines < coarse_integration + 6)
@@ -1034,8 +1038,10 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 	}
 
 	/* set coarse integration */
-	/* 3A provide real exposure time.
-		should not translate to any value here. */
+	/*
+	 * 3A provide real exposure time.
+	 * should not translate to any value here.
+	 */
 	ret = mt9m114_write_reg(client, MISENSOR_16BIT,
 				REG_EXPO_COARSE, (u16)(coarse_integration));
 	if (ret) {
@@ -1044,7 +1050,7 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 	}
 
 	/*
-	// set analog/digital gain
+	 * set analog/digital gain
 	switch(AnalogGain)
 	{
 	case 0:
@@ -1069,8 +1075,9 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 	*/
 	if (DigitalGain >= 16 || DigitalGain <= 1)
 		DigitalGain = 1;
-	/* AnalogGainToWrite =
-		(u16)((DigitalGain << 12) | AnalogGainToWrite); */
+	/*
+	 * AnalogGainToWrite = (u16)((DigitalGain << 12) | AnalogGainToWrite);
+	 */
 	AnalogGainToWrite = (u16)((DigitalGain << 12) | (u16)AnalogGain);
 	ret = mt9m114_write_reg(client, MISENSOR_16BIT,
 				REG_GAIN, AnalogGainToWrite);
@@ -1095,8 +1102,10 @@ static long mt9m114_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 	return 0;
 }
 
-/* This returns the exposure time being used. This should only be used
-   for filling in EXIF data, not for actual image processing. */
+/*
+ * This returns the exposure time being used. This should only be used
+ * for filling in EXIF data, not for actual image processing.
+ */
 static int mt9m114_g_exposure(struct v4l2_subdev *sd, s32 *value)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -1247,7 +1256,8 @@ static int mt9m114_s_ev(struct v4l2_subdev *sd, s32 val)
 	s32 luma = 0x37;
 	int err;
 
-	/* EV value only support -2 to 2
+	/*
+	 * EV value only support -2 to 2
 	 * 0: 0x37, 1:0x47, 2:0x57, -1:0x27, -2:0x17
 	 */
 	if (val < -2 || val > 2)
@@ -1295,9 +1305,10 @@ static int mt9m114_g_ev(struct v4l2_subdev *sd, s32 *val)
 	return 0;
 }
 
-/* Fake interface
+/*
+ * Fake interface
  * mt9m114 now can not support 3a_lock
-*/
+ */
 static int mt9m114_s_3a_lock(struct v4l2_subdev *sd, s32 val)
 {
 	aaalock = val;
@@ -1843,7 +1854,7 @@ static int mt9m114_probe(struct i2c_client *client)
 		return ret;
 	}
 
-	/*TODO add format code here*/
+	/* TODO add format code here */
 	dev->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
 	dev->pad.flags = MEDIA_PAD_FL_SOURCE;
 	dev->format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index f167781e258ac..eb1ecd198c22f 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -127,7 +127,7 @@ static int ov2680_g_focal(struct v4l2_subdev *sd, s32 *val)
 
 static int ov2680_g_fnumber(struct v4l2_subdev *sd, s32 *val)
 {
-	/*const f number for ov2680*/
+	/* const f number for ov2680 */
 
 	*val = (OV2680_F_NUMBER_DEFAULT_NUM << 16) | OV2680_F_NUMBER_DEM;
 	return 0;
@@ -399,7 +399,8 @@ static long ov2680_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 	return 0;
 }
 
-/* This returns the exposure time being used. This should only be used
+/*
+ * This returns the exposure time being used. This should only be used
  * for filling in EXIF data, not for actual image processing.
  */
 static int ov2680_q_exposure(struct v4l2_subdev *sd, s32 *value)
@@ -727,11 +728,13 @@ static int gpio_ctrl(struct v4l2_subdev *sd, bool flag)
 	if (!dev || !dev->platform_data)
 		return -ENODEV;
 
-	/* The OV2680 documents only one GPIO input (#XSHUTDN), but
+	/*
+	 * The OV2680 documents only one GPIO input (#XSHUTDN), but
 	 * existing integrations often wire two (reset/power_down)
 	 * because that is the way other sensors work.  There is no
 	 * way to tell how it is wired internally, so existing
-	 * firmwares expose both and we drive them symmetrically. */
+	 * firmwares expose both and we drive them symmetrically.
+	 */
 	if (flag) {
 		ret = dev->platform_data->gpio0_ctrl(sd, 1);
 		usleep_range(10000, 15000);
@@ -977,7 +980,8 @@ static int ov2680_set_fmt(struct v4l2_subdev *sd,
 		goto err;
 	}
 
-	/*recall flip functions to avoid flip registers
+	/*
+	 * recall flip functions to avoid flip registers
 	 * were overridden by default setting
 	 */
 	if (h_flag)
@@ -987,7 +991,8 @@ static int ov2680_set_fmt(struct v4l2_subdev *sd,
 
 	v4l2_info(client, "\n%s idx %d\n", __func__, dev->fmt_idx);
 
-	/*ret = startup(sd);
+	/*
+	 * ret = startup(sd);
 	 * if (ret)
 	 * dev_err(&client->dev, "ov2680 startup err\n");
 	 */
@@ -1096,7 +1101,8 @@ static int ov2680_s_config(struct v4l2_subdev *sd,
 	    (struct camera_sensor_platform_data *)platform_data;
 
 	mutex_lock(&dev->input_lock);
-	/* power off the module, then power on it in future
+	/*
+	 * power off the module, then power on it in future
 	 * as first power on by board may not fulfill the
 	 * power on sequqence needed by the module
 	 */
diff --git a/drivers/staging/media/atomisp/i2c/mt9m114.h b/drivers/staging/media/atomisp/i2c/mt9m114.h
index 787bbf59e895c..bcce18b65fa68 100644
--- a/drivers/staging/media/atomisp/i2c/mt9m114.h
+++ b/drivers/staging/media/atomisp/i2c/mt9m114.h
@@ -764,8 +764,10 @@ static struct misensor_reg const mt9m114_common[] = {
 	{MISENSOR_8BIT, 0xC85C, 0x03},    /* cam_crop_cropmode = 3 */
 	{MISENSOR_16BIT, 0xC868, 0x0280}, /* cam_output_width = 952 */
 	{MISENSOR_16BIT, 0xC86A, 0x01E0}, /* cam_output_height = 538 */
-	/* LOAD = Step3-Recommended
-	 * Patch,Errata and Sensor optimization Setting */
+	/*
+	 * LOAD = Step3-Recommended
+	 * Patch, Errata and Sensor optimization Setting
+	 */
 	{MISENSOR_16BIT, 0x316A, 0x8270}, /* DAC_TXLO_ROW */
 	{MISENSOR_16BIT, 0x316C, 0x8270}, /* DAC_TXLO */
 	{MISENSOR_16BIT, 0x3ED0, 0x2305}, /* DAC_LD_4_5 */
diff --git a/drivers/staging/media/atomisp/i2c/ov2680.h b/drivers/staging/media/atomisp/i2c/ov2680.h
index 49920245e0647..4d43b45915e56 100644
--- a/drivers/staging/media/atomisp/i2c/ov2680.h
+++ b/drivers/staging/media/atomisp/i2c/ov2680.h
@@ -459,8 +459,8 @@ static struct ov2680_reg const ov2680_656x496_30fps[] = {
 };
 
 /*
-* 800x600 30fps  VBlanking 1lane 10Bit (binning)
-*/
+ * 800x600 30fps  VBlanking 1lane 10Bit (binning)
+ */
 static struct ov2680_reg const ov2680_720x592_30fps[] = {
 	{0x3086, 0x01},
 	{0x3501, 0x26},
@@ -504,8 +504,8 @@ static struct ov2680_reg const ov2680_720x592_30fps[] = {
 };
 
 /*
-* 800x600 30fps  VBlanking 1lane 10Bit (binning)
-*/
+ * 800x600 30fps  VBlanking 1lane 10Bit (binning)
+ */
 static struct ov2680_reg const ov2680_800x600_30fps[] = {
 	{0x3086, 0x01},
 	{0x3501, 0x26},
@@ -634,7 +634,7 @@ static struct ov2680_reg const ov2680_1296x976_30fps[] = {
 
 /*
  *   1456*1096 30fps  VBlanking 1lane 10bit(no-scaling)
-*/
+ */
 static struct ov2680_reg const ov2680_1456x1096_30fps[] = {
 	{0x3086, 0x00},
 	{0x3501, 0x48},
-- 
GitLab


From 4c999ae366e13bdf1f961fdf4c4cefe3d772f275 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:09:20 +0200
Subject: [PATCH 1002/3804] media: staging: media: atomisp: fix CamelCase
 variable naming

Mixed case variable names are discouraged and they result in checkpatch
script "Avoid CamelCase" warnings. Replace such CamelCase variable names
by lower case strings according to the coding style guidelines.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/atomisp/i2c/atomisp-mt9m114.c       | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index a5f0b4848ddfd..0a6f8f68b215a 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -1000,10 +1000,10 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 	struct mt9m114_device *dev = to_mt9m114_sensor(sd);
 	int ret = 0;
 	unsigned int coarse_integration = 0;
-	unsigned int FLines = 0;
-	unsigned int FrameLengthLines = 0; /* ExposureTime.FrameLengthLines; */
-	unsigned int AnalogGain, DigitalGain;
-	u32 AnalogGainToWrite = 0;
+	unsigned int f_lines = 0;
+	unsigned int frame_len_lines = 0; /* ExposureTime.FrameLengthLines; */
+	unsigned int analog_gain, digital_gain;
+	u32 analog_gain_to_write = 0;
 
 	dev_dbg(&client->dev, "%s(0x%X 0x%X 0x%X)\n", __func__,
 		exposure->integration_time[0], exposure->gain[0],
@@ -1012,28 +1012,28 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 	coarse_integration = exposure->integration_time[0];
 	/*
 	 * fine_integration = ExposureTime.FineIntegrationTime;
-	 * FrameLengthLines = ExposureTime.FrameLengthLines;
+	 * frame_len_lines = ExposureTime.FrameLengthLines;
 	 */
-	FLines = mt9m114_res[dev->res].lines_per_frame;
-	AnalogGain = exposure->gain[0];
-	DigitalGain = exposure->gain[1];
+	f_lines = mt9m114_res[dev->res].lines_per_frame;
+	analog_gain = exposure->gain[0];
+	digital_gain = exposure->gain[1];
 	if (!dev->streamon) {
 		/*Save the first exposure values while stream is off*/
 		dev->first_exp = coarse_integration;
-		dev->first_gain = AnalogGain;
-		dev->first_diggain = DigitalGain;
+		dev->first_gain = analog_gain;
+		dev->first_diggain = digital_gain;
 	}
-	/* DigitalGain = 0x400 * (((u16) DigitalGain) >> 8) +		*/
-	/* ((unsigned int)(0x400 * (((u16) DigitalGain) & 0xFF)) >>8);	*/
+	/* digital_gain = 0x400 * (((u16) digital_gain) >> 8) +		*/
+	/* ((unsigned int)(0x400 * (((u16) digital_gain) & 0xFF)) >>8); */
 
 	/* set frame length */
-	if (FLines < coarse_integration + 6)
-		FLines = coarse_integration + 6;
-	if (FLines < FrameLengthLines)
-		FLines = FrameLengthLines;
-	ret = mt9m114_write_reg(client, MISENSOR_16BIT, 0x300A, FLines);
+	if (f_lines < coarse_integration + 6)
+		f_lines = coarse_integration + 6;
+	if (f_lines < frame_len_lines)
+		f_lines = frame_len_lines;
+	ret = mt9m114_write_reg(client, MISENSOR_16BIT, 0x300A, f_lines);
 	if (ret) {
-		v4l2_err(client, "%s: fail to set FLines\n", __func__);
+		v4l2_err(client, "%s: fail to set f_lines\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1051,38 +1051,39 @@ static long mt9m114_s_exposure(struct v4l2_subdev *sd,
 
 	/*
 	 * set analog/digital gain
-	switch(AnalogGain)
+	switch(analog_gain)
 	{
 	case 0:
-	  AnalogGainToWrite = 0x0;
+	  analog_gain_to_write = 0x0;
 	  break;
 	case 1:
-	  AnalogGainToWrite = 0x20;
+	  analog_gain_to_write = 0x20;
 	  break;
 	case 2:
-	  AnalogGainToWrite = 0x60;
+	  analog_gain_to_write = 0x60;
 	  break;
 	case 4:
-	  AnalogGainToWrite = 0xA0;
+	  analog_gain_to_write = 0xA0;
 	  break;
 	case 8:
-	  AnalogGainToWrite = 0xE0;
+	  analog_gain_to_write = 0xE0;
 	  break;
 	default:
-	  AnalogGainToWrite = 0x20;
+	  analog_gain_to_write = 0x20;
 	  break;
 	}
 	*/
-	if (DigitalGain >= 16 || DigitalGain <= 1)
-		DigitalGain = 1;
+	if (digital_gain >= 16 || digital_gain <= 1)
+		digital_gain = 1;
 	/*
-	 * AnalogGainToWrite = (u16)((DigitalGain << 12) | AnalogGainToWrite);
+	 * analog_gain_to_write = (u16)((digital_gain << 12)
+	 *				| analog_gain_to_write);
 	 */
-	AnalogGainToWrite = (u16)((DigitalGain << 12) | (u16)AnalogGain);
+	analog_gain_to_write = (u16)((digital_gain << 12) | (u16)analog_gain);
 	ret = mt9m114_write_reg(client, MISENSOR_16BIT,
-				REG_GAIN, AnalogGainToWrite);
+				REG_GAIN, analog_gain_to_write);
 	if (ret) {
-		v4l2_err(client, "%s: fail to set AnalogGainToWrite\n",
+		v4l2_err(client, "%s: fail to set analog_gain_to_write\n",
 			 __func__);
 		return -EINVAL;
 	}
-- 
GitLab


From 2dfc978ac0f3aaa5de4eb604ef7f35bacd077a03 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:09:55 +0200
Subject: [PATCH 1003/3804] media: staging: media: atomisp: replace raw pr_*()
 by dev_dbg()

It is recommended to use driver model diagnostic macros dev_*() instead
of raw printk() or pr_*() since the former ensures that the log messages
are always associated with the corresponding device and driver. This also
addresses the checkpatch complaint about not using the KERN_<LEVEL>
facility in printk() calls.

Suggested-by: Fabio Aiuto <fabioaiuto83@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/atomisp/i2c/atomisp-gc0310.c        | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
index d170d0adfea40..e1de3cf68893e 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
@@ -300,7 +300,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 	/* pixel clock calculattion */
 	dev->vt_pix_clk_freq_mhz = 14400000; // 16.8MHz
 	buf->vt_pix_clk_freq_mhz = dev->vt_pix_clk_freq_mhz;
-	pr_info("vt_pix_clk_freq_mhz=%d\n", buf->vt_pix_clk_freq_mhz);
+	dev_dbg(&client->dev, "vt_pix_clk_freq_mhz=%d\n", buf->vt_pix_clk_freq_mhz);
 
 	/* get integration time */
 	buf->coarse_integration_time_min = GC0310_COARSE_INTG_TIME_MIN;
@@ -326,7 +326,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 	if (ret)
 		return ret;
 	buf->crop_horizontal_start = val | (reg_val & 0xFF);
-	pr_info("crop_horizontal_start=%d\n", buf->crop_horizontal_start);
+	dev_dbg(&client->dev, "crop_horizontal_start=%d\n", buf->crop_horizontal_start);
 
 	/* Getting crop_vertical_start */
 	ret =  gc0310_read_reg(client, GC0310_8BIT,
@@ -339,7 +339,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 	if (ret)
 		return ret;
 	buf->crop_vertical_start = val | (reg_val & 0xFF);
-	pr_info("crop_vertical_start=%d\n", buf->crop_vertical_start);
+	dev_dbg(&client->dev, "crop_vertical_start=%d\n", buf->crop_vertical_start);
 
 	/* Getting output_width */
 	ret = gc0310_read_reg(client, GC0310_8BIT,
@@ -352,7 +352,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 	if (ret)
 		return ret;
 	buf->output_width = val | (reg_val & 0xFF);
-	pr_info("output_width=%d\n", buf->output_width);
+	dev_dbg(&client->dev, "output_width=%d\n", buf->output_width);
 
 	/* Getting output_height */
 	ret = gc0310_read_reg(client, GC0310_8BIT,
@@ -365,12 +365,12 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 	if (ret)
 		return ret;
 	buf->output_height = val | (reg_val & 0xFF);
-	pr_info("output_height=%d\n", buf->output_height);
+	dev_dbg(&client->dev, "output_height=%d\n", buf->output_height);
 
 	buf->crop_horizontal_end = buf->crop_horizontal_start + buf->output_width - 1;
 	buf->crop_vertical_end = buf->crop_vertical_start + buf->output_height - 1;
-	pr_info("crop_horizontal_end=%d\n", buf->crop_horizontal_end);
-	pr_info("crop_vertical_end=%d\n", buf->crop_vertical_end);
+	dev_dbg(&client->dev, "crop_horizontal_end=%d\n", buf->crop_horizontal_end);
+	dev_dbg(&client->dev, "crop_vertical_end=%d\n", buf->crop_vertical_end);
 
 	/* Getting line_length_pck */
 	ret = gc0310_read_reg(client, GC0310_8BIT,
@@ -389,7 +389,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 		return ret;
 	sh_delay = reg_val;
 	buf->line_length_pck = buf->output_width + hori_blanking + sh_delay + 4;
-	pr_info("hori_blanking=%d sh_delay=%d line_length_pck=%d\n", hori_blanking,
+	dev_dbg(&client->dev, "hori_blanking=%d sh_delay=%d line_length_pck=%d\n", hori_blanking,
 		sh_delay, buf->line_length_pck);
 
 	/* Getting frame_length_lines */
@@ -404,7 +404,7 @@ static int gc0310_get_intg_factor(struct i2c_client *client,
 		return ret;
 	vert_blanking = val | (reg_val & 0xFF);
 	buf->frame_length_lines = buf->output_height + vert_blanking;
-	pr_info("vert_blanking=%d frame_length_lines=%d\n", vert_blanking,
+	dev_dbg(&client->dev, "vert_blanking=%d frame_length_lines=%d\n", vert_blanking,
 		buf->frame_length_lines);
 
 	buf->binning_factor_x = res->bin_factor_x ?
@@ -434,7 +434,7 @@ static int gc0310_set_gain(struct v4l2_subdev *sd, int gain)
 		dgain = gain / 2;
 	}
 
-	pr_info("gain=0x%x again=0x%x dgain=0x%x\n", gain, again, dgain);
+	dev_dbg(&client->dev, "gain=0x%x again=0x%x dgain=0x%x\n", gain, again, dgain);
 
 	/* set analog gain */
 	ret = gc0310_write_reg(client, GC0310_8BIT,
@@ -458,7 +458,7 @@ static int __gc0310_set_exposure(struct v4l2_subdev *sd, int coarse_itg,
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	int ret;
 
-	pr_info("coarse_itg=%d gain=%d digitgain=%d\n", coarse_itg, gain, digitgain);
+	dev_dbg(&client->dev, "coarse_itg=%d gain=%d digitgain=%d\n", coarse_itg, gain, digitgain);
 
 	/* set exposure */
 	ret = gc0310_write_reg(client, GC0310_8BIT,
@@ -1020,8 +1020,8 @@ static int gc0310_set_fmt(struct v4l2_subdev *sd,
 		return -EINVAL;
 	}
 
-	printk("%s: before gc0310_write_reg_array %s\n", __func__,
-	       gc0310_res[dev->fmt_idx].desc);
+	dev_dbg(&client->dev, "%s: before gc0310_write_reg_array %s\n",
+		__func__, gc0310_res[dev->fmt_idx].desc);
 	ret = startup(sd);
 	if (ret) {
 		dev_err(&client->dev, "gc0310 startup err\n");
@@ -1085,7 +1085,7 @@ static int gc0310_detect(struct i2c_client *client)
 		return -ENODEV;
 	}
 	id = ((((u16)high) << 8) | (u16)low);
-	pr_info("sensor ID = 0x%x\n", id);
+	dev_dbg(&client->dev, "sensor ID = 0x%x\n", id);
 
 	if (id != GC0310_ID) {
 		dev_err(&client->dev, "sensor ID error, read id = 0x%x, target id = 0x%x\n", id,
@@ -1106,7 +1106,7 @@ static int gc0310_s_stream(struct v4l2_subdev *sd, int enable)
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	int ret;
 
-	pr_info("%s S enable=%d\n", __func__, enable);
+	dev_dbg(&client->dev, "%s S enable=%d\n", __func__, enable);
 	mutex_lock(&dev->input_lock);
 
 	if (enable) {
-- 
GitLab


From 637959f7e273a83934c9d3c6a50af529fb46cbb6 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 28 Apr 2021 20:10:26 +0200
Subject: [PATCH 1004/3804] media: staging: media: atomisp: remove unnecessary
 pr_info calls

pr_info() messages that trace function entry / exit spam the log.
Such basic tracing is not necessary and can be removed.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../staging/media/atomisp/i2c/atomisp-gc0310.c  | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
index e1de3cf68893e..6c5a378a2eb5c 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
@@ -718,7 +718,6 @@ static int gc0310_init(struct v4l2_subdev *sd)
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	struct gc0310_device *dev = to_gc0310_sensor(sd);
 
-	pr_info("%s S\n", __func__);
 	mutex_lock(&dev->input_lock);
 
 	/* set initial registers */
@@ -730,7 +729,6 @@ static int gc0310_init(struct v4l2_subdev *sd)
 
 	mutex_unlock(&dev->input_lock);
 
-	pr_info("%s E\n", __func__);
 	return ret;
 }
 
@@ -796,7 +794,6 @@ static int power_up(struct v4l2_subdev *sd)
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	int ret;
 
-	pr_info("%s S\n", __func__);
 	if (!dev->platform_data) {
 		dev_err(&client->dev,
 			"no camera_sensor_platform_data");
@@ -823,7 +820,6 @@ static int power_up(struct v4l2_subdev *sd)
 
 	msleep(100);
 
-	pr_info("%s E\n", __func__);
 	return 0;
 
 fail_gpio:
@@ -959,15 +955,12 @@ static int startup(struct v4l2_subdev *sd)
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	int ret = 0;
 
-	pr_info("%s S\n", __func__);
-
 	ret = gc0310_write_reg_array(client, gc0310_res[dev->fmt_idx].regs);
 	if (ret) {
 		dev_err(&client->dev, "gc0310 write register err.\n");
 		return ret;
 	}
 
-	pr_info("%s E\n", __func__);
 	return ret;
 }
 
@@ -982,8 +975,6 @@ static int gc0310_set_fmt(struct v4l2_subdev *sd,
 	int ret = 0;
 	int idx = 0;
 
-	pr_info("%s S\n", __func__);
-
 	if (format->pad)
 		return -EINVAL;
 
@@ -1035,7 +1026,6 @@ static int gc0310_set_fmt(struct v4l2_subdev *sd,
 		goto err;
 	}
 
-	pr_info("%s E\n", __func__);
 err:
 	mutex_unlock(&dev->input_lock);
 	return ret;
@@ -1068,7 +1058,6 @@ static int gc0310_detect(struct i2c_client *client)
 	int ret;
 	u16 id;
 
-	pr_info("%s S\n", __func__);
 	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
 		return -ENODEV;
 
@@ -1095,8 +1084,6 @@ static int gc0310_detect(struct i2c_client *client)
 
 	dev_dbg(&client->dev, "detect gc0310 success\n");
 
-	pr_info("%s E\n", __func__);
-
 	return 0;
 }
 
@@ -1142,7 +1129,6 @@ static int gc0310_s_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	mutex_unlock(&dev->input_lock);
-	pr_info("%s E\n", __func__);
 	return ret;
 }
 
@@ -1153,7 +1139,6 @@ static int gc0310_s_config(struct v4l2_subdev *sd,
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	int ret = 0;
 
-	pr_info("%s S\n", __func__);
 	if (!platform_data)
 		return -ENODEV;
 
@@ -1196,7 +1181,6 @@ static int gc0310_s_config(struct v4l2_subdev *sd,
 	}
 	mutex_unlock(&dev->input_lock);
 
-	pr_info("%s E\n", __func__);
 	return 0;
 
 fail_csi_cfg:
@@ -1365,7 +1349,6 @@ static int gc0310_probe(struct i2c_client *client)
 	if (ret)
 		gc0310_remove(client);
 
-	pr_info("%s E\n", __func__);
 	return ret;
 out_free:
 	v4l2_device_unregister_subdev(&dev->sd);
-- 
GitLab


From 4eb48acac1e9fef09fde3079e4b2e30dc7cf2b35 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Apr 2021 11:15:31 +0200
Subject: [PATCH 1005/3804] media: s2255drv: remove redundant assignment to
 variable field

The variable 'field' is being assigned a value that is never read,
it is being updated in all the following if/else combinations. The
assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/s2255/s2255drv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c
index 4af55e2478be1..3b0e4ed75d99c 100644
--- a/drivers/media/usb/s2255/s2255drv.c
+++ b/drivers/media/usb/s2255/s2255drv.c
@@ -767,8 +767,6 @@ static int vidioc_try_fmt_vid_cap(struct file *file, void *priv,
 	if (fmt == NULL)
 		return -EINVAL;
 
-	field = f->fmt.pix.field;
-
 	dprintk(vc->dev, 50, "%s NTSC: %d suggested width: %d, height: %d\n",
 		__func__, is_ntsc, f->fmt.pix.width, f->fmt.pix.height);
 	if (is_ntsc) {
-- 
GitLab


From b07006ff9365ddfc184e1836cd1f75355c6bb740 Mon Sep 17 00:00:00 2001
From: Sebastian Fricke <sebastian.fricke@posteo.net>
Date: Sat, 17 Apr 2021 15:34:38 +0200
Subject: [PATCH 1006/3804] media: mc: mc-entity.c: Fix typo

s/entity in the other end/entity at the other end/

[hverkuil: also remove the spurious space after 'link']

Signed-off-by: Sebastian Fricke <sebastian.fricke@posteo.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/mc/mc-entity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/mc/mc-entity.c b/drivers/media/mc/mc-entity.c
index 678b99771cfa4..f40f41977142e 100644
--- a/drivers/media/mc/mc-entity.c
+++ b/drivers/media/mc/mc-entity.c
@@ -323,7 +323,7 @@ static void media_graph_walk_iter(struct media_graph *graph)
 		return;
 	}
 
-	/* Get the entity in the other end of the link . */
+	/* Get the entity at the other end of the link. */
 	next = media_entity_other(entity, link);
 
 	/* Has the entity already been visited? */
-- 
GitLab


From 71c41518128414ebb1215a074f94ef8f3e2bf0cc Mon Sep 17 00:00:00 2001
From: Sebastian Fricke <sebastian.fricke@posteo.net>
Date: Sun, 18 Apr 2021 12:34:25 +0200
Subject: [PATCH 1007/3804] media: rkisp1: rkisp1-capture.c: Improve comments
 and fix typos

Improve the wording of the function description to increase readability.

Fix three typos:
s/during processing a frame/while processing a frame/
s/it also update/it also updates/
s/there's not buf in shadow/there's no buffer in a shadow register/

Replace the abbreviation 'buf' with the full word buffer, the
abbreviation 'config' with the verb configure, and 'regs' with registers.
The goal of this change is to ease the reading flow of the comment.

Signed-off-by: Sebastian Fricke <sebastian.fricke@posteo.net>
Reviewed-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../platform/rockchip/rkisp1/rkisp1-capture.c    | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
index 5f6c9d1623e4e..9643bdd05b7b2 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
@@ -830,8 +830,8 @@ static void rkisp1_return_all_buffers(struct rkisp1_capture *cap,
 }
 
 /*
- * Most of registers inside rockchip ISP1 have shadow register since
- * they must be not be changed during processing a frame.
+ * Most registers inside the rockchip ISP1 have shadow register since
+ * they must not be changed while processing a frame.
  * Usually, each sub-module updates its shadow register after
  * processing the last pixel of a frame.
  */
@@ -847,14 +847,14 @@ static void rkisp1_cap_stream_enable(struct rkisp1_capture *cap)
 	spin_lock_irq(&cap->buf.lock);
 	rkisp1_set_next_buf(cap);
 	cap->ops->enable(cap);
-	/* It's safe to config ACTIVE and SHADOW regs for the
+	/* It's safe to configure ACTIVE and SHADOW registers for the
 	 * first stream. While when the second is starting, do NOT
-	 * force update because it also update the first one.
+	 * force update because it also updates the first one.
 	 *
-	 * The latter case would drop one more buf(that is 2) since
-	 * there's not buf in shadow when the second FE received. This's
-	 * also required because the second FE maybe corrupt especially
-	 * when run at 120fps.
+	 * The latter case would drop one more buffer(that is 2) since
+	 * there's no buffer in a shadow register when the second FE received.
+	 * This's also required because the second FE maybe corrupt
+	 * especially when run at 120fps.
 	 */
 	if (!other->is_streaming) {
 		/* force cfg update */
-- 
GitLab


From 27ba44270b5ea3d6ec71e148051becffb51d2324 Mon Sep 17 00:00:00 2001
From: Sebastian Fricke <sebastian.fricke@posteo.net>
Date: Tue, 20 Apr 2021 19:45:22 +0200
Subject: [PATCH 1008/3804] media: rkisp1: rkisp1-params.c: Fix typos

s/when the camera active/when the camera is active/
s/thus not isr protection/therefore there is no need to acquire a lock/

Signed-off-by: Sebastian Fricke <sebastian.fricke@posteo.net>
Reviewed-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rockchip/rkisp1/rkisp1-params.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
index b6beddd988d0a..529c6e21815f3 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
@@ -1258,7 +1258,10 @@ void rkisp1_params_configure(struct rkisp1_params *params,
 	rkisp1_params_config_parameter(params);
 }
 
-/* Not called when the camera active, thus not isr protection. */
+/*
+ * Not called when the camera is active, therefore there is no need to acquire
+ * a lock.
+ */
 void rkisp1_params_disable(struct rkisp1_params *params)
 {
 	rkisp1_param_clear_bits(params, RKISP1_CIF_ISP_DPCC_MODE,
-- 
GitLab


From be8656e62e9e791837b606a027802b504a945c97 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 21 Apr 2021 21:43:45 +0200
Subject: [PATCH 1009/3804] media: cpia2: fix memory leak in cpia2_usb_probe

syzbot reported a memory leak in the cpia2 usb driver. The problem was
invalid error handling.

v4l2_device_register() is called in cpia2_init_camera_struct(), but
none of the error paths after cpia2_init_camera_struct() called
v4l2_device_unregister().

Reported-by: syzbot+d1e69c888f0d3866ead4@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/cpia2/cpia2.h      |  1 +
 drivers/media/usb/cpia2/cpia2_core.c | 12 ++++++++++++
 drivers/media/usb/cpia2/cpia2_usb.c  | 13 +++++++------
 3 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/media/usb/cpia2/cpia2.h b/drivers/media/usb/cpia2/cpia2.h
index 50835f5f7512c..57b7f1ea68da5 100644
--- a/drivers/media/usb/cpia2/cpia2.h
+++ b/drivers/media/usb/cpia2/cpia2.h
@@ -429,6 +429,7 @@ int cpia2_send_command(struct camera_data *cam, struct cpia2_command *cmd);
 int cpia2_do_command(struct camera_data *cam,
 		     unsigned int command,
 		     unsigned char direction, unsigned char param);
+void cpia2_deinit_camera_struct(struct camera_data *cam, struct usb_interface *intf);
 struct camera_data *cpia2_init_camera_struct(struct usb_interface *intf);
 int cpia2_init_camera(struct camera_data *cam);
 int cpia2_allocate_buffers(struct camera_data *cam);
diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c
index e747548ab2869..b5a2d06fb356b 100644
--- a/drivers/media/usb/cpia2/cpia2_core.c
+++ b/drivers/media/usb/cpia2/cpia2_core.c
@@ -2163,6 +2163,18 @@ static void reset_camera_struct(struct camera_data *cam)
 	cam->height = cam->params.roi.height;
 }
 
+/******************************************************************************
+ *
+ *  cpia2_deinit_camera_struct
+ *
+ *  Deinitialize camera struct
+ *****************************************************************************/
+void cpia2_deinit_camera_struct(struct camera_data *cam, struct usb_interface *intf)
+{
+	v4l2_device_unregister(&cam->v4l2_dev);
+	kfree(cam);
+}
+
 /******************************************************************************
  *
  *  cpia2_init_camera_struct
diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
index 3ab80a7b44985..76aac06f9fb8e 100644
--- a/drivers/media/usb/cpia2/cpia2_usb.c
+++ b/drivers/media/usb/cpia2/cpia2_usb.c
@@ -844,15 +844,13 @@ static int cpia2_usb_probe(struct usb_interface *intf,
 	ret = set_alternate(cam, USBIF_CMDONLY);
 	if (ret < 0) {
 		ERR("%s: usb_set_interface error (ret = %d)\n", __func__, ret);
-		kfree(cam);
-		return ret;
+		goto alt_err;
 	}
 
 
 	if((ret = cpia2_init_camera(cam)) < 0) {
 		ERR("%s: failed to initialize cpia2 camera (ret = %d)\n", __func__, ret);
-		kfree(cam);
-		return ret;
+		goto alt_err;
 	}
 	LOG("  CPiA Version: %d.%02d (%d.%d)\n",
 	       cam->params.version.firmware_revision_hi,
@@ -872,11 +870,14 @@ static int cpia2_usb_probe(struct usb_interface *intf,
 	ret = cpia2_register_camera(cam);
 	if (ret < 0) {
 		ERR("%s: Failed to register cpia2 camera (ret = %d)\n", __func__, ret);
-		kfree(cam);
-		return ret;
+		goto alt_err;
 	}
 
 	return 0;
+
+alt_err:
+	cpia2_deinit_camera_struct(cam, intf);
+	return ret;
 }
 
 /******************************************************************************
-- 
GitLab


From b2cd0b31ed896c1a6a423019ed3633e890a7f997 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 22 Apr 2021 10:21:52 +0200
Subject: [PATCH 1010/3804] media: rtl2832_sdr/vivid/airspy/hackrf/msi2500:
 drop memset of fmt.sdr.reserved

The V4L2 core already zeroes fmt.sdr.reserved, so there is no need for
drivers to do the same.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-frontends/rtl2832_sdr.c        | 4 ----
 drivers/media/test-drivers/vivid/vivid-sdr-cap.c | 3 ---
 drivers/media/usb/airspy/airspy.c                | 3 ---
 drivers/media/usb/hackrf/hackrf.c                | 3 ---
 drivers/media/usb/msi2500/msi2500.c              | 3 ---
 5 files changed, 16 deletions(-)

diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c
index ef6feb299d462..1a2f0d2adadf9 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.c
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.c
@@ -1130,8 +1130,6 @@ static int rtl2832_sdr_g_fmt_sdr_cap(struct file *file, void *priv,
 	f->fmt.sdr.pixelformat = dev->pixelformat;
 	f->fmt.sdr.buffersize = dev->buffersize;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
-
 	return 0;
 }
 
@@ -1149,7 +1147,6 @@ static int rtl2832_sdr_s_fmt_sdr_cap(struct file *file, void *priv,
 	if (vb2_is_busy(q))
 		return -EBUSY;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < dev->num_formats; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			dev->pixelformat = formats[i].pixelformat;
@@ -1177,7 +1174,6 @@ static int rtl2832_sdr_try_fmt_sdr_cap(struct file *file, void *priv,
 	dev_dbg(&pdev->dev, "pixelformat fourcc %4.4s\n",
 		(char *)&f->fmt.sdr.pixelformat);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < dev->num_formats; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			f->fmt.sdr.buffersize = formats[i].buffersize;
diff --git a/drivers/media/test-drivers/vivid/vivid-sdr-cap.c b/drivers/media/test-drivers/vivid/vivid-sdr-cap.c
index a1e52708b7cae..265db2114671f 100644
--- a/drivers/media/test-drivers/vivid/vivid-sdr-cap.c
+++ b/drivers/media/test-drivers/vivid/vivid-sdr-cap.c
@@ -455,7 +455,6 @@ int vidioc_g_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f)
 
 	f->fmt.sdr.pixelformat = dev->sdr_pixelformat;
 	f->fmt.sdr.buffersize = dev->sdr_buffersize;
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	return 0;
 }
 
@@ -468,7 +467,6 @@ int vidioc_s_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f)
 	if (vb2_is_busy(q))
 		return -EBUSY;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < ARRAY_SIZE(formats); i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			dev->sdr_pixelformat = formats[i].pixelformat;
@@ -488,7 +486,6 @@ int vidioc_try_fmt_sdr_cap(struct file *file, void *fh, struct v4l2_format *f)
 {
 	int i;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < ARRAY_SIZE(formats); i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			f->fmt.sdr.buffersize = formats[i].buffersize;
diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
index 751703db06f51..7a81be7970b2f 100644
--- a/drivers/media/usb/airspy/airspy.c
+++ b/drivers/media/usb/airspy/airspy.c
@@ -632,7 +632,6 @@ static int airspy_g_fmt_sdr_cap(struct file *file, void *priv,
 
 	f->fmt.sdr.pixelformat = s->pixelformat;
 	f->fmt.sdr.buffersize = s->buffersize;
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 
 	return 0;
 }
@@ -647,7 +646,6 @@ static int airspy_s_fmt_sdr_cap(struct file *file, void *priv,
 	if (vb2_is_busy(q))
 		return -EBUSY;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < NUM_FORMATS; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			s->pixelformat = formats[i].pixelformat;
@@ -670,7 +668,6 @@ static int airspy_try_fmt_sdr_cap(struct file *file, void *priv,
 {
 	int i;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < NUM_FORMATS; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			f->fmt.sdr.buffersize = formats[i].buffersize;
diff --git a/drivers/media/usb/hackrf/hackrf.c b/drivers/media/usb/hackrf/hackrf.c
index cec841ad74952..3e535be2c520e 100644
--- a/drivers/media/usb/hackrf/hackrf.c
+++ b/drivers/media/usb/hackrf/hackrf.c
@@ -929,7 +929,6 @@ static int hackrf_s_fmt_sdr(struct file *file, void *priv,
 	if (vb2_is_busy(q))
 		return -EBUSY;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < NUM_FORMATS; i++) {
 		if (f->fmt.sdr.pixelformat == formats[i].pixelformat) {
 			dev->pixelformat = formats[i].pixelformat;
@@ -955,7 +954,6 @@ static int hackrf_g_fmt_sdr(struct file *file, void *priv,
 	dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n",
 			(char *)&dev->pixelformat);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	f->fmt.sdr.pixelformat = dev->pixelformat;
 	f->fmt.sdr.buffersize = dev->buffersize;
 
@@ -971,7 +969,6 @@ static int hackrf_try_fmt_sdr(struct file *file, void *priv,
 	dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n",
 			(char *)&f->fmt.sdr.pixelformat);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < NUM_FORMATS; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			f->fmt.sdr.buffersize = formats[i].buffersize;
diff --git a/drivers/media/usb/msi2500/msi2500.c b/drivers/media/usb/msi2500/msi2500.c
index 63882a5248ae5..71de6b4c4e4ce 100644
--- a/drivers/media/usb/msi2500/msi2500.c
+++ b/drivers/media/usb/msi2500/msi2500.c
@@ -912,7 +912,6 @@ static int msi2500_g_fmt_sdr_cap(struct file *file, void *priv,
 
 	f->fmt.sdr.pixelformat = dev->pixelformat;
 	f->fmt.sdr.buffersize = dev->buffersize;
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 
 	return 0;
 }
@@ -930,7 +929,6 @@ static int msi2500_s_fmt_sdr_cap(struct file *file, void *priv,
 	if (vb2_is_busy(q))
 		return -EBUSY;
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < dev->num_formats; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			dev->pixelformat = formats[i].pixelformat;
@@ -957,7 +955,6 @@ static int msi2500_try_fmt_sdr_cap(struct file *file, void *priv,
 	dev_dbg(dev->dev, "pixelformat fourcc %4.4s\n",
 		(char *)&f->fmt.sdr.pixelformat);
 
-	memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved));
 	for (i = 0; i < dev->num_formats; i++) {
 		if (formats[i].pixelformat == f->fmt.sdr.pixelformat) {
 			f->fmt.sdr.buffersize = formats[i].buffersize;
-- 
GitLab


From 3d37ef41bed0854805ab9af22c422267510e1344 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Fri, 23 Apr 2021 10:00:49 +0200
Subject: [PATCH 1011/3804] media: cobalt: fix race condition in setting HPD

The cobalt_s_bit_sysctrl() function reads the old register value over
PCI, then changes a bit and writes the new value to the register.

This is used among other things for setting the HPD output pin.

But if the HPD is changed for multiple inputs at the same time,
then this causes a race condition where a stale value is read.

Serialize this function with a mutex.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/cobalt/cobalt-driver.c | 1 +
 drivers/media/pci/cobalt/cobalt-driver.h | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/cobalt/cobalt-driver.c b/drivers/media/pci/cobalt/cobalt-driver.c
index 839503e654f46..16af58f2f93cc 100644
--- a/drivers/media/pci/cobalt/cobalt-driver.c
+++ b/drivers/media/pci/cobalt/cobalt-driver.c
@@ -667,6 +667,7 @@ static int cobalt_probe(struct pci_dev *pci_dev,
 		return -ENOMEM;
 	cobalt->pci_dev = pci_dev;
 	cobalt->instance = i;
+	mutex_init(&cobalt->pci_lock);
 
 	retval = v4l2_device_register(&pci_dev->dev, &cobalt->v4l2_dev);
 	if (retval) {
diff --git a/drivers/media/pci/cobalt/cobalt-driver.h b/drivers/media/pci/cobalt/cobalt-driver.h
index bca68572b3242..12c33e035904c 100644
--- a/drivers/media/pci/cobalt/cobalt-driver.h
+++ b/drivers/media/pci/cobalt/cobalt-driver.h
@@ -251,6 +251,8 @@ struct cobalt {
 	int instance;
 	struct pci_dev *pci_dev;
 	struct v4l2_device v4l2_dev;
+	/* serialize PCI access in cobalt_s_bit_sysctrl() */
+	struct mutex pci_lock;
 
 	void __iomem *bar0, *bar1;
 
@@ -320,10 +322,13 @@ static inline u32 cobalt_g_sysctrl(struct cobalt *cobalt)
 static inline void cobalt_s_bit_sysctrl(struct cobalt *cobalt,
 					int bit, int val)
 {
-	u32 ctrl = cobalt_read_bar1(cobalt, COBALT_SYS_CTRL_BASE);
+	u32 ctrl;
 
+	mutex_lock(&cobalt->pci_lock);
+	ctrl = cobalt_read_bar1(cobalt, COBALT_SYS_CTRL_BASE);
 	cobalt_write_bar1(cobalt, COBALT_SYS_CTRL_BASE,
 			(ctrl & ~(1UL << bit)) | (val << bit));
+	mutex_unlock(&cobalt->pci_lock);
 }
 
 static inline u32 cobalt_g_sysstat(struct cobalt *cobalt)
-- 
GitLab


From 67a7e53d5b21f3a84efc03a4e62db7caf97841ef Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Tue, 27 Apr 2021 09:15:54 +0200
Subject: [PATCH 1012/3804] media: hevc: Fix dependent slice segment flags

The dependent slice segment flag for PPS control is misnamed. It should
have "enabled" at the end. It only tells whether this flag is present in
the slice header, not its actual value.

Fix this by renaming the PPS flag and introducing another flag for slice
control which tells the actual value.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst | 5 ++++-
 drivers/staging/media/sunxi/cedrus/cedrus_h265.c          | 4 ++--
 include/media/hevc-ctrls.h                                | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index b0de4e6e7ebd1..514b334470eab 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -3053,7 +3053,7 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     :stub-columns: 0
     :widths:       1 1 2
 
-    * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT``
+    * - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED``
       - 0x00000001
       -
     * - ``V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT``
@@ -3277,6 +3277,9 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED``
       - 0x00000100
       -
+    * - ``V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT``
+      - 0x00000200
+      -
 
 .. raw:: latex
 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
index ce497d0197dfc..10744fab7ceaa 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
@@ -477,8 +477,8 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 				slice_params->flags);
 
 	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT,
-				V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT,
-				pps->flags);
+				V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT,
+				slice_params->flags);
 
 	/* FIXME: For multi-slice support. */
 	reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC;
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index b4cb2ef02f171..226fcfa0e0261 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -81,7 +81,7 @@ struct v4l2_ctrl_hevc_sps {
 	__u64	flags;
 };
 
-#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT		(1ULL << 0)
+#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
 #define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
 #define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
 #define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
@@ -160,6 +160,7 @@ struct v4l2_hevc_pred_weight_table {
 #define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
 #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
 #define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
+#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
 
 struct v4l2_ctrl_hevc_slice_params {
 	__u32	bit_size;
-- 
GitLab


From 414e0a6437f96234fe0ba932093b8ff6a187192a Mon Sep 17 00:00:00 2001
From: dingsenjie <dingsenjie@yulong.com>
Date: Fri, 30 Apr 2021 08:18:33 +0200
Subject: [PATCH 1013/3804] media: qcom/camss: Use
 devm_platform_ioremap_resource_byname

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: dingsenjie <dingsenjie@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/camss/camss-csid.c   | 3 +--
 drivers/media/platform/qcom/camss/camss-csiphy.c | 8 +++-----
 drivers/media/platform/qcom/camss/camss-ispif.c  | 6 ++----
 drivers/media/platform/qcom/camss/camss-vfe.c    | 3 +--
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c
index cc11fbfdae132..0e6b76e839834 100644
--- a/drivers/media/platform/qcom/camss/camss-csid.c
+++ b/drivers/media/platform/qcom/camss/camss-csid.c
@@ -566,8 +566,7 @@ int msm_csid_subdev_init(struct camss *camss, struct csid_device *csid,
 
 	/* Memory */
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res->reg[0]);
-	csid->base = devm_ioremap_resource(dev, r);
+	csid->base = devm_platform_ioremap_resource_byname(pdev, res->reg[0]);
 	if (IS_ERR(csid->base))
 		return PTR_ERR(csid->base);
 
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c
index b3c3bf19e5223..1996541278a2a 100644
--- a/drivers/media/platform/qcom/camss/camss-csiphy.c
+++ b/drivers/media/platform/qcom/camss/camss-csiphy.c
@@ -591,16 +591,14 @@ int msm_csiphy_subdev_init(struct camss *camss,
 
 	/* Memory */
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res->reg[0]);
-	csiphy->base = devm_ioremap_resource(dev, r);
+	csiphy->base = devm_platform_ioremap_resource_byname(pdev, res->reg[0]);
 	if (IS_ERR(csiphy->base))
 		return PTR_ERR(csiphy->base);
 
 	if (camss->version == CAMSS_8x16 ||
 	    camss->version == CAMSS_8x96) {
-		r = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						 res->reg[1]);
-		csiphy->base_clk_mux = devm_ioremap_resource(dev, r);
+		csiphy->base_clk_mux =
+			devm_platform_ioremap_resource_byname(pdev, res->reg[1]);
 		if (IS_ERR(csiphy->base_clk_mux))
 			return PTR_ERR(csiphy->base_clk_mux);
 	} else {
diff --git a/drivers/media/platform/qcom/camss/camss-ispif.c b/drivers/media/platform/qcom/camss/camss-ispif.c
index 37611c8861da9..d7942f723fdc3 100644
--- a/drivers/media/platform/qcom/camss/camss-ispif.c
+++ b/drivers/media/platform/qcom/camss/camss-ispif.c
@@ -1143,13 +1143,11 @@ int msm_ispif_subdev_init(struct camss *camss,
 
 	/* Memory */
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res->reg[0]);
-	ispif->base = devm_ioremap_resource(dev, r);
+	ispif->base = devm_platform_ioremap_resource_byname(pdev, res->reg[0]);
 	if (IS_ERR(ispif->base))
 		return PTR_ERR(ispif->base);
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res->reg[1]);
-	ispif->base_clk_mux = devm_ioremap_resource(dev, r);
+	ispif->base_clk_mux = devm_platform_ioremap_resource_byname(pdev, res->reg[1]);
 	if (IS_ERR(ispif->base_clk_mux))
 		return PTR_ERR(ispif->base_clk_mux);
 
diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c
index 15695fd466c4d..1584ee77ad008 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -1301,8 +1301,7 @@ int msm_vfe_subdev_init(struct camss *camss, struct vfe_device *vfe,
 
 	/* Memory */
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_MEM, res->reg[0]);
-	vfe->base = devm_ioremap_resource(dev, r);
+	vfe->base = devm_platform_ioremap_resource_byname(pdev, res->reg[0]);
 	if (IS_ERR(vfe->base)) {
 		dev_err(dev, "could not map memory\n");
 		return PTR_ERR(vfe->base);
-- 
GitLab


From 8edcb5049ac29aa3c8acc5ef15dd4036543d747e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 30 Apr 2021 22:19:55 +0200
Subject: [PATCH 1014/3804] media: I2C: change 'RST' to "RSET" to fix multiple
 build errors

The use of an enum named 'RST' conflicts with a #define macro
named 'RST' in arch/mips/include/asm/mach-rc32434/rb.h.

The MIPS use of RST was there first (AFAICT), so change the
media/i2c/ uses of RST to be named 'RSET'.
'git grep -w RSET' does not report any naming conflicts with the
new name.

This fixes multiple build errors:

arch/mips/include/asm/mach-rc32434/rb.h:15:14: error: expected identifier before '(' token
   15 | #define RST  (1 << 15)
      |              ^
drivers/media/i2c/s5c73m3/s5c73m3.h:356:2: note: in expansion of macro 'RST'
  356 |  RST,
      |  ^~~

../arch/mips/include/asm/mach-rc32434/rb.h:15:14: error: expected identifier before '(' token
   15 | #define RST  (1 << 15)
      |              ^
../drivers/media/i2c/s5k6aa.c:180:2: note: in expansion of macro 'RST'
  180 |  RST,
      |  ^~~

../arch/mips/include/asm/mach-rc32434/rb.h:15:14: error: expected identifier before '(' token
   15 | #define RST  (1 << 15)
      |              ^
../drivers/media/i2c/s5k5baf.c:238:2: note: in expansion of macro 'RST'
  238 |  RST,
      |  ^~~

and some others that I have trimmed.

Fixes: cac47f1822fc ("[media] V4L: Add S5C73M3 camera driver")
Fixes: 8b99312b7214 ("[media] Add v4l2 subdev driver for S5K4ECGX sensor")
Fixes: 7d459937dc09 ("[media] Add driver for Samsung S5K5BAF camera sensor")
Fixes: bfa8dd3a0524 ("[media] v4l: Add v4l2 subdev driver for S5K6AAFX sensor")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
Cc: Andrzej Hajda <a.hajda@samsung.com>
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Cc: Sangwook Lee <sangwook.lee@linaro.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/s5c73m3/s5c73m3-core.c |  6 +++---
 drivers/media/i2c/s5c73m3/s5c73m3.h      |  2 +-
 drivers/media/i2c/s5k4ecgx.c             | 10 +++++-----
 drivers/media/i2c/s5k5baf.c              |  6 +++---
 drivers/media/i2c/s5k6aa.c               | 10 +++++-----
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/media/i2c/s5c73m3/s5c73m3-core.c b/drivers/media/i2c/s5c73m3/s5c73m3-core.c
index 5b4c4a3547c93..71804a70bc6d7 100644
--- a/drivers/media/i2c/s5c73m3/s5c73m3-core.c
+++ b/drivers/media/i2c/s5c73m3/s5c73m3-core.c
@@ -1386,7 +1386,7 @@ static int __s5c73m3_power_on(struct s5c73m3 *state)
 	s5c73m3_gpio_deassert(state, STBY);
 	usleep_range(100, 200);
 
-	s5c73m3_gpio_deassert(state, RST);
+	s5c73m3_gpio_deassert(state, RSET);
 	usleep_range(50, 100);
 
 	return 0;
@@ -1401,7 +1401,7 @@ static int __s5c73m3_power_off(struct s5c73m3 *state)
 {
 	int i, ret;
 
-	if (s5c73m3_gpio_assert(state, RST))
+	if (s5c73m3_gpio_assert(state, RSET))
 		usleep_range(10, 50);
 
 	if (s5c73m3_gpio_assert(state, STBY))
@@ -1606,7 +1606,7 @@ static int s5c73m3_get_platform_data(struct s5c73m3 *state)
 
 		state->mclk_frequency = pdata->mclk_frequency;
 		state->gpio[STBY] = pdata->gpio_stby;
-		state->gpio[RST] = pdata->gpio_reset;
+		state->gpio[RSET] = pdata->gpio_reset;
 		return 0;
 	}
 
diff --git a/drivers/media/i2c/s5c73m3/s5c73m3.h b/drivers/media/i2c/s5c73m3/s5c73m3.h
index ef7e85b34263b..c3fcfdd3ea66d 100644
--- a/drivers/media/i2c/s5c73m3/s5c73m3.h
+++ b/drivers/media/i2c/s5c73m3/s5c73m3.h
@@ -353,7 +353,7 @@ struct s5c73m3_ctrls {
 
 enum s5c73m3_gpio_id {
 	STBY,
-	RST,
+	RSET,
 	GPIO_NUM,
 };
 
diff --git a/drivers/media/i2c/s5k4ecgx.c b/drivers/media/i2c/s5k4ecgx.c
index b2d53417badf6..4e97309a67f41 100644
--- a/drivers/media/i2c/s5k4ecgx.c
+++ b/drivers/media/i2c/s5k4ecgx.c
@@ -173,7 +173,7 @@ static const char * const s5k4ecgx_supply_names[] = {
 
 enum s5k4ecgx_gpio_id {
 	STBY,
-	RST,
+	RSET,
 	GPIO_NUM,
 };
 
@@ -476,7 +476,7 @@ static int __s5k4ecgx_power_on(struct s5k4ecgx *priv)
 	if (s5k4ecgx_gpio_set_value(priv, STBY, priv->gpio[STBY].level))
 		usleep_range(30, 50);
 
-	if (s5k4ecgx_gpio_set_value(priv, RST, priv->gpio[RST].level))
+	if (s5k4ecgx_gpio_set_value(priv, RSET, priv->gpio[RSET].level))
 		usleep_range(30, 50);
 
 	return 0;
@@ -484,7 +484,7 @@ static int __s5k4ecgx_power_on(struct s5k4ecgx *priv)
 
 static int __s5k4ecgx_power_off(struct s5k4ecgx *priv)
 {
-	if (s5k4ecgx_gpio_set_value(priv, RST, !priv->gpio[RST].level))
+	if (s5k4ecgx_gpio_set_value(priv, RSET, !priv->gpio[RSET].level))
 		usleep_range(30, 50);
 
 	if (s5k4ecgx_gpio_set_value(priv, STBY, !priv->gpio[STBY].level))
@@ -872,7 +872,7 @@ static int s5k4ecgx_config_gpios(struct s5k4ecgx *priv,
 	int ret;
 
 	priv->gpio[STBY].gpio = -EINVAL;
-	priv->gpio[RST].gpio  = -EINVAL;
+	priv->gpio[RSET].gpio  = -EINVAL;
 
 	ret = s5k4ecgx_config_gpio(gpio->gpio, gpio->level, "S5K4ECGX_STBY");
 
@@ -891,7 +891,7 @@ static int s5k4ecgx_config_gpios(struct s5k4ecgx *priv,
 		s5k4ecgx_free_gpios(priv);
 		return ret;
 	}
-	priv->gpio[RST] = *gpio;
+	priv->gpio[RSET] = *gpio;
 	if (gpio_is_valid(gpio->gpio))
 		gpio_set_value(gpio->gpio, 0);
 
diff --git a/drivers/media/i2c/s5k5baf.c b/drivers/media/i2c/s5k5baf.c
index 6e702b57c37da..bc560817e5046 100644
--- a/drivers/media/i2c/s5k5baf.c
+++ b/drivers/media/i2c/s5k5baf.c
@@ -235,7 +235,7 @@ struct s5k5baf_gpio {
 
 enum s5k5baf_gpio_id {
 	STBY,
-	RST,
+	RSET,
 	NUM_GPIOS,
 };
 
@@ -969,7 +969,7 @@ static int s5k5baf_power_on(struct s5k5baf *state)
 
 	s5k5baf_gpio_deassert(state, STBY);
 	usleep_range(50, 100);
-	s5k5baf_gpio_deassert(state, RST);
+	s5k5baf_gpio_deassert(state, RSET);
 	return 0;
 
 err_reg_dis:
@@ -987,7 +987,7 @@ static int s5k5baf_power_off(struct s5k5baf *state)
 	state->apply_cfg = 0;
 	state->apply_crop = 0;
 
-	s5k5baf_gpio_assert(state, RST);
+	s5k5baf_gpio_assert(state, RSET);
 	s5k5baf_gpio_assert(state, STBY);
 
 	if (!IS_ERR(state->clock))
diff --git a/drivers/media/i2c/s5k6aa.c b/drivers/media/i2c/s5k6aa.c
index 038e385007601..e9be7323a22e9 100644
--- a/drivers/media/i2c/s5k6aa.c
+++ b/drivers/media/i2c/s5k6aa.c
@@ -177,7 +177,7 @@ static const char * const s5k6aa_supply_names[] = {
 
 enum s5k6aa_gpio_id {
 	STBY,
-	RST,
+	RSET,
 	GPIO_NUM,
 };
 
@@ -841,7 +841,7 @@ static int __s5k6aa_power_on(struct s5k6aa *s5k6aa)
 		ret = s5k6aa->s_power(1);
 	usleep_range(4000, 5000);
 
-	if (s5k6aa_gpio_deassert(s5k6aa, RST))
+	if (s5k6aa_gpio_deassert(s5k6aa, RSET))
 		msleep(20);
 
 	return ret;
@@ -851,7 +851,7 @@ static int __s5k6aa_power_off(struct s5k6aa *s5k6aa)
 {
 	int ret;
 
-	if (s5k6aa_gpio_assert(s5k6aa, RST))
+	if (s5k6aa_gpio_assert(s5k6aa, RSET))
 		usleep_range(100, 150);
 
 	if (s5k6aa->s_power) {
@@ -1510,7 +1510,7 @@ static int s5k6aa_configure_gpios(struct s5k6aa *s5k6aa,
 	int ret;
 
 	s5k6aa->gpio[STBY].gpio = -EINVAL;
-	s5k6aa->gpio[RST].gpio  = -EINVAL;
+	s5k6aa->gpio[RSET].gpio  = -EINVAL;
 
 	gpio = &pdata->gpio_stby;
 	if (gpio_is_valid(gpio->gpio)) {
@@ -1533,7 +1533,7 @@ static int s5k6aa_configure_gpios(struct s5k6aa *s5k6aa,
 		if (ret < 0)
 			return ret;
 
-		s5k6aa->gpio[RST] = *gpio;
+		s5k6aa->gpio[RSET] = *gpio;
 	}
 
 	return 0;
-- 
GitLab


From f8194e5e63fdcb349e8da9eef9e574d5b1d687cb Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Tue, 4 May 2021 19:08:58 +0200
Subject: [PATCH 1015/3804] media: pvrusb2: fix warning in pvr2_i2c_core_done

syzbot has reported the following warning in pvr2_i2c_core_done:

	sysfs group 'power' not found for kobject '1-0043'

When the device is disconnected (pvr2_hdw_disconnect), the i2c adapter is
not unregistered along with the USB and v4l2 teardown. As part of the USB
device disconnect, the sysfs files of the subdevices are also deleted.
So, by the time pvr2_i2c_core_done is called by pvr2_context_destroy, the
sysfs files have been deleted.

To fix this, unregister the i2c adapter too in pvr2_hdw_disconnect. Make
the device deregistration code shared by calling pvr2_hdw_disconnect from
pvr2_hdw_destroy.

Reported-by: syzbot+e74a998ca8f1df9cc332@syzkaller.appspotmail.com
Tested-by: syzbot+e74a998ca8f1df9cc332@syzkaller.appspotmail.com
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/pvrusb2/pvrusb2-hdw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
index f4a727918e352..d38dee1792e41 100644
--- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
+++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c
@@ -2676,9 +2676,8 @@ void pvr2_hdw_destroy(struct pvr2_hdw *hdw)
 		pvr2_stream_destroy(hdw->vid_stream);
 		hdw->vid_stream = NULL;
 	}
-	pvr2_i2c_core_done(hdw);
 	v4l2_device_unregister(&hdw->v4l2_dev);
-	pvr2_hdw_remove_usb_stuff(hdw);
+	pvr2_hdw_disconnect(hdw);
 	mutex_lock(&pvr2_unit_mtx);
 	do {
 		if ((hdw->unit_number >= 0) &&
@@ -2705,6 +2704,7 @@ void pvr2_hdw_disconnect(struct pvr2_hdw *hdw)
 {
 	pvr2_trace(PVR2_TRACE_INIT,"pvr2_hdw_disconnect(hdw=%p)",hdw);
 	LOCK_TAKE(hdw->big_lock);
+	pvr2_i2c_core_done(hdw);
 	LOCK_TAKE(hdw->ctl_lock);
 	pvr2_hdw_remove_usb_stuff(hdw);
 	LOCK_GIVE(hdw->ctl_lock);
-- 
GitLab


From 09f4310c6bfbc0002ce1cf8fc90db50777d75916 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:28 +0200
Subject: [PATCH 1016/3804] media: hantro: use G1_REG_INTERRUPT directly for
 the mpeg2

Use the register directly over the existing SWREG().

Ideally we'll port the driver away from the local registers, but for
now this is enough. For context - I was reading through the IRQ register
handling across the variants.

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 6386a3989bfe9..0fd306806f166 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -10,6 +10,7 @@
 #include <media/v4l2-mem2mem.h>
 #include "hantro.h"
 #include "hantro_hw.h"
+#include "hantro_g1_regs.h"
 
 #define G1_SWREG(nr)			((nr) * 4)
 
@@ -20,7 +21,6 @@
 #define G1_REG_REFER2_BASE		G1_SWREG(16)
 #define G1_REG_REFER3_BASE		G1_SWREG(17)
 #define G1_REG_QTABLE_BASE		G1_SWREG(40)
-#define G1_REG_DEC_E(v)			((v) ? BIT(0) : 0)
 
 #define G1_REG_DEC_AXI_RD_ID(v)		(((v) << 24) & GENMASK(31, 24))
 #define G1_REG_DEC_TIMEOUT_E(v)		((v) ? BIT(23) : 0)
@@ -246,6 +246,5 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	hantro_end_prepare_run(ctx);
 
-	reg = G1_REG_DEC_E(1);
-	vdpu_write(vpu, reg, G1_SWREG(1));
+	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
 }
-- 
GitLab


From 3b330849512ef7c617f72111fd958daf6febdf40 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:29 +0200
Subject: [PATCH 1017/3804] media: hantro: imx: reuse MB_DIM define

Swap the hardcoded 16 with MB_DIM define.

Fixes: 8e4aaa687863 ("media: hantro: add initial i.MX8MQ support")
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/imx8m_vpu_hw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index c222de075ef44..1f48c1956cd26 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -109,10 +109,10 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
 		.frmsize = {
 			.min_width = 48,
 			.max_width = 3840,
-			.step_width = 16,
+			.step_width = MB_DIM,
 			.min_height = 48,
 			.max_height = 2160,
-			.step_height = 16,
+			.step_height = MB_DIM,
 		},
 	},
 	{
-- 
GitLab


From c78b22aaaa9055b05a5bd47f6715f0de16487dd4 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:30 +0200
Subject: [PATCH 1018/3804] media: hantro: imx: remove duplicate dec_base init

The vpu->dec_base is already set by the hantro driver itself.

Fixes: 8e4aaa687863 ("media: hantro: add initial i.MX8MQ support")
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/imx8m_vpu_hw.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index 1f48c1956cd26..cb1ac02c03d29 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -150,7 +150,6 @@ static irqreturn_t imx8m_vpu_g1_irq(int irq, void *dev_id)
 
 static int imx8mq_vpu_hw_init(struct hantro_dev *vpu)
 {
-	vpu->dec_base = vpu->reg_bases[0];
 	vpu->ctrl_base = vpu->reg_bases[vpu->variant->num_regs - 1];
 
 	return 0;
-- 
GitLab


From d72a96b6c053dca29e7b2a94fb700f4960ce9834 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:31 +0200
Subject: [PATCH 1019/3804] media: hantro: imx: remove unused include

The current imx8 code does not use the jpeg encoder. Remove the
unnecessary include.

Fixes: 8e4aaa687863 ("media: hantro: add initial i.MX8MQ support")
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/imx8m_vpu_hw.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index cb1ac02c03d29..f36c1bd681ba0 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -9,7 +9,6 @@
 #include <linux/delay.h>
 
 #include "hantro.h"
-#include "hantro_jpeg.h"
 #include "hantro_g1_regs.h"
 
 #define CTRL_SOFT_RESET		0x00
-- 
GitLab


From bbc42ab0da985f538eefe1b470a610624e53829f Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:32 +0200
Subject: [PATCH 1020/3804] media: hantro: introduce hantro_g1.c for common API

The Hantro G1 IRQ and reset handling is pretty standard. I was this
close to duplicating it, yet again, before reconsidering and refactoring
it to a separate file.

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/Makefile        |  1 +
 drivers/staging/media/hantro/hantro_g1.c     | 39 ++++++++++++++++++++
 drivers/staging/media/hantro/hantro_hw.h     |  3 ++
 drivers/staging/media/hantro/imx8m_vpu_hw.c  | 21 +----------
 drivers/staging/media/hantro/rk3288_vpu_hw.c | 36 ++----------------
 5 files changed, 48 insertions(+), 52 deletions(-)
 create mode 100644 drivers/staging/media/hantro/hantro_g1.c

diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile
index 743ce08eb1846..3747a32799b2e 100644
--- a/drivers/staging/media/hantro/Makefile
+++ b/drivers/staging/media/hantro/Makefile
@@ -7,6 +7,7 @@ hantro-vpu-y += \
 		hantro_v4l2.o \
 		hantro_postproc.o \
 		hantro_h1_jpeg_enc.o \
+		hantro_g1.o \
 		hantro_g1_h264_dec.o \
 		hantro_g1_mpeg2_dec.o \
 		hantro_g1_vp8_dec.o \
diff --git a/drivers/staging/media/hantro/hantro_g1.c b/drivers/staging/media/hantro/hantro_g1.c
new file mode 100644
index 0000000000000..0ab1cee622189
--- /dev/null
+++ b/drivers/staging/media/hantro/hantro_g1.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ *	Jeffy Chen <jeffy.chen@rock-chips.com>
+ * Copyright (C) 2019 Pengutronix, Philipp Zabel <kernel@pengutronix.de>
+ * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com>
+ */
+
+#include "hantro.h"
+#include "hantro_g1_regs.h"
+
+irqreturn_t hantro_g1_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vdpu_read(vpu, G1_REG_INTERRUPT);
+	state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
+		 VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vdpu_write(vpu, 0, G1_REG_INTERRUPT);
+	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
+void hantro_g1_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT);
+	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+	vdpu_write(vpu, 1, G1_REG_SOFT_RESET);
+}
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 0e34ae545f661..a1008e595808f 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -176,6 +176,9 @@ void hantro_irq_done(struct hantro_dev *vpu,
 void hantro_start_prepare_run(struct hantro_ctx *ctx);
 void hantro_end_prepare_run(struct hantro_ctx *ctx);
 
+irqreturn_t hantro_g1_irq(int irq, void *dev_id);
+void hantro_g1_reset(struct hantro_ctx *ctx);
+
 void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
 void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx);
 int hantro_jpeg_enc_init(struct hantro_ctx *ctx);
diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index f36c1bd681ba0..9eb556460e522 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -9,7 +9,6 @@
 #include <linux/delay.h>
 
 #include "hantro.h"
-#include "hantro_g1_regs.h"
 
 #define CTRL_SOFT_RESET		0x00
 #define RESET_G1		BIT(1)
@@ -129,24 +128,6 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
 	},
 };
 
-static irqreturn_t imx8m_vpu_g1_irq(int irq, void *dev_id)
-{
-	struct hantro_dev *vpu = dev_id;
-	enum vb2_buffer_state state;
-	u32 status;
-
-	status = vdpu_read(vpu, G1_REG_INTERRUPT);
-	state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
-		 VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
-
-	vdpu_write(vpu, 0, G1_REG_INTERRUPT);
-	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
-
-	hantro_irq_done(vpu, state);
-
-	return IRQ_HANDLED;
-}
-
 static int imx8mq_vpu_hw_init(struct hantro_dev *vpu)
 {
 	vpu->ctrl_base = vpu->reg_bases[vpu->variant->num_regs - 1];
@@ -191,7 +172,7 @@ static const struct hantro_codec_ops imx8mq_vpu_codec_ops[] = {
  */
 
 static const struct hantro_irq imx8mq_irqs[] = {
-	{ "g1", imx8m_vpu_g1_irq },
+	{ "g1", hantro_g1_irq },
 	{ "g2", NULL /* TODO: imx8m_vpu_g2_irq */ },
 };
 
diff --git a/drivers/staging/media/hantro/rk3288_vpu_hw.c b/drivers/staging/media/hantro/rk3288_vpu_hw.c
index 7b299ee3e93d7..fefd45269e52a 100644
--- a/drivers/staging/media/hantro/rk3288_vpu_hw.c
+++ b/drivers/staging/media/hantro/rk3288_vpu_hw.c
@@ -10,7 +10,6 @@
 
 #include "hantro.h"
 #include "hantro_jpeg.h"
-#include "hantro_g1_regs.h"
 #include "hantro_h1_regs.h"
 
 #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
@@ -127,24 +126,6 @@ static irqreturn_t rk3288_vepu_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t rk3288_vdpu_irq(int irq, void *dev_id)
-{
-	struct hantro_dev *vpu = dev_id;
-	enum vb2_buffer_state state;
-	u32 status;
-
-	status = vdpu_read(vpu, G1_REG_INTERRUPT);
-	state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
-		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
-
-	vdpu_write(vpu, 0, G1_REG_INTERRUPT);
-	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
-
-	hantro_irq_done(vpu, state);
-
-	return IRQ_HANDLED;
-}
-
 static int rk3288_vpu_hw_init(struct hantro_dev *vpu)
 {
 	/* Bump ACLK to max. possible freq. to improve performance. */
@@ -161,15 +142,6 @@ static void rk3288_vpu_enc_reset(struct hantro_ctx *ctx)
 	vepu_write(vpu, 0, H1_REG_AXI_CTRL);
 }
 
-static void rk3288_vpu_dec_reset(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-
-	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT);
-	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
-	vdpu_write(vpu, 1, G1_REG_SOFT_RESET);
-}
-
 /*
  * Supported codec ops.
  */
@@ -184,19 +156,19 @@ static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
 	},
 	[HANTRO_MODE_H264_DEC] = {
 		.run = hantro_g1_h264_dec_run,
-		.reset = rk3288_vpu_dec_reset,
+		.reset = hantro_g1_reset,
 		.init = hantro_h264_dec_init,
 		.exit = hantro_h264_dec_exit,
 	},
 	[HANTRO_MODE_MPEG2_DEC] = {
 		.run = hantro_g1_mpeg2_dec_run,
-		.reset = rk3288_vpu_dec_reset,
+		.reset = hantro_g1_reset,
 		.init = hantro_mpeg2_dec_init,
 		.exit = hantro_mpeg2_dec_exit,
 	},
 	[HANTRO_MODE_VP8_DEC] = {
 		.run = hantro_g1_vp8_dec_run,
-		.reset = rk3288_vpu_dec_reset,
+		.reset = hantro_g1_reset,
 		.init = hantro_vp8_dec_init,
 		.exit = hantro_vp8_dec_exit,
 	},
@@ -208,7 +180,7 @@ static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
 
 static const struct hantro_irq rk3288_irqs[] = {
 	{ "vepu", rk3288_vepu_irq },
-	{ "vdpu", rk3288_vdpu_irq },
+	{ "vdpu", hantro_g1_irq },
 };
 
 static const char * const rk3288_clk_names[] = {
-- 
GitLab


From 18d6c8b7b4c94c5e2e5b5807d9484b4d54b8fa1b Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:33 +0200
Subject: [PATCH 1021/3804] media: hantro: add fallback handling for single
 irq/clk

Currently the driver expects that each irq/clk will have a name
specified.

A valid point was raised by the DT maintainers - when there is a single
interrupt line or clock - the names are not needed.

Keep the names within the drivers themselves, but don't use them when
only a single entry exists. Instead use:
 - num_clk == 1 - devm_clk_get(..., NULL)
 - num_irq == 1 - platform_get_irq(..., 0)

Suggested-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c | 37 ++++++++++++++++++-----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index eea2009fa17bd..dd37d87e4c2a7 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -763,12 +763,23 @@ static int hantro_probe(struct platform_device *pdev)
 	if (!vpu->clocks)
 		return -ENOMEM;
 
-	for (i = 0; i < vpu->variant->num_clocks; i++)
-		vpu->clocks[i].id = vpu->variant->clk_names[i];
-	ret = devm_clk_bulk_get(&pdev->dev, vpu->variant->num_clocks,
-				vpu->clocks);
-	if (ret)
-		return ret;
+	if (vpu->variant->num_clocks > 1) {
+		for (i = 0; i < vpu->variant->num_clocks; i++)
+			vpu->clocks[i].id = vpu->variant->clk_names[i];
+
+		ret = devm_clk_bulk_get(&pdev->dev, vpu->variant->num_clocks,
+					vpu->clocks);
+		if (ret)
+			return ret;
+	} else {
+		/*
+		 * If the driver has a single clk, chances are there will be no
+		 * actual name in the DT bindings.
+		 */
+		vpu->clocks[0].clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(vpu->clocks[0].clk))
+			return PTR_ERR(vpu->clocks[0].clk);
+	}
 
 	num_bases = vpu->variant->num_regs ?: 1;
 	vpu->reg_bases = devm_kcalloc(&pdev->dev, num_bases,
@@ -796,13 +807,23 @@ static int hantro_probe(struct platform_device *pdev)
 	vb2_dma_contig_set_max_seg_size(&pdev->dev, DMA_BIT_MASK(32));
 
 	for (i = 0; i < vpu->variant->num_irqs; i++) {
-		const char *irq_name = vpu->variant->irqs[i].name;
+		const char *irq_name;
 		int irq;
 
 		if (!vpu->variant->irqs[i].handler)
 			continue;
 
-		irq = platform_get_irq_byname(vpu->pdev, irq_name);
+		if (vpu->variant->num_irqs > 1) {
+			irq_name = vpu->variant->irqs[i].name;
+			irq = platform_get_irq_byname(vpu->pdev, irq_name);
+		} else {
+			/*
+			 * If the driver has a single IRQ, chances are there
+			 * will be no actual name in the DT bindings.
+			 */
+			irq_name = "default";
+			irq = platform_get_irq(vpu->pdev, 0);
+		}
 		if (irq <= 0)
 			return -ENXIO;
 
-- 
GitLab


From 0d705395afa4b4fa7d0fae86b9c04cfe50a03ace Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:34 +0200
Subject: [PATCH 1022/3804] media: dt-bindings: Document SAMA5D4 VDEC bindings

Add devicetree binding documentation for the Hantro G1/G2 VDEC on
the Microchip SAMA5D4 SoC.

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/microchip,sama5d4-vdec.yaml         | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml

diff --git a/Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml b/Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml
new file mode 100644
index 0000000000000..4b77103ca9132
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/microchip,sama5d4-vdec.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/media/microchip,sama5d4-vdec.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Hantro G1 VPU codec implemented on Microchip SAMA5D4 SoCs
+
+maintainers:
+  - Emil Velikov <emil.velikov@collabora.com>
+
+description:
+  Hantro G1 video decode accelerator present on Microchip SAMA5D4 SoCs.
+
+properties:
+  compatible:
+    const: microchip,sama5d4-vdec
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+
+additionalProperties: false
+
+examples:
+  - |
+        #include <dt-bindings/clock/at91.h>
+        #include <dt-bindings/interrupt-controller/irq.h>
+
+        vdec0: vdec@300000 {
+                compatible = "microchip,sama5d4-vdec";
+                reg = <0x00300000 0x100000>;
+                interrupts = <19 IRQ_TYPE_LEVEL_HIGH 4>;
+                clocks = <&pmc PMC_TYPE_PERIPHERAL 19>;
+        };
-- 
GitLab


From 82ad940c00949965739360f68b90d9a00ccefc81 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:35 +0200
Subject: [PATCH 1023/3804] media: hantro: add initial SAMA5D4 support

The SoC features a Hantro G1 compatible video decoder, supporting the
MPEG-2, VP8 and H264 codecs with resolutions up to 1280x720.

Post-processing core is also available on the SoC.

Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/Kconfig          |  10 +-
 drivers/staging/media/hantro/Makefile         |   3 +
 drivers/staging/media/hantro/hantro_drv.c     |   3 +
 drivers/staging/media/hantro/hantro_hw.h      |   1 +
 .../staging/media/hantro/sama5d4_vdec_hw.c    | 117 ++++++++++++++++++
 5 files changed, 133 insertions(+), 1 deletion(-)
 create mode 100644 drivers/staging/media/hantro/sama5d4_vdec_hw.c

diff --git a/drivers/staging/media/hantro/Kconfig b/drivers/staging/media/hantro/Kconfig
index 5b6cf9f62b1ae..20b1f6d7b69c6 100644
--- a/drivers/staging/media/hantro/Kconfig
+++ b/drivers/staging/media/hantro/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config VIDEO_HANTRO
 	tristate "Hantro VPU driver"
-	depends on ARCH_MXC || ARCH_ROCKCHIP || COMPILE_TEST
+	depends on ARCH_MXC || ARCH_ROCKCHIP || ARCH_AT91 || COMPILE_TEST
 	depends on VIDEO_DEV && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select MEDIA_CONTROLLER_REQUEST_API
@@ -24,6 +24,14 @@ config VIDEO_HANTRO_IMX8M
 	help
 	  Enable support for i.MX8M SoCs.
 
+config VIDEO_HANTRO_SAMA5D4
+	bool "Hantro VDEC SAMA5D4 support"
+	depends on VIDEO_HANTRO
+	depends on ARCH_AT91 || COMPILE_TEST
+	default y
+	help
+	  Enable support for Microchip SAMA5D4 SoCs.
+
 config VIDEO_HANTRO_ROCKCHIP
 	bool "Hantro VPU Rockchip support"
 	depends on VIDEO_HANTRO
diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile
index 3747a32799b2e..f4b99901eeeec 100644
--- a/drivers/staging/media/hantro/Makefile
+++ b/drivers/staging/media/hantro/Makefile
@@ -22,6 +22,9 @@ hantro-vpu-y += \
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_IMX8M) += \
 		imx8m_vpu_hw.o
 
+hantro-vpu-$(CONFIG_VIDEO_HANTRO_SAMA5D4) += \
+		sama5d4_vdec_hw.o
+
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \
 		rk3288_vpu_hw.o \
 		rk3399_vpu_hw.o
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index dd37d87e4c2a7..85dcb0882afcb 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -489,6 +489,9 @@ static const struct of_device_id of_hantro_match[] = {
 #endif
 #ifdef CONFIG_VIDEO_HANTRO_IMX8M
 	{ .compatible = "nxp,imx8mq-vpu", .data = &imx8mq_vpu_variant, },
+#endif
+#ifdef CONFIG_VIDEO_HANTRO_SAMA5D4
+	{ .compatible = "microchip,sama5d4-vdec", .data = &sama5d4_vdec_variant, },
 #endif
 	{ /* sentinel */ }
 };
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index a1008e595808f..0a42df22472e6 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -164,6 +164,7 @@ extern const struct hantro_variant rk3399_vpu_variant;
 extern const struct hantro_variant rk3328_vpu_variant;
 extern const struct hantro_variant rk3288_vpu_variant;
 extern const struct hantro_variant imx8mq_vpu_variant;
+extern const struct hantro_variant sama5d4_vdec_variant;
 
 extern const struct hantro_postproc_regs hantro_g1_postproc_regs;
 
diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
new file mode 100644
index 0000000000000..58ae72c2b723e
--- /dev/null
+++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VDEC driver
+ *
+ * Copyright (C) 2021 Collabora Ltd, Emil Velikov <emil.velikov@collabora.com>
+ */
+
+#include "hantro.h"
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_YUYV,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+};
+
+static const struct hantro_fmt sama5d4_vdec_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+		.codec_mode = HANTRO_MODE_MPEG2_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1280,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 720,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
+		.codec_mode = HANTRO_MODE_VP8_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1280,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 720,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_H264_SLICE,
+		.codec_mode = HANTRO_MODE_H264_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1280,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 720,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
+static int sama5d4_hw_init(struct hantro_dev *vpu)
+{
+	return 0;
+}
+
+/*
+ * Supported codec ops.
+ */
+
+static const struct hantro_codec_ops sama5d4_vdec_codec_ops[] = {
+	[HANTRO_MODE_MPEG2_DEC] = {
+		.run = hantro_g1_mpeg2_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_mpeg2_dec_init,
+		.exit = hantro_mpeg2_dec_exit,
+	},
+	[HANTRO_MODE_VP8_DEC] = {
+		.run = hantro_g1_vp8_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_vp8_dec_init,
+		.exit = hantro_vp8_dec_exit,
+	},
+	[HANTRO_MODE_H264_DEC] = {
+		.run = hantro_g1_h264_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_h264_dec_init,
+		.exit = hantro_h264_dec_exit,
+	},
+};
+
+static const struct hantro_irq sama5d4_irqs[] = {
+	{ "vdec", hantro_g1_irq },
+};
+
+static const char * const sama5d4_clk_names[] = { "vdec_clk" };
+
+const struct hantro_variant sama5d4_vdec_variant = {
+	.dec_fmts = sama5d4_vdec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(sama5d4_vdec_fmts),
+	.postproc_fmts = sama5d4_vdec_postproc_fmts,
+	.num_postproc_fmts = ARRAY_SIZE(sama5d4_vdec_postproc_fmts),
+	.postproc_regs = &hantro_g1_postproc_regs,
+	.codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+		 HANTRO_H264_DECODER,
+	.codec_ops = sama5d4_vdec_codec_ops,
+	.init = sama5d4_hw_init,
+	.irqs = sama5d4_irqs,
+	.num_irqs = ARRAY_SIZE(sama5d4_irqs),
+	.clk_names = sama5d4_clk_names,
+	.num_clocks = ARRAY_SIZE(sama5d4_clk_names),
+};
-- 
GitLab


From bb9212fd971035597d264fc6a7cc4df0db9b5fd0 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Thu, 1 Apr 2021 16:43:36 +0200
Subject: [PATCH 1024/3804] media: ARM: dts: sama5d4: enable Hantro G1 VDEC

Add the SAMA5D4 VDEC module which comprises Hantro G1 video decoder
core.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: devicetree@vger.kernel.org
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 arch/arm/boot/dts/sama5d4.dtsi | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index 05c55875835d5..e47e1ca630439 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -101,6 +101,13 @@
 			ranges = <0 0x100000 0x2400>;
 		};
 
+		vdec0: vdec@300000 {
+			compatible = "microchip,sama5d4-vdec";
+			reg = <0x00300000 0x100000>;
+			interrupts = <19 IRQ_TYPE_LEVEL_HIGH 4>;
+			clocks = <&pmc PMC_TYPE_PERIPHERAL 19>;
+		};
+
 		usb0: gadget@400000 {
 			compatible = "atmel,sama5d3-udc";
 			reg = <0x00400000 0x100000
-- 
GitLab


From 401b0e5dcab62196c56aa7c2536b0cacfc506e0e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:11 +0200
Subject: [PATCH 1025/3804] media: i2c: ak7375: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ak7375.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/i2c/ak7375.c b/drivers/media/i2c/ak7375.c
index e1f94ee0f48f5..40b1a4aa846ca 100644
--- a/drivers/media/i2c/ak7375.c
+++ b/drivers/media/i2c/ak7375.c
@@ -87,15 +87,7 @@ static const struct v4l2_ctrl_ops ak7375_vcm_ctrl_ops = {
 
 static int ak7375_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	int ret;
-
-	ret = pm_runtime_get_sync(sd->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(sd->dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(sd->dev);
 }
 
 static int ak7375_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
-- 
GitLab


From 7917f27941c3c1289aad2be27bf329844125d13e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:11 +0200
Subject: [PATCH 1026/3804] media: i2c: dw9714: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/dw9714.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c
index 3f0b082f863f5..c8b4292512dca 100644
--- a/drivers/media/i2c/dw9714.c
+++ b/drivers/media/i2c/dw9714.c
@@ -85,15 +85,7 @@ static const struct v4l2_ctrl_ops dw9714_vcm_ctrl_ops = {
 
 static int dw9714_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	int rval;
-
-	rval = pm_runtime_get_sync(sd->dev);
-	if (rval < 0) {
-		pm_runtime_put_noidle(sd->dev);
-		return rval;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(sd->dev);
 }
 
 static int dw9714_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
-- 
GitLab


From ed8f47b061250ddaadcfe33a54532241a718b59e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:12 +0200
Subject: [PATCH 1027/3804] media: i2c: dw9768: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/dw9768.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/i2c/dw9768.c b/drivers/media/i2c/dw9768.c
index 8b8cb4b077b5f..c086580efac78 100644
--- a/drivers/media/i2c/dw9768.c
+++ b/drivers/media/i2c/dw9768.c
@@ -374,15 +374,7 @@ static const struct v4l2_ctrl_ops dw9768_ctrl_ops = {
 
 static int dw9768_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	int ret;
-
-	ret = pm_runtime_get_sync(sd->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(sd->dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(sd->dev);
 }
 
 static int dw9768_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
-- 
GitLab


From d5e75e8b4a24715fddd4adf3a4c0bf90e36546c5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:12 +0200
Subject: [PATCH 1028/3804] media: i2c: dw9807-vcm: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/dw9807-vcm.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/i2c/dw9807-vcm.c b/drivers/media/i2c/dw9807-vcm.c
index 438a44b76da80..95e06f13bc9ed 100644
--- a/drivers/media/i2c/dw9807-vcm.c
+++ b/drivers/media/i2c/dw9807-vcm.c
@@ -130,15 +130,7 @@ static const struct v4l2_ctrl_ops dw9807_vcm_ctrl_ops = {
 
 static int dw9807_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	int rval;
-
-	rval = pm_runtime_get_sync(sd->dev);
-	if (rval < 0) {
-		pm_runtime_put_noidle(sd->dev);
-		return rval;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(sd->dev);
 }
 
 static int dw9807_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
-- 
GitLab


From c36c7d56ad4426ddf6cc598a52562533709a245c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:12 +0200
Subject: [PATCH 1029/3804] media: i2c: hi556: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/hi556.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/hi556.c b/drivers/media/i2c/hi556.c
index 6f05c1138e3be..627ccfa34835d 100644
--- a/drivers/media/i2c/hi556.c
+++ b/drivers/media/i2c/hi556.c
@@ -813,9 +813,8 @@ static int hi556_set_stream(struct v4l2_subdev *sd, int enable)
 
 	mutex_lock(&hi556->mutex);
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
 			mutex_unlock(&hi556->mutex);
 			return ret;
 		}
-- 
GitLab


From 3c11dfe3be25940dba2472f4e3114be132a74ba0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:12 +0200
Subject: [PATCH 1030/3804] media: i2c: imx214: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx214.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx214.c b/drivers/media/i2c/imx214.c
index e8b281e432e88..1a770a530cf52 100644
--- a/drivers/media/i2c/imx214.c
+++ b/drivers/media/i2c/imx214.c
@@ -776,11 +776,9 @@ static int imx214_s_stream(struct v4l2_subdev *subdev, int enable)
 		return 0;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(imx214->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(imx214->dev);
+		ret = pm_runtime_resume_and_get(imx214->dev);
+		if (ret < 0)
 			return ret;
-		}
 
 		ret = imx214_start_streaming(imx214);
 		if (ret < 0)
-- 
GitLab


From 30ad455912651abb79db379a2a2eaad00509ba87 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:12 +0200
Subject: [PATCH 1031/3804] media: i2c: imx219: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx219.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c
index 1054ffedaefd0..74a0bf9b088b2 100644
--- a/drivers/media/i2c/imx219.c
+++ b/drivers/media/i2c/imx219.c
@@ -1035,11 +1035,9 @@ static int imx219_start_streaming(struct imx219 *imx219)
 	const struct imx219_reg_list *reg_list;
 	int ret;
 
-	ret = pm_runtime_get_sync(&client->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(&client->dev);
+	ret = pm_runtime_resume_and_get(&client->dev);
+	if (ret < 0)
 		return ret;
-	}
 
 	/* Apply default values of current mode */
 	reg_list = &imx219->mode->reg_list;
-- 
GitLab


From 018ef43017113afc24d0a1842b7c7cf66a52cfc9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:13 +0200
Subject: [PATCH 1032/3804] media: i2c: imx258: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx258.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx258.c b/drivers/media/i2c/imx258.c
index a017ec4e0f504..90529424d5b66 100644
--- a/drivers/media/i2c/imx258.c
+++ b/drivers/media/i2c/imx258.c
@@ -1039,11 +1039,9 @@ static int imx258_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto err_unlock;
-		}
 
 		/*
 		 * Apply default & customized values
-- 
GitLab


From bb94b8f3a76e76ed24e76fa58892fd8db86c13c2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:13 +0200
Subject: [PATCH 1033/3804] media: i2c: imx274: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx274.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
index cdccaab3043aa..ee2127436f0bc 100644
--- a/drivers/media/i2c/imx274.c
+++ b/drivers/media/i2c/imx274.c
@@ -1441,9 +1441,8 @@ static int imx274_s_stream(struct v4l2_subdev *sd, int on)
 	mutex_lock(&imx274->lock);
 
 	if (on) {
-		ret = pm_runtime_get_sync(&imx274->client->dev);
+		ret = pm_runtime_resume_and_get(&imx274->client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&imx274->client->dev);
 			mutex_unlock(&imx274->lock);
 			return ret;
 		}
-- 
GitLab


From 739d9c64150a73dccfa9a8b792de5179fa06e1f7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:13 +0200
Subject: [PATCH 1034/3804] media: i2c: imx290: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx290.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c
index 6319a42057d26..06020e648a97c 100644
--- a/drivers/media/i2c/imx290.c
+++ b/drivers/media/i2c/imx290.c
@@ -764,11 +764,9 @@ static int imx290_set_stream(struct v4l2_subdev *sd, int enable)
 	int ret = 0;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(imx290->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(imx290->dev);
+		ret = pm_runtime_resume_and_get(imx290->dev);
+		if (ret < 0)
 			goto unlock_and_return;
-		}
 
 		ret = imx290_start_streaming(imx290);
 		if (ret) {
-- 
GitLab


From c0f8f1b6a10110fd70c41114214ba1b209d9a910 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:13 +0200
Subject: [PATCH 1035/3804] media: i2c: imx319: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx319.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx319.c b/drivers/media/i2c/imx319.c
index 38540323a1560..4e0a8c9d271f8 100644
--- a/drivers/media/i2c/imx319.c
+++ b/drivers/media/i2c/imx319.c
@@ -2141,11 +2141,9 @@ static int imx319_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto err_unlock;
-		}
 
 		/*
 		 * Apply default & customized values
-- 
GitLab


From 5f070f4df4fd71230074d154a6fd99c6abe03abb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:14 +0200
Subject: [PATCH 1036/3804] media: i2c: imx355: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/imx355.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c
index ccedcd4c520ae..93f13a04439a8 100644
--- a/drivers/media/i2c/imx355.c
+++ b/drivers/media/i2c/imx355.c
@@ -1442,11 +1442,9 @@ static int imx355_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto err_unlock;
-		}
 
 		/*
 		 * Apply default & customized values
-- 
GitLab


From e7c018a96355fa0d8ce2d4499d300584a92717c2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:14 +0200
Subject: [PATCH 1037/3804] media: i2c: mt9m001: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/mt9m001.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/mt9m001.c b/drivers/media/i2c/mt9m001.c
index 3b0ba8ed5233f..58c85a3bccf64 100644
--- a/drivers/media/i2c/mt9m001.c
+++ b/drivers/media/i2c/mt9m001.c
@@ -217,9 +217,9 @@ static int mt9m001_s_stream(struct v4l2_subdev *sd, int enable)
 		goto done;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0)
-			goto put_unlock;
+			goto unlock;
 
 		ret = mt9m001_apply_selection(sd);
 		if (ret)
@@ -247,6 +247,7 @@ done:
 
 put_unlock:
 	pm_runtime_put(&client->dev);
+unlock:
 	mutex_unlock(&mt9m001->mutex);
 
 	return ret;
@@ -834,6 +835,10 @@ static int mt9m001_remove(struct i2c_client *client)
 {
 	struct mt9m001 *mt9m001 = to_mt9m001(client);
 
+	/*
+	 * As it increments RPM usage_count even on errors, we don't need to
+	 * check the returned code here.
+	 */
 	pm_runtime_get_sync(&client->dev);
 
 	v4l2_async_unregister_subdev(&mt9m001->subdev);
-- 
GitLab


From 67d44de2f842414d28acc0512311a9dad17ee797 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:14 +0200
Subject: [PATCH 1038/3804] media: i2c: ov02a10: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov02a10.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov02a10.c b/drivers/media/i2c/ov02a10.c
index c47b1d45d8fd8..a1d7314b20a96 100644
--- a/drivers/media/i2c/ov02a10.c
+++ b/drivers/media/i2c/ov02a10.c
@@ -540,11 +540,9 @@ static int ov02a10_s_stream(struct v4l2_subdev *sd, int on)
 	}
 
 	if (on) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto unlock_and_return;
-		}
 
 		ret = __ov02a10_start_stream(ov02a10);
 		if (ret) {
-- 
GitLab


From cc9351ff1055f932e7af637081a411ffeaf82f76 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:14 +0200
Subject: [PATCH 1039/3804] media: i2c: ov13858: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov13858.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov13858.c b/drivers/media/i2c/ov13858.c
index 4a2885ff0cbe7..9598c0b19603d 100644
--- a/drivers/media/i2c/ov13858.c
+++ b/drivers/media/i2c/ov13858.c
@@ -1472,11 +1472,9 @@ static int ov13858_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto err_unlock;
-		}
 
 		/*
 		 * Apply default & customized values
-- 
GitLab


From c12ede18c38beb4804bfc9995be05a75e0c1a34a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:14 +0200
Subject: [PATCH 1040/3804] media: i2c: ov2659: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Acked-by: Lad Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov2659.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
index 42f64175a6dff..a3c8eae684865 100644
--- a/drivers/media/i2c/ov2659.c
+++ b/drivers/media/i2c/ov2659.c
@@ -1186,11 +1186,9 @@ static int ov2659_s_stream(struct v4l2_subdev *sd, int on)
 		goto unlock;
 	}
 
-	ret = pm_runtime_get_sync(&client->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(&client->dev);
+	ret = pm_runtime_resume_and_get(&client->dev);
+	if (ret < 0)
 		goto unlock;
-	}
 
 	ret = ov2659_init(sd, 0);
 	if (!ret)
-- 
GitLab


From c679b2365a9e93f5def3a548bb2917f1968c59a5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:15 +0200
Subject: [PATCH 1041/3804] media: i2c: ov2685: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov2685.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov2685.c b/drivers/media/i2c/ov2685.c
index 49a2dcedb3474..2f3836dd8eed9 100644
--- a/drivers/media/i2c/ov2685.c
+++ b/drivers/media/i2c/ov2685.c
@@ -456,11 +456,10 @@ static int ov2685_s_stream(struct v4l2_subdev *sd, int on)
 		goto unlock_and_return;
 
 	if (on) {
-		ret = pm_runtime_get_sync(&ov2685->client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&ov2685->client->dev);
+		if (ret < 0)
 			goto unlock_and_return;
-		}
+
 		ret = __v4l2_ctrl_handler_setup(&ov2685->ctrl_handler);
 		if (ret) {
 			pm_runtime_put(&client->dev);
-- 
GitLab


From b9be93aa55b4ee1821b2974995640fa769689d94 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:15 +0200
Subject: [PATCH 1042/3804] media: i2c: ov2740: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov2740.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c
index 0f3f17f3c426f..54779f720f9de 100644
--- a/drivers/media/i2c/ov2740.c
+++ b/drivers/media/i2c/ov2740.c
@@ -751,9 +751,8 @@ static int ov2740_set_stream(struct v4l2_subdev *sd, int enable)
 
 	mutex_lock(&ov2740->mutex);
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
 			mutex_unlock(&ov2740->mutex);
 			return ret;
 		}
@@ -1049,9 +1048,8 @@ static int ov2740_nvmem_read(void *priv, unsigned int off, void *val,
 		goto exit;
 	}
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(dev);
 		goto exit;
 	}
 
-- 
GitLab


From 5187df40bf3d910d883cfff815812540de951999 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:15 +0200
Subject: [PATCH 1043/3804] media: i2c: ov5647: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

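Use the new API in order to clean up the error check logic.

Split the error path into two labels so that pm_runtime_put() is
skipped when pm_runtime_resume_and_get() itself fails.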

Acked-by: Jacopo Mondi <jacopo@jmondi.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov5647.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov5647.c b/drivers/media/i2c/ov5647.c
index 1cefa15729ce3..38faa74755e3c 100644
--- a/drivers/media/i2c/ov5647.c
+++ b/drivers/media/i2c/ov5647.c
@@ -882,20 +882,20 @@ static int ov5647_s_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0)
 			goto error_unlock;
 
 		ret = ov5647_stream_on(sd);
 		if (ret < 0) {
 			dev_err(&client->dev, "stream start failed: %d\n", ret);
-			goto error_unlock;
+			goto error_pm;
 		}
 	} else {
 		ret = ov5647_stream_off(sd);
 		if (ret < 0) {
 			dev_err(&client->dev, "stream stop failed: %d\n", ret);
-			goto error_unlock;
+			goto error_pm;
 		}
 		pm_runtime_put(&client->dev);
 	}
@@ -905,8 +905,9 @@ static int ov5647_s_stream(struct v4l2_subdev *sd, int enable)
 
 	return 0;
 
-error_unlock:
+error_pm:
 	pm_runtime_put(&client->dev);
+error_unlock:
 	mutex_unlock(&sensor->lock);
 
 	return ret;
-- 
GitLab


From 6b19d297008627ba4296448e760624578b199542 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:15 +0200
Subject: [PATCH 1044/3804] media: i2c: ov5648: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov5648.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov5648.c b/drivers/media/i2c/ov5648.c
index 3ecb4a3e87736..07e64ff0be3ff 100644
--- a/drivers/media/i2c/ov5648.c
+++ b/drivers/media/i2c/ov5648.c
@@ -2132,11 +2132,9 @@ static int ov5648_s_stream(struct v4l2_subdev *subdev, int enable)
 	int ret;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(sensor->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(sensor->dev);
+		ret = pm_runtime_resume_and_get(sensor->dev);
+		if (ret < 0)
 			return ret;
-		}
 	}
 
 	mutex_lock(&sensor->mutex);
-- 
GitLab


From f151c230dced061eebeeafae6d6c5958d4a46689 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:15 +0200
Subject: [PATCH 1045/3804] media: i2c: ov5670: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov5670.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c
index dee7df8dd1006..182f271f118f2 100644
--- a/drivers/media/i2c/ov5670.c
+++ b/drivers/media/i2c/ov5670.c
@@ -2347,11 +2347,9 @@ static int ov5670_set_stream(struct v4l2_subdev *sd, int enable)
 		goto unlock_and_return;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto unlock_and_return;
-		}
 
 		ret = ov5670_start_streaming(ov5670);
 		if (ret)
-- 
GitLab


From f236bb2490b60eea8e2db0251fc9062181762fe5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1046/3804] media: i2c: ov5675: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov5675.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ov5675.c b/drivers/media/i2c/ov5675.c
index dea32859459a7..e7e297a239609 100644
--- a/drivers/media/i2c/ov5675.c
+++ b/drivers/media/i2c/ov5675.c
@@ -863,9 +863,8 @@ static int ov5675_set_stream(struct v4l2_subdev *sd, int enable)
 
 	mutex_lock(&ov5675->mutex);
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
 			mutex_unlock(&ov5675->mutex);
 			return ret;
 		}
-- 
GitLab


From 1541ac5ffd4c228242ec4f0af3a73e7bd652ab3f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1047/3804] media: i2c: ov5695: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov5695.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov5695.c b/drivers/media/i2c/ov5695.c
index 09bee57a241df..469d941813c6e 100644
--- a/drivers/media/i2c/ov5695.c
+++ b/drivers/media/i2c/ov5695.c
@@ -946,11 +946,9 @@ static int ov5695_s_stream(struct v4l2_subdev *sd, int on)
 		goto unlock_and_return;
 
 	if (on) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto unlock_and_return;
-		}
 
 		ret = __ov5695_start_stream(ov5695);
 		if (ret) {
-- 
GitLab


From 45bbff806cab3bb5b5026386b896d1c6f027556c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1048/3804] media: i2c: ov7740: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov7740.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov7740.c b/drivers/media/i2c/ov7740.c
index 47a9003d29d6f..e0ff6506a5430 100644
--- a/drivers/media/i2c/ov7740.c
+++ b/drivers/media/i2c/ov7740.c
@@ -624,11 +624,9 @@ static int ov7740_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
+		if (ret < 0)
 			goto err_unlock;
-		}
 
 		ret = ov7740_start_streaming(ov7740);
 		if (ret)
-- 
GitLab


From 529e78dea6a05750a9ff7bb44bb360546d4a322d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1049/3804] media: i2c: ov8856: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov8856.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c
index e3af3ea277afe..2875f8e4ddcba 100644
--- a/drivers/media/i2c/ov8856.c
+++ b/drivers/media/i2c/ov8856.c
@@ -1340,9 +1340,8 @@ static int ov8856_set_stream(struct v4l2_subdev *sd, int enable)
 
 	mutex_lock(&ov8856->mutex);
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
 			mutex_unlock(&ov8856->mutex);
 			return ret;
 		}
-- 
GitLab


From 586ee057ba6b014520e3dac232ac25da987f5ac0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1050/3804] media: i2c: ov8865: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov8865.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c
index 9ecf180635ee2..3bf6ee4898a91 100644
--- a/drivers/media/i2c/ov8865.c
+++ b/drivers/media/i2c/ov8865.c
@@ -2497,11 +2497,9 @@ static int ov8865_s_stream(struct v4l2_subdev *subdev, int enable)
 	int ret;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(sensor->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(sensor->dev);
+		ret = pm_runtime_resume_and_get(sensor->dev);
+		if (ret < 0)
 			return ret;
-		}
 	}
 
 	mutex_lock(&sensor->mutex);
-- 
GitLab


From 279a085db8c8bcf7d83b06cfe36cf4b80f4d0566 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:16 +0200
Subject: [PATCH 1051/3804] media: i2c: ov9734: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov9734.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c
index b7309a551cae0..ba156683c5338 100644
--- a/drivers/media/i2c/ov9734.c
+++ b/drivers/media/i2c/ov9734.c
@@ -644,9 +644,8 @@ static int ov9734_set_stream(struct v4l2_subdev *sd, int enable)
 	}
 
 	if (enable) {
-		ret = pm_runtime_get_sync(&client->dev);
+		ret = pm_runtime_resume_and_get(&client->dev);
 		if (ret < 0) {
-			pm_runtime_put_noidle(&client->dev);
 			mutex_unlock(&ov9734->mutex);
 			return ret;
 		}
-- 
GitLab


From 81f2fe55d12b3ad26c8203c51967ae531db7ac40 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:17 +0200
Subject: [PATCH 1052/3804] media: i2c: tvp5150: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/tvp5150.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c
index e26e3f5440549..374a9da75e4db 100644
--- a/drivers/media/i2c/tvp5150.c
+++ b/drivers/media/i2c/tvp5150.c
@@ -1448,11 +1448,9 @@ static int tvp5150_s_stream(struct v4l2_subdev *sd, int enable)
 	       TVP5150_MISC_CTL_CLOCK_OE;
 
 	if (enable) {
-		ret = pm_runtime_get_sync(sd->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(sd->dev);
+		ret = pm_runtime_resume_and_get(sd->dev);
+		if (ret < 0)
 			return ret;
-		}
 
 		tvp5150_enable(sd);
 
@@ -1675,15 +1673,7 @@ err:
 
 static int tvp5150_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	int ret;
-
-	ret = pm_runtime_get_sync(sd->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(sd->dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(sd->dev);
 }
 
 static int tvp5150_close(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
-- 
GitLab


From a959a7bf3b939494adca95b1c53e98b74fd90702 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:17 +0200
Subject: [PATCH 1053/3804] media: i2c: video-i2c: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/video-i2c.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/media/i2c/video-i2c.c b/drivers/media/i2c/video-i2c.c
index 0465832a40908..de12f38f347ce 100644
--- a/drivers/media/i2c/video-i2c.c
+++ b/drivers/media/i2c/video-i2c.c
@@ -286,11 +286,9 @@ static int amg88xx_read(struct device *dev, enum hwmon_sensor_types type,
 	__le16 buf;
 	int tmp;
 
-	tmp = pm_runtime_get_sync(regmap_get_device(data->regmap));
-	if (tmp < 0) {
-		pm_runtime_put_noidle(regmap_get_device(data->regmap));
+	tmp = pm_runtime_resume_and_get(regmap_get_device(data->regmap));
+	if (tmp < 0)
 		return tmp;
-	}
 
 	tmp = regmap_bulk_read(data->regmap, AMG88XX_REG_TTHL, &buf, 2);
 	pm_runtime_mark_last_busy(regmap_get_device(data->regmap));
@@ -512,11 +510,9 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count)
 	if (data->kthread_vid_cap)
 		return 0;
 
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(dev);
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret < 0)
 		goto error_del_list;
-	}
 
 	ret = data->chip->setup(data);
 	if (ret)
-- 
GitLab


From 2450f59d24166c8af60aa075e9dec4466f905c1f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:11 +0200
Subject: [PATCH 1054/3804] media: i2c: ccs-core: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ccs/ccs-core.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index 4a848ac2d2cd2..a349189a38dbb 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -3101,12 +3101,9 @@ static int __maybe_unused ccs_suspend(struct device *dev)
 	bool streaming = sensor->streaming;
 	int rval;
 
-	rval = pm_runtime_get_sync(dev);
-	if (rval < 0) {
-		pm_runtime_put_noidle(dev);
-
+	rval = pm_runtime_resume_and_get(dev);
+	if (rval < 0)
 		return rval;
-	}
 
 	if (sensor->streaming)
 		ccs_stop_streaming(sensor);
-- 
GitLab


From 75ecb9c83cf9d29eaa05bbdd0f84356ac22ed972 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:22 +0200
Subject: [PATCH 1055/3804] staging: media: imx7-mipi-csis: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 025fdc488bd66..1dc680d94a46a 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -695,11 +695,10 @@ static int mipi_csis_s_stream(struct v4l2_subdev *mipi_sd, int enable)
 
 		mipi_csis_clear_counters(state);
 
-		ret = pm_runtime_get_sync(&state->pdev->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(&state->pdev->dev);
+		ret = pm_runtime_resume_and_get(&state->pdev->dev);
+		if (ret < 0)
 			return ret;
-		}
+
 		ret = v4l2_subdev_call(state->src_sd, core, s_power, 1);
 		if (ret < 0 && ret != -ENOIOCTLCMD)
 			goto done;
-- 
GitLab


From deb9119f807abd01d4d78a2dc567b10e2501ec79 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 1056/3804] staging: media: atomisp: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Besides that, the de-init order in case of css error was wrong.
This change should also fix that.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/pci/atomisp_fops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/atomisp/pci/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp_fops.c
index f1e6b25978534..26d05474a035c 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_fops.c
@@ -837,7 +837,7 @@ dev_init:
 	}
 
 	/* runtime power management, turn on ISP */
-	ret = pm_runtime_get_sync(vdev->v4l2_dev->dev);
+	ret = pm_runtime_resume_and_get(vdev->v4l2_dev->dev);
 	if (ret < 0) {
 		dev_err(isp->dev, "Failed to power on device\n");
 		goto error;
@@ -881,9 +881,9 @@ done:
 
 css_error:
 	atomisp_css_uninit(isp);
+	pm_runtime_put(vdev->v4l2_dev->dev);
 error:
 	hmm_pool_unregister(HMM_POOL_TYPE_DYNAMIC);
-	pm_runtime_put(vdev->v4l2_dev->dev);
 	rt_mutex_unlock(&isp->mutex);
 	return ret;
 }
-- 
GitLab


From 7af42f3136d8029f84be743a06c96ba024eabb40 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:22 +0200
Subject: [PATCH 1057/3804] staging: media: ipu3: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/ipu3.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/media/ipu3/ipu3.c b/drivers/staging/media/ipu3/ipu3.c
index ee1bba6bdcacd..8e1e9e46e6045 100644
--- a/drivers/staging/media/ipu3/ipu3.c
+++ b/drivers/staging/media/ipu3/ipu3.c
@@ -392,10 +392,9 @@ int imgu_s_stream(struct imgu_device *imgu, int enable)
 	}
 
 	/* Set Power */
-	r = pm_runtime_get_sync(dev);
+	r = pm_runtime_resume_and_get(dev);
 	if (r < 0) {
 		dev_err(dev, "failed to set imgu power\n");
-		pm_runtime_put(dev);
 		return r;
 	}
 
-- 
GitLab


From e21e1e94ce10e7ce09f98184526a237125179155 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:22 +0200
Subject: [PATCH 1058/3804] staging: media: cedrus_video: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/sunxi/cedrus/cedrus_video.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
index b62eb8e840573..9ddd789d0b1f2 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
@@ -490,11 +490,9 @@ static int cedrus_start_streaming(struct vb2_queue *vq, unsigned int count)
 	}
 
 	if (V4L2_TYPE_IS_OUTPUT(vq->type)) {
-		ret = pm_runtime_get_sync(dev->dev);
-		if (ret < 0) {
-			pm_runtime_put_noidle(dev->dev);
+		ret = pm_runtime_resume_and_get(dev->dev);
+		if (ret < 0)
 			goto err_cleanup;
-		}
 
 		if (dev->dec_ops[ctx->current_codec]->start) {
 			ret = dev->dec_ops[ctx->current_codec]->start(ctx);
-- 
GitLab


From dc8276b789174071ca3fbfe887a95718b3f9b888 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:22 +0200
Subject: [PATCH 1059/3804] staging: media: tegra-vde: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

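Use the new API in order to clean up the error check logic.

While at it, check the result of the resume in tegra_vde_probe() and
unwind on failure. tegra_vde_remove() intentionally keeps using
pm_runtime_get_sync(), as the usage count is incremented even on errors
and the return code does not need to be checked there.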

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/tegra-vde/vde.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/tegra-vde/vde.c b/drivers/staging/media/tegra-vde/vde.c
index 28845b5bafaf1..e025b69776f25 100644
--- a/drivers/staging/media/tegra-vde/vde.c
+++ b/drivers/staging/media/tegra-vde/vde.c
@@ -775,9 +775,9 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
 	if (ret)
 		goto release_dpb_frames;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
-		goto put_runtime_pm;
+		goto unlock;
 
 	/*
 	 * We rely on the VDE registers reset value, otherwise VDE
@@ -843,6 +843,8 @@ static int tegra_vde_ioctl_decode_h264(struct tegra_vde *vde,
 put_runtime_pm:
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
+
+unlock:
 	mutex_unlock(&vde->lock);
 
 release_dpb_frames:
@@ -1069,11 +1071,19 @@ static int tegra_vde_probe(struct platform_device *pdev)
 	 * power-cycle it in order to put hardware into a predictable lower
 	 * power state.
 	 */
-	pm_runtime_get_sync(dev);
+	if (pm_runtime_resume_and_get(dev) < 0)
+		goto err_pm_runtime;
+
 	pm_runtime_put(dev);
 
 	return 0;
 
+err_pm_runtime:
+	misc_deregister(&vde->miscdev);
+
+	pm_runtime_dont_use_autosuspend(dev);
+	pm_runtime_disable(dev);
+
 err_deinit_iommu:
 	tegra_vde_iommu_deinit(vde);
 
@@ -1089,7 +1099,12 @@ static int tegra_vde_remove(struct platform_device *pdev)
 	struct tegra_vde *vde = platform_get_drvdata(pdev);
 	struct device *dev = &pdev->dev;
 
+	/*
+	 * As it increments RPM usage_count even on errors, we don't need to
+	 * check the returned code here.
+	 */
 	pm_runtime_get_sync(dev);
+
 	pm_runtime_dont_use_autosuspend(dev);
 	pm_runtime_disable(dev);
 
-- 
GitLab


From c09ffca53fcd186e140c82ea73e51cb4dd362053 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:23 +0200
Subject: [PATCH 1060/3804] staging: media: tegra-video: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/tegra-video/csi.c | 3 +--
 drivers/staging/media/tegra-video/vi.c  | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
index 033a6935c26df..e938bf4c48b63 100644
--- a/drivers/staging/media/tegra-video/csi.c
+++ b/drivers/staging/media/tegra-video/csi.c
@@ -298,10 +298,9 @@ static int tegra_csi_enable_stream(struct v4l2_subdev *subdev)
 	struct tegra_csi *csi = csi_chan->csi;
 	int ret, err;
 
-	ret = pm_runtime_get_sync(csi->dev);
+	ret = pm_runtime_resume_and_get(csi->dev);
 	if (ret < 0) {
 		dev_err(csi->dev, "failed to get runtime PM: %d\n", ret);
-		pm_runtime_put_noidle(csi->dev);
 		return ret;
 	}
 
diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c
index b712063a7c5d2..b76e9110e7064 100644
--- a/drivers/staging/media/tegra-video/vi.c
+++ b/drivers/staging/media/tegra-video/vi.c
@@ -297,10 +297,9 @@ static int tegra_channel_start_streaming(struct vb2_queue *vq, u32 count)
 	struct tegra_vi_channel *chan = vb2_get_drv_priv(vq);
 	int ret;
 
-	ret = pm_runtime_get_sync(chan->vi->dev);
+	ret = pm_runtime_resume_and_get(chan->vi->dev);
 	if (ret < 0) {
 		dev_err(chan->vi->dev, "failed to get runtime PM: %d\n", ret);
-		pm_runtime_put_noidle(chan->vi->dev);
 		return ret;
 	}
 
-- 
GitLab


From 0314339a0a49f4a128b61e5e1a0af1df6e64a186 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:20 +0200
Subject: [PATCH 1061/3804] media: rockchip/rga: use
 pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API in order to clean up the error check logic.

Also check the return value in rga_probe(), which was previously
ignored.

Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rockchip/rga/rga-buf.c | 3 +--
 drivers/media/platform/rockchip/rga/rga.c     | 4 +++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/rockchip/rga/rga-buf.c b/drivers/media/platform/rockchip/rga/rga-buf.c
index bf9a75b75083b..81508ed5abf34 100644
--- a/drivers/media/platform/rockchip/rga/rga-buf.c
+++ b/drivers/media/platform/rockchip/rga/rga-buf.c
@@ -79,9 +79,8 @@ static int rga_buf_start_streaming(struct vb2_queue *q, unsigned int count)
 	struct rockchip_rga *rga = ctx->rga;
 	int ret;
 
-	ret = pm_runtime_get_sync(rga->dev);
+	ret = pm_runtime_resume_and_get(rga->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(rga->dev);
 		rga_buf_return_buffers(q, VB2_BUF_STATE_QUEUED);
 		return ret;
 	}
diff --git a/drivers/media/platform/rockchip/rga/rga.c b/drivers/media/platform/rockchip/rga/rga.c
index 9d122429706e9..bf3fd71ec3aff 100644
--- a/drivers/media/platform/rockchip/rga/rga.c
+++ b/drivers/media/platform/rockchip/rga/rga.c
@@ -866,7 +866,9 @@ static int rga_probe(struct platform_device *pdev)
 		goto unreg_video_dev;
 	}
 
-	pm_runtime_get_sync(rga->dev);
+	ret = pm_runtime_resume_and_get(rga->dev);
+	if (ret < 0)
+		goto unreg_video_dev;
 
 	rga->version.major = (rga_read(rga, RGA_VERSION_INFO) >> 24) & 0xFF;
 	rga->version.minor = (rga_read(rga, RGA_VERSION_INFO) >> 20) & 0x0F;
-- 
GitLab


From bc700a13cd3ffe8bb6ef7274ede74a19639fd6dd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 15:35:51 +0200
Subject: [PATCH 1062/3804] media: sti/hva: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

While the hva driver handles the usage counter correctly, many other
drivers get it wrong by misusing pm_runtime_get_sync(). So, let's change
this driver to also use pm_runtime_resume_and_get(), as we're
doing similar changes all over the media subsystem.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/hva/hva-hw.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/media/platform/sti/hva/hva-hw.c b/drivers/media/platform/sti/hva/hva-hw.c
index f59811e27f51f..77b8bfa5e0c5e 100644
--- a/drivers/media/platform/sti/hva/hva-hw.c
+++ b/drivers/media/platform/sti/hva/hva-hw.c
@@ -270,9 +270,8 @@ static unsigned long int hva_hw_get_ip_version(struct hva_dev *hva)
 	struct device *dev = hva_to_dev(hva);
 	unsigned long int version;
 
-	if (pm_runtime_get_sync(dev) < 0) {
+	if (pm_runtime_resume_and_get(dev) < 0) {
 		dev_err(dev, "%s     failed to get pm_runtime\n", HVA_PREFIX);
-		pm_runtime_put_noidle(dev);
 		mutex_unlock(&hva->protect_mutex);
 		return -EFAULT;
 	}
@@ -386,10 +385,10 @@ int hva_hw_probe(struct platform_device *pdev, struct hva_dev *hva)
 	pm_runtime_set_suspended(dev);
 	pm_runtime_enable(dev);
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
 		dev_err(dev, "%s     failed to set PM\n", HVA_PREFIX);
-		goto err_pm;
+		goto err_clk;
 	}
 
 	/* check IP hardware version */
@@ -462,6 +461,7 @@ int hva_hw_execute_task(struct hva_ctx *ctx, enum hva_hw_cmd_type cmd,
 	u8 client_id = ctx->id;
 	int ret;
 	u32 reg = 0;
+	bool got_pm = false;
 
 	mutex_lock(&hva->protect_mutex);
 
@@ -469,12 +469,13 @@ int hva_hw_execute_task(struct hva_ctx *ctx, enum hva_hw_cmd_type cmd,
 	enable_irq(hva->irq_its);
 	enable_irq(hva->irq_err);
 
-	if (pm_runtime_get_sync(dev) < 0) {
+	if (pm_runtime_resume_and_get(dev) < 0) {
 		dev_err(dev, "%s     failed to get pm_runtime\n", ctx->name);
 		ctx->sys_errors++;
 		ret = -EFAULT;
 		goto out;
 	}
+	got_pm = true;
 
 	reg = readl_relaxed(hva->regs + HVA_HIF_REG_CLK_GATING);
 	switch (cmd) {
@@ -537,7 +538,8 @@ out:
 		dev_dbg(dev, "%s     unknown command 0x%x\n", ctx->name, cmd);
 	}
 
-	pm_runtime_put_autosuspend(dev);
+	if (got_pm)
+		pm_runtime_put_autosuspend(dev);
 	mutex_unlock(&hva->protect_mutex);
 
 	return ret;
@@ -553,9 +555,8 @@ void hva_hw_dump_regs(struct hva_dev *hva, struct seq_file *s)
 
 	mutex_lock(&hva->protect_mutex);
 
-	if (pm_runtime_get_sync(dev) < 0) {
+	if (pm_runtime_resume_and_get(dev) < 0) {
 		seq_puts(s, "Cannot wake up IP\n");
-		pm_runtime_put_noidle(dev);
 		mutex_unlock(&hva->protect_mutex);
 		return;
 	}
-- 
GitLab


From 0ccb25b8279cb6f212434322126b4c512f0b306c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:17 +0200
Subject: [PATCH 1063/3804] media: ipu3: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/intel/ipu3/ipu3-cio2-main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
index fecef85bd62eb..ca8040d1a725b 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
@@ -975,10 +975,9 @@ static int cio2_vb2_start_streaming(struct vb2_queue *vq, unsigned int count)
 	cio2->cur_queue = q;
 	atomic_set(&q->frame_sequence, 0);
 
-	r = pm_runtime_get_sync(&cio2->pci_dev->dev);
+	r = pm_runtime_resume_and_get(&cio2->pci_dev->dev);
 	if (r < 0) {
 		dev_info(&cio2->pci_dev->dev, "failed to set power %d\n", r);
-		pm_runtime_put_noidle(&cio2->pci_dev->dev);
 		return r;
 	}
 
-- 
GitLab


From 7295e537bb2b16b500ff55cd6209b3cefd323948 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:17 +0200
Subject: [PATCH 1064/3804] media: coda: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

While here, as noted by Philipp, the labels in the coda_open()
function are currently named after the operation that failed,
instead of what they do in response. So, change the name of
the error label that is jumped to when clk_enable fails,
in order to be consistent.

Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/coda/coda-common.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
index bd666c858fa10..2017de85713eb 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -2660,7 +2660,7 @@ static int coda_open(struct file *file)
 	ctx->use_vdoa = false;
 
 	/* Power up and upload firmware if necessary */
-	ret = pm_runtime_get_sync(dev->dev);
+	ret = pm_runtime_resume_and_get(dev->dev);
 	if (ret < 0) {
 		v4l2_err(&dev->v4l2_dev, "failed to power up: %d\n", ret);
 		goto err_pm_get;
@@ -2668,7 +2668,7 @@ static int coda_open(struct file *file)
 
 	ret = clk_prepare_enable(dev->clk_per);
 	if (ret)
-		goto err_pm_get;
+		goto err_clk_enable;
 
 	ret = clk_prepare_enable(dev->clk_ahb);
 	if (ret)
@@ -2707,8 +2707,9 @@ err_ctx_init:
 	clk_disable_unprepare(dev->clk_ahb);
 err_clk_ahb:
 	clk_disable_unprepare(dev->clk_per);
-err_pm_get:
+err_clk_enable:
 	pm_runtime_put_sync(dev->dev);
+err_pm_get:
 	v4l2_fh_del(&ctx->fh);
 	v4l2_fh_exit(&ctx->fh);
 err_coda_name_init:
-- 
GitLab


From 97df01fae800ff765dbb247ee30c7a438b2ae07b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:18 +0200
Subject: [PATCH 1065/3804] media: mtk-jpeg: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
index 88a23bce569d9..a89c7b206eefd 100644
--- a/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mtk-jpeg/mtk_jpeg_core.c
@@ -920,7 +920,7 @@ static void mtk_jpeg_enc_device_run(void *priv)
 	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
 	dst_buf = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
 
-	ret = pm_runtime_get_sync(jpeg->dev);
+	ret = pm_runtime_resume_and_get(jpeg->dev);
 	if (ret < 0)
 		goto enc_end;
 
@@ -973,7 +973,7 @@ static void mtk_jpeg_dec_device_run(void *priv)
 		return;
 	}
 
-	ret = pm_runtime_get_sync(jpeg->dev);
+	ret = pm_runtime_resume_and_get(jpeg->dev);
 	if (ret < 0)
 		goto dec_end;
 
-- 
GitLab


From 09dfb36ce250e76322ac8940c10d75e53a9e48d4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:18 +0200
Subject: [PATCH 1066/3804] media: camss: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Reviewed-by: Robert Foss <robert.foss@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/camss/camss-csid.c   | 6 ++----
 drivers/media/platform/qcom/camss/camss-csiphy.c | 6 ++----
 drivers/media/platform/qcom/camss/camss-ispif.c  | 6 ++----
 drivers/media/platform/qcom/camss/camss-vfe.c    | 5 +++--
 4 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c
index 0e6b76e839834..7e2490ca1ad16 100644
--- a/drivers/media/platform/qcom/camss/camss-csid.c
+++ b/drivers/media/platform/qcom/camss/camss-csid.c
@@ -156,11 +156,9 @@ static int csid_set_power(struct v4l2_subdev *sd, int on)
 	int ret;
 
 	if (on) {
-		ret = pm_runtime_get_sync(dev);
-		if (ret < 0) {
-			pm_runtime_put_sync(dev);
+		ret = pm_runtime_resume_and_get(dev);
+		if (ret < 0)
 			return ret;
-		}
 
 		ret = regulator_enable(csid->vdda);
 		if (ret < 0) {
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c
index 1996541278a2a..b623e007aec67 100644
--- a/drivers/media/platform/qcom/camss/camss-csiphy.c
+++ b/drivers/media/platform/qcom/camss/camss-csiphy.c
@@ -197,11 +197,9 @@ static int csiphy_set_power(struct v4l2_subdev *sd, int on)
 	if (on) {
 		int ret;
 
-		ret = pm_runtime_get_sync(dev);
-		if (ret < 0) {
-			pm_runtime_put_sync(dev);
+		ret = pm_runtime_resume_and_get(dev);
+		if (ret < 0)
 			return ret;
-		}
 
 		ret = csiphy_set_clock_rates(csiphy);
 		if (ret < 0) {
diff --git a/drivers/media/platform/qcom/camss/camss-ispif.c b/drivers/media/platform/qcom/camss/camss-ispif.c
index d7942f723fdc3..1b716182d35c0 100644
--- a/drivers/media/platform/qcom/camss/camss-ispif.c
+++ b/drivers/media/platform/qcom/camss/camss-ispif.c
@@ -372,11 +372,9 @@ static int ispif_set_power(struct v4l2_subdev *sd, int on)
 			goto exit;
 		}
 
-		ret = pm_runtime_get_sync(dev);
-		if (ret < 0) {
-			pm_runtime_put_sync(dev);
+		ret = pm_runtime_resume_and_get(dev);
+		if (ret < 0)
 			goto exit;
-		}
 
 		ret = camss_enable_clocks(ispif->nclocks, ispif->clock, dev);
 		if (ret < 0) {
diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c
index 1584ee77ad008..27ab20c5b57eb 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -584,9 +584,9 @@ static int vfe_get(struct vfe_device *vfe)
 		if (ret < 0)
 			goto error_pm_domain;
 
-		ret = pm_runtime_get_sync(vfe->camss->dev);
+		ret = pm_runtime_resume_and_get(vfe->camss->dev);
 		if (ret < 0)
-			goto error_pm_runtime_get;
+			goto error_domain_off;
 
 		ret = vfe_set_clock_rates(vfe);
 		if (ret < 0)
@@ -620,6 +620,7 @@ error_reset:
 
 error_pm_runtime_get:
 	pm_runtime_put_sync(vfe->camss->dev);
+error_domain_off:
 	vfe->ops->pm_domain_off(vfe);
 
 error_pm_domain:
-- 
GitLab


From f6bf35ee3e4d9353c117bcc5cc4616d1ce2d977f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 5 May 2021 09:45:19 +0200
Subject: [PATCH 1067/3804] media: venus: core: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/pm_helpers.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c
index c7e1ebec47eea..d0fddf5e9a69f 100644
--- a/drivers/media/platform/qcom/venus/pm_helpers.c
+++ b/drivers/media/platform/qcom/venus/pm_helpers.c
@@ -990,9 +990,8 @@ static int core_power_v4(struct venus_core *core, int on)
 
 	if (on == POWER_ON) {
 		if (pmctrl) {
-			ret = pm_runtime_get_sync(pmctrl);
+			ret = pm_runtime_resume_and_get(pmctrl);
 			if (ret < 0) {
-				pm_runtime_put_noidle(pmctrl);
 				return ret;
 			}
 		}
-- 
GitLab


From 1938ab0d271ffb6f0c9efa2873a53642167fe57c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:19 +0200
Subject: [PATCH 1068/3804] media: venus: vdec: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

As a bonus, there's no need to check if ret == 1, as
pm_runtime_resume_and_get() always returns 0 on success.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/vdec.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c
index ddb7cd39424e4..198e47eb63f41 100644
--- a/drivers/media/platform/qcom/venus/vdec.c
+++ b/drivers/media/platform/qcom/venus/vdec.c
@@ -568,10 +568,10 @@ static int vdec_pm_get(struct venus_inst *inst)
 	int ret;
 
 	mutex_lock(&core->pm_lock);
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	mutex_unlock(&core->pm_lock);
 
-	return ret < 0 ? ret : 0;
+	return ret;
 }
 
 static int vdec_pm_put(struct venus_inst *inst, bool autosuspend)
@@ -601,7 +601,7 @@ static int vdec_pm_get_put(struct venus_inst *inst)
 	mutex_lock(&core->pm_lock);
 
 	if (pm_runtime_suspended(dev)) {
-		ret = pm_runtime_get_sync(dev);
+		ret = pm_runtime_resume_and_get(dev);
 		if (ret < 0)
 			goto error;
 
-- 
GitLab


From 85368a213e2dc0f4956e9d23daa39c4a339861e1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:19 +0200
Subject: [PATCH 1069/3804] media: venus: venc: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to cleanup the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/venc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c
index 4a7291f934b6b..8dd49d4f124cb 100644
--- a/drivers/media/platform/qcom/venus/venc.c
+++ b/drivers/media/platform/qcom/venus/venc.c
@@ -1205,9 +1205,9 @@ static int venc_open(struct file *file)
 
 	venus_helper_init_instance(inst);
 
-	ret = pm_runtime_get_sync(core->dev_enc);
+	ret = pm_runtime_resume_and_get(core->dev_enc);
 	if (ret < 0)
-		goto err_put_sync;
+		goto err_free;
 
 	ret = venc_ctrl_init(inst);
 	if (ret)
@@ -1252,6 +1252,7 @@ err_ctrl_deinit:
 	venc_ctrl_deinit(inst);
 err_put_sync:
 	pm_runtime_put_sync(core->dev_enc);
+err_free:
 	kfree(inst);
 	return ret;
 }
-- 
GitLab


From 09fe880ed7a160ebbffb84a0a9096a075e314d2f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 13 May 2021 01:29:24 +0200
Subject: [PATCH 1070/3804] MAINTAINERS: Add myself as context tracking
 maintainer

I've been missing a lot of patches touching context tracking for which
I wasn't Cc'ed these last months. The code looks like a simple single
file but has a lot of subtle tentacles.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210512232924.150322-11-frederic@kernel.org
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..bda71decb316e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4601,6 +4601,12 @@ S:	Supported
 F:	drivers/video/console/
 F:	include/linux/console*
 
+CONTEXT TRACKING
+M:	Frederic Weisbecker <frederic@kernel.org>
+S:	Maintained
+F:	kernel/context_tracking.c
+F:	include/linux/context_tracking*
+
 CONTROL GROUP (CGROUP)
 M:	Tejun Heo <tj@kernel.org>
 M:	Zefan Li <lizefan.x@bytedance.com>
-- 
GitLab


From 570a752b7a9bd03b50ad6420cd7f10092cc11bd3 Mon Sep 17 00:00:00 2001
From: Yejune Deng <yejune.deng@gmail.com>
Date: Mon, 10 May 2021 16:10:24 +0100
Subject: [PATCH 1071/3804] lib/smp_processor_id: Use is_percpu_thread()
 instead of nr_cpus_allowed

is_percpu_thread() more elegantly handles SMP vs UP, and further checks the
presence of PF_NO_SETAFFINITY. This lets us catch cases where
check_preemption_disabled() can race with a concurrent sched_setaffinity().

Signed-off-by: Yejune Deng <yejune.deng@gmail.com>
[Amended changelog]
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210510151024.2448573-3-valentin.schneider@arm.com
---
 lib/smp_processor_id.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 1c1dbd3003257..046ac6297c781 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -19,11 +19,7 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
 	if (irqs_disabled())
 		goto out;
 
-	/*
-	 * Kernel threads bound to a single CPU can safely use
-	 * smp_processor_id():
-	 */
-	if (current->nr_cpus_allowed == 1)
+	if (is_percpu_thread())
 		goto out;
 
 #ifdef CONFIG_SMP
-- 
GitLab


From 0c18f2ecfcc274a4bcc1d122f79ebd4001c3b445 Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@arm.com>
Date: Mon, 10 May 2021 15:50:31 +0100
Subject: [PATCH 1072/3804] sched/uclamp: Fix wrong implementation of
 cpu.uclamp.min

cpu.uclamp.min is a protection as described in cgroup-v2 Resource
Distribution Model

	Documentation/admin-guide/cgroup-v2.rst

which means we try our best to preserve the minimum performance point of
tasks in this group. See full description of cpu.uclamp.min in the
cgroup-v2.rst.

But the current implementation makes it a limit, which is not what was
intended.

For example:

	tg->cpu.uclamp.min = 20%

	p0->uclamp[UCLAMP_MIN] = 0
	p1->uclamp[UCLAMP_MIN] = 50%

	Previous Behavior (limit):

		p0->effective_uclamp = 0
		p1->effective_uclamp = 20%

	New Behavior (Protection):

		p0->effective_uclamp = 20%
		p1->effective_uclamp = 50%

Which is in line with how protections should work.

With this change the cgroup and per-task behaviors are the same, as
expected.

Additionally, we remove the confusing relationship between cgroup and
!user_defined flag.

We don't want for example RT tasks that are boosted by default to max to
change their boost value when they attach to a cgroup. If a cgroup wants
to limit the max performance point of tasks attached to it, then
cpu.uclamp.max must be set accordingly.

Or if they want to set different boost value based on cgroup, then
sysctl_sched_util_clamp_min_rt_default must be used to NOT boost to max
and set the right cpu.uclamp.min for each group to let the RT tasks
obtain the desired boost value when attached to that group.

As it stands, the dependency on the !user_defined flag adds an extra
layer of complexity that is not required now that cpu.uclamp.min
behaves properly as a protection.

The propagation model of effective cpu.uclamp.min in child cgroups as
implemented by cpu_util_update_eff() is still correct. The parent
protection sets an upper limit of what the child cgroups will
effectively get.

Fixes: 3eac870a3247 (sched/uclamp: Use TG's clamps to restrict TASK's clamps)
Signed-off-by: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210510145032.1934078-2-qais.yousef@arm.com
---
 kernel/sched/core.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6a5124c4d54f2..f97eb7323f368 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1405,7 +1405,6 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 {
 	struct uclamp_se uc_req = p->uclamp_req[clamp_id];
 #ifdef CONFIG_UCLAMP_TASK_GROUP
-	struct uclamp_se uc_max;
 
 	/*
 	 * Tasks in autogroups or root task group will be
@@ -1416,9 +1415,23 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 	if (task_group(p) == &root_task_group)
 		return uc_req;
 
-	uc_max = task_group(p)->uclamp[clamp_id];
-	if (uc_req.value > uc_max.value || !uc_req.user_defined)
-		return uc_max;
+	switch (clamp_id) {
+	case UCLAMP_MIN: {
+		struct uclamp_se uc_min = task_group(p)->uclamp[clamp_id];
+		if (uc_req.value < uc_min.value)
+			return uc_min;
+		break;
+	}
+	case UCLAMP_MAX: {
+		struct uclamp_se uc_max = task_group(p)->uclamp[clamp_id];
+		if (uc_req.value > uc_max.value)
+			return uc_max;
+		break;
+	}
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
 #endif
 
 	return uc_req;
-- 
GitLab


From 93b73858701fd01de26a4a874eb95f9b7156fd4b Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@arm.com>
Date: Mon, 10 May 2021 15:50:32 +0100
Subject: [PATCH 1073/3804] sched/uclamp: Fix locking around
 cpu_util_update_eff()

cpu_cgroup_css_online() calls cpu_util_update_eff() without holding the
uclamp_mutex or rcu_read_lock() like other call sites, which is
a mistake.

The uclamp_mutex is required to protect against concurrent reads and
writes that could update the cgroup hierarchy.

The rcu_read_lock() is required to traverse the cgroup data structures
in cpu_util_update_eff().

Surround the caller with the required locks and add some asserts to
better document the dependency in cpu_util_update_eff().

Fixes: 7226017ad37a ("sched/uclamp: Fix a bug in propagating uclamp value in new cgroups")
Reported-by: Quentin Perret <qperret@google.com>
Signed-off-by: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210510145032.1934078-3-qais.yousef@arm.com
---
 kernel/sched/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f97eb7323f368..3ec420c8a0381 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9507,7 +9507,11 @@ static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 	/* Propagate the effective uclamp value for the new group */
+	mutex_lock(&uclamp_mutex);
+	rcu_read_lock();
 	cpu_util_update_eff(css);
+	rcu_read_unlock();
+	mutex_unlock(&uclamp_mutex);
 #endif
 
 	return 0;
@@ -9597,6 +9601,9 @@ static void cpu_util_update_eff(struct cgroup_subsys_state *css)
 	enum uclamp_id clamp_id;
 	unsigned int clamps;
 
+	lockdep_assert_held(&uclamp_mutex);
+	SCHED_WARN_ON(!rcu_read_lock_held());
+
 	css_for_each_descendant_pre(css, top_css) {
 		uc_parent = css_tg(css)->parent
 			? css_tg(css)->parent->uclamp : NULL;
-- 
GitLab


From 4d80d6ca5d77fde9880da8466e5b64f250e5bf82 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:26 +0200
Subject: [PATCH 1074/3804] genirq: Export affinity setter for modules

Perf modules abuse irq_set_affinity_hint() to set the affinity of system
PMU interrupts just because irq_set_affinity() was not exported.

The fact that irq_set_affinity_hint() actually sets the affinity is a
non-documented side effect and the name is clearly saying it's a hint.

To clean this up, export the real affinity setter.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093117.968251441@linutronix.de
---
 include/linux/interrupt.h | 35 ++---------------------------------
 kernel/irq/manage.c       | 33 ++++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 4777850a6dc7c..35a374241515b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -319,39 +319,8 @@ struct irq_affinity_desc {
 
 extern cpumask_var_t irq_default_affinity;
 
-/* Internal implementation. Use the helpers below */
-extern int __irq_set_affinity(unsigned int irq, const struct cpumask *cpumask,
-			      bool force);
-
-/**
- * irq_set_affinity - Set the irq affinity of a given irq
- * @irq:	Interrupt to set affinity
- * @cpumask:	cpumask
- *
- * Fails if cpumask does not contain an online CPU
- */
-static inline int
-irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
-{
-	return __irq_set_affinity(irq, cpumask, false);
-}
-
-/**
- * irq_force_affinity - Force the irq affinity of a given irq
- * @irq:	Interrupt to set affinity
- * @cpumask:	cpumask
- *
- * Same as irq_set_affinity, but without checking the mask against
- * online cpus.
- *
- * Solely for low level cpu hotplug code, where we need to make per
- * cpu interrupts affine before the cpu becomes online.
- */
-static inline int
-irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
-{
-	return __irq_set_affinity(irq, cpumask, true);
-}
+extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4c14356543d93..a847dd2044c85 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -441,7 +441,8 @@ out_unlock:
 	return ret;
 }
 
-int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force)
+static int __irq_set_affinity(unsigned int irq, const struct cpumask *mask,
+			      bool force)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
@@ -456,6 +457,36 @@ int __irq_set_affinity(unsigned int irq, const struct cpumask *mask, bool force)
 	return ret;
 }
 
+/**
+ * irq_set_affinity - Set the irq affinity of a given irq
+ * @irq:	Interrupt to set affinity
+ * @cpumask:	cpumask
+ *
+ * Fails if cpumask does not contain an online CPU
+ */
+int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+	return __irq_set_affinity(irq, cpumask, false);
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity);
+
+/**
+ * irq_force_affinity - Force the irq affinity of a given irq
+ * @irq:	Interrupt to set affinity
+ * @cpumask:	cpumask
+ *
+ * Same as irq_set_affinity, but without checking the mask against
+ * online cpus.
+ *
+ * Solely for low level cpu hotplug code, where we need to make per
+ * cpu interrupts affine before the cpu becomes online.
+ */
+int irq_force_affinity(unsigned int irq, const struct cpumask *cpumask)
+{
+	return __irq_set_affinity(irq, cpumask, true);
+}
+EXPORT_SYMBOL_GPL(irq_force_affinity);
+
 int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
 {
 	unsigned long flags;
-- 
GitLab


From 1699949d3314e5d1956fb082e4cd4798bf6149fc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 19 May 2021 15:37:09 +0900
Subject: [PATCH 1075/3804] sched: Fix a stale comment in pick_next_task()

fair_sched_class->next no longer exists since commit:

  a87e749e8fa1 ("sched: Remove struct sched_class::next field").

Now the sched_class order is specified by the linker script.

Rewrite the comment in a more generic way.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210519063709.323162-1-masahiroy@kernel.org
---
 kernel/sched/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3ec420c8a0381..3d2527239c3ed 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5318,7 +5318,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 		if (unlikely(p == RETRY_TASK))
 			goto restart;
 
-		/* Assumes fair_sched_class->next == idle_sched_class */
+		/* Assume the next prioritized class is idle_sched_class */
 		if (!p) {
 			put_prev_task(rq, prev);
 			p = pick_next_task_idle(rq);
-- 
GitLab


From 0024430e920f2900654ad83cd081cf52e02a3ef5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 18 May 2021 12:01:06 -0700
Subject: [PATCH 1076/3804] x86/build: Fix location of '-plugin-opt=' flags

Commit b33fff07e3e3 ("x86, build: allow LTO to be selected") added a
couple of '-plugin-opt=' flags to KBUILD_LDFLAGS because the code model
and stack alignment are not stored in LLVM bitcode.

However, these flags were added to KBUILD_LDFLAGS prior to the
emulation flag assignment, which uses ':=', so they were overwritten
and never added to $(LD) invocations.

The absence of these flags caused misalignment issues in the
AMDGPU driver when compiling with CONFIG_LTO_CLANG, resulting in
general protection faults.

Shuffle the assignment below the initial one so that the flags are
properly passed along and all of the linker flags stay together.

At the same time, avoid any future issues with clobbering flags by
changing the emulation flag assignment to '+=' since KBUILD_LDFLAGS is
already defined with ':=' in the main Makefile before being exported for
modification here as a result of commit:

  ce99d0bf312d ("kbuild: clear LDFLAGS in the top Makefile")

Fixes: b33fff07e3e3 ("x86, build: allow LTO to be selected")
Reported-by: Anthony Ruhier <aruhier@mailbox.org>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Anthony Ruhier <aruhier@mailbox.org>
Cc: stable@vger.kernel.org
Link: https://github.com/ClangBuiltLinux/linux/issues/1374
Link: https://lore.kernel.org/r/20210518190106.60935-1-nathan@kernel.org
---
 arch/x86/Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c77c5d8a7b3eb..307529417021a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
 	KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
-ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
-		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
-endif
-
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
@@ -202,7 +197,12 @@ ifdef CONFIG_RETPOLINE
   endif
 endif
 
-KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)
+KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
+
+ifdef CONFIG_LTO_CLANG
+KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
+		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
 
 ifdef CONFIG_X86_NEED_RELOCS
 LDFLAGS_vmlinux := --emit-relocs --discard-none
-- 
GitLab


From 11b3dda5e8b6cde957a6410233f30d6c48582998 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:03 +0100
Subject: [PATCH 1077/3804] lib: vsprintf: scanf: Negative number must have
 field width > 1

If a signed number field starts with a '-' the field width must be > 1,
or unlimited, to allow at least one digit after the '-'.

This patch adds a check for this. If a signed field starts with '-'
and field_width == 1 the scanf will quit.

It is ok for a signed number field to have a field width of 1 if it
starts with a digit. In that case the single digit can be converted.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-1-rf@opensource.cirrus.com
---
 lib/vsprintf.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 6c56c62fd9a50..af307588ad8b3 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -3526,8 +3526,12 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		str = skip_spaces(str);
 
 		digit = *str;
-		if (is_sign && digit == '-')
+		if (is_sign && digit == '-') {
+			if (field_width == 1)
+				break;
+
 			digit = *(str + 1);
+		}
 
 		if (!digit
 		    || (base == 16 && !isxdigit(digit))
-- 
GitLab


From 900fdc4573766dd43b847b4f54bd4a1ee2bc7360 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:04 +0100
Subject: [PATCH 1078/3804] lib: vsprintf: Fix handling of number field widths
 in vsscanf

The existing code attempted to handle numbers by doing a strto[u]l(),
ignoring the field width, and then repeatedly dividing to extract the
field out of the full converted value. If the string contains a run of
valid digits longer than will fit in a long or long long, this would
overflow and no amount of dividing can recover the correct value.

This patch fixes vsscanf() to obey number field widths when parsing
the number.

A new _parse_integer_limit() is added that takes a limit for the number
of characters to parse. The number field conversion in vsscanf is changed
to use this new function.

If a number starts with a radix prefix, the field width must be long
enough for at least one digit after the prefix. If not, it will be handled
like this:

 sscanf("0x4", "%1i", &i): i=0, scanning continues with the 'x'
 sscanf("0x4", "%2i", &i): i=0, scanning continues with the '4'

This is consistent with the observed behaviour of userland sscanf.

Note that this patch does NOT fix the problem of a single field value
overflowing the target type. So for example:

  sscanf("123456789abcdef", "%x", &i);

Will not produce the correct result because the value obviously overflows
INT_MAX. But sscanf will report a successful conversion.

Note that where a very large number is used to mean "unlimited", the value
INT_MAX is used for consistency with the behaviour of vsnprintf().

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-2-rf@opensource.cirrus.com
---
 lib/kstrtox.c  | 13 ++++++--
 lib/kstrtox.h  |  2 ++
 lib/vsprintf.c | 82 +++++++++++++++++++++++++++++---------------------
 3 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/lib/kstrtox.c b/lib/kstrtox.c
index a118b0b1e9b2c..0b5fe8b411732 100644
--- a/lib/kstrtox.c
+++ b/lib/kstrtox.c
@@ -39,20 +39,22 @@ const char *_parse_integer_fixup_radix(const char *s, unsigned int *base)
 
 /*
  * Convert non-negative integer string representation in explicitly given radix
- * to an integer.
+ * to an integer. A maximum of max_chars characters will be converted.
+ *
  * Return number of characters consumed maybe or-ed with overflow bit.
  * If overflow occurs, result integer (incorrect) is still returned.
  *
  * Don't you dare use this function.
  */
-unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *p,
+				  size_t max_chars)
 {
 	unsigned long long res;
 	unsigned int rv;
 
 	res = 0;
 	rv = 0;
-	while (1) {
+	while (max_chars--) {
 		unsigned int c = *s;
 		unsigned int lc = c | 0x20; /* don't tolower() this line */
 		unsigned int val;
@@ -82,6 +84,11 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long
 	return rv;
 }
 
+unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *p)
+{
+	return _parse_integer_limit(s, base, p, INT_MAX);
+}
+
 static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res)
 {
 	unsigned long long _res;
diff --git a/lib/kstrtox.h b/lib/kstrtox.h
index 3b4637bcd2540..158c400ca8658 100644
--- a/lib/kstrtox.h
+++ b/lib/kstrtox.h
@@ -4,6 +4,8 @@
 
 #define KSTRTOX_OVERFLOW	(1U << 31)
 const char *_parse_integer_fixup_radix(const char *s, unsigned int *base);
+unsigned int _parse_integer_limit(const char *s, unsigned int base, unsigned long long *res,
+				  size_t max_chars);
 unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long *res);
 
 #endif
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index af307588ad8b3..3290eca46e479 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -53,6 +53,31 @@
 #include <linux/string_helpers.h>
 #include "kstrtox.h"
 
+static unsigned long long simple_strntoull(const char *startp, size_t max_chars,
+					   char **endp, unsigned int base)
+{
+	const char *cp;
+	unsigned long long result = 0ULL;
+	size_t prefix_chars;
+	unsigned int rv;
+
+	cp = _parse_integer_fixup_radix(startp, &base);
+	prefix_chars = cp - startp;
+	if (prefix_chars < max_chars) {
+		rv = _parse_integer_limit(cp, base, &result, max_chars - prefix_chars);
+		/* FIXME */
+		cp += (rv & ~KSTRTOX_OVERFLOW);
+	} else {
+		/* Field too short for prefix + digit, skip over without converting */
+		cp = startp + max_chars;
+	}
+
+	if (endp)
+		*endp = (char *)cp;
+
+	return result;
+}
+
 /**
  * simple_strtoull - convert a string to an unsigned long long
  * @cp: The start of the string
@@ -63,18 +88,7 @@
  */
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
 {
-	unsigned long long result;
-	unsigned int rv;
-
-	cp = _parse_integer_fixup_radix(cp, &base);
-	rv = _parse_integer(cp, base, &result);
-	/* FIXME */
-	cp += (rv & ~KSTRTOX_OVERFLOW);
-
-	if (endp)
-		*endp = (char *)cp;
-
-	return result;
+	return simple_strntoull(cp, INT_MAX, endp, base);
 }
 EXPORT_SYMBOL(simple_strtoull);
 
@@ -109,6 +123,21 @@ long simple_strtol(const char *cp, char **endp, unsigned int base)
 }
 EXPORT_SYMBOL(simple_strtol);
 
+static long long simple_strntoll(const char *cp, size_t max_chars, char **endp,
+				 unsigned int base)
+{
+	/*
+	 * simple_strntoull() safely handles receiving max_chars==0 in the
+	 * case cp[0] == '-' && max_chars == 1.
+	 * If max_chars == 0 we can drop through and pass it to simple_strntoull()
+	 * and the content of *cp is irrelevant.
+	 */
+	if (*cp == '-' && max_chars > 0)
+		return -simple_strntoull(cp + 1, max_chars - 1, endp, base);
+
+	return simple_strntoull(cp, max_chars, endp, base);
+}
+
 /**
  * simple_strtoll - convert a string to a signed long long
  * @cp: The start of the string
@@ -119,10 +148,7 @@ EXPORT_SYMBOL(simple_strtol);
  */
 long long simple_strtoll(const char *cp, char **endp, unsigned int base)
 {
-	if (*cp == '-')
-		return -simple_strtoull(cp + 1, endp, base);
-
-	return simple_strtoull(cp, endp, base);
+	return simple_strntoll(cp, INT_MAX, endp, base);
 }
 EXPORT_SYMBOL(simple_strtoll);
 
@@ -3541,25 +3567,13 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 			break;
 
 		if (is_sign)
-			val.s = qualifier != 'L' ?
-				simple_strtol(str, &next, base) :
-				simple_strtoll(str, &next, base);
+			val.s = simple_strntoll(str,
+						field_width >= 0 ? field_width : INT_MAX,
+						&next, base);
 		else
-			val.u = qualifier != 'L' ?
-				simple_strtoul(str, &next, base) :
-				simple_strtoull(str, &next, base);
-
-		if (field_width > 0 && next - str > field_width) {
-			if (base == 0)
-				_parse_integer_fixup_radix(str, &base);
-			while (next - str > field_width) {
-				if (is_sign)
-					val.s = div_s64(val.s, base);
-				else
-					val.u = div_u64(val.u, base);
-				--next;
-			}
-		}
+			val.u = simple_strntoull(str,
+						 field_width >= 0 ? field_width : INT_MAX,
+						 &next, base);
 
 		switch (qualifier) {
 		case 'H':	/* that's 'hh' in format */
-- 
GitLab


From 50f530e176eac808e64416732e54c0686ce2c39b Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:05 +0100
Subject: [PATCH 1079/3804] lib: test_scanf: Add tests for sscanf number
 conversion

Adds test_sscanf to test various number conversion cases, as
number conversion was previously broken.

This also tests the simple_strtoxxx() functions exported from
vsprintf.c.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-3-rf@opensource.cirrus.com
---
 MAINTAINERS       |   1 +
 lib/Kconfig.debug |   3 +
 lib/Makefile      |   1 +
 lib/test_scanf.c  | 751 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 756 insertions(+)
 create mode 100644 lib/test_scanf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 04e7de8c95be7..eb05132f8ff1f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19395,6 +19395,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pmladek/printk.git
 F:	Documentation/core-api/printk-formats.rst
 F:	lib/test_printf.c
+F:	lib/test_scanf.c
 F:	lib/vsprintf.c
 
 VT1211 HARDWARE MONITOR DRIVER
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2c7f46b366f16..dc437a171c0fc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2163,6 +2163,9 @@ config TEST_KSTRTOX
 config TEST_PRINTF
 	tristate "Test printf() family of functions at runtime"
 
+config TEST_SCANF
+	tristate "Test scanf() family of functions at runtime"
+
 config TEST_BITMAP
 	tristate "Test bitmap_*() family of functions at runtime"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index e11cfc18b6c08..5f7f246241b1c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o
 obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
+obj-$(CONFIG_TEST_SCANF) += test_scanf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
new file mode 100644
index 0000000000000..8d577aec6c285
--- /dev/null
+++ b/lib/test_scanf.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test cases for sscanf facility.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/overflow.h>
+#include <linux/printk.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "../tools/testing/selftests/kselftest_module.h"
+
+#define BUF_SIZE 1024
+
+KSTM_MODULE_GLOBALS();
+static char *test_buffer __initdata;
+static char *fmt_buffer __initdata;
+static struct rnd_state rnd_state __initdata;
+
+typedef int (*check_fn)(const void *check_data, const char *string,
+			const char *fmt, int n_args, va_list ap);
+
+static void __scanf(4, 6) __init
+_test(check_fn fn, const void *check_data, const char *string, const char *fmt,
+	int n_args, ...)
+{
+	va_list ap, ap_copy;
+	int ret;
+
+	total_tests++;
+
+	va_start(ap, n_args);
+	va_copy(ap_copy, ap);
+	ret = vsscanf(string, fmt, ap_copy);
+	va_end(ap_copy);
+
+	if (ret != n_args) {
+		pr_warn("vsscanf(\"%s\", \"%s\", ...) returned %d expected %d\n",
+			string, fmt, ret, n_args);
+		goto fail;
+	}
+
+	ret = (*fn)(check_data, string, fmt, n_args, ap);
+	if (ret)
+		goto fail;
+
+	va_end(ap);
+
+	return;
+
+fail:
+	failed_tests++;
+	va_end(ap);
+}
+
+#define _check_numbers_template(arg_fmt, expect, str, fmt, n_args, ap)		\
+do {										\
+	pr_debug("\"%s\", \"%s\" ->\n", str, fmt);				\
+	for (; n_args > 0; n_args--, expect++) {				\
+		typeof(*expect) got = *va_arg(ap, typeof(expect));		\
+		pr_debug("\t" arg_fmt "\n", got);				\
+		if (got != *expect) {						\
+			pr_warn("vsscanf(\"%s\", \"%s\", ...) expected " arg_fmt " got " arg_fmt "\n", \
+				str, fmt, *expect, got);			\
+			return 1;						\
+		}								\
+	}									\
+	return 0;								\
+} while (0)
+
+static int __init check_ull(const void *check_data, const char *string,
+			    const char *fmt, int n_args, va_list ap)
+{
+	const unsigned long long *pval = check_data;
+
+	_check_numbers_template("%llu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ll(const void *check_data, const char *string,
+			   const char *fmt, int n_args, va_list ap)
+{
+	const long long *pval = check_data;
+
+	_check_numbers_template("%lld", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ulong(const void *check_data, const char *string,
+			   const char *fmt, int n_args, va_list ap)
+{
+	const unsigned long *pval = check_data;
+
+	_check_numbers_template("%lu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_long(const void *check_data, const char *string,
+			  const char *fmt, int n_args, va_list ap)
+{
+	const long *pval = check_data;
+
+	_check_numbers_template("%ld", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_uint(const void *check_data, const char *string,
+			     const char *fmt, int n_args, va_list ap)
+{
+	const unsigned int *pval = check_data;
+
+	_check_numbers_template("%u", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_int(const void *check_data, const char *string,
+			    const char *fmt, int n_args, va_list ap)
+{
+	const int *pval = check_data;
+
+	_check_numbers_template("%d", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_ushort(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const unsigned short *pval = check_data;
+
+	_check_numbers_template("%hu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_short(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const short *pval = check_data;
+
+	_check_numbers_template("%hd", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_uchar(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const unsigned char *pval = check_data;
+
+	_check_numbers_template("%hhu", pval, string, fmt, n_args, ap);
+}
+
+static int __init check_char(const void *check_data, const char *string,
+			       const char *fmt, int n_args, va_list ap)
+{
+	const signed char *pval = check_data;
+
+	_check_numbers_template("%hhd", pval, string, fmt, n_args, ap);
+}
+
+/* Selection of interesting numbers to test, copied from test-kstrtox.c */
+static const unsigned long long numbers[] __initconst = {
+	0x0ULL,
+	0x1ULL,
+	0x7fULL,
+	0x80ULL,
+	0x81ULL,
+	0xffULL,
+	0x100ULL,
+	0x101ULL,
+	0x7fffULL,
+	0x8000ULL,
+	0x8001ULL,
+	0xffffULL,
+	0x10000ULL,
+	0x10001ULL,
+	0x7fffffffULL,
+	0x80000000ULL,
+	0x80000001ULL,
+	0xffffffffULL,
+	0x100000000ULL,
+	0x100000001ULL,
+	0x7fffffffffffffffULL,
+	0x8000000000000000ULL,
+	0x8000000000000001ULL,
+	0xfffffffffffffffeULL,
+	0xffffffffffffffffULL,
+};
+
+#define value_representable_in_type(T, val)					 \
+(is_signed_type(T)								 \
+	? ((long long)(val) >= type_min(T)) && ((long long)(val) <= type_max(T)) \
+	: ((unsigned long long)(val) >= type_min(T)) &&				 \
+	  ((unsigned long long)(val) <= type_max(T)))
+
+#define test_one_number(T, gen_fmt, scan_fmt, val, fn)			\
+do {									\
+	const T expect_val = (T)(val);					\
+	T result = ~expect_val; /* should be overwritten */		\
+									\
+	snprintf(test_buffer, BUF_SIZE, gen_fmt, expect_val);		\
+	_test(fn, &expect_val, test_buffer, "%" scan_fmt, 1, &result);	\
+} while (0)
+
+#define simple_numbers_loop(T, gen_fmt, scan_fmt, fn)			\
+do {									\
+	int i;								\
+									\
+	for (i = 0; i < ARRAY_SIZE(numbers); i++) {			\
+		if (!value_representable_in_type(T, numbers[i]))	\
+			continue;					\
+									\
+		test_one_number(T, gen_fmt, scan_fmt, numbers[i], fn);	\
+									\
+		if (is_signed_type(T))					\
+			test_one_number(T, gen_fmt, scan_fmt,		\
+					-numbers[i], fn);		\
+	}								\
+} while (0)
+
+static void __init numbers_simple(void)
+{
+	simple_numbers_loop(unsigned long long,	"%llu",	  "llu", check_ull);
+	simple_numbers_loop(long long,		"%lld",	  "lld", check_ll);
+	simple_numbers_loop(long long,		"%lld",	  "lli", check_ll);
+	simple_numbers_loop(unsigned long long,	"%llx",	  "llx", check_ull);
+	simple_numbers_loop(long long,		"%llx",	  "llx", check_ll);
+	simple_numbers_loop(long long,		"0x%llx", "lli", check_ll);
+	simple_numbers_loop(unsigned long long, "0x%llx", "llx", check_ull);
+	simple_numbers_loop(long long,		"0x%llx", "llx", check_ll);
+
+	simple_numbers_loop(unsigned long,	"%lu",	  "lu", check_ulong);
+	simple_numbers_loop(long,		"%ld",	  "ld", check_long);
+	simple_numbers_loop(long,		"%ld",	  "li", check_long);
+	simple_numbers_loop(unsigned long,	"%lx",	  "lx", check_ulong);
+	simple_numbers_loop(long,		"%lx",	  "lx", check_long);
+	simple_numbers_loop(long,		"0x%lx",  "li", check_long);
+	simple_numbers_loop(unsigned long,	"0x%lx",  "lx", check_ulong);
+	simple_numbers_loop(long,		"0x%lx",  "lx", check_long);
+
+	simple_numbers_loop(unsigned int,	"%u",	  "u", check_uint);
+	simple_numbers_loop(int,		"%d",	  "d", check_int);
+	simple_numbers_loop(int,		"%d",	  "i", check_int);
+	simple_numbers_loop(unsigned int,	"%x",	  "x", check_uint);
+	simple_numbers_loop(int,		"%x",	  "x", check_int);
+	simple_numbers_loop(int,		"0x%x",   "i", check_int);
+	simple_numbers_loop(unsigned int,	"0x%x",   "x", check_uint);
+	simple_numbers_loop(int,		"0x%x",   "x", check_int);
+
+	simple_numbers_loop(unsigned short,	"%hu",	  "hu", check_ushort);
+	simple_numbers_loop(short,		"%hd",	  "hd", check_short);
+	simple_numbers_loop(short,		"%hd",	  "hi", check_short);
+	simple_numbers_loop(unsigned short,	"%hx",	  "hx", check_ushort);
+	simple_numbers_loop(short,		"%hx",	  "hx", check_short);
+	simple_numbers_loop(short,		"0x%hx",  "hi", check_short);
+	simple_numbers_loop(unsigned short,	"0x%hx",  "hx", check_ushort);
+	simple_numbers_loop(short,		"0x%hx",  "hx", check_short);
+
+	simple_numbers_loop(unsigned char,	"%hhu",	  "hhu", check_uchar);
+	simple_numbers_loop(signed char,	"%hhd",	  "hhd", check_char);
+	simple_numbers_loop(signed char,	"%hhd",	  "hhi", check_char);
+	simple_numbers_loop(unsigned char,	"%hhx",	  "hhx", check_uchar);
+	simple_numbers_loop(signed char,	"%hhx",	  "hhx", check_char);
+	simple_numbers_loop(signed char,	"0x%hhx", "hhi", check_char);
+	simple_numbers_loop(unsigned char,	"0x%hhx", "hhx", check_uchar);
+	simple_numbers_loop(signed char,	"0x%hhx", "hhx", check_char);
+}
+
+/*
+ * Random value masked to a random count (0..max_bits) of low bits: variability
+ * of length and value matters more here than perfect randomness (this is not
+ * mathematically rigorous!!). A count of 0 yields 0; a shift by 32 is undefined.
+ */
+static u32 __init next_test_random(u32 max_bits)
+{
+	u32 n_bits = hweight32(prandom_u32_state(&rnd_state)) % (max_bits + 1);
+
+	return prandom_u32_state(&rnd_state) & (n_bits ? UINT_MAX >> (32 - n_bits) : 0);
+}
+
+static unsigned long long __init next_test_random_ull(void)
+{
+	u32 rand1 = prandom_u32_state(&rnd_state);
+	u32 n_bits = (hweight32(rand1) * 3) % 64;
+	u64 val = (u64)prandom_u32_state(&rnd_state) * rand1;
+
+	return n_bits ? val & (ULLONG_MAX >> (64 - n_bits)) : 0; /* no UB shift by 64 */
+}
+
+#define random_for_type(T)				\
+	((T)(sizeof(T) <= sizeof(u32)			\
+		? next_test_random(BITS_PER_TYPE(T))	\
+		: next_test_random_ull()))
+
+/*
+ * Define a pattern of negative and positive numbers to ensure we get
+ * some of both within the small number of samples in a test string.
+ */
+#define NEGATIVES_PATTERN 0x3246	/* 00110010 01000110 */
+
+#define fill_random_array(arr)							\
+do {										\
+	unsigned int neg_pattern = NEGATIVES_PATTERN;				\
+	int i;									\
+										\
+	for (i = 0; i < ARRAY_SIZE(arr); i++, neg_pattern >>= 1) {		\
+		(arr)[i] = random_for_type(typeof((arr)[0]));			\
+		if (is_signed_type(typeof((arr)[0])) && (neg_pattern & 1))	\
+			(arr)[i] = -(arr)[i];					\
+	}									\
+} while (0)
+
+/*
+ * Convenience wrapper around snprintf() to append at buf_pos in buf,
+ * updating buf_pos and returning the number of characters appended.
+ * On error buf_pos is not changed and return value is 0.
+ */
+static int __init __printf(4, 5)
+append_fmt(char *buf, int *buf_pos, int buf_len, const char *val_fmt, ...)
+{
+	va_list ap;
+	int field_len;
+
+	va_start(ap, val_fmt);
+	field_len = vsnprintf(buf + *buf_pos, buf_len - *buf_pos, val_fmt, ap);
+	va_end(ap);
+
+	if (field_len < 0)
+		field_len = 0;
+
+	*buf_pos += field_len;
+
+	return field_len;
+}
+
+/*
+ * Convenience function to append the field delimiter string, copied
+ * literally, to both the value string and format string buffers.
+ */
+static void __init append_delim(char *str_buf, int *str_buf_pos, int str_buf_len,
+				char *fmt_buf, int *fmt_buf_pos, int fmt_buf_len,
+				const char *delim_str)
+{
+	append_fmt(str_buf, str_buf_pos, str_buf_len, "%s", delim_str);
+	append_fmt(fmt_buf, fmt_buf_pos, fmt_buf_len, "%s", delim_str);
+}
+
+#define test_array_8(fn, check_data, string, fmt, arr)				\
+do {										\
+	BUILD_BUG_ON(ARRAY_SIZE(arr) != 8);					\
+	_test(fn, check_data, string, fmt, 8,					\
+		&(arr)[0], &(arr)[1], &(arr)[2], &(arr)[3],			\
+		&(arr)[4], &(arr)[5], &(arr)[6], &(arr)[7]);			\
+} while (0)
+
+#define numbers_list_8(T, gen_fmt, field_sep, scan_fmt, fn)			\
+do {										\
+	int i, pos = 0, fmt_pos = 0;						\
+	T expect[8], result[8];							\
+										\
+	fill_random_array(expect);						\
+										\
+	for (i = 0; i < ARRAY_SIZE(expect); i++) {				\
+		if (i != 0)							\
+			append_delim(test_buffer, &pos, BUF_SIZE,		\
+				     fmt_buffer, &fmt_pos, BUF_SIZE,		\
+				     field_sep);				\
+										\
+		append_fmt(test_buffer, &pos, BUF_SIZE, gen_fmt, expect[i]);	\
+		append_fmt(fmt_buffer, &fmt_pos, BUF_SIZE, "%%%s", scan_fmt);	\
+	}									\
+										\
+	test_array_8(fn, expect, test_buffer, fmt_buffer, result);		\
+} while (0)
+
+#define numbers_list_fix_width(T, gen_fmt, field_sep, width, scan_fmt, fn)	\
+do {										\
+	char full_fmt[16];							\
+										\
+	snprintf(full_fmt, sizeof(full_fmt), "%u%s", width, scan_fmt);		\
+	numbers_list_8(T, gen_fmt, field_sep, full_fmt, fn);			\
+} while (0)
+
+#define numbers_list_val_width(T, gen_fmt, field_sep, scan_fmt, fn)		\
+do {										\
+	int i, val_len, pos = 0, fmt_pos = 0;					\
+	T expect[8], result[8];							\
+										\
+	fill_random_array(expect);						\
+										\
+	for (i = 0; i < ARRAY_SIZE(expect); i++) {				\
+		if (i != 0)							\
+			append_delim(test_buffer, &pos, BUF_SIZE,		\
+				     fmt_buffer, &fmt_pos, BUF_SIZE, field_sep);\
+										\
+		val_len = append_fmt(test_buffer, &pos, BUF_SIZE, gen_fmt,	\
+				     expect[i]);				\
+		append_fmt(fmt_buffer, &fmt_pos, BUF_SIZE,			\
+			   "%%%u%s", val_len, scan_fmt);			\
+	}									\
+										\
+	test_array_8(fn, expect, test_buffer, fmt_buffer, result);		\
+} while (0)
+
+static void __init numbers_list(const char *delim)
+{
+	numbers_list_8(unsigned long long, "%llu",   delim, "llu", check_ull);
+	numbers_list_8(long long,	   "%lld",   delim, "lld", check_ll);
+	numbers_list_8(long long,	   "%lld",   delim, "lli", check_ll);
+	numbers_list_8(unsigned long long, "%llx",   delim, "llx", check_ull);
+	numbers_list_8(unsigned long long, "0x%llx", delim, "llx", check_ull);
+	numbers_list_8(long long,	   "0x%llx", delim, "lli", check_ll);
+
+	numbers_list_8(unsigned long,	   "%lu",    delim, "lu", check_ulong);
+	numbers_list_8(long,		   "%ld",    delim, "ld", check_long);
+	numbers_list_8(long,		   "%ld",    delim, "li", check_long);
+	numbers_list_8(unsigned long,	   "%lx",    delim, "lx", check_ulong);
+	numbers_list_8(unsigned long,	   "0x%lx",  delim, "lx", check_ulong);
+	numbers_list_8(long,		   "0x%lx",  delim, "li", check_long);
+
+	numbers_list_8(unsigned int,	   "%u",     delim, "u", check_uint);
+	numbers_list_8(int,		   "%d",     delim, "d", check_int);
+	numbers_list_8(int,		   "%d",     delim, "i", check_int);
+	numbers_list_8(unsigned int,	   "%x",     delim, "x", check_uint);
+	numbers_list_8(unsigned int,	   "0x%x",   delim, "x", check_uint);
+	numbers_list_8(int,		   "0x%x",   delim, "i", check_int);
+
+	numbers_list_8(unsigned short,	   "%hu",    delim, "hu", check_ushort);
+	numbers_list_8(short,		   "%hd",    delim, "hd", check_short);
+	numbers_list_8(short,		   "%hd",    delim, "hi", check_short);
+	numbers_list_8(unsigned short,	   "%hx",    delim, "hx", check_ushort);
+	numbers_list_8(unsigned short,	   "0x%hx",  delim, "hx", check_ushort);
+	numbers_list_8(short,		   "0x%hx",  delim, "hi", check_short);
+
+	numbers_list_8(unsigned char,	   "%hhu",   delim, "hhu", check_uchar);
+	numbers_list_8(signed char,	   "%hhd",   delim, "hhd", check_char);
+	numbers_list_8(signed char,	   "%hhd",   delim, "hhi", check_char);
+	numbers_list_8(unsigned char,	   "%hhx",   delim, "hhx", check_uchar);
+	numbers_list_8(unsigned char,	   "0x%hhx", delim, "hhx", check_uchar);
+	numbers_list_8(signed char,	   "0x%hhx", delim, "hhi", check_char);
+}
+
+/*
+ * List of numbers separated by delim. Each field width specifier is the
+ * maximum possible digits for the given type and base.
+ */
+static void __init numbers_list_field_width_typemax(const char *delim)
+{
+	numbers_list_fix_width(unsigned long long, "%llu",   delim, 20, "llu", check_ull);
+	numbers_list_fix_width(long long,	   "%lld",   delim, 20, "lld", check_ll);
+	numbers_list_fix_width(long long,	   "%lld",   delim, 20, "lli", check_ll);
+	numbers_list_fix_width(unsigned long long, "%llx",   delim, 16, "llx", check_ull);
+	numbers_list_fix_width(unsigned long long, "0x%llx", delim, 18, "llx", check_ull);
+	numbers_list_fix_width(long long,	   "0x%llx", delim, 18, "lli", check_ll);
+
+#if BITS_PER_LONG == 64
+	numbers_list_fix_width(unsigned long,	"%lu",	     delim, 20, "lu", check_ulong);
+	numbers_list_fix_width(long,		"%ld",	     delim, 20, "ld", check_long);
+	numbers_list_fix_width(long,		"%ld",	     delim, 20, "li", check_long);
+	numbers_list_fix_width(unsigned long,	"%lx",	     delim, 16, "lx", check_ulong);
+	numbers_list_fix_width(unsigned long,	"0x%lx",     delim, 18, "lx", check_ulong);
+	numbers_list_fix_width(long,		"0x%lx",     delim, 18, "li", check_long);
+#else
+	numbers_list_fix_width(unsigned long,	"%lu",	     delim, 10, "lu", check_ulong);
+	numbers_list_fix_width(long,		"%ld",	     delim, 11, "ld", check_long);
+	numbers_list_fix_width(long,		"%ld",	     delim, 11, "li", check_long);
+	numbers_list_fix_width(unsigned long,	"%lx",	     delim, 8,  "lx", check_ulong);
+	numbers_list_fix_width(unsigned long,	"0x%lx",     delim, 10, "lx", check_ulong);
+	numbers_list_fix_width(long,		"0x%lx",     delim, 10, "li", check_long);
+#endif
+
+	numbers_list_fix_width(unsigned int,	"%u",	     delim, 10, "u", check_uint);
+	numbers_list_fix_width(int,		"%d",	     delim, 11, "d", check_int);
+	numbers_list_fix_width(int,		"%d",	     delim, 11, "i", check_int);
+	numbers_list_fix_width(unsigned int,	"%x",	     delim, 8,  "x", check_uint);
+	numbers_list_fix_width(unsigned int,	"0x%x",	     delim, 10, "x", check_uint);
+	numbers_list_fix_width(int,		"0x%x",	     delim, 10, "i", check_int);
+
+	numbers_list_fix_width(unsigned short,	"%hu",	     delim, 5, "hu", check_ushort);
+	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hd", check_short);
+	numbers_list_fix_width(short,		"%hd",	     delim, 6, "hi", check_short);
+	numbers_list_fix_width(unsigned short,	"%hx",	     delim, 4, "hx", check_ushort);
+	numbers_list_fix_width(unsigned short,	"0x%hx",     delim, 6, "hx", check_ushort);
+	numbers_list_fix_width(short,		"0x%hx",     delim, 6, "hi", check_short);
+
+	numbers_list_fix_width(unsigned char,	"%hhu",	     delim, 3, "hhu", check_uchar);
+	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhd", check_char);
+	numbers_list_fix_width(signed char,	"%hhd",	     delim, 4, "hhi", check_char);
+	numbers_list_fix_width(unsigned char,	"%hhx",	     delim, 2, "hhx", check_uchar);
+	numbers_list_fix_width(unsigned char,	"0x%hhx",    delim, 4, "hhx", check_uchar);
+	numbers_list_fix_width(signed char,	"0x%hhx",    delim, 4, "hhi", check_char);
+}
+
+/*
+ * List of numbers separated by delim. Each field width specifier is the
+ * exact length of the corresponding value digits in the string being scanned.
+ */
+static void __init numbers_list_field_width_val_width(const char *delim)
+{
+	numbers_list_val_width(unsigned long long, "%llu",   delim, "llu", check_ull);
+	numbers_list_val_width(long long,	   "%lld",   delim, "lld", check_ll);
+	numbers_list_val_width(long long,	   "%lld",   delim, "lli", check_ll);
+	numbers_list_val_width(unsigned long long, "%llx",   delim, "llx", check_ull);
+	numbers_list_val_width(unsigned long long, "0x%llx", delim, "llx", check_ull);
+	numbers_list_val_width(long long,	   "0x%llx", delim, "lli", check_ll);
+
+	numbers_list_val_width(unsigned long,	"%lu",	     delim, "lu", check_ulong);
+	numbers_list_val_width(long,		"%ld",	     delim, "ld", check_long);
+	numbers_list_val_width(long,		"%ld",	     delim, "li", check_long);
+	numbers_list_val_width(unsigned long,	"%lx",	     delim, "lx", check_ulong);
+	numbers_list_val_width(unsigned long,	"0x%lx",     delim, "lx", check_ulong);
+	numbers_list_val_width(long,		"0x%lx",     delim, "li", check_long);
+
+	numbers_list_val_width(unsigned int,	"%u",	     delim, "u", check_uint);
+	numbers_list_val_width(int,		"%d",	     delim, "d", check_int);
+	numbers_list_val_width(int,		"%d",	     delim, "i", check_int);
+	numbers_list_val_width(unsigned int,	"%x",	     delim, "x", check_uint);
+	numbers_list_val_width(unsigned int,	"0x%x",	     delim, "x", check_uint);
+	numbers_list_val_width(int,		"0x%x",	     delim, "i", check_int);
+
+	numbers_list_val_width(unsigned short,	"%hu",	     delim, "hu", check_ushort);
+	numbers_list_val_width(short,		"%hd",	     delim, "hd", check_short);
+	numbers_list_val_width(short,		"%hd",	     delim, "hi", check_short);
+	numbers_list_val_width(unsigned short,	"%hx",	     delim, "hx", check_ushort);
+	numbers_list_val_width(unsigned short,	"0x%hx",     delim, "hx", check_ushort);
+	numbers_list_val_width(short,		"0x%hx",     delim, "hi", check_short);
+
+	numbers_list_val_width(unsigned char,	"%hhu",	     delim, "hhu", check_uchar);
+	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhd", check_char);
+	numbers_list_val_width(signed char,	"%hhd",	     delim, "hhi", check_char);
+	numbers_list_val_width(unsigned char,	"%hhx",	     delim, "hhx", check_uchar);
+	numbers_list_val_width(unsigned char,	"0x%hhx",    delim, "hhx", check_uchar);
+	numbers_list_val_width(signed char,	"0x%hhx",    delim, "hhi", check_char);
+}
+
+/*
+ * Slice a continuous string of digits without field delimiters, containing
+ * numbers of varying length, using the field width to extract each group
+ * of digits. For example the hex values c0,3,bf01,303 would have a
+ * string representation of "c03bf01303" and extracted with "%2x%1x%4x%3x".
+ */
+static void __init numbers_slice(void)
+{
+	numbers_list_field_width_val_width("");
+}
+
+#define test_number_prefix(T, str, scan_fmt, expect0, expect1, n_args, fn)	\
+do {										\
+	const T expect[2] = { expect0, expect1 };				\
+	T result[2] = {~expect[0], ~expect[1]};					\
+										\
+	_test(fn, &expect, str, scan_fmt, n_args, &result[0], &result[1]);	\
+} while (0)
+
+/*
+ * Number prefix is >= field width.
+ * Expected behaviour is derived from testing userland sscanf.
+ */
+static void __init numbers_prefix_overflow(void)
+{
+	/*
+	 * Negative decimal with a field of width 1, should quit scanning
+	 * and return 0.
+	 */
+	test_number_prefix(long long,	"-1 1", "%1lld %lld",	0, 0, 0, check_ll);
+	test_number_prefix(long,	"-1 1", "%1ld %ld",	0, 0, 0, check_long);
+	test_number_prefix(int,		"-1 1", "%1d %d",	0, 0, 0, check_int);
+	test_number_prefix(short,	"-1 1", "%1hd %hd",	0, 0, 0, check_short);
+	test_number_prefix(signed char,	"-1 1", "%1hhd %hhd",	0, 0, 0, check_char);
+
+	test_number_prefix(long long,	"-1 1", "%1lli %lli",	0, 0, 0, check_ll);
+	test_number_prefix(long,	"-1 1", "%1li %li",	0, 0, 0, check_long);
+	test_number_prefix(int,		"-1 1", "%1i %i",	0, 0, 0, check_int);
+	test_number_prefix(short,	"-1 1", "%1hi %hi",	0, 0, 0, check_short);
+	test_number_prefix(signed char,	"-1 1", "%1hhi %hhi",	0, 0, 0, check_char);
+
+	/*
+	 * 0x prefix in a field of width 1: 0 is a valid digit so should
+	 * convert. Next field scan starts at the 'x' which isn't a digit so
+	 * scan quits with one field converted.
+	 */
+	test_number_prefix(unsigned long long,	"0xA7", "%1llx%llx", 0, 0, 1, check_ull);
+	test_number_prefix(unsigned long,	"0xA7", "%1lx%lx",   0, 0, 1, check_ulong);
+	test_number_prefix(unsigned int,	"0xA7", "%1x%x",     0, 0, 1, check_uint);
+	test_number_prefix(unsigned short,	"0xA7", "%1hx%hx",   0, 0, 1, check_ushort);
+	test_number_prefix(unsigned char,	"0xA7", "%1hhx%hhx", 0, 0, 1, check_uchar);
+	test_number_prefix(long long,		"0xA7", "%1lli%llx", 0, 0, 1, check_ll);
+	test_number_prefix(long,		"0xA7", "%1li%lx",   0, 0, 1, check_long);
+	test_number_prefix(int,			"0xA7", "%1i%x",     0, 0, 1, check_int);
+	test_number_prefix(short,		"0xA7", "%1hi%hx",   0, 0, 1, check_short);
+	test_number_prefix(char,		"0xA7", "%1hhi%hhx", 0, 0, 1, check_char);
+
+	/*
+	 * 0x prefix in a field of width 2 using %x conversion: first field
+	 * converts to 0. Next field scan starts at the character after "0x".
+	 * Both fields will convert.
+	 */
+	test_number_prefix(unsigned long long,	"0xA7", "%2llx%llx", 0, 0xa7, 2, check_ull);
+	test_number_prefix(unsigned long,	"0xA7", "%2lx%lx",   0, 0xa7, 2, check_ulong);
+	test_number_prefix(unsigned int,	"0xA7", "%2x%x",     0, 0xa7, 2, check_uint);
+	test_number_prefix(unsigned short,	"0xA7", "%2hx%hx",   0, 0xa7, 2, check_ushort);
+	test_number_prefix(unsigned char,	"0xA7", "%2hhx%hhx", 0, 0xa7, 2, check_uchar);
+
+	/*
+	 * 0x prefix in a field of width 2 using %i conversion: first field
+	 * converts to 0. Next field scan starts at the character after "0x",
+	 * which will convert if it can be interpreted as decimal but will fail
+	 * if it contains any hex digits (since no 0x prefix).
+	 */
+	test_number_prefix(long long,	"0x67", "%2lli%lli", 0, 67, 2, check_ll);
+	test_number_prefix(long,	"0x67", "%2li%li",   0, 67, 2, check_long);
+	test_number_prefix(int,		"0x67", "%2i%i",     0, 67, 2, check_int);
+	test_number_prefix(short,	"0x67", "%2hi%hi",   0, 67, 2, check_short);
+	test_number_prefix(char,	"0x67", "%2hhi%hhi", 0, 67, 2, check_char);
+
+	test_number_prefix(long long,	"0xA7", "%2lli%lli", 0, 0,  1, check_ll);
+	test_number_prefix(long,	"0xA7", "%2li%li",   0, 0,  1, check_long);
+	test_number_prefix(int,		"0xA7", "%2i%i",     0, 0,  1, check_int);
+	test_number_prefix(short,	"0xA7", "%2hi%hi",   0, 0,  1, check_short);
+	test_number_prefix(char,	"0xA7", "%2hhi%hhi", 0, 0,  1, check_char);
+}
+
+#define _test_simple_strtoxx(T, fn, gen_fmt, expect, base)			\
+do {										\
+	T got;									\
+	char *endp;								\
+	int len;								\
+	bool fail = false;							\
+	/* Render expect via gen_fmt, parse it back with fn, compare. */	\
+	total_tests++;								\
+	len = snprintf(test_buffer, BUF_SIZE, gen_fmt, expect);			\
+	got = (fn)(test_buffer, &endp, base);					\
+	pr_debug(#fn "(\"%s\", %d) -> " gen_fmt "\n", test_buffer, base, got);	\
+	if (got != (expect)) {							\
+		fail = true;							\
+		pr_warn(#fn "(\"%s\", %d): got " gen_fmt " expected " gen_fmt "\n", \
+			test_buffer, base, got, expect);			\
+	} else if (endp != test_buffer + len) {					\
+		fail = true;							\
+		pr_warn(#fn "(\"%s\", %d) startp=0x%px got endp=0x%px expected 0x%px\n", \
+			test_buffer, base, test_buffer,				\
+			endp, test_buffer + len);				\
+	}									\
+	/* A value or end-pointer mismatch counts as one failed test. */	\
+	if (fail)								\
+		failed_tests++;							\
+} while (0)
+
+#define test_simple_strtoxx(T, fn, gen_fmt, base)				\
+do {										\
+	int i;									\
+										\
+	for (i = 0; i < ARRAY_SIZE(numbers); i++) {				\
+		_test_simple_strtoxx(T, fn, gen_fmt, (T)numbers[i], base);	\
+										\
+		if (is_signed_type(T))						\
+			_test_simple_strtoxx(T, fn, gen_fmt,			\
+					      -(T)numbers[i], base);		\
+	}									\
+} while (0)
+
+static void __init test_simple_strtoull(void)
+{
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llu",   10);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llu",   0);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "%llx",   16);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "0x%llx", 16);
+	test_simple_strtoxx(unsigned long long, simple_strtoull, "0x%llx", 0);
+}
+
+static void __init test_simple_strtoll(void)
+{
+	test_simple_strtoxx(long long, simple_strtoll, "%lld",	 10);
+	test_simple_strtoxx(long long, simple_strtoll, "%lld",	 0);
+	test_simple_strtoxx(long long, simple_strtoll, "%llx",	 16);
+	test_simple_strtoxx(long long, simple_strtoll, "0x%llx", 16);
+	test_simple_strtoxx(long long, simple_strtoll, "0x%llx", 0);
+}
+
+static void __init test_simple_strtoul(void)
+{
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lu",   10);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lu",   0);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "%lx",   16);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "0x%lx", 16);
+	test_simple_strtoxx(unsigned long, simple_strtoul, "0x%lx", 0);
+}
+
+static void __init test_simple_strtol(void)
+{
+	test_simple_strtoxx(long, simple_strtol, "%ld",   10);
+	test_simple_strtoxx(long, simple_strtol, "%ld",   0);
+	test_simple_strtoxx(long, simple_strtol, "%lx",   16);
+	test_simple_strtoxx(long, simple_strtol, "0x%lx", 16);
+	test_simple_strtoxx(long, simple_strtol, "0x%lx", 0);
+}
+
+/* Selection of common delimiters/separators between numbers in a string. */
+static const char * const number_delimiters[] __initconst = {
+	" ", ":", ",", "-", "/",
+};
+
+static void __init test_numbers(void)
+{
+	int i;
+
+	/* String containing only one number. */
+	numbers_simple();
+
+	/* String with multiple numbers separated by delimiter. */
+	for (i = 0; i < ARRAY_SIZE(number_delimiters); i++) {
+		numbers_list(number_delimiters[i]);
+
+		/* Field width may be longer than actual field digits. */
+		numbers_list_field_width_typemax(number_delimiters[i]);
+
+		/* Each field width exactly length of actual field digits. */
+		numbers_list_field_width_val_width(number_delimiters[i]);
+	}
+
+	/* Slice continuous sequence of digits using field widths. */
+	numbers_slice();
+
+	numbers_prefix_overflow();
+}
+
+static void __init selftest(void)
+{
+	test_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
+	if (!test_buffer)
+		return;
+
+	fmt_buffer = kmalloc(BUF_SIZE, GFP_KERNEL);
+	if (!fmt_buffer) {
+		kfree(test_buffer);
+		return;
+	}
+
+	prandom_seed_state(&rnd_state, 3141592653589793238ULL);
+
+	test_numbers();
+
+	test_simple_strtoull();
+	test_simple_strtoll();
+	test_simple_strtoul();
+	test_simple_strtol();
+
+	kfree(fmt_buffer);
+	kfree(test_buffer);
+}
+
+KSTM_MODULE_LOADERS(test_scanf);
+MODULE_AUTHOR("Richard Fitzgerald <rf@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From ef04d4ff4b19628c78abddc768acce097d35d086 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Fri, 14 May 2021 17:12:06 +0100
Subject: [PATCH 1080/3804] selftests: lib: Add wrapper script for test_scanf

Adds a wrapper shell script for the test_scanf module.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210514161206.30821-4-rf@opensource.cirrus.com
---
 tools/testing/selftests/lib/Makefile | 2 +-
 tools/testing/selftests/lib/config   | 1 +
 tools/testing/selftests/lib/scanf.sh | 4 ++++
 3 files changed, 6 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/lib/scanf.sh

diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e1..ee71fc99d5b51 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265b..645839b50b0a2 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
 CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_PRIME_NUMBERS=m
 CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 0000000000000..b59b8ba561c3d
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
-- 
GitLab


From 8c08652614cb7468620a6328b37ca2965cd48283 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 18 May 2021 10:41:21 -0700
Subject: [PATCH 1081/3804] ASoC: SOF: Intel: hda: don't send DAI_CONFIG IPC
 for older firmware

BE hw_params op was recently added for SSP type DAIs.
But sending the DAI_CONFIG IPC during hw_params
is not supported with older firmware. So add an ABI check
to avoid sending the IPC if the firmware ABI is older than
3.18.

Fixes: e12be9fbfb91 ('ASoC: SOF: Intel: HDA: add hw params callback for SSP DAIs')
Tested-by: Yong Zhi <yong.zhi@intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/20210518174121.151601-1-ranjani.sridharan@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/hda-dai.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
index 8d7bab433fb33..c1f9f0f584647 100644
--- a/sound/soc/sof/intel/hda-dai.c
+++ b/sound/soc/sof/intel/hda-dai.c
@@ -421,11 +421,16 @@ static int ssp_dai_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
 	struct snd_soc_component *component = snd_soc_rtdcom_lookup(rtd, SOF_AUDIO_PCM_DRV_NAME);
 	struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
+	struct sof_ipc_fw_version *v = &sdev->fw_ready.version;
 	struct sof_ipc_dai_config *config;
 	struct snd_sof_dai *sof_dai;
 	struct sof_ipc_reply reply;
 	int ret;
 
+	/* DAI_CONFIG IPC during hw_params is not supported in older firmware */
+	if (v->abi_version < SOF_ABI_VER(3, 18, 0))
+		return 0;
+
 	list_for_each_entry(sof_dai, &sdev->dai_list, list) {
 		if (!sof_dai->cpu_dai_name || !sof_dai->dai_config)
 			continue;
-- 
GitLab


From 1f89d2fe16072a74b34bdb895160910091427891 Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Mon, 17 May 2021 21:28:03 +0200
Subject: [PATCH 1082/3804] regmap: Add MDIO bus support

Basic support for MDIO bus access. Support only includes clause-22
register access, with 5-bit addresses, and 16-bit wide registers.

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://lore.kernel.org/r/63b99a2fec2c4ea3c461d59d451af8d675ecf312.1621279162.git.sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/Kconfig       |  6 +++-
 drivers/base/regmap/Makefile      |  1 +
 drivers/base/regmap/regmap-mdio.c | 57 +++++++++++++++++++++++++++++++
 include/linux/regmap.h            | 36 +++++++++++++++++++
 4 files changed, 99 insertions(+), 1 deletion(-)
 create mode 100644 drivers/base/regmap/regmap-mdio.c

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index 50b1e2d06a257..159bac6c50466 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -4,8 +4,9 @@
 # subsystems should select the appropriate symbols.
 
 config REGMAP
-	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM)
+	default y if (REGMAP_I2C || REGMAP_SPI || REGMAP_SPMI || REGMAP_W1 || REGMAP_AC97 || REGMAP_MMIO || REGMAP_IRQ || REGMAP_SOUNDWIRE || REGMAP_SOUNDWIRE_MBQ || REGMAP_SCCB || REGMAP_I3C || REGMAP_SPI_AVMM || REGMAP_MDIO)
 	select IRQ_DOMAIN if REGMAP_IRQ
+	select MDIO_BUS if REGMAP_MDIO
 	bool
 
 config REGCACHE_COMPRESSED
@@ -36,6 +37,9 @@ config REGMAP_W1
 	tristate
 	depends on W1
 
+config REGMAP_MDIO
+	tristate
+
 config REGMAP_MMIO
 	tristate
 
diff --git a/drivers/base/regmap/Makefile b/drivers/base/regmap/Makefile
index 33f63adb5b3df..11facb32a027e 100644
--- a/drivers/base/regmap/Makefile
+++ b/drivers/base/regmap/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_REGMAP_SOUNDWIRE_MBQ) += regmap-sdw-mbq.o
 obj-$(CONFIG_REGMAP_SCCB) += regmap-sccb.o
 obj-$(CONFIG_REGMAP_I3C) += regmap-i3c.o
 obj-$(CONFIG_REGMAP_SPI_AVMM) += regmap-spi-avmm.o
+obj-$(CONFIG_REGMAP_MDIO) += regmap-mdio.o
diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
new file mode 100644
index 0000000000000..5f18fe409f569
--- /dev/null
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/errno.h>
+#include <linux/mdio.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+static int regmap_mdio_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct mdio_device *mdio_dev = context;
+	int ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg);
+
+	if (ret < 0)	/* bus error: report it and leave *val untouched */
+		return ret;
+	*val = ret & 0xffff;	/* keep only the 16-bit register value */
+	return 0;
+}
+
+static int regmap_mdio_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct mdio_device *mdio_dev = context;
+
+	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val);
+}
+
+static const struct regmap_bus regmap_mdio_bus = {
+	.reg_write = regmap_mdio_write,
+	.reg_read = regmap_mdio_read,
+};
+
+struct regmap *__regmap_init_mdio(struct mdio_device *mdio_dev,
+	const struct regmap_config *config, struct lock_class_key *lock_key,
+	const char *lock_name)
+{
+	if (config->reg_bits != 5 || config->val_bits != 16)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return __regmap_init(&mdio_dev->dev, &regmap_mdio_bus, mdio_dev, config,
+		lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__regmap_init_mdio);
+
+struct regmap *__devm_regmap_init_mdio(struct mdio_device *mdio_dev,
+	const struct regmap_config *config, struct lock_class_key *lock_key,
+	const char *lock_name)
+{
+	if (config->reg_bits != 5 || config->val_bits != 16)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	return __devm_regmap_init(&mdio_dev->dev, &regmap_mdio_bus, mdio_dev,
+		config, lock_key, lock_name);
+}
+EXPORT_SYMBOL_GPL(__devm_regmap_init_mdio);
+
+MODULE_AUTHOR("Sander Vanheule <sander@svanheule.net>");
+MODULE_DESCRIPTION("Regmap MDIO Module");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f87a11a5cc4a7..e97dd05f7cdbd 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -27,6 +27,7 @@ struct device_node;
 struct i2c_client;
 struct i3c_device;
 struct irq_domain;
+struct mdio_device;
 struct slim_device;
 struct spi_device;
 struct spmi_device;
@@ -538,6 +539,10 @@ struct regmap *__regmap_init_i2c(struct i2c_client *i2c,
 				 const struct regmap_config *config,
 				 struct lock_class_key *lock_key,
 				 const char *lock_name);
+struct regmap *__regmap_init_mdio(struct mdio_device *mdio_dev,
+				 const struct regmap_config *config,
+				 struct lock_class_key *lock_key,
+				 const char *lock_name);
 struct regmap *__regmap_init_sccb(struct i2c_client *i2c,
 				  const struct regmap_config *config,
 				  struct lock_class_key *lock_key,
@@ -594,6 +599,10 @@ struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c,
 				      const struct regmap_config *config,
 				      struct lock_class_key *lock_key,
 				      const char *lock_name);
+struct regmap *__devm_regmap_init_mdio(struct mdio_device *mdio_dev,
+				      const struct regmap_config *config,
+				      struct lock_class_key *lock_key,
+				      const char *lock_name);
 struct regmap *__devm_regmap_init_sccb(struct i2c_client *i2c,
 				       const struct regmap_config *config,
 				       struct lock_class_key *lock_key,
@@ -697,6 +706,19 @@ int regmap_attach_dev(struct device *dev, struct regmap *map,
 	__regmap_lockdep_wrapper(__regmap_init_i2c, #config,		\
 				i2c, config)
 
+/**
+ * regmap_init_mdio() - Initialise register map
+ *
+ * @mdio_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer to
+ * a struct regmap.
+ */
+#define regmap_init_mdio(mdio_dev, config)				\
+	__regmap_lockdep_wrapper(__regmap_init_mdio, #config,		\
+				mdio_dev, config)
+
 /**
  * regmap_init_sccb() - Initialise register map
  *
@@ -888,6 +910,20 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg);
 	__regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config,	\
 				i2c, config)
 
+/**
+ * devm_regmap_init_mdio() - Initialise managed register map
+ *
+ * @mdio_dev: Device that will be interacted with
+ * @config: Configuration for register map
+ *
+ * The return value will be an ERR_PTR() on error or a valid pointer
+ * to a struct regmap.  The regmap will be automatically freed by the
+ * device management code.
+ */
+#define devm_regmap_init_mdio(mdio_dev, config)				\
+	__regmap_lockdep_wrapper(__devm_regmap_init_mdio, #config,	\
+				mdio_dev, config)
+
 /**
  * devm_regmap_init_sccb() - Initialise managed register map
  *
-- 
GitLab


From a7f003147b785d9780ceeac13a8e344927a3b9ea Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 17 May 2021 09:03:17 +0800
Subject: [PATCH 1083/3804] regulator: fan53555: Fix slew_shift setting for
 tcs4525

Fix trivial copy-paste mistake.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210517010318.1027949-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 2695be617373c..d582ef3a3aeb3 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -376,7 +376,7 @@ static int fan53526_voltages_setup_tcs(struct fan53555_device_info *di)
 	case TCS4525_CHIP_ID_12:
 		di->slew_reg = TCS4525_TIME;
 		di->slew_mask = TCS_SLEW_MASK;
-		di->slew_shift = TCS_SLEW_MASK;
+		di->slew_shift = TCS_SLEW_SHIFT;
 
 		/* Init voltage range and step */
 		di->vsel_min = 600000;
-- 
GitLab


From 79c7e1447c1c998e2571191e3cad12f9285ee22e Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 17 May 2021 09:03:18 +0800
Subject: [PATCH 1084/3804] regulator: fan53555: Cleanup unused define and
 redundant assignment

TCS_VSEL_NSEL_MASK is not used so remove it.
Also remove redundant assignment for di->slew_reg.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210517010318.1027949-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index d582ef3a3aeb3..f3f49cf3731b7 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -56,7 +56,6 @@
 #define FAN53555_NVOLTAGES	64	/* Numbers of voltages */
 #define FAN53526_NVOLTAGES	128
 
-#define TCS_VSEL_NSEL_MASK	0x7f
 #define TCS_VSEL0_MODE		(1 << 7)
 #define TCS_VSEL1_MODE		(1 << 6)
 
@@ -362,7 +361,6 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di)
 		return -EINVAL;
 	}
 	di->slew_reg = FAN53555_CONTROL;
-	di->slew_reg = FAN53555_CONTROL;
 	di->slew_mask = CTL_SLEW_MASK;
 	di->slew_shift = CTL_SLEW_SHIFT;
 	di->vsel_count = FAN53555_NVOLTAGES;
-- 
GitLab


From 647e6cc979b0675499347ddbac55c83876a20cf9 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Wed, 5 May 2021 15:36:35 +0200
Subject: [PATCH 1085/3804] platform/surface: aggregator: Do not mark interrupt
 as shared

Having both IRQF_NO_AUTOEN and IRQF_SHARED set causes
request_threaded_irq() to return with -EINVAL (see comment in flag
validation in that function). As the interrupt is currently not shared
between multiple devices, drop the IRQF_SHARED flag.

Fixes: 507cf5a2f1e2 ("platform/surface: aggregator: move to use request_irq by IRQF_NO_AUTOEN flag")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210505133635.1499703-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 69e86cd599d38..8a70df60142c2 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -2483,8 +2483,7 @@ int ssam_irq_setup(struct ssam_controller *ctrl)
 	 * interrupt, and let the SAM resume callback during the controller
 	 * resume process clear it.
 	 */
-	const int irqf = IRQF_SHARED | IRQF_ONESHOT |
-			 IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN;
+	const int irqf = IRQF_ONESHOT | IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN;
 
 	gpiod = gpiod_get(dev, "ssam_wakeup-int", GPIOD_ASIS);
 	if (IS_ERR(gpiod))
-- 
GitLab


From ba6e1d8422bd476ad79da409639a773c02f0cbad Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 22:04:36 +0200
Subject: [PATCH 1086/3804] platform/surface: aggregator: avoid clang
 -Wconstant-conversion warning

Clang complains about the assignment of SSAM_ANY_IID to
ssam_device_uid->instance:

drivers/platform/surface/surface_aggregator_registry.c:478:25: error: implicit conversion from 'int' to '__u8' (aka 'unsigned char') changes value from 65535 to 255 [-Werror,-Wconstant-conversion]
        { SSAM_VDEV(HUB, 0x02, SSAM_ANY_IID, 0x00) },
        ~                      ^~~~~~~~~~~~
include/linux/surface_aggregator/device.h:71:23: note: expanded from macro 'SSAM_ANY_IID'
 #define SSAM_ANY_IID            0xffff
                                ^~~~~~
include/linux/surface_aggregator/device.h:126:63: note: expanded from macro 'SSAM_VDEV'
        SSAM_DEVICE(SSAM_DOMAIN_VIRTUAL, SSAM_VIRTUAL_TC_##cat, tid, iid, fun)
                                                                     ^~~
include/linux/surface_aggregator/device.h:102:41: note: expanded from macro 'SSAM_DEVICE'
        .instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0,                        \
                                               ^~~

The assignment doesn't actually happen, but clang checks the type limits
before checking whether this assignment is reached. Replace the ?:
operator with a __builtin_choose_expr() invocation that avoids the
warning for the untaken part.

Fixes: eb0e90a82098 ("platform/surface: aggregator: Add dedicated bus and device type")
Cc: platform-driver-x86@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210514200453.1542978-1-arnd@kernel.org
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/surface_aggregator/device.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/surface_aggregator/device.h b/include/linux/surface_aggregator/device.h
index 4441ad667c3f9..6ff9c58b3e17f 100644
--- a/include/linux/surface_aggregator/device.h
+++ b/include/linux/surface_aggregator/device.h
@@ -98,9 +98,9 @@ struct ssam_device_uid {
 		     | (((fun) != SSAM_ANY_FUN) ? SSAM_MATCH_FUNCTION : 0),	\
 	.domain   = d,								\
 	.category = cat,							\
-	.target   = ((tid) != SSAM_ANY_TID) ? (tid) : 0,			\
-	.instance = ((iid) != SSAM_ANY_IID) ? (iid) : 0,			\
-	.function = ((fun) != SSAM_ANY_FUN) ? (fun) : 0				\
+	.target   = __builtin_choose_expr((tid) != SSAM_ANY_TID, (tid), 0),	\
+	.instance = __builtin_choose_expr((iid) != SSAM_ANY_IID, (iid), 0),	\
+	.function = __builtin_choose_expr((fun) != SSAM_ANY_FUN, (fun), 0)
 
 /**
  * SSAM_VDEV() - Initialize a &struct ssam_device_id as virtual device with
-- 
GitLab


From 773fe1d74404fcb6f0e7e69c3420cf04a6bb56b0 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sat, 15 May 2021 00:19:54 +0200
Subject: [PATCH 1087/3804] platform/surface: aggregator: Add
 platform-driver-x86 list to MAINTAINERS entry

The Surface System Aggregator Module driver entry is currently missing a
mailing list. Surface platform drivers are discussed on the
platform-driver-x86 list and all other Surface platform drivers have a
reference to that list in their entries. So let's add one here as well.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210514221954.5976-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..57467b6046f1c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12180,6 +12180,7 @@ F:	drivers/platform/surface/surfacepro3_button.c
 
 MICROSOFT SURFACE SYSTEM AGGREGATOR SUBSYSTEM
 M:	Maximilian Luz <luzmaximilian@gmail.com>
+L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 W:	https://github.com/linux-surface/surface-aggregator-module
 C:	irc://chat.freenode.net/##linux-surface
-- 
GitLab


From 9795d8232a24be9e1e1cc408a6bdc01c40e2cedc Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 13 May 2021 15:44:37 +0200
Subject: [PATCH 1088/3804] platform/surface: dtx: Fix poll function

The poll function should not return -ERESTARTSYS.

Furthermore, locking in this function is completely unnecessary. The
ddev->lock protects access to the main device and controller (ddev->dev
and ddev->ctrl), ensuring that both are and remain valid while being
accessed by clients. Both are, however, never accessed in the poll
function. The shutdown test (via atomic bit flags) can be safely done
without locking, so drop locking here entirely.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 1d609992832e ("platform/surface: Add DTX driver")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210513134437.2431022-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_dtx.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c
index 63ce587e79e3b..5d9b758a99bbc 100644
--- a/drivers/platform/surface/surface_dtx.c
+++ b/drivers/platform/surface/surface_dtx.c
@@ -527,20 +527,14 @@ static __poll_t surface_dtx_poll(struct file *file, struct poll_table_struct *pt
 	struct sdtx_client *client = file->private_data;
 	__poll_t events = 0;
 
-	if (down_read_killable(&client->ddev->lock))
-		return -ERESTARTSYS;
-
-	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags)) {
-		up_read(&client->ddev->lock);
+	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &client->ddev->flags))
 		return EPOLLHUP | EPOLLERR;
-	}
 
 	poll_wait(file, &client->ddev->waitq, pt);
 
 	if (!kfifo_is_empty(&client->buffer))
 		events |= EPOLLIN | EPOLLRDNORM;
 
-	up_read(&client->ddev->lock);
 	return events;
 }
 
-- 
GitLab


From 1c0e5701c5e792c090aef0e5b9b8923c334d9324 Mon Sep 17 00:00:00 2001
From: Liming Sun <limings@nvidia.com>
Date: Fri, 7 May 2021 20:30:12 -0400
Subject: [PATCH 1089/3804] platform/mellanox: mlxbf-tmfifo: Fix a memory
 barrier issue

The virtio framework uses wmb() when updating avail->idx. It
guarantees the write order, but not necessarily loading order
for the code accessing the memory. This commit adds a load barrier
after reading the avail->idx to make sure all the data in the
descriptor is visible. It also adds a barrier when returning the
packet to virtio framework to make sure read/writes are visible to
the virtio code.

Fixes: 1357dfd7261f ("platform/mellanox: Add TmFifo driver for Mellanox BlueField Soc")
Signed-off-by: Liming Sun <limings@nvidia.com>
Reviewed-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/1620433812-17911-1-git-send-email-limings@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/mellanox/mlxbf-tmfifo.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index bbc4e71a16ff8..38800e86ed8ad 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -294,6 +294,9 @@ mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
 	if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
 		return NULL;
 
+	/* Make sure 'avail->idx' is visible already. */
+	virtio_rmb(false);
+
 	idx = vring->next_avail % vr->num;
 	head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
 	if (WARN_ON(head >= vr->num))
@@ -322,7 +325,7 @@ static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
 	 * done or not. Add a memory barrier here to make sure the update above
 	 * completes before updating the idx.
 	 */
-	mb();
+	virtio_mb(false);
 	vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
 }
 
@@ -733,6 +736,12 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
 		desc = NULL;
 		fifo->vring[is_rx] = NULL;
 
+		/*
+		 * Make sure the load/store are in order before
+		 * returning back to virtio.
+		 */
+		virtio_mb(false);
+
 		/* Notify upper layer that packet is done. */
 		spin_lock_irqsave(&fifo->spin_lock[is_rx], flags);
 		vring_interrupt(0, vring->vq);
-- 
GitLab


From 316a76a58c3f30735e5e416a6dc304d6bb86312d Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Mon, 17 May 2021 16:09:31 +0200
Subject: [PATCH 1090/3804] perf test: Fix libpfm4 support (63) test error for
 nested event groups

Compiling perf with make LIBPFM4=1 includes libpfm support and
enables test case 63 'Test libpfm4 support'. This test reports an error
on all platforms for subtest 63.2 'test groups of --pfm-events'.
The reported error message is 'nested event groups not supported'

 # ./perf test -F 63
 63: Test libpfm4 support                                            :
 63.1: test of individual --pfm-events                               :
 Error:
 failed to parse event stereolab : event not found
 Error:
 failed to parse event stereolab,instructions : event not found
 Error:
 failed to parse event instructions,stereolab : event not found
  Ok
 63.2: test groups of --pfm-events                                   :
 Error:
 nested event groups not supported    <------ Error message here
 Error:
 failed to parse event {stereolab} : event not found
 Error:
 failed to parse event {instructions,cycles},{instructions,stereolab} :\
	 event not found
 Ok
 #

This patch addresses the error message 'nested event groups not supported'.
The root cause is function parse_libpfm_events_option() which parses the
event string '{},{instructions}' and can not handle a leading empty
group notation '{},...'.

The code detects the first (empty) group indicator '{' but does not
terminate group processing on the following group closing character '}'.
So when the second group indicator '{' is detected, the code assumes
a nested group and returns an error.

With the error message fixed, also change the expected event number to
one for the test case to succeed.

While at it, also fix a memory leak: on the success path the function
did not free the duplicated event string (p_orig).

Output after:
 # ./perf test -F 63
 63: Test libpfm4 support                                            :
 63.1: test of individual --pfm-events                               :
 Error:
 failed to parse event stereolab : event not found
 Error:
 failed to parse event stereolab,instructions : event not found
 Error:
 failed to parse event instructions,stereolab : event not found
  Ok
 63.2: test groups of --pfm-events                                   :
 Error:
 failed to parse event {stereolab} : event not found
 Error:
 failed to parse event {instructions,cycles},{instructions,stereolab} : \
	 event not found
  Ok
 #
Error message 'nested event groups not supported' is gone.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20210517140931.2559364-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/pfm.c |  4 ++--
 tools/perf/util/pfm.c  | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
index 76a53126efdf1..d4b0ef74defcc 100644
--- a/tools/perf/tests/pfm.c
+++ b/tools/perf/tests/pfm.c
@@ -131,8 +131,8 @@ static int test__pfm_group(void)
 		},
 		{
 			.events = "{},{instructions}",
-			.nr_events = 0,
-			.nr_groups = 0,
+			.nr_events = 1,
+			.nr_groups = 1,
 		},
 		{
 			.events = "{instructions},{instructions}",
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index d735acb6c29cb..6eef6dfeaa574 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -62,8 +62,16 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 		}
 
 		/* no event */
-		if (*q == '\0')
+		if (*q == '\0') {
+			if (*sep == '}') {
+				if (grp_evt < 0) {
+					ui__error("cannot close a non-existing event group\n");
+					goto error;
+				}
+				grp_evt--;
+			}
 			continue;
+		}
 
 		memset(&attr, 0, sizeof(attr));
 		event_attr_init(&attr);
@@ -107,6 +115,7 @@ int parse_libpfm_events_option(const struct option *opt, const char *str,
 			grp_evt = -1;
 		}
 	}
+	free(p_orig);
 	return 0;
 error:
 	free(p_orig);
-- 
GitLab


From cb7987837c31b217b28089bbc78922d5c9187869 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:13 +0300
Subject: [PATCH 1091/3804] perf intel-pt: Fix transaction abort handling

When adding support for power events, some handling of FUP packets was
unified. That resulted in breaking reporting of TSX aborts, by not
considering the associated TIP packet. Fix that.

Example:

A machine that supports TSX is required. It will have flag "rtm". Kernel
parameter tsx=on may be required.

 # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done
 rtm

Test program:

 #include <stdio.h>
 #include <immintrin.h>

 int main()
 {
        int x = 0;

        if (_xbegin() == _XBEGIN_STARTED) {
                x = 1;
                _xabort(1);
        } else {
                printf("x = %d\n", x);
        }
        return 0;
 }

Compile with -mrtm i.e.

 gcc -Wall -Wextra -mrtm xabort.c -o xabort

Record:

 perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort

Before:

 # perf script --itrace=be -F+flags,+addr,-period,-event --ns
          xabort  1478 [007] 92161.431348552:   tr strt                             0 [unknown] ([unknown]) =>           400b6d main+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   jmp                            400b96 main+0x29 (/root/xabort) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   return                         400bb4 main+0x47 (/root/xabort) =>           400b87 main+0x1a (/root/xabort)
          xabort  1478 [007] 92161.431348637:   jcc                            400b8a main+0x1d (/root/xabort) =>           400b98 main+0x2b (/root/xabort)
          xabort  1478 [007] 92161.431348644:   tr end  call                   400ba9 main+0x3c (/root/xabort) =>           40f690 printf+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431360859:   tr strt                             0 [unknown] ([unknown]) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431360882:   tr end  return                 400bb4 main+0x47 (/root/xabort) =>           401139 __libc_start_main+0x309 (/root/xabort)

After:

 # perf script --itrace=be -F+flags,+addr,-period,-event --ns
          xabort  1478 [007] 92161.431348552:   tr strt                             0 [unknown] ([unknown]) =>           400b6d main+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431348624:   tx abrt                        400b93 main+0x26 (/root/xabort) =>           400b87 main+0x1a (/root/xabort)
          xabort  1478 [007] 92161.431348637:   jcc                            400b8a main+0x1d (/root/xabort) =>           400b98 main+0x2b (/root/xabort)
          xabort  1478 [007] 92161.431348644:   tr end  call                   400ba9 main+0x3c (/root/xabort) =>           40f690 printf+0x0 (/root/xabort)
          xabort  1478 [007] 92161.431360859:   tr strt                             0 [unknown] ([unknown]) =>           400bae main+0x41 (/root/xabort)
          xabort  1478 [007] 92161.431360882:   tr end  return                 400bb4 main+0x47 (/root/xabort) =>           401139 __libc_start_main+0x309 (/root/xabort)

Fixes: a472e65fc490a ("perf intel-pt: Add decoder support for ptwrite and power event packets")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210519074515.9262-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 8c59677bee130..20ad663978cc4 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1146,6 +1146,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
 		decoder->set_fup_tx_flags = false;
 		decoder->tx_flags = decoder->fup_tx_flags;
 		decoder->state.type = INTEL_PT_TRANSACTION;
+		if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+			decoder->state.type |= INTEL_PT_BRANCH;
 		decoder->state.from_ip = decoder->ip;
 		decoder->state.to_ip = 0;
 		decoder->state.flags = decoder->fup_tx_flags;
@@ -1220,8 +1222,10 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
 			return 0;
 		if (err == -EAGAIN ||
 		    intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+			bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
 			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
-			if (intel_pt_fup_event(decoder))
+			if (intel_pt_fup_event(decoder) && no_tip)
 				return 0;
 			return -EAGAIN;
 		}
-- 
GitLab


From 687c9e3b1a81d43b233482f781bd4e20561bc390 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 19 May 2021 21:22:55 +0800
Subject: [PATCH 1092/3804] regulator: Check ramp_delay_table for
 regulator_set_ramp_delay_regmap

Return -EINVAL if ramp_delay_table is NULL.
Also add WARN_ON since the driver code needs fix if this happened.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210519132255.1683863-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 0e16e31c968f1..ad2237a95572a 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -948,7 +948,7 @@ int regulator_set_ramp_delay_regmap(struct regulator_dev *rdev, int ramp_delay)
 	int ret;
 	unsigned int sel;
 
-	if (!rdev->desc->n_ramp_values)
+	if (WARN_ON(!rdev->desc->n_ramp_values || !rdev->desc->ramp_delay_table))
 		return -EINVAL;
 
 	ret = find_closest_bigger(ramp_delay, rdev->desc->ramp_delay_table,
-- 
GitLab


From c954eb72b31a9dc56c99b450253ec5b121add320 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:14 +0300
Subject: [PATCH 1093/3804] perf intel-pt: Fix sample instruction bytes

The decoder reports the current instruction if it was decoded. In some
cases the current instruction is not decoded, in which case the instruction
bytes length must be set to zero. Ensure that is always done.

Note perf script can anyway get the instruction bytes for any samples where
they are not present.

Also note, that there is a redundant "ptq->insn_len = 0" statement which is
not removed until a subsequent patch in order to make this patch apply
cleanly to stable branches.

Example:

A machine that supports TSX is required. It will have flag "rtm". Kernel
parameter tsx=on may be required.

 # for w in `cat /proc/cpuinfo | grep -m1 flags `;do echo $w | grep rtm ; done
 rtm

Test program:

 #include <stdio.h>
 #include <immintrin.h>

 int main()
 {
        int x = 0;

        if (_xbegin() == _XBEGIN_STARTED) {
                x = 1;
                _xabort(1);
        } else {
                printf("x = %d\n", x);
        }
        return 0;
 }

Compile with -mrtm i.e.

 gcc -Wall -Wextra -mrtm xabort.c -o xabort

Record:

 perf record -e intel_pt/cyc/u --filter 'filter main @ ./xabort' ./xabort

Before:

 # perf script --itrace=xe -F+flags,+insn,-period --xed --ns
          xabort  1478 [007] 92161.431348581:   transactions:   x                              400b81 main+0x14 (/root/xabort)          mov $0xffffffff, %eax
          xabort  1478 [007] 92161.431348624:   transactions:   tx abrt                        400b93 main+0x26 (/root/xabort)          mov $0xffffffff, %eax

After:

 # perf script --itrace=xe -F+flags,+insn,-period --xed --ns
          xabort  1478 [007] 92161.431348581:   transactions:   x                              400b81 main+0x14 (/root/xabort)          xbegin 0x6
          xabort  1478 [007] 92161.431348624:   transactions:   tx abrt                        400b93 main+0x26 (/root/xabort)          xabort $0x1

Fixes: faaa87680b25d ("perf intel-pt/bts: Report instruction bytes and length in sample")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210519074515.9262-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 8658d42ce57a0..beae5cbe9cc2d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -707,8 +707,10 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 
 			*ip += intel_pt_insn->length;
 
-			if (to_ip && *ip == to_ip)
+			if (to_ip && *ip == to_ip) {
+				intel_pt_insn->length = 0;
 				goto out_no_cache;
+			}
 
 			if (*ip >= al.map->end)
 				break;
@@ -1198,6 +1200,7 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
 
 static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 {
+	ptq->insn_len = 0;
 	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
 		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
 	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
-- 
GitLab


From 0a0c59724516fabf9705c0d9927fa12319908852 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 19 May 2021 10:45:15 +0300
Subject: [PATCH 1094/3804] perf intel-pt: Remove redundant setting of
 ptq->insn_len

Remove redundant "ptq->insn_len = 0" statement.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20210519074515.9262-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index beae5cbe9cc2d..0dfec8761b9ac 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1214,7 +1214,6 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
 			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
 				     PERF_IP_FLAG_ASYNC |
 				     PERF_IP_FLAG_INTERRUPT;
-		ptq->insn_len = 0;
 	} else {
 		if (ptq->state->from_ip)
 			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
-- 
GitLab


From 86bf2b8ffec40eb4c278ce393e2b0bf48d335e59 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:43 +0200
Subject: [PATCH 1095/3804] platform/x86: gigabyte-wmi: streamline dmi matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Streamline dmi matching.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-1-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 36 ++++++++++-------------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 13d57434e60f2..b95a94ed40b8c 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -133,31 +133,19 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 	return r;
 }
 
+#define DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME(name) \
+	{ .matches = { \
+		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."), \
+		DMI_EXACT_MATCH(DMI_BOARD_NAME, name), \
+	}}
+
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550 GAMING X V2"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M AORUS PRO-P"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "B550M DS3H"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "Z390 I AORUS PRO WIFI-CF"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 AORUS ELITE"),
-	}},
-	{ .matches = {
-		DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Gigabyte Technology Co., Ltd."),
-		DMI_EXACT_MATCH(DMI_BOARD_NAME, "X570 I AORUS PRO WIFI"),
-	}},
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
 	{ }
 };
 
-- 
GitLab


From 8605d64f485fbdb71cb4d55a53085feb000e426e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:44 +0200
Subject: [PATCH 1096/3804] platform/x86: gigabyte-wmi: add support for X570 UD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported as working here:
https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/4

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-2-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index b95a94ed40b8c..7af6c24151e25 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -146,6 +146,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
 	{ }
 };
 
-- 
GitLab


From dac282def6f57d251234e7bbb87d21d7a57b26fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Tue, 11 May 2021 00:15:45 +0200
Subject: [PATCH 1097/3804] platform/x86: gigabyte-wmi: add support for B550
 Aorus Elite
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported as working here:
https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-837210304

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20210510221545.412522-3-linux@weissschuh.net
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/gigabyte-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c
index 7af6c24151e25..5529d7b0abea3 100644
--- a/drivers/platform/x86/gigabyte-wmi.c
+++ b/drivers/platform/x86/gigabyte-wmi.c
@@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev)
 	}}
 
 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
 	DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
-- 
GitLab


From b68e182a3062e326b891f47152a3a1b84abccf0f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 12 May 2021 14:55:23 +0200
Subject: [PATCH 1098/3804] platform/x86: intel_int0002_vgpio: Only call
 enable_irq_wake() when using s2idle

Commit 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement
irq_set_wake on Bay Trail") stopped passing irq_set_wake requests on to
the parent IRQ because this was breaking suspend (causing immediate
wakeups) on an Asus E202SA.

This workaround for the Asus E202SA is causing wakeup by USB keyboard to
not work on other devices with Airmont CPU cores such as the Medion Akoya
E1239T. In hindsight the problem with the Asus E202SA has nothing to do
with Silvermont vs Airmont CPU cores, so the differentiation between the
2 types of CPU cores introduced by the previous fix is wrong.

The real issue at hand is s2idle vs S3 suspend where the suspend is
mostly handled by firmware. The parent IRQ for the INT0002 device is shared
with the ACPI SCI and the real problem is that the INT0002 code should not
be messing with the wakeup settings of that IRQ when suspend/resume is
being handled by the firmware.

Note that on systems which support both s2idle and S3 suspend, which
suspend method to use can be changed at runtime.

This patch fixes both the Asus E202SA spurious wakeups issue as well as
the wakeup by USB keyboard not working on the Medion Akoya E1239T issue.

These are both fixed by replacing the old workaround with delaying the
enable_irq_wake(parent_irq) call till system-suspend time and protecting
it with a !pm_suspend_via_firmware() check so that we still do not call
it on devices using firmware-based (S3) suspend such as the Asus E202SA.

Note rather than adding #ifdef CONFIG_PM_SLEEP, this commit simply adds
a "depends on PM_SLEEP" to the Kconfig since this driver's whole purpose
is to deal with wakeup events, so using it without CONFIG_PM_SLEEP makes
no sense.

Cc: Maxim Mikityanskiy <maxtram95@gmail.com>
Fixes: 871f1f2bcb01 ("platform/x86: intel_int0002_vgpio: Only implement irq_set_wake on Bay Trail")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20210512125523.55215-2-hdegoede@redhat.com
---
 drivers/platform/x86/Kconfig               |  2 +-
 drivers/platform/x86/intel_int0002_vgpio.c | 80 +++++++++++++++-------
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 2714f7c3843e3..60592fb88e7a0 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -711,7 +711,7 @@ config INTEL_HID_EVENT
 
 config INTEL_INT0002_VGPIO
 	tristate "Intel ACPI INT0002 Virtual GPIO driver"
-	depends on GPIOLIB && ACPI
+	depends on GPIOLIB && ACPI && PM_SLEEP
 	select GPIOLIB_IRQCHIP
 	help
 	  Some peripherals on Bay Trail and Cherry Trail platforms signal a
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index 289c6655d425d..569342aa8926e 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -51,6 +51,12 @@
 #define GPE0A_STS_PORT			0x420
 #define GPE0A_EN_PORT			0x428
 
+struct int0002_data {
+	struct gpio_chip chip;
+	int parent_irq;
+	int wake_enable_count;
+};
+
 /*
  * As this is not a real GPIO at all, but just a hack to model an event in
  * ACPI the get / set functions are dummy functions.
@@ -98,14 +104,16 @@ static void int0002_irq_mask(struct irq_data *data)
 static int int0002_irq_set_wake(struct irq_data *data, unsigned int on)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(data);
-	struct platform_device *pdev = to_platform_device(chip->parent);
-	int irq = platform_get_irq(pdev, 0);
+	struct int0002_data *int0002 = container_of(chip, struct int0002_data, chip);
 
-	/* Propagate to parent irq */
+	/*
+	 * Applying of the wakeup flag to our parent IRQ is delayed till system
+	 * suspend, because we only want to do this when using s2idle.
+	 */
 	if (on)
-		enable_irq_wake(irq);
+		int0002->wake_enable_count++;
 	else
-		disable_irq_wake(irq);
+		int0002->wake_enable_count--;
 
 	return 0;
 }
@@ -135,7 +143,7 @@ static bool int0002_check_wake(void *data)
 	return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT);
 }
 
-static struct irq_chip int0002_byt_irqchip = {
+static struct irq_chip int0002_irqchip = {
 	.name			= DRV_NAME,
 	.irq_ack		= int0002_irq_ack,
 	.irq_mask		= int0002_irq_mask,
@@ -143,21 +151,9 @@ static struct irq_chip int0002_byt_irqchip = {
 	.irq_set_wake		= int0002_irq_set_wake,
 };
 
-static struct irq_chip int0002_cht_irqchip = {
-	.name			= DRV_NAME,
-	.irq_ack		= int0002_irq_ack,
-	.irq_mask		= int0002_irq_mask,
-	.irq_unmask		= int0002_irq_unmask,
-	/*
-	 * No set_wake, on CHT the IRQ is typically shared with the ACPI SCI
-	 * and we don't want to mess with the ACPI SCI irq settings.
-	 */
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
 static const struct x86_cpu_id int0002_cpu_ids[] = {
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT,	&int0002_byt_irqchip),
-	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT,	&int0002_cht_irqchip),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, NULL),
+	X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, NULL),
 	{}
 };
 
@@ -172,8 +168,9 @@ static int int0002_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	const struct x86_cpu_id *cpu_id;
-	struct gpio_chip *chip;
+	struct int0002_data *int0002;
 	struct gpio_irq_chip *girq;
+	struct gpio_chip *chip;
 	int irq, ret;
 
 	/* Menlow has a different INT0002 device? <sigh> */
@@ -185,10 +182,13 @@ static int int0002_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
-	if (!chip)
+	int0002 = devm_kzalloc(dev, sizeof(*int0002), GFP_KERNEL);
+	if (!int0002)
 		return -ENOMEM;
 
+	int0002->parent_irq = irq;
+
+	chip = &int0002->chip;
 	chip->label = DRV_NAME;
 	chip->parent = dev;
 	chip->owner = THIS_MODULE;
@@ -214,7 +214,7 @@ static int int0002_probe(struct platform_device *pdev)
 	}
 
 	girq = &chip->irq;
-	girq->chip = (struct irq_chip *)cpu_id->driver_data;
+	girq->chip = &int0002_irqchip;
 	/* This let us handle the parent IRQ in the driver */
 	girq->parent_handler = NULL;
 	girq->num_parents = 0;
@@ -230,6 +230,7 @@ static int int0002_probe(struct platform_device *pdev)
 
 	acpi_register_wakeup_handler(irq, int0002_check_wake, NULL);
 	device_init_wakeup(dev, true);
+	dev_set_drvdata(dev, int0002);
 	return 0;
 }
 
@@ -240,6 +241,36 @@ static int int0002_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int int0002_suspend(struct device *dev)
+{
+	struct int0002_data *int0002 = dev_get_drvdata(dev);
+
+	/*
+	 * The INT0002 parent IRQ is often shared with the ACPI GPE IRQ, don't
+	 * muck with it when firmware based suspend is used, otherwise we may
+	 * cause spurious wakeups from firmware managed suspend.
+	 */
+	if (!pm_suspend_via_firmware() && int0002->wake_enable_count)
+		enable_irq_wake(int0002->parent_irq);
+
+	return 0;
+}
+
+static int int0002_resume(struct device *dev)
+{
+	struct int0002_data *int0002 = dev_get_drvdata(dev);
+
+	if (!pm_suspend_via_firmware() && int0002->wake_enable_count)
+		disable_irq_wake(int0002->parent_irq);
+
+	return 0;
+}
+
+static const struct dev_pm_ops int0002_pm_ops = {
+	.suspend = int0002_suspend,
+	.resume = int0002_resume,
+};
+
 static const struct acpi_device_id int0002_acpi_ids[] = {
 	{ "INT0002", 0 },
 	{ },
@@ -250,6 +281,7 @@ static struct platform_driver int0002_driver = {
 	.driver = {
 		.name			= DRV_NAME,
 		.acpi_match_table	= int0002_acpi_ids,
+		.pm			= &int0002_pm_ops,
 	},
 	.probe	= int0002_probe,
 	.remove	= int0002_remove,
-- 
GitLab


From f048630bdd55eb5379ef35f971639fe52fabe499 Mon Sep 17 00:00:00 2001
From: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Date: Fri, 14 May 2021 23:30:47 +0530
Subject: [PATCH 1099/3804] platform/x86: hp-wireless: add AMD's hardware id to
 the supported list

Newer AMD-based laptops use AMDI0051 as the hardware id to support the
airplane mode button. Add it to the supported list.

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Link: https://lore.kernel.org/r/20210514180047.1697543-1-Shyam-sundar.S-k@amd.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/hp-wireless.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c
index 12c31fd5d5ae2..0753ef18e7211 100644
--- a/drivers/platform/x86/hp-wireless.c
+++ b/drivers/platform/x86/hp-wireless.c
@@ -17,12 +17,14 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Alex Hung");
 MODULE_ALIAS("acpi*:HPQ6001:*");
 MODULE_ALIAS("acpi*:WSTADEF:*");
+MODULE_ALIAS("acpi*:AMDI0051:*");
 
 static struct input_dev *hpwl_input_dev;
 
 static const struct acpi_device_id hpwl_ids[] = {
 	{"HPQ6001", 0},
 	{"WSTADEF", 0},
+	{"AMDI0051", 0},
 	{"", 0},
 };
 
-- 
GitLab


From 3a53587423d25c87af4b4126a806a0575104b45e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 18 May 2021 14:50:27 +0200
Subject: [PATCH 1100/3804] platform/x86: dell-smbios-wmi: Fix oops on rmmod
 dell_smbios

init_dell_smbios_wmi() only registers the dell_smbios_wmi_driver on systems
where the Dell WMI interface is supported. While exit_dell_smbios_wmi()
unregisters it unconditionally, this leads to the following oops:

[  175.722921] ------------[ cut here ]------------
[  175.722925] Unexpected driver unregister!
[  175.722939] WARNING: CPU: 1 PID: 3630 at drivers/base/driver.c:194 driver_unregister+0x38/0x40
...
[  175.723089] Call Trace:
[  175.723094]  cleanup_module+0x5/0xedd [dell_smbios]
...
[  175.723148] ---[ end trace 064c34e1ad49509d ]---

Make the unregister happen on the same condition the register happens
to fix this.

Cc: Mario Limonciello <mario.limonciello@outlook.com>
Fixes: 1a258e670434 ("platform/x86: dell-smbios-wmi: Add new WMI dispatcher driver")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Mario Limonciello <mario.limonciello@outlook.com>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Link: https://lore.kernel.org/r/20210518125027.21824-1-hdegoede@redhat.com
---
 drivers/platform/x86/dell/dell-smbios-wmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/dell/dell-smbios-wmi.c b/drivers/platform/x86/dell/dell-smbios-wmi.c
index a1753485159ca..33f8237727335 100644
--- a/drivers/platform/x86/dell/dell-smbios-wmi.c
+++ b/drivers/platform/x86/dell/dell-smbios-wmi.c
@@ -270,7 +270,8 @@ int init_dell_smbios_wmi(void)
 
 void exit_dell_smbios_wmi(void)
 {
-	wmi_driver_unregister(&dell_smbios_wmi_driver);
+	if (wmi_supported)
+		wmi_driver_unregister(&dell_smbios_wmi_driver);
 }
 
 MODULE_DEVICE_TABLE(wmi, dell_smbios_wmi_id_table);
-- 
GitLab


From bc1eca606d8084465e6f89fd646cc71defbad490 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 May 2021 13:15:21 +0300
Subject: [PATCH 1101/3804] platform/x86: intel_punit_ipc: Append
 MODULE_DEVICE_TABLE for ACPI

The intel_punit_ipc driver might be compiled as a module.
When udev handles the event of the devices appearing
the intel_punit_ipc module is missing.

Append MODULE_DEVICE_TABLE for ACPI case to fix the loading issue.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210519101521.79338-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/intel_punit_ipc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c
index 05cced59e251a..f58b8543f6ac5 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel_punit_ipc.c
@@ -312,6 +312,7 @@ static const struct acpi_device_id punit_ipc_acpi_ids[] = {
 	{ "INT34D4", 0 },
 	{ }
 };
+MODULE_DEVICE_TABLE(acpi, punit_ipc_acpi_ids);
 
 static struct platform_driver intel_punit_ipc_driver = {
 	.probe = intel_punit_ipc_probe,
-- 
GitLab


From 39a6172ea88b3117353ae16cbb0a53cd80a9340a Mon Sep 17 00:00:00 2001
From: Teava Radu <rateava@gmail.com>
Date: Tue, 4 May 2021 20:57:46 +0200
Subject: [PATCH 1102/3804] platform/x86: touchscreen_dmi: Add info for the
 Mediacom Winpad 7.0 W700 tablet

Add touchscreen info for the Mediacom Winpad 7.0 W700 tablet.
Tested on 5.11 hirsute.
Note: it's a hardware clone of the Wintron surftab 7.

Signed-off-by: Teava Radu <rateava@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210504185746.175461-6-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 90fe4f8f3c2c7..875519c6c2064 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -1096,6 +1096,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BIOS_VERSION, "jumperx.T87.KFBNEEA"),
 		},
 	},
+	{
+		/* Mediacom WinPad 7.0 W700 (same hw as Wintron surftab 7") */
+		.driver_data = (void *)&trekstor_surftab_wintron70_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MEDIACOM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "WinPad 7 W10 - WPW700"),
+		},
+	},
 	{
 		/* Mediacom Flexbook Edge 11 (same hw as TS Primebook C11) */
 		.driver_data = (void *)&trekstor_primebook_c11_data,
-- 
GitLab


From 05ca447630334c323c9e2b788b61133ab75d60d3 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 18 May 2021 10:39:39 +0200
Subject: [PATCH 1103/3804] ALSA: line6: Fix racy initialization of LINE6 MIDI

The initialization of MIDI devices that are found on some LINE6
drivers is currently done in a racy way; namely, the MIDI buffer
instance is allocated and initialized in each private_init callback
while the communication with the interface is already started via
line6_init_cap_control() call before that point.  This may lead to
Oops in line6_data_received() when a spurious event is received, as
reported by syzkaller.

This patch moves the MIDI initialization to line6_init_cap_control()
as well instead of the too-lately-called private_init for avoiding the
race.  Also this reduces slightly more lines, so it's a win-win
change.

Reported-by: syzbot+0d2b3feb0a2887862e06@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/000000000000a4be9405c28520de@google.com
Link: https://lore.kernel.org/r/20210517132725.GA50495@hyeyoo
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210518083939.1927-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/line6/driver.c | 4 ++++
 sound/usb/line6/pod.c    | 5 -----
 sound/usb/line6/variax.c | 6 ------
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c
index a030dd65eb280..9602929b7de90 100644
--- a/sound/usb/line6/driver.c
+++ b/sound/usb/line6/driver.c
@@ -699,6 +699,10 @@ static int line6_init_cap_control(struct usb_line6 *line6)
 		line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
 		if (!line6->buffer_message)
 			return -ENOMEM;
+
+		ret = line6_init_midi(line6);
+		if (ret < 0)
+			return ret;
 	} else {
 		ret = line6_hwdep_init(line6);
 		if (ret < 0)
diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c
index cd44cb5f1310c..16e644330c4d6 100644
--- a/sound/usb/line6/pod.c
+++ b/sound/usb/line6/pod.c
@@ -376,11 +376,6 @@ static int pod_init(struct usb_line6 *line6,
 	if (err < 0)
 		return err;
 
-	/* initialize MIDI subsystem: */
-	err = line6_init_midi(line6);
-	if (err < 0)
-		return err;
-
 	/* initialize PCM subsystem: */
 	err = line6_init_pcm(line6, &pod_pcm_properties);
 	if (err < 0)
diff --git a/sound/usb/line6/variax.c b/sound/usb/line6/variax.c
index ed158f04de80f..c2245aa93b08f 100644
--- a/sound/usb/line6/variax.c
+++ b/sound/usb/line6/variax.c
@@ -159,7 +159,6 @@ static int variax_init(struct usb_line6 *line6,
 		       const struct usb_device_id *id)
 {
 	struct usb_line6_variax *variax = line6_to_variax(line6);
-	int err;
 
 	line6->process_message = line6_variax_process_message;
 	line6->disconnect = line6_variax_disconnect;
@@ -172,11 +171,6 @@ static int variax_init(struct usb_line6 *line6,
 	if (variax->buffer_activate == NULL)
 		return -ENOMEM;
 
-	/* initialize MIDI subsystem: */
-	err = line6_init_midi(&variax->line6);
-	if (err < 0)
-		return err;
-
 	/* initiate startup procedure: */
 	schedule_delayed_work(&line6->startup_work,
 			      msecs_to_jiffies(VARIAX_STARTUP_DELAY1));
-- 
GitLab


From b250f2f7792d15bcde98e0456781e2835556d5fa Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:44 +0200
Subject: [PATCH 1104/3804] x86/sev-es: Don't return NULL from
 sev_es_get_ghcb()

sev_es_get_ghcb() is called from several places but only one of them
checks the return value. The reaction to returning NULL is always the
same: calling panic() and killing the machine.

Instead of adding checks to all call sites, move the panic() into the
function itself so that it will no longer return NULL.

Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-2-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fa111becc93b..82bced88153bb 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 	if (unlikely(data->ghcb_active)) {
 		/* GHCB is already in use - save its contents */
 
-		if (unlikely(data->backup_ghcb_active))
-			return NULL;
+		if (unlikely(data->backup_ghcb_active)) {
+			/*
+			 * Backup-GHCB is also already in use. There is no way
+			 * to continue here so just kill the machine. To make
+			 * panic() work, mark GHCBs inactive so that messages
+			 * can be printed out.
+			 */
+			data->ghcb_active        = false;
+			data->backup_ghcb_active = false;
+
+			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+		}
 
 		/* Mark backup_ghcb active before writing to it */
 		data->backup_ghcb_active = true;
@@ -1289,7 +1299,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
  */
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
-	struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
 	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
@@ -1315,16 +1324,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	 */
 
 	ghcb = sev_es_get_ghcb(&state);
-	if (!ghcb) {
-		/*
-		 * Mark GHCBs inactive so that panic() is able to print the
-		 * message.
-		 */
-		data->ghcb_active        = false;
-		data->backup_ghcb_active = false;
-
-		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
-	}
 
 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
-- 
GitLab


From c0d46717b95735b0eacfddbcca9df37a49de9c7a Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 15 May 2021 09:52:22 -0500
Subject: [PATCH 1105/3804] SMB3: incorrect file id in requests compounded with
 open

See MS-SMB2 3.2.4.1.4, file ids in compounded requests should be set to
0xFFFFFFFFFFFFFFFF (we were treating it as u32 not u64 and setting
it incorrectly).

Signed-off-by: Steve French <stfrench@microsoft.com>
Reported-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/cifs/smb2pdu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a8bf431847730..9f24eb88297a8 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -3900,10 +3900,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len,
 			 * Related requests use info from previous read request
 			 * in chain.
 			 */
-			shdr->SessionId = 0xFFFFFFFF;
+			shdr->SessionId = 0xFFFFFFFFFFFFFFFF;
 			shdr->TreeId = 0xFFFFFFFF;
-			req->PersistentFileId = 0xFFFFFFFF;
-			req->VolatileFileId = 0xFFFFFFFF;
+			req->PersistentFileId = 0xFFFFFFFFFFFFFFFF;
+			req->VolatileFileId = 0xFFFFFFFFFFFFFFFF;
 		}
 	}
 	if (remaining_bytes > io_parms->length)
-- 
GitLab


From c25bbdb564060adaad5c3a8a10765c13487ba6a3 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:45 +0200
Subject: [PATCH 1106/3804] x86/sev-es: Forward page-faults which happen during
 emulation

When emulating guest instructions for MMIO or IOIO accesses, the #VC
handler might get a page-fault and will not be able to complete. Forward
the page-fault in this case to the correct handler instead of killing
the machine.

Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-3-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 82bced88153bb..1f428f401beda 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1270,6 +1270,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
 	case X86_TRAP_UD:
 		exc_invalid_op(ctxt->regs);
 		break;
+	case X86_TRAP_PF:
+		write_cr2(ctxt->fi.cr2);
+		exc_page_fault(ctxt->regs, error_code);
+		break;
 	case X86_TRAP_AC:
 		exc_alignment_check(ctxt->regs, error_code);
 		break;
-- 
GitLab


From 293837b9ac8d3021657f44c9d7a14948ec01c5d0 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 19 May 2021 05:55:57 -1000
Subject: [PATCH 1107/3804] Revert "i915: fix remap_io_sg to verify the pgprot"

This reverts commit b12d691ea5e01db42ccf3b4207e57cb3ce7cfe91.

It turns out this is not ready for primetime yet.  The intentions are
good, but using remap_pfn_range() requires that there is nothing already
mapped in the area, and the i915 code seems to very much intentionally
remap the same area multiple times.

That will then just trigger the

                BUG_ON(!pte_none(*pte));

in mm/memory.c: remap_pte_range().

There are also reports of mapping type inconsistencies, resulting in
warnings and in screen corruption.

Link: https://lore.kernel.org/lkml/20210519024322.GA29704@xsang-OptiPlex-9020/
Link: https://lore.kernel.org/lkml/YKUjvoaKKggAmpIR@sf/
Link: https://lore.kernel.org/lkml/b6b61cf0-5874-f4c0-1fcc-4b3848451c31@redhat.com/
Reported-by: kernel test robot <oliver.sang@intel.com>
Reported-by: Kalle Valo <kvalo@codeaurora.org>
Reported-by: Hans de Goede <hdegoede@redhat.com>
Reported-by: Sergei Trofimovich <slyfox@gentoo.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_mm.c | 73 +++++++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 4c8cd08c672d2..9a777b0ff59b0 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -28,10 +28,46 @@
 
 #include "i915_drv.h"
 
-#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+struct remap_pfn {
+	struct mm_struct *mm;
+	unsigned long pfn;
+	pgprot_t prot;
+
+	struct sgt_iter sgt;
+	resource_size_t iobase;
+};
 
 #define use_dma(io) ((io) != -1)
 
+static inline unsigned long sgt_pfn(const struct remap_pfn *r)
+{
+	if (use_dma(r->iobase))
+		return (r->sgt.dma + r->sgt.curr + r->iobase) >> PAGE_SHIFT;
+	else
+		return r->sgt.pfn + (r->sgt.curr >> PAGE_SHIFT);
+}
+
+static int remap_sg(pte_t *pte, unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+
+	if (GEM_WARN_ON(!r->sgt.sgp))
+		return -EINVAL;
+
+	/* Special PTE are not associated with any struct page */
+	set_pte_at(r->mm, addr, pte,
+		   pte_mkspecial(pfn_pte(sgt_pfn(r), r->prot)));
+	r->pfn++; /* track insertions in case we need to unwind later */
+
+	r->sgt.curr += PAGE_SIZE;
+	if (r->sgt.curr >= r->sgt.max)
+		r->sgt = __sgt_iter(__sg_next(r->sgt.sgp), use_dma(r->iobase));
+
+	return 0;
+}
+
+#define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+
 /**
  * remap_io_sg - remap an IO mapping to userspace
  * @vma: user vma to map to
@@ -46,7 +82,12 @@ int remap_io_sg(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long size,
 		struct scatterlist *sgl, resource_size_t iobase)
 {
-	unsigned long pfn, len, remapped = 0;
+	struct remap_pfn r = {
+		.mm = vma->vm_mm,
+		.prot = vma->vm_page_prot,
+		.sgt = __sgt_iter(sgl, use_dma(iobase)),
+		.iobase = iobase,
+	};
 	int err;
 
 	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
@@ -55,25 +96,11 @@ int remap_io_sg(struct vm_area_struct *vma,
 	if (!use_dma(iobase))
 		flush_cache_range(vma, addr, size);
 
-	do {
-		if (use_dma(iobase)) {
-			if (!sg_dma_len(sgl))
-				break;
-			pfn = (sg_dma_address(sgl) + iobase) >> PAGE_SHIFT;
-			len = sg_dma_len(sgl);
-		} else {
-			pfn = page_to_pfn(sg_page(sgl));
-			len = sgl->length;
-		}
-
-		err = remap_pfn_range(vma, addr + remapped, pfn, len,
-				      vma->vm_page_prot);
-		if (err)
-			break;
-		remapped += len;
-	} while ((sgl = __sg_next(sgl)));
-
-	if (err)
-		zap_vma_ptes(vma, addr, remapped);
-	return err;
+	err = apply_to_page_range(r.mm, addr, size, remap_sg, &r);
+	if (unlikely(err)) {
+		zap_vma_ptes(vma, addr, r.pfn << PAGE_SHIFT);
+		return err;
+	}
+
+	return 0;
 }
-- 
GitLab


From cfa3b797118eda7d68f9ede9b1a0279192aca653 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Wed, 19 May 2021 11:41:32 +0300
Subject: [PATCH 1108/3804] RDMA/mlx5: Fix query DCT via DEVX

When executing DEVX command to query QP object, we need to take the QP
type from the mlx5_ib_qp struct which holds the driver specific QP types as
well, such as DC.

Fixes: 34613eb1d2ad ("IB/mlx5: Enable modify and query verbs objects via DEVX")
Link: https://lore.kernel.org/r/6eee15d63f09bb70787488e0cf96216e2957f5aa.1621413654.git.leonro@nvidia.com
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index a0b677accd965..eb9b0a2707f80 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -630,9 +630,8 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
 	case UVERBS_OBJECT_QP:
 	{
 		struct mlx5_ib_qp *qp = to_mqp(uobj->object);
-		enum ib_qp_type	qp_type = qp->ibqp.qp_type;
 
-		if (qp_type == IB_QPT_RAW_PACKET ||
+		if (qp->type == IB_QPT_RAW_PACKET ||
 		    (qp->flags & IB_QP_CREATE_SOURCE_QPN)) {
 			struct mlx5_ib_raw_packet_qp *raw_packet_qp =
 							 &qp->raw_packet_qp;
@@ -649,10 +648,9 @@ static bool devx_is_valid_obj_id(struct uverbs_attr_bundle *attrs,
 					       sq->tisn) == obj_id);
 		}
 
-		if (qp_type == MLX5_IB_QPT_DCT)
+		if (qp->type == MLX5_IB_QPT_DCT)
 			return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
 					      qp->dct.mdct.mqp.qpn) == obj_id;
-
 		return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
 				      qp->ibqp.qp_num) == obj_id;
 	}
-- 
GitLab


From 4954f5b8ef0baf70fe978d1a99a5f70e4dd5c877 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:46 +0200
Subject: [PATCH 1109/3804] x86/sev-es: Use __put_user()/__get_user() for data
 accesses

The put_user() and get_user() functions do checks on the address which is
passed to them. They check whether the address is actually a user-space
address and whether it's fine to access it. They also call might_fault()
to indicate that they could fault and possibly sleep.

All of these checks are neither wanted nor needed in the #VC exception
handler, which can be invoked from almost any context and also for MMIO
instructions from kernel space on kernel memory. All the #VC handler
wants to know is whether a fault happened when the access was tried.

This is provided by __put_user()/__get_user(), which just do the access
no matter what. Also add comments explaining why __get_user() and
__put_user() are the best choice here and why it is safe to use them
in this context. Also explain why copy_to/from_user can't be used.

In addition, also revert commit

  7024f60d6552 ("x86/sev-es: Handle string port IO to kernel memory properly")

because using __get_user()/__put_user() fixes the same problem while
the above commit introduced several problems:

  1) It uses access_ok() which is only allowed in task context.

  2) It uses memcpy() which has no fault handling at all and is
     thus unsafe to use here.

  [ bp: Fix up commit ID of the reverted commit above. ]

Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org # v5.10+
Link: https://lkml.kernel.org/r/20210519135251.30093-4-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 66 ++++++++++++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 1f428f401beda..651b81cd648e5 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -315,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
 	u16 d2;
 	u8  d1;
 
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
-		memcpy(dst, buf, size);
-		return ES_OK;
-	}
-
+	/*
+	 * This function uses __put_user() independent of whether kernel or user
+	 * memory is accessed. This works fine because __put_user() does no
+	 * sanity checks of the pointer being accessed. All that it does is
+	 * to report when the access failed.
+	 *
+	 * Also, this function runs in atomic context, so __put_user() is not
+	 * allowed to sleep. The page-fault handler detects that it is running
+	 * in atomic context and will not try to take mmap_sem and handle the
+	 * fault, so additional pagefault_enable()/disable() calls are not
+	 * needed.
+	 *
+	 * The access can't be done via copy_to_user() here because
+	 * vc_write_mem() must not use string instructions to access unsafe
+	 * memory. The reason is that MOVS is emulated by the #VC handler by
+	 * splitting the move up into a read and a write and taking a nested #VC
+	 * exception on whatever of them is the MMIO access. Using string
+	 * instructions here would cause infinite nesting.
+	 */
 	switch (size) {
 	case 1:
 		memcpy(&d1, buf, 1);
-		if (put_user(d1, target))
+		if (__put_user(d1, target))
 			goto fault;
 		break;
 	case 2:
 		memcpy(&d2, buf, 2);
-		if (put_user(d2, target))
+		if (__put_user(d2, target))
 			goto fault;
 		break;
 	case 4:
 		memcpy(&d4, buf, 4);
-		if (put_user(d4, target))
+		if (__put_user(d4, target))
 			goto fault;
 		break;
 	case 8:
 		memcpy(&d8, buf, 8);
-		if (put_user(d8, target))
+		if (__put_user(d8, target))
 			goto fault;
 		break;
 	default:
@@ -370,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
 	u16 d2;
 	u8  d1;
 
-	/* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-	if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
-		memcpy(buf, src, size);
-		return ES_OK;
-	}
-
+	/*
+	 * This function uses __get_user() independent of whether kernel or user
+	 * memory is accessed. This works fine because __get_user() does no
+	 * sanity checks of the pointer being accessed. All that it does is
+	 * to report when the access failed.
+	 *
+	 * Also, this function runs in atomic context, so __get_user() is not
+	 * allowed to sleep. The page-fault handler detects that it is running
+	 * in atomic context and will not try to take mmap_sem and handle the
+	 * fault, so additional pagefault_enable()/disable() calls are not
+	 * needed.
+	 *
+	 * The access can't be done via copy_from_user() here because
+	 * vc_read_mem() must not use string instructions to access unsafe
+	 * memory. The reason is that MOVS is emulated by the #VC handler by
+	 * splitting the move up into a read and a write and taking a nested #VC
+	 * exception on whatever of them is the MMIO access. Using string
+	 * instructions here would cause infinite nesting.
+	 */
 	switch (size) {
 	case 1:
-		if (get_user(d1, s))
+		if (__get_user(d1, s))
 			goto fault;
 		memcpy(buf, &d1, 1);
 		break;
 	case 2:
-		if (get_user(d2, s))
+		if (__get_user(d2, s))
 			goto fault;
 		memcpy(buf, &d2, 2);
 		break;
 	case 4:
-		if (get_user(d4, s))
+		if (__get_user(d4, s))
 			goto fault;
 		memcpy(buf, &d4, 4);
 		break;
 	case 8:
-		if (get_user(d8, s))
+		if (__get_user(d8, s))
 			goto fault;
 		memcpy(buf, &d8, 8);
 		break;
-- 
GitLab


From fb6c79d7261afb7e942251254ea47951c2a9a706 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 10 Feb 2021 17:33:27 +0900
Subject: [PATCH 1110/3804] perf tools: Add 'cgroup-switches' software event

It counts how often cgroups are changed actually during the context
switches.

  # perf stat -a -e context-switches,cgroup-switches -a sleep 1

   Performance counter stats for 'system wide':

              11,267      context-switches
              10,950      cgroup-switches

         1.015634369 seconds time elapsed

Committer notes:

The kernel patches landed in v5.13, but this entry wasn't filled in
perf's parse-events tables, which was leading to a segfault when running
'perf list' on a kernel with that feature, as reported by Thomas
Richter.

Also removed the part touching tools/include/uapi/linux/perf_event.h as
it was updated in the usual sync with the kernel UAPI headers, in a
previous, already upstream, patch.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20210210083327.22726-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 4 ++++
 tools/perf/util/parse-events.l | 1 +
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4dad14265b81d..269997066f6e4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -150,6 +150,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
 		.symbol = "bpf-output",
 		.alias  = "",
 	},
+	[PERF_COUNT_SW_CGROUP_SWITCHES] = {
+		.symbol = "cgroup-switches",
+		.alias  = "",
+	},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index fb8646cc3e834..923849024b15f 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -347,6 +347,7 @@ emulation-faults				{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EM
 dummy						{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time					{ return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
 bpf-output					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+cgroup-switches					{ return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }
 
 	/*
 	 * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately.
-- 
GitLab


From 463a3f66473b58d71428a1c3ce69ea52c05440e5 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 May 2021 17:18:10 +0300
Subject: [PATCH 1111/3804] RDMA/uverbs: Fix a NULL vs IS_ERR() bug

The uapi_get_object() function returns error pointers, it never returns
NULL.

Fixes: 149d3845f4a5 ("RDMA/uverbs: Add a method to introspect handles in a context")
Link: https://lore.kernel.org/r/YJ6Got+U7lz+3n9a@mwanda
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_std_types_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index a03021d94e110..049684880ae03 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -117,8 +117,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
 		return ret;
 
 	uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
-	if (!uapi_object)
-		return -EINVAL;
+	if (IS_ERR(uapi_object))
+		return PTR_ERR(uapi_object);
 
 	handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
 					out_len, &total);
-- 
GitLab


From c71b99640d2d350ee3146452c1057bd59cb2c5e0 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 19 May 2021 10:10:38 +0800
Subject: [PATCH 1112/3804] ethtool: stats: Fix a copy-paste error

data->ctrl_stats should be memset with correct size.

Fixes: bfad2b979ddc ("ethtool: add interface to read standard MAC Ctrl stats")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/stats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index b7642dc96d507..ec07f5765e030 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -119,7 +119,7 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
 	 */
 	memset(&data->phy_stats, 0xff, sizeof(data->phy_stats));
 	memset(&data->mac_stats, 0xff, sizeof(data->mac_stats));
-	memset(&data->ctrl_stats, 0xff, sizeof(data->mac_stats));
+	memset(&data->ctrl_stats, 0xff, sizeof(data->ctrl_stats));
 	memset(&data->rmon_stats, 0xff, sizeof(data->rmon_stats));
 
 	if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
-- 
GitLab


From 88c380df84fbd03f9b137c2b9d0a44b9f2f553b0 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <rajur@chelsio.com>
Date: Wed, 19 May 2021 16:48:31 +0530
Subject: [PATCH 1113/3804] cxgb4: avoid accessing registers when clearing
 filters

Hardware register having the server TID base can contain
invalid values when adapter is in bad state (for example,
due to AER fatal error). Reading these invalid values in the
register can lead to out-of-bound memory access. So, fix
by using the saved server TID base when clearing filters.

Fixes: b1a79360ee86 ("cxgb4: Delete all hash and TCAM filters before resource cleanup")
Signed-off-by: Raju Rangoju <rajur@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index bc581b149b113..22c9ac922ebae 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1042,7 +1042,7 @@ void clear_all_filters(struct adapter *adapter)
 				cxgb4_del_filter(dev, f->tid, &f->fs);
 		}
 
-		sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A);
+		sb = adapter->tids.stid_base;
 		for (i = 0; i < sb; i++) {
 			f = (struct filter_entry *)adapter->tids.tid_tab[i];
 
-- 
GitLab


From 8852c552402979508fdc395ae07aa8761aa46045 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 18 May 2021 18:59:15 -0500
Subject: [PATCH 1114/3804] kbuild: Fix objtool dependency for
 'OBJECT_FILES_NON_STANDARD_<obj> := n'

"OBJECT_FILES_NON_STANDARD_vma.o := n" has a dependency bug.  When
objtool source is updated, the affected object doesn't get re-analyzed
by objtool.

Peter's new variable-sized jump label feature relies on objtool
rewriting the object file.  Otherwise the system can fail to boot.  That
effectively upgrades this minor dependency issue to a major bug.

The problem is that variables in prerequisites are expanded early,
during the read-in phase.  The '$(objtool_dep)' variable indirectly uses
'$@', which isn't yet available when the target prerequisites are
evaluated.

Use '.SECONDEXPANSION:' which causes '$(objtool_dep)' to be expanded in
a later phase, after the target-specific '$@' variable has been defined.

Fixes: b9ab5ebb14ec ("objtool: Add CONFIG_STACK_VALIDATION option")
Fixes: ab3257042c26 ("jump_label, x86: Allow short NOPs")
Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
---
 scripts/Makefile.build | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 949f723efe538..34d257653fb47 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -268,7 +268,8 @@ define rule_as_o_S
 endef
 
 # Built-in and composite module parts
-$(obj)/%.o: $(src)/%.c $(recordmcount_source) $(objtool_dep) FORCE
+.SECONDEXPANSION:
+$(obj)/%.o: $(src)/%.c $(recordmcount_source) $$(objtool_dep) FORCE
 	$(call if_changed_rule,cc_o_c)
 	$(call cmd,force_checksrc)
 
@@ -349,7 +350,7 @@ cmd_modversions_S =								\
 	fi
 endif
 
-$(obj)/%.o: $(src)/%.S $(objtool_dep) FORCE
+$(obj)/%.o: $(src)/%.S $$(objtool_dep) FORCE
 	$(call if_changed_rule,as_o_S)
 
 targets += $(filter-out $(subdir-builtin), $(real-obj-y))
-- 
GitLab


From d5b3bd6ab5418e34d85f64fba7c6ca02c3cbfb63 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 19 May 2021 15:02:53 +0200
Subject: [PATCH 1115/3804] dt-bindings: net: renesas,ether: Update Sergei's
 email address

Update Sergei's email address, as per commit 534a8bf0ccdd7b3f
("MAINTAINERS: switch to my private email for Renesas Ethernet
drivers").

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/renesas,ether.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/renesas,ether.yaml b/Documentation/devicetree/bindings/net/renesas,ether.yaml
index 8ce5ed8a58dd7..c101a1ec846ea 100644
--- a/Documentation/devicetree/bindings/net/renesas,ether.yaml
+++ b/Documentation/devicetree/bindings/net/renesas,ether.yaml
@@ -10,7 +10,7 @@ allOf:
   - $ref: ethernet-controller.yaml#
 
 maintainers:
-  - Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
+  - Sergei Shtylyov <sergei.shtylyov@gmail.com>
 
 properties:
   compatible:
-- 
GitLab


From 4d52ebc7ace491d58f96d1f4a1cb9070c506b2e7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 19 May 2021 14:47:17 +0200
Subject: [PATCH 1116/3804] net: hso: bail out on interrupt URB allocation
 failure

Commit 31db0dbd7244 ("net: hso: check for allocation failure in
hso_create_bulk_serial_device()") recently started returning an error
when the driver fails to allocate resources for the interrupt endpoint
and tiocmget functionality.

For consistency let's bail out from probe also if the URB allocation
fails.

Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 260f850d69eb3..b48b2a25210cd 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2635,14 +2635,14 @@ static struct hso_device *hso_create_bulk_serial_device(
 		}
 
 		tiocmget->urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (tiocmget->urb) {
-			mutex_init(&tiocmget->mutex);
-			init_waitqueue_head(&tiocmget->waitq);
-		} else
-			hso_free_tiomget(serial);
-	}
-	else
+		if (!tiocmget->urb)
+			goto exit;
+
+		mutex_init(&tiocmget->mutex);
+		init_waitqueue_head(&tiocmget->waitq);
+	} else {
 		num_urbs = 1;
+	}
 
 	if (hso_serial_common_create(serial, num_urbs, BULK_URB_RX_SIZE,
 				     BULK_URB_TX_SIZE))
-- 
GitLab


From f1069a8756b9e9f6c055e709740d2d66650f0fb0 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 19 May 2021 15:03:08 +0200
Subject: [PATCH 1117/3804] compiler.h: Avoid using inline asm operand
 modifiers

The expansion of annotate_reachable/annotate_unreachable on s390 will
result in a compiler error if the __COUNTER__ value is high enough.
For example with "i" (154) the "%c0" operand of annotate_reachable
will be expanded to -102:

        -102:
        .pushsection .discard.reachable
        .long -102b - .
        .popsection

This is a quirk of the gcc backend for s390, it interprets the %c0
as a signed byte value. Avoid using operand modifiers in this case
by simply converting __COUNTER__ to string, with the same result,
but in an arch assembler independent way.

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/patch-1.thread-1a26be.git-930d1b44844a.your-ad-here.call-01621428935-ext-2104@work.hours
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Borislav Petkov <bp@suse.de>
Cc: linux-kernel@vger.kernel.org
---
 include/linux/compiler.h | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index df5b405e63051..77047904cf70f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -115,18 +115,24 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * The __COUNTER__ based labels are a hack to make each instance of the macros
  * unique, to convince GCC not to merge duplicate inline asm statements.
  */
-#define annotate_reachable() ({						\
-	asm volatile("%c0:\n\t"						\
+#define __stringify_label(n) #n
+
+#define __annotate_reachable(c) ({					\
+	asm volatile(__stringify_label(c) ":\n\t"			\
 		     ".pushsection .discard.reachable\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+		     ".long " __stringify_label(c) "b - .\n\t"		\
+		     ".popsection\n\t");				\
 })
-#define annotate_unreachable() ({					\
-	asm volatile("%c0:\n\t"						\
+#define annotate_reachable() __annotate_reachable(__COUNTER__)
+
+#define __annotate_unreachable(c) ({					\
+	asm volatile(__stringify_label(c) ":\n\t"			\
 		     ".pushsection .discard.unreachable\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+		     ".long " __stringify_label(c) "b - .\n\t"		\
+		     ".popsection\n\t");				\
 })
+#define annotate_unreachable() __annotate_unreachable(__COUNTER__)
+
 #define ASM_UNREACHABLE							\
 	"999:\n\t"							\
 	".pushsection .discard.unreachable\n\t"				\
-- 
GitLab


From c199f64ff93c48a45add92eee4456ffcabfc838e Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 19 May 2021 15:03:13 +0200
Subject: [PATCH 1118/3804] instrumentation.h: Avoid using inline asm operand
 modifiers

The expansion of instrumentation_begin/instrumentation_end on s390 will
result in a compiler error if the __COUNTER__ value is high enough.
For example with "i" (154) the "%c0" operand of instrumentation_begin
will be expanded to -102:

        -102:
        .pushsection .discard.instr_begin
        .long -102b - .
        .popsection

This is a quirk of the gcc backend for s390, it interprets the %c0
as a signed byte value. Avoid using operand modifiers in this case
by simply converting __COUNTER__ to string, with the same result,
but in an arch assembler independent way.

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lore.kernel.org/r/patch-2.thread-1a26be.git-1a26be80cb18.your-ad-here.call-01621428935-ext-2104@work.hours
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Borislav Petkov <bp@suse.de>
Cc: linux-kernel@vger.kernel.org
---
 include/linux/instrumentation.h | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h
index 93e2ad67fc10b..fa2cd8c63dcc9 100644
--- a/include/linux/instrumentation.h
+++ b/include/linux/instrumentation.h
@@ -4,13 +4,16 @@
 
 #if defined(CONFIG_DEBUG_ENTRY) && defined(CONFIG_STACK_VALIDATION)
 
+#include <linux/stringify.h>
+
 /* Begin/end of an instrumentation safe region */
-#define instrumentation_begin() ({					\
-	asm volatile("%c0: nop\n\t"						\
+#define __instrumentation_begin(c) ({					\
+	asm volatile(__stringify(c) ": nop\n\t"				\
 		     ".pushsection .discard.instr_begin\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+		     ".long " __stringify(c) "b - .\n\t"		\
+		     ".popsection\n\t");				\
 })
+#define instrumentation_begin() __instrumentation_begin(__COUNTER__)
 
 /*
  * Because instrumentation_{begin,end}() can nest, objtool validation considers
@@ -43,12 +46,13 @@
  * To avoid this, have _end() be a NOP instruction, this ensures it will be
  * part of the condition block and does not escape.
  */
-#define instrumentation_end() ({					\
-	asm volatile("%c0: nop\n\t"					\
+#define __instrumentation_end(c) ({					\
+	asm volatile(__stringify(c) ": nop\n\t"				\
 		     ".pushsection .discard.instr_end\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+		     ".long " __stringify(c) "b - .\n\t"		\
+		     ".popsection\n\t");				\
 })
+#define instrumentation_end() __instrumentation_end(__COUNTER__)
 #else
 # define instrumentation_begin()	do { } while(0)
 # define instrumentation_end()		do { } while(0)
-- 
GitLab


From bfc1f378c8953e68ccdbfe0a8c20748427488b80 Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omp.ru>
Date: Tue, 18 May 2021 23:38:54 +0300
Subject: [PATCH 1119/3804] pata_octeon_cf: avoid WARN_ON() in
 ata_host_activate()

Iff platform_get_irq() fails (or returns IRQ0) and thus the polling mode
has to be used, ata_host_activate() hits the WARN_ON() due to 'irq_handler'
parameter being non-NULL if the polling mode is selected.  Let's only set
the pointer to the driver's IRQ handler if platform_get_irq() returns a
valid IRQ # -- this should avoid the unnecessary WARN_ON()...

Fixes: 43f01da0f279 ("MIPS/OCTEON/ata: Convert pata_octeon_cf.c to use device tree.")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omp.ru>
Link: https://lore.kernel.org/r/3a241167-f84d-1d25-5b9b-be910afbe666@omp.ru
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_octeon_cf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index bd87476ab4813..b5a3f710d76de 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -898,10 +898,11 @@ static int octeon_cf_probe(struct platform_device *pdev)
 					return -EINVAL;
 				}
 
-				irq_handler = octeon_cf_interrupt;
 				i = platform_get_irq(dma_dev, 0);
-				if (i > 0)
+				if (i > 0) {
 					irq = i;
+					irq_handler = octeon_cf_interrupt;
+				}
 			}
 			of_node_put(dma_node);
 		}
-- 
GitLab


From 84c63d040938f64a7dc195696301166e75231bf5 Mon Sep 17 00:00:00 2001
From: Nikola Cornij <nikola.cornij@amd.com>
Date: Thu, 6 May 2021 22:46:52 -0400
Subject: [PATCH 1120/3804] drm/amd/display: Use the correct max downscaling
 value for DCN3.x family

[why]
As per spec, DCN3.x can do 6:1 downscaling and DCN2.x can do 4:1. The
max downscaling limit value for DCN2.x is 250, which means it's
calculated as 1000 / 4 = 250. For DCN3.x this then gives 1000 / 6 = 167.

[how]
Set maximum downscaling limit to 167 for DCN3.x

Signed-off-by: Nikola Cornij <nikola.cornij@amd.com>
Reviewed-by: Charlene Liu <Charlene.Liu@amd.com>
Reviewed-by: Harry Wentland <Harry.Wentland@amd.com>
Acked-by: Stylon Wang <stylon.wang@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c   | 7 ++++---
 drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 7 ++++---
 drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c | 7 ++++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 4a5fa23d8e7b0..5fcc2e64305d5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -826,10 +826,11 @@ static const struct dc_plane_cap plane_cap = {
 			.fp16 = 16000
 	},
 
+	/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 	.max_downscale_factor = {
-			.argb8888 = 600,
-			.nv12 = 600,
-			.fp16 = 600
+			.argb8888 = 167,
+			.nv12 = 167,
+			.fp16 = 167
 	}
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 5b54b7fc5105d..472696f949ac3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -843,10 +843,11 @@ static const struct dc_plane_cap plane_cap = {
 			.fp16 = 16000
 	},
 
+	/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 	.max_downscale_factor = {
-			.argb8888 = 600,
-			.nv12 = 600,
-			.fp16 = 600
+			.argb8888 = 167,
+			.nv12 = 167,
+			.fp16 = 167 
 	},
 	64,
 	64
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index fc2dea243d1ba..a33f0365329b9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -284,10 +284,11 @@ static const struct dc_plane_cap plane_cap = {
 				.nv12 = 16000,
 				.fp16 = 16000
 		},
+		/* 6:1 downscaling ratio: 1000/6 = 166.666 */
 		.max_downscale_factor = {
-				.argb8888 = 600,
-				.nv12 = 600,
-				.fp16 = 600
+				.argb8888 = 167,
+				.nv12 = 167,
+				.fp16 = 167
 		},
 		16,
 		16
-- 
GitLab


From 0c8df343c200529e6b9820bdfed01814140f75e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 12 May 2021 10:36:43 +0200
Subject: [PATCH 1121/3804] drm/radeon: use the dummy page for GART if needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Imported BOs don't have a pagelist any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Fixes: 0575ff3d33cd ("drm/radeon: stop using pages with drm_prime_sg_to_page_addr_arrays v2")
CC: stable@vger.kernel.org # 5.12
---
 drivers/gpu/drm/radeon/radeon_gart.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 3808a753127bc..04109a2a6fd76 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -301,7 +301,8 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 
 	for (i = 0; i < pages; i++, p++) {
-		rdev->gart.pages[p] = pagelist[i];
+		rdev->gart.pages[p] = pagelist ? pagelist[i] :
+			rdev->dummy_page.page;
 		page_base = dma_addr[i];
 		for (j = 0; j < (PAGE_SIZE / RADEON_GPU_PAGE_SIZE); j++, t++) {
 			page_entry = radeon_gart_get_page_entry(page_base, flags);
-- 
GitLab


From 22cbdbcfb61acc78d5fc21ebb13ccc0d7e29f793 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 4 May 2021 16:40:00 +0200
Subject: [PATCH 1122/3804] netfilter: conntrack: unregister ipv4 sockopts on
 error unwind

When ipv6 sockopt register fails, the ipv4 one needs to be removed.

Fixes: a0ae2562c6c ("netfilter: conntrack: remove l3proto abstraction")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_proto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 89e5bac384d70..dc9ca12b0489c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -664,7 +664,7 @@ int nf_conntrack_proto_init(void)
 
 #if IS_ENABLED(CONFIG_IPV6)
 cleanup_sockopt:
-	nf_unregister_sockopt(&so_getorigdst6);
+	nf_unregister_sockopt(&so_getorigdst);
 #endif
 	return ret;
 }
-- 
GitLab


From d53751568359e5b3ffb859b13cbd79dc77a571f1 Mon Sep 17 00:00:00 2001
From: Yi Li <liyi@loongson.cn>
Date: Fri, 14 May 2021 14:40:39 +0800
Subject: [PATCH 1123/3804] drm/amdgpu: Fix GPU TLB update error when PAGE_SIZE
 > AMDGPU_PAGE_SIZE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When PAGE_SIZE is larger than AMDGPU_PAGE_SIZE, the number of GPU TLB
entries which need to be updated in amdgpu_ttm_map_buffer() should be
multiplied by AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_PAGE_SIZE).

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Yi Li <liyi@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3bef0432cac2f..a376a993e4741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -225,7 +225,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	*addr += mm_cur->start & ~PAGE_MASK;
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	num_bytes = num_pages * 8;
+	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
 	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
-- 
GitLab


From dbd1003d1252db5973dddf20b24bb0106ac52aa2 Mon Sep 17 00:00:00 2001
From: Changfeng <Changfeng.Zhu@amd.com>
Date: Fri, 14 May 2021 15:28:25 +0800
Subject: [PATCH 1124/3804] drm/amdgpu: disable 3DCGCG on picasso/raven1 to
 avoid compute hang

There is a problem with the 3DCGCG firmware which causes compute tests
to hang on picasso/raven1. 3DCGCG needs to be disabled in the driver to
avoid the compute hang.

Signed-off-by: Changfeng <Changfeng.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++---
 drivers/gpu/drm/amd/amdgpu/soc15.c    |  2 --
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a078a38c2ceea..516467e962b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4943,7 +4943,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 	amdgpu_gfx_rlc_enter_safe_mode(adev);
 
 	/* Enable 3D CGCG/CGLS */
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
+	if (enable) {
 		/* write cmd to clear cgcg/cgls ov */
 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
 		/* unset CGCG override */
@@ -4955,8 +4955,12 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
 		/* enable 3Dcgcg FSM(0x0000363f) */
 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
 
-		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
-			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
+			data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
+				RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
+		else
+			data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
+
 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8e1b9a40839fc..980675052b53d 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1392,7 +1392,6 @@ static int soc15_common_early_init(void *handle)
 			adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
 				AMD_CG_SUPPORT_GFX_MGLS |
 				AMD_CG_SUPPORT_GFX_CP_LS |
-				AMD_CG_SUPPORT_GFX_3D_CGCG |
 				AMD_CG_SUPPORT_GFX_3D_CGLS |
 				AMD_CG_SUPPORT_GFX_CGCG |
 				AMD_CG_SUPPORT_GFX_CGLS |
@@ -1412,7 +1411,6 @@ static int soc15_common_early_init(void *handle)
 				AMD_CG_SUPPORT_GFX_MGLS |
 				AMD_CG_SUPPORT_GFX_RLC_LS |
 				AMD_CG_SUPPORT_GFX_CP_LS |
-				AMD_CG_SUPPORT_GFX_3D_CGCG |
 				AMD_CG_SUPPORT_GFX_3D_CGLS |
 				AMD_CG_SUPPORT_GFX_CGCG |
 				AMD_CG_SUPPORT_GFX_CGLS |
-- 
GitLab


From 080039273b126eeb0185a61c045893a25dbc046e Mon Sep 17 00:00:00 2001
From: Chris Park <Chris.Park@amd.com>
Date: Tue, 4 May 2021 16:20:55 -0400
Subject: [PATCH 1125/3804] drm/amd/display: Disconnect non-DP with no EDID

[Why]
Active DP dongles return no EDID when dongle
is connected, but VGA display is taken out.
Current driver behavior does not remove the
active display when this happens, and this is
a gap between dongle DTP and dongle behavior.

[How]
For active DP dongles and non-DP scenario,
disconnect sink on detection when no EDID
is read due to timeout.

Signed-off-by: Chris Park <Chris.Park@amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Stylon Wang <stylon.wang@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index f4374d83662ae..c1f5474c205a8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1076,6 +1076,24 @@ static bool dc_link_detect_helper(struct dc_link *link,
 			    dc_is_dvi_signal(link->connector_signal)) {
 				if (prev_sink)
 					dc_sink_release(prev_sink);
+				link_disconnect_sink(link);
+
+				return false;
+			}
+			/*
+			 * Abort detection for DP connectors if we have
+			 * no EDID and connector is active converter
+			 * as there are no display downstream
+			 *
+			 */
+			if (dc_is_dp_sst_signal(link->connector_signal) &&
+				(link->dpcd_caps.dongle_type ==
+						DISPLAY_DONGLE_DP_VGA_CONVERTER ||
+				link->dpcd_caps.dongle_type ==
+						DISPLAY_DONGLE_DP_DVI_CONVERTER)) {
+				if (prev_sink)
+					dc_sink_release(prev_sink);
+				link_disconnect_sink(link);
 
 				return false;
 			}
-- 
GitLab


From fa7e6abc75f3d491bc561734312d065dc9dc2a77 Mon Sep 17 00:00:00 2001
From: Jingwen Chen <Jingwen.Chen2@amd.com>
Date: Mon, 17 May 2021 16:16:10 +0800
Subject: [PATCH 1126/3804] drm/amd/amdgpu: fix refcount leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[Why]
A reference to the gem object rfb->base.obj[0] is taken according to
num_planes in amdgpufb_create, but is not put according to num_planes.

[How]
put rfb->base.obj[0] in amdgpu_fbdev_destroy according to num_planes

Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 4f10c45298406..09b0486475232 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -288,10 +288,13 @@ out:
 static int amdgpu_fbdev_destroy(struct drm_device *dev, struct amdgpu_fbdev *rfbdev)
 {
 	struct amdgpu_framebuffer *rfb = &rfbdev->rfb;
+	int i;
 
 	drm_fb_helper_unregister_fbi(&rfbdev->helper);
 
 	if (rfb->base.obj[0]) {
+		for (i = 0; i < rfb->base.format->num_planes; i++)
+			drm_gem_object_put(rfb->base.obj[0]);
 		amdgpufb_destroy_pinned_object(rfb->base.obj[0]);
 		rfb->base.obj[0] = NULL;
 		drm_framebuffer_unregister_private(&rfb->base);
-- 
GitLab


From ab95cb3e1bc44d4376bd8d331b1cff82b99020e3 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Tue, 18 May 2021 08:44:23 -0400
Subject: [PATCH 1127/3804] drm/amdgpu: add video_codecs query support for
 aldebaran

Add video_codecs query support for aldebaran.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 980675052b53d..e65c286f93a6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -302,6 +302,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode,
 			*codecs = &rv_video_codecs_decode;
 		return 0;
 	case CHIP_ARCTURUS:
+	case CHIP_ALDEBARAN:
 	case CHIP_RENOIR:
 		if (encode)
 			*codecs = &vega_video_codecs_encode;
-- 
GitLab


From 1e5c37385097c35911b0f8a0c67ffd10ee1af9a2 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Tue, 18 May 2021 10:56:07 +0800
Subject: [PATCH 1128/3804] drm/amdgpu: Fix a use-after-free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks like we forgot to set ttm->sg to NULL after freeing it.
This hits the panic below:

[ 1235.844104] general protection fault, probably for non-canonical address 0x6b6b6b6b6b6b7b4b: 0000 [#1] SMP DEBUG_PAGEALLOC NOPTI
[ 1235.989074] Call Trace:
[ 1235.991751]  sg_free_table+0x17/0x20
[ 1235.995667]  amdgpu_ttm_backend_unbind.cold+0x4d/0xf7 [amdgpu]
[ 1236.002288]  amdgpu_ttm_backend_destroy+0x29/0x130 [amdgpu]
[ 1236.008464]  ttm_tt_destroy+0x1e/0x30 [ttm]
[ 1236.013066]  ttm_bo_cleanup_memtype_use+0x51/0xa0 [ttm]
[ 1236.018783]  ttm_bo_release+0x262/0xa50 [ttm]
[ 1236.023547]  ttm_bo_put+0x82/0xd0 [ttm]
[ 1236.027766]  amdgpu_bo_unref+0x26/0x50 [amdgpu]
[ 1236.032809]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7aa/0xd90 [amdgpu]
[ 1236.040400]  kfd_ioctl_alloc_memory_of_gpu+0xe2/0x330 [amdgpu]
[ 1236.046912]  kfd_ioctl+0x463/0x690 [amdgpu]

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a376a993e4741..d5cbc51c5eaa8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1210,6 +1210,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
 	if (gtt && gtt->userptr) {
 		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
 		kfree(ttm->sg);
+		ttm->sg = NULL;
 		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
 		return;
 	}
-- 
GitLab


From 99c45ba5799d6b938bd9bd20edfeb6f3e3e039b9 Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Mon, 17 May 2021 16:35:40 +0800
Subject: [PATCH 1129/3804] drm/amdgpu: update gc golden setting for Navi12

Current golden setting is out of date.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 2408ed4c7d847..7ce76a6b3a350 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1395,9 +1395,10 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1800ff, 0x00000044),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4),
@@ -1415,12 +1416,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x001f0000, 0x00070104),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000)
 };
 
 static bool gfx_v10_is_rlcg_rw(struct amdgpu_device *adev, u32 offset, uint32_t *flag, bool write)
-- 
GitLab


From 77194d8642dd4cb7ea8ced77bfaea55610574c38 Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Mon, 17 May 2021 16:38:00 +0800
Subject: [PATCH 1130/3804] drm/amdgpu: update sdma golden setting for Navi12

Current golden setting is out of date.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a1273..8859133ce37eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -123,6 +123,10 @@ static const struct soc15_reg_golden golden_settings_sdma_nv14[] = {
 
 static const struct soc15_reg_golden golden_settings_sdma_nv12[] = {
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG, 0x001877ff, 0x00000044),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x001877ff, 0x00000044),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
 };
 
-- 
GitLab


From 9c2876d56f1ce9b6b2072f1446fb1e8d1532cb3d Mon Sep 17 00:00:00 2001
From: Lang Yu <Lang.Yu@amd.com>
Date: Mon, 17 May 2021 12:47:20 +0800
Subject: [PATCH 1131/3804] drm/amd/amdgpu: fix a potential deadlock in gpu
 reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When amdgpu_ib_ring_tests failed, the reset logic called
amdgpu_device_ip_suspend twice, then deadlock occurred.
Deadlock log:

[  805.655192] amdgpu 0000:04:00.0: amdgpu: ib ring test failed (-110).
[  806.290952] [drm] free PSP TMR buffer

[  806.319406] ============================================
[  806.320315] WARNING: possible recursive locking detected
[  806.321225] 5.11.0-custom #1 Tainted: G        W  OEL
[  806.322135] --------------------------------------------
[  806.323043] cat/2593 is trying to acquire lock:
[  806.323825] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.325668]
               but task is already holding lock:
[  806.326664] ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.328430]
               other info that might help us debug this:
[  806.329539]  Possible unsafe locking scenario:

[  806.330549]        CPU0
[  806.330983]        ----
[  806.331416]   lock(&adev->dm.dc_lock);
[  806.332086]   lock(&adev->dm.dc_lock);
[  806.332738]
                *** DEADLOCK ***

[  806.333747]  May be due to missing lock nesting notation

[  806.334899] 3 locks held by cat/2593:
[  806.335537]  #0: ffff888100d3f1b8 (&attr->mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110
[  806.337009]  #1: ffff888136b1fd78 (&adev->reset_sem){++++}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  806.339018]  #2: ffff888136b1cdc8 (&adev->dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.340869]
               stack backtrace:
[  806.341621] CPU: 6 PID: 2593 Comm: cat Tainted: G        W  OEL    5.11.0-custom #1
[  806.342921] Hardware name: AMD Celadon-CZN/Celadon-CZN, BIOS WLD0C23N_Weekly_20_12_2 12/23/2020
[  806.344413] Call Trace:
[  806.344849]  dump_stack+0x93/0xbd
[  806.345435]  __lock_acquire.cold+0x18a/0x2cf
[  806.346179]  lock_acquire+0xca/0x390
[  806.346807]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.347813]  __mutex_lock+0x9b/0x930
[  806.348454]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.349434]  ? amdgpu_device_indirect_rreg+0x58/0x70 [amdgpu]
[  806.350581]  ? _raw_spin_unlock_irqrestore+0x47/0x50
[  806.351437]  ? dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.352437]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.353252]  ? rcu_read_lock_sched_held+0x4f/0x80
[  806.354064]  mutex_lock_nested+0x1b/0x20
[  806.354747]  ? mutex_lock_nested+0x1b/0x20
[  806.355457]  dm_suspend+0xb8/0x1d0 [amdgpu]
[  806.356427]  ? soc15_common_set_clockgating_state+0x17d/0x19 [amdgpu]
[  806.357736]  amdgpu_device_ip_suspend_phase1+0x78/0xd0 [amdgpu]
[  806.360394]  amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  806.362926]  amdgpu_device_pre_asic_reset+0xb3/0x270 [amdgpu]
[  806.365560]  amdgpu_device_gpu_recover.cold+0x679/0x8eb [amdgpu]

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8b2a37bf2adf1..66ddfe4f58c2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4479,7 +4479,6 @@ out:
 			r = amdgpu_ib_ring_tests(tmp_adev);
 			if (r) {
 				dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
-				r = amdgpu_device_ip_suspend(tmp_adev);
 				need_full_reset = true;
 				r = -EAGAIN;
 				goto end;
-- 
GitLab


From a2b4785f01280a4291edb9fda69032fc2e4bfd3f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Tue, 18 May 2021 17:48:02 +0200
Subject: [PATCH 1132/3804] drm/amdgpu: stop touching sched.ready in the
 backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This unfortunately comes up in regular intervals and breaks
GPU reset for the engine in question.

The sched.ready flag controls if an engine can't get working
during hw_init, but should never be set to false during hw_fini.

v2: squash in unused variable fix (Alex)

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 --
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 --
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 5 -----
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c  | 8 +-------
 4 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 83531997aeba9..938ef4ce5b760 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -198,8 +198,6 @@ static int jpeg_v2_5_hw_fini(void *handle)
 		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
 		      RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
 			jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
-
-		ring->sched.ready = false;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index de5dfcfb38591..94be35357f7d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -166,8 +166,6 @@ static int jpeg_v3_0_hw_fini(void *handle)
 	      RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
 		jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
-	ring->sched.ready = false;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index b1ad9e52b2347..240596b25fe4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -497,11 +497,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev)
 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
 		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 	}
-
-	sdma0->sched.ready = false;
-	sdma1->sched.ready = false;
-	sdma2->sched.ready = false;
-	sdma3->sched.ready = false;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index cf165ab5dd26d..14470da521138 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -373,7 +373,7 @@ static int vcn_v3_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring;
-	int i, j;
+	int i;
 
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
@@ -388,12 +388,6 @@ static int vcn_v3_0_hw_fini(void *handle)
 				vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 			}
 		}
-		ring->sched.ready = false;
-
-		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
-			ring = &adev->vcn.inst[i].ring_enc[j];
-			ring->sched.ready = false;
-		}
 	}
 
 	return 0;
-- 
GitLab


From d201d7631ca170b038e7f8921120d05eec70d7c5 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Wed, 19 May 2021 08:40:11 +1000
Subject: [PATCH 1133/3804] cifs: fix memory leak in smb2_copychunk_range

When using smb2_copychunk_range() for large ranges we will
run through several iterations of a loop calling SMB2_ioctl()
but never actually free the returned buffer except for the final
iteration.
This leads to memory leaks every time a large copychunk is requested.

Fixes: 9bf0c9cd4314 ("CIFS: Fix SMB2/SMB3 Copy offload support (refcopy) for large files")
Cc: <stable@vger.kernel.org>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index dd0eb665b680c..c693624a7267b 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1861,6 +1861,8 @@ smb2_copychunk_range(const unsigned int xid,
 			cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk));
 
 		/* Request server copy to target from src identified by key */
+		kfree(retbuf);
+		retbuf = NULL;
 		rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
 			trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE,
 			true /* is_fsctl */, (char *)pcchunk,
-- 
GitLab


From e83aa3528a38bddae182a35d0efb5a6c35143c1c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 19 May 2021 18:47:07 +0800
Subject: [PATCH 1134/3804] cifs: Fix inconsistent indenting

Eliminate the following smatch warning:

fs/cifs/fs_context.c:1148 smb3_fs_context_parse_param() warn:
inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/fs_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 5d21cd905315d..92d4ab029c917 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1145,7 +1145,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		/* if iocharset not set then load_nls_default
 		 * is used by caller
 		 */
-		 cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+		cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
 		break;
 	case Opt_netbiosname:
 		memset(ctx->source_rfc1001_name, 0x20,
-- 
GitLab


From 860b69a9d77160d21ca00357fd6c5217f9d41fb1 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Wed, 5 May 2021 10:56:47 +0000
Subject: [PATCH 1135/3804] Fix kernel oops when CONFIG_DEBUG_ATOMIC_SLEEP is
 enabled.

Removed oplock_break_received flag which was added to achieve
synchronization between oplock handler and open handler by earlier commit.

It is not needed because there is an existing lock open_file_lock to achieve
the same. find_readable_file takes open_file_lock and then traverses the
openFileList. Similarly, cifs_oplock_break while closing the deferred
handle (i.e. cifsFileInfo_put) takes open_file_lock and then sends close
to the server.

Added comments for better readability.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c   |  2 +-
 fs/cifs/cifsglob.h |  3 +--
 fs/cifs/file.c     | 27 ++++++++++++---------------
 fs/cifs/misc.c     |  9 +++++++++
 4 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index d7ea9c5fe0f84..2ffcb29d5c8f4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -133,7 +133,7 @@ struct workqueue_struct	*cifsiod_wq;
 struct workqueue_struct	*decrypt_wq;
 struct workqueue_struct	*fileinfo_put_wq;
 struct workqueue_struct	*cifsoplockd_wq;
-struct workqueue_struct *deferredclose_wq;
+struct workqueue_struct	*deferredclose_wq;
 __u32 cifs_lock_secret;
 
 /*
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d88b4b523dcc4..ea90c53386b81 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1257,8 +1257,7 @@ struct cifsFileInfo {
 	struct work_struct oplock_break; /* work for oplock breaks */
 	struct work_struct put; /* work for the final part of _put */
 	struct delayed_work deferred;
-	bool oplock_break_received; /* Flag to indicate oplock break */
-	bool deferred_scheduled;
+	bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
 };
 
 struct cifs_io_parms {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6caad100c3f36..304d9d3783c66 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -323,8 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 	cfile->dentry = dget(dentry);
 	cfile->f_flags = file->f_flags;
 	cfile->invalidHandle = false;
-	cfile->oplock_break_received = false;
-	cfile->deferred_scheduled = false;
+	cfile->deferred_close_scheduled = false;
 	cfile->tlink = cifs_get_tlink(tlink);
 	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
 	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
@@ -574,21 +573,18 @@ int cifs_open(struct inode *inode, struct file *file)
 			file->f_op = &cifs_file_direct_ops;
 	}
 
-	spin_lock(&CIFS_I(inode)->deferred_lock);
 	/* Get the cached handle as SMB2 close is deferred */
 	rc = cifs_get_readable_path(tcon, full_path, &cfile);
 	if (rc == 0) {
 		if (file->f_flags == cfile->f_flags) {
 			file->private_data = cfile;
+			spin_lock(&CIFS_I(inode)->deferred_lock);
 			cifs_del_deferred_close(cfile);
 			spin_unlock(&CIFS_I(inode)->deferred_lock);
 			goto out;
 		} else {
-			spin_unlock(&CIFS_I(inode)->deferred_lock);
 			_cifsFileInfo_put(cfile, true, false);
 		}
-	} else {
-		spin_unlock(&CIFS_I(inode)->deferred_lock);
 	}
 
 	if (server->oplocks)
@@ -878,12 +874,12 @@ void smb2_deferred_work_close(struct work_struct *work)
 			struct cifsFileInfo, deferred.work);
 
 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-	if (!cfile->deferred_scheduled) {
+	if (!cfile->deferred_close_scheduled) {
 		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 		return;
 	}
 	cifs_del_deferred_close(cfile);
-	cfile->deferred_scheduled = false;
+	cfile->deferred_close_scheduled = false;
 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
 	_cifsFileInfo_put(cfile, true, false);
 }
@@ -905,14 +901,15 @@ int cifs_close(struct inode *inode, struct file *file)
 				inode->i_ctime = inode->i_mtime = current_time(inode);
 			spin_lock(&cinode->deferred_lock);
 			cifs_add_deferred_close(cfile, dclose);
-			if (cfile->deferred_scheduled) {
+			if (cfile->deferred_close_scheduled &&
+			    delayed_work_pending(&cfile->deferred)) {
 				mod_delayed_work(deferredclose_wq,
 						&cfile->deferred, cifs_sb->ctx->acregmax);
 			} else {
 				/* Deferred close for files */
 				queue_delayed_work(deferredclose_wq,
 						&cfile->deferred, cifs_sb->ctx->acregmax);
-				cfile->deferred_scheduled = true;
+				cfile->deferred_close_scheduled = true;
 				spin_unlock(&cinode->deferred_lock);
 				return 0;
 			}
@@ -2020,8 +2017,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
 			continue;
 		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
-			if ((!open_file->invalidHandle) &&
-				(!open_file->oplock_break_received)) {
+			if ((!open_file->invalidHandle)) {
 				/* found a good file */
 				/* lock it so it will not be closed on us */
 				cifsFileInfo_get(open_file);
@@ -4874,13 +4870,14 @@ oplock_break_ack:
 	}
 	/*
 	 * When oplock break is received and there are no active
-	 * file handles but cached, then set the flag oplock_break_received.
+	 * file handles but cached, then schedule deferred close immediately.
 	 * So, new open will not use cached handle.
 	 */
 	spin_lock(&CIFS_I(inode)->deferred_lock);
 	is_deferred = cifs_is_deferred_close(cfile, &dclose);
-	if (is_deferred && cfile->deferred_scheduled) {
-		cfile->oplock_break_received = true;
+	if (is_deferred &&
+	    cfile->deferred_close_scheduled &&
+	    delayed_work_pending(&cfile->deferred)) {
 		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 801a5300f765d..34f2a7e80c581 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -672,6 +672,9 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 	spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 bool
 cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
 {
@@ -688,6 +691,9 @@ cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **
 	return false;
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 void
 cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
 {
@@ -707,6 +713,9 @@ cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *
 	list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
 }
 
+/*
+ * Critical section which runs after acquiring deferred_lock.
+ */
 void
 cifs_del_deferred_close(struct cifsFileInfo *cfile)
 {
-- 
GitLab


From 0ab95c2510b641fb860a773b3d242ef9768a8f66 Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Mon, 17 May 2021 11:28:34 +0000
Subject: [PATCH 1136/3804] Defer close only when lease is enabled.

When the smb2 lease parameter is disabled on the server, the server grants
a batch oplock instead of an RHW lease by default on open. The inode page
cache then needs to be zapped immediately upon close, as the cache is not
valid.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h | 1 +
 fs/cifs/file.c     | 1 +
 fs/cifs/smb2ops.c  | 2 ++
 3 files changed, 4 insertions(+)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ea90c53386b81..8488d70244620 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1417,6 +1417,7 @@ struct cifsInodeInfo {
 	struct inode vfs_inode;
 	struct list_head deferred_closes; /* list of deferred closes */
 	spinlock_t deferred_lock; /* protection on deferred list */
+	bool lease_granted; /* Flag to indicate whether lease or oplock is granted. */
 };
 
 static inline struct cifsInodeInfo *
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 304d9d3783c66..a1abd3da1d442 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -896,6 +896,7 @@ int cifs_close(struct inode *inode, struct file *file)
 		file->private_data = NULL;
 		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
 		if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+		    cinode->lease_granted &&
 		    dclose) {
 			if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
 				inode->i_ctime = inode->i_mtime = current_time(inode);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index c693624a7267b..21ef51d338e0c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3983,6 +3983,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
 		      unsigned int epoch, bool *purge_cache)
 {
 	oplock &= 0xFF;
+	cinode->lease_granted = false;
 	if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
 		return;
 	if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
@@ -4009,6 +4010,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
 	unsigned int new_oplock = 0;
 
 	oplock &= 0xFF;
+	cinode->lease_granted = true;
 	if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
 		return;
 
-- 
GitLab


From 18b380ed61f892ed06838d1f1a5124d966292ed3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 14 May 2021 14:48:43 +0800
Subject: [PATCH 1137/3804] PM / devfreq: Add missing error code in
 devfreq_add_device()

Set err code in the error path before jumping to the end of the function.

Fixes: 4dc3bab8687f ("PM / devfreq: Add support delayed timer for polling mode")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/devfreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index fe08c46642f7c..28f3e0ba6cdd9 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -823,6 +823,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	if (devfreq->profile->timer < 0
 		|| devfreq->profile->timer >= DEVFREQ_TIMER_NUM) {
 		mutex_unlock(&devfreq->lock);
+		err = -EINVAL;
 		goto err_dev;
 	}
 
-- 
GitLab


From ac9fd3c8034011cc10a4c161b70a5837d95203f6 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 18 May 2021 12:49:10 +0800
Subject: [PATCH 1138/3804] opp: use list_del_init instead of
 list_del/INIT_LIST_HEAD

Use list_del_init() instead of list_del() + INIT_LIST_HEAD()
to simplify the code.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index c582a9ca397bb..aa75a1caf08a3 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -433,8 +433,7 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
 
 		/* All required opp-tables found, remove from lazy list */
 		if (!lazy) {
-			list_del(&opp_table->lazy);
-			INIT_LIST_HEAD(&opp_table->lazy);
+			list_del_init(&opp_table->lazy);
 
 			list_for_each_entry(opp, &opp_table->opp_list, node)
 				_required_opps_available(opp, opp_table->required_opp_count);
-- 
GitLab


From e2f5efd0f0e229bd110eab513e7c0331d61a4649 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Thu, 20 May 2021 13:29:19 +1000
Subject: [PATCH 1139/3804] powerpc: Fix early setup to make early_ioremap()
 work

The immediate problem is that after commit
0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()") the kernel
silently reboots on some systems.

The reason is that early_ioremap() returns broken addresses as it uses
the slot_virt[] array, which is initialized with offsets from FIXADDR_TOP ==
IOREMAP_END+FIXADDR_SIZE == KERN_IO_END - FIXADDR_SIZE + FIXADDR_SIZE ==
__kernel_io_end, which is 0 when early_ioremap_setup() is called.
__kernel_io_end is initialized a little bit later in early_init_mmu().

This fixes the initialization by swapping early_ioremap_setup() and
early_init_mmu().

Fixes: 265c3491c4bc ("powerpc: Add support for GENERIC_EARLY_IOREMAP")
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Drop unrelated cleanup & cleanup change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520032919.358935-1-aik@ozlabs.ru
---
 arch/powerpc/kernel/setup_64.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b779d25761cf9..e42b85e4f1aa5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr)
 	apply_feature_fixups();
 	setup_feature_keys();
 
-	early_ioremap_setup();
-
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
 
+	early_ioremap_setup();
+
 	/*
 	 * After firmware and early platform setup code has set things up,
 	 * we note the SPR values for configurable control/performance
-- 
GitLab


From b106776080a1cf953a1b2fd50cb2a995db4732be Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Tue, 27 Apr 2021 15:48:29 +0300
Subject: [PATCH 1140/3804] thunderbolt: dma_port: Fix NVM read buffer bounds
 and offset issue

Up to 64 bytes of data can be read from NVM in one go. Read address
must be dword aligned. Data is read into a local buffer.

If the caller asks to read data starting at an unaligned address, a full
dword is read from NVM into a local buffer anyway. Data is then copied
from the local buffer, starting at the unaligned offset, to the caller
buffer.

In cases where asked data length + unaligned offset is over 64 bytes
we need to make sure we don't read past the 64 bytes in the local
buffer when copying to caller buffer, and make sure that we don't
skip copying unaligned offset bytes from local buffer anymore after
the first round of 64 byte NVM data read.

Fixes: 3e13676862f9 ("thunderbolt: Add support for DMA configuration based mailbox")
Cc: stable@vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/dma_port.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/thunderbolt/dma_port.c b/drivers/thunderbolt/dma_port.c
index 7288aaf01ae6a..5631319f7b205 100644
--- a/drivers/thunderbolt/dma_port.c
+++ b/drivers/thunderbolt/dma_port.c
@@ -366,15 +366,15 @@ int dma_port_flash_read(struct tb_dma_port *dma, unsigned int address,
 			void *buf, size_t size)
 {
 	unsigned int retries = DMA_PORT_RETRIES;
-	unsigned int offset;
-
-	offset = address & 3;
-	address = address & ~3;
 
 	do {
-		u32 nbytes = min_t(u32, size, MAIL_DATA_DWORDS * 4);
+		unsigned int offset;
+		size_t nbytes;
 		int ret;
 
+		offset = address & 3;
+		nbytes = min_t(size_t, size + offset, MAIL_DATA_DWORDS * 4);
+
 		ret = dma_port_flash_read_block(dma, address, dma->buf,
 						ALIGN(nbytes, 4));
 		if (ret) {
@@ -386,6 +386,7 @@ int dma_port_flash_read(struct tb_dma_port *dma, unsigned int address,
 			return ret;
 		}
 
+		nbytes -= offset;
 		memcpy(buf, dma->buf + offset, nbytes);
 
 		size -= nbytes;
-- 
GitLab


From 22c7a18ed5f007faccb7527bc890463763214081 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 25 Mar 2021 10:32:50 +0200
Subject: [PATCH 1141/3804] thunderbolt: usb4: Fix NVM read buffer bounds and
 offset issue

Up to 64 bytes of data can be read from NVM in one go.
Read address must be dword aligned. Data is read into a local buffer.

If the caller asks to read data starting at an unaligned address, a full
dword is read from NVM into a local buffer anyway. Data is then copied
from the local buffer, starting at the unaligned offset, to the caller
buffer.

In cases where asked data length + unaligned offset is over 64 bytes
we need to make sure we don't read past the 64 bytes in the local
buffer when copying to caller buffer, and make sure that we don't
skip copying unaligned offset bytes from local buffer anymore after
the first round of 64 byte NVM data read.

Fixes: b04079837b20 ("thunderbolt: Add initial support for USB4")
Cc: stable@vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/usb4.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index 680bc738dd66d..671d72af8ba13 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -68,15 +68,15 @@ static int usb4_do_read_data(u16 address, void *buf, size_t size,
 	unsigned int retries = USB4_DATA_RETRIES;
 	unsigned int offset;
 
-	offset = address & 3;
-	address = address & ~3;
-
 	do {
-		size_t nbytes = min_t(size_t, size, USB4_DATA_DWORDS * 4);
 		unsigned int dwaddress, dwords;
 		u8 data[USB4_DATA_DWORDS * 4];
+		size_t nbytes;
 		int ret;
 
+		offset = address & 3;
+		nbytes = min_t(size_t, size + offset, USB4_DATA_DWORDS * 4);
+
 		dwaddress = address / 4;
 		dwords = ALIGN(nbytes, 4) / 4;
 
@@ -87,6 +87,7 @@ static int usb4_do_read_data(u16 address, void *buf, size_t size,
 			return ret;
 		}
 
+		nbytes -= offset;
 		memcpy(buf, data + offset, nbytes);
 
 		size -= nbytes;
-- 
GitLab


From 43a511c44e58e357a687d61a20cf5ef1dc9e5a7c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 10 May 2021 12:25:59 +0200
Subject: [PATCH 1142/3804] gfs2: Prevent direct-I/O write fallback errors from
 getting lost

When a direct I/O write fails entirely and falls back to buffered I/O and the
buffered I/O fails, the write fails with return value 0 instead of the error
number reported by the buffered I/O. Fix that.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a0b542d84cd9e..493a83e3f5906 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -911,8 +911,11 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		current->backing_dev_info = inode_to_bdi(inode);
 		buffered = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
 		current->backing_dev_info = NULL;
-		if (unlikely(buffered <= 0))
+		if (unlikely(buffered <= 0)) {
+			if (!ret)
+				ret = buffered;
 			goto out_unlock;
+		}
 
 		/*
 		 * We need to ensure that the page cache pages are written to
-- 
GitLab


From 4194dec4b4169e5a9a5171db60c2ec00c4d8cf16 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 19 May 2021 14:45:56 -0400
Subject: [PATCH 1143/3804] gfs2: Fix I_NEW check in gfs2_dinode_in

Patch 4a378d8a0d96 added a new check for I_NEW inodes, but unfortunately
it used the wrong variable, i_flags. This caused GFS2 to withdraw when
gfs2_lookup_by_inum needed to refresh an I_NEW inode. This patch switches
to use the correct variable, i_state.

Fixes: 4a378d8a0d96 ("gfs2: be careful with inode refresh")
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 454095e9fedfd..54d3fbeb3002f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -396,7 +396,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	struct timespec64 atime;
 	u16 height, depth;
 	umode_t mode = be32_to_cpu(str->di_mode);
-	bool is_new = ip->i_inode.i_flags & I_NEW;
+	bool is_new = ip->i_inode.i_state & I_NEW;
 
 	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
 		goto corrupt;
-- 
GitLab


From 20265d9a67e40eafd39a8884658ca2e36f05985d Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 18 May 2021 09:12:10 -0400
Subject: [PATCH 1144/3804] gfs2: fix scheduling while atomic bug in glocks

Before this patch, in the unlikely event that gfs2_glock_dq encountered
a withdraw, it would do a wait_on_bit to wait for its journal to be
recovered, but it never released the glock's spin_lock, which caused a
scheduling-while-atomic error.

This patch unlocks the lockref spin_lock before waiting for recovery.

Fixes: 601ef0d52e96 ("gfs2: Force withdraw to replay journals and wait for it to finish")
Cc: stable@vger.kernel.org # v5.7+
Reported-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ea7fc5c641c7e..8c547db210fbc 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1466,9 +1466,11 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	    glock_blocked_by_withdraw(gl) &&
 	    gh->gh_gl != sdp->sd_jinode_gl) {
 		sdp->sd_glock_dqs_held++;
+		spin_unlock(&gl->gl_lockref.lock);
 		might_sleep();
 		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
 			    TASK_UNINTERRUPTIBLE);
+		spin_lock(&gl->gl_lockref.lock);
 	}
 	if (gh->gh_flags & GL_NOCACHE)
 		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
-- 
GitLab


From 865cc3e9cc0b1d4b81c10d53174bced76decf888 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Tue, 18 May 2021 09:14:31 -0400
Subject: [PATCH 1145/3804] gfs2: fix a deadlock on withdraw-during-mount

Before this patch, gfs2 would deadlock because of the following
sequence during mount:

mount
   gfs2_fill_super
      gfs2_make_fs_rw <--- Detects IO error with glock
         kthread_stop(sdp->sd_quotad_process);
            <--- Blocked waiting for quotad to finish

logd
   Detects IO error and the need to withdraw
   calls gfs2_withdraw
      gfs2_make_fs_ro
         kthread_stop(sdp->sd_quotad_process);
            <--- Blocked waiting for quotad to finish

gfs2_quotad
   gfs2_statfs_sync
      gfs2_glock_wait <---- Blocked waiting for statfs glock to be granted

glock_work_func
   do_xmote <---Detects IO error, can't release glock: blocked on withdraw
      glops->go_inval
      glock_blocked_by_withdraw
         requeue glock work & exit <--- work requeued, blocked by withdraw

This patch makes a special exception for the statfs system inode glock,
which allows the statfs glock UNLOCK to proceed normally. That allows the
quotad daemon to exit during the withdraw, which allows the logd daemon
to exit during the withdraw, which allows the mount to exit.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 8c547db210fbc..797949e784ccd 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -582,6 +582,16 @@ out_locked:
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
+static bool is_system_glock(struct gfs2_glock *gl)
+{
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+	if (gl == m_ip->i_gl)
+		return true;
+	return false;
+}
+
 /**
  * do_xmote - Calls the DLM to change the state of a lock
  * @gl: The lock state
@@ -671,17 +681,25 @@ skip_inval:
 	 * to see sd_log_error and withdraw, and in the meantime, requeue the
 	 * work for later.
 	 *
+	 * We make a special exception for some system glocks, such as the
+	 * system statfs inode glock, which needs to be granted before the
+	 * gfs2_quotad daemon can exit, and that exit needs to finish before
+	 * we can unmount the withdrawn file system.
+	 *
 	 * However, if we're just unlocking the lock (say, for unmount, when
 	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
 	 * then it's okay to tell dlm to unlock it.
 	 */
 	if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
 		gfs2_withdraw_delayed(sdp);
-	if (glock_blocked_by_withdraw(gl)) {
-		if (target != LM_ST_UNLOCKED ||
-		    test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) {
+	if (glock_blocked_by_withdraw(gl) &&
+	    (target != LM_ST_UNLOCKED ||
+	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
+		if (!is_system_glock(gl)) {
 			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
 			goto out;
+		} else {
+			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 		}
 	}
 
-- 
GitLab


From f5456b5d67cf812fd31fe3e130ca216b2e0908e5 Mon Sep 17 00:00:00 2001
From: Bob Peterson <rpeterso@redhat.com>
Date: Wed, 19 May 2021 14:54:02 -0400
Subject: [PATCH 1146/3804] gfs2: Clean up revokes on normal withdraws

Before this patch, the system ail lists were cleaned up if the logd
process withdrew, but on other withdraws, they were not cleaned up.
This included the cleaning up of the revokes as well.

This patch reorganizes things a bit so that all withdraws (not just logd)
clean up the ail lists, including any pending revokes.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/log.c  | 6 +++---
 fs/gfs2/log.h  | 1 +
 fs/gfs2/lops.c | 7 ++++++-
 fs/gfs2/lops.h | 1 +
 fs/gfs2/util.c | 1 +
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 97d54e581a7bd..42c15cfc08219 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -926,10 +926,10 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 }
 
 /**
- * ail_drain - drain the ail lists after a withdraw
+ * gfs2_ail_drain - drain the ail lists after a withdraw
  * @sdp: Pointer to GFS2 superblock
  */
-static void ail_drain(struct gfs2_sbd *sdp)
+void gfs2_ail_drain(struct gfs2_sbd *sdp)
 {
 	struct gfs2_trans *tr;
 
@@ -956,6 +956,7 @@ static void ail_drain(struct gfs2_sbd *sdp)
 		list_del(&tr->tr_list);
 		gfs2_trans_free(sdp, tr);
 	}
+	gfs2_drain_revokes(sdp);
 	spin_unlock(&sdp->sd_ail_lock);
 }
 
@@ -1162,7 +1163,6 @@ out_withdraw:
 	if (tr && list_empty(&tr->tr_list))
 		list_add(&tr->tr_list, &sdp->sd_ail1_list);
 	spin_unlock(&sdp->sd_ail_lock);
-	ail_drain(sdp); /* frees all transactions */
 	tr = NULL;
 	goto out_end;
 }
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index eea58015710e7..fc905c2af53ce 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -93,5 +93,6 @@ extern int gfs2_logd(void *data);
 extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
 extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
 extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
+extern void gfs2_ail_drain(struct gfs2_sbd *sdp);
 
 #endif /* __LOG_DOT_H__ */
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 221e7118cc3b9..8ee05d25dfa6c 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -885,7 +885,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	gfs2_log_write_page(sdp, page);
 }
 
-static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+void gfs2_drain_revokes(struct gfs2_sbd *sdp)
 {
 	struct list_head *head = &sdp->sd_log_revokes;
 	struct gfs2_bufdata *bd;
@@ -900,6 +900,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 	}
 }
 
+static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+{
+	gfs2_drain_revokes(sdp);
+}
+
 static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
 				  struct gfs2_log_header_host *head, int pass)
 {
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 31b6dd0d2e5d8..f707601597dcc 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -20,6 +20,7 @@ extern void gfs2_log_submit_bio(struct bio **biop, int opf);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
 extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
 			   struct gfs2_log_header_host *head, bool keep_cache);
+extern void gfs2_drain_revokes(struct gfs2_sbd *sdp);
 static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
 {
 	return sdp->sd_ldptrs;
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 3e08027a6c81b..f4325b44956dc 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -131,6 +131,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
 		return;
 
+	gfs2_ail_drain(sdp); /* frees all transactions */
 	inode = sdp->sd_jdesc->jd_inode;
 	ip = GFS2_I(inode);
 	i_gl = ip->i_gl;
-- 
GitLab


From 7e008b02557ccece4d2c31fb0eaf6243cbc87121 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 17 May 2021 13:20:17 +0200
Subject: [PATCH 1147/3804] dma-buf: fix unintended pin/unpin warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DMA-buf internal users call the pin/unpin functions without having a
dynamic attachment. Avoid the warning and backtrace in the logs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Bugs: https://gitlab.freedesktop.org/drm/intel/-/issues/3481
Fixes: c545781e1c55 ("dma-buf: doc polish for pin/unpin")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
CC: stable@kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20210517115705.2141-1-christian.koenig@amd.com
---
 drivers/dma-buf/dma-buf.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index f264b70c383eb..eadd1eaa2fb54 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -760,7 +760,7 @@ dma_buf_dynamic_attach(struct dma_buf *dmabuf, struct device *dev,
 
 		if (dma_buf_is_dynamic(attach->dmabuf)) {
 			dma_resv_lock(attach->dmabuf->resv, NULL);
-			ret = dma_buf_pin(attach);
+			ret = dmabuf->ops->pin(attach);
 			if (ret)
 				goto err_unlock;
 		}
@@ -786,7 +786,7 @@ err_attach:
 
 err_unpin:
 	if (dma_buf_is_dynamic(attach->dmabuf))
-		dma_buf_unpin(attach);
+		dmabuf->ops->unpin(attach);
 
 err_unlock:
 	if (dma_buf_is_dynamic(attach->dmabuf))
@@ -843,7 +843,7 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
 		__unmap_dma_buf(attach, attach->sgt, attach->dir);
 
 		if (dma_buf_is_dynamic(attach->dmabuf)) {
-			dma_buf_unpin(attach);
+			dmabuf->ops->unpin(attach);
 			dma_resv_unlock(attach->dmabuf->resv);
 		}
 	}
@@ -956,7 +956,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 	if (dma_buf_is_dynamic(attach->dmabuf)) {
 		dma_resv_assert_held(attach->dmabuf->resv);
 		if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
-			r = dma_buf_pin(attach);
+			r = attach->dmabuf->ops->pin(attach);
 			if (r)
 				return ERR_PTR(r);
 		}
@@ -968,7 +968,7 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
 
 	if (IS_ERR(sg_table) && dma_buf_is_dynamic(attach->dmabuf) &&
 	     !IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY))
-		dma_buf_unpin(attach);
+		attach->dmabuf->ops->unpin(attach);
 
 	if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
 		attach->sgt = sg_table;
-- 
GitLab


From e68671e9e1275dfdda333c3e83b6d28963af16b6 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 20 May 2021 11:32:28 +0200
Subject: [PATCH 1148/3804] platform/x86: touchscreen_dmi: Add info for the
 Chuwi Hi10 Pro (CWI529) tablet

Add touchscreen info for the Chuwi Hi10 Pro (CWI529) tablet. This includes
info for getting the firmware directly from the UEFI, so that the user does
not need to manually install the firmware in /lib/firmware/silead.

This change will make the touchscreen on these devices work OOTB,
without requiring any manual setup.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20210520093228.7439-1-hdegoede@redhat.com
---
 drivers/platform/x86/touchscreen_dmi.c | 35 ++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 875519c6c2064..bde740d6120e1 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -115,6 +115,32 @@ static const struct ts_dmi_data chuwi_hi10_plus_data = {
 	.properties     = chuwi_hi10_plus_props,
 };
 
+static const struct property_entry chuwi_hi10_pro_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-min-x", 8),
+	PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1912),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 1272),
+	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10-pro.fw"),
+	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+	PROPERTY_ENTRY_BOOL("silead,home-button"),
+	{ }
+};
+
+static const struct ts_dmi_data chuwi_hi10_pro_data = {
+	.embedded_fw = {
+		.name	= "silead/gsl1680-chuwi-hi10-pro.fw",
+		.prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 },
+		.length	= 42504,
+		.sha256	= { 0xdb, 0x92, 0x68, 0xa8, 0xdb, 0x81, 0x31, 0x00,
+			    0x1f, 0x58, 0x89, 0xdb, 0x19, 0x1b, 0x15, 0x8c,
+			    0x05, 0x14, 0xf4, 0x95, 0xba, 0x15, 0x45, 0x98,
+			    0x42, 0xa3, 0xbb, 0x65, 0xe3, 0x30, 0xa5, 0x93 },
+	},
+	.acpi_name      = "MSSL1680:00",
+	.properties     = chuwi_hi10_pro_props,
+};
+
 static const struct property_entry chuwi_vi8_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-min-x", 4),
 	PROPERTY_ENTRY_U32("touchscreen-min-y", 6),
@@ -915,6 +941,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
 		},
 	},
+	{
+		/* Chuwi Hi10 Pro (CWI529) */
+		.driver_data = (void *)&chuwi_hi10_pro_data,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Hi10 pro tablet"),
+			DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"),
+		},
+	},
 	{
 		/* Chuwi Vi8 (CWI506) */
 		.driver_data = (void *)&chuwi_vi8_data,
-- 
GitLab


From 2e958a8a510d956ec8528f0bd20e309b5bb5156c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:09 +0900
Subject: [PATCH 1149/3804] x86/entry/x32: Rename __x32_compat_sys_* to
 __x64_compat_sys_*

The SYSCALL macros are mapped to symbols as follows:

  __SYSCALL_COMMON(nr, sym)  -->  __x64_<sym>
  __SYSCALL_X32(nr, sym)     -->  __x32_<sym>

Originally, the syscalls in the x32 special range (512-547) were all
compat.

This assumption is now broken after the following commits:

  55db9c0e8534 ("net: remove compat_sys_{get,set}sockopt")
  5f764d624a89 ("fs: remove the compat readv/writev syscalls")
  598b3cec831f ("fs: remove compat_sys_vmsplice")
  c3973b401ef2 ("mm: remove compat_process_vm_{readv,writev}")

Those commits redefined __x32_sys_* to __x64_sys_* because there is no stub
like __x32_sys_*.

Defining them as follows is more sensible and cleaner.

  __SYSCALL_COMMON(nr, sym)  -->  __x64_<sym>
  __SYSCALL_X32(nr, sym)     -->  __x64_<sym>

This works because both x86_64 and x32 use the same ABI (RDI, RSI, RDX,
R10, R8, R9)

The ugly #define __x32_sys_* will go away.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-2-masahiroy@kernel.org
---
 arch/x86/entry/syscall_x32.c           | 16 ++--------------
 arch/x86/include/asm/syscall_wrapper.h | 10 +++++-----
 2 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index f2fe0a33bcfdd..3fea8fb9cd6a6 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,27 +8,15 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-/*
- * Reuse the 64-bit entry points for the x32 versions that occupy different
- * slots in the syscall table.
- */
-#define __x32_sys_readv		__x64_sys_readv
-#define __x32_sys_writev	__x64_sys_writev
-#define __x32_sys_getsockopt	__x64_sys_getsockopt
-#define __x32_sys_setsockopt	__x64_sys_setsockopt
-#define __x32_sys_vmsplice	__x64_sys_vmsplice
-#define __x32_sys_process_vm_readv	__x64_sys_process_vm_readv
-#define __x32_sys_process_vm_writev	__x64_sys_process_vm_writev
-
 #define __SYSCALL_64(nr, sym)
 
-#define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_X32
 #undef __SYSCALL_COMMON
 
-#define __SYSCALL_X32(nr, sym) [nr] = __x32_##sym,
+#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
 #define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index 80c08c7d5e72e..6a2827d0681fc 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -17,7 +17,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * __x64_sys_*()         - 64-bit native syscall
  * __ia32_sys_*()        - 32-bit native syscall or common compat syscall
  * __ia32_compat_sys_*() - 32-bit compat syscall
- * __x32_compat_sys_*()  - 64-bit X32 compat syscall
+ * __x64_compat_sys_*()  - 64-bit X32 compat syscall
  *
  * The registers are decoded according to the ABI:
  * 64-bit: RDI, RSI, RDX, R10, R8, R9
@@ -166,17 +166,17 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs);
  * with x86_64 obviously do not need such care.
  */
 #define __X32_COMPAT_SYS_STUB0(name)					\
-	__SYS_STUB0(x32, compat_sys_##name)
+	__SYS_STUB0(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)				\
-	__SYS_STUBx(x32, compat_sys##name,				\
+	__SYS_STUBx(x64, compat_sys##name,				\
 		    SC_X86_64_REGS_TO_ARGS(x, __VA_ARGS__))
 
 #define __X32_COMPAT_COND_SYSCALL(name)					\
-	__COND_SYSCALL(x32, compat_sys_##name)
+	__COND_SYSCALL(x64, compat_sys_##name)
 
 #define __X32_COMPAT_SYS_NI(name)					\
-	__SYS_NI(x32, compat_sys_##name)
+	__SYS_NI(x64, compat_sys_##name)
 #else /* CONFIG_X86_X32 */
 #define __X32_COMPAT_SYS_STUB0(name)
 #define __X32_COMPAT_SYS_STUBx(x, name, ...)
-- 
GitLab


From 6218d0f6b8dece1f2e82f0a47a0e6b8ecb631ef6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:10 +0900
Subject: [PATCH 1150/3804] x86/syscalls: Switch to generic syscalltbl.sh

Many architectures duplicate similar shell scripts.

Convert x86 and UML to use scripts/syscalltbl.sh. The generic script
generates separate headers for x86/64 and x86/x32 syscalls, while the x86
specific script coalesced them into one. Adjust the code accordingly.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-3-masahiroy@kernel.org
---
 arch/x86/entry/syscall_32.c           | 12 +++++--
 arch/x86/entry/syscall_64.c           |  9 ++----
 arch/x86/entry/syscall_x32.c          | 15 +++------
 arch/x86/entry/syscalls/Makefile      | 10 ++++--
 arch/x86/entry/syscalls/syscalltbl.sh | 46 ---------------------------
 arch/x86/include/asm/Kbuild           |  1 +
 arch/x86/um/sys_call_table_32.c       |  8 +++--
 arch/x86/um/sys_call_table_64.c       |  9 ++----
 8 files changed, 34 insertions(+), 76 deletions(-)
 delete mode 100644 arch/x86/entry/syscalls/syscalltbl.sh

diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 86eb0d89d46fa..70bf46e73b1cf 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -8,12 +8,18 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_I386(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
+#ifdef CONFIG_IA32_EMULATION
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, compat)
+#else
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+#endif
+
+#define __SYSCALL(nr, sym) extern long __ia32_##sym(const struct pt_regs *);
 
 #include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
+#undef __SYSCALL
 
-#define __SYSCALL_I386(nr, sym) [nr] = __ia32_##sym,
+#define __SYSCALL(nr, sym) [nr] = __ia32_##sym,
 
 __visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 1594ec72bcbb7..82670bb109319 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -8,14 +8,11 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_X32(nr, sym)
-#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-
-#define __SYSCALL_64(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
-#undef __SYSCALL_64
+#undef __SYSCALL
 
-#define __SYSCALL_64(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 3fea8fb9cd6a6..6d2ef887d7b60 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -8,16 +8,11 @@
 #include <asm/unistd.h>
 #include <asm/syscall.h>
 
-#define __SYSCALL_64(nr, sym)
+#define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
+#include <asm/syscalls_x32.h>
+#undef __SYSCALL
 
-#define __SYSCALL_X32(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#define __SYSCALL_COMMON(nr, sym) extern long __x64_##sym(const struct pt_regs *);
-#include <asm/syscalls_64.h>
-#undef __SYSCALL_X32
-#undef __SYSCALL_COMMON
-
-#define __SYSCALL_X32(nr, sym) [nr] = __x64_##sym,
-#define __SYSCALL_COMMON(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
 
 asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
 	/*
@@ -25,5 +20,5 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
 	 * when the & below is removed.
 	 */
 	[0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
-#include <asm/syscalls_64.h>
+#include <asm/syscalls_x32.h>
 };
diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index d8c4f6c9eadc0..c4bd8dd82bb16 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -10,7 +10,7 @@ syscall32 := $(src)/syscall_32.tbl
 syscall64 := $(src)/syscall_64.tbl
 
 syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
       cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
@@ -18,7 +18,7 @@ quiet_cmd_syshdr = SYSHDR  $@
 		   '$(syshdr_pfx_$(basetarget))' \
 		   '$(syshdr_offset_$(basetarget))'
 quiet_cmd_systbl = SYSTBL  $@
-      cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
+      cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
 quiet_cmd_hypercalls = HYPERCALLS $@
       cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
@@ -46,10 +46,15 @@ syshdr_pfx_unistd_64_x32 := x32_
 $(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
+$(out)/syscalls_32.h: abis := i386
 $(out)/syscalls_32.h: $(syscall32) $(systbl) FORCE
 	$(call if_changed,systbl)
+$(out)/syscalls_64.h: abis := common,64
 $(out)/syscalls_64.h: $(syscall64) $(systbl) FORCE
 	$(call if_changed,systbl)
+$(out)/syscalls_x32.h: abis := common,x32
+$(out)/syscalls_x32.h: $(syscall64) $(systbl) FORCE
+	$(call if_changed,systbl)
 
 $(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh FORCE
 	$(call if_changed,hypercalls)
@@ -60,6 +65,7 @@ uapisyshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
 syshdr-y			+= syscalls_32.h
 syshdr-$(CONFIG_X86_64)		+= unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)		+= syscalls_64.h
+syshdr-$(CONFIG_X86_X32)	+= syscalls_x32.h
 syshdr-$(CONFIG_XEN)		+= xen-hypercalls.h
 
 uapisyshdr-y	:= $(addprefix $(uapi)/, $(uapisyshdr-y))
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
deleted file mode 100644
index 929bde120d6bb..0000000000000
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-
-syscall_macro() {
-    local abi="$1"
-    local nr="$2"
-    local entry="$3"
-
-    echo "__SYSCALL_${abi}($nr, $entry)"
-}
-
-emit() {
-    local abi="$1"
-    local nr="$2"
-    local entry="$3"
-    local compat="$4"
-
-    if [ "$abi" != "I386" -a -n "$compat" ]; then
-	echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
-	exit 1
-    fi
-
-    if [ -z "$compat" ]; then
-	if [ -n "$entry" ]; then
-	    syscall_macro "$abi" "$nr" "$entry"
-	fi
-    else
-	echo "#ifdef CONFIG_X86_32"
-	if [ -n "$entry" ]; then
-	    syscall_macro "$abi" "$nr" "$entry"
-	fi
-	echo "#else"
-	syscall_macro "$abi" "$nr" "$compat"
-	echo "#endif"
-    fi
-}
-
-grep '^[0-9]' "$in" | sort -n | (
-    while read nr abi name entry compat; do
-	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
-	emit "$abi" "$nr" "$entry" "$compat"
-    done
-) > "$out"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index b19ec8282d507..1e51650b79d7c 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -3,6 +3,7 @@
 
 generated-y += syscalls_32.h
 generated-y += syscalls_64.h
+generated-y += syscalls_x32.h
 generated-y += unistd_32_ia32.h
 generated-y += unistd_64_x32.h
 generated-y += xen-hypercalls.h
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 2ed81e581755b..e83619c365dcc 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -26,11 +26,13 @@
 
 #define old_mmap sys_old_mmap
 
-#define __SYSCALL_I386(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL_WITH_COMPAT(nr, native, compat)	__SYSCALL(nr, native)
+
+#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_32.h>
 
-#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym) [ nr ] = sym,
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 2e8544dafbb01..6fb75af7cf54b 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -36,14 +36,11 @@
 #define stub_execveat sys_execveat
 #define stub_rt_sigreturn sys_rt_sigreturn
 
-#define __SYSCALL_X32(nr, sym)
-#define __SYSCALL_COMMON(nr, sym) __SYSCALL_64(nr, sym)
-
-#define __SYSCALL_64(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long) ;
+#define __SYSCALL(nr, sym) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 #include <asm/syscalls_64.h>
 
-#undef __SYSCALL_64
-#define __SYSCALL_64(nr, sym) [ nr ] = sym,
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
-- 
GitLab


From 44fe4895f47cbe9f4692e1d3cdc2ef8352f4d88e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:11 +0900
Subject: [PATCH 1151/3804] x86/syscalls: Stop filling syscall arrays with
 *_sys_ni_syscall

This is a follow-up cleanup after switching to the generic syscalltbl.sh.

The old x86 specific script skipped non-existing syscalls. So, the
generated syscalls_64.h, for example, had a big hole in the syscall numbers
335-423 range. That is why there exists [0 ... __NR_*_syscall_max] =
&__*_sys_ni_syscall.

The new script, scripts/syscalltbl.sh automatically fills holes
with __SYSCALL(<nr>, sys_ni_syscall), hence such ugly code can
go away. The designated initializers, '[nr] =' are also unneeded.

Also, there is no need to give __NR_*_syscall_max+1 because the array
size is implied by the number of syscalls in the generated headers.
Hence, there is no need to include <asm/unistd.h>, either.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-4-masahiroy@kernel.org
---
 arch/x86/entry/syscall_32.c     | 10 ++--------
 arch/x86/entry/syscall_64.c     | 10 ++--------
 arch/x86/entry/syscall_x32.c    | 10 ++--------
 arch/x86/um/sys_call_table_32.c |  6 ------
 arch/x86/um/sys_call_table_64.c |  6 ------
 5 files changed, 6 insertions(+), 36 deletions(-)

diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index 70bf46e73b1cf..8cfc9bc73e7f8 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -5,7 +5,6 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #ifdef CONFIG_IA32_EMULATION
@@ -19,13 +18,8 @@
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __ia32_##sym,
+#define __SYSCALL(nr, sym) __ia32_##sym,
 
-__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_ia32_syscall_max] = &__ia32_sys_ni_syscall,
+__visible const sys_call_ptr_t ia32_sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index 82670bb109319..be120eec1fc9f 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -5,20 +5,14 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) __x64_##sym,
 
-asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
+asmlinkage const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
 };
diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c
index 6d2ef887d7b60..bdd0e03a1265d 100644
--- a/arch/x86/entry/syscall_x32.c
+++ b/arch/x86/entry/syscall_x32.c
@@ -5,20 +5,14 @@
 #include <linux/sys.h>
 #include <linux/cache.h>
 #include <linux/syscalls.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __SYSCALL(nr, sym) extern long __x64_##sym(const struct pt_regs *);
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) [nr] = __x64_##sym,
+#define __SYSCALL(nr, sym) __x64_##sym,
 
-asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_x32_syscall_max+1] = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_x32_syscall_max] = &__x64_sys_ni_syscall,
+asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
 #include <asm/syscalls_x32.h>
 };
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index e83619c365dcc..f8323104e3536 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -7,7 +7,6 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __NO_STUBS
@@ -37,11 +36,6 @@
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 6fb75af7cf54b..5ed665dc785fb 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -7,7 +7,6 @@
 #include <linux/linkage.h>
 #include <linux/sys.h>
 #include <linux/cache.h>
-#include <asm/unistd.h>
 #include <asm/syscall.h>
 
 #define __NO_STUBS
@@ -45,11 +44,6 @@
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
 const sys_call_ptr_t sys_call_table[] ____cacheline_aligned = {
-	/*
-	 * Smells like a compiler bug -- it doesn't work
-	 * when the & below is removed.
-	 */
-	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
 
-- 
GitLab


From f63815eb1d909a4121806e60928108ff040bf291 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:12 +0900
Subject: [PATCH 1152/3804] x86/unistd: Define X32_NR_syscalls only for 64-bit
 kernel

X32_NR_syscalls is needed only when building a 64bit kernel.

Move it to proper #ifdef guard.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-5-masahiroy@kernel.org
---
 arch/x86/include/asm/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index c1c3d31b15c06..1bc6020bc58db 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -26,11 +26,11 @@
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
+#  define X32_NR_syscalls (__NR_x32_syscall_max + 1)
 
 # endif
 
 # define NR_syscalls (__NR_syscall_max + 1)
-# define X32_NR_syscalls (__NR_x32_syscall_max + 1)
 # define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
 
 # define __ARCH_WANT_NEW_STAT
-- 
GitLab


From 49f731f1972e6e44d8a5c3982a72902b3944bc34 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:13 +0900
Subject: [PATCH 1153/3804] x86/syscalls: Use __NR_syscalls instead of
 __NR_syscall_max

__NR_syscall_max is only used by x86 and UML. In contrast, __NR_syscalls is
widely used by all the architectures.

Convert __NR_syscall_max to __NR_syscalls and adjust the usage sites.

This prepares x86 to switch to the generic syscallhdr.sh script.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-6-masahiroy@kernel.org
---
 arch/um/kernel/skas/syscall.c         | 2 +-
 arch/x86/entry/syscalls/syscallhdr.sh | 2 +-
 arch/x86/include/asm/unistd.h         | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 3d91f89fd8527..9ee19e566da3b 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -41,7 +41,7 @@ void handle_syscall(struct uml_pt_regs *r)
 		goto out;
 
 	syscall = UPT_SYSCALL_NR(r);
-	if (syscall >= 0 && syscall <= __NR_syscall_max)
+	if (syscall >= 0 && syscall < __NR_syscalls)
 		PT_REGS_SET_SYSCALL_RETURN(regs,
 				EXECUTE_SYSCALL(syscall, regs));
 
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
index cc1e638574270..75e66af067732 100644
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ b/arch/x86/entry/syscalls/syscallhdr.sh
@@ -28,7 +28,7 @@ grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
 
     echo ""
     echo "#ifdef __KERNEL__"
-    echo "#define __NR_${prefix}syscall_max $max"
+    echo "#define __NR_${prefix}syscalls $(($max + 1))"
     echo "#endif"
     echo ""
     echo "#endif /* ${fileguard} */"
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 1bc6020bc58db..80e9d5206a715 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -13,7 +13,7 @@
 #  define __ARCH_WANT_SYS_OLD_MMAP
 #  define __ARCH_WANT_SYS_OLD_SELECT
 
-#  define __NR_ia32_syscall_max __NR_syscall_max
+#  define IA32_NR_syscalls (__NR_syscalls)
 
 # else
 
@@ -26,12 +26,12 @@
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
-#  define X32_NR_syscalls (__NR_x32_syscall_max + 1)
+#  define X32_NR_syscalls (__NR_x32_syscalls)
+#  define IA32_NR_syscalls (__NR_ia32_syscalls)
 
 # endif
 
-# define NR_syscalls (__NR_syscall_max + 1)
-# define IA32_NR_syscalls (__NR_ia32_syscall_max + 1)
+# define NR_syscalls (__NR_syscalls)
 
 # define __ARCH_WANT_NEW_STAT
 # define __ARCH_WANT_OLD_READDIR
-- 
GitLab


From 3cba325b358f86357b5ce50eb9e6633183927eee Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 17 May 2021 16:38:14 +0900
Subject: [PATCH 1154/3804] x86/syscalls: Switch to generic syscallhdr.sh

Many architectures duplicate similar shell scripts.

Convert x86 to use scripts/syscallhdr.sh.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210517073815.97426-7-masahiroy@kernel.org
---
 arch/x86/entry/syscalls/Makefile      | 26 ++++++++++----------
 arch/x86/entry/syscalls/syscallhdr.sh | 35 ---------------------------
 2 files changed, 13 insertions(+), 48 deletions(-)
 delete mode 100644 arch/x86/entry/syscalls/syscallhdr.sh

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index c4bd8dd82bb16..8eb014bca8c94 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -9,40 +9,40 @@ _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
 syscall32 := $(src)/syscall_32.tbl
 syscall64 := $(src)/syscall_64.tbl
 
-syshdr := $(srctree)/$(src)/syscallhdr.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
 systbl := $(srctree)/scripts/syscalltbl.sh
 
 quiet_cmd_syshdr = SYSHDR  $@
-      cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
-		   '$(syshdr_abi_$(basetarget))' \
-		   '$(syshdr_pfx_$(basetarget))' \
-		   '$(syshdr_offset_$(basetarget))'
+      cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \
+		$(if $(offset),--offset $(offset)) \
+		$(if $(prefix),--prefix $(prefix)) \
+		$< $@
 quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
 
 quiet_cmd_hypercalls = HYPERCALLS $@
       cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
 
-syshdr_abi_unistd_32 := i386
+$(uapi)/unistd_32.h: abis := i386
 $(uapi)/unistd_32.h: $(syscall32) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_32_ia32 := i386
-syshdr_pfx_unistd_32_ia32 := ia32_
+$(out)/unistd_32_ia32.h: abis := i386
+$(out)/unistd_32_ia32.h: prefix := ia32_
 $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_x32 := common,x32
-syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
+$(uapi)/unistd_x32.h: abis := common,x32
+$(uapi)/unistd_x32.h: offset := __X32_SYSCALL_BIT
 $(uapi)/unistd_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_64 := common,64
+$(uapi)/unistd_64.h: abis := common,64
 $(uapi)/unistd_64.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
-syshdr_abi_unistd_64_x32 := x32
-syshdr_pfx_unistd_64_x32 := x32_
+$(out)/unistd_64_x32.h: abis := x32
+$(out)/unistd_64_x32.h: prefix := x32_
 $(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
 	$(call if_changed,syshdr)
 
diff --git a/arch/x86/entry/syscalls/syscallhdr.sh b/arch/x86/entry/syscalls/syscallhdr.sh
deleted file mode 100644
index 75e66af067732..0000000000000
--- a/arch/x86/entry/syscalls/syscallhdr.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_ASM_X86_`basename "$out" | sed \
-    -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
-    -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
-    echo "#ifndef ${fileguard}"
-    echo "#define ${fileguard} 1"
-    echo ""
-
-    max=0
-    while read nr abi name entry ; do
-	if [ -z "$offset" ]; then
-	    echo "#define __NR_${prefix}${name} $nr"
-	else
-	    echo "#define __NR_${prefix}${name} ($offset + $nr)"
-        fi
-
-	max=$nr
-    done
-
-    echo ""
-    echo "#ifdef __KERNEL__"
-    echo "#define __NR_${prefix}syscalls $(($max + 1))"
-    echo "#endif"
-    echo ""
-    echo "#endif /* ${fileguard} */"
-) > "$out"
-- 
GitLab


From 15c82d98a0f783bd4b2715ea910f7bb526367f54 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:12:58 -0700
Subject: [PATCH 1155/3804] selftests/x86/syscall: Update and extend
 syscall_numbering_64

Update the syscall_numbering_64 selftest to reflect that a system call is
to be extended from 32 bits. Add a mix of tests for valid and invalid
system calls in 64-bit and x32 space.

Use an explicit system call instruction, because the glibc syscall()
wrapper might intercept instructions, extend the system call number
independently, or anything similar.

Use long long instead of long to make it possible to compile this test
on x32 as well as 64 bits.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-2-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 274 ++++++++++++++----
 1 file changed, 222 insertions(+), 52 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c1..7dd86bcbee251 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
  * Copyright (c) 2018 Andrew Lutomirski
  */
 
@@ -11,79 +13,247 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
 
-static int nerrs;
+/* Common system call numbers */
+#define SYS_READ	  0
+#define SYS_WRITE	  1
+#define SYS_GETPID	 39
+/* x64-only system call numbers */
+#define X64_IOCTL	 16
+#define X64_READV	 19
+#define X64_WRITEV	 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL	514
+#define X32_READV	515
+#define X32_WRITEV	516
 
-#define X32_BIT 0x40000000UL
+#define X32_BIT 0x40000000
 
-static void check_enosys(unsigned long nr, bool *ok)
+static unsigned int nerr = 0;	/* Cumulative error count */
+static int nullfd = -1;		/* File descriptor for /dev/null */
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static inline long long probe_syscall(int msb, int lsb)
 {
-	/* If this fails, a segfault is reasonably likely. */
-	fflush(stdout);
-
-	long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
-	if (ret == 0) {
-		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
-		*ok = false;
-	} else if (errno != ENOSYS) {
-		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
-		*ok = false;
-	}
+	register long long arg1 asm("rdi") = nullfd;
+	register long long arg2 asm("rsi") = 0;
+	register long long arg3 asm("rdx") = 0;
+	register long long arg4 asm("r10") = 0;
+	register long long arg5 asm("r8")  = 0;
+	register long long arg6 asm("r9")  = 0;
+	long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+	long long ret;
+
+	asm volatile("syscall"
+		     : "=a" (ret)
+		     : "a" (nr), "r" (arg1), "r" (arg2), "r" (arg3),
+		       "r" (arg4), "r" (arg5), "r" (arg6)
+		     : "rcx", "r11", "memory", "cc");
+
+	return ret;
 }
 
-static void test_x32_without_x32_bit(void)
+static const char *syscall_str(int msb, int start, int end)
 {
-	bool ok = true;
+	static char buf[64];
+	const char * const type = (start & X32_BIT) ? "x32" : "x64";
+	int lsb = start;
 
 	/*
-	 * Syscalls 512-547 are "x32" syscalls.  They are intended to be
-	 * called with the x32 (0x40000000) bit set.  Calling them without
-	 * the x32 bit set is nonsense and should not work.
+	 * Improve readability by stripping the x32 bit, but round
+	 * toward zero so we don't display -1 as -1073741825.
 	 */
-	printf("[RUN]\tChecking syscalls 512-547\n");
-	for (int i = 512; i <= 547; i++)
-		check_enosys(i, &ok);
+	if (lsb < 0)
+		lsb |= X32_BIT;
+	else
+		lsb &= ~X32_BIT;
+
+	if (start == end)
+		snprintf(buf, sizeof buf, "%s syscall %d:%d",
+			 type, msb, lsb);
+	else
+		snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+			 type, msb, lsb, lsb + (end-start));
+
+	return buf;
+}
+
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+			       const char *expect_str)
+{
+	unsigned int err = 0;
+
+	for (int nr = start; nr <= end; nr++) {
+		long long ret = probe_syscall(msb, nr);
+
+		if (ret != expect) {
+			printf("[FAIL]\t      %s returned %lld, but it should have returned %s\n",
+			       syscall_str(msb, nr, nr),
+			       ret, expect_str);
+			err++;
+		}
+	}
 
+	if (err) {
+		nerr += err;
+		if (start != end)
+			printf("[FAIL]\t      %s had %u failure%s\n",
+			       syscall_str(msb, start, end),
+			       err, (err == 1) ? "s" : "");
+	} else {
+		printf("[OK]\t      %s returned %s as expected\n",
+		       syscall_str(msb, start, end), expect_str);
+	}
+
+	return err;
+}
+
+#define check_for(msb,start,end,expect) \
+	_check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+	return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+	return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+	long long ret;
+	long long mypid = getpid();
+
+	printf("[RUN]\tChecking for x32 by calling x32 getpid()\n");
+	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+	if (ret == mypid) {
+		printf("[INFO]\t   x32 is supported\n");
+		return true;
+	} else if (ret == -ENOSYS) {
+		printf("[INFO]\t   x32 is not supported\n");
+		return false;
+	} else {
+		printf("[FAIL]\t   x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		nerr++;
+		return true;	/* Proceed as if... */
+	}
+}
+
+static void test_syscalls_common(int msb)
+{
+	printf("[RUN]\t   Checking some common syscalls as 64 bit\n");
+	check_zero(msb, SYS_READ);
+	check_zero(msb, SYS_WRITE);
+
+	printf("[RUN]\t   Checking some 64-bit only syscalls as 64 bit\n");
+	check_zero(msb, X64_READV);
+	check_zero(msb, X64_WRITEV);
+
+	printf("[RUN]\t   Checking out of range system calls\n");
+	check_for(msb, -64, -1, -ENOSYS);
+	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+
+static void test_syscalls_with_x32(int msb)
+{
 	/*
-	 * Check that a handful of 64-bit-only syscalls are rejected if the x32
-	 * bit is set.
+	 * Syscalls 512-547 are "x32" syscalls.  They are
+	 * intended to be called with the x32 (0x40000000) bit
+	 * set.  Calling them without the x32 bit set is
+	 * nonsense and should not work.
 	 */
-	printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
-	check_enosys(16 | X32_BIT, &ok);	/* ioctl */
-	check_enosys(19 | X32_BIT, &ok);	/* readv */
-	check_enosys(20 | X32_BIT, &ok);	/* writev */
+	printf("[RUN]\t   Checking x32 syscalls as 64 bit\n");
+	check_for(msb, 512, 547, -ENOSYS);
+
+	printf("[RUN]\t   Checking some common syscalls as x32\n");
+	check_zero(msb, SYS_READ   | X32_BIT);
+	check_zero(msb, SYS_WRITE  | X32_BIT);
+
+	printf("[RUN]\t   Checking some x32 syscalls as x32\n");
+	check_zero(msb, X32_READV  | X32_BIT);
+	check_zero(msb, X32_WRITEV | X32_BIT);
+
+	printf("[RUN]\t   Checking some 64-bit syscalls as x32\n");
+	check_enosys(msb, X64_IOCTL  | X32_BIT);
+	check_enosys(msb, X64_READV  | X32_BIT);
+	check_enosys(msb, X64_WRITEV | X32_BIT);
+}
+
+static void test_syscalls_without_x32(int msb)
+{
+	printf("[RUN]\t  Checking for absence of x32 system calls\n");
+	check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+	static const int msbs[] = {
+		0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+		INT_MIN, INT_MIN+1
+	};
+	bool with_x32 = test_x32();
 
 	/*
-	 * Check some syscalls with high bits set.
+	 * The MSB is supposed to be ignored, so we loop over a few
+	 * to test that out.
 	 */
-	printf("[RUN]\tChecking numbers above 2^32-1\n");
-	check_enosys((1UL << 32), &ok);
-	check_enosys(X32_BIT | (1UL << 32), &ok);
+	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+		int msb = msbs[i];
+		printf("[RUN]\tChecking system calls with msb = %d (0x%x)\n",
+		       msb, msb);
 
-	if (!ok)
-		nerrs++;
-	else
-		printf("[OK]\tThey all returned -ENOSYS\n");
+		test_syscalls_common(msb);
+		if (with_x32)
+			test_syscalls_with_x32(msb);
+		else
+			test_syscalls_without_x32(msb);
+	}
 }
 
-int main()
+int main(void)
 {
 	/*
-	 * Anyone diagnosing a failure will want to know whether the kernel
-	 * supports x32.  Tell them.
+	 * It is quite likely to get a segfault on a failure, so make
+	 * sure the message gets out by setting stdout to nonbuffered.
 	 */
-	printf("\tChecking for x32...");
-	fflush(stdout);
-	if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
-		printf(" supported\n");
-	} else if (errno == ENOSYS) {
-		printf(" not supported\n");
-	} else {
-		printf(" confused\n");
-	}
+	setvbuf(stdout, NULL, _IONBF, 0);
 
-	test_x32_without_x32_bit();
+	/*
+	 * Harmless file descriptor to work on...
+	 */
+	nullfd = open("/dev/null", O_RDWR);
+	if (nullfd < 0) {
+		printf("[FAIL]\tUnable to open /dev/null: %s\n",
+		       strerror(errno));
+		printf("[SKIP]\tCannot execute test\n");
+		return 71;	/* EX_OSERR */
+	}
 
-	return nerrs ? 1 : 0;
+	test_syscall_numbering();
+	if (!nerr) {
+		printf("[OK]\tAll system calls succeeded or failed as expected\n");
+		return 0;
+	} else {
+		printf("[FAIL]\tA total of %u system call%s had incorrect behavior\n",
+		       nerr, nerr != 1 ? "s" : "");
+		return 1;
+	}
 }
-- 
GitLab


From c5c39488dcb5f818bb07f856a349262d667ef147 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:12:59 -0700
Subject: [PATCH 1156/3804] selftests/x86/syscall: Simplify message reporting
 in syscall_numbering

Reduce some boiler plate in printing and indenting messages.
This makes it easier to produce clean status output.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-3-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 103 ++++++++++++------
 1 file changed, 72 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 7dd86bcbee251..434fe0efafa0c 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <sysexits.h>
 
 /* Common system call numbers */
 #define SYS_READ	  0
@@ -34,6 +35,33 @@
 
 static unsigned int nerr = 0;	/* Cumulative error count */
 static int nullfd = -1;		/* File descriptor for /dev/null */
+static int indent = 0;
+
+static inline unsigned int offset(void)
+{
+	return 8 + indent * 4;
+}
+
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+				  ## __VA_ARGS__)
+
+#define run(fmt, ...)  msg(RUN,  fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...)   msg(OK,   fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...)					\
+	do {						\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		nerr++;					\
+	} while (0)
+
+#define crit(fmt, ...)					\
+	do {						\
+		indent = 0;				\
+		msg(FAIL, fmt, ## __VA_ARGS__);		\
+		msg(SKIP, "Unable to run test\n");	\
+		exit(EX_OSERR);
+	} while (0)
 
 /*
  * Directly invokes the given syscall with nullfd as the first argument
@@ -91,28 +119,37 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 {
 	unsigned int err = 0;
 
+	indent++;
+	if (start != end)
+		indent++;
+
 	for (int nr = start; nr <= end; nr++) {
 		long long ret = probe_syscall(msb, nr);
 
 		if (ret != expect) {
-			printf("[FAIL]\t      %s returned %lld, but it should have returned %s\n",
+			fail("%s returned %lld, but it should have returned %s\n",
 			       syscall_str(msb, nr, nr),
 			       ret, expect_str);
 			err++;
 		}
 	}
 
+	if (start != end)
+		indent--;
+
 	if (err) {
 		nerr += err;
 		if (start != end)
-			printf("[FAIL]\t      %s had %u failure%s\n",
+			fail("%s had %u failure%s\n",
 			       syscall_str(msb, start, end),
-			       err, (err == 1) ? "s" : "");
+			       err, err == 1 ? "s" : "");
 	} else {
-		printf("[OK]\t      %s returned %s as expected\n",
-		       syscall_str(msb, start, end), expect_str);
+		ok("%s returned %s as expected\n",
+		   syscall_str(msb, start, end), expect_str);
 	}
 
+	indent--;
+
 	return err;
 }
 
@@ -137,35 +174,38 @@ static bool check_enosys(int msb, int nr)
 static bool test_x32(void)
 {
 	long long ret;
-	long long mypid = getpid();
+	pid_t mypid = getpid();
+	bool with_x32;
 
-	printf("[RUN]\tChecking for x32 by calling x32 getpid()\n");
+	run("Checking for x32 by calling x32 getpid()\n");
 	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
 
+	indent++;
 	if (ret == mypid) {
-		printf("[INFO]\t   x32 is supported\n");
-		return true;
+		info("x32 is supported\n");
+		with_x32 = true;
 	} else if (ret == -ENOSYS) {
-		printf("[INFO]\t   x32 is not supported\n");
-		return false;
+		info("x32 is not supported\n");
+		with_x32 = false;
 	} else {
-		printf("[FAIL]\t   x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
-		nerr++;
-		return true;	/* Proceed as if... */
+		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		with_x32 = false;
 	}
+	indent--;
+	return with_x32;
 }
 
 static void test_syscalls_common(int msb)
 {
-	printf("[RUN]\t   Checking some common syscalls as 64 bit\n");
+	run("Checking some common syscalls as 64 bit\n");
 	check_zero(msb, SYS_READ);
 	check_zero(msb, SYS_WRITE);
 
-	printf("[RUN]\t   Checking some 64-bit only syscalls as 64 bit\n");
+	run("Checking some 64-bit only syscalls as 64 bit\n");
 	check_zero(msb, X64_READV);
 	check_zero(msb, X64_WRITEV);
 
-	printf("[RUN]\t   Checking out of range system calls\n");
+	run("Checking out of range system calls\n");
 	check_for(msb, -64, -1, -ENOSYS);
 	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
 	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
@@ -180,18 +220,18 @@ static void test_syscalls_with_x32(int msb)
 	 * set.  Calling them without the x32 bit set is
 	 * nonsense and should not work.
 	 */
-	printf("[RUN]\t   Checking x32 syscalls as 64 bit\n");
+	run("Checking x32 syscalls as 64 bit\n");
 	check_for(msb, 512, 547, -ENOSYS);
 
-	printf("[RUN]\t   Checking some common syscalls as x32\n");
+	run("Checking some common syscalls as x32\n");
 	check_zero(msb, SYS_READ   | X32_BIT);
 	check_zero(msb, SYS_WRITE  | X32_BIT);
 
-	printf("[RUN]\t   Checking some x32 syscalls as x32\n");
+	run("Checking some x32 syscalls as x32\n");
 	check_zero(msb, X32_READV  | X32_BIT);
 	check_zero(msb, X32_WRITEV | X32_BIT);
 
-	printf("[RUN]\t   Checking some 64-bit syscalls as x32\n");
+	run("Checking some 64-bit syscalls as x32\n");
 	check_enosys(msb, X64_IOCTL  | X32_BIT);
 	check_enosys(msb, X64_READV  | X32_BIT);
 	check_enosys(msb, X64_WRITEV | X32_BIT);
@@ -199,7 +239,7 @@ static void test_syscalls_with_x32(int msb)
 
 static void test_syscalls_without_x32(int msb)
 {
-	printf("[RUN]\t  Checking for absence of x32 system calls\n");
+	run("Checking for absence of x32 system calls\n");
 	check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
 }
 
@@ -217,14 +257,18 @@ static void test_syscall_numbering(void)
 	 */
 	for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
 		int msb = msbs[i];
-		printf("[RUN]\tChecking system calls with msb = %d (0x%x)\n",
-		       msb, msb);
+		run("Checking system calls with msb = %d (0x%x)\n",
+		    msb, msb);
+
+		indent++;
 
 		test_syscalls_common(msb);
 		if (with_x32)
 			test_syscalls_with_x32(msb);
 		else
 			test_syscalls_without_x32(msb);
+
+		indent--;
 	}
 }
 
@@ -241,19 +285,16 @@ int main(void)
 	 */
 	nullfd = open("/dev/null", O_RDWR);
 	if (nullfd < 0) {
-		printf("[FAIL]\tUnable to open /dev/null: %s\n",
-		       strerror(errno));
-		printf("[SKIP]\tCannot execute test\n");
-		return 71;	/* EX_OSERR */
+		crit("Unable to open /dev/null: %s\n", strerror(errno));
 	}
 
 	test_syscall_numbering();
 	if (!nerr) {
-		printf("[OK]\tAll system calls succeeded or failed as expected\n");
+		ok("All system calls succeeded or failed as expected\n");
 		return 0;
 	} else {
-		printf("[FAIL]\tA total of %u system call%s had incorrect behavior\n",
-		       nerr, nerr != 1 ? "s" : "");
+		fail("A total of %u system call%s had incorrect behavior\n",
+		     nerr, nerr != 1 ? "s" : "");
 		return 1;
 	}
 }
-- 
GitLab


From 795e2a023b8080b95442811f26f0762184116caa Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:00 -0700
Subject: [PATCH 1157/3804] selftests/x86/syscall: Add tests under ptrace to
 syscall_numbering_64

Add tests running under ptrace for syscall_numbering_64. ptrace stopping on
syscall entry and possibly modifying the syscall number (regs.orig_rax) or
the default return value (regs.rax) can have different results than the
normal system call path.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-4-hpa@zytor.com
---
 .../testing/selftests/x86/syscall_numbering.c | 232 ++++++++++++++++--
 1 file changed, 207 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 434fe0efafa0c..991591718bb0a 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -16,8 +16,16 @@
 #include <string.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <signal.h>
 #include <sysexits.h>
 
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include <linux/ptrace.h>
+
 /* Common system call numbers */
 #define SYS_READ	  0
 #define SYS_WRITE	  1
@@ -33,13 +41,45 @@
 
 #define X32_BIT 0x40000000
 
-static unsigned int nerr = 0;	/* Cumulative error count */
 static int nullfd = -1;		/* File descriptor for /dev/null */
-static int indent = 0;
+static bool with_x32;		/* x32 supported on this kernel? */
+
+enum ptrace_pass {
+	PTP_NOTHING,
+	PTP_GETREGS,
+	PTP_WRITEBACK,
+	PTP_FUZZRET,
+	PTP_FUZZHIGH,
+	PTP_INTNUM,
+	PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
+{
+	[PTP_NOTHING]	= "just stop, no data read",
+	[PTP_GETREGS]	= "only getregs",
+	[PTP_WRITEBACK]	= "getregs, unmodified setregs",
+	[PTP_FUZZRET]	= "modifying the default return",
+	[PTP_FUZZHIGH]	= "clobbering the top 32 bits",
+	[PTP_INTNUM]	= "sign-extending the syscall number",
+};
+
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+	unsigned int nerr;	/* Total error count */
+	unsigned int indent;	/* Message indentation level */
+	enum ptrace_pass ptrace_pass;
+	bool probing_syscall;	/* In probe_syscall() */
+};
+static volatile struct shared *sh;
 
 static inline unsigned int offset(void)
 {
-	return 8 + indent * 4;
+	unsigned int level = sh ? sh->indent : 0;
+
+	return 8 + level * 4;
 }
 
 #define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
@@ -52,16 +92,19 @@ static inline unsigned int offset(void)
 #define fail(fmt, ...)					\
 	do {						\
 		msg(FAIL, fmt, ## __VA_ARGS__);		\
-		nerr++;					\
-	} while (0)
+		sh->nerr++;				\
+       } while (0)
 
 #define crit(fmt, ...)					\
 	do {						\
-		indent = 0;				\
+		sh->indent = 0;				\
 		msg(FAIL, fmt, ## __VA_ARGS__);		\
 		msg(SKIP, "Unable to run test\n");	\
-		exit(EX_OSERR);
-	} while (0)
+		exit(EX_OSERR);				\
+       } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE	-9999
 
 /*
  * Directly invokes the given syscall with nullfd as the first argument
@@ -69,7 +112,7 @@ static inline unsigned int offset(void)
  * end up intercepting some system calls for some reason, or modify
  * the system call number itself.
  */
-static inline long long probe_syscall(int msb, int lsb)
+static long long probe_syscall(int msb, int lsb)
 {
 	register long long arg1 asm("rdi") = nullfd;
 	register long long arg2 asm("rsi") = 0;
@@ -80,11 +123,21 @@ static inline long long probe_syscall(int msb, int lsb)
 	long long nr = ((long long)msb << 32) | (unsigned int)lsb;
 	long long ret;
 
+	/*
+	 * We pass in an extra copy of the extended system call number
+	 * in %rbx, so we can examine it from the ptrace handler without
+	 * worrying about it being possibly modified. This is to test
+	 * the validity of struct user regs.orig_rax a.k.a.
+	 * struct pt_regs.orig_ax.
+	 */
+	sh->probing_syscall = true;
 	asm volatile("syscall"
 		     : "=a" (ret)
-		     : "a" (nr), "r" (arg1), "r" (arg2), "r" (arg3),
+		     : "a" (nr), "b" (nr),
+		       "r" (arg1), "r" (arg2), "r" (arg3),
 		       "r" (arg4), "r" (arg5), "r" (arg6)
 		     : "rcx", "r11", "memory", "cc");
+	sh->probing_syscall = false;
 
 	return ret;
 }
@@ -119,9 +172,9 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 {
 	unsigned int err = 0;
 
-	indent++;
+	sh->indent++;
 	if (start != end)
-		indent++;
+		sh->indent++;
 
 	for (int nr = start; nr <= end; nr++) {
 		long long ret = probe_syscall(msb, nr);
@@ -135,20 +188,19 @@ static unsigned int _check_for(int msb, int start, int end, long long expect,
 	}
 
 	if (start != end)
-		indent--;
+		sh->indent--;
 
 	if (err) {
-		nerr += err;
 		if (start != end)
 			fail("%s had %u failure%s\n",
-			       syscall_str(msb, start, end),
-			       err, err == 1 ? "s" : "");
+			     syscall_str(msb, start, end),
+			     err, err == 1 ? "s" : "");
 	} else {
 		ok("%s returned %s as expected\n",
 		   syscall_str(msb, start, end), expect_str);
 	}
 
-	indent--;
+	sh->indent--;
 
 	return err;
 }
@@ -175,12 +227,11 @@ static bool test_x32(void)
 {
 	long long ret;
 	pid_t mypid = getpid();
-	bool with_x32;
 
 	run("Checking for x32 by calling x32 getpid()\n");
 	ret = probe_syscall(0, SYS_GETPID | X32_BIT);
 
-	indent++;
+	sh->indent++;
 	if (ret == mypid) {
 		info("x32 is supported\n");
 		with_x32 = true;
@@ -188,15 +239,17 @@ static bool test_x32(void)
 		info("x32 is not supported\n");
 		with_x32 = false;
 	} else {
-		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, mypid);
+		fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
 		with_x32 = false;
 	}
-	indent--;
+	sh->indent--;
 	return with_x32;
 }
 
 static void test_syscalls_common(int msb)
 {
+	enum ptrace_pass pass = sh->ptrace_pass;
+
 	run("Checking some common syscalls as 64 bit\n");
 	check_zero(msb, SYS_READ);
 	check_zero(msb, SYS_WRITE);
@@ -206,7 +259,11 @@ static void test_syscalls_common(int msb)
 	check_zero(msb, X64_WRITEV);
 
 	run("Checking out of range system calls\n");
-	check_for(msb, -64, -1, -ENOSYS);
+	check_for(msb, -64, -2, -ENOSYS);
+	if (pass >= PTP_FUZZRET)
+		check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+	else
+		check_for(msb, -1, -1, -ENOSYS);
 	check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
 	check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
 	check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
@@ -249,7 +306,8 @@ static void test_syscall_numbering(void)
 		0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
 		INT_MIN, INT_MIN+1
 	};
-	bool with_x32 = test_x32();
+
+	sh->indent++;
 
 	/*
 	 * The MSB is supposed to be ignored, so we loop over a few
@@ -260,7 +318,7 @@ static void test_syscall_numbering(void)
 		run("Checking system calls with msb = %d (0x%x)\n",
 		    msb, msb);
 
-		indent++;
+		sh->indent++;
 
 		test_syscalls_common(msb);
 		if (with_x32)
@@ -268,12 +326,119 @@ static void test_syscall_numbering(void)
 		else
 			test_syscalls_without_x32(msb);
 
-		indent--;
+		sh->indent--;
+	}
+
+	sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+	enum ptrace_pass pass;
+
+	if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+		crit("Failed to request tracing\n");
+		return;
+	}
+	raise(SIGSTOP);
+
+	for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+	     sh->ptrace_pass = ++pass) {
+		run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+		test_syscall_numbering();
+	}
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+	struct user_regs_struct regs;
+
+	sh->probing_syscall = false; /* Do this on entry only */
+
+	/* For these, don't even getregs */
+	if (pass == PTP_NOTHING || pass == PTP_DONE)
+		return;
+
+	ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+	if (regs.orig_rax != regs.rbx) {
+		fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+		     (unsigned long long)regs.orig_rax,
+		     (unsigned long long)regs.rbx);
+	}
+
+	switch (pass) {
+	case PTP_GETREGS:
+		/* Just read, no writeback */
+		return;
+	case PTP_WRITEBACK:
+		/* Write back the same register state verbatim */
+		break;
+	case PTP_FUZZRET:
+		regs.rax = MODIFIED_BY_PTRACE;
+		break;
+	case PTP_FUZZHIGH:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+		break;
+	case PTP_INTNUM:
+		regs.rax = MODIFIED_BY_PTRACE;
+		regs.orig_rax = (int)regs.orig_rax;
+		break;
+	default:
+		crit("invalid ptrace_pass\n");
+		break;
+	}
+
+	ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+	int wstatus;
+
+	do {
+		pid_t wpid = waitpid(testpid, &wstatus, 0);
+		if (wpid < 0 && errno != EINTR)
+			break;
+		if (wpid != testpid)
+			continue;
+		if (!WIFSTOPPED(wstatus))
+			break;	/* Thread exited? */
+
+		if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+			mess_with_syscall(testpid, sh->ptrace_pass);
+	} while (sh->ptrace_pass != PTP_DONE &&
+		 !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+	ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+	/* Wait for the child process to terminate */
+	while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+		/* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+	pid_t testpid;
+
+	/* Launch the test thread; this thread continues as the tracer thread */
+	testpid = fork();
+
+	if (testpid < 0) {
+		crit("Unable to launch tracer process\n");
+	} else if (testpid == 0) {
+		syscall_numbering_tracee();
+		_exit(0);
+	} else {
+		syscall_numbering_tracer(testpid);
 	}
 }
 
 int main(void)
 {
+	unsigned int nerr;
+
 	/*
 	 * It is quite likely to get a segfault on a failure, so make
 	 * sure the message gets out by setting stdout to nonbuffered.
@@ -288,7 +453,24 @@ int main(void)
 		crit("Unable to open /dev/null: %s\n", strerror(errno));
 	}
 
+	/*
+	 * Set up a block of shared memory...
+	 */
+	sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+		  MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+	if (sh == MAP_FAILED) {
+		crit("Unable to allocated shared memory block: %s\n",
+		     strerror(errno));
+	}
+
+	with_x32 = test_x32();
+
+	run("Running tests without ptrace...\n");
 	test_syscall_numbering();
+
+	test_traced_syscall_numbering();
+
+	nerr = sh->nerr;
 	if (!nerr) {
 		ok("All system calls succeeded or failed as expected\n");
 		return 0;
-- 
GitLab


From 0595494891723a1dcca5eaa8eeca8ab54ad953b9 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:01 -0700
Subject: [PATCH 1158/3804] x86/entry/64: Sign-extend system calls on entry to
 int

Right now, *some* code will treat e.g. 0x0000000100000001 as a system
call and some will not. Some of the code, notably in ptrace, will
treat 0x000000018000000 as a system call and some will not. Finally,
right now, e.g. 335 for x86-64 will force the exit code to be set to
-ENOSYS even if poked by ptrace, but 548 will not, because there is an
observable difference between an out of range system call and a system
call number that falls outside the range of the table.

This is visible to the user: for example, the syscall_numbering_64
test fails if run under strace, because as strace uses ptrace, it ends
up clobbering the upper half of the 64-bit system call number.

The architecture-independent code all assumes that a system call number
is an "int" and that the value -1 specifically, and not just any negative
value, is used for a non-system call. This is the case on x86 as well when
arch-independent code is involved. The arch-independent API is
defined/documented (but not *implemented*!) in <asm-generic/syscall.h>.

This is an ABI change, but is in fact a revert to the original x86-64
ABI. The original assembly entry code would zero-extend the system call
number.

Use sign extend to be explicit that this is treated as a signed number
(although in practice it makes no difference, of course) and to avoid
people getting the idea of "optimizing" it, as has happened on at least
two(!) separate occasions.

Do not store the extended value into regs->orig_ax, however: on x86-64, the
ABI is that the callee is responsible for extending parameters, so only
examining the lower 32 bits is fully consistent with any "int" argument to
any system call, e.g. regs->di for write(2). The full value of %rax on
entry to the kernel is thus still available.

[ tglx: Add a comment to the ASM code ]

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-5-hpa@zytor.com
---
 arch/x86/entry/entry_64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1d9db15fdc692..a5f02d03c585a 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -108,7 +108,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 
 	/* IRQs are off. */
 	movq	%rsp, %rdi
-	movq	%rax, %rsi
+	/* Sign extend the lower 32bit as syscall numbers are treated as int */
+	movslq	%eax, %rsi
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 	/*
-- 
GitLab


From b337b4965e3a3e567f11828a9e3fe3fb3faefa47 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:02 -0700
Subject: [PATCH 1159/3804] x86/entry: Treat out of range and gap system calls
 the same

The current 64-bit system call entry code treats out-of-range system
calls differently than system calls that map to a hole in the system
call table.

This is visible to the user if system calls are intercepted via ptrace or
seccomp and the return value (regs->ax) is modified: in the former case,
the return value is preserved, and in the latter case, sys_ni_syscall() is
called and the return value is forced to -ENOSYS.

The API spec in <asm-generic/syscall.h> is very clear that only
(int)-1 is the non-system-call sentinel value, so make the system call
behavior consistent by calling sys_ni_syscall() for all invalid system
call numbers except for -1.

Although currently sys_ni_syscall() simply returns -ENOSYS, calling it
explicitly is friendly for tracing and future possible extensions, and
as this is an error path there is no reason to optimize it.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-6-hpa@zytor.com
---
 arch/x86/entry/common.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 00da0f5420de8..f51bc17262db1 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -52,6 +52,8 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
 					X32_NR_syscalls);
 		regs->ax = x32_sys_call_table[nr](regs);
 #endif
+	} else if (unlikely((int)nr != -1)) {
+		regs->ax = __x64_sys_ni_syscall(regs);
 	}
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
@@ -76,6 +78,8 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
 	if (likely(nr < IA32_NR_syscalls)) {
 		nr = array_index_nospec(nr, IA32_NR_syscalls);
 		regs->ax = ia32_sys_call_table[nr](regs);
+	} else if (unlikely((int)nr != -1)) {
+		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
 }
 
-- 
GitLab


From ba5ef6dc8a827a904794210a227cdb94828e8ae7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 20 May 2021 13:21:20 +0100
Subject: [PATCH 1160/3804] io_uring: fortify tctx/io_wq cleanup

We don't want anyone poking into tctx->io_wq while it's being destroyed
by io_wq_put_and_exit(). Even though that shouldn't happen, if the code
is buggy it would be preferable to get a NULL-deref instead of a subtle
delayed failure or UAF.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/827b021de17926fd807610b3e53a5a5fa8530856.1621513214.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 89ec10471b30b..5f82954004f61 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9035,15 +9035,15 @@ static void io_uring_del_task_file(unsigned long index)
 
 static void io_uring_clean_tctx(struct io_uring_task *tctx)
 {
+	struct io_wq *wq = tctx->io_wq;
 	struct io_tctx_node *node;
 	unsigned long index;
 
+	tctx->io_wq = NULL;
 	xa_for_each(&tctx->xa, index, node)
 		io_uring_del_task_file(index);
-	if (tctx->io_wq) {
-		io_wq_put_and_exit(tctx->io_wq);
-		tctx->io_wq = NULL;
-	}
+	if (wq)
+		io_wq_put_and_exit(wq);
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
-- 
GitLab


From e380adfc213a13677993c0e35cb48f5a8e61ebb0 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 19 May 2021 00:40:27 +0900
Subject: [PATCH 1161/3804] btrfs: zoned: pass start block to
 btrfs_use_zone_append

btrfs_use_zone_append only needs the passed in extent_map's block_start
member, so there's no need to pass in the full extent map.

This also enables the use of btrfs_use_zone_append in places where we only
have a start byte but no extent_map.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 2 +-
 fs/btrfs/inode.c     | 2 +-
 fs/btrfs/zoned.c     | 4 ++--
 fs/btrfs/zoned.h     | 5 ++---
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 360d997c72263..d9f20ca3ac7db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3762,7 +3762,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		/* Note that em_end from extent_map_end() is exclusive */
 		iosize = min(em_end, end + 1) - cur;
 
-		if (btrfs_use_zone_append(inode, em))
+		if (btrfs_use_zone_append(inode, em->block_start))
 			opf = REQ_OP_ZONE_APPEND;
 
 		free_extent_map(em);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 095e452f59f0f..bb4ab408d6701 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7796,7 +7796,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	iomap->bdev = fs_info->fs_devices->latest_bdev;
 	iomap->length = len;
 
-	if (write && btrfs_use_zone_append(BTRFS_I(inode), em))
+	if (write && btrfs_use_zone_append(BTRFS_I(inode), em->block_start))
 		iomap->flags |= IOMAP_F_ZONE_APPEND;
 
 	free_extent_map(em);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 304ce64c70a44..1bb8ee97aae09 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1278,7 +1278,7 @@ void btrfs_free_redirty_list(struct btrfs_transaction *trans)
 	spin_unlock(&trans->releasing_ebs_lock);
 }
 
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_block_group *cache;
@@ -1293,7 +1293,7 @@ bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
 	if (!is_data_inode(&inode->vfs_inode))
 		return false;
 
-	cache = btrfs_lookup_block_group(fs_info, em->block_start);
+	cache = btrfs_lookup_block_group(fs_info, start);
 	ASSERT(cache);
 	if (!cache)
 		return false;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 5e41a74a9cb24..e55d32595c2c0 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -53,7 +53,7 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
 void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 			    struct extent_buffer *eb);
 void btrfs_free_redirty_list(struct btrfs_transaction *trans);
-bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em);
+bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start);
 void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
 				 struct bio *bio);
 void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
@@ -152,8 +152,7 @@ static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
 					  struct extent_buffer *eb) { }
 static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
 
-static inline bool btrfs_use_zone_append(struct btrfs_inode *inode,
-					 struct extent_map *em)
+static inline bool btrfs_use_zone_append(struct btrfs_inode *inode, u64 start)
 {
 	return false;
 }
-- 
GitLab


From 764c7c9a464b68f7c6a5a9ec0b923176a05e8e8f Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 19 May 2021 00:40:28 +0900
Subject: [PATCH 1162/3804] btrfs: zoned: fix parallel compressed writes

When multiple processes write data to the same block group on a
compressed zoned filesystem, the underlying device could report I/O
errors and data corruption is possible.

This happens because on a zoned file system, compressed data writes
were sent to the device via a REQ_OP_WRITE instead of a
REQ_OP_ZONE_APPEND operation. But with REQ_OP_WRITE and parallel
submission it cannot be guaranteed that the data is always submitted
aligned to the underlying zone's write pointer.

The change to using REQ_OP_ZONE_APPEND instead of REQ_OP_WRITE on a
zoned filesystem is non intrusive on a regular file system or when
submitting to a conventional zone on a zoned filesystem, as it is
guarded by btrfs_use_zone_append.

Reported-by: David Sterba <dsterba@suse.com>
Fixes: 9d294a685fbc ("btrfs: zoned: enable to mount ZONED incompat flag")
CC: stable@vger.kernel.org # 5.12.x: e380adfc213a13: btrfs: zoned: pass start block to btrfs_use_zone_append
CC: stable@vger.kernel.org # 5.12.x
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 17f93fd28f7e3..91743a0b34c51 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -28,6 +28,7 @@
 #include "compression.h"
 #include "extent_io.h"
 #include "extent_map.h"
+#include "zoned.h"
 
 static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
 
@@ -349,6 +350,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	 */
 	inode = cb->inode;
 	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
+	btrfs_record_physical_zoned(inode, cb->start, bio);
 	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
 			cb->start, cb->start + cb->len - 1,
 			bio->bi_status == BLK_STS_OK);
@@ -401,6 +403,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	u64 first_byte = disk_start;
 	blk_status_t ret;
 	int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
+	const bool use_append = btrfs_use_zone_append(inode, disk_start);
+	const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
 
 	WARN_ON(!PAGE_ALIGNED(start));
 	cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
@@ -418,10 +422,31 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	cb->nr_pages = nr_pages;
 
 	bio = btrfs_bio_alloc(first_byte);
-	bio->bi_opf = REQ_OP_WRITE | write_flags;
+	bio->bi_opf = bio_op | write_flags;
 	bio->bi_private = cb;
 	bio->bi_end_io = end_compressed_bio_write;
 
+	if (use_append) {
+		struct extent_map *em;
+		struct map_lookup *map;
+		struct block_device *bdev;
+
+		em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
+		if (IS_ERR(em)) {
+			kfree(cb);
+			bio_put(bio);
+			return BLK_STS_NOTSUPP;
+		}
+
+		map = em->map_lookup;
+		/* We only support single profile for now */
+		ASSERT(map->num_stripes == 1);
+		bdev = map->stripes[0].dev->bdev;
+
+		bio_set_dev(bio, bdev);
+		free_extent_map(em);
+	}
+
 	if (blkcg_css) {
 		bio->bi_opf |= REQ_CGROUP_PUNT;
 		kthread_associate_blkcg(blkcg_css);
@@ -432,6 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		int submit = 0;
+		int len;
 
 		page = compressed_pages[pg_index];
 		page->mapping = inode->vfs_inode.i_mapping;
@@ -439,9 +465,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
 							  0);
 
+		if (pg_index == 0 && use_append)
+			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
+		else
+			len = bio_add_page(bio, page, PAGE_SIZE, 0);
+
 		page->mapping = NULL;
-		if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
-		    PAGE_SIZE) {
+		if (submit || len < PAGE_SIZE) {
 			/*
 			 * inc the count before we submit the bio so
 			 * we know the end IO handler won't happen before
@@ -465,11 +495,15 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			}
 
 			bio = btrfs_bio_alloc(first_byte);
-			bio->bi_opf = REQ_OP_WRITE | write_flags;
+			bio->bi_opf = bio_op | write_flags;
 			bio->bi_private = cb;
 			bio->bi_end_io = end_compressed_bio_write;
 			if (blkcg_css)
 				bio->bi_opf |= REQ_CGROUP_PUNT;
+			/*
+			 * Use bio_add_page() to ensure the bio has at least one
+			 * page.
+			 */
 			bio_add_page(bio, page, PAGE_SIZE, 0);
 		}
 		if (bytes_left < PAGE_SIZE) {
-- 
GitLab


From 6c60ff048ca1e0739f39aa25996543c6e662a46c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 14 May 2021 15:18:41 +0200
Subject: [PATCH 1163/3804] block: prevent block device lookups at the
 beginning of del_gendisk

As an artifact of how gendisk lookup used to work in earlier kernels,
GENHD_FL_UP is only cleared very late in del_gendisk, and a global lock
is used to prevent opens from succeeding while del_gendisk is tearing
down the gendisk.  Switch to clearing the flag early and under bd_mutex
so that callers can use bd_mutex to stabilize the flag, which removes
the need for the global mutex.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514131842.1600568-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 11 +----------
 fs/block_dev.c        | 15 +++++----------
 include/linux/genhd.h |  2 --
 3 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 39ca97b0edc61..9f8cb7beaad11 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -29,8 +29,6 @@
 
 static struct kobject *block_depr;
 
-DECLARE_RWSEM(bdev_lookup_sem);
-
 /* for extended dynamic devt allocation, currently only one major is used */
 #define NR_EXT_DEVT		(1 << MINORBITS)
 static DEFINE_IDA(ext_devt_ida);
@@ -609,13 +607,8 @@ void del_gendisk(struct gendisk *disk)
 	blk_integrity_del(disk);
 	disk_del_events(disk);
 
-	/*
-	 * Block lookups of the disk until all bdevs are unhashed and the
-	 * disk is marked as dead (GENHD_FL_UP cleared).
-	 */
-	down_write(&bdev_lookup_sem);
-
 	mutex_lock(&disk->part0->bd_mutex);
+	disk->flags &= ~GENHD_FL_UP;
 	blk_drop_partitions(disk);
 	mutex_unlock(&disk->part0->bd_mutex);
 
@@ -629,8 +622,6 @@ void del_gendisk(struct gendisk *disk)
 	remove_inode_hash(disk->part0->bd_inode);
 
 	set_capacity(disk, 0);
-	disk->flags &= ~GENHD_FL_UP;
-	up_write(&bdev_lookup_sem);
 
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
diff --git a/fs/block_dev.c b/fs/block_dev.c
index eb265d72fce89..580bae995b879 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1298,6 +1298,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
+	if (!(disk->flags & GENHD_FL_UP))
+		return -ENXIO;
+
 	if (!bdev->bd_openers) {
 		if (!bdev_is_partition(bdev)) {
 			ret = 0;
@@ -1332,8 +1335,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 			whole->bd_part_count++;
 			mutex_unlock(&whole->bd_mutex);
 
-			if (!(disk->flags & GENHD_FL_UP) ||
-			    !bdev_nr_sectors(bdev)) {
+			if (!bdev_nr_sectors(bdev)) {
 				__blkdev_put(whole, mode, 1);
 				bdput(whole);
 				return -ENXIO;
@@ -1364,16 +1366,12 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 	struct block_device *bdev;
 	struct gendisk *disk;
 
-	down_read(&bdev_lookup_sem);
 	bdev = bdget(dev);
 	if (!bdev) {
-		up_read(&bdev_lookup_sem);
 		blk_request_module(dev);
-		down_read(&bdev_lookup_sem);
-
 		bdev = bdget(dev);
 		if (!bdev)
-			goto unlock;
+			return NULL;
 	}
 
 	disk = bdev->bd_disk;
@@ -1383,14 +1381,11 @@ struct block_device *blkdev_get_no_open(dev_t dev)
 		goto put_disk;
 	if (!try_module_get(bdev->bd_disk->fops->owner))
 		goto put_disk;
-	up_read(&bdev_lookup_sem);
 	return bdev;
 put_disk:
 	put_disk(disk);
 bdput:
 	bdput(bdev);
-unlock:
-	up_read(&bdev_lookup_sem);
 	return NULL;
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7e9660ea967d5..6fc26f7bdf71e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -306,8 +306,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
 }
 #endif /* CONFIG_SYSFS */
 
-extern struct rw_semaphore bdev_lookup_sem;
-
 dev_t blk_lookup_devt(const char *name, int partno);
 void blk_request_module(dev_t devt);
 #ifdef CONFIG_BLOCK
-- 
GitLab


From bc6a385132601c29a6da1dbf8148c0d3c9ad36dc Mon Sep 17 00:00:00 2001
From: Gulam Mohamed <gulam.mohamed@oracle.com>
Date: Fri, 14 May 2021 15:18:42 +0200
Subject: [PATCH 1164/3804] block: fix a race between del_gendisk and BLKRRPART

When BLKRRPART is called concurrently with del_gendisk, the partitions
rescan can create a stale partition that will never be cleaned up.

Fix this by checking that the disk is up before rescanning partitions
while under bd_mutex.

Signed-off-by: Gulam Mohamed <gulam.mohamed@oracle.com>
[hch: split from a larger patch]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210514131842.1600568-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 580bae995b879..4494411fa4d3e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1244,6 +1244,9 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 
 	lockdep_assert_held(&bdev->bd_mutex);
 
+	if (!(disk->flags & GENHD_FL_UP))
+		return -ENXIO;
+
 rescan:
 	if (bdev->bd_part_count)
 		return -EBUSY;
-- 
GitLab


From c311f53362b05a82b47d1ed9dcfeb4637063b8d7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:19 +0200
Subject: [PATCH 1165/3804] media: rcar-fcp: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

As a bonus, pm_runtime_resume_and_get() always returns 0 on success.
So, the code can be simplified.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar-fcp.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c
index 5c03318ae07b7..eb59a3ba6d0fe 100644
--- a/drivers/media/platform/rcar-fcp.c
+++ b/drivers/media/platform/rcar-fcp.c
@@ -96,18 +96,10 @@ EXPORT_SYMBOL_GPL(rcar_fcp_get_device);
  */
 int rcar_fcp_enable(struct rcar_fcp_device *fcp)
 {
-	int ret;
-
 	if (!fcp)
 		return 0;
 
-	ret = pm_runtime_get_sync(fcp->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(fcp->dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(fcp->dev);
 }
 EXPORT_SYMBOL_GPL(rcar_fcp_enable);
 
-- 
GitLab


From 8102cf89ecd594572433ffb9103ce104ae57927e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:20 +0200
Subject: [PATCH 1166/3804] media: rkisp1: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
index 9643bdd05b7b2..60cd2200e7aea 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-capture.c
@@ -1003,9 +1003,8 @@ rkisp1_vb2_start_streaming(struct vb2_queue *queue, unsigned int count)
 	if (ret)
 		goto err_pipeline_stop;
 
-	ret = pm_runtime_get_sync(cap->rkisp1->dev);
+	ret = pm_runtime_resume_and_get(cap->rkisp1->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(cap->rkisp1->dev);
 		dev_err(cap->rkisp1->dev, "power up failed %d\n", ret);
 		goto err_destroy_dummy;
 	}
-- 
GitLab


From a8779c2752f27f045f60bb191e257528374dc45c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:20 +0200
Subject: [PATCH 1167/3804] media: s3c-camif: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Reviewed-by: Sylwester Nawrocki <snawrocki@kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/s3c-camif/camif-capture.c | 2 +-
 drivers/media/platform/s3c-camif/camif-core.c    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c
index 9ca49af29542d..62241ec3b978d 100644
--- a/drivers/media/platform/s3c-camif/camif-capture.c
+++ b/drivers/media/platform/s3c-camif/camif-capture.c
@@ -547,7 +547,7 @@ static int s3c_camif_open(struct file *file)
 	if (ret < 0)
 		goto unlock;
 
-	ret = pm_runtime_get_sync(camif->dev);
+	ret = pm_runtime_resume_and_get(camif->dev);
 	if (ret < 0)
 		goto err_pm;
 
diff --git a/drivers/media/platform/s3c-camif/camif-core.c b/drivers/media/platform/s3c-camif/camif-core.c
index 4c3c00d59c920..e1d51fd3e7008 100644
--- a/drivers/media/platform/s3c-camif/camif-core.c
+++ b/drivers/media/platform/s3c-camif/camif-core.c
@@ -460,9 +460,9 @@ static int s3c_camif_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
-		goto err_pm;
+		goto err_disable;
 
 	ret = camif_media_dev_init(camif);
 	if (ret < 0)
@@ -502,6 +502,7 @@ err_sens:
 	camif_unregister_media_entities(camif);
 err_pm:
 	pm_runtime_put(dev);
+err_disable:
 	pm_runtime_disable(dev);
 	camif_clk_put(camif);
 err_clk:
-- 
GitLab


From 67b92f54186c0bf17038833cc376adaf133a4b6a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:20 +0200
Subject: [PATCH 1168/3804] media: s5p-mfc: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/s5p-mfc/s5p_mfc_pm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_pm.c b/drivers/media/platform/s5p-mfc/s5p_mfc_pm.c
index 62d2320a72186..88b7d33c91973 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_pm.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_pm.c
@@ -78,11 +78,9 @@ int s5p_mfc_power_on(void)
 {
 	int i, ret = 0;
 
-	ret = pm_runtime_get_sync(pm->device);
-	if (ret < 0) {
-		pm_runtime_put_noidle(pm->device);
+	ret = pm_runtime_resume_and_get(pm->device);
+	if (ret < 0)
 		return ret;
-	}
 
 	/* clock control */
 	for (i = 0; i < pm->num_clocks; i++) {
-- 
GitLab


From 75c573eb704178051637fdcd980e2850e0932080 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 1169/3804] media: stm32: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/stm32/stm32-dcmi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c
index d9b4ad0abf0c0..b33c6e7ae0a1b 100644
--- a/drivers/media/platform/stm32/stm32-dcmi.c
+++ b/drivers/media/platform/stm32/stm32-dcmi.c
@@ -723,11 +723,11 @@ static int dcmi_start_streaming(struct vb2_queue *vq, unsigned int count)
 	u32 val = 0;
 	int ret;
 
-	ret = pm_runtime_get_sync(dcmi->dev);
+	ret = pm_runtime_resume_and_get(dcmi->dev);
 	if (ret < 0) {
 		dev_err(dcmi->dev, "%s: Failed to start streaming, cannot get sync (%d)\n",
 			__func__, ret);
-		goto err_pm_put;
+		goto err_unlocked;
 	}
 
 	ret = media_pipeline_start(&dcmi->vdev->entity, &dcmi->pipeline);
@@ -848,6 +848,7 @@ err_media_pipeline_stop:
 
 err_pm_put:
 	pm_runtime_put(dcmi->dev);
+err_unlocked:
 	spin_lock_irq(&dcmi->irqlock);
 	/*
 	 * Return all buffers to vb2 in QUEUED state.
-- 
GitLab


From 79e790ff0bc5192f874cc587c462825556133d1c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 1170/3804] media: sunxi: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
index 4785faddf6302..54b909987caa5 100644
--- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
+++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
@@ -206,9 +206,9 @@ static int sun4i_csi_open(struct file *file)
 	if (ret)
 		return ret;
 
-	ret = pm_runtime_get_sync(csi->dev);
+	ret = pm_runtime_resume_and_get(csi->dev);
 	if (ret < 0)
-		goto err_pm_put;
+		goto err_unlock;
 
 	ret = v4l2_pipeline_pm_get(&csi->vdev.entity);
 	if (ret)
@@ -227,6 +227,8 @@ err_pipeline_pm_put:
 
 err_pm_put:
 	pm_runtime_put(csi->dev);
+
+err_unlock:
 	mutex_unlock(&csi->lock);
 
 	return ret;
-- 
GitLab


From 588bc430133c912f0ff39c375eae9baa81978d1e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 1171/3804] media: ti-vpe: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/ti-vpe/cal-video.c | 4 +++-
 drivers/media/platform/ti-vpe/cal.c       | 8 +++++---
 drivers/media/platform/ti-vpe/vpe.c       | 8 +++-----
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/media/platform/ti-vpe/cal-video.c b/drivers/media/platform/ti-vpe/cal-video.c
index 7b7436a355ee3..15fb5360cf13c 100644
--- a/drivers/media/platform/ti-vpe/cal-video.c
+++ b/drivers/media/platform/ti-vpe/cal-video.c
@@ -700,7 +700,9 @@ static int cal_start_streaming(struct vb2_queue *vq, unsigned int count)
 
 	addr = vb2_dma_contig_plane_dma_addr(&buf->vb.vb2_buf, 0);
 
-	pm_runtime_get_sync(ctx->cal->dev);
+	ret = pm_runtime_resume_and_get(ctx->cal->dev);
+	if (ret < 0)
+		goto error_pipeline;
 
 	cal_ctx_set_dma_addr(ctx, addr);
 	cal_ctx_start(ctx);
diff --git a/drivers/media/platform/ti-vpe/cal.c b/drivers/media/platform/ti-vpe/cal.c
index 2e2bef91b2b06..76fe7a8b33f6b 100644
--- a/drivers/media/platform/ti-vpe/cal.c
+++ b/drivers/media/platform/ti-vpe/cal.c
@@ -1024,7 +1024,7 @@ static int cal_probe(struct platform_device *pdev)
 
 	/* Read the revision and hardware info to verify hardware access. */
 	pm_runtime_enable(&pdev->dev);
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret)
 		goto error_pm_runtime;
 
@@ -1098,10 +1098,11 @@ static int cal_remove(struct platform_device *pdev)
 {
 	struct cal_dev *cal = platform_get_drvdata(pdev);
 	unsigned int i;
+	int ret;
 
 	cal_dbg(1, cal, "Removing %s\n", CAL_MODULE_NAME);
 
-	pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 
 	cal_media_unregister(cal);
 
@@ -1115,7 +1116,8 @@ static int cal_remove(struct platform_device *pdev)
 	for (i = 0; i < cal->data->num_csi2_phy; i++)
 		cal_camerarx_destroy(cal->phy[i]);
 
-	pm_runtime_put_sync(&pdev->dev);
+	if (ret >= 0)
+		pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
 	return 0;
diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c
index 10251b787674a..5b1c5d96a4079 100644
--- a/drivers/media/platform/ti-vpe/vpe.c
+++ b/drivers/media/platform/ti-vpe/vpe.c
@@ -2471,11 +2471,9 @@ static int vpe_runtime_get(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "vpe_runtime_get\n");
 
-	r = pm_runtime_get_sync(&pdev->dev);
+	r = pm_runtime_resume_and_get(&pdev->dev);
 	WARN_ON(r < 0);
-	if (r)
-		pm_runtime_put_noidle(&pdev->dev);
-	return r < 0 ? r : 0;
+	return r;
 }
 
 static void vpe_runtime_put(struct platform_device *pdev)
@@ -2580,7 +2578,7 @@ static int vpe_probe(struct platform_device *pdev)
 	pm_runtime_enable(&pdev->dev);
 
 	ret = vpe_runtime_get(pdev);
-	if (ret)
+	if (ret < 0)
 		goto rel_m2m;
 
 	/* Perform clk enable followed by reset */
-- 
GitLab


From 71aeaedc968343c644b3c073b715b24b909ef088 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:21 +0200
Subject: [PATCH 1172/3804] media: vsp1: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

As a bonus, pm_runtime_resume_and_get() always returns 0 on success.
So, the code can be simplified.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/vsp1/vsp1_drv.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/media/platform/vsp1/vsp1_drv.c b/drivers/media/platform/vsp1/vsp1_drv.c
index aa66e4f5f3f34..de442d6c99269 100644
--- a/drivers/media/platform/vsp1/vsp1_drv.c
+++ b/drivers/media/platform/vsp1/vsp1_drv.c
@@ -559,15 +559,7 @@ static int vsp1_device_init(struct vsp1_device *vsp1)
  */
 int vsp1_device_get(struct vsp1_device *vsp1)
 {
-	int ret;
-
-	ret = pm_runtime_get_sync(vsp1->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(vsp1->dev);
-		return ret;
-	}
-
-	return 0;
+	return pm_runtime_resume_and_get(vsp1->dev);
 }
 
 /*
-- 
GitLab


From 334fe327a8b4fdebea14af478f7f6185d45e566b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 23 Apr 2021 17:19:20 +0200
Subject: [PATCH 1173/3804] media: rcar-vin: use pm_runtime_resume_and_get()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

Use the new API, in order to clean up the error check logic.

Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar-vin/rcar-csi2.c | 15 ++++++++++++---
 drivers/media/platform/rcar-vin/rcar-dma.c  |  6 ++----
 drivers/media/platform/rcar-vin/rcar-v4l2.c |  6 ++----
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
index e06cd512aba20..99bf814eb2a78 100644
--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
+++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
@@ -406,10 +406,17 @@ static void rcsi2_enter_standby(struct rcar_csi2 *priv)
 	pm_runtime_put(priv->dev);
 }
 
-static void rcsi2_exit_standby(struct rcar_csi2 *priv)
+static int rcsi2_exit_standby(struct rcar_csi2 *priv)
 {
-	pm_runtime_get_sync(priv->dev);
+	int ret;
+
+	ret = pm_runtime_resume_and_get(priv->dev);
+	if (ret < 0)
+		return ret;
+
 	reset_control_deassert(priv->rstc);
+
+	return 0;
 }
 
 static int rcsi2_wait_phy_start(struct rcar_csi2 *priv,
@@ -657,7 +664,9 @@ static int rcsi2_start(struct rcar_csi2 *priv)
 {
 	int ret;
 
-	rcsi2_exit_standby(priv);
+	ret = rcsi2_exit_standby(priv);
+	if (ret < 0)
+		return ret;
 
 	ret = rcsi2_start_receiver(priv);
 	if (ret) {
diff --git a/drivers/media/platform/rcar-vin/rcar-dma.c b/drivers/media/platform/rcar-vin/rcar-dma.c
index f30dafbdf61ca..f5f722ab1d4e8 100644
--- a/drivers/media/platform/rcar-vin/rcar-dma.c
+++ b/drivers/media/platform/rcar-vin/rcar-dma.c
@@ -1458,11 +1458,9 @@ int rvin_set_channel_routing(struct rvin_dev *vin, u8 chsel)
 	u32 vnmc;
 	int ret;
 
-	ret = pm_runtime_get_sync(vin->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(vin->dev);
+	ret = pm_runtime_resume_and_get(vin->dev);
+	if (ret < 0)
 		return ret;
-	}
 
 	/* Make register writes take effect immediately. */
 	vnmc = rvin_read(vin, VNMC_REG);
diff --git a/drivers/media/platform/rcar-vin/rcar-v4l2.c b/drivers/media/platform/rcar-vin/rcar-v4l2.c
index 457a65bf6b664..b1e9f86caa5cf 100644
--- a/drivers/media/platform/rcar-vin/rcar-v4l2.c
+++ b/drivers/media/platform/rcar-vin/rcar-v4l2.c
@@ -870,11 +870,9 @@ static int rvin_open(struct file *file)
 	struct rvin_dev *vin = video_drvdata(file);
 	int ret;
 
-	ret = pm_runtime_get_sync(vin->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(vin->dev);
+	ret = pm_runtime_resume_and_get(vin->dev);
+	if (ret < 0)
 		return ret;
-	}
 
 	ret = mutex_lock_interruptible(&vin->lock);
 	if (ret)
-- 
GitLab


From 10f05966c52052c06ec4303ffc2f8185df713784 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 26 Apr 2021 14:26:43 +0200
Subject: [PATCH 1174/3804] media: hantro: use pm_runtime_resume_and_get()

Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter")
added pm_runtime_resume_and_get() in order to automatically handle
dev->power.usage_count decrement on errors.

While there's nothing wrong with the current usage on this driver,
as we're getting rid of the pm_runtime_get_sync() call all over
the media subsystem, let's remove the last occurrence on this
driver.

Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 85dcb0882afcb..074b9bb30d6d9 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -160,11 +160,9 @@ static void device_run(void *priv)
 	src = hantro_get_src_buf(ctx);
 	dst = hantro_get_dst_buf(ctx);
 
-	ret = pm_runtime_get_sync(ctx->dev->dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(ctx->dev->dev);
+	ret = pm_runtime_resume_and_get(ctx->dev->dev);
+	if (ret < 0)
 		goto err_cancel_job;
-	}
 
 	ret = clk_bulk_enable(ctx->dev->variant->num_clocks, ctx->dev->clocks);
 	if (ret)
-- 
GitLab


From 89cf71e62cd39da12e7a6e36ae6db126391ca9e2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 20 May 2021 16:05:45 +0200
Subject: [PATCH 1175/3804] media: davinci: fix two kernel-doc comments

A typo there is causing two warnings:
	drivers/media/platform/davinci/vpif_display.c:114: warning: Function parameter or member 'nplanes' not described in 'vpif_buffer_queue_setup'
	drivers/media/platform/davinci/vpif_capture.c:112: warning: Function parameter or member 'nplanes' not described in 'vpif_buffer_queue_setup'

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/davinci/vpif_capture.c | 2 +-
 drivers/media/platform/davinci/vpif_display.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c
index 8d2e165bf7de1..c034e25dd9aae 100644
--- a/drivers/media/platform/davinci/vpif_capture.c
+++ b/drivers/media/platform/davinci/vpif_capture.c
@@ -99,7 +99,7 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb)
  * vpif_buffer_queue_setup : Callback function for buffer setup.
  * @vq: vb2_queue ptr
  * @nbuffers: ptr to number of buffers requested by application
- * @nplanes:: contains number of distinct video planes needed to hold a frame
+ * @nplanes: contains number of distinct video planes needed to hold a frame
  * @sizes: contains the size (in bytes) of each plane.
  * @alloc_devs: ptr to allocation context
  *
diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c
index e5f61d9b221d3..59f6b782e1040 100644
--- a/drivers/media/platform/davinci/vpif_display.c
+++ b/drivers/media/platform/davinci/vpif_display.c
@@ -101,7 +101,7 @@ static int vpif_buffer_prepare(struct vb2_buffer *vb)
  * vpif_buffer_queue_setup : Callback function for buffer setup.
  * @vq: vb2_queue ptr
  * @nbuffers: ptr to number of buffers requested by application
- * @nplanes:: contains number of distinct video planes needed to hold a frame
+ * @nplanes: contains number of distinct video planes needed to hold a frame
  * @sizes: contains the size (in bytes) of each plane.
  * @alloc_devs: ptr to allocation context
  *
-- 
GitLab


From 5665bc35c1ed917ac8fd06cb651317bb47a65b10 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 20 May 2021 21:19:30 +1000
Subject: [PATCH 1176/3804] powerpc/64s/syscall: Use pt_regs.trap to
 distinguish syscall ABI difference between sc and scv syscalls

The sc and scv 0 system calls have different ABI conventions, and
ptracers need to know which system call type is being used if they want
to look at the syscall registers.

Document that pt_regs.trap can be used for this, and fix one in-tree user
to work with scv 0 syscalls.

Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions")
Cc: stable@vger.kernel.org # v5.9+
Reported-by: "Dmitry V. Levin" <ldv@altlinux.org>
Suggested-by: "Dmitry V. Levin" <ldv@altlinux.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520111931.2597127-1-npiggin@gmail.com
---
 Documentation/powerpc/syscall64-abi.rst       | 10 +++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 27 ++++++++++++-------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index dabee3729e5a5..56490c4c0c07a 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -109,6 +109,16 @@ auxiliary vector.
 
 scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC.
 
+ptrace
+------
+When ptracing system calls (PTRACE_SYSCALL), the pt_regs.trap value contains
+the system call type that can be used to distinguish between sc and scv 0
+system calls, and the different register conventions can be accounted for.
+
+If the value of (pt_regs.trap & 0xfff0) is 0xc00 then the system call was
+performed with the sc instruction, if it is 0x3000 then the system call was
+performed with the scv 0 instruction.
+
 vsyscall
 ========
 
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 98c3b647f54dc..e3d5c77a86121 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1753,16 +1753,25 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # define SYSCALL_RET_SET(_regs, _val)				\
 	do {							\
 		typeof(_val) _result = (_val);			\
-		/*						\
-		 * A syscall error is signaled by CR0 SO bit	\
-		 * and the code is stored as a positive value.	\
-		 */						\
-		if (_result < 0) {				\
-			SYSCALL_RET(_regs) = -_result;		\
-			(_regs).ccr |= 0x10000000;		\
-		} else {					\
+		if ((_regs.trap & 0xfff0) == 0x3000) {		\
+			/*					\
+			 * scv 0 system call uses -ve result	\
+			 * for error, so no need to adjust.	\
+			 */					\
 			SYSCALL_RET(_regs) = _result;		\
-			(_regs).ccr &= ~0x10000000;		\
+		} else {					\
+			/*					\
+			 * A syscall error is signaled by the	\
+			 * CR0 SO bit and the code is stored as	\
+			 * a positive value.			\
+			 */					\
+			if (_result < 0) {			\
+				SYSCALL_RET(_regs) = -_result;	\
+				(_regs).ccr |= 0x10000000;	\
+			} else {				\
+				SYSCALL_RET(_regs) = _result;	\
+				(_regs).ccr &= ~0x10000000;	\
+			}					\
 		}						\
 	} while (0)
 # define SYSCALL_RET_SET_ON_PTRACE_EXIT
-- 
GitLab


From d72500f992849d31ebae8f821a023660ddd0dcc2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 20 May 2021 21:19:31 +1000
Subject: [PATCH 1177/3804] powerpc/64s/syscall: Fix ptrace syscall info with
 scv syscalls

The scv implementation missed updating syscall return value and error
value get/set functions to deal with the changed register ABI. This
broke ptrace PTRACE_GET_SYSCALL_INFO as well as some kernel auditing
and tracing functions.

Fix these functions. tools/testing/selftests/ptrace/get_syscall_info now
passes when scv is used.

Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions")
Cc: stable@vger.kernel.org # v5.9+
Reported-by: "Dmitry V. Levin" <ldv@altlinux.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210520111931.2597127-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h  | 45 +++++++++++++++++-------------
 arch/powerpc/include/asm/syscall.h | 42 +++++++++++++++++-----------
 2 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 9c9ab27461683..b476a685f066e 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -19,6 +19,7 @@
 #ifndef _ASM_POWERPC_PTRACE_H
 #define _ASM_POWERPC_PTRACE_H
 
+#include <linux/err.h>
 #include <uapi/asm/ptrace.h>
 #include <asm/asm-const.h>
 
@@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs);
 long do_syscall_trace_enter(struct pt_regs *regs);
 void do_syscall_trace_leave(struct pt_regs *regs);
 
-#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-static inline int is_syscall_success(struct pt_regs *regs)
-{
-	return !(regs->ccr & 0x10000000);
-}
-
-static inline long regs_return_value(struct pt_regs *regs)
-{
-	if (is_syscall_success(regs))
-		return regs->gpr[3];
-	else
-		return -regs->gpr[3];
-}
-
-static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
-{
-	regs->gpr[3] = rc;
-}
-
 #ifdef __powerpc64__
 #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
 #else
@@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs)
 	regs->trap |= 0x1;
 }
 
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+	if (trap_is_scv(regs))
+		return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+	else
+		return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+	if (trap_is_scv(regs))
+		return regs->gpr[3];
+
+	if (is_syscall_success(regs))
+		return regs->gpr[3];
+	else
+		return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->gpr[3] = rc;
+}
+
 #define arch_has_single_step()	(1)
 #define arch_has_block_step()	(true)
 #define ARCH_HAS_USER_SINGLE_STEP_REPORT
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index fd1b518eed17c..ba0f88f3a30da 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task,
 static inline long syscall_get_error(struct task_struct *task,
 				     struct pt_regs *regs)
 {
-	/*
-	 * If the system call failed,
-	 * regs->gpr[3] contains a positive ERRORCODE.
-	 */
-	return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+	if (trap_is_scv(regs)) {
+		unsigned long error = regs->gpr[3];
+
+		return IS_ERR_VALUE(error) ? error : 0;
+	} else {
+		/*
+		 * If the system call failed,
+		 * regs->gpr[3] contains a positive ERRORCODE.
+		 */
+		return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+	}
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,
@@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task,
 					    struct pt_regs *regs,
 					    int error, long val)
 {
-	/*
-	 * In the general case it's not obvious that we must deal with CCR
-	 * here, as the syscall exit path will also do that for us. However
-	 * there are some places, eg. the signal code, which check ccr to
-	 * decide if the value in r3 is actually an error.
-	 */
-	if (error) {
-		regs->ccr |= 0x10000000L;
-		regs->gpr[3] = error;
+	if (trap_is_scv(regs)) {
+		regs->gpr[3] = (long) error ?: val;
 	} else {
-		regs->ccr &= ~0x10000000L;
-		regs->gpr[3] = val;
+		/*
+		 * In the general case it's not obvious that we must deal with
+		 * CCR here, as the syscall exit path will also do that for us.
+		 * However there are some places, eg. the signal code, which
+		 * check ccr to decide if the value in r3 is actually an error.
+		 */
+		if (error) {
+			regs->ccr |= 0x10000000L;
+			regs->gpr[3] = error;
+		} else {
+			regs->ccr &= ~0x10000000L;
+			regs->gpr[3] = val;
+		}
 	}
 }
 
-- 
GitLab


From df8f2be2fd0b44b2cb6077068f52e05f0ac40897 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Thu, 20 May 2021 11:43:33 +0930
Subject: [PATCH 1178/3804] serial: 8250: Add UART_BUG_TXRACE workaround for
 Aspeed VUART

Aspeed Virtual UARTs directly bridge e.g. the system console UART on the
LPC bus to the UART interface on the BMC's internal APB. As such there's
no RS-232 signalling involved - the UART interfaces on each bus are
directly connected as the producers and consumers of the one set of
FIFOs.

The APB in the AST2600 generally runs at 100MHz while the LPC bus peaks
at 33MHz. The difference in clock speeds exposes a race in the VUART
design where a Tx data burst on the APB interface can result in a byte
lost on the LPC interface. The symptom is LSR[DR] remains clear on the
LPC interface despite data being present in its Rx FIFO, while LSR[THRE]
remains clear on the APB interface as the host has not consumed the data
the BMC has transmitted. In this state, the UART has stalled and no
further data can be transmitted without manual intervention (e.g.
resetting the FIFOs, resulting in loss of data).

The recommended work-around is to insert a read cycle on the APB
interface between writes to THR.

Cc: ChiaWei Wang <chiawei_wang@aspeedtech.com>
Tested-by: ChiaWei Wang <chiawei_wang@aspeedtech.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210520021334.497341-2-andrew@aj.id.au
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.h              |  1 +
 drivers/tty/serial/8250/8250_aspeed_vuart.c |  1 +
 drivers/tty/serial/8250/8250_port.c         | 12 ++++++++++++
 3 files changed, 14 insertions(+)

diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 52bb21205bb68..34aa2714f3c93 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -88,6 +88,7 @@ struct serial8250_config {
 #define UART_BUG_NOMSR	(1 << 2)	/* UART has buggy MSR status bits (Au1x00) */
 #define UART_BUG_THRE	(1 << 3)	/* UART has buggy THRE reassertion */
 #define UART_BUG_PARITY	(1 << 4)	/* UART mishandles parity if FIFO enabled */
+#define UART_BUG_TXRACE	(1 << 5)	/* UART Tx fails to set remote DR */
 
 
 #ifdef CONFIG_SERIAL_8250_SHARE_IRQ
diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c
index 61550f24a2d37..d035d08cb9871 100644
--- a/drivers/tty/serial/8250/8250_aspeed_vuart.c
+++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c
@@ -437,6 +437,7 @@ static int aspeed_vuart_probe(struct platform_device *pdev)
 	port.port.status = UPSTAT_SYNC_FIFO;
 	port.port.dev = &pdev->dev;
 	port.port.has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+	port.bugs |= UART_BUG_TXRACE;
 
 	rc = sysfs_create_group(&vuart->dev->kobj, &aspeed_vuart_attr_group);
 	if (rc < 0)
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index d45dab1ab3164..fc5ab20322821 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1809,6 +1809,18 @@ void serial8250_tx_chars(struct uart_8250_port *up)
 	count = up->tx_loadsz;
 	do {
 		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
+		if (up->bugs & UART_BUG_TXRACE) {
+			/*
+			 * The Aspeed BMC virtual UARTs have a bug where data
+			 * may get stuck in the BMC's Tx FIFO from bursts of
+			 * writes on the APB interface.
+			 *
+			 * Delay back-to-back writes by a read cycle to avoid
+			 * stalling the VUART. Read a register that won't have
+			 * side-effects and discard the result.
+			 */
+			serial_in(up, UART_SCR);
+		}
 		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
 		port->icount.tx++;
 		if (uart_circ_empty(xmit))
-- 
GitLab


From 1f06f5713f5278b7768031150ceb43d1127b9ad6 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Thu, 20 May 2021 11:43:34 +0930
Subject: [PATCH 1179/3804] serial: 8250: Use BIT(x) for UART_{CAP,BUG}_*

BIT(x) improves readability and safety with respect to shifts.

Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Link: https://lore.kernel.org/r/20210520021334.497341-3-andrew@aj.id.au
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250.h | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 34aa2714f3c93..6473361525d1f 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -7,6 +7,7 @@
  *  Copyright (C) 2001 Russell King.
  */
 
+#include <linux/bits.h>
 #include <linux/serial_8250.h>
 #include <linux/serial_reg.h>
 #include <linux/dmaengine.h>
@@ -70,25 +71,25 @@ struct serial8250_config {
 	unsigned int	flags;
 };
 
-#define UART_CAP_FIFO	(1 << 8)	/* UART has FIFO */
-#define UART_CAP_EFR	(1 << 9)	/* UART has EFR */
-#define UART_CAP_SLEEP	(1 << 10)	/* UART has IER sleep */
-#define UART_CAP_AFE	(1 << 11)	/* MCR-based hw flow control */
-#define UART_CAP_UUE	(1 << 12)	/* UART needs IER bit 6 set (Xscale) */
-#define UART_CAP_RTOIE	(1 << 13)	/* UART needs IER bit 4 set (Xscale, Tegra) */
-#define UART_CAP_HFIFO	(1 << 14)	/* UART has a "hidden" FIFO */
-#define UART_CAP_RPM	(1 << 15)	/* Runtime PM is active while idle */
-#define UART_CAP_IRDA	(1 << 16)	/* UART supports IrDA line discipline */
-#define UART_CAP_MINI	(1 << 17)	/* Mini UART on BCM283X family lacks:
+#define UART_CAP_FIFO	BIT(8)	/* UART has FIFO */
+#define UART_CAP_EFR	BIT(9)	/* UART has EFR */
+#define UART_CAP_SLEEP	BIT(10)	/* UART has IER sleep */
+#define UART_CAP_AFE	BIT(11)	/* MCR-based hw flow control */
+#define UART_CAP_UUE	BIT(12)	/* UART needs IER bit 6 set (Xscale) */
+#define UART_CAP_RTOIE	BIT(13)	/* UART needs IER bit 4 set (Xscale, Tegra) */
+#define UART_CAP_HFIFO	BIT(14)	/* UART has a "hidden" FIFO */
+#define UART_CAP_RPM	BIT(15)	/* Runtime PM is active while idle */
+#define UART_CAP_IRDA	BIT(16)	/* UART supports IrDA line discipline */
+#define UART_CAP_MINI	BIT(17)	/* Mini UART on BCM283X family lacks:
 					 * STOP PARITY EPAR SPAR WLEN5 WLEN6
 					 */
 
-#define UART_BUG_QUOT	(1 << 0)	/* UART has buggy quot LSB */
-#define UART_BUG_TXEN	(1 << 1)	/* UART has buggy TX IIR status */
-#define UART_BUG_NOMSR	(1 << 2)	/* UART has buggy MSR status bits (Au1x00) */
-#define UART_BUG_THRE	(1 << 3)	/* UART has buggy THRE reassertion */
-#define UART_BUG_PARITY	(1 << 4)	/* UART mishandles parity if FIFO enabled */
-#define UART_BUG_TXRACE	(1 << 5)	/* UART Tx fails to set remote DR */
+#define UART_BUG_QUOT	BIT(0)	/* UART has buggy quot LSB */
+#define UART_BUG_TXEN	BIT(1)	/* UART has buggy TX IIR status */
+#define UART_BUG_NOMSR	BIT(2)	/* UART has buggy MSR status bits (Au1x00) */
+#define UART_BUG_THRE	BIT(3)	/* UART has buggy THRE reassertion */
+#define UART_BUG_PARITY	BIT(4)	/* UART mishandles parity if FIFO enabled */
+#define UART_BUG_TXRACE	BIT(5)	/* UART Tx fails to set remote DR */
 
 
 #ifdef CONFIG_SERIAL_8250_SHARE_IRQ
-- 
GitLab


From 31fae7c8b18c3f8029a2a5dce97a3182c1a167a0 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Tue, 11 May 2021 20:49:55 +0530
Subject: [PATCH 1180/3804] serial: 8250: 8250_omap: Fix possible interrupt
 storm

It is possible that RX TIMEOUT is signalled after RX FIFO has been
drained, in which case a dummy read of RX FIFO is required to clear RX
TIMEOUT condition. Otherwise, the RX TIMEOUT condition is not cleared,
leading to an interrupt storm.

Cc: stable@vger.kernel.org
Reported-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Link: https://lore.kernel.org/r/20210511151955.28071-1-vigneshr@ti.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_omap.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 8ac11eaeca51b..c71bd766fa564 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -104,6 +104,9 @@
 #define UART_OMAP_EFR2			0x23
 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE	BIT(6)
 
+/* RX FIFO occupancy indicator */
+#define UART_OMAP_RX_LVL		0x64
+
 struct omap8250_priv {
 	int line;
 	u8 habit;
@@ -625,6 +628,15 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
 	serial8250_rpm_get(up);
 	iir = serial_port_in(port, UART_IIR);
 	ret = serial8250_handle_irq(port, iir);
+	/*
+	 * It is possible that RX TIMEOUT is signalled after FIFO
+	 * has been drained, in which case a dummy read of RX FIFO is
+	 * required to clear RX TIMEOUT condition.
+	 */
+	if ((iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) {
+		if (serial_port_in(port, UART_OMAP_RX_LVL) == 0)
+			serial_port_in(port, UART_RX);
+	}
 	serial8250_rpm_put(up);
 
 	return IRQ_RETVAL(ret);
-- 
GitLab


From e0e24208792080135248f23fdf6d51aa2e04df05 Mon Sep 17 00:00:00 2001
From: Randy Wright <rwright@hpe.com>
Date: Fri, 14 May 2021 10:26:54 -0600
Subject: [PATCH 1181/3804] serial: 8250_pci: Add support for new HPE serial
 device

Add support for new HPE serial device.  It is MSI enabled,
but otherwise similar to legacy HP server serial devices.

Tested-by: Jerry Hoemann <jerry.hoemann@hpe.com>
Signed-off-by: Randy Wright <rwright@hpe.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/1621009614-28836-1-git-send-email-rwright@hpe.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 689d8227f95f7..04fe424699903 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -56,6 +56,8 @@ struct serial_private {
 	int			line[];
 };
 
+#define PCI_DEVICE_ID_HPE_PCI_SERIAL	0x37e
+
 static const struct pci_device_id pci_use_msi[] = {
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9900,
 			 0xA000, 0x1000) },
@@ -63,6 +65,8 @@ static const struct pci_device_id pci_use_msi[] = {
 			 0xA000, 0x1000) },
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9922,
 			 0xA000, 0x1000) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL,
+			 PCI_ANY_ID, PCI_ANY_ID) },
 	{ }
 };
 
@@ -1997,6 +2001,16 @@ static struct pci_serial_quirk pci_serial_quirks[] = {
 		.init		= pci_hp_diva_init,
 		.setup		= pci_hp_diva_setup,
 	},
+	/*
+	 * HPE PCI serial device
+	 */
+	{
+		.vendor         = PCI_VENDOR_ID_HP_3PAR,
+		.device         = PCI_DEVICE_ID_HPE_PCI_SERIAL,
+		.subvendor      = PCI_ANY_ID,
+		.subdevice      = PCI_ANY_ID,
+		.setup		= pci_hp_diva_setup,
+	},
 	/*
 	 * Intel
 	 */
@@ -4973,6 +4987,10 @@ static const struct pci_device_id serial_pci_tbl[] = {
 	{	PCI_VENDOR_ID_HP, PCI_DEVICE_ID_HP_DIVA_AUX,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
 		pbn_b2_1_115200 },
+	/* HPE PCI serial device */
+	{	PCI_VENDOR_ID_HP_3PAR, PCI_DEVICE_ID_HPE_PCI_SERIAL,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_b1_1_115200 },
 
 	{	PCI_VENDOR_ID_DCI, PCI_DEVICE_ID_DCI_PCCOM2,
 		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
-- 
GitLab


From e0112a7c9e847ada15a631b88e279d547e8f26a7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 19 May 2021 17:16:50 +0300
Subject: [PATCH 1182/3804] staging: emxx_udc: fix loop in _nbu2ss_nuke()

The _nbu2ss_ep_done() function calls:

	list_del_init(&req->queue);

which means that the loop will never exit.

Fixes: ca3d253eb967 ("Staging: emxx_udc: Iterate list using list_for_each_entry")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/YKUd0sDyjm/lkJfJ@mwanda
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/emxx_udc/emxx_udc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c
index 741147a4f0fe4..ecc5c9da90270 100644
--- a/drivers/staging/emxx_udc/emxx_udc.c
+++ b/drivers/staging/emxx_udc/emxx_udc.c
@@ -2064,7 +2064,7 @@ static int _nbu2ss_nuke(struct nbu2ss_udc *udc,
 			struct nbu2ss_ep *ep,
 			int status)
 {
-	struct nbu2ss_req *req;
+	struct nbu2ss_req *req, *n;
 
 	/* Endpoint Disable */
 	_nbu2ss_epn_exit(udc, ep);
@@ -2076,7 +2076,7 @@ static int _nbu2ss_nuke(struct nbu2ss_udc *udc,
 		return 0;
 
 	/* called with irqs blocked */
-	list_for_each_entry(req, &ep->queue, queue) {
+	list_for_each_entry_safe(req, n, &ep->queue, queue) {
 		_nbu2ss_ep_done(ep, req, status);
 	}
 
-- 
GitLab


From 676a659b60afb13166371580f3f6f434e9ba6f21 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 9 May 2021 16:22:55 -0700
Subject: [PATCH 1183/3804] xfs: retry allocations when locality-based search
 fails

If a realtime allocation fails because we can't find a sufficiently
large free extent satisfying locality rules, relax the locality rules
and try again.  This reduces the occurrence of short writes to realtime
files when the write size is large and the free space is fragmented.

This was originally discovered by running generic/186 with the realtime
reflink patchset and a 128k cow extent size hint, but the short write
symptoms can manifest with a 128k extent size hint and no reflink, so
apply the fix now.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c9381bf4f04bf..0936f3a96fe6e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -84,6 +84,7 @@ xfs_bmap_rtalloc(
 	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
 	xfs_extlen_t		raminlen;
 	bool			rtlocked = false;
+	bool			ignore_locality = false;
 	int			error;
 
 	align = xfs_get_extsz_hint(ap->ip);
@@ -158,7 +159,10 @@ retry:
 	/*
 	 * Realtime allocation, done through xfs_rtallocate_extent.
 	 */
-	do_div(ap->blkno, mp->m_sb.sb_rextsize);
+	if (ignore_locality)
+		ap->blkno = 0;
+	else
+		do_div(ap->blkno, mp->m_sb.sb_rextsize);
 	rtb = ap->blkno;
 	ap->length = ralen;
 	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
@@ -197,6 +201,15 @@ retry:
 		goto retry;
 	}
 
+	if (!ignore_locality && ap->blkno != 0) {
+		/*
+		 * If we can't allocate near a specific rt extent, try again
+		 * without locality criteria.
+		 */
+		ignore_locality = true;
+		goto retry;
+	}
+
 	ap->blkno = NULLFSBLOCK;
 	ap->length = 0;
 	return 0;
-- 
GitLab


From 16c9de54dc868c121918f2ae91e46330f919049f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 16:41:13 -0700
Subject: [PATCH 1184/3804] xfs: fix deadlock retry tracepoint arguments

sc->ip is the inode that's being scrubbed, which means that it's not set
for scrub types that don't involve inodes.  If one of those scrubbers
(e.g. inode btrees) returns EDEADLOCK, we'll trip over the null pointer.
Fix that by reporting either the file being examined or the file that
was used to call scrub.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/scrub/common.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index aa874607618a2..be38c960da858 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -74,7 +74,9 @@ __xchk_process_error(
 		return true;
 	case -EDEADLOCK:
 		/* Used to restart an op with deadlock avoidance. */
-		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
+		trace_xchk_deadlock_retry(
+				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
+				sc->sm, *error);
 		break;
 	case -EFSBADCRC:
 	case -EFSCORRUPTED:
-- 
GitLab


From e3c2b047475b52739bcf178a9e95176c42bbcf8f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 16:43:10 -0700
Subject: [PATCH 1185/3804] xfs: restore old ioctl definitions

These ioctl definitions in xfs_fs.h are part of the userspace ABI and
were mistakenly removed during the 5.13 merge window.

Fixes: 9fefd5db08ce ("xfs: convert to fileattr")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_fs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index a83bdd0c47a84..bde2b4c64dbe3 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -770,6 +770,8 @@ struct xfs_scrub_metadata {
 /*
  * ioctl commands that are used by Linux filesystems
  */
+#define XFS_IOC_GETXFLAGS	FS_IOC_GETFLAGS
+#define XFS_IOC_SETXFLAGS	FS_IOC_SETFLAGS
 #define XFS_IOC_GETVERSION	FS_IOC_GETVERSION
 
 /*
@@ -780,6 +782,8 @@ struct xfs_scrub_metadata {
 #define XFS_IOC_ALLOCSP		_IOW ('X', 10, struct xfs_flock64)
 #define XFS_IOC_FREESP		_IOW ('X', 11, struct xfs_flock64)
 #define XFS_IOC_DIOINFO		_IOR ('X', 30, struct dioattr)
+#define XFS_IOC_FSGETXATTR	FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR	FS_IOC_FSSETXATTR
 #define XFS_IOC_ALLOCSP64	_IOW ('X', 36, struct xfs_flock64)
 #define XFS_IOC_FREESP64	_IOW ('X', 37, struct xfs_flock64)
 #define XFS_IOC_GETBMAP		_IOWR('X', 38, struct getbmap)
-- 
GitLab


From bb002388901151fe35b6697ab116f6ed0721a9ed Mon Sep 17 00:00:00 2001
From: zhouchuangao <zhouchuangao@vivo.com>
Date: Sun, 9 May 2021 19:34:37 -0700
Subject: [PATCH 1186/3804] fs/nfs: Use fatal_signal_pending instead of
 signal_pending

We set the state of the current process to TASK_KILLABLE via
prepare_to_wait(). Should we use fatal_signal_pending() to detect
the signal here?

Fixes: b4868b44c562 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE")
Signed-off-by: zhouchuangao <zhouchuangao@vivo.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 87d04f2c93852..0cd9658822329 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1706,7 +1706,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state,
 		rcu_read_unlock();
 		trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0);
 
-		if (!signal_pending(current)) {
+		if (!fatal_signal_pending(current)) {
 			if (schedule_timeout(5*HZ) == 0)
 				status = -EAGAIN;
 			else
@@ -3487,7 +3487,7 @@ static bool nfs4_refresh_open_old_stateid(nfs4_stateid *dst,
 		write_sequnlock(&state->seqlock);
 		trace_nfs4_close_stateid_update_wait(state->inode, dst, 0);
 
-		if (signal_pending(current))
+		if (fatal_signal_pending(current))
 			status = -EINTR;
 		else
 			if (schedule_timeout(5*HZ) != 0)
-- 
GitLab


From 769b01ea68b6c49dc3cde6adf7e53927dacbd3a8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 May 2021 11:49:42 +0300
Subject: [PATCH 1187/3804] NFS: fix an incorrect limit in
 filelayout_decode_layout()

The "sizeof(struct nfs_fh)" is two bytes too large and could lead to
memory corruption.  It should be NFS_MAXFHSIZE because that's the size
of the ->data[] buffer.

I reversed the order of the operands to put the variable on the left.

Fixes: 16b374ca439f ("NFSv4.1: pnfs: filelayout: add driver's LAYOUTGET and GETDEVICEINFO infrastructure")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/filelayout/filelayout.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index d158a500c25c6..d2103852475fa 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -718,7 +718,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
 		if (unlikely(!p))
 			goto out_err;
 		fl->fh_array[i]->size = be32_to_cpup(p++);
-		if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
+		if (fl->fh_array[i]->size > NFS_MAXFHSIZE) {
 			printk(KERN_ERR "NFS: Too big fh %d received %d\n",
 			       i, fl->fh_array[i]->size);
 			goto out_err;
-- 
GitLab


From d1d973950aceecd646ea3bee66764414bfeac072 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 12 May 2021 17:20:04 +0800
Subject: [PATCH 1188/3804] pNFS/NFSv4: Remove redundant initialization of
 'rd_size'

Variable 'rd_size' is initialized at its declaration; however,
this value is never read because 'rd_size' is assigned
a new value in the for statement. Remove the redundant
initialization.

Clean up clang warning:

fs/nfs/pnfs.c:2681:6: warning: Value stored to 'rd_size' during its
initialization is never read [clang-analyzer-deadcode.DeadStores]

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 03e0b34c4a64c..f076a6f3558da 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2678,7 +2678,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_range);
 void
 pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 {
-	u64 rd_size = req->wb_bytes;
+	u64 rd_size;
 
 	pnfs_generic_pg_check_layout(pgio);
 	pnfs_generic_pg_check_range(pgio, req);
-- 
GitLab


From e877a88d1f069edced4160792f42c2a8e2dba942 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 17 May 2021 09:59:10 +1000
Subject: [PATCH 1189/3804] SUNRPC in case of backlog, hand free slots directly
 to waiting task

If sunrpc.tcp_max_slot_table_entries is small and there are tasks
on the backlog queue, then when a request completes it is freed and the
first task on the queue is woken.  The expectation is that it will wake
and claim that request.  However if it was a sync task and the waiting
process was killed at just that moment, it will wake and NOT claim the
request.

As long as XPRT_CONGESTED remains set, requests can only be claimed by
tasks woken from the backlog, and they are woken only as requests are
freed, so when a task doesn't claim a request, no other task can ever
get that request until XPRT_CONGESTED is cleared.  Each time this
happens the number of available requests is decreased by one.

With a sufficiently high workload and sufficiently low setting of
max_slot (16 in the case where this was seen), XPRT_CONGESTED can remain
set for an extended period, and the above scenario (of a process being
killed just as its task was woken) can repeat until no requests can be
allocated.  Then traffic stops.

This patch addresses the problem by introducing a positive handover of a
request from a completing task to a backlog task - the request is never
freed when there is a backlog.

When a task is woken it might now already have a request attached, in
which case it is *not* freed (as with current code) but is initialised
(if needed) and used.  If it isn't used it will eventually be freed by
rpc_exit_task().  xprt_release() is enhanced to be able to correctly
release an uninitialised request.

Fixes: ba60eb25ff6b ("SUNRPC: Fix a livelock problem in the xprt->backlog queue")
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/clnt.c |  7 -----
 net/sunrpc/xprt.c | 68 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 47 insertions(+), 28 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f555d335e910d..42623d6b8f0ec 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1677,13 +1677,6 @@ call_reserveresult(struct rpc_task *task)
 		return;
 	}
 
-	/*
-	 * Even though there was an error, we may have acquired
-	 * a request slot somehow.  Make sure not to leak it.
-	 */
-	if (task->tk_rqstp)
-		xprt_release(task);
-
 	switch (status) {
 	case -ENOMEM:
 		rpc_delay(task, HZ >> 2);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e5b5a960a69b8..5b3981fd37838 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -70,6 +70,7 @@
 static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
 static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
 static void	 xprt_destroy(struct rpc_xprt *xprt);
+static void	 xprt_request_init(struct rpc_task *task);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
@@ -1612,10 +1613,26 @@ static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
 	rpc_sleep_on(&xprt->backlog, task, NULL);
 }
 
-static void xprt_wake_up_backlog(struct rpc_xprt *xprt)
+static bool __xprt_set_rq(struct rpc_task *task, void *data)
 {
-	if (rpc_wake_up_next(&xprt->backlog) == NULL)
+	struct rpc_rqst *req = data;
+
+	if (task->tk_rqstp == NULL) {
+		memset(req, 0, sizeof(*req));	/* mark unused */
+		task->tk_status = -EAGAIN;
+		task->tk_rqstp = req;
+		return true;
+	}
+	return false;
+}
+
+static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
 		clear_bit(XPRT_CONGESTED, &xprt->state);
+		return false;
+	}
+	return true;
 }
 
 static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1703,11 +1720,11 @@ EXPORT_SYMBOL_GPL(xprt_alloc_slot);
 void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
 	spin_lock(&xprt->reserve_lock);
-	if (!xprt_dynamic_free_slot(xprt, req)) {
+	if (!xprt_wake_up_backlog(xprt, req) &&
+	    !xprt_dynamic_free_slot(xprt, req)) {
 		memset(req, 0, sizeof(*req));	/* mark unused */
 		list_add(&req->rq_list, &xprt->free);
 	}
-	xprt_wake_up_backlog(xprt);
 	spin_unlock(&xprt->reserve_lock);
 }
 EXPORT_SYMBOL_GPL(xprt_free_slot);
@@ -1795,6 +1812,10 @@ xprt_request_init(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_rqst	*req = task->tk_rqstp;
 
+	if (req->rq_task)
+		/* Already initialized */
+		return;
+
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
@@ -1855,8 +1876,10 @@ void xprt_retry_reserve(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_xprt;
 
 	task->tk_status = 0;
-	if (task->tk_rqstp != NULL)
+	if (task->tk_rqstp != NULL) {
+		xprt_request_init(task);
 		return;
+	}
 
 	task->tk_status = -EAGAIN;
 	xprt_do_reserve(xprt, task);
@@ -1881,23 +1904,26 @@ void xprt_release(struct rpc_task *task)
 	}
 
 	xprt = req->rq_xprt;
-	xprt_request_dequeue_xprt(task);
-	spin_lock(&xprt->transport_lock);
-	xprt->ops->release_xprt(xprt, task);
-	if (xprt->ops->release_request)
-		xprt->ops->release_request(task);
-	xprt_schedule_autodisconnect(xprt);
-	spin_unlock(&xprt->transport_lock);
-	if (req->rq_buffer)
-		xprt->ops->buf_free(task);
-	xdr_free_bvec(&req->rq_rcv_buf);
-	xdr_free_bvec(&req->rq_snd_buf);
-	if (req->rq_cred != NULL)
-		put_rpccred(req->rq_cred);
-	task->tk_rqstp = NULL;
-	if (req->rq_release_snd_buf)
-		req->rq_release_snd_buf(req);
+	if (xprt) {
+		xprt_request_dequeue_xprt(task);
+		spin_lock(&xprt->transport_lock);
+		xprt->ops->release_xprt(xprt, task);
+		if (xprt->ops->release_request)
+			xprt->ops->release_request(task);
+		xprt_schedule_autodisconnect(xprt);
+		spin_unlock(&xprt->transport_lock);
+		if (req->rq_buffer)
+			xprt->ops->buf_free(task);
+		xdr_free_bvec(&req->rq_rcv_buf);
+		xdr_free_bvec(&req->rq_snd_buf);
+		if (req->rq_cred != NULL)
+			put_rpccred(req->rq_cred);
+		if (req->rq_release_snd_buf)
+			req->rq_release_snd_buf(req);
+	} else
+		xprt = task->tk_xprt;
 
+	task->tk_rqstp = NULL;
 	if (likely(!bc_prealloc(req)))
 		xprt->ops->free_slot(xprt, req);
 	else
-- 
GitLab


From a421d218603ffa822a0b8045055c03eae394a7eb Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Wed, 19 May 2021 12:54:51 -0400
Subject: [PATCH 1190/3804] NFSv4: Fix a NULL pointer dereference in
 pnfs_mark_matching_lsegs_return()

Commit de144ff4234f changes _pnfs_return_layout() to call
pnfs_mark_matching_lsegs_return() passing NULL as the struct
pnfs_layout_range argument. Unfortunately,
pnfs_mark_matching_lsegs_return() doesn't check if we have a value here
before dereferencing it, causing an oops.

I'm able to hit this crash consistently when running connectathon basic
tests on NFS v4.1/v4.2 against Ontap.

Fixes: de144ff4234f ("NFSv4: Don't discard segments marked for return in _pnfs_return_layout()")
Cc: stable@vger.kernel.org
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pnfs.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f076a6f3558da..2c01ee805306c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1317,6 +1317,11 @@ _pnfs_return_layout(struct inode *ino)
 {
 	struct pnfs_layout_hdr *lo = NULL;
 	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_range range = {
+		.iomode		= IOMODE_ANY,
+		.offset		= 0,
+		.length		= NFS4_MAX_UINT64,
+	};
 	LIST_HEAD(tmp_list);
 	const struct cred *cred;
 	nfs4_stateid stateid;
@@ -1344,16 +1349,10 @@ _pnfs_return_layout(struct inode *ino)
 	}
 	valid_layout = pnfs_layout_is_valid(lo);
 	pnfs_clear_layoutcommit(ino, &tmp_list);
-	pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
+	pnfs_mark_matching_lsegs_return(lo, &tmp_list, &range, 0);
 
-	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
-		struct pnfs_layout_range range = {
-			.iomode		= IOMODE_ANY,
-			.offset		= 0,
-			.length		= NFS4_MAX_UINT64,
-		};
+	if (NFS_SERVER(ino)->pnfs_curr_ld->return_range)
 		NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &range);
-	}
 
 	/* Don't send a LAYOUTRETURN if list was initially empty */
 	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) ||
-- 
GitLab


From d275880abce9ac66cb842af828fbc2b1ba8082a0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 17 May 2021 08:50:11 -0400
Subject: [PATCH 1191/3804] SUNRPC: Fix Oops in xs_tcp_send_request() when
 transport is disconnected

If a disconnection occurs while we're trying to reply to a server
callback, then we may end up calling xs_tcp_send_request() with a NULL
value for transport->inet, which trips up the call to
tcp_sock_set_cork().

Fixes: d737e5d41870 ("SUNRPC: Set TCP_CORK until the transmit queue is empty")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 47aa47a2b07c5..316d049455876 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1010,6 +1010,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 			kernel_sock_shutdown(transport->sock, SHUT_RDWR);
 		return -ENOTCONN;
 	}
+	if (!transport->inet)
+		return -ENOTCONN;
 
 	xs_pktdump("packet data:",
 				req->rq_svec->iov_base,
-- 
GitLab


From 45e1ba40837ac2f6f4d4716bddb8d44bd7e4a251 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Wed, 12 May 2021 13:19:46 -0700
Subject: [PATCH 1192/3804] cgroup: disable controllers at parse time
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch effectively reverts the commit a3e72739b7a7 ("cgroup: fix
too early usage of static_branch_disable()"). The commit 6041186a3258
("init: initialize jump labels before command line option parsing") has
moved the jump_label_init() before parse_args() which has made the
commit a3e72739b7a7 unnecessary. On the other hand there are
consequences of disabling the controllers later as there are subsystems
doing the controller checks for different decisions. One such incident
is reported [1] regarding the memory controller and its impact on memory
reclaim code.

[1] https://lore.kernel.org/linux-mm/921e53f3-4b13-aab8-4a9e-e83ff15371e4@nec.com

Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reported-by: NOMURA JUNICHI(野村　淳一) <junichi.nomura@nec.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Tested-by: Jun'ichi Nomura <junichi.nomura@nec.com>
---
 kernel/cgroup/cgroup.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e049edd667760..e7a9a29982450 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5634,8 +5634,6 @@ int __init cgroup_init_early(void)
 	return 0;
 }
 
-static u16 cgroup_disable_mask __initdata;
-
 /**
  * cgroup_init - cgroup initialization
  *
@@ -5694,12 +5692,8 @@ int __init cgroup_init(void)
 		 * disabled flag and cftype registration needs kmalloc,
 		 * both of which aren't available during early_init.
 		 */
-		if (cgroup_disable_mask & (1 << ssid)) {
-			static_branch_disable(cgroup_subsys_enabled_key[ssid]);
-			printk(KERN_INFO "Disabling %s control group subsystem\n",
-			       ss->name);
+		if (!cgroup_ssid_enabled(ssid))
 			continue;
-		}
 
 		if (cgroup1_ssid_disabled(ssid))
 			printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
@@ -6214,7 +6208,10 @@ static int __init cgroup_disable(char *str)
 			if (strcmp(token, ss->name) &&
 			    strcmp(token, ss->legacy_name))
 				continue;
-			cgroup_disable_mask |= 1 << i;
+
+			static_branch_disable(cgroup_subsys_enabled_key[i]);
+			pr_info("Disabling %s control group subsystem\n",
+				ss->name);
 		}
 	}
 	return 1;
-- 
GitLab


From 833bc4cf9754643acc69b3c6b65988ca78df4460 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 20 May 2021 08:08:24 +0300
Subject: [PATCH 1193/3804] ASoC: cs35l33: fix an error code in probe()

This error path returns zero (success) but it should return -EINVAL.

Fixes: 3333cb7187b9 ("ASoC: cs35l33: Initial commit of the cs35l33 CODEC driver.")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/YKXuyGEzhPT35R3G@mwanda
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/cs35l33.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/codecs/cs35l33.c b/sound/soc/codecs/cs35l33.c
index 7ad7b733af9b6..e8f3dcfd144da 100644
--- a/sound/soc/codecs/cs35l33.c
+++ b/sound/soc/codecs/cs35l33.c
@@ -1201,6 +1201,7 @@ static int cs35l33_i2c_probe(struct i2c_client *i2c_client,
 		dev_err(&i2c_client->dev,
 			"CS35L33 Device ID (%X). Expected ID %X\n",
 			devid, CS35L33_CHIP_ID);
+		ret = -EINVAL;
 		goto err_enable;
 	}
 
-- 
GitLab


From 855bfff9d623e7aff6556bfb6831d324dec8d96a Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 20 May 2021 19:18:11 +0800
Subject: [PATCH 1194/3804] regulator: fixed: Ensure enable_counter is correct
 if reg_domain_disable fails

dev_pm_genpd_set_performance_state() may fail, so check its return value
before decrementing priv->enable_counter.

Fixes: bf3a28cf4241 ("regulator: fixed: support using power domain for enable/disable")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210520111811.1806293-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fixed.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 02ad83153e19a..34e255c235d4c 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -88,10 +88,15 @@ static int reg_domain_disable(struct regulator_dev *rdev)
 {
 	struct fixed_voltage_data *priv = rdev_get_drvdata(rdev);
 	struct device *dev = rdev->dev.parent;
+	int ret;
+
+	ret = dev_pm_genpd_set_performance_state(dev, 0);
+	if (ret)
+		return ret;
 
 	priv->enable_counter--;
 
-	return dev_pm_genpd_set_performance_state(dev, 0);
+	return 0;
 }
 
 static int reg_is_enabled(struct regulator_dev *rdev)
-- 
GitLab


From e56360d6a119f531506658ea87238e48ad4c95c2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 May 2021 15:05:18 +0300
Subject: [PATCH 1195/3804] regmap: mdio: Don't modify output if error happened

regmap_mdio_read() breaks the principle of "no touch output till it's known
that the operation succeeds". Refactor it accordingly.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210520120518.30490-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mdio.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
index 5f18fe409f569..5ec208279913f 100644
--- a/drivers/base/regmap/regmap-mdio.c
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -11,9 +11,11 @@ static int regmap_mdio_read(void *context, unsigned int reg, unsigned int *val)
 	int ret;
 
 	ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg);
-	*val = ret & 0xffff;
+	if (ret < 0)
+		return ret;
 
-	return ret < 0 ? ret : 0;
+	*val = ret & 0xffff;
+	return 0;
 }
 
 static int regmap_mdio_write(void *context, unsigned int reg, unsigned int val)
-- 
GitLab


From 940d71c6462e8151c78f28e4919aa8882ff2054e Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Thu, 20 May 2021 19:14:22 +0900
Subject: [PATCH 1196/3804] wq: handle VM suspension in stall detection

If VCPU is suspended (VM suspend) in wq_watchdog_timer_fn() then
once this VCPU resumes it will see the new jiffies value, while it
may take a while before IRQ detects PVCLOCK_GUEST_STOPPED on this
VCPU and updates all the watchdogs via pvclock_touch_watchdogs().
There is a small chance of misreported WQ stalls in the meantime,
because new jiffies is time_after() old 'ts + thresh'.

wq_watchdog_timer_fn()
{
	for_each_pool(pool, pi) {
		if (time_after(jiffies, ts + thresh)) {
			pr_emerg("BUG: workqueue lockup - pool");
		}
	}
}

Save jiffies at the beginning of this function and use that value
for stall detection. If VM gets suspended then we continue using
"old" jiffies value and old WQ touch timestamps. If IRQ at some
point restarts the stall detection cycle (pvclock_touch_watchdogs())
then old jiffies will always be before new 'ts + thresh'.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/workqueue.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b19d759e55a5d..50142fc08902d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -50,6 +50,7 @@
 #include <linux/uaccess.h>
 #include <linux/sched/isolation.h>
 #include <linux/nmi.h>
+#include <linux/kvm_para.h>
 
 #include "workqueue_internal.h"
 
@@ -5772,6 +5773,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 {
 	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
 	bool lockup_detected = false;
+	unsigned long now = jiffies;
 	struct worker_pool *pool;
 	int pi;
 
@@ -5786,6 +5788,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 		if (list_empty(&pool->worklist))
 			continue;
 
+		/*
+		 * If a virtual machine is stopped by the host it can look to
+		 * the watchdog like a stall.
+		 */
+		kvm_check_and_clear_guest_paused();
+
 		/* get the latest of pool and touched timestamps */
 		if (pool->cpu >= 0)
 			touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
@@ -5799,12 +5807,12 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 			ts = touched;
 
 		/* did we stall? */
-		if (time_after(jiffies, ts + thresh)) {
+		if (time_after(now, ts + thresh)) {
 			lockup_detected = true;
 			pr_emerg("BUG: workqueue lockup - pool");
 			pr_cont_pool_info(pool);
 			pr_cont(" stuck for %us!\n",
-				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+				jiffies_to_msecs(now - pool_ts) / 1000);
 		}
 	}
 
-- 
GitLab


From 6328caf043208556e782a53a284c9acfcf6be3b0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:15:36 +0200
Subject: [PATCH 1197/3804] spi: fix some invalid char occurrences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One of the author names got an invalid char, probably due to
a bad charset conversion, being replaced by the
REPLACEMENT CHARACTER U+fffd ('�').

The author's e-mail address spells the name without accents, so use
that spelling, which is also the one used in the .mailmap file.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/ff8d296e1fdcc4f1c6df94434a5720bcedcd0ecf.1621412009.git.mchehab+huawei@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap-100k.c   | 2 +-
 drivers/spi/spi-omap2-mcspi.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index 96490888e9f14..20b0471729651 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -6,7 +6,7 @@
  *
  * Copyright (C) 2005, 2006 Nokia Corporation
  * Author:      Samuel Ortiz <samuel.ortiz@nokia.com> and
- *              Juha Yrj�l� <juha.yrjola@nokia.com>
+ *              Juha Yrjola <juha.yrjola@nokia.com>
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 999c227364164..e24520e87cfff 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -4,7 +4,7 @@
  *
  * Copyright (C) 2005, 2006 Nokia Corporation
  * Author:	Samuel Ortiz <samuel.ortiz@nokia.com> and
- *		Juha Yrj�l� <juha.yrjola@nokia.com>
+ *		Juha Yrjola <juha.yrjola@nokia.com>
  */
 
 #include <linux/kernel.h>
-- 
GitLab


From a2bd5afd59c1dec8e559096c3a5c912360c267ca Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:16 +0800
Subject: [PATCH 1198/3804] spi: lm70llp: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Cc: Kaiwan N Billimoria <kaiwan@designergraphix.com>
Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-2-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-lm70llp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c
index f914b8d2043e3..ead0507c63be7 100644
--- a/drivers/spi/spi-lm70llp.c
+++ b/drivers/spi/spi-lm70llp.c
@@ -202,7 +202,7 @@ static void spi_lm70llp_attach(struct parport *p)
 	 * the lm70 driver could verify it, reading the manf ID.
 	 */
 
-	master = spi_alloc_master(p->physport->dev, sizeof *pp);
+	master = spi_alloc_master(p->physport->dev, sizeof(*pp));
 	if (!master) {
 		status = -ENOMEM;
 		goto out_fail;
-- 
GitLab


From 722cb2b197e125d6816aac43ec2d411c7b22daa9 Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:17 +0800
Subject: [PATCH 1199/3804] spi: mpc512x-psc: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-3-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc512x-psc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index ea1b07953d38a..78a9bca8cc689 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -369,7 +369,7 @@ static int mpc512x_psc_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 
 	if (!cs) {
-		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
 
@@ -491,7 +491,7 @@ static int mpc512x_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	void *tempp;
 	struct clk *clk;
 
-	master = spi_alloc_master(dev, sizeof *mps);
+	master = spi_alloc_master(dev, sizeof(*mps));
 	if (master == NULL)
 		return -ENOMEM;
 
-- 
GitLab


From ac7357ac769e3b4bd52e691f22d745c89126069f Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:18 +0800
Subject: [PATCH 1200/3804] spi: mpc52xx: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-4-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc52xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mpc52xx.c b/drivers/spi/spi-mpc52xx.c
index 124cba7213f15..51041526546dd 100644
--- a/drivers/spi/spi-mpc52xx.c
+++ b/drivers/spi/spi-mpc52xx.c
@@ -415,7 +415,7 @@ static int mpc52xx_spi_probe(struct platform_device *op)
 	}
 
 	dev_dbg(&op->dev, "allocating spi_master struct\n");
-	master = spi_alloc_master(&op->dev, sizeof *ms);
+	master = spi_alloc_master(&op->dev, sizeof(*ms));
 	if (!master) {
 		rc = -ENOMEM;
 		goto err_alloc;
-- 
GitLab


From 75d4c2d64b30c8583b82afdcc9dc4db2083dee5b Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:19 +0800
Subject: [PATCH 1201/3804] spi: mpc52xx-psc: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-5-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mpc52xx-psc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-mpc52xx-psc.c b/drivers/spi/spi-mpc52xx-psc.c
index 17935e71b02f5..21ef5d481faf4 100644
--- a/drivers/spi/spi-mpc52xx-psc.c
+++ b/drivers/spi/spi-mpc52xx-psc.c
@@ -265,7 +265,7 @@ static int mpc52xx_psc_spi_setup(struct spi_device *spi)
 		return -EINVAL;
 
 	if (!cs) {
-		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
 		spi->controller_state = cs;
@@ -365,7 +365,7 @@ static int mpc52xx_psc_spi_do_probe(struct device *dev, u32 regaddr,
 	struct spi_master *master;
 	int ret;
 
-	master = spi_alloc_master(dev, sizeof *mps);
+	master = spi_alloc_master(dev, sizeof(*mps));
 	if (master == NULL)
 		return -ENOMEM;
 
-- 
GitLab


From 8267dc6d6889235e6dac21156cc9d6e5d5319d3b Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:20 +0800
Subject: [PATCH 1202/3804] spi: omap2-mcspi: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-6-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap2-mcspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 999c227364164..a06c8f499c519 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1040,7 +1040,7 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 	struct omap2_mcspi_cs	*cs = spi->controller_state;
 
 	if (!cs) {
-		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
 		cs->base = mcspi->base + spi->chip_select * 0x14;
-- 
GitLab


From 19bae51b0191129fd9a6d163678404b77cab24c9 Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:21 +0800
Subject: [PATCH 1203/3804] spi: omap-uwire: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-7-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap-uwire.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index ceb479f5c88fe..c975e86405602 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -453,7 +453,7 @@ static int uwire_probe(struct platform_device *pdev)
 	struct uwire_spi	*uwire;
 	int			status;
 
-	master = spi_alloc_master(&pdev->dev, sizeof *uwire);
+	master = spi_alloc_master(&pdev->dev, sizeof(*uwire));
 	if (!master)
 		return -ENODEV;
 
-- 
GitLab


From 07c74f844b740a858e40fe6c15dd9a2f3b7f6476 Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Tue, 18 May 2021 09:38:22 +0800
Subject: [PATCH 1204/3804] spi: ppc4xx: add parenthesis for sizeof

Fix the missing parentheses of sizeof reported by checkpatch.pl:
 WARNING: sizeof *pp should be sizeof(*pp).

The kernel coding style suggests thinking of sizeof as a function
and adding parentheses.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Link: https://lore.kernel.org/r/1621301902-64158-8-git-send-email-songzhiqi1@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ppc4xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 59d201acbb394..d65f047b6c823 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -223,7 +223,7 @@ static int spi_ppc4xx_setup(struct spi_device *spi)
 	}
 
 	if (cs == NULL) {
-		cs = kzalloc(sizeof *cs, GFP_KERNEL);
+		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
 		if (!cs)
 			return -ENOMEM;
 		spi->controller_state = cs;
@@ -349,7 +349,7 @@ static int spi_ppc4xx_of_probe(struct platform_device *op)
 	int ret;
 	const unsigned int *clk;
 
-	master = spi_alloc_master(dev, sizeof *hw);
+	master = spi_alloc_master(dev, sizeof(*hw));
 	if (master == NULL)
 		return -ENOMEM;
 	master->dev.of_node = np;
-- 
GitLab


From 9687c85dfbf84a6a37522626b4d5c5191a695e6c Mon Sep 17 00:00:00 2001
From: Rohith Surabattula <rohiths@microsoft.com>
Date: Thu, 20 May 2021 16:45:01 +0000
Subject: [PATCH 1205/3804] Fix KASAN identified use-after-free issue.

[  612.157429] ==================================================================
[  612.158275] BUG: KASAN: use-after-free in process_one_work+0x90/0x9b0
[  612.158801] Read of size 8 at addr ffff88810a31ca60 by task kworker/2:9/2382

[  612.159611] CPU: 2 PID: 2382 Comm: kworker/2:9 Tainted: G
OE     5.13.0-rc2+ #98
[  612.159623] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.14.0-1.fc33 04/01/2014
[  612.159640] Workqueue:  0x0 (deferredclose)
[  612.159669] Call Trace:
[  612.159685]  dump_stack+0xbb/0x107
[  612.159711]  print_address_description.constprop.0+0x18/0x140
[  612.159733]  ? process_one_work+0x90/0x9b0
[  612.159743]  ? process_one_work+0x90/0x9b0
[  612.159754]  kasan_report.cold+0x7c/0xd8
[  612.159778]  ? lock_is_held_type+0x80/0x130
[  612.159789]  ? process_one_work+0x90/0x9b0
[  612.159812]  kasan_check_range+0x145/0x1a0
[  612.159834]  process_one_work+0x90/0x9b0
[  612.159877]  ? pwq_dec_nr_in_flight+0x110/0x110
[  612.159914]  ? spin_bug+0x90/0x90
[  612.159967]  worker_thread+0x3b6/0x6c0
[  612.160023]  ? process_one_work+0x9b0/0x9b0
[  612.160038]  kthread+0x1dc/0x200
[  612.160051]  ? kthread_create_worker_on_cpu+0xd0/0xd0
[  612.160092]  ret_from_fork+0x1f/0x30

[  612.160399] Allocated by task 2358:
[  612.160757]  kasan_save_stack+0x1b/0x40
[  612.160768]  __kasan_kmalloc+0x9b/0xd0
[  612.160778]  cifs_new_fileinfo+0xb0/0x960 [cifs]
[  612.161170]  cifs_open+0xadf/0xf20 [cifs]
[  612.161421]  do_dentry_open+0x2aa/0x6b0
[  612.161432]  path_openat+0xbd9/0xfa0
[  612.161441]  do_filp_open+0x11d/0x230
[  612.161450]  do_sys_openat2+0x115/0x240
[  612.161460]  __x64_sys_openat+0xce/0x140

When mod_delayed_work() is called to modify the delay of pending work,
it might return false and queue new work if the pending work is
already scheduled or if the attempt to grab the pending work fails.

So, increase the reference count when new work is scheduled to
avoid a use-after-free.

Signed-off-by: Rohith Surabattula <rohiths@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 20 +++++++++++++-------
 fs/cifs/misc.c | 12 ++++++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a1abd3da1d442..379a427f3c2f1 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -874,10 +874,6 @@ void smb2_deferred_work_close(struct work_struct *work)
 			struct cifsFileInfo, deferred.work);
 
 	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-	if (!cfile->deferred_close_scheduled) {
-		spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
-		return;
-	}
 	cifs_del_deferred_close(cfile);
 	cfile->deferred_close_scheduled = false;
 	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
@@ -904,8 +900,13 @@ int cifs_close(struct inode *inode, struct file *file)
 			cifs_add_deferred_close(cfile, dclose);
 			if (cfile->deferred_close_scheduled &&
 			    delayed_work_pending(&cfile->deferred)) {
-				mod_delayed_work(deferredclose_wq,
-						&cfile->deferred, cifs_sb->ctx->acregmax);
+				/*
+				 * If there is no pending work, mod_delayed_work queues new work.
+				 * So, Increase the ref count to avoid use-after-free.
+				 */
+				if (!mod_delayed_work(deferredclose_wq,
+						&cfile->deferred, cifs_sb->ctx->acregmax))
+					cifsFileInfo_get(cfile);
 			} else {
 				/* Deferred close for files */
 				queue_delayed_work(deferredclose_wq,
@@ -4879,7 +4880,12 @@ oplock_break_ack:
 	if (is_deferred &&
 	    cfile->deferred_close_scheduled &&
 	    delayed_work_pending(&cfile->deferred)) {
-		mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		/*
+		 * If there is no pending work, mod_delayed_work queues new work.
+		 * So, Increase the ref count to avoid use-after-free.
+		 */
+		if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+			cifsFileInfo_get(cfile);
 	}
 	spin_unlock(&CIFS_I(inode)->deferred_lock);
 	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 34f2a7e80c581..7207a63819cbf 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -674,6 +674,8 @@ cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
 
 /*
  * Critical section which runs after acquiring deferred_lock.
+ * As there is no reference count on cifs_deferred_close, pdclose
+ * should not be used outside deferred_lock.
  */
 bool
 cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
@@ -752,8 +754,14 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon)
 	spin_lock(&tcon->open_file_lock);
 	list_for_each(tmp, &tcon->openFileList) {
 		cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-		if (delayed_work_pending(&cfile->deferred))
-			mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+		if (delayed_work_pending(&cfile->deferred)) {
+			/*
+			 * If there is no pending work, mod_delayed_work queues new work.
+			 * So, Increase the ref count to avoid use-after-free.
+			 */
+			if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0))
+				cifsFileInfo_get(cfile);
+		}
 	}
 	spin_unlock(&tcon->open_file_lock);
 }
-- 
GitLab


From ee62c89cd45999ba4e09938bd01ec6d1a83ca6d6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:38 +0200
Subject: [PATCH 1206/3804] docs: update sysfs-platform_profile.rst reference

The file name: Documentation/ABI/testing/sysfs-platform_profile.rst
should be, instead: Documentation/userspace-api/sysfs-platform_profile.rst.

Update its cross-reference accordingly.

Fixes: a2ff95e018f1 ("ACPI: platform: Add platform profile support")
Fixes: 8e0cbf356377 ("Documentation: Add documentation for new platform_profile sysfs attribute")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/295089effd8353578b9725c61c0453d920978d72.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/platform_profile.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h
index a6329003aee7f..e5cbb6841f3ac 100644
--- a/include/linux/platform_profile.h
+++ b/include/linux/platform_profile.h
@@ -2,7 +2,7 @@
 /*
  * Platform profile sysfs interface
  *
- * See Documentation/ABI/testing/sysfs-platform_profile.rst for more
+ * See Documentation/userspace-api/sysfs-platform_profile.rst for more
  * information.
  */
 
-- 
GitLab


From e437c1a3e7137c0da035a2804bf6b4cc007d4f5e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:39 +0200
Subject: [PATCH 1207/3804] docs: vcpu-requests.rst: fix reference for atomic
 ops

Changeset f0400a77ebdc ("atomic: Delete obsolete documentation")
got rid of atomic_ops.rst, pointing out that it was superseded by
Documentation/atomic_*.txt.

Update its reference accordingly.

Fixes: f0400a77ebdc ("atomic: Delete obsolete documentation")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/703af756ac26a06c2185c05dfe6d902253f11161.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virt/kvm/vcpu-requests.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
index 5feb3706a7ae5..5f8798e7fdf85 100644
--- a/Documentation/virt/kvm/vcpu-requests.rst
+++ b/Documentation/virt/kvm/vcpu-requests.rst
@@ -302,6 +302,6 @@ VCPU returns from the call.
 References
 ==========
 
-.. [atomic-ops] Documentation/core-api/atomic_ops.rst
+.. [atomic-ops] Documentation/atomic_bitops.txt and Documentation/atomic_t.txt
 .. [memory-barriers] Documentation/memory-barriers.txt
 .. [lwn-mb] https://lwn.net/Articles/573436/
-- 
GitLab


From 50bd52fef16dc806be11ba86f406364366f3f23b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:40 +0200
Subject: [PATCH 1208/3804] docs: translations/zh_CN: fix a typo at
 8.Conclusion.rst

transaltions -> translations

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/40b3d5c983fb06d8a58d1f613c175a98e0631677.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/process/8.Conclusion.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/translations/zh_CN/process/8.Conclusion.rst b/Documentation/translations/zh_CN/process/8.Conclusion.rst
index 71c3e30efc6f8..4707f01019646 100644
--- a/Documentation/translations/zh_CN/process/8.Conclusion.rst
+++ b/Documentation/translations/zh_CN/process/8.Conclusion.rst
@@ -19,7 +19,7 @@
 :ref:`Documentation/translations/zh_CN/process/howto.rst <cn_process_howto>`
 文件是一个重要的起点；
 :ref:`Documentation/translations/zh_CN/process/submitting-patches.rst <cn_submittingpatches>`
-和 :ref:`Documentation/transaltions/zh_CN/process/submitting-drivers.rst <cn_submittingdrivers>`
+和 :ref:`Documentation/translations/zh_CN/process/submitting-drivers.rst <cn_submittingdrivers>`
 也是所有内核开发人员都应该阅读的内容。许多内部内核API都是使用kerneldoc机制
 记录的；“make htmldocs”或“make pdfdocs”可用于以HTML或PDF格式生成这些文档
 （尽管某些发行版提供的tex版本会遇到内部限制，无法正确处理文档）。
-- 
GitLab


From 716c9d9403d061d02c419a6f63a4f3fd01278cae Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:41 +0200
Subject: [PATCH 1209/3804] docs: sched-bwc.rst: fix a typo on a doc name

cgroupv2.rst -> cgroup-v2.rst

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/1dc0203bd7df375ef45832f0c88566e22c4138ff.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/scheduler/sched-bwc.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/scheduler/sched-bwc.rst b/Documentation/scheduler/sched-bwc.rst
index 845eee659199c..1fc73555f5c46 100644
--- a/Documentation/scheduler/sched-bwc.rst
+++ b/Documentation/scheduler/sched-bwc.rst
@@ -29,7 +29,7 @@ Quota and period are managed within the cpu subsystem via cgroupfs.
 .. note::
    The cgroupfs files described in this section are only applicable
    to cgroup v1. For cgroup v2, see
-   :ref:`Documentation/admin-guide/cgroupv2.rst <cgroup-v2-cpu>`.
+   :ref:`Documentation/admin-guide/cgroup-v2.rst <cgroup-v2-cpu>`.
 
 - cpu.cfs_quota_us: the total available run-time within a period (in
   microseconds)
-- 
GitLab


From 4b0c9948a4c2f446a11bd592bd7d23f06ad75d8e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:42 +0200
Subject: [PATCH 1210/3804] docs: update pin-control.rst references

Changeset 5513b411ea5b ("Documentation: rename pinctl to pin-control")
renamed: Documentation/driver-api/pinctl.rst
to: Documentation/driver-api/pin-control.rst.

Update the cross-references accordingly.

Fixes: 5513b411ea5b ("Documentation: rename pinctl to pin-control")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/46ac2e918c7c4a4b701d54870f167b78466ec578.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 include/linux/device.h                  | 2 +-
 include/linux/mfd/madera/pdata.h        | 2 +-
 include/linux/pinctrl/pinconf-generic.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 38a2071cf7768..d1183cfdc8fbb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -399,7 +399,7 @@ struct dev_links_info {
  * 		along with subsystem-level and driver-level callbacks.
  * @em_pd:	device's energy model performance domain
  * @pins:	For device pin management.
- *		See Documentation/driver-api/pinctl.rst for details.
+ *		See Documentation/driver-api/pin-control.rst for details.
  * @msi_list:	Hosts MSI descriptors
  * @msi_domain: The generic MSI domain this device is using.
  * @numa_node:	NUMA node this device is close to.
diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h
index 601cbbc10370c..32e3470708ed8 100644
--- a/include/linux/mfd/madera/pdata.h
+++ b/include/linux/mfd/madera/pdata.h
@@ -31,7 +31,7 @@ struct pinctrl_map;
  * @irq_flags:	    Mode for primary IRQ (defaults to active low)
  * @gpio_base:	    Base GPIO number
  * @gpio_configs:   Array of GPIO configurations (See
- *		    Documentation/driver-api/pinctl.rst)
+ *		    Documentation/driver-api/pin-control.rst)
  * @n_gpio_configs: Number of entries in gpio_configs
  * @gpsw:	    General purpose switch mode setting. Depends on the external
  *		    hardware connected to the switch. (See the SW1_MODE field
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index e18ab3d5908f6..5a96602a33160 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -89,7 +89,7 @@ struct pinctrl_map;
  * 	it.
  * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a
  * 	value on the line. Use argument 1 to indicate high level, argument 0 to
- *	indicate low level. (Please see Documentation/driver-api/pinctl.rst,
+ *	indicate low level. (Please see Documentation/driver-api/pin-control.rst,
  *	section "GPIO mode pitfalls" for a discussion around this parameter.)
  * @PIN_CONFIG_PERSIST_STATE: retain pin state across sleep or controller reset
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
-- 
GitLab


From 0a5fab9f085880dbd7f9b0055c74936ca8b64fc1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:43 +0200
Subject: [PATCH 1211/3804] docs: virt: api.rst: fix a pointer to SGX
 documentation

The document which describes the SGX kernel architecture was added at
commit 3fa97bf00126 ("Documentation/x86: Document SGX kernel architecture")

but the reference at virt/kvm/api.rst is pointing to some
non-existing document.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/138c24633c6e4edf862a2b4d77033c603fc10406.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virt/kvm/api.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 22d0775621496..e86fe34815745 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6360,7 +6360,7 @@ system fingerprint.  To prevent userspace from circumventing such restrictions
 by running an enclave in a VM, KVM prevents access to privileged attributes by
 default.
 
-See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
+See Documentation/x86/sgx.rst for more details.
 
 8. Other capabilities.
 ======================
-- 
GitLab


From 5286bd25e2095d71d248fbfd2a55ca4333dfd77e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:44 +0200
Subject: [PATCH 1212/3804] docs: ABI: iommu: remove duplicated definition for
 sysfs-kernel-iommu_groups

./scripts/get_abi.pl is reporting a duplicated definition for
/sys/kernel/iommu_groups/reserved_regions, both at the same
file:

	Warning: /sys/kernel/iommu_groups/reserved_regions is defined 2 times:  Documentation/ABI/testing/sysfs-kernel-iommu_groups:15  Documentation/ABI/testing/sysfs-kernel-iommu_groups:27

Fix it by merging those into a unified entry.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/ec33e8e9b8f120232ffb3b9fcc99c97b87f242e3.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-kernel-iommu_groups | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
index 0fedbb0f94e4f..eae2f1c1e11e9 100644
--- a/Documentation/ABI/testing/sysfs-kernel-iommu_groups
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -25,14 +25,10 @@ Description:    /sys/kernel/iommu_groups/reserved_regions list IOVA
 		the base IOVA, the second is the end IOVA and the third
 		field describes the type of the region.
 
-What:		/sys/kernel/iommu_groups/reserved_regions
-Date: 		June 2019
-KernelVersion:  v5.3
-Contact: 	Eric Auger <eric.auger@redhat.com>
-Description:    In case an RMRR is used only by graphics or USB devices
-		it is now exposed as "direct-relaxable" instead of "direct".
-		In device assignment use case, for instance, those RMRR
-		are considered to be relaxable and safe.
+		Since kernel 5.3, in case an RMRR is used only by graphics or
+		USB devices it is now exposed as "direct-relaxable" instead
+		of "direct". In device assignment use case, for instance,
+		those RMRR are considered to be relaxable and safe.
 
 What:		/sys/kernel/iommu_groups/<grp_id>/type
 Date:		November 2020
-- 
GitLab


From 13d6f96750c89f7fc282788bd058559381ccec29 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:45 +0200
Subject: [PATCH 1213/3804] docs: ABI: sysfs-class-backlight: unify ambient
 light zone nodes

./scripts/get_abi.pl is warning about duplicated symbol
definition:

	Warning: /sys/class/backlight/<backlight>/l1_daylight_max is defined 2 times:  ./Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870:4  ./Documentation/ABI/testing/sysfs-class-backlight-adp8860:12

What happens is that 3 drivers use the same pattern to report
max and dim setting for different ambient light zones.

It should be noted that the adp8870 doc was missing an
entry for l1_daylight_dim, which is fixed in this patch.

While the ambient light zone is device-specific, the sysfs
definition is actually common. So, unify them at:

	Documentation/ABI/testing/sysfs-class-backlight

and use, as the contact point, the e-mail reported by
get_maintainer.pl for the subsystem.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/c13c6ebd03cd04a0d15d89018f8d529918fc0a73.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../ABI/testing/sysfs-class-backlight         | 100 ++++++++++++++++++
 .../ABI/testing/sysfs-class-backlight-adp5520 |  31 ------
 .../ABI/testing/sysfs-class-backlight-adp8860 |  37 -------
 .../sysfs-class-backlight-driver-adp8870      |  32 ------
 4 files changed, 100 insertions(+), 100 deletions(-)
 delete mode 100644 Documentation/ABI/testing/sysfs-class-backlight-adp5520
 delete mode 100644 Documentation/ABI/testing/sysfs-class-backlight-adp8860
 delete mode 100644 Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870

diff --git a/Documentation/ABI/testing/sysfs-class-backlight b/Documentation/ABI/testing/sysfs-class-backlight
index 1fc86401bf959..c453646b06e2c 100644
--- a/Documentation/ABI/testing/sysfs-class-backlight
+++ b/Documentation/ABI/testing/sysfs-class-backlight
@@ -84,3 +84,103 @@ Description:
 		It can be enabled by writing the value stored in
 		/sys/class/backlight/<backlight>/max_brightness to
 		/sys/class/backlight/<backlight>/brightness.
+
+What:		/sys/class/backlight/<backlight>/<ambient light zone>_max
+Date:		Sep, 2009
+KernelVersion:	v2.6.32
+Contact:	device-drivers-devel@blackfin.uclinux.org
+Description:
+		Control the maximum brightness for <ambient light zone>
+		on this <backlight>. Values are between 0 and 127. This file
+		will also show the brightness level stored for this
+		<ambient light zone>.
+
+		The <ambient light zone> is device-driver specific:
+
+		For ADP5520 and ADP5501, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		daylight     /sys/class/backlight/<backlight>/daylight_max
+		office       /sys/class/backlight/<backlight>/office_max
+		dark         /sys/class/backlight/<backlight>/dark_max
+		===========  ================================================
+
+		For ADP8860, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		l1_daylight  /sys/class/backlight/<backlight>/l1_daylight_max
+		l2_office    /sys/class/backlight/<backlight>/l2_office_max
+		l3_dark      /sys/class/backlight/<backlight>/l3_dark_max
+		===========  ================================================
+
+		For ADP8870, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		l1_daylight  /sys/class/backlight/<backlight>/l1_daylight_max
+		l2_bright    /sys/class/backlight/<backlight>/l2_bright_max
+		l3_office    /sys/class/backlight/<backlight>/l3_office_max
+		l4_indoor    /sys/class/backlight/<backlight>/l4_indoor_max
+		l5_dark      /sys/class/backlight/<backlight>/l5_dark_max
+		===========  ================================================
+
+		See also: /sys/class/backlight/<backlight>/ambient_light_zone.
+
+What:		/sys/class/backlight/<backlight>/<ambient light zone>_dim
+Date:		Sep, 2009
+KernelVersion:	v2.6.32
+Contact:	device-drivers-devel@blackfin.uclinux.org
+Description:
+		Control the dim brightness for <ambient light zone>
+		on this <backlight>. Values are between 0 and 127, typically
+		set to 0. Full off when the backlight is disabled.
+		This file will also show the dim brightness level stored for
+		this <ambient light zone>.
+
+		The <ambient light zone> is device-driver specific:
+
+		For ADP5520 and ADP5501, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		daylight     /sys/class/backlight/<backlight>/daylight_dim
+		office       /sys/class/backlight/<backlight>/office_dim
+		dark         /sys/class/backlight/<backlight>/dark_dim
+		===========  ================================================
+
+		For ADP8860, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		l1_daylight  /sys/class/backlight/<backlight>/l1_daylight_dim
+		l2_office    /sys/class/backlight/<backlight>/l2_office_dim
+		l3_dark      /sys/class/backlight/<backlight>/l3_dark_dim
+		===========  ================================================
+
+		For ADP8870, <ambient light zone> can be:
+
+		===========  ================================================
+		Ambient      sysfs entry
+		light zone
+		===========  ================================================
+		l1_daylight  /sys/class/backlight/<backlight>/l1_daylight_dim
+		l2_bright    /sys/class/backlight/<backlight>/l2_bright_dim
+		l3_office    /sys/class/backlight/<backlight>/l3_office_dim
+		l4_indoor    /sys/class/backlight/<backlight>/l4_indoor_dim
+		l5_dark      /sys/class/backlight/<backlight>/l5_dark_dim
+		===========  ================================================
+
+		See also: /sys/class/backlight/<backlight>/ambient_light_zone.
+
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-adp5520 b/Documentation/ABI/testing/sysfs-class-backlight-adp5520
deleted file mode 100644
index 34b6ebafa2105..0000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-adp5520
+++ /dev/null
@@ -1,31 +0,0 @@
-sysfs interface for analog devices adp5520(01) backlight driver
----------------------------------------------------------------
-
-The backlight brightness control operates at three different levels for the
-adp5520 and adp5501 devices: daylight (level 1), office (level 2) and dark
-(level 3). By default the brightness operates at the daylight brightness level.
-
-What:		/sys/class/backlight/<backlight>/daylight_max
-What:		/sys/class/backlight/<backlight>/office_max
-What:		/sys/class/backlight/<backlight>/dark_max
-Date:		Sep, 2009
-KernelVersion:	v2.6.32
-Contact:	Michael Hennerich <michael.hennerich@analog.com>
-Description:
-		(RW) Maximum current setting for the backlight when brightness
-		is at one of the three levels (daylight, office or dark). This
-		is an input code between 0 and 127, which is transformed to a
-		value between 0 mA and 30 mA using linear or non-linear
-		algorithms.
-
-What:		/sys/class/backlight/<backlight>/daylight_dim
-What:		/sys/class/backlight/<backlight>/office_dim
-What:		/sys/class/backlight/<backlight>/dark_dim
-Date:		Sep, 2009
-KernelVersion:	v2.6.32
-Contact:	Michael Hennerich <michael.hennerich@analog.com>
-Description:
-		(RW) Dim current setting for the backlight when brightness is at
-		one of the three levels (daylight, office or dark). This is an
-		input code between 0 and 127, which is transformed to a value
-		between 0 mA and 30 mA using linear or non-linear algorithms.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-adp8860 b/Documentation/ABI/testing/sysfs-class-backlight-adp8860
deleted file mode 100644
index 6610ac73f9ba0..0000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-adp8860
+++ /dev/null
@@ -1,37 +0,0 @@
-sysfs interface for analog devices adp8860 backlight driver
------------------------------------------------------------
-
-The backlight brightness control operates at three different levels for the
-adp8860, adp8861 and adp8863 devices: daylight (level 1), office (level 2) and
-dark (level 3). By default the brightness operates at the daylight brightness
-level.
-
-See also /sys/class/backlight/<backlight>/ambient_light_level and
-/sys/class/backlight/<backlight>/ambient_light_zone.
-
-
-What:		/sys/class/backlight/<backlight>/l1_daylight_max
-What:		/sys/class/backlight/<backlight>/l2_office_max
-What:		/sys/class/backlight/<backlight>/l3_dark_max
-Date:		Apr, 2010
-KernelVersion:	v2.6.35
-Contact:	Michael Hennerich <michael.hennerich@analog.com>
-Description:
-		(RW) Maximum current setting for the backlight when brightness
-		is at one of the three levels (daylight, office or dark). This
-		is an input code between 0 and 127, which is transformed to a
-		value between 0 mA and 30 mA using linear or non-linear
-		algorithms.
-
-
-What:		/sys/class/backlight/<backlight>/l1_daylight_dim
-What:		/sys/class/backlight/<backlight>/l2_office_dim
-What:		/sys/class/backlight/<backlight>/l3_dark_dim
-Date:		Apr, 2010
-KernelVersion:	v2.6.35
-Contact:	Michael Hennerich <michael.hennerich@analog.com>
-Description:
-		(RW) Dim current setting for the backlight when brightness is at
-		one of the three levels (daylight, office or dark). This is an
-		input code between 0 and 127, which is transformed to a value
-		between 0 mA and 30 mA using linear or non-linear algorithms.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
deleted file mode 100644
index b08ca912cad40..0000000000000
--- a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
+++ /dev/null
@@ -1,32 +0,0 @@
-See also /sys/class/backlight/<backlight>/ambient_light_level and
-/sys/class/backlight/<backlight>/ambient_light_zone.
-
-What:		/sys/class/backlight/<backlight>/<ambient light zone>_max
-What:		/sys/class/backlight/<backlight>/l1_daylight_max
-What:		/sys/class/backlight/<backlight>/l2_bright_max
-What:		/sys/class/backlight/<backlight>/l3_office_max
-What:		/sys/class/backlight/<backlight>/l4_indoor_max
-What:		/sys/class/backlight/<backlight>/l5_dark_max
-Date:		May 2011
-KernelVersion:	3.0
-Contact:	device-drivers-devel@blackfin.uclinux.org
-Description:
-		Control the maximum brightness for <ambient light zone>
-		on this <backlight>. Values are between 0 and 127. This file
-		will also show the brightness level stored for this
-		<ambient light zone>.
-
-What:		/sys/class/backlight/<backlight>/<ambient light zone>_dim
-What:		/sys/class/backlight/<backlight>/l2_bright_dim
-What:		/sys/class/backlight/<backlight>/l3_office_dim
-What:		/sys/class/backlight/<backlight>/l4_indoor_dim
-What:		/sys/class/backlight/<backlight>/l5_dark_dim
-Date:		May 2011
-KernelVersion:	3.0
-Contact:	device-drivers-devel@blackfin.uclinux.org
-Description:
-		Control the dim brightness for <ambient light zone>
-		on this <backlight>. Values are between 0 and 127, typically
-		set to 0. Full off when the backlight is disabled.
-		This file will also show the dim brightness level stored for
-		this <ambient light zone>.
-- 
GitLab


From 1ca5d41c371e6abe788439e6eaecfa76baaf6979 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 19 May 2021 10:51:46 +0200
Subject: [PATCH 1214/3804] docs: ABI: sysfs-class-led-trigger-pattern: remove
 repeat duplication

As reported by scripts/get_abi.pl:

	Warning: /sys/class/leds/<led>/repeat is defined 2 times:  Documentation/ABI/testing/sysfs-class-led-driver-el15203000:0  Documentation/ABI/testing/sysfs-class-led-trigger-pattern:28

The definition for the EL15203000 is just a special case of
the sysfs led class. So, drop it and mention the possible
exception in the class definition.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/394580bd2e007ffb640f97212973a772ed8f0409.1621413933.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../ABI/testing/sysfs-class-led-driver-el15203000        | 9 ---------
 .../ABI/testing/sysfs-class-led-trigger-pattern          | 3 +++
 2 files changed, 3 insertions(+), 9 deletions(-)
 delete mode 100644 Documentation/ABI/testing/sysfs-class-led-driver-el15203000

diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-el15203000 b/Documentation/ABI/testing/sysfs-class-led-driver-el15203000
deleted file mode 100644
index 04f3ffdc59361..0000000000000
--- a/Documentation/ABI/testing/sysfs-class-led-driver-el15203000
+++ /dev/null
@@ -1,9 +0,0 @@
-What:		/sys/class/leds/<led>/repeat
-Date:		September 2019
-KernelVersion:	5.5
-Description:
-		EL15203000 supports only indefinitely patterns,
-		so this file should always store -1.
-
-		For more info, please see:
-		Documentation/ABI/testing/sysfs-class-led-trigger-pattern
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
index d91a07767adf0..8c57d2780554e 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
@@ -35,3 +35,6 @@ Description:
 
 		This file will always return the originally written repeat
 		number.
+
+		It should be noticed that some leds, like EL15203000 may
+		only support indefinitely patterns, so they always store -1.
-- 
GitLab


From 8570e75a55430844a8e85e3458e5701556334ffd Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 19 May 2021 21:33:33 +0000
Subject: [PATCH 1215/3804] selftests: Add .gitignore for nci test suite

Building the nci test suite produces a binary, nci_dev, that git then
tries to track. Add a .gitignore file to tell git to ignore this binary.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/nci/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/nci/.gitignore

diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 0000000000000..448eeb4590fcf
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
-- 
GitLab


From 61fa308f23b5b189196e8e5835433cdff99a44b0 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Mon, 17 May 2021 10:21:22 +0800
Subject: [PATCH 1216/3804] samples/kprobes: Fix typo in handler_fault()

Fix a defective format in handler_fault() ending with an 'n' that
should be '\n'.

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/1621218083-23519-2-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 samples/kprobes/kprobe_example.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index c495664c0a9b3..d77a5464d1076 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -101,7 +101,7 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
  */
 static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
 {
-	pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
+	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
 	/* Return 0 because we don't handle the fault. */
 	return 0;
 }
-- 
GitLab


From db1ea668843e048a544021b9bc0aee5aac0b6424 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Mon, 17 May 2021 10:21:23 +0800
Subject: [PATCH 1217/3804] samples/kprobes: Fix typo in handler_post()

It should use post_handler instead of pre_handler in handler_post().

As Joe Perches suggested, it would be better to use pr_fmt and remove
all the embedded pre/post/fault strings. This would change the style of
the output, though.

Fixes: e16c5dd5157e ("samples/kprobes: Add s390 case in kprobe example module")
Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Marc Koderer <marc@koderer.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/1621218083-23519-3-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 samples/kprobes/kprobe_example.c | 33 ++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index d77a5464d1076..5f1eb8b53c113 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -10,6 +10,8 @@
  * whenever kernel_clone() is invoked to create a new process.
  */
 
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
@@ -27,32 +29,31 @@ static struct kprobe kp = {
 static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs)
 {
 #ifdef CONFIG_X86
-	pr_info("<%s> pre_handler: p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, ip = %lx, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->ip, regs->flags);
 #endif
 #ifdef CONFIG_PPC
-	pr_info("<%s> pre_handler: p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, nip = 0x%lx, msr = 0x%lx\n",
 		p->symbol_name, p->addr, regs->nip, regs->msr);
 #endif
 #ifdef CONFIG_MIPS
-	pr_info("<%s> pre_handler: p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, epc = 0x%lx, status = 0x%lx\n",
 		p->symbol_name, p->addr, regs->cp0_epc, regs->cp0_status);
 #endif
 #ifdef CONFIG_ARM64
-	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx,"
-			" pstate = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, pstate = 0x%lx\n",
 		p->symbol_name, p->addr, (long)regs->pc, (long)regs->pstate);
 #endif
 #ifdef CONFIG_ARM
-	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n",
 		p->symbol_name, p->addr, (long)regs->ARM_pc, (long)regs->ARM_cpsr);
 #endif
 #ifdef CONFIG_RISCV
-	pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, status = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, pc = 0x%lx, status = 0x%lx\n",
 		p->symbol_name, p->addr, regs->epc, regs->status);
 #endif
 #ifdef CONFIG_S390
-	pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
+	pr_info("<%s> p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->psw.addr, regs->flags);
 #endif
 
@@ -65,31 +66,31 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
 				unsigned long flags)
 {
 #ifdef CONFIG_X86
-	pr_info("<%s> post_handler: p->addr = 0x%p, flags = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->flags);
 #endif
 #ifdef CONFIG_PPC
-	pr_info("<%s> post_handler: p->addr = 0x%p, msr = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, msr = 0x%lx\n",
 		p->symbol_name, p->addr, regs->msr);
 #endif
 #ifdef CONFIG_MIPS
-	pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, status = 0x%lx\n",
 		p->symbol_name, p->addr, regs->cp0_status);
 #endif
 #ifdef CONFIG_ARM64
-	pr_info("<%s> post_handler: p->addr = 0x%p, pstate = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, pstate = 0x%lx\n",
 		p->symbol_name, p->addr, (long)regs->pstate);
 #endif
 #ifdef CONFIG_ARM
-	pr_info("<%s> post_handler: p->addr = 0x%p, cpsr = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, cpsr = 0x%lx\n",
 		p->symbol_name, p->addr, (long)regs->ARM_cpsr);
 #endif
 #ifdef CONFIG_RISCV
-	pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
+	pr_info("<%s> p->addr = 0x%p, status = 0x%lx\n",
 		p->symbol_name, p->addr, regs->status);
 #endif
 #ifdef CONFIG_S390
-	pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n",
+	pr_info("<%s> p->addr, 0x%p, flags = 0x%lx\n",
 		p->symbol_name, p->addr, regs->flags);
 #endif
 }
@@ -101,7 +102,7 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
  */
 static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
 {
-	pr_info("fault_handler: p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
+	pr_info("p->addr = 0x%p, trap #%d\n", p->addr, trapnr);
 	/* Return 0 because we don't handle the fault. */
 	return 0;
 }
-- 
GitLab


From cc3496bf8685a5bd0bdd79b23ef06e85184f8863 Mon Sep 17 00:00:00 2001
From: Wei Ming Chen <jj251510319013@gmail.com>
Date: Sat, 15 May 2021 23:51:42 +0800
Subject: [PATCH 1218/3804] docs: Use fallthrough pseudo-keyword

Replace the /* fall through */ comment with the fallthrough pseudo-keyword
to align with the original process/coding-style.rst.

Signed-off-by: Wei Ming Chen <jj251510319013@gmail.com>
Link: https://lore.kernel.org/r/20210515155142.2490-1-jj251510319013@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/it_IT/process/coding-style.rst | 2 +-
 Documentation/translations/zh_CN/process/coding-style.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 95f2e7c985e2d..ecc74ba50d3e8 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -62,7 +62,7 @@ i ``case``.  Un esempio.:
 	case 'K':
 	case 'k':
 		mem <<= 10;
-		/* fall through */
+		fallthrough;
 	default:
 		break;
 	}
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index 406d43a02c021..b8c484a84d105 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -61,7 +61,7 @@ Linux 内核代码风格
 	case 'K':
 	case 'k':
 		mem <<= 10;
-		/* fall through */
+		fallthrough;
 	default:
 		break;
 	}
-- 
GitLab


From 76001b8bbf48517e1cb64f5cddcbc4d1369aab0b Mon Sep 17 00:00:00 2001
From: Dwaipayan Ray <dwaipayanray1@gmail.com>
Date: Sat, 15 May 2021 18:53:48 +0530
Subject: [PATCH 1219/3804] docs: Add more message type documentations for
 checkpatch

- Document a couple more checkpatch message types.
- Add a blank line before all `See:` lines to improve the
  rst output.
- Create a new subsection `Permissions` and move a few types
  to it.

Signed-off-by: Dwaipayan Ray <dwaipayanray1@gmail.com>
Acked-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20210515132348.19082-1-dwaipayanray1@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/checkpatch.rst | 170 ++++++++++++++++++++++++-
 1 file changed, 163 insertions(+), 7 deletions(-)

diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index 51fed1bd72ec4..e409f27f48b6f 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -246,6 +246,7 @@ Allocation style
     The first argument for kcalloc or kmalloc_array should be the
     number of elements.  sizeof() as the first argument is generally
     wrong.
+
     See: https://www.kernel.org/doc/html/latest/core-api/memory-allocation.html
 
   **ALLOC_SIZEOF_STRUCT**
@@ -264,6 +265,7 @@ Allocation style
   **ALLOC_WITH_MULTIPLY**
     Prefer kmalloc_array/kcalloc over kmalloc/kzalloc with a
     sizeof multiply.
+
     See: https://www.kernel.org/doc/html/latest/core-api/memory-allocation.html
 
 
@@ -284,6 +286,7 @@ API usage
     BUG() or BUG_ON() should be avoided totally.
     Use WARN() and WARN_ON() instead, and handle the "impossible"
     error condition as gracefully as possible.
+
     See: https://www.kernel.org/doc/html/latest/process/deprecated.html#bug-and-bug-on
 
   **CONSIDER_KSTRTO**
@@ -292,12 +295,23 @@ API usage
     may lead to unexpected results in callers.  The respective kstrtol(),
     kstrtoll(), kstrtoul(), and kstrtoull() functions tend to be the
     correct replacements.
+
     See: https://www.kernel.org/doc/html/latest/process/deprecated.html#simple-strtol-simple-strtoll-simple-strtoul-simple-strtoull
 
+  **IN_ATOMIC**
+    in_atomic() is not for driver use so any such use is reported as an ERROR.
+    Also in_atomic() is often used to determine if we may sleep, but it is not
+    reliable in this use model therefore its use is strongly discouraged.
+
+    However, in_atomic() is ok for core kernel use.
+
+    See: https://lore.kernel.org/lkml/20080320201723.b87b3732.akpm@linux-foundation.org/
+
   **LOCKDEP**
     The lockdep_no_validate class was added as a temporary measure to
     prevent warnings on conversion of device->sem to device->mutex.
     It should not be used for any other purpose.
+
     See: https://lore.kernel.org/lkml/1268959062.9440.467.camel@laptop/
 
   **MALFORMED_INCLUDE**
@@ -308,11 +322,18 @@ API usage
   **USE_LOCKDEP**
     lockdep_assert_held() annotations should be preferred over
     assertions based on spin_is_locked()
+
     See: https://www.kernel.org/doc/html/latest/locking/lockdep-design.html#annotations
 
   **UAPI_INCLUDE**
     No #include statements in include/uapi should use a uapi/ path.
 
+  **USLEEP_RANGE**
+    usleep_range() should be preferred over udelay(). The proper way of
+    using usleep_range() is mentioned in the kernel docs.
+
+    See: https://www.kernel.org/doc/html/latest/timers/timers-howto.html#delays-information-on-the-various-kernel-delay-sleep-mechanisms
+
 
 Comment style
 -------------
@@ -338,6 +359,7 @@ Comment style
   **C99_COMMENTS**
     C99 style single line comments (//) should not be used.
     Prefer the block comment style instead.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
 
 
@@ -347,6 +369,7 @@ Commit message
   **BAD_SIGN_OFF**
     The signed-off-by line does not fall in line with the standards
     specified by the community.
+
     See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#developer-s-certificate-of-origin-1-1
 
   **BAD_STABLE_ADDRESS_STYLE**
@@ -368,12 +391,26 @@ Commit message
   **COMMIT_MESSAGE**
     The patch is missing a commit description.  A brief
     description of the changes made by the patch should be added.
+
     See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
 
+  **FROM_SIGN_OFF_MISMATCH**
+    The author's email does not match with that in the Signed-off-by:
+    line(s). This can be sometimes caused due to an improperly configured
+    email client.
+
+    This message is emitted due to any of the following reasons::
+
+      - The email names do not match.
+      - The email addresses do not match.
+      - The email subaddresses do not match.
+      - The email comments do not match.
+
   **MISSING_SIGN_OFF**
     The patch is missing a Signed-off-by line.  A signed-off-by
     line should be added according to Developer's certificate of
     Origin.
+
     See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
 
   **NO_AUTHOR_SIGN_OFF**
@@ -382,6 +419,7 @@ Commit message
     end of explanation of the patch to denote that the author has
     written it or otherwise has the rights to pass it on as an open
     source patch.
+
     See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#sign-your-work-the-developer-s-certificate-of-origin
 
   **DIFF_IN_COMMIT_MSG**
@@ -389,6 +427,7 @@ Commit message
     This causes problems when one tries to apply a file containing both
     the changelog and the diff because patch(1) tries to apply the diff
     which it found in the changelog.
+
     See: https://lore.kernel.org/lkml/20150611134006.9df79a893e3636019ad2759e@linux-foundation.org/
 
   **GERRIT_CHANGE_ID**
@@ -431,6 +470,7 @@ Comparison style
   **BOOL_COMPARISON**
     Comparisons of A to true and false are better written
     as A and !A.
+
     See: https://lore.kernel.org/lkml/1365563834.27174.12.camel@joe-AO722/
 
   **COMPARISON_TO_NULL**
@@ -492,6 +532,7 @@ Macros, Attributes and Symbols
     The kernel does *not* use the ``__DATE__`` and ``__TIME__`` macros,
     and enables warnings if they are used as they can lead to
     non-deterministic builds.
+
     See: https://www.kernel.org/doc/html/latest/kbuild/reproducible-builds.html#timestamps
 
   **DEFINE_ARCH_HAS**
@@ -502,6 +543,7 @@ Macros, Attributes and Symbols
     want architectures able to override them with optimized ones, we
     should either use weak functions (appropriate for some cases), or
     the symbol that protects them should be the same symbol we use.
+
     See: https://lore.kernel.org/lkml/CA+55aFycQ9XJvEOsiM3txHL5bjUc8CeKWJNR_H+MiicaddB42Q@mail.gmail.com/
 
   **INIT_ATTRIBUTE**
@@ -528,6 +570,20 @@ Macros, Attributes and Symbols
               ...
       }
 
+  **MISPLACED_INIT**
+    It is possible to use section markers on variables in a way
+    which gcc doesn't understand (or at least not the way the
+    developer intended)::
+
+      static struct __initdata samsung_pll_clock exynos4_plls[nr_plls] = {
+
+    does not put exynos4_plls in the .initdata section. The __initdata
+    marker can be virtually anywhere on the line, except right after
+    "struct". The preferred location is before the "=" sign if there is
+    one, or before the trailing ";" otherwise.
+
+    See: https://lore.kernel.org/lkml/1377655732.3619.19.camel@joe-AO722/
+
   **MULTISTATEMENT_MACRO_USE_DO_WHILE**
     Macros with multiple statements should be enclosed in a
     do - while block.  Same should also be the case for macros
@@ -541,6 +597,10 @@ Macros, Attributes and Symbols
 
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#macros-enums-and-rtl
 
+  **PREFER_FALLTHROUGH**
+    Use the `fallthrough;` pseudo keyword instead of
+    `/* fallthrough */` like comments.
+
   **WEAK_DECLARATION**
     Using weak declarations like __attribute__((weak)) or __weak
     can have unintended link defects.  Avoid using them.
@@ -551,6 +611,7 @@ Functions and Variables
 
   **CAMELCASE**
     Avoid CamelCase Identifiers.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#naming
 
   **FUNCTION_WITHOUT_ARGS**
@@ -583,6 +644,27 @@ Functions and Variables
       return bar;
 
 
+Permissions
+-----------
+
+  **EXECUTE_PERMISSIONS**
+    There is no reason for source files to be executable.  The executable
+    bit can be removed safely.
+
+  **EXPORTED_WORLD_WRITABLE**
+    Exporting world writable sysfs/debugfs files is usually a bad thing.
+    When done arbitrarily they can introduce serious security bugs.
+    In the past, some of the debugfs vulnerabilities would seemingly allow
+    any local user to write arbitrary values into device registers - a
+    situation from which little good can be expected to emerge.
+
+    See: https://lore.kernel.org/linux-arm-kernel/cover.1296818921.git.segoon@openwall.com/
+
+  **NON_OCTAL_PERMISSIONS**
+    Permission bits should use 4 digit octal permissions (like 0700 or 0444).
+    Avoid using any other base like decimal.
+
+
 Spacing and Brackets
 --------------------
 
@@ -616,7 +698,7 @@ Spacing and Brackets
 
     1. With a type on the left::
 
-        ;int [] a;
+        int [] a;
 
     2. At the beginning of a line for slice initialisers::
 
@@ -630,6 +712,7 @@ Spacing and Brackets
     Code indent should use tabs instead of spaces.
     Outside of comments, documentation and Kconfig,
     spaces are never used for indentation.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
 
   **CONCATENATED_STRING**
@@ -644,17 +727,20 @@ Spacing and Brackets
 
   **ELSE_AFTER_BRACE**
     `else {` should follow the closing block `}` on the same line.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
 
   **LINE_SPACING**
     Vertical space is wasted given the limited number of lines an
     editor window can display when multiple blank lines are used.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
 
   **OPEN_BRACE**
     The opening brace should be following the function definitions on the
     next line.  For any non-functional block it should be on the same line
     as the last construct.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
 
   **POINTER_LOCATION**
@@ -671,6 +757,7 @@ Spacing and Brackets
 
   **SPACING**
     Whitespace style used in the kernel sources is described in kernel docs.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
 
   **SWITCH_CASE_INDENT_LEVEL**
@@ -700,8 +787,40 @@ Spacing and Brackets
     Trailing whitespace should always be removed.
     Some editors highlight the trailing whitespace and cause visual
     distractions when editing files.
+
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
 
+  **UNNECESSARY_PARENTHESES**
+    Parentheses are not required in the following cases::
+
+      1. Function pointer uses::
+
+          (foo->bar)();
+
+        could be::
+
+          foo->bar();
+
+      2. Comparisons in if::
+
+          if ((foo->bar) && (foo->baz))
+          if ((foo == bar))
+
+        could be::
+
+          if (foo->bar && foo->baz)
+          if (foo == bar)
+
+      3. addressof/dereference single Lvalues::
+
+          &(foo->bar)
+          *(foo->bar)
+
+        could be::
+
+          &foo->bar
+          *foo->bar
+
   **WHILE_AFTER_BRACE**
     while should follow the closing bracket on the same line::
 
@@ -727,13 +846,40 @@ Others
     For DOS-formatted patches, there are extra ^M symbols at the end of
     the line.  These should be removed.
 
-  **EXECUTE_PERMISSIONS**
-    There is no reason for source files to be executable.  The executable
-    bit can be removed safely.
+  **FSF_MAILING_ADDRESS**
+    Kernel maintainers reject new instances of the GPL boilerplate paragraph
+    directing people to write to the FSF for a copy of the GPL, since the
+    FSF has moved in the past and may do so again.
+    So do not write paragraphs about writing to the Free Software Foundation's
+    mailing address.
 
-  **NON_OCTAL_PERMISSIONS**
-    Permission bits should use 4 digit octal permissions (like 0700 or 0444).
-    Avoid using any other base like decimal.
+    See: https://lore.kernel.org/lkml/20131006222342.GT19510@leaf/
+
+  **LONG_LINE**
+    The line has exceeded the specified maximum length. Consider refactoring
+    it.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **LONG_LINE_STRING**
+    A string starts before but extends beyond the maximum line length.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **LONG_LINE_COMMENT**
+    A comment starts before but extends beyond the maximum line length.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **MEMSET**
+    The memset use appears to be incorrect.  This may be caused due to
+    badly ordered parameters.  Please recheck the usage.
 
   **NOT_UNIFIED_DIFF**
     The patch file does not appear to be in unified-diff format.  Please
@@ -742,6 +888,13 @@ Others
   **PRINTF_0XDECIMAL**
     Prefixing 0x with decimal output is defective and should be corrected.
 
+  **SPDX_LICENSE_TAG**
+    The source file is missing or has an improper SPDX identifier tag.
+    The Linux kernel requires the precise SPDX identifier in all source files,
+    and it is thoroughly documented in the kernel docs.
+
+    See: https://www.kernel.org/doc/html/latest/process/license-rules.html
+
   **TRAILING_STATEMENTS**
     Trailing statements (for example after any conditional) should be
     on the next line.
@@ -753,3 +906,6 @@ Others
 
       if (x == y)
               break;
+
+  **TYPO_SPELLING**
+    Some words may have been misspelled.  Consider reviewing them.
-- 
GitLab


From fa5b8fef20b12bc7135e69a1b3689b1d677534de Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Wed, 12 May 2021 16:20:57 +0800
Subject: [PATCH 1220/3804] docs/zh_CN: add parisc index translation

This patch translates Documentation/parisc/index.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/c13ce34b11a65e1f18c4e37566509ead82f2c15f.1620805100.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/translations/zh_CN/index.rst    |  2 +-
 .../translations/zh_CN/parisc/index.rst       | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/parisc/index.rst

diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index 95c3f313cea1a..a736057da41f3 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -153,6 +153,7 @@ TODOList:
    arm64/index
    riscv/index
    openrisc/index
+   parisc/index
 
 TODOList:
 
@@ -160,7 +161,6 @@ TODOList:
 * ia64/index
 * m68k/index
 * nios2/index
-* parisc/index
 * powerpc/index
 * s390/index
 * sh/index
diff --git a/Documentation/translations/zh_CN/parisc/index.rst b/Documentation/translations/zh_CN/parisc/index.rst
new file mode 100644
index 0000000000000..ef232d46b1baf
--- /dev/null
+++ b/Documentation/translations/zh_CN/parisc/index.rst
@@ -0,0 +1,24 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/parisc/index.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_parisc_index:
+
+====================
+PA-RISC体系架构
+====================
+
+Todolist:
+
+   debugging
+   registers
+   features
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
-- 
GitLab


From b24247ded3e3616d225769bdeaf8782c244c80ee Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Wed, 12 May 2021 16:20:58 +0800
Subject: [PATCH 1221/3804] docs/zh_CN: add parisc debugging.rst translation

This patch translates Documentation/parisc/debugging.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Link: https://lore.kernel.org/r/32661b39374f012442b760444ef149afcc0d22af.1620805100.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/parisc/debugging.rst   | 42 +++++++++++++++++++
 .../translations/zh_CN/parisc/index.rst       |  6 ++-
 2 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/parisc/debugging.rst

diff --git a/Documentation/translations/zh_CN/parisc/debugging.rst b/Documentation/translations/zh_CN/parisc/debugging.rst
new file mode 100644
index 0000000000000..c21beb986e15b
--- /dev/null
+++ b/Documentation/translations/zh_CN/parisc/debugging.rst
@@ -0,0 +1,42 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/parisc/debugging.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_parisc_debugging:
+
+=================
+调试PA-RISC
+=================
+
+好吧，这里有一些关于调试linux/parisc的较底层部分的信息。
+
+
+1. 绝对地址
+=====================
+
+很多汇编代码目前运行在实模式下，这意味着会使用绝对地址，而不是像内核其他
+部分那样使用虚拟地址。要将绝对地址转换为虚拟地址，你可以在System.map中查
+找，添加__PAGE_OFFSET（目前是0x10000000）。
+
+
+2. HPMCs
+========
+
+当实模式的代码试图访问不存在的内存时，会出现HPMC（high priority machine
+check）而不是内核oops。若要调试HPMC，请尝试找到系统响应程序/请求程序地址。
+系统请求程序地址应该与（某）处理器的HPA（I/O范围内的高地址）相匹配；系统响应程
+序地址是实模式代码试图访问的地址。
+
+系统响应程序地址的典型值是大于__PAGE_OFFSET （0x10000000）的地址，这意味着
+在实模式试图访问它之前，虚拟地址没有被翻译成物理地址。
+
+
+3. 有趣的Q位
+============
+
+某些非常关键的代码必须清除PSW中的Q位。当Q位被清除时，CPU不会更新中断处理
+程序所读取的寄存器，以找出机器被中断的位置——所以如果你在清除Q位的指令和再
+次设置Q位的RFI之间遇到中断，你不知道它到底发生在哪里。如果你幸运的话，IAOQ
+会指向清除Q位的指令，如果你不幸运的话，它会指向任何地方。通常Q位的问题会
+表现为无法解释的系统挂起或物理内存越界。
diff --git a/Documentation/translations/zh_CN/parisc/index.rst b/Documentation/translations/zh_CN/parisc/index.rst
index ef232d46b1baf..b913d664e7350 100644
--- a/Documentation/translations/zh_CN/parisc/index.rst
+++ b/Documentation/translations/zh_CN/parisc/index.rst
@@ -10,9 +10,13 @@
 PA-RISC体系架构
 ====================
 
-Todolist:
+.. toctree::
+   :maxdepth: 2
 
    debugging
+
+Todolist:
+
    registers
    features
 
-- 
GitLab


From 5fb82175a2aeb44d174ec60d264e54cb6e1e55f4 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Wed, 12 May 2021 16:20:59 +0800
Subject: [PATCH 1222/3804] docs/zh_CN: add parisc registers.rst translation

This patch translates Documentation/parisc/registers.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/b8375b5cd2c5163691691fe4757511ce984f3b83.1620805100.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/parisc/index.rst       |   2 +-
 .../translations/zh_CN/parisc/registers.rst   | 153 ++++++++++++++++++
 2 files changed, 154 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/parisc/registers.rst

diff --git a/Documentation/translations/zh_CN/parisc/index.rst b/Documentation/translations/zh_CN/parisc/index.rst
index b913d664e7350..a47454ebe32e6 100644
--- a/Documentation/translations/zh_CN/parisc/index.rst
+++ b/Documentation/translations/zh_CN/parisc/index.rst
@@ -14,10 +14,10 @@ PA-RISC体系架构
    :maxdepth: 2
 
    debugging
+   registers
 
 Todolist:
 
-   registers
    features
 
 .. only::  subproject and html
diff --git a/Documentation/translations/zh_CN/parisc/registers.rst b/Documentation/translations/zh_CN/parisc/registers.rst
new file mode 100644
index 0000000000000..71e2404cd1030
--- /dev/null
+++ b/Documentation/translations/zh_CN/parisc/registers.rst
@@ -0,0 +1,153 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/parisc/registers.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_parisc_registers:
+
+=========================
+Linux/PA-RISC的寄存器用法
+=========================
+
+[ 用星号表示目前尚未实现的计划用途。 ]
+
+ABI约定的通用寄存器
+===================
+
+控制寄存器
+----------
+
+============================    =================================
+CR 0 (恢复计数器)               用于ptrace
+CR 1-CR 7(无定义)               未使用
+CR 8 (Protection ID)            每进程值*
+CR 9, 12, 13 (PIDS)             未使用
+CR10 (CCR)                      FPU延迟保存*
+CR11                            按照ABI的规定（SAR）
+CR14 (中断向量)                 初始化为 fault_vector
+CR15 (EIEM)                     所有位初始化为1*
+CR16 (间隔计时器)               读取周期数/写入开始时间间隔计时器
+CR17-CR22                       中断参数
+CR19                            中断指令寄存器
+CR20                            中断空间寄存器
+CR21                            中断偏移量寄存器
+CR22                            中断 PSW
+CR23 (EIRR)                     读取未决中断/写入清除位
+CR24 (TR 0)                     内核空间页目录指针
+CR25 (TR 1)                     用户空间页目录指针
+CR26 (TR 2)                     不使用
+CR27 (TR 3)                     线程描述符指针
+CR28 (TR 4)                     不使用
+CR29 (TR 5)                     不使用
+CR30 (TR 6)                     当前 / 0
+CR31 (TR 7)                     临时寄存器，在不同地方使用
+============================    =================================
+
+空间寄存器（内核模式）
+----------------------
+
+========                        ==============================
+SR0                             临时空间寄存器
+SR4-SR7                         设置为0
+SR1                             临时空间寄存器
+SR2                             内核不应该破坏它
+SR3                             用于用户空间访问（当前进程）
+========                        ==============================
+
+空间寄存器（用户模式）
+----------------------
+
+========                        ============================
+SR0                             临时空间寄存器
+SR1                             临时空间寄存器
+SR2                             保存Linux gateway page的空间
+SR3                             在内核中保存用户地址空间的值
+SR4-SR7                         定义了用户/内核的短地址空间
+========                        ============================
+
+
+处理器状态字
+------------
+
+======================          ================================================
+W （64位地址）                  0
+E （小尾端）                    0
+S （安全间隔计时器）            0
+T （产生分支陷阱）              0
+H （高特权级陷阱）              0
+L （低特权级陷阱）              0
+N （撤销下一条指令）            被C代码使用
+X （数据存储中断禁用）          0
+B （产生分支）                  被C代码使用
+C （代码地址转译）              1, 在执行实模式代码时为0
+V （除法步长校正）              被C代码使用
+M （HPMC 掩码）                 0, 在执行HPMC操作*时为1
+C/B （进/借 位）                被C代码使用
+O （有序引用）                  1*
+F （性能监视器）                0
+R （回收计数器陷阱）            0
+Q （收集中断状态）              1 （在rfi之前的代码中为0）
+P （保护标识符）                1*
+D （数据地址转译）              1, 在执行实模式代码时为0
+I （外部中断掩码）              由cli()/sti()宏使用。
+======================          ================================================
+
+“隐形”寄存器（影子寄存器）
+---------------------------
+
+=============                   ===================
+PSW W 默认值                    0
+PSW E 默认值                    0
+影子寄存器                      被中断处理代码使用
+TOC启用位                       1
+=============                   ===================
+
+----------------------------------------------------------
+
+PA-RISC架构定义了7个寄存器作为“影子寄存器”。这些寄存器在
+RETURN FROM INTERRUPTION AND RESTORE指令中使用，通过消
+除中断处理程序中对一般寄存器（GR）的保存和恢复的需要来减
+少状态保存和恢复时间。影子寄存器是GRs 1, 8, 9, 16, 17,
+24和25。
+
+-------------------------------------------------------------------------
+
+寄存器使用说明，最初由John Marvin提供，并由Randolph Chung提供一些补充说明。
+
+对于通用寄存器:
+
+r1,r2,r19-r26,r28,r29 & r31可以在不保存它们的情况下被使用。当然，如果你
+关心它们，在调用另一个程序之前，你也需要保存它们。上面的一些寄存器确实
+有特殊的含义，你应该注意一下:
+
+    r1:
+       addil指令是硬性规定将其结果放在r1中，所以如果你使用这条指令要
+       注意这点。
+
+    r2:
+       这就是返回指针。一般来说，你不想使用它，因为你需要这个指针来返
+       回给你的调用者。然而，它与这组寄存器组合在一起，因为调用者不能
+       依赖你返回时的值是相同的，也就是说，你可以将r2复制到另一个寄存
+       器，并在作废r2后通过该寄存器返回，这应该不会给调用程序带来问题。
+
+    r19-r22:
+       这些通常被认为是临时寄存器。
+       请注意，在64位中它们是arg7-arg4。
+
+    r23-r26:
+       这些是arg3-arg0，也就是说，如果你不再关心传入的值，你可以使用
+       它们。
+
+    r28,r29:
+       这俩是ret0和ret1。它们是你传入返回值的地方。r28是主返回值。当返回
+       小结构体时，r29也可以用来将数据传回给调用程序。
+
+    r30:
+       栈指针
+
+    r31:
+       ble指令将返回指针放在这里。
+
+
+    r3-r18,r27,r30需要被保存和恢复。r3-r18只是一般用途的寄存器。
+    r27是数据指针，用来使对全局变量的引用更容易。r30是栈指针。
-- 
GitLab


From 2bc602cb0e0d92afec57967cf2212b66cba42ea5 Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Thu, 20 May 2021 15:42:25 +0800
Subject: [PATCH 1223/3804] docs: block: blk-mq.rst: correct drive -> driver

It is the 'driver' that completes the request. Also remove a redundant space.

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20210520074225.1989-1-zbestahu@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/block/blk-mq.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/block/blk-mq.rst b/Documentation/block/blk-mq.rst
index a980d23af48c2..d96118c739541 100644
--- a/Documentation/block/blk-mq.rst
+++ b/Documentation/block/blk-mq.rst
@@ -62,7 +62,7 @@ queue, to be sent in the future, when the hardware is able.
 Software staging queues
 ~~~~~~~~~~~~~~~~~~~~~~~
 
-The block IO subsystem adds requests  in the software staging queues
+The block IO subsystem adds requests in the software staging queues
 (represented by struct blk_mq_ctx) in case that they weren't sent
 directly to the driver. A request is one or more BIOs. They arrived at the
 block layer through the data structure struct bio. The block layer
@@ -132,7 +132,7 @@ In order to indicate which request has been completed, every request is
 identified by an integer, ranging from 0 to the dispatch queue size. This tag
 is generated by the block layer and later reused by the device driver, removing
 the need to create a redundant identifier. When a request is completed in the
-drive, the tag is sent back to the block layer to notify it of the finalization.
+driver, the tag is sent back to the block layer to notify it of the finalization.
 This removes the need to do a linear search to find out which IO has been
 completed.
 
-- 
GitLab


From 0e7c52da1ab82338fc91021cc34e8f2fdaf73de4 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Thu, 20 May 2021 19:09:49 +0930
Subject: [PATCH 1224/3804] Documentation: checkpatch: Tweak BIT() macro
 include

While include/linux/bitops.h brings in the BIT() macro, it was moved to
include/linux/bits.h in commit 8bd9cb51daac ("locking/atomics, asm-generic:
Move some macros from <linux/bitops.h> to a new <linux/bits.h> file").

Since that commit BIT() has moved again into include/vdso/bits.h via
commit 3945ff37d2f4 ("linux/bits.h: Extract common header for vDSO").

I think the move to the vDSO header can be considered an implementation
detail, so for now update the checkpatch documentation to recommend use
of include/linux/bits.h.

Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Acked-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20210520093949.511471-1-andrew@aj.id.au
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/checkpatch.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index e409f27f48b6f..87b859f321de4 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -512,7 +512,7 @@ Macros, Attributes and Symbols
 
   **BIT_MACRO**
     Defines like: 1 << <digit> could be BIT(digit).
-    The BIT() macro is defined in include/linux/bitops.h::
+    The BIT() macro is defined via include/linux/bits.h::
 
       #define BIT(nr)         (1UL << (nr))
 
-- 
GitLab


From 1a0b713c73688c6bafbe6faf8c90390b11b26fc6 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Tue, 18 May 2021 09:01:47 +0800
Subject: [PATCH 1225/3804] drm/amd/pm: correct MGpuFanBoost setting

No MGpuFanBoost setting for those ASICs which do not support it.
Otherwise, it may break their fan control feature.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1580

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c        |  9 +++++++++
 .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c    | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index ac13042672ea1..0eaf86b5e6986 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -2925,6 +2925,8 @@ static ssize_t navi1x_get_gpu_metrics(struct smu_context *smu,
 
 static int navi10_enable_mgpu_fan_boost(struct smu_context *smu)
 {
+	struct smu_table_context *table_context = &smu->smu_table;
+	PPTable_t *smc_pptable = table_context->driver_pptable;
 	struct amdgpu_device *adev = smu->adev;
 	uint32_t param = 0;
 
@@ -2932,6 +2934,13 @@ static int navi10_enable_mgpu_fan_boost(struct smu_context *smu)
 	if (adev->asic_type == CHIP_NAVI12)
 		return 0;
 
+	/*
+	 * Skip the MGpuFanBoost setting for those ASICs
+	 * which do not support it
+	 */
+	if (!smc_pptable->MGpuFanBoostLimitRpm)
+		return 0;
+
 	/* Workaround for WS SKU */
 	if (adev->pdev->device == 0x7312 &&
 	    adev->pdev->revision == 0)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index d2fd44b903ca4..b124a5e40dd6a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -3027,6 +3027,16 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
 
 static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu)
 {
+	struct smu_table_context *table_context = &smu->smu_table;
+	PPTable_t *smc_pptable = table_context->driver_pptable;
+
+	/*
+	 * Skip the MGpuFanBoost setting for those ASICs
+	 * which do not support it
+	 */
+	if (!smc_pptable->MGpuFanBoostLimitRpm)
+		return 0;
+
 	return smu_cmn_send_smc_msg_with_param(smu,
 					       SMU_MSG_SetMGpuFanBoostLimitRpm,
 					       0,
-- 
GitLab


From 98e48cd9283dbac0e1445ee780889f10b3d1db6a Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 20 May 2021 01:12:23 +0300
Subject: [PATCH 1226/3804] regulator: core: resolve supply for
 boot-on/always-on regulators

For the boot-on/always-on regulators the set_machine_constraints() is
called before resolving rdev->supply. Thus the code would try to enable
rdev before enabling the supplying regulator. Enforce resolving the supply
regulator before enabling rdev.

Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20210519221224.2868496-1-dmitry.baryshkov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f192bf19492ed..e20e77e4c159d 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1425,6 +1425,12 @@ static int set_machine_constraints(struct regulator_dev *rdev)
 	 * and we have control then make sure it is enabled.
 	 */
 	if (rdev->constraints->always_on || rdev->constraints->boot_on) {
+		/* If we want to enable this regulator, make sure that we know
+		 * the supplying regulator.
+		 */
+		if (rdev->supply_name && !rdev->supply)
+			return -EPROBE_DEFER;
+
 		if (rdev->supply) {
 			ret = regulator_enable(rdev->supply);
 			if (ret < 0) {
-- 
GitLab


From ba515a5821dc0d101ded0379b14b1d1471ebfaba Mon Sep 17 00:00:00 2001
From: Kevin Wang <kevin1.wang@amd.com>
Date: Wed, 19 May 2021 11:03:11 +0800
Subject: [PATCH 1227/3804] drm/amdkfd: correct sienna_cichlid SDMA RLC
 register offset error

1. Correct the KFD SDMA RLC queue register offset error
(all SDMA RLC register offsets are based on SDMA0.RLC0_RLC0_RB_CNTL).
2. HQD_N_REGS (19+6+7+12)
  12: two more registers than navi1x (SDMAx_RLCy_MIDCMD_DATA{9,10})

This patch also fixes a NULL pointer issue when reading
/sys/kernel/debug/kfd/hqds on sienna_cichlid chips.

Signed-off-by: Kevin Wang <kevin1.wang@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
index fad3b91f74f54..d39cff4a1fe38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
@@ -156,16 +156,16 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
 				mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
 		break;
 	case 1:
-		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
 				mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
 		break;
 	case 2:
-		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
-				mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				mmSDMA2_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
 		break;
 	case 3:
-		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
-				mmSDMA3_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+		sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+				mmSDMA3_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
 		break;
 	}
 
@@ -450,7 +450,7 @@ static int hqd_sdma_dump_v10_3(struct kgd_dev *kgd,
 			engine_id, queue_id);
 	uint32_t i = 0, reg;
 #undef HQD_N_REGS
-#define HQD_N_REGS (19+6+7+10)
+#define HQD_N_REGS (19+6+7+12)
 
 	*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
 	if (*dump == NULL)
-- 
GitLab


From b95f045ea35673572ef46d6483ad8bd6d353d63c Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Tue, 18 May 2021 10:58:22 -0400
Subject: [PATCH 1228/3804] drm/amdgpu/vcn1: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 0c1beefa3e498..27b1ced145d2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -231,9 +231,13 @@ static int vcn_v1_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
-		RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+		(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+		 RREG32_SOC15(VCN, 0, mmUVD_STATUS))) {
 		vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
+	}
 
 	return 0;
 }
-- 
GitLab


From 0c6013377b4027e69d8f3e63b6bf556b6cb87802 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Wed, 19 May 2021 11:26:32 -0400
Subject: [PATCH 1229/3804] drm/amdgpu/vcn2.0: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index 116b9643d5bab..8af567c546dbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -262,6 +262,8 @@ static int vcn_v2_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
 	    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
 	      RREG32_SOC15(VCN, 0, mmUVD_STATUS)))
-- 
GitLab


From 2fb536ea42d557f39f70c755f68e1aa1ad466c55 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Wed, 19 May 2021 11:40:39 -0400
Subject: [PATCH 1230/3804] drm/amdgpu/vcn2.5: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 948813d7caa02..888b17d84691c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -321,6 +321,8 @@ static int vcn_v2_5_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
-- 
GitLab


From 4a62542ae064e3b645d6bbf2295a6c05136956c6 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Mon, 17 May 2021 16:39:17 -0400
Subject: [PATCH 1231/3804] drm/amdgpu/vcn3: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 14470da521138..3b23de996db22 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -372,15 +372,14 @@ done:
 static int vcn_v3_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring;
 	int i;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
 		if (adev->vcn.harvest_config & (1 << i))
 			continue;
 
-		ring = &adev->vcn.inst[i].ring_dec;
-
 		if (!amdgpu_sriov_vf(adev)) {
 			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
 					(adev->vcn.cur_state != AMD_PG_STATE_GATE &&
-- 
GitLab


From ff48f6dbf0ff896c98d167a67a5b975fb034356b Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Wed, 19 May 2021 11:42:48 -0400
Subject: [PATCH 1232/3804] drm/amdgpu/jpeg2.0: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index de5abceced0dd..85967a5570cb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -172,6 +172,8 @@ static int jpeg_v2_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
 	      RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
 		jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-- 
GitLab


From 23f10a571da5eaa63b7845d16e2f49837e841ab9 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Wed, 19 May 2021 12:04:38 -0400
Subject: [PATCH 1233/3804] drm/amdgpu/jpeg2.5: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 938ef4ce5b760..46096ad7f0d91 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -187,14 +187,14 @@ static int jpeg_v2_5_hw_init(void *handle)
 static int jpeg_v2_5_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring;
 	int i;
 
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
 		if (adev->jpeg.harvest_config & (1 << i))
 			continue;
 
-		ring = &adev->jpeg.inst[i].ring_dec;
 		if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
 		      RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS))
 			jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
-- 
GitLab


From 20ebbfd22f8115a1e4f60d3d289f66be4d47f1ec Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Wed, 19 May 2021 12:08:20 -0400
Subject: [PATCH 1234/3804] drm/amdgpu/jpeg3: add cancel_delayed_work_sync
 before power gate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add cancel_delayed_work_sync before set power gating state
to avoid race condition issue when power gating.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 94be35357f7d6..bd77794315bc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -159,9 +159,9 @@ static int jpeg_v3_0_hw_init(void *handle)
 static int jpeg_v3_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	struct amdgpu_ring *ring;
 
-	ring = &adev->jpeg.inst->ring_dec;
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
 	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE &&
 	      RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS))
 		jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
-- 
GitLab


From 6bdacdb48e94ff26c03c6eeeef48c03c5e2f7dd4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 12 May 2021 20:57:14 +0200
Subject: [PATCH 1235/3804] bpf: Fix BPF_JIT kconfig symbol dependency

Randy reported a randconfig build error recently on i386:

  ld: arch/x86/net/bpf_jit_comp32.o: in function `do_jit':
  bpf_jit_comp32.c:(.text+0x28c9): undefined reference to `__bpf_call_base'
  ld: arch/x86/net/bpf_jit_comp32.o: in function `bpf_int_jit_compile':
  bpf_jit_comp32.c:(.text+0x3694): undefined reference to `bpf_jit_blind_constants'
  ld: bpf_jit_comp32.c:(.text+0x3719): undefined reference to `bpf_jit_binary_free'
  ld: bpf_jit_comp32.c:(.text+0x3745): undefined reference to `bpf_jit_binary_alloc'
  ld: bpf_jit_comp32.c:(.text+0x37d3): undefined reference to `bpf_jit_prog_release_other'
  [...]

The cause was that b24abcff918a ("bpf, kconfig: Add consolidated menu entry for
bpf with core options") moved BPF_JIT from net/Kconfig into kernel/bpf/Kconfig
and previously BPF_JIT was guarded by a 'if NET'. However, there is no actual
dependency on NET, it's just that menuconfig NET selects BPF. And the latter in
turn causes kernel/bpf/core.o to be built which contains above symbols. Randy's
randconfig didn't have NET set, and BPF wasn't either, but BPF_JIT otoh was.
Detangle this by making BPF_JIT depend on BPF instead. arm64 was the only arch
that pulled in its JIT in net/ via obj-$(CONFIG_NET), all others unconditionally
pull this dir in via obj-y. Do the same since CONFIG_NET guard there is really
useless as we compiled the JIT via obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o anyway.

Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
---
 arch/arm64/Kbuild  | 3 +--
 kernel/bpf/Kconfig | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild
index d6465823b2810..7b393cfec0716 100644
--- a/arch/arm64/Kbuild
+++ b/arch/arm64/Kbuild
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-y			+= kernel/ mm/
-obj-$(CONFIG_NET)	+= net/
+obj-y			+= kernel/ mm/ net/
 obj-$(CONFIG_KVM)	+= kvm/
 obj-$(CONFIG_XEN)	+= xen/
 obj-$(CONFIG_CRYPTO)	+= crypto/
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 26b591e23f16a..bd04f4a44c01d 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -37,6 +37,7 @@ config BPF_SYSCALL
 
 config BPF_JIT
 	bool "Enable BPF Just In Time compiler"
+	depends on BPF
 	depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
 	depends on MODULES
 	help
-- 
GitLab


From 8f1634b82189e715b0f82f16ce54fab43cfedd8a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 14 May 2021 10:05:28 -0700
Subject: [PATCH 1236/3804] selftests/bpf: Convert static to global in
 tc_redirect progs

Both IFINDEX_SRC and IFINDEX_DST are set from the userspace
and it won't work once bpf merges with bpf-next.

Fixes: 096eccdef0b3 ("selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210514170528.3750250-1-sdf@google.com
---
 tools/testing/selftests/bpf/progs/test_tc_neigh.c | 4 ++--
 tools/testing/selftests/bpf/progs/test_tc_peer.c  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index 90f64a85998fa..0c93d326a663f 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -33,8 +33,8 @@
 				 a.s6_addr32[3] == b.s6_addr32[3])
 #endif
 
-static volatile const __u32 IFINDEX_SRC;
-static volatile const __u32 IFINDEX_DST;
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
 
 static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
 					    __be32 addr)
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index 72c72950c3bbe..ef264bced0e65 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -8,8 +8,8 @@
 
 #include <bpf/bpf_helpers.h>
 
-static volatile const __u32 IFINDEX_SRC;
-static volatile const __u32 IFINDEX_DST;
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
 
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
-- 
GitLab


From 704e2beba23c45eaa056b1c03b5e1fb221e03f80 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 14 May 2021 11:07:26 -0700
Subject: [PATCH 1237/3804] selftests/bpf: Test ringbuf mmap read-only and
 read-write restrictions

Extend ringbuf selftest to validate read/write and read-only restrictions on
memory mapping consumer/producer/data pages. Ensure no "escalations" from
PROT_READ to PROT_WRITE/PROT_EXEC is allowed. And test that mremap() fails to
expand mmap()'ed area.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210514180726.843157-1-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/ringbuf.c        | 49 ++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index de78617f65501..f9a8ae331963d 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -86,8 +86,9 @@ void test_ringbuf(void)
 	const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
 	pthread_t thread;
 	long bg_ret = -1;
-	int err, cnt;
+	int err, cnt, rb_fd;
 	int page_size = getpagesize();
+	void *mmap_ptr, *tmp_ptr;
 
 	skel = test_ringbuf__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
@@ -101,6 +102,52 @@ void test_ringbuf(void)
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
 
+	rb_fd = bpf_map__fd(skel->maps.ringbuf);
+	/* good read/write cons_pos */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+	ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+	tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+	if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+		goto cleanup;
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+	/* bad writeable prod_pos */
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+	err = -errno;
+	ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+	ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+	/* bad writeable data pages */
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+	err = -errno;
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+	ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+	mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+	ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+	/* good read-only pages */
+	mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+	if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+		goto cleanup;
+
+	ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+	ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+	ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+	/* good read-only pages with initial offset */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+	if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+		goto cleanup;
+
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
 	/* only trigger BPF program for current process */
 	skel->bss->pid = getpid();
 
-- 
GitLab


From 8afcc19fbf083a8459284d9a29b4b5ac1cb2396c Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 17 May 2021 11:28:29 +0200
Subject: [PATCH 1238/3804] bpf: Clarify a bpf_bprintf_prepare macro

The per-cpu buffers contain bprintf data rather than printf arguments.
The macro name and comment were a bit confusing, this rewords them in a
clearer way.

Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20210517092830.1026418-1-revest@chromium.org
---
 kernel/bpf/helpers.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index ef658a9ea5c93..3a5ab614cbb03 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -692,13 +692,14 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 	return -EINVAL;
 }
 
-/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
+ * arguments representation.
  */
-#define MAX_PRINTF_BUF_LEN	512
+#define MAX_BPRINTF_BUF_LEN	512
 
 /* Support executing three nested bprintf helper calls on a given CPU */
 struct bpf_bprintf_buffers {
-	char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
+	char tmp_bufs[3][MAX_BPRINTF_BUF_LEN];
 };
 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
@@ -761,7 +762,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 		if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
 			return -EBUSY;
 
-		tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
+		tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
 		*bin_args = (u32 *)tmp_buf;
 	}
 
-- 
GitLab


From 0af02eb2a7d76ca85a1ecaf4b3775e2c86408fab Mon Sep 17 00:00:00 2001
From: Florent Revest <revest@chromium.org>
Date: Mon, 17 May 2021 11:28:30 +0200
Subject: [PATCH 1239/3804] bpf: Avoid using ARRAY_SIZE on an uninitialized
 pointer

The cppcheck static code analysis reported the following error:

    if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
                                             ^
ARRAY_SIZE is a macro that expands to sizeofs, so bufs is not actually
dereferenced at runtime, and the code is actually safe. But to keep
things tidy, this patch removes the need for a call to ARRAY_SIZE by
extracting the size of the array into a macro. Cppcheck should no longer
be confused and the code ends up being a bit cleaner.

Fixes: e2d5b2bb769f ("bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20210517092830.1026418-2-revest@chromium.org
---
 kernel/bpf/helpers.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3a5ab614cbb03..73443498d88fc 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -698,8 +698,9 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 #define MAX_BPRINTF_BUF_LEN	512
 
 /* Support executing three nested bprintf helper calls on a given CPU */
+#define MAX_BPRINTF_NEST_LEVEL	3
 struct bpf_bprintf_buffers {
-	char tmp_bufs[3][MAX_BPRINTF_BUF_LEN];
+	char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
 };
 static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
 static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
@@ -711,7 +712,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
 
 	preempt_disable();
 	nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
-	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
+	if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
 		this_cpu_dec(bpf_bprintf_nest_level);
 		preempt_enable();
 		return -EBUSY;
-- 
GitLab


From ceb11679d9fcf3fdb358a310a38760fcbe9b63ed Mon Sep 17 00:00:00 2001
From: Yinjun Zhang <yinjun.zhang@corigine.com>
Date: Thu, 20 May 2021 10:58:34 +0200
Subject: [PATCH 1240/3804] bpf, offload: Reorder offload callback 'prepare' in
 verifier

Commit 4976b718c355 ("bpf: Introduce pseudo_btf_id") switched the
order of resolve_pseudo_ldimm(), in which some pseudo instructions
are rewritten. Thus those rewritten instructions cannot be passed
to driver via 'prepare' offload callback.

Reorder the 'prepare' offload callback to fix it.

Fixes: 4976b718c355 ("bpf: Introduce pseudo_btf_id")
Signed-off-by: Yinjun Zhang <yinjun.zhang@corigine.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20210520085834.15023-1-simon.horman@netronome.com
---
 kernel/bpf/verifier.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c58598ef4b5b5..09849e43f0352 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13368,12 +13368,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (is_priv)
 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
 
-	if (bpf_prog_is_dev_bound(env->prog->aux)) {
-		ret = bpf_prog_offload_verifier_prep(env->prog);
-		if (ret)
-			goto skip_full_check;
-	}
-
 	env->explored_states = kvcalloc(state_htab_size(env),
 				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
@@ -13401,6 +13395,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (ret < 0)
 		goto skip_full_check;
 
+	if (bpf_prog_is_dev_bound(env->prog->aux)) {
+		ret = bpf_prog_offload_verifier_prep(env->prog);
+		if (ret)
+			goto skip_full_check;
+	}
+
 	ret = check_cfg(env);
 	if (ret < 0)
 		goto skip_full_check;
-- 
GitLab


From 84316ca4e100d8cbfccd9f774e23817cb2059868 Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Wed, 19 May 2021 15:47:42 +0000
Subject: [PATCH 1241/3804] bpf: Set mac_len in bpf_skb_change_head

The skb_change_head() helper did not set "skb->mac_len", which is
problematic when it's used in combination with skb_redirect_peer().
Without it, redirecting a packet from a L3 device such as wireguard to
the veth peer device will cause skb->data to point to the middle of the
IP header on entry to tcp_v4_rcv() since the L2 header is not pulled
correctly due to mac_len=0.

Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure")
Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210519154743.2554771-2-joamaki@gmail.com
---
 net/core/filter.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a6707..65ab4e21c087f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
 		__skb_push(skb, head_room);
 		memset(skb->data, 0, head_room);
 		skb_reset_mac_header(skb);
+		skb_reset_mac_len(skb);
 	}
 
 	return ret;
-- 
GitLab


From 63e39d29b3da02e901349f6cd71159818a4737a6 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 20 May 2021 11:18:35 -0700
Subject: [PATCH 1242/3804] ixgbe: fix large MTU request from VF

Check that the MTU value requested by the VF is in the supported
range of MTUs before attempting to set the VF large packet enable,
otherwise reject the request. This also avoids unnecessary
register updates in the case of the 82599 controller.

Fixes: 872844ddb9e4 ("ixgbe: Enable jumbo frames support w/ SR-IOV")
Co-developed-by: Piotr Skajewski <piotrx.skajewski@intel.com>
Signed-off-by: Piotr Skajewski <piotrx.skajewski@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Co-developed-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 988db46bff0ee..214a38de3f415 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -467,12 +467,16 @@ static int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid,
 	return err;
 }
 
-static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
+static int ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 max_frame, u32 vf)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int max_frame = msgbuf[1];
 	u32 max_frs;
 
+	if (max_frame < ETH_MIN_MTU || max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) {
+		e_err(drv, "VF max_frame %d out of range\n", max_frame);
+		return -EINVAL;
+	}
+
 	/*
 	 * For 82599EB we have to keep all PFs and VFs operating with
 	 * the same max_frame value in order to avoid sending an oversize
@@ -533,12 +537,6 @@ static s32 ixgbe_set_vf_lpe(struct ixgbe_adapter *adapter, u32 *msgbuf, u32 vf)
 		}
 	}
 
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE) {
-		e_err(drv, "VF max_frame %d out of range\n", max_frame);
-		return -EINVAL;
-	}
-
 	/* pull current max frame size from hardware */
 	max_frs = IXGBE_READ_REG(hw, IXGBE_MAXFRS);
 	max_frs &= IXGBE_MHADD_MFS_MASK;
@@ -1249,7 +1247,7 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf)
 		retval = ixgbe_set_vf_vlan_msg(adapter, msgbuf, vf);
 		break;
 	case IXGBE_VF_SET_LPE:
-		retval = ixgbe_set_vf_lpe(adapter, msgbuf, vf);
+		retval = ixgbe_set_vf_lpe(adapter, msgbuf[1], vf);
 		break;
 	case IXGBE_VF_SET_MACVLAN:
 		retval = ixgbe_set_vf_macvlan_msg(adapter, msgbuf, vf);
-- 
GitLab


From 503c599a4f53fe3d959aebfd22c34da27da49777 Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Fri, 21 May 2021 00:19:15 +0530
Subject: [PATCH 1243/3804] net: encx24j600: fix kernel-doc syntax in file
 headers

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
The header for drivers/net/ethernet/microchip/encx24j600 files follows
this syntax, but the content inside does not comply with kernel-doc.

This line was probably not meant for kernel-doc parsing, but is parsed
due to the presence of kernel-doc-like comment syntax (i.e., '/**'), which
causes an unexpected warning from kernel-doc.
For example, running scripts/kernel-doc -none
drivers/net/ethernet/microchip/encx24j600_hw.h emits:
warning: expecting prototype for h(). Prototype was for _ENCX24J600_HW_H() instead

Provide a simple fix by replacing such occurrences with general comment
format, i.e. '/*', to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c    | 2 +-
 drivers/net/ethernet/microchip/encx24j600_hw.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 3658c4ae3c37e..ee921a99e439a 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
  * Microchip ENCX24J600 ethernet driver
  *
  * Copyright (C) 2015 Gridpoint
diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h
index f604a260ede79..fac61a8fbd020 100644
--- a/drivers/net/ethernet/microchip/encx24j600_hw.h
+++ b/drivers/net/ethernet/microchip/encx24j600_hw.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/**
+/*
  * encx24j600_hw.h: Register definitions
  *
  */
-- 
GitLab


From 13a6f3153922391e90036ba2267d34eed63196fc Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Thu, 20 May 2021 12:32:36 +0000
Subject: [PATCH 1244/3804] net/qla3xxx: fix schedule while atomic in
 ql_sem_spinlock

When 'ql_sem_spinlock' is called, the driver has already acquired the
spin lock, so it must not call 'ssleep' in this atomic context.

This bug can be fixed by using 'mdelay' instead of 'ssleep'.

The KASAN's log reveals it:

[    3.238124 ] BUG: scheduling while atomic: swapper/0/1/0x00000002
[    3.238748 ] 2 locks held by swapper/0/1:
[    3.239151 ]  #0: ffff88810177b240 (&dev->mutex){....}-{3:3}, at:
__device_driver_lock+0x41/0x60
[    3.240026 ]  #1: ffff888107c60e28 (&qdev->hw_lock){....}-{2:2}, at:
ql3xxx_probe+0x2aa/0xea0
[    3.240873 ] Modules linked in:
[    3.241187 ] irq event stamp: 460854
[    3.241541 ] hardirqs last  enabled at (460853): [<ffffffff843051bf>]
_raw_spin_unlock_irqrestore+0x4f/0x70
[    3.242245 ] hardirqs last disabled at (460854): [<ffffffff843058ca>]
_raw_spin_lock_irqsave+0x2a/0x70
[    3.242245 ] softirqs last  enabled at (446076): [<ffffffff846002e4>]
__do_softirq+0x2e4/0x4b1
[    3.242245 ] softirqs last disabled at (446069): [<ffffffff811ba5e0>]
irq_exit_rcu+0x100/0x110
[    3.242245 ] Preemption disabled at:
[    3.242245 ] [<ffffffff828ca5ba>] ql3xxx_probe+0x2aa/0xea0
[    3.242245 ] Kernel panic - not syncing: scheduling while atomic
[    3.242245 ] CPU: 2 PID: 1 Comm: swapper/0 Not tainted
5.13.0-rc1-00145
-gee7dc339169-dirty #16
[    3.242245 ] Call Trace:
[    3.242245 ]  dump_stack+0xba/0xf5
[    3.242245 ]  ? ql3xxx_probe+0x1f0/0xea0
[    3.242245 ]  panic+0x15a/0x3f2
[    3.242245 ]  ? vprintk+0x76/0x150
[    3.242245 ]  ? ql3xxx_probe+0x2aa/0xea0
[    3.242245 ]  __schedule_bug+0xae/0xe0
[    3.242245 ]  __schedule+0x72e/0xa00
[    3.242245 ]  schedule+0x43/0xf0
[    3.242245 ]  schedule_timeout+0x28b/0x500
[    3.242245 ]  ? del_timer_sync+0xf0/0xf0
[    3.242245 ]  ? msleep+0x2f/0x70
[    3.242245 ]  msleep+0x59/0x70
[    3.242245 ]  ql3xxx_probe+0x307/0xea0
[    3.242245 ]  ? _raw_spin_unlock_irqrestore+0x3a/0x70
[    3.242245 ]  ? pci_device_remove+0x110/0x110
[    3.242245 ]  local_pci_probe+0x45/0xa0
[    3.242245 ]  pci_device_probe+0x12b/0x1d0
[    3.242245 ]  really_probe+0x2a9/0x610
[    3.242245 ]  driver_probe_device+0x90/0x1d0
[    3.242245 ]  ? mutex_lock_nested+0x1b/0x20
[    3.242245 ]  device_driver_attach+0x68/0x70
[    3.242245 ]  __driver_attach+0x124/0x1b0
[    3.242245 ]  ? device_driver_attach+0x70/0x70
[    3.242245 ]  bus_for_each_dev+0xbb/0x110
[    3.242245 ]  ? rdinit_setup+0x45/0x45
[    3.242245 ]  driver_attach+0x27/0x30
[    3.242245 ]  bus_add_driver+0x1eb/0x2a0
[    3.242245 ]  driver_register+0xa9/0x180
[    3.242245 ]  __pci_register_driver+0x82/0x90
[    3.242245 ]  ? yellowfin_init+0x25/0x25
[    3.242245 ]  ql3xxx_driver_init+0x23/0x25
[    3.242245 ]  do_one_initcall+0x7f/0x3d0
[    3.242245 ]  ? rdinit_setup+0x45/0x45
[    3.242245 ]  ? rcu_read_lock_sched_held+0x4f/0x80
[    3.242245 ]  kernel_init_freeable+0x2aa/0x301
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  kernel_init+0x18/0x190
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  ? rest_init+0x2c0/0x2c0
[    3.242245 ]  ret_from_fork+0x1f/0x30
[    3.242245 ] Dumping ftrace buffer:
[    3.242245 ]    (ftrace buffer empty)
[    3.242245 ] Kernel Offset: disabled
[    3.242245 ] Rebooting in 1 seconds.

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 214e347097a7a..2376b2729633f 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -114,7 +114,7 @@ static int ql_sem_spinlock(struct ql3_adapter *qdev,
 		value = readl(&port_regs->CommonRegs.semaphoreReg);
 		if ((value & (sem_mask >> 16)) == sem_bits)
 			return 0;
-		ssleep(1);
+		mdelay(1000);
 	} while (--seconds);
 	return -1;
 }
-- 
GitLab


From b3dcb312778664bfbe0a73242fa04a628719b066 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 20 May 2021 20:51:16 +0800
Subject: [PATCH 1245/3804] net: stmmac: correct clocks enabled in
 stmmac_vlan_rx_kill_vid()

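The runtime PM clock enabling was placed in stmmac_vlan_rx_add_vid()
instead of stmmac_vlan_rx_kill_vid(), most likely by mistake while
resolving conflicts when the patch was reposted without its RFC tag.
Move it to stmmac_vlan_rx_kill_vid().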

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index fea3bf07ae892..df4ce5977fad3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6191,12 +6191,6 @@ static int stmmac_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid
 	bool is_double = false;
 	int ret;
 
-	ret = pm_runtime_get_sync(priv->device);
-	if (ret < 0) {
-		pm_runtime_put_noidle(priv->device);
-		return ret;
-	}
-
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
@@ -6222,6 +6216,12 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi
 	bool is_double = false;
 	int ret;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	if (be16_to_cpu(proto) == ETH_P_8021AD)
 		is_double = true;
 
-- 
GitLab


From 4691ffb18ac908609aab07d13af7995b6b89d33c Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 20 May 2021 20:51:17 +0800
Subject: [PATCH 1246/3804] net: stmmac: fix system hang if change mac address
 after interface ifdown

Fix system hang with below sequences:
~# ifconfig ethx down
~# ifconfig ethx hw ether xx:xx:xx:xx:xx:xx

After ethx is brought down, all stmmac clocks are gated off, so the
subsequent register access causes a system hang.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index df4ce5977fad3..5d956a5534345 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -5891,12 +5891,21 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	int ret = 0;
 
+	ret = pm_runtime_get_sync(priv->device);
+	if (ret < 0) {
+		pm_runtime_put_noidle(priv->device);
+		return ret;
+	}
+
 	ret = eth_mac_addr(ndev, addr);
 	if (ret)
-		return ret;
+		goto set_mac_error;
 
 	stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0);
 
+set_mac_error:
+	pm_runtime_put(priv->device);
+
 	return ret;
 }
 
-- 
GitLab


From c781471d67a56d7d4c113669a11ede0463b5c719 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 19 May 2021 13:32:20 +0200
Subject: [PATCH 1247/3804] netfilter: nf_tables: missing error reporting for
 not selected expressions

Sometimes users forget to turn on nftables extensions from Kconfig that
they need. In such case, the error reporting from userspace is
misleading:

 $ sudo nft add rule x y counter
 Error: Could not process rule: No such file or directory
 add rule x y counter
 ^^^^^^^^^^^^^^^^^^^^

Add missing NL_SET_BAD_ATTR() to provide a hint:

 $ nft add rule x y counter
 Error: Could not process rule: No such file or directory
 add rule x y counter
              ^^^^^^^

Fixes: 83d9dcba06c5 ("netfilter: nf_tables: extended netlink error reporting for expressions")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d63d2d8f769c3..5a02b48af7fb7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3328,8 +3328,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
 			if (n == NFT_RULE_MAXEXPRS)
 				goto err1;
 			err = nf_tables_expr_parse(&ctx, tmp, &expr_info[n]);
-			if (err < 0)
+			if (err < 0) {
+				NL_SET_BAD_ATTR(extack, tmp);
 				goto err1;
+			}
 			size += expr_info[n].ops->size;
 			n++;
 		}
-- 
GitLab


From 983c4fcb81d6bd19c6035e5dda6bf1fca058c320 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 19 May 2021 21:16:40 +0200
Subject: [PATCH 1248/3804] netfilter: nf_tables: extended netlink error
 reporting for chain type

Users that forget to select the NAT chain type in netfilter's Kconfig
hit ENOENT when adding the basechain.

This report is however sparse since it might be the table, the chain
or the kernel module that is missing/does not exist.

This patch provides extended netlink error reporting for the
NFTA_CHAIN_TYPE netlink attribute, which conveys the basechain type.
If the user selects a basechain that his custom kernel does not support,
the netlink extended error provides a more accurate hint on the
described issue.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5a02b48af7fb7..c34a3c0a0d9c8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1905,7 +1905,7 @@ static int nft_chain_parse_netdev(struct net *net,
 static int nft_chain_parse_hook(struct net *net,
 				const struct nlattr * const nla[],
 				struct nft_chain_hook *hook, u8 family,
-				bool autoload)
+				struct netlink_ext_ack *extack, bool autoload)
 {
 	struct nftables_pernet *nft_net = nft_pernet(net);
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
@@ -1935,8 +1935,10 @@ static int nft_chain_parse_hook(struct net *net,
 	if (nla[NFTA_CHAIN_TYPE]) {
 		type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE],
 						   family, autoload);
-		if (IS_ERR(type))
+		if (IS_ERR(type)) {
+			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
 			return PTR_ERR(type);
+		}
 	}
 	if (hook->num >= NFT_MAX_HOOKS || !(type->hook_mask & (1 << hook->num)))
 		return -EOPNOTSUPP;
@@ -1945,8 +1947,11 @@ static int nft_chain_parse_hook(struct net *net,
 	    hook->priority <= NF_IP_PRI_CONNTRACK)
 		return -EOPNOTSUPP;
 
-	if (!try_module_get(type->owner))
+	if (!try_module_get(type->owner)) {
+		if (nla[NFTA_CHAIN_TYPE])
+			NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
 		return -ENOENT;
+	}
 
 	hook->type = type;
 
@@ -2057,7 +2062,8 @@ static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
 static u64 chain_id;
 
 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
-			      u8 policy, u32 flags)
+			      u8 policy, u32 flags,
+			      struct netlink_ext_ack *extack)
 {
 	const struct nlattr * const *nla = ctx->nla;
 	struct nft_table *table = ctx->table;
@@ -2079,7 +2085,8 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		if (flags & NFT_CHAIN_BINDING)
 			return -EOPNOTSUPP;
 
-		err = nft_chain_parse_hook(net, nla, &hook, family, true);
+		err = nft_chain_parse_hook(net, nla, &hook, family, extack,
+					   true);
 		if (err < 0)
 			return err;
 
@@ -2234,7 +2241,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
 			return -EEXIST;
 		}
 		err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
-					   false);
+					   extack, false);
 		if (err < 0)
 			return err;
 
@@ -2447,7 +2454,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
 					  extack);
 	}
 
-	return nf_tables_addchain(&ctx, family, genmask, policy, flags);
+	return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
 }
 
 static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
-- 
GitLab


From 49219d9b8785ba712575c40e48ce0f7461254626 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Mon, 26 Apr 2021 18:13:45 -0400
Subject: [PATCH 1249/3804] evm: fix writing <securityfs>/evm overflow

EVM_SETUP_COMPLETE is defined as 0x80000000, which is larger than INT_MAX.
The "-fno-strict-overflow" compiler option properly prevents signaling
EVM that the EVM policy setup is complete.  Define and read an unsigned
int.

Fixes: f00d79750712 ("EVM: Allow userspace to signal an RSA key has been loaded")
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_secfs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index bbc85637e18b2..0007d3362754d 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -66,12 +66,13 @@ static ssize_t evm_read_key(struct file *filp, char __user *buf,
 static ssize_t evm_write_key(struct file *file, const char __user *buf,
 			     size_t count, loff_t *ppos)
 {
-	int i, ret;
+	unsigned int i;
+	int ret;
 
 	if (!capable(CAP_SYS_ADMIN) || (evm_initialized & EVM_SETUP_COMPLETE))
 		return -EPERM;
 
-	ret = kstrtoint_from_user(buf, count, 0, &i);
+	ret = kstrtouint_from_user(buf, count, 0, &i);
 
 	if (ret)
 		return ret;
-- 
GitLab


From b7f55d928e75557295c1ac280c291b738905b6fb Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sat, 15 May 2021 17:27:14 +0200
Subject: [PATCH 1250/3804] gfs2: Fix mmap locking for write faults

When a write fault occurs, we need to take the inode glock of the underlying
inode in exclusive mode.  Otherwise, there's no guarantee that the dirty page
will be written back to disk.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 493a83e3f5906..8a35a0196b6da 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -540,9 +540,11 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	vm_fault_t ret;
+	u16 state;
 	int err;
 
-	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+	state = (vmf->flags & FAULT_FLAG_WRITE) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
+	gfs2_holder_init(ip->i_gl, state, 0, &gh);
 	err = gfs2_glock_nq(&gh);
 	if (err) {
 		ret = block_page_mkwrite_return(err);
-- 
GitLab


From a8867f4e3809050571c98de7a2d465aff5e4daf5 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 12 Apr 2021 08:38:37 +0100
Subject: [PATCH 1251/3804] ext4: fix memory leak in ext4_mb_init_backend on
 error path.

Fix a memory leak discovered by syzbot when a file system is corrupted
with an illegally large s_log_groups_per_flex.

Reported-by: syzbot+aa12d6106ea4ca1b6aae@syzkaller.appspotmail.com
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20210412073837.1686-1-phil@philpotter.co.uk
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mballoc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3239e6669e843..c2c22c2baac0b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3217,7 +3217,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		 */
 		if (sbi->s_es->s_log_groups_per_flex >= 32) {
 			ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
-			goto err_freesgi;
+			goto err_freebuddy;
 		}
 		sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
 			BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
-- 
GitLab


From 7dbc0d246891acbb8ae5840b3237881b7a0787df Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Wed, 19 May 2021 15:05:44 +0800
Subject: [PATCH 1252/3804] PM / devfreq: imx-bus: Remove
 imx_bus_get_dev_status

The current driver does not actually support the simple ondemand
governor, as it is unable to provide device load information. So remove
the unnecessary callback to avoid confusion.
Right now the driver uses the userspace governor by default.

polling_ms is also dropped, as it's not needed for non-ondemand
governors.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/imx-bus.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/devfreq/imx-bus.c b/drivers/devfreq/imx-bus.c
index 3fc3fd77492d5..f3f6e25053ed2 100644
--- a/drivers/devfreq/imx-bus.c
+++ b/drivers/devfreq/imx-bus.c
@@ -45,18 +45,6 @@ static int imx_bus_get_cur_freq(struct device *dev, unsigned long *freq)
 	return 0;
 }
 
-static int imx_bus_get_dev_status(struct device *dev,
-		struct devfreq_dev_status *stat)
-{
-	struct imx_bus *priv = dev_get_drvdata(dev);
-
-	stat->busy_time = 0;
-	stat->total_time = 0;
-	stat->current_frequency = clk_get_rate(priv->clk);
-
-	return 0;
-}
-
 static void imx_bus_exit(struct device *dev)
 {
 	struct imx_bus *priv = dev_get_drvdata(dev);
@@ -129,9 +117,7 @@ static int imx_bus_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	priv->profile.polling_ms = 1000;
 	priv->profile.target = imx_bus_target;
-	priv->profile.get_dev_status = imx_bus_get_dev_status;
 	priv->profile.exit = imx_bus_exit;
 	priv->profile.get_cur_freq = imx_bus_get_cur_freq;
 	priv->profile.initial_freq = clk_get_rate(priv->clk);
-- 
GitLab


From 5e480ab94db8102baa73da33534e708a8636c2f9 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 11 May 2021 00:10:02 +0300
Subject: [PATCH 1253/3804] PM / devfreq: tegra30: Support thermal cooling

Expose ACTMON devfreq device as a cooling device in order to throttle
memory freq on overheat. Throttling of memory freq has a significant
cooling effect on NVIDIA Tegra SoCs since higher memory freqs require
higher SoC core voltage which is one of the main causes of the heating.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/tegra30-devfreq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index ce83f883ca654..10661eb2aed87 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -688,6 +688,7 @@ static struct devfreq_dev_profile tegra_devfreq_profile = {
 	.polling_ms	= ACTMON_SAMPLING_PERIOD,
 	.target		= tegra_devfreq_target,
 	.get_dev_status	= tegra_devfreq_get_dev_status,
+	.is_cooling_device = true,
 };
 
 static int tegra_governor_get_target(struct devfreq *devfreq,
-- 
GitLab


From a15fc9aa5b384e305ea25f42f744bb301fe39da0 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 21 May 2021 11:16:39 +0800
Subject: [PATCH 1254/3804] PM / devfreq: imx8m-ddrc: Remove
 DEVFREQ_GOV_SIMPLE_ONDEMAND dependency

The driver can't support simple ondemand governor due to missing
.get_dev_status() capability.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 20373a893b445..e87d01c0b76a5 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -103,7 +103,6 @@ config ARM_IMX8M_DDRC_DEVFREQ
 	tristate "i.MX8M DDRC DEVFREQ Driver"
 	depends on (ARCH_MXC && HAVE_ARM_SMCCC) || \
 		(COMPILE_TEST && HAVE_ARM_SMCCC)
-	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select DEVFREQ_GOV_USERSPACE
 	help
 	  This adds the DEVFREQ driver for the i.MX8M DDR Controller. It allows
-- 
GitLab


From ae897fda4f507e4b239f0bdfd578b3688ca96fb4 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 20 May 2021 13:42:42 +0200
Subject: [PATCH 1255/3804] x86/Xen: swap NX determination and GDT setup on BSP

xen_setup_gdt(), via xen_load_gdt_boot(), wants to adjust page tables.
For this to work when NX is not available, x86_configure_nx() needs to
be called first.

[jgross] Note that this is a revert of 36104cb9012a82e73 ("x86/xen:
Delay get_cpu_cap until stack canary is established"), which is possible
now that we no longer support running as PV guest in 32-bit mode.

Cc: <stable@vger.kernel.org> # 5.9
Fixes: 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established")
Reported-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

Link: https://lore.kernel.org/r/12a866b0-9e89-59f7-ebeb-a2a6cec0987a@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten_pv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 17503fed20177..e87699aa2dc82 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* Get mfn list */
 	xen_build_dynamic_phys_to_machine();
 
+	/* Work out if we support NX */
+	get_cpu_cap(&boot_cpu_data);
+	x86_configure_nx();
+
 	/*
 	 * Set up kernel GDT and segment registers, mainly so that
 	 * -fstack-protector code can be executed.
 	 */
 	xen_setup_gdt(0);
 
-	/* Work out if we support NX */
-	get_cpu_cap(&boot_cpu_data);
-	x86_configure_nx();
-
 	/* Determine virtual and physical address sizes */
 	get_cpu_address_sizes(&boot_cpu_data);
 
-- 
GitLab


From 4ba50e7c423c29639878c00573288869aa627068 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 18 May 2021 18:13:42 +0200
Subject: [PATCH 1256/3804] xen-pciback: redo VF placement in the virtual
 topology

The commit referenced below was incomplete: It merely affected what
would get written to the vdev-<N> xenstore node. The guest would still
find the function at the original function number as long as
__xen_pcibk_get_pci_dev() wouldn't be in sync. The same goes for AER wrt
__xen_pcibk_get_pcifront_dev().

Undo overriding the function to zero and instead make sure that VFs at
function zero remain alone in their slot. This has the added benefit of
improving overall capacity, considering that there's only a total of 32
slots available right now (PCI segment and bus can both only ever be
zero at present).

Fixes: 8a5248fe10b1 ("xen PV passthru: assign SR-IOV virtual functions to separate virtual slots")
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/8def783b-404c-3452-196d-3f3fd4d72c9e@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-pciback/vpci.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4162d0e7e00d7..cc7450f2b2a93 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -70,7 +70,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 				   struct pci_dev *dev, int devid,
 				   publish_pci_dev_cb publish_cb)
 {
-	int err = 0, slot, func = -1;
+	int err = 0, slot, func = PCI_FUNC(dev->devfn);
 	struct pci_dev_entry *t, *dev_entry;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 
@@ -95,22 +95,25 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 
 	/*
 	 * Keep multi-function devices together on the virtual PCI bus, except
-	 * virtual functions.
+	 * that we want to keep virtual functions at func 0 on their own. They
+	 * aren't multi-function devices and hence their presence at func 0
+	 * may cause guests to not scan the other functions.
 	 */
-	if (!dev->is_virtfn) {
+	if (!dev->is_virtfn || func) {
 		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
 			if (list_empty(&vpci_dev->dev_list[slot]))
 				continue;
 
 			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
 				       struct pci_dev_entry, list);
+			if (t->dev->is_virtfn && !PCI_FUNC(t->dev->devfn))
+				continue;
 
 			if (match_slot(dev, t->dev)) {
 				dev_info(&dev->dev, "vpci: assign to virtual slot %d func %d\n",
-					 slot, PCI_FUNC(dev->devfn));
+					 slot, func);
 				list_add_tail(&dev_entry->list,
 					      &vpci_dev->dev_list[slot]);
-				func = PCI_FUNC(dev->devfn);
 				goto unlock;
 			}
 		}
@@ -123,7 +126,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 				 slot);
 			list_add_tail(&dev_entry->list,
 				      &vpci_dev->dev_list[slot]);
-			func = dev->is_virtfn ? 0 : PCI_FUNC(dev->devfn);
 			goto unlock;
 		}
 	}
-- 
GitLab


From c81d3d24602540f65256f98831d0a25599ea6b87 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Tue, 18 May 2021 18:14:07 +0200
Subject: [PATCH 1257/3804] xen-pciback: reconfigure also from backend watch
 handler

When multiple PCI devices get assigned to a guest right at boot, libxl
incrementally populates the backend tree. The writes for the first of
the devices trigger the backend watch. In turn xen_pcibk_setup_backend()
will set the XenBus state to Initialised, at which point no further
reconfigures would happen unless a device got hotplugged. Arrange for
reconfigure to also get triggered from the backend watch handler.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: stable@vger.kernel.org
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/2337cbd6-94b9-4187-9862-c03ea12e0c61@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xen-pciback/xenbus.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 5188f02e75fb3..c09c7ebd6968d 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -359,7 +359,8 @@ out:
 	return err;
 }
 
-static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
+static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev,
+				 enum xenbus_state state)
 {
 	int err = 0;
 	int num_devs;
@@ -373,9 +374,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 
 	mutex_lock(&pdev->dev_lock);
-	/* Make sure we only reconfigure once */
-	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
-	    XenbusStateReconfiguring)
+	if (xenbus_read_driver_state(pdev->xdev->nodename) != state)
 		goto out;
 
 	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
@@ -500,6 +499,10 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
 		}
 	}
 
+	if (state != XenbusStateReconfiguring)
+		/* Make sure we only reconfigure once. */
+		goto out;
+
 	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
 	if (err) {
 		xenbus_dev_fatal(pdev->xdev, err,
@@ -525,7 +528,7 @@ static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
 		break;
 
 	case XenbusStateReconfiguring:
-		xen_pcibk_reconfigure(pdev);
+		xen_pcibk_reconfigure(pdev, XenbusStateReconfiguring);
 		break;
 
 	case XenbusStateConnected:
@@ -664,6 +667,15 @@ static void xen_pcibk_be_watch(struct xenbus_watch *watch,
 		xen_pcibk_setup_backend(pdev);
 		break;
 
+	case XenbusStateInitialised:
+		/*
+		 * We typically move to Initialised when the first device was
+		 * added. Hence subsequent devices getting added may need
+		 * reconfiguring.
+		 */
+		xen_pcibk_reconfigure(pdev, XenbusStateInitialised);
+		break;
+
 	default:
 		break;
 	}
-- 
GitLab


From 2d016672528a592ada5188e53ac746e1b8b7a978 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Mon, 10 May 2021 16:54:08 +0800
Subject: [PATCH 1258/3804] crypto: testmgr - fix initialization of
 'secret_size'

The actual data length of the 'secret' is not equal to the 'secret_size'.

Since the 'curve_id' has been removed from the 'secret', the 'secret_size'
should be reduced by the length of the 'curve_id'.

Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 34e4a3db39917..aead75d904933 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2719,7 +2719,7 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = {
 	"\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc"
 	"\xe3\x40\x60\xc8\x06\x93\xc6\x2e"
 	"\x99\x80\x81\x28\xaf\xc5\x51\x74",
-	.secret_size = 32,
+	.secret_size = 30,
 	.b_public_size = 48,
 	.expected_a_public_size = 48,
 	.expected_ss_size = 24
@@ -2766,7 +2766,7 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
 	"\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f"
 	"\xb1\x32\xbb\xaf\x22\x61\xda\xcb"
 	"\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3",
-	.secret_size = 40,
+	.secret_size = 38,
 	.b_public_size = 64,
 	.expected_a_public_size = 64,
 	.expected_ss_size = 32
@@ -2804,8 +2804,8 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
 	"\x37\x08\xcc\x40\x5e\x7a\xfd\x6a"
 	"\x6a\x02\x6e\x41\x87\x68\x38\x77"
 	"\xfa\xa9\x44\x43\x2d\xef\x09\xdf",
-	.secret_size = 8,
-	.b_secret_size = 40,
+	.secret_size = 6,
+	.b_secret_size = 38,
 	.b_public_size = 64,
 	.expected_a_public_size = 64,
 	.expected_ss_size = 32,
-- 
GitLab


From c5ae16f5c6b91dc78a08885a753489d608de4abd Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Mon, 10 May 2021 16:59:47 +0800
Subject: [PATCH 1259/3804] crypto: ecdh - extend 'cra_driver_name' with curve
 name

Currently, 'cra_driver_name' cannot be used to select an ecdh algorithm
with a specific curve, so extend it with the curve name.

Although 'cra_name' can also select a specific curve, it cannot be used
to select the ecdh generic driver once a vendor hardware accelerator has
registered.

Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 04a427b8c9564..07eb34fef25b7 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -141,7 +141,7 @@ static struct kpp_alg ecdh_nist_p192 = {
 	.init = ecdh_nist_p192_init_tfm,
 	.base = {
 		.cra_name = "ecdh-nist-p192",
-		.cra_driver_name = "ecdh-generic",
+		.cra_driver_name = "ecdh-nist-p192-generic",
 		.cra_priority = 100,
 		.cra_module = THIS_MODULE,
 		.cra_ctxsize = sizeof(struct ecdh_ctx),
@@ -166,7 +166,7 @@ static struct kpp_alg ecdh_nist_p256 = {
 	.init = ecdh_nist_p256_init_tfm,
 	.base = {
 		.cra_name = "ecdh-nist-p256",
-		.cra_driver_name = "ecdh-generic",
+		.cra_driver_name = "ecdh-nist-p256-generic",
 		.cra_priority = 100,
 		.cra_module = THIS_MODULE,
 		.cra_ctxsize = sizeof(struct ecdh_ctx),
-- 
GitLab


From a225762057d6818e4a75ad5c2c16495662d71495 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Mon, 10 May 2021 16:59:48 +0800
Subject: [PATCH 1260/3804] crypto: hisilicon/hpre - extend 'cra_driver_name'
 with curve name

Currently, 'cra_driver_name' cannot be used to select an ecdh algorithm
with a specific curve, so extend it with the curve name.

Fixes: 6763f5ea2d9a ("crypto: ecdh - move curve_id of ECDH from ...")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index a380087c83f77..c07a7f52d857e 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -1940,7 +1940,7 @@ static struct kpp_alg ecdh_nist_p192 = {
 		.cra_ctxsize = sizeof(struct hpre_ctx),
 		.cra_priority = HPRE_CRYPTO_ALG_PRI,
 		.cra_name = "ecdh-nist-p192",
-		.cra_driver_name = "hpre-ecdh",
+		.cra_driver_name = "hpre-ecdh-nist-p192",
 		.cra_module = THIS_MODULE,
 	},
 };
@@ -1957,7 +1957,7 @@ static struct kpp_alg ecdh_nist_p256 = {
 		.cra_ctxsize = sizeof(struct hpre_ctx),
 		.cra_priority = HPRE_CRYPTO_ALG_PRI,
 		.cra_name = "ecdh-nist-p256",
-		.cra_driver_name = "hpre-ecdh",
+		.cra_driver_name = "hpre-ecdh-nist-p256",
 		.cra_module = THIS_MODULE,
 	},
 };
-- 
GitLab


From 0b0553b701f830d820ba9026e5799c24e400a4b5 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Mon, 10 May 2021 17:02:55 +0800
Subject: [PATCH 1261/3804] crypto: hisilicon/hpre - fix unmapping invalid dma
 address

Currently, an invalid dma address may be unmapped when calling
'xx_data_clr_all' in the error path, so check whether the sqe in/out dma
addresses have been initialized before calling 'dma_free_coherent' or
'dma_unmap_single'.

Fixes: a9214b0b6ed2 ("crypto: hisilicon - fix the check on dma address")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index c07a7f52d857e..db00e9f763da0 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -298,6 +298,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
 	dma_addr_t tmp;
 
 	tmp = le64_to_cpu(sqe->in);
+	if (unlikely(dma_mapping_error(dev, tmp)))
+		return;
 
 	if (src) {
 		if (req->src)
@@ -307,6 +309,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
 	}
 
 	tmp = le64_to_cpu(sqe->out);
+	if (unlikely(dma_mapping_error(dev, tmp)))
+		return;
 
 	if (req->dst) {
 		if (dst)
@@ -524,6 +528,8 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
 		msg->key = cpu_to_le64(ctx->dh.dma_xa_p);
 	}
 
+	msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+	msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
 	msg->dw0 |= cpu_to_le32(0x1 << HPRE_SQE_DONE_SHIFT);
 	msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1;
 	h_req->ctx = ctx;
@@ -1372,11 +1378,15 @@ static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx,
 	dma_addr_t dma;
 
 	dma = le64_to_cpu(sqe->in);
+	if (unlikely(dma_mapping_error(dev, dma)))
+		return;
 
 	if (src && req->src)
 		dma_free_coherent(dev, ctx->key_sz << 2, req->src, dma);
 
 	dma = le64_to_cpu(sqe->out);
+	if (unlikely(dma_mapping_error(dev, dma)))
+		return;
 
 	if (req->dst)
 		dma_free_coherent(dev, ctx->key_sz << 1, req->dst, dma);
@@ -1431,6 +1441,8 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx,
 	h_req->areq.ecdh = req;
 	msg = &h_req->req;
 	memset(msg, 0, sizeof(*msg));
+	msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+	msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
 	msg->key = cpu_to_le64(ctx->ecdh.dma_p);
 
 	msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT);
@@ -1667,11 +1679,15 @@ static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx,
 	dma_addr_t dma;
 
 	dma = le64_to_cpu(sqe->in);
+	if (unlikely(dma_mapping_error(dev, dma)))
+		return;
 
 	if (src && req->src)
 		dma_free_coherent(dev, ctx->key_sz, req->src, dma);
 
 	dma = le64_to_cpu(sqe->out);
+	if (unlikely(dma_mapping_error(dev, dma)))
+		return;
 
 	if (req->dst)
 		dma_free_coherent(dev, ctx->key_sz, req->dst, dma);
@@ -1722,6 +1738,8 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx,
 	h_req->areq.curve25519 = req;
 	msg = &h_req->req;
 	memset(msg, 0, sizeof(*msg));
+	msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+	msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
 	msg->key = cpu_to_le64(ctx->curve25519.dma_p);
 
 	msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT);
-- 
GitLab


From e0a6f390d44b7d4d04fb3f2dbba46824bdbd1b4f Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:04 +0800
Subject: [PATCH 1262/3804] crypto: hisilicon/hpre - the macro 'HPRE_ADDR'
 expands

The macro 'HPRE_ADDR' is unnecessary, so expand it in place.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 59 +++++++++++------------
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 046bc962c8b2d..c914e0005859a 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -69,7 +69,6 @@
 #define HPRE_DBGFS_VAL_MAX_LEN		20
 #define HPRE_PCI_DEVICE_ID		0xa258
 #define HPRE_PCI_VF_DEVICE_ID		0xa259
-#define HPRE_ADDR(qm, offset)		((qm)->io_base + (offset))
 #define HPRE_QM_USR_CFG_MASK		0xfffffffe
 #define HPRE_QM_AXI_CFG_MASK		0xffff
 #define HPRE_QM_VFG_AX_MASK		0xff
@@ -302,10 +301,10 @@ static int hpre_set_cluster(struct hisi_qm *qm)
 
 		/* clusters initiating */
 		writel(cluster_core_mask,
-		       HPRE_ADDR(qm, offset + HPRE_CORE_ENB));
-		writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG));
-		ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset +
-					HPRE_CORE_INI_STATUS), val,
+		       qm->io_base + offset + HPRE_CORE_ENB);
+		writel(0x1, qm->io_base + offset + HPRE_CORE_INI_CFG);
+		ret = readl_relaxed_poll_timeout(qm->io_base + offset +
+					HPRE_CORE_INI_STATUS, val,
 					((val & cluster_core_mask) ==
 					cluster_core_mask),
 					HPRE_REG_RD_INTVRL_US,
@@ -329,11 +328,11 @@ static void disable_flr_of_bme(struct hisi_qm *qm)
 {
 	u32 val;
 
-	val = readl(HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
+	val = readl(qm->io_base + QM_PEH_AXUSER_CFG);
 	val &= ~(HPRE_QM_BME_FLR | HPRE_QM_SRIOV_FLR);
 	val |= HPRE_QM_PM_FLR;
-	writel(val, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
-	writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE));
+	writel(val, qm->io_base + QM_PEH_AXUSER_CFG);
+	writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE);
 }
 
 static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
@@ -342,33 +341,33 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
 	u32 val;
 	int ret;
 
-	writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_ARUSER_M_CFG_ENABLE));
-	writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE));
-	writel_relaxed(HPRE_QM_AXI_CFG_MASK, HPRE_ADDR(qm, QM_AXI_M_CFG));
+	writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
+	writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
+	writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
 
 	/* HPRE need more time, we close this interrupt */
-	val = readl_relaxed(HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+	val = readl_relaxed(qm->io_base + HPRE_QM_ABNML_INT_MASK);
 	val |= BIT(HPRE_TIMEOUT_ABNML_BIT);
-	writel_relaxed(val, HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+	writel_relaxed(val, qm->io_base + HPRE_QM_ABNML_INT_MASK);
 
 	if (qm->ver >= QM_HW_V3)
 		writel(HPRE_RSA_ENB | HPRE_ECC_ENB,
-			HPRE_ADDR(qm, HPRE_TYPES_ENB));
+			qm->io_base + HPRE_TYPES_ENB);
 	else
-		writel(HPRE_RSA_ENB, HPRE_ADDR(qm, HPRE_TYPES_ENB));
-
-	writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE));
-	writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN));
-	writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK));
-	writel(0x0, HPRE_ADDR(qm, HPRE_POISON_BYPASS));
-	writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE));
-	writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS));
-
-	writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG));
-	writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG));
-	writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG));
-	ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, HPRE_RDCHN_INI_ST), val,
-					 val & BIT(0),
+		writel(HPRE_RSA_ENB, qm->io_base + HPRE_TYPES_ENB);
+
+	writel(HPRE_QM_VFG_AX_MASK, qm->io_base + HPRE_VFG_AXCACHE);
+	writel(0x0, qm->io_base + HPRE_BD_ENDIAN);
+	writel(0x0, qm->io_base + HPRE_INT_MASK);
+	writel(0x0, qm->io_base + HPRE_POISON_BYPASS);
+	writel(0x0, qm->io_base + HPRE_COMM_CNT_CLR_CE);
+	writel(0x0, qm->io_base + HPRE_ECC_BYPASS);
+
+	writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_ARUSR_CFG);
+	writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_AWUSR_CFG);
+	writel(0x1, qm->io_base + HPRE_RDCHN_INI_CFG);
+	ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_RDCHN_INI_ST, val,
+			val & BIT(0),
 			HPRE_REG_RD_INTVRL_US,
 			HPRE_REG_RD_TMOUT_US);
 	if (ret) {
@@ -802,9 +801,9 @@ static void hpre_open_axi_master_ooo(struct hisi_qm *qm)
 
 	value = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
 	writel(value & ~HPRE_AM_OOO_SHUTDOWN_ENABLE,
-	       HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+	       qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
 	writel(value | HPRE_AM_OOO_SHUTDOWN_ENABLE,
-	       HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+	       qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
 }
 
 static void hpre_err_info_init(struct hisi_qm *qm)
-- 
GitLab


From c9a753b9733dd229ea736b27bdc55ef04cdc9f01 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:05 +0800
Subject: [PATCH 1263/3804] crypto: hisilicon/hpre - init a structure member
 each line

Initialize only one structure member per line to keep the code neat.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 56 +++++++++++++++++------
 1 file changed, 42 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index c914e0005859a..47a169ce2833f 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -122,21 +122,49 @@ static const char * const hpre_debug_file_name[] = {
 };
 
 static const struct hpre_hw_error hpre_hw_errors[] = {
-	{ .int_msk = BIT(0), .msg = "core_ecc_1bit_err_int_set" },
-	{ .int_msk = BIT(1), .msg = "core_ecc_2bit_err_int_set" },
-	{ .int_msk = BIT(2), .msg = "dat_wb_poison_int_set" },
-	{ .int_msk = BIT(3), .msg = "dat_rd_poison_int_set" },
-	{ .int_msk = BIT(4), .msg = "bd_rd_poison_int_set" },
-	{ .int_msk = BIT(5), .msg = "ooo_ecc_2bit_err_int_set" },
-	{ .int_msk = BIT(6), .msg = "cluster1_shb_timeout_int_set" },
-	{ .int_msk = BIT(7), .msg = "cluster2_shb_timeout_int_set" },
-	{ .int_msk = BIT(8), .msg = "cluster3_shb_timeout_int_set" },
-	{ .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" },
-	{ .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" },
-	{ .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" },
-	{ .int_msk = BIT(22), .msg = "pt_rng_timeout_int_set"},
-	{ .int_msk = BIT(23), .msg = "sva_fsm_timeout_int_set"},
 	{
+		.int_msk = BIT(0),
+		.msg = "core_ecc_1bit_err_int_set"
+	}, {
+		.int_msk = BIT(1),
+		.msg = "core_ecc_2bit_err_int_set"
+	}, {
+		.int_msk = BIT(2),
+		.msg = "dat_wb_poison_int_set"
+	}, {
+		.int_msk = BIT(3),
+		.msg = "dat_rd_poison_int_set"
+	}, {
+		.int_msk = BIT(4),
+		.msg = "bd_rd_poison_int_set"
+	}, {
+		.int_msk = BIT(5),
+		.msg = "ooo_ecc_2bit_err_int_set"
+	}, {
+		.int_msk = BIT(6),
+		.msg = "cluster1_shb_timeout_int_set"
+	}, {
+		.int_msk = BIT(7),
+		.msg = "cluster2_shb_timeout_int_set"
+	}, {
+		.int_msk = BIT(8),
+		.msg = "cluster3_shb_timeout_int_set"
+	}, {
+		.int_msk = BIT(9),
+		.msg = "cluster4_shb_timeout_int_set"
+	}, {
+		.int_msk = GENMASK(15, 10),
+		.msg = "ooo_rdrsp_err_int_set"
+	}, {
+		.int_msk = GENMASK(21, 16),
+		.msg = "ooo_wrrsp_err_int_set"
+	}, {
+		.int_msk = BIT(22),
+		.msg = "pt_rng_timeout_int_set"
+	}, {
+		.int_msk = BIT(23),
+		.msg = "sva_fsm_timeout_int_set"
+	}, {
 		/* sentinel */
 	}
 };
-- 
GitLab


From 9201c0774c2203d5620eeb4f7cb872d7e33cbe75 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:06 +0800
Subject: [PATCH 1264/3804] crypto: hisilicon/hpre - replace macro with inline
 function

Function-like macros lack type checking, so they are not checked as
strictly as function calls. Replace them with inline functions.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 25 +++++++++++++++--------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 47a169ce2833f..1e7d1fb382ed0 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -87,11 +87,6 @@
 #define HPRE_QM_PM_FLR			BIT(11)
 #define HPRE_QM_SRIOV_FLR		BIT(12)
 
-#define HPRE_CLUSTERS_NUM(qm)		\
-	(((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 : HPRE_CLUSTERS_NUM_V2)
-#define HPRE_CLUSTER_CORE_MASK(qm)	\
-	(((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTER_CORE_MASK_V3 :\
-		HPRE_CLUSTER_CORE_MASK_V2)
 #define HPRE_VIA_MSI_DSM		1
 #define HPRE_SQE_MASK_OFFSET		8
 #define HPRE_SQE_MASK_LEN		24
@@ -251,6 +246,18 @@ static u32 vfs_num;
 module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
 MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
 
+static inline int hpre_cluster_num(struct hisi_qm *qm)
+{
+	return (qm->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 :
+		HPRE_CLUSTERS_NUM_V2;
+}
+
+static inline int hpre_cluster_core_mask(struct hisi_qm *qm)
+{
+	return (qm->ver >= QM_HW_V3) ?
+		HPRE_CLUSTER_CORE_MASK_V3 : HPRE_CLUSTER_CORE_MASK_V2;
+}
+
 struct hisi_qp *hpre_create_qp(u8 type)
 {
 	int node = cpu_to_node(smp_processor_id());
@@ -317,8 +324,8 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm)
 
 static int hpre_set_cluster(struct hisi_qm *qm)
 {
-	u32 cluster_core_mask = HPRE_CLUSTER_CORE_MASK(qm);
-	u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+	u32 cluster_core_mask = hpre_cluster_core_mask(qm);
+	u8 clusters_num = hpre_cluster_num(qm);
 	struct device *dev = &qm->pdev->dev;
 	unsigned long offset;
 	u32 val = 0;
@@ -424,7 +431,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
 
 static void hpre_cnt_regs_clear(struct hisi_qm *qm)
 {
-	u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+	u8 clusters_num = hpre_cluster_num(qm);
 	unsigned long offset;
 	int i;
 
@@ -677,7 +684,7 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
 
 static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
 {
-	u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+	u8 clusters_num = hpre_cluster_num(qm);
 	struct device *dev = &qm->pdev->dev;
 	char buf[HPRE_DBGFS_VAL_MAX_LEN];
 	struct debugfs_regset32 *regset;
-- 
GitLab


From b94c910afda050a9e95465ff0c4fe2548ea5ac0a Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:07 +0800
Subject: [PATCH 1265/3804] crypto: hisilicon/hpre - remove the macro of
 'HPRE_DEV'

Remove the complex 'HPRE_DEV' macro and replace its uses with a device
pointer that is initialized once and stored in the context.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 53 ++++++++++-----------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index db00e9f763da0..3d0832b9c6134 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -30,7 +30,6 @@ struct hpre_ctx;
 #define HPRE_DH_G_FLAG		0x02
 #define HPRE_TRY_SEND_TIMES	100
 #define HPRE_INVLD_REQ_ID		(-1)
-#define HPRE_DEV(ctx)		(&((ctx)->qp->qm->pdev->dev))
 
 #define HPRE_SQE_ALG_BITS	5
 #define HPRE_SQE_DONE_SHIFT	30
@@ -102,6 +101,7 @@ struct hpre_curve25519_ctx {
 
 struct hpre_ctx {
 	struct hisi_qp *qp;
+	struct device *dev;
 	struct hpre_asym_request **req_list;
 	struct hpre *hpre;
 	spinlock_t req_lock;
@@ -214,8 +214,7 @@ static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req,
 				  struct scatterlist *data, unsigned int len,
 				  int is_src, dma_addr_t *tmp)
 {
-	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = hpre_req->ctx->dev;
 	enum dma_data_direction dma_dir;
 
 	if (is_src) {
@@ -239,7 +238,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req,
 				int is_src, dma_addr_t *tmp)
 {
 	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	void *ptr;
 	int shift;
 
@@ -293,7 +292,7 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
 				 struct scatterlist *dst,
 				 struct scatterlist *src)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	struct hpre_sqe *sqe = &req->req;
 	dma_addr_t tmp;
 
@@ -325,7 +324,6 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
 static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
 				void **kreq)
 {
-	struct device *dev = HPRE_DEV(ctx);
 	struct hpre_asym_request *req;
 	unsigned int err, done, alg;
 	int id;
@@ -350,7 +348,7 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
 		return 0;
 
 	alg = le32_to_cpu(sqe->dw0) & HREE_ALG_TYPE_MASK;
-	dev_err_ratelimited(dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n",
+	dev_err_ratelimited(ctx->dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n",
 		alg, done, err);
 
 	return -EINVAL;
@@ -365,6 +363,7 @@ static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen)
 
 	spin_lock_init(&ctx->req_lock);
 	ctx->qp = qp;
+	ctx->dev = &qp->qm->pdev->dev;
 
 	hpre = container_of(ctx->qp->qm, struct hpre, qm);
 	ctx->hpre = hpre;
@@ -631,7 +630,7 @@ static int hpre_is_dh_params_length_valid(unsigned int key_sz)
 
 static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz;
 
 	if (params->p_size > HPRE_DH_MAX_P_SZ)
@@ -670,7 +669,7 @@ static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
 
 static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz = ctx->key_sz;
 
 	if (is_clear_all)
@@ -883,18 +882,18 @@ static int hpre_rsa_set_n(struct hpre_ctx *ctx, const char *value,
 	if (!hpre_rsa_key_size_is_support(ctx->key_sz))
 		return 0;
 
-	ctx->rsa.pubkey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+	ctx->rsa.pubkey = dma_alloc_coherent(ctx->dev, vlen << 1,
 					     &ctx->rsa.dma_pubkey,
 					     GFP_KERNEL);
 	if (!ctx->rsa.pubkey)
 		return -ENOMEM;
 
 	if (private) {
-		ctx->rsa.prikey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+		ctx->rsa.prikey = dma_alloc_coherent(ctx->dev, vlen << 1,
 						     &ctx->rsa.dma_prikey,
 						     GFP_KERNEL);
 		if (!ctx->rsa.prikey) {
-			dma_free_coherent(HPRE_DEV(ctx), vlen << 1,
+			dma_free_coherent(ctx->dev, vlen << 1,
 					  ctx->rsa.pubkey,
 					  ctx->rsa.dma_pubkey);
 			ctx->rsa.pubkey = NULL;
@@ -956,7 +955,7 @@ static int hpre_crt_para_get(char *para, size_t para_sz,
 static int hpre_rsa_setkey_crt(struct hpre_ctx *ctx, struct rsa_key *rsa_key)
 {
 	unsigned int hlf_ksz = ctx->key_sz >> 1;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	u64 offset;
 	int ret;
 
@@ -1014,7 +1013,7 @@ free_key:
 static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
 {
 	unsigned int half_key_sz = ctx->key_sz >> 1;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 
 	if (is_clear_all)
 		hisi_qm_stop_qp(ctx->qp);
@@ -1185,7 +1184,7 @@ static void hpre_key_to_big_end(u8 *data, int len)
 static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all,
 			       bool is_ecdh)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz = ctx->key_sz;
 	unsigned int shift = sz << 1;
 
@@ -1287,7 +1286,7 @@ static unsigned int hpre_ecdh_get_curvesz(unsigned short id)
 
 static int hpre_ecdh_set_param(struct hpre_ctx *ctx, struct ecdh *params)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz, shift, curve_sz;
 	int ret;
 
@@ -1338,7 +1337,7 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 				unsigned int len)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz, sz_shift;
 	struct ecdh params;
 	int ret;
@@ -1373,7 +1372,7 @@ static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx,
 				      struct scatterlist *dst,
 				      struct scatterlist *src)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	struct hpre_sqe *sqe = &req->req;
 	dma_addr_t dma;
 
@@ -1462,7 +1461,7 @@ static int hpre_ecdh_src_data_init(struct hpre_asym_request *hpre_req,
 {
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int tmpshift;
 	dma_addr_t dma = 0;
 	void *ptr;
@@ -1492,7 +1491,7 @@ static int hpre_ecdh_dst_data_init(struct hpre_asym_request *hpre_req,
 {
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	dma_addr_t dma = 0;
 
 	if (unlikely(!data || !sg_is_last(data) || len != ctx->key_sz << 1)) {
@@ -1515,7 +1514,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	void *tmp = kpp_request_ctx(req);
 	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
 	struct hpre_sqe *msg = &hpre_req->req;
@@ -1621,7 +1620,7 @@ static void hpre_curve25519_fill_curve(struct hpre_ctx *ctx, const void *buf,
 static int hpre_curve25519_set_param(struct hpre_ctx *ctx, const void *buf,
 				     unsigned int len)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	unsigned int sz = ctx->key_sz;
 	unsigned int shift = sz << 1;
 
@@ -1646,7 +1645,7 @@ static int hpre_curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
 				      unsigned int len)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	int ret = -EINVAL;
 
 	if (len != CURVE25519_KEY_SIZE ||
@@ -1674,7 +1673,7 @@ static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx,
 					    struct scatterlist *dst,
 					    struct scatterlist *src)
 {
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	struct hpre_sqe *sqe = &req->req;
 	dma_addr_t dma;
 
@@ -1770,7 +1769,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
 {
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	u8 p[CURVE25519_KEY_SIZE] = { 0 };
 	const struct ecc_curve *curve;
 	dma_addr_t dma = 0;
@@ -1825,7 +1824,7 @@ static int hpre_curve25519_dst_init(struct hpre_asym_request *hpre_req,
 {
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	dma_addr_t dma = 0;
 
 	if (!data || !sg_is_last(data) || len != ctx->key_sz) {
@@ -1848,7 +1847,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
-	struct device *dev = HPRE_DEV(ctx);
+	struct device *dev = ctx->dev;
 	void *tmp = kpp_request_ctx(req);
 	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
 	struct hpre_sqe *msg = &hpre_req->req;
-- 
GitLab


From 58be5ce3461e3fd623091d0bdc1080d0e4df2859 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:08 +0800
Subject: [PATCH 1266/3804] crypto: hisilicon/hpre - delete redundant
 initialization

Delete redundant variable initialization.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 3d0832b9c6134..bc93cc926e225 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -1492,7 +1492,7 @@ static int hpre_ecdh_dst_data_init(struct hpre_asym_request *hpre_req,
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
 	struct device *dev = ctx->dev;
-	dma_addr_t dma = 0;
+	dma_addr_t dma;
 
 	if (unlikely(!data || !sg_is_last(data) || len != ctx->key_sz << 1)) {
 		dev_err(dev, "data or data length is illegal!\n");
@@ -1825,7 +1825,7 @@ static int hpre_curve25519_dst_init(struct hpre_asym_request *hpre_req,
 	struct hpre_sqe *msg = &hpre_req->req;
 	struct hpre_ctx *ctx = hpre_req->ctx;
 	struct device *dev = ctx->dev;
-	dma_addr_t dma = 0;
+	dma_addr_t dma;
 
 	if (!data || !sg_is_last(data) || len != ctx->key_sz) {
 		dev_err(dev, "data or data length is illegal!\n");
-- 
GitLab


From 82119db8cacb3921ab95e3f078c08c4bffacef15 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:09 +0800
Subject: [PATCH 1267/3804] crypto: hisilicon/hpre - use 'GENMASK' to generate
 mask value

Use 'GENMASK' to generate mask values, which makes the code clearer.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c |  6 +++---
 drivers/crypto/hisilicon/hpre/hpre_main.c   | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index bc93cc926e225..7449632986c5f 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -330,9 +330,9 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
 
 #define HPRE_NO_HW_ERR		0
 #define HPRE_HW_TASK_DONE	3
-#define HREE_HW_ERR_MASK	0x7ff
-#define HREE_SQE_DONE_MASK	0x3
-#define HREE_ALG_TYPE_MASK	0x1f
+#define HREE_HW_ERR_MASK	GENMASK(10, 0)
+#define HREE_SQE_DONE_MASK	GENMASK(1, 0)
+#define HREE_ALG_TYPE_MASK	GENMASK(4, 0)
 	id = (int)le16_to_cpu(sqe->tag);
 	req = ctx->req_list[id];
 	hpre_rm_req_from_ctx(req);
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 1e7d1fb382ed0..46c24f90693b3 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -36,7 +36,7 @@
 #define HPRE_INT_MASK			0x301400
 #define HPRE_INT_STATUS			0x301800
 #define HPRE_CORE_INT_ENABLE		0
-#define HPRE_CORE_INT_DISABLE		0x003fffff
+#define HPRE_CORE_INT_DISABLE		GENMASK(21, 0)
 #define HPRE_RDCHN_INI_ST		0x301a00
 #define HPRE_CLSTR_BASE			0x302000
 #define HPRE_CORE_EN_OFFSET		0x04
@@ -69,12 +69,12 @@
 #define HPRE_DBGFS_VAL_MAX_LEN		20
 #define HPRE_PCI_DEVICE_ID		0xa258
 #define HPRE_PCI_VF_DEVICE_ID		0xa259
-#define HPRE_QM_USR_CFG_MASK		0xfffffffe
-#define HPRE_QM_AXI_CFG_MASK		0xffff
-#define HPRE_QM_VFG_AX_MASK		0xff
-#define HPRE_BD_USR_MASK		0x3
-#define HPRE_CLUSTER_CORE_MASK_V2	0xf
-#define HPRE_CLUSTER_CORE_MASK_V3	0xff
+#define HPRE_QM_USR_CFG_MASK		GENMASK(31, 1)
+#define HPRE_QM_AXI_CFG_MASK		GENMASK(15, 0)
+#define HPRE_QM_VFG_AX_MASK		GENMASK(7, 0)
+#define HPRE_BD_USR_MASK		GENMASK(1, 0)
+#define HPRE_CLUSTER_CORE_MASK_V2	GENMASK(3, 0)
+#define HPRE_CLUSTER_CORE_MASK_V3	GENMASK(7, 0)
 
 #define HPRE_AM_OOO_SHUTDOWN_ENB	0x301044
 #define HPRE_AM_OOO_SHUTDOWN_ENABLE	BIT(0)
-- 
GitLab


From 0c176d8d7d970db6fed82db3495a73d10d2251fb Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:10 +0800
Subject: [PATCH 1268/3804] crypto: hisilicon/hpre - delete rudundant macro
 definition

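Delete the redundant macro definition: HPRE_HAC_INT_STATUS duplicates
HPRE_INT_STATUS (both are 0x301800), so use HPRE_INT_STATUS instead.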

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 46c24f90693b3..31515ae054f8a 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -57,7 +57,6 @@
 #define HPRE_CORE_INI_STATUS (HPRE_CLSTR_BASE + HPRE_CORE_INI_STATUS_OFFSET)
 #define HPRE_HAC_ECC1_CNT		0x301a04
 #define HPRE_HAC_ECC2_CNT		0x301a08
-#define HPRE_HAC_INT_STATUS		0x301800
 #define HPRE_HAC_SOURCE_INT		0x301600
 #define HPRE_CLSTR_ADDR_INTRVL		0x1000
 #define HPRE_CLUSTER_INQURY		0x100
@@ -822,7 +821,7 @@ static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
 
 static u32 hpre_get_hw_err_status(struct hisi_qm *qm)
 {
-	return readl(qm->io_base + HPRE_HAC_INT_STATUS);
+	return readl(qm->io_base + HPRE_INT_STATUS);
 }
 
 static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
-- 
GitLab


From 302e909cb22b5456ae71a9fd54b98ee0e6505613 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 12 May 2021 14:27:11 +0800
Subject: [PATCH 1269/3804] crypto: hisilicon/hpre - add 'default' for switch
 statement

Return the error directly from the 'default' case of the switch statement.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 7449632986c5f..294c3688aabb0 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -623,9 +623,9 @@ static int hpre_is_dh_params_length_valid(unsigned int key_sz)
 	case _HPRE_DH_GRP15:
 	case _HPRE_DH_GRP16:
 		return 0;
+	default:
+		return -EINVAL;
 	}
-
-	return -EINVAL;
 }
 
 static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
-- 
GitLab


From d06aca989c243dd9e5d3e20aa4e5c2ecfdd07050 Mon Sep 17 00:00:00 2001
From: Joe Richey <joerichey@google.com>
Date: Fri, 21 May 2021 01:58:42 -0700
Subject: [PATCH 1270/3804] x86/elf: Use _BITUL() macro in UAPI headers

Replace BIT() in x86's UAPI header with _BITUL(). BIT() is not defined
in the UAPI headers and its usage may cause userspace build errors.

Fixes: 742c45c3ecc9 ("x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2")
Signed-off-by: Joe Richey <joerichey@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210521085849.37676-2-joerichey94@gmail.com
---
 arch/x86/include/uapi/asm/hwcap2.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 5fdfcb47000f9..054604aba9f00 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -2,10 +2,12 @@
 #ifndef _ASM_X86_HWCAP2_H
 #define _ASM_X86_HWCAP2_H
 
+#include <linux/const.h>
+
 /* MONITOR/MWAIT enabled in Ring 3 */
-#define HWCAP2_RING3MWAIT		(1 << 0)
+#define HWCAP2_RING3MWAIT		_BITUL(0)
 
 /* Kernel allows FSGSBASE instructions available in Ring 3 */
-#define HWCAP2_FSGSBASE			BIT(1)
+#define HWCAP2_FSGSBASE			_BITUL(1)
 
 #endif
-- 
GitLab


From 2ade8fc65076095460e3ea1ca65a8f619d7d9a3a Mon Sep 17 00:00:00 2001
From: David Bartley <andareed@gmail.com>
Date: Thu, 20 May 2021 10:41:30 -0700
Subject: [PATCH 1271/3804] x86/amd_nb: Add AMD family 19h model 50h PCI ids

This is required to support Zen3 APUs in k10temp.

Signed-off-by: David Bartley <andareed@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Wei Huang <wei.huang2@amd.com>
Link: https://lkml.kernel.org/r/20210520174130.94954-1-andareed@gmail.com
---
 arch/x86/kernel/amd_nb.c | 3 +++
 include/linux/pci_ids.h  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 09083094eb575..23dda362dc0f3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -25,6 +25,7 @@
 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
 #define PCI_DEVICE_ID_AMD_19H_DF_F4	0x1654
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
 
 /* Protect the PCI config register pairs used for SMN and DF indirect access. */
 static DEFINE_MUTEX(smn_mutex);
@@ -57,6 +58,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
 	{}
 };
 
@@ -72,6 +74,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4c3fa5293d763..5356ccf1c275b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -555,6 +555,7 @@
 #define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b
 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
 #define PCI_DEVICE_ID_AMD_19H_DF_F3	0x1653
+#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F3 0x166d
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
-- 
GitLab


From f1b7d45d3f8f3e18e190e71cb54d4b1917300d1d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:49 -0700
Subject: [PATCH 1272/3804] x86/irq: Remove unused vectors defines

UV_BAU_MESSAGE is defined but not used anywhere in the kernel. Presumably
this is a stale vector number that can be reclaimed.

MCE_VECTOR is not an actual vector: #MC is an exception, not an interrupt
vector, and as such is correctly described as X86_TRAP_MC. MCE_VECTOR is
not used anywhere in the kernel.

Note that NMI_VECTOR *is* used; specifically it is the vector number
programmed into the APIC LVT when an NMI interrupt is configured. At
the moment it is always numerically identical to X86_TRAP_NMI, but that is
not necessarily going to be the case indefinitely.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Steve Wahl <steve.wahl@hpe.com>
Link: https://lore.kernel.org/r/20210519212154.511983-4-hpa@zytor.com
---
 arch/x86/include/asm/irq_vectors.h       | 4 ++--
 tools/arch/x86/include/asm/irq_vectors.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f9..dc71b781be422 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
  * This file enumerates the exact layout of them:
  */
 
+/* This is used as an interrupt vector when programming the APIC. */
 #define NMI_VECTOR			0x02
-#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
  */
 #define IRQ_WORK_VECTOR			0xf6
 
-#define UV_BAU_MESSAGE			0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
 #define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index 889f8b1b5b7f9..dc71b781be422 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -26,8 +26,8 @@
  * This file enumerates the exact layout of them:
  */
 
+/* This is used as an interrupt vector when programming the APIC. */
 #define NMI_VECTOR			0x02
-#define MCE_VECTOR			0x12
 
 /*
  * IDT vectors usable for external interrupt sources start at 0x20.
@@ -84,7 +84,7 @@
  */
 #define IRQ_WORK_VECTOR			0xf6
 
-#define UV_BAU_MESSAGE			0xf5
+/* 0xf5 - unused, was UV_BAU_MESSAGE */
 #define DEFERRED_ERROR_VECTOR		0xf4
 
 /* Vector on which hypervisor callbacks will be delivered */
-- 
GitLab


From ff851003880de9d1111498877551ba16668c38ef Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:48 -0700
Subject: [PATCH 1273/3804] x86/irq: Add and use NR_EXTERNAL_VECTORS and
 NR_SYSTEM_VECTORS

Add defines for the number of external vectors and number of system
vectors instead of requiring the use of (FIRST_SYSTEM_VECTOR -
FIRST_EXTERNAL_VECTOR) and (NR_VECTORS - FIRST_SYSTEM_VECTOR)
respectively. Clean up the usage sites.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Link: https://lore.kernel.org/r/20210519212154.511983-3-hpa@zytor.com
---
 arch/x86/include/asm/idtentry.h          | 4 ++--
 arch/x86/include/asm/irq_vectors.h       | 3 +++
 tools/arch/x86/include/asm/irq_vectors.h | 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73d45b0dfff2d..c03a18cac78ef 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -504,7 +504,7 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	.align 8
 SYM_CODE_START(irq_entries_start)
     vector=FIRST_EXTERNAL_VECTOR
-    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+    .rept NR_EXTERNAL_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
@@ -520,7 +520,7 @@ SYM_CODE_END(irq_entries_start)
 	.align 8
 SYM_CODE_START(spurious_entries_start)
     vector=FIRST_SYSTEM_VECTOR
-    .rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
+    .rept NR_SYSTEM_VECTORS
 	UNWIND_HINT_IRET_REGS
 0 :
 	.byte	0x6a, vector
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index dc71b781be422..43dcb92842088 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -114,6 +114,9 @@
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
 
+#define NR_EXTERNAL_VECTORS		(FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
 /*
  * Size the maximum number of interrupts.
  *
diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h
index dc71b781be422..43dcb92842088 100644
--- a/tools/arch/x86/include/asm/irq_vectors.h
+++ b/tools/arch/x86/include/asm/irq_vectors.h
@@ -114,6 +114,9 @@
 #define FIRST_SYSTEM_VECTOR		NR_VECTORS
 #endif
 
+#define NR_EXTERNAL_VECTORS		(FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+#define NR_SYSTEM_VECTORS		(NR_VECTORS - FIRST_SYSTEM_VECTOR)
+
 /*
  * Size the maximum number of interrupts.
  *
-- 
GitLab


From 8ec9069a432c873e52e6f4ce1496f282a4299604 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:50 -0700
Subject: [PATCH 1274/3804] x86/idt: Remove address argument from
 idt_invalidate()

There is no reason to specify any specific address to idt_invalidate(). It
looks mostly like an artifact of unifying code done differently by
accident. The most "sensible" address to set here is a NULL pointer -
virtual address zero, just as a visual marker.

This also makes it possible to mark the struct desc_ptr in idt_invalidate()
as static const.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-5-hpa@zytor.com
---
 arch/x86/include/asm/desc.h        | 2 +-
 arch/x86/kernel/idt.c              | 5 ++---
 arch/x86/kernel/machine_kexec_32.c | 2 +-
 arch/x86/kernel/reboot.c           | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 476082a83d1c1..b8429ae50b71c 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -427,6 +427,6 @@ static inline void idt_setup_early_pf(void) { }
 static inline void idt_setup_ist_traps(void) { }
 #endif
 
-extern void idt_invalidate(void *addr);
+extern void idt_invalidate(void);
 
 #endif /* _ASM_X86_DESC_H */
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index d552f177eca0e..2779f5226dc21 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -331,11 +331,10 @@ void __init idt_setup_early_handler(void)
 
 /**
  * idt_invalidate - Invalidate interrupt descriptor table
- * @addr:	The virtual address of the 'invalid' IDT
  */
-void idt_invalidate(void *addr)
+void idt_invalidate(void)
 {
-	struct desc_ptr idt = { .address = (unsigned long) addr, .size = 0 };
+	static const struct desc_ptr idt = { .address = 0, .size = 0 };
 
 	load_idt(&idt);
 }
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 64b00b0d7fe80..1e34feebcd5de 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -232,7 +232,7 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	idt_invalidate(phys_to_virt(0));
+	idt_invalidate();
 	set_gdt(phys_to_virt(0), 0);
 
 	/* now call it */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index b29657b76e3fa..ebfb911082326 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -669,7 +669,7 @@ static void native_machine_emergency_restart(void)
 			break;
 
 		case BOOT_TRIPLE:
-			idt_invalidate(NULL);
+			idt_invalidate();
 			__asm__ __volatile__("int3");
 
 			/* We're probably dead after this, but... */
-- 
GitLab


From 283fa3b6483a84aeb62f1b97c2ec7c02eb2f5882 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:51 -0700
Subject: [PATCH 1275/3804] x86: Add native_[ig]dt_invalidate()

In some places, the native forms of descriptor table invalidation are
required. Rather than open-coding them, add explicitly native functions to
invalidate the GDT and IDT.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-6-hpa@zytor.com
---
 arch/x86/include/asm/desc.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b8429ae50b71c..400c178628709 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -224,6 +224,26 @@ static inline void store_idt(struct desc_ptr *dtr)
 	asm volatile("sidt %0":"=m" (*dtr));
 }
 
+static inline void native_gdt_invalidate(void)
+{
+	const struct desc_ptr invalid_gdt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_gdt(&invalid_gdt);
+}
+
+static inline void native_idt_invalidate(void)
+{
+	const struct desc_ptr invalid_idt = {
+		.address = 0,
+		.size = 0
+	};
+
+	native_load_idt(&invalid_idt);
+}
+
 /*
  * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
  * a read-only remapping. To prevent a page fault, the GDT is switched to the
-- 
GitLab


From 056c52f5e824c050c58fd27ea6d717cba32239c2 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Wed, 19 May 2021 14:21:52 -0700
Subject: [PATCH 1276/3804] x86/kexec: Set_[gi]dt() ->
 native_[gi]dt_invalidate() in machine_kexec_*.c

These files contain private set_gdt() functions which are only used to
invalidate the gdt; machine_kexec_64.c also contains a set_idt()
function to invalidate the idt.

phys_to_virt(0) *really* doesn't make any sense for creating an
invalid GDT. A NULL pointer (virtual 0) makes a lot more sense;
although neither will allow any actual memory reference, a NULL
pointer stands out more.

Replace these calls with native_[gi]dt_invalidate().

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210519212154.511983-7-hpa@zytor.com
---
 arch/x86/kernel/machine_kexec_32.c | 15 ++------------
 arch/x86/kernel/machine_kexec_64.c | 33 ++----------------------------
 2 files changed, 4 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 1e34feebcd5de..1b373d79cedc4 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -23,17 +23,6 @@
 #include <asm/set_memory.h>
 #include <asm/debugreg.h>
 
-static void set_gdt(void *newgdt, __u16 limit)
-{
-	struct desc_ptr curgdt;
-
-	/* ia32 supports unaligned loads & stores */
-	curgdt.size    = limit;
-	curgdt.address = (unsigned long)newgdt;
-
-	load_gdt(&curgdt);
-}
-
 static void load_segments(void)
 {
 #define __STR(X) #X
@@ -232,8 +221,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	idt_invalidate();
-	set_gdt(phys_to_virt(0), 0);
+	native_idt_invalidate();
+	native_gdt_invalidate();
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c078b0d3ab0ee..131f30fdcfbdc 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -256,35 +256,6 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 	return init_transition_pgtable(image, level4p);
 }
 
-static void set_idt(void *newidt, u16 limit)
-{
-	struct desc_ptr curidt;
-
-	/* x86-64 supports unaligned loads & stores */
-	curidt.size    = limit;
-	curidt.address = (unsigned long)newidt;
-
-	__asm__ __volatile__ (
-		"lidtq %0\n"
-		: : "m" (curidt)
-		);
-};
-
-
-static void set_gdt(void *newgdt, u16 limit)
-{
-	struct desc_ptr curgdt;
-
-	/* x86-64 supports unaligned loads & stores */
-	curgdt.size    = limit;
-	curgdt.address = (unsigned long)newgdt;
-
-	__asm__ __volatile__ (
-		"lgdtq %0\n"
-		: : "m" (curgdt)
-		);
-};
-
 static void load_segments(void)
 {
 	__asm__ __volatile__ (
@@ -379,8 +350,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0), 0);
-	set_idt(phys_to_virt(0), 0);
+	native_idt_invalidate();
+	native_gdt_invalidate();
 
 	/* now call it */
 	image->start = relocate_kernel((unsigned long)image->head,
-- 
GitLab


From 3b2f17ad1770e51b8b4e68b5069c4f1ee477eff8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 19 May 2021 13:50:31 -0300
Subject: [PATCH 1277/3804] perf parse-events: Check if the software events
 array slots are populated

Skip unpopulated slots to avoid a NULL pointer dereference when the kernel
supports a new feature but the tooling does not yet have an entry for it.

This happened with the recently added PERF_COUNT_SW_CGROUP_SWITCHES
software event.

Reported-by: Thomas Richter <tmricht@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/linux-perf-users/YKVESEKRjKtILhog@kernel.org/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 269997066f6e4..84108c17f48d4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2932,9 +2932,14 @@ restart:
 	}
 
 	for (i = 0; i < max; i++, syms++) {
+		/*
+		 * New attr.config still not supported here, the latest
+		 * example was PERF_COUNT_SW_CGROUP_SWITCHES
+		 */
+		if (syms->symbol == NULL)
+			continue;
 
-		if (event_glob != NULL && syms->symbol != NULL &&
-		    !(strglobmatch(syms->symbol, event_glob) ||
+		if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
 		      (syms->alias && strglobmatch(syms->alias, event_glob))))
 			continue;
 
-- 
GitLab


From af2702549d68519ac78228e915d9b2c199056787 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Wed, 19 May 2021 18:48:07 -0700
Subject: [PATCH 1278/3804] ASoC: qcom: lpass-cpu: Use optional clk APIs

This driver spits out a warning for me at boot:

 sc7180-lpass-cpu 62f00000.lpass: asoc_qcom_lpass_cpu_platform_probe() error getting optional null: -2

but it looks like it is all an optional clk. Use the optional clk APIs
here so that we don't see this message and everything else is the same.

Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Cc: Banajit Goswami <bgoswami@codeaurora.org>
Fixes: 3e53ac8230c1 ("ASoC: qcom: make osr clock optional")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20210520014807.3749797-1-swboyd@chromium.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/lpass-cpu.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
index c62d2612e8f5e..28c7497344e33 100644
--- a/sound/soc/qcom/lpass-cpu.c
+++ b/sound/soc/qcom/lpass-cpu.c
@@ -835,18 +835,8 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev)
 		if (dai_id == LPASS_DP_RX)
 			continue;
 
-		drvdata->mi2s_osr_clk[dai_id] = devm_clk_get(dev,
+		drvdata->mi2s_osr_clk[dai_id] = devm_clk_get_optional(dev,
 					     variant->dai_osr_clk_names[i]);
-		if (IS_ERR(drvdata->mi2s_osr_clk[dai_id])) {
-			dev_warn(dev,
-				"%s() error getting optional %s: %ld\n",
-				__func__,
-				variant->dai_osr_clk_names[i],
-				PTR_ERR(drvdata->mi2s_osr_clk[dai_id]));
-
-			drvdata->mi2s_osr_clk[dai_id] = NULL;
-		}
-
 		drvdata->mi2s_bit_clk[dai_id] = devm_clk_get(dev,
 						variant->dai_bit_clk_names[i]);
 		if (IS_ERR(drvdata->mi2s_bit_clk[dai_id])) {
-- 
GitLab


From bda7db1d952c3ff7c24c11bc295aa72aaeb98451 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 20 May 2021 16:12:37 +0300
Subject: [PATCH 1279/3804] spi: sc18is602: don't consider the chip select byte
 in sc18is602_check_transfer

For each spi_message, the sc18is602 I2C-to-SPI bridge driver checks the
length of each spi_transfer against 200 (the size of the chip's internal
buffer) minus hw->tlen (the number of bytes transferred so far).

The first byte of the transferred data is the Function ID (the SPI
slave's chip select) and as per the documentation of the chip:
https://www.nxp.com/docs/en/data-sheet/SC18IS602B.pdf
the data buffer is up to 200 bytes deep _without_ accounting for the
Function ID byte.

However, in sc18is602_txrx(), the driver keeps the Function ID as part
of the buffer, and increments hw->tlen from 0 to 1. Combined with the
check in sc18is602_check_transfer, this prevents us from issuing a
transfer that has exactly 200 bytes in size, but only 199.

Adjust the check function to reflect that the Function ID is not part of
the 200 byte deep data buffer.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210520131238.2903024-2-olteanv@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 297c512069a57..37871edc79629 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -174,7 +174,7 @@ static int sc18is602_setup_transfer(struct sc18is602 *hw, u32 hz, u8 mode)
 static int sc18is602_check_transfer(struct spi_device *spi,
 				    struct spi_transfer *t, int tlen)
 {
-	if (t && t->len + tlen > SC18IS602_BUFSIZ)
+	if (t && t->len + tlen > SC18IS602_BUFSIZ + 1)
 		return -EINVAL;
 
 	return 0;
-- 
GitLab


From b4e46c9954ad55092502e1e8c44ceb9b6744bade Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 20 May 2021 16:12:38 +0300
Subject: [PATCH 1280/3804] spi: sc18is602: implement
 .max_{transfer,message}_size() for the controller

Allow SPI peripherals attached to this controller to know what is the
maximum transfer size and message size, so they can limit their transfer
lengths properly in case they are otherwise capable of larger transfer
sizes. For the sc18is602, this is 200 bytes in both cases, since as far
as I understand, it isn't possible to tell the controller to keep the
chip select asserted after the STOP command is sent.

The controller can support SPI messages larger than 200 bytes if
cs_change is set for individual transfers such that the portions with
chip select asserted are never longer than 200 bytes. What is not
supported is just SPI messages with a continuous chip select larger than
200. I don't think it is possible to express this using the current API,
so drivers which do send SPI messages with cs_change can safely just
look at the max_transfer_size limit.

An example of user for this is sja1105_xfer() in
drivers/net/dsa/sja1105/sja1105_spi.c which sends by default 64 * 4 =
256 byte transfers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20210520131238.2903024-3-olteanv@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sc18is602.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 37871edc79629..5d27ee4822376 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -219,6 +219,11 @@ static int sc18is602_transfer_one(struct spi_master *master,
 	return status;
 }
 
+static size_t sc18is602_max_transfer_size(struct spi_device *spi)
+{
+	return SC18IS602_BUFSIZ;
+}
+
 static int sc18is602_setup(struct spi_device *spi)
 {
 	struct sc18is602 *hw = spi_master_get_devdata(spi->master);
@@ -293,6 +298,8 @@ static int sc18is602_probe(struct i2c_client *client,
 	master->bits_per_word_mask = SPI_BPW_MASK(8);
 	master->setup = sc18is602_setup;
 	master->transfer_one_message = sc18is602_transfer_one;
+	master->max_transfer_size = sc18is602_max_transfer_size;
+	master->max_message_size = sc18is602_max_transfer_size;
 	master->dev.of_node = np;
 	master->min_speed_hz = hw->freq / 128;
 	master->max_speed_hz = hw->freq / 4;
-- 
GitLab


From dbfac814bb73624613f47d6e70391053ab6b8960 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 21 May 2021 17:47:45 +0800
Subject: [PATCH 1281/3804] spi: pxa2xx: Fix inconsistent indenting

Eliminate the following smatch warning:

drivers/spi/spi-pxa2xx-pci.c:260 pxa2xx_spi_pci_probe() warn:
inconsistent indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/1621590465-73594-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 9c9992d4f5475..2e134eb4bd2c9 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -257,7 +257,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	snprintf(buf, sizeof(buf), "pxa2xx-spi.%d", ssp->port_id);
 	ssp->clk = clk_register_fixed_rate(&dev->dev, buf, NULL, 0,
 					   c->max_clk_rate);
-	 if (IS_ERR(ssp->clk))
+	if (IS_ERR(ssp->clk))
 		return PTR_ERR(ssp->clk);
 
 	memset(&pi, 0, sizeof(pi));
-- 
GitLab


From 4f2629ea67e7225c3fd292c7fe4f5b3c9d6392de Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 18 May 2021 16:18:35 -0400
Subject: [PATCH 1282/3804] USB: usbfs: Don't WARN about excessively large
 memory allocations

Syzbot found that the kernel generates a WARNing if the user tries to
submit a bulk transfer through usbfs with a buffer that is way too
large.  This isn't a bug in the kernel; it's merely an invalid request
from the user and the usbfs code does handle it correctly.

In theory the same thing can happen with async transfers, or with the
packet descriptor table for isochronous transfers.

To prevent the MM subsystem from complaining about these bad
allocation requests, add the __GFP_NOWARN flag to the kmalloc calls
for these buffers.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+882a85c0c8ec4a3e2281@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/20210518201835.GA1140918@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/devio.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 533236366a03b..2218941d35a3f 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1218,7 +1218,12 @@ static int do_proc_bulk(struct usb_dev_state *ps,
 	ret = usbfs_increase_memory_usage(len1 + sizeof(struct urb));
 	if (ret)
 		return ret;
-	tbuf = kmalloc(len1, GFP_KERNEL);
+
+	/*
+	 * len1 can be almost arbitrarily large.  Don't WARN if it's
+	 * too big, just fail the request.
+	 */
+	tbuf = kmalloc(len1, GFP_KERNEL | __GFP_NOWARN);
 	if (!tbuf) {
 		ret = -ENOMEM;
 		goto done;
@@ -1696,7 +1701,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
 	if (num_sgs) {
 		as->urb->sg = kmalloc_array(num_sgs,
 					    sizeof(struct scatterlist),
-					    GFP_KERNEL);
+					    GFP_KERNEL | __GFP_NOWARN);
 		if (!as->urb->sg) {
 			ret = -ENOMEM;
 			goto error;
@@ -1731,7 +1736,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
 					(uurb_start - as->usbm->vm_start);
 		} else {
 			as->urb->transfer_buffer = kmalloc(uurb->buffer_length,
-					GFP_KERNEL);
+					GFP_KERNEL | __GFP_NOWARN);
 			if (!as->urb->transfer_buffer) {
 				ret = -ENOMEM;
 				goto error;
-- 
GitLab


From 25dda9fc56bd90d45f9a4516bcfa5211e61b4290 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Wed, 12 May 2021 20:17:09 -0700
Subject: [PATCH 1283/3804] usb: dwc3: gadget: Properly track pending and
 queued SG

The driver incorrectly uses req->num_pending_sgs to track both the
number of pending and queued SG entries. It only prepares the next
request if the previous is done, and it doesn't update num_pending_sgs
until there is a TRB completion interrupt. This may starve the controller
of more TRBs until the num_pending_sgs is decremented.

Fix this by decrementing the num_pending_sgs after they are queued and
properly track both num_mapped_sgs and num_queued_sgs.

Fixes: c96e6725db9d ("usb: dwc3: gadget: Correct the logic for queuing sgs")
Cc: <stable@vger.kernel.org>
Reported-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Tested-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/ba24591dbcaad8f244a3e88bd449bb7205a5aec3.1620874069.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 49ca5da5e2794..612825a39f821 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1244,6 +1244,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep,
 			req->start_sg = sg_next(s);
 
 		req->num_queued_sgs++;
+		req->num_pending_sgs--;
 
 		/*
 		 * The number of pending SG entries may not correspond to the
@@ -1251,7 +1252,7 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep,
 		 * don't include unused SG entries.
 		 */
 		if (length == 0) {
-			req->num_pending_sgs -= req->request.num_mapped_sgs - req->num_queued_sgs;
+			req->num_pending_sgs = 0;
 			break;
 		}
 
@@ -2873,15 +2874,15 @@ static int dwc3_gadget_ep_reclaim_trb_sg(struct dwc3_ep *dep,
 	struct dwc3_trb *trb = &dep->trb_pool[dep->trb_dequeue];
 	struct scatterlist *sg = req->sg;
 	struct scatterlist *s;
-	unsigned int pending = req->num_pending_sgs;
+	unsigned int num_queued = req->num_queued_sgs;
 	unsigned int i;
 	int ret = 0;
 
-	for_each_sg(sg, s, pending, i) {
+	for_each_sg(sg, s, num_queued, i) {
 		trb = &dep->trb_pool[dep->trb_dequeue];
 
 		req->sg = sg_next(s);
-		req->num_pending_sgs--;
+		req->num_queued_sgs--;
 
 		ret = dwc3_gadget_ep_reclaim_completed_trb(dep, req,
 				trb, event, status, true);
@@ -2904,7 +2905,7 @@ static int dwc3_gadget_ep_reclaim_trb_linear(struct dwc3_ep *dep,
 
 static bool dwc3_gadget_ep_request_completed(struct dwc3_request *req)
 {
-	return req->num_pending_sgs == 0;
+	return req->num_pending_sgs == 0 && req->num_queued_sgs == 0;
 }
 
 static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
@@ -2913,7 +2914,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
 {
 	int ret;
 
-	if (req->num_pending_sgs)
+	if (req->request.num_mapped_sgs)
 		ret = dwc3_gadget_ep_reclaim_trb_sg(dep, req, event,
 				status);
 	else
-- 
GitLab


From dcb4b8ad6a448532d8b681b5d1a7036210b622de Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Fri, 14 May 2021 20:43:48 +0800
Subject: [PATCH 1284/3804] misc/uss720: fix memory leak in uss720_probe

uss720_probe forgets to decrease the refcount of usbdev.
Fix this by decreasing the refcount of usbdev by usb_put_dev.

BUG: memory leak
unreferenced object 0xffff888101113800 (size 2048):
  comm "kworker/0:1", pid 7, jiffies 4294956777 (age 28.870s)
  hex dump (first 32 bytes):
    ff ff ff ff 31 00 00 00 00 00 00 00 00 00 00 00  ....1...........
    00 00 00 00 00 00 00 00 00 00 00 00 03 00 00 00  ................
  backtrace:
    [<ffffffff82b8e822>] kmalloc include/linux/slab.h:554 [inline]
    [<ffffffff82b8e822>] kzalloc include/linux/slab.h:684 [inline]
    [<ffffffff82b8e822>] usb_alloc_dev+0x32/0x450 drivers/usb/core/usb.c:582
    [<ffffffff82b98441>] hub_port_connect drivers/usb/core/hub.c:5129 [inline]
    [<ffffffff82b98441>] hub_port_connect_change drivers/usb/core/hub.c:5363 [inline]
    [<ffffffff82b98441>] port_event drivers/usb/core/hub.c:5509 [inline]
    [<ffffffff82b98441>] hub_event+0x1171/0x20c0 drivers/usb/core/hub.c:5591
    [<ffffffff81259229>] process_one_work+0x2c9/0x600 kernel/workqueue.c:2275
    [<ffffffff81259b19>] worker_thread+0x59/0x5d0 kernel/workqueue.c:2421
    [<ffffffff81261228>] kthread+0x178/0x1b0 kernel/kthread.c:292
    [<ffffffff8100227f>] ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

Fixes: 0f36163d3abe ("[PATCH] usb: fix uss720 schedule with interrupts off")
Cc: stable <stable@vger.kernel.org>
Reported-by: syzbot+636c58f40a86b4a879e7@syzkaller.appspotmail.com
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Link: https://lore.kernel.org/r/20210514124348.6587-1-mudongliangabcd@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/uss720.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index b5d6616442635..748139d262633 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -736,6 +736,7 @@ static int uss720_probe(struct usb_interface *intf,
 	parport_announce_port(pp);
 
 	usb_set_intfdata(intf, pp);
+	usb_put_dev(usbdev);
 	return 0;
 
 probe_abort:
-- 
GitLab


From acf5631c239dfc53489f739c4ad47f490c5181ff Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Sat, 15 May 2021 20:47:30 -0700
Subject: [PATCH 1285/3804] usb: typec: mux: Fix matching with
 typec_altmode_desc

In typec_mux_match() "nval" is assigned the number of elements in the
"svid" fwnode property, then the variable is used to store the success
of the read and finally attempts to loop between 0 and "success" - i.e.
not at all - and the code returns indicating that no match was found.

Fix this by using a separate variable to track the success of the read,
to allow the loop to get a chance to find a match.

Fixes: 96a6d031ca99 ("usb: typec: mux: Find the muxes by also matching against the device node")
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210516034730.621461-1-bjorn.andersson@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index 9da22ae3006c9..8514bec7e1b89 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -191,6 +191,7 @@ static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id,
 	bool match;
 	int nval;
 	u16 *val;
+	int ret;
 	int i;
 
 	/*
@@ -218,10 +219,10 @@ static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id,
 	if (!val)
 		return ERR_PTR(-ENOMEM);
 
-	nval = fwnode_property_read_u16_array(fwnode, "svid", val, nval);
-	if (nval < 0) {
+	ret = fwnode_property_read_u16_array(fwnode, "svid", val, nval);
+	if (ret < 0) {
 		kfree(val);
-		return ERR_PTR(nval);
+		return ERR_PTR(ret);
 	}
 
 	for (i = 0; i < nval; i++) {
-- 
GitLab


From 8c9b3caab3ac26db1da00b8117901640c55a69dd Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Sat, 15 May 2021 21:09:53 -0700
Subject: [PATCH 1286/3804] usb: typec: ucsi: Clear pending after acking
 connector change

It's possible that the interrupt handler for the UCSI driver signals a
connector change after the handler clears the PENDING bit, but before
it has sent the acknowledge request. The result is that the handler is
invoked yet again, to ack the same connector change.

At least some versions of the Qualcomm UCSI firmware will not handle the
second - "spurious" - acknowledgment gracefully. So make sure to not
clear the pending flag until the change is acknowledged.

Any connector changes coming in after the acknowledgment, that would
have the pending flag incorrectly cleared, would afaict be covered by
the subsequent connector status check.

Fixes: 217504a05532 ("usb: typec: ucsi: Work around PPM losing change information")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-By: Benjamin Berg <bberg@redhat.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210516040953.622409-1-bjorn.andersson@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 1d8b7df59ff49..b433169ef6fa4 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -717,8 +717,8 @@ static void ucsi_handle_connector_change(struct work_struct *work)
 	ucsi_send_command(con->ucsi, command, NULL, 0);
 
 	/* 3. ACK connector change */
-	clear_bit(EVENT_PENDING, &ucsi->flags);
 	ret = ucsi_acknowledge_connector_change(ucsi);
+	clear_bit(EVENT_PENDING, &ucsi->flags);
 	if (ret) {
 		dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret);
 		goto out_unlock;
-- 
GitLab


From c58bbe3477f75deb7883983e6cf428404a107555 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 19 May 2021 13:03:58 +0300
Subject: [PATCH 1287/3804] usb: typec: tcpm: Use LE to CPU conversion when
 accessing msg->header

Sparse is not happy about strict type handling:
  .../typec/tcpm/tcpm.c:2720:27: warning: restricted __le16 degrades to integer
  .../typec/tcpm/tcpm.c:2814:32: warning: restricted __le16 degrades to integer

Fix this by converting LE to CPU before use.

Fixes: ae8a2ca8a221 ("usb: typec: Group all TCPCI/TCPM code together")
Fixes: 64f7c494a3c0 ("typec: tcpm: Add support for sink PPS related messages")
Cc: stable <stable@vger.kernel.org>
Cc: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210519100358.64018-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 64133e586c644..8fdfd7f65ad77 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2717,7 +2717,7 @@ static void tcpm_pd_ext_msg_request(struct tcpm_port *port,
 	enum pd_ext_msg_type type = pd_header_type_le(msg->header);
 	unsigned int data_size = pd_ext_header_data_size_le(msg->ext_msg.header);
 
-	if (!(msg->ext_msg.header & PD_EXT_HDR_CHUNKED)) {
+	if (!(le16_to_cpu(msg->ext_msg.header) & PD_EXT_HDR_CHUNKED)) {
 		tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS);
 		tcpm_log(port, "Unchunked extended messages unsupported");
 		return;
@@ -2811,7 +2811,7 @@ static void tcpm_pd_rx_handler(struct kthread_work *work)
 				 "Data role mismatch, initiating error recovery");
 			tcpm_set_state(port, ERROR_RECOVERY, 0);
 		} else {
-			if (msg->header & PD_HEADER_EXT_HDR)
+			if (le16_to_cpu(msg->header) & PD_HEADER_EXT_HDR)
 				tcpm_pd_ext_msg_request(port, msg);
 			else if (cnt)
 				tcpm_pd_data_request(port, msg);
-- 
GitLab


From 10505b720189ecc3852596a70a7e391b2a5c5b57 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 20 May 2021 22:36:08 -0300
Subject: [PATCH 1288/3804] usb: Restore the usb_header label

Commit caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location")
removed the reference to the _usb_header label by mistake, which causes the
following htmldocs build warning:

Documentation/driver-api/usb/writing_usb_driver.rst:129: WARNING: undefined label: usb_header

Restore the label.

Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20210521013608.17957-1-festevam@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/usb/usb.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst
index 820e867af45ab..2c94ff2f43857 100644
--- a/Documentation/driver-api/usb/usb.rst
+++ b/Documentation/driver-api/usb/usb.rst
@@ -123,6 +123,8 @@ are in ``drivers/usb/common/common.c``.
 In addition, some functions useful for creating debugging output are
 defined in ``drivers/usb/common/debug.c``.
 
+.. _usb_header:
+
 Host-Side Data Types and Macros
 ===============================
 
-- 
GitLab


From 12ccb76280f8c0c07794fa68f83286b934981ca5 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 13 Apr 2021 11:40:17 +0200
Subject: [PATCH 1289/3804] media: lirc: remove out of date comment

This file has been updated many times since 2010.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/lirc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index c45a4eaea6676..9919f2062b14d 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * lirc.h - linux infrared remote control header file
- * last modified 2010/07/13 by Jarod Wilson
  */
 
 #ifndef _LINUX_LIRC_H
-- 
GitLab


From 02625c965239b71869326dd0461615f27307ecb3 Mon Sep 17 00:00:00 2001
From: Anirudh Rayabharam <mail@anirudhrb.com>
Date: Mon, 17 May 2021 00:57:14 +0530
Subject: [PATCH 1290/3804] video: hgafb: correctly handle card detect failure
 during probe

The return value of hga_card_detect() is not properly handled causing
the probe to succeed even though hga_card_detect() failed. Since probe
succeeds, hgafb_open() can be called which will end up operating on an
unmapped hga_vram. This results in an out-of-bounds access as reported
by kernel test robot [1].

To fix this, correctly detect failure of hga_card_detect() by checking
for a non-zero error code.

[1]: https://lore.kernel.org/lkml/20210516150019.GB25903@xsang-OptiPlex-9020/

Fixes: dc13cac4862c ("video: hgafb: fix potential NULL pointer dereference")
Cc: stable <stable@vger.kernel.org>
Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Signed-off-by: Anirudh Rayabharam <mail@anirudhrb.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20210516192714.25823-1-mail@anirudhrb.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/hgafb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/hgafb.c b/drivers/video/fbdev/hgafb.c
index cc8e62ae93f6a..bd3d07aa4f0ec 100644
--- a/drivers/video/fbdev/hgafb.c
+++ b/drivers/video/fbdev/hgafb.c
@@ -558,7 +558,7 @@ static int hgafb_probe(struct platform_device *pdev)
 	int ret;
 
 	ret = hga_card_detect();
-	if (!ret)
+	if (ret)
 		return ret;
 
 	printk(KERN_INFO "hgafb: %s with %ldK of memory detected.\n",
-- 
GitLab


From 58c08df5751d823332ccdb49f1d5795479097119 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Apr 2021 15:58:40 +0200
Subject: [PATCH 1291/3804] media: rc: remove tango ir driver and keymap

The tango platform was removed, so the driver is no longer needed.

Cc: Marc Gonzalez <marc.w.gonzalez@free.fr>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/rc.yaml         |   1 -
 .../devicetree/bindings/media/tango-ir.txt    |  21 --
 drivers/media/rc/Kconfig                      |  10 -
 drivers/media/rc/Makefile                     |   1 -
 drivers/media/rc/keymaps/Makefile             |   1 -
 drivers/media/rc/keymaps/rc-tango.c           |  89 ------
 drivers/media/rc/tango-ir.c                   | 267 ------------------
 include/media/rc-map.h                        |   1 -
 8 files changed, 391 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/media/tango-ir.txt
 delete mode 100644 drivers/media/rc/keymaps/rc-tango.c
 delete mode 100644 drivers/media/rc/tango-ir.c

diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml
index af9e7e59e5a12..12d838b05632e 100644
--- a/Documentation/devicetree/bindings/media/rc.yaml
+++ b/Documentation/devicetree/bindings/media/rc.yaml
@@ -125,7 +125,6 @@ properties:
       - rc-snapstream-firefly
       - rc-streamzap
       - rc-su3000
-      - rc-tango
       - rc-tanix-tx3mini
       - rc-tanix-tx5max
       - rc-tbs-nec
diff --git a/Documentation/devicetree/bindings/media/tango-ir.txt b/Documentation/devicetree/bindings/media/tango-ir.txt
deleted file mode 100644
index a9f00c2bf8970..0000000000000
--- a/Documentation/devicetree/bindings/media/tango-ir.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-Sigma Designs Tango IR NEC/RC-5/RC-6 decoder (SMP86xx and SMP87xx)
-
-Required properties:
-
-- compatible: "sigma,smp8642-ir"
-- reg: address/size of NEC+RC5 area, address/size of RC6 area
-- interrupts: spec for IR IRQ
-- clocks: spec for IR clock (typically the crystal oscillator)
-
-Optional properties:
-
-- linux,rc-map-name: see Documentation/devicetree/bindings/media/rc.txt
-
-Example:
-
-	ir@10518 {
-		compatible = "sigma,smp8642-ir";
-		reg = <0x10518 0x18>, <0x105e0 0x1c>;
-		interrupts = <21 IRQ_TYPE_EDGE_RISING>;
-		clocks = <&xtal>;
-	};
diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
index f016b35c2b17b..ae0025fba21c2 100644
--- a/drivers/media/rc/Kconfig
+++ b/drivers/media/rc/Kconfig
@@ -499,16 +499,6 @@ config IR_SIR
 	   To compile this driver as a module, choose M here: the module will
 	   be called sir-ir.
 
-config IR_TANGO
-	tristate "Sigma Designs SMP86xx IR decoder"
-	depends on RC_CORE
-	depends on ARCH_TANGO || COMPILE_TEST
-	help
-	   Adds support for the HW IR decoder embedded on Sigma Designs
-	   Tango-based systems (SMP86xx, SMP87xx).
-	   The HW decoder supports NEC, RC-5, RC-6 IR protocols.
-	   When compiled as a module, look for tango-ir.
-
 config RC_XBOX_DVD
 	tristate "Xbox DVD Movie Playback Kit"
 	depends on RC_CORE
diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
index f31002288f7c5..692e9b6b203f8 100644
--- a/drivers/media/rc/Makefile
+++ b/drivers/media/rc/Makefile
@@ -48,6 +48,5 @@ obj-$(CONFIG_IR_IMG) += img-ir/
 obj-$(CONFIG_IR_SERIAL) += serial_ir.o
 obj-$(CONFIG_IR_SIR) += sir_ir.o
 obj-$(CONFIG_IR_MTK) += mtk-cir.o
-obj-$(CONFIG_IR_TANGO) += tango-ir.o
 obj-$(CONFIG_RC_XBOX_DVD) += xbox_remote.o
 obj-$(CONFIG_IR_TOY) += ir_toy.o
diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 50b2833dbe4f4..f609dfe7fd76f 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -100,7 +100,6 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-reddo.o \
 			rc-snapstream-firefly.o \
 			rc-streamzap.o \
-			rc-tango.o \
 			rc-tanix-tx3mini.o \
 			rc-tanix-tx5max.o \
 			rc-tbs-nec.o \
diff --git a/drivers/media/rc/keymaps/rc-tango.c b/drivers/media/rc/keymaps/rc-tango.c
deleted file mode 100644
index 2b9cef6ef5b5a..0000000000000
--- a/drivers/media/rc/keymaps/rc-tango.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2017 Sigma Designs
- */
-
-#include <linux/module.h>
-#include <media/rc-map.h>
-
-static struct rc_map_table tango_table[] = {
-	{ 0x4cb4a, KEY_POWER },
-	{ 0x4cb48, KEY_FILE },
-	{ 0x4cb0f, KEY_SETUP },
-	{ 0x4cb4d, KEY_SUSPEND },
-	{ 0x4cb4e, KEY_VOLUMEUP },
-	{ 0x4cb44, KEY_EJECTCD },
-	{ 0x4cb13, KEY_TV },
-	{ 0x4cb51, KEY_MUTE },
-	{ 0x4cb52, KEY_VOLUMEDOWN },
-
-	{ 0x4cb41, KEY_NUMERIC_1 },
-	{ 0x4cb03, KEY_NUMERIC_2 },
-	{ 0x4cb42, KEY_NUMERIC_3 },
-	{ 0x4cb45, KEY_NUMERIC_4 },
-	{ 0x4cb07, KEY_NUMERIC_5 },
-	{ 0x4cb46, KEY_NUMERIC_6 },
-	{ 0x4cb55, KEY_NUMERIC_7 },
-	{ 0x4cb17, KEY_NUMERIC_8 },
-	{ 0x4cb56, KEY_NUMERIC_9 },
-	{ 0x4cb1b, KEY_NUMERIC_0 },
-	{ 0x4cb59, KEY_DELETE },
-	{ 0x4cb5a, KEY_CAPSLOCK },
-
-	{ 0x4cb47, KEY_BACK },
-	{ 0x4cb05, KEY_SWITCHVIDEOMODE },
-	{ 0x4cb06, KEY_UP },
-	{ 0x4cb43, KEY_LEFT },
-	{ 0x4cb01, KEY_RIGHT },
-	{ 0x4cb0a, KEY_DOWN },
-	{ 0x4cb02, KEY_ENTER },
-	{ 0x4cb4b, KEY_INFO },
-	{ 0x4cb09, KEY_HOME },
-
-	{ 0x4cb53, KEY_MENU },
-	{ 0x4cb12, KEY_PREVIOUS },
-	{ 0x4cb50, KEY_PLAY },
-	{ 0x4cb11, KEY_NEXT },
-	{ 0x4cb4f, KEY_TITLE },
-	{ 0x4cb0e, KEY_REWIND },
-	{ 0x4cb4c, KEY_STOP },
-	{ 0x4cb0d, KEY_FORWARD },
-	{ 0x4cb57, KEY_MEDIA_REPEAT },
-	{ 0x4cb16, KEY_ANGLE },
-	{ 0x4cb54, KEY_PAUSE },
-	{ 0x4cb15, KEY_SLOW },
-	{ 0x4cb5b, KEY_TIME },
-	{ 0x4cb1a, KEY_AUDIO },
-	{ 0x4cb58, KEY_SUBTITLE },
-	{ 0x4cb19, KEY_ZOOM },
-
-	{ 0x4cb5f, KEY_RED },
-	{ 0x4cb1e, KEY_GREEN },
-	{ 0x4cb5c, KEY_YELLOW },
-	{ 0x4cb1d, KEY_BLUE },
-};
-
-static struct rc_map_list tango_map = {
-	.map = {
-		.scan = tango_table,
-		.size = ARRAY_SIZE(tango_table),
-		.rc_proto = RC_PROTO_NECX,
-		.name = RC_MAP_TANGO,
-	}
-};
-
-static int __init init_rc_map_tango(void)
-{
-	return rc_map_register(&tango_map);
-}
-
-static void __exit exit_rc_map_tango(void)
-{
-	rc_map_unregister(&tango_map);
-}
-
-module_init(init_rc_map_tango)
-module_exit(exit_rc_map_tango)
-
-MODULE_AUTHOR("Sigma Designs");
-MODULE_LICENSE("GPL");
diff --git a/drivers/media/rc/tango-ir.c b/drivers/media/rc/tango-ir.c
deleted file mode 100644
index b8eb5bc4d9be1..0000000000000
--- a/drivers/media/rc/tango-ir.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2015 Mans Rullgard <mans@mansr.com>
- */
-
-#include <linux/input.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/of.h>
-#include <media/rc-core.h>
-
-#define DRIVER_NAME "tango-ir"
-
-#define IR_NEC_CTRL	0x00
-#define IR_NEC_DATA	0x04
-#define IR_CTRL		0x08
-#define IR_RC5_CLK_DIV	0x0c
-#define IR_RC5_DATA	0x10
-#define IR_INT		0x14
-
-#define NEC_TIME_BASE	560
-#define RC5_TIME_BASE	1778
-
-#define RC6_CTRL	0x00
-#define RC6_CLKDIV	0x04
-#define RC6_DATA0	0x08
-#define RC6_DATA1	0x0c
-#define RC6_DATA2	0x10
-#define RC6_DATA3	0x14
-#define RC6_DATA4	0x18
-
-#define RC6_CARRIER	36000
-#define RC6_TIME_BASE	16
-
-#define NEC_CAP(n)	((n) << 24)
-#define GPIO_SEL(n)	((n) << 16)
-#define DISABLE_NEC	(BIT(4) | BIT(8))
-#define ENABLE_RC5	(BIT(0) | BIT(9))
-#define ENABLE_RC6	(BIT(0) | BIT(7))
-#define ACK_IR_INT	(BIT(0) | BIT(1))
-#define ACK_RC6_INT	(BIT(31))
-
-#define NEC_ANY (RC_PROTO_BIT_NEC | RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32)
-
-struct tango_ir {
-	void __iomem *rc5_base;
-	void __iomem *rc6_base;
-	struct rc_dev *rc;
-	struct clk *clk;
-};
-
-static void tango_ir_handle_nec(struct tango_ir *ir)
-{
-	u32 v, code;
-	enum rc_proto proto;
-
-	v = readl_relaxed(ir->rc5_base + IR_NEC_DATA);
-	if (!v) {
-		rc_repeat(ir->rc);
-		return;
-	}
-
-	code = ir_nec_bytes_to_scancode(v, v >> 8, v >> 16, v >> 24, &proto);
-	rc_keydown(ir->rc, proto, code, 0);
-}
-
-static void tango_ir_handle_rc5(struct tango_ir *ir)
-{
-	u32 data, field, toggle, addr, cmd, code;
-
-	data = readl_relaxed(ir->rc5_base + IR_RC5_DATA);
-	if (data & BIT(31))
-		return;
-
-	field = data >> 12 & 1;
-	toggle = data >> 11 & 1;
-	addr = data >> 6 & 0x1f;
-	cmd = (data & 0x3f) | (field ^ 1) << 6;
-
-	code = RC_SCANCODE_RC5(addr, cmd);
-	rc_keydown(ir->rc, RC_PROTO_RC5, code, toggle);
-}
-
-static void tango_ir_handle_rc6(struct tango_ir *ir)
-{
-	u32 data0, data1, toggle, mode, addr, cmd, code;
-
-	data0 = readl_relaxed(ir->rc6_base + RC6_DATA0);
-	data1 = readl_relaxed(ir->rc6_base + RC6_DATA1);
-
-	mode = data0 >> 1 & 7;
-	if (mode != 0)
-		return;
-
-	toggle = data0 & 1;
-	addr = data0 >> 16;
-	cmd = data1;
-
-	code = RC_SCANCODE_RC6_0(addr, cmd);
-	rc_keydown(ir->rc, RC_PROTO_RC6_0, code, toggle);
-}
-
-static irqreturn_t tango_ir_irq(int irq, void *dev_id)
-{
-	struct tango_ir *ir = dev_id;
-	unsigned int rc5_stat;
-	unsigned int rc6_stat;
-
-	rc5_stat = readl_relaxed(ir->rc5_base + IR_INT);
-	writel_relaxed(rc5_stat, ir->rc5_base + IR_INT);
-
-	rc6_stat = readl_relaxed(ir->rc6_base + RC6_CTRL);
-	writel_relaxed(rc6_stat, ir->rc6_base + RC6_CTRL);
-
-	if (!(rc5_stat & 3) && !(rc6_stat & BIT(31)))
-		return IRQ_NONE;
-
-	if (rc5_stat & BIT(0))
-		tango_ir_handle_rc5(ir);
-
-	if (rc5_stat & BIT(1))
-		tango_ir_handle_nec(ir);
-
-	if (rc6_stat & BIT(31))
-		tango_ir_handle_rc6(ir);
-
-	return IRQ_HANDLED;
-}
-
-static int tango_change_protocol(struct rc_dev *dev, u64 *rc_type)
-{
-	struct tango_ir *ir = dev->priv;
-	u32 rc5_ctrl = DISABLE_NEC;
-	u32 rc6_ctrl = 0;
-
-	if (*rc_type & NEC_ANY)
-		rc5_ctrl = 0;
-
-	if (*rc_type & RC_PROTO_BIT_RC5)
-		rc5_ctrl |= ENABLE_RC5;
-
-	if (*rc_type & RC_PROTO_BIT_RC6_0)
-		rc6_ctrl = ENABLE_RC6;
-
-	writel_relaxed(rc5_ctrl, ir->rc5_base + IR_CTRL);
-	writel_relaxed(rc6_ctrl, ir->rc6_base + RC6_CTRL);
-
-	return 0;
-}
-
-static int tango_ir_probe(struct platform_device *pdev)
-{
-	const char *map_name = RC_MAP_TANGO;
-	struct device *dev = &pdev->dev;
-	struct rc_dev *rc;
-	struct tango_ir *ir;
-	u64 clkrate, clkdiv;
-	int irq, err;
-	u32 val;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq <= 0)
-		return -EINVAL;
-
-	ir = devm_kzalloc(dev, sizeof(*ir), GFP_KERNEL);
-	if (!ir)
-		return -ENOMEM;
-
-	ir->rc5_base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(ir->rc5_base))
-		return PTR_ERR(ir->rc5_base);
-
-	ir->rc6_base = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(ir->rc6_base))
-		return PTR_ERR(ir->rc6_base);
-
-	ir->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(ir->clk))
-		return PTR_ERR(ir->clk);
-
-	rc = devm_rc_allocate_device(dev, RC_DRIVER_SCANCODE);
-	if (!rc)
-		return -ENOMEM;
-
-	of_property_read_string(dev->of_node, "linux,rc-map-name", &map_name);
-
-	rc->device_name = DRIVER_NAME;
-	rc->driver_name = DRIVER_NAME;
-	rc->input_phys = DRIVER_NAME "/input0";
-	rc->map_name = map_name;
-	rc->allowed_protocols = NEC_ANY | RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC6_0;
-	rc->change_protocol = tango_change_protocol;
-	rc->priv = ir;
-	ir->rc = rc;
-
-	err = clk_prepare_enable(ir->clk);
-	if (err)
-		return err;
-
-	clkrate = clk_get_rate(ir->clk);
-
-	clkdiv = clkrate * NEC_TIME_BASE;
-	do_div(clkdiv, 1000000);
-
-	val = NEC_CAP(31) | GPIO_SEL(12) | clkdiv;
-	writel_relaxed(val, ir->rc5_base + IR_NEC_CTRL);
-
-	clkdiv = clkrate * RC5_TIME_BASE;
-	do_div(clkdiv, 1000000);
-
-	writel_relaxed(DISABLE_NEC, ir->rc5_base + IR_CTRL);
-	writel_relaxed(clkdiv, ir->rc5_base + IR_RC5_CLK_DIV);
-	writel_relaxed(ACK_IR_INT, ir->rc5_base + IR_INT);
-
-	clkdiv = clkrate * RC6_TIME_BASE;
-	do_div(clkdiv, RC6_CARRIER);
-
-	writel_relaxed(ACK_RC6_INT, ir->rc6_base + RC6_CTRL);
-	writel_relaxed((clkdiv >> 2) << 18 | clkdiv, ir->rc6_base + RC6_CLKDIV);
-
-	err = devm_request_irq(dev, irq, tango_ir_irq, IRQF_SHARED,
-			       dev_name(dev), ir);
-	if (err)
-		goto err_clk;
-
-	err = devm_rc_register_device(dev, rc);
-	if (err)
-		goto err_clk;
-
-	platform_set_drvdata(pdev, ir);
-	return 0;
-
-err_clk:
-	clk_disable_unprepare(ir->clk);
-	return err;
-}
-
-static int tango_ir_remove(struct platform_device *pdev)
-{
-	struct tango_ir *ir = platform_get_drvdata(pdev);
-
-	clk_disable_unprepare(ir->clk);
-	return 0;
-}
-
-static const struct of_device_id tango_ir_dt_ids[] = {
-	{ .compatible = "sigma,smp8642-ir" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, tango_ir_dt_ids);
-
-static struct platform_driver tango_ir_driver = {
-	.probe	= tango_ir_probe,
-	.remove	= tango_ir_remove,
-	.driver	= {
-		.name		= DRIVER_NAME,
-		.of_match_table	= tango_ir_dt_ids,
-	},
-};
-module_platform_driver(tango_ir_driver);
-
-MODULE_DESCRIPTION("SMP86xx IR decoder driver");
-MODULE_AUTHOR("Mans Rullgard <mans@mansr.com>");
-MODULE_LICENSE("GPL");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index b5585d14fff4b..b50443d6fd776 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -312,7 +312,6 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_SNAPSTREAM_FIREFLY        "rc-snapstream-firefly"
 #define RC_MAP_STREAMZAP                 "rc-streamzap"
 #define RC_MAP_SU3000                    "rc-su3000"
-#define RC_MAP_TANGO                     "rc-tango"
 #define RC_MAP_TANIX_TX3MINI             "rc-tanix-tx3mini"
 #define RC_MAP_TANIX_TX5MAX              "rc-tanix-tx5max"
 #define RC_MAP_TBS_NEC                   "rc-tbs-nec"
-- 
GitLab


From bda7d3ab06f19c02dcef61fefcb9dd954dfd5e4f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 20 May 2021 15:08:39 +0200
Subject: [PATCH 1292/3804] kgdb: fix gcc-11 warnings harder

40cc3a80bb42 ("kgdb: fix gcc-11 warning on indentation") tried to fix up
the gcc-11 complaints in this file by just reformatting the #defines.
That worked for gcc 11.1.0, but in gcc 11.1.1 as shipped by Fedora 34,
the warning came back for one of the #defines.

Fix this up again by putting { } around the if statement, now it is
quiet again.

Fixes: 40cc3a80bb42 ("kgdb: fix gcc-11 warning on indentation")
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Link: https://lore.kernel.org/r/20210520130839.51987-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/kgdbts.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index 64d33e3685091..67c5b452dd356 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -101,8 +101,9 @@
 		printk(KERN_INFO a);	\
 } while (0)
 #define v2printk(a...) do {		\
-	if (verbose > 1)		\
+	if (verbose > 1) {		\
 		printk(KERN_INFO a);	\
+	}				\
 	touch_nmi_watchdog();		\
 } while (0)
 #define eprintk(a...) do {		\
-- 
GitLab


From 52518e513d6dbb71a9cd8f7a1b83b76fe6f8709c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 21 Apr 2021 15:58:41 +0200
Subject: [PATCH 1293/3804] media: rc: clean up Kconfig dependencies

I came across a randconfig build failure from one driver
that only depends on CONFIG_USB_ARCH_HAS_HCD but fails when
built without CONFIG_USB:

ld: drivers/media/rc/ir_toy.o: in function `irtoy_disconnect':
ir_toy.c:(.text+0x24): undefined reference to `usb_kill_urb'
ld: ir_toy.c:(.text+0x2c): undefined reference to `usb_free_urb'
ld: ir_toy.c:(.text+0x34): undefined reference to `usb_kill_urb'
ld: ir_toy.c:(.text+0x3c): undefined reference to `usb_free_urb'

Upon a closer look, I find that a lot of the other drivers
'select USB' rather than stating 'depends on USB' as is common
for most subsystems. I also find that all except one driver
have an extra 'depends on RC_CORE' that is already implied by
the top-level 'if RC_CORE' check.

Clean up both by reducing the dependencies to the required set.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/rc/Kconfig | 73 ++++++----------------------------------
 1 file changed, 11 insertions(+), 62 deletions(-)

diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
index ae0025fba21c2..d0a8326b75c22 100644
--- a/drivers/media/rc/Kconfig
+++ b/drivers/media/rc/Kconfig
@@ -19,7 +19,6 @@ source "drivers/media/rc/keymaps/Kconfig"
 
 config LIRC
 	bool "LIRC user interface"
-	depends on RC_CORE
 	help
 	   Enable this option to enable the Linux Infrared Remote
 	   Control user interface (e.g. /dev/lirc*). This interface
@@ -41,12 +40,10 @@ config BPF_LIRC_MODE2
 
 menuconfig RC_DECODERS
 	bool "Remote controller decoders"
-	depends on RC_CORE
 
 if RC_DECODERS
 config IR_NEC_DECODER
 	tristate "Enable IR raw decoder for the NEC protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -55,7 +52,6 @@ config IR_NEC_DECODER
 
 config IR_RC5_DECODER
 	tristate "Enable IR raw decoder for the RC-5 protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -64,7 +60,6 @@ config IR_RC5_DECODER
 
 config IR_RC6_DECODER
 	tristate "Enable IR raw decoder for the RC6 protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -73,7 +68,6 @@ config IR_RC6_DECODER
 
 config IR_JVC_DECODER
 	tristate "Enable IR raw decoder for the JVC protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -82,7 +76,6 @@ config IR_JVC_DECODER
 
 config IR_SONY_DECODER
 	tristate "Enable IR raw decoder for the Sony protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -91,7 +84,6 @@ config IR_SONY_DECODER
 
 config IR_SANYO_DECODER
 	tristate "Enable IR raw decoder for the Sanyo protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -101,7 +93,6 @@ config IR_SANYO_DECODER
 
 config IR_SHARP_DECODER
 	tristate "Enable IR raw decoder for the Sharp protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -111,7 +102,6 @@ config IR_SHARP_DECODER
 
 config IR_MCE_KBD_DECODER
 	tristate "Enable IR raw decoder for the MCE keyboard/mouse protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -121,7 +111,6 @@ config IR_MCE_KBD_DECODER
 
 config IR_XMP_DECODER
 	tristate "Enable IR raw decoder for the XMP protocol"
-	depends on RC_CORE
 	select BITREVERSE
 
 	help
@@ -130,7 +119,6 @@ config IR_XMP_DECODER
 
 config IR_IMON_DECODER
 	tristate "Enable IR raw decoder for the iMON protocol"
-	depends on RC_CORE
 	help
 	   Enable this option if you have iMON PAD or Antec Veris infrared
 	   remote control and you would like to use it with a raw IR
@@ -138,7 +126,6 @@ config IR_IMON_DECODER
 
 config IR_RCMM_DECODER
 	tristate "Enable IR raw decoder for the RC-MM protocol"
-	depends on RC_CORE
 	help
 	   Enable this option when you have IR with RC-MM protocol, and
 	   you need the software decoder. The driver supports 12,
@@ -153,15 +140,12 @@ endif #RC_DECODERS
 
 menuconfig RC_DEVICES
 	bool "Remote Controller devices"
-	depends on RC_CORE
 
 if RC_DEVICES
 
 config RC_ATI_REMOTE
 	tristate "ATI / X10 based USB RF remote controls"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use an X10 based USB remote control.
 	   These are RF remotes with USB receivers.
@@ -179,7 +163,6 @@ config RC_ATI_REMOTE
 config IR_ENE
 	tristate "ENE eHome Receiver/Transceiver (pnp id: ENE0100/ENE02xxx)"
 	depends on PNP || COMPILE_TEST
-	depends on RC_CORE
 	help
 	   Say Y here to enable support for integrated infrared receiver
 	   /transceiver made by ENE.
@@ -192,7 +175,6 @@ config IR_ENE
 
 config IR_HIX5HD2
 	tristate "Hisilicon hix5hd2 IR remote control"
-	depends on RC_CORE
 	depends on OF || COMPILE_TEST
 	help
 	   Say Y here if you want to use hisilicon hix5hd2 remote control.
@@ -203,9 +185,7 @@ config IR_HIX5HD2
 
 config IR_IMON
 	tristate "SoundGraph iMON Receiver and Display"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use a SoundGraph iMON (aka Antec Veris)
 	   IR Receiver and/or LCD/VFD/VGA display.
@@ -215,9 +195,7 @@ config IR_IMON
 
 config IR_IMON_RAW
 	tristate "SoundGraph iMON Receiver (early raw IR models)"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use a SoundGraph iMON IR Receiver,
 	   early raw models.
@@ -227,9 +205,7 @@ config IR_IMON_RAW
 
 config IR_MCEUSB
 	tristate "Windows Media Center Ed. eHome Infrared Transceiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use a Windows Media Center Edition
 	   eHome Infrared Transceiver.
@@ -240,7 +216,6 @@ config IR_MCEUSB
 config IR_ITE_CIR
 	tristate "ITE Tech Inc. IT8712/IT8512 Consumer Infrared Transceiver"
 	depends on PNP || COMPILE_TEST
-	depends on RC_CORE
 	help
 	   Say Y here to enable support for integrated infrared receivers
 	   /transceivers made by ITE Tech Inc. These are found in
@@ -253,7 +228,6 @@ config IR_ITE_CIR
 config IR_FINTEK
 	tristate "Fintek Consumer Infrared Transceiver"
 	depends on PNP || COMPILE_TEST
-	depends on RC_CORE
 	help
 	   Say Y here to enable support for integrated infrared receiver
 	   /transceiver made by Fintek. This chip is found on assorted
@@ -264,7 +238,6 @@ config IR_FINTEK
 
 config IR_MESON
 	tristate "Amlogic Meson IR remote receiver"
-	depends on RC_CORE
 	depends on ARCH_MESON || COMPILE_TEST
 	help
 	   Say Y if you want to use the IR remote receiver available
@@ -275,7 +248,6 @@ config IR_MESON
 
 config IR_MTK
 	tristate "Mediatek IR remote receiver"
-	depends on RC_CORE
 	depends on ARCH_MEDIATEK || COMPILE_TEST
 	help
 	   Say Y if you want to use the IR remote receiver available
@@ -287,7 +259,6 @@ config IR_MTK
 config IR_NUVOTON
 	tristate "Nuvoton w836x7hg Consumer Infrared Transceiver"
 	depends on PNP || COMPILE_TEST
-	depends on RC_CORE
 	help
 	   Say Y here to enable support for integrated infrared receiver
 	   /transceiver made by Nuvoton (formerly Winbond). This chip is
@@ -299,11 +270,9 @@ config IR_NUVOTON
 
 config IR_REDRAT3
 	tristate "RedRat3 IR Transceiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
+	depends on USB
 	select NEW_LEDS
 	select LEDS_CLASS
-	select USB
 	help
 	   Say Y here if you want to use a RedRat3 Infrared Transceiver.
 
@@ -322,9 +291,7 @@ config IR_SPI
 
 config IR_STREAMZAP
 	tristate "Streamzap PC Remote IR Receiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use a Streamzap PC Remote
 	   Infrared Receiver.
@@ -335,7 +302,6 @@ config IR_STREAMZAP
 config IR_WINBOND_CIR
 	tristate "Winbond IR remote control"
 	depends on (X86 && PNP) || COMPILE_TEST
-	depends on RC_CORE
 	select NEW_LEDS
 	select LEDS_CLASS
 	select BITREVERSE
@@ -350,9 +316,7 @@ config IR_WINBOND_CIR
 
 config IR_IGORPLUGUSB
 	tristate "IgorPlug-USB IR Receiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use the IgorPlug-USB IR Receiver by
 	   Igor Cesko. This device is included on the Fit-PC2.
@@ -365,9 +329,7 @@ config IR_IGORPLUGUSB
 
 config IR_IGUANA
 	tristate "IguanaWorks USB IR Transceiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use the IguanaWorks USB IR Transceiver.
 	   Both infrared receive and send are supported. If you want to
@@ -381,9 +343,7 @@ config IR_IGUANA
 
 config IR_TTUSBIR
 	tristate "TechnoTrend USB IR Receiver"
-	depends on USB_ARCH_HAS_HCD
-	depends on RC_CORE
-	select USB
+	depends on USB
 	select NEW_LEDS
 	select LEDS_CLASS
 	help
@@ -407,7 +367,6 @@ source "drivers/media/rc/img-ir/Kconfig"
 
 config RC_LOOPBACK
 	tristate "Remote Control Loopback Driver"
-	depends on RC_CORE
 	help
 	   Say Y here if you want support for the remote control loopback
 	   driver which allows TX data to be sent back as RX data.
@@ -420,7 +379,6 @@ config RC_LOOPBACK
 
 config IR_GPIO_CIR
 	tristate "GPIO IR remote control"
-	depends on RC_CORE
 	depends on (OF && GPIOLIB) || COMPILE_TEST
 	help
 	   Say Y if you want to use GPIO based IR Receiver.
@@ -430,7 +388,6 @@ config IR_GPIO_CIR
 
 config IR_GPIO_TX
 	tristate "GPIO IR Bit Banging Transmitter"
-	depends on RC_CORE
 	depends on LIRC
 	depends on (OF && GPIOLIB) || COMPILE_TEST
 	help
@@ -442,7 +399,6 @@ config IR_GPIO_TX
 
 config IR_PWM_TX
 	tristate "PWM IR transmitter"
-	depends on RC_CORE
 	depends on LIRC
 	depends on PWM
 	depends on OF || COMPILE_TEST
@@ -455,7 +411,6 @@ config IR_PWM_TX
 
 config RC_ST
 	tristate "ST remote control receiver"
-	depends on RC_CORE
 	depends on ARCH_STI || COMPILE_TEST
 	help
 	   Say Y here if you want support for ST remote control driver
@@ -466,7 +421,6 @@ config RC_ST
 
 config IR_SUNXI
 	tristate "SUNXI IR remote control"
-	depends on RC_CORE
 	depends on ARCH_SUNXI || COMPILE_TEST
 	help
 	   Say Y if you want to use sunXi internal IR Controller
@@ -476,7 +430,6 @@ config IR_SUNXI
 
 config IR_SERIAL
 	tristate "Homebrew Serial Port Receiver"
-	depends on RC_CORE
 	help
 	   Say Y if you want to use Homebrew Serial Port Receivers and
 	   Transceivers.
@@ -492,7 +445,6 @@ config IR_SERIAL_TRANSMITTER
 
 config IR_SIR
 	tristate "Built-in SIR IrDA port"
-	depends on RC_CORE
 	help
 	   Say Y if you want to use a IrDA SIR port Transceivers.
 
@@ -501,9 +453,7 @@ config IR_SIR
 
 config RC_XBOX_DVD
 	tristate "Xbox DVD Movie Playback Kit"
-	depends on RC_CORE
-	depends on USB_ARCH_HAS_HCD
-	select USB
+	depends on USB
 	help
 	   Say Y here if you want to use the Xbox DVD Movie Playback Kit.
 	   These are IR remotes with USB receivers for the Original Xbox (2001).
@@ -513,8 +463,7 @@ config RC_XBOX_DVD
 
 config IR_TOY
 	tristate "Infrared Toy and IR Droid"
-	depends on RC_CORE
-	depends on USB_ARCH_HAS_HCD
+	depends on USB
 	help
 	   Say Y here if you want to use the Infrared Toy or IR Droid, USB
 	   versions.
-- 
GitLab


From ca8519ddc4d7e27d941a50e310a0f6bcfafdc8a9 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Fri, 9 Apr 2021 11:51:44 +0200
Subject: [PATCH 1294/3804] media: siano: use DEFINE_MUTEX() for mutex lock and
 LIST_HEAD for list head

mutex lock can be initialized automatically with DEFINE_MUTEX()
rather than explicitly calling mutex_init().
list head can be initialized automatically with LIST_HEAD()
rather than explicitly calling INIT_LIST_HEAD().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Ye Bin <yebin10@huawei.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/siano/smsdvb-main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c
index cd5bafe9a3aca..b8a163a47d09d 100644
--- a/drivers/media/common/siano/smsdvb-main.c
+++ b/drivers/media/common/siano/smsdvb-main.c
@@ -26,8 +26,8 @@ Copyright (C) 2006-2008, Uri Shkolnik
 
 DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
-static struct list_head g_smsdvb_clients;
-static struct mutex g_smsdvb_clientslock;
+static LIST_HEAD(g_smsdvb_clients);
+static DEFINE_MUTEX(g_smsdvb_clientslock);
 
 static u32 sms_to_guard_interval_table[] = {
 	[0] = GUARD_INTERVAL_1_32,
@@ -1236,9 +1236,6 @@ static int __init smsdvb_module_init(void)
 {
 	int rc;
 
-	INIT_LIST_HEAD(&g_smsdvb_clients);
-	mutex_init(&g_smsdvb_clientslock);
-
 	smsdvb_debugfs_register();
 
 	rc = smscore_register_hotplug(smsdvb_hotplug);
-- 
GitLab


From afccc0bbab594bf70f950eea19b60737e763b192 Mon Sep 17 00:00:00 2001
From: wengjianfeng <wengjianfeng@yulong.com>
Date: Thu, 8 Apr 2021 12:07:31 +0200
Subject: [PATCH 1295/3804] media: dvb-frontends: remove redundant words and
 fix several typos

change 'purpous' to 'purpose'.
change 'frequecy' to 'frequency'.
remove redundant words struct and enum.

Signed-off-by: wengjianfeng <wengjianfeng@yulong.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-frontends/drx39xyj/drxj.h | 35 +++++++++++----------
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.h b/drivers/media/dvb-frontends/drx39xyj/drxj.h
index d62412f71c887..232b3b0d68c81 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drxj.h
+++ b/drivers/media/dvb-frontends/drx39xyj/drxj.h
@@ -75,9 +75,9 @@ TYPEDEFS
 		u16 result_len;
 			/*< result length in byte */
 		u16 *parameter;
-			/*< General purpous param */
+			/*< General purpose param */
 		u16 *result;
-			/*< General purpous param */};
+			/*< General purpose param */};
 
 /*============================================================================*/
 /*============================================================================*/
@@ -131,7 +131,7 @@ TYPEDEFS
 		DRXJ_CFG_MAX	/* dummy, never to be used */};
 
 /*
-* /struct enum drxj_cfg_smart_ant_io * smart antenna i/o.
+* /enum drxj_cfg_smart_ant_io * smart antenna i/o.
 */
 enum drxj_cfg_smart_ant_io {
 	DRXJ_SMT_ANT_OUTPUT = 0,
@@ -139,7 +139,7 @@ enum drxj_cfg_smart_ant_io {
 };
 
 /*
-* /struct struct drxj_cfg_smart_ant * Set smart antenna.
+* /struct drxj_cfg_smart_ant * Set smart antenna.
 */
 	struct drxj_cfg_smart_ant {
 		enum drxj_cfg_smart_ant_io io;
@@ -159,7 +159,7 @@ struct drxj_agc_status {
 /* DRXJ_CFG_AGC_RF, DRXJ_CFG_AGC_IF */
 
 /*
-* /struct enum drxj_agc_ctrl_mode * Available AGCs modes in the DRXJ.
+* /enum drxj_agc_ctrl_mode * Available AGCs modes in the DRXJ.
 */
 	enum drxj_agc_ctrl_mode {
 		DRX_AGC_CTRL_AUTO = 0,
@@ -167,7 +167,7 @@ struct drxj_agc_status {
 		DRX_AGC_CTRL_OFF};
 
 /*
-* /struct struct drxj_cfg_agc * Generic interface for all AGCs present on the DRXJ.
+* /struct drxj_cfg_agc * Generic interface for all AGCs present on the DRXJ.
 */
 	struct drxj_cfg_agc {
 		enum drx_standard standard;	/* standard for which these settings apply */
@@ -183,7 +183,7 @@ struct drxj_agc_status {
 /* DRXJ_CFG_PRE_SAW */
 
 /*
-* /struct struct drxj_cfg_pre_saw * Interface to configure pre SAW sense.
+* /struct drxj_cfg_pre_saw * Interface to configure pre SAW sense.
 */
 	struct drxj_cfg_pre_saw {
 		enum drx_standard standard;	/* standard to which these settings apply */
@@ -193,7 +193,7 @@ struct drxj_agc_status {
 /* DRXJ_CFG_AFE_GAIN */
 
 /*
-* /struct struct drxj_cfg_afe_gain * Interface to configure gain of AFE (LNA + PGA).
+* /struct drxj_cfg_afe_gain * Interface to configure gain of AFE (LNA + PGA).
 */
 	struct drxj_cfg_afe_gain {
 		enum drx_standard standard;	/* standard to which these settings apply */
@@ -220,14 +220,14 @@ struct drxj_agc_status {
 	};
 
 /*
-* /struct struct drxj_cfg_vsb_misc * symbol error rate
+* /struct drxj_cfg_vsb_misc * symbol error rate
 */
 	struct drxj_cfg_vsb_misc {
 		u32 symb_error;
 			      /*< symbol error rate sps */};
 
 /*
-* /enum enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
+* /enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
 *
 */
 	enum drxj_mpeg_start_width {
@@ -235,7 +235,7 @@ struct drxj_agc_status {
 		DRXJ_MPEG_START_WIDTH_8CLKCYC};
 
 /*
-* /enum enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
+* /enum drxj_mpeg_output_clock_rate * Mpeg output clock rate.
 *
 */
 	enum drxj_mpeg_output_clock_rate {
@@ -261,7 +261,7 @@ struct drxj_agc_status {
 		enum drxj_mpeg_start_width mpeg_start_width;  /*< set MPEG output start width */};
 
 /*
-* /enum enum drxj_xtal_freq * Supported external crystal reference frequency.
+* /enum drxj_xtal_freq * Supported external crystal reference frequency.
 */
 	enum drxj_xtal_freq {
 		DRXJ_XTAL_FREQ_RSVD,
@@ -270,14 +270,15 @@ struct drxj_agc_status {
 		DRXJ_XTAL_FREQ_4MHZ};
 
 /*
-* /enum enum drxj_xtal_freq * Supported external crystal reference frequency.
+* /enum drxj_xtal_freq * Supported external crystal reference frequency.
 */
 	enum drxji2c_speed {
 		DRXJ_I2C_SPEED_400KBPS,
 		DRXJ_I2C_SPEED_100KBPS};
 
 /*
-* /struct struct drxj_cfg_hw_cfg * Get hw configuration, such as crystal reference frequency, I2C speed, etc...
+* /struct drxj_cfg_hw_cfg * Get hw configuration, such as crystal
+*  reference frequency, I2C speed, etc...
 */
 	struct drxj_cfg_hw_cfg {
 		enum drxj_xtal_freq xtal_freq;
@@ -364,7 +365,7 @@ struct drxj_cfg_oob_misc {
 		DRXJ_SIF_ATTENUATION_9DB};
 
 /*
-* /struct struct drxj_cfg_atv_output * SIF attenuation setting.
+* /struct drxj_cfg_atv_output * SIF attenuation setting.
 *
 */
 struct drxj_cfg_atv_output {
@@ -453,10 +454,10 @@ struct drxj_cfg_atv_output {
 		enum drxuio_mode uio_gpio_mode; /*< current mode of ASEL pin                         */
 		enum drxuio_mode uio_irqn_mode; /*< current mode of IRQN pin                         */
 
-		/* IQM fs frequecy shift and inversion */
+		/* IQM fs frequency shift and inversion */
 		u32 iqm_fs_rate_ofs;	   /*< frequency shifter setting after setchannel      */
 		bool pos_image;	   /*< True: positive image                            */
-		/* IQM RC frequecy shift */
+		/* IQM RC frequency shift */
 		u32 iqm_rc_rate_ofs;	   /*< frequency shifter setting after setchannel      */
 
 		/* ATV configuration */
-- 
GitLab


From 51cb8e206afd463e66f16869e5ddc95bef107142 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 21 May 2021 15:37:42 +0200
Subject: [PATCH 1296/3804] ALSA: usb-audio: fix control-request direction

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the UAC2_CS_CUR request which erroneously used usb_sndctrlpipe().

Fixes: 93db51d06b32 ("ALSA: usb-audio: Check valid altsetting at parsing rates for UAC2/3")
Cc: stable@vger.kernel.org      # 5.10
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20210521133742.18098-1-johan@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/format.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/format.c b/sound/usb/format.c
index e6ff317a67852..2287f8c653150 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -436,7 +436,7 @@ static bool check_valid_altsetting_v2v3(struct snd_usb_audio *chip, int iface,
 	if (snd_BUG_ON(altsetting >= 64 - 8))
 		return false;
 
-	err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC2_CS_CUR,
+	err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR,
 			      USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
 			      UAC2_AS_VAL_ALT_SETTINGS << 8,
 			      iface, &raw_data, sizeof(raw_data));
-- 
GitLab


From 764fa6e686e0107c0357a988d193de04cf047583 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 21 May 2021 17:50:12 +0930
Subject: [PATCH 1297/3804] ALSA: usb-audio: scarlett2: Fix device hang with
 ehci-pci

Use usb_rcvctrlpipe() not usb_sndctrlpipe() for USB control input in
the Scarlett Gen 2 mixer driver. This fixes the device hang during
initialisation when used with the ehci-pci host driver.

Fixes: 9e4d5c1be21f ("ALSA: usb-audio: Scarlett Gen 2 mixer interface")
Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/66a3d05dac325d5b53e4930578e143cef1f50dbe.1621584566.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index 560c2ade829d0..dcff3e3a49f36 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -635,7 +635,7 @@ static int scarlett2_usb(
 	/* send a second message to get the response */
 
 	err = snd_usb_ctl_msg(mixer->chip->dev,
-			usb_sndctrlpipe(mixer->chip->dev, 0),
+			usb_rcvctrlpipe(mixer->chip->dev, 0),
 			SCARLETT2_USB_VENDOR_SPECIFIC_CMD_RESP,
 			USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
 			0,
-- 
GitLab


From 265d1a90e4fb6d3264d8122fbd10760e5e733be6 Mon Sep 17 00:00:00 2001
From: "Geoffrey D. Bennett" <g@b4.vu>
Date: Fri, 21 May 2021 17:50:13 +0930
Subject: [PATCH 1298/3804] ALSA: usb-audio: scarlett2: Improve driver startup
 messages

Add separate init function to call the existing controls_create
function so a custom error can be displayed if initialisation fails.

Use info level instead of error for notifications.

Display the VID/PID so device_setup is targeted to the right device.

Display "enabled" message to easily confirm that the driver is loaded.

Signed-off-by: Geoffrey D. Bennett <g@b4.vu>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/b5d140c65f640faf2427e085fbbc0297b32e5fce.1621584566.git.g@b4.vu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_quirks.c        |  2 +-
 sound/usb/mixer_scarlett_gen2.c | 79 +++++++++++++++++++++------------
 sound/usb/mixer_scarlett_gen2.h |  2 +-
 3 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c
index fda66b2dbb017..37ad77524c0b3 100644
--- a/sound/usb/mixer_quirks.c
+++ b/sound/usb/mixer_quirks.c
@@ -3060,7 +3060,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer)
 	case USB_ID(0x1235, 0x8203): /* Focusrite Scarlett 6i6 2nd Gen */
 	case USB_ID(0x1235, 0x8204): /* Focusrite Scarlett 18i8 2nd Gen */
 	case USB_ID(0x1235, 0x8201): /* Focusrite Scarlett 18i20 2nd Gen */
-		err = snd_scarlett_gen2_controls_create(mixer);
+		err = snd_scarlett_gen2_init(mixer);
 		break;
 
 	case USB_ID(0x041e, 0x323b): /* Creative Sound Blaster E1 */
diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index dcff3e3a49f36..3ad8f61a2095f 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -1997,38 +1997,11 @@ static int scarlett2_mixer_status_create(struct usb_mixer_interface *mixer)
 	return usb_submit_urb(mixer->urb, GFP_KERNEL);
 }
 
-/* Entry point */
-int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer)
+int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer,
+				      const struct scarlett2_device_info *info)
 {
-	const struct scarlett2_device_info *info;
 	int err;
 
-	/* only use UAC_VERSION_2 */
-	if (!mixer->protocol)
-		return 0;
-
-	switch (mixer->chip->usb_id) {
-	case USB_ID(0x1235, 0x8203):
-		info = &s6i6_gen2_info;
-		break;
-	case USB_ID(0x1235, 0x8204):
-		info = &s18i8_gen2_info;
-		break;
-	case USB_ID(0x1235, 0x8201):
-		info = &s18i20_gen2_info;
-		break;
-	default: /* device not (yet) supported */
-		return -EINVAL;
-	}
-
-	if (!(mixer->chip->setup & SCARLETT2_ENABLE)) {
-		usb_audio_err(mixer->chip,
-			"Focusrite Scarlett Gen 2 Mixer Driver disabled; "
-			"use options snd_usb_audio device_setup=1 "
-			"to enable and report any issues to g@b4.vu");
-		return 0;
-	}
-
 	/* Initialise private data, routing, sequence number */
 	err = scarlett2_init_private(mixer, info);
 	if (err < 0)
@@ -2073,3 +2046,51 @@ int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer)
 
 	return 0;
 }
+
+int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer)
+{
+	struct snd_usb_audio *chip = mixer->chip;
+	const struct scarlett2_device_info *info;
+	int err;
+
+	/* only use UAC_VERSION_2 */
+	if (!mixer->protocol)
+		return 0;
+
+	switch (chip->usb_id) {
+	case USB_ID(0x1235, 0x8203):
+		info = &s6i6_gen2_info;
+		break;
+	case USB_ID(0x1235, 0x8204):
+		info = &s18i8_gen2_info;
+		break;
+	case USB_ID(0x1235, 0x8201):
+		info = &s18i20_gen2_info;
+		break;
+	default: /* device not (yet) supported */
+		return -EINVAL;
+	}
+
+	if (!(chip->setup & SCARLETT2_ENABLE)) {
+		usb_audio_info(chip,
+			"Focusrite Scarlett Gen 2 Mixer Driver disabled; "
+			"use options snd_usb_audio vid=0x%04x pid=0x%04x "
+			"device_setup=1 to enable and report any issues "
+			"to g@b4.vu",
+			USB_ID_VENDOR(chip->usb_id),
+			USB_ID_PRODUCT(chip->usb_id));
+		return 0;
+	}
+
+	usb_audio_info(chip,
+		"Focusrite Scarlett Gen 2 Mixer Driver enabled pid=0x%04x",
+		USB_ID_PRODUCT(chip->usb_id));
+
+	err = snd_scarlett_gen2_controls_create(mixer, info);
+	if (err < 0)
+		usb_audio_err(mixer->chip,
+			      "Error initialising Scarlett Mixer Driver: %d",
+			      err);
+
+	return err;
+}
diff --git a/sound/usb/mixer_scarlett_gen2.h b/sound/usb/mixer_scarlett_gen2.h
index 52e1dad77afd4..668c6b0cb50a6 100644
--- a/sound/usb/mixer_scarlett_gen2.h
+++ b/sound/usb/mixer_scarlett_gen2.h
@@ -2,6 +2,6 @@
 #ifndef __USB_MIXER_SCARLETT_GEN2_H
 #define __USB_MIXER_SCARLETT_GEN2_H
 
-int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer);
+int snd_scarlett_gen2_init(struct usb_mixer_interface *mixer);
 
 #endif /* __USB_MIXER_SCARLETT_GEN2_H */
-- 
GitLab


From c3d175e4852bfdfd1e4021dff8715fc407dedd98 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 May 2021 16:15:48 +0200
Subject: [PATCH 1299/3804] cpufreq: intel_pstate: hybrid: Avoid exposing two
 global attributes

The turbo_pct and num_pstates sysfs attributes represent CPU
properties that may be different for different types of CPUs in
a hybrid processor, so avoid exposing them in that case.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst |  6 ++++++
 drivers/cpufreq/intel_pstate.c                | 15 +++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index df29b4f1f2195..235f1025a7e60 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -365,6 +365,9 @@ argument is passed to the kernel in the command line.
 	inclusive) including both turbo and non-turbo P-states (see
 	`Turbo P-states Support`_).
 
+	This attribute is present only if the value exposed by it is the same
+	for all of the CPUs in the system.
+
 	The value of this attribute is not affected by the ``no_turbo``
 	setting described `below <no_turbo_attr_>`_.
 
@@ -374,6 +377,9 @@ argument is passed to the kernel in the command line.
 	Ratio of the `turbo range <turbo_>`_ size to the size of the entire
 	range of supported P-states, in percent.
 
+	This attribute is present only if the value exposed by it is the same
+	for all of the CPUs in the system.
+
 	This attribute is read-only.
 
 .. _no_turbo_attr:
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0e69dffd5a767..45f59e2827fe9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1365,8 +1365,6 @@ define_one_global_rw(energy_efficiency);
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
 	&no_turbo.attr,
-	&turbo_pct.attr,
-	&num_pstates.attr,
 	NULL
 };
 
@@ -1391,6 +1389,14 @@ static void __init intel_pstate_sysfs_expose_params(void)
 	if (WARN_ON(rc))
 		return;
 
+	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		rc = sysfs_create_file(intel_pstate_kobject, &turbo_pct.attr);
+		WARN_ON(rc);
+
+		rc = sysfs_create_file(intel_pstate_kobject, &num_pstates.attr);
+		WARN_ON(rc);
+	}
+
 	/*
 	 * If per cpu limits are enforced there are no global limits, so
 	 * return without creating max/min_perf_pct attributes
@@ -1417,6 +1423,11 @@ static void __init intel_pstate_sysfs_remove(void)
 
 	sysfs_remove_group(intel_pstate_kobject, &intel_pstate_attr_group);
 
+	if (!boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		sysfs_remove_file(intel_pstate_kobject, &num_pstates.attr);
+		sysfs_remove_file(intel_pstate_kobject, &turbo_pct.attr);
+	}
+
 	if (!per_cpu_limits) {
 		sysfs_remove_file(intel_pstate_kobject, &max_perf_pct.attr);
 		sysfs_remove_file(intel_pstate_kobject, &min_perf_pct.attr);
-- 
GitLab


From eb3693f0521e020dd8617c7fa3ddf5c9f0d8dea0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 12 May 2021 16:19:30 +0200
Subject: [PATCH 1300/3804] cpufreq: intel_pstate: hybrid: CPU-specific scaling
 factor

The scaling factor between HWP performance levels and CPU frequency
may be different for different types of CPUs in a hybrid processor
and in general the HWP performance levels need not correspond to
"P-states" representing values that would be written to
MSR_IA32_PERF_CTL if HWP was disabled.

However, the policy limits control in cpufreq is defined in terms
of CPU frequency, so it is necessary to map the frequency limits set
through that interface to HWP performance levels with reasonable
accuracy and the behavior of that interface on hybrid processors
has to be compatible with its behavior on non-hybrid ones.

To address this problem, use the observations that (1) on hybrid
processors the sysfs interface can operate by mapping frequency
to "P-states" and translating those "P-states" to specific HWP
performance levels of the given CPU and (2) the scaling factor
between the MSR_IA32_PERF_CTL "P-states" and CPU frequency can be
regarded as a known value.  Moreover, the mapping between the
HWP performance levels and CPU frequency can be assumed to be
linear and such that HWP performance level 0 corresponds to the
frequency value of 0, so it is only necessary to know the
frequency corresponding to one specific HWP performance level
to compute the scaling factor applicable to all of them.

One possibility is to take the nominal performance value from CPPC,
if available, and use cpu_khz as the corresponding frequency.  If
the CPPC capabilities interface is not there or the nominal
performance value provided by it is out of range, though, something
else needs to be done.

Namely, the guaranteed performance level either from CPPC or from
MSR_HWP_CAPABILITIES can be used instead, but the corresponding
frequency needs to be determined.  That can be done by computing the
product of the (known) scaling factor between the MSR_IA32_PERF_CTL
P-states and CPU frequency (the PERF_CTL scaling factor) and the
P-state value referred to as the "TDP ratio".

If the HWP-to-frequency scaling factor value obtained in one of the
ways above turns out to be equal to the PERF_CTL scaling factor, it
can be assumed that the number of HWP performance levels is equal to
the number of P-states and the given CPU can be handled as though
this was not a hybrid processor.

Otherwise, one more adjustment may still need to be made, because the
HWP-to-frequency scaling factor computed so far may not be accurate
enough (e.g. because the CPPC information does not match the exact
behavior of the processor).  Specifically, in that case the frequency
corresponding to the highest HWP performance value from
MSR_HWP_CAPABILITIES (computed as the product of that value and the
HWP-to-frequency scaling factor) cannot exceed the frequency that
corresponds to the maximum 1-core turbo P-state value from
MSR_TURBO_RATIO_LIMIT (computed as the product of that value and the
PERF_CTL scaling factor) and the HWP-to-frequency scaling factor may
need to be adjusted accordingly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 233 +++++++++++++++++++++++++++++----
 1 file changed, 210 insertions(+), 23 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 45f59e2827fe9..b0afb8629767b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -121,9 +121,10 @@ struct sample {
  * @max_pstate_physical:This is physical Max P state for a processor
  *			This can be higher than the max_pstate which can
  *			be limited by platform thermal design power limits
- * @scaling:		Scaling factor to  convert frequency to cpufreq
- *			frequency units
+ * @perf_ctl_scaling:	PERF_CTL P-state to frequency scaling factor
+ * @scaling:		Scaling factor between performance and frequency
  * @turbo_pstate:	Max Turbo P state possible for this platform
+ * @min_freq:		@min_pstate frequency in cpufreq units
  * @max_freq:		@max_pstate frequency in cpufreq units
  * @turbo_freq:		@turbo_pstate frequency in cpufreq units
  *
@@ -134,8 +135,10 @@ struct pstate_data {
 	int	min_pstate;
 	int	max_pstate;
 	int	max_pstate_physical;
+	int	perf_ctl_scaling;
 	int	scaling;
 	int	turbo_pstate;
+	unsigned int min_freq;
 	unsigned int max_freq;
 	unsigned int turbo_freq;
 };
@@ -489,6 +492,149 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
+static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
+{
+	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
+}
+
+static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
+					struct cppc_perf_caps *caps)
+{
+	if (cppc_get_perf_caps(cpu->cpu, caps))
+		return false;
+
+	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
+}
+
+static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
+{
+	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
+
+	cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
+}
+
+/**
+ * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * @cpu: Target CPU.
+ *
+ * On hybrid processors, HWP may expose more performance levels than there are
+ * P-states accessible through the PERF_CTL interface.  If that happens, the
+ * scaling factor between HWP performance levels and CPU frequency will be less
+ * than the scaling factor between P-state values and CPU frequency.
+ *
+ * In that case, the scaling factor between HWP performance levels and CPU
+ * frequency needs to be determined which can be done with the help of the
+ * observation that certain HWP performance levels should correspond to certain
+ * P-states, like for example the HWP highest performance should correspond
+ * to the maximum turbo P-state of the CPU.
+ */
+static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+{
+	struct cppc_perf_caps caps;
+	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
+	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+	int perf_ctl_turbo = pstate_funcs.get_turbo();
+	int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
+	int perf_ctl_max = pstate_funcs.get_max();
+	int max_freq = perf_ctl_max * perf_ctl_scaling;
+	int scaling = INT_MAX;
+	int freq;
+
+	pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
+	pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+	pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
+	pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
+
+	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
+	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
+
+	if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
+		if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
+			pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
+
+			/*
+			 * If the CPPC nominal performance is valid, it can be
+			 * assumed to correspond to cpu_khz.
+			 */
+			if (caps.nominal_perf == perf_ctl_max_phys) {
+				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+				return;
+			}
+			scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
+		} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
+			pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
+
+			/*
+			 * If the CPPC guaranteed performance is valid, it can
+			 * be assumed to correspond to max_freq.
+			 */
+			if (caps.guaranteed_perf == perf_ctl_max) {
+				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+				return;
+			}
+			scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
+		}
+	}
+	/*
+	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
+	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
+	 * the assumption that it corresponds to max_freq.
+	 */
+	if (scaling > perf_ctl_scaling) {
+		pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
+
+		if (cpu->pstate.max_pstate == perf_ctl_max) {
+			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+			return;
+		}
+		scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
+		if (scaling > perf_ctl_scaling) {
+			/*
+			 * This should not happen, because it would mean that
+			 * the number of HWP perf levels was less than the
+			 * number of P-states, so use the PERF_CTL scaling in
+			 * that case.
+			 */
+			pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
+				scaling);
+
+			intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+			return;
+		}
+	}
+
+	/*
+	 * If the product of the HWP performance scaling factor obtained above
+	 * and the HWP_CAP highest performance is greater than the maximum turbo
+	 * frequency corresponding to the pstate_funcs.get_turbo() return value,
+	 * the scaling factor is too high, so recompute it so that the HWP_CAP
+	 * highest performance corresponds to the maximum turbo frequency.
+	 */
+	if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
+		pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
+
+		cpu->pstate.turbo_freq = turbo_freq;
+		scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
+	}
+
+	cpu->pstate.scaling = scaling;
+
+	pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+
+	cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
+					 perf_ctl_scaling);
+
+	freq = perf_ctl_max_phys * perf_ctl_scaling;
+	cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+
+	cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+	/*
+	 * Cast the min P-state value retrieved via pstate_funcs.get_min() to
+	 * the effective range of HWP performance levels.
+	 */
+	cpu->pstate.min_pstate = DIV_ROUND_UP(cpu->pstate.min_freq, scaling);
+}
+
 static inline void update_turbo_state(void)
 {
 	u64 misc_en;
@@ -795,19 +941,22 @@ cpufreq_freq_attr_rw(energy_performance_preference);
 
 static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
 {
-	struct cpudata *cpu;
-	u64 cap;
-	int ratio;
+	struct cpudata *cpu = all_cpu_data[policy->cpu];
+	int ratio, freq;
 
 	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
 	if (ratio <= 0) {
+		u64 cap;
+
 		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
 		ratio = HWP_GUARANTEED_PERF(cap);
 	}
 
-	cpu = all_cpu_data[policy->cpu];
+	freq = ratio * cpu->pstate.scaling;
+	if (cpu->pstate.scaling != cpu->pstate.perf_ctl_scaling)
+		freq = rounddown(freq, cpu->pstate.perf_ctl_scaling);
 
-	return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
+	return sprintf(buf, "%d\n", freq);
 }
 
 cpufreq_freq_attr_ro(base_frequency);
@@ -831,9 +980,20 @@ static void __intel_pstate_get_hwp_cap(struct cpudata *cpu)
 
 static void intel_pstate_get_hwp_cap(struct cpudata *cpu)
 {
+	int scaling = cpu->pstate.scaling;
+
 	__intel_pstate_get_hwp_cap(cpu);
-	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+	cpu->pstate.max_freq = cpu->pstate.max_pstate * scaling;
+	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * scaling;
+	if (scaling != cpu->pstate.perf_ctl_scaling) {
+		int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
+
+		cpu->pstate.max_freq = rounddown(cpu->pstate.max_freq,
+						 perf_ctl_scaling);
+		cpu->pstate.turbo_freq = rounddown(cpu->pstate.turbo_freq,
+						   perf_ctl_scaling);
+	}
 }
 
 static void intel_pstate_hwp_set(unsigned int cpu)
@@ -1724,19 +1884,33 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
+	bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
+	int perf_ctl_max_phys = pstate_funcs.get_max_physical();
+	int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
+					    pstate_funcs.get_scaling();
+
 	cpu->pstate.min_pstate = pstate_funcs.get_min();
-	cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
-	cpu->pstate.scaling = pstate_funcs.get_scaling();
+	cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
+	cpu->pstate.perf_ctl_scaling = perf_ctl_scaling;
 
 	if (hwp_active && !hwp_mode_bdw) {
 		__intel_pstate_get_hwp_cap(cpu);
+
+		if (hybrid_cpu)
+			intel_pstate_hybrid_hwp_calibrate(cpu);
+		else
+			cpu->pstate.scaling = perf_ctl_scaling;
 	} else {
+		cpu->pstate.scaling = perf_ctl_scaling;
 		cpu->pstate.max_pstate = pstate_funcs.get_max();
 		cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
 	}
 
-	cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-	cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	if (cpu->pstate.scaling == perf_ctl_scaling) {
+		cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
+		cpu->pstate.max_freq = cpu->pstate.max_pstate * perf_ctl_scaling;
+		cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * perf_ctl_scaling;
+	}
 
 	if (pstate_funcs.get_aperf_mperf_shift)
 		cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2206,23 +2380,34 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu,
 					    unsigned int policy_min,
 					    unsigned int policy_max)
 {
-	int scaling = cpu->pstate.scaling;
+	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int32_t max_policy_perf, min_policy_perf;
 
+	max_policy_perf = policy_max / perf_ctl_scaling;
+	if (policy_max == policy_min) {
+		min_policy_perf = max_policy_perf;
+	} else {
+		min_policy_perf = policy_min / perf_ctl_scaling;
+		min_policy_perf = clamp_t(int32_t, min_policy_perf,
+					  0, max_policy_perf);
+	}
+
 	/*
 	 * HWP needs some special consideration, because HWP_REQUEST uses
 	 * abstract values to represent performance rather than pure ratios.
 	 */
-	if (hwp_active)
+	if (hwp_active) {
 		intel_pstate_get_hwp_cap(cpu);
 
-	max_policy_perf = policy_max / scaling;
-	if (policy_max == policy_min) {
-		min_policy_perf = max_policy_perf;
-	} else {
-		min_policy_perf = policy_min / scaling;
-		min_policy_perf = clamp_t(int32_t, min_policy_perf,
-					  0, max_policy_perf);
+		if (cpu->pstate.scaling != perf_ctl_scaling) {
+			int scaling = cpu->pstate.scaling;
+			int freq;
+
+			freq = max_policy_perf * perf_ctl_scaling;
+			max_policy_perf = DIV_ROUND_UP(freq, scaling);
+			freq = min_policy_perf * perf_ctl_scaling;
+			min_policy_perf = DIV_ROUND_UP(freq, scaling);
+		}
 	}
 
 	pr_debug("cpu:%d min_policy_perf:%d max_policy_perf:%d\n",
@@ -2416,7 +2601,7 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
 	cpu->min_perf_ratio = 0;
 
 	/* cpuinfo and default policy values */
-	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+	policy->cpuinfo.min_freq = cpu->pstate.min_freq;
 	update_turbo_state();
 	global.turbo_disabled_mf = global.turbo_disabled;
 	policy->cpuinfo.max_freq = global.turbo_disabled ?
@@ -3146,6 +3331,8 @@ hwp_cpu_matched:
 		}
 
 		pr_info("HWP enabled\n");
+	} else if (boot_cpu_has(X86_FEATURE_HYBRID_CPU)) {
+		pr_warn("Problematic setup: Hybrid processor with disabled HWP\n");
 	}
 
 	return 0;
-- 
GitLab


From 9eea2904292c2d8fa98df141d3bf7c41ec9dc1b5 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:42 +0200
Subject: [PATCH 1301/3804] evm: Execute evm_inode_init_security() only when an
 HMAC key is loaded

evm_inode_init_security() requires an HMAC key to calculate the HMAC on
initial xattrs provided by LSMs. However, it checks generically whether a
key has been loaded, including also public keys, which is not correct as
public keys are not suitable to calculate the HMAC.

Originally, support for signature verification was introduced to verify a
possibly immutable initial ram disk, when no new files are created, and to
switch to HMAC for the root filesystem. By that time, an HMAC key should
have been loaded and usable to calculate HMACs for new files.

More recently support for requiring an HMAC key was removed from the
kernel, so that signature verification can be used alone. Since this is a
legitimate use case, evm_inode_init_security() should not return an error
when no HMAC key has been loaded.

This patch fixes this problem by replacing the evm_key_loaded() check with
a check of the EVM_INIT_HMAC flag in evm_initialized.

Fixes: 26ddabfe96b ("evm: enable EVM when X509 certificate is loaded")
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Cc: stable@vger.kernel.org # 4.5.x
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0de367aaa2d31..7ac5204c8d1f2 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -521,7 +521,7 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
 }
 
 /*
- * evm_inode_init_security - initializes security.evm
+ * evm_inode_init_security - initializes security.evm HMAC value
  */
 int evm_inode_init_security(struct inode *inode,
 				 const struct xattr *lsm_xattr,
@@ -530,7 +530,8 @@ int evm_inode_init_security(struct inode *inode,
 	struct evm_xattr *xattr_data;
 	int rc;
 
-	if (!evm_key_loaded() || !evm_protected_xattr(lsm_xattr->name))
+	if (!(evm_initialized & EVM_INIT_HMAC) ||
+	    !evm_protected_xattr(lsm_xattr->name))
 		return 0;
 
 	xattr_data = kzalloc(sizeof(*xattr_data), GFP_NOFS);
-- 
GitLab


From aa2ead71d9daa1b6645e1d25b1f14a6286b114d0 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:43 +0200
Subject: [PATCH 1302/3804] evm: Load EVM key in ima_load_x509() to avoid
 appraisal

The public builtin keys do not need to be appraised by IMA as the
restriction on the IMA/EVM trusted keyrings ensures that a key can be
loaded only if it is signed with a key on the builtin or secondary
keyrings.

However, when evm_load_x509() is called, appraisal is already enabled and
a valid IMA signature must be added to the EVM key to pass verification.

Since the restriction is applied on both IMA and EVM trusted keyrings, it
is safe to disable appraisal also when the EVM key is loaded. This patch
calls evm_load_x509() inside ima_load_x509() if CONFIG_IMA_LOAD_X509 is
enabled, which crosses the normal IMA and EVM boundary.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/iint.c         | 4 +++-
 security/integrity/ima/ima_init.c | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index fca8a9409e4ae..8638976f7990b 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -208,7 +208,9 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 void __init integrity_load_keys(void)
 {
 	ima_load_x509();
-	evm_load_x509();
+
+	if (!IS_ENABLED(CONFIG_IMA_LOAD_X509))
+		evm_load_x509();
 }
 
 static int __init integrity_fs_init(void)
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 6e8742916d1de..5076a7d9d23ef 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -108,6 +108,10 @@ void __init ima_load_x509(void)
 
 	ima_policy_flag &= ~unset_flags;
 	integrity_load_x509(INTEGRITY_KEYRING_IMA, CONFIG_IMA_X509_PATH);
+
+	/* load also EVM key to avoid appraisal */
+	evm_load_x509();
+
 	ima_policy_flag |= unset_flags;
 }
 #endif
-- 
GitLab


From 9acc89d31f0c94c8e573ed61f3e4340bbd526d0c Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:44 +0200
Subject: [PATCH 1303/3804] evm: Refuse EVM_ALLOW_METADATA_WRITES only if an
 HMAC key is loaded

EVM_ALLOW_METADATA_WRITES is an EVM initialization flag that can be set to
temporarily disable metadata verification until all xattrs/attrs necessary
to verify an EVM portable signature are copied to the file. This flag is
cleared when EVM is initialized with an HMAC key, to avoid that the HMAC is
calculated on unverified xattrs/attrs.

Currently EVM unnecessarily denies setting this flag if EVM is initialized
with a public key, which is not a concern as it cannot be used to trust
xattrs/attrs updates. This patch removes this limitation.

Fixes: ae1ba1676b88e ("EVM: Allow userland to permit modification of EVM-protected metadata")
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Cc: stable@vger.kernel.org # 4.16.x
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/ABI/testing/evm      | 26 ++++++++++++++++++++++++--
 security/integrity/evm/evm_secfs.c |  8 ++++----
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 3c477ba48a312..2243b72e41107 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -49,8 +49,30 @@ Description:
 		modification of EVM-protected metadata and
 		disable all further modification of policy
 
-		Note that once a key has been loaded, it will no longer be
-		possible to enable metadata modification.
+		Echoing a value is additive, the new value is added to the
+		existing initialization flags.
+
+		For example, after::
+
+		  echo 2 ><securityfs>/evm
+
+		another echo can be performed::
+
+		  echo 1 ><securityfs>/evm
+
+		and the resulting value will be 3.
+
+		Note that once an HMAC key has been loaded, it will no longer
+		be possible to enable metadata modification. Signaling that an
+		HMAC key has been loaded will clear the corresponding flag.
+		For example, if the current value is 6 (2 and 4 set)::
+
+		  echo 1 ><securityfs>/evm
+
+		will set the new value to 3 (4 cleared).
+
+		Loading an HMAC key is the only way to disable metadata
+		modification.
 
 		Until key loading has been signaled EVM can not create
 		or validate the 'security.evm' xattr, but returns
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index 0007d3362754d..5f0da41bccd07 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -81,12 +81,12 @@ static ssize_t evm_write_key(struct file *file, const char __user *buf,
 	if (!i || (i & ~EVM_INIT_MASK) != 0)
 		return -EINVAL;
 
-	/* Don't allow a request to freshly enable metadata writes if
-	 * keys are loaded.
+	/*
+	 * Don't allow a request to enable metadata writes if
+	 * an HMAC key is loaded.
 	 */
 	if ((i & EVM_ALLOW_METADATA_WRITES) &&
-	    ((evm_initialized & EVM_KEY_MASK) != 0) &&
-	    !(evm_initialized & EVM_ALLOW_METADATA_WRITES))
+	    (evm_initialized & EVM_INIT_HMAC) != 0)
 		return -EPERM;
 
 	if (i & EVM_INIT_HMAC) {
-- 
GitLab


From e3ccfe1ad7d895487977ef64eda3441d16c9851a Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:45 +0200
Subject: [PATCH 1304/3804] evm: Introduce evm_revalidate_status()

When EVM_ALLOW_METADATA_WRITES is set, EVM allows any operation on
metadata. Its main purpose is to allow users to freely set metadata when it
is protected by a portable signature, until an HMAC key is loaded.

However, callers of evm_verifyxattr() are not notified about metadata
changes and continue to rely on the last status returned by the function.
For example IMA, since it caches the appraisal result, will not call
evm_verifyxattr() again until the appraisal flags are cleared, and will grant
access to the file even if there was a metadata operation that made the
portable signature invalid.

This patch introduces evm_revalidate_status(), which callers of
evm_verifyxattr() can use in their xattr hooks to determine whether
re-validation is necessary and to do the proper actions. IMA calls it in
its xattr hooks to reset the appraisal flags, so that the EVM status is
re-evaluated after a metadata operation.

Lastly, this patch also adds a call to evm_reset_status() in
evm_inode_post_setattr() to invalidate the cached EVM status after a
setattr operation.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/evm.h                   |  6 ++++
 security/integrity/evm/evm_main.c     | 40 ++++++++++++++++++++++++---
 security/integrity/ima/ima_appraise.c | 15 ++++++----
 3 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/include/linux/evm.h b/include/linux/evm.h
index 8302bc29bb358..39bb17a8236b1 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -35,6 +35,7 @@ extern void evm_inode_post_removexattr(struct dentry *dentry,
 extern int evm_inode_init_security(struct inode *inode,
 				   const struct xattr *xattr_array,
 				   struct xattr *evm);
+extern bool evm_revalidate_status(const char *xattr_name);
 #ifdef CONFIG_FS_POSIX_ACL
 extern int posix_xattr_acl(const char *xattrname);
 #else
@@ -104,5 +105,10 @@ static inline int evm_inode_init_security(struct inode *inode,
 	return 0;
 }
 
+static inline bool evm_revalidate_status(const char *xattr_name)
+{
+	return false;
+}
+
 #endif /* CONFIG_EVM */
 #endif /* LINUX_EVM_H */
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 7ac5204c8d1f2..782915117175f 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -425,6 +425,31 @@ static void evm_reset_status(struct inode *inode)
 		iint->evm_status = INTEGRITY_UNKNOWN;
 }
 
+/**
+ * evm_revalidate_status - report whether EVM status re-validation is necessary
+ * @xattr_name: pointer to the affected extended attribute name
+ *
+ * Report whether callers of evm_verifyxattr() should re-validate the
+ * EVM status.
+ *
+ * Return true if re-validation is necessary, false otherwise.
+ */
+bool evm_revalidate_status(const char *xattr_name)
+{
+	if (!evm_key_loaded())
+		return false;
+
+	/* evm_inode_post_setattr() passes NULL */
+	if (!xattr_name)
+		return true;
+
+	if (!evm_protected_xattr(xattr_name) && !posix_xattr_acl(xattr_name) &&
+	    strcmp(xattr_name, XATTR_NAME_EVM))
+		return false;
+
+	return true;
+}
+
 /**
  * evm_inode_post_setxattr - update 'security.evm' to reflect the changes
  * @dentry: pointer to the affected dentry
@@ -441,12 +466,14 @@ static void evm_reset_status(struct inode *inode)
 void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
 			     const void *xattr_value, size_t xattr_value_len)
 {
-	if (!evm_key_loaded() || (!evm_protected_xattr(xattr_name)
-				  && !posix_xattr_acl(xattr_name)))
+	if (!evm_revalidate_status(xattr_name))
 		return;
 
 	evm_reset_status(dentry->d_inode);
 
+	if (!strcmp(xattr_name, XATTR_NAME_EVM))
+		return;
+
 	evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len);
 }
 
@@ -462,11 +489,14 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
  */
 void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
 {
-	if (!evm_key_loaded() || !evm_protected_xattr(xattr_name))
+	if (!evm_revalidate_status(xattr_name))
 		return;
 
 	evm_reset_status(dentry->d_inode);
 
+	if (!strcmp(xattr_name, XATTR_NAME_EVM))
+		return;
+
 	evm_update_evmxattr(dentry, xattr_name, NULL, 0);
 }
 
@@ -513,9 +543,11 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
  */
 void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
 {
-	if (!evm_key_loaded())
+	if (!evm_revalidate_status(NULL))
 		return;
 
+	evm_reset_status(dentry->d_inode);
+
 	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 		evm_update_evmxattr(dentry, NULL, NULL, 0);
 }
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 4e5eb0236278a..03894769dffa5 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -570,6 +570,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		       const void *xattr_value, size_t xattr_value_len)
 {
 	const struct evm_ima_xattr_data *xvalue = xattr_value;
+	int digsig = 0;
 	int result;
 
 	result = ima_protect_xattr(dentry, xattr_name, xattr_value,
@@ -577,9 +578,12 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 	if (result == 1) {
 		if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
 			return -EINVAL;
-		ima_reset_appraise_flags(d_backing_inode(dentry),
-			xvalue->type == EVM_IMA_XATTR_DIGSIG);
-		result = 0;
+		digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG);
+	}
+	if (result == 1 || evm_revalidate_status(xattr_name)) {
+		ima_reset_appraise_flags(d_backing_inode(dentry), digsig);
+		if (result == 1)
+			result = 0;
 	}
 	return result;
 }
@@ -589,9 +593,10 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
 	int result;
 
 	result = ima_protect_xattr(dentry, xattr_name, NULL, 0);
-	if (result == 1) {
+	if (result == 1 || evm_revalidate_status(xattr_name)) {
 		ima_reset_appraise_flags(d_backing_inode(dentry), 0);
-		result = 0;
+		if (result == 1)
+			result = 0;
 	}
 	return result;
 }
-- 
GitLab


From fbdc21e9b038d00d0d56fa4e0f7701d42ae08f00 Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Tue, 18 May 2021 14:34:12 +0200
Subject: [PATCH 1305/3804] cpufreq: intel_pstate: Add Icelake servers support
 in no-HWP mode

Users may disable HWP in firmware, in which case intel_pstate wouldn't load
unless the CPU model is explicitly supported.

Add ICELAKE_X to the list of CPUs that can register intel_pstate while not
advertising the HWP capability. Without this change, an ICELAKE_X in no-HWP
mode could only use the acpi_cpufreq frequency scaling driver.

See also commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers
support").

Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b0afb8629767b..d36d3b72d86bb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2272,6 +2272,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(ATOM_GOLDMONT,	core_funcs),
 	X86_MATCH(ATOM_GOLDMONT_PLUS,	core_funcs),
 	X86_MATCH(SKYLAKE_X,		core_funcs),
+	X86_MATCH(ICELAKE_X,		core_funcs),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
-- 
GitLab


From 706c5328851d23dec4d9b433cbf864d900a54edf Mon Sep 17 00:00:00 2001
From: Giovanni Gherdovich <ggherdovich@suse.cz>
Date: Tue, 18 May 2021 14:34:13 +0200
Subject: [PATCH 1306/3804] cpufreq: intel_pstate: Add Cometlake support in
 no-HWP mode

Users may disable HWP in firmware, in which case intel_pstate wouldn't load
unless the CPU model is explicitly supported.

See also commit d8de7a44e11f ("cpufreq: intel_pstate: Add Skylake servers
support").

Suggested-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d36d3b72d86bb..03d8516e653e8 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2272,6 +2272,7 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
 	X86_MATCH(ATOM_GOLDMONT,	core_funcs),
 	X86_MATCH(ATOM_GOLDMONT_PLUS,	core_funcs),
 	X86_MATCH(SKYLAKE_X,		core_funcs),
+	X86_MATCH(COMETLAKE,		core_funcs),
 	X86_MATCH(ICELAKE_X,		core_funcs),
 	{}
 };
-- 
GitLab


From 4a804b8a4572dfc81c3a59709d49ae206e4370ba Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Thu, 20 May 2021 10:48:31 +0200
Subject: [PATCH 1307/3804] evm: Introduce evm_hmac_disabled() to safely ignore
 verification errors

When a file is being created, LSMs can set the initial label with the
inode_init_security hook. If no HMAC key is loaded, the new file will have
LSM xattrs but not the HMAC. It is also possible that the file remains
without protected xattrs after creation if no active LSM provided it, or
because the filesystem does not support them.

Unfortunately, EVM will deny any further metadata operation on new files,
as evm_protect_xattr() will return the INTEGRITY_NOLABEL error if protected
xattrs exist without security.evm, INTEGRITY_NOXATTRS if no protected
xattrs exist or INTEGRITY_UNKNOWN if xattrs are not supported. This would
limit the usability of EVM when only a public key is loaded, as commands
such as cp or tar with the option to preserve xattrs won't work.

This patch introduces the evm_hmac_disabled() function to determine whether
or not it is safe to ignore verification errors, based on the ability of
EVM to calculate HMACs. If the HMAC key is not loaded, and it cannot be
loaded in the future due to the EVM_SETUP_COMPLETE initialization flag,
allowing an operation despite the attrs/xattrs being found invalid will not
make them valid.

Since the post hooks can be executed even when the HMAC key is not loaded,
this patch also ensures that the EVM_INIT_HMAC initialization flag is set
before the post hooks call evm_update_evmxattr().

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Suggested-by: Mimi Zohar <zohar@linux.ibm.com> (for ensuring EVM_INIT_HMAC is set)
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_main.c | 39 ++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 782915117175f..4206c7e492ae5 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -90,6 +90,24 @@ static bool evm_key_loaded(void)
 	return (bool)(evm_initialized & EVM_KEY_MASK);
 }
 
+/*
+ * This function determines whether or not it is safe to ignore verification
+ * errors, based on the ability of EVM to calculate HMACs. If the HMAC key
+ * is not loaded, and it cannot be loaded in the future due to the
+ * EVM_SETUP_COMPLETE initialization flag, allowing an operation despite the
+ * attrs/xattrs being found invalid will not make them valid.
+ */
+static bool evm_hmac_disabled(void)
+{
+	if (evm_initialized & EVM_INIT_HMAC)
+		return false;
+
+	if (!(evm_initialized & EVM_SETUP_COMPLETE))
+		return false;
+
+	return true;
+}
+
 static int evm_find_protected_xattrs(struct dentry *dentry)
 {
 	struct inode *inode = d_backing_inode(dentry);
@@ -338,6 +356,10 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name,
 	if (evm_status == INTEGRITY_NOXATTRS) {
 		struct integrity_iint_cache *iint;
 
+		/* Exception if the HMAC is not going to be calculated. */
+		if (evm_hmac_disabled())
+			return 0;
+
 		iint = integrity_iint_find(d_backing_inode(dentry));
 		if (iint && (iint->flags & IMA_NEW_FILE))
 			return 0;
@@ -354,6 +376,10 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name,
 				    -EPERM, 0);
 	}
 out:
+	/* Exception if the HMAC is not going to be calculated. */
+	if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL ||
+	    evm_status == INTEGRITY_UNKNOWN))
+		return 0;
 	if (evm_status != INTEGRITY_PASS)
 		integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
 				    dentry->d_name.name, "appraise_metadata",
@@ -474,6 +500,9 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
 	if (!strcmp(xattr_name, XATTR_NAME_EVM))
 		return;
 
+	if (!(evm_initialized & EVM_INIT_HMAC))
+		return;
+
 	evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len);
 }
 
@@ -497,6 +526,9 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
 	if (!strcmp(xattr_name, XATTR_NAME_EVM))
 		return;
 
+	if (!(evm_initialized & EVM_INIT_HMAC))
+		return;
+
 	evm_update_evmxattr(dentry, xattr_name, NULL, 0);
 }
 
@@ -522,7 +554,9 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
 		return 0;
 	evm_status = evm_verify_current_integrity(dentry);
 	if ((evm_status == INTEGRITY_PASS) ||
-	    (evm_status == INTEGRITY_NOXATTRS))
+	    (evm_status == INTEGRITY_NOXATTRS) ||
+	    (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL ||
+	     evm_status == INTEGRITY_UNKNOWN)))
 		return 0;
 	integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
 			    dentry->d_name.name, "appraise_metadata",
@@ -548,6 +582,9 @@ void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
 
 	evm_reset_status(dentry->d_inode);
 
+	if (!(evm_initialized & EVM_INIT_HMAC))
+		return;
+
 	if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
 		evm_update_evmxattr(dentry, NULL, NULL, 0);
 }
-- 
GitLab


From cdef685be5b4ae55c3959289e72d520402839c29 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:47 +0200
Subject: [PATCH 1308/3804] evm: Allow xattr/attr operations for portable
 signatures

If files with portable signatures are copied from one location to another
or are extracted from an archive, verification can temporarily fail until
all xattrs/attrs are set in the destination. Only portable signatures may
be moved or copied from one file to another, as they don't depend on
system-specific information such as the inode generation. Instead portable
signatures must include security.ima.

Unlike other security.evm types, EVM portable signatures are also
immutable. Thus, it wouldn't be a problem to allow xattr/attr operations
when verification fails, as portable signatures will never be replaced with
the HMAC on possibly corrupted xattrs/attrs.

This patch first introduces a new integrity status called
INTEGRITY_FAIL_IMMUTABLE, that allows callers of
evm_verify_current_integrity() to detect that a portable signature didn't
pass verification and then adds an exception in evm_protect_xattr() and
evm_inode_setattr() for this status and returns 0 instead of -EPERM.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/integrity.h             |  1 +
 security/integrity/evm/evm_main.c     | 33 ++++++++++++++++++++++-----
 security/integrity/ima/ima_appraise.c |  2 ++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/include/linux/integrity.h b/include/linux/integrity.h
index 2271939c5c314..2ea0f2f65ab6a 100644
--- a/include/linux/integrity.h
+++ b/include/linux/integrity.h
@@ -13,6 +13,7 @@ enum integrity_status {
 	INTEGRITY_PASS = 0,
 	INTEGRITY_PASS_IMMUTABLE,
 	INTEGRITY_FAIL,
+	INTEGRITY_FAIL_IMMUTABLE,
 	INTEGRITY_NOLABEL,
 	INTEGRITY_NOXATTRS,
 	INTEGRITY_UNKNOWN,
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 4206c7e492ae5..333524e879b5e 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -27,7 +27,8 @@
 int evm_initialized;
 
 static const char * const integrity_status_msg[] = {
-	"pass", "pass_immutable", "fail", "no_label", "no_xattrs", "unknown"
+	"pass", "pass_immutable", "fail", "fail_immutable", "no_label",
+	"no_xattrs", "unknown"
 };
 int evm_hmac_attrs;
 
@@ -155,7 +156,7 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 	enum integrity_status evm_status = INTEGRITY_PASS;
 	struct evm_digest digest;
 	struct inode *inode;
-	int rc, xattr_len;
+	int rc, xattr_len, evm_immutable = 0;
 
 	if (iint && (iint->evm_status == INTEGRITY_PASS ||
 		     iint->evm_status == INTEGRITY_PASS_IMMUTABLE))
@@ -200,8 +201,10 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 		if (rc)
 			rc = -EINVAL;
 		break;
-	case EVM_IMA_XATTR_DIGSIG:
 	case EVM_XATTR_PORTABLE_DIGSIG:
+		evm_immutable = 1;
+		fallthrough;
+	case EVM_IMA_XATTR_DIGSIG:
 		/* accept xattr with non-empty signature field */
 		if (xattr_len <= sizeof(struct signature_v2_hdr)) {
 			evm_status = INTEGRITY_FAIL;
@@ -238,9 +241,14 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 		break;
 	}
 
-	if (rc)
-		evm_status = (rc == -ENODATA) ?
-				INTEGRITY_NOXATTRS : INTEGRITY_FAIL;
+	if (rc) {
+		if (rc == -ENODATA)
+			evm_status = INTEGRITY_NOXATTRS;
+		else if (evm_immutable)
+			evm_status = INTEGRITY_FAIL_IMMUTABLE;
+		else
+			evm_status = INTEGRITY_FAIL;
+	}
 out:
 	if (iint)
 		iint->evm_status = evm_status;
@@ -380,6 +388,14 @@ out:
 	if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL ||
 	    evm_status == INTEGRITY_UNKNOWN))
 		return 0;
+
+	/*
+	 * Writing other xattrs is safe for portable signatures, as portable
+	 * signatures are immutable and can never be updated.
+	 */
+	if (evm_status == INTEGRITY_FAIL_IMMUTABLE)
+		return 0;
+
 	if (evm_status != INTEGRITY_PASS)
 		integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
 				    dentry->d_name.name, "appraise_metadata",
@@ -553,8 +569,13 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
 	if (!(ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
 		return 0;
 	evm_status = evm_verify_current_integrity(dentry);
+	/*
+	 * Writing attrs is safe for portable signatures, as portable signatures
+	 * are immutable and can never be updated.
+	 */
 	if ((evm_status == INTEGRITY_PASS) ||
 	    (evm_status == INTEGRITY_NOXATTRS) ||
+	    (evm_status == INTEGRITY_FAIL_IMMUTABLE) ||
 	    (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL ||
 	     evm_status == INTEGRITY_UNKNOWN)))
 		return 0;
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 03894769dffa5..9bb351b933fbc 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -416,6 +416,8 @@ int ima_appraise_measurement(enum ima_hooks func,
 	case INTEGRITY_NOLABEL:		/* No security.evm xattr. */
 		cause = "missing-HMAC";
 		goto out;
+	case INTEGRITY_FAIL_IMMUTABLE:
+		fallthrough;
 	case INTEGRITY_FAIL:		/* Invalid HMAC/signature. */
 		cause = "invalid-HMAC";
 		goto out;
-- 
GitLab


From 9ff6774b9718d1a72d1b7c580fc579f1d9d7071f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 3 May 2021 09:18:40 -0700
Subject: [PATCH 1309/3804] cpufreq: sc520_freq: add 'fallthrough' to one case

Quieten an implicit-fallthrough warning in sc520_freq.c:

../drivers/cpufreq/sc520_freq.c: In function 'sc520_freq_get_cpu_frequency':
../include/linux/printk.h:343:2: warning: this statement may fall through [-Wimplicit-fallthrough=]
  printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
../drivers/cpufreq/sc520_freq.c:43:3: note: in expansion of macro 'pr_err'
   pr_err("error: cpuctl register has unexpected value %02x\n",
../drivers/cpufreq/sc520_freq.c:45:2: note: here
  case 0x01:

Fixes: bf6fc9fd2d848 ("[CPUFREQ] AMD Elan SC520 cpufreq driver.")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sc520_freq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/sc520_freq.c b/drivers/cpufreq/sc520_freq.c
index 73a208559fe28..330c8d6cf93ca 100644
--- a/drivers/cpufreq/sc520_freq.c
+++ b/drivers/cpufreq/sc520_freq.c
@@ -42,6 +42,7 @@ static unsigned int sc520_freq_get_cpu_frequency(unsigned int cpu)
 	default:
 		pr_err("error: cpuctl register has unexpected value %02x\n",
 		       clockspeed_reg);
+		fallthrough;
 	case 0x01:
 		return 100000;
 	case 0x02:
-- 
GitLab


From 7e135dc725417ecc0629afb4b3b24457d2a4869d Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:48 +0200
Subject: [PATCH 1310/3804] evm: Pass user namespace to set/remove xattr hooks

In preparation for 'evm: Allow setxattr() and setattr() for unmodified
metadata', this patch passes mnt_userns to the inode set/remove xattr hooks
so that the GID of the inode on an idmapped mount is correctly determined
by posix_acl_update_mode().

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/evm.h               | 12 ++++++++----
 security/integrity/evm/evm_main.c | 17 +++++++++++------
 security/security.c               |  4 ++--
 3 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/include/linux/evm.h b/include/linux/evm.h
index 39bb17a8236b1..31ef1dbbb3acd 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -23,13 +23,15 @@ extern enum integrity_status evm_verifyxattr(struct dentry *dentry,
 					     struct integrity_iint_cache *iint);
 extern int evm_inode_setattr(struct dentry *dentry, struct iattr *attr);
 extern void evm_inode_post_setattr(struct dentry *dentry, int ia_valid);
-extern int evm_inode_setxattr(struct dentry *dentry, const char *name,
+extern int evm_inode_setxattr(struct user_namespace *mnt_userns,
+			      struct dentry *dentry, const char *name,
 			      const void *value, size_t size);
 extern void evm_inode_post_setxattr(struct dentry *dentry,
 				    const char *xattr_name,
 				    const void *xattr_value,
 				    size_t xattr_value_len);
-extern int evm_inode_removexattr(struct dentry *dentry, const char *xattr_name);
+extern int evm_inode_removexattr(struct user_namespace *mnt_userns,
+				 struct dentry *dentry, const char *xattr_name);
 extern void evm_inode_post_removexattr(struct dentry *dentry,
 				       const char *xattr_name);
 extern int evm_inode_init_security(struct inode *inode,
@@ -72,7 +74,8 @@ static inline void evm_inode_post_setattr(struct dentry *dentry, int ia_valid)
 	return;
 }
 
-static inline int evm_inode_setxattr(struct dentry *dentry, const char *name,
+static inline int evm_inode_setxattr(struct user_namespace *mnt_userns,
+				     struct dentry *dentry, const char *name,
 				     const void *value, size_t size)
 {
 	return 0;
@@ -86,7 +89,8 @@ static inline void evm_inode_post_setxattr(struct dentry *dentry,
 	return;
 }
 
-static inline int evm_inode_removexattr(struct dentry *dentry,
+static inline int evm_inode_removexattr(struct user_namespace *mnt_userns,
+					struct dentry *dentry,
 					const char *xattr_name)
 {
 	return 0;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 333524e879b5e..300df6906e05d 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -342,7 +342,8 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
  * For posix xattr acls only, permit security.evm, even if it currently
  * doesn't exist, to be updated unless the EVM signature is immutable.
  */
-static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name,
+static int evm_protect_xattr(struct user_namespace *mnt_userns,
+			     struct dentry *dentry, const char *xattr_name,
 			     const void *xattr_value, size_t xattr_value_len)
 {
 	enum integrity_status evm_status;
@@ -406,6 +407,7 @@ out:
 
 /**
  * evm_inode_setxattr - protect the EVM extended attribute
+ * @mnt_userns: user namespace of the idmapped mount
  * @dentry: pointer to the affected dentry
  * @xattr_name: pointer to the affected extended attribute name
  * @xattr_value: pointer to the new extended attribute value
@@ -417,8 +419,9 @@ out:
  * userspace from writing HMAC value.  Writing 'security.evm' requires
  * requires CAP_SYS_ADMIN privileges.
  */
-int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
-		       const void *xattr_value, size_t xattr_value_len)
+int evm_inode_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+		       const char *xattr_name, const void *xattr_value,
+		       size_t xattr_value_len)
 {
 	const struct evm_ima_xattr_data *xattr_data = xattr_value;
 
@@ -435,19 +438,21 @@ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		    xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG)
 			return -EPERM;
 	}
-	return evm_protect_xattr(dentry, xattr_name, xattr_value,
+	return evm_protect_xattr(mnt_userns, dentry, xattr_name, xattr_value,
 				 xattr_value_len);
 }
 
 /**
  * evm_inode_removexattr - protect the EVM extended attribute
+ * @mnt_userns: user namespace of the idmapped mount
  * @dentry: pointer to the affected dentry
  * @xattr_name: pointer to the affected extended attribute name
  *
  * Removing 'security.evm' requires CAP_SYS_ADMIN privileges and that
  * the current value is valid.
  */
-int evm_inode_removexattr(struct dentry *dentry, const char *xattr_name)
+int evm_inode_removexattr(struct user_namespace *mnt_userns,
+			  struct dentry *dentry, const char *xattr_name)
 {
 	/* Policy permits modification of the protected xattrs even though
 	 * there's no HMAC key loaded
@@ -455,7 +460,7 @@ int evm_inode_removexattr(struct dentry *dentry, const char *xattr_name)
 	if (evm_initialized & EVM_ALLOW_METADATA_WRITES)
 		return 0;
 
-	return evm_protect_xattr(dentry, xattr_name, NULL, 0);
+	return evm_protect_xattr(mnt_userns, dentry, xattr_name, NULL, 0);
 }
 
 static void evm_reset_status(struct inode *inode)
diff --git a/security/security.c b/security/security.c
index b38155b2de83f..e9f8010a23418 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1354,7 +1354,7 @@ int security_inode_setxattr(struct user_namespace *mnt_userns,
 	ret = ima_inode_setxattr(dentry, name, value, size);
 	if (ret)
 		return ret;
-	return evm_inode_setxattr(dentry, name, value, size);
+	return evm_inode_setxattr(mnt_userns, dentry, name, value, size);
 }
 
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@@ -1399,7 +1399,7 @@ int security_inode_removexattr(struct user_namespace *mnt_userns,
 	ret = ima_inode_removexattr(dentry, name);
 	if (ret)
 		return ret;
-	return evm_inode_removexattr(dentry, name);
+	return evm_inode_removexattr(mnt_userns, dentry, name);
 }
 
 int security_inode_need_killpriv(struct dentry *dentry)
-- 
GitLab


From 746e4acf87bcacf1406e05ef24a0b7139147c63e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 21 May 2021 15:31:09 +0200
Subject: [PATCH 1311/3804] USB: trancevibrator: fix control-request direction

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the set-speed request which erroneously used USB_DIR_IN and update
the default timeout argument to match (same value).

Fixes: 5638e4d92e77 ("USB: add PlayStation 2 Trance Vibrator driver")
Cc: stable@vger.kernel.org      # 2.6.19
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20210521133109.17396-1-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/trancevibrator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c
index a3dfc77578ea1..26baba3ab7d73 100644
--- a/drivers/usb/misc/trancevibrator.c
+++ b/drivers/usb/misc/trancevibrator.c
@@ -61,9 +61,9 @@ static ssize_t speed_store(struct device *dev, struct device_attribute *attr,
 	/* Set speed */
 	retval = usb_control_msg(tv->udev, usb_sndctrlpipe(tv->udev, 0),
 				 0x01, /* vendor request: set speed */
-				 USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_OTHER,
+				 USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_OTHER,
 				 tv->speed, /* speed value */
-				 0, NULL, 0, USB_CTRL_GET_TIMEOUT);
+				 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
 	if (retval) {
 		tv->speed = old;
 		dev_dbg(&tv->udev->dev, "retval = %d\n", retval);
-- 
GitLab


From 016002848c82eeb5d460489ce392d91fe18c475c Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Fri, 21 May 2021 06:08:43 +0000
Subject: [PATCH 1312/3804] serial: rp2: use 'request_firmware' instead of
 'request_firmware_nowait'

In 'rp2_probe', the driver registers 'rp2_uart_interrupt' and then calls
'rp2_fw_cb' through 'request_firmware_nowait'. In 'rp2_fw_cb', if the
firmware does not exist, the function just returns without initializing
the ports of 'rp2_card'. But the interrupt handler has already been
registered, so when an interrupt arrives, 'rp2_uart_interrupt' may access
those ports, causing a NULL pointer dereference or other bugs.

Because the driver does some initialization work in 'rp2_fw_cb', in
order to make the driver ready to handle interrupts, 'request_firmware'
should be used instead of asynchronous 'request_firmware_nowait'.

This report reveals it:

INFO: trying to register non-static key.
the code is fine but needs lockdep annotation.
turning off the locking correctness validator.
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.177-gdba4159c14ef-dirty #45
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-
gc9ba5276e321-prebuilt.qemu.org 04/01/2014
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xec/0x156 lib/dump_stack.c:118
 assign_lock_key kernel/locking/lockdep.c:727 [inline]
 register_lock_class+0x14e5/0x1ba0 kernel/locking/lockdep.c:753
 __lock_acquire+0x187/0x3750 kernel/locking/lockdep.c:3303
 lock_acquire+0x124/0x340 kernel/locking/lockdep.c:3907
 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
 _raw_spin_lock+0x32/0x50 kernel/locking/spinlock.c:144
 spin_lock include/linux/spinlock.h:329 [inline]
 rp2_ch_interrupt drivers/tty/serial/rp2.c:466 [inline]
 rp2_asic_interrupt.isra.9+0x15d/0x990 drivers/tty/serial/rp2.c:493
 rp2_uart_interrupt+0x49/0xe0 drivers/tty/serial/rp2.c:504
 __handle_irq_event_percpu+0xfb/0x770 kernel/irq/handle.c:149
 handle_irq_event_percpu+0x79/0x150 kernel/irq/handle.c:189
 handle_irq_event+0xac/0x140 kernel/irq/handle.c:206
 handle_fasteoi_irq+0x232/0x5c0 kernel/irq/chip.c:725
 generic_handle_irq_desc include/linux/irqdesc.h:155 [inline]
 handle_irq+0x230/0x3a0 arch/x86/kernel/irq_64.c:87
 do_IRQ+0xa7/0x1e0 arch/x86/kernel/irq.c:247
 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670
 </IRQ>
RIP: 0010:native_safe_halt+0x28/0x30 arch/x86/include/asm/irqflags.h:61
Code: 00 00 55 be 04 00 00 00 48 c7 c7 00 c2 2f 8c 48 89 e5 e8 fb 31 e7 f8
8b 05 75 af 8d 03 85 c0 7e 07 0f 00 2d 8a 61 65 00 fb f4 <5d> c3 90 90 90
90 90 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41
RSP: 0018:ffff88806b71fcc8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
RAX: 0000000000000000 RBX: ffffffff8bde7e48 RCX: ffffffff88a21285
RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff8c2fc200
RBP: ffff88806b71fcc8 R08: fffffbfff185f840 R09: fffffbfff185f840
R10: 0000000000000001 R11: fffffbfff185f840 R12: 0000000000000002
R13: ffffffff8bea18a0 R14: 0000000000000000 R15: 0000000000000000
 arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
 default_idle+0x6f/0x360 arch/x86/kernel/process.c:557
 arch_cpu_idle+0xf/0x20 arch/x86/kernel/process.c:548
 default_idle_call+0x3b/0x60 kernel/sched/idle.c:93
 cpuidle_idle_call kernel/sched/idle.c:153 [inline]
 do_idle+0x2ab/0x3c0 kernel/sched/idle.c:263
 cpu_startup_entry+0xcb/0xe0 kernel/sched/idle.c:369
 start_secondary+0x3b8/0x4e0 arch/x86/kernel/smpboot.c:271
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243
BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
PGD 8000000056d27067 P4D 8000000056d27067 PUD 56d28067 PMD 0
Oops: 0000 [#1] PREEMPT SMP KASAN PTI
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.19.177-gdba4159c14ef-dirty #45
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-
gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:readl arch/x86/include/asm/io.h:59 [inline]
RIP: 0010:rp2_ch_interrupt drivers/tty/serial/rp2.c:472 [inline]
RIP: 0010:rp2_asic_interrupt.isra.9+0x181/0x990 drivers/tty/serial/rp2.c:
493
Code: df e8 43 5d c2 05 48 8d 83 e8 01 00 00 48 89 85 60 ff ff ff 48 c1 e8
03 42 80 3c 30 00 0f 85 aa 07 00 00 48 8b 83 e8 01 00 00 <8b> 40 10 89 c1
89 85 68 ff ff ff 48 8b 83 e8 01 00 00 89 48 10 83
RSP: 0018:ffff88806c287cd0 EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffff88806ade6820 RCX: ffffffff814300b1
RDX: 1ffff1100d5bcd06 RSI: 0000000000000004 RDI: ffff88806ade6820
RBP: ffff88806c287db8 R08: ffffed100d5bcd05 R09: ffffed100d5bcd05
R10: 0000000000000001 R11: ffffed100d5bcd04 R12: ffffc90001e00000
R13: ffff888069654e10 R14: dffffc0000000000 R15: ffff888069654df0
FS:  0000000000000000(0000) GS:ffff88806c280000(0000) knlGS:
0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000010 CR3: 000000006892c000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <IRQ>
 rp2_uart_interrupt+0x49/0xe0 drivers/tty/serial/rp2.c:504
 __handle_irq_event_percpu+0xfb/0x770 kernel/irq/handle.c:149
 handle_irq_event_percpu+0x79/0x150 kernel/irq/handle.c:189
 handle_irq_event+0xac/0x140 kernel/irq/handle.c:206
 handle_fasteoi_irq+0x232/0x5c0 kernel/irq/chip.c:725
 generic_handle_irq_desc include/linux/irqdesc.h:155 [inline]
 handle_irq+0x230/0x3a0 arch/x86/kernel/irq_64.c:87
 do_IRQ+0xa7/0x1e0 arch/x86/kernel/irq.c:247
 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670
 </IRQ>
RIP: 0010:native_safe_halt+0x28/0x30 arch/x86/include/asm/irqflags.h:61
Code: 00 00 55 be 04 00 00 00 48 c7 c7 00 c2 2f 8c 48 89 e5 e8 fb 31 e7
f8 8b 05 75 af 8d 03 85 c0 7e 07 0f 00 2d 8a 61 65 00 fb f4 <5d> c3 90
90 90 90 90 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41
RSP: 0018:ffff88806b71fcc8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffde
RAX: 0000000000000000 RBX: ffffffff8bde7e48 RCX: ffffffff88a21285
RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff8c2fc200
RBP: ffff88806b71fcc8 R08: fffffbfff185f840 R09: fffffbfff185f840
R10: 0000000000000001 R11: fffffbfff185f840 R12: 0000000000000002
R13: ffffffff8bea18a0 R14: 0000000000000000 R15: 0000000000000000
 arch_safe_halt arch/x86/include/asm/paravirt.h:94 [inline]
 default_idle+0x6f/0x360 arch/x86/kernel/process.c:557
 arch_cpu_idle+0xf/0x20 arch/x86/kernel/process.c:548
 default_idle_call+0x3b/0x60 kernel/sched/idle.c:93
 cpuidle_idle_call kernel/sched/idle.c:153 [inline]
 do_idle+0x2ab/0x3c0 kernel/sched/idle.c:263
 cpu_startup_entry+0xcb/0xe0 kernel/sched/idle.c:369
 start_secondary+0x3b8/0x4e0 arch/x86/kernel/smpboot.c:271
 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:243
Modules linked in:
Dumping ftrace buffer:
   (ftrace buffer empty)
CR2: 0000000000000010
---[ end trace 11804dbb55cb1a64 ]---
RIP: 0010:readl arch/x86/include/asm/io.h:59 [inline]
RIP: 0010:rp2_ch_interrupt drivers/tty/serial/rp2.c:472 [inline]
RIP: 0010:rp2_asic_interrupt.isra.9+0x181/0x990 drivers/tty/serial/rp2.c:
493
Code: df e8 43 5d c2 05 48 8d 83 e8 01 00 00 48 89 85 60 ff ff ff 48 c1
e8 03 42 80 3c 30 00 0f 85 aa 07 00 00 48 8b 83 e8 01 00 00 <8b> 40 10 89
c1 89 85 68 ff ff ff 48 8b 83 e8 01 00 00 89 48 10 83
RSP: 0018:ffff88806c287cd0 EFLAGS: 00010046
RAX: 0000000000000000 RBX: ffff88806ade6820 RCX: ffffffff814300b1
RDX: 1ffff1100d5bcd06 RSI: 0000000000000004 RDI: ffff88806ade6820
RBP: ffff88806c287db8 R08: ffffed100d5bcd05 R09: ffffed100d5bcd05
R10: 0000000000000001 R11: ffffed100d5bcd04 R12: ffffc90001e00000
R13: ffff888069654e10 R14: dffffc0000000000 R15: ffff888069654df0
FS:  0000000000000000(0000) GS:ffff88806c280000(0000) knlGS:
0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000010 CR3: 000000006892c000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Link: https://lore.kernel.org/r/1621577323-1541-1-git-send-email-zheyuma97@gmail.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/rp2.c | 52 +++++++++++++---------------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c
index d60abffab70ec..6689d8add8f7a 100644
--- a/drivers/tty/serial/rp2.c
+++ b/drivers/tty/serial/rp2.c
@@ -195,7 +195,6 @@ struct rp2_card {
 	void __iomem			*bar0;
 	void __iomem			*bar1;
 	spinlock_t			card_lock;
-	struct completion		fw_loaded;
 };
 
 #define RP_ID(prod) PCI_VDEVICE(RP, (prod))
@@ -662,17 +661,10 @@ static void rp2_remove_ports(struct rp2_card *card)
 	card->initialized_ports = 0;
 }
 
-static void rp2_fw_cb(const struct firmware *fw, void *context)
+static int rp2_load_firmware(struct rp2_card *card, const struct firmware *fw)
 {
-	struct rp2_card *card = context;
 	resource_size_t phys_base;
-	int i, rc = -ENOENT;
-
-	if (!fw) {
-		dev_err(&card->pdev->dev, "cannot find '%s' firmware image\n",
-			RP2_FW_NAME);
-		goto no_fw;
-	}
+	int i, rc = 0;
 
 	phys_base = pci_resource_start(card->pdev, 1);
 
@@ -718,23 +710,13 @@ static void rp2_fw_cb(const struct firmware *fw, void *context)
 		card->initialized_ports++;
 	}
 
-	release_firmware(fw);
-no_fw:
-	/*
-	 * rp2_fw_cb() is called from a workqueue long after rp2_probe()
-	 * has already returned success.  So if something failed here,
-	 * we'll just leave the now-dormant device in place until somebody
-	 * unbinds it.
-	 */
-	if (rc)
-		dev_warn(&card->pdev->dev, "driver initialization failed\n");
-
-	complete(&card->fw_loaded);
+	return rc;
 }
 
 static int rp2_probe(struct pci_dev *pdev,
 				   const struct pci_device_id *id)
 {
+	const struct firmware *fw;
 	struct rp2_card *card;
 	struct rp2_uart_port *ports;
 	void __iomem * const *bars;
@@ -745,7 +727,6 @@ static int rp2_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 	pci_set_drvdata(pdev, card);
 	spin_lock_init(&card->card_lock);
-	init_completion(&card->fw_loaded);
 
 	rc = pcim_enable_device(pdev);
 	if (rc)
@@ -778,21 +759,23 @@ static int rp2_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 	card->ports = ports;
 
-	rc = devm_request_irq(&pdev->dev, pdev->irq, rp2_uart_interrupt,
-			      IRQF_SHARED, DRV_NAME, card);
-	if (rc)
+	rc = request_firmware(&fw, RP2_FW_NAME, &pdev->dev);
+	if (rc < 0) {
+		dev_err(&pdev->dev, "cannot find '%s' firmware image\n",
+			RP2_FW_NAME);
 		return rc;
+	}
 
-	/*
-	 * Only catastrophic errors (e.g. ENOMEM) are reported here.
-	 * If the FW image is missing, we'll find out in rp2_fw_cb()
-	 * and print an error message.
-	 */
-	rc = request_firmware_nowait(THIS_MODULE, 1, RP2_FW_NAME, &pdev->dev,
-				     GFP_KERNEL, card, rp2_fw_cb);
+	rc = rp2_load_firmware(card, fw);
+
+	release_firmware(fw);
+	if (rc < 0)
+		return rc;
+
+	rc = devm_request_irq(&pdev->dev, pdev->irq, rp2_uart_interrupt,
+			      IRQF_SHARED, DRV_NAME, card);
 	if (rc)
 		return rc;
-	dev_dbg(&pdev->dev, "waiting for firmware blob...\n");
 
 	return 0;
 }
@@ -801,7 +784,6 @@ static void rp2_remove(struct pci_dev *pdev)
 {
 	struct rp2_card *card = pci_get_drvdata(pdev);
 
-	wait_for_completion(&card->fw_loaded);
 	rp2_remove_ports(card);
 }
 
-- 
GitLab


From ec347b7c319156c3b488681d1813d08d88499cc6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:00:31 -0300
Subject: [PATCH 1313/3804] tools headers UAPI: Sync linux/fs.h with the kernel
 sources

To pick the trivial change in:

  63c8af5687f6b1b7 ("block: uapi: fix comment about block device ioctl")

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h'
  diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index f44eb0a04afdd..4c32e97dcdf00 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
 #define BLKROTATIONAL _IO(0x12,126)
 #define BLKZEROOUT _IO(0x12,127)
 /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
  * (see uapi/linux/blkzoned.h)
  */
 
-- 
GitLab


From 4224680ee7aaf0f13ab762ffb2a77373737dce5e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:00:31 -0300
Subject: [PATCH 1314/3804] tools headers UAPI: Sync linux/perf_event.h with
 the kernel sources

To pick the trivial change in:

  0683b53197b55343 ("signal: Deliver all of the siginfo perf data in _perf")

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h'
  diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h

Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index bf8143505c49d..f92880a15645a 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -464,7 +464,7 @@ struct perf_event_attr {
 
 	/*
 	 * User provided data if sigtrap=1, passed back to user via
-	 * siginfo_t::si_perf, e.g. to permit user to identify the event.
+	 * siginfo_t::si_perf_data, e.g. to permit user to identify the event.
 	 */
 	__u64	sig_data;
 };
-- 
GitLab


From 36cb555fae0875d5416e8514a84a427bec6e4cda Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 21 May 2021 15:30:20 +0800
Subject: [PATCH 1315/3804] regulator: scmi: Fix off-by-one for linear
 regulators .n_voltages setting

For linear regulators, the .n_voltages is (max_uv - min_uv) / uv_step + 1.

Fixes: 0fbeae70ee7c ("regulator: add SCMI driver")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
Link: https://lore.kernel.org/r/20210521073020.1944981-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/scmi-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/scmi-regulator.c b/drivers/regulator/scmi-regulator.c
index a917c81e99fac..51eb97815c001 100644
--- a/drivers/regulator/scmi-regulator.c
+++ b/drivers/regulator/scmi-regulator.c
@@ -176,7 +176,7 @@ scmi_config_linear_regulator_mappings(struct scmi_regulator *sreg,
 		sreg->desc.uV_step =
 			vinfo->levels_uv[SCMI_VOLTAGE_SEGMENT_STEP];
 		sreg->desc.linear_min_sel = 0;
-		sreg->desc.n_voltages = delta_uV / sreg->desc.uV_step;
+		sreg->desc.n_voltages = (delta_uV / sreg->desc.uV_step) + 1;
 		sreg->desc.ops = &scmi_reg_linear_ops;
 	}
 
-- 
GitLab


From bffcbe79370e8fda7f1d19899de83aa2a833bf69 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 21 May 2021 16:14:00 -0300
Subject: [PATCH 1316/3804] tools headers UAPI: Sync files changed by the
 quotactl_path unwiring

To pick the changes in this cset:

  5b9fedb31e476693 ("quota: Disable quotactl_path syscall")

That silences these perf build warnings:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h
  Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'
  diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl'
  diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl
  Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl'
  diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl

Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 2 +-
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl  | 2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl     | 2 +-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 9974f5f8e49bc..9cd1c34f31b50 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -357,7 +357,7 @@
 440	n64	process_madvise			sys_process_madvise
 441	n64	epoll_pwait2			sys_epoll_pwait2
 442	n64	mount_setattr			sys_mount_setattr
-443	n64	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	n64	landlock_create_ruleset		sys_landlock_create_ruleset
 445	n64	landlock_add_rule		sys_landlock_add_rule
 446	n64	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 2e68fbb57cc66..8f052ff4058ce 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,7 +522,7 @@
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
 442	common	mount_setattr			sys_mount_setattr
-443	common	quotactl_path			sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset		sys_landlock_create_ruleset
 445	common	landlock_add_rule		sys_landlock_add_rule
 446	common	landlock_restrict_self		sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 7e4a2aba366df..0690263df1dd0 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,7 +445,7 @@
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
 442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
-443  common	quotactl_path		sys_quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444  common	landlock_create_ruleset	sys_landlock_create_ruleset	sys_landlock_create_ruleset
 445  common	landlock_add_rule	sys_landlock_add_rule		sys_landlock_add_rule
 446  common	landlock_restrict_self	sys_landlock_restrict_self	sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index ecd551b08d052..ce18119ea0d0f 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,7 +364,7 @@
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
 442	common	mount_setattr		sys_mount_setattr
-443	common	quotactl_path		sys_quotactl_path
+# 443 reserved for quotactl_path
 444	common	landlock_create_ruleset	sys_landlock_create_ruleset
 445	common	landlock_add_rule	sys_landlock_add_rule
 446	common	landlock_restrict_self	sys_landlock_restrict_self
-- 
GitLab


From a6172059758ba1b496ae024cece7d5bdc8d017db Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:51 +0300
Subject: [PATCH 1317/3804] perf scripts python: exported-sql-viewer.py: Fix
 copy to clipboard from Top Calls by elapsed Time report

Provide missing argument to prevent following error when copying a
selection to the clipboard:

Traceback (most recent call last):
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4041, in <lambda>
    menu.addAction(CreateAction("&Copy selection", "Copy to clipboard", lambda: CopyCellsToClipboardHdr(self.view), self.view))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4021, in CopyCellsToClipboardHdr
    CopyCellsToClipboard(view, False, True)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4018, in CopyCellsToClipboard
    view.CopyCellsToClipboard(view, as_csv, with_hdr)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3871, in CopyTableCellsToClipboard
    val = model.headerData(col, Qt.Horizontal)
TypeError: headerData() missing 1 required positional argument: 'role'

Fixes: 96c43b9a7ab3b ("perf scripts python: exported-sql-viewer.py: Add copy to clipboard")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210521092053.25683-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 7daa8bb70a5a0..b5078d65704ee 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -3868,7 +3868,7 @@ def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
 	if with_hdr:
 		model = indexes[0].model()
 		for col in range(min_col, max_col + 1):
-			val = model.headerData(col, Qt.Horizontal)
+			val = model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
 			if as_csv:
 				text += sep + ToCSValue(val)
 				sep = ","
-- 
GitLab


From fd931b2e234a7cc451a7bbb1965d6ce623189158 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:52 +0300
Subject: [PATCH 1318/3804] perf scripts python: exported-sql-viewer.py: Fix
 Array TypeError

The 'Array' class is present in more than one python standard library.
In some versions of Python 3, the following error occurs:

Traceback (most recent call last):
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4702, in <lambda>
    reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 4727, in NewBranchView
    BranchWindow(self.glb, event_id, ReportVars(), self)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in __init__
    self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 343, in LookupCreateModel
    model = create_fn()
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3208, in <lambda>
    self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause))
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 3124, in __init__
    self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
  File "tools/perf/scripts/python/exported-sql-viewer.py", line 2658, in __init__
    self.buffer = Array(c_char, self.buffer_size, lock=False)
TypeError: abstract class

This apparently happens because Python can be inconsistent about which
class of the name 'Array' gets imported. Fix by importing explicitly by
name so that only the desired 'Array' gets imported.

Fixes: 8392b74b575c3 ("perf scripts python: exported-sql-viewer.py: Add ability to display all the database tables")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210521092053.25683-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index b5078d65704ee..4a63843f623c4 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -125,8 +125,9 @@ if pyside_version_1:
 	from PySide.QtGui import *
 	from PySide.QtSql import *
 
-from decimal import *
-from ctypes import *
+from decimal import Decimal, ROUND_HALF_UP
+from ctypes import CDLL, Structure, create_string_buffer, addressof, sizeof, \
+		   c_void_p, c_bool, c_byte, c_char, c_int, c_uint, c_longlong, c_ulonglong
 from multiprocessing import Process, Array, Value, Event
 
 # xrange is range in Python3
-- 
GitLab


From f56299a9c998e0bfbd4ab07cafe9eb8444512448 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 12:20:53 +0300
Subject: [PATCH 1319/3804] perf scripts python: exported-sql-viewer.py: Fix
 warning display

Deprecation warnings are useful only for the developer, not an end user.
Display warnings only when requested using the python -W option. This
stops the display of warnings like:

 tools/perf/scripts/python/exported-sql-viewer.py:5102: DeprecationWarning:
         an integer is required (got type PySide2.QtCore.Qt.AlignmentFlag).
         Implicit conversion to integers using __int__ is deprecated, and
         may be removed in a future version of Python.
    err = app.exec_()

Since the warning can be fixed only in PySide2, we must wait for it to
be finally fixed there.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: stable@vger.kernel.org      # v5.3+
Link: http://lore.kernel.org/lkml/20210521092053.25683-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 4a63843f623c4..711d4f9f5645c 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -91,6 +91,11 @@
 from __future__ import print_function
 
 import sys
+# Only change warnings if the python -W option was not used
+if not sys.warnoptions:
+	import warnings
+	# PySide2 causes deprecation warnings, ignore them.
+	warnings.filterwarnings("ignore", category=DeprecationWarning)
 import argparse
 import weakref
 import threading
-- 
GitLab


From f42907e8a4515635615a6ffd44242454ef843c04 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 21 May 2021 20:51:27 +0300
Subject: [PATCH 1320/3804] perf script: Add missing PERF_IP_FLAG_CHARS for
 VM-Entry and VM-Exit

Add 'g' (guest) for VM-Entry and 'h' (host) for VM-Exit.

Fixes: c025d46cd932c ("perf script: Add branch types for VM-Entry and VM-Exit")
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20210521175127.27264-1-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-intel-pt.txt | 6 +++---
 tools/perf/Documentation/perf-script.txt   | 7 ++++---
 tools/perf/util/event.h                    | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 1dcec73c910c1..bcf3eca5afbe9 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -108,9 +108,9 @@ displayed as follows:
 
 	perf script --itrace=ibxwpe -F+flags
 
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
+The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end,
+in transaction, VM-entry, and VM-exit respectively.
 
 perf script also supports higher level ways to dump instruction traces:
 
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5b8b610750393..48a5f5b26dd44 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -183,14 +183,15 @@ OPTIONS
 	At this point usage is displayed, and perf-script exits.
 
 	The flags field is synthesized and may have a value when Instruction
-	Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+	Trace decoding. The flags are "bcrosyiABExgh" which stand for branch,
 	call, return, conditional, system, asynchronous, interrupt,
-	transaction abort, trace begin, trace end, and in transaction,
+	transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit
 	respectively. Known combinations of flags are printed more nicely e.g.
 	"call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
 	"int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
 	"async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
-	"tr end" for "bE". However the "x" flag will be display separately in those
+	"tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch".
+	However the "x" flag will be displayed separately in those
 	cases e.g. "jcc     (x)" for a condition branch within a transaction.
 
 	The callindent field is synthesized and may have a value when
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8a62fb39e365c..19ad64f2bd830 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -100,7 +100,7 @@ enum {
 	PERF_IP_FLAG_VMEXIT		= 1ULL << 12,
 };
 
-#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+#define PERF_IP_FLAG_CHARS "bcrosyiABExgh"
 
 #define PERF_BRANCH_MASK		(\
 	PERF_IP_FLAG_BRANCH		|\
-- 
GitLab


From f8b61bd20479c094fb421da42fef6b4ff22a589e Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Tue, 11 May 2021 23:51:16 -0700
Subject: [PATCH 1321/3804] perf stat: Skip evlist__[enable|disable] when all
 events use BPF

When all events of a perf-stat session use BPF, it is not necessary to
call evlist__enable() and evlist__disable(). Skip them when
all_counters_use_bpf is true.

Signed-off-by: Song Liu <song@kernel.org>
Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 13 ++++++++++---
 tools/perf/util/evlist.c  |  3 ---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5a830ae09418e..f9f74a5143159 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -572,7 +572,8 @@ static int enable_counters(void)
 	 * - we have initial delay configured
 	 */
 	if (!target__none(&target) || stat_config.initial_delay) {
-		evlist__enable(evsel_list);
+		if (!all_counters_use_bpf)
+			evlist__enable(evsel_list);
 		if (stat_config.initial_delay > 0)
 			pr_info(EVLIST_ENABLED_MSG);
 	}
@@ -581,13 +582,19 @@ static int enable_counters(void)
 
 static void disable_counters(void)
 {
+	struct evsel *counter;
+
 	/*
 	 * If we don't have tracee (attaching to task or cpu), counters may
 	 * still be running. To get accurate group ratios, we must stop groups
 	 * from counting before reading their constituent counters.
 	 */
-	if (!target__none(&target))
-		evlist__disable(evsel_list);
+	if (!target__none(&target)) {
+		evlist__for_each_entry(evsel_list, counter)
+			bpf_counter__disable(counter);
+		if (!all_counters_use_bpf)
+			evlist__disable(evsel_list);
+	}
 }
 
 static volatile int workload_exec_errno;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6e5c41528c7d0..6ea3e677dc1e7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -425,9 +425,6 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name)
 	if (affinity__setup(&affinity) < 0)
 		return;
 
-	evlist__for_each_entry(evlist, pos)
-		bpf_counter__disable(pos);
-
 	/* Disable 'immediate' events last */
 	for (imm = 0; imm <= 1; imm++) {
 		evlist__for_each_cpu(evlist, i, cpu) {
-- 
GitLab


From 80dd33cf72d1ab4f0af303f1fa242c6d6c8d328f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 14 May 2021 14:10:15 +0200
Subject: [PATCH 1322/3804] drivers: base: Fix device link removal

When device_link_free() drops references to the supplier and
consumer devices of the device link going away and the reference
being dropped turns out to be the last one for any of those
device objects, its ->release callback will be invoked and it
may sleep which goes against the SRCU callback execution
requirements.

To address this issue, make the device link removal code carry out
the device_link_free() actions preceded by SRCU synchronization from
a separate work item (the "long" workqueue is used for that, because
it does not matter when the device link memory is released and it may
take time to get to that point) instead of using SRCU callbacks.

While at it, make the code work analogously when SRCU is not enabled
to reduce the differences between the SRCU and non-SRCU cases.

Fixes: 843e600b8a2b ("driver core: Fix sleeping in invalid context during device link deletion")
Cc: stable <stable@vger.kernel.org>
Reported-by: chenxiang (M) <chenxiang66@hisilicon.com>
Tested-by: chenxiang (M) <chenxiang66@hisilicon.com>
Reviewed-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/5722787.lOV4Wx5bFT@kreacher
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 37 +++++++++++++++++++++++--------------
 include/linux/device.h |  6 ++----
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 628e33939acae..61c19641e1d0b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -194,6 +194,11 @@ int device_links_read_lock_held(void)
 {
 	return srcu_read_lock_held(&device_links_srcu);
 }
+
+static void device_link_synchronize_removal(void)
+{
+	synchronize_srcu(&device_links_srcu);
+}
 #else /* !CONFIG_SRCU */
 static DECLARE_RWSEM(device_links_lock);
 
@@ -224,6 +229,10 @@ int device_links_read_lock_held(void)
 	return lockdep_is_held(&device_links_lock);
 }
 #endif
+
+static inline void device_link_synchronize_removal(void)
+{
+}
 #endif /* !CONFIG_SRCU */
 
 static bool device_is_ancestor(struct device *dev, struct device *target)
@@ -445,8 +454,13 @@ static struct attribute *devlink_attrs[] = {
 };
 ATTRIBUTE_GROUPS(devlink);
 
-static void device_link_free(struct device_link *link)
+static void device_link_release_fn(struct work_struct *work)
 {
+	struct device_link *link = container_of(work, struct device_link, rm_work);
+
+	/* Ensure that all references to the link object have been dropped. */
+	device_link_synchronize_removal();
+
 	while (refcount_dec_not_one(&link->rpm_active))
 		pm_runtime_put(link->supplier);
 
@@ -455,24 +469,19 @@ static void device_link_free(struct device_link *link)
 	kfree(link);
 }
 
-#ifdef CONFIG_SRCU
-static void __device_link_free_srcu(struct rcu_head *rhead)
-{
-	device_link_free(container_of(rhead, struct device_link, rcu_head));
-}
-
 static void devlink_dev_release(struct device *dev)
 {
 	struct device_link *link = to_devlink(dev);
 
-	call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu);
-}
-#else
-static void devlink_dev_release(struct device *dev)
-{
-	device_link_free(to_devlink(dev));
+	INIT_WORK(&link->rm_work, device_link_release_fn);
+	/*
+	 * It may take a while to complete this work because of the SRCU
+	 * synchronization in device_link_release_fn() and if the consumer or
+	 * supplier devices get deleted when it runs, so put it into the "long"
+	 * workqueue.
+	 */
+	queue_work(system_long_wq, &link->rm_work);
 }
-#endif
 
 static struct class devlink_class = {
 	.name = "devlink",
diff --git a/include/linux/device.h b/include/linux/device.h
index 38a2071cf7768..f1a00040fa534 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -570,7 +570,7 @@ struct device {
  * @flags: Link flags.
  * @rpm_active: Whether or not the consumer device is runtime-PM-active.
  * @kref: Count repeated addition of the same link.
- * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks.
+ * @rm_work: Work structure used for removing the link.
  * @supplier_preactivated: Supplier has been made active before consumer probe.
  */
 struct device_link {
@@ -583,9 +583,7 @@ struct device_link {
 	u32 flags;
 	refcount_t rpm_active;
 	struct kref kref;
-#ifdef CONFIG_SRCU
-	struct rcu_head rcu_head;
-#endif
+	struct work_struct rm_work;
 	bool supplier_preactivated; /* Owned by consumer probe. */
 };
 
-- 
GitLab


From 0c8713153fbf7ba4e45172e139d501c86006dc03 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 14 May 2021 14:11:19 +0200
Subject: [PATCH 1323/3804] drivers: base: Reduce device link removal code
 duplication

Reduce device link removal code duplication between the cases when
SRCU is enabled and when it is disabled by moving the only differing
piece of it (which is the removal of the link from the consumer and
supplier lists) into a separate wrapper function (defined differently
for each of the cases in question).

No intentional functional impact.

Reviewed-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/4326215.LvFx2qVVIh@kreacher
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 61c19641e1d0b..54ba506e5a89d 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -199,6 +199,12 @@ static void device_link_synchronize_removal(void)
 {
 	synchronize_srcu(&device_links_srcu);
 }
+
+static void device_link_remove_from_lists(struct device_link *link)
+{
+	list_del_rcu(&link->s_node);
+	list_del_rcu(&link->c_node);
+}
 #else /* !CONFIG_SRCU */
 static DECLARE_RWSEM(device_links_lock);
 
@@ -233,6 +239,12 @@ int device_links_read_lock_held(void)
 static inline void device_link_synchronize_removal(void)
 {
 }
+
+static void device_link_remove_from_lists(struct device_link *link)
+{
+	list_del(&link->s_node);
+	list_del(&link->c_node);
+}
 #endif /* !CONFIG_SRCU */
 
 static bool device_is_ancestor(struct device *dev, struct device *target)
@@ -855,7 +867,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(device_link_add);
 
-#ifdef CONFIG_SRCU
 static void __device_link_del(struct kref *kref)
 {
 	struct device_link *link = container_of(kref, struct device_link, kref);
@@ -865,25 +876,9 @@ static void __device_link_del(struct kref *kref)
 
 	pm_runtime_drop_link(link);
 
-	list_del_rcu(&link->s_node);
-	list_del_rcu(&link->c_node);
-	device_unregister(&link->link_dev);
-}
-#else /* !CONFIG_SRCU */
-static void __device_link_del(struct kref *kref)
-{
-	struct device_link *link = container_of(kref, struct device_link, kref);
-
-	dev_info(link->consumer, "Dropping the link to %s\n",
-		 dev_name(link->supplier));
-
-	pm_runtime_drop_link(link);
-
-	list_del(&link->s_node);
-	list_del(&link->c_node);
+	device_link_remove_from_lists(link);
 	device_unregister(&link->link_dev);
 }
-#endif /* !CONFIG_SRCU */
 
 static void device_link_put_kref(struct device_link *link)
 {
-- 
GitLab


From 70ca3c57ff914113f681e657634f7fbfa68e1ad1 Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Date: Sat, 15 May 2021 16:03:15 +0900
Subject: [PATCH 1324/3804] scsi: target: core: Avoid smp_processor_id() in
 preemptible code

The BUG message "BUG: using smp_processor_id() in preemptible [00000000]
code" was observed for TCMU devices with kernel config DEBUG_PREEMPT.

The message was observed when blktests block/005 was run on TCMU devices
with fileio backend or user:zbc backend [1]. The commit 1130b499b4a7
("scsi: target: tcm_loop: Use LIO wq cmd submission helper") triggered the
symptom. That commit changed the work queue used to handle commands, and
with it the value of 'current->nr_cpus_allowed' at the smp_processor_id()
call.

The message was also observed at system shutdown when TCMU devices were not
cleaned up [2]. The function smp_processor_id() was called in SCSI host
work queue for abort handling, and triggered the BUG message. This symptom
was observed regardless of the commit 1130b499b4a7 ("scsi: target:
tcm_loop: Use LIO wq cmd submission helper").

To avoid the preemptible code check at smp_processor_id(), get the CPU ID
with raw_smp_processor_id() instead. The CPU ID is used for performance
improvement, so a thread moving to another CPU will not affect the code.

[1]

[   56.468103] run blktests block/005 at 2021-05-12 14:16:38
[   57.369473] check_preemption_disabled: 85 callbacks suppressed
[   57.369480] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1511
[   57.369506] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1510
[   57.369512] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1506
[   57.369552] caller is __target_init_cmd+0x157/0x170 [target_core_mod]
[   57.369606] CPU: 4 PID: 1506 Comm: fio Not tainted 5.13.0-rc1+ #34
[   57.369613] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018
[   57.369617] Call Trace:
[   57.369621] BUG: using smp_processor_id() in preemptible [00000000] code: fio/1507
[   57.369628]  dump_stack+0x6d/0x89
[   57.369642]  check_preemption_disabled+0xc8/0xd0
[   57.369628] caller is __target_init_cmd+0x157/0x170 [target_core_mod]
[   57.369655]  __target_init_cmd+0x157/0x170 [target_core_mod]
[   57.369695]  target_init_cmd+0x76/0x90 [target_core_mod]
[   57.369732]  tcm_loop_queuecommand+0x109/0x210 [tcm_loop]
[   57.369744]  scsi_queue_rq+0x38e/0xc40
[   57.369761]  __blk_mq_try_issue_directly+0x109/0x1c0
[   57.369779]  blk_mq_try_issue_directly+0x43/0x90
[   57.369790]  blk_mq_submit_bio+0x4e5/0x5d0
[   57.369812]  submit_bio_noacct+0x46e/0x4e0
[   57.369830]  __blkdev_direct_IO_simple+0x1a3/0x2d0
[   57.369859]  ? set_init_blocksize.isra.0+0x60/0x60
[   57.369880]  generic_file_read_iter+0x89/0x160
[   57.369898]  blkdev_read_iter+0x44/0x60
[   57.369906]  new_sync_read+0x102/0x170
[   57.369929]  vfs_read+0xd4/0x160
[   57.369941]  __x64_sys_pread64+0x6e/0xa0
[   57.369946]  ? lockdep_hardirqs_on+0x79/0x100
[   57.369958]  do_syscall_64+0x3a/0x70
[   57.369965]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   57.369973] RIP: 0033:0x7f7ed4c1399f
[   57.369979] Code: 08 89 3c 24 48 89 4c 24 18 e8 7d f3 ff ff 4c 8b 54 24 18 48 8b 54 24 10 41 89 c0 48 8b 74 24 08 8b 3c 24 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 04 24 e8 cd f3 ff ff 48 8b
[   57.369983] RSP: 002b:00007ffd7918c580 EFLAGS: 00000293 ORIG_RAX: 0000000000000011
[   57.369990] RAX: ffffffffffffffda RBX: 00000000015b4540 RCX: 00007f7ed4c1399f
[   57.369993] RDX: 0000000000001000 RSI: 00000000015de000 RDI: 0000000000000009
[   57.369996] RBP: 00000000015b4540 R08: 0000000000000000 R09: 0000000000000001
[   57.369999] R10: 0000000000e5c000 R11: 0000000000000293 R12: 00007f7eb5269a70
[   57.370002] R13: 0000000000000000 R14: 0000000000001000 R15: 00000000015b4568
[   57.370031] CPU: 7 PID: 1507 Comm: fio Not tainted 5.13.0-rc1+ #34
[   57.370036] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018
[   57.370039] Call Trace:
[   57.370045]  dump_stack+0x6d/0x89
[   57.370056]  check_preemption_disabled+0xc8/0xd0
[   57.370068]  __target_init_cmd+0x157/0x170 [target_core_mod]
[   57.370121]  target_init_cmd+0x76/0x90 [target_core_mod]
[   57.370178]  tcm_loop_queuecommand+0x109/0x210 [tcm_loop]
[   57.370197]  scsi_queue_rq+0x38e/0xc40
[   57.370224]  __blk_mq_try_issue_directly+0x109/0x1c0
...

[2]

[  117.458597] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u16:8
[  117.467279] caller is __target_init_cmd+0x157/0x170 [target_core_mod]
[  117.473893] CPU: 1 PID: 418 Comm: kworker/u16:6 Not tainted 5.13.0-rc1+ #34
[  117.481150] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 8
[  117.481153] Workqueue: scsi_tmf_7 scmd_eh_abort_handler
[  117.481156] Call Trace:
[  117.481158]  dump_stack+0x6d/0x89
[  117.481162]  check_preemption_disabled+0xc8/0xd0
[  117.512575]  target_submit_tmr+0x41/0x150 [target_core_mod]
[  117.519705]  tcm_loop_issue_tmr+0xa7/0x100 [tcm_loop]
[  117.524913]  tcm_loop_abort_task+0x43/0x60 [tcm_loop]
[  117.530137]  scmd_eh_abort_handler+0x7b/0x230
[  117.534681]  process_one_work+0x268/0x580
[  117.538862]  worker_thread+0x55/0x3b0
[  117.542652]  ? process_one_work+0x580/0x580
[  117.548351]  kthread+0x143/0x160
[  117.551675]  ? kthread_create_worker_on_cpu+0x40/0x40
[  117.556873]  ret_from_fork+0x1f/0x30

Link: https://lore.kernel.org/r/20210515070315.215801-1-shinichiro.kawasaki@wdc.com
Fixes: 1526d9f10c61 ("scsi: target: Make state_list per CPU")
Cc: stable@vger.kernel.org # v5.11+
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 8fbfe75c5744a..05d7ffd59df65 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1416,7 +1416,7 @@ void __target_init_cmd(
 	cmd->orig_fe_lun = unpacked_lun;
 
 	if (!(cmd->se_cmd_flags & SCF_USE_CPUID))
-		cmd->cpuid = smp_processor_id();
+		cmd->cpuid = raw_smp_processor_id();
 
 	cmd->state_active = false;
 }
-- 
GitLab


From b4150b68815e9e4447ce169224ed436b419f0153 Mon Sep 17 00:00:00 2001
From: Bodo Stroesser <bostroesser@gmail.com>
Date: Wed, 19 May 2021 15:54:40 +0200
Subject: [PATCH 1325/3804] scsi: target: tcmu: Fix xarray RCU warning

Commit f5ce815f34bc ("scsi: target: tcmu: Support DATA_BLOCK_SIZE = N *
PAGE_SIZE") introduced xas_next() calls to iterate xarray elements.  These
calls triggered the WARNING "suspicious RCU usage" at tcmu device set up
[1]. In the call stack of xas_next(), xas_load() was called.  According to
its comment, this function requires "the xa_lock or the RCU lock".

To avoid the warning:

 - Guard the small loop calling xas_next() in tcmu_get_empty_block with RCU
   lock.

 - In the large loop in tcmu_copy_data, using the RCU lock could disable
   preemption for a long time (copying multiple MBs). Therefore replace
   XA_STATE, xas_set and xas_next with a single xa_load.

[1]

[ 1899.867091] =============================
[ 1899.871199] WARNING: suspicious RCU usage
[ 1899.875310] 5.13.0-rc1+ #41 Not tainted
[ 1899.879222] -----------------------------
[ 1899.883299] include/linux/xarray.h:1182 suspicious rcu_dereference_check() usage!
[ 1899.890940] other info that might help us debug this:
[ 1899.899082] rcu_scheduler_active = 2, debug_locks = 1
[ 1899.905719] 3 locks held by kworker/0:1/1368:
[ 1899.910161]  #0: ffffa1f8c8b98738 ((wq_completion)target_submission){+.+.}-{0:0}, at: process_one_work+0x1ee/0x580
[ 1899.920732]  #1: ffffbd7040cd7e78 ((work_completion)(&q->sq.work)){+.+.}-{0:0}, at: process_one_work+0x1ee/0x580
[ 1899.931146]  #2: ffffa1f8d1c99768 (&udev->cmdr_lock){+.+.}-{3:3}, at: tcmu_queue_cmd+0xea/0x160 [target_core_user]
[ 1899.941678] stack backtrace:
[ 1899.946093] CPU: 0 PID: 1368 Comm: kworker/0:1 Not tainted 5.13.0-rc1+ #41
[ 1899.953070] Hardware name: System manufacturer System Product Name/PRIME Z270-A, BIOS 1302 03/15/2018
[ 1899.962459] Workqueue: target_submission target_queued_submit_work [target_core_mod]
[ 1899.970337] Call Trace:
[ 1899.972839]  dump_stack+0x6d/0x89
[ 1899.976222]  xas_descend+0x10e/0x120
[ 1899.979875]  xas_load+0x39/0x50
[ 1899.983077]  tcmu_get_empty_blocks+0x115/0x1c0 [target_core_user]
[ 1899.989318]  queue_cmd_ring+0x1da/0x630 [target_core_user]
[ 1899.994897]  ? rcu_read_lock_sched_held+0x3f/0x70
[ 1899.999695]  ? trace_kmalloc+0xa6/0xd0
[ 1900.003501]  ? __kmalloc+0x205/0x380
[ 1900.007167]  tcmu_queue_cmd+0x12f/0x160 [target_core_user]
[ 1900.012746]  __target_execute_cmd+0x23/0xa0 [target_core_mod]
[ 1900.018589]  transport_generic_new_cmd+0x1f3/0x370 [target_core_mod]
[ 1900.025046]  transport_handle_cdb_direct+0x34/0x50 [target_core_mod]
[ 1900.031517]  target_queued_submit_work+0x43/0xe0 [target_core_mod]
[ 1900.037837]  process_one_work+0x268/0x580
[ 1900.041952]  ? process_one_work+0x580/0x580
[ 1900.046195]  worker_thread+0x55/0x3b0
[ 1900.049921]  ? process_one_work+0x580/0x580
[ 1900.054192]  kthread+0x143/0x160
[ 1900.057499]  ? kthread_create_worker_on_cpu+0x40/0x40
[ 1900.062661]  ret_from_fork+0x1f/0x30

Link: https://lore.kernel.org/r/20210519135440.26773-1-bostroesser@gmail.com
Fixes: f5ce815f34bc ("scsi: target: tcmu: Support DATA_BLOCK_SIZE = N * PAGE_SIZE")
Reported-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Bodo Stroesser <bostroesser@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_user.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 198d25ae482ab..4bba10e7755aa 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -516,8 +516,10 @@ static inline int tcmu_get_empty_block(struct tcmu_dev *udev,
 	dpi = dbi * udev->data_pages_per_blk;
 	/* Count the number of already allocated pages */
 	xas_set(&xas, dpi);
+	rcu_read_lock();
 	for (cnt = 0; xas_next(&xas) && cnt < page_cnt;)
 		cnt++;
+	rcu_read_unlock();
 
 	for (i = cnt; i < page_cnt; i++) {
 		/* try to get new page from the mm */
@@ -699,11 +701,10 @@ static inline void tcmu_copy_data(struct tcmu_dev *udev,
 				  struct scatterlist *sg, unsigned int sg_nents,
 				  struct iovec **iov, size_t data_len)
 {
-	XA_STATE(xas, &udev->data_pages, 0);
 	/* start value of dbi + 1 must not be a valid dbi */
 	int dbi = -2;
 	size_t page_remaining, cp_len;
-	int page_cnt, page_inx;
+	int page_cnt, page_inx, dpi;
 	struct sg_mapping_iter sg_iter;
 	unsigned int sg_flags;
 	struct page *page;
@@ -726,9 +727,10 @@ static inline void tcmu_copy_data(struct tcmu_dev *udev,
 		if (page_cnt > udev->data_pages_per_blk)
 			page_cnt = udev->data_pages_per_blk;
 
-		xas_set(&xas, dbi * udev->data_pages_per_blk);
-		for (page_inx = 0; page_inx < page_cnt && data_len; page_inx++) {
-			page = xas_next(&xas);
+		dpi = dbi * udev->data_pages_per_blk;
+		for (page_inx = 0; page_inx < page_cnt && data_len;
+		     page_inx++, dpi++) {
+			page = xa_load(&udev->data_pages, dpi);
 
 			if (direction == TCMU_DATA_AREA_TO_SG)
 				flush_dcache_page(page);
-- 
GitLab


From 430bfe0576120b52cf7f62116bc7549180da4706 Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Thu, 20 May 2021 10:43:18 +0200
Subject: [PATCH 1326/3804] net: ethernet: mtk_eth_soc: Fix DIM support for
 MT7628/88

When updating to latest mainline for some testing on the GARDENA smart
gateway based on the MT7628, I noticed that ethernet does not work any
more. Commit e9229ffd550b ("net: ethernet: mtk_eth_soc: implement
dynamic interrupt moderation") introduced this problem, as it missed the
RX_DIM & TX_DIM configuration for this SoC variant. This patch fixes
this by calling mtk_dim_rx() & mtk_dim_tx() in this case as well.

Signed-off-by: Stefan Roese <sr@denx.de>
Fixes: e9229ffd550b ("net: ethernet: mtk_eth_soc: implement dynamic interrupt moderation")
Cc: Felix Fietkau <nbd@nbd.name>
Cc: John Crispin <john@phrozen.org>
Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: Reto Schneider <code@reto-schneider.ch>
Cc: Reto Schneider <reto.schneider@husqvarnagroup.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index ed4eacef17cea..d6cc06ee0caa5 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2423,7 +2423,8 @@ static void mtk_dim_rx(struct work_struct *work)
 	val |= cur << MTK_PDMA_DELAY_RX_PINT_SHIFT;
 
 	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
-	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
 
 	spin_unlock_bh(&eth->dim_lock);
 
@@ -2452,7 +2453,8 @@ static void mtk_dim_tx(struct work_struct *work)
 	val |= cur << MTK_PDMA_DELAY_TX_PINT_SHIFT;
 
 	mtk_w32(eth, val, MTK_PDMA_DELAY_INT);
-	mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		mtk_w32(eth, val, MTK_QDMA_DELAY_INT);
 
 	spin_unlock_bh(&eth->dim_lock);
 
@@ -2480,6 +2482,10 @@ static int mtk_hw_init(struct mtk_eth *eth)
 			goto err_disable_pm;
 		}
 
+		/* set interrupt delays based on current Net DIM sample */
+		mtk_dim_rx(&eth->rx_dim.work);
+		mtk_dim_tx(&eth->tx_dim.work);
+
 		/* disable delay and normal interrupt */
 		mtk_tx_irq_disable(eth, ~0);
 		mtk_rx_irq_disable(eth, ~0);
-- 
GitLab


From 8c7e7b8486cda21269d393245883c5e4737d5ee7 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 19 May 2021 17:20:27 +0300
Subject: [PATCH 1327/3804] scsi: libsas: Use _safe() loop in sas_resume_port()

If sas_notify_lldd_dev_found() fails then this code calls:

	sas_unregister_dev(port, dev);

which removes "dev", our list iterator, from the list.  This could lead to
an endless loop.  We need to use list_for_each_entry_safe().

Link: https://lore.kernel.org/r/YKUeq6gwfGcvvhty@mwanda
Fixes: 303694eeee5e ("[SCSI] libsas: suspend / resume support")
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index 19cf418928faa..e3d03d744713d 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -25,7 +25,7 @@ static bool phy_is_wideport_member(struct asd_sas_port *port, struct asd_sas_phy
 
 static void sas_resume_port(struct asd_sas_phy *phy)
 {
-	struct domain_device *dev;
+	struct domain_device *dev, *n;
 	struct asd_sas_port *port = phy->port;
 	struct sas_ha_struct *sas_ha = phy->ha;
 	struct sas_internal *si = to_sas_internal(sas_ha->core.shost->transportt);
@@ -44,7 +44,7 @@ static void sas_resume_port(struct asd_sas_phy *phy)
 	 * 1/ presume every device came back
 	 * 2/ force the next revalidation to check all expander phys
 	 */
-	list_for_each_entry(dev, &port->dev_list, dev_list_node) {
+	list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) {
 		int i, rc;
 
 		rc = sas_notify_lldd_dev_found(dev);
-- 
GitLab


From 5aaeca258f5540ca5cd4a56758ef03faacb7716d Mon Sep 17 00:00:00 2001
From: Mike Christie <michael.christie@oracle.com>
Date: Wed, 19 May 2021 17:26:40 -0500
Subject: [PATCH 1328/3804] scsi: target: iblock: Fix smp_processor_id() BUG
 messages

This has us use raw_smp_processor_id() in iblock's plug_device callout.
smp_processor_id() is not needed here, because we are running from a per
CPU work item that is also queued to run on a worker thread that is
normally bound to a specific CPU. If the worker thread did end up switching
CPUs then it's handled the same way we handle when the work got moved to a
different CPU's worker thread, where we will just end up sending I/O from
the new CPU.

Link: https://lore.kernel.org/r/20210519222640.5153-1-michael.christie@oracle.com
Fixes: 415ccd9811da ("scsi: target: iblock: Add backend plug/unplug callouts")
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_iblock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index d6fdd1c61f903..a526f9678c34b 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -204,11 +204,11 @@ static struct se_dev_plug *iblock_plug_device(struct se_device *se_dev)
 	struct iblock_dev_plug *ib_dev_plug;
 
 	/*
-	 * Each se_device has a per cpu work this can be run from. Wwe
+	 * Each se_device has a per cpu work this can be run from. We
 	 * shouldn't have multiple threads on the same cpu calling this
 	 * at the same time.
 	 */
-	ib_dev_plug = &ib_dev->ibd_plug[smp_processor_id()];
+	ib_dev_plug = &ib_dev->ibd_plug[raw_smp_processor_id()];
 	if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags))
 		return NULL;
 
-- 
GitLab


From e5bfaed7508fd34ae95a79d1eb76c38ecc82c947 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 21 May 2021 15:28:56 +0200
Subject: [PATCH 1329/3804] MAINTAINERS: s390/net: add netdev list

Discussions for network-related code should include the netdev list.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c1cb2e38ae2ed..88722efd94a11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15944,6 +15944,7 @@ S390 IUCV NETWORK LAYER
 M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
 F:	drivers/s390/net/*iucv*
@@ -15954,6 +15955,7 @@ S390 NETWORK DRIVERS
 M:	Julian Wiedmann <jwi@linux.ibm.com>
 M:	Karsten Graul <kgraul@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
+L:	netdev@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
 F:	drivers/s390/net/
-- 
GitLab


From fc516d3a6aa2c6ffe27d0da8818d13839e023e7e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 21 May 2021 10:46:14 -0700
Subject: [PATCH 1330/3804] net: dsa: bcm_sf2: Fix bcm_sf2_reg_rgmii_cntrl()
 call for non-RGMII port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We cannot call bcm_sf2_reg_rgmii_cntrl() for a port that is not RGMII,
yet we do that in bcm_sf2_sw_mac_link_up() irrespective of the port's
interface. Move that read until we have properly qualified the PHY
interface mode. This avoids triggering a warning on 7278 platforms that
have GMII ports.

Fixes: 55cfeb396965 ("net: dsa: bcm_sf2: add function finding RGMII register")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/bcm_sf2.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 9150038b60cb4..3b018fcf44124 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -821,11 +821,9 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 	bcm_sf2_sw_mac_link_set(ds, port, interface, true);
 
 	if (port != core_readl(priv, CORE_IMP0_PRT_ID)) {
-		u32 reg_rgmii_ctrl;
+		u32 reg_rgmii_ctrl = 0;
 		u32 reg, offset;
 
-		reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
-
 		if (priv->type == BCM4908_DEVICE_ID ||
 		    priv->type == BCM7445_DEVICE_ID)
 			offset = CORE_STS_OVERRIDE_GMIIP_PORT(port);
@@ -836,6 +834,7 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port,
 		    interface == PHY_INTERFACE_MODE_RGMII_TXID ||
 		    interface == PHY_INTERFACE_MODE_MII ||
 		    interface == PHY_INTERFACE_MODE_REVMII) {
+			reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port);
 			reg = reg_readl(priv, reg_rgmii_ctrl);
 			reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
 
-- 
GitLab


From c7718ee96dbc2f9c5fc3b578abdf296dd44b9c20 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Fri, 21 May 2021 16:45:58 +0200
Subject: [PATCH 1331/3804] net: lantiq: fix memory corruption in RX ring

In a situation where memory allocation or dma mapping fails, an
invalid address is programmed into the descriptor. This can lead
to memory corruption. If the memory allocation fails, DMA should
reuse the previous skb and mapping and drop the packet. This patch
also increments rx drop counter.

Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver ")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 41c2ad210bc99..36dc3e5f62189 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
 
 static int xrx200_alloc_skb(struct xrx200_chan *ch)
 {
+	dma_addr_t mapping;
 	int ret = 0;
 
 	ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(ch->priv->net_dev,
@@ -163,16 +164,17 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
 		goto skip;
 	}
 
-	ch->dma.desc_base[ch->dma.desc].addr = dma_map_single(ch->priv->dev,
-			ch->skb[ch->dma.desc]->data, XRX200_DMA_DATA_LEN,
-			DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(ch->priv->dev,
-				       ch->dma.desc_base[ch->dma.desc].addr))) {
+	mapping = dma_map_single(ch->priv->dev, ch->skb[ch->dma.desc]->data,
+				 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
 		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
 		ret = -ENOMEM;
 		goto skip;
 	}
 
+	ch->dma.desc_base[ch->dma.desc].addr = mapping;
+	/* Make sure the address is written before we give it to HW */
+	wmb();
 skip:
 	ch->dma.desc_base[ch->dma.desc].ctl =
 		LTQ_DMA_OWN | LTQ_DMA_RX_OFFSET(NET_IP_ALIGN) |
@@ -196,6 +198,8 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
 	ch->dma.desc %= LTQ_DESC_NUM;
 
 	if (ret) {
+		ch->skb[ch->dma.desc] = skb;
+		net_dev->stats.rx_dropped++;
 		netdev_err(net_dev, "failed to allocate new rx buffer\n");
 		return ret;
 	}
-- 
GitLab


From 29bf1993fdba17703a836cf098712cf15f96706d Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sat, 22 May 2021 00:01:00 +0300
Subject: [PATCH 1332/3804] MAINTAINERS: remove Ioana Radulescu from dpaa2-eth

Remove Ioana Radulescu from dpaa2-eth since she is no longer working on
the DPAA2 set of drivers.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 88722efd94a11..251111e5da534 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5569,7 +5569,6 @@ F:	drivers/soc/fsl/dpio
 
 DPAA2 ETHERNET DRIVER
 M:	Ioana Ciornei <ioana.ciornei@nxp.com>
-M:	Ioana Radulescu <ruxandra.radulescu@nxp.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/device_drivers/ethernet/freescale/dpaa2/ethernet-driver.rst
-- 
GitLab


From aced3ce57cd37b5ca332bcacd370d01f5a8c5371 Mon Sep 17 00:00:00 2001
From: Rao Shoaib <rao.shoaib@oracle.com>
Date: Fri, 21 May 2021 11:08:06 -0700
Subject: [PATCH 1333/3804] RDS tcp loopback connection can hang

When TCP is used as transport and a program on the
system connects to RDS port 16385, connection is
accepted but denied per the rules of RDS. However,
the RDS connection object is left in the list. The next
loopback connection will select that connection
object as it is at the head of the list. The connection
attempt will hang as the connection object is set
to connect over TCP, which is not allowed.

The issue can be reproduced easily, use rds-ping
to ping a local IP address. After that use any
program like ncat to connect to the same IP
address and port 16385. This will hang so ctrl-c out.
Now try rds-ping, it will hang.

To fix the issue this patch adds checks to disallow
the connection object creation and destroys the
connection object.

Signed-off-by: Rao Shoaib <rao.shoaib@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/connection.c | 23 +++++++++++++++++------
 net/rds/tcp.c        |  4 ++--
 net/rds/tcp.h        |  3 ++-
 net/rds/tcp_listen.c |  6 ++++++
 4 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index f2fcab182095c..a3bc4b54d4910 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -240,12 +240,23 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	if (loop_trans) {
 		rds_trans_put(loop_trans);
 		conn->c_loopback = 1;
-		if (is_outgoing && trans->t_prefer_loopback) {
-			/* "outgoing" connection - and the transport
-			 * says it wants the connection handled by the
-			 * loopback transport. This is what TCP does.
-			 */
-			trans = &rds_loop_transport;
+		if (trans->t_prefer_loopback) {
+			if (likely(is_outgoing)) {
+				/* "outgoing" connection to local address.
+				 * Protocol says it wants the connection
+				 * handled by the loopback transport.
+				 * This is what TCP does.
+				 */
+				trans = &rds_loop_transport;
+			} else {
+				/* No transport currently in use
+				 * should end up here, but if it
+				 * does, reset/destroy the connection.
+				 */
+				kmem_cache_free(rds_conn_slab, conn);
+				conn = ERR_PTR(-EOPNOTSUPP);
+				goto out;
+			}
 		}
 	}
 
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 43db0eca911fa..abf19c0e3ba0b 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -313,8 +313,8 @@ out:
 }
 #endif
 
-static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
-			       __u32 scope_id)
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+			__u32 scope_id)
 {
 	struct net_device *dev = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index bad9cf49d5657..dc8d745d68575 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -59,7 +59,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
 u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
 extern struct rds_transport rds_tcp_transport;
 void rds_tcp_accept_work(struct sock *sk);
-
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+			__u32 scope_id);
 /* tcp_connect.c */
 int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
 void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 101cf14215a0b..09cadd556d1e1 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -167,6 +167,12 @@ int rds_tcp_accept_one(struct socket *sock)
 	}
 #endif
 
+	if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) {
+		/* local address connection is only allowed via loopback */
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
 	conn = rds_conn_create(sock_net(sock->sk),
 			       my_addr, peer_addr,
 			       &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
-- 
GitLab


From 8f03eeb6e0a0a0b8d617ee0a4bce729e47130036 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=8D=C3=B1igo=20Huguet?= <ihuguet@redhat.com>
Date: Fri, 21 May 2021 16:38:35 +0200
Subject: [PATCH 1334/3804] net:sfc: fix non-freed irq in legacy irq mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SFC driver can be configured via modparam to work using MSI-X, MSI or
legacy IRQ interrupts. In the last one, the interrupt was not properly
released on module remove.

It was not freed because the flag irqs_hooked was not set during
initialization in the case of using legacy IRQ.

Example of (trimmed) trace during module remove without this fix:

remove_proc_entry: removing non-empty directory 'irq/125', leaking at least '0000:3b:00.1'
WARNING: CPU: 39 PID: 3658 at fs/proc/generic.c:715 remove_proc_entry+0x15c/0x170
...trimmed...
Call Trace:
 unregister_irq_proc+0xe3/0x100
 free_desc+0x29/0x70
 irq_free_descs+0x47/0x70
 mp_unmap_irq+0x58/0x60
 acpi_unregister_gsi_ioapic+0x2a/0x40
 acpi_pci_irq_disable+0x78/0xb0
 pci_disable_device+0xd1/0x100
 efx_pci_remove+0xa1/0x1e0 [sfc]
 pci_device_remove+0x38/0xa0
 __device_release_driver+0x177/0x230
 driver_detach+0xcb/0x110
 bus_remove_driver+0x58/0xd0
 pci_unregister_driver+0x2a/0xb0
 efx_exit_module+0x24/0xf40 [sfc]
 __do_sys_delete_module.constprop.0+0x171/0x280
 ? exit_to_user_mode_prepare+0x83/0x1d0
 do_syscall_64+0x3d/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f9f9385800b
...trimmed...

Signed-off-by: Íñigo Huguet <ihuguet@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/nic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index d1e908846f5dd..22fbb0ae77fba 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -90,6 +90,7 @@ int efx_nic_init_interrupt(struct efx_nic *efx)
 				  efx->pci_dev->irq);
 			goto fail1;
 		}
+		efx->irqs_hooked = true;
 		return 0;
 	}
 
-- 
GitLab


From e29f011e8fc04b2cdc742a2b9bbfa1b62518381a Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Fri, 21 May 2021 13:21:14 -0700
Subject: [PATCH 1335/3804] ipv6: record frag_max_size in atomic fragments in
 input path

Commit dbd1759e6a9c ("ipv6: on reassembly, record frag_max_size")
filled the frag_max_size field in IP6CB in the input path.
The field should also be filled in case of atomic fragments.

Fixes: dbd1759e6a9c ('ipv6: on reassembly, record frag_max_size')
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/reassembly.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 47a0dc46cbdb0..28e44782c94d1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -343,7 +343,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
-	if (!(fhdr->frag_off & htons(0xFFF9))) {
+	if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
 		/* It is not a fragmented frame */
 		skb->transport_header += sizeof(struct frag_hdr);
 		__IP6_INC_STATS(net,
@@ -351,6 +351,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
 		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+		IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
+					    sizeof(struct ipv6hdr);
 		return 1;
 	}
 
-- 
GitLab


From 7e7606330b167a0ff483fb02caed9267bfab69ee Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Mon, 17 May 2021 06:24:51 -0700
Subject: [PATCH 1336/3804] scsi: aic7xxx: Restore several defines for aic7xxx
 firmware build

With CONFIG_AIC7XXX_BUILD_FIRMWARE, there is this representative error:

  aicasm: Stopped at file ./drivers/scsi/aic7xxx/aic7xxx.seq,
    line 271 - Undefined symbol MSG_SIMPLE_Q_TAG referenced

MSG_SIMPLE_Q_TAG used to be defined in drivers/scsi/aic7xxx/scsi_message.h
as:

  #define MSG_SIMPLE_Q_TAG	0x20 /* O/O */

The new definition in include/scsi/scsi.h is:

  #define SIMPLE_QUEUE_TAG    0x20

But aicasm cannot handle all the preprocessor directives in scsi.h, so
add MSG_SIMPLE_Q_TAG and other required defines back to scsi_message.h.

Link: https://lore.kernel.org/r/20210517132451.1832233-1-trix@redhat.com
Fixes: d8cd784ff7b3 ("scsi: aic7xxx: aic79xx: Drop internal SCSI message definition")
Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aic7xxx/scsi_message.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/scsi/aic7xxx/scsi_message.h b/drivers/scsi/aic7xxx/scsi_message.h
index a7515c3039edb..53343a6d8ae19 100644
--- a/drivers/scsi/aic7xxx/scsi_message.h
+++ b/drivers/scsi/aic7xxx/scsi_message.h
@@ -3,6 +3,17 @@
  * $FreeBSD: src/sys/cam/scsi/scsi_message.h,v 1.2 2000/05/01 20:21:29 peter Exp $
  */
 
+/* Messages (1 byte) */		     /* I/T (M)andatory or (O)ptional */
+#define MSG_SAVEDATAPOINTER	0x02 /* O/O */
+#define MSG_RESTOREPOINTERS	0x03 /* O/O */
+#define MSG_DISCONNECT		0x04 /* O/O */
+#define MSG_MESSAGE_REJECT	0x07 /* M/M */
+#define MSG_NOOP		0x08 /* M/M */
+
+/* Messages (2 byte) */
+#define MSG_SIMPLE_Q_TAG	0x20 /* O/O */
+#define MSG_IGN_WIDE_RESIDUE	0x23 /* O/O */
+
 /* Identify message */		     /* M/M */	
 #define MSG_IDENTIFYFLAG	0x80 
 #define MSG_IDENTIFY_DISCFLAG	0x40 
-- 
GitLab


From b4de11dfb569043be2cb38b2b1031e64f8ee0ff6 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Mon, 17 May 2021 13:50:57 -0700
Subject: [PATCH 1337/3804] scsi: aic7xxx: Remove multiple definition of
 globals

Building aicasm with gcc 10.2 + gas 26.1 causes these errors:

  multiple definition of `args';
  multiple definition of `yylineno';

args came from the expansion of:

  STAILQ_HEAD(macro_arg_list, macro_arg) args;

The definition of the macro_arg_list structure is needed, the global
variable 'args' is not, so delete it.

yylineno is defined by flex, so defining it in bison/*.y file is not
needed. Also delete this.

Link: https://lore.kernel.org/r/20210517205057.1850010-1-trix@redhat.com
Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aic7xxx/aicasm/aicasm_gram.y   | 1 -
 drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y
index 924d55a8acbfc..65182ad9cdf82 100644
--- a/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y
+++ b/drivers/scsi/aic7xxx/aicasm/aicasm_gram.y
@@ -58,7 +58,6 @@
 #include "aicasm_symbol.h"
 #include "aicasm_insformat.h"
 
-int yylineno;
 char *yyfilename;
 char stock_prefix[] = "aic_";
 char *prefix = stock_prefix;
diff --git a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h
index 7bf7fd5953ac9..ed3bdd43c2976 100644
--- a/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h
+++ b/drivers/scsi/aic7xxx/aicasm/aicasm_symbol.h
@@ -108,7 +108,7 @@ struct macro_arg {
 	regex_t	arg_regex;
 	char   *replacement_text;
 };
-STAILQ_HEAD(macro_arg_list, macro_arg) args;
+STAILQ_HEAD(macro_arg_list, macro_arg);
 
 struct macro_info {
 	struct macro_arg_list args;
-- 
GitLab


From 122c81c563b0c1c6b15ff76a9159af5ee1f21563 Mon Sep 17 00:00:00 2001
From: Javed Hasan <jhasan@marvell.com>
Date: Tue, 18 May 2021 23:14:16 -0700
Subject: [PATCH 1338/3804] scsi: bnx2fc: Return failure if io_req is already
 in ABTS processing

Return failure from bnx2fc_eh_abort() if io_req is already in ABTS
processing.

Link: https://lore.kernel.org/r/20210519061416.19321-1-jhasan@marvell.com
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 1a0dc18d69155..ed300a279a387 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1220,6 +1220,7 @@ int bnx2fc_eh_abort(struct scsi_cmnd *sc_cmd)
 		   was a result from the ABTS request rather than the CLEANUP
 		   request */
 		set_bit(BNX2FC_FLAG_IO_CLEANUP,	&io_req->req_flags);
+		rc = FAILED;
 		goto done;
 	}
 
-- 
GitLab


From e662502b3a782d479e67736a5a1c169a703d853a Mon Sep 17 00:00:00 2001
From: Matt Wang <wwentao@vmware.com>
Date: Wed, 19 May 2021 09:49:32 +0000
Subject: [PATCH 1339/3804] scsi: vmw_pvscsi: Set correct residual data length

Some commands (such as INQUIRY) may return less data than the initiator
requested. To avoid handing useless data to the upper layer, set the correct
residual count so that it knows how much data was actually returned.

Before (INQUIRY PAGE 0xB0 with 128B buffer):

$ sg_raw -r 128 /dev/sda 12 01 B0 00 80 00
SCSI Status: Good

Received 128 bytes of data:
 00 00 b0 00 3c 01 00 00 00 00 00 00 00 00 00 00 00 ...<............
 10 00 00 00 00 00 01 00 00 00 00 00 40 00 00 08 00 ...........@....
 20 80 00 00 00 00 00 00 00 00 00 20 00 00 00 00 00 .......... .....
 30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
 40 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
 50 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
 60 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
 70 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................

After:

$ sg_raw -r 128 /dev/sda 12 01 B0 00 80 00
SCSI Status: Good

Received 64 bytes of data:
00 00 b0 00 3c 01 00 00 00 00 00 00 00 00 00 00 00 ...<............
10 00 00 00 00 00 01 00 00 00 00 00 40 00 00 08 00 ...........@....
20 80 00 00 00 00 00 00 00 00 00 20 00 00 00 00 00 .......... .....
30 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................

[mkp: clarified description]

Link: https://lore.kernel.org/r/03C41093-B62E-43A2-913E-CFC92F1C70C3@vmware.com
Signed-off-by: Matt Wang <wwentao@vmware.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/vmw_pvscsi.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 8a79605d96521..b9969fce6b4d1 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -585,7 +585,13 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
 		case BTSTAT_SUCCESS:
 		case BTSTAT_LINKED_COMMAND_COMPLETED:
 		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
-			/* If everything went fine, let's move on..  */
+			/*
+			 * Commands like INQUIRY may transfer less data than
+			 * requested by the initiator via bufflen. Set residual
+			 * count to make upper layer aware of the actual amount
+			 * of data returned.
+			 */
+			scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
 			cmd->result = (DID_OK << 16);
 			break;
 
-- 
GitLab


From 7907a021e4bbfa29cccacd2ba2dade894d9a7d4c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 19 May 2021 21:05:19 +0800
Subject: [PATCH 1340/3804] scsi: hisi_sas: Drop free_irq() of
 devm_request_irq() allocated irq

irqs allocated with devm_request_irq() should not be freed using
free_irq(). Doing so causes a dangling pointer and a subsequent double
free.

Link: https://lore.kernel.org/r/20210519130519.2661938-1-yangyingliang@huawei.com
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: John Garry <john.garry@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 499c770d405c7..e954083140786 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -4811,14 +4811,14 @@ hisi_sas_v3_destroy_irqs(struct pci_dev *pdev, struct hisi_hba *hisi_hba)
 {
 	int i;
 
-	free_irq(pci_irq_vector(pdev, 1), hisi_hba);
-	free_irq(pci_irq_vector(pdev, 2), hisi_hba);
-	free_irq(pci_irq_vector(pdev, 11), hisi_hba);
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 1), hisi_hba);
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 2), hisi_hba);
+	devm_free_irq(&pdev->dev, pci_irq_vector(pdev, 11), hisi_hba);
 	for (i = 0; i < hisi_hba->cq_nvecs; i++) {
 		struct hisi_sas_cq *cq = &hisi_hba->cq[i];
 		int nr = hisi_sas_intr_conv ? 16 : 16 + i;
 
-		free_irq(pci_irq_vector(pdev, nr), cq);
+		devm_free_irq(&pdev->dev, pci_irq_vector(pdev, nr), cq);
 	}
 	pci_free_irq_vectors(pdev);
 }
-- 
GitLab


From 2ef7665dfd88830f15415ba007c7c9a46be7acd8 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Thu, 15 Apr 2021 23:35:54 +0300
Subject: [PATCH 1341/3804] scsi: target: qla2xxx: Wait for stop_phase1 at WWN
 removal

Target de-configuration panics at high CPU load because TPGT and WWPN can
be removed on separate threads.

TPGT removal requests a reset HBA on a separate thread and waits for reset
complete (phase1). Due to high CPU load that HBA reset can be delayed for
some time.

WWPN removal does qlt_stop_phase2(). There it is believed that phase1 has
already completed and thus tgt.tgt_ops is subsequently cleared. However,
tgt.tgt_ops is needed to process incoming traffic and therefore this will
cause one of the following panics:

NIP qlt_reset+0x7c/0x220 [qla2xxx]
LR  qlt_reset+0x68/0x220 [qla2xxx]
Call Trace:
0xc000003ffff63a78 (unreliable)
qlt_handle_imm_notify+0x800/0x10c0 [qla2xxx]
qlt_24xx_atio_pkt+0x208/0x590 [qla2xxx]
qlt_24xx_process_atio_queue+0x33c/0x7a0 [qla2xxx]
qla83xx_msix_atio_q+0x54/0x90 [qla2xxx]

or

NIP qlt_24xx_handle_abts+0xd0/0x2a0 [qla2xxx]
LR  qlt_24xx_handle_abts+0xb4/0x2a0 [qla2xxx]
Call Trace:
qlt_24xx_handle_abts+0x90/0x2a0 [qla2xxx] (unreliable)
qlt_24xx_process_atio_queue+0x500/0x7a0 [qla2xxx]
qla83xx_msix_atio_q+0x54/0x90 [qla2xxx]

or

NIP qlt_create_sess+0x90/0x4e0 [qla2xxx]
LR  qla24xx_do_nack_work+0xa8/0x180 [qla2xxx]
Call Trace:
0xc0000000348fba30 (unreliable)
qla24xx_do_nack_work+0xa8/0x180 [qla2xxx]
qla2x00_do_work+0x674/0xbf0 [qla2xxx]
qla2x00_iocb_work_fn

The patch fixes the issue by serializing qlt_stop_phase1() and
qlt_stop_phase2() functions to make WWPN removal wait for phase1
completion.

Link: https://lore.kernel.org/r/20210415203554.27890-1-d.bogdanov@yadro.com
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_target.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index b2008fb1dd386..12a6848ade435 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1563,10 +1563,12 @@ void qlt_stop_phase2(struct qla_tgt *tgt)
 		return;
 	}
 
+	mutex_lock(&tgt->ha->optrom_mutex);
 	mutex_lock(&vha->vha_tgt.tgt_mutex);
 	tgt->tgt_stop = 0;
 	tgt->tgt_stopped = 1;
 	mutex_unlock(&vha->vha_tgt.tgt_mutex);
+	mutex_unlock(&tgt->ha->optrom_mutex);
 
 	ql_dbg(ql_dbg_tgt_mgt, vha, 0xf00c, "Stop of tgt %p finished\n",
 	    tgt);
-- 
GitLab


From 119b75c150773425a89033215eab4d15d4198f8b Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Sat, 22 May 2021 11:47:41 +0800
Subject: [PATCH 1342/3804] ALSA: hda/realtek: Headphone volume is controlled
 by Front mixer

On some ASUS and MSI machines, the audio codec is alc1220 and the
Headphone is connected to audio mixer 0xf and DAC 0x5. In theory
the Headphone volume is controlled by DAC 0x5 (Headphone Playback
Volume), but somehow it is controlled by DAC 0x2 (Front Playback
Volume); maybe this is a defect in the alc1220 codec.

Because of this issue, the PA couldn't switch between the headphone and
Lineout correctly. If we apply the quirk CLEVO_P950 to those machines,
the Lineout and Headphone will share the audio mixer 0xc and DAC 0x2,
and generate a Headphone+LO mixer; then PA can handle them when
switching between them.

BugLink: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/1206
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20210522034741.13415-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 552e2cb73291e..ffaeb8d3c316e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2603,6 +2603,28 @@ static const struct hda_model_fixup alc882_fixup_models[] = {
 	{}
 };
 
+static const struct snd_hda_pin_quirk alc882_pin_fixup_tbl[] = {
+	SND_HDA_PIN_QUIRK(0x10ec1220, 0x1043, "ASUS", ALC1220_FIXUP_CLEVO_P950,
+		{0x14, 0x01014010},
+		{0x15, 0x01011012},
+		{0x16, 0x01016011},
+		{0x18, 0x01a19040},
+		{0x19, 0x02a19050},
+		{0x1a, 0x0181304f},
+		{0x1b, 0x0221401f},
+		{0x1e, 0x01456130}),
+	SND_HDA_PIN_QUIRK(0x10ec1220, 0x1462, "MS-7C35", ALC1220_FIXUP_CLEVO_P950,
+		{0x14, 0x01015010},
+		{0x15, 0x01011012},
+		{0x16, 0x01011011},
+		{0x18, 0x01a11040},
+		{0x19, 0x02a19050},
+		{0x1a, 0x0181104f},
+		{0x1b, 0x0221401f},
+		{0x1e, 0x01451130}),
+	{}
+};
+
 /*
  * BIOS auto configuration
  */
@@ -2644,6 +2666,7 @@ static int patch_alc882(struct hda_codec *codec)
 
 	snd_hda_pick_fixup(codec, alc882_fixup_models, alc882_fixup_tbl,
 		       alc882_fixups);
+	snd_hda_pick_pin_fixup(codec, alc882_pin_fixup_tbl, alc882_fixups, true);
 	snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE);
 
 	alc_auto_parse_customize_define(codec);
-- 
GitLab


From 9ebaef0540a981093bce5df15af32354d32391d9 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Sat, 22 May 2021 12:26:45 +0800
Subject: [PATCH 1343/3804] ALSA: hda/realtek: the bass speaker can't output
 sound on Yoga 9i

The Lenovo Yoga 9i has a bass speaker, but the bass speaker doesn't work
because there is an i2s amplifier on that speaker; we need to run
ideapad_s740_coef() to initialize the amplifier.

We also need to apply ALC285_FIXUP_THINKPAD_HEADSET_JACK to rename
the speaker's mixer control name; otherwise the PA can't handle them.

BugLink: http://bugs.launchpad.net/bugs/1926165
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210522042645.14221-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ffaeb8d3c316e..6571c37137324 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6566,6 +6566,7 @@ enum {
 	ALC295_FIXUP_ASUS_DACS,
 	ALC295_FIXUP_HP_OMEN,
 	ALC285_FIXUP_HP_SPECTRE_X360,
+	ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8132,6 +8133,12 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1,
 	},
+	[ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc285_fixup_ideapad_s740_coef,
+		.chained = true,
+		.chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8500,6 +8507,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
 	SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF),
+	SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP),
 	SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
 	SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -8715,6 +8723,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
 	{.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
 	{.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"},
+	{.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
-- 
GitLab


From 2a54c8c9ebc2006bf72554afc84ffc67768979a0 Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rui.silva@linaro.org>
Date: Wed, 12 May 2021 23:39:29 +0100
Subject: [PATCH 1344/3804] iio: gyro: fxas21002c: balance runtime power in
 error path

If we fail to read temperature or axis we need to decrement the
runtime pm reference count to trigger autosuspend.

Add the call to pm_put to do that in case of error.

Fixes: a0701b6263ae ("iio: gyro: add core driver for fxas21002c")
Suggested-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Rui Miguel Silva <rui.silva@linaro.org>
Link: https://lore.kernel.org/linux-iio/CBBZA9T1OY9C.2611WSV49DV2G@arch-thunder/
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/fxas21002c_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iio/gyro/fxas21002c_core.c b/drivers/iio/gyro/fxas21002c_core.c
index 1a20c6b88e7db..645461c704547 100644
--- a/drivers/iio/gyro/fxas21002c_core.c
+++ b/drivers/iio/gyro/fxas21002c_core.c
@@ -399,6 +399,7 @@ static int fxas21002c_temp_get(struct fxas21002c_data *data, int *val)
 	ret = regmap_field_read(data->regmap_fields[F_TEMP], &temp);
 	if (ret < 0) {
 		dev_err(dev, "failed to read temp: %d\n", ret);
+		fxas21002c_pm_put(data);
 		goto data_unlock;
 	}
 
@@ -432,6 +433,7 @@ static int fxas21002c_axis_get(struct fxas21002c_data *data,
 			       &axis_be, sizeof(axis_be));
 	if (ret < 0) {
 		dev_err(dev, "failed to read axis: %d: %d\n", index, ret);
+		fxas21002c_pm_put(data);
 		goto data_unlock;
 	}
 
-- 
GitLab


From 98b7b0ca0828907dbb706387c11356a45463e2ea Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 10 May 2021 12:56:49 +0300
Subject: [PATCH 1345/3804] iio: dac: ad5770r: Put fwnode in error case during
 ->probe()

device_for_each_child_node() bumps the reference count of the returned child
node. We have to balance it whenever we return to the caller from inside the
loop.

Fixes: cbbb819837f6 ("iio: dac: ad5770r: Add AD5770R support")
Cc: Alexandru Tachici <alexandru.tachici@analog.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Alexandru Ardelean <ardeleanalex@gmail.com>
Link: https://lore.kernel.org/r/20210510095649.3302835-1-andy.shevchenko@gmail.com
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5770r.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c
index 7ab2ccf908639..8107f7bbbe3c5 100644
--- a/drivers/iio/dac/ad5770r.c
+++ b/drivers/iio/dac/ad5770r.c
@@ -524,23 +524,29 @@ static int ad5770r_channel_config(struct ad5770r_state *st)
 	device_for_each_child_node(&st->spi->dev, child) {
 		ret = fwnode_property_read_u32(child, "num", &num);
 		if (ret)
-			return ret;
-		if (num >= AD5770R_MAX_CHANNELS)
-			return -EINVAL;
+			goto err_child_out;
+		if (num >= AD5770R_MAX_CHANNELS) {
+			ret = -EINVAL;
+			goto err_child_out;
+		}
 
 		ret = fwnode_property_read_u32_array(child,
 						     "adi,range-microamp",
 						     tmp, 2);
 		if (ret)
-			return ret;
+			goto err_child_out;
 
 		min = tmp[0] / 1000;
 		max = tmp[1] / 1000;
 		ret = ad5770r_store_output_range(st, min, max, num);
 		if (ret)
-			return ret;
+			goto err_child_out;
 	}
 
+	return 0;
+
+err_child_out:
+	fwnode_handle_put(child);
 	return ret;
 }
 
-- 
GitLab


From a1caeebab07e9d72eec534489f47964782b93ba9 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sat, 1 May 2021 17:53:13 +0100
Subject: [PATCH 1346/3804] iio: adc: ad7768-1: Fix too small buffer passed to
 iio_push_to_buffers_with_timestamp()

Add space for the timestamp to be inserted.  Also ensure correct
alignment for passing to iio_push_to_buffers_with_timestamp()

Fixes: a5f8c7da3dbe ("iio: adc: Add AD7768-1 ADC basic support")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210501165314.511954-2-jic23@kernel.org
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7768-1.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c
index c945f1349623f..60f21fed6dcbe 100644
--- a/drivers/iio/adc/ad7768-1.c
+++ b/drivers/iio/adc/ad7768-1.c
@@ -167,6 +167,10 @@ struct ad7768_state {
 	 * transfer buffers to live in their own cache lines.
 	 */
 	union {
+		struct {
+			__be32 chan;
+			s64 timestamp;
+		} scan;
 		__be32 d32;
 		u8 d8[2];
 	} data ____cacheline_aligned;
@@ -469,11 +473,11 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p)
 
 	mutex_lock(&st->lock);
 
-	ret = spi_read(st->spi, &st->data.d32, 3);
+	ret = spi_read(st->spi, &st->data.scan.chan, 3);
 	if (ret < 0)
 		goto err_unlock;
 
-	iio_push_to_buffers_with_timestamp(indio_dev, &st->data.d32,
+	iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan,
 					   iio_get_time_ns(indio_dev));
 
 	iio_trigger_notify_done(indio_dev->trig);
-- 
GitLab


From 01fcf129f61b26d5b3d2d8afb03e770dee271bc8 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sat, 1 May 2021 17:53:14 +0100
Subject: [PATCH 1347/3804] iio: adc: ad7923: Fix undersized rx buffer.

Fixes tag is where the max channels became 8, but timestamp space was missing
before that.

Fixes: 851644a60d20 ("iio: adc: ad7923: Add support for the ad7908/ad7918/ad7928")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Daniel Junho <djunho@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210501165314.511954-3-jic23@kernel.org
Cc: <Stable@vger.kernel.org>
---
 drivers/iio/adc/ad7923.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c
index 9a649745cd0a8..069b561ee7689 100644
--- a/drivers/iio/adc/ad7923.c
+++ b/drivers/iio/adc/ad7923.c
@@ -59,8 +59,10 @@ struct ad7923_state {
 	/*
 	 * DMA (thus cache coherency maintenance) requires the
 	 * transfer buffers to live in their own cache lines.
+	 * Ensure rx_buf can be directly used in iio_push_to_buffers_with_timestamp
+	 * Length = 8 channels + 4 extra for 8 byte timestamp
 	 */
-	__be16				rx_buf[4] ____cacheline_aligned;
+	__be16				rx_buf[12] ____cacheline_aligned;
 	__be16				tx_buf[4];
 };
 
-- 
GitLab


From 4ed243b1da169bcbc1ec5507867e56250c5f1ff9 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 14 May 2021 16:02:54 +0800
Subject: [PATCH 1348/3804] iio: adc: ad7793: Add missing error code in
 ad7793_setup()

Set the error code when the device ID query fails.

Fixes: 88bc30548aae ("IIO: ADC: New driver for AD7792/AD7793 3 Channel SPI ADC")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7793.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c
index 5e980a06258e6..440ef4c7be074 100644
--- a/drivers/iio/adc/ad7793.c
+++ b/drivers/iio/adc/ad7793.c
@@ -279,6 +279,7 @@ static int ad7793_setup(struct iio_dev *indio_dev,
 	id &= AD7793_ID_MASK;
 
 	if (id != st->chip_info->id) {
+		ret = -ENODEV;
 		dev_err(&st->sd.spi->dev, "device ID query failed\n");
 		goto out;
 	}
-- 
GitLab


From 668a84c1bfb2b3fd5a10847825a854d63fac7baa Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Wed, 24 Mar 2021 22:54:35 +0800
Subject: [PATCH 1349/3804] efi/fdt: fix panic when no valid fdt found

setup_arch() invokes efi_init()->efi_get_fdt_params(). If no
valid fdt is found then initial_boot_params will be null, so we
should stop further fdt processing here. I encountered this
issue on risc-v.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Fixes: b91540d52a08b ("RISC-V: Add EFI runtime services")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/fdtparams.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/fdtparams.c b/drivers/firmware/efi/fdtparams.c
index bb042ab7c2be6..e901f8564ca0c 100644
--- a/drivers/firmware/efi/fdtparams.c
+++ b/drivers/firmware/efi/fdtparams.c
@@ -98,6 +98,9 @@ u64 __init efi_get_fdt_params(struct efi_memory_map_data *mm)
 	BUILD_BUG_ON(ARRAY_SIZE(target) != ARRAY_SIZE(name));
 	BUILD_BUG_ON(ARRAY_SIZE(target) != ARRAY_SIZE(dt_params[0].params));
 
+	if (!fdt)
+		return 0;
+
 	for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
 		node = fdt_path_offset(fdt, dt_params[i].path);
 		if (node < 0)
-- 
GitLab


From 45add3cc99feaaf57d4b6f01d52d532c16a1caee Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 30 Apr 2021 16:22:51 +0200
Subject: [PATCH 1350/3804] efi: Allow EFI_MEMORY_XP and EFI_MEMORY_RO both to
 be cleared

UEFI spec 2.9, p.108, table 4-1 lists the scenario that both attributes
are cleared with the description "No memory access protection is
possible for Entry". So we can have valid entries where both attributes
are cleared, so remove the check.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Fixes: 10f0d2f577053 ("efi: Implement generic support for the Memory Attributes table")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/memattr.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c
index 5737cb0fcd44e..0a9aba5f9ceff 100644
--- a/drivers/firmware/efi/memattr.c
+++ b/drivers/firmware/efi/memattr.c
@@ -67,11 +67,6 @@ static bool entry_is_valid(const efi_memory_desc_t *in, efi_memory_desc_t *out)
 		return false;
 	}
 
-	if (!(in->attribute & (EFI_MEMORY_RO | EFI_MEMORY_XP))) {
-		pr_warn("Entry attributes invalid: RO and XP bits both cleared\n");
-		return false;
-	}
-
 	if (PAGE_SIZE > EFI_PAGE_SIZE &&
 	    (!PAGE_ALIGNED(in->phys_addr) ||
 	     !PAGE_ALIGNED(in->num_pages << EFI_PAGE_SHIFT))) {
-- 
GitLab


From c4039b29fe9637e1135912813f830994af4c867f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 Apr 2021 14:48:31 +0300
Subject: [PATCH 1351/3804] efi/libstub: prevent read overflow in
 find_file_option()

If the buffer has slashes up to the end then this will read past the end
of the array.  I don't anticipate that this is an issue for many people
in real life, but it's the right thing to do and it makes static
checkers happy.

Fixes: 7a88a6227dc7 ("efi/libstub: Fix path separator regression")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/libstub/file.c b/drivers/firmware/efi/libstub/file.c
index 4e81c6077188e..dd95f330fe6e1 100644
--- a/drivers/firmware/efi/libstub/file.c
+++ b/drivers/firmware/efi/libstub/file.c
@@ -103,7 +103,7 @@ static int find_file_option(const efi_char16_t *cmdline, int cmdline_len,
 		return 0;
 
 	/* Skip any leading slashes */
-	while (cmdline[i] == L'/' || cmdline[i] == L'\\')
+	while (i < cmdline_len && (cmdline[i] == L'/' || cmdline[i] == L'\\'))
 		i++;
 
 	while (--result_len > 0 && i < cmdline_len) {
-- 
GitLab


From 942859d969de7f6f7f2659a79237a758b42782da Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 21 Apr 2021 21:46:36 +0200
Subject: [PATCH 1352/3804] efi: cper: fix snprintf() use in
 cper_dimm_err_location()

snprintf() should be given the full buffer size, not one less. And it
guarantees nul-termination, so doing it manually afterwards is
pointless.

It's even potentially harmful (though probably not in practice because
CPER_REC_LEN is 256), due to the "return how much would have been
written had the buffer been big enough" semantics. I.e., if the bank
and/or device strings are long enough that the "DIMM location ..."
output gets truncated, writing to msg[n] is a buffer overflow.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Fixes: 3760cd20402d4 ("CPER: Adjust code flow of some functions")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/cper.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index e15d484b6a5a7..ea7ca74fc1730 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -276,8 +276,7 @@ static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
 		return 0;
 
-	n = 0;
-	len = CPER_REC_LEN - 1;
+	len = CPER_REC_LEN;
 	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
 	if (bank && device)
 		n = snprintf(msg, len, "DIMM location: %s %s ", bank, device);
@@ -286,7 +285,6 @@ static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
 			     "DIMM location: not present. DMI handle: 0x%.4x ",
 			     mem->mem_dev_handle);
 
-	msg[n] = '\0';
 	return n;
 }
 
-- 
GitLab


From 55fc610c8cdae353737dbc2d59febd3c1a697095 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 4 Mar 2021 11:28:37 +0300
Subject: [PATCH 1353/3804] efi/apple-properties: Handle device properties with
 software node API

The old device property API is going to be removed.
Replace the device_add_properties() call with the software
node API equivalent, device_create_managed_software_node().

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/apple-properties.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index e1926483ae2fd..4c3201e290e29 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -157,7 +157,7 @@ static int __init unmarshal_devices(struct properties_header *properties)
 		if (!entry[0].name)
 			goto skip_device;
 
-		ret = device_add_properties(dev, entry); /* makes deep copy */
+		ret = device_create_managed_software_node(dev, entry, NULL);
 		if (ret)
 			dev_err(dev, "error %d assigning properties\n", ret);
 
-- 
GitLab


From edbd1bc4951eff8da65732dbe0d381e555054428 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Sun, 4 Apr 2021 21:12:16 +0300
Subject: [PATCH 1354/3804] efi/dev-path-parser: Switch to use
 for_each_acpi_dev_match()

Switch to use for_each_acpi_dev_match() instead of home grown analogue.
No functional change intended.

Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/dev-path-parser.c | 49 ++++++++++----------------
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c
index 5c9625e552f4f..10d4457417a4f 100644
--- a/drivers/firmware/efi/dev-path-parser.c
+++ b/drivers/firmware/efi/dev-path-parser.c
@@ -12,52 +12,39 @@
 #include <linux/efi.h>
 #include <linux/pci.h>
 
-struct acpi_hid_uid {
-	struct acpi_device_id hid[2];
-	char uid[11]; /* UINT_MAX + null byte */
-};
-
-static int __init match_acpi_dev(struct device *dev, const void *data)
-{
-	struct acpi_hid_uid hid_uid = *(const struct acpi_hid_uid *)data;
-	struct acpi_device *adev = to_acpi_device(dev);
-
-	if (acpi_match_device_ids(adev, hid_uid.hid))
-		return 0;
-
-	if (adev->pnp.unique_id)
-		return !strcmp(adev->pnp.unique_id, hid_uid.uid);
-	else
-		return !strcmp("0", hid_uid.uid);
-}
-
 static long __init parse_acpi_path(const struct efi_dev_path *node,
 				   struct device *parent, struct device **child)
 {
-	struct acpi_hid_uid hid_uid = {};
+	char hid[ACPI_ID_LEN], uid[11]; /* UINT_MAX + null byte */
+	struct acpi_device *adev;
 	struct device *phys_dev;
 
 	if (node->header.length != 12)
 		return -EINVAL;
 
-	sprintf(hid_uid.hid[0].id, "%c%c%c%04X",
+	sprintf(hid, "%c%c%c%04X",
 		'A' + ((node->acpi.hid >> 10) & 0x1f) - 1,
 		'A' + ((node->acpi.hid >>  5) & 0x1f) - 1,
 		'A' + ((node->acpi.hid >>  0) & 0x1f) - 1,
 			node->acpi.hid >> 16);
-	sprintf(hid_uid.uid, "%u", node->acpi.uid);
-
-	*child = bus_find_device(&acpi_bus_type, NULL, &hid_uid,
-				 match_acpi_dev);
-	if (!*child)
+	sprintf(uid, "%u", node->acpi.uid);
+
+	for_each_acpi_dev_match(adev, hid, NULL, -1) {
+		if (adev->pnp.unique_id && !strcmp(adev->pnp.unique_id, uid))
+			break;
+		if (!adev->pnp.unique_id && node->acpi.uid == 0)
+			break;
+		acpi_dev_put(adev);
+	}
+	if (!adev)
 		return -ENODEV;
 
-	phys_dev = acpi_get_first_physical_node(to_acpi_device(*child));
+	phys_dev = acpi_get_first_physical_node(adev);
 	if (phys_dev) {
-		get_device(phys_dev);
-		put_device(*child);
-		*child = phys_dev;
-	}
+		*child = get_device(phys_dev);
+		acpi_dev_put(adev);
+	} else
+		*child = &adev->dev;
 
 	return 0;
 }
-- 
GitLab


From bb11580f61b6c4ba5c35706abd927c8ac8c32852 Mon Sep 17 00:00:00 2001
From: Paul Menzel <pmenzel@molgen.mpg.de>
Date: Sat, 15 May 2021 10:14:04 +0200
Subject: [PATCH 1355/3804] x86/efi: Log 32/64-bit mismatch with kernel as an
 error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Log the message

    No EFI runtime due to 32/64-bit mismatch with kernel

as an error condition, as several things like efivarfs won’t work
without the EFI runtime.

Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/platform/efi/efi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 8a26e705cb060..147c30a81f15b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -468,7 +468,7 @@ void __init efi_init(void)
 	 */
 
 	if (!efi_runtime_supported())
-		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
+		pr_err("No EFI runtime due to 32/64-bit mismatch with kernel\n");
 
 	if (!efi_runtime_supported() || efi_runtime_disabled()) {
 		efi_memmap_unmap();
-- 
GitLab


From dabea675faf16e8682aa478ff3ce65dd775620bc Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Thu, 8 Apr 2021 13:02:18 +0200
Subject: [PATCH 1356/3804] arm64: dts: ls1028a: fix memory node

While enabling EDAC support for the LS1028A it was discovered that the
memory node has a wrong endianness setting as well as a wrong interrupt
assignment. Fix both.

This was tested on a sl28 board. To force ECC errors, you can use the
error injection supported by the controller in hardware (with
CONFIG_EDAC_DEBUG enabled):

 # enable error injection
 $ echo 0x100 > /sys/devices/system/edac/mc/mc0/inject_ctrl
 # flip lowest bit of the data
 $ echo 0x1 > /sys/devices/system/edac/mc/mc0/inject_data_lo

Fixes: 8897f3255c9c ("arm64: dts: Add support for NXP LS1028A SoC")
Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index eca06a0c3cf87..a30249ebffa8c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -197,8 +197,8 @@
 		ddr: memory-controller@1080000 {
 			compatible = "fsl,qoriq-memory-controller";
 			reg = <0x0 0x1080000 0x0 0x1000>;
-			interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>;
-			big-endian;
+			interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+			little-endian;
 		};
 
 		dcfg: syscon@1e00000 {
-- 
GitLab


From 5c93a2ebc7ad85046fab23f8ac297fc3a86bc903 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:41 -0500
Subject: [PATCH 1357/3804] hwrng: omap - Enable driver for TI K3 family

The TI K3 family of SoCs have a SA2UL IP that contains a
SafeXcel IP-76 RNG block which is supported by the OMAP
RNG driver. Allow this driver to be built for TI K3
family as well.

Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 1fe006f3f12fa..6450074c0ad7d 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -165,7 +165,7 @@ config HW_RANDOM_IXP4XX
 
 config HW_RANDOM_OMAP
 	tristate "OMAP Random Number Generator support"
-	depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU
+	depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU || ARCH_K3
 	default HW_RANDOM
 	help
  	  This driver provides kernel-side support for the Random Number
-- 
GitLab


From 4c0716ee1d973f6504d13f0e8d4d10350c85ad37 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:42 -0500
Subject: [PATCH 1358/3804] crypto: sa2ul - Fix leaks on failure paths with
 sa_dma_init()

The sa_dma_init() function doesn't release the requested dma channels
on all failure paths. Any failure in this function also ends up
leaking the dma pool created in sa_init_mem() in the sa_ul_probe()
function. Fix all of these issues.

Fixes: 7694b6ca649f ("crypto: sa2ul - Add crypto driver")
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 1c6929fb3a131..3d6f0af2f9388 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2300,9 +2300,9 @@ static int sa_dma_init(struct sa_crypto_data *dd)
 
 	dd->dma_rx2 = dma_request_chan(dd->dev, "rx2");
 	if (IS_ERR(dd->dma_rx2)) {
-		dma_release_channel(dd->dma_rx1);
-		return dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2),
-				     "Unable to request rx2 DMA channel\n");
+		ret = dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2),
+				    "Unable to request rx2 DMA channel\n");
+		goto err_dma_rx2;
 	}
 
 	dd->dma_tx = dma_request_chan(dd->dev, "tx");
@@ -2323,28 +2323,31 @@ static int sa_dma_init(struct sa_crypto_data *dd)
 	if (ret) {
 		dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
 			ret);
-		return ret;
+		goto err_dma_config;
 	}
 
 	ret = dmaengine_slave_config(dd->dma_rx2, &cfg);
 	if (ret) {
 		dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
 			ret);
-		return ret;
+		goto err_dma_config;
 	}
 
 	ret = dmaengine_slave_config(dd->dma_tx, &cfg);
 	if (ret) {
 		dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
 			ret);
-		return ret;
+		goto err_dma_config;
 	}
 
 	return 0;
 
+err_dma_config:
+	dma_release_channel(dd->dma_tx);
 err_dma_tx:
-	dma_release_channel(dd->dma_rx1);
 	dma_release_channel(dd->dma_rx2);
+err_dma_rx2:
+	dma_release_channel(dd->dma_rx1);
 
 	return ret;
 }
@@ -2414,7 +2417,7 @@ static int sa_ul_probe(struct platform_device *pdev)
 	sa_init_mem(dev_data);
 	ret = sa_dma_init(dev_data);
 	if (ret)
-		goto disable_pm_runtime;
+		goto destroy_dma_pool;
 
 	match = of_match_node(of_match, dev->of_node);
 	if (!match) {
@@ -2454,9 +2457,9 @@ release_dma:
 	dma_release_channel(dev_data->dma_rx1);
 	dma_release_channel(dev_data->dma_tx);
 
+destroy_dma_pool:
 	dma_pool_destroy(dev_data->sc_pool);
 
-disable_pm_runtime:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-- 
GitLab


From 5c8552325e013cbdabc443cd1f1b4d03c4a2e64e Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:43 -0500
Subject: [PATCH 1359/3804] crypto: sa2ul - Fix pm_runtime enable in
 sa_ul_probe()

The pm_runtime APIs first added in commit 7694b6ca649f ("crypto: sa2ul -
Add crypto driver") were not unwound properly, and this was only partially
fixed up in commit 13343badae09 ("crypto: sa2ul - Fix PM reference leak in
sa_ul_probe()"). That commit fixed up the pm_runtime usage count but not
the state. Fix this properly.

Fixes: 13343badae09 ("crypto: sa2ul - Fix PM reference leak in sa_ul_probe()")
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 3d6f0af2f9388..a215daedf78af 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2411,6 +2411,7 @@ static int sa_ul_probe(struct platform_device *pdev)
 	if (ret < 0) {
 		dev_err(&pdev->dev, "%s: failed to get sync: %d\n", __func__,
 			ret);
+		pm_runtime_disable(dev);
 		return ret;
 	}
 
-- 
GitLab


From d699c5d0bd811e48de72aeeb8e3872c63e957745 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:44 -0500
Subject: [PATCH 1360/3804] crypto: sa2ul - Use of_device_get_match_data()
 helper

Simplify the probe function by using the of_device_get_match_data()
helper instead of open coding. The logic is also moved up to fix the
missing pm_runtime cleanup in case of a match failure.

Fixes: 0bc42311cdff ("crypto: sa2ul - Add support for AM64")
Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index a215daedf78af..9f077ec9dbb7f 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2388,7 +2388,6 @@ MODULE_DEVICE_TABLE(of, of_match);
 
 static int sa_ul_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *match;
 	struct device *dev = &pdev->dev;
 	struct device_node *node = dev->of_node;
 	struct resource *res;
@@ -2400,6 +2399,10 @@ static int sa_ul_probe(struct platform_device *pdev)
 	if (!dev_data)
 		return -ENOMEM;
 
+	dev_data->match_data = of_device_get_match_data(dev);
+	if (!dev_data->match_data)
+		return -ENODEV;
+
 	sa_k3_dev = dev;
 	dev_data->dev = dev;
 	dev_data->pdev = pdev;
@@ -2420,13 +2423,6 @@ static int sa_ul_probe(struct platform_device *pdev)
 	if (ret)
 		goto destroy_dma_pool;
 
-	match = of_match_node(of_match, dev->of_node);
-	if (!match) {
-		dev_err(dev, "No compatible match found\n");
-		return -ENODEV;
-	}
-	dev_data->match_data = match->data;
-
 	spin_lock_init(&dev_data->scid_lock);
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	saul_base = devm_ioremap_resource(dev, res);
-- 
GitLab


From daeec7388eb2c5dbff17630b76c22786ffa1e55a Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:45 -0500
Subject: [PATCH 1361/3804] crypto: sa2ul - Use
 devm_platform_ioremap_resource()

Simplify the platform_get_resource() and devm_ioremap_resource()
calls with devm_platform_ioremap_resource(). Also add error checking
and move up this block to simplify the cleanup in sa_ul_probe().

Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 9f077ec9dbb7f..216702fef9451 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2390,7 +2390,6 @@ static int sa_ul_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *node = dev->of_node;
-	struct resource *res;
 	static void __iomem *saul_base;
 	struct sa_crypto_data *dev_data;
 	int ret;
@@ -2403,9 +2402,14 @@ static int sa_ul_probe(struct platform_device *pdev)
 	if (!dev_data->match_data)
 		return -ENODEV;
 
+	saul_base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(saul_base))
+		return PTR_ERR(saul_base);
+
 	sa_k3_dev = dev;
 	dev_data->dev = dev;
 	dev_data->pdev = pdev;
+	dev_data->base = saul_base;
 	platform_set_drvdata(pdev, dev_data);
 	dev_set_drvdata(sa_k3_dev, dev_data);
 
@@ -2424,10 +2428,6 @@ static int sa_ul_probe(struct platform_device *pdev)
 		goto destroy_dma_pool;
 
 	spin_lock_init(&dev_data->scid_lock);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	saul_base = devm_ioremap_resource(dev, res);
-
-	dev_data->base = saul_base;
 
 	if (!dev_data->match_data->skip_engine_control) {
 		u32 val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN |
-- 
GitLab


From c858401cb4a884b840fa3214b8999e8feba3a59b Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 May 2021 11:12:46 -0500
Subject: [PATCH 1362/3804] crypto: sa2ul - Remove child devices in remove

The sa_ul_probe creates child devices using of_platform_populate(),
but these are not cleaned up in driver remove. Clean these up
by removing the child devices using of_platform_depopulate().

Signed-off-by: Suman Anna <s-anna@ti.com>
Reviewed-by: Tero Kristo <kristo@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 216702fef9451..51bb69bc573c3 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -2467,6 +2467,8 @@ static int sa_ul_remove(struct platform_device *pdev)
 {
 	struct sa_crypto_data *dev_data = platform_get_drvdata(pdev);
 
+	of_platform_depopulate(&pdev->dev);
+
 	sa_unregister_algos(&pdev->dev);
 
 	dma_release_channel(dev_data->dma_rx2);
-- 
GitLab


From dbbc5c06955cb9a56aed51170040a3967b79371d Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 15 May 2021 18:44:37 +0800
Subject: [PATCH 1363/3804] crypto: hisilicon/qm - initialize the device before
 doing tasks

After a PF reset, the device needs to be initialized first; only then can
the queues be restarted to execute tasks.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index ce439a0c66c9e..6a9c18f948769 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4085,6 +4085,14 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
 	struct hisi_qm *qm = pci_get_drvdata(pdev);
 	int ret;
 
+	if (qm->fun_type == QM_HW_PF) {
+		ret = qm_dev_hw_init(qm);
+		if (ret) {
+			pci_err(pdev, "Failed to init PF, ret = %d.\n", ret);
+			goto flr_done;
+		}
+	}
+
 	hisi_qm_dev_err_init(pf_qm);
 
 	ret = qm_restart(qm);
@@ -4094,12 +4102,6 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
 	}
 
 	if (qm->fun_type == QM_HW_PF) {
-		ret = qm_dev_hw_init(qm);
-		if (ret) {
-			pci_err(pdev, "Failed to init PF, ret = %d.\n", ret);
-			goto flr_done;
-		}
-
 		if (!qm->vfs_num)
 			goto flr_done;
 
-- 
GitLab


From 3121f021c00aeed599d6f5d1c737b1bc8e6a05d8 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 15 May 2021 18:44:38 +0800
Subject: [PATCH 1364/3804] crypto: hisilicon/qm - modify 'QM_RESETTING'
 clearing error

Before device reset, the driver sets the 'QM_RESETTING' flag, but after
reset, the wrong flag is cleared. This patch fixes the inconsistency
between the flag that is set and the flag that is cleared.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 6a9c18f948769..09f0370881ab8 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4122,7 +4122,7 @@ flr_done:
 	if (qm_flr_reset_complete(pdev))
 		pci_info(pdev, "FLR reset complete\n");
 
-	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
 }
 EXPORT_SYMBOL_GPL(hisi_qm_reset_done);
 
-- 
GitLab


From 3b9c24dec891d418e26032709d6f01fe3757a4a6 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 15 May 2021 18:44:39 +0800
Subject: [PATCH 1365/3804] crypto: hisilicon/qm - adjust order of device error
 configuration

To avoid an exception being reported while the error type is not yet
configured, the driver needs to configure the error type first, and then
enable the error interrupt. Hardware error initialization is also needed
before tasks are executed, so that the hardware can detect errors in time.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c |  6 ++++--
 drivers/crypto/hisilicon/qm.c             |  2 +-
 drivers/crypto/hisilicon/sec2/sec_main.c  | 24 ++++++++++-------------
 drivers/crypto/hisilicon/zip/zip_main.c   |  6 +++---
 4 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 31515ae054f8a..1f47bb5e39902 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -466,8 +466,7 @@ static void hpre_hw_error_enable(struct hisi_qm *qm)
 	/* clear HPRE hw error source if having */
 	writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT);
 
-	/* enable hpre hw error interrupts */
-	writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
+	/* configure error type */
 	writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB);
 	writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB);
 	writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB);
@@ -476,6 +475,9 @@ static void hpre_hw_error_enable(struct hisi_qm *qm)
 	val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
 	val |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
 	writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+
+	/* enable hpre hw error interrupts */
+	writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
 }
 
 static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 09f0370881ab8..04c09053ea11c 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3917,6 +3917,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 	}
 
 	qm_restart_prepare(qm);
+	hisi_qm_dev_err_init(qm);
 
 	ret = qm_restart(qm);
 	if (ret) {
@@ -3938,7 +3939,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 		return -EPERM;
 	}
 
-	hisi_qm_dev_err_init(qm);
 	qm_restart_done(qm);
 
 	clear_bit(QM_RESETTING, &qm->misc_ctl);
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 6f0062d4408c3..e57167da6be0f 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -440,43 +440,39 @@ static void sec_hw_error_enable(struct hisi_qm *qm)
 		return;
 	}
 
-	val = readl(qm->io_base + SEC_CONTROL_REG);
-
 	/* clear SEC hw error source if having */
 	writel(SEC_CORE_INT_CLEAR, qm->io_base + SEC_CORE_INT_SOURCE);
 
-	/* enable SEC hw error interrupts */
-	writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
-
 	/* enable RAS int */
 	writel(SEC_RAS_CE_ENB_MSK, qm->io_base + SEC_RAS_CE_REG);
 	writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG);
 	writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG);
 
 	/* enable SEC block master OOO when m-bit error occur */
+	val = readl(qm->io_base + SEC_CONTROL_REG);
 	val = val | SEC_AXI_SHUTDOWN_ENABLE;
-
 	writel(val, qm->io_base + SEC_CONTROL_REG);
+
+	/* enable SEC hw error interrupts */
+	writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
 }
 
 static void sec_hw_error_disable(struct hisi_qm *qm)
 {
 	u32 val;
 
-	val = readl(qm->io_base + SEC_CONTROL_REG);
-
-	/* disable RAS int */
-	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
-	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG);
-	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
-
 	/* disable SEC hw error interrupts */
 	writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
 
 	/* disable SEC block master OOO when m-bit error occur */
+	val = readl(qm->io_base + SEC_CONTROL_REG);
 	val = val & SEC_AXI_SHUTDOWN_DISABLE;
-
 	writel(val, qm->io_base + SEC_CONTROL_REG);
+
+	/* disable RAS int */
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG);
+	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
 }
 
 static u32 sec_clear_enable_read(struct sec_debug_file *file)
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 2178b40e9f825..8e3a52218774a 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -333,13 +333,13 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
 	writel(HZIP_CORE_INT_RAS_NFE_ENABLE,
 	       qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
 
-	/* enable ZIP hw error interrupts */
-	writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
-
 	/* enable ZIP block master OOO when m-bit error occur */
 	val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
 	val = val | HZIP_AXI_SHUTDOWN_ENABLE;
 	writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+
+	/* enable ZIP hw error interrupts */
+	writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
 }
 
 static void hisi_zip_hw_error_disable(struct hisi_qm *qm)
-- 
GitLab


From b7da13d092a4919823c2b260ca7ea6ef1690b80b Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 15 May 2021 18:44:40 +0800
Subject: [PATCH 1366/3804] crypto: hisilicon/qm - enable to close master ooo
 when NFE occurs

Kunpeng930 is able to close master OOO when an NFE occurs, which stops the
device from accessing memory and executing tasks. This ensures that
errors do not spread.

This patch enables the hardware to close master ooo when an error occurs
by writing hardware registers, and ensures that the driver will not drain
qp because the hardware will empty the tasks automatically.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c |  36 ++++---
 drivers/crypto/hisilicon/qm.c             | 118 ++++++++++++++--------
 drivers/crypto/hisilicon/sec2/sec_main.c  |  36 ++++---
 drivers/crypto/hisilicon/zip/zip_main.c   |  36 ++++---
 4 files changed, 150 insertions(+), 76 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 1f47bb5e39902..13323baf393e8 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -50,6 +50,7 @@
 #define HPRE_RAS_NFE_ENB		0x301414
 #define HPRE_HAC_RAS_NFE_ENABLE		0x3ffffe
 #define HPRE_RAS_FE_ENB			0x301418
+#define HPRE_OOO_SHUTDOWN_SEL		0x301a3c
 #define HPRE_HAC_RAS_FE_ENABLE		0
 
 #define HPRE_CORE_ENB		(HPRE_CLSTR_BASE + HPRE_CORE_EN_OFFSET)
@@ -446,23 +447,36 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm)
 	hisi_qm_debug_regs_clear(qm);
 }
 
-static void hpre_hw_error_disable(struct hisi_qm *qm)
+static void hpre_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
 {
-	u32 val;
+	u32 val1, val2;
+
+	val1 = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+	if (enable) {
+		val1 |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
+		val2 = HPRE_HAC_RAS_NFE_ENABLE;
+	} else {
+		val1 &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE;
+		val2 = 0x0;
+	}
+
+	if (qm->ver > QM_HW_V2)
+		writel(val2, qm->io_base + HPRE_OOO_SHUTDOWN_SEL);
 
+	writel(val1, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+}
+
+static void hpre_hw_error_disable(struct hisi_qm *qm)
+{
 	/* disable hpre hw error interrupts */
 	writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK);
 
-	/* disable HPRE block master OOO when m-bit error occur */
-	val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
-	val &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE;
-	writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+	/* disable HPRE block master OOO when nfe occurs on Kunpeng930 */
+	hpre_master_ooo_ctrl(qm, false);
 }
 
 static void hpre_hw_error_enable(struct hisi_qm *qm)
 {
-	u32 val;
-
 	/* clear HPRE hw error source if having */
 	writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT);
 
@@ -471,10 +485,8 @@ static void hpre_hw_error_enable(struct hisi_qm *qm)
 	writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB);
 	writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB);
 
-	/* enable HPRE block master OOO when m-bit error occur */
-	val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
-	val |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
-	writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+	/* enable HPRE block master OOO when nfe occurs on Kunpeng930 */
+	hpre_master_ooo_ctrl(qm, true);
 
 	/* enable hpre hw error interrupts */
 	writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 04c09053ea11c..8f7ea504ce80c 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -155,6 +155,7 @@
 #define QM_RAS_CE_THRESHOLD		0x1000f8
 #define QM_RAS_CE_TIMES_PER_IRQ		1
 #define QM_RAS_MSI_INT_SEL		0x1040f4
+#define QM_OOO_SHUTDOWN_SEL		0x1040f8
 
 #define QM_RESET_WAIT_TIMEOUT		400
 #define QM_PEH_VENDOR_ID		0x1000d8
@@ -1623,13 +1624,9 @@ static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
 	writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
 }
 
-static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+static void qm_hw_error_cfg(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
 {
-	u32 irq_enable = ce | nfe | fe;
-	u32 irq_unmask = ~irq_enable;
-
 	qm->error_mask = ce | nfe | fe;
-
 	/* clear QM hw residual error source */
 	writel(QM_ABNORMAL_INT_SOURCE_CLR,
 	       qm->io_base + QM_ABNORMAL_INT_SOURCE);
@@ -1639,6 +1636,14 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
 	writel(QM_RAS_CE_TIMES_PER_IRQ, qm->io_base + QM_RAS_CE_THRESHOLD);
 	writel(nfe, qm->io_base + QM_RAS_NFE_ENABLE);
 	writel(fe, qm->io_base + QM_RAS_FE_ENABLE);
+}
+
+static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+{
+	u32 irq_enable = ce | nfe | fe;
+	u32 irq_unmask = ~irq_enable;
+
+	qm_hw_error_cfg(qm, ce, nfe, fe);
 
 	irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
 	writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
@@ -1649,6 +1654,28 @@ static void qm_hw_error_uninit_v2(struct hisi_qm *qm)
 	writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
 }
 
+static void qm_hw_error_init_v3(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+{
+	u32 irq_enable = ce | nfe | fe;
+	u32 irq_unmask = ~irq_enable;
+
+	qm_hw_error_cfg(qm, ce, nfe, fe);
+
+	/* enable close master ooo when hardware error happened */
+	writel(nfe & (~QM_DB_RANDOM_INVALID), qm->io_base + QM_OOO_SHUTDOWN_SEL);
+
+	irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
+	writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
+}
+
+static void qm_hw_error_uninit_v3(struct hisi_qm *qm)
+{
+	writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
+
+	/* disable close master ooo when hardware error happened */
+	writel(0x0, qm->io_base + QM_OOO_SHUTDOWN_SEL);
+}
+
 static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
 {
 	const struct hisi_qm_hw_error *err;
@@ -1715,6 +1742,35 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
 	return ACC_ERR_RECOVERED;
 }
 
+static u32 qm_get_hw_error_status(struct hisi_qm *qm)
+{
+	return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+}
+
+static u32 qm_get_dev_err_status(struct hisi_qm *qm)
+{
+	return qm->err_ini->get_dev_hw_err_status(qm);
+}
+
+/* Check if the error causes the master ooo block */
+static int qm_check_dev_error(struct hisi_qm *qm)
+{
+	u32 val, dev_val;
+
+	if (qm->fun_type == QM_HW_VF)
+		return 0;
+
+	val = qm_get_hw_error_status(qm);
+	dev_val = qm_get_dev_err_status(qm);
+
+	if (qm->ver < QM_HW_V3)
+		return (val & QM_ECC_MBIT) ||
+		       (dev_val & qm->err_info.ecc_2bits_mask);
+
+	return (val & readl(qm->io_base + QM_OOO_SHUTDOWN_SEL)) ||
+	       (dev_val & (~qm->err_info.dev_ce_mask));
+}
+
 static int qm_stop_qp(struct hisi_qp *qp)
 {
 	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -1739,8 +1795,8 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
 	.get_vft = qm_get_vft_v2,
 	.qm_db = qm_db_v2,
 	.get_irq_num = qm_get_irq_num_v2,
-	.hw_error_init = qm_hw_error_init_v2,
-	.hw_error_uninit = qm_hw_error_uninit_v2,
+	.hw_error_init = qm_hw_error_init_v3,
+	.hw_error_uninit = qm_hw_error_uninit_v3,
 	.hw_error_handle = qm_hw_error_handle_v2,
 	.stop_qp = qm_stop_qp,
 };
@@ -2017,11 +2073,8 @@ static int qm_drain_qp(struct hisi_qp *qp)
 	int ret = 0, i = 0;
 	void *addr;
 
-	/*
-	 * No need to judge if ECC multi-bit error occurs because the
-	 * master OOO will be blocked.
-	 */
-	if (qm->err_status.is_qm_ecc_mbit || qm->err_status.is_dev_ecc_mbit)
+	/* No need to judge if master OOO is blocked. */
+	if (qm_check_dev_error(qm))
 		return 0;
 
 	/* Kunpeng930 supports drain qp by device */
@@ -3527,11 +3580,6 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected);
 
-static u32 qm_get_hw_error_status(struct hisi_qm *qm)
-{
-	return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
-}
-
 static int qm_check_req_recv(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -3712,6 +3760,10 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm)
 {
 	u32 nfe_enb = 0;
 
+	/* Kunpeng930 hardware automatically close master ooo when NFE occurs */
+	if (qm->ver >= QM_HW_V3)
+		return;
+
 	if (!qm->err_status.is_dev_ecc_mbit &&
 	    qm->err_status.is_qm_ecc_mbit &&
 	    qm->err_ini->close_axi_master_ooo) {
@@ -3830,11 +3882,6 @@ restart_fail:
 	return ret;
 }
 
-static u32 qm_get_dev_err_status(struct hisi_qm *qm)
-{
-	return qm->err_ini->get_dev_hw_err_status(qm);
-}
-
 static int qm_dev_hw_init(struct hisi_qm *qm)
 {
 	return qm->err_ini->hw_init(qm);
@@ -3844,6 +3891,9 @@ static void qm_restart_prepare(struct hisi_qm *qm)
 {
 	u32 value;
 
+	if (qm->ver >= QM_HW_V3)
+		return;
+
 	if (!qm->err_status.is_qm_ecc_mbit &&
 	    !qm->err_status.is_dev_ecc_mbit)
 		return;
@@ -3863,15 +3913,15 @@ static void qm_restart_prepare(struct hisi_qm *qm)
 
 	/* clear AM Reorder Buffer ecc mbit source */
 	writel(ACC_ROB_ECC_ERR_MULTPL, qm->io_base + ACC_AM_ROB_ECC_INT_STS);
-
-	if (qm->err_ini->open_axi_master_ooo)
-		qm->err_ini->open_axi_master_ooo(qm);
 }
 
 static void qm_restart_done(struct hisi_qm *qm)
 {
 	u32 value;
 
+	if (qm->ver >= QM_HW_V3)
+		goto clear_flags;
+
 	if (!qm->err_status.is_qm_ecc_mbit &&
 	    !qm->err_status.is_dev_ecc_mbit)
 		return;
@@ -3881,6 +3931,7 @@ static void qm_restart_done(struct hisi_qm *qm)
 	value |= qm->err_info.msi_wr_port;
 	writel(value, qm->io_base + ACC_AM_CFG_PORT_WR_EN);
 
+clear_flags:
 	qm->err_status.is_qm_ecc_mbit = false;
 	qm->err_status.is_dev_ecc_mbit = false;
 }
@@ -3918,6 +3969,8 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 
 	qm_restart_prepare(qm);
 	hisi_qm_dev_err_init(qm);
+	if (qm->err_ini->open_axi_master_ooo)
+		qm->err_ini->open_axi_master_ooo(qm);
 
 	ret = qm_restart(qm);
 	if (ret) {
@@ -4005,21 +4058,6 @@ pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(hisi_qm_dev_slot_reset);
 
-/* check the interrupt is ecc-mbit error or not */
-static int qm_check_dev_error(struct hisi_qm *qm)
-{
-	int ret;
-
-	if (qm->fun_type == QM_HW_VF)
-		return 0;
-
-	ret = qm_get_hw_error_status(qm) & QM_ECC_MBIT;
-	if (ret)
-		return ret;
-
-	return (qm_get_dev_err_status(qm) & qm->err_info.ecc_2bits_mask);
-}
-
 void hisi_qm_reset_prepare(struct pci_dev *pdev)
 {
 	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index e57167da6be0f..6a4408ea18c1c 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -52,6 +52,7 @@
 #define SEC_RAS_CE_ENB_MSK		0x88
 #define SEC_RAS_FE_ENB_MSK		0x0
 #define SEC_RAS_NFE_ENB_MSK		0x7c177
+#define SEC_OOO_SHUTDOWN_SEL		0x301014
 #define SEC_RAS_DISABLE		0x0
 #define SEC_MEM_START_INIT_REG	0x301100
 #define SEC_MEM_INIT_DONE_REG		0x301104
@@ -430,10 +431,27 @@ static void sec_debug_regs_clear(struct hisi_qm *qm)
 	hisi_qm_debug_regs_clear(qm);
 }
 
-static void sec_hw_error_enable(struct hisi_qm *qm)
+static void sec_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
 {
-	u32 val;
+	u32 val1, val2;
+
+	val1 = readl(qm->io_base + SEC_CONTROL_REG);
+	if (enable) {
+		val1 |= SEC_AXI_SHUTDOWN_ENABLE;
+		val2 = SEC_RAS_NFE_ENB_MSK;
+	} else {
+		val1 &= SEC_AXI_SHUTDOWN_DISABLE;
+		val2 = 0x0;
+	}
+
+	if (qm->ver > QM_HW_V2)
+		writel(val2, qm->io_base + SEC_OOO_SHUTDOWN_SEL);
+
+	writel(val1, qm->io_base + SEC_CONTROL_REG);
+}
 
+static void sec_hw_error_enable(struct hisi_qm *qm)
+{
 	if (qm->ver == QM_HW_V1) {
 		writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
 		pci_info(qm->pdev, "V1 not support hw error handle\n");
@@ -448,10 +466,8 @@ static void sec_hw_error_enable(struct hisi_qm *qm)
 	writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG);
 	writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG);
 
-	/* enable SEC block master OOO when m-bit error occur */
-	val = readl(qm->io_base + SEC_CONTROL_REG);
-	val = val | SEC_AXI_SHUTDOWN_ENABLE;
-	writel(val, qm->io_base + SEC_CONTROL_REG);
+	/* enable SEC block master OOO when nfe occurs on Kunpeng930 */
+	sec_master_ooo_ctrl(qm, true);
 
 	/* enable SEC hw error interrupts */
 	writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
@@ -459,15 +475,11 @@ static void sec_hw_error_enable(struct hisi_qm *qm)
 
 static void sec_hw_error_disable(struct hisi_qm *qm)
 {
-	u32 val;
-
 	/* disable SEC hw error interrupts */
 	writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
 
-	/* disable SEC block master OOO when m-bit error occur */
-	val = readl(qm->io_base + SEC_CONTROL_REG);
-	val = val & SEC_AXI_SHUTDOWN_DISABLE;
-	writel(val, qm->io_base + SEC_CONTROL_REG);
+	/* disable SEC block master OOO when nfe occurs on Kunpeng930 */
+	sec_master_ooo_ctrl(qm, false);
 
 	/* disable RAS int */
 	writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 8e3a52218774a..3e23f2a1cf5a5 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -68,6 +68,7 @@
 #define HZIP_CORE_INT_RAS_CE_ENABLE	0x1
 #define HZIP_CORE_INT_RAS_NFE_ENB	0x301164
 #define HZIP_CORE_INT_RAS_FE_ENB        0x301168
+#define HZIP_OOO_SHUTDOWN_SEL		0x30120C
 #define HZIP_CORE_INT_RAS_NFE_ENABLE	0x1FFE
 #define HZIP_SRAM_ECC_ERR_NUM_SHIFT	16
 #define HZIP_SRAM_ECC_ERR_ADDR_SHIFT	24
@@ -312,10 +313,27 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
 	return 0;
 }
 
-static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
+static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
 {
-	u32 val;
+	u32 val1, val2;
+
+	val1 = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+	if (enable) {
+		val1 |= HZIP_AXI_SHUTDOWN_ENABLE;
+		val2 = HZIP_CORE_INT_RAS_NFE_ENABLE;
+	} else {
+		val1 &= ~HZIP_AXI_SHUTDOWN_ENABLE;
+		val2 = 0x0;
+	}
+
+	if (qm->ver > QM_HW_V2)
+		writel(val2, qm->io_base + HZIP_OOO_SHUTDOWN_SEL);
+
+	writel(val1, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+}
 
+static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
+{
 	if (qm->ver == QM_HW_V1) {
 		writel(HZIP_CORE_INT_MASK_ALL,
 		       qm->io_base + HZIP_CORE_INT_MASK_REG);
@@ -333,10 +351,8 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
 	writel(HZIP_CORE_INT_RAS_NFE_ENABLE,
 	       qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
 
-	/* enable ZIP block master OOO when m-bit error occur */
-	val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
-	val = val | HZIP_AXI_SHUTDOWN_ENABLE;
-	writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+	/* enable ZIP block master OOO when nfe occurs on Kunpeng930 */
+	hisi_zip_master_ooo_ctrl(qm, true);
 
 	/* enable ZIP hw error interrupts */
 	writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
@@ -344,15 +360,11 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
 
 static void hisi_zip_hw_error_disable(struct hisi_qm *qm)
 {
-	u32 val;
-
 	/* disable ZIP hw error interrupts */
 	writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG);
 
-	/* disable ZIP block master OOO when m-bit error occur */
-	val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
-	val = val & ~HZIP_AXI_SHUTDOWN_ENABLE;
-	writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+	/* disable ZIP block master OOO when nfe occurs on Kunpeng930 */
+	hisi_zip_master_ooo_ctrl(qm, false);
 }
 
 static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
-- 
GitLab


From a6f8e68e238a15bb15f1726b35c695136c64eaba Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 16 May 2021 08:58:04 +0200
Subject: [PATCH 1367/3804] crypto: ccp - Fix a resource leak in an error
 handling path

If an error occurs after calling 'sp_get_irqs()', 'sp_free_irqs()' must be
called as already done in the error handling path.

Fixes: f4d18d656f88 ("crypto: ccp - Abstract interrupt registeration")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: John Allen <john.allen@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sp-pci.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index f468594ef8afa..6fb6ba35f89d4 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -222,7 +222,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		if (ret) {
 			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
 				ret);
-			goto e_err;
+			goto free_irqs;
 		}
 	}
 
@@ -230,10 +230,12 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	ret = sp_init(sp);
 	if (ret)
-		goto e_err;
+		goto free_irqs;
 
 	return 0;
 
+free_irqs:
+	sp_free_irqs(sp);
 e_err:
 	dev_notice(dev, "initialization failed\n");
 	return ret;
-- 
GitLab


From f9f74dc218c3cfdf0b7f9a95ddae81a081bdb79d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 22 May 2021 17:41:43 -0700
Subject: [PATCH 1368/3804] mm/shuffle: fix section mismatch warning

clang sometimes decides not to inline shuffle_zone(), but it calls a
__meminit function.  Without the extra __meminit annotation we get this
warning:

  WARNING: modpost: vmlinux.o(.text+0x2a86d4): Section mismatch in reference from the function shuffle_zone() to the function .meminit.text:__shuffle_zone()
  The function shuffle_zone() references
  the function __meminit __shuffle_zone().
  This is often because shuffle_zone lacks a __meminit
  annotation or the annotation of __shuffle_zone is wrong.

shuffle_free_memory() did not show the same problem in my tests, but it
could happen in theory as well, so mark both as __meminit.

Link: https://lkml.kernel.org/r/20210514135952.2928094-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shuffle.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/shuffle.h b/mm/shuffle.h
index 71b784f0b7c3e..cec62984f7d3b 100644
--- a/mm/shuffle.h
+++ b/mm/shuffle.h
@@ -10,7 +10,7 @@
 DECLARE_STATIC_KEY_FALSE(page_alloc_shuffle_key);
 extern void __shuffle_free_memory(pg_data_t *pgdat);
 extern bool shuffle_pick_tail(void);
-static inline void shuffle_free_memory(pg_data_t *pgdat)
+static inline void __meminit shuffle_free_memory(pg_data_t *pgdat)
 {
 	if (!static_branch_unlikely(&page_alloc_shuffle_key))
 		return;
@@ -18,7 +18,7 @@ static inline void shuffle_free_memory(pg_data_t *pgdat)
 }
 
 extern void __shuffle_zone(struct zone *z);
-static inline void shuffle_zone(struct zone *z)
+static inline void __meminit shuffle_zone(struct zone *z)
 {
 	if (!static_branch_unlikely(&page_alloc_shuffle_key))
 		return;
-- 
GitLab


From f10628d2f613195132532e0fbda439eeed8d12a2 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Sat, 22 May 2021 17:41:46 -0700
Subject: [PATCH 1369/3804] Revert "mm/gup: check page posion status for
 coredump."

While reviewing [1] I came across commit d3378e86d182 ("mm/gup: check
page posion status for coredump.") and noticed that this patch is broken
in two ways.  First it doesn't really prevent hwpoison pages from being
dumped because hwpoison pages can be marked asynchronously at any time
after the check.  Secondly, and more importantly, the patch introduces a
ref count leak because get_dump_page takes a reference on the page which
is not released.

It also seems that the patch was merged incorrectly because there were
follow-up changes that were not included, as well as discussions on how to
address the underlying problem [2].

Therefore revert the original patch.

Link: http://lkml.kernel.org/r/20210429122519.15183-4-david@redhat.com [1]
Link: http://lkml.kernel.org/r/57ac524c-b49a-99ec-c1e4-ef5027bfb61b@redhat.com [2]
Link: https://lkml.kernel.org/r/20210505135407.31590-1-mhocko@kernel.org
Fixes: d3378e86d182 ("mm/gup: check page posion status for coredump.")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c      |  4 ----
 mm/internal.h | 20 --------------------
 2 files changed, 24 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 0697134b6a12c..3ded6a5f26b25 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1593,10 +1593,6 @@ struct page *get_dump_page(unsigned long addr)
 				      FOLL_FORCE | FOLL_DUMP | FOLL_GET);
 	if (locked)
 		mmap_read_unlock(mm);
-
-	if (ret == 1 && is_page_poisoned(page))
-		return NULL;
-
 	return (ret == 1) ? page : NULL;
 }
 #endif /* CONFIG_ELF_CORE */
diff --git a/mm/internal.h b/mm/internal.h
index 54bd0dc2c23c1..2f1182948aa6e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -96,26 +96,6 @@ static inline void set_page_refcounted(struct page *page)
 	set_page_count(page, 1);
 }
 
-/*
- * When kernel touch the user page, the user page may be have been marked
- * poison but still mapped in user space, if without this page, the kernel
- * can guarantee the data integrity and operation success, the kernel is
- * better to check the posion status and avoid touching it, be good not to
- * panic, coredump for process fatal signal is a sample case matching this
- * scenario. Or if kernel can't guarantee the data integrity, it's better
- * not to call this function, let kernel touch the poison page and get to
- * panic.
- */
-static inline bool is_page_poisoned(struct page *page)
-{
-	if (PageHWPoison(page))
-		return true;
-	else if (PageHuge(page) && PageHWPoison(compound_head(page)))
-		return true;
-
-	return false;
-}
-
 extern unsigned long highest_memmap_pfn;
 
 /*
-- 
GitLab


From a11ddb37bf367e6b5239b95ca759e5389bb46048 Mon Sep 17 00:00:00 2001
From: Varad Gautam <varad.gautam@suse.com>
Date: Sat, 22 May 2021 17:41:49 -0700
Subject: [PATCH 1370/3804] ipc/mqueue, msg, sem: avoid relying on a stack
 reference past its expiry

do_mq_timedreceive calls wq_sleep with a stack local address.  The
sender (do_mq_timedsend) uses this address to later call pipelined_send.

This leads to a very hard to trigger race where a do_mq_timedreceive
call might return and leave do_mq_timedsend to rely on an invalid
address, causing the following crash:

  RIP: 0010:wake_q_add_safe+0x13/0x60
  Call Trace:
   __x64_sys_mq_timedsend+0x2a9/0x490
   do_syscall_64+0x80/0x680
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f5928e40343

The race occurs as:

1. do_mq_timedreceive calls wq_sleep with the address of `struct
   ext_wait_queue` on function stack (aliased as `ewq_addr` here) - it
   holds a valid `struct ext_wait_queue *` as long as the stack has not
   been overwritten.

2. `ewq_addr` gets added to info->e_wait_q[RECV].list in wq_add, and
   do_mq_timedsend receives it via wq_get_first_waiter(info, RECV) to call
   __pipelined_op.

3. Sender calls __pipelined_op::smp_store_release(&this->state,
   STATE_READY).  Here is where the race window begins.  (`this` is
   `ewq_addr`.)

4. If the receiver wakes up now in do_mq_timedreceive::wq_sleep, it
   will see `state == STATE_READY` and break.

5. do_mq_timedreceive returns, and `ewq_addr` is no longer guaranteed
   to be a `struct ext_wait_queue *` since it was on do_mq_timedreceive's
   stack.  (Although the address may not get overwritten until another
   function happens to touch it, which means it can persist around for an
   indefinite time.)

6. do_mq_timedsend::__pipelined_op() still believes `ewq_addr` is a
   `struct ext_wait_queue *`, and uses it to find a task_struct to pass to
   the wake_q_add_safe call.  In the lucky case where nothing has
   overwritten `ewq_addr` yet, `ewq_addr->task` is the right task_struct.
   In the unlucky case, __pipelined_op::wake_q_add_safe gets handed a
   bogus address as the receiver's task_struct causing the crash.

do_mq_timedsend::__pipelined_op() should not dereference `this` after
setting STATE_READY, as the receiver counterpart is now free to return.
Change __pipelined_op to call wake_q_add_safe on the receiver's
task_struct returned by get_task_struct, instead of dereferencing `this`
which sits on the receiver's stack.

As Manfred pointed out, the race potentially also exists in
ipc/msg.c::expunge_all and ipc/sem.c::wake_up_sem_queue_prepare.  Fix
those in the same way.

Link: https://lkml.kernel.org/r/20210510102950.12551-1-varad.gautam@suse.com
Fixes: c5b2cbdbdac563 ("ipc/mqueue.c: update/document memory barriers")
Fixes: 8116b54e7e23ef ("ipc/sem.c: document and update memory barriers")
Fixes: 0d97a82ba830d8 ("ipc/msg.c: update and document memory barriers")
Signed-off-by: Varad Gautam <varad.gautam@suse.com>
Reported-by: Matthias von Faber <matthias.vonfaber@aox-tech.de>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/mqueue.c | 6 ++++--
 ipc/msg.c    | 6 ++++--
 ipc/sem.c    | 6 ++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 8031464ed4ae2..4e4e61111500c 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1004,12 +1004,14 @@ static inline void __pipelined_op(struct wake_q_head *wake_q,
 				  struct mqueue_inode_info *info,
 				  struct ext_wait_queue *this)
 {
+	struct task_struct *task;
+
 	list_del(&this->list);
-	get_task_struct(this->task);
+	task = get_task_struct(this->task);
 
 	/* see MQ_BARRIER for purpose/pairing */
 	smp_store_release(&this->state, STATE_READY);
-	wake_q_add_safe(wake_q, this->task);
+	wake_q_add_safe(wake_q, task);
 }
 
 /* pipelined_send() - send a message directly to the task waiting in
diff --git a/ipc/msg.c b/ipc/msg.c
index acd1bc7af55a2..6e6c8e0c9380e 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -251,11 +251,13 @@ static void expunge_all(struct msg_queue *msq, int res,
 	struct msg_receiver *msr, *t;
 
 	list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
-		get_task_struct(msr->r_tsk);
+		struct task_struct *r_tsk;
+
+		r_tsk = get_task_struct(msr->r_tsk);
 
 		/* see MSG_BARRIER for purpose/pairing */
 		smp_store_release(&msr->r_msg, ERR_PTR(res));
-		wake_q_add_safe(wake_q, msr->r_tsk);
+		wake_q_add_safe(wake_q, r_tsk);
 	}
 }
 
diff --git a/ipc/sem.c b/ipc/sem.c
index e0ec239680cbd..bf534c74293e1 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -784,12 +784,14 @@ would_block:
 static inline void wake_up_sem_queue_prepare(struct sem_queue *q, int error,
 					     struct wake_q_head *wake_q)
 {
-	get_task_struct(q->sleeper);
+	struct task_struct *sleeper;
+
+	sleeper = get_task_struct(q->sleeper);
 
 	/* see SEM_BARRIER_2 for purpose/pairing */
 	smp_store_release(&q->status, error);
 
-	wake_q_add_safe(wake_q, q->sleeper);
+	wake_q_add_safe(wake_q, sleeper);
 }
 
 static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
-- 
GitLab


From 4d1cd3b2c5c1c32826454de3a18c6183238d47ed Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 22 May 2021 17:41:53 -0700
Subject: [PATCH 1371/3804] tools/testing/selftests/exec: fix link error

Fix the link error by adding '-static':

  gcc -Wall  -Wl,-z,max-page-size=0x1000 -pie load_address.c -o /home/yang/linux/tools/testing/selftests/exec/load_address_4096
  /usr/bin/ld: /tmp/ccopEGun.o: relocation R_AARCH64_ADR_PREL_PG_HI21 against symbol `stderr@@GLIBC_2.17' which may bind externally can not be used when making a shared object; recompile with -fPIC
  /usr/bin/ld: /tmp/ccopEGun.o(.text+0x158): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `stderr@@GLIBC_2.17'
  /usr/bin/ld: final link failed: bad value
  collect2: error: ld returned 1 exit status
  make: *** [Makefile:25: tools/testing/selftests/exec/load_address_4096] Error 1

Link: https://lkml.kernel.org/r/20210514092422.2367367-1-yangyingliang@huawei.com
Fixes: 206e22f01941 ("tools/testing/selftests: add self-test for verifying load alignment")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Cc: Chris Kennelly <ckennelly@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/exec/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index cf69b2fcce59e..dd61118df66ed 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -28,8 +28,8 @@ $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
 	cp $< $@
 	chmod -x $@
 $(OUTPUT)/load_address_4096: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
 $(OUTPUT)/load_address_2097152: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
 $(OUTPUT)/load_address_16777216: load_address.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
-- 
GitLab


From f70b00496f2a0669fdb19a783e613bdbdedcf901 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Sat, 22 May 2021 17:41:56 -0700
Subject: [PATCH 1372/3804] kasan: slab: always reset the tag in
 get_freepointer_safe()

With CONFIG_DEBUG_PAGEALLOC enabled, the kernel should also untag the
object pointer, as done in get_freepointer().

Failing to do so reportedly leads to SLUB freelist corruptions that
manifest as boot-time crashes.

Link: https://lkml.kernel.org/r/20210514072228.534418-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Elliot Berman <eberman@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/slub.c b/mm/slub.c
index 438fa8d4c970d..3f96e099817a1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -301,6 +301,7 @@ static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
 	if (!debug_pagealloc_enabled_static())
 		return get_freepointer(s, object);
 
+	object = kasan_reset_tag(object);
 	freepointer_addr = (unsigned long)object + s->offset;
 	copy_from_kernel_nofault(&p, (void **)freepointer_addr, sizeof(p));
 	return freelist_ptr(s, p, freepointer_addr);
-- 
GitLab


From 0f90b88dbcd1143e0f408502eba0af97429c502a Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Sat, 22 May 2021 17:41:59 -0700
Subject: [PATCH 1373/3804] watchdog: reliable handling of timestamps

Commit 9bf3bc949f8a ("watchdog: cleanup handling of false positives")
tried to handle a virtual guest stopped by the host in a more
straightforward and cleaner way.

But it introduced a risk of false softlockup reports.  The virtual guest
might be stopped at any time, for example between
kvm_check_and_clear_guest_paused() and is_softlockup().  As a result,
is_softlockup() might read the updated jiffies and detect a softlockup.

A solution might be to put back kvm_check_and_clear_guest_paused() after
is_softlockup() and detect it.  But it would put back the cycle that
complicates the logic.

In fact, the handling of all the timestamps is not reliable.  The code
does not guarantee when and how many times the timestamps are read.  For
example, "period_ts" might be touched anytime also from NMI and re-read in
is_softlockup().  It works just by chance.

Fix all the problems by making the code even more explicit.

1. Make sure that "now" and "period_ts" timestamps are read only once.
   They might be changed at anytime by NMI or when the virtual guest is
   stopped by the host.  Note that "now" timestamp does this implicitly
   because "jiffies" is marked volatile.

2. "now" time must be read first.  The state of "period_ts" will
   decide whether it will be used or the period will get restarted.

3. kvm_check_and_clear_guest_paused() must be called before reading
   "period_ts".  It touches the variable when the guest was stopped.

As a result, "now" timestamp is used only when the watchdog was not
touched and the guest not stopped in the meantime.  "period_ts" is
restarted in all other situations.

Link: https://lkml.kernel.org/r/YKT55gw+RZfyoFf7@alley
Fixes: 9bf3bc949f8aeefeacea4b ("watchdog: cleanup handling of false positives")
Signed-off-by: Petr Mladek <pmladek@suse.com>
Reported-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/watchdog.c | 34 ++++++++++++++++++++--------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 7c397907d0e95..92d3bcc5a5e09 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -302,10 +302,10 @@ void touch_softlockup_watchdog_sync(void)
 	__this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
 }
 
-static int is_softlockup(unsigned long touch_ts, unsigned long period_ts)
+static int is_softlockup(unsigned long touch_ts,
+			 unsigned long period_ts,
+			 unsigned long now)
 {
-	unsigned long now = get_timestamp();
-
 	if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){
 		/* Warn about unreasonable delays. */
 		if (time_after(now, period_ts + get_softlockup_thresh()))
@@ -353,8 +353,7 @@ static int softlockup_fn(void *data)
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
-	unsigned long period_ts = __this_cpu_read(watchdog_report_ts);
+	unsigned long touch_ts, period_ts, now;
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
 	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
@@ -376,12 +375,23 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
+	/*
+	 * Read the current timestamp first. It might become invalid anytime
+	 * when a virtual machine is stopped by the host or when the watchdog
+	 * is touched from NMI.
+	 */
+	now = get_timestamp();
 	/*
 	 * If a virtual machine is stopped by the host it can look to
-	 * the watchdog like a soft lockup. Check to see if the host
-	 * stopped the vm before we process the timestamps.
+	 * the watchdog like a soft lockup. This function touches the watchdog.
 	 */
 	kvm_check_and_clear_guest_paused();
+	/*
+	 * The stored timestamp is comparable with @now only when not touched.
+	 * It might get touched anytime from NMI. Make sure that is_softlockup()
+	 * uses the same (valid) value.
+	 */
+	period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));
 
 	/* Reset the interval when touched by known problematic code. */
 	if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
@@ -398,13 +408,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		return HRTIMER_RESTART;
 	}
 
-	/* check for a softlockup
-	 * This is done by making sure a high priority task is
-	 * being scheduled.  The task touches the watchdog to
-	 * indicate it is getting cpu time.  If it hasn't then
-	 * this is a good indication some task is hogging the cpu
-	 */
-	duration = is_softlockup(touch_ts, period_ts);
+	/* Check for a softlockup. */
+	touch_ts = __this_cpu_read(watchdog_touch_ts);
+	duration = is_softlockup(touch_ts, period_ts, now);
 	if (unlikely(duration)) {
 		/*
 		 * Prevent multiple soft-lockup reports if one cpu is already
-- 
GitLab


From f747e6667ebb2ffb8133486c9cd19800d72b0d98 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 22 May 2021 17:42:02 -0700
Subject: [PATCH 1374/3804] linux/bits.h: fix compilation error with GENMASK

GENMASK() has an input check which uses __builtin_choose_expr() to
enable a compile time sanity check of its inputs if they are known at
compile time.

However, it turns out that __builtin_constant_p() does not always return
a compile time constant [0].  It was thought this problem was fixed with
gcc 4.9 [1], but apparently this is not the case [2].

Switch to use __is_constexpr() instead which always returns a compile time
constant, regardless of its inputs.

Link: https://lore.kernel.org/lkml/42b4342b-aefc-a16a-0d43-9f9c0d63ba7a@rasmusvillemoes.dk [0]
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19449 [1]
Link: https://lore.kernel.org/lkml/1ac7bbc2-45d9-26ed-0b33-bf382b8d858b@I-love.SAKURA.ne.jp [2]
Link: https://lkml.kernel.org/r/20210511203716.117010-1-rikard.falkeborn@gmail.com
Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bits.h        |  2 +-
 include/linux/const.h       |  8 ++++++++
 include/linux/minmax.h      | 10 ++--------
 tools/include/linux/bits.h  |  2 +-
 tools/include/linux/const.h |  8 ++++++++
 5 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/linux/bits.h b/include/linux/bits.h
index 7f475d59a0974..87d112650dfbb 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -22,7 +22,7 @@
 #include <linux/build_bug.h>
 #define GENMASK_INPUT_CHECK(h, l) \
 	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
-		__builtin_constant_p((l) > (h)), (l) > (h), 0)))
+		__is_constexpr((l) > (h)), (l) > (h), 0)))
 #else
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/include/linux/const.h b/include/linux/const.h
index 81b8aae5a8559..435ddd72d2c46 100644
--- a/include/linux/const.h
+++ b/include/linux/const.h
@@ -3,4 +3,12 @@
 
 #include <vdso/const.h>
 
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
 #endif /* _LINUX_CONST_H */
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
index c0f57b0c64d90..5433c08fcc685 100644
--- a/include/linux/minmax.h
+++ b/include/linux/minmax.h
@@ -2,6 +2,8 @@
 #ifndef _LINUX_MINMAX_H
 #define _LINUX_MINMAX_H
 
+#include <linux/const.h>
+
 /*
  * min()/max()/clamp() macros must accomplish three things:
  *
@@ -17,14 +19,6 @@
 #define __typecheck(x, y) \
 	(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
 
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
-	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
 #define __no_side_effects(x, y) \
 		(__is_constexpr(x) && __is_constexpr(y))
 
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7f475d59a0974..87d112650dfbb 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -22,7 +22,7 @@
 #include <linux/build_bug.h>
 #define GENMASK_INPUT_CHECK(h, l) \
 	(BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
-		__builtin_constant_p((l) > (h)), (l) > (h), 0)))
+		__is_constexpr((l) > (h)), (l) > (h), 0)))
 #else
 /*
  * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 81b8aae5a8559..435ddd72d2c46 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -3,4 +3,12 @@
 
 #include <vdso/const.h>
 
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
 #endif /* _LINUX_CONST_H */
-- 
GitLab


From 43b2ec977ce33b53e2be30999824b584e2be248a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Sat, 22 May 2021 17:42:05 -0700
Subject: [PATCH 1375/3804] proc: remove Alexey from MAINTAINERS

People Cc me and I don't have time.

Link: https://lkml.kernel.org/r/YKarMxHJBIhMHQIh@localhost.localdomain
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index def1640cc2943..81e1edeceae40 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14735,7 +14735,6 @@ W:	https://wireless.wiki.kernel.org/en/users/Drivers/p54
 F:	drivers/net/wireless/intersil/prism54/
 
 PROC FILESYSTEM
-R:	Alexey Dobriyan <adobriyan@gmail.com>
 L:	linux-kernel@vger.kernel.org
 L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
-- 
GitLab


From 1b6d63938a9d868df01d5bc6e2da212133121b8d Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 22 May 2021 17:42:08 -0700
Subject: [PATCH 1376/3804] lib: kunit: suppress a compilation warning of frame
 size

lib/bitfield_kunit.c: In function `test_bitfields_constants':
lib/bitfield_kunit.c:93:1: warning: the frame size of 7456 bytes is larger than 2048 bytes [-Wframe-larger-than=]
 }
 ^

As the description of BITFIELD_KUNIT in lib/Kconfig.debug says, it is "Only
useful for kernel devs running the KUnit test harness, and not intended
for inclusion into a production build".  Therefore, it is not worth
modifying function 'test_bitfields_constants' to clear this warning.  Just
suppress it.

Link: https://lkml.kernel.org/r/20210518094533.7652-1-thunder.leizhen@huawei.com
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Vitor Massaru Iha <vitor@massaru.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Makefile b/lib/Makefile
index e11cfc18b6c08..2cc359ec1fdd3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -348,6 +348,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o
 obj-$(CONFIG_PLDMFW) += pldmfw/
 
 # KUnit tests
+CFLAGS_bitfield_kunit.o := $(call cc-option,-Wframe-larger-than=10240)
 obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
 obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
-- 
GitLab


From e32905e57358fdfb82f9de024534f205b3af7dac Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Sat, 22 May 2021 17:42:11 -0700
Subject: [PATCH 1377/3804] userfaultfd: hugetlbfs: fix new flag usage in error
 path

In commit d6995da31122 ("hugetlb: use page.private for hugetlb specific
page flags") the use of PagePrivate to indicate a reservation count
should be restored at free time was changed to the hugetlb specific flag
HPageRestoreReserve.  Changes to a userfaultfd error path as well as a
VM_BUG_ON() in remove_inode_hugepages() were overlooked.

Users could see incorrect hugetlb reserve counts if they experience an
error with a UFFDIO_COPY operation.  Specifically, this would be the
result of an unlikely copy_huge_page_from_user error.  There is not an
increased chance of hitting the VM_BUG_ON.

Link: https://lkml.kernel.org/r/20210521233952.236434-1-mike.kravetz@oracle.com
Fixes: d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mina Almasry <almasry.mina@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c |  2 +-
 mm/userfaultfd.c     | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9d9e0097c1d38..55efd3dd04f62 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -529,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 			 * the subpool and global reserve usage count can need
 			 * to be adjusted.
 			 */
-			VM_BUG_ON(PagePrivate(page));
+			VM_BUG_ON(HPageRestoreReserve(page));
 			remove_huge_page(page);
 			freed++;
 			if (!truncate_op) {
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e14b3820c6a81..63a73e164d551 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -360,38 +360,38 @@ out:
 		 * If a reservation for the page existed in the reservation
 		 * map of a private mapping, the map was modified to indicate
 		 * the reservation was consumed when the page was allocated.
-		 * We clear the PagePrivate flag now so that the global
+		 * We clear the HPageRestoreReserve flag now so that the global
 		 * reserve count will not be incremented in free_huge_page.
 		 * The reservation map will still indicate the reservation
 		 * was consumed and possibly prevent later page allocation.
 		 * This is better than leaking a global reservation.  If no
-		 * reservation existed, it is still safe to clear PagePrivate
-		 * as no adjustments to reservation counts were made during
-		 * allocation.
+		 * reservation existed, it is still safe to clear
+		 * HPageRestoreReserve as no adjustments to reservation counts
+		 * were made during allocation.
 		 *
 		 * The reservation map for shared mappings indicates which
 		 * pages have reservations.  When a huge page is allocated
 		 * for an address with a reservation, no change is made to
-		 * the reserve map.  In this case PagePrivate will be set
-		 * to indicate that the global reservation count should be
+		 * the reserve map.  In this case HPageRestoreReserve will be
+		 * set to indicate that the global reservation count should be
 		 * incremented when the page is freed.  This is the desired
 		 * behavior.  However, when a huge page is allocated for an
 		 * address without a reservation a reservation entry is added
-		 * to the reservation map, and PagePrivate will not be set.
-		 * When the page is freed, the global reserve count will NOT
-		 * be incremented and it will appear as though we have leaked
-		 * reserved page.  In this case, set PagePrivate so that the
-		 * global reserve count will be incremented to match the
-		 * reservation map entry which was created.
+		 * to the reservation map, and HPageRestoreReserve will not be
+		 * set. When the page is freed, the global reserve count will
+		 * NOT be incremented and it will appear as though we have
+		 * leaked reserved page.  In this case, set HPageRestoreReserve
+		 * so that the global reserve count will be incremented to
+		 * match the reservation map entry which was created.
 		 *
 		 * Note that vm_alloc_shared is based on the flags of the vma
 		 * for which the page was originally allocated.  dst_vma could
 		 * be different or NULL on error.
 		 */
 		if (vm_alloc_shared)
-			SetPagePrivate(page);
+			SetHPageRestoreReserve(page);
 		else
-			ClearPagePrivate(page);
+			ClearHPageRestoreReserve(page);
 		put_page(page);
 	}
 	BUG_ON(copied < 0);
-- 
GitLab


From eac2f3059e02382d91f8c887462083841d6ea2a3 Mon Sep 17 00:00:00 2001
From: Chen Huang <chenhuang5@huawei.com>
Date: Thu, 29 Apr 2021 07:03:48 +0000
Subject: [PATCH 1378/3804] riscv: stacktrace: fix the riscv stacktrace when
 CONFIG_FRAME_POINTER enabled

As [1] and [2] said, the arch_stack_walk should not trace itself, or it will
leave the trace unexpectedly when called. The example is when we do "cat
/sys/kernel/debug/page_owner", all pages' stack is the same.

arch_stack_walk+0x18/0x20
stack_trace_save+0x40/0x60
register_dummy_stack+0x24/0x5e
init_page_owner+0x2e

So we use __builtin_frame_address(1) as the first frame to be walked. And mark
the arch_stack_walk() noinline.

We found that pr_cont will affect pages' stack whose task state is RUNNING when
testing "echo t > /proc/sysrq-trigger". So move the place of pr_cont and mark
the function dump_backtrace() noinline.

Also we move the case when task == NULL into else branch, and test for it in
"echo c > /proc/sysrq-trigger".

[1] https://lore.kernel.org/lkml/20210319184106.5688-1-mark.rutland@arm.com/
[2] https://lore.kernel.org/lkml/20210317142050.57712-1-chenjun102@huawei.com/

Signed-off-by: Chen Huang <chenhuang5@huawei.com>
Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/stacktrace.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 2b3e0cb90d789..bde85fc53357f 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -27,10 +27,10 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 		fp = frame_pointer(regs);
 		sp = user_stack_pointer(regs);
 		pc = instruction_pointer(regs);
-	} else if (task == NULL || task == current) {
-		fp = (unsigned long)__builtin_frame_address(0);
-		sp = sp_in_global;
-		pc = (unsigned long)walk_stackframe;
+	} else if (task == current) {
+		fp = (unsigned long)__builtin_frame_address(1);
+		sp = (unsigned long)__builtin_frame_address(0);
+		pc = (unsigned long)__builtin_return_address(0);
 	} else {
 		/* task blocked in __switch_to */
 		fp = task->thread.s[0];
@@ -106,15 +106,15 @@ static bool print_trace_address(void *arg, unsigned long pc)
 	return true;
 }
 
-void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
 		    const char *loglvl)
 {
-	pr_cont("%sCall Trace:\n", loglvl);
 	walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
 {
+	pr_cont("%sCall Trace:\n", loglvl);
 	dump_backtrace(NULL, task, loglvl);
 }
 
@@ -139,7 +139,7 @@ unsigned long get_wchan(struct task_struct *task)
 
 #ifdef CONFIG_STACKTRACE
 
-void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
 		     struct task_struct *task, struct pt_regs *regs)
 {
 	walk_stackframe(task, regs, consume_entry, cookie);
-- 
GitLab


From 97a031082320897ee5b06352d0ab3d7cf47321d3 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 7 May 2021 17:47:15 +0800
Subject: [PATCH 1379/3804] riscv: Select ARCH_USE_MEMTEST

As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"),
architectures must select ARCH_USE_MEMTEST to enable CONFIG_MEMTEST.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Fixes: f6e5aedf470b ("riscv: Add support for memtest")
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a8ad8eb761206..c5914e70a0fd7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -34,6 +34,7 @@ config RISCV
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
 	select ARCH_SUPPORTS_HUGETLBFS if MMU
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
-- 
GitLab


From e98d98028989e023e0cbff539dc616c4e5036839 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 7 May 2021 21:44:39 +0200
Subject: [PATCH 1380/3804] arm64: dts: zii-ultra: remove second GEN_3V3
 regulator instance

When adding the sound support a second instance of the GEN_3V3 regulator was
added by accident. Remove it and point the consumers to the first instance.

Fixes: 663a5b5efa51 ("arm64: dts: zii-ultra: add sound support")
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 .../dts/freescale/imx8mq-zii-ultra-rmb3.dts   | 10 +++++-----
 .../boot/dts/freescale/imx8mq-zii-ultra.dtsi  | 19 +++++--------------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
index 631e01c1b9fd4..be1e7d6f0ecb5 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra-rmb3.dts
@@ -88,11 +88,11 @@
 		pinctrl-0 = <&pinctrl_codec2>;
 		reg = <0x18>;
 		#sound-dai-cells = <0>;
-		HPVDD-supply = <&reg_3p3v>;
-		SPRVDD-supply = <&reg_3p3v>;
-		SPLVDD-supply = <&reg_3p3v>;
-		AVDD-supply = <&reg_3p3v>;
-		IOVDD-supply = <&reg_3p3v>;
+		HPVDD-supply = <&reg_gen_3p3>;
+		SPRVDD-supply = <&reg_gen_3p3>;
+		SPLVDD-supply = <&reg_gen_3p3>;
+		AVDD-supply = <&reg_gen_3p3>;
+		IOVDD-supply = <&reg_gen_3p3>;
 		DVDD-supply = <&vgen4_reg>;
 		reset-gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
 	};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index 4dc8383478ee2..1e5d34e81ab72 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -77,15 +77,6 @@
 		regulator-always-on;
 	};
 
-	reg_3p3v: regulator-3p3v {
-		compatible = "regulator-fixed";
-		vin-supply = <&reg_3p3_main>;
-		regulator-name = "GEN_3V3";
-		regulator-min-microvolt = <3300000>;
-		regulator-max-microvolt = <3300000>;
-		regulator-always-on;
-	};
-
 	reg_usdhc2_vmmc: regulator-vsd-3v3 {
 		pinctrl-names = "default";
 		pinctrl-0 = <&pinctrl_reg_usdhc2>;
@@ -415,11 +406,11 @@
 		pinctrl-0 = <&pinctrl_codec1>;
 		reg = <0x18>;
 		#sound-dai-cells = <0>;
-		HPVDD-supply = <&reg_3p3v>;
-		SPRVDD-supply = <&reg_3p3v>;
-		SPLVDD-supply = <&reg_3p3v>;
-		AVDD-supply = <&reg_3p3v>;
-		IOVDD-supply = <&reg_3p3v>;
+		HPVDD-supply = <&reg_gen_3p3>;
+		SPRVDD-supply = <&reg_gen_3p3>;
+		SPLVDD-supply = <&reg_gen_3p3>;
+		AVDD-supply = <&reg_gen_3p3>;
+		IOVDD-supply = <&reg_gen_3p3>;
 		DVDD-supply = <&vgen4_reg>;
 		reset-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>;
 	};
-- 
GitLab


From ac0cbf9d13dccfd09bebc2f8f5697b6d3ffe27c4 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 7 May 2021 21:44:40 +0200
Subject: [PATCH 1381/3804] arm64: dts: zii-ultra: fix 12V_MAIN voltage

As this is a fixed regulator on the board there was no harm in the wrong
voltage being specified, apart from confusing reporting to userspace.

Fixes: 4a13b3bec3b4 ("arm64: dts: imx: add Zii Ultra board support")
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index 1e5d34e81ab72..a08a568c31d92 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -45,8 +45,8 @@
 	reg_12p0_main: regulator-12p0-main {
 		compatible = "regulator-fixed";
 		regulator-name = "12V_MAIN";
-		regulator-min-microvolt = <5000000>;
-		regulator-max-microvolt = <5000000>;
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
 		regulator-always-on;
 	};
 
-- 
GitLab


From 779b56bb679767712761a79232331f8519402e75 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sat, 8 May 2021 13:03:19 -0300
Subject: [PATCH 1382/3804] ARM: imx: pm-imx27: Include "common.h"

Since commit 879c0e5e0ac7 ("ARM: imx: Remove i.MX27 board files")
the following W=1 build warning is seen:

arch/arm/mach-imx/pm-imx27.c:40:13: warning: no previous prototype for 'imx27_pm_init' [-Wmissing-prototypes]

Fix it by including the "common.h" header file, which
contains the prototype for imx27_pm_init().

Fixes: 879c0e5e0ac7 ("ARM: imx: Remove i.MX27 board files")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/pm-imx27.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-imx/pm-imx27.c b/arch/arm/mach-imx/pm-imx27.c
index 020e6deb67c8c..237e8aa9fe837 100644
--- a/arch/arm/mach-imx/pm-imx27.c
+++ b/arch/arm/mach-imx/pm-imx27.c
@@ -12,6 +12,7 @@
 #include <linux/suspend.h>
 #include <linux/io.h>
 
+#include "common.h"
 #include "hardware.h"
 
 static int mx27_suspend_enter(suspend_state_t state)
-- 
GitLab


From 02ccdeed1817a587161ad091887e11ac8a2586b2 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Sat, 8 May 2021 23:43:47 +0800
Subject: [PATCH 1383/3804] riscv: kprobes: Fix build error when MMU=n

lkp reported a randconfig failure:

arch/riscv/kernel/probes/kprobes.c:90:22: error: use of undeclared identifier 'PAGE_KERNEL_READ_EXEC'

We implemented the alloc_insn_page() to allocate PAGE_KERNEL_READ_EXEC
page for kprobes insn page for STRICT_MODULE_RWX. But if MMU=n, we
should fall back to the generic weak alloc_insn_page() by generic
kprobe subsystem.

Fixes: cdd1b2bd358f ("riscv: kprobes: Implement alloc_insn_page()")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/probes/kprobes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 10b965c345366..15cc65ac7ca65 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -84,6 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	return 0;
 }
 
+#ifdef CONFIG_MMU
 void *alloc_insn_page(void)
 {
 	return  __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
@@ -91,6 +92,7 @@ void *alloc_insn_page(void)
 				     VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
 				     __builtin_return_address(0));
 }
+#endif
 
 /* install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
-- 
GitLab


From bab0d47c0ebb50ae0bcfa4e84986a60113bf7d6b Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Sun, 9 May 2021 00:44:43 +0800
Subject: [PATCH 1384/3804] riscv: kexec: Fix W=1 build warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 build warning(s):

In file included from include/linux/kexec.h:28,
                 from arch/riscv/kernel/machine_kexec.c:7:
arch/riscv/include/asm/kexec.h:45:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration]
   45 | const extern unsigned char riscv_kexec_relocate[];
      | ^~~~~
arch/riscv/include/asm/kexec.h:46:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration]
   46 | const extern unsigned int riscv_kexec_relocate_size;
      | ^~~~~
arch/riscv/kernel/machine_kexec.c:125:6: warning: no previous prototype for ‘machine_shutdown’ [-Wmissing-prototypes]
  125 | void machine_shutdown(void)
      |      ^~~~~~~~~~~~~~~~
arch/riscv/kernel/machine_kexec.c:147:1: warning: no previous prototype for ‘machine_crash_shutdown’ [-Wmissing-prototypes]
  147 | machine_crash_shutdown(struct pt_regs *regs)
      | ^~~~~~~~~~~~~~~~~~~~~~
arch/riscv/kernel/machine_kexec.c:23: warning: Function parameter or member 'image' not described in 'kexec_image_info'
arch/riscv/kernel/machine_kexec.c:53: warning: Function parameter or member 'image' not described in 'machine_kexec_prepare'
arch/riscv/kernel/machine_kexec.c:114: warning: Function parameter or member 'image' not described in 'machine_kexec_cleanup'
arch/riscv/kernel/machine_kexec.c:148: warning: Function parameter or member 'regs' not described in 'machine_crash_shutdown'
arch/riscv/kernel/machine_kexec.c:167: warning: Function parameter or member 'image' not described in 'machine_kexec'

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/kexec.h    |  4 ++--
 arch/riscv/kernel/machine_kexec.c | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index 1e954101906ac..e4e291d40759f 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -42,8 +42,8 @@ struct kimage_arch {
 	unsigned long fdt_addr;
 };
 
-const extern unsigned char riscv_kexec_relocate[];
-const extern unsigned int riscv_kexec_relocate_size;
+extern const unsigned char riscv_kexec_relocate[];
+extern const unsigned int riscv_kexec_relocate_size;
 
 typedef void (*riscv_kexec_method)(unsigned long first_ind_entry,
 				   unsigned long jump_addr,
diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c
index cc048143fba55..9e99e1db156bc 100644
--- a/arch/riscv/kernel/machine_kexec.c
+++ b/arch/riscv/kernel/machine_kexec.c
@@ -14,8 +14,9 @@
 #include <asm/set_memory.h>	/* For set_memory_x() */
 #include <linux/compiler.h>	/* For unreachable() */
 #include <linux/cpu.h>		/* For cpu_down() */
+#include <linux/reboot.h>
 
-/**
+/*
  * kexec_image_info - Print received image details
  */
 static void
@@ -39,7 +40,7 @@ kexec_image_info(const struct kimage *image)
 	}
 }
 
-/**
+/*
  * machine_kexec_prepare - Initialize kexec
  *
  * This function is called from do_kexec_load, when the user has
@@ -100,7 +101,7 @@ machine_kexec_prepare(struct kimage *image)
 }
 
 
-/**
+/*
  * machine_kexec_cleanup - Cleanup any leftovers from
  *			   machine_kexec_prepare
  *
@@ -135,7 +136,7 @@ void machine_shutdown(void)
 #endif
 }
 
-/**
+/*
  * machine_crash_shutdown - Prepare to kexec after a kernel crash
  *
  * This function is called by crash_kexec just before machine_kexec
@@ -151,7 +152,7 @@ machine_crash_shutdown(struct pt_regs *regs)
 	pr_info("Starting crashdump kernel...\n");
 }
 
-/**
+/*
  * machine_kexec - Jump to the loaded kimage
  *
  * This function is called by kernel_kexec which is called by the
-- 
GitLab


From 25201269c6ec3e9398426962ccdd55428261f7d0 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 14 May 2021 20:55:52 +0200
Subject: [PATCH 1385/3804] arm64: dts: freescale: sl28: var4: fix RGMII clock
 and voltage

During hardware validation it was noticed that the clock isn't
continuously enabled when there is no link. This is because the 125MHz
clock is derived from the internal PLL which seems to go into some kind
of power-down mode every once in a while. The LS1028A expects a continuous
clock. Thus enable the PLL all the time.

Also, the RGMII pad voltage is wrong. It was configured to 2.5V (that is
the VDDH regulator). The correct voltage is 1.8V, i.e. the VDDIO
regulator.

This fix is for the freescale/fsl-ls1028a-kontron-sl28-var4.dts.

Fixes: 815364d0424e ("arm64: dts: freescale: add Kontron sl28 support")
Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 .../boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts     | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
index df212ed5bb942..e65d1c477e2ce 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var4.dts
@@ -31,11 +31,10 @@
 			reg = <0x4>;
 			eee-broken-1000t;
 			eee-broken-100tx;
-
 			qca,clk-out-frequency = <125000000>;
 			qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
-
-			vddio-supply = <&vddh>;
+			qca,keep-pll-enabled;
+			vddio-supply = <&vddio>;
 
 			vddio: vddio-regulator {
 				regulator-name = "VDDIO";
-- 
GitLab


From 52387bb9a4a75b88887383cb91d3995ae6f4044a Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 14 May 2021 20:55:53 +0200
Subject: [PATCH 1386/3804] arm64: dts: freescale: sl28: var1: fix RGMII clock
 and voltage

During hardware validation it was noticed that the clock isn't
continuously enabled when there is no link. This is because the 125MHz
clock is derived from the internal PLL which seems to go into some kind
of power-down mode every once in a while. The LS1028A expects a continuous
clock. Thus enable the PLL all the time.

Also, the RGMII pad voltage is wrong, it was configured to 2.5V (that is
the VDDH regulator). The correct voltage is 1.8V, i.e. the VDDIO
regulator.

This fix is for the freescale/fsl-ls1028a-kontron-sl28-var1.dts.

Fixes: 642856097c18 ("arm64: dts: freescale: sl28: add variant 1")
Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 .../arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
index 6c309b97587df..e8d31279b7a34 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28-var1.dts
@@ -46,7 +46,8 @@
 			eee-broken-100tx;
 			qca,clk-out-frequency = <125000000>;
 			qca,clk-out-strength = <AR803X_STRENGTH_FULL>;
-			vddio-supply = <&vddh>;
+			qca,keep-pll-enabled;
+			vddio-supply = <&vddio>;
 
 			vddio: vddio-regulator {
 				regulator-name = "VDDIO";
-- 
GitLab


From 7c8f0338cdacc90fdf6468adafa8e27952987f00 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 20 May 2021 18:42:12 -0300
Subject: [PATCH 1387/3804] ARM: dts: imx7d-meerkat96: Fix the 'tuning-step'
 property

According to Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml, the
correct name of the property is 'fsl,tuning-step'.

Fix it accordingly.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Fixes: ae7b3384b61b ("ARM: dts: Add support for 96Boards Meerkat96 board")
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx7d-meerkat96.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx7d-meerkat96.dts b/arch/arm/boot/dts/imx7d-meerkat96.dts
index 5339210b63d0f..dd8003bd1fc09 100644
--- a/arch/arm/boot/dts/imx7d-meerkat96.dts
+++ b/arch/arm/boot/dts/imx7d-meerkat96.dts
@@ -193,7 +193,7 @@
 	pinctrl-names = "default";
 	pinctrl-0 = <&pinctrl_usdhc1>;
 	keep-power-in-suspend;
-	tuning-step = <2>;
+	fsl,tuning-step = <2>;
 	vmmc-supply = <&reg_3p3v>;
 	no-1-8-v;
 	broken-cd;
-- 
GitLab


From 0e2fa4959c4f44815ce33e46e4054eeb0f346053 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 20 May 2021 18:42:13 -0300
Subject: [PATCH 1388/3804] ARM: dts: imx7d-pico: Fix the 'tuning-step'
 property

According to Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.yaml, the
correct name of the property is 'fsl,tuning-step'.

Fix it accordingly.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Fixes: f13f571ac8a1 ("ARM: dts: imx7d-pico: Extend peripherals support")
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx7d-pico.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi
index e57da0d32b98d..e519897fae082 100644
--- a/arch/arm/boot/dts/imx7d-pico.dtsi
+++ b/arch/arm/boot/dts/imx7d-pico.dtsi
@@ -351,7 +351,7 @@
 	pinctrl-2 = <&pinctrl_usdhc1_200mhz>;
 	cd-gpios = <&gpio5 0 GPIO_ACTIVE_LOW>;
 	bus-width = <4>;
-	tuning-step = <2>;
+	fsl,tuning-step = <2>;
 	vmmc-supply = <&reg_3p3v>;
 	wakeup-source;
 	no-1-8-v;
-- 
GitLab


From b73eb6b3b91ff7d76cff5f8c7ab92fe0c51e3829 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 21 May 2021 09:54:07 +0200
Subject: [PATCH 1389/3804] ARM: dts: imx: emcon-avari: Fix nxp,pca8574
 #gpio-cells

According to the DT bindings, #gpio-cells must be two.

Fixes: 63e71fedc07c4ece ("ARM: dts: Add support for emtrion emCON-MX6 series")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
index 828cf3e39784a..c4e146f3341bb 100644
--- a/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-emcon-avari.dtsi
@@ -126,7 +126,7 @@
 		compatible = "nxp,pca8574";
 		reg = <0x3a>;
 		gpio-controller;
-		#gpio-cells = <1>;
+		#gpio-cells = <2>;
 	};
 };
 
-- 
GitLab


From 2b899f31f1a6db2db4608bac2ac04fe2c4ad89eb Mon Sep 17 00:00:00 2001
From: kernel test robot <lkp@intel.com>
Date: Sun, 23 May 2021 02:09:00 +0800
Subject: [PATCH 1390/3804] ALSA: usb-audio: scarlett2:
 snd_scarlett_gen2_controls_create() can be static

sound/usb/mixer_scarlett_gen2.c:2000:5: warning: symbol 'snd_scarlett_gen2_controls_create' was not declared. Should it be static?

Fixes: 265d1a90e4fb ("ALSA: usb-audio: scarlett2: Improve driver startup messages")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20210522180900.GA83915@f59a3af2f1d9
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_scarlett_gen2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/usb/mixer_scarlett_gen2.c b/sound/usb/mixer_scarlett_gen2.c
index 3ad8f61a2095f..4caf379d5b991 100644
--- a/sound/usb/mixer_scarlett_gen2.c
+++ b/sound/usb/mixer_scarlett_gen2.c
@@ -1997,8 +1997,8 @@ static int scarlett2_mixer_status_create(struct usb_mixer_interface *mixer)
 	return usb_submit_urb(mixer->urb, GFP_KERNEL);
 }
 
-int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer,
-				      const struct scarlett2_device_info *info)
+static int snd_scarlett_gen2_controls_create(struct usb_mixer_interface *mixer,
+					     const struct scarlett2_device_info *info)
 {
 	int err;
 
-- 
GitLab


From 9c87ae1a0dbeb5794957421157fd266d38a869b4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 6 May 2021 07:38:56 +0200
Subject: [PATCH 1391/3804] media: rc: i2c: Fix an error message

'ret' is known to be 1 here. In fact 'i' is expected instead.
Store the return value of 'i2c_master_recv()' in 'ret' so that the error
message prints the correct error code.

Fixes: acaa34bf06e9 ("media: rc: implement zilog transmitter")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ir-kbd-i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c
index e8119ad0bc71d..92376592455ee 100644
--- a/drivers/media/i2c/ir-kbd-i2c.c
+++ b/drivers/media/i2c/ir-kbd-i2c.c
@@ -678,8 +678,8 @@ static int zilog_tx(struct rc_dev *rcdev, unsigned int *txbuf,
 		goto out_unlock;
 	}
 
-	i = i2c_master_recv(ir->tx_c, buf, 1);
-	if (i != 1) {
+	ret = i2c_master_recv(ir->tx_c, buf, 1);
+	if (ret != 1) {
 		dev_err(&ir->rc->dev, "i2c_master_recv failed with %d\n", ret);
 		ret = -EIO;
 		goto out_unlock;
-- 
GitLab


From 37d9d42f9e78bfe8ec04fd5e049111beb5f59e01 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 11 May 2021 09:22:19 +0200
Subject: [PATCH 1392/3804] media: lmedm04: delete lme2510_get_adapter_count()

The adapter count is fixed at compile time so we can delete the
lme2510_get_adapter_count() function and set ".num_adapters = 1"
instead.  There is also no need to create a zeroed adapter
element at the end of the array.  Remove that as well.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/lmedm04.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c
index 1b6d4e4c52ca9..fe4d886442a41 100644
--- a/drivers/media/usb/dvb-usb-v2/lmedm04.c
+++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c
@@ -1122,11 +1122,6 @@ static int lme2510_powerup(struct dvb_usb_device *d, int onoff)
 	return ret;
 }
 
-static int lme2510_get_adapter_count(struct dvb_usb_device *d)
-{
-	return 1;
-}
-
 static int lme2510_identify_state(struct dvb_usb_device *d, const char **name)
 {
 	struct lme2510_state *st = d->priv;
@@ -1211,12 +1206,12 @@ static struct dvb_usb_device_properties lme2510_props = {
 	.frontend_attach  = dm04_lme2510_frontend_attach,
 	.tuner_attach = dm04_lme2510_tuner,
 	.get_stream_config = lme2510_get_stream_config,
-	.get_adapter_count = lme2510_get_adapter_count,
 	.streaming_ctrl   = lme2510_streaming_ctrl,
 
 	.get_rc_config = lme2510_get_rc_config,
 
 	.exit = lme2510_exit,
+	.num_adapters = 1,
 	.adapter = {
 		{
 			.caps = DVB_USB_ADAP_HAS_PID_FILTER|
@@ -1227,8 +1222,6 @@ static struct dvb_usb_device_properties lme2510_props = {
 			.stream =
 			DVB_USB_STREAM_BULK(0x86, 10, 4096),
 		},
-		{
-		}
 	},
 };
 
-- 
GitLab


From c680ed46e418e9c785d76cf44eb33bfd1e8cf3f6 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 7 May 2021 14:50:43 +0200
Subject: [PATCH 1393/3804] media: dvb-usb: fix wrong definition

syzbot reported WARNING in vmalloc. The problem
was in zero size passed to vmalloc.

The root cause was a wrong cxusb_bluebird_lgz201_properties
definition: the adapter array has only 1 entry, but num_adapters was
2.

Call Trace:
 __vmalloc_node mm/vmalloc.c:2963 [inline]
 vmalloc+0x67/0x80 mm/vmalloc.c:2996
 dvb_dmx_init+0xe4/0xb90 drivers/media/dvb-core/dvb_demux.c:1251
 dvb_usb_adapter_dvb_init+0x564/0x860 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:184
 dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:86 [inline]
 dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:184 [inline]
 dvb_usb_device_init.cold+0xc94/0x146e drivers/media/usb/dvb-usb/dvb-usb-init.c:308
 cxusb_probe+0x159/0x5e0 drivers/media/usb/dvb-usb/cxusb.c:1634

Fixes: 4d43e13f723e ("V4L/DVB (4643): Multi-input patch for DVB-USB device")
Cc: stable@vger.kernel.org
Reported-by: syzbot+7336195c02c1bd2f64e1@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb/cxusb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c
index 761992ad05e2a..7707de7bae7ca 100644
--- a/drivers/media/usb/dvb-usb/cxusb.c
+++ b/drivers/media/usb/dvb-usb/cxusb.c
@@ -1947,7 +1947,7 @@ static struct dvb_usb_device_properties cxusb_bluebird_lgz201_properties = {
 
 	.size_of_priv     = sizeof(struct cxusb_state),
 
-	.num_adapters = 2,
+	.num_adapters = 1,
 	.adapter = {
 		{
 		.num_frontends = 1,
-- 
GitLab


From 118f3e1562f2b15e30ed65a2718cd9ed710054b1 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:09 +0200
Subject: [PATCH 1394/3804] media: uapi: mpeg2: Rename "quantization" to
 "quantisation"

The MPEG-2 specification refers to the quantisation matrices
using the word "quantisation". Make the V4L2 interface more
ergonomic by matching the MPEG-2 spec.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 16 ++++++------
 .../media/v4l/pixfmt-compressed.rst           |  4 +--
 .../media/v4l/vidioc-queryctrl.rst            |  6 ++---
 .../media/videodev2.h.rst.exceptions          |  2 +-
 drivers/media/v4l2-core/v4l2-ctrls.c          | 12 ++++-----
 drivers/staging/media/hantro/hantro_drv.c     |  2 +-
 .../media/hantro/hantro_g1_mpeg2_dec.c        | 17 +++++-------
 drivers/staging/media/hantro/hantro_hw.h      |  2 +-
 drivers/staging/media/hantro/hantro_mpeg2.c   |  2 +-
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    | 14 +++++-----
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  2 +-
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  2 +-
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |  4 +--
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c | 26 +++++++++----------
 include/media/mpeg2-ctrls.h                   |  6 ++---
 include/media/v4l2-ctrls.h                    |  4 +--
 16 files changed, 58 insertions(+), 63 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 514b334470eab..2835ce7394781 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1755,8 +1755,8 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 
     \normalsize
 
-``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION (struct)``
-    Specifies quantization matrices (as extracted from the bitstream) for the
+``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION (struct)``
+    Specifies quantisation matrices (as extracted from the bitstream) for the
     associated MPEG-2 slice data.
 
     .. note::
@@ -1764,7 +1764,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
        This compound control is not yet part of the public kernel API and
        it is expected to change.
 
-.. c:type:: v4l2_ctrl_mpeg2_quantization
+.. c:type:: v4l2_ctrl_mpeg2_quantisation
 
 .. tabularcolumns:: |p{0.8cm}|p{8.0cm}|p{8.5cm}|
 
@@ -1774,7 +1774,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 
     \small
 
-.. flat-table:: struct v4l2_ctrl_mpeg2_quantization
+.. flat-table:: struct v4l2_ctrl_mpeg2_quantisation
     :header-rows:  0
     :stub-columns: 0
     :widths:       1 1 2
@@ -1798,24 +1798,24 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 	YUV formats.
     * - __u8
       - ``intra_quantiser_matrix[64]``
-      - The quantization matrix coefficients for intra-coded frames, in zigzag
+      - The quantisation matrix coefficients for intra-coded frames, in zigzag
 	scanning order. It is relevant for both luma and chroma components,
 	although it can be superseded by the chroma-specific matrix for
 	non-4:2:0 YUV formats.
     * - __u8
       - ``non_intra_quantiser_matrix[64]``
-      - The quantization matrix coefficients for non-intra-coded frames, in
+      - The quantisation matrix coefficients for non-intra-coded frames, in
 	zigzag scanning order. It is relevant for both luma and chroma
 	components, although it can be superseded by the chroma-specific matrix
 	for non-4:2:0 YUV formats.
     * - __u8
       - ``chroma_intra_quantiser_matrix[64]``
-      - The quantization matrix coefficients for the chominance component of
+      - The quantisation matrix coefficients for the chominance component of
 	intra-coded frames, in zigzag scanning order. Only relevant for
 	non-4:2:0 YUV formats.
     * - __u8
       - ``chroma_non_intra_quantiser_matrix[64]``
-      - The quantization matrix coefficients for the chrominance component of
+      - The quantisation matrix coefficients for the chrominance component of
 	non-intra-coded frames, in zigzag scanning order. Only relevant for
 	non-4:2:0 YUV formats.
 
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
index 6dba70da822b8..cba607f789f08 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
@@ -115,8 +115,8 @@ Compressed Formats
 	MPEG-2 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
 	Metadata associated with the frame to decode is required to be passed
 	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS`` control and
-	quantization matrices can optionally be specified through the
-	``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION`` control.
+	quantisation matrices can optionally be specified through the
+	``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION`` control.
 	See the :ref:`associated Codec Control IDs <v4l2-mpeg-mpeg2>`.
 	Exactly one output and one capture buffer must be provided for use with
 	this pixel format. The output buffer must contain the appropriate number
diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
index 8a285daedc6a2..4362945fd39ba 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
@@ -423,12 +423,12 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_mpeg2_slice_params`, containing MPEG-2
 	slice parameters for stateless video decoders.
-    * - ``V4L2_CTRL_TYPE_MPEG2_QUANTIZATION``
+    * - ``V4L2_CTRL_TYPE_MPEG2_QUANTISATION``
       - n/a
       - n/a
       - n/a
-      - A struct :c:type:`v4l2_ctrl_mpeg2_quantization`, containing MPEG-2
-	quantization matrices for stateless video decoders.
+      - A struct :c:type:`v4l2_ctrl_mpeg2_quantisation`, containing MPEG-2
+	quantisation matrices for stateless video decoders.
     * - ``V4L2_CTRL_TYPE_AREA``
       - n/a
       - n/a
diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
index f59940352faac..5b2ebaa35d246 100644
--- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions
+++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
@@ -135,7 +135,7 @@ replace symbol V4L2_CTRL_TYPE_U16 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U32 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U8 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
-replace symbol V4L2_CTRL_TYPE_MPEG2_QUANTIZATION :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_MPEG2_QUANTISATION :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SPS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_PPS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SCALING_MATRIX :c:type:`v4l2_ctrl_type`
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 0d7fe1bd975a3..1ed62f0ed66f0 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -966,7 +966,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
 	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:		return "MPEG-2 Slice Parameters";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION:		return "MPEG-2 Quantization Matrices";
+	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
 	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
 
@@ -1490,8 +1490,8 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:
 		*type = V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS;
 		break;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION:
-		*type = V4L2_CTRL_TYPE_MPEG2_QUANTIZATION;
+	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:
+		*type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION;
 		break;
 	case V4L2_CID_STATELESS_FWHT_PARAMS:
 		*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
@@ -1942,7 +1942,7 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 
 		break;
 
-	case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION:
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
 		break;
 
 	case V4L2_CTRL_TYPE_FWHT_PARAMS:
@@ -2911,8 +2911,8 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
 		elem_size = sizeof(struct v4l2_ctrl_mpeg2_slice_params);
 		break;
-	case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION:
-		elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantization);
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantisation);
 		break;
 	case V4L2_CTRL_TYPE_FWHT_PARAMS:
 		elem_size = sizeof(struct v4l2_ctrl_fwht_params);
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 074b9bb30d6d9..b7b4328d3c6d3 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -303,7 +303,7 @@ static const struct hantro_ctrl controls[] = {
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION,
+			.id = V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION,
 		},
 	}, {
 		.codec = HANTRO_VP8_DECODER,
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 0fd306806f166..55d07aa7756b6 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -82,17 +82,14 @@
 #define PICT_FRAME         3
 
 static void
-hantro_g1_mpeg2_dec_set_quantization(struct hantro_dev *vpu,
+hantro_g1_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 				     struct hantro_ctx *ctx)
 {
-	struct v4l2_ctrl_mpeg2_quantization *quantization;
-
-	quantization = hantro_get_ctrl(ctx,
-				       V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION);
-	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu,
-				     quantization);
-	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma,
-			   G1_REG_QTABLE_BASE);
+	struct v4l2_ctrl_mpeg2_quantisation *q;
+
+	q = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
+	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
+	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, G1_REG_QTABLE_BASE);
 }
 
 static void
@@ -238,7 +235,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	reg = G1_REG_APF_THRESHOLD(8);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(55));
 
-	hantro_g1_mpeg2_dec_set_quantization(vpu, ctx);
+	hantro_g1_mpeg2_dec_set_quantisation(vpu, ctx);
 
 	hantro_g1_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					&dst_buf->vb2_buf,
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 0a42df22472e6..3d8b53567f16b 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -223,7 +223,7 @@ hantro_h264_mv_size(unsigned int width, unsigned int height)
 void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
 void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
-				  const struct v4l2_ctrl_mpeg2_quantization *ctrl);
+				  const struct v4l2_ctrl_mpeg2_quantisation *ctrl);
 int hantro_mpeg2_dec_init(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx);
 
diff --git a/drivers/staging/media/hantro/hantro_mpeg2.c b/drivers/staging/media/hantro/hantro_mpeg2.c
index 53a99a9988d51..04e545eb0a830 100644
--- a/drivers/staging/media/hantro/hantro_mpeg2.c
+++ b/drivers/staging/media/hantro/hantro_mpeg2.c
@@ -19,7 +19,7 @@ static const u8 zigzag[64] = {
 };
 
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
-				  const struct v4l2_ctrl_mpeg2_quantization *ctrl)
+				  const struct v4l2_ctrl_mpeg2_quantisation *ctrl)
 {
 	int i, n;
 
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index f610fa5b43354..61a54549774db 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -84,16 +84,14 @@
 #define PICT_FRAME         3
 
 static void
-rk3399_vpu_mpeg2_dec_set_quantization(struct hantro_dev *vpu,
+rk3399_vpu_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 				      struct hantro_ctx *ctx)
 {
-	struct v4l2_ctrl_mpeg2_quantization *quantization;
+	struct v4l2_ctrl_mpeg2_quantisation *q;
 
-	quantization = hantro_get_ctrl(ctx,
-				       V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION);
-	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, quantization);
-	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma,
-			   VDPU_REG_QTABLE_BASE);
+	q = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
+	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
+	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, VDPU_REG_QTABLE_BASE);
 }
 
 static void
@@ -243,7 +241,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	      VDPU_REG_MV_ACCURACY_BWD(1);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(136));
 
-	rk3399_vpu_mpeg2_dec_set_quantization(vpu, ctx);
+	rk3399_vpu_mpeg2_dec_set_quantisation(vpu, ctx);
 
 	rk3399_vpu_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					 &dst_buf->vb2_buf,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 92812d1a39d49..62a5407664ae3 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -37,7 +37,7 @@ static const struct cedrus_control cedrus_controls[] = {
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION,
+			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION,
 		},
 		.codec		= CEDRUS_CODEC_MPEG2,
 	},
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index 15f147dad4cbb..6516bff3d319f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -69,7 +69,7 @@ struct cedrus_h264_run {
 
 struct cedrus_mpeg2_run {
 	const struct v4l2_ctrl_mpeg2_slice_params	*slice_params;
-	const struct v4l2_ctrl_mpeg2_quantization	*quantization;
+	const struct v4l2_ctrl_mpeg2_quantisation	*quantisation;
 };
 
 struct cedrus_h265_run {
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index d696b3ec70c0a..238f779d2ba42 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -42,8 +42,8 @@ void cedrus_device_run(void *priv)
 	case V4L2_PIX_FMT_MPEG2_SLICE:
 		run.mpeg2.slice_params = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
-		run.mpeg2.quantization = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION);
+		run.mpeg2.quantisation = cedrus_find_control_data(ctx,
+			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
 		break;
 
 	case V4L2_PIX_FMT_H264_SLICE:
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index 8bcd6b8f9e2db..459f71679a4f7 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -13,9 +13,9 @@
 #include "cedrus_hw.h"
 #include "cedrus_regs.h"
 
-/* Default MPEG-2 quantization coefficients, from the specification. */
+/* Default MPEG-2 quantisation coefficients, from the specification. */
 
-static const u8 intra_quantization_matrix_default[64] = {
+static const u8 intra_quantisation_matrix_default[64] = {
 	8,  16, 16, 19, 16, 19, 22, 22,
 	22, 22, 22, 22, 26, 24, 26, 27,
 	27, 27, 26, 26, 26, 26, 27, 27,
@@ -26,7 +26,7 @@ static const u8 intra_quantization_matrix_default[64] = {
 	46, 46, 56, 56, 58, 69, 69, 83
 };
 
-static const u8 non_intra_quantization_matrix_default[64] = {
+static const u8 non_intra_quantisation_matrix_default[64] = {
 	16, 16, 16, 16, 16, 16, 16, 16,
 	16, 16, 16, 16, 16, 16, 16, 16,
 	16, 16, 16, 16, 16, 16, 16, 16,
@@ -77,7 +77,7 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
 	const struct v4l2_mpeg2_sequence *sequence;
 	const struct v4l2_mpeg2_picture *picture;
-	const struct v4l2_ctrl_mpeg2_quantization *quantization;
+	const struct v4l2_ctrl_mpeg2_quantisation *quantisation;
 	dma_addr_t src_buf_addr, dst_luma_addr, dst_chroma_addr;
 	dma_addr_t fwd_luma_addr, fwd_chroma_addr;
 	dma_addr_t bwd_luma_addr, bwd_chroma_addr;
@@ -93,17 +93,17 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	sequence = &slice_params->sequence;
 	picture = &slice_params->picture;
 
-	quantization = run->mpeg2.quantization;
+	quantisation = run->mpeg2.quantisation;
 
 	/* Activate MPEG engine. */
 	cedrus_engine_enable(ctx, CEDRUS_CODEC_MPEG2);
 
-	/* Set intra quantization matrix. */
+	/* Set intra quantisation matrix. */
 
-	if (quantization && quantization->load_intra_quantiser_matrix)
-		matrix = quantization->intra_quantiser_matrix;
+	if (quantisation && quantisation->load_intra_quantiser_matrix)
+		matrix = quantisation->intra_quantiser_matrix;
 	else
-		matrix = intra_quantization_matrix_default;
+		matrix = intra_quantisation_matrix_default;
 
 	for (i = 0; i < 64; i++) {
 		reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]);
@@ -112,12 +112,12 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 		cedrus_write(dev, VE_DEC_MPEG_IQMINPUT, reg);
 	}
 
-	/* Set non-intra quantization matrix. */
+	/* Set non-intra quantisation matrix. */
 
-	if (quantization && quantization->load_non_intra_quantiser_matrix)
-		matrix = quantization->non_intra_quantiser_matrix;
+	if (quantisation && quantisation->load_non_intra_quantiser_matrix)
+		matrix = quantisation->non_intra_quantiser_matrix;
 	else
-		matrix = non_intra_quantization_matrix_default;
+		matrix = non_intra_quantisation_matrix_default;
 
 	for (i = 0; i < 64; i++) {
 		reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]);
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index 2a4ae6701166e..b8adf3ac2c1d7 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -12,11 +12,11 @@
 #define _MPEG2_CTRLS_H_
 
 #define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_CODEC_BASE+250)
-#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION		(V4L2_CID_CODEC_BASE+251)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION		(V4L2_CID_CODEC_BASE+251)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
-#define	V4L2_CTRL_TYPE_MPEG2_QUANTIZATION 0x0104
+#define	V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0104
 
 #define V4L2_MPEG2_PICTURE_CODING_TYPE_I	1
 #define V4L2_MPEG2_PICTURE_CODING_TYPE_P	2
@@ -66,7 +66,7 @@ struct v4l2_ctrl_mpeg2_slice_params {
 	__u32	quantiser_scale_code;
 };
 
-struct v4l2_ctrl_mpeg2_quantization {
+struct v4l2_ctrl_mpeg2_quantisation {
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
 	__u8	load_intra_quantiser_matrix;
 	__u8	load_non_intra_quantiser_matrix;
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index a5953b812878f..a38e6bd02a6a1 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -41,7 +41,7 @@ struct video_device;
  * @p_u32:			Pointer to a 32-bit unsigned value.
  * @p_char:			Pointer to a string.
  * @p_mpeg2_slice_params:	Pointer to a MPEG2 slice parameters structure.
- * @p_mpeg2_quantization:	Pointer to a MPEG2 quantization data structure.
+ * @p_mpeg2_quantisation:	Pointer to a MPEG2 quantisation data structure.
  * @p_fwht_params:		Pointer to a FWHT stateless parameters structure.
  * @p_h264_sps:			Pointer to a struct v4l2_ctrl_h264_sps.
  * @p_h264_pps:			Pointer to a struct v4l2_ctrl_h264_pps.
@@ -67,7 +67,7 @@ union v4l2_ctrl_ptr {
 	u32 *p_u32;
 	char *p_char;
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
-	struct v4l2_ctrl_mpeg2_quantization *p_mpeg2_quantization;
+	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quantisation;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	struct v4l2_ctrl_h264_sps *p_h264_sps;
 	struct v4l2_ctrl_h264_pps *p_h264_pps;
-- 
GitLab


From 81bbb65f19819440b42270e1f033d9b14279540c Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:10 +0200
Subject: [PATCH 1395/3804] media: uapi: mpeg2: rework quantisation matrices
 semantics

As stated in the MPEG-2 specification, section 6.3.7 "Quant matrix
extension":

  Each quantisation matrix has a default set of values. When a
  sequence_header_code is decoded all matrices shall be reset to
  their default values. User defined matrices may be downloaded
  and this can occur in a sequence_header() or in a
  quant_matrix_extension().

The load_intra_quantiser_matrix syntax elements are transmitted
in the bitstream headers, signalling that a quantisation matrix
needs to be loaded and used for pictures transmitted afterwards
(until the matrices are reset).

These "load" semantics are implemented in the V4L2 interface
without the need for any "load" flags: passing the control
is effectively a load.

Therefore, rework the V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION
semantics to match the MPEG-2 semantics. Quantisation matrix
values are now initialized by the V4L2 control core to their
reset default values, and applications are expected to reset
them as specified.

The quantisation control is therefore optional, and is used to
load bitstream-defined values into the quantisation matrices.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Co-developed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 17 ---------
 drivers/media/v4l2-core/v4l2-ctrls.c          | 26 +++++++++++++
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c | 38 +------------------
 include/media/mpeg2-ctrls.h                   |  5 ---
 4 files changed, 28 insertions(+), 58 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 2835ce7394781..bfbc5fda9f9bb 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1779,23 +1779,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     :stub-columns: 0
     :widths:       1 1 2
 
-    * - __u8
-      - ``load_intra_quantiser_matrix``
-      - One bit to indicate whether to load the ``intra_quantiser_matrix`` data.
-    * - __u8
-      - ``load_non_intra_quantiser_matrix``
-      - One bit to indicate whether to load the ``non_intra_quantiser_matrix``
-	data.
-    * - __u8
-      - ``load_chroma_intra_quantiser_matrix``
-      - One bit to indicate whether to load the
-	``chroma_intra_quantiser_matrix`` data, only relevant for non-4:2:0 YUV
-	formats.
-    * - __u8
-      - ``load_chroma_non_intra_quantiser_matrix``
-      - One bit to indicate whether to load the
-	``chroma_non_intra_quantiser_matrix`` data, only relevant for non-4:2:0
-	YUV formats.
     * - __u8
       - ``intra_quantiser_matrix[64]``
       - The quantisation matrix coefficients for intra-coded frames, in zigzag
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 1ed62f0ed66f0..41955ea9230d6 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -57,6 +57,18 @@ static bool is_new_manual(const struct v4l2_ctrl *master)
 	return master->is_auto && master->val == master->manual_mode_value;
 }
 
+/* Default intra MPEG-2 quantisation coefficients, from the specification. */
+static const u8 mpeg2_intra_quant_matrix[64] = {
+	8,  16, 16, 19, 16, 19, 22, 22,
+	22, 22, 22, 22, 26, 24, 26, 27,
+	27, 27, 26, 26, 26, 26, 27, 27,
+	27, 29, 29, 29, 34, 34, 34, 29,
+	29, 29, 27, 27, 29, 29, 32, 32,
+	34, 34, 37, 38, 37, 35, 35, 34,
+	35, 38, 38, 40, 40, 40, 48, 48,
+	46, 46, 56, 56, 58, 69, 69, 83
+};
+
 /* Returns NULL or a character pointer array containing the menu for
    the given control ID. The pointer array ends with a NULL pointer.
    An empty string signifies a menu entry that is invalid. This allows
@@ -1692,6 +1704,7 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 			      union v4l2_ctrl_ptr ptr)
 {
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
+	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant;
 	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	void *p = ptr.p + idx * ctrl->elem_size;
@@ -1716,6 +1729,19 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		p_mpeg2_slice_params->picture.picture_coding_type =
 					V4L2_MPEG2_PICTURE_CODING_TYPE_I;
 		break;
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		p_mpeg2_quant = p;
+
+		memcpy(p_mpeg2_quant->intra_quantiser_matrix,
+		       mpeg2_intra_quant_matrix,
+		       ARRAY_SIZE(mpeg2_intra_quant_matrix));
+		/*
+		 * The default non-intra MPEG-2 quantisation
+		 * coefficients are all 16, as per the specification.
+		 */
+		memset(p_mpeg2_quant->non_intra_quantiser_matrix, 16,
+		       sizeof(p_mpeg2_quant->non_intra_quantiser_matrix));
+		break;
 	case V4L2_CTRL_TYPE_VP8_FRAME:
 		p_vp8_frame = p;
 		p_vp8_frame->num_dct_parts = 1;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index 459f71679a4f7..e3154f6318585 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -13,30 +13,6 @@
 #include "cedrus_hw.h"
 #include "cedrus_regs.h"
 
-/* Default MPEG-2 quantisation coefficients, from the specification. */
-
-static const u8 intra_quantisation_matrix_default[64] = {
-	8,  16, 16, 19, 16, 19, 22, 22,
-	22, 22, 22, 22, 26, 24, 26, 27,
-	27, 27, 26, 26, 26, 26, 27, 27,
-	27, 29, 29, 29, 34, 34, 34, 29,
-	29, 29, 27, 27, 29, 29, 32, 32,
-	34, 34, 37, 38, 37, 35, 35, 34,
-	35, 38, 38, 40, 40, 40, 48, 48,
-	46, 46, 56, 56, 58, 69, 69, 83
-};
-
-static const u8 non_intra_quantisation_matrix_default[64] = {
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16,
-	16, 16, 16, 16, 16, 16, 16, 16
-};
-
 static enum cedrus_irq_status cedrus_mpeg2_irq_status(struct cedrus_ctx *ctx)
 {
 	struct cedrus_dev *dev = ctx->dev;
@@ -99,12 +75,7 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	cedrus_engine_enable(ctx, CEDRUS_CODEC_MPEG2);
 
 	/* Set intra quantisation matrix. */
-
-	if (quantisation && quantisation->load_intra_quantiser_matrix)
-		matrix = quantisation->intra_quantiser_matrix;
-	else
-		matrix = intra_quantisation_matrix_default;
-
+	matrix = quantisation->intra_quantiser_matrix;
 	for (i = 0; i < 64; i++) {
 		reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]);
 		reg |= VE_DEC_MPEG_IQMINPUT_FLAG_INTRA;
@@ -113,12 +84,7 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	}
 
 	/* Set non-intra quantisation matrix. */
-
-	if (quantisation && quantisation->load_non_intra_quantiser_matrix)
-		matrix = quantisation->non_intra_quantiser_matrix;
-	else
-		matrix = non_intra_quantisation_matrix_default;
-
+	matrix = quantisation->non_intra_quantiser_matrix;
 	for (i = 0; i < 64; i++) {
 		reg = VE_DEC_MPEG_IQMINPUT_WEIGHT(i, matrix[i]);
 		reg |= VE_DEC_MPEG_IQMINPUT_FLAG_NON_INTRA;
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index b8adf3ac2c1d7..8ea2c7f3a172c 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -68,11 +68,6 @@ struct v4l2_ctrl_mpeg2_slice_params {
 
 struct v4l2_ctrl_mpeg2_quantisation {
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
-	__u8	load_intra_quantiser_matrix;
-	__u8	load_non_intra_quantiser_matrix;
-	__u8	load_chroma_intra_quantiser_matrix;
-	__u8	load_chroma_non_intra_quantiser_matrix;
-
 	__u8	intra_quantiser_matrix[64];
 	__u8	non_intra_quantiser_matrix[64];
 	__u8	chroma_intra_quantiser_matrix[64];
-- 
GitLab


From 88e78409a83a579fde7f150be7ebeefab0e1f774 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:11 +0200
Subject: [PATCH 1396/3804] media: uapi: mpeg2: Cleanup flags

Our current MPEG-2 uAPI uses 1-byte fields for MPEG-2
boolean syntax elements. Clean these up by adding a 'flags'
field and a flag macro for each boolean syntax element.

A follow-up change will refactor this uAPI, so we don't need
to add padding fields just yet.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jonas Karlman <jonas@kwiboo.se>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 77 +++++++++++++------
 drivers/media/v4l2-core/v4l2-ctrls.c          | 14 ++--
 .../media/hantro/hantro_g1_mpeg2_dec.c        | 76 +++++++++---------
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    | 76 +++++++++---------
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c | 38 ++++-----
 include/media/mpeg2-ctrls.h                   | 36 +++++----
 6 files changed, 175 insertions(+), 142 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index bfbc5fda9f9bb..aa11346e7e273 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1687,13 +1687,28 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       - ``profile_and_level_indication``
       - The current profile and level indication as extracted from the
 	bitstream.
-    * - __u8
-      - ``progressive_sequence``
-      - Indication that all the frames for the sequence are progressive instead
-	of interlaced.
     * - __u8
       - ``chroma_format``
       - The chrominance sub-sampling format (1: 4:2:0, 2: 4:2:2, 3: 4:4:4).
+    * - __u32
+      - ``flags``
+      - See :ref:`MPEG-2 Sequence Flags <mpeg2_sequence_flags>`.
+
+.. _mpeg2_sequence_flags:
+
+``MPEG-2 Sequence Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE``
+      - 0x00000001
+      - Indication that all the frames for the sequence are progressive instead
+	of interlaced.
 
 .. c:type:: v4l2_mpeg2_picture
 
@@ -1726,29 +1741,45 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       - ``picture_structure``
       - Picture structure (1: interlaced top field, 2: interlaced bottom field,
 	3: progressive frame).
-    * - __u8
-      - ``top_field_first``
-      - If set to 1 and interlaced stream, top field is output first.
-    * - __u8
-      - ``frame_pred_frame_dct``
-      - If set to 1, only frame-DCT and frame prediction are used.
-    * - __u8
-      - ``concealment_motion_vectors``
-      -  If set to 1, motion vectors are coded for intra macroblocks.
-    * - __u8
-      - ``q_scale_type``
+    * - __u32
+      - ``flags``
+      - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
+
+
+.. _mpeg2_picture_flags:
+
+``MPEG-2 Picture Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST``
+      - 0x00000001
+      - If set and it's an interlaced stream, top field is output first.
+    * - ``V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT``
+      - 0x00000002
+      - If set only frame-DCT and frame prediction are used.
+    * - ``V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV``
+      - 0x00000004
+      -  If set motion vectors are coded for intra macroblocks.
+    * - ``V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE``
+      - 0x00000008
       - This flag affects the inverse quantization process.
-    * - __u8
-      - ``intra_vlc_format``
+    * - ``V4L2_MPEG2_PIC_FLAG_INTRA_VLC``
+      - 0x00000010
       - This flag affects the decoding of transform coefficient data.
-    * - __u8
-      - ``alternate_scan``
+    * - ``V4L2_MPEG2_PIC_FLAG_ALT_SCAN``
+      - 0x00000020
       - This flag affects the decoding of transform coefficient data.
-    * - __u8
-      - ``repeat_first_field``
+    * - ``V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST``
+      - 0x00000040
       - This flag affects the decoding process of progressive frames.
-    * - __u16
-      - ``progressive_frame``
+    * - ``V4L2_MPEG2_PIC_FLAG_PROGRESSIVE``
+      - 0x00000080
       - Indicates whether the current frame is progressive.
 
 .. raw:: latex
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 41955ea9230d6..37531302dd3a8 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1727,7 +1727,7 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		/* interlaced top field */
 		p_mpeg2_slice_params->picture.picture_structure = 1;
 		p_mpeg2_slice_params->picture.picture_coding_type =
-					V4L2_MPEG2_PICTURE_CODING_TYPE_I;
+					V4L2_MPEG2_PIC_CODING_TYPE_I;
 		break;
 	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
 		p_mpeg2_quant = p;
@@ -1949,18 +1949,18 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		}
 
 		switch (p_mpeg2_slice_params->picture.picture_structure) {
-		case 1: /* interlaced top field */
-		case 2: /* interlaced bottom field */
-		case 3: /* progressive */
+		case V4L2_MPEG2_PIC_TOP_FIELD:
+		case V4L2_MPEG2_PIC_BOTTOM_FIELD:
+		case V4L2_MPEG2_PIC_FRAME:
 			break;
 		default:
 			return -EINVAL;
 		}
 
 		switch (p_mpeg2_slice_params->picture.picture_coding_type) {
-		case V4L2_MPEG2_PICTURE_CODING_TYPE_I:
-		case V4L2_MPEG2_PICTURE_CODING_TYPE_P:
-		case V4L2_MPEG2_PICTURE_CODING_TYPE_B:
+		case V4L2_MPEG2_PIC_CODING_TYPE_I:
+		case V4L2_MPEG2_PIC_CODING_TYPE_P:
+		case V4L2_MPEG2_PIC_CODING_TYPE_B:
 			break;
 		default:
 			return -EINVAL;
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 55d07aa7756b6..925341891b7fa 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -77,10 +77,6 @@
 
 #define G1_REG_APF_THRESHOLD(v)		(((v) << 0) & GENMASK(13, 0))
 
-#define PICT_TOP_FIELD     1
-#define PICT_BOTTOM_FIELD  2
-#define PICT_FRAME         3
-
 static void
 hantro_g1_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 				     struct hantro_ctx *ctx)
@@ -96,19 +92,19 @@ static void
 hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 				struct vb2_buffer *src_buf,
 				struct vb2_buffer *dst_buf,
-				const struct v4l2_mpeg2_sequence *sequence,
-				const struct v4l2_mpeg2_picture *picture,
+				const struct v4l2_mpeg2_sequence *seq,
+				const struct v4l2_mpeg2_picture *pic,
 				const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
 	dma_addr_t current_addr, addr;
 
-	switch (picture->picture_coding_type) {
-	case V4L2_MPEG2_PICTURE_CODING_TYPE_B:
+	switch (pic->picture_coding_type) {
+	case V4L2_MPEG2_PIC_CODING_TYPE_B:
 		backward_addr = hantro_get_ref(ctx,
 					       slice_params->backward_ref_ts);
 		fallthrough;
-	case V4L2_MPEG2_PICTURE_CODING_TYPE_P:
+	case V4L2_MPEG2_PIC_CODING_TYPE_P:
 		forward_addr = hantro_get_ref(ctx,
 					      slice_params->forward_ref_ts);
 	}
@@ -121,7 +117,7 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 	addr = hantro_get_dec_buf_addr(ctx, dst_buf);
 	current_addr = addr;
 
-	if (picture->picture_structure == PICT_BOTTOM_FIELD)
+	if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD)
 		addr += ALIGN(ctx->dst_fmt.width, 16);
 	vdpu_write_relaxed(vpu, addr, G1_REG_DEC_OUT_BASE);
 
@@ -131,18 +127,18 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 		backward_addr = current_addr;
 
 	/* Set forward ref frame (top/bottom field) */
-	if (picture->picture_structure == PICT_FRAME ||
-	    picture->picture_coding_type == V4L2_MPEG2_PICTURE_CODING_TYPE_B ||
-	    (picture->picture_structure == PICT_TOP_FIELD &&
-	     picture->top_field_first) ||
-	    (picture->picture_structure == PICT_BOTTOM_FIELD &&
-	     !picture->top_field_first)) {
+	if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME ||
+	    pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B ||
+	    (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD &&
+	     pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST) ||
+	    (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD &&
+	     !(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST))) {
 		vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE);
-	} else if (picture->picture_structure == PICT_TOP_FIELD) {
+	} else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) {
 		vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER1_BASE);
-	} else if (picture->picture_structure == PICT_BOTTOM_FIELD) {
+	} else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) {
 		vdpu_write_relaxed(vpu, current_addr, G1_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, forward_addr, G1_REG_REFER1_BASE);
 	}
@@ -157,8 +153,8 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *sequence;
-	const struct v4l2_mpeg2_picture *picture;
+	const struct v4l2_mpeg2_sequence *seq;
+	const struct v4l2_mpeg2_picture *pic;
 	u32 reg;
 
 	src_buf = hantro_get_src_buf(ctx);
@@ -169,8 +165,8 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	slice_params = hantro_get_ctrl(ctx,
 				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
-	sequence = &slice_params->sequence;
-	picture = &slice_params->picture;
+	seq = &slice_params->sequence;
+	pic = &slice_params->picture;
 
 	reg = G1_REG_DEC_AXI_RD_ID(0) |
 	      G1_REG_DEC_TIMEOUT_E(1) |
@@ -190,11 +186,11 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	reg = G1_REG_DEC_MODE(5) |
 	      G1_REG_RLC_MODE_E(0) |
-	      G1_REG_PIC_INTERLACE_E(!sequence->progressive_sequence) |
-	      G1_REG_PIC_FIELDMODE_E(picture->picture_structure != PICT_FRAME) |
-	      G1_REG_PIC_B_E(picture->picture_coding_type == V4L2_MPEG2_PICTURE_CODING_TYPE_B) |
-	      G1_REG_PIC_INTER_E(picture->picture_coding_type != V4L2_MPEG2_PICTURE_CODING_TYPE_I) |
-	      G1_REG_PIC_TOPFIELD_E(picture->picture_structure == PICT_TOP_FIELD) |
+	      G1_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) |
+	      G1_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) |
+	      G1_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) |
+	      G1_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) |
+	      G1_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) |
 	      G1_REG_FWD_INTERLACE_E(0) |
 	      G1_REG_FILTERING_DIS(1) |
 	      G1_REG_WRITE_MVS_E(0) |
@@ -203,27 +199,27 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	reg = G1_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) |
 	      G1_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) |
-	      G1_REG_ALT_SCAN_E(picture->alternate_scan) |
-	      G1_REG_TOPFIELDFIRST_E(picture->top_field_first);
+	      G1_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+	      G1_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(4));
 
 	reg = G1_REG_STRM_START_BIT(slice_params->data_bit_offset) |
-	      G1_REG_QSCALE_TYPE(picture->q_scale_type) |
-	      G1_REG_CON_MV_E(picture->concealment_motion_vectors) |
-	      G1_REG_INTRA_DC_PREC(picture->intra_dc_precision) |
-	      G1_REG_INTRA_VLC_TAB(picture->intra_vlc_format) |
-	      G1_REG_FRAME_PRED_DCT(picture->frame_pred_frame_dct);
+	      G1_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
+	      G1_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
+	      G1_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
+	      G1_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) |
+	      G1_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(5));
 
 	reg = G1_REG_INIT_QP(1) |
 	      G1_REG_STREAM_LEN(slice_params->bit_size >> 3);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(6));
 
-	reg = G1_REG_ALT_SCAN_FLAG_E(picture->alternate_scan) |
-	      G1_REG_FCODE_FWD_HOR(picture->f_code[0][0]) |
-	      G1_REG_FCODE_FWD_VER(picture->f_code[0][1]) |
-	      G1_REG_FCODE_BWD_HOR(picture->f_code[1][0]) |
-	      G1_REG_FCODE_BWD_VER(picture->f_code[1][1]) |
+	reg = G1_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+	      G1_REG_FCODE_FWD_HOR(pic->f_code[0][0]) |
+	      G1_REG_FCODE_FWD_VER(pic->f_code[0][1]) |
+	      G1_REG_FCODE_BWD_HOR(pic->f_code[1][0]) |
+	      G1_REG_FCODE_BWD_VER(pic->f_code[1][1]) |
 	      G1_REG_MV_ACCURACY_FWD(1) |
 	      G1_REG_MV_ACCURACY_BWD(1);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(18));
@@ -239,7 +235,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	hantro_g1_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					&dst_buf->vb2_buf,
-					sequence, picture, slice_params);
+					seq, pic, slice_params);
 
 	hantro_end_prepare_run(ctx);
 
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index 61a54549774db..ff54398f6643c 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -79,10 +79,6 @@
 #define VDPU_REG_MV_ACCURACY_FWD(v)	((v) ? BIT(2) : 0)
 #define VDPU_REG_MV_ACCURACY_BWD(v)	((v) ? BIT(1) : 0)
 
-#define PICT_TOP_FIELD     1
-#define PICT_BOTTOM_FIELD  2
-#define PICT_FRAME         3
-
 static void
 rk3399_vpu_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 				      struct hantro_ctx *ctx)
@@ -99,19 +95,19 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 				 struct hantro_ctx *ctx,
 				 struct vb2_buffer *src_buf,
 				 struct vb2_buffer *dst_buf,
-				 const struct v4l2_mpeg2_sequence *sequence,
-				 const struct v4l2_mpeg2_picture *picture,
+				 const struct v4l2_mpeg2_sequence *seq,
+				 const struct v4l2_mpeg2_picture *pic,
 				 const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
 	dma_addr_t current_addr, addr;
 
-	switch (picture->picture_coding_type) {
-	case V4L2_MPEG2_PICTURE_CODING_TYPE_B:
+	switch (pic->picture_coding_type) {
+	case V4L2_MPEG2_PIC_CODING_TYPE_B:
 		backward_addr = hantro_get_ref(ctx,
 					       slice_params->backward_ref_ts);
 		fallthrough;
-	case V4L2_MPEG2_PICTURE_CODING_TYPE_P:
+	case V4L2_MPEG2_PIC_CODING_TYPE_P:
 		forward_addr = hantro_get_ref(ctx,
 					      slice_params->forward_ref_ts);
 	}
@@ -124,7 +120,7 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 	addr = vb2_dma_contig_plane_dma_addr(dst_buf, 0);
 	current_addr = addr;
 
-	if (picture->picture_structure == PICT_BOTTOM_FIELD)
+	if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD)
 		addr += ALIGN(ctx->dst_fmt.width, 16);
 	vdpu_write_relaxed(vpu, addr, VDPU_REG_DEC_OUT_BASE);
 
@@ -134,18 +130,18 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 		backward_addr = current_addr;
 
 	/* Set forward ref frame (top/bottom field) */
-	if (picture->picture_structure == PICT_FRAME ||
-	    picture->picture_coding_type == V4L2_MPEG2_PICTURE_CODING_TYPE_B ||
-	    (picture->picture_structure == PICT_TOP_FIELD &&
-	     picture->top_field_first) ||
-	    (picture->picture_structure == PICT_BOTTOM_FIELD &&
-	     !picture->top_field_first)) {
+	if (pic->picture_structure == V4L2_MPEG2_PIC_FRAME ||
+	    pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B ||
+	    (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD &&
+	     pic->flags & V4L2_MPEG2_PIC_TOP_FIELD) ||
+	    (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD &&
+	     !(pic->flags & V4L2_MPEG2_PIC_TOP_FIELD))) {
 		vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE);
-	} else if (picture->picture_structure == PICT_TOP_FIELD) {
+	} else if (pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) {
 		vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER1_BASE);
-	} else if (picture->picture_structure == PICT_BOTTOM_FIELD) {
+	} else if (pic->picture_structure == V4L2_MPEG2_PIC_BOTTOM_FIELD) {
 		vdpu_write_relaxed(vpu, current_addr, VDPU_REG_REFER0_BASE);
 		vdpu_write_relaxed(vpu, forward_addr, VDPU_REG_REFER1_BASE);
 	}
@@ -160,8 +156,8 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *sequence;
-	const struct v4l2_mpeg2_picture *picture;
+	const struct v4l2_mpeg2_sequence *seq;
+	const struct v4l2_mpeg2_picture *pic;
 	u32 reg;
 
 	src_buf = hantro_get_src_buf(ctx);
@@ -171,8 +167,8 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	slice_params = hantro_get_ctrl(ctx,
 				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
-	sequence = &slice_params->sequence;
-	picture = &slice_params->picture;
+	seq = &slice_params->sequence;
+	pic = &slice_params->picture;
 
 	reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
 	      VDPU_REG_DEC_SCMD_DIS(0) |
@@ -207,11 +203,11 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(56));
 
 	reg = VDPU_REG_RLC_MODE_E(0) |
-	      VDPU_REG_PIC_INTERLACE_E(!sequence->progressive_sequence) |
-	      VDPU_REG_PIC_FIELDMODE_E(picture->picture_structure != PICT_FRAME) |
-	      VDPU_REG_PIC_B_E(picture->picture_coding_type == V4L2_MPEG2_PICTURE_CODING_TYPE_B) |
-	      VDPU_REG_PIC_INTER_E(picture->picture_coding_type != V4L2_MPEG2_PICTURE_CODING_TYPE_I) |
-	      VDPU_REG_PIC_TOPFIELD_E(picture->picture_structure == PICT_TOP_FIELD) |
+	      VDPU_REG_PIC_INTERLACE_E(!(seq->flags & V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE)) |
+	      VDPU_REG_PIC_FIELDMODE_E(pic->picture_structure != V4L2_MPEG2_PIC_FRAME) |
+	      VDPU_REG_PIC_B_E(pic->picture_coding_type == V4L2_MPEG2_PIC_CODING_TYPE_B) |
+	      VDPU_REG_PIC_INTER_E(pic->picture_coding_type != V4L2_MPEG2_PIC_CODING_TYPE_I) |
+	      VDPU_REG_PIC_TOPFIELD_E(pic->picture_structure == V4L2_MPEG2_PIC_TOP_FIELD) |
 	      VDPU_REG_FWD_INTERLACE_E(0) |
 	      VDPU_REG_WRITE_MVS_E(0) |
 	      VDPU_REG_DEC_TIMEOUT_E(1) |
@@ -220,23 +216,23 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	reg = VDPU_REG_PIC_MB_WIDTH(MB_WIDTH(ctx->dst_fmt.width)) |
 	      VDPU_REG_PIC_MB_HEIGHT_P(MB_HEIGHT(ctx->dst_fmt.height)) |
-	      VDPU_REG_ALT_SCAN_E(picture->alternate_scan) |
-	      VDPU_REG_TOPFIELDFIRST_E(picture->top_field_first);
+	      VDPU_REG_ALT_SCAN_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+	      VDPU_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(120));
 
 	reg = VDPU_REG_STRM_START_BIT(slice_params->data_bit_offset) |
-	      VDPU_REG_QSCALE_TYPE(picture->q_scale_type) |
-	      VDPU_REG_CON_MV_E(picture->concealment_motion_vectors) |
-	      VDPU_REG_INTRA_DC_PREC(picture->intra_dc_precision) |
-	      VDPU_REG_INTRA_VLC_TAB(picture->intra_vlc_format) |
-	      VDPU_REG_FRAME_PRED_DCT(picture->frame_pred_frame_dct);
+	      VDPU_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
+	      VDPU_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
+	      VDPU_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
+	      VDPU_REG_INTRA_VLC_TAB(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC) |
+	      VDPU_REG_FRAME_PRED_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(122));
 
-	reg = VDPU_REG_ALT_SCAN_FLAG_E(picture->alternate_scan) |
-	      VDPU_REG_FCODE_FWD_HOR(picture->f_code[0][0]) |
-	      VDPU_REG_FCODE_FWD_VER(picture->f_code[0][1]) |
-	      VDPU_REG_FCODE_BWD_HOR(picture->f_code[1][0]) |
-	      VDPU_REG_FCODE_BWD_VER(picture->f_code[1][1]) |
+	reg = VDPU_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
+	      VDPU_REG_FCODE_FWD_HOR(pic->f_code[0][0]) |
+	      VDPU_REG_FCODE_FWD_VER(pic->f_code[0][1]) |
+	      VDPU_REG_FCODE_BWD_HOR(pic->f_code[1][0]) |
+	      VDPU_REG_FCODE_BWD_VER(pic->f_code[1][1]) |
 	      VDPU_REG_MV_ACCURACY_FWD(1) |
 	      VDPU_REG_MV_ACCURACY_BWD(1);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(136));
@@ -245,7 +241,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	rk3399_vpu_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					 &dst_buf->vb2_buf,
-					 sequence, picture, slice_params);
+					 seq, pic, slice_params);
 
 	/* Kick the watchdog and start decoding */
 	hantro_end_prepare_run(ctx);
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index e3154f6318585..e39a17d28c7d9 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -51,8 +51,8 @@ static void cedrus_mpeg2_irq_disable(struct cedrus_ctx *ctx)
 static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 {
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *sequence;
-	const struct v4l2_mpeg2_picture *picture;
+	const struct v4l2_mpeg2_sequence *seq;
+	const struct v4l2_mpeg2_picture *pic;
 	const struct v4l2_ctrl_mpeg2_quantisation *quantisation;
 	dma_addr_t src_buf_addr, dst_luma_addr, dst_chroma_addr;
 	dma_addr_t fwd_luma_addr, fwd_chroma_addr;
@@ -66,8 +66,8 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	u32 reg;
 
 	slice_params = run->mpeg2.slice_params;
-	sequence = &slice_params->sequence;
-	picture = &slice_params->picture;
+	seq = &slice_params->sequence;
+	pic = &slice_params->picture;
 
 	quantisation = run->mpeg2.quantisation;
 
@@ -94,19 +94,19 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
 	/* Set MPEG picture header. */
 
-	reg = VE_DEC_MPEG_MP12HDR_SLICE_TYPE(picture->picture_coding_type);
-	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 0, picture->f_code[0][0]);
-	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 1, picture->f_code[0][1]);
-	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 0, picture->f_code[1][0]);
-	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 1, picture->f_code[1][1]);
-	reg |= VE_DEC_MPEG_MP12HDR_INTRA_DC_PRECISION(picture->intra_dc_precision);
-	reg |= VE_DEC_MPEG_MP12HDR_INTRA_PICTURE_STRUCTURE(picture->picture_structure);
-	reg |= VE_DEC_MPEG_MP12HDR_TOP_FIELD_FIRST(picture->top_field_first);
-	reg |= VE_DEC_MPEG_MP12HDR_FRAME_PRED_FRAME_DCT(picture->frame_pred_frame_dct);
-	reg |= VE_DEC_MPEG_MP12HDR_CONCEALMENT_MOTION_VECTORS(picture->concealment_motion_vectors);
-	reg |= VE_DEC_MPEG_MP12HDR_Q_SCALE_TYPE(picture->q_scale_type);
-	reg |= VE_DEC_MPEG_MP12HDR_INTRA_VLC_FORMAT(picture->intra_vlc_format);
-	reg |= VE_DEC_MPEG_MP12HDR_ALTERNATE_SCAN(picture->alternate_scan);
+	reg = VE_DEC_MPEG_MP12HDR_SLICE_TYPE(pic->picture_coding_type);
+	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 0, pic->f_code[0][0]);
+	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(0, 1, pic->f_code[0][1]);
+	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 0, pic->f_code[1][0]);
+	reg |= VE_DEC_MPEG_MP12HDR_F_CODE(1, 1, pic->f_code[1][1]);
+	reg |= VE_DEC_MPEG_MP12HDR_INTRA_DC_PRECISION(pic->intra_dc_precision);
+	reg |= VE_DEC_MPEG_MP12HDR_INTRA_PICTURE_STRUCTURE(pic->picture_structure);
+	reg |= VE_DEC_MPEG_MP12HDR_TOP_FIELD_FIRST(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
+	reg |= VE_DEC_MPEG_MP12HDR_FRAME_PRED_FRAME_DCT(pic->flags & V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT);
+	reg |= VE_DEC_MPEG_MP12HDR_CONCEALMENT_MOTION_VECTORS(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV);
+	reg |= VE_DEC_MPEG_MP12HDR_Q_SCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE);
+	reg |= VE_DEC_MPEG_MP12HDR_INTRA_VLC_FORMAT(pic->flags & V4L2_MPEG2_PIC_FLAG_INTRA_VLC);
+	reg |= VE_DEC_MPEG_MP12HDR_ALTERNATE_SCAN(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN);
 	reg |= VE_DEC_MPEG_MP12HDR_FULL_PEL_FORWARD_VECTOR(0);
 	reg |= VE_DEC_MPEG_MP12HDR_FULL_PEL_BACKWARD_VECTOR(0);
 
@@ -114,8 +114,8 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
 	/* Set frame dimensions. */
 
-	reg = VE_DEC_MPEG_PICCODEDSIZE_WIDTH(sequence->horizontal_size);
-	reg |= VE_DEC_MPEG_PICCODEDSIZE_HEIGHT(sequence->vertical_size);
+	reg = VE_DEC_MPEG_PICCODEDSIZE_WIDTH(seq->horizontal_size);
+	reg |= VE_DEC_MPEG_PICCODEDSIZE_HEIGHT(seq->vertical_size);
 
 	cedrus_write(dev, VE_DEC_MPEG_PICCODEDSIZE, reg);
 
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index 8ea2c7f3a172c..d3190979d5745 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -18,10 +18,7 @@
 #define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
 #define	V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0104
 
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_I	1
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_P	2
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_B	3
-#define V4L2_MPEG2_PICTURE_CODING_TYPE_D	4
+#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE		0x0001
 
 struct v4l2_mpeg2_sequence {
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */
@@ -31,10 +28,29 @@ struct v4l2_mpeg2_sequence {
 
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
 	__u16	profile_and_level_indication;
-	__u8	progressive_sequence;
 	__u8	chroma_format;
+
+	__u32	flags;
 };
 
+#define V4L2_MPEG2_PIC_CODING_TYPE_I			1
+#define V4L2_MPEG2_PIC_CODING_TYPE_P			2
+#define V4L2_MPEG2_PIC_CODING_TYPE_B			3
+#define V4L2_MPEG2_PIC_CODING_TYPE_D			4
+
+#define V4L2_MPEG2_PIC_TOP_FIELD			0x1
+#define V4L2_MPEG2_PIC_BOTTOM_FIELD			0x2
+#define V4L2_MPEG2_PIC_FRAME				0x3
+
+#define V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST		0x0001
+#define V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT		0x0002
+#define V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV		0x0004
+#define V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE		0x0008
+#define V4L2_MPEG2_PIC_FLAG_INTRA_VLC			0x0010
+#define V4L2_MPEG2_PIC_FLAG_ALT_SCAN			0x0020
+#define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST		0x0040
+#define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE			0x0080
+
 struct v4l2_mpeg2_picture {
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */
 	__u8	picture_coding_type;
@@ -43,14 +59,8 @@ struct v4l2_mpeg2_picture {
 	__u8	f_code[2][2];
 	__u8	intra_dc_precision;
 	__u8	picture_structure;
-	__u8	top_field_first;
-	__u8	frame_pred_frame_dct;
-	__u8	concealment_motion_vectors;
-	__u8	q_scale_type;
-	__u8	intra_vlc_format;
-	__u8	alternate_scan;
-	__u8	repeat_first_field;
-	__u16	progressive_frame;
+
+	__u32	flags;
 };
 
 struct v4l2_ctrl_mpeg2_slice_params {
-- 
GitLab


From f329e21e9dadc5c8ee37c781b30fe63bf7217201 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:12 +0200
Subject: [PATCH 1397/3804] media: uapi: mpeg2: Split sequence and picture
 parameters

Typically, bitstreams are composed of a sequence header,
followed by a number of picture headers and picture coding extension
headers. Each picture can be composed of a number of slices.

Let's split the MPEG-2 uAPI to follow these semantics more closely,
allowing more usage flexibility. Having these controls split up
allows applications to set a sequence control at the beginning
of a sequence, and then set a picture control for each frame.

While here, add padding fields where needed, and document
the uAPI header thoroughly.

Note that the V4L2_CTRL_TYPE_{} defines had to be moved because
they clash with existing ones. This is not really an issue
since they will be re-defined when the controls are moved
out of staging.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jonas Karlman <jonas@kwiboo.se>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             |  64 +++++++----
 .../media/v4l/pixfmt-compressed.rst           |   5 +-
 .../media/v4l/vidioc-queryctrl.rst            |  12 ++
 .../media/videodev2.h.rst.exceptions          |   2 +
 drivers/media/v4l2-core/v4l2-ctrls.c          |  56 ++++++++--
 drivers/staging/media/hantro/hantro_drv.c     |  10 ++
 .../media/hantro/hantro_g1_mpeg2_dec.c        |  14 ++-
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    |  14 ++-
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  12 ++
 drivers/staging/media/sunxi/cedrus/cedrus.h   |   2 +
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |   4 +
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c |   8 +-
 include/media/mpeg2-ctrls.h                   | 105 ++++++++++++++----
 include/media/v4l2-ctrls.h                    |   4 +
 14 files changed, 237 insertions(+), 75 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index aa11346e7e273..f96a2dcb22cc5 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1636,14 +1636,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u32
       - ``data_bit_offset``
       - Offset (in bits) to the video data in the current slice data.
-    * - struct :c:type:`v4l2_mpeg2_sequence`
-      - ``sequence``
-      - Structure with MPEG-2 sequence metadata, merging relevant fields from
-	the sequence header and sequence extension parts of the bitstream.
-    * - struct :c:type:`v4l2_mpeg2_picture`
-      - ``picture``
-      - Structure with MPEG-2 picture metadata, merging relevant fields from
-	the picture header and picture coding extension parts of the bitstream.
     * - __u64
       - ``backward_ref_ts``
       - Timestamp of the V4L2 capture buffer to use as backward reference, used
@@ -1661,14 +1653,28 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u32
       - ``quantiser_scale_code``
       - Code used to determine the quantization scale to use for the IDCT.
+    * - __u8
+      - ``reserved``
+      - Applications and drivers must set this to zero.
 
-.. c:type:: v4l2_mpeg2_sequence
+``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE (struct)``
+    Specifies the sequence parameters (as extracted from the bitstream) for the
+    associated MPEG-2 slice data. This includes fields matching the syntax
+    elements from the sequence header and sequence extension parts of the
+    bitstream as specified by :ref:`mpeg2part2`.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_mpeg2_sequence
 
 .. cssclass:: longtable
 
 .. tabularcolumns:: |p{1.4cm}|p{6.5cm}|p{9.4cm}|
 
-.. flat-table:: struct v4l2_mpeg2_sequence
+.. flat-table:: struct v4l2_ctrl_mpeg2_sequence
     :header-rows:  0
     :stub-columns: 0
     :widths:       1 1 2
@@ -1690,7 +1696,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u8
       - ``chroma_format``
       - The chrominance sub-sampling format (1: 4:2:0, 2: 4:2:2, 3: 4:4:4).
-    * - __u32
+    * - __u8
       - ``flags``
       - See :ref:`MPEG-2 Sequence Flags <mpeg2_sequence_flags>`.
 
@@ -1706,11 +1712,22 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     :widths:       1 1 2
 
     * - ``V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE``
-      - 0x00000001
+      - 0x01
       - Indication that all the frames for the sequence are progressive instead
 	of interlaced.
 
-.. c:type:: v4l2_mpeg2_picture
+``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE (struct)``
+    Specifies the picture parameters (as extracted from the bitstream) for the
+    associated MPEG-2 slice data. This includes fields matching the syntax
+    elements from the picture header and picture coding extension parts of the
+    bitstream as specified by :ref:`mpeg2part2`.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_mpeg2_picture
 
 .. raw:: latex
 
@@ -1720,30 +1737,33 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 
 .. tabularcolumns:: |p{1.0cm}|p{5.6cm}|p{10.7cm}|
 
-.. flat-table:: struct v4l2_mpeg2_picture
+.. flat-table:: struct v4l2_ctrl_mpeg2_picture
     :header-rows:  0
     :stub-columns: 0
     :widths:       1 1 2
 
+    * - __u32
+      - ``flags``
+      - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
+    * - __u8
+      - ``f_code[2][2]``
+      - Motion vector codes.
     * - __u8
       - ``picture_coding_type``
       - Picture coding type for the frame covered by the current slice
 	(V4L2_MPEG2_PICTURE_CODING_TYPE_I, V4L2_MPEG2_PICTURE_CODING_TYPE_P or
 	V4L2_MPEG2_PICTURE_CODING_TYPE_B).
     * - __u8
-      - ``f_code[2][2]``
-      - Motion vector codes.
+      - ``picture_structure``
+      - Picture structure (1: interlaced top field, 2: interlaced bottom field,
+	3: progressive frame).
     * - __u8
       - ``intra_dc_precision``
       - Precision of Discrete Cosine transform (0: 8 bits precision,
 	1: 9 bits precision, 2: 10 bits precision, 3: 11 bits precision).
     * - __u8
-      - ``picture_structure``
-      - Picture structure (1: interlaced top field, 2: interlaced bottom field,
-	3: progressive frame).
-    * - __u32
-      - ``flags``
-      - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
+      - ``reserved[5]``
+      - Applications and drivers must set this to zero.
 
 
 .. _mpeg2_picture_flags:
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
index cba607f789f08..bbbacbd65d6fb 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
@@ -114,8 +114,9 @@ Compressed Formats
 	This format is adapted for stateless video decoders that implement a
 	MPEG-2 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
 	Metadata associated with the frame to decode is required to be passed
-	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS`` control and
-	quantisation matrices can optionally be specified through the
+	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE``,
+        ``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE``, and ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS``
+        controls. Quantisation matrices can optionally be specified through the
 	``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION`` control.
 	See the :ref:`associated Codec Control IDs <v4l2-mpeg-mpeg2>`.
 	Exactly one output and one capture buffer must be provided for use with
diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
index 4362945fd39ba..afc1505a3a7e1 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
@@ -429,6 +429,18 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_mpeg2_quantisation`, containing MPEG-2
 	quantisation matrices for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_MPEG2_SEQUENCE``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_mpeg2_sequence`, containing MPEG-2
+	sequence parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_MPEG2_PICTURE``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_mpeg2_picture`, containing MPEG-2
+	picture parameters for stateless video decoders.
     * - ``V4L2_CTRL_TYPE_AREA``
       - n/a
       - n/a
diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
index 5b2ebaa35d246..928fdc419ee37 100644
--- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions
+++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
@@ -134,6 +134,8 @@ replace symbol V4L2_CTRL_TYPE_STRING :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U16 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U32 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U8 :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_MPEG2_SEQUENCE :c:type:`v4l2_ctrl_type`
+replace symbol V4L2_CTRL_TYPE_MPEG2_PICTURE :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_QUANTISATION :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SPS :c:type:`v4l2_ctrl_type`
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 37531302dd3a8..59b16f70b0934 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -977,6 +977,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:			return "LTR Count";
 	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
 	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
+	case V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE:		return "MPEG-2 Sequence Header";
+	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:		return "MPEG-2 Slice Parameters";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
@@ -1499,6 +1501,12 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RDS_TX_ALT_FREQS:
 		*type = V4L2_CTRL_TYPE_U32;
 		break;
+	case V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE:
+		*type = V4L2_CTRL_TYPE_MPEG2_SEQUENCE;
+		break;
+	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:
+		*type = V4L2_CTRL_TYPE_MPEG2_PICTURE;
+		break;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:
 		*type = V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS;
 		break;
@@ -1703,7 +1711,8 @@ static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx,
 static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 			      union v4l2_ctrl_ptr ptr)
 {
-	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
+	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
+	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
 	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant;
 	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
@@ -1720,13 +1729,18 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 	 * v4l2_ctrl_type enum.
 	 */
 	switch ((u32)ctrl->type) {
-	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
-		p_mpeg2_slice_params = p;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		p_mpeg2_sequence = p;
+
 		/* 4:2:0 */
-		p_mpeg2_slice_params->sequence.chroma_format = 1;
+		p_mpeg2_sequence->chroma_format = 1;
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		p_mpeg2_picture = p;
+
 		/* interlaced top field */
-		p_mpeg2_slice_params->picture.picture_structure = 1;
-		p_mpeg2_slice_params->picture.picture_coding_type =
+		p_mpeg2_picture->picture_structure = V4L2_MPEG2_PIC_TOP_FIELD;
+		p_mpeg2_picture->picture_coding_type =
 					V4L2_MPEG2_PIC_CODING_TYPE_I;
 		break;
 	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
@@ -1909,6 +1923,8 @@ static void std_log(const struct v4l2_ctrl *ctrl)
 static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 				 union v4l2_ctrl_ptr ptr)
 {
+	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
+	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
@@ -1926,10 +1942,10 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 	unsigned int i;
 
 	switch ((u32)ctrl->type) {
-	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
-		p_mpeg2_slice_params = p;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		p_mpeg2_sequence = p;
 
-		switch (p_mpeg2_slice_params->sequence.chroma_format) {
+		switch (p_mpeg2_sequence->chroma_format) {
 		case 1: /* 4:2:0 */
 		case 2: /* 4:2:2 */
 		case 3: /* 4:4:4 */
@@ -1937,8 +1953,12 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		default:
 			return -EINVAL;
 		}
+		break;
+
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		p_mpeg2_picture = p;
 
-		switch (p_mpeg2_slice_params->picture.intra_dc_precision) {
+		switch (p_mpeg2_picture->intra_dc_precision) {
 		case 0: /* 8 bits */
 		case 1: /* 9 bits */
 		case 2: /* 10 bits */
@@ -1948,7 +1968,7 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 			return -EINVAL;
 		}
 
-		switch (p_mpeg2_slice_params->picture.picture_structure) {
+		switch (p_mpeg2_picture->picture_structure) {
 		case V4L2_MPEG2_PIC_TOP_FIELD:
 		case V4L2_MPEG2_PIC_BOTTOM_FIELD:
 		case V4L2_MPEG2_PIC_FRAME:
@@ -1957,7 +1977,7 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 			return -EINVAL;
 		}
 
-		switch (p_mpeg2_slice_params->picture.picture_coding_type) {
+		switch (p_mpeg2_picture->picture_coding_type) {
 		case V4L2_MPEG2_PIC_CODING_TYPE_I:
 		case V4L2_MPEG2_PIC_CODING_TYPE_P:
 		case V4L2_MPEG2_PIC_CODING_TYPE_B:
@@ -1965,7 +1985,13 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		default:
 			return -EINVAL;
 		}
+		zero_reserved(*p_mpeg2_picture);
+		break;
 
+	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
+		p_mpeg2_slice_params = p;
+
+		zero_reserved(*p_mpeg2_slice_params);
 		break;
 
 	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
@@ -2934,6 +2960,12 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_U32:
 		elem_size = sizeof(u32);
 		break;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_sequence);
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_picture);
+		break;
 	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
 		elem_size = sizeof(struct v4l2_ctrl_mpeg2_slice_params);
 		break;
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index b7b4328d3c6d3..4505aac2b9bb8 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -295,6 +295,16 @@ static const struct hantro_ctrl controls[] = {
 			.def = 50,
 			.ops = &hantro_jpeg_ctrl_ops,
 		},
+	}, {
+		.codec = HANTRO_MPEG2_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE,
+		},
+	}, {
+		.codec = HANTRO_MPEG2_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE,
+		},
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 925341891b7fa..fd61e1fae30e3 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -92,8 +92,8 @@ static void
 hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 				struct vb2_buffer *src_buf,
 				struct vb2_buffer *dst_buf,
-				const struct v4l2_mpeg2_sequence *seq,
-				const struct v4l2_mpeg2_picture *pic,
+				const struct v4l2_ctrl_mpeg2_sequence *seq,
+				const struct v4l2_ctrl_mpeg2_picture *pic,
 				const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
@@ -153,8 +153,8 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *seq;
-	const struct v4l2_mpeg2_picture *pic;
+	const struct v4l2_ctrl_mpeg2_sequence *seq;
+	const struct v4l2_ctrl_mpeg2_picture *pic;
 	u32 reg;
 
 	src_buf = hantro_get_src_buf(ctx);
@@ -165,8 +165,10 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	slice_params = hantro_get_ctrl(ctx,
 				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
-	seq = &slice_params->sequence;
-	pic = &slice_params->picture;
+	seq = hantro_get_ctrl(ctx,
+			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+	pic = hantro_get_ctrl(ctx,
+			      V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
 
 	reg = G1_REG_DEC_AXI_RD_ID(0) |
 	      G1_REG_DEC_TIMEOUT_E(1) |
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index ff54398f6643c..5b383906af59b 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -95,8 +95,8 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 				 struct hantro_ctx *ctx,
 				 struct vb2_buffer *src_buf,
 				 struct vb2_buffer *dst_buf,
-				 const struct v4l2_mpeg2_sequence *seq,
-				 const struct v4l2_mpeg2_picture *pic,
+				 const struct v4l2_ctrl_mpeg2_sequence *seq,
+				 const struct v4l2_ctrl_mpeg2_picture *pic,
 				 const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
@@ -156,8 +156,8 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *seq;
-	const struct v4l2_mpeg2_picture *pic;
+	const struct v4l2_ctrl_mpeg2_sequence *seq;
+	const struct v4l2_ctrl_mpeg2_picture *pic;
 	u32 reg;
 
 	src_buf = hantro_get_src_buf(ctx);
@@ -167,8 +167,10 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	slice_params = hantro_get_ctrl(ctx,
 				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
-	seq = &slice_params->sequence;
-	pic = &slice_params->picture;
+	seq = hantro_get_ctrl(ctx,
+			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+	pic = hantro_get_ctrl(ctx,
+			      V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
 
 	reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
 	      VDPU_REG_DEC_SCMD_DIS(0) |
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 62a5407664ae3..878752b30c103 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -29,6 +29,18 @@
 #include "cedrus_hw.h"
 
 static const struct cedrus_control cedrus_controls[] = {
+	{
+		.cfg = {
+			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE,
+		},
+		.codec		= CEDRUS_CODEC_MPEG2,
+	},
+	{
+		.cfg = {
+			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE,
+		},
+		.codec		= CEDRUS_CODEC_MPEG2,
+	},
 	{
 		.cfg = {
 			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index 6516bff3d319f..989873ccb98c6 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -68,6 +68,8 @@ struct cedrus_h264_run {
 };
 
 struct cedrus_mpeg2_run {
+	const struct v4l2_ctrl_mpeg2_sequence		*sequence;
+	const struct v4l2_ctrl_mpeg2_picture		*picture;
 	const struct v4l2_ctrl_mpeg2_slice_params	*slice_params;
 	const struct v4l2_ctrl_mpeg2_quantisation	*quantisation;
 };
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 238f779d2ba42..f4cc6aebfac9b 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -40,6 +40,10 @@ void cedrus_device_run(void *priv)
 
 	switch (ctx->src_fmt.pixelformat) {
 	case V4L2_PIX_FMT_MPEG2_SLICE:
+		run.mpeg2.sequence = cedrus_find_control_data(ctx,
+			V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+		run.mpeg2.picture = cedrus_find_control_data(ctx,
+			V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
 		run.mpeg2.slice_params = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
 		run.mpeg2.quantisation = cedrus_find_control_data(ctx,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index e39a17d28c7d9..65a175c6a5c2f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -51,8 +51,8 @@ static void cedrus_mpeg2_irq_disable(struct cedrus_ctx *ctx)
 static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 {
 	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
-	const struct v4l2_mpeg2_sequence *seq;
-	const struct v4l2_mpeg2_picture *pic;
+	const struct v4l2_ctrl_mpeg2_sequence *seq;
+	const struct v4l2_ctrl_mpeg2_picture *pic;
 	const struct v4l2_ctrl_mpeg2_quantisation *quantisation;
 	dma_addr_t src_buf_addr, dst_luma_addr, dst_chroma_addr;
 	dma_addr_t fwd_luma_addr, fwd_chroma_addr;
@@ -66,8 +66,8 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	u32 reg;
 
 	slice_params = run->mpeg2.slice_params;
-	seq = &slice_params->sequence;
-	pic = &slice_params->picture;
+	seq = run->mpeg2.sequence;
+	pic = run->mpeg2.picture;
 
 	quantisation = run->mpeg2.quantisation;
 
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index d3190979d5745..b4a6aa16d4c0a 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -13,24 +13,44 @@
 
 #define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_CODEC_BASE+250)
 #define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION		(V4L2_CID_CODEC_BASE+251)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE		(V4L2_CID_CODEC_BASE+252)
+#define V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE		(V4L2_CID_CODEC_BASE+253)
 
 /* enum v4l2_ctrl_type type values */
-#define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0103
-#define	V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0104
+#define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0130
+#define V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0131
+#define V4L2_CTRL_TYPE_MPEG2_SEQUENCE 0x0132
+#define V4L2_CTRL_TYPE_MPEG2_PICTURE 0x0133
 
-#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE		0x0001
+#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE		0x01
 
-struct v4l2_mpeg2_sequence {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence header */
+/**
+ * struct v4l2_ctrl_mpeg2_sequence - MPEG-2 sequence header
+ *
+ * All the members on this structure match the sequence header and sequence
+ * extension syntaxes as specified by the MPEG-2 specification.
+ *
+ * Fields horizontal_size, vertical_size and vbv_buffer_size are a
+ * combination of respective _value and extension syntax elements,
+ * as described in section 6.3.3 "Sequence header".
+ *
+ * @horizontal_size: combination of elements horizontal_size_value and
+ * horizontal_size_extension.
+ * @vertical_size: combination of elements vertical_size_value and
+ * vertical_size_extension.
+ * @vbv_buffer_size: combination of elements vbv_buffer_size_value and
+ * vbv_buffer_size_extension.
+ * @profile_and_level_indication: see MPEG-2 specification.
+ * @chroma_format: see MPEG-2 specification.
+ * @flags: see V4L2_MPEG2_SEQ_FLAG_{}.
+ */
+struct v4l2_ctrl_mpeg2_sequence {
 	__u16	horizontal_size;
 	__u16	vertical_size;
 	__u32	vbv_buffer_size;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
 	__u16	profile_and_level_indication;
 	__u8	chroma_format;
-
-	__u32	flags;
+	__u8	flags;
 };
 
 #define V4L2_MPEG2_PIC_CODING_TYPE_I			1
@@ -51,33 +71,72 @@ struct v4l2_mpeg2_sequence {
 #define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST		0x0040
 #define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE			0x0080
 
-struct v4l2_mpeg2_picture {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture header */
-	__u8	picture_coding_type;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Picture coding extension */
+/**
+ * struct v4l2_ctrl_mpeg2_picture - MPEG-2 picture header
+ *
+ * All the members on this structure match the picture header and picture
+ * coding extension syntaxes as specified by the MPEG-2 specification.
+ *
+ * @flags: see V4L2_MPEG2_PIC_FLAG_{}.
+ * @f_code[2][2]: see MPEG-2 specification.
+ * @picture_coding_type: see MPEG-2 specification.
+ * @picture_structure: see V4L2_MPEG2_PIC_{}_FIELD.
+ * @intra_dc_precision: see MPEG-2 specification.
+ * @reserved: padding field. Should be zeroed by applications.
+ */
+struct v4l2_ctrl_mpeg2_picture {
+	__u32	flags;
 	__u8	f_code[2][2];
-	__u8	intra_dc_precision;
+	__u8	picture_coding_type;
 	__u8	picture_structure;
-
-	__u32	flags;
+	__u8	intra_dc_precision;
+	__u8	reserved[5];
 };
 
+/**
+ * struct v4l2_ctrl_mpeg2_slice_params - MPEG-2 slice header
+ *
+ * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for backward prediction.
+ * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for forward prediction. These timestamp refers to the
+ * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns()
+ * to convert the struct timeval to a __u64.
+ * @quantiser_scale_code: quantiser scale integer matching an
+ * homonymous syntax element.
+ * @reserved: padding field. Should be zeroed by applications.
+ */
 struct v4l2_ctrl_mpeg2_slice_params {
 	__u32	bit_size;
 	__u32	data_bit_offset;
 	__u64	backward_ref_ts;
 	__u64	forward_ref_ts;
-
-	struct v4l2_mpeg2_sequence sequence;
-	struct v4l2_mpeg2_picture picture;
-
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Slice */
 	__u32	quantiser_scale_code;
+	__u32	reserved;
 };
 
+/**
+ * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation
+ *
+ * Quantization matrices as specified by section 6.3.7
+ * "Quant matrix extension".
+ *
+ * @intra_quantiser_matrix: The quantisation matrix coefficients
+ * for intra-coded frames, in zigzag scanning order. It is relevant
+ * for both luma and chroma components, although it can be superseded
+ * by the chroma-specific matrix for non-4:2:0 YUV formats.
+ * @non_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for non-intra-coded frames, in zigzag scanning order. It is relevant
+ * for both luma and chroma components, although it can be superseded
+ * by the chroma-specific matrix for non-4:2:0 YUV formats.
+ * @chroma_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for the chominance component of intra-coded frames, in zigzag scanning
+ * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
+ * @chroma_non_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for the chrominance component of non-intra-coded frames, in zigzag scanning
+ * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
+ */
 struct v4l2_ctrl_mpeg2_quantisation {
-	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Quant matrix extension */
 	__u8	intra_quantiser_matrix[64];
 	__u8	non_intra_quantiser_matrix[64];
 	__u8	chroma_intra_quantiser_matrix[64];
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index a38e6bd02a6a1..572ff7eb7be10 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -40,6 +40,8 @@ struct video_device;
  * @p_u16:			Pointer to a 16-bit unsigned value.
  * @p_u32:			Pointer to a 32-bit unsigned value.
  * @p_char:			Pointer to a string.
+ * @p_mpeg2_sequence:		Pointer to a MPEG2 sequence structure.
+ * @p_mpeg2_picture:		Pointer to a MPEG2 picture structure.
  * @p_mpeg2_slice_params:	Pointer to a MPEG2 slice parameters structure.
  * @p_mpeg2_quantisation:	Pointer to a MPEG2 quantisation data structure.
  * @p_fwht_params:		Pointer to a FWHT stateless parameters structure.
@@ -66,6 +68,8 @@ union v4l2_ctrl_ptr {
 	u16 *p_u16;
 	u32 *p_u32;
 	char *p_char;
+	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
+	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
 	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quantisation;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
-- 
GitLab


From b6d7e8031c9c17462935329ca8b37f0da2f99da0 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:13 +0200
Subject: [PATCH 1398/3804] media: uapi: mpeg2: Move reference buffer fields

The forward and backward references are specified per-picture
and not per-slice. Move them to V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 28 +++++++++----------
 .../media/hantro/hantro_g1_mpeg2_dec.c        |  6 ++--
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    |  6 ++--
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c |  4 +--
 include/media/mpeg2-ctrls.h                   | 16 +++++------
 5 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index f96a2dcb22cc5..1765b2a1129d6 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1636,20 +1636,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u32
       - ``data_bit_offset``
       - Offset (in bits) to the video data in the current slice data.
-    * - __u64
-      - ``backward_ref_ts``
-      - Timestamp of the V4L2 capture buffer to use as backward reference, used
-        with B-coded and P-coded frames. The timestamp refers to the
-	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
-	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
-	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
-    * - __u64
-      - ``forward_ref_ts``
-      - Timestamp for the V4L2 capture buffer to use as forward reference, used
-        with B-coded frames. The timestamp refers to the ``timestamp`` field in
-	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
-	function to convert the struct :c:type:`timeval` in struct
-	:c:type:`v4l2_buffer` to a __u64.
     * - __u32
       - ``quantiser_scale_code``
       - Code used to determine the quantization scale to use for the IDCT.
@@ -1742,6 +1728,20 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     :stub-columns: 0
     :widths:       1 1 2
 
+    * - __u64
+      - ``backward_ref_ts``
+      - Timestamp of the V4L2 capture buffer to use as backward reference, used
+        with B-coded and P-coded frames. The timestamp refers to the
+	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
+	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+    * - __u64
+      - ``forward_ref_ts``
+      - Timestamp for the V4L2 capture buffer to use as forward reference, used
+        with B-coded frames. The timestamp refers to the ``timestamp`` field in
+	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
+	function to convert the struct :c:type:`timeval` in struct
+	:c:type:`v4l2_buffer` to a __u64.
     * - __u32
       - ``flags``
       - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index fd61e1fae30e3..19c897cbd3483 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -101,12 +101,10 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 
 	switch (pic->picture_coding_type) {
 	case V4L2_MPEG2_PIC_CODING_TYPE_B:
-		backward_addr = hantro_get_ref(ctx,
-					       slice_params->backward_ref_ts);
+		backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts);
 		fallthrough;
 	case V4L2_MPEG2_PIC_CODING_TYPE_P:
-		forward_addr = hantro_get_ref(ctx,
-					      slice_params->forward_ref_ts);
+		forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts);
 	}
 
 	/* Source bitstream buffer */
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index 5b383906af59b..18bd14704ebff 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -104,12 +104,10 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 
 	switch (pic->picture_coding_type) {
 	case V4L2_MPEG2_PIC_CODING_TYPE_B:
-		backward_addr = hantro_get_ref(ctx,
-					       slice_params->backward_ref_ts);
+		backward_addr = hantro_get_ref(ctx, pic->backward_ref_ts);
 		fallthrough;
 	case V4L2_MPEG2_PIC_CODING_TYPE_P:
-		forward_addr = hantro_get_ref(ctx,
-					      slice_params->forward_ref_ts);
+		forward_addr = hantro_get_ref(ctx, pic->forward_ref_ts);
 	}
 
 	/* Source bitstream buffer */
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index 65a175c6a5c2f..16e99792cf42f 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -128,14 +128,14 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
 	vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, V4L2_BUF_TYPE_VIDEO_CAPTURE);
 
-	forward_idx = vb2_find_timestamp(vq, slice_params->forward_ref_ts, 0);
+	forward_idx = vb2_find_timestamp(vq, pic->forward_ref_ts, 0);
 	fwd_luma_addr = cedrus_dst_buf_addr(ctx, forward_idx, 0);
 	fwd_chroma_addr = cedrus_dst_buf_addr(ctx, forward_idx, 1);
 
 	cedrus_write(dev, VE_DEC_MPEG_FWD_REF_LUMA_ADDR, fwd_luma_addr);
 	cedrus_write(dev, VE_DEC_MPEG_FWD_REF_CHROMA_ADDR, fwd_chroma_addr);
 
-	backward_idx = vb2_find_timestamp(vq, slice_params->backward_ref_ts, 0);
+	backward_idx = vb2_find_timestamp(vq, pic->backward_ref_ts, 0);
 	bwd_luma_addr = cedrus_dst_buf_addr(ctx, backward_idx, 0);
 	bwd_chroma_addr = cedrus_dst_buf_addr(ctx, backward_idx, 1);
 
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index b4a6aa16d4c0a..922ca2243f440 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -77,6 +77,12 @@ struct v4l2_ctrl_mpeg2_sequence {
  * All the members on this structure match the picture header and picture
  * coding extension syntaxes as specified by the MPEG-2 specification.
  *
+ * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for backward prediction.
+ * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for forward prediction. These timestamp refers to the
+ * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns()
+ * to convert the struct timeval to a __u64.
  * @flags: see V4L2_MPEG2_PIC_FLAG_{}.
  * @f_code[2][2]: see MPEG-2 specification.
  * @picture_coding_type: see MPEG-2 specification.
@@ -85,6 +91,8 @@ struct v4l2_ctrl_mpeg2_sequence {
  * @reserved: padding field. Should be zeroed by applications.
  */
 struct v4l2_ctrl_mpeg2_picture {
+	__u64	backward_ref_ts;
+	__u64	forward_ref_ts;
 	__u32	flags;
 	__u8	f_code[2][2];
 	__u8	picture_coding_type;
@@ -96,12 +104,6 @@ struct v4l2_ctrl_mpeg2_picture {
 /**
  * struct v4l2_ctrl_mpeg2_slice_params - MPEG-2 slice header
  *
- * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as
- * reference for backward prediction.
- * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as
- * reference for forward prediction. These timestamp refers to the
- * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns()
- * to convert the struct timeval to a __u64.
  * @quantiser_scale_code: quantiser scale integer matching an
  * homonymous syntax element.
  * @reserved: padding field. Should be zeroed by applications.
@@ -109,8 +111,6 @@ struct v4l2_ctrl_mpeg2_picture {
 struct v4l2_ctrl_mpeg2_slice_params {
 	__u32	bit_size;
 	__u32	data_bit_offset;
-	__u64	backward_ref_ts;
-	__u64	forward_ref_ts;
 	__u32	quantiser_scale_code;
 	__u32	reserved;
 };
-- 
GitLab


From 701a6a410c319729c86bfb696860f21adbff1bfa Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:14 +0200
Subject: [PATCH 1399/3804] media: hantro/cedrus: Remove unneeded slice size
 and slice offset

The MPEG2_SLICE_PARAMS control is designed to refer to a
single slice. However, the Hantro and Cedrus drivers operate
in per-frame mode, and so do the current FFmpeg and GStreamer
implementations that are tested with these two drivers.

In other words, the drivers are expecting all the slices in a picture
(with either frame or field structure) to be contained in
the OUTPUT buffer, which means the slice size and offset shouldn't be used.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c     | 4 ++--
 drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c | 4 ++--
 drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c      | 7 +++----
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 19c897cbd3483..b9c8b288987a8 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -203,7 +203,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	      G1_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(4));
 
-	reg = G1_REG_STRM_START_BIT(slice_params->data_bit_offset) |
+	reg = G1_REG_STRM_START_BIT(0) |
 	      G1_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
 	      G1_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
 	      G1_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
@@ -212,7 +212,7 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(5));
 
 	reg = G1_REG_INIT_QP(1) |
-	      G1_REG_STREAM_LEN(slice_params->bit_size >> 3);
+	      G1_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(6));
 
 	reg = G1_REG_ALT_SCAN_FLAG_E(pic->flags & V4L2_MPEG2_PIC_FLAG_ALT_SCAN) |
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index 18bd14704ebff..314269811244c 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -177,7 +177,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(50));
 
 	reg = VDPU_REG_INIT_QP(1) |
-	      VDPU_REG_STREAM_LEN(slice_params->bit_size >> 3);
+	      VDPU_REG_STREAM_LEN(vb2_get_plane_payload(&src_buf->vb2_buf, 0));
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(51));
 
 	reg = VDPU_REG_APF_THRESHOLD(8) |
@@ -220,7 +220,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	      VDPU_REG_TOPFIELDFIRST_E(pic->flags & V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(120));
 
-	reg = VDPU_REG_STRM_START_BIT(slice_params->data_bit_offset) |
+	reg = VDPU_REG_STRM_START_BIT(0) |
 	      VDPU_REG_QSCALE_TYPE(pic->flags & V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE) |
 	      VDPU_REG_CON_MV_E(pic->flags & V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV) |
 	      VDPU_REG_INTRA_DC_PREC(pic->intra_dc_precision) |
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index 16e99792cf42f..fd71cb175318d 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -152,10 +152,9 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
 	/* Source offset and length in bits. */
 
-	cedrus_write(dev, VE_DEC_MPEG_VLD_OFFSET,
-		     slice_params->data_bit_offset);
+	cedrus_write(dev, VE_DEC_MPEG_VLD_OFFSET, 0);
 
-	reg = slice_params->bit_size - slice_params->data_bit_offset;
+	reg = vb2_get_plane_payload(&run->src->vb2_buf, 0) * 8;
 	cedrus_write(dev, VE_DEC_MPEG_VLD_LEN, reg);
 
 	/* Source beginning and end addresses. */
@@ -169,7 +168,7 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 
 	cedrus_write(dev, VE_DEC_MPEG_VLD_ADDR, reg);
 
-	reg = src_buf_addr + DIV_ROUND_UP(slice_params->bit_size, 8);
+	reg = src_buf_addr + vb2_get_plane_payload(&run->src->vb2_buf, 0);
 	cedrus_write(dev, VE_DEC_MPEG_VLD_END_ADDR, reg);
 
 	/* Macroblock address: start at the beginning. */
-- 
GitLab


From 45f97ba1ce8059632c6f1518fda1faedd7db55fb Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:15 +0200
Subject: [PATCH 1400/3804] media: uapi: mpeg2: Remove
 V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS

The Hantro and Cedrus drivers work in frame-mode,
meaning they expect all the slices in a picture (either frame
or field structure) to be passed in each OUTPUT buffer.

These two are the only V4L2 MPEG-2 stateless decoders currently
supported. Given the VA-API drivers also work per-frame,
coalescing all the MPEG-2 slices in a buffer before the decoding
operation, it makes sense to not expect slice-mode drivers and
therefore remove V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS.

This is done to avoid carrying an unused interface. If needed,
this control can be added back later without breaking backwards
compatibility. Note that this would mean introducing an enumerator control to
specify the decoding mode (see V4L2_CID_STATELESS_H264_DECODE_MODE).

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Co-developed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 35 -------------------
 .../media/v4l/pixfmt-compressed.rst           |  6 ++--
 .../media/v4l/vidioc-queryctrl.rst            |  6 ----
 .../media/videodev2.h.rst.exceptions          |  1 -
 drivers/media/v4l2-core/v4l2-ctrls.c          | 19 ----------
 drivers/staging/media/hantro/hantro_drv.c     |  5 ---
 .../media/hantro/hantro_g1_mpeg2_dec.c        |  9 ++---
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    |  8 ++---
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  6 ----
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  1 -
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |  2 --
 .../staging/media/sunxi/cedrus/cedrus_mpeg2.c |  2 --
 include/media/mpeg2-ctrls.h                   | 16 ---------
 include/media/v4l2-ctrls.h                    |  2 --
 14 files changed, 7 insertions(+), 111 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 1765b2a1129d6..f10b04fba2294 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1608,41 +1608,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 
 .. _v4l2-mpeg-mpeg2:
 
-``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)``
-    Specifies the slice parameters (as extracted from the bitstream) for the
-    associated MPEG-2 slice data. This includes the necessary parameters for
-    configuring a stateless hardware decoding pipeline for MPEG-2.
-    The bitstream parameters are defined according to :ref:`mpeg2part2`.
-
-    .. note::
-
-       This compound control is not yet part of the public kernel API and
-       it is expected to change.
-
-.. c:type:: v4l2_ctrl_mpeg2_slice_params
-
-.. tabularcolumns:: |p{5.6cm}|p{4.6cm}|p{7.1cm}|
-
-.. cssclass:: longtable
-
-.. flat-table:: struct v4l2_ctrl_mpeg2_slice_params
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - __u32
-      - ``bit_size``
-      - Size (in bits) of the current slice data.
-    * - __u32
-      - ``data_bit_offset``
-      - Offset (in bits) to the video data in the current slice data.
-    * - __u32
-      - ``quantiser_scale_code``
-      - Code used to determine the quantization scale to use for the IDCT.
-    * - __u8
-      - ``reserved``
-      - Applications and drivers must set this to zero.
-
 ``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE (struct)``
     Specifies the sequence parameters (as extracted from the bitstream) for the
     associated MPEG-2 slice data. This includes fields matching the syntax
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
index bbbacbd65d6fb..6c10a062adace 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
@@ -114,9 +114,9 @@ Compressed Formats
 	This format is adapted for stateless video decoders that implement a
 	MPEG-2 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
 	Metadata associated with the frame to decode is required to be passed
-	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE``,
-        ``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE``, and ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS``
-        controls. Quantisation matrices can optionally be specified through the
+	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE`` and
+        ``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE`` controls.
+        Quantisation matrices can optionally be specified through the
 	``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION`` control.
 	See the :ref:`associated Codec Control IDs <v4l2-mpeg-mpeg2>`.
 	Exactly one output and one capture buffer must be provided for use with
diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
index afc1505a3a7e1..07e54029e1e91 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
@@ -417,12 +417,6 @@ See also the examples in :ref:`control`.
       - any
       - An unsigned 32-bit valued control ranging from minimum to maximum
 	inclusive. The step value indicates the increment between values.
-    * - ``V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS``
-      - n/a
-      - n/a
-      - n/a
-      - A struct :c:type:`v4l2_ctrl_mpeg2_slice_params`, containing MPEG-2
-	slice parameters for stateless video decoders.
     * - ``V4L2_CTRL_TYPE_MPEG2_QUANTISATION``
       - n/a
       - n/a
diff --git a/Documentation/userspace-api/media/videodev2.h.rst.exceptions b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
index 928fdc419ee37..2217b56c26867 100644
--- a/Documentation/userspace-api/media/videodev2.h.rst.exceptions
+++ b/Documentation/userspace-api/media/videodev2.h.rst.exceptions
@@ -136,7 +136,6 @@ replace symbol V4L2_CTRL_TYPE_U32 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_U8 :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_SEQUENCE :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_PICTURE :c:type:`v4l2_ctrl_type`
-replace symbol V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_MPEG2_QUANTISATION :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_SPS :c:type:`v4l2_ctrl_type`
 replace symbol V4L2_CTRL_TYPE_H264_PPS :c:type:`v4l2_ctrl_type`
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 59b16f70b0934..6a033102d31b8 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -979,7 +979,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE:		return "MPEG-2 Sequence Header";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:		return "MPEG-2 Slice Parameters";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
 	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
@@ -1507,9 +1506,6 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:
 		*type = V4L2_CTRL_TYPE_MPEG2_PICTURE;
 		break;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS:
-		*type = V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS;
-		break;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:
 		*type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION;
 		break;
@@ -1723,11 +1719,6 @@ static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 	else
 		memset(p, 0, ctrl->elem_size);
 
-	/*
-	 * The cast is needed to get rid of a gcc warning complaining that
-	 * V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS is not part of the
-	 * v4l2_ctrl_type enum.
-	 */
 	switch ((u32)ctrl->type) {
 	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
 		p_mpeg2_sequence = p;
@@ -1925,7 +1916,6 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 {
 	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
 	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
-	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	struct v4l2_ctrl_h264_sps *p_h264_sps;
@@ -1988,12 +1978,6 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		zero_reserved(*p_mpeg2_picture);
 		break;
 
-	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
-		p_mpeg2_slice_params = p;
-
-		zero_reserved(*p_mpeg2_slice_params);
-		break;
-
 	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
 		break;
 
@@ -2966,9 +2950,6 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
 		elem_size = sizeof(struct v4l2_ctrl_mpeg2_picture);
 		break;
-	case V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS:
-		elem_size = sizeof(struct v4l2_ctrl_mpeg2_slice_params);
-		break;
 	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
 		elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantisation);
 		break;
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 4505aac2b9bb8..dc9478ac7141f 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -305,11 +305,6 @@ static const struct hantro_ctrl controls[] = {
 		.cfg = {
 			.id = V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE,
 		},
-	}, {
-		.codec = HANTRO_MPEG2_DECODER,
-		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS,
-		},
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index b9c8b288987a8..25d912cbe2ffa 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -93,8 +93,7 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 				struct vb2_buffer *src_buf,
 				struct vb2_buffer *dst_buf,
 				const struct v4l2_ctrl_mpeg2_sequence *seq,
-				const struct v4l2_ctrl_mpeg2_picture *pic,
-				const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
+				const struct v4l2_ctrl_mpeg2_picture *pic)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
 	dma_addr_t current_addr, addr;
@@ -150,7 +149,6 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
-	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
 	const struct v4l2_ctrl_mpeg2_sequence *seq;
 	const struct v4l2_ctrl_mpeg2_picture *pic;
 	u32 reg;
@@ -161,8 +159,6 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	/* Apply request controls if any */
 	hantro_start_prepare_run(ctx);
 
-	slice_params = hantro_get_ctrl(ctx,
-				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
 	seq = hantro_get_ctrl(ctx,
 			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
 	pic = hantro_get_ctrl(ctx,
@@ -232,10 +228,9 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	vdpu_write_relaxed(vpu, reg, G1_SWREG(55));
 
 	hantro_g1_mpeg2_dec_set_quantisation(vpu, ctx);
-
 	hantro_g1_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					&dst_buf->vb2_buf,
-					seq, pic, slice_params);
+					seq, pic);
 
 	hantro_end_prepare_run(ctx);
 
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index 314269811244c..d16d76760278c 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -96,8 +96,7 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 				 struct vb2_buffer *src_buf,
 				 struct vb2_buffer *dst_buf,
 				 const struct v4l2_ctrl_mpeg2_sequence *seq,
-				 const struct v4l2_ctrl_mpeg2_picture *pic,
-				 const struct v4l2_ctrl_mpeg2_slice_params *slice_params)
+				 const struct v4l2_ctrl_mpeg2_picture *pic)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
 	dma_addr_t current_addr, addr;
@@ -153,7 +152,6 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
-	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
 	const struct v4l2_ctrl_mpeg2_sequence *seq;
 	const struct v4l2_ctrl_mpeg2_picture *pic;
 	u32 reg;
@@ -163,8 +161,6 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	hantro_start_prepare_run(ctx);
 
-	slice_params = hantro_get_ctrl(ctx,
-				       V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
 	seq = hantro_get_ctrl(ctx,
 			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
 	pic = hantro_get_ctrl(ctx,
@@ -241,7 +237,7 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	rk3399_vpu_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
 					 &dst_buf->vb2_buf,
-					 seq, pic, slice_params);
+					 seq, pic);
 
 	/* Kick the watchdog and start decoding */
 	hantro_end_prepare_run(ctx);
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 878752b30c103..4430c8fa2cc7d 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -41,12 +41,6 @@ static const struct cedrus_control cedrus_controls[] = {
 		},
 		.codec		= CEDRUS_CODEC_MPEG2,
 	},
-	{
-		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS,
-		},
-		.codec		= CEDRUS_CODEC_MPEG2,
-	},
 	{
 		.cfg = {
 			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index 989873ccb98c6..bbcdcd0787cf7 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -70,7 +70,6 @@ struct cedrus_h264_run {
 struct cedrus_mpeg2_run {
 	const struct v4l2_ctrl_mpeg2_sequence		*sequence;
 	const struct v4l2_ctrl_mpeg2_picture		*picture;
-	const struct v4l2_ctrl_mpeg2_slice_params	*slice_params;
 	const struct v4l2_ctrl_mpeg2_quantisation	*quantisation;
 };
 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index f4cc6aebfac9b..e98185c1f5a73 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -44,8 +44,6 @@ void cedrus_device_run(void *priv)
 			V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
 		run.mpeg2.picture = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
-		run.mpeg2.slice_params = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS);
 		run.mpeg2.quantisation = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
 		break;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index fd71cb175318d..5dad2f296c6d9 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -50,7 +50,6 @@ static void cedrus_mpeg2_irq_disable(struct cedrus_ctx *ctx)
 
 static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 {
-	const struct v4l2_ctrl_mpeg2_slice_params *slice_params;
 	const struct v4l2_ctrl_mpeg2_sequence *seq;
 	const struct v4l2_ctrl_mpeg2_picture *pic;
 	const struct v4l2_ctrl_mpeg2_quantisation *quantisation;
@@ -65,7 +64,6 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	unsigned int i;
 	u32 reg;
 
-	slice_params = run->mpeg2.slice_params;
 	seq = run->mpeg2.sequence;
 	pic = run->mpeg2.picture;
 
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index 922ca2243f440..a84ce088a42e0 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -11,13 +11,11 @@
 #ifndef _MPEG2_CTRLS_H_
 #define _MPEG2_CTRLS_H_
 
-#define V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS		(V4L2_CID_CODEC_BASE+250)
 #define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION		(V4L2_CID_CODEC_BASE+251)
 #define V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE		(V4L2_CID_CODEC_BASE+252)
 #define V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE		(V4L2_CID_CODEC_BASE+253)
 
 /* enum v4l2_ctrl_type type values */
-#define V4L2_CTRL_TYPE_MPEG2_SLICE_PARAMS 0x0130
 #define V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0131
 #define V4L2_CTRL_TYPE_MPEG2_SEQUENCE 0x0132
 #define V4L2_CTRL_TYPE_MPEG2_PICTURE 0x0133
@@ -101,20 +99,6 @@ struct v4l2_ctrl_mpeg2_picture {
 	__u8	reserved[5];
 };
 
-/**
- * struct v4l2_ctrl_mpeg2_slice_params - MPEG-2 slice header
- *
- * @quantiser_scale_code: quantiser scale integer matching an
- * homonymous syntax element.
- * @reserved: padding field. Should be zeroed by applications.
- */
-struct v4l2_ctrl_mpeg2_slice_params {
-	__u32	bit_size;
-	__u32	data_bit_offset;
-	__u32	quantiser_scale_code;
-	__u32	reserved;
-};
-
 /**
  * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation
  *
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 572ff7eb7be10..215e44172c66c 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -42,7 +42,6 @@ struct video_device;
  * @p_char:			Pointer to a string.
  * @p_mpeg2_sequence:		Pointer to a MPEG2 sequence structure.
  * @p_mpeg2_picture:		Pointer to a MPEG2 picture structure.
- * @p_mpeg2_slice_params:	Pointer to a MPEG2 slice parameters structure.
  * @p_mpeg2_quantisation:	Pointer to a MPEG2 quantisation data structure.
  * @p_fwht_params:		Pointer to a FWHT stateless parameters structure.
  * @p_h264_sps:			Pointer to a struct v4l2_ctrl_h264_sps.
@@ -70,7 +69,6 @@ union v4l2_ctrl_ptr {
 	char *p_char;
 	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
 	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
-	struct v4l2_ctrl_mpeg2_slice_params *p_mpeg2_slice_params;
 	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quantisation;
 	struct v4l2_ctrl_fwht_params *p_fwht_params;
 	struct v4l2_ctrl_h264_sps *p_h264_sps;
-- 
GitLab


From 2f0968827a48a3b01a0cc9185abd41978d5ce918 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:16 +0200
Subject: [PATCH 1401/3804] media: uapi: Move the MPEG-2 stateless control type
 out of staging

Move the MPEG-2 stateless control types out of staging,
and re-number them to avoid any confusion.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/media/mpeg2-ctrls.h    | 4 ----
 include/uapi/linux/videodev2.h | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index a84ce088a42e0..a3d19de9e53ab 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -16,10 +16,6 @@
 #define V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE		(V4L2_CID_CODEC_BASE+253)
 
 /* enum v4l2_ctrl_type type values */
-#define V4L2_CTRL_TYPE_MPEG2_QUANTISATION 0x0131
-#define V4L2_CTRL_TYPE_MPEG2_SEQUENCE 0x0132
-#define V4L2_CTRL_TYPE_MPEG2_PICTURE 0x0133
-
 #define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE		0x01
 
 /**
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 311a01cc57752..d3bb18a3a51bd 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1807,6 +1807,10 @@ enum v4l2_ctrl_type {
 	V4L2_CTRL_TYPE_FWHT_PARAMS	    = 0x0220,
 
 	V4L2_CTRL_TYPE_VP8_FRAME            = 0x0240,
+
+	V4L2_CTRL_TYPE_MPEG2_QUANTISATION   = 0x0250,
+	V4L2_CTRL_TYPE_MPEG2_SEQUENCE       = 0x0251,
+	V4L2_CTRL_TYPE_MPEG2_PICTURE        = 0x0252,
 };
 
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
-- 
GitLab


From 3c994c6d21db2269518648f0dab21f10fd46e4ee Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:17 +0200
Subject: [PATCH 1402/3804] media: controls: Log MPEG-2 stateless control in
 .std_log

Simply print the type of the control.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6a033102d31b8..a693ff8dc3dc6 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1873,6 +1873,15 @@ static void std_log(const struct v4l2_ctrl *ctrl)
 	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
 		pr_cont("HDR10_MASTERING_DISPLAY");
 		break;
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		pr_cont("MPEG2_QUANTISATION");
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		pr_cont("MPEG2_SEQUENCE");
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		pr_cont("MPEG2_PICTURE");
+		break;
 	default:
 		pr_cont("unknown type %d", ctrl->type);
 		break;
-- 
GitLab


From f4815b399111d992c1118c708f464a847dfd29e2 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 29 Apr 2021 16:48:18 +0200
Subject: [PATCH 1403/3804] media: uapi: move MPEG-2 stateless controls out of
 staging

Until now, the MPEG-2 V4L2 API was not exported as a public API,
and only defined in a private media header (media/mpeg2-ctrls.h).

After reviewing the MPEG-2 specification in detail, and reworking
the controls so they match the MPEG-2 semantics properly,
we can consider it ready.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Jernej Skrabec <jernej.skrabec@siol.net>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Tested-by: Daniel Almeida <daniel.almeida@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec-stateless.rst   | 214 +++++++++++++++++
 .../media/v4l/ext-ctrls-codec.rst             | 216 ------------------
 .../media/v4l/pixfmt-compressed.rst           |  10 +-
 .../media/v4l/vidioc-g-ext-ctrls.rst          |  12 +
 drivers/media/v4l2-core/v4l2-ctrls.c          |  12 +-
 drivers/staging/media/hantro/hantro_drv.c     |   6 +-
 .../media/hantro/hantro_g1_mpeg2_dec.c        |   6 +-
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    |   6 +-
 drivers/staging/media/sunxi/cedrus/cedrus.c   |   6 +-
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |   6 +-
 include/media/mpeg2-ctrls.h                   | 126 ----------
 include/media/v4l2-ctrls.h                    |   1 -
 include/uapi/linux/v4l2-controls.h            | 112 +++++++++
 include/uapi/linux/videodev2.h                |   3 +
 14 files changed, 367 insertions(+), 369 deletions(-)
 delete mode 100644 include/media/mpeg2-ctrls.h

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
index 3fc04daa9ffbf..2aa508ffb6b9f 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
@@ -1244,3 +1244,217 @@ FWHT Flags
     * - __u8
       - ``padding[3]``
       - Applications and drivers must set this to zero.
+
+.. _v4l2-codec-stateless-mpeg2:
+
+``V4L2_CID_STATELESS_MPEG2_SEQUENCE (struct)``
+    Specifies the sequence parameters (as extracted from the bitstream) for the
+    associated MPEG-2 slice data. This includes fields matching the syntax
+    elements from the sequence header and sequence extension parts of the
+    bitstream as specified by :ref:`mpeg2part2`.
+
+.. c:type:: v4l2_ctrl_mpeg2_sequence
+
+.. raw:: latex
+
+    \small
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.4cm}|p{6.5cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_ctrl_mpeg2_sequence
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u16
+      - ``horizontal_size``
+      - The width of the displayable part of the frame's luminance component.
+    * - __u16
+      - ``vertical_size``
+      - The height of the displayable part of the frame's luminance component.
+    * - __u32
+      - ``vbv_buffer_size``
+      - Used to calculate the required size of the video buffering verifier,
+	defined (in bits) as: 16 * 1024 * vbv_buffer_size.
+    * - __u16
+      - ``profile_and_level_indication``
+      - The current profile and level indication as extracted from the
+	bitstream.
+    * - __u8
+      - ``chroma_format``
+      - The chrominance sub-sampling format (1: 4:2:0, 2: 4:2:2, 3: 4:4:4).
+    * - __u8
+      - ``flags``
+      - See :ref:`MPEG-2 Sequence Flags <mpeg2_sequence_flags>`.
+
+.. _mpeg2_sequence_flags:
+
+``MPEG-2 Sequence Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE``
+      - 0x01
+      - Indication that all the frames for the sequence are progressive instead
+	of interlaced.
+
+.. raw:: latex
+
+    \normalsize
+
+``V4L2_CID_STATELESS_MPEG2_PICTURE (struct)``
+    Specifies the picture parameters (as extracted from the bitstream) for the
+    associated MPEG-2 slice data. This includes fields matching the syntax
+    elements from the picture header and picture coding extension parts of the
+    bitstream as specified by :ref:`mpeg2part2`.
+
+.. c:type:: v4l2_ctrl_mpeg2_picture
+
+.. raw:: latex
+
+    \small
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.0cm}|p{5.6cm}|p{10.7cm}|
+
+.. flat-table:: struct v4l2_ctrl_mpeg2_picture
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u64
+      - ``backward_ref_ts``
+      - Timestamp of the V4L2 capture buffer to use as backward reference, used
+        with B-coded and P-coded frames. The timestamp refers to the
+	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
+	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
+	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
+    * - __u64
+      - ``forward_ref_ts``
+      - Timestamp for the V4L2 capture buffer to use as forward reference, used
+        with B-coded frames. The timestamp refers to the ``timestamp`` field in
+	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
+	function to convert the struct :c:type:`timeval` in struct
+	:c:type:`v4l2_buffer` to a __u64.
+    * - __u32
+      - ``flags``
+      - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
+    * - __u8
+      - ``f_code[2][2]``
+      - Motion vector codes.
+    * - __u8
+      - ``picture_coding_type``
+      - Picture coding type for the frame covered by the current slice
+	(V4L2_MPEG2_PICTURE_CODING_TYPE_I, V4L2_MPEG2_PICTURE_CODING_TYPE_P or
+	V4L2_MPEG2_PICTURE_CODING_TYPE_B).
+    * - __u8
+      - ``picture_structure``
+      - Picture structure (1: interlaced top field, 2: interlaced bottom field,
+	3: progressive frame).
+    * - __u8
+      - ``intra_dc_precision``
+      - Precision of Discrete Cosine transform (0: 8 bits precision,
+	1: 9 bits precision, 2: 10 bits precision, 3: 11 bits precision).
+    * - __u8
+      - ``reserved[5]``
+      - Applications and drivers must set this to zero.
+
+.. _mpeg2_picture_flags:
+
+``MPEG-2 Picture Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST``
+      - 0x00000001
+      - If set and it's an interlaced stream, top field is output first.
+    * - ``V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT``
+      - 0x00000002
+      - If set only frame-DCT and frame prediction are used.
+    * - ``V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV``
+      - 0x00000004
+      -  If set motion vectors are coded for intra macroblocks.
+    * - ``V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE``
+      - 0x00000008
+      - This flag affects the inverse quantization process.
+    * - ``V4L2_MPEG2_PIC_FLAG_INTRA_VLC``
+      - 0x00000010
+      - This flag affects the decoding of transform coefficient data.
+    * - ``V4L2_MPEG2_PIC_FLAG_ALT_SCAN``
+      - 0x00000020
+      - This flag affects the decoding of transform coefficient data.
+    * - ``V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST``
+      - 0x00000040
+      - This flag affects the decoding process of progressive frames.
+    * - ``V4L2_MPEG2_PIC_FLAG_PROGRESSIVE``
+      - 0x00000080
+      - Indicates whether the current frame is progressive.
+
+.. raw:: latex
+
+    \normalsize
+
+``V4L2_CID_STATELESS_MPEG2_QUANTISATION (struct)``
+    Specifies quantisation matrices, in zigzag scanning order, for the
+    associated MPEG-2 slice data. This control is initialized by the kernel
+    to the matrices default values. If a bitstream transmits a user-defined
+    quantisation matrices load, applications are expected to use this control.
+    Applications are also expected to set the control loading the default
+    values, if the quantisation matrices need to be reset, for instance on a
+    sequence header. This process is specified by section 6.3.7.
+    "Quant matrix extension" of the specification.
+
+.. c:type:: v4l2_ctrl_mpeg2_quantisation
+
+.. tabularcolumns:: |p{0.8cm}|p{8.0cm}|p{8.5cm}|
+
+.. cssclass:: longtable
+
+.. raw:: latex
+
+    \small
+
+.. flat-table:: struct v4l2_ctrl_mpeg2_quantisation
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``intra_quantiser_matrix[64]``
+      - The quantisation matrix coefficients for intra-coded frames, in zigzag
+	scanning order. It is relevant for both luma and chroma components,
+	although it can be superseded by the chroma-specific matrix for
+	non-4:2:0 YUV formats.
+    * - __u8
+      - ``non_intra_quantiser_matrix[64]``
+      - The quantisation matrix coefficients for non-intra-coded frames, in
+	zigzag scanning order. It is relevant for both luma and chroma
+	components, although it can be superseded by the chroma-specific matrix
+	for non-4:2:0 YUV formats.
+    * - __u8
+      - ``chroma_intra_quantiser_matrix[64]``
+      - The quantisation matrix coefficients for the chrominance component of
+	intra-coded frames, in zigzag scanning order. Only relevant for
+	non-4:2:0 YUV formats.
+    * - __u8
+      - ``chroma_non_intra_quantiser_matrix[64]``
+      - The quantisation matrix coefficients for the chrominance component of
+	non-intra-coded frames, in zigzag scanning order. Only relevant for
+	non-4:2:0 YUV formats.
+
+.. raw:: latex
+
+    \normalsize
diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index f10b04fba2294..0b8061666c57c 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1606,222 +1606,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
 ``V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L6_BR (integer)``
     Indicates bit rate (bps) for hierarchical coding layer 6 for H264 encoder.
 
-.. _v4l2-mpeg-mpeg2:
-
-``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE (struct)``
-    Specifies the sequence parameters (as extracted from the bitstream) for the
-    associated MPEG-2 slice data. This includes fields matching the syntax
-    elements from the sequence header and sequence extension parts of the
-    bitstream as specified by :ref:`mpeg2part2`.
-
-    .. note::
-
-       This compound control is not yet part of the public kernel API and
-       it is expected to change.
-
-.. c:type:: v4l2_ctrl_mpeg2_sequence
-
-.. cssclass:: longtable
-
-.. tabularcolumns:: |p{1.4cm}|p{6.5cm}|p{9.4cm}|
-
-.. flat-table:: struct v4l2_ctrl_mpeg2_sequence
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - __u16
-      - ``horizontal_size``
-      - The width of the displayable part of the frame's luminance component.
-    * - __u16
-      - ``vertical_size``
-      - The height of the displayable part of the frame's luminance component.
-    * - __u32
-      - ``vbv_buffer_size``
-      - Used to calculate the required size of the video buffering verifier,
-	defined (in bits) as: 16 * 1024 * vbv_buffer_size.
-    * - __u16
-      - ``profile_and_level_indication``
-      - The current profile and level indication as extracted from the
-	bitstream.
-    * - __u8
-      - ``chroma_format``
-      - The chrominance sub-sampling format (1: 4:2:0, 2: 4:2:2, 3: 4:4:4).
-    * - __u8
-      - ``flags``
-      - See :ref:`MPEG-2 Sequence Flags <mpeg2_sequence_flags>`.
-
-.. _mpeg2_sequence_flags:
-
-``MPEG-2 Sequence Flags``
-
-.. cssclass:: longtable
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - ``V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE``
-      - 0x01
-      - Indication that all the frames for the sequence are progressive instead
-	of interlaced.
-
-``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE (struct)``
-    Specifies the picture parameters (as extracted from the bitstream) for the
-    associated MPEG-2 slice data. This includes fields matching the syntax
-    elements from the picture header and picture coding extension parts of the
-    bitstream as specified by :ref:`mpeg2part2`.
-
-    .. note::
-
-       This compound control is not yet part of the public kernel API and
-       it is expected to change.
-
-.. c:type:: v4l2_ctrl_mpeg2_picture
-
-.. raw:: latex
-
-    \small
-
-.. cssclass:: longtable
-
-.. tabularcolumns:: |p{1.0cm}|p{5.6cm}|p{10.7cm}|
-
-.. flat-table:: struct v4l2_ctrl_mpeg2_picture
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - __u64
-      - ``backward_ref_ts``
-      - Timestamp of the V4L2 capture buffer to use as backward reference, used
-        with B-coded and P-coded frames. The timestamp refers to the
-	``timestamp`` field in struct :c:type:`v4l2_buffer`. Use the
-	:c:func:`v4l2_timeval_to_ns()` function to convert the struct
-	:c:type:`timeval` in struct :c:type:`v4l2_buffer` to a __u64.
-    * - __u64
-      - ``forward_ref_ts``
-      - Timestamp for the V4L2 capture buffer to use as forward reference, used
-        with B-coded frames. The timestamp refers to the ``timestamp`` field in
-	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
-	function to convert the struct :c:type:`timeval` in struct
-	:c:type:`v4l2_buffer` to a __u64.
-    * - __u32
-      - ``flags``
-      - See :ref:`MPEG-2 Picture Flags <mpeg2_picture_flags>`.
-    * - __u8
-      - ``f_code[2][2]``
-      - Motion vector codes.
-    * - __u8
-      - ``picture_coding_type``
-      - Picture coding type for the frame covered by the current slice
-	(V4L2_MPEG2_PICTURE_CODING_TYPE_I, V4L2_MPEG2_PICTURE_CODING_TYPE_P or
-	V4L2_MPEG2_PICTURE_CODING_TYPE_B).
-    * - __u8
-      - ``picture_structure``
-      - Picture structure (1: interlaced top field, 2: interlaced bottom field,
-	3: progressive frame).
-    * - __u8
-      - ``intra_dc_precision``
-      - Precision of Discrete Cosine transform (0: 8 bits precision,
-	1: 9 bits precision, 2: 10 bits precision, 3: 11 bits precision).
-    * - __u8
-      - ``reserved[5]``
-      - Applications and drivers must set this to zero.
-
-
-.. _mpeg2_picture_flags:
-
-``MPEG-2 Picture Flags``
-
-.. cssclass:: longtable
-
-.. flat-table::
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - ``V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST``
-      - 0x00000001
-      - If set and it's an interlaced stream, top field is output first.
-    * - ``V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT``
-      - 0x00000002
-      - If set only frame-DCT and frame prediction are used.
-    * - ``V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV``
-      - 0x00000004
-      -  If set motion vectors are coded for intra macroblocks.
-    * - ``V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE``
-      - 0x00000008
-      - This flag affects the inverse quantization process.
-    * - ``V4L2_MPEG2_PIC_FLAG_INTRA_VLC``
-      - 0x00000010
-      - This flag affects the decoding of transform coefficient data.
-    * - ``V4L2_MPEG2_PIC_FLAG_ALT_SCAN``
-      - 0x00000020
-      - This flag affects the decoding of transform coefficient data.
-    * - ``V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST``
-      - 0x00000040
-      - This flag affects the decoding process of progressive frames.
-    * - ``V4L2_MPEG2_PIC_FLAG_PROGRESSIVE``
-      - 0x00000080
-      - Indicates whether the current frame is progressive.
-
-.. raw:: latex
-
-    \normalsize
-
-``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION (struct)``
-    Specifies quantisation matrices (as extracted from the bitstream) for the
-    associated MPEG-2 slice data.
-
-    .. note::
-
-       This compound control is not yet part of the public kernel API and
-       it is expected to change.
-
-.. c:type:: v4l2_ctrl_mpeg2_quantisation
-
-.. tabularcolumns:: |p{0.8cm}|p{8.0cm}|p{8.5cm}|
-
-.. cssclass:: longtable
-
-.. raw:: latex
-
-    \small
-
-.. flat-table:: struct v4l2_ctrl_mpeg2_quantisation
-    :header-rows:  0
-    :stub-columns: 0
-    :widths:       1 1 2
-
-    * - __u8
-      - ``intra_quantiser_matrix[64]``
-      - The quantisation matrix coefficients for intra-coded frames, in zigzag
-	scanning order. It is relevant for both luma and chroma components,
-	although it can be superseded by the chroma-specific matrix for
-	non-4:2:0 YUV formats.
-    * - __u8
-      - ``non_intra_quantiser_matrix[64]``
-      - The quantisation matrix coefficients for non-intra-coded frames, in
-	zigzag scanning order. It is relevant for both luma and chroma
-	components, although it can be superseded by the chroma-specific matrix
-	for non-4:2:0 YUV formats.
-    * - __u8
-      - ``chroma_intra_quantiser_matrix[64]``
-      - The quantisation matrix coefficients for the chominance component of
-	intra-coded frames, in zigzag scanning order. Only relevant for
-	non-4:2:0 YUV formats.
-    * - __u8
-      - ``chroma_non_intra_quantiser_matrix[64]``
-      - The quantisation matrix coefficients for the chrominance component of
-	non-intra-coded frames, in zigzag scanning order. Only relevant for
-	non-4:2:0 YUV formats.
-
-.. raw:: latex
-
-    \normalsize
-
 ``V4L2_CID_FWHT_I_FRAME_QP (integer)``
     Quantization parameter for an I frame for FWHT. Valid range: from 1
     to 31.
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
index 6c10a062adace..0ede39907ee2f 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-compressed.rst
@@ -112,13 +112,13 @@ Compressed Formats
       - 'MG2S'
       - MPEG-2 parsed slice data, as extracted from the MPEG-2 bitstream.
 	This format is adapted for stateless video decoders that implement a
-	MPEG-2 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
+	MPEG-2 pipeline with the :ref:`stateless_decoder`.
 	Metadata associated with the frame to decode is required to be passed
-	through the ``V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE`` and
-        ``V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE`` controls.
+	through the ``V4L2_CID_STATELESS_MPEG2_SEQUENCE`` and
+        ``V4L2_CID_STATELESS_MPEG2_PICTURE`` controls.
         Quantisation matrices can optionally be specified through the
-	``V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION`` control.
-	See the :ref:`associated Codec Control IDs <v4l2-mpeg-mpeg2>`.
+	``V4L2_CID_STATELESS_MPEG2_QUANTISATION`` control.
+	See the :ref:`associated Codec Control IDs <v4l2-codec-stateless-mpeg2>`.
 	Exactly one output and one capture buffer must be provided for use with
 	this pixel format. The output buffer must contain the appropriate number
 	of macroblocks to decode a full corresponding frame to the matching
diff --git a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
index 3ba22983d21f3..2d6bc8d943806 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-g-ext-ctrls.rst
@@ -221,6 +221,18 @@ still cause this situation.
       - ``p_vp8_frame``
       - A pointer to a struct :c:type:`v4l2_ctrl_vp8_frame`. Valid if this control is
         of type ``V4L2_CTRL_TYPE_VP8_FRAME``.
+    * - struct :c:type:`v4l2_ctrl_mpeg2_sequence` *
+      - ``p_mpeg2_sequence``
+      - A pointer to a struct :c:type:`v4l2_ctrl_mpeg2_sequence`. Valid if this control is
+        of type ``V4L2_CTRL_TYPE_MPEG2_SEQUENCE``.
+    * - struct :c:type:`v4l2_ctrl_mpeg2_picture` *
+      - ``p_mpeg2_picture``
+      - A pointer to a struct :c:type:`v4l2_ctrl_mpeg2_picture`. Valid if this control is
+        of type ``V4L2_CTRL_TYPE_MPEG2_PICTURE``.
+    * - struct :c:type:`v4l2_ctrl_mpeg2_quantisation` *
+      - ``p_mpeg2_quantisation``
+      - A pointer to a struct :c:type:`v4l2_ctrl_mpeg2_quantisation`. Valid if this control is
+        of type ``V4L2_CTRL_TYPE_MPEG2_QUANTISATION``.
     * - struct :c:type:`v4l2_ctrl_hdr10_cll_info` *
       - ``p_hdr10_cll``
       - A pointer to a struct :c:type:`v4l2_ctrl_hdr10_cll_info`. Valid if this control is
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index a693ff8dc3dc6..d4e2c7318ee6d 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -977,9 +977,6 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:			return "LTR Count";
 	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
 	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE:		return "MPEG-2 Sequence Header";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
 	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
 
@@ -1228,6 +1225,9 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
 	case V4L2_CID_STATELESS_FWHT_PARAMS:			return "FWHT Stateless Parameters";
 	case V4L2_CID_STATELESS_VP8_FRAME:			return "VP8 Frame Parameters";
+	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:			return "MPEG-2 Sequence Header";
+	case V4L2_CID_STATELESS_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
+	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
 
 	/* Colorimetry controls */
 	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
@@ -1500,13 +1500,13 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_RDS_TX_ALT_FREQS:
 		*type = V4L2_CTRL_TYPE_U32;
 		break;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE:
+	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:
 		*type = V4L2_CTRL_TYPE_MPEG2_SEQUENCE;
 		break;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE:
+	case V4L2_CID_STATELESS_MPEG2_PICTURE:
 		*type = V4L2_CTRL_TYPE_MPEG2_PICTURE;
 		break;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION:
+	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:
 		*type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION;
 		break;
 	case V4L2_CID_STATELESS_FWHT_PARAMS:
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index dc9478ac7141f..2f6b01c7a6a0f 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -298,17 +298,17 @@ static const struct hantro_ctrl controls[] = {
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE,
+			.id = V4L2_CID_STATELESS_MPEG2_SEQUENCE,
 		},
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE,
+			.id = V4L2_CID_STATELESS_MPEG2_PICTURE,
 		},
 	}, {
 		.codec = HANTRO_MPEG2_DECODER,
 		.cfg = {
-			.id = V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION,
+			.id = V4L2_CID_STATELESS_MPEG2_QUANTISATION,
 		},
 	}, {
 		.codec = HANTRO_VP8_DECODER,
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 25d912cbe2ffa..6ee1a19d189b8 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -83,7 +83,7 @@ hantro_g1_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 {
 	struct v4l2_ctrl_mpeg2_quantisation *q;
 
-	q = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
+	q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION);
 	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
 	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, G1_REG_QTABLE_BASE);
 }
@@ -160,9 +160,9 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	hantro_start_prepare_run(ctx);
 
 	seq = hantro_get_ctrl(ctx,
-			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+			      V4L2_CID_STATELESS_MPEG2_SEQUENCE);
 	pic = hantro_get_ctrl(ctx,
-			      V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
+			      V4L2_CID_STATELESS_MPEG2_PICTURE);
 
 	reg = G1_REG_DEC_AXI_RD_ID(0) |
 	      G1_REG_DEC_TIMEOUT_E(1) |
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index d16d76760278c..2527dce7eb18f 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -85,7 +85,7 @@ rk3399_vpu_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 {
 	struct v4l2_ctrl_mpeg2_quantisation *q;
 
-	q = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
+	q = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_MPEG2_QUANTISATION);
 	hantro_mpeg2_dec_copy_qtable(ctx->mpeg2_dec.qtable.cpu, q);
 	vdpu_write_relaxed(vpu, ctx->mpeg2_dec.qtable.dma, VDPU_REG_QTABLE_BASE);
 }
@@ -162,9 +162,9 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	hantro_start_prepare_run(ctx);
 
 	seq = hantro_get_ctrl(ctx,
-			      V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+			      V4L2_CID_STATELESS_MPEG2_SEQUENCE);
 	pic = hantro_get_ctrl(ctx,
-			      V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
+			      V4L2_CID_STATELESS_MPEG2_PICTURE);
 
 	reg = VDPU_REG_DEC_ADV_PRE_DIS(0) |
 	      VDPU_REG_DEC_SCMD_DIS(0) |
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 4430c8fa2cc7d..fa348c09f8444 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -31,19 +31,19 @@
 static const struct cedrus_control cedrus_controls[] = {
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE,
+			.id	= V4L2_CID_STATELESS_MPEG2_SEQUENCE,
 		},
 		.codec		= CEDRUS_CODEC_MPEG2,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE,
+			.id	= V4L2_CID_STATELESS_MPEG2_PICTURE,
 		},
 		.codec		= CEDRUS_CODEC_MPEG2,
 	},
 	{
 		.cfg = {
-			.id	= V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION,
+			.id	= V4L2_CID_STATELESS_MPEG2_QUANTISATION,
 		},
 		.codec		= CEDRUS_CODEC_MPEG2,
 	},
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index e98185c1f5a73..97e410d925068 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -41,11 +41,11 @@ void cedrus_device_run(void *priv)
 	switch (ctx->src_fmt.pixelformat) {
 	case V4L2_PIX_FMT_MPEG2_SLICE:
 		run.mpeg2.sequence = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE);
+			V4L2_CID_STATELESS_MPEG2_SEQUENCE);
 		run.mpeg2.picture = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE);
+			V4L2_CID_STATELESS_MPEG2_PICTURE);
 		run.mpeg2.quantisation = cedrus_find_control_data(ctx,
-			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION);
+			V4L2_CID_STATELESS_MPEG2_QUANTISATION);
 		break;
 
 	case V4L2_PIX_FMT_H264_SLICE:
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
deleted file mode 100644
index a3d19de9e53ab..0000000000000
--- a/include/media/mpeg2-ctrls.h
+++ /dev/null
@@ -1,126 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * These are the MPEG2 state controls for use with stateless MPEG-2
- * codec drivers.
- *
- * It turns out that these structs are not stable yet and will undergo
- * more changes. So keep them private until they are stable and ready to
- * become part of the official public API.
- */
-
-#ifndef _MPEG2_CTRLS_H_
-#define _MPEG2_CTRLS_H_
-
-#define V4L2_CID_MPEG_VIDEO_MPEG2_QUANTISATION		(V4L2_CID_CODEC_BASE+251)
-#define V4L2_CID_MPEG_VIDEO_MPEG2_SEQUENCE		(V4L2_CID_CODEC_BASE+252)
-#define V4L2_CID_MPEG_VIDEO_MPEG2_PICTURE		(V4L2_CID_CODEC_BASE+253)
-
-/* enum v4l2_ctrl_type type values */
-#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE		0x01
-
-/**
- * struct v4l2_ctrl_mpeg2_sequence - MPEG-2 sequence header
- *
- * All the members on this structure match the sequence header and sequence
- * extension syntaxes as specified by the MPEG-2 specification.
- *
- * Fields horizontal_size, vertical_size and vbv_buffer_size are a
- * combination of respective _value and extension syntax elements,
- * as described in section 6.3.3 "Sequence header".
- *
- * @horizontal_size: combination of elements horizontal_size_value and
- * horizontal_size_extension.
- * @vertical_size: combination of elements vertical_size_value and
- * vertical_size_extension.
- * @vbv_buffer_size: combination of elements vbv_buffer_size_value and
- * vbv_buffer_size_extension.
- * @profile_and_level_indication: see MPEG-2 specification.
- * @chroma_format: see MPEG-2 specification.
- * @flags: see V4L2_MPEG2_SEQ_FLAG_{}.
- */
-struct v4l2_ctrl_mpeg2_sequence {
-	__u16	horizontal_size;
-	__u16	vertical_size;
-	__u32	vbv_buffer_size;
-	__u16	profile_and_level_indication;
-	__u8	chroma_format;
-	__u8	flags;
-};
-
-#define V4L2_MPEG2_PIC_CODING_TYPE_I			1
-#define V4L2_MPEG2_PIC_CODING_TYPE_P			2
-#define V4L2_MPEG2_PIC_CODING_TYPE_B			3
-#define V4L2_MPEG2_PIC_CODING_TYPE_D			4
-
-#define V4L2_MPEG2_PIC_TOP_FIELD			0x1
-#define V4L2_MPEG2_PIC_BOTTOM_FIELD			0x2
-#define V4L2_MPEG2_PIC_FRAME				0x3
-
-#define V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST		0x0001
-#define V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT		0x0002
-#define V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV		0x0004
-#define V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE		0x0008
-#define V4L2_MPEG2_PIC_FLAG_INTRA_VLC			0x0010
-#define V4L2_MPEG2_PIC_FLAG_ALT_SCAN			0x0020
-#define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST		0x0040
-#define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE			0x0080
-
-/**
- * struct v4l2_ctrl_mpeg2_picture - MPEG-2 picture header
- *
- * All the members on this structure match the picture header and picture
- * coding extension syntaxes as specified by the MPEG-2 specification.
- *
- * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as
- * reference for backward prediction.
- * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as
- * reference for forward prediction. These timestamp refers to the
- * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns()
- * to convert the struct timeval to a __u64.
- * @flags: see V4L2_MPEG2_PIC_FLAG_{}.
- * @f_code[2][2]: see MPEG-2 specification.
- * @picture_coding_type: see MPEG-2 specification.
- * @picture_structure: see V4L2_MPEG2_PIC_{}_FIELD.
- * @intra_dc_precision: see MPEG-2 specification.
- * @reserved: padding field. Should be zeroed by applications.
- */
-struct v4l2_ctrl_mpeg2_picture {
-	__u64	backward_ref_ts;
-	__u64	forward_ref_ts;
-	__u32	flags;
-	__u8	f_code[2][2];
-	__u8	picture_coding_type;
-	__u8	picture_structure;
-	__u8	intra_dc_precision;
-	__u8	reserved[5];
-};
-
-/**
- * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation
- *
- * Quantization matrices as specified by section 6.3.7
- * "Quant matrix extension".
- *
- * @intra_quantiser_matrix: The quantisation matrix coefficients
- * for intra-coded frames, in zigzag scanning order. It is relevant
- * for both luma and chroma components, although it can be superseded
- * by the chroma-specific matrix for non-4:2:0 YUV formats.
- * @non_intra_quantiser_matrix: The quantisation matrix coefficients
- * for non-intra-coded frames, in zigzag scanning order. It is relevant
- * for both luma and chroma components, although it can be superseded
- * by the chroma-specific matrix for non-4:2:0 YUV formats.
- * @chroma_intra_quantiser_matrix: The quantisation matrix coefficients
- * for the chominance component of intra-coded frames, in zigzag scanning
- * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
- * @chroma_non_intra_quantiser_matrix: The quantisation matrix coefficients
- * for the chrominance component of non-intra-coded frames, in zigzag scanning
- * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
- */
-struct v4l2_ctrl_mpeg2_quantisation {
-	__u8	intra_quantiser_matrix[64];
-	__u8	non_intra_quantiser_matrix[64];
-	__u8	chroma_intra_quantiser_matrix[64];
-	__u8	chroma_non_intra_quantiser_matrix[64];
-};
-
-#endif
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 215e44172c66c..575b59fbac779 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -17,7 +17,6 @@
  * Include the stateless codec compound control definitions.
  * This will move to the public headers once this API is fully stable.
  */
-#include <media/mpeg2-ctrls.h>
 #include <media/hevc-ctrls.h>
 
 /* forward references */
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index d43bec5f1afd0..f96bea19c9916 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -1862,6 +1862,118 @@ struct v4l2_ctrl_vp8_frame {
 	__u64 flags;
 };
 
+/* Stateless MPEG-2 controls */
+
+#define V4L2_MPEG2_SEQ_FLAG_PROGRESSIVE	0x01
+
+#define V4L2_CID_STATELESS_MPEG2_SEQUENCE (V4L2_CID_CODEC_STATELESS_BASE+220)
+/**
+ * struct v4l2_ctrl_mpeg2_sequence - MPEG-2 sequence header
+ *
+ * All the members on this structure match the sequence header and sequence
+ * extension syntaxes as specified by the MPEG-2 specification.
+ *
+ * Fields horizontal_size, vertical_size and vbv_buffer_size are a
+ * combination of respective _value and extension syntax elements,
+ * as described in section 6.3.3 "Sequence header".
+ *
+ * @horizontal_size: combination of elements horizontal_size_value and
+ * horizontal_size_extension.
+ * @vertical_size: combination of elements vertical_size_value and
+ * vertical_size_extension.
+ * @vbv_buffer_size: combination of elements vbv_buffer_size_value and
+ * vbv_buffer_size_extension.
+ * @profile_and_level_indication: see MPEG-2 specification.
+ * @chroma_format: see MPEG-2 specification.
+ * @flags: see V4L2_MPEG2_SEQ_FLAG_{}.
+ */
+struct v4l2_ctrl_mpeg2_sequence {
+	__u16	horizontal_size;
+	__u16	vertical_size;
+	__u32	vbv_buffer_size;
+	__u16	profile_and_level_indication;
+	__u8	chroma_format;
+	__u8	flags;
+};
+
+#define V4L2_MPEG2_PIC_CODING_TYPE_I			1
+#define V4L2_MPEG2_PIC_CODING_TYPE_P			2
+#define V4L2_MPEG2_PIC_CODING_TYPE_B			3
+#define V4L2_MPEG2_PIC_CODING_TYPE_D			4
+
+#define V4L2_MPEG2_PIC_TOP_FIELD			0x1
+#define V4L2_MPEG2_PIC_BOTTOM_FIELD			0x2
+#define V4L2_MPEG2_PIC_FRAME				0x3
+
+#define V4L2_MPEG2_PIC_FLAG_TOP_FIELD_FIRST		0x0001
+#define V4L2_MPEG2_PIC_FLAG_FRAME_PRED_DCT		0x0002
+#define V4L2_MPEG2_PIC_FLAG_CONCEALMENT_MV		0x0004
+#define V4L2_MPEG2_PIC_FLAG_Q_SCALE_TYPE		0x0008
+#define V4L2_MPEG2_PIC_FLAG_INTRA_VLC			0x0010
+#define V4L2_MPEG2_PIC_FLAG_ALT_SCAN			0x0020
+#define V4L2_MPEG2_PIC_FLAG_REPEAT_FIRST		0x0040
+#define V4L2_MPEG2_PIC_FLAG_PROGRESSIVE			0x0080
+
+#define V4L2_CID_STATELESS_MPEG2_PICTURE (V4L2_CID_CODEC_STATELESS_BASE+221)
+/**
+ * struct v4l2_ctrl_mpeg2_picture - MPEG-2 picture header
+ *
+ * All the members on this structure match the picture header and picture
+ * coding extension syntaxes as specified by the MPEG-2 specification.
+ *
+ * @backward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for backward prediction.
+ * @forward_ref_ts: timestamp of the V4L2 capture buffer to use as
+ * reference for forward prediction. These timestamps refer to the
+ * timestamp field in struct v4l2_buffer. Use v4l2_timeval_to_ns()
+ * to convert the struct timeval to a __u64.
+ * @flags: see V4L2_MPEG2_PIC_FLAG_{}.
+ * @f_code: see MPEG-2 specification.
+ * @picture_coding_type: see MPEG-2 specification.
+ * @picture_structure: see V4L2_MPEG2_PIC_{}_FIELD.
+ * @intra_dc_precision: see MPEG-2 specification.
+ * @reserved: padding field. Should be zeroed by applications.
+ */
+struct v4l2_ctrl_mpeg2_picture {
+	__u64	backward_ref_ts;
+	__u64	forward_ref_ts;
+	__u32	flags;
+	__u8	f_code[2][2];
+	__u8	picture_coding_type;
+	__u8	picture_structure;
+	__u8	intra_dc_precision;
+	__u8	reserved[5];
+};
+
+#define V4L2_CID_STATELESS_MPEG2_QUANTISATION (V4L2_CID_CODEC_STATELESS_BASE+222)
+/**
+ * struct v4l2_ctrl_mpeg2_quantisation - MPEG-2 quantisation
+ *
+ * Quantisation matrices as specified by section 6.3.7
+ * "Quant matrix extension".
+ *
+ * @intra_quantiser_matrix: The quantisation matrix coefficients
+ * for intra-coded frames, in zigzag scanning order. It is relevant
+ * for both luma and chroma components, although it can be superseded
+ * by the chroma-specific matrix for non-4:2:0 YUV formats.
+ * @non_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for non-intra-coded frames, in zigzag scanning order. It is relevant
+ * for both luma and chroma components, although it can be superseded
+ * by the chroma-specific matrix for non-4:2:0 YUV formats.
+ * @chroma_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for the chrominance component of intra-coded frames, in zigzag scanning
+ * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
+ * @chroma_non_intra_quantiser_matrix: The quantisation matrix coefficients
+ * for the chrominance component of non-intra-coded frames, in zigzag scanning
+ * order. Only relevant for 4:2:2 and 4:4:4 YUV formats.
+ */
+struct v4l2_ctrl_mpeg2_quantisation {
+	__u8	intra_quantiser_matrix[64];
+	__u8	non_intra_quantiser_matrix[64];
+	__u8	chroma_intra_quantiser_matrix[64];
+	__u8	chroma_non_intra_quantiser_matrix[64];
+};
+
 #define V4L2_CID_COLORIMETRY_CLASS_BASE	(V4L2_CTRL_CLASS_COLORIMETRY | 0x900)
 #define V4L2_CID_COLORIMETRY_CLASS	(V4L2_CTRL_CLASS_COLORIMETRY | 1)
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index d3bb18a3a51bd..9260791b8438f 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1747,6 +1747,9 @@ struct v4l2_ext_control {
 		struct v4l2_ctrl_h264_decode_params __user *p_h264_decode_params;
 		struct v4l2_ctrl_fwht_params __user *p_fwht_params;
 		struct v4l2_ctrl_vp8_frame __user *p_vp8_frame;
+		struct v4l2_ctrl_mpeg2_sequence __user *p_mpeg2_sequence;
+		struct v4l2_ctrl_mpeg2_picture __user *p_mpeg2_picture;
+		struct v4l2_ctrl_mpeg2_quantisation __user *p_mpeg2_quantisation;
 		void __user *ptr;
 	};
 } __attribute__ ((packed));
-- 
GitLab


From d2fcc9c2de1191ea80366e3658711753738dd10a Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:52 +0200
Subject: [PATCH 1404/3804] media: imx: imx7_mipi_csis: Fix logging of only
 error event counters

The mipi_csis_events array ends with 6 non-error events, not 4. Update
mipi_csis_log_counters() accordingly. While at it, log event counters in
forward order, as there's no reason to log them backward.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 1dc680d94a46a..47e3175729c02 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -666,13 +666,15 @@ static void mipi_csis_clear_counters(struct csi_state *state)
 
 static void mipi_csis_log_counters(struct csi_state *state, bool non_errors)
 {
-	int i = non_errors ? MIPI_CSIS_NUM_EVENTS : MIPI_CSIS_NUM_EVENTS - 4;
+	unsigned int num_events = non_errors ? MIPI_CSIS_NUM_EVENTS
+				: MIPI_CSIS_NUM_EVENTS - 6;
 	struct device *dev = &state->pdev->dev;
 	unsigned long flags;
+	unsigned int i;
 
 	spin_lock_irqsave(&state->slock, flags);
 
-	for (i--; i >= 0; i--) {
+	for (i = 0; i < num_events; ++i) {
 		if (state->events[i].counter > 0 || state->debug)
 			dev_info(dev, "%s events: %d\n", state->events[i].name,
 				 state->events[i].counter);
-- 
GitLab


From 7fe1de81ddda28f584e55b847bc4f036e95c8ed2 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:53 +0200
Subject: [PATCH 1405/3804] media: imx: imx7_mipi_csis: Count the CSI-2 debug
 interrupts

In addition to the main interrupts that flag errors and other events,
the CSI-2 receiver has debug interrupt sources that flag various events
useful for debugging. Add those sources to the event counter mechanism
and print them when debugging is enabled.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 69 ++++++++++++++++------
 1 file changed, 51 insertions(+), 18 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 47e3175729c02..4d1ac228eb598 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -195,6 +195,24 @@
 
 /* Debug control register */
 #define MIPI_CSIS_DBG_CTRL			0xc0
+#define MIPI_CSIS_DBG_INTR_MSK			0xc4
+#define MIPI_CSIS_DBG_INTR_MSK_DT_NOT_SUPPORT	BIT(25)
+#define MIPI_CSIS_DBG_INTR_MSK_DT_IGNORE	BIT(24)
+#define MIPI_CSIS_DBG_INTR_MSK_ERR_FRAME_SIZE	BIT(20)
+#define MIPI_CSIS_DBG_INTR_MSK_TRUNCATED_FRAME	BIT(16)
+#define MIPI_CSIS_DBG_INTR_MSK_EARLY_FE		BIT(12)
+#define MIPI_CSIS_DBG_INTR_MSK_EARLY_FS		BIT(8)
+#define MIPI_CSIS_DBG_INTR_MSK_CAM_VSYNC_FALL	BIT(4)
+#define MIPI_CSIS_DBG_INTR_MSK_CAM_VSYNC_RISE	BIT(0)
+#define MIPI_CSIS_DBG_INTR_SRC			0xc8
+#define MIPI_CSIS_DBG_INTR_SRC_DT_NOT_SUPPORT	BIT(25)
+#define MIPI_CSIS_DBG_INTR_SRC_DT_IGNORE	BIT(24)
+#define MIPI_CSIS_DBG_INTR_SRC_ERR_FRAME_SIZE	BIT(20)
+#define MIPI_CSIS_DBG_INTR_SRC_TRUNCATED_FRAME	BIT(16)
+#define MIPI_CSIS_DBG_INTR_SRC_EARLY_FE		BIT(12)
+#define MIPI_CSIS_DBG_INTR_SRC_EARLY_FS		BIT(8)
+#define MIPI_CSIS_DBG_INTR_SRC_CAM_VSYNC_FALL	BIT(4)
+#define MIPI_CSIS_DBG_INTR_SRC_CAM_VSYNC_RISE	BIT(0)
 
 /* Non-image packet data buffers */
 #define MIPI_CSIS_PKTDATA_ODD			0x2000
@@ -210,6 +228,7 @@ enum {
 };
 
 struct mipi_csis_event {
+	bool debug;
 	u32 mask;
 	const char * const name;
 	unsigned int counter;
@@ -217,22 +236,30 @@ struct mipi_csis_event {
 
 static const struct mipi_csis_event mipi_csis_events[] = {
 	/* Errors */
-	{ MIPI_CSIS_INT_SRC_ERR_SOT_HS,		"SOT Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_LOST_FS,	"Lost Frame Start Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_LOST_FE,	"Lost Frame End Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_OVER,		"FIFO Overflow Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_WRONG_CFG,	"Wrong Configuration Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_ECC,		"ECC Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_CRC,		"CRC Error" },
-	{ MIPI_CSIS_INT_SRC_ERR_UNKNOWN,	"Unknown Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_SOT_HS,		"SOT Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_LOST_FS,		"Lost Frame Start Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_LOST_FE,		"Lost Frame End Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_OVER,		"FIFO Overflow Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_WRONG_CFG,	"Wrong Configuration Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_ECC,		"ECC Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_CRC,		"CRC Error" },
+	{ false, MIPI_CSIS_INT_SRC_ERR_UNKNOWN,		"Unknown Error" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_DT_NOT_SUPPORT,	"Data Type Not Supported" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_DT_IGNORE,	"Data Type Ignored" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_ERR_FRAME_SIZE,	"Frame Size Error" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_TRUNCATED_FRAME,	"Truncated Frame" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_EARLY_FE,	"Early Frame End" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_EARLY_FS,	"Early Frame Start" },
 	/* Non-image data receive events */
-	{ MIPI_CSIS_INT_SRC_EVEN_BEFORE,	"Non-image data before even frame" },
-	{ MIPI_CSIS_INT_SRC_EVEN_AFTER,		"Non-image data after even frame" },
-	{ MIPI_CSIS_INT_SRC_ODD_BEFORE,		"Non-image data before odd frame" },
-	{ MIPI_CSIS_INT_SRC_ODD_AFTER,		"Non-image data after odd frame" },
+	{ false, MIPI_CSIS_INT_SRC_EVEN_BEFORE,		"Non-image data before even frame" },
+	{ false, MIPI_CSIS_INT_SRC_EVEN_AFTER,		"Non-image data after even frame" },
+	{ false, MIPI_CSIS_INT_SRC_ODD_BEFORE,		"Non-image data before odd frame" },
+	{ false, MIPI_CSIS_INT_SRC_ODD_AFTER,		"Non-image data after odd frame" },
 	/* Frame start/end */
-	{ MIPI_CSIS_INT_SRC_FRAME_START,	"Frame Start" },
-	{ MIPI_CSIS_INT_SRC_FRAME_END,		"Frame End" },
+	{ false, MIPI_CSIS_INT_SRC_FRAME_START,		"Frame Start" },
+	{ false, MIPI_CSIS_INT_SRC_FRAME_END,		"Frame End" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_CAM_VSYNC_FALL,	"VSYNC Falling Edge" },
+	{ true, MIPI_CSIS_DBG_INTR_SRC_CAM_VSYNC_RISE,	"VSYNC Rising Edge" },
 };
 
 #define MIPI_CSIS_NUM_EVENTS ARRAY_SIZE(mipi_csis_events)
@@ -455,6 +482,7 @@ static const struct csis_pix_format *find_csis_format(u32 code)
 static void mipi_csis_enable_interrupts(struct csi_state *state, bool on)
 {
 	mipi_csis_write(state, MIPI_CSIS_INT_MSK, on ? 0xffffffff : 0);
+	mipi_csis_write(state, MIPI_CSIS_DBG_INTR_MSK, on ? 0xffffffff : 0);
 }
 
 static void mipi_csis_sw_reset(struct csi_state *state)
@@ -667,7 +695,7 @@ static void mipi_csis_clear_counters(struct csi_state *state)
 static void mipi_csis_log_counters(struct csi_state *state, bool non_errors)
 {
 	unsigned int num_events = non_errors ? MIPI_CSIS_NUM_EVENTS
-				: MIPI_CSIS_NUM_EVENTS - 6;
+				: MIPI_CSIS_NUM_EVENTS - 8;
 	struct device *dev = &state->pdev->dev;
 	unsigned long flags;
 	unsigned int i;
@@ -962,22 +990,27 @@ static irqreturn_t mipi_csis_irq_handler(int irq, void *dev_id)
 	unsigned long flags;
 	unsigned int i;
 	u32 status;
+	u32 dbg_status;
 
 	status = mipi_csis_read(state, MIPI_CSIS_INT_SRC);
+	dbg_status = mipi_csis_read(state, MIPI_CSIS_DBG_INTR_SRC);
 
 	spin_lock_irqsave(&state->slock, flags);
 
 	/* Update the event/error counters */
 	if ((status & MIPI_CSIS_INT_SRC_ERRORS) || state->debug) {
 		for (i = 0; i < MIPI_CSIS_NUM_EVENTS; i++) {
-			if (!(status & state->events[i].mask))
-				continue;
-			state->events[i].counter++;
+			struct mipi_csis_event *event = &state->events[i];
+
+			if ((!event->debug && (status & event->mask)) ||
+			    (event->debug && (dbg_status & event->mask)))
+				event->counter++;
 		}
 	}
 	spin_unlock_irqrestore(&state->slock, flags);
 
 	mipi_csis_write(state, MIPI_CSIS_INT_SRC, status);
+	mipi_csis_write(state, MIPI_CSIS_DBG_INTR_SRC, dbg_status);
 
 	return IRQ_HANDLED;
 }
-- 
GitLab


From ca403b37cd9548115e2aeed61f6d147cc6f0d507 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:54 +0200
Subject: [PATCH 1406/3804] media: imx: imx7_mipi_csis: Update ISP_CONFIG
 macros for quad pixel mode

The i.MX8MM expands the DOUBLE_CMPNT bit in the ISP_CONFIG register into
a two-bit field that supports quad pixel mode in addition to the single
and double modes. Update the ISP_CONFIG register macros to support this.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 4d1ac228eb598..fe6aa1d0afa1c 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -166,7 +166,9 @@
 #define MIPI_CSIS_ISP_CONFIG_CH(n)		(0x40 + (n) * 0x10)
 #define MIPI_CSIS_ISPCFG_MEM_FULL_GAP_MSK	(0xff << 24)
 #define MIPI_CSIS_ISPCFG_MEM_FULL_GAP(x)	((x) << 24)
-#define MIPI_CSIS_ISPCFG_DOUBLE_CMPNT		BIT(12)
+#define MIPI_CSIS_ISPCFG_PIXEL_MODE_SINGLE	(0 << 12)
+#define MIPI_CSIS_ISPCFG_PIXEL_MODE_DUAL	(1 << 12)
+#define MIPI_CSIS_ISPCFG_PIXEL_MODE_QUAD	(2 << 12)	/* i.MX8M[MNP] only */
 #define MIPI_CSIS_ISPCFG_ALIGN_32BIT		BIT(11)
 #define MIPI_CSIS_ISPCFG_FMT_YCBCR422_8BIT	(0x1e << 2)
 #define MIPI_CSIS_ISPCFG_FMT_RAW8		(0x2a << 2)
-- 
GitLab


From eed6a93044e38a63e397f7aef8dbc7ee667459fb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:55 +0200
Subject: [PATCH 1407/3804] media: imx: imx7_mipi_csis: Move static data to top
 of mipi_csis_dump_regs()

It's customary to declare static variables at the top of the function,
with a blank line separating them from the non-static variables.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index fe6aa1d0afa1c..1697d87402419 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -429,9 +429,6 @@ static inline u32 mipi_csis_read(struct csi_state *state, u32 reg)
 
 static int mipi_csis_dump_regs(struct csi_state *state)
 {
-	struct device *dev = &state->pdev->dev;
-	unsigned int i;
-	u32 cfg;
 	static const struct {
 		u32 offset;
 		const char * const name;
@@ -450,6 +447,10 @@ static int mipi_csis_dump_regs(struct csi_state *state)
 		{ MIPI_CSIS_DBG_CTRL, "DBG_CTRL" },
 	};
 
+	struct device *dev = &state->pdev->dev;
+	unsigned int i;
+	u32 cfg;
+
 	dev_info(dev, "--- REGISTERS ---\n");
 
 	for (i = 0; i < ARRAY_SIZE(registers); i++) {
-- 
GitLab


From 2cb7c5c08cf25b0d2d375732a76b1185c92a1853 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:56 +0200
Subject: [PATCH 1408/3804] media: imx: imx7_mipi_csis: Minimize locking in
 get/set format

Reduce the code sections that are run with the lock held in the get/set
format handlers:

- mipi_csis_get_format() retrieves a pointer to the format, and thus
  doesn't need locking as long as the arguments passed to the function
  don't require locking either.

- sdformat is a structure passed by the caller, not an internal state,
  and thus doesn't require locking.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 36 ++++++++++++----------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 1697d87402419..f195c65563e72 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -859,8 +859,9 @@ static int mipi_csis_get_fmt(struct v4l2_subdev *mipi_sd,
 	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
 	struct v4l2_mbus_framefmt *fmt;
 
-	mutex_lock(&state->lock);
 	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
+
+	mutex_lock(&state->lock);
 	sdformat->format = *fmt;
 	mutex_unlock(&state->lock);
 
@@ -918,24 +919,17 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *mipi_sd,
 	if (sdformat->pad != CSIS_PAD_SINK)
 		return -EINVAL;
 
-	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
-
-	mutex_lock(&state->lock);
-
-	/* Validate the media bus code and clamp the size. */
-	csis_fmt = find_csis_format(sdformat->format.code);
-	if (!csis_fmt)
-		csis_fmt = &mipi_csis_formats[0];
-
-	fmt->code = csis_fmt->code;
-	fmt->width = sdformat->format.width;
-	fmt->height = sdformat->format.height;
-
 	/*
+	 * Validate the media bus code and clamp and align the size.
+	 *
 	 * The total number of bits per line must be a multiple of 8. We thus
 	 * need to align the width for formats that are not multiples of 8
 	 * bits.
 	 */
+	csis_fmt = find_csis_format(sdformat->format.code);
+	if (!csis_fmt)
+		csis_fmt = &mipi_csis_formats[0];
+
 	switch (csis_fmt->width % 8) {
 	case 0:
 		align = 0;
@@ -955,8 +949,18 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *mipi_sd,
 		break;
 	}
 
-	v4l_bound_align_image(&fmt->width, 1, CSIS_MAX_PIX_WIDTH, align,
-			      &fmt->height, 1, CSIS_MAX_PIX_HEIGHT, 0, 0);
+	v4l_bound_align_image(&sdformat->format.width, 1,
+			      CSIS_MAX_PIX_WIDTH, align,
+			      &sdformat->format.height, 1,
+			      CSIS_MAX_PIX_HEIGHT, 0, 0);
+
+	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
+
+	mutex_lock(&state->lock);
+
+	fmt->code = csis_fmt->code;
+	fmt->width = sdformat->format.width;
+	fmt->height = sdformat->format.height;
 
 	sdformat->format = *fmt;
 
-- 
GitLab


From 90ce0472182f5dffc67155601ea0a927dfb3e9d6 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:57 +0200
Subject: [PATCH 1409/3804] media: imx: imx7_mipi_csis: Don't set subdev data

The driver doesn't need to store subdev data, as the subdev is embedded
in csi_state and is thus accessed using container_of.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index f195c65563e72..08ec87950699a 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1104,8 +1104,6 @@ static int mipi_csis_subdev_init(struct v4l2_subdev *mipi_sd,
 	state->csis_fmt = &mipi_csis_formats[0];
 	mipi_csis_init_cfg(mipi_sd, NULL);
 
-	v4l2_set_subdevdata(mipi_sd, &pdev->dev);
-
 	state->pads[CSIS_PAD_SINK].flags = MEDIA_PAD_FL_SINK
 					 | MEDIA_PAD_FL_MUST_CONNECT;
 	state->pads[CSIS_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE
-- 
GitLab


From b329650e3f2d4369ec66e2fae2227beb7de37bca Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:58 +0200
Subject: [PATCH 1410/3804] media: imx: imx7_mipi_csis: Reorganize code in
 sections

Improve readability by reorganizing the code in sections. No functional
change intended.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 539 +++++++++++----------
 1 file changed, 283 insertions(+), 256 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 08ec87950699a..1831ff4cc6295 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -324,6 +324,10 @@ struct csi_state {
 	struct regulator *mipi_phy_regulator;
 };
 
+/* -----------------------------------------------------------------------------
+ * Format helpers
+ */
+
 struct csis_pix_format {
 	u32 code;
 	u32 fmt_reg;
@@ -417,69 +421,28 @@ static const struct csis_pix_format mipi_csis_formats[] = {
 	}
 };
 
-static inline void mipi_csis_write(struct csi_state *state, u32 reg, u32 val)
-{
-	writel(val, state->regs + reg);
-}
-
-static inline u32 mipi_csis_read(struct csi_state *state, u32 reg)
-{
-	return readl(state->regs + reg);
-}
-
-static int mipi_csis_dump_regs(struct csi_state *state)
+static const struct csis_pix_format *find_csis_format(u32 code)
 {
-	static const struct {
-		u32 offset;
-		const char * const name;
-	} registers[] = {
-		{ MIPI_CSIS_CMN_CTRL, "CMN_CTRL" },
-		{ MIPI_CSIS_CLK_CTRL, "CLK_CTRL" },
-		{ MIPI_CSIS_INT_MSK, "INT_MSK" },
-		{ MIPI_CSIS_DPHY_STATUS, "DPHY_STATUS" },
-		{ MIPI_CSIS_DPHY_CMN_CTRL, "DPHY_CMN_CTRL" },
-		{ MIPI_CSIS_DPHY_SCTRL_L, "DPHY_SCTRL_L" },
-		{ MIPI_CSIS_DPHY_SCTRL_H, "DPHY_SCTRL_H" },
-		{ MIPI_CSIS_ISP_CONFIG_CH(0), "ISP_CONFIG_CH0" },
-		{ MIPI_CSIS_ISP_RESOL_CH(0), "ISP_RESOL_CH0" },
-		{ MIPI_CSIS_SDW_CONFIG_CH(0), "SDW_CONFIG_CH0" },
-		{ MIPI_CSIS_SDW_RESOL_CH(0), "SDW_RESOL_CH0" },
-		{ MIPI_CSIS_DBG_CTRL, "DBG_CTRL" },
-	};
-
-	struct device *dev = &state->pdev->dev;
 	unsigned int i;
-	u32 cfg;
-
-	dev_info(dev, "--- REGISTERS ---\n");
-
-	for (i = 0; i < ARRAY_SIZE(registers); i++) {
-		cfg = mipi_csis_read(state, registers[i].offset);
-		dev_info(dev, "%14s: 0x%08x\n", registers[i].name, cfg);
-	}
 
-	return 0;
+	for (i = 0; i < ARRAY_SIZE(mipi_csis_formats); i++)
+		if (code == mipi_csis_formats[i].code)
+			return &mipi_csis_formats[i];
+	return NULL;
 }
 
-static struct csi_state *
-mipi_notifier_to_csis_state(struct v4l2_async_notifier *n)
-{
-	return container_of(n, struct csi_state, notifier);
-}
+/* -----------------------------------------------------------------------------
+ * Hardware configuration
+ */
 
-static struct csi_state *mipi_sd_to_csis_state(struct v4l2_subdev *sdev)
+static inline u32 mipi_csis_read(struct csi_state *state, u32 reg)
 {
-	return container_of(sdev, struct csi_state, mipi_sd);
+	return readl(state->regs + reg);
 }
 
-static const struct csis_pix_format *find_csis_format(u32 code)
+static inline void mipi_csis_write(struct csi_state *state, u32 reg, u32 val)
 {
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(mipi_csis_formats); i++)
-		if (code == mipi_csis_formats[i].code)
-			return &mipi_csis_formats[i];
-	return NULL;
+	writel(val, state->regs + reg);
 }
 
 static void mipi_csis_enable_interrupts(struct csi_state *state, bool on)
@@ -684,6 +647,41 @@ static void mipi_csis_stop_stream(struct csi_state *state)
 	mipi_csis_system_enable(state, false);
 }
 
+static irqreturn_t mipi_csis_irq_handler(int irq, void *dev_id)
+{
+	struct csi_state *state = dev_id;
+	unsigned long flags;
+	unsigned int i;
+	u32 status;
+	u32 dbg_status;
+
+	status = mipi_csis_read(state, MIPI_CSIS_INT_SRC);
+	dbg_status = mipi_csis_read(state, MIPI_CSIS_DBG_INTR_SRC);
+
+	spin_lock_irqsave(&state->slock, flags);
+
+	/* Update the event/error counters */
+	if ((status & MIPI_CSIS_INT_SRC_ERRORS) || state->debug) {
+		for (i = 0; i < MIPI_CSIS_NUM_EVENTS; i++) {
+			struct mipi_csis_event *event = &state->events[i];
+
+			if ((!event->debug && (status & event->mask)) ||
+			    (event->debug && (dbg_status & event->mask)))
+				event->counter++;
+		}
+	}
+	spin_unlock_irqrestore(&state->slock, flags);
+
+	mipi_csis_write(state, MIPI_CSIS_INT_SRC, status);
+	mipi_csis_write(state, MIPI_CSIS_DBG_INTR_SRC, dbg_status);
+
+	return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * Debug
+ */
+
 static void mipi_csis_clear_counters(struct csi_state *state)
 {
 	unsigned long flags;
@@ -713,9 +711,72 @@ static void mipi_csis_log_counters(struct csi_state *state, bool non_errors)
 	spin_unlock_irqrestore(&state->slock, flags);
 }
 
-/*
+static int mipi_csis_dump_regs(struct csi_state *state)
+{
+	static const struct {
+		u32 offset;
+		const char * const name;
+	} registers[] = {
+		{ MIPI_CSIS_CMN_CTRL, "CMN_CTRL" },
+		{ MIPI_CSIS_CLK_CTRL, "CLK_CTRL" },
+		{ MIPI_CSIS_INT_MSK, "INT_MSK" },
+		{ MIPI_CSIS_DPHY_STATUS, "DPHY_STATUS" },
+		{ MIPI_CSIS_DPHY_CMN_CTRL, "DPHY_CMN_CTRL" },
+		{ MIPI_CSIS_DPHY_SCTRL_L, "DPHY_SCTRL_L" },
+		{ MIPI_CSIS_DPHY_SCTRL_H, "DPHY_SCTRL_H" },
+		{ MIPI_CSIS_ISP_CONFIG_CH(0), "ISP_CONFIG_CH0" },
+		{ MIPI_CSIS_ISP_RESOL_CH(0), "ISP_RESOL_CH0" },
+		{ MIPI_CSIS_SDW_CONFIG_CH(0), "SDW_CONFIG_CH0" },
+		{ MIPI_CSIS_SDW_RESOL_CH(0), "SDW_RESOL_CH0" },
+		{ MIPI_CSIS_DBG_CTRL, "DBG_CTRL" },
+	};
+
+	struct device *dev = &state->pdev->dev;
+	unsigned int i;
+	u32 cfg;
+
+	dev_info(dev, "--- REGISTERS ---\n");
+
+	for (i = 0; i < ARRAY_SIZE(registers); i++) {
+		cfg = mipi_csis_read(state, registers[i].offset);
+		dev_info(dev, "%14s: 0x%08x\n", registers[i].name, cfg);
+	}
+
+	return 0;
+}
+
+static int mipi_csis_dump_regs_show(struct seq_file *m, void *private)
+{
+	struct csi_state *state = m->private;
+
+	return mipi_csis_dump_regs(state);
+}
+DEFINE_SHOW_ATTRIBUTE(mipi_csis_dump_regs);
+
+static void mipi_csis_debugfs_init(struct csi_state *state)
+{
+	state->debugfs_root = debugfs_create_dir(dev_name(state->dev), NULL);
+
+	debugfs_create_bool("debug_enable", 0600, state->debugfs_root,
+			    &state->debug);
+	debugfs_create_file("dump_regs", 0600, state->debugfs_root, state,
+			    &mipi_csis_dump_regs_fops);
+}
+
+static void mipi_csis_debugfs_exit(struct csi_state *state)
+{
+	debugfs_remove_recursive(state->debugfs_root);
+}
+
+/* -----------------------------------------------------------------------------
  * V4L2 subdev operations
  */
+
+static struct csi_state *mipi_sd_to_csis_state(struct v4l2_subdev *sdev)
+{
+	return container_of(sdev, struct csi_state, mipi_sd);
+}
+
 static int mipi_csis_s_stream(struct v4l2_subdev *mipi_sd, int enable)
 {
 	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
@@ -774,35 +835,6 @@ done:
 	return ret;
 }
 
-static int mipi_csis_link_setup(struct media_entity *entity,
-				const struct media_pad *local_pad,
-				const struct media_pad *remote_pad, u32 flags)
-{
-	struct v4l2_subdev *mipi_sd = media_entity_to_v4l2_subdev(entity);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
-	struct v4l2_subdev *remote_sd;
-
-	dev_dbg(state->dev, "link setup %s -> %s", remote_pad->entity->name,
-		local_pad->entity->name);
-
-	/* We only care about the link to the source. */
-	if (!(local_pad->flags & MEDIA_PAD_FL_SINK))
-		return 0;
-
-	remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
-
-	if (flags & MEDIA_LNK_FL_ENABLED) {
-		if (state->src_sd)
-			return -EBUSY;
-
-		state->src_sd = remote_sd;
-	} else {
-		state->src_sd = NULL;
-	}
-
-	return 0;
-}
-
 static struct v4l2_mbus_framefmt *
 mipi_csis_get_format(struct csi_state *state,
 		     struct v4l2_subdev_pad_config *cfg,
@@ -991,47 +1023,10 @@ static int mipi_csis_log_status(struct v4l2_subdev *mipi_sd)
 	return 0;
 }
 
-static irqreturn_t mipi_csis_irq_handler(int irq, void *dev_id)
-{
-	struct csi_state *state = dev_id;
-	unsigned long flags;
-	unsigned int i;
-	u32 status;
-	u32 dbg_status;
-
-	status = mipi_csis_read(state, MIPI_CSIS_INT_SRC);
-	dbg_status = mipi_csis_read(state, MIPI_CSIS_DBG_INTR_SRC);
-
-	spin_lock_irqsave(&state->slock, flags);
-
-	/* Update the event/error counters */
-	if ((status & MIPI_CSIS_INT_SRC_ERRORS) || state->debug) {
-		for (i = 0; i < MIPI_CSIS_NUM_EVENTS; i++) {
-			struct mipi_csis_event *event = &state->events[i];
-
-			if ((!event->debug && (status & event->mask)) ||
-			    (event->debug && (dbg_status & event->mask)))
-				event->counter++;
-		}
-	}
-	spin_unlock_irqrestore(&state->slock, flags);
-
-	mipi_csis_write(state, MIPI_CSIS_INT_SRC, status);
-	mipi_csis_write(state, MIPI_CSIS_DBG_INTR_SRC, dbg_status);
-
-	return IRQ_HANDLED;
-}
-
 static const struct v4l2_subdev_core_ops mipi_csis_core_ops = {
 	.log_status	= mipi_csis_log_status,
 };
 
-static const struct media_entity_operations mipi_csis_entity_ops = {
-	.link_setup	= mipi_csis_link_setup,
-	.link_validate	= v4l2_subdev_link_validate,
-	.get_fwnode_pad = v4l2_subdev_get_fwnode_pad_1_to_1,
-};
-
 static const struct v4l2_subdev_video_ops mipi_csis_video_ops = {
 	.s_stream	= mipi_csis_s_stream,
 };
@@ -1049,26 +1044,56 @@ static const struct v4l2_subdev_ops mipi_csis_subdev_ops = {
 	.pad	= &mipi_csis_pad_ops,
 };
 
-static int mipi_csis_parse_dt(struct platform_device *pdev,
-			      struct csi_state *state)
-{
-	struct device_node *node = pdev->dev.of_node;
+/* -----------------------------------------------------------------------------
+ * Media entity operations
+ */
 
-	if (of_property_read_u32(node, "clock-frequency",
-				 &state->clk_frequency))
-		state->clk_frequency = DEFAULT_SCLK_CSIS_FREQ;
+static int mipi_csis_link_setup(struct media_entity *entity,
+				const struct media_pad *local_pad,
+				const struct media_pad *remote_pad, u32 flags)
+{
+	struct v4l2_subdev *mipi_sd = media_entity_to_v4l2_subdev(entity);
+	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct v4l2_subdev *remote_sd;
 
-	/* Get MIPI PHY resets */
-	state->mrst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
-	if (IS_ERR(state->mrst))
-		return PTR_ERR(state->mrst);
+	dev_dbg(state->dev, "link setup %s -> %s", remote_pad->entity->name,
+		local_pad->entity->name);
 
-	return 0;
-}
+	/* We only care about the link to the source. */
+	if (!(local_pad->flags & MEDIA_PAD_FL_SINK))
+		return 0;
 
-static int mipi_csis_pm_resume(struct device *dev, bool runtime);
+	remote_sd = media_entity_to_v4l2_subdev(remote_pad->entity);
 
-static int mipi_csis_notify_bound(struct v4l2_async_notifier *notifier,
+	if (flags & MEDIA_LNK_FL_ENABLED) {
+		if (state->src_sd)
+			return -EBUSY;
+
+		state->src_sd = remote_sd;
+	} else {
+		state->src_sd = NULL;
+	}
+
+	return 0;
+}
+
+static const struct media_entity_operations mipi_csis_entity_ops = {
+	.link_setup	= mipi_csis_link_setup,
+	.link_validate	= v4l2_subdev_link_validate,
+	.get_fwnode_pad = v4l2_subdev_get_fwnode_pad_1_to_1,
+};
+
+/* -----------------------------------------------------------------------------
+ * Async subdev notifier
+ */
+
+static struct csi_state *
+mipi_notifier_to_csis_state(struct v4l2_async_notifier *n)
+{
+	return container_of(n, struct csi_state, notifier);
+}
+
+static int mipi_csis_notify_bound(struct v4l2_async_notifier *notifier,
 				  struct v4l2_subdev *sd,
 				  struct v4l2_async_subdev *asd)
 {
@@ -1082,36 +1107,6 @@ static const struct v4l2_async_notifier_operations mipi_csis_notify_ops = {
 	.bound = mipi_csis_notify_bound,
 };
 
-static int mipi_csis_subdev_init(struct v4l2_subdev *mipi_sd,
-				 struct platform_device *pdev,
-				 const struct v4l2_subdev_ops *ops)
-{
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
-
-	v4l2_subdev_init(mipi_sd, ops);
-	mipi_sd->owner = THIS_MODULE;
-	snprintf(mipi_sd->name, sizeof(mipi_sd->name), "%s.%d",
-		 CSIS_SUBDEV_NAME, state->index);
-
-	mipi_sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
-	mipi_sd->ctrl_handler = NULL;
-
-	mipi_sd->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
-	mipi_sd->entity.ops = &mipi_csis_entity_ops;
-
-	mipi_sd->dev = &pdev->dev;
-
-	state->csis_fmt = &mipi_csis_formats[0];
-	mipi_csis_init_cfg(mipi_sd, NULL);
-
-	state->pads[CSIS_PAD_SINK].flags = MEDIA_PAD_FL_SINK
-					 | MEDIA_PAD_FL_MUST_CONNECT;
-	state->pads[CSIS_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE
-					   | MEDIA_PAD_FL_MUST_CONNECT;
-	return media_entity_pads_init(&mipi_sd->entity, CSIS_PADS_NUM,
-				      state->pads);
-}
-
 static int mipi_csis_async_register(struct csi_state *state)
 {
 	struct v4l2_fwnode_endpoint vep = {
@@ -1161,27 +1156,138 @@ err_parse:
 	return ret;
 }
 
-static int mipi_csis_dump_regs_show(struct seq_file *m, void *private)
+/* -----------------------------------------------------------------------------
+ * Suspend/resume
+ */
+
+static int mipi_csis_pm_suspend(struct device *dev, bool runtime)
 {
-	struct csi_state *state = m->private;
+	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
+	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	int ret = 0;
 
-	return mipi_csis_dump_regs(state);
+	mutex_lock(&state->lock);
+	if (state->flags & ST_POWERED) {
+		mipi_csis_stop_stream(state);
+		ret = regulator_disable(state->mipi_phy_regulator);
+		if (ret)
+			goto unlock;
+		mipi_csis_clk_disable(state);
+		state->flags &= ~ST_POWERED;
+		if (!runtime)
+			state->flags |= ST_SUSPENDED;
+	}
+
+unlock:
+	mutex_unlock(&state->lock);
+
+	return ret ? -EAGAIN : 0;
 }
-DEFINE_SHOW_ATTRIBUTE(mipi_csis_dump_regs);
 
-static void mipi_csis_debugfs_init(struct csi_state *state)
+static int mipi_csis_pm_resume(struct device *dev, bool runtime)
 {
-	state->debugfs_root = debugfs_create_dir(dev_name(state->dev), NULL);
+	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
+	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	int ret = 0;
 
-	debugfs_create_bool("debug_enable", 0600, state->debugfs_root,
-			    &state->debug);
-	debugfs_create_file("dump_regs", 0600, state->debugfs_root, state,
-			    &mipi_csis_dump_regs_fops);
+	mutex_lock(&state->lock);
+	if (!runtime && !(state->flags & ST_SUSPENDED))
+		goto unlock;
+
+	if (!(state->flags & ST_POWERED)) {
+		ret = regulator_enable(state->mipi_phy_regulator);
+		if (ret)
+			goto unlock;
+
+		state->flags |= ST_POWERED;
+		mipi_csis_clk_enable(state);
+	}
+	if (state->flags & ST_STREAMING)
+		mipi_csis_start_stream(state);
+
+	state->flags &= ~ST_SUSPENDED;
+
+unlock:
+	mutex_unlock(&state->lock);
+
+	return ret ? -EAGAIN : 0;
 }
 
-static void mipi_csis_debugfs_exit(struct csi_state *state)
+static int __maybe_unused mipi_csis_suspend(struct device *dev)
 {
-	debugfs_remove_recursive(state->debugfs_root);
+	return mipi_csis_pm_suspend(dev, false);
+}
+
+static int __maybe_unused mipi_csis_resume(struct device *dev)
+{
+	return mipi_csis_pm_resume(dev, false);
+}
+
+static int __maybe_unused mipi_csis_runtime_suspend(struct device *dev)
+{
+	return mipi_csis_pm_suspend(dev, true);
+}
+
+static int __maybe_unused mipi_csis_runtime_resume(struct device *dev)
+{
+	return mipi_csis_pm_resume(dev, true);
+}
+
+static const struct dev_pm_ops mipi_csis_pm_ops = {
+	SET_RUNTIME_PM_OPS(mipi_csis_runtime_suspend, mipi_csis_runtime_resume,
+			   NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(mipi_csis_suspend, mipi_csis_resume)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe/remove & platform driver
+ */
+
+static int mipi_csis_subdev_init(struct v4l2_subdev *mipi_sd,
+				 struct platform_device *pdev,
+				 const struct v4l2_subdev_ops *ops)
+{
+	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+
+	v4l2_subdev_init(mipi_sd, ops);
+	mipi_sd->owner = THIS_MODULE;
+	snprintf(mipi_sd->name, sizeof(mipi_sd->name), "%s.%d",
+		 CSIS_SUBDEV_NAME, state->index);
+
+	mipi_sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+	mipi_sd->ctrl_handler = NULL;
+
+	mipi_sd->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
+	mipi_sd->entity.ops = &mipi_csis_entity_ops;
+
+	mipi_sd->dev = &pdev->dev;
+
+	state->csis_fmt = &mipi_csis_formats[0];
+	mipi_csis_init_cfg(mipi_sd, NULL);
+
+	state->pads[CSIS_PAD_SINK].flags = MEDIA_PAD_FL_SINK
+					 | MEDIA_PAD_FL_MUST_CONNECT;
+	state->pads[CSIS_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE
+					   | MEDIA_PAD_FL_MUST_CONNECT;
+	return media_entity_pads_init(&mipi_sd->entity, CSIS_PADS_NUM,
+				      state->pads);
+}
+
+static int mipi_csis_parse_dt(struct platform_device *pdev,
+			      struct csi_state *state)
+{
+	struct device_node *node = pdev->dev.of_node;
+
+	if (of_property_read_u32(node, "clock-frequency",
+				 &state->clk_frequency))
+		state->clk_frequency = DEFAULT_SCLK_CSIS_FREQ;
+
+	/* Get MIPI PHY resets */
+	state->mrst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+	if (IS_ERR(state->mrst))
+		return PTR_ERR(state->mrst);
+
+	return 0;
 }
 
 static int mipi_csis_probe(struct platform_device *pdev)
@@ -1279,79 +1385,6 @@ disable_clock:
 	return ret;
 }
 
-static int mipi_csis_pm_suspend(struct device *dev, bool runtime)
-{
-	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
-	int ret = 0;
-
-	mutex_lock(&state->lock);
-	if (state->flags & ST_POWERED) {
-		mipi_csis_stop_stream(state);
-		ret = regulator_disable(state->mipi_phy_regulator);
-		if (ret)
-			goto unlock;
-		mipi_csis_clk_disable(state);
-		state->flags &= ~ST_POWERED;
-		if (!runtime)
-			state->flags |= ST_SUSPENDED;
-	}
-
-unlock:
-	mutex_unlock(&state->lock);
-
-	return ret ? -EAGAIN : 0;
-}
-
-static int mipi_csis_pm_resume(struct device *dev, bool runtime)
-{
-	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
-	int ret = 0;
-
-	mutex_lock(&state->lock);
-	if (!runtime && !(state->flags & ST_SUSPENDED))
-		goto unlock;
-
-	if (!(state->flags & ST_POWERED)) {
-		ret = regulator_enable(state->mipi_phy_regulator);
-		if (ret)
-			goto unlock;
-
-		state->flags |= ST_POWERED;
-		mipi_csis_clk_enable(state);
-	}
-	if (state->flags & ST_STREAMING)
-		mipi_csis_start_stream(state);
-
-	state->flags &= ~ST_SUSPENDED;
-
-unlock:
-	mutex_unlock(&state->lock);
-
-	return ret ? -EAGAIN : 0;
-}
-
-static int __maybe_unused mipi_csis_suspend(struct device *dev)
-{
-	return mipi_csis_pm_suspend(dev, false);
-}
-
-static int __maybe_unused mipi_csis_resume(struct device *dev)
-{
-	return mipi_csis_pm_resume(dev, false);
-}
-
-static int __maybe_unused mipi_csis_runtime_suspend(struct device *dev)
-{
-	return mipi_csis_pm_suspend(dev, true);
-}
-
-static int __maybe_unused mipi_csis_runtime_resume(struct device *dev)
-{
-	return mipi_csis_pm_resume(dev, true);
-}
-
 static int mipi_csis_remove(struct platform_device *pdev)
 {
 	struct v4l2_subdev *mipi_sd = platform_get_drvdata(pdev);
@@ -1372,12 +1405,6 @@ static int mipi_csis_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct dev_pm_ops mipi_csis_pm_ops = {
-	SET_RUNTIME_PM_OPS(mipi_csis_runtime_suspend, mipi_csis_runtime_resume,
-			   NULL)
-	SET_SYSTEM_SLEEP_PM_OPS(mipi_csis_suspend, mipi_csis_resume)
-};
-
 static const struct of_device_id mipi_csis_of_match[] = {
 	{ .compatible = "fsl,imx7-mipi-csi2", },
 	{ /* sentinel */ },
-- 
GitLab


From 996f6f517dc79f337faf29d9db54ceb9a3169787 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:29:59 +0200
Subject: [PATCH 1411/3804] media: imx: imx7_mipi_csis: Set the CLKSETTLE
 register field

Set the CLKSETTLE field explicitly, with a value hardcoded to 0. This
brings no functional change, but prepares for calculation of the
CLKSETTLE value.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 1831ff4cc6295..5dc001a415d4a 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -310,6 +310,7 @@ struct csi_state {
 
 	u32 clk_frequency;
 	u32 hs_settle;
+	u32 clk_settle;
 
 	struct reset_control *mrst;
 
@@ -540,11 +541,15 @@ static int mipi_csis_calculate_params(struct csi_state *state)
 
 	/*
 	 * The HSSETTLE counter value is document in a table, but can also
-	 * easily be calculated.
+	 * easily be calculated. Hardcode the CLKSETTLE value to 0 for now
+	 * (which is documented as corresponding to CSI-2 v0.87 to v1.00) until
+	 * we figure out how to compute it correctly.
 	 */
 	state->hs_settle = (lane_rate - 5000000) / 45000000;
-	dev_dbg(state->dev, "lane rate %u, Ths_settle %u\n",
-		lane_rate, state->hs_settle);
+	state->clk_settle = 0;
+
+	dev_dbg(state->dev, "lane rate %u, Tclk_settle %u, Ths_settle %u\n",
+		lane_rate, state->clk_settle, state->hs_settle);
 
 	return 0;
 }
@@ -563,7 +568,8 @@ static void mipi_csis_set_params(struct csi_state *state)
 	__mipi_csis_set_format(state);
 
 	mipi_csis_write(state, MIPI_CSIS_DPHY_CMN_CTRL,
-			MIPI_CSIS_DPHY_CMN_CTRL_HSSETTLE(state->hs_settle));
+			MIPI_CSIS_DPHY_CMN_CTRL_HSSETTLE(state->hs_settle) |
+			MIPI_CSIS_DPHY_CMN_CTRL_CLKSETTLE(state->clk_settle));
 
 	val = (0 << MIPI_CSIS_ISP_SYNC_HSYNC_LINTV_OFFSET)
 	    | (0 << MIPI_CSIS_ISP_SYNC_VSYNC_SINTV_OFFSET)
-- 
GitLab


From 492d418c082c10c97ad1ca3d35ac8c2438b4fc17 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:00 +0200
Subject: [PATCH 1412/3804] media: imx: imx7_mipi_csis: Drop unused
 csis_hw_reset structure

The csis_hw_reset structure is instantiated as a member of csi_state,
but that member is never used. Drop it.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 5dc001a415d4a..e01838931e6fa 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -278,12 +278,6 @@ static const char * const mipi_csis_clk_id[] = {
 	"phy",
 };
 
-struct csis_hw_reset {
-	struct regmap *src;
-	u8 req_src;
-	u8 rst_bit;
-};
-
 struct csi_state {
 	/* lock elements below */
 	struct mutex lock;
@@ -321,7 +315,6 @@ struct csi_state {
 
 	struct mipi_csis_event events[MIPI_CSIS_NUM_EVENTS];
 
-	struct csis_hw_reset hw_reset;
 	struct regulator *mipi_phy_regulator;
 };
 
-- 
GitLab


From 3acb88893a799d5bd612213ea3fc67f2f3280f29 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:01 +0200
Subject: [PATCH 1413/3804] media: imx: imx7_mipi_csis: Store CSI-2 data type
 in format structure

Replace the register value stored in the csis_pix_format structure with
the CSI-2 data type. The register value is simply computed from the data
type using a shift. This prepares for i.MX8MP support that needs the
same data type in a different hardware register.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 71 +++++++++++++---------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index e01838931e6fa..1036e39ce0aba 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -170,13 +170,7 @@
 #define MIPI_CSIS_ISPCFG_PIXEL_MODE_DUAL	(1 << 12)
 #define MIPI_CSIS_ISPCFG_PIXEL_MODE_QUAD	(2 << 12)	/* i.MX8M[MNP] only */
 #define MIPI_CSIS_ISPCFG_ALIGN_32BIT		BIT(11)
-#define MIPI_CSIS_ISPCFG_FMT_YCBCR422_8BIT	(0x1e << 2)
-#define MIPI_CSIS_ISPCFG_FMT_RAW8		(0x2a << 2)
-#define MIPI_CSIS_ISPCFG_FMT_RAW10		(0x2b << 2)
-#define MIPI_CSIS_ISPCFG_FMT_RAW12		(0x2c << 2)
-#define MIPI_CSIS_ISPCFG_FMT_RAW14		(0x2d << 2)
-/* User defined formats, x = 1...4 */
-#define MIPI_CSIS_ISPCFG_FMT_USER(x)		((0x30 + (x) - 1) << 2)
+#define MIPI_CSIS_ISPCFG_FMT(fmt)		((fmt) << 2)
 #define MIPI_CSIS_ISPCFG_FMT_MASK		(0x3f << 2)
 
 /* ISP Image Resolution register */
@@ -223,6 +217,25 @@
 
 #define DEFAULT_SCLK_CSIS_FREQ			166000000UL
 
+/* MIPI CSI-2 Data Types */
+#define MIPI_CSI2_DATA_TYPE_YUV420_8		0x18
+#define MIPI_CSI2_DATA_TYPE_YUV420_10		0x19
+#define MIPI_CSI2_DATA_TYPE_LE_YUV420_8		0x1a
+#define MIPI_CSI2_DATA_TYPE_CS_YUV420_8		0x1c
+#define MIPI_CSI2_DATA_TYPE_CS_YUV420_10	0x1d
+#define MIPI_CSI2_DATA_TYPE_YUV422_8		0x1e
+#define MIPI_CSI2_DATA_TYPE_YUV422_10		0x1f
+#define MIPI_CSI2_DATA_TYPE_RGB565		0x22
+#define MIPI_CSI2_DATA_TYPE_RGB666		0x23
+#define MIPI_CSI2_DATA_TYPE_RGB888		0x24
+#define MIPI_CSI2_DATA_TYPE_RAW6		0x28
+#define MIPI_CSI2_DATA_TYPE_RAW7		0x29
+#define MIPI_CSI2_DATA_TYPE_RAW8		0x2a
+#define MIPI_CSI2_DATA_TYPE_RAW10		0x2b
+#define MIPI_CSI2_DATA_TYPE_RAW12		0x2c
+#define MIPI_CSI2_DATA_TYPE_RAW14		0x2d
+#define MIPI_CSI2_DATA_TYPE_USER(x)		(0x30 + (x))
+
 enum {
 	ST_POWERED	= 1,
 	ST_STREAMING	= 2,
@@ -324,7 +337,7 @@ struct csi_state {
 
 struct csis_pix_format {
 	u32 code;
-	u32 fmt_reg;
+	u32 data_type;
 	u8 width;
 };
 
@@ -332,85 +345,85 @@ static const struct csis_pix_format mipi_csis_formats[] = {
 	/* YUV formats. */
 	{
 		.code = MEDIA_BUS_FMT_UYVY8_1X16,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_YCBCR422_8BIT,
+		.data_type = MIPI_CSI2_DATA_TYPE_YUV422_8,
 		.width = 16,
 	},
 	/* RAW (Bayer and greyscale) formats. */
 	{
 		.code = MEDIA_BUS_FMT_SBGGR8_1X8,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW8,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW8,
 		.width = 8,
 	}, {
 		.code = MEDIA_BUS_FMT_SGBRG8_1X8,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW8,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW8,
 		.width = 8,
 	}, {
 		.code = MEDIA_BUS_FMT_SGRBG8_1X8,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW8,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW8,
 		.width = 8,
 	}, {
 		.code = MEDIA_BUS_FMT_SRGGB8_1X8,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW8,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW8,
 		.width = 8,
 	}, {
 		.code = MEDIA_BUS_FMT_Y8_1X8,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW8,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW8,
 		.width = 8,
 	}, {
 		.code = MEDIA_BUS_FMT_SBGGR10_1X10,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW10,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW10,
 		.width = 10,
 	}, {
 		.code = MEDIA_BUS_FMT_SGBRG10_1X10,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW10,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW10,
 		.width = 10,
 	}, {
 		.code = MEDIA_BUS_FMT_SGRBG10_1X10,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW10,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW10,
 		.width = 10,
 	}, {
 		.code = MEDIA_BUS_FMT_SRGGB10_1X10,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW10,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW10,
 		.width = 10,
 	}, {
 		.code = MEDIA_BUS_FMT_Y10_1X10,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW10,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW10,
 		.width = 10,
 	}, {
 		.code = MEDIA_BUS_FMT_SBGGR12_1X12,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW12,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW12,
 		.width = 12,
 	}, {
 		.code = MEDIA_BUS_FMT_SGBRG12_1X12,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW12,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW12,
 		.width = 12,
 	}, {
 		.code = MEDIA_BUS_FMT_SGRBG12_1X12,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW12,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW12,
 		.width = 12,
 	}, {
 		.code = MEDIA_BUS_FMT_SRGGB12_1X12,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW12,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW12,
 		.width = 12,
 	}, {
 		.code = MEDIA_BUS_FMT_Y12_1X12,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW12,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW12,
 		.width = 12,
 	}, {
 		.code = MEDIA_BUS_FMT_SBGGR14_1X14,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW14,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW14,
 		.width = 14,
 	}, {
 		.code = MEDIA_BUS_FMT_SGBRG14_1X14,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW14,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW14,
 		.width = 14,
 	}, {
 		.code = MEDIA_BUS_FMT_SGRBG14_1X14,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW14,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW14,
 		.width = 14,
 	}, {
 		.code = MEDIA_BUS_FMT_SRGGB14_1X14,
-		.fmt_reg = MIPI_CSIS_ISPCFG_FMT_RAW14,
+		.data_type = MIPI_CSI2_DATA_TYPE_RAW14,
 		.width = 14,
 	}
 };
@@ -502,7 +515,7 @@ static void __mipi_csis_set_format(struct csi_state *state)
 	/* Color format */
 	val = mipi_csis_read(state, MIPI_CSIS_ISP_CONFIG_CH(0));
 	val &= ~(MIPI_CSIS_ISPCFG_ALIGN_32BIT | MIPI_CSIS_ISPCFG_FMT_MASK);
-	val |= state->csis_fmt->fmt_reg;
+	val |= MIPI_CSIS_ISPCFG_FMT(state->csis_fmt->data_type);
 	mipi_csis_write(state, MIPI_CSIS_ISP_CONFIG_CH(0), val);
 
 	/* Pixel resolution */
-- 
GitLab


From cbf15686dd8b10f1c3c0ce29d09fc88fc3db4b9e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:02 +0200
Subject: [PATCH 1414/3804] media: imx: imx7_mipi_csis: Drop csi_state phy
 field

The phy field of the csi_state structure is unused. Drop it.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 1036e39ce0aba..d818b066e5116 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -304,7 +304,6 @@ struct csi_state {
 
 	u8 index;
 	struct platform_device *pdev;
-	struct phy *phy;
 	void __iomem *regs;
 	int irq;
 	u32 flags;
-- 
GitLab


From b0db06bb98b55ce5040aab5133f6fcd7c0631e29 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:03 +0200
Subject: [PATCH 1415/3804] media: imx: imx7_mipi_csis: Rename mipi_sd to sd

The CSIS is modelled as a single subdev, there's thus no ambiguity
regarding which subdev the code refers to. Rename mipi_sd to sd.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 91 +++++++++++-----------
 1 file changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index d818b066e5116..382d02f54239e 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -298,7 +298,7 @@ struct csi_state {
 	spinlock_t slock;
 	struct device *dev;
 	struct media_pad pads[CSIS_PADS_NUM];
-	struct v4l2_subdev mipi_sd;
+	struct v4l2_subdev sd;
 	struct v4l2_async_notifier notifier;
 	struct v4l2_subdev *src_sd;
 
@@ -785,12 +785,12 @@ static void mipi_csis_debugfs_exit(struct csi_state *state)
 
 static struct csi_state *mipi_sd_to_csis_state(struct v4l2_subdev *sdev)
 {
-	return container_of(sdev, struct csi_state, mipi_sd);
+	return container_of(sdev, struct csi_state, sd);
 }
 
-static int mipi_csis_s_stream(struct v4l2_subdev *mipi_sd, int enable)
+static int mipi_csis_s_stream(struct v4l2_subdev *sd, int enable)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	int ret;
 
 	if (enable) {
@@ -853,15 +853,15 @@ mipi_csis_get_format(struct csi_state *state,
 		     unsigned int pad)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&state->mipi_sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&state->sd, cfg, pad);
 
 	return &state->format_mbus;
 }
 
-static int mipi_csis_init_cfg(struct v4l2_subdev *mipi_sd,
+static int mipi_csis_init_cfg(struct v4l2_subdev *sd,
 			      struct v4l2_subdev_pad_config *cfg)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct v4l2_mbus_framefmt *fmt_sink;
 	struct v4l2_mbus_framefmt *fmt_source;
 	enum v4l2_subdev_format_whence which;
@@ -895,11 +895,11 @@ static int mipi_csis_init_cfg(struct v4l2_subdev *mipi_sd,
 	return 0;
 }
 
-static int mipi_csis_get_fmt(struct v4l2_subdev *mipi_sd,
+static int mipi_csis_get_fmt(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_pad_config *cfg,
 			     struct v4l2_subdev_format *sdformat)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct v4l2_mbus_framefmt *fmt;
 
 	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
@@ -911,11 +911,11 @@ static int mipi_csis_get_fmt(struct v4l2_subdev *mipi_sd,
 	return 0;
 }
 
-static int mipi_csis_enum_mbus_code(struct v4l2_subdev *mipi_sd,
+static int mipi_csis_enum_mbus_code(struct v4l2_subdev *sd,
 				    struct v4l2_subdev_pad_config *cfg,
 				    struct v4l2_subdev_mbus_code_enum *code)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 
 	/*
 	 * The CSIS can't transcode in any way, the source format is identical
@@ -943,11 +943,11 @@ static int mipi_csis_enum_mbus_code(struct v4l2_subdev *mipi_sd,
 	return 0;
 }
 
-static int mipi_csis_set_fmt(struct v4l2_subdev *mipi_sd,
+static int mipi_csis_set_fmt(struct v4l2_subdev *sd,
 			     struct v4l2_subdev_pad_config *cfg,
 			     struct v4l2_subdev_format *sdformat)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct csis_pix_format const *csis_fmt;
 	struct v4l2_mbus_framefmt *fmt;
 	unsigned int align;
@@ -957,7 +957,7 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *mipi_sd,
 	 * modified.
 	 */
 	if (sdformat->pad == CSIS_PAD_SOURCE)
-		return mipi_csis_get_fmt(mipi_sd, cfg, sdformat);
+		return mipi_csis_get_fmt(sd, cfg, sdformat);
 
 	if (sdformat->pad != CSIS_PAD_SINK)
 		return -EINVAL;
@@ -1021,9 +1021,9 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *mipi_sd,
 	return 0;
 }
 
-static int mipi_csis_log_status(struct v4l2_subdev *mipi_sd)
+static int mipi_csis_log_status(struct v4l2_subdev *sd)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 
 	mutex_lock(&state->lock);
 	mipi_csis_log_counters(state, true);
@@ -1063,8 +1063,8 @@ static int mipi_csis_link_setup(struct media_entity *entity,
 				const struct media_pad *local_pad,
 				const struct media_pad *remote_pad, u32 flags)
 {
-	struct v4l2_subdev *mipi_sd = media_entity_to_v4l2_subdev(entity);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct v4l2_subdev *sd = media_entity_to_v4l2_subdev(entity);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct v4l2_subdev *remote_sd;
 
 	dev_dbg(state->dev, "link setup %s -> %s", remote_pad->entity->name,
@@ -1109,7 +1109,7 @@ static int mipi_csis_notify_bound(struct v4l2_async_notifier *notifier,
 				  struct v4l2_async_subdev *asd)
 {
 	struct csi_state *state = mipi_notifier_to_csis_state(notifier);
-	struct media_pad *sink = &state->mipi_sd.entity.pads[CSIS_PAD_SINK];
+	struct media_pad *sink = &state->sd.entity.pads[CSIS_PAD_SINK];
 
 	return v4l2_create_fwnode_links_to_pad(sd, sink, 0);
 }
@@ -1154,12 +1154,11 @@ static int mipi_csis_async_register(struct csi_state *state)
 
 	state->notifier.ops = &mipi_csis_notify_ops;
 
-	ret = v4l2_async_subdev_notifier_register(&state->mipi_sd,
-						  &state->notifier);
+	ret = v4l2_async_subdev_notifier_register(&state->sd, &state->notifier);
 	if (ret)
 		return ret;
 
-	return v4l2_async_register_subdev(&state->mipi_sd);
+	return v4l2_async_register_subdev(&state->sd);
 
 err_parse:
 	fwnode_handle_put(ep);
@@ -1173,8 +1172,8 @@ err_parse:
 
 static int mipi_csis_pm_suspend(struct device *dev, bool runtime)
 {
-	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct v4l2_subdev *sd = dev_get_drvdata(dev);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	int ret = 0;
 
 	mutex_lock(&state->lock);
@@ -1197,8 +1196,8 @@ unlock:
 
 static int mipi_csis_pm_resume(struct device *dev, bool runtime)
 {
-	struct v4l2_subdev *mipi_sd = dev_get_drvdata(dev);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct v4l2_subdev *sd = dev_get_drvdata(dev);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	int ret = 0;
 
 	mutex_lock(&state->lock);
@@ -1254,33 +1253,33 @@ static const struct dev_pm_ops mipi_csis_pm_ops = {
  * Probe/remove & platform driver
  */
 
-static int mipi_csis_subdev_init(struct v4l2_subdev *mipi_sd,
+static int mipi_csis_subdev_init(struct v4l2_subdev *sd,
 				 struct platform_device *pdev,
 				 const struct v4l2_subdev_ops *ops)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 
-	v4l2_subdev_init(mipi_sd, ops);
-	mipi_sd->owner = THIS_MODULE;
-	snprintf(mipi_sd->name, sizeof(mipi_sd->name), "%s.%d",
+	v4l2_subdev_init(sd, ops);
+	sd->owner = THIS_MODULE;
+	snprintf(sd->name, sizeof(sd->name), "%s.%d",
 		 CSIS_SUBDEV_NAME, state->index);
 
-	mipi_sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
-	mipi_sd->ctrl_handler = NULL;
+	sd->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+	sd->ctrl_handler = NULL;
 
-	mipi_sd->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
-	mipi_sd->entity.ops = &mipi_csis_entity_ops;
+	sd->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
+	sd->entity.ops = &mipi_csis_entity_ops;
 
-	mipi_sd->dev = &pdev->dev;
+	sd->dev = &pdev->dev;
 
 	state->csis_fmt = &mipi_csis_formats[0];
-	mipi_csis_init_cfg(mipi_sd, NULL);
+	mipi_csis_init_cfg(sd, NULL);
 
 	state->pads[CSIS_PAD_SINK].flags = MEDIA_PAD_FL_SINK
 					 | MEDIA_PAD_FL_MUST_CONNECT;
 	state->pads[CSIS_PAD_SOURCE].flags = MEDIA_PAD_FL_SOURCE
 					   | MEDIA_PAD_FL_MUST_CONNECT;
-	return media_entity_pads_init(&mipi_sd->entity, CSIS_PADS_NUM,
+	return media_entity_pads_init(&sd->entity, CSIS_PADS_NUM,
 				      state->pads);
 }
 
@@ -1353,10 +1352,10 @@ static int mipi_csis_probe(struct platform_device *pdev)
 		goto disable_clock;
 	}
 
-	platform_set_drvdata(pdev, &state->mipi_sd);
+	platform_set_drvdata(pdev, &state->sd);
 
 	mutex_init(&state->lock);
-	ret = mipi_csis_subdev_init(&state->mipi_sd, pdev,
+	ret = mipi_csis_subdev_init(&state->sd, pdev,
 				    &mipi_csis_subdev_ops);
 	if (ret < 0)
 		goto disable_clock;
@@ -1385,10 +1384,10 @@ static int mipi_csis_probe(struct platform_device *pdev)
 unregister_all:
 	mipi_csis_debugfs_exit(state);
 cleanup:
-	media_entity_cleanup(&state->mipi_sd.entity);
+	media_entity_cleanup(&state->sd.entity);
 	v4l2_async_notifier_unregister(&state->notifier);
 	v4l2_async_notifier_cleanup(&state->notifier);
-	v4l2_async_unregister_subdev(&state->mipi_sd);
+	v4l2_async_unregister_subdev(&state->sd);
 disable_clock:
 	mipi_csis_clk_disable(state);
 	mutex_destroy(&state->lock);
@@ -1398,18 +1397,18 @@ disable_clock:
 
 static int mipi_csis_remove(struct platform_device *pdev)
 {
-	struct v4l2_subdev *mipi_sd = platform_get_drvdata(pdev);
-	struct csi_state *state = mipi_sd_to_csis_state(mipi_sd);
+	struct v4l2_subdev *sd = platform_get_drvdata(pdev);
+	struct csi_state *state = mipi_sd_to_csis_state(sd);
 
 	mipi_csis_debugfs_exit(state);
 	v4l2_async_notifier_unregister(&state->notifier);
 	v4l2_async_notifier_cleanup(&state->notifier);
-	v4l2_async_unregister_subdev(&state->mipi_sd);
+	v4l2_async_unregister_subdev(&state->sd);
 
 	pm_runtime_disable(&pdev->dev);
 	mipi_csis_pm_suspend(&pdev->dev, true);
 	mipi_csis_clk_disable(state);
-	media_entity_cleanup(&state->mipi_sd.entity);
+	media_entity_cleanup(&state->sd.entity);
 	mutex_destroy(&state->lock);
 	pm_runtime_set_suspended(&pdev->dev);
 
-- 
GitLab


From f65ffcd8abf976d90c6fbcb1c1045c22974dfd81 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:04 +0200
Subject: [PATCH 1416/3804] media: imx: imx7_mipi_csis: Rename csi_state flag
 field to state

The flags field of the csi_state structure contains the device state.
Rename it accordingly.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 26 +++++++++++-----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 382d02f54239e..a8e0a5c487a09 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -306,7 +306,7 @@ struct csi_state {
 	struct platform_device *pdev;
 	void __iomem *regs;
 	int irq;
-	u32 flags;
+	u32 state;
 
 	struct dentry *debugfs_root;
 	bool debug;
@@ -812,7 +812,7 @@ static int mipi_csis_s_stream(struct v4l2_subdev *sd, int enable)
 	mutex_lock(&state->lock);
 
 	if (enable) {
-		if (state->flags & ST_SUSPENDED) {
+		if (state->state & ST_SUSPENDED) {
 			ret = -EBUSY;
 			goto unlock;
 		}
@@ -824,14 +824,14 @@ static int mipi_csis_s_stream(struct v4l2_subdev *sd, int enable)
 
 		mipi_csis_log_counters(state, true);
 
-		state->flags |= ST_STREAMING;
+		state->state |= ST_STREAMING;
 	} else {
 		v4l2_subdev_call(state->src_sd, video, s_stream, 0);
 		ret = v4l2_subdev_call(state->src_sd, core, s_power, 0);
 		if (ret == -ENOIOCTLCMD)
 			ret = 0;
 		mipi_csis_stop_stream(state);
-		state->flags &= ~ST_STREAMING;
+		state->state &= ~ST_STREAMING;
 		if (state->debug)
 			mipi_csis_log_counters(state, true);
 	}
@@ -1027,7 +1027,7 @@ static int mipi_csis_log_status(struct v4l2_subdev *sd)
 
 	mutex_lock(&state->lock);
 	mipi_csis_log_counters(state, true);
-	if (state->debug && (state->flags & ST_POWERED))
+	if (state->debug && (state->state & ST_POWERED))
 		mipi_csis_dump_regs(state);
 	mutex_unlock(&state->lock);
 
@@ -1177,15 +1177,15 @@ static int mipi_csis_pm_suspend(struct device *dev, bool runtime)
 	int ret = 0;
 
 	mutex_lock(&state->lock);
-	if (state->flags & ST_POWERED) {
+	if (state->state & ST_POWERED) {
 		mipi_csis_stop_stream(state);
 		ret = regulator_disable(state->mipi_phy_regulator);
 		if (ret)
 			goto unlock;
 		mipi_csis_clk_disable(state);
-		state->flags &= ~ST_POWERED;
+		state->state &= ~ST_POWERED;
 		if (!runtime)
-			state->flags |= ST_SUSPENDED;
+			state->state |= ST_SUSPENDED;
 	}
 
 unlock:
@@ -1201,21 +1201,21 @@ static int mipi_csis_pm_resume(struct device *dev, bool runtime)
 	int ret = 0;
 
 	mutex_lock(&state->lock);
-	if (!runtime && !(state->flags & ST_SUSPENDED))
+	if (!runtime && !(state->state & ST_SUSPENDED))
 		goto unlock;
 
-	if (!(state->flags & ST_POWERED)) {
+	if (!(state->state & ST_POWERED)) {
 		ret = regulator_enable(state->mipi_phy_regulator);
 		if (ret)
 			goto unlock;
 
-		state->flags |= ST_POWERED;
+		state->state |= ST_POWERED;
 		mipi_csis_clk_enable(state);
 	}
-	if (state->flags & ST_STREAMING)
+	if (state->state & ST_STREAMING)
 		mipi_csis_start_stream(state);
 
-	state->flags &= ~ST_SUSPENDED;
+	state->state &= ~ST_SUSPENDED;
 
 unlock:
 	mutex_unlock(&state->lock);
-- 
GitLab


From e71bcbe65f67abdcc144729c9bad957424043b81 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:05 +0200
Subject: [PATCH 1417/3804] media: imx: imx7_mipi_csis: Turn csi_state irq
 field into local variable

The irq field of the csi_state structure is only used in
mipi_csis_probe(). Turn it into a local variable.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index a8e0a5c487a09..89e9a2cc77848 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -305,7 +305,6 @@ struct csi_state {
 	u8 index;
 	struct platform_device *pdev;
 	void __iomem *regs;
-	int irq;
 	u32 state;
 
 	struct dentry *debugfs_root;
@@ -1304,6 +1303,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct csi_state *state;
+	int irq;
 	int ret;
 
 	state = devm_kzalloc(dev, sizeof(*state), GFP_KERNEL);
@@ -1331,9 +1331,9 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	if (IS_ERR(state->regs))
 		return PTR_ERR(state->regs);
 
-	state->irq = platform_get_irq(pdev, 0);
-	if (state->irq < 0)
-		return state->irq;
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
 
 	ret = mipi_csis_clk_get(state);
 	if (ret < 0)
@@ -1345,8 +1345,8 @@ static int mipi_csis_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = devm_request_irq(dev, state->irq, mipi_csis_irq_handler,
-			       0, dev_name(dev), state);
+	ret = devm_request_irq(dev, irq, mipi_csis_irq_handler, 0,
+			       dev_name(dev), state);
 	if (ret) {
 		dev_err(dev, "Interrupt request failed\n");
 		goto disable_clock;
-- 
GitLab


From deb1c97283a4065ae99bbb67b9b373dd02826d13 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:06 +0200
Subject: [PATCH 1418/3804] media: imx: imx7_mipi_csis: Don't pass pdev to
 mipi_csis_parse_dt()

The mipi_csis_parse_dt() function is called with a pointer to the
csi_state, which contains all the information necessary. Don't pass the
platform device pointer as well.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 89e9a2cc77848..4d1819b7c9c45 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1282,17 +1282,16 @@ static int mipi_csis_subdev_init(struct v4l2_subdev *sd,
 				      state->pads);
 }
 
-static int mipi_csis_parse_dt(struct platform_device *pdev,
-			      struct csi_state *state)
+static int mipi_csis_parse_dt(struct csi_state *state)
 {
-	struct device_node *node = pdev->dev.of_node;
+	struct device_node *node = state->dev->of_node;
 
 	if (of_property_read_u32(node, "clock-frequency",
 				 &state->clk_frequency))
 		state->clk_frequency = DEFAULT_SCLK_CSIS_FREQ;
 
 	/* Get MIPI PHY resets */
-	state->mrst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+	state->mrst = devm_reset_control_get_exclusive(state->dev, NULL);
 	if (IS_ERR(state->mrst))
 		return PTR_ERR(state->mrst);
 
@@ -1315,7 +1314,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	state->pdev = pdev;
 	state->dev = dev;
 
-	ret = mipi_csis_parse_dt(pdev, state);
+	ret = mipi_csis_parse_dt(state);
 	if (ret < 0) {
 		dev_err(dev, "Failed to parse device tree: %d\n", ret);
 		return ret;
-- 
GitLab


From 96703073ecb36376579c7b8e2a34dbaf025909cb Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:07 +0200
Subject: [PATCH 1419/3804] media: imx: imx7_mipi_csis: Pass csi_state to
 mipi_csis_subdev_init()

Pass the csi_state pointer to the mipi_csis_subdev_init() function,
instead of miscellaneous information scattered in different arguments.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 4d1819b7c9c45..5e2ae59fc9df4 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1252,13 +1252,11 @@ static const struct dev_pm_ops mipi_csis_pm_ops = {
  * Probe/remove & platform driver
  */
 
-static int mipi_csis_subdev_init(struct v4l2_subdev *sd,
-				 struct platform_device *pdev,
-				 const struct v4l2_subdev_ops *ops)
+static int mipi_csis_subdev_init(struct csi_state *state)
 {
-	struct csi_state *state = mipi_sd_to_csis_state(sd);
+	struct v4l2_subdev *sd = &state->sd;
 
-	v4l2_subdev_init(sd, ops);
+	v4l2_subdev_init(sd, &mipi_csis_subdev_ops);
 	sd->owner = THIS_MODULE;
 	snprintf(sd->name, sizeof(sd->name), "%s.%d",
 		 CSIS_SUBDEV_NAME, state->index);
@@ -1269,7 +1267,7 @@ static int mipi_csis_subdev_init(struct v4l2_subdev *sd,
 	sd->entity.function = MEDIA_ENT_F_VID_IF_BRIDGE;
 	sd->entity.ops = &mipi_csis_entity_ops;
 
-	sd->dev = &pdev->dev;
+	sd->dev = state->dev;
 
 	state->csis_fmt = &mipi_csis_formats[0];
 	mipi_csis_init_cfg(sd, NULL);
@@ -1354,8 +1352,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, &state->sd);
 
 	mutex_init(&state->lock);
-	ret = mipi_csis_subdev_init(&state->sd, pdev,
-				    &mipi_csis_subdev_ops);
+	ret = mipi_csis_subdev_init(state);
 	if (ret < 0)
 		goto disable_clock;
 
-- 
GitLab


From 62bd05a4f9fffe17f9bddb9f5d28fe229c200abd Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:08 +0200
Subject: [PATCH 1420/3804] media: imx: imx7_mipi_csis: Drop csi_state pdev
 field

The pdev field of the csi_state structure is only used to access the
device pointer, which is stored in a separate field. Drop the pdev
field, as well as a few local dev variables.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 30 ++++++++++------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 5e2ae59fc9df4..5c7f9f28103b8 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -303,7 +303,6 @@ struct csi_state {
 	struct v4l2_subdev *src_sd;
 
 	u8 index;
-	struct platform_device *pdev;
 	void __iomem *regs;
 	u32 state;
 
@@ -615,13 +614,12 @@ static void mipi_csis_clk_disable(struct csi_state *state)
 
 static int mipi_csis_clk_get(struct csi_state *state)
 {
-	struct device *dev = &state->pdev->dev;
 	unsigned int i;
 	int ret;
 
 	state->num_clks = ARRAY_SIZE(mipi_csis_clk_id);
-	state->clks = devm_kcalloc(dev, state->num_clks, sizeof(*state->clks),
-				   GFP_KERNEL);
+	state->clks = devm_kcalloc(state->dev, state->num_clks,
+				   sizeof(*state->clks), GFP_KERNEL);
 
 	if (!state->clks)
 		return -ENOMEM;
@@ -629,7 +627,7 @@ static int mipi_csis_clk_get(struct csi_state *state)
 	for (i = 0; i < state->num_clks; i++)
 		state->clks[i].id = mipi_csis_clk_id[i];
 
-	ret = devm_clk_bulk_get(dev, state->num_clks, state->clks);
+	ret = devm_clk_bulk_get(state->dev, state->num_clks, state->clks);
 	if (ret < 0)
 		return ret;
 
@@ -637,8 +635,8 @@ static int mipi_csis_clk_get(struct csi_state *state)
 	ret = clk_set_rate(state->clks[MIPI_CSIS_CLK_WRAP].clk,
 			   state->clk_frequency);
 	if (ret < 0)
-		dev_err(dev, "set rate=%d failed: %d\n", state->clk_frequency,
-			ret);
+		dev_err(state->dev, "set rate=%d failed: %d\n",
+			state->clk_frequency, ret);
 
 	return ret;
 }
@@ -707,7 +705,6 @@ static void mipi_csis_log_counters(struct csi_state *state, bool non_errors)
 {
 	unsigned int num_events = non_errors ? MIPI_CSIS_NUM_EVENTS
 				: MIPI_CSIS_NUM_EVENTS - 8;
-	struct device *dev = &state->pdev->dev;
 	unsigned long flags;
 	unsigned int i;
 
@@ -715,7 +712,8 @@ static void mipi_csis_log_counters(struct csi_state *state, bool non_errors)
 
 	for (i = 0; i < num_events; ++i) {
 		if (state->events[i].counter > 0 || state->debug)
-			dev_info(dev, "%s events: %d\n", state->events[i].name,
+			dev_info(state->dev, "%s events: %d\n",
+				 state->events[i].name,
 				 state->events[i].counter);
 	}
 	spin_unlock_irqrestore(&state->slock, flags);
@@ -741,15 +739,14 @@ static int mipi_csis_dump_regs(struct csi_state *state)
 		{ MIPI_CSIS_DBG_CTRL, "DBG_CTRL" },
 	};
 
-	struct device *dev = &state->pdev->dev;
 	unsigned int i;
 	u32 cfg;
 
-	dev_info(dev, "--- REGISTERS ---\n");
+	dev_info(state->dev, "--- REGISTERS ---\n");
 
 	for (i = 0; i < ARRAY_SIZE(registers); i++) {
 		cfg = mipi_csis_read(state, registers[i].offset);
-		dev_info(dev, "%14s: 0x%08x\n", registers[i].name, cfg);
+		dev_info(state->dev, "%14s: 0x%08x\n", registers[i].name, cfg);
 	}
 
 	return 0;
@@ -799,7 +796,7 @@ static int mipi_csis_s_stream(struct v4l2_subdev *sd, int enable)
 
 		mipi_csis_clear_counters(state);
 
-		ret = pm_runtime_resume_and_get(&state->pdev->dev);
+		ret = pm_runtime_resume_and_get(state->dev);
 		if (ret < 0)
 			return ret;
 
@@ -840,7 +837,7 @@ unlock:
 
 done:
 	if (!enable || ret < 0)
-		pm_runtime_put(&state->pdev->dev);
+		pm_runtime_put(state->dev);
 
 	return ret;
 }
@@ -1309,7 +1306,6 @@ static int mipi_csis_probe(struct platform_device *pdev)
 
 	spin_lock_init(&state->slock);
 
-	state->pdev = pdev;
 	state->dev = dev;
 
 	ret = mipi_csis_parse_dt(state);
@@ -1358,7 +1354,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 
 	ret = mipi_csis_async_register(state);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "async register failed: %d\n", ret);
+		dev_err(dev, "async register failed: %d\n", ret);
 		goto cleanup;
 	}
 
@@ -1372,7 +1368,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 			goto unregister_all;
 	}
 
-	dev_info(&pdev->dev, "lanes: %d, freq: %u\n",
+	dev_info(dev, "lanes: %d, freq: %u\n",
 		 state->bus.num_data_lanes, state->clk_frequency);
 
 	return 0;
-- 
GitLab


From b4eb02656d0944e03eb991f3feda52f653693653 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:09 +0200
Subject: [PATCH 1421/3804] media: imx: imx7_mipi_csis: Make csi_state
 num_clocks field unsigned

The num_clks field of the csi_state structure only stores positive
values. Make it unsigned.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 5c7f9f28103b8..363aa28fae571 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -309,7 +309,7 @@ struct csi_state {
 	struct dentry *debugfs_root;
 	bool debug;
 
-	int num_clks;
+	unsigned int num_clks;
 	struct clk_bulk_data *clks;
 
 	u32 clk_frequency;
-- 
GitLab


From 0092d4a8ea7f7414a2ca64d455e2fdb0306d8d56 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:10 +0200
Subject: [PATCH 1422/3804] media: imx: imx7_mipi_csis: Reorganize csi_state
 structure

Group the fields of the csi_state structure logically to improve
readability.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 35 +++++++++-------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 363aa28fae571..c302b095ce74e 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -292,40 +292,33 @@ static const char * const mipi_csis_clk_id[] = {
 };
 
 struct csi_state {
-	/* lock elements below */
-	struct mutex lock;
-	/* lock for event handler */
-	spinlock_t slock;
 	struct device *dev;
-	struct media_pad pads[CSIS_PADS_NUM];
-	struct v4l2_subdev sd;
-	struct v4l2_async_notifier notifier;
-	struct v4l2_subdev *src_sd;
-
-	u8 index;
 	void __iomem *regs;
-	u32 state;
-
-	struct dentry *debugfs_root;
-	bool debug;
-
 	unsigned int num_clks;
 	struct clk_bulk_data *clks;
+	struct reset_control *mrst;
+	struct regulator *mipi_phy_regulator;
+	u8 index;
 
+	struct v4l2_subdev sd;
+	struct media_pad pads[CSIS_PADS_NUM];
+	struct v4l2_async_notifier notifier;
+	struct v4l2_subdev *src_sd;
+
+	struct v4l2_fwnode_bus_mipi_csi2 bus;
 	u32 clk_frequency;
 	u32 hs_settle;
 	u32 clk_settle;
 
-	struct reset_control *mrst;
-
+	struct mutex lock;	/* Protect csis_fmt, format_mbus and state */
 	const struct csis_pix_format *csis_fmt;
 	struct v4l2_mbus_framefmt format_mbus;
+	u32 state;
 
-	struct v4l2_fwnode_bus_mipi_csi2 bus;
-
+	spinlock_t slock;	/* Protect events */
 	struct mipi_csis_event events[MIPI_CSIS_NUM_EVENTS];
-
-	struct regulator *mipi_phy_regulator;
+	struct dentry *debugfs_root;
+	bool debug;
 };
 
 /* -----------------------------------------------------------------------------
-- 
GitLab


From 7479454cfef039f208cfd1e5b6cead38dc0caa05 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:11 +0200
Subject: [PATCH 1423/3804] media: imx: imx7_mipi_csis: Reorganize
 mipi_csis_probe()

Group the operations performed in mipi_csis_probe() logically to improve
readability.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 30 ++++++++++++++--------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index c302b095ce74e..a1eaccc922a25 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1297,22 +1297,21 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	if (!state)
 		return -ENOMEM;
 
+	mutex_init(&state->lock);
 	spin_lock_init(&state->slock);
 
 	state->dev = dev;
 
+	memcpy(state->events, mipi_csis_events, sizeof(state->events));
+
+	/* Parse DT properties. */
 	ret = mipi_csis_parse_dt(state);
 	if (ret < 0) {
 		dev_err(dev, "Failed to parse device tree: %d\n", ret);
 		return ret;
 	}
 
-	ret = mipi_csis_phy_init(state);
-	if (ret < 0)
-		return ret;
-
-	mipi_csis_phy_reset(state);
-
+	/* Acquire resources. */
 	state->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(state->regs))
 		return PTR_ERR(state->regs);
@@ -1321,16 +1320,24 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
+	ret = mipi_csis_phy_init(state);
+	if (ret < 0)
+		return ret;
+
 	ret = mipi_csis_clk_get(state);
 	if (ret < 0)
 		return ret;
 
+	/* Reset PHY and enable the clocks. */
+	mipi_csis_phy_reset(state);
+
 	ret = mipi_csis_clk_enable(state);
 	if (ret < 0) {
 		dev_err(state->dev, "failed to enable clocks: %d\n", ret);
 		return ret;
 	}
 
+	/* Now that the hardware is initialized, request the interrupt. */
 	ret = devm_request_irq(dev, irq, mipi_csis_irq_handler, 0,
 			       dev_name(dev), state);
 	if (ret) {
@@ -1338,22 +1345,23 @@ static int mipi_csis_probe(struct platform_device *pdev)
 		goto disable_clock;
 	}
 
-	platform_set_drvdata(pdev, &state->sd);
-
-	mutex_init(&state->lock);
+	/* Initialize and register the subdev. */
 	ret = mipi_csis_subdev_init(state);
 	if (ret < 0)
 		goto disable_clock;
 
+	platform_set_drvdata(pdev, &state->sd);
+
 	ret = mipi_csis_async_register(state);
 	if (ret < 0) {
 		dev_err(dev, "async register failed: %d\n", ret);
 		goto cleanup;
 	}
 
-	memcpy(state->events, mipi_csis_events, sizeof(state->events));
-
+	/* Initialize debugfs. */
 	mipi_csis_debugfs_init(state);
+
+	/* Enable runtime PM. */
 	pm_runtime_enable(dev);
 	if (!pm_runtime_enabled(dev)) {
 		ret = mipi_csis_pm_resume(dev, true);
-- 
GitLab


From 88fc81388df942e580b65afa197c97c490b5b855 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:12 +0200
Subject: [PATCH 1424/3804] media: imx: imx7_mipi_csis: Reject invalid
 data-lanes settings

The CSIS doesn't support data lanes reordering. Reject invalid settings.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index a1eaccc922a25..14ff785ba5d56 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1114,6 +1114,7 @@ static int mipi_csis_async_register(struct csi_state *state)
 	};
 	struct v4l2_async_subdev *asd;
 	struct fwnode_handle *ep;
+	unsigned int i;
 	int ret;
 
 	v4l2_async_notifier_init(&state->notifier);
@@ -1127,6 +1128,14 @@ static int mipi_csis_async_register(struct csi_state *state)
 	if (ret)
 		goto err_parse;
 
+	for (i = 0; i < vep.bus.mipi_csi2.num_data_lanes; ++i) {
+		if (vep.bus.mipi_csi2.data_lanes[i] != i + 1) {
+			dev_err(state->dev,
+				"data lanes reordering is not supported");
+			goto err_parse;
+		}
+	}
+
 	state->bus = vep.bus.mipi_csi2;
 
 	dev_dbg(state->dev, "data lanes: %d\n", state->bus.num_data_lanes);
-- 
GitLab


From acdff8e14ae9e992526d050f8cee2264710de33e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sun, 16 May 2021 00:32:26 +0200
Subject: [PATCH 1425/3804] media: imx: imx7_mipi_csis: Move PHY control to
 dedicated functions

Move the PHY regulator and reset handling to dedicated functions. This
groups all related code together, and prepares for i.MX8 support that
doesn't require control of the PHY regulator and reset.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 64 +++++++++++++---------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 14ff785ba5d56..3c43441653c30 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -457,25 +457,6 @@ static void mipi_csis_sw_reset(struct csi_state *state)
 	usleep_range(10, 20);
 }
 
-static int mipi_csis_phy_init(struct csi_state *state)
-{
-	state->mipi_phy_regulator = devm_regulator_get(state->dev, "phy");
-	if (IS_ERR(state->mipi_phy_regulator))
-		return PTR_ERR(state->mipi_phy_regulator);
-
-	return regulator_set_voltage(state->mipi_phy_regulator, 1000000,
-				     1000000);
-}
-
-static void mipi_csis_phy_reset(struct csi_state *state)
-{
-	reset_control_assert(state->mrst);
-
-	msleep(20);
-
-	reset_control_deassert(state->mrst);
-}
-
 static void mipi_csis_system_enable(struct csi_state *state, int on)
 {
 	u32 val, mask;
@@ -679,6 +660,42 @@ static irqreturn_t mipi_csis_irq_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+/* -----------------------------------------------------------------------------
+ * PHY regulator and reset
+ */
+
+static int mipi_csis_phy_enable(struct csi_state *state)
+{
+	return regulator_enable(state->mipi_phy_regulator);
+}
+
+static int mipi_csis_phy_disable(struct csi_state *state)
+{
+	return regulator_disable(state->mipi_phy_regulator);
+}
+
+static void mipi_csis_phy_reset(struct csi_state *state)
+{
+	reset_control_assert(state->mrst);
+	msleep(20);
+	reset_control_deassert(state->mrst);
+}
+
+static int mipi_csis_phy_init(struct csi_state *state)
+{
+	/* Get MIPI PHY reset and regulator. */
+	state->mrst = devm_reset_control_get_exclusive(state->dev, NULL);
+	if (IS_ERR(state->mrst))
+		return PTR_ERR(state->mrst);
+
+	state->mipi_phy_regulator = devm_regulator_get(state->dev, "phy");
+	if (IS_ERR(state->mipi_phy_regulator))
+		return PTR_ERR(state->mipi_phy_regulator);
+
+	return regulator_set_voltage(state->mipi_phy_regulator, 1000000,
+				     1000000);
+}
+
 /* -----------------------------------------------------------------------------
  * Debug
  */
@@ -1177,7 +1194,7 @@ static int mipi_csis_pm_suspend(struct device *dev, bool runtime)
 	mutex_lock(&state->lock);
 	if (state->state & ST_POWERED) {
 		mipi_csis_stop_stream(state);
-		ret = regulator_disable(state->mipi_phy_regulator);
+		ret = mipi_csis_phy_disable(state);
 		if (ret)
 			goto unlock;
 		mipi_csis_clk_disable(state);
@@ -1203,7 +1220,7 @@ static int mipi_csis_pm_resume(struct device *dev, bool runtime)
 		goto unlock;
 
 	if (!(state->state & ST_POWERED)) {
-		ret = regulator_enable(state->mipi_phy_regulator);
+		ret = mipi_csis_phy_enable(state);
 		if (ret)
 			goto unlock;
 
@@ -1287,11 +1304,6 @@ static int mipi_csis_parse_dt(struct csi_state *state)
 				 &state->clk_frequency))
 		state->clk_frequency = DEFAULT_SCLK_CSIS_FREQ;
 
-	/* Get MIPI PHY resets */
-	state->mrst = devm_reset_control_get_exclusive(state->dev, NULL);
-	if (IS_ERR(state->mrst))
-		return PTR_ERR(state->mrst);
-
 	return 0;
 }
 
-- 
GitLab


From 85b62ff2cb971c53a9a0cfafd31b07a92bb0fa19 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sun, 18 Apr 2021 22:15:56 +0200
Subject: [PATCH 1426/3804] media: dt-bindings: media: nxp,imx7-mipi-csi2: Add
 i.MX8MM support

The i.MX8MM integrates a newer version of the CSIS CSI-2 receiver than the
i.MX7 family. Differences in integration are:

- An additional clock is required
- Up to 4 data lanes are supported
- No reset or PHY supply is present

Support it in the DT binding.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/nxp,imx7-mipi-csi2.yaml    | 109 +++++++++++++++---
 1 file changed, 95 insertions(+), 14 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
index d8ed480482b94..7c09eec78ce5a 100644
--- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
+++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml
@@ -4,15 +4,17 @@
 $id: http://devicetree.org/schemas/media/nxp,imx7-mipi-csi2.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX7 MIPI CSI-2 receiver
+title: NXP i.MX7 and i.MX8 MIPI CSI-2 receiver
 
 maintainers:
   - Rui Miguel Silva <rmfrfs@gmail.com>
+  - Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 
 description: |-
-  The NXP i.MX7 SoC family includes a MIPI CSI-2 receiver IP core, documented
-  as "CSIS V3.3". The IP core seems to originate from Samsung, and may be
-  compatible with some of the Exynos4 ad S5P SoCs.
+  The NXP i.MX7 and i.MX8 families contain SoCs that include a MIPI CSI-2
+  receiver IP core named CSIS. The IP core originates from Samsung, and may be
+  compatible with some of the Exynos4 and S5P SoCs. i.MX7 SoCs use CSIS version
+  3.3, and i.MX8 SoCs use CSIS version 3.6.3.
 
   While the CSI-2 receiver is separate from the MIPI D-PHY IP core, the PHY is
   completely wrapped by the CSIS and doesn't expose a control interface of its
@@ -20,7 +22,9 @@ description: |-
 
 properties:
   compatible:
-    const: fsl,imx7-mipi-csi2
+    enum:
+      - fsl,imx7-mipi-csi2
+      - fsl,imx8mm-mipi-csi2
 
   reg:
     maxItems: 1
@@ -29,16 +33,20 @@ properties:
     maxItems: 1
 
   clocks:
+    minItems: 3
     items:
       - description: The peripheral clock (a.k.a. APB clock)
       - description: The external clock (optionally used as the pixel clock)
       - description: The MIPI D-PHY clock
+      - description: The AXI clock
 
   clock-names:
+    minItems: 3
     items:
       - const: pclk
       - const: wrap
       - const: phy
+      - const: axi
 
   power-domains:
     maxItems: 1
@@ -71,16 +79,30 @@ properties:
 
             properties:
               data-lanes:
-                oneOf:
-                  - items:
-                      - const: 1
-                  - items:
-                      - const: 1
-                      - const: 2
+                items:
+                  minItems: 1
+                  maxItems: 4
+                  items:
+                    - const: 1
+                    - const: 2
+                    - const: 3
+                    - const: 4
 
             required:
               - data-lanes
 
+            allOf:
+              - if:
+                  properties:
+                    compatible:
+                      contains:
+                        const: fsl,imx7-mipi-csi2
+                then:
+                  properties:
+                    data-lanes:
+                      items:
+                        maxItems: 2
+
       port@1:
         $ref: /schemas/graph.yaml#/properties/port
         description:
@@ -93,12 +115,29 @@ required:
   - clocks
   - clock-names
   - power-domains
-  - phy-supply
-  - resets
   - ports
 
 additionalProperties: false
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: fsl,imx7-mipi-csi2
+    then:
+      required:
+        - phy-supply
+        - resets
+    else:
+      properties:
+        clocks:
+          minItems: 4
+        clock-names:
+          minItems: 4
+        phy-supply: false
+        resets: false
+
 examples:
   - |
     #include <dt-bindings/clock/imx7d-clock.h>
@@ -106,7 +145,7 @@ examples:
     #include <dt-bindings/interrupt-controller/irq.h>
     #include <dt-bindings/reset/imx7-reset.h>
 
-    mipi_csi: mipi-csi@30750000 {
+    mipi-csi@30750000 {
         compatible = "fsl,imx7-mipi-csi2";
         reg = <0x30750000 0x10000>;
         interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
@@ -144,4 +183,46 @@ examples:
         };
     };
 
+  - |
+    #include <dt-bindings/clock/imx8mm-clock.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    mipi-csi@32e30000 {
+        compatible = "fsl,imx8mm-mipi-csi2";
+        reg = <0x32e30000 0x1000>;
+        interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+        clock-frequency = <333000000>;
+        clocks = <&clk IMX8MM_CLK_DISP_APB_ROOT>,
+                 <&clk IMX8MM_CLK_CSI1_ROOT>,
+                 <&clk IMX8MM_CLK_CSI1_PHY_REF>,
+                 <&clk IMX8MM_CLK_DISP_AXI_ROOT>;
+        clock-names = "pclk", "wrap", "phy", "axi";
+        power-domains = <&mipi_pd>;
+
+        status = "disabled";
+
+        ports {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            port@0 {
+                reg = <0>;
+
+                imx8mm_mipi_csi_in: endpoint {
+                    remote-endpoint = <&imx477_out>;
+                    data-lanes = <1 2 3 4>;
+                };
+            };
+
+            port@1 {
+                reg = <1>;
+
+                imx8mm_mipi_csi_out: endpoint {
+                    remote-endpoint = <&csi_in>;
+                };
+            };
+        };
+    };
+
 ...
-- 
GitLab


From f0e7cfbb43f1961f12b9903f602b6e6ddf1ada02 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Tue, 13 Apr 2021 04:30:14 +0200
Subject: [PATCH 1427/3804] media: imx: imx7_mipi_csis: Add i.MX8MM support

The CSI-2 receiver in the i.MX8MM is a newer version of the one found in
the i.MX7. Differences are minimal, support it in the imx7_mipi_csis
driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Tested-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 59 ++++++++++++++++++----
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 3c43441653c30..d573f3475d28f 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -18,6 +18,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
@@ -283,21 +284,33 @@ enum mipi_csis_clk {
 	MIPI_CSIS_CLK_PCLK,
 	MIPI_CSIS_CLK_WRAP,
 	MIPI_CSIS_CLK_PHY,
+	MIPI_CSIS_CLK_AXI,
 };
 
 static const char * const mipi_csis_clk_id[] = {
 	"pclk",
 	"wrap",
 	"phy",
+	"axi",
+};
+
+enum mipi_csis_version {
+	MIPI_CSIS_V3_3,
+	MIPI_CSIS_V3_6_3,
+};
+
+struct mipi_csis_info {
+	enum mipi_csis_version version;
+	unsigned int num_clocks;
 };
 
 struct csi_state {
 	struct device *dev;
 	void __iomem *regs;
-	unsigned int num_clks;
 	struct clk_bulk_data *clks;
 	struct reset_control *mrst;
 	struct regulator *mipi_phy_regulator;
+	const struct mipi_csis_info *info;
 	u8 index;
 
 	struct v4l2_subdev sd;
@@ -539,7 +552,8 @@ static void mipi_csis_set_params(struct csi_state *state)
 	val = mipi_csis_read(state, MIPI_CSIS_CMN_CTRL);
 	val &= ~MIPI_CSIS_CMN_CTRL_LANE_NR_MASK;
 	val |= (lanes - 1) << MIPI_CSIS_CMN_CTRL_LANE_NR_OFFSET;
-	val |= MIPI_CSIS_CMN_CTRL_INTER_MODE;
+	if (state->info->version == MIPI_CSIS_V3_3)
+		val |= MIPI_CSIS_CMN_CTRL_INTER_MODE;
 	mipi_csis_write(state, MIPI_CSIS_CMN_CTRL, val);
 
 	__mipi_csis_set_format(state);
@@ -578,12 +592,12 @@ static void mipi_csis_set_params(struct csi_state *state)
 
 static int mipi_csis_clk_enable(struct csi_state *state)
 {
-	return clk_bulk_prepare_enable(state->num_clks, state->clks);
+	return clk_bulk_prepare_enable(state->info->num_clocks, state->clks);
 }
 
 static void mipi_csis_clk_disable(struct csi_state *state)
 {
-	clk_bulk_disable_unprepare(state->num_clks, state->clks);
+	clk_bulk_disable_unprepare(state->info->num_clocks, state->clks);
 }
 
 static int mipi_csis_clk_get(struct csi_state *state)
@@ -591,17 +605,17 @@ static int mipi_csis_clk_get(struct csi_state *state)
 	unsigned int i;
 	int ret;
 
-	state->num_clks = ARRAY_SIZE(mipi_csis_clk_id);
-	state->clks = devm_kcalloc(state->dev, state->num_clks,
+	state->clks = devm_kcalloc(state->dev, state->info->num_clocks,
 				   sizeof(*state->clks), GFP_KERNEL);
 
 	if (!state->clks)
 		return -ENOMEM;
 
-	for (i = 0; i < state->num_clks; i++)
+	for (i = 0; i < state->info->num_clocks; i++)
 		state->clks[i].id = mipi_csis_clk_id[i];
 
-	ret = devm_clk_bulk_get(state->dev, state->num_clks, state->clks);
+	ret = devm_clk_bulk_get(state->dev, state->info->num_clocks,
+				state->clks);
 	if (ret < 0)
 		return ret;
 
@@ -666,16 +680,25 @@ static irqreturn_t mipi_csis_irq_handler(int irq, void *dev_id)
 
 static int mipi_csis_phy_enable(struct csi_state *state)
 {
+	if (state->info->version != MIPI_CSIS_V3_3)
+		return 0;
+
 	return regulator_enable(state->mipi_phy_regulator);
 }
 
 static int mipi_csis_phy_disable(struct csi_state *state)
 {
+	if (state->info->version != MIPI_CSIS_V3_3)
+		return 0;
+
 	return regulator_disable(state->mipi_phy_regulator);
 }
 
 static void mipi_csis_phy_reset(struct csi_state *state)
 {
+	if (state->info->version != MIPI_CSIS_V3_3)
+		return;
+
 	reset_control_assert(state->mrst);
 	msleep(20);
 	reset_control_deassert(state->mrst);
@@ -683,6 +706,9 @@ static void mipi_csis_phy_reset(struct csi_state *state)
 
 static int mipi_csis_phy_init(struct csi_state *state)
 {
+	if (state->info->version != MIPI_CSIS_V3_3)
+		return 0;
+
 	/* Get MIPI PHY reset and regulator. */
 	state->mrst = devm_reset_control_get_exclusive(state->dev, NULL);
 	if (IS_ERR(state->mrst))
@@ -1322,6 +1348,7 @@ static int mipi_csis_probe(struct platform_device *pdev)
 	spin_lock_init(&state->slock);
 
 	state->dev = dev;
+	state->info = of_device_get_match_data(dev);
 
 	memcpy(state->events, mipi_csis_events, sizeof(state->events));
 
@@ -1430,7 +1457,19 @@ static int mipi_csis_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id mipi_csis_of_match[] = {
-	{ .compatible = "fsl,imx7-mipi-csi2", },
+	{
+		.compatible = "fsl,imx7-mipi-csi2",
+		.data = &(const struct mipi_csis_info){
+			.version = MIPI_CSIS_V3_3,
+			.num_clocks = 3,
+		},
+	}, {
+		.compatible = "fsl,imx8mm-mipi-csi2",
+		.data = &(const struct mipi_csis_info){
+			.version = MIPI_CSIS_V3_6_3,
+			.num_clocks = 4,
+		},
+	},
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, mipi_csis_of_match);
@@ -1447,6 +1486,6 @@ static struct platform_driver mipi_csis_driver = {
 
 module_platform_driver(mipi_csis_driver);
 
-MODULE_DESCRIPTION("i.MX7 MIPI CSI-2 Receiver driver");
+MODULE_DESCRIPTION("i.MX7 & i.MX8 MIPI CSI-2 receiver driver");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:imx7-mipi-csi2");
-- 
GitLab


From 2fb27551ba4053ae503ce6c3b7b5d87cd206b1fd Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sun, 18 Apr 2021 22:14:06 +0200
Subject: [PATCH 1428/3804] media: imx: imx7_mipi_csis: Update MAINTAINERS

Given my recent contributions to the imx7-mipi-csis driver, I can as
well be listed as a maintainer.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Rui Miguel Silva <rmfrfs@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..2d06dc43fe70c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11283,6 +11283,7 @@ F:	include/media/imx.h
 
 MEDIA DRIVERS FOR FREESCALE IMX7
 M:	Rui Miguel Silva <rmfrfs@gmail.com>
+M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
-- 
GitLab


From c4681547bcce777daf576925a966ffa824edd09d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 23 May 2021 11:42:48 -1000
Subject: [PATCH 1429/3804] Linux 5.13-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0ed7e061c8e9e..e4468353425a6 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
GitLab


From 1e69abf98921fa27e2064970b614502d85230f9f Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 21 May 2021 17:46:54 -0700
Subject: [PATCH 1430/3804] MAINTAINERS: Add entries for CBS, ETF and taprio
 qdiscs

Add Vinicius Costa Gomes as maintainer for these qdiscs.

These qdiscs are all TSN (Time Sensitive Networking) related.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 251111e5da534..2cc1cb72bc923 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4138,6 +4138,14 @@ S:	Odd Fixes
 F:	Documentation/devicetree/bindings/arm/cavium-thunder2.txt
 F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
 
+CBS/ETF/TAPRIO QDISCS
+M:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
+S:	Maintained
+L:	netdev@vger.kernel.org
+F:	net/sched/sch_cbs.c
+F:	net/sched/sch_etf.c
+F:	net/sched/sch_taprio.c
+
 CC2520 IEEE-802.15.4 RADIO DRIVER
 M:	Varka Bhadram <varkabhadram@gmail.com>
 L:	linux-wpan@vger.kernel.org
-- 
GitLab


From ad79fd2c42f7626bdf6935cd72134c2a5a59ff2d Mon Sep 17 00:00:00 2001
From: Stefan Roese <sr@denx.de>
Date: Sat, 22 May 2021 09:56:30 +0200
Subject: [PATCH 1431/3804] net: ethernet: mtk_eth_soc: Fix packet statistics
 support for MT7628/88

The MT7628/88 SoC(s) have other (limited) packet counter registers than
currently supported in the mtk_eth_soc driver. This patch adds support
for reading these registers, so that the packet statistics are correctly
updated.

Additionally the defines for the non-MT7628 variant packet counter
registers are added and used in this patch instead of using hard coded
values.

Signed-off-by: Stefan Roese <sr@denx.de>
Fixes: 296c9120752b ("net: ethernet: mediatek: Add MT7628/88 SoC support")
Cc: Felix Fietkau <nbd@nbd.name>
Cc: John Crispin <john@phrozen.org>
Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: Reto Schneider <code@reto-schneider.ch>
Cc: Reto Schneider <reto.schneider@husqvarnagroup.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 67 ++++++++++++++-------
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 24 +++++++-
 2 files changed, 66 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index d6cc06ee0caa5..64adfd24e134d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -681,32 +681,53 @@ static int mtk_set_mac_address(struct net_device *dev, void *p)
 void mtk_stats_update_mac(struct mtk_mac *mac)
 {
 	struct mtk_hw_stats *hw_stats = mac->hw_stats;
-	unsigned int base = MTK_GDM1_TX_GBCNT;
-	u64 stats;
-
-	base += hw_stats->reg_offset;
+	struct mtk_eth *eth = mac->hw;
 
 	u64_stats_update_begin(&hw_stats->syncp);
 
-	hw_stats->rx_bytes += mtk_r32(mac->hw, base);
-	stats =  mtk_r32(mac->hw, base + 0x04);
-	if (stats)
-		hw_stats->rx_bytes += (stats << 32);
-	hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08);
-	hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10);
-	hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14);
-	hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18);
-	hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c);
-	hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20);
-	hw_stats->rx_flow_control_packets +=
-					mtk_r32(mac->hw, base + 0x24);
-	hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28);
-	hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c);
-	hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30);
-	stats =  mtk_r32(mac->hw, base + 0x34);
-	if (stats)
-		hw_stats->tx_bytes += (stats << 32);
-	hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38);
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+		hw_stats->tx_packets += mtk_r32(mac->hw, MT7628_SDM_TPCNT);
+		hw_stats->tx_bytes += mtk_r32(mac->hw, MT7628_SDM_TBCNT);
+		hw_stats->rx_packets += mtk_r32(mac->hw, MT7628_SDM_RPCNT);
+		hw_stats->rx_bytes += mtk_r32(mac->hw, MT7628_SDM_RBCNT);
+		hw_stats->rx_checksum_errors +=
+			mtk_r32(mac->hw, MT7628_SDM_CS_ERR);
+	} else {
+		unsigned int offs = hw_stats->reg_offset;
+		u64 stats;
+
+		hw_stats->rx_bytes += mtk_r32(mac->hw,
+					      MTK_GDM1_RX_GBCNT_L + offs);
+		stats = mtk_r32(mac->hw, MTK_GDM1_RX_GBCNT_H + offs);
+		if (stats)
+			hw_stats->rx_bytes += (stats << 32);
+		hw_stats->rx_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_GPCNT + offs);
+		hw_stats->rx_overflow +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_OERCNT + offs);
+		hw_stats->rx_fcs_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_FERCNT + offs);
+		hw_stats->rx_short_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_SERCNT + offs);
+		hw_stats->rx_long_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_LENCNT + offs);
+		hw_stats->rx_checksum_errors +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_CERCNT + offs);
+		hw_stats->rx_flow_control_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_RX_FCCNT + offs);
+		hw_stats->tx_skip +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_SKIPCNT + offs);
+		hw_stats->tx_collisions +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_COLCNT + offs);
+		hw_stats->tx_bytes +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_L + offs);
+		stats =  mtk_r32(mac->hw, MTK_GDM1_TX_GBCNT_H + offs);
+		if (stats)
+			hw_stats->tx_bytes += (stats << 32);
+		hw_stats->tx_packets +=
+			mtk_r32(mac->hw, MTK_GDM1_TX_GPCNT + offs);
+	}
+
 	u64_stats_update_end(&hw_stats->syncp);
 }
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 11331b44ba079..5ef70dd8b49c6 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -278,8 +278,21 @@
 /* QDMA FQ Free Page Buffer Length Register */
 #define MTK_QDMA_FQ_BLEN	0x1B2C
 
-/* GMA1 Received Good Byte Count Register */
-#define MTK_GDM1_TX_GBCNT	0x2400
+/* GMA1 counter / statistics registers */
+#define MTK_GDM1_RX_GBCNT_L	0x2400
+#define MTK_GDM1_RX_GBCNT_H	0x2404
+#define MTK_GDM1_RX_GPCNT	0x2408
+#define MTK_GDM1_RX_OERCNT	0x2410
+#define MTK_GDM1_RX_FERCNT	0x2414
+#define MTK_GDM1_RX_SERCNT	0x2418
+#define MTK_GDM1_RX_LENCNT	0x241c
+#define MTK_GDM1_RX_CERCNT	0x2420
+#define MTK_GDM1_RX_FCCNT	0x2424
+#define MTK_GDM1_TX_SKIPCNT	0x2428
+#define MTK_GDM1_TX_COLCNT	0x242c
+#define MTK_GDM1_TX_GBCNT_L	0x2430
+#define MTK_GDM1_TX_GBCNT_H	0x2434
+#define MTK_GDM1_TX_GPCNT	0x2438
 #define MTK_STAT_OFFSET		0x40
 
 /* QDMA descriptor txd4 */
@@ -502,6 +515,13 @@
 #define MT7628_SDM_MAC_ADRL	(MT7628_SDM_OFFSET + 0x0c)
 #define MT7628_SDM_MAC_ADRH	(MT7628_SDM_OFFSET + 0x10)
 
+/* Counter / stat register */
+#define MT7628_SDM_TPCNT	(MT7628_SDM_OFFSET + 0x100)
+#define MT7628_SDM_TBCNT	(MT7628_SDM_OFFSET + 0x104)
+#define MT7628_SDM_RPCNT	(MT7628_SDM_OFFSET + 0x108)
+#define MT7628_SDM_RBCNT	(MT7628_SDM_OFFSET + 0x10c)
+#define MT7628_SDM_CS_ERR	(MT7628_SDM_OFFSET + 0x110)
+
 struct mtk_rx_dma {
 	unsigned int rxd1;
 	unsigned int rxd2;
-- 
GitLab


From 5eff1461a6dec84f04fafa9128548bad51d96147 Mon Sep 17 00:00:00 2001
From: Zong Li <zong.li@sifive.com>
Date: Sat, 22 May 2021 17:16:11 +0800
Subject: [PATCH 1432/3804] net: macb: ensure the device is available before
 accessing GEMGXL control registers

If runtime power management is enabled, the gigabit ethernet PLL would
be disabled after macb_probe(). During this period of time, the system
would hang up if we try to access GEMGXL control registers.

We can't put runtime_pm_get/runtime_pm_put there due to the issue of
sleeping inside an atomic section (see 7fa2955ff70ce453 ("sh_eth: Fix
sleeping function called from invalid context")). Add a netif_running()
check to ensure the device is available before accessing the GEMGXL
device.

Changed in v2:
 - Use netif_running instead of its own flag

Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 6bc7d41d519b7..a0c7b1167dbb2 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2867,6 +2867,9 @@ static struct net_device_stats *gem_get_stats(struct macb *bp)
 	struct gem_stats *hwstat = &bp->hw_stats.gem;
 	struct net_device_stats *nstat = &bp->dev->stats;
 
+	if (!netif_running(bp->dev))
+		return nstat;
+
 	gem_update_stats(bp);
 
 	nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors +
-- 
GitLab


From 3a62fed2fd7b6fea96d720e779cafc30dfb3a22e Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 22 May 2021 15:14:45 +0200
Subject: [PATCH 1433/3804] net/sched: fq_pie: re-factor fix for fq_pie endless
 loop

the patch that fixed an endless loop in fq_pie_init() was not considering
that 65535 is a valid class id. The correct bugfix for this infinite loop
is to change 'idx' to become a u32, like Colin proposed in the past [1].

Fix this as follows:
 - restore 65536 as the maximum possible value of 'flows_cnt'
 - use u32 'idx' when iterating on 'q->flows'
 - fix the TDC selftest

This reverts commit bb2f930d6dd708469a587dc9ed1efe1ef969c0bf.

[1] https://lore.kernel.org/netdev/20210407163808.499027-1-colin.king@canonical.com/

CC: Colin Ian King <colin.king@canonical.com>
CC: stable@vger.kernel.org
Fixes: bb2f930d6dd7 ("net/sched: fix infinite loop in sch_fq_pie")
Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_pie.c                                 | 10 +++++-----
 .../selftests/tc-testing/tc-tests/qdiscs/fq_pie.json   |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 949163fe68afd..266c7c1869d93 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -297,9 +297,9 @@ static int fq_pie_change(struct Qdisc *sch, struct nlattr *opt,
 			goto flow_error;
 		}
 		q->flows_cnt = nla_get_u32(tb[TCA_FQ_PIE_FLOWS]);
-		if (!q->flows_cnt || q->flows_cnt >= 65536) {
+		if (!q->flows_cnt || q->flows_cnt > 65536) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Number of flows must range in [1..65535]");
+					   "Number of flows must range in [1..65536]");
 			goto flow_error;
 		}
 	}
@@ -367,7 +367,7 @@ static void fq_pie_timer(struct timer_list *t)
 	struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
 	struct Qdisc *sch = q->sch;
 	spinlock_t *root_lock; /* to lock qdisc for probability calculations */
-	u16 idx;
+	u32 idx;
 
 	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
 	spin_lock(root_lock);
@@ -388,7 +388,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt,
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
 	int err;
-	u16 idx;
+	u32 idx;
 
 	pie_params_init(&q->p_params);
 	sch->limit = 10 * 1024;
@@ -500,7 +500,7 @@ static int fq_pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 static void fq_pie_reset(struct Qdisc *sch)
 {
 	struct fq_pie_sched_data *q = qdisc_priv(sch);
-	u16 idx;
+	u32 idx;
 
 	INIT_LIST_HEAD(&q->new_flows);
 	INIT_LIST_HEAD(&q->old_flows);
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index 1cda2e11b3ad9..773c5027553d2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -9,11 +9,11 @@
         "setup": [
             "$IP link add dev $DUMMY type dummy || /bin/true"
         ],
-        "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
-        "expExitCode": "2",
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536",
+        "expExitCode": "0",
         "verifyCmd": "$TC qdisc show dev $DUMMY",
-        "matchPattern": "qdisc",
-        "matchCount": "0",
+        "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536",
+        "matchCount": "1",
         "teardown": [
             "$IP link del dev $DUMMY"
         ]
-- 
GitLab


From e70f7a11876a1a788ceadf75e9e5f7af2c868680 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Sat, 22 May 2021 15:15:13 +0200
Subject: [PATCH 1434/3804] net/sched: fq_pie: fix OOB access in the traffic
 path

the following script:

  # tc qdisc add dev eth0 handle 0x1 root fq_pie flows 2
  # tc qdisc add dev eth0 clsact
  # tc filter add dev eth0 egress matchall action skbedit priority 0x10002
  # ping 192.0.2.2 -I eth0 -c2 -w1 -q

produces the following splat:

 BUG: KASAN: slab-out-of-bounds in fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie]
 Read of size 4 at addr ffff888171306924 by task ping/942

 CPU: 3 PID: 942 Comm: ping Not tainted 5.12.0+ #441
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 Call Trace:
  dump_stack+0x92/0xc1
  print_address_description.constprop.7+0x1a/0x150
  kasan_report.cold.13+0x7f/0x111
  fq_pie_qdisc_enqueue+0x1314/0x19d0 [sch_fq_pie]
  __dev_queue_xmit+0x1034/0x2b10
  ip_finish_output2+0xc62/0x2120
  __ip_finish_output+0x553/0xea0
  ip_output+0x1ca/0x4d0
  ip_send_skb+0x37/0xa0
  raw_sendmsg+0x1c4b/0x2d00
  sock_sendmsg+0xdb/0x110
  __sys_sendto+0x1d7/0x2b0
  __x64_sys_sendto+0xdd/0x1b0
  do_syscall_64+0x3c/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae
 RIP: 0033:0x7fe69735c3eb
 Code: 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f3 0f 1e fa 48 8d 05 75 42 2c 00 41 89 ca 8b 00 85 c0 75 14 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 75 c3 0f 1f 40 00 41 57 4d 89 c7 41 56 41 89
 RSP: 002b:00007fff06d7fb38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 000055e961413700 RCX: 00007fe69735c3eb
 RDX: 0000000000000040 RSI: 000055e961413700 RDI: 0000000000000003
 RBP: 0000000000000040 R08: 000055e961410500 R09: 0000000000000010
 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fff06d81260
 R13: 00007fff06d7fb40 R14: 00007fff06d7fc30 R15: 000055e96140f0a0

 Allocated by task 917:
  kasan_save_stack+0x19/0x40
  __kasan_kmalloc+0x7f/0xa0
  __kmalloc_node+0x139/0x280
  fq_pie_init+0x555/0x8e8 [sch_fq_pie]
  qdisc_create+0x407/0x11b0
  tc_modify_qdisc+0x3c2/0x17e0
  rtnetlink_rcv_msg+0x346/0x8e0
  netlink_rcv_skb+0x120/0x380
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x719/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5ba/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x3c/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

 The buggy address belongs to the object at ffff888171306800
  which belongs to the cache kmalloc-256 of size 256
 The buggy address is located 36 bytes to the right of
  256-byte region [ffff888171306800, ffff888171306900)
 The buggy address belongs to the page:
 page:00000000bcfb624e refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x171306
 head:00000000bcfb624e order:1 compound_mapcount:0
 flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff)
 raw: 0017ffffc0010200 dead000000000100 dead000000000122 ffff888100042b40
 raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
 page dumped because: kasan: bad access detected

 Memory state around the buggy address:
  ffff888171306800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  ffff888171306880: 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc
 >ffff888171306900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                ^
  ffff888171306980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
  ffff888171306a00: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

fix fq_pie traffic path to avoid selecting 'q->flows + q->flows_cnt' as a
valid flow: it's an address beyond the allocated memory.

Fixes: ec97ecf1ebe4 ("net: sched: add Flow Queue PIE packet scheduler")
CC: stable@vger.kernel.org
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq_pie.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c
index 266c7c1869d93..cac684952edc5 100644
--- a/net/sched/sch_fq_pie.c
+++ b/net/sched/sch_fq_pie.c
@@ -138,8 +138,15 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 	/* Classifies packet into corresponding flow */
 	idx = fq_pie_classify(skb, sch, &ret);
-	sel_flow = &q->flows[idx];
+	if (idx == 0) {
+		if (ret & __NET_XMIT_BYPASS)
+			qdisc_qstats_drop(sch);
+		__qdisc_drop(skb, to_free);
+		return ret;
+	}
+	idx--;
 
+	sel_flow = &q->flows[idx];
 	/* Checks whether adding a new packet would exceed memory limit */
 	get_pie_cb(skb)->mem_usage = skb->truesize;
 	memory_limited = q->memory_usage > q->memory_limit + skb->truesize;
-- 
GitLab


From 474a2ddaa192777522a7499784f1d60691cd831a Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Sun, 23 May 2021 22:51:54 +0800
Subject: [PATCH 1435/3804] net: dsa: mt7530: fix VLAN traffic leaks

PCR_MATRIX field was set to all 1's when VLAN filtering is enabled, but
was not reset when it is disabled, which may cause traffic leaks:

	ip link add br0 type bridge vlan_filtering 1
	ip link add br1 type bridge vlan_filtering 1
	ip link set swp0 master br0
	ip link set swp1 master br1
	ip link set br0 type bridge vlan_filtering 0
	ip link set br1 type bridge vlan_filtering 0
	# traffic in br0 and br1 will start leaking to each other

As port_bridge_{add,del} have set up PCR_MATRIX properly, remove the
PCR_MATRIX write from mt7530_port_set_vlan_aware.

Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 96f7c9eede358..9b90f3d3a8f50 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1262,14 +1262,6 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
 {
 	struct mt7530_priv *priv = ds->priv;
 
-	/* The real fabric path would be decided on the membership in the
-	 * entry of VLAN table. PCR_MATRIX set up here with ALL_MEMBERS
-	 * means potential VLAN can be consisting of certain subset of all
-	 * ports.
-	 */
-	mt7530_rmw(priv, MT7530_PCR_P(port),
-		   PCR_MATRIX_MASK, PCR_MATRIX(MT7530_ALL_MEMBERS));
-
 	/* Trapped into security mode allows packet forwarding through VLAN
 	 * table lookup. CPU port is set to fallback mode to let untagged
 	 * frames pass through.
-- 
GitLab


From 4dd649d130c634415c26df771e09e373f77fc688 Mon Sep 17 00:00:00 2001
From: Aditya Srivastava <yashsri421@gmail.com>
Date: Mon, 24 May 2021 02:39:09 +0530
Subject: [PATCH 1436/3804] NFC: nfcmrvl: fix kernel-doc syntax in file headers

The opening comment mark '/**' is used for highlighting the beginning of
kernel-doc comments.
The header for drivers/nfc/nfcmrvl follows this syntax, but the content
inside does not comply with kernel-doc.

This line was probably not meant for kernel-doc parsing, but is parsed
due to the presence of kernel-doc-like comment syntax (i.e., '/**'), which
causes unexpected warnings from kernel-doc.
For example, running scripts/kernel-doc -none on drivers/nfc/nfcmrvl/spi.c
causes the warning:
warning: expecting prototype for Marvell NFC(). Prototype was for SPI_WAIT_HANDSHAKE() instead

Provide a simple fix by replacing such occurrences with general comment
format, i.e. '/*', to prevent kernel-doc from parsing it.

Signed-off-by: Aditya Srivastava <yashsri421@gmail.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nfc/nfcmrvl/fw_dnld.h | 2 +-
 drivers/nfc/nfcmrvl/i2c.c     | 2 +-
 drivers/nfc/nfcmrvl/nfcmrvl.h | 2 +-
 drivers/nfc/nfcmrvl/spi.c     | 2 +-
 drivers/nfc/nfcmrvl/uart.c    | 2 +-
 drivers/nfc/nfcmrvl/usb.c     | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/nfc/nfcmrvl/fw_dnld.h b/drivers/nfc/nfcmrvl/fw_dnld.h
index ee4a339c05fd0..058ce77b3cbce 100644
--- a/drivers/nfc/nfcmrvl/fw_dnld.h
+++ b/drivers/nfc/nfcmrvl/fw_dnld.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC driver: Firmware downloader
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c
index 18cd96284b77a..c5420616b7bca 100644
--- a/drivers/nfc/nfcmrvl/i2c.c
+++ b/drivers/nfc/nfcmrvl/i2c.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-I2C driver: I2C interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/nfcmrvl.h b/drivers/nfc/nfcmrvl/nfcmrvl.h
index de68ff45e49a4..e84ee18c73aeb 100644
--- a/drivers/nfc/nfcmrvl/nfcmrvl.h
+++ b/drivers/nfc/nfcmrvl/nfcmrvl.h
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC driver
  *
  * Copyright (C) 2014-2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c
index 8e0ddb4347704..dec0d3eb3648a 100644
--- a/drivers/nfc/nfcmrvl/spi.c
+++ b/drivers/nfc/nfcmrvl/spi.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-SPI driver: SPI interface related functions
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/uart.c b/drivers/nfc/nfcmrvl/uart.c
index e5a622ce4b951..7194dd7ef0f1f 100644
--- a/drivers/nfc/nfcmrvl/uart.c
+++ b/drivers/nfc/nfcmrvl/uart.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-UART driver
  *
  * Copyright (C) 2015, Marvell International Ltd.
diff --git a/drivers/nfc/nfcmrvl/usb.c b/drivers/nfc/nfcmrvl/usb.c
index 888e298f610b8..bcd563cb556ce 100644
--- a/drivers/nfc/nfcmrvl/usb.c
+++ b/drivers/nfc/nfcmrvl/usb.c
@@ -1,4 +1,4 @@
-/**
+/*
  * Marvell NFC-over-USB driver: USB interface related functions
  *
  * Copyright (C) 2014, Marvell International Ltd.
-- 
GitLab


From 41daf6ba594d55f201c50280ebcd430590441da1 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 24 May 2021 10:49:41 +0800
Subject: [PATCH 1437/3804] ASoC: core: Fix Null-point-dereference in
 fmt_single_name()

Check the return value of devm_kstrdup() to avoid a NULL pointer
dereference when the allocation fails.

Fixes: 45dd9943fce0 ("ASoC: core: remove artificial component and DAI name constraint")
Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Link: https://lore.kernel.org/r/20210524024941.159952-1-wangkefeng.wang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 1c0904acb9356..a76974ccfce10 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2225,6 +2225,8 @@ static char *fmt_single_name(struct device *dev, int *id)
 		return NULL;
 
 	name = devm_kstrdup(dev, devname, GFP_KERNEL);
+	if (!name)
+		return NULL;
 
 	/* are we a "%s.%d" name (platform and SPI components) */
 	found = strstr(name, dev->driver->name);
-- 
GitLab


From 6f55c5dd1118b3076d11d9cb17f5c5f4bc3a1162 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 24 May 2021 01:42:42 +0300
Subject: [PATCH 1438/3804] regulator: max77620: Use
 device_set_of_node_from_dev()

The MAX77620 driver fails to re-probe on deferred probe because driver
core tries to claim resources that are already claimed by the PINCTRL
device. Use device_set_of_node_from_dev() helper which marks OF node as
reused, skipping erroneous execution of pinctrl_bind_pins() for the PMIC
device on the re-probe.

Fixes: aea6cb99703e ("regulator: resolve supply after creating regulator")
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210523224243.13219-2-digetx@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77620-regulator.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 8d9731e4052bf..5c439c850d090 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -814,6 +814,13 @@ static int max77620_regulator_probe(struct platform_device *pdev)
 	config.dev = dev;
 	config.driver_data = pmic;
 
+	/*
+	 * Set of_node_reuse flag to prevent driver core from attempting to
+	 * claim any pinmux resources already claimed by the parent device.
+	 * Otherwise PMIC driver will fail to re-probe.
+	 */
+	device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+
 	for (id = 0; id < MAX77620_NUM_REGS; id++) {
 		struct regulator_dev *rdev;
 		struct regulator_desc *rdesc;
-- 
GitLab


From 62499a94ce5b9a41047dbadaad885347b1176079 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 24 May 2021 01:42:43 +0300
Subject: [PATCH 1439/3804] regulator: max77620: Silence deferred probe error

One of the previous changes to the regulator core causes PMIC regulators
to re-probe until the supply regulator is registered. Silence the noisy
error message about the deferred probe.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210523224243.13219-3-digetx@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77620-regulator.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/max77620-regulator.c b/drivers/regulator/max77620-regulator.c
index 5c439c850d090..3cf8f085170a0 100644
--- a/drivers/regulator/max77620-regulator.c
+++ b/drivers/regulator/max77620-regulator.c
@@ -846,12 +846,10 @@ static int max77620_regulator_probe(struct platform_device *pdev)
 			return ret;
 
 		rdev = devm_regulator_register(dev, rdesc, &config);
-		if (IS_ERR(rdev)) {
-			ret = PTR_ERR(rdev);
-			dev_err(dev, "Regulator registration %s failed: %d\n",
-				rdesc->name, ret);
-			return ret;
-		}
+		if (IS_ERR(rdev))
+			return dev_err_probe(dev, PTR_ERR(rdev),
+					     "Regulator registration %s failed\n",
+					     rdesc->name);
 	}
 
 	return 0;
-- 
GitLab


From 0514582a1a5b4ac1a3fd64792826d392d7ae9ddc Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 23 May 2021 15:10:44 +0800
Subject: [PATCH 1440/3804] regulator: bd70528: Fix off-by-one for buck123
 .n_voltages setting

The valid selectors for bd70528 bucks are 0 ~ 0xf, so the .n_voltages
should be 16 (0x10). Use 0x10 to make it consistent with BD70528_LDO_VOLTS.
Also remove redundant defines for BD70528_BUCK_VOLTS.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210523071045.2168904-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/rohm-bd70528.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/mfd/rohm-bd70528.h b/include/linux/mfd/rohm-bd70528.h
index a57af878fd0cd..4a5966475a35a 100644
--- a/include/linux/mfd/rohm-bd70528.h
+++ b/include/linux/mfd/rohm-bd70528.h
@@ -26,9 +26,7 @@ struct bd70528_data {
 	struct mutex rtc_timer_lock;
 };
 
-#define BD70528_BUCK_VOLTS 17
-#define BD70528_BUCK_VOLTS 17
-#define BD70528_BUCK_VOLTS 17
+#define BD70528_BUCK_VOLTS 0x10
 #define BD70528_LDO_VOLTS 0x20
 
 #define BD70528_REG_BUCK1_EN	0x0F
-- 
GitLab


From 4c668630bf8ea90a041fc69c9984486e0f56682d Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 23 May 2021 15:10:45 +0800
Subject: [PATCH 1441/3804] regulator: bd71828: Fix .n_voltages settings

Current .n_voltages settings do not cover the last 2 valid selectors,
so it fails to set the voltage for the highest supported voltage.
The last linear range has step_uV = 0, so it does not matter if we
count the .n_voltages to maximum selector + 1 or the first selector of
the last linear range + 1.
To simplify calculating the n_voltages, let's just set the
.n_voltages to maximum selector + 1.

Fixes: 522498f8cb8c ("regulator: bd71828: Basic support for ROHM bd71828 PMIC regulators")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210523071045.2168904-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/rohm-bd71828.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/mfd/rohm-bd71828.h b/include/linux/mfd/rohm-bd71828.h
index 017a4c01cb315..61f0974c33d72 100644
--- a/include/linux/mfd/rohm-bd71828.h
+++ b/include/linux/mfd/rohm-bd71828.h
@@ -26,11 +26,11 @@ enum {
 	BD71828_REGULATOR_AMOUNT,
 };
 
-#define BD71828_BUCK1267_VOLTS		0xEF
-#define BD71828_BUCK3_VOLTS		0x10
-#define BD71828_BUCK4_VOLTS		0x20
-#define BD71828_BUCK5_VOLTS		0x10
-#define BD71828_LDO_VOLTS		0x32
+#define BD71828_BUCK1267_VOLTS		0x100
+#define BD71828_BUCK3_VOLTS		0x20
+#define BD71828_BUCK4_VOLTS		0x40
+#define BD71828_BUCK5_VOLTS		0x20
+#define BD71828_LDO_VOLTS		0x40
 /* LDO6 is fixed 1.8V voltage */
 #define BD71828_LDO_6_VOLTAGE		1800000
 
-- 
GitLab


From 8d6ee30c11a95f84974c2d7f590a7012f27b8f15 Mon Sep 17 00:00:00 2001
From: Hao Fang <fanghao11@huawei.com>
Date: Sat, 22 May 2021 18:25:51 +0800
Subject: [PATCH 1442/3804] regulator: hisilicon: use the correct HiSilicon
 copyright

s/Hisilicon/HiSilicon/.
It should use capital S, according to the official website
https://www.hisilicon.com/en.

Signed-off-by: Hao Fang <fanghao11@huawei.com>
Link: https://lore.kernel.org/r/1621679151-15617-1-git-send-email-fanghao11@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi655x-regulator.c            | 2 +-
 drivers/staging/hikey9xx/hi6421v600-regulator.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/hi655x-regulator.c b/drivers/regulator/hi655x-regulator.c
index ac2ee2030211a..68cdb173196d6 100644
--- a/drivers/regulator/hi655x-regulator.c
+++ b/drivers/regulator/hi655x-regulator.c
@@ -2,7 +2,7 @@
 //
 // Device driver for regulators in Hi655x IC
 //
-// Copyright (c) 2016 Hisilicon.
+// Copyright (c) 2016 HiSilicon Ltd.
 //
 // Authors:
 // Chen Feng <puck.chen@hisilicon.com>
diff --git a/drivers/staging/hikey9xx/hi6421v600-regulator.c b/drivers/staging/hikey9xx/hi6421v600-regulator.c
index f6a14e9c3cbfe..612b964b13406 100644
--- a/drivers/staging/hikey9xx/hi6421v600-regulator.c
+++ b/drivers/staging/hikey9xx/hi6421v600-regulator.c
@@ -3,7 +3,7 @@
 // Device driver for regulators in Hisi IC
 //
 // Copyright (c) 2013 Linaro Ltd.
-// Copyright (c) 2011 Hisilicon.
+// Copyright (c) 2011 HiSilicon Ltd.
 // Copyright (c) 2020-2021 Huawei Technologies Co., Ltd
 //
 // Guodong Xu <guodong.xu@linaro.org>
-- 
GitLab


From 13817d466eb8713a1ffd254f537402f091d48444 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 22 May 2021 19:49:50 +0200
Subject: [PATCH 1443/3804] spi: bcm2835: Fix out-of-bounds access with more
 than 4 slaves

Commit 571e31fa60b3 ("spi: bcm2835: Cache CS register value for
->prepare_message()") limited the number of slaves to 3 at compile-time.
The limitation was necessitated by a statically-sized array prepare_cs[]
in the driver private data which contains a per-slave register value.

The commit sought to enforce the limitation at run-time by setting the
controller's num_chipselect to 3:  Slaves with a higher chipselect are
rejected by spi_add_device().

However the commit neglected that num_chipselect only limits the number
of *native* chipselects.  If GPIO chipselects are specified in the
device tree for more than 3 slaves, num_chipselect is silently raised by
of_spi_get_gpio_numbers() and the result are out-of-bounds accesses to
the statically-sized array prepare_cs[].

As a bandaid fix which is backportable to stable, raise the number of
allowed slaves to 24 (which "ought to be enough for anybody"), enforce
the limitation on slave ->setup and revert num_chipselect to 3 (which is
the number of native chipselects supported by the controller).
An upcoming for-next commit will allow an arbitrary number of slaves.

Fixes: 571e31fa60b3 ("spi: bcm2835: Cache CS register value for ->prepare_message()")
Reported-by: Joe Burmeister <joe.burmeister@devtank.co.uk>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org # v5.4+
Cc: Phil Elwell <phil@raspberrypi.com>
Link: https://lore.kernel.org/r/75854affc1923309fde05e47494263bde73e5592.1621703210.git.lukas@wunner.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 8965fe61c8b44..fe40626e45aa8 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -68,7 +68,7 @@
 #define BCM2835_SPI_FIFO_SIZE		64
 #define BCM2835_SPI_FIFO_SIZE_3_4	48
 #define BCM2835_SPI_DMA_MIN_LENGTH	96
-#define BCM2835_SPI_NUM_CS		4   /* raise as necessary */
+#define BCM2835_SPI_NUM_CS		24  /* raise as necessary */
 #define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
 				| SPI_NO_CS | SPI_3WIRE)
 
@@ -1195,6 +1195,12 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 	struct gpio_chip *chip;
 	u32 cs;
 
+	if (spi->chip_select >= BCM2835_SPI_NUM_CS) {
+		dev_err(&spi->dev, "only %d chip-selects supported\n",
+			BCM2835_SPI_NUM_CS - 1);
+		return -EINVAL;
+	}
+
 	/*
 	 * Precalculate SPI slave's CS register value for ->prepare_message():
 	 * The driver always uses software-controlled GPIO chip select, hence
@@ -1288,7 +1294,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	ctlr->use_gpio_descriptors = true;
 	ctlr->mode_bits = BCM2835_SPI_MODE_BITS;
 	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
-	ctlr->num_chipselect = BCM2835_SPI_NUM_CS;
+	ctlr->num_chipselect = 3;
 	ctlr->setup = bcm2835_spi_setup;
 	ctlr->transfer_one = bcm2835_spi_transfer_one;
 	ctlr->handle_err = bcm2835_spi_handle_err;
-- 
GitLab


From 5ba3747dbc9ade2d22a8f5bff3c928cb41d35030 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 22 May 2021 17:14:27 -0700
Subject: [PATCH 1444/3804] regulator: bd71815: add select to fix build

Mend the Kconfig for REGULATOR_BD71815 to prevent build errors:

riscv32-linux-ld: drivers/regulator/bd71815-regulator.o: in function `.L0 ':
regulator.c:289: undefined reference to `rohm_regulator_set_dvs_levels'
riscv32-linux-ld: drivers/regulator/bd71815-regulator.c:370: undefined reference to `rohm_regulator_set_dvs_levels'

Fixes: 1aad39001e85 ("regulator: Support ROHM BD71815 regulators")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210523001427.13500-1-rdunlap@infradead.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 9d84d9245490e..9aeb32c320aab 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -207,6 +207,7 @@ config REGULATOR_BD70528
 config REGULATOR_BD71815
 	tristate "ROHM BD71815 Power Regulator"
 	depends on MFD_ROHM_BD71828
+	select REGULATOR_ROHM
 	help
 	  This driver supports voltage regulators on ROHM BD71815 PMIC.
 	  This will enable support for the software controllable buck
-- 
GitLab


From 8590ccd4dfd207d89c3312cf8a8b25990acaa079 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 22 May 2021 12:08:14 +0800
Subject: [PATCH 1445/3804] regulator: max8973: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of an open-coded implementation.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210522040814.2042397-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max8973-regulator.c | 37 +++++++--------------------
 1 file changed, 9 insertions(+), 28 deletions(-)

diff --git a/drivers/regulator/max8973-regulator.c b/drivers/regulator/max8973-regulator.c
index 9aee1444181da..8da8f9b6c4fd2 100644
--- a/drivers/regulator/max8973-regulator.c
+++ b/drivers/regulator/max8973-regulator.c
@@ -265,33 +265,6 @@ static unsigned int max8973_dcdc_get_mode(struct regulator_dev *rdev)
 		REGULATOR_MODE_FAST : REGULATOR_MODE_NORMAL;
 }
 
-static int max8973_set_ramp_delay(struct regulator_dev *rdev,
-		int ramp_delay)
-{
-	struct max8973_chip *max = rdev_get_drvdata(rdev);
-	unsigned int control;
-	int ret;
-
-	/* Set ramp delay */
-	if (ramp_delay <= 12000)
-		control = MAX8973_RAMP_12mV_PER_US;
-	else if (ramp_delay <= 25000)
-		control = MAX8973_RAMP_25mV_PER_US;
-	else if (ramp_delay <= 50000)
-		control = MAX8973_RAMP_50mV_PER_US;
-	else if (ramp_delay <= 200000)
-		control = MAX8973_RAMP_200mV_PER_US;
-	else
-		return -EINVAL;
-
-	ret = regmap_update_bits(max->regmap, MAX8973_CONTROL1,
-			MAX8973_RAMP_MASK, control);
-	if (ret < 0)
-		dev_err(max->dev, "register %d update failed, %d",
-				MAX8973_CONTROL1, ret);
-	return ret;
-}
-
 static int max8973_set_current_limit(struct regulator_dev *rdev,
 		int min_ua, int max_ua)
 {
@@ -341,6 +314,10 @@ static int max8973_get_current_limit(struct regulator_dev *rdev)
 	return 9000000;
 }
 
+static const unsigned int max8973_buck_ramp_table[] = {
+	12000, 25000, 50000, 200000
+};
+
 static const struct regulator_ops max8973_dcdc_ops = {
 	.get_voltage_sel	= max8973_dcdc_get_voltage_sel,
 	.set_voltage_sel	= max8973_dcdc_set_voltage_sel,
@@ -348,7 +325,7 @@ static const struct regulator_ops max8973_dcdc_ops = {
 	.set_mode		= max8973_dcdc_set_mode,
 	.get_mode		= max8973_dcdc_get_mode,
 	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
-	.set_ramp_delay		= max8973_set_ramp_delay,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 };
 
 static int max8973_init_dcdc(struct max8973_chip *max,
@@ -694,6 +671,10 @@ static int max8973_probe(struct i2c_client *client,
 	max->desc.min_uV = MAX8973_MIN_VOLATGE;
 	max->desc.uV_step = MAX8973_VOLATGE_STEP;
 	max->desc.n_voltages = MAX8973_BUCK_N_VOLTAGE;
+	max->desc.ramp_reg = MAX8973_CONTROL1;
+	max->desc.ramp_mask = MAX8973_RAMP_MASK;
+	max->desc.ramp_delay_table = max8973_buck_ramp_table;
+	max->desc.n_ramp_values = ARRAY_SIZE(max8973_buck_ramp_table);
 
 	max->dvs_gpio = (pdata->dvs_gpio) ? pdata->dvs_gpio : -EINVAL;
 	max->enable_external_control = pdata->enable_ext_control;
-- 
GitLab


From 7c556aec14099c87c95bb7011c74fafe45d93679 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 22 May 2021 20:42:50 +0800
Subject: [PATCH 1446/3804] regulator: bd70528: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of an open-coded implementation.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210522124250.2121076-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd70528-regulator.c | 37 ++++++++++++---------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c
index 1f5f9482b209c..e6fec70fabfa3 100644
--- a/drivers/regulator/bd70528-regulator.c
+++ b/drivers/regulator/bd70528-regulator.c
@@ -16,10 +16,6 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/slab.h>
 
-#define BUCK_RAMPRATE_250MV 0
-#define BUCK_RAMPRATE_125MV 1
-#define BUCK_RAMP_MAX 250
-
 static const struct linear_range bd70528_buck1_volts[] = {
 	REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x1, 600000),
 	REGULATOR_LINEAR_RANGE(2750000, 0x2, 0xf, 50000),
@@ -47,22 +43,9 @@ static const unsigned int led_volts[] = {
 	20000, 30000
 };
 
-static int bd70528_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	if (ramp_delay > 0 && ramp_delay <= BUCK_RAMP_MAX) {
-		unsigned int ramp_value = BUCK_RAMPRATE_250MV;
-
-		if (ramp_delay <= 125)
-			ramp_value = BUCK_RAMPRATE_125MV;
-
-		return regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
-				  BD70528_MASK_BUCK_RAMP,
-				  ramp_value << BD70528_SIFT_BUCK_RAMP);
-	}
-	dev_err(&rdev->dev, "%s: ramp_delay: %d not supported\n",
-		rdev->desc->name, ramp_delay);
-	return -EINVAL;
-}
+static const unsigned int bd70528_buck_ramp_table[] = {
+	250, 125
+};
 
 static int bd70528_led_set_voltage_sel(struct regulator_dev *rdev,
 				       unsigned int sel)
@@ -90,7 +73,7 @@ static const struct regulator_ops bd70528_buck_ops = {
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
-	.set_ramp_delay = bd70528_set_ramp_delay,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 };
 
 static const struct regulator_ops bd70528_ldo_ops = {
@@ -127,6 +110,10 @@ static const struct regulator_desc bd70528_desc[] = {
 		.enable_mask = BD70528_MASK_RUN_EN,
 		.vsel_reg = BD70528_REG_BUCK1_VOLT,
 		.vsel_mask = BD70528_MASK_BUCK_VOLT,
+		.ramp_reg = BD70528_REG_BUCK1_VOLT,
+		.ramp_mask = BD70528_MASK_BUCK_RAMP,
+		.ramp_delay_table = bd70528_buck_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
 		.owner = THIS_MODULE,
 	},
 	{
@@ -143,6 +130,10 @@ static const struct regulator_desc bd70528_desc[] = {
 		.enable_mask = BD70528_MASK_RUN_EN,
 		.vsel_reg = BD70528_REG_BUCK2_VOLT,
 		.vsel_mask = BD70528_MASK_BUCK_VOLT,
+		.ramp_reg = BD70528_REG_BUCK2_VOLT,
+		.ramp_mask = BD70528_MASK_BUCK_RAMP,
+		.ramp_delay_table = bd70528_buck_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
 		.owner = THIS_MODULE,
 	},
 	{
@@ -159,6 +150,10 @@ static const struct regulator_desc bd70528_desc[] = {
 		.enable_mask = BD70528_MASK_RUN_EN,
 		.vsel_reg = BD70528_REG_BUCK3_VOLT,
 		.vsel_mask = BD70528_MASK_BUCK_VOLT,
+		.ramp_reg = BD70528_REG_BUCK3_VOLT,
+		.ramp_mask = BD70528_MASK_BUCK_RAMP,
+		.ramp_delay_table = bd70528_buck_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
 		.owner = THIS_MODULE,
 	},
 	{
-- 
GitLab


From 42a7dfa26fc6df1624d7c2955200e5053dd0b818 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sat, 22 May 2021 09:44:52 +0200
Subject: [PATCH 1447/3804] spi: ath79: drop platform data

The ath79 platform has been converted to pure OF. The platform data is
not needed anymore because of this.

Signed-off-by: David Bauer <mail@david-bauer.net>
Link: https://lore.kernel.org/r/20210522074453.39299-1-mail@david-bauer.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c                 |  8 --------
 include/linux/platform_data/spi-ath79.h | 16 ----------------
 2 files changed, 24 deletions(-)
 delete mode 100644 include/linux/platform_data/spi-ath79.h

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 98ace748cd986..497d5c028496a 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -19,7 +19,6 @@
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <linux/platform_data/spi-ath79.h>
 
 #define DRV_NAME	"ath79-spi"
 
@@ -138,7 +137,6 @@ static int ath79_spi_probe(struct platform_device *pdev)
 {
 	struct spi_master *master;
 	struct ath79_spi *sp;
-	struct ath79_spi_platform_data *pdata;
 	unsigned long rate;
 	int ret;
 
@@ -152,15 +150,9 @@ static int ath79_spi_probe(struct platform_device *pdev)
 	master->dev.of_node = pdev->dev.of_node;
 	platform_set_drvdata(pdev, sp);
 
-	pdata = dev_get_platdata(&pdev->dev);
-
 	master->use_gpio_descriptors = true;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	master->flags = SPI_MASTER_GPIO_SS;
-	if (pdata) {
-		master->bus_num = pdata->bus_num;
-		master->num_chipselect = pdata->num_chipselect;
-	}
 
 	sp->bitbang.master = master;
 	sp->bitbang.chipselect = ath79_spi_chipselect;
diff --git a/include/linux/platform_data/spi-ath79.h b/include/linux/platform_data/spi-ath79.h
deleted file mode 100644
index 81a388ff58ccd..0000000000000
--- a/include/linux/platform_data/spi-ath79.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Platform data definition for Atheros AR71XX/AR724X/AR913X SPI controller
- *
- *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
- */
-
-#ifndef _ATH79_SPI_PLATFORM_H
-#define _ATH79_SPI_PLATFORM_H
-
-struct ath79_spi_platform_data {
-	unsigned	bus_num;
-	unsigned	num_chipselect;
-};
-
-#endif /* _ATH79_SPI_PLATFORM_H */
-- 
GitLab


From ab053f48f9264ed5c714d0427b3115f121d4c476 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Sat, 22 May 2021 09:44:53 +0200
Subject: [PATCH 1448/3804] spi: ath79: set number of chipselect lines

All chipsets from AR7100 up to QCA9563 have three dedicated chipselect
lines for the integrated SPI controller. Set the number of chipselect
lines available on the controller to this value.

Signed-off-by: David Bauer <mail@david-bauer.net>
Link: https://lore.kernel.org/r/20210522074453.39299-2-mail@david-bauer.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-ath79.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 497d5c028496a..d1e287d2d9cdc 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -153,6 +153,7 @@ static int ath79_spi_probe(struct platform_device *pdev)
 	master->use_gpio_descriptors = true;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	master->flags = SPI_MASTER_GPIO_SS;
+	master->num_chipselect = 3;
 
 	sp->bitbang.master = master;
 	sp->bitbang.chipselect = ath79_spi_chipselect;
-- 
GitLab


From 84fca8ba620581067c16f2b578f277b1c72fb74b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:27 +0200
Subject: [PATCH 1449/3804] perf/arm-ccn: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.128250213@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-ccn.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 96d47cb302dd1..a96c316045459 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1211,7 +1211,7 @@ static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&dt->pmu, cpu, target);
 	dt->cpu = target;
 	if (ccn->irq)
-		WARN_ON(irq_set_affinity_hint(ccn->irq, cpumask_of(dt->cpu)));
+		WARN_ON(irq_set_affinity(ccn->irq, cpumask_of(dt->cpu)));
 	return 0;
 }
 
@@ -1291,7 +1291,7 @@ static int arm_ccn_pmu_init(struct arm_ccn *ccn)
 
 	/* Also make sure that the overflow interrupt is handled by this CPU */
 	if (ccn->irq) {
-		err = irq_set_affinity_hint(ccn->irq, cpumask_of(ccn->dt.cpu));
+		err = irq_set_affinity(ccn->irq, cpumask_of(ccn->dt.cpu));
 		if (err) {
 			dev_err(ccn->dev, "Failed to set interrupt affinity!\n");
 			goto error_set_affinity;
@@ -1325,8 +1325,6 @@ static void arm_ccn_pmu_cleanup(struct arm_ccn *ccn)
 
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
 					    &ccn->dt.node);
-	if (ccn->irq)
-		irq_set_affinity_hint(ccn->irq, NULL);
 	for (i = 0; i < ccn->num_xps; i++)
 		writel(0, ccn->xp[i].base + CCN_XP_DT_CONTROL);
 	writel(0, ccn->dt.base + CCN_DT_PMCR);
-- 
GitLab


From 8ec25d34012da3bf417a4d16c057a54064626058 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:28 +0200
Subject: [PATCH 1450/3804] perf/arm-cmn: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.277228577@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 56a5c355701d0..9417e9c5bcb34 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1162,7 +1162,7 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 	perf_pmu_migrate_context(&cmn->pmu, cpu, target);
 	for (i = 0; i < cmn->num_dtcs; i++)
-		irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target));
+		irq_set_affinity(cmn->dtc[i].irq, cpumask_of(target));
 	cmn->cpu = target;
 	return 0;
 }
@@ -1222,7 +1222,7 @@ static int arm_cmn_init_irqs(struct arm_cmn *cmn)
 		if (err)
 			return err;
 
-		err = irq_set_affinity_hint(irq, cpumask_of(cmn->cpu));
+		err = irq_set_affinity(irq, cpumask_of(cmn->cpu));
 		if (err)
 			return err;
 	next:
@@ -1568,16 +1568,11 @@ static int arm_cmn_probe(struct platform_device *pdev)
 static int arm_cmn_remove(struct platform_device *pdev)
 {
 	struct arm_cmn *cmn = platform_get_drvdata(pdev);
-	int i;
 
 	writel_relaxed(0, cmn->dtc[0].base + CMN_DT_DTC_CTL);
 
 	perf_pmu_unregister(&cmn->pmu);
 	cpuhp_state_remove_instance(arm_cmn_hp_state, &cmn->cpuhp_node);
-
-	for (i = 0; i < cmn->num_dtcs; i++)
-		irq_set_affinity_hint(cmn->dtc[i].irq, NULL);
-
 	return 0;
 }
 
-- 
GitLab


From 1ceeb8d430f5ea780b8f7d02466a7454cc845528 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:29 +0200
Subject: [PATCH 1451/3804] perf/arm-dmc620: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.395086573@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dmc620_pmu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index b6c2511d59af2..280a6ae3e27cf 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -421,7 +421,7 @@ static struct dmc620_pmu_irq *__dmc620_pmu_get_irq(int irq_num)
 	if (ret)
 		goto out_free_aff;
 
-	ret = irq_set_affinity_hint(irq_num, cpumask_of(irq->cpu));
+	ret = irq_set_affinity(irq_num, cpumask_of(irq->cpu));
 	if (ret)
 		goto out_free_irq;
 
@@ -475,7 +475,6 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
 	list_del(&irq->irqs_node);
 	mutex_unlock(&dmc620_pmu_irqs_lock);
 
-	WARN_ON(irq_set_affinity_hint(irq->irq_num, NULL));
 	free_irq(irq->irq_num, irq);
 	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &irq->node);
 	kfree(irq);
@@ -622,7 +621,7 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
 		perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
 	mutex_unlock(&dmc620_pmu_irqs_lock);
 
-	WARN_ON(irq_set_affinity_hint(irq->irq_num, cpumask_of(target)));
+	WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
 	irq->cpu = target;
 
 	return 0;
-- 
GitLab


From 41ea281724c097e15aca1a8522abbfa54a60acde Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:30 +0200
Subject: [PATCH 1452/3804] perf/arm-dsu: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.505110632@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_dsu_pmu.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
index 196faea074d04..a36698a90d2f2 100644
--- a/drivers/perf/arm_dsu_pmu.c
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -687,7 +687,7 @@ static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu)
 static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu)
 {
 	cpumask_set_cpu(cpu, &dsu_pmu->active_cpu);
-	if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu))
+	if (irq_set_affinity(dsu_pmu->irq, &dsu_pmu->active_cpu))
 		pr_warn("Failed to set irq affinity to %d\n", cpu);
 }
 
@@ -769,7 +769,6 @@ static int dsu_pmu_device_probe(struct platform_device *pdev)
 	if (rc) {
 		cpuhp_state_remove_instance(dsu_pmu_cpuhp_state,
 						 &dsu_pmu->cpuhp_node);
-		irq_set_affinity_hint(dsu_pmu->irq, NULL);
 	}
 
 	return rc;
@@ -781,7 +780,6 @@ static int dsu_pmu_device_remove(struct platform_device *pdev)
 
 	perf_pmu_unregister(&dsu_pmu->pmu);
 	cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
-	irq_set_affinity_hint(dsu_pmu->irq, NULL);
 
 	return 0;
 }
@@ -840,10 +838,8 @@ static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
 
 	dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
 	/* If there are no active CPUs in the DSU, leave IRQ disabled */
-	if (dst >= nr_cpu_ids) {
-		irq_set_affinity_hint(dsu_pmu->irq, NULL);
+	if (dst >= nr_cpu_ids)
 		return 0;
-	}
 
 	perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst);
 	dsu_pmu_set_active_cpu(dst, dsu_pmu);
-- 
GitLab


From 2621054535955fb78ea96b76b279eb481f40fcef Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:31 +0200
Subject: [PATCH 1453/3804] perf/arm-smmuv3: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.603636289@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_smmuv3_pmu.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index ff6fab4bae30d..7786ccc6d12f2 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -628,7 +628,7 @@ static int smmu_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 
 	perf_pmu_migrate_context(&smmu_pmu->pmu, cpu, target);
 	smmu_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq, cpumask_of(target)));
+	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(target)));
 
 	return 0;
 }
@@ -839,15 +839,14 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 
 	/* Pick one CPU to be the preferred one to use */
 	smmu_pmu->on_cpu = raw_smp_processor_id();
-	WARN_ON(irq_set_affinity_hint(smmu_pmu->irq,
-				      cpumask_of(smmu_pmu->on_cpu)));
+	WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
 
 	err = cpuhp_state_add_instance_nocalls(cpuhp_state_num,
 					       &smmu_pmu->node);
 	if (err) {
 		dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
 			err, &res_0->start);
-		goto out_clear_affinity;
+		return err;
 	}
 
 	err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
@@ -866,8 +865,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
 
 out_unregister:
 	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-out_clear_affinity:
-	irq_set_affinity_hint(smmu_pmu->irq, NULL);
 	return err;
 }
 
@@ -877,7 +874,6 @@ static int smmu_pmu_remove(struct platform_device *pdev)
 
 	perf_pmu_unregister(&smmu_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-	irq_set_affinity_hint(smmu_pmu->irq, NULL);
 
 	return 0;
 }
-- 
GitLab


From ba4489fb949cbd9c9b877dceae361129ed6280f1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:32 +0200
Subject: [PATCH 1454/3804] perf/imx_ddr: Use irq_set_affinity()

The driver uses irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Frank Li <Frank.li@nxp.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.699566062@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 2bbb931880649..df048fe42fc2a 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -674,7 +674,7 @@ static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&pmu->pmu, cpu, target);
 	pmu->cpu = target;
 
-	WARN_ON(irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu)));
+	WARN_ON(irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu)));
 
 	return 0;
 }
@@ -749,7 +749,7 @@ static int ddr_perf_probe(struct platform_device *pdev)
 	}
 
 	pmu->irq = irq;
-	ret = irq_set_affinity_hint(pmu->irq, cpumask_of(pmu->cpu));
+	ret = irq_set_affinity(pmu->irq, cpumask_of(pmu->cpu));
 	if (ret) {
 		dev_err(pmu->dev, "Failed to set interrupt affinity!\n");
 		goto ddr_perf_err;
@@ -777,7 +777,6 @@ static int ddr_perf_remove(struct platform_device *pdev)
 
 	cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
 	cpuhp_remove_multi_state(pmu->cpuhp_state);
-	irq_set_affinity_hint(pmu->irq, NULL);
 
 	perf_pmu_unregister(&pmu->pmu);
 
-- 
GitLab


From 77b06ddc04354293f746d0434f00700110d3392d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 18 May 2021 11:17:33 +0200
Subject: [PATCH 1455/3804] perf/hisi: Use irq_set_affinity()

These drivers use irq_set_affinity_hint() to set the affinity for the PMU
interrupts, which relies on the undocumented side effect that this function
actually sets the affinity under the hood.

Setting a hint is clearly not a guarantee and for these PMU interrupts an
affinity hint, which is supposed to guide userspace for setting affinity,
is beyond pointless, because the affinity of these interrupts cannot be
modified from user space.

Aside from that the error checks are bogus because the only error which is
returned from irq_set_affinity_hint() is when there is no irq descriptor
for the interrupt number, but not when the affinity set fails. That's on
purpose because the hint can point to an offline CPU.

Replace the mindless abuse with irq_set_affinity().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Shaokun Zhang <zhangshaokun@hisilicon.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518093118.813375875@linutronix.de
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 3 ---
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c  | 3 ---
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c  | 3 ---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c   | 3 ---
 drivers/perf/hisilicon/hisi_uncore_pmu.c      | 4 ++--
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 3 ---
 6 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 7c8a4bc21db4c..0c7777bf1542d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -537,7 +537,6 @@ static int hisi_ddrc_pmu_probe(struct platform_device *pdev)
 		dev_err(ddrc_pmu->dev, "DDRC PMU register failed!\n");
 		cpuhp_state_remove_instance_nocalls(
 			CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, &ddrc_pmu->node);
-		irq_set_affinity_hint(ddrc_pmu->irq, NULL);
 	}
 
 	return ret;
@@ -550,8 +549,6 @@ static int hisi_ddrc_pmu_remove(struct platform_device *pdev)
 	perf_pmu_unregister(&ddrc_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE,
 					    &ddrc_pmu->node);
-	irq_set_affinity_hint(ddrc_pmu->irq, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 0316fabe32f1a..12eb41ab1b8af 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -540,7 +540,6 @@ static int hisi_hha_pmu_probe(struct platform_device *pdev)
 		dev_err(hha_pmu->dev, "HHA PMU register failed!\n");
 		cpuhp_state_remove_instance_nocalls(
 			CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, &hha_pmu->node);
-		irq_set_affinity_hint(hha_pmu->irq, NULL);
 	}
 
 	return ret;
@@ -553,8 +552,6 @@ static int hisi_hha_pmu_remove(struct platform_device *pdev)
 	perf_pmu_unregister(&hha_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE,
 					    &hha_pmu->node);
-	irq_set_affinity_hint(hha_pmu->irq, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index bf9f7772cac96..773f69538090d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -578,7 +578,6 @@ static int hisi_l3c_pmu_probe(struct platform_device *pdev)
 		dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
 		cpuhp_state_remove_instance_nocalls(
 			CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, &l3c_pmu->node);
-		irq_set_affinity_hint(l3c_pmu->irq, NULL);
 	}
 
 	return ret;
@@ -591,8 +590,6 @@ static int hisi_l3c_pmu_remove(struct platform_device *pdev)
 	perf_pmu_unregister(&l3c_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
 					    &l3c_pmu->node);
-	irq_set_affinity_hint(l3c_pmu->irq, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index 14f23eb312484..e1f71eab56409 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -436,7 +436,6 @@ static int hisi_pa_pmu_probe(struct platform_device *pdev)
 		dev_err(pa_pmu->dev, "PMU register failed, ret = %d\n", ret);
 		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
 					    &pa_pmu->node);
-		irq_set_affinity_hint(pa_pmu->irq, NULL);
 		return ret;
 	}
 
@@ -451,8 +450,6 @@ static int hisi_pa_pmu_remove(struct platform_device *pdev)
 	perf_pmu_unregister(&pa_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_PA_ONLINE,
 					    &pa_pmu->node);
-	irq_set_affinity_hint(pa_pmu->irq, NULL);
-
 	return 0;
 }
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 13c68b5e39c4c..5842593632e43 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -488,7 +488,7 @@ int hisi_uncore_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
 	hisi_pmu->on_cpu = cpu;
 
 	/* Overflow interrupt also should use the same CPU */
-	WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(cpu)));
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(cpu)));
 
 	return 0;
 }
@@ -521,7 +521,7 @@ int hisi_uncore_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 	perf_pmu_migrate_context(&hisi_pmu->pmu, cpu, target);
 	/* Use this CPU for event counting */
 	hisi_pmu->on_cpu = target;
-	WARN_ON(irq_set_affinity_hint(hisi_pmu->irq, cpumask_of(target)));
+	WARN_ON(irq_set_affinity(hisi_pmu->irq, cpumask_of(target)));
 
 	return 0;
 }
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 46be312fa1262..835ec3e2178fe 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -465,7 +465,6 @@ static int hisi_sllc_pmu_probe(struct platform_device *pdev)
 		dev_err(sllc_pmu->dev, "PMU register failed, ret = %d\n", ret);
 		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
 					    &sllc_pmu->node);
-		irq_set_affinity_hint(sllc_pmu->irq, NULL);
 		return ret;
 	}
 
@@ -481,8 +480,6 @@ static int hisi_sllc_pmu_remove(struct platform_device *pdev)
 	perf_pmu_unregister(&sllc_pmu->pmu);
 	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_HISI_SLLC_ONLINE,
 					    &sllc_pmu->node);
-	irq_set_affinity_hint(sllc_pmu->irq, NULL);
-
 	return 0;
 }
 
-- 
GitLab


From 0bc3ee92880d910a1d100b73a781904f359e1f1c Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Sun, 23 May 2021 09:58:54 +0800
Subject: [PATCH 1456/3804] usb: typec: tcpm: Properly interrupt VDM AMS

When a VDM AMS is interrupted by Messages other than VDM, the AMS needs
to be finished properly. Also start a VDM AMS if receiving SVDM Commands
from the port partner to complement the functionality of tcpm_vdm_ams().

Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210523015855.1785484-2-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 8fdfd7f65ad77..6ea5df3782cf5 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1550,6 +1550,8 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 			if (PD_VDO_SVDM_VER(p[0]) < svdm_version)
 				typec_partner_set_svdm_version(port->partner,
 							       PD_VDO_SVDM_VER(p[0]));
+
+			tcpm_ams_start(port, DISCOVER_IDENTITY);
 			/* 6.4.4.3.1: Only respond as UFP (device) */
 			if (port->data_role == TYPEC_DEVICE &&
 			    port->nr_snk_vdo) {
@@ -1568,14 +1570,19 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 			}
 			break;
 		case CMD_DISCOVER_SVID:
+			tcpm_ams_start(port, DISCOVER_SVIDS);
 			break;
 		case CMD_DISCOVER_MODES:
+			tcpm_ams_start(port, DISCOVER_MODES);
 			break;
 		case CMD_ENTER_MODE:
+			tcpm_ams_start(port, DFP_TO_UFP_ENTER_MODE);
 			break;
 		case CMD_EXIT_MODE:
+			tcpm_ams_start(port, DFP_TO_UFP_EXIT_MODE);
 			break;
 		case CMD_ATTENTION:
+			tcpm_ams_start(port, ATTENTION);
 			/* Attention command does not have response */
 			*adev_action = ADEV_ATTENTION;
 			return 0;
@@ -2287,6 +2294,12 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 	bool frs_enable;
 	int ret;
 
+	if (tcpm_vdm_ams(port) && type != PD_DATA_VENDOR_DEF) {
+		port->vdm_state = VDM_STATE_ERR_BUSY;
+		tcpm_ams_finish(port);
+		mod_vdm_delayed_work(port, 0);
+	}
+
 	switch (type) {
 	case PD_DATA_SOURCE_CAP:
 		for (i = 0; i < cnt; i++)
@@ -2459,6 +2472,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port,
 	enum pd_ctrl_msg_type type = pd_header_type_le(msg->header);
 	enum tcpm_state next_state;
 
+	/*
+	 * Stop VDM state machine if interrupted by other Messages. NOT_SUPP is allowed in a VDM
+	 * AMS while waiting for VDM responses and will be handled later.
+	 */
+	if (tcpm_vdm_ams(port) && type != PD_CTRL_NOT_SUPP && type != PD_CTRL_GOOD_CRC) {
+		port->vdm_state = VDM_STATE_ERR_BUSY;
+		tcpm_ams_finish(port);
+		mod_vdm_delayed_work(port, 0);
+	}
+
 	switch (type) {
 	case PD_CTRL_GOOD_CRC:
 	case PD_CTRL_PING:
@@ -2717,6 +2740,13 @@ static void tcpm_pd_ext_msg_request(struct tcpm_port *port,
 	enum pd_ext_msg_type type = pd_header_type_le(msg->header);
 	unsigned int data_size = pd_ext_header_data_size_le(msg->ext_msg.header);
 
+	/* stopping VDM state machine if interrupted by other Messages */
+	if (tcpm_vdm_ams(port)) {
+		port->vdm_state = VDM_STATE_ERR_BUSY;
+		tcpm_ams_finish(port);
+		mod_vdm_delayed_work(port, 0);
+	}
+
 	if (!(le16_to_cpu(msg->ext_msg.header) & PD_EXT_HDR_CHUNKED)) {
 		tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS);
 		tcpm_log(port, "Unchunked extended messages unsupported");
-- 
GitLab


From a20dcf53ea9836387b229c4878f9559cf1b55b71 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Sun, 23 May 2021 09:58:55 +0800
Subject: [PATCH 1457/3804] usb: typec: tcpm: Respond Not_Supported if no
 snk_vdo

If snk_vdo is not populated from fwnode, it implies the port does not
support responding to SVDM commands. Not_Supported Message shall be sent
if the contract is in PD3. And for PD2, the port shall ignore the
commands.

Fixes: 193a68011fdc ("staging: typec: tcpm: Respond to Discover Identity commands")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210523015855.1785484-3-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 6ea5df3782cf5..9ce8c9af4da58 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2430,7 +2430,10 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 					   NONE_AMS);
 		break;
 	case PD_DATA_VENDOR_DEF:
-		tcpm_handle_vdm_request(port, msg->payload, cnt);
+		if (tcpm_vdm_ams(port) || port->nr_snk_vdo)
+			tcpm_handle_vdm_request(port, msg->payload, cnt);
+		else if (port->negotiated_rev > PD_REV20)
+			tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS);
 		break;
 	case PD_DATA_BIST:
 		port->bist_request = le32_to_cpu(msg->payload[0]);
-- 
GitLab


From e752dbc59e1241b13b8c4f7b6eb582862e7668fe Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Mon, 24 May 2021 15:01:55 +0900
Subject: [PATCH 1458/3804] usb: gadget: udc: renesas_usb3: Fix a race in
 usb3_start_pipen()

usb3_start_pipen() is called by both renesas_usb3_ep_queue() and
usb3_request_done_pipen(), so it can race when getting usb3_first_req,
as shown below:

renesas_usb3_ep_queue()
 spin_lock_irqsave()
 list_add_tail()
 spin_unlock_irqrestore()
 usb3_start_pipen()
  usb3_first_req = usb3_get_request() --- [1]
 --- interrupt ---
 usb3_irq_dma_int()
 usb3_request_done_pipen()
  usb3_get_request()
  usb3_start_pipen()
  usb3_first_req = usb3_get_request()
  ...
  (the req is possible to be finished in the interrupt)

The usb3_first_req obtained at [1] above may already have been finished
by the time the interrupt ends, so the driver could wrongly start a
transfer for it. To fix this issue, get and check usb3_first_req inside
the same spin_lock_irqsave() critical section.

Fixes: 746bfe63bba3 ("usb: gadget: renesas_usb3: add support for Renesas USB3.0 peripheral controller")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/20210524060155.1178724-1-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 0c418ce50ba0f..f1b35a39d1ba8 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -1488,7 +1488,7 @@ static void usb3_start_pipen(struct renesas_usb3_ep *usb3_ep,
 			     struct renesas_usb3_request *usb3_req)
 {
 	struct renesas_usb3 *usb3 = usb3_ep_to_usb3(usb3_ep);
-	struct renesas_usb3_request *usb3_req_first = usb3_get_request(usb3_ep);
+	struct renesas_usb3_request *usb3_req_first;
 	unsigned long flags;
 	int ret = -EAGAIN;
 	u32 enable_bits = 0;
@@ -1496,7 +1496,8 @@ static void usb3_start_pipen(struct renesas_usb3_ep *usb3_ep,
 	spin_lock_irqsave(&usb3->lock, flags);
 	if (usb3_ep->halt || usb3_ep->started)
 		goto out;
-	if (usb3_req != usb3_req_first)
+	usb3_req_first = __usb3_get_request(usb3_ep);
+	if (!usb3_req_first || usb3_req != usb3_req_first)
 		goto out;
 
 	if (usb3_pn_change(usb3, usb3_ep->num) < 0)
-- 
GitLab


From 558642bccede3d0e6ffebe4106b0719e29b9e4a8 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 20 May 2021 15:34:58 +0800
Subject: [PATCH 1459/3804] PM: wakeirq: Set IRQF_NO_AUTOEN when requesting the
 IRQ

request_irq() after setting IRQ_NOAUTOEN as below
irq_set_status_flags(irq, IRQ_NOAUTOEN);
request_irq(dev, irq...);
can be replaced by request_irq() with IRQF_NO_AUTOEN flag.

This change is just to simplify the code, no actual functional
changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Tony Lindgren <tony@atomide.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeirq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index 8e021082dba8c..3bad3266a2ad3 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -182,7 +182,6 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 
 	wirq->dev = dev;
 	wirq->irq = irq;
-	irq_set_status_flags(irq, IRQ_NOAUTOEN);
 
 	/* Prevent deferred spurious wakeirqs with disable_irq_nosync() */
 	irq_set_status_flags(irq, IRQ_DISABLE_UNLAZY);
@@ -192,7 +191,8 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	 * so we use a threaded irq.
 	 */
 	err = request_threaded_irq(irq, NULL, handle_threaded_wake_irq,
-				   IRQF_ONESHOT, wirq->name, wirq);
+				   IRQF_ONESHOT | IRQF_NO_AUTOEN,
+				   wirq->name, wirq);
 	if (err)
 		goto err_free_name;
 
-- 
GitLab


From 9b7ff25d129df7c4f61e08382993e1988d56f6a7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 21 May 2021 15:13:11 +0200
Subject: [PATCH 1460/3804] ACPI: power: Refine turning off unused power
 resources

Commit 7e4fdeafa61f ("ACPI: power: Turn off unused power resources
unconditionally") dropped the power resource state check from
acpi_turn_off_unused_power_resources(), because according to the
ACPI specification (e.g. ACPI 6.4, Section 7.2.2) the OS "may run
the _OFF method repeatedly, even if the resource is already off".

However, it turns out that some systems do not follow the
specification in this particular respect and that commit introduced
boot issues on them, so refine acpi_turn_off_unused_power_resources()
to only turn off power resources without any users after device
enumeration and restore its previous behavior in the system-wide
resume path.

Fixes: 7e4fdeafa61f ("ACPI: power: Turn off unused power resources unconditionally")
Link: https://uefi.org/specs/ACPI/6.4/07_Power_and_Performance_Mgmt/declaring-a-power-resource-object.html#off
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213019
Reported-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reported-by: Dave Olsthoorn <dave@bewaar.me>
Tested-by: Dave Olsthoorn <dave@bewaar.me>
Reported-by: Shujun Wang <wsj20369@163.com>
Tested-by: Shujun Wang <wsj20369@163.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/internal.h |  4 +--
 drivers/acpi/power.c    | 59 ++++++++++++++++++++++++++++++++---------
 drivers/acpi/scan.c     |  2 +-
 drivers/acpi/sleep.c    |  2 +-
 4 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index f973bbe90e5ee..e21611c9a1703 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -134,7 +134,7 @@ int acpi_power_init(void);
 void acpi_power_resources_list_free(struct list_head *list);
 int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 				 struct list_head *list);
-int acpi_add_power_resource(acpi_handle handle);
+struct acpi_device *acpi_add_power_resource(acpi_handle handle);
 void acpi_power_add_remove_device(struct acpi_device *adev, bool add);
 int acpi_power_wakeup_list_init(struct list_head *list, int *system_level);
 int acpi_device_sleep_wake(struct acpi_device *dev,
@@ -142,7 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
 int acpi_power_on_resources(struct acpi_device *device, int state);
 int acpi_power_transition(struct acpi_device *device, int state);
-void acpi_turn_off_unused_power_resources(void);
+void acpi_turn_off_unused_power_resources(bool init);
 
 /* --------------------------------------------------------------------------
                               Device Power Management
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
index 56102eaaa2da8..97c9a94a1a308 100644
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -52,6 +52,7 @@ struct acpi_power_resource {
 	u32 system_level;
 	u32 order;
 	unsigned int ref_count;
+	unsigned int users;
 	bool wakeup_enabled;
 	struct mutex resource_lock;
 	struct list_head dependents;
@@ -147,6 +148,7 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 
 	for (i = start; i < package->package.count; i++) {
 		union acpi_object *element = &package->package.elements[i];
+		struct acpi_device *rdev;
 		acpi_handle rhandle;
 
 		if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
@@ -163,13 +165,16 @@ int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
 		if (acpi_power_resource_is_dup(package, start, i))
 			continue;
 
-		err = acpi_add_power_resource(rhandle);
-		if (err)
+		rdev = acpi_add_power_resource(rhandle);
+		if (!rdev) {
+			err = -ENODEV;
 			break;
-
+		}
 		err = acpi_power_resources_list_add(rhandle, list);
 		if (err)
 			break;
+
+		to_power_resource(rdev)->users++;
 	}
 	if (err)
 		acpi_power_resources_list_free(list);
@@ -907,7 +912,7 @@ static void acpi_power_add_resource_to_list(struct acpi_power_resource *resource
 	mutex_unlock(&power_resource_list_lock);
 }
 
-int acpi_add_power_resource(acpi_handle handle)
+struct acpi_device *acpi_add_power_resource(acpi_handle handle)
 {
 	struct acpi_power_resource *resource;
 	struct acpi_device *device = NULL;
@@ -918,11 +923,11 @@ int acpi_add_power_resource(acpi_handle handle)
 
 	acpi_bus_get_device(handle, &device);
 	if (device)
-		return 0;
+		return device;
 
 	resource = kzalloc(sizeof(*resource), GFP_KERNEL);
 	if (!resource)
-		return -ENOMEM;
+		return NULL;
 
 	device = &resource->device;
 	acpi_init_device_object(device, handle, ACPI_BUS_TYPE_POWER);
@@ -959,11 +964,11 @@ int acpi_add_power_resource(acpi_handle handle)
 
 	acpi_power_add_resource_to_list(resource);
 	acpi_device_add_finalize(device);
-	return 0;
+	return device;
 
  err:
 	acpi_release_power_resource(&device->dev);
-	return result;
+	return NULL;
 }
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -997,7 +1002,38 @@ void acpi_resume_power_resources(void)
 }
 #endif
 
-void acpi_turn_off_unused_power_resources(void)
+static void acpi_power_turn_off_if_unused(struct acpi_power_resource *resource,
+				       bool init)
+{
+	if (resource->ref_count > 0)
+		return;
+
+	if (init) {
+		if (resource->users > 0)
+			return;
+	} else {
+		int result, state;
+
+		result = acpi_power_get_state(resource->device.handle, &state);
+		if (result || state == ACPI_POWER_RESOURCE_STATE_OFF)
+			return;
+	}
+
+	dev_info(&resource->device.dev, "Turning OFF\n");
+	__acpi_power_off(resource);
+}
+
+/**
+ * acpi_turn_off_unused_power_resources - Turn off power resources not in use.
+ * @init: Control switch.
+ *
+ * If @init is set, unconditionally turn off all of the ACPI power resources
+ * without any users.
+ *
+ * Otherwise, turn off all ACPI power resources without active references (that
+ * is, the ones that should be "off" at the moment) that are "on".
+ */
+void acpi_turn_off_unused_power_resources(bool init)
 {
 	struct acpi_power_resource *resource;
 
@@ -1006,10 +1042,7 @@ void acpi_turn_off_unused_power_resources(void)
 	list_for_each_entry_reverse(resource, &acpi_power_resource_list, list_node) {
 		mutex_lock(&resource->resource_lock);
 
-		if (!resource->ref_count) {
-			dev_info(&resource->device.dev, "Turning OFF\n");
-			__acpi_power_off(resource);
-		}
+		acpi_power_turn_off_if_unused(resource, init);
 
 		mutex_unlock(&resource->resource_lock);
 	}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 453eff8ec8c33..e10d38ac7cf28 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2360,7 +2360,7 @@ int __init acpi_scan_init(void)
 		}
 	}
 
-	acpi_turn_off_unused_power_resources();
+	acpi_turn_off_unused_power_resources(true);
 
 	acpi_scan_initialized = true;
 
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 09fd13757b658..df386571da98b 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -504,7 +504,7 @@ static void acpi_pm_start(u32 acpi_state)
  */
 static void acpi_pm_end(void)
 {
-	acpi_turn_off_unused_power_resources();
+	acpi_turn_off_unused_power_resources(false);
 	acpi_scan_lock_release();
 	/*
 	 * This is necessary in case acpi_pm_finish() is not called during a
-- 
GitLab


From 6be2408a1ef632a48149044d1757c80ab1096213 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 24 May 2021 17:30:10 +0800
Subject: [PATCH 1461/3804] PM: hibernate: fix spelling mistakes

Fix some spelling mistakes in comments:

corresonds ==> corresponds
alocated ==> allocated
unitialized ==> uninitialized
Deompression ==> Decompression

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 8 ++++----
 kernel/power/swap.c     | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 1a221dcb3c01c..af507c8c895b6 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -331,7 +331,7 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
  *
  * Memory bitmap is a structure consisting of many linked lists of
  * objects.  The main list's elements are of type struct zone_bitmap
- * and each of them corresonds to one zone.  For each zone bitmap
+ * and each of them corresponds to one zone.  For each zone bitmap
  * object there is a list of objects of type struct bm_block that
  * represent each blocks of bitmap in which information is stored.
  *
@@ -1500,7 +1500,7 @@ static struct memory_bitmap copy_bm;
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
- * Image pages are alocated before snapshot creation, so they need to be
+ * Image pages are allocated before snapshot creation, so they need to be
  * released after resume.
  */
 void swsusp_free(void)
@@ -2326,7 +2326,7 @@ static struct memory_bitmap *safe_highmem_bm;
  * (@nr_highmem_p points to the variable containing the number of highmem image
  * pages).  The pages that are "safe" (ie. will not be overwritten when the
  * hibernation image is restored entirely) have the corresponding bits set in
- * @bm (it must be unitialized).
+ * @bm (it must be uninitialized).
  *
  * NOTE: This function should not be called if there are no highmem image pages.
  */
@@ -2483,7 +2483,7 @@ static inline void free_highmem_data(void) {}
 
 /**
  * prepare_image - Make room for loading hibernation image.
- * @new_bm: Unitialized memory bitmap structure.
+ * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index bea3cb8afa112..3cb89baebc796 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -1125,7 +1125,7 @@ struct dec_data {
 };
 
 /**
- * Deompression function that runs in its own thread.
+ * Decompression function that runs in its own thread.
  */
 static int lzo_decompress_threadfn(void *data)
 {
-- 
GitLab


From 179d9ba5559a756f4322583388b3213fe4e391b0 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 24 May 2021 17:10:18 +0200
Subject: [PATCH 1462/3804] netfilter: nf_tables: fix table flag updates

The dormant flag needs to be updated from the preparation phase,
otherwise, two consecutive requests to dorm a table in the same batch
might try to remove the same hooks twice, resulting in the following
warning:

 hook not found, pf 3 num 0
 WARNING: CPU: 0 PID: 334 at net/netfilter/core.c:480 __nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480
 Modules linked in:
 CPU: 0 PID: 334 Comm: kworker/u4:5 Not tainted 5.12.0-syzkaller #0
 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
 Workqueue: netns cleanup_net
 RIP: 0010:__nf_unregister_net_hook+0x1eb/0x610 net/netfilter/core.c:480

This patch is a partial revert of 0ce7cf4127f1 ("netfilter: nftables:
update table flags from the commit phase") to restore the previous
behaviour.

However, there is still another problem: A batch containing a series of
dorm-wakeup-dorm table updates (or vice versa) also triggers the warning
above, since hook unregistration happens from the preparation phase, while
hook registration occurs from the commit phase.

To fix this problem, this patch adds two internal flags to annotate the
original dormant flag status which are __NFT_TABLE_F_WAS_DORMANT and
__NFT_TABLE_F_WAS_AWAKEN, to restore it from the abort path.

The __NFT_TABLE_F_UPDATE bitmask allows handling the dormant flag update
with one single transaction.

Reported-by: syzbot+7ad5cd1615f2d89c6e7e@syzkaller.appspotmail.com
Fixes: 0ce7cf4127f1 ("netfilter: nftables: update table flags from the commit phase")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  6 ----
 net/netfilter/nf_tables_api.c     | 59 +++++++++++++++++++++----------
 2 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 27eeb613bb4e8..0a5655e300b51 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1506,16 +1506,10 @@ struct nft_trans_chain {
 
 struct nft_trans_table {
 	bool				update;
-	u8				state;
-	u32				flags;
 };
 
 #define nft_trans_table_update(trans)	\
 	(((struct nft_trans_table *)trans->data)->update)
-#define nft_trans_table_state(trans)	\
-	(((struct nft_trans_table *)trans->data)->state)
-#define nft_trans_table_flags(trans)	\
-	(((struct nft_trans_table *)trans->data)->flags)
 
 struct nft_trans_elem {
 	struct nft_set			*set;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c34a3c0a0d9c8..72bc759179efc 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -736,7 +736,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 		goto nla_put_failure;
 
 	if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
-	    nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
+	    nla_put_be32(skb, NFTA_TABLE_FLAGS,
+			 htonl(table->flags & NFT_TABLE_F_MASK)) ||
 	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
 	    nla_put_be64(skb, NFTA_TABLE_HANDLE, cpu_to_be64(table->handle),
 			 NFTA_TABLE_PAD))
@@ -947,20 +948,22 @@ err_register_hooks:
 
 static void nf_tables_table_disable(struct net *net, struct nft_table *table)
 {
+	table->flags &= ~NFT_TABLE_F_DORMANT;
 	nft_table_disable(net, table, 0);
+	table->flags |= NFT_TABLE_F_DORMANT;
 }
 
-enum {
-	NFT_TABLE_STATE_UNCHANGED	= 0,
-	NFT_TABLE_STATE_DORMANT,
-	NFT_TABLE_STATE_WAKEUP
-};
+#define __NFT_TABLE_F_INTERNAL		(NFT_TABLE_F_MASK + 1)
+#define __NFT_TABLE_F_WAS_DORMANT	(__NFT_TABLE_F_INTERNAL << 0)
+#define __NFT_TABLE_F_WAS_AWAKEN	(__NFT_TABLE_F_INTERNAL << 1)
+#define __NFT_TABLE_F_UPDATE		(__NFT_TABLE_F_WAS_DORMANT | \
+					 __NFT_TABLE_F_WAS_AWAKEN)
 
 static int nf_tables_updtable(struct nft_ctx *ctx)
 {
 	struct nft_trans *trans;
 	u32 flags;
-	int ret = 0;
+	int ret;
 
 	if (!ctx->nla[NFTA_TABLE_FLAGS])
 		return 0;
@@ -985,21 +988,27 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 
 	if ((flags & NFT_TABLE_F_DORMANT) &&
 	    !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
-		nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT;
+		ctx->table->flags |= NFT_TABLE_F_DORMANT;
+		if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE))
+			ctx->table->flags |= __NFT_TABLE_F_WAS_AWAKEN;
 	} else if (!(flags & NFT_TABLE_F_DORMANT) &&
 		   ctx->table->flags & NFT_TABLE_F_DORMANT) {
-		ret = nf_tables_table_enable(ctx->net, ctx->table);
-		if (ret >= 0)
-			nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP;
+		ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+		if (!(ctx->table->flags & __NFT_TABLE_F_UPDATE)) {
+			ret = nf_tables_table_enable(ctx->net, ctx->table);
+			if (ret < 0)
+				goto err_register_hooks;
+
+			ctx->table->flags |= __NFT_TABLE_F_WAS_DORMANT;
+		}
 	}
-	if (ret < 0)
-		goto err;
 
-	nft_trans_table_flags(trans) = flags;
 	nft_trans_table_update(trans) = true;
 	nft_trans_commit_list_add_tail(ctx->net, trans);
+
 	return 0;
-err:
+
+err_register_hooks:
 	nft_trans_destroy(trans);
 	return ret;
 }
@@ -8556,10 +8565,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
+				if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+					nft_trans_destroy(trans);
+					break;
+				}
+				if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT)
 					nf_tables_table_disable(net, trans->ctx.table);
 
-				trans->ctx.table->flags = nft_trans_table_flags(trans);
+				trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
 			} else {
 				nft_clear(net, trans->ctx.table);
 			}
@@ -8777,9 +8790,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 		switch (trans->msg_type) {
 		case NFT_MSG_NEWTABLE:
 			if (nft_trans_table_update(trans)) {
-				if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP)
+				if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) {
+					nft_trans_destroy(trans);
+					break;
+				}
+				if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) {
 					nf_tables_table_disable(net, trans->ctx.table);
-
+					trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
+				} else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) {
+					trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT;
+				}
+				trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE;
 				nft_trans_destroy(trans);
 			} else {
 				list_del_rcu(&trans->ctx.table->list);
-- 
GitLab


From 08b2b6fdf6b26032f025084ce2893924a0cdb4a2 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 24 May 2021 16:29:43 +0800
Subject: [PATCH 1463/3804] cgroup: fix spelling mistakes

Fix some spelling mistakes in comments:
hierarhcy ==> hierarchy
automtically ==> automatically
overriden ==> overridden
In absense of .. or ==> In absence of .. and
assocaited ==> associated
taget ==> target
initate ==> initiate
succeded ==> succeeded
curremt ==> current
udpated ==> updated

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h | 6 +++---
 include/linux/cgroup.h      | 2 +-
 kernel/cgroup/cgroup-v1.c   | 2 +-
 kernel/cgroup/cgroup.c      | 8 ++++----
 kernel/cgroup/cpuset.c      | 2 +-
 kernel/cgroup/rdma.c        | 2 +-
 kernel/cgroup/rstat.c       | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 559ee05f86b2e..fb8f6d2cd1042 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -232,7 +232,7 @@ struct css_set {
 	struct list_head task_iters;
 
 	/*
-	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * On the default hierarchy, ->subsys[ssid] may point to a css
 	 * attached to an ancestor instead of the cgroup this css_set is
 	 * associated with.  The following node is anchored at
 	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
@@ -668,7 +668,7 @@ struct cgroup_subsys {
 	 */
 	bool threaded:1;
 
-	/* the following two fields are initialized automtically during boot */
+	/* the following two fields are initialized automatically during boot */
 	int id;
 	const char *name;
 
@@ -757,7 +757,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
  * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
  * On boot, sock_cgroup_data records the cgroup that the sock was created
  * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overriden to carry prioidx and/or
+ * net_cls starts being used, the area is overridden to carry prioidx and/or
  * classid.  The two modes are distinguished by whether the lowest bit is
  * set.  Clear bit indicates cgroup pointer while set bit prioidx and
  * classid.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4f2f79de083e2..6bc9c76680b22 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -32,7 +32,7 @@ struct kernel_clone_args;
 #ifdef CONFIG_CGROUPS
 
 /*
- * All weight knobs on the default hierarhcy should use the following min,
+ * All weight knobs on the default hierarchy should use the following min,
  * default and max values.  The default value is the logarithmic center of
  * MIN and MAX and allows 100x to be expressed in both directions.
  */
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 391aa570369bb..8190b6bfc9784 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -1001,7 +1001,7 @@ static int check_cgroupfs_options(struct fs_context *fc)
 	ctx->subsys_mask &= enabled;
 
 	/*
-	 * In absense of 'none', 'name=' or subsystem name options,
+	 * In absence of 'none', 'name=' and subsystem name options,
 	 * let's default to 'all'.
 	 */
 	if (!ctx->subsys_mask && !ctx->none && !ctx->name)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index e7a9a29982450..21ecc6ee6a6d3 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -468,7 +468,7 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest
  *
- * Find and get @cgrp's css assocaited with @ss.  If the css doesn't exist
+ * Find and get @cgrp's css associated with @ss.  If the css doesn't exist
  * or is offline, %NULL is returned.
  */
 static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
@@ -1633,7 +1633,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 
 /**
  * css_clear_dir - remove subsys files in a cgroup directory
- * @css: taget css
+ * @css: target css
  */
 static void css_clear_dir(struct cgroup_subsys_state *css)
 {
@@ -5350,7 +5350,7 @@ out_unlock:
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
  * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
- * initate destruction and put the css ref from kill_css().
+ * initiate destruction and put the css ref from kill_css().
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
@@ -6052,7 +6052,7 @@ out_revert:
  * @kargs: the arguments passed to create the child process
  *
  * This calls the cancel_fork() callbacks if a fork failed *after*
- * cgroup_can_fork() succeded and cleans up references we took to
+ * cgroup_can_fork() succeeded and cleans up references we took to
  * prepare a new css_set for the child process in cgroup_can_fork().
  */
 void cgroup_cancel_fork(struct task_struct *child,
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index a945504c0ae7f..adb5190c44296 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3376,7 +3376,7 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 }
 
 /**
- * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
  * @nodemask: the nodemask to be checked
  *
  * Are any of the nodes in the nodemask allowed in current->mems_allowed?
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index ae042c347c640..3135406608c75 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -244,7 +244,7 @@ EXPORT_SYMBOL(rdmacg_uncharge);
  * This function follows charging resource in hierarchical way.
  * It will fail if the charge would cause the new value to exceed the
  * hierarchical limit.
- * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
+ * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
  * Returns pointer to rdmacg for this resource when charging is successful.
  *
  * Charger needs to account resources on two criteria.
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 3a3fd2993a650..cee265cb535cc 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -75,7 +75,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
  * @root: root of the tree to traversal
  * @cpu: target cpu
  *
- * Walks the udpated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
+ * Walks the updated rstat_cpu tree on @cpu from @root.  %NULL @pos starts
  * the traversal and %NULL return indicates the end.  During traversal,
  * each returned cgroup is unlinked from the tree.  Must be called with the
  * matching cgroup_rstat_cpu_lock held.
-- 
GitLab


From 778a136e48be6b1b703328a0a4d6d459cf97449f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 18 May 2021 16:43:35 +0200
Subject: [PATCH 1464/3804] KVM: SVM: Drop unneeded CONFIG_X86_LOCAL_APIC check

AVIC dependency on CONFIG_X86_LOCAL_APIC is dead code since
commit e42eef4ba388 ("KVM: add X86_LOCAL_APIC dependency").

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210518144339.1987982-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/avic.c | 2 --
 arch/x86/kvm/svm/svm.c  | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712b4e0de4818..1c1bf911e02b4 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -29,9 +29,7 @@
 
 /* enable / disable AVIC */
 int avic;
-#ifdef CONFIG_X86_LOCAL_APIC
 module_param(avic, int, S_IRUGO);
-#endif
 
 #define SVM_AVIC_DOORBELL	0xc001011b
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dfa351e605dec..8c3918a11826a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1010,9 +1010,7 @@ static __init int svm_hardware_setup(void)
 	}
 
 	if (avic) {
-		if (!npt_enabled ||
-		    !boot_cpu_has(X86_FEATURE_AVIC) ||
-		    !IS_ENABLED(CONFIG_X86_LOCAL_APIC)) {
+		if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
 			avic = false;
 		} else {
 			pr_info("AVIC enabled\n");
-- 
GitLab


From 377872b3355b9a7f04f25388e2c9399845259c05 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 18 May 2021 16:43:36 +0200
Subject: [PATCH 1465/3804] KVM: VMX: Drop unneeded CONFIG_X86_LOCAL_APIC check

CONFIG_X86_LOCAL_APIC is always on when CONFIG_KVM (on x86) since
commit e42eef4ba388 ("KVM: add X86_LOCAL_APIC dependency").

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210518144339.1987982-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/capabilities.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 8dee8a5fbc17f..aa0e7872fcc9f 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -90,8 +90,7 @@ static inline bool cpu_has_vmx_preemption_timer(void)
 
 static inline bool cpu_has_vmx_posted_intr(void)
 {
-	return IS_ENABLED(CONFIG_X86_LOCAL_APIC) &&
-		vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_POSTED_INTR;
 }
 
 static inline bool cpu_has_load_ia32_efer(void)
-- 
GitLab


From 28a4aa1160d71187a44414dac40b57d1fd9fcd77 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 May 2021 18:22:28 +0200
Subject: [PATCH 1466/3804] KVM: SVM: make the avic parameter a bool

Make it consistent with kvm_intel.enable_apicv.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/avic.c | 4 ++--
 arch/x86/kvm/svm/svm.h  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1c1bf911e02b4..0e62e6a2438cf 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -28,8 +28,8 @@
 #include "svm.h"
 
 /* enable / disable AVIC */
-int avic;
-module_param(avic, int, S_IRUGO);
+bool avic;
+module_param(avic, bool, S_IRUGO);
 
 #define SVM_AVIC_DOORBELL	0xc001011b
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index e44567ceb8655..70419e417c0d9 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -479,7 +479,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
-extern int avic;
+extern bool avic;
 
 static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
 {
-- 
GitLab


From d1ce2c79156d3baf0830990ab06d296477b93c26 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 15 May 2021 10:04:58 +0800
Subject: [PATCH 1467/3804] vfio/pci: Fix error return code in vfio_ecap_init()

The error code returned from vfio_ext_cap_len() is stored in 'len', not
in 'ret'.

Fixes: 89e1f7d4c66d ("vfio: Add PCI device driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Message-Id: <20210515020458.6771-1-thunder.leizhen@huawei.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci_config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index d57f037f65b85..70e28efbc51f8 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1581,7 +1581,7 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
 			if (len == 0xFF) {
 				len = vfio_ext_cap_len(vdev, ecap, epos);
 				if (len < 0)
-					return ret;
+					return len;
 			}
 		}
 
-- 
GitLab


From 2a55ca37350171d9b43d561528f23d4130097255 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 15 May 2021 12:08:56 -0700
Subject: [PATCH 1468/3804] vfio/pci: zap_vma_ptes() needs MMU

zap_vma_ptes() is only available when CONFIG_MMU is set/enabled.
Without CONFIG_MMU, vfio_pci.o has build errors, so make
VFIO_PCI depend on MMU.

riscv64-linux-ld: drivers/vfio/pci/vfio_pci.o: in function `vfio_pci_mmap_open':
vfio_pci.c:(.text+0x1ec): undefined reference to `zap_vma_ptes'
riscv64-linux-ld: drivers/vfio/pci/vfio_pci.o: in function `.L0 ':
vfio_pci.c:(.text+0x165c): undefined reference to `zap_vma_ptes'

Fixes: 11c4cd07ba11 ("vfio-pci: Fault mmaps to enable vma tracking")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: kvm@vger.kernel.org
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Eric Auger <eric.auger@redhat.com>
Message-Id: <20210515190856.2130-1-rdunlap@infradead.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 53ce78d7d07be..5e2e1b9a9fd3a 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -2,6 +2,7 @@
 config VFIO_PCI
 	tristate "VFIO support for PCI devices"
 	depends on VFIO && PCI && EVENTFD
+	depends on MMU
 	select VFIO_VIRQFD
 	select IRQ_BYPASS_MANAGER
 	help
-- 
GitLab


From 78b238147e4d241bc1681d2559477c995f9dcb0a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 13 May 2021 18:01:55 -0500
Subject: [PATCH 1469/3804] vfio/iommu_type1: Use struct_size() for kzalloc()

Make use of the struct_size() helper instead of an open-coded version,
in order to avoid any potential type mistakes or integer overflows
that, in the worst scenario, could lead to heap overflows.

This code was detected with the help of Coccinelle and, audited and
fixed manually.

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Message-Id: <20210513230155.GA217517@embeddedor>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index a0747c35a7781..a3e925a41b0de 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2795,7 +2795,7 @@ static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
 		return 0;
 	}
 
-	size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));
+	size = struct_size(cap_iovas, iova_ranges, iovas);
 
 	cap_iovas = kzalloc(size, GFP_KERNEL);
 	if (!cap_iovas)
-- 
GitLab


From 752774ce7793a1f8baa55aae31f3b4caac49cbe4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 20 May 2021 13:36:41 +0000
Subject: [PATCH 1470/3804] samples: vfio-mdev: fix error handing in
 mdpy_fb_probe()

Fix to return a negative error code from the framebuffer_alloc() error
handling case instead of 0, also release regions in some error handling
cases.

Fixes: cacade1946a4 ("sample: vfio mdev display - guest driver")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Message-Id: <20210520133641.1421378-1-weiyongjun1@huawei.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mdpy-fb.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 21dbf63d6e415..9ec93d90e8a5a 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -117,22 +117,27 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
 	if (format != DRM_FORMAT_XRGB8888) {
 		pci_err(pdev, "format mismatch (0x%x != 0x%x)\n",
 			format, DRM_FORMAT_XRGB8888);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_release_regions;
 	}
 	if (width < 100	 || width > 10000) {
 		pci_err(pdev, "width (%d) out of range\n", width);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_release_regions;
 	}
 	if (height < 100 || height > 10000) {
 		pci_err(pdev, "height (%d) out of range\n", height);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_release_regions;
 	}
 	pci_info(pdev, "mdpy found: %dx%d framebuffer\n",
 		 width, height);
 
 	info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev);
-	if (!info)
+	if (!info) {
+		ret = -ENOMEM;
 		goto err_release_regions;
+	}
 	pci_set_drvdata(pdev, info);
 	par = info->par;
 
-- 
GitLab


From dc51ff91cf2d1e9a2d941da483602f71d4a51472 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Tue, 18 May 2021 22:21:31 +0300
Subject: [PATCH 1471/3804] vfio/platform: fix module_put call in error flow

The ->parent_module is the one that is used in try_module_get(). It should
also be the one that we use in module_put() during vfio_platform_open().

Fixes: 32a2d71c4e80 ("vfio: platform: introduce vfio-platform-base module")
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Message-Id: <20210518192133.59195-1-mgurtovoy@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/platform/vfio_platform_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 361e5b57e3693..470fcf7dac564 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -291,7 +291,7 @@ err_irq:
 	vfio_platform_regions_cleanup(vdev);
 err_reg:
 	mutex_unlock(&driver_lock);
-	module_put(THIS_MODULE);
+	module_put(vdev->parent_module);
 	return ret;
 }
 
-- 
GitLab


From 9b76eade16423ef06829cccfe3e100cfce31afcd Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 23 May 2021 14:38:53 +0000
Subject: [PATCH 1472/3804] sch_dsmark: fix a NULL deref in qdisc_reset()

If Qdisc_ops->init() fails, Qdisc_ops->reset() is called.
When dsmark_init() (the Qdisc_ops->init() callback) fails, it possibly has
not initialized dsmark_qdisc_data->q. But dsmark_reset() (the
Qdisc_ops->reset() callback) uses the dsmark_qdisc_data->q pointer without
any NULL check, so a panic occurs.

Test commands:
    sysctl net.core.default_qdisc=dsmark -w
    ip link add dummy0 type dummy
    ip link add vw0 link dummy0 type virt_wifi
    ip link set vw0 up

Splat looks like:
KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
CPU: 3 PID: 684 Comm: ip Not tainted 5.12.0+ #910
RIP: 0010:qdisc_reset+0x2b/0x680
Code: 1f 44 00 00 48 b8 00 00 00 00 00 fc ff df 41 57 41 56 41 55 41 54
55 48 89 fd 48 83 c7 18 53 48 89 fa 48 c1 ea 03 48 83 ec 20 <80> 3c 02
00 0f 85 09 06 00 00 4c 8b 65 18 0f 1f 44 00 00 65 8b 1d
RSP: 0018:ffff88800fda6bf8 EFLAGS: 00010282
RAX: dffffc0000000000 RBX: ffff8880050ed800 RCX: 0000000000000000
RDX: 0000000000000003 RSI: ffffffff99e34100 RDI: 0000000000000018
RBP: 0000000000000000 R08: fffffbfff346b553 R09: fffffbfff346b553
R10: 0000000000000001 R11: fffffbfff346b552 R12: ffffffffc0824940
R13: ffff888109e83800 R14: 00000000ffffffff R15: ffffffffc08249e0
FS:  00007f5042287680(0000) GS:ffff888119800000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055ae1f4dbd90 CR3: 0000000006760002 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ? rcu_read_lock_bh_held+0xa0/0xa0
 dsmark_reset+0x3d/0xf0 [sch_dsmark]
 qdisc_reset+0xa9/0x680
 qdisc_destroy+0x84/0x370
 qdisc_create_dflt+0x1fe/0x380
 attach_one_default_qdisc.constprop.41+0xa4/0x180
 dev_activate+0x4d5/0x8c0
 ? __dev_open+0x268/0x390
 __dev_open+0x270/0x390

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_dsmark.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index cd2748e2d4a20..d320bcfb2da2c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -407,7 +407,8 @@ static void dsmark_reset(struct Qdisc *sch)
 	struct dsmark_qdisc_data *p = qdisc_priv(sch);
 
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-	qdisc_reset(p->q);
+	if (p->q)
+		qdisc_reset(p->q);
 	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
 }
-- 
GitLab


From 1a44fb38cc65bc30bac490291412aa1940659fe1 Mon Sep 17 00:00:00 2001
From: Hayes Wang <hayeswang@realtek.com>
Date: Mon, 24 May 2021 14:49:42 +0800
Subject: [PATCH 1473/3804] r8152: check the informaton of the device

Verify some fields of the USB descriptor to make sure the driver
could be used by the device.

Besides, remove the check of endpoint number in rtl8152_probe().
usb_find_common_endpoints() includes it.

BugLink: https://syzkaller.appspot.com/bug?id=912c9c373656996801b4de61f1e3cb326fe940aa
Reported-by: syzbot+95afd23673f5dd295c57@syzkaller.appspotmail.com
Fixes: c2198943e33b ("r8152: search the configuration of vendor mode")
Signed-off-by: Hayes Wang <hayeswang@realtek.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 42 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 136ea06540ff7..f6abb2fbf9728 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -8107,6 +8107,37 @@ static void r8156b_init(struct r8152 *tp)
 	tp->coalesce = 15000;	/* 15 us */
 }
 
+static bool rtl_check_vendor_ok(struct usb_interface *intf)
+{
+	struct usb_host_interface *alt = intf->cur_altsetting;
+	struct usb_endpoint_descriptor *in, *out, *intr;
+
+	if (usb_find_common_endpoints(alt, &in, &out, &intr, NULL) < 0) {
+		dev_err(&intf->dev, "Expected endpoints are not found\n");
+		return false;
+	}
+
+	/* Check Rx endpoint address */
+	if (usb_endpoint_num(in) != 1) {
+		dev_err(&intf->dev, "Invalid Rx endpoint address\n");
+		return false;
+	}
+
+	/* Check Tx endpoint address */
+	if (usb_endpoint_num(out) != 2) {
+		dev_err(&intf->dev, "Invalid Tx endpoint address\n");
+		return false;
+	}
+
+	/* Check interrupt endpoint address */
+	if (usb_endpoint_num(intr) != 3) {
+		dev_err(&intf->dev, "Invalid interrupt endpoint address\n");
+		return false;
+	}
+
+	return true;
+}
+
 static bool rtl_vendor_mode(struct usb_interface *intf)
 {
 	struct usb_host_interface *alt = intf->cur_altsetting;
@@ -8115,12 +8146,15 @@ static bool rtl_vendor_mode(struct usb_interface *intf)
 	int i, num_configs;
 
 	if (alt->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC)
-		return true;
+		return rtl_check_vendor_ok(intf);
 
 	/* The vendor mode is not always config #1, so to find it out. */
 	udev = interface_to_usbdev(intf);
 	c = udev->config;
 	num_configs = udev->descriptor.bNumConfigurations;
+	if (num_configs < 2)
+		return false;
+
 	for (i = 0; i < num_configs; (i++, c++)) {
 		struct usb_interface_descriptor	*desc = NULL;
 
@@ -8135,7 +8169,8 @@ static bool rtl_vendor_mode(struct usb_interface *intf)
 		}
 	}
 
-	WARN_ON_ONCE(i == num_configs);
+	if (i == num_configs)
+		dev_err(&intf->dev, "Unexpected Device\n");
 
 	return false;
 }
@@ -9381,9 +9416,6 @@ static int rtl8152_probe(struct usb_interface *intf,
 	if (!rtl_vendor_mode(intf))
 		return -ENODEV;
 
-	if (intf->cur_altsetting->desc.bNumEndpoints < 3)
-		return -ENODEV;
-
 	usb_reset_device(udev);
 	netdev = alloc_etherdev(sizeof(struct r8152));
 	if (!netdev) {
-- 
GitLab


From 1a6e9a9c68c1f183872e4bcc947382111c2e04eb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 11:25:11 +0200
Subject: [PATCH 1474/3804] net: hso: fix control-request directions

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the tiocmset and rfkill requests which erroneously used
usb_rcvctrlpipe().

Fixes: 72dc1c096c70 ("HSO: add option hso driver")
Cc: stable@vger.kernel.org      # 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/hso.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index b48b2a25210cd..5c779cc0ea112 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -1689,7 +1689,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty,
 	spin_unlock_irqrestore(&serial->serial_lock, flags);
 
 	return usb_control_msg(serial->parent->usb,
-			       usb_rcvctrlpipe(serial->parent->usb, 0), 0x22,
+			       usb_sndctrlpipe(serial->parent->usb, 0), 0x22,
 			       0x21, val, if_num, NULL, 0,
 			       USB_CTRL_SET_TIMEOUT);
 }
@@ -2436,7 +2436,7 @@ static int hso_rfkill_set_block(void *data, bool blocked)
 	if (hso_dev->usb_gone)
 		rv = 0;
 	else
-		rv = usb_control_msg(hso_dev->usb, usb_rcvctrlpipe(hso_dev->usb, 0),
+		rv = usb_control_msg(hso_dev->usb, usb_sndctrlpipe(hso_dev->usb, 0),
 				       enabled ? 0x82 : 0x81, 0x40, 0, 0, NULL, 0,
 				       USB_CTRL_SET_TIMEOUT);
 	mutex_unlock(&hso_dev->mutex);
-- 
GitLab


From ba61cf167cb77e54c1ec5adb7aa49a22ab3c9b28 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:22 +0300
Subject: [PATCH 1475/3804] net: dsa: sja1105: fix VL lookup command packing
 for P/Q/R/S

At the beginning of the sja1105_dynamic_config.c file there is a diagram
of the dynamic config interface layout:

 packed_buf

 |
 V
 +-----------------------------------------+------------------+
 |              ENTRY BUFFER               |  COMMAND BUFFER  |
 +-----------------------------------------+------------------+

 <----------------------- packed_size ------------------------>

So in order to pack/unpack the command bits into the buffer,
sja1105_vl_lookup_cmd_packing must first advance the buffer pointer by
the length of the entry. This is similar to what the other *cmd_packing
functions do.

This bug exists because the command packing function for P/Q/R/S was
copied from the E/T generation, and on E/T, the command was actually
embedded within the entry buffer itself.

Fixes: 94f94d4acfb2 ("net: dsa: sja1105: add static tables for virtual links")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/dsa/sja1105/sja1105_dynamic_config.c  | 23 +++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index b777d3f375736..12cd04b568030 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -167,9 +167,10 @@ enum sja1105_hostcmd {
 	SJA1105_HOSTCMD_INVALIDATE = 4,
 };
 
+/* Command and entry overlap */
 static void
-sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
-			      enum packing_op op)
+sja1105et_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				enum packing_op op)
 {
 	const int size = SJA1105_SIZE_DYN_CMD;
 
@@ -179,6 +180,20 @@ sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(buf, &cmd->index,    9,  0, size, op);
 }
 
+/* Command and entry are separate */
+static void
+sja1105pqrs_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				  enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_VL_LOOKUP_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->errors,  30, 30, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    9,  0, size, op);
+}
+
 static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr,
 						enum packing_op op)
 {
@@ -641,7 +656,7 @@ static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
 const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_VL_LOOKUP] = {
 		.entry_packing = sja1105et_vl_lookup_entry_packing,
-		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.cmd_packing = sja1105et_vl_lookup_cmd_packing,
 		.access = OP_WRITE,
 		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
 		.packed_size = SJA1105ET_SIZE_VL_LOOKUP_DYN_CMD,
@@ -725,7 +740,7 @@ const struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 const struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_VL_LOOKUP] = {
 		.entry_packing = sja1105_vl_lookup_entry_packing,
-		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.cmd_packing = sja1105pqrs_vl_lookup_cmd_packing,
 		.access = (OP_READ | OP_WRITE),
 		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
 		.packed_size = SJA1105PQRS_SIZE_VL_LOOKUP_DYN_CMD,
-- 
GitLab


From dc596e3fe63f88e3d1e509f64e7f761cd4135538 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:23 +0300
Subject: [PATCH 1476/3804] net: dsa: sja1105: call dsa_unregister_switch when
 allocating memory fails

Unlike other drivers which pretty much end their .probe() execution with
dsa_register_switch(), the sja1105 does some extra stuff. When that
fails with -ENOMEM, the driver is quick to return that, forgetting to
call dsa_unregister_switch(). Not critical, but a bug nonetheless.

Fixes: 4d7525085a9b ("net: dsa: sja1105: offload the Credit-Based Shaper qdisc")
Fixes: a68578c20a96 ("net: dsa: Make deferred_xmit private to sja1105")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 405024b637d6c..2248152b4836c 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3646,8 +3646,10 @@ static int sja1105_probe(struct spi_device *spi)
 		priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
 					 sizeof(struct sja1105_cbs_entry),
 					 GFP_KERNEL);
-		if (!priv->cbs)
-			return -ENOMEM;
+		if (!priv->cbs) {
+			rc = -ENOMEM;
+			goto out_unregister_switch;
+		}
 	}
 
 	/* Connections between dsa_port and sja1105_port */
@@ -3672,7 +3674,7 @@ static int sja1105_probe(struct spi_device *spi)
 			dev_err(ds->dev,
 				"failed to create deferred xmit thread: %d\n",
 				rc);
-			goto out;
+			goto out_destroy_workers;
 		}
 		skb_queue_head_init(&sp->xmit_queue);
 		sp->xmit_tpid = ETH_P_SJA1105;
@@ -3682,7 +3684,8 @@ static int sja1105_probe(struct spi_device *spi)
 	}
 
 	return 0;
-out:
+
+out_destroy_workers:
 	while (port-- > 0) {
 		struct sja1105_port *sp = &priv->ports[port];
 
@@ -3691,6 +3694,10 @@ out:
 
 		kthread_destroy_worker(sp->xmit_worker);
 	}
+
+out_unregister_switch:
+	dsa_unregister_switch(ds);
+
 	return rc;
 }
 
-- 
GitLab


From cec279a898a3b004411682f212215ccaea1cd0fb Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:24 +0300
Subject: [PATCH 1477/3804] net: dsa: sja1105: add error handling in
 sja1105_setup()

If any of sja1105_static_config_load(), sja1105_clocking_setup() or
sja1105_devlink_setup() fails, we can't just return in the middle of
sja1105_setup() or memory will leak. Add a cleanup path.

Fixes: 0a7bdbc23d8a ("net: dsa: sja1105: move devlink param code to sja1105_devlink.c")
Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2248152b4836c..c7a1be8bbddfc 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2976,13 +2976,13 @@ static int sja1105_setup(struct dsa_switch *ds)
 	rc = sja1105_static_config_load(priv, ports);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to load static config: %d\n", rc);
-		return rc;
+		goto out_ptp_clock_unregister;
 	}
 	/* Configure the CGU (PHY link modes and speeds) */
 	rc = sja1105_clocking_setup(priv);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to configure MII clocking: %d\n", rc);
-		return rc;
+		goto out_static_config_free;
 	}
 	/* On SJA1105, VLAN filtering per se is always enabled in hardware.
 	 * The only thing we can do to disable it is lie about what the 802.1Q
@@ -3003,7 +3003,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 
 	rc = sja1105_devlink_setup(ds);
 	if (rc < 0)
-		return rc;
+		goto out_static_config_free;
 
 	/* The DSA/switchdev model brings up switch ports in standalone mode by
 	 * default, and that means vlan_filtering is 0 since they're not under
@@ -3012,6 +3012,17 @@ static int sja1105_setup(struct dsa_switch *ds)
 	rtnl_lock();
 	rc = sja1105_setup_8021q_tagging(ds, true);
 	rtnl_unlock();
+	if (rc)
+		goto out_devlink_teardown;
+
+	return 0;
+
+out_devlink_teardown:
+	sja1105_devlink_teardown(ds);
+out_ptp_clock_unregister:
+	sja1105_ptp_clock_unregister(ds);
+out_static_config_free:
+	sja1105_static_config_free(&priv->static_config);
 
 	return rc;
 }
-- 
GitLab


From 6729188d2646709941903052e4b78e1d82c239b9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:25 +0300
Subject: [PATCH 1478/3804] net: dsa: sja1105: error out on unsupported PHY
 mode

The driver continues probing when a port is configured for an
unsupported PHY interface type, instead it should stop.

Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index c7a1be8bbddfc..7f7e0424a4427 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -207,6 +207,7 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv,
 		default:
 			dev_err(dev, "Unsupported PHY mode %s!\n",
 				phy_modes(ports[i].phy_mode));
+			return -EINVAL;
 		}
 
 		/* Even though the SerDes port is able to drive SGMII autoneg
-- 
GitLab


From ed040abca4c1db72dfd3b8483b6ed6bfb7c2571e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:26 +0300
Subject: [PATCH 1479/3804] net: dsa: sja1105: use 4095 as the private VLAN for
 untagged traffic

One thing became visible when writing the blamed commit, and that was
that STP and PTP frames injected by net/dsa/tag_sja1105.c using the
deferred xmit mechanism are always classified to the pvid of the CPU
port, regardless of whatever VLAN there might be in these packets.

So a decision needed to be taken regarding the mechanism through which
we should ensure that delivery of STP and PTP traffic is possible when
we are in a VLAN awareness mode that involves tag_8021q. This is because
tag_8021q is not concerned with managing the pvid of the CPU port, since
as far as tag_8021q is concerned, no traffic should be sent as untagged
from the CPU port. So we end up not actually having a pvid on the CPU
port if we only listen to tag_8021q, unless we do something about it.

The decision taken at the time was to keep VLAN 1 in the list of
priv->dsa_8021q_vlans, and make it a pvid of the CPU port. This ensures
that STP and PTP frames can always be sent to the outside world.

However there is a problem. If we do the following while we are in
the best_effort_vlan_filtering=true mode:

ip link add br0 type bridge vlan_filtering 1
ip link set swp2 master br0
bridge vlan del dev swp2 vid 1

Then untagged and pvid-tagged frames should be dropped. But we observe
that they aren't, and this is because of the precaution we took that VID
1 is always installed on all ports.

So clearly VLAN 1 is not good for this purpose. What about VLAN 0?
Well, VLAN 0 is managed by the 8021q module, and that module wants to
ensure that 802.1p tagged frames are always received by a port, and are
always transmitted as VLAN-tagged (with VLAN ID 0). Whereas we want our
STP and PTP frames to be untagged if the stack sent them as untagged -
we don't want the driver to just decide out of the blue that it adds
VID 0 to some packets.

So what to do?

Well, there is one other VLAN that is reserved, and that is 4095:
$ ip link add link swp2 name swp2.4095 type vlan id 4095
Error: 8021q: Invalid VLAN id.
$ bridge vlan add dev swp2 vid 4095
Error: bridge: Vlan id is invalid.

After we made this change, VLAN 1 is indeed forwarded and/or dropped
according to the bridge VLAN table, there are no further alterations
done by the sja1105 driver.

Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7f7e0424a4427..dffa7dd838770 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -26,6 +26,7 @@
 #include "sja1105_tas.h"
 
 #define SJA1105_UNKNOWN_MULTICAST	0x010000000000ull
+#define SJA1105_DEFAULT_VLAN		(VLAN_N_VID - 1)
 
 static const struct dsa_switch_ops sja1105_switch_ops;
 
@@ -322,6 +323,13 @@ static int sja1105_init_l2_lookup_params(struct sja1105_private *priv)
 	return 0;
 }
 
+/* Set up a default VLAN for untagged traffic injected from the CPU
+ * using management routes (e.g. STP, PTP) as opposed to tag_8021q.
+ * All DT-defined ports are members of this VLAN, and there are no
+ * restrictions on forwarding (since the CPU selects the destination).
+ * Frames from this VLAN will always be transmitted as untagged, and
+ * neither the bridge nor the 8021q module can create this VLAN ID.
+ */
 static int sja1105_init_static_vlan(struct sja1105_private *priv)
 {
 	struct sja1105_table *table;
@@ -331,17 +339,13 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 		.vmemb_port = 0,
 		.vlan_bc = 0,
 		.tag_port = 0,
-		.vlanid = 1,
+		.vlanid = SJA1105_DEFAULT_VLAN,
 	};
 	struct dsa_switch *ds = priv->ds;
 	int port;
 
 	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
 
-	/* The static VLAN table will only contain the initial pvid of 1.
-	 * All other VLANs are to be configured through dynamic entries,
-	 * and kept in the static configuration table as backing memory.
-	 */
 	if (table->entry_count) {
 		kfree(table->entries);
 		table->entry_count = 0;
@@ -354,9 +358,6 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 
 	table->entry_count = 1;
 
-	/* VLAN 1: all DT-defined ports are members; no restrictions on
-	 * forwarding; always transmit as untagged.
-	 */
 	for (port = 0; port < ds->num_ports; port++) {
 		struct sja1105_bridge_vlan *v;
 
@@ -367,15 +368,12 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 		pvid.vlan_bc |= BIT(port);
 		pvid.tag_port &= ~BIT(port);
 
-		/* Let traffic that don't need dsa_8021q (e.g. STP, PTP) be
-		 * transmitted as untagged.
-		 */
 		v = kzalloc(sizeof(*v), GFP_KERNEL);
 		if (!v)
 			return -ENOMEM;
 
 		v->port = port;
-		v->vid = 1;
+		v->vid = SJA1105_DEFAULT_VLAN;
 		v->untagged = true;
 		if (dsa_is_cpu_port(ds, port))
 			v->pvid = true;
-- 
GitLab


From b38e659de966a122fe2cb178c1e39c9bea06bc62 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 24 May 2021 12:25:27 +0300
Subject: [PATCH 1480/3804] net: dsa: sja1105: update existing VLANs from the
 bridge VLAN list

When running this sequence of operations:

ip link add br0 type bridge vlan_filtering 1
ip link set swp4 master br0
bridge vlan add dev swp4 vid 1

We observe the traffic sent on swp4 is still untagged, even though the
bridge has overwritten the existing VLAN entry:

port    vlan ids
swp4     1 PVID

br0      1 PVID Egress Untagged

This happens because we didn't consider that the 'bridge vlan add'
command just overwrites VLANs like it's nothing. We treat the 'vid 1
pvid untagged' and the 'vid 1' as two separate VLANs, and the first
still has precedence when calling sja1105_build_vlan_table. Obviously
there is a disagreement regarding semantics, and we end up doing
something unexpected from the PoV of the bridge.

Let's actually consider an "existing VLAN" to be one which is on the
same port, and has the same VLAN ID, as one we already have, and update
it if it has different flags than we do.

The first blamed commit is the one introducing the bug, the second one
is the latest on top of which the bugfix still applies.

Fixes: ec5ae61076d0 ("net: dsa: sja1105: save/restore VLANs using a delta commit method")
Fixes: 5899ee367ab3 ("net: dsa: tag_8021q: add a context structure")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index dffa7dd838770..b88d9ef45a1f1 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2816,11 +2816,22 @@ static int sja1105_vlan_add_one(struct dsa_switch *ds, int port, u16 vid,
 	bool pvid = flags & BRIDGE_VLAN_INFO_PVID;
 	struct sja1105_bridge_vlan *v;
 
-	list_for_each_entry(v, vlan_list, list)
-		if (v->port == port && v->vid == vid &&
-		    v->untagged == untagged && v->pvid == pvid)
+	list_for_each_entry(v, vlan_list, list) {
+		if (v->port == port && v->vid == vid) {
 			/* Already added */
-			return 0;
+			if (v->untagged == untagged && v->pvid == pvid)
+				/* Nothing changed */
+				return 0;
+
+			/* It's the same VLAN, but some of the flags changed
+			 * and the user did not bother to delete it first.
+			 * Update it and trigger sja1105_build_vlan_table.
+			 */
+			v->untagged = untagged;
+			v->pvid = pvid;
+			return 1;
+		}
+	}
 
 	v = kzalloc(sizeof(*v), GFP_KERNEL);
 	if (!v) {
-- 
GitLab


From 8d84733dee9f7dfa7a323c59e9bd61b2a83bee3f Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Mon, 24 May 2021 18:26:03 +0800
Subject: [PATCH 1481/3804] thermal/ti-soc-thermal: Fix kernel-doc

Fix function name in ti-bandgap.c kernel-doc comment
to remove a warning.

drivers/thermal/ti-soc-thermal/ti-bandgap.c:787: warning: expecting
prototype for ti_bandgap_alert_init(). Prototype was for
ti_bandgap_talert_init() instead.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Acked-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1621851963-36548-1-git-send-email-yang.lee@linux.alibaba.com
---
 drivers/thermal/ti-soc-thermal/ti-bandgap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index ebe7cb70bfb63..ea0603b59309f 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -770,7 +770,7 @@ static int ti_bandgap_tshut_init(struct ti_bandgap *bgp,
 }
 
 /**
- * ti_bandgap_alert_init() - setup and initialize talert handling
+ * ti_bandgap_talert_init() - setup and initialize talert handling
  * @bgp: pointer to struct ti_bandgap
  * @pdev: pointer to device struct platform_device
  *
-- 
GitLab


From a4dd4fc6105e54393d637450a11d4cddb5fabc4f Mon Sep 17 00:00:00 2001
From: Saubhik Mukherjee <saubhik.mukherjee@gmail.com>
Date: Mon, 24 May 2021 19:07:12 +0530
Subject: [PATCH 1482/3804] net: appletalk: cops: Fix data race in cops_probe1

In cops_probe1(), there is a write to dev->base_addr after requesting an
interrupt line and registering the interrupt handler cops_interrupt().
The handler might be called in parallel to handle an interrupt.
cops_interrupt() tries to read dev->base_addr leading to a potential
data race. So write to dev->base_addr before calling request_irq().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Saubhik Mukherjee <saubhik.mukherjee@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/appletalk/cops.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index ba8e70a8e3125..6b12ce822e51a 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -327,6 +327,8 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr)
 			break;
 	}
 
+	dev->base_addr = ioaddr;
+
 	/* Reserve any actual interrupt. */
 	if (dev->irq) {
 		retval = request_irq(dev->irq, cops_interrupt, 0, dev->name, dev);
@@ -334,8 +336,6 @@ static int __init cops_probe1(struct net_device *dev, int ioaddr)
 			goto err_out;
 	}
 
-	dev->base_addr = ioaddr;
-
         lp = netdev_priv(dev);
         spin_lock_init(&lp->lock);
 
-- 
GitLab


From 48b491a5cc74333c4a6a82fe21cea42c055a3b0b Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Mon, 24 May 2021 13:50:54 -0500
Subject: [PATCH 1483/3804] net: hsr: fix mac_len checks

Commit 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr
in fill_frame_info") added the following which resulted in -EINVAL
always being returned:
	if (skb->mac_len < sizeof(struct hsr_ethhdr))
		return -EINVAL;

mac_len was not being set correctly so this check completely broke
HSR/PRP since it was always 14, not 20.

Set mac_len correctly and modify the mac_len checks to test in the
correct places since sometimes it is legitimately 14.

Fixes: 2e9f60932a2c ("net: hsr: check skb can contain struct hsr_ethhdr in fill_frame_info")
Signed-off-by: George McCollister <george.mccollister@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c  |  2 ++
 net/hsr/hsr_forward.c | 30 +++++++++++++++++++++---------
 net/hsr/hsr_forward.h |  8 ++++----
 net/hsr/hsr_main.h    |  4 ++--
 net/hsr/hsr_slave.c   | 11 +++++------
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index bfcdc75fc01e6..26c32407f0290 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -218,6 +218,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (master) {
 		skb->dev = master->dev;
 		skb_reset_mac_header(skb);
+		skb_reset_mac_len(skb);
 		hsr_forward_skb(skb, master);
 	} else {
 		atomic_long_inc(&dev->tx_dropped);
@@ -259,6 +260,7 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master)
 		goto out;
 
 	skb_reset_mac_header(skb);
+	skb_reset_mac_len(skb);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 6852e9bccf5b8..ceb8afb2a62f4 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -474,8 +474,8 @@ static void handle_std_frame(struct sk_buff *skb,
 	}
 }
 
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame)
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame)
 {
 	struct hsr_port *port = frame->port_rcv;
 	struct hsr_priv *hsr = port->hsr;
@@ -483,20 +483,26 @@ void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
 	/* HSRv0 supervisory frames double as a tag so treat them as tagged. */
 	if ((!hsr->prot_version && proto == htons(ETH_P_PRP)) ||
 	    proto == htons(ETH_P_HSR)) {
+		/* Check if skb contains hsr_ethhdr */
+		if (skb->mac_len < sizeof(struct hsr_ethhdr))
+			return -EINVAL;
+
 		/* HSR tagged frame :- Data or Supervision */
 		frame->skb_std = NULL;
 		frame->skb_prp = NULL;
 		frame->skb_hsr = skb;
 		frame->sequence_nr = hsr_get_skb_sequence_nr(skb);
-		return;
+		return 0;
 	}
 
 	/* Standard frame or PRP from master port */
 	handle_std_frame(skb, frame);
+
+	return 0;
 }
 
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame)
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame)
 {
 	/* Supervision frame */
 	struct prp_rct *rct = skb_get_PRP_rct(skb);
@@ -507,9 +513,11 @@ void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
 		frame->skb_std = NULL;
 		frame->skb_prp = skb;
 		frame->sequence_nr = prp_get_skb_sequence_nr(rct);
-		return;
+		return 0;
 	}
 	handle_std_frame(skb, frame);
+
+	return 0;
 }
 
 static int fill_frame_info(struct hsr_frame_info *frame,
@@ -519,9 +527,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
 	struct hsr_vlan_ethhdr *vlan_hdr;
 	struct ethhdr *ethhdr;
 	__be16 proto;
+	int ret;
 
-	/* Check if skb contains hsr_ethhdr */
-	if (skb->mac_len < sizeof(struct hsr_ethhdr))
+	/* Check if skb contains ethhdr */
+	if (skb->mac_len < sizeof(struct ethhdr))
 		return -EINVAL;
 
 	memset(frame, 0, sizeof(*frame));
@@ -548,7 +557,10 @@ static int fill_frame_info(struct hsr_frame_info *frame,
 
 	frame->is_from_san = false;
 	frame->port_rcv = port;
-	hsr->proto_ops->fill_frame_info(proto, skb, frame);
+	ret = hsr->proto_ops->fill_frame_info(proto, skb, frame);
+	if (ret)
+		return ret;
+
 	check_local_dest(port->hsr, skb, frame);
 
 	return 0;
diff --git a/net/hsr/hsr_forward.h b/net/hsr/hsr_forward.h
index b6acaafa83fc2..206636750b300 100644
--- a/net/hsr/hsr_forward.h
+++ b/net/hsr/hsr_forward.h
@@ -24,8 +24,8 @@ struct sk_buff *prp_get_untagged_frame(struct hsr_frame_info *frame,
 				       struct hsr_port *port);
 bool prp_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
 bool hsr_drop_frame(struct hsr_frame_info *frame, struct hsr_port *port);
-void prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame);
-void hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
-			 struct hsr_frame_info *frame);
+int prp_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame);
+int hsr_fill_frame_info(__be16 proto, struct sk_buff *skb,
+			struct hsr_frame_info *frame);
 #endif /* __HSR_FORWARD_H */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 8f264672b70bd..53d1f7a824630 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -186,8 +186,8 @@ struct hsr_proto_ops {
 					       struct hsr_port *port);
 	struct sk_buff * (*create_tagged_frame)(struct hsr_frame_info *frame,
 						struct hsr_port *port);
-	void (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
-				struct hsr_frame_info *frame);
+	int (*fill_frame_info)(__be16 proto, struct sk_buff *skb,
+			       struct hsr_frame_info *frame);
 	bool (*invalid_dan_ingress_frame)(__be16 protocol);
 	void (*update_san_info)(struct hsr_node *node, bool is_sup);
 };
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index c5227d42faf56..b70e6bbf6021f 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -60,12 +60,11 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb)
 		goto finish_pass;
 
 	skb_push(skb, ETH_HLEN);
-
-	if (skb_mac_header(skb) != skb->data) {
-		WARN_ONCE(1, "%s:%d: Malformed frame at source port %s)\n",
-			  __func__, __LINE__, port->dev->name);
-		goto finish_consume;
-	}
+	skb_reset_mac_header(skb);
+	if ((!hsr->prot_version && protocol == htons(ETH_P_PRP)) ||
+	    protocol == htons(ETH_P_HSR))
+		skb_set_network_header(skb, ETH_HLEN + HSR_HLEN);
+	skb_reset_mac_len(skb);
 
 	hsr_forward_skb(skb, port);
 
-- 
GitLab


From 46a8b29c6306d8bbfd92b614ef65a47c900d8e70 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 24 May 2021 23:02:08 +0300
Subject: [PATCH 1484/3804] net: usb: fix memory leak in smsc75xx_bind

Syzbot reported memory leak in smsc75xx_bind().
The problem was non-freed memory in case of
errors after memory allocation.

backtrace:
  [<ffffffff84245b62>] kmalloc include/linux/slab.h:556 [inline]
  [<ffffffff84245b62>] kzalloc include/linux/slab.h:686 [inline]
  [<ffffffff84245b62>] smsc75xx_bind+0x7a/0x334 drivers/net/usb/smsc75xx.c:1460
  [<ffffffff82b5b2e6>] usbnet_probe+0x3b6/0xc30 drivers/net/usb/usbnet.c:1728

Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver")
Cc: stable@kernel.vger.org
Reported-and-tested-by: syzbot+b558506ba8165425fee2@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index f8cdabb9ef5a4..b286993da67c9 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_wait_ready(dev, 0);
 	if (ret < 0) {
 		netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
-		return ret;
+		goto err;
 	}
 
 	smsc75xx_init_mac_address(dev);
@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_reset(dev);
 	if (ret < 0) {
 		netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
-		return ret;
+		goto err;
 	}
 
 	dev->net->netdev_ops = &smsc75xx_netdev_ops;
@@ -1502,6 +1502,10 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
 	dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
 	return 0;
+
+err:
+	kfree(pdata);
+	return ret;
 }
 
 static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
-- 
GitLab


From 8c42a49738f16af0061f9ae5c2f5a955f268d9e3 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Mon, 24 May 2021 15:29:53 -0500
Subject: [PATCH 1485/3804] net: dsa: microchip: enable phy errata workaround
 on 9567

Also enable phy errata workaround on 9567 since it has the same errata as
the 9477 according to the manufacturer's documentation.

Signed-off-by: George McCollister <george.mccollister@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz9477.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 55e5d479acce3..854e25f43fa70 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -1530,6 +1530,7 @@ static const struct ksz_chip_data ksz9477_switch_chips[] = {
 		.num_statics = 16,
 		.cpu_ports = 0x7F,	/* can be configured as cpu port */
 		.port_cnt = 7,		/* total physical port count */
+		.phy_errata_9477 = true,
 	},
 };
 
-- 
GitLab


From 1cb61759d40716643281b8e0f8c7afebc8699249 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 21 May 2021 09:26:10 +0200
Subject: [PATCH 1486/3804] init: verify that function is initcall_t at
 compile-time

In the spirit of making it hard to misuse an interface, add a
compile-time assertion in the CONFIG_HAVE_ARCH_PREL32_RELOCATIONS case
to verify the initcall function matches initcall_t, because the inline
asm bypasses any type-checking the compiler would otherwise do. This
will help developers catch incorrect API use in all configurations.

A recent example of this is:
https://lkml.kernel.org/r/20210514140015.2944744-1-arnd@kernel.org

Signed-off-by: Marco Elver <elver@google.com>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210521072610.2880286-1-elver@google.com
---
 include/linux/init.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index 045ad1650ed16..d82b4b2e1d25d 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -242,7 +242,8 @@ extern bool initcall_debug;
 	asm(".section	\"" __sec "\", \"a\"		\n"	\
 	    __stringify(__name) ":			\n"	\
 	    ".long	" __stringify(__stub) " - .	\n"	\
-	    ".previous					\n");
+	    ".previous					\n");	\
+	static_assert(__same_type(initcall_t, &fn));
 #else
 #define ____define_initcall(fn, __unused, __name, __sec)	\
 	static initcall_t __name __used 			\
-- 
GitLab


From 24845dcb170e16b3100bd49743687648c71387ae Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 11 Mar 2021 17:09:41 -0800
Subject: [PATCH 1487/3804] Makefile: LTO: have linker check
 -Wframe-larger-than

-Wframe-larger-than= requires stack frame information, which the
frontend cannot provide. This diagnostic is emitted late during
compilation once stack frame size is available.

When building with LTO, the frontend simply lowers C to LLVM IR and does
not have stack frame information, so it cannot emit this diagnostic.
When the linker drives LTO, it restarts optimizations and lowers LLVM IR
to object code. At that point, it has stack frame information but
doesn't know to check for a specific max stack frame size.

I consider this a bug in LLVM that we need to fix. There are some
details we're working out related to LTO such as which value to use when
there are multiple different values specified per TU, or how to
propagate these to compiler synthesized routines properly, if at all.

Until it's fixed, ensure we don't miss these. At that point we can wrap
this in a compiler version guard or revert this based on the minimum
support version of Clang.

The error message is now generated during link:
  LTO     vmlinux.o
ld.lld: warning: stack size limit exceeded (8224) in foobarbaz

Cc: Sami Tolvanen <samitolvanen@google.com>
Reported-by: Candle Sun <candlesea@gmail.com>
Suggested-by: Fangrui Song <maskray@google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210312010942.1546679-1-ndesaulniers@google.com
---
 Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Makefile b/Makefile
index 0ed7e061c8e9e..90d1e1d7b927f 100644
--- a/Makefile
+++ b/Makefile
@@ -928,6 +928,11 @@ CC_FLAGS_LTO	+= -fvisibility=hidden
 
 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
+
+# Check for frame size exceeding threshold during prolog/epilog insertion.
+ifneq ($(CONFIG_FRAME_WARN),0)
+KBUILD_LDFLAGS	+= -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
+endif
 endif
 
 ifdef CONFIG_LTO
-- 
GitLab


From d6eef886903c4bb5af41b9a31d4ba11dc7a6f8e8 Mon Sep 17 00:00:00 2001
From: Sanket Parmar <sparmar@cadence.com>
Date: Mon, 17 May 2021 17:05:12 +0200
Subject: [PATCH 1488/3804] usb: cdns3: Enable TDL_CHK only for OUT ep

ZLP gets stuck if TDL_CHK bit is set and TDL_FROM_TRB is used
as TDL source for IN endpoints. To fix it, TDL_CHK is only
enabled for OUT endpoints.

Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
Reported-by: Aswath Govindraju <a-govindraju@ti.com>
Signed-off-by: Sanket Parmar <sparmar@cadence.com>
Link: https://lore.kernel.org/r/1621263912-13175-1-git-send-email-sparmar@cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdns3-gadget.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index a8b7b50abf645..5281f8d3fb3d1 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -2007,7 +2007,7 @@ static void cdns3_configure_dmult(struct cdns3_device *priv_dev,
 		else
 			mask = BIT(priv_ep->num);
 
-		if (priv_ep->type != USB_ENDPOINT_XFER_ISOC) {
+		if (priv_ep->type != USB_ENDPOINT_XFER_ISOC  && !priv_ep->dir) {
 			cdns3_set_register_bit(&regs->tdl_from_trb, mask);
 			cdns3_set_register_bit(&regs->tdl_beh, mask);
 			cdns3_set_register_bit(&regs->tdl_beh2, mask);
@@ -2046,15 +2046,13 @@ int cdns3_ep_config(struct cdns3_endpoint *priv_ep, bool enable)
 	case USB_ENDPOINT_XFER_INT:
 		ep_cfg = EP_CFG_EPTYPE(USB_ENDPOINT_XFER_INT);
 
-		if ((priv_dev->dev_ver == DEV_VER_V2 && !priv_ep->dir) ||
-		    priv_dev->dev_ver > DEV_VER_V2)
+		if (priv_dev->dev_ver >= DEV_VER_V2 && !priv_ep->dir)
 			ep_cfg |= EP_CFG_TDL_CHK;
 		break;
 	case USB_ENDPOINT_XFER_BULK:
 		ep_cfg = EP_CFG_EPTYPE(USB_ENDPOINT_XFER_BULK);
 
-		if ((priv_dev->dev_ver == DEV_VER_V2  && !priv_ep->dir) ||
-		    priv_dev->dev_ver > DEV_VER_V2)
+		if (priv_dev->dev_ver >= DEV_VER_V2 && !priv_ep->dir)
 			ep_cfg |= EP_CFG_TDL_CHK;
 		break;
 	default:
-- 
GitLab


From 0f9342513cc78a31a4a272a19b35eee4e8cd7107 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 20 May 2021 17:15:49 -0700
Subject: [PATCH 1489/3804] xfs: check free AG space when making per-AG
 reservations

The new online shrink code exposed a gap in the per-AG reservation
code, which is that we only return ENOSPC to callers if the entire fs
doesn't have enough free blocks.  Except for debugging mode, the
reservation init code doesn't ever check that there's enough free space
in that AG to cover the reservation.

Not having enough space is not considered an immediate fatal error that
requires filesystem offlining because (a) it's shouldn't be possible to
wind up in that state through normal file operations and (b) even if
one did, freeing data blocks would recover the situation.

However, online shrink now needs to know if shrinking would not leave
enough space so that it can abort the shrink operation.  Hence we need
to promote this assertion into an actual error return.

Observed by running xfs/168 with a 1k block size, though in theory this
could happen with any configuration.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/xfs/libxfs/xfs_ag_resv.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e32a1833d5231..bbfea8022a3b9 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -325,10 +325,22 @@ out:
 		error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
 		if (error2)
 			return error2;
-		ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
-		       xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
-		       pag->pagf_freeblks + pag->pagf_flcount);
+
+		/*
+		 * If there isn't enough space in the AG to satisfy the
+		 * reservation, let the caller know that there wasn't enough
+		 * space.  Callers are responsible for deciding what to do
+		 * next, since (in theory) we can stumble along with
+		 * insufficient reservation if data blocks are being freed to
+		 * replenish the AG's free space.
+		 */
+		if (!error &&
+		    xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+		    xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved >
+		    pag->pagf_freeblks + pag->pagf_flcount)
+			error = -ENOSPC;
 	}
+
 	return error;
 }
 
-- 
GitLab


From 6b69e485894b355b333bd286f0f0958e41d8754a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 12:49:19 -0700
Subject: [PATCH 1490/3804] xfs: standardize extent size hint validation

While chasing a bug involving invalid extent size hints being propagated
into newly created realtime files, I noticed that the xfs_ioctl_setattr
checks for the extent size hints weren't the same as the ones now
encoded in libxfs and used for validation in repair and mkfs.

Because the checks in libxfs are more stringent than the ones in the
ioctl, it's possible for a live system to set inode flags that
immediately result in corruption warnings.  Specifically, it's possible
to set an extent size hint on an rtinherit directory without checking if
the hint is aligned to the realtime extent size, which makes no sense
since that combination is used only to seed new realtime files.

Replace the open-coded and inadequate checks with the libxfs verifier
versions and update the code comments a bit.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_inode_buf.c | 24 ++++++++--
 fs/xfs/xfs_ioctl.c            | 90 ++++++++---------------------------
 2 files changed, 41 insertions(+), 73 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 5c9a7440d9e4e..045118c7bf789 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -559,8 +559,17 @@ xfs_dinode_calc_crc(
 /*
  * Validate di_extsize hint.
  *
- * The rules are documented at xfs_ioctl_setattr_check_extsize().
- * These functions must be kept in sync with each other.
+ * 1. Extent size hint is only valid for directories and regular files.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files.
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 4. Hint cannot be larger than MAXEXTLEN.
+ * 5. Can be changed on directories at any time.
+ * 6. Hint value of 0 turns off hints, clears inode flags.
+ * 7. Extent size must be a multiple of the appropriate block size.
+ *    For realtime files, this is the rt extent size.
+ * 8. For non-realtime files, the extent size hint must be limited
+ *    to half the AG size to avoid alignment extending the extent beyond the
+ *    limits of the AG.
  */
 xfs_failaddr_t
 xfs_inode_validate_extsize(
@@ -616,8 +625,15 @@ xfs_inode_validate_extsize(
 /*
  * Validate di_cowextsize hint.
  *
- * The rules are documented at xfs_ioctl_setattr_check_cowextsize().
- * These functions must be kept in sync with each other.
+ * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
+ *    The inode does not have to have any shared blocks, but it must be a v3.
+ * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
+ *    for a directory, the hint is propagated to new files.
+ * 3. Can be changed on files & directories at any time.
+ * 4. Hint value of 0 turns off hints, clears inode flags.
+ * 5. Extent size must be a multiple of the appropriate block size.
+ * 6. The extent size hint must be limited to half the AG size to avoid
+ *    alignment extending the extent beyond the limits of the AG.
  */
 xfs_failaddr_t
 xfs_inode_validate_cowextsize(
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3925bfcb23657..6407921aca961 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1267,20 +1267,8 @@ out_error:
 }
 
 /*
- * extent size hint validation is somewhat cumbersome. Rules are:
- *
- * 1. extent size hint is only valid for directories and regular files
- * 2. FS_XFLAG_EXTSIZE is only valid for regular files
- * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
- * 4. can only be changed on regular files if no extents are allocated
- * 5. can be changed on directories at any time
- * 6. extsize hint of 0 turns off hints, clears inode flags.
- * 7. Extent size must be a multiple of the appropriate block size.
- * 8. for non-realtime files, the extent size hint must be limited
- *    to half the AG size to avoid alignment extending the extent beyond the
- *    limits of the AG.
- *
- * Please keep this function in sync with xfs_scrub_inode_extsize.
+ * Validate a proposed extent size hint.  For regular files, the hint can only
+ * be changed if no extents are allocated.
  */
 static int
 xfs_ioctl_setattr_check_extsize(
@@ -1288,86 +1276,50 @@ xfs_ioctl_setattr_check_extsize(
 	struct fileattr		*fa)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	xfs_extlen_t		size;
-	xfs_fsblock_t		extsize_fsb;
+	xfs_failaddr_t		failaddr;
+	uint16_t		new_diflags;
 
 	if (!fa->fsx_valid)
 		return 0;
 
 	if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
-	    ((ip->i_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
+	    XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
 		return -EINVAL;
 
-	if (fa->fsx_extsize == 0)
-		return 0;
-
-	extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
-	if (extsize_fsb > MAXEXTLEN)
+	if (fa->fsx_extsize & mp->m_blockmask)
 		return -EINVAL;
 
-	if (XFS_IS_REALTIME_INODE(ip) ||
-	    (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
-		size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
-	} else {
-		size = mp->m_sb.sb_blocksize;
-		if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
-			return -EINVAL;
-	}
-
-	if (fa->fsx_extsize % size)
-		return -EINVAL;
+	new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
 
-	return 0;
+	failaddr = xfs_inode_validate_extsize(ip->i_mount,
+			XFS_B_TO_FSB(mp, fa->fsx_extsize),
+			VFS_I(ip)->i_mode, new_diflags);
+	return failaddr != NULL ? -EINVAL : 0;
 }
 
-/*
- * CoW extent size hint validation rules are:
- *
- * 1. CoW extent size hint can only be set if reflink is enabled on the fs.
- *    The inode does not have to have any shared blocks, but it must be a v3.
- * 2. FS_XFLAG_COWEXTSIZE is only valid for directories and regular files;
- *    for a directory, the hint is propagated to new files.
- * 3. Can be changed on files & directories at any time.
- * 4. CoW extsize hint of 0 turns off hints, clears inode flags.
- * 5. Extent size must be a multiple of the appropriate block size.
- * 6. The extent size hint must be limited to half the AG size to avoid
- *    alignment extending the extent beyond the limits of the AG.
- *
- * Please keep this function in sync with xfs_scrub_inode_cowextsize.
- */
 static int
 xfs_ioctl_setattr_check_cowextsize(
 	struct xfs_inode	*ip,
 	struct fileattr		*fa)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	xfs_extlen_t		size;
-	xfs_fsblock_t		cowextsize_fsb;
+	xfs_failaddr_t		failaddr;
+	uint64_t		new_diflags2;
+	uint16_t		new_diflags;
 
 	if (!fa->fsx_valid)
 		return 0;
 
-	if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
-		return 0;
-
-	if (!xfs_sb_version_hasreflink(&ip->i_mount->m_sb))
-		return -EINVAL;
-
-	if (fa->fsx_cowextsize == 0)
-		return 0;
-
-	cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
-	if (cowextsize_fsb > MAXEXTLEN)
-		return -EINVAL;
-
-	size = mp->m_sb.sb_blocksize;
-	if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
+	if (fa->fsx_cowextsize & mp->m_blockmask)
 		return -EINVAL;
 
-	if (fa->fsx_cowextsize % size)
-		return -EINVAL;
+	new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
+	new_diflags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
 
-	return 0;
+	failaddr = xfs_inode_validate_cowextsize(ip->i_mount,
+			XFS_B_TO_FSB(mp, fa->fsx_cowextsize),
+			VFS_I(ip)->i_mode, new_diflags, new_diflags2);
+	return failaddr != NULL ? -EINVAL : 0;
 }
 
 static int
-- 
GitLab


From 603f000b15f21ce8932f76689c7aa9fe58261cf5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 12 May 2021 12:51:26 -0700
Subject: [PATCH 1491/3804] xfs: validate extsz hints against rt extent size
 when rtinherit is set

The RTINHERIT bit can be set on a directory so that newly created
regular files will have the REALTIME bit set to store their data on the
realtime volume.  If an extent size hint (and EXTSZINHERIT) are set on
the directory, the hint will also be copied into the new file.

As pointed out in previous patches, for realtime files we require the
extent size hint be an integer multiple of the realtime extent, but we
don't perform the same validation on a directory with both RTINHERIT and
EXTSZINHERIT set, even though the only use-case of that combination is
to propagate extent size hints into new realtime files.  This leads to
inode corruption errors when the bad values are propagated.

Because there may be existing filesystems with such a configuration, we
cannot simply amend the inode verifier to trip on these directories and
call it a day because that will cause previously "working" filesystems
to start throwing errors abruptly.  Note that it's valid to have
directories with rtinherit set even if there is no realtime volume, in
which case the problem does not manifest because rtinherit is ignored if
there's no realtime device; and it's possible that someone set the flag,
crashed, repaired the filesystem (which clears the hint on the realtime
file) and continued.

Therefore, mitigate this issue in several ways: First, if we try to
write out an inode with both rtinherit/extszinherit set and an unaligned
extent size hint, turn off the hint to correct the error.  Second, if
someone tries to misconfigure a directory via the fssetxattr ioctl, fail
the ioctl.  Third, reverify both extent size hint values when we
propagate heritable inode attributes from parent to child, to prevent
misconfigurations from spreading.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/libxfs/xfs_inode_buf.c   | 22 ++++++++++++++++++++++
 fs/xfs/libxfs/xfs_trans_inode.c | 17 +++++++++++++++++
 fs/xfs/xfs_inode.c              | 29 +++++++++++++++++++++++++++++
 fs/xfs/xfs_ioctl.c              | 15 +++++++++++++++
 fs/xfs/xfs_message.h            |  2 ++
 5 files changed, 85 insertions(+)

diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 045118c7bf789..f3254a4f4cb4b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -589,6 +589,28 @@ xfs_inode_validate_extsize(
 	inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT);
 	extsize_bytes = XFS_FSB_TO_B(mp, extsize);
 
+	/*
+	 * This comment describes a historic gap in this verifier function.
+	 *
+	 * On older kernels, the extent size hint verifier doesn't check that
+	 * the extent size hint is an integer multiple of the realtime extent
+	 * size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
+	 * The verifier has always enforced the alignment rule for regular
+	 * files with the REALTIME flag set.
+	 *
+	 * If a directory with a misaligned extent size hint is allowed to
+	 * propagate that hint into a new regular realtime file, the result
+	 * is that the inode cluster buffer verifier will trigger a corruption
+	 * shutdown the next time it is run.
+	 *
+	 * Unfortunately, there could be filesystems with these misconfigured
+	 * directories in the wild, so we cannot add a check to this verifier
+	 * at this time because that will result a new source of directory
+	 * corruption errors when reading an existing filesystem.  Instead, we
+	 * permit the misconfiguration to pass through the verifiers so that
+	 * callers of this function can correct and mitigate externally.
+	 */
+
 	if (rt_flag)
 		blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
 	else
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 78324e043e257..8d595a5c4abd1 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -142,6 +142,23 @@ xfs_trans_log_inode(
 		flags |= XFS_ILOG_CORE;
 	}
 
+	/*
+	 * Inode verifiers on older kernels don't check that the extent size
+	 * hint is an integer multiple of the rt extent size on a directory
+	 * with both rtinherit and extszinherit flags set.  If we're logging a
+	 * directory that is misconfigured in this way, clear the hint.
+	 */
+	if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+	    (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
+	    (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
+		xfs_info_once(ip->i_mount,
+	"Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
+		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+				   XFS_DIFLAG_EXTSZINHERIT);
+		ip->i_extsize = 0;
+		flags |= XFS_ILOG_CORE;
+	}
+
 	/*
 	 * Record the specific change for fdatasync optimisation. This allows
 	 * fdatasync to skip log forces for inodes that are only timestamp
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0369eb22c1bb0..e4c2da4566f13 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -690,6 +690,7 @@ xfs_inode_inherit_flags(
 	const struct xfs_inode	*pip)
 {
 	unsigned int		di_flags = 0;
+	xfs_failaddr_t		failaddr;
 	umode_t			mode = VFS_I(ip)->i_mode;
 
 	if (S_ISDIR(mode)) {
@@ -729,6 +730,24 @@ xfs_inode_inherit_flags(
 		di_flags |= XFS_DIFLAG_FILESTREAM;
 
 	ip->i_diflags |= di_flags;
+
+	/*
+	 * Inode verifiers on older kernels only check that the extent size
+	 * hint is an integer multiple of the rt extent size on realtime files.
+	 * They did not check the hint alignment on a directory with both
+	 * rtinherit and extszinherit flags set.  If the misaligned hint is
+	 * propagated from a directory into a new realtime file, new file
+	 * allocations will fail due to math errors in the rt allocator and/or
+	 * trip the verifiers.  Validate the hint settings in the new file so
+	 * that we don't let broken hints propagate.
+	 */
+	failaddr = xfs_inode_validate_extsize(ip->i_mount, ip->i_extsize,
+			VFS_I(ip)->i_mode, ip->i_diflags);
+	if (failaddr) {
+		ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
+				   XFS_DIFLAG_EXTSZINHERIT);
+		ip->i_extsize = 0;
+	}
 }
 
 /* Propagate di_flags2 from a parent inode to a child inode. */
@@ -737,12 +756,22 @@ xfs_inode_inherit_flags2(
 	struct xfs_inode	*ip,
 	const struct xfs_inode	*pip)
 {
+	xfs_failaddr_t		failaddr;
+
 	if (pip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) {
 		ip->i_diflags2 |= XFS_DIFLAG2_COWEXTSIZE;
 		ip->i_cowextsize = pip->i_cowextsize;
 	}
 	if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
 		ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+
+	/* Don't let invalid cowextsize hints propagate. */
+	failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
+			VFS_I(ip)->i_mode, ip->i_diflags, ip->i_diflags2);
+	if (failaddr) {
+		ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE;
+		ip->i_cowextsize = 0;
+	}
 }
 
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6407921aca961..1fe4c1fc0aeae 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1291,6 +1291,21 @@ xfs_ioctl_setattr_check_extsize(
 
 	new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
 
+	/*
+	 * Inode verifiers on older kernels don't check that the extent size
+	 * hint is an integer multiple of the rt extent size on a directory
+	 * with both rtinherit and extszinherit flags set.  Don't let sysadmins
+	 * misconfigure directories.
+	 */
+	if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
+	    (new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
+		unsigned int	rtextsize_bytes;
+
+		rtextsize_bytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
+		if (fa->fsx_extsize % rtextsize_bytes)
+			return -EINVAL;
+	}
+
 	failaddr = xfs_inode_validate_extsize(ip->i_mount,
 			XFS_B_TO_FSB(mp, fa->fsx_extsize),
 			VFS_I(ip)->i_mode, new_diflags);
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 3c392b1512ac0..7ec1a9207517f 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -73,6 +73,8 @@ do {									\
 	xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
 #define xfs_notice_once(dev, fmt, ...)				\
 	xfs_printk_once(xfs_notice, dev, fmt, ##__VA_ARGS__)
+#define xfs_info_once(dev, fmt, ...)				\
+	xfs_printk_once(xfs_info, dev, fmt, ##__VA_ARGS__)
 
 void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
 void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
-- 
GitLab


From eb8dbe80326c3d44c1e38ee4f40e0d8d3e06f2d0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 11:17:05 +0200
Subject: [PATCH 1492/3804] USB: serial: quatech2: fix control-request
 directions

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the three requests which erroneously used usb_rcvctrlpipe().

Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver")
Cc: stable@vger.kernel.org      # 3.5
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/quatech2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c
index 5f2e7f668e687..067690dac24ca 100644
--- a/drivers/usb/serial/quatech2.c
+++ b/drivers/usb/serial/quatech2.c
@@ -416,7 +416,7 @@ static void qt2_close(struct usb_serial_port *port)
 
 	/* flush the port transmit buffer */
 	i = usb_control_msg(serial->dev,
-			    usb_rcvctrlpipe(serial->dev, 0),
+			    usb_sndctrlpipe(serial->dev, 0),
 			    QT2_FLUSH_DEVICE, 0x40, 1,
 			    port_priv->device_port, NULL, 0, QT2_USB_TIMEOUT);
 
@@ -426,7 +426,7 @@ static void qt2_close(struct usb_serial_port *port)
 
 	/* flush the port receive buffer */
 	i = usb_control_msg(serial->dev,
-			    usb_rcvctrlpipe(serial->dev, 0),
+			    usb_sndctrlpipe(serial->dev, 0),
 			    QT2_FLUSH_DEVICE, 0x40, 0,
 			    port_priv->device_port, NULL, 0, QT2_USB_TIMEOUT);
 
@@ -639,7 +639,7 @@ static int qt2_attach(struct usb_serial *serial)
 	int status;
 
 	/* power on unit */
-	status = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
+	status = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0),
 				 0xc2, 0x40, 0x8000, 0, NULL, 0,
 				 QT2_USB_TIMEOUT);
 	if (status < 0) {
-- 
GitLab


From c8692ad416dcc420ce1b403596a425c8f4c2720b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 25 May 2021 09:08:23 +0300
Subject: [PATCH 1493/3804] bus: ti-sysc: Fix flakey idling of uarts and stop
 using swsup_sidle_act

Looks like the swsup_sidle_act quirk handling is unreliable for serial
ports. The serial ports just eventually stop idling until woken up and
re-idled again. As the serial port not idling blocks any deeper SoC idle
states, it adds an annoying random flakiness to power management.

Let's just switch to swsup_sidle quirk instead like we already do for
omap3 uarts. This means we manually idle the port instead of trying to
use the hardware autoidle features when not in use.

For more details on why the serial ports have been using swsup_sidle_act,
see commit 66dde54e978a ("ARM: OMAP2+: hwmod-data: UART IP needs software
control to manage sidle modes"). It seems that the swsup_sidle_act quirk
handling is not enough though, and for example the TI Android kernel
changed to using swsup_sidle with commit 77c34c84e1e0 ("OMAP4: HWMOD:
UART1: disable smart-idle.").

Fixes: b4a9a7a38917 ("bus: ti-sysc: Handle swsup idle mode quirks")
Cc: Carl Philipp Klemm <philipp@uvos.xyz>
Cc: Ivan Jelincic <parazyd@dyne.org>
Cc: Merlijn Wajer <merlijn@wizzup.org>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Sebastian Reichel <sre@kernel.org>
Cc: Sicelo A. Mhlongo <absicsz@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 4ff319863be2d..38cb116ed433f 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1454,9 +1454,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE),
 	/* Uarts on omap4 and later */
 	SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x50411e03, 0xffff00ff,
-		   SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE),
+		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE),
 	SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x47422e03, 0xffffffff,
-		   SYSC_QUIRK_SWSUP_SIDLE_ACT | SYSC_QUIRK_LEGACY_IDLE),
+		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_LEGACY_IDLE),
 
 	/* Quirks that need to be set based on the module address */
 	SYSC_QUIRK("mcpdm", 0x40132000, 0, 0x10, -ENODEV, 0x50000800, 0xffffffff,
-- 
GitLab


From fc0b3dc9a11771c3919eaaaf9d649138b095aa0f Mon Sep 17 00:00:00 2001
From: Alexandre GRIVEAUX <agriveaux@deutnet.info>
Date: Sun, 23 May 2021 18:35:21 +0200
Subject: [PATCH 1494/3804] USB: serial: omninet: add device id for Zyxel Omni
 56K Plus

Add device id for the Zyxel Omni 56K Plus modem. This modem includes:

USB chip:
NetChip
NET2888

Main chip:
901041A
F721501APGF

Another modem using the same chips is the Zyxel Omni 56K DUO/NEO,
which could be added with the right USB ID.

Signed-off-by: Alexandre GRIVEAUX <agriveaux@deutnet.info>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/omninet.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 83c62f920c501..c2ece584724e9 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -26,6 +26,7 @@
 
 #define ZYXEL_VENDOR_ID		0x0586
 #define ZYXEL_OMNINET_ID	0x1000
+#define ZYXEL_OMNI_56K_PLUS_ID	0x1500
 /* This one seems to be a re-branded ZyXEL device */
 #define BT_IGNITIONPRO_ID	0x2000
 
@@ -40,6 +41,7 @@ static void omninet_port_remove(struct usb_serial_port *port);
 
 static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(ZYXEL_VENDOR_ID, ZYXEL_OMNINET_ID) },
+	{ USB_DEVICE(ZYXEL_VENDOR_ID, ZYXEL_OMNI_56K_PLUS_ID) },
 	{ USB_DEVICE(ZYXEL_VENDOR_ID, BT_IGNITIONPRO_ID) },
 	{ }						/* Terminating entry */
 };
-- 
GitLab


From 56df0c758aff7e5a7c59e2b255d1846f935b2cea Mon Sep 17 00:00:00 2001
From: Alexandre GRIVEAUX <agriveaux@deutnet.info>
Date: Sun, 23 May 2021 18:35:22 +0200
Subject: [PATCH 1495/3804] USB: serial: omninet: update driver description

With the inclusion of the Omni 56K Plus, this driver seems to be more
common among the family of Zyxel omni modems. Update the driver and
module descriptions.

Signed-off-by: Alexandre GRIVEAUX <agriveaux@deutnet.info>
[ johan: amend commit message ]
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/omninet.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index c2ece584724e9..41f1b872d277b 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * USB ZyXEL omni.net LCD PLUS driver
+ * USB ZyXEL omni.net driver
  *
  * Copyright (C) 2013,2017 Johan Hovold <johan@kernel.org>
  *
@@ -22,7 +22,7 @@
 #include <linux/usb/serial.h>
 
 #define DRIVER_AUTHOR "Alessandro Zummo"
-#define DRIVER_DESC "USB ZyXEL omni.net LCD PLUS Driver"
+#define DRIVER_DESC "USB ZyXEL omni.net Driver"
 
 #define ZYXEL_VENDOR_ID		0x0586
 #define ZYXEL_OMNINET_ID	0x1000
@@ -52,7 +52,7 @@ static struct usb_serial_driver zyxel_omninet_device = {
 		.owner =	THIS_MODULE,
 		.name =		"omninet",
 	},
-	.description =		"ZyXEL - omni.net lcd plus usb",
+	.description =		"ZyXEL - omni.net usb",
 	.id_table =		id_table,
 	.num_bulk_out =		2,
 	.calc_num_ports =	omninet_calc_num_ports,
-- 
GitLab


From 3596a06583a16cf7f76d836440dfba5714c9c710 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <jgq516@gmail.com>
Date: Fri, 21 May 2021 15:32:39 +0800
Subject: [PATCH 1496/3804] nvme: fix potential memory leaks in nvme_cdev_add
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to call put_device() if cdev_device_add() fails; otherwise
kmemleak reports the following leak:

[<0000000024c71758>] kmem_cache_alloc_trace+0x233/0x480
[<00000000ad2813ed>] device_add+0x7ff/0xe10
[<0000000035bc54c4>] cdev_device_add+0x72/0xa0
[<000000006c9aa1e8>] nvme_cdev_add+0xa9/0xf0 [nvme_core]
[<000000003c4d492d>] nvme_mpath_set_live+0x251/0x290 [nvme_core]
[<00000000889a58da>] nvme_mpath_add_disk+0x268/0x320 [nvme_core]
[<00000000192e7161>] nvme_alloc_ns+0x669/0xac0 [nvme_core]
[<000000007a1a6041>] nvme_validate_or_alloc_ns+0x156/0x280 [nvme_core]
[<000000003a763c35>] nvme_scan_work+0x221/0x3c0 [nvme_core]
[<000000009ff10706>] process_one_work+0x5cf/0xb10
[<000000000644ee25>] worker_thread+0x7a/0x680
[<00000000285ebd2f>] kthread+0x1c6/0x210
[<00000000e297c6ea>] ret_from_fork+0x22/0x30

Fixes: 2637baed7801 ("nvme: introduce generic per-namespace chardev")
Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn>
Reviewed-by: Javier González <javier.gonz@samsung.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 762125f2905f7..66973bb563055 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3485,8 +3485,10 @@ int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
 	cdev_init(cdev, fops);
 	cdev->owner = owner;
 	ret = cdev_device_add(cdev, cdev_device);
-	if (ret)
+	if (ret) {
+		put_device(cdev_device);
 		ida_simple_remove(&nvme_ns_chr_minor_ida, minor);
+	}
 	return ret;
 }
 
-- 
GitLab


From f25f8ef70ce2e85bae1a266dd5de714aefda81d2 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 21 May 2021 10:23:00 +0200
Subject: [PATCH 1497/3804] nvme-fc: short-circuit reconnect retries

Returning an nvme status from nvme_fc_create_association() indicates
that the association is established, and we should honour the DNR bit.
If it's set a reconnect attempt will just return the same error, so
we can short-circuit the reconnect attempts and fail the connection
directly.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 256e87721a01f..f183f9fa03d0e 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3107,6 +3107,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (ctrl->ctrl.icdoff) {
 		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
 				ctrl->ctrl.icdoff);
+		ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 		goto out_disconnect_admin_queue;
 	}
 
@@ -3114,6 +3115,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) {
 		dev_err(ctrl->ctrl.device,
 			"Mandatory sgls are not supported!\n");
+		ret = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 		goto out_disconnect_admin_queue;
 	}
 
@@ -3280,11 +3282,13 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
 		return;
 
-	if (portptr->port_state == FC_OBJSTATE_ONLINE)
+	if (portptr->port_state == FC_OBJSTATE_ONLINE) {
 		dev_info(ctrl->ctrl.device,
 			"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
 			ctrl->cnum, status);
-	else if (time_after_eq(jiffies, rport->dev_loss_end))
+		if (status > 0 && (status & NVME_SC_DNR))
+			recon = false;
+	} else if (time_after_eq(jiffies, rport->dev_loss_end))
 		recon = false;
 
 	if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
@@ -3298,12 +3302,17 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 
 		queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
 	} else {
-		if (portptr->port_state == FC_OBJSTATE_ONLINE)
-			dev_warn(ctrl->ctrl.device,
-				"NVME-FC{%d}: Max reconnect attempts (%d) "
-				"reached.\n",
-				ctrl->cnum, ctrl->ctrl.nr_reconnects);
-		else
+		if (portptr->port_state == FC_OBJSTATE_ONLINE) {
+			if (status > 0 && (status & NVME_SC_DNR))
+				dev_warn(ctrl->ctrl.device,
+					 "NVME-FC{%d}: reconnect failure\n",
+					 ctrl->cnum);
+			else
+				dev_warn(ctrl->ctrl.device,
+					 "NVME-FC{%d}: Max reconnect attempts "
+					 "(%d) reached.\n",
+					 ctrl->cnum, ctrl->ctrl.nr_reconnects);
+		} else
 			dev_warn(ctrl->ctrl.device,
 				"NVME-FC{%d}: dev_loss_tmo (%d) expired "
 				"while waiting for remoteport connectivity.\n",
-- 
GitLab


From 4d9442bf263ac45d495bb7ecf75009e59c0622b2 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 21 May 2021 10:23:46 +0200
Subject: [PATCH 1498/3804] nvme-fabrics: decode host pathing error for connect

Add an additional decoding for 'host pathing error' during connect.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a2bb7fc63a735..34a84d2086c74 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -336,6 +336,11 @@ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
 			cmd->connect.recfmt);
 		break;
 
+	case NVME_SC_HOST_PATH_ERROR:
+		dev_err(ctrl->device,
+			"Connect command failed: host path error\n");
+		break;
+
 	default:
 		dev_err(ctrl->device,
 			"Connect command failed, error wo/DNR bit: %d\n",
-- 
GitLab


From 7cfc4ea78fc103ea51ecbacd9236abb5b1c490d2 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Fri, 30 Apr 2021 10:27:44 +0200
Subject: [PATCH 1499/3804] drm/meson: fix shutdown crash when component not
 probed

When the main component is not probed, for example when the dw-hdmi module is
not loaded yet or in probe defer, the following crash appears on shutdown:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038
...
pc : meson_drv_shutdown+0x24/0x50
lr : platform_drv_shutdown+0x20/0x30
...
Call trace:
meson_drv_shutdown+0x24/0x50
platform_drv_shutdown+0x20/0x30
device_shutdown+0x158/0x360
kernel_restart_prepare+0x38/0x48
kernel_restart+0x18/0x68
__do_sys_reboot+0x224/0x250
__arm64_sys_reboot+0x24/0x30
...

Simply check if the priv struct has been allocated before using it.

Fixes: fa0c16caf3d7 ("drm: meson_drv add shutdown function")
Reported-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210430082744.3638743-1-narmstrong@baylibre.com
---
 drivers/gpu/drm/meson/meson_drv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 453d8b4c5763d..07fcd12dca160 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -485,11 +485,12 @@ static int meson_probe_remote(struct platform_device *pdev,
 static void meson_drv_shutdown(struct platform_device *pdev)
 {
 	struct meson_drm *priv = dev_get_drvdata(&pdev->dev);
-	struct drm_device *drm = priv->drm;
 
-	DRM_DEBUG_DRIVER("\n");
-	drm_kms_helper_poll_fini(drm);
-	drm_atomic_helper_shutdown(drm);
+	if (!priv)
+		return;
+
+	drm_kms_helper_poll_fini(priv->drm);
+	drm_atomic_helper_shutdown(priv->drm);
 }
 
 static int meson_drv_probe(struct platform_device *pdev)
-- 
GitLab


From 2978996f620001f4e748c79af0fe89be729ef58d Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Tue, 18 May 2021 12:13:03 -0700
Subject: [PATCH 1500/3804] x86/entry: Use int everywhere for system call
 numbers

System call numbers are defined as int, so use int everywhere for system
call numbers. This is strictly a cleanup; it should not change anything
user visible; all ABI changes have been done in the preceding patches.

[ tglx: Replaced the unsigned long cast ]

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210518191303.4135296-7-hpa@zytor.com
---
 arch/x86/entry/common.c        | 87 +++++++++++++++++++++++-----------
 arch/x86/include/asm/syscall.h |  2 +-
 2 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index f51bc17262db1..ee95fe3f15185 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -36,49 +36,81 @@
 #include <asm/irq_stack.h>
 
 #ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr)
+
+static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
+{
+	/*
+	 * Convert negative numbers to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int unr = nr;
+
+	if (likely(unr < NR_syscalls)) {
+		unr = array_index_nospec(unr, NR_syscalls);
+		regs->ax = sys_call_table[unr](regs);
+		return true;
+	}
+	return false;
+}
+
+static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
+{
+	/*
+	 * Adjust the starting offset of the table, and convert numbers
+	 * < __X32_SYSCALL_BIT to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int xnr = nr - __X32_SYSCALL_BIT;
+
+	if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
+		xnr = array_index_nospec(xnr, X32_NR_syscalls);
+		regs->ax = x32_sys_call_table[xnr](regs);
+		return true;
+	}
+	return false;
+}
+
+__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
 {
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
 
 	instrumentation_begin();
-	if (likely(nr < NR_syscalls)) {
-		nr = array_index_nospec(nr, NR_syscalls);
-		regs->ax = sys_call_table[nr](regs);
-#ifdef CONFIG_X86_X32_ABI
-	} else if (likely((nr & __X32_SYSCALL_BIT) &&
-			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
-		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
-					X32_NR_syscalls);
-		regs->ax = x32_sys_call_table[nr](regs);
-#endif
-	} else if (unlikely((int)nr != -1)) {
+
+	if (!do_syscall_x64(regs, nr) && !do_syscall_x32(regs, nr) && nr != -1) {
+		/* Invalid system call, but still a system call. */
 		regs->ax = __x64_sys_ni_syscall(regs);
 	}
+
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
 }
 #endif
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs)
+static __always_inline int syscall_32_enter(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_IA32_EMULATION))
 		current_thread_info()->status |= TS_COMPAT;
 
-	return (unsigned int)regs->orig_ax;
+	return (int)regs->orig_ax;
 }
 
 /*
  * Invoke a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.
  */
-static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
-						  unsigned int nr)
+static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 {
-	if (likely(nr < IA32_NR_syscalls)) {
-		nr = array_index_nospec(nr, IA32_NR_syscalls);
-		regs->ax = ia32_sys_call_table[nr](regs);
-	} else if (unlikely((int)nr != -1)) {
+	/*
+	 * Convert negative numbers to very high and thus out of range
+	 * numbers for comparisons.
+	 */
+	unsigned int unr = nr;
+
+	if (likely(unr < IA32_NR_syscalls)) {
+		unr = array_index_nospec(unr, IA32_NR_syscalls);
+		regs->ax = ia32_sys_call_table[unr](regs);
+	} else if (nr != -1) {
 		regs->ax = __ia32_sys_ni_syscall(regs);
 	}
 }
@@ -86,15 +118,15 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs,
 /* Handles int $0x80 */
 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr = syscall_32_enter(regs);
+	int nr = syscall_32_enter(regs);
 
 	add_random_kstack_offset();
 	/*
-	 * Subtlety here: if ptrace pokes something larger than 2^32-1 into
-	 * orig_ax, the unsigned int return value truncates it.  This may
-	 * or may not be necessary, but it matches the old asm behavior.
+	 * Subtlety here: if ptrace pokes something larger than 2^31-1 into
+	 * orig_ax, the int return value truncates it. This matches
+	 * the semantics of syscall_get_nr().
 	 */
-	nr = (unsigned int)syscall_enter_from_user_mode(regs, nr);
+	nr = syscall_enter_from_user_mode(regs, nr);
 	instrumentation_begin();
 
 	do_syscall_32_irqs_on(regs, nr);
@@ -105,7 +137,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
 
 static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 {
-	unsigned int nr = syscall_32_enter(regs);
+	int nr = syscall_32_enter(regs);
 	int res;
 
 	add_random_kstack_offset();
@@ -140,8 +172,7 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 		return false;
 	}
 
-	/* The case truncates any ptrace induced syscall nr > 2^32 -1 */
-	nr = (unsigned int)syscall_enter_from_user_mode_work(regs, nr);
+	nr = syscall_enter_from_user_mode_work(regs, nr);
 
 	/* Now this is just like a normal syscall. */
 	do_syscall_32_irqs_on(regs, nr);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index f6593cafdbd93..f7e2d82d24fb1 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -159,7 +159,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(struct pt_regs *regs, unsigned long nr);
+void do_syscall_64(struct pt_regs *regs, int nr);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);
 
-- 
GitLab


From a80c203c3f1c06d2201c19ae071d0ae770a2b1ca Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Tue, 25 May 2021 10:40:59 +0300
Subject: [PATCH 1501/3804] xhci: fix giving back URB with incorrect status
 regression in 5.12

5.12 kernel changes how xhci handles cancelled URBs and halted
endpoints. Among these changes cancelled and stalled URBs are no longer
given back before they are cleared from xHC hardware cache.

These changes unfortunately cleared the -EPIPE status of a stalled
transfer in one case before giving back the URB, causing a USB card reader
to stop working.

Fixes: 674f8438c121 ("xhci: split handling halted endpoints into two steps")
Cc: <stable@vger.kernel.org> # 5.12
Reported-by: Peter Ganzhorn <peter.ganzhorn@googlemail.com>
Tested-by: Peter Ganzhorn <peter.ganzhorn@googlemail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210525074100.1154090-2-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a8e4189277da8..256d336354a0b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -828,14 +828,10 @@ static void xhci_giveback_invalidated_tds(struct xhci_virt_ep *ep)
 	list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list,
 				 cancelled_td_list) {
 
-		/*
-		 * Doesn't matter what we pass for status, since the core will
-		 * just overwrite it (because the URB has been unlinked).
-		 */
 		ring = xhci_urb_to_transfer_ring(ep->xhci, td->urb);
 
 		if (td->cancel_status == TD_CLEARED)
-			xhci_td_cleanup(ep->xhci, td, ring, 0);
+			xhci_td_cleanup(ep->xhci, td, ring, td->status);
 
 		if (ep->xhci->xhc_state & XHCI_STATE_DYING)
 			return;
-- 
GitLab


From a7f2e9272aff1ccfe0fc801dab1d5a7a1c6b7ed2 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Tue, 25 May 2021 10:41:00 +0300
Subject: [PATCH 1502/3804] xhci: Fix 5.12 regression of missing xHC cache
 clearing command after a Stall

If an endpoint halts due to a stall then the dequeue pointer read from
hardware may already be set ahead of the stalled TRB.
After commit 674f8438c121 ("xhci: split handling halted endpoints into two
steps") in 5.12 xhci driver won't issue a Set TR Dequeue if hardware
dequeue pointer is already in the right place.

It turns out the "Set TR Dequeue pointer" command is needed anyway
because, in addition to moving the dequeue pointer, it also clears the
endpoint state and cache.

Fixes: 674f8438c121 ("xhci: split handling halted endpoints into two steps")
Cc: <stable@vger.kernel.org> # 5.12
Reported-by: Peter Ganzhorn <peter.ganzhorn@googlemail.com>
Tested-by: Peter Ganzhorn <peter.ganzhorn@googlemail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20210525074100.1154090-3-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 256d336354a0b..6acd2329e08d4 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -933,14 +933,18 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep)
 			continue;
 		}
 		/*
-		 * If ring stopped on the TD we need to cancel, then we have to
+		 * If a ring stopped on the TD we need to cancel then we have to
 		 * move the xHC endpoint ring dequeue pointer past this TD.
+		 * Rings halted due to STALL may show hw_deq is past the stalled
+		 * TD, but still require a set TR Deq command to flush xHC cache.
 		 */
 		hw_deq = xhci_get_hw_deq(xhci, ep->vdev, ep->ep_index,
 					 td->urb->stream_id);
 		hw_deq &= ~0xf;
 
-		if (trb_in_td(xhci, td->start_seg, td->first_trb,
+		if (td->cancel_status == TD_HALTED) {
+			cached_td = td;
+		} else if (trb_in_td(xhci, td->start_seg, td->first_trb,
 			      td->last_trb, hw_deq, false)) {
 			switch (td->cancel_status) {
 			case TD_CLEARED: /* TD is already no-op */
-- 
GitLab


From e11851429fdc23524aa244f76508c3c7aeaefdf6 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Thu, 13 May 2021 00:28:09 +0300
Subject: [PATCH 1503/3804] drm/i915: Reenable LTTPR non-transparent LT mode
 for DPCD_REV<1.4

The driver currently disables the LTTPR non-transparent link training
mode for sinks with a DPCD_REV<1.4, based on the following description
of the LTTPR DPCD register range in DP standard 2.0 (at the 0xF0000
register description):

"""
LTTPR-related registers at DPCD Addresses F0000h through F02FFh are valid
only for DPCD r1.4 (or higher).
"""

The transparent link training mode should still work fine, however the
implementation for this in some retimer FWs seems to be broken, see the
References: link below.

After discussions with DP standard authors the above "DPCD r1.4" does
not refer to the DPCD revision (stored in the DPCD_REV reg at 0x00000),
rather to the "LTTPR field data structure revision" stored in the
0xF0000 reg. An update request has been filed at vesa.org (see
wg/Link/documentComment/3746) for the upcoming v2.1 specification to
clarify the above description along the following lines:

"""
LTTPR-related registers at DPCD Addresses F0000h through F02FFh are
valid only for LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV 1.4 (or
higher)
"""

Based on my tests Windows uses the non-transparent link training mode
for DPCD_REV==1.2 sinks as well (so presumably for all DPCD_REVs), and
forcing it to use transparent mode on ICL/TGL platforms leads to the
same LT failure as reported at the References: link.

Based on the above let's assume that the transparent link training mode
is not well tested/supported and align the code to the correct
interpretation of what the r1.4 version refers to.

Reported-and-tested-by: Casey Harkins <caseyharkins@gmail.com>
Tested-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3415
Fixes: 264613b406eb ("drm/i915: Disable LTTPR support when the DPCD rev < 1.4")
Cc: <stable@vger.kernel.org> # v5.11+
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Khaled Almahallawy <khaled.almahallawy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210512212809.1234701-1-imre.deak@intel.com
(cherry picked from commit cb4920cc40f630b5a247f4ed7d3dea66749df588)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 .../drm/i915/display/intel_dp_link_training.c | 71 +++++++++----------
 1 file changed, 33 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 02a003fd48fb2..50cae0198a3d0 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -128,49 +128,13 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable)
 	return drm_dp_dpcd_write(&intel_dp->aux, DP_PHY_REPEATER_MODE, &val, 1) == 1;
 }
 
-/**
- * intel_dp_init_lttpr_and_dprx_caps - detect LTTPR and DPRX caps, init the LTTPR link training mode
- * @intel_dp: Intel DP struct
- *
- * Read the LTTPR common and DPRX capabilities and switch to non-transparent
- * link training mode if any is detected and read the PHY capabilities for all
- * detected LTTPRs. In case of an LTTPR detection error or if the number of
- * LTTPRs is more than is supported (8), fall back to the no-LTTPR,
- * transparent mode link training mode.
- *
- * Returns:
- *   >0  if LTTPRs were detected and the non-transparent LT mode was set. The
- *       DPRX capabilities are read out.
- *    0  if no LTTPRs or more than 8 LTTPRs were detected or in case of a
- *       detection failure and the transparent LT mode was set. The DPRX
- *       capabilities are read out.
- *   <0  Reading out the DPRX capabilities failed.
- */
-int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp)
+static int intel_dp_init_lttpr(struct intel_dp *intel_dp)
 {
 	int lttpr_count;
-	bool ret;
 	int i;
 
-	ret = intel_dp_read_lttpr_common_caps(intel_dp);
-
-	/* The DPTX shall read the DPRX caps after LTTPR detection. */
-	if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) {
-		intel_dp_reset_lttpr_common_caps(intel_dp);
-		return -EIO;
-	}
-
-	if (!ret)
-		return 0;
-
-	/*
-	 * The 0xF0000-0xF02FF range is only valid if the DPCD revision is
-	 * at least 1.4.
-	 */
-	if (intel_dp->dpcd[DP_DPCD_REV] < 0x14) {
-		intel_dp_reset_lttpr_common_caps(intel_dp);
+	if (!intel_dp_read_lttpr_common_caps(intel_dp))
 		return 0;
-	}
 
 	lttpr_count = drm_dp_lttpr_count(intel_dp->lttpr_common_caps);
 	/*
@@ -211,6 +175,37 @@ int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp)
 
 	return lttpr_count;
 }
+
+/**
+ * intel_dp_init_lttpr_and_dprx_caps - detect LTTPR and DPRX caps, init the LTTPR link training mode
+ * @intel_dp: Intel DP struct
+ *
+ * Read the LTTPR common and DPRX capabilities and switch to non-transparent
+ * link training mode if any is detected and read the PHY capabilities for all
+ * detected LTTPRs. In case of an LTTPR detection error or if the number of
+ * LTTPRs is more than is supported (8), fall back to the no-LTTPR,
+ * transparent mode link training mode.
+ *
+ * Returns:
+ *   >0  if LTTPRs were detected and the non-transparent LT mode was set. The
+ *       DPRX capabilities are read out.
+ *    0  if no LTTPRs or more than 8 LTTPRs were detected or in case of a
+ *       detection failure and the transparent LT mode was set. The DPRX
+ *       capabilities are read out.
+ *   <0  Reading out the DPRX capabilities failed.
+ */
+int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp)
+{
+	int lttpr_count = intel_dp_init_lttpr(intel_dp);
+
+	/* The DPTX shall read the DPRX caps after LTTPR detection. */
+	if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) {
+		intel_dp_reset_lttpr_common_caps(intel_dp);
+		return -EIO;
+	}
+
+	return lttpr_count;
+}
 EXPORT_SYMBOL(intel_dp_init_lttpr_and_dprx_caps);
 
 static u8 dp_voltage_max(u8 preemph)
-- 
GitLab


From a8b98c808eab3ec8f1b5a64be967b0f4af4cae43 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 24 May 2021 16:53:21 +0300
Subject: [PATCH 1504/3804] fanotify: fix permission model of unprivileged
 group

Reporting event->pid should depend on the privileges of the user that
initialized the group, not the privileges of the user reading the
events.

Use an internal group flag FANOTIFY_UNPRIV to record the fact that the
group was initialized by an unprivileged user.

To be on the safe side, the permissions to set up filesystem and mount
marks now require that both the user that initialized the group and
the user setting up the mark have CAP_SYS_ADMIN.

Link: https://lore.kernel.org/linux-fsdevel/CAOQ4uxiA77_P5vtv7e83g0+9d7B5W9ZTE4GfQEYbWmfT1rA=VA@mail.gmail.com/
Fixes: 7cea2a3c505e ("fanotify: support limited functionality for unprivileged users")
Cc: <Stable@vger.kernel.org> # v5.12+
Link: https://lore.kernel.org/r/20210524135321.2190062-1-amir73il@gmail.com
Reviewed-by: Matthew Bobrowski <repnop@google.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 30 ++++++++++++++++++++++++------
 fs/notify/fdinfo.c                 |  2 +-
 include/linux/fanotify.h           |  4 ++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 71fefb30e0156..be5b6d2c01e7a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -424,11 +424,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	 * events generated by the listener process itself, without disclosing
 	 * the pids of other processes.
 	 */
-	if (!capable(CAP_SYS_ADMIN) &&
+	if (FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
 	    task_tgid(current) != event->pid)
 		metadata.pid = 0;
 
-	if (path && path->mnt && path->dentry) {
+	/*
+	 * For now, fid mode is required for an unprivileged listener and
+	 * fid mode does not report fd in events.  Keep this check anyway
+	 * for safety in case fid mode requirement is relaxed in the future
+	 * to allow unprivileged listener to get events with no fd and no fid.
+	 */
+	if (!FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV) &&
+	    path && path->mnt && path->dentry) {
 		fd = create_fd(group, path, &f);
 		if (fd < 0)
 			return fd;
@@ -1040,6 +1047,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	int f_flags, fd;
 	unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
 	unsigned int class = flags & FANOTIFY_CLASS_BITS;
+	unsigned int internal_flags = 0;
 
 	pr_debug("%s: flags=%x event_f_flags=%x\n",
 		 __func__, flags, event_f_flags);
@@ -1053,6 +1061,13 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		 */
 		if ((flags & FANOTIFY_ADMIN_INIT_FLAGS) || !fid_mode)
 			return -EPERM;
+
+		/*
+		 * Setting the internal flag FANOTIFY_UNPRIV on the group
+		 * prevents setting mount/filesystem marks on this group and
+		 * prevents reporting pid and open fd in events.
+		 */
+		internal_flags |= FANOTIFY_UNPRIV;
 	}
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -1105,7 +1120,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		goto out_destroy_group;
 	}
 
-	group->fanotify_data.flags = flags;
+	group->fanotify_data.flags = flags | internal_flags;
 	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
 	group->fanotify_data.merge_hash = fanotify_alloc_merge_hash();
@@ -1305,11 +1320,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	group = f.file->private_data;
 
 	/*
-	 * An unprivileged user is not allowed to watch a mount point nor
-	 * a filesystem.
+	 * An unprivileged user is not allowed to setup mount nor filesystem
+	 * marks.  This also includes setting up such marks by a group that
+	 * was initialized by an unprivileged user.
 	 */
 	ret = -EPERM;
-	if (!capable(CAP_SYS_ADMIN) &&
+	if ((!capable(CAP_SYS_ADMIN) ||
+	     FAN_GROUP_FLAG(group, FANOTIFY_UNPRIV)) &&
 	    mark_type != FAN_MARK_INODE)
 		goto fput_and_out;
 
@@ -1460,6 +1477,7 @@ static int __init fanotify_user_setup(void)
 	max_marks = clamp(max_marks, FANOTIFY_OLD_DEFAULT_MAX_MARKS,
 				     FANOTIFY_DEFAULT_MAX_USER_MARKS);
 
+	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 10);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
 
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index a712b2aaa9ac9..57f0d5d9f934e 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -144,7 +144,7 @@ void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 	struct fsnotify_group *group = f->private_data;
 
 	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
-		   group->fanotify_data.flags,
+		   group->fanotify_data.flags & FANOTIFY_INIT_FLAGS,
 		   group->fanotify_data.f_flags);
 
 	show_fdinfo(m, f, fanotify_fdinfo);
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index bad41bcb25dfb..a16dbeced1528 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -51,6 +51,10 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 #define FANOTIFY_INIT_FLAGS	(FANOTIFY_ADMIN_INIT_FLAGS | \
 				 FANOTIFY_USER_INIT_FLAGS)
 
+/* Internal group flags */
+#define FANOTIFY_UNPRIV		0x80000000
+#define FANOTIFY_INTERNAL_GROUP_FLAGS	(FANOTIFY_UNPRIV)
+
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
 				 FAN_MARK_FILESYSTEM)
 
-- 
GitLab


From 19dee613816d5065ad09f2ccc20b35d23dca9f28 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 May 2021 11:03:32 +0100
Subject: [PATCH 1505/3804] netfs: Pass flags through to
 grab_cache_page_write_begin()

In netfs_write_begin(), pass the AOP flags through to
grab_cache_page_write_begin() so that a request to use GFP_NOFS is
honoured.

Fixes: e1b1240c1ff5 ("netfs: Add write_begin helper")
Reported-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/162090295383.3165945.13595101698295243662.stgit@warthog.procyon.org.uk # v1
---
 fs/netfs/read_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 193841d03de09..725614625ed48 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -1068,7 +1068,7 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
 
 retry:
-	page = grab_cache_page_write_begin(mapping, index, 0);
+	page = grab_cache_page_write_begin(mapping, index, flags);
 	if (!page)
 		return -ENOMEM;
 
-- 
GitLab


From b71c791254ff5e78a124c8949585dccd9e225e06 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 13 May 2021 11:40:27 +0100
Subject: [PATCH 1506/3804] netfs: Make CONFIG_NETFS_SUPPORT auto-selected
 rather than manual

Make the netfs helper library selected automatically by the things that use
it rather than being manually configured, even though it's required[1].

Fixes: 3a5829fefd3b ("netfs: Make a netfs helper module")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
cc: linux-mm@kvack.org
cc: linux-cachefs@redhat.com
cc: linux-afs@lists.infradead.org
cc: linux-nfs@vger.kernel.org
cc: linux-cifs@vger.kernel.org
cc: ceph-devel@vger.kernel.org
cc: v9fs-developer@lists.sourceforge.net
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/CAMuHMdXJZ7iNQE964CdBOU=vRKVMFzo=YF_eiwsGgqzuvZ+TuA@mail.gmail.com [1]
Link: https://lore.kernel.org/r/162090298141.3166007.2971118149366779916.stgit@warthog.procyon.org.uk # v1
---
 fs/netfs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig
index 578112713703b..b4db21022cb43 100644
--- a/fs/netfs/Kconfig
+++ b/fs/netfs/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 config NETFS_SUPPORT
-	tristate "Support for network filesystem high-level I/O"
+	tristate
 	help
 	  This option enables support for network filesystems, including
 	  helpers for high-level buffered I/O, abstracting out read
-- 
GitLab


From 5fafeeb4da1a1a3452fb8035e422d779777ed844 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 23 May 2021 15:45:26 +0200
Subject: [PATCH 1507/3804] platform/surface: aggregator_registry: Update
 comments for 15" AMD Surface Laptop 4

The 15" AMD version of the Surface Laptop 4 shares its WSID HID with the
15" AMD version of the Surface Laptop 3. Update the comments
accordingly.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210523134528.798887-2-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_aggregator_registry.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index 685d37a7add14..bdc09305aab75 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -156,7 +156,7 @@ static const struct software_node *ssam_node_group_sl2[] = {
 	NULL,
 };
 
-/* Devices for Surface Laptop 3. */
+/* Devices for Surface Laptop 3 and 4. */
 static const struct software_node *ssam_node_group_sl3[] = {
 	&ssam_node_root,
 	&ssam_node_bat_ac,
@@ -521,7 +521,7 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
 	/* Surface Laptop 3 (13", Intel) */
 	{ "MSHW0114", (unsigned long)ssam_node_group_sl3 },
 
-	/* Surface Laptop 3 (15", AMD) */
+	/* Surface Laptop 3 (15", AMD) and 4 (15", AMD) */
 	{ "MSHW0110", (unsigned long)ssam_node_group_sl3 },
 
 	/* Surface Laptop Go 1 */
-- 
GitLab


From 2f26dc05af87dfdb8eba831e59878ef3f48767be Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Sun, 23 May 2021 15:45:27 +0200
Subject: [PATCH 1508/3804] platform/surface: aggregator_registry: Add support
 for 13" Intel Surface Laptop 4

Add support for the 13" Intel version of the Surface Laptop 4.

Use the existing node group for the Surface Laptop 3 since the 15" AMD
version already shares its WSID HID with its predecessor and there don't
seem to be any significant differences with regards to SAM.

Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210523134528.798887-3-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_aggregator_registry.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c
index bdc09305aab75..ef83461fa536d 100644
--- a/drivers/platform/surface/surface_aggregator_registry.c
+++ b/drivers/platform/surface/surface_aggregator_registry.c
@@ -524,6 +524,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = {
 	/* Surface Laptop 3 (15", AMD) and 4 (15", AMD) */
 	{ "MSHW0110", (unsigned long)ssam_node_group_sl3 },
 
+	/* Surface Laptop 4 (13", Intel) */
+	{ "MSHW0250", (unsigned long)ssam_node_group_sl3 },
+
 	/* Surface Laptop Go 1 */
 	{ "MSHW0118", (unsigned long)ssam_node_group_slg1 },
 
-- 
GitLab


From e69012400b0cb42b2070748322cb72f9effec00f Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Tue, 25 May 2021 10:45:51 +0800
Subject: [PATCH 1509/3804] arm64: mm: don't use CON and BLK mapping if KFENCE
 is enabled

When we added KFENCE support for arm64, we intended that it would
force the entire linear map to be mapped at page granularity, but we
only enforced this in arch_add_memory() and not in map_mem(), so
memory mapped at boot time can be mapped at a larger granularity.

When booting a kernel with KFENCE=y and RODATA_FULL=n, this results in
the following WARNING at boot:

[    0.000000] ------------[ cut here ]------------
[    0.000000] WARNING: CPU: 0 PID: 0 at mm/memory.c:2462 apply_to_pmd_range+0xec/0x190
[    0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc1+ #10
[    0.000000] Hardware name: linux,dummy-virt (DT)
[    0.000000] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--)
[    0.000000] pc : apply_to_pmd_range+0xec/0x190
[    0.000000] lr : __apply_to_page_range+0x94/0x170
[    0.000000] sp : ffffffc010573e20
[    0.000000] x29: ffffffc010573e20 x28: ffffff801f400000 x27: ffffff801f401000
[    0.000000] x26: 0000000000000001 x25: ffffff801f400fff x24: ffffffc010573f28
[    0.000000] x23: ffffffc01002b710 x22: ffffffc0105fa450 x21: ffffffc010573ee4
[    0.000000] x20: ffffff801fffb7d0 x19: ffffff801f401000 x18: 00000000fffffffe
[    0.000000] x17: 000000000000003f x16: 000000000000000a x15: ffffffc01060b940
[    0.000000] x14: 0000000000000000 x13: 0098968000000000 x12: 0000000098968000
[    0.000000] x11: 0000000000000000 x10: 0000000098968000 x9 : 0000000000000001
[    0.000000] x8 : 0000000000000000 x7 : ffffffc010573ee4 x6 : 0000000000000001
[    0.000000] x5 : ffffffc010573f28 x4 : ffffffc01002b710 x3 : 0000000040000000
[    0.000000] x2 : ffffff801f5fffff x1 : 0000000000000001 x0 : 007800005f400705
[    0.000000] Call trace:
[    0.000000]  apply_to_pmd_range+0xec/0x190
[    0.000000]  __apply_to_page_range+0x94/0x170
[    0.000000]  apply_to_page_range+0x10/0x20
[    0.000000]  __change_memory_common+0x50/0xdc
[    0.000000]  set_memory_valid+0x30/0x40
[    0.000000]  kfence_init_pool+0x9c/0x16c
[    0.000000]  kfence_init+0x20/0x98
[    0.000000]  start_kernel+0x284/0x3f8

Fixes: 840b23986344 ("arm64, kfence: enable KFENCE for ARM64")
Cc: <stable@vger.kernel.org> # 5.12.x
Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marco Elver <elver@google.com>
Tested-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20210525104551.2ec37f77@xhacker.debian
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6dd9369e3ea0e..89b66ef43a0ff 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
+	if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
+	    IS_ENABLED(CONFIG_KFENCE))
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	/*
-- 
GitLab


From ff4cff962a7eedc73e54b5096693da7f86c61346 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 16 May 2021 17:01:08 -0700
Subject: [PATCH 1510/3804] MIPS: alchemy: xxs1500: add gpio-au1000.h header
 file

board-xxs1500.c references 2 functions without declaring them, so add
the header file to placate the build.

../arch/mips/alchemy/board-xxs1500.c: In function 'board_setup':
../arch/mips/alchemy/board-xxs1500.c:56:2: error: implicit declaration of function 'alchemy_gpio1_input_enable' [-Werror=implicit-function-declaration]
   56 |  alchemy_gpio1_input_enable();
../arch/mips/alchemy/board-xxs1500.c:57:2: error: implicit declaration of function 'alchemy_gpio2_enable'; did you mean 'alchemy_uart_enable'? [-Werror=implicit-function-declaration]
   57 |  alchemy_gpio2_enable();

Fixes: 8e026910fcd4 ("MIPS: Alchemy: merge GPR/MTX-1/XXS1500 board code into single files")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: linux-mips@vger.kernel.org
Cc: Manuel Lauss <manuel.lauss@googlemail.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Acked-by: Manuel Lauss <manuel.lauss@gmail.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/alchemy/board-xxs1500.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c
index b184baa4e56a6..f175bce2987fa 100644
--- a/arch/mips/alchemy/board-xxs1500.c
+++ b/arch/mips/alchemy/board-xxs1500.c
@@ -18,6 +18,7 @@
 #include <asm/reboot.h>
 #include <asm/setup.h>
 #include <asm/mach-au1x00/au1000.h>
+#include <asm/mach-au1x00/gpio-au1000.h>
 #include <prom.h>
 
 const char *get_system_type(void)
-- 
GitLab


From 6855adc2c5d9dff08be9e6e01deb319738b28780 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 20 May 2021 22:13:43 -0700
Subject: [PATCH 1511/3804] MIPS: launch.h: add include guard to prevent build
 errors

arch/mips/include/asm/mips-boards/launch.h needs an include guard
to prevent it from being #included more than once.
Prevents these build errors:

In file included from ../arch/mips/mti-malta/malta-amon.c:16:
../arch/mips/include/asm/mips-boards/launch.h:8:8: error: redefinition of 'struct cpulaunch'
    8 | struct cpulaunch {
      |        ^~~~~~~~~
In file included from ../arch/mips/include/asm/mips-cps.h:13,
                 from ../arch/mips/include/asm/smp-ops.h:16,
                 from ../arch/mips/include/asm/smp.h:21,
                 from ../include/linux/smp.h:114,
                 from ../arch/mips/mti-malta/malta-amon.c:12:
../arch/mips/include/asm/mips-boards/launch.h:8:8: note: originally defined here
    8 | struct cpulaunch {
      |        ^~~~~~~~~
make[3]: [../scripts/Makefile.build:273: arch/mips/mti-malta/malta-amon.o] Error 1 (ignored)

Fixes: 6decd1aad15f ("MIPS: add support for buggy MT7621S core detection")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: linux-mips@vger.kernel.org
Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Reviewed-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/include/asm/mips-boards/launch.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h
index f93aa5ee2e2e3..3481ed4c117bd 100644
--- a/arch/mips/include/asm/mips-boards/launch.h
+++ b/arch/mips/include/asm/mips-boards/launch.h
@@ -3,6 +3,9 @@
  *
  */
 
+#ifndef _ASM_MIPS_BOARDS_LAUNCH_H
+#define _ASM_MIPS_BOARDS_LAUNCH_H
+
 #ifndef _ASSEMBLER_
 
 struct cpulaunch {
@@ -34,3 +37,5 @@ struct cpulaunch {
 
 /* Polling period in count cycles for secondary CPU's */
 #define LAUNCHPERIOD	10000
+
+#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */
-- 
GitLab


From fef532ea0cd871afab7d9a7b6e9da99ac2c24371 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 16 May 2021 17:54:17 -0700
Subject: [PATCH 1512/3804] MIPS: ralink: export rt_sysc_membase for
 rt2880_wdt.c

rt2880_wdt.c uses (well, attempts to use) rt_sysc_membase. However,
when this watchdog driver is built as a loadable module, there is a
build error since the rt_sysc_membase symbol is not exported.
Export it to quell the build error.

ERROR: modpost: "rt_sysc_membase" [drivers/watchdog/rt2880_wdt.ko] undefined!

Fixes: 473cf939ff34 ("watchdog: add ralink watchdog driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Wim Van Sebroeck <wim@iguana.be>
Cc: John Crispin <john@phrozen.org>
Cc: linux-mips@vger.kernel.org
Cc: linux-watchdog@vger.kernel.org
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/ralink/of.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 0c5de07da097a..0135376c5de5d 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -8,6 +8,7 @@
 
 #include <linux/io.h>
 #include <linux/clk.h>
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/sizes.h>
 #include <linux/of_fdt.h>
@@ -25,6 +26,7 @@
 
 __iomem void *rt_sysc_membase;
 __iomem void *rt_memc_membase;
+EXPORT_SYMBOL_GPL(rt_sysc_membase);
 
 __iomem void *plat_of_remap_node(const char *node)
 {
-- 
GitLab


From 78cf0eb926cb1abeff2106bae67752e032fe5f3e Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Sat, 15 May 2021 19:02:01 +0800
Subject: [PATCH 1513/3804] MIPS: Fix kernel hang under FUNCTION_GRAPH_TRACER
 and PREEMPT_TRACER

After updating to the latest mainline kernel with the following three
configs, the kernel hangs during startup:

(1) CONFIG_FUNCTION_GRAPH_TRACER=y
(2) CONFIG_PREEMPT_TRACER=y
(3) CONFIG_FTRACE_STARTUP_TEST=y

After updating to the latest mainline kernel with the above two configs
(1) and (2), the kernel starts normally, but it still hangs when executing
the following command:

echo "function_graph" > /sys/kernel/debug/tracing/current_tracer

Without CONFIG_PREEMPT_TRACER=y, the above two kinds of kernel hangs
disappear, so at first glance it seems that CONFIG_PREEMPT_TRACER has
some influence on the function_graph tracer.

Using ejtag, I found that the epc address is related to preempt_enable()
in the file arch/mips/lib/mips-atomic.c. Because function tracing can
trace the preempt_{enable,disable} calls, replace them with
preempt_{enable,disable}_notrace to prevent function tracing from going
into an infinite loop, which fixes the kernel hang issue.

By the way, it seems that this commit is a complement and improvement of
commit f93a1a00f2bd ("MIPS: Fix crash that occurs when function tracing
is enabled").

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/lib/mips-atomic.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index de03838b343b8..a9b72eacfc0b3 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -37,7 +37,7 @@
  */
 notrace void arch_local_irq_disable(void)
 {
-	preempt_disable();
+	preempt_disable_notrace();
 
 	__asm__ __volatile__(
 	"	.set	push						\n"
@@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void)
 	: /* no inputs */
 	: "memory");
 
-	preempt_enable();
+	preempt_enable_notrace();
 }
 EXPORT_SYMBOL(arch_local_irq_disable);
 
@@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void)
 {
 	unsigned long flags;
 
-	preempt_disable();
+	preempt_disable_notrace();
 
 	__asm__ __volatile__(
 	"	.set	push						\n"
@@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void)
 	: /* no inputs */
 	: "memory");
 
-	preempt_enable();
+	preempt_enable_notrace();
 
 	return flags;
 }
@@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags)
 {
 	unsigned long __tmp1;
 
-	preempt_disable();
+	preempt_disable_notrace();
 
 	__asm__ __volatile__(
 	"	.set	push						\n"
@@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags)
 	: "0" (flags)
 	: "memory");
 
-	preempt_enable();
+	preempt_enable_notrace();
 }
 EXPORT_SYMBOL(arch_local_irq_restore);
 
-- 
GitLab


From a8deba8547e39f26440101164a3bbc2899c5b305 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Tue, 25 May 2021 09:41:39 +0800
Subject: [PATCH 1514/3804] bpftool: Add sock_release help info for cgroup
 attach/prog load command

The help information was not added at the time when the function got added.
Fix this and add the missing information to its cli, documentation and bash
completion.

Fixes: db94cc0b4805 ("bpftool: Add support for BPF_CGROUP_INET_SOCK_RELEASE")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20210525014139.323859-1-liujian56@huawei.com
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 4 +++-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 2 +-
 tools/bpf/bpftool/bash-completion/bpftool          | 6 +++---
 tools/bpf/bpftool/cgroup.c                         | 3 ++-
 tools/bpf/bpftool/prog.c                           | 2 +-
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 790944c356025..baee8591ac76a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -30,7 +30,8 @@ CGROUP COMMANDS
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
 |               **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
-|               **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
+|               **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
+|               **sock_release** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -106,6 +107,7 @@ DESCRIPTION
 		  **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
 		  **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
 		  **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
+		  **sock_release** closing a userspace inet socket (since 5.9).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 358c7309d4191..fe1b38e7e887d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -44,7 +44,7 @@ PROG COMMANDS
 |		**cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
 |               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-|		**cgroup/getsockopt** | **cgroup/setsockopt** |
+|		**cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
 |		**struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
 |	}
 |       *ATTACH_TYPE* := {
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index d67518bcbd448..cc33c5824a2f2 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -478,7 +478,7 @@ _bpftool()
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
                                 cgroup/sysctl cgroup/getsockopt \
-                                cgroup/setsockopt struct_ops \
+                                cgroup/setsockopt cgroup/sock_release struct_ops \
                                 fentry fexit freplace sk_lookup" -- \
                                                    "$cur" ) )
                             return 0
@@ -1021,7 +1021,7 @@ _bpftool()
                         device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
                         getpeername4 getpeername6 getsockname4 getsockname6 \
                         sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
-                        setsockopt'
+                        setsockopt sock_release'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
                     case $prev in
@@ -1032,7 +1032,7 @@ _bpftool()
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
                         post_bind4|post_bind6|connect4|connect6|getpeername4|\
                         getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
-                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
+                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index d901cc1b904af..6e53b1d393f4a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -28,7 +28,8 @@
 	"                        connect6 | getpeername4 | getpeername6 |\n"   \
 	"                        getsockname4 | getsockname6 | sendmsg4 |\n"   \
 	"                        sendmsg6 | recvmsg4 | recvmsg6 |\n"           \
-	"                        sysctl | getsockopt | setsockopt }"
+	"                        sysctl | getsockopt | setsockopt |\n"	       \
+	"                        sock_release }"
 
 static unsigned int query_flags;
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 3f067d2d75845..da4846c9856af 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv)
 		"                 cgroup/getpeername4 | cgroup/getpeername6 |\n"
 		"                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
 		"                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
-		"                 cgroup/getsockopt | cgroup/setsockopt |\n"
+		"                 cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
 		"                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
-- 
GitLab


From 1eb8a49836949a77c4f7d738786719e7fde0c333 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 25 May 2021 20:54:20 +0900
Subject: [PATCH 1515/3804] x86/syscalls: Clear 'offset' and 'prefix' in case
 they are set in env

If the environment variable 'prefix' is set on the build host, it is
wrongly used as syscall macro prefixes.

  $ export prefix=/usr
  $ make -s defconfig all
  In file included from ./arch/x86/include/asm/unistd.h:20,
                   from <stdin>:2:
  ./arch/x86/include/generated/uapi/asm/unistd_64.h:4:9: warning: missing whitespace after the macro name
      4 | #define __NR_/usrread 0
        |         ^~~~~

arch/x86/entry/syscalls/Makefile should clear 'offset' and 'prefix'.

Fixes: 3cba325b358f ("x86/syscalls: Switch to generic syscallhdr.sh")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210525115420.679416-1-masahiroy@kernel.org
---
 arch/x86/entry/syscalls/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile
index 8eb014bca8c94..5b3efed0e4e86 100644
--- a/arch/x86/entry/syscalls/Makefile
+++ b/arch/x86/entry/syscalls/Makefile
@@ -11,6 +11,8 @@ syscall64 := $(src)/syscall_64.tbl
 
 syshdr := $(srctree)/scripts/syscallhdr.sh
 systbl := $(srctree)/scripts/syscalltbl.sh
+offset :=
+prefix :=
 
 quiet_cmd_syshdr = SYSHDR  $@
       cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) --emit-nr \
-- 
GitLab


From d48ca5b98fa5d21444e04bb17373d339200b679a Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:05 -0400
Subject: [PATCH 1516/3804] x86/uml/syscalls: Remove array index from syscall
 initializers

The recent syscall table generator rework removed the index from the
initializers for native x86 syscall tables, but missed the UML syscall
tables.

Fixes: 44fe4895f47c ("Stop filling syscall arrays with *_sys_ni_syscall")
Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-2-brgerst@gmail.com
---
 arch/x86/um/sys_call_table_32.c | 2 +-
 arch/x86/um/sys_call_table_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index f8323104e3536..0575decb5e544 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -31,7 +31,7 @@
 #include <asm/syscalls_32.h>
 
 #undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
+#define __SYSCALL(nr, sym) sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 5ed665dc785fb..95725b5a41ac8 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -39,7 +39,7 @@
 #include <asm/syscalls_64.h>
 
 #undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
+#define __SYSCALL(nr, sym) sym,
 
 extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
 
-- 
GitLab


From fd9e8691f38712892fa2ac73132dcc8b85b07a8f Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:06 -0400
Subject: [PATCH 1517/3804] x86/syscalls: Remove -Wno-override-init for syscall
 tables

Commit 44fe4895f47c ("Stop filling syscall arrays with *_sys_ni_syscall")
removes the need for -Wno-override-init, since the table is now filled
sequentially instead of overriding a default value.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-3-brgerst@gmail.com
---
 arch/x86/entry/Makefile | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 08bf95dbc9112..94d2843ce80c6 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -17,10 +17,6 @@ CFLAGS_syscall_64.o		+= -fno-stack-protector
 CFLAGS_syscall_32.o		+= -fno-stack-protector
 CFLAGS_syscall_x32.o		+= -fno-stack-protector
 
-CFLAGS_syscall_64.o		+= $(call cc-option,-Wno-override-init,)
-CFLAGS_syscall_32.o		+= $(call cc-option,-Wno-override-init,)
-CFLAGS_syscall_x32.o		+= $(call cc-option,-Wno-override-init,)
-
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
-- 
GitLab


From 48f7eee81cd53a94699d28959566b41a9dcac1d9 Mon Sep 17 00:00:00 2001
From: Brian Gerst <brgerst@gmail.com>
Date: Mon, 24 May 2021 14:17:07 -0400
Subject: [PATCH 1518/3804] x86/syscalls: Don't adjust CFLAGS for syscall
 tables

The syscall_*.c files only contain data (the syscall tables).  There
is no need to adjust CFLAGS for tracing and stack protector since they
contain no code.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Link: https://lore.kernel.org/r/20210524181707.132844-4-brgerst@gmail.com
---
 arch/x86/entry/Makefile | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 94d2843ce80c6..7fec5dcf64386 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -8,14 +8,8 @@ UBSAN_SANITIZE := n
 KCOV_INSTRUMENT := n
 
 CFLAGS_REMOVE_common.o		= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_64.o	= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_32.o	= $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_syscall_x32.o	= $(CC_FLAGS_FTRACE)
 
 CFLAGS_common.o			+= -fno-stack-protector
-CFLAGS_syscall_64.o		+= -fno-stack-protector
-CFLAGS_syscall_32.o		+= -fno-stack-protector
-CFLAGS_syscall_x32.o		+= -fno-stack-protector
 
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
-- 
GitLab


From c3bf5129f33923c92bf3bddaf4359b7b25ecb4ba Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 27 Apr 2021 10:38:33 +0200
Subject: [PATCH 1519/3804] media: v4l2-ctrls: always copy the controls on
 completion

When v4l2_ctrl_request_complete() is called and there is no control
handler object found in the request, then create such an object so
that all controls at completion state can be stored and are available
to userspace.

Otherwise any attempt by userspace to read the completed request data
will fail.

If allocating the control handler object failed, then indicate that
by returning ENOMEM when attempting to get the controls from the
completed request instead of returning ENOENT.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 36 ++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index d4e2c7318ee6d..09992e76bad64 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -4121,8 +4121,19 @@ v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
 	obj = media_request_object_find(req, &req_ops, hdl);
 	if (obj)
 		return obj;
+	/*
+	 * If there are no controls in this completed request,
+	 * then that can only happen if:
+	 *
+	 * 1) no controls were present in the queued request, and
+	 * 2) v4l2_ctrl_request_complete() could not allocate a
+	 *    control handler object to store the completed state in.
+	 *
+	 * So return ENOMEM to indicate that there was an out-of-memory
+	 * error.
+	 */
 	if (!set)
-		return ERR_PTR(-ENOENT);
+		return ERR_PTR(-ENOMEM);
 
 	new_hdl = kzalloc(sizeof(*new_hdl), GFP_KERNEL);
 	if (!new_hdl)
@@ -4133,8 +4144,8 @@ v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
 	if (!ret)
 		ret = v4l2_ctrl_request_bind(req, new_hdl, hdl);
 	if (ret) {
+		v4l2_ctrl_handler_free(new_hdl);
 		kfree(new_hdl);
-
 		return ERR_PTR(ret);
 	}
 
@@ -4728,8 +4739,25 @@ void v4l2_ctrl_request_complete(struct media_request *req,
 	 * wants to leave the controls unchanged.
 	 */
 	obj = media_request_object_find(req, &req_ops, main_hdl);
-	if (!obj)
-		return;
+	if (!obj) {
+		int ret;
+
+		/* Create a new request so the driver can return controls */
+		hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+		if (!hdl)
+			return;
+
+		ret = v4l2_ctrl_handler_init(hdl, (main_hdl->nr_of_buckets - 1) * 8);
+		if (!ret)
+			ret = v4l2_ctrl_request_bind(req, hdl, main_hdl);
+		if (ret) {
+			v4l2_ctrl_handler_free(hdl);
+			kfree(hdl);
+			return;
+		}
+		hdl->request_is_queued = true;
+		obj = media_request_object_find(req, &req_ops, main_hdl);
+	}
 	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
 
 	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-- 
GitLab


From 71c689dc2e732d4cb190aaf0edea73116b1611bd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 27 Apr 2021 14:07:03 +0200
Subject: [PATCH 1520/3804] media: v4l2-ctrls: split up into four source files

The v4l2-ctrls.c source has become much too big, so split it up
into four separate parts:

v4l2-ctrls-core.c: contains the core framework code
v4l2-ctrls-api.c: contains the uAPI interface to the framework
v4l2-ctrls-defs.c: contains the control definitions
v4l2-ctrls-request.c: contains the Request API helpers

And it adds a new v4l2-ctrls-priv.h.

No code was changed, but a number of checkpatch.pl warnings were
fixed (alignment, f == NULL -> !f, long comment block coding style,
unsigned -> unsigned int).

The copyright statements were updated as well since they were
quite out of date.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec-stateless.rst   |    4 +-
 drivers/media/v4l2-core/Makefile              |    5 +-
 drivers/media/v4l2-core/v4l2-ctrls-api.c      | 1225 ++++
 drivers/media/v4l2-core/v4l2-ctrls-core.c     | 1939 +++++++
 drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 1575 +++++
 drivers/media/v4l2-core/v4l2-ctrls-priv.h     |   96 +
 drivers/media/v4l2-core/v4l2-ctrls-request.c  |  496 ++
 drivers/media/v4l2-core/v4l2-ctrls.c          | 5111 -----------------
 8 files changed, 5336 insertions(+), 5115 deletions(-)
 create mode 100644 drivers/media/v4l2-core/v4l2-ctrls-api.c
 create mode 100644 drivers/media/v4l2-core/v4l2-ctrls-core.c
 create mode 100644 drivers/media/v4l2-core/v4l2-ctrls-defs.c
 create mode 100644 drivers/media/v4l2-core/v4l2-ctrls-priv.h
 create mode 100644 drivers/media/v4l2-core/v4l2-ctrls-request.c
 delete mode 100644 drivers/media/v4l2-core/v4l2-ctrls.c

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
index 2aa508ffb6b9f..72f5e85b4f342 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec-stateless.rst
@@ -1353,8 +1353,8 @@ FWHT Flags
     * - __u8
       - ``picture_coding_type``
       - Picture coding type for the frame covered by the current slice
-	(V4L2_MPEG2_PICTURE_CODING_TYPE_I, V4L2_MPEG2_PICTURE_CODING_TYPE_P or
-	V4L2_MPEG2_PICTURE_CODING_TYPE_B).
+	(V4L2_MPEG2_PIC_CODING_TYPE_I, V4L2_MPEG2_PIC_CODING_TYPE_P or
+	V4L2_MPEG2_PIC_CODING_TYPE_B).
     * - __u8
       - ``picture_structure``
       - Picture structure (1: interlaced top field, 2: interlaced bottom field,
diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index e4cd589b99a50..ad967b72fb5d0 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -6,8 +6,9 @@
 tuner-objs	:=	tuner-core.o
 
 videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-fh.o \
-			v4l2-event.o v4l2-ctrls.o v4l2-subdev.o \
-			v4l2-async.o v4l2-common.o
+			v4l2-event.o v4l2-subdev.o v4l2-async.o v4l2-common.o \
+			v4l2-ctrls-core.o v4l2-ctrls-api.o \
+			v4l2-ctrls-request.o v4l2-ctrls-defs.o
 videodev-$(CONFIG_COMPAT) += v4l2-compat-ioctl32.o
 videodev-$(CONFIG_TRACEPOINTS) += v4l2-trace.o
 videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c b/drivers/media/v4l2-core/v4l2-ctrls-api.c
new file mode 100644
index 0000000000000..db9baa0bd05fa
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
@@ -0,0 +1,1225 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * V4L2 controls framework uAPI implementation:
+ *
+ * Copyright (C) 2010-2021  Hans Verkuil <hverkuil-cisco@xs4all.nl>
+ */
+
+#define pr_fmt(fmt) "v4l2-ctrls: " fmt
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-ioctl.h>
+
+#include "v4l2-ctrls-priv.h"
+
+/* Internal temporary helper struct, one for each v4l2_ext_control */
+struct v4l2_ctrl_helper {
+	/* Pointer to the control reference of the master control */
+	struct v4l2_ctrl_ref *mref;
+	/* The control ref corresponding to the v4l2_ext_control ID field. */
+	struct v4l2_ctrl_ref *ref;
+	/*
+	 * v4l2_ext_control index of the next control belonging to the
+	 * same cluster, or 0 if there isn't any.
+	 */
+	u32 next;
+};
+
+/*
+ * Helper functions to copy control payload data from kernel space to
+ * user space and vice versa.
+ */
+
+/* Helper function: copy the given control value back to the caller */
+static int ptr_to_user(struct v4l2_ext_control *c,
+		       struct v4l2_ctrl *ctrl,
+		       union v4l2_ctrl_ptr ptr)
+{
+	u32 len;
+
+	if (ctrl->is_ptr && !ctrl->is_string)
+		return copy_to_user(c->ptr, ptr.p_const, c->size) ?
+		       -EFAULT : 0;
+
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_STRING:
+		len = strlen(ptr.p_char);
+		if (c->size < len + 1) {
+			c->size = ctrl->elem_size;
+			return -ENOSPC;
+		}
+		return copy_to_user(c->string, ptr.p_char, len + 1) ?
+		       -EFAULT : 0;
+	case V4L2_CTRL_TYPE_INTEGER64:
+		c->value64 = *ptr.p_s64;
+		break;
+	default:
+		c->value = *ptr.p_s32;
+		break;
+	}
+	return 0;
+}
+
+/* Helper function: copy the current control value back to the caller */
+static int cur_to_user(struct v4l2_ext_control *c, struct v4l2_ctrl *ctrl)
+{
+	return ptr_to_user(c, ctrl, ctrl->p_cur);
+}
+
+/* Helper function: copy the new control value back to the caller */
+static int new_to_user(struct v4l2_ext_control *c,
+		       struct v4l2_ctrl *ctrl)
+{
+	return ptr_to_user(c, ctrl, ctrl->p_new);
+}
+
+/* Helper function: copy the request value back to the caller */
+static int req_to_user(struct v4l2_ext_control *c,
+		       struct v4l2_ctrl_ref *ref)
+{
+	return ptr_to_user(c, ref->ctrl, ref->p_req);
+}
+
+/* Helper function: copy the initial control value back to the caller */
+static int def_to_user(struct v4l2_ext_control *c, struct v4l2_ctrl *ctrl)
+{
+	int idx;
+
+	for (idx = 0; idx < ctrl->elems; idx++)
+		ctrl->type_ops->init(ctrl, idx, ctrl->p_new);
+
+	return ptr_to_user(c, ctrl, ctrl->p_new);
+}
+
+/* Helper function: copy the caller-provided value to the given control value */
+static int user_to_ptr(struct v4l2_ext_control *c,
+		       struct v4l2_ctrl *ctrl,
+		       union v4l2_ctrl_ptr ptr)
+{
+	int ret;
+	u32 size;
+
+	ctrl->is_new = 1;
+	if (ctrl->is_ptr && !ctrl->is_string) {
+		unsigned int idx;
+
+		ret = copy_from_user(ptr.p, c->ptr, c->size) ? -EFAULT : 0;
+		if (ret || !ctrl->is_array)
+			return ret;
+		for (idx = c->size / ctrl->elem_size; idx < ctrl->elems; idx++)
+			ctrl->type_ops->init(ctrl, idx, ptr);
+		return 0;
+	}
+
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_INTEGER64:
+		*ptr.p_s64 = c->value64;
+		break;
+	case V4L2_CTRL_TYPE_STRING:
+		size = c->size;
+		if (size == 0)
+			return -ERANGE;
+		if (size > ctrl->maximum + 1)
+			size = ctrl->maximum + 1;
+		ret = copy_from_user(ptr.p_char, c->string, size) ? -EFAULT : 0;
+		if (!ret) {
+			char last = ptr.p_char[size - 1];
+
+			ptr.p_char[size - 1] = 0;
+			/*
+			 * If the string was longer than ctrl->maximum,
+			 * then return an error.
+			 */
+			if (strlen(ptr.p_char) == ctrl->maximum && last)
+				return -ERANGE;
+		}
+		return ret;
+	default:
+		*ptr.p_s32 = c->value;
+		break;
+	}
+	return 0;
+}
+
+/* Helper function: copy the caller-provided value as the new control value */
+static int user_to_new(struct v4l2_ext_control *c, struct v4l2_ctrl *ctrl)
+{
+	return user_to_ptr(c, ctrl, ctrl->p_new);
+}
+
+/*
+ * VIDIOC_G/TRY/S_EXT_CTRLS implementation
+ */
+
+/*
+ * Some general notes on the atomic requirements of VIDIOC_G/TRY/S_EXT_CTRLS:
+ *
+ * It is not a fully atomic operation, just best-effort only. After all, if
+ * multiple controls have to be set through multiple i2c writes (for example)
+ * then some initial writes may succeed while others fail, thus leaving the
+ * system in an inconsistent state. The question is how much effort you are
+ * willing to spend on trying to make something atomic that really isn't.
+ *
+ * From the point of view of an application the main requirement is that
+ * when you call VIDIOC_S_EXT_CTRLS and some values are invalid then an
+ * error should be returned without actually affecting any controls.
+ *
+ * If all the values are correct, then it is acceptable to just give up
+ * in case of low-level errors.
+ *
+ * It is important though that the application can tell when only a partial
+ * configuration was done. The way we do that is through the error_idx field
+ * of struct v4l2_ext_controls: if that is equal to the count field then no
+ * controls were affected. Otherwise all controls before that index were
+ * successful in performing their 'get' or 'set' operation, the control at
+ * the given index failed, and you don't know what happened with the controls
+ * after the failed one. Since if they were part of a control cluster they
+ * could have been successfully processed (if a cluster member was encountered
+ * at index < error_idx), they could have failed (if a cluster member was at
+ * error_idx), or they may not have been processed yet (if the first cluster
+ * member appeared after error_idx).
+ *
+ * It is all fairly theoretical, though. In practice all you can do is to
+ * bail out. If error_idx == count, then it is an application bug. If
+ * error_idx < count then it is only an application bug if the error code was
+ * EBUSY. That usually means that something started streaming just when you
+ * tried to set the controls. In all other cases it is a driver/hardware
+ * problem and all you can do is to retry or bail out.
+ *
+ * Note that these rules do not apply to VIDIOC_TRY_EXT_CTRLS: since that
+ * never modifies controls the error_idx is just set to whatever control
+ * has an invalid value.
+ */
+
+/*
+ * Prepare for the extended g/s/try functions.
+ * Find the controls in the control array and do some basic checks.
+ */
+static int prepare_ext_ctrls(struct v4l2_ctrl_handler *hdl,
+			     struct v4l2_ext_controls *cs,
+			     struct v4l2_ctrl_helper *helpers,
+			     struct video_device *vdev,
+			     bool get)
+{
+	struct v4l2_ctrl_helper *h;
+	bool have_clusters = false;
+	u32 i;
+
+	for (i = 0, h = helpers; i < cs->count; i++, h++) {
+		struct v4l2_ext_control *c = &cs->controls[i];
+		struct v4l2_ctrl_ref *ref;
+		struct v4l2_ctrl *ctrl;
+		u32 id = c->id & V4L2_CTRL_ID_MASK;
+
+		cs->error_idx = i;
+
+		if (cs->which &&
+		    cs->which != V4L2_CTRL_WHICH_DEF_VAL &&
+		    cs->which != V4L2_CTRL_WHICH_REQUEST_VAL &&
+		    V4L2_CTRL_ID2WHICH(id) != cs->which) {
+			dprintk(vdev,
+				"invalid which 0x%x or control id 0x%x\n",
+				cs->which, id);
+			return -EINVAL;
+		}
+
+		/*
+		 * Old-style private controls are not allowed for
+		 * extended controls.
+		 */
+		if (id >= V4L2_CID_PRIVATE_BASE) {
+			dprintk(vdev,
+				"old-style private controls not allowed\n");
+			return -EINVAL;
+		}
+		ref = find_ref_lock(hdl, id);
+		if (!ref) {
+			dprintk(vdev, "cannot find control id 0x%x\n", id);
+			return -EINVAL;
+		}
+		h->ref = ref;
+		ctrl = ref->ctrl;
+		if (ctrl->flags & V4L2_CTRL_FLAG_DISABLED) {
+			dprintk(vdev, "control id 0x%x is disabled\n", id);
+			return -EINVAL;
+		}
+
+		if (ctrl->cluster[0]->ncontrols > 1)
+			have_clusters = true;
+		if (ctrl->cluster[0] != ctrl)
+			ref = find_ref_lock(hdl, ctrl->cluster[0]->id);
+		if (ctrl->is_ptr && !ctrl->is_string) {
+			unsigned int tot_size = ctrl->elems * ctrl->elem_size;
+
+			if (c->size < tot_size) {
+				/*
+				 * In the get case the application first
+				 * queries to obtain the size of the control.
+				 */
+				if (get) {
+					c->size = tot_size;
+					return -ENOSPC;
+				}
+				dprintk(vdev,
+					"pointer control id 0x%x size too small, %d bytes but %d bytes needed\n",
+					id, c->size, tot_size);
+				return -EFAULT;
+			}
+			c->size = tot_size;
+		}
+		/* Store the ref to the master control of the cluster */
+		h->mref = ref;
+		/*
+		 * Initially set next to 0, meaning that there is no other
+		 * control in this helper array belonging to the same
+		 * cluster.
+		 */
+		h->next = 0;
+	}
+
+	/*
+	 * We are done if there were no controls that belong to a multi-
+	 * control cluster.
+	 */
+	if (!have_clusters)
+		return 0;
+
+	/*
+	 * The code below figures out in O(n) time which controls in the list
+	 * belong to the same cluster.
+	 */
+
+	/* This has to be done with the handler lock taken. */
+	mutex_lock(hdl->lock);
+
+	/* First zero the helper field in the master control references */
+	for (i = 0; i < cs->count; i++)
+		helpers[i].mref->helper = NULL;
+	for (i = 0, h = helpers; i < cs->count; i++, h++) {
+		struct v4l2_ctrl_ref *mref = h->mref;
+
+		/*
+		 * If the mref->helper is set, then it points to an earlier
+		 * helper that belongs to the same cluster.
+		 */
+		if (mref->helper) {
+			/*
+			 * Set the next field of mref->helper to the current
+			 * index: this means that the earlier helper now
+			 * points to the next helper in the same cluster.
+			 */
+			mref->helper->next = i;
+			/*
+			 * mref should be set only for the first helper in the
+			 * cluster, clear the others.
+			 */
+			h->mref = NULL;
+		}
+		/* Point the mref helper to the current helper struct. */
+		mref->helper = h;
+	}
+	mutex_unlock(hdl->lock);
+	return 0;
+}
+
+/*
+ * Handles the corner case where cs->count == 0. It checks whether the
+ * specified control class exists. If that class ID is 0, then it checks
+ * whether there are any controls at all.
+ */
+static int class_check(struct v4l2_ctrl_handler *hdl, u32 which)
+{
+	if (which == 0 || which == V4L2_CTRL_WHICH_DEF_VAL ||
+	    which == V4L2_CTRL_WHICH_REQUEST_VAL)
+		return 0;
+	return find_ref_lock(hdl, which | 1) ? 0 : -EINVAL;
+}
+
+/*
+ * Get extended controls. Allocates the helpers array if needed.
+ *
+ * Note that v4l2_g_ext_ctrls_common() with 'which' set to
+ * V4L2_CTRL_WHICH_REQUEST_VAL is only called if the request was
+ * completed, and in that case valid_p_req is true for all controls.
+ */
+int v4l2_g_ext_ctrls_common(struct v4l2_ctrl_handler *hdl,
+			    struct v4l2_ext_controls *cs,
+			    struct video_device *vdev)
+{
+	struct v4l2_ctrl_helper helper[4];
+	struct v4l2_ctrl_helper *helpers = helper;
+	int ret;
+	int i, j;
+	bool is_default, is_request;
+
+	is_default = (cs->which == V4L2_CTRL_WHICH_DEF_VAL);
+	is_request = (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL);
+
+	cs->error_idx = cs->count;
+	cs->which = V4L2_CTRL_ID2WHICH(cs->which);
+
+	if (!hdl)
+		return -EINVAL;
+
+	if (cs->count == 0)
+		return class_check(hdl, cs->which);
+
+	if (cs->count > ARRAY_SIZE(helper)) {
+		helpers = kvmalloc_array(cs->count, sizeof(helper[0]),
+					 GFP_KERNEL);
+		if (!helpers)
+			return -ENOMEM;
+	}
+
+	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, true);
+	cs->error_idx = cs->count;
+
+	for (i = 0; !ret && i < cs->count; i++)
+		if (helpers[i].ref->ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)
+			ret = -EACCES;
+
+	for (i = 0; !ret && i < cs->count; i++) {
+		struct v4l2_ctrl *master;
+		bool is_volatile = false;
+		u32 idx = i;
+
+		if (!helpers[i].mref)
+			continue;
+
+		master = helpers[i].mref->ctrl;
+		cs->error_idx = i;
+
+		v4l2_ctrl_lock(master);
+
+		/*
+		 * g_volatile_ctrl will update the new control values.
+		 * This makes no sense for V4L2_CTRL_WHICH_DEF_VAL and
+		 * V4L2_CTRL_WHICH_REQUEST_VAL. In the case of requests
+		 * it is v4l2_ctrl_request_complete() that copies the
+		 * volatile controls at the time of request completion
+		 * to the request, so you don't want to do that again.
+		 */
+		if (!is_default && !is_request &&
+		    ((master->flags & V4L2_CTRL_FLAG_VOLATILE) ||
+		    (master->has_volatiles && !is_cur_manual(master)))) {
+			for (j = 0; j < master->ncontrols; j++)
+				cur_to_new(master->cluster[j]);
+			ret = call_op(master, g_volatile_ctrl);
+			is_volatile = true;
+		}
+
+		if (ret) {
+			v4l2_ctrl_unlock(master);
+			break;
+		}
+
+		/*
+		 * Copy the default value (if is_default is true), the
+		 * request value (if is_request is true and p_req is valid),
+		 * the new volatile value (if is_volatile is true) or the
+		 * current value.
+		 */
+		do {
+			struct v4l2_ctrl_ref *ref = helpers[idx].ref;
+
+			if (is_default)
+				ret = def_to_user(cs->controls + idx, ref->ctrl);
+			else if (is_request && ref->valid_p_req)
+				ret = req_to_user(cs->controls + idx, ref);
+			else if (is_volatile)
+				ret = new_to_user(cs->controls + idx, ref->ctrl);
+			else
+				ret = cur_to_user(cs->controls + idx, ref->ctrl);
+			idx = helpers[idx].next;
+		} while (!ret && idx);
+
+		v4l2_ctrl_unlock(master);
+	}
+
+	if (cs->count > ARRAY_SIZE(helper))
+		kvfree(helpers);
+	return ret;
+}
+
+int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
+		     struct media_device *mdev, struct v4l2_ext_controls *cs)
+{
+	if (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL)
+		return v4l2_g_ext_ctrls_request(hdl, vdev, mdev, cs);
+
+	return v4l2_g_ext_ctrls_common(hdl, cs, vdev);
+}
+EXPORT_SYMBOL(v4l2_g_ext_ctrls);
+
+/* Validate the requested controls; cs->error_idx tracks the one in progress */
+static int validate_ctrls(struct v4l2_ext_controls *cs,
+			  struct v4l2_ctrl_helper *helpers,
+			  struct video_device *vdev,
+			  bool set)
+{
+	unsigned int i;
+	int ret = 0;
+
+	cs->error_idx = cs->count;
+	for (i = 0; i < cs->count; i++) {
+		struct v4l2_ctrl *ctrl = helpers[i].ref->ctrl;
+		union v4l2_ctrl_ptr p_new;
+
+		cs->error_idx = i;
+
+		if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY) {
+			dprintk(vdev,
+				"control id 0x%x is read-only\n",
+				ctrl->id);
+			return -EACCES;
+		}
+		/*
+		 * Up-front check so that a grabbed control is rejected before
+		 * anything is set: once the set path fails part-way, other
+		 * controls may have been set already. The set path presumably
+		 * repeats this test and has the final say. NOTE(review): this
+		 * comment used to name try_set_control_cluster(); the setter
+		 * called in this file is try_or_set_cluster() - confirm.
+		 */
+		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)) {
+			dprintk(vdev,
+				"control id 0x%x is grabbed, cannot set\n",
+				ctrl->id);
+			return -EBUSY;
+		}
+		/*
+		 * Skip validation for now if the payload needs to be copied
+		 * from userspace into kernelspace. We'll validate those later.
+		 */
+		if (ctrl->is_ptr)
+			continue;
+		if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
+			p_new.p_s64 = &cs->controls[i].value64;
+		else
+			p_new.p_s32 = &cs->controls[i].value;
+		ret = validate_new(ctrl, p_new);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/* Try (set == false) or try-and-set (set == true) the controls in cs */
+int try_set_ext_ctrls_common(struct v4l2_fh *fh,
+			     struct v4l2_ctrl_handler *hdl,
+			     struct v4l2_ext_controls *cs,
+			     struct video_device *vdev, bool set)
+{
+	struct v4l2_ctrl_helper helper[4];
+	struct v4l2_ctrl_helper *helpers = helper;
+	unsigned int i, j;
+	int ret;
+
+	cs->error_idx = cs->count;
+
+	/* Default value cannot be changed */
+	if (cs->which == V4L2_CTRL_WHICH_DEF_VAL) {
+		dprintk(vdev, "%s: cannot change default value\n",
+			video_device_node_name(vdev));
+		return -EINVAL;
+	}
+
+	cs->which = V4L2_CTRL_ID2WHICH(cs->which);
+
+	if (!hdl) {
+		dprintk(vdev, "%s: invalid null control handler\n",
+			video_device_node_name(vdev));
+		return -EINVAL;
+	}
+
+	if (cs->count == 0)
+		return class_check(hdl, cs->which);
+
+	if (cs->count > ARRAY_SIZE(helper)) {
+		helpers = kvmalloc_array(cs->count, sizeof(helper[0]),
+					 GFP_KERNEL);
+		if (!helpers)
+			return -ENOMEM;
+	}
+	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, false);
+	if (!ret)
+		ret = validate_ctrls(cs, helpers, vdev, set);
+	if (ret && set)
+		cs->error_idx = cs->count;
+	for (i = 0; !ret && i < cs->count; i++) {
+		struct v4l2_ctrl *master;
+		u32 idx = i;
+
+		if (!helpers[i].mref)
+			continue;
+
+		cs->error_idx = i;
+		master = helpers[i].mref->ctrl;
+		v4l2_ctrl_lock(master);
+
+		/* Reset the 'is_new' flags of the cluster */
+		for (j = 0; j < master->ncontrols; j++)
+			if (master->cluster[j])
+				master->cluster[j]->is_new = 0;
+
+		/*
+		 * For volatile autoclusters that are currently in auto mode
+		 * we need to discover if it will be set to manual mode.
+		 * If so, then we have to copy the current volatile values
+		 * first since those will become the new manual values (which
+		 * may be overwritten by explicit new values from this set
+		 * of controls).
+		 */
+		if (master->is_auto && master->has_volatiles &&
+		    !is_cur_manual(master)) {
+			/* Pick an initial non-manual value */
+			s32 new_auto_val = master->manual_mode_value + 1;
+			u32 tmp_idx = idx;
+
+			do {
+				/*
+				 * Check if the auto control is part of the
+				 * list, and remember the new value.
+				 */
+				if (helpers[tmp_idx].ref->ctrl == master)
+					new_auto_val = cs->controls[tmp_idx].value;
+				tmp_idx = helpers[tmp_idx].next;
+			} while (tmp_idx);
+			/*
+			 * If the new value == the manual value, then copy
+			 * the current volatile values.
+			 */
+			if (new_auto_val == master->manual_mode_value)
+				update_from_auto_cluster(master);
+		}
+
+		/*
+		 * Copy the new caller-supplied control values.
+		 * user_to_new() sets 'is_new' to 1.
+		 */
+		do {
+			struct v4l2_ctrl *ctrl = helpers[idx].ref->ctrl;
+
+			ret = user_to_new(cs->controls + idx, ctrl);
+			if (!ret && ctrl->is_ptr) {
+				ret = validate_new(ctrl, ctrl->p_new);
+				if (ret)
+					dprintk(vdev,
+						"failed to validate control %s (%d)\n",
+						v4l2_ctrl_get_name(ctrl->id), ret);
+			}
+			idx = helpers[idx].next;
+		} while (!ret && idx);
+
+		if (!ret)
+			ret = try_or_set_cluster(fh, master,
+						 !hdl->req_obj.req && set, 0);
+		if (!ret && hdl->req_obj.req && set) {
+			for (j = 0; j < master->ncontrols; j++) {
+				struct v4l2_ctrl *ctrl = master->cluster[j];
+
+				if (ctrl)	/* cluster slots can be NULL */
+					new_to_req(find_ref(hdl, ctrl->id));
+			}
+		}
+
+		/* Copy the new values back to userspace. */
+		if (!ret) {
+			idx = i;
+			do {
+				ret = new_to_user(cs->controls + idx,
+						  helpers[idx].ref->ctrl);
+				idx = helpers[idx].next;
+			} while (!ret && idx);
+		}
+		v4l2_ctrl_unlock(master);
+	}
+
+	if (cs->count > ARRAY_SIZE(helper))
+		kvfree(helpers);
+	return ret;
+}
+
+static int try_set_ext_ctrls(struct v4l2_fh *fh,
+			     struct v4l2_ctrl_handler *hdl,
+			     struct video_device *vdev,
+			     struct media_device *mdev,
+			     struct v4l2_ext_controls *cs, bool set)
+{
+	int ret;
+
+	if (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL)
+		return try_set_ext_ctrls_request(fh, hdl, vdev, mdev, cs, set);
+
+	ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set);
+	if (ret)
+		dprintk(vdev,
+			"%s: try_set_ext_ctrls_common failed (%d)\n",
+			video_device_node_name(vdev), ret);
+
+	return ret;
+}
+
+int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
+		       struct video_device *vdev,
+		       struct media_device *mdev,
+		       struct v4l2_ext_controls *cs)
+{
+	return try_set_ext_ctrls(NULL, hdl, vdev, mdev, cs, false);
+}
+EXPORT_SYMBOL(v4l2_try_ext_ctrls);
+
+int v4l2_s_ext_ctrls(struct v4l2_fh *fh,
+		     struct v4l2_ctrl_handler *hdl,
+		     struct video_device *vdev,
+		     struct media_device *mdev,
+		     struct v4l2_ext_controls *cs)
+{
+	return try_set_ext_ctrls(fh, hdl, vdev, mdev, cs, true);
+}
+EXPORT_SYMBOL(v4l2_s_ext_ctrls);
+
+/*
+ * VIDIOC_G/S_CTRL implementation
+ */
+
+/* Helper to read a single integer or 64-bit integer control into @c */
+static int get_ctrl(struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c)
+{
+	struct v4l2_ctrl *master = ctrl->cluster[0];
+	int ret = 0;
+	int i;
+
+	/* Compound controls are not supported: the new_to_user() and
+	 * cur_to_user() calls below would need to be modified not to access
+	 * userspace memory when called from get_ctrl().
+	 */
+	if (!ctrl->is_int && ctrl->type != V4L2_CTRL_TYPE_INTEGER64)
+		return -EINVAL;
+
+	if (ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)
+		return -EACCES;
+
+	v4l2_ctrl_lock(master);
+	/* Volatile: copy cur to new, run g_volatile_ctrl, return new value */
+	if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
+		for (i = 0; i < master->ncontrols; i++)
+			cur_to_new(master->cluster[i]);
+		ret = call_op(master, g_volatile_ctrl);
+		new_to_user(c, ctrl);
+	} else {
+		cur_to_user(c, ctrl);
+	}
+	v4l2_ctrl_unlock(master);
+	return ret;
+}
+
+int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *control)
+{
+	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, control->id);
+	struct v4l2_ext_control c = { control->id }; /* rest is zeroed */
+	int ret;
+
+	if (!ctrl || !ctrl->is_int)
+		return -EINVAL;
+	ret = get_ctrl(ctrl, &c);	/* can fail before it writes to c */
+	control->value = c.value;
+	return ret;
+}
+EXPORT_SYMBOL(v4l2_g_ctrl);
+
+/* Validate and apply the new value of @ctrl; callers take the lock first */
+static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags)
+{
+	struct v4l2_ctrl *master = ctrl->cluster[0];
+	int ret;
+	int i;
+
+	/* Reset the 'is_new' flags of the cluster */
+	for (i = 0; i < master->ncontrols; i++)
+		if (master->cluster[i])
+			master->cluster[i]->is_new = 0;
+
+	ret = validate_new(ctrl, ctrl->p_new);
+	if (ret)
+		return ret;
+
+	/*
+	 * For autoclusters with volatiles that are switched from auto to
+	 * manual mode we have to update the current volatile values since
+	 * those will become the initial manual values after such a switch.
+	 */
+	if (master->is_auto && master->has_volatiles && ctrl == master &&
+	    !is_cur_manual(master) && ctrl->val == master->manual_mode_value)
+		update_from_auto_cluster(master);
+
+	ctrl->is_new = 1;
+	return try_or_set_cluster(fh, master, true, ch_flags);
+}
+
+/* VIDIOC_S_CTRL helper: copy in, set and read back under the control lock */
+static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl,
+			 struct v4l2_ext_control *c)
+{
+	int ret;
+
+	v4l2_ctrl_lock(ctrl);
+	/* Only set the control if copying in the new value succeeded */
+	ret = user_to_new(c, ctrl) ?: set_ctrl(fh, ctrl, 0);
+	if (!ret)
+		cur_to_user(c, ctrl);
+	v4l2_ctrl_unlock(ctrl);
+	return ret;
+}
+
+int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
+		struct v4l2_control *control)
+{
+	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, control->id);
+	struct v4l2_ext_control c = { control->id };
+	int ret;
+
+	if (!ctrl || !ctrl->is_int)
+		return -EINVAL;
+
+	if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)
+		return -EACCES;
+
+	c.value = control->value;
+	ret = set_ctrl_lock(fh, ctrl, &c);
+	control->value = c.value;
+	return ret;
+}
+EXPORT_SYMBOL(v4l2_s_ctrl);
+
+/*
+ * Helper functions for drivers to get/set controls.
+ */
+
+s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct v4l2_ext_control c;
+
+	/* It's a driver bug to call this on a control that is not is_int. */
+	if (WARN_ON(!ctrl->is_int))
+		return 0;
+	c.value = 0;	/* returned unchanged if get_ctrl() fails early */
+	get_ctrl(ctrl, &c);
+	return c.value;
+}
+EXPORT_SYMBOL(v4l2_ctrl_g_ctrl);
+
+s64 v4l2_ctrl_g_ctrl_int64(struct v4l2_ctrl *ctrl)
+{
+	struct v4l2_ext_control c;
+
+	/* It's a driver bug to call this on a non-INTEGER64 control. */
+	if (WARN_ON(ctrl->is_ptr || ctrl->type != V4L2_CTRL_TYPE_INTEGER64))
+		return 0;
+	c.value64 = 0;	/* returned unchanged if get_ctrl() fails early */
+	get_ctrl(ctrl, &c);
+	return c.value64;
+}
+EXPORT_SYMBOL(v4l2_ctrl_g_ctrl_int64);
+
+int __v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val)
+{
+	lockdep_assert_held(ctrl->handler->lock);
+
+	/* It's a driver bug if this happens. */
+	if (WARN_ON(!ctrl->is_int))
+		return -EINVAL;
+	ctrl->val = val;
+	return set_ctrl(NULL, ctrl, 0);
+}
+EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl);
+
+int __v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val)
+{
+	lockdep_assert_held(ctrl->handler->lock);
+
+	/* It's a driver bug if this happens. */
+	if (WARN_ON(ctrl->is_ptr || ctrl->type != V4L2_CTRL_TYPE_INTEGER64))
+		return -EINVAL;
+	*ctrl->p_new.p_s64 = val;
+	return set_ctrl(NULL, ctrl, 0);
+}
+EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_int64);
+
+int __v4l2_ctrl_s_ctrl_string(struct v4l2_ctrl *ctrl, const char *s)
+{
+	lockdep_assert_held(ctrl->handler->lock);
+
+	/* It's a driver bug if this happens. */
+	if (WARN_ON(ctrl->type != V4L2_CTRL_TYPE_STRING))
+		return -EINVAL;
+	strscpy(ctrl->p_new.p_char, s, ctrl->maximum + 1);
+	return set_ctrl(NULL, ctrl, 0);
+}
+EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_string);
+
+int __v4l2_ctrl_s_ctrl_compound(struct v4l2_ctrl *ctrl,
+				enum v4l2_ctrl_type type, const void *p)
+{
+	lockdep_assert_held(ctrl->handler->lock);
+
+	/* It's a driver bug if this happens. */
+	if (WARN_ON(ctrl->type != type))
+		return -EINVAL;
+	memcpy(ctrl->p_new.p, p, ctrl->elems * ctrl->elem_size);
+	return set_ctrl(NULL, ctrl, 0);
+}
+EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_compound);
+
+/*
+ * Modify the range of a control. The caller must hold ctrl->handler->lock.
+ */
+int __v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
+			     s64 min, s64 max, u64 step, s64 def)
+{
+	bool value_changed;
+	bool range_changed = false;
+	int ret;
+
+	lockdep_assert_held(ctrl->handler->lock);
+
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_INTEGER:
+	case V4L2_CTRL_TYPE_INTEGER64:
+	case V4L2_CTRL_TYPE_BOOLEAN:
+	case V4L2_CTRL_TYPE_MENU:
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+	case V4L2_CTRL_TYPE_BITMASK:
+	case V4L2_CTRL_TYPE_U8:
+	case V4L2_CTRL_TYPE_U16:
+	case V4L2_CTRL_TYPE_U32:
+		if (ctrl->is_array)
+			return -EINVAL;
+		ret = check_range(ctrl->type, min, max, step, def);
+		if (ret)
+			return ret;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (ctrl->minimum != min || ctrl->maximum != max ||
+	    ctrl->step != step || ctrl->default_value != def) {
+		range_changed = true;
+		ctrl->minimum = min;
+		ctrl->maximum = max;
+		ctrl->step = step;
+		ctrl->default_value = def;
+	}
+	cur_to_new(ctrl);	/* revalidate the current value */
+	if (validate_new(ctrl, ctrl->p_new)) { /* cur rejected: use def */
+		if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
+			*ctrl->p_new.p_s64 = def;
+		else
+			*ctrl->p_new.p_s32 = def;
+	}
+
+	if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
+		value_changed = *ctrl->p_new.p_s64 != *ctrl->p_cur.p_s64;
+	else
+		value_changed = *ctrl->p_new.p_s32 != *ctrl->p_cur.p_s32;
+	if (value_changed)
+		ret = set_ctrl(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE);
+	else if (range_changed)
+		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE);
+	return ret;
+}
+EXPORT_SYMBOL(__v4l2_ctrl_modify_range);
+
+/* Implement VIDIOC_QUERY_EXT_CTRL */
+int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_query_ext_ctrl *qc)
+{
+	const unsigned int next_flags = V4L2_CTRL_FLAG_NEXT_CTRL | V4L2_CTRL_FLAG_NEXT_COMPOUND;
+	u32 id = qc->id & V4L2_CTRL_ID_MASK;
+	struct v4l2_ctrl_ref *ref;
+	struct v4l2_ctrl *ctrl;
+
+	if (!hdl)
+		return -EINVAL;
+
+	mutex_lock(hdl->lock);
+
+	/* Try to find it */
+	ref = find_ref(hdl, id);
+
+	if ((qc->id & next_flags) && !list_empty(&hdl->ctrl_refs)) {
+		bool is_compound;
+		/* Match any control that is not compound */
+		unsigned int mask = 1;
+		bool match = false;
+
+		if ((qc->id & next_flags) == V4L2_CTRL_FLAG_NEXT_COMPOUND) {
+			/* Match any compound control */
+			match = true;
+		} else if ((qc->id & next_flags) == next_flags) {
+			/* Match any control, compound or not */
+			mask = 0;
+		}
+
+		/* Find the next control with ID > qc->id */
+
+		/* Did we reach the end of the control list? */
+		if (id >= node2id(hdl->ctrl_refs.prev)) {
+			ref = NULL; /* Yes, so there is no next control */
+		} else if (ref) {
+			/*
+			 * We found a control with the given ID, so just get
+			 * the next valid one in the list.
+			 */
+			list_for_each_entry_continue(ref, &hdl->ctrl_refs, node) {
+				is_compound = ref->ctrl->is_array ||
+					ref->ctrl->type >= V4L2_CTRL_COMPOUND_TYPES;
+				if (id < ref->ctrl->id &&
+				    (is_compound & mask) == match)
+					break;
+			}
+			if (&ref->node == &hdl->ctrl_refs)
+				ref = NULL;
+		} else {
+			/*
+			 * No control with the given ID exists, so start
+			 * searching for the next largest ID. We know there
+			 * is one, otherwise the first 'if' above would have
+			 * been true.
+			 */
+			list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+				is_compound = ref->ctrl->is_array ||
+					ref->ctrl->type >= V4L2_CTRL_COMPOUND_TYPES;
+				if (id < ref->ctrl->id &&
+				    (is_compound & mask) == match)
+					break;
+			}
+			if (&ref->node == &hdl->ctrl_refs)
+				ref = NULL;
+		}
+	}
+	mutex_unlock(hdl->lock);
+
+	if (!ref)
+		return -EINVAL;
+
+	ctrl = ref->ctrl;
+	memset(qc, 0, sizeof(*qc));
+	if (id >= V4L2_CID_PRIVATE_BASE)
+		qc->id = id;
+	else
+		qc->id = ctrl->id;
+	strscpy(qc->name, ctrl->name, sizeof(qc->name));
+	qc->flags = user_flags(ctrl);
+	qc->type = ctrl->type;
+	qc->elem_size = ctrl->elem_size;
+	qc->elems = ctrl->elems;
+	qc->nr_of_dims = ctrl->nr_of_dims;
+	memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
+	qc->minimum = ctrl->minimum;
+	qc->maximum = ctrl->maximum;
+	qc->default_value = ctrl->default_value;
+	if (ctrl->type == V4L2_CTRL_TYPE_MENU ||
+	    ctrl->type == V4L2_CTRL_TYPE_INTEGER_MENU)
+		qc->step = 1;
+	else
+		qc->step = ctrl->step;
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_query_ext_ctrl);
+
+/* Implement VIDIOC_QUERYCTRL as a wrapper around v4l2_query_ext_ctrl() */
+int v4l2_queryctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_queryctrl *qc)
+{
+	struct v4l2_query_ext_ctrl qec = { qc->id };
+	int rc;
+
+	rc = v4l2_query_ext_ctrl(hdl, &qec);
+	if (rc)
+		return rc;
+
+	qc->id = qec.id;
+	qc->type = qec.type;
+	qc->flags = qec.flags;
+	strscpy(qc->name, qec.name, sizeof(qc->name));
+	switch (qc->type) {
+	case V4L2_CTRL_TYPE_INTEGER:
+	case V4L2_CTRL_TYPE_BOOLEAN:
+	case V4L2_CTRL_TYPE_MENU:
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+	case V4L2_CTRL_TYPE_STRING:
+	case V4L2_CTRL_TYPE_BITMASK:
+		qc->minimum = qec.minimum;
+		qc->maximum = qec.maximum;
+		qc->step = qec.step;
+		qc->default_value = qec.default_value;
+		break;
+	default:	/* range fields are zeroed for all other types */
+		qc->minimum = 0;
+		qc->maximum = 0;
+		qc->step = 0;
+		qc->default_value = 0;
+		break;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_queryctrl);
+
+/* Implement VIDIOC_QUERYMENU: return the name/value of item qm->index */
+int v4l2_querymenu(struct v4l2_ctrl_handler *hdl, struct v4l2_querymenu *qm)
+{
+	struct v4l2_ctrl *ctrl;
+	u32 i = qm->index;
+
+	ctrl = v4l2_ctrl_find(hdl, qm->id);
+	if (!ctrl)
+		return -EINVAL;
+
+	qm->reserved = 0;
+	/* Only menu controls that actually have a menu table can be queried */
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_MENU:
+		if (!ctrl->qmenu)
+			return -EINVAL;
+		break;
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+		if (!ctrl->qmenu_int)
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (i < ctrl->minimum || i > ctrl->maximum)
+		return -EINVAL;
+
+	/* Skip masked items; shifting 1ULL by 64 or more bits is undefined */
+	if (i < 64 && (ctrl->menu_skip_mask & (1ULL << i)))
+		return -EINVAL;
+	/* Empty menu items should also be skipped */
+	if (ctrl->type == V4L2_CTRL_TYPE_MENU) {
+		if (!ctrl->qmenu[i] || ctrl->qmenu[i][0] == '\0')
+			return -EINVAL;
+		strscpy(qm->name, ctrl->qmenu[i], sizeof(qm->name));
+	} else {
+		qm->value = ctrl->qmenu_int[i];
+	}
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_querymenu);
+
+/*
+ * VIDIOC_LOG_STATUS helpers
+ */
+
+int v4l2_ctrl_log_status(struct file *file, void *fh)
+{
+	struct video_device *vfd = video_devdata(file);
+	struct v4l2_fh *vfh = file->private_data;
+
+	if (test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags) && vfd->v4l2_dev)
+		v4l2_ctrl_handler_log_status(vfh->ctrl_handler,
+					     vfd->v4l2_dev->name);
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_ctrl_log_status);
+
+int v4l2_ctrl_subdev_log_status(struct v4l2_subdev *sd)
+{
+	v4l2_ctrl_handler_log_status(sd->ctrl_handler, sd->name);
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_ctrl_subdev_log_status);
+
+/*
+ * VIDIOC_(UN)SUBSCRIBE_EVENT implementation
+ */
+
+static int v4l2_ctrl_add_event(struct v4l2_subscribed_event *sev,
+			       unsigned int elems)
+{
+	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id);
+
+	if (!ctrl)
+		return -EINVAL;
+
+	v4l2_ctrl_lock(ctrl);
+	list_add_tail(&sev->node, &ctrl->ev_subs);
+	if (ctrl->type != V4L2_CTRL_TYPE_CTRL_CLASS &&
+	    (sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL))
+		send_initial_event(sev->fh, ctrl);
+	v4l2_ctrl_unlock(ctrl);
+	return 0;
+}
+
+static void v4l2_ctrl_del_event(struct v4l2_subscribed_event *sev)
+{
+	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id);
+
+	if (!ctrl)
+		return;
+
+	v4l2_ctrl_lock(ctrl);
+	list_del(&sev->node);
+	v4l2_ctrl_unlock(ctrl);
+}
+
+void v4l2_ctrl_replace(struct v4l2_event *old, const struct v4l2_event *new)
+{
+	u32 old_changes = old->u.ctrl.changes;
+
+	old->u.ctrl = new->u.ctrl;
+	old->u.ctrl.changes |= old_changes;
+}
+EXPORT_SYMBOL(v4l2_ctrl_replace);
+
+void v4l2_ctrl_merge(const struct v4l2_event *old, struct v4l2_event *new)
+{
+	new->u.ctrl.changes |= old->u.ctrl.changes;
+}
+EXPORT_SYMBOL(v4l2_ctrl_merge);
+
+const struct v4l2_subscribed_event_ops v4l2_ctrl_sub_ev_ops = {
+	.add = v4l2_ctrl_add_event,
+	.del = v4l2_ctrl_del_event,
+	.replace = v4l2_ctrl_replace,
+	.merge = v4l2_ctrl_merge,
+};
+EXPORT_SYMBOL(v4l2_ctrl_sub_ev_ops);
+
+int v4l2_ctrl_subscribe_event(struct v4l2_fh *fh,
+			      const struct v4l2_event_subscription *sub)
+{
+	if (sub->type == V4L2_EVENT_CTRL)
+		return v4l2_event_subscribe(fh, sub, 0, &v4l2_ctrl_sub_ev_ops);
+	return -EINVAL;
+}
+EXPORT_SYMBOL(v4l2_ctrl_subscribe_event);
+
+int v4l2_ctrl_subdev_subscribe_event(struct v4l2_subdev *sd, struct v4l2_fh *fh,
+				     struct v4l2_event_subscription *sub)
+{
+	if (!sd->ctrl_handler)
+		return -EINVAL;
+	return v4l2_ctrl_subscribe_event(fh, sub);
+}
+EXPORT_SYMBOL(v4l2_ctrl_subdev_subscribe_event);
+
+/*
+ * poll helper: reports EPOLLPRI when an event is pending on the file handle
+ */
+__poll_t v4l2_ctrl_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct v4l2_fh *fh = file->private_data;
+
+	poll_wait(file, &fh->wait, wait);
+	if (v4l2_event_pending(fh))
+		return EPOLLPRI;
+	return 0;
+}
+EXPORT_SYMBOL(v4l2_ctrl_poll);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c
new file mode 100644
index 0000000000000..0814392243572
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
@@ -0,0 +1,1939 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * V4L2 controls framework core implementation.
+ *
+ * Copyright (C) 2010-2021  Hans Verkuil <hverkuil-cisco@xs4all.nl>
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-fwnode.h>
+
+#include "v4l2-ctrls-priv.h"
+
+static const union v4l2_ctrl_ptr ptr_null;
+
+static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl,
+		       u32 changes)
+{
+	memset(ev, 0, sizeof(*ev));
+	ev->type = V4L2_EVENT_CTRL;
+	ev->id = ctrl->id;
+	ev->u.ctrl.changes = changes;
+	ev->u.ctrl.type = ctrl->type;
+	ev->u.ctrl.flags = user_flags(ctrl);
+	if (ctrl->is_ptr)
+		ev->u.ctrl.value64 = 0;
+	else
+		ev->u.ctrl.value64 = *ctrl->p_cur.p_s64;
+	ev->u.ctrl.minimum = ctrl->minimum;
+	ev->u.ctrl.maximum = ctrl->maximum;
+	if (ctrl->type == V4L2_CTRL_TYPE_MENU
+	    || ctrl->type == V4L2_CTRL_TYPE_INTEGER_MENU)
+		ev->u.ctrl.step = 1;
+	else
+		ev->u.ctrl.step = ctrl->step;
+	ev->u.ctrl.default_value = ctrl->default_value;
+}
+
+void send_initial_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl)
+{
+	struct v4l2_event ev;
+	u32 changes = V4L2_EVENT_CTRL_CH_FLAGS;
+
+	if (!(ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY))
+		changes |= V4L2_EVENT_CTRL_CH_VALUE;
+	fill_event(&ev, ctrl, changes);
+	v4l2_event_queue_fh(fh, &ev);
+}
+
+void send_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 changes)
+{
+	struct v4l2_event ev;
+	struct v4l2_subscribed_event *sev;
+
+	if (list_empty(&ctrl->ev_subs))
+		return;
+	fill_event(&ev, ctrl, changes);
+
+	list_for_each_entry(sev, &ctrl->ev_subs, node)
+		if (sev->fh != fh ||
+		    (sev->flags & V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK))
+			v4l2_event_queue_fh(sev->fh, &ev);
+}
+
+static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx,
+		      union v4l2_ctrl_ptr ptr1,
+		      union v4l2_ctrl_ptr ptr2)
+{
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_BUTTON:
+		return false;
+	case V4L2_CTRL_TYPE_STRING:
+		idx *= ctrl->elem_size;
+		/* strings are always 0-terminated */
+		return !strcmp(ptr1.p_char + idx, ptr2.p_char + idx);
+	case V4L2_CTRL_TYPE_INTEGER64:
+		return ptr1.p_s64[idx] == ptr2.p_s64[idx];
+	case V4L2_CTRL_TYPE_U8:
+		return ptr1.p_u8[idx] == ptr2.p_u8[idx];
+	case V4L2_CTRL_TYPE_U16:
+		return ptr1.p_u16[idx] == ptr2.p_u16[idx];
+	case V4L2_CTRL_TYPE_U32:
+		return ptr1.p_u32[idx] == ptr2.p_u32[idx];
+	default:
+		if (ctrl->is_int)
+			return ptr1.p_s32[idx] == ptr2.p_s32[idx];
+		idx *= ctrl->elem_size;
+		return !memcmp(ptr1.p_const + idx, ptr2.p_const + idx,
+			       ctrl->elem_size);
+	}
+}
+
+/* Default intra MPEG-2 quantisation coefficients, from the specification. */
+static const u8 mpeg2_intra_quant_matrix[64] = {
+	8,  16, 16, 19, 16, 19, 22, 22,
+	22, 22, 22, 22, 26, 24, 26, 27,
+	27, 27, 26, 26, 26, 26, 27, 27,
+	27, 29, 29, 29, 34, 34, 34, 29,
+	29, 29, 27, 27, 29, 29, 32, 32,
+	34, 34, 37, 38, 37, 35, 35, 34,
+	35, 38, 38, 40, 40, 40, 48, 48,
+	46, 46, 56, 56, 58, 69, 69, 83
+};
+
+static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
+			      union v4l2_ctrl_ptr ptr)
+{
+	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
+	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
+	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant;
+	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
+	struct v4l2_ctrl_fwht_params *p_fwht_params;
+	void *p = ptr.p + idx * ctrl->elem_size;
+
+	if (ctrl->p_def.p_const)
+		memcpy(p, ctrl->p_def.p_const, ctrl->elem_size);
+	else
+		memset(p, 0, ctrl->elem_size);
+
+	switch ((u32)ctrl->type) {
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		p_mpeg2_sequence = p;
+
+		/* 4:2:0 */
+		p_mpeg2_sequence->chroma_format = 1;
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		p_mpeg2_picture = p;
+
+		/* interlaced top field */
+		p_mpeg2_picture->picture_structure = V4L2_MPEG2_PIC_TOP_FIELD;
+		p_mpeg2_picture->picture_coding_type =
+					V4L2_MPEG2_PIC_CODING_TYPE_I;
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		p_mpeg2_quant = p;
+
+		memcpy(p_mpeg2_quant->intra_quantiser_matrix,
+		       mpeg2_intra_quant_matrix,
+		       ARRAY_SIZE(mpeg2_intra_quant_matrix));
+		/*
+		 * The default non-intra MPEG-2 quantisation
+		 * coefficients are all 16, as per the specification.
+		 */
+		memset(p_mpeg2_quant->non_intra_quantiser_matrix, 16,
+		       sizeof(p_mpeg2_quant->non_intra_quantiser_matrix));
+		break;
+	case V4L2_CTRL_TYPE_VP8_FRAME:
+		p_vp8_frame = p;
+		p_vp8_frame->num_dct_parts = 1;
+		break;
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		p_fwht_params = p;
+		p_fwht_params->version = V4L2_FWHT_VERSION;
+		p_fwht_params->width = 1280;
+		p_fwht_params->height = 720;
+		p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV |
+			(2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET);
+		break;
+	}
+}
+
+static void std_init(const struct v4l2_ctrl *ctrl, u32 idx,
+		     union v4l2_ctrl_ptr ptr)
+{
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_STRING:
+		idx *= ctrl->elem_size;
+		memset(ptr.p_char + idx, ' ', ctrl->minimum);
+		ptr.p_char[idx + ctrl->minimum] = '\0';
+		break;
+	case V4L2_CTRL_TYPE_INTEGER64:
+		ptr.p_s64[idx] = ctrl->default_value;
+		break;
+	case V4L2_CTRL_TYPE_INTEGER:
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+	case V4L2_CTRL_TYPE_MENU:
+	case V4L2_CTRL_TYPE_BITMASK:
+	case V4L2_CTRL_TYPE_BOOLEAN:
+		ptr.p_s32[idx] = ctrl->default_value;
+		break;
+	case V4L2_CTRL_TYPE_BUTTON:
+	case V4L2_CTRL_TYPE_CTRL_CLASS:
+		ptr.p_s32[idx] = 0;
+		break;
+	case V4L2_CTRL_TYPE_U8:
+		ptr.p_u8[idx] = ctrl->default_value;
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		ptr.p_u16[idx] = ctrl->default_value;
+		break;
+	case V4L2_CTRL_TYPE_U32:
+		ptr.p_u32[idx] = ctrl->default_value;
+		break;
+	default:
+		std_init_compound(ctrl, idx, ptr);
+		break;
+	}
+}
+
+static void std_log(const struct v4l2_ctrl *ctrl)
+{
+	union v4l2_ctrl_ptr ptr = ctrl->p_cur;
+
+	if (ctrl->is_array) {
+		unsigned i;
+
+		for (i = 0; i < ctrl->nr_of_dims; i++)
+			pr_cont("[%u]", ctrl->dims[i]);
+		pr_cont(" ");
+	}
+
+	switch (ctrl->type) {
+	case V4L2_CTRL_TYPE_INTEGER:
+		pr_cont("%d", *ptr.p_s32);
+		break;
+	case V4L2_CTRL_TYPE_BOOLEAN:
+		pr_cont("%s", *ptr.p_s32 ? "true" : "false");
+		break;
+	case V4L2_CTRL_TYPE_MENU:
+		pr_cont("%s", ctrl->qmenu[*ptr.p_s32]);
+		break;
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+		pr_cont("%lld", ctrl->qmenu_int[*ptr.p_s32]);
+		break;
+	case V4L2_CTRL_TYPE_BITMASK:
+		pr_cont("0x%08x", *ptr.p_s32);
+		break;
+	case V4L2_CTRL_TYPE_INTEGER64:
+		pr_cont("%lld", *ptr.p_s64);
+		break;
+	case V4L2_CTRL_TYPE_STRING:
+		pr_cont("%s", ptr.p_char);
+		break;
+	case V4L2_CTRL_TYPE_U8:
+		pr_cont("%u", (unsigned)*ptr.p_u8);
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		pr_cont("%u", (unsigned)*ptr.p_u16);
+		break;
+	case V4L2_CTRL_TYPE_U32:
+		pr_cont("%u", (unsigned)*ptr.p_u32);
+		break;
+	case V4L2_CTRL_TYPE_H264_SPS:
+		pr_cont("H264_SPS");
+		break;
+	case V4L2_CTRL_TYPE_H264_PPS:
+		pr_cont("H264_PPS");
+		break;
+	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
+		pr_cont("H264_SCALING_MATRIX");
+		break;
+	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
+		pr_cont("H264_SLICE_PARAMS");
+		break;
+	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
+		pr_cont("H264_DECODE_PARAMS");
+		break;
+	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
+		pr_cont("H264_PRED_WEIGHTS");
+		break;
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		pr_cont("FWHT_PARAMS");
+		break;
+	case V4L2_CTRL_TYPE_VP8_FRAME:
+		pr_cont("VP8_FRAME");
+		break;
+	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
+		pr_cont("HDR10_CLL_INFO");
+		break;
+	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
+		pr_cont("HDR10_MASTERING_DISPLAY");
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		pr_cont("MPEG2_QUANTISATION");
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		pr_cont("MPEG2_SEQUENCE");
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		pr_cont("MPEG2_PICTURE");
+		break;
+	default:
+		pr_cont("unknown type %d", ctrl->type);
+		break;
+	}
+}
+
+/*
+ * Round towards the closest legal value. Be careful when we are
+ * close to the maximum range of the control type to prevent
+ * wrap-arounds.
+ */
+#define ROUND_TO_RANGE(val, offset_type, ctrl)			\
+({								\
+	offset_type offset;					\
+	if ((ctrl)->maximum >= 0 &&				\
+	    val >= (ctrl)->maximum - (s32)((ctrl)->step / 2))	\
+		val = (ctrl)->maximum;				\
+	else							\
+		val += (s32)((ctrl)->step / 2);			\
+	val = clamp_t(typeof(val), val,				\
+		      (ctrl)->minimum, (ctrl)->maximum);	\
+	offset = (val) - (ctrl)->minimum;			\
+	offset = (ctrl)->step * (offset / (u32)(ctrl)->step);	\
+	val = (ctrl)->minimum + offset;				\
+	0;							\
+})
+
+/* Validate a new control */
+
+#define zero_padding(s) \
+	memset(&(s).padding, 0, sizeof((s).padding))
+#define zero_reserved(s) \
+	memset(&(s).reserved, 0, sizeof((s).reserved))
+
+/*
+ * Compound controls validation requires setting unused fields/flags to zero
+ * in order to properly detect unchanged controls with std_equal's memcmp.
+ */
+static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
+				 union v4l2_ctrl_ptr ptr)
+{
+	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
+	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
+	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
+	struct v4l2_ctrl_fwht_params *p_fwht_params;
+	struct v4l2_ctrl_h264_sps *p_h264_sps;
+	struct v4l2_ctrl_h264_pps *p_h264_pps;
+	struct v4l2_ctrl_h264_pred_weights *p_h264_pred_weights;
+	struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
+	struct v4l2_ctrl_h264_decode_params *p_h264_dec_params;
+	struct v4l2_ctrl_hevc_sps *p_hevc_sps;
+	struct v4l2_ctrl_hevc_pps *p_hevc_pps;
+	struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
+	struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering;
+	struct v4l2_area *area;
+	void *p = ptr.p + idx * ctrl->elem_size;
+	unsigned int i;
+
+	switch ((u32)ctrl->type) {
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		p_mpeg2_sequence = p;
+
+		switch (p_mpeg2_sequence->chroma_format) {
+		case 1: /* 4:2:0 */
+		case 2: /* 4:2:2 */
+		case 3: /* 4:4:4 */
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		p_mpeg2_picture = p;
+
+		switch (p_mpeg2_picture->intra_dc_precision) {
+		case 0: /* 8 bits */
+		case 1: /* 9 bits */
+		case 2: /* 10 bits */
+		case 3: /* 11 bits */
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		switch (p_mpeg2_picture->picture_structure) {
+		case V4L2_MPEG2_PIC_TOP_FIELD:
+		case V4L2_MPEG2_PIC_BOTTOM_FIELD:
+		case V4L2_MPEG2_PIC_FRAME:
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		switch (p_mpeg2_picture->picture_coding_type) {
+		case V4L2_MPEG2_PIC_CODING_TYPE_I:
+		case V4L2_MPEG2_PIC_CODING_TYPE_P:
+		case V4L2_MPEG2_PIC_CODING_TYPE_B:
+			break;
+		default:
+			return -EINVAL;
+		}
+		zero_reserved(*p_mpeg2_picture);
+		break;
+
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		break;
+
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		p_fwht_params = p;
+		if (p_fwht_params->version < V4L2_FWHT_VERSION)
+			return -EINVAL;
+		if (!p_fwht_params->width || !p_fwht_params->height)
+			return -EINVAL;
+		break;
+
+	case V4L2_CTRL_TYPE_H264_SPS:
+		p_h264_sps = p;
+
+		/* Some syntax elements are only conditionally valid */
+		if (p_h264_sps->pic_order_cnt_type != 0) {
+			p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 = 0;
+		} else if (p_h264_sps->pic_order_cnt_type != 1) {
+			p_h264_sps->num_ref_frames_in_pic_order_cnt_cycle = 0;
+			p_h264_sps->offset_for_non_ref_pic = 0;
+			p_h264_sps->offset_for_top_to_bottom_field = 0;
+			memset(&p_h264_sps->offset_for_ref_frame, 0,
+			       sizeof(p_h264_sps->offset_for_ref_frame));
+		}
+
+		if (!V4L2_H264_SPS_HAS_CHROMA_FORMAT(p_h264_sps)) {
+			p_h264_sps->chroma_format_idc = 1;
+			p_h264_sps->bit_depth_luma_minus8 = 0;
+			p_h264_sps->bit_depth_chroma_minus8 = 0;
+
+			p_h264_sps->flags &=
+				~V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS;
+
+			if (p_h264_sps->chroma_format_idc < 3)
+				p_h264_sps->flags &=
+					~V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
+		}
+
+		if (p_h264_sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
+			p_h264_sps->flags &=
+				~V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
+
+		/*
+		 * Chroma 4:2:2 format require at least High 4:2:2 profile.
+		 *
+		 * The H264 specification and well-known parser implementations
+		 * use profile-idc values directly, as that is clearer and
+		 * less ambiguous. We do the same here.
+		 */
+		if (p_h264_sps->profile_idc < 122 &&
+		    p_h264_sps->chroma_format_idc > 1)
+			return -EINVAL;
+		/* Chroma 4:4:4 format requires at least High 4:4:4 Predictive profile */
+		if (p_h264_sps->profile_idc < 244 &&
+		    p_h264_sps->chroma_format_idc > 2)
+			return -EINVAL;
+		if (p_h264_sps->chroma_format_idc > 3)
+			return -EINVAL;
+
+		if (p_h264_sps->bit_depth_luma_minus8 > 6)
+			return -EINVAL;
+		if (p_h264_sps->bit_depth_chroma_minus8 > 6)
+			return -EINVAL;
+		if (p_h264_sps->log2_max_frame_num_minus4 > 12)
+			return -EINVAL;
+		if (p_h264_sps->pic_order_cnt_type > 2)
+			return -EINVAL;
+		if (p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
+			return -EINVAL;
+		if (p_h264_sps->max_num_ref_frames > V4L2_H264_REF_LIST_LEN)
+			return -EINVAL;
+		break;
+
+	case V4L2_CTRL_TYPE_H264_PPS:
+		p_h264_pps = p;
+
+		if (p_h264_pps->num_slice_groups_minus1 > 7)
+			return -EINVAL;
+		if (p_h264_pps->num_ref_idx_l0_default_active_minus1 >
+		    (V4L2_H264_REF_LIST_LEN - 1))
+			return -EINVAL;
+		if (p_h264_pps->num_ref_idx_l1_default_active_minus1 >
+		    (V4L2_H264_REF_LIST_LEN - 1))
+			return -EINVAL;
+		if (p_h264_pps->weighted_bipred_idc > 2)
+			return -EINVAL;
+		/*
+		 * pic_init_qp_minus26 shall be in the range of
+		 * -(26 + QpBdOffset_y) to +25, inclusive,
+		 *  where QpBdOffset_y is 6 * bit_depth_luma_minus8
+		 */
+		if (p_h264_pps->pic_init_qp_minus26 < -62 ||
+		    p_h264_pps->pic_init_qp_minus26 > 25)
+			return -EINVAL;
+		if (p_h264_pps->pic_init_qs_minus26 < -26 ||
+		    p_h264_pps->pic_init_qs_minus26 > 25)
+			return -EINVAL;
+		if (p_h264_pps->chroma_qp_index_offset < -12 ||
+		    p_h264_pps->chroma_qp_index_offset > 12)
+			return -EINVAL;
+		if (p_h264_pps->second_chroma_qp_index_offset < -12 ||
+		    p_h264_pps->second_chroma_qp_index_offset > 12)
+			return -EINVAL;
+		break;
+
+	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
+		break;
+
+	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
+		p_h264_pred_weights = p;
+
+		if (p_h264_pred_weights->luma_log2_weight_denom > 7)
+			return -EINVAL;
+		if (p_h264_pred_weights->chroma_log2_weight_denom > 7)
+			return -EINVAL;
+		break;
+
+	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
+		p_h264_slice_params = p;
+
+		if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B)
+			p_h264_slice_params->flags &=
+				~V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED;
+
+		if (p_h264_slice_params->colour_plane_id > 2)
+			return -EINVAL;
+		if (p_h264_slice_params->cabac_init_idc > 2)
+			return -EINVAL;
+		if (p_h264_slice_params->disable_deblocking_filter_idc > 2)
+			return -EINVAL;
+		if (p_h264_slice_params->slice_alpha_c0_offset_div2 < -6 ||
+		    p_h264_slice_params->slice_alpha_c0_offset_div2 > 6)
+			return -EINVAL;
+		if (p_h264_slice_params->slice_beta_offset_div2 < -6 ||
+		    p_h264_slice_params->slice_beta_offset_div2 > 6)
+			return -EINVAL;
+
+		if (p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_I ||
+		    p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_SI)
+			p_h264_slice_params->num_ref_idx_l0_active_minus1 = 0;
+		if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B)
+			p_h264_slice_params->num_ref_idx_l1_active_minus1 = 0;
+
+		if (p_h264_slice_params->num_ref_idx_l0_active_minus1 >
+		    (V4L2_H264_REF_LIST_LEN - 1))
+			return -EINVAL;
+		if (p_h264_slice_params->num_ref_idx_l1_active_minus1 >
+		    (V4L2_H264_REF_LIST_LEN - 1))
+			return -EINVAL;
+		zero_reserved(*p_h264_slice_params);
+		break;
+
+	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
+		p_h264_dec_params = p;
+
+		if (p_h264_dec_params->nal_ref_idc > 3)
+			return -EINVAL;
+		for (i = 0; i < V4L2_H264_NUM_DPB_ENTRIES; i++) {
+			struct v4l2_h264_dpb_entry *dpb_entry =
+				&p_h264_dec_params->dpb[i];
+
+			zero_reserved(*dpb_entry);
+		}
+		zero_reserved(*p_h264_dec_params);
+		break;
+
+	case V4L2_CTRL_TYPE_VP8_FRAME:
+		p_vp8_frame = p;
+
+		switch (p_vp8_frame->num_dct_parts) {
+		case 1:
+		case 2:
+		case 4:
+		case 8:
+			break;
+		default:
+			return -EINVAL;
+		}
+		zero_padding(p_vp8_frame->segment);
+		zero_padding(p_vp8_frame->lf);
+		zero_padding(p_vp8_frame->quant);
+		zero_padding(p_vp8_frame->entropy);
+		zero_padding(p_vp8_frame->coder_state);
+		break;
+
+	case V4L2_CTRL_TYPE_HEVC_SPS:
+		p_hevc_sps = p;
+
+		if (!(p_hevc_sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) {
+			p_hevc_sps->pcm_sample_bit_depth_luma_minus1 = 0;
+			p_hevc_sps->pcm_sample_bit_depth_chroma_minus1 = 0;
+			p_hevc_sps->log2_min_pcm_luma_coding_block_size_minus3 = 0;
+			p_hevc_sps->log2_diff_max_min_pcm_luma_coding_block_size = 0;
+		}
+
+		if (!(p_hevc_sps->flags &
+		      V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT))
+			p_hevc_sps->num_long_term_ref_pics_sps = 0;
+		break;
+
+	case V4L2_CTRL_TYPE_HEVC_PPS:
+		p_hevc_pps = p;
+
+		if (!(p_hevc_pps->flags &
+		      V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
+			p_hevc_pps->diff_cu_qp_delta_depth = 0;
+
+		if (!(p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
+			p_hevc_pps->num_tile_columns_minus1 = 0;
+			p_hevc_pps->num_tile_rows_minus1 = 0;
+			memset(&p_hevc_pps->column_width_minus1, 0,
+			       sizeof(p_hevc_pps->column_width_minus1));
+			memset(&p_hevc_pps->row_height_minus1, 0,
+			       sizeof(p_hevc_pps->row_height_minus1));
+
+			p_hevc_pps->flags &=
+				~V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
+		}
+
+		if (p_hevc_pps->flags &
+		    V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER) {
+			p_hevc_pps->pps_beta_offset_div2 = 0;
+			p_hevc_pps->pps_tc_offset_div2 = 0;
+		}
+
+		zero_padding(*p_hevc_pps);
+		break;
+
+	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
+		p_hevc_slice_params = p;
+
+		if (p_hevc_slice_params->num_active_dpb_entries >
+		    V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
+			return -EINVAL;
+
+		zero_padding(p_hevc_slice_params->pred_weight_table);
+
+		for (i = 0; i < p_hevc_slice_params->num_active_dpb_entries;
+		     i++) {
+			struct v4l2_hevc_dpb_entry *dpb_entry =
+				&p_hevc_slice_params->dpb[i];
+
+			zero_padding(*dpb_entry);
+		}
+
+		zero_padding(*p_hevc_slice_params);
+		break;
+
+	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
+		break;
+
+	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
+		p_hdr10_mastering = p;
+
+		for (i = 0; i < 3; ++i) {
+			if (p_hdr10_mastering->display_primaries_x[i] <
+				V4L2_HDR10_MASTERING_PRIMARIES_X_LOW ||
+			    p_hdr10_mastering->display_primaries_x[i] >
+				V4L2_HDR10_MASTERING_PRIMARIES_X_HIGH ||
+			    p_hdr10_mastering->display_primaries_y[i] <
+				V4L2_HDR10_MASTERING_PRIMARIES_Y_LOW ||
+			    p_hdr10_mastering->display_primaries_y[i] >
+				V4L2_HDR10_MASTERING_PRIMARIES_Y_HIGH)
+				return -EINVAL;
+		}
+
+		if (p_hdr10_mastering->white_point_x <
+			V4L2_HDR10_MASTERING_WHITE_POINT_X_LOW ||
+		    p_hdr10_mastering->white_point_x >
+			V4L2_HDR10_MASTERING_WHITE_POINT_X_HIGH ||
+		    p_hdr10_mastering->white_point_y <
+			V4L2_HDR10_MASTERING_WHITE_POINT_Y_LOW ||
+		    p_hdr10_mastering->white_point_y >
+			V4L2_HDR10_MASTERING_WHITE_POINT_Y_HIGH)
+			return -EINVAL;
+
+		if (p_hdr10_mastering->max_display_mastering_luminance <
+			V4L2_HDR10_MASTERING_MAX_LUMA_LOW ||
+		    p_hdr10_mastering->max_display_mastering_luminance >
+			V4L2_HDR10_MASTERING_MAX_LUMA_HIGH ||
+		    p_hdr10_mastering->min_display_mastering_luminance <
+			V4L2_HDR10_MASTERING_MIN_LUMA_LOW ||
+		    p_hdr10_mastering->min_display_mastering_luminance >
+			V4L2_HDR10_MASTERING_MIN_LUMA_HIGH)
+			return -EINVAL;
+
+		/* The following restriction comes from ITU-T Rec. H.265 spec */
+		if (p_hdr10_mastering->max_display_mastering_luminance ==
+			V4L2_HDR10_MASTERING_MAX_LUMA_LOW &&
+		    p_hdr10_mastering->min_display_mastering_luminance ==
+			V4L2_HDR10_MASTERING_MIN_LUMA_HIGH)
+			return -EINVAL;
+
+		break;
+
+	case V4L2_CTRL_TYPE_AREA:
+		area = p;
+		if (!area->width || !area->height)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Validate element @idx of the value at @ptr against the limits stored in
+ * @ctrl, adjusting the value in place for the types that allow it.
+ *
+ * Returns 0 on success or a negative error code: -ERANGE or -EINVAL from the
+ * checks below, or whatever ROUND_TO_RANGE() and std_validate_compound()
+ * return for the types they handle.
+ */
+static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
+			union v4l2_ctrl_ptr ptr)
+{
+	size_t len;
+	u64 offset;
+	s64 val;
+
+	switch ((u32)ctrl->type) {
+	case V4L2_CTRL_TYPE_INTEGER:
+		return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl);
+	case V4L2_CTRL_TYPE_INTEGER64:
+		/*
+		 * We can't use the ROUND_TO_RANGE define here due to
+		 * the u64 divide that needs special care.
+		 */
+		val = ptr.p_s64[idx];
+		/* Add half a step so the division below rounds to nearest. */
+		if (ctrl->maximum >= 0 && val >= ctrl->maximum - (s64)(ctrl->step / 2))
+			val = ctrl->maximum;
+		else
+			val += (s64)(ctrl->step / 2);
+		val = clamp_t(s64, val, ctrl->minimum, ctrl->maximum);
+		/* Snap to minimum + N * step. */
+		offset = val - ctrl->minimum;
+		do_div(offset, ctrl->step);
+		ptr.p_s64[idx] = ctrl->minimum + offset * ctrl->step;
+		return 0;
+	case V4L2_CTRL_TYPE_U8:
+		return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl);
+	case V4L2_CTRL_TYPE_U16:
+		return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl);
+	case V4L2_CTRL_TYPE_U32:
+		return ROUND_TO_RANGE(ptr.p_u32[idx], u32, ctrl);
+
+	case V4L2_CTRL_TYPE_BOOLEAN:
+		/* Normalize any non-zero value to 1. */
+		ptr.p_s32[idx] = !!ptr.p_s32[idx];
+		return 0;
+
+	case V4L2_CTRL_TYPE_MENU:
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+		/* The menu index must lie within [minimum, maximum]. */
+		if (ptr.p_s32[idx] < ctrl->minimum || ptr.p_s32[idx] > ctrl->maximum)
+			return -ERANGE;
+		/*
+		 * Reject indices masked out by menu_skip_mask; only indices
+		 * below BITS_PER_LONG_LONG can be represented in the mask.
+		 */
+		if (ptr.p_s32[idx] < BITS_PER_LONG_LONG &&
+		    (ctrl->menu_skip_mask & BIT_ULL(ptr.p_s32[idx])))
+			return -EINVAL;
+		/* Named menu entries with an empty string are rejected as well. */
+		if (ctrl->type == V4L2_CTRL_TYPE_MENU &&
+		    ctrl->qmenu[ptr.p_s32[idx]][0] == '\0')
+			return -EINVAL;
+		return 0;
+
+	case V4L2_CTRL_TYPE_BITMASK:
+		/* Silently drop any bits that are not set in maximum. */
+		ptr.p_s32[idx] &= ctrl->maximum;
+		return 0;
+
+	case V4L2_CTRL_TYPE_BUTTON:
+	case V4L2_CTRL_TYPE_CTRL_CLASS:
+		/* The value is always forced to 0 for these types. */
+		ptr.p_s32[idx] = 0;
+		return 0;
+
+	case V4L2_CTRL_TYPE_STRING:
+		/* Each string element is elem_size bytes apart; turn idx into a byte offset. */
+		idx *= ctrl->elem_size;
+		len = strlen(ptr.p_char + idx);
+		if (len < ctrl->minimum)
+			return -ERANGE;
+		/* The length must be minimum plus a whole multiple of step. */
+		if ((len - (u32)ctrl->minimum) % (u32)ctrl->step)
+			return -ERANGE;
+		return 0;
+
+	default:
+		return std_validate_compound(ctrl, idx, ptr);
+	}
+}
+
+/* Standard type operations, built from the std_*() helpers in this file. */
+static const struct v4l2_ctrl_type_ops std_type_ops = {
+	.equal = std_equal,
+	.init = std_init,
+	.log = std_log,
+	.validate = std_validate,
+};
+
+/*
+ * Enable or disable the notify callback for @ctrl.
+ *
+ * A NULL @notify only clears the control's call_notify flag. Otherwise the
+ * callback and @priv are stored in the control's handler and call_notify is
+ * set. The handler holds a single callback: trying to install a different
+ * one while another is already set triggers a warning and changes nothing.
+ */
+void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, v4l2_ctrl_notify_fnc notify, void *priv)
+{
+	struct v4l2_ctrl_handler *hdl;
+
+	if (ctrl == NULL)
+		return;
+
+	if (notify == NULL) {
+		ctrl->call_notify = 0;
+		return;
+	}
+
+	hdl = ctrl->handler;
+	if (WARN_ON(hdl->notify && hdl->notify != notify))
+		return;
+
+	hdl->notify = notify;
+	hdl->notify_priv = priv;
+	ctrl->call_notify = 1;
+}
+EXPORT_SYMBOL(v4l2_ctrl_notify);
+
+/* Copy all elements of a control value from one payload to another. */
+static void ptr_to_ptr(struct v4l2_ctrl *ctrl,
+		       union v4l2_ctrl_ptr from, union v4l2_ctrl_ptr to)
+{
+	if (ctrl)
+		memcpy(to.p, from.p_const, ctrl->elems * ctrl->elem_size);
+}
+
+/*
+ * Copy the new value to the current value (if it changed), update the
+ * INACTIVE/VOLATILE flags when @ch_flags asks for it, and send out the
+ * resulting control event and notify callback.
+ */
+void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags)
+{
+	bool changed;
+	u32 changes;
+
+	if (!ctrl)
+		return;
+
+	/* has_changed is set by cluster_changed */
+	changed = ctrl->has_changed;
+	if (changed)
+		ptr_to_ptr(ctrl, ctrl->p_new, ctrl->p_cur);
+
+	if (ch_flags & V4L2_EVENT_CTRL_CH_FLAGS) {
+		struct v4l2_ctrl *master = ctrl->cluster[0];
+
+		/* Note: CH_FLAGS is only set for auto clusters. */
+		ctrl->flags &=
+			~(V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_VOLATILE);
+		if (!is_cur_manual(master)) {
+			ctrl->flags |= V4L2_CTRL_FLAG_INACTIVE;
+			if (master->has_volatiles)
+				ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
+		}
+		fh = NULL;
+	}
+
+	/* Nothing to report when neither the value nor the flags changed. */
+	if (!changed && !ch_flags)
+		return;
+
+	/*
+	 * A control that changed without being one of the controls the
+	 * application modified is reported to everybody.
+	 */
+	if (!ctrl->is_new)
+		fh = NULL;
+
+	changes = ch_flags;
+	if (changed)
+		changes |= V4L2_EVENT_CTRL_CH_VALUE;
+	send_event(fh, ctrl, changes);
+
+	if (changed && ctrl->call_notify && ctrl->handler->notify)
+		ctrl->handler->notify(ctrl, ctrl->handler->notify_priv);
+}
+
+/* Overwrite the control's new value with its current value. */
+void cur_to_new(struct v4l2_ctrl *ctrl)
+{
+	if (ctrl)
+		ptr_to_ptr(ctrl, ctrl->p_cur, ctrl->p_new);
+}
+
+/* Store the control's new value as the request value and mark it valid. */
+void new_to_req(struct v4l2_ctrl_ref *ref)
+{
+	struct v4l2_ctrl *ctrl;
+
+	if (ref == NULL)
+		return;
+
+	ctrl = ref->ctrl;
+	ptr_to_ptr(ctrl, ctrl->p_new, ref->p_req);
+	ref->valid_p_req = true;
+}
+
+/* Store the control's current value as the request value and mark it valid. */
+void cur_to_req(struct v4l2_ctrl_ref *ref)
+{
+	struct v4l2_ctrl *ctrl;
+
+	if (ref == NULL)
+		return;
+
+	ctrl = ref->ctrl;
+	ptr_to_ptr(ctrl, ctrl->p_cur, ref->p_req);
+	ref->valid_p_req = true;
+}
+
+/*
+ * Load the control's new value from the request value, or from the current
+ * value when the request holds no valid value for this control.
+ */
+void req_to_new(struct v4l2_ctrl_ref *ref)
+{
+	union v4l2_ctrl_ptr src;
+
+	if (ref == NULL)
+		return;
+
+	src = ref->valid_p_req ? ref->p_req : ref->ctrl->p_cur;
+	ptr_to_ptr(ref->ctrl, src, ref->ctrl->p_new);
+}
+
+/*
+ * Control range checking
+ *
+ * Check that @min, @max, @step and @def form a consistent description for a
+ * control of the given @type.
+ *
+ * Returns 0 when they do, -ERANGE when the limits, step or default are
+ * inconsistent, and -EINVAL when the default of a menu control is masked out
+ * by its skip mask. Types that are not listed are accepted unconditionally.
+ */
+int check_range(enum v4l2_ctrl_type type,
+		s64 min, s64 max, u64 step, s64 def)
+{
+	switch (type) {
+	case V4L2_CTRL_TYPE_BOOLEAN:
+		if (step != 1 || max > 1 || min < 0)
+			return -ERANGE;
+		fallthrough;
+	case V4L2_CTRL_TYPE_U8:
+	case V4L2_CTRL_TYPE_U16:
+	case V4L2_CTRL_TYPE_U32:
+	case V4L2_CTRL_TYPE_INTEGER:
+	case V4L2_CTRL_TYPE_INTEGER64:
+		if (step == 0 || min > max || def < min || def > max)
+			return -ERANGE;
+		return 0;
+	case V4L2_CTRL_TYPE_BITMASK:
+		/* For bitmasks @max is the set of valid bits. */
+		if (step || min || !max || (def & ~max))
+			return -ERANGE;
+		return 0;
+	case V4L2_CTRL_TYPE_MENU:
+	case V4L2_CTRL_TYPE_INTEGER_MENU:
+		if (min > max || def < min || def > max)
+			return -ERANGE;
+		/*
+		 * Note: step == menu_skip_mask for menu controls.
+		 * So here we check if the default value is masked out.
+		 *
+		 * The mask is 64 bits wide, so it is tested with a 64-bit
+		 * shift and only for indices that fit in it: shifting a
+		 * plain int by @def is undefined once @def reaches the
+		 * width of int.
+		 */
+		if (def >= 0 && def < BITS_PER_LONG_LONG &&
+		    (step & BIT_ULL(def)))
+			return -EINVAL;
+		return 0;
+	case V4L2_CTRL_TYPE_STRING:
+		if (min > max || min < 0 || step < 1 || def)
+			return -ERANGE;
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Validate every element of a new control value, stopping at the first
+ * element the type's validate op rejects and returning its error code.
+ */
+int validate_new(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr p_new)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < ctrl->elems; idx++) {
+		int err = ctrl->type_ops->validate(ctrl, idx, p_new);
+
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * Record @err in the handler unless an earlier error is already stored
+ * there. Always hands @err back to the caller.
+ */
+static inline int handler_set_err(struct v4l2_ctrl_handler *hdl, int err)
+{
+	if (!hdl->error)
+		hdl->error = err;
+
+	return err;
+}
+
+/*
+ * Initialize the handler: its own mutex (with the given lockdep class and
+ * name), empty control lists and a zeroed hash table sized from the hint.
+ * hdl->error is set to -ENOMEM if the hash table cannot be allocated and to
+ * 0 otherwise; the function returns hdl->error.
+ */
+int v4l2_ctrl_handler_init_class(struct v4l2_ctrl_handler *hdl,
+				 unsigned nr_of_controls_hint,
+				 struct lock_class_key *key, const char *name)
+{
+	mutex_init(&hdl->_lock);
+	hdl->lock = &hdl->_lock;
+	lockdep_set_class_and_name(hdl->lock, key, name);
+
+	INIT_LIST_HEAD(&hdl->ctrls);
+	INIT_LIST_HEAD(&hdl->ctrl_refs);
+
+	/* One bucket per eight expected controls, and always at least one. */
+	hdl->nr_of_buckets = nr_of_controls_hint / 8 + 1;
+	hdl->buckets = kvmalloc_array(hdl->nr_of_buckets,
+				      sizeof(*hdl->buckets),
+				      __GFP_ZERO | GFP_KERNEL);
+	if (hdl->buckets)
+		hdl->error = 0;
+	else
+		hdl->error = -ENOMEM;
+
+	v4l2_ctrl_handler_init_request(hdl);
+	return hdl->error;
+}
+EXPORT_SYMBOL(v4l2_ctrl_handler_init_class);
+
+/*
+ * Free all controls and control refs
+ *
+ * Releases every control reference and every control on the handler's lists
+ * together with the hash table, then resets the handler's bookkeeping and
+ * destroys its embedded mutex. Does nothing for a NULL handler or one whose
+ * hash table pointer is NULL.
+ */
+void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl)
+{
+	struct v4l2_ctrl_ref *ref, *next_ref;
+	struct v4l2_ctrl *ctrl, *next_ctrl;
+	struct v4l2_subscribed_event *sev, *next_sev;
+
+	/* buckets is reset to NULL below, so a second call returns here. */
+	if (hdl == NULL || hdl->buckets == NULL)
+		return;
+
+	v4l2_ctrl_handler_free_request(hdl);
+
+	mutex_lock(hdl->lock);
+	/* Free all nodes */
+	list_for_each_entry_safe(ref, next_ref, &hdl->ctrl_refs, node) {
+		list_del(&ref->node);
+		kfree(ref);
+	}
+	/* Free all controls owned by the handler */
+	list_for_each_entry_safe(ctrl, next_ctrl, &hdl->ctrls, node) {
+		list_del(&ctrl->node);
+		/*
+		 * Unlink the remaining event subscriptions from the control;
+		 * the subscription objects themselves are not freed here.
+		 */
+		list_for_each_entry_safe(sev, next_sev, &ctrl->ev_subs, node)
+			list_del(&sev->node);
+		kvfree(ctrl);
+	}
+	kvfree(hdl->buckets);
+	hdl->buckets = NULL;
+	hdl->cached = NULL;
+	hdl->error = 0;
+	mutex_unlock(hdl->lock);
+	mutex_destroy(&hdl->_lock);
+}
+EXPORT_SYMBOL(v4l2_ctrl_handler_free);
+
+/*
+ * Backwards compatibility: IDs based on V4L2_CID_PRIVATE_BASE should only
+ * still show up in G_CTRL, S_CTRL, QUERYCTRL and QUERYMENU calls made by
+ * applications that do not use the NEXT_CTRL flag.
+ *
+ * Such an ID simply selects the n-th private user control, so walk the list
+ * and count. That is O(N), which should not matter for this use.
+ */
+static struct v4l2_ctrl_ref *find_private_ref(
+		struct v4l2_ctrl_handler *hdl, u32 id)
+{
+	struct v4l2_ctrl_ref *ref;
+	u32 remaining = id - V4L2_CID_PRIVATE_BASE;
+
+	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+		const struct v4l2_ctrl *ctrl = ref->ctrl;
+
+		/*
+		 * Only driver-private controls of the user class that are
+		 * usable with VIDIOC_G/S_CTRL take part in the count.
+		 */
+		if (V4L2_CTRL_ID2WHICH(ctrl->id) != V4L2_CTRL_CLASS_USER)
+			continue;
+		if (!V4L2_CTRL_DRIVER_PRIV(ctrl->id))
+			continue;
+		if (!ctrl->is_int)
+			continue;
+
+		if (remaining == 0)
+			return ref;
+		remaining--;
+	}
+	return NULL;
+}
+
+/*
+ * Look up the reference to the control with the given ID in the handler.
+ * Returns NULL when the handler has no such control.
+ */
+struct v4l2_ctrl_ref *find_ref(struct v4l2_ctrl_handler *hdl, u32 id)
+{
+	struct v4l2_ctrl_ref *ref;
+	int bucket;
+
+	id &= V4L2_CTRL_ID_MASK;
+
+	/* Old-style private control IDs are resolved by position instead */
+	if (id >= V4L2_CID_PRIVATE_BASE)
+		return find_private_ref(hdl, id);
+	bucket = id % hdl->nr_of_buckets;
+
+	/* Fast path: the most recently found control is remembered */
+	if (hdl->cached && hdl->cached->ctrl->id == id)
+		return hdl->cached;
+
+	/* Otherwise walk the hash chain, remembering a hit for next time */
+	for (ref = hdl->buckets ? hdl->buckets[bucket] : NULL; ref;
+	     ref = ref->next) {
+		if (ref->ctrl->id == id) {
+			hdl->cached = ref;
+			return ref;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Same lookup as find_ref(), but done while holding the handler's lock.
+ * A NULL handler yields NULL.
+ */
+struct v4l2_ctrl_ref *find_ref_lock(struct v4l2_ctrl_handler *hdl, u32 id)
+{
+	struct v4l2_ctrl_ref *ref;
+
+	if (!hdl)
+		return NULL;
+
+	mutex_lock(hdl->lock);
+	ref = find_ref(hdl, id);
+	mutex_unlock(hdl->lock);
+
+	return ref;
+}
+
+/* Return the control with the given ID, or NULL if the handler has none. */
+struct v4l2_ctrl *v4l2_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id)
+{
+	struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id);
+
+	if (!ref)
+		return NULL;
+	return ref->ctrl;
+}
+EXPORT_SYMBOL(v4l2_ctrl_find);
+
+/*
+ * Allocate a new v4l2_ctrl_ref and hook it into the handler.
+ *
+ * @hdl:            handler that receives the reference
+ * @ctrl:           control the reference points to
+ * @ctrl_ref:       if not NULL, set to the new reference, or to NULL when
+ *                  no reference was added
+ * @from_other_dev: stored in the new reference
+ * @allocate_req:   when true, room for one full control value is allocated
+ *                  behind the reference and p_req is pointed at it
+ *
+ * Returns 0 on success, including the case where a reference with the same
+ * control ID already exists (nothing is added then). Returns hdl->error if
+ * the handler already carries an error or adding the control class fails,
+ * and -ENOMEM if the reference cannot be allocated.
+ */
+int handler_new_ref(struct v4l2_ctrl_handler *hdl,
+		    struct v4l2_ctrl *ctrl,
+		    struct v4l2_ctrl_ref **ctrl_ref,
+		    bool from_other_dev, bool allocate_req)
+{
+	struct v4l2_ctrl_ref *ref;
+	struct v4l2_ctrl_ref *new_ref;
+	u32 id = ctrl->id;
+	u32 class_ctrl = V4L2_CTRL_ID2WHICH(id) | 1;
+	int bucket = id % hdl->nr_of_buckets;	/* which bucket to use */
+	unsigned int size_extra_req = 0;
+
+	if (ctrl_ref)
+		*ctrl_ref = NULL;
+
+	/*
+	 * Automatically add the control class if it is not yet present and
+	 * the new control is not a compound control.
+	 */
+	if (ctrl->type < V4L2_CTRL_COMPOUND_TYPES &&
+	    id != class_ctrl && find_ref_lock(hdl, class_ctrl) == NULL)
+		if (!v4l2_ctrl_new_std(hdl, NULL, class_ctrl, 0, 0, 0, 0))
+			return hdl->error;
+
+	if (hdl->error)
+		return hdl->error;
+
+	/* The optional request value is stored right behind the reference. */
+	if (allocate_req)
+		size_extra_req = ctrl->elems * ctrl->elem_size;
+	new_ref = kzalloc(sizeof(*new_ref) + size_extra_req, GFP_KERNEL);
+	if (!new_ref)
+		return handler_set_err(hdl, -ENOMEM);
+	new_ref->ctrl = ctrl;
+	new_ref->from_other_dev = from_other_dev;
+	if (size_extra_req)
+		new_ref->p_req.p = &new_ref[1];
+
+	INIT_LIST_HEAD(&new_ref->node);
+
+	mutex_lock(hdl->lock);
+
+	/* Add immediately at the end of the list if the list is empty, or if
+	   the last element in the list has a lower ID.
+	   This ensures that when elements are added in ascending order the
+	   insertion is an O(1) operation. */
+	if (list_empty(&hdl->ctrl_refs) || id > node2id(hdl->ctrl_refs.prev)) {
+		list_add_tail(&new_ref->node, &hdl->ctrl_refs);
+		goto insert_in_hash;
+	}
+
+	/* Find insert position in sorted list */
+	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+		if (ref->ctrl->id < id)
+			continue;
+		/* Don't add duplicates */
+		if (ref->ctrl->id == id) {
+			kfree(new_ref);
+			goto unlock;
+		}
+		/* Insert in front of the first reference with a higher ID. */
+		list_add(&new_ref->node, ref->node.prev);
+		break;
+	}
+
+insert_in_hash:
+	/* Insert the control node in the hash */
+	new_ref->next = hdl->buckets[bucket];
+	hdl->buckets[bucket] = new_ref;
+	if (ctrl_ref)
+		*ctrl_ref = new_ref;
+	if (ctrl->handler == hdl) {
+		/* By default each control starts in a cluster of its own.
+		 * new_ref->ctrl is basically a cluster array with one
+		 * element, so that's perfect to use as the cluster pointer.
+		 * But only do this for the handler that owns the control.
+		 */
+		ctrl->cluster = &new_ref->ctrl;
+		ctrl->ncontrols = 1;
+	}
+
+unlock:
+	mutex_unlock(hdl->lock);
+	return 0;
+}
+
+/*
+ * Add a new control
+ *
+ * Allocate a control of the given @type, initialise it and register it with
+ * @hdl. For the types listed in the switch below @elem_size is replaced by
+ * the size of the matching payload; other compound types keep the size the
+ * caller passed in. When the value does not fit in the integer embedded in
+ * the control, storage for the new and current values (plus a copy of @p_def
+ * for compound types that supply one) is allocated directly behind the
+ * control structure.
+ *
+ * Returns the new control, or NULL on failure. Failures detected here are
+ * recorded with handler_set_err(); nothing is done if @hdl already carries
+ * an error.
+ */
+static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_ops *ops,
+			const struct v4l2_ctrl_type_ops *type_ops,
+			u32 id, const char *name, enum v4l2_ctrl_type type,
+			s64 min, s64 max, u64 step, s64 def,
+			const u32 dims[V4L2_CTRL_MAX_DIMS], u32 elem_size,
+			u32 flags, const char * const *qmenu,
+			const s64 *qmenu_int, const union v4l2_ctrl_ptr p_def,
+			void *priv)
+{
+	struct v4l2_ctrl *ctrl;
+	unsigned sz_extra;
+	unsigned nr_of_dims = 0;
+	unsigned elems = 1;
+	bool is_array;
+	unsigned tot_ctrl_size;
+	unsigned idx;
+	void *data;
+	int err;
+
+	if (hdl->error)
+		return NULL;
+
+	/*
+	 * Count the dimensions (the list ends at a 0 entry or after
+	 * V4L2_CTRL_MAX_DIMS entries) and the total number of elements.
+	 */
+	while (dims && dims[nr_of_dims]) {
+		elems *= dims[nr_of_dims];
+		nr_of_dims++;
+		if (nr_of_dims == V4L2_CTRL_MAX_DIMS)
+			break;
+	}
+	is_array = nr_of_dims > 0;
+
+	/* Prefill elem_size for all types handled by std_type_ops */
+	switch ((u32)type) {
+	case V4L2_CTRL_TYPE_INTEGER64:
+		elem_size = sizeof(s64);
+		break;
+	case V4L2_CTRL_TYPE_STRING:
+		/* max is the longest allowed string; add room for the NUL. */
+		elem_size = max + 1;
+		break;
+	case V4L2_CTRL_TYPE_U8:
+		elem_size = sizeof(u8);
+		break;
+	case V4L2_CTRL_TYPE_U16:
+		elem_size = sizeof(u16);
+		break;
+	case V4L2_CTRL_TYPE_U32:
+		elem_size = sizeof(u32);
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_sequence);
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_picture);
+		break;
+	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
+		elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantisation);
+		break;
+	case V4L2_CTRL_TYPE_FWHT_PARAMS:
+		elem_size = sizeof(struct v4l2_ctrl_fwht_params);
+		break;
+	case V4L2_CTRL_TYPE_H264_SPS:
+		elem_size = sizeof(struct v4l2_ctrl_h264_sps);
+		break;
+	case V4L2_CTRL_TYPE_H264_PPS:
+		elem_size = sizeof(struct v4l2_ctrl_h264_pps);
+		break;
+	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
+		elem_size = sizeof(struct v4l2_ctrl_h264_scaling_matrix);
+		break;
+	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
+		elem_size = sizeof(struct v4l2_ctrl_h264_slice_params);
+		break;
+	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
+		elem_size = sizeof(struct v4l2_ctrl_h264_decode_params);
+		break;
+	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
+		elem_size = sizeof(struct v4l2_ctrl_h264_pred_weights);
+		break;
+	case V4L2_CTRL_TYPE_VP8_FRAME:
+		elem_size = sizeof(struct v4l2_ctrl_vp8_frame);
+		break;
+	case V4L2_CTRL_TYPE_HEVC_SPS:
+		elem_size = sizeof(struct v4l2_ctrl_hevc_sps);
+		break;
+	case V4L2_CTRL_TYPE_HEVC_PPS:
+		elem_size = sizeof(struct v4l2_ctrl_hevc_pps);
+		break;
+	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
+		elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params);
+		break;
+	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
+		elem_size = sizeof(struct v4l2_ctrl_hdr10_cll_info);
+		break;
+	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
+		elem_size = sizeof(struct v4l2_ctrl_hdr10_mastering_display);
+		break;
+	case V4L2_CTRL_TYPE_AREA:
+		elem_size = sizeof(struct v4l2_area);
+		break;
+	default:
+		/* Remaining non-compound types are stored as an s32. */
+		if (type < V4L2_CTRL_COMPOUND_TYPES)
+			elem_size = sizeof(s32);
+		break;
+	}
+	tot_ctrl_size = elem_size * elems;
+
+	/* Sanity checks */
+	if (id == 0 || name == NULL || !elem_size ||
+	    id >= V4L2_CID_PRIVATE_BASE ||
+	    (type == V4L2_CTRL_TYPE_MENU && qmenu == NULL) ||
+	    (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL)) {
+		handler_set_err(hdl, -ERANGE);
+		return NULL;
+	}
+	err = check_range(type, min, max, step, def);
+	if (err) {
+		handler_set_err(hdl, err);
+		return NULL;
+	}
+	/* Button and control class controls cannot be arrays. */
+	if (is_array &&
+	    (type == V4L2_CTRL_TYPE_BUTTON ||
+	     type == V4L2_CTRL_TYPE_CTRL_CLASS)) {
+		handler_set_err(hdl, -EINVAL);
+		return NULL;
+	}
+
+	/*
+	 * Buttons and control classes get their implied flags. All other
+	 * controls whose value is not kept in the embedded integer need room
+	 * for two copies of the value (new and current).
+	 */
+	sz_extra = 0;
+	if (type == V4L2_CTRL_TYPE_BUTTON)
+		flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
+			V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
+	else if (type == V4L2_CTRL_TYPE_CTRL_CLASS)
+		flags |= V4L2_CTRL_FLAG_READ_ONLY;
+	else if (type == V4L2_CTRL_TYPE_INTEGER64 ||
+		 type == V4L2_CTRL_TYPE_STRING ||
+		 type >= V4L2_CTRL_COMPOUND_TYPES ||
+		 is_array)
+		sz_extra += 2 * tot_ctrl_size;
+
+	/* One more element to hold a copy of the compound default value. */
+	if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const)
+		sz_extra += elem_size;
+
+	ctrl = kvzalloc(sizeof(*ctrl) + sz_extra, GFP_KERNEL);
+	if (ctrl == NULL) {
+		handler_set_err(hdl, -ENOMEM);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&ctrl->node);
+	INIT_LIST_HEAD(&ctrl->ev_subs);
+	ctrl->handler = hdl;
+	ctrl->ops = ops;
+	/* Fall back to the standard type operations if none were given. */
+	ctrl->type_ops = type_ops ? type_ops : &std_type_ops;
+	ctrl->id = id;
+	ctrl->name = name;
+	ctrl->type = type;
+	ctrl->flags = flags;
+	ctrl->minimum = min;
+	ctrl->maximum = max;
+	ctrl->step = step;
+	ctrl->default_value = def;
+	ctrl->is_string = !is_array && type == V4L2_CTRL_TYPE_STRING;
+	ctrl->is_ptr = is_array || type >= V4L2_CTRL_COMPOUND_TYPES || ctrl->is_string;
+	ctrl->is_int = !ctrl->is_ptr && type != V4L2_CTRL_TYPE_INTEGER64;
+	ctrl->is_array = is_array;
+	ctrl->elems = elems;
+	ctrl->nr_of_dims = nr_of_dims;
+	if (nr_of_dims)
+		memcpy(ctrl->dims, dims, nr_of_dims * sizeof(dims[0]));
+	ctrl->elem_size = elem_size;
+	if (type == V4L2_CTRL_TYPE_MENU)
+		ctrl->qmenu = qmenu;
+	else if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
+		ctrl->qmenu_int = qmenu_int;
+	ctrl->priv = priv;
+	ctrl->cur.val = ctrl->val = def;
+	/* The extra storage starts right behind the control structure. */
+	data = &ctrl[1];
+
+	if (!ctrl->is_int) {
+		/* Layout of the extra storage: new value, then current value. */
+		ctrl->p_new.p = data;
+		ctrl->p_cur.p = data + tot_ctrl_size;
+	} else {
+		/* Plain integer controls use the fields embedded in the control. */
+		ctrl->p_new.p = &ctrl->val;
+		ctrl->p_cur.p = &ctrl->cur.val;
+	}
+
+	if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const) {
+		/* The copy of the default value follows the current value. */
+		ctrl->p_def.p = ctrl->p_cur.p + tot_ctrl_size;
+		memcpy(ctrl->p_def.p, p_def.p_const, elem_size);
+	}
+
+	/* Let the type ops initialise every element of both values. */
+	for (idx = 0; idx < elems; idx++) {
+		ctrl->type_ops->init(ctrl, idx, ctrl->p_cur);
+		ctrl->type_ops->init(ctrl, idx, ctrl->p_new);
+	}
+
+	/* Create the handler's reference; drop the control if that fails. */
+	if (handler_new_ref(hdl, ctrl, NULL, false, false)) {
+		kvfree(ctrl);
+		return NULL;
+	}
+	mutex_lock(hdl->lock);
+	list_add_tail(&ctrl->node, &hdl->ctrls);
+	mutex_unlock(hdl->lock);
+	return ctrl;
+}
+
+/*
+ * Create a control from a driver-supplied configuration.
+ *
+ * When the configuration has no name, v4l2_ctrl_fill() is asked to fill in
+ * the properties for cfg->id. Menu controls use cfg->menu_skip_mask in place
+ * of a step; setting the field that does not apply to the control type
+ * triggers a warning. Returns the new control or NULL on failure, in which
+ * case the error is recorded in @hdl.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_config *cfg, void *priv)
+{
+	const char *name = cfg->name;
+	const char * const *qmenu = cfg->qmenu;
+	const s64 *qmenu_int = cfg->qmenu_int;
+	enum v4l2_ctrl_type type = cfg->type;
+	u32 flags = cfg->flags;
+	s64 min = cfg->min;
+	s64 max = cfg->max;
+	u64 step = cfg->step;
+	s64 def = cfg->def;
+	u64 step_or_mask;
+	struct v4l2_ctrl *ctrl;
+	bool is_menu;
+
+	if (!name)
+		v4l2_ctrl_fill(cfg->id, &name, &type, &min, &max, &step,
+			       &def, &flags);
+
+	is_menu = type == V4L2_CTRL_TYPE_MENU ||
+		  type == V4L2_CTRL_TYPE_INTEGER_MENU;
+	if (is_menu) {
+		WARN_ON(step);
+		step_or_mask = cfg->menu_skip_mask;
+	} else {
+		WARN_ON(cfg->menu_skip_mask);
+		step_or_mask = step;
+	}
+
+	/* An integer menu must bring its own table of values. */
+	if (type == V4L2_CTRL_TYPE_INTEGER_MENU && !qmenu_int) {
+		handler_set_err(hdl, -EINVAL);
+		return NULL;
+	}
+	/* A named menu without items falls back to the menu for this ID. */
+	if (type == V4L2_CTRL_TYPE_MENU && !qmenu)
+		qmenu = v4l2_ctrl_get_menu(cfg->id);
+
+	ctrl = v4l2_ctrl_new(hdl, cfg->ops, cfg->type_ops, cfg->id, name,
+			     type, min, max, step_or_mask, def,
+			     cfg->dims, cfg->elem_size,
+			     flags, qmenu, qmenu_int, cfg->p_def, priv);
+	if (!ctrl)
+		return NULL;
+
+	ctrl->is_private = cfg->is_private;
+	return ctrl;
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_custom);
+
+/*
+ * Helper function for standard non-menu controls. Menu, integer menu and
+ * compound control IDs are rejected with -EINVAL recorded in the handler.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_ops *ops,
+			u32 id, s64 min, s64 max, u64 step, s64 def)
+{
+	enum v4l2_ctrl_type type;
+	const char *name;
+	u32 flags;
+	bool unsupported;
+
+	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
+
+	unsupported = type == V4L2_CTRL_TYPE_MENU ||
+		      type == V4L2_CTRL_TYPE_INTEGER_MENU ||
+		      type >= V4L2_CTRL_COMPOUND_TYPES;
+	if (unsupported) {
+		handler_set_err(hdl, -EINVAL);
+		return NULL;
+	}
+
+	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
+			     min, max, step, def, NULL, 0,
+			     flags, NULL, NULL, ptr_null, NULL);
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_std);
+
+/*
+ * Helper function for standard menu controls
+ *
+ * @_max is the highest menu index, @mask the set of menu entries to skip and
+ * @_def the default index. The menu items come from the standard tables for
+ * @id. Returns the new control, or NULL with -EINVAL recorded in the handler
+ * when @id has no standard menu or @_max lies beyond the standard integer
+ * menu.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_ops *ops,
+			u32 id, u8 _max, u64 mask, u8 _def)
+{
+	const char * const *qmenu = NULL;
+	const s64 *qmenu_int = NULL;
+	unsigned int qmenu_int_len = 0;
+	const char *name;
+	enum v4l2_ctrl_type type;
+	s64 min;
+	s64 max = _max;
+	s64 def = _def;
+	u64 step;
+	u32 flags;
+
+	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
+
+	if (type == V4L2_CTRL_TYPE_MENU)
+		qmenu = v4l2_ctrl_get_menu(id);
+	else if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
+		qmenu_int = v4l2_ctrl_get_int_menu(id, &qmenu_int_len);
+
+	/*
+	 * max is the highest index that may be selected, so it has to be
+	 * strictly below the number of entries in the integer menu.
+	 */
+	if ((!qmenu && !qmenu_int) || (qmenu_int && max >= qmenu_int_len)) {
+		handler_set_err(hdl, -EINVAL);
+		return NULL;
+	}
+	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
+			     0, max, mask, def, NULL, 0,
+			     flags, qmenu, qmenu_int, ptr_null, NULL);
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_std_menu);
+
+/*
+ * Helper function for standard menu controls whose menu items are supplied
+ * by the driver rather than taken from a standard menu.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_ops *ops, u32 id, u8 _max,
+			u64 mask, u8 _def, const char * const *qmenu)
+{
+	s64 max = _max;
+	s64 def = _def;
+	s64 min;
+	u64 step;
+	u32 flags;
+	const char *name;
+	enum v4l2_ctrl_type type;
+
+	/*
+	 * Only standard controls that have no standard menu of their own
+	 * may be created through this helper.
+	 */
+	if (v4l2_ctrl_get_menu(id))
+		goto invalid;
+
+	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
+	if (!qmenu || type != V4L2_CTRL_TYPE_MENU)
+		goto invalid;
+
+	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
+			     0, max, mask, def, NULL, 0,
+			     flags, qmenu, NULL, ptr_null, NULL);
+
+invalid:
+	handler_set_err(hdl, -EINVAL);
+	return NULL;
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_std_menu_items);
+
+/*
+ * Helper function for standard compound controls
+ *
+ * The control properties are filled in by v4l2_ctrl_fill() for @id; @p_def
+ * optionally supplies the default value. IDs that do not resolve to a
+ * compound type are rejected with -EINVAL recorded in the handler.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl,
+				const struct v4l2_ctrl_ops *ops, u32 id,
+				const union v4l2_ctrl_ptr p_def)
+{
+	const char *name;
+	enum v4l2_ctrl_type type;
+	u32 flags;
+	s64 min, max, def;
+	/* step is unsigned, as in the other helpers that call v4l2_ctrl_fill() */
+	u64 step;
+
+	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
+	if (type < V4L2_CTRL_COMPOUND_TYPES) {
+		handler_set_err(hdl, -EINVAL);
+		return NULL;
+	}
+	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
+			     min, max, step, def, NULL, 0,
+			     flags, NULL, NULL, p_def, NULL);
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_std_compound);
+
+/*
+ * Helper function for standard integer menu controls whose table of values
+ * is supplied by the driver. IDs that are not integer menus are rejected
+ * with -EINVAL recorded in the handler.
+ */
+struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl,
+			const struct v4l2_ctrl_ops *ops,
+			u32 id, u8 _max, u8 _def, const s64 *qmenu_int)
+{
+	s64 max = _max;
+	s64 def = _def;
+	s64 min;
+	u64 step;
+	u32 flags;
+	const char *name;
+	enum v4l2_ctrl_type type;
+
+	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
+	if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
+		return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
+				     0, max, 0, def, NULL, 0,
+				     flags, NULL, qmenu_int, ptr_null, NULL);
+
+	handler_set_err(hdl, -EINVAL);
+	return NULL;
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_int_menu);
+
+/*
+ * Add references to the controls of handler @add to handler @hdl.
+ *
+ * Handler-private controls, control classes and controls rejected by the
+ * optional @filter are left out. Returns 0 on success (or when there is
+ * nothing to do), hdl->error if @hdl already carries an error, or the error
+ * from the first reference that could not be added.
+ */
+int v4l2_ctrl_add_handler(struct v4l2_ctrl_handler *hdl,
+			  struct v4l2_ctrl_handler *add,
+			  bool (*filter)(const struct v4l2_ctrl *ctrl),
+			  bool from_other_dev)
+{
+	struct v4l2_ctrl_ref *ref;
+	int ret = 0;
+
+	/* Nothing to do for a NULL handler or when adding a handler to itself */
+	if (hdl == NULL || add == NULL || add == hdl)
+		return 0;
+	if (hdl->error)
+		return hdl->error;
+
+	mutex_lock(add->lock);
+	list_for_each_entry(ref, &add->ctrl_refs, node) {
+		struct v4l2_ctrl *ctrl = ref->ctrl;
+
+		/*
+		 * Leave out handler-private controls, control classes and
+		 * anything the caller's filter does not want.
+		 */
+		if (ctrl->is_private ||
+		    ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS ||
+		    (filter && !filter(ctrl)))
+			continue;
+
+		ret = handler_new_ref(hdl, ctrl, NULL, from_other_dev, false);
+		if (ret)
+			break;
+	}
+	mutex_unlock(add->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL(v4l2_ctrl_add_handler);
+
+/* Filter accepting FM TX/RX class controls and the basic audio controls. */
+bool v4l2_ctrl_radio_filter(const struct v4l2_ctrl *ctrl)
+{
+	const u32 which = V4L2_CTRL_ID2WHICH(ctrl->id);
+
+	if (which == V4L2_CTRL_CLASS_FM_TX || which == V4L2_CTRL_CLASS_FM_RX)
+		return true;
+	switch (ctrl->id) {
+	case V4L2_CID_AUDIO_MUTE:
+	case V4L2_CID_AUDIO_VOLUME:
+	case V4L2_CID_AUDIO_BALANCE:
+	case V4L2_CID_AUDIO_BASS:
+	case V4L2_CID_AUDIO_TREBLE:
+	case V4L2_CID_AUDIO_LOUDNESS:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL(v4l2_ctrl_radio_filter);
+
+/* Tie @ncontrols controls into one cluster; controls[0] is the master. */
+void v4l2_ctrl_cluster(unsigned ncontrols, struct v4l2_ctrl **controls)
+{
+	bool any_volatile = false;
+	int idx;
+
+	/* A cluster needs a master: reject empty arrays and a NULL slot 0. */
+	if (WARN_ON(ncontrols == 0 || controls[0] == NULL))
+		return;
+	for (idx = 0; idx < ncontrols; idx++) {
+		struct v4l2_ctrl *member = controls[idx];
+
+		if (!member)
+			continue;
+		member->cluster = controls;
+		member->ncontrols = ncontrols;
+		any_volatile |= !!(member->flags & V4L2_CTRL_FLAG_VOLATILE);
+	}
+	controls[0]->has_volatiles = any_volatile;
+}
+EXPORT_SYMBOL(v4l2_ctrl_cluster);
+
+void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls,
+			    u8 manual_val, bool set_volatile)
+{
+	struct v4l2_ctrl *master = controls[0];
+	u32 slave_flags = 0;
+	int idx;
+
+	v4l2_ctrl_cluster(ncontrols, controls);
+	WARN_ON(ncontrols <= 1);
+	WARN_ON(manual_val < master->minimum || manual_val > master->maximum);
+	WARN_ON(set_volatile && !has_op(master, g_volatile_ctrl));
+	master->is_auto = true;
+	master->has_volatiles = set_volatile;
+	master->manual_mode_value = manual_val;
+	master->flags |= V4L2_CTRL_FLAG_UPDATE;
+
+	/* Unless the master is currently manual, mark the others inactive. */
+	if (!is_cur_manual(master))
+		slave_flags = V4L2_CTRL_FLAG_INACTIVE |
+			(set_volatile ? V4L2_CTRL_FLAG_VOLATILE : 0);
+	for (idx = 1; idx < ncontrols; idx++)
+		if (controls[idx])
+			controls[idx]->flags |= slave_flags;
+}
+EXPORT_SYMBOL(v4l2_ctrl_auto_cluster);
+
+/* Refresh the non-master controls of an autocluster and mark them new. */
+void update_from_auto_cluster(struct v4l2_ctrl *master)
+{
+	int idx;
+
+	/* Index 0 is the master itself, so start at 1. */
+	for (idx = 1; idx < master->ncontrols; idx++)
+		cur_to_new(master->cluster[idx]);
+	/* A non-zero g_volatile_ctrl result leaves is_new untouched. */
+	if (call_op(master, g_volatile_ctrl))
+		return;
+	for (idx = 1; idx < master->ncontrols; idx++)
+		if (master->cluster[idx])
+			master->cluster[idx]->is_new = 1;
+}
+
+/*
+ * Return non-zero if any control in the cluster is EXECUTE_ON_WRITE or has
+ * a new value that differs from its current value; sets ctrl->has_changed.
+ */
+static int cluster_changed(struct v4l2_ctrl *master)
+{
+	bool changed = false;
+	unsigned int idx;
+	int i;
+
+	for (i = 0; i < master->ncontrols; i++) {
+		struct v4l2_ctrl *ctrl = master->cluster[i];
+		bool ctrl_changed = false;
+
+		if (!ctrl)
+			continue;	/* unused cluster slot */
+
+		if (ctrl->flags & V4L2_CTRL_FLAG_EXECUTE_ON_WRITE) {
+			changed = true;	/* counts even if values match */
+			ctrl_changed = true;
+		}
+
+		/*
+		 * Volatile controls skip the comparison; has_changed is
+		 * cleared to avoid the event V4L2_EVENT_CTRL_CH_VALUE
+		 */
+		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
+			ctrl->has_changed = false;
+			continue;
+		}
+
+		for (idx = 0; !ctrl_changed && idx < ctrl->elems; idx++)
+			ctrl_changed = !ctrl->type_ops->equal(ctrl, idx,
+				ctrl->p_cur, ctrl->p_new);
+		ctrl->has_changed = ctrl_changed;
+		changed |= ctrl->has_changed;
+	}
+	return changed;
+}
+
+/*
+ * Core function that calls try_ctrl and, if @set is true, s_ctrl for a
+ * cluster, then copies the new values to the current values on success.
+ * Must be called with ctrl->handler->lock held.
+ */
+int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master,
+		       bool set, u32 ch_flags)
+{
+	bool update_flag;
+	int ret;
+	int i;
+
+	/*
+	 * Go through the cluster and either validate the new value or
+	 * (if no new value was set), copy the current value to the new
+	 * value, ensuring a consistent view for the control ops when
+	 * called.
+	 */
+	for (i = 0; i < master->ncontrols; i++) {
+		struct v4l2_ctrl *ctrl = master->cluster[i];
+
+		if (!ctrl)
+			continue;
+
+		if (!ctrl->is_new) {
+			cur_to_new(ctrl);
+			continue;
+		}
+		/*
+		 * Check again: it may have changed since the
+		 * previous check in try_or_set_ext_ctrls().
+		 */
+		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED))
+			return -EBUSY;
+	}
+
+	ret = call_op(master, try_ctrl);
+
+	/* Stop on a try_ctrl error, for a pure 'try', or if nothing changed */
+	if (ret || !set || !cluster_changed(master))
+		return ret;
+	ret = call_op(master, s_ctrl);
+	if (ret)
+		return ret;
+
+	/* s_ctrl succeeded: note a manual-mode switch, then commit values. */
+	update_flag = is_cur_manual(master) != is_new_manual(master);
+
+	for (i = 0; i < master->ncontrols; i++) {
+		/*
+		 * If we switch from auto to manual mode, and this cluster
+		 * contains volatile controls, then all non-master controls
+		 * have to be marked as changed. The 'new' value contains
+		 * the volatile value (obtained by update_from_auto_cluster),
+		 * which now has to become the current value.
+		 */
+		if (i && update_flag && is_new_manual(master) &&
+		    master->has_volatiles && master->cluster[i])
+			master->cluster[i]->has_changed = true;
+
+		new_to_cur(fh, master->cluster[i], ch_flags |
+			((update_flag && i > 0) ? V4L2_EVENT_CTRL_CH_FLAGS : 0));
+	}
+	return 0;
+}
+
+/* Activate or deactivate a control via V4L2_CTRL_FLAG_INACTIVE. */
+void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active)
+{
+	bool was_inactive;
+
+	/* A NULL control is silently ignored. */
+	if (!ctrl)
+		return;
+
+	/* Bit 4 of ctrl->flags is V4L2_CTRL_FLAG_INACTIVE. */
+	if (active)
+		was_inactive = test_and_clear_bit(4, &ctrl->flags);
+	else
+		was_inactive = test_and_set_bit(4, &ctrl->flags);
+
+	/* Send a flags-changed event only when the bit actually flipped. */
+	if (was_inactive == active)
+		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS);
+}
+EXPORT_SYMBOL(v4l2_ctrl_activate);
+
+/* Set or clear V4L2_CTRL_FLAG_GRABBED; the handler lock must be held. */
+void __v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed)
+{
+	bool was_grabbed;
+
+	if (!ctrl)
+		return;
+
+	lockdep_assert_held(ctrl->handler->lock);
+
+	/* Bit 1 of ctrl->flags is V4L2_CTRL_FLAG_GRABBED. */
+	was_grabbed = grabbed ? test_and_set_bit(1, &ctrl->flags) :
+				test_and_clear_bit(1, &ctrl->flags);
+
+	/* Send a flags-changed event only when the bit actually flipped. */
+	if (was_grabbed != grabbed)
+		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS);
+}
+EXPORT_SYMBOL(__v4l2_ctrl_grab);
+
+/* Push the current value of every control owned by @hdl through s_ctrl. */
+int __v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl)
+{
+	struct v4l2_ctrl *ctrl;
+	int err;
+
+	if (!hdl)
+		return 0;
+
+	lockdep_assert_held(hdl->lock);
+	list_for_each_entry(ctrl, &hdl->ctrls, node)
+		ctrl->done = false;
+
+	list_for_each_entry(ctrl, &hdl->ctrls, node) {
+		struct v4l2_ctrl *master = ctrl->cluster[0];
+		int i;
+
+		/* Skip clustered repeats, buttons and read-only controls. */
+		if (ctrl->done || ctrl->type == V4L2_CTRL_TYPE_BUTTON)
+			continue;
+		if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)
+			continue;
+
+		for (i = 0; i < master->ncontrols; i++) {
+			struct v4l2_ctrl *member = master->cluster[i];
+
+			if (!member)
+				continue;
+			cur_to_new(member);
+			member->is_new = 1;
+			member->done = true;
+		}
+		err = call_op(master, s_ctrl);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__v4l2_ctrl_handler_setup);
+
+/* Run __v4l2_ctrl_handler_setup() with hdl->lock held; NULL is a no-op. */
+int v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl)
+{
+	int status = 0;
+
+	if (hdl) {
+		mutex_lock(hdl->lock);
+		status = __v4l2_ctrl_handler_setup(hdl);
+		mutex_unlock(hdl->lock);
+	}
+
+	return status;
+}
+EXPORT_SYMBOL(v4l2_ctrl_handler_setup);
+
+/* Print one log line with the control's name, value and state flags. */
+static void log_ctrl(const struct v4l2_ctrl *ctrl,
+		     const char *prefix, const char *colon)
+{
+	/* Disabled and write-only controls are not logged. */
+	if (ctrl->flags & V4L2_CTRL_FLAG_DISABLED)
+		return;
+	if (ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)
+		return;
+	/* Control-class entries are skipped as well. */
+	if (ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS)
+		return;
+
+	pr_info("%s%s%s: ", prefix, colon, ctrl->name);
+	ctrl->type_ops->log(ctrl);
+
+	/* Append a keyword for each of these state flags that is set. */
+	if (ctrl->flags & V4L2_CTRL_FLAG_INACTIVE)
+		pr_cont(" inactive");
+	if (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)
+		pr_cont(" grabbed");
+	if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE)
+		pr_cont(" volatile");
+	pr_cont("\n");
+}
+
+/* Log the controls owned by @hdl, starting each line with @prefix. */
+void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl,
+				  const char *prefix)
+{
+	const char *sep = "";
+	struct v4l2_ctrl *c;
+	int plen;
+
+	if (!hdl)
+		return;
+	prefix = prefix ? prefix : "";
+	plen = strlen(prefix);
+	/* Add ": " unless the prefix is empty or already ends in a space. */
+	if (plen && prefix[plen - 1] != ' ')
+		sep = ": ";
+	mutex_lock(hdl->lock);
+	list_for_each_entry(c, &hdl->ctrls, node)
+		if (!(c->flags & V4L2_CTRL_FLAG_DISABLED))
+			log_ctrl(c, prefix, sep);
+	mutex_unlock(hdl->lock);
+}
+EXPORT_SYMBOL(v4l2_ctrl_handler_log_status);
+
+/* Create orientation/rotation controls for the fwnode properties set in @p. */
+int v4l2_ctrl_new_fwnode_properties(struct v4l2_ctrl_handler *hdl,
+				    const struct v4l2_ctrl_ops *ctrl_ops,
+				    const struct v4l2_fwnode_device_properties *p)
+{
+	if (p->orientation != V4L2_FWNODE_PROPERTY_UNSET) {
+		u32 menu_def;
+
+		/* Map the fwnode orientation onto the V4L2 control value. */
+		switch (p->orientation) {
+		case V4L2_FWNODE_ORIENTATION_FRONT:
+			menu_def = V4L2_CAMERA_ORIENTATION_FRONT;
+			break;
+		case V4L2_FWNODE_ORIENTATION_BACK:
+			menu_def = V4L2_CAMERA_ORIENTATION_BACK;
+			break;
+		case V4L2_FWNODE_ORIENTATION_EXTERNAL:
+			menu_def = V4L2_CAMERA_ORIENTATION_EXTERNAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+		if (!v4l2_ctrl_new_std_menu(hdl, ctrl_ops,
+					    V4L2_CID_CAMERA_ORIENTATION,
+					    V4L2_CAMERA_ORIENTATION_EXTERNAL, 0,
+					    menu_def))
+			return hdl->error;
+	}
+
+	/* A failure here is reported through hdl->error below. */
+	if (p->rotation != V4L2_FWNODE_PROPERTY_UNSET)
+		v4l2_ctrl_new_std(hdl, ctrl_ops,
+				  V4L2_CID_CAMERA_SENSOR_ROTATION,
+				  p->rotation, p->rotation, 1, p->rotation);
+
+	return hdl->error;
+}
+EXPORT_SYMBOL(v4l2_ctrl_new_fwnode_properties);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
new file mode 100644
index 0000000000000..7963c7b434504
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
@@ -0,0 +1,1575 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * V4L2 controls framework control definitions.
+ *
+ * Copyright (C) 2010-2021  Hans Verkuil <hverkuil-cisco@xs4all.nl>
+ */
+
+#include <linux/export.h>
+#include <media/v4l2-ctrls.h>
+
+/*
+ * Returns NULL or a character pointer array containing the menu for
+ * the given control ID. The pointer array ends with a NULL pointer.
+ * An empty string signifies a menu entry that is invalid. This allows
+ * drivers to disable certain options if they are not supported.
+ */
+const char * const *v4l2_ctrl_get_menu(u32 id)
+{
+	static const char * const mpeg_audio_sampling_freq[] = {
+		"44.1 kHz",
+		"48 kHz",
+		"32 kHz",
+		NULL
+	};
+	static const char * const mpeg_audio_encoding[] = {
+		"MPEG-1/2 Layer I",
+		"MPEG-1/2 Layer II",
+		"MPEG-1/2 Layer III",
+		"MPEG-2/4 AAC",
+		"AC-3",
+		NULL
+	};
+	static const char * const mpeg_audio_l1_bitrate[] = {
+		"32 kbps",
+		"64 kbps",
+		"96 kbps",
+		"128 kbps",
+		"160 kbps",
+		"192 kbps",
+		"224 kbps",
+		"256 kbps",
+		"288 kbps",
+		"320 kbps",
+		"352 kbps",
+		"384 kbps",
+		"416 kbps",
+		"448 kbps",
+		NULL
+	};
+	static const char * const mpeg_audio_l2_bitrate[] = {
+		"32 kbps",
+		"48 kbps",
+		"56 kbps",
+		"64 kbps",
+		"80 kbps",
+		"96 kbps",
+		"112 kbps",
+		"128 kbps",
+		"160 kbps",
+		"192 kbps",
+		"224 kbps",
+		"256 kbps",
+		"320 kbps",
+		"384 kbps",
+		NULL
+	};
+	static const char * const mpeg_audio_l3_bitrate[] = {
+		"32 kbps",
+		"40 kbps",
+		"48 kbps",
+		"56 kbps",
+		"64 kbps",
+		"80 kbps",
+		"96 kbps",
+		"112 kbps",
+		"128 kbps",
+		"160 kbps",
+		"192 kbps",
+		"224 kbps",
+		"256 kbps",
+		"320 kbps",
+		NULL
+	};
+	static const char * const mpeg_audio_ac3_bitrate[] = {
+		"32 kbps",
+		"40 kbps",
+		"48 kbps",
+		"56 kbps",
+		"64 kbps",
+		"80 kbps",
+		"96 kbps",
+		"112 kbps",
+		"128 kbps",
+		"160 kbps",
+		"192 kbps",
+		"224 kbps",
+		"256 kbps",
+		"320 kbps",
+		"384 kbps",
+		"448 kbps",
+		"512 kbps",
+		"576 kbps",
+		"640 kbps",
+		NULL
+	};
+	static const char * const mpeg_audio_mode[] = {
+		"Stereo",
+		"Joint Stereo",
+		"Dual",
+		"Mono",
+		NULL
+	};
+	static const char * const mpeg_audio_mode_extension[] = {
+		"Bound 4",
+		"Bound 8",
+		"Bound 12",
+		"Bound 16",
+		NULL
+	};
+	static const char * const mpeg_audio_emphasis[] = {
+		"No Emphasis",
+		"50/15 us",
+		"CCITT J17",
+		NULL
+	};
+	static const char * const mpeg_audio_crc[] = {
+		"No CRC",
+		"16-bit CRC",
+		NULL
+	};
+	static const char * const mpeg_audio_dec_playback[] = {
+		"Auto",
+		"Stereo",
+		"Left",
+		"Right",
+		"Mono",
+		"Swapped Stereo",
+		NULL
+	};
+	static const char * const mpeg_video_encoding[] = {
+		"MPEG-1",
+		"MPEG-2",
+		"MPEG-4 AVC",
+		NULL
+	};
+	static const char * const mpeg_video_aspect[] = {
+		"1x1",
+		"4x3",
+		"16x9",
+		"2.21x1",
+		NULL
+	};
+	static const char * const mpeg_video_bitrate_mode[] = {
+		"Variable Bitrate",
+		"Constant Bitrate",
+		"Constant Quality",
+		NULL
+	};
+	static const char * const mpeg_stream_type[] = {
+		"MPEG-2 Program Stream",
+		"MPEG-2 Transport Stream",
+		"MPEG-1 System Stream",
+		"MPEG-2 DVD-compatible Stream",
+		"MPEG-1 VCD-compatible Stream",
+		"MPEG-2 SVCD-compatible Stream",
+		NULL
+	};
+	static const char * const mpeg_stream_vbi_fmt[] = {
+		"No VBI",
+		"Private Packet, IVTV Format",
+		NULL
+	};
+	static const char * const camera_power_line_frequency[] = {
+		"Disabled",
+		"50 Hz",
+		"60 Hz",
+		"Auto",
+		NULL
+	};
+	static const char * const camera_exposure_auto[] = {
+		"Auto Mode",
+		"Manual Mode",
+		"Shutter Priority Mode",
+		"Aperture Priority Mode",
+		NULL
+	};
+	static const char * const camera_exposure_metering[] = {
+		"Average",
+		"Center Weighted",
+		"Spot",
+		"Matrix",
+		NULL
+	};
+	static const char * const camera_auto_focus_range[] = {
+		"Auto",
+		"Normal",
+		"Macro",
+		"Infinity",
+		NULL
+	};
+	static const char * const colorfx[] = {
+		"None",
+		"Black & White",
+		"Sepia",
+		"Negative",
+		"Emboss",
+		"Sketch",
+		"Sky Blue",
+		"Grass Green",
+		"Skin Whiten",
+		"Vivid",
+		"Aqua",
+		"Art Freeze",
+		"Silhouette",
+		"Solarization",
+		"Antique",
+		"Set Cb/Cr",
+		NULL
+	};
+	static const char * const auto_n_preset_white_balance[] = {
+		"Manual",
+		"Auto",
+		"Incandescent",
+		"Fluorescent",
+		"Fluorescent H",
+		"Horizon",
+		"Daylight",
+		"Flash",
+		"Cloudy",
+		"Shade",
+		NULL,
+	};
+	static const char * const camera_iso_sensitivity_auto[] = {
+		"Manual",
+		"Auto",
+		NULL
+	};
+	static const char * const scene_mode[] = {
+		"None",
+		"Backlight",
+		"Beach/Snow",
+		"Candle Light",
+		"Dusk/Dawn",
+		"Fall Colors",
+		"Fireworks",
+		"Landscape",
+		"Night",
+		"Party/Indoor",
+		"Portrait",
+		"Sports",
+		"Sunset",
+		"Text",
+		NULL
+	};
+	static const char * const tune_emphasis[] = {
+		"None",
+		"50 Microseconds",
+		"75 Microseconds",
+		NULL,
+	};
+	static const char * const header_mode[] = {
+		"Separate Buffer",
+		"Joined With 1st Frame",
+		NULL,
+	};
+	static const char * const multi_slice[] = {
+		"Single",
+		"Max Macroblocks",
+		"Max Bytes",
+		NULL,
+	};
+	static const char * const entropy_mode[] = {
+		"CAVLC",
+		"CABAC",
+		NULL,
+	};
+	static const char * const mpeg_h264_level[] = {
+		"1",
+		"1b",
+		"1.1",
+		"1.2",
+		"1.3",
+		"2",
+		"2.1",
+		"2.2",
+		"3",
+		"3.1",
+		"3.2",
+		"4",
+		"4.1",
+		"4.2",
+		"5",
+		"5.1",
+		"5.2",
+		"6.0",
+		"6.1",
+		"6.2",
+		NULL,
+	};
+	static const char * const h264_loop_filter[] = {
+		"Enabled",
+		"Disabled",
+		"Disabled at Slice Boundary",
+		NULL,
+	};
+	static const char * const h264_profile[] = {
+		"Baseline",
+		"Constrained Baseline",
+		"Main",
+		"Extended",
+		"High",
+		"High 10",
+		"High 422",
+		"High 444 Predictive",
+		"High 10 Intra",
+		"High 422 Intra",
+		"High 444 Intra",
+		"CAVLC 444 Intra",
+		"Scalable Baseline",
+		"Scalable High",
+		"Scalable High Intra",
+		"Stereo High",
+		"Multiview High",
+		"Constrained High",
+		NULL,
+	};
+	static const char * const vui_sar_idc[] = {
+		"Unspecified",
+		"1:1",
+		"12:11",
+		"10:11",
+		"16:11",
+		"40:33",
+		"24:11",
+		"20:11",
+		"32:11",
+		"80:33",
+		"18:11",
+		"15:11",
+		"64:33",
+		"160:99",
+		"4:3",
+		"3:2",
+		"2:1",
+		"Extended SAR",
+		NULL,
+	};
+	static const char * const h264_fp_arrangement_type[] = {
+		"Checkerboard",
+		"Column",
+		"Row",
+		"Side by Side",
+		"Top Bottom",
+		"Temporal",
+		NULL,
+	};
+	static const char * const h264_fmo_map_type[] = {
+		"Interleaved Slices",
+		"Scattered Slices",
+		"Foreground with Leftover",
+		"Box Out",
+		"Raster Scan",
+		"Wipe Scan",
+		"Explicit",
+		NULL,
+	};
+	static const char * const h264_decode_mode[] = {
+		"Slice-Based",
+		"Frame-Based",
+		NULL,
+	};
+	static const char * const h264_start_code[] = {
+		"No Start Code",
+		"Annex B Start Code",
+		NULL,
+	};
+	static const char * const h264_hierarchical_coding_type[] = {
+		"Hier Coding B",
+		"Hier Coding P",
+		NULL,
+	};
+	static const char * const mpeg_mpeg2_level[] = {
+		"Low",
+		"Main",
+		"High 1440",
+		"High",
+		NULL,
+	};
+	static const char * const mpeg2_profile[] = {
+		"Simple",
+		"Main",
+		"SNR Scalable",
+		"Spatially Scalable",
+		"High",
+		NULL,
+	};
+	static const char * const mpeg_mpeg4_level[] = {
+		"0",
+		"0b",
+		"1",
+		"2",
+		"3",
+		"3b",
+		"4",
+		"5",
+		NULL,
+	};
+	static const char * const mpeg4_profile[] = {
+		"Simple",
+		"Advanced Simple",
+		"Core",
+		"Simple Scalable",
+		"Advanced Coding Efficiency",
+		NULL,
+	};
+
+	static const char * const vpx_golden_frame_sel[] = {
+		"Use Previous Frame",
+		"Use Previous Specific Frame",
+		NULL,
+	};
+	static const char * const vp8_profile[] = {
+		"0",
+		"1",
+		"2",
+		"3",
+		NULL,
+	};
+	static const char * const vp9_profile[] = {
+		"0",
+		"1",
+		"2",
+		"3",
+		NULL,
+	};
+	static const char * const vp9_level[] = {
+		"1",
+		"1.1",
+		"2",
+		"2.1",
+		"3",
+		"3.1",
+		"4",
+		"4.1",
+		"5",
+		"5.1",
+		"5.2",
+		"6",
+		"6.1",
+		"6.2",
+		NULL,
+	};
+
+	static const char * const flash_led_mode[] = {
+		"Off",
+		"Flash",
+		"Torch",
+		NULL,
+	};
+	static const char * const flash_strobe_source[] = {
+		"Software",
+		"External",
+		NULL,
+	};
+
+	static const char * const jpeg_chroma_subsampling[] = {
+		"4:4:4",
+		"4:2:2",
+		"4:2:0",
+		"4:1:1",
+		"4:1:0",
+		"Gray",
+		NULL,
+	};
+	static const char * const dv_tx_mode[] = {
+		"DVI-D",
+		"HDMI",
+		NULL,
+	};
+	static const char * const dv_rgb_range[] = {
+		"Automatic",
+		"RGB Limited Range (16-235)",
+		"RGB Full Range (0-255)",
+		NULL,
+	};
+	static const char * const dv_it_content_type[] = {
+		"Graphics",
+		"Photo",
+		"Cinema",
+		"Game",
+		"No IT Content",
+		NULL,
+	};
+	static const char * const detect_md_mode[] = {
+		"Disabled",
+		"Global",
+		"Threshold Grid",
+		"Region Grid",
+		NULL,
+	};
+
+	static const char * const hevc_profile[] = {
+		"Main",
+		"Main Still Picture",
+		"Main 10",
+		NULL,
+	};
+	static const char * const hevc_level[] = {
+		"1",
+		"2",
+		"2.1",
+		"3",
+		"3.1",
+		"4",
+		"4.1",
+		"5",
+		"5.1",
+		"5.2",
+		"6",
+		"6.1",
+		"6.2",
+		NULL,
+	};
+	static const char * const hevc_hierarchial_coding_type[] = {
+		"B",
+		"P",
+		NULL,
+	};
+	static const char * const hevc_refresh_type[] = {
+		"None",
+		"CRA",
+		"IDR",
+		NULL,
+	};
+	static const char * const hevc_size_of_length_field[] = {
+		"0",
+		"1",
+		"2",
+		"4",
+		NULL,
+	};
+	static const char * const hevc_tier[] = {
+		"Main",
+		"High",
+		NULL,
+	};
+	static const char * const hevc_loop_filter_mode[] = {
+		"Disabled",
+		"Enabled",
+		"Disabled at slice boundary",
+		NULL,	/* terminator: menu arrays must end with a NULL pointer */
+	};
+	static const char * const hevc_decode_mode[] = {
+		"Slice-Based",
+		"Frame-Based",
+		NULL,
+	};
+	static const char * const hevc_start_code[] = {
+		"No Start Code",
+		"Annex B Start Code",
+		NULL,
+	};
+	static const char * const camera_orientation[] = {
+		"Front",
+		"Back",
+		"External",
+		NULL,
+	};
+	static const char * const mpeg_video_frame_skip[] = {
+		"Disabled",
+		"Level Limit",
+		"VBV/CPB Limit",
+		NULL,
+	};
+
+	switch (id) {
+	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
+		return mpeg_audio_sampling_freq;
+	case V4L2_CID_MPEG_AUDIO_ENCODING:
+		return mpeg_audio_encoding;
+	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:
+		return mpeg_audio_l1_bitrate;
+	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:
+		return mpeg_audio_l2_bitrate;
+	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:
+		return mpeg_audio_l3_bitrate;
+	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:
+		return mpeg_audio_ac3_bitrate;
+	case V4L2_CID_MPEG_AUDIO_MODE:
+		return mpeg_audio_mode;
+	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION:
+		return mpeg_audio_mode_extension;
+	case V4L2_CID_MPEG_AUDIO_EMPHASIS:
+		return mpeg_audio_emphasis;
+	case V4L2_CID_MPEG_AUDIO_CRC:
+		return mpeg_audio_crc;
+	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:
+	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK:
+		return mpeg_audio_dec_playback;
+	case V4L2_CID_MPEG_VIDEO_ENCODING:
+		return mpeg_video_encoding;
+	case V4L2_CID_MPEG_VIDEO_ASPECT:
+		return mpeg_video_aspect;
+	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
+		return mpeg_video_bitrate_mode;
+	case V4L2_CID_MPEG_STREAM_TYPE:
+		return mpeg_stream_type;
+	case V4L2_CID_MPEG_STREAM_VBI_FMT:
+		return mpeg_stream_vbi_fmt;
+	case V4L2_CID_POWER_LINE_FREQUENCY:
+		return camera_power_line_frequency;
+	case V4L2_CID_EXPOSURE_AUTO:
+		return camera_exposure_auto;
+	case V4L2_CID_EXPOSURE_METERING:
+		return camera_exposure_metering;
+	case V4L2_CID_AUTO_FOCUS_RANGE:
+		return camera_auto_focus_range;
+	case V4L2_CID_COLORFX:
+		return colorfx;
+	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE:
+		return auto_n_preset_white_balance;
+	case V4L2_CID_ISO_SENSITIVITY_AUTO:
+		return camera_iso_sensitivity_auto;
+	case V4L2_CID_SCENE_MODE:
+		return scene_mode;
+	case V4L2_CID_TUNE_PREEMPHASIS:
+		return tune_emphasis;
+	case V4L2_CID_TUNE_DEEMPHASIS:
+		return tune_emphasis;
+	case V4L2_CID_FLASH_LED_MODE:
+		return flash_led_mode;
+	case V4L2_CID_FLASH_STROBE_SOURCE:
+		return flash_strobe_source;
+	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
+		return header_mode;
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
+		return mpeg_video_frame_skip;
+	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
+		return multi_slice;
+	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
+		return entropy_mode;
+	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
+		return mpeg_h264_level;
+	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:
+		return h264_loop_filter;
+	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
+		return h264_profile;
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
+		return vui_sar_idc;
+	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
+		return h264_fp_arrangement_type;
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
+		return h264_fmo_map_type;
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:
+		return h264_decode_mode;
+	case V4L2_CID_STATELESS_H264_START_CODE:
+		return h264_start_code;
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:
+		return h264_hierarchical_coding_type;
+	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
+		return mpeg_mpeg2_level;
+	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
+		return mpeg2_profile;
+	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
+		return mpeg_mpeg4_level;
+	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
+		return mpeg4_profile;
+	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
+		return vpx_golden_frame_sel;
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
+		return vp8_profile;
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
+		return vp9_profile;
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
+		return vp9_level;
+	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
+		return jpeg_chroma_subsampling;
+	case V4L2_CID_DV_TX_MODE:
+		return dv_tx_mode;
+	case V4L2_CID_DV_TX_RGB_RANGE:
+	case V4L2_CID_DV_RX_RGB_RANGE:
+		return dv_rgb_range;
+	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:
+	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
+		return dv_it_content_type;
+	case V4L2_CID_DETECT_MD_MODE:
+		return detect_md_mode;
+	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
+		return hevc_profile;
+	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
+		return hevc_level;
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:
+		return hevc_hierarchial_coding_type;
+	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:
+		return hevc_refresh_type;
+	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:
+		return hevc_size_of_length_field;
+	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:
+		return hevc_tier;
+	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
+		return hevc_loop_filter_mode;
+	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
+		return hevc_decode_mode;
+	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
+		return hevc_start_code;
+	case V4L2_CID_CAMERA_ORIENTATION:
+		return camera_orientation;
+	default:
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(v4l2_ctrl_get_menu);
+
+#define __v4l2_qmenu_int_len(arr, len) ({ *(len) = ARRAY_SIZE(arr); (arr); })
+/*
+ * Look up the integer menu of control @id. Returns the s64 table and
+ * stores its number of entries in @len, or NULL with @len set to 0.
+ */
+const s64 *v4l2_ctrl_get_int_menu(u32 id, u32 *len)
+{
+	/* Menu for V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS */
+	static const s64 vpx_partitions[] = { 1, 2, 4, 8 };
+	/* Menu for V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES */
+	static const s64 vpx_ref_frames[] = { 1, 2, 3 };
+
+	switch (id) {
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:
+		return __v4l2_qmenu_int_len(vpx_partitions, len);
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:
+		return __v4l2_qmenu_int_len(vpx_ref_frames, len);
+	default:
+		break;
+	}
+
+	/* No integer menu is defined here for any other ID. */
+	*len = 0;
+	return NULL;
+}
+EXPORT_SYMBOL(v4l2_ctrl_get_int_menu);
+
+/* Return the control name. */
+const char *v4l2_ctrl_get_name(u32 id)
+{
+	switch (id) {
+	/* USER controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_USER_CLASS:		return "User Controls";
+	case V4L2_CID_BRIGHTNESS:		return "Brightness";
+	case V4L2_CID_CONTRAST:			return "Contrast";
+	case V4L2_CID_SATURATION:		return "Saturation";
+	case V4L2_CID_HUE:			return "Hue";
+	case V4L2_CID_AUDIO_VOLUME:		return "Volume";
+	case V4L2_CID_AUDIO_BALANCE:		return "Balance";
+	case V4L2_CID_AUDIO_BASS:		return "Bass";
+	case V4L2_CID_AUDIO_TREBLE:		return "Treble";
+	case V4L2_CID_AUDIO_MUTE:		return "Mute";
+	case V4L2_CID_AUDIO_LOUDNESS:		return "Loudness";
+	case V4L2_CID_BLACK_LEVEL:		return "Black Level";
+	case V4L2_CID_AUTO_WHITE_BALANCE:	return "White Balance, Automatic";
+	case V4L2_CID_DO_WHITE_BALANCE:		return "Do White Balance";
+	case V4L2_CID_RED_BALANCE:		return "Red Balance";
+	case V4L2_CID_BLUE_BALANCE:		return "Blue Balance";
+	case V4L2_CID_GAMMA:			return "Gamma";
+	case V4L2_CID_EXPOSURE:			return "Exposure";
+	case V4L2_CID_AUTOGAIN:			return "Gain, Automatic";
+	case V4L2_CID_GAIN:			return "Gain";
+	case V4L2_CID_HFLIP:			return "Horizontal Flip";
+	case V4L2_CID_VFLIP:			return "Vertical Flip";
+	case V4L2_CID_POWER_LINE_FREQUENCY:	return "Power Line Frequency";
+	case V4L2_CID_HUE_AUTO:			return "Hue, Automatic";
+	case V4L2_CID_WHITE_BALANCE_TEMPERATURE: return "White Balance Temperature";
+	case V4L2_CID_SHARPNESS:		return "Sharpness";
+	case V4L2_CID_BACKLIGHT_COMPENSATION:	return "Backlight Compensation";
+	case V4L2_CID_CHROMA_AGC:		return "Chroma AGC";
+	case V4L2_CID_COLOR_KILLER:		return "Color Killer";
+	case V4L2_CID_COLORFX:			return "Color Effects";
+	case V4L2_CID_AUTOBRIGHTNESS:		return "Brightness, Automatic";
+	case V4L2_CID_BAND_STOP_FILTER:		return "Band-Stop Filter";
+	case V4L2_CID_ROTATE:			return "Rotate";
+	case V4L2_CID_BG_COLOR:			return "Background Color";
+	case V4L2_CID_CHROMA_GAIN:		return "Chroma Gain";
+	case V4L2_CID_ILLUMINATORS_1:		return "Illuminator 1";
+	case V4L2_CID_ILLUMINATORS_2:		return "Illuminator 2";
+	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:	return "Min Number of Capture Buffers";
+	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:	return "Min Number of Output Buffers";
+	case V4L2_CID_ALPHA_COMPONENT:		return "Alpha Component";
+	case V4L2_CID_COLORFX_CBCR:		return "Color Effects, CbCr";
+
+	/*
+	 * Codec controls
+	 *
+	 * The MPEG controls are applicable to all codec controls
+	 * and the 'MPEG' part of the define is historical.
+	 *
+	 * Keep the order of the 'case's the same as in videodev2.h!
+	 */
+	case V4L2_CID_CODEC_CLASS:		return "Codec Controls";
+	case V4L2_CID_MPEG_STREAM_TYPE:		return "Stream Type";
+	case V4L2_CID_MPEG_STREAM_PID_PMT:	return "Stream PMT Program ID";
+	case V4L2_CID_MPEG_STREAM_PID_AUDIO:	return "Stream Audio Program ID";
+	case V4L2_CID_MPEG_STREAM_PID_VIDEO:	return "Stream Video Program ID";
+	case V4L2_CID_MPEG_STREAM_PID_PCR:	return "Stream PCR Program ID";
+	case V4L2_CID_MPEG_STREAM_PES_ID_AUDIO: return "Stream PES Audio ID";
+	case V4L2_CID_MPEG_STREAM_PES_ID_VIDEO: return "Stream PES Video ID";
+	case V4L2_CID_MPEG_STREAM_VBI_FMT:	return "Stream VBI Format";
+	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ: return "Audio Sampling Frequency";
+	case V4L2_CID_MPEG_AUDIO_ENCODING:	return "Audio Encoding";
+	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:	return "Audio Layer I Bitrate";
+	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:	return "Audio Layer II Bitrate";
+	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:	return "Audio Layer III Bitrate";
+	case V4L2_CID_MPEG_AUDIO_MODE:		return "Audio Stereo Mode";
+	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION: return "Audio Stereo Mode Extension";
+	case V4L2_CID_MPEG_AUDIO_EMPHASIS:	return "Audio Emphasis";
+	case V4L2_CID_MPEG_AUDIO_CRC:		return "Audio CRC";
+	case V4L2_CID_MPEG_AUDIO_MUTE:		return "Audio Mute";
+	case V4L2_CID_MPEG_AUDIO_AAC_BITRATE:	return "Audio AAC Bitrate";
+	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:	return "Audio AC-3 Bitrate";
+	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:	return "Audio Playback";
+	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK: return "Audio Multilingual Playback";
+	case V4L2_CID_MPEG_VIDEO_ENCODING:	return "Video Encoding";
+	case V4L2_CID_MPEG_VIDEO_ASPECT:	return "Video Aspect";
+	case V4L2_CID_MPEG_VIDEO_B_FRAMES:	return "Video B Frames";
+	case V4L2_CID_MPEG_VIDEO_GOP_SIZE:	return "Video GOP Size";
+	case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE:	return "Video GOP Closure";
+	case V4L2_CID_MPEG_VIDEO_PULLDOWN:	return "Video Pulldown";
+	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:	return "Video Bitrate Mode";
+	case V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY:	return "Constant Quality";
+	case V4L2_CID_MPEG_VIDEO_BITRATE:	return "Video Bitrate";
+	case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK:	return "Video Peak Bitrate";
+	case V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION: return "Video Temporal Decimation";
+	case V4L2_CID_MPEG_VIDEO_MUTE:		return "Video Mute";
+	case V4L2_CID_MPEG_VIDEO_MUTE_YUV:	return "Video Mute YUV";
+	case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE:	return "Decoder Slice Interface";
+	case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER:	return "MPEG4 Loop Filter Enable";
+	case V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB:	return "Number of Intra Refresh MBs";
+	case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE:		return "Frame Level Rate Control Enable";
+	case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE:			return "H264 MB Level Rate Control";
+	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:			return "Sequence Header Mode";
+	case V4L2_CID_MPEG_VIDEO_MAX_REF_PIC:			return "Max Number of Reference Pics";
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:		return "Frame Skip Mode";
+	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY:		return "Display Delay";
+	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE:	return "Display Delay Enable";
+	case V4L2_CID_MPEG_VIDEO_AU_DELIMITER:			return "Generate Access Unit Delimiters";
+	case V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP:		return "H263 I-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP:		return "H263 P-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP:		return "H263 B-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H263_MIN_QP:			return "H263 Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H263_MAX_QP:			return "H263 Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP:		return "H264 I-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP:		return "H264 P-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP:		return "H264 B-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_MAX_QP:			return "H264 Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_MIN_QP:			return "H264 Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:		return "H264 8x8 Transform Enable";
+	case V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE:			return "H264 CPB Buffer Size";
+	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:		return "H264 Entropy Mode";
+	case V4L2_CID_MPEG_VIDEO_H264_I_PERIOD:			return "H264 I-Frame Period";
+	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:			return "H264 Level";
+	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA:	return "H264 Loop Filter Alpha Offset";
+	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA:		return "H264 Loop Filter Beta Offset";
+	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:		return "H264 Loop Filter Mode";
+	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:			return "H264 Profile";
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT:	return "Vertical Size of SAR";
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH:	return "Horizontal Size of SAR";
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE:		return "Aspect Ratio VUI Enable";
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:		return "VUI Aspect Ratio IDC";
+	case V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING:	return "H264 Enable Frame Packing SEI";
+	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0:	return "H264 Set Curr. Frame as Frame0";
+	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:	return "H264 FP Arrangement Type";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO:			return "H264 Flexible MB Ordering";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:		return "H264 Map Type for FMO";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP:		return "H264 FMO Number of Slice Groups";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION:	return "H264 FMO Direction of Change";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE:		return "H264 FMO Size of 1st Slice Grp";
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH:		return "H264 FMO No. of Consecutive MBs";
+	case V4L2_CID_MPEG_VIDEO_H264_ASO:			return "H264 Arbitrary Slice Ordering";
+	case V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER:		return "H264 ASO Slice Order";
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING:	return "Enable H264 Hierarchical Coding";
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:	return "H264 Hierarchical Coding Type";
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER:return "H264 Number of HC Layers";
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP:
+								return "H264 Set QP Value for HC Layers";
+	case V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION:
+								return "H264 Constrained Intra Pred";
+	case V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET:	return "H264 Chroma QP Index Offset";
+	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP:		return "H264 I-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP:		return "H264 I-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP:		return "H264 P-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP:		return "H264 P-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MIN_QP:		return "H264 B-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MAX_QP:		return "H264 B-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L0_BR:	return "H264 Hierarchical Lay 0 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L1_BR:	return "H264 Hierarchical Lay 1 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L2_BR:	return "H264 Hierarchical Lay 2 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L3_BR:	return "H264 Hierarchical Lay 3 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L4_BR:	return "H264 Hierarchical Lay 4 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L5_BR:	return "H264 Hierarchical Lay 5 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L6_BR:	return "H264 Hierarchical Lay 6 Bitrate";
+	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:			return "MPEG2 Level";
+	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:			return "MPEG2 Profile";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP:		return "MPEG4 P-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP:		return "MPEG4 B-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP:			return "MPEG4 Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP:			return "MPEG4 Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:			return "MPEG4 Level";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:			return "MPEG4 Profile";
+	case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:			return "Quarter Pixel Search Enable";
+	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES:		return "Maximum Bytes in a Slice";
+	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB:		return "Number of MBs in a Slice";
+	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:		return "Slice Partitioning Method";
+	case V4L2_CID_MPEG_VIDEO_VBV_SIZE:			return "VBV Buffer Size";
+	case V4L2_CID_MPEG_VIDEO_DEC_PTS:			return "Video Decoder PTS";
+	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:			return "Video Decoder Frame Count";
+	case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR:		return "Video Decoder Conceal Color";
+	case V4L2_CID_MPEG_VIDEO_VBV_DELAY:			return "Initial Delay for VBV Control";
+	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:		return "Horizontal MV Search Range";
+	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:		return "Vertical MV Search Range";
+	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:		return "Repeat Sequence Header";
+	case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME:		return "Force Key Frame";
+	case V4L2_CID_MPEG_VIDEO_BASELAYER_PRIORITY_ID:		return "Base Layer Priority ID";
+	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:			return "LTR Count";
+	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
+	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
+	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
+	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
+
+	/* VPX controls */
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:		return "VPX Number of Partitions";
+	case V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4:		return "VPX Intra Mode Decision Disable";
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:		return "VPX No. of Refs for P Frame";
+	case V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL:		return "VPX Loop Filter Level Range";
+	case V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS:		return "VPX Deblocking Effect Control";
+	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD:	return "VPX Golden Frame Refresh Period";
+	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:		return "VPX Golden Frame Indicator";
+	case V4L2_CID_MPEG_VIDEO_VPX_MIN_QP:			return "VPX Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_VPX_MAX_QP:			return "VPX Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP:		return "VPX I-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:			return "VP9 Profile";
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:			return "VP9 Level";
+
+	/* HEVC controls */
+	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:		return "HEVC I-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP:		return "HEVC P-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP:		return "HEVC B-Frame QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP:			return "HEVC Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP:			return "HEVC Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MIN_QP:		return "HEVC I-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MAX_QP:		return "HEVC I-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MIN_QP:		return "HEVC P-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MAX_QP:		return "HEVC P-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MIN_QP:		return "HEVC B-Frame Minimum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MAX_QP:		return "HEVC B-Frame Maximum QP Value";
+	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:			return "HEVC Profile";
+	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:			return "HEVC Level";
+	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:			return "HEVC Tier";
+	case V4L2_CID_MPEG_VIDEO_HEVC_FRAME_RATE_RESOLUTION:	return "HEVC Frame Rate Resolution";
+	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_PARTITION_DEPTH:	return "HEVC Maximum Coding Unit Depth";
+	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:		return "HEVC Refresh Type";
+	case V4L2_CID_MPEG_VIDEO_HEVC_CONST_INTRA_PRED:		return "HEVC Constant Intra Prediction";
+	case V4L2_CID_MPEG_VIDEO_HEVC_LOSSLESS_CU:		return "HEVC Lossless Encoding";
+	case V4L2_CID_MPEG_VIDEO_HEVC_WAVEFRONT:		return "HEVC Wavefront";
+	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:		return "HEVC Loop Filter";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP:			return "HEVC QP Values";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:		return "HEVC Hierarchical Coding Type";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER:	return "HEVC Hierarchical Coding Layer";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_QP:	return "HEVC Hierarchical Layer 0 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_QP:	return "HEVC Hierarchical Layer 1 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_QP:	return "HEVC Hierarchical Layer 2 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_QP:	return "HEVC Hierarchical Layer 3 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_QP:	return "HEVC Hierarchical Layer 4 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_QP:	return "HEVC Hierarchical Layer 5 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_QP:	return "HEVC Hierarchical Layer 6 QP";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_BR:	return "HEVC Hierarchical Lay 0 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_BR:	return "HEVC Hierarchical Lay 1 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_BR:	return "HEVC Hierarchical Lay 2 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_BR:	return "HEVC Hierarchical Lay 3 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_BR:	return "HEVC Hierarchical Lay 4 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_BR:	return "HEVC Hierarchical Lay 5 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR:	return "HEVC Hierarchical Lay 6 BitRate";
+	case V4L2_CID_MPEG_VIDEO_HEVC_GENERAL_PB:		return "HEVC General PB";
+	case V4L2_CID_MPEG_VIDEO_HEVC_TEMPORAL_ID:		return "HEVC Temporal ID";
+	case V4L2_CID_MPEG_VIDEO_HEVC_STRONG_SMOOTHING:		return "HEVC Strong Intra Smoothing";
+	case V4L2_CID_MPEG_VIDEO_HEVC_INTRA_PU_SPLIT:		return "HEVC Intra PU Split";
+	case V4L2_CID_MPEG_VIDEO_HEVC_TMV_PREDICTION:		return "HEVC TMV Prediction";
+	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_NUM_MERGE_MV_MINUS1:	return "HEVC Max Num of Candidate MVs";
+	case V4L2_CID_MPEG_VIDEO_HEVC_WITHOUT_STARTCODE:	return "HEVC ENC Without Startcode";
+	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_PERIOD:		return "HEVC Num of I-Frame b/w 2 IDR";
+	case V4L2_CID_MPEG_VIDEO_HEVC_LF_BETA_OFFSET_DIV2:	return "HEVC Loop Filter Beta Offset";
+	case V4L2_CID_MPEG_VIDEO_HEVC_LF_TC_OFFSET_DIV2:	return "HEVC Loop Filter TC Offset";
+	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:	return "HEVC Size of Length Field";
+	case V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES:	return "Reference Frames for a P-Frame";
+	case V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR:		return "Prepend SPS and PPS to IDR";
+	case V4L2_CID_MPEG_VIDEO_HEVC_SPS:			return "HEVC Sequence Parameter Set";
+	case V4L2_CID_MPEG_VIDEO_HEVC_PPS:			return "HEVC Picture Parameter Set";
+	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:		return "HEVC Slice Parameters";
+	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:		return "HEVC Decode Mode";
+	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:		return "HEVC Start Code";
+
+	/* CAMERA controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_CAMERA_CLASS:		return "Camera Controls";
+	case V4L2_CID_EXPOSURE_AUTO:		return "Auto Exposure";
+	case V4L2_CID_EXPOSURE_ABSOLUTE:	return "Exposure Time, Absolute";
+	case V4L2_CID_EXPOSURE_AUTO_PRIORITY:	return "Exposure, Dynamic Framerate";
+	case V4L2_CID_PAN_RELATIVE:		return "Pan, Relative";
+	case V4L2_CID_TILT_RELATIVE:		return "Tilt, Relative";
+	case V4L2_CID_PAN_RESET:		return "Pan, Reset";
+	case V4L2_CID_TILT_RESET:		return "Tilt, Reset";
+	case V4L2_CID_PAN_ABSOLUTE:		return "Pan, Absolute";
+	case V4L2_CID_TILT_ABSOLUTE:		return "Tilt, Absolute";
+	case V4L2_CID_FOCUS_ABSOLUTE:		return "Focus, Absolute";
+	case V4L2_CID_FOCUS_RELATIVE:		return "Focus, Relative";
+	case V4L2_CID_FOCUS_AUTO:		return "Focus, Automatic Continuous";
+	case V4L2_CID_ZOOM_ABSOLUTE:		return "Zoom, Absolute";
+	case V4L2_CID_ZOOM_RELATIVE:		return "Zoom, Relative";
+	case V4L2_CID_ZOOM_CONTINUOUS:		return "Zoom, Continuous";
+	case V4L2_CID_PRIVACY:			return "Privacy";
+	case V4L2_CID_IRIS_ABSOLUTE:		return "Iris, Absolute";
+	case V4L2_CID_IRIS_RELATIVE:		return "Iris, Relative";
+	case V4L2_CID_AUTO_EXPOSURE_BIAS:	return "Auto Exposure, Bias";
+	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE: return "White Balance, Auto & Preset";
+	case V4L2_CID_WIDE_DYNAMIC_RANGE:	return "Wide Dynamic Range";
+	case V4L2_CID_IMAGE_STABILIZATION:	return "Image Stabilization";
+	case V4L2_CID_ISO_SENSITIVITY:		return "ISO Sensitivity";
+	case V4L2_CID_ISO_SENSITIVITY_AUTO:	return "ISO Sensitivity, Auto";
+	case V4L2_CID_EXPOSURE_METERING:	return "Exposure, Metering Mode";
+	case V4L2_CID_SCENE_MODE:		return "Scene Mode";
+	case V4L2_CID_3A_LOCK:			return "3A Lock";
+	case V4L2_CID_AUTO_FOCUS_START:		return "Auto Focus, Start";
+	case V4L2_CID_AUTO_FOCUS_STOP:		return "Auto Focus, Stop";
+	case V4L2_CID_AUTO_FOCUS_STATUS:	return "Auto Focus, Status";
+	case V4L2_CID_AUTO_FOCUS_RANGE:		return "Auto Focus, Range";
+	case V4L2_CID_PAN_SPEED:		return "Pan, Speed";
+	case V4L2_CID_TILT_SPEED:		return "Tilt, Speed";
+	case V4L2_CID_UNIT_CELL_SIZE:		return "Unit Cell Size";
+	case V4L2_CID_CAMERA_ORIENTATION:	return "Camera Orientation";
+	case V4L2_CID_CAMERA_SENSOR_ROTATION:	return "Camera Sensor Rotation";
+
+	/* FM Radio Modulator controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_FM_TX_CLASS:		return "FM Radio Modulator Controls";
+	case V4L2_CID_RDS_TX_DEVIATION:		return "RDS Signal Deviation";
+	case V4L2_CID_RDS_TX_PI:		return "RDS Program ID";
+	case V4L2_CID_RDS_TX_PTY:		return "RDS Program Type";
+	case V4L2_CID_RDS_TX_PS_NAME:		return "RDS PS Name";
+	case V4L2_CID_RDS_TX_RADIO_TEXT:	return "RDS Radio Text";
+	case V4L2_CID_RDS_TX_MONO_STEREO:	return "RDS Stereo";
+	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:	return "RDS Artificial Head";
+	case V4L2_CID_RDS_TX_COMPRESSED:	return "RDS Compressed";
+	case V4L2_CID_RDS_TX_DYNAMIC_PTY:	return "RDS Dynamic PTY";
+	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
+	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
+	case V4L2_CID_RDS_TX_MUSIC_SPEECH:	return "RDS Music";
+	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:	return "RDS Enable Alt Frequencies";
+	case V4L2_CID_RDS_TX_ALT_FREQS:		return "RDS Alternate Frequencies";
+	case V4L2_CID_AUDIO_LIMITER_ENABLED:	return "Audio Limiter Feature Enabled";
+	case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: return "Audio Limiter Release Time";
+	case V4L2_CID_AUDIO_LIMITER_DEVIATION:	return "Audio Limiter Deviation";
+	case V4L2_CID_AUDIO_COMPRESSION_ENABLED: return "Audio Compression Enabled";
+	case V4L2_CID_AUDIO_COMPRESSION_GAIN:	return "Audio Compression Gain";
+	case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD: return "Audio Compression Threshold";
+	case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME: return "Audio Compression Attack Time";
+	case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME: return "Audio Compression Release Time";
+	case V4L2_CID_PILOT_TONE_ENABLED:	return "Pilot Tone Feature Enabled";
+	case V4L2_CID_PILOT_TONE_DEVIATION:	return "Pilot Tone Deviation";
+	case V4L2_CID_PILOT_TONE_FREQUENCY:	return "Pilot Tone Frequency";
+	case V4L2_CID_TUNE_PREEMPHASIS:		return "Pre-Emphasis";
+	case V4L2_CID_TUNE_POWER_LEVEL:		return "Tune Power Level";
+	case V4L2_CID_TUNE_ANTENNA_CAPACITOR:	return "Tune Antenna Capacitor";
+
+	/* Flash controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_FLASH_CLASS:		return "Flash Controls";
+	case V4L2_CID_FLASH_LED_MODE:		return "LED Mode";
+	case V4L2_CID_FLASH_STROBE_SOURCE:	return "Strobe Source";
+	case V4L2_CID_FLASH_STROBE:		return "Strobe";
+	case V4L2_CID_FLASH_STROBE_STOP:	return "Stop Strobe";
+	case V4L2_CID_FLASH_STROBE_STATUS:	return "Strobe Status";
+	case V4L2_CID_FLASH_TIMEOUT:		return "Strobe Timeout";
+	case V4L2_CID_FLASH_INTENSITY:		return "Intensity, Flash Mode";
+	case V4L2_CID_FLASH_TORCH_INTENSITY:	return "Intensity, Torch Mode";
+	case V4L2_CID_FLASH_INDICATOR_INTENSITY: return "Intensity, Indicator";
+	case V4L2_CID_FLASH_FAULT:		return "Faults";
+	case V4L2_CID_FLASH_CHARGE:		return "Charge";
+	case V4L2_CID_FLASH_READY:		return "Ready to Strobe";
+
+	/* JPEG encoder controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_JPEG_CLASS:		return "JPEG Compression Controls";
+	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:	return "Chroma Subsampling";
+	case V4L2_CID_JPEG_RESTART_INTERVAL:	return "Restart Interval";
+	case V4L2_CID_JPEG_COMPRESSION_QUALITY:	return "Compression Quality";
+	case V4L2_CID_JPEG_ACTIVE_MARKER:	return "Active Markers";
+
+	/* Image source controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_IMAGE_SOURCE_CLASS:	return "Image Source Controls";
+	case V4L2_CID_VBLANK:			return "Vertical Blanking";
+	case V4L2_CID_HBLANK:			return "Horizontal Blanking";
+	case V4L2_CID_ANALOGUE_GAIN:		return "Analogue Gain";
+	case V4L2_CID_TEST_PATTERN_RED:		return "Red Pixel Value";
+	case V4L2_CID_TEST_PATTERN_GREENR:	return "Green (Red) Pixel Value";
+	case V4L2_CID_TEST_PATTERN_BLUE:	return "Blue Pixel Value";
+	case V4L2_CID_TEST_PATTERN_GREENB:	return "Green (Blue) Pixel Value";
+
+	/* Image processing controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_IMAGE_PROC_CLASS:		return "Image Processing Controls";
+	case V4L2_CID_LINK_FREQ:		return "Link Frequency";
+	case V4L2_CID_PIXEL_RATE:		return "Pixel Rate";
+	case V4L2_CID_TEST_PATTERN:		return "Test Pattern";
+	case V4L2_CID_DEINTERLACING_MODE:	return "Deinterlacing Mode";
+	case V4L2_CID_DIGITAL_GAIN:		return "Digital Gain";
+
+	/* DV controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_DV_CLASS:			return "Digital Video Controls";
+	case V4L2_CID_DV_TX_HOTPLUG:		return "Hotplug Present";
+	case V4L2_CID_DV_TX_RXSENSE:		return "RxSense Present";
+	case V4L2_CID_DV_TX_EDID_PRESENT:	return "EDID Present";
+	case V4L2_CID_DV_TX_MODE:		return "Transmit Mode";
+	case V4L2_CID_DV_TX_RGB_RANGE:		return "Tx RGB Quantization Range";
+	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:	return "Tx IT Content Type";
+	case V4L2_CID_DV_RX_POWER_PRESENT:	return "Power Present";
+	case V4L2_CID_DV_RX_RGB_RANGE:		return "Rx RGB Quantization Range";
+	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:	return "Rx IT Content Type";
+
+	case V4L2_CID_FM_RX_CLASS:		return "FM Radio Receiver Controls";
+	case V4L2_CID_TUNE_DEEMPHASIS:		return "De-Emphasis";
+	case V4L2_CID_RDS_RECEPTION:		return "RDS Reception";
+	case V4L2_CID_RF_TUNER_CLASS:		return "RF Tuner Controls";
+	case V4L2_CID_RF_TUNER_RF_GAIN:		return "RF Gain";
+	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:	return "LNA Gain, Auto";
+	case V4L2_CID_RF_TUNER_LNA_GAIN:	return "LNA Gain";
+	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:	return "Mixer Gain, Auto";
+	case V4L2_CID_RF_TUNER_MIXER_GAIN:	return "Mixer Gain";
+	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:	return "IF Gain, Auto";
+	case V4L2_CID_RF_TUNER_IF_GAIN:		return "IF Gain";
+	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
+	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
+	case V4L2_CID_RF_TUNER_PLL_LOCK:	return "PLL Lock";
+	case V4L2_CID_RDS_RX_PTY:		return "RDS Program Type";
+	case V4L2_CID_RDS_RX_PS_NAME:		return "RDS PS Name";
+	case V4L2_CID_RDS_RX_RADIO_TEXT:	return "RDS Radio Text";
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:	return "RDS Music";
+
+	/* Detection controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_DETECT_CLASS:		return "Detection Controls";
+	case V4L2_CID_DETECT_MD_MODE:		return "Motion Detection Mode";
+	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold";
+	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:	return "MD Threshold Grid";
+	case V4L2_CID_DETECT_MD_REGION_GRID:	return "MD Region Grid";
+
+	/* Stateless Codec controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_CODEC_STATELESS_CLASS:	return "Stateless Codec Controls";
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:		return "H264 Decode Mode";
+	case V4L2_CID_STATELESS_H264_START_CODE:		return "H264 Start Code";
+	case V4L2_CID_STATELESS_H264_SPS:			return "H264 Sequence Parameter Set";
+	case V4L2_CID_STATELESS_H264_PPS:			return "H264 Picture Parameter Set";
+	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:		return "H264 Scaling Matrix";
+	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:		return "H264 Prediction Weight Table";
+	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
+	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
+	case V4L2_CID_STATELESS_FWHT_PARAMS:			return "FWHT Stateless Parameters";
+	case V4L2_CID_STATELESS_VP8_FRAME:			return "VP8 Frame Parameters";
+	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:			return "MPEG-2 Sequence Header";
+	case V4L2_CID_STATELESS_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
+	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
+
+	/* Colorimetry controls */
+	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
+	case V4L2_CID_COLORIMETRY_CLASS:	return "Colorimetry Controls";
+	case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:		return "HDR10 Content Light Info";
+	case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:	return "HDR10 Mastering Display";
+	default:
+		return NULL;
+	}
+}
+EXPORT_SYMBOL(v4l2_ctrl_get_name);
+
+void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
+		    s64 *min, s64 *max, u64 *step, s64 *def, u32 *flags)
+{
+	*name = v4l2_ctrl_get_name(id);
+	*flags = 0;
+
+	switch (id) {
+	case V4L2_CID_AUDIO_MUTE:
+	case V4L2_CID_AUDIO_LOUDNESS:
+	case V4L2_CID_AUTO_WHITE_BALANCE:
+	case V4L2_CID_AUTOGAIN:
+	case V4L2_CID_HFLIP:
+	case V4L2_CID_VFLIP:
+	case V4L2_CID_HUE_AUTO:
+	case V4L2_CID_CHROMA_AGC:
+	case V4L2_CID_COLOR_KILLER:
+	case V4L2_CID_AUTOBRIGHTNESS:
+	case V4L2_CID_MPEG_AUDIO_MUTE:
+	case V4L2_CID_MPEG_VIDEO_MUTE:
+	case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE:
+	case V4L2_CID_MPEG_VIDEO_PULLDOWN:
+	case V4L2_CID_EXPOSURE_AUTO_PRIORITY:
+	case V4L2_CID_FOCUS_AUTO:
+	case V4L2_CID_PRIVACY:
+	case V4L2_CID_AUDIO_LIMITER_ENABLED:
+	case V4L2_CID_AUDIO_COMPRESSION_ENABLED:
+	case V4L2_CID_PILOT_TONE_ENABLED:
+	case V4L2_CID_ILLUMINATORS_1:
+	case V4L2_CID_ILLUMINATORS_2:
+	case V4L2_CID_FLASH_STROBE_STATUS:
+	case V4L2_CID_FLASH_CHARGE:
+	case V4L2_CID_FLASH_READY:
+	case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER:
+	case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE:
+	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE:
+	case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE:
+	case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE:
+	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE:
+	case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:
+	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:
+	case V4L2_CID_MPEG_VIDEO_AU_DELIMITER:
+	case V4L2_CID_WIDE_DYNAMIC_RANGE:
+	case V4L2_CID_IMAGE_STABILIZATION:
+	case V4L2_CID_RDS_RECEPTION:
+	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
+	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:
+	case V4L2_CID_RF_TUNER_PLL_LOCK:
+	case V4L2_CID_RDS_TX_MONO_STEREO:
+	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:
+	case V4L2_CID_RDS_TX_COMPRESSED:
+	case V4L2_CID_RDS_TX_DYNAMIC_PTY:
+	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_TX_MUSIC_SPEECH:
+	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
+		*type = V4L2_CTRL_TYPE_BOOLEAN;
+		*min = 0;
+		*max = *step = 1;
+		break;
+	case V4L2_CID_ROTATE:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		*flags |= V4L2_CTRL_FLAG_MODIFY_LAYOUT;
+		break;
+	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:
+	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:
+	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		break;
+	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		break;
+	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		*flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
+		break;
+	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:
+		*type = V4L2_CTRL_TYPE_BITMASK;
+		*flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
+		break;
+	case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME:
+	case V4L2_CID_PAN_RESET:
+	case V4L2_CID_TILT_RESET:
+	case V4L2_CID_FLASH_STROBE:
+	case V4L2_CID_FLASH_STROBE_STOP:
+	case V4L2_CID_AUTO_FOCUS_START:
+	case V4L2_CID_AUTO_FOCUS_STOP:
+	case V4L2_CID_DO_WHITE_BALANCE:
+		*type = V4L2_CTRL_TYPE_BUTTON;
+		*flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
+			  V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
+		*min = *max = *step = *def = 0;
+		break;
+	case V4L2_CID_POWER_LINE_FREQUENCY:
+	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
+	case V4L2_CID_MPEG_AUDIO_ENCODING:
+	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:
+	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:
+	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:
+	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:
+	case V4L2_CID_MPEG_AUDIO_MODE:
+	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION:
+	case V4L2_CID_MPEG_AUDIO_EMPHASIS:
+	case V4L2_CID_MPEG_AUDIO_CRC:
+	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:
+	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK:
+	case V4L2_CID_MPEG_VIDEO_ENCODING:
+	case V4L2_CID_MPEG_VIDEO_ASPECT:
+	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
+	case V4L2_CID_MPEG_STREAM_TYPE:
+	case V4L2_CID_MPEG_STREAM_VBI_FMT:
+	case V4L2_CID_EXPOSURE_AUTO:
+	case V4L2_CID_AUTO_FOCUS_RANGE:
+	case V4L2_CID_COLORFX:
+	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE:
+	case V4L2_CID_TUNE_PREEMPHASIS:
+	case V4L2_CID_FLASH_LED_MODE:
+	case V4L2_CID_FLASH_STROBE_SOURCE:
+	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
+	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
+	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
+	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
+	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
+	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:
+	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
+	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
+	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
+	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:
+	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
+	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
+	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
+	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
+	case V4L2_CID_ISO_SENSITIVITY_AUTO:
+	case V4L2_CID_EXPOSURE_METERING:
+	case V4L2_CID_SCENE_MODE:
+	case V4L2_CID_DV_TX_MODE:
+	case V4L2_CID_DV_TX_RGB_RANGE:
+	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:
+	case V4L2_CID_DV_RX_RGB_RANGE:
+	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
+	case V4L2_CID_TEST_PATTERN:
+	case V4L2_CID_DEINTERLACING_MODE:
+	case V4L2_CID_TUNE_DEEMPHASIS:
+	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
+	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
+	case V4L2_CID_DETECT_MD_MODE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
+	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:
+	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:
+	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
+	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
+	case V4L2_CID_STATELESS_H264_DECODE_MODE:
+	case V4L2_CID_STATELESS_H264_START_CODE:
+	case V4L2_CID_CAMERA_ORIENTATION:
+		*type = V4L2_CTRL_TYPE_MENU;
+		break;
+	case V4L2_CID_LINK_FREQ:
+		*type = V4L2_CTRL_TYPE_INTEGER_MENU;
+		break;
+	case V4L2_CID_RDS_TX_PS_NAME:
+	case V4L2_CID_RDS_TX_RADIO_TEXT:
+	case V4L2_CID_RDS_RX_PS_NAME:
+	case V4L2_CID_RDS_RX_RADIO_TEXT:
+		*type = V4L2_CTRL_TYPE_STRING;
+		break;
+	case V4L2_CID_ISO_SENSITIVITY:
+	case V4L2_CID_AUTO_EXPOSURE_BIAS:
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:
+	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:
+		*type = V4L2_CTRL_TYPE_INTEGER_MENU;
+		break;
+	case V4L2_CID_USER_CLASS:
+	case V4L2_CID_CAMERA_CLASS:
+	case V4L2_CID_CODEC_CLASS:
+	case V4L2_CID_FM_TX_CLASS:
+	case V4L2_CID_FLASH_CLASS:
+	case V4L2_CID_JPEG_CLASS:
+	case V4L2_CID_IMAGE_SOURCE_CLASS:
+	case V4L2_CID_IMAGE_PROC_CLASS:
+	case V4L2_CID_DV_CLASS:
+	case V4L2_CID_FM_RX_CLASS:
+	case V4L2_CID_RF_TUNER_CLASS:
+	case V4L2_CID_DETECT_CLASS:
+	case V4L2_CID_CODEC_STATELESS_CLASS:
+	case V4L2_CID_COLORIMETRY_CLASS:
+		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
+		/* You can neither read nor write these */
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
+		*min = *max = *step = *def = 0;
+		break;
+	case V4L2_CID_BG_COLOR:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		*step = 1;
+		*min = 0;
+		/* Max is the largest RGB888 value, i.e. 2^24 - 1 */
+		*max = 0xFFFFFF;
+		break;
+	case V4L2_CID_FLASH_FAULT:
+	case V4L2_CID_JPEG_ACTIVE_MARKER:
+	case V4L2_CID_3A_LOCK:
+	case V4L2_CID_AUTO_FOCUS_STATUS:
+	case V4L2_CID_DV_TX_HOTPLUG:
+	case V4L2_CID_DV_TX_RXSENSE:
+	case V4L2_CID_DV_TX_EDID_PRESENT:
+	case V4L2_CID_DV_RX_POWER_PRESENT:
+		*type = V4L2_CTRL_TYPE_BITMASK;
+		break;
+	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:
+	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
+		break;
+	case V4L2_CID_MPEG_VIDEO_DEC_PTS:
+		*type = V4L2_CTRL_TYPE_INTEGER64;
+		*flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY;
+		*min = *def = 0;
+		*max = 0x1ffffffffLL;
+		*step = 1;
+		break;
+	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:
+		*type = V4L2_CTRL_TYPE_INTEGER64;
+		*flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY;
+		*min = *def = 0;
+		*max = 0x7fffffffffffffffLL;
+		*step = 1;
+		break;
+	case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR:
+		*type = V4L2_CTRL_TYPE_INTEGER64;
+		*min = 0;
+		/* default for 8 bit black, luma is 16, chroma is 128 */
+		*def = 0x8000800010LL;
+		*max = 0xffffffffffffLL;
+		*step = 1;
+		break;
+	case V4L2_CID_PIXEL_RATE:
+		*type = V4L2_CTRL_TYPE_INTEGER64;
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
+		break;
+	case V4L2_CID_DETECT_MD_REGION_GRID:
+		*type = V4L2_CTRL_TYPE_U8;
+		break;
+	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:
+		*type = V4L2_CTRL_TYPE_U16;
+		break;
+	case V4L2_CID_RDS_TX_ALT_FREQS:
+		*type = V4L2_CTRL_TYPE_U32;
+		break;
+	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:
+		*type = V4L2_CTRL_TYPE_MPEG2_SEQUENCE;
+		break;
+	case V4L2_CID_STATELESS_MPEG2_PICTURE:
+		*type = V4L2_CTRL_TYPE_MPEG2_PICTURE;
+		break;
+	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:
+		*type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION;
+		break;
+	case V4L2_CID_STATELESS_FWHT_PARAMS:
+		*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
+		break;
+	case V4L2_CID_STATELESS_H264_SPS:
+		*type = V4L2_CTRL_TYPE_H264_SPS;
+		break;
+	case V4L2_CID_STATELESS_H264_PPS:
+		*type = V4L2_CTRL_TYPE_H264_PPS;
+		break;
+	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:
+		*type = V4L2_CTRL_TYPE_H264_SCALING_MATRIX;
+		break;
+	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:
+		*type = V4L2_CTRL_TYPE_H264_SLICE_PARAMS;
+		break;
+	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:
+		*type = V4L2_CTRL_TYPE_H264_DECODE_PARAMS;
+		break;
+	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:
+		*type = V4L2_CTRL_TYPE_H264_PRED_WEIGHTS;
+		break;
+	case V4L2_CID_STATELESS_VP8_FRAME:
+		*type = V4L2_CTRL_TYPE_VP8_FRAME;
+		break;
+	case V4L2_CID_MPEG_VIDEO_HEVC_SPS:
+		*type = V4L2_CTRL_TYPE_HEVC_SPS;
+		break;
+	case V4L2_CID_MPEG_VIDEO_HEVC_PPS:
+		*type = V4L2_CTRL_TYPE_HEVC_PPS;
+		break;
+	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:
+		*type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS;
+		break;
+	case V4L2_CID_UNIT_CELL_SIZE:
+		*type = V4L2_CTRL_TYPE_AREA;
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
+		break;
+	case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:
+		*type = V4L2_CTRL_TYPE_HDR10_CLL_INFO;
+		break;
+	case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:
+		*type = V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY;
+		break;
+	default:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		break;
+	}
+	switch (id) {
+	case V4L2_CID_MPEG_AUDIO_ENCODING:
+	case V4L2_CID_MPEG_AUDIO_MODE:
+	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
+	case V4L2_CID_MPEG_VIDEO_B_FRAMES:
+	case V4L2_CID_MPEG_STREAM_TYPE:
+		*flags |= V4L2_CTRL_FLAG_UPDATE;
+		break;
+	case V4L2_CID_AUDIO_VOLUME:
+	case V4L2_CID_AUDIO_BALANCE:
+	case V4L2_CID_AUDIO_BASS:
+	case V4L2_CID_AUDIO_TREBLE:
+	case V4L2_CID_BRIGHTNESS:
+	case V4L2_CID_CONTRAST:
+	case V4L2_CID_SATURATION:
+	case V4L2_CID_HUE:
+	case V4L2_CID_RED_BALANCE:
+	case V4L2_CID_BLUE_BALANCE:
+	case V4L2_CID_GAMMA:
+	case V4L2_CID_SHARPNESS:
+	case V4L2_CID_CHROMA_GAIN:
+	case V4L2_CID_RDS_TX_DEVIATION:
+	case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME:
+	case V4L2_CID_AUDIO_LIMITER_DEVIATION:
+	case V4L2_CID_AUDIO_COMPRESSION_GAIN:
+	case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD:
+	case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME:
+	case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME:
+	case V4L2_CID_PILOT_TONE_DEVIATION:
+	case V4L2_CID_PILOT_TONE_FREQUENCY:
+	case V4L2_CID_TUNE_POWER_LEVEL:
+	case V4L2_CID_TUNE_ANTENNA_CAPACITOR:
+	case V4L2_CID_RF_TUNER_RF_GAIN:
+	case V4L2_CID_RF_TUNER_LNA_GAIN:
+	case V4L2_CID_RF_TUNER_MIXER_GAIN:
+	case V4L2_CID_RF_TUNER_IF_GAIN:
+	case V4L2_CID_RF_TUNER_BANDWIDTH:
+	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD:
+		*flags |= V4L2_CTRL_FLAG_SLIDER;
+		break;
+	case V4L2_CID_PAN_RELATIVE:
+	case V4L2_CID_TILT_RELATIVE:
+	case V4L2_CID_FOCUS_RELATIVE:
+	case V4L2_CID_IRIS_RELATIVE:
+	case V4L2_CID_ZOOM_RELATIVE:
+		*flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
+			  V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
+		break;
+	case V4L2_CID_FLASH_STROBE_STATUS:
+	case V4L2_CID_AUTO_FOCUS_STATUS:
+	case V4L2_CID_FLASH_READY:
+	case V4L2_CID_DV_TX_HOTPLUG:
+	case V4L2_CID_DV_TX_RXSENSE:
+	case V4L2_CID_DV_TX_EDID_PRESENT:
+	case V4L2_CID_DV_RX_POWER_PRESENT:
+	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
+	case V4L2_CID_RDS_RX_PTY:
+	case V4L2_CID_RDS_RX_PS_NAME:
+	case V4L2_CID_RDS_RX_RADIO_TEXT:
+	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
+	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
+	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
+	case V4L2_CID_CAMERA_ORIENTATION:
+	case V4L2_CID_CAMERA_SENSOR_ROTATION:
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
+		break;
+	case V4L2_CID_RF_TUNER_PLL_LOCK:
+		*flags |= V4L2_CTRL_FLAG_VOLATILE;
+		break;
+	}
+}
+EXPORT_SYMBOL(v4l2_ctrl_fill);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-priv.h b/drivers/media/v4l2-core/v4l2-ctrls-priv.h
new file mode 100644
index 0000000000000..d4bf2c716f975
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-ctrls-priv.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * V4L2 controls framework private header.
+ *
+ * Copyright (C) 2010-2021  Hans Verkuil <hverkuil-cisco@xs4all.nl>
+ */
+
+#ifndef _V4L2_CTRLS_PRIV_H_
+#define _V4L2_CTRLS_PRIV_H_
+
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-dev.h>
+
+/*
+ * Debug message that is only printed when V4L2_DEV_DEBUG_CTRL is set in
+ * vdev->dev_debug. The calling function and the device node name are
+ * prepended automatically; a NULL @vdev triggers a WARN_ON and prints nothing.
+ */
+#define dprintk(vdev, fmt, arg...) do {					\
+	if (!WARN_ON(!(vdev)) && ((vdev)->dev_debug & V4L2_DEV_DEBUG_CTRL)) \
+		printk(KERN_DEBUG pr_fmt("%s: %s: " fmt),		\
+		       __func__, video_device_node_name(vdev), ##arg);	\
+} while (0)
+
+/* True if @master has an ops table that implements @op */
+#define has_op(master, op) \
+	((master)->ops && (master)->ops->op)
+/* Call @op on @master when it is implemented; evaluates to 0 otherwise */
+#define call_op(master, op) \
+	(has_op(master, op) ? (master)->ops->op(master) : 0)
+
+/* Return the ID of the control behind the v4l2_ctrl_ref that embeds @node */
+static inline u32 node2id(struct list_head *node)
+{
+	return list_entry(node, struct v4l2_ctrl_ref, node)->ctrl->id;
+}
+
+/*
+ * Returns true if @master is the master of an autocluster whose current
+ * value selects manual mode (cur.val == manual_mode_value).
+ */
+static inline bool is_cur_manual(const struct v4l2_ctrl *master)
+{
+	return master->is_auto && master->cur.val == master->manual_mode_value;
+}
+
+/*
+ * Same check against the new value: returns true if the autocluster will be
+ * set to manual mode (val == manual_mode_value).
+ */
+static inline bool is_new_manual(const struct v4l2_ctrl *master)
+{
+	return master->is_auto && master->val == master->manual_mode_value;
+}
+
+/* Return ctrl->flags, with V4L2_CTRL_FLAG_HAS_PAYLOAD added if ctrl->is_ptr */
+static inline u32 user_flags(const struct v4l2_ctrl *ctrl)
+{
+	u32 flags = ctrl->flags;
+
+	if (ctrl->is_ptr)
+		flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD;
+
+	return flags;
+}
+
+/* v4l2-ctrls-core.c */
+void cur_to_new(struct v4l2_ctrl *ctrl);
+void cur_to_req(struct v4l2_ctrl_ref *ref);
+void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags);
+void new_to_req(struct v4l2_ctrl_ref *ref);
+void req_to_new(struct v4l2_ctrl_ref *ref);
+void send_initial_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl);
+void send_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 changes);
+int validate_new(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr p_new);
+int handler_new_ref(struct v4l2_ctrl_handler *hdl,
+		    struct v4l2_ctrl *ctrl,
+		    struct v4l2_ctrl_ref **ctrl_ref,
+		    bool from_other_dev, bool allocate_req);
+struct v4l2_ctrl_ref *find_ref(struct v4l2_ctrl_handler *hdl, u32 id);
+struct v4l2_ctrl_ref *find_ref_lock(struct v4l2_ctrl_handler *hdl, u32 id);
+int check_range(enum v4l2_ctrl_type type,
+		s64 min, s64 max, u64 step, s64 def);
+void update_from_auto_cluster(struct v4l2_ctrl *master);
+int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master,
+		       bool set, u32 ch_flags);
+
+/* v4l2-ctrls-api.c */
+int v4l2_g_ext_ctrls_common(struct v4l2_ctrl_handler *hdl,
+			    struct v4l2_ext_controls *cs,
+			    struct video_device *vdev);
+int try_set_ext_ctrls_common(struct v4l2_fh *fh,
+			     struct v4l2_ctrl_handler *hdl,
+			     struct v4l2_ext_controls *cs,
+			     struct video_device *vdev, bool set);
+
+/* v4l2-ctrls-request.c */
+void v4l2_ctrl_handler_init_request(struct v4l2_ctrl_handler *hdl);
+void v4l2_ctrl_handler_free_request(struct v4l2_ctrl_handler *hdl);
+int v4l2_g_ext_ctrls_request(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
+			     struct media_device *mdev, struct v4l2_ext_controls *cs);
+int try_set_ext_ctrls_request(struct v4l2_fh *fh,
+			      struct v4l2_ctrl_handler *hdl,
+			      struct video_device *vdev,
+			      struct media_device *mdev,
+			      struct v4l2_ext_controls *cs, bool set);
+
+#endif
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-request.c b/drivers/media/v4l2-core/v4l2-ctrls-request.c
new file mode 100644
index 0000000000000..7d098f287fd95
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-ctrls-request.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * V4L2 controls framework Request API implementation.
+ *
+ * Copyright (C) 2018-2021  Hans Verkuil <hverkuil-cisco@xs4all.nl>
+ */
+
+#define pr_fmt(fmt) "v4l2-ctrls: " fmt
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-ioctl.h>
+
+#include "v4l2-ctrls-priv.h"
+
+/* Initialize the request lists, queued flag and request object of @hdl */
+void v4l2_ctrl_handler_init_request(struct v4l2_ctrl_handler *hdl)
+{
+	INIT_LIST_HEAD(&hdl->requests);
+	INIT_LIST_HEAD(&hdl->requests_queued);
+	hdl->request_is_queued = false;
+	media_request_object_init(&hdl->req_obj);
+}
+
+/* Release every request-bound handler that still hangs off a main handler */
+void v4l2_ctrl_handler_free_request(struct v4l2_ctrl_handler *hdl)
+{
+	struct v4l2_ctrl_handler *bound, *tmp;
+
+	/*
+	 * Only the main handler owns a list of request handlers; it is
+	 * recognised by the NULL ops pointer of its request object.
+	 */
+	if (hdl->req_obj.ops)
+		return;
+
+	/* A main handler that is not used in any request has nothing to do */
+	if (list_empty(&hdl->requests))
+		return;
+
+	/*
+	 * Handler objects of outstanding requests still use this main
+	 * handler: unbind and put each of them before it is freed.
+	 */
+	list_for_each_entry_safe(bound, tmp, &hdl->requests, requests) {
+		media_request_object_unbind(&bound->req_obj);
+		media_request_object_put(&bound->req_obj);
+	}
+}
+
+static int v4l2_ctrl_request_clone(struct v4l2_ctrl_handler *hdl,
+				   const struct v4l2_ctrl_handler *from)
+{
+	struct v4l2_ctrl_ref *src;
+	int ret = 0;
+
+	/* Cloning needs a destination that differs from the source */
+	if (WARN_ON(!hdl || hdl == from))
+		return -EINVAL;
+	if (hdl->error)
+		return hdl->error;
+
+	/* The destination is expected to use its own built-in lock */
+	WARN_ON(hdl->lock != &hdl->_lock);
+
+	mutex_lock(from->lock);
+	list_for_each_entry(src, &from->ctrl_refs, node) {
+		struct v4l2_ctrl_ref *dst;
+
+		/* Refs inherited from other devices are not cloned */
+		if (src->from_other_dev)
+			continue;
+		ret = handler_new_ref(hdl, src->ctrl, &dst, false, true);
+		if (ret)
+			break;
+	}
+	mutex_unlock(from->lock);
+	return ret;
+}
+
+static void v4l2_ctrl_request_queue(struct media_request_object *obj)
+{
+	struct v4l2_ctrl_handler *parent = obj->priv;	/* main handler */
+	struct v4l2_ctrl_handler *req_hdl;
+
+	req_hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+	mutex_lock(parent->lock);
+	list_add_tail(&req_hdl->requests_queued, &parent->requests_queued);
+	req_hdl->request_is_queued = true;
+	mutex_unlock(parent->lock);
+}
+
+static void v4l2_ctrl_request_unbind(struct media_request_object *obj)
+{
+	struct v4l2_ctrl_handler *parent = obj->priv;	/* main handler */
+	struct v4l2_ctrl_handler *req_hdl;
+
+	req_hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+	mutex_lock(parent->lock);
+	list_del_init(&req_hdl->requests);
+	if (req_hdl->request_is_queued) {	/* still on the queued list */
+		list_del_init(&req_hdl->requests_queued);
+		req_hdl->request_is_queued = false;
+	}
+	mutex_unlock(parent->lock);
+}
+
+static void v4l2_ctrl_request_release(struct media_request_object *obj)
+{
+	struct v4l2_ctrl_handler *hdl =
+		container_of(obj, struct v4l2_ctrl_handler, req_obj);
+
+	v4l2_ctrl_handler_free(hdl);
+	kfree(hdl);	/* request handlers are kzalloc'ed in this file */
+}
+
+static const struct media_request_object_ops req_ops = {
+	.queue = v4l2_ctrl_request_queue,	/* add to queued list */
+	.unbind = v4l2_ctrl_request_unbind,	/* drop from the lists */
+	.release = v4l2_ctrl_request_release,	/* free the handler */
+};
+
+struct v4l2_ctrl_handler *v4l2_ctrl_request_hdl_find(struct media_request *req,
+						     struct v4l2_ctrl_handler *parent)
+{
+	struct media_request_object *found;
+
+	/* Lookups are only expected while the request validates or is queued */
+	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_VALIDATING &&
+		    req->state != MEDIA_REQUEST_STATE_QUEUED))
+		return NULL;
+	found = media_request_object_find(req, &req_ops, parent);
+	if (!found)
+		return NULL;
+	return container_of(found, struct v4l2_ctrl_handler, req_obj);
+}
+EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_find);
+
+struct v4l2_ctrl *
+v4l2_ctrl_request_hdl_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id)
+{
+	struct v4l2_ctrl_ref *r = find_ref_lock(hdl, id);
+
+	return (!r || !r->valid_p_req) ? NULL : r->ctrl;
+}
+EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_ctrl_find);
+
+static int v4l2_ctrl_request_bind(struct media_request *req,
+				  struct v4l2_ctrl_handler *hdl,
+				  struct v4l2_ctrl_handler *from)
+{
+	int err = v4l2_ctrl_request_clone(hdl, from);
+
+	if (err)
+		return err;
+	err = media_request_object_bind(req, &req_ops, from, false,
+					&hdl->req_obj);
+	if (err)
+		return err;
+
+	/* Bound: track the new handler on the list of @from's requests */
+	mutex_lock(from->lock);
+	list_add_tail(&hdl->requests, &from->requests);
+	mutex_unlock(from->lock);
+	return 0;
+}
+
+static struct media_request_object *
+v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
+			struct media_request *req, bool set)
+{
+	struct v4l2_ctrl_handler *req_hdl;
+	struct media_request_object *found;
+	int err;
+
+	/* Hand an error-encoded request pointer straight back */
+	if (IS_ERR(req))
+		return ERR_CAST(req);
+
+	/* Setting controls is only expected on a request being updated */
+	if (set && WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING))
+		return ERR_PTR(-EBUSY);
+
+	found = media_request_object_find(req, &req_ops, hdl);
+	if (found)
+		return found;
+
+	/*
+	 * A completed request without a control object can only mean that
+	 * the queued request carried no controls and that
+	 * v4l2_ctrl_request_complete() could not allocate a handler object
+	 * for the completed state, so report an out-of-memory error.
+	 */
+	if (!set)
+		return ERR_PTR(-ENOMEM);
+
+	/* First controls for this request: create and bind a new handler */
+	req_hdl = kzalloc(sizeof(*req_hdl), GFP_KERNEL);
+	if (!req_hdl)
+		return ERR_PTR(-ENOMEM);
+
+	err = v4l2_ctrl_handler_init(req_hdl, (hdl->nr_of_buckets - 1) * 8);
+	if (!err)
+		err = v4l2_ctrl_request_bind(req, req_hdl, hdl);
+	if (err) {
+		v4l2_ctrl_handler_free(req_hdl);
+		kfree(req_hdl);
+		return ERR_PTR(err);
+	}
+
+	/* Take the reference that the callers in this file put again */
+	found = &req_hdl->req_obj;
+	media_request_object_get(found);
+	return found;
+}
+
+int v4l2_g_ext_ctrls_request(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
+			     struct media_device *mdev, struct v4l2_ext_controls *cs)
+{
+	struct media_request_object *obj;
+	struct media_request *req;
+	int ret;
+
+	if (!mdev || cs->request_fd < 0)
+		return -EINVAL;
+
+	req = media_request_get_by_fd(mdev, cs->request_fd);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* Control values can only be read back from a completed request */
+	if (req->state != MEDIA_REQUEST_STATE_COMPLETE) {
+		ret = -EACCES;
+		goto put_req;
+	}
+
+	ret = media_request_lock_for_access(req);
+	if (ret)
+		goto put_req;
+
+	obj = v4l2_ctrls_find_req_obj(hdl, req, false);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto unlock;
+	}
+
+	/* Read from the handler bound to the request, not the one passed in */
+	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+	ret = v4l2_g_ext_ctrls_common(hdl, cs, vdev);
+unlock:
+	media_request_unlock_for_access(req);
+	if (!IS_ERR(obj))
+		media_request_object_put(obj);
+put_req:
+	media_request_put(req);
+	return ret;
+}
+
+/*
+ * Try or set the controls in @cs on the request that cs->request_fd refers to.
+ * The request is looked up in @mdev and locked for update; the handler object
+ * bound to that request (created on demand when @set is true) replaces @hdl
+ * for the actual try/set. Returns that result or an earlier lookup error.
+ *
+ * dprintk() already prefixes every message with the function name and the
+ * device node name, so the messages here do not repeat the node name.
+ */
+int try_set_ext_ctrls_request(struct v4l2_fh *fh,
+			      struct v4l2_ctrl_handler *hdl,
+			      struct video_device *vdev,
+			      struct media_device *mdev,
+			      struct v4l2_ext_controls *cs, bool set)
+{
+	struct media_request_object *obj = NULL;
+	struct media_request *req = NULL;
+	int ret;
+
+	if (!mdev) {
+		dprintk(vdev, "missing media device\n");
+		return -EINVAL;
+	}
+
+	if (cs->request_fd < 0) {
+		dprintk(vdev, "invalid request fd %d\n", cs->request_fd);
+		return -EINVAL;
+	}
+
+	req = media_request_get_by_fd(mdev, cs->request_fd);
+	if (IS_ERR(req)) {
+		dprintk(vdev, "cannot find request fd %d\n", cs->request_fd);
+		return PTR_ERR(req);
+	}
+
+	ret = media_request_lock_for_update(req);
+	if (ret) {
+		dprintk(vdev, "cannot lock request fd %d\n", cs->request_fd);
+		media_request_put(req);
+		return ret;
+	}
+
+	obj = v4l2_ctrls_find_req_obj(hdl, req, set);
+	if (IS_ERR(obj)) {
+		dprintk(vdev, "cannot find request object for request fd %d\n",
+			cs->request_fd);
+		media_request_unlock_for_update(req);
+		media_request_put(req);
+		return PTR_ERR(obj);
+	}
+
+	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+	ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set);
+	if (ret)
+		dprintk(vdev, "try_set_ext_ctrls_common failed (%d)\n", ret);
+
+	media_request_unlock_for_update(req);
+	media_request_object_put(obj);
+	media_request_put(req);
+
+	return ret;
+}
+
+void v4l2_ctrl_request_complete(struct media_request *req,
+				struct v4l2_ctrl_handler *main_hdl)
+{
+	struct media_request_object *obj;
+	struct v4l2_ctrl_handler *hdl;
+	struct v4l2_ctrl_ref *ref;
+
+	if (!req || !main_hdl)
+		return;
+
+	/*
+	 * Note that it is valid if nothing was found. It means
+	 * that this request didn't contain any controls. A handler is
+	 * then created below to store the completed control state in.
+	 */
+	obj = media_request_object_find(req, &req_ops, main_hdl);
+	if (!obj) {
+		int ret;
+
+		/* Create a new request so the driver can return controls */
+		hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
+		if (!hdl)
+			return;
+
+		ret = v4l2_ctrl_handler_init(hdl, (main_hdl->nr_of_buckets - 1) * 8);
+		if (!ret)
+			ret = v4l2_ctrl_request_bind(req, hdl, main_hdl);
+		if (ret) {
+			v4l2_ctrl_handler_free(hdl);
+			kfree(hdl);
+			return;
+		}
+		hdl->request_is_queued = true;	/* expected by the WARN_ON below */
+		obj = media_request_object_find(req, &req_ops, main_hdl);
+	}
+	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+
+	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+		struct v4l2_ctrl *ctrl = ref->ctrl;
+		struct v4l2_ctrl *master = ctrl->cluster[0];
+		unsigned int i;
+
+		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
+			v4l2_ctrl_lock(master);
+			/* g_volatile_ctrl updates the new values, which go into the request */
+			for (i = 0; i < master->ncontrols; i++)
+				cur_to_new(master->cluster[i]);
+			call_op(master, g_volatile_ctrl);
+			new_to_req(ref);
+			v4l2_ctrl_unlock(master);
+			continue;
+		}
+		if (ref->valid_p_req)
+			continue;
+
+		/* Copy the current control value into the request */
+		v4l2_ctrl_lock(ctrl);
+		cur_to_req(ref);
+		v4l2_ctrl_unlock(ctrl);
+	}
+
+	mutex_lock(main_hdl->lock);
+	WARN_ON(!hdl->request_is_queued);
+	list_del_init(&hdl->requests_queued);
+	hdl->request_is_queued = false;
+	mutex_unlock(main_hdl->lock);
+	media_request_object_complete(obj);
+	media_request_object_put(obj);
+}
+EXPORT_SYMBOL(v4l2_ctrl_request_complete);
+
+int v4l2_ctrl_request_setup(struct media_request *req,
+			    struct v4l2_ctrl_handler *main_hdl)
+{
+	struct media_request_object *obj;
+	struct v4l2_ctrl_handler *hdl;
+	struct v4l2_ctrl_ref *ref;
+	int ret = 0;
+
+	if (!req || !main_hdl)
+		return 0;
+
+	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED))
+		return -EBUSY;
+
+	/*
+	 * Note that it is valid if nothing was found. It means
+	 * that this request doesn't have any controls and so just
+	 * wants to leave the controls unchanged.
+	 */
+	obj = media_request_object_find(req, &req_ops, main_hdl);
+	if (!obj)
+		return 0;
+	if (obj->completed) {
+		media_request_object_put(obj);
+		return -EBUSY;
+	}
+	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
+
+	list_for_each_entry(ref, &hdl->ctrl_refs, node)
+		ref->req_done = false;	/* nothing has been applied yet */
+
+	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
+		struct v4l2_ctrl *ctrl = ref->ctrl;
+		struct v4l2_ctrl *master = ctrl->cluster[0];
+		bool have_new_data = false;
+		int i;
+
+		/*
+		 * Skip if this control was already handled by a cluster.
+		 * Skip read-only controls as well.
+		 */
+		if (ref->req_done || (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY))
+			continue;
+
+		v4l2_ctrl_lock(master);
+		for (i = 0; i < master->ncontrols; i++) {
+			if (master->cluster[i]) {
+				struct v4l2_ctrl_ref *r =
+					find_ref(hdl, master->cluster[i]->id);
+
+				if (r->valid_p_req) {	/* NOTE(review): r assumed non-NULL */
+					have_new_data = true;
+					break;
+				}
+			}
+		}
+		if (!have_new_data) {
+			v4l2_ctrl_unlock(master);
+			continue;
+		}
+
+		for (i = 0; i < master->ncontrols; i++) {
+			if (master->cluster[i]) {
+				struct v4l2_ctrl_ref *r =
+					find_ref(hdl, master->cluster[i]->id);
+
+				req_to_new(r);
+				master->cluster[i]->is_new = 1;
+				r->req_done = true;
+			}
+		}
+		/*
+		 * For volatile autoclusters that are currently in auto mode
+		 * we need to discover if it will be set to manual mode.
+		 * If so, then we have to copy the current volatile values
+		 * first since those will become the new manual values (which
+		 * may be overwritten by explicit new values from this set
+		 * of controls).
+		 */
+		if (master->is_auto && master->has_volatiles &&
+		    !is_cur_manual(master)) {
+			s32 new_auto_val = *master->p_new.p_s32;
+
+			/*
+			 * If the new value == the manual value, then copy
+			 * the current volatile values.
+			 */
+			if (new_auto_val == master->manual_mode_value)
+				update_from_auto_cluster(master);
+		}
+
+		ret = try_or_set_cluster(NULL, master, true, 0);
+		v4l2_ctrl_unlock(master);
+
+		if (ret)
+			break;
+	}
+
+	media_request_object_put(obj);
+	return ret;
+}
+EXPORT_SYMBOL(v4l2_ctrl_request_setup);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
deleted file mode 100644
index 09992e76bad64..0000000000000
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ /dev/null
@@ -1,5111 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
-    V4L2 controls framework implementation.
-
-    Copyright (C) 2010  Hans Verkuil <hverkuil@xs4all.nl>
-
- */
-
-#define pr_fmt(fmt) "v4l2-ctrls: " fmt
-
-#include <linux/ctype.h>
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <media/v4l2-ctrls.h>
-#include <media/v4l2-dev.h>
-#include <media/v4l2-device.h>
-#include <media/v4l2-event.h>
-#include <media/v4l2-fwnode.h>
-#include <media/v4l2-ioctl.h>
-
-#define dprintk(vdev, fmt, arg...) do {					\
-	if (!WARN_ON(!(vdev)) && ((vdev)->dev_debug & V4L2_DEV_DEBUG_CTRL)) \
-		printk(KERN_DEBUG pr_fmt("%s: %s: " fmt),		\
-		       __func__, video_device_node_name(vdev), ##arg);	\
-} while (0)
-
-#define has_op(master, op) \
-	(master->ops && master->ops->op)
-#define call_op(master, op) \
-	(has_op(master, op) ? master->ops->op(master) : 0)
-
-static const union v4l2_ctrl_ptr ptr_null;
-
-/* Internal temporary helper struct, one for each v4l2_ext_control */
-struct v4l2_ctrl_helper {
-	/* Pointer to the control reference of the master control */
-	struct v4l2_ctrl_ref *mref;
-	/* The control ref corresponding to the v4l2_ext_control ID field. */
-	struct v4l2_ctrl_ref *ref;
-	/* v4l2_ext_control index of the next control belonging to the
-	   same cluster, or 0 if there isn't any. */
-	u32 next;
-};
-
-/* Small helper function to determine if the autocluster is set to manual
-   mode. */
-static bool is_cur_manual(const struct v4l2_ctrl *master)
-{
-	return master->is_auto && master->cur.val == master->manual_mode_value;
-}
-
-/* Same as above, but this checks the against the new value instead of the
-   current value. */
-static bool is_new_manual(const struct v4l2_ctrl *master)
-{
-	return master->is_auto && master->val == master->manual_mode_value;
-}
-
-/* Default intra MPEG-2 quantisation coefficients, from the specification. */
-static const u8 mpeg2_intra_quant_matrix[64] = {
-	8,  16, 16, 19, 16, 19, 22, 22,
-	22, 22, 22, 22, 26, 24, 26, 27,
-	27, 27, 26, 26, 26, 26, 27, 27,
-	27, 29, 29, 29, 34, 34, 34, 29,
-	29, 29, 27, 27, 29, 29, 32, 32,
-	34, 34, 37, 38, 37, 35, 35, 34,
-	35, 38, 38, 40, 40, 40, 48, 48,
-	46, 46, 56, 56, 58, 69, 69, 83
-};
-
-/* Returns NULL or a character pointer array containing the menu for
-   the given control ID. The pointer array ends with a NULL pointer.
-   An empty string signifies a menu entry that is invalid. This allows
-   drivers to disable certain options if it is not supported. */
-const char * const *v4l2_ctrl_get_menu(u32 id)
-{
-	static const char * const mpeg_audio_sampling_freq[] = {
-		"44.1 kHz",
-		"48 kHz",
-		"32 kHz",
-		NULL
-	};
-	static const char * const mpeg_audio_encoding[] = {
-		"MPEG-1/2 Layer I",
-		"MPEG-1/2 Layer II",
-		"MPEG-1/2 Layer III",
-		"MPEG-2/4 AAC",
-		"AC-3",
-		NULL
-	};
-	static const char * const mpeg_audio_l1_bitrate[] = {
-		"32 kbps",
-		"64 kbps",
-		"96 kbps",
-		"128 kbps",
-		"160 kbps",
-		"192 kbps",
-		"224 kbps",
-		"256 kbps",
-		"288 kbps",
-		"320 kbps",
-		"352 kbps",
-		"384 kbps",
-		"416 kbps",
-		"448 kbps",
-		NULL
-	};
-	static const char * const mpeg_audio_l2_bitrate[] = {
-		"32 kbps",
-		"48 kbps",
-		"56 kbps",
-		"64 kbps",
-		"80 kbps",
-		"96 kbps",
-		"112 kbps",
-		"128 kbps",
-		"160 kbps",
-		"192 kbps",
-		"224 kbps",
-		"256 kbps",
-		"320 kbps",
-		"384 kbps",
-		NULL
-	};
-	static const char * const mpeg_audio_l3_bitrate[] = {
-		"32 kbps",
-		"40 kbps",
-		"48 kbps",
-		"56 kbps",
-		"64 kbps",
-		"80 kbps",
-		"96 kbps",
-		"112 kbps",
-		"128 kbps",
-		"160 kbps",
-		"192 kbps",
-		"224 kbps",
-		"256 kbps",
-		"320 kbps",
-		NULL
-	};
-	static const char * const mpeg_audio_ac3_bitrate[] = {
-		"32 kbps",
-		"40 kbps",
-		"48 kbps",
-		"56 kbps",
-		"64 kbps",
-		"80 kbps",
-		"96 kbps",
-		"112 kbps",
-		"128 kbps",
-		"160 kbps",
-		"192 kbps",
-		"224 kbps",
-		"256 kbps",
-		"320 kbps",
-		"384 kbps",
-		"448 kbps",
-		"512 kbps",
-		"576 kbps",
-		"640 kbps",
-		NULL
-	};
-	static const char * const mpeg_audio_mode[] = {
-		"Stereo",
-		"Joint Stereo",
-		"Dual",
-		"Mono",
-		NULL
-	};
-	static const char * const mpeg_audio_mode_extension[] = {
-		"Bound 4",
-		"Bound 8",
-		"Bound 12",
-		"Bound 16",
-		NULL
-	};
-	static const char * const mpeg_audio_emphasis[] = {
-		"No Emphasis",
-		"50/15 us",
-		"CCITT J17",
-		NULL
-	};
-	static const char * const mpeg_audio_crc[] = {
-		"No CRC",
-		"16-bit CRC",
-		NULL
-	};
-	static const char * const mpeg_audio_dec_playback[] = {
-		"Auto",
-		"Stereo",
-		"Left",
-		"Right",
-		"Mono",
-		"Swapped Stereo",
-		NULL
-	};
-	static const char * const mpeg_video_encoding[] = {
-		"MPEG-1",
-		"MPEG-2",
-		"MPEG-4 AVC",
-		NULL
-	};
-	static const char * const mpeg_video_aspect[] = {
-		"1x1",
-		"4x3",
-		"16x9",
-		"2.21x1",
-		NULL
-	};
-	static const char * const mpeg_video_bitrate_mode[] = {
-		"Variable Bitrate",
-		"Constant Bitrate",
-		"Constant Quality",
-		NULL
-	};
-	static const char * const mpeg_stream_type[] = {
-		"MPEG-2 Program Stream",
-		"MPEG-2 Transport Stream",
-		"MPEG-1 System Stream",
-		"MPEG-2 DVD-compatible Stream",
-		"MPEG-1 VCD-compatible Stream",
-		"MPEG-2 SVCD-compatible Stream",
-		NULL
-	};
-	static const char * const mpeg_stream_vbi_fmt[] = {
-		"No VBI",
-		"Private Packet, IVTV Format",
-		NULL
-	};
-	static const char * const camera_power_line_frequency[] = {
-		"Disabled",
-		"50 Hz",
-		"60 Hz",
-		"Auto",
-		NULL
-	};
-	static const char * const camera_exposure_auto[] = {
-		"Auto Mode",
-		"Manual Mode",
-		"Shutter Priority Mode",
-		"Aperture Priority Mode",
-		NULL
-	};
-	static const char * const camera_exposure_metering[] = {
-		"Average",
-		"Center Weighted",
-		"Spot",
-		"Matrix",
-		NULL
-	};
-	static const char * const camera_auto_focus_range[] = {
-		"Auto",
-		"Normal",
-		"Macro",
-		"Infinity",
-		NULL
-	};
-	static const char * const colorfx[] = {
-		"None",
-		"Black & White",
-		"Sepia",
-		"Negative",
-		"Emboss",
-		"Sketch",
-		"Sky Blue",
-		"Grass Green",
-		"Skin Whiten",
-		"Vivid",
-		"Aqua",
-		"Art Freeze",
-		"Silhouette",
-		"Solarization",
-		"Antique",
-		"Set Cb/Cr",
-		NULL
-	};
-	static const char * const auto_n_preset_white_balance[] = {
-		"Manual",
-		"Auto",
-		"Incandescent",
-		"Fluorescent",
-		"Fluorescent H",
-		"Horizon",
-		"Daylight",
-		"Flash",
-		"Cloudy",
-		"Shade",
-		NULL,
-	};
-	static const char * const camera_iso_sensitivity_auto[] = {
-		"Manual",
-		"Auto",
-		NULL
-	};
-	static const char * const scene_mode[] = {
-		"None",
-		"Backlight",
-		"Beach/Snow",
-		"Candle Light",
-		"Dusk/Dawn",
-		"Fall Colors",
-		"Fireworks",
-		"Landscape",
-		"Night",
-		"Party/Indoor",
-		"Portrait",
-		"Sports",
-		"Sunset",
-		"Text",
-		NULL
-	};
-	static const char * const tune_emphasis[] = {
-		"None",
-		"50 Microseconds",
-		"75 Microseconds",
-		NULL,
-	};
-	static const char * const header_mode[] = {
-		"Separate Buffer",
-		"Joined With 1st Frame",
-		NULL,
-	};
-	static const char * const multi_slice[] = {
-		"Single",
-		"Max Macroblocks",
-		"Max Bytes",
-		NULL,
-	};
-	static const char * const entropy_mode[] = {
-		"CAVLC",
-		"CABAC",
-		NULL,
-	};
-	static const char * const mpeg_h264_level[] = {
-		"1",
-		"1b",
-		"1.1",
-		"1.2",
-		"1.3",
-		"2",
-		"2.1",
-		"2.2",
-		"3",
-		"3.1",
-		"3.2",
-		"4",
-		"4.1",
-		"4.2",
-		"5",
-		"5.1",
-		"5.2",
-		"6.0",
-		"6.1",
-		"6.2",
-		NULL,
-	};
-	static const char * const h264_loop_filter[] = {
-		"Enabled",
-		"Disabled",
-		"Disabled at Slice Boundary",
-		NULL,
-	};
-	static const char * const h264_profile[] = {
-		"Baseline",
-		"Constrained Baseline",
-		"Main",
-		"Extended",
-		"High",
-		"High 10",
-		"High 422",
-		"High 444 Predictive",
-		"High 10 Intra",
-		"High 422 Intra",
-		"High 444 Intra",
-		"CAVLC 444 Intra",
-		"Scalable Baseline",
-		"Scalable High",
-		"Scalable High Intra",
-		"Stereo High",
-		"Multiview High",
-		"Constrained High",
-		NULL,
-	};
-	static const char * const vui_sar_idc[] = {
-		"Unspecified",
-		"1:1",
-		"12:11",
-		"10:11",
-		"16:11",
-		"40:33",
-		"24:11",
-		"20:11",
-		"32:11",
-		"80:33",
-		"18:11",
-		"15:11",
-		"64:33",
-		"160:99",
-		"4:3",
-		"3:2",
-		"2:1",
-		"Extended SAR",
-		NULL,
-	};
-	static const char * const h264_fp_arrangement_type[] = {
-		"Checkerboard",
-		"Column",
-		"Row",
-		"Side by Side",
-		"Top Bottom",
-		"Temporal",
-		NULL,
-	};
-	static const char * const h264_fmo_map_type[] = {
-		"Interleaved Slices",
-		"Scattered Slices",
-		"Foreground with Leftover",
-		"Box Out",
-		"Raster Scan",
-		"Wipe Scan",
-		"Explicit",
-		NULL,
-	};
-	static const char * const h264_decode_mode[] = {
-		"Slice-Based",
-		"Frame-Based",
-		NULL,
-	};
-	static const char * const h264_start_code[] = {
-		"No Start Code",
-		"Annex B Start Code",
-		NULL,
-	};
-	static const char * const h264_hierarchical_coding_type[] = {
-		"Hier Coding B",
-		"Hier Coding P",
-		NULL,
-	};
-	static const char * const mpeg_mpeg2_level[] = {
-		"Low",
-		"Main",
-		"High 1440",
-		"High",
-		NULL,
-	};
-	static const char * const mpeg2_profile[] = {
-		"Simple",
-		"Main",
-		"SNR Scalable",
-		"Spatially Scalable",
-		"High",
-		NULL,
-	};
-	static const char * const mpeg_mpeg4_level[] = {
-		"0",
-		"0b",
-		"1",
-		"2",
-		"3",
-		"3b",
-		"4",
-		"5",
-		NULL,
-	};
-	static const char * const mpeg4_profile[] = {
-		"Simple",
-		"Advanced Simple",
-		"Core",
-		"Simple Scalable",
-		"Advanced Coding Efficiency",
-		NULL,
-	};
-
-	static const char * const vpx_golden_frame_sel[] = {
-		"Use Previous Frame",
-		"Use Previous Specific Frame",
-		NULL,
-	};
-	static const char * const vp8_profile[] = {
-		"0",
-		"1",
-		"2",
-		"3",
-		NULL,
-	};
-	static const char * const vp9_profile[] = {
-		"0",
-		"1",
-		"2",
-		"3",
-		NULL,
-	};
-	static const char * const vp9_level[] = {
-		"1",
-		"1.1",
-		"2",
-		"2.1",
-		"3",
-		"3.1",
-		"4",
-		"4.1",
-		"5",
-		"5.1",
-		"5.2",
-		"6",
-		"6.1",
-		"6.2",
-		NULL,
-	};
-
-	static const char * const flash_led_mode[] = {
-		"Off",
-		"Flash",
-		"Torch",
-		NULL,
-	};
-	static const char * const flash_strobe_source[] = {
-		"Software",
-		"External",
-		NULL,
-	};
-
-	static const char * const jpeg_chroma_subsampling[] = {
-		"4:4:4",
-		"4:2:2",
-		"4:2:0",
-		"4:1:1",
-		"4:1:0",
-		"Gray",
-		NULL,
-	};
-	static const char * const dv_tx_mode[] = {
-		"DVI-D",
-		"HDMI",
-		NULL,
-	};
-	static const char * const dv_rgb_range[] = {
-		"Automatic",
-		"RGB Limited Range (16-235)",
-		"RGB Full Range (0-255)",
-		NULL,
-	};
-	static const char * const dv_it_content_type[] = {
-		"Graphics",
-		"Photo",
-		"Cinema",
-		"Game",
-		"No IT Content",
-		NULL,
-	};
-	static const char * const detect_md_mode[] = {
-		"Disabled",
-		"Global",
-		"Threshold Grid",
-		"Region Grid",
-		NULL,
-	};
-
-	static const char * const hevc_profile[] = {
-		"Main",
-		"Main Still Picture",
-		"Main 10",
-		NULL,
-	};
-	static const char * const hevc_level[] = {
-		"1",
-		"2",
-		"2.1",
-		"3",
-		"3.1",
-		"4",
-		"4.1",
-		"5",
-		"5.1",
-		"5.2",
-		"6",
-		"6.1",
-		"6.2",
-		NULL,
-	};
-	static const char * const hevc_hierarchial_coding_type[] = {
-		"B",
-		"P",
-		NULL,
-	};
-	static const char * const hevc_refresh_type[] = {
-		"None",
-		"CRA",
-		"IDR",
-		NULL,
-	};
-	static const char * const hevc_size_of_length_field[] = {
-		"0",
-		"1",
-		"2",
-		"4",
-		NULL,
-	};
-	static const char * const hevc_tier[] = {
-		"Main",
-		"High",
-		NULL,
-	};
-	static const char * const hevc_loop_filter_mode[] = {
-		"Disabled",
-		"Enabled",
-		"Disabled at slice boundary",
-		"NULL",
-	};
-	static const char * const hevc_decode_mode[] = {
-		"Slice-Based",
-		"Frame-Based",
-		NULL,
-	};
-	static const char * const hevc_start_code[] = {
-		"No Start Code",
-		"Annex B Start Code",
-		NULL,
-	};
-	static const char * const camera_orientation[] = {
-		"Front",
-		"Back",
-		"External",
-		NULL,
-	};
-	static const char * const mpeg_video_frame_skip[] = {
-		"Disabled",
-		"Level Limit",
-		"VBV/CPB Limit",
-		NULL,
-	};
-
-	switch (id) {
-	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
-		return mpeg_audio_sampling_freq;
-	case V4L2_CID_MPEG_AUDIO_ENCODING:
-		return mpeg_audio_encoding;
-	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:
-		return mpeg_audio_l1_bitrate;
-	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:
-		return mpeg_audio_l2_bitrate;
-	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:
-		return mpeg_audio_l3_bitrate;
-	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:
-		return mpeg_audio_ac3_bitrate;
-	case V4L2_CID_MPEG_AUDIO_MODE:
-		return mpeg_audio_mode;
-	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION:
-		return mpeg_audio_mode_extension;
-	case V4L2_CID_MPEG_AUDIO_EMPHASIS:
-		return mpeg_audio_emphasis;
-	case V4L2_CID_MPEG_AUDIO_CRC:
-		return mpeg_audio_crc;
-	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:
-	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK:
-		return mpeg_audio_dec_playback;
-	case V4L2_CID_MPEG_VIDEO_ENCODING:
-		return mpeg_video_encoding;
-	case V4L2_CID_MPEG_VIDEO_ASPECT:
-		return mpeg_video_aspect;
-	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
-		return mpeg_video_bitrate_mode;
-	case V4L2_CID_MPEG_STREAM_TYPE:
-		return mpeg_stream_type;
-	case V4L2_CID_MPEG_STREAM_VBI_FMT:
-		return mpeg_stream_vbi_fmt;
-	case V4L2_CID_POWER_LINE_FREQUENCY:
-		return camera_power_line_frequency;
-	case V4L2_CID_EXPOSURE_AUTO:
-		return camera_exposure_auto;
-	case V4L2_CID_EXPOSURE_METERING:
-		return camera_exposure_metering;
-	case V4L2_CID_AUTO_FOCUS_RANGE:
-		return camera_auto_focus_range;
-	case V4L2_CID_COLORFX:
-		return colorfx;
-	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE:
-		return auto_n_preset_white_balance;
-	case V4L2_CID_ISO_SENSITIVITY_AUTO:
-		return camera_iso_sensitivity_auto;
-	case V4L2_CID_SCENE_MODE:
-		return scene_mode;
-	case V4L2_CID_TUNE_PREEMPHASIS:
-		return tune_emphasis;
-	case V4L2_CID_TUNE_DEEMPHASIS:
-		return tune_emphasis;
-	case V4L2_CID_FLASH_LED_MODE:
-		return flash_led_mode;
-	case V4L2_CID_FLASH_STROBE_SOURCE:
-		return flash_strobe_source;
-	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
-		return header_mode;
-	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
-		return mpeg_video_frame_skip;
-	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
-		return multi_slice;
-	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
-		return entropy_mode;
-	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
-		return mpeg_h264_level;
-	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:
-		return h264_loop_filter;
-	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
-		return h264_profile;
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
-		return vui_sar_idc;
-	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
-		return h264_fp_arrangement_type;
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
-		return h264_fmo_map_type;
-	case V4L2_CID_STATELESS_H264_DECODE_MODE:
-		return h264_decode_mode;
-	case V4L2_CID_STATELESS_H264_START_CODE:
-		return h264_start_code;
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:
-		return h264_hierarchical_coding_type;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
-		return mpeg_mpeg2_level;
-	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
-		return mpeg2_profile;
-	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
-		return mpeg_mpeg4_level;
-	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
-		return mpeg4_profile;
-	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
-		return vpx_golden_frame_sel;
-	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
-		return vp8_profile;
-	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
-		return vp9_profile;
-	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
-		return vp9_level;
-	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
-		return jpeg_chroma_subsampling;
-	case V4L2_CID_DV_TX_MODE:
-		return dv_tx_mode;
-	case V4L2_CID_DV_TX_RGB_RANGE:
-	case V4L2_CID_DV_RX_RGB_RANGE:
-		return dv_rgb_range;
-	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:
-	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
-		return dv_it_content_type;
-	case V4L2_CID_DETECT_MD_MODE:
-		return detect_md_mode;
-	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
-		return hevc_profile;
-	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
-		return hevc_level;
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:
-		return hevc_hierarchial_coding_type;
-	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:
-		return hevc_refresh_type;
-	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:
-		return hevc_size_of_length_field;
-	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:
-		return hevc_tier;
-	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
-		return hevc_loop_filter_mode;
-	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
-		return hevc_decode_mode;
-	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
-		return hevc_start_code;
-	case V4L2_CID_CAMERA_ORIENTATION:
-		return camera_orientation;
-	default:
-		return NULL;
-	}
-}
-EXPORT_SYMBOL(v4l2_ctrl_get_menu);
-
-#define __v4l2_qmenu_int_len(arr, len) ({ *(len) = ARRAY_SIZE(arr); arr; })
-/*
- * Returns NULL or an s64 type array containing the menu for given
- * control ID. The total number of the menu items is returned in @len.
- */
-const s64 *v4l2_ctrl_get_int_menu(u32 id, u32 *len)
-{
-	static const s64 qmenu_int_vpx_num_partitions[] = {
-		1, 2, 4, 8,
-	};
-
-	static const s64 qmenu_int_vpx_num_ref_frames[] = {
-		1, 2, 3,
-	};
-
-	switch (id) {
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:
-		return __v4l2_qmenu_int_len(qmenu_int_vpx_num_partitions, len);
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:
-		return __v4l2_qmenu_int_len(qmenu_int_vpx_num_ref_frames, len);
-	default:
-		*len = 0;
-		return NULL;
-	}
-}
-EXPORT_SYMBOL(v4l2_ctrl_get_int_menu);
-
-/* Return the control name. */
-const char *v4l2_ctrl_get_name(u32 id)
-{
-	switch (id) {
-	/* USER controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_USER_CLASS:		return "User Controls";
-	case V4L2_CID_BRIGHTNESS:		return "Brightness";
-	case V4L2_CID_CONTRAST:			return "Contrast";
-	case V4L2_CID_SATURATION:		return "Saturation";
-	case V4L2_CID_HUE:			return "Hue";
-	case V4L2_CID_AUDIO_VOLUME:		return "Volume";
-	case V4L2_CID_AUDIO_BALANCE:		return "Balance";
-	case V4L2_CID_AUDIO_BASS:		return "Bass";
-	case V4L2_CID_AUDIO_TREBLE:		return "Treble";
-	case V4L2_CID_AUDIO_MUTE:		return "Mute";
-	case V4L2_CID_AUDIO_LOUDNESS:		return "Loudness";
-	case V4L2_CID_BLACK_LEVEL:		return "Black Level";
-	case V4L2_CID_AUTO_WHITE_BALANCE:	return "White Balance, Automatic";
-	case V4L2_CID_DO_WHITE_BALANCE:		return "Do White Balance";
-	case V4L2_CID_RED_BALANCE:		return "Red Balance";
-	case V4L2_CID_BLUE_BALANCE:		return "Blue Balance";
-	case V4L2_CID_GAMMA:			return "Gamma";
-	case V4L2_CID_EXPOSURE:			return "Exposure";
-	case V4L2_CID_AUTOGAIN:			return "Gain, Automatic";
-	case V4L2_CID_GAIN:			return "Gain";
-	case V4L2_CID_HFLIP:			return "Horizontal Flip";
-	case V4L2_CID_VFLIP:			return "Vertical Flip";
-	case V4L2_CID_POWER_LINE_FREQUENCY:	return "Power Line Frequency";
-	case V4L2_CID_HUE_AUTO:			return "Hue, Automatic";
-	case V4L2_CID_WHITE_BALANCE_TEMPERATURE: return "White Balance Temperature";
-	case V4L2_CID_SHARPNESS:		return "Sharpness";
-	case V4L2_CID_BACKLIGHT_COMPENSATION:	return "Backlight Compensation";
-	case V4L2_CID_CHROMA_AGC:		return "Chroma AGC";
-	case V4L2_CID_COLOR_KILLER:		return "Color Killer";
-	case V4L2_CID_COLORFX:			return "Color Effects";
-	case V4L2_CID_AUTOBRIGHTNESS:		return "Brightness, Automatic";
-	case V4L2_CID_BAND_STOP_FILTER:		return "Band-Stop Filter";
-	case V4L2_CID_ROTATE:			return "Rotate";
-	case V4L2_CID_BG_COLOR:			return "Background Color";
-	case V4L2_CID_CHROMA_GAIN:		return "Chroma Gain";
-	case V4L2_CID_ILLUMINATORS_1:		return "Illuminator 1";
-	case V4L2_CID_ILLUMINATORS_2:		return "Illuminator 2";
-	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:	return "Min Number of Capture Buffers";
-	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:	return "Min Number of Output Buffers";
-	case V4L2_CID_ALPHA_COMPONENT:		return "Alpha Component";
-	case V4L2_CID_COLORFX_CBCR:		return "Color Effects, CbCr";
-
-	/* Codec controls */
-	/* The MPEG controls are applicable to all codec controls
-	 * and the 'MPEG' part of the define is historical */
-	/* Keep the order of the 'case's the same as in videodev2.h! */
-	case V4L2_CID_CODEC_CLASS:		return "Codec Controls";
-	case V4L2_CID_MPEG_STREAM_TYPE:		return "Stream Type";
-	case V4L2_CID_MPEG_STREAM_PID_PMT:	return "Stream PMT Program ID";
-	case V4L2_CID_MPEG_STREAM_PID_AUDIO:	return "Stream Audio Program ID";
-	case V4L2_CID_MPEG_STREAM_PID_VIDEO:	return "Stream Video Program ID";
-	case V4L2_CID_MPEG_STREAM_PID_PCR:	return "Stream PCR Program ID";
-	case V4L2_CID_MPEG_STREAM_PES_ID_AUDIO: return "Stream PES Audio ID";
-	case V4L2_CID_MPEG_STREAM_PES_ID_VIDEO: return "Stream PES Video ID";
-	case V4L2_CID_MPEG_STREAM_VBI_FMT:	return "Stream VBI Format";
-	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ: return "Audio Sampling Frequency";
-	case V4L2_CID_MPEG_AUDIO_ENCODING:	return "Audio Encoding";
-	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:	return "Audio Layer I Bitrate";
-	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:	return "Audio Layer II Bitrate";
-	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:	return "Audio Layer III Bitrate";
-	case V4L2_CID_MPEG_AUDIO_MODE:		return "Audio Stereo Mode";
-	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION: return "Audio Stereo Mode Extension";
-	case V4L2_CID_MPEG_AUDIO_EMPHASIS:	return "Audio Emphasis";
-	case V4L2_CID_MPEG_AUDIO_CRC:		return "Audio CRC";
-	case V4L2_CID_MPEG_AUDIO_MUTE:		return "Audio Mute";
-	case V4L2_CID_MPEG_AUDIO_AAC_BITRATE:	return "Audio AAC Bitrate";
-	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:	return "Audio AC-3 Bitrate";
-	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:	return "Audio Playback";
-	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK: return "Audio Multilingual Playback";
-	case V4L2_CID_MPEG_VIDEO_ENCODING:	return "Video Encoding";
-	case V4L2_CID_MPEG_VIDEO_ASPECT:	return "Video Aspect";
-	case V4L2_CID_MPEG_VIDEO_B_FRAMES:	return "Video B Frames";
-	case V4L2_CID_MPEG_VIDEO_GOP_SIZE:	return "Video GOP Size";
-	case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE:	return "Video GOP Closure";
-	case V4L2_CID_MPEG_VIDEO_PULLDOWN:	return "Video Pulldown";
-	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:	return "Video Bitrate Mode";
-	case V4L2_CID_MPEG_VIDEO_CONSTANT_QUALITY:	return "Constant Quality";
-	case V4L2_CID_MPEG_VIDEO_BITRATE:	return "Video Bitrate";
-	case V4L2_CID_MPEG_VIDEO_BITRATE_PEAK:	return "Video Peak Bitrate";
-	case V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION: return "Video Temporal Decimation";
-	case V4L2_CID_MPEG_VIDEO_MUTE:		return "Video Mute";
-	case V4L2_CID_MPEG_VIDEO_MUTE_YUV:	return "Video Mute YUV";
-	case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE:	return "Decoder Slice Interface";
-	case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER:	return "MPEG4 Loop Filter Enable";
-	case V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB:	return "Number of Intra Refresh MBs";
-	case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE:		return "Frame Level Rate Control Enable";
-	case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE:			return "H264 MB Level Rate Control";
-	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:			return "Sequence Header Mode";
-	case V4L2_CID_MPEG_VIDEO_MAX_REF_PIC:			return "Max Number of Reference Pics";
-	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:		return "Frame Skip Mode";
-	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY:		return "Display Delay";
-	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE:	return "Display Delay Enable";
-	case V4L2_CID_MPEG_VIDEO_AU_DELIMITER:			return "Generate Access Unit Delimiters";
-	case V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP:		return "H263 I-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP:		return "H263 P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP:		return "H263 B-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H263_MIN_QP:			return "H263 Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H263_MAX_QP:			return "H263 Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP:		return "H264 I-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP:		return "H264 P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP:		return "H264 B-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_MAX_QP:			return "H264 Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_MIN_QP:			return "H264 Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:		return "H264 8x8 Transform Enable";
-	case V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE:			return "H264 CPB Buffer Size";
-	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:		return "H264 Entropy Mode";
-	case V4L2_CID_MPEG_VIDEO_H264_I_PERIOD:			return "H264 I-Frame Period";
-	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:			return "H264 Level";
-	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA:	return "H264 Loop Filter Alpha Offset";
-	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA:		return "H264 Loop Filter Beta Offset";
-	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:		return "H264 Loop Filter Mode";
-	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:			return "H264 Profile";
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT:	return "Vertical Size of SAR";
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH:	return "Horizontal Size of SAR";
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE:		return "Aspect Ratio VUI Enable";
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:		return "VUI Aspect Ratio IDC";
-	case V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING:	return "H264 Enable Frame Packing SEI";
-	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0:	return "H264 Set Curr. Frame as Frame0";
-	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:	return "H264 FP Arrangement Type";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO:			return "H264 Flexible MB Ordering";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:		return "H264 Map Type for FMO";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP:		return "H264 FMO Number of Slice Groups";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION:	return "H264 FMO Direction of Change";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE:		return "H264 FMO Size of 1st Slice Grp";
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH:		return "H264 FMO No. of Consecutive MBs";
-	case V4L2_CID_MPEG_VIDEO_H264_ASO:			return "H264 Arbitrary Slice Ordering";
-	case V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER:		return "H264 ASO Slice Order";
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING:	return "Enable H264 Hierarchical Coding";
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:	return "H264 Hierarchical Coding Type";
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER:return "H264 Number of HC Layers";
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP:
-								return "H264 Set QP Value for HC Layers";
-	case V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION:
-								return "H264 Constrained Intra Pred";
-	case V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET:	return "H264 Chroma QP Index Offset";
-	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MIN_QP:		return "H264 I-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_I_FRAME_MAX_QP:		return "H264 I-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MIN_QP:		return "H264 P-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_P_FRAME_MAX_QP:		return "H264 P-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MIN_QP:		return "H264 B-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_B_FRAME_MAX_QP:		return "H264 B-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L0_BR:	return "H264 Hierarchical Lay 0 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L1_BR:	return "H264 Hierarchical Lay 1 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L2_BR:	return "H264 Hierarchical Lay 2 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L3_BR:	return "H264 Hierarchical Lay 3 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L4_BR:	return "H264 Hierarchical Lay 4 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L5_BR:	return "H264 Hierarchical Lay 5 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_H264_HIER_CODING_L6_BR:	return "H264 Hierarchical Lay 6 Bitrate";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:			return "MPEG2 Level";
-	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:			return "MPEG2 Profile";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP:		return "MPEG4 P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP:		return "MPEG4 B-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP:			return "MPEG4 Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP:			return "MPEG4 Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:			return "MPEG4 Level";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:			return "MPEG4 Profile";
-	case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:			return "Quarter Pixel Search Enable";
-	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES:		return "Maximum Bytes in a Slice";
-	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB:		return "Number of MBs in a Slice";
-	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:		return "Slice Partitioning Method";
-	case V4L2_CID_MPEG_VIDEO_VBV_SIZE:			return "VBV Buffer Size";
-	case V4L2_CID_MPEG_VIDEO_DEC_PTS:			return "Video Decoder PTS";
-	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:			return "Video Decoder Frame Count";
-	case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR:		return "Video Decoder Conceal Color";
-	case V4L2_CID_MPEG_VIDEO_VBV_DELAY:			return "Initial Delay for VBV Control";
-	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:		return "Horizontal MV Search Range";
-	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:		return "Vertical MV Search Range";
-	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:		return "Repeat Sequence Header";
-	case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME:		return "Force Key Frame";
-	case V4L2_CID_MPEG_VIDEO_BASELAYER_PRIORITY_ID:		return "Base Layer Priority ID";
-	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:			return "LTR Count";
-	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
-	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
-	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
-	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
-
-	/* VPX controls */
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:		return "VPX Number of Partitions";
-	case V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4:		return "VPX Intra Mode Decision Disable";
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:		return "VPX No. of Refs for P Frame";
-	case V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL:		return "VPX Loop Filter Level Range";
-	case V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS:		return "VPX Deblocking Effect Control";
-	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD:	return "VPX Golden Frame Refresh Period";
-	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:		return "VPX Golden Frame Indicator";
-	case V4L2_CID_MPEG_VIDEO_VPX_MIN_QP:			return "VPX Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_VPX_MAX_QP:			return "VPX Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP:		return "VPX I-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
-	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:			return "VP9 Profile";
-	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:			return "VP9 Level";
-
-	/* HEVC controls */
-	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:		return "HEVC I-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_QP:		return "HEVC P-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_QP:		return "HEVC B-Frame QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_MIN_QP:			return "HEVC Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_QP:			return "HEVC Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MIN_QP:		return "HEVC I-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_MAX_QP:		return "HEVC I-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MIN_QP:		return "HEVC P-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_P_FRAME_MAX_QP:		return "HEVC P-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MIN_QP:		return "HEVC B-Frame Minimum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_B_FRAME_MAX_QP:		return "HEVC B-Frame Maximum QP Value";
-	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:			return "HEVC Profile";
-	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:			return "HEVC Level";
-	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:			return "HEVC Tier";
-	case V4L2_CID_MPEG_VIDEO_HEVC_FRAME_RATE_RESOLUTION:	return "HEVC Frame Rate Resolution";
-	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_PARTITION_DEPTH:	return "HEVC Maximum Coding Unit Depth";
-	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:		return "HEVC Refresh Type";
-	case V4L2_CID_MPEG_VIDEO_HEVC_CONST_INTRA_PRED:		return "HEVC Constant Intra Prediction";
-	case V4L2_CID_MPEG_VIDEO_HEVC_LOSSLESS_CU:		return "HEVC Lossless Encoding";
-	case V4L2_CID_MPEG_VIDEO_HEVC_WAVEFRONT:		return "HEVC Wavefront";
-	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:		return "HEVC Loop Filter";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_QP:			return "HEVC QP Values";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:		return "HEVC Hierarchical Coding Type";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_LAYER:	return "HEVC Hierarchical Coding Layer";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_QP:	return "HEVC Hierarchical Layer 0 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_QP:	return "HEVC Hierarchical Layer 1 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_QP:	return "HEVC Hierarchical Layer 2 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_QP:	return "HEVC Hierarchical Layer 3 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_QP:	return "HEVC Hierarchical Layer 4 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_QP:	return "HEVC Hierarchical Layer 5 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_QP:	return "HEVC Hierarchical Layer 6 QP";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L0_BR:	return "HEVC Hierarchical Lay 0 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L1_BR:	return "HEVC Hierarchical Lay 1 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L2_BR:	return "HEVC Hierarchical Lay 2 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L3_BR:	return "HEVC Hierarchical Lay 3 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L4_BR:	return "HEVC Hierarchical Lay 4 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L5_BR:	return "HEVC Hierarchical Lay 5 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_L6_BR:	return "HEVC Hierarchical Lay 6 BitRate";
-	case V4L2_CID_MPEG_VIDEO_HEVC_GENERAL_PB:		return "HEVC General PB";
-	case V4L2_CID_MPEG_VIDEO_HEVC_TEMPORAL_ID:		return "HEVC Temporal ID";
-	case V4L2_CID_MPEG_VIDEO_HEVC_STRONG_SMOOTHING:		return "HEVC Strong Intra Smoothing";
-	case V4L2_CID_MPEG_VIDEO_HEVC_INTRA_PU_SPLIT:		return "HEVC Intra PU Split";
-	case V4L2_CID_MPEG_VIDEO_HEVC_TMV_PREDICTION:		return "HEVC TMV Prediction";
-	case V4L2_CID_MPEG_VIDEO_HEVC_MAX_NUM_MERGE_MV_MINUS1:	return "HEVC Max Num of Candidate MVs";
-	case V4L2_CID_MPEG_VIDEO_HEVC_WITHOUT_STARTCODE:	return "HEVC ENC Without Startcode";
-	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_PERIOD:		return "HEVC Num of I-Frame b/w 2 IDR";
-	case V4L2_CID_MPEG_VIDEO_HEVC_LF_BETA_OFFSET_DIV2:	return "HEVC Loop Filter Beta Offset";
-	case V4L2_CID_MPEG_VIDEO_HEVC_LF_TC_OFFSET_DIV2:	return "HEVC Loop Filter TC Offset";
-	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:	return "HEVC Size of Length Field";
-	case V4L2_CID_MPEG_VIDEO_REF_NUMBER_FOR_PFRAMES:	return "Reference Frames for a P-Frame";
-	case V4L2_CID_MPEG_VIDEO_PREPEND_SPSPPS_TO_IDR:		return "Prepend SPS and PPS to IDR";
-	case V4L2_CID_MPEG_VIDEO_HEVC_SPS:			return "HEVC Sequence Parameter Set";
-	case V4L2_CID_MPEG_VIDEO_HEVC_PPS:			return "HEVC Picture Parameter Set";
-	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:		return "HEVC Slice Parameters";
-	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:		return "HEVC Decode Mode";
-	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:		return "HEVC Start Code";
-
-	/* CAMERA controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_CAMERA_CLASS:		return "Camera Controls";
-	case V4L2_CID_EXPOSURE_AUTO:		return "Auto Exposure";
-	case V4L2_CID_EXPOSURE_ABSOLUTE:	return "Exposure Time, Absolute";
-	case V4L2_CID_EXPOSURE_AUTO_PRIORITY:	return "Exposure, Dynamic Framerate";
-	case V4L2_CID_PAN_RELATIVE:		return "Pan, Relative";
-	case V4L2_CID_TILT_RELATIVE:		return "Tilt, Relative";
-	case V4L2_CID_PAN_RESET:		return "Pan, Reset";
-	case V4L2_CID_TILT_RESET:		return "Tilt, Reset";
-	case V4L2_CID_PAN_ABSOLUTE:		return "Pan, Absolute";
-	case V4L2_CID_TILT_ABSOLUTE:		return "Tilt, Absolute";
-	case V4L2_CID_FOCUS_ABSOLUTE:		return "Focus, Absolute";
-	case V4L2_CID_FOCUS_RELATIVE:		return "Focus, Relative";
-	case V4L2_CID_FOCUS_AUTO:		return "Focus, Automatic Continuous";
-	case V4L2_CID_ZOOM_ABSOLUTE:		return "Zoom, Absolute";
-	case V4L2_CID_ZOOM_RELATIVE:		return "Zoom, Relative";
-	case V4L2_CID_ZOOM_CONTINUOUS:		return "Zoom, Continuous";
-	case V4L2_CID_PRIVACY:			return "Privacy";
-	case V4L2_CID_IRIS_ABSOLUTE:		return "Iris, Absolute";
-	case V4L2_CID_IRIS_RELATIVE:		return "Iris, Relative";
-	case V4L2_CID_AUTO_EXPOSURE_BIAS:	return "Auto Exposure, Bias";
-	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE: return "White Balance, Auto & Preset";
-	case V4L2_CID_WIDE_DYNAMIC_RANGE:	return "Wide Dynamic Range";
-	case V4L2_CID_IMAGE_STABILIZATION:	return "Image Stabilization";
-	case V4L2_CID_ISO_SENSITIVITY:		return "ISO Sensitivity";
-	case V4L2_CID_ISO_SENSITIVITY_AUTO:	return "ISO Sensitivity, Auto";
-	case V4L2_CID_EXPOSURE_METERING:	return "Exposure, Metering Mode";
-	case V4L2_CID_SCENE_MODE:		return "Scene Mode";
-	case V4L2_CID_3A_LOCK:			return "3A Lock";
-	case V4L2_CID_AUTO_FOCUS_START:		return "Auto Focus, Start";
-	case V4L2_CID_AUTO_FOCUS_STOP:		return "Auto Focus, Stop";
-	case V4L2_CID_AUTO_FOCUS_STATUS:	return "Auto Focus, Status";
-	case V4L2_CID_AUTO_FOCUS_RANGE:		return "Auto Focus, Range";
-	case V4L2_CID_PAN_SPEED:		return "Pan, Speed";
-	case V4L2_CID_TILT_SPEED:		return "Tilt, Speed";
-	case V4L2_CID_UNIT_CELL_SIZE:		return "Unit Cell Size";
-	case V4L2_CID_CAMERA_ORIENTATION:	return "Camera Orientation";
-	case V4L2_CID_CAMERA_SENSOR_ROTATION:	return "Camera Sensor Rotation";
-
-	/* FM Radio Modulator controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_FM_TX_CLASS:		return "FM Radio Modulator Controls";
-	case V4L2_CID_RDS_TX_DEVIATION:		return "RDS Signal Deviation";
-	case V4L2_CID_RDS_TX_PI:		return "RDS Program ID";
-	case V4L2_CID_RDS_TX_PTY:		return "RDS Program Type";
-	case V4L2_CID_RDS_TX_PS_NAME:		return "RDS PS Name";
-	case V4L2_CID_RDS_TX_RADIO_TEXT:	return "RDS Radio Text";
-	case V4L2_CID_RDS_TX_MONO_STEREO:	return "RDS Stereo";
-	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:	return "RDS Artificial Head";
-	case V4L2_CID_RDS_TX_COMPRESSED:	return "RDS Compressed";
-	case V4L2_CID_RDS_TX_DYNAMIC_PTY:	return "RDS Dynamic PTY";
-	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
-	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
-	case V4L2_CID_RDS_TX_MUSIC_SPEECH:	return "RDS Music";
-	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:	return "RDS Enable Alt Frequencies";
-	case V4L2_CID_RDS_TX_ALT_FREQS:		return "RDS Alternate Frequencies";
-	case V4L2_CID_AUDIO_LIMITER_ENABLED:	return "Audio Limiter Feature Enabled";
-	case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME: return "Audio Limiter Release Time";
-	case V4L2_CID_AUDIO_LIMITER_DEVIATION:	return "Audio Limiter Deviation";
-	case V4L2_CID_AUDIO_COMPRESSION_ENABLED: return "Audio Compression Enabled";
-	case V4L2_CID_AUDIO_COMPRESSION_GAIN:	return "Audio Compression Gain";
-	case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD: return "Audio Compression Threshold";
-	case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME: return "Audio Compression Attack Time";
-	case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME: return "Audio Compression Release Time";
-	case V4L2_CID_PILOT_TONE_ENABLED:	return "Pilot Tone Feature Enabled";
-	case V4L2_CID_PILOT_TONE_DEVIATION:	return "Pilot Tone Deviation";
-	case V4L2_CID_PILOT_TONE_FREQUENCY:	return "Pilot Tone Frequency";
-	case V4L2_CID_TUNE_PREEMPHASIS:		return "Pre-Emphasis";
-	case V4L2_CID_TUNE_POWER_LEVEL:		return "Tune Power Level";
-	case V4L2_CID_TUNE_ANTENNA_CAPACITOR:	return "Tune Antenna Capacitor";
-
-	/* Flash controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_FLASH_CLASS:		return "Flash Controls";
-	case V4L2_CID_FLASH_LED_MODE:		return "LED Mode";
-	case V4L2_CID_FLASH_STROBE_SOURCE:	return "Strobe Source";
-	case V4L2_CID_FLASH_STROBE:		return "Strobe";
-	case V4L2_CID_FLASH_STROBE_STOP:	return "Stop Strobe";
-	case V4L2_CID_FLASH_STROBE_STATUS:	return "Strobe Status";
-	case V4L2_CID_FLASH_TIMEOUT:		return "Strobe Timeout";
-	case V4L2_CID_FLASH_INTENSITY:		return "Intensity, Flash Mode";
-	case V4L2_CID_FLASH_TORCH_INTENSITY:	return "Intensity, Torch Mode";
-	case V4L2_CID_FLASH_INDICATOR_INTENSITY: return "Intensity, Indicator";
-	case V4L2_CID_FLASH_FAULT:		return "Faults";
-	case V4L2_CID_FLASH_CHARGE:		return "Charge";
-	case V4L2_CID_FLASH_READY:		return "Ready to Strobe";
-
-	/* JPEG encoder controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_JPEG_CLASS:		return "JPEG Compression Controls";
-	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:	return "Chroma Subsampling";
-	case V4L2_CID_JPEG_RESTART_INTERVAL:	return "Restart Interval";
-	case V4L2_CID_JPEG_COMPRESSION_QUALITY:	return "Compression Quality";
-	case V4L2_CID_JPEG_ACTIVE_MARKER:	return "Active Markers";
-
-	/* Image source controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_IMAGE_SOURCE_CLASS:	return "Image Source Controls";
-	case V4L2_CID_VBLANK:			return "Vertical Blanking";
-	case V4L2_CID_HBLANK:			return "Horizontal Blanking";
-	case V4L2_CID_ANALOGUE_GAIN:		return "Analogue Gain";
-	case V4L2_CID_TEST_PATTERN_RED:		return "Red Pixel Value";
-	case V4L2_CID_TEST_PATTERN_GREENR:	return "Green (Red) Pixel Value";
-	case V4L2_CID_TEST_PATTERN_BLUE:	return "Blue Pixel Value";
-	case V4L2_CID_TEST_PATTERN_GREENB:	return "Green (Blue) Pixel Value";
-
-	/* Image processing controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_IMAGE_PROC_CLASS:		return "Image Processing Controls";
-	case V4L2_CID_LINK_FREQ:		return "Link Frequency";
-	case V4L2_CID_PIXEL_RATE:		return "Pixel Rate";
-	case V4L2_CID_TEST_PATTERN:		return "Test Pattern";
-	case V4L2_CID_DEINTERLACING_MODE:	return "Deinterlacing Mode";
-	case V4L2_CID_DIGITAL_GAIN:		return "Digital Gain";
-
-	/* DV controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_DV_CLASS:			return "Digital Video Controls";
-	case V4L2_CID_DV_TX_HOTPLUG:		return "Hotplug Present";
-	case V4L2_CID_DV_TX_RXSENSE:		return "RxSense Present";
-	case V4L2_CID_DV_TX_EDID_PRESENT:	return "EDID Present";
-	case V4L2_CID_DV_TX_MODE:		return "Transmit Mode";
-	case V4L2_CID_DV_TX_RGB_RANGE:		return "Tx RGB Quantization Range";
-	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:	return "Tx IT Content Type";
-	case V4L2_CID_DV_RX_POWER_PRESENT:	return "Power Present";
-	case V4L2_CID_DV_RX_RGB_RANGE:		return "Rx RGB Quantization Range";
-	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:	return "Rx IT Content Type";
-
-	case V4L2_CID_FM_RX_CLASS:		return "FM Radio Receiver Controls";
-	case V4L2_CID_TUNE_DEEMPHASIS:		return "De-Emphasis";
-	case V4L2_CID_RDS_RECEPTION:		return "RDS Reception";
-	case V4L2_CID_RF_TUNER_CLASS:		return "RF Tuner Controls";
-	case V4L2_CID_RF_TUNER_RF_GAIN:		return "RF Gain";
-	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:	return "LNA Gain, Auto";
-	case V4L2_CID_RF_TUNER_LNA_GAIN:	return "LNA Gain";
-	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:	return "Mixer Gain, Auto";
-	case V4L2_CID_RF_TUNER_MIXER_GAIN:	return "Mixer Gain";
-	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:	return "IF Gain, Auto";
-	case V4L2_CID_RF_TUNER_IF_GAIN:		return "IF Gain";
-	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:	return "Bandwidth, Auto";
-	case V4L2_CID_RF_TUNER_BANDWIDTH:	return "Bandwidth";
-	case V4L2_CID_RF_TUNER_PLL_LOCK:	return "PLL Lock";
-	case V4L2_CID_RDS_RX_PTY:		return "RDS Program Type";
-	case V4L2_CID_RDS_RX_PS_NAME:		return "RDS PS Name";
-	case V4L2_CID_RDS_RX_RADIO_TEXT:	return "RDS Radio Text";
-	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT: return "RDS Traffic Announcement";
-	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:	return "RDS Traffic Program";
-	case V4L2_CID_RDS_RX_MUSIC_SPEECH:	return "RDS Music";
-
-	/* Detection controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_DETECT_CLASS:		return "Detection Controls";
-	case V4L2_CID_DETECT_MD_MODE:		return "Motion Detection Mode";
-	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD: return "MD Global Threshold";
-	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:	return "MD Threshold Grid";
-	case V4L2_CID_DETECT_MD_REGION_GRID:	return "MD Region Grid";
-
-	/* Stateless Codec controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_CODEC_STATELESS_CLASS:	return "Stateless Codec Controls";
-	case V4L2_CID_STATELESS_H264_DECODE_MODE:		return "H264 Decode Mode";
-	case V4L2_CID_STATELESS_H264_START_CODE:		return "H264 Start Code";
-	case V4L2_CID_STATELESS_H264_SPS:			return "H264 Sequence Parameter Set";
-	case V4L2_CID_STATELESS_H264_PPS:			return "H264 Picture Parameter Set";
-	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:		return "H264 Scaling Matrix";
-	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:		return "H264 Prediction Weight Table";
-	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
-	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
-	case V4L2_CID_STATELESS_FWHT_PARAMS:			return "FWHT Stateless Parameters";
-	case V4L2_CID_STATELESS_VP8_FRAME:			return "VP8 Frame Parameters";
-	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:			return "MPEG-2 Sequence Header";
-	case V4L2_CID_STATELESS_MPEG2_PICTURE:			return "MPEG-2 Picture Header";
-	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:		return "MPEG-2 Quantisation Matrices";
-
-	/* Colorimetry controls */
-	/* Keep the order of the 'case's the same as in v4l2-controls.h! */
-	case V4L2_CID_COLORIMETRY_CLASS:	return "Colorimetry Controls";
-	case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:		return "HDR10 Content Light Info";
-	case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:	return "HDR10 Mastering Display";
-	default:
-		return NULL;
-	}
-}
-EXPORT_SYMBOL(v4l2_ctrl_get_name);
-
-void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
-		    s64 *min, s64 *max, u64 *step, s64 *def, u32 *flags)
-{
-	*name = v4l2_ctrl_get_name(id);
-	*flags = 0;
-
-	switch (id) {
-	case V4L2_CID_AUDIO_MUTE:
-	case V4L2_CID_AUDIO_LOUDNESS:
-	case V4L2_CID_AUTO_WHITE_BALANCE:
-	case V4L2_CID_AUTOGAIN:
-	case V4L2_CID_HFLIP:
-	case V4L2_CID_VFLIP:
-	case V4L2_CID_HUE_AUTO:
-	case V4L2_CID_CHROMA_AGC:
-	case V4L2_CID_COLOR_KILLER:
-	case V4L2_CID_AUTOBRIGHTNESS:
-	case V4L2_CID_MPEG_AUDIO_MUTE:
-	case V4L2_CID_MPEG_VIDEO_MUTE:
-	case V4L2_CID_MPEG_VIDEO_GOP_CLOSURE:
-	case V4L2_CID_MPEG_VIDEO_PULLDOWN:
-	case V4L2_CID_EXPOSURE_AUTO_PRIORITY:
-	case V4L2_CID_FOCUS_AUTO:
-	case V4L2_CID_PRIVACY:
-	case V4L2_CID_AUDIO_LIMITER_ENABLED:
-	case V4L2_CID_AUDIO_COMPRESSION_ENABLED:
-	case V4L2_CID_PILOT_TONE_ENABLED:
-	case V4L2_CID_ILLUMINATORS_1:
-	case V4L2_CID_ILLUMINATORS_2:
-	case V4L2_CID_FLASH_STROBE_STATUS:
-	case V4L2_CID_FLASH_CHARGE:
-	case V4L2_CID_FLASH_READY:
-	case V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER:
-	case V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE:
-	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY_ENABLE:
-	case V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE:
-	case V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE:
-	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE:
-	case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:
-	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:
-	case V4L2_CID_MPEG_VIDEO_AU_DELIMITER:
-	case V4L2_CID_WIDE_DYNAMIC_RANGE:
-	case V4L2_CID_IMAGE_STABILIZATION:
-	case V4L2_CID_RDS_RECEPTION:
-	case V4L2_CID_RF_TUNER_LNA_GAIN_AUTO:
-	case V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO:
-	case V4L2_CID_RF_TUNER_IF_GAIN_AUTO:
-	case V4L2_CID_RF_TUNER_BANDWIDTH_AUTO:
-	case V4L2_CID_RF_TUNER_PLL_LOCK:
-	case V4L2_CID_RDS_TX_MONO_STEREO:
-	case V4L2_CID_RDS_TX_ARTIFICIAL_HEAD:
-	case V4L2_CID_RDS_TX_COMPRESSED:
-	case V4L2_CID_RDS_TX_DYNAMIC_PTY:
-	case V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT:
-	case V4L2_CID_RDS_TX_TRAFFIC_PROGRAM:
-	case V4L2_CID_RDS_TX_MUSIC_SPEECH:
-	case V4L2_CID_RDS_TX_ALT_FREQS_ENABLE:
-	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
-	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
-	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
-		*type = V4L2_CTRL_TYPE_BOOLEAN;
-		*min = 0;
-		*max = *step = 1;
-		break;
-	case V4L2_CID_ROTATE:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		*flags |= V4L2_CTRL_FLAG_MODIFY_LAYOUT;
-		break;
-	case V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE:
-	case V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE:
-	case V4L2_CID_MPEG_VIDEO_DEC_DISPLAY_DELAY:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		break;
-	case V4L2_CID_MPEG_VIDEO_LTR_COUNT:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		break;
-	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		*flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
-		break;
-	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:
-		*type = V4L2_CTRL_TYPE_BITMASK;
-		*flags |= V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
-		break;
-	case V4L2_CID_MPEG_VIDEO_FORCE_KEY_FRAME:
-	case V4L2_CID_PAN_RESET:
-	case V4L2_CID_TILT_RESET:
-	case V4L2_CID_FLASH_STROBE:
-	case V4L2_CID_FLASH_STROBE_STOP:
-	case V4L2_CID_AUTO_FOCUS_START:
-	case V4L2_CID_AUTO_FOCUS_STOP:
-	case V4L2_CID_DO_WHITE_BALANCE:
-		*type = V4L2_CTRL_TYPE_BUTTON;
-		*flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
-			  V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
-		*min = *max = *step = *def = 0;
-		break;
-	case V4L2_CID_POWER_LINE_FREQUENCY:
-	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
-	case V4L2_CID_MPEG_AUDIO_ENCODING:
-	case V4L2_CID_MPEG_AUDIO_L1_BITRATE:
-	case V4L2_CID_MPEG_AUDIO_L2_BITRATE:
-	case V4L2_CID_MPEG_AUDIO_L3_BITRATE:
-	case V4L2_CID_MPEG_AUDIO_AC3_BITRATE:
-	case V4L2_CID_MPEG_AUDIO_MODE:
-	case V4L2_CID_MPEG_AUDIO_MODE_EXTENSION:
-	case V4L2_CID_MPEG_AUDIO_EMPHASIS:
-	case V4L2_CID_MPEG_AUDIO_CRC:
-	case V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK:
-	case V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK:
-	case V4L2_CID_MPEG_VIDEO_ENCODING:
-	case V4L2_CID_MPEG_VIDEO_ASPECT:
-	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
-	case V4L2_CID_MPEG_STREAM_TYPE:
-	case V4L2_CID_MPEG_STREAM_VBI_FMT:
-	case V4L2_CID_EXPOSURE_AUTO:
-	case V4L2_CID_AUTO_FOCUS_RANGE:
-	case V4L2_CID_COLORFX:
-	case V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE:
-	case V4L2_CID_TUNE_PREEMPHASIS:
-	case V4L2_CID_FLASH_LED_MODE:
-	case V4L2_CID_FLASH_STROBE_SOURCE:
-	case V4L2_CID_MPEG_VIDEO_HEADER_MODE:
-	case V4L2_CID_MPEG_VIDEO_FRAME_SKIP_MODE:
-	case V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE:
-	case V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE:
-	case V4L2_CID_MPEG_VIDEO_H264_LEVEL:
-	case V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE:
-	case V4L2_CID_MPEG_VIDEO_H264_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
-	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
-	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
-	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE:
-	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
-	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
-	case V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE:
-	case V4L2_CID_JPEG_CHROMA_SUBSAMPLING:
-	case V4L2_CID_ISO_SENSITIVITY_AUTO:
-	case V4L2_CID_EXPOSURE_METERING:
-	case V4L2_CID_SCENE_MODE:
-	case V4L2_CID_DV_TX_MODE:
-	case V4L2_CID_DV_TX_RGB_RANGE:
-	case V4L2_CID_DV_TX_IT_CONTENT_TYPE:
-	case V4L2_CID_DV_RX_RGB_RANGE:
-	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
-	case V4L2_CID_TEST_PATTERN:
-	case V4L2_CID_DEINTERLACING_MODE:
-	case V4L2_CID_TUNE_DEEMPHASIS:
-	case V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL:
-	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_VP9_LEVEL:
-	case V4L2_CID_DETECT_MD_MODE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_PROFILE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_LEVEL:
-	case V4L2_CID_MPEG_VIDEO_HEVC_HIER_CODING_TYPE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_REFRESH_TYPE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_SIZE_OF_LENGTH_FIELD:
-	case V4L2_CID_MPEG_VIDEO_HEVC_TIER:
-	case V4L2_CID_MPEG_VIDEO_HEVC_LOOP_FILTER_MODE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:
-	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:
-	case V4L2_CID_STATELESS_H264_DECODE_MODE:
-	case V4L2_CID_STATELESS_H264_START_CODE:
-	case V4L2_CID_CAMERA_ORIENTATION:
-		*type = V4L2_CTRL_TYPE_MENU;
-		break;
-	case V4L2_CID_LINK_FREQ:
-		*type = V4L2_CTRL_TYPE_INTEGER_MENU;
-		break;
-	case V4L2_CID_RDS_TX_PS_NAME:
-	case V4L2_CID_RDS_TX_RADIO_TEXT:
-	case V4L2_CID_RDS_RX_PS_NAME:
-	case V4L2_CID_RDS_RX_RADIO_TEXT:
-		*type = V4L2_CTRL_TYPE_STRING;
-		break;
-	case V4L2_CID_ISO_SENSITIVITY:
-	case V4L2_CID_AUTO_EXPOSURE_BIAS:
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS:
-	case V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES:
-		*type = V4L2_CTRL_TYPE_INTEGER_MENU;
-		break;
-	case V4L2_CID_USER_CLASS:
-	case V4L2_CID_CAMERA_CLASS:
-	case V4L2_CID_CODEC_CLASS:
-	case V4L2_CID_FM_TX_CLASS:
-	case V4L2_CID_FLASH_CLASS:
-	case V4L2_CID_JPEG_CLASS:
-	case V4L2_CID_IMAGE_SOURCE_CLASS:
-	case V4L2_CID_IMAGE_PROC_CLASS:
-	case V4L2_CID_DV_CLASS:
-	case V4L2_CID_FM_RX_CLASS:
-	case V4L2_CID_RF_TUNER_CLASS:
-	case V4L2_CID_DETECT_CLASS:
-	case V4L2_CID_CODEC_STATELESS_CLASS:
-	case V4L2_CID_COLORIMETRY_CLASS:
-		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
-		/* You can neither read nor write these */
-		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
-		*min = *max = *step = *def = 0;
-		break;
-	case V4L2_CID_BG_COLOR:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		*step = 1;
-		*min = 0;
-		/* Max is calculated as RGB888 that is 2^24 */
-		*max = 0xFFFFFF;
-		break;
-	case V4L2_CID_FLASH_FAULT:
-	case V4L2_CID_JPEG_ACTIVE_MARKER:
-	case V4L2_CID_3A_LOCK:
-	case V4L2_CID_AUTO_FOCUS_STATUS:
-	case V4L2_CID_DV_TX_HOTPLUG:
-	case V4L2_CID_DV_TX_RXSENSE:
-	case V4L2_CID_DV_TX_EDID_PRESENT:
-	case V4L2_CID_DV_RX_POWER_PRESENT:
-		*type = V4L2_CTRL_TYPE_BITMASK;
-		break;
-	case V4L2_CID_MIN_BUFFERS_FOR_CAPTURE:
-	case V4L2_CID_MIN_BUFFERS_FOR_OUTPUT:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
-		break;
-	case V4L2_CID_MPEG_VIDEO_DEC_PTS:
-		*type = V4L2_CTRL_TYPE_INTEGER64;
-		*flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY;
-		*min = *def = 0;
-		*max = 0x1ffffffffLL;
-		*step = 1;
-		break;
-	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:
-		*type = V4L2_CTRL_TYPE_INTEGER64;
-		*flags |= V4L2_CTRL_FLAG_VOLATILE | V4L2_CTRL_FLAG_READ_ONLY;
-		*min = *def = 0;
-		*max = 0x7fffffffffffffffLL;
-		*step = 1;
-		break;
-	case V4L2_CID_MPEG_VIDEO_DEC_CONCEAL_COLOR:
-		*type = V4L2_CTRL_TYPE_INTEGER64;
-		*min = 0;
-		/* default for 8 bit black, luma is 16, chroma is 128 */
-		*def = 0x8000800010LL;
-		*max = 0xffffffffffffLL;
-		*step = 1;
-		break;
-	case V4L2_CID_PIXEL_RATE:
-		*type = V4L2_CTRL_TYPE_INTEGER64;
-		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
-		break;
-	case V4L2_CID_DETECT_MD_REGION_GRID:
-		*type = V4L2_CTRL_TYPE_U8;
-		break;
-	case V4L2_CID_DETECT_MD_THRESHOLD_GRID:
-		*type = V4L2_CTRL_TYPE_U16;
-		break;
-	case V4L2_CID_RDS_TX_ALT_FREQS:
-		*type = V4L2_CTRL_TYPE_U32;
-		break;
-	case V4L2_CID_STATELESS_MPEG2_SEQUENCE:
-		*type = V4L2_CTRL_TYPE_MPEG2_SEQUENCE;
-		break;
-	case V4L2_CID_STATELESS_MPEG2_PICTURE:
-		*type = V4L2_CTRL_TYPE_MPEG2_PICTURE;
-		break;
-	case V4L2_CID_STATELESS_MPEG2_QUANTISATION:
-		*type = V4L2_CTRL_TYPE_MPEG2_QUANTISATION;
-		break;
-	case V4L2_CID_STATELESS_FWHT_PARAMS:
-		*type = V4L2_CTRL_TYPE_FWHT_PARAMS;
-		break;
-	case V4L2_CID_STATELESS_H264_SPS:
-		*type = V4L2_CTRL_TYPE_H264_SPS;
-		break;
-	case V4L2_CID_STATELESS_H264_PPS:
-		*type = V4L2_CTRL_TYPE_H264_PPS;
-		break;
-	case V4L2_CID_STATELESS_H264_SCALING_MATRIX:
-		*type = V4L2_CTRL_TYPE_H264_SCALING_MATRIX;
-		break;
-	case V4L2_CID_STATELESS_H264_SLICE_PARAMS:
-		*type = V4L2_CTRL_TYPE_H264_SLICE_PARAMS;
-		break;
-	case V4L2_CID_STATELESS_H264_DECODE_PARAMS:
-		*type = V4L2_CTRL_TYPE_H264_DECODE_PARAMS;
-		break;
-	case V4L2_CID_STATELESS_H264_PRED_WEIGHTS:
-		*type = V4L2_CTRL_TYPE_H264_PRED_WEIGHTS;
-		break;
-	case V4L2_CID_STATELESS_VP8_FRAME:
-		*type = V4L2_CTRL_TYPE_VP8_FRAME;
-		break;
-	case V4L2_CID_MPEG_VIDEO_HEVC_SPS:
-		*type = V4L2_CTRL_TYPE_HEVC_SPS;
-		break;
-	case V4L2_CID_MPEG_VIDEO_HEVC_PPS:
-		*type = V4L2_CTRL_TYPE_HEVC_PPS;
-		break;
-	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:
-		*type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS;
-		break;
-	case V4L2_CID_UNIT_CELL_SIZE:
-		*type = V4L2_CTRL_TYPE_AREA;
-		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
-		break;
-	case V4L2_CID_COLORIMETRY_HDR10_CLL_INFO:
-		*type = V4L2_CTRL_TYPE_HDR10_CLL_INFO;
-		break;
-	case V4L2_CID_COLORIMETRY_HDR10_MASTERING_DISPLAY:
-		*type = V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY;
-		break;
-	default:
-		*type = V4L2_CTRL_TYPE_INTEGER;
-		break;
-	}
-	switch (id) {
-	case V4L2_CID_MPEG_AUDIO_ENCODING:
-	case V4L2_CID_MPEG_AUDIO_MODE:
-	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
-	case V4L2_CID_MPEG_VIDEO_B_FRAMES:
-	case V4L2_CID_MPEG_STREAM_TYPE:
-		*flags |= V4L2_CTRL_FLAG_UPDATE;
-		break;
-	case V4L2_CID_AUDIO_VOLUME:
-	case V4L2_CID_AUDIO_BALANCE:
-	case V4L2_CID_AUDIO_BASS:
-	case V4L2_CID_AUDIO_TREBLE:
-	case V4L2_CID_BRIGHTNESS:
-	case V4L2_CID_CONTRAST:
-	case V4L2_CID_SATURATION:
-	case V4L2_CID_HUE:
-	case V4L2_CID_RED_BALANCE:
-	case V4L2_CID_BLUE_BALANCE:
-	case V4L2_CID_GAMMA:
-	case V4L2_CID_SHARPNESS:
-	case V4L2_CID_CHROMA_GAIN:
-	case V4L2_CID_RDS_TX_DEVIATION:
-	case V4L2_CID_AUDIO_LIMITER_RELEASE_TIME:
-	case V4L2_CID_AUDIO_LIMITER_DEVIATION:
-	case V4L2_CID_AUDIO_COMPRESSION_GAIN:
-	case V4L2_CID_AUDIO_COMPRESSION_THRESHOLD:
-	case V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME:
-	case V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME:
-	case V4L2_CID_PILOT_TONE_DEVIATION:
-	case V4L2_CID_PILOT_TONE_FREQUENCY:
-	case V4L2_CID_TUNE_POWER_LEVEL:
-	case V4L2_CID_TUNE_ANTENNA_CAPACITOR:
-	case V4L2_CID_RF_TUNER_RF_GAIN:
-	case V4L2_CID_RF_TUNER_LNA_GAIN:
-	case V4L2_CID_RF_TUNER_MIXER_GAIN:
-	case V4L2_CID_RF_TUNER_IF_GAIN:
-	case V4L2_CID_RF_TUNER_BANDWIDTH:
-	case V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD:
-		*flags |= V4L2_CTRL_FLAG_SLIDER;
-		break;
-	case V4L2_CID_PAN_RELATIVE:
-	case V4L2_CID_TILT_RELATIVE:
-	case V4L2_CID_FOCUS_RELATIVE:
-	case V4L2_CID_IRIS_RELATIVE:
-	case V4L2_CID_ZOOM_RELATIVE:
-		*flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
-			  V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
-		break;
-	case V4L2_CID_FLASH_STROBE_STATUS:
-	case V4L2_CID_AUTO_FOCUS_STATUS:
-	case V4L2_CID_FLASH_READY:
-	case V4L2_CID_DV_TX_HOTPLUG:
-	case V4L2_CID_DV_TX_RXSENSE:
-	case V4L2_CID_DV_TX_EDID_PRESENT:
-	case V4L2_CID_DV_RX_POWER_PRESENT:
-	case V4L2_CID_DV_RX_IT_CONTENT_TYPE:
-	case V4L2_CID_RDS_RX_PTY:
-	case V4L2_CID_RDS_RX_PS_NAME:
-	case V4L2_CID_RDS_RX_RADIO_TEXT:
-	case V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT:
-	case V4L2_CID_RDS_RX_TRAFFIC_PROGRAM:
-	case V4L2_CID_RDS_RX_MUSIC_SPEECH:
-	case V4L2_CID_CAMERA_ORIENTATION:
-	case V4L2_CID_CAMERA_SENSOR_ROTATION:
-		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
-		break;
-	case V4L2_CID_RF_TUNER_PLL_LOCK:
-		*flags |= V4L2_CTRL_FLAG_VOLATILE;
-		break;
-	}
-}
-EXPORT_SYMBOL(v4l2_ctrl_fill);
-
-static u32 user_flags(const struct v4l2_ctrl *ctrl)
-{
-	u32 flags = ctrl->flags;
-
-	if (ctrl->is_ptr)
-		flags |= V4L2_CTRL_FLAG_HAS_PAYLOAD;
-
-	return flags;
-}
-
-static void fill_event(struct v4l2_event *ev, struct v4l2_ctrl *ctrl, u32 changes)
-{
-	memset(ev, 0, sizeof(*ev));
-	ev->type = V4L2_EVENT_CTRL;
-	ev->id = ctrl->id;
-	ev->u.ctrl.changes = changes;
-	ev->u.ctrl.type = ctrl->type;
-	ev->u.ctrl.flags = user_flags(ctrl);
-	if (ctrl->is_ptr)
-		ev->u.ctrl.value64 = 0;
-	else
-		ev->u.ctrl.value64 = *ctrl->p_cur.p_s64;
-	ev->u.ctrl.minimum = ctrl->minimum;
-	ev->u.ctrl.maximum = ctrl->maximum;
-	if (ctrl->type == V4L2_CTRL_TYPE_MENU
-	    || ctrl->type == V4L2_CTRL_TYPE_INTEGER_MENU)
-		ev->u.ctrl.step = 1;
-	else
-		ev->u.ctrl.step = ctrl->step;
-	ev->u.ctrl.default_value = ctrl->default_value;
-}
-
-static void send_event(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 changes)
-{
-	struct v4l2_event ev;
-	struct v4l2_subscribed_event *sev;
-
-	if (list_empty(&ctrl->ev_subs))
-		return;
-	fill_event(&ev, ctrl, changes);
-
-	list_for_each_entry(sev, &ctrl->ev_subs, node)
-		if (sev->fh != fh ||
-		    (sev->flags & V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK))
-			v4l2_event_queue_fh(sev->fh, &ev);
-}
-
-static bool std_equal(const struct v4l2_ctrl *ctrl, u32 idx,
-		      union v4l2_ctrl_ptr ptr1,
-		      union v4l2_ctrl_ptr ptr2)
-{
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_BUTTON:
-		return false;
-	case V4L2_CTRL_TYPE_STRING:
-		idx *= ctrl->elem_size;
-		/* strings are always 0-terminated */
-		return !strcmp(ptr1.p_char + idx, ptr2.p_char + idx);
-	case V4L2_CTRL_TYPE_INTEGER64:
-		return ptr1.p_s64[idx] == ptr2.p_s64[idx];
-	case V4L2_CTRL_TYPE_U8:
-		return ptr1.p_u8[idx] == ptr2.p_u8[idx];
-	case V4L2_CTRL_TYPE_U16:
-		return ptr1.p_u16[idx] == ptr2.p_u16[idx];
-	case V4L2_CTRL_TYPE_U32:
-		return ptr1.p_u32[idx] == ptr2.p_u32[idx];
-	default:
-		if (ctrl->is_int)
-			return ptr1.p_s32[idx] == ptr2.p_s32[idx];
-		idx *= ctrl->elem_size;
-		return !memcmp(ptr1.p_const + idx, ptr2.p_const + idx,
-			       ctrl->elem_size);
-	}
-}
-
-static void std_init_compound(const struct v4l2_ctrl *ctrl, u32 idx,
-			      union v4l2_ctrl_ptr ptr)
-{
-	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
-	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
-	struct v4l2_ctrl_mpeg2_quantisation *p_mpeg2_quant;
-	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
-	struct v4l2_ctrl_fwht_params *p_fwht_params;
-	void *p = ptr.p + idx * ctrl->elem_size;
-
-	if (ctrl->p_def.p_const)
-		memcpy(p, ctrl->p_def.p_const, ctrl->elem_size);
-	else
-		memset(p, 0, ctrl->elem_size);
-
-	switch ((u32)ctrl->type) {
-	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
-		p_mpeg2_sequence = p;
-
-		/* 4:2:0 */
-		p_mpeg2_sequence->chroma_format = 1;
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
-		p_mpeg2_picture = p;
-
-		/* interlaced top field */
-		p_mpeg2_picture->picture_structure = V4L2_MPEG2_PIC_TOP_FIELD;
-		p_mpeg2_picture->picture_coding_type =
-					V4L2_MPEG2_PIC_CODING_TYPE_I;
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
-		p_mpeg2_quant = p;
-
-		memcpy(p_mpeg2_quant->intra_quantiser_matrix,
-		       mpeg2_intra_quant_matrix,
-		       ARRAY_SIZE(mpeg2_intra_quant_matrix));
-		/*
-		 * The default non-intra MPEG-2 quantisation
-		 * coefficients are all 16, as per the specification.
-		 */
-		memset(p_mpeg2_quant->non_intra_quantiser_matrix, 16,
-		       sizeof(p_mpeg2_quant->non_intra_quantiser_matrix));
-		break;
-	case V4L2_CTRL_TYPE_VP8_FRAME:
-		p_vp8_frame = p;
-		p_vp8_frame->num_dct_parts = 1;
-		break;
-	case V4L2_CTRL_TYPE_FWHT_PARAMS:
-		p_fwht_params = p;
-		p_fwht_params->version = V4L2_FWHT_VERSION;
-		p_fwht_params->width = 1280;
-		p_fwht_params->height = 720;
-		p_fwht_params->flags = V4L2_FWHT_FL_PIXENC_YUV |
-			(2 << V4L2_FWHT_FL_COMPONENTS_NUM_OFFSET);
-		break;
-	}
-}
-
-static void std_init(const struct v4l2_ctrl *ctrl, u32 idx,
-		     union v4l2_ctrl_ptr ptr)
-{
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_STRING:
-		idx *= ctrl->elem_size;
-		memset(ptr.p_char + idx, ' ', ctrl->minimum);
-		ptr.p_char[idx + ctrl->minimum] = '\0';
-		break;
-	case V4L2_CTRL_TYPE_INTEGER64:
-		ptr.p_s64[idx] = ctrl->default_value;
-		break;
-	case V4L2_CTRL_TYPE_INTEGER:
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-	case V4L2_CTRL_TYPE_MENU:
-	case V4L2_CTRL_TYPE_BITMASK:
-	case V4L2_CTRL_TYPE_BOOLEAN:
-		ptr.p_s32[idx] = ctrl->default_value;
-		break;
-	case V4L2_CTRL_TYPE_BUTTON:
-	case V4L2_CTRL_TYPE_CTRL_CLASS:
-		ptr.p_s32[idx] = 0;
-		break;
-	case V4L2_CTRL_TYPE_U8:
-		ptr.p_u8[idx] = ctrl->default_value;
-		break;
-	case V4L2_CTRL_TYPE_U16:
-		ptr.p_u16[idx] = ctrl->default_value;
-		break;
-	case V4L2_CTRL_TYPE_U32:
-		ptr.p_u32[idx] = ctrl->default_value;
-		break;
-	default:
-		std_init_compound(ctrl, idx, ptr);
-		break;
-	}
-}
-
-static void std_log(const struct v4l2_ctrl *ctrl)
-{
-	union v4l2_ctrl_ptr ptr = ctrl->p_cur;
-
-	if (ctrl->is_array) {
-		unsigned i;
-
-		for (i = 0; i < ctrl->nr_of_dims; i++)
-			pr_cont("[%u]", ctrl->dims[i]);
-		pr_cont(" ");
-	}
-
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_INTEGER:
-		pr_cont("%d", *ptr.p_s32);
-		break;
-	case V4L2_CTRL_TYPE_BOOLEAN:
-		pr_cont("%s", *ptr.p_s32 ? "true" : "false");
-		break;
-	case V4L2_CTRL_TYPE_MENU:
-		pr_cont("%s", ctrl->qmenu[*ptr.p_s32]);
-		break;
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-		pr_cont("%lld", ctrl->qmenu_int[*ptr.p_s32]);
-		break;
-	case V4L2_CTRL_TYPE_BITMASK:
-		pr_cont("0x%08x", *ptr.p_s32);
-		break;
-	case V4L2_CTRL_TYPE_INTEGER64:
-		pr_cont("%lld", *ptr.p_s64);
-		break;
-	case V4L2_CTRL_TYPE_STRING:
-		pr_cont("%s", ptr.p_char);
-		break;
-	case V4L2_CTRL_TYPE_U8:
-		pr_cont("%u", (unsigned)*ptr.p_u8);
-		break;
-	case V4L2_CTRL_TYPE_U16:
-		pr_cont("%u", (unsigned)*ptr.p_u16);
-		break;
-	case V4L2_CTRL_TYPE_U32:
-		pr_cont("%u", (unsigned)*ptr.p_u32);
-		break;
-	case V4L2_CTRL_TYPE_H264_SPS:
-		pr_cont("H264_SPS");
-		break;
-	case V4L2_CTRL_TYPE_H264_PPS:
-		pr_cont("H264_PPS");
-		break;
-	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
-		pr_cont("H264_SCALING_MATRIX");
-		break;
-	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
-		pr_cont("H264_SLICE_PARAMS");
-		break;
-	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
-		pr_cont("H264_DECODE_PARAMS");
-		break;
-	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
-		pr_cont("H264_PRED_WEIGHTS");
-		break;
-	case V4L2_CTRL_TYPE_FWHT_PARAMS:
-		pr_cont("FWHT_PARAMS");
-		break;
-	case V4L2_CTRL_TYPE_VP8_FRAME:
-		pr_cont("VP8_FRAME");
-		break;
-	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
-		pr_cont("HDR10_CLL_INFO");
-		break;
-	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
-		pr_cont("HDR10_MASTERING_DISPLAY");
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
-		pr_cont("MPEG2_QUANTISATION");
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
-		pr_cont("MPEG2_SEQUENCE");
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
-		pr_cont("MPEG2_PICTURE");
-		break;
-	default:
-		pr_cont("unknown type %d", ctrl->type);
-		break;
-	}
-}
-
-/*
- * Round towards the closest legal value. Be careful when we are
- * close to the maximum range of the control type to prevent
- * wrap-arounds.
- */
-#define ROUND_TO_RANGE(val, offset_type, ctrl)			\
-({								\
-	offset_type offset;					\
-	if ((ctrl)->maximum >= 0 &&				\
-	    val >= (ctrl)->maximum - (s32)((ctrl)->step / 2))	\
-		val = (ctrl)->maximum;				\
-	else							\
-		val += (s32)((ctrl)->step / 2);			\
-	val = clamp_t(typeof(val), val,				\
-		      (ctrl)->minimum, (ctrl)->maximum);	\
-	offset = (val) - (ctrl)->minimum;			\
-	offset = (ctrl)->step * (offset / (u32)(ctrl)->step);	\
-	val = (ctrl)->minimum + offset;				\
-	0;							\
-})
-
-/* Validate a new control */
-
-#define zero_padding(s) \
-	memset(&(s).padding, 0, sizeof((s).padding))
-#define zero_reserved(s) \
-	memset(&(s).reserved, 0, sizeof((s).reserved))
-
-/*
- * Compound controls validation requires setting unused fields/flags to zero
- * in order to properly detect unchanged controls with std_equal's memcmp.
- */
-static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
-				 union v4l2_ctrl_ptr ptr)
-{
-	struct v4l2_ctrl_mpeg2_sequence *p_mpeg2_sequence;
-	struct v4l2_ctrl_mpeg2_picture *p_mpeg2_picture;
-	struct v4l2_ctrl_vp8_frame *p_vp8_frame;
-	struct v4l2_ctrl_fwht_params *p_fwht_params;
-	struct v4l2_ctrl_h264_sps *p_h264_sps;
-	struct v4l2_ctrl_h264_pps *p_h264_pps;
-	struct v4l2_ctrl_h264_pred_weights *p_h264_pred_weights;
-	struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
-	struct v4l2_ctrl_h264_decode_params *p_h264_dec_params;
-	struct v4l2_ctrl_hevc_sps *p_hevc_sps;
-	struct v4l2_ctrl_hevc_pps *p_hevc_pps;
-	struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
-	struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering;
-	struct v4l2_area *area;
-	void *p = ptr.p + idx * ctrl->elem_size;
-	unsigned int i;
-
-	switch ((u32)ctrl->type) {
-	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
-		p_mpeg2_sequence = p;
-
-		switch (p_mpeg2_sequence->chroma_format) {
-		case 1: /* 4:2:0 */
-		case 2: /* 4:2:2 */
-		case 3: /* 4:4:4 */
-			break;
-		default:
-			return -EINVAL;
-		}
-		break;
-
-	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
-		p_mpeg2_picture = p;
-
-		switch (p_mpeg2_picture->intra_dc_precision) {
-		case 0: /* 8 bits */
-		case 1: /* 9 bits */
-		case 2: /* 10 bits */
-		case 3: /* 11 bits */
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		switch (p_mpeg2_picture->picture_structure) {
-		case V4L2_MPEG2_PIC_TOP_FIELD:
-		case V4L2_MPEG2_PIC_BOTTOM_FIELD:
-		case V4L2_MPEG2_PIC_FRAME:
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		switch (p_mpeg2_picture->picture_coding_type) {
-		case V4L2_MPEG2_PIC_CODING_TYPE_I:
-		case V4L2_MPEG2_PIC_CODING_TYPE_P:
-		case V4L2_MPEG2_PIC_CODING_TYPE_B:
-			break;
-		default:
-			return -EINVAL;
-		}
-		zero_reserved(*p_mpeg2_picture);
-		break;
-
-	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
-		break;
-
-	case V4L2_CTRL_TYPE_FWHT_PARAMS:
-		p_fwht_params = p;
-		if (p_fwht_params->version < V4L2_FWHT_VERSION)
-			return -EINVAL;
-		if (!p_fwht_params->width || !p_fwht_params->height)
-			return -EINVAL;
-		break;
-
-	case V4L2_CTRL_TYPE_H264_SPS:
-		p_h264_sps = p;
-
-		/* Some syntax elements are only conditionally valid */
-		if (p_h264_sps->pic_order_cnt_type != 0) {
-			p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 = 0;
-		} else if (p_h264_sps->pic_order_cnt_type != 1) {
-			p_h264_sps->num_ref_frames_in_pic_order_cnt_cycle = 0;
-			p_h264_sps->offset_for_non_ref_pic = 0;
-			p_h264_sps->offset_for_top_to_bottom_field = 0;
-			memset(&p_h264_sps->offset_for_ref_frame, 0,
-			       sizeof(p_h264_sps->offset_for_ref_frame));
-		}
-
-		if (!V4L2_H264_SPS_HAS_CHROMA_FORMAT(p_h264_sps)) {
-			p_h264_sps->chroma_format_idc = 1;
-			p_h264_sps->bit_depth_luma_minus8 = 0;
-			p_h264_sps->bit_depth_chroma_minus8 = 0;
-
-			p_h264_sps->flags &=
-				~V4L2_H264_SPS_FLAG_QPPRIME_Y_ZERO_TRANSFORM_BYPASS;
-
-			if (p_h264_sps->chroma_format_idc < 3)
-				p_h264_sps->flags &=
-					~V4L2_H264_SPS_FLAG_SEPARATE_COLOUR_PLANE;
-		}
-
-		if (p_h264_sps->flags & V4L2_H264_SPS_FLAG_FRAME_MBS_ONLY)
-			p_h264_sps->flags &=
-				~V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD;
-
-		/*
-		 * Chroma 4:2:2 format require at least High 4:2:2 profile.
-		 *
-		 * The H264 specification and well-known parser implementations
-		 * use profile-idc values directly, as that is clearer and
-		 * less ambiguous. We do the same here.
-		 */
-		if (p_h264_sps->profile_idc < 122 &&
-		    p_h264_sps->chroma_format_idc > 1)
-			return -EINVAL;
-		/* Chroma 4:4:4 format require at least High 4:2:2 profile */
-		if (p_h264_sps->profile_idc < 244 &&
-		    p_h264_sps->chroma_format_idc > 2)
-			return -EINVAL;
-		if (p_h264_sps->chroma_format_idc > 3)
-			return -EINVAL;
-
-		if (p_h264_sps->bit_depth_luma_minus8 > 6)
-			return -EINVAL;
-		if (p_h264_sps->bit_depth_chroma_minus8 > 6)
-			return -EINVAL;
-		if (p_h264_sps->log2_max_frame_num_minus4 > 12)
-			return -EINVAL;
-		if (p_h264_sps->pic_order_cnt_type > 2)
-			return -EINVAL;
-		if (p_h264_sps->log2_max_pic_order_cnt_lsb_minus4 > 12)
-			return -EINVAL;
-		if (p_h264_sps->max_num_ref_frames > V4L2_H264_REF_LIST_LEN)
-			return -EINVAL;
-		break;
-
-	case V4L2_CTRL_TYPE_H264_PPS:
-		p_h264_pps = p;
-
-		if (p_h264_pps->num_slice_groups_minus1 > 7)
-			return -EINVAL;
-		if (p_h264_pps->num_ref_idx_l0_default_active_minus1 >
-		    (V4L2_H264_REF_LIST_LEN - 1))
-			return -EINVAL;
-		if (p_h264_pps->num_ref_idx_l1_default_active_minus1 >
-		    (V4L2_H264_REF_LIST_LEN - 1))
-			return -EINVAL;
-		if (p_h264_pps->weighted_bipred_idc > 2)
-			return -EINVAL;
-		/*
-		 * pic_init_qp_minus26 shall be in the range of
-		 * -(26 + QpBdOffset_y) to +25, inclusive,
-		 *  where QpBdOffset_y is 6 * bit_depth_luma_minus8
-		 */
-		if (p_h264_pps->pic_init_qp_minus26 < -62 ||
-		    p_h264_pps->pic_init_qp_minus26 > 25)
-			return -EINVAL;
-		if (p_h264_pps->pic_init_qs_minus26 < -26 ||
-		    p_h264_pps->pic_init_qs_minus26 > 25)
-			return -EINVAL;
-		if (p_h264_pps->chroma_qp_index_offset < -12 ||
-		    p_h264_pps->chroma_qp_index_offset > 12)
-			return -EINVAL;
-		if (p_h264_pps->second_chroma_qp_index_offset < -12 ||
-		    p_h264_pps->second_chroma_qp_index_offset > 12)
-			return -EINVAL;
-		break;
-
-	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
-		break;
-
-	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
-		p_h264_pred_weights = p;
-
-		if (p_h264_pred_weights->luma_log2_weight_denom > 7)
-			return -EINVAL;
-		if (p_h264_pred_weights->chroma_log2_weight_denom > 7)
-			return -EINVAL;
-		break;
-
-	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
-		p_h264_slice_params = p;
-
-		if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B)
-			p_h264_slice_params->flags &=
-				~V4L2_H264_SLICE_FLAG_DIRECT_SPATIAL_MV_PRED;
-
-		if (p_h264_slice_params->colour_plane_id > 2)
-			return -EINVAL;
-		if (p_h264_slice_params->cabac_init_idc > 2)
-			return -EINVAL;
-		if (p_h264_slice_params->disable_deblocking_filter_idc > 2)
-			return -EINVAL;
-		if (p_h264_slice_params->slice_alpha_c0_offset_div2 < -6 ||
-		    p_h264_slice_params->slice_alpha_c0_offset_div2 > 6)
-			return -EINVAL;
-		if (p_h264_slice_params->slice_beta_offset_div2 < -6 ||
-		    p_h264_slice_params->slice_beta_offset_div2 > 6)
-			return -EINVAL;
-
-		if (p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_I ||
-		    p_h264_slice_params->slice_type == V4L2_H264_SLICE_TYPE_SI)
-			p_h264_slice_params->num_ref_idx_l0_active_minus1 = 0;
-		if (p_h264_slice_params->slice_type != V4L2_H264_SLICE_TYPE_B)
-			p_h264_slice_params->num_ref_idx_l1_active_minus1 = 0;
-
-		if (p_h264_slice_params->num_ref_idx_l0_active_minus1 >
-		    (V4L2_H264_REF_LIST_LEN - 1))
-			return -EINVAL;
-		if (p_h264_slice_params->num_ref_idx_l1_active_minus1 >
-		    (V4L2_H264_REF_LIST_LEN - 1))
-			return -EINVAL;
-		zero_reserved(*p_h264_slice_params);
-		break;
-
-	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
-		p_h264_dec_params = p;
-
-		if (p_h264_dec_params->nal_ref_idc > 3)
-			return -EINVAL;
-		for (i = 0; i < V4L2_H264_NUM_DPB_ENTRIES; i++) {
-			struct v4l2_h264_dpb_entry *dpb_entry =
-				&p_h264_dec_params->dpb[i];
-
-			zero_reserved(*dpb_entry);
-		}
-		zero_reserved(*p_h264_dec_params);
-		break;
-
-	case V4L2_CTRL_TYPE_VP8_FRAME:
-		p_vp8_frame = p;
-
-		switch (p_vp8_frame->num_dct_parts) {
-		case 1:
-		case 2:
-		case 4:
-		case 8:
-			break;
-		default:
-			return -EINVAL;
-		}
-		zero_padding(p_vp8_frame->segment);
-		zero_padding(p_vp8_frame->lf);
-		zero_padding(p_vp8_frame->quant);
-		zero_padding(p_vp8_frame->entropy);
-		zero_padding(p_vp8_frame->coder_state);
-		break;
-
-	case V4L2_CTRL_TYPE_HEVC_SPS:
-		p_hevc_sps = p;
-
-		if (!(p_hevc_sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED)) {
-			p_hevc_sps->pcm_sample_bit_depth_luma_minus1 = 0;
-			p_hevc_sps->pcm_sample_bit_depth_chroma_minus1 = 0;
-			p_hevc_sps->log2_min_pcm_luma_coding_block_size_minus3 = 0;
-			p_hevc_sps->log2_diff_max_min_pcm_luma_coding_block_size = 0;
-		}
-
-		if (!(p_hevc_sps->flags &
-		      V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT))
-			p_hevc_sps->num_long_term_ref_pics_sps = 0;
-		break;
-
-	case V4L2_CTRL_TYPE_HEVC_PPS:
-		p_hevc_pps = p;
-
-		if (!(p_hevc_pps->flags &
-		      V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED))
-			p_hevc_pps->diff_cu_qp_delta_depth = 0;
-
-		if (!(p_hevc_pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED)) {
-			p_hevc_pps->num_tile_columns_minus1 = 0;
-			p_hevc_pps->num_tile_rows_minus1 = 0;
-			memset(&p_hevc_pps->column_width_minus1, 0,
-			       sizeof(p_hevc_pps->column_width_minus1));
-			memset(&p_hevc_pps->row_height_minus1, 0,
-			       sizeof(p_hevc_pps->row_height_minus1));
-
-			p_hevc_pps->flags &=
-				~V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
-		}
-
-		if (p_hevc_pps->flags &
-		    V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER) {
-			p_hevc_pps->pps_beta_offset_div2 = 0;
-			p_hevc_pps->pps_tc_offset_div2 = 0;
-		}
-
-		zero_padding(*p_hevc_pps);
-		break;
-
-	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
-		p_hevc_slice_params = p;
-
-		if (p_hevc_slice_params->num_active_dpb_entries >
-		    V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
-			return -EINVAL;
-
-		zero_padding(p_hevc_slice_params->pred_weight_table);
-
-		for (i = 0; i < p_hevc_slice_params->num_active_dpb_entries;
-		     i++) {
-			struct v4l2_hevc_dpb_entry *dpb_entry =
-				&p_hevc_slice_params->dpb[i];
-
-			zero_padding(*dpb_entry);
-		}
-
-		zero_padding(*p_hevc_slice_params);
-		break;
-
-	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
-		break;
-
-	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
-		p_hdr10_mastering = p;
-
-		for (i = 0; i < 3; ++i) {
-			if (p_hdr10_mastering->display_primaries_x[i] <
-				V4L2_HDR10_MASTERING_PRIMARIES_X_LOW ||
-			    p_hdr10_mastering->display_primaries_x[i] >
-				V4L2_HDR10_MASTERING_PRIMARIES_X_HIGH ||
-			    p_hdr10_mastering->display_primaries_y[i] <
-				V4L2_HDR10_MASTERING_PRIMARIES_Y_LOW ||
-			    p_hdr10_mastering->display_primaries_y[i] >
-				V4L2_HDR10_MASTERING_PRIMARIES_Y_HIGH)
-				return -EINVAL;
-		}
-
-		if (p_hdr10_mastering->white_point_x <
-			V4L2_HDR10_MASTERING_WHITE_POINT_X_LOW ||
-		    p_hdr10_mastering->white_point_x >
-			V4L2_HDR10_MASTERING_WHITE_POINT_X_HIGH ||
-		    p_hdr10_mastering->white_point_y <
-			V4L2_HDR10_MASTERING_WHITE_POINT_Y_LOW ||
-		    p_hdr10_mastering->white_point_y >
-			V4L2_HDR10_MASTERING_WHITE_POINT_Y_HIGH)
-			return -EINVAL;
-
-		if (p_hdr10_mastering->max_display_mastering_luminance <
-			V4L2_HDR10_MASTERING_MAX_LUMA_LOW ||
-		    p_hdr10_mastering->max_display_mastering_luminance >
-			V4L2_HDR10_MASTERING_MAX_LUMA_HIGH ||
-		    p_hdr10_mastering->min_display_mastering_luminance <
-			V4L2_HDR10_MASTERING_MIN_LUMA_LOW ||
-		    p_hdr10_mastering->min_display_mastering_luminance >
-			V4L2_HDR10_MASTERING_MIN_LUMA_HIGH)
-			return -EINVAL;
-
-		/* The following restriction comes from ITU-T Rec. H.265 spec */
-		if (p_hdr10_mastering->max_display_mastering_luminance ==
-			V4L2_HDR10_MASTERING_MAX_LUMA_LOW &&
-		    p_hdr10_mastering->min_display_mastering_luminance ==
-			V4L2_HDR10_MASTERING_MIN_LUMA_HIGH)
-			return -EINVAL;
-
-		break;
-
-	case V4L2_CTRL_TYPE_AREA:
-		area = p;
-		if (!area->width || !area->height)
-			return -EINVAL;
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
-			union v4l2_ctrl_ptr ptr)
-{
-	size_t len;
-	u64 offset;
-	s64 val;
-
-	switch ((u32)ctrl->type) {
-	case V4L2_CTRL_TYPE_INTEGER:
-		return ROUND_TO_RANGE(ptr.p_s32[idx], u32, ctrl);
-	case V4L2_CTRL_TYPE_INTEGER64:
-		/*
-		 * We can't use the ROUND_TO_RANGE define here due to
-		 * the u64 divide that needs special care.
-		 */
-		val = ptr.p_s64[idx];
-		if (ctrl->maximum >= 0 && val >= ctrl->maximum - (s64)(ctrl->step / 2))
-			val = ctrl->maximum;
-		else
-			val += (s64)(ctrl->step / 2);
-		val = clamp_t(s64, val, ctrl->minimum, ctrl->maximum);
-		offset = val - ctrl->minimum;
-		do_div(offset, ctrl->step);
-		ptr.p_s64[idx] = ctrl->minimum + offset * ctrl->step;
-		return 0;
-	case V4L2_CTRL_TYPE_U8:
-		return ROUND_TO_RANGE(ptr.p_u8[idx], u8, ctrl);
-	case V4L2_CTRL_TYPE_U16:
-		return ROUND_TO_RANGE(ptr.p_u16[idx], u16, ctrl);
-	case V4L2_CTRL_TYPE_U32:
-		return ROUND_TO_RANGE(ptr.p_u32[idx], u32, ctrl);
-
-	case V4L2_CTRL_TYPE_BOOLEAN:
-		ptr.p_s32[idx] = !!ptr.p_s32[idx];
-		return 0;
-
-	case V4L2_CTRL_TYPE_MENU:
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-		if (ptr.p_s32[idx] < ctrl->minimum || ptr.p_s32[idx] > ctrl->maximum)
-			return -ERANGE;
-		if (ptr.p_s32[idx] < BITS_PER_LONG_LONG &&
-		    (ctrl->menu_skip_mask & BIT_ULL(ptr.p_s32[idx])))
-			return -EINVAL;
-		if (ctrl->type == V4L2_CTRL_TYPE_MENU &&
-		    ctrl->qmenu[ptr.p_s32[idx]][0] == '\0')
-			return -EINVAL;
-		return 0;
-
-	case V4L2_CTRL_TYPE_BITMASK:
-		ptr.p_s32[idx] &= ctrl->maximum;
-		return 0;
-
-	case V4L2_CTRL_TYPE_BUTTON:
-	case V4L2_CTRL_TYPE_CTRL_CLASS:
-		ptr.p_s32[idx] = 0;
-		return 0;
-
-	case V4L2_CTRL_TYPE_STRING:
-		idx *= ctrl->elem_size;
-		len = strlen(ptr.p_char + idx);
-		if (len < ctrl->minimum)
-			return -ERANGE;
-		if ((len - (u32)ctrl->minimum) % (u32)ctrl->step)
-			return -ERANGE;
-		return 0;
-
-	default:
-		return std_validate_compound(ctrl, idx, ptr);
-	}
-}
-
-static const struct v4l2_ctrl_type_ops std_type_ops = {
-	.equal = std_equal,
-	.init = std_init,
-	.log = std_log,
-	.validate = std_validate,
-};
-
-/* Helper function: copy the given control value back to the caller */
-static int ptr_to_user(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl *ctrl,
-		       union v4l2_ctrl_ptr ptr)
-{
-	u32 len;
-
-	if (ctrl->is_ptr && !ctrl->is_string)
-		return copy_to_user(c->ptr, ptr.p_const, c->size) ?
-		       -EFAULT : 0;
-
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_STRING:
-		len = strlen(ptr.p_char);
-		if (c->size < len + 1) {
-			c->size = ctrl->elem_size;
-			return -ENOSPC;
-		}
-		return copy_to_user(c->string, ptr.p_char, len + 1) ?
-		       -EFAULT : 0;
-	case V4L2_CTRL_TYPE_INTEGER64:
-		c->value64 = *ptr.p_s64;
-		break;
-	default:
-		c->value = *ptr.p_s32;
-		break;
-	}
-	return 0;
-}
-
-/* Helper function: copy the current control value back to the caller */
-static int cur_to_user(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl *ctrl)
-{
-	return ptr_to_user(c, ctrl, ctrl->p_cur);
-}
-
-/* Helper function: copy the new control value back to the caller */
-static int new_to_user(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl *ctrl)
-{
-	return ptr_to_user(c, ctrl, ctrl->p_new);
-}
-
-/* Helper function: copy the request value back to the caller */
-static int req_to_user(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl_ref *ref)
-{
-	return ptr_to_user(c, ref->ctrl, ref->p_req);
-}
-
-/* Helper function: copy the initial control value back to the caller */
-static int def_to_user(struct v4l2_ext_control *c, struct v4l2_ctrl *ctrl)
-{
-	int idx;
-
-	for (idx = 0; idx < ctrl->elems; idx++)
-		ctrl->type_ops->init(ctrl, idx, ctrl->p_new);
-
-	return ptr_to_user(c, ctrl, ctrl->p_new);
-}
-
-/* Helper function: copy the caller-provider value to the given control value */
-static int user_to_ptr(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl *ctrl,
-		       union v4l2_ctrl_ptr ptr)
-{
-	int ret;
-	u32 size;
-
-	ctrl->is_new = 1;
-	if (ctrl->is_ptr && !ctrl->is_string) {
-		unsigned idx;
-
-		ret = copy_from_user(ptr.p, c->ptr, c->size) ? -EFAULT : 0;
-		if (ret || !ctrl->is_array)
-			return ret;
-		for (idx = c->size / ctrl->elem_size; idx < ctrl->elems; idx++)
-			ctrl->type_ops->init(ctrl, idx, ptr);
-		return 0;
-	}
-
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_INTEGER64:
-		*ptr.p_s64 = c->value64;
-		break;
-	case V4L2_CTRL_TYPE_STRING:
-		size = c->size;
-		if (size == 0)
-			return -ERANGE;
-		if (size > ctrl->maximum + 1)
-			size = ctrl->maximum + 1;
-		ret = copy_from_user(ptr.p_char, c->string, size) ? -EFAULT : 0;
-		if (!ret) {
-			char last = ptr.p_char[size - 1];
-
-			ptr.p_char[size - 1] = 0;
-			/* If the string was longer than ctrl->maximum,
-			   then return an error. */
-			if (strlen(ptr.p_char) == ctrl->maximum && last)
-				return -ERANGE;
-		}
-		return ret;
-	default:
-		*ptr.p_s32 = c->value;
-		break;
-	}
-	return 0;
-}
-
-/* Helper function: copy the caller-provider value as the new control value */
-static int user_to_new(struct v4l2_ext_control *c,
-		       struct v4l2_ctrl *ctrl)
-{
-	return user_to_ptr(c, ctrl, ctrl->p_new);
-}
-
-/* Copy the one value to another. */
-static void ptr_to_ptr(struct v4l2_ctrl *ctrl,
-		       union v4l2_ctrl_ptr from, union v4l2_ctrl_ptr to)
-{
-	if (ctrl == NULL)
-		return;
-	memcpy(to.p, from.p_const, ctrl->elems * ctrl->elem_size);
-}
-
-/* Copy the new value to the current value. */
-static void new_to_cur(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags)
-{
-	bool changed;
-
-	if (ctrl == NULL)
-		return;
-
-	/* has_changed is set by cluster_changed */
-	changed = ctrl->has_changed;
-	if (changed)
-		ptr_to_ptr(ctrl, ctrl->p_new, ctrl->p_cur);
-
-	if (ch_flags & V4L2_EVENT_CTRL_CH_FLAGS) {
-		/* Note: CH_FLAGS is only set for auto clusters. */
-		ctrl->flags &=
-			~(V4L2_CTRL_FLAG_INACTIVE | V4L2_CTRL_FLAG_VOLATILE);
-		if (!is_cur_manual(ctrl->cluster[0])) {
-			ctrl->flags |= V4L2_CTRL_FLAG_INACTIVE;
-			if (ctrl->cluster[0]->has_volatiles)
-				ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
-		}
-		fh = NULL;
-	}
-	if (changed || ch_flags) {
-		/* If a control was changed that was not one of the controls
-		   modified by the application, then send the event to all. */
-		if (!ctrl->is_new)
-			fh = NULL;
-		send_event(fh, ctrl,
-			(changed ? V4L2_EVENT_CTRL_CH_VALUE : 0) | ch_flags);
-		if (ctrl->call_notify && changed && ctrl->handler->notify)
-			ctrl->handler->notify(ctrl, ctrl->handler->notify_priv);
-	}
-}
-
-/* Copy the current value to the new value */
-static void cur_to_new(struct v4l2_ctrl *ctrl)
-{
-	if (ctrl == NULL)
-		return;
-	ptr_to_ptr(ctrl, ctrl->p_cur, ctrl->p_new);
-}
-
-/* Copy the new value to the request value */
-static void new_to_req(struct v4l2_ctrl_ref *ref)
-{
-	if (!ref)
-		return;
-	ptr_to_ptr(ref->ctrl, ref->ctrl->p_new, ref->p_req);
-	ref->valid_p_req = true;
-}
-
-/* Copy the current value to the request value */
-static void cur_to_req(struct v4l2_ctrl_ref *ref)
-{
-	if (!ref)
-		return;
-	ptr_to_ptr(ref->ctrl, ref->ctrl->p_cur, ref->p_req);
-	ref->valid_p_req = true;
-}
-
-/* Copy the request value to the new value */
-static void req_to_new(struct v4l2_ctrl_ref *ref)
-{
-	if (!ref)
-		return;
-	if (ref->valid_p_req)
-		ptr_to_ptr(ref->ctrl, ref->p_req, ref->ctrl->p_new);
-	else
-		ptr_to_ptr(ref->ctrl, ref->ctrl->p_cur, ref->ctrl->p_new);
-}
-
-/* Return non-zero if one or more of the controls in the cluster has a new
-   value that differs from the current value. */
-static int cluster_changed(struct v4l2_ctrl *master)
-{
-	bool changed = false;
-	unsigned idx;
-	int i;
-
-	for (i = 0; i < master->ncontrols; i++) {
-		struct v4l2_ctrl *ctrl = master->cluster[i];
-		bool ctrl_changed = false;
-
-		if (ctrl == NULL)
-			continue;
-
-		if (ctrl->flags & V4L2_CTRL_FLAG_EXECUTE_ON_WRITE)
-			changed = ctrl_changed = true;
-
-		/*
-		 * Set has_changed to false to avoid generating
-		 * the event V4L2_EVENT_CTRL_CH_VALUE
-		 */
-		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
-			ctrl->has_changed = false;
-			continue;
-		}
-
-		for (idx = 0; !ctrl_changed && idx < ctrl->elems; idx++)
-			ctrl_changed = !ctrl->type_ops->equal(ctrl, idx,
-				ctrl->p_cur, ctrl->p_new);
-		ctrl->has_changed = ctrl_changed;
-		changed |= ctrl->has_changed;
-	}
-	return changed;
-}
-
-/* Control range checking */
-static int check_range(enum v4l2_ctrl_type type,
-		s64 min, s64 max, u64 step, s64 def)
-{
-	switch (type) {
-	case V4L2_CTRL_TYPE_BOOLEAN:
-		if (step != 1 || max > 1 || min < 0)
-			return -ERANGE;
-		fallthrough;
-	case V4L2_CTRL_TYPE_U8:
-	case V4L2_CTRL_TYPE_U16:
-	case V4L2_CTRL_TYPE_U32:
-	case V4L2_CTRL_TYPE_INTEGER:
-	case V4L2_CTRL_TYPE_INTEGER64:
-		if (step == 0 || min > max || def < min || def > max)
-			return -ERANGE;
-		return 0;
-	case V4L2_CTRL_TYPE_BITMASK:
-		if (step || min || !max || (def & ~max))
-			return -ERANGE;
-		return 0;
-	case V4L2_CTRL_TYPE_MENU:
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-		if (min > max || def < min || def > max)
-			return -ERANGE;
-		/* Note: step == menu_skip_mask for menu controls.
-		   So here we check if the default value is masked out. */
-		if (step && ((1 << def) & step))
-			return -EINVAL;
-		return 0;
-	case V4L2_CTRL_TYPE_STRING:
-		if (min > max || min < 0 || step < 1 || def)
-			return -ERANGE;
-		return 0;
-	default:
-		return 0;
-	}
-}
-
-/* Validate a new control */
-static int validate_new(const struct v4l2_ctrl *ctrl, union v4l2_ctrl_ptr p_new)
-{
-	unsigned idx;
-	int err = 0;
-
-	for (idx = 0; !err && idx < ctrl->elems; idx++)
-		err = ctrl->type_ops->validate(ctrl, idx, p_new);
-	return err;
-}
-
-static inline u32 node2id(struct list_head *node)
-{
-	return list_entry(node, struct v4l2_ctrl_ref, node)->ctrl->id;
-}
-
-/* Set the handler's error code if it wasn't set earlier already */
-static inline int handler_set_err(struct v4l2_ctrl_handler *hdl, int err)
-{
-	if (hdl->error == 0)
-		hdl->error = err;
-	return err;
-}
-
-/* Initialize the handler */
-int v4l2_ctrl_handler_init_class(struct v4l2_ctrl_handler *hdl,
-				 unsigned nr_of_controls_hint,
-				 struct lock_class_key *key, const char *name)
-{
-	mutex_init(&hdl->_lock);
-	hdl->lock = &hdl->_lock;
-	lockdep_set_class_and_name(hdl->lock, key, name);
-	INIT_LIST_HEAD(&hdl->ctrls);
-	INIT_LIST_HEAD(&hdl->ctrl_refs);
-	INIT_LIST_HEAD(&hdl->requests);
-	INIT_LIST_HEAD(&hdl->requests_queued);
-	hdl->request_is_queued = false;
-	hdl->nr_of_buckets = 1 + nr_of_controls_hint / 8;
-	hdl->buckets = kvmalloc_array(hdl->nr_of_buckets,
-				      sizeof(hdl->buckets[0]),
-				      GFP_KERNEL | __GFP_ZERO);
-	hdl->error = hdl->buckets ? 0 : -ENOMEM;
-	media_request_object_init(&hdl->req_obj);
-	return hdl->error;
-}
-EXPORT_SYMBOL(v4l2_ctrl_handler_init_class);
-
-/* Free all controls and control refs */
-void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl)
-{
-	struct v4l2_ctrl_ref *ref, *next_ref;
-	struct v4l2_ctrl *ctrl, *next_ctrl;
-	struct v4l2_subscribed_event *sev, *next_sev;
-
-	if (hdl == NULL || hdl->buckets == NULL)
-		return;
-
-	/*
-	 * If the main handler is freed and it is used by handler objects in
-	 * outstanding requests, then unbind and put those objects before
-	 * freeing the main handler.
-	 *
-	 * The main handler can be identified by having a NULL ops pointer in
-	 * the request object.
-	 */
-	if (!hdl->req_obj.ops && !list_empty(&hdl->requests)) {
-		struct v4l2_ctrl_handler *req, *next_req;
-
-		list_for_each_entry_safe(req, next_req, &hdl->requests, requests) {
-			media_request_object_unbind(&req->req_obj);
-			media_request_object_put(&req->req_obj);
-		}
-	}
-	mutex_lock(hdl->lock);
-	/* Free all nodes */
-	list_for_each_entry_safe(ref, next_ref, &hdl->ctrl_refs, node) {
-		list_del(&ref->node);
-		kfree(ref);
-	}
-	/* Free all controls owned by the handler */
-	list_for_each_entry_safe(ctrl, next_ctrl, &hdl->ctrls, node) {
-		list_del(&ctrl->node);
-		list_for_each_entry_safe(sev, next_sev, &ctrl->ev_subs, node)
-			list_del(&sev->node);
-		kvfree(ctrl);
-	}
-	kvfree(hdl->buckets);
-	hdl->buckets = NULL;
-	hdl->cached = NULL;
-	hdl->error = 0;
-	mutex_unlock(hdl->lock);
-	mutex_destroy(&hdl->_lock);
-}
-EXPORT_SYMBOL(v4l2_ctrl_handler_free);
-
-/* For backwards compatibility: V4L2_CID_PRIVATE_BASE should no longer
-   be used except in G_CTRL, S_CTRL, QUERYCTRL and QUERYMENU when dealing
-   with applications that do not use the NEXT_CTRL flag.
-
-   We just find the n-th private user control. It's O(N), but that should not
-   be an issue in this particular case. */
-static struct v4l2_ctrl_ref *find_private_ref(
-		struct v4l2_ctrl_handler *hdl, u32 id)
-{
-	struct v4l2_ctrl_ref *ref;
-
-	id -= V4L2_CID_PRIVATE_BASE;
-	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-		/* Search for private user controls that are compatible with
-		   VIDIOC_G/S_CTRL. */
-		if (V4L2_CTRL_ID2WHICH(ref->ctrl->id) == V4L2_CTRL_CLASS_USER &&
-		    V4L2_CTRL_DRIVER_PRIV(ref->ctrl->id)) {
-			if (!ref->ctrl->is_int)
-				continue;
-			if (id == 0)
-				return ref;
-			id--;
-		}
-	}
-	return NULL;
-}
-
-/* Find a control with the given ID. */
-static struct v4l2_ctrl_ref *find_ref(struct v4l2_ctrl_handler *hdl, u32 id)
-{
-	struct v4l2_ctrl_ref *ref;
-	int bucket;
-
-	id &= V4L2_CTRL_ID_MASK;
-
-	/* Old-style private controls need special handling */
-	if (id >= V4L2_CID_PRIVATE_BASE)
-		return find_private_ref(hdl, id);
-	bucket = id % hdl->nr_of_buckets;
-
-	/* Simple optimization: cache the last control found */
-	if (hdl->cached && hdl->cached->ctrl->id == id)
-		return hdl->cached;
-
-	/* Not in cache, search the hash */
-	ref = hdl->buckets ? hdl->buckets[bucket] : NULL;
-	while (ref && ref->ctrl->id != id)
-		ref = ref->next;
-
-	if (ref)
-		hdl->cached = ref; /* cache it! */
-	return ref;
-}
-
-/* Find a control with the given ID. Take the handler's lock first. */
-static struct v4l2_ctrl_ref *find_ref_lock(
-		struct v4l2_ctrl_handler *hdl, u32 id)
-{
-	struct v4l2_ctrl_ref *ref = NULL;
-
-	if (hdl) {
-		mutex_lock(hdl->lock);
-		ref = find_ref(hdl, id);
-		mutex_unlock(hdl->lock);
-	}
-	return ref;
-}
-
-/* Find a control with the given ID. */
-struct v4l2_ctrl *v4l2_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id)
-{
-	struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id);
-
-	return ref ? ref->ctrl : NULL;
-}
-EXPORT_SYMBOL(v4l2_ctrl_find);
-
-/* Allocate a new v4l2_ctrl_ref and hook it into the handler. */
-static int handler_new_ref(struct v4l2_ctrl_handler *hdl,
-			   struct v4l2_ctrl *ctrl,
-			   struct v4l2_ctrl_ref **ctrl_ref,
-			   bool from_other_dev, bool allocate_req)
-{
-	struct v4l2_ctrl_ref *ref;
-	struct v4l2_ctrl_ref *new_ref;
-	u32 id = ctrl->id;
-	u32 class_ctrl = V4L2_CTRL_ID2WHICH(id) | 1;
-	int bucket = id % hdl->nr_of_buckets;	/* which bucket to use */
-	unsigned int size_extra_req = 0;
-
-	if (ctrl_ref)
-		*ctrl_ref = NULL;
-
-	/*
-	 * Automatically add the control class if it is not yet present and
-	 * the new control is not a compound control.
-	 */
-	if (ctrl->type < V4L2_CTRL_COMPOUND_TYPES &&
-	    id != class_ctrl && find_ref_lock(hdl, class_ctrl) == NULL)
-		if (!v4l2_ctrl_new_std(hdl, NULL, class_ctrl, 0, 0, 0, 0))
-			return hdl->error;
-
-	if (hdl->error)
-		return hdl->error;
-
-	if (allocate_req)
-		size_extra_req = ctrl->elems * ctrl->elem_size;
-	new_ref = kzalloc(sizeof(*new_ref) + size_extra_req, GFP_KERNEL);
-	if (!new_ref)
-		return handler_set_err(hdl, -ENOMEM);
-	new_ref->ctrl = ctrl;
-	new_ref->from_other_dev = from_other_dev;
-	if (size_extra_req)
-		new_ref->p_req.p = &new_ref[1];
-
-	INIT_LIST_HEAD(&new_ref->node);
-
-	mutex_lock(hdl->lock);
-
-	/* Add immediately at the end of the list if the list is empty, or if
-	   the last element in the list has a lower ID.
-	   This ensures that when elements are added in ascending order the
-	   insertion is an O(1) operation. */
-	if (list_empty(&hdl->ctrl_refs) || id > node2id(hdl->ctrl_refs.prev)) {
-		list_add_tail(&new_ref->node, &hdl->ctrl_refs);
-		goto insert_in_hash;
-	}
-
-	/* Find insert position in sorted list */
-	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-		if (ref->ctrl->id < id)
-			continue;
-		/* Don't add duplicates */
-		if (ref->ctrl->id == id) {
-			kfree(new_ref);
-			goto unlock;
-		}
-		list_add(&new_ref->node, ref->node.prev);
-		break;
-	}
-
-insert_in_hash:
-	/* Insert the control node in the hash */
-	new_ref->next = hdl->buckets[bucket];
-	hdl->buckets[bucket] = new_ref;
-	if (ctrl_ref)
-		*ctrl_ref = new_ref;
-	if (ctrl->handler == hdl) {
-		/* By default each control starts in a cluster of its own.
-		 * new_ref->ctrl is basically a cluster array with one
-		 * element, so that's perfect to use as the cluster pointer.
-		 * But only do this for the handler that owns the control.
-		 */
-		ctrl->cluster = &new_ref->ctrl;
-		ctrl->ncontrols = 1;
-	}
-
-unlock:
-	mutex_unlock(hdl->lock);
-	return 0;
-}
-
-/* Add a new control */
-static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_ops *ops,
-			const struct v4l2_ctrl_type_ops *type_ops,
-			u32 id, const char *name, enum v4l2_ctrl_type type,
-			s64 min, s64 max, u64 step, s64 def,
-			const u32 dims[V4L2_CTRL_MAX_DIMS], u32 elem_size,
-			u32 flags, const char * const *qmenu,
-			const s64 *qmenu_int, const union v4l2_ctrl_ptr p_def,
-			void *priv)
-{
-	struct v4l2_ctrl *ctrl;
-	unsigned sz_extra;
-	unsigned nr_of_dims = 0;
-	unsigned elems = 1;
-	bool is_array;
-	unsigned tot_ctrl_size;
-	unsigned idx;
-	void *data;
-	int err;
-
-	if (hdl->error)
-		return NULL;
-
-	while (dims && dims[nr_of_dims]) {
-		elems *= dims[nr_of_dims];
-		nr_of_dims++;
-		if (nr_of_dims == V4L2_CTRL_MAX_DIMS)
-			break;
-	}
-	is_array = nr_of_dims > 0;
-
-	/* Prefill elem_size for all types handled by std_type_ops */
-	switch ((u32)type) {
-	case V4L2_CTRL_TYPE_INTEGER64:
-		elem_size = sizeof(s64);
-		break;
-	case V4L2_CTRL_TYPE_STRING:
-		elem_size = max + 1;
-		break;
-	case V4L2_CTRL_TYPE_U8:
-		elem_size = sizeof(u8);
-		break;
-	case V4L2_CTRL_TYPE_U16:
-		elem_size = sizeof(u16);
-		break;
-	case V4L2_CTRL_TYPE_U32:
-		elem_size = sizeof(u32);
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_SEQUENCE:
-		elem_size = sizeof(struct v4l2_ctrl_mpeg2_sequence);
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_PICTURE:
-		elem_size = sizeof(struct v4l2_ctrl_mpeg2_picture);
-		break;
-	case V4L2_CTRL_TYPE_MPEG2_QUANTISATION:
-		elem_size = sizeof(struct v4l2_ctrl_mpeg2_quantisation);
-		break;
-	case V4L2_CTRL_TYPE_FWHT_PARAMS:
-		elem_size = sizeof(struct v4l2_ctrl_fwht_params);
-		break;
-	case V4L2_CTRL_TYPE_H264_SPS:
-		elem_size = sizeof(struct v4l2_ctrl_h264_sps);
-		break;
-	case V4L2_CTRL_TYPE_H264_PPS:
-		elem_size = sizeof(struct v4l2_ctrl_h264_pps);
-		break;
-	case V4L2_CTRL_TYPE_H264_SCALING_MATRIX:
-		elem_size = sizeof(struct v4l2_ctrl_h264_scaling_matrix);
-		break;
-	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
-		elem_size = sizeof(struct v4l2_ctrl_h264_slice_params);
-		break;
-	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
-		elem_size = sizeof(struct v4l2_ctrl_h264_decode_params);
-		break;
-	case V4L2_CTRL_TYPE_H264_PRED_WEIGHTS:
-		elem_size = sizeof(struct v4l2_ctrl_h264_pred_weights);
-		break;
-	case V4L2_CTRL_TYPE_VP8_FRAME:
-		elem_size = sizeof(struct v4l2_ctrl_vp8_frame);
-		break;
-	case V4L2_CTRL_TYPE_HEVC_SPS:
-		elem_size = sizeof(struct v4l2_ctrl_hevc_sps);
-		break;
-	case V4L2_CTRL_TYPE_HEVC_PPS:
-		elem_size = sizeof(struct v4l2_ctrl_hevc_pps);
-		break;
-	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
-		elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params);
-		break;
-	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
-		elem_size = sizeof(struct v4l2_ctrl_hdr10_cll_info);
-		break;
-	case V4L2_CTRL_TYPE_HDR10_MASTERING_DISPLAY:
-		elem_size = sizeof(struct v4l2_ctrl_hdr10_mastering_display);
-		break;
-	case V4L2_CTRL_TYPE_AREA:
-		elem_size = sizeof(struct v4l2_area);
-		break;
-	default:
-		if (type < V4L2_CTRL_COMPOUND_TYPES)
-			elem_size = sizeof(s32);
-		break;
-	}
-	tot_ctrl_size = elem_size * elems;
-
-	/* Sanity checks */
-	if (id == 0 || name == NULL || !elem_size ||
-	    id >= V4L2_CID_PRIVATE_BASE ||
-	    (type == V4L2_CTRL_TYPE_MENU && qmenu == NULL) ||
-	    (type == V4L2_CTRL_TYPE_INTEGER_MENU && qmenu_int == NULL)) {
-		handler_set_err(hdl, -ERANGE);
-		return NULL;
-	}
-	err = check_range(type, min, max, step, def);
-	if (err) {
-		handler_set_err(hdl, err);
-		return NULL;
-	}
-	if (is_array &&
-	    (type == V4L2_CTRL_TYPE_BUTTON ||
-	     type == V4L2_CTRL_TYPE_CTRL_CLASS)) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-
-	sz_extra = 0;
-	if (type == V4L2_CTRL_TYPE_BUTTON)
-		flags |= V4L2_CTRL_FLAG_WRITE_ONLY |
-			V4L2_CTRL_FLAG_EXECUTE_ON_WRITE;
-	else if (type == V4L2_CTRL_TYPE_CTRL_CLASS)
-		flags |= V4L2_CTRL_FLAG_READ_ONLY;
-	else if (type == V4L2_CTRL_TYPE_INTEGER64 ||
-		 type == V4L2_CTRL_TYPE_STRING ||
-		 type >= V4L2_CTRL_COMPOUND_TYPES ||
-		 is_array)
-		sz_extra += 2 * tot_ctrl_size;
-
-	if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const)
-		sz_extra += elem_size;
-
-	ctrl = kvzalloc(sizeof(*ctrl) + sz_extra, GFP_KERNEL);
-	if (ctrl == NULL) {
-		handler_set_err(hdl, -ENOMEM);
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&ctrl->node);
-	INIT_LIST_HEAD(&ctrl->ev_subs);
-	ctrl->handler = hdl;
-	ctrl->ops = ops;
-	ctrl->type_ops = type_ops ? type_ops : &std_type_ops;
-	ctrl->id = id;
-	ctrl->name = name;
-	ctrl->type = type;
-	ctrl->flags = flags;
-	ctrl->minimum = min;
-	ctrl->maximum = max;
-	ctrl->step = step;
-	ctrl->default_value = def;
-	ctrl->is_string = !is_array && type == V4L2_CTRL_TYPE_STRING;
-	ctrl->is_ptr = is_array || type >= V4L2_CTRL_COMPOUND_TYPES || ctrl->is_string;
-	ctrl->is_int = !ctrl->is_ptr && type != V4L2_CTRL_TYPE_INTEGER64;
-	ctrl->is_array = is_array;
-	ctrl->elems = elems;
-	ctrl->nr_of_dims = nr_of_dims;
-	if (nr_of_dims)
-		memcpy(ctrl->dims, dims, nr_of_dims * sizeof(dims[0]));
-	ctrl->elem_size = elem_size;
-	if (type == V4L2_CTRL_TYPE_MENU)
-		ctrl->qmenu = qmenu;
-	else if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
-		ctrl->qmenu_int = qmenu_int;
-	ctrl->priv = priv;
-	ctrl->cur.val = ctrl->val = def;
-	data = &ctrl[1];
-
-	if (!ctrl->is_int) {
-		ctrl->p_new.p = data;
-		ctrl->p_cur.p = data + tot_ctrl_size;
-	} else {
-		ctrl->p_new.p = &ctrl->val;
-		ctrl->p_cur.p = &ctrl->cur.val;
-	}
-
-	if (type >= V4L2_CTRL_COMPOUND_TYPES && p_def.p_const) {
-		ctrl->p_def.p = ctrl->p_cur.p + tot_ctrl_size;
-		memcpy(ctrl->p_def.p, p_def.p_const, elem_size);
-	}
-
-	for (idx = 0; idx < elems; idx++) {
-		ctrl->type_ops->init(ctrl, idx, ctrl->p_cur);
-		ctrl->type_ops->init(ctrl, idx, ctrl->p_new);
-	}
-
-	if (handler_new_ref(hdl, ctrl, NULL, false, false)) {
-		kvfree(ctrl);
-		return NULL;
-	}
-	mutex_lock(hdl->lock);
-	list_add_tail(&ctrl->node, &hdl->ctrls);
-	mutex_unlock(hdl->lock);
-	return ctrl;
-}
-
-struct v4l2_ctrl *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_config *cfg, void *priv)
-{
-	bool is_menu;
-	struct v4l2_ctrl *ctrl;
-	const char *name = cfg->name;
-	const char * const *qmenu = cfg->qmenu;
-	const s64 *qmenu_int = cfg->qmenu_int;
-	enum v4l2_ctrl_type type = cfg->type;
-	u32 flags = cfg->flags;
-	s64 min = cfg->min;
-	s64 max = cfg->max;
-	u64 step = cfg->step;
-	s64 def = cfg->def;
-
-	if (name == NULL)
-		v4l2_ctrl_fill(cfg->id, &name, &type, &min, &max, &step,
-								&def, &flags);
-
-	is_menu = (type == V4L2_CTRL_TYPE_MENU ||
-		   type == V4L2_CTRL_TYPE_INTEGER_MENU);
-	if (is_menu)
-		WARN_ON(step);
-	else
-		WARN_ON(cfg->menu_skip_mask);
-	if (type == V4L2_CTRL_TYPE_MENU && !qmenu) {
-		qmenu = v4l2_ctrl_get_menu(cfg->id);
-	} else if (type == V4L2_CTRL_TYPE_INTEGER_MENU && !qmenu_int) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-
-	ctrl = v4l2_ctrl_new(hdl, cfg->ops, cfg->type_ops, cfg->id, name,
-			type, min, max,
-			is_menu ? cfg->menu_skip_mask : step, def,
-			cfg->dims, cfg->elem_size,
-			flags, qmenu, qmenu_int, cfg->p_def, priv);
-	if (ctrl)
-		ctrl->is_private = cfg->is_private;
-	return ctrl;
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_custom);
-
-/* Helper function for standard non-menu controls */
-struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_ops *ops,
-			u32 id, s64 min, s64 max, u64 step, s64 def)
-{
-	const char *name;
-	enum v4l2_ctrl_type type;
-	u32 flags;
-
-	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
-	if (type == V4L2_CTRL_TYPE_MENU ||
-	    type == V4L2_CTRL_TYPE_INTEGER_MENU ||
-	    type >= V4L2_CTRL_COMPOUND_TYPES) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
-			     min, max, step, def, NULL, 0,
-			     flags, NULL, NULL, ptr_null, NULL);
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_std);
-
-/* Helper function for standard menu controls */
-struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_ops *ops,
-			u32 id, u8 _max, u64 mask, u8 _def)
-{
-	const char * const *qmenu = NULL;
-	const s64 *qmenu_int = NULL;
-	unsigned int qmenu_int_len = 0;
-	const char *name;
-	enum v4l2_ctrl_type type;
-	s64 min;
-	s64 max = _max;
-	s64 def = _def;
-	u64 step;
-	u32 flags;
-
-	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
-
-	if (type == V4L2_CTRL_TYPE_MENU)
-		qmenu = v4l2_ctrl_get_menu(id);
-	else if (type == V4L2_CTRL_TYPE_INTEGER_MENU)
-		qmenu_int = v4l2_ctrl_get_int_menu(id, &qmenu_int_len);
-
-	if ((!qmenu && !qmenu_int) || (qmenu_int && max > qmenu_int_len)) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
-			     0, max, mask, def, NULL, 0,
-			     flags, qmenu, qmenu_int, ptr_null, NULL);
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_std_menu);
-
-/* Helper function for standard menu controls with driver defined menu */
-struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_ops *ops, u32 id, u8 _max,
-			u64 mask, u8 _def, const char * const *qmenu)
-{
-	enum v4l2_ctrl_type type;
-	const char *name;
-	u32 flags;
-	u64 step;
-	s64 min;
-	s64 max = _max;
-	s64 def = _def;
-
-	/* v4l2_ctrl_new_std_menu_items() should only be called for
-	 * standard controls without a standard menu.
-	 */
-	if (v4l2_ctrl_get_menu(id)) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-
-	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
-	if (type != V4L2_CTRL_TYPE_MENU || qmenu == NULL) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
-			     0, max, mask, def, NULL, 0,
-			     flags, qmenu, NULL, ptr_null, NULL);
-
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_std_menu_items);
-
-/* Helper function for standard compound controls */
-struct v4l2_ctrl *v4l2_ctrl_new_std_compound(struct v4l2_ctrl_handler *hdl,
-				const struct v4l2_ctrl_ops *ops, u32 id,
-				const union v4l2_ctrl_ptr p_def)
-{
-	const char *name;
-	enum v4l2_ctrl_type type;
-	u32 flags;
-	s64 min, max, step, def;
-
-	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
-	if (type < V4L2_CTRL_COMPOUND_TYPES) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
-			     min, max, step, def, NULL, 0,
-			     flags, NULL, NULL, p_def, NULL);
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_std_compound);
-
-/* Helper function for standard integer menu controls */
-struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl,
-			const struct v4l2_ctrl_ops *ops,
-			u32 id, u8 _max, u8 _def, const s64 *qmenu_int)
-{
-	const char *name;
-	enum v4l2_ctrl_type type;
-	s64 min;
-	u64 step;
-	s64 max = _max;
-	s64 def = _def;
-	u32 flags;
-
-	v4l2_ctrl_fill(id, &name, &type, &min, &max, &step, &def, &flags);
-	if (type != V4L2_CTRL_TYPE_INTEGER_MENU) {
-		handler_set_err(hdl, -EINVAL);
-		return NULL;
-	}
-	return v4l2_ctrl_new(hdl, ops, NULL, id, name, type,
-			     0, max, 0, def, NULL, 0,
-			     flags, NULL, qmenu_int, ptr_null, NULL);
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_int_menu);
-
-/* Add the controls from another handler to our own. */
-int v4l2_ctrl_add_handler(struct v4l2_ctrl_handler *hdl,
-			  struct v4l2_ctrl_handler *add,
-			  bool (*filter)(const struct v4l2_ctrl *ctrl),
-			  bool from_other_dev)
-{
-	struct v4l2_ctrl_ref *ref;
-	int ret = 0;
-
-	/* Do nothing if either handler is NULL or if they are the same */
-	if (!hdl || !add || hdl == add)
-		return 0;
-	if (hdl->error)
-		return hdl->error;
-	mutex_lock(add->lock);
-	list_for_each_entry(ref, &add->ctrl_refs, node) {
-		struct v4l2_ctrl *ctrl = ref->ctrl;
-
-		/* Skip handler-private controls. */
-		if (ctrl->is_private)
-			continue;
-		/* And control classes */
-		if (ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS)
-			continue;
-		/* Filter any unwanted controls */
-		if (filter && !filter(ctrl))
-			continue;
-		ret = handler_new_ref(hdl, ctrl, NULL, from_other_dev, false);
-		if (ret)
-			break;
-	}
-	mutex_unlock(add->lock);
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_ctrl_add_handler);
-
-bool v4l2_ctrl_radio_filter(const struct v4l2_ctrl *ctrl)
-{
-	if (V4L2_CTRL_ID2WHICH(ctrl->id) == V4L2_CTRL_CLASS_FM_TX)
-		return true;
-	if (V4L2_CTRL_ID2WHICH(ctrl->id) == V4L2_CTRL_CLASS_FM_RX)
-		return true;
-	switch (ctrl->id) {
-	case V4L2_CID_AUDIO_MUTE:
-	case V4L2_CID_AUDIO_VOLUME:
-	case V4L2_CID_AUDIO_BALANCE:
-	case V4L2_CID_AUDIO_BASS:
-	case V4L2_CID_AUDIO_TREBLE:
-	case V4L2_CID_AUDIO_LOUDNESS:
-		return true;
-	default:
-		break;
-	}
-	return false;
-}
-EXPORT_SYMBOL(v4l2_ctrl_radio_filter);
-
-/* Cluster controls */
-void v4l2_ctrl_cluster(unsigned ncontrols, struct v4l2_ctrl **controls)
-{
-	bool has_volatiles = false;
-	int i;
-
-	/* The first control is the master control and it must not be NULL */
-	if (WARN_ON(ncontrols == 0 || controls[0] == NULL))
-		return;
-
-	for (i = 0; i < ncontrols; i++) {
-		if (controls[i]) {
-			controls[i]->cluster = controls;
-			controls[i]->ncontrols = ncontrols;
-			if (controls[i]->flags & V4L2_CTRL_FLAG_VOLATILE)
-				has_volatiles = true;
-		}
-	}
-	controls[0]->has_volatiles = has_volatiles;
-}
-EXPORT_SYMBOL(v4l2_ctrl_cluster);
-
-void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls,
-			    u8 manual_val, bool set_volatile)
-{
-	struct v4l2_ctrl *master = controls[0];
-	u32 flag = 0;
-	int i;
-
-	v4l2_ctrl_cluster(ncontrols, controls);
-	WARN_ON(ncontrols <= 1);
-	WARN_ON(manual_val < master->minimum || manual_val > master->maximum);
-	WARN_ON(set_volatile && !has_op(master, g_volatile_ctrl));
-	master->is_auto = true;
-	master->has_volatiles = set_volatile;
-	master->manual_mode_value = manual_val;
-	master->flags |= V4L2_CTRL_FLAG_UPDATE;
-
-	if (!is_cur_manual(master))
-		flag = V4L2_CTRL_FLAG_INACTIVE |
-			(set_volatile ? V4L2_CTRL_FLAG_VOLATILE : 0);
-
-	for (i = 1; i < ncontrols; i++)
-		if (controls[i])
-			controls[i]->flags |= flag;
-}
-EXPORT_SYMBOL(v4l2_ctrl_auto_cluster);
-
-/* Activate/deactivate a control. */
-void v4l2_ctrl_activate(struct v4l2_ctrl *ctrl, bool active)
-{
-	/* invert since the actual flag is called 'inactive' */
-	bool inactive = !active;
-	bool old;
-
-	if (ctrl == NULL)
-		return;
-
-	if (inactive)
-		/* set V4L2_CTRL_FLAG_INACTIVE */
-		old = test_and_set_bit(4, &ctrl->flags);
-	else
-		/* clear V4L2_CTRL_FLAG_INACTIVE */
-		old = test_and_clear_bit(4, &ctrl->flags);
-	if (old != inactive)
-		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS);
-}
-EXPORT_SYMBOL(v4l2_ctrl_activate);
-
-void __v4l2_ctrl_grab(struct v4l2_ctrl *ctrl, bool grabbed)
-{
-	bool old;
-
-	if (ctrl == NULL)
-		return;
-
-	lockdep_assert_held(ctrl->handler->lock);
-
-	if (grabbed)
-		/* set V4L2_CTRL_FLAG_GRABBED */
-		old = test_and_set_bit(1, &ctrl->flags);
-	else
-		/* clear V4L2_CTRL_FLAG_GRABBED */
-		old = test_and_clear_bit(1, &ctrl->flags);
-	if (old != grabbed)
-		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_FLAGS);
-}
-EXPORT_SYMBOL(__v4l2_ctrl_grab);
-
-/* Log the control name and value */
-static void log_ctrl(const struct v4l2_ctrl *ctrl,
-		     const char *prefix, const char *colon)
-{
-	if (ctrl->flags & (V4L2_CTRL_FLAG_DISABLED | V4L2_CTRL_FLAG_WRITE_ONLY))
-		return;
-	if (ctrl->type == V4L2_CTRL_TYPE_CTRL_CLASS)
-		return;
-
-	pr_info("%s%s%s: ", prefix, colon, ctrl->name);
-
-	ctrl->type_ops->log(ctrl);
-
-	if (ctrl->flags & (V4L2_CTRL_FLAG_INACTIVE |
-			   V4L2_CTRL_FLAG_GRABBED |
-			   V4L2_CTRL_FLAG_VOLATILE)) {
-		if (ctrl->flags & V4L2_CTRL_FLAG_INACTIVE)
-			pr_cont(" inactive");
-		if (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)
-			pr_cont(" grabbed");
-		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE)
-			pr_cont(" volatile");
-	}
-	pr_cont("\n");
-}
-
-/* Log all controls owned by the handler */
-void v4l2_ctrl_handler_log_status(struct v4l2_ctrl_handler *hdl,
-				  const char *prefix)
-{
-	struct v4l2_ctrl *ctrl;
-	const char *colon = "";
-	int len;
-
-	if (hdl == NULL)
-		return;
-	if (prefix == NULL)
-		prefix = "";
-	len = strlen(prefix);
-	if (len && prefix[len - 1] != ' ')
-		colon = ": ";
-	mutex_lock(hdl->lock);
-	list_for_each_entry(ctrl, &hdl->ctrls, node)
-		if (!(ctrl->flags & V4L2_CTRL_FLAG_DISABLED))
-			log_ctrl(ctrl, prefix, colon);
-	mutex_unlock(hdl->lock);
-}
-EXPORT_SYMBOL(v4l2_ctrl_handler_log_status);
-
-int v4l2_ctrl_subdev_log_status(struct v4l2_subdev *sd)
-{
-	v4l2_ctrl_handler_log_status(sd->ctrl_handler, sd->name);
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_ctrl_subdev_log_status);
-
-/* Call s_ctrl for all controls owned by the handler */
-int __v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl)
-{
-	struct v4l2_ctrl *ctrl;
-	int ret = 0;
-
-	if (hdl == NULL)
-		return 0;
-
-	lockdep_assert_held(hdl->lock);
-
-	list_for_each_entry(ctrl, &hdl->ctrls, node)
-		ctrl->done = false;
-
-	list_for_each_entry(ctrl, &hdl->ctrls, node) {
-		struct v4l2_ctrl *master = ctrl->cluster[0];
-		int i;
-
-		/* Skip if this control was already handled by a cluster. */
-		/* Skip button controls and read-only controls. */
-		if (ctrl->done || ctrl->type == V4L2_CTRL_TYPE_BUTTON ||
-		    (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY))
-			continue;
-
-		for (i = 0; i < master->ncontrols; i++) {
-			if (master->cluster[i]) {
-				cur_to_new(master->cluster[i]);
-				master->cluster[i]->is_new = 1;
-				master->cluster[i]->done = true;
-			}
-		}
-		ret = call_op(master, s_ctrl);
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__v4l2_ctrl_handler_setup);
-
-int v4l2_ctrl_handler_setup(struct v4l2_ctrl_handler *hdl)
-{
-	int ret;
-
-	if (hdl == NULL)
-		return 0;
-
-	mutex_lock(hdl->lock);
-	ret = __v4l2_ctrl_handler_setup(hdl);
-	mutex_unlock(hdl->lock);
-
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_ctrl_handler_setup);
-
-/* Implement VIDIOC_QUERY_EXT_CTRL */
-int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_query_ext_ctrl *qc)
-{
-	const unsigned next_flags = V4L2_CTRL_FLAG_NEXT_CTRL | V4L2_CTRL_FLAG_NEXT_COMPOUND;
-	u32 id = qc->id & V4L2_CTRL_ID_MASK;
-	struct v4l2_ctrl_ref *ref;
-	struct v4l2_ctrl *ctrl;
-
-	if (hdl == NULL)
-		return -EINVAL;
-
-	mutex_lock(hdl->lock);
-
-	/* Try to find it */
-	ref = find_ref(hdl, id);
-
-	if ((qc->id & next_flags) && !list_empty(&hdl->ctrl_refs)) {
-		bool is_compound;
-		/* Match any control that is not hidden */
-		unsigned mask = 1;
-		bool match = false;
-
-		if ((qc->id & next_flags) == V4L2_CTRL_FLAG_NEXT_COMPOUND) {
-			/* Match any hidden control */
-			match = true;
-		} else if ((qc->id & next_flags) == next_flags) {
-			/* Match any control, compound or not */
-			mask = 0;
-		}
-
-		/* Find the next control with ID > qc->id */
-
-		/* Did we reach the end of the control list? */
-		if (id >= node2id(hdl->ctrl_refs.prev)) {
-			ref = NULL; /* Yes, so there is no next control */
-		} else if (ref) {
-			/* We found a control with the given ID, so just get
-			   the next valid one in the list. */
-			list_for_each_entry_continue(ref, &hdl->ctrl_refs, node) {
-				is_compound = ref->ctrl->is_array ||
-					ref->ctrl->type >= V4L2_CTRL_COMPOUND_TYPES;
-				if (id < ref->ctrl->id &&
-				    (is_compound & mask) == match)
-					break;
-			}
-			if (&ref->node == &hdl->ctrl_refs)
-				ref = NULL;
-		} else {
-			/* No control with the given ID exists, so start
-			   searching for the next largest ID. We know there
-			   is one, otherwise the first 'if' above would have
-			   been true. */
-			list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-				is_compound = ref->ctrl->is_array ||
-					ref->ctrl->type >= V4L2_CTRL_COMPOUND_TYPES;
-				if (id < ref->ctrl->id &&
-				    (is_compound & mask) == match)
-					break;
-			}
-			if (&ref->node == &hdl->ctrl_refs)
-				ref = NULL;
-		}
-	}
-	mutex_unlock(hdl->lock);
-
-	if (!ref)
-		return -EINVAL;
-
-	ctrl = ref->ctrl;
-	memset(qc, 0, sizeof(*qc));
-	if (id >= V4L2_CID_PRIVATE_BASE)
-		qc->id = id;
-	else
-		qc->id = ctrl->id;
-	strscpy(qc->name, ctrl->name, sizeof(qc->name));
-	qc->flags = user_flags(ctrl);
-	qc->type = ctrl->type;
-	qc->elem_size = ctrl->elem_size;
-	qc->elems = ctrl->elems;
-	qc->nr_of_dims = ctrl->nr_of_dims;
-	memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
-	qc->minimum = ctrl->minimum;
-	qc->maximum = ctrl->maximum;
-	qc->default_value = ctrl->default_value;
-	if (ctrl->type == V4L2_CTRL_TYPE_MENU
-	    || ctrl->type == V4L2_CTRL_TYPE_INTEGER_MENU)
-		qc->step = 1;
-	else
-		qc->step = ctrl->step;
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_query_ext_ctrl);
-
-/* Implement VIDIOC_QUERYCTRL */
-int v4l2_queryctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_queryctrl *qc)
-{
-	struct v4l2_query_ext_ctrl qec = { qc->id };
-	int rc;
-
-	rc = v4l2_query_ext_ctrl(hdl, &qec);
-	if (rc)
-		return rc;
-
-	qc->id = qec.id;
-	qc->type = qec.type;
-	qc->flags = qec.flags;
-	strscpy(qc->name, qec.name, sizeof(qc->name));
-	switch (qc->type) {
-	case V4L2_CTRL_TYPE_INTEGER:
-	case V4L2_CTRL_TYPE_BOOLEAN:
-	case V4L2_CTRL_TYPE_MENU:
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-	case V4L2_CTRL_TYPE_STRING:
-	case V4L2_CTRL_TYPE_BITMASK:
-		qc->minimum = qec.minimum;
-		qc->maximum = qec.maximum;
-		qc->step = qec.step;
-		qc->default_value = qec.default_value;
-		break;
-	default:
-		qc->minimum = 0;
-		qc->maximum = 0;
-		qc->step = 0;
-		qc->default_value = 0;
-		break;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_queryctrl);
-
-/* Implement VIDIOC_QUERYMENU */
-int v4l2_querymenu(struct v4l2_ctrl_handler *hdl, struct v4l2_querymenu *qm)
-{
-	struct v4l2_ctrl *ctrl;
-	u32 i = qm->index;
-
-	ctrl = v4l2_ctrl_find(hdl, qm->id);
-	if (!ctrl)
-		return -EINVAL;
-
-	qm->reserved = 0;
-	/* Sanity checks */
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_MENU:
-		if (ctrl->qmenu == NULL)
-			return -EINVAL;
-		break;
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-		if (ctrl->qmenu_int == NULL)
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (i < ctrl->minimum || i > ctrl->maximum)
-		return -EINVAL;
-
-	/* Use mask to see if this menu item should be skipped */
-	if (ctrl->menu_skip_mask & (1ULL << i))
-		return -EINVAL;
-	/* Empty menu items should also be skipped */
-	if (ctrl->type == V4L2_CTRL_TYPE_MENU) {
-		if (ctrl->qmenu[i] == NULL || ctrl->qmenu[i][0] == '\0')
-			return -EINVAL;
-		strscpy(qm->name, ctrl->qmenu[i], sizeof(qm->name));
-	} else {
-		qm->value = ctrl->qmenu_int[i];
-	}
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_querymenu);
-
-static int v4l2_ctrl_request_clone(struct v4l2_ctrl_handler *hdl,
-				   const struct v4l2_ctrl_handler *from)
-{
-	struct v4l2_ctrl_ref *ref;
-	int err = 0;
-
-	if (WARN_ON(!hdl || hdl == from))
-		return -EINVAL;
-
-	if (hdl->error)
-		return hdl->error;
-
-	WARN_ON(hdl->lock != &hdl->_lock);
-
-	mutex_lock(from->lock);
-	list_for_each_entry(ref, &from->ctrl_refs, node) {
-		struct v4l2_ctrl *ctrl = ref->ctrl;
-		struct v4l2_ctrl_ref *new_ref;
-
-		/* Skip refs inherited from other devices */
-		if (ref->from_other_dev)
-			continue;
-		err = handler_new_ref(hdl, ctrl, &new_ref, false, true);
-		if (err)
-			break;
-	}
-	mutex_unlock(from->lock);
-	return err;
-}
-
-static void v4l2_ctrl_request_queue(struct media_request_object *obj)
-{
-	struct v4l2_ctrl_handler *hdl =
-		container_of(obj, struct v4l2_ctrl_handler, req_obj);
-	struct v4l2_ctrl_handler *main_hdl = obj->priv;
-
-	mutex_lock(main_hdl->lock);
-	list_add_tail(&hdl->requests_queued, &main_hdl->requests_queued);
-	hdl->request_is_queued = true;
-	mutex_unlock(main_hdl->lock);
-}
-
-static void v4l2_ctrl_request_unbind(struct media_request_object *obj)
-{
-	struct v4l2_ctrl_handler *hdl =
-		container_of(obj, struct v4l2_ctrl_handler, req_obj);
-	struct v4l2_ctrl_handler *main_hdl = obj->priv;
-
-	mutex_lock(main_hdl->lock);
-	list_del_init(&hdl->requests);
-	if (hdl->request_is_queued) {
-		list_del_init(&hdl->requests_queued);
-		hdl->request_is_queued = false;
-	}
-	mutex_unlock(main_hdl->lock);
-}
-
-static void v4l2_ctrl_request_release(struct media_request_object *obj)
-{
-	struct v4l2_ctrl_handler *hdl =
-		container_of(obj, struct v4l2_ctrl_handler, req_obj);
-
-	v4l2_ctrl_handler_free(hdl);
-	kfree(hdl);
-}
-
-static const struct media_request_object_ops req_ops = {
-	.queue = v4l2_ctrl_request_queue,
-	.unbind = v4l2_ctrl_request_unbind,
-	.release = v4l2_ctrl_request_release,
-};
-
-struct v4l2_ctrl_handler *v4l2_ctrl_request_hdl_find(struct media_request *req,
-					struct v4l2_ctrl_handler *parent)
-{
-	struct media_request_object *obj;
-
-	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_VALIDATING &&
-		    req->state != MEDIA_REQUEST_STATE_QUEUED))
-		return NULL;
-
-	obj = media_request_object_find(req, &req_ops, parent);
-	if (obj)
-		return container_of(obj, struct v4l2_ctrl_handler, req_obj);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_find);
-
-struct v4l2_ctrl *
-v4l2_ctrl_request_hdl_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id)
-{
-	struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id);
-
-	return (ref && ref->valid_p_req) ? ref->ctrl : NULL;
-}
-EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_ctrl_find);
-
-static int v4l2_ctrl_request_bind(struct media_request *req,
-			   struct v4l2_ctrl_handler *hdl,
-			   struct v4l2_ctrl_handler *from)
-{
-	int ret;
-
-	ret = v4l2_ctrl_request_clone(hdl, from);
-
-	if (!ret) {
-		ret = media_request_object_bind(req, &req_ops,
-						from, false, &hdl->req_obj);
-		if (!ret) {
-			mutex_lock(from->lock);
-			list_add_tail(&hdl->requests, &from->requests);
-			mutex_unlock(from->lock);
-		}
-	}
-	return ret;
-}
-
-/* Some general notes on the atomic requirements of VIDIOC_G/TRY/S_EXT_CTRLS:
-
-   It is not a fully atomic operation, just best-effort only. After all, if
-   multiple controls have to be set through multiple i2c writes (for example)
-   then some initial writes may succeed while others fail. Thus leaving the
-   system in an inconsistent state. The question is how much effort you are
-   willing to spend on trying to make something atomic that really isn't.
-
-   From the point of view of an application the main requirement is that
-   when you call VIDIOC_S_EXT_CTRLS and some values are invalid then an
-   error should be returned without actually affecting any controls.
-
-   If all the values are correct, then it is acceptable to just give up
-   in case of low-level errors.
-
-   It is important though that the application can tell when only a partial
-   configuration was done. The way we do that is through the error_idx field
-   of struct v4l2_ext_controls: if that is equal to the count field then no
-   controls were affected. Otherwise all controls before that index were
-   successful in performing their 'get' or 'set' operation, the control at
-   the given index failed, and you don't know what happened with the controls
-   after the failed one. Since if they were part of a control cluster they
-   could have been successfully processed (if a cluster member was encountered
-   at index < error_idx), they could have failed (if a cluster member was at
-   error_idx), or they may not have been processed yet (if the first cluster
-   member appeared after error_idx).
-
-   It is all fairly theoretical, though. In practice all you can do is to
-   bail out. If error_idx == count, then it is an application bug. If
-   error_idx < count then it is only an application bug if the error code was
-   EBUSY. That usually means that something started streaming just when you
-   tried to set the controls. In all other cases it is a driver/hardware
-   problem and all you can do is to retry or bail out.
-
-   Note that these rules do not apply to VIDIOC_TRY_EXT_CTRLS: since that
-   never modifies controls the error_idx is just set to whatever control
-   has an invalid value.
- */
-
-/* Prepare for the extended g/s/try functions.
-   Find the controls in the control array and do some basic checks. */
-static int prepare_ext_ctrls(struct v4l2_ctrl_handler *hdl,
-			     struct v4l2_ext_controls *cs,
-			     struct v4l2_ctrl_helper *helpers,
-			     struct video_device *vdev,
-			     bool get)
-{
-	struct v4l2_ctrl_helper *h;
-	bool have_clusters = false;
-	u32 i;
-
-	for (i = 0, h = helpers; i < cs->count; i++, h++) {
-		struct v4l2_ext_control *c = &cs->controls[i];
-		struct v4l2_ctrl_ref *ref;
-		struct v4l2_ctrl *ctrl;
-		u32 id = c->id & V4L2_CTRL_ID_MASK;
-
-		cs->error_idx = i;
-
-		if (cs->which &&
-		    cs->which != V4L2_CTRL_WHICH_DEF_VAL &&
-		    cs->which != V4L2_CTRL_WHICH_REQUEST_VAL &&
-		    V4L2_CTRL_ID2WHICH(id) != cs->which) {
-			dprintk(vdev,
-				"invalid which 0x%x or control id 0x%x\n",
-				cs->which, id);
-			return -EINVAL;
-		}
-
-		/* Old-style private controls are not allowed for
-		   extended controls */
-		if (id >= V4L2_CID_PRIVATE_BASE) {
-			dprintk(vdev,
-				"old-style private controls not allowed\n");
-			return -EINVAL;
-		}
-		ref = find_ref_lock(hdl, id);
-		if (ref == NULL) {
-			dprintk(vdev, "cannot find control id 0x%x\n", id);
-			return -EINVAL;
-		}
-		h->ref = ref;
-		ctrl = ref->ctrl;
-		if (ctrl->flags & V4L2_CTRL_FLAG_DISABLED) {
-			dprintk(vdev, "control id 0x%x is disabled\n", id);
-			return -EINVAL;
-		}
-
-		if (ctrl->cluster[0]->ncontrols > 1)
-			have_clusters = true;
-		if (ctrl->cluster[0] != ctrl)
-			ref = find_ref_lock(hdl, ctrl->cluster[0]->id);
-		if (ctrl->is_ptr && !ctrl->is_string) {
-			unsigned tot_size = ctrl->elems * ctrl->elem_size;
-
-			if (c->size < tot_size) {
-				/*
-				 * In the get case the application first
-				 * queries to obtain the size of the control.
-				 */
-				if (get) {
-					c->size = tot_size;
-					return -ENOSPC;
-				}
-				dprintk(vdev,
-					"pointer control id 0x%x size too small, %d bytes but %d bytes needed\n",
-					id, c->size, tot_size);
-				return -EFAULT;
-			}
-			c->size = tot_size;
-		}
-		/* Store the ref to the master control of the cluster */
-		h->mref = ref;
-		/* Initially set next to 0, meaning that there is no other
-		   control in this helper array belonging to the same
-		   cluster */
-		h->next = 0;
-	}
-
-	/* We are done if there were no controls that belong to a multi-
-	   control cluster. */
-	if (!have_clusters)
-		return 0;
-
-	/* The code below figures out in O(n) time which controls in the list
-	   belong to the same cluster. */
-
-	/* This has to be done with the handler lock taken. */
-	mutex_lock(hdl->lock);
-
-	/* First zero the helper field in the master control references */
-	for (i = 0; i < cs->count; i++)
-		helpers[i].mref->helper = NULL;
-	for (i = 0, h = helpers; i < cs->count; i++, h++) {
-		struct v4l2_ctrl_ref *mref = h->mref;
-
-		/* If the mref->helper is set, then it points to an earlier
-		   helper that belongs to the same cluster. */
-		if (mref->helper) {
-			/* Set the next field of mref->helper to the current
-			   index: this means that that earlier helper now
-			   points to the next helper in the same cluster. */
-			mref->helper->next = i;
-			/* mref should be set only for the first helper in the
-			   cluster, clear the others. */
-			h->mref = NULL;
-		}
-		/* Point the mref helper to the current helper struct. */
-		mref->helper = h;
-	}
-	mutex_unlock(hdl->lock);
-	return 0;
-}
-
-/* Handles the corner case where cs->count == 0. It checks whether the
-   specified control class exists. If that class ID is 0, then it checks
-   whether there are any controls at all. */
-static int class_check(struct v4l2_ctrl_handler *hdl, u32 which)
-{
-	if (which == 0 || which == V4L2_CTRL_WHICH_DEF_VAL ||
-	    which == V4L2_CTRL_WHICH_REQUEST_VAL)
-		return 0;
-	return find_ref_lock(hdl, which | 1) ? 0 : -EINVAL;
-}
-
-/*
- * Get extended controls. Allocates the helpers array if needed.
- *
- * Note that v4l2_g_ext_ctrls_common() with 'which' set to
- * V4L2_CTRL_WHICH_REQUEST_VAL is only called if the request was
- * completed, and in that case valid_p_req is true for all controls.
- */
-static int v4l2_g_ext_ctrls_common(struct v4l2_ctrl_handler *hdl,
-				   struct v4l2_ext_controls *cs,
-				   struct video_device *vdev)
-{
-	struct v4l2_ctrl_helper helper[4];
-	struct v4l2_ctrl_helper *helpers = helper;
-	int ret;
-	int i, j;
-	bool is_default, is_request;
-
-	is_default = (cs->which == V4L2_CTRL_WHICH_DEF_VAL);
-	is_request = (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL);
-
-	cs->error_idx = cs->count;
-	cs->which = V4L2_CTRL_ID2WHICH(cs->which);
-
-	if (hdl == NULL)
-		return -EINVAL;
-
-	if (cs->count == 0)
-		return class_check(hdl, cs->which);
-
-	if (cs->count > ARRAY_SIZE(helper)) {
-		helpers = kvmalloc_array(cs->count, sizeof(helper[0]),
-					 GFP_KERNEL);
-		if (helpers == NULL)
-			return -ENOMEM;
-	}
-
-	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, true);
-	cs->error_idx = cs->count;
-
-	for (i = 0; !ret && i < cs->count; i++)
-		if (helpers[i].ref->ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)
-			ret = -EACCES;
-
-	for (i = 0; !ret && i < cs->count; i++) {
-		struct v4l2_ctrl *master;
-		bool is_volatile = false;
-		u32 idx = i;
-
-		if (helpers[i].mref == NULL)
-			continue;
-
-		master = helpers[i].mref->ctrl;
-		cs->error_idx = i;
-
-		v4l2_ctrl_lock(master);
-
-		/*
-		 * g_volatile_ctrl will update the new control values.
-		 * This makes no sense for V4L2_CTRL_WHICH_DEF_VAL and
-		 * V4L2_CTRL_WHICH_REQUEST_VAL. In the case of requests
-		 * it is v4l2_ctrl_request_complete() that copies the
-		 * volatile controls at the time of request completion
-		 * to the request, so you don't want to do that again.
-		 */
-		if (!is_default && !is_request &&
-		    ((master->flags & V4L2_CTRL_FLAG_VOLATILE) ||
-		    (master->has_volatiles && !is_cur_manual(master)))) {
-			for (j = 0; j < master->ncontrols; j++)
-				cur_to_new(master->cluster[j]);
-			ret = call_op(master, g_volatile_ctrl);
-			is_volatile = true;
-		}
-
-		if (ret) {
-			v4l2_ctrl_unlock(master);
-			break;
-		}
-
-		/*
-		 * Copy the default value (if is_default is true), the
-		 * request value (if is_request is true and p_req is valid),
-		 * the new volatile value (if is_volatile is true) or the
-		 * current value.
-		 */
-		do {
-			struct v4l2_ctrl_ref *ref = helpers[idx].ref;
-
-			if (is_default)
-				ret = def_to_user(cs->controls + idx, ref->ctrl);
-			else if (is_request && ref->valid_p_req)
-				ret = req_to_user(cs->controls + idx, ref);
-			else if (is_volatile)
-				ret = new_to_user(cs->controls + idx, ref->ctrl);
-			else
-				ret = cur_to_user(cs->controls + idx, ref->ctrl);
-			idx = helpers[idx].next;
-		} while (!ret && idx);
-
-		v4l2_ctrl_unlock(master);
-	}
-
-	if (cs->count > ARRAY_SIZE(helper))
-		kvfree(helpers);
-	return ret;
-}
-
-static struct media_request_object *
-v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
-			struct media_request *req, bool set)
-{
-	struct media_request_object *obj;
-	struct v4l2_ctrl_handler *new_hdl;
-	int ret;
-
-	if (IS_ERR(req))
-		return ERR_CAST(req);
-
-	if (set && WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING))
-		return ERR_PTR(-EBUSY);
-
-	obj = media_request_object_find(req, &req_ops, hdl);
-	if (obj)
-		return obj;
-	/*
-	 * If there are no controls in this completed request,
-	 * then that can only happen if:
-	 *
-	 * 1) no controls were present in the queued request, and
-	 * 2) v4l2_ctrl_request_complete() could not allocate a
-	 *    control handler object to store the completed state in.
-	 *
-	 * So return ENOMEM to indicate that there was an out-of-memory
-	 * error.
-	 */
-	if (!set)
-		return ERR_PTR(-ENOMEM);
-
-	new_hdl = kzalloc(sizeof(*new_hdl), GFP_KERNEL);
-	if (!new_hdl)
-		return ERR_PTR(-ENOMEM);
-
-	obj = &new_hdl->req_obj;
-	ret = v4l2_ctrl_handler_init(new_hdl, (hdl->nr_of_buckets - 1) * 8);
-	if (!ret)
-		ret = v4l2_ctrl_request_bind(req, new_hdl, hdl);
-	if (ret) {
-		v4l2_ctrl_handler_free(new_hdl);
-		kfree(new_hdl);
-		return ERR_PTR(ret);
-	}
-
-	media_request_object_get(obj);
-	return obj;
-}
-
-int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
-		     struct media_device *mdev, struct v4l2_ext_controls *cs)
-{
-	struct media_request_object *obj = NULL;
-	struct media_request *req = NULL;
-	int ret;
-
-	if (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL) {
-		if (!mdev || cs->request_fd < 0)
-			return -EINVAL;
-
-		req = media_request_get_by_fd(mdev, cs->request_fd);
-		if (IS_ERR(req))
-			return PTR_ERR(req);
-
-		if (req->state != MEDIA_REQUEST_STATE_COMPLETE) {
-			media_request_put(req);
-			return -EACCES;
-		}
-
-		ret = media_request_lock_for_access(req);
-		if (ret) {
-			media_request_put(req);
-			return ret;
-		}
-
-		obj = v4l2_ctrls_find_req_obj(hdl, req, false);
-		if (IS_ERR(obj)) {
-			media_request_unlock_for_access(req);
-			media_request_put(req);
-			return PTR_ERR(obj);
-		}
-
-		hdl = container_of(obj, struct v4l2_ctrl_handler,
-				   req_obj);
-	}
-
-	ret = v4l2_g_ext_ctrls_common(hdl, cs, vdev);
-
-	if (obj) {
-		media_request_unlock_for_access(req);
-		media_request_object_put(obj);
-		media_request_put(req);
-	}
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_g_ext_ctrls);
-
-/* Helper function to get a single control */
-static int get_ctrl(struct v4l2_ctrl *ctrl, struct v4l2_ext_control *c)
-{
-	struct v4l2_ctrl *master = ctrl->cluster[0];
-	int ret = 0;
-	int i;
-
-	/* Compound controls are not supported. The new_to_user() and
-	 * cur_to_user() calls below would need to be modified not to access
-	 * userspace memory when called from get_ctrl().
-	 */
-	if (!ctrl->is_int && ctrl->type != V4L2_CTRL_TYPE_INTEGER64)
-		return -EINVAL;
-
-	if (ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY)
-		return -EACCES;
-
-	v4l2_ctrl_lock(master);
-	/* g_volatile_ctrl will update the current control values */
-	if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
-		for (i = 0; i < master->ncontrols; i++)
-			cur_to_new(master->cluster[i]);
-		ret = call_op(master, g_volatile_ctrl);
-		new_to_user(c, ctrl);
-	} else {
-		cur_to_user(c, ctrl);
-	}
-	v4l2_ctrl_unlock(master);
-	return ret;
-}
-
-int v4l2_g_ctrl(struct v4l2_ctrl_handler *hdl, struct v4l2_control *control)
-{
-	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, control->id);
-	struct v4l2_ext_control c;
-	int ret;
-
-	if (ctrl == NULL || !ctrl->is_int)
-		return -EINVAL;
-	ret = get_ctrl(ctrl, &c);
-	control->value = c.value;
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_g_ctrl);
-
-s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl)
-{
-	struct v4l2_ext_control c;
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(!ctrl->is_int))
-		return 0;
-	c.value = 0;
-	get_ctrl(ctrl, &c);
-	return c.value;
-}
-EXPORT_SYMBOL(v4l2_ctrl_g_ctrl);
-
-s64 v4l2_ctrl_g_ctrl_int64(struct v4l2_ctrl *ctrl)
-{
-	struct v4l2_ext_control c;
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(ctrl->is_ptr || ctrl->type != V4L2_CTRL_TYPE_INTEGER64))
-		return 0;
-	c.value64 = 0;
-	get_ctrl(ctrl, &c);
-	return c.value64;
-}
-EXPORT_SYMBOL(v4l2_ctrl_g_ctrl_int64);
-
-
-/* Core function that calls try/s_ctrl and ensures that the new value is
-   copied to the current value on a set.
-   Must be called with ctrl->handler->lock held. */
-static int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master,
-			      bool set, u32 ch_flags)
-{
-	bool update_flag;
-	int ret;
-	int i;
-
-	/* Go through the cluster and either validate the new value or
-	   (if no new value was set), copy the current value to the new
-	   value, ensuring a consistent view for the control ops when
-	   called. */
-	for (i = 0; i < master->ncontrols; i++) {
-		struct v4l2_ctrl *ctrl = master->cluster[i];
-
-		if (ctrl == NULL)
-			continue;
-
-		if (!ctrl->is_new) {
-			cur_to_new(ctrl);
-			continue;
-		}
-		/* Check again: it may have changed since the
-		   previous check in try_or_set_ext_ctrls(). */
-		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED))
-			return -EBUSY;
-	}
-
-	ret = call_op(master, try_ctrl);
-
-	/* Don't set if there is no change */
-	if (ret || !set || !cluster_changed(master))
-		return ret;
-	ret = call_op(master, s_ctrl);
-	if (ret)
-		return ret;
-
-	/* If OK, then make the new values permanent. */
-	update_flag = is_cur_manual(master) != is_new_manual(master);
-
-	for (i = 0; i < master->ncontrols; i++) {
-		/*
-		 * If we switch from auto to manual mode, and this cluster
-		 * contains volatile controls, then all non-master controls
-		 * have to be marked as changed. The 'new' value contains
-		 * the volatile value (obtained by update_from_auto_cluster),
-		 * which now has to become the current value.
-		 */
-		if (i && update_flag && is_new_manual(master) &&
-		    master->has_volatiles && master->cluster[i])
-			master->cluster[i]->has_changed = true;
-
-		new_to_cur(fh, master->cluster[i], ch_flags |
-			((update_flag && i > 0) ? V4L2_EVENT_CTRL_CH_FLAGS : 0));
-	}
-	return 0;
-}
-
-/* Validate controls. */
-static int validate_ctrls(struct v4l2_ext_controls *cs,
-			  struct v4l2_ctrl_helper *helpers,
-			  struct video_device *vdev,
-			  bool set)
-{
-	unsigned i;
-	int ret = 0;
-
-	cs->error_idx = cs->count;
-	for (i = 0; i < cs->count; i++) {
-		struct v4l2_ctrl *ctrl = helpers[i].ref->ctrl;
-		union v4l2_ctrl_ptr p_new;
-
-		cs->error_idx = i;
-
-		if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY) {
-			dprintk(vdev,
-				"control id 0x%x is read-only\n",
-				ctrl->id);
-			return -EACCES;
-		}
-		/* This test is also done in try_set_control_cluster() which
-		   is called in atomic context, so that has the final say,
-		   but it makes sense to do an up-front check as well. Once
-		   an error occurs in try_set_control_cluster() some other
-		   controls may have been set already and we want to do a
-		   best-effort to avoid that. */
-		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)) {
-			dprintk(vdev,
-				"control id 0x%x is grabbed, cannot set\n",
-				ctrl->id);
-			return -EBUSY;
-		}
-		/*
-		 * Skip validation for now if the payload needs to be copied
-		 * from userspace into kernelspace. We'll validate those later.
-		 */
-		if (ctrl->is_ptr)
-			continue;
-		if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
-			p_new.p_s64 = &cs->controls[i].value64;
-		else
-			p_new.p_s32 = &cs->controls[i].value;
-		ret = validate_new(ctrl, p_new);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
-/* Obtain the current volatile values of an autocluster and mark them
-   as new. */
-static void update_from_auto_cluster(struct v4l2_ctrl *master)
-{
-	int i;
-
-	for (i = 1; i < master->ncontrols; i++)
-		cur_to_new(master->cluster[i]);
-	if (!call_op(master, g_volatile_ctrl))
-		for (i = 1; i < master->ncontrols; i++)
-			if (master->cluster[i])
-				master->cluster[i]->is_new = 1;
-}
-
-/* Try or try-and-set controls */
-static int try_set_ext_ctrls_common(struct v4l2_fh *fh,
-				    struct v4l2_ctrl_handler *hdl,
-				    struct v4l2_ext_controls *cs,
-				    struct video_device *vdev, bool set)
-{
-	struct v4l2_ctrl_helper helper[4];
-	struct v4l2_ctrl_helper *helpers = helper;
-	unsigned i, j;
-	int ret;
-
-	cs->error_idx = cs->count;
-
-	/* Default value cannot be changed */
-	if (cs->which == V4L2_CTRL_WHICH_DEF_VAL) {
-		dprintk(vdev, "%s: cannot change default value\n",
-			video_device_node_name(vdev));
-		return -EINVAL;
-	}
-
-	cs->which = V4L2_CTRL_ID2WHICH(cs->which);
-
-	if (hdl == NULL) {
-		dprintk(vdev, "%s: invalid null control handler\n",
-			video_device_node_name(vdev));
-		return -EINVAL;
-	}
-
-	if (cs->count == 0)
-		return class_check(hdl, cs->which);
-
-	if (cs->count > ARRAY_SIZE(helper)) {
-		helpers = kvmalloc_array(cs->count, sizeof(helper[0]),
-					 GFP_KERNEL);
-		if (!helpers)
-			return -ENOMEM;
-	}
-	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, false);
-	if (!ret)
-		ret = validate_ctrls(cs, helpers, vdev, set);
-	if (ret && set)
-		cs->error_idx = cs->count;
-	for (i = 0; !ret && i < cs->count; i++) {
-		struct v4l2_ctrl *master;
-		u32 idx = i;
-
-		if (helpers[i].mref == NULL)
-			continue;
-
-		cs->error_idx = i;
-		master = helpers[i].mref->ctrl;
-		v4l2_ctrl_lock(master);
-
-		/* Reset the 'is_new' flags of the cluster */
-		for (j = 0; j < master->ncontrols; j++)
-			if (master->cluster[j])
-				master->cluster[j]->is_new = 0;
-
-		/* For volatile autoclusters that are currently in auto mode
-		   we need to discover if it will be set to manual mode.
-		   If so, then we have to copy the current volatile values
-		   first since those will become the new manual values (which
-		   may be overwritten by explicit new values from this set
-		   of controls). */
-		if (master->is_auto && master->has_volatiles &&
-						!is_cur_manual(master)) {
-			/* Pick an initial non-manual value */
-			s32 new_auto_val = master->manual_mode_value + 1;
-			u32 tmp_idx = idx;
-
-			do {
-				/* Check if the auto control is part of the
-				   list, and remember the new value. */
-				if (helpers[tmp_idx].ref->ctrl == master)
-					new_auto_val = cs->controls[tmp_idx].value;
-				tmp_idx = helpers[tmp_idx].next;
-			} while (tmp_idx);
-			/* If the new value == the manual value, then copy
-			   the current volatile values. */
-			if (new_auto_val == master->manual_mode_value)
-				update_from_auto_cluster(master);
-		}
-
-		/* Copy the new caller-supplied control values.
-		   user_to_new() sets 'is_new' to 1. */
-		do {
-			struct v4l2_ctrl *ctrl = helpers[idx].ref->ctrl;
-
-			ret = user_to_new(cs->controls + idx, ctrl);
-			if (!ret && ctrl->is_ptr) {
-				ret = validate_new(ctrl, ctrl->p_new);
-				if (ret)
-					dprintk(vdev,
-						"failed to validate control %s (%d)\n",
-						v4l2_ctrl_get_name(ctrl->id), ret);
-			}
-			idx = helpers[idx].next;
-		} while (!ret && idx);
-
-		if (!ret)
-			ret = try_or_set_cluster(fh, master,
-						 !hdl->req_obj.req && set, 0);
-		if (!ret && hdl->req_obj.req && set) {
-			for (j = 0; j < master->ncontrols; j++) {
-				struct v4l2_ctrl_ref *ref =
-					find_ref(hdl, master->cluster[j]->id);
-
-				new_to_req(ref);
-			}
-		}
-
-		/* Copy the new values back to userspace. */
-		if (!ret) {
-			idx = i;
-			do {
-				ret = new_to_user(cs->controls + idx,
-						helpers[idx].ref->ctrl);
-				idx = helpers[idx].next;
-			} while (!ret && idx);
-		}
-		v4l2_ctrl_unlock(master);
-	}
-
-	if (cs->count > ARRAY_SIZE(helper))
-		kvfree(helpers);
-	return ret;
-}
-
-static int try_set_ext_ctrls(struct v4l2_fh *fh,
-			     struct v4l2_ctrl_handler *hdl,
-			     struct video_device *vdev,
-			     struct media_device *mdev,
-			     struct v4l2_ext_controls *cs, bool set)
-{
-	struct media_request_object *obj = NULL;
-	struct media_request *req = NULL;
-	int ret;
-
-	if (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL) {
-		if (!mdev) {
-			dprintk(vdev, "%s: missing media device\n",
-				video_device_node_name(vdev));
-			return -EINVAL;
-		}
-
-		if (cs->request_fd < 0) {
-			dprintk(vdev, "%s: invalid request fd %d\n",
-				video_device_node_name(vdev), cs->request_fd);
-			return -EINVAL;
-		}
-
-		req = media_request_get_by_fd(mdev, cs->request_fd);
-		if (IS_ERR(req)) {
-			dprintk(vdev, "%s: cannot find request fd %d\n",
-				video_device_node_name(vdev), cs->request_fd);
-			return PTR_ERR(req);
-		}
-
-		ret = media_request_lock_for_update(req);
-		if (ret) {
-			dprintk(vdev, "%s: cannot lock request fd %d\n",
-				video_device_node_name(vdev), cs->request_fd);
-			media_request_put(req);
-			return ret;
-		}
-
-		obj = v4l2_ctrls_find_req_obj(hdl, req, set);
-		if (IS_ERR(obj)) {
-			dprintk(vdev,
-				"%s: cannot find request object for request fd %d\n",
-				video_device_node_name(vdev),
-				cs->request_fd);
-			media_request_unlock_for_update(req);
-			media_request_put(req);
-			return PTR_ERR(obj);
-		}
-		hdl = container_of(obj, struct v4l2_ctrl_handler,
-				   req_obj);
-	}
-
-	ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set);
-	if (ret)
-		dprintk(vdev,
-			"%s: try_set_ext_ctrls_common failed (%d)\n",
-			video_device_node_name(vdev), ret);
-
-	if (obj) {
-		media_request_unlock_for_update(req);
-		media_request_object_put(obj);
-		media_request_put(req);
-	}
-
-	return ret;
-}
-
-int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
-		       struct video_device *vdev,
-		       struct media_device *mdev,
-		       struct v4l2_ext_controls *cs)
-{
-	return try_set_ext_ctrls(NULL, hdl, vdev, mdev, cs, false);
-}
-EXPORT_SYMBOL(v4l2_try_ext_ctrls);
-
-int v4l2_s_ext_ctrls(struct v4l2_fh *fh,
-		     struct v4l2_ctrl_handler *hdl,
-		     struct video_device *vdev,
-		     struct media_device *mdev,
-		     struct v4l2_ext_controls *cs)
-{
-	return try_set_ext_ctrls(fh, hdl, vdev, mdev, cs, true);
-}
-EXPORT_SYMBOL(v4l2_s_ext_ctrls);
-
-/* Helper function for VIDIOC_S_CTRL compatibility */
-static int set_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl, u32 ch_flags)
-{
-	struct v4l2_ctrl *master = ctrl->cluster[0];
-	int ret;
-	int i;
-
-	/* Reset the 'is_new' flags of the cluster */
-	for (i = 0; i < master->ncontrols; i++)
-		if (master->cluster[i])
-			master->cluster[i]->is_new = 0;
-
-	ret = validate_new(ctrl, ctrl->p_new);
-	if (ret)
-		return ret;
-
-	/* For autoclusters with volatiles that are switched from auto to
-	   manual mode we have to update the current volatile values since
-	   those will become the initial manual values after such a switch. */
-	if (master->is_auto && master->has_volatiles && ctrl == master &&
-	    !is_cur_manual(master) && ctrl->val == master->manual_mode_value)
-		update_from_auto_cluster(master);
-
-	ctrl->is_new = 1;
-	return try_or_set_cluster(fh, master, true, ch_flags);
-}
-
-/* Helper function for VIDIOC_S_CTRL compatibility */
-static int set_ctrl_lock(struct v4l2_fh *fh, struct v4l2_ctrl *ctrl,
-			 struct v4l2_ext_control *c)
-{
-	int ret;
-
-	v4l2_ctrl_lock(ctrl);
-	user_to_new(c, ctrl);
-	ret = set_ctrl(fh, ctrl, 0);
-	if (!ret)
-		cur_to_user(c, ctrl);
-	v4l2_ctrl_unlock(ctrl);
-	return ret;
-}
-
-int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
-					struct v4l2_control *control)
-{
-	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(hdl, control->id);
-	struct v4l2_ext_control c = { control->id };
-	int ret;
-
-	if (ctrl == NULL || !ctrl->is_int)
-		return -EINVAL;
-
-	if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)
-		return -EACCES;
-
-	c.value = control->value;
-	ret = set_ctrl_lock(fh, ctrl, &c);
-	control->value = c.value;
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_s_ctrl);
-
-int __v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val)
-{
-	lockdep_assert_held(ctrl->handler->lock);
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(!ctrl->is_int))
-		return -EINVAL;
-	ctrl->val = val;
-	return set_ctrl(NULL, ctrl, 0);
-}
-EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl);
-
-int __v4l2_ctrl_s_ctrl_int64(struct v4l2_ctrl *ctrl, s64 val)
-{
-	lockdep_assert_held(ctrl->handler->lock);
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(ctrl->is_ptr || ctrl->type != V4L2_CTRL_TYPE_INTEGER64))
-		return -EINVAL;
-	*ctrl->p_new.p_s64 = val;
-	return set_ctrl(NULL, ctrl, 0);
-}
-EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_int64);
-
-int __v4l2_ctrl_s_ctrl_string(struct v4l2_ctrl *ctrl, const char *s)
-{
-	lockdep_assert_held(ctrl->handler->lock);
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(ctrl->type != V4L2_CTRL_TYPE_STRING))
-		return -EINVAL;
-	strscpy(ctrl->p_new.p_char, s, ctrl->maximum + 1);
-	return set_ctrl(NULL, ctrl, 0);
-}
-EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_string);
-
-int __v4l2_ctrl_s_ctrl_compound(struct v4l2_ctrl *ctrl,
-				enum v4l2_ctrl_type type, const void *p)
-{
-	lockdep_assert_held(ctrl->handler->lock);
-
-	/* It's a driver bug if this happens. */
-	if (WARN_ON(ctrl->type != type))
-		return -EINVAL;
-	memcpy(ctrl->p_new.p, p, ctrl->elems * ctrl->elem_size);
-	return set_ctrl(NULL, ctrl, 0);
-}
-EXPORT_SYMBOL(__v4l2_ctrl_s_ctrl_compound);
-
-void v4l2_ctrl_request_complete(struct media_request *req,
-				struct v4l2_ctrl_handler *main_hdl)
-{
-	struct media_request_object *obj;
-	struct v4l2_ctrl_handler *hdl;
-	struct v4l2_ctrl_ref *ref;
-
-	if (!req || !main_hdl)
-		return;
-
-	/*
-	 * Note that it is valid if nothing was found. It means
-	 * that this request doesn't have any controls and so just
-	 * wants to leave the controls unchanged.
-	 */
-	obj = media_request_object_find(req, &req_ops, main_hdl);
-	if (!obj) {
-		int ret;
-
-		/* Create a new request so the driver can return controls */
-		hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
-		if (!hdl)
-			return;
-
-		ret = v4l2_ctrl_handler_init(hdl, (main_hdl->nr_of_buckets - 1) * 8);
-		if (!ret)
-			ret = v4l2_ctrl_request_bind(req, hdl, main_hdl);
-		if (ret) {
-			v4l2_ctrl_handler_free(hdl);
-			kfree(hdl);
-			return;
-		}
-		hdl->request_is_queued = true;
-		obj = media_request_object_find(req, &req_ops, main_hdl);
-	}
-	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
-
-	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-		struct v4l2_ctrl *ctrl = ref->ctrl;
-		struct v4l2_ctrl *master = ctrl->cluster[0];
-		unsigned int i;
-
-		if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) {
-			v4l2_ctrl_lock(master);
-			/* g_volatile_ctrl will update the current control values */
-			for (i = 0; i < master->ncontrols; i++)
-				cur_to_new(master->cluster[i]);
-			call_op(master, g_volatile_ctrl);
-			new_to_req(ref);
-			v4l2_ctrl_unlock(master);
-			continue;
-		}
-		if (ref->valid_p_req)
-			continue;
-
-		/* Copy the current control value into the request */
-		v4l2_ctrl_lock(ctrl);
-		cur_to_req(ref);
-		v4l2_ctrl_unlock(ctrl);
-	}
-
-	mutex_lock(main_hdl->lock);
-	WARN_ON(!hdl->request_is_queued);
-	list_del_init(&hdl->requests_queued);
-	hdl->request_is_queued = false;
-	mutex_unlock(main_hdl->lock);
-	media_request_object_complete(obj);
-	media_request_object_put(obj);
-}
-EXPORT_SYMBOL(v4l2_ctrl_request_complete);
-
-int v4l2_ctrl_request_setup(struct media_request *req,
-			     struct v4l2_ctrl_handler *main_hdl)
-{
-	struct media_request_object *obj;
-	struct v4l2_ctrl_handler *hdl;
-	struct v4l2_ctrl_ref *ref;
-	int ret = 0;
-
-	if (!req || !main_hdl)
-		return 0;
-
-	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED))
-		return -EBUSY;
-
-	/*
-	 * Note that it is valid if nothing was found. It means
-	 * that this request doesn't have any controls and so just
-	 * wants to leave the controls unchanged.
-	 */
-	obj = media_request_object_find(req, &req_ops, main_hdl);
-	if (!obj)
-		return 0;
-	if (obj->completed) {
-		media_request_object_put(obj);
-		return -EBUSY;
-	}
-	hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj);
-
-	list_for_each_entry(ref, &hdl->ctrl_refs, node)
-		ref->req_done = false;
-
-	list_for_each_entry(ref, &hdl->ctrl_refs, node) {
-		struct v4l2_ctrl *ctrl = ref->ctrl;
-		struct v4l2_ctrl *master = ctrl->cluster[0];
-		bool have_new_data = false;
-		int i;
-
-		/*
-		 * Skip if this control was already handled by a cluster.
-		 * Skip button controls and read-only controls.
-		 */
-		if (ref->req_done || (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY))
-			continue;
-
-		v4l2_ctrl_lock(master);
-		for (i = 0; i < master->ncontrols; i++) {
-			if (master->cluster[i]) {
-				struct v4l2_ctrl_ref *r =
-					find_ref(hdl, master->cluster[i]->id);
-
-				if (r->valid_p_req) {
-					have_new_data = true;
-					break;
-				}
-			}
-		}
-		if (!have_new_data) {
-			v4l2_ctrl_unlock(master);
-			continue;
-		}
-
-		for (i = 0; i < master->ncontrols; i++) {
-			if (master->cluster[i]) {
-				struct v4l2_ctrl_ref *r =
-					find_ref(hdl, master->cluster[i]->id);
-
-				req_to_new(r);
-				master->cluster[i]->is_new = 1;
-				r->req_done = true;
-			}
-		}
-		/*
-		 * For volatile autoclusters that are currently in auto mode
-		 * we need to discover if it will be set to manual mode.
-		 * If so, then we have to copy the current volatile values
-		 * first since those will become the new manual values (which
-		 * may be overwritten by explicit new values from this set
-		 * of controls).
-		 */
-		if (master->is_auto && master->has_volatiles &&
-		    !is_cur_manual(master)) {
-			s32 new_auto_val = *master->p_new.p_s32;
-
-			/*
-			 * If the new value == the manual value, then copy
-			 * the current volatile values.
-			 */
-			if (new_auto_val == master->manual_mode_value)
-				update_from_auto_cluster(master);
-		}
-
-		ret = try_or_set_cluster(NULL, master, true, 0);
-		v4l2_ctrl_unlock(master);
-
-		if (ret)
-			break;
-	}
-
-	media_request_object_put(obj);
-	return ret;
-}
-EXPORT_SYMBOL(v4l2_ctrl_request_setup);
-
-void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl, v4l2_ctrl_notify_fnc notify, void *priv)
-{
-	if (ctrl == NULL)
-		return;
-	if (notify == NULL) {
-		ctrl->call_notify = 0;
-		return;
-	}
-	if (WARN_ON(ctrl->handler->notify && ctrl->handler->notify != notify))
-		return;
-	ctrl->handler->notify = notify;
-	ctrl->handler->notify_priv = priv;
-	ctrl->call_notify = 1;
-}
-EXPORT_SYMBOL(v4l2_ctrl_notify);
-
-int __v4l2_ctrl_modify_range(struct v4l2_ctrl *ctrl,
-			s64 min, s64 max, u64 step, s64 def)
-{
-	bool value_changed;
-	bool range_changed = false;
-	int ret;
-
-	lockdep_assert_held(ctrl->handler->lock);
-
-	switch (ctrl->type) {
-	case V4L2_CTRL_TYPE_INTEGER:
-	case V4L2_CTRL_TYPE_INTEGER64:
-	case V4L2_CTRL_TYPE_BOOLEAN:
-	case V4L2_CTRL_TYPE_MENU:
-	case V4L2_CTRL_TYPE_INTEGER_MENU:
-	case V4L2_CTRL_TYPE_BITMASK:
-	case V4L2_CTRL_TYPE_U8:
-	case V4L2_CTRL_TYPE_U16:
-	case V4L2_CTRL_TYPE_U32:
-		if (ctrl->is_array)
-			return -EINVAL;
-		ret = check_range(ctrl->type, min, max, step, def);
-		if (ret)
-			return ret;
-		break;
-	default:
-		return -EINVAL;
-	}
-	if ((ctrl->minimum != min) || (ctrl->maximum != max) ||
-		(ctrl->step != step) || ctrl->default_value != def) {
-		range_changed = true;
-		ctrl->minimum = min;
-		ctrl->maximum = max;
-		ctrl->step = step;
-		ctrl->default_value = def;
-	}
-	cur_to_new(ctrl);
-	if (validate_new(ctrl, ctrl->p_new)) {
-		if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
-			*ctrl->p_new.p_s64 = def;
-		else
-			*ctrl->p_new.p_s32 = def;
-	}
-
-	if (ctrl->type == V4L2_CTRL_TYPE_INTEGER64)
-		value_changed = *ctrl->p_new.p_s64 != *ctrl->p_cur.p_s64;
-	else
-		value_changed = *ctrl->p_new.p_s32 != *ctrl->p_cur.p_s32;
-	if (value_changed)
-		ret = set_ctrl(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE);
-	else if (range_changed)
-		send_event(NULL, ctrl, V4L2_EVENT_CTRL_CH_RANGE);
-	return ret;
-}
-EXPORT_SYMBOL(__v4l2_ctrl_modify_range);
-
-static int v4l2_ctrl_add_event(struct v4l2_subscribed_event *sev, unsigned elems)
-{
-	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id);
-
-	if (ctrl == NULL)
-		return -EINVAL;
-
-	v4l2_ctrl_lock(ctrl);
-	list_add_tail(&sev->node, &ctrl->ev_subs);
-	if (ctrl->type != V4L2_CTRL_TYPE_CTRL_CLASS &&
-	    (sev->flags & V4L2_EVENT_SUB_FL_SEND_INITIAL)) {
-		struct v4l2_event ev;
-		u32 changes = V4L2_EVENT_CTRL_CH_FLAGS;
-
-		if (!(ctrl->flags & V4L2_CTRL_FLAG_WRITE_ONLY))
-			changes |= V4L2_EVENT_CTRL_CH_VALUE;
-		fill_event(&ev, ctrl, changes);
-		/* Mark the queue as active, allowing this initial
-		   event to be accepted. */
-		sev->elems = elems;
-		v4l2_event_queue_fh(sev->fh, &ev);
-	}
-	v4l2_ctrl_unlock(ctrl);
-	return 0;
-}
-
-static void v4l2_ctrl_del_event(struct v4l2_subscribed_event *sev)
-{
-	struct v4l2_ctrl *ctrl = v4l2_ctrl_find(sev->fh->ctrl_handler, sev->id);
-
-	if (ctrl == NULL)
-		return;
-
-	v4l2_ctrl_lock(ctrl);
-	list_del(&sev->node);
-	v4l2_ctrl_unlock(ctrl);
-}
-
-void v4l2_ctrl_replace(struct v4l2_event *old, const struct v4l2_event *new)
-{
-	u32 old_changes = old->u.ctrl.changes;
-
-	old->u.ctrl = new->u.ctrl;
-	old->u.ctrl.changes |= old_changes;
-}
-EXPORT_SYMBOL(v4l2_ctrl_replace);
-
-void v4l2_ctrl_merge(const struct v4l2_event *old, struct v4l2_event *new)
-{
-	new->u.ctrl.changes |= old->u.ctrl.changes;
-}
-EXPORT_SYMBOL(v4l2_ctrl_merge);
-
-const struct v4l2_subscribed_event_ops v4l2_ctrl_sub_ev_ops = {
-	.add = v4l2_ctrl_add_event,
-	.del = v4l2_ctrl_del_event,
-	.replace = v4l2_ctrl_replace,
-	.merge = v4l2_ctrl_merge,
-};
-EXPORT_SYMBOL(v4l2_ctrl_sub_ev_ops);
-
-int v4l2_ctrl_log_status(struct file *file, void *fh)
-{
-	struct video_device *vfd = video_devdata(file);
-	struct v4l2_fh *vfh = file->private_data;
-
-	if (test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags) && vfd->v4l2_dev)
-		v4l2_ctrl_handler_log_status(vfh->ctrl_handler,
-			vfd->v4l2_dev->name);
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_ctrl_log_status);
-
-int v4l2_ctrl_subscribe_event(struct v4l2_fh *fh,
-				const struct v4l2_event_subscription *sub)
-{
-	if (sub->type == V4L2_EVENT_CTRL)
-		return v4l2_event_subscribe(fh, sub, 0, &v4l2_ctrl_sub_ev_ops);
-	return -EINVAL;
-}
-EXPORT_SYMBOL(v4l2_ctrl_subscribe_event);
-
-int v4l2_ctrl_subdev_subscribe_event(struct v4l2_subdev *sd, struct v4l2_fh *fh,
-				     struct v4l2_event_subscription *sub)
-{
-	if (!sd->ctrl_handler)
-		return -EINVAL;
-	return v4l2_ctrl_subscribe_event(fh, sub);
-}
-EXPORT_SYMBOL(v4l2_ctrl_subdev_subscribe_event);
-
-__poll_t v4l2_ctrl_poll(struct file *file, struct poll_table_struct *wait)
-{
-	struct v4l2_fh *fh = file->private_data;
-
-	poll_wait(file, &fh->wait, wait);
-	if (v4l2_event_pending(fh))
-		return EPOLLPRI;
-	return 0;
-}
-EXPORT_SYMBOL(v4l2_ctrl_poll);
-
-int v4l2_ctrl_new_fwnode_properties(struct v4l2_ctrl_handler *hdl,
-				    const struct v4l2_ctrl_ops *ctrl_ops,
-				    const struct v4l2_fwnode_device_properties *p)
-{
-	if (p->orientation != V4L2_FWNODE_PROPERTY_UNSET) {
-		u32 orientation_ctrl;
-
-		switch (p->orientation) {
-		case V4L2_FWNODE_ORIENTATION_FRONT:
-			orientation_ctrl = V4L2_CAMERA_ORIENTATION_FRONT;
-			break;
-		case V4L2_FWNODE_ORIENTATION_BACK:
-			orientation_ctrl = V4L2_CAMERA_ORIENTATION_BACK;
-			break;
-		case V4L2_FWNODE_ORIENTATION_EXTERNAL:
-			orientation_ctrl = V4L2_CAMERA_ORIENTATION_EXTERNAL;
-			break;
-		default:
-			return -EINVAL;
-		}
-		if (!v4l2_ctrl_new_std_menu(hdl, ctrl_ops,
-					    V4L2_CID_CAMERA_ORIENTATION,
-					    V4L2_CAMERA_ORIENTATION_EXTERNAL, 0,
-					    orientation_ctrl))
-			return hdl->error;
-	}
-
-	if (p->rotation != V4L2_FWNODE_PROPERTY_UNSET) {
-		if (!v4l2_ctrl_new_std(hdl, ctrl_ops,
-				       V4L2_CID_CAMERA_SENSOR_ROTATION,
-				       p->rotation, p->rotation, 1,
-				       p->rotation))
-			return hdl->error;
-	}
-
-	return hdl->error;
-}
-EXPORT_SYMBOL(v4l2_ctrl_new_fwnode_properties);
-- 
GitLab


From 130708331bc6b03a3c3a78599333faddfebbd0f3 Mon Sep 17 00:00:00 2001
From: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Date: Sun, 23 May 2021 21:31:30 +0800
Subject: [PATCH 1521/3804] cpu/hotplug: Simplify access to percpu cpuhp_state

It is unnecessary to invoke per_cpu_ptr() every time to access cpuhp_state.
Use the available pointer instead.

Signed-off-by: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/1621776690-13264-1-git-send-email-yuanzhaoxiong@baidu.com
---
 kernel/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e538518556f47..2942cb4644c55 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -961,7 +961,7 @@ static int takedown_cpu(unsigned int cpu)
 	int err;
 
 	/* Park the smpboot threads */
-	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+	kthread_park(st->thread);
 
 	/*
 	 * Prevent irq alloc/free while the dying cpu reorganizes the
@@ -977,7 +977,7 @@ static int takedown_cpu(unsigned int cpu)
 		/* CPU refused to die */
 		irq_unlock_sparse();
 		/* Unpark the hotplug thread so we can rollback there */
-		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
+		kthread_unpark(st->thread);
 		return err;
 	}
 	BUG_ON(cpu_online(cpu));
-- 
GitLab


From a82adc7650044b5555d65078bda07866efa4a73d Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 17 May 2021 14:30:12 +0100
Subject: [PATCH 1522/3804] futex: Deduplicate cond_resched() invocation in
 futex_wake_op()

After pagefaulting in futex_wake_op() both branches do cond_resched()
before retry. Deduplicate it as compilers cannot figure it out themselves.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Link: https://lore.kernel.org/r/9b2588c1fd33c91fb01c4e348a3b647ab2c8baab.1621258128.git.asml.silence@gmail.com
---
 kernel/futex.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 2f386f0129001..08008c225bec0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1728,12 +1728,9 @@ retry_private:
 				return ret;
 		}
 
-		if (!(flags & FLAGS_SHARED)) {
-			cond_resched();
-			goto retry_private;
-		}
-
 		cond_resched();
+		if (!(flags & FLAGS_SHARED))
+			goto retry_private;
 		goto retry;
 	}
 
-- 
GitLab


From e0f339213c3bf1b2a8790bd6b5783e383818526b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 20 May 2021 19:27:19 +0800
Subject: [PATCH 1523/3804] regulator: qcom_smd: Make pm8953_lnldo
 linear_ranges entries properly sorted

For better readability, sort the linear_ranges entries by selector.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Message-Id: <20210520112719.1814396-1-axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@sirena.org.uk>
---
 drivers/regulator/qcom_smd-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 05df7b00e3b17..198fcc6551f6d 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -449,8 +449,8 @@ static const struct regulator_desc pm8950_pldo = {
 
 static const struct regulator_desc pm8953_lnldo = {
 	.linear_ranges = (struct linear_range[]) {
-		REGULATOR_LINEAR_RANGE(1380000, 8, 15, 120000),
 		REGULATOR_LINEAR_RANGE(690000, 0, 7, 60000),
+		REGULATOR_LINEAR_RANGE(1380000, 8, 15, 120000),
 	},
 	.n_linear_ranges = 2,
 	.n_voltages = 16,
-- 
GitLab


From e1e8d55bb90c9a07aa66a0c9fa17bd5a67d2689c Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 25 May 2021 18:27:08 +0800
Subject: [PATCH 1524/3804] regulator: mp886x: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Message-Id: <20210525102708.2519323-1-axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@sirena.org.uk>
---
 drivers/regulator/mp886x.c | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/drivers/regulator/mp886x.c b/drivers/regulator/mp886x.c
index a84fd74081de8..8ad4722eca4b3 100644
--- a/drivers/regulator/mp886x.c
+++ b/drivers/regulator/mp886x.c
@@ -26,7 +26,7 @@
 
 struct mp886x_cfg_info {
 	const struct regulator_ops *rops;
-	const int slew_rates[8];
+	const unsigned int slew_rates[8];
 	const int switch_freq[4];
 	const u8 fs_reg;
 	const u8 fs_shift;
@@ -42,28 +42,6 @@ struct mp886x_device_info {
 	unsigned int sel;
 };
 
-static int mp886x_set_ramp(struct regulator_dev *rdev, int ramp)
-{
-	struct mp886x_device_info *di = rdev_get_drvdata(rdev);
-	const struct mp886x_cfg_info *ci = di->ci;
-	int reg = -1, i;
-
-	for (i = 0; i < ARRAY_SIZE(ci->slew_rates); i++) {
-		if (ramp <= ci->slew_rates[i])
-			reg = i;
-		else
-			break;
-	}
-
-	if (reg < 0) {
-		dev_err(di->dev, "unsupported ramp value %d\n", ramp);
-		return -EINVAL;
-	}
-
-	return regmap_update_bits(rdev->regmap, MP886X_SYSCNTLREG1,
-				  MP886X_SLEW_MASK, reg << MP886X_SLEW_SHIFT);
-}
-
 static void mp886x_set_switch_freq(struct mp886x_device_info *di,
 				   struct regmap *regmap,
 				   u32 freq)
@@ -169,7 +147,7 @@ static const struct regulator_ops mp8869_regulator_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.set_mode = mp886x_set_mode,
 	.get_mode = mp886x_get_mode,
-	.set_ramp_delay = mp886x_set_ramp,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 };
 
 static const struct mp886x_cfg_info mp8869_ci = {
@@ -248,7 +226,7 @@ static const struct regulator_ops mp8867_regulator_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.set_mode = mp886x_set_mode,
 	.get_mode = mp886x_get_mode,
-	.set_ramp_delay = mp886x_set_ramp,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 };
 
 static const struct mp886x_cfg_info mp8867_ci = {
@@ -290,6 +268,10 @@ static int mp886x_regulator_register(struct mp886x_device_info *di,
 	rdesc->uV_step = 10000;
 	rdesc->vsel_reg = MP886X_VSEL;
 	rdesc->vsel_mask = 0x3f;
+	rdesc->ramp_reg = MP886X_SYSCNTLREG1;
+	rdesc->ramp_mask = MP886X_SLEW_MASK;
+	rdesc->ramp_delay_table = di->ci->slew_rates;
+	rdesc->n_ramp_values = ARRAY_SIZE(di->ci->slew_rates);
 	rdesc->owner = THIS_MODULE;
 
 	rdev = devm_regulator_register(di->dev, &di->desc, config);
-- 
GitLab


From 15413ce566c248967c96f71f824b79aa8d328e03 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 22 May 2021 10:30:18 +0800
Subject: [PATCH 1525/3804] regulator: mp5416: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Message-Id: <20210522023018.2025188-1-axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@sirena.org.uk>
---
 drivers/regulator/mp5416.c | 44 +++++++++++++-------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/drivers/regulator/mp5416.c b/drivers/regulator/mp5416.c
index 67ce1b52a1a1f..39cebec0edb66 100644
--- a/drivers/regulator/mp5416.c
+++ b/drivers/regulator/mp5416.c
@@ -67,6 +67,10 @@
 		.vsel_mask = MP5416_MASK_VSET,				\
 		.enable_reg = MP5416_REG_BUCK ## _id,			\
 		.enable_mask = MP5416_REGULATOR_EN,			\
+		.ramp_reg = MP5416_REG_CTL2,				\
+		.ramp_mask = MP5416_MASK_DVS_SLEWRATE,			\
+		.ramp_delay_table = mp5416_buck_ramp_table,		\
+		.n_ramp_values = ARRAY_SIZE(mp5416_buck_ramp_table),	\
 		.active_discharge_on	= _dval,			\
 		.active_discharge_reg	= _dreg,			\
 		.active_discharge_mask	= _dval,			\
@@ -123,7 +127,16 @@ static const unsigned int mp5416_I_limits2[] = {
 	2200000, 3200000, 4200000, 5200000
 };
 
-static int mp5416_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay);
+/*
+ * DVS ramp rate BUCK1 to BUCK4
+ * 00: 32mV/us
+ * 01: 16mV/us
+ * 10: 8mV/us
+ * 11: 4mV/us
+ */
+static const unsigned int mp5416_buck_ramp_table[] = {
+	32000, 16000, 8000, 4000
+};
 
 static const struct regulator_ops mp5416_ldo_ops = {
 	.enable			= regulator_enable_regmap,
@@ -147,7 +160,7 @@ static const struct regulator_ops mp5416_buck_ops = {
 	.set_active_discharge	= regulator_set_active_discharge_regmap,
 	.get_current_limit	= regulator_get_current_limit_regmap,
 	.set_current_limit	= regulator_set_current_limit_regmap,
-	.set_ramp_delay		= mp5416_set_ramp_delay,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 };
 
 static struct regulator_desc mp5416_regulators_desc[MP5416_MAX_REGULATORS] = {
@@ -161,33 +174,6 @@ static struct regulator_desc mp5416_regulators_desc[MP5416_MAX_REGULATORS] = {
 	MP5416LDO("ldo4", 4, BIT(1)),
 };
 
-/*
- * DVS ramp rate BUCK1 to BUCK4
- * 00: 32mV/us
- * 01: 16mV/us
- * 10: 8mV/us
- * 11: 4mV/us
- */
-static int mp5416_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	unsigned int ramp_val;
-
-	if (ramp_delay > 32000 || ramp_delay < 0)
-		return -EINVAL;
-
-	if (ramp_delay <= 4000)
-		ramp_val = 3;
-	else if (ramp_delay <= 8000)
-		ramp_val = 2;
-	else if (ramp_delay <= 16000)
-		ramp_val = 1;
-	else
-		ramp_val = 0;
-
-	return regmap_update_bits(rdev->regmap, MP5416_REG_CTL2,
-				  MP5416_MASK_DVS_SLEWRATE, ramp_val << 6);
-}
-
 static int mp5416_i2c_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
-- 
GitLab


From 5c42903e144b8e914ea22098f872669188680d9b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 24 May 2021 20:37:35 +0800
Subject: [PATCH 1526/3804] regulator: rt4831: Add missing .owner field in
 regulator_desc

Add missing .owner field in regulator_desc, which is used for refcounting.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Message-Id: <20210524123735.2363676-1-axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@sirena.org.uk>
---
 drivers/regulator/rt4831-regulator.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/regulator/rt4831-regulator.c b/drivers/regulator/rt4831-regulator.c
index e3aaac90d238f..676b0419e48f7 100644
--- a/drivers/regulator/rt4831-regulator.c
+++ b/drivers/regulator/rt4831-regulator.c
@@ -108,6 +108,7 @@ static const struct regulator_desc rt4831_regulator_descs[] = {
 		.bypass_reg = RT4831_REG_DSVEN,
 		.bypass_val_on = DSV_MODE_BYPASS,
 		.bypass_val_off = DSV_MODE_NORMAL,
+		.owner = THIS_MODULE,
 	},
 	{
 		.name = "DSVP",
@@ -125,6 +126,7 @@ static const struct regulator_desc rt4831_regulator_descs[] = {
 		.enable_mask = RT4831_POSEN_MASK,
 		.active_discharge_reg = RT4831_REG_DSVEN,
 		.active_discharge_mask = RT4831_POSADEN_MASK,
+		.owner = THIS_MODULE,
 	},
 	{
 		.name = "DSVN",
@@ -142,6 +144,7 @@ static const struct regulator_desc rt4831_regulator_descs[] = {
 		.enable_mask = RT4831_NEGEN_MASK,
 		.active_discharge_reg = RT4831_REG_DSVEN,
 		.active_discharge_mask = RT4831_NEGADEN_MASK,
+		.owner = THIS_MODULE,
 	}
 };
 
-- 
GitLab


From 29c8f40b54a45dd23971e2bc395697731bcffbe1 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Mon, 24 May 2021 23:37:26 +0300
Subject: [PATCH 1527/3804] ALSA: hda/realtek: Chain in pop reduction fixup for
 ThinkStation P340

Lenovo ThinkStation P340 uses ALC623 codec (SSID 17aa:1048) and it produces
bug plock/pop noise over line out (green jack on the back) which can be
fixed by applying ALC269_FIXUP_NO_SHUTUP to the machine.

Convert the existing entry for the same SSID to chain to apply this fixup
as well.

Suggested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210524203726.2278-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6571c37137324..90bf0d3a830a3 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6567,6 +6567,7 @@ enum {
 	ALC295_FIXUP_HP_OMEN,
 	ALC285_FIXUP_HP_SPECTRE_X360,
 	ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP,
+	ALC623_FIXUP_LENOVO_THINKSTATION_P340,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8139,6 +8140,12 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC285_FIXUP_THINKPAD_HEADSET_JACK,
 	},
+	[ALC623_FIXUP_LENOVO_THINKSTATION_P340] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_no_shutup,
+		.chained = true,
+		.chain_id = ALC283_FIXUP_HEADSET_MIC,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8457,7 +8464,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1558, 0xc019, "Clevo NH77D[BE]Q", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1558, 0xc022, "Clevo NH77[DC][QW]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
-	SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x17aa, 0x1048, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -8724,6 +8731,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
 	{.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"},
 	{.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"},
+	{.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
-- 
GitLab


From 6fd5fb63820a9a1146aba0bba2fdbc1db4b903e7 Mon Sep 17 00:00:00 2001
From: Jussi Maki <joamaki@gmail.com>
Date: Tue, 25 May 2021 10:29:55 +0000
Subject: [PATCH 1528/3804] selftests/bpf: Add test for l3 use of
 bpf_redirect_peer

Add a test case for using bpf_skb_change_head() in combination with
bpf_redirect_peer() to redirect a packet from a L3 device to veth and back.

The test uses a BPF program that adds L2 headers to the packet coming
from a L3 device and then calls bpf_redirect_peer() to redirect the packet
to a veth device. The test fails as skb->mac_len is not set properly and
thus the ethernet headers are not properly skb_pull'd in cls_bpf_classify(),
causing tcp_v4_rcv() to point the TCP header into the middle of the IP header.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210525102955.2811090-1-joamaki@gmail.com
---
 .../selftests/bpf/prog_tests/tc_redirect.c    | 552 ++++++++++++------
 .../selftests/bpf/progs/test_tc_peer.c        |  31 +
 2 files changed, 405 insertions(+), 178 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 95ef9fcd31d8b..5703c918812bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -11,14 +11,17 @@
  */
 
 #define _GNU_SOURCE
-#include <fcntl.h>
+
+#include <arpa/inet.h>
 #include <linux/limits.h>
 #include <linux/sysctl.h>
+#include <linux/if_tun.h>
+#include <linux/if.h>
 #include <sched.h>
 #include <stdbool.h>
 #include <stdio.h>
 #include <sys/stat.h>
-#include <sys/types.h>
+#include <sys/mount.h>
 
 #include "test_progs.h"
 #include "network_helpers.h"
@@ -32,18 +35,25 @@
 
 #define IP4_SRC "172.16.1.100"
 #define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
 #define IP4_PORT 9004
 
-#define IP6_SRC "::1:dead:beef:cafe"
-#define IP6_DST "::2:dead:beef:cafe"
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
 #define IP6_PORT 9006
 
 #define IP4_SLL "169.254.0.1"
 #define IP4_DLL "169.254.0.2"
 #define IP4_NET "169.254.0.0"
 
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
 #define IFADDR_STR_LEN 18
-#define PING_ARGS "-c 3 -w 10 -q"
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
 
 #define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
 #define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
@@ -51,120 +61,104 @@
 
 #define TIMEOUT_MILLIS 10000
 
-#define MAX_PROC_MODS 128
-#define MAX_PROC_VALUE_LEN 16
-
 #define log_err(MSG, ...) \
 	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
 		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
 
-struct proc_mod {
-	char path[PATH_MAX];
-	char oldval[MAX_PROC_VALUE_LEN];
-	int oldlen;
-};
-
 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
-static int root_netns_fd = -1;
-static int num_proc_mods;
-static struct proc_mod proc_mods[MAX_PROC_MODS];
 
-/**
- * modify_proc() - Modify entry in /proc
- *
- * Modifies an entry in /proc and saves the original value for later
- * restoration with restore_proc().
- */
-static int modify_proc(const char *path, const char *newval)
+static int write_file(const char *path, const char *newval)
 {
-	struct proc_mod *mod;
 	FILE *f;
 
-	if (num_proc_mods + 1 > MAX_PROC_MODS)
-		return -1;
-
 	f = fopen(path, "r+");
 	if (!f)
 		return -1;
-
-	mod = &proc_mods[num_proc_mods];
-	num_proc_mods++;
-
-	strncpy(mod->path, path, PATH_MAX);
-
-	if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) {
-		log_err("reading from %s failed", path);
-		goto fail;
-	}
-	rewind(f);
 	if (fwrite(newval, strlen(newval), 1, f) != 1) {
 		log_err("writing to %s failed", path);
-		goto fail;
+		fclose(f);
+		return -1;
 	}
-
 	fclose(f);
 	return 0;
-
-fail:
-	fclose(f);
-	num_proc_mods--;
-	return -1;
 }
 
-/**
- * restore_proc() - Restore all /proc modifications
- */
-static void restore_proc(void)
+struct nstoken {
+	int orig_netns_fd;
+};
+
+static int setns_by_fd(int nsfd)
 {
-	int i;
+	int err;
 
-	for (i = 0; i < num_proc_mods; i++) {
-		struct proc_mod *mod = &proc_mods[i];
-		FILE *f;
+	err = setns(nsfd, CLONE_NEWNET);
+	close(nsfd);
 
-		f = fopen(mod->path, "w");
-		if (!f) {
-			log_err("fopen of %s failed", mod->path);
-			continue;
-		}
+	if (!ASSERT_OK(err, "setns"))
+		return err;
 
-		if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1)
-			log_err("fwrite to %s failed", mod->path);
+	/* Switch /sys to the new namespace so that e.g. /sys/class/net
+	 * reflects the devices in the new namespace.
+	 */
+	err = unshare(CLONE_NEWNS);
+	if (!ASSERT_OK(err, "unshare"))
+		return err;
 
-		fclose(f);
-	}
-	num_proc_mods = 0;
+	err = umount2("/sys", MNT_DETACH);
+	if (!ASSERT_OK(err, "umount2 /sys"))
+		return err;
+
+	err = mount("sysfs", "/sys", "sysfs", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys"))
+		return err;
+
+	err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
+	if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
+		return err;
+
+	return 0;
 }
 
 /**
- * setns_by_name() - Set networks namespace by name
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
  */
-static int setns_by_name(const char *name)
+static struct nstoken *open_netns(const char *name)
 {
 	int nsfd;
 	char nspath[PATH_MAX];
 	int err;
+	struct nstoken *token;
+
+	token = malloc(sizeof(struct nstoken));
+	if (!ASSERT_OK_PTR(token, "malloc token"))
+		return NULL;
+
+	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+		goto fail;
 
 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
-	if (nsfd < 0)
-		return nsfd;
+	if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+		goto fail;
 
-	err = setns(nsfd, CLONE_NEWNET);
-	close(nsfd);
+	err = setns_by_fd(nsfd);
+	if (!ASSERT_OK(err, "setns_by_fd"))
+		goto fail;
 
-	return err;
+	return token;
+fail:
+	free(token);
+	return NULL;
 }
 
-/**
- * setns_root() - Set network namespace to original (root) namespace
- *
- * Not expected to ever fail, so error not returned, but failure logged
- * and test marked as failed.
- */
-static void setns_root(void)
+static void close_netns(struct nstoken *token)
 {
-	ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root");
+	ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
+	free(token);
 }
 
 static int netns_setup_namespaces(const char *verb)
@@ -237,15 +231,17 @@ static int get_ifindex(const char *name)
 
 static int netns_setup_links_and_routes(struct netns_setup_result *result)
 {
+	struct nstoken *nstoken = NULL;
 	char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-	char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {};
 
 	SYS("ip link add veth_src type veth peer name veth_src_fwd");
 	SYS("ip link add veth_dst type veth peer name veth_dst_fwd");
+
+	SYS("ip link set veth_dst_fwd address " MAC_DST_FWD);
+	SYS("ip link set veth_dst address " MAC_DST);
+
 	if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
 		goto fail;
-	if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr))
-		goto fail;
 
 	result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
 	if (result->ifindex_veth_src_fwd < 0)
@@ -260,7 +256,8 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip link set veth_dst netns " NS_DST);
 
 	/** setup in 'src' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto fail;
 
 	SYS("ip addr add " IP4_SRC "/32 dev veth_src");
@@ -276,8 +273,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s",
 	    veth_src_fwd_addr);
 
+	close_netns(nstoken);
+
 	/** setup in 'fwd' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
 		goto fail;
 
 	/* The fwd netns automatically gets a v6 LL address / routes, but also
@@ -294,8 +294,11 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
 	SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
 
+	close_netns(nstoken);
+
 	/** setup in 'dst' namespace */
-	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
 		goto fail;
 
 	SYS("ip addr add " IP4_DST "/32 dev veth_dst");
@@ -306,23 +309,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
 	SYS("ip route add " IP4_NET "/16 dev veth_dst scope global");
 	SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global");
 
-	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
-	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s",
-	    veth_dst_fwd_addr);
+	SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+	close_netns(nstoken);
 
-	setns_root();
 	return 0;
 fail:
-	setns_root();
+	if (nstoken)
+		close_netns(nstoken);
 	return -1;
 }
 
 static int netns_load_bpf(void)
 {
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
-		return -1;
-
 	SYS("tc qdisc add dev veth_src_fwd clsact");
 	SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
 	    SRC_PROG_PIN_FILE);
@@ -335,42 +335,29 @@ static int netns_load_bpf(void)
 	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
 	    CHK_PROG_PIN_FILE);
 
-	setns_root();
-	return -1;
-fail:
-	setns_root();
-	return -1;
-}
-
-static int netns_unload_bpf(void)
-{
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
-		goto fail;
-	SYS("tc qdisc delete dev veth_src_fwd clsact");
-	SYS("tc qdisc delete dev veth_dst_fwd clsact");
-
-	setns_root();
 	return 0;
 fail:
-	setns_root();
 	return -1;
 }
 
-
 static void test_tcp(int family, const char *addr, __u16 port)
 {
 	int listen_fd = -1, accept_fd = -1, client_fd = -1;
 	char buf[] = "testing testing";
 	int n;
+	struct nstoken *nstoken;
 
-	if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst"))
+	nstoken = open_netns(NS_DST);
+	if (!ASSERT_OK_PTR(nstoken, "setns dst"))
 		return;
 
 	listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
 	if (!ASSERT_GE(listen_fd, 0, "listen"))
 		goto done;
 
-	if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src"))
+	close_netns(nstoken);
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
 		goto done;
 
 	client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
@@ -392,7 +379,8 @@ static void test_tcp(int family, const char *addr, __u16 port)
 	ASSERT_EQ(n, sizeof(buf), "recv from server");
 
 done:
-	setns_root();
+	if (nstoken)
+		close_netns(nstoken);
 	if (listen_fd >= 0)
 		close(listen_fd);
 	if (accept_fd >= 0)
@@ -405,7 +393,7 @@ static int test_ping(int family, const char *addr)
 {
 	const char *ping = family == AF_INET6 ? "ping6" : "ping";
 
-	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr);
+	SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
 	return 0;
 fail:
 	return -1;
@@ -419,19 +407,37 @@ static void test_connectivity(void)
 	test_ping(AF_INET6, IP6_DST);
 }
 
+static int set_forwarding(bool enable)
+{
+	int err;
+
+	err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+	if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+		return err;
+
+	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+	if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+		return err;
+
+	return 0;
+}
+
 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
 {
-	struct test_tc_neigh_fib *skel;
+	struct nstoken *nstoken = NULL;
+	struct test_tc_neigh_fib *skel = NULL;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_neigh_fib__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
-		return;
+		goto done;
 
-	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) {
-		test_tc_neigh_fib__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -449,46 +455,37 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
 		goto done;
 
 	/* bpf_fib_lookup() checks if forwarding is enabled */
-	if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd"))
+	if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
 		goto done;
 
-	err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1");
-	if (!ASSERT_OK(err, "set ipv4.ip_forward"))
-		goto done;
-
-	err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1");
-	if (!ASSERT_OK(err, "set ipv6.forwarding"))
-		goto done;
-	setns_root();
-
 	test_connectivity();
+
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_neigh_fib__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
-	restore_proc();
+	if (skel)
+		test_tc_neigh_fib__destroy(skel);
+	close_netns(nstoken);
 }
 
 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
 {
-	struct test_tc_neigh *skel;
+	struct nstoken *nstoken = NULL;
+	struct test_tc_neigh *skel = NULL;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_neigh__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
-		return;
+		goto done;
 
 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
 
 	err = test_tc_neigh__load(skel);
-	if (!ASSERT_OK(err, "test_tc_neigh__load")) {
-		test_tc_neigh__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(err, "test_tc_neigh__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -505,34 +502,37 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
 	if (netns_load_bpf())
 		goto done;
 
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto done;
+
 	test_connectivity();
 
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_neigh__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
+	if (skel)
+		test_tc_neigh__destroy(skel);
+	close_netns(nstoken);
 }
 
 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 {
+	struct nstoken *nstoken;
 	struct test_tc_peer *skel;
 	int err;
 
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+		return;
+
 	skel = test_tc_peer__open();
 	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
-		return;
+		goto done;
 
 	skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
 	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
 
 	err = test_tc_peer__load(skel);
-	if (!ASSERT_OK(err, "test_tc_peer__load")) {
-		test_tc_peer__destroy(skel);
-		return;
-	}
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto done;
 
 	err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
 	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
@@ -549,41 +549,237 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
 	if (netns_load_bpf())
 		goto done;
 
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto done;
+
 	test_connectivity();
 
 done:
-	bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
-	bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
-	test_tc_peer__destroy(skel);
-	netns_unload_bpf();
-	setns_root();
+	if (skel)
+		test_tc_peer__destroy(skel);
+	close_netns(nstoken);
 }
 
-void test_tc_redirect(void)
+static int tun_open(char *name)
+{
+	struct ifreq ifr;
+	int fd, err;
+
+	fd = open("/dev/net/tun", O_RDWR);
+	if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+	if (*name)
+		strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+	err = ioctl(fd, TUNSETIFF, &ifr);
+	if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+		goto fail;
+
+	SYS("ip link set dev %s up", name);
+
+	return fd;
+fail:
+	close(fd);
+	return -1;
+}
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+enum {
+	SRC_TO_TARGET = 0,
+	TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
 {
-	struct netns_setup_result setup_result;
+	fd_set rfds, wfds;
 
-	root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
-	if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+	FD_ZERO(&rfds);
+	FD_ZERO(&wfds);
+
+	for (;;) {
+		char buf[1500];
+		int direction, nread, nwrite;
+
+		FD_SET(src_fd, &rfds);
+		FD_SET(target_fd, &rfds);
+
+		if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+			log_err("select failed");
+			return 1;
+		}
+
+		direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+		nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+		if (nread < 0) {
+			log_err("read failed");
+			return 1;
+		}
+
+		nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+		if (nwrite != nread) {
+			log_err("write failed");
+			return 1;
+		}
+	}
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+	struct test_tc_peer *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int err;
+	int tunnel_pid = -1;
+	int src_fd, target_fd;
+	int ifindex;
+
+	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+	 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+	 * expose the L2 headers encapsulating the IP packet to BPF and hence
+	 * don't have skb in suitable state for this test. Alternative to TUN/TAP
+	 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+	 * but that requires much more complicated setup.
+	 */
+	nstoken = open_netns(NS_SRC);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
 		return;
 
-	if (netns_setup_namespaces("add"))
-		goto done;
+	src_fd = tun_open("tun_src");
+	if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+		goto fail;
 
-	if (netns_setup_links_and_routes(&setup_result))
-		goto done;
+	close_netns(nstoken);
+
+	nstoken = open_netns(NS_FWD);
+	if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_peer"))
-		test_tc_redirect_peer(&setup_result);
+	target_fd = tun_open("tun_fwd");
+	if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_neigh"))
-		test_tc_redirect_neigh(&setup_result);
+	tunnel_pid = fork();
+	if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+		goto fail;
 
-	if (test__start_subtest("tc_redirect_neigh_fib"))
-		test_tc_redirect_neigh_fib(&setup_result);
+	if (tunnel_pid == 0)
+		exit(tun_relay_loop(src_fd, target_fd));
 
-done:
-	close(root_netns_fd);
-	netns_setup_namespaces("delete");
+	skel = test_tc_peer__open();
+	if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+		goto fail;
+
+	ifindex = get_ifindex("tun_fwd");
+	if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
+		goto fail;
+
+	skel->rodata->IFINDEX_SRC = ifindex;
+	skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+
+	err = test_tc_peer__load(skel);
+	if (!ASSERT_OK(err, "test_tc_peer__load"))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
+		goto fail;
+
+	err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
+	if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
+		goto fail;
+
+	/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+	 * towards dst, and "tc_dst" to redirect packets
+	 * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+	 */
+	SYS("tc qdisc add dev tun_fwd clsact");
+	SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
+	    SRC_PROG_PIN_FILE);
+
+	SYS("tc qdisc add dev veth_dst_fwd clsact");
+	SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
+	    DST_PROG_PIN_FILE);
+	SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
+	    CHK_PROG_PIN_FILE);
+
+	/* Setup route and neigh tables */
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+	SYS("ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+	SYS("ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+	SYS("ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
+	SYS("ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+	SYS("ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+	    " dev tun_src scope global");
+	SYS("ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+
+	SYS("ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+	SYS("ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+
+	if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+		goto fail;
+
+	test_connectivity();
+
+fail:
+	if (tunnel_pid > 0) {
+		kill(tunnel_pid, SIGTERM);
+		waitpid(tunnel_pid, NULL, 0);
+	}
+	if (src_fd >= 0)
+		close(src_fd);
+	if (target_fd >= 0)
+		close(target_fd);
+	if (skel)
+		test_tc_peer__destroy(skel);
+	if (nstoken)
+		close_netns(nstoken);
+}
+
+#define RUN_TEST(name)                                                                      \
+	({                                                                                  \
+		struct netns_setup_result setup_result;                                     \
+		if (test__start_subtest(#name))                                             \
+			if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+				if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),  \
+					      "setup links and routes"))                    \
+					test_ ## name(&setup_result);                       \
+				netns_setup_namespaces("delete");                           \
+			}                                                                   \
+	})
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+	RUN_TEST(tc_redirect_peer);
+	RUN_TEST(tc_redirect_peer_l3);
+	RUN_TEST(tc_redirect_neigh);
+	RUN_TEST(tc_redirect_neigh_fib);
+	return NULL;
+}
+
+void test_tc_redirect(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+	if (ASSERT_OK(err, "pthread_create"))
+		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index ef264bced0e65..fe818cd5f0109 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -5,12 +5,17 @@
 #include <linux/bpf.h>
 #include <linux/stddef.h>
 #include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
 
 #include <bpf/bpf_helpers.h>
 
 volatile const __u32 IFINDEX_SRC;
 volatile const __u32 IFINDEX_DST;
 
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
 SEC("classifier/chk_egress")
 int tc_chk(struct __sk_buff *skb)
 {
@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
+SEC("classifier/dst_ingress_l3")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+	return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("classifier/src_ingress_l3")
+int tc_src_l3(struct __sk_buff *skb)
+{
+	__u16 proto = skb->protocol;
+
+	if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+		return TC_ACT_SHOT;
+
+	if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+		return TC_ACT_SHOT;
+
+	return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
 char __license[] SEC("license") = "GPL";
-- 
GitLab


From b2db6c35ba986ebe1ddd6b65f21a810346299d7f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 25 May 2021 15:40:22 +0100
Subject: [PATCH 1529/3804] afs: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple
warnings by explicitly adding multiple fallthrough pseudo-keywords in
places where the code is intended to fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: linux-afs@lists.infradead.org
cc: linux-hardening@vger.kernel.org
Link: https://lore.kernel.org/r/51150b54e0b0431a2c401cd54f2c4e7f50e94601.1605896059.git.gustavoars@kernel.org/ # v1
Link: https://lore.kernel.org/r/20210420211615.GA51432@embeddedor/ # v2
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/cmservice.c | 5 +++++
 fs/afs/fsclient.c  | 4 ++++
 fs/afs/vlclient.c  | 1 +
 3 files changed, 10 insertions(+)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index a4e9e6e07e939..d3c6bb22c5f48 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -322,6 +322,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			return ret;
 
 		call->unmarshall++;
+		fallthrough;
+
 	case 5:
 		break;
 	}
@@ -418,6 +420,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 			r->node[loop] = ntohl(b[loop + 5]);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 2:
 		break;
@@ -530,6 +533,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 			r->node[loop] = ntohl(b[loop + 5]);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 2:
 		break;
@@ -663,6 +667,7 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
 
 		afs_extract_to_tmp(call);
 		call->unmarshall++;
+		fallthrough;
 
 	case 3:
 		break;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2f695a2604425..dd3f45d906d23 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -388,6 +388,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		req->file_size = vp->scb.status.size;
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 5:
 		break;
@@ -1408,6 +1409,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		_debug("motd '%s'", p);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 8:
 		break;
@@ -1845,6 +1847,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 6:
 		break;
@@ -1979,6 +1982,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
 		xdr_decode_AFSVolSync(&bp, &op->volsync);
 
 		call->unmarshall++;
+		fallthrough;
 
 	case 4:
 		break;
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index dc9327332f069..00fca3c66ba61 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -593,6 +593,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		if (ret < 0)
 			return ret;
 		call->unmarshall = 6;
+		fallthrough;
 
 	case 6:
 		break;
-- 
GitLab


From 7d7b720a4b8049446cffce870b1dd3ffa89d4b40 Mon Sep 17 00:00:00 2001
From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>
Date: Mon, 10 May 2021 12:00:26 +0100
Subject: [PATCH 1530/3804] arm64: Implement stack trace termination record

Reliable stacktracing requires that we identify when a stacktrace is
terminated early. We can do this by ensuring all tasks have a final
frame record at a known location on their task stack, and checking
that this is the final frame record in the chain.

We'd like to use task_pt_regs(task)->stackframe as the final frame
record, as this is already set up upon exception entry from EL0. For
kernel tasks we need to consistently reserve the pt_regs and point x29
at this, which we can do with small changes to __primary_switched,
__secondary_switched, and copy_process().

Since the final frame record must be at a specific location, we must
create the final frame record in __primary_switched and
__secondary_switched rather than leaving this to start_kernel and
secondary_start_kernel. Thus, __primary_switched and
__secondary_switched will now show up in stacktraces for the idle tasks.

Since the final frame record is now identified by its location rather
than by its contents, we identify it at the start of unwind_frame(),
before we read any values from it.

External debuggers may terminate the stack trace when FP == 0. In the
pt_regs->stackframe, the PC is 0 as well. So, stack traces taken in the
debugger may print an extra record 0x0 at the end. While this is not
pretty, this does not do any harm. This is a small price to pay for
having reliable stack trace termination in the kernel. That said, gdb
does not show the extra record probably because it uses DWARF and not
frame pointers for stack traces.

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
[Mark: rebase, use ASM_BUG(), update comments, update commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210510110026.18061-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S      |  2 +-
 arch/arm64/kernel/head.S       | 25 +++++++++++++++++++------
 arch/arm64/kernel/process.c    |  5 +++++
 arch/arm64/kernel/stacktrace.c | 16 +++++++---------
 4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3513984a88bd1..294f24e16feeb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -285,7 +285,7 @@ alternative_else_nop_endif
 	stp	lr, x21, [sp, #S_LR]
 
 	/*
-	 * For exceptions from EL0, create a terminal frame record.
+	 * For exceptions from EL0, create a final frame record.
 	 * For exceptions from EL1, create a synthetic frame record so the
 	 * interrupted code shows up in the backtrace.
 	 */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd5..cc2d45d54838f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -16,6 +16,7 @@
 #include <asm/asm_pointer_auth.h>
 #include <asm/assembler.h>
 #include <asm/boot.h>
+#include <asm/bug.h>
 #include <asm/ptrace.h>
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
@@ -393,6 +394,18 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	ret	x28
 SYM_FUNC_END(__create_page_tables)
 
+	/*
+	 * Create a final frame record at task_pt_regs(current)->stackframe, so
+	 * that the unwinder can identify the final frame record of any task by
+	 * its location in the task stack. We reserve the entire pt_regs space
+	 * for consistency with user tasks and kthreads.
+	 */
+	.macro setup_final_frame
+	sub	sp, sp, #PT_REGS_SIZE
+	stp	xzr, xzr, [sp, #S_STACKFRAME]
+	add	x29, sp, #S_STACKFRAME
+	.endm
+
 /*
  * The following fragment of code is executed with the MMU enabled.
  *
@@ -447,9 +460,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 #endif
 	bl	switch_to_vhe			// Prefer VHE if possible
 	add	sp, sp, #16
-	mov	x29, #0
-	mov	x30, #0
-	b	start_kernel
+	setup_final_frame
+	bl	start_kernel
+	ASM_BUG()
 SYM_FUNC_END(__primary_switched)
 
 	.pushsection ".rodata", "a"
@@ -639,14 +652,14 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	cbz	x2, __secondary_too_slow
 	msr	sp_el0, x2
 	scs_load x2, x3
-	mov	x29, #0
-	mov	x30, #0
+	setup_final_frame
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
-	b	secondary_start_kernel
+	bl	secondary_start_kernel
+	ASM_BUG()
 SYM_FUNC_END(__secondary_switched)
 
 SYM_FUNC_START_LOCAL(__secondary_too_slow)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2ca..8928fba54e4bd 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -435,6 +435,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	}
 	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
 	p->thread.cpu_context.sp = (unsigned long)childregs;
+	/*
+	 * For the benefit of the unwinder, set up childregs->stackframe
+	 * as the final frame for the new task.
+	 */
+	p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;
 
 	ptrace_hw_copy_thread(p);
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index de07147a79260..36cf05d5eb9ee 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -68,12 +68,16 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	unsigned long fp = frame->fp;
 	struct stack_info info;
 
-	if (fp & 0xf)
-		return -EINVAL;
-
 	if (!tsk)
 		tsk = current;
 
+	/* Final frame; nothing to unwind */
+	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
+		return -ENOENT;
+
+	if (fp & 0xf)
+		return -EINVAL;
+
 	if (!on_accessible_stack(tsk, fp, &info))
 		return -EINVAL;
 
@@ -128,12 +132,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 
 	frame->pc = ptrauth_strip_insn_pac(frame->pc);
 
-	/*
-	 * This is a terminal record, so we have finished unwinding.
-	 */
-	if (!frame->fp && !frame->pc)
-		return -ENOENT;
-
 	return 0;
 }
 NOKPROBE_SYMBOL(unwind_frame);
-- 
GitLab


From 7e04cc918954f9090952e8d17cb2c3c4a5ad055e Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 10 May 2021 17:52:06 +0530
Subject: [PATCH 1531/3804] arm64/mm: Validate CONFIG_PGTABLE_LEVELS

CONFIG_PGTABLE_LEVELS has been statically defined in (arch/arm64/Kconfig)
depending on the page size and requested virtual address range. In order to
validate this page table levels selection, this adds a BUILD_BUG_ON() as per
the existing formula ARM64_HW_PGTABLE_LEVELS(). This would help protect
against any inadvertent changes to CONFIG_PGTABLE_LEVELS selection.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1620649326-24115-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/init.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e55409caaee34..6e1ca044ca907 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -499,6 +499,13 @@ void __init mem_init(void)
 	BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
 #endif
 
+	/*
+	 * Selected page table levels should match when derived from
+	 * scratch using the virtual address range and page size.
+	 */
+	BUILD_BUG_ON(ARM64_HW_PGTABLE_LEVELS(CONFIG_ARM64_VA_BITS) !=
+		     CONFIG_PGTABLE_LEVELS);
+
 	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
 		extern int sysctl_overcommit_memory;
 		/*
-- 
GitLab


From bf2367aaed73f06a43c0be3c61dafdc59f986161 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 11 May 2021 14:42:44 +0800
Subject: [PATCH 1532/3804] drivers/perf: Remove redundant dev_err call in
 tx2_uncore_pmu_init_dev()

There is an error message within devm_ioremap_resource
already, so remove the dev_err call to avoid a redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Link: https://lore.kernel.org/r/1620715364-107460-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/thunderx2_pmu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 06a6d569b0b56..fc1a376ee906e 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -817,10 +817,8 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
 	}
 
 	base = devm_ioremap_resource(dev, &res);
-	if (IS_ERR(base)) {
-		dev_err(dev, "PMU type %d: Fail to map resource\n", type);
+	if (IS_ERR(base))
 		return NULL;
-	}
 
 	tx2_pmu = devm_kzalloc(dev, sizeof(*tx2_pmu), GFP_KERNEL);
 	if (!tx2_pmu)
-- 
GitLab


From 27e4482075718997b366e19eaa81aeb7e42e1df3 Mon Sep 17 00:00:00 2001
From: Junhao He <hejunhao2@hisilicon.com>
Date: Tue, 11 May 2021 20:27:31 +0800
Subject: [PATCH 1533/3804] drivers/perf: arm_spe_pmu: Fix some coding style
 issues

Fix some coding style issues reported by checkpatch.pl, including
following types:

WARNING: void function return statements are not generally useful
WARNING: Possible unnecessary 'out of memory' message

Signed-off-by: Junhao He <hejunhao2@hisilicon.com>
Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620736054-58412-2-git-send-email-f.fangjian@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_spe_pmu.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 8a1e86ab2d8e4..e3711cb4c1b55 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -1044,7 +1044,6 @@ static void __arm_spe_pmu_dev_probe(void *info)
 		 spe_pmu->max_record_sz, spe_pmu->align, spe_pmu->features);
 
 	spe_pmu->features |= SPE_PMU_FEAT_DEV_PROBED;
-	return;
 }
 
 static void __arm_spe_pmu_reset_local(void)
@@ -1190,10 +1189,8 @@ static int arm_spe_pmu_device_probe(struct platform_device *pdev)
 	}
 
 	spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
-	if (!spe_pmu) {
-		dev_err(dev, "failed to allocate spe_pmu\n");
+	if (!spe_pmu)
 		return -ENOMEM;
-	}
 
 	spe_pmu->handle = alloc_percpu(typeof(*spe_pmu->handle));
 	if (!spe_pmu->handle)
-- 
GitLab


From f265fd166bce9837ce1ae6c2a4b56f8bd18d1fe4 Mon Sep 17 00:00:00 2001
From: Junhao He <hejunhao2@hisilicon.com>
Date: Tue, 11 May 2021 20:27:32 +0800
Subject: [PATCH 1534/3804] drivers/perf: arm_pmu: Fix some coding style issues

Fix some coding style issues reported by checkpatch.pl, including
following types:

ERROR: spaces required around that '=' (ctx:VxW)
WARNING: Possible unnecessary 'out of memory' message

Signed-off-by: Junhao He <hejunhao2@hisilicon.com>
Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620736054-58412-3-git-send-email-f.fangjian@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index d4f7f1f9cc77d..e57b348c1628b 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -670,7 +670,7 @@ int armpmu_request_irq(int irq, int cpu)
 						 &cpu_armpmu);
 			irq_ops = &percpu_pmuirq_ops;
 		} else {
-			has_nmi= true;
+			has_nmi = true;
 			irq_ops = &percpu_pmunmi_ops;
 		}
 	} else {
@@ -869,10 +869,8 @@ static struct arm_pmu *__armpmu_alloc(gfp_t flags)
 	int cpu;
 
 	pmu = kzalloc(sizeof(*pmu), flags);
-	if (!pmu) {
-		pr_info("failed to allocate PMU device!\n");
+	if (!pmu)
 		goto out;
-	}
 
 	pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
 	if (!pmu->hw_events) {
-- 
GitLab


From a9f00c9760febb84215bcb489855b5b23e3ab4dc Mon Sep 17 00:00:00 2001
From: Junhao He <hejunhao2@hisilicon.com>
Date: Tue, 11 May 2021 20:27:33 +0800
Subject: [PATCH 1535/3804] drivers/perf: arm-cmn: Add space after ','

Fix a warning from checkpatch.pl.

ERROR: space required after that ',' (ctx:VxV)

Signed-off-by: Junhao He <hejunhao2@hisilicon.com>
Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620736054-58412-4-git-send-email-f.fangjian@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 9417e9c5bcb34..4f46f654279d3 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -31,7 +31,7 @@
 #define CMN_CI_CHILD_COUNT		GENMASK_ULL(15, 0)
 #define CMN_CI_CHILD_PTR_OFFSET		GENMASK_ULL(31, 16)
 
-#define CMN_CHILD_NODE_ADDR		GENMASK(27,0)
+#define CMN_CHILD_NODE_ADDR		GENMASK(27, 0)
 #define CMN_CHILD_NODE_EXTERNAL		BIT(31)
 
 #define CMN_ADDR_NODE_PTR		GENMASK(27, 14)
-- 
GitLab


From eb2b22f024c3615d576cead56f2a7d2c90355716 Mon Sep 17 00:00:00 2001
From: Junhao He <hejunhao2@hisilicon.com>
Date: Tue, 11 May 2021 20:27:34 +0800
Subject: [PATCH 1536/3804] drivers/perf: arm-cci: Fix checkpatch spacing error

Fix some coding style issues reported by checkpatch.pl, including
following types:

ERROR: need consistent spacing around '-' (ctx:WxV)
ERROR: space required before the open parenthesis '('

Signed-off-by: Junhao He <hejunhao2@hisilicon.com>
Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1620736054-58412-5-git-send-email-f.fangjian@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 666d8a9b557fc..54aca3a628147 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -37,7 +37,7 @@
 
 #define CCI_PMU_CNTR_SIZE(model)	((model)->cntr_size)
 #define CCI_PMU_CNTR_BASE(model, idx)	((idx) * CCI_PMU_CNTR_SIZE(model))
-#define CCI_PMU_CNTR_MASK		((1ULL << 32) -1)
+#define CCI_PMU_CNTR_MASK		((1ULL << 32) - 1)
 #define CCI_PMU_CNTR_LAST(cci_pmu)	(cci_pmu->num_cntrs - 1)
 
 #define CCI_PMU_MAX_HW_CNTRS(model) \
@@ -806,7 +806,7 @@ static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
 		return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
 
 	/* Generic code to find an unused idx from the mask */
-	for(idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
+	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
 		if (!test_and_set_bit(idx, hw->used_mask))
 			return idx;
 
-- 
GitLab


From e377ab82311af95c99648c6424a6b888a0ccb102 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 10 May 2021 16:37:51 +0530
Subject: [PATCH 1537/3804] arm64/mm: Remove [PUD|PMD]_TABLE_BIT from
 [pud|pmd]_bad()

Semantics wise, [pud|pmd]_bad() have always implied that a given [PUD|PMD]
entry does not have a pointer to the next level page table. This had been
made clear in the commit a1c76574f345 ("arm64: mm: use *_sect to check for
section maps"). Hence explicitly check for a table entry rather than just
testing a single bit. This basically redefines [pud|pmd]_bad() in terms of
[pud|pmd]_table() making the semantics clear.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1620644871-26280-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/pgtable.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b10204e72fcb..11e60d0cd9b69 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -511,13 +511,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 #define pmd_none(pmd)		(!pmd_val(pmd))
 
-#define pmd_bad(pmd)		(!(pmd_val(pmd) & PMD_TABLE_BIT))
-
 #define pmd_table(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
 				 PMD_TYPE_TABLE)
 #define pmd_sect(pmd)		((pmd_val(pmd) & PMD_TYPE_MASK) == \
 				 PMD_TYPE_SECT)
 #define pmd_leaf(pmd)		pmd_sect(pmd)
+#define pmd_bad(pmd)		(!pmd_table(pmd))
 
 #define pmd_leaf_size(pmd)	(pmd_cont(pmd) ? CONT_PMD_SIZE : PMD_SIZE)
 #define pte_leaf_size(pte)	(pte_cont(pte) ? CONT_PTE_SIZE : PAGE_SIZE)
@@ -604,7 +603,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 	pr_err("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
 
 #define pud_none(pud)		(!pud_val(pud))
-#define pud_bad(pud)		(!(pud_val(pud) & PUD_TABLE_BIT))
+#define pud_bad(pud)		(!pud_table(pud))
 #define pud_present(pud)	pte_present(pud_pte(pud))
 #define pud_leaf(pud)		pud_sect(pud)
 #define pud_valid(pud)		pte_valid(pud_pte(pud))
-- 
GitLab


From ca940790d2ddc91e976f1e9e685052a54a1c50cf Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 May 2021 17:23:50 +0100
Subject: [PATCH 1538/3804] arm64: Document requirement for access to FEAT_HCX

v8.7 of the architecture introduced FEAT_HCX which adds an additional
hypervisor configuration register HCRX_EL2. Even though Linux does not
currently make use of this feature let's document that the EL3 trap for
access to the register should be disabled so that we are able to make
use of it in future.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512162350.20349-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/booting.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index 18b8cc1bf32c6..a9192e7a231bf 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -277,6 +277,12 @@ Before jumping into the kernel, the following conditions must be met:
 
     - SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
 
+  For CPUs with support for HCRX_EL2 (FEAT_HCX) present:
+
+  - If EL3 is present and the kernel is entered at EL2:
+
+    - SCR_EL3.HXEn (bit 38) must be initialised to 0b1.
+
   For CPUs with Advanced SIMD and floating point support:
 
   - If EL3 is present:
-- 
GitLab


From 63ebdb77afa96068ac570e87643eb4cd5b3e31c3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 18 May 2021 17:33:31 +0100
Subject: [PATCH 1539/3804] kselftest/arm64: Add missing newline to SVE test
 skipping output

The newline is expected to come from the caller but got missed for this
test.

Signed-off-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210518163331.38268-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 tools/testing/selftests/arm64/fp/sve-probe-vls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index b29cbc642c574..76e138525d550 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -25,7 +25,7 @@ int main(int argc, char **argv)
 	ksft_set_plan(2);
 
 	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
-		ksft_exit_skip("SVE not available");
+		ksft_exit_skip("SVE not available\n");
 
 	/*
 	 * Enumerate up to SVE_VQ_MAX vector lengths
-- 
GitLab


From 3d0cca0b02ac98eac9157b26cf3951997db68b37 Mon Sep 17 00:00:00 2001
From: Evgenii Stepanov <eugenis@google.com>
Date: Thu, 20 May 2021 18:00:23 -0700
Subject: [PATCH 1540/3804] kasan: speed up mte_set_mem_tag_range

Use DC GVA / DC GZVA to speed up KASan memory tagging in HW tags mode.

The first cacheline is always tagged using STG/STZG even if the address is
cacheline-aligned, as benchmarks show it is faster than a conditional
branch.

Signed-off-by: Evgenii Stepanov <eugenis@google.com>
Co-developed-by: Peter Collingbourne <pcc@google.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210521010023.3244784-1-eugenis@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte-kasan.h | 93 +++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index ddd4d17cf9a07..d952352bd0088 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -48,43 +48,84 @@ static inline u8 mte_get_random_tag(void)
 	return mte_get_ptr_tag(addr);
 }
 
+static inline u64 __stg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline u64 __stzg_post(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "stzg %0, [%0], #16"
+		     : "+r"(p)
+		     :
+		     : "memory");
+	return p;
+}
+
+static inline void __dc_gva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gva, %0" : : "r"(p) : "memory");
+}
+
+static inline void __dc_gzva(u64 p)
+{
+	asm volatile(__MTE_PREAMBLE "dc gzva, %0" : : "r"(p) : "memory");
+}
+
 /*
  * Assign allocation tags for a region of memory based on the pointer tag.
  * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ * size must be MTE_GRANULE_SIZE aligned.
  */
-static inline void mte_set_mem_tag_range(void *addr, size_t size,
-						u8 tag, bool init)
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag,
+					 bool init)
 {
-	u64 curr, end;
+	u64 curr, mask, dczid_bs, end1, end2, end3;
 
-	if (!size)
-		return;
+	/* Read DC G(Z)VA block size from the system register. */
+	dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf);
 
 	curr = (u64)__tag_set(addr, tag);
-	end = curr + size;
+	mask = dczid_bs - 1;
+	/* STG/STZG up to the end of the first block. */
+	end1 = curr | mask;
+	end3 = curr + size;
+	/* DC GVA / GZVA in [end1, end2) */
+	end2 = end3 & ~mask;
 
 	/*
-	 * 'asm volatile' is required to prevent the compiler to move
-	 * the statement outside of the loop.
+	 * The following code uses STG on the first DC GVA block even if the
+	 * start address is aligned - it appears to be faster than an alignment
+	 * check + conditional branch. Also, if the range size is at least 2 DC
+	 * GVA blocks, the first two loops can use post-condition to save one
+	 * branch each.
 	 */
-	if (init) {
-		do {
-			asm volatile(__MTE_PREAMBLE "stzg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	} else {
-		do {
-			asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
-				     :
-				     : "r" (curr)
-				     : "memory");
-			curr += MTE_GRANULE_SIZE;
-		} while (curr != end);
-	}
+#define SET_MEMTAG_RANGE(stg_post, dc_gva)		\
+	do {						\
+		if (size >= 2 * dczid_bs) {		\
+			do {				\
+				curr = stg_post(curr);	\
+			} while (curr < end1);		\
+							\
+			do {				\
+				dc_gva(curr);		\
+				curr += dczid_bs;	\
+			} while (curr < end2);		\
+		}					\
+							\
+		while (curr < end3)			\
+			curr = stg_post(curr);		\
+	} while (0)
+
+	if (init)
+		SET_MEMTAG_RANGE(__stzg_post, __dc_gzva);
+	else
+		SET_MEMTAG_RANGE(__stg_post, __dc_gva);
+#undef SET_MEMTAG_RANGE
 }
 
 void mte_enable_kernel_sync(void);
-- 
GitLab


From 40221c737608cf324870c58ef063159c3a6a4c81 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 24 May 2021 13:10:30 +0530
Subject: [PATCH 1541/3804] arm64/mm: Make vmemmap_free() available only with
 CONFIG_MEMORY_HOTPLUG

vmemmap_free() callsites (mm/sparse.c) and declaration (include/linux/mm.h)
are protected with CONFIG_MEMORY_HOTPLUG. This function is not required if
CONFIG_MEMORY_HOTPLUG is not enabled. Hence move the config wrapper outside
the function definition.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/1621842030-23256-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/mmu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 6dd9369e3ea0e..3d34cd127f6b6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1166,16 +1166,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 	return 0;
 }
 #endif	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
+
+#ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
 		struct vmem_altmap *altmap)
 {
-#ifdef CONFIG_MEMORY_HOTPLUG
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
 
 	unmap_hotplug_range(start, end, true, altmap);
 	free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
-#endif
 }
+#endif /* CONFIG_MEMORY_HOTPLUG */
 
 static inline pud_t *fixmap_pud(unsigned long addr)
 {
-- 
GitLab


From e89d6cc51034998607502cd3899173bfa7189571 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 24 May 2021 09:29:44 +0100
Subject: [PATCH 1542/3804] arm64: assembler: replace `kaddr` with `addr`

The `__dcache_op_workaround_clean_cache` and `dcache_by_line_op` macros
are only expected to be used on kernel memory, without a user fault
fixup, and so we named their address variables `kaddr` to make this
clear.

Subsequent patches will modify these to also work on user memory with an
(optional) user fault fixup, where `kaddr` won't make as much sense. To
aid the legibility of patches, this patch (only) replaces `kaddr` with
`addr` as a preparatory step.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Cc: Ard Biesheuvel <aedb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-2-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 32 +++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f044..6a0fbc599196e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -377,47 +377,47 @@ alternative_cb_end
 
 /*
  * Macro to perform a data cache maintenance for the interval
- * [kaddr, kaddr + size)
+ * [addr, addr + size)
  *
  * 	op:		operation passed to dc instruction
  * 	domain:		domain used in dsb instruciton
- * 	kaddr:		starting virtual address of the region
+ * 	addr:		starting virtual address of the region
  * 	size:		size of the region
- * 	Corrupts:	kaddr, size, tmp1, tmp2
+ * 	Corrupts:	addr, size, tmp1, tmp2
  */
-	.macro __dcache_op_workaround_clean_cache, op, kaddr
+	.macro __dcache_op_workaround_clean_cache, op, addr
 alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	\op, \kaddr
+	dc	\op, \addr
 alternative_else
-	dc	civac, \kaddr
+	dc	civac, \addr
 alternative_endif
 	.endm
 
-	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+	.macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2
 	dcache_line_size \tmp1, \tmp2
-	add	\size, \kaddr, \size
+	add	\size, \addr, \size
 	sub	\tmp2, \tmp1, #1
-	bic	\kaddr, \kaddr, \tmp2
+	bic	\addr, \addr, \tmp2
 9998:
 	.ifc	\op, cvau
-	__dcache_op_workaround_clean_cache \op, \kaddr
+	__dcache_op_workaround_clean_cache \op, \addr
 	.else
 	.ifc	\op, cvac
-	__dcache_op_workaround_clean_cache \op, \kaddr
+	__dcache_op_workaround_clean_cache \op, \addr
 	.else
 	.ifc	\op, cvap
-	sys	3, c7, c12, 1, \kaddr	// dc cvap
+	sys	3, c7, c12, 1, \addr	// dc cvap
 	.else
 	.ifc	\op, cvadp
-	sys	3, c7, c13, 1, \kaddr	// dc cvadp
+	sys	3, c7, c13, 1, \addr	// dc cvadp
 	.else
-	dc	\op, \kaddr
+	dc	\op, \addr
 	.endif
 	.endif
 	.endif
 	.endif
-	add	\kaddr, \kaddr, \tmp1
-	cmp	\kaddr, \size
+	add	\addr, \addr, \tmp1
+	cmp	\addr, \size
 	b.lo	9998b
 	dsb	\domain
 	.endm
-- 
GitLab


From d11b187760f52480dd83bda0429ee3c94e542b1d Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 24 May 2021 09:29:45 +0100
Subject: [PATCH 1543/3804] arm64: assembler: add conditional cache fixups

It would be helpful if we could use both `dcache_by_line_op` and
`invalidate_icache_by_line` for user memory without accidentally fixing
up unexpected faults when performing maintenance on kernel addresses.

Let's make this possible by having both macros take an optional fixup
label, and only generating an extable entry if a label is provided.

At the same time, let's clean up the labels used to be globally unique
using \@ as we do for other macros.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Cc: Ard Biesheuvel <aedb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Fuad Tabba <tabba@google.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-3-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 39 +++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 6a0fbc599196e..0a276b46ef501 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -130,15 +130,27 @@ alternative_endif
 	.endm
 
 /*
- * Emit an entry into the exception table
+ * Create an exception table entry for `insn`, which will branch to `fixup`
+ * when an unhandled fault is taken.
  */
-	.macro		_asm_extable, from, to
+	.macro		_asm_extable, insn, fixup
 	.pushsection	__ex_table, "a"
 	.align		3
-	.long		(\from - .), (\to - .)
+	.long		(\insn - .), (\fixup - .)
 	.popsection
 	.endm
 
+/*
+ * Create an exception table entry for `insn` if `fixup` is provided. Otherwise
+ * do nothing.
+ */
+	.macro		_cond_extable, insn, fixup
+	.ifnc		\fixup,
+	_asm_extable	\insn, \fixup
+	.endif
+	.endm
+
+
 #define USER(l, x...)				\
 9999:	x;					\
 	_asm_extable	9999b, l
@@ -383,6 +395,7 @@ alternative_cb_end
  * 	domain:		domain used in dsb instruciton
  * 	addr:		starting virtual address of the region
  * 	size:		size of the region
+ * 	fixup:		optional label to branch to on user fault
  * 	Corrupts:	addr, size, tmp1, tmp2
  */
 	.macro __dcache_op_workaround_clean_cache, op, addr
@@ -393,12 +406,12 @@ alternative_else
 alternative_endif
 	.endm
 
-	.macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2
+	.macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup
 	dcache_line_size \tmp1, \tmp2
 	add	\size, \addr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\addr, \addr, \tmp2
-9998:
+.Ldcache_op\@:
 	.ifc	\op, cvau
 	__dcache_op_workaround_clean_cache \op, \addr
 	.else
@@ -418,8 +431,10 @@ alternative_endif
 	.endif
 	add	\addr, \addr, \tmp1
 	cmp	\addr, \size
-	b.lo	9998b
+	b.lo	.Ldcache_op\@
 	dsb	\domain
+
+	_cond_extable .Ldcache_op\@, \fixup
 	.endm
 
 /*
@@ -427,20 +442,22 @@ alternative_endif
  * [start, end)
  *
  * 	start, end:	virtual addresses describing the region
- *	label:		A label to branch to on user fault.
+ *	fixup:		optional label to branch to on user fault
  * 	Corrupts:	tmp1, tmp2
  */
-	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
+	.macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup
 	icache_line_size \tmp1, \tmp2
 	sub	\tmp2, \tmp1, #1
 	bic	\tmp2, \start, \tmp2
-9997:
-USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
+.Licache_op\@:
+	ic	ivau, \tmp2			// invalidate I line PoU
 	add	\tmp2, \tmp2, \tmp1
 	cmp	\tmp2, \end
-	b.lo	9997b
+	b.lo	.Licache_op\@
 	dsb	ish
 	isb
+
+	_cond_extable .Licache_op\@, \fixup
 	.endm
 
 /*
-- 
GitLab


From 46710cf1fcb6235388e8d80619cdf2c196ad554b Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:46 +0100
Subject: [PATCH 1544/3804] arm64: Apply errata to swsusp_arch_suspend_exit

The Arm errata covered by ARM64_WORKAROUND_CLEAN_CACHE require
that "dc cvau" instructions get promoted to "dc civac".

Reported-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-4-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/hibernate-asm.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 8ccca660034e4..0ed2f72a6b943 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -91,7 +91,8 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
 	raw_dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x10, x3
-2:	dc	cvau, x4	/* clean D line / unified line */
+2:	/* clean D line / unified line */
+alternative_insn "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
 	add	x4, x4, x2
 	cmp	x4, x1
 	b.lo	2b
-- 
GitLab


From 116b7f559492b719ae4bd22ee773cb7fb046a736 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:47 +0100
Subject: [PATCH 1545/3804] arm64: Do not enable uaccess for flush_icache_range

__flush_icache_range works on kernel addresses, and doesn't need
uaccess. The existing code enables uaccess only as a side-effect of
its current implementation, which falls through into
__flush_cache_user_range.

Instead of fallthrough to share the code, use a common macro for
the two where the caller specifies an optional fixup label if
user access is needed. If provided, this label would be used to
generate an extable entry.

Simplify the code to use dcache_by_line_op, instead of
replicating much of its functionality.

No functional change intended.
Possible performance impact due to the reduced number of
instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Link: https://lore.kernel.org/linux-arm-kernel/20210521121846.GB1040@C02TD0UTHF1T.local/
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-5-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/cache.S | 57 ++++++++++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 2d881f34dd9d5..7c54bcbf5a367 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -14,6 +14,34 @@
 #include <asm/alternative.h>
 #include <asm/asm-uaccess.h>
 
+/*
+ *	__flush_cache_range(start,end) [fixup]
+ *
+ *	Ensure that the I and D caches are coherent within specified region.
+ *	This is typically used when code has been written to a memory region,
+ *	and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *	- fixup   - optional label to branch to on user fault
+ */
+.macro	__flush_cache_range, fixup
+alternative_if ARM64_HAS_CACHE_IDC
+	dsb     ishst
+	b       .Ldc_skip_\@
+alternative_else_nop_endif
+	mov     x2, x0
+	sub     x3, x1, x0
+	dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
+.Ldc_skip_\@:
+alternative_if ARM64_HAS_CACHE_DIC
+	isb
+	b	.Lic_skip_\@
+alternative_else_nop_endif
+	invalidate_icache_by_line x0, x1, x2, x3, \fixup
+.Lic_skip_\@:
+.endm
+
 /*
  *	flush_icache_range(start,end)
  *
@@ -25,7 +53,9 @@
  *	- end     - virtual end address of region
  */
 SYM_FUNC_START(__flush_icache_range)
-	/* FALLTHROUGH */
+	__flush_cache_range
+	ret
+SYM_FUNC_END(__flush_icache_range)
 
 /*
  *	__flush_cache_user_range(start,end)
@@ -39,34 +69,15 @@ SYM_FUNC_START(__flush_icache_range)
  */
 SYM_FUNC_START(__flush_cache_user_range)
 	uaccess_ttbr0_enable x2, x3, x4
-alternative_if ARM64_HAS_CACHE_IDC
-	dsb	ishst
-	b	7f
-alternative_else_nop_endif
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x4, x0, x3
-1:
-user_alt 9f, "dc cvau, x4",  "dc civac, x4",  ARM64_WORKAROUND_CLEAN_CACHE
-	add	x4, x4, x2
-	cmp	x4, x1
-	b.lo	1b
-	dsb	ish
 
-7:
-alternative_if ARM64_HAS_CACHE_DIC
-	isb
-	b	8f
-alternative_else_nop_endif
-	invalidate_icache_by_line x0, x1, x2, x3, 9f
-8:	mov	x0, #0
+	__flush_cache_range 2f
+	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
 	ret
-9:
+2:
 	mov	x0, #-EFAULT
 	b	1b
-SYM_FUNC_END(__flush_icache_range)
 SYM_FUNC_END(__flush_cache_user_range)
 
 /*
-- 
GitLab


From 7908072da535dca52b3a011ed6e1f73534546b59 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:48 +0100
Subject: [PATCH 1546/3804] arm64: Do not enable uaccess for
 invalidate_icache_range

invalidate_icache_range() works on kernel addresses, and doesn't
need uaccess. Remove the code that toggles uaccess_ttbr0_enable,
as well as the code that emits an entry into the exception table
(via the macro invalidate_icache_by_line).

Change the return type of invalidate_icache_range() from int (which
used to indicate a fault) to void, since it doesn't need uaccess
and won't fault. Note that the return value was never checked by
any of the callers.

No functional change intended.
Possible performance impact due to the reduced number of
instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-6-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h |  2 +-
 arch/arm64/mm/cache.S               | 11 +----------
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 52e5c16232240..a586afa84172d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -57,7 +57,7 @@
  *		- size   - region size
  */
 extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern int  invalidate_icache_range(unsigned long start, unsigned long end);
+extern void invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
 extern void __inval_dcache_area(void *addr, size_t len);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7c54bcbf5a367..14eac9d76d57a 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -90,21 +90,12 @@ SYM_FUNC_END(__flush_cache_user_range)
  */
 SYM_FUNC_START(invalidate_icache_range)
 alternative_if ARM64_HAS_CACHE_DIC
-	mov	x0, xzr
 	isb
 	ret
 alternative_else_nop_endif
 
-	uaccess_ttbr0_enable x2, x3, x4
-
-	invalidate_icache_by_line x0, x1, x2, x3, 2f
-	mov	x0, xzr
-1:
-	uaccess_ttbr0_disable x1, x2
+	invalidate_icache_by_line x0, x1, x2, x3
 	ret
-2:
-	mov	x0, #-EFAULT
-	b	1b
 SYM_FUNC_END(invalidate_icache_range)
 
 /*
-- 
GitLab


From 5e20e3499682c4f1724438d23afcafd473526a54 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:49 +0100
Subject: [PATCH 1547/3804] arm64: Downgrade flush_icache_range to invalidate

Since __flush_dcache_area is called right before,
invalidate_icache_range is sufficient in this case.

Rewrite the comment to better explain the rationale behind the
cache maintenance operations used here.

No functional change intended.
Possible performance impact due to invalidating only the icache
rather than invalidating and cleaning both caches.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-7-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/machine_kexec.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 90a335c744425..a03944fd0cd4e 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -68,10 +68,14 @@ int machine_kexec_post_load(struct kimage *kimage)
 	kimage->arch.kern_reloc = __pa(reloc_code);
 	kexec_image_info(kimage);
 
-	/* Flush the reloc_code in preparation for its execution. */
+	/*
+	 * For execution with the MMU off, reloc_code needs to be cleaned to the
+	 * PoC and invalidated from the I-cache.
+	 */
 	__flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
-	flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code +
-			   arm64_relocate_new_kernel_size);
+	invalidate_icache_range((uintptr_t)reloc_code,
+				(uintptr_t)reloc_code +
+					arm64_relocate_new_kernel_size);
 
 	return 0;
 }
-- 
GitLab


From 55272ecc3ada8ec947bb5e94ee2fcde6cf31e166 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:50 +0100
Subject: [PATCH 1548/3804] arm64: assembler: remove user_alt

user_alt isn't being used anymore. It's also simpler and clearer
to directly use alternative_insn and _cond_extable in-line when
needed.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/linux-arm-kernel/20210520125735.GF17233@C02TD0UTHF1T.local/
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-8-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/alternative-macros.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 8a078fc662ac5..477703578caa4 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -197,11 +197,6 @@ alternative_endif
 #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)	\
 	alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
-.macro user_alt, label, oldinstr, newinstr, cond
-9999:	alternative_insn "\oldinstr", "\newinstr", \cond
-	_asm_extable 9999b, \label
-.endm
-
 #endif  /*  __ASSEMBLY__  */
 
 /*
-- 
GitLab


From 06b7a568ca5e9cb79a0cc4737f498ea90d8fa89d Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:51 +0100
Subject: [PATCH 1549/3804] arm64: Move documentation of dcache_by_line_op

The comment describing the macro dcache_by_line_op is placed
right before the macro that precedes it
(__dcache_op_workaround_clean_cache), which is a bit confusing.
Move it to the macro it describes (dcache_by_line_op).

No functional change intended.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-9-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 0a276b46ef501..ced791124b283 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -387,6 +387,14 @@ alternative_cb_end
 	bfi	\tcr, \tmp0, \pos, #3
 	.endm
 
+	.macro __dcache_op_workaround_clean_cache, op, addr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	dc	\op, \addr
+alternative_else
+	dc	civac, \addr
+alternative_endif
+	.endm
+
 /*
  * Macro to perform a data cache maintenance for the interval
  * [addr, addr + size)
@@ -398,14 +406,6 @@ alternative_cb_end
  * 	fixup:		optional label to branch to on user fault
  * 	Corrupts:	addr, size, tmp1, tmp2
  */
-	.macro __dcache_op_workaround_clean_cache, op, addr
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	\op, \addr
-alternative_else
-	dc	civac, \addr
-alternative_endif
-	.endm
-
 	.macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup
 	dcache_line_size \tmp1, \tmp2
 	add	\size, \addr, \size
-- 
GitLab


From d044f8141847bee542998a6fd8de2c270fe40e48 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:52 +0100
Subject: [PATCH 1550/3804] arm64: Fix comments to refer to correct function
 __flush_icache_range

Many comments refer to the function flush_icache_range, where the
intent is in fact __flush_icache_range. Fix these comments to
refer to the intended function.

That's probably due to commit 3b8c9f1cdfc506e9 ("arm64: IPI each
CPU after invalidating the I-cache for kernel mappings"), which
renamed flush_icache_range() to __flush_icache_range() and added
a wrapper.

No functional change intended.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-10-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/hibernate-asm.S | 4 ++--
 arch/arm64/mm/cache.S             | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 0ed2f72a6b943..ef2ab7caf8155 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -45,7 +45,7 @@
  * Because this code has to be copied to a 'safe' page, it can't call out to
  * other functions by PC-relative address. Also remember that it may be
  * mid-way through over-writing other functions. For this reason it contains
- * code from flush_icache_range() and uses the copy_page() macro.
+ * code from __flush_icache_range() and uses the copy_page() macro.
  *
  * This 'safe' page is mapped via ttbr0, and executed from there. This function
  * switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
 	copy_page	x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
 
 	add	x1, x10, #PAGE_SIZE
-	/* Clean the copied page to PoU - based on flush_icache_range() */
+	/* Clean the copied page to PoU - based on __flush_icache_range() */
 	raw_dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x10, x3
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 14eac9d76d57a..910ae8f6a3897 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -43,7 +43,7 @@ alternative_else_nop_endif
 .endm
 
 /*
- *	flush_icache_range(start,end)
+ *	__flush_icache_range(start,end)
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
-- 
GitLab


From e3974adb4ef591e898956083a3dfa6336bb88638 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:53 +0100
Subject: [PATCH 1551/3804] arm64: __inval_dcache_area to take end parameter
 instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

Because the code is shared with __dma_inv_area, it changes the
parameters for that as well. However, __dma_inv_area is local to
cache.S, so no other users are affected.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-11-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h |  2 +-
 arch/arm64/kernel/head.S            |  5 +----
 arch/arm64/mm/cache.S               | 16 +++++++++-------
 arch/arm64/mm/flush.c               |  2 +-
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index a586afa84172d..1572347068170 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -59,7 +59,7 @@
 extern void __flush_icache_range(unsigned long start, unsigned long end);
 extern void invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
-extern void __inval_dcache_area(void *addr, size_t len);
+extern void __inval_dcache_area(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
 extern void __clean_dcache_area_pop(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd5..8df0ac8d9123e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -117,7 +117,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
 	dmb	sy				// needed before dc ivac with
 						// MMU off
 
-	mov	x1, #0x20			// 4 x 8 bytes
+	add	x1, x0, #0x20			// 4 x 8 bytes
 	b	__inval_dcache_area		// tail call
 SYM_CODE_END(preserve_boot_args)
 
@@ -268,7 +268,6 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	 */
 	adrp	x0, init_pg_dir
 	adrp	x1, init_pg_end
-	sub	x1, x1, x0
 	bl	__inval_dcache_area
 
 	/*
@@ -382,12 +381,10 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 
 	adrp	x0, idmap_pg_dir
 	adrp	x1, idmap_pg_end
-	sub	x1, x1, x0
 	bl	__inval_dcache_area
 
 	adrp	x0, init_pg_dir
 	adrp	x1, init_pg_end
-	sub	x1, x1, x0
 	bl	__inval_dcache_area
 
 	ret	x28
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 910ae8f6a3897..03c1a7659ffbe 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -131,25 +131,24 @@ alternative_else_nop_endif
 SYM_FUNC_END(__clean_dcache_area_pou)
 
 /*
- *	__inval_dcache_area(kaddr, size)
+ *	__inval_dcache_area(start, end)
  *
- * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are invalidated. Any partial lines at the ends of the interval are
  *	also cleaned to PoC to prevent data loss.
  *
- *	- kaddr   - kernel address
- *	- size    - size in question
+ *	- start   - kernel start address of region
+ *	- end     - kernel end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_inv_area)
 SYM_FUNC_START_PI(__inval_dcache_area)
 	/* FALLTHROUGH */
 
 /*
- *	__dma_inv_area(start, size)
+ *	__dma_inv_area(start, end)
  *	- start   - virtual start address of region
- *	- size    - size in question
+ *	- end     - virtual end address of region
  */
-	add	x1, x1, x0
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -230,8 +229,10 @@ SYM_FUNC_END_PI(__dma_flush_area)
  *	- dir	- DMA direction
  */
 SYM_FUNC_START_PI(__dma_map_area)
+	add	x1, x0, x1
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__dma_inv_area
+	sub	x1, x1, x0
 	b	__dma_clean_area
 SYM_FUNC_END_PI(__dma_map_area)
 
@@ -242,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area)
  *	- dir	- DMA direction
  */
 SYM_FUNC_START_PI(__dma_unmap_area)
+	add	x1, x0, x1
 	cmp	w2, #DMA_TO_DEVICE
 	b.ne	__dma_inv_area
 	ret
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 6d44c028d1c9e..be650b573b2a1 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
-	__inval_dcache_area(addr, size);
+	__inval_dcache_area((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
-- 
GitLab


From 163d3f80695e31068c7d32244c9e6d406d5c5c00 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:54 +0100
Subject: [PATCH 1552/3804] arm64: dcache_by_line_op to take end parameter
 instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-12-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 27 +++++++++++++--------------
 arch/arm64/kvm/hyp/nvhe/cache.S    |  1 +
 arch/arm64/mm/cache.S              |  7 ++++++-
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ced791124b283..c4cecf85dccf5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -397,40 +397,39 @@ alternative_endif
 
 /*
  * Macro to perform a data cache maintenance for the interval
- * [addr, addr + size)
+ * [start, end)
  *
  * 	op:		operation passed to dc instruction
  * 	domain:		domain used in dsb instruciton
- * 	addr:		starting virtual address of the region
- * 	size:		size of the region
+ * 	start:          starting virtual address of the region
+ * 	end:            end virtual address of the region
  * 	fixup:		optional label to branch to on user fault
- * 	Corrupts:	addr, size, tmp1, tmp2
+ * 	Corrupts:       start, end, tmp1, tmp2
  */
-	.macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup
+	.macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup
 	dcache_line_size \tmp1, \tmp2
-	add	\size, \addr, \size
 	sub	\tmp2, \tmp1, #1
-	bic	\addr, \addr, \tmp2
+	bic	\start, \start, \tmp2
 .Ldcache_op\@:
 	.ifc	\op, cvau
-	__dcache_op_workaround_clean_cache \op, \addr
+	__dcache_op_workaround_clean_cache \op, \start
 	.else
 	.ifc	\op, cvac
-	__dcache_op_workaround_clean_cache \op, \addr
+	__dcache_op_workaround_clean_cache \op, \start
 	.else
 	.ifc	\op, cvap
-	sys	3, c7, c12, 1, \addr	// dc cvap
+	sys	3, c7, c12, 1, \start	// dc cvap
 	.else
 	.ifc	\op, cvadp
-	sys	3, c7, c13, 1, \addr	// dc cvadp
+	sys	3, c7, c13, 1, \start	// dc cvadp
 	.else
-	dc	\op, \addr
+	dc	\op, \start
 	.endif
 	.endif
 	.endif
 	.endif
-	add	\addr, \addr, \tmp1
-	cmp	\addr, \size
+	add	\start, \start, \tmp1
+	cmp	\start, \end
 	b.lo	.Ldcache_op\@
 	dsb	\domain
 
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 36cef69154281..3bcfa3cac46fe 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -8,6 +8,7 @@
 #include <asm/alternative.h>
 
 SYM_FUNC_START_PI(__flush_dcache_area)
+	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__flush_dcache_area)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 03c1a7659ffbe..fff883f691f2f 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -31,7 +31,7 @@ alternative_if ARM64_HAS_CACHE_IDC
 	b       .Ldc_skip_\@
 alternative_else_nop_endif
 	mov     x2, x0
-	sub     x3, x1, x0
+	mov     x3, x1
 	dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup
 .Ldc_skip_\@:
 alternative_if ARM64_HAS_CACHE_DIC
@@ -108,6 +108,7 @@ SYM_FUNC_END(invalidate_icache_range)
  *	- size    - size in question
  */
 SYM_FUNC_START_PI(__flush_dcache_area)
+	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__flush_dcache_area)
@@ -126,6 +127,7 @@ alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	ret
 alternative_else_nop_endif
+	add	x1, x0, x1
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
 SYM_FUNC_END(__clean_dcache_area_pou)
@@ -187,6 +189,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc)
  *	- start   - virtual start address of region
  *	- size    - size in question
  */
+	add	x1, x0, x1
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__clean_dcache_area_poc)
@@ -205,6 +208,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_pop)
 	alternative_if_not ARM64_HAS_DCPOP
 	b	__clean_dcache_area_poc
 	alternative_else_nop_endif
+	add	x1, x0, x1
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__clean_dcache_area_pop)
@@ -218,6 +222,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop)
  *	- size    - size in question
  */
 SYM_FUNC_START_PI(__dma_flush_area)
+	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__dma_flush_area)
-- 
GitLab


From 814b186079cd54d3fe3b6b8ab539cbd44705ef9d Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:55 +0100
Subject: [PATCH 1553/3804] arm64: __flush_dcache_area to take end parameter
 instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-13-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/arch_gicv3.h |  3 ++-
 arch/arm64/include/asm/cacheflush.h |  8 ++++----
 arch/arm64/include/asm/efi.h        |  2 +-
 arch/arm64/include/asm/kvm_mmu.h    |  3 ++-
 arch/arm64/kernel/hibernate.c       | 18 +++++++++++-------
 arch/arm64/kernel/idreg-override.c  |  3 ++-
 arch/arm64/kernel/kaslr.c           | 12 +++++++++---
 arch/arm64/kernel/machine_kexec.c   | 20 +++++++++++++-------
 arch/arm64/kernel/smp.c             |  8 ++++++--
 arch/arm64/kernel/smp_spin_table.c  |  7 ++++---
 arch/arm64/kvm/hyp/nvhe/cache.S     |  1 -
 arch/arm64/kvm/hyp/nvhe/setup.c     |  3 ++-
 arch/arm64/kvm/hyp/pgtable.c        | 13 ++++++++++---
 arch/arm64/mm/cache.S               |  9 ++++-----
 14 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 934b9be582d21..ed1cc9d8e6df7 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void)
 #define gic_read_lpir(c)		readq_relaxed(c)
 #define gic_write_lpir(v, c)		writeq_relaxed(v, c)
 
-#define gic_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define gic_flush_dcache_to_poc(a,l)	\
+	__flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l))
 
 #define gits_read_baser(c)		readq_relaxed(c)
 #define gits_write_baser(v, c)		writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 1572347068170..695f88864784f 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -50,15 +50,15 @@
  *		- start  - virtual start address
  *		- end    - virtual end address
  *
- *	__flush_dcache_area(kaddr, size)
+ *	__flush_dcache_area(start, end)
  *
  *		Ensure that the data held in page is written back.
- *		- kaddr  - page address
- *		- size   - region size
+ *		- start  - virtual start address
+ *		- end    - virtual end address
  */
 extern void __flush_icache_range(unsigned long start, unsigned long end);
 extern void invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(void *addr, size_t len);
+extern void __flush_dcache_area(unsigned long start, unsigned long end);
 extern void __inval_dcache_area(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_poc(void *addr, size_t len);
 extern void __clean_dcache_area_pop(void *addr, size_t len);
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 3578aba9c6080..0ae2397076fd7 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
 
 static inline void efi_capsule_flush_cache_range(void *addr, int size)
 {
-	__flush_dcache_area(addr, size);
+	__flush_dcache_area((unsigned long)addr, (unsigned long)addr + size);
 }
 
 #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 25ed956f9af15..33293d5855af4 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -180,7 +180,8 @@ static inline void *__kvm_vector_slot2addr(void *base,
 
 struct kvm;
 
-#define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define kvm_flush_dcache_to_poc(a,l)	\
+	__flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l))
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index b1cef371df2b2..b40ddce715073 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	return 0;
 }
 
-#define dcache_clean_range(start, end)	__flush_dcache_area(start, (end - start))
-
 #ifdef CONFIG_ARM64_MTE
 
 static DEFINE_XARRAY(mte_pages);
@@ -383,13 +381,18 @@ int swsusp_arch_suspend(void)
 		ret = swsusp_save();
 	} else {
 		/* Clean kernel core startup/idle code to PoC*/
-		dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
-		dcache_clean_range(__idmap_text_start, __idmap_text_end);
+		__flush_dcache_area((unsigned long)__mmuoff_data_start,
+				    (unsigned long)__mmuoff_data_end);
+		__flush_dcache_area((unsigned long)__idmap_text_start,
+				    (unsigned long)__idmap_text_end);
 
 		/* Clean kvm setup code to PoC? */
 		if (el2_reset_needed()) {
-			dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
-			dcache_clean_range(__hyp_text_start, __hyp_text_end);
+			__flush_dcache_area(
+				(unsigned long)__hyp_idmap_text_start,
+				(unsigned long)__hyp_idmap_text_end);
+			__flush_dcache_area((unsigned long)__hyp_text_start,
+					    (unsigned long)__hyp_text_end);
 		}
 
 		swsusp_mte_restore_tags();
@@ -474,7 +477,8 @@ int swsusp_arch_resume(void)
 	 * The hibernate exit text contains a set of el2 vectors, that will
 	 * be executed at el2 with the mmu off in order to reload hyp-stub.
 	 */
-	__flush_dcache_area(hibernate_exit, exit_size);
+	__flush_dcache_area((unsigned long)hibernate_exit,
+			    (unsigned long)hibernate_exit + exit_size);
 
 	/*
 	 * KASLR will cause the el2 vectors to be in a different location in
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index e628c8ce1ffe2..3dd515baf5268 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void)
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
 		if (regs[i]->override)
-			__flush_dcache_area(regs[i]->override,
+			__flush_dcache_area((unsigned long)regs[i]->override,
+					    (unsigned long)regs[i]->override +
 					    sizeof(*regs[i]->override));
 	}
 }
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 341342b207f63..49cccd03cb370 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void)
 	 * we end up running with module randomization disabled.
 	 */
 	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-	__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
+	__flush_dcache_area((unsigned long)&module_alloc_base,
+			    (unsigned long)&module_alloc_base +
+				    sizeof(module_alloc_base));
 
 	/*
 	 * Try to map the FDT early. If this fails, we simply bail,
@@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void)
 	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
 	module_alloc_base &= PAGE_MASK;
 
-	__flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base));
-	__flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed));
+	__flush_dcache_area((unsigned long)&module_alloc_base,
+			    (unsigned long)&module_alloc_base +
+				    sizeof(module_alloc_base));
+	__flush_dcache_area((unsigned long)&memstart_offset_seed,
+			    (unsigned long)&memstart_offset_seed +
+				    sizeof(memstart_offset_seed));
 
 	return offset;
 }
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a03944fd0cd4e..3e79110c8f3a8 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -72,7 +72,9 @@ int machine_kexec_post_load(struct kimage *kimage)
 	 * For execution with the MMU off, reloc_code needs to be cleaned to the
 	 * PoC and invalidated from the I-cache.
 	 */
-	__flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
+	__flush_dcache_area((unsigned long)reloc_code,
+			    (unsigned long)reloc_code +
+				    arm64_relocate_new_kernel_size);
 	invalidate_icache_range((uintptr_t)reloc_code,
 				(uintptr_t)reloc_code +
 					arm64_relocate_new_kernel_size);
@@ -106,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage)
 
 	for (entry = &kimage->head; ; entry++) {
 		unsigned int flag;
-		void *addr;
+		unsigned long addr;
 
 		/* flush the list entries. */
-		__flush_dcache_area(entry, sizeof(kimage_entry_t));
+		__flush_dcache_area((unsigned long)entry,
+				    (unsigned long)entry +
+					    sizeof(kimage_entry_t));
 
 		flag = *entry & IND_FLAGS;
 		if (flag == IND_DONE)
 			break;
 
-		addr = phys_to_virt(*entry & PAGE_MASK);
+		addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK);
 
 		switch (flag) {
 		case IND_INDIRECTION:
@@ -124,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
 			break;
 		case IND_SOURCE:
 			/* flush the source pages. */
-			__flush_dcache_area(addr, PAGE_SIZE);
+			__flush_dcache_area(addr, addr + PAGE_SIZE);
 			break;
 		case IND_DESTINATION:
 			break;
@@ -151,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage)
 			kimage->segment[i].memsz,
 			kimage->segment[i].memsz /  PAGE_SIZE);
 
-		__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
-			kimage->segment[i].memsz);
+		__flush_dcache_area(
+			(unsigned long)phys_to_virt(kimage->segment[i].mem),
+			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
+				kimage->segment[i].memsz);
 	}
 }
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b077..5fcdee3310874 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	secondary_data.task = idle;
 	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
-	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
+	__flush_dcache_area((unsigned long)&secondary_data,
+			    (unsigned long)&secondary_data +
+				    sizeof(secondary_data));
 
 	/* Now bring the CPU into our world */
 	ret = boot_secondary(cpu, idle);
@@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	pr_crit("CPU%u: failed to come online\n", cpu);
 	secondary_data.task = NULL;
 	secondary_data.stack = NULL;
-	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
+	__flush_dcache_area((unsigned long)&secondary_data,
+			    (unsigned long)&secondary_data +
+				    sizeof(secondary_data));
 	status = READ_ONCE(secondary_data.status);
 	if (status == CPU_MMU_OFF)
 		status = READ_ONCE(__early_cpu_boot_status);
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index c45a835128057..58d804582a35f 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
 	unsigned long size = sizeof(secondary_holding_pen_release);
 
 	secondary_holding_pen_release = val;
-	__flush_dcache_area(start, size);
+	__flush_dcache_area((unsigned long)start, (unsigned long)start + size);
 }
 
 
@@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
 	 * the boot protocol.
 	 */
 	writeq_relaxed(pa_holding_pen, release_addr);
-	__flush_dcache_area((__force void *)release_addr,
-			    sizeof(*release_addr));
+	__flush_dcache_area((__force unsigned long)release_addr,
+			    (__force unsigned long)release_addr +
+				    sizeof(*release_addr));
 
 	/*
 	 * Send an event to wake up the secondary CPU.
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 3bcfa3cac46fe..36cef69154281 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -8,7 +8,6 @@
 #include <asm/alternative.h>
 
 SYM_FUNC_START_PI(__flush_dcache_area)
-	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__flush_dcache_area)
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 7488f53b0aa2f..5dffe928f2563 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -134,7 +134,8 @@ static void update_nvhe_init_params(void)
 	for (i = 0; i < hyp_nr_cpus; i++) {
 		params = per_cpu_ptr(&kvm_init_params, i);
 		params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
-		__flush_dcache_area(params, sizeof(*params));
+		__flush_dcache_area((unsigned long)params,
+				    (unsigned long)params + sizeof(*params));
 	}
 }
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c37c1dc4feafa..10d2f04013d44 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -839,8 +839,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	stage2_put_pte(ptep, mmu, addr, level, mm_ops);
 
 	if (need_flush) {
-		__flush_dcache_area(kvm_pte_follow(pte, mm_ops),
-				    kvm_granule_size(level));
+		kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+
+		__flush_dcache_area((unsigned long)pte_follow,
+				    (unsigned long)pte_follow +
+					    kvm_granule_size(level));
 	}
 
 	if (childp)
@@ -988,11 +991,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	struct kvm_pgtable *pgt = arg;
 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
 	kvm_pte_t pte = *ptep;
+	kvm_pte_t *pte_follow;
 
 	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
 		return 0;
 
-	__flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level));
+	pte_follow = kvm_pte_follow(pte, mm_ops);
+	__flush_dcache_area((unsigned long)pte_follow,
+			    (unsigned long)pte_follow +
+				    kvm_granule_size(level));
 	return 0;
 }
 
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index fff883f691f2f..b2880aeba7ca5 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -99,16 +99,15 @@ alternative_else_nop_endif
 SYM_FUNC_END(invalidate_icache_range)
 
 /*
- *	__flush_dcache_area(kaddr, size)
+ *	__flush_dcache_area(start, end)
  *
- *	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *	Ensure that any D-cache lines for the interval [start, end)
  *	are cleaned and invalidated to the PoC.
  *
- *	- kaddr   - kernel address
- *	- size    - size in question
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
  */
 SYM_FUNC_START_PI(__flush_dcache_area)
-	add	x1, x0, x1
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__flush_dcache_area)
-- 
GitLab


From 1f42faf1d25de2ae239f322fda8af1c92c20e953 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:56 +0100
Subject: [PATCH 1554/3804] arm64: __clean_dcache_area_poc to take end
 parameter instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

Because the code is shared with __dma_clean_area, this patch changes
the parameters of that function as well. However, __dma_clean_area is
local to cache.S, so no other users are affected.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-14-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h |  2 +-
 arch/arm64/kernel/efi-entry.S       |  5 +++--
 arch/arm64/mm/cache.S               | 16 +++++++---------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 695f88864784f..3255878d6f309 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -60,7 +60,7 @@ extern void __flush_icache_range(unsigned long start, unsigned long end);
 extern void invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(unsigned long start, unsigned long end);
 extern void __inval_dcache_area(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_poc(void *addr, size_t len);
+extern void __clean_dcache_area_poc(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_pop(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index 0073b24b5d25e..b0f728fb61f01 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -28,6 +28,7 @@ SYM_CODE_START(efi_enter_kernel)
 	 * stale icache entries from before relocation.
 	 */
 	ldr	w1, =kernel_size
+	add	x1, x0, x1
 	bl	__clean_dcache_area_poc
 	ic	ialluis
 
@@ -36,7 +37,7 @@ SYM_CODE_START(efi_enter_kernel)
 	 * so that we can safely disable the MMU and caches.
 	 */
 	adr	x0, 0f
-	ldr	w1, 3f
+	adr	x1, 3f
 	bl	__clean_dcache_area_poc
 0:
 	/* Turn off Dcache and MMU */
@@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel)
 	mov	x2, xzr
 	mov	x3, xzr
 	br	x19
+3:
 SYM_CODE_END(efi_enter_kernel)
-3:	.long	. - 0b
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index b2880aeba7ca5..e2e2740c55cea 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -171,24 +171,23 @@ SYM_FUNC_END_PI(__inval_dcache_area)
 SYM_FUNC_END(__dma_inv_area)
 
 /*
- *	__clean_dcache_area_poc(kaddr, size)
+ *	__clean_dcache_area_poc(start, end)
  *
- * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoC.
  *
- *	- kaddr   - kernel address
- *	- size    - size in question
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_clean_area)
 SYM_FUNC_START_PI(__clean_dcache_area_poc)
 	/* FALLTHROUGH */
 
 /*
- *	__dma_clean_area(start, size)
+ *	__dma_clean_area(start, end)
  *	- start   - virtual start address of region
- *	- size    - size in question
+ *	- end     - virtual end address of region
  */
-	add	x1, x0, x1
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__clean_dcache_area_poc)
@@ -204,10 +203,10 @@ SYM_FUNC_END(__dma_clean_area)
  *	- size    - size in question
  */
 SYM_FUNC_START_PI(__clean_dcache_area_pop)
+	add	x1, x0, x1
 	alternative_if_not ARM64_HAS_DCPOP
 	b	__clean_dcache_area_poc
 	alternative_else_nop_endif
-	add	x1, x0, x1
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
 SYM_FUNC_END_PI(__clean_dcache_area_pop)
@@ -236,7 +235,6 @@ SYM_FUNC_START_PI(__dma_map_area)
 	add	x1, x0, x1
 	cmp	w2, #DMA_FROM_DEVICE
 	b.eq	__dma_inv_area
-	sub	x1, x1, x0
 	b	__dma_clean_area
 SYM_FUNC_END_PI(__dma_map_area)
 
-- 
GitLab


From f749448edb9c98bece0aeec5536260a8794af24b Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:57 +0100
Subject: [PATCH 1555/3804] arm64: __clean_dcache_area_pop to take end
 parameter instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-15-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h | 2 +-
 arch/arm64/lib/uaccess_flushcache.c | 4 ++--
 arch/arm64/mm/cache.S               | 9 ++++-----
 arch/arm64/mm/flush.c               | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 3255878d6f309..fa5641868d65d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -61,7 +61,7 @@ extern void invalidate_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(unsigned long start, unsigned long end);
 extern void __inval_dcache_area(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_poc(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_pop(void *addr, size_t len);
+extern void __clean_dcache_area_pop(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 extern void sync_icache_aliases(void *kaddr, unsigned long len);
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index c83bb5a4aad2c..62ea989effe80 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 	 * barrier to order the cache maintenance against the memcpy.
 	 */
 	memcpy(dst, src, cnt);
-	__clean_dcache_area_pop(dst, cnt);
+	__clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt);
 }
 EXPORT_SYMBOL_GPL(memcpy_flushcache);
 
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
 	rc = raw_copy_from_user(to, from, n);
 
 	/* See above */
-	__clean_dcache_area_pop(to, n - rc);
+	__clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc);
 	return rc;
 }
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index e2e2740c55cea..b71fcf56516b8 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -194,16 +194,15 @@ SYM_FUNC_END_PI(__clean_dcache_area_poc)
 SYM_FUNC_END(__dma_clean_area)
 
 /*
- *	__clean_dcache_area_pop(kaddr, size)
+ *	__clean_dcache_area_pop(start, end)
  *
- * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoP.
  *
- *	- kaddr   - kernel address
- *	- size    - size in question
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
  */
 SYM_FUNC_START_PI(__clean_dcache_area_pop)
-	add	x1, x0, x1
 	alternative_if_not ARM64_HAS_DCPOP
 	b	__clean_dcache_area_poc
 	alternative_else_nop_endif
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index be650b573b2a1..b2c226d93ca52 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -84,7 +84,7 @@ void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	/* Ensure order against any prior non-cacheable writes */
 	dmb(osh);
-	__clean_dcache_area_pop(addr, size);
+	__clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
-- 
GitLab


From 406d7d4e2bc76d38a6dc88733a0f72fabf02d305 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:58 +0100
Subject: [PATCH 1556/3804] arm64: __clean_dcache_area_pou to take end
 parameter instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-16-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h | 2 +-
 arch/arm64/mm/cache.S               | 9 ++++-----
 arch/arm64/mm/flush.c               | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index fa5641868d65d..f867230473150 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -62,7 +62,7 @@ extern void __flush_dcache_area(unsigned long start, unsigned long end);
 extern void __inval_dcache_area(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_poc(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_pop(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_pou(void *addr, size_t len);
+extern void __clean_dcache_area_pou(unsigned long start, unsigned long end);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 extern void sync_icache_aliases(void *kaddr, unsigned long len);
 
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index b71fcf56516b8..ea605d94182fd 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -113,20 +113,19 @@ SYM_FUNC_START_PI(__flush_dcache_area)
 SYM_FUNC_END_PI(__flush_dcache_area)
 
 /*
- *	__clean_dcache_area_pou(kaddr, size)
+ *	__clean_dcache_area_pou(start, end)
  *
- * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoU.
  *
- *	- kaddr   - kernel address
- *	- size    - size in question
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
  */
 SYM_FUNC_START(__clean_dcache_area_pou)
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	ret
 alternative_else_nop_endif
-	add	x1, x0, x1
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
 SYM_FUNC_END(__clean_dcache_area_pou)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index b2c226d93ca52..0341bcc6fdf31 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -19,7 +19,7 @@ void sync_icache_aliases(void *kaddr, unsigned long len)
 	unsigned long addr = (unsigned long)kaddr;
 
 	if (icache_is_aliasing()) {
-		__clean_dcache_area_pou(kaddr, len);
+		__clean_dcache_area_pou(kaddr, kaddr + len);
 		__flush_icache_all();
 	} else {
 		/*
-- 
GitLab


From 8c28d52ccd1d6e3a5aca8a37e465a5f8b77edbc1 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:29:59 +0100
Subject: [PATCH 1557/3804] arm64: sync_icache_aliases to take end parameter
 instead of size

To be consistent with other functions with similar names and
functionality in cacheflush.h, cache.S, and cachetlb.rst, change
to specify the range in terms of start and end, as opposed to
start and size.

No functional change intended.

Reported-by: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-17-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h |  2 +-
 arch/arm64/kernel/probes/uprobes.c  |  2 +-
 arch/arm64/mm/flush.c               | 21 ++++++++++-----------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index f867230473150..70b389a8dea5a 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -64,7 +64,7 @@ extern void __clean_dcache_area_poc(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_pop(unsigned long start, unsigned long end);
 extern void __clean_dcache_area_pou(unsigned long start, unsigned long end);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
-extern void sync_icache_aliases(void *kaddr, unsigned long len);
+extern void sync_icache_aliases(unsigned long start, unsigned long end);
 
 static inline void flush_icache_range(unsigned long start, unsigned long end)
 {
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 2c247634552b1..9be668f3f0341 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 	memcpy(dst, src, len);
 
 	/* flush caches (dcache/icache) */
-	sync_icache_aliases(dst, len);
+	sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
 
 	kunmap_atomic(xol_page_kaddr);
 }
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 0341bcc6fdf31..c4ca7e05fdb80 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -14,28 +14,25 @@
 #include <asm/cache.h>
 #include <asm/tlbflush.h>
 
-void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(unsigned long start, unsigned long end)
 {
-	unsigned long addr = (unsigned long)kaddr;
-
 	if (icache_is_aliasing()) {
-		__clean_dcache_area_pou(kaddr, kaddr + len);
+		__clean_dcache_area_pou(start, end);
 		__flush_icache_all();
 	} else {
 		/*
 		 * Don't issue kick_all_cpus_sync() after I-cache invalidation
 		 * for user mappings.
 		 */
-		__flush_icache_range(addr, addr + len);
+		__flush_icache_range(start, end);
 	}
 }
 
-static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-				unsigned long uaddr, void *kaddr,
-				unsigned long len)
+static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start,
+				unsigned long end)
 {
 	if (vma->vm_flags & VM_EXEC)
-		sync_icache_aliases(kaddr, len);
+		sync_icache_aliases(start, end);
 }
 
 /*
@@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long len)
 {
 	memcpy(dst, src, len);
-	flush_ptrace_access(vma, page, uaddr, dst, len);
+	flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len);
 }
 
 void __sync_icache_dcache(pte_t pte)
@@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte)
 	struct page *page = pte_page(pte);
 
 	if (!test_bit(PG_dcache_clean, &page->flags)) {
-		sync_icache_aliases(page_address(page), page_size(page));
+		sync_icache_aliases((unsigned long)page_address(page),
+				    (unsigned long)page_address(page) +
+					    page_size(page));
 		set_bit(PG_dcache_clean, &page->flags);
 	}
 }
-- 
GitLab


From 393239be1ba69dcd29be504ffe14938509795821 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:30:00 +0100
Subject: [PATCH 1558/3804] arm64: Fix cache maintenance function comments

Fix and expand the comments for the cache maintenance functions in
cacheflush.h. Add comments for functions that weren't described
before, and explain what each function does using Arm Architecture
Reference Manual terminology.

No functional change intended.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-18-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cacheflush.h | 47 ++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 70b389a8dea5a..26617df1fa459 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -30,31 +30,44 @@
  *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
  *	VIPT I-cache.
  *
- *	flush_icache_range(start, end)
+ *	All functions below apply to the interval [start, end)
+ *		- start  - virtual start address (inclusive)
+ *		- end    - virtual end address (exclusive)
  *
- *		Ensure coherency between the I-cache and the D-cache in the
- *		region described by start, end.
- *		- start  - virtual start address
- *		- end    - virtual end address
+ *	__flush_icache_range(start, end)
  *
- *	invalidate_icache_range(start, end)
- *
- *		Invalidate the I-cache in the region described by start, end.
- *		- start  - virtual start address
- *		- end    - virtual end address
+ *		Ensure coherency between the I-cache and the D-cache region to
+ *		the Point of Unification.
  *
  *	__flush_cache_user_range(start, end)
  *
- *		Ensure coherency between the I-cache and the D-cache in the
- *		region described by start, end.
- *		- start  - virtual start address
- *		- end    - virtual end address
+ *		Ensure coherency between the I-cache and the D-cache region to
+ *		the Point of Unification.
+ *		Use only if the region might access user memory.
+ *
+ *	invalidate_icache_range(start, end)
+ *
+ *		Invalidate I-cache region to the Point of Unification.
  *
  *	__flush_dcache_area(start, end)
  *
- *		Ensure that the data held in page is written back.
- *		- start  - virtual start address
- *		- end    - virtual end address
+ *		Clean and invalidate D-cache region to the Point of Coherency.
+ *
+ *	__inval_dcache_area(start, end)
+ *
+ *		Invalidate D-cache region to the Point of Coherency.
+ *
+ *	__clean_dcache_area_poc(start, end)
+ *
+ *		Clean D-cache region to the Point of Coherency.
+ *
+ *	__clean_dcache_area_pop(start, end)
+ *
+ *		Clean D-cache region to the Point of Persistence.
+ *
+ *	__clean_dcache_area_pou(start, end)
+ *
+ *		Clean D-cache region to the Point of Unification.
  */
 extern void __flush_icache_range(unsigned long start, unsigned long end);
 extern void invalidate_icache_range(unsigned long start, unsigned long end);
-- 
GitLab


From fade9c2c6ee2baea7df8e6059b3f143c681e5ce4 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Mon, 24 May 2021 09:30:01 +0100
Subject: [PATCH 1559/3804] arm64: Rename arm64-internal cache maintenance
 functions

Although naming across the codebase isn't that consistent, it
tends to follow certain patterns. Moreover, the term "flush"
isn't defined in the Arm Architecture reference manual, and might
be interpreted to mean clean, invalidate, or both for a cache.

Rename arm64-internal functions to make the naming internally
consistent, as well as making it consistent with the Arm ARM, by
specifying whether it applies to the instruction, data, or both
caches, whether the operation is a clean, invalidate, or both.
Also specify which point the operation applies to, i.e., to the
point of unification (PoU), coherency (PoC), or persistence
(PoP).

This commit applies the following sed transformation to all files
under arch/arm64:

"s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\
"s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\
"s/\binvalidate_icache_range\b/icache_inval_pou/g;"\
"s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\
"s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\
"s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\
"s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\
"s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\
"s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\
"s/\b__flush_icache_all\b/icache_inval_all_pou/g;"

Note that __clean_dcache_area_poc is deliberately missing a word
boundary check at the beginning in order to match the efistub
symbols in image-vars.h.

Also note that, despite its name, __flush_icache_range operates
on both instruction and data caches. The name change here
reflects that.

No functional change intended.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/arch_gicv3.h |  2 +-
 arch/arm64/include/asm/cacheflush.h | 36 +++++++++---------
 arch/arm64/include/asm/efi.h        |  2 +-
 arch/arm64/include/asm/kvm_mmu.h    |  6 +--
 arch/arm64/kernel/alternative.c     |  2 +-
 arch/arm64/kernel/efi-entry.S       |  4 +-
 arch/arm64/kernel/head.S            |  8 ++--
 arch/arm64/kernel/hibernate-asm.S   |  4 +-
 arch/arm64/kernel/hibernate.c       | 12 +++---
 arch/arm64/kernel/idreg-override.c  |  2 +-
 arch/arm64/kernel/image-vars.h      |  2 +-
 arch/arm64/kernel/insn.c            |  2 +-
 arch/arm64/kernel/kaslr.c           |  6 +--
 arch/arm64/kernel/machine_kexec.c   | 10 ++---
 arch/arm64/kernel/smp.c             |  4 +-
 arch/arm64/kernel/smp_spin_table.c  |  4 +-
 arch/arm64/kernel/sys_compat.c      |  2 +-
 arch/arm64/kvm/arm.c                |  2 +-
 arch/arm64/kvm/hyp/nvhe/cache.S     |  4 +-
 arch/arm64/kvm/hyp/nvhe/setup.c     |  2 +-
 arch/arm64/kvm/hyp/nvhe/tlb.c       |  2 +-
 arch/arm64/kvm/hyp/pgtable.c        |  4 +-
 arch/arm64/lib/uaccess_flushcache.c |  4 +-
 arch/arm64/mm/cache.S               | 58 ++++++++++++++---------------
 arch/arm64/mm/flush.c               | 12 +++---
 25 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index ed1cc9d8e6df7..4ad22c3135dbb 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -125,7 +125,7 @@ static inline u32 gic_read_rpr(void)
 #define gic_write_lpir(v, c)		writeq_relaxed(v, c)
 
 #define gic_flush_dcache_to_poc(a,l)	\
-	__flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l))
+	dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
 
 #define gits_read_baser(c)		readq_relaxed(c)
 #define gits_write_baser(v, c)		writeq_relaxed(v, c)
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 26617df1fa459..543c997eb3b7d 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -34,54 +34,54 @@
  *		- start  - virtual start address (inclusive)
  *		- end    - virtual end address (exclusive)
  *
- *	__flush_icache_range(start, end)
+ *	caches_clean_inval_pou(start, end)
  *
  *		Ensure coherency between the I-cache and the D-cache region to
  *		the Point of Unification.
  *
- *	__flush_cache_user_range(start, end)
+ *	caches_clean_inval_user_pou(start, end)
  *
  *		Ensure coherency between the I-cache and the D-cache region to
  *		the Point of Unification.
  *		Use only if the region might access user memory.
  *
- *	invalidate_icache_range(start, end)
+ *	icache_inval_pou(start, end)
  *
  *		Invalidate I-cache region to the Point of Unification.
  *
- *	__flush_dcache_area(start, end)
+ *	dcache_clean_inval_poc(start, end)
  *
  *		Clean and invalidate D-cache region to the Point of Coherency.
  *
- *	__inval_dcache_area(start, end)
+ *	dcache_inval_poc(start, end)
  *
  *		Invalidate D-cache region to the Point of Coherency.
  *
- *	__clean_dcache_area_poc(start, end)
+ *	dcache_clean_poc(start, end)
  *
  *		Clean D-cache region to the Point of Coherency.
  *
- *	__clean_dcache_area_pop(start, end)
+ *	dcache_clean_pop(start, end)
  *
  *		Clean D-cache region to the Point of Persistence.
  *
- *	__clean_dcache_area_pou(start, end)
+ *	dcache_clean_pou(start, end)
  *
  *		Clean D-cache region to the Point of Unification.
  */
-extern void __flush_icache_range(unsigned long start, unsigned long end);
-extern void invalidate_icache_range(unsigned long start, unsigned long end);
-extern void __flush_dcache_area(unsigned long start, unsigned long end);
-extern void __inval_dcache_area(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_poc(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_pop(unsigned long start, unsigned long end);
-extern void __clean_dcache_area_pou(unsigned long start, unsigned long end);
-extern long __flush_cache_user_range(unsigned long start, unsigned long end);
+extern void caches_clean_inval_pou(unsigned long start, unsigned long end);
+extern void icache_inval_pou(unsigned long start, unsigned long end);
+extern void dcache_clean_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_inval_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_poc(unsigned long start, unsigned long end);
+extern void dcache_clean_pop(unsigned long start, unsigned long end);
+extern void dcache_clean_pou(unsigned long start, unsigned long end);
+extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end);
 extern void sync_icache_aliases(unsigned long start, unsigned long end);
 
 static inline void flush_icache_range(unsigned long start, unsigned long end)
 {
-	__flush_icache_range(start, end);
+	caches_clean_inval_pou(start, end);
 
 	/*
 	 * IPI all online CPUs so that they undergo a context synchronization
@@ -135,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *,
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
 
-static __always_inline void __flush_icache_all(void)
+static __always_inline void icache_inval_all_pou(void)
 {
 	if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
 		return;
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 0ae2397076fd7..1bed37eb013a1 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -137,7 +137,7 @@ void efi_virtmap_unload(void);
 
 static inline void efi_capsule_flush_cache_range(void *addr, int size)
 {
-	__flush_dcache_area((unsigned long)addr, (unsigned long)addr + size);
+	dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size);
 }
 
 #endif /* _ASM_EFI_H */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 33293d5855af4..f4cbfa9025a83 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -181,7 +181,7 @@ static inline void *__kvm_vector_slot2addr(void *base,
 struct kvm;
 
 #define kvm_flush_dcache_to_poc(a,l)	\
-	__flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l))
+	dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l))
 
 static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 {
@@ -209,12 +209,12 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
 {
 	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
-		__flush_icache_all();
+		icache_inval_all_pou();
 	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
 		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
 		void *va = page_address(pfn_to_page(pfn));
 
-		invalidate_icache_range((unsigned long)va,
+		icache_inval_pou((unsigned long)va,
 					(unsigned long)va + size);
 	}
 }
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index c906d20c7b529..3fb79b76e9d96 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -181,7 +181,7 @@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu
 	 */
 	if (!is_module) {
 		dsb(ish);
-		__flush_icache_all();
+		icache_inval_all_pou();
 		isb();
 
 		/* Ignore ARM64_CB bit from feature mask */
diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index b0f728fb61f01..61a87fa1c3055 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -29,7 +29,7 @@ SYM_CODE_START(efi_enter_kernel)
 	 */
 	ldr	w1, =kernel_size
 	add	x1, x0, x1
-	bl	__clean_dcache_area_poc
+	bl	dcache_clean_poc
 	ic	ialluis
 
 	/*
@@ -38,7 +38,7 @@ SYM_CODE_START(efi_enter_kernel)
 	 */
 	adr	x0, 0f
 	adr	x1, 3f
-	bl	__clean_dcache_area_poc
+	bl	dcache_clean_poc
 0:
 	/* Turn off Dcache and MMU */
 	mrs	x0, CurrentEL
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8df0ac8d9123e..6928cb67d3a03 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -118,7 +118,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
 						// MMU off
 
 	add	x1, x0, #0x20			// 4 x 8 bytes
-	b	__inval_dcache_area		// tail call
+	b	dcache_inval_poc		// tail call
 SYM_CODE_END(preserve_boot_args)
 
 /*
@@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	 */
 	adrp	x0, init_pg_dir
 	adrp	x1, init_pg_end
-	bl	__inval_dcache_area
+	bl	dcache_inval_poc
 
 	/*
 	 * Clear the init page tables.
@@ -381,11 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 
 	adrp	x0, idmap_pg_dir
 	adrp	x1, idmap_pg_end
-	bl	__inval_dcache_area
+	bl	dcache_inval_poc
 
 	adrp	x0, init_pg_dir
 	adrp	x1, init_pg_end
-	bl	__inval_dcache_area
+	bl	dcache_inval_poc
 
 	ret	x28
 SYM_FUNC_END(__create_page_tables)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index ef2ab7caf8155..81c0186a5e322 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -45,7 +45,7 @@
  * Because this code has to be copied to a 'safe' page, it can't call out to
  * other functions by PC-relative address. Also remember that it may be
  * mid-way through over-writing other functions. For this reason it contains
- * code from __flush_icache_range() and uses the copy_page() macro.
+ * code from caches_clean_inval_pou() and uses the copy_page() macro.
  *
  * This 'safe' page is mapped via ttbr0, and executed from there. This function
  * switches to a copy of the linear map in ttbr1, performs the restore, then
@@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit)
 	copy_page	x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
 
 	add	x1, x10, #PAGE_SIZE
-	/* Clean the copied page to PoU - based on __flush_icache_range() */
+	/* Clean the copied page to PoU - based on caches_clean_inval_pou() */
 	raw_dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x10, x3
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index b40ddce715073..46a0b4d6e2519 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 		return -ENOMEM;
 
 	memcpy(page, src_start, length);
-	__flush_icache_range((unsigned long)page, (unsigned long)page + length);
+	caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length);
 	rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page);
 	if (rc)
 		return rc;
@@ -381,17 +381,17 @@ int swsusp_arch_suspend(void)
 		ret = swsusp_save();
 	} else {
 		/* Clean kernel core startup/idle code to PoC*/
-		__flush_dcache_area((unsigned long)__mmuoff_data_start,
+		dcache_clean_inval_poc((unsigned long)__mmuoff_data_start,
 				    (unsigned long)__mmuoff_data_end);
-		__flush_dcache_area((unsigned long)__idmap_text_start,
+		dcache_clean_inval_poc((unsigned long)__idmap_text_start,
 				    (unsigned long)__idmap_text_end);
 
 		/* Clean kvm setup code to PoC? */
 		if (el2_reset_needed()) {
-			__flush_dcache_area(
+			dcache_clean_inval_poc(
 				(unsigned long)__hyp_idmap_text_start,
 				(unsigned long)__hyp_idmap_text_end);
-			__flush_dcache_area((unsigned long)__hyp_text_start,
+			dcache_clean_inval_poc((unsigned long)__hyp_text_start,
 					    (unsigned long)__hyp_text_end);
 		}
 
@@ -477,7 +477,7 @@ int swsusp_arch_resume(void)
 	 * The hibernate exit text contains a set of el2 vectors, that will
 	 * be executed at el2 with the mmu off in order to reload hyp-stub.
 	 */
-	__flush_dcache_area((unsigned long)hibernate_exit,
+	dcache_clean_inval_poc((unsigned long)hibernate_exit,
 			    (unsigned long)hibernate_exit + exit_size);
 
 	/*
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 3dd515baf5268..53a381a7f65dd 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -237,7 +237,7 @@ asmlinkage void __init init_feature_override(void)
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
 		if (regs[i]->override)
-			__flush_dcache_area((unsigned long)regs[i]->override,
+			dcache_clean_inval_poc((unsigned long)regs[i]->override,
 					    (unsigned long)regs[i]->override +
 					    sizeof(*regs[i]->override));
 	}
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index bcf3c27553708..c96a9a0043bf4 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -35,7 +35,7 @@ __efistub_strnlen		= __pi_strnlen;
 __efistub_strcmp		= __pi_strcmp;
 __efistub_strncmp		= __pi_strncmp;
 __efistub_strrchr		= __pi_strrchr;
-__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc;
+__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 __efistub___memcpy		= __pi_memcpy;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f60ea96..51cb8dc98d008 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
 
 	ret = aarch64_insn_write(tp, insn);
 	if (ret == 0)
-		__flush_icache_range((uintptr_t)tp,
+		caches_clean_inval_pou((uintptr_t)tp,
 				     (uintptr_t)tp + AARCH64_INSN_SIZE);
 
 	return ret;
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 49cccd03cb370..cfa2cfde3019d 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -72,7 +72,7 @@ u64 __init kaslr_early_init(void)
 	 * we end up running with module randomization disabled.
 	 */
 	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-	__flush_dcache_area((unsigned long)&module_alloc_base,
+	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
 			    (unsigned long)&module_alloc_base +
 				    sizeof(module_alloc_base));
 
@@ -172,10 +172,10 @@ u64 __init kaslr_early_init(void)
 	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
 	module_alloc_base &= PAGE_MASK;
 
-	__flush_dcache_area((unsigned long)&module_alloc_base,
+	dcache_clean_inval_poc((unsigned long)&module_alloc_base,
 			    (unsigned long)&module_alloc_base +
 				    sizeof(module_alloc_base));
-	__flush_dcache_area((unsigned long)&memstart_offset_seed,
+	dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
 			    (unsigned long)&memstart_offset_seed +
 				    sizeof(memstart_offset_seed));
 
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index 3e79110c8f3a8..03ceabe4d912c 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -72,10 +72,10 @@ int machine_kexec_post_load(struct kimage *kimage)
 	 * For execution with the MMU off, reloc_code needs to be cleaned to the
 	 * PoC and invalidated from the I-cache.
 	 */
-	__flush_dcache_area((unsigned long)reloc_code,
+	dcache_clean_inval_poc((unsigned long)reloc_code,
 			    (unsigned long)reloc_code +
 				    arm64_relocate_new_kernel_size);
-	invalidate_icache_range((uintptr_t)reloc_code,
+	icache_inval_pou((uintptr_t)reloc_code,
 				(uintptr_t)reloc_code +
 					arm64_relocate_new_kernel_size);
 
@@ -111,7 +111,7 @@ static void kexec_list_flush(struct kimage *kimage)
 		unsigned long addr;
 
 		/* flush the list entries. */
-		__flush_dcache_area((unsigned long)entry,
+		dcache_clean_inval_poc((unsigned long)entry,
 				    (unsigned long)entry +
 					    sizeof(kimage_entry_t));
 
@@ -128,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage)
 			break;
 		case IND_SOURCE:
 			/* flush the source pages. */
-			__flush_dcache_area(addr, addr + PAGE_SIZE);
+			dcache_clean_inval_poc(addr, addr + PAGE_SIZE);
 			break;
 		case IND_DESTINATION:
 			break;
@@ -155,7 +155,7 @@ static void kexec_segment_flush(const struct kimage *kimage)
 			kimage->segment[i].memsz,
 			kimage->segment[i].memsz /  PAGE_SIZE);
 
-		__flush_dcache_area(
+		dcache_clean_inval_poc(
 			(unsigned long)phys_to_virt(kimage->segment[i].mem),
 			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
 				kimage->segment[i].memsz);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 5fcdee3310874..9b4c1118194da 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -122,7 +122,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	secondary_data.task = idle;
 	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
-	__flush_dcache_area((unsigned long)&secondary_data,
+	dcache_clean_inval_poc((unsigned long)&secondary_data,
 			    (unsigned long)&secondary_data +
 				    sizeof(secondary_data));
 
@@ -145,7 +145,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	pr_crit("CPU%u: failed to come online\n", cpu);
 	secondary_data.task = NULL;
 	secondary_data.stack = NULL;
-	__flush_dcache_area((unsigned long)&secondary_data,
+	dcache_clean_inval_poc((unsigned long)&secondary_data,
 			    (unsigned long)&secondary_data +
 				    sizeof(secondary_data));
 	status = READ_ONCE(secondary_data.status);
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 58d804582a35f..7e1624ecab3c8 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -36,7 +36,7 @@ static void write_pen_release(u64 val)
 	unsigned long size = sizeof(secondary_holding_pen_release);
 
 	secondary_holding_pen_release = val;
-	__flush_dcache_area((unsigned long)start, (unsigned long)start + size);
+	dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size);
 }
 
 
@@ -90,7 +90,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
 	 * the boot protocol.
 	 */
 	writeq_relaxed(pa_holding_pen, release_addr);
-	__flush_dcache_area((__force unsigned long)release_addr,
+	dcache_clean_inval_poc((__force unsigned long)release_addr,
 			    (__force unsigned long)release_addr +
 				    sizeof(*release_addr));
 
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 265fe3eb10699..db5159a3055fc 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
 			dsb(ish);
 		}
 
-		ret = __flush_cache_user_range(start, start + chunk);
+		ret = caches_clean_inval_user_pou(start, start + chunk);
 		if (ret)
 			return ret;
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1cb39c0803a44..c1953f65ca0ec 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1064,7 +1064,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 		if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
 			stage2_unmap_vm(vcpu->kvm);
 		else
-			__flush_icache_all();
+			icache_inval_all_pou();
 	}
 
 	vcpu_reset_hcr(vcpu);
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 36cef69154281..958734f4d6b0e 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -7,7 +7,7 @@
 #include <asm/assembler.h>
 #include <asm/alternative.h>
 
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 5dffe928f2563..8143ebd4fb721 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -134,7 +134,7 @@ static void update_nvhe_init_params(void)
 	for (i = 0; i < hyp_nr_cpus; i++) {
 		params = per_cpu_ptr(&kvm_init_params, i);
 		params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd);
-		__flush_dcache_area((unsigned long)params,
+		dcache_clean_inval_poc((unsigned long)params,
 				    (unsigned long)params + sizeof(*params));
 	}
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 83dc3b271bc5b..38ed0f6f27032 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
 	 * you should be running with VHE enabled.
 	 */
 	if (icache_is_vpipt())
-		__flush_icache_all();
+		icache_inval_all_pou();
 
 	__tlb_switch_to_host(&cxt);
 }
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 10d2f04013d44..e9ad7fb28ee32 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -841,7 +841,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	if (need_flush) {
 		kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
 
-		__flush_dcache_area((unsigned long)pte_follow,
+		dcache_clean_inval_poc((unsigned long)pte_follow,
 				    (unsigned long)pte_follow +
 					    kvm_granule_size(level));
 	}
@@ -997,7 +997,7 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 		return 0;
 
 	pte_follow = kvm_pte_follow(pte, mm_ops);
-	__flush_dcache_area((unsigned long)pte_follow,
+	dcache_clean_inval_poc((unsigned long)pte_follow,
 			    (unsigned long)pte_follow +
 				    kvm_granule_size(level));
 	return 0;
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index 62ea989effe80..baee22961bdba 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 	 * barrier to order the cache maintenance against the memcpy.
 	 */
 	memcpy(dst, src, cnt);
-	__clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt);
+	dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt);
 }
 EXPORT_SYMBOL_GPL(memcpy_flushcache);
 
@@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from,
 	rc = raw_copy_from_user(to, from, n);
 
 	/* See above */
-	__clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc);
+	dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc);
 	return rc;
 }
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index ea605d94182fd..5051b3c1a4f12 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -15,7 +15,7 @@
 #include <asm/asm-uaccess.h>
 
 /*
- *	__flush_cache_range(start,end) [fixup]
+ *	caches_clean_inval_pou_macro(start,end) [fixup]
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
@@ -25,7 +25,7 @@
  *	- end     - virtual end address of region
  *	- fixup   - optional label to branch to on user fault
  */
-.macro	__flush_cache_range, fixup
+.macro	caches_clean_inval_pou_macro, fixup
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb     ishst
 	b       .Ldc_skip_\@
@@ -43,7 +43,7 @@ alternative_else_nop_endif
 .endm
 
 /*
- *	__flush_icache_range(start,end)
+ *	caches_clean_inval_pou(start,end)
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
@@ -52,13 +52,13 @@ alternative_else_nop_endif
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START(__flush_icache_range)
-	__flush_cache_range
+SYM_FUNC_START(caches_clean_inval_pou)
+	caches_clean_inval_pou_macro
 	ret
-SYM_FUNC_END(__flush_icache_range)
+SYM_FUNC_END(caches_clean_inval_pou)
 
 /*
- *	__flush_cache_user_range(start,end)
+ *	caches_clean_inval_user_pou(start,end)
  *
  *	Ensure that the I and D caches are coherent within specified region.
  *	This is typically used when code has been written to a memory region,
@@ -67,10 +67,10 @@ SYM_FUNC_END(__flush_icache_range)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START(__flush_cache_user_range)
+SYM_FUNC_START(caches_clean_inval_user_pou)
 	uaccess_ttbr0_enable x2, x3, x4
 
-	__flush_cache_range 2f
+	caches_clean_inval_pou_macro 2f
 	mov	x0, xzr
 1:
 	uaccess_ttbr0_disable x1, x2
@@ -78,17 +78,17 @@ SYM_FUNC_START(__flush_cache_user_range)
 2:
 	mov	x0, #-EFAULT
 	b	1b
-SYM_FUNC_END(__flush_cache_user_range)
+SYM_FUNC_END(caches_clean_inval_user_pou)
 
 /*
- *	invalidate_icache_range(start,end)
+ *	icache_inval_pou(start,end)
  *
  *	Ensure that the I cache is invalid within specified region.
  *
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START(invalidate_icache_range)
+SYM_FUNC_START(icache_inval_pou)
 alternative_if ARM64_HAS_CACHE_DIC
 	isb
 	ret
@@ -96,10 +96,10 @@ alternative_else_nop_endif
 
 	invalidate_icache_by_line x0, x1, x2, x3
 	ret
-SYM_FUNC_END(invalidate_icache_range)
+SYM_FUNC_END(icache_inval_pou)
 
 /*
- *	__flush_dcache_area(start, end)
+ *	dcache_clean_inval_poc(start, end)
  *
  *	Ensure that any D-cache lines for the interval [start, end)
  *	are cleaned and invalidated to the PoC.
@@ -107,13 +107,13 @@ SYM_FUNC_END(invalidate_icache_range)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(__flush_dcache_area)
+SYM_FUNC_START_PI(dcache_clean_inval_poc)
 	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__flush_dcache_area)
+SYM_FUNC_END_PI(dcache_clean_inval_poc)
 
 /*
- *	__clean_dcache_area_pou(start, end)
+ *	dcache_clean_pou(start, end)
  *
  * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoU.
@@ -121,17 +121,17 @@ SYM_FUNC_END_PI(__flush_dcache_area)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START(__clean_dcache_area_pou)
+SYM_FUNC_START(dcache_clean_pou)
 alternative_if ARM64_HAS_CACHE_IDC
 	dsb	ishst
 	ret
 alternative_else_nop_endif
 	dcache_by_line_op cvau, ish, x0, x1, x2, x3
 	ret
-SYM_FUNC_END(__clean_dcache_area_pou)
+SYM_FUNC_END(dcache_clean_pou)
 
 /*
- *	__inval_dcache_area(start, end)
+ *	dcache_inval_poc(start, end)
  *
  * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are invalidated. Any partial lines at the ends of the interval are
@@ -141,7 +141,7 @@ SYM_FUNC_END(__clean_dcache_area_pou)
  *	- end     - kernel end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_inv_area)
-SYM_FUNC_START_PI(__inval_dcache_area)
+SYM_FUNC_START_PI(dcache_inval_poc)
 	/* FALLTHROUGH */
 
 /*
@@ -166,11 +166,11 @@ SYM_FUNC_START_PI(__inval_dcache_area)
 	b.lo	2b
 	dsb	sy
 	ret
-SYM_FUNC_END_PI(__inval_dcache_area)
+SYM_FUNC_END_PI(dcache_inval_poc)
 SYM_FUNC_END(__dma_inv_area)
 
 /*
- *	__clean_dcache_area_poc(start, end)
+ *	dcache_clean_poc(start, end)
  *
  * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoC.
@@ -179,7 +179,7 @@ SYM_FUNC_END(__dma_inv_area)
  *	- end     - virtual end address of region
  */
 SYM_FUNC_START_LOCAL(__dma_clean_area)
-SYM_FUNC_START_PI(__clean_dcache_area_poc)
+SYM_FUNC_START_PI(dcache_clean_poc)
 	/* FALLTHROUGH */
 
 /*
@@ -189,11 +189,11 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc)
  */
 	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__clean_dcache_area_poc)
+SYM_FUNC_END_PI(dcache_clean_poc)
 SYM_FUNC_END(__dma_clean_area)
 
 /*
- *	__clean_dcache_area_pop(start, end)
+ *	dcache_clean_pop(start, end)
  *
  * 	Ensure that any D-cache lines for the interval [start, end)
  * 	are cleaned to the PoP.
@@ -201,13 +201,13 @@ SYM_FUNC_END(__dma_clean_area)
  *	- start   - virtual start address of region
  *	- end     - virtual end address of region
  */
-SYM_FUNC_START_PI(__clean_dcache_area_pop)
+SYM_FUNC_START_PI(dcache_clean_pop)
 	alternative_if_not ARM64_HAS_DCPOP
-	b	__clean_dcache_area_poc
+	b	dcache_clean_poc
 	alternative_else_nop_endif
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
-SYM_FUNC_END_PI(__clean_dcache_area_pop)
+SYM_FUNC_END_PI(dcache_clean_pop)
 
 /*
  *	__dma_flush_area(start, size)
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index c4ca7e05fdb80..2aaf950b906cb 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -17,14 +17,14 @@
 void sync_icache_aliases(unsigned long start, unsigned long end)
 {
 	if (icache_is_aliasing()) {
-		__clean_dcache_area_pou(start, end);
-		__flush_icache_all();
+		dcache_clean_pou(start, end);
+		icache_inval_all_pou();
 	} else {
 		/*
 		 * Don't issue kick_all_cpus_sync() after I-cache invalidation
 		 * for user mappings.
 		 */
-		__flush_icache_range(start, end);
+		caches_clean_inval_pou(start, end);
 	}
 }
 
@@ -76,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page);
 /*
  * Additional functions defined in assembly.
  */
-EXPORT_SYMBOL(__flush_icache_range);
+EXPORT_SYMBOL(caches_clean_inval_pou);
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	/* Ensure order against any prior non-cacheable writes */
 	dmb(osh);
-	__clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size);
+	dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
-	__inval_dcache_area((unsigned long)addr, (unsigned long)addr + size);
+	dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 #endif
-- 
GitLab


From c0c8a8397fa8a74d04915f4d3d28cb4a5d401427 Mon Sep 17 00:00:00 2001
From: Stefan Haberland <sth@linux.ibm.com>
Date: Tue, 25 May 2021 14:50:06 +0200
Subject: [PATCH 1560/3804] s390/dasd: add missing discipline function

Fix crash with illegal operation exception in dasd_device_tasklet.
Commit b72949328869 ("s390/dasd: Prepare for additional path event handling")
renamed the verify_path function for ECKD but not for FBA and DIAG.
This leads to a panic when the path verification function is called for an
FBA or DIAG device.

Fix by defining a wrapper function for dasd_generic_verify_path().

Fixes: b72949328869 ("s390/dasd: Prepare for additional path event handling")
Cc: <stable@vger.kernel.org> #5.11
Reviewed-by: Jan Hoeppner <hoeppner@linux.ibm.com>
Signed-off-by: Stefan Haberland <sth@linux.ibm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Link: https://lore.kernel.org/r/20210525125006.157531-2-sth@linux.ibm.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/s390/block/dasd_diag.c | 8 +++++++-
 drivers/s390/block/dasd_fba.c  | 8 +++++++-
 drivers/s390/block/dasd_int.h  | 1 -
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 1b9e1442e6a50..fd42a5fffaed1 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -642,12 +642,18 @@ static void dasd_diag_setup_blk_queue(struct dasd_block *block)
 	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
 }
 
+static int dasd_diag_pe_handler(struct dasd_device *device,
+				__u8 tbvpm, __u8 fcsecpm)
+{
+	return dasd_generic_verify_path(device, tbvpm);
+}
+
 static struct dasd_discipline dasd_diag_discipline = {
 	.owner = THIS_MODULE,
 	.name = "DIAG",
 	.ebcname = "DIAG",
 	.check_device = dasd_diag_check_device,
-	.verify_path = dasd_generic_verify_path,
+	.pe_handler = dasd_diag_pe_handler,
 	.fill_geometry = dasd_diag_fill_geometry,
 	.setup_blk_queue = dasd_diag_setup_blk_queue,
 	.start_IO = dasd_start_diag,
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 4789410885e4f..3ad319aee51ed 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -794,13 +794,19 @@ static void dasd_fba_setup_blk_queue(struct dasd_block *block)
 	blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
 }
 
+static int dasd_fba_pe_handler(struct dasd_device *device,
+			       __u8 tbvpm, __u8 fcsecpm)
+{
+	return dasd_generic_verify_path(device, tbvpm);
+}
+
 static struct dasd_discipline dasd_fba_discipline = {
 	.owner = THIS_MODULE,
 	.name = "FBA ",
 	.ebcname = "FBA ",
 	.check_device = dasd_fba_check_characteristics,
 	.do_analysis = dasd_fba_do_analysis,
-	.verify_path = dasd_generic_verify_path,
+	.pe_handler = dasd_fba_pe_handler,
 	.setup_blk_queue = dasd_fba_setup_blk_queue,
 	.fill_geometry = dasd_fba_fill_geometry,
 	.start_IO = dasd_start_IO,
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index 1c59b0e86a9f0..155428bfed8ac 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -297,7 +297,6 @@ struct dasd_discipline {
 	 * e.g. verify that new path is compatible with the current
 	 * configuration.
 	 */
-	int (*verify_path)(struct dasd_device *, __u8);
 	int (*pe_handler)(struct dasd_device *, __u8, __u8);
 
 	/*
-- 
GitLab


From 5c9d706f61336d9f7f285df64c734af778c70f39 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 25 May 2021 20:35:29 +0200
Subject: [PATCH 1561/3804] bpf: Fix BPF_LSM kconfig symbol dependency

Similar to 6bdacdb48e94 ("bpf: Fix BPF_JIT kconfig symbol dependency"), we
need to detangle the hard BPF_LSM dependency on NET. This was previously
implied by its dependency on BPF_JIT, which itself was dependent on NET (but
without any actual/real hard dependency code-wise). Given the latter was
lifted, so should be the former, as BPF_LSMs could well exist on net-less
systems. This therefore also fixes a randconfig build error recently reported
by Randy:

  ld: kernel/bpf/bpf_lsm.o: in function `bpf_lsm_func_proto':
  bpf_lsm.c:(.text+0x1a0): undefined reference to `bpf_sk_storage_get_proto'
  ld: bpf_lsm.c:(.text+0x1b8): undefined reference to `bpf_sk_storage_delete_proto'
  [...]

Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
---
 kernel/bpf/bpf_lsm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 5efb2b24012c9..da471bf01b977 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -107,10 +107,12 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_inode_storage_get_proto;
 	case BPF_FUNC_inode_storage_delete:
 		return &bpf_inode_storage_delete_proto;
+#ifdef CONFIG_NET
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+#endif /* CONFIG_NET */
 	case BPF_FUNC_spin_lock:
 		return &bpf_spin_lock_proto;
 	case BPF_FUNC_spin_unlock:
-- 
GitLab


From d4b250562fb89ba6f94156b8bea12b8829cfa9a6 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 3 Nov 2019 21:22:04 +0000
Subject: [PATCH 1562/3804] i2c: qcom-geni: fix spelling mistake "unepxected"
 -> "unexpected"

There is a spelling mistake in an error message string, fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Akash Asthana <akashast@codeaurora.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 214b4c913a139..07b710a774df4 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -100,7 +100,7 @@ static const struct geni_i2c_err_log gi2c_log[] = {
 	[GP_IRQ0] = {-EIO, "Unknown I2C err GP_IRQ0"},
 	[NACK] = {-ENXIO, "NACK: slv unresponsive, check its power/reset-ln"},
 	[GP_IRQ2] = {-EIO, "Unknown I2C err GP IRQ2"},
-	[BUS_PROTO] = {-EPROTO, "Bus proto err, noisy/unepxected start/stop"},
+	[BUS_PROTO] = {-EPROTO, "Bus proto err, noisy/unexpected start/stop"},
 	[ARB_LOST] = {-EAGAIN, "Bus arbitration lost, clock line undriveable"},
 	[GP_IRQ5] = {-EIO, "Unknown I2C err GP IRQ5"},
 	[GENI_OVERRUN] = {-EIO, "Cmd overrun, check GENI cmd-state machine"},
-- 
GitLab


From 9dd45bbad947f7cc4f3d4eff7fc02a7e3804e47b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 3 May 2021 09:02:20 +0200
Subject: [PATCH 1563/3804] i2c: icy: Remove unused variable new_fwnode in
 icy_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The last user of new_fwnode was removed, leading to:

    drivers/i2c/busses/i2c-icy.c: In function ‘icy_probe’:
    drivers/i2c/busses/i2c-icy.c:126:24: warning: unused variable ‘new_fwnode’ [-Wunused-variable]
      126 |  struct fwnode_handle *new_fwnode;
	  |                        ^~~~~~~~~~

Fixes: dd7a37102b79ae55 ("i2c: icy: Constify the software node")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Max Staudt <max@enpas.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-icy.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-icy.c b/drivers/i2c/busses/i2c-icy.c
index c8c422e9dda43..5dae7cab72605 100644
--- a/drivers/i2c/busses/i2c-icy.c
+++ b/drivers/i2c/busses/i2c-icy.c
@@ -123,7 +123,6 @@ static int icy_probe(struct zorro_dev *z,
 {
 	struct icy_i2c *i2c;
 	struct i2c_algo_pcf_data *algo_data;
-	struct fwnode_handle *new_fwnode;
 	struct i2c_board_info ltc2990_info = {
 		.type		= "ltc2990",
 		.swnode		= &icy_ltc2990_node,
-- 
GitLab


From 52b806e8d6b3c06d5f8415f82d7353695acb2f00 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 4 May 2021 11:06:32 +0200
Subject: [PATCH 1564/3804] i2c: I2C_HISI should depend on ACPI

The HiSilicon Kunpeng I2C controller driver relies on ACPI to probe for
its presence.  Hence add a dependency on ACPI, to prevent asking the
user about this driver when configuring a kernel without ACPI firmware
support.

Fixes: d62fbdb99a85730a ("i2c: add support for HiSilicon I2C controller")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 281a65d9b44bb..10acece9d7b93 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -647,7 +647,7 @@ config I2C_HIGHLANDER
 
 config I2C_HISI
 	tristate "HiSilicon I2C controller"
-	depends on ARM64 || COMPILE_TEST
+	depends on (ARM64 && ACPI) || COMPILE_TEST
 	help
 	  Say Y here if you want to have Hisilicon I2C controller support
 	  available on the Kunpeng Server.
-- 
GitLab


From c4740e293c93c747e65d53d9aacc2ba8521d1489 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 6 May 2021 13:15:40 +0200
Subject: [PATCH 1565/3804] i2c: sh_mobile: Use new clock calculation formulas
 for RZ/G2E

When switching the Gen3 SoCs to the new clock calculation formulas, the
match entry for RZ/G2E added in commit 51243b73455f2d12 ("i2c:
sh_mobile: Add support for r8a774c0 (RZ/G2E)") was forgotten.

Fixes: e8a27567509b2439 ("i2c: sh_mobile: use new clock calculation formulas for Gen3")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-sh_mobile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 3ae6ca21a02c6..2d2e630fd4387 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -807,7 +807,7 @@ static const struct sh_mobile_dt_config r8a7740_dt_config = {
 static const struct of_device_id sh_mobile_i2c_dt_ids[] = {
 	{ .compatible = "renesas,iic-r8a73a4", .data = &fast_clock_dt_config },
 	{ .compatible = "renesas,iic-r8a7740", .data = &r8a7740_dt_config },
-	{ .compatible = "renesas,iic-r8a774c0", .data = &fast_clock_dt_config },
+	{ .compatible = "renesas,iic-r8a774c0", .data = &v2_freq_calc_dt_config },
 	{ .compatible = "renesas,iic-r8a7790", .data = &v2_freq_calc_dt_config },
 	{ .compatible = "renesas,iic-r8a7791", .data = &v2_freq_calc_dt_config },
 	{ .compatible = "renesas,iic-r8a7792", .data = &v2_freq_calc_dt_config },
-- 
GitLab


From 3d0220f6861d713213b015b582e9f21e5b28d2e0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 May 2021 10:17:36 +0000
Subject: [PATCH 1566/3804] bpf: Wrap aux data inside bpf_sanitize_info
 container

Add a container structure struct bpf_sanitize_info which holds
the current aux info, and update call-sites to sanitize_ptr_alu()
to pass it in. This is needed for passing in additional state
later on.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 09849e43f0352..98690f5367f9c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6486,15 +6486,19 @@ static bool sanitize_needed(u8 opcode)
 	return opcode == BPF_ADD || opcode == BPF_SUB;
 }
 
+struct bpf_sanitize_info {
+	struct bpf_insn_aux_data aux;
+};
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
 			    const struct bpf_reg_state *off_reg,
 			    struct bpf_reg_state *dst_reg,
-			    struct bpf_insn_aux_data *tmp_aux,
+			    struct bpf_sanitize_info *info,
 			    const bool commit_window)
 {
-	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
+	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
 	struct bpf_verifier_state *vstate = env->cur_state;
 	bool off_is_imm = tnum_is_const(off_reg->var_off);
 	bool off_is_neg = off_reg->smin_value < 0;
@@ -6523,8 +6527,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 		/* In commit phase we narrow the masking window based on
 		 * the observed pointer move after the simulated operation.
 		 */
-		alu_state = tmp_aux->alu_state;
-		alu_limit = abs(tmp_aux->alu_limit - alu_limit);
+		alu_state = info->aux.alu_state;
+		alu_limit = abs(info->aux.alu_limit - alu_limit);
 	} else {
 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
@@ -6685,7 +6689,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
-	struct bpf_insn_aux_data tmp_aux = {};
+	struct bpf_sanitize_info info = {};
 	u8 opcode = BPF_OP(insn->code);
 	u32 dst = insn->dst_reg;
 	int ret;
@@ -6754,7 +6758,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 
 	if (sanitize_needed(opcode)) {
 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
-				       &tmp_aux, false);
+				       &info, false);
 		if (ret < 0)
 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 	}
@@ -6895,7 +6899,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		return -EACCES;
 	if (sanitize_needed(opcode)) {
 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
-				       &tmp_aux, true);
+				       &info, true);
 		if (ret < 0)
 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 	}
-- 
GitLab


From bb01a1bba579b4b1c5566af24d95f1767859771e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 21 May 2021 10:19:22 +0000
Subject: [PATCH 1567/3804] bpf: Fix mask direction swap upon off reg sign
 change

Masking direction as indicated via mask_to_left is considered to be
calculated once and then used to derive pointer limits. Thus, this
needs to be placed into bpf_sanitize_info instead so we can pass it
to sanitize_ptr_alu() call after the pointer move. Piotr noticed a
corner case where the off reg causes masking direction change which
then results in an incorrect final aux->alu_limit.

Fixes: 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic mask")
Reported-by: Piotr Krysiuk <piotras@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 98690f5367f9c..8574cb60915ad 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6409,18 +6409,10 @@ enum {
 };
 
 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
-			      const struct bpf_reg_state *off_reg,
-			      u32 *alu_limit, u8 opcode)
+			      u32 *alu_limit, bool mask_to_left)
 {
-	bool off_is_neg = off_reg->smin_value < 0;
-	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
-			    (opcode == BPF_SUB && !off_is_neg);
 	u32 max = 0, ptr_limit = 0;
 
-	if (!tnum_is_const(off_reg->var_off) &&
-	    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
-		return REASON_BOUNDS;
-
 	switch (ptr_reg->type) {
 	case PTR_TO_STACK:
 		/* Offset 0 is out-of-bounds, but acceptable start for the
@@ -6488,6 +6480,7 @@ static bool sanitize_needed(u8 opcode)
 
 struct bpf_sanitize_info {
 	struct bpf_insn_aux_data aux;
+	bool mask_to_left;
 };
 
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
@@ -6519,7 +6512,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 	if (vstate->speculative)
 		goto do_sim;
 
-	err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
+	if (!commit_window) {
+		if (!tnum_is_const(off_reg->var_off) &&
+		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
+			return REASON_BOUNDS;
+
+		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
+				     (opcode == BPF_SUB && !off_is_neg);
+	}
+
+	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
 	if (err < 0)
 		return err;
 
-- 
GitLab


From a7036191277f9fa68d92f2071ddc38c09b1e5ee5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 4 May 2021 08:58:25 +0000
Subject: [PATCH 1568/3804] bpf: No need to simulate speculative domain for
 immediates

In 801c6058d14a ("bpf: Fix leakage of uninitialized bpf stack under
speculation") we replaced masking logic with direct loads of immediates
if the register is a known constant. Given in this case we do not apply
any masking, there is also no reason for the operation to be truncated
under the speculative domain.

Therefore, there is also zero reason for the verifier to branch-off and
simulate this case, it only needs to do it for unknown but bounded scalars.
As a side-effect, this also enables a few test cases that were previously
rejected due to simulation under zero truncation.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8574cb60915ad..94ba5163d4c54 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6545,8 +6545,12 @@ do_sim:
 	/* If we're in commit phase, we're done here given we already
 	 * pushed the truncated dst_reg into the speculative verification
 	 * stack.
+	 *
+	 * Also, when register is a known constant, we rewrite register-based
+	 * operation to immediate-based, and thus do not need masking (and as
+	 * a consequence, do not need to simulate the zero-truncation either).
 	 */
-	if (commit_window)
+	if (commit_window || off_is_imm)
 		return 0;
 
 	/* Simulate and find potential out-of-bounds access under
-- 
GitLab


From 1bad6fd52be4ce12d207e2820ceb0f29ab31fc53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 4 May 2021 08:58:25 +0000
Subject: [PATCH 1569/3804] bpf, selftests: Adjust few selftest result_unpriv
 outcomes

Given we don't need to simulate the speculative domain for registers with
immediates anymore since the verifier uses direct imm-based rewrites instead
of having to mask, we can also lift a few cases that were previously rejected.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/stack_ptr.c       | 2 --
 tools/testing/selftests/bpf/verifier/value_ptr_arith.c | 8 --------
 2 files changed, 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
index 07eaa04412ae4..8ab94d65f3d54 100644
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c
@@ -295,8 +295,6 @@
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
 	BPF_EXIT_INSN(),
 	},
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "invalid write to stack R1 off=0 size=1",
 	.result = ACCEPT,
 	.retval = 42,
 },
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index e5913fd3b9030..7ae2859d495c5 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -300,8 +300,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -371,8 +369,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -472,8 +468,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
@@ -766,8 +760,6 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = ACCEPT,
-	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 	.retval = 1,
 },
 {
-- 
GitLab


From 0c1f3193b1cdd21e7182f97dc9bca7d284d18a15 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Wed, 12 May 2021 12:14:21 +0100
Subject: [PATCH 1570/3804] dm verity: fix require_signatures module_param
 permissions

The third parameter of module_param() is permissions for the sysfs node
but it looks like it is being used as the initial value of the parameter
here.  In fact, false here equates to omitting the file from sysfs and
does not affect the value of require_signatures.

Making the parameter writable is not simple because going from
false->true is fine but it should not be possible to remove the
requirement to verify a signature.  But it can be useful to inspect the
value of this parameter from userspace, so change the permissions to
make a read-only file in sysfs.

Signed-off-by: John Keeping <john@metanate.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-verity-verify-sig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
index 29385dc470d5d..db61a1f43ae91 100644
--- a/drivers/md/dm-verity-verify-sig.c
+++ b/drivers/md/dm-verity-verify-sig.c
@@ -15,7 +15,7 @@
 #define DM_VERITY_VERIFY_ERR(s) DM_VERITY_ROOT_HASH_VERIFICATION " " s
 
 static bool require_signatures;
-module_param(require_signatures, bool, false);
+module_param(require_signatures, bool, 0444);
 MODULE_PARM_DESC(require_signatures,
 		"Verify the roothash of dm-verity hash tree");
 
-- 
GitLab


From f16dba5dc6f094041ab8c356e1e3a48ee0e3c8cd Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 25 May 2021 13:16:21 -0400
Subject: [PATCH 1571/3804] dm snapshot: revert "fix a crash when an origin has
 no snapshots"

Commit 7ee06ddc4038f936b0d4459d37a7d4d844fb03db ("dm snapshot: fix a
crash when an origin has no snapshots") introduced a regression in
snapshot merging - causing the lvm2 test lvcreate-cache-snapshot.sh
to get stuck in an infinite loop.

Even though commit 7ee06ddc4038f936b0d4459d37a7d4d844fb03db was marked
for stable@ the stable team was notified to _not_ backport it.

Fixes: 7ee06ddc4038 ("dm snapshot: fix a crash when an origin has no snapshots")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index b8e4d31124eaa..75e59294ef77a 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -855,11 +855,12 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 static uint32_t __minimum_chunk_size(struct origin *o)
 {
 	struct dm_snapshot *snap;
-	unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
+	unsigned chunk_size = 0;
 
 	if (o)
 		list_for_each_entry(snap, &o->snapshots, list)
-			chunk_size = min(chunk_size, snap->store->chunk_size);
+			chunk_size = min_not_zero(chunk_size,
+						  snap->store->chunk_size);
 
 	return (uint32_t) chunk_size;
 }
-- 
GitLab


From 7e768532b2396bcb7fbf6f82384b85c0f1d2f197 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 25 May 2021 13:17:19 -0400
Subject: [PATCH 1572/3804] dm snapshot: properly fix a crash when an origin
 has no snapshots

If an origin target has no snapshots, o->split_boundary is set to 0.
This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split().

Fix this by initializing chunk_size, and in turn split_boundary, to
rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits
into "unsigned" type.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-snap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 75e59294ef77a..751ec5ea1dbb5 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -855,7 +855,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new)
 static uint32_t __minimum_chunk_size(struct origin *o)
 {
 	struct dm_snapshot *snap;
-	unsigned chunk_size = 0;
+	unsigned chunk_size = rounddown_pow_of_two(UINT_MAX);
 
 	if (o)
 		list_for_each_entry(snap, &o->snapshots, list)
-- 
GitLab


From bfb819ea20ce8bbeeba17e1a6418bf8bda91fc28 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 25 May 2021 12:37:35 -0700
Subject: [PATCH 1573/3804] proc: Check /proc/$pid/attr/ writes against file
 opener

Fix another "confused deputy" weakness[1]. Writes to /proc/$pid/attr/
files need to check the opener credentials, since these fds do not
transition state across execve(). Without this, it is possible to
trick another process (which may have different credentials) to write
to its own /proc/$pid/attr/ files, leading to unexpected and possibly
exploitable behaviors.

[1] https://www.kernel.org/doc/html/latest/security/credentials.html?highlight=confused#open-file-credentials

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3851bfcdba56e..58bbf334265b7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2703,6 +2703,10 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	void *page;
 	int rv;
 
+	/* A task may only write when it was the opener. */
+	if (file->f_cred != current_real_cred())
+		return -EPERM;
+
 	rcu_read_lock();
 	task = pid_task(proc_pid(inode), PIDTYPE_PID);
 	if (!task) {
-- 
GitLab


From 297739bd73f6e49d80bac4bfd27f3598b798c0d4 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 24 May 2021 22:49:24 -0400
Subject: [PATCH 1574/3804] sctp: add the missing setting for asoc encap_port

This patch is to add the missing setting back for asoc encap_port.

Fixes: 8dba29603b5c ("sctp: add SCTP_REMOTE_UDP_ENCAPS_PORT sockopt")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 40f9f6c4a0a1d..a79d193ff8720 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4473,6 +4473,7 @@ static int sctp_setsockopt_encap_port(struct sock *sk,
 				    transports)
 			t->encap_port = encap_port;
 
+		asoc->encap_port = encap_port;
 		return 0;
 	}
 
-- 
GitLab


From b2540cdce6e22ecf3de54daf5129cc37951348cc Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 24 May 2021 22:49:42 -0400
Subject: [PATCH 1575/3804] sctp: fix the proc_handler for sysctl encap_port

proc_dointvec() cannot do min and max check for setting a value
when extra1/extra2 is set, so change it to proc_dointvec_minmax()
for sysctl encap_port.

Fixes: e8a3001c2120 ("sctp: add encap_port for netns sock asoc and transport")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e92df779af733..55871b277f475 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -307,7 +307,7 @@ static struct ctl_table sctp_net_table[] = {
 		.data		= &init_net.sctp.encap_port,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= &udp_port_max,
 	},
-- 
GitLab


From c1cf1afd8b0f2f1b077df84e90497c07094406fc Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 25 May 2021 18:52:47 +0800
Subject: [PATCH 1576/3804] net: hns: Fix kernel-doc

Fix function name in hns_ethtool.c kernel-doc comment
to remove these warnings found by clang_w1.

drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:202: warning: expecting
prototype for hns_nic_set_link_settings(). Prototype was for
hns_nic_set_link_ksettings() instead.
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:837: warning: expecting
prototype for get_ethtool_stats(). Prototype was for
hns_get_ethtool_stats() instead.
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c:894: warning:
expecting prototype for get_strings(). Prototype was for
hns_get_strings() instead.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: 262b38cdb3e4 ("net: ethernet: hisilicon: hns: use phydev from struct net_device")
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index da48c05435ea6..7e62dcff24264 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -192,7 +192,7 @@ static int hns_nic_get_link_ksettings(struct net_device *net_dev,
 }
 
 /**
- *hns_nic_set_link_settings - implement ethtool set link ksettings
+ *hns_nic_set_link_ksettings - implement ethtool set link ksettings
  *@net_dev: net_device
  *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
@@ -827,7 +827,7 @@ hns_get_channels(struct net_device *net_dev, struct ethtool_channels *ch)
 }
 
 /**
- * get_ethtool_stats - get detail statistics.
+ * hns_get_ethtool_stats - get detail statistics.
  * @netdev: net device
  * @stats: statistics info.
  * @data: statistics data.
@@ -885,7 +885,7 @@ static void hns_get_ethtool_stats(struct net_device *netdev,
 }
 
 /**
- * get_strings: Return a set of strings that describe the requested objects
+ * hns_get_strings: Return a set of strings that describe the requested objects
  * @netdev: net device
  * @stringset: string set ID.
  * @data: objects data.
-- 
GitLab


From 9453d45ecb6c2199d72e73c993e9d98677a2801b Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Tue, 25 May 2021 16:21:52 +0300
Subject: [PATCH 1577/3804] net: zero-initialize tc skb extension on allocation

Function skb_ext_add() doesn't initialize created skb extension with any
value and leaves it up to the user. However, since extension of type
TC_SKB_EXT originally contained only single value tc_skb_ext->chain its
users used to just assign the chain value without setting whole extension
memory to zero first. This assumption changed when TC_SKB_EXT extension was
extended with additional fields but not all users were updated to
initialize the new fields which leads to use of uninitialized memory
afterwards. UBSAN log:

[  778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28
[  778.301495] load of value 107 is not a valid value for type '_Bool'
[  778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2
[  778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[  778.307901] Call Trace:
[  778.308680]  <IRQ>
[  778.309358]  dump_stack+0xbb/0x107
[  778.310307]  ubsan_epilogue+0x5/0x40
[  778.311167]  __ubsan_handle_load_invalid_value.cold+0x43/0x48
[  778.312454]  ? memset+0x20/0x40
[  778.313230]  ovs_flow_key_extract.cold+0xf/0x14 [openvswitch]
[  778.314532]  ovs_vport_receive+0x19e/0x2e0 [openvswitch]
[  778.315749]  ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch]
[  778.317188]  ? create_prof_cpu_mask+0x20/0x20
[  778.318220]  ? arch_stack_walk+0x82/0xf0
[  778.319153]  ? secondary_startup_64_no_verify+0xb0/0xbb
[  778.320399]  ? stack_trace_save+0x91/0xc0
[  778.321362]  ? stack_trace_consume_entry+0x160/0x160
[  778.322517]  ? lock_release+0x52e/0x760
[  778.323444]  netdev_frame_hook+0x323/0x610 [openvswitch]
[  778.324668]  ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch]
[  778.325950]  __netif_receive_skb_core+0x771/0x2db0
[  778.327067]  ? lock_downgrade+0x6e0/0x6f0
[  778.328021]  ? lock_acquire+0x565/0x720
[  778.328940]  ? generic_xdp_tx+0x4f0/0x4f0
[  778.329902]  ? inet_gro_receive+0x2a7/0x10a0
[  778.330914]  ? lock_downgrade+0x6f0/0x6f0
[  778.331867]  ? udp4_gro_receive+0x4c4/0x13e0
[  778.332876]  ? lock_release+0x52e/0x760
[  778.333808]  ? dev_gro_receive+0xcc8/0x2380
[  778.334810]  ? lock_downgrade+0x6f0/0x6f0
[  778.335769]  __netif_receive_skb_list_core+0x295/0x820
[  778.336955]  ? process_backlog+0x780/0x780
[  778.337941]  ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core]
[  778.339613]  ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0
[  778.341033]  ? kvm_clock_get_cycles+0x14/0x20
[  778.342072]  netif_receive_skb_list_internal+0x5f5/0xcb0
[  778.343288]  ? __kasan_kmalloc+0x7a/0x90
[  778.344234]  ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core]
[  778.345676]  ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core]
[  778.347140]  ? __netif_receive_skb_list_core+0x820/0x820
[  778.348351]  ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core]
[  778.349688]  ? napi_gro_flush+0x26c/0x3c0
[  778.350641]  napi_complete_done+0x188/0x6b0
[  778.351627]  mlx5e_napi_poll+0x373/0x1b80 [mlx5_core]
[  778.352853]  __napi_poll+0x9f/0x510
[  778.353704]  ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core]
[  778.355158]  net_rx_action+0x34c/0xa40
[  778.356060]  ? napi_threaded_poll+0x3d0/0x3d0
[  778.357083]  ? sched_clock_cpu+0x18/0x190
[  778.358041]  ? __common_interrupt+0x8e/0x1a0
[  778.359045]  __do_softirq+0x1ce/0x984
[  778.359938]  __irq_exit_rcu+0x137/0x1d0
[  778.360865]  irq_exit_rcu+0xa/0x20
[  778.361708]  common_interrupt+0x80/0xa0
[  778.362640]  </IRQ>
[  778.363212]  asm_common_interrupt+0x1e/0x40
[  778.364204] RIP: 0010:native_safe_halt+0xe/0x10
[  778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 <c3> 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00
[  778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246
[  778.370570] RAX: ffff88842de46a80 RBX: ffffffff84425840 RCX: ffffffff83418468
[  778.372143] RDX: 000000000026f1da RSI: 0000000000000004 RDI: ffffffff8343af5e
[  778.373722] RBP: fffffbfff0884b08 R08: 0000000000000000 R09: ffff88842de46bcb
[  778.375292] R10: ffffed1085bc8d79 R11: 0000000000000001 R12: 0000000000000000
[  778.376860] R13: ffffffff851124a0 R14: 0000000000000000 R15: dffffc0000000000
[  778.378491]  ? rcu_eqs_enter.constprop.0+0xb8/0xe0
[  778.379606]  ? default_idle_call+0x5e/0xe0
[  778.380578]  default_idle+0xa/0x10
[  778.381406]  default_idle_call+0x96/0xe0
[  778.382350]  do_idle+0x3d4/0x550
[  778.383153]  ? arch_cpu_idle_exit+0x40/0x40
[  778.384143]  cpu_startup_entry+0x19/0x20
[  778.385078]  start_kernel+0x3c7/0x3e5
[  778.385978]  secondary_startup_64_no_verify+0xb0/0xbb

Fix the issue by providing new function tc_skb_ext_alloc() that allocates
tc skb extension and initializes its memory to 0 before returning it to the
caller. Change all existing users to use new API instead of calling
skb_ext_add() directly.

Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct")
Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     |  2 +-
 include/net/pkt_cls.h                               | 11 +++++++++++
 net/sched/cls_api.c                                 |  2 +-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 6cdc52d50a488..3113822618402 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -626,7 +626,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
 		struct mlx5_eswitch *esw;
 		u32 zone_restore_id;
 
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		tc_skb_ext = tc_skb_ext_alloc(skb);
 		if (!tc_skb_ext) {
 			WARN_ON(1);
 			return false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index bccdb43a880b1..2c776e7a7692a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5090,7 +5090,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
 
 	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
 		chain = mapped_obj.chain;
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		tc_skb_ext = tc_skb_ext_alloc(skb);
 		if (WARN_ON(!tc_skb_ext))
 			return false;
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 255e4f4b521f4..ec7823921bd26 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 		cls_common->extack = extack;
 }
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
+{
+	struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+
+	if (tc_skb_ext)
+		memset(tc_skb_ext, 0, sizeof(*tc_skb_ext));
+	return tc_skb_ext;
+}
+#endif
+
 enum tc_matchall_command {
 	TC_CLSMATCHALL_REPLACE,
 	TC_CLSMATCHALL_DESTROY,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 40fbea626dfd2..279f9e2a2319a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
 
 	/* If we missed on some chain */
 	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
-		ext = skb_ext_add(skb, TC_SKB_EXT);
+		ext = tc_skb_ext_alloc(skb);
 		if (WARN_ON_ONCE(!ext))
 			return TC_ACT_SHOT;
 		ext->chain = last_executed_chain;
-- 
GitLab


From 65161c35554f7135e6656b3df1ce2c500ca0bdcf Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 25 May 2021 19:00:12 +0800
Subject: [PATCH 1578/3804] bnx2x: Fix missing error code in
 bnx2x_iov_init_one()

Eliminate the following smatch warning:

drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c:1227
bnx2x_iov_init_one() warn: missing error code 'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index d21f085044cdd..27943b0446c28 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1223,8 +1223,10 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
 		goto failed;
 
 	/* SR-IOV capability was enabled but there are no VFs*/
-	if (iov->total == 0)
+	if (iov->total == 0) {
+		err = -EINVAL;
 		goto failed;
+	}
 
 	iov->nr_virtfn = min_t(u16, iov->total, num_vfs_param);
 
-- 
GitLab


From 17f9c1b63cdd4439523cfcdf5683e5070b911f24 Mon Sep 17 00:00:00 2001
From: Stefan Chulski <stefanc@marvell.com>
Date: Tue, 25 May 2021 19:04:41 +0300
Subject: [PATCH 1579/3804] net: mvpp2: add buffer header handling in RX

If the Link Partner sends frames larger than the RX buffer size, the MAC
marks them as oversize but still passes them to the Packet Processor.
In this scenario, the Packet Processor scatters the frame across multiple
buffers, but only a single buffer is returned to the Buffer Manager pool,
so the pool is not refilled.

This patch adds handling of the oversize error with buffer header handling,
so that all buffers are returned to the Buffer Manager pool.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Stefan Chulski <stefanc@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h    | 22 ++++++++
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 54 +++++++++++++++----
 2 files changed, 67 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 8edba5ea90f03..4a61c90003b5e 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -993,6 +993,14 @@ enum mvpp22_ptp_packet_format {
 
 #define MVPP2_DESC_DMA_MASK	DMA_BIT_MASK(40)
 
+/* Buffer header info bits */
+#define MVPP2_B_HDR_INFO_MC_ID_MASK	0xfff
+#define MVPP2_B_HDR_INFO_MC_ID(info)	((info) & MVPP2_B_HDR_INFO_MC_ID_MASK)
+#define MVPP2_B_HDR_INFO_LAST_OFFS	12
+#define MVPP2_B_HDR_INFO_LAST_MASK	BIT(12)
+#define MVPP2_B_HDR_INFO_IS_LAST(info) \
+	   (((info) & MVPP2_B_HDR_INFO_LAST_MASK) >> MVPP2_B_HDR_INFO_LAST_OFFS)
+
 struct mvpp2_tai;
 
 /* Definitions */
@@ -1002,6 +1010,20 @@ struct mvpp2_rss_table {
 	u32 indir[MVPP22_RSS_TABLE_ENTRIES];
 };
 
+struct mvpp2_buff_hdr {
+	__le32 next_phys_addr;
+	__le32 next_dma_addr;
+	__le16 byte_count;
+	__le16 info;
+	__le16 reserved1;	/* bm_qset (for future use, BM) */
+	u8 next_phys_addr_high;
+	u8 next_dma_addr_high;
+	__le16 reserved2;
+	__le16 reserved3;
+	__le16 reserved4;
+	__le16 reserved5;
+};
+
 /* Shared Packet Processor resources */
 struct mvpp2 {
 	/* Shared registers' base addresses */
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index ec706d614cacc..d39c7639cdbab 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3839,6 +3839,35 @@ mvpp2_run_xdp(struct mvpp2_port *port, struct mvpp2_rx_queue *rxq,
 	return ret;
 }
 
+static void mvpp2_buff_hdr_pool_put(struct mvpp2_port *port, struct mvpp2_rx_desc *rx_desc,
+				    int pool, u32 rx_status)
+{
+	phys_addr_t phys_addr, phys_addr_next;
+	dma_addr_t dma_addr, dma_addr_next;
+	struct mvpp2_buff_hdr *buff_hdr;
+
+	phys_addr = mvpp2_rxdesc_dma_addr_get(port, rx_desc);
+	dma_addr = mvpp2_rxdesc_cookie_get(port, rx_desc);
+
+	do {
+		buff_hdr = (struct mvpp2_buff_hdr *)phys_to_virt(phys_addr);
+
+		phys_addr_next = le32_to_cpu(buff_hdr->next_phys_addr);
+		dma_addr_next = le32_to_cpu(buff_hdr->next_dma_addr);
+
+		if (port->priv->hw_version >= MVPP22) {
+			phys_addr_next |= ((u64)buff_hdr->next_phys_addr_high << 32);
+			dma_addr_next |= ((u64)buff_hdr->next_dma_addr_high << 32);
+		}
+
+		mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
+
+		phys_addr = phys_addr_next;
+		dma_addr = dma_addr_next;
+
+	} while (!MVPP2_B_HDR_INFO_IS_LAST(le16_to_cpu(buff_hdr->info)));
+}
+
 /* Main rx processing */
 static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 		    int rx_todo, struct mvpp2_rx_queue *rxq)
@@ -3885,14 +3914,6 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 			MVPP2_RXD_BM_POOL_ID_OFFS;
 		bm_pool = &port->priv->bm_pools[pool];
 
-		/* In case of an error, release the requested buffer pointer
-		 * to the Buffer Manager. This request process is controlled
-		 * by the hardware, and the information about the buffer is
-		 * comprised by the RX descriptor.
-		 */
-		if (rx_status & MVPP2_RXD_ERR_SUMMARY)
-			goto err_drop_frame;
-
 		if (port->priv->percpu_pools) {
 			pp = port->priv->page_pool[pool];
 			dma_dir = page_pool_get_dma_dir(pp);
@@ -3904,6 +3925,18 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
 					rx_bytes + MVPP2_MH_SIZE,
 					dma_dir);
 
+		/* Buffer header not supported */
+		if (rx_status & MVPP2_RXD_BUF_HDR)
+			goto err_drop_frame;
+
+		/* In case of an error, release the requested buffer pointer
+		 * to the Buffer Manager. This request process is controlled
+		 * by the hardware, and the information about the buffer is
+		 * comprised by the RX descriptor.
+		 */
+		if (rx_status & MVPP2_RXD_ERR_SUMMARY)
+			goto err_drop_frame;
+
 		/* Prefetch header */
 		prefetch(data);
 
@@ -3985,7 +4018,10 @@ err_drop_frame:
 		dev->stats.rx_errors++;
 		mvpp2_rx_error(port, rx_desc);
 		/* Return the buffer to the pool */
-		mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
+		if (rx_status & MVPP2_RXD_BUF_HDR)
+			mvpp2_buff_hdr_pool_put(port, rx_desc, pool, rx_status);
+		else
+			mvpp2_bm_pool_put(port, pool, dma_addr, phys_addr);
 	}
 
 	rcu_read_unlock();
-- 
GitLab


From bab09fe2f65200a67209a360988bc24f3de4b95d Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@corigine.com>
Date: Tue, 25 May 2021 17:47:04 +0200
Subject: [PATCH 1580/3804] nfp: update maintainer and mailing list addresses

Some of Netronome's activities and people have moved over to Corigine,
including NFP driver maintenance and myself.

Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Louis Peens <louis.peens@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2cc1cb72bc923..d34c0036bdcd9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12687,9 +12687,9 @@ F:	drivers/rtc/rtc-ntxec.c
 F:	include/linux/mfd/ntxec.h
 
 NETRONOME ETHERNET DRIVERS
-M:	Simon Horman <simon.horman@netronome.com>
+M:	Simon Horman <simon.horman@corigine.com>
 R:	Jakub Kicinski <kuba@kernel.org>
-L:	oss-drivers@netronome.com
+L:	oss-drivers@corigine.com
 S:	Maintained
 F:	drivers/net/ethernet/netronome/
 
-- 
GitLab


From 20b5759f21cf53a0e03031bd3fe539e332b13568 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:10 -0700
Subject: [PATCH 1581/3804] mptcp: avoid OOB access in setsockopt()

We can't use tcp_set_congestion_control() on an mptcp socket, as
that function can end up accessing a tcp-specific field -
prior_ssthresh - causing an OOB access.

To allow propagating the correct ca algo on subflow, cache the ca
name at initialization time.

Additionally avoid overriding the user-selected CA (if any) at
clone time.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/182
Fixes: aa1fbd94e5c7 ("mptcp: sockopt: add TCP_CONGESTION and TCP_INFO")
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 14 +++++++++++---
 net/mptcp/protocol.h |  1 +
 net/mptcp/sockopt.c  |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2d21a4793d9d0..2bc199549a887 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2424,13 +2424,12 @@ static int __mptcp_init_sock(struct sock *sk)
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
 	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
 
-	tcp_assign_congestion_control(sk);
-
 	return 0;
 }
 
 static int mptcp_init_sock(struct sock *sk)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct net *net = sock_net(sk);
 	int ret;
 
@@ -2448,6 +2447,16 @@ static int mptcp_init_sock(struct sock *sk)
 	if (ret)
 		return ret;
 
+	/* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
+	 * propagate the correct value
+	 */
+	tcp_assign_congestion_control(sk);
+	strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+	/* no need to keep a reference to the ops, the name will suffice */
+	tcp_cleanup_congestion_control(sk);
+	icsk->icsk_ca_ops = NULL;
+
 	sk_sockets_allocated_inc(sk);
 	sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
 	sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
@@ -2622,7 +2631,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sk_stream_kill_queues(sk);
 	xfrm_sk_free_policy(sk);
 
-	tcp_cleanup_congestion_control(sk);
 	sk_refcnt_debug_release(sk);
 	mptcp_dispose_initial_subflow(msk);
 	sock_put(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index edc0128730dfe..165c8b40b3842 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -258,6 +258,7 @@ struct mptcp_sock {
 	} rcvq_space;
 
 	u32 setsockopt_seq;
+	char		ca_name[TCP_CA_NAME_MAX];
 };
 
 #define mptcp_lock_sock(___sk, cb) do {					\
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 00d941b66c1e5..a797981895995 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -547,7 +547,7 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t
 	}
 
 	if (ret == 0)
-		tcp_set_congestion_control(sk, name, false, cap_net_admin);
+		strcpy(msk->ca_name, name);
 
 	release_sock(sk);
 	return ret;
@@ -705,7 +705,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
 	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));
 
 	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
-		tcp_set_congestion_control(ssk, inet_csk(sk)->icsk_ca_ops->name, false, true);
+		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
 }
 
 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
-- 
GitLab


From 3812ce895047afdb78dc750a236515416e0ccded Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:11 -0700
Subject: [PATCH 1582/3804] mptcp: drop unconditional pr_warn on bad opt

This is a leftover from the early days. A malicious peer can flood
the kernel logs with useless messages, so just drop it.

Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 99fc21406168b..71c535f4e1eff 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -130,7 +130,6 @@ static void mptcp_parse_option(const struct sk_buff *skb,
 			memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
 			pr_debug("MP_JOIN hmac");
 		} else {
-			pr_warn("MP_JOIN bad option size");
 			mp_opt->mp_join = 0;
 		}
 		break;
-- 
GitLab


From 3ed0a585bfadb6bd7080f11184adbc9edcce7dbc Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 May 2021 14:23:12 -0700
Subject: [PATCH 1583/3804] mptcp: avoid error message on infinite mapping

Another left-over. Avoid flooding dmesg with useless text,
we already have a MIB for that event.

Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a5ede357cfbc5..bde6be77ea73b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -867,7 +867,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
 	data_len = mpext->data_len;
 	if (data_len == 0) {
-		pr_err("Infinite mapping not handled");
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
 		return MAPPING_INVALID;
 	}
-- 
GitLab


From d58300c3185b78ab910092488126b97f0abe3ae2 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 25 May 2021 14:23:13 -0700
Subject: [PATCH 1584/3804] mptcp: validate 'id' when stopping the ADD_ADDR
 retransmit timer

When Linux receives an echo-ed ADD_ADDR, it checks the IP address against
the list of "announced" addresses. In case of a positive match, the timer
that handles retransmissions is stopped regardless of the 'Address Id' in
the received packet: this behaviour does not comply with RFC8684 3.4.1.

Fix it by validating the 'Address Id' in received echo-ed ADD_ADDRs.
Tested using packetdrill, with the following captured output:

 unpatched kernel:

 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 3013740213], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0xfd2e62517888fe29,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 3013740213], length 0
        ^^^ retransmission is stopped here, but 'Address Id' is 90

 patched kernel:

 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 1.2.3.4,mptcp dss ack 1672384568], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 90 198.51.100.2,mptcp dss ack 1672384568], length 0
 Out <...> Flags [.], ack 1, win 256, options [mptcp add-addr v1 id 1 198.51.100.2 hmac 0x1cf372d59e05f4b8,mptcp dss ack 3007449509], length 0
 In  <...> Flags [.], ack 1, win 257, options [mptcp add-addr v1-echo id 1 198.51.100.2,mptcp dss ack 1672384568], length 0
        ^^^ retransmission is stopped here, only when both 'Address Id' and 'IP Address' match

Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c    | 2 +-
 net/mptcp/pm_netlink.c | 8 ++++----
 net/mptcp/protocol.h   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 71c535f4e1eff..6b825fb3fa832 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1023,7 +1023,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR);
 		} else {
 			mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
-			mptcp_pm_del_add_timer(msk, &mp_opt.addr);
+			mptcp_pm_del_add_timer(msk, &mp_opt.addr, true);
 			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADD);
 		}
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 6ba040897738b..2469e06a3a9d6 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -346,18 +346,18 @@ out:
 
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
-		       struct mptcp_addr_info *addr)
+		       struct mptcp_addr_info *addr, bool check_id)
 {
 	struct mptcp_pm_add_entry *entry;
 	struct sock *sk = (struct sock *)msk;
 
 	spin_lock_bh(&msk->pm.lock);
 	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
-	if (entry)
+	if (entry && (!check_id || entry->addr.id == addr->id))
 		entry->retrans_times = ADD_ADDR_RETRANS_MAX;
 	spin_unlock_bh(&msk->pm.lock);
 
-	if (entry)
+	if (entry && (!check_id || entry->addr.id == addr->id))
 		sk_stop_timer_sync(sk, &entry->add_timer);
 
 	return entry;
@@ -1064,7 +1064,7 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
 {
 	struct mptcp_pm_add_entry *entry;
 
-	entry = mptcp_pm_del_add_timer(msk, addr);
+	entry = mptcp_pm_del_add_timer(msk, addr, false);
 	if (entry) {
 		list_del(&entry->list);
 		kfree(entry);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 165c8b40b3842..0c6f99c673457 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -672,7 +672,7 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
 struct mptcp_pm_add_entry *
 mptcp_pm_del_add_timer(struct mptcp_sock *msk,
-		       struct mptcp_addr_info *addr);
+		       struct mptcp_addr_info *addr, bool check_id);
 struct mptcp_pm_add_entry *
 mptcp_lookup_anno_list_by_saddr(struct mptcp_sock *msk,
 				struct mptcp_addr_info *addr);
-- 
GitLab


From 85aabbd7b315c65673084b6227bee92c00405239 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 10 May 2021 19:31:30 +0200
Subject: [PATCH 1585/3804] PCI/MSI: Fix MSIs for generic hosts that use
 device-tree's "msi-map"

Since commit 9ec37efb8783 ("PCI/MSI: Make pci_host_common_probe() declare
its reliance on MSI domains"), platforms that rely on the "msi-map"
device-tree property don't get MSIs anymore.

On the Arm Fast Model for example [1], the host bridge doesn't have a
"msi-parent" property since it doesn't itself generate MSIs, and so doesn't
get a MSI domain. It has an "msi-map" property instead to describe MSI
controllers of child devices. As a result, due to the new msi_domain check
in pci_register_host_bridge(), the whole bus gets PCI_BUS_FLAGS_NO_MSI.

Check whether the root complex has an "msi-map" property before giving
up on MSIs.

[1] arch/arm64/boot/dts/arm/fvp-base-revc.dts

Fixes: 9ec37efb8783 ("PCI/MSI: Make pci_host_common_probe() declare its reliance on MSI domains")
Link: https://lore.kernel.org/r/20210510173129.750496-1-jean-philippe@linaro.org
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Marc Zyngier <maz@kernel.org>
---
 drivers/pci/of.c    | 7 +++++++
 drivers/pci/probe.c | 3 ++-
 include/linux/pci.h | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index da5b414d585ab..85dcb7097da4c 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -103,6 +103,13 @@ struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus)
 #endif
 }
 
+bool pci_host_of_has_msi_map(struct device *dev)
+{
+	if (dev && dev->of_node)
+		return of_get_property(dev->of_node, "msi-map", NULL);
+	return false;
+}
+
 static inline int __of_pci_pci_compare(struct device_node *node,
 				       unsigned int data)
 {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 3a62d09b8869e..275204646c68c 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -925,7 +925,8 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
 	device_enable_async_suspend(bus->bridge);
 	pci_set_bus_of_node(bus);
 	pci_set_bus_msi_domain(bus);
-	if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev))
+	if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev) &&
+	    !pci_host_of_has_msi_map(parent))
 		bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
 
 	if (!parent)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c20211e59a576..24306504226ab 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2344,6 +2344,7 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 struct device_node;
 struct irq_domain;
 struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
+bool pci_host_of_has_msi_map(struct device *dev);
 
 /* Arch may override this (weak) */
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
@@ -2351,6 +2352,7 @@ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
 #else	/* CONFIG_OF */
 static inline struct irq_domain *
 pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
+static inline bool pci_host_of_has_msi_map(struct device *dev) { return false; }
 #endif  /* CONFIG_OF */
 
 static inline struct device_node *
-- 
GitLab


From cc146267914950b12c2bdee68c1e9e5453c81cde Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 19 May 2021 08:22:15 +0200
Subject: [PATCH 1586/3804] md/raid5: remove an incorrect assert in
 in_chunk_boundary

Now that the original bdev is stored in the bio this assert is incorrect
and will trigger for any partitioned raid5 device.

Reported-by: Florian Dazinger <spam02@dazinger.net>
Tested-by: Florian Dazinger <spam02@dazinger.net>
Cc: stable@vger.kernel.org # 5.12
Fixes: 309dca309fc3 ("block: store a block_device pointer in struct bio"),
Reviewed-by:  Guoqing Jiang <jiangguoqing@kylinos.cn>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/raid5.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 841e1c1aa5e63..7d4ff8a5c55e2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5311,8 +5311,6 @@ static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 	unsigned int chunk_sectors;
 	unsigned int bio_sectors = bio_sectors(bio);
 
-	WARN_ON_ONCE(bio->bi_bdev->bd_partno);
-
 	chunk_sectors = min(conf->chunk_sectors, conf->prev_chunk_sectors);
 	return  chunk_sectors >=
 		((sector & (chunk_sectors - 1)) + bio_sectors);
-- 
GitLab


From 17a91051fe63b40ec651b80097c9fff5b093fdc5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sun, 23 May 2021 15:48:39 +0100
Subject: [PATCH 1587/3804] io_uring/io-wq: close io-wq full-stop gap

There is an old problem with io-wq cancellation where requests should be
killed and are in io-wq but are not discoverable, e.g. in @next_hashed
or @linked vars of io_worker_handle_work(). It adds some unreliability
to individual request cancellation, but also may potentially get
__io_uring_cancel() stuck. For instance:

1) An __io_uring_cancel()'s cancellation round has not found any
   request but there are some as described.
2) __io_uring_cancel() goes to sleep
3) Then workers wake up and try to execute those hidden requests
   that happen to be unbound.

As we already cancel all requests of io-wq there, set IO_WQ_BIT_EXIT
in advance, so preventing 3) from executing unbound requests. The
workers will initially break looping because of getting a signal as they
are threads of the dying/exec()'ing user task.

Cc: stable@vger.kernel.org
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/abfcf8c54cb9e8f7bfbad7e9a0cc5433cc70bdc2.1621781238.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c    | 20 +++++++++-----------
 fs/io-wq.h    |  2 +-
 fs/io_uring.c |  6 ++++++
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5361a9b4b47b5..de9b7ba3ba015 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -979,13 +979,16 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
 	return cwd->wqe->wq == data;
 }
 
+void io_wq_exit_start(struct io_wq *wq)
+{
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+}
+
 static void io_wq_exit_workers(struct io_wq *wq)
 {
 	struct callback_head *cb;
 	int node;
 
-	set_bit(IO_WQ_BIT_EXIT, &wq->state);
-
 	if (!wq->task)
 		return;
 
@@ -1020,8 +1023,6 @@ static void io_wq_destroy(struct io_wq *wq)
 
 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 
-	io_wq_exit_workers(wq);
-
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
 		struct io_cb_cancel_data match = {
@@ -1036,16 +1037,13 @@ static void io_wq_destroy(struct io_wq *wq)
 	kfree(wq);
 }
 
-void io_wq_put(struct io_wq *wq)
-{
-	if (refcount_dec_and_test(&wq->refs))
-		io_wq_destroy(wq);
-}
-
 void io_wq_put_and_exit(struct io_wq *wq)
 {
+	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
 	io_wq_exit_workers(wq);
-	io_wq_put(wq);
+	if (refcount_dec_and_test(&wq->refs))
+		io_wq_destroy(wq);
 }
 
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 0e6d310999e89..af2df0680ee22 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -122,7 +122,7 @@ struct io_wq_data {
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-void io_wq_put(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
 void io_wq_put_and_exit(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5f82954004f61..6af8ca0cb01c7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9078,6 +9078,9 @@ static void io_uring_cancel_sqpoll(struct io_sq_data *sqd)
 
 	if (!current->io_uring)
 		return;
+	if (tctx->io_wq)
+		io_wq_exit_start(tctx->io_wq);
+
 	WARN_ON_ONCE(!sqd || sqd->thread != current);
 
 	atomic_inc(&tctx->in_idle);
@@ -9112,6 +9115,9 @@ void __io_uring_cancel(struct files_struct *files)
 	DEFINE_WAIT(wait);
 	s64 inflight;
 
+	if (tctx->io_wq)
+		io_wq_exit_start(tctx->io_wq);
+
 	/* make sure overflow events are dropped */
 	atomic_inc(&tctx->in_idle);
 	do {
-- 
GitLab


From a8ea6fc9b089156d9230bfeef964dd9be101a4a9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 26 May 2021 01:58:49 +0200
Subject: [PATCH 1588/3804] sched: Stop PF_NO_SETAFFINITY from being inherited
 by various init system threads

Commit:

  00b89fe0197f ("sched: Make the idle task quack like a per-CPU kthread")

... added PF_KTHREAD | PF_NO_SETAFFINITY to the idle kernel threads.

Unfortunately these properties are inherited by the init/0 children
through kernel_thread() calls: init/1 and kthreadd. There are several
side effects to that:

1) kthreadd affinity can not be reset anymore from userspace. Also
   PF_NO_SETAFFINITY propagates to all kthreadd children, including
   the unbound kthreads. Therefore it's not possible anymore to overwrite
   the affinity of any of them. Here is an example of warning reported
   by rcutorture:

		WARNING: CPU: 0 PID: 116 at kernel/rcu/tree_nocb.h:1306 rcu_bind_current_to_nocb+0x31/0x40
		Call Trace:
		 rcu_torture_fwd_prog+0x62/0x730
		 kthread+0x122/0x140
		 ret_from_fork+0x22/0x30

2) init/1 does an exec() in the end which clears both
   PF_KTHREAD and PF_NO_SETAFFINITY so we are fine once kernel_init()
   escapes to userspace. But until then, no initcall or init code can
   successfully call sched_setaffinity() to init/1.

   Also PF_KTHREAD looks legit on init/1 before it calls exec() but
   we better be careful with unknown introduced side effects.

One way to solve the PF_NO_SETAFFINITY issue is to not inherit this flag
on copy_process() at all. The cases where it matters are:

* fork_idle(): explicitly set the flag already.
* fork() syscalls: userspace tasks that shouldn't be concerned by that.
* create_io_thread(): the callers explicitly attribute the flag to the
                      newly created tasks.
* kernel_thread():
	- Fix the issues on init/1 and kthreadd
	- Fix the issues on kthreadd children.
	- Usermode helper created by an unbound workqueue. This shouldn't
	  matter. In the worst case it gives more control to userspace
	  on setting affinity to these short living tasks although this can
	  be tuned with inherited unbound workqueues affinity already.

Fixes: 00b89fe0197f ("sched: Make the idle task quack like a per-CPU kthread")
Reported-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20210525235849.441842-1-frederic@kernel.org
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index ace4631b5b547..e595e77913eb7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2000,7 +2000,7 @@ static __latent_entropy struct task_struct *copy_process(
 		goto bad_fork_cleanup_count;
 
 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
-	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
+	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY);
 	p->flags |= PF_FORKNOEXEC;
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
-- 
GitLab


From e86be3a04bc4aeaf12f93af35f08f8d4385bcd98 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 25 May 2021 18:43:38 -0400
Subject: [PATCH 1589/3804] SUNRPC: More fixes for backlog congestion

Ensure that we fix the XPRT_CONGESTED starvation issue for RDMA as well
as socket based transports.
Ensure we always initialise the request after waking up from the backlog
list.

Fixes: e877a88d1f06 ("SUNRPC in case of backlog, hand free slots directly to waiting task")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h     |  2 ++
 net/sunrpc/xprt.c               | 58 ++++++++++++++++-----------------
 net/sunrpc/xprtrdma/transport.c | 12 +++----
 net/sunrpc/xprtrdma/verbs.c     | 18 ++++++++--
 net/sunrpc/xprtrdma/xprt_rdma.h |  1 +
 5 files changed, 52 insertions(+), 39 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index d81fe8b364d00..61b622e334ee5 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -368,6 +368,8 @@ struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
 				unsigned int num_prealloc,
 				unsigned int max_req);
 void			xprt_free(struct rpc_xprt *);
+void			xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task);
+bool			xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req);
 
 static inline int
 xprt_enable_swap(struct rpc_xprt *xprt)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5b3981fd37838..3509a7f139b98 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1607,11 +1607,18 @@ xprt_transmit(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
-static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
+static void xprt_complete_request_init(struct rpc_task *task)
+{
+	if (task->tk_rqstp)
+		xprt_request_init(task);
+}
+
+void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	set_bit(XPRT_CONGESTED, &xprt->state);
-	rpc_sleep_on(&xprt->backlog, task, NULL);
+	rpc_sleep_on(&xprt->backlog, task, xprt_complete_request_init);
 }
+EXPORT_SYMBOL_GPL(xprt_add_backlog);
 
 static bool __xprt_set_rq(struct rpc_task *task, void *data)
 {
@@ -1619,14 +1626,13 @@ static bool __xprt_set_rq(struct rpc_task *task, void *data)
 
 	if (task->tk_rqstp == NULL) {
 		memset(req, 0, sizeof(*req));	/* mark unused */
-		task->tk_status = -EAGAIN;
 		task->tk_rqstp = req;
 		return true;
 	}
 	return false;
 }
 
-static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
+bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
 	if (rpc_wake_up_first(&xprt->backlog, __xprt_set_rq, req) == NULL) {
 		clear_bit(XPRT_CONGESTED, &xprt->state);
@@ -1634,6 +1640,7 @@ static bool xprt_wake_up_backlog(struct rpc_xprt *xprt, struct rpc_rqst *req)
 	}
 	return true;
 }
+EXPORT_SYMBOL_GPL(xprt_wake_up_backlog);
 
 static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -1643,7 +1650,7 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 		goto out;
 	spin_lock(&xprt->reserve_lock);
 	if (test_bit(XPRT_CONGESTED, &xprt->state)) {
-		rpc_sleep_on(&xprt->backlog, task, NULL);
+		xprt_add_backlog(xprt, task);
 		ret = true;
 	}
 	spin_unlock(&xprt->reserve_lock);
@@ -1812,10 +1819,6 @@ xprt_request_init(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_rqst	*req = task->tk_rqstp;
 
-	if (req->rq_task)
-		/* Already initialized */
-		return;
-
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
@@ -1876,10 +1879,8 @@ void xprt_retry_reserve(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_xprt;
 
 	task->tk_status = 0;
-	if (task->tk_rqstp != NULL) {
-		xprt_request_init(task);
+	if (task->tk_rqstp != NULL)
 		return;
-	}
 
 	task->tk_status = -EAGAIN;
 	xprt_do_reserve(xprt, task);
@@ -1904,24 +1905,21 @@ void xprt_release(struct rpc_task *task)
 	}
 
 	xprt = req->rq_xprt;
-	if (xprt) {
-		xprt_request_dequeue_xprt(task);
-		spin_lock(&xprt->transport_lock);
-		xprt->ops->release_xprt(xprt, task);
-		if (xprt->ops->release_request)
-			xprt->ops->release_request(task);
-		xprt_schedule_autodisconnect(xprt);
-		spin_unlock(&xprt->transport_lock);
-		if (req->rq_buffer)
-			xprt->ops->buf_free(task);
-		xdr_free_bvec(&req->rq_rcv_buf);
-		xdr_free_bvec(&req->rq_snd_buf);
-		if (req->rq_cred != NULL)
-			put_rpccred(req->rq_cred);
-		if (req->rq_release_snd_buf)
-			req->rq_release_snd_buf(req);
-	} else
-		xprt = task->tk_xprt;
+	xprt_request_dequeue_xprt(task);
+	spin_lock(&xprt->transport_lock);
+	xprt->ops->release_xprt(xprt, task);
+	if (xprt->ops->release_request)
+		xprt->ops->release_request(task);
+	xprt_schedule_autodisconnect(xprt);
+	spin_unlock(&xprt->transport_lock);
+	if (req->rq_buffer)
+		xprt->ops->buf_free(task);
+	xdr_free_bvec(&req->rq_rcv_buf);
+	xdr_free_bvec(&req->rq_snd_buf);
+	if (req->rq_cred != NULL)
+		put_rpccred(req->rq_cred);
+	if (req->rq_release_snd_buf)
+		req->rq_release_snd_buf(req);
 
 	task->tk_rqstp = NULL;
 	if (likely(!bc_prealloc(req)))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 09953597d055a..19a49d26b1e41 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -520,9 +520,8 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
 	return;
 
 out_sleep:
-	set_bit(XPRT_CONGESTED, &xprt->state);
-	rpc_sleep_on(&xprt->backlog, task, NULL);
 	task->tk_status = -EAGAIN;
+	xprt_add_backlog(xprt, task);
 }
 
 /**
@@ -537,10 +536,11 @@ xprt_rdma_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *rqst)
 	struct rpcrdma_xprt *r_xprt =
 		container_of(xprt, struct rpcrdma_xprt, rx_xprt);
 
-	memset(rqst, 0, sizeof(*rqst));
-	rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
-	if (unlikely(!rpc_wake_up_next(&xprt->backlog)))
-		clear_bit(XPRT_CONGESTED, &xprt->state);
+	rpcrdma_reply_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+	if (!xprt_wake_up_backlog(xprt, rqst)) {
+		memset(rqst, 0, sizeof(*rqst));
+		rpcrdma_buffer_put(&r_xprt->rx_buf, rpcr_to_rdmar(rqst));
+	}
 }
 
 static bool rpcrdma_check_regbuf(struct rpcrdma_xprt *r_xprt,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 1e965a3808966..649c23518ec04 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1200,6 +1200,20 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 	return mr;
 }
 
+/**
+ * rpcrdma_reply_put - Put reply buffers back into pool
+ * @buffers: buffer pool
+ * @req: object to return
+ *
+ */
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
+{
+	if (req->rl_reply) {
+		rpcrdma_rep_put(buffers, req->rl_reply);
+		req->rl_reply = NULL;
+	}
+}
+
 /**
  * rpcrdma_buffer_get - Get a request buffer
  * @buffers: Buffer pool from which to obtain a buffer
@@ -1228,9 +1242,7 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
  */
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 {
-	if (req->rl_reply)
-		rpcrdma_rep_put(buffers, req->rl_reply);
-	req->rl_reply = NULL;
+	rpcrdma_reply_put(buffers, req);
 
 	spin_lock(&buffers->rb_lock);
 	list_add(&req->rl_list, &buffers->rb_send_bufs);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 436ad73126141..5d231d94e9440 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -479,6 +479,7 @@ struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
 			struct rpcrdma_req *req);
 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
+void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req);
 
 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
 			    gfp_t flags);
-- 
GitLab


From 56517ab958b7c11030e626250c00b9b1a24b41eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 25 May 2021 10:23:05 -0400
Subject: [PATCH 1590/3804] NFS: Fix an Oopsable condition in
 __nfs_pageio_add_request()

Ensure that nfs_pageio_error_cleanup() resets the mirror array contents,
so that the structure reflects the fact that it is now empty.
Also change the test in nfs_pageio_do_add_request() to be more robust by
checking whether or not the list is empty rather than relying on the
value of pg_count.

Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pagelist.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6c20b28d9d7c1..d35c84af44e01 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1094,15 +1094,16 @@ nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 	struct nfs_page *prev = NULL;
 	unsigned int size;
 
-	if (mirror->pg_count != 0) {
-		prev = nfs_list_entry(mirror->pg_list.prev);
-	} else {
+	if (list_empty(&mirror->pg_list)) {
 		if (desc->pg_ops->pg_init)
 			desc->pg_ops->pg_init(desc, req);
 		if (desc->pg_error < 0)
 			return 0;
 		mirror->pg_base = req->wb_pgbase;
-	}
+		mirror->pg_count = 0;
+		mirror->pg_recoalesce = 0;
+	} else
+		prev = nfs_list_entry(mirror->pg_list.prev);
 
 	if (desc->pg_maxretrans && req->wb_nio > desc->pg_maxretrans) {
 		if (NFS_SERVER(desc->pg_inode)->flags & NFS_MOUNT_SOFTERR)
-- 
GitLab


From 0d0ea309357dea0d85a82815f02157eb7fcda39f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 25 May 2021 10:40:12 -0400
Subject: [PATCH 1591/3804] NFS: Don't corrupt the value of pg_bytes_written in
 nfs_do_recoalesce()

The value of mirror->pg_bytes_written should only be updated after a
successful attempt to flush out the requests on the list.

Fixes: a7d42ddb3099 ("nfs: add mirroring support to pgio layer")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pagelist.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d35c84af44e01..daf6658517f43 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1128,17 +1128,16 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 {
 	struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
 
-
 	if (!list_empty(&mirror->pg_list)) {
 		int error = desc->pg_ops->pg_doio(desc);
 		if (error < 0)
 			desc->pg_error = error;
-		else
+		if (list_empty(&mirror->pg_list)) {
 			mirror->pg_bytes_written += mirror->pg_count;
-	}
-	if (list_empty(&mirror->pg_list)) {
-		mirror->pg_count = 0;
-		mirror->pg_base = 0;
+			mirror->pg_count = 0;
+			mirror->pg_base = 0;
+			mirror->pg_recoalesce = 0;
+		}
 	}
 }
 
@@ -1228,7 +1227,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
 
 	do {
 		list_splice_init(&mirror->pg_list, &head);
-		mirror->pg_bytes_written -= mirror->pg_count;
 		mirror->pg_count = 0;
 		mirror->pg_base = 0;
 		mirror->pg_recoalesce = 0;
-- 
GitLab


From 70536bf4eb07ed5d2816ccb274e5e6b41b95a437 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 25 May 2021 11:26:35 -0400
Subject: [PATCH 1592/3804] NFS: Clean up reset of the mirror accounting
 variables

Now that nfs_pageio_do_add_request() resets the pg_count, we don't need
these other inlined resets.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pagelist.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index daf6658517f43..cf9cc62ec48ec 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -1132,12 +1132,8 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 		int error = desc->pg_ops->pg_doio(desc);
 		if (error < 0)
 			desc->pg_error = error;
-		if (list_empty(&mirror->pg_list)) {
+		if (list_empty(&mirror->pg_list))
 			mirror->pg_bytes_written += mirror->pg_count;
-			mirror->pg_count = 0;
-			mirror->pg_base = 0;
-			mirror->pg_recoalesce = 0;
-		}
 	}
 }
 
@@ -1227,9 +1223,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
 
 	do {
 		list_splice_init(&mirror->pg_list, &head);
-		mirror->pg_count = 0;
-		mirror->pg_base = 0;
-		mirror->pg_recoalesce = 0;
 
 		while (!list_empty(&head)) {
 			struct nfs_page *req;
-- 
GitLab


From 3202f482417cefc0f8fad5aaba6eea00f68141a0 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Thu, 15 Apr 2021 11:52:32 -0700
Subject: [PATCH 1593/3804] HID: hid-sensor-custom: Process failure of
 sensor_hub_set_feature()

When the user modifies a custom feature value and sensor_hub_set_feature()
fails, return an error.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-sensor-custom.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index 2e6662173a79c..32c2306e240d6 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -387,7 +387,7 @@ static ssize_t store_value(struct device *dev, struct device_attribute *attr,
 	struct hid_sensor_custom *sensor_inst = dev_get_drvdata(dev);
 	int index, field_index, usage;
 	char name[HID_CUSTOM_NAME_LENGTH];
-	int value;
+	int value, ret;
 
 	if (sscanf(attr->attr.name, "feature-%x-%x-%s", &index, &usage,
 		   name) == 3) {
@@ -403,8 +403,10 @@ static ssize_t store_value(struct device *dev, struct device_attribute *attr,
 
 		report_id = sensor_inst->fields[field_index].attribute.
 								report_id;
-		sensor_hub_set_feature(sensor_inst->hsdev, report_id,
-				       index, sizeof(value), &value);
+		ret = sensor_hub_set_feature(sensor_inst->hsdev, report_id,
+					     index, sizeof(value), &value);
+		if (ret)
+			return ret;
 	} else
 		return -EINVAL;
 
-- 
GitLab


From bae989c4bc53f861cc1b706aab0194703e9907a8 Mon Sep 17 00:00:00 2001
From: Maciej Falkowski <maciej.falkowski9@gmail.com>
Date: Thu, 1 Apr 2021 18:04:34 +0200
Subject: [PATCH 1594/3804] ARM: OMAP1: ams-delta: remove unused function
 ams_delta_camera_power

The ams_delta_camera_power() function is unused, as reported by a
Clang build with omap1_defconfig on linux-next:

arch/arm/mach-omap1/board-ams-delta.c:462:12: warning: unused function 'ams_delta_camera_power' [-Wunused-function]
static int ams_delta_camera_power(struct device *dev, int power)
           ^
1 warning generated.

The soc_camera support was dropped without removing
ams_delta_camera_power() function, making it unused.

Fixes: ce548396a433 ("media: mach-omap1: board-ams-delta.c: remove soc_camera dependencies")
Signed-off-by: Maciej Falkowski <maciej.falkowski9@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/1326
---
 arch/arm/mach-omap1/board-ams-delta.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 2ee527c002840..1026a816dcc02 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -458,20 +458,6 @@ static struct gpiod_lookup_table leds_gpio_table = {
 
 #ifdef CONFIG_LEDS_TRIGGERS
 DEFINE_LED_TRIGGER(ams_delta_camera_led_trigger);
-
-static int ams_delta_camera_power(struct device *dev, int power)
-{
-	/*
-	 * turn on camera LED
-	 */
-	if (power)
-		led_trigger_event(ams_delta_camera_led_trigger, LED_FULL);
-	else
-		led_trigger_event(ams_delta_camera_led_trigger, LED_OFF);
-	return 0;
-}
-#else
-#define ams_delta_camera_power	NULL
 #endif
 
 static struct platform_device ams_delta_audio_device = {
-- 
GitLab


From b24412aff37c58286a0aeafc5678fbdc6a527d54 Mon Sep 17 00:00:00 2001
From: Antoniu Miclaus <antoniu.miclaus@analog.com>
Date: Wed, 26 May 2021 11:52:24 +0300
Subject: [PATCH 1595/3804] regmap: add support for 7/17 register formatting

This patch adds support for the 7-bit register, 17-bit value register
format. This is used, for example, by the Analog Devices
ADMV1013/ADMV1014.

Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Signed-off-by: Andrei Drimbarean <andrei.drimbarean@analog.com>
Message-Id: <20210526085223.14896-1-antoniu.miclaus@analog.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 0d185ec018a5c..fe3e38dd5324f 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -243,6 +243,16 @@ static void regmap_format_7_9_write(struct regmap *map,
 	*out = cpu_to_be16((reg << 9) | val);
 }
 
+static void regmap_format_7_17_write(struct regmap *map,
+				    unsigned int reg, unsigned int val)
+{
+	u8 *out = map->work_buf;
+
+	out[2] = val;
+	out[1] = val >> 8;
+	out[0] = (val >> 16) | (reg << 1);
+}
+
 static void regmap_format_10_14_write(struct regmap *map,
 				    unsigned int reg, unsigned int val)
 {
@@ -885,6 +895,9 @@ struct regmap *__regmap_init(struct device *dev,
 		case 9:
 			map->format.format_write = regmap_format_7_9_write;
 			break;
+		case 17:
+			map->format.format_write = regmap_format_7_17_write;
+			break;
 		default:
 			goto err_hwlock;
 		}
-- 
GitLab


From 9be85de97786a75f62080de1c0c13656f65cba84 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:00 +0100
Subject: [PATCH 1596/3804] locking/atomic: make ARCH_ATOMIC a Kconfig symbol

Subsequent patches will move architectures over to the ARCH_ATOMIC API,
after preparing the asm-generic atomic implementations to function with
or without ARCH_ATOMIC.

As some architectures use the asm-generic implementations exclusively
(and don't have a local atomic.h), and to avoid the risk that
ARCH_ATOMIC isn't defined in some cases we expect, let's make the
ARCH_ATOMIC macro a Kconfig symbol instead, so that we can guarantee it
is consistently available where needed.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-2-mark.rutland@arm.com
---
 arch/Kconfig                    | 3 +++
 arch/arm64/Kconfig              | 1 +
 arch/arm64/include/asm/atomic.h | 2 --
 arch/s390/Kconfig               | 1 +
 arch/s390/include/asm/atomic.h  | 2 --
 arch/um/Kconfig                 | 1 +
 arch/x86/Kconfig                | 1 +
 arch/x86/include/asm/atomic.h   | 2 --
 include/linux/atomic.h          | 2 +-
 9 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index c45b770d3579a..3fb3b12d4a958 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -11,6 +11,9 @@ source "arch/$(SRCARCH)/Kconfig"
 
 menu "General architecture-dependent options"
 
+config ARCH_ATOMIC
+	bool
+
 config CRASH_CORE
 	bool
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f1d8566bbf95..62ab429d1f42e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -9,6 +9,7 @@ config ARM64
 	select ACPI_MCFG if (ACPI && PCI)
 	select ACPI_SPCR_TABLE if ACPI
 	select ACPI_PPTT if ACPI
+	select ARCH_ATOMIC
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h
index b56a4b2bc2486..c9979273d3898 100644
--- a/arch/arm64/include/asm/atomic.h
+++ b/arch/arm64/include/asm/atomic.h
@@ -223,6 +223,4 @@ static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 
 #define arch_atomic64_dec_if_positive		arch_atomic64_dec_if_positive
 
-#define ARCH_ATOMIC
-
 #endif /* __ASM_ATOMIC_H */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b4c7c34069f81..85374a36c69e1 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -58,6 +58,7 @@ config S390
 	# Note: keep this list sorted alphabetically
 	#
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
+	select ARCH_ATOMIC
 	select ARCH_32BIT_USTAT_F_TINODE
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index 7c93c6573524d..7138d189cc420 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -147,6 +147,4 @@ ATOMIC64_OPS(xor)
 #define arch_atomic64_fetch_sub(_i, _v)  arch_atomic64_fetch_add(-(s64)(_i), _v)
 #define arch_atomic64_sub(_i, _v)	 arch_atomic64_add(-(s64)(_i), _v)
 
-#define ARCH_ATOMIC
-
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 57cfd9a1c082e..4370a9521ea46 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -5,6 +5,7 @@ menu "UML-specific options"
 config UML
 	bool
 	default y
+	select ARCH_ATOMIC
 	select ARCH_EPHEMERAL_INODES
 	select ARCH_HAS_KCOV
 	select ARCH_NO_PREEMPT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0045e1b441902..11a27563033de 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,6 +58,7 @@ config X86
 	#
 	select ACPI_LEGACY_TABLES_LOOKUP	if ACPI
 	select ACPI_SYSTEM_POWER_STATES_SUPPORT	if ACPI
+	select ARCH_ATOMIC
 	select ARCH_32BIT_OFF_T			if X86_32
 	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index f732741ad7c72..5e754e8957671 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -269,6 +269,4 @@ static __always_inline int arch_atomic_fetch_xor(int i, atomic_t *v)
 # include <asm/atomic64_64.h>
 #endif
 
-#define ARCH_ATOMIC
-
 #endif /* _ASM_X86_ATOMIC_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 571a11008ab5b..4f8d83f9e480a 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -77,7 +77,7 @@
 	__ret;								\
 })
 
-#ifdef ARCH_ATOMIC
+#ifdef CONFIG_ARCH_ATOMIC
 #include <linux/atomic-arch-fallback.h>
 #include <asm-generic/atomic-instrumented.h>
 #else
-- 
GitLab


From 201e2c1bbe659720913ed5272a2c44e6ab646c8a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:01 +0100
Subject: [PATCH 1597/3804] locking/atomic: net: use linux/atomic.h for xchg &
 cmpxchg

As xchg*() and cmpxchg*() may be instrumented by atomic-instrumented.h,
it's necessary to include <linux/atomic.h> to use these, rather than
<asm/cmpxchg.h>, which is effectively an arch-internal header.

In a couple of places we include <asm/cmpxchg.h>, but get away with this
as <linux/atomic.h> gets pulled in indirectly by another include. Before
we convert more architectures to use atomic-instrumented.h, let's fix
these up to use <linux/atomic.h> so that we don't make things more
fragile.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-3-mark.rutland@arm.com
---
 net/core/filter.c          | 2 +-
 net/sunrpc/xprtmultipath.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index cae56d08a6707..ce4ae1a19a714 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -17,6 +17,7 @@
  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  */
 
+#include <linux/atomic.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/mm.h>
@@ -41,7 +42,6 @@
 #include <linux/timer.h>
 #include <linux/uaccess.h>
 #include <asm/unaligned.h>
-#include <asm/cmpxchg.h>
 #include <linux/filter.h>
 #include <linux/ratelimit.h>
 #include <linux/seccomp.h>
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 78c075a68c047..1b4073131c6fd 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -7,13 +7,13 @@
  * Trond Myklebust <trond.myklebust@primarydata.com>
  *
  */
+#include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/slab.h>
-#include <asm/cmpxchg.h>
 #include <linux/spinlock.h>
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/addr.h>
-- 
GitLab


From c7178cdecdbef8321f418fac55f3afaca3bb4c96 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:02 +0100
Subject: [PATCH 1598/3804] locking/atomic: h8300: use asm-generic exclusively

As h8300's implementation of the atomics isn't using any arch-specific
functionality, and its implementation of cmpxchg only uses assembly to
non-atomically swap two elements in memory, we may as well use the
asm-generic atomic.h and cmpxchg.h, and avoid the duplicate code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-4-mark.rutland@arm.com
---
 arch/h8300/include/asm/Kbuild    |  1 +
 arch/h8300/include/asm/atomic.h  | 97 --------------------------------
 arch/h8300/include/asm/cmpxchg.h | 66 ----------------------
 3 files changed, 1 insertion(+), 163 deletions(-)
 delete mode 100644 arch/h8300/include/asm/atomic.h
 delete mode 100644 arch/h8300/include/asm/cmpxchg.h

diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 60ee7f0d60a8f..e23139c8fc0d7 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += asm-offsets.h
+generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h
deleted file mode 100644
index a990d151f1633..0000000000000
--- a/arch/h8300/include/asm/atomic.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ARCH_H8300_ATOMIC__
-#define __ARCH_H8300_ATOMIC__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/cmpxchg.h>
-#include <asm/irqflags.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
-
-#define ATOMIC_OP_RETURN(op, c_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)	\
-{								\
-	h8300flags flags;					\
-	int ret;						\
-								\
-	flags = arch_local_irq_save();				\
-	ret = v->counter c_op i;				\
-	arch_local_irq_restore(flags);				\
-	return ret;						\
-}
-
-#define ATOMIC_FETCH_OP(op, c_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)		\
-{								\
-	h8300flags flags;					\
-	int ret;						\
-								\
-	flags = arch_local_irq_save();				\
-	ret = v->counter;					\
-	v->counter c_op i;					\
-	arch_local_irq_restore(flags);				\
-	return ret;						\
-}
-
-#define ATOMIC_OP(op, c_op)					\
-static inline void atomic_##op(int i, atomic_t *v)		\
-{								\
-	h8300flags flags;					\
-								\
-	flags = arch_local_irq_save();				\
-	v->counter c_op i;					\
-	arch_local_irq_restore(flags);				\
-}
-
-ATOMIC_OP_RETURN(add, +=)
-ATOMIC_OP_RETURN(sub, -=)
-
-#define ATOMIC_OPS(op, c_op)					\
-	ATOMIC_OP(op, c_op)					\
-	ATOMIC_FETCH_OP(op, c_op)
-
-ATOMIC_OPS(and, &=)
-ATOMIC_OPS(or,  |=)
-ATOMIC_OPS(xor, ^=)
-ATOMIC_OPS(add, +=)
-ATOMIC_OPS(sub, -=)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	h8300flags flags;
-
-	flags = arch_local_irq_save();
-	ret = v->counter;
-	if (likely(ret == old))
-		v->counter = new;
-	arch_local_irq_restore(flags);
-	return ret;
-}
-
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int ret;
-	h8300flags flags;
-
-	flags = arch_local_irq_save();
-	ret = v->counter;
-	if (ret != u)
-		v->counter += a;
-	arch_local_irq_restore(flags);
-	return ret;
-}
-#define atomic_fetch_add_unless		atomic_fetch_add_unless
-
-#endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/arch/h8300/include/asm/cmpxchg.h b/arch/h8300/include/asm/cmpxchg.h
deleted file mode 100644
index c64bb38ce2428..0000000000000
--- a/arch/h8300/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ARCH_H8300_CMPXCHG__
-#define __ARCH_H8300_CMPXCHG__
-
-#include <linux/irqflags.h>
-
-#define xchg(ptr, x) \
-	((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), \
-				    sizeof(*(ptr))))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((volatile struct __xchg_dummy *)(x))
-
-static inline unsigned long __xchg(unsigned long x,
-				   volatile void *ptr, int size)
-{
-	unsigned long tmp, flags;
-
-	local_irq_save(flags);
-
-	switch (size) {
-	case 1:
-		__asm__ __volatile__
-			("mov.b %2,%0\n\t"
-			 "mov.b %1,%2"
-			 : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)));
-		break;
-	case 2:
-		__asm__ __volatile__
-			("mov.w %2,%0\n\t"
-			 "mov.w %1,%2"
-			 : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)));
-		break;
-	case 4:
-		__asm__ __volatile__
-			("mov.l %2,%0\n\t"
-			 "mov.l %1,%2"
-			 : "=&r" (tmp) : "r" (x), "m" (*__xg(ptr)));
-		break;
-	default:
-		tmp = 0;
-	}
-	local_irq_restore(flags);
-	return tmp;
-}
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)					 \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr),		 \
-						     (unsigned long)(o), \
-						     (unsigned long)(n), \
-						     sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-#endif /* __ARCH_H8300_CMPXCHG__ */
-- 
GitLab


From b68622a86c8f30423c0a09204b1db2b74a06b5f0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:03 +0100
Subject: [PATCH 1599/3804] locking/atomic: microblaze: use asm-generic
 exclusively

Microblaze provides its own implementation of atomic_dec_if_positive(),
but nothing else. For a while now, the conditional inc/dec ops have been
optional, and the core code will provide generic implementations using
the code templates in scripts/atomic/fallbacks/.

For simplicity, and for consistency with the other conditional atomic
ops, let's drop the microblaze implementation of
atomic_dec_if_positive(), and use the generic implementation.

With that, we can also drop the local asm/atomic.h and asm/cmpxchg.h
headers, as asm-generic/atomic.h is mandatory-y, and we can pull in
asm-generic/cmpxchg.h via generic-y. This matches what nios2 and nds32
do today.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-5-mark.rutland@arm.com
---
 arch/microblaze/include/asm/Kbuild    |  1 +
 arch/microblaze/include/asm/atomic.h  | 28 ---------------------------
 arch/microblaze/include/asm/cmpxchg.h |  9 ---------
 3 files changed, 1 insertion(+), 37 deletions(-)
 delete mode 100644 arch/microblaze/include/asm/atomic.h
 delete mode 100644 arch/microblaze/include/asm/cmpxchg.h

diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 29b0e557aa7c5..a055f5dbe00a3 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
+generic-y += cmpxchg.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
deleted file mode 100644
index 41e9aff23a623..0000000000000
--- a/arch/microblaze/include/asm/atomic.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_MICROBLAZE_ATOMIC_H
-#define _ASM_MICROBLAZE_ATOMIC_H
-
-#include <asm/cmpxchg.h>
-#include <asm-generic/atomic.h>
-#include <asm-generic/atomic64.h>
-
-/*
- * Atomically test *v and decrement if it is greater than 0.
- * The function returns the old value of *v minus 1.
- */
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
-	unsigned long flags;
-	int res;
-
-	local_irq_save(flags);
-	res = v->counter - 1;
-	if (res >= 0)
-		v->counter = res;
-	local_irq_restore(flags);
-
-	return res;
-}
-#define atomic_dec_if_positive atomic_dec_if_positive
-
-#endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/microblaze/include/asm/cmpxchg.h b/arch/microblaze/include/asm/cmpxchg.h
deleted file mode 100644
index 3523b51aab363..0000000000000
--- a/arch/microblaze/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_MICROBLAZE_CMPXCHG_H
-#define _ASM_MICROBLAZE_CMPXCHG_H
-
-#ifndef CONFIG_SMP
-# include <asm-generic/cmpxchg.h>
-#endif
-
-#endif /* _ASM_MICROBLAZE_CMPXCHG_H */
-- 
GitLab


From f0c7bf1b77c65c9a273207d228df27009f09ec0b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:04 +0100
Subject: [PATCH 1600/3804] locking/atomic: openrisc: avoid
 asm-generic/atomic.h

OpenRISC is the only architecture which uses asm-generic/atomic.h and
also provides its own implementation of some functions, requiring
ifdeferry in the asm-generic header. As OpenRISC provides the vast
majority of functions itself, it would be simpler overall if it also
provided the few functions it cribs from asm-generic.

This patch decouples OpenRISC from asm-generic/atomic.h. Subsequent
patches will simplify the asm-generic implementation and remove the now
unnecessary ifdeferry.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Stafford Horne <shorne@gmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-6-mark.rutland@arm.com
---
 arch/openrisc/include/asm/atomic.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index b589fac39b923..cb86970d38591 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -121,6 +121,12 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 }
 #define atomic_fetch_add_unless	atomic_fetch_add_unless
 
-#include <asm-generic/atomic.h>
+#define atomic_read(v)			READ_ONCE((v)->counter)
+#define atomic_set(v,i)			WRITE_ONCE((v)->counter, (i))
+
+#include <asm/cmpxchg.h>
+
+#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
+#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
 #endif /* __ASM_OPENRISC_ATOMIC_H */
-- 
GitLab


From 2609a195fbd58f77d281c013f10b8dbaffca1637 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:05 +0100
Subject: [PATCH 1601/3804] locking/atomic: atomic: remove stale comments

The commentary in asm-generic/atomic.h is stale; let's bring it up to
date:

* The block comment at the start of the file mentions this is only
  usable on UP systems, but is immediately followed by an SMP
  implementation using cmpxchg. Let's delete the misleading statement.

* A comment near the end of the file was originally at the top of the
  file, but over time rework has shuffled it near the end, and it's long
  been superseded by the block comment at the top of the file. Let's
  remove it.

* Since asm-generic/atomic.h isn't the canonical documentation for the
  atomic ops, and since the existing comments are not in kerneldoc
  format, we don't need to document the semantics of each operation here
  (and this would be better done in a centralised document). Let's
  remove these comments.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-7-mark.rutland@arm.com
---
 include/asm-generic/atomic.h | 39 ++----------------------------------
 1 file changed, 2 insertions(+), 37 deletions(-)

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 11f96f40f4a79..ebacbc6b363b7 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * Generic C implementation of atomic counter operations. Usable on
- * UP systems only. Do not include in machine independent code.
+ * Generic C implementation of atomic counter operations. Do not include in
+ * machine independent code.
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
@@ -12,23 +12,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-/*
- * atomic_$op() - $op integer to atomic variable
- * @i: integer value to $op
- * @v: pointer to the atomic variable
- *
- * Atomically $ops @i to @v. Does not strictly guarantee a memory-barrier, use
- * smp_mb__{before,after}_atomic().
- */
-
-/*
- * atomic_$op_return() - $op interer to atomic variable and returns the result
- * @i: integer value to $op
- * @v: pointer to the atomic variable
- *
- * Atomically $ops @i to @v. Does imply a full memory barrier.
- */
-
 #ifdef CONFIG_SMP
 
 /* we can build all atomic primitives from cmpxchg */
@@ -154,28 +137,10 @@ ATOMIC_OP(xor, ^)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-/**
- * atomic_read - read atomic variable
- * @v: pointer of type atomic_t
- *
- * Atomically reads the value of @v.
- */
 #ifndef atomic_read
 #define atomic_read(v)	READ_ONCE((v)->counter)
 #endif
 
-/**
- * atomic_set - set atomic variable
- * @v: pointer of type atomic_t
- * @i: required value
- *
- * Atomically sets the value of @v to @i.
- */
 #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/irqflags.h>
-- 
GitLab


From 89eb78d542394a8461164009272ea654357795ad Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:06 +0100
Subject: [PATCH 1602/3804] locking/atomic: atomic: remove redundant include

Since commit:

  560cb12a4080a48b ("locking,arch: Rewrite generic atomic support")

... we conditionally include <linux/irqflags.h> before defining atomics
using locking, and hence do not need to do so unconditionally later in
the header.

This patch removes the redundant include.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-8-mark.rutland@arm.com
---
 include/asm-generic/atomic.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index ebacbc6b363b7..d4bf803d6491c 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -143,8 +143,6 @@ ATOMIC_OP(xor, ^)
 
 #define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
-#include <linux/irqflags.h>
-
 static inline void atomic_add(int i, atomic_t *v)
 {
 	atomic_add_return(i, v);
-- 
GitLab


From d0e03218ca3be48c6f7109e4810d58e7b7dd4135 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:07 +0100
Subject: [PATCH 1603/3804] locking/atomic: atomic: simplify ifdeffery

Now that asm-generic/atomic.h is only used by architectures without any
architecture-specific atomic definitions, we know that there will be no
architecture-specific implementations to override, and can remove the
ifdeffery this has previously required, bringing it into line with
asm-generic/atomic64.h.

At the same time, we can implement atomic_add() and atomic_sub()
directly using ATOMIC_OP(), since we know architectures won't provide
atomic_add_return() or atomic_sub_return().

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-9-mark.rutland@arm.com
---
 include/asm-generic/atomic.h | 46 ++++--------------------------------
 1 file changed, 4 insertions(+), 42 deletions(-)

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index d4bf803d6491c..316c82a27b0ac 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -93,65 +93,27 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 
 #endif /* CONFIG_SMP */
 
-#ifndef atomic_add_return
 ATOMIC_OP_RETURN(add, +)
-#endif
-
-#ifndef atomic_sub_return
 ATOMIC_OP_RETURN(sub, -)
-#endif
 
-#ifndef atomic_fetch_add
 ATOMIC_FETCH_OP(add, +)
-#endif
-
-#ifndef atomic_fetch_sub
 ATOMIC_FETCH_OP(sub, -)
-#endif
-
-#ifndef atomic_fetch_and
 ATOMIC_FETCH_OP(and, &)
-#endif
-
-#ifndef atomic_fetch_or
 ATOMIC_FETCH_OP(or, |)
-#endif
-
-#ifndef atomic_fetch_xor
 ATOMIC_FETCH_OP(xor, ^)
-#endif
 
-#ifndef atomic_and
+ATOMIC_OP(add, +)
+ATOMIC_OP(sub, -)
 ATOMIC_OP(and, &)
-#endif
-
-#ifndef atomic_or
 ATOMIC_OP(or, |)
-#endif
-
-#ifndef atomic_xor
 ATOMIC_OP(xor, ^)
-#endif
 
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#ifndef atomic_read
-#define atomic_read(v)	READ_ONCE((v)->counter)
-#endif
-
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
-
-static inline void atomic_add(int i, atomic_t *v)
-{
-	atomic_add_return(i, v);
-}
-
-static inline void atomic_sub(int i, atomic_t *v)
-{
-	atomic_sub_return(i, v);
-}
+#define atomic_read(v)			READ_ONCE((v)->counter)
+#define atomic_set(v, i)		WRITE_ONCE(((v)->counter), (i))
 
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
-- 
GitLab


From f8b6455a9d381fc513efbec0be0c312b96e6eb6b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:08 +0100
Subject: [PATCH 1604/3804] locking/atomic: atomic: support ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as this will
enable functionality, and once all architectures are converted it will
be possible to make significant cleanups to the atomic headers.

A number of architectures use asm-generic/atomic.h, and it's impractical
to convert the header and all these architectures in one go. To make it
possible to convert them one-by-one, let's make the asm-generic
implementation function as either atomic_*() or arch_atomic_*()
depending on whether ARCH_ATOMIC is selected. To do this, the C
implementations are prefixed as generic_atomic_*(), and preprocessor
definitions map atomic_*()/arch_atomic_*() onto these as
appropriate.

Once all users are moved over to ARCH_ATOMIC the ifdeffery in the header
can be simplified and/or removed entirely.

For existing users (none of which select ARCH_ATOMIC), there should be
no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-10-mark.rutland@arm.com
---
 include/asm-generic/atomic.h | 71 +++++++++++++++++++++++++++++++-----
 1 file changed, 62 insertions(+), 9 deletions(-)

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 316c82a27b0ac..649060fa0fe8e 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -12,39 +12,47 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
+#ifdef CONFIG_ARCH_ATOMIC
+#define __ga_cmpxchg	arch_cmpxchg
+#define __ga_xchg	arch_xchg
+#else
+#define __ga_cmpxchg	cmpxchg
+#define __ga_xchg	xchg
+#endif
+
 #ifdef CONFIG_SMP
 
 /* we can build all atomic primitives from cmpxchg */
 
 #define ATOMIC_OP(op, c_op)						\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void generic_atomic_##op(int i, atomic_t *v)		\
 {									\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = cmpxchg(&v->counter, c, c c_op i)) != c)		\
+	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int generic_atomic_##op##_return(int i, atomic_t *v)	\
 {									\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = cmpxchg(&v->counter, c, c c_op i)) != c)		\
+	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 									\
 	return c c_op i;						\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op)					\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int generic_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = cmpxchg(&v->counter, c, c c_op i)) != c)		\
+	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 									\
 	return c;							\
@@ -55,7 +63,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 #include <linux/irqflags.h>
 
 #define ATOMIC_OP(op, c_op)						\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void generic_atomic_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 									\
@@ -65,7 +73,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int generic_atomic_##op##_return(int i, atomic_t *v)	\
 {									\
 	unsigned long flags;						\
 	int ret;							\
@@ -78,7 +86,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op)					\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int generic_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 	int ret;							\
@@ -112,10 +120,55 @@ ATOMIC_OP(xor, ^)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
+#undef __ga_cmpxchg
+#undef __ga_xchg
+
+#ifdef CONFIG_ARCH_ATOMIC
+
+#define arch_atomic_add_return			generic_atomic_add_return
+#define arch_atomic_sub_return			generic_atomic_sub_return
+
+#define arch_atomic_fetch_add			generic_atomic_fetch_add
+#define arch_atomic_fetch_sub			generic_atomic_fetch_sub
+#define arch_atomic_fetch_and			generic_atomic_fetch_and
+#define arch_atomic_fetch_or			generic_atomic_fetch_or
+#define arch_atomic_fetch_xor			generic_atomic_fetch_xor
+
+#define arch_atomic_add				generic_atomic_add
+#define arch_atomic_sub				generic_atomic_sub
+#define arch_atomic_and				generic_atomic_and
+#define arch_atomic_or				generic_atomic_or
+#define arch_atomic_xor				generic_atomic_xor
+
+#define arch_atomic_read(v)			READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i)			WRITE_ONCE(((v)->counter), (i))
+
+#define arch_atomic_xchg(ptr, v)		(arch_xchg(&(ptr)->counter, (v)))
+#define arch_atomic_cmpxchg(v, old, new)	(arch_cmpxchg(&((v)->counter), (old), (new)))
+
+#else /* CONFIG_ARCH_ATOMIC */
+
+#define atomic_add_return		generic_atomic_add_return
+#define atomic_sub_return		generic_atomic_sub_return
+
+#define atomic_fetch_add		generic_atomic_fetch_add
+#define atomic_fetch_sub		generic_atomic_fetch_sub
+#define atomic_fetch_and		generic_atomic_fetch_and
+#define atomic_fetch_or			generic_atomic_fetch_or
+#define atomic_fetch_xor		generic_atomic_fetch_xor
+
+#define atomic_add			generic_atomic_add
+#define atomic_sub			generic_atomic_sub
+#define atomic_and			generic_atomic_and
+#define atomic_or			generic_atomic_or
+#define atomic_xor			generic_atomic_xor
+
 #define atomic_read(v)			READ_ONCE((v)->counter)
 #define atomic_set(v, i)		WRITE_ONCE(((v)->counter), (i))
 
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
+#endif /* CONFIG_ARCH_ATOMIC */
+
 #endif /* __ASM_GENERIC_ATOMIC_H */
-- 
GitLab


From 1bdadf46eff6804ace5fa46b6856da4799f12b5c Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:09 +0100
Subject: [PATCH 1605/3804] locking/atomic: atomic64: support ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as this will
enable functionality, and once all architectures are converted it will
be possible to make significant cleanups to the atomic headers.

A number of architectures use asm-generic/atomic64.h, and it's
impractical to convert the header and all these architectures in one go.
To make it possible to convert them one-by-one, let's make the
asm-generic implementation function as either atomic64_*() or
arch_atomic64_*() depending on whether ARCH_ATOMIC is selected. To do
this, the generic implementations are prefixed as generic_atomic64_*(),
and preprocessor definitions map atomic64_*()/arch_atomic64_*() onto
these as appropriate.

Once all users are moved over to ARCH_ATOMIC the ifdeffery in the header
can be simplified and/or removed entirely.

For existing users (none of which select ARCH_ATOMIC), there should be
no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-11-mark.rutland@arm.com
---
 include/asm-generic/atomic64.h | 74 ++++++++++++++++++++++++++++------
 lib/atomic64.c                 | 36 ++++++++---------
 2 files changed, 79 insertions(+), 31 deletions(-)

diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index 370f01d4450f5..c8c7d9fae8203 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -15,19 +15,17 @@ typedef struct {
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-extern s64 atomic64_read(const atomic64_t *v);
-extern void atomic64_set(atomic64_t *v, s64 i);
-
-#define atomic64_set_release(v, i)	atomic64_set((v), (i))
+extern s64 generic_atomic64_read(const atomic64_t *v);
+extern void generic_atomic64_set(atomic64_t *v, s64 i);
 
 #define ATOMIC64_OP(op)							\
-extern void	 atomic64_##op(s64 a, atomic64_t *v);
+extern void generic_atomic64_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OP_RETURN(op)						\
-extern s64 atomic64_##op##_return(s64 a, atomic64_t *v);
+extern s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v);
 
 #define ATOMIC64_FETCH_OP(op)						\
-extern s64 atomic64_fetch_##op(s64 a, atomic64_t *v);
+extern s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v);
 
 #define ATOMIC64_OPS(op)	ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
@@ -46,11 +44,61 @@ ATOMIC64_OPS(xor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-extern s64 atomic64_dec_if_positive(atomic64_t *v);
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-extern s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
-extern s64 atomic64_xchg(atomic64_t *v, s64 new);
-extern s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+extern s64 generic_atomic64_dec_if_positive(atomic64_t *v);
+extern s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
+extern s64 generic_atomic64_xchg(atomic64_t *v, s64 new);
+extern s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
+
+#ifdef CONFIG_ARCH_ATOMIC
+
+#define arch_atomic64_read		generic_atomic64_read
+#define arch_atomic64_set		generic_atomic64_set
+#define arch_atomic64_set_release	generic_atomic64_set
+
+#define arch_atomic64_add		generic_atomic64_add
+#define arch_atomic64_add_return	generic_atomic64_add_return
+#define arch_atomic64_fetch_add		generic_atomic64_fetch_add
+#define arch_atomic64_sub		generic_atomic64_sub
+#define arch_atomic64_sub_return	generic_atomic64_sub_return
+#define arch_atomic64_fetch_sub		generic_atomic64_fetch_sub
+
+#define arch_atomic64_and		generic_atomic64_and
+#define arch_atomic64_fetch_and		generic_atomic64_fetch_and
+#define arch_atomic64_or		generic_atomic64_or
+#define arch_atomic64_fetch_or		generic_atomic64_fetch_or
+#define arch_atomic64_xor		generic_atomic64_xor
+#define arch_atomic64_fetch_xor		generic_atomic64_fetch_xor
+
+#define arch_atomic64_dec_if_positive	generic_atomic64_dec_if_positive
+#define arch_atomic64_cmpxchg		generic_atomic64_cmpxchg
+#define arch_atomic64_xchg		generic_atomic64_xchg
+#define arch_atomic64_fetch_add_unless	generic_atomic64_fetch_add_unless
+
+#else /* CONFIG_ARCH_ATOMIC */
+
+#define atomic64_read			generic_atomic64_read
+#define atomic64_set			generic_atomic64_set
+#define atomic64_set_release		generic_atomic64_set
+
+#define atomic64_add			generic_atomic64_add
+#define atomic64_add_return		generic_atomic64_add_return
+#define atomic64_fetch_add		generic_atomic64_fetch_add
+#define atomic64_sub			generic_atomic64_sub
+#define atomic64_sub_return		generic_atomic64_sub_return
+#define atomic64_fetch_sub		generic_atomic64_fetch_sub
+
+#define atomic64_and			generic_atomic64_and
+#define atomic64_fetch_and		generic_atomic64_fetch_and
+#define atomic64_or			generic_atomic64_or
+#define atomic64_fetch_or		generic_atomic64_fetch_or
+#define atomic64_xor			generic_atomic64_xor
+#define atomic64_fetch_xor		generic_atomic64_fetch_xor
+
+#define atomic64_dec_if_positive	generic_atomic64_dec_if_positive
+#define atomic64_cmpxchg		generic_atomic64_cmpxchg
+#define atomic64_xchg			generic_atomic64_xchg
+#define atomic64_fetch_add_unless	generic_atomic64_fetch_add_unless
+
+#endif /* CONFIG_ARCH_ATOMIC */
 
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/lib/atomic64.c b/lib/atomic64.c
index e98c85a99787f..3df653994177d 100644
--- a/lib/atomic64.c
+++ b/lib/atomic64.c
@@ -42,7 +42,7 @@ static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
 	return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
 }
 
-s64 atomic64_read(const atomic64_t *v)
+s64 generic_atomic64_read(const atomic64_t *v)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -53,9 +53,9 @@ s64 atomic64_read(const atomic64_t *v)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_read);
+EXPORT_SYMBOL(generic_atomic64_read);
 
-void atomic64_set(atomic64_t *v, s64 i)
+void generic_atomic64_set(atomic64_t *v, s64 i)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -64,10 +64,10 @@ void atomic64_set(atomic64_t *v, s64 i)
 	v->counter = i;
 	raw_spin_unlock_irqrestore(lock, flags);
 }
-EXPORT_SYMBOL(atomic64_set);
+EXPORT_SYMBOL(generic_atomic64_set);
 
 #define ATOMIC64_OP(op, c_op)						\
-void atomic64_##op(s64 a, atomic64_t *v)				\
+void generic_atomic64_##op(s64 a, atomic64_t *v)			\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -76,10 +76,10 @@ void atomic64_##op(s64 a, atomic64_t *v)				\
 	v->counter c_op a;						\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 }									\
-EXPORT_SYMBOL(atomic64_##op);
+EXPORT_SYMBOL(generic_atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, c_op)					\
-s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
+s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v)		\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -90,10 +90,10 @@ s64 atomic64_##op##_return(s64 a, atomic64_t *v)			\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 	return val;							\
 }									\
-EXPORT_SYMBOL(atomic64_##op##_return);
+EXPORT_SYMBOL(generic_atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op, c_op)					\
-s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
+s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v)			\
 {									\
 	unsigned long flags;						\
 	raw_spinlock_t *lock = lock_addr(v);				\
@@ -105,7 +105,7 @@ s64 atomic64_fetch_##op(s64 a, atomic64_t *v)				\
 	raw_spin_unlock_irqrestore(lock, flags);			\
 	return val;							\
 }									\
-EXPORT_SYMBOL(atomic64_fetch_##op);
+EXPORT_SYMBOL(generic_atomic64_fetch_##op);
 
 #define ATOMIC64_OPS(op, c_op)						\
 	ATOMIC64_OP(op, c_op)						\
@@ -130,7 +130,7 @@ ATOMIC64_OPS(xor, ^=)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-s64 atomic64_dec_if_positive(atomic64_t *v)
+s64 generic_atomic64_dec_if_positive(atomic64_t *v)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -143,9 +143,9 @@ s64 atomic64_dec_if_positive(atomic64_t *v)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_dec_if_positive);
+EXPORT_SYMBOL(generic_atomic64_dec_if_positive);
 
-s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -158,9 +158,9 @@ s64 atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_cmpxchg);
+EXPORT_SYMBOL(generic_atomic64_cmpxchg);
 
-s64 atomic64_xchg(atomic64_t *v, s64 new)
+s64 generic_atomic64_xchg(atomic64_t *v, s64 new)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -172,9 +172,9 @@ s64 atomic64_xchg(atomic64_t *v, s64 new)
 	raw_spin_unlock_irqrestore(lock, flags);
 	return val;
 }
-EXPORT_SYMBOL(atomic64_xchg);
+EXPORT_SYMBOL(generic_atomic64_xchg);
 
-s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	unsigned long flags;
 	raw_spinlock_t *lock = lock_addr(v);
@@ -188,4 +188,4 @@ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 
 	return val;
 }
-EXPORT_SYMBOL(atomic64_fetch_add_unless);
+EXPORT_SYMBOL(generic_atomic64_fetch_add_unless);
-- 
GitLab


From 6988631bdfddcedc1d27f83723ea36a442f00ea1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:10 +0100
Subject: [PATCH 1606/3804] locking/atomic: cmpxchg: make `generic` a prefix

The asm-generic implementations of cmpxchg_local() and cmpxchg64_local()
use a `_generic` suffix to distinguish themselves from arch code or
wrappers used elsewhere.

Subsequent patches will add ARCH_ATOMIC support to these
implementations, and will distinguish more functions with a `generic`
portion. To align with how ARCH_ATOMIC uses an `arch_` prefix, it would
be helpful to use a `generic_` prefix rather than a `_generic` suffix.

In preparation for this, this patch renames the existing functions to
make `generic` a prefix rather than a suffix. There should be no
functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-12-mark.rutland@arm.com
---
 arch/arm/include/asm/cmpxchg.h      | 6 +++---
 arch/m68k/include/asm/cmpxchg.h     | 2 +-
 arch/mips/include/asm/cmpxchg.h     | 2 +-
 arch/parisc/include/asm/cmpxchg.h   | 4 ++--
 arch/powerpc/include/asm/cmpxchg.h  | 2 +-
 arch/sparc/include/asm/cmpxchg_32.h | 4 ++--
 arch/sparc/include/asm/cmpxchg_64.h | 2 +-
 arch/xtensa/include/asm/cmpxchg.h   | 6 +++---
 include/asm-generic/cmpxchg-local.h | 4 ++--
 include/asm-generic/cmpxchg.h       | 4 ++--
 10 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 8b701f8e175c0..06bd8cea861ad 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -135,13 +135,13 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
  * them available.
  */
 #define cmpxchg_local(ptr, o, n) ({					\
-	(__typeof(*ptr))__cmpxchg_local_generic((ptr),			\
+	(__typeof(*ptr))__generic_cmpxchg_local((ptr),			\
 					        (unsigned long)(o),	\
 					        (unsigned long)(n),	\
 					        sizeof(*(ptr)));	\
 })
 
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 #include <asm-generic/cmpxchg.h>
 
@@ -224,7 +224,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 #ifdef CONFIG_CPU_V6	/* min ARCH == ARMv6 */
 	case 1:
 	case 2:
-		ret = __cmpxchg_local_generic(ptr, old, new, size);
+		ret = __generic_cmpxchg_local(ptr, old, new, size);
 		break;
 #endif
 	default:
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index a4aa82021d3b2..7629c9c1ed5b3 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -80,7 +80,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 
 #include <asm-generic/cmpxchg-local.h>
 
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 extern unsigned long __invalid_cmpxchg_size(volatile void *,
 					    unsigned long, unsigned long, int);
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index ed8f3f3c4304a..c7e0455d4d462 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -222,7 +222,7 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 #else
 
 # include <asm-generic/cmpxchg-local.h>
-# define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+# define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 # ifdef CONFIG_SMP
 
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index 84ee232278a6a..c2015654b684d 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -98,7 +98,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 #endif
 	case 4:	return __cmpxchg_u32(ptr, old, new_);
 	default:
-		return __cmpxchg_local_generic(ptr, old, new_, size);
+		return __generic_cmpxchg_local(ptr, old, new_, size);
 	}
 }
 
@@ -116,7 +116,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	cmpxchg_local((ptr), (o), (n));					\
 })
 #else
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #endif
 
 #define cmpxchg64(ptr, o, n) __cmpxchg_u64(ptr, o, n)
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e53..69f52fdcf0647 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -524,7 +524,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 })
 #else
 #include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index a53d744d42125..86e3da1d973d7 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -73,8 +73,8 @@ u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
  * them available.
  */
 #define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
 			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 #endif /* __ARCH_SPARC_CMPXCHG__ */
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 316faa0130bab..8915b577b92fe 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -189,7 +189,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	case 4:
 	case 8:	return __cmpxchg(ptr, old, new, size);
 	default:
-		return __cmpxchg_local_generic(ptr, old, new, size);
+		return __generic_cmpxchg_local(ptr, old, new, size);
 	}
 
 	return old;
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index a175f8aec3fbf..9c4d6e5316cee 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -97,7 +97,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	case 4:
 		return __cmpxchg_u32(ptr, old, new);
 	default:
-		return __cmpxchg_local_generic(ptr, old, new, size);
+		return __generic_cmpxchg_local(ptr, old, new, size);
 	}
 
 	return old;
@@ -108,9 +108,9 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
  * them available.
  */
 #define cmpxchg_local(ptr, o, n)				  	       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
 			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #define cmpxchg64(ptr, o, n)    cmpxchg64_local((ptr), (o), (n))
 
 /*
diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h
index f17f14f84d09e..380cdc824e4ba 100644
--- a/include/asm-generic/cmpxchg-local.h
+++ b/include/asm-generic/cmpxchg-local.h
@@ -12,7 +12,7 @@ extern unsigned long wrong_size_cmpxchg(volatile void *ptr)
  * Generic version of __cmpxchg_local (disables interrupts). Takes an unsigned
  * long parameter, supporting various types of architectures.
  */
-static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
+static inline unsigned long __generic_cmpxchg_local(volatile void *ptr,
 		unsigned long old, unsigned long new, int size)
 {
 	unsigned long flags, prev;
@@ -51,7 +51,7 @@ static inline unsigned long __cmpxchg_local_generic(volatile void *ptr,
 /*
  * Generic version of __cmpxchg64_local. Takes an u64 parameter.
  */
-static inline u64 __cmpxchg64_local_generic(volatile void *ptr,
+static inline u64 __generic_cmpxchg64_local(volatile void *ptr,
 		u64 old, u64 new)
 {
 	u64 prev;
diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index 9a24510cd8c18..b9d54c7afc526 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -94,13 +94,13 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
 
 #ifndef cmpxchg_local
 #define cmpxchg_local(ptr, o, n) ({					       \
-	((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
+	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
 			(unsigned long)(n), sizeof(*(ptr))));		       \
 })
 #endif
 
 #ifndef cmpxchg64_local
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #endif
 
 #define cmpxchg(ptr, o, n)	cmpxchg_local((ptr), (o), (n))
-- 
GitLab


From 82b993e8249ae3cb29c1b6eb8f6548f5748508b7 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:11 +0100
Subject: [PATCH 1607/3804] locking/atomic: cmpxchg: support ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as this will
enable functionality, and once all architectures are converted it will
be possible to make significant cleanups to the atomic headers.

A number of architectures use asm-generic/cmpxchg.h or
asm-generic/cmpxchg-local.h, and it's impractical to convert the headers
and all these architectures in one go. To make it possible to convert
them one-by-one, let's make the asm-generic implementation function as
either cmpxchg*() or arch_cmpxchg*() depending on whether ARCH_ATOMIC is
selected. To do this, the generic implementations are prefixed as
generic_cmpxchg_*(), and preprocessor definitions map
cmpxchg_*()/arch_cmpxchg_*() onto these as appropriate.

Once all users are moved over to ARCH_ATOMIC the ifdeffery in the header
can be simplified and/or removed entirely.

For existing users (none of which select ARCH_ATOMIC), there should be
no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-13-mark.rutland@arm.com
---
 include/asm-generic/cmpxchg.h | 61 +++++++++++++++++++++++++----------
 1 file changed, 44 insertions(+), 17 deletions(-)

diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index b9d54c7afc526..98c9311990894 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -14,16 +14,14 @@
 #include <linux/types.h>
 #include <linux/irqflags.h>
 
-#ifndef xchg
-
 /*
  * This function doesn't exist, so you'll get a linker error if
  * something tries to do an invalidly-sized xchg().
  */
-extern void __xchg_called_with_bad_pointer(void);
+extern void __generic_xchg_called_with_bad_pointer(void);
 
 static inline
-unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
+unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 {
 	unsigned long ret, flags;
 
@@ -75,35 +73,64 @@ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
 #endif /* CONFIG_64BIT */
 
 	default:
-		__xchg_called_with_bad_pointer();
+		__generic_xchg_called_with_bad_pointer();
 		return x;
 	}
 }
 
-#define xchg(ptr, x) ({							\
-	((__typeof__(*(ptr)))						\
-		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))));	\
+#define generic_xchg(ptr, x) ({							\
+	((__typeof__(*(ptr)))							\
+		__generic_xchg((unsigned long)(x), (ptr), sizeof(*(ptr))));	\
 })
 
-#endif /* xchg */
-
 /*
  * Atomic compare and exchange.
  */
 #include <asm-generic/cmpxchg-local.h>
 
-#ifndef cmpxchg_local
-#define cmpxchg_local(ptr, o, n) ({					       \
-	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
-			(unsigned long)(n), sizeof(*(ptr))));		       \
+#define generic_cmpxchg_local(ptr, o, n) ({					\
+	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),	\
+			(unsigned long)(n), sizeof(*(ptr))));			\
 })
+
+#define generic_cmpxchg64_local(ptr, o, n) \
+	__generic_cmpxchg64_local((ptr), (o), (n))
+
+
+#ifdef CONFIG_ARCH_ATOMIC
+
+#ifndef arch_xchg
+#define arch_xchg		generic_xchg
+#endif
+
+#ifndef arch_cmpxchg_local
+#define arch_cmpxchg_local	generic_cmpxchg_local
+#endif
+
+#ifndef arch_cmpxchg64_local
+#define arch_cmpxchg64_local	generic_cmpxchg64_local
+#endif
+
+#define arch_cmpxchg		arch_cmpxchg_local
+#define arch_cmpxchg64		arch_cmpxchg64_local
+
+#else /* CONFIG_ARCH_ATOMIC */
+
+#ifndef xchg
+#define xchg			generic_xchg
+#endif
+
+#ifndef cmpxchg_local
+#define cmpxchg_local		generic_cmpxchg_local
 #endif
 
 #ifndef cmpxchg64_local
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define cmpxchg64_local		generic_cmpxchg64_local
 #endif
 
-#define cmpxchg(ptr, o, n)	cmpxchg_local((ptr), (o), (n))
-#define cmpxchg64(ptr, o, n)	cmpxchg64_local((ptr), (o), (n))
+#define cmpxchg			cmpxchg_local
+#define cmpxchg64		cmpxchg64_local
+
+#endif /* CONFIG_ARCH_ATOMIC */
 
 #endif /* __ASM_GENERIC_CMPXCHG_H */
-- 
GitLab


From 96d330aff7060f0882a5440ddb281cc3ab232d96 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:12 +0100
Subject: [PATCH 1608/3804] locking/atomic: alpha: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates alpha to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Note: xchg_local() is NOT currently part of the generic atomic
arch_atomic API, and is not instrumented.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-14-mark.rutland@arm.com
---
 arch/alpha/Kconfig               |  1 +
 arch/alpha/include/asm/atomic.h  | 88 +++++++++++++++++---------------
 arch/alpha/include/asm/cmpxchg.h | 12 ++---
 3 files changed, 54 insertions(+), 47 deletions(-)

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 5998106faa600..7920fc2e2a2a6 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -2,6 +2,7 @@
 config ALPHA
 	bool
 	default y
+	select ARCH_ATOMIC
 	select ARCH_32BIT_USTAT_F_TINODE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index e41c113c66883..f2861a43a61ef 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -26,11 +26,11 @@
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic64_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
-#define atomic64_set(v,i)	WRITE_ONCE((v)->counter, (i))
+#define arch_atomic_set(v,i)	WRITE_ONCE((v)->counter, (i))
+#define arch_atomic64_set(v,i)	WRITE_ONCE((v)->counter, (i))
 
 /*
  * To get proper branch prediction for the main line, we must branch
@@ -39,7 +39,7 @@
  */
 
 #define ATOMIC_OP(op, asm_op)						\
-static __inline__ void atomic_##op(int i, atomic_t * v)			\
+static __inline__ void arch_atomic_##op(int i, atomic_t * v)		\
 {									\
 	unsigned long temp;						\
 	__asm__ __volatile__(						\
@@ -55,7 +55,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, asm_op)					\
-static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 {									\
 	long temp, result;						\
 	__asm__ __volatile__(						\
@@ -74,7 +74,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 }
 
 #define ATOMIC_FETCH_OP(op, asm_op)					\
-static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
 {									\
 	long temp, result;						\
 	__asm__ __volatile__(						\
@@ -92,7 +92,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
 }
 
 #define ATOMIC64_OP(op, asm_op)						\
-static __inline__ void atomic64_##op(s64 i, atomic64_t * v)		\
+static __inline__ void arch_atomic64_##op(s64 i, atomic64_t * v)	\
 {									\
 	s64 temp;							\
 	__asm__ __volatile__(						\
@@ -108,7 +108,8 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t * v)		\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, asm_op)					\
-static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)	\
+static __inline__ s64							\
+arch_atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)		\
 {									\
 	s64 temp, result;						\
 	__asm__ __volatile__(						\
@@ -127,7 +128,8 @@ static __inline__ s64 atomic64_##op##_return_relaxed(s64 i, atomic64_t * v)	\
 }
 
 #define ATOMIC64_FETCH_OP(op, asm_op)					\
-static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)	\
+static __inline__ s64							\
+arch_atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)		\
 {									\
 	s64 temp, result;						\
 	__asm__ __volatile__(						\
@@ -155,18 +157,18 @@ static __inline__ s64 atomic64_fetch_##op##_relaxed(s64 i, atomic64_t * v)	\
 ATOMIC_OPS(add)
 ATOMIC_OPS(sub)
 
-#define atomic_add_return_relaxed	atomic_add_return_relaxed
-#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
-#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
+#define arch_atomic_add_return_relaxed		arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed		arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add_relaxed		arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed		arch_atomic_fetch_sub_relaxed
 
-#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed
 
-#define atomic_andnot atomic_andnot
-#define atomic64_andnot atomic64_andnot
+#define arch_atomic_andnot			arch_atomic_andnot
+#define arch_atomic64_andnot			arch_atomic64_andnot
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, asm)						\
@@ -180,15 +182,15 @@ ATOMIC_OPS(andnot, bic)
 ATOMIC_OPS(or, bis)
 ATOMIC_OPS(xor, xor)
 
-#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
-#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot_relaxed
-#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed		arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed	arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed		arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed		arch_atomic_fetch_xor_relaxed
 
-#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
-#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot_relaxed
-#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
 #undef ATOMIC64_FETCH_OP
@@ -198,14 +200,18 @@ ATOMIC_OPS(xor, xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic64_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic64_cmpxchg(v, old, new) \
+	(arch_cmpxchg(&((v)->counter), old, new))
+#define arch_atomic64_xchg(v, new) \
+	(arch_xchg(&((v)->counter), new))
 
-#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, old, new) \
+	(arch_cmpxchg(&((v)->counter), old, new))
+#define arch_atomic_xchg(v, new) \
+	(arch_xchg(&((v)->counter), new))
 
 /**
- * atomic_fetch_add_unless - add unless the number is a given value
+ * arch_atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -213,7 +219,7 @@ ATOMIC_OPS(xor, xor)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c, new, old;
 	smp_mb();
@@ -234,10 +240,10 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	smp_mb();
 	return old;
 }
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
 /**
- * atomic64_fetch_add_unless - add unless the number is a given value
+ * arch_atomic64_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -245,7 +251,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 c, new, old;
 	smp_mb();
@@ -266,16 +272,16 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 	smp_mb();
 	return old;
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 
 /*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic_t
  *
  * The function returns the old value of *v minus 1, even if
  * the atomic variable, v, was not decremented.
  */
-static inline s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 old, tmp;
 	smp_mb();
@@ -295,6 +301,6 @@ static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 	smp_mb();
 	return old - 1;
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 6c7c394524714..6e0a850aa9d38 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -17,7 +17,7 @@
 				       sizeof(*(ptr)));			\
 })
 
-#define cmpxchg_local(ptr, o, n)					\
+#define arch_cmpxchg_local(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -26,7 +26,7 @@
 					  sizeof(*(ptr)));		\
 })
 
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
 	cmpxchg_local((ptr), (o), (n));					\
@@ -42,7 +42,7 @@
  * The leading and the trailing memory barriers guarantee that these
  * operations are fully ordered.
  */
-#define xchg(ptr, x)							\
+#define arch_xchg(ptr, x)						\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	__typeof__(*(ptr)) _x_ = (x);					\
@@ -53,7 +53,7 @@
 	__ret;								\
 })
 
-#define cmpxchg(ptr, o, n)						\
+#define arch_cmpxchg(ptr, o, n)						\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	__typeof__(*(ptr)) _o_ = (o);					\
@@ -65,10 +65,10 @@
 	__ret;								\
 })
 
-#define cmpxchg64(ptr, o, n)						\
+#define arch_cmpxchg64(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
+	arch_cmpxchg((ptr), (o), (n));					\
 })
 
 #undef ____cmpxchg
-- 
GitLab


From 6db5d99304dce6d3b9b1251b788f0ff6aaf1c054 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:13 +0100
Subject: [PATCH 1609/3804] locking/atomic: arc: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates arc to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-15-mark.rutland@arm.com
---
 arch/arc/Kconfig               |  1 +
 arch/arc/include/asm/atomic.h  | 60 +++++++++++++++++-----------------
 arch/arc/include/asm/cmpxchg.h | 10 +++---
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 2d98501c08971..098ecc72d0488 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,6 +6,7 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_ATOMIC
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 5afc79c9b2f5b..7a36d79b5b2f6 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -14,14 +14,14 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#define atomic_read(v)  READ_ONCE((v)->counter)
+#define arch_atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned int val;						\
 									\
@@ -37,7 +37,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned int val;						\
 									\
@@ -63,7 +63,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned int val, orig;						\
 									\
@@ -94,11 +94,11 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 #ifndef CONFIG_SMP
 
  /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #else
 
-static inline void atomic_set(atomic_t *v, int i)
+static inline void arch_atomic_set(atomic_t *v, int i)
 {
 	/*
 	 * Independent of hardware support, all of the atomic_xxx() APIs need
@@ -116,7 +116,7 @@ static inline void atomic_set(atomic_t *v, int i)
 	atomic_ops_unlock(flags);
 }
 
-#define atomic_set_release(v, i)	atomic_set((v), (i))
+#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
 
 #endif
 
@@ -126,7 +126,7 @@ static inline void atomic_set(atomic_t *v, int i)
  */
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long flags;						\
 									\
@@ -136,7 +136,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 	unsigned long temp;						\
@@ -154,7 +154,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 	unsigned long orig;						\
@@ -180,9 +180,6 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define atomic_andnot		atomic_andnot
-#define atomic_fetch_andnot	atomic_fetch_andnot
-
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
 	ATOMIC_OP(op, c_op, asm_op)					\
@@ -193,6 +190,9 @@ ATOMIC_OPS(andnot, &= ~, bic)
 ATOMIC_OPS(or, |=, or)
 ATOMIC_OPS(xor, ^=, xor)
 
+#define arch_atomic_andnot		arch_atomic_andnot
+#define arch_atomic_fetch_andnot	arch_atomic_fetch_andnot
+
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
@@ -220,7 +220,7 @@ typedef struct {
 
 #define ATOMIC64_INIT(a) { (a) }
 
-static inline s64 atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
 	s64 val;
 
@@ -232,7 +232,7 @@ static inline s64 atomic64_read(const atomic64_t *v)
 	return val;
 }
 
-static inline void atomic64_set(atomic64_t *v, s64 a)
+static inline void arch_atomic64_set(atomic64_t *v, s64 a)
 {
 	/*
 	 * This could have been a simple assignment in "C" but would need
@@ -253,7 +253,7 @@ static inline void atomic64_set(atomic64_t *v, s64 a)
 }
 
 #define ATOMIC64_OP(op, op1, op2)					\
-static inline void atomic64_##op(s64 a, atomic64_t *v)			\
+static inline void arch_atomic64_##op(s64 a, atomic64_t *v)		\
 {									\
 	s64 val;							\
 									\
@@ -270,7 +270,7 @@ static inline void atomic64_##op(s64 a, atomic64_t *v)			\
 }									\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
-static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v)		\
+static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v)	\
 {									\
 	s64 val;							\
 									\
@@ -293,7 +293,7 @@ static inline s64 atomic64_##op##_return(s64 a, atomic64_t *v)		\
 }
 
 #define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
-static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v)		\
+static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v)	\
 {									\
 	s64 val, orig;							\
 									\
@@ -320,9 +320,6 @@ static inline s64 atomic64_fetch_##op(s64 a, atomic64_t *v)		\
 	ATOMIC64_OP_RETURN(op, op1, op2)				\
 	ATOMIC64_FETCH_OP(op, op1, op2)
 
-#define atomic64_andnot		atomic64_andnot
-#define atomic64_fetch_andnot	atomic64_fetch_andnot
-
 ATOMIC64_OPS(add, add.f, adc)
 ATOMIC64_OPS(sub, sub.f, sbc)
 ATOMIC64_OPS(and, and, and)
@@ -330,13 +327,16 @@ ATOMIC64_OPS(andnot, bic, bic)
 ATOMIC64_OPS(or, or, or)
 ATOMIC64_OPS(xor, xor, xor)
 
+#define arch_atomic64_andnot		arch_atomic64_andnot
+#define arch_atomic64_fetch_andnot	arch_atomic64_fetch_andnot
+
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
 static inline s64
-atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
+arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
 {
 	s64 prev;
 
@@ -358,7 +358,7 @@ atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new)
 	return prev;
 }
 
-static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
+static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new)
 {
 	s64 prev;
 
@@ -379,14 +379,14 @@ static inline s64 atomic64_xchg(atomic64_t *ptr, s64 new)
 }
 
 /**
- * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * arch_atomic64_dec_if_positive - decrement by 1 if old value positive
  * @v: pointer of type atomic64_t
  *
  * The function returns the old value of *v minus 1, even if
  * the atomic variable, v, was not decremented.
  */
 
-static inline s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 val;
 
@@ -408,10 +408,10 @@ static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 
 	return val;
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
 /**
- * atomic64_fetch_add_unless - add unless the number is a given value
+ * arch_atomic64_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic64_t
  * @a: the amount to add to v...
  * @u: ...unless v is equal to u.
@@ -419,7 +419,7 @@ static inline s64 atomic64_dec_if_positive(atomic64_t *v)
  * Atomically adds @a to @v, if it was not @u.
  * Returns the old value of @v
  */
-static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 old, temp;
 
@@ -443,7 +443,7 @@ static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 
 	return old;
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 
 #endif	/* !CONFIG_GENERIC_ATOMIC64 */
 
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h
index 9b87e162e539b..d1781bdf65276 100644
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -63,7 +63,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
 
 #endif
 
-#define cmpxchg(ptr, o, n) ({				\
+#define arch_cmpxchg(ptr, o, n) ({			\
 	(typeof(*(ptr)))__cmpxchg((ptr),		\
 				  (unsigned long)(o),	\
 				  (unsigned long)(n));	\
@@ -75,7 +75,7 @@ __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new)
  *  !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee
  *         semantics, and this lock also happens to be used by atomic_*()
  */
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
 
 
 /*
@@ -123,7 +123,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
 #if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
 
-#define xchg(ptr, with)			\
+#define arch_xchg(ptr, with)		\
 ({					\
 	unsigned long flags;		\
 	typeof(*(ptr)) old_val;		\
@@ -136,7 +136,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 
 #else
 
-#define xchg(ptr, with)  _xchg(ptr, with)
+#define arch_xchg(ptr, with)  _xchg(ptr, with)
 
 #endif
 
@@ -153,6 +153,6 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
  *         can't be clobbered by others. Thus no serialization required when
  *         atomic_xchg is involved.
  */
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
 #endif
-- 
GitLab


From fc63a6e08a8c97a3dc3a6f2e1946b949b9a6c2d3 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:14 +0100
Subject: [PATCH 1610/3804] locking/atomic: arm: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates arm to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-16-mark.rutland@arm.com
---
 arch/arm/Kconfig                   |  1 +
 arch/arm/include/asm/atomic.h      | 96 +++++++++++++++---------------
 arch/arm/include/asm/cmpxchg.h     | 16 ++---
 arch/arm/include/asm/sync_bitops.h |  2 +-
 4 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 24804f11302d7..b7334a6643b9c 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,6 +3,7 @@ config ARM
 	bool
 	default y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h
index 455eb19a5ac14..db8512d9a918d 100644
--- a/arch/arm/include/asm/atomic.h
+++ b/arch/arm/include/asm/atomic.h
@@ -22,8 +22,8 @@
  * strex/ldrex monitor on some implementations. The reason we can use it for
  * atomic_set() is the clrex or dummy strex done on every exception return.
  */
-#define atomic_read(v)	READ_ONCE((v)->counter)
-#define atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
@@ -34,7 +34,7 @@
  */
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -52,7 +52,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -73,7 +73,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)	\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
 {									\
 	unsigned long tmp;						\
 	int result, val;						\
@@ -93,17 +93,17 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
 	return result;							\
 }
 
-#define atomic_add_return_relaxed	atomic_add_return_relaxed
-#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
-#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
+#define arch_atomic_add_return_relaxed		arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed		arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add_relaxed		arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed		arch_atomic_fetch_sub_relaxed
 
-#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
-#define atomic_fetch_andnot_relaxed	atomic_fetch_andnot_relaxed
-#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed		arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_andnot_relaxed	arch_atomic_fetch_andnot_relaxed
+#define arch_atomic_fetch_or_relaxed		arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed		arch_atomic_fetch_xor_relaxed
 
-static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
+static inline int arch_atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 {
 	int oldval;
 	unsigned long res;
@@ -123,9 +123,9 @@ static inline int atomic_cmpxchg_relaxed(atomic_t *ptr, int old, int new)
 
 	return oldval;
 }
-#define atomic_cmpxchg_relaxed		atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_relaxed		arch_atomic_cmpxchg_relaxed
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int oldval, newval;
 	unsigned long tmp;
@@ -151,7 +151,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return oldval;
 }
-#define atomic_fetch_add_unless		atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless		arch_atomic_fetch_add_unless
 
 #else /* ARM_ARCH_6 */
 
@@ -160,7 +160,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 #endif
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long flags;						\
 									\
@@ -170,7 +170,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 	int val;							\
@@ -184,7 +184,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 	int val;							\
@@ -197,7 +197,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 	return val;							\
 }
 
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
 	unsigned long flags;
@@ -211,7 +211,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
-#define atomic_fetch_andnot		atomic_fetch_andnot
+#define arch_atomic_fetch_andnot		arch_atomic_fetch_andnot
 
 #endif /* __LINUX_ARM_ARCH__ */
 
@@ -223,7 +223,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 ATOMIC_OPS(add, +=, add)
 ATOMIC_OPS(sub, -=, sub)
 
-#define atomic_andnot atomic_andnot
+#define arch_atomic_andnot arch_atomic_andnot
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
@@ -240,7 +240,7 @@ ATOMIC_OPS(xor, ^=, eor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 typedef struct {
@@ -250,7 +250,7 @@ typedef struct {
 #define ATOMIC64_INIT(i) { (i) }
 
 #ifdef CONFIG_ARM_LPAE
-static inline s64 atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
 	s64 result;
 
@@ -263,7 +263,7 @@ static inline s64 atomic64_read(const atomic64_t *v)
 	return result;
 }
 
-static inline void atomic64_set(atomic64_t *v, s64 i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
 	__asm__ __volatile__("@ atomic64_set\n"
 "	strd	%2, %H2, [%1]"
@@ -272,7 +272,7 @@ static inline void atomic64_set(atomic64_t *v, s64 i)
 	);
 }
 #else
-static inline s64 atomic64_read(const atomic64_t *v)
+static inline s64 arch_atomic64_read(const atomic64_t *v)
 {
 	s64 result;
 
@@ -285,7 +285,7 @@ static inline s64 atomic64_read(const atomic64_t *v)
 	return result;
 }
 
-static inline void atomic64_set(atomic64_t *v, s64 i)
+static inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
 	s64 tmp;
 
@@ -302,7 +302,7 @@ static inline void atomic64_set(atomic64_t *v, s64 i)
 #endif
 
 #define ATOMIC64_OP(op, op1, op2)					\
-static inline void atomic64_##op(s64 i, atomic64_t *v)			\
+static inline void arch_atomic64_##op(s64 i, atomic64_t *v)		\
 {									\
 	s64 result;							\
 	unsigned long tmp;						\
@@ -322,7 +322,7 @@ static inline void atomic64_##op(s64 i, atomic64_t *v)			\
 
 #define ATOMIC64_OP_RETURN(op, op1, op2)				\
 static inline s64							\
-atomic64_##op##_return_relaxed(s64 i, atomic64_t *v)			\
+arch_atomic64_##op##_return_relaxed(s64 i, atomic64_t *v)		\
 {									\
 	s64 result;							\
 	unsigned long tmp;						\
@@ -345,7 +345,7 @@ atomic64_##op##_return_relaxed(s64 i, atomic64_t *v)			\
 
 #define ATOMIC64_FETCH_OP(op, op1, op2)					\
 static inline s64							\
-atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v)			\
+arch_atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v)		\
 {									\
 	s64 result, val;						\
 	unsigned long tmp;						\
@@ -374,34 +374,34 @@ atomic64_fetch_##op##_relaxed(s64 i, atomic64_t *v)			\
 ATOMIC64_OPS(add, adds, adc)
 ATOMIC64_OPS(sub, subs, sbc)
 
-#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
-#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add_relaxed		arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed		arch_atomic64_fetch_sub_relaxed
 
 #undef ATOMIC64_OPS
 #define ATOMIC64_OPS(op, op1, op2)					\
 	ATOMIC64_OP(op, op1, op2)					\
 	ATOMIC64_FETCH_OP(op, op1, op2)
 
-#define atomic64_andnot atomic64_andnot
+#define arch_atomic64_andnot arch_atomic64_andnot
 
 ATOMIC64_OPS(and, and, and)
 ATOMIC64_OPS(andnot, bic, bic)
 ATOMIC64_OPS(or,  orr, orr)
 ATOMIC64_OPS(xor, eor, eor)
 
-#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
-#define atomic64_fetch_andnot_relaxed	atomic64_fetch_andnot_relaxed
-#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_relaxed		arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_andnot_relaxed	arch_atomic64_fetch_andnot_relaxed
+#define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed		arch_atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
+static inline s64 arch_atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
 {
 	s64 oldval;
 	unsigned long res;
@@ -422,9 +422,9 @@ static inline s64 atomic64_cmpxchg_relaxed(atomic64_t *ptr, s64 old, s64 new)
 
 	return oldval;
 }
-#define atomic64_cmpxchg_relaxed	atomic64_cmpxchg_relaxed
+#define arch_atomic64_cmpxchg_relaxed	arch_atomic64_cmpxchg_relaxed
 
-static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
+static inline s64 arch_atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
 {
 	s64 result;
 	unsigned long tmp;
@@ -442,9 +442,9 @@ static inline s64 atomic64_xchg_relaxed(atomic64_t *ptr, s64 new)
 
 	return result;
 }
-#define atomic64_xchg_relaxed		atomic64_xchg_relaxed
+#define arch_atomic64_xchg_relaxed		arch_atomic64_xchg_relaxed
 
-static inline s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 result;
 	unsigned long tmp;
@@ -470,9 +470,9 @@ static inline s64 atomic64_dec_if_positive(atomic64_t *v)
 
 	return result;
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
-static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 oldval, newval;
 	unsigned long tmp;
@@ -500,7 +500,7 @@ static inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 
 	return oldval;
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 
 #endif /* !CONFIG_GENERIC_ATOMIC64 */
 #endif
diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h
index 06bd8cea861ad..4dfe538dfc689 100644
--- a/arch/arm/include/asm/cmpxchg.h
+++ b/arch/arm/include/asm/cmpxchg.h
@@ -114,7 +114,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 	return ret;
 }
 
-#define xchg_relaxed(ptr, x) ({						\
+#define arch_xchg_relaxed(ptr, x) ({					\
 	(__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr),		\
 				   sizeof(*(ptr)));			\
 })
@@ -128,20 +128,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 #error "SMP is not supported on this platform"
 #endif
 
-#define xchg xchg_relaxed
+#define arch_xchg arch_xchg_relaxed
 
 /*
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
  */
-#define cmpxchg_local(ptr, o, n) ({					\
+#define arch_cmpxchg_local(ptr, o, n) ({				\
 	(__typeof(*ptr))__generic_cmpxchg_local((ptr),			\
 					        (unsigned long)(o),	\
 					        (unsigned long)(n),	\
 					        sizeof(*(ptr)));	\
 })
 
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 #include <asm-generic/cmpxchg.h>
 
@@ -207,7 +207,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	return oldval;
 }
 
-#define cmpxchg_relaxed(ptr,o,n) ({					\
+#define arch_cmpxchg_relaxed(ptr,o,n) ({				\
 	(__typeof__(*(ptr)))__cmpxchg((ptr),				\
 				      (unsigned long)(o),		\
 				      (unsigned long)(n),		\
@@ -234,7 +234,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	return ret;
 }
 
-#define cmpxchg_local(ptr, o, n) ({					\
+#define arch_cmpxchg_local(ptr, o, n) ({				\
 	(__typeof(*ptr))__cmpxchg_local((ptr),				\
 				        (unsigned long)(o),		\
 				        (unsigned long)(n),		\
@@ -266,13 +266,13 @@ static inline unsigned long long __cmpxchg64(unsigned long long *ptr,
 	return oldval;
 }
 
-#define cmpxchg64_relaxed(ptr, o, n) ({					\
+#define arch_cmpxchg64_relaxed(ptr, o, n) ({				\
 	(__typeof__(*(ptr)))__cmpxchg64((ptr),				\
 					(unsigned long long)(o),	\
 					(unsigned long long)(n));	\
 })
 
-#define cmpxchg64_local(ptr, o, n) cmpxchg64_relaxed((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) arch_cmpxchg64_relaxed((ptr), (o), (n))
 
 #endif	/* __LINUX_ARM_ARCH__ >= 6 */
 
diff --git a/arch/arm/include/asm/sync_bitops.h b/arch/arm/include/asm/sync_bitops.h
index 39ff217136d1f..6f5d627c44a3c 100644
--- a/arch/arm/include/asm/sync_bitops.h
+++ b/arch/arm/include/asm/sync_bitops.h
@@ -21,7 +21,7 @@
 #define sync_test_and_clear_bit(nr, p)	_test_and_clear_bit(nr, p)
 #define sync_test_and_change_bit(nr, p)	_test_and_change_bit(nr, p)
 #define sync_test_bit(nr, addr)		test_bit(nr, addr)
-#define sync_cmpxchg			cmpxchg
+#define arch_sync_cmpxchg		arch_cmpxchg
 
 
 #endif
-- 
GitLab


From a5fb82d7e2695e667badeac202fb7d113a8ae9a9 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:15 +0100
Subject: [PATCH 1611/3804] locking/atomic: csky: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates csky to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Guo Ren <guoren@kernel.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-17-mark.rutland@arm.com
---
 arch/csky/Kconfig               | 1 +
 arch/csky/include/asm/cmpxchg.h | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 8de5b987edb9f..3521f14bcd969 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -2,6 +2,7 @@
 config CSKY
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/csky/include/asm/cmpxchg.h b/arch/csky/include/asm/cmpxchg.h
index dabc8e46ce7b4..d1bef11f8dc97 100644
--- a/arch/csky/include/asm/cmpxchg.h
+++ b/arch/csky/include/asm/cmpxchg.h
@@ -31,7 +31,7 @@ extern void __bad_xchg(void);
 	__ret;							\
 })
 
-#define xchg_relaxed(ptr, x) \
+#define arch_xchg_relaxed(ptr, x) \
 		(__xchg_relaxed((x), (ptr), sizeof(*(ptr))))
 
 #define __cmpxchg_relaxed(ptr, old, new, size)			\
@@ -61,14 +61,14 @@ extern void __bad_xchg(void);
 	__ret;							\
 })
 
-#define cmpxchg_relaxed(ptr, o, n) \
+#define arch_cmpxchg_relaxed(ptr, o, n) \
 	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 
-#define cmpxchg(ptr, o, n) 					\
+#define arch_cmpxchg(ptr, o, n) 				\
 ({								\
 	__typeof__(*(ptr)) __ret;				\
 	__smp_release_fence();					\
-	__ret = cmpxchg_relaxed(ptr, o, n);			\
+	__ret = arch_cmpxchg_relaxed(ptr, o, n);		\
 	__smp_acquire_fence();					\
 	__ret;							\
 })
-- 
GitLab


From c879c39ebc3a9bea280675840d623a40b4636c80 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:16 +0100
Subject: [PATCH 1612/3804] locking/atomic: h8300: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates h8300 to ARCH_ATOMIC, using
the asm-generic implementations.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-18-mark.rutland@arm.com
---
 arch/h8300/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 3e3e0f16f7e0a..bdf05ad3206a3 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -2,6 +2,7 @@
 config H8300
         def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
 	select BINFMT_FLAT_OLD_ALWAYS_RAM
-- 
GitLab


From 94b63eb6e131a7fe94f1c1eb8e10162931506176 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:17 +0100
Subject: [PATCH 1613/3804] locking/atomic: hexagon: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates hexagon to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Brian Cain <bcain@codeaurora.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-19-mark.rutland@arm.com
---
 arch/hexagon/Kconfig               |  1 +
 arch/hexagon/include/asm/atomic.h  | 28 ++++++++++++++--------------
 arch/hexagon/include/asm/cmpxchg.h |  4 ++--
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 44a409967af1c..1368954ef679b 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -5,6 +5,7 @@ comment "Linux Kernel Configuration for Hexagon"
 config HEXAGON
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_PREEMPT
 	# Other pending projects/to-do items.
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 4ab895d7111f6..6e94f8d04146f 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -14,7 +14,7 @@
 
 /*  Normal writes in our arch don't clear lock reservations  */
 
-static inline void atomic_set(atomic_t *v, int new)
+static inline void arch_atomic_set(atomic_t *v, int new)
 {
 	asm volatile(
 		"1:	r6 = memw_locked(%0);\n"
@@ -26,26 +26,26 @@ static inline void atomic_set(atomic_t *v, int new)
 	);
 }
 
-#define atomic_set_release(v, i)	atomic_set((v), (i))
+#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
 
 /**
- * atomic_read - reads a word, atomically
+ * arch_atomic_read - reads a word, atomically
  * @v: pointer to atomic value
  *
  * Assumes all word reads on our architecture are atomic.
  */
-#define atomic_read(v)		READ_ONCE((v)->counter)
+#define arch_atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
- * atomic_xchg - atomic
+ * arch_atomic_xchg - atomic
  * @v: pointer to memory to change
  * @new: new value (technically passed in a register -- see xchg)
  */
-#define atomic_xchg(v, new)	(xchg(&((v)->counter), (new)))
+#define arch_atomic_xchg(v, new)	(arch_xchg(&((v)->counter), (new)))
 
 
 /**
- * atomic_cmpxchg - atomic compare-and-exchange values
+ * arch_atomic_cmpxchg - atomic compare-and-exchange values
  * @v: pointer to value to change
  * @old:  desired old value to match
  * @new:  new value to put in
@@ -61,7 +61,7 @@ static inline void atomic_set(atomic_t *v, int new)
  *
  * "old" is "expected" old val, __oldval is actual old value
  */
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int __oldval;
 
@@ -81,7 +81,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 }
 
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	int output;							\
 									\
@@ -97,7 +97,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	int output;							\
 									\
@@ -114,7 +114,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	int output, val;						\
 									\
@@ -148,7 +148,7 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP
 
 /**
- * atomic_fetch_add_unless - add unless the number is a given value
+ * arch_atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer to value
  * @a: amount to add
  * @u: unless value is equal to u
@@ -157,7 +157,7 @@ ATOMIC_OPS(xor)
  *
  */
 
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int __oldval;
 	register int tmp;
@@ -180,6 +180,6 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	);
 	return __oldval;
 }
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
 #endif
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 92b8a02e588ac..cdb705e1496af 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -42,7 +42,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
  * Atomically swap the contents of a register with memory.  Should be atomic
  * between multiple CPU's and within interrupts on the same CPU.
  */
-#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), \
+#define arch_xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), \
 	sizeof(*(ptr))))
 
 /*
@@ -51,7 +51,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
  *  variable casting.
  */
 
-#define cmpxchg(ptr, old, new)					\
+#define arch_cmpxchg(ptr, old, new)				\
 ({								\
 	__typeof__(ptr) __ptr = (ptr);				\
 	__typeof__(*(ptr)) __old = (old);			\
-- 
GitLab


From f84f1b9c47a55eb8db4ba5270a504f78c316ce1d Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:18 +0100
Subject: [PATCH 1614/3804] locking/atomic: ia64: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates ia64 to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-20-mark.rutland@arm.com
---
 arch/ia64/Kconfig                    |  1 +
 arch/ia64/include/asm/atomic.h       | 74 ++++++++++++++--------------
 arch/ia64/include/asm/cmpxchg.h      | 16 ++++++
 arch/ia64/include/uapi/asm/cmpxchg.h | 10 ++--
 4 files changed, 61 insertions(+), 40 deletions(-)
 create mode 100644 arch/ia64/include/asm/cmpxchg.h

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 279252e3e0f74..c5414dcd5d0de 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,6 +8,7 @@ menu "Processor type and features"
 
 config IA64
 	bool
+	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h
index f267d956458f5..266c429b91372 100644
--- a/arch/ia64/include/asm/atomic.h
+++ b/arch/ia64/include/asm/atomic.h
@@ -21,11 +21,11 @@
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic64_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		WRITE_ONCE(((v)->counter), (i))
-#define atomic64_set(v,i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic64_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op, c_op)						\
 static __inline__ int							\
@@ -36,7 +36,7 @@ ia64_atomic_##op (int i, atomic_t *v)					\
 									\
 	do {								\
 		CMPXCHG_BUGCHECK(v);					\
-		old = atomic_read(v);					\
+		old = arch_atomic_read(v);				\
 		new = old c_op i;					\
 	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \
 	return new;							\
@@ -51,7 +51,7 @@ ia64_atomic_fetch_##op (int i, atomic_t *v)				\
 									\
 	do {								\
 		CMPXCHG_BUGCHECK(v);					\
-		old = atomic_read(v);					\
+		old = arch_atomic_read(v);				\
 		new = old c_op i;					\
 	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \
 	return old;							\
@@ -74,7 +74,7 @@ ATOMIC_OPS(sub, -)
 #define __ia64_atomic_const(i)	0
 #endif
 
-#define atomic_add_return(i,v)						\
+#define arch_atomic_add_return(i,v)					\
 ({									\
 	int __ia64_aar_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -82,7 +82,7 @@ ATOMIC_OPS(sub, -)
 		: ia64_atomic_add(__ia64_aar_i, v);			\
 })
 
-#define atomic_sub_return(i,v)						\
+#define arch_atomic_sub_return(i,v)					\
 ({									\
 	int __ia64_asr_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -90,7 +90,7 @@ ATOMIC_OPS(sub, -)
 		: ia64_atomic_sub(__ia64_asr_i, v);			\
 })
 
-#define atomic_fetch_add(i,v)						\
+#define arch_atomic_fetch_add(i,v)					\
 ({									\
 	int __ia64_aar_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -98,7 +98,7 @@ ATOMIC_OPS(sub, -)
 		: ia64_atomic_fetch_add(__ia64_aar_i, v);		\
 })
 
-#define atomic_fetch_sub(i,v)						\
+#define arch_atomic_fetch_sub(i,v)					\
 ({									\
 	int __ia64_asr_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -110,13 +110,13 @@ ATOMIC_FETCH_OP(and, &)
 ATOMIC_FETCH_OP(or, |)
 ATOMIC_FETCH_OP(xor, ^)
 
-#define atomic_and(i,v)	(void)ia64_atomic_fetch_and(i,v)
-#define atomic_or(i,v)	(void)ia64_atomic_fetch_or(i,v)
-#define atomic_xor(i,v)	(void)ia64_atomic_fetch_xor(i,v)
+#define arch_atomic_and(i,v)	(void)ia64_atomic_fetch_and(i,v)
+#define arch_atomic_or(i,v)	(void)ia64_atomic_fetch_or(i,v)
+#define arch_atomic_xor(i,v)	(void)ia64_atomic_fetch_xor(i,v)
 
-#define atomic_fetch_and(i,v)	ia64_atomic_fetch_and(i,v)
-#define atomic_fetch_or(i,v)	ia64_atomic_fetch_or(i,v)
-#define atomic_fetch_xor(i,v)	ia64_atomic_fetch_xor(i,v)
+#define arch_atomic_fetch_and(i,v)	ia64_atomic_fetch_and(i,v)
+#define arch_atomic_fetch_or(i,v)	ia64_atomic_fetch_or(i,v)
+#define arch_atomic_fetch_xor(i,v)	ia64_atomic_fetch_xor(i,v)
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP
@@ -131,7 +131,7 @@ ia64_atomic64_##op (s64 i, atomic64_t *v)				\
 									\
 	do {								\
 		CMPXCHG_BUGCHECK(v);					\
-		old = atomic64_read(v);					\
+		old = arch_atomic64_read(v);				\
 		new = old c_op i;					\
 	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \
 	return new;							\
@@ -146,7 +146,7 @@ ia64_atomic64_fetch_##op (s64 i, atomic64_t *v)				\
 									\
 	do {								\
 		CMPXCHG_BUGCHECK(v);					\
-		old = atomic64_read(v);					\
+		old = arch_atomic64_read(v);				\
 		new = old c_op i;					\
 	} while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \
 	return old;							\
@@ -159,7 +159,7 @@ ia64_atomic64_fetch_##op (s64 i, atomic64_t *v)				\
 ATOMIC64_OPS(add, +)
 ATOMIC64_OPS(sub, -)
 
-#define atomic64_add_return(i,v)					\
+#define arch_atomic64_add_return(i,v)					\
 ({									\
 	s64 __ia64_aar_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -167,7 +167,7 @@ ATOMIC64_OPS(sub, -)
 		: ia64_atomic64_add(__ia64_aar_i, v);			\
 })
 
-#define atomic64_sub_return(i,v)					\
+#define arch_atomic64_sub_return(i,v)					\
 ({									\
 	s64 __ia64_asr_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -175,7 +175,7 @@ ATOMIC64_OPS(sub, -)
 		: ia64_atomic64_sub(__ia64_asr_i, v);			\
 })
 
-#define atomic64_fetch_add(i,v)						\
+#define arch_atomic64_fetch_add(i,v)					\
 ({									\
 	s64 __ia64_aar_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -183,7 +183,7 @@ ATOMIC64_OPS(sub, -)
 		: ia64_atomic64_fetch_add(__ia64_aar_i, v);		\
 })
 
-#define atomic64_fetch_sub(i,v)						\
+#define arch_atomic64_fetch_sub(i,v)					\
 ({									\
 	s64 __ia64_asr_i = (i);						\
 	__ia64_atomic_const(i)						\
@@ -195,29 +195,29 @@ ATOMIC64_FETCH_OP(and, &)
 ATOMIC64_FETCH_OP(or, |)
 ATOMIC64_FETCH_OP(xor, ^)
 
-#define atomic64_and(i,v)	(void)ia64_atomic64_fetch_and(i,v)
-#define atomic64_or(i,v)	(void)ia64_atomic64_fetch_or(i,v)
-#define atomic64_xor(i,v)	(void)ia64_atomic64_fetch_xor(i,v)
+#define arch_atomic64_and(i,v)	(void)ia64_atomic64_fetch_and(i,v)
+#define arch_atomic64_or(i,v)	(void)ia64_atomic64_fetch_or(i,v)
+#define arch_atomic64_xor(i,v)	(void)ia64_atomic64_fetch_xor(i,v)
 
-#define atomic64_fetch_and(i,v)	ia64_atomic64_fetch_and(i,v)
-#define atomic64_fetch_or(i,v)	ia64_atomic64_fetch_or(i,v)
-#define atomic64_fetch_xor(i,v)	ia64_atomic64_fetch_xor(i,v)
+#define arch_atomic64_fetch_and(i,v)	ia64_atomic64_fetch_and(i,v)
+#define arch_atomic64_fetch_or(i,v)	ia64_atomic64_fetch_or(i,v)
+#define arch_atomic64_fetch_xor(i,v)	ia64_atomic64_fetch_xor(i,v)
 
 #undef ATOMIC64_OPS
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_OP
 
-#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), old, new))
+#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
-#define atomic64_cmpxchg(v, old, new) \
-	(cmpxchg(&((v)->counter), old, new))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic64_cmpxchg(v, old, new) \
+	(arch_cmpxchg(&((v)->counter), old, new))
+#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
-#define atomic_add(i,v)			(void)atomic_add_return((i), (v))
-#define atomic_sub(i,v)			(void)atomic_sub_return((i), (v))
+#define arch_atomic_add(i,v)		(void)arch_atomic_add_return((i), (v))
+#define arch_atomic_sub(i,v)		(void)arch_atomic_sub_return((i), (v))
 
-#define atomic64_add(i,v)		(void)atomic64_add_return((i), (v))
-#define atomic64_sub(i,v)		(void)atomic64_sub_return((i), (v))
+#define arch_atomic64_add(i,v)		(void)arch_atomic64_add_return((i), (v))
+#define arch_atomic64_sub(i,v)		(void)arch_atomic64_sub_return((i), (v))
 
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/arch/ia64/include/asm/cmpxchg.h b/arch/ia64/include/asm/cmpxchg.h
new file mode 100644
index 0000000000000..94ef844298431
--- /dev/null
+++ b/arch/ia64/include/asm/cmpxchg.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_IA64_CMPXCHG_H
+#define _ASM_IA64_CMPXCHG_H
+
+#include <uapi/asm/cmpxchg.h>
+
+#define arch_xchg(ptr, x)	\
+({(__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));})
+
+#define arch_cmpxchg(ptr, o, n)		cmpxchg_acq((ptr), (o), (n))
+#define arch_cmpxchg64(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
+
+#define arch_cmpxchg_local		arch_cmpxchg
+#define arch_cmpxchg64_local		arch_cmpxchg64
+
+#endif /* _ASM_IA64_CMPXCHG_H */
diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h
index 5d90307fd6e07..926c6cb1e0297 100644
--- a/arch/ia64/include/uapi/asm/cmpxchg.h
+++ b/arch/ia64/include/uapi/asm/cmpxchg.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_CMPXCHG_H
-#define _ASM_IA64_CMPXCHG_H
+#ifndef _UAPI_ASM_IA64_CMPXCHG_H
+#define _UAPI_ASM_IA64_CMPXCHG_H
 
 /*
  * Compare/Exchange, forked from asm/intrinsics.h
@@ -53,8 +53,10 @@ extern void ia64_xchg_called_with_bad_pointer(void);
 	__xchg_result;							\
 })
 
+#ifndef __KERNEL__
 #define xchg(ptr, x)							\
 ({(__typeof__(*(ptr))) __xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));})
+#endif
 
 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
@@ -126,12 +128,14 @@ extern long ia64_cmpxchg_called_with_bad_pointer(void);
  * we had to back-pedal and keep the "legacy" behavior of a full fence :-(
  */
 
+#ifndef __KERNEL__
 /* for compatibility with other platforms: */
 #define cmpxchg(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
 #define cmpxchg64(ptr, o, n)	cmpxchg_acq((ptr), (o), (n))
 
 #define cmpxchg_local		cmpxchg
 #define cmpxchg64_local		cmpxchg64
+#endif
 
 #ifdef CONFIG_IA64_DEBUG_CMPXCHG
 # define CMPXCHG_BUGCHECK_DECL	int _cmpxchg_bugcheck_count = 128;
@@ -152,4 +156,4 @@ do {									\
 
 #endif /* !__ASSEMBLY__ */
 
-#endif /* _ASM_IA64_CMPXCHG_H */
+#endif /* _UAPI_ASM_IA64_CMPXCHG_H */
-- 
GitLab


From e86e793c28e76ab5a0288c468713ab513b79fdd0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:19 +0100
Subject: [PATCH 1615/3804] locking/atomic: m68k: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates m68k to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

While atomic_dec_and_test_lt() is not part of the common atomic API, it
is also given an `arch_` prefix for consistency.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@linux-m68k.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-21-mark.rutland@arm.com
---
 arch/m68k/Kconfig                   |  1 +
 arch/m68k/include/asm/atomic.h      | 60 ++++++++++++++---------------
 arch/m68k/include/asm/cmpxchg.h     | 10 ++---
 arch/m68k/include/asm/mmu_context.h |  2 +-
 4 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 372e4e69c43ac..d1d91ac47f514 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,6 +3,7 @@ config M68K
 	bool
 	default y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h
index 756c5cc58f944..8637bf8a2f652 100644
--- a/arch/m68k/include/asm/atomic.h
+++ b/arch/m68k/include/asm/atomic.h
@@ -16,8 +16,8 @@
  * We do not have SMP m68k systems, so we don't have to deal with that.
  */
 
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 /*
  * The ColdFire parts cannot do some immediate to memory operations,
@@ -30,7 +30,7 @@
 #endif
 
 #define ATOMIC_OP(op, c_op, asm_op)					\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	__asm__ __volatile__(#asm_op "l %1,%0" : "+m" (*v) : ASM_DI (i));\
 }									\
@@ -38,7 +38,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 #ifdef CONFIG_RMW_INSNS
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	int t, tmp;							\
 									\
@@ -48,12 +48,12 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 			"	casl %2,%1,%0\n"			\
 			"	jne 1b"					\
 			: "+m" (*v), "=&d" (t), "=&d" (tmp)		\
-			: "g" (i), "2" (atomic_read(v)));		\
+			: "g" (i), "2" (arch_atomic_read(v)));		\
 	return t;							\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	int t, tmp;							\
 									\
@@ -63,14 +63,14 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 			"	casl %2,%1,%0\n"			\
 			"	jne 1b"					\
 			: "+m" (*v), "=&d" (t), "=&d" (tmp)		\
-			: "g" (i), "2" (atomic_read(v)));		\
+			: "g" (i), "2" (arch_atomic_read(v)));		\
 	return tmp;							\
 }
 
 #else
 
 #define ATOMIC_OP_RETURN(op, c_op, asm_op)				\
-static inline int atomic_##op##_return(int i, atomic_t * v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t * v)	\
 {									\
 	unsigned long flags;						\
 	int t;								\
@@ -83,7 +83,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op, asm_op)				\
-static inline int atomic_fetch_##op(int i, atomic_t * v)		\
+static inline int arch_atomic_fetch_##op(int i, atomic_t * v)		\
 {									\
 	unsigned long flags;						\
 	int t;								\
@@ -120,27 +120,27 @@ ATOMIC_OPS(xor, ^=, eor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-static inline void atomic_inc(atomic_t *v)
+static inline void arch_atomic_inc(atomic_t *v)
 {
 	__asm__ __volatile__("addql #1,%0" : "+m" (*v));
 }
-#define atomic_inc atomic_inc
+#define arch_atomic_inc arch_atomic_inc
 
-static inline void atomic_dec(atomic_t *v)
+static inline void arch_atomic_dec(atomic_t *v)
 {
 	__asm__ __volatile__("subql #1,%0" : "+m" (*v));
 }
-#define atomic_dec atomic_dec
+#define arch_atomic_dec arch_atomic_dec
 
-static inline int atomic_dec_and_test(atomic_t *v)
+static inline int arch_atomic_dec_and_test(atomic_t *v)
 {
 	char c;
 	__asm__ __volatile__("subql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
-#define atomic_dec_and_test atomic_dec_and_test
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
 
-static inline int atomic_dec_and_test_lt(atomic_t *v)
+static inline int arch_atomic_dec_and_test_lt(atomic_t *v)
 {
 	char c;
 	__asm__ __volatile__(
@@ -150,49 +150,49 @@ static inline int atomic_dec_and_test_lt(atomic_t *v)
 	return c != 0;
 }
 
-static inline int atomic_inc_and_test(atomic_t *v)
+static inline int arch_atomic_inc_and_test(atomic_t *v)
 {
 	char c;
 	__asm__ __volatile__("addql #1,%1; seq %0" : "=d" (c), "+m" (*v));
 	return c != 0;
 }
-#define atomic_inc_and_test atomic_inc_and_test
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
 
 #ifdef CONFIG_RMW_INSNS
 
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
 #else /* !CONFIG_RMW_INSNS */
 
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	unsigned long flags;
 	int prev;
 
 	local_irq_save(flags);
-	prev = atomic_read(v);
+	prev = arch_atomic_read(v);
 	if (prev == old)
-		atomic_set(v, new);
+		arch_atomic_set(v, new);
 	local_irq_restore(flags);
 	return prev;
 }
 
-static inline int atomic_xchg(atomic_t *v, int new)
+static inline int arch_atomic_xchg(atomic_t *v, int new)
 {
 	unsigned long flags;
 	int prev;
 
 	local_irq_save(flags);
-	prev = atomic_read(v);
-	atomic_set(v, new);
+	prev = arch_atomic_read(v);
+	arch_atomic_set(v, new);
 	local_irq_restore(flags);
 	return prev;
 }
 
 #endif /* !CONFIG_RMW_INSNS */
 
-static inline int atomic_sub_and_test(int i, atomic_t *v)
+static inline int arch_atomic_sub_and_test(int i, atomic_t *v)
 {
 	char c;
 	__asm__ __volatile__("subl %2,%1; seq %0"
@@ -200,9 +200,9 @@ static inline int atomic_sub_and_test(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
-#define atomic_sub_and_test atomic_sub_and_test
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
 
-static inline int atomic_add_negative(int i, atomic_t *v)
+static inline int arch_atomic_add_negative(int i, atomic_t *v)
 {
 	char c;
 	__asm__ __volatile__("addl %2,%1; smi %0"
@@ -210,6 +210,6 @@ static inline int atomic_add_negative(int i, atomic_t *v)
 			     : ASM_DI (i));
 	return c != 0;
 }
-#define atomic_add_negative atomic_add_negative
+#define arch_atomic_add_negative arch_atomic_add_negative
 
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h
index 7629c9c1ed5b3..e8ca4b0ccefaa 100644
--- a/arch/m68k/include/asm/cmpxchg.h
+++ b/arch/m68k/include/asm/cmpxchg.h
@@ -76,11 +76,11 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz
 }
 #endif
 
-#define xchg(ptr,x) ({(__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)));})
+#define arch_xchg(ptr,x) ({(__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)));})
 
 #include <asm-generic/cmpxchg-local.h>
 
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 extern unsigned long __invalid_cmpxchg_size(volatile void *,
 					    unsigned long, unsigned long, int);
@@ -118,14 +118,14 @@ static inline unsigned long __cmpxchg(volatile void *p, unsigned long old,
 	return old;
 }
 
-#define cmpxchg(ptr, o, n)						    \
+#define arch_cmpxchg(ptr, o, n)						    \
 	({(__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	    \
 			(unsigned long)(n), sizeof(*(ptr)));})
-#define cmpxchg_local(ptr, o, n)					    \
+#define arch_cmpxchg_local(ptr, o, n)					    \
 	({(__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o),	    \
 			(unsigned long)(n), sizeof(*(ptr)));})
 
-#define cmpxchg64(ptr, o, n)	cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64(ptr, o, n)	arch_cmpxchg64_local((ptr), (o), (n))
 
 #else
 
diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h
index a5d3588558784..8ed6ac14d99ff 100644
--- a/arch/m68k/include/asm/mmu_context.h
+++ b/arch/m68k/include/asm/mmu_context.h
@@ -31,7 +31,7 @@ static inline void get_mmu_context(struct mm_struct *mm)
 
 	if (mm->context != NO_CONTEXT)
 		return;
-	while (atomic_dec_and_test_lt(&nr_free_contexts)) {
+	while (arch_atomic_dec_and_test_lt(&nr_free_contexts)) {
 		atomic_inc(&nr_free_contexts);
 		steal_context();
 	}
-- 
GitLab


From f5b1c0f951e7b0d5634b82d57971cae25a0ba435 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:20 +0100
Subject: [PATCH 1616/3804] locking/atomic: microblaze: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates microblaze to ARCH_ATOMIC,
using the asm-generic implementations.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-22-mark.rutland@arm.com
---
 arch/microblaze/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 0660f47012bcb..5a52922dc225c 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -2,6 +2,7 @@
 config MICROBLAZE
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_NO_SWAP
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_GCOV_PROFILE_ALL
-- 
GitLab


From c7b5fd6faa1dc6cdc721a978d9d122cd31bbd7b1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:21 +0100
Subject: [PATCH 1617/3804] locking/atomic: mips: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates mips to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-23-mark.rutland@arm.com
---
 arch/mips/Kconfig               |  1 +
 arch/mips/include/asm/atomic.h  | 55 +++++++++++++++++----------------
 arch/mips/include/asm/cmpxchg.h | 22 ++++++-------
 arch/mips/kernel/cmpxchg.c      |  4 +--
 4 files changed, 43 insertions(+), 39 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ed51970c08e75..55b4da96872f9 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -3,6 +3,7 @@ config MIPS
 	bool
 	default y
 	select ARCH_32BIT_OFF_T if !64BIT
+	select ARCH_ATOMIC
 	select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
 	select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
 	select ARCH_HAS_FORTIFY_SOURCE
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 27ad767915390..95e1f7f3597f4 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -25,24 +25,25 @@
 #include <asm/war.h>
 
 #define ATOMIC_OPS(pfx, type)						\
-static __always_inline type pfx##_read(const pfx##_t *v)		\
+static __always_inline type arch_##pfx##_read(const pfx##_t *v)		\
 {									\
 	return READ_ONCE(v->counter);					\
 }									\
 									\
-static __always_inline void pfx##_set(pfx##_t *v, type i)		\
+static __always_inline void arch_##pfx##_set(pfx##_t *v, type i)	\
 {									\
 	WRITE_ONCE(v->counter, i);					\
 }									\
 									\
-static __always_inline type pfx##_cmpxchg(pfx##_t *v, type o, type n)	\
+static __always_inline type						\
+arch_##pfx##_cmpxchg(pfx##_t *v, type o, type n)			\
 {									\
-	return cmpxchg(&v->counter, o, n);				\
+	return arch_cmpxchg(&v->counter, o, n);				\
 }									\
 									\
-static __always_inline type pfx##_xchg(pfx##_t *v, type n)		\
+static __always_inline type arch_##pfx##_xchg(pfx##_t *v, type n)	\
 {									\
-	return xchg(&v->counter, n);					\
+	return arch_xchg(&v->counter, n);				\
 }
 
 ATOMIC_OPS(atomic, int)
@@ -53,7 +54,7 @@ ATOMIC_OPS(atomic64, s64)
 #endif
 
 #define ATOMIC_OP(pfx, op, type, c_op, asm_op, ll, sc)			\
-static __inline__ void pfx##_##op(type i, pfx##_t * v)			\
+static __inline__ void arch_##pfx##_##op(type i, pfx##_t * v)		\
 {									\
 	type temp;							\
 									\
@@ -80,7 +81,8 @@ static __inline__ void pfx##_##op(type i, pfx##_t * v)			\
 }
 
 #define ATOMIC_OP_RETURN(pfx, op, type, c_op, asm_op, ll, sc)		\
-static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v)	\
+static __inline__ type							\
+arch_##pfx##_##op##_return_relaxed(type i, pfx##_t * v)			\
 {									\
 	type temp, result;						\
 									\
@@ -113,7 +115,8 @@ static __inline__ type pfx##_##op##_return_relaxed(type i, pfx##_t * v)	\
 }
 
 #define ATOMIC_FETCH_OP(pfx, op, type, c_op, asm_op, ll, sc)		\
-static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v)	\
+static __inline__ type							\
+arch_##pfx##_fetch_##op##_relaxed(type i, pfx##_t * v)			\
 {									\
 	int temp, result;						\
 									\
@@ -153,18 +156,18 @@ static __inline__ type pfx##_fetch_##op##_relaxed(type i, pfx##_t * v)	\
 ATOMIC_OPS(atomic, add, int, +=, addu, ll, sc)
 ATOMIC_OPS(atomic, sub, int, -=, subu, ll, sc)
 
-#define atomic_add_return_relaxed	atomic_add_return_relaxed
-#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
-#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
+#define arch_atomic_add_return_relaxed	arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed	arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add_relaxed	arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed	arch_atomic_fetch_sub_relaxed
 
 #ifdef CONFIG_64BIT
 ATOMIC_OPS(atomic64, add, s64, +=, daddu, lld, scd)
 ATOMIC_OPS(atomic64, sub, s64, -=, dsubu, lld, scd)
-# define atomic64_add_return_relaxed	atomic64_add_return_relaxed
-# define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
-# define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
-# define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
+# define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+# define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+# define arch_atomic64_fetch_add_relaxed	arch_atomic64_fetch_add_relaxed
+# define arch_atomic64_fetch_sub_relaxed	arch_atomic64_fetch_sub_relaxed
 #endif /* CONFIG_64BIT */
 
 #undef ATOMIC_OPS
@@ -176,17 +179,17 @@ ATOMIC_OPS(atomic, and, int, &=, and, ll, sc)
 ATOMIC_OPS(atomic, or, int, |=, or, ll, sc)
 ATOMIC_OPS(atomic, xor, int, ^=, xor, ll, sc)
 
-#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed	arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed	arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed	arch_atomic_fetch_xor_relaxed
 
 #ifdef CONFIG_64BIT
 ATOMIC_OPS(atomic64, and, s64, &=, and, lld, scd)
 ATOMIC_OPS(atomic64, or, s64, |=, or, lld, scd)
 ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd)
-# define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
-# define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
-# define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
+# define arch_atomic64_fetch_and_relaxed	arch_atomic64_fetch_and_relaxed
+# define arch_atomic64_fetch_or_relaxed		arch_atomic64_fetch_or_relaxed
+# define arch_atomic64_fetch_xor_relaxed	arch_atomic64_fetch_xor_relaxed
 #endif
 
 #undef ATOMIC_OPS
@@ -203,7 +206,7 @@ ATOMIC_OPS(atomic64, xor, s64, ^=, xor, lld, scd)
  * The function returns the old value of @v minus @i.
  */
 #define ATOMIC_SIP_OP(pfx, type, op, ll, sc)				\
-static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v)	\
+static __inline__ int arch_##pfx##_sub_if_positive(type i, pfx##_t * v)	\
 {									\
 	type temp, result;						\
 									\
@@ -255,11 +258,11 @@ static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v)	\
 }
 
 ATOMIC_SIP_OP(atomic, int, subu, ll, sc)
-#define atomic_dec_if_positive(v)	atomic_sub_if_positive(1, v)
+#define arch_atomic_dec_if_positive(v)	arch_atomic_sub_if_positive(1, v)
 
 #ifdef CONFIG_64BIT
 ATOMIC_SIP_OP(atomic64, s64, dsubu, lld, scd)
-#define atomic64_dec_if_positive(v)	atomic64_sub_if_positive(1, v)
+#define arch_atomic64_dec_if_positive(v)	arch_atomic64_sub_if_positive(1, v)
 #endif
 
 #undef ATOMIC_SIP_OP
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index c7e0455d4d462..0b983800f48b7 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -90,7 +90,7 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
 	}
 }
 
-#define xchg(ptr, x)							\
+#define arch_xchg(ptr, x)						\
 ({									\
 	__typeof__(*(ptr)) __res;					\
 									\
@@ -175,14 +175,14 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	}
 }
 
-#define cmpxchg_local(ptr, old, new)					\
+#define arch_cmpxchg_local(ptr, old, new)				\
 	((__typeof__(*(ptr)))						\
 		__cmpxchg((ptr),					\
 			  (unsigned long)(__typeof__(*(ptr)))(old),	\
 			  (unsigned long)(__typeof__(*(ptr)))(new),	\
 			  sizeof(*(ptr))))
 
-#define cmpxchg(ptr, old, new)						\
+#define arch_cmpxchg(ptr, old, new)					\
 ({									\
 	__typeof__(*(ptr)) __res;					\
 									\
@@ -194,7 +194,7 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	if (__SYNC_loongson3_war == 0)					\
 		smp_mb__before_llsc();					\
 									\
-	__res = cmpxchg_local((ptr), (old), (new));			\
+	__res = arch_cmpxchg_local((ptr), (old), (new));		\
 									\
 	/*								\
 	 * In the Loongson3 workaround case __cmpxchg_asm() already	\
@@ -208,21 +208,21 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 })
 
 #ifdef CONFIG_64BIT
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
   })
 
-#define cmpxchg64(ptr, o, n)						\
+#define arch_cmpxchg64(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
+	arch_cmpxchg((ptr), (o), (n));					\
   })
 #else
 
 # include <asm-generic/cmpxchg-local.h>
-# define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+# define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 # ifdef CONFIG_SMP
 
@@ -294,7 +294,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 	return ret;
 }
 
-#  define cmpxchg64(ptr, o, n) ({					\
+#  define arch_cmpxchg64(ptr, o, n) ({					\
 	unsigned long long __old = (__typeof__(*(ptr)))(o);		\
 	unsigned long long __new = (__typeof__(*(ptr)))(n);		\
 	__typeof__(*(ptr)) __res;					\
@@ -317,7 +317,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr,
 })
 
 # else /* !CONFIG_SMP */
-#  define cmpxchg64(ptr, o, n) cmpxchg64_local((ptr), (o), (n))
+#  define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), (o), (n))
 # endif /* !CONFIG_SMP */
 #endif /* !CONFIG_64BIT */
 
diff --git a/arch/mips/kernel/cmpxchg.c b/arch/mips/kernel/cmpxchg.c
index 89107deb03fcb..ac9c8cfb2ba9e 100644
--- a/arch/mips/kernel/cmpxchg.c
+++ b/arch/mips/kernel/cmpxchg.c
@@ -41,7 +41,7 @@ unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int s
 	do {
 		old32 = load32;
 		new32 = (load32 & ~mask) | (val << shift);
-		load32 = cmpxchg(ptr32, old32, new32);
+		load32 = arch_cmpxchg(ptr32, old32, new32);
 	} while (load32 != old32);
 
 	return (load32 & mask) >> shift;
@@ -97,7 +97,7 @@ unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
 		 */
 		old32 = (load32 & ~mask) | (old << shift);
 		new32 = (load32 & ~mask) | (new << shift);
-		load32 = cmpxchg(ptr32, old32, new32);
+		load32 = arch_cmpxchg(ptr32, old32, new32);
 		if (load32 == old32)
 			return old;
 	}
-- 
GitLab


From 0cc70f54ee4394b49608f0aaee50c2b4109c3be6 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:22 +0100
Subject: [PATCH 1618/3804] locking/atomic: nds32: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates nds32 to ARCH_ATOMIC, using
the asm-generic implementations.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Nick Hu <nickhu@andestech.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-24-mark.rutland@arm.com
---
 arch/nds32/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 62313902d75d9..352913573aee0 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -7,6 +7,7 @@
 config NDS32
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-- 
GitLab


From 7e517b4c11200be3b0a941b33b26798a5e808dbc Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:23 +0100
Subject: [PATCH 1619/3804] locking/atomic: nios2: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates nios2 to ARCH_ATOMIC, using
the asm-generic implementations.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Ley Foon Tan <ley.foon.tan@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-25-mark.rutland@arm.com
---
 arch/nios2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index c24955c81c927..67dae88c5b53d 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -2,6 +2,7 @@
 config NIOS2
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-- 
GitLab


From 3f1e931d158124bbdd5c25300333096bfff805db Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:24 +0100
Subject: [PATCH 1620/3804] locking/atomic: openrisc: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

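As a step towards that, this patch migrates openrisc to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

The arch code now also instantiates ATOMIC_OP(add) and ATOMIC_OP(sub),
so that arch_atomic_add() and arch_atomic_sub() are defined alongside
the other arch_atomic_*() operations.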

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Stafford Horne <shorne@gmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-26-mark.rutland@arm.com
---
 arch/openrisc/Kconfig               |  1 +
 arch/openrisc/include/asm/atomic.h  | 42 ++++++++++++++++-------------
 arch/openrisc/include/asm/cmpxchg.h |  4 +--
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 591acc5990dc5..8c50bc9674f56 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,6 +7,7 @@
 config OPENRISC
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_SET_UNCACHED
 	select ARCH_HAS_DMA_CLEAR_UNCACHED
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h
index cb86970d38591..326167e4783a9 100644
--- a/arch/openrisc/include/asm/atomic.h
+++ b/arch/openrisc/include/asm/atomic.h
@@ -13,7 +13,7 @@
 
 /* Atomically perform op with v->counter and i */
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	int tmp;							\
 									\
@@ -30,7 +30,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 
 /* Atomically perform op with v->counter and i, return the result */
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	int tmp;							\
 									\
@@ -49,7 +49,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 
 /* Atomically perform op with v->counter and i, return orig v->counter */
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	int tmp, old;							\
 									\
@@ -75,6 +75,8 @@ ATOMIC_FETCH_OP(and)
 ATOMIC_FETCH_OP(or)
 ATOMIC_FETCH_OP(xor)
 
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
 ATOMIC_OP(and)
 ATOMIC_OP(or)
 ATOMIC_OP(xor)
@@ -83,16 +85,18 @@ ATOMIC_OP(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_add_return	atomic_add_return
-#define atomic_sub_return	atomic_sub_return
-#define atomic_fetch_add	atomic_fetch_add
-#define atomic_fetch_sub	atomic_fetch_sub
-#define atomic_fetch_and	atomic_fetch_and
-#define atomic_fetch_or		atomic_fetch_or
-#define atomic_fetch_xor	atomic_fetch_xor
-#define atomic_and	atomic_and
-#define atomic_or	atomic_or
-#define atomic_xor	atomic_xor
+#define arch_atomic_add_return	arch_atomic_add_return
+#define arch_atomic_sub_return	arch_atomic_sub_return
+#define arch_atomic_fetch_add	arch_atomic_fetch_add
+#define arch_atomic_fetch_sub	arch_atomic_fetch_sub
+#define arch_atomic_fetch_and	arch_atomic_fetch_and
+#define arch_atomic_fetch_or	arch_atomic_fetch_or
+#define arch_atomic_fetch_xor	arch_atomic_fetch_xor
+#define arch_atomic_add		arch_atomic_add
+#define arch_atomic_sub		arch_atomic_sub
+#define arch_atomic_and		arch_atomic_and
+#define arch_atomic_or		arch_atomic_or
+#define arch_atomic_xor		arch_atomic_xor
 
 /*
  * Atomically add a to v->counter as long as v is not already u.
@@ -100,7 +104,7 @@ ATOMIC_OP(xor)
  *
  * This is often used through atomic_inc_not_zero()
  */
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int old, tmp;
 
@@ -119,14 +123,14 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return old;
 }
-#define atomic_fetch_add_unless	atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless	arch_atomic_fetch_add_unless
 
-#define atomic_read(v)			READ_ONCE((v)->counter)
-#define atomic_set(v,i)			WRITE_ONCE((v)->counter, (i))
+#define arch_atomic_read(v)		READ_ONCE((v)->counter)
+#define arch_atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #include <asm/cmpxchg.h>
 
-#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
-#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
+#define arch_atomic_xchg(ptr, v)		(arch_xchg(&(ptr)->counter, (v)))
+#define arch_atomic_cmpxchg(v, old, new)	(arch_cmpxchg(&((v)->counter), (old), (new)))
 
 #endif /* __ASM_OPENRISC_ATOMIC_H */
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index f9cd43a39d726..79fd16162ccb6 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -132,7 +132,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 	}
 }
 
-#define cmpxchg(ptr, o, n)						\
+#define arch_cmpxchg(ptr, o, n)						\
 	({								\
 		(__typeof__(*(ptr))) __cmpxchg((ptr),			\
 					       (unsigned long)(o),	\
@@ -161,7 +161,7 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long with,
 	}
 }
 
-#define xchg(ptr, with) 						\
+#define arch_xchg(ptr, with) 						\
 	({								\
 		(__typeof__(*(ptr))) __xchg((ptr),			\
 					    (unsigned long)(with),	\
-- 
GitLab


From 329c161b8baeff5fff69fe37d3ebb4bcffef91fa Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:25 +0100
Subject: [PATCH 1621/3804] locking/atomic: parisc: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates parisc to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-27-mark.rutland@arm.com
---
 arch/parisc/Kconfig               |  1 +
 arch/parisc/include/asm/atomic.h  | 34 +++++++++++++++----------------
 arch/parisc/include/asm/cmpxchg.h | 12 +++++------
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index bde9907bc5b25..bfa120a4add12 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -2,6 +2,7 @@
 config PARISC
 	def_bool y
 	select ARCH_32BIT_OFF_T if !64BIT
+	select ARCH_ATOMIC
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select HAVE_IDE
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h
index 21b375c67e533..dd5a299ada695 100644
--- a/arch/parisc/include/asm/atomic.h
+++ b/arch/parisc/include/asm/atomic.h
@@ -56,7 +56,7 @@ extern arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned;
  * are atomic, so a reader never sees inconsistent values.
  */
 
-static __inline__ void atomic_set(atomic_t *v, int i)
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
 {
 	unsigned long flags;
 	_atomic_spin_lock_irqsave(v, flags);
@@ -66,19 +66,19 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 	_atomic_spin_unlock_irqrestore(v, flags);
 }
 
-#define atomic_set_release(v, i)	atomic_set((v), (i))
+#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
 
-static __inline__ int atomic_read(const atomic_t *v)
+static __inline__ int arch_atomic_read(const atomic_t *v)
 {
 	return READ_ONCE((v)->counter);
 }
 
 /* exported interface */
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, o, n)	(arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_xchg(v, new)	(arch_xchg(&((v)->counter), new))
 
 #define ATOMIC_OP(op, c_op)						\
-static __inline__ void atomic_##op(int i, atomic_t *v)			\
+static __inline__ void arch_atomic_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long flags;						\
 									\
@@ -88,7 +88,7 @@ static __inline__ void atomic_##op(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
-static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
+static __inline__ int arch_atomic_##op##_return(int i, atomic_t *v)	\
 {									\
 	unsigned long flags;						\
 	int ret;							\
@@ -101,7 +101,7 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op)					\
-static __inline__ int atomic_fetch_##op(int i, atomic_t *v)		\
+static __inline__ int arch_atomic_fetch_##op(int i, atomic_t *v)	\
 {									\
 	unsigned long flags;						\
 	int ret;							\
@@ -141,7 +141,7 @@ ATOMIC_OPS(xor, ^=)
 #define ATOMIC64_INIT(i) { (i) }
 
 #define ATOMIC64_OP(op, c_op)						\
-static __inline__ void atomic64_##op(s64 i, atomic64_t *v)		\
+static __inline__ void arch_atomic64_##op(s64 i, atomic64_t *v)		\
 {									\
 	unsigned long flags;						\
 									\
@@ -151,7 +151,7 @@ static __inline__ void atomic64_##op(s64 i, atomic64_t *v)		\
 }
 
 #define ATOMIC64_OP_RETURN(op, c_op)					\
-static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
+static __inline__ s64 arch_atomic64_##op##_return(s64 i, atomic64_t *v)	\
 {									\
 	unsigned long flags;						\
 	s64 ret;							\
@@ -164,7 +164,7 @@ static __inline__ s64 atomic64_##op##_return(s64 i, atomic64_t *v)	\
 }
 
 #define ATOMIC64_FETCH_OP(op, c_op)					\
-static __inline__ s64 atomic64_fetch_##op(s64 i, atomic64_t *v)		\
+static __inline__ s64 arch_atomic64_fetch_##op(s64 i, atomic64_t *v)	\
 {									\
 	unsigned long flags;						\
 	s64 ret;							\
@@ -200,7 +200,7 @@ ATOMIC64_OPS(xor, ^=)
 #undef ATOMIC64_OP
 
 static __inline__ void
-atomic64_set(atomic64_t *v, s64 i)
+arch_atomic64_set(atomic64_t *v, s64 i)
 {
 	unsigned long flags;
 	_atomic_spin_lock_irqsave(v, flags);
@@ -210,18 +210,18 @@ atomic64_set(atomic64_t *v, s64 i)
 	_atomic_spin_unlock_irqrestore(v, flags);
 }
 
-#define atomic64_set_release(v, i)	atomic64_set((v), (i))
+#define arch_atomic64_set_release(v, i)	arch_atomic64_set((v), (i))
 
 static __inline__ s64
-atomic64_read(const atomic64_t *v)
+arch_atomic64_read(const atomic64_t *v)
 {
 	return READ_ONCE((v)->counter);
 }
 
 /* exported interface */
-#define atomic64_cmpxchg(v, o, n) \
-	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic64_cmpxchg(v, o, n) \
+	((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
 #endif /* !CONFIG_64BIT */
 
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index c2015654b684d..5f274be105671 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -44,7 +44,7 @@ __xchg(unsigned long x, volatile void *ptr, int size)
 **		if (((unsigned long)p & 0xf) == 0)
 **			return __ldcw(p);
 */
-#define xchg(ptr, x)							\
+#define arch_xchg(ptr, x)						\
 ({									\
 	__typeof__(*(ptr)) __ret;					\
 	__typeof__(*(ptr)) _x_ = (x);					\
@@ -78,7 +78,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
 	return old;
 }
 
-#define cmpxchg(ptr, o, n)						 \
+#define arch_cmpxchg(ptr, o, n)						 \
 ({									 \
 	__typeof__(*(ptr)) _o_ = (o);					 \
 	__typeof__(*(ptr)) _n_ = (n);					 \
@@ -106,19 +106,19 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
  */
-#define cmpxchg_local(ptr, o, n)					\
+#define arch_cmpxchg_local(ptr, o, n)					\
 	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
 			(unsigned long)(n), sizeof(*(ptr))))
 #ifdef CONFIG_64BIT
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
 })
 #else
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #endif
 
-#define cmpxchg64(ptr, o, n) __cmpxchg_u64(ptr, o, n)
+#define arch_cmpxchg64(ptr, o, n) __cmpxchg_u64(ptr, o, n)
 
 #endif /* _ASM_PARISC_CMPXCHG_H_ */
-- 
GitLab


From 9eaa82935dccb74a22e3da5045bed1dac59ad2b0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:26 +0100
Subject: [PATCH 1622/3804] locking/atomic: powerpc: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates powerpc to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

While atomic_try_cmpxchg_lock() is not part of the common atomic API, it
is given an `arch_` prefix for consistency.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-28-mark.rutland@arm.com
---
 arch/powerpc/Kconfig                 |   1 +
 arch/powerpc/include/asm/atomic.h    | 140 ++++++++++++++-------------
 arch/powerpc/include/asm/cmpxchg.h   |  30 +++---
 arch/powerpc/include/asm/qspinlock.h |   2 +-
 4 files changed, 90 insertions(+), 83 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 088dd2afcfe47..d143c2b616f08 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -118,6 +118,7 @@ config PPC
 	# Please keep this list sorted alphabetically.
 	#
 	select ARCH_32BIT_OFF_T if PPC32
+	select ARCH_ATOMIC
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_COPY_MC			if PPC64
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8c..a1732a79e92ac 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -23,7 +23,7 @@
 #define __atomic_release_fence()					\
 	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
 
-static __inline__ int atomic_read(const atomic_t *v)
+static __inline__ int arch_atomic_read(const atomic_t *v)
 {
 	int t;
 
@@ -32,13 +32,13 @@ static __inline__ int atomic_read(const atomic_t *v)
 	return t;
 }
 
-static __inline__ void atomic_set(atomic_t *v, int i)
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
 {
 	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
 }
 
 #define ATOMIC_OP(op, asm_op)						\
-static __inline__ void atomic_##op(int a, atomic_t *v)			\
+static __inline__ void arch_atomic_##op(int a, atomic_t *v)		\
 {									\
 	int t;								\
 									\
@@ -53,7 +53,7 @@ static __inline__ void atomic_##op(int a, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)	\
+static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 {									\
 	int t;								\
 									\
@@ -70,7 +70,7 @@ static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 }
 
 #define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
-static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
+static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
 {									\
 	int res, t;							\
 									\
@@ -94,11 +94,11 @@ static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
 ATOMIC_OPS(add, add)
 ATOMIC_OPS(sub, subf)
 
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
 
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, asm_op)						\
@@ -109,16 +109,16 @@ ATOMIC_OPS(and, and)
 ATOMIC_OPS(or, or)
 ATOMIC_OPS(xor, xor)
 
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed  atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed  arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
 #undef ATOMIC_FETCH_OP_RELAXED
 #undef ATOMIC_OP_RETURN_RELAXED
 #undef ATOMIC_OP
 
-static __inline__ void atomic_inc(atomic_t *v)
+static __inline__ void arch_atomic_inc(atomic_t *v)
 {
 	int t;
 
@@ -131,9 +131,9 @@ static __inline__ void atomic_inc(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
-#define atomic_inc atomic_inc
+#define arch_atomic_inc arch_atomic_inc
 
-static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_inc_return_relaxed(atomic_t *v)
 {
 	int t;
 
@@ -149,7 +149,7 @@ static __inline__ int atomic_inc_return_relaxed(atomic_t *v)
 	return t;
 }
 
-static __inline__ void atomic_dec(atomic_t *v)
+static __inline__ void arch_atomic_dec(atomic_t *v)
 {
 	int t;
 
@@ -162,9 +162,9 @@ static __inline__ void atomic_dec(atomic_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
-#define atomic_dec atomic_dec
+#define arch_atomic_dec arch_atomic_dec
 
-static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
+static __inline__ int arch_atomic_dec_return_relaxed(atomic_t *v)
 {
 	int t;
 
@@ -180,17 +180,20 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
 	return t;
 }
 
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+#define arch_atomic_inc_return_relaxed arch_atomic_inc_return_relaxed
+#define arch_atomic_dec_return_relaxed arch_atomic_dec_return_relaxed
 
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_cmpxchg_relaxed(v, o, n) \
-	cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic_cmpxchg_acquire(v, o, n) \
-	cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg(v, o, n) \
+	(arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_cmpxchg_relaxed(v, o, n) \
+	arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic_cmpxchg_acquire(v, o, n) \
+	arch_cmpxchg_acquire(&((v)->counter), (o), (n))
 
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic_xchg(v, new) \
+	(arch_xchg(&((v)->counter), new))
+#define arch_atomic_xchg_relaxed(v, new) \
+	arch_xchg_relaxed(&((v)->counter), (new))
 
 /*
  * Don't want to override the generic atomic_try_cmpxchg_acquire, because
@@ -199,7 +202,7 @@ static __inline__ int atomic_dec_return_relaxed(atomic_t *v)
  * would be a surprise).
  */
 static __always_inline bool
-atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
+arch_atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
 {
 	int r, o = *old;
 
@@ -229,7 +232,7 @@ atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int t;
 
@@ -250,7 +253,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 
 	return t;
 }
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
 /**
  * atomic_inc_not_zero - increment unless the number is zero
@@ -259,7 +262,7 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * Atomically increments @v by 1, so long as @v is non-zero.
  * Returns non-zero if @v was non-zero, and zero otherwise.
  */
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
+static __inline__ int arch_atomic_inc_not_zero(atomic_t *v)
 {
 	int t1, t2;
 
@@ -280,14 +283,14 @@ static __inline__ int atomic_inc_not_zero(atomic_t *v)
 
 	return t1;
 }
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
+#define arch_atomic_inc_not_zero(v) arch_atomic_inc_not_zero((v))
 
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1, even if
  * the atomic variable, v, was not decremented.
  */
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static __inline__ int arch_atomic_dec_if_positive(atomic_t *v)
 {
 	int t;
 
@@ -307,13 +310,13 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 	return t;
 }
-#define atomic_dec_if_positive atomic_dec_if_positive
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
 
 #ifdef __powerpc64__
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-static __inline__ s64 atomic64_read(const atomic64_t *v)
+static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
 {
 	s64 t;
 
@@ -322,13 +325,13 @@ static __inline__ s64 atomic64_read(const atomic64_t *v)
 	return t;
 }
 
-static __inline__ void atomic64_set(atomic64_t *v, s64 i)
+static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
 {
 	__asm__ __volatile__("std%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
 }
 
 #define ATOMIC64_OP(op, asm_op)						\
-static __inline__ void atomic64_##op(s64 a, atomic64_t *v)		\
+static __inline__ void arch_atomic64_##op(s64 a, atomic64_t *v)		\
 {									\
 	s64 t;								\
 									\
@@ -344,7 +347,7 @@ static __inline__ void atomic64_##op(s64 a, atomic64_t *v)		\
 
 #define ATOMIC64_OP_RETURN_RELAXED(op, asm_op)				\
 static inline s64							\
-atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)			\
+arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)		\
 {									\
 	s64 t;								\
 									\
@@ -362,7 +365,7 @@ atomic64_##op##_return_relaxed(s64 a, atomic64_t *v)			\
 
 #define ATOMIC64_FETCH_OP_RELAXED(op, asm_op)				\
 static inline s64							\
-atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)			\
+arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)		\
 {									\
 	s64 res, t;							\
 									\
@@ -386,11 +389,11 @@ atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v)			\
 ATOMIC64_OPS(add, add)
 ATOMIC64_OPS(sub, subf)
 
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
 
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
 
 #undef ATOMIC64_OPS
 #define ATOMIC64_OPS(op, asm_op)					\
@@ -401,16 +404,16 @@ ATOMIC64_OPS(and, and)
 ATOMIC64_OPS(or, or)
 ATOMIC64_OPS(xor, xor)
 
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed  atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed  arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
 
 #undef ATOPIC64_OPS
 #undef ATOMIC64_FETCH_OP_RELAXED
 #undef ATOMIC64_OP_RETURN_RELAXED
 #undef ATOMIC64_OP
 
-static __inline__ void atomic64_inc(atomic64_t *v)
+static __inline__ void arch_atomic64_inc(atomic64_t *v)
 {
 	s64 t;
 
@@ -423,9 +426,9 @@ static __inline__ void atomic64_inc(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
-#define atomic64_inc atomic64_inc
+#define arch_atomic64_inc arch_atomic64_inc
 
-static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_inc_return_relaxed(atomic64_t *v)
 {
 	s64 t;
 
@@ -441,7 +444,7 @@ static __inline__ s64 atomic64_inc_return_relaxed(atomic64_t *v)
 	return t;
 }
 
-static __inline__ void atomic64_dec(atomic64_t *v)
+static __inline__ void arch_atomic64_dec(atomic64_t *v)
 {
 	s64 t;
 
@@ -454,9 +457,9 @@ static __inline__ void atomic64_dec(atomic64_t *v)
 	: "r" (&v->counter)
 	: "cc", "xer");
 }
-#define atomic64_dec atomic64_dec
+#define arch_atomic64_dec arch_atomic64_dec
 
-static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_return_relaxed(atomic64_t *v)
 {
 	s64 t;
 
@@ -472,14 +475,14 @@ static __inline__ s64 atomic64_dec_return_relaxed(atomic64_t *v)
 	return t;
 }
 
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
 
 /*
  * Atomically test *v and decrement if it is greater than 0.
  * The function returns the old value of *v minus 1.
  */
-static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 t;
 
@@ -498,16 +501,19 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
 
 	return t;
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_cmpxchg_relaxed(v, o, n) \
-	cmpxchg_relaxed(&((v)->counter), (o), (n))
-#define atomic64_cmpxchg_acquire(v, o, n) \
-	cmpxchg_acquire(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg(v, o, n) \
+	(arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic64_cmpxchg_relaxed(v, o, n) \
+	arch_cmpxchg_relaxed(&((v)->counter), (o), (n))
+#define arch_atomic64_cmpxchg_acquire(v, o, n) \
+	arch_cmpxchg_acquire(&((v)->counter), (o), (n))
 
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-#define atomic64_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
+#define arch_atomic64_xchg(v, new) \
+	(arch_xchg(&((v)->counter), new))
+#define arch_atomic64_xchg_relaxed(v, new) \
+	arch_xchg_relaxed(&((v)->counter), (new))
 
 /**
  * atomic64_fetch_add_unless - add unless the number is a given value
@@ -518,7 +524,7 @@ static __inline__ s64 atomic64_dec_if_positive(atomic64_t *v)
  * Atomically adds @a to @v, so long as it was not @u.
  * Returns the old value of @v.
  */
-static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 t;
 
@@ -539,7 +545,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 
 	return t;
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 
 /**
  * atomic_inc64_not_zero - increment unless the number is zero
@@ -548,7 +554,7 @@ static __inline__ s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
  * Atomically increments @v by 1, so long as @v is non-zero.
  * Returns non-zero if @v was non-zero, and zero otherwise.
  */
-static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int arch_atomic64_inc_not_zero(atomic64_t *v)
 {
 	s64 t1, t2;
 
@@ -569,7 +575,7 @@ static __inline__ int atomic64_inc_not_zero(atomic64_t *v)
 
 	return t1 != 0;
 }
-#define atomic64_inc_not_zero(v) atomic64_inc_not_zero((v))
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_inc_not_zero((v))
 
 #endif /* __powerpc64__ */
 
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index 69f52fdcf0647..05f246c0e36eb 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -185,14 +185,14 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
 	BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
 	return x;
 }
-#define xchg_local(ptr,x)						     \
+#define arch_xchg_local(ptr,x)						     \
   ({									     \
      __typeof__(*(ptr)) _x_ = (x);					     \
      (__typeof__(*(ptr))) __xchg_local((ptr),				     \
      		(unsigned long)_x_, sizeof(*(ptr))); 			     \
   })
 
-#define xchg_relaxed(ptr, x)						\
+#define arch_xchg_relaxed(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
@@ -467,7 +467,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
 	return old;
 }
-#define cmpxchg(ptr, o, n)						 \
+#define arch_cmpxchg(ptr, o, n)						 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
@@ -476,7 +476,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
   })
 
 
-#define cmpxchg_local(ptr, o, n)					 \
+#define arch_cmpxchg_local(ptr, o, n)					 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
@@ -484,7 +484,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 				    (unsigned long)_n_, sizeof(*(ptr))); \
   })
 
-#define cmpxchg_relaxed(ptr, o, n)					\
+#define arch_cmpxchg_relaxed(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -493,7 +493,7 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 			sizeof(*(ptr)));				\
 })
 
-#define cmpxchg_acquire(ptr, o, n)					\
+#define arch_cmpxchg_acquire(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -502,29 +502,29 @@ __cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
 			sizeof(*(ptr)));				\
 })
 #ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n)						\
+#define arch_cmpxchg64(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
+	arch_cmpxchg((ptr), (o), (n));					\
   })
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
   })
-#define cmpxchg64_relaxed(ptr, o, n)					\
+#define arch_cmpxchg64_relaxed(ptr, o, n)				\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_relaxed((ptr), (o), (n));				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
-#define cmpxchg64_acquire(ptr, o, n)					\
+#define arch_cmpxchg64_acquire(ptr, o, n)				\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_acquire((ptr), (o), (n));				\
+	arch_cmpxchg_acquire((ptr), (o), (n));				\
 })
 #else
 #include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 07318bc63e3d0..b676c4fb90fd7 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -37,7 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
 	u32 val = 0;
 
-	if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
+	if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
 		return;
 
 	queued_spin_lock_slowpath(lock, val);
-- 
GitLab


From 9efbb355831014ca004d241db8ede182c019b9bf Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:27 +0100
Subject: [PATCH 1623/3804] locking/atomic: riscv: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates riscv to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Palmer Dabbelt <palmerdabbelt@google.com>
Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-29-mark.rutland@arm.com
---
 arch/riscv/Kconfig               |   1 +
 arch/riscv/include/asm/atomic.h  | 128 +++++++++++++++----------------
 arch/riscv/include/asm/cmpxchg.h |  34 ++++----
 3 files changed, 82 insertions(+), 81 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index a8ad8eb761206..c59b9f4a9d62a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -12,6 +12,7 @@ config 32BIT
 
 config RISCV
 	def_bool y
+	select ARCH_ATOMIC
 	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
index 400a8c8b6de75..ac9bdf4fc4044 100644
--- a/arch/riscv/include/asm/atomic.h
+++ b/arch/riscv/include/asm/atomic.h
@@ -25,22 +25,22 @@
 #define __atomic_release_fence()					\
 	__asm__ __volatile__(RISCV_RELEASE_BARRIER "" ::: "memory");
 
-static __always_inline int atomic_read(const atomic_t *v)
+static __always_inline int arch_atomic_read(const atomic_t *v)
 {
 	return READ_ONCE(v->counter);
 }
-static __always_inline void atomic_set(atomic_t *v, int i)
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
 {
 	WRITE_ONCE(v->counter, i);
 }
 
 #ifndef CONFIG_GENERIC_ATOMIC64
 #define ATOMIC64_INIT(i) { (i) }
-static __always_inline s64 atomic64_read(const atomic64_t *v)
+static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
 {
 	return READ_ONCE(v->counter);
 }
-static __always_inline void atomic64_set(atomic64_t *v, s64 i)
+static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
 {
 	WRITE_ONCE(v->counter, i);
 }
@@ -53,7 +53,7 @@ static __always_inline void atomic64_set(atomic64_t *v, s64 i)
  */
 #define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix)		\
 static __always_inline							\
-void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)		\
+void arch_atomic##prefix##_##op(c_type i, atomic##prefix##_t *v)	\
 {									\
 	__asm__ __volatile__ (						\
 		"	amo" #asm_op "." #asm_type " zero, %1, %0"	\
@@ -87,7 +87,7 @@ ATOMIC_OPS(xor, xor,  i)
  */
 #define ATOMIC_FETCH_OP(op, asm_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
-c_type atomic##prefix##_fetch_##op##_relaxed(c_type i,			\
+c_type arch_atomic##prefix##_fetch_##op##_relaxed(c_type i,		\
 					     atomic##prefix##_t *v)	\
 {									\
 	register c_type ret;						\
@@ -99,7 +99,7 @@ c_type atomic##prefix##_fetch_##op##_relaxed(c_type i,			\
 	return ret;							\
 }									\
 static __always_inline							\
-c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
+c_type arch_atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
 {									\
 	register c_type ret;						\
 	__asm__ __volatile__ (						\
@@ -112,15 +112,15 @@ c_type atomic##prefix##_fetch_##op(c_type i, atomic##prefix##_t *v)	\
 
 #define ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_type, c_type, prefix)	\
 static __always_inline							\
-c_type atomic##prefix##_##op##_return_relaxed(c_type i,			\
+c_type arch_atomic##prefix##_##op##_return_relaxed(c_type i,		\
 					      atomic##prefix##_t *v)	\
 {									\
-        return atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I;	\
+        return arch_atomic##prefix##_fetch_##op##_relaxed(i, v) c_op I;	\
 }									\
 static __always_inline							\
-c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v)	\
+c_type arch_atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v)	\
 {									\
-        return atomic##prefix##_fetch_##op(i, v) c_op I;		\
+        return arch_atomic##prefix##_fetch_##op(i, v) c_op I;		\
 }
 
 #ifdef CONFIG_GENERIC_ATOMIC64
@@ -138,26 +138,26 @@ c_type atomic##prefix##_##op##_return(c_type i, atomic##prefix##_t *v)	\
 ATOMIC_OPS(add, add, +,  i)
 ATOMIC_OPS(sub, add, +, -i)
 
-#define atomic_add_return_relaxed	atomic_add_return_relaxed
-#define atomic_sub_return_relaxed	atomic_sub_return_relaxed
-#define atomic_add_return		atomic_add_return
-#define atomic_sub_return		atomic_sub_return
+#define arch_atomic_add_return_relaxed	arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed	arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return		arch_atomic_add_return
+#define arch_atomic_sub_return		arch_atomic_sub_return
 
-#define atomic_fetch_add_relaxed	atomic_fetch_add_relaxed
-#define atomic_fetch_sub_relaxed	atomic_fetch_sub_relaxed
-#define atomic_fetch_add		atomic_fetch_add
-#define atomic_fetch_sub		atomic_fetch_sub
+#define arch_atomic_fetch_add_relaxed	arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed	arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add		arch_atomic_fetch_add
+#define arch_atomic_fetch_sub		arch_atomic_fetch_sub
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_add_return_relaxed	atomic64_add_return_relaxed
-#define atomic64_sub_return_relaxed	atomic64_sub_return_relaxed
-#define atomic64_add_return		atomic64_add_return
-#define atomic64_sub_return		atomic64_sub_return
-
-#define atomic64_fetch_add_relaxed	atomic64_fetch_add_relaxed
-#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub_relaxed
-#define atomic64_fetch_add		atomic64_fetch_add
-#define atomic64_fetch_sub		atomic64_fetch_sub
+#define arch_atomic64_add_return_relaxed	arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed	arch_atomic64_sub_return_relaxed
+#define arch_atomic64_add_return		arch_atomic64_add_return
+#define arch_atomic64_sub_return		arch_atomic64_sub_return
+
+#define arch_atomic64_fetch_add_relaxed	arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed	arch_atomic64_fetch_sub_relaxed
+#define arch_atomic64_fetch_add		arch_atomic64_fetch_add
+#define arch_atomic64_fetch_sub		arch_atomic64_fetch_sub
 #endif
 
 #undef ATOMIC_OPS
@@ -175,20 +175,20 @@ ATOMIC_OPS(and, and, i)
 ATOMIC_OPS( or,  or, i)
 ATOMIC_OPS(xor, xor, i)
 
-#define atomic_fetch_and_relaxed	atomic_fetch_and_relaxed
-#define atomic_fetch_or_relaxed		atomic_fetch_or_relaxed
-#define atomic_fetch_xor_relaxed	atomic_fetch_xor_relaxed
-#define atomic_fetch_and		atomic_fetch_and
-#define atomic_fetch_or			atomic_fetch_or
-#define atomic_fetch_xor		atomic_fetch_xor
+#define arch_atomic_fetch_and_relaxed	arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed	arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed	arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and		arch_atomic_fetch_and
+#define arch_atomic_fetch_or		arch_atomic_fetch_or
+#define arch_atomic_fetch_xor		arch_atomic_fetch_xor
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-#define atomic64_fetch_and_relaxed	atomic64_fetch_and_relaxed
-#define atomic64_fetch_or_relaxed	atomic64_fetch_or_relaxed
-#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor_relaxed
-#define atomic64_fetch_and		atomic64_fetch_and
-#define atomic64_fetch_or		atomic64_fetch_or
-#define atomic64_fetch_xor		atomic64_fetch_xor
+#define arch_atomic64_fetch_and_relaxed	arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed	arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed	arch_atomic64_fetch_xor_relaxed
+#define arch_atomic64_fetch_and		arch_atomic64_fetch_and
+#define arch_atomic64_fetch_or		arch_atomic64_fetch_or
+#define arch_atomic64_fetch_xor		arch_atomic64_fetch_xor
 #endif
 
 #undef ATOMIC_OPS
@@ -197,7 +197,7 @@ ATOMIC_OPS(xor, xor, i)
 #undef ATOMIC_OP_RETURN
 
 /* This is required to provide a full barrier on success. */
-static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+static __always_inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
        int prev, rc;
 
@@ -214,10 +214,10 @@ static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 		: "memory");
 	return prev;
 }
-#define atomic_fetch_add_unless atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+static __always_inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
        s64 prev;
        long rc;
@@ -235,7 +235,7 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
 		: "memory");
 	return prev;
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
 #endif
 
 /*
@@ -244,45 +244,45 @@ static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u
  */
 #define ATOMIC_OP(c_t, prefix, size)					\
 static __always_inline							\
-c_t atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n)		\
+c_t arch_atomic##prefix##_xchg_relaxed(atomic##prefix##_t *v, c_t n)	\
 {									\
 	return __xchg_relaxed(&(v->counter), n, size);			\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n)		\
+c_t arch_atomic##prefix##_xchg_acquire(atomic##prefix##_t *v, c_t n)	\
 {									\
 	return __xchg_acquire(&(v->counter), n, size);			\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n)		\
+c_t arch_atomic##prefix##_xchg_release(atomic##prefix##_t *v, c_t n)	\
 {									\
 	return __xchg_release(&(v->counter), n, size);			\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n)			\
+c_t arch_atomic##prefix##_xchg(atomic##prefix##_t *v, c_t n)		\
 {									\
 	return __xchg(&(v->counter), n, size);				\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v,		\
+c_t arch_atomic##prefix##_cmpxchg_relaxed(atomic##prefix##_t *v,	\
 				     c_t o, c_t n)			\
 {									\
 	return __cmpxchg_relaxed(&(v->counter), o, n, size);		\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v,		\
+c_t arch_atomic##prefix##_cmpxchg_acquire(atomic##prefix##_t *v,	\
 				     c_t o, c_t n)			\
 {									\
 	return __cmpxchg_acquire(&(v->counter), o, n, size);		\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v,		\
+c_t arch_atomic##prefix##_cmpxchg_release(atomic##prefix##_t *v,	\
 				     c_t o, c_t n)			\
 {									\
 	return __cmpxchg_release(&(v->counter), o, n, size);		\
 }									\
 static __always_inline							\
-c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n)	\
+c_t arch_atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n)	\
 {									\
 	return __cmpxchg(&(v->counter), o, n, size);			\
 }
@@ -298,19 +298,19 @@ c_t atomic##prefix##_cmpxchg(atomic##prefix##_t *v, c_t o, c_t n)	\
 
 ATOMIC_OPS()
 
-#define atomic_xchg_relaxed atomic_xchg_relaxed
-#define atomic_xchg_acquire atomic_xchg_acquire
-#define atomic_xchg_release atomic_xchg_release
-#define atomic_xchg atomic_xchg
-#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
-#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
-#define atomic_cmpxchg_release atomic_cmpxchg_release
-#define atomic_cmpxchg atomic_cmpxchg
+#define arch_atomic_xchg_relaxed	arch_atomic_xchg_relaxed
+#define arch_atomic_xchg_acquire	arch_atomic_xchg_acquire
+#define arch_atomic_xchg_release	arch_atomic_xchg_release
+#define arch_atomic_xchg		arch_atomic_xchg
+#define arch_atomic_cmpxchg_relaxed	arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg_acquire	arch_atomic_cmpxchg_acquire
+#define arch_atomic_cmpxchg_release	arch_atomic_cmpxchg_release
+#define arch_atomic_cmpxchg		arch_atomic_cmpxchg
 
 #undef ATOMIC_OPS
 #undef ATOMIC_OP
 
-static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
+static __always_inline int arch_atomic_sub_if_positive(atomic_t *v, int offset)
 {
        int prev, rc;
 
@@ -328,10 +328,10 @@ static __always_inline int atomic_sub_if_positive(atomic_t *v, int offset)
 	return prev - offset;
 }
 
-#define atomic_dec_if_positive(v)	atomic_sub_if_positive(v, 1)
+#define arch_atomic_dec_if_positive(v)	arch_atomic_sub_if_positive(v, 1)
 
 #ifndef CONFIG_GENERIC_ATOMIC64
-static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
+static __always_inline s64 arch_atomic64_sub_if_positive(atomic64_t *v, s64 offset)
 {
        s64 prev;
        long rc;
@@ -350,7 +350,7 @@ static __always_inline s64 atomic64_sub_if_positive(atomic64_t *v, s64 offset)
 	return prev - offset;
 }
 
-#define atomic64_dec_if_positive(v)	atomic64_sub_if_positive(v, 1)
+#define arch_atomic64_dec_if_positive(v)	arch_atomic64_sub_if_positive(v, 1)
 #endif
 
 #endif /* _ASM_RISCV_ATOMIC_H */
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 262e5bbb27760..36dc962f63436 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -37,7 +37,7 @@
 	__ret;								\
 })
 
-#define xchg_relaxed(ptr, x)						\
+#define arch_xchg_relaxed(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
@@ -72,7 +72,7 @@
 	__ret;								\
 })
 
-#define xchg_acquire(ptr, x)						\
+#define arch_xchg_acquire(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_acquire((ptr),			\
@@ -107,7 +107,7 @@
 	__ret;								\
 })
 
-#define xchg_release(ptr, x)						\
+#define arch_xchg_release(ptr, x)					\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg_release((ptr),			\
@@ -140,7 +140,7 @@
 	__ret;								\
 })
 
-#define xchg(ptr, x)							\
+#define arch_xchg(ptr, x)						\
 ({									\
 	__typeof__(*(ptr)) _x_ = (x);					\
 	(__typeof__(*(ptr))) __xchg((ptr), _x_, sizeof(*(ptr)));	\
@@ -149,13 +149,13 @@
 #define xchg32(ptr, x)							\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
-	xchg((ptr), (x));						\
+	arch_xchg((ptr), (x));						\
 })
 
 #define xchg64(ptr, x)							\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	xchg((ptr), (x));						\
+	arch_xchg((ptr), (x));						\
 })
 
 /*
@@ -199,7 +199,7 @@
 	__ret;								\
 })
 
-#define cmpxchg_relaxed(ptr, o, n)					\
+#define arch_cmpxchg_relaxed(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -245,7 +245,7 @@
 	__ret;								\
 })
 
-#define cmpxchg_acquire(ptr, o, n)					\
+#define arch_cmpxchg_acquire(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -291,7 +291,7 @@
 	__ret;								\
 })
 
-#define cmpxchg_release(ptr, o, n)					\
+#define arch_cmpxchg_release(ptr, o, n)					\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -337,7 +337,7 @@
 	__ret;								\
 })
 
-#define cmpxchg(ptr, o, n)						\
+#define arch_cmpxchg(ptr, o, n)						\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -345,31 +345,31 @@
 				       _o_, _n_, sizeof(*(ptr)));	\
 })
 
-#define cmpxchg_local(ptr, o, n)					\
+#define arch_cmpxchg_local(ptr, o, n)					\
 	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
 
 #define cmpxchg32(ptr, o, n)						\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
-	cmpxchg((ptr), (o), (n));					\
+	arch_cmpxchg((ptr), (o), (n));					\
 })
 
 #define cmpxchg32_local(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 4);				\
-	cmpxchg_relaxed((ptr), (o), (n))				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
 
-#define cmpxchg64(ptr, o, n)						\
+#define arch_cmpxchg64(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg((ptr), (o), (n));					\
+	arch_cmpxchg((ptr), (o), (n));					\
 })
 
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_relaxed((ptr), (o), (n));				\
+	arch_cmpxchg_relaxed((ptr), (o), (n));				\
 })
 
 #endif /* _ASM_RISCV_CMPXCHG_H */
-- 
GitLab


From 8c6417551309fe3654b5f761214303aef361d3e8 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:28 +0100
Subject: [PATCH 1624/3804] locking/atomic: sh: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates sh to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rich Felker <dalias@libc.org>
Cc: Will Deacon <will@kernel.org>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-30-mark.rutland@arm.com
---
 arch/sh/Kconfig                   | 1 +
 arch/sh/include/asm/atomic-grb.h  | 6 +++---
 arch/sh/include/asm/atomic-irq.h  | 6 +++---
 arch/sh/include/asm/atomic-llsc.h | 6 +++---
 arch/sh/include/asm/atomic.h      | 8 ++++----
 arch/sh/include/asm/cmpxchg.h     | 4 ++--
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 68129537e3509..d2925cbb6fa44 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -2,6 +2,7 @@
 config SUPERH
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM && MMU
 	select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU
 	select ARCH_HAVE_CUSTOM_GPIO_H
diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h
index aace62d42288a..059791fd394fc 100644
--- a/arch/sh/include/asm/atomic-grb.h
+++ b/arch/sh/include/asm/atomic-grb.h
@@ -3,7 +3,7 @@
 #define __ASM_SH_ATOMIC_GRB_H
 
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	int tmp;							\
 									\
@@ -23,7 +23,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	int tmp;							\
 									\
@@ -45,7 +45,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	int res, tmp;							\
 									\
diff --git a/arch/sh/include/asm/atomic-irq.h b/arch/sh/include/asm/atomic-irq.h
index ee523bd2120f7..7665de9d00d0d 100644
--- a/arch/sh/include/asm/atomic-irq.h
+++ b/arch/sh/include/asm/atomic-irq.h
@@ -11,7 +11,7 @@
  */
 
 #define ATOMIC_OP(op, c_op)						\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long flags;						\
 									\
@@ -21,7 +21,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long temp, flags;					\
 									\
@@ -35,7 +35,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op, c_op)					\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long temp, flags;					\
 									\
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index 1d06e4d288dc2..b63dcfbfa14ef 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -17,7 +17,7 @@
  */
 
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long tmp;						\
 									\
@@ -32,7 +32,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long temp;						\
 									\
@@ -50,7 +50,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long res, temp;					\
 									\
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h
index 7c2a8a703b9a2..528bfeda78f56 100644
--- a/arch/sh/include/asm/atomic.h
+++ b/arch/sh/include/asm/atomic.h
@@ -19,8 +19,8 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
+#define arch_atomic_read(v)		READ_ONCE((v)->counter)
+#define arch_atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>
@@ -30,8 +30,8 @@
 #include <asm/atomic-irq.h>
 #endif
 
-#define atomic_xchg(v, new)		(xchg(&((v)->counter), new))
-#define atomic_cmpxchg(v, o, n)		(cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_xchg(v, new)	(arch_xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, o, n)	(arch_cmpxchg(&((v)->counter), (o), (n)))
 
 #endif /* CONFIG_CPU_J2 */
 
diff --git a/arch/sh/include/asm/cmpxchg.h b/arch/sh/include/asm/cmpxchg.h
index e9501d85c2787..0ed9b3f4a5779 100644
--- a/arch/sh/include/asm/cmpxchg.h
+++ b/arch/sh/include/asm/cmpxchg.h
@@ -45,7 +45,7 @@ extern void __xchg_called_with_bad_pointer(void);
 	__xchg__res;					\
 })
 
-#define xchg(ptr,x)	\
+#define arch_xchg(ptr,x)	\
 	((__typeof__(*(ptr)))__xchg((ptr),(unsigned long)(x), sizeof(*(ptr))))
 
 /* This function doesn't exist, so you'll get a linker error
@@ -63,7 +63,7 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
 	return old;
 }
 
-#define cmpxchg(ptr,o,n)						 \
+#define arch_cmpxchg(ptr,o,n)						 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
-- 
GitLab


From ff5b4f1ed580c59d1f26ddddc6b2622347571cec Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:29 +0100
Subject: [PATCH 1625/3804] locking/atomic: sparc: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates sparc to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-31-mark.rutland@arm.com
---
 arch/sparc/Kconfig                  |  1 +
 arch/sparc/include/asm/atomic_32.h  | 38 +++++++++++++-------------
 arch/sparc/include/asm/atomic_64.h  | 36 ++++++++++++-------------
 arch/sparc/include/asm/cmpxchg_32.h | 10 +++----
 arch/sparc/include/asm/cmpxchg_64.h | 10 +++----
 arch/sparc/lib/atomic32.c           | 24 ++++++++---------
 arch/sparc/lib/atomic_64.S          | 42 ++++++++++++++---------------
 7 files changed, 81 insertions(+), 80 deletions(-)

diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 164a5254c91c0..46790083e918e 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -13,6 +13,7 @@ config 64BIT
 config SPARC
 	bool
 	default y
+	select ARCH_ATOMIC
 	select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select DMA_OPS
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h
index efad5532f1699..d775daa83d129 100644
--- a/arch/sparc/include/asm/atomic_32.h
+++ b/arch/sparc/include/asm/atomic_32.h
@@ -18,30 +18,30 @@
 #include <asm/barrier.h>
 #include <asm-generic/atomic64.h>
 
-int atomic_add_return(int, atomic_t *);
-int atomic_fetch_add(int, atomic_t *);
-int atomic_fetch_and(int, atomic_t *);
-int atomic_fetch_or(int, atomic_t *);
-int atomic_fetch_xor(int, atomic_t *);
-int atomic_cmpxchg(atomic_t *, int, int);
-int atomic_xchg(atomic_t *, int);
-int atomic_fetch_add_unless(atomic_t *, int, int);
-void atomic_set(atomic_t *, int);
+int arch_atomic_add_return(int, atomic_t *);
+int arch_atomic_fetch_add(int, atomic_t *);
+int arch_atomic_fetch_and(int, atomic_t *);
+int arch_atomic_fetch_or(int, atomic_t *);
+int arch_atomic_fetch_xor(int, atomic_t *);
+int arch_atomic_cmpxchg(atomic_t *, int, int);
+int arch_atomic_xchg(atomic_t *, int);
+int arch_atomic_fetch_add_unless(atomic_t *, int, int);
+void arch_atomic_set(atomic_t *, int);
 
-#define atomic_fetch_add_unless	atomic_fetch_add_unless
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
 
-#define atomic_set_release(v, i)	atomic_set((v), (i))
+#define arch_atomic_set_release(v, i)	arch_atomic_set((v), (i))
 
-#define atomic_read(v)          READ_ONCE((v)->counter)
+#define arch_atomic_read(v)		READ_ONCE((v)->counter)
 
-#define atomic_add(i, v)	((void)atomic_add_return( (int)(i), (v)))
-#define atomic_sub(i, v)	((void)atomic_add_return(-(int)(i), (v)))
+#define arch_atomic_add(i, v)	((void)arch_atomic_add_return( (int)(i), (v)))
+#define arch_atomic_sub(i, v)	((void)arch_atomic_add_return(-(int)(i), (v)))
 
-#define atomic_and(i, v)	((void)atomic_fetch_and((i), (v)))
-#define atomic_or(i, v)		((void)atomic_fetch_or((i), (v)))
-#define atomic_xor(i, v)	((void)atomic_fetch_xor((i), (v)))
+#define arch_atomic_and(i, v)	((void)arch_atomic_fetch_and((i), (v)))
+#define arch_atomic_or(i, v)	((void)arch_atomic_fetch_or((i), (v)))
+#define arch_atomic_xor(i, v)	((void)arch_atomic_fetch_xor((i), (v)))
 
-#define atomic_sub_return(i, v)	(atomic_add_return(-(int)(i), (v)))
-#define atomic_fetch_sub(i, v)  (atomic_fetch_add (-(int)(i), (v)))
+#define arch_atomic_sub_return(i, v)	(arch_atomic_add_return(-(int)(i), (v)))
+#define arch_atomic_fetch_sub(i, v)	(arch_atomic_fetch_add (-(int)(i), (v)))
 
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 6b235d3d1d9db..077891686715a 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -14,23 +14,23 @@
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		READ_ONCE((v)->counter)
-#define atomic64_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic_read(v)	READ_ONCE((v)->counter)
+#define arch_atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
-#define atomic64_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define arch_atomic64_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)							\
-void atomic_##op(int, atomic_t *);					\
-void atomic64_##op(s64, atomic64_t *);
+void arch_atomic_##op(int, atomic_t *);					\
+void arch_atomic64_##op(s64, atomic64_t *);
 
 #define ATOMIC_OP_RETURN(op)						\
-int atomic_##op##_return(int, atomic_t *);				\
-s64 atomic64_##op##_return(s64, atomic64_t *);
+int arch_atomic_##op##_return(int, atomic_t *);				\
+s64 arch_atomic64_##op##_return(s64, atomic64_t *);
 
 #define ATOMIC_FETCH_OP(op)						\
-int atomic_fetch_##op(int, atomic_t *);					\
-s64 atomic64_fetch_##op(s64, atomic64_t *);
+int arch_atomic_fetch_##op(int, atomic_t *);				\
+s64 arch_atomic64_fetch_##op(s64, atomic64_t *);
 
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
@@ -49,18 +49,18 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_cmpxchg(v, o, n) (arch_cmpxchg(&((v)->counter), (o), (n)))
 
-static inline int atomic_xchg(atomic_t *v, int new)
+static inline int arch_atomic_xchg(atomic_t *v, int new)
 {
-	return xchg(&v->counter, new);
+	return arch_xchg(&v->counter, new);
 }
 
-#define atomic64_cmpxchg(v, o, n) \
-	((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic64_cmpxchg(v, o, n) \
+	((__typeof__((v)->counter))arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
-s64 atomic64_dec_if_positive(atomic64_t *v);
-#define atomic64_dec_if_positive atomic64_dec_if_positive
+s64 arch_atomic64_dec_if_positive(atomic64_t *v);
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h
index 86e3da1d973d7..27a57a3a7597e 100644
--- a/arch/sparc/include/asm/cmpxchg_32.h
+++ b/arch/sparc/include/asm/cmpxchg_32.h
@@ -25,7 +25,7 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
 	return x;
 }
 
-#define xchg(ptr,x) ({(__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)));})
+#define arch_xchg(ptr,x) ({(__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)));})
 
 /* Emulate cmpxchg() the same way we emulate atomics,
  * by hashing the object address and indexing into an array
@@ -55,7 +55,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
 	return old;
 }
 
-#define cmpxchg(ptr, o, n)						\
+#define arch_cmpxchg(ptr, o, n)						\
 ({									\
 	__typeof__(*(ptr)) _o_ = (o);					\
 	__typeof__(*(ptr)) _n_ = (n);					\
@@ -64,7 +64,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
 })
 
 u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
-#define cmpxchg64(ptr, old, new)	__cmpxchg_u64(ptr, old, new)
+#define arch_cmpxchg64(ptr, old, new)	__cmpxchg_u64(ptr, old, new)
 
 #include <asm-generic/cmpxchg-local.h>
 
@@ -72,9 +72,9 @@ u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new);
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
  */
-#define cmpxchg_local(ptr, o, n)				  	       \
+#define arch_cmpxchg_local(ptr, o, n)				  	       \
 	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
 			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
 
 #endif /* __ARCH_SPARC_CMPXCHG__ */
diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h
index 8915b577b92fe..8c39a99811871 100644
--- a/arch/sparc/include/asm/cmpxchg_64.h
+++ b/arch/sparc/include/asm/cmpxchg_64.h
@@ -52,7 +52,7 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long
 	return val;
 }
 
-#define xchg(ptr,x)							\
+#define arch_xchg(ptr,x)							\
 ({	__typeof__(*(ptr)) __ret;					\
 	__ret = (__typeof__(*(ptr)))					\
 		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));	\
@@ -168,7 +168,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 	return old;
 }
 
-#define cmpxchg(ptr,o,n)						 \
+#define arch_cmpxchg(ptr,o,n)						 \
   ({									 \
      __typeof__(*(ptr)) _o_ = (o);					 \
      __typeof__(*(ptr)) _n_ = (n);					 \
@@ -195,14 +195,14 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
 	return old;
 }
 
-#define cmpxchg_local(ptr, o, n)				  	\
+#define arch_cmpxchg_local(ptr, o, n)				  	\
 	((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o),	\
 			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n)					\
+#define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
-	cmpxchg_local((ptr), (o), (n));					\
+	arch_cmpxchg_local((ptr), (o), (n));				\
   })
-#define cmpxchg64(ptr, o, n)	cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64(ptr, o, n)	arch_cmpxchg64_local((ptr), (o), (n))
 
 #endif /* __ARCH_SPARC64_CMPXCHG__ */
diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c
index 281fa634bb1a8..8b81d0f00c971 100644
--- a/arch/sparc/lib/atomic32.c
+++ b/arch/sparc/lib/atomic32.c
@@ -29,7 +29,7 @@ static DEFINE_SPINLOCK(dummy);
 #endif /* SMP */
 
 #define ATOMIC_FETCH_OP(op, c_op)					\
-int atomic_fetch_##op(int i, atomic_t *v)				\
+int arch_atomic_fetch_##op(int i, atomic_t *v)				\
 {									\
 	int ret;							\
 	unsigned long flags;						\
@@ -41,10 +41,10 @@ int atomic_fetch_##op(int i, atomic_t *v)				\
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
 	return ret;							\
 }									\
-EXPORT_SYMBOL(atomic_fetch_##op);
+EXPORT_SYMBOL(arch_atomic_fetch_##op);
 
 #define ATOMIC_OP_RETURN(op, c_op)					\
-int atomic_##op##_return(int i, atomic_t *v)				\
+int arch_atomic_##op##_return(int i, atomic_t *v)			\
 {									\
 	int ret;							\
 	unsigned long flags;						\
@@ -55,7 +55,7 @@ int atomic_##op##_return(int i, atomic_t *v)				\
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);			\
 	return ret;							\
 }									\
-EXPORT_SYMBOL(atomic_##op##_return);
+EXPORT_SYMBOL(arch_atomic_##op##_return);
 
 ATOMIC_OP_RETURN(add, +=)
 
@@ -67,7 +67,7 @@ ATOMIC_FETCH_OP(xor, ^=)
 #undef ATOMIC_FETCH_OP
 #undef ATOMIC_OP_RETURN
 
-int atomic_xchg(atomic_t *v, int new)
+int arch_atomic_xchg(atomic_t *v, int new)
 {
 	int ret;
 	unsigned long flags;
@@ -78,9 +78,9 @@ int atomic_xchg(atomic_t *v, int new)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(atomic_xchg);
+EXPORT_SYMBOL(arch_atomic_xchg);
 
-int atomic_cmpxchg(atomic_t *v, int old, int new)
+int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
 	unsigned long flags;
@@ -93,9 +93,9 @@ int atomic_cmpxchg(atomic_t *v, int old, int new)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(atomic_cmpxchg);
+EXPORT_SYMBOL(arch_atomic_cmpxchg);
 
-int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
 	unsigned long flags;
@@ -107,10 +107,10 @@ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 	return ret;
 }
-EXPORT_SYMBOL(atomic_fetch_add_unless);
+EXPORT_SYMBOL(arch_atomic_fetch_add_unless);
 
 /* Atomic operations are already serializing */
-void atomic_set(atomic_t *v, int i)
+void arch_atomic_set(atomic_t *v, int i)
 {
 	unsigned long flags;
 
@@ -118,7 +118,7 @@ void atomic_set(atomic_t *v, int i)
 	v->counter = i;
 	spin_unlock_irqrestore(ATOMIC_HASH(v), flags);
 }
-EXPORT_SYMBOL(atomic_set);
+EXPORT_SYMBOL(arch_atomic_set);
 
 unsigned long ___set_bit(unsigned long *addr, unsigned long mask)
 {
diff --git a/arch/sparc/lib/atomic_64.S b/arch/sparc/lib/atomic_64.S
index 456b65a30ecf6..8245d4a97301c 100644
--- a/arch/sparc/lib/atomic_64.S
+++ b/arch/sparc/lib/atomic_64.S
@@ -19,7 +19,7 @@
 	 */
 
 #define ATOMIC_OP(op)							\
-ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+ENTRY(arch_atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
 	BACKOFF_SETUP(%o2);						\
 1:	lduw	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -30,11 +30,11 @@ ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
 	retl;								\
 	 nop;								\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic_##op);							\
-EXPORT_SYMBOL(atomic_##op);
+ENDPROC(arch_atomic_##op);						\
+EXPORT_SYMBOL(arch_atomic_##op);
 
 #define ATOMIC_OP_RETURN(op)						\
-ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+ENTRY(arch_atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */\
 	BACKOFF_SETUP(%o2);						\
 1:	lduw	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -45,11 +45,11 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
 	retl;								\
 	 sra	%g1, 0, %o0;						\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic_##op##_return);						\
-EXPORT_SYMBOL(atomic_##op##_return);
+ENDPROC(arch_atomic_##op##_return);					\
+EXPORT_SYMBOL(arch_atomic_##op##_return);
 
 #define ATOMIC_FETCH_OP(op)						\
-ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
+ENTRY(arch_atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	lduw	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -60,8 +60,8 @@ ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 	retl;								\
 	 sra	%g1, 0, %o0;						\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic_fetch_##op);						\
-EXPORT_SYMBOL(atomic_fetch_##op);
+ENDPROC(arch_atomic_fetch_##op);					\
+EXPORT_SYMBOL(arch_atomic_fetch_##op);
 
 ATOMIC_OP(add)
 ATOMIC_OP_RETURN(add)
@@ -85,7 +85,7 @@ ATOMIC_FETCH_OP(xor)
 #undef ATOMIC_OP
 
 #define ATOMIC64_OP(op)							\
-ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
+ENTRY(arch_atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	ldx	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -96,11 +96,11 @@ ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */		\
 	retl;								\
 	 nop;								\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic64_##op);							\
-EXPORT_SYMBOL(atomic64_##op);
+ENDPROC(arch_atomic64_##op);						\
+EXPORT_SYMBOL(arch_atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op)						\
-ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
+ENTRY(arch_atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	ldx	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -111,11 +111,11 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */	\
 	retl;								\
 	 op	%g1, %o0, %o0;						\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic64_##op##_return);					\
-EXPORT_SYMBOL(atomic64_##op##_return);
+ENDPROC(arch_atomic64_##op##_return);					\
+EXPORT_SYMBOL(arch_atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op)						\
-ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
+ENTRY(arch_atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 	BACKOFF_SETUP(%o2);						\
 1:	ldx	[%o1], %g1;						\
 	op	%g1, %o0, %g7;						\
@@ -126,8 +126,8 @@ ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */	\
 	retl;								\
 	 mov	%g1, %o0;						\
 2:	BACKOFF_SPIN(%o2, %o3, 1b);					\
-ENDPROC(atomic64_fetch_##op);						\
-EXPORT_SYMBOL(atomic64_fetch_##op);
+ENDPROC(arch_atomic64_fetch_##op);					\
+EXPORT_SYMBOL(arch_atomic64_fetch_##op);
 
 ATOMIC64_OP(add)
 ATOMIC64_OP_RETURN(add)
@@ -150,7 +150,7 @@ ATOMIC64_FETCH_OP(xor)
 #undef ATOMIC64_OP_RETURN
 #undef ATOMIC64_OP
 
-ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
+ENTRY(arch_atomic64_dec_if_positive) /* %o0 = atomic_ptr */
 	BACKOFF_SETUP(%o2)
 1:	ldx	[%o0], %g1
 	brlez,pn %g1, 3f
@@ -162,5 +162,5 @@ ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
 3:	retl
 	 sub	%g1, 1, %o0
 2:	BACKOFF_SPIN(%o2, %o3, 1b)
-ENDPROC(atomic64_dec_if_positive)
-EXPORT_SYMBOL(atomic64_dec_if_positive)
+ENDPROC(arch_atomic64_dec_if_positive)
+EXPORT_SYMBOL(arch_atomic64_dec_if_positive)
-- 
GitLab


From b9b12978a8e9a4bb77746e74eae37e587f7f8994 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:30 +0100
Subject: [PATCH 1626/3804] locking/atomic: xtensa: move to ARCH_ATOMIC

We'd like all architectures to convert to ARCH_ATOMIC, as once all
architectures are converted it will be possible to make significant
cleanups to the atomics headers, and this will make it much easier to
generically enable atomic functionality (e.g. debug logic in the
instrumented wrappers).

As a step towards that, this patch migrates xtensa to ARCH_ATOMIC. The
arch code provides arch_{atomic,atomic64,xchg,cmpxchg}*(), and common
code wraps these with optional instrumentation to provide the regular
functions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Max Filippov <jcmvbkbc@gmail.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-32-mark.rutland@arm.com
---
 arch/xtensa/Kconfig               |  1 +
 arch/xtensa/include/asm/atomic.h  | 26 +++++++++++++-------------
 arch/xtensa/include/asm/cmpxchg.h | 10 +++++-----
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 2332b21569938..39bb9bdae6b13 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -2,6 +2,7 @@
 config XTENSA
 	def_bool y
 	select ARCH_32BIT_OFF_T
+	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT if !MMU
 	select ARCH_HAS_DMA_PREP_COHERENT if MMU
 	select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 744c2f463845d..4361fe4247e30 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -43,7 +43,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		READ_ONCE((v)->counter)
+#define arch_atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -52,11 +52,11 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
+#define arch_atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if XCHAL_HAVE_EXCLUSIVE
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t *v)			\
+static inline void arch_atomic_##op(int i, atomic_t *v)			\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -74,7 +74,7 @@ static inline void atomic_##op(int i, atomic_t *v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t *v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t *v)		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -95,7 +95,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+static inline int arch_atomic_fetch_##op(int i, atomic_t *v)		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -116,7 +116,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 
 #elif XCHAL_HAVE_S32C1I
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t * v)			\
+static inline void arch_atomic_##op(int i, atomic_t * v)		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -135,7 +135,7 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t * v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t * v)	\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -157,7 +157,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t * v)		\
+static inline int arch_atomic_fetch_##op(int i, atomic_t * v)		\
 {									\
 	unsigned long tmp;						\
 	int result;							\
@@ -180,7 +180,7 @@ static inline int atomic_fetch_##op(int i, atomic_t * v)		\
 #else /* XCHAL_HAVE_S32C1I */
 
 #define ATOMIC_OP(op)							\
-static inline void atomic_##op(int i, atomic_t * v)			\
+static inline void arch_atomic_##op(int i, atomic_t * v)		\
 {									\
 	unsigned int vval;						\
 									\
@@ -198,7 +198,7 @@ static inline void atomic_##op(int i, atomic_t * v)			\
 }									\
 
 #define ATOMIC_OP_RETURN(op)						\
-static inline int atomic_##op##_return(int i, atomic_t * v)		\
+static inline int arch_atomic_##op##_return(int i, atomic_t * v)	\
 {									\
 	unsigned int vval;						\
 									\
@@ -218,7 +218,7 @@ static inline int atomic_##op##_return(int i, atomic_t * v)		\
 }
 
 #define ATOMIC_FETCH_OP(op)						\
-static inline int atomic_fetch_##op(int i, atomic_t * v)		\
+static inline int arch_atomic_fetch_##op(int i, atomic_t * v)		\
 {									\
 	unsigned int tmp, vval;						\
 									\
@@ -257,7 +257,7 @@ ATOMIC_OPS(xor)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n)))
+#define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new))
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 9c4d6e5316cee..3699e2818efb7 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -80,7 +80,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 	}
 }
 
-#define cmpxchg(ptr,o,n)						      \
+#define arch_cmpxchg(ptr,o,n)						      \
 	({ __typeof__(*(ptr)) _o_ = (o);				      \
 	   __typeof__(*(ptr)) _n_ = (n);				      \
 	   (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,	      \
@@ -107,11 +107,11 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
  * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
  * them available.
  */
-#define cmpxchg_local(ptr, o, n)				  	       \
+#define arch_cmpxchg_local(ptr, o, n)				  	       \
 	((__typeof__(*(ptr)))__generic_cmpxchg_local((ptr), (unsigned long)(o),\
 			(unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
-#define cmpxchg64(ptr, o, n)    cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
+#define arch_cmpxchg64(ptr, o, n)    arch_cmpxchg64_local((ptr), (o), (n))
 
 /*
  * xchg_u32
@@ -169,7 +169,7 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
 #endif
 }
 
-#define xchg(ptr,x) \
+#define arch_xchg(ptr,x) \
 	((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
 
 static inline u32 xchg_small(volatile void *ptr, u32 x, int size)
-- 
GitLab


From 3c1885187bc1faa0a1c52f7bd34550740a208169 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:31 +0100
Subject: [PATCH 1627/3804] locking/atomic: delete !ARCH_ATOMIC remnants

Now that all architectures implement ARCH_ATOMIC, we can make it
mandatory, removing the Kconfig symbol and logic for !ARCH_ATOMIC.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-33-mark.rutland@arm.com
---
 arch/Kconfig                    |    3 -
 arch/alpha/Kconfig              |    1 -
 arch/arc/Kconfig                |    1 -
 arch/arm/Kconfig                |    1 -
 arch/arm64/Kconfig              |    1 -
 arch/csky/Kconfig               |    1 -
 arch/h8300/Kconfig              |    1 -
 arch/hexagon/Kconfig            |    1 -
 arch/ia64/Kconfig               |    1 -
 arch/m68k/Kconfig               |    1 -
 arch/microblaze/Kconfig         |    1 -
 arch/mips/Kconfig               |    1 -
 arch/nds32/Kconfig              |    1 -
 arch/nios2/Kconfig              |    1 -
 arch/openrisc/Kconfig           |    1 -
 arch/parisc/Kconfig             |    1 -
 arch/powerpc/Kconfig            |    1 -
 arch/riscv/Kconfig              |    1 -
 arch/s390/Kconfig               |    1 -
 arch/sh/Kconfig                 |    1 -
 arch/sparc/Kconfig              |    1 -
 arch/um/Kconfig                 |    1 -
 arch/x86/Kconfig                |    1 -
 arch/xtensa/Kconfig             |    1 -
 include/asm-generic/atomic.h    |   44 +-
 include/asm-generic/atomic64.h  |   29 -
 include/asm-generic/cmpxchg.h   |   21 -
 include/linux/atomic-fallback.h | 2595 -------------------------------
 include/linux/atomic.h          |    4 -
 scripts/atomic/check-atomics.sh |    1 -
 scripts/atomic/gen-atomics.sh   |    1 -
 31 files changed, 3 insertions(+), 2718 deletions(-)
 delete mode 100644 include/linux/atomic-fallback.h

diff --git a/arch/Kconfig b/arch/Kconfig
index 3fb3b12d4a958..c45b770d3579a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -11,9 +11,6 @@ source "arch/$(SRCARCH)/Kconfig"
 
 menu "General architecture-dependent options"
 
-config ARCH_ATOMIC
-	bool
-
 config CRASH_CORE
 	bool
 
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 7920fc2e2a2a6..5998106faa600 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -2,7 +2,6 @@
 config ALPHA
 	bool
 	default y
-	select ARCH_ATOMIC
 	select ARCH_32BIT_USTAT_F_TINODE
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 098ecc72d0488..2d98501c08971 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -6,7 +6,6 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
-	select ARCH_ATOMIC
 	select ARCH_HAS_CACHE_LINE_SIZE
 	select ARCH_HAS_DEBUG_VM_PGTABLE
 	select ARCH_HAS_DMA_PREP_COHERENT
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b7334a6643b9c..24804f11302d7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -3,7 +3,6 @@ config ARM
 	bool
 	default y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 62ab429d1f42e..9f1d8566bbf95 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -9,7 +9,6 @@ config ARM64
 	select ACPI_MCFG if (ACPI && PCI)
 	select ACPI_SPCR_TABLE if ACPI
 	select ACPI_PPTT if ACPI
-	select ARCH_ATOMIC
 	select ARCH_HAS_DEBUG_WX
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 3521f14bcd969..8de5b987edb9f 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -2,7 +2,6 @@
 config CSKY
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index bdf05ad3206a3..3e3e0f16f7e0a 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -2,7 +2,6 @@
 config H8300
         def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select BINFMT_FLAT_ARGVP_ENVP_ON_STACK
 	select BINFMT_FLAT_OLD_ALWAYS_RAM
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 1368954ef679b..44a409967af1c 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -5,7 +5,6 @@ comment "Linux Kernel Configuration for Hexagon"
 config HEXAGON
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_PREEMPT
 	# Other pending projects/to-do items.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c5414dcd5d0de..279252e3e0f74 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -8,7 +8,6 @@ menu "Processor type and features"
 
 config IA64
 	bool
-	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index d1d91ac47f514..372e4e69c43ac 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -3,7 +3,6 @@ config M68K
 	bool
 	default y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 5a52922dc225c..0660f47012bcb 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -2,7 +2,6 @@
 config MICROBLAZE
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_NO_SWAP
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 55b4da96872f9..ed51970c08e75 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -3,7 +3,6 @@ config MIPS
 	bool
 	default y
 	select ARCH_32BIT_OFF_T if !64BIT
-	select ARCH_ATOMIC
 	select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
 	select ARCH_HAS_DEBUG_VIRTUAL if !64BIT
 	select ARCH_HAS_FORTIFY_SOURCE
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 352913573aee0..62313902d75d9 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -7,7 +7,6 @@
 config NDS32
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 67dae88c5b53d..c24955c81c927 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -2,7 +2,6 @@
 config NIOS2
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 8c50bc9674f56..591acc5990dc5 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,7 +7,6 @@
 config OPENRISC
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_DMA_SET_UNCACHED
 	select ARCH_HAS_DMA_CLEAR_UNCACHED
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index bfa120a4add12..bde9907bc5b25 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -2,7 +2,6 @@
 config PARISC
 	def_bool y
 	select ARCH_32BIT_OFF_T if !64BIT
-	select ARCH_ATOMIC
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select HAVE_IDE
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d143c2b616f08..088dd2afcfe47 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -118,7 +118,6 @@ config PPC
 	# Please keep this list sorted alphabetically.
 	#
 	select ARCH_32BIT_OFF_T if PPC32
-	select ARCH_ATOMIC
 	select ARCH_ENABLE_MEMORY_HOTPLUG
 	select ARCH_ENABLE_MEMORY_HOTREMOVE
 	select ARCH_HAS_COPY_MC			if PPC64
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c59b9f4a9d62a..a8ad8eb761206 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -12,7 +12,6 @@ config 32BIT
 
 config RISCV
 	def_bool y
-	select ARCH_ATOMIC
 	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 85374a36c69e1..b4c7c34069f81 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -58,7 +58,6 @@ config S390
 	# Note: keep this list sorted alphabetically
 	#
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT
-	select ARCH_ATOMIC
 	select ARCH_32BIT_USTAT_F_TINODE
 	select ARCH_BINFMT_ELF_STATE
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index d2925cbb6fa44..68129537e3509 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -2,7 +2,6 @@
 config SUPERH
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM && MMU
 	select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU
 	select ARCH_HAVE_CUSTOM_GPIO_H
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 46790083e918e..164a5254c91c0 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -13,7 +13,6 @@ config 64BIT
 config SPARC
 	bool
 	default y
-	select ARCH_ATOMIC
 	select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select DMA_OPS
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 4370a9521ea46..57cfd9a1c082e 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -5,7 +5,6 @@ menu "UML-specific options"
 config UML
 	bool
 	default y
-	select ARCH_ATOMIC
 	select ARCH_EPHEMERAL_INODES
 	select ARCH_HAS_KCOV
 	select ARCH_NO_PREEMPT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 11a27563033de..0045e1b441902 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -58,7 +58,6 @@ config X86
 	#
 	select ACPI_LEGACY_TABLES_LOOKUP	if ACPI
 	select ACPI_SYSTEM_POWER_STATES_SUPPORT	if ACPI
-	select ARCH_ATOMIC
 	select ARCH_32BIT_OFF_T			if X86_32
 	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 39bb9bdae6b13..2332b21569938 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -2,7 +2,6 @@
 config XTENSA
 	def_bool y
 	select ARCH_32BIT_OFF_T
-	select ARCH_ATOMIC
 	select ARCH_HAS_BINFMT_FLAT if !MMU
 	select ARCH_HAS_DMA_PREP_COHERENT if MMU
 	select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 649060fa0fe8e..04b8be9f1a77c 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -12,14 +12,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/barrier.h>
 
-#ifdef CONFIG_ARCH_ATOMIC
-#define __ga_cmpxchg	arch_cmpxchg
-#define __ga_xchg	arch_xchg
-#else
-#define __ga_cmpxchg	cmpxchg
-#define __ga_xchg	xchg
-#endif
-
 #ifdef CONFIG_SMP
 
 /* we can build all atomic primitives from cmpxchg */
@@ -30,7 +22,7 @@ static inline void generic_atomic_##op(int i, atomic_t *v)		\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
+	while ((old = arch_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 }
 
@@ -40,7 +32,7 @@ static inline int generic_atomic_##op##_return(int i, atomic_t *v)	\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
+	while ((old = arch_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 									\
 	return c c_op i;						\
@@ -52,7 +44,7 @@ static inline int generic_atomic_fetch_##op(int i, atomic_t *v)		\
 	int c, old;							\
 									\
 	c = v->counter;							\
-	while ((old = __ga_cmpxchg(&v->counter, c, c c_op i)) != c)	\
+	while ((old = arch_cmpxchg(&v->counter, c, c c_op i)) != c)	\
 		c = old;						\
 									\
 	return c;							\
@@ -120,11 +112,6 @@ ATOMIC_OP(xor, ^)
 #undef ATOMIC_OP_RETURN
 #undef ATOMIC_OP
 
-#undef __ga_cmpxchg
-#undef __ga_xchg
-
-#ifdef CONFIG_ARCH_ATOMIC
-
 #define arch_atomic_add_return			generic_atomic_add_return
 #define arch_atomic_sub_return			generic_atomic_sub_return
 
@@ -146,29 +133,4 @@ ATOMIC_OP(xor, ^)
 #define arch_atomic_xchg(ptr, v)		(arch_xchg(&(ptr)->counter, (v)))
 #define arch_atomic_cmpxchg(v, old, new)	(arch_cmpxchg(&((v)->counter), (old), (new)))
 
-#else /* CONFIG_ARCH_ATOMIC */
-
-#define atomic_add_return		generic_atomic_add_return
-#define atomic_sub_return		generic_atomic_sub_return
-
-#define atomic_fetch_add		generic_atomic_fetch_add
-#define atomic_fetch_sub		generic_atomic_fetch_sub
-#define atomic_fetch_and		generic_atomic_fetch_and
-#define atomic_fetch_or			generic_atomic_fetch_or
-#define atomic_fetch_xor		generic_atomic_fetch_xor
-
-#define atomic_add			generic_atomic_add
-#define atomic_sub			generic_atomic_sub
-#define atomic_and			generic_atomic_and
-#define atomic_or			generic_atomic_or
-#define atomic_xor			generic_atomic_xor
-
-#define atomic_read(v)			READ_ONCE((v)->counter)
-#define atomic_set(v, i)		WRITE_ONCE(((v)->counter), (i))
-
-#define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
-#define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
-
-#endif /* CONFIG_ARCH_ATOMIC */
-
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h
index c8c7d9fae8203..100d24b02e52d 100644
--- a/include/asm-generic/atomic64.h
+++ b/include/asm-generic/atomic64.h
@@ -49,8 +49,6 @@ extern s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n);
 extern s64 generic_atomic64_xchg(atomic64_t *v, s64 new);
 extern s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
 
-#ifdef CONFIG_ARCH_ATOMIC
-
 #define arch_atomic64_read		generic_atomic64_read
 #define arch_atomic64_set		generic_atomic64_set
 #define arch_atomic64_set_release	generic_atomic64_set
@@ -74,31 +72,4 @@ extern s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u);
 #define arch_atomic64_xchg		generic_atomic64_xchg
 #define arch_atomic64_fetch_add_unless	generic_atomic64_fetch_add_unless
 
-#else /* CONFIG_ARCH_ATOMIC */
-
-#define atomic64_read			generic_atomic64_read
-#define atomic64_set			generic_atomic64_set
-#define atomic64_set_release		generic_atomic64_set
-
-#define atomic64_add			generic_atomic64_add
-#define atomic64_add_return		generic_atomic64_add_return
-#define atomic64_fetch_add		generic_atomic64_fetch_add
-#define atomic64_sub			generic_atomic64_sub
-#define atomic64_sub_return		generic_atomic64_sub_return
-#define atomic64_fetch_sub		generic_atomic64_fetch_sub
-
-#define atomic64_and			generic_atomic64_and
-#define atomic64_fetch_and		generic_atomic64_fetch_and
-#define atomic64_or			generic_atomic64_or
-#define atomic64_fetch_or		generic_atomic64_fetch_or
-#define atomic64_xor			generic_atomic64_xor
-#define atomic64_fetch_xor		generic_atomic64_fetch_xor
-
-#define atomic64_dec_if_positive	generic_atomic64_dec_if_positive
-#define atomic64_cmpxchg		generic_atomic64_cmpxchg
-#define atomic64_xchg			generic_atomic64_xchg
-#define atomic64_fetch_add_unless	generic_atomic64_fetch_add_unless
-
-#endif /* CONFIG_ARCH_ATOMIC */
-
 #endif  /*  _ASM_GENERIC_ATOMIC64_H  */
diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h
index 98c9311990894..dca4419922a97 100644
--- a/include/asm-generic/cmpxchg.h
+++ b/include/asm-generic/cmpxchg.h
@@ -97,8 +97,6 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 	__generic_cmpxchg64_local((ptr), (o), (n))
 
 
-#ifdef CONFIG_ARCH_ATOMIC
-
 #ifndef arch_xchg
 #define arch_xchg		generic_xchg
 #endif
@@ -114,23 +112,4 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size)
 #define arch_cmpxchg		arch_cmpxchg_local
 #define arch_cmpxchg64		arch_cmpxchg64_local
 
-#else /* CONFIG_ARCH_ATOMIC */
-
-#ifndef xchg
-#define xchg			generic_xchg
-#endif
-
-#ifndef cmpxchg_local
-#define cmpxchg_local		generic_cmpxchg_local
-#endif
-
-#ifndef cmpxchg64_local
-#define cmpxchg64_local		generic_cmpxchg64_local
-#endif
-
-#define cmpxchg			cmpxchg_local
-#define cmpxchg64		cmpxchg64_local
-
-#endif /* CONFIG_ARCH_ATOMIC */
-
 #endif /* __ASM_GENERIC_CMPXCHG_H */
diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
deleted file mode 100644
index 2a3f55d98be9d..0000000000000
--- a/include/linux/atomic-fallback.h
+++ /dev/null
@@ -1,2595 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// Generated by scripts/atomic/gen-atomic-fallback.sh
-// DO NOT MODIFY THIS FILE DIRECTLY
-
-#ifndef _LINUX_ATOMIC_FALLBACK_H
-#define _LINUX_ATOMIC_FALLBACK_H
-
-#include <linux/compiler.h>
-
-#ifndef xchg_relaxed
-#define xchg_acquire xchg
-#define xchg_release xchg
-#define xchg_relaxed xchg
-#else /* xchg_relaxed */
-
-#ifndef xchg_acquire
-#define xchg_acquire(...) \
-	__atomic_op_acquire(xchg, __VA_ARGS__)
-#endif
-
-#ifndef xchg_release
-#define xchg_release(...) \
-	__atomic_op_release(xchg, __VA_ARGS__)
-#endif
-
-#ifndef xchg
-#define xchg(...) \
-	__atomic_op_fence(xchg, __VA_ARGS__)
-#endif
-
-#endif /* xchg_relaxed */
-
-#ifndef cmpxchg_relaxed
-#define cmpxchg_acquire cmpxchg
-#define cmpxchg_release cmpxchg
-#define cmpxchg_relaxed cmpxchg
-#else /* cmpxchg_relaxed */
-
-#ifndef cmpxchg_acquire
-#define cmpxchg_acquire(...) \
-	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg_release
-#define cmpxchg_release(...) \
-	__atomic_op_release(cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg
-#define cmpxchg(...) \
-	__atomic_op_fence(cmpxchg, __VA_ARGS__)
-#endif
-
-#endif /* cmpxchg_relaxed */
-
-#ifndef cmpxchg64_relaxed
-#define cmpxchg64_acquire cmpxchg64
-#define cmpxchg64_release cmpxchg64
-#define cmpxchg64_relaxed cmpxchg64
-#else /* cmpxchg64_relaxed */
-
-#ifndef cmpxchg64_acquire
-#define cmpxchg64_acquire(...) \
-	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg64_release
-#define cmpxchg64_release(...) \
-	__atomic_op_release(cmpxchg64, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg64
-#define cmpxchg64(...) \
-	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
-#endif
-
-#endif /* cmpxchg64_relaxed */
-
-#ifndef try_cmpxchg_relaxed
-#ifdef try_cmpxchg
-#define try_cmpxchg_acquire try_cmpxchg
-#define try_cmpxchg_release try_cmpxchg
-#define try_cmpxchg_relaxed try_cmpxchg
-#endif /* try_cmpxchg */
-
-#ifndef try_cmpxchg
-#define try_cmpxchg(_ptr, _oldp, _new) \
-({ \
-	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
-	___r = cmpxchg((_ptr), ___o, (_new)); \
-	if (unlikely(___r != ___o)) \
-		*___op = ___r; \
-	likely(___r == ___o); \
-})
-#endif /* try_cmpxchg */
-
-#ifndef try_cmpxchg_acquire
-#define try_cmpxchg_acquire(_ptr, _oldp, _new) \
-({ \
-	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
-	___r = cmpxchg_acquire((_ptr), ___o, (_new)); \
-	if (unlikely(___r != ___o)) \
-		*___op = ___r; \
-	likely(___r == ___o); \
-})
-#endif /* try_cmpxchg_acquire */
-
-#ifndef try_cmpxchg_release
-#define try_cmpxchg_release(_ptr, _oldp, _new) \
-({ \
-	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
-	___r = cmpxchg_release((_ptr), ___o, (_new)); \
-	if (unlikely(___r != ___o)) \
-		*___op = ___r; \
-	likely(___r == ___o); \
-})
-#endif /* try_cmpxchg_release */
-
-#ifndef try_cmpxchg_relaxed
-#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \
-({ \
-	typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
-	___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \
-	if (unlikely(___r != ___o)) \
-		*___op = ___r; \
-	likely(___r == ___o); \
-})
-#endif /* try_cmpxchg_relaxed */
-
-#else /* try_cmpxchg_relaxed */
-
-#ifndef try_cmpxchg_acquire
-#define try_cmpxchg_acquire(...) \
-	__atomic_op_acquire(try_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef try_cmpxchg_release
-#define try_cmpxchg_release(...) \
-	__atomic_op_release(try_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef try_cmpxchg
-#define try_cmpxchg(...) \
-	__atomic_op_fence(try_cmpxchg, __VA_ARGS__)
-#endif
-
-#endif /* try_cmpxchg_relaxed */
-
-#define arch_atomic_read atomic_read
-#define arch_atomic_read_acquire atomic_read_acquire
-
-#ifndef atomic_read_acquire
-static __always_inline int
-atomic_read_acquire(const atomic_t *v)
-{
-	return smp_load_acquire(&(v)->counter);
-}
-#define atomic_read_acquire atomic_read_acquire
-#endif
-
-#define arch_atomic_set atomic_set
-#define arch_atomic_set_release atomic_set_release
-
-#ifndef atomic_set_release
-static __always_inline void
-atomic_set_release(atomic_t *v, int i)
-{
-	smp_store_release(&(v)->counter, i);
-}
-#define atomic_set_release atomic_set_release
-#endif
-
-#define arch_atomic_add atomic_add
-
-#define arch_atomic_add_return atomic_add_return
-#define arch_atomic_add_return_acquire atomic_add_return_acquire
-#define arch_atomic_add_return_release atomic_add_return_release
-#define arch_atomic_add_return_relaxed atomic_add_return_relaxed
-
-#ifndef atomic_add_return_relaxed
-#define atomic_add_return_acquire atomic_add_return
-#define atomic_add_return_release atomic_add_return
-#define atomic_add_return_relaxed atomic_add_return
-#else /* atomic_add_return_relaxed */
-
-#ifndef atomic_add_return_acquire
-static __always_inline int
-atomic_add_return_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_add_return_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_add_return_acquire atomic_add_return_acquire
-#endif
-
-#ifndef atomic_add_return_release
-static __always_inline int
-atomic_add_return_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_add_return_relaxed(i, v);
-}
-#define atomic_add_return_release atomic_add_return_release
-#endif
-
-#ifndef atomic_add_return
-static __always_inline int
-atomic_add_return(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_add_return_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_add_return atomic_add_return
-#endif
-
-#endif /* atomic_add_return_relaxed */
-
-#define arch_atomic_fetch_add atomic_fetch_add
-#define arch_atomic_fetch_add_acquire atomic_fetch_add_acquire
-#define arch_atomic_fetch_add_release atomic_fetch_add_release
-#define arch_atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-
-#ifndef atomic_fetch_add_relaxed
-#define atomic_fetch_add_acquire atomic_fetch_add
-#define atomic_fetch_add_release atomic_fetch_add
-#define atomic_fetch_add_relaxed atomic_fetch_add
-#else /* atomic_fetch_add_relaxed */
-
-#ifndef atomic_fetch_add_acquire
-static __always_inline int
-atomic_fetch_add_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_add_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_add_acquire atomic_fetch_add_acquire
-#endif
-
-#ifndef atomic_fetch_add_release
-static __always_inline int
-atomic_fetch_add_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_add_relaxed(i, v);
-}
-#define atomic_fetch_add_release atomic_fetch_add_release
-#endif
-
-#ifndef atomic_fetch_add
-static __always_inline int
-atomic_fetch_add(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_add_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_add atomic_fetch_add
-#endif
-
-#endif /* atomic_fetch_add_relaxed */
-
-#define arch_atomic_sub atomic_sub
-
-#define arch_atomic_sub_return atomic_sub_return
-#define arch_atomic_sub_return_acquire atomic_sub_return_acquire
-#define arch_atomic_sub_return_release atomic_sub_return_release
-#define arch_atomic_sub_return_relaxed atomic_sub_return_relaxed
-
-#ifndef atomic_sub_return_relaxed
-#define atomic_sub_return_acquire atomic_sub_return
-#define atomic_sub_return_release atomic_sub_return
-#define atomic_sub_return_relaxed atomic_sub_return
-#else /* atomic_sub_return_relaxed */
-
-#ifndef atomic_sub_return_acquire
-static __always_inline int
-atomic_sub_return_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_sub_return_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_sub_return_acquire atomic_sub_return_acquire
-#endif
-
-#ifndef atomic_sub_return_release
-static __always_inline int
-atomic_sub_return_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_sub_return_relaxed(i, v);
-}
-#define atomic_sub_return_release atomic_sub_return_release
-#endif
-
-#ifndef atomic_sub_return
-static __always_inline int
-atomic_sub_return(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_sub_return_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_sub_return atomic_sub_return
-#endif
-
-#endif /* atomic_sub_return_relaxed */
-
-#define arch_atomic_fetch_sub atomic_fetch_sub
-#define arch_atomic_fetch_sub_acquire atomic_fetch_sub_acquire
-#define arch_atomic_fetch_sub_release atomic_fetch_sub_release
-#define arch_atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
-
-#ifndef atomic_fetch_sub_relaxed
-#define atomic_fetch_sub_acquire atomic_fetch_sub
-#define atomic_fetch_sub_release atomic_fetch_sub
-#define atomic_fetch_sub_relaxed atomic_fetch_sub
-#else /* atomic_fetch_sub_relaxed */
-
-#ifndef atomic_fetch_sub_acquire
-static __always_inline int
-atomic_fetch_sub_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_sub_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
-#endif
-
-#ifndef atomic_fetch_sub_release
-static __always_inline int
-atomic_fetch_sub_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_sub_relaxed(i, v);
-}
-#define atomic_fetch_sub_release atomic_fetch_sub_release
-#endif
-
-#ifndef atomic_fetch_sub
-static __always_inline int
-atomic_fetch_sub(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_sub_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_sub atomic_fetch_sub
-#endif
-
-#endif /* atomic_fetch_sub_relaxed */
-
-#define arch_atomic_inc atomic_inc
-
-#ifndef atomic_inc
-static __always_inline void
-atomic_inc(atomic_t *v)
-{
-	atomic_add(1, v);
-}
-#define atomic_inc atomic_inc
-#endif
-
-#define arch_atomic_inc_return atomic_inc_return
-#define arch_atomic_inc_return_acquire atomic_inc_return_acquire
-#define arch_atomic_inc_return_release atomic_inc_return_release
-#define arch_atomic_inc_return_relaxed atomic_inc_return_relaxed
-
-#ifndef atomic_inc_return_relaxed
-#ifdef atomic_inc_return
-#define atomic_inc_return_acquire atomic_inc_return
-#define atomic_inc_return_release atomic_inc_return
-#define atomic_inc_return_relaxed atomic_inc_return
-#endif /* atomic_inc_return */
-
-#ifndef atomic_inc_return
-static __always_inline int
-atomic_inc_return(atomic_t *v)
-{
-	return atomic_add_return(1, v);
-}
-#define atomic_inc_return atomic_inc_return
-#endif
-
-#ifndef atomic_inc_return_acquire
-static __always_inline int
-atomic_inc_return_acquire(atomic_t *v)
-{
-	return atomic_add_return_acquire(1, v);
-}
-#define atomic_inc_return_acquire atomic_inc_return_acquire
-#endif
-
-#ifndef atomic_inc_return_release
-static __always_inline int
-atomic_inc_return_release(atomic_t *v)
-{
-	return atomic_add_return_release(1, v);
-}
-#define atomic_inc_return_release atomic_inc_return_release
-#endif
-
-#ifndef atomic_inc_return_relaxed
-static __always_inline int
-atomic_inc_return_relaxed(atomic_t *v)
-{
-	return atomic_add_return_relaxed(1, v);
-}
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#endif
-
-#else /* atomic_inc_return_relaxed */
-
-#ifndef atomic_inc_return_acquire
-static __always_inline int
-atomic_inc_return_acquire(atomic_t *v)
-{
-	int ret = atomic_inc_return_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_inc_return_acquire atomic_inc_return_acquire
-#endif
-
-#ifndef atomic_inc_return_release
-static __always_inline int
-atomic_inc_return_release(atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_inc_return_relaxed(v);
-}
-#define atomic_inc_return_release atomic_inc_return_release
-#endif
-
-#ifndef atomic_inc_return
-static __always_inline int
-atomic_inc_return(atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_inc_return_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_inc_return atomic_inc_return
-#endif
-
-#endif /* atomic_inc_return_relaxed */
-
-#define arch_atomic_fetch_inc atomic_fetch_inc
-#define arch_atomic_fetch_inc_acquire atomic_fetch_inc_acquire
-#define arch_atomic_fetch_inc_release atomic_fetch_inc_release
-#define arch_atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
-
-#ifndef atomic_fetch_inc_relaxed
-#ifdef atomic_fetch_inc
-#define atomic_fetch_inc_acquire atomic_fetch_inc
-#define atomic_fetch_inc_release atomic_fetch_inc
-#define atomic_fetch_inc_relaxed atomic_fetch_inc
-#endif /* atomic_fetch_inc */
-
-#ifndef atomic_fetch_inc
-static __always_inline int
-atomic_fetch_inc(atomic_t *v)
-{
-	return atomic_fetch_add(1, v);
-}
-#define atomic_fetch_inc atomic_fetch_inc
-#endif
-
-#ifndef atomic_fetch_inc_acquire
-static __always_inline int
-atomic_fetch_inc_acquire(atomic_t *v)
-{
-	return atomic_fetch_add_acquire(1, v);
-}
-#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
-#endif
-
-#ifndef atomic_fetch_inc_release
-static __always_inline int
-atomic_fetch_inc_release(atomic_t *v)
-{
-	return atomic_fetch_add_release(1, v);
-}
-#define atomic_fetch_inc_release atomic_fetch_inc_release
-#endif
-
-#ifndef atomic_fetch_inc_relaxed
-static __always_inline int
-atomic_fetch_inc_relaxed(atomic_t *v)
-{
-	return atomic_fetch_add_relaxed(1, v);
-}
-#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
-#endif
-
-#else /* atomic_fetch_inc_relaxed */
-
-#ifndef atomic_fetch_inc_acquire
-static __always_inline int
-atomic_fetch_inc_acquire(atomic_t *v)
-{
-	int ret = atomic_fetch_inc_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
-#endif
-
-#ifndef atomic_fetch_inc_release
-static __always_inline int
-atomic_fetch_inc_release(atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_inc_relaxed(v);
-}
-#define atomic_fetch_inc_release atomic_fetch_inc_release
-#endif
-
-#ifndef atomic_fetch_inc
-static __always_inline int
-atomic_fetch_inc(atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_inc_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_inc atomic_fetch_inc
-#endif
-
-#endif /* atomic_fetch_inc_relaxed */
-
-#define arch_atomic_dec atomic_dec
-
-#ifndef atomic_dec
-static __always_inline void
-atomic_dec(atomic_t *v)
-{
-	atomic_sub(1, v);
-}
-#define atomic_dec atomic_dec
-#endif
-
-#define arch_atomic_dec_return atomic_dec_return
-#define arch_atomic_dec_return_acquire atomic_dec_return_acquire
-#define arch_atomic_dec_return_release atomic_dec_return_release
-#define arch_atomic_dec_return_relaxed atomic_dec_return_relaxed
-
-#ifndef atomic_dec_return_relaxed
-#ifdef atomic_dec_return
-#define atomic_dec_return_acquire atomic_dec_return
-#define atomic_dec_return_release atomic_dec_return
-#define atomic_dec_return_relaxed atomic_dec_return
-#endif /* atomic_dec_return */
-
-#ifndef atomic_dec_return
-static __always_inline int
-atomic_dec_return(atomic_t *v)
-{
-	return atomic_sub_return(1, v);
-}
-#define atomic_dec_return atomic_dec_return
-#endif
-
-#ifndef atomic_dec_return_acquire
-static __always_inline int
-atomic_dec_return_acquire(atomic_t *v)
-{
-	return atomic_sub_return_acquire(1, v);
-}
-#define atomic_dec_return_acquire atomic_dec_return_acquire
-#endif
-
-#ifndef atomic_dec_return_release
-static __always_inline int
-atomic_dec_return_release(atomic_t *v)
-{
-	return atomic_sub_return_release(1, v);
-}
-#define atomic_dec_return_release atomic_dec_return_release
-#endif
-
-#ifndef atomic_dec_return_relaxed
-static __always_inline int
-atomic_dec_return_relaxed(atomic_t *v)
-{
-	return atomic_sub_return_relaxed(1, v);
-}
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
-#endif
-
-#else /* atomic_dec_return_relaxed */
-
-#ifndef atomic_dec_return_acquire
-static __always_inline int
-atomic_dec_return_acquire(atomic_t *v)
-{
-	int ret = atomic_dec_return_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_dec_return_acquire atomic_dec_return_acquire
-#endif
-
-#ifndef atomic_dec_return_release
-static __always_inline int
-atomic_dec_return_release(atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_dec_return_relaxed(v);
-}
-#define atomic_dec_return_release atomic_dec_return_release
-#endif
-
-#ifndef atomic_dec_return
-static __always_inline int
-atomic_dec_return(atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_dec_return_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_dec_return atomic_dec_return
-#endif
-
-#endif /* atomic_dec_return_relaxed */
-
-#define arch_atomic_fetch_dec atomic_fetch_dec
-#define arch_atomic_fetch_dec_acquire atomic_fetch_dec_acquire
-#define arch_atomic_fetch_dec_release atomic_fetch_dec_release
-#define arch_atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
-
-#ifndef atomic_fetch_dec_relaxed
-#ifdef atomic_fetch_dec
-#define atomic_fetch_dec_acquire atomic_fetch_dec
-#define atomic_fetch_dec_release atomic_fetch_dec
-#define atomic_fetch_dec_relaxed atomic_fetch_dec
-#endif /* atomic_fetch_dec */
-
-#ifndef atomic_fetch_dec
-static __always_inline int
-atomic_fetch_dec(atomic_t *v)
-{
-	return atomic_fetch_sub(1, v);
-}
-#define atomic_fetch_dec atomic_fetch_dec
-#endif
-
-#ifndef atomic_fetch_dec_acquire
-static __always_inline int
-atomic_fetch_dec_acquire(atomic_t *v)
-{
-	return atomic_fetch_sub_acquire(1, v);
-}
-#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
-#endif
-
-#ifndef atomic_fetch_dec_release
-static __always_inline int
-atomic_fetch_dec_release(atomic_t *v)
-{
-	return atomic_fetch_sub_release(1, v);
-}
-#define atomic_fetch_dec_release atomic_fetch_dec_release
-#endif
-
-#ifndef atomic_fetch_dec_relaxed
-static __always_inline int
-atomic_fetch_dec_relaxed(atomic_t *v)
-{
-	return atomic_fetch_sub_relaxed(1, v);
-}
-#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
-#endif
-
-#else /* atomic_fetch_dec_relaxed */
-
-#ifndef atomic_fetch_dec_acquire
-static __always_inline int
-atomic_fetch_dec_acquire(atomic_t *v)
-{
-	int ret = atomic_fetch_dec_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
-#endif
-
-#ifndef atomic_fetch_dec_release
-static __always_inline int
-atomic_fetch_dec_release(atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_dec_relaxed(v);
-}
-#define atomic_fetch_dec_release atomic_fetch_dec_release
-#endif
-
-#ifndef atomic_fetch_dec
-static __always_inline int
-atomic_fetch_dec(atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_dec_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_dec atomic_fetch_dec
-#endif
-
-#endif /* atomic_fetch_dec_relaxed */
-
-#define arch_atomic_and atomic_and
-
-#define arch_atomic_fetch_and atomic_fetch_and
-#define arch_atomic_fetch_and_acquire atomic_fetch_and_acquire
-#define arch_atomic_fetch_and_release atomic_fetch_and_release
-#define arch_atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-
-#ifndef atomic_fetch_and_relaxed
-#define atomic_fetch_and_acquire atomic_fetch_and
-#define atomic_fetch_and_release atomic_fetch_and
-#define atomic_fetch_and_relaxed atomic_fetch_and
-#else /* atomic_fetch_and_relaxed */
-
-#ifndef atomic_fetch_and_acquire
-static __always_inline int
-atomic_fetch_and_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_and_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_and_acquire atomic_fetch_and_acquire
-#endif
-
-#ifndef atomic_fetch_and_release
-static __always_inline int
-atomic_fetch_and_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_and_relaxed(i, v);
-}
-#define atomic_fetch_and_release atomic_fetch_and_release
-#endif
-
-#ifndef atomic_fetch_and
-static __always_inline int
-atomic_fetch_and(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_and_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_and atomic_fetch_and
-#endif
-
-#endif /* atomic_fetch_and_relaxed */
-
-#define arch_atomic_andnot atomic_andnot
-
-#ifndef atomic_andnot
-static __always_inline void
-atomic_andnot(int i, atomic_t *v)
-{
-	atomic_and(~i, v);
-}
-#define atomic_andnot atomic_andnot
-#endif
-
-#define arch_atomic_fetch_andnot atomic_fetch_andnot
-#define arch_atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
-#define arch_atomic_fetch_andnot_release atomic_fetch_andnot_release
-#define arch_atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
-
-#ifndef atomic_fetch_andnot_relaxed
-#ifdef atomic_fetch_andnot
-#define atomic_fetch_andnot_acquire atomic_fetch_andnot
-#define atomic_fetch_andnot_release atomic_fetch_andnot
-#define atomic_fetch_andnot_relaxed atomic_fetch_andnot
-#endif /* atomic_fetch_andnot */
-
-#ifndef atomic_fetch_andnot
-static __always_inline int
-atomic_fetch_andnot(int i, atomic_t *v)
-{
-	return atomic_fetch_and(~i, v);
-}
-#define atomic_fetch_andnot atomic_fetch_andnot
-#endif
-
-#ifndef atomic_fetch_andnot_acquire
-static __always_inline int
-atomic_fetch_andnot_acquire(int i, atomic_t *v)
-{
-	return atomic_fetch_and_acquire(~i, v);
-}
-#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
-#endif
-
-#ifndef atomic_fetch_andnot_release
-static __always_inline int
-atomic_fetch_andnot_release(int i, atomic_t *v)
-{
-	return atomic_fetch_and_release(~i, v);
-}
-#define atomic_fetch_andnot_release atomic_fetch_andnot_release
-#endif
-
-#ifndef atomic_fetch_andnot_relaxed
-static __always_inline int
-atomic_fetch_andnot_relaxed(int i, atomic_t *v)
-{
-	return atomic_fetch_and_relaxed(~i, v);
-}
-#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
-#endif
-
-#else /* atomic_fetch_andnot_relaxed */
-
-#ifndef atomic_fetch_andnot_acquire
-static __always_inline int
-atomic_fetch_andnot_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_andnot_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
-#endif
-
-#ifndef atomic_fetch_andnot_release
-static __always_inline int
-atomic_fetch_andnot_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_andnot_relaxed(i, v);
-}
-#define atomic_fetch_andnot_release atomic_fetch_andnot_release
-#endif
-
-#ifndef atomic_fetch_andnot
-static __always_inline int
-atomic_fetch_andnot(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_andnot_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_andnot atomic_fetch_andnot
-#endif
-
-#endif /* atomic_fetch_andnot_relaxed */
-
-#define arch_atomic_or atomic_or
-
-#define arch_atomic_fetch_or atomic_fetch_or
-#define arch_atomic_fetch_or_acquire atomic_fetch_or_acquire
-#define arch_atomic_fetch_or_release atomic_fetch_or_release
-#define arch_atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-
-#ifndef atomic_fetch_or_relaxed
-#define atomic_fetch_or_acquire atomic_fetch_or
-#define atomic_fetch_or_release atomic_fetch_or
-#define atomic_fetch_or_relaxed atomic_fetch_or
-#else /* atomic_fetch_or_relaxed */
-
-#ifndef atomic_fetch_or_acquire
-static __always_inline int
-atomic_fetch_or_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_or_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_or_acquire atomic_fetch_or_acquire
-#endif
-
-#ifndef atomic_fetch_or_release
-static __always_inline int
-atomic_fetch_or_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_or_relaxed(i, v);
-}
-#define atomic_fetch_or_release atomic_fetch_or_release
-#endif
-
-#ifndef atomic_fetch_or
-static __always_inline int
-atomic_fetch_or(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_or_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_or atomic_fetch_or
-#endif
-
-#endif /* atomic_fetch_or_relaxed */
-
-#define arch_atomic_xor atomic_xor
-
-#define arch_atomic_fetch_xor atomic_fetch_xor
-#define arch_atomic_fetch_xor_acquire atomic_fetch_xor_acquire
-#define arch_atomic_fetch_xor_release atomic_fetch_xor_release
-#define arch_atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
-
-#ifndef atomic_fetch_xor_relaxed
-#define atomic_fetch_xor_acquire atomic_fetch_xor
-#define atomic_fetch_xor_release atomic_fetch_xor
-#define atomic_fetch_xor_relaxed atomic_fetch_xor
-#else /* atomic_fetch_xor_relaxed */
-
-#ifndef atomic_fetch_xor_acquire
-static __always_inline int
-atomic_fetch_xor_acquire(int i, atomic_t *v)
-{
-	int ret = atomic_fetch_xor_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
-#endif
-
-#ifndef atomic_fetch_xor_release
-static __always_inline int
-atomic_fetch_xor_release(int i, atomic_t *v)
-{
-	__atomic_release_fence();
-	return atomic_fetch_xor_relaxed(i, v);
-}
-#define atomic_fetch_xor_release atomic_fetch_xor_release
-#endif
-
-#ifndef atomic_fetch_xor
-static __always_inline int
-atomic_fetch_xor(int i, atomic_t *v)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_fetch_xor_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_fetch_xor atomic_fetch_xor
-#endif
-
-#endif /* atomic_fetch_xor_relaxed */
-
-#define arch_atomic_xchg atomic_xchg
-#define arch_atomic_xchg_acquire atomic_xchg_acquire
-#define arch_atomic_xchg_release atomic_xchg_release
-#define arch_atomic_xchg_relaxed atomic_xchg_relaxed
-
-#ifndef atomic_xchg_relaxed
-#define atomic_xchg_acquire atomic_xchg
-#define atomic_xchg_release atomic_xchg
-#define atomic_xchg_relaxed atomic_xchg
-#else /* atomic_xchg_relaxed */
-
-#ifndef atomic_xchg_acquire
-static __always_inline int
-atomic_xchg_acquire(atomic_t *v, int i)
-{
-	int ret = atomic_xchg_relaxed(v, i);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_xchg_acquire atomic_xchg_acquire
-#endif
-
-#ifndef atomic_xchg_release
-static __always_inline int
-atomic_xchg_release(atomic_t *v, int i)
-{
-	__atomic_release_fence();
-	return atomic_xchg_relaxed(v, i);
-}
-#define atomic_xchg_release atomic_xchg_release
-#endif
-
-#ifndef atomic_xchg
-static __always_inline int
-atomic_xchg(atomic_t *v, int i)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_xchg_relaxed(v, i);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_xchg atomic_xchg
-#endif
-
-#endif /* atomic_xchg_relaxed */
-
-#define arch_atomic_cmpxchg atomic_cmpxchg
-#define arch_atomic_cmpxchg_acquire atomic_cmpxchg_acquire
-#define arch_atomic_cmpxchg_release atomic_cmpxchg_release
-#define arch_atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
-
-#ifndef atomic_cmpxchg_relaxed
-#define atomic_cmpxchg_acquire atomic_cmpxchg
-#define atomic_cmpxchg_release atomic_cmpxchg
-#define atomic_cmpxchg_relaxed atomic_cmpxchg
-#else /* atomic_cmpxchg_relaxed */
-
-#ifndef atomic_cmpxchg_acquire
-static __always_inline int
-atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
-{
-	int ret = atomic_cmpxchg_relaxed(v, old, new);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
-#endif
-
-#ifndef atomic_cmpxchg_release
-static __always_inline int
-atomic_cmpxchg_release(atomic_t *v, int old, int new)
-{
-	__atomic_release_fence();
-	return atomic_cmpxchg_relaxed(v, old, new);
-}
-#define atomic_cmpxchg_release atomic_cmpxchg_release
-#endif
-
-#ifndef atomic_cmpxchg
-static __always_inline int
-atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-	int ret;
-	__atomic_pre_full_fence();
-	ret = atomic_cmpxchg_relaxed(v, old, new);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_cmpxchg atomic_cmpxchg
-#endif
-
-#endif /* atomic_cmpxchg_relaxed */
-
-#define arch_atomic_try_cmpxchg atomic_try_cmpxchg
-#define arch_atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
-#define arch_atomic_try_cmpxchg_release atomic_try_cmpxchg_release
-#define arch_atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
-
-#ifndef atomic_try_cmpxchg_relaxed
-#ifdef atomic_try_cmpxchg
-#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg
-#define atomic_try_cmpxchg_release atomic_try_cmpxchg
-#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg
-#endif /* atomic_try_cmpxchg */
-
-#ifndef atomic_try_cmpxchg
-static __always_inline bool
-atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
-	int r, o = *old;
-	r = atomic_cmpxchg(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-#endif
-
-#ifndef atomic_try_cmpxchg_acquire
-static __always_inline bool
-atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
-{
-	int r, o = *old;
-	r = atomic_cmpxchg_acquire(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
-#endif
-
-#ifndef atomic_try_cmpxchg_release
-static __always_inline bool
-atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
-{
-	int r, o = *old;
-	r = atomic_cmpxchg_release(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
-#endif
-
-#ifndef atomic_try_cmpxchg_relaxed
-static __always_inline bool
-atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
-{
-	int r, o = *old;
-	r = atomic_cmpxchg_relaxed(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
-#endif
-
-#else /* atomic_try_cmpxchg_relaxed */
-
-#ifndef atomic_try_cmpxchg_acquire
-static __always_inline bool
-atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
-{
-	bool ret = atomic_try_cmpxchg_relaxed(v, old, new);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
-#endif
-
-#ifndef atomic_try_cmpxchg_release
-static __always_inline bool
-atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
-{
-	__atomic_release_fence();
-	return atomic_try_cmpxchg_relaxed(v, old, new);
-}
-#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
-#endif
-
-#ifndef atomic_try_cmpxchg
-static __always_inline bool
-atomic_try_cmpxchg(atomic_t *v, int *old, int new)
-{
-	bool ret;
-	__atomic_pre_full_fence();
-	ret = atomic_try_cmpxchg_relaxed(v, old, new);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-#endif
-
-#endif /* atomic_try_cmpxchg_relaxed */
-
-#define arch_atomic_sub_and_test atomic_sub_and_test
-
-#ifndef atomic_sub_and_test
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __always_inline bool
-atomic_sub_and_test(int i, atomic_t *v)
-{
-	return atomic_sub_return(i, v) == 0;
-}
-#define atomic_sub_and_test atomic_sub_and_test
-#endif
-
-#define arch_atomic_dec_and_test atomic_dec_and_test
-
-#ifndef atomic_dec_and_test
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __always_inline bool
-atomic_dec_and_test(atomic_t *v)
-{
-	return atomic_dec_return(v) == 0;
-}
-#define atomic_dec_and_test atomic_dec_and_test
-#endif
-
-#define arch_atomic_inc_and_test atomic_inc_and_test
-
-#ifndef atomic_inc_and_test
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __always_inline bool
-atomic_inc_and_test(atomic_t *v)
-{
-	return atomic_inc_return(v) == 0;
-}
-#define atomic_inc_and_test atomic_inc_and_test
-#endif
-
-#define arch_atomic_add_negative atomic_add_negative
-
-#ifndef atomic_add_negative
-/**
- * atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __always_inline bool
-atomic_add_negative(int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-#define atomic_add_negative atomic_add_negative
-#endif
-
-#define arch_atomic_fetch_add_unless atomic_fetch_add_unless
-
-#ifndef atomic_fetch_add_unless
-/**
- * atomic_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns original value of @v
- */
-static __always_inline int
-atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!atomic_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-#define atomic_fetch_add_unless atomic_fetch_add_unless
-#endif
-
-#define arch_atomic_add_unless atomic_add_unless
-
-#ifndef atomic_add_unless
-/**
- * atomic_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns true if the addition was done.
- */
-static __always_inline bool
-atomic_add_unless(atomic_t *v, int a, int u)
-{
-	return atomic_fetch_add_unless(v, a, u) != u;
-}
-#define atomic_add_unless atomic_add_unless
-#endif
-
-#define arch_atomic_inc_not_zero atomic_inc_not_zero
-
-#ifndef atomic_inc_not_zero
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, if @v is non-zero.
- * Returns true if the increment was done.
- */
-static __always_inline bool
-atomic_inc_not_zero(atomic_t *v)
-{
-	return atomic_add_unless(v, 1, 0);
-}
-#define atomic_inc_not_zero atomic_inc_not_zero
-#endif
-
-#define arch_atomic_inc_unless_negative atomic_inc_unless_negative
-
-#ifndef atomic_inc_unless_negative
-static __always_inline bool
-atomic_inc_unless_negative(atomic_t *v)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c < 0))
-			return false;
-	} while (!atomic_try_cmpxchg(v, &c, c + 1));
-
-	return true;
-}
-#define atomic_inc_unless_negative atomic_inc_unless_negative
-#endif
-
-#define arch_atomic_dec_unless_positive atomic_dec_unless_positive
-
-#ifndef atomic_dec_unless_positive
-static __always_inline bool
-atomic_dec_unless_positive(atomic_t *v)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c > 0))
-			return false;
-	} while (!atomic_try_cmpxchg(v, &c, c - 1));
-
-	return true;
-}
-#define atomic_dec_unless_positive atomic_dec_unless_positive
-#endif
-
-#define arch_atomic_dec_if_positive atomic_dec_if_positive
-
-#ifndef atomic_dec_if_positive
-static __always_inline int
-atomic_dec_if_positive(atomic_t *v)
-{
-	int dec, c = atomic_read(v);
-
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!atomic_try_cmpxchg(v, &c, dec));
-
-	return dec;
-}
-#define atomic_dec_if_positive atomic_dec_if_positive
-#endif
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#include <asm-generic/atomic64.h>
-#endif
-
-#define arch_atomic64_read atomic64_read
-#define arch_atomic64_read_acquire atomic64_read_acquire
-
-#ifndef atomic64_read_acquire
-static __always_inline s64
-atomic64_read_acquire(const atomic64_t *v)
-{
-	return smp_load_acquire(&(v)->counter);
-}
-#define atomic64_read_acquire atomic64_read_acquire
-#endif
-
-#define arch_atomic64_set atomic64_set
-#define arch_atomic64_set_release atomic64_set_release
-
-#ifndef atomic64_set_release
-static __always_inline void
-atomic64_set_release(atomic64_t *v, s64 i)
-{
-	smp_store_release(&(v)->counter, i);
-}
-#define atomic64_set_release atomic64_set_release
-#endif
-
-#define arch_atomic64_add atomic64_add
-
-#define arch_atomic64_add_return atomic64_add_return
-#define arch_atomic64_add_return_acquire atomic64_add_return_acquire
-#define arch_atomic64_add_return_release atomic64_add_return_release
-#define arch_atomic64_add_return_relaxed atomic64_add_return_relaxed
-
-#ifndef atomic64_add_return_relaxed
-#define atomic64_add_return_acquire atomic64_add_return
-#define atomic64_add_return_release atomic64_add_return
-#define atomic64_add_return_relaxed atomic64_add_return
-#else /* atomic64_add_return_relaxed */
-
-#ifndef atomic64_add_return_acquire
-static __always_inline s64
-atomic64_add_return_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_add_return_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_add_return_acquire atomic64_add_return_acquire
-#endif
-
-#ifndef atomic64_add_return_release
-static __always_inline s64
-atomic64_add_return_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_add_return_relaxed(i, v);
-}
-#define atomic64_add_return_release atomic64_add_return_release
-#endif
-
-#ifndef atomic64_add_return
-static __always_inline s64
-atomic64_add_return(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_add_return_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_add_return atomic64_add_return
-#endif
-
-#endif /* atomic64_add_return_relaxed */
-
-#define arch_atomic64_fetch_add atomic64_fetch_add
-#define arch_atomic64_fetch_add_acquire atomic64_fetch_add_acquire
-#define arch_atomic64_fetch_add_release atomic64_fetch_add_release
-#define arch_atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-
-#ifndef atomic64_fetch_add_relaxed
-#define atomic64_fetch_add_acquire atomic64_fetch_add
-#define atomic64_fetch_add_release atomic64_fetch_add
-#define atomic64_fetch_add_relaxed atomic64_fetch_add
-#else /* atomic64_fetch_add_relaxed */
-
-#ifndef atomic64_fetch_add_acquire
-static __always_inline s64
-atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_add_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
-#endif
-
-#ifndef atomic64_fetch_add_release
-static __always_inline s64
-atomic64_fetch_add_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_add_relaxed(i, v);
-}
-#define atomic64_fetch_add_release atomic64_fetch_add_release
-#endif
-
-#ifndef atomic64_fetch_add
-static __always_inline s64
-atomic64_fetch_add(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_add_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_add atomic64_fetch_add
-#endif
-
-#endif /* atomic64_fetch_add_relaxed */
-
-#define arch_atomic64_sub atomic64_sub
-
-#define arch_atomic64_sub_return atomic64_sub_return
-#define arch_atomic64_sub_return_acquire atomic64_sub_return_acquire
-#define arch_atomic64_sub_return_release atomic64_sub_return_release
-#define arch_atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-
-#ifndef atomic64_sub_return_relaxed
-#define atomic64_sub_return_acquire atomic64_sub_return
-#define atomic64_sub_return_release atomic64_sub_return
-#define atomic64_sub_return_relaxed atomic64_sub_return
-#else /* atomic64_sub_return_relaxed */
-
-#ifndef atomic64_sub_return_acquire
-static __always_inline s64
-atomic64_sub_return_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_sub_return_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_sub_return_acquire atomic64_sub_return_acquire
-#endif
-
-#ifndef atomic64_sub_return_release
-static __always_inline s64
-atomic64_sub_return_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_sub_return_relaxed(i, v);
-}
-#define atomic64_sub_return_release atomic64_sub_return_release
-#endif
-
-#ifndef atomic64_sub_return
-static __always_inline s64
-atomic64_sub_return(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_sub_return_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_sub_return atomic64_sub_return
-#endif
-
-#endif /* atomic64_sub_return_relaxed */
-
-#define arch_atomic64_fetch_sub atomic64_fetch_sub
-#define arch_atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
-#define arch_atomic64_fetch_sub_release atomic64_fetch_sub_release
-#define arch_atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-
-#ifndef atomic64_fetch_sub_relaxed
-#define atomic64_fetch_sub_acquire atomic64_fetch_sub
-#define atomic64_fetch_sub_release atomic64_fetch_sub
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub
-#else /* atomic64_fetch_sub_relaxed */
-
-#ifndef atomic64_fetch_sub_acquire
-static __always_inline s64
-atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_sub_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
-#endif
-
-#ifndef atomic64_fetch_sub_release
-static __always_inline s64
-atomic64_fetch_sub_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_sub_relaxed(i, v);
-}
-#define atomic64_fetch_sub_release atomic64_fetch_sub_release
-#endif
-
-#ifndef atomic64_fetch_sub
-static __always_inline s64
-atomic64_fetch_sub(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_sub_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_sub atomic64_fetch_sub
-#endif
-
-#endif /* atomic64_fetch_sub_relaxed */
-
-#define arch_atomic64_inc atomic64_inc
-
-#ifndef atomic64_inc
-static __always_inline void
-atomic64_inc(atomic64_t *v)
-{
-	atomic64_add(1, v);
-}
-#define atomic64_inc atomic64_inc
-#endif
-
-#define arch_atomic64_inc_return atomic64_inc_return
-#define arch_atomic64_inc_return_acquire atomic64_inc_return_acquire
-#define arch_atomic64_inc_return_release atomic64_inc_return_release
-#define arch_atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-
-#ifndef atomic64_inc_return_relaxed
-#ifdef atomic64_inc_return
-#define atomic64_inc_return_acquire atomic64_inc_return
-#define atomic64_inc_return_release atomic64_inc_return
-#define atomic64_inc_return_relaxed atomic64_inc_return
-#endif /* atomic64_inc_return */
-
-#ifndef atomic64_inc_return
-static __always_inline s64
-atomic64_inc_return(atomic64_t *v)
-{
-	return atomic64_add_return(1, v);
-}
-#define atomic64_inc_return atomic64_inc_return
-#endif
-
-#ifndef atomic64_inc_return_acquire
-static __always_inline s64
-atomic64_inc_return_acquire(atomic64_t *v)
-{
-	return atomic64_add_return_acquire(1, v);
-}
-#define atomic64_inc_return_acquire atomic64_inc_return_acquire
-#endif
-
-#ifndef atomic64_inc_return_release
-static __always_inline s64
-atomic64_inc_return_release(atomic64_t *v)
-{
-	return atomic64_add_return_release(1, v);
-}
-#define atomic64_inc_return_release atomic64_inc_return_release
-#endif
-
-#ifndef atomic64_inc_return_relaxed
-static __always_inline s64
-atomic64_inc_return_relaxed(atomic64_t *v)
-{
-	return atomic64_add_return_relaxed(1, v);
-}
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#endif
-
-#else /* atomic64_inc_return_relaxed */
-
-#ifndef atomic64_inc_return_acquire
-static __always_inline s64
-atomic64_inc_return_acquire(atomic64_t *v)
-{
-	s64 ret = atomic64_inc_return_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_inc_return_acquire atomic64_inc_return_acquire
-#endif
-
-#ifndef atomic64_inc_return_release
-static __always_inline s64
-atomic64_inc_return_release(atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_inc_return_relaxed(v);
-}
-#define atomic64_inc_return_release atomic64_inc_return_release
-#endif
-
-#ifndef atomic64_inc_return
-static __always_inline s64
-atomic64_inc_return(atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_inc_return_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_inc_return atomic64_inc_return
-#endif
-
-#endif /* atomic64_inc_return_relaxed */
-
-#define arch_atomic64_fetch_inc atomic64_fetch_inc
-#define arch_atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
-#define arch_atomic64_fetch_inc_release atomic64_fetch_inc_release
-#define arch_atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
-
-#ifndef atomic64_fetch_inc_relaxed
-#ifdef atomic64_fetch_inc
-#define atomic64_fetch_inc_acquire atomic64_fetch_inc
-#define atomic64_fetch_inc_release atomic64_fetch_inc
-#define atomic64_fetch_inc_relaxed atomic64_fetch_inc
-#endif /* atomic64_fetch_inc */
-
-#ifndef atomic64_fetch_inc
-static __always_inline s64
-atomic64_fetch_inc(atomic64_t *v)
-{
-	return atomic64_fetch_add(1, v);
-}
-#define atomic64_fetch_inc atomic64_fetch_inc
-#endif
-
-#ifndef atomic64_fetch_inc_acquire
-static __always_inline s64
-atomic64_fetch_inc_acquire(atomic64_t *v)
-{
-	return atomic64_fetch_add_acquire(1, v);
-}
-#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
-#endif
-
-#ifndef atomic64_fetch_inc_release
-static __always_inline s64
-atomic64_fetch_inc_release(atomic64_t *v)
-{
-	return atomic64_fetch_add_release(1, v);
-}
-#define atomic64_fetch_inc_release atomic64_fetch_inc_release
-#endif
-
-#ifndef atomic64_fetch_inc_relaxed
-static __always_inline s64
-atomic64_fetch_inc_relaxed(atomic64_t *v)
-{
-	return atomic64_fetch_add_relaxed(1, v);
-}
-#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
-#endif
-
-#else /* atomic64_fetch_inc_relaxed */
-
-#ifndef atomic64_fetch_inc_acquire
-static __always_inline s64
-atomic64_fetch_inc_acquire(atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_inc_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
-#endif
-
-#ifndef atomic64_fetch_inc_release
-static __always_inline s64
-atomic64_fetch_inc_release(atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_inc_relaxed(v);
-}
-#define atomic64_fetch_inc_release atomic64_fetch_inc_release
-#endif
-
-#ifndef atomic64_fetch_inc
-static __always_inline s64
-atomic64_fetch_inc(atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_inc_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_inc atomic64_fetch_inc
-#endif
-
-#endif /* atomic64_fetch_inc_relaxed */
-
-#define arch_atomic64_dec atomic64_dec
-
-#ifndef atomic64_dec
-static __always_inline void
-atomic64_dec(atomic64_t *v)
-{
-	atomic64_sub(1, v);
-}
-#define atomic64_dec atomic64_dec
-#endif
-
-#define arch_atomic64_dec_return atomic64_dec_return
-#define arch_atomic64_dec_return_acquire atomic64_dec_return_acquire
-#define arch_atomic64_dec_return_release atomic64_dec_return_release
-#define arch_atomic64_dec_return_relaxed atomic64_dec_return_relaxed
-
-#ifndef atomic64_dec_return_relaxed
-#ifdef atomic64_dec_return
-#define atomic64_dec_return_acquire atomic64_dec_return
-#define atomic64_dec_return_release atomic64_dec_return
-#define atomic64_dec_return_relaxed atomic64_dec_return
-#endif /* atomic64_dec_return */
-
-#ifndef atomic64_dec_return
-static __always_inline s64
-atomic64_dec_return(atomic64_t *v)
-{
-	return atomic64_sub_return(1, v);
-}
-#define atomic64_dec_return atomic64_dec_return
-#endif
-
-#ifndef atomic64_dec_return_acquire
-static __always_inline s64
-atomic64_dec_return_acquire(atomic64_t *v)
-{
-	return atomic64_sub_return_acquire(1, v);
-}
-#define atomic64_dec_return_acquire atomic64_dec_return_acquire
-#endif
-
-#ifndef atomic64_dec_return_release
-static __always_inline s64
-atomic64_dec_return_release(atomic64_t *v)
-{
-	return atomic64_sub_return_release(1, v);
-}
-#define atomic64_dec_return_release atomic64_dec_return_release
-#endif
-
-#ifndef atomic64_dec_return_relaxed
-static __always_inline s64
-atomic64_dec_return_relaxed(atomic64_t *v)
-{
-	return atomic64_sub_return_relaxed(1, v);
-}
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
-#endif
-
-#else /* atomic64_dec_return_relaxed */
-
-#ifndef atomic64_dec_return_acquire
-static __always_inline s64
-atomic64_dec_return_acquire(atomic64_t *v)
-{
-	s64 ret = atomic64_dec_return_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_dec_return_acquire atomic64_dec_return_acquire
-#endif
-
-#ifndef atomic64_dec_return_release
-static __always_inline s64
-atomic64_dec_return_release(atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_dec_return_relaxed(v);
-}
-#define atomic64_dec_return_release atomic64_dec_return_release
-#endif
-
-#ifndef atomic64_dec_return
-static __always_inline s64
-atomic64_dec_return(atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_dec_return_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_dec_return atomic64_dec_return
-#endif
-
-#endif /* atomic64_dec_return_relaxed */
-
-#define arch_atomic64_fetch_dec atomic64_fetch_dec
-#define arch_atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
-#define arch_atomic64_fetch_dec_release atomic64_fetch_dec_release
-#define arch_atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
-
-#ifndef atomic64_fetch_dec_relaxed
-#ifdef atomic64_fetch_dec
-#define atomic64_fetch_dec_acquire atomic64_fetch_dec
-#define atomic64_fetch_dec_release atomic64_fetch_dec
-#define atomic64_fetch_dec_relaxed atomic64_fetch_dec
-#endif /* atomic64_fetch_dec */
-
-#ifndef atomic64_fetch_dec
-static __always_inline s64
-atomic64_fetch_dec(atomic64_t *v)
-{
-	return atomic64_fetch_sub(1, v);
-}
-#define atomic64_fetch_dec atomic64_fetch_dec
-#endif
-
-#ifndef atomic64_fetch_dec_acquire
-static __always_inline s64
-atomic64_fetch_dec_acquire(atomic64_t *v)
-{
-	return atomic64_fetch_sub_acquire(1, v);
-}
-#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
-#endif
-
-#ifndef atomic64_fetch_dec_release
-static __always_inline s64
-atomic64_fetch_dec_release(atomic64_t *v)
-{
-	return atomic64_fetch_sub_release(1, v);
-}
-#define atomic64_fetch_dec_release atomic64_fetch_dec_release
-#endif
-
-#ifndef atomic64_fetch_dec_relaxed
-static __always_inline s64
-atomic64_fetch_dec_relaxed(atomic64_t *v)
-{
-	return atomic64_fetch_sub_relaxed(1, v);
-}
-#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
-#endif
-
-#else /* atomic64_fetch_dec_relaxed */
-
-#ifndef atomic64_fetch_dec_acquire
-static __always_inline s64
-atomic64_fetch_dec_acquire(atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_dec_relaxed(v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
-#endif
-
-#ifndef atomic64_fetch_dec_release
-static __always_inline s64
-atomic64_fetch_dec_release(atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_dec_relaxed(v);
-}
-#define atomic64_fetch_dec_release atomic64_fetch_dec_release
-#endif
-
-#ifndef atomic64_fetch_dec
-static __always_inline s64
-atomic64_fetch_dec(atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_dec_relaxed(v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_dec atomic64_fetch_dec
-#endif
-
-#endif /* atomic64_fetch_dec_relaxed */
-
-#define arch_atomic64_and atomic64_and
-
-#define arch_atomic64_fetch_and atomic64_fetch_and
-#define arch_atomic64_fetch_and_acquire atomic64_fetch_and_acquire
-#define arch_atomic64_fetch_and_release atomic64_fetch_and_release
-#define arch_atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-
-#ifndef atomic64_fetch_and_relaxed
-#define atomic64_fetch_and_acquire atomic64_fetch_and
-#define atomic64_fetch_and_release atomic64_fetch_and
-#define atomic64_fetch_and_relaxed atomic64_fetch_and
-#else /* atomic64_fetch_and_relaxed */
-
-#ifndef atomic64_fetch_and_acquire
-static __always_inline s64
-atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_and_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
-#endif
-
-#ifndef atomic64_fetch_and_release
-static __always_inline s64
-atomic64_fetch_and_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_and_relaxed(i, v);
-}
-#define atomic64_fetch_and_release atomic64_fetch_and_release
-#endif
-
-#ifndef atomic64_fetch_and
-static __always_inline s64
-atomic64_fetch_and(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_and_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_and atomic64_fetch_and
-#endif
-
-#endif /* atomic64_fetch_and_relaxed */
-
-#define arch_atomic64_andnot atomic64_andnot
-
-#ifndef atomic64_andnot
-static __always_inline void
-atomic64_andnot(s64 i, atomic64_t *v)
-{
-	atomic64_and(~i, v);
-}
-#define atomic64_andnot atomic64_andnot
-#endif
-
-#define arch_atomic64_fetch_andnot atomic64_fetch_andnot
-#define arch_atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
-#define arch_atomic64_fetch_andnot_release atomic64_fetch_andnot_release
-#define arch_atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
-
-#ifndef atomic64_fetch_andnot_relaxed
-#ifdef atomic64_fetch_andnot
-#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot
-#define atomic64_fetch_andnot_release atomic64_fetch_andnot
-#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot
-#endif /* atomic64_fetch_andnot */
-
-#ifndef atomic64_fetch_andnot
-static __always_inline s64
-atomic64_fetch_andnot(s64 i, atomic64_t *v)
-{
-	return atomic64_fetch_and(~i, v);
-}
-#define atomic64_fetch_andnot atomic64_fetch_andnot
-#endif
-
-#ifndef atomic64_fetch_andnot_acquire
-static __always_inline s64
-atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
-{
-	return atomic64_fetch_and_acquire(~i, v);
-}
-#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
-#endif
-
-#ifndef atomic64_fetch_andnot_release
-static __always_inline s64
-atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
-{
-	return atomic64_fetch_and_release(~i, v);
-}
-#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
-#endif
-
-#ifndef atomic64_fetch_andnot_relaxed
-static __always_inline s64
-atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
-{
-	return atomic64_fetch_and_relaxed(~i, v);
-}
-#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
-#endif
-
-#else /* atomic64_fetch_andnot_relaxed */
-
-#ifndef atomic64_fetch_andnot_acquire
-static __always_inline s64
-atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_andnot_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
-#endif
-
-#ifndef atomic64_fetch_andnot_release
-static __always_inline s64
-atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_andnot_relaxed(i, v);
-}
-#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
-#endif
-
-#ifndef atomic64_fetch_andnot
-static __always_inline s64
-atomic64_fetch_andnot(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_andnot_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_andnot atomic64_fetch_andnot
-#endif
-
-#endif /* atomic64_fetch_andnot_relaxed */
-
-#define arch_atomic64_or atomic64_or
-
-#define arch_atomic64_fetch_or atomic64_fetch_or
-#define arch_atomic64_fetch_or_acquire atomic64_fetch_or_acquire
-#define arch_atomic64_fetch_or_release atomic64_fetch_or_release
-#define arch_atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-
-#ifndef atomic64_fetch_or_relaxed
-#define atomic64_fetch_or_acquire atomic64_fetch_or
-#define atomic64_fetch_or_release atomic64_fetch_or
-#define atomic64_fetch_or_relaxed atomic64_fetch_or
-#else /* atomic64_fetch_or_relaxed */
-
-#ifndef atomic64_fetch_or_acquire
-static __always_inline s64
-atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_or_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
-#endif
-
-#ifndef atomic64_fetch_or_release
-static __always_inline s64
-atomic64_fetch_or_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_or_relaxed(i, v);
-}
-#define atomic64_fetch_or_release atomic64_fetch_or_release
-#endif
-
-#ifndef atomic64_fetch_or
-static __always_inline s64
-atomic64_fetch_or(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_or_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_or atomic64_fetch_or
-#endif
-
-#endif /* atomic64_fetch_or_relaxed */
-
-#define arch_atomic64_xor atomic64_xor
-
-#define arch_atomic64_fetch_xor atomic64_fetch_xor
-#define arch_atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
-#define arch_atomic64_fetch_xor_release atomic64_fetch_xor_release
-#define arch_atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-
-#ifndef atomic64_fetch_xor_relaxed
-#define atomic64_fetch_xor_acquire atomic64_fetch_xor
-#define atomic64_fetch_xor_release atomic64_fetch_xor
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor
-#else /* atomic64_fetch_xor_relaxed */
-
-#ifndef atomic64_fetch_xor_acquire
-static __always_inline s64
-atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
-{
-	s64 ret = atomic64_fetch_xor_relaxed(i, v);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
-#endif
-
-#ifndef atomic64_fetch_xor_release
-static __always_inline s64
-atomic64_fetch_xor_release(s64 i, atomic64_t *v)
-{
-	__atomic_release_fence();
-	return atomic64_fetch_xor_relaxed(i, v);
-}
-#define atomic64_fetch_xor_release atomic64_fetch_xor_release
-#endif
-
-#ifndef atomic64_fetch_xor
-static __always_inline s64
-atomic64_fetch_xor(s64 i, atomic64_t *v)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_fetch_xor_relaxed(i, v);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_fetch_xor atomic64_fetch_xor
-#endif
-
-#endif /* atomic64_fetch_xor_relaxed */
-
-#define arch_atomic64_xchg atomic64_xchg
-#define arch_atomic64_xchg_acquire atomic64_xchg_acquire
-#define arch_atomic64_xchg_release atomic64_xchg_release
-#define arch_atomic64_xchg_relaxed atomic64_xchg_relaxed
-
-#ifndef atomic64_xchg_relaxed
-#define atomic64_xchg_acquire atomic64_xchg
-#define atomic64_xchg_release atomic64_xchg
-#define atomic64_xchg_relaxed atomic64_xchg
-#else /* atomic64_xchg_relaxed */
-
-#ifndef atomic64_xchg_acquire
-static __always_inline s64
-atomic64_xchg_acquire(atomic64_t *v, s64 i)
-{
-	s64 ret = atomic64_xchg_relaxed(v, i);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_xchg_acquire atomic64_xchg_acquire
-#endif
-
-#ifndef atomic64_xchg_release
-static __always_inline s64
-atomic64_xchg_release(atomic64_t *v, s64 i)
-{
-	__atomic_release_fence();
-	return atomic64_xchg_relaxed(v, i);
-}
-#define atomic64_xchg_release atomic64_xchg_release
-#endif
-
-#ifndef atomic64_xchg
-static __always_inline s64
-atomic64_xchg(atomic64_t *v, s64 i)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_xchg_relaxed(v, i);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_xchg atomic64_xchg
-#endif
-
-#endif /* atomic64_xchg_relaxed */
-
-#define arch_atomic64_cmpxchg atomic64_cmpxchg
-#define arch_atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
-#define arch_atomic64_cmpxchg_release atomic64_cmpxchg_release
-#define arch_atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
-
-#ifndef atomic64_cmpxchg_relaxed
-#define atomic64_cmpxchg_acquire atomic64_cmpxchg
-#define atomic64_cmpxchg_release atomic64_cmpxchg
-#define atomic64_cmpxchg_relaxed atomic64_cmpxchg
-#else /* atomic64_cmpxchg_relaxed */
-
-#ifndef atomic64_cmpxchg_acquire
-static __always_inline s64
-atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
-{
-	s64 ret = atomic64_cmpxchg_relaxed(v, old, new);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
-#endif
-
-#ifndef atomic64_cmpxchg_release
-static __always_inline s64
-atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
-{
-	__atomic_release_fence();
-	return atomic64_cmpxchg_relaxed(v, old, new);
-}
-#define atomic64_cmpxchg_release atomic64_cmpxchg_release
-#endif
-
-#ifndef atomic64_cmpxchg
-static __always_inline s64
-atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
-{
-	s64 ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_cmpxchg_relaxed(v, old, new);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_cmpxchg atomic64_cmpxchg
-#endif
-
-#endif /* atomic64_cmpxchg_relaxed */
-
-#define arch_atomic64_try_cmpxchg atomic64_try_cmpxchg
-#define arch_atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
-#define arch_atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
-#define arch_atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
-
-#ifndef atomic64_try_cmpxchg_relaxed
-#ifdef atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg
-#endif /* atomic64_try_cmpxchg */
-
-#ifndef atomic64_try_cmpxchg
-static __always_inline bool
-atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
-{
-	s64 r, o = *old;
-	r = atomic64_cmpxchg(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-#endif
-
-#ifndef atomic64_try_cmpxchg_acquire
-static __always_inline bool
-atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
-{
-	s64 r, o = *old;
-	r = atomic64_cmpxchg_acquire(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
-#endif
-
-#ifndef atomic64_try_cmpxchg_release
-static __always_inline bool
-atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
-{
-	s64 r, o = *old;
-	r = atomic64_cmpxchg_release(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
-#endif
-
-#ifndef atomic64_try_cmpxchg_relaxed
-static __always_inline bool
-atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
-{
-	s64 r, o = *old;
-	r = atomic64_cmpxchg_relaxed(v, o, new);
-	if (unlikely(r != o))
-		*old = r;
-	return likely(r == o);
-}
-#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
-#endif
-
-#else /* atomic64_try_cmpxchg_relaxed */
-
-#ifndef atomic64_try_cmpxchg_acquire
-static __always_inline bool
-atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
-{
-	bool ret = atomic64_try_cmpxchg_relaxed(v, old, new);
-	__atomic_acquire_fence();
-	return ret;
-}
-#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
-#endif
-
-#ifndef atomic64_try_cmpxchg_release
-static __always_inline bool
-atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
-{
-	__atomic_release_fence();
-	return atomic64_try_cmpxchg_relaxed(v, old, new);
-}
-#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
-#endif
-
-#ifndef atomic64_try_cmpxchg
-static __always_inline bool
-atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
-{
-	bool ret;
-	__atomic_pre_full_fence();
-	ret = atomic64_try_cmpxchg_relaxed(v, old, new);
-	__atomic_post_full_fence();
-	return ret;
-}
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-#endif
-
-#endif /* atomic64_try_cmpxchg_relaxed */
-
-#define arch_atomic64_sub_and_test atomic64_sub_and_test
-
-#ifndef atomic64_sub_and_test
-/**
- * atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-static __always_inline bool
-atomic64_sub_and_test(s64 i, atomic64_t *v)
-{
-	return atomic64_sub_return(i, v) == 0;
-}
-#define atomic64_sub_and_test atomic64_sub_and_test
-#endif
-
-#define arch_atomic64_dec_and_test atomic64_dec_and_test
-
-#ifndef atomic64_dec_and_test
-/**
- * atomic64_dec_and_test - decrement and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-static __always_inline bool
-atomic64_dec_and_test(atomic64_t *v)
-{
-	return atomic64_dec_return(v) == 0;
-}
-#define atomic64_dec_and_test atomic64_dec_and_test
-#endif
-
-#define arch_atomic64_inc_and_test atomic64_inc_and_test
-
-#ifndef atomic64_inc_and_test
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-static __always_inline bool
-atomic64_inc_and_test(atomic64_t *v)
-{
-	return atomic64_inc_return(v) == 0;
-}
-#define atomic64_inc_and_test atomic64_inc_and_test
-#endif
-
-#define arch_atomic64_add_negative atomic64_add_negative
-
-#ifndef atomic64_add_negative
-/**
- * atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-static __always_inline bool
-atomic64_add_negative(s64 i, atomic64_t *v)
-{
-	return atomic64_add_return(i, v) < 0;
-}
-#define atomic64_add_negative atomic64_add_negative
-#endif
-
-#define arch_atomic64_fetch_add_unless atomic64_fetch_add_unless
-
-#ifndef atomic64_fetch_add_unless
-/**
- * atomic64_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, so long as @v was not already @u.
- * Returns original value of @v
- */
-static __always_inline s64
-atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	s64 c = atomic64_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!atomic64_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-#endif
-
-#define arch_atomic64_add_unless atomic64_add_unless
-
-#ifndef atomic64_add_unless
-/**
- * atomic64_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns true if the addition was done.
- */
-static __always_inline bool
-atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
-{
-	return atomic64_fetch_add_unless(v, a, u) != u;
-}
-#define atomic64_add_unless atomic64_add_unless
-#endif
-
-#define arch_atomic64_inc_not_zero atomic64_inc_not_zero
-
-#ifndef atomic64_inc_not_zero
-/**
- * atomic64_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1, if @v is non-zero.
- * Returns true if the increment was done.
- */
-static __always_inline bool
-atomic64_inc_not_zero(atomic64_t *v)
-{
-	return atomic64_add_unless(v, 1, 0);
-}
-#define atomic64_inc_not_zero atomic64_inc_not_zero
-#endif
-
-#define arch_atomic64_inc_unless_negative atomic64_inc_unless_negative
-
-#ifndef atomic64_inc_unless_negative
-static __always_inline bool
-atomic64_inc_unless_negative(atomic64_t *v)
-{
-	s64 c = atomic64_read(v);
-
-	do {
-		if (unlikely(c < 0))
-			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c + 1));
-
-	return true;
-}
-#define atomic64_inc_unless_negative atomic64_inc_unless_negative
-#endif
-
-#define arch_atomic64_dec_unless_positive atomic64_dec_unless_positive
-
-#ifndef atomic64_dec_unless_positive
-static __always_inline bool
-atomic64_dec_unless_positive(atomic64_t *v)
-{
-	s64 c = atomic64_read(v);
-
-	do {
-		if (unlikely(c > 0))
-			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c - 1));
-
-	return true;
-}
-#define atomic64_dec_unless_positive atomic64_dec_unless_positive
-#endif
-
-#define arch_atomic64_dec_if_positive atomic64_dec_if_positive
-
-#ifndef atomic64_dec_if_positive
-static __always_inline s64
-atomic64_dec_if_positive(atomic64_t *v)
-{
-	s64 dec, c = atomic64_read(v);
-
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!atomic64_try_cmpxchg(v, &c, dec));
-
-	return dec;
-}
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-#endif
-
-#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// d78e6c293c661c15188f0ec05bce45188c8d5892
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 4f8d83f9e480a..ed1d3ffd5b9dc 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -77,12 +77,8 @@
 	__ret;								\
 })
 
-#ifdef CONFIG_ARCH_ATOMIC
 #include <linux/atomic-arch-fallback.h>
 #include <asm-generic/atomic-instrumented.h>
-#else
-#include <linux/atomic-fallback.h>
-#endif
 
 #include <asm-generic/atomic-long.h>
 
diff --git a/scripts/atomic/check-atomics.sh b/scripts/atomic/check-atomics.sh
index 82748d42ecc5a..9c7fbd4bcbce8 100755
--- a/scripts/atomic/check-atomics.sh
+++ b/scripts/atomic/check-atomics.sh
@@ -17,7 +17,6 @@ cat <<EOF |
 asm-generic/atomic-instrumented.h
 asm-generic/atomic-long.h
 linux/atomic-arch-fallback.h
-linux/atomic-fallback.h
 EOF
 while read header; do
 	OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh
index d29e159ef4891..f776a574224d3 100755
--- a/scripts/atomic/gen-atomics.sh
+++ b/scripts/atomic/gen-atomics.sh
@@ -11,7 +11,6 @@ cat <<EOF |
 gen-atomic-instrumented.sh      asm-generic/atomic-instrumented.h
 gen-atomic-long.sh              asm-generic/atomic-long.h
 gen-atomic-fallback.sh          linux/atomic-arch-fallback.h		arch_
-gen-atomic-fallback.sh          linux/atomic-fallback.h
 EOF
 while read script header args; do
 	/bin/sh ${ATOMICDIR}/${script} ${ATOMICTBL} ${args} > ${LINUXDIR}/include/${header}
-- 
GitLab


From bccf1ec369ac126b0997d01a6e1deae00e2cf6b3 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 25 May 2021 15:02:32 +0100
Subject: [PATCH 1628/3804] locking/atomics: atomic-instrumented: simplify
 ifdeffery

Now that all architectures implement ARCH_ATOMIC, the fallbacks are
generated before the instrumented wrappers are generated. Due to this,
in atomic-instrumented.h we can assume that the whole set of atomic
functions has been generated. Likewise, atomic-instrumented.h doesn't
need to provide a preprocessor definition for every atomic it wraps.

This patch removes the redundant ifdeffery.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210525140232.53872-34-mark.rutland@arm.com
---
 include/asm-generic/atomic-instrumented.h | 498 +---------------------
 scripts/atomic/gen-atomic-instrumented.sh |  51 +--
 2 files changed, 3 insertions(+), 546 deletions(-)

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 888b6cfeed91a..bc45af52c93bf 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -27,17 +27,13 @@ atomic_read(const atomic_t *v)
 	instrument_atomic_read(v, sizeof(*v));
 	return arch_atomic_read(v);
 }
-#define atomic_read atomic_read
 
-#if defined(arch_atomic_read_acquire)
 static __always_inline int
 atomic_read_acquire(const atomic_t *v)
 {
 	instrument_atomic_read(v, sizeof(*v));
 	return arch_atomic_read_acquire(v);
 }
-#define atomic_read_acquire atomic_read_acquire
-#endif
 
 static __always_inline void
 atomic_set(atomic_t *v, int i)
@@ -45,17 +41,13 @@ atomic_set(atomic_t *v, int i)
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic_set(v, i);
 }
-#define atomic_set atomic_set
 
-#if defined(arch_atomic_set_release)
 static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic_set_release(v, i);
 }
-#define atomic_set_release atomic_set_release
-#endif
 
 static __always_inline void
 atomic_add(int i, atomic_t *v)
@@ -63,87 +55,62 @@ atomic_add(int i, atomic_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_add(i, v);
 }
-#define atomic_add atomic_add
 
-#if !defined(arch_atomic_add_return_relaxed) || defined(arch_atomic_add_return)
 static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return(i, v);
 }
-#define atomic_add_return atomic_add_return
-#endif
 
-#if defined(arch_atomic_add_return_acquire)
 static __always_inline int
 atomic_add_return_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return_acquire(i, v);
 }
-#define atomic_add_return_acquire atomic_add_return_acquire
-#endif
 
-#if defined(arch_atomic_add_return_release)
 static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return_release(i, v);
 }
-#define atomic_add_return_release atomic_add_return_release
-#endif
 
-#if defined(arch_atomic_add_return_relaxed)
 static __always_inline int
 atomic_add_return_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_return_relaxed(i, v);
 }
-#define atomic_add_return_relaxed atomic_add_return_relaxed
-#endif
 
-#if !defined(arch_atomic_fetch_add_relaxed) || defined(arch_atomic_fetch_add)
 static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add(i, v);
 }
-#define atomic_fetch_add atomic_fetch_add
-#endif
 
-#if defined(arch_atomic_fetch_add_acquire)
 static __always_inline int
 atomic_fetch_add_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_acquire(i, v);
 }
-#define atomic_fetch_add_acquire atomic_fetch_add_acquire
-#endif
 
-#if defined(arch_atomic_fetch_add_release)
 static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_release(i, v);
 }
-#define atomic_fetch_add_release atomic_fetch_add_release
-#endif
 
-#if defined(arch_atomic_fetch_add_relaxed)
 static __always_inline int
 atomic_fetch_add_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_relaxed(i, v);
 }
-#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
-#endif
 
 static __always_inline void
 atomic_sub(int i, atomic_t *v)
@@ -151,267 +118,188 @@ atomic_sub(int i, atomic_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_sub(i, v);
 }
-#define atomic_sub atomic_sub
 
-#if !defined(arch_atomic_sub_return_relaxed) || defined(arch_atomic_sub_return)
 static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return(i, v);
 }
-#define atomic_sub_return atomic_sub_return
-#endif
 
-#if defined(arch_atomic_sub_return_acquire)
 static __always_inline int
 atomic_sub_return_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return_acquire(i, v);
 }
-#define atomic_sub_return_acquire atomic_sub_return_acquire
-#endif
 
-#if defined(arch_atomic_sub_return_release)
 static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return_release(i, v);
 }
-#define atomic_sub_return_release atomic_sub_return_release
-#endif
 
-#if defined(arch_atomic_sub_return_relaxed)
 static __always_inline int
 atomic_sub_return_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_return_relaxed(i, v);
 }
-#define atomic_sub_return_relaxed atomic_sub_return_relaxed
-#endif
 
-#if !defined(arch_atomic_fetch_sub_relaxed) || defined(arch_atomic_fetch_sub)
 static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub(i, v);
 }
-#define atomic_fetch_sub atomic_fetch_sub
-#endif
 
-#if defined(arch_atomic_fetch_sub_acquire)
 static __always_inline int
 atomic_fetch_sub_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub_acquire(i, v);
 }
-#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
-#endif
 
-#if defined(arch_atomic_fetch_sub_release)
 static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub_release(i, v);
 }
-#define atomic_fetch_sub_release atomic_fetch_sub_release
-#endif
 
-#if defined(arch_atomic_fetch_sub_relaxed)
 static __always_inline int
 atomic_fetch_sub_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_sub_relaxed(i, v);
 }
-#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
-#endif
 
-#if defined(arch_atomic_inc)
 static __always_inline void
 atomic_inc(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_inc(v);
 }
-#define atomic_inc atomic_inc
-#endif
 
-#if defined(arch_atomic_inc_return)
 static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return(v);
 }
-#define atomic_inc_return atomic_inc_return
-#endif
 
-#if defined(arch_atomic_inc_return_acquire)
 static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return_acquire(v);
 }
-#define atomic_inc_return_acquire atomic_inc_return_acquire
-#endif
 
-#if defined(arch_atomic_inc_return_release)
 static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return_release(v);
 }
-#define atomic_inc_return_release atomic_inc_return_release
-#endif
 
-#if defined(arch_atomic_inc_return_relaxed)
 static __always_inline int
 atomic_inc_return_relaxed(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_return_relaxed(v);
 }
-#define atomic_inc_return_relaxed atomic_inc_return_relaxed
-#endif
 
-#if defined(arch_atomic_fetch_inc)
 static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc(v);
 }
-#define atomic_fetch_inc atomic_fetch_inc
-#endif
 
-#if defined(arch_atomic_fetch_inc_acquire)
 static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc_acquire(v);
 }
-#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
-#endif
 
-#if defined(arch_atomic_fetch_inc_release)
 static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc_release(v);
 }
-#define atomic_fetch_inc_release atomic_fetch_inc_release
-#endif
 
-#if defined(arch_atomic_fetch_inc_relaxed)
 static __always_inline int
 atomic_fetch_inc_relaxed(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_inc_relaxed(v);
 }
-#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
-#endif
 
-#if defined(arch_atomic_dec)
 static __always_inline void
 atomic_dec(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_dec(v);
 }
-#define atomic_dec atomic_dec
-#endif
 
-#if defined(arch_atomic_dec_return)
 static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return(v);
 }
-#define atomic_dec_return atomic_dec_return
-#endif
 
-#if defined(arch_atomic_dec_return_acquire)
 static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return_acquire(v);
 }
-#define atomic_dec_return_acquire atomic_dec_return_acquire
-#endif
 
-#if defined(arch_atomic_dec_return_release)
 static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return_release(v);
 }
-#define atomic_dec_return_release atomic_dec_return_release
-#endif
 
-#if defined(arch_atomic_dec_return_relaxed)
 static __always_inline int
 atomic_dec_return_relaxed(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_return_relaxed(v);
 }
-#define atomic_dec_return_relaxed atomic_dec_return_relaxed
-#endif
 
-#if defined(arch_atomic_fetch_dec)
 static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec(v);
 }
-#define atomic_fetch_dec atomic_fetch_dec
-#endif
 
-#if defined(arch_atomic_fetch_dec_acquire)
 static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec_acquire(v);
 }
-#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
-#endif
 
-#if defined(arch_atomic_fetch_dec_release)
 static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec_release(v);
 }
-#define atomic_fetch_dec_release atomic_fetch_dec_release
-#endif
 
-#if defined(arch_atomic_fetch_dec_relaxed)
 static __always_inline int
 atomic_fetch_dec_relaxed(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_dec_relaxed(v);
 }
-#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
-#endif
 
 static __always_inline void
 atomic_and(int i, atomic_t *v)
@@ -419,97 +307,69 @@ atomic_and(int i, atomic_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_and(i, v);
 }
-#define atomic_and atomic_and
 
-#if !defined(arch_atomic_fetch_and_relaxed) || defined(arch_atomic_fetch_and)
 static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and(i, v);
 }
-#define atomic_fetch_and atomic_fetch_and
-#endif
 
-#if defined(arch_atomic_fetch_and_acquire)
 static __always_inline int
 atomic_fetch_and_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and_acquire(i, v);
 }
-#define atomic_fetch_and_acquire atomic_fetch_and_acquire
-#endif
 
-#if defined(arch_atomic_fetch_and_release)
 static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and_release(i, v);
 }
-#define atomic_fetch_and_release atomic_fetch_and_release
-#endif
 
-#if defined(arch_atomic_fetch_and_relaxed)
 static __always_inline int
 atomic_fetch_and_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_and_relaxed(i, v);
 }
-#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
-#endif
 
-#if defined(arch_atomic_andnot)
 static __always_inline void
 atomic_andnot(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_andnot(i, v);
 }
-#define atomic_andnot atomic_andnot
-#endif
 
-#if defined(arch_atomic_fetch_andnot)
 static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot(i, v);
 }
-#define atomic_fetch_andnot atomic_fetch_andnot
-#endif
 
-#if defined(arch_atomic_fetch_andnot_acquire)
 static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot_acquire(i, v);
 }
-#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
-#endif
 
-#if defined(arch_atomic_fetch_andnot_release)
 static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot_release(i, v);
 }
-#define atomic_fetch_andnot_release atomic_fetch_andnot_release
-#endif
 
-#if defined(arch_atomic_fetch_andnot_relaxed)
 static __always_inline int
 atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_andnot_relaxed(i, v);
 }
-#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
-#endif
 
 static __always_inline void
 atomic_or(int i, atomic_t *v)
@@ -517,47 +377,34 @@ atomic_or(int i, atomic_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_or(i, v);
 }
-#define atomic_or atomic_or
 
-#if !defined(arch_atomic_fetch_or_relaxed) || defined(arch_atomic_fetch_or)
 static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or(i, v);
 }
-#define atomic_fetch_or atomic_fetch_or
-#endif
 
-#if defined(arch_atomic_fetch_or_acquire)
 static __always_inline int
 atomic_fetch_or_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or_acquire(i, v);
 }
-#define atomic_fetch_or_acquire atomic_fetch_or_acquire
-#endif
 
-#if defined(arch_atomic_fetch_or_release)
 static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or_release(i, v);
 }
-#define atomic_fetch_or_release atomic_fetch_or_release
-#endif
 
-#if defined(arch_atomic_fetch_or_relaxed)
 static __always_inline int
 atomic_fetch_or_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_or_relaxed(i, v);
 }
-#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
-#endif
 
 static __always_inline void
 atomic_xor(int i, atomic_t *v)
@@ -565,129 +412,91 @@ atomic_xor(int i, atomic_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic_xor(i, v);
 }
-#define atomic_xor atomic_xor
 
-#if !defined(arch_atomic_fetch_xor_relaxed) || defined(arch_atomic_fetch_xor)
 static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor(i, v);
 }
-#define atomic_fetch_xor atomic_fetch_xor
-#endif
 
-#if defined(arch_atomic_fetch_xor_acquire)
 static __always_inline int
 atomic_fetch_xor_acquire(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor_acquire(i, v);
 }
-#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
-#endif
 
-#if defined(arch_atomic_fetch_xor_release)
 static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor_release(i, v);
 }
-#define atomic_fetch_xor_release atomic_fetch_xor_release
-#endif
 
-#if defined(arch_atomic_fetch_xor_relaxed)
 static __always_inline int
 atomic_fetch_xor_relaxed(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_xor_relaxed(i, v);
 }
-#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
-#endif
 
-#if !defined(arch_atomic_xchg_relaxed) || defined(arch_atomic_xchg)
 static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg(v, i);
 }
-#define atomic_xchg atomic_xchg
-#endif
 
-#if defined(arch_atomic_xchg_acquire)
 static __always_inline int
 atomic_xchg_acquire(atomic_t *v, int i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg_acquire(v, i);
 }
-#define atomic_xchg_acquire atomic_xchg_acquire
-#endif
 
-#if defined(arch_atomic_xchg_release)
 static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg_release(v, i);
 }
-#define atomic_xchg_release atomic_xchg_release
-#endif
 
-#if defined(arch_atomic_xchg_relaxed)
 static __always_inline int
 atomic_xchg_relaxed(atomic_t *v, int i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_xchg_relaxed(v, i);
 }
-#define atomic_xchg_relaxed atomic_xchg_relaxed
-#endif
 
-#if !defined(arch_atomic_cmpxchg_relaxed) || defined(arch_atomic_cmpxchg)
 static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg(v, old, new);
 }
-#define atomic_cmpxchg atomic_cmpxchg
-#endif
 
-#if defined(arch_atomic_cmpxchg_acquire)
 static __always_inline int
 atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg_acquire(v, old, new);
 }
-#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
-#endif
 
-#if defined(arch_atomic_cmpxchg_release)
 static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg_release(v, old, new);
 }
-#define atomic_cmpxchg_release atomic_cmpxchg_release
-#endif
 
-#if defined(arch_atomic_cmpxchg_relaxed)
 static __always_inline int
 atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_cmpxchg_relaxed(v, old, new);
 }
-#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
-#endif
 
-#if defined(arch_atomic_try_cmpxchg)
 static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
@@ -695,10 +504,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg(v, old, new);
 }
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-#endif
 
-#if defined(arch_atomic_try_cmpxchg_acquire)
 static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
@@ -706,10 +512,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg_acquire(v, old, new);
 }
-#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
-#endif
 
-#if defined(arch_atomic_try_cmpxchg_release)
 static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
@@ -717,10 +520,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg_release(v, old, new);
 }
-#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
-#endif
 
-#if defined(arch_atomic_try_cmpxchg_relaxed)
 static __always_inline bool
 atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 {
@@ -728,108 +528,76 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic_try_cmpxchg_relaxed(v, old, new);
 }
-#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
-#endif
 
-#if defined(arch_atomic_sub_and_test)
 static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_sub_and_test(i, v);
 }
-#define atomic_sub_and_test atomic_sub_and_test
-#endif
 
-#if defined(arch_atomic_dec_and_test)
 static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_and_test(v);
 }
-#define atomic_dec_and_test atomic_dec_and_test
-#endif
 
-#if defined(arch_atomic_inc_and_test)
 static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_and_test(v);
 }
-#define atomic_inc_and_test atomic_inc_and_test
-#endif
 
-#if defined(arch_atomic_add_negative)
 static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_negative(i, v);
 }
-#define atomic_add_negative atomic_add_negative
-#endif
 
-#if defined(arch_atomic_fetch_add_unless)
 static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_fetch_add_unless(v, a, u);
 }
-#define atomic_fetch_add_unless atomic_fetch_add_unless
-#endif
 
-#if defined(arch_atomic_add_unless)
 static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_add_unless(v, a, u);
 }
-#define atomic_add_unless atomic_add_unless
-#endif
 
-#if defined(arch_atomic_inc_not_zero)
 static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_not_zero(v);
 }
-#define atomic_inc_not_zero atomic_inc_not_zero
-#endif
 
-#if defined(arch_atomic_inc_unless_negative)
 static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_inc_unless_negative(v);
 }
-#define atomic_inc_unless_negative atomic_inc_unless_negative
-#endif
 
-#if defined(arch_atomic_dec_unless_positive)
 static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_unless_positive(v);
 }
-#define atomic_dec_unless_positive atomic_dec_unless_positive
-#endif
 
-#if defined(arch_atomic_dec_if_positive)
 static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic_dec_if_positive(v);
 }
-#define atomic_dec_if_positive atomic_dec_if_positive
-#endif
 
 static __always_inline s64
 atomic64_read(const atomic64_t *v)
@@ -837,17 +605,13 @@ atomic64_read(const atomic64_t *v)
 	instrument_atomic_read(v, sizeof(*v));
 	return arch_atomic64_read(v);
 }
-#define atomic64_read atomic64_read
 
-#if defined(arch_atomic64_read_acquire)
 static __always_inline s64
 atomic64_read_acquire(const atomic64_t *v)
 {
 	instrument_atomic_read(v, sizeof(*v));
 	return arch_atomic64_read_acquire(v);
 }
-#define atomic64_read_acquire atomic64_read_acquire
-#endif
 
 static __always_inline void
 atomic64_set(atomic64_t *v, s64 i)
@@ -855,17 +619,13 @@ atomic64_set(atomic64_t *v, s64 i)
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic64_set(v, i);
 }
-#define atomic64_set atomic64_set
 
-#if defined(arch_atomic64_set_release)
 static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
 	instrument_atomic_write(v, sizeof(*v));
 	arch_atomic64_set_release(v, i);
 }
-#define atomic64_set_release atomic64_set_release
-#endif
 
 static __always_inline void
 atomic64_add(s64 i, atomic64_t *v)
@@ -873,87 +633,62 @@ atomic64_add(s64 i, atomic64_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_add(i, v);
 }
-#define atomic64_add atomic64_add
 
-#if !defined(arch_atomic64_add_return_relaxed) || defined(arch_atomic64_add_return)
 static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return(i, v);
 }
-#define atomic64_add_return atomic64_add_return
-#endif
 
-#if defined(arch_atomic64_add_return_acquire)
 static __always_inline s64
 atomic64_add_return_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return_acquire(i, v);
 }
-#define atomic64_add_return_acquire atomic64_add_return_acquire
-#endif
 
-#if defined(arch_atomic64_add_return_release)
 static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return_release(i, v);
 }
-#define atomic64_add_return_release atomic64_add_return_release
-#endif
 
-#if defined(arch_atomic64_add_return_relaxed)
 static __always_inline s64
 atomic64_add_return_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_return_relaxed(i, v);
 }
-#define atomic64_add_return_relaxed atomic64_add_return_relaxed
-#endif
 
-#if !defined(arch_atomic64_fetch_add_relaxed) || defined(arch_atomic64_fetch_add)
 static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add(i, v);
 }
-#define atomic64_fetch_add atomic64_fetch_add
-#endif
 
-#if defined(arch_atomic64_fetch_add_acquire)
 static __always_inline s64
 atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_acquire(i, v);
 }
-#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_add_release)
 static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_release(i, v);
 }
-#define atomic64_fetch_add_release atomic64_fetch_add_release
-#endif
 
-#if defined(arch_atomic64_fetch_add_relaxed)
 static __always_inline s64
 atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_relaxed(i, v);
 }
-#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
-#endif
 
 static __always_inline void
 atomic64_sub(s64 i, atomic64_t *v)
@@ -961,267 +696,188 @@ atomic64_sub(s64 i, atomic64_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_sub(i, v);
 }
-#define atomic64_sub atomic64_sub
 
-#if !defined(arch_atomic64_sub_return_relaxed) || defined(arch_atomic64_sub_return)
 static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return(i, v);
 }
-#define atomic64_sub_return atomic64_sub_return
-#endif
 
-#if defined(arch_atomic64_sub_return_acquire)
 static __always_inline s64
 atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return_acquire(i, v);
 }
-#define atomic64_sub_return_acquire atomic64_sub_return_acquire
-#endif
 
-#if defined(arch_atomic64_sub_return_release)
 static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return_release(i, v);
 }
-#define atomic64_sub_return_release atomic64_sub_return_release
-#endif
 
-#if defined(arch_atomic64_sub_return_relaxed)
 static __always_inline s64
 atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_return_relaxed(i, v);
 }
-#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
-#endif
 
-#if !defined(arch_atomic64_fetch_sub_relaxed) || defined(arch_atomic64_fetch_sub)
 static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub(i, v);
 }
-#define atomic64_fetch_sub atomic64_fetch_sub
-#endif
 
-#if defined(arch_atomic64_fetch_sub_acquire)
 static __always_inline s64
 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub_acquire(i, v);
 }
-#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_sub_release)
 static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub_release(i, v);
 }
-#define atomic64_fetch_sub_release atomic64_fetch_sub_release
-#endif
 
-#if defined(arch_atomic64_fetch_sub_relaxed)
 static __always_inline s64
 atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_sub_relaxed(i, v);
 }
-#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
-#endif
 
-#if defined(arch_atomic64_inc)
 static __always_inline void
 atomic64_inc(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_inc(v);
 }
-#define atomic64_inc atomic64_inc
-#endif
 
-#if defined(arch_atomic64_inc_return)
 static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return(v);
 }
-#define atomic64_inc_return atomic64_inc_return
-#endif
 
-#if defined(arch_atomic64_inc_return_acquire)
 static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return_acquire(v);
 }
-#define atomic64_inc_return_acquire atomic64_inc_return_acquire
-#endif
 
-#if defined(arch_atomic64_inc_return_release)
 static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return_release(v);
 }
-#define atomic64_inc_return_release atomic64_inc_return_release
-#endif
 
-#if defined(arch_atomic64_inc_return_relaxed)
 static __always_inline s64
 atomic64_inc_return_relaxed(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_return_relaxed(v);
 }
-#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
-#endif
 
-#if defined(arch_atomic64_fetch_inc)
 static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc(v);
 }
-#define atomic64_fetch_inc atomic64_fetch_inc
-#endif
 
-#if defined(arch_atomic64_fetch_inc_acquire)
 static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc_acquire(v);
 }
-#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_inc_release)
 static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc_release(v);
 }
-#define atomic64_fetch_inc_release atomic64_fetch_inc_release
-#endif
 
-#if defined(arch_atomic64_fetch_inc_relaxed)
 static __always_inline s64
 atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_inc_relaxed(v);
 }
-#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
-#endif
 
-#if defined(arch_atomic64_dec)
 static __always_inline void
 atomic64_dec(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_dec(v);
 }
-#define atomic64_dec atomic64_dec
-#endif
 
-#if defined(arch_atomic64_dec_return)
 static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return(v);
 }
-#define atomic64_dec_return atomic64_dec_return
-#endif
 
-#if defined(arch_atomic64_dec_return_acquire)
 static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return_acquire(v);
 }
-#define atomic64_dec_return_acquire atomic64_dec_return_acquire
-#endif
 
-#if defined(arch_atomic64_dec_return_release)
 static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return_release(v);
 }
-#define atomic64_dec_return_release atomic64_dec_return_release
-#endif
 
-#if defined(arch_atomic64_dec_return_relaxed)
 static __always_inline s64
 atomic64_dec_return_relaxed(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_return_relaxed(v);
 }
-#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
-#endif
 
-#if defined(arch_atomic64_fetch_dec)
 static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec(v);
 }
-#define atomic64_fetch_dec atomic64_fetch_dec
-#endif
 
-#if defined(arch_atomic64_fetch_dec_acquire)
 static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec_acquire(v);
 }
-#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_dec_release)
 static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec_release(v);
 }
-#define atomic64_fetch_dec_release atomic64_fetch_dec_release
-#endif
 
-#if defined(arch_atomic64_fetch_dec_relaxed)
 static __always_inline s64
 atomic64_fetch_dec_relaxed(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_dec_relaxed(v);
 }
-#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
-#endif
 
 static __always_inline void
 atomic64_and(s64 i, atomic64_t *v)
@@ -1229,97 +885,69 @@ atomic64_and(s64 i, atomic64_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_and(i, v);
 }
-#define atomic64_and atomic64_and
 
-#if !defined(arch_atomic64_fetch_and_relaxed) || defined(arch_atomic64_fetch_and)
 static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and(i, v);
 }
-#define atomic64_fetch_and atomic64_fetch_and
-#endif
 
-#if defined(arch_atomic64_fetch_and_acquire)
 static __always_inline s64
 atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and_acquire(i, v);
 }
-#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_and_release)
 static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and_release(i, v);
 }
-#define atomic64_fetch_and_release atomic64_fetch_and_release
-#endif
 
-#if defined(arch_atomic64_fetch_and_relaxed)
 static __always_inline s64
 atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_and_relaxed(i, v);
 }
-#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
-#endif
 
-#if defined(arch_atomic64_andnot)
 static __always_inline void
 atomic64_andnot(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_andnot(i, v);
 }
-#define atomic64_andnot atomic64_andnot
-#endif
 
-#if defined(arch_atomic64_fetch_andnot)
 static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot(i, v);
 }
-#define atomic64_fetch_andnot atomic64_fetch_andnot
-#endif
 
-#if defined(arch_atomic64_fetch_andnot_acquire)
 static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot_acquire(i, v);
 }
-#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_andnot_release)
 static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot_release(i, v);
 }
-#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
-#endif
 
-#if defined(arch_atomic64_fetch_andnot_relaxed)
 static __always_inline s64
 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_andnot_relaxed(i, v);
 }
-#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
-#endif
 
 static __always_inline void
 atomic64_or(s64 i, atomic64_t *v)
@@ -1327,47 +955,34 @@ atomic64_or(s64 i, atomic64_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_or(i, v);
 }
-#define atomic64_or atomic64_or
 
-#if !defined(arch_atomic64_fetch_or_relaxed) || defined(arch_atomic64_fetch_or)
 static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or(i, v);
 }
-#define atomic64_fetch_or atomic64_fetch_or
-#endif
 
-#if defined(arch_atomic64_fetch_or_acquire)
 static __always_inline s64
 atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or_acquire(i, v);
 }
-#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_or_release)
 static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or_release(i, v);
 }
-#define atomic64_fetch_or_release atomic64_fetch_or_release
-#endif
 
-#if defined(arch_atomic64_fetch_or_relaxed)
 static __always_inline s64
 atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_or_relaxed(i, v);
 }
-#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
-#endif
 
 static __always_inline void
 atomic64_xor(s64 i, atomic64_t *v)
@@ -1375,129 +990,91 @@ atomic64_xor(s64 i, atomic64_t *v)
 	instrument_atomic_read_write(v, sizeof(*v));
 	arch_atomic64_xor(i, v);
 }
-#define atomic64_xor atomic64_xor
 
-#if !defined(arch_atomic64_fetch_xor_relaxed) || defined(arch_atomic64_fetch_xor)
 static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor(i, v);
 }
-#define atomic64_fetch_xor atomic64_fetch_xor
-#endif
 
-#if defined(arch_atomic64_fetch_xor_acquire)
 static __always_inline s64
 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor_acquire(i, v);
 }
-#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
-#endif
 
-#if defined(arch_atomic64_fetch_xor_release)
 static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor_release(i, v);
 }
-#define atomic64_fetch_xor_release atomic64_fetch_xor_release
-#endif
 
-#if defined(arch_atomic64_fetch_xor_relaxed)
 static __always_inline s64
 atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_xor_relaxed(i, v);
 }
-#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
-#endif
 
-#if !defined(arch_atomic64_xchg_relaxed) || defined(arch_atomic64_xchg)
 static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg(v, i);
 }
-#define atomic64_xchg atomic64_xchg
-#endif
 
-#if defined(arch_atomic64_xchg_acquire)
 static __always_inline s64
 atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg_acquire(v, i);
 }
-#define atomic64_xchg_acquire atomic64_xchg_acquire
-#endif
 
-#if defined(arch_atomic64_xchg_release)
 static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg_release(v, i);
 }
-#define atomic64_xchg_release atomic64_xchg_release
-#endif
 
-#if defined(arch_atomic64_xchg_relaxed)
 static __always_inline s64
 atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_xchg_relaxed(v, i);
 }
-#define atomic64_xchg_relaxed atomic64_xchg_relaxed
-#endif
 
-#if !defined(arch_atomic64_cmpxchg_relaxed) || defined(arch_atomic64_cmpxchg)
 static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg(v, old, new);
 }
-#define atomic64_cmpxchg atomic64_cmpxchg
-#endif
 
-#if defined(arch_atomic64_cmpxchg_acquire)
 static __always_inline s64
 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg_acquire(v, old, new);
 }
-#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
-#endif
 
-#if defined(arch_atomic64_cmpxchg_release)
 static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg_release(v, old, new);
 }
-#define atomic64_cmpxchg_release atomic64_cmpxchg_release
-#endif
 
-#if defined(arch_atomic64_cmpxchg_relaxed)
 static __always_inline s64
 atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_cmpxchg_relaxed(v, old, new);
 }
-#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
-#endif
 
-#if defined(arch_atomic64_try_cmpxchg)
 static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
@@ -1505,10 +1082,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg(v, old, new);
 }
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-#endif
 
-#if defined(arch_atomic64_try_cmpxchg_acquire)
 static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
@@ -1516,10 +1090,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg_acquire(v, old, new);
 }
-#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
-#endif
 
-#if defined(arch_atomic64_try_cmpxchg_release)
 static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
@@ -1527,10 +1098,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg_release(v, old, new);
 }
-#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
-#endif
 
-#if defined(arch_atomic64_try_cmpxchg_relaxed)
 static __always_inline bool
 atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
@@ -1538,218 +1106,161 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 	instrument_atomic_read_write(old, sizeof(*old));
 	return arch_atomic64_try_cmpxchg_relaxed(v, old, new);
 }
-#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
-#endif
 
-#if defined(arch_atomic64_sub_and_test)
 static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_sub_and_test(i, v);
 }
-#define atomic64_sub_and_test atomic64_sub_and_test
-#endif
 
-#if defined(arch_atomic64_dec_and_test)
 static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_and_test(v);
 }
-#define atomic64_dec_and_test atomic64_dec_and_test
-#endif
 
-#if defined(arch_atomic64_inc_and_test)
 static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_and_test(v);
 }
-#define atomic64_inc_and_test atomic64_inc_and_test
-#endif
 
-#if defined(arch_atomic64_add_negative)
 static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_negative(i, v);
 }
-#define atomic64_add_negative atomic64_add_negative
-#endif
 
-#if defined(arch_atomic64_fetch_add_unless)
 static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_fetch_add_unless(v, a, u);
 }
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-#endif
 
-#if defined(arch_atomic64_add_unless)
 static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_add_unless(v, a, u);
 }
-#define atomic64_add_unless atomic64_add_unless
-#endif
 
-#if defined(arch_atomic64_inc_not_zero)
 static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_not_zero(v);
 }
-#define atomic64_inc_not_zero atomic64_inc_not_zero
-#endif
 
-#if defined(arch_atomic64_inc_unless_negative)
 static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_inc_unless_negative(v);
 }
-#define atomic64_inc_unless_negative atomic64_inc_unless_negative
-#endif
 
-#if defined(arch_atomic64_dec_unless_positive)
 static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_unless_positive(v);
 }
-#define atomic64_dec_unless_positive atomic64_dec_unless_positive
-#endif
 
-#if defined(arch_atomic64_dec_if_positive)
 static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
 	instrument_atomic_read_write(v, sizeof(*v));
 	return arch_atomic64_dec_if_positive(v);
 }
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-#endif
 
-#if !defined(arch_xchg_relaxed) || defined(arch_xchg)
 #define xchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_xchg_acquire)
 #define xchg_acquire(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg_acquire(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_xchg_release)
 #define xchg_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg_release(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_xchg_relaxed)
 #define xchg_relaxed(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_xchg_relaxed(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if !defined(arch_cmpxchg_relaxed) || defined(arch_cmpxchg)
 #define cmpxchg(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg_acquire)
 #define cmpxchg_acquire(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg_release)
 #define cmpxchg_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg_relaxed)
 #define cmpxchg_relaxed(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if !defined(arch_cmpxchg64_relaxed) || defined(arch_cmpxchg64)
 #define cmpxchg64(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg64_acquire)
 #define cmpxchg64_acquire(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg64_release)
 #define cmpxchg64_release(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_cmpxchg64_relaxed)
 #define cmpxchg64_relaxed(ptr, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
 	instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \
 	arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__); \
 })
-#endif
 
-#if !defined(arch_try_cmpxchg_relaxed) || defined(arch_try_cmpxchg)
 #define try_cmpxchg(ptr, oldp, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
@@ -1758,9 +1269,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_try_cmpxchg_acquire)
 #define try_cmpxchg_acquire(ptr, oldp, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
@@ -1769,9 +1278,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg_acquire(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_try_cmpxchg_release)
 #define try_cmpxchg_release(ptr, oldp, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
@@ -1780,9 +1287,7 @@ atomic64_dec_if_positive(atomic64_t *v)
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg_release(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
-#endif
 
-#if defined(arch_try_cmpxchg_relaxed)
 #define try_cmpxchg_relaxed(ptr, oldp, ...) \
 ({ \
 	typeof(ptr) __ai_ptr = (ptr); \
@@ -1791,7 +1296,6 @@ atomic64_dec_if_positive(atomic64_t *v)
 	instrument_atomic_write(__ai_oldp, sizeof(*__ai_oldp)); \
 	arch_try_cmpxchg_relaxed(__ai_ptr, __ai_oldp, __VA_ARGS__); \
 })
-#endif
 
 #define cmpxchg_local(ptr, ...) \
 ({ \
@@ -1830,4 +1334,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 })
 
 #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// 4bec382e44520f4d8267e42620054db26a659ea3
+// 1d7c3a25aca5c7fb031c307be4c3d24c7b48fcd5
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 5766ffcec7c57..b0c45aee19d79 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -41,34 +41,6 @@ gen_params_checks()
 	done
 }
 
-# gen_guard(meta, atomic, pfx, name, sfx, order)
-gen_guard()
-{
-	local meta="$1"; shift
-	local atomic="$1"; shift
-	local pfx="$1"; shift
-	local name="$1"; shift
-	local sfx="$1"; shift
-	local order="$1"; shift
-
-	local atomicname="arch_${atomic}_${pfx}${name}${sfx}${order}"
-
-	local template="$(find_fallback_template "${pfx}" "${name}" "${sfx}" "${order}")"
-
-	# We definitely need a preprocessor symbol for this atomic if it is an
-	# ordering variant, or if there's a generic fallback.
-	if [ ! -z "${order}" ] || [ ! -z "${template}" ]; then
-		printf "defined(${atomicname})"
-		return
-	fi
-
-	# If this is a base variant, but a relaxed variant *may* exist, then we
-	# only have a preprocessor symbol if the relaxed variant isn't defined
-	if meta_has_relaxed "${meta}"; then
-		printf "!defined(${atomicname}_relaxed) || defined(${atomicname})"
-	fi
-}
-
 #gen_proto_order_variant(meta, pfx, name, sfx, order, atomic, int, arg...)
 gen_proto_order_variant()
 {
@@ -82,16 +54,12 @@ gen_proto_order_variant()
 
 	local atomicname="${atomic}_${pfx}${name}${sfx}${order}"
 
-	local guard="$(gen_guard "${meta}" "${atomic}" "${pfx}" "${name}" "${sfx}" "${order}")"
-
 	local ret="$(gen_ret_type "${meta}" "${int}")"
 	local params="$(gen_params "${int}" "${atomic}" "$@")"
 	local checks="$(gen_params_checks "${meta}" "$@")"
 	local args="$(gen_args "$@")"
 	local retstmt="$(gen_ret_stmt "${meta}")"
 
-	[ ! -z "${guard}" ] && printf "#if ${guard}\n"
-
 cat <<EOF
 static __always_inline ${ret}
 ${atomicname}(${params})
@@ -99,11 +67,8 @@ ${atomicname}(${params})
 ${checks}
 	${retstmt}arch_${atomicname}(${args});
 }
-#define ${atomicname} ${atomicname}
 EOF
 
-	[ ! -z "${guard}" ] && printf "#endif\n"
-
 	printf "\n"
 }
 
@@ -139,19 +104,6 @@ EOF
 	fi
 }
 
-gen_optional_xchg()
-{
-	local name="$1"; shift
-	local sfx="$1"; shift
-	local guard="defined(arch_${name}${sfx})"
-
-	[ -z "${sfx}" ] && guard="!defined(arch_${name}_relaxed) || defined(arch_${name})"
-
-	printf "#if ${guard}\n"
-	gen_xchg "${name}${sfx}" ""
-	printf "#endif\n\n"
-}
-
 cat << EOF
 // SPDX-License-Identifier: GPL-2.0
 
@@ -188,7 +140,8 @@ done
 
 for xchg in "xchg" "cmpxchg" "cmpxchg64" "try_cmpxchg"; do
 	for order in "" "_acquire" "_release" "_relaxed"; do
-		gen_optional_xchg "${xchg}" "${order}"
+		gen_xchg "${xchg}${order}" ""
+		printf "\n"
 	done
 done
 
-- 
GitLab


From 35f819d218035ddfbc71e7cf62a4849231701e58 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Fri, 21 May 2021 16:31:12 +0800
Subject: [PATCH 1629/3804] drm/ttm: Skip swapout if ttm object is not
 populated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Swapping a ttm object which has no backend pages makes no sense.

Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210521083112.33176-1-xinhui.pan@amd.com
CC: stable@kernel.org
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index 510e3e001dabe..a1dcf7d55c903 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -145,7 +145,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			list_for_each_entry(bo, &man->lru[j], lru) {
 				uint32_t num_pages;
 
-				if (!bo->ttm ||
+				if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
 				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
 				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
 					continue;
-- 
GitLab


From 75ea44e356b5de8c817f821c9dd68ae329e82add Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 25 May 2021 18:07:58 +0200
Subject: [PATCH 1630/3804] perf jevents: Fix getting maximum number of fds

On some hosts, rlim.rlim_max can be returned as RLIM_INFINITY.
By casting it to int, it is interpreted as -1, which will cause get_maxfds
to return 0, causing "Invalid argument" errors in nftw() calls.
Fix this by casting the second argument of min() to rlim_t instead.

Fixes: 80eeb67fe577 ("perf jevents: Program to convert JSON file")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lore.kernel.org/lkml/20210525160758.97829-1-nbd@nbd.name
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/jevents.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 7422b0ea87901..9604446f8360b 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -960,7 +960,7 @@ static int get_maxfds(void)
 	struct rlimit rlim;
 
 	if (getrlimit(RLIMIT_NOFILE, &rlim) == 0)
-		return min((int)rlim.rlim_max / 2, 512);
+		return min(rlim.rlim_max / 2, (rlim_t)512);
 
 	return 512;
 }
-- 
GitLab


From 042a3eaad6daeabcfaf163aa44da8ea3cf8b5496 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Fri, 21 May 2021 14:51:15 -0700
Subject: [PATCH 1631/3804] nvme-tcp: remove incorrect Kconfig dep in
 BLK_DEV_NVME

We need to select NVME_CORE.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index a44d49d63968a..494675aeaaad7 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -71,7 +71,8 @@ config NVME_FC
 config NVME_TCP
 	tristate "NVM Express over Fabrics TCP host driver"
 	depends on INET
-	depends on BLK_DEV_NVME
+	depends on BLOCK
+	select NVME_CORE
 	select NVME_FABRICS
 	select CRYPTO
 	select CRYPTO_CRC32C
-- 
GitLab


From 25df1acd2d36eb72b14c3d00f6b861b1e00b3aab Mon Sep 17 00:00:00 2001
From: Hou Pu <houpu.main@gmail.com>
Date: Thu, 20 May 2021 19:30:45 +0800
Subject: [PATCH 1632/3804] nvmet-tcp: fix inline data size comparison in
 nvmet_tcp_queue_response

Use "<=" instead of "<" to compare the inline data size.

Fixes: bdaf13279192 ("nvmet-tcp: fix a segmentation fault during io parsing error")
Signed-off-by: Hou Pu <houpu.main@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f9f34f6caf5e8..d8aceef832846 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -550,7 +550,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
 		 * nvmet_req_init is completed.
 		 */
 		if (queue->rcv_state == NVMET_TCP_RECV_PDU &&
-		    len && len < cmd->req.port->inline_data_size &&
+		    len && len <= cmd->req.port->inline_data_size &&
 		    nvme_is_write(cmd->req.cmd))
 			return;
 	}
-- 
GitLab


From aaeadd7075dc9e184bc7876e9dd7b3bada771df2 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Tue, 25 May 2021 08:49:05 -0700
Subject: [PATCH 1633/3804] nvmet: fix false keep-alive timeout when a
 controller is torn down

Controller teardown flow may take some time in case it has many I/O
queues, and the host may not send us keep-alive during this period.
Hence reset the traffic based keep-alive timer so we don't trigger
a controller teardown as a result of a keep-alive expiration.

Reported-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c  | 15 +++++++++++----
 drivers/nvme/target/nvmet.h |  2 +-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 1853db38b6820..4b29a5bac8969 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -388,10 +388,10 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
 {
 	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
 			struct nvmet_ctrl, ka_work);
-	bool cmd_seen = ctrl->cmd_seen;
+	bool reset_tbkas = ctrl->reset_tbkas;
 
-	ctrl->cmd_seen = false;
-	if (cmd_seen) {
+	ctrl->reset_tbkas = false;
+	if (reset_tbkas) {
 		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
 			ctrl->cntlid);
 		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
@@ -804,6 +804,13 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	percpu_ref_exit(&sq->ref);
 
 	if (ctrl) {
+		/*
+		 * The teardown flow may take some time, and the host may not
+		 * send us keep-alive during this period, hence reset the
+		 * traffic based keep-alive timer so we don't trigger a
+		 * controller teardown as a result of a keep-alive expiration.
+		 */
+		ctrl->reset_tbkas = true;
 		nvmet_ctrl_put(ctrl);
 		sq->ctrl = NULL; /* allows reusing the queue later */
 	}
@@ -952,7 +959,7 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	}
 
 	if (sq->ctrl)
-		sq->ctrl->cmd_seen = true;
+		sq->ctrl->reset_tbkas = true;
 
 	return true;
 
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index d69a409515d65..53aea9a8056e7 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -167,7 +167,7 @@ struct nvmet_ctrl {
 	struct nvmet_subsys	*subsys;
 	struct nvmet_sq		**sqs;
 
-	bool			cmd_seen;
+	bool			reset_tbkas;
 
 	struct mutex		lock;
 	u64			cap;
-- 
GitLab


From 3743c1723bfc62e69dbf022417720eed3f431b29 Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang@windriver.com>
Date: Wed, 26 May 2021 13:08:26 +0800
Subject: [PATCH 1634/3804] io-wq: Fix UAF when wakeup wqe in hash waitqueue

BUG: KASAN: use-after-free in __wake_up_common+0x637/0x650
Read of size 8 at addr ffff8880304250d8 by task iou-wrk-28796/28802

Call Trace:
 __dump_stack [inline]
 dump_stack+0x141/0x1d7
 print_address_description.constprop.0.cold+0x5b/0x2c6
 __kasan_report [inline]
 kasan_report.cold+0x7c/0xd8
 __wake_up_common+0x637/0x650
 __wake_up_common_lock+0xd0/0x130
 io_worker_handle_work+0x9dd/0x1790
 io_wqe_worker+0xb2a/0xd40
 ret_from_fork+0x1f/0x30

Allocated by task 28798:
 kzalloc_node [inline]
 io_wq_create+0x3c4/0xdd0
 io_init_wq_offload [inline]
 io_uring_alloc_task_context+0x1bf/0x6b0
 __io_uring_add_task_file+0x29a/0x3c0
 io_uring_add_task_file [inline]
 io_uring_install_fd [inline]
 io_uring_create [inline]
 io_uring_setup+0x209a/0x2bd0
 do_syscall_64+0x3a/0xb0
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Freed by task 28798:
 kfree+0x106/0x2c0
 io_wq_destroy+0x182/0x380
 io_wq_put [inline]
 io_wq_put_and_exit+0x7a/0xa0
 io_uring_clean_tctx [inline]
 __io_uring_cancel+0x428/0x530
 io_uring_files_cancel
 do_exit+0x299/0x2a60
 do_group_exit+0x125/0x310
 get_signal+0x47f/0x2150
 arch_do_signal_or_restart+0x2a8/0x1eb0
 handle_signal_work [inline]
 exit_to_user_mode_loop [inline]
 exit_to_user_mode_prepare+0x171/0x280
 __syscall_exit_to_user_mode_work [inline]
 syscall_exit_to_user_mode+0x19/0x60
 do_syscall_64+0x47/0xb0
 entry_SYSCALL_64_after_hwframe

Consider the following scenario, in which the hash waitqueue is shared by
io-wq1 and io-wq2 (note: wqe is worker):

io-wq1:worker2     | locks bit1
io-wq2:worker1     | waits bit1
io-wq1:worker3     | waits bit1

io-wq1:worker2     | completes all wqe bit1 work items
io-wq1:worker2     | drop bit1, exit

io-wq2:worker1     | locks bit1
io-wq1:worker3     | cannot lock bit1, waits bit1 and exits
io-wq1             | exit and free io-wq1
io-wq2:worker1     | drops bit1
io-wq1:worker3     | is woken up, even though wqe is freed

After all iou-wrk belonging to io-wq1 have exited, remove the wqe
from the hash waitqueue. This guarantees that there will be no more
wqe belonging to io-wq1 in the hash waitqueue.

Reported-by: syzbot+6cb11ade52aa17095297@syzkaller.appspotmail.com
Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Link: https://lore.kernel.org/r/20210526050826.30500-1-qiang.zhang@windriver.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io-wq.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index de9b7ba3ba015..b3e8624a37d09 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1006,13 +1006,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
 		struct io_wqe *wqe = wq->wqes[node];
 
 		io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
-		spin_lock_irq(&wq->hash->wait.lock);
-		list_del_init(&wq->wqes[node]->wait.entry);
-		spin_unlock_irq(&wq->hash->wait.lock);
 	}
 	rcu_read_unlock();
 	io_worker_ref_put(wq);
 	wait_for_completion(&wq->worker_done);
+
+	for_each_node(node) {
+		spin_lock_irq(&wq->hash->wait.lock);
+		list_del_init(&wq->wqes[node]->wait.entry);
+		spin_unlock_irq(&wq->hash->wait.lock);
+	}
 	put_task_struct(wq->task);
 	wq->task = NULL;
 }
-- 
GitLab


From 9f5815315e0b93146d7b0be4d96ee2d74eeabb98 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Tue, 25 May 2021 22:19:01 -0700
Subject: [PATCH 1635/3804] xfs: add new IRC channel to MAINTAINERS

Add our new OFTC channel to the MAINTAINERS list so everyone will know
where to go.  Ignore the XFS wikis, we have no access to them.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 008fcad7ac008..ceb146e9b506c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19998,6 +19998,7 @@ F:	arch/x86/xen/*swiotlb*
 F:	drivers/xen/*swiotlb*
 
 XFS FILESYSTEM
+C:	irc://irc.oftc.net/xfs
 M:	Darrick J. Wong <djwong@kernel.org>
 M:	linux-xfs@vger.kernel.org
 L:	linux-xfs@vger.kernel.org
-- 
GitLab


From 3fdc0cb59d97f87e2cc708d424f1538e31744286 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Tue, 18 May 2021 17:36:18 +0100
Subject: [PATCH 1636/3804] arm64: smccc: Add support for SMCCCv1.2 extended
 input/output registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SMCCC v1.2 allows x8-x17 to be used as parameter registers and x4-x17
to be used as result registers in SMC64/HVC64. Arm Firmware Framework
for Armv8-A specification makes use of x0-x7 as parameter and result
registers. There are other users like Hyper-V who intend to use beyond
x0-x7 as well.

The current SMCCC interface in the kernel just uses x0-x7 as parameter and
x0-x3 as result registers as required by SMCCCv1.0. Let us add a new
interface to support this extended set of input/output registers, namely
x0-x17 as both parameter and result registers.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20210518163618.43950-1-sudeep.holla@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/asm-offsets.c |  9 ++++++
 arch/arm64/kernel/smccc-call.S  | 57 +++++++++++++++++++++++++++++++++
 include/linux/arm-smccc.h       | 55 +++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e733..74321bc9a4590 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -138,6 +138,15 @@ int main(void)
   DEFINE(ARM_SMCCC_RES_X2_OFFS,		offsetof(struct arm_smccc_res, a2));
   DEFINE(ARM_SMCCC_QUIRK_ID_OFFS,	offsetof(struct arm_smccc_quirk, id));
   DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS,	offsetof(struct arm_smccc_quirk, state));
+  DEFINE(ARM_SMCCC_1_2_REGS_X0_OFFS,	offsetof(struct arm_smccc_1_2_regs, a0));
+  DEFINE(ARM_SMCCC_1_2_REGS_X2_OFFS,	offsetof(struct arm_smccc_1_2_regs, a2));
+  DEFINE(ARM_SMCCC_1_2_REGS_X4_OFFS,	offsetof(struct arm_smccc_1_2_regs, a4));
+  DEFINE(ARM_SMCCC_1_2_REGS_X6_OFFS,	offsetof(struct arm_smccc_1_2_regs, a6));
+  DEFINE(ARM_SMCCC_1_2_REGS_X8_OFFS,	offsetof(struct arm_smccc_1_2_regs, a8));
+  DEFINE(ARM_SMCCC_1_2_REGS_X10_OFFS,	offsetof(struct arm_smccc_1_2_regs, a10));
+  DEFINE(ARM_SMCCC_1_2_REGS_X12_OFFS,	offsetof(struct arm_smccc_1_2_regs, a12));
+  DEFINE(ARM_SMCCC_1_2_REGS_X14_OFFS,	offsetof(struct arm_smccc_1_2_regs, a14));
+  DEFINE(ARM_SMCCC_1_2_REGS_X16_OFFS,	offsetof(struct arm_smccc_1_2_regs, a16));
   BLANK();
   DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
   DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index d62447964ed91..2def9d0dd3ddb 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -43,3 +43,60 @@ SYM_FUNC_START(__arm_smccc_hvc)
 	SMCCC	hvc
 SYM_FUNC_END(__arm_smccc_hvc)
 EXPORT_SYMBOL(__arm_smccc_hvc)
+
+	.macro SMCCC_1_2 instr
+	/* Save `res` and free a GPR that won't be clobbered */
+	stp     x1, x19, [sp, #-16]!
+
+	/* Ensure `args` won't be clobbered while loading regs in next step */
+	mov	x19, x0
+
+	/* Load the registers x0 - x17 from the struct arm_smccc_1_2_regs */
+	ldp	x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+	ldp	x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+	ldp	x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+	ldp	x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+	ldp	x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+	ldp	x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+	ldp	x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+	ldp	x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+	ldp	x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+	\instr #0
+
+	/* Load the `res` from the stack */
+	ldr	x19, [sp]
+
+	/* Store the registers x0 - x17 into the result structure */
+	stp	x0, x1, [x19, #ARM_SMCCC_1_2_REGS_X0_OFFS]
+	stp	x2, x3, [x19, #ARM_SMCCC_1_2_REGS_X2_OFFS]
+	stp	x4, x5, [x19, #ARM_SMCCC_1_2_REGS_X4_OFFS]
+	stp	x6, x7, [x19, #ARM_SMCCC_1_2_REGS_X6_OFFS]
+	stp	x8, x9, [x19, #ARM_SMCCC_1_2_REGS_X8_OFFS]
+	stp	x10, x11, [x19, #ARM_SMCCC_1_2_REGS_X10_OFFS]
+	stp	x12, x13, [x19, #ARM_SMCCC_1_2_REGS_X12_OFFS]
+	stp	x14, x15, [x19, #ARM_SMCCC_1_2_REGS_X14_OFFS]
+	stp	x16, x17, [x19, #ARM_SMCCC_1_2_REGS_X16_OFFS]
+
+	/* Restore original x19 */
+	ldp     xzr, x19, [sp], #16
+	ret
+.endm
+
+/*
+ * void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args,
+ *			  struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_hvc)
+	SMCCC_1_2 hvc
+SYM_FUNC_END(arm_smccc_1_2_hvc)
+EXPORT_SYMBOL(arm_smccc_1_2_hvc)
+
+/*
+ * void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args,
+ *			  struct arm_smccc_1_2_regs *res);
+ */
+SYM_FUNC_START(arm_smccc_1_2_smc)
+	SMCCC_1_2 smc
+SYM_FUNC_END(arm_smccc_1_2_smc)
+EXPORT_SYMBOL(arm_smccc_1_2_smc)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 6861489a18900..5cef2b8b0479e 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -227,6 +227,61 @@ struct arm_smccc_res {
 	unsigned long a3;
 };
 
+#ifdef CONFIG_ARM64
+/**
+ * struct arm_smccc_1_2_regs - Arguments for or Results from SMC/HVC call
+ * @a0-a17 argument values from registers 0 to 17
+ */
+struct arm_smccc_1_2_regs {
+	unsigned long a0;
+	unsigned long a1;
+	unsigned long a2;
+	unsigned long a3;
+	unsigned long a4;
+	unsigned long a5;
+	unsigned long a6;
+	unsigned long a7;
+	unsigned long a8;
+	unsigned long a9;
+	unsigned long a10;
+	unsigned long a11;
+	unsigned long a12;
+	unsigned long a13;
+	unsigned long a14;
+	unsigned long a15;
+	unsigned long a16;
+	unsigned long a17;
+};
+
+/**
+ * arm_smccc_1_2_hvc() - make HVC calls
+ * @args: arguments passed via struct arm_smccc_1_2_regs
+ * @res: result values via struct arm_smccc_1_2_regs
+ *
+ * This function is used to make HVC calls following SMC Calling Convention
+ * v1.2 or above. The content of the supplied param are copied from the
+ * structure to registers prior to the HVC instruction. The return values
+ * are updated with the content from registers on return from the HVC
+ * instruction.
+ */
+asmlinkage void arm_smccc_1_2_hvc(const struct arm_smccc_1_2_regs *args,
+				  struct arm_smccc_1_2_regs *res);
+
+/**
+ * arm_smccc_1_2_smc() - make SMC calls
+ * @args: arguments passed via struct arm_smccc_1_2_regs
+ * @res: result values via struct arm_smccc_1_2_regs
+ *
+ * This function is used to make SMC calls following SMC Calling Convention
+ * v1.2 or above. The content of the supplied param are copied from the
+ * structure to registers prior to the SMC instruction. The return values
+ * are updated with the content from registers on return from the SMC
+ * instruction.
+ */
+asmlinkage void arm_smccc_1_2_smc(const struct arm_smccc_1_2_regs *args,
+				  struct arm_smccc_1_2_regs *res);
+#endif
+
 /**
  * struct arm_smccc_quirk - Contains quirk information
  * @id: quirk identification
-- 
GitLab


From f85ea4945a268be6b0a6373f8ef1b2450d3f394b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 25 May 2021 22:12:03 +0800
Subject: [PATCH 1637/3804] regulator: rk808: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Message-Id: <20210525141203.2562884-1-axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rk808-regulator.c | 116 ++++++++++++----------------
 1 file changed, 51 insertions(+), 65 deletions(-)

diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index e926c1a858460..127dc2e2e6903 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -158,13 +158,6 @@ struct rk808_regulator_data {
 	struct gpio_desc *dvs_gpio[2];
 };
 
-static const int rk808_buck_config_regs[] = {
-	RK808_BUCK1_CONFIG_REG,
-	RK808_BUCK2_CONFIG_REG,
-	RK808_BUCK3_CONFIG_REG,
-	RK808_BUCK4_CONFIG_REG,
-};
-
 static const struct linear_range rk808_ldo3_voltage_ranges[] = {
 	REGULATOR_LINEAR_RANGE(800000, 0, 13, 100000),
 	REGULATOR_LINEAR_RANGE(2500000, 15, 15, 0),
@@ -215,6 +208,15 @@ static const struct linear_range rk817_buck3_voltage_ranges[] = {
 			       RK817_BUCK3_SEL_CNT, RK817_BUCK1_STP1),
 };
 
+static const unsigned int rk808_buck1_2_ramp_table[] = {
+	2000, 4000, 6000, 10000
+};
+
+/* RK817 RK809 */
+static const unsigned int rk817_buck1_4_ramp_table[] = {
+	3000, 6300, 12500, 25000
+};
+
 static int rk808_buck1_2_get_voltage_sel_regmap(struct regulator_dev *rdev)
 {
 	struct rk808_regulator_data *pdata = rdev_get_drvdata(rdev);
@@ -340,62 +342,6 @@ static int rk808_buck1_2_set_voltage_time_sel(struct regulator_dev *rdev,
 	return regulator_set_voltage_time_sel(rdev, old_selector, new_selector);
 }
 
-static int rk808_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	unsigned int ramp_value = RK808_RAMP_RATE_10MV_PER_US;
-	unsigned int reg = rk808_buck_config_regs[rdev_get_id(rdev)];
-
-	switch (ramp_delay) {
-	case 1 ... 2000:
-		ramp_value = RK808_RAMP_RATE_2MV_PER_US;
-		break;
-	case 2001 ... 4000:
-		ramp_value = RK808_RAMP_RATE_4MV_PER_US;
-		break;
-	case 4001 ... 6000:
-		ramp_value = RK808_RAMP_RATE_6MV_PER_US;
-		break;
-	case 6001 ... 10000:
-		break;
-	default:
-		pr_warn("%s ramp_delay: %d not supported, setting 10000\n",
-			rdev->desc->name, ramp_delay);
-	}
-
-	return regmap_update_bits(rdev->regmap, reg,
-				  RK808_RAMP_RATE_MASK, ramp_value);
-}
-
-/*
- * RK817 RK809
- */
-static int rk817_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	unsigned int ramp_value = RK817_RAMP_RATE_25MV_PER_US;
-	unsigned int reg = RK817_BUCK_CONFIG_REG(rdev_get_id(rdev));
-
-	switch (ramp_delay) {
-	case 0 ... 3000:
-		ramp_value = RK817_RAMP_RATE_3MV_PER_US;
-		break;
-	case 3001 ... 6300:
-		ramp_value = RK817_RAMP_RATE_6_3MV_PER_US;
-		break;
-	case 6301 ... 12500:
-		ramp_value = RK817_RAMP_RATE_12_5MV_PER_US;
-		break;
-	case 12501 ... 25000:
-		break;
-	default:
-		dev_warn(&rdev->dev,
-			 "%s ramp_delay: %d not supported, setting 25000\n",
-			 rdev->desc->name, ramp_delay);
-	}
-
-	return regmap_update_bits(rdev->regmap, reg,
-				  RK817_RAMP_RATE_MASK, ramp_value);
-}
-
 static int rk808_set_suspend_voltage(struct regulator_dev *rdev, int uv)
 {
 	unsigned int reg;
@@ -625,7 +571,7 @@ static const struct regulator_ops rk808_buck1_2_ops = {
 	.enable			= regulator_enable_regmap,
 	.disable		= regulator_disable_regmap,
 	.is_enabled		= regulator_is_enabled_regmap,
-	.set_ramp_delay		= rk808_set_ramp_delay,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 	.set_suspend_voltage	= rk808_set_suspend_voltage,
 	.set_suspend_enable	= rk808_set_suspend_enable,
 	.set_suspend_disable	= rk808_set_suspend_disable,
@@ -722,7 +668,7 @@ static const struct regulator_ops rk817_buck_ops_range = {
 	.set_mode		= rk8xx_set_mode,
 	.get_mode		= rk8xx_get_mode,
 	.set_suspend_mode	= rk8xx_set_suspend_mode,
-	.set_ramp_delay		= rk817_set_ramp_delay,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 	.set_suspend_voltage	= rk808_set_suspend_voltage_range,
 	.set_suspend_enable	= rk817_set_suspend_enable,
 	.set_suspend_disable	= rk817_set_suspend_disable,
@@ -814,6 +760,10 @@ static const struct regulator_desc rk808_reg[] = {
 		.vsel_mask = RK808_BUCK_VSEL_MASK,
 		.enable_reg = RK808_DCDC_EN_REG,
 		.enable_mask = BIT(0),
+		.ramp_reg = RK808_BUCK1_CONFIG_REG,
+		.ramp_mask = RK808_RAMP_RATE_MASK,
+		.ramp_delay_table = rk808_buck1_2_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk808_buck1_2_ramp_table),
 		.owner = THIS_MODULE,
 	}, {
 		.name = "DCDC_REG2",
@@ -830,6 +780,10 @@ static const struct regulator_desc rk808_reg[] = {
 		.vsel_mask = RK808_BUCK_VSEL_MASK,
 		.enable_reg = RK808_DCDC_EN_REG,
 		.enable_mask = BIT(1),
+		.ramp_reg = RK808_BUCK2_CONFIG_REG,
+		.ramp_mask = RK808_RAMP_RATE_MASK,
+		.ramp_delay_table = rk808_buck1_2_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk808_buck1_2_ramp_table),
 		.owner = THIS_MODULE,
 	}, {
 		.name = "DCDC_REG3",
@@ -910,6 +864,10 @@ static const struct regulator_desc rk809_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC1),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC1),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC1),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC1),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -929,6 +887,10 @@ static const struct regulator_desc rk809_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC2),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC2),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC2),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC2),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -948,6 +910,10 @@ static const struct regulator_desc rk809_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC3),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC3),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC3),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC3),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -967,6 +933,10 @@ static const struct regulator_desc rk809_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC4),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC4),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC4),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC4),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	},
@@ -1052,6 +1022,10 @@ static const struct regulator_desc rk817_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC1),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC1),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC1),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC1),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -1071,6 +1045,10 @@ static const struct regulator_desc rk817_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC2),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC2),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC2),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC2),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -1090,6 +1068,10 @@ static const struct regulator_desc rk817_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC3),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC3),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC3),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC3),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	}, {
@@ -1109,6 +1091,10 @@ static const struct regulator_desc rk817_reg[] = {
 		.enable_mask = ENABLE_MASK(RK817_ID_DCDC4),
 		.enable_val = ENABLE_MASK(RK817_ID_DCDC4),
 		.disable_val = DISABLE_VAL(RK817_ID_DCDC4),
+		.ramp_reg = RK817_BUCK_CONFIG_REG(RK817_ID_DCDC4),
+		.ramp_mask = RK817_RAMP_RATE_MASK,
+		.ramp_delay_table = rk817_buck1_4_ramp_table,
+		.n_ramp_values = ARRAY_SIZE(rk817_buck1_4_ramp_table),
 		.of_map_mode = rk8xx_regulator_of_map_mode,
 		.owner = THIS_MODULE,
 	},
-- 
GitLab


From 76734d26b54192a31440039459eef2612da63ed4 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 26 May 2021 10:49:25 -0700
Subject: [PATCH 1638/3804] arm64: Change the on_*stack functions to take a
 size argument

unwind_frame() was previously implicitly checking that the frame
record is in bounds of the stack by enforcing that FP is both aligned
to 16 and in bounds of the stack. Once the FP alignment requirement
is relaxed to 8 this will not be sufficient because it does not
account for the case where FP points to 8 bytes before the end of the
stack.

Make the check explicit by changing the on_*stack functions to take a
size argument and adjusting the callers to pass the appropriate sizes.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ib7a3eb3eea41b0687ffaba045ceb2012d077d8b4
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210526174927.2477847-1-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/processor.h  | 12 +++++------
 arch/arm64/include/asm/sdei.h       |  7 ++++---
 arch/arm64/include/asm/stacktrace.h | 32 ++++++++++++++---------------
 arch/arm64/kernel/ptrace.c          |  2 +-
 arch/arm64/kernel/sdei.c            | 16 ++++++++-------
 arch/arm64/kernel/stacktrace.c      |  2 +-
 6 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee8909..7a094aafec200 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -329,13 +329,13 @@ long get_tagged_addr_ctrl(struct task_struct *task);
  * of header definitions for the use of task_stack_page.
  */
 
-#define current_top_of_stack()							\
-({										\
-	struct stack_info _info;						\
-	BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info));	\
-	_info.high;								\
+#define current_top_of_stack()								\
+({											\
+	struct stack_info _info;							\
+	BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info));	\
+	_info.high;									\
 })
-#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, NULL))
+#define on_thread_stack()	(on_task_stack(current, current_stack_pointer, 1, NULL))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92a5fbb0..8bc30a5c45693 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -42,8 +42,9 @@ unsigned long sdei_arch_get_entry_point(int conduit);
 
 struct stack_info;
 
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info);
-static inline bool on_sdei_stack(unsigned long sp,
+bool _on_sdei_stack(unsigned long sp, unsigned long size,
+		    struct stack_info *info);
+static inline bool on_sdei_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	if (!IS_ENABLED(CONFIG_VMAP_STACK))
@@ -51,7 +52,7 @@ static inline bool on_sdei_stack(unsigned long sp,
 	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
 		return false;
 	if (in_nmi())
-		return _on_sdei_stack(sp, info);
+		return _on_sdei_stack(sp, size, info);
 
 	return false;
 }
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4b33ca6206793..1801399204d79 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -69,14 +69,14 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_stack(unsigned long sp, unsigned long low,
-				unsigned long high, enum stack_type type,
-				struct stack_info *info)
+static inline bool on_stack(unsigned long sp, unsigned long size,
+			    unsigned long low, unsigned long high,
+			    enum stack_type type, struct stack_info *info)
 {
 	if (!low)
 		return false;
 
-	if (sp < low || sp >= high)
+	if (sp < low || sp + size < sp || sp + size > high)
 		return false;
 
 	if (info) {
@@ -87,38 +87,38 @@ static inline bool on_stack(unsigned long sp, unsigned long low,
 	return true;
 }
 
-static inline bool on_irq_stack(unsigned long sp,
+static inline bool on_irq_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr);
 	unsigned long high = low + IRQ_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_IRQ, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info);
 }
 
 static inline bool on_task_stack(const struct task_struct *tsk,
-				 unsigned long sp,
+				 unsigned long sp, unsigned long size,
 				 struct stack_info *info)
 {
 	unsigned long low = (unsigned long)task_stack_page(tsk);
 	unsigned long high = low + THREAD_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_TASK, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_TASK, info);
 }
 
 #ifdef CONFIG_VMAP_STACK
 DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
 
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 				struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack);
 	unsigned long high = low + OVERFLOW_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info);
 }
 #else
-static inline bool on_overflow_stack(unsigned long sp,
+static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
 			struct stack_info *info) { return false; }
 #endif
 
@@ -128,21 +128,21 @@ static inline bool on_overflow_stack(unsigned long sp,
  * context.
  */
 static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp,
+				       unsigned long sp, unsigned long size,
 				       struct stack_info *info)
 {
 	if (info)
 		info->type = STACK_TYPE_UNKNOWN;
 
-	if (on_task_stack(tsk, sp, info))
+	if (on_task_stack(tsk, sp, size, info))
 		return true;
 	if (tsk != current || preemptible())
 		return false;
-	if (on_irq_stack(sp, info))
+	if (on_irq_stack(sp, size, info))
 		return true;
-	if (on_overflow_stack(sp, info))
+	if (on_overflow_stack(sp, size, info))
 		return true;
-	if (on_sdei_stack(sp, info))
+	if (on_sdei_stack(sp, size, info))
 		return true;
 
 	return false;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index eb2f73939b7bb..499b6b2f9757f 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
 {
 	return ((addr & ~(THREAD_SIZE - 1))  ==
 		(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) ||
-		on_irq_stack(addr, NULL);
+		on_irq_stack(addr, sizeof(unsigned long), NULL);
 }
 
 /**
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 2c7ca449dd511..c524f96f97c49 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -162,31 +162,33 @@ static int init_sdei_scs(void)
 	return err;
 }
 
-static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_normal_stack(unsigned long sp, unsigned long size,
+				 struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
 	unsigned long high = low + SDEI_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info);
 }
 
-static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info)
+static bool on_sdei_critical_stack(unsigned long sp, unsigned long size,
+				   struct stack_info *info)
 {
 	unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
 	unsigned long high = low + SDEI_STACK_SIZE;
 
-	return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info);
+	return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info);
 }
 
-bool _on_sdei_stack(unsigned long sp, struct stack_info *info)
+bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info)
 {
 	if (!IS_ENABLED(CONFIG_VMAP_STACK))
 		return false;
 
-	if (on_sdei_critical_stack(sp, info))
+	if (on_sdei_critical_stack(sp, size, info))
 		return true;
 
-	if (on_sdei_normal_stack(sp, info))
+	if (on_sdei_normal_stack(sp, size, info))
 		return true;
 
 	return false;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 36cf05d5eb9ee..5c70f247645bf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -78,7 +78,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (fp & 0xf)
 		return -EINVAL;
 
-	if (!on_accessible_stack(tsk, fp, &info))
+	if (!on_accessible_stack(tsk, fp, 16, &info))
 		return -EINVAL;
 
 	if (test_bit(info.type, frame->stacks_done))
-- 
GitLab


From 33c222aeda14596ca5b9a1a3002858c6c3565ddd Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 26 May 2021 10:49:26 -0700
Subject: [PATCH 1639/3804] arm64: stacktrace: Relax frame record alignment
 requirement to 8 bytes

The AAPCS places no requirements on the alignment of the frame
record. In theory it could be placed anywhere, although it seems
sensible to require it to be aligned to 8 bytes. With an upcoming
enhancement to tag-based KASAN Clang will begin creating frame records
located at an address that is only aligned to 8 bytes. Accommodate
such frame records in the stack unwinding code.

As pointed out by Mark Rutland, the userspace stack unwinding code
has the same problem, so fix it there as well.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ia22c375230e67ca055e9e4bb639383567f7ad268
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210526174927.2477847-2-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_callchain.c | 2 +-
 arch/arm64/kernel/stacktrace.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 88ff471b0bce5..4a72c27273097 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -116,7 +116,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
 		tail = (struct frame_tail __user *)regs->regs[29];
 
 		while (entry->nr < entry->max_stack &&
-		       tail && !((unsigned long)tail & 0xf))
+		       tail && !((unsigned long)tail & 0x7))
 			tail = user_backtrace(tail, entry);
 	} else {
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 5c70f247645bf..b189de5ca6cbc 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -75,7 +75,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	if (fp == (unsigned long)task_pt_regs(tsk)->stackframe)
 		return -ENOENT;
 
-	if (fp & 0xf)
+	if (fp & 0x7)
 		return -EINVAL;
 
 	if (!on_accessible_stack(tsk, fp, 16, &info))
-- 
GitLab


From 483dbf6a35907610597fdc304bd32ecba40cdff0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 May 2021 16:11:29 +0100
Subject: [PATCH 1640/3804] arm64/sve: Split _sve_flush macro into separate Z
 and predicate flushes

Trivial refactoring to support further work, no change to generated code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512151131.27877-2-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimdmacros.h | 4 +++-
 arch/arm64/kernel/entry-fpsimd.S      | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index a2563992d2dc8..059204477ce66 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -213,8 +213,10 @@
 	mov	v\nz\().16b, v\nz\().16b
 .endm
 
-.macro sve_flush
+.macro sve_flush_z
  _for n, 0, 31, _sve_flush_z	\n
+.endm
+.macro sve_flush_p_ffr
  _for n, 0, 15, _sve_pfalse	\n
 		_sve_wrffr	0
 .endm
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 3ecec60d32958..7921d58427c27 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -72,7 +72,8 @@ SYM_FUNC_END(sve_load_from_fpsimd_state)
 
 /* Zero all SVE registers but the first 128-bits of each vector */
 SYM_FUNC_START(sve_flush_live)
-	sve_flush
+	sve_flush_z
+	sve_flush_p_ffr
 	ret
 SYM_FUNC_END(sve_flush_live)
 
-- 
GitLab


From c9f6890bca111a879a8af1f2390ac49cf05b11df Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 May 2021 16:11:30 +0100
Subject: [PATCH 1641/3804] arm64/sve: Use the sve_flush macros in
 sve_load_from_fpsimd_state()

This makes the code a bit clearer and as a result we can also make the
indentation more normal, there is no change to the generated code.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512151131.27877-3-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-fpsimd.S | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 7921d58427c27..dd8382e5ce825 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -63,11 +63,10 @@ SYM_FUNC_END(sve_set_vq)
  * and the rest zeroed. All the other SVE registers will be zeroed.
  */
 SYM_FUNC_START(sve_load_from_fpsimd_state)
-		sve_load_vq	x1, x2, x3
-		fpsimd_restore	x0, 8
- _for n, 0, 15, _sve_pfalse	\n
-		_sve_wrffr	0
-		ret
+	sve_load_vq	x1, x2, x3
+	fpsimd_restore	x0, 8
+	sve_flush_p_ffr
+	ret
 SYM_FUNC_END(sve_load_from_fpsimd_state)
 
 /* Zero all SVE registers but the first 128-bits of each vector */
-- 
GitLab


From ad4711f962e08eff8d6e9b03f9670b1af6ea9395 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 12 May 2021 16:11:31 +0100
Subject: [PATCH 1642/3804] arm64/sve: Skip flushing Z registers with 128 bit
 vectors

When the SVE vector length is 128 bits there are no bits in the Z
registers which are not shared with the V registers, so we can skip them
when zeroing state not shared with FPSIMD. This results in a minor
performance improvement.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512151131.27877-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/fpsimd.h  |  2 +-
 arch/arm64/kernel/entry-fpsimd.S | 12 ++++++++++--
 arch/arm64/kernel/fpsimd.c       |  6 ++++--
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 2599504674b52..c072161d5c653 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
-extern void sve_flush_live(void);
+extern void sve_flush_live(unsigned long vq_minus_1);
 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
 				       unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index dd8382e5ce825..0a7a647538787 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -69,10 +69,18 @@ SYM_FUNC_START(sve_load_from_fpsimd_state)
 	ret
 SYM_FUNC_END(sve_load_from_fpsimd_state)
 
-/* Zero all SVE registers but the first 128-bits of each vector */
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = VQ - 1
+ */
 SYM_FUNC_START(sve_flush_live)
+	cbz		x0, 1f	// A VQ-1 of 0 is 128 bits so no extra Z state
 	sve_flush_z
-	sve_flush_p_ffr
+1:	sve_flush_p_ffr
 	ret
 SYM_FUNC_END(sve_flush_live)
 
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index ad3dd34a83cf9..e57b23f952846 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
 	 * disabling the trap, otherwise update our in-memory copy.
 	 */
 	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-		sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
-		sve_flush_live();
+		unsigned long vq_minus_one =
+			sve_vq_from_vl(current->thread.sve_vl) - 1;
+		sve_set_vq(vq_minus_one);
+		sve_flush_live(vq_minus_one);
 		fpsimd_bind_task_to_cpu();
 	} else {
 		fpsimd_to_sve(current);
-- 
GitLab


From 53004ee78d6273c994534ccf79d993098ac89769 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 20 Apr 2021 17:54:36 -0500
Subject: [PATCH 1643/3804] xfs: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix
the following warnings by replacing /* fall through */ comments,
and their variants, with the new pseudo-keyword macro fallthrough:

fs/xfs/libxfs/xfs_alloc.c:3167:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/libxfs/xfs_da_btree.c:286:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/libxfs/xfs_ag_resv.c:346:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/libxfs/xfs_ag_resv.c:388:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_bmap_util.c:246:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_export.c:88:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_export.c:96:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_file.c:867:3: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_ioctl.c:562:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_ioctl.c:1548:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_iomap.c:1040:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_inode.c:852:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_log.c:2627:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/xfs_trans_buf.c:298:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/bmap.c:275:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/btree.c:48:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/common.c:85:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/common.c:138:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/common.c:698:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/dabtree.c:51:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/repair.c:951:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]
fs/xfs/scrub/agheader.c:89:2: warning: unannotated fall-through between switch labels [-Wimplicit-fallthrough]

Notice that Clang doesn't recognize /* fall through */ comments as
implicit fall-through markings, so in order to globally enable
-Wimplicit-fallthrough for Clang, these comments need to be
replaced with fallthrough; in the whole codebase.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 fs/xfs/libxfs/xfs_ag_resv.c  | 4 ++--
 fs/xfs/libxfs/xfs_alloc.c    | 2 +-
 fs/xfs/libxfs/xfs_da_btree.c | 2 +-
 fs/xfs/scrub/agheader.c      | 1 +
 fs/xfs/scrub/bmap.c          | 2 +-
 fs/xfs/scrub/btree.c         | 2 +-
 fs/xfs/scrub/common.c        | 6 +++---
 fs/xfs/scrub/dabtree.c       | 2 +-
 fs/xfs/scrub/repair.c        | 2 +-
 fs/xfs/xfs_bmap_util.c       | 2 +-
 fs/xfs/xfs_export.c          | 4 ++--
 fs/xfs/xfs_file.c            | 2 +-
 fs/xfs/xfs_inode.c           | 2 +-
 fs/xfs/xfs_ioctl.c           | 4 ++--
 fs/xfs/xfs_iomap.c           | 2 +-
 fs/xfs/xfs_log.c             | 1 +
 fs/xfs/xfs_trans_buf.c       | 2 +-
 17 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e32a1833d5231..637d954e148fb 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -354,7 +354,7 @@ xfs_ag_resv_alloc_extent(
 		break;
 	default:
 		ASSERT(0);
-		/* fall through */
+		fallthrough;
 	case XFS_AG_RESV_NONE:
 		field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
 				       XFS_TRANS_SB_FDBLOCKS;
@@ -396,7 +396,7 @@ xfs_ag_resv_free_extent(
 		break;
 	default:
 		ASSERT(0);
-		/* fall through */
+		fallthrough;
 	case XFS_AG_RESV_NONE:
 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
 		return;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 82b7cbb1f24f3..af3d5f9271f61 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -3174,7 +3174,7 @@ xfs_alloc_vextent(
 		}
 		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
 		args->type = XFS_ALLOCTYPE_NEAR_BNO;
-		/* FALLTHROUGH */
+		fallthrough;
 	case XFS_ALLOCTYPE_FIRST_AG:
 		/*
 		 * Rotate through the allocation groups looking for a winner.
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 83ac9771bfb58..747ec77912c3f 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -282,7 +282,7 @@ xfs_da3_node_read_verify(
 						__this_address);
 				break;
 			}
-			/* fall through */
+			fallthrough;
 		case XFS_DA_NODE_MAGIC:
 			fa = xfs_da3_node_verify(bp);
 			if (fa)
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 7a2f9b5f2db5b..f96e84793cc96 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -86,6 +86,7 @@ xchk_superblock(
 	case -ENOSYS:
 	case -EFBIG:
 		error = -EFSCORRUPTED;
+		fallthrough;
 	default:
 		break;
 	}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index b5ebf1d1b4db4..77d5c4a0f09f2 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -271,7 +271,7 @@ xchk_bmap_iextent_xref(
 	case XFS_DATA_FORK:
 		if (xfs_is_reflink_inode(info->sc->ip))
 			break;
-		/* fall through */
+		fallthrough;
 	case XFS_ATTR_FORK:
 		xchk_xref_is_not_shared(info->sc, agbno,
 				irec->br_blockcount);
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index a94bd8122c604..bd1172358964e 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -44,7 +44,7 @@ __xchk_btree_process_error(
 		/* Note the badness but don't abort. */
 		sc->sm->sm_flags |= errflag;
 		*error = 0;
-		/* fall through */
+		fallthrough;
 	default:
 		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
 			trace_xchk_ifork_btree_op_error(sc, cur, level,
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index aa874607618a2..ce9a44ea69486 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -81,7 +81,7 @@ __xchk_process_error(
 		/* Note the badness but don't abort. */
 		sc->sm->sm_flags |= errflag;
 		*error = 0;
-		/* fall through */
+		fallthrough;
 	default:
 		trace_xchk_op_error(sc, agno, bno, *error,
 				ret_ip);
@@ -134,7 +134,7 @@ __xchk_fblock_process_error(
 		/* Note the badness but don't abort. */
 		sc->sm->sm_flags |= errflag;
 		*error = 0;
-		/* fall through */
+		fallthrough;
 	default:
 		trace_xchk_file_op_error(sc, whichfork, offset, *error,
 				ret_ip);
@@ -694,7 +694,7 @@ xchk_get_inode(
 		if (error)
 			return -ENOENT;
 		error = -EFSCORRUPTED;
-		/* fall through */
+		fallthrough;
 	default:
 		trace_xchk_op_error(sc,
 				XFS_INO_TO_AGNO(mp, sc->sm->sm_ino),
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 653f3280e1c18..9f0dbb47c82c0 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -47,7 +47,7 @@ xchk_da_process_error(
 		/* Note the badness but don't abort. */
 		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
 		*error = 0;
-		/* fall through */
+		fallthrough;
 	default:
 		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
 				xfs_dir2_da_to_db(ds->dargs.geo,
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index c2857d854c83f..b8202dd089392 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -947,7 +947,7 @@ xrep_ino_dqattach(
 			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
 		if (XFS_IS_PQUOTA_ON(sc->mp) && !sc->ip->i_pdquot)
 			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
-		/* fall through */
+		fallthrough;
 	case -ESRCH:
 		error = 0;
 		break;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index a5e9d7d34023f..cc628475f9b65 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -242,7 +242,7 @@ xfs_bmap_count_blocks(
 		 */
 		*count += btblocks - 1;
 
-		/* fall through */
+		fallthrough;
 	case XFS_DINODE_FMT_EXTENTS:
 		*nextents = xfs_bmap_count_leaves(ifp, count);
 		break;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 465fd9e048d4f..1da59bdff245c 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -84,7 +84,7 @@ xfs_fs_encode_fh(
 	case FILEID_INO32_GEN_PARENT:
 		fid->i32.parent_ino = XFS_I(parent)->i_ino;
 		fid->i32.parent_gen = parent->i_generation;
-		/*FALLTHRU*/
+		fallthrough;
 	case FILEID_INO32_GEN:
 		fid->i32.ino = XFS_I(inode)->i_ino;
 		fid->i32.gen = inode->i_generation;
@@ -92,7 +92,7 @@ xfs_fs_encode_fh(
 	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
 		fid64->parent_ino = XFS_I(parent)->i_ino;
 		fid64->parent_gen = parent->i_generation;
-		/*FALLTHRU*/
+		fallthrough;
 	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
 		fid64->ino = XFS_I(inode)->i_ino;
 		fid64->gen = inode->i_generation;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 396ef36dcd0a1..3c0749ab9e407 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -863,7 +863,7 @@ xfs_break_layouts(
 			error = xfs_break_dax_layouts(inode, &retry);
 			if (error || retry)
 				break;
-			/* fall through */
+			fallthrough;
 		case BREAK_WRITE:
 			error = xfs_break_leased_layouts(inode, iolock, &retry);
 			break;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 0369eb22c1bb0..f2846997c3a8a 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -848,7 +848,7 @@ xfs_init_new_inode(
 			xfs_inode_inherit_flags(ip, pip);
 		if (pip && (pip->i_diflags2 & XFS_DIFLAG2_ANY))
 			xfs_inode_inherit_flags2(ip, pip);
-		/* FALLTHROUGH */
+		fallthrough;
 	case S_IFLNK:
 		ip->i_df.if_format = XFS_DINODE_FMT_EXTENTS;
 		ip->i_df.if_bytes = 0;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3925bfcb23657..c4dc6c72ac37d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -558,7 +558,7 @@ xfs_ioc_attrmulti_one(
 	case ATTR_OP_REMOVE:
 		value = NULL;
 		*len = 0;
-		/* fall through */
+		fallthrough;
 	case ATTR_OP_SET:
 		error = mnt_want_write_file(parfilp);
 		if (error)
@@ -1544,7 +1544,7 @@ xfs_ioc_getbmap(
 	switch (cmd) {
 	case XFS_IOC_GETBMAPA:
 		bmx.bmv_iflags = BMV_IF_ATTRFORK;
-		/*FALLTHRU*/
+		fallthrough;
 	case XFS_IOC_GETBMAP:
 		/* struct getbmap is a strict subset of struct getbmapx. */
 		recsize = sizeof(struct getbmap);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index d154f42e2dc68..d8cd2583dedbf 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1036,7 +1036,7 @@ retry:
 			prealloc_blocks = 0;
 			goto retry;
 		}
-		/*FALLTHRU*/
+		fallthrough;
 	default:
 		goto out_unlock;
 	}
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c19a82adea1ed..a002425377b5d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2626,6 +2626,7 @@ xlog_covered_state(
 	case XLOG_STATE_COVER_IDLE:
 		if (iclogs_changed == 1)
 			return XLOG_STATE_COVER_IDLE;
+		fallthrough;
 	case XLOG_STATE_COVER_NEED:
 	case XLOG_STATE_COVER_NEED2:
 		break;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 9aced0a00003c..d11d032da0b41 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -294,7 +294,7 @@ xfs_trans_read_buf_map(
 	default:
 		if (tp && (tp->t_flags & XFS_TRANS_DIRTY))
 			xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
-		/* fall through */
+		fallthrough;
 	case -ENOMEM:
 	case -EAGAIN:
 		return error;
-- 
GitLab


From 62f3415db237b8d2aa9a804ff84ce2efa87df179 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 26 May 2021 11:46:17 -0700
Subject: [PATCH 1644/3804] net: phy: Document phydev::dev_flags bits
 allocation

Document the phydev::dev_flags bit allocation to allow bits 15:0 to
define PHY driver specific behavior, bits 23:16 to be reserved for now,
and bits 31:24 to hold generic PHY driver flags.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/20210526184617.3105012-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 60d2b26026a2d..852743f07e3e6 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -496,6 +496,11 @@ struct macsec_ops;
  * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
+ *		Bits [15:0] are free to use by the PHY driver to communicate
+ *			    driver specific behavior.
+ *		Bits [23:16] are currently reserved for future use.
+ *		Bits [31:24] are reserved for defining generic
+ *			     PHY driver behavior.
  * @irq: IRQ number of the PHY's interrupt (-1 if none)
  * @phy_timer: The timer for handling the state machine
  * @phylink: Pointer to phylink instance for this PHY
-- 
GitLab


From 5cb4e1f33e5eeadbce3814282e010d4dd31816af Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 26 May 2021 22:56:55 +0300
Subject: [PATCH 1645/3804] spi: Enable tracing of the SPI setup CS selection

It is helpful to see what state the CS signal was in during a
given SPI operation. The same applies to SPI setup.

Enable tracing of the SPI setup and CS selection.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Message-Id: <20210526195655.75691-1-andriy.shevchenko@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c          |  4 +++
 include/trace/events/spi.h | 57 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 956dce3aafcad..20932752a7ef8 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -804,6 +804,8 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 	    (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH)))
 		return;
 
+	trace_spi_set_cs(spi, activate);
+
 	spi->controller->last_cs_enable = enable;
 	spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
 
@@ -3441,6 +3443,8 @@ int spi_setup(struct spi_device *spi)
 		spi_set_thread_rt(spi->controller);
 	}
 
+	trace_spi_setup(spi, status);
+
 	dev_dbg(&spi->dev, "setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d\n",
 			spi->mode & SPI_MODE_X_MASK,
 			(spi->mode & SPI_CS_HIGH) ? "cs_high, " : "",
diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index 0dd9171d2ad8e..c0d9844befd7a 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -42,6 +42,63 @@ DEFINE_EVENT(spi_controller, spi_controller_busy,
 
 );
 
+TRACE_EVENT(spi_setup,
+	TP_PROTO(struct spi_device *spi, int status),
+	TP_ARGS(spi, status),
+
+	TP_STRUCT__entry(
+		__field(int, bus_num)
+		__field(int, chip_select)
+		__field(unsigned long, mode)
+		__field(unsigned int, bits_per_word)
+		__field(unsigned int, max_speed_hz)
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->bus_num = spi->controller->bus_num;
+		__entry->chip_select = spi->chip_select;
+		__entry->mode = spi->mode;
+		__entry->bits_per_word = spi->bits_per_word;
+		__entry->max_speed_hz = spi->max_speed_hz;
+		__entry->status = status;
+	),
+
+	TP_printk("spi%d.%d setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d",
+		  __entry->bus_num, __entry->chip_select,
+		  (__entry->mode & SPI_MODE_X_MASK),
+		  (__entry->mode & SPI_CS_HIGH) ? "cs_high, " : "",
+		  (__entry->mode & SPI_LSB_FIRST) ? "lsb, " : "",
+		  (__entry->mode & SPI_3WIRE) ? "3wire, " : "",
+		  (__entry->mode & SPI_LOOP) ? "loopback, " : "",
+		  __entry->bits_per_word, __entry->max_speed_hz,
+		  __entry->status)
+);
+
+TRACE_EVENT(spi_set_cs,
+	TP_PROTO(struct spi_device *spi, bool enable),
+	TP_ARGS(spi, enable),
+
+	TP_STRUCT__entry(
+		__field(int, bus_num)
+		__field(int, chip_select)
+		__field(unsigned long, mode)
+		__field(bool, enable)
+	),
+
+	TP_fast_assign(
+		__entry->bus_num = spi->controller->bus_num;
+		__entry->chip_select = spi->chip_select;
+		__entry->mode = spi->mode;
+		__entry->enable = enable;
+	),
+
+	TP_printk("spi%d.%d %s%s",
+		  __entry->bus_num, __entry->chip_select,
+		  __entry->enable ? "activate" : "deactivate",
+		  (__entry->mode & SPI_CS_HIGH) ? ", cs_high" : "")
+);
+
 DECLARE_EVENT_CLASS(spi_message,
 
 	TP_PROTO(struct spi_message *msg),
-- 
GitLab


From 7513cc8a1b741bee6fb39cbb94a9842d37ca3ace Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 26 May 2021 20:36:20 +0100
Subject: [PATCH 1646/3804] arm64: Change the cpuinfo_arm64 member type for
 some sysregs to u64

The architecture has been updated and the CTR_EL0, CNTFRQ_EL0,
DCZID_EL0, MIDR_EL1, REVIDR_EL1 registers are all 64-bit, even if most
of them have a RES0 top 32-bit.

Change their type to u64 in struct cpuinfo_arm64.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Suzuki K Poulose <Suzuki.Poulose@arm.com>
Link: https://lore.kernel.org/r/20210526193621.21559-2-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h | 10 +++++-----
 arch/arm64/kernel/cpuinfo.c  |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6ff3ab4d..fe5a8499ddc29 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -15,11 +15,11 @@
 struct cpuinfo_arm64 {
 	struct cpu	cpu;
 	struct kobject	kobj;
-	u32		reg_ctr;
-	u32		reg_cntfrq;
-	u32		reg_dczid;
-	u32		reg_midr;
-	u32		reg_revidr;
+	u64		reg_ctr;
+	u64		reg_cntfrq;
+	u64		reg_dczid;
+	u64		reg_midr;
+	u64		reg_revidr;
 
 	u64		reg_id_aa64dfr0;
 	u64		reg_id_aa64dfr1;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 51fcf99d53514..0e9e965e18d8b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -246,7 +246,7 @@ static struct kobj_type cpuregs_kobj_type = {
 		struct cpuinfo_arm64 *info = kobj_to_cpuinfo(kobj);		\
 										\
 		if (info->reg_midr)						\
-			return sprintf(buf, "0x%016x\n", info->reg_##_field);	\
+			return sprintf(buf, "0x%016llx\n", info->reg_##_field);	\
 		else								\
 			return 0;						\
 	}									\
-- 
GitLab


From 21047e91a5a674b97ebbf2c2c1751f1e9c317f09 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 26 May 2021 20:36:21 +0100
Subject: [PATCH 1647/3804] arm64: Check if GMID_EL1.BS is the same on all CPUs

The GMID_EL1.BS field determines the number of tags accessed by the
LDGM/STGM instructions (EL1 and up), used by the kernel for copying or
zeroing page tags.

Taint the kernel if GMID_EL1.BS differs between CPUs but only if
CONFIG_ARM64_MTE is enabled.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Suzuki K Poulose <Suzuki.Poulose@arm.com>
Link: https://lore.kernel.org/r/20210526193621.21559-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h        |  1 +
 arch/arm64/include/asm/cpufeature.h |  7 +++++++
 arch/arm64/kernel/cpufeature.c      | 21 +++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c         |  3 +++
 4 files changed, 32 insertions(+)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index fe5a8499ddc29..9088e72c7cf66 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -20,6 +20,7 @@ struct cpuinfo_arm64 {
 	u64		reg_dczid;
 	u64		reg_midr;
 	u64		reg_revidr;
+	u64		reg_gmid;
 
 	u64		reg_id_aa64dfr0;
 	u64		reg_id_aa64dfr1;
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 338840c00e8ed..650de920e0679 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -619,6 +619,13 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
 	return val > 0;
 }
 
+static inline bool id_aa64pfr1_mte(u64 pfr1)
+{
+	u32 val = cpuid_feature_extract_unsigned_field(pfr1, ID_AA64PFR1_MTE_SHIFT);
+
+	return val >= ID_AA64PFR1_MTE;
+}
+
 void __init setup_cpu_features(void);
 void check_local_cpu_capabilities(void);
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index efed2830d141f..0645300cc1a8a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -400,6 +400,11 @@ static const struct arm64_ftr_bits ftr_dczid[] = {
 	ARM64_FTR_END,
 };
 
+static const struct arm64_ftr_bits ftr_gmid[] = {
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, SYS_GMID_EL1_BS_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
 static const struct arm64_ftr_bits ftr_id_isar0[] = {
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0),
@@ -617,6 +622,9 @@ static const struct __ftr_reg_entry {
 	/* Op1 = 0, CRn = 1, CRm = 2 */
 	ARM64_FTR_REG(SYS_ZCR_EL1, ftr_zcr),
 
+	/* Op1 = 1, CRn = 0, CRm = 0 */
+	ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid),
+
 	/* Op1 = 3, CRn = 0, CRm = 0 */
 	{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
 	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
@@ -911,6 +919,9 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 		sve_init_vq_map();
 	}
 
+	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+		init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid);
+
 	/*
 	 * Initialize the indirect array of CPU hwcaps capabilities pointers
 	 * before we handle the boot CPU below.
@@ -1134,6 +1145,16 @@ void update_cpu_features(int cpu,
 			sve_update_vq_map();
 	}
 
+	/*
+	 * The kernel uses the LDGM/STGM instructions and the number of tags
+	 * they read/write depends on the GMID_EL1.BS field. Check that the
+	 * value is the same on all CPUs.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_MTE) &&
+	    id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+		taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
+					      info->reg_gmid, boot->reg_gmid);
+
 	/*
 	 * This relies on a sanitised view of the AArch64 ID registers
 	 * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 0e9e965e18d8b..5321b82185912 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -371,6 +371,9 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
 	info->reg_id_aa64zfr0 = read_cpuid(ID_AA64ZFR0_EL1);
 
+	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+		info->reg_gmid = read_cpuid(GMID_EL1);
+
 	/* Update the 32bit ID registers only if AArch32 is implemented */
 	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
 		info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-- 
GitLab


From e176e2677cccd458f99c69d16d27f86adcdd02e4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 20 May 2021 12:50:27 +0100
Subject: [PATCH 1648/3804] arm64: assembler: add set_this_cpu_offset

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210520115031.18509-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/assembler.h | 18 +++++++++++++-----
 arch/arm64/mm/proc.S               | 12 ++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8418c1bd8f044..f0188903557f0 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -232,15 +232,23 @@ lr	.req	x30		// link register
 	 * @dst: destination register
 	 */
 #if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
-	.macro	this_cpu_offset, dst
+	.macro	get_this_cpu_offset, dst
 	mrs	\dst, tpidr_el2
 	.endm
 #else
-	.macro	this_cpu_offset, dst
+	.macro	get_this_cpu_offset, dst
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	mrs	\dst, tpidr_el1
 alternative_else
 	mrs	\dst, tpidr_el2
+alternative_endif
+	.endm
+
+	.macro	set_this_cpu_offset, src
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
+	msr	tpidr_el1, \src
+alternative_else
+	msr	tpidr_el2, \src
 alternative_endif
 	.endm
 #endif
@@ -253,7 +261,7 @@ alternative_endif
 	.macro adr_this_cpu, dst, sym, tmp
 	adrp	\tmp, \sym
 	add	\dst, \tmp, #:lo12:\sym
-	this_cpu_offset \tmp
+	get_this_cpu_offset \tmp
 	add	\dst, \dst, \tmp
 	.endm
 
@@ -264,7 +272,7 @@ alternative_endif
 	 */
 	.macro ldr_this_cpu dst, sym, tmp
 	adr_l	\dst, \sym
-	this_cpu_offset \tmp
+	get_this_cpu_offset \tmp
 	ldr	\dst, [\dst, \tmp]
 	.endm
 
@@ -745,7 +753,7 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	cbz		\tmp, \lbl
 #endif
 	adr_l		\tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING
-	this_cpu_offset	\tmp2
+	get_this_cpu_offset	\tmp2
 	ldr		w\tmp, [\tmp, \tmp2]
 	cbnz		w\tmp, \lbl	// yield on pending softirq in task context
 .Lnoyield_\@:
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 97d7bcd8d4f26..bc555cd5e6b1e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,11 +83,7 @@ SYM_FUNC_START(cpu_do_suspend)
 	mrs	x9, mdscr_el1
 	mrs	x10, oslsr_el1
 	mrs	x11, sctlr_el1
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	mrs	x12, tpidr_el1
-alternative_else
-	mrs	x12, tpidr_el2
-alternative_endif
+	get_this_cpu_offset x12
 	mrs	x13, sp_el0
 	stp	x2, x3, [x0]
 	stp	x4, x5, [x0, #16]
@@ -145,11 +141,7 @@ SYM_FUNC_START(cpu_do_resume)
 	msr	mdscr_el1, x10
 
 	msr	sctlr_el1, x12
-alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
-	msr	tpidr_el1, x13
-alternative_else
-	msr	tpidr_el2, x13
-alternative_endif
+	set_this_cpu_offset x13
 	msr	sp_el0, x14
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
-- 
GitLab


From 98c7a1666ee94af59a65f2787a887a05a546d163 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 20 May 2021 12:50:28 +0100
Subject: [PATCH 1649/3804] arm64: smp: remove pointless secondary_data
 maintenance

All reads and writes of secondary_data occur with the MMU on, using
coherent attributes, so there's no need to perform any cache maintenance
for this.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210520115031.18509-4-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/smp.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b077..92e83e8bac948 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -122,7 +122,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	secondary_data.task = idle;
 	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
-	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 
 	/* Now bring the CPU into our world */
 	ret = boot_secondary(cpu, idle);
@@ -143,7 +142,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	pr_crit("CPU%u: failed to come online\n", cpu);
 	secondary_data.task = NULL;
 	secondary_data.stack = NULL;
-	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
 	status = READ_ONCE(secondary_data.status);
 	if (status == CPU_MMU_OFF)
 		status = READ_ONCE(__early_cpu_boot_status);
-- 
GitLab


From 3305e7f74a14cdb19e61af4febb098ad62820d71 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 20 May 2021 12:50:29 +0100
Subject: [PATCH 1650/3804] arm64: smp: remove stack from secondary_data

When we boot a secondary CPU, we pass it a task and a stack to use. As
the stack is always the task's stack, which can be derived from the
task, let's have the secondary CPU derive this itself and avoid passing
redundant information.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210520115031.18509-5-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/smp.h    | 2 --
 arch/arm64/kernel/asm-offsets.c | 1 -
 arch/arm64/kernel/head.S        | 7 ++++---
 arch/arm64/kernel/smp.c         | 2 --
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 0e357757c0cca..fc55f5a57a06e 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -73,12 +73,10 @@ asmlinkage void secondary_start_kernel(void);
 
 /*
  * Initial data for bringing up a secondary CPU.
- * @stack  - sp for the secondary CPU
  * @status - Result passed back from the secondary CPU to
  *           indicate failure.
  */
 struct secondary_data {
-	void *stack;
 	struct task_struct *task;
 	long status;
 };
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e733..4a5e204c33af7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -99,7 +99,6 @@ int main(void)
   DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT);
   DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending));
   BLANK();
-  DEFINE(CPU_BOOT_STACK,	offsetof(struct secondary_data, stack));
   DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
   BLANK();
   DEFINE(FTR_OVR_VAL_OFFSET,	offsetof(struct arm64_ftr_override, val));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index cc2d45d54838f..9be95e11367d6 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -645,11 +645,12 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	isb
 
 	adr_l	x0, secondary_data
-	ldr	x1, [x0, #CPU_BOOT_STACK]	// get secondary_data.stack
-	cbz	x1, __secondary_too_slow
-	mov	sp, x1
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
+
+	ldr	x1, [x2, #TSK_STACK]
+	add	sp, x1, #THREAD_SIZE
+
 	msr	sp_el0, x2
 	scs_load x2, x3
 	setup_final_frame
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 92e83e8bac948..73625cc39574b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -120,7 +120,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	 * page tables.
 	 */
 	secondary_data.task = idle;
-	secondary_data.stack = task_stack_page(idle) + THREAD_SIZE;
 	update_cpu_boot_status(CPU_MMU_OFF);
 
 	/* Now bring the CPU into our world */
@@ -141,7 +140,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 	pr_crit("CPU%u: failed to come online\n", cpu);
 	secondary_data.task = NULL;
-	secondary_data.stack = NULL;
 	status = READ_ONCE(secondary_data.status);
 	if (status == CPU_MMU_OFF)
 		status = READ_ONCE(__early_cpu_boot_status);
-- 
GitLab


From 8e334d729bc4787f728e9e5abc91649f131124ff Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 20 May 2021 12:50:30 +0100
Subject: [PATCH 1651/3804] arm64: smp: unify task and sp setup

Once we enable the MMU, we have to initialize:

* SP_EL0 to point at the active task
* SP to point at the active task's stack
* SCS_SP to point at the active task's shadow stack

For all tasks (including init_task), this information can be derived
from the task's task_struct.

Let's unify __primary_switched and __secondary_switched to consistently
acquire this information from the relevant task_struct. At the same
time, let's fold this together with initializing a task's final frame.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210520115031.18509-6-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/head.S | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 9be95e11367d6..e83b2899dce5b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -395,15 +395,24 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 SYM_FUNC_END(__create_page_tables)
 
 	/*
+	 * Initialize CPU registers with task-specific and cpu-specific context.
+	 *
 	 * Create a final frame record at task_pt_regs(current)->stackframe, so
 	 * that the unwinder can identify the final frame record of any task by
 	 * its location in the task stack. We reserve the entire pt_regs space
 	 * for consistency with user tasks and kthreads.
 	 */
-	.macro setup_final_frame
+	.macro	init_cpu_task tsk, tmp
+	msr	sp_el0, \tsk
+
+	ldr	\tmp, [\tsk, #TSK_STACK]
+	add	sp, \tmp, #THREAD_SIZE
 	sub	sp, sp, #PT_REGS_SIZE
+
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
+
+	scs_load \tsk, \tmp
 	.endm
 
 /*
@@ -412,22 +421,16 @@ SYM_FUNC_END(__create_page_tables)
  *   x0 = __PHYS_OFFSET
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
-	adrp	x4, init_thread_union
-	add	sp, x4, #THREAD_SIZE
-	adr_l	x5, init_task
-	msr	sp_el0, x5			// Save thread_info
+	adr_l	x4, init_task
+	init_cpu_task x4, x5
 
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
 
-	stp	xzr, x30, [sp, #-16]!
+	stp	x29, x30, [sp, #-16]!
 	mov	x29, sp
 
-#ifdef CONFIG_SHADOW_CALL_STACK
-	adr_l	scs_sp, init_shadow_call_stack	// Set shadow call stack
-#endif
-
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 
 	ldr_l	x4, kimage_vaddr		// Save the offset between
@@ -459,8 +462,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 0:
 #endif
 	bl	switch_to_vhe			// Prefer VHE if possible
-	add	sp, sp, #16
-	setup_final_frame
+	ldp	x29, x30, [sp], #16
 	bl	start_kernel
 	ASM_BUG()
 SYM_FUNC_END(__primary_switched)
@@ -648,12 +650,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
 
-	ldr	x1, [x2, #TSK_STACK]
-	add	sp, x1, #THREAD_SIZE
-
-	msr	sp_el0, x2
-	scs_load x2, x3
-	setup_final_frame
+	init_cpu_task x2, x1
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
-- 
GitLab


From 3d8c1a013d78f32ee266097496cbd89b734b5fcb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 20 May 2021 12:50:31 +0100
Subject: [PATCH 1652/3804] arm64: smp: initialize cpu offset earlier

Now that we have a consistent place to initialize CPU context registers
early in the boot path, let's also initialize the per-cpu offset here.
This makes the primary and secondary boot paths more consistent, and
allows for the use of per-cpu operations earlier, which will be
necessary for instrumentation with KCSAN.

Note that smp_prepare_boot_cpu() still needs to re-initialize CPU0's
offset as immediately prior to this the per-cpu areas may be
reallocated, and hence the boot-time offset may be stale. A comment is
added to make this clear.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210520115031.18509-7-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/asm-offsets.c |  1 +
 arch/arm64/kernel/head.S        | 17 +++++++++++------
 arch/arm64/kernel/setup.c       |  6 ------
 arch/arm64/kernel/smp.c         | 10 ++++++----
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 4a5e204c33af7..bd0fc23d8719c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,6 +27,7 @@
 int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
+  DEFINE(TSK_CPU,		offsetof(struct task_struct, cpu));
   BLANK();
   DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
   DEFINE(TSK_TI_PREEMPT,	offsetof(struct task_struct, thread_info.preempt_count));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e83b2899dce5b..070ed53c049d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -402,17 +402,22 @@ SYM_FUNC_END(__create_page_tables)
 	 * its location in the task stack. We reserve the entire pt_regs space
 	 * for consistency with user tasks and kthreads.
 	 */
-	.macro	init_cpu_task tsk, tmp
+	.macro	init_cpu_task tsk, tmp1, tmp2
 	msr	sp_el0, \tsk
 
-	ldr	\tmp, [\tsk, #TSK_STACK]
-	add	sp, \tmp, #THREAD_SIZE
+	ldr	\tmp1, [\tsk, #TSK_STACK]
+	add	sp, \tmp1, #THREAD_SIZE
 	sub	sp, sp, #PT_REGS_SIZE
 
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
 
-	scs_load \tsk, \tmp
+	scs_load \tsk, \tmp1
+
+	adr_l	\tmp1, __per_cpu_offset
+	ldr	w\tmp2, [\tsk, #TSK_CPU]
+	ldr	\tmp1, [\tmp1, \tmp2, lsl #3]
+	set_this_cpu_offset \tmp1
 	.endm
 
 /*
@@ -422,7 +427,7 @@ SYM_FUNC_END(__create_page_tables)
  */
 SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x4, init_task
-	init_cpu_task x4, x5
+	init_cpu_task x4, x5, x6
 
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
@@ -650,7 +655,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
 
-	init_cpu_task x2, x1
+	init_cpu_task x2, x1, x3
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 	ptrauth_keys_init_cpu x2, x3, x4, x5
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 61845c0821d9d..b7a35a03e9b90 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	set_cpu_logical_map(0, mpidr);
 
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 73625cc39574b..2fe8fab886e2b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -198,10 +198,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu;
-
-	cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
+	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -448,6 +445,11 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
+	/*
+	 * The runtime per-cpu areas have been allocated by
+	 * setup_per_cpu_areas(), and CPU0's boot time per-cpu area will be
+	 * freed shortly, so we must move over to the runtime per-cpu area.
+	 */
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
 	cpuinfo_store_boot_cpu();
 
-- 
GitLab


From 1cbdf60bd1b74e397d48aa877367cfc621f45ffe Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 26 May 2021 10:49:27 -0700
Subject: [PATCH 1653/3804] kasan: arm64: support specialized outlined tag
 mismatch checks

By using outlined checks we can achieve a significant code size
improvement by moving the tag-based ASAN checks into separate
functions. Unlike the existing CONFIG_KASAN_OUTLINE mode these
functions have a custom calling convention that preserves most
registers and is specialized to the register containing the address
and the type of access, and as a result we can eliminate the code
size and performance overhead of a standard calling convention such
as AAPCS for these functions.

This change depends on a separate series of changes to Clang [1] to
support outlined checks in the kernel, although the change works fine
without them (we just don't get outlined checks). This is because the
flag -mllvm -hwasan-inline-all-checks=0 has no effect until the Clang
changes land. The flag was introduced in the Clang 9.0 timeframe as
part of the support for outlined checks in userspace and because our
minimum Clang version is 10.0 we can pass it unconditionally.

Outlined checks require a new runtime function with a custom calling
convention. Add this function to arch/arm64/lib.

I measured the code size of defconfig + tag-based KASAN, as well
as boot time (i.e. time to init launch) on a DragonBoard 845c with
an Android arm64 GKI kernel. The results are below:

                               code size    boot time
CONFIG_KASAN_INLINE=y before    92824064      6.18s
CONFIG_KASAN_INLINE=y after     38822400      6.65s
CONFIG_KASAN_OUTLINE=y          39215616     11.48s

We can see straight away that specialized outlined checks beat the
existing CONFIG_KASAN_OUTLINE=y on both code size and boot time
for tag-based ASAN.

As for the comparison between CONFIG_KASAN_INLINE=y before and after
we saw similar performance numbers in userspace [2] and decided
that since the performance overhead is minimal compared to the
overhead of tag-based ASAN itself as well as compared to the code
size improvements we would just replace the inlined checks with the
specialized outlined checks without the option to select between them,
and that is what I have implemented in this patch.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Link: https://linux-review.googlesource.com/id/I1a30036c70ab3c3ee78d75ed9b87ef7cdc3fdb76
Link: [1] https://reviews.llvm.org/D90426
Link: [2] https://reviews.llvm.org/D56954
Link: https://lore.kernel.org/r/20210526174927.2477847-3-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/asm-prototypes.h |  6 ++
 arch/arm64/include/asm/module.lds.h     | 17 +++++-
 arch/arm64/lib/Makefile                 |  2 +
 arch/arm64/lib/kasan_sw_tags.S          | 76 +++++++++++++++++++++++++
 mm/kasan/sw_tags.c                      |  7 +++
 scripts/Makefile.kasan                  |  1 +
 6 files changed, 107 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/lib/kasan_sw_tags.S

diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
index 1c9a3a0c5fa5f..ec1d9655f8850 100644
--- a/arch/arm64/include/asm/asm-prototypes.h
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -23,4 +23,10 @@ long long __ashlti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __lshrti3(long long a, int b);
 
+/*
+ * This function uses a custom calling convention and cannot be called from C so
+ * this prototype is not entirely accurate.
+ */
+void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info);
+
 #endif /* __ASM_PROTOTYPES_H */
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 810045628c66e..a11ccadd47d29 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,7 +1,20 @@
-#ifdef CONFIG_ARM64_MODULE_PLTS
 SECTIONS {
+#ifdef CONFIG_ARM64_MODULE_PLTS
 	.plt 0 (NOLOAD) : { BYTE(0) }
 	.init.plt 0 (NOLOAD) : { BYTE(0) }
 	.text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
-}
 #endif
+
+#ifdef CONFIG_KASAN_SW_TAGS
+	/*
+	 * Outlined checks go into comdat-deduplicated sections named .text.hot.
+	 * Because they are in comdats they are not combined by the linker and
+	 * we otherwise end up with multiple sections with the same .text.hot
+	 * name in the .ko file. The kernel module loader warns if it sees
+	 * multiple sections with the same name so we use this sections
+	 * directive to force them into a single section and silence the
+	 * warning.
+	 */
+	.text.hot : { *(.text.hot) }
+#endif
+}
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index d31e1169d9b8e..8e60d76a1b473 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -18,3 +18,5 @@ obj-$(CONFIG_CRC32) += crc32.o
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 
 obj-$(CONFIG_ARM64_MTE) += mte.o
+
+obj-$(CONFIG_KASAN_SW_TAGS) += kasan_sw_tags.o
diff --git a/arch/arm64/lib/kasan_sw_tags.S b/arch/arm64/lib/kasan_sw_tags.S
new file mode 100644
index 0000000000000..5b04464c045eb
--- /dev/null
+++ b/arch/arm64/lib/kasan_sw_tags.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Google LLC
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Report a tag mismatch detected by tag-based KASAN.
+ *
+ * A compiler-generated thunk calls this with a non-AAPCS calling
+ * convention. Upon entry to this function, registers are as follows:
+ *
+ * x0:         fault address (see below for restore)
+ * x1:         fault description (see below for restore)
+ * x2 to x15:  callee-saved
+ * x16 to x17: safe to clobber
+ * x18 to x30: callee-saved
+ * sp:         pre-decremented by 256 bytes (see below for restore)
+ *
+ * The caller has decremented the SP by 256 bytes, and created a
+ * structure on the stack as follows:
+ *
+ * sp + 0..15:    x0 and x1 to be restored
+ * sp + 16..231:  free for use
+ * sp + 232..247: x29 and x30 (same as in GPRs)
+ * sp + 248..255: free for use
+ *
+ * Note that this is not a struct pt_regs.
+ *
+ * To call a regular AAPCS function we must save x2 to x15 (which we can
+ * store in the gaps), and create a frame record (for which we can use
+ * x29 and x30 spilled by the caller as those match the GPRs).
+ *
+ * The caller expects x0 and x1 to be restored from the structure, and
+ * for the structure to be removed from the stack (i.e. the SP must be
+ * incremented by 256 prior to return).
+ */
+SYM_CODE_START(__hwasan_tag_mismatch)
+#ifdef BTI_C
+	BTI_C
+#endif
+	add	x29, sp, #232
+	stp	x2, x3, [sp, #8 * 2]
+	stp	x4, x5, [sp, #8 * 4]
+	stp	x6, x7, [sp, #8 * 6]
+	stp	x8, x9, [sp, #8 * 8]
+	stp	x10, x11, [sp, #8 * 10]
+	stp	x12, x13, [sp, #8 * 12]
+	stp	x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+	str	x18, [sp, #8 * 18]
+#endif
+
+	mov	x2, x30
+	bl	kasan_tag_mismatch
+
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #8 * 2]
+	ldp	x4, x5, [sp, #8 * 4]
+	ldp	x6, x7, [sp, #8 * 6]
+	ldp	x8, x9, [sp, #8 * 8]
+	ldp	x10, x11, [sp, #8 * 10]
+	ldp	x12, x13, [sp, #8 * 12]
+	ldp	x14, x15, [sp, #8 * 14]
+#ifndef CONFIG_SHADOW_CALL_STACK
+	ldr	x18, [sp, #8 * 18]
+#endif
+	ldp	x29, x30, [sp, #8 * 29]
+
+	/* remove the structure from the stack */
+	add	sp, sp, #256
+	ret
+SYM_CODE_END(__hwasan_tag_mismatch)
+EXPORT_SYMBOL(__hwasan_tag_mismatch)
diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c
index 9df8e7f69e870..9362938abbfa5 100644
--- a/mm/kasan/sw_tags.c
+++ b/mm/kasan/sw_tags.c
@@ -207,3 +207,10 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 
 	return &alloc_meta->free_track[i];
 }
+
+void kasan_tag_mismatch(unsigned long addr, unsigned long access_info,
+			unsigned long ret_ip)
+{
+	kasan_report(addr, 1 << (access_info & 0xf), access_info & 0x10,
+		     ret_ip);
+}
diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
index 3d791908ed364..801c415bac59d 100644
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -50,6 +50,7 @@ endif
 CFLAGS_KASAN := -fsanitize=kernel-hwaddress \
 		$(call cc-param,hwasan-instrument-stack=$(stack_enable)) \
 		$(call cc-param,hwasan-use-short-granules=0) \
+		$(call cc-param,hwasan-inline-all-checks=0) \
 		$(instrumentation_flags)
 
 endif # CONFIG_KASAN_SW_TAGS
-- 
GitLab


From a9aecef198faae3240921b707bc09b602e966fce Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Wed, 26 May 2021 08:05:27 +0200
Subject: [PATCH 1654/3804] usb: cdnsp: Fix deadlock issue in
 cdnsp_thread_irq_handler

This patch fixes the following critical deadlock, which was detected while
testing the NCM class:

smp: csd: Detected non-responsive CSD lock (#1) on CPU#0
smp:     csd: CSD lock (#1) unresponsive.
....
RIP: 0010:native_queued_spin_lock_slowpath+0x61/0x1d0
RSP: 0018:ffffbc494011cde0 EFLAGS: 00000002
RAX: 0000000000000101 RBX: ffff9ee8116b4a68 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9ee8116b4658
RBP: ffffbc494011cde0 R08: 0000000000000001 R09: 0000000000000000
R10: ffff9ee8116b4670 R11: 0000000000000000 R12: ffff9ee8116b4658
R13: ffff9ee8116b4670 R14: 0000000000000246 R15: ffff9ee8116b4658
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f7bcc41a830 CR3: 000000007a612003 CR4: 00000000001706e0
Call Trace:
 <IRQ>
 do_raw_spin_lock+0xc0/0xd0
 _raw_spin_lock_irqsave+0x95/0xa0
 cdnsp_gadget_ep_queue.cold+0x88/0x107 [cdnsp_udc_pci]
 usb_ep_queue+0x35/0x110
 eth_start_xmit+0x220/0x3d0 [u_ether]
 ncm_tx_timeout+0x34/0x40 [usb_f_ncm]
 ? ncm_free_inst+0x50/0x50 [usb_f_ncm]
 __hrtimer_run_queues+0xac/0x440
 hrtimer_run_softirq+0x8c/0xb0
 __do_softirq+0xcf/0x428
 asm_call_irq_on_stack+0x12/0x20
 </IRQ>
 do_softirq_own_stack+0x61/0x70
 irq_exit_rcu+0xc1/0xd0
 sysvec_apic_timer_interrupt+0x52/0xb0
 asm_sysvec_apic_timer_interrupt+0x12/0x20
RIP: 0010:do_raw_spin_trylock+0x18/0x40
RSP: 0018:ffffbc494138bda8 EFLAGS: 00000246
RAX: 0000000000000000 RBX: ffff9ee8116b4658 RCX: 0000000000000000
RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff9ee8116b4658
RBP: ffffbc494138bda8 R08: 0000000000000001 R09: 0000000000000000
R10: ffff9ee8116b4670 R11: 0000000000000000 R12: ffff9ee8116b4658
R13: ffff9ee8116b4670 R14: ffff9ee7b5c73d80 R15: ffff9ee8116b4000
 _raw_spin_lock+0x3d/0x70
 ? cdnsp_thread_irq_handler.cold+0x32/0x112c [cdnsp_udc_pci]
 cdnsp_thread_irq_handler.cold+0x32/0x112c [cdnsp_udc_pci]
 ? cdnsp_remove_request+0x1f0/0x1f0 [cdnsp_udc_pci]
 ? cdnsp_thread_irq_handler+0x5/0xa0 [cdnsp_udc_pci]
 ? irq_thread+0xa0/0x1c0
 irq_thread_fn+0x28/0x60
 irq_thread+0x105/0x1c0
 ? __kthread_parkme+0x42/0x90
 ? irq_forced_thread_fn+0x90/0x90
 ? wake_threads_waitq+0x30/0x30
 ? irq_thread_check_affinity+0xe0/0xe0
 kthread+0x12a/0x160
 ? kthread_park+0x90/0x90
 ret_from_fork+0x22/0x30

The root cause of the issue is the use of spin_lock/spin_unlock instead of
spin_lock_irqsave/spin_unlock_irqrestore in the cdnsp_thread_irq_handler
function.

Cc: stable@vger.kernel.org
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
Signed-off-by: Pawel Laszczak <pawell@cadence.com>

Link: https://lore.kernel.org/r/20210526060527.7197-1-pawell@gli-login.cadence.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/cdns3/cdnsp-ring.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index 5f0513c96c04e..68972746e3636 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -1517,13 +1517,14 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
 {
 	struct cdnsp_device *pdev = (struct cdnsp_device *)data;
 	union cdnsp_trb *event_ring_deq;
+	unsigned long flags;
 	int counter = 0;
 
-	spin_lock(&pdev->lock);
+	spin_lock_irqsave(&pdev->lock, flags);
 
 	if (pdev->cdnsp_state & (CDNSP_STATE_HALTED | CDNSP_STATE_DYING)) {
 		cdnsp_died(pdev);
-		spin_unlock(&pdev->lock);
+		spin_unlock_irqrestore(&pdev->lock, flags);
 		return IRQ_HANDLED;
 	}
 
@@ -1539,7 +1540,7 @@ irqreturn_t cdnsp_thread_irq_handler(int irq, void *data)
 
 	cdnsp_update_erst_dequeue(pdev, event_ring_deq, 1);
 
-	spin_unlock(&pdev->lock);
+	spin_unlock_irqrestore(&pdev->lock, flags);
 
 	return IRQ_HANDLED;
 }
-- 
GitLab


From 0e68c4b11f1e66d211ad242007e9f1076a6b7709 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Thu, 20 May 2021 01:03:53 +0800
Subject: [PATCH 1655/3804] ALSA: hda/realtek: fix mute/micmute LEDs for HP 855
 G8

The HP EliteBook 855 G8 Notebook PC is using the ALC285 codec, which
needs the ALC285_FIXUP_HP_MUTE_LED fixup to work. After applying the
fixup, the mute/micmute LEDs work well.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210519170357.58410-1-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 90bf0d3a830a3..7f743382d3951 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
-- 
GitLab


From bbe183e07817a46cf8d3d7fc88093df81d23a957 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Thu, 20 May 2021 01:03:54 +0800
Subject: [PATCH 1656/3804] ALSA: hda/realtek: fix mute/micmute LEDs and
 speaker for HP Zbook G8

The HP ZBook Studio 15.6 Inch G8 is using the ALC285 codec, which
uses 0x04 to control the mute LED and 0x01 to control the micmute LED.
In addition, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210519170357.58410-2-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7f743382d3951..f33537099ae27 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
-- 
GitLab


From e650c1a959da49f2b873cb56564b825882c22e7a Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Thu, 20 May 2021 01:03:55 +0800
Subject: [PATCH 1657/3804] ALSA: hda/realtek: fix mute/micmute LEDs and
 speaker for HP Zbook Fury 15 G8

The HP ZBook Fury 15.6 Inch G8 is using the ALC285 codec, which
uses 0x04 to control the mute LED and 0x01 to control the micmute LED.
In addition, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210519170357.58410-3-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index f33537099ae27..784fdeb8dfeae 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
-- 
GitLab


From 50dbfae972cbe0e3c631e73c7c58cbc48bfc6a49 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Thu, 20 May 2021 01:03:56 +0800
Subject: [PATCH 1658/3804] ALSA: hda/realtek: fix mute/micmute LEDs and
 speaker for HP Zbook Fury 17 G8

The HP ZBook Fury 17.3 Inch G8 is using the ALC285 codec, which
uses 0x04 to control the mute LED and 0x01 to control the micmute LED.
In addition, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210519170357.58410-4-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 784fdeb8dfeae..61a60c420f6fd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8328,6 +8328,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
-- 
GitLab


From 32961aecf9da85c9e4c98d91ab8337424e0c8372 Mon Sep 17 00:00:00 2001
From: Haocheng Xie <xiehaocheng.cn@gmail.com>
Date: Thu, 27 May 2021 11:19:45 +0800
Subject: [PATCH 1659/3804] perf/core: Make local function
 perf_pmu_snapshot_aux() static

Fixes the following W=1 kernel build warning:

  kernel/events/core.c:6670:6: warning: no previous prototype for 'perf_pmu_snapshot_aux' [-Wmissing-prototypes]

Signed-off-by: Haocheng Xie <xiehaocheng.cn@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210527031947.1801-2-xiehaocheng.cn@gmail.com
---
 kernel/events/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2e947a4858983..4c6b3205051a5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6674,10 +6674,10 @@ out:
 	return data->aux_size;
 }
 
-long perf_pmu_snapshot_aux(struct perf_buffer *rb,
-			   struct perf_event *event,
-			   struct perf_output_handle *handle,
-			   unsigned long size)
+static long perf_pmu_snapshot_aux(struct perf_buffer *rb,
+                                 struct perf_event *event,
+                                 struct perf_output_handle *handle,
+                                 unsigned long size)
 {
 	unsigned long flags;
 	long ret;
-- 
GitLab


From a1ddf5249f2c50f2e6e5efe604f01a01d5c23ef5 Mon Sep 17 00:00:00 2001
From: Haocheng Xie <xiehaocheng.cn@gmail.com>
Date: Thu, 27 May 2021 11:19:46 +0800
Subject: [PATCH 1660/3804] perf/core: Fix DocBook warnings

Fix the following W=1 kernel build warning(s):

  kernel/events/core.c:143: warning: Function parameter or member 'cpu' not described in 'cpu_function_call'
  kernel/events/core.c:11924: warning: Function parameter or member 'flags' not described in 'sys_perf_event_open'
  kernel/events/core.c:12382: warning: Function parameter or member 'overflow_handler' not described in 'perf_event_create_kernel_counter'
  kernel/events/core.c:12382: warning: Function parameter or member 'context' not described in 'perf_event_create_kernel_counter'

Signed-off-by: Haocheng Xie <xiehaocheng.cn@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210527031947.1801-3-xiehaocheng.cn@gmail.com
---
 kernel/events/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4c6b3205051a5..6c964dee2cd7e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -132,6 +132,7 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
 
 /**
  * cpu_function_call - call a function on the cpu
+ * @cpu:	target cpu to queue this function
  * @func:	the function to be called
  * @info:	the function call argument
  *
@@ -11924,6 +11925,7 @@ again:
  * @pid:		target pid
  * @cpu:		target cpu
  * @group_fd:		group leader event fd
+ * @flags:		perf event open flags
  */
 SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_attr __user *, attr_uptr,
@@ -12380,6 +12382,8 @@ err_fd:
  * @attr: attributes of the counter to create
  * @cpu: cpu in which the counter is bound
  * @task: task to profile (NULL for percpu)
+ * @overflow_handler: callback to trigger when we hit the event
+ * @context: context data could be used in overflow_handler callback
  */
 struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
-- 
GitLab


From 875dd7bf548104bc1d2c5784a6af6cf38215a216 Mon Sep 17 00:00:00 2001
From: Haocheng Xie <xiehaocheng.cn@gmail.com>
Date: Thu, 27 May 2021 11:19:47 +0800
Subject: [PATCH 1661/3804] perf/hw_breakpoint: Fix DocBook warnings in perf
 hw_breakpoint

Fix the following W=1 kernel build warning(s):

  kernel/events/hw_breakpoint.c:461: warning: Function parameter or member 'context' not described in 'register_user_hw_breakpoint'
  kernel/events/hw_breakpoint.c:560: warning: Function parameter or member 'context' not described in 'register_wide_hw_breakpoint'

Signed-off-by: Haocheng Xie <xiehaocheng.cn@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210527031947.1801-4-xiehaocheng.cn@gmail.com
---
 kernel/events/hw_breakpoint.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index b48d7039a015d..835973444a1e7 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -451,6 +451,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
  * register_user_hw_breakpoint - register a hardware breakpoint for user space
  * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
+ * @context: context data could be used in the triggered callback
  * @tsk: pointer to 'task_struct' of the process to which the address belongs
  */
 struct perf_event *
@@ -550,6 +551,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
  * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
  * @attr: breakpoint attributes
  * @triggered: callback to trigger when we hit the breakpoint
+ * @context: context data could be used in the triggered callback
  *
  * @return a set of per_cpu pointers to perf events
  */
-- 
GitLab


From dbec64b11c65d74f31427e2b9d5746fbf17bf840 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 25 May 2021 17:55:39 +0100
Subject: [PATCH 1662/3804] gpio: wcd934x: Fix shift-out-of-bounds error

The bit-mask for pins 0 to 4 is BIT(0) to BIT(4); however, we ended up with
BIT(n - 1), which is not right. This was caught by the UBSAN check below:

UBSAN: shift-out-of-bounds in drivers/gpio/gpio-wcd934x.c:34:14

Fixes: 59c324683400 ("gpio: wcd934x: Add support to wcd934x gpio controller")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-wcd934x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-wcd934x.c b/drivers/gpio/gpio-wcd934x.c
index 1cbce59908558..97e6caedf1f33 100644
--- a/drivers/gpio/gpio-wcd934x.c
+++ b/drivers/gpio/gpio-wcd934x.c
@@ -7,7 +7,7 @@
 #include <linux/slab.h>
 #include <linux/of_device.h>
 
-#define WCD_PIN_MASK(p) BIT(p - 1)
+#define WCD_PIN_MASK(p) BIT(p)
 #define WCD_REG_DIR_CTL_OFFSET 0x42
 #define WCD_REG_VAL_CTL_OFFSET 0x43
 #define WCD934X_NPINS		5
-- 
GitLab


From c0e0436cb4f6627146acdae8c77828f18db01151 Mon Sep 17 00:00:00 2001
From: Til Jasper Ullrich <tju@tju.me>
Date: Tue, 25 May 2021 17:09:52 +0200
Subject: [PATCH 1663/3804] platform/x86: thinkpad_acpi: Add X1 Carbon Gen 9
 second fan support

The X1 Carbon Gen 9 uses two fans instead of one like the previous
generation. This adds support for the second fan. It has been tested
on my X1 Carbon Gen 9 (20XXS00100) and works fine.

Signed-off-by: Til Jasper Ullrich <tju@tju.me>
Link: https://lore.kernel.org/r/20210525150950.14805-1-tju@tju.me
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index dd60c9397d352..edd71e744d275 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -8853,6 +8853,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = {
 	TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (2nd gen) */
 	TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL),	/* P1 / X1 Extreme (3nd gen) */
 	TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL),	/* P15 (1st gen) / P15v (1st gen) */
+	TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL),	/* X1 Carbon (9th gen) */
 };
 
 static int __init fan_init(struct ibm_init_struct *iibm)
-- 
GitLab


From e3e880bb1518eb10a4b4bb4344ed614d6856f190 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Wed, 26 May 2021 22:18:31 +0800
Subject: [PATCH 1664/3804] KVM: arm64: Resolve all pending PC updates before
 immediate exit

Commit 26778aaa134a ("KVM: arm64: Commit pending PC adjustemnts before
returning to userspace") fixed the PC updating issue by forcing an explicit
synchronisation of the exception state on vcpu exit to userspace.

However, we forgot to take into account the case where immediate_exit is
set by userspace and KVM_RUN will exit immediately. Fix it by resolving all
pending PC updates before returning to userspace.

Since __kvm_adjust_pc() relies on a loaded vcpu context, I moved the
immediate_exit checking right after vcpu_load(). We will get some overhead
if immediate_exit is true (which should hopefully be rare).

Fixes: 26778aaa134a ("KVM: arm64: Commit pending PC adjustemnts before returning to userspace")
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210526141831.1662-1-yuzenghui@huawei.com
Cc: stable@vger.kernel.org # 5.11
---
 arch/arm64/kvm/arm.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1126eae274000..e720148232a06 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -720,11 +720,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	if (run->immediate_exit)
-		return -EINTR;
-
 	vcpu_load(vcpu);
 
+	if (run->immediate_exit) {
+		ret = -EINTR;
+		goto out;
+	}
+
 	kvm_sigset_activate(vcpu);
 
 	ret = 1;
@@ -897,6 +899,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 
 	kvm_sigset_deactivate(vcpu);
 
+out:
 	/*
 	 * In the unlikely event that we are returning to userspace
 	 * with pending exceptions or PC adjustment, commit these
-- 
GitLab


From 66e94d5cafd4decd4f92d16a022ea587d7f4094f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 24 May 2021 18:07:52 +0100
Subject: [PATCH 1665/3804] KVM: arm64: Prevent mixed-width VM creation

It looks like we have tolerated creating mixed-width VMs since...
forever. However, that was never the intention, and we'd rather
not have to support that pointless complexity.

Forbid such a setup by making sure all the vcpus have the same
register width.

Reported-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Cc: stable@vger.kernel.org
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210524170752.1549797-1-maz@kernel.org
---
 arch/arm64/include/asm/kvm_emulate.h |  5 +++++
 arch/arm64/kvm/reset.c               | 28 ++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index f612c090f2e41..01b9857757f2a 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -463,4 +463,9 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
 	vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC;
 }
 
+static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
+{
+	return test_bit(feature, vcpu->arch.features);
+}
+
 #endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 956cdc240148b..d37ebee085cfe 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -166,6 +166,25 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *tmp;
+	bool is32bit;
+	int i;
+
+	is32bit = vcpu_has_feature(vcpu, KVM_ARM_VCPU_EL1_32BIT);
+	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
+		return false;
+
+	/* Check that the vcpus are either all 32bit or all 64bit */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
+			return false;
+	}
+
+	return true;
+}
+
 /**
  * kvm_reset_vcpu - sets core registers and sys_regs to reset value
  * @vcpu: The VCPU pointer
@@ -217,13 +236,14 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	if (!vcpu_allowed_register_width(vcpu)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	switch (vcpu->arch.target) {
 	default:
 		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
-			if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) {
-				ret = -EINVAL;
-				goto out;
-			}
 			pstate = VCPU_RESET_PSTATE_SVC;
 		} else {
 			pstate = VCPU_RESET_PSTATE_EL1;
-- 
GitLab


From 5d8db38ad7660e4d78f4e2a63f14336f31f07a63 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 27 May 2021 17:26:40 +0800
Subject: [PATCH 1666/3804] thermal/drivers/qcom: Fix error code in
 adc_tm5_get_dt_channel_data()

Return -EINVAL when args is invalid instead of 'ret' which is set to
zero by a previous successful call to a function.

Fixes: ca66dca5eda6 ("thermal: qcom: add support for adc-tm5 PMIC thermal monitor")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210527092640.2070555-1-yangyingliang@huawei.com
---
 drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
index b460b56e981cc..232fd0b333251 100644
--- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
+++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
@@ -441,7 +441,7 @@ static int adc_tm5_get_dt_channel_data(struct adc_tm5_chip *adc_tm,
 
 	if (args.args_count != 1 || args.args[0] >= ADC5_MAX_CHANNEL) {
 		dev_err(dev, "%s: invalid ADC channel number %d\n", name, chan);
-		return ret;
+		return -EINVAL;
 	}
 	channel->adc_channel = args.args[0];
 
-- 
GitLab


From d149b855b955fe92ab16ddd59c1d540f82e6a40f Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Thu, 27 May 2021 17:54:04 +0800
Subject: [PATCH 1667/3804] regulator: bd71815: fix platform_no_drv_owner.cocci
 warnings

./drivers/regulator/bd71815-regulator.c:644:3-8: No need to set .owner here. The core will do it.

 Remove .owner field if calls are used which set it automatically

Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Acked-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Message-Id: <1622109244-54739-1-git-send-email-zou_wei@huawei.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71815-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c
index a079efa800925..4dd21ac24ddf8 100644
--- a/drivers/regulator/bd71815-regulator.c
+++ b/drivers/regulator/bd71815-regulator.c
@@ -641,7 +641,6 @@ MODULE_DEVICE_TABLE(platform, bd7181x_pmic_id);
 static struct platform_driver bd7181x_regulator = {
 	.driver = {
 		.name = "bd7181x-pmic",
-		.owner = THIS_MODULE,
 	},
 	.probe = bd7181x_probe,
 	.id_table = bd7181x_pmic_id,
-- 
GitLab


From 56e4ee82e850026d71223262c07df7d6af3bd872 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Mon, 24 May 2021 22:54:57 +0300
Subject: [PATCH 1668/3804] ipvs: ignore IP_VS_SVC_F_HASHED flag when adding
 service

syzbot reported memory leak [1] when adding service with
HASHED flag. We should ignore this flag both from sockopt
and netlink provided data, otherwise the service is not
hashed and not visible while releasing resources.

[1]
BUG: memory leak
unreferenced object 0xffff888115227800 (size 512):
  comm "syz-executor263", pid 8658, jiffies 4294951882 (age 12.560s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff83977188>] kmalloc include/linux/slab.h:556 [inline]
    [<ffffffff83977188>] kzalloc include/linux/slab.h:686 [inline]
    [<ffffffff83977188>] ip_vs_add_service+0x598/0x7c0 net/netfilter/ipvs/ip_vs_ctl.c:1343
    [<ffffffff8397d770>] do_ip_vs_set_ctl+0x810/0xa40 net/netfilter/ipvs/ip_vs_ctl.c:2570
    [<ffffffff838449a8>] nf_setsockopt+0x68/0xa0 net/netfilter/nf_sockopt.c:101
    [<ffffffff839ae4e9>] ip_setsockopt+0x259/0x1ff0 net/ipv4/ip_sockglue.c:1435
    [<ffffffff839fa03c>] raw_setsockopt+0x18c/0x1b0 net/ipv4/raw.c:857
    [<ffffffff83691f20>] __sys_setsockopt+0x1b0/0x360 net/socket.c:2117
    [<ffffffff836920f2>] __do_sys_setsockopt net/socket.c:2128 [inline]
    [<ffffffff836920f2>] __se_sys_setsockopt net/socket.c:2125 [inline]
    [<ffffffff836920f2>] __x64_sys_setsockopt+0x22/0x30 net/socket.c:2125
    [<ffffffff84350efa>] do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47
    [<ffffffff84400068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Reported-and-tested-by: syzbot+e562383183e4b1766930@syzkaller.appspotmail.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d45dbcba8b49c..c25097092a060 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1367,7 +1367,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
 	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
 	svc->port = u->port;
 	svc->fwmark = u->fwmark;
-	svc->flags = u->flags;
+	svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
 	svc->timeout = u->timeout * HZ;
 	svc->netmask = u->netmask;
 	svc->ipvs = ipvs;
-- 
GitLab


From 6bd5b743686243dae7351d5dcceeb7f171201bb4 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 18 May 2021 05:00:31 -0700
Subject: [PATCH 1669/3804] KVM: PPC: exit halt polling on need_resched()

This is inspired by commit 262de4102c7bb8 (kvm: exit halt polling on
need_resched() as well). Since PPC implements its own arch-specific halt
polling logic, we have to add the need_resched() check there as well. This
patch adds a helper function that can be shared between the book3s and
generic halt-polling loops.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Venkatesh Srinivas <venkateshs@chromium.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Venkatesh Srinivas <venkateshs@chromium.org>
Cc: Jim Mattson <jmattson@google.com>
Cc: David Matlack <dmatlack@google.com>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1621339235-11131-1-git-send-email-wanpengli@tencent.com>
[Make the function inline. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 include/linux/kvm_host.h     | 6 ++++++
 virt/kvm/kvm_main.c          | 3 +--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a80d240b764..7360350e66fff 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3936,7 +3936,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 				break;
 			}
 			cur = ktime_get();
-		} while (single_task_running() && ktime_before(cur, stop));
+		} while (kvm_vcpu_can_poll(cur, stop));
 
 		spin_lock(&vc->lock);
 		vc->vcore_state = VCORE_INACTIVE;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2f34487e21f21..5d4b96b36ec0c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -10,6 +10,7 @@
 #include <linux/spinlock.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
+#include <linux/sched/stat.h>
 #include <linux/bug.h>
 #include <linux/minmax.h>
 #include <linux/mm.h>
@@ -265,6 +266,11 @@ static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
 	return !!map->hva;
 }
 
+static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
+{
+	return single_task_running() && !need_resched() && ktime_before(cur, stop);
+}
+
 /*
  * Sometimes a large or cross-page mmio needs to be broken up into separate
  * exits for userspace servicing.
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6b4feb92dc797..5f40725144f59 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2973,8 +2973,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 				goto out;
 			}
 			poll_end = cur = ktime_get();
-		} while (single_task_running() && !need_resched() &&
-			 ktime_before(cur, stop));
+		} while (kvm_vcpu_can_poll(cur, stop));
 	}
 
 	prepare_to_rcuwait(&vcpu->wait);
-- 
GitLab


From 72b268a8e9307a1757f61af080e990b5baa11d2a Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 18 May 2021 05:00:32 -0700
Subject: [PATCH 1670/3804] KVM: X86: Bail out of direct yield in case of
 under-committed scenarios

In under-committed scenarios, vCPUs can be scheduled easily;
kvm_vcpu_yield_to adds extra overhead, and it is also common to see
vcpu->ready being true but the yield later failing because p->state is
TASK_RUNNING.

Let's bail out in such scenarios by checking the length of current cpu
runqueue, which can be treated as a hint of under-committed instead of
guarantee of accuracy. 30%+ of directed-yield attempts can now avoid
the expensive lookups in kvm_sched_yield() in an under-committed scenario.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1621339235-11131-2-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9b6bca6169291..dfb7c320581fc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8360,6 +8360,9 @@ static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
 
 	vcpu->stat.directed_yield_attempted++;
 
+	if (single_task_running())
+		goto no_yield;
+
 	rcu_read_lock();
 	map = rcu_dereference(vcpu->kvm->arch.apic_map);
 
-- 
GitLab


From 1eff0ada88b48e4ac1e3fe26483b3684fedecd27 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 18 May 2021 05:00:33 -0700
Subject: [PATCH 1671/3804] KVM: X86: Fix vCPU preempted state from guest's
 point of view

Commit 66570e966dd9 (kvm: x86: only provide PV features if enabled in guest's
CPUID) avoids accessing the host-side pv tlb shootdown logic when this pv
feature is not exposed to the guest. However, kvm_steal_time.preempted is not
only used by the pv tlb shootdown logic but also to mitigate the lock holder
preemption issue. From the guest's point of view, the vCPU is always preempted,
since we lose the reset of kvm_steal_time.preempted before vmentry if the pv
tlb shootdown feature is not exposed. This patch fixes it by clearing
kvm_steal_time.preempted before vmentry.

Fixes: 66570e966dd9 (kvm: x86: only provide PV features if enabled in guest's CPUID)
Reviewed-by: Sean Christopherson <seanjc@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1621339235-11131-3-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dfb7c320581fc..bed7b5348c0e9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3105,6 +3105,8 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 				       st->preempted & KVM_VCPU_FLUSH_TLB);
 		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
 			kvm_vcpu_flush_tlb_guest(vcpu);
+	} else {
+		st->preempted = 0;
 	}
 
 	vcpu->arch.st.preempted = 0;
-- 
GitLab


From da6d63a0062a3ee721b84123b83ec093f25759b0 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 18 May 2021 05:00:34 -0700
Subject: [PATCH 1672/3804] KVM: X86: hyper-v: Take srcu lock when accessing
 kvm_memslots()

   WARNING: suspicious RCU usage
   5.13.0-rc1 #4 Not tainted
   -----------------------------
   ./include/linux/kvm_host.h:710 suspicious rcu_dereference_check() usage!

  other info that might help us debug this:

  rcu_scheduler_active = 2, debug_locks = 1
   1 lock held by hyperv_clock/8318:
    #0: ffffb6b8cb05a7d8 (&hv->hv_lock){+.+.}-{3:3}, at: kvm_hv_invalidate_tsc_page+0x3e/0xa0 [kvm]

  stack backtrace:
  CPU: 3 PID: 8318 Comm: hyperv_clock Not tainted 5.13.0-rc1 #4
  Call Trace:
   dump_stack+0x87/0xb7
   lockdep_rcu_suspicious+0xce/0xf0
   kvm_write_guest_page+0x1c1/0x1d0 [kvm]
   kvm_write_guest+0x50/0x90 [kvm]
   kvm_hv_invalidate_tsc_page+0x79/0xa0 [kvm]
   kvm_gen_update_masterclock+0x1d/0x110 [kvm]
   kvm_arch_vm_ioctl+0x2a7/0xc50 [kvm]
   kvm_vm_ioctl+0x123/0x11d0 [kvm]
   __x64_sys_ioctl+0x3ed/0x9d0
   do_syscall_64+0x3d/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

kvm_memslots() will be called by kvm_write_guest(), so we should take the srcu lock.

Fixes: e880c6ea5 (KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs)
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1621339235-11131-4-git-send-email-wanpengli@tencent.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f98370a399361..f00830e5202fe 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1172,6 +1172,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
 {
 	struct kvm_hv *hv = to_kvm_hv(kvm);
 	u64 gfn;
+	int idx;
 
 	if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
 	    hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
@@ -1190,9 +1191,16 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
 	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
 
 	hv->tsc_ref.tsc_sequence = 0;
+
+	/*
+	 * Take the srcu lock as memslots will be accessed to check the gfn
+	 * cache generation against the memslots generation.
+	 */
+	idx = srcu_read_lock(&kvm->srcu);
 	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
 			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
 		hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+	srcu_read_unlock(&kvm->srcu, idx);
 
 out_unlock:
 	mutex_unlock(&hv->hv_lock);
-- 
GitLab


From 39fe2fc96694164723846fccf6caa42c3aee6ec4 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 12 May 2021 12:31:06 +0800
Subject: [PATCH 1673/3804] selftests: kvm: make allocation of extra memory
 take effect

The extra memory pages are not allocated during VM creation.
perf_test_util and kvm_page_table_test currently use them to allocate
extra memory.

Fix it by adding extra_mem_pages to the total memory calculation before
allocating.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Message-Id: <20210512043107.30076-1-zhenzhong.duan@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index fc83f6c5902dd..159f4d62241d7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -295,7 +295,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 	 */
 	uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
 	uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
-	uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+	uint64_t pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages + vcpu_pages + extra_pg_pages;
 	struct kvm_vm *vm;
 	int i;
 
-- 
GitLab


From a13534d6676d2f2a9aa286e27e482b4896ff90e3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 24 May 2021 14:27:38 +0200
Subject: [PATCH 1674/3804] selftests: kvm: fix potential issue with ELF
 loading

vm_vaddr_alloc() sets up GVA to GPA mapping page by page; therefore, GPAs
may not be contiguous if the same memslot is used for data and page table allocation.

kvm_vm_elf_load() however expects a contiguous range of HVAs (and thus GPAs)
because it does not try to read file data page by page.  Fix this mismatch
by allocating memory in one step.

Reported-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 159f4d62241d7..12d953d8ee35e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1099,6 +1099,9 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
 	virt_pgd_alloc(vm, pgd_memslot);
+	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
+					      KVM_UTIL_MIN_PFN * vm->page_size,
+					      data_memslot);
 
 	/*
 	 * Find an unused range of virtual page addresses of at least
@@ -1108,11 +1111,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 
 	/* Map the virtual pages. */
 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
-		pages--, vaddr += vm->page_size) {
-		vm_paddr_t paddr;
-
-		paddr = vm_phy_page_alloc(vm,
-				KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
 
 		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
 
-- 
GitLab


From 22721a56109940f15b673d0f01907b7a7202275e Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Tue, 13 Apr 2021 16:08:27 +0200
Subject: [PATCH 1675/3804] KVM: selftests: Keep track of memslots more
 efficiently

The KVM selftest framework was using a simple list for keeping track of
the memslots currently in use.
This resulted in lookups and adding a single memslot being O(n), the
latter due to linear scanning of the existing memslot set to check for
the presence of any conflicting entries.

Before this change, benchmarking a high count of memslots was more or less
impossible as pretty much all the benchmark time was spent in the
selftest framework code.

We can simply use an rbtree for keeping track of both gfn and hva.
We don't need an interval tree for hva here as we can't have overlapping
memslots because we allocate a completely new memory chunk for each new
memslot.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-Id: <b12749d47ee860468240cf027412c91b76dbe3db.1618253574.git.maciej.szmigiero@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile          |   2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c    | 141 ++++++++++++++----
 .../selftests/kvm/lib/kvm_util_internal.h     |  15 +-
 tools/testing/selftests/kvm/lib/rbtree.c      |   1 +
 4 files changed, 124 insertions(+), 35 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/rbtree.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index e439d027939dd..a8c30f888d40f 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -33,7 +33,7 @@ ifeq ($(ARCH),s390)
 	UNAME_M := s390x
 endif
 
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 12d953d8ee35e..1255744758e36 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -203,7 +203,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
 	INIT_LIST_HEAD(&vm->vcpus);
-	INIT_LIST_HEAD(&vm->userspace_mem_regions);
+	vm->regions.gpa_tree = RB_ROOT;
+	vm->regions.hva_tree = RB_ROOT;
+	hash_init(vm->regions.slot_hash);
 
 	vm->mode = mode;
 	vm->type = 0;
@@ -355,13 +357,14 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
  */
 void kvm_vm_restart(struct kvm_vm *vmp, int perm)
 {
+	int ctr;
 	struct userspace_mem_region *region;
 
 	vm_open(vmp, perm);
 	if (vmp->has_irqchip)
 		vm_create_irqchip(vmp);
 
-	list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
+	hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
 		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
 		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
 			    "  rc: %i errno: %i\n"
@@ -424,14 +427,21 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
 static struct userspace_mem_region *
 userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
 {
-	struct userspace_mem_region *region;
+	struct rb_node *node;
 
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+	for (node = vm->regions.gpa_tree.rb_node; node; ) {
+		struct userspace_mem_region *region =
+			container_of(node, struct userspace_mem_region, gpa_node);
 		uint64_t existing_start = region->region.guest_phys_addr;
 		uint64_t existing_end = region->region.guest_phys_addr
 			+ region->region.memory_size - 1;
 		if (start <= existing_end && end >= existing_start)
 			return region;
+
+		if (start < existing_start)
+			node = node->rb_left;
+		else
+			node = node->rb_right;
 	}
 
 	return NULL;
@@ -546,11 +556,16 @@ void kvm_vm_release(struct kvm_vm *vmp)
 }
 
 static void __vm_mem_region_delete(struct kvm_vm *vm,
-				   struct userspace_mem_region *region)
+				   struct userspace_mem_region *region,
+				   bool unlink)
 {
 	int ret;
 
-	list_del(&region->list);
+	if (unlink) {
+		rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+		rb_erase(&region->hva_node, &vm->regions.hva_tree);
+		hash_del(&region->slot_node);
+	}
 
 	region->region.memory_size = 0;
 	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
@@ -569,14 +584,16 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
  */
 void kvm_vm_free(struct kvm_vm *vmp)
 {
-	struct userspace_mem_region *region, *tmp;
+	int ctr;
+	struct hlist_node *node;
+	struct userspace_mem_region *region;
 
 	if (vmp == NULL)
 		return;
 
 	/* Free userspace_mem_regions. */
-	list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
-		__vm_mem_region_delete(vmp, region);
+	hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
+		__vm_mem_region_delete(vmp, region, false);
 
 	/* Free sparsebit arrays. */
 	sparsebit_free(&vmp->vpages_valid);
@@ -658,6 +675,57 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
 	return 0;
 }
 
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+					       struct userspace_mem_region *region)
+{
+	struct rb_node **cur, *parent;
+
+	for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
+		struct userspace_mem_region *cregion;
+
+		cregion = container_of(*cur, typeof(*cregion), gpa_node);
+		parent = *cur;
+		if (region->region.guest_phys_addr <
+		    cregion->region.guest_phys_addr)
+			cur = &(*cur)->rb_left;
+		else {
+			TEST_ASSERT(region->region.guest_phys_addr !=
+				    cregion->region.guest_phys_addr,
+				    "Duplicate GPA in region tree");
+
+			cur = &(*cur)->rb_right;
+		}
+	}
+
+	rb_link_node(&region->gpa_node, parent, cur);
+	rb_insert_color(&region->gpa_node, gpa_tree);
+}
+
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+					       struct userspace_mem_region *region)
+{
+	struct rb_node **cur, *parent;
+
+	for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
+		struct userspace_mem_region *cregion;
+
+		cregion = container_of(*cur, typeof(*cregion), hva_node);
+		parent = *cur;
+		if (region->host_mem < cregion->host_mem)
+			cur = &(*cur)->rb_left;
+		else {
+			TEST_ASSERT(region->host_mem !=
+				    cregion->host_mem,
+				    "Duplicate HVA in region tree");
+
+			cur = &(*cur)->rb_right;
+		}
+	}
+
+	rb_link_node(&region->hva_node, parent, cur);
+	rb_insert_color(&region->hva_node, hva_tree);
+}
+
 /*
  * VM Userspace Memory Region Add
  *
@@ -722,7 +790,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 			(uint64_t) region->region.memory_size);
 
 	/* Confirm no region with the requested slot already exists. */
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+			       slot) {
 		if (region->region.slot != slot)
 			continue;
 
@@ -793,8 +862,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		ret, errno, slot, flags,
 		guest_paddr, (uint64_t) region->region.memory_size);
 
-	/* Add to linked-list of memory regions. */
-	list_add(&region->list, &vm->userspace_mem_regions);
+	/* Add to quick lookup data structures */
+	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
 }
 
 /*
@@ -817,10 +888,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
 {
 	struct userspace_mem_region *region;
 
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+	hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+			       memslot)
 		if (region->region.slot == memslot)
 			return region;
-	}
 
 	fprintf(stderr, "No mem region with the requested slot found,\n"
 		"  requested slot: %u\n", memslot);
@@ -905,7 +976,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
  */
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
 {
-	__vm_mem_region_delete(vm, memslot2region(vm, slot));
+	__vm_mem_region_delete(vm, memslot2region(vm, slot), true);
 }
 
 /*
@@ -1176,16 +1247,14 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
 {
 	struct userspace_mem_region *region;
 
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
-		if ((gpa >= region->region.guest_phys_addr)
-			&& (gpa <= (region->region.guest_phys_addr
-				+ region->region.memory_size - 1)))
-			return (void *) ((uintptr_t) region->host_mem
-				+ (gpa - region->region.guest_phys_addr));
+	region = userspace_mem_region_find(vm, gpa, gpa);
+	if (!region) {
+		TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+		return NULL;
 	}
 
-	TEST_FAIL("No vm physical memory at 0x%lx", gpa);
-	return NULL;
+	return (void *)((uintptr_t)region->host_mem
+		+ (gpa - region->region.guest_phys_addr));
 }
 
 /*
@@ -1207,15 +1276,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
  */
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 {
-	struct userspace_mem_region *region;
+	struct rb_node *node;
+
+	for (node = vm->regions.hva_tree.rb_node; node; ) {
+		struct userspace_mem_region *region =
+			container_of(node, struct userspace_mem_region, hva_node);
+
+		if (hva >= region->host_mem) {
+			if (hva <= (region->host_mem
+				+ region->region.memory_size - 1))
+				return (vm_paddr_t)((uintptr_t)
+					region->region.guest_phys_addr
+					+ (hva - (uintptr_t)region->host_mem));
 
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
-		if ((hva >= region->host_mem)
-			&& (hva <= (region->host_mem
-				+ region->region.memory_size - 1)))
-			return (vm_paddr_t) ((uintptr_t)
-				region->region.guest_phys_addr
-				+ (hva - (uintptr_t) region->host_mem));
+			node = node->rb_right;
+		} else
+			node = node->rb_left;
 	}
 
 	TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1821,6 +1897,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
  */
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
+	int ctr;
 	struct userspace_mem_region *region;
 	struct vcpu *vcpu;
 
@@ -1828,7 +1905,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
 	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
 	fprintf(stream, "%*sMem Regions:\n", indent, "");
-	list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+	hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
 		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
 			"host_virt: %p\n", indent + 2, "",
 			(uint64_t) region->region.guest_phys_addr,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 91ce1b5d480b2..b30e8c7b119b8 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -8,6 +8,9 @@
 #ifndef SELFTEST_KVM_UTIL_INTERNAL_H
 #define SELFTEST_KVM_UTIL_INTERNAL_H
 
+#include "linux/hashtable.h"
+#include "linux/rbtree.h"
+
 #include "sparsebit.h"
 
 struct userspace_mem_region {
@@ -18,7 +21,9 @@ struct userspace_mem_region {
 	void *host_mem;
 	void *mmap_start;
 	size_t mmap_size;
-	struct list_head list;
+	struct rb_node gpa_node;
+	struct rb_node hva_node;
+	struct hlist_node slot_node;
 };
 
 struct vcpu {
@@ -31,6 +36,12 @@ struct vcpu {
 	uint32_t dirty_gfns_count;
 };
 
+struct userspace_mem_regions {
+	struct rb_root gpa_tree;
+	struct rb_root hva_tree;
+	DECLARE_HASHTABLE(slot_hash, 9);
+};
+
 struct kvm_vm {
 	int mode;
 	unsigned long type;
@@ -43,7 +54,7 @@ struct kvm_vm {
 	unsigned int va_bits;
 	uint64_t max_gfn;
 	struct list_head vcpus;
-	struct list_head userspace_mem_regions;
+	struct userspace_mem_regions regions;
 	struct sparsebit *vpages_valid;
 	struct sparsebit *vpages_mapped;
 	bool has_irqchip;
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 0000000000000..a703f0194ea3a
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
-- 
GitLab


From cad347fab142bcb9bebc125b5ba0c1e52ce74fdc Mon Sep 17 00:00:00 2001
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Date: Tue, 13 Apr 2021 16:08:28 +0200
Subject: [PATCH 1676/3804] KVM: selftests: add a memslot-related performance
 benchmark

This benchmark contains the following tests:
* Map test, where the host unmaps guest memory while the guest writes to
it (maps it).

The test is designed in a way to make the unmap operation on the host
take a negligible amount of time in comparison with the mapping
operation in the guest.

The test area is actually split in two: the first half is being mapped
by the guest while the second half is being unmapped by the host.
Then a guest <-> host sync happens and the areas are reversed.

* Unmap test which is broadly similar to the above map test, but it is
designed in an opposite way: to make the mapping operation in the guest
take a negligible amount of time in comparison with the unmap operation
on the host.
This test is available in two variants: with per-page unmap operation
or a chunked one (using 2 MiB chunk size).

* Move active area test which involves moving the last (highest gfn)
memslot a bit back and forth on the host while the guest is
concurrently writing around the area being moved (including over the
moved memslot).

* Move inactive area test which is similar to the previous move active
area test, but now guest writes all happen outside of the area being
moved.

* Read / write test in which the guest writes to the beginning of each
page of the test area while the host writes to the middle of each such
page.
Then each side checks the values the other side has written.
This particular test is not expected to give different results depending
on particular memslots implementation, it is meant as a rough sanity
check and to provide insight on the spread of test results expected.

Each test performs its operation in a loop until a test period ends
(this is 5 seconds by default, but it is configurable).
Then the total count of loops done is divided by the actual elapsed
time to give the test result.

The tests have a configurable memslot cap with the "-s" test option, by
default the system maximum is used.
Each test is repeated a particular number of times (by default 20
times), the best result achieved is printed.

The test memory area is divided equally between memslots, the remainder
is added to the last memslot.
The test area size does not depend on the number of memslots in use.

The tests also measure the time that it took to add all these memslots.
The best result from the tests that use the whole test area is printed
after all the requested tests are done.

In general, these tests are designed to use as much memory as possible
(within reason) while still doing 100+ loops even on high memslot counts
with the default test length.
Increasing the test runtime makes it increasingly more likely that some
event will happen on the system during the test run, which might lower
the test result.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Message-Id: <8d31bb3d92bc8fa33a9756fa802ee14266ab994e.1618253574.git.maciej.szmigiero@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |    1 +
 tools/testing/selftests/kvm/Makefile          |    1 +
 .../testing/selftests/kvm/memslot_perf_test.c | 1037 +++++++++++++++++
 3 files changed, 1039 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/memslot_perf_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index bd83158e0e0b5..524c857a049c3 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -41,5 +41,6 @@
 /kvm_create_max_vcpus
 /kvm_page_table_test
 /memslot_modification_stress_test
+/memslot_perf_test
 /set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a8c30f888d40f..daaee1888b128 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -74,6 +74,7 @@ TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
 TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86_64 += memslot_perf_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
new file mode 100644
index 0000000000000..4ae0e5ec0f740
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -0,0 +1,1037 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A memslot-related performance benchmark.
+ *
+ * Copyright (C) 2021 Oracle and/or its affiliates.
+ *
+ * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
+ */
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MEM_SIZE		((512U << 20) + 4096)
+#define MEM_SIZE_PAGES		(MEM_SIZE / 4096)
+#define MEM_GPA		0x10000000UL
+#define MEM_AUX_GPA		MEM_GPA
+#define MEM_SYNC_GPA		MEM_AUX_GPA
+#define MEM_TEST_GPA		(MEM_AUX_GPA + 4096)
+#define MEM_TEST_SIZE		(MEM_SIZE - 4096)
+static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
+static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
+
+/*
+ * 32 MiB is max size that gets well over 100 iterations on 509 slots.
+ * Considering that each slot needs to have at least one page up to
+ * 8194 slots in use can then be tested (although with slightly
+ * limited resolution).
+ */
+#define MEM_SIZE_MAP		((32U << 20) + 4096)
+#define MEM_SIZE_MAP_PAGES	(MEM_SIZE_MAP / 4096)
+#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - 4096)
+#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
+static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
+static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
+
+/*
+ * 128 MiB is min size that fills 32k slots with at least one page in each
+ * while at the same time gets 100+ iterations in such test
+ */
+#define MEM_TEST_UNMAP_SIZE		(128U << 20)
+#define MEM_TEST_UNMAP_SIZE_PAGES	(MEM_TEST_UNMAP_SIZE / 4096)
+/* 2 MiB chunk size like a typical huge page */
+#define MEM_TEST_UNMAP_CHUNK_PAGES	(2U << (20 - 12))
+static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
+	      "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
+	      "invalid unmap test region size");
+static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
+	      (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
+	      "invalid unmap test region size");
+
+/*
+ * For the move active test the middle of the test area is placed on
+ * a memslot boundary: half lies in the memslot being moved, half in
+ * other memslot(s).
+ *
+ * When running this test with 32k memslots (32764, really) each memslot
+ * contains 4 pages.
+ * The last one additionally contains the remaining 21 pages of memory,
+ * for the total size of 25 pages.
+ * Hence, the maximum size here is 50 pages.
+ */
+#define MEM_TEST_MOVE_SIZE_PAGES	(50)
+#define MEM_TEST_MOVE_SIZE		(MEM_TEST_MOVE_SIZE_PAGES * 4096)
+#define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
+static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
+	      "invalid move test region size");
+
+#define MEM_TEST_VAL_1 0x1122334455667788
+#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
+
+struct vm_data {
+	struct kvm_vm *vm;
+	pthread_t vcpu_thread;
+	uint32_t nslots;
+	uint64_t npages;
+	uint64_t pages_per_slot;
+	void **hva_slots;
+	bool mmio_ok;
+	uint64_t mmio_gpa_min;
+	uint64_t mmio_gpa_max;
+};
+
+struct sync_area {
+	atomic_bool start_flag;
+	atomic_bool exit_flag;
+	atomic_bool sync_flag;
+	void *move_area_ptr;
+};
+
+/*
+ * Technically, we also need the atomic bool to be address-free, which
+ * is recommended, but not strictly required, by C11 for lockless
+ * implementations.
+ * However, in practice both GCC and Clang fulfill this requirement on
+ * all KVM-supported platforms.
+ */
+static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+
+static sem_t vcpu_ready;
+
+static bool map_unmap_verify;
+
+static bool verbose;
+#define pr_info_v(...)				\
+	do {					\
+		if (verbose)			\
+			pr_info(__VA_ARGS__);	\
+	} while (0)
+
+static void *vcpu_worker(void *data)
+{
+	struct vm_data *vm = data;
+	struct kvm_run *run;
+	struct ucall uc;
+	uint64_t cmd;
+
+	run = vcpu_state(vm->vm, VCPU_ID);
+	while (1) {
+		vcpu_run(vm->vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			cmd = get_ucall(vm->vm, VCPU_ID, &uc);
+			if (cmd != UCALL_SYNC)
+				break;
+
+			sem_post(&vcpu_ready);
+			continue;
+		}
+
+		if (run->exit_reason != KVM_EXIT_MMIO)
+			break;
+
+		TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+		TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+		TEST_ASSERT(run->mmio.len == 8,
+			    "Unexpected exit mmio size = %u", run->mmio.len);
+		TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
+			    run->mmio.phys_addr <= vm->mmio_gpa_max,
+			    "Unexpected exit mmio address = 0x%llx",
+			    run->mmio.phys_addr);
+	}
+
+	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+			  __FILE__, uc.args[1], uc.args[2]);
+
+	return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+	struct timespec ts;
+
+	TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+		    "clock_gettime() failed: %d\n", errno);
+
+	ts.tv_sec += 2;
+	TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+		    "sem_timedwait() failed: %d\n", errno);
+}
+
+static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+{
+	uint64_t gpage, pgoffs;
+	uint32_t slot, slotoffs;
+	void *base;
+
+	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
+	TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
+		    "Too high gpa to translate");
+	gpa -= MEM_GPA;
+
+	gpage = gpa / 4096;
+	pgoffs = gpa % 4096;
+	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+	slotoffs = gpage - (slot * data->pages_per_slot);
+
+	if (rempages) {
+		uint64_t slotpages;
+
+		if (slot == data->nslots - 1)
+			slotpages = data->npages - slot * data->pages_per_slot;
+		else
+			slotpages = data->pages_per_slot;
+
+		TEST_ASSERT(!pgoffs,
+			    "Asking for remaining pages in slot but gpa not page aligned");
+		*rempages = slotpages - slotoffs;
+	}
+
+	base = data->hva_slots[slot];
+	return (uint8_t *)base + slotoffs * 4096 + pgoffs;
+}
+
+static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+{
+	TEST_ASSERT(slot < data->nslots, "Too high slot number");
+
+	return MEM_GPA + slot * data->pages_per_slot * 4096;
+}
+
+static struct vm_data *alloc_vm(void)
+{
+	struct vm_data *data;
+
+	data = malloc(sizeof(*data));
+	TEST_ASSERT(data, "malloc(vmdata) failed");
+
+	data->vm = NULL;
+	data->hva_slots = NULL;
+
+	return data;
+}
+
+static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
+		       void *guest_code, uint64_t mempages,
+		       struct timespec *slot_runtime)
+{
+	uint32_t max_mem_slots;
+	uint64_t rempages;
+	uint64_t guest_addr;
+	uint32_t slot;
+	struct timespec tstart;
+	struct sync_area *sync;
+
+	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	TEST_ASSERT(max_mem_slots > 1,
+		    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
+	TEST_ASSERT(nslots > 1 || nslots == -1,
+		    "Slot count cap should be greater than 1");
+	if (nslots != -1)
+		max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
+	pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
+
+	TEST_ASSERT(mempages > 1,
+		    "Can't test without any memory");
+
+	data->npages = mempages;
+	data->nslots = max_mem_slots - 1;
+	data->pages_per_slot = mempages / data->nslots;
+	if (!data->pages_per_slot) {
+		*maxslots = mempages + 1;
+		return false;
+	}
+
+	rempages = mempages % data->nslots;
+	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
+	TEST_ASSERT(data->hva_slots, "malloc() fail");
+
+	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+
+	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
+		max_mem_slots - 1, data->pages_per_slot, rempages);
+
+	clock_gettime(CLOCK_MONOTONIC, &tstart);
+	for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
+		uint64_t npages;
+
+		npages = data->pages_per_slot;
+		if (slot == max_mem_slots - 1)
+			npages += rempages;
+
+		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
+					    guest_addr, slot, npages,
+					    0);
+		guest_addr += npages * 4096;
+	}
+	*slot_runtime = timespec_elapsed(tstart);
+
+	for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
+		uint64_t npages;
+		uint64_t gpa;
+
+		npages = data->pages_per_slot;
+		if (slot == max_mem_slots - 2)
+			npages += rempages;
+
+		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
+					 slot + 1);
+		TEST_ASSERT(gpa == guest_addr,
+			    "vm_phy_pages_alloc() failed\n");
+
+		data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
+		memset(data->hva_slots[slot], 0, npages * 4096);
+
+		guest_addr += npages * 4096;
+	}
+
+	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);
+
+	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+	atomic_init(&sync->start_flag, false);
+	atomic_init(&sync->exit_flag, false);
+	atomic_init(&sync->sync_flag, false);
+
+	data->mmio_ok = false;
+
+	return true;
+}
+
+static void launch_vm(struct vm_data *data)
+{
+	pr_info_v("Launching the test VM\n");
+
+	pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
+
+	/* Ensure the guest thread is spun up. */
+	wait_for_vcpu();
+}
+
+static void free_vm(struct vm_data *data)
+{
+	kvm_vm_free(data->vm);
+	free(data->hva_slots);
+	free(data);
+}
+
+static void wait_guest_exit(struct vm_data *data)
+{
+	pthread_join(data->vcpu_thread, NULL);
+}
+
+static void let_guest_run(struct sync_area *sync)
+{
+	atomic_store_explicit(&sync->start_flag, true, memory_order_release);
+}
+
+static void guest_spin_until_start(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
+		;
+}
+
+static void make_guest_exit(struct sync_area *sync)
+{
+	atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
+}
+
+static bool _guest_should_exit(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
+}
+
+#define guest_should_exit() unlikely(_guest_should_exit())
+
+/*
+ * noinline so we can easily see how much time the host spends waiting
+ * for the guest.
+ * For the same reason use alarm() instead of polling clock_gettime()
+ * to implement a wait timeout.
+ */
+static noinline void host_perform_sync(struct sync_area *sync)
+{
+	alarm(2);
+
+	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
+	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
+		;
+
+	alarm(0);
+}
+
+static bool guest_perform_sync(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	bool expected;
+
+	do {
+		if (guest_should_exit())
+			return false;
+
+		expected = true;
+	} while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
+							&expected, false,
+							memory_order_acq_rel,
+							memory_order_relaxed));
+
+	return true;
+}
+
+static void guest_code_test_memslot_move(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
+
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (!guest_should_exit()) {
+		uintptr_t ptr;
+
+		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
+		     ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		/*
+		 * No host sync here since the MMIO exits are so expensive
+		 * that the host would spend most of its time waiting for
+		 * the guest and so instead of measuring memslot move
+		 * performance we would measure the performance and
+		 * likelihood of MMIO exits
+		 */
+	}
+
+	GUEST_DONE();
+}
+
+static void guest_code_test_memslot_map(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr;
+
+		for (ptr = MEM_TEST_GPA;
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		if (!guest_perform_sync())
+			break;
+
+		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+static void guest_code_test_memslot_unmap(void)
+{
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr = MEM_TEST_GPA;
+
+		/*
+		 * We can afford to access (map) just a small number of pages
+		 * per host sync as otherwise the host will spend
+		 * a significant amount of its time waiting for the guest
+		 * (instead of doing unmap operations), so this will
+		 * effectively turn this test into a map performance test.
+		 *
+		 * Just access a single page to be on the safe side.
+		 */
+		*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		if (!guest_perform_sync())
+			break;
+
+		ptr += MEM_TEST_UNMAP_SIZE / 2;
+		*(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+static void guest_code_test_memslot_rw(void)
+{
+	GUEST_SYNC(0);
+
+	guest_spin_until_start();
+
+	while (1) {
+		uintptr_t ptr;
+
+		for (ptr = MEM_TEST_GPA;
+		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
+			*(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+		if (!guest_perform_sync())
+			break;
+
+		for (ptr = MEM_TEST_GPA + 4096 / 2;
+		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
+			uint64_t val = *(uint64_t *)ptr;
+
+			GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+			*(uint64_t *)ptr = 0;
+		}
+
+		if (!guest_perform_sync())
+			break;
+	}
+
+	GUEST_DONE();
+}
+
+static bool test_memslot_move_prepare(struct vm_data *data,
+				      struct sync_area *sync,
+				      uint64_t *maxslots, bool isactive)
+{
+	uint64_t movesrcgpa, movetestgpa;
+
+	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+
+	if (isactive) {
+		uint64_t lastpages;
+
+		vm_gpa2hva(data, movesrcgpa, &lastpages);
+		if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
+			*maxslots = 0;
+			return false;
+		}
+	}
+
+	movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
+	sync->move_area_ptr = (void *)movetestgpa;
+
+	if (isactive) {
+		data->mmio_ok = true;
+		data->mmio_gpa_min = movesrcgpa;
+		data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
+	}
+
+	return true;
+}
+
+static bool test_memslot_move_prepare_active(struct vm_data *data,
+					     struct sync_area *sync,
+					     uint64_t *maxslots)
+{
+	return test_memslot_move_prepare(data, sync, maxslots, true);
+}
+
+static bool test_memslot_move_prepare_inactive(struct vm_data *data,
+					       struct sync_area *sync,
+					       uint64_t *maxslots)
+{
+	return test_memslot_move_prepare(data, sync, maxslots, false);
+}
+
+static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
+{
+	uint64_t movesrcgpa;
+
+	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+	vm_mem_region_move(data->vm, data->nslots - 1 + 1,
+			   MEM_TEST_MOVE_GPA_DEST);
+	vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
+}
+
+static void test_memslot_do_unmap(struct vm_data *data,
+				  uint64_t offsp, uint64_t count)
+{
+	uint64_t gpa, ctr;
+
+	for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
+		uint64_t npages;
+		void *hva;
+		int ret;
+
+		hva = vm_gpa2hva(data, gpa, &npages);
+		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
+		npages = min(npages, count - ctr);
+		ret = madvise(hva, npages * 4096, MADV_DONTNEED);
+		TEST_ASSERT(!ret,
+			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
+			    hva, gpa);
+		ctr += npages;
+		gpa += npages * 4096;
+	}
+	TEST_ASSERT(ctr == count,
+		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
+}
+
+static void test_memslot_map_unmap_check(struct vm_data *data,
+					 uint64_t offsp, uint64_t valexp)
+{
+	uint64_t gpa;
+	uint64_t *val;
+
+	if (!map_unmap_verify)
+		return;
+
+	gpa = MEM_TEST_GPA + offsp * 4096;
+	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
+	TEST_ASSERT(*val == valexp,
+		    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
+		    *val, valexp, gpa);
+	*val = 0;
+}
+
+static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
+{
+	/*
+	 * Unmap the second half of the test area while guest writes to (maps)
+	 * the first half.
+	 */
+	test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
+			      MEM_TEST_MAP_SIZE_PAGES / 2);
+
+	/*
+	 * Wait for the guest to finish writing the first half of the test
+	 * area, verify the written value on the first and the last page of
+	 * this area and then unmap it.
+	 * Meanwhile, the guest is writing to (mapping) the second half of
+	 * the test area.
+	 */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+	test_memslot_map_unmap_check(data,
+				     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
+				     MEM_TEST_VAL_1);
+	test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
+
+
+	/*
+	 * Wait for the guest to finish writing the second half of the test
+	 * area and verify the written value on the first and the last page
+	 * of this area.
+	 * The area will be unmapped at the beginning of the next loop
+	 * iteration.
+	 * Meanwhile, the guest is writing to (mapping) the first half of
+	 * the test area.
+	 */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
+				     MEM_TEST_VAL_2);
+	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
+				     MEM_TEST_VAL_2);
+}
+
+static void test_memslot_unmap_loop_common(struct vm_data *data,
+					   struct sync_area *sync,
+					   uint64_t chunk)
+{
+	uint64_t ctr;
+
+	/*
+	 * Wait for the guest to finish mapping page(s) in the first half
+	 * of the test area, verify the written value and then perform unmap
+	 * of this area.
+	 * Meanwhile, the guest is writing to (mapping) page(s) in the second
+	 * half of the test area.
+	 */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+	for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
+		test_memslot_do_unmap(data, ctr, chunk);
+
+	/* Likewise, but for the opposite host / guest areas */
+	host_perform_sync(sync);
+	test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
+				     MEM_TEST_VAL_2);
+	for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
+	     ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
+		test_memslot_do_unmap(data, ctr, chunk);
+}
+
+static void test_memslot_unmap_loop(struct vm_data *data,
+				    struct sync_area *sync)
+{
+	test_memslot_unmap_loop_common(data, sync, 1);
+}
+
+static void test_memslot_unmap_loop_chunked(struct vm_data *data,
+					    struct sync_area *sync)
+{
+	test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
+}
+
+static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
+{
+	uint64_t gptr;
+
+	for (gptr = MEM_TEST_GPA + 4096 / 2;
+	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
+		*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
+
+	host_perform_sync(sync);
+
+	for (gptr = MEM_TEST_GPA;
+	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
+		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+		uint64_t val = *vptr;
+
+		TEST_ASSERT(val == MEM_TEST_VAL_1,
+			    "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
+			    val, gptr);
+		*vptr = 0;
+	}
+
+	host_perform_sync(sync);
+}
+
+struct test_data {
+	const char *name;
+	uint64_t mem_size;
+	void (*guest_code)(void);
+	bool (*prepare)(struct vm_data *data, struct sync_area *sync,
+			uint64_t *maxslots);
+	void (*loop)(struct vm_data *data, struct sync_area *sync);
+};
+
+static bool test_execute(int nslots, uint64_t *maxslots,
+			 unsigned int maxtime,
+			 const struct test_data *tdata,
+			 uint64_t *nloops,
+			 struct timespec *slot_runtime,
+			 struct timespec *guest_runtime)
+{
+	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
+	struct vm_data *data;
+	struct sync_area *sync;
+	struct timespec tstart;
+	bool ret = true;
+
+	data = alloc_vm();
+	if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
+			mem_size, slot_runtime)) {
+		ret = false;
+		goto exit_free;
+	}
+
+	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+
+	if (tdata->prepare &&
+	    !tdata->prepare(data, sync, maxslots)) {
+		ret = false;
+		goto exit_free;
+	}
+
+	launch_vm(data);
+
+	clock_gettime(CLOCK_MONOTONIC, &tstart);
+	let_guest_run(sync);
+
+	while (1) {
+		*guest_runtime = timespec_elapsed(tstart);
+		if (guest_runtime->tv_sec >= maxtime)
+			break;
+
+		tdata->loop(data, sync);
+
+		(*nloops)++;
+	}
+
+	make_guest_exit(sync);
+	wait_guest_exit(data);
+
+exit_free:
+	free_vm(data);
+
+	return ret;
+}
+
+static const struct test_data tests[] = {
+	{
+		.name = "map",
+		.mem_size = MEM_SIZE_MAP_PAGES,
+		.guest_code = guest_code_test_memslot_map,
+		.loop = test_memslot_map_loop,
+	},
+	{
+		.name = "unmap",
+		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+		.guest_code = guest_code_test_memslot_unmap,
+		.loop = test_memslot_unmap_loop,
+	},
+	{
+		.name = "unmap chunked",
+		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+		.guest_code = guest_code_test_memslot_unmap,
+		.loop = test_memslot_unmap_loop_chunked,
+	},
+	{
+		.name = "move active area",
+		.guest_code = guest_code_test_memslot_move,
+		.prepare = test_memslot_move_prepare_active,
+		.loop = test_memslot_move_loop,
+	},
+	{
+		.name = "move inactive area",
+		.guest_code = guest_code_test_memslot_move,
+		.prepare = test_memslot_move_prepare_inactive,
+		.loop = test_memslot_move_loop,
+	},
+	{
+		.name = "RW",
+		.guest_code = guest_code_test_memslot_rw,
+		.loop = test_memslot_rw_loop
+	},
+};
+
+#define NTESTS ARRAY_SIZE(tests)
+
+struct test_args {
+	int tfirst;
+	int tlast;
+	int nslots;
+	int seconds;
+	int runs;
+};
+
+static void help(char *name, struct test_args *targs)
+{
+	int ctr;
+
+	pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+		name);
+	pr_info(" -h: print this help screen.\n");
+	pr_info(" -v: enable verbose mode (not for benchmarking).\n");
+	pr_info(" -d: enable extra debug checks.\n");
+	pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
+		targs->nslots);
+	pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
+		targs->tfirst, NTESTS - 1);
+	pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
+		targs->tlast, NTESTS - 1);
+	pr_info(" -l: specify the test length in seconds (currently: %i)\n",
+		targs->seconds);
+	pr_info(" -r: specify the number of runs per test (currently: %i)\n",
+		targs->runs);
+
+	pr_info("\nAvailable tests:\n");
+	for (ctr = 0; ctr < NTESTS; ctr++)
+		pr_info("%d: %s\n", ctr, tests[ctr].name);
+}
+
+static bool parse_args(int argc, char *argv[],
+		       struct test_args *targs)
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
+		switch (opt) {
+		case 'h':
+		default:
+			help(argv[0], targs);
+			return false;
+		case 'v':
+			verbose = true;
+			break;
+		case 'd':
+			map_unmap_verify = true;
+			break;
+		case 's':
+			targs->nslots = atoi(optarg);
+			if (targs->nslots <= 0 && targs->nslots != -1) {
+				pr_info("Slot count cap has to be positive or -1 for no cap\n");
+				return false;
+			}
+			break;
+		case 'f':
+			targs->tfirst = atoi(optarg);
+			if (targs->tfirst < 0) {
+				pr_info("First test to run has to be non-negative\n");
+				return false;
+			}
+			break;
+		case 'e':
+			targs->tlast = atoi(optarg);
+			if (targs->tlast < 0 || targs->tlast >= NTESTS) {
+				pr_info("Last test to run has to be non-negative and less than %zu\n",
+					NTESTS);
+				return false;
+			}
+			break;
+		case 'l':
+			targs->seconds = atoi(optarg);
+			if (targs->seconds < 0) {
+				pr_info("Test length in seconds has to be non-negative\n");
+				return false;
+			}
+			break;
+		case 'r':
+			targs->runs = atoi(optarg);
+			if (targs->runs <= 0) {
+				pr_info("Runs per test has to be positive\n");
+				return false;
+			}
+			break;
+		}
+	}
+
+	if (optind < argc) {
+		help(argv[0], targs);
+		return false;
+	}
+
+	if (targs->tfirst > targs->tlast) {
+		pr_info("First test to run cannot be greater than the last test to run\n");
+		return false;
+	}
+
+	return true;
+}
+
+struct test_result {
+	struct timespec slot_runtime, guest_runtime, iter_runtime;
+	int64_t slottimens, runtimens;
+	uint64_t nloops;
+};
+
+static bool test_loop(const struct test_data *data,
+		      const struct test_args *targs,
+		      struct test_result *rbestslottime,
+		      struct test_result *rbestruntime)
+{
+	uint64_t maxslots;
+	struct test_result result;
+
+	result.nloops = 0;
+	if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
+			  &result.nloops,
+			  &result.slot_runtime, &result.guest_runtime)) {
+		if (maxslots)
+			pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
+				maxslots);
+		else
+			pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
+
+		return false;
+	}
+
+	pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
+		result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
+		result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
+	if (!result.nloops) {
+		pr_info("No full loops done - too short test time or system too loaded?\n");
+		return true;
+	}
+
+	result.iter_runtime = timespec_div(result.guest_runtime,
+					   result.nloops);
+	pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
+		result.nloops,
+		result.iter_runtime.tv_sec,
+		result.iter_runtime.tv_nsec);
+	result.slottimens = timespec_to_ns(result.slot_runtime);
+	result.runtimens = timespec_to_ns(result.iter_runtime);
+
+	/*
+	 * Only rank the slot setup time for tests using the whole test memory
+	 * area so they are comparable
+	 */
+	if (!data->mem_size &&
+	    (!rbestslottime->slottimens ||
+	     result.slottimens < rbestslottime->slottimens))
+		*rbestslottime = result;
+	if (!rbestruntime->runtimens ||
+	    result.runtimens < rbestruntime->runtimens)
+		*rbestruntime = result;
+
+	return true;
+}
+
+int main(int argc, char *argv[])
+{
+	struct test_args targs = {
+		.tfirst = 0,
+		.tlast = NTESTS - 1,
+		.nslots = -1,
+		.seconds = 5,
+		.runs = 20,
+	};
+	struct test_result rbestslottime;
+	int tctr;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (!parse_args(argc, argv, &targs))
+		return -1;
+
+	rbestslottime.slottimens = 0;
+	for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
+		const struct test_data *data = &tests[tctr];
+		unsigned int runctr;
+		struct test_result rbestruntime;
+
+		if (tctr > targs.tfirst)
+			pr_info("\n");
+
+		pr_info("Testing %s performance with %i runs, %d seconds each\n",
+			data->name, targs.runs, targs.seconds);
+
+		rbestruntime.runtimens = 0;
+		for (runctr = 0; runctr < targs.runs; runctr++)
+			if (!test_loop(data, &targs,
+				       &rbestslottime, &rbestruntime))
+				break;
+
+		if (rbestruntime.runtimens)
+			pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
+				rbestruntime.iter_runtime.tv_sec,
+				rbestruntime.iter_runtime.tv_nsec,
+				rbestruntime.nloops);
+	}
+
+	if (rbestslottime.slottimens)
+		pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
+			rbestslottime.slot_runtime.tv_sec,
+			rbestslottime.slot_runtime.tv_nsec);
+
+	return 0;
+}
-- 
GitLab


From ef4c9f4f654622fa15b7a94a9bd1f19e76bb7feb Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 21 May 2021 17:38:28 +0000
Subject: [PATCH 1677/3804] KVM: selftests: Fix 32-bit truncation of
 vm_get_max_gfn()

vm_get_max_gfn() casts vm->max_gfn from a uint64_t to an unsigned int,
which causes the upper 32-bits of the max_gfn to get truncated.

Nobody noticed until now likely because vm_get_max_gfn() is only used
as a mechanism to create a memslot in an unused region of the guest
physical address space (the top), and the top of the 32-bit physical
address space was always good enough.

This fix reveals a bug in memslot_modification_stress_test which was
trying to create a dummy memslot past the end of guest physical memory.
Fix that by moving the dummy memslot lower.

Fixes: 52200d0d944e ("KVM: selftests: Remove duplicate guest mode handling")
Reviewed-by: Venkatesh Srinivas <venkateshs@chromium.org>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210521173828.1180619-1-dmatlack@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h |  2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c     |  2 +-
 .../testing/selftests/kvm/lib/perf_test_util.c |  4 +++-
 .../kvm/memslot_modification_stress_test.c     | 18 +++++++++++-------
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index a8f022794ce3c..2e0d253dabd64 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -302,7 +302,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
 unsigned int vm_get_page_size(struct kvm_vm *vm);
 unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
 int vm_get_fd(struct kvm_vm *vm);
 
 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 1255744758e36..ea3f0db85b3e7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2117,7 +2117,7 @@ unsigned int vm_get_page_shift(struct kvm_vm *vm)
 	return vm->page_shift;
 }
 
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
+uint64_t vm_get_max_gfn(struct kvm_vm *vm)
 {
 	return vm->max_gfn;
 }
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 81490b9b4e32a..abf381800a590 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2020, Google LLC.
  */
+#include <inttypes.h>
 
 #include "kvm_util.h"
 #include "perf_test_util.h"
@@ -80,7 +81,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	 */
 	TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
 		    "Requested more guest memory than address space allows.\n"
-		    "    guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+		    "    guest pages: %" PRIx64 " max gfn: %" PRIx64
+		    " vcpus: %d wss: %" PRIx64 "]\n",
 		    guest_num_pages, vm_get_max_gfn(vm), vcpus,
 		    vcpu_memory_bytes);
 
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 6096bf0a5b34f..98351ba0933cd 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -71,14 +71,22 @@ struct memslot_antagonist_args {
 };
 
 static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
-			      uint64_t nr_modifications, uint64_t gpa)
+			       uint64_t nr_modifications)
 {
+	const uint64_t pages = 1;
+	uint64_t gpa;
 	int i;
 
+	/*
+	 * Add the dummy memslot just below the perf_test_util memslot, which is
+	 * at the top of the guest physical address space.
+	 */
+	gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+
 	for (i = 0; i < nr_modifications; i++) {
 		usleep(delay);
 		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
-					    DUMMY_MEMSLOT_INDEX, 1, 0);
+					    DUMMY_MEMSLOT_INDEX, pages, 0);
 
 		vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
 	}
@@ -120,11 +128,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("Started all vCPUs\n");
 
 	add_remove_memslot(vm, p->memslot_modification_delay,
-			   p->nr_memslot_modifications,
-			   guest_test_phys_mem +
-			   (guest_percpu_mem_size * nr_vcpus) +
-			   perf_test_args.host_page_size +
-			   perf_test_args.guest_page_size);
+			   p->nr_memslot_modifications);
 
 	run_vcpus = false;
 
-- 
GitLab


From 50bc913d526beb9937f1eb0159ec63c43234f961 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 19 May 2021 21:13:45 +0000
Subject: [PATCH 1678/3804] KVM: selftests: Ignore CPUID.0DH.1H in
 get_cpuid_test

Similar to CPUID.0DH.0H this entry depends on the vCPU's XCR0 register
and IA32_XSS MSR. Since this test does not control for either before
assigning the vCPU's CPUID, these entries will not necessarily match
the supported CPUID exposed by KVM.

This fixes get_cpuid_test on Cascade Lake CPUs.

Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210519211345.3944063-1-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/get_cpuid_test.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 9b78e88896385..8c77537af5a1c 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -19,7 +19,12 @@ struct {
 	u32 function;
 	u32 index;
 } mangled_cpuids[] = {
+	/*
+	 * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+	 * which are not controlled for by this test.
+	 */
 	{.function = 0xd, .index = 0},
+	{.function = 0xd, .index = 1},
 };
 
 static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
-- 
GitLab


From a10453c038a7e97169185405242d20d21de0bb91 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 14 May 2021 23:05:21 +0000
Subject: [PATCH 1679/3804] KVM: selftests: Fix hang in hardware_disable_test

If /dev/kvm is not available then hardware_disable_test will hang
indefinitely because the child process exits before posting to the
semaphore for which the parent is waiting.

Fix this by making the parent periodically check if the child has
exited. We have to be careful to forward the child's exit status to
preserve a KSFT_SKIP status.

I considered just checking for /dev/kvm before creating the child
process, but there are so many other reasons why the child could exit
early that it seemed better to handle that as the general case.

Tested:

$ ./hardware_disable_test
/dev/kvm not available, skipping test
$ echo $?
4
$ modprobe kvm_intel
$ ./hardware_disable_test
$ echo $?
0

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210514230521.2608768-1-dmatlack@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/hardware_disable_test.c     | 32 ++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 5aadf84c91c04..4b8db3bce6102 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -132,6 +132,36 @@ static void run_test(uint32_t run)
 	TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
 }
 
+void wait_for_child_setup(pid_t pid)
+{
+	/*
+	 * Wait for the child to post to the semaphore, but wake up periodically
+	 * to check if the child exited prematurely.
+	 */
+	for (;;) {
+		struct timespec deadline;
+		int status;
+
+		/* sem_timedwait() takes an absolute CLOCK_REALTIME deadline. */
+		clock_gettime(CLOCK_REALTIME, &deadline);
+		deadline.tv_sec += 1;
+		if (!sem_timedwait(sem, &deadline))
+			return;
+
+		/* Child is still running, keep waiting. */
+		if (pid != waitpid(pid, &status, WNOHANG))
+			continue;
+
+		/*
+		 * Child is gone, which is not expected. Forward its exit
+		 * status so that e.g. KSFT_SKIP is preserved.
+		 */
+		if (WIFEXITED(status))
+			exit(WEXITSTATUS(status));
+		TEST_ASSERT(false, "Child exited unexpectedly");
+	}
+}
+
 int main(int argc, char **argv)
 {
 	uint32_t i;
@@ -148,7 +178,7 @@ int main(int argc, char **argv)
 			run_test(i); /* This function always exits */
 
 		pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
-		sem_wait(sem);
+		wait_for_child_setup(pid);
 		r = (rand() % DELAY_US_MAX) + 1;
 		pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
 		usleep(r);
-- 
GitLab


From c887d6a126dfc50b27872527615dd46cb3d96bc1 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:30 -0700
Subject: [PATCH 1680/3804] KVM: selftests: trivial comment/logging fixes

Some trivial fixes I found while touching related code in this series,
factored out into a separate commit for easier reviewing:

- s/gor/got/ and add a newline in demand_paging_test.c
- s/backing_src/src_type/ in a comment to be consistent with the real
  function signature in kvm_util.c

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-2-axelrasmussen@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c | 2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 5f7a229c3af10..9398ba6ef023d 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -169,7 +169,7 @@ static void *uffd_handler_thread_fn(void *arg)
 		if (r == -1) {
 			if (errno == EAGAIN)
 				continue;
-			pr_info("Read of uffd gor errno %d", errno);
+			pr_info("Read of uffd got errno %d\n", errno);
 			return NULL;
 		}
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index ea3f0db85b3e7..f4484e1edcfaf 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -731,8 +731,8 @@ static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
  *
  * Input Args:
  *   vm - Virtual Machine
- *   backing_src - Storage source for this region.
- *                 NULL to use anonymous memory.
+ *   src_type - Storage source for this region.
+ *              VM_MEM_SRC_ANONYMOUS to use anonymous memory.
  *   guest_paddr - Starting guest physical address
  *   slot - KVM region slot
  *   npages - Number of physical pages
-- 
GitLab


From 2aab4b355cbbe1deacfd9349729c43509042b557 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Tue, 11 May 2021 20:21:20 +0000
Subject: [PATCH 1681/3804] KVM: selftests: Print a message if /dev/kvm is
 missing

If a KVM selftest is run on a machine without /dev/kvm, it will exit
silently. Make it easy to tell what's happening by printing an error
message.

Opportunistically consolidate all codepaths that open /dev/kvm into a
single function so they all print the same message.

This slightly changes the semantics of vm_is_unrestricted_guest() by
changing a TEST_ASSERT() to exit(KSFT_SKIP). However
vm_is_unrestricted_guest() is only called in one place
(x86_64/mmio_warning_test.c) and that is to determine if the test should
be skipped or not.

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210511202120.1371800-1-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h  |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c    | 46 +++++++++++++------
 .../selftests/kvm/lib/x86_64/processor.c      | 16 ++-----
 .../kvm/x86_64/get_msr_index_features.c       |  8 +---
 4 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2e0d253dabd64..5d9b35d092517 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -77,6 +77,7 @@ struct vm_guest_mode_params {
 };
 extern const struct vm_guest_mode_params vm_guest_mode_params[];
 
+int open_kvm_dev_path_or_exit(void);
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f4484e1edcfaf..d00e49b73d689 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -31,6 +31,34 @@ static void *align(void *x, size_t size)
 	return (void *) (((size_t) x + mask) & ~mask);
 }
 
+/*
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Input Args:
+ *   flags - The flags to pass when opening KVM_DEV_PATH.
+ *
+ * Return:
+ *   The opened file descriptor of /dev/kvm.
+ */
+static int _open_kvm_dev_path_or_exit(int flags)
+{
+	int fd;
+
+	fd = open(KVM_DEV_PATH, flags);
+	if (fd < 0) {
+		print_skip("%s not available, is KVM loaded? (errno: %d)",
+			   KVM_DEV_PATH, errno);
+		exit(KSFT_SKIP);
+	}
+
+	return fd;
+}
+
+int open_kvm_dev_path_or_exit(void)
+{
+	return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
+
 /*
  * Capability
  *
@@ -52,10 +80,7 @@ int kvm_check_cap(long cap)
 	int ret;
 	int kvm_fd;
 
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
-
+	kvm_fd = open_kvm_dev_path_or_exit();
 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
 	TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
 		"  rc: %i errno: %i", ret, errno);
@@ -128,9 +153,7 @@ void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
 
 static void vm_open(struct kvm_vm *vm, int perm)
 {
-	vm->kvm_fd = open(KVM_DEV_PATH, perm);
-	if (vm->kvm_fd < 0)
-		exit(KSFT_SKIP);
+	vm->kvm_fd = _open_kvm_dev_path_or_exit(perm);
 
 	if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
 		print_skip("immediate_exit not available");
@@ -996,9 +1019,7 @@ static int vcpu_mmap_sz(void)
 {
 	int dev_fd, ret;
 
-	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (dev_fd < 0)
-		exit(KSFT_SKIP);
+	dev_fd = open_kvm_dev_path_or_exit();
 
 	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
 	TEST_ASSERT(ret >= sizeof(struct kvm_run),
@@ -2091,10 +2112,7 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm)
 
 	if (vm == NULL) {
 		/* Ensure that the KVM vendor-specific module is loaded. */
-		f = fopen(KVM_DEV_PATH, "r");
-		TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
-			    errno);
-		fclose(f);
+		close(open_kvm_dev_path_or_exit());
 	}
 
 	f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index a8906e60a1081..efe2350444213 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -657,9 +657,7 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 		return cpuid;
 
 	cpuid = allocate_kvm_cpuid2();
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
 	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
@@ -691,9 +689,7 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
 
 	buffer.header.nmsrs = 1;
 	buffer.entry.index = msr_index;
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
 	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
@@ -986,9 +982,7 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
 	struct kvm_msr_list *list;
 	int nmsrs, r, kvm_fd;
 
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	nmsrs = kvm_get_num_msrs_fd(kvm_fd);
 	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
@@ -1312,9 +1306,7 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
 		return cpuid;
 
 	cpuid = allocate_kvm_cpuid2();
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
 	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n",
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
index cb953df4d7d0a..8aed0db1331d1 100644
--- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -37,9 +37,7 @@ static void test_get_msr_index(void)
 	int old_res, res, kvm_fd, r;
 	struct kvm_msr_list *list;
 
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	old_res = kvm_num_index_msrs(kvm_fd, 0);
 	TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
@@ -101,9 +99,7 @@ static void test_get_msr_feature(void)
 	int res, old_res, i, kvm_fd;
 	struct kvm_msr_list *feature_list;
 
-	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
-	if (kvm_fd < 0)
-		exit(KSFT_SKIP);
+	kvm_fd = open_kvm_dev_path_or_exit();
 
 	old_res = kvm_num_feature_msrs(kvm_fd, 0);
 	TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
-- 
GitLab


From 25408e5a0246048e3e36d2cd513565ebcc481f51 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:31 -0700
Subject: [PATCH 1682/3804] KVM: selftests: simplify setup_demand_paging error
 handling

A small cleanup. Our caller writes:

  r = setup_demand_paging(...);
  if (r < 0) exit(-r);

Since we're just going to exit anyway, instead of returning an error we
can just re-use TEST_ASSERT. This makes the caller simpler, as well as
the function itself - no need to write out branches, etc.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-3-axelrasmussen@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/demand_paging_test.c        | 50 +++++++------------
 1 file changed, 18 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 9398ba6ef023d..8ce53488d6aff 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -9,6 +9,7 @@
 
 #define _GNU_SOURCE /* for pipe2 */
 
+#include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -198,42 +199,32 @@ static void *uffd_handler_thread_fn(void *arg)
 	return NULL;
 }
 
-static int setup_demand_paging(struct kvm_vm *vm,
-			       pthread_t *uffd_handler_thread, int pipefd,
-			       useconds_t uffd_delay,
-			       struct uffd_handler_args *uffd_args,
-			       void *hva, uint64_t len)
+static void setup_demand_paging(struct kvm_vm *vm,
+				pthread_t *uffd_handler_thread, int pipefd,
+				useconds_t uffd_delay,
+				struct uffd_handler_args *uffd_args,
+				void *hva, uint64_t len)
 {
 	int uffd;
 	struct uffdio_api uffdio_api;
 	struct uffdio_register uffdio_register;
 
 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-	if (uffd == -1) {
-		pr_info("uffd creation failed\n");
-		return -1;
-	}
+	TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
 
 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = 0;
-	if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
-		pr_info("ioctl uffdio_api failed\n");
-		return -1;
-	}
+	TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+		    "ioctl UFFDIO_API failed: %" PRIu64,
+		    (uint64_t)uffdio_api.api);
 
 	uffdio_register.range.start = (uint64_t)hva;
 	uffdio_register.range.len = len;
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
-		pr_info("ioctl uffdio_register failed\n");
-		return -1;
-	}
-
-	if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
-			UFFD_API_RANGE_IOCTLS) {
-		pr_info("unexpected userfaultfd ioctl set\n");
-		return -1;
-	}
+	TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+		    "ioctl UFFDIO_REGISTER failed");
+	TEST_ASSERT((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) ==
+		    UFFD_API_RANGE_IOCTLS, "unexpected userfaultfd ioctl set");
 
 	uffd_args->uffd = uffd;
 	uffd_args->pipefd = pipefd;
@@ -243,8 +234,6 @@ static int setup_demand_paging(struct kvm_vm *vm,
 
 	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
 		       hva, hva + len);
-
-	return 0;
 }
 
 struct test_params {
@@ -321,13 +310,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 				  O_CLOEXEC | O_NONBLOCK);
 			TEST_ASSERT(!r, "Failed to set up pipefd");
 
-			r = setup_demand_paging(vm,
-						&uffd_handler_threads[vcpu_id],
-						pipefds[vcpu_id * 2],
-						p->uffd_delay, &uffd_args[vcpu_id],
-						vcpu_hva, vcpu_mem_size);
-			if (r < 0)
-				exit(-r);
+			setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
+					    pipefds[vcpu_id * 2], p->uffd_delay,
+					    &uffd_args[vcpu_id], vcpu_hva,
+					    vcpu_mem_size);
 		}
 	}
 
-- 
GitLab


From 32ffa4f71e10009498ae6b54da65ab316db967bd Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:33 -0700
Subject: [PATCH 1683/3804] KVM: selftests: compute correct demand paging size

This is a preparatory commit needed before we can use different kinds of
backing pages for guest memory.

Previously, we used perf_test_args.host_page_size, which is the host's
native page size (commonly 4K). For VM_MEM_SRC_ANONYMOUS this turns out
to be okay, but in a follow-up commit we want to allow using different
kinds of backing memory.

Take VM_MEM_SRC_ANONYMOUS_HUGETLB for example. Without this change, if
we used that backing page type, when we issued a UFFDIO_COPY ioctl we'd
only do so with 4K, rather than the full 2M of a backing hugepage. In
this case, UFFDIO_COPY returns -EINVAL (__mcopy_atomic_hugetlb checks
the size).

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-5-axelrasmussen@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 8ce53488d6aff..e6582f504c0f6 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -39,6 +39,7 @@
 
 static int nr_vcpus = 1;
 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static size_t demand_paging_size;
 static char *guest_data_prototype;
 
 static void *vcpu_worker(void *data)
@@ -84,7 +85,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 
 	copy.src = (uint64_t)guest_data_prototype;
 	copy.dst = addr;
-	copy.len = perf_test_args.host_page_size;
+	copy.len = demand_paging_size;
 	copy.mode = 0;
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
@@ -101,7 +102,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
 		       timespec_to_ns(ts_diff));
 	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
-		       perf_test_args.host_page_size, addr, tid);
+		       demand_paging_size, addr, tid);
 
 	return 0;
 }
@@ -260,10 +261,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	perf_test_args.wr_fract = 1;
 
-	guest_data_prototype = malloc(perf_test_args.host_page_size);
+	demand_paging_size = get_backing_src_pagesz(VM_MEM_SRC_ANONYMOUS);
+
+	guest_data_prototype = malloc(demand_paging_size);
 	TEST_ASSERT(guest_data_prototype,
 		    "Failed to allocate buffer for guest data pattern");
-	memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
+	memset(guest_data_prototype, 0xAB, demand_paging_size);
 
 	vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
 	TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-- 
GitLab


From 0368c2c1b422c94968b5286f289aed7fe6af93c2 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:34 -0700
Subject: [PATCH 1684/3804] KVM: selftests: allow different backing source
 types

Add an argument which lets us specify a different backing memory type
for the test. The default is just to use anonymous, matching existing
behavior.

This is in preparation for testing UFFD minor faults. For that, we'll
need to use a new backing memory type which is setup with MAP_SHARED.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-6-axelrasmussen@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index e6582f504c0f6..8c03484a57848 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -240,6 +240,7 @@ static void setup_demand_paging(struct kvm_vm *vm,
 struct test_params {
 	bool use_uffd;
 	useconds_t uffd_delay;
+	enum vm_mem_backing_src_type src_type;
 	bool partition_vcpu_memory_access;
 };
 
@@ -257,11 +258,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	int r;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
-				 VM_MEM_SRC_ANONYMOUS);
+				 p->src_type);
 
 	perf_test_args.wr_fract = 1;
 
-	demand_paging_size = get_backing_src_pagesz(VM_MEM_SRC_ANONYMOUS);
+	demand_paging_size = get_backing_src_pagesz(p->src_type);
 
 	guest_data_prototype = malloc(demand_paging_size);
 	TEST_ASSERT(guest_data_prototype,
@@ -377,7 +378,7 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
-	       "          [-b memory] [-v vcpus] [-o]\n", name);
+	       "          [-b memory] [-t type] [-v vcpus] [-o]\n", name);
 	guest_modes_help();
 	printf(" -u: use User Fault FD to handle vCPU page\n"
 	       "     faults.\n");
@@ -387,6 +388,8 @@ static void help(char *name)
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
 	       "     Default: 1G\n");
+	printf(" -t: The type of backing memory to use. Default: anonymous\n");
+	backing_src_help();
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
@@ -398,13 +401,14 @@ int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	struct test_params p = {
+		.src_type = VM_MEM_SRC_ANONYMOUS,
 		.partition_vcpu_memory_access = true,
 	};
 	int opt;
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hm:ud:b:v:o")) != -1) {
+	while ((opt = getopt(argc, argv, "hm:ud:b:t:v:o")) != -1) {
 		switch (opt) {
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -419,6 +423,9 @@ int main(int argc, char *argv[])
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
+		case 't':
+			p.src_type = parse_backing_src_type(optarg);
+			break;
 		case 'v':
 			nr_vcpus = atoi(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
-- 
GitLab


From b3784bc28ccc0d9b44d265a1d947c8766295ba00 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:35 -0700
Subject: [PATCH 1685/3804] KVM: selftests: refactor vm_mem_backing_src_type
 flags

Each struct vm_mem_backing_src_alias has a flags field, which denotes
the flags used to mmap() an area of that type. Previously, this field
never included MAP_PRIVATE | MAP_ANONYMOUS, because
vm_userspace_mem_region_add assumed that *all* types would always use
those flags, and so it hardcoded them.

In a follow-up commit, we'll add a new type: shmem. Areas of this type
must not have MAP_PRIVATE | MAP_ANONYMOUS, and instead they must have
MAP_SHARED.

So, refactor things. Make it so that the flags field of
struct vm_mem_backing_src_alias really is a complete set of flags, and
don't add in any extras in vm_userspace_mem_region_add. This will let us
easily tack on shmem.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-7-axelrasmussen@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c  |  3 +-
 tools/testing/selftests/kvm/lib/test_util.c | 35 +++++++++++----------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d00e49b73d689..491be22b410c8 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -849,8 +849,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 
 	region->mmap_start = mmap(NULL, region->mmap_size,
 				  PROT_READ | PROT_WRITE,
-				  MAP_PRIVATE | MAP_ANONYMOUS
-				  | vm_mem_backing_src_alias(src_type)->flag,
+				  vm_mem_backing_src_alias(src_type)->flag,
 				  -1, 0);
 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
 		    "test_malloc failed, mmap_start: %p errno: %i",
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 63d2bc7d757b4..06ddde0687368 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -168,70 +168,73 @@ size_t get_def_hugetlb_pagesz(void)
 
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 {
+	static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+	static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
+
 	static const struct vm_mem_backing_src_alias aliases[] = {
 		[VM_MEM_SRC_ANONYMOUS] = {
 			.name = "anonymous",
-			.flag = 0,
+			.flag = anon_flags,
 		},
 		[VM_MEM_SRC_ANONYMOUS_THP] = {
 			.name = "anonymous_thp",
-			.flag = 0,
+			.flag = anon_flags,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
 			.name = "anonymous_hugetlb",
-			.flag = MAP_HUGETLB,
+			.flag = anon_huge_flags,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
 			.name = "anonymous_hugetlb_16kb",
-			.flag = MAP_HUGETLB | MAP_HUGE_16KB,
+			.flag = anon_huge_flags | MAP_HUGE_16KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
 			.name = "anonymous_hugetlb_64kb",
-			.flag = MAP_HUGETLB | MAP_HUGE_64KB,
+			.flag = anon_huge_flags | MAP_HUGE_64KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
 			.name = "anonymous_hugetlb_512kb",
-			.flag = MAP_HUGETLB | MAP_HUGE_512KB,
+			.flag = anon_huge_flags | MAP_HUGE_512KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
 			.name = "anonymous_hugetlb_1mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_1MB,
+			.flag = anon_huge_flags | MAP_HUGE_1MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
 			.name = "anonymous_hugetlb_2mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_2MB,
+			.flag = anon_huge_flags | MAP_HUGE_2MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
 			.name = "anonymous_hugetlb_8mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_8MB,
+			.flag = anon_huge_flags | MAP_HUGE_8MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
 			.name = "anonymous_hugetlb_16mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_16MB,
+			.flag = anon_huge_flags | MAP_HUGE_16MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
 			.name = "anonymous_hugetlb_32mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_32MB,
+			.flag = anon_huge_flags | MAP_HUGE_32MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
 			.name = "anonymous_hugetlb_256mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_256MB,
+			.flag = anon_huge_flags | MAP_HUGE_256MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
 			.name = "anonymous_hugetlb_512mb",
-			.flag = MAP_HUGETLB | MAP_HUGE_512MB,
+			.flag = anon_huge_flags | MAP_HUGE_512MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
 			.name = "anonymous_hugetlb_1gb",
-			.flag = MAP_HUGETLB | MAP_HUGE_1GB,
+			.flag = anon_huge_flags | MAP_HUGE_1GB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
 			.name = "anonymous_hugetlb_2gb",
-			.flag = MAP_HUGETLB | MAP_HUGE_2GB,
+			.flag = anon_huge_flags | MAP_HUGE_2GB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
 			.name = "anonymous_hugetlb_16gb",
-			.flag = MAP_HUGETLB | MAP_HUGE_16GB,
+			.flag = anon_huge_flags | MAP_HUGE_16GB,
 		},
 	};
 	_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
-- 
GitLab


From c9befd5958fdf8913db69049d47b6ac1d970af03 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:36 -0700
Subject: [PATCH 1686/3804] KVM: selftests: add shmem backing source type

This lets us run the demand paging test on top of a shmem-backed area.
In follow-up commits, we'll 1) leverage this new capability to create an
alias mapping, and then 2) use the alias mapping to exercise UFFD minor
faults.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-8-axelrasmussen@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/test_util.h |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c      | 17 ++++++++++++++++-
 tools/testing/selftests/kvm/lib/test_util.c     |  5 +++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index fade3130eb01e..7377f00469ef4 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -84,6 +84,7 @@ enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
 	VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
 	VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+	VM_MEM_SRC_SHMEM,
 	NUM_SRC_TYPES,
 };
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 491be22b410c8..bc50ca6390d30 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -847,10 +847,25 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	if (alignment > 1)
 		region->mmap_size += alignment;
 
+	region->fd = -1;
+	if (src_type == VM_MEM_SRC_SHMEM) {
+		region->fd = memfd_create("kvm_selftest", MFD_CLOEXEC);
+		TEST_ASSERT(region->fd != -1,
+			    "memfd_create failed, errno: %i", errno);
+
+		ret = ftruncate(region->fd, region->mmap_size);
+		TEST_ASSERT(ret == 0, "ftruncate failed, errno: %i", errno);
+
+		ret = fallocate(region->fd,
+				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+				region->mmap_size);
+		TEST_ASSERT(ret == 0, "fallocate failed, errno: %i", errno);
+	}
+
 	region->mmap_start = mmap(NULL, region->mmap_size,
 				  PROT_READ | PROT_WRITE,
 				  vm_mem_backing_src_alias(src_type)->flag,
-				  -1, 0);
+				  region->fd, 0);
 	TEST_ASSERT(region->mmap_start != MAP_FAILED,
 		    "test_malloc failed, mmap_start: %p errno: %i",
 		    region->mmap_start, errno);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 06ddde0687368..c7a265da50904 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -236,6 +236,10 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 			.name = "anonymous_hugetlb_16gb",
 			.flag = anon_huge_flags | MAP_HUGE_16GB,
 		},
+		[VM_MEM_SRC_SHMEM] = {
+			.name = "shmem",
+			.flag = MAP_SHARED,
+		},
 	};
 	_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
 		       "Missing new backing src types?");
@@ -253,6 +257,7 @@ size_t get_backing_src_pagesz(uint32_t i)
 
 	switch (i) {
 	case VM_MEM_SRC_ANONYMOUS:
+	case VM_MEM_SRC_SHMEM:
 		return getpagesize();
 	case VM_MEM_SRC_ANONYMOUS_THP:
 		return get_trans_hugepagesz();
-- 
GitLab


From 94f3f2b31a8a9e8bd30bf6f4903ff84acc612e0e Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:37 -0700
Subject: [PATCH 1687/3804] KVM: selftests: create alias mappings when using
 shared memory

When a memory region is added with a src_type specifying that it should
use some kind of shared memory, also create an alias mapping to the same
underlying physical pages.

And, add an API so tests can get access to these alias addresses.
Basically, for a guest physical address, let us look up the analogous
host *alias* address.

In a future commit, we'll modify the demand paging test to take
advantage of this to exercise UFFD minor faults. The idea is, we
pre-fault the underlying pages *via the alias*. When the *guest*
faults, it gets a "minor" fault (PTEs don't exist yet, but a page is
already in the page cache). Then the userfaultfd threads can handle the
fault: they could potentially modify the underlying memory *via the
alias* if they wanted to, and then they install the PTEs and let the
guest carry on via a UFFDIO_CONTINUE ioctl.

Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-9-axelrasmussen@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h  |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c    | 49 +++++++++++++++++++
 .../selftests/kvm/lib/kvm_util_internal.h     |  2 +
 3 files changed, 52 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 5d9b35d092517..fcd8e3855111c 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -147,6 +147,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
 
 /*
  * Address Guest Virtual to Guest Physical
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index bc50ca6390d30..f627807191763 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -903,6 +903,19 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
 	vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
 	hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+	/* If shared memory, create an alias. */
+	if (region->fd >= 0) {
+		region->mmap_alias = mmap(NULL, region->mmap_size,
+					  PROT_READ | PROT_WRITE,
+					  vm_mem_backing_src_alias(src_type)->flag,
+					  region->fd, 0);
+		TEST_ASSERT(region->mmap_alias != MAP_FAILED,
+			    "mmap of alias failed, errno: %i", errno);
+
+		/* Align host alias address */
+		region->host_alias = align(region->mmap_alias, alignment);
+	}
 }
 
 /*
@@ -1333,6 +1346,42 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
 	return -1;
 }
 
+/*
+ * Address VM physical to Host Virtual *alias*.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent address within the host virtual *alias* area, or NULL
+ *   (without failing the test) if the guest memory is not shared (so
+ *   no alias exists).
+ *
+ * When vm_create() and related functions are called with a shared memory
+ * src_type, we also create a writable, shared alias mapping of the
+ * underlying guest memory. This allows the host to manipulate guest memory
+ * without mapping that memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, we can do so without triggering userfaults.
+ */
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	struct userspace_mem_region *region;
+	uintptr_t offset;
+
+	region = userspace_mem_region_find(vm, gpa, gpa);
+	if (!region)
+		return NULL;
+
+	if (!region->host_alias)
+		return NULL;
+
+	offset = gpa - region->region.guest_phys_addr;
+	return (void *) ((uintptr_t) region->host_alias + offset);
+}
+
 /*
  * VM Create IRQ Chip
  *
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index b30e8c7b119b8..a03febc24ba63 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -19,7 +19,9 @@ struct userspace_mem_region {
 	int fd;
 	off_t offset;
 	void *host_mem;
+	void *host_alias;
 	void *mmap_start;
+	void *mmap_alias;
 	size_t mmap_size;
 	struct rb_node gpa_node;
 	struct rb_node hva_node;
-- 
GitLab


From a4b9722a5996017264feb19ebe86efe4380f7afb Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:38 -0700
Subject: [PATCH 1688/3804] KVM: selftests: allow using UFFD minor faults for
 demand paging

UFFD handling of MINOR faults is a new feature whose use case is to
speed up demand paging (compared to MISSING faults). So, it's
interesting to let this selftest exercise this new mode.

Modify the demand paging test to have the option of using UFFD minor
faults, as opposed to missing faults. Now, when turning on userfaultfd
with '-u', the desired mode has to be specified ("MISSING" or "MINOR").

If we're in minor mode, before registering, prefault via the *alias*.
This way, the guest will trigger minor faults, instead of missing
faults, and we can UFFDIO_CONTINUE to resolve them.

Modify the page fault handler function to use the right ioctl depending
on the mode we're running in. In MINOR mode, use UFFDIO_CONTINUE.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-10-axelrasmussen@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/demand_paging_test.c        | 112 ++++++++++++------
 1 file changed, 79 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 8c03484a57848..fcba527c29a68 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -73,33 +73,48 @@ static void *vcpu_worker(void *data)
 	return NULL;
 }
 
-static int handle_uffd_page_request(int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
 {
-	pid_t tid;
+	pid_t tid = syscall(__NR_gettid);
 	struct timespec start;
 	struct timespec ts_diff;
-	struct uffdio_copy copy;
 	int r;
 
-	tid = syscall(__NR_gettid);
+	clock_gettime(CLOCK_MONOTONIC, &start);
 
-	copy.src = (uint64_t)guest_data_prototype;
-	copy.dst = addr;
-	copy.len = demand_paging_size;
-	copy.mode = 0;
+	if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
+		struct uffdio_copy copy;
 
-	clock_gettime(CLOCK_MONOTONIC, &start);
+		copy.src = (uint64_t)guest_data_prototype;
+		copy.dst = addr;
+		copy.len = demand_paging_size;
+		copy.mode = 0;
+
+		r = ioctl(uffd, UFFDIO_COPY, &copy);
+		if (r == -1) {
+			pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+				addr, tid, errno);
+			return r;
+		}
+	} else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+		struct uffdio_continue cont = {0};
+
+		cont.range.start = addr;
+		cont.range.len = demand_paging_size;
 
-	r = ioctl(uffd, UFFDIO_COPY, &copy);
-	if (r == -1) {
-		pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
-			addr, tid, errno);
-		return r;
+		r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
+		if (r == -1) {
+			pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+				addr, tid, errno);
+			return r;
+		}
+	} else {
+		TEST_FAIL("Invalid uffd mode %d", uffd_mode);
 	}
 
 	ts_diff = timespec_elapsed(start);
 
-	PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+	PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
 		       timespec_to_ns(ts_diff));
 	PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
 		       demand_paging_size, addr, tid);
@@ -110,6 +125,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
 bool quit_uffd_thread;
 
 struct uffd_handler_args {
+	int uffd_mode;
 	int uffd;
 	int pipefd;
 	useconds_t delay;
@@ -186,7 +202,7 @@ static void *uffd_handler_thread_fn(void *arg)
 		if (delay)
 			usleep(delay);
 		addr =  msg.arg.pagefault.address;
-		r = handle_uffd_page_request(uffd, addr);
+		r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
 		if (r < 0)
 			return NULL;
 		pages++;
@@ -202,13 +218,32 @@ static void *uffd_handler_thread_fn(void *arg)
 
 static void setup_demand_paging(struct kvm_vm *vm,
 				pthread_t *uffd_handler_thread, int pipefd,
-				useconds_t uffd_delay,
+				int uffd_mode, useconds_t uffd_delay,
 				struct uffd_handler_args *uffd_args,
-				void *hva, uint64_t len)
+				void *hva, void *alias, uint64_t len)
 {
+	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
 	int uffd;
 	struct uffdio_api uffdio_api;
 	struct uffdio_register uffdio_register;
+	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+
+	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+		       is_minor ? "MINOR" : "MISSING",
+		       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+
+	/* In order to get minor faults, prefault via the alias. */
+	if (is_minor) {
+		size_t p;
+
+		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+		TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+		for (p = 0; p < (len / demand_paging_size); ++p) {
+			memcpy(alias + (p * demand_paging_size),
+			       guest_data_prototype, demand_paging_size);
+		}
+	}
 
 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 	TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
@@ -221,12 +256,13 @@ static void setup_demand_paging(struct kvm_vm *vm,
 
 	uffdio_register.range.start = (uint64_t)hva;
 	uffdio_register.range.len = len;
-	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	uffdio_register.mode = uffd_mode;
 	TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
 		    "ioctl UFFDIO_REGISTER failed");
-	TEST_ASSERT((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) ==
-		    UFFD_API_RANGE_IOCTLS, "unexpected userfaultfd ioctl set");
+	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+		    expected_ioctls, "missing userfaultfd ioctls");
 
+	uffd_args->uffd_mode = uffd_mode;
 	uffd_args->uffd = uffd;
 	uffd_args->pipefd = pipefd;
 	uffd_args->delay = uffd_delay;
@@ -238,7 +274,7 @@ static void setup_demand_paging(struct kvm_vm *vm,
 }
 
 struct test_params {
-	bool use_uffd;
+	int uffd_mode;
 	useconds_t uffd_delay;
 	enum vm_mem_backing_src_type src_type;
 	bool partition_vcpu_memory_access;
@@ -275,7 +311,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
 			      p->partition_vcpu_memory_access);
 
-	if (p->use_uffd) {
+	if (p->uffd_mode) {
 		uffd_handler_threads =
 			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
 		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
@@ -289,6 +325,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 			vm_paddr_t vcpu_gpa;
 			void *vcpu_hva;
+			void *vcpu_alias;
 			uint64_t vcpu_mem_size;
 
 
@@ -303,8 +340,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
 				       vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
 
-			/* Cache the HVA pointer of the region */
+			/* Cache the host addresses of the region */
 			vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+			vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
 
 			/*
 			 * Set up user fault fd to handle demand paging
@@ -315,8 +353,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			TEST_ASSERT(!r, "Failed to set up pipefd");
 
 			setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
-					    pipefds[vcpu_id * 2], p->uffd_delay,
-					    &uffd_args[vcpu_id], vcpu_hva,
+					    pipefds[vcpu_id * 2], p->uffd_mode,
+					    p->uffd_delay, &uffd_args[vcpu_id],
+					    vcpu_hva, vcpu_alias,
 					    vcpu_mem_size);
 		}
 	}
@@ -345,7 +384,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	pr_info("All vCPU threads joined\n");
 
-	if (p->use_uffd) {
+	if (p->uffd_mode) {
 		char c;
 
 		/* Tell the user fault fd handler threads to quit */
@@ -367,7 +406,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	free(guest_data_prototype);
 	free(vcpu_threads);
-	if (p->use_uffd) {
+	if (p->uffd_mode) {
 		free(uffd_handler_threads);
 		free(uffd_args);
 		free(pipefds);
@@ -377,11 +416,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 static void help(char *name)
 {
 	puts("");
-	printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
+	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
 	       "          [-b memory] [-t type] [-v vcpus] [-o]\n", name);
 	guest_modes_help();
-	printf(" -u: use User Fault FD to handle vCPU page\n"
-	       "     faults.\n");
+	printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
+	       "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
 	printf(" -d: add a delay in usec to the User Fault\n"
 	       "     FD handler to simulate demand paging\n"
 	       "     overheads. Ignored without -u.\n");
@@ -408,13 +447,17 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hm:ud:b:t:v:o")) != -1) {
+	while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
 		switch (opt) {
 		case 'm':
 			guest_modes_cmdline(optarg);
 			break;
 		case 'u':
-			p.use_uffd = true;
+			if (!strcmp("MISSING", optarg))
+				p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+			else if (!strcmp("MINOR", optarg))
+				p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
+			TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
 			break;
 		case 'd':
 			p.uffd_delay = strtoul(optarg, NULL, 0);
@@ -441,6 +484,9 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	TEST_ASSERT(p.uffd_mode != UFFDIO_REGISTER_MODE_MINOR || p.src_type == VM_MEM_SRC_SHMEM,
+		    "userfaultfd MINOR mode requires shared memory; pick a different -t");
+
 	for_each_guest_mode(run_test, &p);
 
 	return 0;
-- 
GitLab


From 33090a884da5e9760f11441ac269f754375f80f5 Mon Sep 17 00:00:00 2001
From: Axel Rasmussen <axelrasmussen@google.com>
Date: Wed, 19 May 2021 13:03:39 -0700
Subject: [PATCH 1689/3804] KVM: selftests: add shared hugetlbfs backing source
 type

This lets us run the demand paging test on top of a shared
hugetlbfs-backed area. The "shared" is key, as this allows us to
exercise userfaultfd minor faults on hugetlbfs.

Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Message-Id: <20210519200339.829146-11-axelrasmussen@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/demand_paging_test.c |  6 ++++--
 tools/testing/selftests/kvm/include/test_util.h  | 11 +++++++++++
 tools/testing/selftests/kvm/lib/kvm_util.c       |  9 +++++++--
 tools/testing/selftests/kvm/lib/test_util.c      | 11 +++++++++++
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index fcba527c29a68..b74704305835e 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -484,8 +484,10 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	TEST_ASSERT(p.uffd_mode != UFFDIO_REGISTER_MODE_MINOR || p.src_type == VM_MEM_SRC_SHMEM,
-		    "userfaultfd MINOR mode requires shared memory; pick a different -t");
+	if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
+	    !backing_src_is_shared(p.src_type)) {
+		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+	}
 
 	for_each_guest_mode(run_test, &p);
 
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 7377f00469ef4..d79be15dd3d20 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -17,6 +17,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/mman.h>
 #include "kselftest.h"
 
 static inline int _no_printf(const char *format, ...) { return 0; }
@@ -85,6 +86,7 @@ enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
 	VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
 	VM_MEM_SRC_SHMEM,
+	VM_MEM_SRC_SHARED_HUGETLB,
 	NUM_SRC_TYPES,
 };
 
@@ -101,4 +103,13 @@ size_t get_backing_src_pagesz(uint32_t i);
 void backing_src_help(void);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 
+/*
+ * Whether or not the given source type is shared memory (as opposed to
+ * anonymous).
+ */
+static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+{
+	return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
+}
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f627807191763..28e528c19d285 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -848,8 +848,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		region->mmap_size += alignment;
 
 	region->fd = -1;
-	if (src_type == VM_MEM_SRC_SHMEM) {
-		region->fd = memfd_create("kvm_selftest", MFD_CLOEXEC);
+	if (backing_src_is_shared(src_type)) {
+		int memfd_flags = MFD_CLOEXEC;
+
+		if (src_type == VM_MEM_SRC_SHARED_HUGETLB)
+			memfd_flags |= MFD_HUGETLB;
+
+		region->fd = memfd_create("kvm_selftest", memfd_flags);
 		TEST_ASSERT(region->fd != -1,
 			    "memfd_create failed, errno: %i", errno);
 
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index c7a265da50904..6ad6c8276b2eb 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -240,6 +240,16 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 			.name = "shmem",
 			.flag = MAP_SHARED,
 		},
+		[VM_MEM_SRC_SHARED_HUGETLB] = {
+			.name = "shared_hugetlb",
+			/*
+			 * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+			 * we're using "file backed" memory, we need to specify
+			 * this when the FD is created, not when the area is
+			 * mapped.
+			 */
+			.flag = MAP_SHARED,
+		},
 	};
 	_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
 		       "Missing new backing src types?");
@@ -262,6 +272,7 @@ size_t get_backing_src_pagesz(uint32_t i)
 	case VM_MEM_SRC_ANONYMOUS_THP:
 		return get_trans_hugepagesz();
 	case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+	case VM_MEM_SRC_SHARED_HUGETLB:
 		return get_def_hugetlb_pagesz();
 	default:
 		return MAP_HUGE_PAGE_SIZE(flag);
-- 
GitLab


From fb1070d18edb37daf3979662975bc54625a19953 Mon Sep 17 00:00:00 2001
From: Joe Richey <joerichey@google.com>
Date: Fri, 21 May 2021 01:58:43 -0700
Subject: [PATCH 1690/3804] KVM: X86: Use _BITUL() macro in UAPI headers

Replace BIT() in KVM's UAPI header with _BITUL(). BIT() is not defined
in the UAPI headers and its usage may cause userspace build errors.

Fixes: fb04a1eddb1a ("KVM: X86: Implement ring-based dirty memory tracking")
Signed-off-by: Joe Richey <joerichey@google.com>
Message-Id: <20210521085849.37676-3-joerichey94@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/kvm.h       | 5 +++--
 tools/include/uapi/linux/kvm.h | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3fd9a7e9d90cd..79d9c44d1ad73 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -8,6 +8,7 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
@@ -1879,8 +1880,8 @@ struct kvm_hyperv_eventfd {
  * conversion after harvesting an entry.  Also, it must not skip any
  * dirty bits, so that dirty bits are always harvested in sequence.
  */
-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
 #define KVM_DIRTY_GFN_F_MASK            0x3
 
 /*
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f6afee209620d..26e6d94d64ed4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -8,6 +8,7 @@
  * Note: you must update KVM_API_VERSION if you change this interface.
  */
 
+#include <linux/const.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
@@ -1834,8 +1835,8 @@ struct kvm_hyperv_eventfd {
  * conversion after harvesting an entry.  Also, it must not skip any
  * dirty bits, so that dirty bits are always harvested in sequence.
  */
-#define KVM_DIRTY_GFN_F_DIRTY           BIT(0)
-#define KVM_DIRTY_GFN_F_RESET           BIT(1)
+#define KVM_DIRTY_GFN_F_DIRTY           _BITUL(0)
+#define KVM_DIRTY_GFN_F_RESET           _BITUL(1)
 #define KVM_DIRTY_GFN_F_MASK            0x3
 
 /*
-- 
GitLab


From fb0f94794bb7558c078ce37b1a6e30d881fd7888 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 26 May 2021 14:36:14 -0400
Subject: [PATCH 1691/3804] selftests: kvm: do only 1 memslot_perf_test run by
 default

The test takes a long time with the current implementation of
memslots, so cut the run time a bit.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/memslot_perf_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 4ae0e5ec0f740..11239652d8057 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -992,7 +992,7 @@ int main(int argc, char *argv[])
 		.tlast = NTESTS - 1,
 		.nslots = -1,
 		.seconds = 5,
-		.runs = 20,
+		.runs = 1,
 	};
 	struct test_result rbestslottime;
 	int tctr;
-- 
GitLab


From 9805cf03fdb6828091fe09e4ef0fb544fca3eaf6 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 18 May 2021 05:00:35 -0700
Subject: [PATCH 1692/3804] KVM: LAPIC: Narrow the timer latency between
 wait_lapic_expire and world switch

Let's treat lapic_timer_advance_ns automatic tuning logic as hypervisor
overhead, move it before wait_lapic_expire instead of between wait_lapic_expire
and the world switch, the wait duration should be calculated by the
up-to-date guest_tsc after the overhead of automatic tuning logic. This
patch reduces ~30+ cycles for kvm-unit-tests/tscdeadline-latency when testing
busy waits.

Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1621339235-11131-5-git-send-email-wanpengli@tencent.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c0ebef560bd14..5d91f2367c310 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1598,11 +1598,19 @@ static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
 	apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
 
+	if (lapic_timer_advance_dynamic) {
+		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
+		/*
+		 * If the timer fired early, reread the TSC to account for the
+		 * overhead of the above adjustment to avoid waiting longer
+		 * than is necessary.
+		 */
+		if (guest_tsc < tsc_deadline)
+			guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
+	}
+
 	if (guest_tsc < tsc_deadline)
 		__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
-
-	if (lapic_timer_advance_dynamic)
-		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
 
 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
-- 
GitLab


From 57ab87947abfc4e0b0b9864dc4717326a1c28a39 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 25 May 2021 10:41:16 -0300
Subject: [PATCH 1693/3804] KVM: x86: add start_assignment hook to kvm_x86_ops

Add a start_assignment hook to kvm_x86_ops, which is called when
kvm_arch_start_assignment is done.

The hook is required to update the wakeup vector of a sleeping vCPU
when a device is assigned to the guest.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Message-Id: <20210525134321.254128742@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h    | 1 +
 arch/x86/kvm/x86.c                 | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 323641097f63a..e7bef91cee04a 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -99,6 +99,7 @@ KVM_X86_OP_NULL(post_block)
 KVM_X86_OP_NULL(vcpu_blocking)
 KVM_X86_OP_NULL(vcpu_unblocking)
 KVM_X86_OP_NULL(update_pi_irte)
+KVM_X86_OP_NULL(start_assignment)
 KVM_X86_OP_NULL(apicv_post_state_restore)
 KVM_X86_OP_NULL(dy_apicv_has_pending_interrupt)
 KVM_X86_OP_NULL(set_hv_timer)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55efbacfc2445..9c7ced0e31718 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1352,6 +1352,7 @@ struct kvm_x86_ops {
 
 	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
 			      uint32_t guest_irq, bool set);
+	void (*start_assignment)(struct kvm *kvm);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
 	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bed7b5348c0e9..98538b1cb4536 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11504,7 +11504,8 @@ bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
 
 void kvm_arch_start_assignment(struct kvm *kvm)
 {
-	atomic_inc(&kvm->arch.assigned_device_count);
+	if (atomic_inc_return(&kvm->arch.assigned_device_count) == 1)
+		static_call_cond(kvm_x86_start_assignment)(kvm);
 }
 EXPORT_SYMBOL_GPL(kvm_arch_start_assignment);
 
-- 
GitLab


From 084071d5e9226add45a6031928bf10e6afc855fd Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 25 May 2021 10:41:17 -0300
Subject: [PATCH 1694/3804] KVM: rename KVM_REQ_PENDING_TIMER to
 KVM_REQ_UNBLOCK

KVM_REQ_UNBLOCK will be used to exit a vcpu from
its inner vcpu halt emulation loop.

Rename KVM_REQ_PENDING_TIMER to KVM_REQ_UNBLOCK, switch
PowerPC to arch specific request bit.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Message-Id: <20210525134321.303768132@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/vcpu-requests.rst | 8 +++++---
 arch/powerpc/include/asm/kvm_host.h      | 1 +
 arch/x86/kvm/lapic.c                     | 2 +-
 arch/x86/kvm/x86.c                       | 2 +-
 include/linux/kvm_host.h                 | 2 +-
 virt/kvm/kvm_main.c                      | 2 ++
 6 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
index 5feb3706a7ae5..af1b37441e0ae 100644
--- a/Documentation/virt/kvm/vcpu-requests.rst
+++ b/Documentation/virt/kvm/vcpu-requests.rst
@@ -118,10 +118,12 @@ KVM_REQ_MMU_RELOAD
   necessary to inform each VCPU to completely refresh the tables.  This
   request is used for that.
 
-KVM_REQ_PENDING_TIMER
+KVM_REQ_UNBLOCK
 
-  This request may be made from a timer handler run on the host on behalf
-  of a VCPU.  It informs the VCPU thread to inject a timer interrupt.
+  This request informs the vCPU to exit kvm_vcpu_block.  It is used for
+  example from timer handlers that run on the host on behalf of a vCPU,
+  or in order to update the interrupt routing and ensure that assigned
+  devices will wake up the vCPU.
 
 KVM_REQ_UNHALT
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e83359f286b9..7f2e90db2050b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -51,6 +51,7 @@
 /* PPC-specific vcpu->requests bit members */
 #define KVM_REQ_WATCHDOG	KVM_ARCH_REQ(0)
 #define KVM_REQ_EPR_EXIT	KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER	KVM_ARCH_REQ(2)
 
 #include <linux/mmu_notifier.h>
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5d91f2367c310..8120e8614b92d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1669,7 +1669,7 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
 	}
 
 	atomic_inc(&apic->lapic_timer.pending);
-	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
 	if (from_timer_fn)
 		kvm_vcpu_kick(vcpu);
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 98538b1cb4536..fe464b66898ff 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9501,7 +9501,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 		if (r <= 0)
 			break;
 
-		kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_clear_request(KVM_REQ_UNBLOCK, vcpu);
 		if (kvm_cpu_has_pending_timer(vcpu))
 			kvm_inject_pending_timer_irqs(vcpu);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5d4b96b36ec0c..76102efbf0796 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -147,7 +147,7 @@ static inline bool is_error_page(struct page *page)
  */
 #define KVM_REQ_TLB_FLUSH         (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_PENDING_TIMER     2
+#define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
 #define KVM_REQUEST_ARCH_BASE     8
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5f40725144f59..37a2d500a1485 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2929,6 +2929,8 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 		goto out;
 	if (signal_pending(current))
 		goto out;
+	if (kvm_check_request(KVM_REQ_UNBLOCK, vcpu))
+		goto out;
 
 	ret = 0;
 out:
-- 
GitLab


From a2486020a82eefad686993695eb42d1b64f3f2fd Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 26 May 2021 14:20:14 -0300
Subject: [PATCH 1695/3804] KVM: VMX: update vcpu posted-interrupt descriptor
 when assigning device

For VMX, when a vcpu enters HLT emulation, pi_post_block will:

1) Add vcpu to per-cpu list of blocked vcpus.

2) Program the posted-interrupt descriptor "notification vector"
to POSTED_INTR_WAKEUP_VECTOR

With interrupt remapping, an interrupt will set the PIR bit for the
vector programmed for the device on the CPU, test-and-set the
ON bit on the posted interrupt descriptor, and if the ON bit is clear
generate an interrupt for the notification vector.

This way, the target CPU wakes upon a device interrupt and wakes up
the target vcpu.

Problem is that pi_post_block only programs the notification vector
if kvm_arch_has_assigned_device() is true. It's possible for the
following to happen:

1) vcpu V HLTs on pcpu P, kvm_arch_has_assigned_device is false,
notification vector is not programmed
2) device is assigned to VM
3) device interrupts vcpu V, sets ON bit
(notification vector not programmed, so pcpu P remains in idle)
4) vcpu 0 IPIs vcpu V (in guest), but since pi descriptor ON bit is set,
kvm_vcpu_kick is skipped
5) vcpu 0 busy spins on vcpu V's response for several seconds, until
RCU watchdog NMIs all vCPUs.

To fix this, use the start_assignment kvm_x86_ops callback to kick
vcpus out of the halt loop, so the notification vector is
properly reprogrammed to the wakeup vector.

Reported-by: Pei Zhang <pezhang@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Message-Id: <20210526172014.GA29007@fuller.cnet>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/posted_intr.c | 14 ++++++++++++++
 arch/x86/kvm/vmx/posted_intr.h |  1 +
 arch/x86/kvm/vmx/vmx.c         |  1 +
 virt/kvm/kvm_main.c            |  1 +
 4 files changed, 17 insertions(+)

diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index 459748680daf2..5f81ef092bd43 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -237,6 +237,20 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
 }
 
 
+/*
+ * Bail out of the block loop if the VM has an assigned
+ * device, but the blocking vCPU didn't reconfigure the
+ * PI.NV to the wakeup vector, i.e. the assigned device
+ * came along after the initial check in pi_pre_block().
+ */
+void vmx_pi_start_assignment(struct kvm *kvm)
+{
+	if (!irq_remapping_cap(IRQ_POSTING_CAP))
+		return;
+
+	kvm_make_all_cpus_request(kvm, KVM_REQ_UNBLOCK);
+}
+
 /*
  * pi_update_irte - set IRTE for Posted-Interrupts
  *
diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
index 0bdc41391c5b8..7f7b2326caf53 100644
--- a/arch/x86/kvm/vmx/posted_intr.h
+++ b/arch/x86/kvm/vmx/posted_intr.h
@@ -95,5 +95,6 @@ void __init pi_init_cpu(int cpu);
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq,
 		   bool set);
+void vmx_pi_start_assignment(struct kvm *kvm);
 
 #endif /* __KVM_X86_VMX_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4bceb5ca3a899..639ec3eba9b80 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7721,6 +7721,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.nested_ops = &vmx_nested_ops,
 
 	.update_pi_irte = pi_update_irte,
+	.start_assignment = vmx_pi_start_assignment,
 
 #ifdef CONFIG_X86_64
 	.set_hv_timer = vmx_set_hv_timer,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 37a2d500a1485..6a6bc7af0e28d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -307,6 +307,7 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
 	return kvm_make_all_cpus_request_except(kvm, req, NULL);
 }
+EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
-- 
GitLab


From e67afa7ee4a59584d7253e45d7f63b9528819a13 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 25 May 2021 23:32:35 -0400
Subject: [PATCH 1696/3804] NFSv4: Fix v4.0/v4.1 SEEK_DATA return -ENOTSUPP
 when set NFS_V4_2 config

Since commit bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors"),
nfs42_proc_llseek would return -EOPNOTSUPP rather than -ENOTSUPP when
SEEK_DATA on NFSv4.0/v4.1.

This will lead xfstests generic/285 not run on NFSv4.0/v4.1 when set the
CONFIG_NFS_V4_2, rather than run failed.

Fixes: bdcc2cd14e4e ("NFSv4.2: handle NFS-specific llseek errors")
Cc: <stable@vger.kernel.org> # 4.2
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 57b3821d975a3..a1e5c6b85dedc 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -211,7 +211,7 @@ static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
 	case SEEK_HOLE:
 	case SEEK_DATA:
 		ret = nfs42_proc_llseek(filep, offset, whence);
-		if (ret != -ENOTSUPP)
+		if (ret != -EOPNOTSUPP)
 			return ret;
 		fallthrough;
 	default:
-- 
GitLab


From ae605ee9830840f14566a3b1cde27fa8096dbdd4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 26 May 2021 15:35:20 -0400
Subject: [PATCH 1697/3804] xprtrdma: Revert 586a0787ce35

Commit 9ed5af268e88 ("SUNRPC: Clean up the handling of page padding
in rpc_prepare_reply_pages()") [Dec 2020] affects RPC Replies that
have a data payload (i.e., Write chunks).

rpcrdma_prepare_readch(), as its name suggests, sets up Read chunks
which are data payloads within RPC Calls. Those payloads are
constructed by xdr_write_pages(), which continues to stuff the call
buffer's tail kvec with the payload's XDR roundup. Thus removing
the tail buffer logic in rpcrdma_prepare_readch() was the wrong
thing to do.

Fixes: 586a0787ce35 ("xprtrdma: Clean up rpcrdma_prepare_readch()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 649f7d8b97331..c335c13615645 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -628,8 +628,9 @@ out_mapping_err:
 	return false;
 }
 
-/* The tail iovec might not reside in the same page as the
- * head iovec.
+/* The tail iovec may include an XDR pad for the page list,
+ * as well as additional content, and may not reside in the
+ * same page as the head iovec.
  */
 static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
 				     struct xdr_buf *xdr,
@@ -747,19 +748,27 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
 				   struct rpcrdma_req *req,
 				   struct xdr_buf *xdr)
 {
-	struct kvec *tail = &xdr->tail[0];
-
 	if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
 		return false;
 
-	/* If there is a Read chunk, the page list is handled
+	/* If there is a Read chunk, the page list is being handled
 	 * via explicit RDMA, and thus is skipped here.
 	 */
 
-	if (tail->iov_len) {
-		if (!rpcrdma_prepare_tail_iov(req, xdr,
-					      offset_in_page(tail->iov_base),
-					      tail->iov_len))
+	/* Do not include the tail if it is only an XDR pad */
+	if (xdr->tail[0].iov_len > 3) {
+		unsigned int page_base, len;
+
+		/* If the content in the page list is an odd length,
+		 * xdr_write_pages() adds a pad at the beginning of
+		 * the tail iovec. Force the tail's non-pad content to
+		 * land at the next XDR position in the Send message.
+		 */
+		page_base = offset_in_page(xdr->tail[0].iov_base);
+		len = xdr->tail[0].iov_len;
+		page_base += len & 3;
+		len -= len & 3;
+		if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
 			return false;
 		kref_get(&req->rl_kref);
 	}
-- 
GitLab


From bedd9195df3dfea7165e7d6f7519a1568bc41936 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 26 May 2021 16:32:27 +0000
Subject: [PATCH 1698/3804] KVM: x86/mmu: Fix comment mentioning skip_4k

This comment was left over from a previous version of the patch that
introduced wrprot_gfn_range, when skip_4k was passed in instead of
min_level.

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210526163227.3113557-1-dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 95eeb5ac6a8a7..237317b1eddda 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1192,9 +1192,9 @@ bool kvm_tdp_mmu_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 }
 
 /*
- * Remove write access from all the SPTEs mapping GFNs [start, end). If
- * skip_4k is set, SPTEs that map 4k pages, will not be write-protected.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
+ * Remove write access from all SPTEs at or above min_level that map GFNs
+ * [start, end). Returns true if an SPTE has been changed and the TLBs need to
+ * be flushed.
  */
 static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			     gfn_t start, gfn_t end, int min_level)
-- 
GitLab


From bbf0a94744edfeee298e4a9ab6fd694d639a5cdf Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 26 May 2021 22:33:34 +0300
Subject: [PATCH 1699/3804] mei: request autosuspend after sending rx flow
 control

A rx flow control waiting in the control queue may block autosuspend.
Re-request autosuspend after flow control has been sent to unblock
the transition to the low power state.

Cc: <stable@vger.kernel.org>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Link: https://lore.kernel.org/r/20210526193334.445759-1-tomas.winkler@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/interrupt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index a98f6b895af71..aab3ebfa9fc4d 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -277,6 +277,9 @@ static int mei_cl_irq_read(struct mei_cl *cl, struct mei_cl_cb *cb,
 		return ret;
 	}
 
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_request_autosuspend(dev->dev);
+
 	list_move_tail(&cb->list, &cl->rd_pending);
 
 	return 0;
-- 
GitLab


From a799b68a7c7ac97b457aba4ede4122a2a9f536ab Mon Sep 17 00:00:00 2001
From: Huilong Deng <denghuilong@cdjrlc.com>
Date: Fri, 21 May 2021 22:07:37 +0800
Subject: [PATCH 1700/3804] nfs: Remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Signed-off-by: Huilong Deng <denghuilong@cdjrlc.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/namespace.c | 2 +-
 fs/nfs/super.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 93e60e921f926..bc0c698f33508 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -362,7 +362,7 @@ static const struct kernel_param_ops param_ops_nfs_timeout = {
 	.set = param_set_nfs_timeout,
 	.get = param_get_nfs_timeout,
 };
-#define param_check_nfs_timeout(name, p) __param_check(name, p, int);
+#define param_check_nfs_timeout(name, p) __param_check(name, p, int)
 
 module_param(nfs_mountpoint_expiry_timeout, nfs_timeout, 0644);
 MODULE_PARM_DESC(nfs_mountpoint_expiry_timeout,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 19a212f9725de..fe58525cfed48 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1379,7 +1379,7 @@ static const struct kernel_param_ops param_ops_portnr = {
 	.set = param_set_portnr,
 	.get = param_get_uint,
 };
-#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
+#define param_check_portnr(name, p) __param_check(name, p, unsigned int)
 
 module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
 module_param_named(callback_nr_threads, nfs_callback_nr_threads, ushort, 0644);
-- 
GitLab


From 9808f9be31c68af43f6e531f2c851ebb066513fe Mon Sep 17 00:00:00 2001
From: Christian Gmeiner <christian.gmeiner@gmail.com>
Date: Thu, 27 May 2021 11:54:40 +0200
Subject: [PATCH 1701/3804] serial: 8250_pci: handle FL_NOIRQ board flag

In commit 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support")
the way the irq gets allocated was changed. With that change the
handling of FL_NOIRQ got lost. Restore the old behaviour.

Fixes: 8428413b1d14 ("serial: 8250_pci: Implement MSI(-X) support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com>
Link: https://lore.kernel.org/r/20210527095529.26281-1-christian.gmeiner@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 04fe424699903..780cc99732b62 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -3958,21 +3958,26 @@ pciserial_init_ports(struct pci_dev *dev, const struct pciserial_board *board)
 	uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ;
 	uart.port.uartclk = board->base_baud * 16;
 
-	if (pci_match_id(pci_use_msi, dev)) {
-		dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
-		pci_set_master(dev);
-		rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (board->flags & FL_NOIRQ) {
+		uart.port.irq = 0;
 	} else {
-		dev_dbg(&dev->dev, "Using legacy interrupts\n");
-		rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
-	}
-	if (rc < 0) {
-		kfree(priv);
-		priv = ERR_PTR(rc);
-		goto err_deinit;
+		if (pci_match_id(pci_use_msi, dev)) {
+			dev_dbg(&dev->dev, "Using MSI(-X) interrupts\n");
+			pci_set_master(dev);
+			rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_ALL_TYPES);
+		} else {
+			dev_dbg(&dev->dev, "Using legacy interrupts\n");
+			rc = pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_LEGACY);
+		}
+		if (rc < 0) {
+			kfree(priv);
+			priv = ERR_PTR(rc);
+			goto err_deinit;
+		}
+
+		uart.port.irq = pci_irq_vector(dev, 0);
 	}
 
-	uart.port.irq = pci_irq_vector(dev, 0);
 	uart.port.dev = &dev->dev;
 
 	for (i = 0; i < nr_ports; i++) {
-- 
GitLab


From 1b932689c77766b68e2ead51ca0fb84ec5bb8965 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 25 May 2021 13:20:11 +0100
Subject: [PATCH 1702/3804] lib: test_scanf: Remove pointless use of type_min()
 with unsigned types

sparse was producing warnings of the form:

 sparse: cast truncates bits from constant value (ffff0001 becomes 1)

There is no actual problem here. Using type_min() on an unsigned type
results in an (expected) truncation.

However, there is no need to test an unsigned value against type_min().
The minimum value of an unsigned is obviously 0, and any value cast to
an unsigned type is >= 0, so for unsigneds only type_max() need be tested.

This patch also takes the opportunity to clean up the implementation of
simple_numbers_loop() to use a common pattern for the positive and
negative test.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210525122012.6336-2-rf@opensource.cirrus.com
---
 lib/test_scanf.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index 8d577aec6c285..48ff5747a4da0 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -187,8 +187,8 @@ static const unsigned long long numbers[] __initconst = {
 #define value_representable_in_type(T, val)					 \
 (is_signed_type(T)								 \
 	? ((long long)(val) >= type_min(T)) && ((long long)(val) <= type_max(T)) \
-	: ((unsigned long long)(val) >= type_min(T)) &&				 \
-	  ((unsigned long long)(val) <= type_max(T)))
+	: ((unsigned long long)(val) <= type_max(T)))
+
 
 #define test_one_number(T, gen_fmt, scan_fmt, val, fn)			\
 do {									\
@@ -204,12 +204,11 @@ do {									\
 	int i;								\
 									\
 	for (i = 0; i < ARRAY_SIZE(numbers); i++) {			\
-		if (!value_representable_in_type(T, numbers[i]))	\
-			continue;					\
-									\
-		test_one_number(T, gen_fmt, scan_fmt, numbers[i], fn);	\
+		if (value_representable_in_type(T, numbers[i]))		\
+			test_one_number(T, gen_fmt, scan_fmt,		\
+					numbers[i], fn);		\
 									\
-		if (is_signed_type(T))					\
+		if (value_representable_in_type(T, -numbers[i]))	\
 			test_one_number(T, gen_fmt, scan_fmt,		\
 					-numbers[i], fn);		\
 	}								\
-- 
GitLab


From d327ea15a305024ef0085252fa3657bbb1ce25f5 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 25 May 2021 13:20:12 +0100
Subject: [PATCH 1703/3804] random32: Fix implicit truncation warning in
 prandom_seed_state()

sparse generates the following warning:

 include/linux/prandom.h:114:45: sparse: sparse: cast truncates bits from
 constant value

This is because the 64-bit seed value is manipulated and then placed in a
u32, causing an implicit cast and truncation. A forced cast to u32 doesn't
prevent this warning, which is reasonable because a typecast doesn't prove
that truncation was expected.

Logical-AND the value with 0xffffffff to make explicit that truncation to
32-bit is intended.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210525122012.6336-3-rf@opensource.cirrus.com
---
 include/linux/prandom.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index bbf4b4ad61dfd..056d31317e499 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -111,7 +111,7 @@ static inline u32 __seed(u32 x, u32 m)
  */
 static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 {
-	u32 i = (seed >> 32) ^ (seed << 10) ^ seed;
+	u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL;
 
 	state->s1 = __seed(i,   2U);
 	state->s2 = __seed(i,   8U);
-- 
GitLab


From e62b91cd8a8d4a18955802b852cac86cd72f79b1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 5 May 2021 23:39:30 +0200
Subject: [PATCH 1704/3804] HID: core: Remove extraneous empty line before
 EXPORT_SYMBOL_GPL(hid_check_keys_pressed)

Normally the EXPORT_SYMBOL of a function immediately follows the
definition of the function, and all the other functions in hid-core.c
follow this pattern. Drop the extraneous empty line before the
EXPORT_SYMBOL_GPL(hid_check_keys_pressed); line.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 265cbe592374c..0de2788b9814c 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -2591,7 +2591,6 @@ int hid_check_keys_pressed(struct hid_device *hid)
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(hid_check_keys_pressed);
 
 static int __init hid_init(void)
-- 
GitLab


From 31a4cf1d223dc6144d2e7c679cc3a98f84a1607b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 5 May 2021 23:39:32 +0200
Subject: [PATCH 1705/3804] HID: multitouch: Disable event reporting on suspend
 on the Asus T101HA touchpad

The Asus T101HA has a problem with spurious wakeups when the lid is
closed; this is caused by the screen sitting so close to the touchpad
that the touchpad ends up reporting touch events, causing these wakeups.

Add a quirk which disables event reporting on suspend when set, and
enable this quirk for the Asus T101HA touchpad fixing the spurious
wakeups, while still allowing the device to be woken by pressing a
key on the keyboard (which is part of the same USB device).

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-multitouch.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index eed81bdc2e869..2e4fb76c45f3d 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -70,6 +70,7 @@ MODULE_LICENSE("GPL");
 #define MT_QUIRK_WIN8_PTP_BUTTONS	BIT(18)
 #define MT_QUIRK_SEPARATE_APP_REPORT	BIT(19)
 #define MT_QUIRK_FORCE_MULTI_INPUT	BIT(20)
+#define MT_QUIRK_DISABLE_WAKEUP		BIT(21)
 
 #define MT_INPUTMODE_TOUCHSCREEN	0x02
 #define MT_INPUTMODE_TOUCHPAD		0x03
@@ -191,6 +192,7 @@ static void mt_post_parse(struct mt_device *td, struct mt_application *app);
 #define MT_CLS_EXPORT_ALL_INPUTS		0x0013
 /* reserved					0x0014 */
 #define MT_CLS_WIN_8_FORCE_MULTI_INPUT		0x0015
+#define MT_CLS_WIN_8_DISABLE_WAKEUP		0x0016
 
 /* vendor specific classes */
 #define MT_CLS_3M				0x0101
@@ -283,6 +285,15 @@ static const struct mt_class mt_classes[] = {
 			MT_QUIRK_WIN8_PTP_BUTTONS |
 			MT_QUIRK_FORCE_MULTI_INPUT,
 		.export_all_inputs = true },
+	{ .name = MT_CLS_WIN_8_DISABLE_WAKEUP,
+		.quirks = MT_QUIRK_ALWAYS_VALID |
+			MT_QUIRK_IGNORE_DUPLICATES |
+			MT_QUIRK_HOVERING |
+			MT_QUIRK_CONTACT_CNT_ACCURATE |
+			MT_QUIRK_STICKY_FINGERS |
+			MT_QUIRK_WIN8_PTP_BUTTONS |
+			MT_QUIRK_DISABLE_WAKEUP,
+		.export_all_inputs = true },
 
 	/*
 	 * vendor specific classes
@@ -763,7 +774,8 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
 			return 1;
 		case HID_DG_CONFIDENCE:
 			if ((cls->name == MT_CLS_WIN_8 ||
-			     cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT) &&
+			     cls->name == MT_CLS_WIN_8_FORCE_MULTI_INPUT ||
+			     cls->name == MT_CLS_WIN_8_DISABLE_WAKEUP) &&
 				(field->application == HID_DG_TOUCHPAD ||
 				 field->application == HID_DG_TOUCHSCREEN))
 				app->quirks |= MT_QUIRK_CONFIDENCE;
@@ -1753,8 +1765,14 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 #ifdef CONFIG_PM
 static int mt_suspend(struct hid_device *hdev, pm_message_t state)
 {
+	struct mt_device *td = hid_get_drvdata(hdev);
+
 	/* High latency is desirable for power savings during S3/S0ix */
-	mt_set_modes(hdev, HID_LATENCY_HIGH, true, true);
+	if (td->mtclass.quirks & MT_QUIRK_DISABLE_WAKEUP)
+		mt_set_modes(hdev, HID_LATENCY_HIGH, false, false);
+	else
+		mt_set_modes(hdev, HID_LATENCY_HIGH, true, true);
+
 	return 0;
 }
 
@@ -1813,6 +1831,12 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_ANTON,
 			USB_DEVICE_ID_ANTON_TOUCH_PAD) },
 
+	/* Asus T101HA */
+	{ .driver_data = MT_CLS_WIN_8_DISABLE_WAKEUP,
+		HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
+			   USB_VENDOR_ID_ASUSTEK,
+			   USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD) },
+
 	/* Asus T304UA */
 	{ .driver_data = MT_CLS_ASUS,
 		HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8,
-- 
GitLab


From 81c8bf9170477d453b24a6bc3300d201d641e645 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Fri, 7 May 2021 12:18:19 -0700
Subject: [PATCH 1706/3804] HID: logitech-hidpp: initialize level variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Static analysis reports this representative problem:

hid-logitech-hidpp.c:1356:23: warning: Assigned value is
  garbage or undefined
        hidpp->battery.level = level;
                             ^ ~~~~~

In some cases, 'level' is never set in hidpp20_battery_map_status_voltage().
Since level is not available on all hw, initialize level to unknown.

Fixes: be281368f297 ("hid-logitech-hidpp: read battery voltage from newer devices")
Signed-off-by: Tom Rix <trix@redhat.com>
Reviewed-by: Filipe Laíns <lains@riseup.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-logitech-hidpp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index d598094dadd0c..fee4e54a3ce08 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1263,6 +1263,7 @@ static int hidpp20_battery_map_status_voltage(u8 data[3], int *voltage,
 	int status;
 
 	long flags = (long) data[2];
+	*level = POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
 
 	if (flags & 0x80)
 		switch (flags & 0x07) {
-- 
GitLab


From 3dd653c077efda8152f4dd395359617d577a54cd Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 10:47:37 +0800
Subject: [PATCH 1707/3804] HID: pidff: fix error return code in
 hid_pidff_init()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 224ee88fe395 ("Input: add force feedback driver for PID devices")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-pidff.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/usbhid/hid-pidff.c b/drivers/hid/usbhid/hid-pidff.c
index ea126c50acc3b..3b4ee21cd8111 100644
--- a/drivers/hid/usbhid/hid-pidff.c
+++ b/drivers/hid/usbhid/hid-pidff.c
@@ -1292,6 +1292,7 @@ int hid_pidff_init(struct hid_device *hid)
 
 	if (pidff->pool[PID_DEVICE_MANAGED_POOL].value &&
 	    pidff->pool[PID_DEVICE_MANAGED_POOL].value[0] == 0) {
+		error = -EPERM;
 		hid_notice(hid,
 			   "device does not support device managed pool\n");
 		goto fail;
-- 
GitLab


From a4b494099ad657f1cb85436d333cf38870ee95bc Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Sat, 8 May 2021 11:14:48 +0800
Subject: [PATCH 1708/3804] HID: gt683r: add missing MODULE_DEVICE_TABLE

This patch adds missing MODULE_DEVICE_TABLE definition which generates
correct modalias for automatic loading of this driver when it is built
as an external module.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-gt683r.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/hid-gt683r.c b/drivers/hid/hid-gt683r.c
index 898871c8c768e..29ccb0accfba8 100644
--- a/drivers/hid/hid-gt683r.c
+++ b/drivers/hid/hid-gt683r.c
@@ -54,6 +54,7 @@ static const struct hid_device_id gt683r_led_id[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_MSI, USB_DEVICE_ID_MSI_GT683R_LED_PANEL) },
 	{ }
 };
+MODULE_DEVICE_TABLE(hid, gt683r_led_id);
 
 static void gt683r_brightness_set(struct led_classdev *led_cdev,
 				enum led_brightness brightness)
-- 
GitLab


From 4fb125192563670e820991de48f8db495ecc7ff7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Exp=C3=B3sito?= <jose.exposito89@gmail.com>
Date: Mon, 10 May 2021 08:22:37 +0200
Subject: [PATCH 1709/3804] HID: magicmouse: fix crash when disconnecting Magic
 Trackpad 2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the Apple Magic Trackpad 2 is connected over USB it registers four
hid_device report descriptors, however, the driver only handles the one
with type HID_TYPE_USBMOUSE and ignores the other three, thus, no driver
data is attached to them.

When the device is disconnected, the remove callback is called for the
four hid_device report descriptors, crashing when the driver data is
NULL.

Check that the driver data is not NULL before using it in the remove
callback.

Signed-off-by: José Expósito <jose.exposito89@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-magicmouse.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 2bb473d8c424e..097870e43cfe8 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -779,7 +779,10 @@ err_stop_hw:
 static void magicmouse_remove(struct hid_device *hdev)
 {
 	struct magicmouse_sc *msc = hid_get_drvdata(hdev);
-	cancel_delayed_work_sync(&msc->work);
+
+	if (msc)
+		cancel_delayed_work_sync(&msc->work);
+
 	hid_hw_stop(hdev);
 }
 
-- 
GitLab


From 82f09a637dd3215bce5314664f0171cdc3e43bb5 Mon Sep 17 00:00:00 2001
From: Michael Zaidman <michael.zaidman@gmail.com>
Date: Tue, 11 May 2021 13:12:08 +0300
Subject: [PATCH 1710/3804] HID: ft260: improve error handling of
 ft260_hid_feature_report_get()

The ft260_hid_feature_report_get() checks if the return size matches the
requested size. But the function can also fail with at least -ENOMEM.  Add the
< 0 checks.

In ft260_hid_feature_report_get(), do not do the memcpy to the caller's buffer
if there is an error.

Fixes: 6a82582d9fa4 ("HID: ft260: add usb hid to i2c host bridge driver")
Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Michael Zaidman <michael.zaidman@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-ft260.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c
index 7a9ba984a75ac..f43a8406cb9a9 100644
--- a/drivers/hid/hid-ft260.c
+++ b/drivers/hid/hid-ft260.c
@@ -249,7 +249,10 @@ static int ft260_hid_feature_report_get(struct hid_device *hdev,
 
 	ret = hid_hw_raw_request(hdev, report_id, buf, len, HID_FEATURE_REPORT,
 				 HID_REQ_GET_REPORT);
-	memcpy(data, buf, len);
+	if (likely(ret == len))
+		memcpy(data, buf, len);
+	else if (ret >= 0)
+		ret = -EIO;
 	kfree(buf);
 	return ret;
 }
@@ -298,7 +301,7 @@ static int ft260_xfer_status(struct ft260_device *dev)
 
 	ret = ft260_hid_feature_report_get(hdev, FT260_I2C_STATUS,
 					   (u8 *)&report, sizeof(report));
-	if (ret < 0) {
+	if (unlikely(ret < 0)) {
 		hid_err(hdev, "failed to retrieve status: %d\n", ret);
 		return ret;
 	}
@@ -724,10 +727,9 @@ static int ft260_get_system_config(struct hid_device *hdev,
 
 	ret = ft260_hid_feature_report_get(hdev, FT260_SYSTEM_SETTINGS,
 					   (u8 *)cfg, len);
-	if (ret != len) {
+	if (ret < 0) {
 		hid_err(hdev, "failed to retrieve system status\n");
-		if (ret >= 0)
-			return -EIO;
+		return ret;
 	}
 	return 0;
 }
@@ -780,8 +782,8 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len,
 	int ret;
 
 	ret = ft260_hid_feature_report_get(hdev, id, cfg, len);
-	if (ret != len && ret >= 0)
-		return -EIO;
+	if (ret < 0)
+		return ret;
 
 	return scnprintf(buf, PAGE_SIZE, "%hi\n", *field);
 }
@@ -792,8 +794,8 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len,
 	int ret;
 
 	ret = ft260_hid_feature_report_get(hdev, id, cfg, len);
-	if (ret != len && ret >= 0)
-		return -EIO;
+	if (ret < 0)
+		return ret;
 
 	return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field));
 }
@@ -944,10 +946,8 @@ static int ft260_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
 	ret = ft260_hid_feature_report_get(hdev, FT260_CHIP_VERSION,
 					   (u8 *)&version, sizeof(version));
-	if (ret != sizeof(version)) {
+	if (ret < 0) {
 		hid_err(hdev, "failed to retrieve chip version\n");
-		if (ret >= 0)
-			ret = -EIO;
 		goto err_hid_close;
 	}
 
-- 
GitLab


From e3d6a599969b8244eeb447e372ec3b1eddd7534e Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Wed, 12 May 2021 18:41:55 +0530
Subject: [PATCH 1711/3804] HID: amd_sfh: Use devm_kzalloc() instead of
 kzalloc()

Replace kzalloc with devm_kzalloc in driver initialization sequence. The
allocation can be tied to the lifetime of the amd_sfh driver. This cleans
up the exit and error paths, since the objects do not need to be
explicitly freed anymore.

Fixes: 4b2c53d93a4b ("SFH:Transport Driver to add support of AMD Sensor Fusion Hub (SFH)")
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_client.c | 18 +++++++++---------
 drivers/hid/amd-sfh-hid/amd_sfh_hid.c    |  3 ---
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index 2ab38b7153477..d04d6bd4623d4 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -142,7 +142,7 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 	int rc, i;
 
 	dev = &privdata->pdev->dev;
-	cl_data = kzalloc(sizeof(*cl_data), GFP_KERNEL);
+	cl_data = devm_kzalloc(dev, sizeof(*cl_data), GFP_KERNEL);
 	if (!cl_data)
 		return -ENOMEM;
 
@@ -175,12 +175,12 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 			rc = -EINVAL;
 			goto cleanup;
 		}
-		cl_data->feature_report[i] = kzalloc(feature_report_size, GFP_KERNEL);
+		cl_data->feature_report[i] = devm_kzalloc(dev, feature_report_size, GFP_KERNEL);
 		if (!cl_data->feature_report[i]) {
 			rc = -ENOMEM;
 			goto cleanup;
 		}
-		cl_data->input_report[i] = kzalloc(input_report_size, GFP_KERNEL);
+		cl_data->input_report[i] = devm_kzalloc(dev, input_report_size, GFP_KERNEL);
 		if (!cl_data->input_report[i]) {
 			rc = -ENOMEM;
 			goto cleanup;
@@ -189,7 +189,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata)
 		info.sensor_idx = cl_idx;
 		info.dma_address = cl_data->sensor_dma_addr[i];
 
-		cl_data->report_descr[i] = kzalloc(cl_data->report_descr_sz[i], GFP_KERNEL);
+		cl_data->report_descr[i] =
+			devm_kzalloc(dev, cl_data->report_descr_sz[i], GFP_KERNEL);
 		if (!cl_data->report_descr[i]) {
 			rc = -ENOMEM;
 			goto cleanup;
@@ -214,11 +215,11 @@ cleanup:
 					  cl_data->sensor_virt_addr[i],
 					  cl_data->sensor_dma_addr[i]);
 		}
-		kfree(cl_data->feature_report[i]);
-		kfree(cl_data->input_report[i]);
-		kfree(cl_data->report_descr[i]);
+		devm_kfree(dev, cl_data->feature_report[i]);
+		devm_kfree(dev, cl_data->input_report[i]);
+		devm_kfree(dev, cl_data->report_descr[i]);
 	}
-	kfree(cl_data);
+	devm_kfree(dev, cl_data);
 	return rc;
 }
 
@@ -241,6 +242,5 @@ int amd_sfh_hid_client_deinit(struct amd_mp2_dev *privdata)
 					  cl_data->sensor_dma_addr[i]);
 		}
 	}
-	kfree(cl_data);
 	return 0;
 }
diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
index 4f989483aa03d..5ad1e7acd294e 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c
@@ -162,9 +162,6 @@ void amdtp_hid_remove(struct amdtp_cl_data *cli_data)
 	int i;
 
 	for (i = 0; i < cli_data->num_hid_devices; ++i) {
-		kfree(cli_data->feature_report[i]);
-		kfree(cli_data->input_report[i]);
-		kfree(cli_data->report_descr[i]);
 		if (cli_data->hid_sensor_hubs[i]) {
 			kfree(cli_data->hid_sensor_hubs[i]->driver_data);
 			hid_destroy_device(cli_data->hid_sensor_hubs[i]);
-- 
GitLab


From 5ad755fd2b326aa2bc8910b0eb351ee6aece21b1 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Wed, 12 May 2021 18:41:56 +0530
Subject: [PATCH 1712/3804] HID: amd_sfh: Fix memory leak in amd_sfh_work

Kmemleak tool detected a memory leak in the amd_sfh driver.

====================
unreferenced object 0xffff88810228ada0 (size 32):
  comm "insmod", pid 3968, jiffies 4295056001 (age 775.792s)
  hex dump (first 32 bytes):
    00 20 73 1f 81 88 ff ff 00 01 00 00 00 00 ad de  . s.............
    22 01 00 00 00 00 ad de 01 00 02 00 00 00 00 00  "...............
  backtrace:
    [<000000007b4c8799>] kmem_cache_alloc_trace+0x163/0x4f0
    [<0000000005326893>] amd_sfh_get_report+0xa4/0x1d0 [amd_sfh]
    [<000000002a9e5ec4>] amdtp_hid_request+0x62/0x80 [amd_sfh]
    [<00000000b8a95807>] sensor_hub_get_feature+0x145/0x270 [hid_sensor_hub]
    [<00000000fda054ee>] hid_sensor_parse_common_attributes+0x215/0x460 [hid_sensor_iio_common]
    [<0000000021279ecf>] hid_accel_3d_probe+0xff/0x4a0 [hid_sensor_accel_3d]
    [<00000000915760ce>] platform_probe+0x6a/0xd0
    [<0000000060258a1f>] really_probe+0x192/0x620
    [<00000000fa812f2d>] driver_probe_device+0x14a/0x1d0
    [<000000005e79f7fd>] __device_attach_driver+0xbd/0x110
    [<0000000070d15018>] bus_for_each_drv+0xfd/0x160
    [<0000000013a3c312>] __device_attach+0x18b/0x220
    [<000000008c7b4afc>] device_initial_probe+0x13/0x20
    [<00000000e6e99665>] bus_probe_device+0xfe/0x120
    [<00000000833fa90b>] device_add+0x6a6/0xe00
    [<00000000fa901078>] platform_device_add+0x180/0x380
====================

The fix is to free the request_list entry once the processed entry has
been removed from the request_list.

Fixes: 4b2c53d93a4b ("SFH:Transport Driver to add support of AMD Sensor Fusion Hub (SFH)")
Reviewed-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/amd-sfh-hid/amd_sfh_client.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
index d04d6bd4623d4..3589d9945da1c 100644
--- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c
+++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c
@@ -88,6 +88,7 @@ static void amd_sfh_work(struct work_struct *work)
 	sensor_index = req_node->sensor_idx;
 	report_id = req_node->report_id;
 	node_type = req_node->report_type;
+	kfree(req_node);
 
 	if (node_type == HID_FEATURE_REPORT) {
 		report_size = get_feature_report(sensor_index, report_id,
-- 
GitLab


From dc5f9f55502e13ba05731d5046a14620aa2ff456 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 15:58:50 +0200
Subject: [PATCH 1713/3804] HID: i2c-hid: fix format string mismatch

clang doesn't like printing a 32-bit integer using %hX format string:

drivers/hid/i2c-hid/i2c-hid-core.c:994:18: error: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Werror,-Wformat]
                 client->name, hid->vendor, hid->product);
                               ^~~~~~~~~~~
drivers/hid/i2c-hid/i2c-hid-core.c:994:31: error: format specifies type 'unsigned short' but the argument has type '__u32' (aka 'unsigned int') [-Werror,-Wformat]
                 client->name, hid->vendor, hid->product);
                                            ^~~~~~~~~~~~

Use an explicit cast to truncate it to the low 16 bits instead.

Fixes: 9ee3e06610fd ("HID: i2c-hid: override HID descriptors for certain devices")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/i2c-hid/i2c-hid-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index ce91b1e57876d..46474612e73c6 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -997,8 +997,8 @@ int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
 	hid->vendor = le16_to_cpu(ihid->hdesc.wVendorID);
 	hid->product = le16_to_cpu(ihid->hdesc.wProductID);
 
-	snprintf(hid->name, sizeof(hid->name), "%s %04hX:%04hX",
-		 client->name, hid->vendor, hid->product);
+	snprintf(hid->name, sizeof(hid->name), "%s %04X:%04X",
+		 client->name, (u16)hid->vendor, (u16)hid->product);
 	strlcpy(hid->phys, dev_name(&client->dev), sizeof(hid->phys));
 
 	ihid->quirks = i2c_hid_lookup_quirk(hid->vendor, hid->product);
-- 
GitLab


From 22db5e0003e1441cd829180cebb42f7a6b7a46b7 Mon Sep 17 00:00:00 2001
From: Ye Xiang <xiang.ye@intel.com>
Date: Mon, 17 May 2021 14:36:09 +0800
Subject: [PATCH 1714/3804] HID: intel-ish-hid: ipc: Add Alder Lake device IDs

Add Alder Lake PCI device IDs to the supported device list.

Signed-off-by: Ye Xiang <xiang.ye@intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ipc/hw-ish.h  | 2 ++
 drivers/hid/intel-ish-hid/ipc/pci-ish.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
index 21b87e4003afc..07e3cbc86bef1 100644
--- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h
+++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
@@ -28,6 +28,8 @@
 #define EHL_Ax_DEVICE_ID	0x4BB3
 #define TGL_LP_DEVICE_ID	0xA0FC
 #define TGL_H_DEVICE_ID		0x43FC
+#define ADL_S_DEVICE_ID		0x7AF8
+#define ADL_P_DEVICE_ID		0x51FC
 
 #define	REVISION_ID_CHT_A0	0x6
 #define	REVISION_ID_CHT_Ax_SI	0x0
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 06081cf9b85a2..a6d5173ac0030 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -39,6 +39,8 @@ static const struct pci_device_id ish_pci_tbl[] = {
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, EHL_Ax_DEVICE_ID)},
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_LP_DEVICE_ID)},
 	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_H_DEVICE_ID)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_S_DEVICE_ID)},
+	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, ADL_P_DEVICE_ID)},
 	{0, }
 };
 MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
-- 
GitLab


From 4b4f6cecca446abcb686c6e6c451d4f1ec1a7497 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 17 May 2021 12:04:30 +0200
Subject: [PATCH 1715/3804] HID: magicmouse: fix NULL-deref on disconnect

Commit 9d7b18668956 ("HID: magicmouse: add support for Apple Magic
Trackpad 2") added a sanity check for an Apple trackpad but returned
success instead of -ENODEV when the check failed. This means that the
remove callback will dereference the never-initialised driver data
pointer when the driver is later unbound (e.g. on USB disconnect).

Reported-by: syzbot+ee6f6e2e68886ca256a8@syzkaller.appspotmail.com
Fixes: 9d7b18668956 ("HID: magicmouse: add support for Apple Magic Trackpad 2")
Cc: stable@vger.kernel.org      # 4.20
Cc: Claudio Mettler <claudio@ponyfleisch.ch>
Cc: Marek Wyborski <marek.wyborski@emwesoft.com>
Cc: Sean O'Brien <seobrien@chromium.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-magicmouse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c
index 097870e43cfe8..8bcaee4ccae03 100644
--- a/drivers/hid/hid-magicmouse.c
+++ b/drivers/hid/hid-magicmouse.c
@@ -693,7 +693,7 @@ static int magicmouse_probe(struct hid_device *hdev,
 	if (id->vendor == USB_VENDOR_ID_APPLE &&
 	    id->product == USB_DEVICE_ID_APPLE_MAGICTRACKPAD2 &&
 	    hdev->type != HID_TYPE_USBMOUSE)
-		return 0;
+		return -ENODEV;
 
 	msc = devm_kzalloc(&hdev->dev, sizeof(*msc), GFP_KERNEL);
 	if (msc == NULL) {
-- 
GitLab


From a94f66aecdaa498d83314cadac466d8b65674b94 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 5 May 2021 23:39:34 +0200
Subject: [PATCH 1716/3804] HID: asus: Cleanup Asus T101HA keyboard-dock
 handling

There is no need to use a quirk and then return -ENODEV from the
asus_probe() function to avoid that hid-asus binds to the hiddev
for the USB-interface for the hid-multitouch touchpad.

The hid-multitouch hiddev has a group of HID_GROUP_MULTITOUCH_WIN_8,
so the same result can be achieved by making the hid_device_id entry
for the dock in the asus_devices[] table only match on HID_GROUP_GENERIC
instead of having it match HID_GROUP_ANY.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-asus.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 60606c11bdaf0..fca8fc78a78a3 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -79,10 +79,9 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_T100_KEYBOARD		BIT(6)
 #define QUIRK_T100CHI			BIT(7)
 #define QUIRK_G752_KEYBOARD		BIT(8)
-#define QUIRK_T101HA_DOCK		BIT(9)
-#define QUIRK_T90CHI			BIT(10)
-#define QUIRK_MEDION_E1239T		BIT(11)
-#define QUIRK_ROG_NKEY_KEYBOARD		BIT(12)
+#define QUIRK_T90CHI			BIT(9)
+#define QUIRK_MEDION_E1239T		BIT(10)
+#define QUIRK_ROG_NKEY_KEYBOARD		BIT(11)
 
 #define I2C_KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
 						 QUIRK_NO_INIT_REPORTS | \
@@ -1082,11 +1081,6 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		return ret;
 	}
 
-	/* use hid-multitouch for T101HA touchpad */
-	if (id->driver_data & QUIRK_T101HA_DOCK &&
-	    hdev->collection->usage == HID_GD_MOUSE)
-		return -ENODEV;
-
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		hid_err(hdev, "Asus hw start failed: %d\n", ret);
@@ -1240,8 +1234,6 @@ static const struct hid_device_id asus_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_T100TAF_KEYBOARD),
 	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
-		USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD), QUIRK_T101HA_DOCK },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_ASUS_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_ASUS_MD_5110) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_ASUS_MD_5112) },
@@ -1249,6 +1241,12 @@ static const struct hid_device_id asus_devices[] = {
 		USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), QUIRK_T100CHI },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE_MEDION_E1239T),
 		QUIRK_MEDION_E1239T },
+	/*
+	 * Note bind to the HID_GROUP_GENERIC group, so that we only bind to the keyboard
+	 * part, while letting hid-multitouch.c handle the touchpad.
+	 */
+	{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+		USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, asus_devices);
-- 
GitLab


From b16ef427adf31fb4f6522458d37b3fe21d6d03b8 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 27 May 2021 11:25:48 +0200
Subject: [PATCH 1717/3804] io_uring: fix data race to avoid potential
 NULL-deref

Commit ba5ef6dc8a82 ("io_uring: fortify tctx/io_wq cleanup") introduced
setting tctx->io_wq to NULL a bit earlier. This has caused KCSAN to
detect a data race between accesses to tctx->io_wq:

  write to 0xffff88811d8df330 of 8 bytes by task 3709 on cpu 1:
   io_uring_clean_tctx                  fs/io_uring.c:9042 [inline]
   __io_uring_cancel                    fs/io_uring.c:9136
   io_uring_files_cancel                include/linux/io_uring.h:16 [inline]
   do_exit                              kernel/exit.c:781
   do_group_exit                        kernel/exit.c:923
   get_signal                           kernel/signal.c:2835
   arch_do_signal_or_restart            arch/x86/kernel/signal.c:789
   handle_signal_work                   kernel/entry/common.c:147 [inline]
   exit_to_user_mode_loop               kernel/entry/common.c:171 [inline]
   ...
  read to 0xffff88811d8df330 of 8 bytes by task 6412 on cpu 0:
   io_uring_try_cancel_iowq             fs/io_uring.c:8911 [inline]
   io_uring_try_cancel_requests         fs/io_uring.c:8933
   io_ring_exit_work                    fs/io_uring.c:8736
   process_one_work                     kernel/workqueue.c:2276
   ...

With the config used, KCSAN only reports data races with value changes:
this implies that in the case here we also know that tctx->io_wq was
non-NULL. Therefore, depending on interleaving, we may end up with:

              [CPU 0]                 |        [CPU 1]
  io_uring_try_cancel_iowq()          | io_uring_clean_tctx()
    if (!tctx->io_wq) // false        |   ...
    ...                               |   tctx->io_wq = NULL
    io_wq_cancel_cb(tctx->io_wq, ...) |   ...
      -> NULL-deref                   |

Note: It is likely that thus far we've gotten lucky and the compiler
optimizes the double-read into a single read into a register -- but this
is never guaranteed, and can easily change with a different config!

Fix the data race by restoring the previous behaviour, where both
setting io_wq to NULL and put of the wq are _serialized_ after
concurrent io_uring_try_cancel_iowq() via acquisition of the uring_lock
and removal of the node in io_uring_del_task_file().

Fixes: ba5ef6dc8a82 ("io_uring: fortify tctx/io_wq cleanup")
Suggested-by: Pavel Begunkov <asml.silence@gmail.com>
Reported-by: syzbot+bf2b3d0435b9b728946c@syzkaller.appspotmail.com
Signed-off-by: Marco Elver <elver@google.com>
Cc: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20210527092547.2656514-1-elver@google.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6af8ca0cb01c7..903458afd56c1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9039,11 +9039,16 @@ static void io_uring_clean_tctx(struct io_uring_task *tctx)
 	struct io_tctx_node *node;
 	unsigned long index;
 
-	tctx->io_wq = NULL;
 	xa_for_each(&tctx->xa, index, node)
 		io_uring_del_task_file(index);
-	if (wq)
+	if (wq) {
+		/*
+		 * Must be after io_uring_del_task_file() (removes nodes under
+		 * uring_lock) to avoid race with io_uring_try_cancel_iowq().
+		 */
+		tctx->io_wq = NULL;
 		io_wq_put_and_exit(wq);
+	}
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
-- 
GitLab


From 0ee74d5a48635c848c20f152d0d488bf84641304 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Tue, 25 May 2021 15:08:02 +0800
Subject: [PATCH 1718/3804] iommu/vt-d: Fix sysfs leak in alloc_iommu()

iommu_device_sysfs_add() is called before, so it has to be cleaned up on
subsequent errors.

Fixes: 39ab9555c2411 ("iommu: Add sysfs bindings for struct iommu_device")
Cc: stable@vger.kernel.org # 4.11.x
Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Acked-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/17411490.HIIP88n32C@mobilepool36.emlix.com
Link: https://lore.kernel.org/r/20210525070802.361755-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/dmar.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 1757ac1e1623e..84057cb9596cb 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1142,7 +1142,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 		err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
 		if (err)
-			goto err_unmap;
+			goto err_sysfs;
 	}
 
 	drhd->iommu = iommu;
@@ -1150,6 +1150,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	return 0;
 
+err_sysfs:
+	iommu_device_sysfs_remove(&iommu->iommu);
 err_unmap:
 	unmap_iommu(iommu);
 error_free_seq_id:
-- 
GitLab


From 991c2c5980fb97ae6194f7c46b44f9446629eb4e Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 26 May 2021 19:57:42 -0700
Subject: [PATCH 1719/3804] xfs: btree format inode forks can have zero extents

xfs/538 is assert failing with this trace when testing with
directory block sizes of 64kB:

XFS: Assertion failed: !xfs_need_iread_extents(ifp), file: fs/xfs/libxfs/xfs_bmap.c, line: 608
....
Call Trace:
 xfs_bmap_btree_to_extents+0x2a9/0x470
 ? kmem_cache_alloc+0xe7/0x220
 __xfs_bunmapi+0x4ca/0xdf0
 xfs_bunmapi+0x1a/0x30
 xfs_dir2_shrink_inode+0x71/0x210
 xfs_dir2_block_to_sf+0x2ae/0x410
 xfs_dir2_block_removename+0x21a/0x280
 xfs_dir_removename+0x195/0x1d0
 xfs_remove+0x244/0x460
 xfs_vn_unlink+0x53/0xa0
 ? selinux_inode_unlink+0x13/0x20
 vfs_unlink+0x117/0x220
 do_unlinkat+0x1a2/0x2d0
 __x64_sys_unlink+0x42/0x60
 do_syscall_64+0x3a/0x70
 entry_SYSCALL_64_after_hwframe+0x44/0xae

This is a check to ensure that the extents have been read into
memory before we are doing a ifork btree manipulation. This assert
is bogus in the above case.

We have a fragmented directory block that has more extents in it
than can fit in extent format, so the inode data fork is in btree
format. xfs_dir2_shrink_inode() asks to remove all remaining 16
filesystem blocks from the inode so it can convert to short form,
and __xfs_bunmapi() removes all the extents. We now have a data fork
in btree format but have zero extents in the fork. This incorrectly
trips the xfs_need_iread_extents() assert because it assumes that an
empty extent btree means the extent tree has not been read into
memory yet. This is clearly not the case with xfs_bunmapi(), as it
has an explicit call to xfs_iread_extents() in it to pull the
extents into memory before it starts unmapping.

Also, the assert directly after this bogus one is:

	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);

Which covers the context in which it is legal to call
xfs_bmap_btree_to_extents just fine. Hence we should just remove the
bogus assert as it is clearly wrong and causes a regression.

This returns the test behaviour to the pre-existing assert failure in
xfs_dir2_shrink_inode() that indicates xfs_bunmapi() has failed to
remove all the extents in the range it was asked to unmap.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_bmap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 7e3b9b01431e5..3f8b6da092611 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -605,7 +605,6 @@ xfs_bmap_btree_to_extents(
 
 	ASSERT(cur);
 	ASSERT(whichfork != XFS_COW_FORK);
-	ASSERT(!xfs_need_iread_extents(ifp));
 	ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE);
 	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
 	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
-- 
GitLab


From 0fe0bbe00a6fb77adf75085b7d06b71a830dd6f2 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 27 May 2021 08:11:01 -0700
Subject: [PATCH 1720/3804] xfs: bunmapi has unnecessary AG lock ordering
 issues

large directory block size operations are assert failing because
xfs_bunmapi() is not completely removing fragmented directory blocks
like so:

XFS: Assertion failed: done, file: fs/xfs/libxfs/xfs_dir2.c, line: 677
....
Call Trace:
 xfs_dir2_shrink_inode+0x1a8/0x210
 xfs_dir2_block_to_sf+0x2ae/0x410
 xfs_dir2_block_removename+0x21a/0x280
 xfs_dir_removename+0x195/0x1d0
 xfs_rename+0xb79/0xc50
 ? avc_has_perm+0x8d/0x1a0
 ? avc_has_perm_noaudit+0x9a/0x120
 xfs_vn_rename+0xdb/0x150
 vfs_rename+0x719/0xb50
 ? __lookup_hash+0x6a/0xa0
 do_renameat2+0x413/0x5e0
 __x64_sys_rename+0x45/0x50
 do_syscall_64+0x3a/0x70
 entry_SYSCALL_64_after_hwframe+0x44/0xae

We are aborting the bunmapi() pass because of this specific chunk of
code:

                /*
                 * Make sure we don't touch multiple AGF headers out of order
                 * in a single transaction, as that could cause AB-BA deadlocks.
                 */
                if (!wasdel && !isrt) {
                        agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
                        if (prev_agno != NULLAGNUMBER && prev_agno > agno)
                                break;
                        prev_agno = agno;
                }

This is designed to prevent deadlocks in AGF locking when freeing
multiple extents by ensuring that we only ever lock in increasing
AG number order. Unfortunately, this also violates the "bunmapi will
always succeed" semantic that some high level callers depend on,
such as xfs_dir2_shrink_inode(), xfs_da_shrink_inode() and
xfs_inactive_symlink_rmt().

This AG lock ordering was introduced back in 2017 to fix deadlocks
triggered by generic/299 as reported here:

https://lore.kernel.org/linux-xfs/800468eb-3ded-9166-20a4-047de8018582@gmail.com/

This code is old enough that it predates us deferring all
AG based extent freeing from within xfs_bunmapi(). That is, we never
actually lock AGs in xfs_bunmapi() any more - every non-rt based
extent free is added to the defer ops list, as is all BMBT block
freeing. And RT extents are not AG based, so there are no lock
ordering issues associated with them.

Hence this AGF lock ordering code is both broken and dead. Let's
just remove it so that the large directory block code works reliably
again.

Tested against xfs/538 and generic/299 which is the original test
that exposed the deadlocks that this code fixed.

Fixes: 5b094d6dac04 ("xfs: fix multi-AG deadlock in xfs_bunmapi")
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_bmap.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 3f8b6da092611..a3e0e6f672d63 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5349,7 +5349,6 @@ __xfs_bunmapi(
 	xfs_fsblock_t		sum;
 	xfs_filblks_t		len = *rlen;	/* length to unmap in file */
 	xfs_fileoff_t		max_len;
-	xfs_agnumber_t		prev_agno = NULLAGNUMBER, agno;
 	xfs_fileoff_t		end;
 	struct xfs_iext_cursor	icur;
 	bool			done = false;
@@ -5441,16 +5440,6 @@ __xfs_bunmapi(
 		del = got;
 		wasdel = isnullstartblock(del.br_startblock);
 
-		/*
-		 * Make sure we don't touch multiple AGF headers out of order
-		 * in a single transaction, as that could cause AB-BA deadlocks.
-		 */
-		if (!wasdel && !isrt) {
-			agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
-			if (prev_agno != NULLAGNUMBER && prev_agno > agno)
-				break;
-			prev_agno = agno;
-		}
 		if (got.br_startoff < start) {
 			del.br_startoff = start;
 			del.br_blockcount -= start - got.br_startoff;
-- 
GitLab


From 6308c44ed6eeadf65c0a7ba68d609773ed860fbb Mon Sep 17 00:00:00 2001
From: Jack Yu <jack.yu@realtek.com>
Date: Thu, 27 May 2021 01:06:51 +0000
Subject: [PATCH 1721/3804] ASoC: rt5659: Fix the lost powers for the HDA
 header

The "LDO2", "MICBIAS1" and "Mic Det Power" supplies were powered off after
the DAPM widgets were added, even though these supplies had been enabled by
the "RT5659_JD_HDA_HEADER" JD settings in the probe function. In the codec
probe function, these widgets are now left out of DAPM for that JD setting
to prevent the supplies from being controlled by DAPM.

Signed-off-by: Oder Chiou <oder_chiou@realtek.com>
Signed-off-by: Jack Yu <jack.yu@realtek.com>
Message-Id: <15fced51977b458798ca4eebf03dafb9@realtek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt5659.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/sound/soc/codecs/rt5659.c b/sound/soc/codecs/rt5659.c
index 87f5709fe2cca..4a50b169fe032 100644
--- a/sound/soc/codecs/rt5659.c
+++ b/sound/soc/codecs/rt5659.c
@@ -2433,13 +2433,18 @@ static int set_dmic_power(struct snd_soc_dapm_widget *w,
 	return 0;
 }
 
-static const struct snd_soc_dapm_widget rt5659_dapm_widgets[] = {
+static const struct snd_soc_dapm_widget rt5659_particular_dapm_widgets[] = {
 	SND_SOC_DAPM_SUPPLY("LDO2", RT5659_PWR_ANLG_3, RT5659_PWR_LDO2_BIT, 0,
 		NULL, 0),
-	SND_SOC_DAPM_SUPPLY("PLL", RT5659_PWR_ANLG_3, RT5659_PWR_PLL_BIT, 0,
-		NULL, 0),
+	SND_SOC_DAPM_SUPPLY("MICBIAS1", RT5659_PWR_ANLG_2, RT5659_PWR_MB1_BIT,
+		0, NULL, 0),
 	SND_SOC_DAPM_SUPPLY("Mic Det Power", RT5659_PWR_VOL,
 		RT5659_PWR_MIC_DET_BIT, 0, NULL, 0),
+};
+
+static const struct snd_soc_dapm_widget rt5659_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY("PLL", RT5659_PWR_ANLG_3, RT5659_PWR_PLL_BIT, 0,
+		NULL, 0),
 	SND_SOC_DAPM_SUPPLY("Mono Vref", RT5659_PWR_ANLG_1,
 		RT5659_PWR_VREF3_BIT, 0, NULL, 0),
 
@@ -2464,8 +2469,6 @@ static const struct snd_soc_dapm_widget rt5659_dapm_widgets[] = {
 		RT5659_ADC_MONO_R_ASRC_SFT, 0, NULL, 0),
 
 	/* Input Side */
-	SND_SOC_DAPM_SUPPLY("MICBIAS1", RT5659_PWR_ANLG_2, RT5659_PWR_MB1_BIT,
-		0, NULL, 0),
 	SND_SOC_DAPM_SUPPLY("MICBIAS2", RT5659_PWR_ANLG_2, RT5659_PWR_MB2_BIT,
 		0, NULL, 0),
 	SND_SOC_DAPM_SUPPLY("MICBIAS3", RT5659_PWR_ANLG_2, RT5659_PWR_MB3_BIT,
@@ -3660,10 +3663,23 @@ static int rt5659_set_bias_level(struct snd_soc_component *component,
 
 static int rt5659_probe(struct snd_soc_component *component)
 {
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
 	struct rt5659_priv *rt5659 = snd_soc_component_get_drvdata(component);
 
 	rt5659->component = component;
 
+	switch (rt5659->pdata.jd_src) {
+	case RT5659_JD_HDA_HEADER:
+		break;
+
+	default:
+		snd_soc_dapm_new_controls(dapm,
+			rt5659_particular_dapm_widgets,
+			ARRAY_SIZE(rt5659_particular_dapm_widgets));
+		break;
+	}
+
 	return 0;
 }
 
-- 
GitLab


From f7ebe6b76940f873645ff110192b08e64334a112 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Wed, 26 May 2021 00:25:39 +0900
Subject: [PATCH 1722/3804] docs: Activate xeCJK only in CJK chapters

Activating xeCJK in English and Italian-translation documents
results in sub-optimal typesetting with wide-looking apostrophes
and quotation marks.

The xeCJK package provides macros for enabling and disabling its
effect in the middle of a document, namely \makexeCJKactive and
\makexeCJKinactive.

So the goal of this change is to activate xeCJK in the relevant
chapters in translations.

To do this:

    o Define custom macros in the preamble depending on the
      availability of the "Noto Sans CJK" font so that those
      macros can be used regardless of the use of xeCJK package.

    o Patch \sphinxtableofcontents so that xeCJK is inactivated
      after table of contents.

    o Embed those custom macros in each language's index.rst file
      as a ".. raw:: latex" construct.

Note: A CJK chapter needs \kerneldocCJKon in front of its chapter
heading, while a non-CJK chapter should have \kerneldocCJKoff
below its chapter heading.

This is to make sure the CJK font is available to CJK chapter's
heading and ending page's footer.

Tested against Sphinx versions 2.4.4 and 4.0.2.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Tested-by: Wu XiangCheng <bobwxc@email.cn>
Reviewed-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/lkml/2061da0a-6ab1-35f3-99c1-dbc415444f37@gmail.com
Link: https://lore.kernel.org/r/83208ddc-5de9-b283-3fd6-92c635348ca0@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py                      | 13 +++++++++++++
 Documentation/translations/index.rst       |  4 ++++
 Documentation/translations/it_IT/index.rst |  4 ++++
 Documentation/translations/ja_JP/index.rst |  5 +++--
 Documentation/translations/ko_KR/index.rst |  5 +++--
 Documentation/translations/zh_CN/index.rst |  1 +
 6 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index 879e86dbea667..25aa00c707b02 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -371,6 +371,19 @@ if cjk_cmd.find("Noto Sans CJK SC") >= 0:
 	% This is needed for translations
         \\usepackage{xeCJK}
         \\setCJKmainfont{Noto Sans CJK SC}
+	% Define custom macros to on/off CJK
+	\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive}
+	\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive}
+	% To customize \sphinxtableofcontents
+	\\usepackage{etoolbox}
+	% Inactivate CJK after tableofcontents
+	\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
+     '''
+else:
+    latex_elements['preamble']  += '''
+	% Custom macros to on/off CJK (Dummy)
+	\\newcommand{\\kerneldocCJKon}{}
+	\\newcommand{\\kerneldocCJKoff}{}
      '''
 
 # Fix reference escape troubles with Sphinx 1.4.x
diff --git a/Documentation/translations/index.rst b/Documentation/translations/index.rst
index e446e5ed00a6a..556b050884fce 100644
--- a/Documentation/translations/index.rst
+++ b/Documentation/translations/index.rst
@@ -18,6 +18,10 @@ Translations
 Disclaimer
 ----------
 
+.. raw:: latex
+
+	\kerneldocCJKoff
+
 Translation's purpose is to ease reading and understanding in languages other
 than English. Its aim is to help people who do not understand English or have
 doubts about its interpretation. Additionally, some people prefer to read
diff --git a/Documentation/translations/it_IT/index.rst b/Documentation/translations/it_IT/index.rst
index bb8fa7346939b..e80a3097aa578 100644
--- a/Documentation/translations/it_IT/index.rst
+++ b/Documentation/translations/it_IT/index.rst
@@ -4,6 +4,10 @@
 Traduzione italiana
 ===================
 
+.. raw:: latex
+
+	\kerneldocCJKoff
+
 :manutentore: Federico Vaga <federico.vaga@vaga.pv.it>
 
 .. _it_disclaimer:
diff --git a/Documentation/translations/ja_JP/index.rst b/Documentation/translations/ja_JP/index.rst
index 2f91b895e3c27..f94ba62d41c36 100644
--- a/Documentation/translations/ja_JP/index.rst
+++ b/Documentation/translations/ja_JP/index.rst
@@ -1,7 +1,8 @@
 .. raw:: latex
 
-        \renewcommand\thesection*
-        \renewcommand\thesubsection*
+	\renewcommand\thesection*
+	\renewcommand\thesubsection*
+	\kerneldocCJKon
 
 Japanese translations
 =====================
diff --git a/Documentation/translations/ko_KR/index.rst b/Documentation/translations/ko_KR/index.rst
index b9e27d20b039b..6ae258118bdf2 100644
--- a/Documentation/translations/ko_KR/index.rst
+++ b/Documentation/translations/ko_KR/index.rst
@@ -1,7 +1,8 @@
 .. raw:: latex
 
-        \renewcommand\thesection*
-        \renewcommand\thesubsection*
+	\renewcommand\thesection*
+	\renewcommand\thesubsection*
+	\kerneldocCJKon
 
 한국어 번역
 ===========
diff --git a/Documentation/translations/zh_CN/index.rst b/Documentation/translations/zh_CN/index.rst
index a736057da41f3..1f953d3439a52 100644
--- a/Documentation/translations/zh_CN/index.rst
+++ b/Documentation/translations/zh_CN/index.rst
@@ -4,6 +4,7 @@
 
 	\renewcommand\thesection*
 	\renewcommand\thesubsection*
+	\kerneldocCJKon
 
 .. _linux_doc_zh:
 
-- 
GitLab


From b77e4c4e655b455c4aba196838d1102c0e3414a4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 25 May 2021 14:23:52 +0200
Subject: [PATCH 1723/3804] iio: ABI: sysfs-bus-iio: fix a typo

Descrption -> Description

This causes some errors when parsed via scripts/get_abi.pl.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/fa90a2deebac80da42b1ad4cf570c4ace436577d.1621944866.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-bus-iio | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 267973541e721..433fe0ab74bed 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -786,7 +786,7 @@ What:		/sys/.../events/in_capacitanceY_adaptive_thresh_rising_en
 What:		/sys/.../events/in_capacitanceY_adaptive_thresh_falling_en
 KernelVersion:	5.13
 Contact:	linux-iio@vger.kernel.org
-Descrption:
+Description:
 		Adaptive thresholds are similar to normal fixed thresholds
 		but the value is expressed as an offset from a value which
 		provides a low frequency approximation of the channel itself.
@@ -798,7 +798,7 @@ What:		/sys/.../in_capacitanceY_adaptive_thresh_rising_timeout
 What:		/sys/.../in_capacitanceY_adaptive_thresh_falling_timeout
 KernelVersion:	5.11
 Contact:	linux-iio@vger.kernel.org
-Descrption:
+Description:
 		When adaptive thresholds are used, the tracking signal
 		may adjust too slowly to step changes in the raw signal.
 		*_timeout (in seconds) specifies a time for which the
-- 
GitLab


From 1e03fe240512621605ec47f93dc29994026a2984 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 25 May 2021 14:23:53 +0200
Subject: [PATCH 1724/3804] iio: ABI: sysfs-bus-iio: avoid a warning when doc
 is built

The description of those vars produces this warning:
  Documentation/ABI/testing/sysfs-bus-iio:799: WARNING: Inline emphasis start-string without end-string.

Due to an asterisk, which is the markup for emphasis. One possible
fix would be to use ``*_timeout`` to avoid it, but looking at
the descriptions of other fields in this file, a common pattern
is to refer to "these" when talking about the API calls that
are described.

So, change the text in order to preserve the meaning while
avoiding the need of using an asterisk there.

Reported-by: Jonathan Corbet <corbet@lwn.net>
Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/dbf0d94f85217f103d77dc8389c8db272f5702d2.1621944866.git.mchehab+huawei@kernel.org
[jc fixed specifiy->specify]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/ABI/testing/sysfs-bus-iio | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 433fe0ab74bed..6f98b6a9b7854 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -801,7 +801,7 @@ Contact:	linux-iio@vger.kernel.org
 Description:
 		When adaptive thresholds are used, the tracking signal
 		may adjust too slowly to step changes in the raw signal.
-		*_timeout (in seconds) specifies a time for which the
+		Thus these specify the time in seconds for which the
 		difference between the slow tracking signal and the raw
 		signal is allowed to remain out-of-range before a reset
 		event occurs in which the tracking signal is made equal
-- 
GitLab


From 544ef682c60484151292eb04183e44a9dd6bb0de Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Mon, 24 May 2021 17:17:15 +1200
Subject: [PATCH 1725/3804] docs: kernel-parameters: mark numa=off is supported
 by a bundle of architectures

risc-v and arm64 support numa=off by common arch_numa_init()
in drivers/base/arch_numa.c. x86, ppc, mips, sparc support it
by arch-level early_param.
numa=off is widely used in Linux distributions, so it is better
to document it.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Link: https://lore.kernel.org/r/20210524051715.13604-1-song.bao.hua@hisilicon.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc463..a388fbdaa2ecc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3513,6 +3513,9 @@
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
+	numa=off 	[KNL, ARM64, PPC, RISCV, SPARC, X86] Disable NUMA, Only
+			set up a single NUMA node spanning all memory.
+
 	numa_balancing=	[KNL,ARM64,PPC,RISCV,S390,X86] Enable or disable automatic
 			NUMA balancing.
 			Allowed values are enable and disable
-- 
GitLab


From 811c3c4723cc2309654c58e8615c775d41ac53ef Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 24 May 2021 10:40:16 +0800
Subject: [PATCH 1726/3804] docs/zh_CN:add core-api refcount-vs-atomic.rst
 translation.

Translate Documentation/core-api/refcount-vs-atomic.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/cbdd1d8b23b8ce6dff0a98a0d89b78673365aa28.1621823299.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   3 +-
 .../zh_CN/core-api/refcount-vs-atomic.rst     | 154 ++++++++++++++++++
 2 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index a1dd792e46f76..90c9a72a4b0e0 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -65,10 +65,11 @@ Linux如何让一切同时发生。 详情请参阅
    :maxdepth: 1
 
    irq/index
+   refcount-vs-atomic
+
 
 Todolist:
 
-   refcount-vs-atomic
    local_ops
    padata
    ../RCU/index
diff --git a/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst b/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst
new file mode 100644
index 0000000000000..ea834e38d2f67
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/refcount-vs-atomic.rst
@@ -0,0 +1,154 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/refcount-vs-atomic.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_refcount-vs-atomic:
+
+
+=======================================
+与atomic_t相比，refcount_t的API是这样的
+=======================================
+
+.. contents:: :local:
+
+简介
+====
+
+refcount_t API的目标是为实现对象的引用计数器提供一个最小的API。虽然来自
+lib/refcount.c的独立于架构的通用实现在下面使用了原子操作，但一些 ``refcount_*()``
+和 ``atomic_*()`` 函数在内存顺序保证方面有很多不同。本文档概述了这些差异，并
+提供了相应的例子，以帮助开发者根据这些内存顺序保证的变化来验证他们的代码。
+
+本文档中使用的术语尽量遵循tools/memory-model/Documentation/explanation.txt
+中定义的正式LKMM。
+
+memory-barriers.txt和atomic_t.txt提供了更多关于内存顺序的背景，包括通用的
+和针对原子操作的。
+
+内存顺序的相关类型
+==================
+
+.. note:: 下面的部分只涵盖了本文使用的与原子操作和引用计数器有关的一些内存顺
+   序类型。如果想了解更广泛的情况，请查阅memory-barriers.txt文件。
+
+在没有任何内存顺序保证的情况下（即完全无序），atomics和refcounters只提供原
+子性和程序顺序（program order, po）关系（在同一个CPU上）。它保证每个
+``atomic_* ()`` 和 ``refcount_*()`` 操作都是原子性的，指令在单个CPU上按程序
+顺序执行。这是用READ_ONCE()/WRITE_ONCE()和比较并交换原语实现的。
+
+强（完全）内存顺序保证在同一CPU上的所有较早加载和存储的指令（所有程序顺序较早
+[po-earlier]指令）在执行任何程序顺序较后指令（po-later）之前完成。它还保证
+同一CPU上储存的程序优先较早的指令和来自其他CPU传播的指令必须在该CPU执行任何
+程序顺序较后指令之前传播到其他CPU（A-累积属性）。这是用smp_mb()实现的。
+
+RELEASE内存顺序保证了在同一CPU上所有较早加载和存储的指令（所有程序顺序较早
+指令）在此操作前完成。它还保证同一CPU上储存的程序优先较早的指令和来自其他CPU
+传播的指令必须在释放（release）操作之前传播到所有其他CPU（A-累积属性）。这是用
+smp_store_release()实现的。
+
+ACQUIRE内存顺序保证了同一CPU上的所有后加载和存储的指令（所有程序顺序较后
+指令）在获取（acquire）操作之后完成。它还保证在获取操作执行后，同一CPU上
+储存的所有程序顺序较后指令必须传播到所有其他CPU。这是用
+smp_acquire__after_ctrl_dep()实现的。
+
+对Refcounters的控制依赖（取决于成功）保证了如果一个对象的引用被成功获得（引用计数
+器的增量或增加行为发生了，函数返回true），那么进一步的存储是针对这个操作的命令。对存
+储的控制依赖没有使用任何明确的屏障来实现，而是依赖于CPU不对存储进行猜测。这只是
+一个单一的CPU关系，对其他CPU不提供任何保证。
+
+
+函数的比较
+==========
+
+情况1） - 非 “读/修改/写”（RMW）操作
+------------------------------------
+
+函数变化:
+
+ * atomic_set() --> refcount_set()
+ * atomic_read() --> refcount_read()
+
+内存顺序保证变化:
+
+ * none (两者都是完全无序的)
+
+
+情况2） - 基于增量的操作，不返回任何值
+--------------------------------------
+
+函数变化:
+
+ * atomic_inc() --> refcount_inc()
+ * atomic_add() --> refcount_add()
+
+内存顺序保证变化:
+
+ * none (两者都是完全无序的)
+
+情况3） - 基于递减的RMW操作，没有返回值
+---------------------------------------
+
+函数变化:
+
+ * atomic_dec() --> refcount_dec()
+
+内存顺序保证变化:
+
+ * 完全无序的 --> RELEASE顺序
+
+
+情况4） - 基于增量的RMW操作，返回一个值
+---------------------------------------
+
+函数变化:
+
+ * atomic_inc_not_zero() --> refcount_inc_not_zero()
+ * 无原子性对应函数 --> refcount_add_not_zero()
+
+内存顺序保证变化:
+
+ * 完全有序的 --> 控制依赖于存储的成功
+
+.. note:: 此处 **假设** 了，必要的顺序是作为获得对象指针的结果而提供的。
+
+
+情况 5） - 基于Dec/Sub递减的通用RMW操作，返回一个值
+---------------------------------------------------
+
+函数变化:
+
+ * atomic_dec_and_test() --> refcount_dec_and_test()
+ * atomic_sub_and_test() --> refcount_sub_and_test()
+
+内存顺序保证变化:
+
+ * 完全有序的 --> RELEASE顺序 + 成功后ACQUIRE顺序
+
+
+情况6）其他基于递减的RMW操作，返回一个值
+----------------------------------------
+
+函数变化:
+
+ * 无原子性对应函数 --> refcount_dec_if_one()
+ * ``atomic_add_unless(&var, -1, 1)`` --> ``refcount_dec_not_one(&var)``
+
+内存顺序保证变化:
+
+ * 完全有序的 --> RELEASE顺序 + 控制依赖
+
+.. note:: atomic_add_unless()只在执行成功时提供完整的顺序。
+
+
+情况7）--基于锁的RMW
+--------------------
+
+函数变化:
+
+ * atomic_dec_and_lock() --> refcount_dec_and_lock()
+ * atomic_dec_and_mutex_lock() --> refcount_dec_and_mutex_lock()
+
+内存顺序保证变化:
+
+ * 完全有序 --> RELEASE顺序 + 控制依赖 + 持有
-- 
GitLab


From 8de8fe4f5db6b6bdaf23977f4d165f8c4e94f4ce Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 24 May 2021 10:40:17 +0800
Subject: [PATCH 1727/3804] docs/zh_CN: add core api local_ops.rst translation

Translate Documentation/core-api/local_ops.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/11da5738679fbab9e875f434745d16db1f167f90.1621823299.git.siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   3 +-
 .../translations/zh_CN/core-api/local_ops.rst | 194 ++++++++++++++++++
 2 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/local_ops.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 90c9a72a4b0e0..4b7efb7edb183 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -66,11 +66,10 @@ Linux如何让一切同时发生。 详情请参阅
 
    irq/index
    refcount-vs-atomic
-
+   local_ops
 
 Todolist:
 
-   local_ops
    padata
    ../RCU/index
 
diff --git a/Documentation/translations/zh_CN/core-api/local_ops.rst b/Documentation/translations/zh_CN/core-api/local_ops.rst
new file mode 100644
index 0000000000000..ee67379b68694
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/local_ops.rst
@@ -0,0 +1,194 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/local_ops.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_local_ops:
+
+
+========================
+本地原子操作的语义和行为
+========================
+
+:作者: Mathieu Desnoyers
+
+
+本文解释了本地原子操作的目的，如何为任何给定的架构实现这些操作，并说明了
+如何正确使用这些操作。它还强调了在内存写入顺序很重要的情况下，跨CPU读取
+这些本地变量时必须采取的预防措施。
+
+.. note::
+
+    注意，基于 ``local_t`` 的操作不建议用于一般内核操作。请使用 ``this_cpu``
+    操作来代替使用，除非真的有特殊目的。大多数内核中使用的 ``local_t`` 已
+    经被 ``this_cpu`` 操作所取代。 ``this_cpu`` 操作在一条指令中结合了重
+    定位和类似 ``local_t`` 的语义，产生了更紧凑和更快的执行代码。
+
+
+本地原子操作的目的
+==================
+
+本地原子操作的目的是提供快速和高度可重入的每CPU计数器。它们通过移除LOCK前
+缀和通常需要在CPU间同步的内存屏障，将标准原子操作的性能成本降到最低。
+
+在许多情况下，拥有快速的每CPU原子计数器是很有吸引力的：它不需要禁用中断来保护中
+断处理程序，它允许在NMI（Non Maskable Interrupt）处理程序中使用连贯的计数器。
+它对追踪目的和各种性能监测计数器特别有用。
+
+本地原子操作只保证在拥有数据的CPU上的变量修改的原子性。因此，必须注意确保只
+有一个CPU写到 ``local_t`` 的数据。这是通过使用每CPU的数据来实现的，并确
+保我们在一个抢占式安全上下文中修改它。然而，从任何一个CPU读取 ``local_t``
+数据都是允许的：这样它就会显得与所有者CPU的其他内存写入顺序不一致。
+
+
+针对特定架构的实现
+==================
+
+这可以通过稍微修改标准的原子操作来实现：只有它们的UP变体必须被保留。这通常
+意味着删除LOCK前缀（在i386和x86_64上）和任何SMP同步屏障。如果架构在SMP和
+UP之间没有不同的行为，在你的架构的 ``local.h`` 中包括 ``asm-generic/local.h``
+就足够了。
+
+通过在一个结构体中嵌入一个 ``atomic_long_t`` ， ``local_t`` 类型被定义为
+一个不透明的 ``signed long`` 。这样做的目的是为了使从这个类型到
+``long`` 的转换失败。该定义看起来像::
+
+    typedef struct { atomic_long_t a; } local_t;
+
+
+使用本地原子操作时应遵循的规则
+==============================
+
+* 被本地操作触及的变量必须是每cpu的变量。
+
+* *只有* 这些变量的CPU所有者才可以写入这些变量。
+
+* 这个CPU可以从任何上下文（进程、中断、软中断、nmi...）中使用本地操作来更新
+  它的local_t变量。
+
+* 当在进程上下文中使用本地操作时，必须禁用抢占（或中断），以确保进程在获得每
+  CPU变量和进行实际的本地操作之间不会被迁移到不同的CPU。
+
+* 当在中断上下文中使用本地操作时，在主线内核上不需要特别注意，因为它们将在局
+  部CPU上运行，并且已经禁用了抢占。然而，我建议无论如何都要明确地禁用抢占，
+  以确保它在-rt内核上仍能正确工作。
+
+* 读取本地cpu变量将提供该变量的当前拷贝。
+
+* 对这些变量的读取可以从任何CPU进行，因为对 “ ``long`` ”，对齐的变量的更新
+  总是原子的。由于写入程序的CPU没有进行内存同步，所以在读取 *其他* cpu的变
+  量时，可以读取该变量的过期副本。
+
+
+如何使用本地原子操作
+====================
+
+::
+
+    #include <linux/percpu.h>
+    #include <asm/local.h>
+
+    static DEFINE_PER_CPU(local_t, counters) = LOCAL_INIT(0);
+
+
+计数器
+======
+
+计数是在一个signed long的所有位上进行的。
+
+在可抢占的上下文中，围绕本地原子操作使用 ``get_cpu_var()`` 和
+``put_cpu_var()`` ：它确保在对每个cpu变量进行写访问时，抢占被禁用。比如
+说::
+
+    local_inc(&get_cpu_var(counters));
+    put_cpu_var(counters);
+
+如果你已经在一个抢占安全上下文中，你可以使用 ``this_cpu_ptr()`` 代替::
+
+    local_inc(this_cpu_ptr(&counters));
+
+
+
+读取计数器
+==========
+
+那些本地计数器可以从外部的CPU中读取，以求得计数的总和。请注意，local_read
+所看到的跨CPU的数据必须被认为是相对于拥有该数据的CPU上发生的其他内存写入来
+说不符合顺序的::
+
+    long sum = 0;
+    for_each_online_cpu(cpu)
+            sum += local_read(&per_cpu(counters, cpu));
+
+如果你想使用远程local_read来同步CPU之间对资源的访问，必须在写入者和读取者
+的CPU上分别使用显式的 ``smp_wmb()`` 和 ``smp_rmb()`` 内存屏障。如果你使
+用 ``local_t`` 变量作为写在缓冲区中的字节的计数器，就会出现这种情况：在缓
+冲区写和计数器增量之间应该有一个 ``smp_wmb()`` ，在计数器读和缓冲区读之间
+也应有一个 ``smp_rmb()`` 。
+
+下面是一个使用 ``local.h`` 实现每个cpu基本计数器的示例模块::
+
+    /* test-local.c
+     *
+     * Sample module for local.h usage.
+     */
+
+
+    #include <asm/local.h>
+    #include <linux/module.h>
+    #include <linux/timer.h>
+
+    static DEFINE_PER_CPU(local_t, counters) = LOCAL_INIT(0);
+
+    static struct timer_list test_timer;
+
+    /* IPI called on each CPU. */
+    static void test_each(void *info)
+    {
+            /* Increment the counter from a non preemptible context */
+            printk("Increment on cpu %d\n", smp_processor_id());
+            local_inc(this_cpu_ptr(&counters));
+
+            /* This is what incrementing the variable would look like within a
+             * preemptible context (it disables preemption) :
+             *
+             * local_inc(&get_cpu_var(counters));
+             * put_cpu_var(counters);
+             */
+    }
+
+    static void do_test_timer(unsigned long data)
+    {
+            int cpu;
+
+            /* Increment the counters */
+            on_each_cpu(test_each, NULL, 1);
+            /* Read all the counters */
+            printk("Counters read from CPU %d\n", smp_processor_id());
+            for_each_online_cpu(cpu) {
+                    printk("Read : CPU %d, count %ld\n", cpu,
+                            local_read(&per_cpu(counters, cpu)));
+            }
+            mod_timer(&test_timer, jiffies + 1000);
+    }
+
+    static int __init test_init(void)
+    {
+            /* initialize the timer that will increment the counter */
+            timer_setup(&test_timer, do_test_timer, 0);
+            mod_timer(&test_timer, jiffies + 1);
+
+            return 0;
+    }
+
+    static void __exit test_exit(void)
+    {
+            del_timer_sync(&test_timer);
+    }
+
+    module_init(test_init);
+    module_exit(test_exit);
+
+    MODULE_LICENSE("GPL");
+    MODULE_AUTHOR("Mathieu Desnoyers");
+    MODULE_DESCRIPTION("Local Atomic Ops");
-- 
GitLab


From 6a137caec23aeb9e036cdfd8a46dd8a366460e5d Mon Sep 17 00:00:00 2001
From: Lin Ma <linma@zju.edu.cn>
Date: Tue, 25 May 2021 14:39:02 +0200
Subject: [PATCH 1728/3804] Bluetooth: fix the erroneous flush_work() order

In the cleanup routine for failed initialization of an HCI device,
flush_work(&hdev->rx_work) needs to finish before
flush_work(&hdev->cmd_work). Otherwise, hci_rx_work() can
possibly trigger new cmd_work and cause a bug, such as a double free,
in later processing.

This was assigned CVE-2021-3564.

This patch reorders the flush_work() calls to fix this bug.

Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: linux-bluetooth@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Lin Ma <linma@zju.edu.cn>
Signed-off-by: Hao Xiong <mart1n@zju.edu.cn>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fd12f1652bdf4..7d71d104fdfda 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1610,8 +1610,13 @@ setup_failed:
 	} else {
 		/* Init failed, cleanup */
 		flush_work(&hdev->tx_work);
-		flush_work(&hdev->cmd_work);
+
+		/* Since hci_rx_work() is possible to awake new cmd_work
+		 * it should be flushed first to avoid unexpected call of
+		 * hci_cmd_work()
+		 */
 		flush_work(&hdev->rx_work);
+		flush_work(&hdev->cmd_work);
 
 		skb_queue_purge(&hdev->cmd_q);
 		skb_queue_purge(&hdev->rx_q);
-- 
GitLab


From c8237760cc56c79e04a6a47696ef8bb0aab8c77a Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 24 May 2021 10:23:08 +0800
Subject: [PATCH 1729/3804] docs: zh_CN: update Chinese translations

Two new commits were added to the original document:

commit ddba35031db2ea89facc91c745e5ad55ba2e0e7f
commit 20bc8c1e972f29afcac85e524e430c11a6df5f58

translate them into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210524022308.1216098-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../zh_CN/core-api/printk-formats.rst           | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/Documentation/translations/zh_CN/core-api/printk-formats.rst b/Documentation/translations/zh_CN/core-api/printk-formats.rst
index 624a090e6ee5a..a680c8f164c3a 100644
--- a/Documentation/translations/zh_CN/core-api/printk-formats.rst
+++ b/Documentation/translations/zh_CN/core-api/printk-formats.rst
@@ -122,6 +122,17 @@ seq_printf()，而不是printk()）由用户空间进程读取，使用下面描
 ``B`` 占位符的结果是带有偏移量的符号名，在打印堆栈回溯时应该使用。占位符将考虑编译器优化
 的影响，当使用尾部调用并使用noreturn GCC属性标记时，可能会发生这种优化。
 
+如果指针在一个模块内，模块名称和可选的构建ID将被打印在符号名称之后，并在说明符的末尾添加
+一个额外的 ``b`` 。
+
+::
+
+	%pS	versatile_init+0x0/0x110 [module_name]
+	%pSb	versatile_init+0x0/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+	%pSRb	versatile_init+0x9/0x110 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+		(with __builtin_extract_return_addr() translation)
+	%pBb	prev_fn_of_versatile_init+0x88/0x88 [module_name ed5019fdf5e53be37cb1ba7899292d7e143b259e]
+
 来自BPF / tracing追踪的探查指针
 ----------------------------------
 
@@ -483,9 +494,10 @@ Fwnode handles
 ::
 
 	%pt[RT]			YYYY-mm-ddTHH:MM:SS
+	%pt[RT]s		YYYY-mm-dd HH:MM:SS
 	%pt[RT]d		YYYY-mm-dd
 	%pt[RT]t		HH:MM:SS
-	%pt[RT][dt][r]
+	%pt[RT][dt][r][s]
 
 用于打印日期和时间::
 
@@ -497,6 +509,9 @@ Fwnode handles
 默认情况下，年将以1900为单位递增，月将以1为单位递增。 使用%pt[RT]r (raw)
 来抑制这种行为。
 
+%pt[RT]s（空格）将覆盖ISO 8601的分隔符，在日期和时间之间使用' '（空格）而
+不是'T'（大写T）。当日期或时间被省略时，它不会有任何影响。
+
 通过引用传递。
 
 clk结构体
-- 
GitLab


From f610a5a29c3cfb7d37bdfa4ef52f72ea51f24a76 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 27 May 2021 11:24:33 +0100
Subject: [PATCH 1730/3804] afs: Fix the nlink handling of dir-over-dir rename

Fix rename of one directory over another such that the nlink on the deleted
directory is cleared to 0 rather than being decremented to 1.

This was causing the generic/035 xfstest to fail.

Fixes: e49c7b2f6de7 ("afs: Build an abstraction around an "operation" concept")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/162194384460.3999479.7605572278074191079.stgit@warthog.procyon.org.uk/ # v1
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/dir.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 9fbe5a5ec9bd4..78719f2f567e9 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1919,7 +1919,9 @@ static void afs_rename_edit_dir(struct afs_operation *op)
 	new_inode = d_inode(new_dentry);
 	if (new_inode) {
 		spin_lock(&new_inode->i_lock);
-		if (new_inode->i_nlink > 0)
+		if (S_ISDIR(new_inode->i_mode))
+			clear_nlink(new_inode);
+		else if (new_inode->i_nlink > 0)
 			drop_nlink(new_inode);
 		spin_unlock(&new_inode->i_lock);
 	}
-- 
GitLab


From c59870e2110e1229a6e4b2457aece6ffe8d68d99 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 19 May 2021 09:44:47 -0700
Subject: [PATCH 1731/3804] perf debug: Move debug initialization earlier

This avoids segfaults during option handlers that use pr_err. For
example, "perf --debug nopager list" segfaults before this change.

Fixes: 8abceacff87d (perf debug: Add debug_set_file function)
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20210519164447.2672030-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/perf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 20cb91ef06ffc..2f6b67189b426 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -443,6 +443,8 @@ int main(int argc, const char **argv)
 	const char *cmd;
 	char sbuf[STRERR_BUFSIZE];
 
+	perf_debug_setup();
+
 	/* libsubcmd init */
 	exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
 	pager_init(PERF_PAGER_ENVIRONMENT);
@@ -531,8 +533,6 @@ int main(int argc, const char **argv)
 	 */
 	pthread__block_sigwinch();
 
-	perf_debug_setup();
-
 	while (1) {
 		static int done_help;
 
-- 
GitLab


From 5f154c4e20d7edd38bddec78f3e0a7628057ef76 Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:29 +0100
Subject: [PATCH 1732/3804] arm64: Move patching utilities out of instruction
 encoding/decoding

Files insn.[c|h] contain some functions used for instruction patching.
In order to reuse the instruction encoder/decoder, move the patching
utilities to their own file.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-2-jthierry@redhat.com
[will: Include patching.h in insn.h to fix header mess; add __ASSEMBLY__ guards]
Signed-off-by: Will Deacon <will@kernel.org>

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h     |   6 +-
 arch/arm64/include/asm/patching.h |  15 +++
 arch/arm64/kernel/Makefile        |   2 +-
 arch/arm64/kernel/insn.c          | 149 +-----------------------------
 arch/arm64/kernel/patching.c      | 148 +++++++++++++++++++++++++++++
 5 files changed, 168 insertions(+), 152 deletions(-)
 create mode 100644 arch/arm64/include/asm/patching.h
 create mode 100644 arch/arm64/kernel/patching.c

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4ebb9c054cccd..f08579e5119e9 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 
 #include <asm/alternative.h>
+#include <asm/patching.h>
 
 #ifndef __ASSEMBLY__
 /*
@@ -379,8 +380,6 @@ static inline bool aarch64_insn_is_adr_adrp(u32 insn)
 	return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
 }
 
-int aarch64_insn_read(void *addr, u32 *insnp);
-int aarch64_insn_write(void *addr, u32 insn);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
 bool aarch64_insn_uses_literal(u32 insn);
 bool aarch64_insn_is_branch(u32 insn);
@@ -487,9 +486,6 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base,
 s32 aarch64_get_branch_offset(u32 insn);
 u32 aarch64_set_branch_offset(u32 insn, s32 offset);
 
-int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
-int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
-
 s32 aarch64_insn_adrp_get_offset(u32 insn);
 u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
 
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
new file mode 100644
index 0000000000000..5ebab129222f4
--- /dev/null
+++ b/arch/arm64/include/asm/patching.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef	__ASM_PATCHING_H
+#define	__ASM_PATCHING_H
+
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+int aarch64_insn_read(void *addr, u32 *insnp);
+int aarch64_insn_write(void *addr, u32 insn);
+
+int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
+int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+#endif /* __ASSEMBLY__ */
+
+#endif	/* __ASM_PATCHING_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6cc97730790e7..3693156acc75f 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -22,7 +22,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o idreg-override.o
+			   syscall.o proton-pack.o idreg-override.o patching.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6c0de2f60ea96..952e7d6fe60e2 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -7,21 +7,14 @@
  */
 #include <linux/bitops.h>
 #include <linux/bug.h>
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/stop_machine.h>
+#include <linux/printk.h>
+#include <linux/sizes.h>
 #include <linux/types.h>
-#include <linux/uaccess.h>
 
-#include <asm/cacheflush.h>
 #include <asm/debug-monitors.h>
-#include <asm/fixmap.h>
+#include <asm/errno.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
-#include <asm/sections.h>
 
 #define AARCH64_INSN_SF_BIT	BIT(31)
 #define AARCH64_INSN_N_BIT	BIT(22)
@@ -83,81 +76,6 @@ bool aarch64_insn_is_branch_imm(u32 insn)
 		aarch64_insn_is_bcond(insn));
 }
 
-static DEFINE_RAW_SPINLOCK(patch_lock);
-
-static bool is_exit_text(unsigned long addr)
-{
-	/* discarded with init text/data */
-	return system_state < SYSTEM_RUNNING &&
-		addr >= (unsigned long)__exittext_begin &&
-		addr < (unsigned long)__exittext_end;
-}
-
-static bool is_image_text(unsigned long addr)
-{
-	return core_kernel_text(addr) || is_exit_text(addr);
-}
-
-static void __kprobes *patch_map(void *addr, int fixmap)
-{
-	unsigned long uintaddr = (uintptr_t) addr;
-	bool image = is_image_text(uintaddr);
-	struct page *page;
-
-	if (image)
-		page = phys_to_page(__pa_symbol(addr));
-	else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
-		page = vmalloc_to_page(addr);
-	else
-		return addr;
-
-	BUG_ON(!page);
-	return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
-			(uintaddr & ~PAGE_MASK));
-}
-
-static void __kprobes patch_unmap(int fixmap)
-{
-	clear_fixmap(fixmap);
-}
-/*
- * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
- * little-endian.
- */
-int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
-{
-	int ret;
-	__le32 val;
-
-	ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
-	if (!ret)
-		*insnp = le32_to_cpu(val);
-
-	return ret;
-}
-
-static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
-{
-	void *waddr = addr;
-	unsigned long flags = 0;
-	int ret;
-
-	raw_spin_lock_irqsave(&patch_lock, flags);
-	waddr = patch_map(addr, FIX_TEXT_POKE0);
-
-	ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
-
-	patch_unmap(FIX_TEXT_POKE0);
-	raw_spin_unlock_irqrestore(&patch_lock, flags);
-
-	return ret;
-}
-
-int __kprobes aarch64_insn_write(void *addr, u32 insn)
-{
-	return __aarch64_insn_write(addr, cpu_to_le32(insn));
-}
-
 bool __kprobes aarch64_insn_uses_literal(u32 insn)
 {
 	/* ldr/ldrsw (literal), prfm */
@@ -187,67 +105,6 @@ bool __kprobes aarch64_insn_is_branch(u32 insn)
 		aarch64_insn_is_bcond(insn);
 }
 
-int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
-{
-	u32 *tp = addr;
-	int ret;
-
-	/* A64 instructions must be word aligned */
-	if ((uintptr_t)tp & 0x3)
-		return -EINVAL;
-
-	ret = aarch64_insn_write(tp, insn);
-	if (ret == 0)
-		__flush_icache_range((uintptr_t)tp,
-				     (uintptr_t)tp + AARCH64_INSN_SIZE);
-
-	return ret;
-}
-
-struct aarch64_insn_patch {
-	void		**text_addrs;
-	u32		*new_insns;
-	int		insn_cnt;
-	atomic_t	cpu_count;
-};
-
-static int __kprobes aarch64_insn_patch_text_cb(void *arg)
-{
-	int i, ret = 0;
-	struct aarch64_insn_patch *pp = arg;
-
-	/* The first CPU becomes master */
-	if (atomic_inc_return(&pp->cpu_count) == 1) {
-		for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
-			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
-							     pp->new_insns[i]);
-		/* Notify other processors with an additional increment. */
-		atomic_inc(&pp->cpu_count);
-	} else {
-		while (atomic_read(&pp->cpu_count) <= num_online_cpus())
-			cpu_relax();
-		isb();
-	}
-
-	return ret;
-}
-
-int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
-{
-	struct aarch64_insn_patch patch = {
-		.text_addrs = addrs,
-		.new_insns = insns,
-		.insn_cnt = cnt,
-		.cpu_count = ATOMIC_INIT(0),
-	};
-
-	if (cnt <= 0)
-		return -EINVAL;
-
-	return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
-				       cpu_online_mask);
-}
-
 static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
 						u32 *maskp, int *shiftp)
 {
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
new file mode 100644
index 0000000000000..9d050e33901b7
--- /dev/null
+++ b/arch/arm64/kernel/patching.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/stop_machine.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/kprobes.h>
+#include <asm/sections.h>
+
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+static bool is_exit_text(unsigned long addr)
+{
+	/* discarded with init text/data */
+	return system_state < SYSTEM_RUNNING &&
+		addr >= (unsigned long)__exittext_begin &&
+		addr < (unsigned long)__exittext_end;
+}
+
+static bool is_image_text(unsigned long addr)
+{
+	return core_kernel_text(addr) || is_exit_text(addr);
+}
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+	unsigned long uintaddr = (uintptr_t) addr;
+	bool image = is_image_text(uintaddr);
+	struct page *page;
+
+	if (image)
+		page = phys_to_page(__pa_symbol(addr));
+	else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+		page = vmalloc_to_page(addr);
+	else
+		return addr;
+
+	BUG_ON(!page);
+	return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
+			(uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+	clear_fixmap(fixmap);
+}
+/*
+ * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always
+ * little-endian.
+ */
+int __kprobes aarch64_insn_read(void *addr, u32 *insnp)
+{
+	int ret;
+	__le32 val;
+
+	ret = copy_from_kernel_nofault(&val, addr, AARCH64_INSN_SIZE);
+	if (!ret)
+		*insnp = le32_to_cpu(val);
+
+	return ret;
+}
+
+static int __kprobes __aarch64_insn_write(void *addr, __le32 insn)
+{
+	void *waddr = addr;
+	unsigned long flags = 0;
+	int ret;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+	waddr = patch_map(addr, FIX_TEXT_POKE0);
+
+	ret = copy_to_kernel_nofault(waddr, &insn, AARCH64_INSN_SIZE);
+
+	patch_unmap(FIX_TEXT_POKE0);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
+int __kprobes aarch64_insn_write(void *addr, u32 insn)
+{
+	return __aarch64_insn_write(addr, cpu_to_le32(insn));
+}
+
+int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn)
+{
+	u32 *tp = addr;
+	int ret;
+
+	/* A64 instructions must be word aligned */
+	if ((uintptr_t)tp & 0x3)
+		return -EINVAL;
+
+	ret = aarch64_insn_write(tp, insn);
+	if (ret == 0)
+		__flush_icache_range((uintptr_t)tp,
+				     (uintptr_t)tp + AARCH64_INSN_SIZE);
+
+	return ret;
+}
+
+struct aarch64_insn_patch {
+	void		**text_addrs;
+	u32		*new_insns;
+	int		insn_cnt;
+	atomic_t	cpu_count;
+};
+
+static int __kprobes aarch64_insn_patch_text_cb(void *arg)
+{
+	int i, ret = 0;
+	struct aarch64_insn_patch *pp = arg;
+
+	/* The first CPU becomes master */
+	if (atomic_inc_return(&pp->cpu_count) == 1) {
+		for (i = 0; ret == 0 && i < pp->insn_cnt; i++)
+			ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i],
+							     pp->new_insns[i]);
+		/* Notify other processors with an additional increment. */
+		atomic_inc(&pp->cpu_count);
+	} else {
+		while (atomic_read(&pp->cpu_count) <= num_online_cpus())
+			cpu_relax();
+		isb();
+	}
+
+	return ret;
+}
+
+int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt)
+{
+	struct aarch64_insn_patch patch = {
+		.text_addrs = addrs,
+		.new_insns = insns,
+		.insn_cnt = cnt,
+		.cpu_count = ATOMIC_INIT(0),
+	};
+
+	if (cnt <= 0)
+		return -EINVAL;
+
+	return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
+				       cpu_online_mask);
+}
-- 
GitLab


From 633e5e938fea957577e6db33540a78debf0c5cbe Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:30 +0100
Subject: [PATCH 1733/3804] arm64: Move aarch32 condition check functions

The functions to check condition flags for aarch32 execution are only
used to emulate aarch32 instructions. Move them from the instruction
encoding/decoding code to the trap handling files.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-3-jthierry@redhat.com
[will: leave aarch32_opcode_cond_checks where it is]
Signed-off-by: Will Deacon <will@kernel.org>

Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h            |  1 +
 arch/arm64/kernel/insn.c                 | 98 -----------------------
 arch/arm64/kernel/probes/simulate-insn.c |  1 +
 arch/arm64/kernel/traps.c                | 99 +++++++++++++++++++++++-
 4 files changed, 100 insertions(+), 99 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index f08579e5119e9..7adc4398fadbf 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -502,6 +502,7 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn);
 
 typedef bool (pstate_check_t)(unsigned long);
 extern pstate_check_t * const aarch32_opcode_cond_checks[16];
+
 #endif /* __ASSEMBLY__ */
 
 #endif	/* __ASM_INSN_H */
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 952e7d6fe60e2..6ff8826ae7ea0 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -1289,104 +1289,6 @@ u32 aarch32_insn_mcr_extract_crm(u32 insn)
 	return insn & CRM_MASK;
 }
 
-static bool __kprobes __check_eq(unsigned long pstate)
-{
-	return (pstate & PSR_Z_BIT) != 0;
-}
-
-static bool __kprobes __check_ne(unsigned long pstate)
-{
-	return (pstate & PSR_Z_BIT) == 0;
-}
-
-static bool __kprobes __check_cs(unsigned long pstate)
-{
-	return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_cc(unsigned long pstate)
-{
-	return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_mi(unsigned long pstate)
-{
-	return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_pl(unsigned long pstate)
-{
-	return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_vs(unsigned long pstate)
-{
-	return (pstate & PSR_V_BIT) != 0;
-}
-
-static bool __kprobes __check_vc(unsigned long pstate)
-{
-	return (pstate & PSR_V_BIT) == 0;
-}
-
-static bool __kprobes __check_hi(unsigned long pstate)
-{
-	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
-	return (pstate & PSR_C_BIT) != 0;
-}
-
-static bool __kprobes __check_ls(unsigned long pstate)
-{
-	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
-	return (pstate & PSR_C_BIT) == 0;
-}
-
-static bool __kprobes __check_ge(unsigned long pstate)
-{
-	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
-	return (pstate & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_lt(unsigned long pstate)
-{
-	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
-	return (pstate & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_gt(unsigned long pstate)
-{
-	/*PSR_N_BIT ^= PSR_V_BIT */
-	unsigned long temp = pstate ^ (pstate << 3);
-
-	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
-	return (temp & PSR_N_BIT) == 0;
-}
-
-static bool __kprobes __check_le(unsigned long pstate)
-{
-	/*PSR_N_BIT ^= PSR_V_BIT */
-	unsigned long temp = pstate ^ (pstate << 3);
-
-	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
-	return (temp & PSR_N_BIT) != 0;
-}
-
-static bool __kprobes __check_al(unsigned long pstate)
-{
-	return true;
-}
-
-/*
- * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
- * it behaves identically to 0b1110 ("al").
- */
-pstate_check_t * const aarch32_opcode_cond_checks[16] = {
-	__check_eq, __check_ne, __check_cs, __check_cc,
-	__check_mi, __check_pl, __check_vs, __check_vc,
-	__check_hi, __check_ls, __check_ge, __check_lt,
-	__check_gt, __check_le, __check_al, __check_al
-};
-
 static bool range_of_ones(u64 val)
 {
 	/* Doesn't handle full ones or full zeroes */
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 25f67ec596353..22d0b32524763 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -10,6 +10,7 @@
 #include <linux/kprobes.h>
 
 #include <asm/ptrace.h>
+#include <asm/traps.h>
 
 #include "simulate-insn.h"
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index a05d34f0e82a7..9b683b2381cf1 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -36,7 +36,6 @@
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/extable.h>
-#include <asm/insn.h>
 #include <asm/kprobes.h>
 #include <asm/traps.h>
 #include <asm/smp.h>
@@ -45,6 +44,104 @@
 #include <asm/system_misc.h>
 #include <asm/sysreg.h>
 
+static bool __kprobes __check_eq(unsigned long pstate)
+{
+	return (pstate & PSR_Z_BIT) != 0;
+}
+
+static bool __kprobes __check_ne(unsigned long pstate)
+{
+	return (pstate & PSR_Z_BIT) == 0;
+}
+
+static bool __kprobes __check_cs(unsigned long pstate)
+{
+	return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_cc(unsigned long pstate)
+{
+	return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_mi(unsigned long pstate)
+{
+	return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_pl(unsigned long pstate)
+{
+	return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_vs(unsigned long pstate)
+{
+	return (pstate & PSR_V_BIT) != 0;
+}
+
+static bool __kprobes __check_vc(unsigned long pstate)
+{
+	return (pstate & PSR_V_BIT) == 0;
+}
+
+static bool __kprobes __check_hi(unsigned long pstate)
+{
+	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (pstate & PSR_C_BIT) != 0;
+}
+
+static bool __kprobes __check_ls(unsigned long pstate)
+{
+	pstate &= ~(pstate >> 1);	/* PSR_C_BIT &= ~PSR_Z_BIT */
+	return (pstate & PSR_C_BIT) == 0;
+}
+
+static bool __kprobes __check_ge(unsigned long pstate)
+{
+	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
+	return (pstate & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_lt(unsigned long pstate)
+{
+	pstate ^= (pstate << 3);	/* PSR_N_BIT ^= PSR_V_BIT */
+	return (pstate & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_gt(unsigned long pstate)
+{
+	/*PSR_N_BIT ^= PSR_V_BIT */
+	unsigned long temp = pstate ^ (pstate << 3);
+
+	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
+	return (temp & PSR_N_BIT) == 0;
+}
+
+static bool __kprobes __check_le(unsigned long pstate)
+{
+	/*PSR_N_BIT ^= PSR_V_BIT */
+	unsigned long temp = pstate ^ (pstate << 3);
+
+	temp |= (pstate << 1);	/*PSR_N_BIT |= PSR_Z_BIT */
+	return (temp & PSR_N_BIT) != 0;
+}
+
+static bool __kprobes __check_al(unsigned long pstate)
+{
+	return true;
+}
+
+/*
+ * Note that the ARMv8 ARM calls condition code 0b1111 "nv", but states that
+ * it behaves identically to 0b1110 ("al").
+ */
+pstate_check_t * const aarch32_opcode_cond_checks[16] = {
+	__check_eq, __check_ne, __check_cs, __check_cc,
+	__check_mi, __check_pl, __check_vs, __check_vc,
+	__check_hi, __check_ls, __check_ge, __check_lt,
+	__check_gt, __check_le, __check_al, __check_al
+};
+
 static const char *handler[] = {
 	"Synchronous Abort",
 	"IRQ",
-- 
GitLab


From 72fd723694b6f4f1d1f19f673fb93801d7d1a0e8 Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:32 +0100
Subject: [PATCH 1734/3804] arm64: Move instruction encoder/decoder under lib/

Aarch64 instruction set encoding and decoding logic can prove useful
for some features/tools both part of the kernel and outside the kernel.

Isolate the function dealing only with encoding/decoding instructions,
with minimal dependency on kernel utilities in order to be able to reuse
that code.

Code was only moved; no code should have been added, removed or
modified.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-5-jthierry@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/Makefile        | 2 +-
 arch/arm64/lib/Makefile           | 6 +++---
 arch/arm64/{kernel => lib}/insn.c | 0
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename arch/arm64/{kernel => lib}/insn.c (100%)

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 3693156acc75f..03e8311ce5762 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,7 +18,7 @@ CFLAGS_syscall.o	+= -fno-stack-protector
 obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-common.o entry-fpsimd.o process.o ptrace.o	\
 			   setup.o signal.o sys.o stacktrace.o time.o traps.o	\
-			   io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o	\
+			   io.o vdso.o hyp-stub.o psci.o cpu_ops.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index d31e1169d9b8e..9cd83908717da 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 lib-y		:= clear_user.o delay.o copy_from_user.o		\
 		   copy_to_user.o copy_in_user.o copy_page.o		\
-		   clear_page.o csum.o memchr.o memcpy.o memmove.o	\
-		   memset.o memcmp.o strcmp.o strncmp.o strlen.o	\
-		   strnlen.o strchr.o strrchr.o tishift.o
+		   clear_page.o csum.o insn.o memchr.o memcpy.o		\
+		   memmove.o memset.o memcmp.o strcmp.o strncmp.o	\
+		   strlen.o strnlen.o strchr.o strrchr.o tishift.o
 
 ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
 obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/lib/insn.c
similarity index 100%
rename from arch/arm64/kernel/insn.c
rename to arch/arm64/lib/insn.c
-- 
GitLab


From 427bfc59e2281eaede70f050062dc31257c46652 Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:33 +0100
Subject: [PATCH 1735/3804] arm64: insn: Add SVE instruction class

SVE has been public for some time now. Let the decoder acknowledge
its existence.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-6-jthierry@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h | 1 +
 arch/arm64/lib/insn.c         | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 7adc4398fadbf..93f7b0c86dfd2 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -31,6 +31,7 @@
  */
 enum aarch64_insn_encoding_class {
 	AARCH64_INSN_CLS_UNKNOWN,	/* UNALLOCATED */
+	AARCH64_INSN_CLS_SVE,		/* SVE instructions */
 	AARCH64_INSN_CLS_DP_IMM,	/* Data processing - immediate */
 	AARCH64_INSN_CLS_DP_REG,	/* Data processing - register */
 	AARCH64_INSN_CLS_DP_FPSIMD,	/* Data processing - SIMD and FP */
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index 6ff8826ae7ea0..b506a4b1e38cf 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -23,7 +23,7 @@
 static const int aarch64_insn_encoding_class[] = {
 	AARCH64_INSN_CLS_UNKNOWN,
 	AARCH64_INSN_CLS_UNKNOWN,
-	AARCH64_INSN_CLS_UNKNOWN,
+	AARCH64_INSN_CLS_SVE,
 	AARCH64_INSN_CLS_UNKNOWN,
 	AARCH64_INSN_CLS_LDST,
 	AARCH64_INSN_CLS_DP_REG,
-- 
GitLab


From d4b217330d7e0320084ff04c8491964f1f68980a Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:34 +0100
Subject: [PATCH 1736/3804] arm64: insn: Add barrier encodings

Create necessary functions to encode/decode aarch64 barrier
instructions.

DSB needs special case handling as it has multiple encodings.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-7-jthierry@redhat.com
[will: Don't reject DSB #4]
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 93f7b0c86dfd2..b8e2c6c465471 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -370,6 +370,14 @@ __AARCH64_INSN_FUNCS(eret_auth,	0xFFFFFBFF, 0xD69F0BFF)
 __AARCH64_INSN_FUNCS(mrs,	0xFFF00000, 0xD5300000)
 __AARCH64_INSN_FUNCS(msr_imm,	0xFFF8F01F, 0xD500401F)
 __AARCH64_INSN_FUNCS(msr_reg,	0xFFF00000, 0xD5100000)
+__AARCH64_INSN_FUNCS(dmb,	0xFFFFF0FF, 0xD50330BF)
+__AARCH64_INSN_FUNCS(dsb_base,	0xFFFFF0FF, 0xD503309F)
+__AARCH64_INSN_FUNCS(dsb_nxs,	0xFFFFF3FF, 0xD503323F)
+__AARCH64_INSN_FUNCS(isb,	0xFFFFF0FF, 0xD50330DF)
+__AARCH64_INSN_FUNCS(sb,	0xFFFFFFFF, 0xD50330FF)
+__AARCH64_INSN_FUNCS(clrex,	0xFFFFF0FF, 0xD503305F)
+__AARCH64_INSN_FUNCS(ssbb,	0xFFFFFFFF, 0xD503309F)
+__AARCH64_INSN_FUNCS(pssbb,	0xFFFFFFFF, 0xD503349F)
 
 #undef	__AARCH64_INSN_FUNCS
 
@@ -381,6 +389,19 @@ static inline bool aarch64_insn_is_adr_adrp(u32 insn)
 	return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
 }
 
+static inline bool aarch64_insn_is_dsb(u32 insn)
+{
+	return aarch64_insn_is_dsb_base(insn) || aarch64_insn_is_dsb_nxs(insn);
+}
+
+static inline bool aarch64_insn_is_barrier(u32 insn)
+{
+	return aarch64_insn_is_dmb(insn) || aarch64_insn_is_dsb(insn) ||
+	       aarch64_insn_is_isb(insn) || aarch64_insn_is_sb(insn) ||
+	       aarch64_insn_is_clrex(insn) || aarch64_insn_is_ssbb(insn) ||
+	       aarch64_insn_is_pssbb(insn);
+}
+
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
 bool aarch64_insn_uses_literal(u32 insn);
 bool aarch64_insn_is_branch(u32 insn);
-- 
GitLab


From 54880044c639f9c59346eabe637f9f8f39a112b8 Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:35 +0100
Subject: [PATCH 1737/3804] arm64: insn: Add some opcodes to instruction
 decoder

Add decoding capability for some instructions that objtool will need
to decode.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-8-jthierry@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index b8e2c6c465471..ac8f47ff7b18b 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -296,6 +296,12 @@ __AARCH64_INSN_FUNCS(adr,	0x9F000000, 0x10000000)
 __AARCH64_INSN_FUNCS(adrp,	0x9F000000, 0x90000000)
 __AARCH64_INSN_FUNCS(prfm,	0x3FC00000, 0x39800000)
 __AARCH64_INSN_FUNCS(prfm_lit,	0xFF000000, 0xD8000000)
+__AARCH64_INSN_FUNCS(store_imm,	0x3FC00000, 0x39000000)
+__AARCH64_INSN_FUNCS(load_imm,	0x3FC00000, 0x39400000)
+__AARCH64_INSN_FUNCS(store_pre,	0x3FE00C00, 0x38000C00)
+__AARCH64_INSN_FUNCS(load_pre,	0x3FE00C00, 0x38400C00)
+__AARCH64_INSN_FUNCS(store_post,	0x3FE00C00, 0x38000400)
+__AARCH64_INSN_FUNCS(load_post,	0x3FE00C00, 0x38400400)
 __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
 __AARCH64_INSN_FUNCS(ldadd,	0x3F20FC00, 0x38200000)
 __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
@@ -304,6 +310,8 @@ __AARCH64_INSN_FUNCS(ldrsw_lit,	0xFF000000, 0x98000000)
 __AARCH64_INSN_FUNCS(exclusive,	0x3F800000, 0x08000000)
 __AARCH64_INSN_FUNCS(load_ex,	0x3F400000, 0x08400000)
 __AARCH64_INSN_FUNCS(store_ex,	0x3F400000, 0x08000000)
+__AARCH64_INSN_FUNCS(stp,	0x7FC00000, 0x29000000)
+__AARCH64_INSN_FUNCS(ldp,	0x7FC00000, 0x29400000)
 __AARCH64_INSN_FUNCS(stp_post,	0x7FC00000, 0x28800000)
 __AARCH64_INSN_FUNCS(ldp_post,	0x7FC00000, 0x28C00000)
 __AARCH64_INSN_FUNCS(stp_pre,	0x7FC00000, 0x29800000)
@@ -336,6 +344,7 @@ __AARCH64_INSN_FUNCS(rev64,	0x7FFFFC00, 0x5AC00C00)
 __AARCH64_INSN_FUNCS(and,	0x7F200000, 0x0A000000)
 __AARCH64_INSN_FUNCS(bic,	0x7F200000, 0x0A200000)
 __AARCH64_INSN_FUNCS(orr,	0x7F200000, 0x2A000000)
+__AARCH64_INSN_FUNCS(mov_reg,	0x7FE0FFE0, 0x2A0003E0)
 __AARCH64_INSN_FUNCS(orn,	0x7F200000, 0x2A200000)
 __AARCH64_INSN_FUNCS(eor,	0x7F200000, 0x4A000000)
 __AARCH64_INSN_FUNCS(eon,	0x7F200000, 0x4A200000)
-- 
GitLab


From 71766b81de8204a0fb56de3ad1972516bac99f5b Mon Sep 17 00:00:00 2001
From: Julien Thierry <jthierry@redhat.com>
Date: Wed, 3 Mar 2021 18:05:36 +0100
Subject: [PATCH 1738/3804] arm64: insn: Add load/store decoding helpers

Provide some functions to group different load/store instructions.

Signed-off-by: Julien Thierry <jthierry@redhat.com>
Link: https://lore.kernel.org/r/20210303170536.1838032-9-jthierry@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index ac8f47ff7b18b..1ea9611545bb4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -411,6 +411,34 @@ static inline bool aarch64_insn_is_barrier(u32 insn)
 	       aarch64_insn_is_pssbb(insn);
 }
 
+static inline bool aarch64_insn_is_store_single(u32 insn)
+{
+	return aarch64_insn_is_store_imm(insn) ||
+	       aarch64_insn_is_store_pre(insn) ||
+	       aarch64_insn_is_store_post(insn);
+}
+
+static inline bool aarch64_insn_is_store_pair(u32 insn)
+{
+	return aarch64_insn_is_stp(insn) ||
+	       aarch64_insn_is_stp_pre(insn) ||
+	       aarch64_insn_is_stp_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_single(u32 insn)
+{
+	return aarch64_insn_is_load_imm(insn) ||
+	       aarch64_insn_is_load_pre(insn) ||
+	       aarch64_insn_is_load_post(insn);
+}
+
+static inline bool aarch64_insn_is_load_pair(u32 insn)
+{
+	return aarch64_insn_is_ldp(insn) ||
+	       aarch64_insn_is_ldp_pre(insn) ||
+	       aarch64_insn_is_ldp_post(insn);
+}
+
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
 bool aarch64_insn_uses_literal(u32 insn);
 bool aarch64_insn_is_branch(u32 insn);
-- 
GitLab


From 16c230b30de8b69ae75d2b98d04a77904da58d15 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 27 May 2021 11:55:29 +0100
Subject: [PATCH 1739/3804] arm64: scs: Drop unused 'tmp' argument to
 scs_{load, save} asm macros

The scs_load and scs_save asm macros don't make use of the mandatory
'tmp' register argument, so drop it and fix up the callers.

Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Link: https://lore.kernel.org/r/20210527105529.21967-1-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/scs.h | 8 ++++----
 arch/arm64/kernel/entry.S    | 8 ++++----
 arch/arm64/kernel/head.S     | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index eaa2cd92e4c10..8297bccf07845 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,18 +9,18 @@
 #ifdef CONFIG_SHADOW_CALL_STACK
 	scs_sp	.req	x18
 
-	.macro scs_load tsk, tmp
+	.macro scs_load tsk
 	ldr	scs_sp, [\tsk, #TSK_TI_SCS_SP]
 	.endm
 
-	.macro scs_save tsk, tmp
+	.macro scs_save tsk
 	str	scs_sp, [\tsk, #TSK_TI_SCS_SP]
 	.endm
 #else
-	.macro scs_load tsk, tmp
+	.macro scs_load tsk
 	.endm
 
-	.macro scs_save tsk, tmp
+	.macro scs_save tsk
 	.endm
 #endif /* CONFIG_SHADOW_CALL_STACK */
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 294f24e16feeb..3153f1448cdb7 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -275,7 +275,7 @@ alternative_else_nop_endif
 
 	mte_set_kernel_gcr x22, x23
 
-	scs_load tsk, x20
+	scs_load tsk
 	.else
 	add	x21, sp, #PT_REGS_SIZE
 	get_current_task tsk
@@ -375,7 +375,7 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
-	scs_save tsk, x0
+	scs_save tsk
 
 #ifdef CONFIG_ARM64_PTR_AUTH
 alternative_if ARM64_HAS_ADDRESS_AUTH
@@ -979,8 +979,8 @@ SYM_FUNC_START(cpu_switch_to)
 	mov	sp, x9
 	msr	sp_el0, x1
 	ptrauth_keys_install_kernel x1, x8, x9, x10
-	scs_save x0, x8
-	scs_load x1, x8
+	scs_save x0
+	scs_load x1
 	ret
 SYM_FUNC_END(cpu_switch_to)
 NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 070ed53c049d4..6a700526b1174 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -412,7 +412,7 @@ SYM_FUNC_END(__create_page_tables)
 	stp	xzr, xzr, [sp, #S_STACKFRAME]
 	add	x29, sp, #S_STACKFRAME
 
-	scs_load \tsk, \tmp1
+	scs_load \tsk
 
 	adr_l	\tmp1, __per_cpu_offset
 	ldr	w\tmp2, [\tsk, #TSK_CPU]
-- 
GitLab


From 94a311ce248e0b53c76e110fd00511af47b72ffb Mon Sep 17 00:00:00 2001
From: Muralidhara M K <muralimk@amd.com>
Date: Wed, 26 May 2021 22:16:01 +0530
Subject: [PATCH 1740/3804] x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types

Add the (HWID, MCATYPE) tuples and names for new SMCA bank types.

Also, add their respective error descriptions to the MCE decoding module
edac_mce_amd. Also while at it, optimize the string names for some SMCA
banks.

 [ bp: Drop repeated comments, explain why UMC_V2 is a separate entry. ]

Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Naveen Krishna Chatradhi  <nchatrad@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lkml.kernel.org/r/20210526164601.66228-1-nchatrad@amd.com
---
 arch/x86/include/asm/mce.h    | 13 +++++--
 arch/x86/kernel/cpu/mce/amd.c | 55 ++++++++++++++++-----------
 drivers/edac/mce_amd.c        | 70 +++++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ddfb3cad8dff2..0607ec4f50914 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -305,7 +305,7 @@ extern void apei_mce_report_mem_error(int corrected,
 /* These may be used by multiple smca_hwid_mcatypes */
 enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
-	SMCA_LS_V2,	/* Load Store */
+	SMCA_LS_V2,
 	SMCA_IF,	/* Instruction Fetch */
 	SMCA_L2_CACHE,	/* L2 Cache */
 	SMCA_DE,	/* Decoder Unit */
@@ -314,17 +314,22 @@ enum smca_bank_types {
 	SMCA_FP,	/* Floating Point */
 	SMCA_L3_CACHE,	/* L3 Cache */
 	SMCA_CS,	/* Coherent Slave */
-	SMCA_CS_V2,	/* Coherent Slave */
+	SMCA_CS_V2,
 	SMCA_PIE,	/* Power, Interrupts, etc. */
 	SMCA_UMC,	/* Unified Memory Controller */
+	SMCA_UMC_V2,
 	SMCA_PB,	/* Parameter Block */
 	SMCA_PSP,	/* Platform Security Processor */
-	SMCA_PSP_V2,	/* Platform Security Processor */
+	SMCA_PSP_V2,
 	SMCA_SMU,	/* System Management Unit */
-	SMCA_SMU_V2,	/* System Management Unit */
+	SMCA_SMU_V2,
 	SMCA_MP5,	/* Microprocessor 5 Unit */
 	SMCA_NBIO,	/* Northbridge IO Unit */
 	SMCA_PCIE,	/* PCI Express Unit */
+	SMCA_PCIE_V2,
+	SMCA_XGMI_PCS,	/* xGMI PCS Unit */
+	SMCA_XGMI_PHY,	/* xGMI PHY Unit */
+	SMCA_WAFL_PHY,	/* WAFL PHY Unit */
 	N_SMCA_BANK_TYPES
 };
 
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index e486f96b3cb32..08831acc1d036 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -77,27 +77,29 @@ struct smca_bank_name {
 };
 
 static struct smca_bank_name smca_names[] = {
-	[SMCA_LS]	= { "load_store",	"Load Store Unit" },
-	[SMCA_LS_V2]	= { "load_store",	"Load Store Unit" },
-	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
-	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
-	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
-	[SMCA_RESERVED]	= { "reserved",		"Reserved" },
-	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
-	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
-	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
-	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
-	[SMCA_CS_V2]	= { "coherent_slave",	"Coherent Slave" },
-	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
-	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
-	[SMCA_PB]	= { "param_block",	"Parameter Block" },
-	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
-	[SMCA_PSP_V2]	= { "psp",		"Platform Security Processor" },
-	[SMCA_SMU]	= { "smu",		"System Management Unit" },
-	[SMCA_SMU_V2]	= { "smu",		"System Management Unit" },
-	[SMCA_MP5]	= { "mp5",		"Microprocessor 5 Unit" },
-	[SMCA_NBIO]	= { "nbio",		"Northbridge IO Unit" },
-	[SMCA_PCIE]	= { "pcie",		"PCI Express Unit" },
+	[SMCA_LS ... SMCA_LS_V2]	= { "load_store",	"Load Store Unit" },
+	[SMCA_IF]			= { "insn_fetch",	"Instruction Fetch Unit" },
+	[SMCA_L2_CACHE]			= { "l2_cache",		"L2 Cache" },
+	[SMCA_DE]			= { "decode_unit",	"Decode Unit" },
+	[SMCA_RESERVED]			= { "reserved",		"Reserved" },
+	[SMCA_EX]			= { "execution_unit",	"Execution Unit" },
+	[SMCA_FP]			= { "floating_point",	"Floating Point Unit" },
+	[SMCA_L3_CACHE]			= { "l3_cache",		"L3 Cache" },
+	[SMCA_CS ... SMCA_CS_V2]	= { "coherent_slave",	"Coherent Slave" },
+	[SMCA_PIE]			= { "pie",		"Power, Interrupts, etc." },
+
+	/* UMC v2 is separate because both of them can exist in a single system. */
+	[SMCA_UMC]			= { "umc",		"Unified Memory Controller" },
+	[SMCA_UMC_V2]			= { "umc_v2",		"Unified Memory Controller v2" },
+	[SMCA_PB]			= { "param_block",	"Parameter Block" },
+	[SMCA_PSP ... SMCA_PSP_V2]	= { "psp",		"Platform Security Processor" },
+	[SMCA_SMU ... SMCA_SMU_V2]	= { "smu",		"System Management Unit" },
+	[SMCA_MP5]			= { "mp5",		"Microprocessor 5 Unit" },
+	[SMCA_NBIO]			= { "nbio",		"Northbridge IO Unit" },
+	[SMCA_PCIE ... SMCA_PCIE_V2]	= { "pcie",		"PCI Express Unit" },
+	[SMCA_XGMI_PCS]			= { "xgmi_pcs",		"Ext Global Memory Interconnect PCS Unit" },
+	[SMCA_XGMI_PHY]			= { "xgmi_phy",		"Ext Global Memory Interconnect PHY Unit" },
+	[SMCA_WAFL_PHY]			= { "wafl_phy",		"WAFL PHY Unit" },
 };
 
 static const char *smca_get_name(enum smca_bank_types t)
@@ -155,6 +157,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* Unified Memory Controller MCA type */
 	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0)	},
+	{ SMCA_UMC_V2,	 HWID_MCATYPE(0x96, 0x1)	},
 
 	/* Parameter Block MCA type */
 	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0)	},
@@ -175,6 +178,16 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
 
 	/* PCI Express Unit MCA type */
 	{ SMCA_PCIE,	 HWID_MCATYPE(0x46, 0x0)	},
+	{ SMCA_PCIE_V2,	 HWID_MCATYPE(0x46, 0x1)	},
+
+	/* xGMI PCS MCA type */
+	{ SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0)	},
+
+	/* xGMI PHY MCA type */
+	{ SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0)	},
+
+	/* WAFL PHY MCA type */
+	{ SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0)	},
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 5dd905a3f30ca..43ba0f931629f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -323,6 +323,21 @@ static const char * const smca_umc_mce_desc[] = {
 	"AES SRAM ECC error",
 };
 
+static const char * const smca_umc2_mce_desc[] = {
+	"DRAM ECC error",
+	"Data poison error",
+	"SDP parity error",
+	"Reserved",
+	"Address/Command parity error",
+	"Write data parity error",
+	"DCQ SRAM ECC error",
+	"Reserved",
+	"Read data parity error",
+	"Rdb SRAM ECC error",
+	"RdRsp SRAM ECC error",
+	"LM32 MP errors",
+};
+
 static const char * const smca_pb_mce_desc[] = {
 	"An ECC error in the Parameter Block RAM array",
 };
@@ -400,6 +415,56 @@ static const char * const smca_pcie_mce_desc[] = {
 	"CCIX Non-okay write response with data error",
 };
 
+static const char * const smca_pcie2_mce_desc[] = {
+	"SDP Parity Error logging",
+};
+
+static const char * const smca_xgmipcs_mce_desc[] = {
+	"Data Loss Error",
+	"Training Error",
+	"Flow Control Acknowledge Error",
+	"Rx Fifo Underflow Error",
+	"Rx Fifo Overflow Error",
+	"CRC Error",
+	"BER Exceeded Error",
+	"Tx Vcid Data Error",
+	"Replay Buffer Parity Error",
+	"Data Parity Error",
+	"Replay Fifo Overflow Error",
+	"Replay FIfo Underflow Error",
+	"Elastic Fifo Overflow Error",
+	"Deskew Error",
+	"Flow Control CRC Error",
+	"Data Startup Limit Error",
+	"FC Init Timeout Error",
+	"Recovery Timeout Error",
+	"Ready Serial Timeout Error",
+	"Ready Serial Attempt Error",
+	"Recovery Attempt Error",
+	"Recovery Relock Attempt Error",
+	"Replay Attempt Error",
+	"Sync Header Error",
+	"Tx Replay Timeout Error",
+	"Rx Replay Timeout Error",
+	"LinkSub Tx Timeout Error",
+	"LinkSub Rx Timeout Error",
+	"Rx CMD Pocket Error",
+};
+
+static const char * const smca_xgmiphy_mce_desc[] = {
+	"RAM ECC Error",
+	"ARC instruction buffer parity error",
+	"ARC data buffer parity error",
+	"PHY APB error",
+};
+
+static const char * const smca_waflphy_mce_desc[] = {
+	"RAM ECC Error",
+	"ARC instruction buffer parity error",
+	"ARC data buffer parity error",
+	"PHY APB error",
+};
+
 struct smca_mce_desc {
 	const char * const *descs;
 	unsigned int num_descs;
@@ -418,6 +483,7 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_CS_V2]	= { smca_cs2_mce_desc,	ARRAY_SIZE(smca_cs2_mce_desc)	},
 	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
 	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
+	[SMCA_UMC_V2]	= { smca_umc2_mce_desc,	ARRAY_SIZE(smca_umc2_mce_desc)	},
 	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
 	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
 	[SMCA_PSP_V2]	= { smca_psp2_mce_desc,	ARRAY_SIZE(smca_psp2_mce_desc)	},
@@ -426,6 +492,10 @@ static struct smca_mce_desc smca_mce_descs[] = {
 	[SMCA_MP5]	= { smca_mp5_mce_desc,	ARRAY_SIZE(smca_mp5_mce_desc)	},
 	[SMCA_NBIO]	= { smca_nbio_mce_desc,	ARRAY_SIZE(smca_nbio_mce_desc)	},
 	[SMCA_PCIE]	= { smca_pcie_mce_desc,	ARRAY_SIZE(smca_pcie_mce_desc)	},
+	[SMCA_PCIE_V2]	= { smca_pcie2_mce_desc,   ARRAY_SIZE(smca_pcie2_mce_desc)	},
+	[SMCA_XGMI_PCS]	= { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc)	},
+	[SMCA_XGMI_PHY]	= { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc)	},
+	[SMCA_WAFL_PHY]	= { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc)	},
 };
 
 static bool f12h_mc0_mce(u16 ec, u8 xec)
-- 
GitLab


From 1d15a10395e5a036f571ac727f202f9572e255f9 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Fri, 14 May 2021 18:13:05 -0400
Subject: [PATCH 1741/3804] drm/tegra: Get ref for DP AUX channel, not its ddc
 adapter

While we're taking a reference of the DDC adapter for a DP AUX channel in
tegra_sor_probe() because we're going to be using that adapter with the
SOR, now that we've moved where AUX registration happens the actual device
structure for the DDC adapter isn't initialized yet. Which means that we
can't really take a reference from it to try to keep it around anymore.

This should be fine though, because we can just take a reference of its
parent instead.

v2:
* Avoid calling i2c_put_adapter() in tegra_output_remove() for eDP/DP cases

Signed-off-by: Lyude Paul <lyude@redhat.com>
Fixes: 39c17ae60ea9 ("drm/tegra: Don't register DP AUX channels before connectors")
Cc: Lyude Paul <lyude@redhat.com>
Cc: Thierry Reding <thierry.reding@gmail.com>
Cc: Jonathan Hunter <jonathanh@nvidia.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-tegra@vger.kernel.org
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/output.c | 5 ++++-
 drivers/gpu/drm/tegra/sor.c    | 6 +++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 47d26b5d99456..2dacce1ab6ee8 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -180,10 +180,13 @@ int tegra_output_probe(struct tegra_output *output)
 
 void tegra_output_remove(struct tegra_output *output)
 {
+	int connector_type = output->connector.connector_type;
+
 	if (output->hpd_gpio)
 		free_irq(output->hpd_irq, output);
 
-	if (output->ddc)
+	if (connector_type != DRM_MODE_CONNECTOR_eDP &&
+	    connector_type != DRM_MODE_CONNECTOR_DisplayPort && output->ddc)
 		i2c_put_adapter(output->ddc);
 }
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 32c83f2e386ca..8f99de08b2bee 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3745,11 +3745,11 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		if (!sor->aux)
 			return -EPROBE_DEFER;
 
-		if (get_device(&sor->aux->ddc.dev)) {
-			if (try_module_get(sor->aux->ddc.owner))
+		if (get_device(sor->aux->dev)) {
+			if (try_module_get(sor->aux->dev->driver->owner))
 				sor->output.ddc = &sor->aux->ddc;
 			else
-				put_device(&sor->aux->ddc.dev);
+				put_device(sor->aux->dev);
 		}
 	}
 
-- 
GitLab


From b79b6081c440c0c197a3e8a51e8b9cf343fb210f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 27 May 2021 20:09:08 +0200
Subject: [PATCH 1742/3804] drm/tegra: sor: Fix AUX device reference leak

In the case where the AUX provides an I2C-over-AUX DDC channel, a
reference is taken on the AUX parent device of the DDC channel rather
than the DDC channel like it would be for regular I2C controllers. To
make sure the correct reference is dropped, move the unreferencing code
into the SOR driver and make sure not to drop the I2C adapter reference
in that case.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/output.c |  5 +----
 drivers/gpu/drm/tegra/sor.c    | 29 +++++++++++++++++++----------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 2dacce1ab6ee8..47d26b5d99456 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -180,13 +180,10 @@ int tegra_output_probe(struct tegra_output *output)
 
 void tegra_output_remove(struct tegra_output *output)
 {
-	int connector_type = output->connector.connector_type;
-
 	if (output->hpd_gpio)
 		free_irq(output->hpd_irq, output);
 
-	if (connector_type != DRM_MODE_CONNECTOR_eDP &&
-	    connector_type != DRM_MODE_CONNECTOR_DisplayPort && output->ddc)
+	if (output->ddc)
 		i2c_put_adapter(output->ddc);
 }
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 8f99de08b2bee..0ea320c1092bd 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3745,12 +3745,8 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		if (!sor->aux)
 			return -EPROBE_DEFER;
 
-		if (get_device(sor->aux->dev)) {
-			if (try_module_get(sor->aux->dev->driver->owner))
-				sor->output.ddc = &sor->aux->ddc;
-			else
-				put_device(sor->aux->dev);
-		}
+		if (get_device(sor->aux->dev))
+			sor->output.ddc = &sor->aux->ddc;
 	}
 
 	if (!sor->aux) {
@@ -3778,12 +3774,13 @@ static int tegra_sor_probe(struct platform_device *pdev)
 
 	err = tegra_sor_parse_dt(sor);
 	if (err < 0)
-		return err;
+		goto put_aux;
 
 	err = tegra_output_probe(&sor->output);
-	if (err < 0)
-		return dev_err_probe(&pdev->dev, err,
-				     "failed to probe output\n");
+	if (err < 0) {
+		dev_err_probe(&pdev->dev, err, "failed to probe output\n");
+		goto put_aux;
+	}
 
 	if (sor->ops && sor->ops->probe) {
 		err = sor->ops->probe(sor);
@@ -3970,7 +3967,14 @@ uninit:
 	host1x_client_exit(&sor->client);
 	pm_runtime_disable(&pdev->dev);
 remove:
+	if (sor->aux)
+		sor->output.ddc = NULL;
+
 	tegra_output_remove(&sor->output);
+put_aux:
+	if (sor->aux)
+		put_device(sor->aux->dev);
+
 	return err;
 }
 
@@ -3988,6 +3992,11 @@ static int tegra_sor_remove(struct platform_device *pdev)
 
 	pm_runtime_disable(&pdev->dev);
 
+	if (sor->aux) {
+		put_device(sor->aux->dev);
+		sor->output.ddc = NULL;
+	}
+
 	tegra_output_remove(&sor->output);
 
 	return 0;
-- 
GitLab


From ff2e6efda0d5c51b33e2bcc0b0b981ac0a0ef214 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Wed, 26 May 2021 23:52:28 +0200
Subject: [PATCH 1743/3804] kbuild: Quote OBJCOPY var to avoid a pahole call
 break the build

The ccache tool can be used to speed up cross-compilation, by calling the
compiler and binutils through ccache. For example, following should work:

    $ export ARCH=arm64 CROSS_COMPILE="ccache aarch64-linux-gnu-"

    $ make M=drivers/gpu/drm/rockchip/

but pahole fails to extract the BTF info from DWARF, breaking the build:

      CC [M]  drivers/gpu/drm/rockchip//rockchipdrm.mod.o
      LD [M]  drivers/gpu/drm/rockchip//rockchipdrm.ko
      BTF [M] drivers/gpu/drm/rockchip//rockchipdrm.ko
    aarch64-linux-gnu-objcopy: invalid option -- 'J'
    Usage: aarch64-linux-gnu-objcopy [option(s)] in-file [out-file]
     Copies a binary file, possibly transforming it in the process
    ...
    make[1]: *** [scripts/Makefile.modpost:156: __modpost] Error 2
    make: *** [Makefile:1866: modules] Error 2

this fails because OBJCOPY is set to "ccache aarch64-linux-gnu-objcopy"
and later pahole is executed with the following command line:

    LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@

which gets expanded to:

    LLVM_OBJCOPY=ccache aarch64-linux-gnu-objcopy pahole -J ...

instead of:

    LLVM_OBJCOPY="ccache aarch64-linux-gnu-objcopy" pahole -J ...

Fixes: 5f9ae91f7c0d ("kbuild: Build kernel module BTFs if BTF is enabled and pahole supports it")
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/bpf/20210526215228.3729875-1-javierm@redhat.com
---
 scripts/Makefile.modfinal | 2 +-
 scripts/link-vmlinux.sh   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index dd87cea9fba78..a7883e455290e 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -59,7 +59,7 @@ quiet_cmd_ld_ko_o = LD [M]  $@
 quiet_cmd_btf_ko = BTF [M] $@
       cmd_btf_ko = 							\
 	if [ -f vmlinux ]; then						\
-		LLVM_OBJCOPY=$(OBJCOPY) $(PAHOLE) -J --btf_base vmlinux $@; \
+		LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \
 	else								\
 		printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
 	fi;
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index f4de4c97015bc..0e0f6466b18d6 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -240,7 +240,7 @@ gen_btf()
 	fi
 
 	info "BTF" ${2}
-	LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${extra_paholeopt} ${1}
+	LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${extra_paholeopt} ${1}
 
 	# Create ${2} which contains just .BTF section but no symbols. Add
 	# SHF_ALLOC because .BTF will be part of the vmlinux image. --strip-all
-- 
GitLab


From 6d2fcfe6b517fe7cbf2687adfb0a16cdcd5d9243 Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Fri, 21 May 2021 17:19:27 +0200
Subject: [PATCH 1744/3804] cifs: set server->cipher_type to AES-128-CCM for
 SMB3.0

SMB3.0 doesn't have encryption negotiate context but simply uses
the SMB2_GLOBAL_CAP_ENCRYPTION flag.

When that flag is present in the neg response cifs.ko uses AES-128-CCM
which is the only cipher available in this context.

cipher_type was set to the server cipher only when parsing encryption
negotiate context (SMB3.1.1).

For SMB3.0 it was set to 0. This means cipher_type value can be 0 or 1
for AES-128-CCM.

Fix this by checking for SMB3.0 and encryption capability and setting
cipher_type appropriately.

Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2pdu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 9f24eb88297a8..c205f93e0a10f 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -958,6 +958,13 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 	/* Internal types */
 	server->capabilities |= SMB2_NT_FIND | SMB2_LARGE_FILES;
 
+	/*
+	 * SMB3.0 supports only 1 cipher and doesn't have a encryption neg context
+	 * Set the cipher type manually.
+	 */
+	if (server->dialect == SMB30_PROT_ID && (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION))
+		server->cipher_type = SMB2_ENCRYPTION_AES128_CCM;
+
 	security_blob = smb2_get_data_area_len(&blob_offset, &blob_length,
 					       (struct smb2_sync_hdr *)rsp);
 	/*
-- 
GitLab


From 5c8121262484d99bffb598f39a0df445cecd8efb Mon Sep 17 00:00:00 2001
From: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Date: Sat, 20 Mar 2021 23:32:38 +0300
Subject: [PATCH 1745/3804] pata_ep93xx: fix deferred probing

The driver overrides the error codes returned by platform_get_irq() to
-ENXIO, so if it returns -EPROBE_DEFER, the driver would fail the probe
permanently instead of the deferred probing.  Propagate the error code
upstream, as it should have been done from the start...

Fixes: 2fff27512600 ("PATA host controller driver for ep93xx")
Signed-off-by: Sergey Shtylyov <s.shtylyov@omprussia.ru>
Link: https://lore.kernel.org/r/509fda88-2e0d-2cc7-f411-695d7e94b136@omprussia.ru
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_ep93xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c
index badab67088935..46208ececbb6a 100644
--- a/drivers/ata/pata_ep93xx.c
+++ b/drivers/ata/pata_ep93xx.c
@@ -928,7 +928,7 @@ static int ep93xx_pata_probe(struct platform_device *pdev)
 	/* INT[3] (IRQ_EP93XX_EXT3) line connected as pull down */
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		err = -ENXIO;
+		err = irq;
 		goto err_rel_gpio;
 	}
 
-- 
GitLab


From eb0688180549e3b72464e9f78df58cb7a5592c7f Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad@microsoft.com>
Date: Fri, 21 May 2021 06:35:52 +0000
Subject: [PATCH 1746/3804] cifs: fix string declarations and assignments in
 tracepoints

We missed using the variable length string macros in several
tracepoints. Fixed them in this change.

There are probably more useful macros that we can use to print
others like flags etc. But I'll submit separate patches for
those at a future date.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Cc: <stable@vger.kernel.org> # v5.12
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/trace.h | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index d6df908dccade..dafcb6ab050dd 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -12,6 +12,11 @@
 
 #include <linux/tracepoint.h>
 
+/*
+ * Please use this 3-part article as a reference for writing new tracepoints:
+ * https://lwn.net/Articles/379903/
+ */
+
 /* For logging errors in read or write */
 DECLARE_EVENT_CLASS(smb3_rw_err_class,
 	TP_PROTO(unsigned int xid,
@@ -529,16 +534,16 @@ DECLARE_EVENT_CLASS(smb3_exit_err_class,
 	TP_ARGS(xid, func_name, rc),
 	TP_STRUCT__entry(
 		__field(unsigned int, xid)
-		__field(const char *, func_name)
+		__string(func_name, func_name)
 		__field(int, rc)
 	),
 	TP_fast_assign(
 		__entry->xid = xid;
-		__entry->func_name = func_name;
+		__assign_str(func_name, func_name);
 		__entry->rc = rc;
 	),
 	TP_printk("\t%s: xid=%u rc=%d",
-		__entry->func_name, __entry->xid, __entry->rc)
+		__get_str(func_name), __entry->xid, __entry->rc)
 )
 
 #define DEFINE_SMB3_EXIT_ERR_EVENT(name)          \
@@ -583,14 +588,14 @@ DECLARE_EVENT_CLASS(smb3_enter_exit_class,
 	TP_ARGS(xid, func_name),
 	TP_STRUCT__entry(
 		__field(unsigned int, xid)
-		__field(const char *, func_name)
+		__string(func_name, func_name)
 	),
 	TP_fast_assign(
 		__entry->xid = xid;
-		__entry->func_name = func_name;
+		__assign_str(func_name, func_name);
 	),
 	TP_printk("\t%s: xid=%u",
-		__entry->func_name, __entry->xid)
+		__get_str(func_name), __entry->xid)
 )
 
 #define DEFINE_SMB3_ENTER_EXIT_EVENT(name)        \
@@ -857,16 +862,16 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
 	TP_STRUCT__entry(
 		__field(__u64, currmid)
 		__field(__u64, conn_id)
-		__field(char *, hostname)
+		__string(hostname, hostname)
 	),
 	TP_fast_assign(
 		__entry->currmid = currmid;
 		__entry->conn_id = conn_id;
-		__entry->hostname = hostname;
+		__assign_str(hostname, hostname);
 	),
 	TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
 		__entry->conn_id,
-		__entry->hostname,
+		__get_str(hostname),
 		__entry->currmid)
 )
 
@@ -891,7 +896,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
 	TP_STRUCT__entry(
 		__field(__u64, currmid)
 		__field(__u64, conn_id)
-		__field(char *, hostname)
+		__string(hostname, hostname)
 		__field(int, credits)
 		__field(int, credits_to_add)
 		__field(int, in_flight)
@@ -899,7 +904,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
 	TP_fast_assign(
 		__entry->currmid = currmid;
 		__entry->conn_id = conn_id;
-		__entry->hostname = hostname;
+		__assign_str(hostname, hostname);
 		__entry->credits = credits;
 		__entry->credits_to_add = credits_to_add;
 		__entry->in_flight = in_flight;
@@ -907,7 +912,7 @@ DECLARE_EVENT_CLASS(smb3_credit_class,
 	TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
 			"credits=%d credit_change=%d in_flight=%d",
 		__entry->conn_id,
-		__entry->hostname,
+		__get_str(hostname),
 		__entry->currmid,
 		__entry->credits,
 		__entry->credits_to_add,
-- 
GitLab


From 72ab7b6bb1a60bfc7baba1864fa28383dab4f862 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:50 +0100
Subject: [PATCH 1747/3804] i2c: busses: i2c-nomadik: Fix formatting issue
 pertaining to 'timeout'

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-nomadik.c:184: warning: Function parameter or member 'timeout' not described in 'nmk_i2c_dev'

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-nomadik.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c
index dc77e1c4e80f9..a2d12a5b1c34c 100644
--- a/drivers/i2c/busses/i2c-nomadik.c
+++ b/drivers/i2c/busses/i2c-nomadik.c
@@ -159,7 +159,7 @@ struct i2c_nmk_client {
  * @clk_freq: clock frequency for the operation mode
  * @tft: Tx FIFO Threshold in bytes
  * @rft: Rx FIFO Threshold in bytes
- * @timeout Slave response timeout (ms)
+ * @timeout: Slave response timeout (ms)
  * @sm: speed mode
  * @stop: stop condition.
  * @xfer_complete: acknowledge completion for a I2C message.
-- 
GitLab


From 45ce82f5eaedd5868b366d09d921a3205166d625 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:51 +0100
Subject: [PATCH 1748/3804] i2c: muxes: i2c-arb-gpio-challenge: Demote
 non-conformant kernel-doc headers

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/muxes/i2c-arb-gpio-challenge.c:43: warning: Function parameter or member 'muxc' not described in 'i2c_arbitrator_select'
 drivers/i2c/muxes/i2c-arb-gpio-challenge.c:43: warning: Function parameter or member 'chan' not described in 'i2c_arbitrator_select'
 drivers/i2c/muxes/i2c-arb-gpio-challenge.c:86: warning: Function parameter or member 'muxc' not described in 'i2c_arbitrator_deselect'
 drivers/i2c/muxes/i2c-arb-gpio-challenge.c:86: warning: Function parameter or member 'chan' not described in 'i2c_arbitrator_deselect'

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/muxes/i2c-arb-gpio-challenge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
index 6dc88902c189f..1c78657631f4f 100644
--- a/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
+++ b/drivers/i2c/muxes/i2c-arb-gpio-challenge.c
@@ -34,7 +34,7 @@ struct i2c_arbitrator_data {
 };
 
 
-/**
+/*
  * i2c_arbitrator_select - claim the I2C bus
  *
  * Use the GPIO-based signalling protocol; return -EBUSY if we fail.
@@ -77,7 +77,7 @@ static int i2c_arbitrator_select(struct i2c_mux_core *muxc, u32 chan)
 	return -EBUSY;
 }
 
-/**
+/*
  * i2c_arbitrator_deselect - release the I2C bus
  *
  * Release the I2C bus using the GPIO-based signalling protocol.
-- 
GitLab


From f09aa114c4aff5c5b170be3498b63a006ea46f92 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:52 +0100
Subject: [PATCH 1749/3804] i2c: busses: i2c-ali1563: File headers are not good
 candidates for kernel-doc

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-ali1563.c:24: warning: expecting prototype for i2c(). Prototype was for ALI1563_MAX_TIMEOUT() instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-ali1563.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c
index 4d12e3da12f0d..55a9e93fbfeb5 100644
--- a/drivers/i2c/busses/i2c-ali1563.c
+++ b/drivers/i2c/busses/i2c-ali1563.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
  *	i2c-ali1563.c - i2c driver for the ALi 1563 Southbridge
  *
  *	Copyright (C) 2004 Patrick Mochel
-- 
GitLab


From 6eb8a473693149f814a5082f395e130e75d41d57 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:55 +0100
Subject: [PATCH 1750/3804] i2c: busses: i2c-cadence: Fix incorrectly
 documented 'enum cdns_i2c_slave_mode'

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-cadence.c:157: warning: expecting prototype for enum cdns_i2c_slave_mode. Prototype was for enum cdns_i2c_slave_state instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-cadence.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index c1bbc4caeb5c9..66aafa7d11234 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -144,7 +144,7 @@ enum cdns_i2c_mode {
 };
 
 /**
- * enum cdns_i2c_slave_mode - Slave state when I2C is operating in slave mode
+ * enum cdns_i2c_slave_state - Slave state when I2C is operating in slave mode
  *
  * @CDNS_I2C_SLAVE_STATE_IDLE: I2C slave idle
  * @CDNS_I2C_SLAVE_STATE_SEND: I2C slave sending data to master
-- 
GitLab


From b4c760de3cedd41e63797b7eea73baf2a165dde2 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:56 +0100
Subject: [PATCH 1751/3804] i2c: busses: i2c-designware-master: Fix misnaming
 of 'i2c_dw_init_master()'

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-designware-master.c:176: warning: expecting prototype for i2c_dw_init(). Prototype was for i2c_dw_init_master() instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-master.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 13be1d678c399..9b08bb5df38d2 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -165,7 +165,7 @@ static int i2c_dw_set_timings_master(struct dw_i2c_dev *dev)
 }
 
 /**
- * i2c_dw_init() - Initialize the designware I2C master hardware
+ * i2c_dw_init_master() - Initialize the designware I2C master hardware
  * @dev: device private data
  *
  * This functions configures and enables the I2C master.
-- 
GitLab


From f9f193fc222bd5352a414ba34406303cfedd2c5e Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:57 +0100
Subject: [PATCH 1752/3804] i2c: busses: i2c-eg20t: Fix 'bad line' issue and
 provide description for 'msgs' param

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-eg20t.c:151: warning: bad line:                          PCH i2c controller
 drivers/i2c/busses/i2c-eg20t.c:369: warning: Function parameter or member 'msgs' not described in 'pch_i2c_writebytes'

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-eg20t.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 843b31a0f752b..321b2770feabc 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -148,7 +148,7 @@ struct i2c_algo_pch_data {
 
 /**
  * struct adapter_info - This structure holds the adapter information for the
-			 PCH i2c controller
+ *			 PCH i2c controller
  * @pch_data:		stores a list of i2c_algo_pch_data
  * @pch_i2c_suspended:	specifies whether the system is suspended or not
  *			perhaps with more lines and words.
@@ -358,6 +358,7 @@ static void pch_i2c_repstart(struct i2c_algo_pch_data *adap)
 /**
  * pch_i2c_writebytes() - write data to I2C bus in normal mode
  * @i2c_adap:	Pointer to the struct i2c_adapter.
+ * @msgs:	Pointer to the i2c message structure.
  * @last:	specifies whether last message or not.
  *		In the case of compound mode it will be 1 for last message,
  *		otherwise 0.
-- 
GitLab


From d4c73d41bef08f6d7878cb3e55d7e50df13d02c1 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:59 +0100
Subject: [PATCH 1753/3804] i2c: busses: i2c-ocores: Place the expected
 function names into the documentation headers

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-ocores.c:253: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/i2c/busses/i2c-ocores.c:267: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/i2c/busses/i2c-ocores.c:299: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst
 drivers/i2c/busses/i2c-ocores.c:347: warning: expecting prototype for It handles an IRQ(). Prototype was for ocores_process_polling() instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Peter Korsgaard <peter@korsgaard.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-ocores.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 273222e38056e..a0af027db04c1 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -250,7 +250,7 @@ static irqreturn_t ocores_isr(int irq, void *dev_id)
 }
 
 /**
- * Process timeout event
+ * ocores_process_timeout() - Process timeout event
  * @i2c: ocores I2C device instance
  */
 static void ocores_process_timeout(struct ocores_i2c *i2c)
@@ -264,7 +264,7 @@ static void ocores_process_timeout(struct ocores_i2c *i2c)
 }
 
 /**
- * Wait until something change in a given register
+ * ocores_wait() - Wait until something change in a given register
  * @i2c: ocores I2C device instance
  * @reg: register to query
  * @mask: bitmask to apply on register value
@@ -296,7 +296,7 @@ static int ocores_wait(struct ocores_i2c *i2c,
 }
 
 /**
- * Wait until is possible to process some data
+ * ocores_poll_wait() - Wait until is possible to process some data
  * @i2c: ocores I2C device instance
  *
  * Used when the device is in polling mode (interrupts disabled).
@@ -334,7 +334,7 @@ static int ocores_poll_wait(struct ocores_i2c *i2c)
 }
 
 /**
- * It handles an IRQ-less transfer
+ * ocores_process_polling() - It handles an IRQ-less transfer
  * @i2c: ocores I2C device instance
  *
  * Even if IRQ are disabled, the I2C OpenCore IP behavior is exactly the same
-- 
GitLab


From 3e0f8672f1685ed1fbbc4b3388fe8093e43e9783 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:01:00 +0100
Subject: [PATCH 1754/3804] i2c: busses: i2c-pnx: Provide descriptions for
 'alg_data' data structure

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-pnx.c:147: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_start'
 drivers/i2c/busses/i2c-pnx.c:147: warning: Excess function parameter 'adap' description in 'i2c_pnx_start'
 drivers/i2c/busses/i2c-pnx.c:202: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_stop'
 drivers/i2c/busses/i2c-pnx.c:202: warning: Excess function parameter 'adap' description in 'i2c_pnx_stop'
 drivers/i2c/busses/i2c-pnx.c:231: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_master_xmit'
 drivers/i2c/busses/i2c-pnx.c:231: warning: Excess function parameter 'adap' description in 'i2c_pnx_master_xmit'
 drivers/i2c/busses/i2c-pnx.c:301: warning: Function parameter or member 'alg_data' not described in 'i2c_pnx_master_rcv'
 drivers/i2c/busses/i2c-pnx.c:301: warning: Excess function parameter 'adap' description in 'i2c_pnx_master_rcv'

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-pnx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index 8c4ec7f13f5ab..50f21cdbe90d3 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -138,7 +138,7 @@ static inline void i2c_pnx_arm_timer(struct i2c_pnx_algo_data *alg_data)
 /**
  * i2c_pnx_start - start a device
  * @slave_addr:		slave address
- * @adap:		pointer to adapter structure
+ * @alg_data:		pointer to local driver data structure
  *
  * Generate a START signal in the desired mode.
  */
@@ -194,7 +194,7 @@ static int i2c_pnx_start(unsigned char slave_addr,
 
 /**
  * i2c_pnx_stop - stop a device
- * @adap:		pointer to I2C adapter structure
+ * @alg_data:		pointer to local driver data structure
  *
  * Generate a STOP signal to terminate the master transaction.
  */
@@ -223,7 +223,7 @@ static void i2c_pnx_stop(struct i2c_pnx_algo_data *alg_data)
 
 /**
  * i2c_pnx_master_xmit - transmit data to slave
- * @adap:		pointer to I2C adapter structure
+ * @alg_data:		pointer to local driver data structure
  *
  * Sends one byte of data to the slave
  */
@@ -293,7 +293,7 @@ static int i2c_pnx_master_xmit(struct i2c_pnx_algo_data *alg_data)
 
 /**
  * i2c_pnx_master_rcv - receive data from slave
- * @adap:		pointer to I2C adapter structure
+ * @alg_data:		pointer to local driver data structure
  *
  * Reads one byte data from the slave
  */
-- 
GitLab


From 721a6fe5f9584357617b463e687f379412d1c213 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:01:02 +0100
Subject: [PATCH 1755/3804] i2c: busses: i2c-st: Fix copy/paste function
 misnaming issues

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-st.c:531: warning: expecting prototype for st_i2c_handle_write(). Prototype was for st_i2c_handle_read() instead
 drivers/i2c/busses/i2c-st.c:566: warning: expecting prototype for st_i2c_isr(). Prototype was for st_i2c_isr_thread() instead

Fix the "enmpty" typo while here.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Alain Volmat <alain.volmat@foss.st.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-st.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c
index faa81a95551fe..88482316d22a0 100644
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -524,7 +524,7 @@ static void st_i2c_handle_write(struct st_i2c_dev *i2c_dev)
 }
 
 /**
- * st_i2c_handle_write() - Handle FIFO enmpty interrupt in case of read
+ * st_i2c_handle_read() - Handle FIFO empty interrupt in case of read
  * @i2c_dev: Controller's private data
  */
 static void st_i2c_handle_read(struct st_i2c_dev *i2c_dev)
@@ -558,7 +558,7 @@ static void st_i2c_handle_read(struct st_i2c_dev *i2c_dev)
 }
 
 /**
- * st_i2c_isr() - Interrupt routine
+ * st_i2c_isr_thread() - Interrupt routine
  * @irq: interrupt number
  * @data: Controller's private data
  */
-- 
GitLab


From a00cb25169d508908c6baa886035e0aa9121942a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:01:03 +0100
Subject: [PATCH 1756/3804] i2c: busses: i2c-stm32f4: Remove incorrectly placed
 ' ' from function name

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-stm32f4.c:321: warning: expecting prototype for stm32f4_i2c_write_ byte()(). Prototype was for stm32f4_i2c_write_byte() instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Alain Volmat <alain.volmat@foss.st.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-stm32f4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-stm32f4.c b/drivers/i2c/busses/i2c-stm32f4.c
index 4933fc8ce3fd1..eebce7ecef25b 100644
--- a/drivers/i2c/busses/i2c-stm32f4.c
+++ b/drivers/i2c/busses/i2c-stm32f4.c
@@ -313,7 +313,7 @@ static int stm32f4_i2c_wait_free_bus(struct stm32f4_i2c_dev *i2c_dev)
 }
 
 /**
- * stm32f4_i2c_write_ byte() - Write a byte in the data register
+ * stm32f4_i2c_write_byte() - Write a byte in the data register
  * @i2c_dev: Controller's private data
  * @byte: Data to write in the register
  */
-- 
GitLab


From a5063ab976024f72865029646d7c8c9dfa63b595 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 12 May 2021 09:20:49 +1200
Subject: [PATCH 1757/3804] dt-bindings: i2c: mpc: Add fsl,i2c-erratum-a004447
 flag

Document the fsl,i2c-erratum-a004447 flag which indicates the presence
of an i2c erratum on some QorIQ SoCs.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 Documentation/devicetree/bindings/i2c/i2c-mpc.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml b/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml
index 7b553d559c832..98c6fcf7bf265 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-mpc.yaml
@@ -46,6 +46,13 @@ properties:
     description: |
       I2C bus timeout in microseconds
 
+  fsl,i2c-erratum-a004447:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      Indicates the presence of QorIQ erratum A-004447, which
+      says that the standard i2c recovery scheme mechanism does
+      not work and an alternate implementation is needed.
+
 required:
   - compatible
   - reg
-- 
GitLab


From 7adc7b225cddcfd0f346d10144fd7a3d3d9f9ea7 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 12 May 2021 09:20:50 +1200
Subject: [PATCH 1758/3804] powerpc/fsl: set fsl,i2c-erratum-a004447 flag for
 P2041 i2c controllers

The i2c controllers on the P2040/P2041 have an erratum where the
documented scheme for i2c bus recovery will not work (A-004447). A
different mechanism is needed which is documented in the P2040 Chip
Errata Rev Q (latest available at the time of writing).

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index 872e4485dc3f0..ddc018d42252f 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -371,7 +371,23 @@
 	};
 
 /include/ "qoriq-i2c-0.dtsi"
+	i2c@118000 {
+		fsl,i2c-erratum-a004447;
+	};
+
+	i2c@118100 {
+		fsl,i2c-erratum-a004447;
+	};
+
 /include/ "qoriq-i2c-1.dtsi"
+	i2c@119000 {
+		fsl,i2c-erratum-a004447;
+	};
+
+	i2c@119100 {
+		fsl,i2c-erratum-a004447;
+	};
+
 /include/ "qoriq-duart-0.dtsi"
 /include/ "qoriq-duart-1.dtsi"
 /include/ "qoriq-gpio-0.dtsi"
-- 
GitLab


From 19ae697a1e4edf1d755b413e3aa38da65e2db23b Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 12 May 2021 09:20:51 +1200
Subject: [PATCH 1759/3804] powerpc/fsl: set fsl,i2c-erratum-a004447 flag for
 P1010 i2c controllers

The i2c controllers on the P1010 have an erratum where the documented
scheme for i2c bus recovery will not work (A-004447). A different
mechanism is needed which is documented in the P1010 Chip Errata Rev L.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 arch/powerpc/boot/dts/fsl/p1010si-post.dtsi | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index c2717f31925a2..ccda0a91abf00 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -122,7 +122,15 @@
 	};
 
 /include/ "pq3-i2c-0.dtsi"
+	i2c@3000 {
+		fsl,i2c-erratum-a004447;
+	};
+
 /include/ "pq3-i2c-1.dtsi"
+	i2c@3100 {
+		fsl,i2c-erratum-a004447;
+	};
+
 /include/ "pq3-duart-0.dtsi"
 /include/ "pq3-espi-0.dtsi"
 	spi0: spi@7000 {
-- 
GitLab


From 8f0cdec8b5fd94135d643662506ee94ae9e98785 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 12 May 2021 09:20:52 +1200
Subject: [PATCH 1760/3804] i2c: mpc: implement erratum A-004447 workaround

The P2040/P2041 has an erratum where the normal i2c recovery mechanism
does not work. Implement the alternative recovery mechanism documented
in the P2040 Chip Errata Rev Q.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-mpc.c | 81 +++++++++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c
index 30d9e89a3db20..dcca9c2396db1 100644
--- a/drivers/i2c/busses/i2c-mpc.c
+++ b/drivers/i2c/busses/i2c-mpc.c
@@ -19,6 +19,7 @@
 
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/fsl_devices.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
@@ -45,6 +46,7 @@
 #define CCR_MTX  0x10
 #define CCR_TXAK 0x08
 #define CCR_RSTA 0x04
+#define CCR_RSVD 0x02
 
 #define CSR_MCF  0x80
 #define CSR_MAAS 0x40
@@ -97,7 +99,7 @@ struct mpc_i2c {
 	u32 block;
 	int rc;
 	int expect_rxack;
-
+	bool has_errata_A004447;
 };
 
 struct mpc_i2c_divider {
@@ -136,6 +138,75 @@ static void mpc_i2c_fixup(struct mpc_i2c *i2c)
 	}
 }
 
+static int i2c_mpc_wait_sr(struct mpc_i2c *i2c, int mask)
+{
+	void __iomem *addr = i2c->base + MPC_I2C_SR;
+	u8 val;
+
+	return readb_poll_timeout(addr, val, val & mask, 0, 100);
+}
+
+/*
+ * Workaround for Erratum A004447. From the P2040CE Rev Q
+ *
+ * 1.  Set up the frequency divider and sampling rate.
+ * 2.  I2CCR - a0h
+ * 3.  Poll for I2CSR[MBB] to get set.
+ * 4.  If I2CSR[MAL] is set (an indication that SDA is stuck low), then go to
+ *     step 5. If MAL is not set, then go to step 13.
+ * 5.  I2CCR - 00h
+ * 6.  I2CCR - 22h
+ * 7.  I2CCR - a2h
+ * 8.  Poll for I2CSR[MBB] to get set.
+ * 9.  Issue read to I2CDR.
+ * 10. Poll for I2CSR[MIF] to be set.
+ * 11. I2CCR - 82h
+ * 12. Workaround complete. Skip the next steps.
+ * 13. Issue read to I2CDR.
+ * 14. Poll for I2CSR[MIF] to be set.
+ * 15. I2CCR - 80h
+ */
+static void mpc_i2c_fixup_A004447(struct mpc_i2c *i2c)
+{
+	int ret;
+	u32 val;
+
+	writeccr(i2c, CCR_MEN | CCR_MSTA);
+	ret = i2c_mpc_wait_sr(i2c, CSR_MBB);
+	if (ret) {
+		dev_err(i2c->dev, "timeout waiting for CSR_MBB\n");
+		return;
+	}
+
+	val = readb(i2c->base + MPC_I2C_SR);
+
+	if (val & CSR_MAL) {
+		writeccr(i2c, 0x00);
+		writeccr(i2c, CCR_MSTA | CCR_RSVD);
+		writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSVD);
+		ret = i2c_mpc_wait_sr(i2c, CSR_MBB);
+		if (ret) {
+			dev_err(i2c->dev, "timeout waiting for CSR_MBB\n");
+			return;
+		}
+		val = readb(i2c->base + MPC_I2C_DR);
+		ret = i2c_mpc_wait_sr(i2c, CSR_MIF);
+		if (ret) {
+			dev_err(i2c->dev, "timeout waiting for CSR_MIF\n");
+			return;
+		}
+		writeccr(i2c, CCR_MEN | CCR_RSVD);
+	} else {
+		val = readb(i2c->base + MPC_I2C_DR);
+		ret = i2c_mpc_wait_sr(i2c, CSR_MIF);
+		if (ret) {
+			dev_err(i2c->dev, "timeout waiting for CSR_MIF\n");
+			return;
+		}
+		writeccr(i2c, CCR_MEN);
+	}
+}
+
 #if defined(CONFIG_PPC_MPC52xx) || defined(CONFIG_PPC_MPC512x)
 static const struct mpc_i2c_divider mpc_i2c_dividers_52xx[] = {
 	{20, 0x20}, {22, 0x21}, {24, 0x22}, {26, 0x23},
@@ -670,7 +741,10 @@ static int fsl_i2c_bus_recovery(struct i2c_adapter *adap)
 {
 	struct mpc_i2c *i2c = i2c_get_adapdata(adap);
 
-	mpc_i2c_fixup(i2c);
+	if (i2c->has_errata_A004447)
+		mpc_i2c_fixup_A004447(i2c);
+	else
+		mpc_i2c_fixup(i2c);
 
 	return 0;
 }
@@ -767,6 +841,9 @@ static int fsl_i2c_probe(struct platform_device *op)
 	}
 	dev_info(i2c->dev, "timeout %u us\n", mpc_ops.timeout * 1000000 / HZ);
 
+	if (of_property_read_bool(op->dev.of_node, "fsl,i2c-erratum-a004447"))
+		i2c->has_errata_A004447 = true;
+
 	i2c->adap = mpc_ops;
 	scnprintf(i2c->adap.name, sizeof(i2c->adap.name),
 		  "MPC adapter (%s)", of_node_full_name(op->dev.of_node));
-- 
GitLab


From e4d8716c3dcec47f1557024add24e1f3c09eb24b Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Tue, 25 May 2021 17:03:36 +0200
Subject: [PATCH 1761/3804] i2c: i801: Don't generate an interrupt on bus reset

Now that the i2c-i801 driver supports interrupts, setting the KILL bit
in an attempt to recover from a timed out transaction triggers an
interrupt. Unfortunately, the interrupt handler (i801_isr) is not
prepared for this situation and will try to process the interrupt as
if it was signaling the end of a successful transaction. In the case
of a block transaction, this can result in an out-of-range memory
access.

This condition was reproduced several times by syzbot:
https://syzkaller.appspot.com/bug?extid=ed71512d469895b5b34e
https://syzkaller.appspot.com/bug?extid=8c8dedc0ba9e03f6c79e
https://syzkaller.appspot.com/bug?extid=c8ff0b6d6c73d81b610e
https://syzkaller.appspot.com/bug?extid=33f6c360821c399d69eb
https://syzkaller.appspot.com/bug?extid=be15dc0b1933f04b043a
https://syzkaller.appspot.com/bug?extid=b4d3fd1dfd53e90afd79

So disable interrupts while trying to reset the bus. Interrupts will
be enabled again for the following transaction.

Fixes: 636752bcb517 ("i2c-i801: Enable IRQ for SMBus transactions")
Reported-by: syzbot+b4d3fd1dfd53e90afd79@syzkaller.appspotmail.com
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-i801.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 99d446763530e..f9e1c2ceaac05 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -395,11 +395,9 @@ static int i801_check_post(struct i801_priv *priv, int status)
 		dev_err(&priv->pci_dev->dev, "Transaction timeout\n");
 		/* try to stop the current command */
 		dev_dbg(&priv->pci_dev->dev, "Terminating the current operation\n");
-		outb_p(inb_p(SMBHSTCNT(priv)) | SMBHSTCNT_KILL,
-		       SMBHSTCNT(priv));
+		outb_p(SMBHSTCNT_KILL, SMBHSTCNT(priv));
 		usleep_range(1000, 2000);
-		outb_p(inb_p(SMBHSTCNT(priv)) & (~SMBHSTCNT_KILL),
-		       SMBHSTCNT(priv));
+		outb_p(0, SMBHSTCNT(priv));
 
 		/* Check if it worked */
 		status = inb_p(SMBHSTSTS(priv));
-- 
GitLab


From 1bb56810677f26b78d57a3038054943efd334a1c Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Fri, 21 May 2021 17:19:28 +0200
Subject: [PATCH 1762/3804] cifs: change format of CIFS_FULL_KEY_DUMP ioctl

Make CIFS_FULL_KEY_DUMP ioctl able to return variable-length keys.

* userspace needs to pass the struct size along with optional
  session_id and some space at the end to store keys
* if there is enough space kernel returns keys in the extra space and
  sets the length of each key via xyz_key_length fields

This also fixes the build error for get_user() on ARM.

Sample program:

	#include <stdlib.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <sys/fcntl.h>
	#include <sys/ioctl.h>

	struct smb3_full_key_debug_info {
	        uint32_t   in_size;
	        uint64_t   session_id;
	        uint16_t   cipher_type;
	        uint8_t    session_key_length;
	        uint8_t    server_in_key_length;
	        uint8_t    server_out_key_length;
	        uint8_t    data[];
	        /*
	         * return this struct with the keys appended at the end:
	         * uint8_t session_key[session_key_length];
	         * uint8_t server_in_key[server_in_key_length];
	         * uint8_t server_out_key[server_out_key_length];
	         */
	} __attribute__((packed));

	#define CIFS_IOCTL_MAGIC 0xCF
	#define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info)

	void dump(const void *p, size_t len) {
	        const char *hex = "0123456789ABCDEF";
	        const uint8_t *b = p;
	        for (int i = 0; i < len; i++)
	                printf("%c%c ", hex[(b[i]>>4)&0xf], hex[b[i]&0xf]);
	        putchar('\n');
	}

	int main(int argc, char **argv)
	{
	        struct smb3_full_key_debug_info *keys;
	        uint8_t buf[sizeof(*keys)+1024] = {0};
	        size_t off = 0;
	        int fd, rc;

	        keys = (struct smb3_full_key_debug_info *)&buf;
	        keys->in_size = sizeof(buf);

	        fd = open(argv[1], O_RDONLY);
	        if (fd < 0)
	                perror("open"), exit(1);

	        rc = ioctl(fd, CIFS_DUMP_FULL_KEY, keys);
	        if (rc < 0)
	                perror("ioctl"), exit(1);

	        printf("SessionId      ");
	        dump(&keys->session_id, 8);
	        printf("Cipher         %04x\n", keys->cipher_type);

	        printf("SessionKey     ");
	        dump(keys->data+off, keys->session_key_length);
	        off += keys->session_key_length;

	        printf("ServerIn Key   ");
	        dump(keys->data+off, keys->server_in_key_length);
	        off += keys->server_in_key_length;

	        printf("ServerOut Key  ");
	        dump(keys->data+off, keys->server_out_key_length);

	        return 0;
	}

Usage:

	$ gcc -o dumpkeys dumpkeys.c

Against Windows Server 2020 preview (with AES-256-GCM support):

	# mount.cifs //$ip/test /mnt -o "username=administrator,password=foo,vers=3.0,seal"
	# ./dumpkeys /mnt/somefile
	SessionId      0D 00 00 00 00 0C 00 00
	Cipher         0002
	SessionKey     AB CD CC 0D E4 15 05 0C 6F 3C 92 90 19 F3 0D 25
	ServerIn Key   73 C6 6A C8 6B 08 CF A2 CB 8E A5 7D 10 D1 5B DC
	ServerOut Key  6D 7E 2B A1 71 9D D7 2B 94 7B BA C4 F0 A5 A4 F8
	# umount /mnt

	With 256 bit keys:

	# echo 1 > /sys/module/cifs/parameters/require_gcm_256
	# mount.cifs //$ip/test /mnt -o "username=administrator,password=foo,vers=3.11,seal"
	# ./dumpkeys /mnt/somefile
	SessionId      09 00 00 00 00 0C 00 00
	Cipher         0004
	SessionKey     93 F5 82 3B 2F B7 2A 50 0B B9 BA 26 FB 8C 8B 03
	ServerIn Key   6C 6A 89 B2 CB 7B 78 E8 04 93 37 DA 22 53 47 DF B3 2C 5F 02 26 70 43 DB 8D 33 7B DC 66 D3 75 A9
	ServerOut Key  04 11 AA D7 52 C7 A8 0F ED E3 93 3A 65 FE 03 AD 3F 63 03 01 2B C0 1B D7 D7 E5 52 19 7F CC 46 B4

Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_ioctl.h |  25 ++++++--
 fs/cifs/cifspdu.h    |   3 +-
 fs/cifs/ioctl.c      | 143 +++++++++++++++++++++++++++++++------------
 3 files changed, 126 insertions(+), 45 deletions(-)

diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index 4a97fe12006b7..37fc7d6ac457a 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -72,15 +72,28 @@ struct smb3_key_debug_info {
 } __packed;
 
 /*
- * Dump full key (32 byte encrypt/decrypt keys instead of 16 bytes)
- * is needed if GCM256 (stronger encryption) negotiated
+ * Dump variable-sized keys
  */
 struct smb3_full_key_debug_info {
-	__u64	Suid;
+	/* INPUT: size of userspace buffer */
+	__u32   in_size;
+
+	/*
+	 * INPUT: 0 for current user, otherwise session to dump
+	 * OUTPUT: session id that was dumped
+	 */
+	__u64	session_id;
 	__u16	cipher_type;
-	__u8	auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
-	__u8	smb3encryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
-	__u8	smb3decryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
+	__u8    session_key_length;
+	__u8    server_in_key_length;
+	__u8    server_out_key_length;
+	__u8    data[];
+	/*
+	 * return this struct with the keys appended at the end:
+	 * __u8 session_key[session_key_length];
+	 * __u8 server_in_key[server_in_key_length];
+	 * __u8 server_out_key[server_out_key_length];
+	 */
 } __packed;
 
 struct smb3_notify {
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b53a87db282f9..554d64fe171e0 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -148,7 +148,8 @@
 #define SMB3_SIGN_KEY_SIZE (16)
 
 /*
- * Size of the smb3 encryption/decryption keys
+ * Size of the smb3 encryption/decryption key storage.
+ * This size is big enough to store any cipher key types.
  */
 #define SMB3_ENC_DEC_KEY_SIZE (32)
 
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 28ec8d7c521a9..d67d281ab8632 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -33,6 +33,7 @@
 #include "cifsfs.h"
 #include "cifs_ioctl.h"
 #include "smb2proto.h"
+#include "smb2glob.h"
 #include <linux/btrfs.h>
 
 static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
@@ -214,48 +215,112 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg)
 	return 0;
 }
 
-static int cifs_dump_full_key(struct cifs_tcon *tcon, unsigned long arg)
+static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in)
 {
-	struct smb3_full_key_debug_info pfull_key_inf;
-	__u64 suid;
-	struct list_head *tmp;
+	struct smb3_full_key_debug_info out;
 	struct cifs_ses *ses;
+	int rc = 0;
 	bool found = false;
+	u8 __user *end;
 
-	if (!smb3_encryption_required(tcon))
-		return -EOPNOTSUPP;
+	if (!smb3_encryption_required(tcon)) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* copy user input into our output buffer */
+	if (copy_from_user(&out, in, sizeof(out))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (!out.session_id) {
+		/* if ses id is 0, use current user session */
+		ses = tcon->ses;
+	} else {
+		/* otherwise if a session id is given, look for it in all our sessions */
+		struct cifs_ses *ses_it = NULL;
+		struct TCP_Server_Info *server_it = NULL;
 
-	ses = tcon->ses; /* default to user id for current user */
-	if (get_user(suid, (__u64 __user *)arg))
-		suid = 0;
-	if (suid) {
-		/* search to see if there is a session with a matching SMB UID */
 		spin_lock(&cifs_tcp_ses_lock);
-		list_for_each(tmp, &tcon->ses->server->smb_ses_list) {
-			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
-			if (ses->Suid == suid) {
-				found = true;
-				break;
+		list_for_each_entry(server_it, &cifs_tcp_ses_list, tcp_ses_list) {
+			list_for_each_entry(ses_it, &server_it->smb_ses_list, smb_ses_list) {
+				if (ses_it->Suid == out.session_id) {
+					ses = ses_it;
+					/*
+					 * since we are using the session outside the crit
+					 * section, we need to make sure it won't be released
+					 * so increment its refcount
+					 */
+					ses->ses_count++;
+					found = true;
+					goto search_end;
+				}
 			}
 		}
+search_end:
 		spin_unlock(&cifs_tcp_ses_lock);
-		if (found == false)
-			return -EINVAL;
-	} /* else uses default user's SMB UID (ie current user) */
-
-	pfull_key_inf.cipher_type = le16_to_cpu(ses->server->cipher_type);
-	pfull_key_inf.Suid = ses->Suid;
-	memcpy(pfull_key_inf.auth_key, ses->auth_key.response,
-	       16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
-	memcpy(pfull_key_inf.smb3decryptionkey, ses->smb3decryptionkey,
-	       32 /* SMB3_ENC_DEC_KEY_SIZE */);
-	memcpy(pfull_key_inf.smb3encryptionkey,
-	       ses->smb3encryptionkey, 32 /* SMB3_ENC_DEC_KEY_SIZE */);
-	if (copy_to_user((void __user *)arg, &pfull_key_inf,
-			 sizeof(struct smb3_full_key_debug_info)))
-		return -EFAULT;
+		if (!found) {
+			rc = -ENOENT;
+			goto out;
+		}
+	}
 
-	return 0;
+	switch (ses->server->cipher_type) {
+	case SMB2_ENCRYPTION_AES128_CCM:
+	case SMB2_ENCRYPTION_AES128_GCM:
+		out.session_key_length = CIFS_SESS_KEY_SIZE;
+		out.server_in_key_length = out.server_out_key_length = SMB3_GCM128_CRYPTKEY_SIZE;
+		break;
+	case SMB2_ENCRYPTION_AES256_CCM:
+	case SMB2_ENCRYPTION_AES256_GCM:
+		out.session_key_length = CIFS_SESS_KEY_SIZE;
+		out.server_in_key_length = out.server_out_key_length = SMB3_GCM256_CRYPTKEY_SIZE;
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	/* check if user buffer is big enough to store all the keys */
+	if (out.in_size < sizeof(out) + out.session_key_length + out.server_in_key_length
+	    + out.server_out_key_length) {
+		rc = -ENOBUFS;
+		goto out;
+	}
+
+	out.session_id = ses->Suid;
+	out.cipher_type = le16_to_cpu(ses->server->cipher_type);
+
+	/* overwrite user input with our output */
+	if (copy_to_user(in, &out, sizeof(out))) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* append all the keys at the end of the user buffer */
+	end = in->data;
+	if (copy_to_user(end, ses->auth_key.response, out.session_key_length)) {
+		rc = -EINVAL;
+		goto out;
+	}
+	end += out.session_key_length;
+
+	if (copy_to_user(end, ses->smb3encryptionkey, out.server_in_key_length)) {
+		rc = -EINVAL;
+		goto out;
+	}
+	end += out.server_in_key_length;
+
+	if (copy_to_user(end, ses->smb3decryptionkey, out.server_out_key_length)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+out:
+	if (found)
+		cifs_put_smb_ses(ses);
+	return rc;
 }
 
 long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
@@ -371,6 +436,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 				rc = -EOPNOTSUPP;
 			break;
 		case CIFS_DUMP_KEY:
+			/*
+			 * Dump encryption keys. This is an old ioctl that only
+			 * handles AES-128-{CCM,GCM}.
+			 */
 			if (pSMBFile == NULL)
 				break;
 			if (!capable(CAP_SYS_ADMIN)) {
@@ -398,11 +467,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			else
 				rc = 0;
 			break;
-		/*
-		 * Dump full key (32 bytes instead of 16 bytes) is
-		 * needed if GCM256 (stronger encryption) negotiated
-		 */
 		case CIFS_DUMP_FULL_KEY:
+			/*
+			 * Dump encryption keys (handles any key sizes)
+			 */
 			if (pSMBFile == NULL)
 				break;
 			if (!capable(CAP_SYS_ADMIN)) {
@@ -410,8 +478,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 				break;
 			}
 			tcon = tlink_tcon(pSMBFile->tlink);
-			rc = cifs_dump_full_key(tcon, arg);
-
+			rc = cifs_dump_full_key(tcon, (void __user *)arg);
 			break;
 		case CIFS_IOC_NOTIFY:
 			if (!S_ISDIR(inode->i_mode)) {
-- 
GitLab


From 4c80a97d7b02cf68e169118ef2bda0725fc87f6f Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 25 May 2021 13:52:43 +0800
Subject: [PATCH 1763/3804] btrfs: fix compressed writes that cross stripe
 boundary

[BUG]
When running btrfs/027 with "-o compress" mount option, it always
crashes with the following call trace:

  BTRFS critical (device dm-4): mapping failed logical 298901504 bio len 12288 len 8192
  ------------[ cut here ]------------
  kernel BUG at fs/btrfs/volumes.c:6651!
  invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
  CPU: 5 PID: 31089 Comm: kworker/u24:10 Tainted: G           OE     5.13.0-rc2-custom+ #26
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  Workqueue: btrfs-delalloc btrfs_work_helper [btrfs]
  RIP: 0010:btrfs_map_bio.cold+0x58/0x5a [btrfs]
  Call Trace:
   btrfs_submit_compressed_write+0x2d7/0x470 [btrfs]
   submit_compressed_extents+0x3b0/0x470 [btrfs]
   ? mark_held_locks+0x49/0x70
   btrfs_work_helper+0x131/0x3e0 [btrfs]
   process_one_work+0x28f/0x5d0
   worker_thread+0x55/0x3c0
   ? process_one_work+0x5d0/0x5d0
   kthread+0x141/0x160
   ? __kthread_bind_mask+0x60/0x60
   ret_from_fork+0x22/0x30
  ---[ end trace 63113a3a91f34e68 ]---

[CAUSE]
The critical message before the crash means we have a bio at logical
bytenr 298901504 length 12288, but only 8192 bytes can fit into one
stripe, the remaining 4096 bytes go to another stripe.

In btrfs, all bios are properly split to avoid cross stripe boundary,
but commit 764c7c9a464b ("btrfs: zoned: fix parallel compressed writes")
changed the behavior for compressed writes.

Previously, if we found that the new page could not fit into the current
stripe, i.e. the "submit == 1" case, we submitted the current bio without
adding the new page.

       submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, 0);

   page->mapping = NULL;
   if (submit || bio_add_page(bio, page, PAGE_SIZE, 0) <
       PAGE_SIZE) {

But after the modification, we will add the page no matter if it crosses
stripe boundary, leading to the above crash.

       submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio, 0);

   if (pg_index == 0 && use_append)
           len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
   else
           len = bio_add_page(bio, page, PAGE_SIZE, 0);

   page->mapping = NULL;
   if (submit || len < PAGE_SIZE) {

[FIX]
It's no longer possible to revert to the original code style as we have
two different bio_add_*_page() calls now.

The new fix is to skip the bio_add_*_page() call if @submit is true.

Also, to avoid leaving @len uninitialized, always initialize it to zero.

If @submit is true, @len will not be checked.
If @submit is not true, @len will be the return value of
bio_add_*_page() call.
Either way, the behavior is still the same as the old code.

Reported-by: Josef Bacik <josef@toxicpanda.com>
Fixes: 764c7c9a464b ("btrfs: zoned: fix parallel compressed writes")
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 91743a0b34c51..5cb4f3b882853 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -457,7 +457,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	bytes_left = compressed_len;
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		int submit = 0;
-		int len;
+		int len = 0;
 
 		page = compressed_pages[pg_index];
 		page->mapping = inode->vfs_inode.i_mapping;
@@ -465,10 +465,17 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 			submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE, bio,
 							  0);
 
-		if (pg_index == 0 && use_append)
-			len = bio_add_zone_append_page(bio, page, PAGE_SIZE, 0);
-		else
-			len = bio_add_page(bio, page, PAGE_SIZE, 0);
+		/*
+		 * Page can only be added to bio if the current bio fits in
+		 * stripe.
+		 */
+		if (!submit) {
+			if (pg_index == 0 && use_append)
+				len = bio_add_zone_append_page(bio, page,
+							       PAGE_SIZE, 0);
+			else
+				len = bio_add_page(bio, page, PAGE_SIZE, 0);
+		}
 
 		page->mapping = NULL;
 		if (submit || len < PAGE_SIZE) {
-- 
GitLab


From b86652be7c83f70bf406bed18ecf55adb9bfb91b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 10:52:45 -0400
Subject: [PATCH 1764/3804] btrfs: fix error handling in btrfs_del_csums

Error injection stress would sometimes fail with checksums on disk that
did not have a corresponding extent.  This occurred because the pattern
in btrfs_del_csums was

	while (1) {
		ret = btrfs_search_slot();
		if (ret < 0)
			break;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;

If we got an error from btrfs_search_slot we'd clear the error because
we were breaking instead of goto out.  Instead of using goto out, simply
handle the cases where we may leave a random value in ret, and get rid
of the

	ret = 0;
out:

pattern and simply allow break to have the proper error reporting.  With
this fix we properly abort the transaction and do not commit thinking we
successfully deleted the csum.

Reviewed-by: Qu Wenruo <wqu@suse.com>
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 294602f139efa..a5a8dac334e82 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -788,7 +788,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	u64 end_byte = bytenr + len;
 	u64 csum_end;
 	struct extent_buffer *leaf;
-	int ret;
+	int ret = 0;
 	const u32 csum_size = fs_info->csum_size;
 	u32 blocksize_bits = fs_info->sectorsize_bits;
 
@@ -806,6 +806,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 		if (ret > 0) {
+			ret = 0;
 			if (path->slots[0] == 0)
 				break;
 			path->slots[0]--;
@@ -862,7 +863,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 			ret = btrfs_del_items(trans, root, path,
 					      path->slots[0], del_nr);
 			if (ret)
-				goto out;
+				break;
 			if (key.offset == bytenr)
 				break;
 		} else if (key.offset < bytenr && csum_end > end_byte) {
@@ -906,8 +907,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 			ret = btrfs_split_item(trans, root, path, &key, offset);
 			if (ret && ret != -EAGAIN) {
 				btrfs_abort_transaction(trans, ret);
-				goto out;
+				break;
 			}
+			ret = 0;
 
 			key.offset = end_byte - 1;
 		} else {
@@ -917,8 +919,6 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 		}
 		btrfs_release_path(path);
 	}
-	ret = 0;
-out:
 	btrfs_free_path(path);
 	return ret;
 }
-- 
GitLab


From 856bd270dc4db209c779ce1e9555c7641ffbc88e Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 10:52:46 -0400
Subject: [PATCH 1765/3804] btrfs: return errors from btrfs_del_csums in
 cleanup_ref_head

We are unconditionally returning 0 in cleanup_ref_head, despite the fact
that btrfs_del_csums could fail.  We need to return the error so the
transaction gets aborted properly, fix this by returning ret from
btrfs_del_csums in cleanup_ref_head.

Reviewed-by: Qu Wenruo <wqu@suse.com>
CC: stable@vger.kernel.org # 4.19+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f1d15b68994a0..3d5c35e4cb76e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1868,7 +1868,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
 	trace_run_delayed_ref_head(fs_info, head, 0);
 	btrfs_delayed_ref_unlock(head);
 	btrfs_put_delayed_ref_head(head);
-	return 0;
+	return ret;
 }
 
 static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
-- 
GitLab


From d61bec08b904cf171835db98168f82bc338e92e4 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 09:38:27 -0400
Subject: [PATCH 1766/3804] btrfs: mark ordered extent and inode with error if
 we fail to finish

While doing error injection testing I saw that sometimes we'd get an
abort that wouldn't stop the current transaction commit from completing.
This abort was coming from finish ordered IO, but at this point in the
transaction commit we should have gotten an error and stopped.

It turns out the abort came from finish ordered io while trying to write
out the free space cache.  It occurred to me that any failure inside of
finish_ordered_io isn't actually raised to the person doing the writing,
so we could have any number of failures in this path and think the
ordered extent completed successfully and the inode was fine.

Fix this by marking the ordered extent with BTRFS_ORDERED_IOERR, and
marking the mapping of the inode with mapping_set_error, so any callers
that simply call fdatawait will also get the error.

With this we're seeing the IO error on the free space inode when we fail
to do the finish_ordered_io.

CC: stable@vger.kernel.org # 4.19+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bb4ab408d6701..e7de0c08b9816 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3005,6 +3005,18 @@ out:
 	if (ret || truncated) {
 		u64 unwritten_start = start;
 
+		/*
+		 * If we failed to finish this ordered extent for any reason we
+		 * need to make sure BTRFS_ORDERED_IOERR is set on the ordered
+		 * extent, and mark the inode with the error if it wasn't
+		 * already set.  Any error during writeback would have already
+		 * set the mapping error, so we need to set it if we're the ones
+		 * marking this ordered extent as failed.
+		 */
+		if (ret && !test_and_set_bit(BTRFS_ORDERED_IOERR,
+					     &ordered_extent->flags))
+			mapping_set_error(ordered_extent->inode->i_mapping, -EIO);
+
 		if (truncated)
 			unwritten_start += logical_len;
 		clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
-- 
GitLab


From 011b28acf940eb61c000059dd9e2cfcbf52ed96b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 13:13:15 -0400
Subject: [PATCH 1767/3804] btrfs: fixup error handling in
 fixup_inode_link_counts

This function has the following pattern

	while (1) {
		ret = whatever();
		if (ret)
			goto out;
	}
	ret = 0;
out:
	return ret;

However, in several places in this while loop we simply break when
there's a problem, thus clearing the return value, and in one case we do
a return -EIO, and leak the memory for the path.

Fix this by re-arranging the loop to deal with ret == 1 coming from
btrfs_search_slot, and then simply delete the

	ret = 0;
out:

bit so everybody can break if there is an error, which will allow for
proper error handling to occur.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c17d6b827b42e..375c4642f4803 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1787,6 +1787,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
 			break;
 
 		if (ret == 1) {
+			ret = 0;
 			if (path->slots[0] == 0)
 				break;
 			path->slots[0]--;
@@ -1799,17 +1800,19 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_del_item(trans, root, path);
 		if (ret)
-			goto out;
+			break;
 
 		btrfs_release_path(path);
 		inode = read_one_inode(root, key.offset);
-		if (!inode)
-			return -EIO;
+		if (!inode) {
+			ret = -EIO;
+			break;
+		}
 
 		ret = fixup_inode_link_count(trans, root, inode);
 		iput(inode);
 		if (ret)
-			goto out;
+			break;
 
 		/*
 		 * fixup on a directory may create new entries,
@@ -1818,8 +1821,6 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
 		 */
 		key.offset = (u64)-1;
 	}
-	ret = 0;
-out:
 	btrfs_release_path(path);
 	return ret;
 }
-- 
GitLab


From f96d44743a44e3332f75d23d2075bb8270900e1d Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 11:26:25 -0400
Subject: [PATCH 1768/3804] btrfs: check error value from btrfs_update_inode in
 tree log

Error injection testing uncovered a case where we ended up with invalid
link counts on an inode.  This happened because we failed to notice an
error when updating the inode while replaying the tree log, and
committed the transaction with an invalid file system.

Fix this by checking the return value of btrfs_update_inode.  This
resolved the link count errors I was seeing, and we already properly
handle passing up the error values in these paths.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 375c4642f4803..e4820e88cba03 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1574,7 +1574,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 			if (ret)
 				goto out;
 
-			btrfs_update_inode(trans, root, BTRFS_I(inode));
+			ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+			if (ret)
+				goto out;
 		}
 
 		ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
@@ -1749,7 +1751,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
 
 	if (nlink != inode->i_nlink) {
 		set_nlink(inode, nlink);
-		btrfs_update_inode(trans, root, BTRFS_I(inode));
+		ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
+		if (ret)
+			goto out;
 	}
 	BTRFS_I(inode)->index_cnt = (u64)-1;
 
-- 
GitLab


From dc09ef3562726cd520c8338c1640872a60187af5 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 14:04:21 -0400
Subject: [PATCH 1769/3804] btrfs: abort in rename_exchange if we fail to
 insert the second ref

Error injection stress uncovered a problem where we'd leave a dangling
inode ref if we failed during a rename_exchange.  This happens because
we insert the inode ref for one side of the rename, and then for the
other side.  If this second inode ref insert fails we'll leave the first
one dangling and leave a corrupt file system behind.  Fix this by
aborting if we did the insert for the first inode ref.

CC: stable@vger.kernel.org # 4.9+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e7de0c08b9816..f5d32d85247a1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9101,6 +9101,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	int ret2;
 	bool root_log_pinned = false;
 	bool dest_log_pinned = false;
+	bool need_abort = false;
 
 	/* we only allow rename subvolume link between subvolumes */
 	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
@@ -9160,6 +9161,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 					     old_idx);
 		if (ret)
 			goto out_fail;
+		need_abort = true;
 	}
 
 	/* And now for the dest. */
@@ -9175,8 +9177,11 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 					     new_ino,
 					     btrfs_ino(BTRFS_I(old_dir)),
 					     new_idx);
-		if (ret)
+		if (ret) {
+			if (need_abort)
+				btrfs_abort_transaction(trans, ret);
 			goto out_fail;
+		}
 	}
 
 	/* Update inode version and ctime/mtime. */
-- 
GitLab


From ea7036de0d36c4e6c9508f68789e9567d514333a Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 24 May 2021 11:35:53 +0100
Subject: [PATCH 1770/3804] btrfs: fix fsync failure and transaction abort
 after writes to prealloc extents

When doing a series of partial writes to different ranges of preallocated
extents with transaction commits and fsyncs in between, we can end up with
overlapping checksum items in a log tree. This causes an fsync to fail with
-EIO and abort the transaction, turning the filesystem to RO mode, when
syncing the log.

For this to happen, we need to have a full fsync of a file following one
or more fast fsyncs.

The following example reproduces the problem and explains how it happens:

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt

  # Create our test file with 2 preallocated extents. Leave a 1M hole
  # between them to ensure that we get two file extent items that will
  # never be merged into a single one. The extents are contiguous on disk,
  # which will later result in the checksums for their data to be merged
  # into a single checksum item in the csums btree.
  #
  $ xfs_io -f \
           -c "falloc 0 1M" \
           -c "falloc 3M 3M" \
           /mnt/foobar

  # Now write to the second extent and leave only 1M of it as unwritten,
  # which corresponds to the file range [4M, 5M[.
  #
  # Then fsync the file to flush delalloc and to clear full sync flag from
  # the inode, so that a future fsync will use the fast code path.
  #
  # After the writeback triggered by the fsync we have 3 file extent items
  # that point to the second extent we previously allocated:
  #
  # 1) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the
  #    file range [3M, 4M[
  #
  # 2) One file extent item of type BTRFS_FILE_EXTENT_PREALLOC that covers
  #    the file range [4M, 5M[
  #
  # 3) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the
  #    file range [5M, 6M[
  #
  # All these file extent items have a generation of 6, which is the ID of
  # the transaction where they were created. The split of the original file
  # extent item is done at btrfs_mark_extent_written() when ordered extents
  # complete for the file ranges [3M, 4M[ and [5M, 6M[.
  #
  $ xfs_io -c "pwrite -S 0xab 3M 1M" \
           -c "pwrite -S 0xef 5M 1M" \
           -c "fsync" \
           /mnt/foobar

  # Commit the current transaction. This wipes out the log tree created by
  # the previous fsync.
  sync

  # Now write to the unwritten range of the second extent we allocated,
  # corresponding to the file range [4M, 5M[, and fsync the file, which
  # triggers the fast fsync code path.
  #
  # The fast fsync code path sees that there is a new extent map covering
  # the file range [4M, 5M[ and therefore it will log a checksum item
  # covering the range [1M, 2M[ of the second extent we allocated.
  #
  # Also, after the fsync finishes we no longer have the 3 file extent
  # items that pointed to 3 sections of the second extent we allocated.
  # Instead we end up with a single file extent item pointing to the whole
  # extent, with a type of BTRFS_FILE_EXTENT_REG and a generation of 7 (the
  # current transaction ID). This is due to the file extent item merging we
  # do when completing ordered extents into ranges that point to unwritten
  # (preallocated) extents. This merging is done at
  # btrfs_mark_extent_written().
  #
  $ xfs_io -c "pwrite -S 0xcd 4M 1M" \
           -c "fsync" \
           /mnt/foobar

  # Now do some write to our file outside the range of the second extent
  # that we allocated with fallocate() and truncate the file size from 6M
  # down to 5M.
  #
  # The truncate operation sets the full sync runtime flag on the inode,
  # forcing the next fsync to use the slow code path. It also changes the
  # length of the second file extent item so that it represents the file
  # range [3M, 5M[ and not the range [3M, 6M[ anymore.
  #
  # Finally fsync the file. Since this is a fsync that triggers the slow
  # code path, it will remove all items associated to the inode from the
  # log tree and then it will scan for file extent items in the
  # fs/subvolume tree that have a generation matching the current
  # transaction ID, which is 7. This means it will log 2 file extent
  # items:
  #
  # 1) One for the first extent we allocated, covering the file range
  #    [0, 1M[
  #
  # 2) Another for the first 2M of the second extent we allocated,
  #    covering the file range [3M, 5M[
  #
  # When logging the first file extent item we log a single checksum item
  # that has all the checksums for the entire extent.
  #
  # When logging the second file extent item, we also look up the
  # checksums that are associated with the range [0, 2M[ of the second
  # extent we allocated (file range [3M, 5M[), and then we log them with
  # btrfs_csum_file_blocks(). However that results in ending up with a log
  # that has two checksum items with ranges that overlap:
  #
  # 1) One for the range [1M, 2M[ of the second extent we allocated,
  #    corresponding to the file range [4M, 5M[, which we logged in the
  #    previous fsync that used the fast code path;
  #
  # 2) One for the ranges [0, 1M[ and [0, 2M[ of the first and second
  #    extents, respectively, corresponding to the file ranges [0, 1M[
  #    and [3M, 5M[. This one was added during this last fsync that uses
  #    the slow code path and overlaps with the previous one logged by
  #    the previous fast fsync.
  #
  # This happens because when logging the checksums for the second
  # extent, we notice they start at an offset that matches the end of the
  # checksums item that we logged for the first extent, and because both
  # extents are contiguous on disk, btrfs_csum_file_blocks() decides to
  # extend that existing checksums item and append the checksums for the
  # second extent to this item. The end result is we end up with two
  # checksum items in the log tree that have overlapping ranges, as
  # listed before, causing the fsync to fail with -EIO and abort the
  # transaction, turning the filesystem into RO mode.
  #
  $ xfs_io -c "pwrite -S 0xff 0 1M" \
           -c "truncate 5M" \
           -c "fsync" \
           /mnt/foobar
  fsync: Input/output error

After running the example, dmesg/syslog shows the tree checker complained
about the checksum items with overlapping ranges and we aborted the
transaction:

  $ dmesg
  (...)
  [756289.557487] BTRFS critical (device sdc): corrupt leaf: root=18446744073709551610 block=30720000 slot=5, csum end range (16777216) goes beyond the start range (15728640) of the next csum item
  [756289.560583] BTRFS info (device sdc): leaf 30720000 gen 7 total ptrs 7 free space 11677 owner 18446744073709551610
  [756289.562435] BTRFS info (device sdc): refs 2 lock_owner 0 current 2303929
  [756289.563654] 	item 0 key (257 1 0) itemoff 16123 itemsize 160
  [756289.564649] 		inode generation 6 size 5242880 mode 100600
  [756289.565636] 	item 1 key (257 12 256) itemoff 16107 itemsize 16
  [756289.566694] 	item 2 key (257 108 0) itemoff 16054 itemsize 53
  [756289.567725] 		extent data disk bytenr 13631488 nr 1048576
  [756289.568697] 		extent data offset 0 nr 1048576 ram 1048576
  [756289.569689] 	item 3 key (257 108 1048576) itemoff 16001 itemsize 53
  [756289.570682] 		extent data disk bytenr 0 nr 0
  [756289.571363] 		extent data offset 0 nr 2097152 ram 2097152
  [756289.572213] 	item 4 key (257 108 3145728) itemoff 15948 itemsize 53
  [756289.573246] 		extent data disk bytenr 14680064 nr 3145728
  [756289.574121] 		extent data offset 0 nr 2097152 ram 3145728
  [756289.574993] 	item 5 key (18446744073709551606 128 13631488) itemoff 12876 itemsize 3072
  [756289.576113] 	item 6 key (18446744073709551606 128 15728640) itemoff 11852 itemsize 1024
  [756289.577286] BTRFS error (device sdc): block=30720000 write time tree block corruption detected
  [756289.578644] ------------[ cut here ]------------
  [756289.579376] WARNING: CPU: 0 PID: 2303929 at fs/btrfs/disk-io.c:465 csum_one_extent_buffer+0xed/0x100 [btrfs]
  [756289.580857] Modules linked in: btrfs dm_zero dm_dust loop dm_snapshot (...)
  [756289.591534] CPU: 0 PID: 2303929 Comm: xfs_io Tainted: G        W         5.12.0-rc8-btrfs-next-87 #1
  [756289.592580] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
  [756289.594161] RIP: 0010:csum_one_extent_buffer+0xed/0x100 [btrfs]
  [756289.595122] Code: 5d c3 e8 76 60 (...)
  [756289.597509] RSP: 0018:ffffb51b416cb898 EFLAGS: 00010282
  [756289.598142] RAX: 0000000000000000 RBX: fffff02b8a365bc0 RCX: 0000000000000000
  [756289.598970] RDX: 0000000000000000 RSI: ffffffffa9112421 RDI: 00000000ffffffff
  [756289.599798] RBP: ffffa06500880000 R08: 0000000000000000 R09: 0000000000000000
  [756289.600619] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
  [756289.601456] R13: ffffa0652b1d8980 R14: ffffa06500880000 R15: 0000000000000000
  [756289.602278] FS:  00007f08b23c9800(0000) GS:ffffa0682be00000(0000) knlGS:0000000000000000
  [756289.603217] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [756289.603892] CR2: 00005652f32d0138 CR3: 000000025d616003 CR4: 0000000000370ef0
  [756289.604725] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [756289.605563] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [756289.606400] Call Trace:
  [756289.606704]  btree_csum_one_bio+0x244/0x2b0 [btrfs]
  [756289.607313]  btrfs_submit_metadata_bio+0xb7/0x100 [btrfs]
  [756289.608040]  submit_one_bio+0x61/0x70 [btrfs]
  [756289.608587]  btree_write_cache_pages+0x587/0x610 [btrfs]
  [756289.609258]  ? free_debug_processing+0x1d5/0x240
  [756289.609812]  ? __module_address+0x28/0xf0
  [756289.610298]  ? lock_acquire+0x1a0/0x3e0
  [756289.610754]  ? lock_acquired+0x19f/0x430
  [756289.611220]  ? lock_acquire+0x1a0/0x3e0
  [756289.611675]  do_writepages+0x43/0xf0
  [756289.612101]  ? __filemap_fdatawrite_range+0xa4/0x100
  [756289.612800]  __filemap_fdatawrite_range+0xc5/0x100
  [756289.613393]  btrfs_write_marked_extents+0x68/0x160 [btrfs]
  [756289.614085]  btrfs_sync_log+0x21c/0xf20 [btrfs]
  [756289.614661]  ? finish_wait+0x90/0x90
  [756289.615096]  ? __mutex_unlock_slowpath+0x45/0x2a0
  [756289.615661]  ? btrfs_log_inode_parent+0x3c9/0xdc0 [btrfs]
  [756289.616338]  ? lock_acquire+0x1a0/0x3e0
  [756289.616801]  ? lock_acquired+0x19f/0x430
  [756289.617284]  ? lock_acquire+0x1a0/0x3e0
  [756289.617750]  ? lock_release+0x214/0x470
  [756289.618221]  ? lock_acquired+0x19f/0x430
  [756289.618704]  ? dput+0x20/0x4a0
  [756289.619079]  ? dput+0x20/0x4a0
  [756289.619452]  ? lockref_put_or_lock+0x9/0x30
  [756289.619969]  ? lock_release+0x214/0x470
  [756289.620445]  ? lock_release+0x214/0x470
  [756289.620924]  ? lock_release+0x214/0x470
  [756289.621415]  btrfs_sync_file+0x46a/0x5b0 [btrfs]
  [756289.621982]  do_fsync+0x38/0x70
  [756289.622395]  __x64_sys_fsync+0x10/0x20
  [756289.622907]  do_syscall_64+0x33/0x80
  [756289.623438]  entry_SYSCALL_64_after_hwframe+0x44/0xae
  [756289.624063] RIP: 0033:0x7f08b27fbb7b
  [756289.624588] Code: 0f 05 48 3d 00 (...)
  [756289.626760] RSP: 002b:00007ffe2583f940 EFLAGS: 00000293 ORIG_RAX: 000000000000004a
  [756289.627639] RAX: ffffffffffffffda RBX: 00005652f32cd0f0 RCX: 00007f08b27fbb7b
  [756289.628464] RDX: 00005652f32cbca0 RSI: 00005652f32cd110 RDI: 0000000000000003
  [756289.629323] RBP: 00005652f32cd110 R08: 0000000000000000 R09: 00007f08b28c4be0
  [756289.630172] R10: fffffffffffff39a R11: 0000000000000293 R12: 0000000000000001
  [756289.631007] R13: 00005652f32cd0f0 R14: 0000000000000001 R15: 00005652f32cc480
  [756289.631819] irq event stamp: 0
  [756289.632188] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
  [756289.632911] hardirqs last disabled at (0): [<ffffffffa7e97c29>] copy_process+0x879/0x1cc0
  [756289.633893] softirqs last  enabled at (0): [<ffffffffa7e97c29>] copy_process+0x879/0x1cc0
  [756289.634871] softirqs last disabled at (0): [<0000000000000000>] 0x0
  [756289.635606] ---[ end trace 0a039fdc16ff3fef ]---
  [756289.636179] BTRFS: error (device sdc) in btrfs_sync_log:3136: errno=-5 IO failure
  [756289.637082] BTRFS info (device sdc): forced readonly

Having checksum items covering ranges that overlap is dangerous as in some
cases it can lead to having extent ranges for which we miss checksums
after log replay or getting the wrong checksum item. There were some fixes
in the past for bugs that resulted in this problem, and were explained and
fixed by the following commits:

  27b9a8122ff71a ("Btrfs: fix csum tree corruption, duplicate and outdated checksums")
  b84b8390d6009c ("Btrfs: fix file read corruption after extent cloning and fsync")
  40e046acbd2f36 ("Btrfs: fix missing data checksums after replaying a log tree")
  e289f03ea79bbc ("btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents")

Fix the issue by making btrfs_csum_file_blocks() take into account the
start offset of the next checksum item when it decides to extend an
existing checksum item, so that it never extends the checksum to end at a
range that goes beyond the start range of the next checksum item.

When we can not access the next checksum item without releasing the path,
simply drop the optimization of extending the previous checksum item and
fall back to inserting a new checksum item - this happens rarely and the
optimization is not significant enough for a log tree in order to justify
the extra complexity, as it would only save a few bytes (the size of a
struct btrfs_item) of leaf space.

This behaviour is only needed when inserting into a log tree because
for the regular checksums tree we never have a case where we try to
insert a range of checksums that overlap with a range that was previously
inserted.

A test case for fstests will follow soon.

Reported-by: Philipp Fent <fent@in.tum.de>
Link: https://lore.kernel.org/linux-btrfs/93c4600e-5263-5cba-adf0-6f47526e7561@in.tum.de/
CC: stable@vger.kernel.org # 5.4+
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file-item.c | 98 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 22 deletions(-)

diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a5a8dac334e82..441cee7fbb629 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -923,6 +923,37 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+static int find_next_csum_offset(struct btrfs_root *root,
+				 struct btrfs_path *path,
+				 u64 *next_offset)
+{
+	const u32 nritems = btrfs_header_nritems(path->nodes[0]);
+	struct btrfs_key found_key;
+	int slot = path->slots[0] + 1;
+	int ret;
+
+	if (nritems == 0 || slot >= nritems) {
+		ret = btrfs_next_leaf(root, path);
+		if (ret < 0) {
+			return ret;
+		} else if (ret > 0) {
+			*next_offset = (u64)-1;
+			return 0;
+		}
+		slot = path->slots[0];
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
+
+	if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+	    found_key.type != BTRFS_EXTENT_CSUM_KEY)
+		*next_offset = (u64)-1;
+	else
+		*next_offset = found_key.offset;
+
+	return 0;
+}
+
 int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct btrfs_ordered_sum *sums)
@@ -938,7 +969,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
 	u64 total_bytes = 0;
 	u64 csum_offset;
 	u64 bytenr;
-	u32 nritems;
 	u32 ins_size;
 	int index = 0;
 	int found_next;
@@ -981,26 +1011,10 @@ again:
 			goto insert;
 		}
 	} else {
-		int slot = path->slots[0] + 1;
-		/* we didn't find a csum item, insert one */
-		nritems = btrfs_header_nritems(path->nodes[0]);
-		if (!nritems || (path->slots[0] >= nritems - 1)) {
-			ret = btrfs_next_leaf(root, path);
-			if (ret < 0) {
-				goto out;
-			} else if (ret > 0) {
-				found_next = 1;
-				goto insert;
-			}
-			slot = path->slots[0];
-		}
-		btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
-		if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
-		    found_key.type != BTRFS_EXTENT_CSUM_KEY) {
-			found_next = 1;
-			goto insert;
-		}
-		next_offset = found_key.offset;
+		/* We didn't find a csum item, insert one. */
+		ret = find_next_csum_offset(root, path, &next_offset);
+		if (ret < 0)
+			goto out;
 		found_next = 1;
 		goto insert;
 	}
@@ -1056,8 +1070,48 @@ extend_csum:
 		tmp = sums->len - total_bytes;
 		tmp >>= fs_info->sectorsize_bits;
 		WARN_ON(tmp < 1);
+		extend_nr = max_t(int, 1, tmp);
+
+		/*
+		 * A log tree can already have checksum items with a subset of
+		 * the checksums we are trying to log. This can happen after
+		 * doing a sequence of partial writes into prealloc extents and
+		 * fsyncs in between, with a full fsync logging a larger subrange
+		 * of an extent for which a previous fast fsync logged a smaller
+		 * subrange. And this happens in particular due to merging file
+		 * extent items when we complete an ordered extent for a range
+		 * covered by a prealloc extent - this is done at
+		 * btrfs_mark_extent_written().
+		 *
+		 * So if we try to extend the previous checksum item, which has
+		 * a range that ends at the start of the range we want to insert,
+		 * make sure we don't extend beyond the start offset of the next
+		 * checksum item. If we are at the last item in the leaf, then
+		 * forget the optimization of extending and add a new checksum
+		 * item - it is not worth the complexity of releasing the path,
+		 * getting the first key for the next leaf, repeat the btree
+		 * search, etc, because log trees are temporary anyway and it
+		 * would only save a few bytes of leaf space.
+		 */
+		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+			if (path->slots[0] + 1 >=
+			    btrfs_header_nritems(path->nodes[0])) {
+				ret = find_next_csum_offset(root, path, &next_offset);
+				if (ret < 0)
+					goto out;
+				found_next = 1;
+				goto insert;
+			}
+
+			ret = find_next_csum_offset(root, path, &next_offset);
+			if (ret < 0)
+				goto out;
+
+			tmp = (next_offset - bytenr) >> fs_info->sectorsize_bits;
+			if (tmp <= INT_MAX)
+				extend_nr = min_t(int, extend_nr, tmp);
+		}
 
-		extend_nr = max_t(int, 1, (int)tmp);
 		diff = (csum_offset + extend_nr) * csum_size;
 		diff = min(diff,
 			   MAX_CSUM_ITEMS(fs_info, csum_size) * csum_size);
-- 
GitLab


From 76a6d5cd74479e7ec8a7f9a29bce63d5549b6b2e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 25 May 2021 11:05:28 +0100
Subject: [PATCH 1771/3804] btrfs: fix deadlock when cloning inline extents and
 low on available space

There are a few cases where cloning an inline extent requires copying data
into a page of the destination inode. For these cases we are allocating
the required data and metadata space while holding a leaf locked. This can
result in a deadlock when we are low on available space because allocating
the space may flush delalloc and two deadlock scenarios can happen:

1) When starting writeback for an inode with a very small dirty range that
   fits in an inline extent, we deadlock during the writeback when trying
   to insert the inline extent, at cow_file_range_inline(), if the extent
   is going to be located in the leaf for which we are already holding a
   read lock;

2) After successfully starting writeback, for non-inline extent cases,
   the async reclaim thread will hang waiting for an ordered extent to
   complete if the ordered extent completion needs to modify the leaf
   for which the clone task is holding a read lock (for adding or
   replacing file extent items). So the cloning task will wait forever
   on the async reclaim thread to make progress, which in turn is
   waiting for the ordered extent completion which in turn is waiting
   to acquire a write lock on the same leaf.

So fix this by making sure we release the path (and therefore the leaf)
every time we need to copy the inline extent's data into a page of the
destination inode, as by that time we do not need to have the leaf locked.

Fixes: 05a5a7621ce66c ("Btrfs: implement full reflink support for inline extents")
CC: stable@vger.kernel.org # 5.10+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 06682128d8fae..58ddc7ed9e849 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -207,10 +207,7 @@ static int clone_copy_inline_extent(struct inode *dst,
 			 * inline extent's data to the page.
 			 */
 			ASSERT(key.offset > 0);
-			ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-						  inline_data, size, datal,
-						  comp_type);
-			goto out;
+			goto copy_to_page;
 		}
 	} else if (i_size_read(dst) <= datal) {
 		struct btrfs_file_extent_item *ei;
@@ -226,13 +223,10 @@ static int clone_copy_inline_extent(struct inode *dst,
 		    BTRFS_FILE_EXTENT_INLINE)
 			goto copy_inline_extent;
 
-		ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-					  inline_data, size, datal, comp_type);
-		goto out;
+		goto copy_to_page;
 	}
 
 copy_inline_extent:
-	ret = 0;
 	/*
 	 * We have no extent items, or we have an extent at offset 0 which may
 	 * or may not be inlined. All these cases are dealt the same way.
@@ -244,11 +238,13 @@ copy_inline_extent:
 		 * clone. Deal with all these cases by copying the inline extent
 		 * data into the respective page at the destination inode.
 		 */
-		ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
-					  inline_data, size, datal, comp_type);
-		goto out;
+		goto copy_to_page;
 	}
 
+	/*
+	 * Release path before starting a new transaction so we don't hold locks
+	 * that would confuse lockdep.
+	 */
 	btrfs_release_path(path);
 	/*
 	 * If we end up here it means were copy the inline extent into a leaf
@@ -285,11 +281,6 @@ copy_inline_extent:
 	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
-		/*
-		 * Release path before starting a new transaction so we don't
-		 * hold locks that would confuse lockdep.
-		 */
-		btrfs_release_path(path);
 		/*
 		 * No transaction here means we copied the inline extent into a
 		 * page of the destination inode.
@@ -310,6 +301,21 @@ out:
 		*trans_out = trans;
 
 	return ret;
+
+copy_to_page:
+	/*
+	 * Release our path because we don't need it anymore and also because
+	 * copy_inline_to_page() needs to reserve data and metadata, which may
+	 * need to flush delalloc when we are low on available space and
+	 * therefore cause a deadlock if writeback of an inline extent needs to
+	 * write to the same leaf or an ordered extent completion needs to write
+	 * to the same leaf.
+	 */
+	btrfs_release_path(path);
+
+	ret = copy_inline_to_page(BTRFS_I(dst), new_key->offset,
+				  inline_data, size, datal, comp_type);
+	goto out;
 }
 
 /**
-- 
GitLab


From b28d8f0c25a9b0355116cace5f53ea52bd4020c8 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Wed, 26 May 2021 23:00:27 +0300
Subject: [PATCH 1772/3804] devlink: Correct VIRTUAL port to not have phys_port
 attributes

Physical port name, port number attributes do not belong to virtual port
flavour. When VF or SF virtual ports are registered they incorrectly
append "np0" string in the netdevice name of the VF/SF.

Before this fix, VF netdevice names were ens2f0np0v0, ens2f0np0v1 for VF
0 and 1 respectively.

After the fix, they are ens2f0v0, ens2f0v1.

With this fix, reading /sys/class/net/ens2f0v0/phys_port_name returns
-EOPNOTSUPP.

Below is a devlink port show example for 2 VFs on one PF, showing that no
physical port attributes are exposed for the virtual ports.

$ devlink port show
pci/0000:06:00.0/65535: type eth netdev ens2f0np0 flavour physical port 0 splittable false
pci/0000:06:00.3/196608: type eth netdev ens2f0v0 flavour virtual splittable false
pci/0000:06:00.4/262144: type eth netdev ens2f0v1 flavour virtual splittable false

This change introduces a netdevice name change on systemd/udev
version 245 and higher which honors phys_port_name sysfs file for
generation of netdevice name.

This also aligns to phys_port_name usage which is limited to switchdev
ports as described in [1].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/tree/Documentation/networking/switchdev.rst

Fixes: acf1ee44ca5d ("devlink: Introduce devlink port flavour virtual")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/20210526200027.14008-1-parav@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/devlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4eb969518ee07..051432ea4f69e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -705,7 +705,6 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
 	case DEVLINK_PORT_FLAVOUR_CPU:
 	case DEVLINK_PORT_FLAVOUR_DSA:
-	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
 		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
 				attrs->phys.port_number))
 			return -EMSGSIZE;
@@ -8631,7 +8630,6 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 
 	switch (attrs->flavour) {
 	case DEVLINK_PORT_FLAVOUR_PHYSICAL:
-	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
 		if (!attrs->split)
 			n = snprintf(name, len, "p%u", attrs->phys.port_number);
 		else
@@ -8679,6 +8677,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
 		n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
 			     attrs->pci_sf.sf);
 		break;
+	case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+		return -EOPNOTSUPP;
 	}
 
 	if (n >= len)
-- 
GitLab


From 0cc254e5aa37cf05f65bcdcdc0ac5c58010feb33 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Wed, 26 May 2021 14:44:09 +0300
Subject: [PATCH 1773/3804] net/sched: act_ct: Offload connections with commit
 action

Currently established connections are not offloaded if the filter has a
"ct commit" action. As a result, connections in the following scenario
are not offloaded:

$ tc_filter add dev $DEV ingress protocol ip prio 1 flower \
  ct_state -trk \
  action ct commit action goto chain 1

$ tc_filter add dev $DEV ingress protocol ip chain 1 prio 1 flower \
  action mirred egress redirect dev $DEV2

$ tc_filter add dev $DEV2 ingress protocol ip prio 1 flower \
  action ct commit action goto chain 1

$ tc_filter add dev $DEV2 ingress protocol ip prio 1 chain 1 flower \
  ct_state +trk+est \
  action mirred egress redirect dev $DEV

Offload established connections, regardless of the commit flag.

Fixes: 46475bb20f4b ("net/sched: act_ct: Software offload of established flows")
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Link: https://lore.kernel.org/r/1622029449-27060-1-git-send-email-paulb@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_ct.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ec7a1c438df94..b1473a1aecdd9 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -984,7 +984,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
 	 */
 	cached = tcf_ct_skb_nfct_cached(net, skb, p->zone, force);
 	if (!cached) {
-		if (!commit && tcf_ct_flow_table_lookup(p, skb, family)) {
+		if (tcf_ct_flow_table_lookup(p, skb, family)) {
 			skip_add = true;
 			goto do_nat;
 		}
@@ -1022,10 +1022,11 @@ do_nat:
 		 * even if the connection is already confirmed.
 		 */
 		nf_conntrack_confirm(skb);
-	} else if (!skip_add) {
-		tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
 	}
 
+	if (!skip_add)
+		tcf_ct_flow_table_process_conn(p->ct_ft, ct, ctinfo);
+
 out_push:
 	skb_push_rcsum(skb, nh_ofs);
 
-- 
GitLab


From fb91702b743dec78d6507c53a2dec8a8883f509d Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@nvidia.com>
Date: Wed, 26 May 2021 20:01:10 +0300
Subject: [PATCH 1774/3804] net/sched: act_ct: Fix ct template allocation for
 zone 0

Fix current behavior of skipping template allocation in case the
ct action is in zone 0.

Skipping the allocation may cause the datapath ct code to ignore the
entire ct action with all its attributes (commit, nat) in case the ct
action in zone 0 was preceded by a ct clear action.

The ct clear action sets the ct_state to untracked and resets the
skb->_nfct pointer. Under these conditions and without an allocated
ct template, the skb->_nfct pointer will remain NULL which will
cause the tc ct action handler to exit without handling commit and nat
actions, if such exist.

For example, the following rule in OVS dp:
recirc_id(0x2),ct_state(+new-est-rel-rpl+trk),ct_label(0/0x1), \
in_port(eth0),actions:ct_clear,ct(commit,nat(src=10.11.0.12)), \
recirc(0x37a)

Will result in act_ct skipping the commit and nat actions in zone 0.

The change removes the skipping of template allocation for zone 0 and
treats it the same as any other zone.

Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
Signed-off-by: Ariel Levkovich <lariel@nvidia.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Link: https://lore.kernel.org/r/20210526170110.54864-1-lariel@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_ct.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b1473a1aecdd9..18edd9ad14109 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1203,9 +1203,6 @@ static int tcf_ct_fill_params(struct net *net,
 				   sizeof(p->zone));
 	}
 
-	if (p->zone == NF_CT_DEFAULT_ZONE_ID)
-		return 0;
-
 	nf_ct_zone_init(&zone, p->zone, NF_CT_DEFAULT_ZONE_DIR, 0);
 	tmpl = nf_ct_tmpl_alloc(net, &zone, GFP_KERNEL);
 	if (!tmpl) {
-- 
GitLab


From c673b7f59e940061467200f1746820a178444bd0 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 27 May 2021 15:00:52 -0700
Subject: [PATCH 1775/3804] perf stat: Fix error check for bpf_program__attach

It seems bpf_program__attach() returns an error pointer (a negative error
code encoded with ERR_PTR) instead of a NULL pointer in case of error.

Fixes: 7fac83aaf2ee ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Link: http://lore.kernel.org/lkml/20210527220052.1657578-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_counter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index ddb52f748c8e8..974f10e356f06 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -451,10 +451,10 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 		goto out;
 	}
 
-	err = -1;
 	link = bpf_program__attach(skel->progs.on_switch);
-	if (!link) {
+	if (IS_ERR(link)) {
 		pr_err("Failed to attach leader program\n");
+		err = PTR_ERR(link);
 		goto out;
 	}
 
-- 
GitLab


From fc9818e6c54d19f1189bbda906042af3aaf93d80 Mon Sep 17 00:00:00 2001
From: Jack Xu <jack.xu@intel.com>
Date: Mon, 17 May 2021 05:13:12 -0400
Subject: [PATCH 1776/3804] crypto: qat - return error when failing to map FW

Save the return value of qat_uclo_map_auth_fw() function so that the
function qat_uclo_wr_mimage() could return the correct value.
This way, the procedure of adf_gen2_ae_fw_load() function could stop
and exit properly by checking the return value of qat_uclo_wr_mimage().

Signed-off-by: Jack Xu <jack.xu@intel.com>
Co-developed-by: Zhehui Xiang <zhehui.xiang@intel.com>
Signed-off-by: Zhehui Xiang <zhehui.xiang@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_uclo.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 1fb5fc852f6b8..d2c2db58c93ff 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -1546,7 +1546,8 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
 	int status = 0;
 
 	if (handle->chip_info->fw_auth) {
-		if (!qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc))
+		status = qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc);
+		if (!status)
 			status = qat_uclo_auth_fw(handle, desc);
 		qat_uclo_ummap_auth_fw(handle, &desc);
 	} else {
-- 
GitLab


From b9f7c36de97814d13c31aba560d520d61b86be0e Mon Sep 17 00:00:00 2001
From: Jack Xu <jack.xu@intel.com>
Date: Mon, 17 May 2021 05:13:13 -0400
Subject: [PATCH 1777/3804] crypto: qat - check MMP size before writing to the
 SRAM

Change "sram_visible" to "mmp_sram_size" and compare it with the MMP
size to prevent an overly large MMP file being written to SRAM.

Signed-off-by: Jack Xu <jack.xu@intel.com>
Co-developed-by: Zhehui Xiang <zhehui.xiang@intel.com>
Signed-off-by: Zhehui Xiang <zhehui.xiang@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h | 2 +-
 drivers/crypto/qat/qat_common/qat_hal.c                  | 8 ++++----
 drivers/crypto/qat/qat_common/qat_uclo.c                 | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
index b8f3463be6ef4..7eb5daef4f885 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
@@ -24,7 +24,7 @@ struct icp_qat_fw_loader_hal_handle {
 };
 
 struct icp_qat_fw_loader_chip_info {
-	bool sram_visible;
+	int mmp_sram_size;
 	bool nn;
 	bool lm2lm3;
 	u32 lm_size;
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index bd3028126cbe6..ed9b813471443 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -696,7 +696,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
 	handle->pci_dev = pci_info->pci_dev;
 	switch (handle->pci_dev->device) {
 	case ADF_4XXX_PCI_DEVICE_ID:
-		handle->chip_info->sram_visible = false;
+		handle->chip_info->mmp_sram_size = 0;
 		handle->chip_info->nn = false;
 		handle->chip_info->lm2lm3 = true;
 		handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X;
@@ -730,7 +730,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
 		break;
 	case PCI_DEVICE_ID_INTEL_QAT_C62X:
 	case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
-		handle->chip_info->sram_visible = false;
+		handle->chip_info->mmp_sram_size = 0;
 		handle->chip_info->nn = true;
 		handle->chip_info->lm2lm3 = false;
 		handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG;
@@ -763,7 +763,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
 			+ LOCAL_TO_XFER_REG_OFFSET);
 		break;
 	case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
-		handle->chip_info->sram_visible = true;
+		handle->chip_info->mmp_sram_size = 0x40000;
 		handle->chip_info->nn = true;
 		handle->chip_info->lm2lm3 = false;
 		handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG;
@@ -800,7 +800,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
 		goto out_err;
 	}
 
-	if (handle->chip_info->sram_visible) {
+	if (handle->chip_info->mmp_sram_size > 0) {
 		sram_bar =
 			&pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
 		handle->hal_sram_addr_v = sram_bar->virt_addr;
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index d2c2db58c93ff..8adf25769128a 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -1551,7 +1551,7 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
 			status = qat_uclo_auth_fw(handle, desc);
 		qat_uclo_ummap_auth_fw(handle, &desc);
 	} else {
-		if (!handle->chip_info->sram_visible) {
+		if (handle->chip_info->mmp_sram_size < mem_size) {
 			dev_dbg(&handle->pci_dev->dev,
 				"QAT MMP fw not loaded for device 0x%x",
 				handle->pci_dev->device);
-- 
GitLab


From 78b4267a1524849202d4ab45d32ca561775e0f23 Mon Sep 17 00:00:00 2001
From: Jack Xu <jack.xu@intel.com>
Date: Mon, 17 May 2021 05:13:14 -0400
Subject: [PATCH 1778/3804] crypto: qat - report an error if MMP file size is
 too large

Change the return status to error if MMP file size is too large so the
driver load fails early if a large MMP firmware is loaded.

Signed-off-by: Jack Xu <jack.xu@intel.com>
Co-developed-by: Zhehui Xiang <zhehui.xiang@intel.com>
Signed-off-by: Zhehui Xiang <zhehui.xiang@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_uclo.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 8adf25769128a..ed1343bb36ac7 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -1552,10 +1552,8 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
 		qat_uclo_ummap_auth_fw(handle, &desc);
 	} else {
 		if (handle->chip_info->mmp_sram_size < mem_size) {
-			dev_dbg(&handle->pci_dev->dev,
-				"QAT MMP fw not loaded for device 0x%x",
-				handle->pci_dev->device);
-			return status;
+			pr_err("QAT: MMP size is too large: 0x%x\n", mem_size);
+			return -EFBIG;
 		}
 		qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, mem_size);
 	}
-- 
GitLab


From 96b57229209490c8bca4335b01a426a96173dc56 Mon Sep 17 00:00:00 2001
From: Jack Xu <jack.xu@intel.com>
Date: Mon, 17 May 2021 05:13:15 -0400
Subject: [PATCH 1779/3804] crypto: qat - check return code of
 qat_hal_rd_rel_reg()

Check the return code of the function qat_hal_rd_rel_reg() and return it
to the caller.

This is to fix the following warning when compiling the driver with
clang scan-build:

    drivers/crypto/qat/qat_common/qat_hal.c:1436:2: warning: 6th function call argument is an uninitialized value

Signed-off-by: Jack Xu <jack.xu@intel.com>
Co-developed-by: Zhehui Xiang <zhehui.xiang@intel.com>
Signed-off-by: Zhehui Xiang <zhehui.xiang@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_hal.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index ed9b813471443..12ca6b8764aaa 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -1417,7 +1417,11 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle,
 		pr_err("QAT: bad xfrAddr=0x%x\n", xfr_addr);
 		return -EINVAL;
 	}
-	qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval);
+	status = qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval);
+	if (status) {
+		pr_err("QAT: failed to read register");
+		return status;
+	}
 	gpr_addr = qat_hal_get_reg_addr(ICP_GPB_REL, gprnum);
 	data16low = 0xffff & data;
 	data16hi = 0xffff & (data >> 0x10);
-- 
GitLab


From 9afe77cf25d9670e61b489fd52cc6f75fd7f6803 Mon Sep 17 00:00:00 2001
From: Jack Xu <jack.xu@intel.com>
Date: Mon, 17 May 2021 05:13:16 -0400
Subject: [PATCH 1780/3804] crypto: qat - remove unused macro in FW loader

Remove the unused macro ICP_DH895XCC_PESRAM_BAR_SIZE in the firmware
loader.

This is to fix the following warning when compiling the driver using the
clang compiler with CC=clang W=2:

    drivers/crypto/qat/qat_common/qat_uclo.c:345:9: warning: macro is not used [-Wunused-macros]

Signed-off-by: Jack Xu <jack.xu@intel.com>
Co-developed-by: Zhehui Xiang <zhehui.xiang@intel.com>
Signed-off-by: Zhehui Xiang <zhehui.xiang@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_uclo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index ed1343bb36ac7..2026cc6be8f0e 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -342,7 +342,6 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle,
 	return 0;
 }
 
-#define ICP_DH895XCC_PESRAM_BAR_SIZE 0x80000
 static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle,
 				   struct icp_qat_uof_initmem *init_mem)
 {
-- 
GitLab


From 11e0ca8f028c24cade0022888599e1273ce31694 Mon Sep 17 00:00:00 2001
From: Juerg Haefliger <juerg.haefliger@canonical.com>
Date: Mon, 17 May 2021 11:58:31 +0200
Subject: [PATCH 1781/3804] hwrng: Kconfig - Remove leading spaces

Remove leading spaces before tabs in Kconfig file(s) by running the
following command:

  $ find drivers/char/hw_random -name 'Kconfig*' | \
    xargs sed -r -i 's/^[ ]+\t/\t/'

Signed-off-by: Juerg Haefliger <juergh@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 6450074c0ad7d..c11f12d4ab534 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -168,14 +168,14 @@ config HW_RANDOM_OMAP
 	depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU || ARCH_K3
 	default HW_RANDOM
 	help
- 	  This driver provides kernel-side support for the Random Number
+	  This driver provides kernel-side support for the Random Number
 	  Generator hardware found on OMAP16xx, OMAP2/3/4/5, AM33xx/AM43xx
 	  multimedia processors, and Marvell Armada 7k/8k SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called omap-rng.
 
- 	  If unsure, say Y.
+	  If unsure, say Y.
 
 config HW_RANDOM_OMAP3_ROM
 	tristate "OMAP3 ROM Random Number Generator support"
@@ -485,13 +485,13 @@ config HW_RANDOM_NPCM
 	depends on ARCH_NPCM || COMPILE_TEST
 	default HW_RANDOM
 	help
- 	  This driver provides support for the Random Number
+	  This driver provides support for the Random Number
 	  Generator hardware available in Nuvoton NPCM SoCs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called npcm-rng.
 
- 	  If unsure, say Y.
+	  If unsure, say Y.
 
 config HW_RANDOM_KEYSTONE
 	depends on ARCH_KEYSTONE || COMPILE_TEST
-- 
GitLab


From 155f7d321f021c084595d33efafa66f197fb2c00 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 17 May 2021 19:02:34 +0800
Subject: [PATCH 1782/3804] crypto: header - Fix spelling errors

Fix some spelling mistakes in comments:
cipherntext ==> ciphertext
syncronise ==> synchronise
feeded ==> fed

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/aead.h   | 2 +-
 include/crypto/engine.h | 2 +-
 include/crypto/hash.h   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index e728469c4cccb..5af914c1ab8ee 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -490,7 +490,7 @@ static inline void aead_request_set_callback(struct aead_request *req,
  * The memory structure for cipher operation has the following structure:
  *
  * - AEAD encryption input:  assoc data || plaintext
- * - AEAD encryption output: assoc data || cipherntext || auth tag
+ * - AEAD encryption output: assoc data || ciphertext || auth tag
  * - AEAD decryption input:  assoc data || ciphertext || auth tag
  * - AEAD decryption output: assoc data || plaintext
  *
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 3f06e40d063a6..26cac19b0f46c 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -28,7 +28,7 @@
  * of a failed backlog request
  * crypto-engine, in head position to keep order
  * @list: link with the global crypto engine list
- * @queue_lock: spinlock to syncronise access to request queue
+ * @queue_lock: spinlock to synchronise access to request queue
  * @queue: the crypto queue of the engine
  * @rt: whether this queue is set to run as a realtime task
  * @prepare_crypt_hardware: a request will soon arrive from the queue
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index b2bc1e46e86a7..f140e4643949b 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -458,7 +458,7 @@ int crypto_ahash_finup(struct ahash_request *req);
  *
  * Return:
  * 0		if the message digest was successfully calculated;
- * -EINPROGRESS	if data is feeded into hardware (DMA) or queued for later;
+ * -EINPROGRESS	if data is fed into hardware (DMA) or queued for later;
  * -EBUSY	if queue is full and request should be resubmitted later;
  * other < 0	if an error occurred
  */
-- 
GitLab


From 308365483351fad2c2c15e173df60c7168c828a5 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Wed, 19 May 2021 13:59:44 +0800
Subject: [PATCH 1783/3804] crypto: khazad,wp512 - remove leading spaces before
 tabs

There are a few leading spaces before tabs; remove them by running the
following command:

	$ find . -name '*.c' | xargs sed -r -i 's/^[ ]+\t/\t/'

At the same time, fix two warnings reported by checkpatch.pl:
	WARNING: suspect code indent for conditional statements (16, 16)
	WARNING: braces {} are not necessary for single statement blocks

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/khazad.c |  2 +-
 crypto/wp512.c  | 40 +++++++++++++++++++---------------------
 2 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/crypto/khazad.c b/crypto/khazad.c
index 14ca7f1631c79..f19339954c89e 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -819,7 +819,7 @@ static void khazad_crypt(const u64 roundKey[KHAZAD_ROUNDS + 1],
 			T6[(int)(state >>  8) & 0xff] ^
 			T7[(int)(state      ) & 0xff] ^
 			roundKey[r];
-    	}
+	}
 
 	state = (T0[(int)(state >> 56)       ] & 0xff00000000000000ULL) ^
 		(T1[(int)(state >> 48) & 0xff] & 0x00ff000000000000ULL) ^
diff --git a/crypto/wp512.c b/crypto/wp512.c
index feadc13ccae06..bf79fbb2340fa 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1066,33 +1066,31 @@ static int wp512_final(struct shash_desc *desc, u8 *out)
 {
 	struct wp512_ctx *wctx = shash_desc_ctx(desc);
 	int i;
-   	u8 *buffer      = wctx->buffer;
-   	u8 *bitLength   = wctx->bitLength;
-   	int bufferBits  = wctx->bufferBits;
-   	int bufferPos   = wctx->bufferPos;
+	u8 *buffer      = wctx->buffer;
+	u8 *bitLength   = wctx->bitLength;
+	int bufferBits  = wctx->bufferBits;
+	int bufferPos   = wctx->bufferPos;
 	__be64 *digest  = (__be64 *)out;
 
-   	buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
-   	bufferPos++;
-   	if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
-   		if (bufferPos < WP512_BLOCK_SIZE) {
-	   	memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
-   		}
-   		wp512_process_buffer(wctx);
-   		bufferPos = 0;
-   	}
-   	if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
-   		memset(&buffer[bufferPos], 0,
+	buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
+	bufferPos++;
+	if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
+		if (bufferPos < WP512_BLOCK_SIZE)
+			memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
+		wp512_process_buffer(wctx);
+		bufferPos = 0;
+	}
+	if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES)
+		memset(&buffer[bufferPos], 0,
 			  (WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos);
-   	}
-   	bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
-   	memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
+	bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
+	memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
 		   bitLength, WP512_LENGTHBYTES);
-   	wp512_process_buffer(wctx);
+	wp512_process_buffer(wctx);
 	for (i = 0; i < WP512_DIGEST_SIZE/8; i++)
 		digest[i] = cpu_to_be64(wctx->hash[i]);
-   	wctx->bufferBits   = bufferBits;
-   	wctx->bufferPos    = bufferPos;
+	wctx->bufferBits   = bufferBits;
+	wctx->bufferPos    = bufferPos;
 
 	return 0;
 }
-- 
GitLab


From 5c0ecc2e81ecfd9eba8a4945d49c401615c167ca Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 19 May 2021 14:16:50 +0000
Subject: [PATCH 1784/3804] crypto: qce - Fix some error handling path

Fix to return negative error code from the error handling
cases instead of 0.

Fixes: 9363efb4181c ("crypto: qce - Add support for AEAD algorithms")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/aead.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
index 6d06a19b48e49..d47f4171ad830 100644
--- a/drivers/crypto/qce/aead.c
+++ b/drivers/crypto/qce/aead.c
@@ -280,8 +280,10 @@ qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req)
 
 	if (diff_dst) {
 		sg = qce_aead_prepare_dst_buf(req);
-		if (IS_ERR(sg))
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
 			goto err_free;
+		}
 	} else {
 		if (IS_ENCRYPT(rctx->flags))
 			rctx->dst_nents = rctx->src_nents + 1;
@@ -448,13 +450,17 @@ qce_aead_async_req_handle(struct crypto_async_request *async_req)
 	if (ret)
 		return ret;
 	dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-	if (dst_nents < 0)
+	if (dst_nents < 0) {
+		ret = dst_nents;
 		goto error_free;
+	}
 
 	if (diff_dst) {
 		src_nents = dma_map_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
-		if (src_nents < 0)
+		if (src_nents < 0) {
+			ret = src_nents;
 			goto error_unmap_dst;
+		}
 	} else {
 		if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
 			src_nents = dst_nents;
-- 
GitLab


From cb028f1662a9910d4b8e3fbe9eb38f7a545540a3 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 20 May 2021 15:56:11 +0800
Subject: [PATCH 1785/3804] hwrng: core - remove redundant initialization of
 variable err

'err' is assigned before it is read, so remove the redundant initialization.

Cc: PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index adb3c2bd7783e..322e3d0ea98ca 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -323,7 +323,7 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t len)
 {
-	int err = -ENODEV;
+	int err;
 	struct hwrng *rng, *old_rng, *new_rng;
 
 	err = mutex_lock_interruptible(&rng_mutex);
-- 
GitLab


From c4d7d31874a7a8aa804721e082ffe1491f279dd2 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 10:27:31 +0100
Subject: [PATCH 1786/3804] crypto: cavium: Fix a bunch of kernel-doc related
 issues

Fixes the following W=1 kernel build warning(s):

 drivers/crypto/cavium/nitrox/nitrox_main.c:41: warning: cannot understand function prototype: 'const struct pci_device_id nitrox_pci_tbl[] = '
 drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ndev' not described in 'write_to_ucd_unit'
 drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ucode_size' not described in 'write_to_ucd_unit'
 drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'ucode_data' not described in 'write_to_ucd_unit'
 drivers/crypto/cavium/nitrox/nitrox_main.c:73: warning: Function parameter or member 'block_num' not described in 'write_to_ucd_unit'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'index' not described in 'incr_index'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'count' not described in 'incr_index'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: Function parameter or member 'max' not described in 'incr_index'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:46: warning: expecting prototype for Response codes from SE microcode(). Prototype was for incr_index() instead
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:287: warning: Function parameter or member 'cmdq' not described in 'post_se_instr'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: Function parameter or member 'callback' not described in 'nitrox_process_se_request'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: Function parameter or member 'cb_arg' not described in 'nitrox_process_se_request'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:381: warning: expecting prototype for nitrox_se_request(). Prototype was for nitrox_process_se_request() instead
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:535: warning: Function parameter or member 'cmdq' not described in 'process_response_list'
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:535: warning: expecting prototype for process_request_list(). Prototype was for process_response_list() instead
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c:584: warning: Function parameter or member 'data' not described in 'pkt_slc_resp_tasklet'
 drivers/crypto/cavium/nitrox/nitrox_mbx.c:14: warning: cannot understand function prototype: 'enum mbx_msg_type '
 drivers/crypto/cavium/nitrox/nitrox_mbx.c:24: warning: cannot understand function prototype: 'enum mbx_msg_opcode '
 drivers/crypto/cavium/nitrox/nitrox_skcipher.c:26: warning: cannot understand function prototype: 'const struct nitrox_cipher flexi_cipher_table[] = '
 drivers/crypto/cavium/cpt/cptpf_main.c:411: warning: Function parameter or member 'cpt' not described in 'cpt_unload_microcode'
 drivers/crypto/cavium/cpt/cptpf_main.c:411: warning: expecting prototype for Ensure all cores are disengaged from all groups by(). Prototype was for cpt_unload_microcode() instead
 drivers/crypto/cavium/cpt/cptvf_reqmanager.c:17: warning: Function parameter or member 'q' not described in 'get_free_pending_entry'
 drivers/crypto/cavium/cpt/cptvf_reqmanager.c:17: warning: Function parameter or member 'qlen' not described in 'get_free_pending_entry'

Cc: George Cherian <gcherian@marvell.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/cpt/cptpf_main.c         |  2 +-
 drivers/crypto/cavium/cpt/cptvf_reqmanager.c   |  4 ++--
 drivers/crypto/cavium/nitrox/nitrox_main.c     |  4 ++--
 drivers/crypto/cavium/nitrox/nitrox_mbx.c      |  4 ++--
 drivers/crypto/cavium/nitrox/nitrox_reqmgr.c   | 12 +++++++-----
 drivers/crypto/cavium/nitrox/nitrox_skcipher.c |  2 +-
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c
index 06ee42e8a2458..8c32d0eb8fcf2 100644
--- a/drivers/crypto/cavium/cpt/cptpf_main.c
+++ b/drivers/crypto/cavium/cpt/cptpf_main.c
@@ -401,7 +401,7 @@ static void cpt_disable_all_cores(struct cpt_device *cpt)
 	cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0);
 }
 
-/**
+/*
  * Ensure all cores are disengaged from all groups by
  * calling cpt_disable_all_cores() before calling this
  * function.
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index feb0f76783dda..153004bdfb5cd 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -9,8 +9,8 @@
 
 /**
  * get_free_pending_entry - get free entry from pending queue
- * @param pqinfo: pending_qinfo structure
- * @param qno: queue number
+ * @q: pending queue
+ * @qlen: queue length
  */
 static struct pending_entry *get_free_pending_entry(struct pending_queue *q,
 						    int qlen)
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index facc8e6bc5801..6af05df281a98 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -35,7 +35,7 @@ static LIST_HEAD(ndevlist);
 static DEFINE_MUTEX(devlist_lock);
 static unsigned int num_devices;
 
-/**
+/*
  * nitrox_pci_tbl - PCI Device ID Table
  */
 static const struct pci_device_id nitrox_pci_tbl[] = {
@@ -65,7 +65,7 @@ struct ucode {
 	u64 code[];
 };
 
-/**
+/*
  * write_to_ucd_unit - Write Firmware to NITROX UCD unit
  */
 static void write_to_ucd_unit(struct nitrox_device *ndev, u32 ucode_size,
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index c1af9d4fca6e3..2e9c0d2143632 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -8,7 +8,7 @@
 
 #define RING_TO_VFNO(_x, _y)	((_x) / (_y))
 
-/**
+/*
  * mbx_msg_type - Mailbox message types
  */
 enum mbx_msg_type {
@@ -18,7 +18,7 @@ enum mbx_msg_type {
 	MBX_MSG_TYPE_NACK,
 };
 
-/**
+/*
  * mbx_msg_opcode - Mailbox message opcodes
  */
 enum mbx_msg_opcode {
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index 4434c92d6229f..55c18da4a5007 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -19,7 +19,7 @@
 #define REQ_BACKLOG    2
 #define REQ_POSTED     3
 
-/**
+/*
  * Response codes from SE microcode
  * 0x00 - Success
  *   Completion with no error
@@ -279,6 +279,7 @@ static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen)
 /**
  * post_se_instr - Post SE instruction to Packet Input ring
  * @sr: Request structure
+ * @cmdq: Command queue structure
  *
  * Returns 0 if successful or a negative error code,
  * if no space in ring.
@@ -372,6 +373,8 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr)
  * nitrox_process_se_request - Send request to SE core
  * @ndev: NITROX device
  * @req: Crypto request
+ * @callback: Completion callback
+ * @cb_arg: Completion callback arguments
  *
  * Returns 0 on success, or a negative error code.
  */
@@ -526,9 +529,8 @@ static bool sr_completed(struct nitrox_softreq *sr)
 }
 
 /**
- * process_request_list - process completed requests
- * @ndev: N5 device
- * @qno: queue to operate
+ * process_response_list - process completed requests
+ * @cmdq: Command queue structure
  *
  * Returns the number of responses processed.
  */
@@ -578,7 +580,7 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
 	}
 }
 
-/**
+/*
  * pkt_slc_resp_tasklet - post processing of SE responses
  */
 void pkt_slc_resp_tasklet(unsigned long data)
diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
index a553ac65f3249..248b4fff1c729 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
@@ -20,7 +20,7 @@ struct nitrox_cipher {
 	enum flexi_cipher value;
 };
 
-/**
+/*
  * supported cipher list
  */
 static const struct nitrox_cipher flexi_cipher_table[] = {
-- 
GitLab


From c215b513513386afd82a099047474c67f5b8f45c Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 10:27:32 +0100
Subject: [PATCH 1787/3804] crypto: nx: nx-aes-gcm: Kernel-doc formatting
 should not be used for headers

Fixes the following W=1 kernel build warning(s):

 drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'tfm' not described in 'gcm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'in_key' not described in 'gcm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-gcm.c:26: warning: Function parameter or member 'key_len' not described in 'gcm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-gcm.c:26: warning: expecting prototype for Nest Accelerators driver(). Prototype was for gcm_aes_nx_set_key() instead
 drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'tfm' not described in 'ecb_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'in_key' not described in 'ecb_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ecb.c:24: warning: Function parameter or member 'key_len' not described in 'ecb_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ecb.c:24: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ecb_aes_nx_set_key() instead
 drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'tfm' not described in 'ccm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'in_key' not described in 'ccm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ccm.c:26: warning: Function parameter or member 'key_len' not described in 'ccm_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ccm.c:26: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ccm_aes_nx_set_key() instead
 drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'tfm' not described in 'ctr_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'in_key' not described in 'ctr_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ctr.c:25: warning: Function parameter or member 'key_len' not described in 'ctr_aes_nx_set_key'
 drivers/crypto/nx/nx-aes-ctr.c:25: warning: expecting prototype for Nest Accelerators driver(). Prototype was for ctr_aes_nx_set_key() instead
 drivers/crypto/nx/nx-aes-xcbc.c:22: warning: cannot understand function prototype: 'struct xcbc_state '
 drivers/crypto/nx/nx-sha256.c:21: warning: Function parameter or member 'tfm' not described in 'nx_crypto_ctx_sha256_init'
 drivers/crypto/nx/nx-sha256.c:21: warning: expecting prototype for SHA(). Prototype was for nx_crypto_ctx_sha256_init() instead
 drivers/crypto/nx/nx-sha512.c:20: warning: Function parameter or member 'tfm' not described in 'nx_crypto_ctx_sha512_init'
 drivers/crypto/nx/nx-sha512.c:20: warning: expecting prototype for SHA(). Prototype was for nx_crypto_ctx_sha512_init() instead
 drivers/crypto/nx/nx-842-pseries.c:280: warning: Function parameter or member 'wmem' not described in 'nx842_pseries_compress'
 drivers/crypto/nx/nx-842-pseries.c:280: warning: Excess function parameter 'wrkmem' description in 'nx842_pseries_compress'
 drivers/crypto/nx/nx-842-pseries.c:410: warning: Function parameter or member 'wmem' not described in 'nx842_pseries_decompress'
 drivers/crypto/nx/nx-842-pseries.c:410: warning: Excess function parameter 'wrkmem' description in 'nx842_pseries_decompress'
 drivers/crypto/nx/nx-842-pseries.c:523: warning: Function parameter or member 'devdata' not described in 'nx842_OF_set_defaults'
 drivers/crypto/nx/nx-842-pseries.c:548: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_status'
 drivers/crypto/nx/nx-842-pseries.c:582: warning: Function parameter or member 'devdata' not described in 'nx842_OF_upd_maxsglen'
 drivers/crypto/nx/nx-842-pseries.c:582: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_maxsglen'
 drivers/crypto/nx/nx-842-pseries.c:630: warning: Function parameter or member 'devdata' not described in 'nx842_OF_upd_maxsyncop'
 drivers/crypto/nx/nx-842-pseries.c:630: warning: Function parameter or member 'prop' not described in 'nx842_OF_upd_maxsyncop'
 drivers/crypto/nx/nx-842-pseries.c:692: warning: Cannot understand  *
 drivers/crypto/nx/nx-842-pseries.c:825: warning: Function parameter or member 'data' not described in 'nx842_OF_notifier'
 drivers/crypto/nx/nx-842-pseries.c:825: warning: Excess function parameter 'update' description in 'nx842_OF_notifier'

Cc: Haren Myneni <haren@us.ibm.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Jennings <rcj@linux.vnet.ibm.com>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: linux-crypto@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-842-pseries.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 8ee547ee378ec..67caff73f058f 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -264,8 +264,8 @@ static int nx842_validate_result(struct device *dev,
  * @inlen: Length of input buffer
  * @out: Pointer to output buffer
  * @outlen: Length of output buffer
- * @wrkmem: ptr to buffer for working memory, size determined by
- *          nx842_pseries_driver.workmem_size
+ * @wmem: ptr to buffer for working memory, size determined by
+ *        nx842_pseries_driver.workmem_size
  *
  * Returns:
  *   0		Success, output of length @outlen stored in the buffer at @out
@@ -393,8 +393,8 @@ unlock:
  * @inlen: Length of input buffer
  * @out: Pointer to output buffer
  * @outlen: Length of output buffer
- * @wrkmem: ptr to buffer for working memory, size determined by
- *          nx842_pseries_driver.workmem_size
+ * @wmem: ptr to buffer for working memory, size determined by
+ *        nx842_pseries_driver.workmem_size
  *
  * Returns:
  *   0		Success, output of length @outlen stored in the buffer at @out
@@ -513,7 +513,7 @@ unlock:
 /**
  * nx842_OF_set_defaults -- Set default (disabled) values for devdata
  *
- * @devdata - struct nx842_devdata to update
+ * @devdata: struct nx842_devdata to update
  *
  * Returns:
  *  0 on success
@@ -538,7 +538,7 @@ static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
  * The status field indicates if the device is enabled when the status
  * is 'okay'.  Otherwise the device driver will be disabled.
  *
- * @prop - struct property point containing the maxsyncop for the update
+ * @prop: struct property point containing the maxsyncop for the update
  *
  * Returns:
  *  0 - Device is available
@@ -571,8 +571,8 @@ static int nx842_OF_upd_status(struct property *prop)
  *  In this example, the maximum byte length of a scatter list is
  *  0x0ff0 (4,080).
  *
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property point containing the maxsyncop for the update
  *
  * Returns:
  *  0 on success
@@ -619,8 +619,8 @@ static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
  *  0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
  *  elements.
  *
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property point containing the maxsyncop for the update
  *
  * Returns:
  *  0 on success
@@ -689,7 +689,6 @@ out:
 }
 
 /**
- *
  * nx842_OF_upd -- Handle OF properties updates for the device.
  *
  * Set all properties from the OF tree.  Optionally, a new property
@@ -812,8 +811,7 @@ error_out:
  *
  * @np: notifier block
  * @action: notifier action
- * @update: struct pSeries_reconfig_prop_update pointer if action is
- *	PSERIES_UPDATE_PROPERTY
+ * @data: struct of_reconfig_data pointer
  *
  * Returns:
  *	NOTIFY_OK on success
-- 
GitLab


From 01df08b93e400ce45d86ef8dd7dd849f44b0e9d3 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 10:27:33 +0100
Subject: [PATCH 1788/3804] crypto: ccp: ccp-dev: Fix a little doc-rot

Fixes the following W=1 kernel build warning(s):

 drivers/crypto/ccp/ccp-dev.c:476: warning: Function parameter or member 'sp' not described in 'ccp_alloc_struct'
 drivers/crypto/ccp/ccp-dev.c:476: warning: Excess function parameter 'dev' description in 'ccp_alloc_struct'
 drivers/crypto/ccp/ccp-dev.c:476: warning: Function parameter or member 'sp' not described in 'ccp_alloc_struct'
 drivers/crypto/ccp/ccp-dev.c:476: warning: Excess function parameter 'dev' description in 'ccp_alloc_struct'

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: John Allen <john.allen@amd.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Gary R Hook <gary.hook@amd.com>
Cc: linux-crypto@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 6777582aa1cee..9ce4b68e9c483 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -470,7 +470,7 @@ int ccp_cmd_queue_thread(void *data)
 /**
  * ccp_alloc_struct - allocate and initialize the ccp_device struct
  *
- * @dev: device struct of the CCP
+ * @sp: sp_device struct of the CCP
  */
 struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
 {
-- 
GitLab


From aa22cd7f67807eb8047221e57f8a327432ab8309 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 20 May 2021 12:40:00 +0200
Subject: [PATCH 1789/3804] crypto: tcrypt - enable tests for xxhash and blake2

Fill some of the recently freed up slots in tcrypt with xxhash64 and
blake2b/blake2s, so we can easily benchmark their kernel implementations
from user space.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6b7c158dc5087..f8d06da78e4f3 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1847,10 +1847,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret += tcrypt_test("cts(cbc(aes))");
 		break;
 
+        case 39:
+		ret += tcrypt_test("xxhash64");
+		break;
+
         case 40:
 		ret += tcrypt_test("rmd160");
 		break;
 
+	case 41:
+		ret += tcrypt_test("blake2s-256");
+		break;
+
+	case 42:
+		ret += tcrypt_test("blake2b-512");
+		break;
+
 	case 43:
 		ret += tcrypt_test("ecb(seed)");
 		break;
@@ -2356,10 +2368,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_hash_speed("sha224", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 		fallthrough;
+	case 314:
+		test_hash_speed("xxhash64", sec, generic_hash_speed_template);
+		if (mode > 300 && mode < 400) break;
+		fallthrough;
 	case 315:
 		test_hash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 		fallthrough;
+	case 316:
+		test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
+		if (mode > 300 && mode < 400) break;
+		fallthrough;
+	case 317:
+		test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
+		if (mode > 300 && mode < 400) break;
+		fallthrough;
 	case 318:
 		klen = 16;
 		test_hash_speed("ghash", sec, generic_hash_speed_template);
@@ -2456,10 +2480,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_ahash_speed("sha224", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
+	case 414:
+		test_ahash_speed("xxhash64", sec, generic_hash_speed_template);
+		if (mode > 400 && mode < 500) break;
+		fallthrough;
 	case 415:
 		test_ahash_speed("rmd160", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
 		fallthrough;
+	case 416:
+		test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
+		if (mode > 400 && mode < 500) break;
+		fallthrough;
+	case 417:
+		test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
+		if (mode > 400 && mode < 500) break;
+		fallthrough;
 	case 418:
 		test_ahash_speed("sha3-224", sec, generic_hash_speed_template);
 		if (mode > 400 && mode < 500) break;
-- 
GitLab


From 98f481f22de235b5356f9fa94b0fcffeacc772d8 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 20 May 2021 21:57:13 +0800
Subject: [PATCH 1790/3804] hwrng: core - Use DEVICE_ATTR_<RW|RO> macro

Use DEVICE_ATTR_RW()/DEVICE_ATTR_RO() helper instead of
plain DEVICE_ATTR, which makes the code a bit shorter and
easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 36 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 322e3d0ea98ca..a3db27916256d 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -319,9 +319,9 @@ static int enable_best_rng(void)
 	return ret;
 }
 
-static ssize_t hwrng_attr_current_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t len)
+static ssize_t rng_current_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t len)
 {
 	int err;
 	struct hwrng *rng, *old_rng, *new_rng;
@@ -354,9 +354,9 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
 	return err ? : len;
 }
 
-static ssize_t hwrng_attr_current_show(struct device *dev,
-				       struct device_attribute *attr,
-				       char *buf)
+static ssize_t rng_current_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
 {
 	ssize_t ret;
 	struct hwrng *rng;
@@ -371,9 +371,9 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
 	return ret;
 }
 
-static ssize_t hwrng_attr_available_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
+static ssize_t rng_available_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
 {
 	int err;
 	struct hwrng *rng;
@@ -392,22 +392,16 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
 	return strlen(buf);
 }
 
-static ssize_t hwrng_attr_selected_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
+static ssize_t rng_selected_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
 {
 	return sysfs_emit(buf, "%d\n", cur_rng_set_by_user);
 }
 
-static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
-		   hwrng_attr_current_show,
-		   hwrng_attr_current_store);
-static DEVICE_ATTR(rng_available, S_IRUGO,
-		   hwrng_attr_available_show,
-		   NULL);
-static DEVICE_ATTR(rng_selected, S_IRUGO,
-		   hwrng_attr_selected_show,
-		   NULL);
+static DEVICE_ATTR_RW(rng_current);
+static DEVICE_ATTR_RO(rng_available);
+static DEVICE_ATTR_RO(rng_selected);
 
 static struct attribute *rng_dev_attrs[] = {
 	&dev_attr_rng_current.attr,
-- 
GitLab


From 9b7b94683a9b9c42a743d591e48b9f51f505dd1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stephan=20M=C3=BCller?= <smueller@chronox.de>
Date: Thu, 20 May 2021 21:31:11 +0200
Subject: [PATCH 1791/3804] crypto: DRBG - switch to HMAC SHA512 DRBG as
 default DRBG

The default DRBG is the one that has the highest priority. The priority
is defined based on the order of the list drbg_cores[] where the highest
priority is given to the last entry by drbg_fill_array.

With this patch the default DRBG is switched from HMAC SHA256 to HMAC
SHA512 to support compliance with SP800-90B and SP800-90C (current
draft).

The user of the crypto API is completely unaffected by the change.

Signed-off-by: Stephan Mueller <smueller@chronox.de>
Acked-by: simo Sorce <simo@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/drbg.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/crypto/drbg.c b/crypto/drbg.c
index 1b4587e0ddad8..ea85d4a0fe9e9 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -176,18 +176,18 @@ static const struct drbg_core drbg_cores[] = {
 		.blocklen_bytes = 48,
 		.cra_name = "hmac_sha384",
 		.backend_cra_name = "hmac(sha384)",
-	}, {
-		.flags = DRBG_HMAC | DRBG_STRENGTH256,
-		.statelen = 64, /* block length of cipher */
-		.blocklen_bytes = 64,
-		.cra_name = "hmac_sha512",
-		.backend_cra_name = "hmac(sha512)",
 	}, {
 		.flags = DRBG_HMAC | DRBG_STRENGTH256,
 		.statelen = 32, /* block length of cipher */
 		.blocklen_bytes = 32,
 		.cra_name = "hmac_sha256",
 		.backend_cra_name = "hmac(sha256)",
+	}, {
+		.flags = DRBG_HMAC | DRBG_STRENGTH256,
+		.statelen = 64, /* block length of cipher */
+		.blocklen_bytes = 64,
+		.cra_name = "hmac_sha512",
+		.backend_cra_name = "hmac(sha512)",
 	},
 #endif /* CONFIG_CRYPTO_DRBG_HMAC */
 };
-- 
GitLab


From 1339a7c3ba05137a2d2fe75f602311bbfc6fab33 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 20 May 2021 22:20:23 -0400
Subject: [PATCH 1792/3804] crypto: qce: skcipher: Fix incorrect sg count for
 dma transfers

Use the sg count returned by dma_map_sg to call into
dmaengine_prep_slave_sg rather than using the original sg count. dma_map_sg
can merge consecutive sglist entries, thus making the original sg count
wrong. This is a fix for memory corruption issues observed while testing
encryption/decryption of large messages using libkcapi framework.

Patch has been tested further by running full suite of tcrypt.ko tests
including fuzz tests.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/skcipher.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
index c0a0d8c4fce19..2594184792272 100644
--- a/drivers/crypto/qce/skcipher.c
+++ b/drivers/crypto/qce/skcipher.c
@@ -72,7 +72,7 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
 	struct scatterlist *sg;
 	bool diff_dst;
 	gfp_t gfp;
-	int ret;
+	int dst_nents, src_nents, ret;
 
 	rctx->iv = req->iv;
 	rctx->ivsize = crypto_skcipher_ivsize(skcipher);
@@ -123,21 +123,22 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
 	sg_mark_end(sg);
 	rctx->dst_sg = rctx->dst_tbl.sgl;
 
-	ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-	if (ret < 0)
+	dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+	if (dst_nents < 0)
 		goto error_free;
 
 	if (diff_dst) {
-		ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
-		if (ret < 0)
+		src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
+		if (src_nents < 0)
 			goto error_unmap_dst;
 		rctx->src_sg = req->src;
 	} else {
 		rctx->src_sg = rctx->dst_sg;
+		src_nents = dst_nents - 1;
 	}
 
-	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents,
-			       rctx->dst_sg, rctx->dst_nents,
+	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents,
+			       rctx->dst_sg, dst_nents,
 			       qce_skcipher_done, async_req);
 	if (ret)
 		goto error_unmap_src;
-- 
GitLab


From abf790a9b52d91750a07bfe055aaf0f152f6d4ac Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Thu, 20 May 2021 22:58:44 -0400
Subject: [PATCH 1793/3804] MAINTAINERS: Add maintainer for Qualcomm crypto
 drivers

There is no maintainer for Qualcomm crypto drivers and we are seeing more
development in this area. Add myself as the maintainer so that I can help
in reviewing the changes submitted to these drivers.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6df5a401ff92f..d478f44be7ce6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15136,6 +15136,13 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt
 F:	drivers/cpufreq/qcom-cpufreq-nvmem.c
 
+QUALCOMM CRYPTO DRIVERS
+M:	Thara Gopinath <thara.gopinath@linaro.org>
+L:	linux-crypto@vger.kernel.org
+L:	linux-arm-msm@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/qce/
+
 QUALCOMM EMAC GIGABIT ETHERNET DRIVER
 M:	Timur Tabi <timur@kernel.org>
 L:	netdev@vger.kernel.org
-- 
GitLab


From 3f4a8567b50e47da075f3ca676a899954d4c3d8d Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Fri, 21 May 2021 16:41:47 +0800
Subject: [PATCH 1794/3804] crypto: nx - Fix typo in comment

Fix typo '@workmem' -> '@wmem'.

Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-common-powernv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c
index 446f611726df5..655361ba91070 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -660,8 +660,8 @@ static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
  * @inlen: input buffer size
  * @out: output buffer pointer
  * @outlenp: output buffer size pointer
- * @workmem: working memory buffer pointer, size determined by
- *           nx842_powernv_driver.workmem_size
+ * @wmem: working memory buffer pointer, size determined by
+ *        nx842_powernv_driver.workmem_size
  *
  * Returns: see @nx842_powernv_exec()
  */
-- 
GitLab


From e5764377aa54b32bfcb651f8188729e7b35e7a7c Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 21 May 2021 17:44:52 +0800
Subject: [PATCH 1795/3804] crypto: qce - Fix inconsistent indenting

Eliminate the following smatch warning:

drivers/crypto/qce/aead.c:85 qce_aead_done() warn: inconsistent
indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/aead.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
index d47f4171ad830..290e2446a2f35 100644
--- a/drivers/crypto/qce/aead.c
+++ b/drivers/crypto/qce/aead.c
@@ -82,7 +82,7 @@ static void qce_aead_done(void *data)
 		ret = memcmp(result_buf->auth_iv, tag, ctx->authsize);
 		if (ret) {
 			pr_err("Bad message error\n");
-			 error = -EBADMSG;
+			error = -EBADMSG;
 		}
 	}
 
-- 
GitLab


From dc11803409fbf8bc5a326ddd9f24cde620b3519d Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 21 May 2021 18:02:43 +0800
Subject: [PATCH 1796/3804] crypto: hisilicon/qm - add dfx log if not use
 hardware crypto algs

Add the necessary log message for when the hardware crypto algs are not used.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 8f7ea504ce80c..deb104e2bd244 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4252,11 +4252,14 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
  */
 int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
 {
+	struct device *dev = &qm->pdev->dev;
 	int flag = 0;
 	int ret = 0;
-	/* HW V2 not support both use uacce sva mode and hardware crypto algs */
-	if (qm->ver <= QM_HW_V2 && qm->use_sva)
+
+	if (qm->ver <= QM_HW_V2 && qm->use_sva) {
+		dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n");
 		return 0;
+	}
 
 	mutex_lock(&qm_list->lock);
 	if (list_empty(&qm_list->list))
-- 
GitLab


From 0dbcf1a24e6875d51c290a174a7f2526498e2836 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 21 May 2021 18:02:44 +0800
Subject: [PATCH 1797/3804] crypto: hisilicon/qm - fix the process of VF's list
 adding

If SVA mode is enabled on Kunpeng 920, the "qm alg register" process
returns directly, so the VF's list entry is never added to the QM list.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index deb104e2bd244..c671f9433716f 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4256,17 +4256,17 @@ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
 	int flag = 0;
 	int ret = 0;
 
-	if (qm->ver <= QM_HW_V2 && qm->use_sva) {
-		dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n");
-		return 0;
-	}
-
 	mutex_lock(&qm_list->lock);
 	if (list_empty(&qm_list->list))
 		flag = 1;
 	list_add_tail(&qm->list, &qm_list->list);
 	mutex_unlock(&qm_list->lock);
 
+	if (qm->ver <= QM_HW_V2 && qm->use_sva) {
+		dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n");
+		return 0;
+	}
+
 	if (flag) {
 		ret = qm_list->register_to_crypto(qm);
 		if (ret) {
@@ -4291,13 +4291,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_alg_register);
  */
 void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
 {
-	if (qm->ver <= QM_HW_V2 && qm->use_sva)
-		return;
-
 	mutex_lock(&qm_list->lock);
 	list_del(&qm->list);
 	mutex_unlock(&qm_list->lock);
 
+	if (qm->ver <= QM_HW_V2 && qm->use_sva)
+		return;
+
 	if (list_empty(&qm_list->list))
 		qm_list->unregister_from_crypto(qm);
 }
-- 
GitLab


From 6889fc2104e5d20899b91e61daf07a7524b2010d Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 22 May 2021 10:44:28 +0800
Subject: [PATCH 1798/3804] crypto: ecdh - fix ecdh-nist-p192's entry in
 testmgr

Add a comment that p192 will fail to register in FIPS mode.

Fix ecdh-nist-p192's entry in testmgr by removing the ifdefs
and not setting fips_allowed.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c    | 1 +
 crypto/testmgr.c | 3 ---
 crypto/testmgr.h | 2 --
 3 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 07eb34fef25b7..1974675093239 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -179,6 +179,7 @@ static int ecdh_init(void)
 {
 	int ret;
 
+	/* NIST p192 will fail to register in FIPS mode */
 	ret = crypto_register_kpp(&ecdh_nist_p192);
 	ecdh_nist_p192_registered = ret == 0;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 10c5b3b01ec47..26e40dba9ad29 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4899,15 +4899,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		}
 	}, {
 #endif
-#ifndef CONFIG_CRYPTO_FIPS
 		.alg = "ecdh-nist-p192",
 		.test = alg_test_kpp,
-		.fips_allowed = 1,
 		.suite = {
 			.kpp = __VECS(ecdh_p192_tv_template)
 		}
 	}, {
-#endif
 		.alg = "ecdh-nist-p256",
 		.test = alg_test_kpp,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index aead75d904933..b9cf5b815532a 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2685,7 +2685,6 @@ static const struct kpp_testvec curve25519_tv_template[] = {
 }
 };
 
-#ifndef CONFIG_CRYPTO_FIPS
 static const struct kpp_testvec ecdh_p192_tv_template[] = {
 	{
 	.secret =
@@ -2725,7 +2724,6 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = {
 	.expected_ss_size = 24
 	}
 };
-#endif
 
 static const struct kpp_testvec ecdh_p256_tv_template[] = {
 	{
-- 
GitLab


From 8fd28fa5046b377039d5bbc0ab2f625dec703980 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 22 May 2021 10:44:29 +0800
Subject: [PATCH 1799/3804] crypto: ecdh - fix 'ecdh_init'

NIST P192 is not unregistered if registering NIST P256 fails; the
algorithms that were already registered need to be unregistered.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 1974675093239..b6f493e828128 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -183,7 +183,16 @@ static int ecdh_init(void)
 	ret = crypto_register_kpp(&ecdh_nist_p192);
 	ecdh_nist_p192_registered = ret == 0;
 
-	return crypto_register_kpp(&ecdh_nist_p256);
+	ret = crypto_register_kpp(&ecdh_nist_p256);
+	if (ret)
+		goto nist_p256_error;
+
+	return 0;
+
+nist_p256_error:
+	if (ecdh_nist_p192_registered)
+		crypto_unregister_kpp(&ecdh_nist_p192);
+	return ret;
 }
 
 static void ecdh_exit(void)
-- 
GitLab


From 8154132521e9cd6d28a7e9778c4ae23b716994bf Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 22 May 2021 10:44:30 +0800
Subject: [PATCH 1800/3804] crypto: ecdh - register NIST P384 tfm

Add ecdh_nist_p384_init_tfm and register and unregister P384 tfm.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ecdh.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index b6f493e828128..c6f61c2211dc7 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -173,6 +173,31 @@ static struct kpp_alg ecdh_nist_p256 = {
 	},
 };
 
+static int ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
+{
+	struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
+
+	ctx->curve_id = ECC_CURVE_NIST_P384;
+	ctx->ndigits = ECC_CURVE_NIST_P384_DIGITS;
+
+	return 0;
+}
+
+static struct kpp_alg ecdh_nist_p384 = {
+	.set_secret = ecdh_set_secret,
+	.generate_public_key = ecdh_compute_value,
+	.compute_shared_secret = ecdh_compute_value,
+	.max_size = ecdh_max_size,
+	.init = ecdh_nist_p384_init_tfm,
+	.base = {
+		.cra_name = "ecdh-nist-p384",
+		.cra_driver_name = "ecdh-nist-p384-generic",
+		.cra_priority = 100,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = sizeof(struct ecdh_ctx),
+	},
+};
+
 static bool ecdh_nist_p192_registered;
 
 static int ecdh_init(void)
@@ -187,8 +212,15 @@ static int ecdh_init(void)
 	if (ret)
 		goto nist_p256_error;
 
+	ret = crypto_register_kpp(&ecdh_nist_p384);
+	if (ret)
+		goto nist_p384_error;
+
 	return 0;
 
+nist_p384_error:
+	crypto_unregister_kpp(&ecdh_nist_p256);
+
 nist_p256_error:
 	if (ecdh_nist_p192_registered)
 		crypto_unregister_kpp(&ecdh_nist_p192);
@@ -200,6 +232,7 @@ static void ecdh_exit(void)
 	if (ecdh_nist_p192_registered)
 		crypto_unregister_kpp(&ecdh_nist_p192);
 	crypto_unregister_kpp(&ecdh_nist_p256);
+	crypto_unregister_kpp(&ecdh_nist_p384);
 }
 
 subsys_initcall(ecdh_init);
-- 
GitLab


From 8e568fc2a71d097a5549043a39984a46262b6035 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 22 May 2021 10:44:31 +0800
Subject: [PATCH 1801/3804] crypto: ecdh - add test suite for NIST P384

Add test vector params for NIST P384, and add the NIST P384 test
vector to the list of tests.

Vector param from:
https://datatracker.ietf.org/doc/html/rfc5903#section-3.1

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c |  7 ++++++
 crypto/testmgr.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 26e40dba9ad29..1f7f63e836ae2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4911,6 +4911,13 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.kpp = __VECS(ecdh_p256_tv_template)
 		}
+	}, {
+		.alg = "ecdh-nist-p384",
+		.test = alg_test_kpp,
+		.fips_allowed = 1,
+		.suite = {
+			.kpp = __VECS(ecdh_p384_tv_template)
+		}
 	}, {
 		.alg = "ecdsa-nist-p192",
 		.test = alg_test_akcipher,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index b9cf5b815532a..96eb7ce9f81be 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2811,6 +2811,67 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
 	}
 };
 
+/*
+ * NIST P384 test vectors from RFC5903
+ */
+static const struct kpp_testvec ecdh_p384_tv_template[] = {
+	{
+	.secret =
+#ifdef __LITTLE_ENDIAN
+	"\x02\x00" /* type */
+	"\x36\x00" /* len */
+	"\x30\x00" /* key_size */
+#else
+	"\x00\x02" /* type */
+	"\x00\x36" /* len */
+	"\x00\x30" /* key_size */
+#endif
+	"\x09\x9F\x3C\x70\x34\xD4\xA2\xC6"
+	"\x99\x88\x4D\x73\xA3\x75\xA6\x7F"
+	"\x76\x24\xEF\x7C\x6B\x3C\x0F\x16"
+	"\x06\x47\xB6\x74\x14\xDC\xE6\x55"
+	"\xE3\x5B\x53\x80\x41\xE6\x49\xEE"
+	"\x3F\xAE\xF8\x96\x78\x3A\xB1\x94",
+	.b_public =
+	"\xE5\x58\xDB\xEF\x53\xEE\xCD\xE3"
+	"\xD3\xFC\xCF\xC1\xAE\xA0\x8A\x89"
+	"\xA9\x87\x47\x5D\x12\xFD\x95\x0D"
+	"\x83\xCF\xA4\x17\x32\xBC\x50\x9D"
+	"\x0D\x1A\xC4\x3A\x03\x36\xDE\xF9"
+	"\x6F\xDA\x41\xD0\x77\x4A\x35\x71"
+	"\xDC\xFB\xEC\x7A\xAC\xF3\x19\x64"
+	"\x72\x16\x9E\x83\x84\x30\x36\x7F"
+	"\x66\xEE\xBE\x3C\x6E\x70\xC4\x16"
+	"\xDD\x5F\x0C\x68\x75\x9D\xD1\xFF"
+	"\xF8\x3F\xA4\x01\x42\x20\x9D\xFF"
+	"\x5E\xAA\xD9\x6D\xB9\xE6\x38\x6C",
+	.expected_a_public =
+	"\x66\x78\x42\xD7\xD1\x80\xAC\x2C"
+	"\xDE\x6F\x74\xF3\x75\x51\xF5\x57"
+	"\x55\xC7\x64\x5C\x20\xEF\x73\xE3"
+	"\x16\x34\xFE\x72\xB4\xC5\x5E\xE6"
+	"\xDE\x3A\xC8\x08\xAC\xB4\xBD\xB4"
+	"\xC8\x87\x32\xAE\xE9\x5F\x41\xAA"
+	"\x94\x82\xED\x1F\xC0\xEE\xB9\xCA"
+	"\xFC\x49\x84\x62\x5C\xCF\xC2\x3F"
+	"\x65\x03\x21\x49\xE0\xE1\x44\xAD"
+	"\xA0\x24\x18\x15\x35\xA0\xF3\x8E"
+	"\xEB\x9F\xCF\xF3\xC2\xC9\x47\xDA"
+	"\xE6\x9B\x4C\x63\x45\x73\xA8\x1C",
+	.expected_ss =
+	"\x11\x18\x73\x31\xC2\x79\x96\x2D"
+	"\x93\xD6\x04\x24\x3F\xD5\x92\xCB"
+	"\x9D\x0A\x92\x6F\x42\x2E\x47\x18"
+	"\x75\x21\x28\x7E\x71\x56\xC5\xC4"
+	"\xD6\x03\x13\x55\x69\xB9\xE9\xD0"
+	"\x9C\xF5\xD4\xA2\x70\xF5\x97\x46",
+	.secret_size = 54,
+	.b_public_size = 96,
+	.expected_a_public_size = 96,
+	.expected_ss_size = 48
+	}
+};
+
 /*
  * MD4 test vectors from RFC1320
  */
-- 
GitLab


From 9b75e311acadb978001c81400a6ba64f48bf00e8 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 22 May 2021 14:49:21 +0800
Subject: [PATCH 1802/3804] crypto: hisilicon/qm - add MSI detection steps on
 Kunpeng930

Compared with Kunpeng920, Kunpeng930 adds MSI configuration steps to wait
for the interrupt to be emptied. In order to be compatible with the
kunpeng920 driver, 'set_msi' callback is added in 'hisi_qm_hw_ops' to
configure hardware register. Call 'set_msi' to disable or enable MSI
during reset.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 124 +++++++++++++++++++++++++++-------
 1 file changed, 100 insertions(+), 24 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index c671f9433716f..a7cd314073c24 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -161,6 +161,9 @@
 #define QM_PEH_VENDOR_ID		0x1000d8
 #define ACC_VENDOR_ID_VALUE		0x5a5a
 #define QM_PEH_DFX_INFO0		0x1000fc
+#define QM_PEH_DFX_INFO1		0x100100
+#define QM_PEH_DFX_MASK			(BIT(0) | BIT(2))
+#define QM_PEH_MSI_FINISH_MASK		GENMASK(19, 16)
 #define ACC_PEH_SRIOV_CTRL_VF_MSE_SHIFT	3
 #define ACC_PEH_MSI_DISABLE		GENMASK(31, 0)
 #define ACC_MASTER_GLOBAL_CTRL_SHUTDOWN	0x1
@@ -171,6 +174,7 @@
 #define QM_RAS_NFE_MBIT_DISABLE		~QM_ECC_MBIT
 #define ACC_AM_ROB_ECC_INT_STS		0x300104
 #define ACC_ROB_ECC_ERR_MULTPL		BIT(1)
+#define QM_MSI_CAP_ENABLE		BIT(16)
 
 #define QM_DFX_MB_CNT_VF		0x104010
 #define QM_DFX_DB_CNT_VF		0x104020
@@ -352,6 +356,7 @@ struct hisi_qm_hw_ops {
 	void (*hw_error_uninit)(struct hisi_qm *qm);
 	enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm);
 	int (*stop_qp)(struct hisi_qp *qp);
+	int (*set_msi)(struct hisi_qm *qm, bool set);
 };
 
 struct qm_dfx_item {
@@ -1776,10 +1781,98 @@ static int qm_stop_qp(struct hisi_qp *qp)
 	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
 }
 
+static int qm_set_msi(struct hisi_qm *qm, bool set)
+{
+	struct pci_dev *pdev = qm->pdev;
+
+	if (set) {
+		pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+				       0);
+	} else {
+		pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+				       ACC_PEH_MSI_DISABLE);
+		if (qm->err_status.is_qm_ecc_mbit ||
+		    qm->err_status.is_dev_ecc_mbit)
+			return 0;
+
+		mdelay(1);
+		if (readl(qm->io_base + QM_PEH_DFX_INFO0))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void qm_wait_msi_finish(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	u32 cmd = ~0;
+	int cnt = 0;
+	u32 val;
+	int ret;
+
+	while (true) {
+		pci_read_config_dword(pdev, pdev->msi_cap +
+				      PCI_MSI_PENDING_64, &cmd);
+		if (!cmd)
+			break;
+
+		if (++cnt > MAX_WAIT_COUNTS) {
+			pci_warn(pdev, "failed to empty MSI PENDING!\n");
+			break;
+		}
+
+		udelay(1);
+	}
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO0,
+					 val, !(val & QM_PEH_DFX_MASK),
+					 POLL_PERIOD, POLL_TIMEOUT);
+	if (ret)
+		pci_warn(pdev, "failed to empty PEH MSI!\n");
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO1,
+					 val, !(val & QM_PEH_MSI_FINISH_MASK),
+					 POLL_PERIOD, POLL_TIMEOUT);
+	if (ret)
+		pci_warn(pdev, "failed to finish MSI operation!\n");
+}
+
+static int qm_set_msi_v3(struct hisi_qm *qm, bool set)
+{
+	struct pci_dev *pdev = qm->pdev;
+	int ret = -ETIMEDOUT;
+	u32 cmd, i;
+
+	pci_read_config_dword(pdev, pdev->msi_cap, &cmd);
+	if (set)
+		cmd |= QM_MSI_CAP_ENABLE;
+	else
+		cmd &= ~QM_MSI_CAP_ENABLE;
+
+	pci_write_config_dword(pdev, pdev->msi_cap, cmd);
+	if (set) {
+		for (i = 0; i < MAX_WAIT_COUNTS; i++) {
+			pci_read_config_dword(pdev, pdev->msi_cap, &cmd);
+			if (cmd & QM_MSI_CAP_ENABLE)
+				return 0;
+
+			udelay(1);
+		}
+	} else {
+		udelay(WAIT_PERIOD_US_MIN);
+		qm_wait_msi_finish(qm);
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static const struct hisi_qm_hw_ops qm_hw_ops_v1 = {
 	.qm_db = qm_db_v1,
 	.get_irq_num = qm_get_irq_num_v1,
 	.hw_error_init = qm_hw_error_init_v1,
+	.set_msi = qm_set_msi,
 };
 
 static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
@@ -1789,6 +1882,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
 	.hw_error_init = qm_hw_error_init_v2,
 	.hw_error_uninit = qm_hw_error_uninit_v2,
 	.hw_error_handle = qm_hw_error_handle_v2,
+	.set_msi = qm_set_msi,
 };
 
 static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
@@ -1799,6 +1893,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
 	.hw_error_uninit = qm_hw_error_uninit_v3,
 	.hw_error_handle = qm_hw_error_handle_v2,
 	.stop_qp = qm_stop_qp,
+	.set_msi = qm_set_msi_v3,
 };
 
 static void *qm_get_avail_sqe(struct hisi_qp *qp)
@@ -3586,6 +3681,9 @@ static int qm_check_req_recv(struct hisi_qm *qm)
 	int ret;
 	u32 val;
 
+	if (qm->ver >= QM_HW_V3)
+		return 0;
+
 	writel(ACC_VENDOR_ID_VALUE, qm->io_base + QM_PEH_VENDOR_ID);
 	ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_VENDOR_ID, val,
 					 (val == ACC_VENDOR_ID_VALUE),
@@ -3656,28 +3754,6 @@ static int qm_set_vf_mse(struct hisi_qm *qm, bool set)
 	return -ETIMEDOUT;
 }
 
-static int qm_set_msi(struct hisi_qm *qm, bool set)
-{
-	struct pci_dev *pdev = qm->pdev;
-
-	if (set) {
-		pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
-				       0);
-	} else {
-		pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
-				       ACC_PEH_MSI_DISABLE);
-		if (qm->err_status.is_qm_ecc_mbit ||
-		    qm->err_status.is_dev_ecc_mbit)
-			return 0;
-
-		mdelay(1);
-		if (readl(qm->io_base + QM_PEH_DFX_INFO0))
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
 static int qm_vf_reset_prepare(struct hisi_qm *qm,
 			       enum qm_stop_reason stop_reason)
 {
@@ -3800,7 +3876,7 @@ static int qm_soft_reset(struct hisi_qm *qm)
 		}
 	}
 
-	ret = qm_set_msi(qm, false);
+	ret = qm->ops->set_msi(qm, false);
 	if (ret) {
 		pci_err(pdev, "Fails to disable PEH MSI bit.\n");
 		return ret;
@@ -3941,7 +4017,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 	struct pci_dev *pdev = qm->pdev;
 	int ret;
 
-	ret = qm_set_msi(qm, true);
+	ret = qm->ops->set_msi(qm, true);
 	if (ret) {
 		pci_err(pdev, "Fails to enable PEH MSI bit!\n");
 		return ret;
-- 
GitLab


From a5c164b195a89aedc8179d68cedf00e7f8baa58e Mon Sep 17 00:00:00 2001
From: Longfang Liu <liulongfang@huawei.com>
Date: Sat, 22 May 2021 15:30:04 +0800
Subject: [PATCH 1803/3804] crypto: hisilicon/qm - support address prefetching

Kunpeng930 hardware supports address prefetching to improve performance
before doing tasks in the SVA scenario.

This patch enables the feature during device initialization by writing
hardware registers. During reset, address prefetching is disabled to
avoid failures in the interaction between the accelerator device and
the SMMU.

Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 50 +++++++++++++++++++++++
 drivers/crypto/hisilicon/qm.c             | 35 ++++++++++++++++
 drivers/crypto/hisilicon/qm.h             |  2 +
 drivers/crypto/hisilicon/sec2/sec_main.c  | 45 ++++++++++++++++++++
 drivers/crypto/hisilicon/zip/zip_main.c   | 50 +++++++++++++++++++++++
 5 files changed, 182 insertions(+)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 13323baf393e8..37c5296008474 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -75,6 +75,11 @@
 #define HPRE_BD_USR_MASK		GENMASK(1, 0)
 #define HPRE_CLUSTER_CORE_MASK_V2	GENMASK(3, 0)
 #define HPRE_CLUSTER_CORE_MASK_V3	GENMASK(7, 0)
+#define HPRE_PREFETCH_CFG		0x301130
+#define HPRE_SVA_PREFTCH_DFX		0x30115C
+#define HPRE_PREFETCH_ENABLE		(~(BIT(0) | BIT(30)))
+#define HPRE_PREFETCH_DISABLE		BIT(30)
+#define HPRE_SVA_DISABLE_READY		(BIT(4) | BIT(8))
 
 #define HPRE_AM_OOO_SHUTDOWN_ENB	0x301044
 #define HPRE_AM_OOO_SHUTDOWN_ENABLE	BIT(0)
@@ -370,6 +375,47 @@ static void disable_flr_of_bme(struct hisi_qm *qm)
 	writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE);
 }
 
+static void hpre_open_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Enable prefetch */
+	val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG);
+	val &= HPRE_PREFETCH_ENABLE;
+	writel(val, qm->io_base + HPRE_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_PREFETCH_CFG,
+					 val, !(val & HPRE_PREFETCH_DISABLE),
+					 HPRE_REG_RD_INTVRL_US,
+					 HPRE_REG_RD_TMOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+static void hpre_close_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG);
+	val |= HPRE_PREFETCH_DISABLE;
+	writel(val, qm->io_base + HPRE_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_SVA_PREFTCH_DFX,
+					 val, !(val & HPRE_SVA_DISABLE_READY),
+					 HPRE_REG_RD_INTVRL_US,
+					 HPRE_REG_RD_TMOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to close sva prefetch\n");
+}
+
 static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
 {
 	struct device *dev = &qm->pdev->dev;
@@ -876,6 +922,8 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
 	.clear_dev_hw_err_status = hpre_clear_hw_err_status,
 	.log_dev_hw_err		= hpre_log_hw_error,
 	.open_axi_master_ooo	= hpre_open_axi_master_ooo,
+	.open_sva_prefetch	= hpre_open_sva_prefetch,
+	.close_sva_prefetch	= hpre_close_sva_prefetch,
 	.err_info_init		= hpre_err_info_init,
 };
 
@@ -888,6 +936,8 @@ static int hpre_pf_probe_init(struct hpre *hpre)
 	if (ret)
 		return ret;
 
+	hpre_open_sva_prefetch(qm);
+
 	qm->err_ini = &hpre_err_ini;
 	qm->err_ini->err_info_init(qm);
 	hisi_qm_dev_err_init(qm);
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index a7cd314073c24..fe35ea949a5bb 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -95,6 +95,7 @@
 #define QM_DOORBELL_SQ_CQ_BASE_V2	0x1000
 #define QM_DOORBELL_EQ_AEQ_BASE_V2	0x2000
 #define QM_QUE_ISO_CFG_V		0x0030
+#define QM_PAGE_SIZE			0x0034
 #define QM_QUE_ISO_EN			0x100154
 #define QM_CAPBILITY			0x100158
 #define QM_QP_NUN_MASK			GENMASK(10, 0)
@@ -796,6 +797,32 @@ static void qm_init_qp_status(struct hisi_qp *qp)
 	atomic_set(&qp_status->used, 0);
 }
 
+static void qm_init_prefetch(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 page_type = 0x0;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	switch (PAGE_SIZE) {
+	case SZ_4K:
+		page_type = 0x0;
+		break;
+	case SZ_16K:
+		page_type = 0x1;
+		break;
+	case SZ_64K:
+		page_type = 0x2;
+		break;
+	default:
+		dev_err(dev, "system page size is not support: %lu, default set to 4KB",
+			PAGE_SIZE);
+	}
+
+	writel(page_type, qm->io_base + QM_PAGE_SIZE);
+}
+
 static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
 			    u32 number)
 {
@@ -2974,6 +3001,8 @@ static int __hisi_qm_start(struct hisi_qm *qm)
 	if (ret)
 		return ret;
 
+	qm_init_prefetch(qm);
+
 	writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK);
 	writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK);
 
@@ -3898,6 +3927,9 @@ static int qm_soft_reset(struct hisi_qm *qm)
 		return ret;
 	}
 
+	if (qm->err_ini->close_sva_prefetch)
+		qm->err_ini->close_sva_prefetch(qm);
+
 	ret = qm_set_pf_mse(qm, false);
 	if (ret) {
 		pci_err(pdev, "Fails to disable pf MSE bit.\n");
@@ -3967,6 +3999,9 @@ static void qm_restart_prepare(struct hisi_qm *qm)
 {
 	u32 value;
 
+	if (qm->err_ini->open_sva_prefetch)
+		qm->err_ini->open_sva_prefetch(qm);
+
 	if (qm->ver >= QM_HW_V3)
 		return;
 
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index acefdf8b3a50e..9048aa6e5f8ab 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -188,6 +188,8 @@ struct hisi_qm_err_ini {
 	void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
 	void (*open_axi_master_ooo)(struct hisi_qm *qm);
 	void (*close_axi_master_ooo)(struct hisi_qm *qm);
+	void (*open_sva_prefetch)(struct hisi_qm *qm);
+	void (*close_sva_prefetch)(struct hisi_qm *qm);
 	void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
 	void (*err_info_init)(struct hisi_qm *qm);
 };
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 6a4408ea18c1c..8ab4e67b8a417 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -85,6 +85,12 @@
 #define SEC_USER1_SMMU_MASK		(~SEC_USER1_SVA_SET)
 #define SEC_CORE_INT_STATUS_M_ECC	BIT(2)
 
+#define SEC_PREFETCH_CFG		0x301130
+#define SEC_SVA_TRANS			0x301EC4
+#define SEC_PREFETCH_ENABLE		(~(BIT(0) | BIT(1) | BIT(11)))
+#define SEC_PREFETCH_DISABLE		BIT(1)
+#define SEC_SVA_DISABLE_READY		(BIT(7) | BIT(11))
+
 #define SEC_DELAY_10_US			10
 #define SEC_POLL_TIMEOUT_US		1000
 #define SEC_DBGFS_VAL_MAX_LEN		20
@@ -332,6 +338,42 @@ static u8 sec_get_endian(struct hisi_qm *qm)
 		return SEC_64BE;
 }
 
+static void sec_open_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Enable prefetch */
+	val = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG);
+	val &= SEC_PREFETCH_ENABLE;
+	writel(val, qm->io_base + SEC_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + SEC_PREFETCH_CFG,
+					 val, !(val & SEC_PREFETCH_DISABLE),
+					 SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+static void sec_close_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	val = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG);
+	val |= SEC_PREFETCH_DISABLE;
+	writel(val, qm->io_base + SEC_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + SEC_SVA_TRANS,
+					 val, !(val & SEC_SVA_DISABLE_READY),
+					 SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to close sva prefetch\n");
+}
+
 static int sec_engine_init(struct hisi_qm *qm)
 {
 	int ret;
@@ -751,6 +793,8 @@ static const struct hisi_qm_err_ini sec_err_ini = {
 	.clear_dev_hw_err_status = sec_clear_hw_err_status,
 	.log_dev_hw_err		= sec_log_hw_error,
 	.open_axi_master_ooo	= sec_open_axi_master_ooo,
+	.open_sva_prefetch	= sec_open_sva_prefetch,
+	.close_sva_prefetch	= sec_close_sva_prefetch,
 	.err_info_init		= sec_err_info_init,
 };
 
@@ -766,6 +810,7 @@ static int sec_pf_probe_init(struct sec_dev *sec)
 	if (ret)
 		return ret;
 
+	sec_open_sva_prefetch(qm);
 	hisi_qm_dev_err_init(qm);
 	sec_debug_regs_clear(qm);
 
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 3e23f2a1cf5a5..9e4c49cd6f3ab 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -97,6 +97,14 @@
 #define HZIP_RD_CNT_CLR_CE_EN		(HZIP_CNT_CLR_CE_EN | \
 					 HZIP_RO_CNT_CLR_CE_EN)
 
+#define HZIP_PREFETCH_CFG		0x3011B0
+#define HZIP_SVA_TRANS			0x3011C4
+#define HZIP_PREFETCH_ENABLE		(~(BIT(26) | BIT(17) | BIT(0)))
+#define HZIP_SVA_PREFETCH_DISABLE	BIT(26)
+#define HZIP_SVA_DISABLE_READY		(BIT(26) | BIT(30))
+#define HZIP_DELAY_1_US		1
+#define HZIP_POLL_TIMEOUT_US	1000
+
 static const char hisi_zip_name[] = "hisi_zip";
 static struct dentry *hzip_debugfs_root;
 
@@ -263,6 +271,45 @@ int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
 	return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
 }
 
+static void hisi_zip_open_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Enable prefetch */
+	val = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG);
+	val &= HZIP_PREFETCH_ENABLE;
+	writel(val, qm->io_base + HZIP_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + HZIP_PREFETCH_CFG,
+					 val, !(val & HZIP_SVA_PREFETCH_DISABLE),
+					 HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	val = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG);
+	val |= HZIP_SVA_PREFETCH_DISABLE;
+	writel(val, qm->io_base + HZIP_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + HZIP_SVA_TRANS,
+					 val, !(val & HZIP_SVA_DISABLE_READY),
+					 HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to close sva prefetch\n");
+}
+
 static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
 {
 	void __iomem *base = qm->io_base;
@@ -696,6 +743,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
 	.log_dev_hw_err		= hisi_zip_log_hw_error,
 	.open_axi_master_ooo	= hisi_zip_open_axi_master_ooo,
 	.close_axi_master_ooo	= hisi_zip_close_axi_master_ooo,
+	.open_sva_prefetch	= hisi_zip_open_sva_prefetch,
+	.close_sva_prefetch	= hisi_zip_close_sva_prefetch,
 	.err_info_init		= hisi_zip_err_info_init,
 };
 
@@ -714,6 +763,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
 	qm->err_ini->err_info_init(qm);
 
 	hisi_zip_set_user_domain_and_cache(qm);
+	hisi_zip_open_sva_prefetch(qm);
 	hisi_qm_dev_err_init(qm);
 	hisi_zip_debug_regs_clear(qm);
 
-- 
GitLab


From fed1bd51a504eb96caa38b4f13ab138fc169ea75 Mon Sep 17 00:00:00 2001
From: Qii Wang <qii.wang@mediatek.com>
Date: Thu, 27 May 2021 20:04:04 +0800
Subject: [PATCH 1804/3804] i2c: mediatek: Disable i2c start_en and clear
 intr_stat before reset

The i2c controller driver does a DMA reset after a transfer timeout,
but sometimes the DMA reset triggers an unexpected DMA_ERR irq.
This causes the i2c controller to continuously send interrupts
to the system, resulting in a soft lock-up. So we need to disable i2c
start_en and clear intr_stat to stop the i2c controller before the DMA
reset when a transfer times out.

Fixes: aafced673c06 ("i2c: mediatek: move dma reset before i2c reset")
Signed-off-by: Qii Wang <qii.wang@mediatek.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-mt65xx.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 5ddfa4e56ee23..4e9fb6b44436a 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -479,6 +479,11 @@ static void mtk_i2c_clock_disable(struct mtk_i2c *i2c)
 static void mtk_i2c_init_hw(struct mtk_i2c *i2c)
 {
 	u16 control_reg;
+	u16 intr_stat_reg;
+
+	mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_START);
+	intr_stat_reg = mtk_i2c_readw(i2c, OFFSET_INTR_STAT);
+	mtk_i2c_writew(i2c, intr_stat_reg, OFFSET_INTR_STAT);
 
 	if (i2c->dev_comp->apdma_sync) {
 		writel(I2C_DMA_WARM_RST, i2c->pdmabase + OFFSET_RST);
-- 
GitLab


From 24990423267ec283b9d86f07f362b753eb9b0ed5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 26 May 2021 08:39:37 -0400
Subject: [PATCH 1805/3804] i2c: s3c2410: fix possible NULL pointer deref on
 read message after write

The interrupt handler processes multiple message write requests one
after another, until the driver message queue is drained.  However, if
the driver encounters a read message without a preceding START, it stops
the I2C transfer, as this is an invalid condition for the controller.
At least the comment describes a requirement: "the controller forces us
to send a new START when we change direction".  This stop results in
clearing the message queue (i2c->msg = NULL).

The code, however, immediately jumped back to the label "retry_write",
which dereferenced "i2c->msg", making it a possible NULL pointer
dereference.

The Coverity analysis:
1. Condition !is_msgend(i2c), taking false branch.
   if (!is_msgend(i2c)) {

2. Condition !is_lastmsg(i2c), taking true branch.
   } else if (!is_lastmsg(i2c)) {

3. Condition i2c->msg->flags & 1, taking true branch.
   if (i2c->msg->flags & I2C_M_RD) {

4. write_zero_model: Passing i2c to s3c24xx_i2c_stop, which sets i2c->msg to NULL.
   s3c24xx_i2c_stop(i2c, -EINVAL);

5. Jumping to label retry_write.
   goto retry_write;

6. var_deref_model: Passing i2c to is_msgend, which dereferences null i2c->msg.
   if (!is_msgend(i2c)) {

All previous calls to s3c24xx_i2c_stop() in this interrupt service
routine are followed by jumping to end of function (acknowledging
the interrupt and returning).  This seems a reasonable choice also here
since message buffer was entirely emptied.

Addresses-Coverity: Explicit null dereferenced
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-s3c2410.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c
index ab928613afba4..4d82761e1585e 100644
--- a/drivers/i2c/busses/i2c-s3c2410.c
+++ b/drivers/i2c/busses/i2c-s3c2410.c
@@ -480,7 +480,10 @@ static int i2c_s3c_irq_nextbyte(struct s3c24xx_i2c *i2c, unsigned long iicstat)
 					 * forces us to send a new START
 					 * when we change direction
 					 */
+					dev_dbg(i2c->dev,
+						"missing START before write->read\n");
 					s3c24xx_i2c_stop(i2c, -EINVAL);
+					break;
 				}
 
 				goto retry_write;
-- 
GitLab


From 56dde68f85be0a20935bb4ed996db7a7f68b3202 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 28 May 2021 10:58:49 +0200
Subject: [PATCH 1806/3804] Revert "serial: 8250: 8250_omap: Fix possible
 interrupt storm"

This reverts commit 31fae7c8b18c3f8029a2a5dce97a3182c1a167a0.

Tony writes:
	I just noticed this causes the following regression in Linux
	next when pressing a key on uart console after boot at least on
	omap3. This seems to happen on serial_port_in(port, UART_RX) in
	the quirk handling.

So let's drop this.

Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/YLCCJzkkB4N7LTQS@atomide.com
Fixes: 31fae7c8b18c ("serial: 8250: 8250_omap: Fix possible interrupt storm")
Reported-by: Tony Lindgren <tony@atomide.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_omap.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index c71bd766fa564..8ac11eaeca51b 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -104,9 +104,6 @@
 #define UART_OMAP_EFR2			0x23
 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE	BIT(6)
 
-/* RX FIFO occupancy indicator */
-#define UART_OMAP_RX_LVL		0x64
-
 struct omap8250_priv {
 	int line;
 	u8 habit;
@@ -628,15 +625,6 @@ static irqreturn_t omap8250_irq(int irq, void *dev_id)
 	serial8250_rpm_get(up);
 	iir = serial_port_in(port, UART_IIR);
 	ret = serial8250_handle_irq(port, iir);
-	/*
-	 * It is possible that RX TIMEOUT is signalled after FIFO
-	 * has been drained, in which case a dummy read of RX FIFO is
-	 * required to clear RX TIMEOUT condition.
-	 */
-	if ((iir & UART_IIR_RX_TIMEOUT) == UART_IIR_RX_TIMEOUT) {
-		if (serial_port_in(port, UART_OMAP_RX_LVL) == 0)
-			serial_port_in(port, UART_RX);
-	}
 	serial8250_rpm_put(up);
 
 	return IRQ_RETVAL(ret);
-- 
GitLab


From 82123a3d1d5a306fdf50c968a474cc60fe43a80f Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 19 May 2021 16:17:17 +0530
Subject: [PATCH 1807/3804] powerpc/kprobes: Fix validation of prefixed
 instructions across page boundary

When checking if the probed instruction is the suffix of a prefixed
instruction, we access the instruction at the previous word. If the
probed instruction is the very first word of a module, we can end up
trying to access an invalid page.

Fix this by skipping the check for all instructions at the beginning of
a page. Prefixed instructions cannot cross a 64-byte boundary and as
such, we don't expect to encounter a suffix as the very first word in a
page for kernel text. Even if there are prefixed instructions crossing
a page boundary (from a module, for instance), the instruction will be
illegal, so preventing probing on the suffix of such prefix instructions
isn't worthwhile.

Fixes: b4657f7650ba ("powerpc/kprobes: Don't allow breakpoints on suffixes")
Cc: stable@vger.kernel.org # v5.8+
Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/0df9a032a05576a2fa8e97d1b769af2ff0eafbd6.1621416666.git.naveen.n.rao@linux.vnet.ibm.com
---
 arch/powerpc/kernel/kprobes.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01ab2163659e4..e8c2a6373157d 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -108,7 +108,6 @@ int arch_prepare_kprobe(struct kprobe *p)
 	int ret = 0;
 	struct kprobe *prev;
 	struct ppc_inst insn = ppc_inst_read((struct ppc_inst *)p->addr);
-	struct ppc_inst prefix = ppc_inst_read((struct ppc_inst *)(p->addr - 1));
 
 	if ((unsigned long)p->addr & 0x03) {
 		printk("Attempt to register kprobe at an unaligned address\n");
@@ -116,7 +115,8 @@ int arch_prepare_kprobe(struct kprobe *p)
 	} else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
 		printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
 		ret = -EINVAL;
-	} else if (ppc_inst_prefixed(prefix)) {
+	} else if ((unsigned long)p->addr & ~PAGE_MASK &&
+		   ppc_inst_prefixed(ppc_inst_read((struct ppc_inst *)(p->addr - 1)))) {
 		printk("Cannot register a kprobe on the second word of prefixed instruction\n");
 		ret = -EINVAL;
 	}
-- 
GitLab


From 8fc4e4aa2bfca8d32e8bc2a01526ea2da450e6cb Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Tue, 25 May 2021 12:07:23 +0530
Subject: [PATCH 1808/3804] perf vendor events powerpc: Fix eventcode of
 power10 JSON events

Fix the eventcode values in the power10 JSON event files by prepending
"0x", since these are hexadecimal values.

The patch also changes the event descriptions of the
PM_EXEC_STALL_LOAD_FINISH and PM_EXEC_STALL_NTC_FLUSH events and moves
some events to the correct files.

Fixes: 32daa5d7899e ("perf vendor events: Initial JSON/events list for power10 platform")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Paul A. Clarke <pc@us.ibm.com>
Tested-by: Nageswara R Sastry <rnsastry@linux.ibm.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lore.kernel.org/lkml/20210525063723.1191514-1-kjain@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/powerpc/power10/cache.json           |  30 ++--
 .../arch/powerpc/power10/floating_point.json  |   2 +-
 .../arch/powerpc/power10/frontend.json        | 124 ++++++++++------
 .../arch/powerpc/power10/locks.json           |   4 +-
 .../arch/powerpc/power10/marked.json          |  61 ++++----
 .../arch/powerpc/power10/memory.json          |  79 +++++-----
 .../arch/powerpc/power10/others.json          | 133 +++++++----------
 .../arch/powerpc/power10/pipeline.json        | 135 +++++++++---------
 .../pmu-events/arch/powerpc/power10/pmc.json  |   8 +-
 .../arch/powerpc/power10/translation.json     |  22 +--
 10 files changed, 299 insertions(+), 299 deletions(-)

diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
index 616f29098c710..605be14f441c8 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/cache.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json
@@ -1,46 +1,56 @@
 [
   {
-    "EventCode": "1003C",
+    "EventCode": "0x1003C",
     "EventName": "PM_EXEC_STALL_DMISS_L2L3",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3."
   },
   {
-    "EventCode": "34056",
+    "EventCode": "0x1E054",
+    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
+  },
+  {
+    "EventCode": "0x34054",
+    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
+  },
+  {
+    "EventCode": "0x34056",
     "EventName": "PM_EXEC_STALL_LOAD_FINISH",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ."
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ; cycles in which the NTF instruction is waiting for a data reload for a load miss, but the data comes back with a non-NTF instruction."
   },
   {
-    "EventCode": "3006C",
+    "EventCode": "0x3006C",
     "EventName": "PM_RUN_CYC_SMT2_MODE",
     "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode."
   },
   {
-    "EventCode": "300F4",
+    "EventCode": "0x300F4",
     "EventName": "PM_RUN_INST_CMPL_CONC",
     "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set."
   },
   {
-    "EventCode": "4C016",
+    "EventCode": "0x4C016",
     "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict."
   },
   {
-    "EventCode": "4D014",
+    "EventCode": "0x4D014",
     "EventName": "PM_EXEC_STALL_LOAD",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "4D016",
+    "EventCode": "0x4D016",
     "EventName": "PM_EXEC_STALL_PTESYNC",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "401EA",
+    "EventCode": "0x401EA",
     "EventName": "PM_THRESH_EXC_128",
     "BriefDescription": "Threshold counter exceeded a value of 128."
   },
   {
-    "EventCode": "400F6",
+    "EventCode": "0x400F6",
     "EventName": "PM_BR_MPRED_CMPL",
     "BriefDescription": "A mispredicted branch completed. Includes direction and target."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
index 703cd431ae5b0..54acb55e2c8c6 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json
@@ -1,6 +1,6 @@
 [
   {
-    "EventCode": "4016E",
+    "EventCode": "0x4016E",
     "EventName": "PM_THRESH_NOT_MET",
     "BriefDescription": "Threshold counter did not meet threshold."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
index eac8609dcc90d..558f9530f54ec 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json
@@ -1,216 +1,246 @@
 [
   {
-    "EventCode": "10004",
+    "EventCode": "0x10004",
     "EventName": "PM_EXEC_STALL_TRANSLATION",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve."
   },
   {
-    "EventCode": "10010",
+    "EventCode": "0x10006",
+    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
+    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
+  },
+  {
+    "EventCode": "0x10010",
     "EventName": "PM_PMC4_OVERFLOW",
     "BriefDescription": "The event selected for PMC4 caused the event counter to overflow."
   },
   {
-    "EventCode": "10020",
+    "EventCode": "0x10020",
     "EventName": "PM_PMC4_REWIND",
     "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged."
   },
   {
-    "EventCode": "10038",
+    "EventCode": "0x10038",
     "EventName": "PM_DISP_STALL_TRANSLATION",
     "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss."
   },
   {
-    "EventCode": "1003A",
+    "EventCode": "0x1003A",
     "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2",
     "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict."
   },
   {
-    "EventCode": "1E050",
+    "EventCode": "0x1D05E",
+    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
+    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
+  },
+  {
+    "EventCode": "0x1E050",
     "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC",
     "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR."
   },
   {
-    "EventCode": "1F054",
+    "EventCode": "0x1F054",
     "EventName": "PM_DTLB_HIT",
     "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT."
   },
   {
-    "EventCode": "101E8",
+    "EventCode": "0x10064",
+    "EventName": "PM_DISP_STALL_IC_L2",
+    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+  },
+  {
+    "EventCode": "0x101E8",
     "EventName": "PM_THRESH_EXC_256",
     "BriefDescription": "Threshold counter exceeded a count of 256."
   },
   {
-    "EventCode": "101EC",
+    "EventCode": "0x101EC",
     "EventName": "PM_THRESH_MET",
     "BriefDescription": "Threshold exceeded."
   },
   {
-    "EventCode": "100F2",
+    "EventCode": "0x100F2",
     "EventName": "PM_1PLUS_PPC_CMPL",
     "BriefDescription": "Cycles in which at least one instruction is completed by this thread."
   },
   {
-    "EventCode": "100F6",
+    "EventCode": "0x100F6",
     "EventName": "PM_IERAT_MISS",
     "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event."
   },
   {
-    "EventCode": "100F8",
+    "EventCode": "0x100F8",
     "EventName": "PM_DISP_STALL_CYC",
     "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)."
   },
   {
-    "EventCode": "20114",
+    "EventCode": "0x20006",
+    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
+    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
+  },
+  {
+    "EventCode": "0x20114",
     "EventName": "PM_MRK_L2_RC_DISP",
     "BriefDescription": "Marked instruction RC dispatched in L2."
   },
   {
-    "EventCode": "2C010",
+    "EventCode": "0x2C010",
     "EventName": "PM_EXEC_STALL_LSU",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions."
   },
   {
-    "EventCode": "2C016",
+    "EventCode": "0x2C016",
     "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS",
     "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss."
   },
   {
-    "EventCode": "2C01E",
+    "EventCode": "0x2C01E",
     "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3",
     "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict."
   },
   {
-    "EventCode": "2D01A",
+    "EventCode": "0x2D01A",
     "EventName": "PM_DISP_STALL_IC_MISS",
     "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss."
   },
   {
-    "EventCode": "2D01C",
-    "EventName": "PM_CMPL_STALL_STCX",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
-  },
-  {
-    "EventCode": "2E018",
+    "EventCode": "0x2E018",
     "EventName": "PM_DISP_STALL_FETCH",
     "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held."
   },
   {
-    "EventCode": "2E01A",
+    "EventCode": "0x2E01A",
     "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC",
     "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full."
   },
   {
-    "EventCode": "2C142",
+    "EventCode": "0x2C142",
     "EventName": "PM_MRK_XFER_FROM_SRC_PMC2",
     "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "24050",
+    "EventCode": "0x24050",
     "EventName": "PM_IOPS_DISP",
     "BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction."
   },
   {
-    "EventCode": "2405E",
+    "EventCode": "0x2405E",
     "EventName": "PM_ISSUE_CANCEL",
     "BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction."
   },
   {
-    "EventCode": "200FA",
+    "EventCode": "0x200FA",
     "EventName": "PM_BR_TAKEN_CMPL",
     "BriefDescription": "Branch Taken instruction completed."
   },
   {
-    "EventCode": "30012",
+    "EventCode": "0x30004",
+    "EventName": "PM_DISP_STALL_FLUSH",
+    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
+  },
+  {
+    "EventCode": "0x3000A",
+    "EventName": "PM_DISP_STALL_ITLB_MISS",
+    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
+  },
+  {
+    "EventCode": "0x30012",
     "EventName": "PM_FLUSH_COMPLETION",
     "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush."
   },
   {
-    "EventCode": "30014",
+    "EventCode": "0x30014",
     "EventName": "PM_EXEC_STALL_STORE",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "30018",
+    "EventCode": "0x30018",
     "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC",
     "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together."
   },
   {
-    "EventCode": "30026",
+    "EventCode": "0x30026",
     "EventName": "PM_EXEC_STALL_STORE_MISS",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1."
   },
   {
-    "EventCode": "3012A",
+    "EventCode": "0x3012A",
     "EventName": "PM_MRK_L2_RC_DONE",
     "BriefDescription": "L2 RC machine completed the transaction for the marked instruction."
   },
   {
-    "EventCode": "3F046",
+    "EventCode": "0x3F046",
     "EventName": "PM_ITLB_HIT_1G",
     "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "34058",
+    "EventCode": "0x34058",
     "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS",
     "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss."
   },
   {
-    "EventCode": "3D05C",
+    "EventCode": "0x3D05C",
     "EventName": "PM_DISP_STALL_HELD_RENAME_CYC",
     "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC."
   },
   {
-    "EventCode": "3E052",
+    "EventCode": "0x3E052",
     "EventName": "PM_DISP_STALL_IC_L3",
     "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3."
   },
   {
-    "EventCode": "3E054",
+    "EventCode": "0x3E054",
     "EventName": "PM_LD_MISS_L1",
     "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
   },
   {
-    "EventCode": "301EA",
+    "EventCode": "0x301EA",
     "EventName": "PM_THRESH_EXC_1024",
     "BriefDescription": "Threshold counter exceeded a value of 1024."
   },
   {
-    "EventCode": "300FA",
+    "EventCode": "0x300FA",
     "EventName": "PM_INST_FROM_L3MISS",
     "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
   },
   {
-    "EventCode": "40006",
+    "EventCode": "0x40006",
     "EventName": "PM_ISSUE_KILL",
     "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group."
   },
   {
-    "EventCode": "40116",
+    "EventCode": "0x40116",
     "EventName": "PM_MRK_LARX_FIN",
     "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "4C010",
+    "EventCode": "0x4C010",
     "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS",
     "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch."
   },
   {
-    "EventCode": "4D01E",
+    "EventCode": "0x4D01E",
     "EventName": "PM_DISP_STALL_BR_MPRED",
     "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch."
   },
   {
-    "EventCode": "4E010",
+    "EventCode": "0x4E010",
     "EventName": "PM_DISP_STALL_IC_L3MISS",
     "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3."
   },
   {
-    "EventCode": "4E01A",
+    "EventCode": "0x4E01A",
     "EventName": "PM_DISP_STALL_HELD_CYC",
     "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason."
   },
   {
-    "EventCode": "44056",
+    "EventCode": "0x4003C",
+    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
+    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
+  },
+  {
+    "EventCode": "0x44056",
     "EventName": "PM_VECTOR_ST_CMPL",
     "BriefDescription": "Vector store instructions completed."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
index 016d8de0e14ac..b5a0d65219631 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/locks.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json
@@ -1,11 +1,11 @@
 [
   {
-    "EventCode": "1E058",
+    "EventCode": "0x1E058",
     "EventName": "PM_STCX_FAIL_FIN",
     "BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "4E050",
+    "EventCode": "0x4E050",
     "EventName": "PM_STCX_PASS_FIN",
     "BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
index 93a5a59106480..58b5dfe3a2731 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/marked.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json
@@ -1,146 +1,141 @@
 [
   {
-    "EventCode": "1002C",
+    "EventCode": "0x1002C",
     "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS",
     "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request."
   },
   {
-    "EventCode": "10132",
+    "EventCode": "0x10132",
     "EventName": "PM_MRK_INST_ISSUED",
     "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction."
   },
   {
-    "EventCode": "101E0",
+    "EventCode": "0x101E0",
     "EventName": "PM_MRK_INST_DISP",
     "BriefDescription": "The thread has dispatched a randomly sampled marked instruction."
   },
   {
-    "EventCode": "101E2",
+    "EventCode": "0x101E2",
     "EventName": "PM_MRK_BR_TAKEN_CMPL",
     "BriefDescription": "Marked Branch Taken instruction completed."
   },
   {
-    "EventCode": "20112",
+    "EventCode": "0x20112",
     "EventName": "PM_MRK_NTF_FIN",
     "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch."
   },
   {
-    "EventCode": "2C01C",
+    "EventCode": "0x2C01C",
     "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip."
   },
   {
-    "EventCode": "20138",
+    "EventCode": "0x20138",
     "EventName": "PM_MRK_ST_NEST",
     "BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2."
   },
   {
-    "EventCode": "2013A",
+    "EventCode": "0x2013A",
     "EventName": "PM_MRK_BRU_FIN",
     "BriefDescription": "Marked Branch instruction finished."
   },
   {
-    "EventCode": "2C144",
+    "EventCode": "0x2C144",
     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2",
     "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]."
   },
   {
-    "EventCode": "24156",
+    "EventCode": "0x24156",
     "EventName": "PM_MRK_STCX_FIN",
     "BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "24158",
+    "EventCode": "0x24158",
     "EventName": "PM_MRK_INST",
     "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens."
   },
   {
-    "EventCode": "2415C",
+    "EventCode": "0x2415C",
     "EventName": "PM_MRK_BR_CMPL",
     "BriefDescription": "A marked branch completed. All branches are included."
   },
   {
-    "EventCode": "200FD",
+    "EventCode": "0x200FD",
     "EventName": "PM_L1_ICACHE_MISS",
     "BriefDescription": "Demand iCache Miss."
   },
   {
-    "EventCode": "30130",
+    "EventCode": "0x30130",
     "EventName": "PM_MRK_INST_FIN",
     "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU."
   },
   {
-    "EventCode": "34146",
+    "EventCode": "0x34146",
     "EventName": "PM_MRK_LD_CMPL",
     "BriefDescription": "Marked loads completed."
   },
   {
-    "EventCode": "3E158",
+    "EventCode": "0x3E158",
     "EventName": "PM_MRK_STCX_FAIL",
     "BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "3E15A",
+    "EventCode": "0x3E15A",
     "EventName": "PM_MRK_ST_FIN",
     "BriefDescription": "The marked instruction was a store of any kind."
   },
   {
-    "EventCode": "30068",
+    "EventCode": "0x30068",
     "EventName": "PM_L1_ICACHE_RELOADED_PREF",
     "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)."
   },
   {
-    "EventCode": "301E4",
+    "EventCode": "0x301E4",
     "EventName": "PM_MRK_BR_MPRED_CMPL",
     "BriefDescription": "Marked Branch Mispredicted. Includes direction and target."
   },
   {
-    "EventCode": "300F6",
+    "EventCode": "0x300F6",
     "EventName": "PM_LD_DEMAND_MISS_L1",
     "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish."
   },
   {
-    "EventCode": "300FE",
+    "EventCode": "0x300FE",
     "EventName": "PM_DATA_FROM_L3MISS",
     "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss."
   },
   {
-    "EventCode": "40012",
+    "EventCode": "0x40012",
     "EventName": "PM_L1_ICACHE_RELOADED_ALL",
     "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch."
   },
   {
-    "EventCode": "40134",
+    "EventCode": "0x40134",
     "EventName": "PM_MRK_INST_TIMEO",
     "BriefDescription": "Marked instruction finish timeout (instruction was lost)."
   },
   {
-    "EventCode": "4003C",
-    "EventName": "PM_DISP_STALL_HELD_SYNC_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch."
-  },
-  {
-    "EventCode": "4505A",
+    "EventCode": "0x4505A",
     "EventName": "PM_SP_FLOP_CMPL",
     "BriefDescription": "Single Precision floating point instructions completed."
   },
   {
-    "EventCode": "4D058",
+    "EventCode": "0x4D058",
     "EventName": "PM_VECTOR_FLOP_CMPL",
     "BriefDescription": "Vector floating point instructions completed."
   },
   {
-    "EventCode": "4D05A",
+    "EventCode": "0x4D05A",
     "EventName": "PM_NON_MATH_FLOP_CMPL",
     "BriefDescription": "Non Math instructions completed."
   },
   {
-    "EventCode": "401E0",
+    "EventCode": "0x401E0",
     "EventName": "PM_MRK_INST_CMPL",
     "BriefDescription": "marked instruction completed."
   },
   {
-    "EventCode": "400FE",
+    "EventCode": "0x400FE",
     "EventName": "PM_DATA_FROM_MEMORY",
     "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
index b01141eeebee1..843b51f531e95 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/memory.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json
@@ -1,191 +1,186 @@
 [
   {
-    "EventCode": "1000A",
+    "EventCode": "0x1000A",
     "EventName": "PM_PMC3_REWIND",
     "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged."
   },
   {
-    "EventCode": "1C040",
+    "EventCode": "0x1C040",
     "EventName": "PM_XFER_FROM_SRC_PMC1",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "1C142",
+    "EventCode": "0x1C142",
     "EventName": "PM_MRK_XFER_FROM_SRC_PMC1",
     "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "1C144",
+    "EventCode": "0x1C144",
     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1",
     "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]."
   },
   {
-    "EventCode": "1C056",
+    "EventCode": "0x1C056",
     "EventName": "PM_DERAT_MISS_4K",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "1C058",
+    "EventCode": "0x1C058",
     "EventName": "PM_DTLB_MISS_16G",
     "BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "1C05C",
+    "EventCode": "0x1C05C",
     "EventName": "PM_DTLB_MISS_2M",
     "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "1E056",
+    "EventCode": "0x1E056",
     "EventName": "PM_EXEC_STALL_STORE_PIPE",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions."
   },
   {
-    "EventCode": "1F150",
+    "EventCode": "0x1F150",
     "EventName": "PM_MRK_ST_L2_CYC",
     "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion."
   },
   {
-    "EventCode": "10062",
+    "EventCode": "0x10062",
     "EventName": "PM_LD_L3MISS_PEND_CYC",
     "BriefDescription": "Cycles L3 miss was pending for this thread."
   },
   {
-    "EventCode": "20010",
+    "EventCode": "0x20010",
     "EventName": "PM_PMC1_OVERFLOW",
     "BriefDescription": "The event selected for PMC1 caused the event counter to overflow."
   },
   {
-    "EventCode": "2001A",
+    "EventCode": "0x2001A",
     "EventName": "PM_ITLB_HIT",
     "BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "2003E",
+    "EventCode": "0x2003E",
     "EventName": "PM_PTESYNC_FIN",
     "BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time."
   },
   {
-    "EventCode": "2C040",
+    "EventCode": "0x2C040",
     "EventName": "PM_XFER_FROM_SRC_PMC2",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "2C054",
+    "EventCode": "0x2C054",
     "EventName": "PM_DERAT_MISS_64K",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "2C056",
+    "EventCode": "0x2C056",
     "EventName": "PM_DTLB_MISS_4K",
     "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "2D154",
+    "EventCode": "0x2D154",
     "EventName": "PM_MRK_DERAT_MISS_64K",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "200F6",
+    "EventCode": "0x200F6",
     "EventName": "PM_DERAT_MISS",
     "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "3000A",
-    "EventName": "PM_DISP_STALL_ITLB_MISS",
-    "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss."
-  },
-  {
-    "EventCode": "30016",
+    "EventCode": "0x30016",
     "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve."
   },
   {
-    "EventCode": "3C040",
+    "EventCode": "0x3C040",
     "EventName": "PM_XFER_FROM_SRC_PMC3",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "3C142",
+    "EventCode": "0x3C142",
     "EventName": "PM_MRK_XFER_FROM_SRC_PMC3",
     "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "3C144",
+    "EventCode": "0x3C144",
     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3",
     "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]."
   },
   {
-    "EventCode": "3C054",
+    "EventCode": "0x3C054",
     "EventName": "PM_DERAT_MISS_16M",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "3C056",
+    "EventCode": "0x3C056",
     "EventName": "PM_DTLB_MISS_64K",
     "BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "3C058",
+    "EventCode": "0x3C058",
     "EventName": "PM_LARX_FIN",
     "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "301E2",
+    "EventCode": "0x301E2",
     "EventName": "PM_MRK_ST_CMPL",
     "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores."
   },
   {
-    "EventCode": "300FC",
+    "EventCode": "0x300FC",
     "EventName": "PM_DTLB_MISS",
     "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity."
   },
   {
-    "EventCode": "4D02C",
+    "EventCode": "0x4D02C",
     "EventName": "PM_PMC1_REWIND",
     "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged."
   },
   {
-    "EventCode": "4003E",
+    "EventCode": "0x4003E",
     "EventName": "PM_LD_CMPL",
     "BriefDescription": "Loads completed."
   },
   {
-    "EventCode": "4C040",
+    "EventCode": "0x4C040",
     "EventName": "PM_XFER_FROM_SRC_PMC4",
     "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "4C142",
+    "EventCode": "0x4C142",
     "EventName": "PM_MRK_XFER_FROM_SRC_PMC4",
     "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads."
   },
   {
-    "EventCode": "4C144",
+    "EventCode": "0x4C144",
     "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4",
     "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]."
   },
   {
-    "EventCode": "4C056",
+    "EventCode": "0x4C056",
     "EventName": "PM_DTLB_MISS_16M",
     "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "4C05A",
+    "EventCode": "0x4C05A",
     "EventName": "PM_DTLB_MISS_1G",
     "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "4C15E",
+    "EventCode": "0x4C15E",
     "EventName": "PM_MRK_DTLB_MISS_64K",
     "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "4D056",
+    "EventCode": "0x4D056",
     "EventName": "PM_NON_FMA_FLOP_CMPL",
     "BriefDescription": "Non FMA instruction completed."
   },
   {
-    "EventCode": "40164",
+    "EventCode": "0x40164",
     "EventName": "PM_MRK_DERAT_MISS_2M",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json
index a119e56cbf1c3..7d0de1a2860b4 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/others.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json
@@ -1,296 +1,271 @@
 [
   {
-    "EventCode": "10016",
+    "EventCode": "0x10016",
     "EventName": "PM_VSU0_ISSUE",
     "BriefDescription": "VSU instructions issued to VSU pipe 0."
   },
   {
-    "EventCode": "1001C",
+    "EventCode": "0x1001C",
     "EventName": "PM_ULTRAVISOR_INST_CMPL",
     "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state."
   },
   {
-    "EventCode": "100F0",
+    "EventCode": "0x100F0",
     "EventName": "PM_CYC",
     "BriefDescription": "Processor cycles."
   },
   {
-    "EventCode": "10134",
+    "EventCode": "0x10134",
     "EventName": "PM_MRK_ST_DONE_L2",
     "BriefDescription": "Marked stores completed in L2 (RC machine done)."
   },
   {
-    "EventCode": "1505E",
+    "EventCode": "0x1505E",
     "EventName": "PM_LD_HIT_L1",
     "BriefDescription": "Loads that finished without experiencing an L1 miss."
   },
   {
-    "EventCode": "1D05E",
-    "EventName": "PM_DISP_STALL_HELD_HALT_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management."
-  },
-  {
-    "EventCode": "1E054",
-    "EventName": "PM_EXEC_STALL_DMISS_L21_L31",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip."
-  },
-  {
-    "EventCode": "1E05A",
-    "EventName": "PM_CMPL_STALL_LWSYNC",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
-  },
-  {
-    "EventCode": "1F056",
+    "EventCode": "0x1F056",
     "EventName": "PM_DISP_SS0_2_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions."
   },
   {
-    "EventCode": "1F15C",
+    "EventCode": "0x1F15C",
     "EventName": "PM_MRK_STCX_L2_CYC",
     "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)."
   },
   {
-    "EventCode": "10066",
+    "EventCode": "0x10066",
     "EventName": "PM_ADJUNCT_CYC",
     "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011."
   },
   {
-    "EventCode": "101E4",
+    "EventCode": "0x101E4",
     "EventName": "PM_MRK_L1_ICACHE_MISS",
     "BriefDescription": "Marked Instruction suffered an icache Miss."
   },
   {
-    "EventCode": "101EA",
+    "EventCode": "0x101EA",
     "EventName": "PM_MRK_L1_RELOAD_VALID",
     "BriefDescription": "Marked demand reload."
   },
   {
-    "EventCode": "100F4",
+    "EventCode": "0x100F4",
     "EventName": "PM_FLOP_CMPL",
     "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops."
   },
   {
-    "EventCode": "100FA",
+    "EventCode": "0x100FA",
     "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC",
     "BriefDescription": "Cycles when at least one thread has the run latch set."
   },
   {
-    "EventCode": "100FC",
+    "EventCode": "0x100FC",
     "EventName": "PM_LD_REF_L1",
     "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included."
   },
   {
-    "EventCode": "20006",
-    "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue."
-  },
-  {
-    "EventCode": "2000C",
+    "EventCode": "0x2000C",
     "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC",
     "BriefDescription": "Cycles when the run latch is set for all threads."
   },
   {
-    "EventCode": "2E010",
+    "EventCode": "0x2E010",
     "EventName": "PM_ADJUNCT_INST_CMPL",
     "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state."
   },
   {
-    "EventCode": "2E014",
+    "EventCode": "0x2E014",
     "EventName": "PM_STCX_FIN",
     "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock."
   },
   {
-    "EventCode": "20130",
+    "EventCode": "0x20130",
     "EventName": "PM_MRK_INST_DECODED",
     "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only."
   },
   {
-    "EventCode": "20132",
+    "EventCode": "0x20132",
     "EventName": "PM_MRK_DFU_ISSUE",
     "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time."
   },
   {
-    "EventCode": "20134",
+    "EventCode": "0x20134",
     "EventName": "PM_MRK_FXU_ISSUE",
     "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time."
   },
   {
-    "EventCode": "2505C",
+    "EventCode": "0x2505C",
     "EventName": "PM_VSU_ISSUE",
     "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations."
   },
   {
-    "EventCode": "2F054",
+    "EventCode": "0x2F054",
     "EventName": "PM_DISP_SS1_2_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions."
   },
   {
-    "EventCode": "2F056",
+    "EventCode": "0x2F056",
     "EventName": "PM_DISP_SS1_4_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions."
   },
   {
-    "EventCode": "2006C",
+    "EventCode": "0x2006C",
     "EventName": "PM_RUN_CYC_SMT4_MODE",
     "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode."
   },
   {
-    "EventCode": "201E0",
+    "EventCode": "0x201E0",
     "EventName": "PM_MRK_DATA_FROM_MEMORY",
     "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load."
   },
   {
-    "EventCode": "201E4",
+    "EventCode": "0x201E4",
     "EventName": "PM_MRK_DATA_FROM_L3MISS",
     "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load."
   },
   {
-    "EventCode": "201E8",
+    "EventCode": "0x201E8",
     "EventName": "PM_THRESH_EXC_512",
     "BriefDescription": "Threshold counter exceeded a value of 512."
   },
   {
-    "EventCode": "200F2",
+    "EventCode": "0x200F2",
     "EventName": "PM_INST_DISP",
     "BriefDescription": "PowerPC instructions dispatched."
   },
   {
-    "EventCode": "30132",
+    "EventCode": "0x30132",
     "EventName": "PM_MRK_VSU_FIN",
     "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit."
   },
   {
-    "EventCode": "30038",
+    "EventCode": "0x30038",
     "EventName": "PM_EXEC_STALL_DMISS_LMEM",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory."
   },
   {
-    "EventCode": "3F04A",
+    "EventCode": "0x3F04A",
     "EventName": "PM_LSU_ST5_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST2 port."
   },
   {
-    "EventCode": "34054",
-    "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict."
-  },
-  {
-    "EventCode": "3405A",
+    "EventCode": "0x3405A",
     "EventName": "PM_PRIVILEGED_INST_CMPL",
     "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state."
   },
   {
-    "EventCode": "3F150",
+    "EventCode": "0x3F150",
     "EventName": "PM_MRK_ST_DRAIN_CYC",
     "BriefDescription": "cycles to drain st from core to L2."
   },
   {
-    "EventCode": "3F054",
+    "EventCode": "0x3F054",
     "EventName": "PM_DISP_SS0_4_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions."
   },
   {
-    "EventCode": "3F056",
+    "EventCode": "0x3F056",
     "EventName": "PM_DISP_SS0_8_INSTR_CYC",
     "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions."
   },
   {
-    "EventCode": "30162",
+    "EventCode": "0x30162",
     "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD",
     "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill."
   },
   {
-    "EventCode": "40114",
+    "EventCode": "0x40114",
     "EventName": "PM_MRK_START_PROBE_NOP_DISP",
     "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0."
   },
   {
-    "EventCode": "4001C",
+    "EventCode": "0x4001C",
     "EventName": "PM_VSU_FIN",
     "BriefDescription": "VSU instructions finished."
   },
   {
-    "EventCode": "4C01A",
+    "EventCode": "0x4C01A",
     "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip."
   },
   {
-    "EventCode": "4D012",
+    "EventCode": "0x4D012",
     "EventName": "PM_PMC3_SAVED",
     "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged."
   },
   {
-    "EventCode": "4D022",
+    "EventCode": "0x4D022",
     "EventName": "PM_HYPERVISOR_INST_CMPL",
     "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state."
   },
   {
-    "EventCode": "4D026",
+    "EventCode": "0x4D026",
     "EventName": "PM_ULTRAVISOR_CYC",
     "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110."
   },
   {
-    "EventCode": "4D028",
+    "EventCode": "0x4D028",
     "EventName": "PM_PRIVILEGED_CYC",
     "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00."
   },
   {
-    "EventCode": "40030",
+    "EventCode": "0x40030",
     "EventName": "PM_INST_FIN",
     "BriefDescription": "Instructions finished."
   },
   {
-    "EventCode": "44146",
+    "EventCode": "0x44146",
     "EventName": "PM_MRK_STCX_CORE_CYC",
     "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2."
   },
   {
-    "EventCode": "44054",
+    "EventCode": "0x44054",
     "EventName": "PM_VECTOR_LD_CMPL",
     "BriefDescription": "Vector load instructions completed."
   },
   {
-    "EventCode": "45054",
+    "EventCode": "0x45054",
     "EventName": "PM_FMA_CMPL",
     "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only."
   },
   {
-    "EventCode": "45056",
+    "EventCode": "0x45056",
     "EventName": "PM_SCALAR_FLOP_CMPL",
     "BriefDescription": "Scalar floating point instructions completed."
   },
   {
-    "EventCode": "4505C",
+    "EventCode": "0x4505C",
     "EventName": "PM_MATH_FLOP_CMPL",
     "BriefDescription": "Math floating point instructions completed."
   },
   {
-    "EventCode": "4D05E",
+    "EventCode": "0x4D05E",
     "EventName": "PM_BR_CMPL",
     "BriefDescription": "A branch completed. All branches are included."
   },
   {
-    "EventCode": "4E15E",
+    "EventCode": "0x4E15E",
     "EventName": "PM_MRK_INST_FLUSHED",
     "BriefDescription": "The marked instruction was flushed."
   },
   {
-    "EventCode": "401E6",
+    "EventCode": "0x401E6",
     "EventName": "PM_MRK_INST_FROM_L3MISS",
     "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction."
   },
   {
-    "EventCode": "401E8",
+    "EventCode": "0x401E8",
     "EventName": "PM_MRK_DATA_FROM_L2MISS",
     "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load."
   },
   {
-    "EventCode": "400F0",
+    "EventCode": "0x400F0",
     "EventName": "PM_LD_DEMAND_MISS_L1_FIN",
     "BriefDescription": "Load Missed L1, counted at finish time."
   },
   {
-    "EventCode": "400FA",
+    "EventCode": "0x400FA",
     "EventName": "PM_RUN_INST_CMPL",
     "BriefDescription": "Completed PowerPC instructions gated by the run latch."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
index b61b5cc157ee3..b8aded6045faa 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json
@@ -1,296 +1,291 @@
 [
   {
-    "EventCode": "100FE",
+    "EventCode": "0x100FE",
     "EventName": "PM_INST_CMPL",
     "BriefDescription": "PowerPC instructions completed."
   },
   {
-    "EventCode": "10006",
-    "EventName": "PM_DISP_STALL_HELD_OTHER_CYC",
-    "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason."
-  },
-  {
-    "EventCode": "1000C",
+    "EventCode": "0x1000C",
     "EventName": "PM_LSU_LD0_FIN",
     "BriefDescription": "LSU Finished an internal operation in LD0 port."
   },
   {
-    "EventCode": "1000E",
+    "EventCode": "0x1000E",
     "EventName": "PM_MMA_ISSUED",
     "BriefDescription": "MMA instructions issued."
   },
   {
-    "EventCode": "10012",
+    "EventCode": "0x10012",
     "EventName": "PM_LSU_ST0_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST0 port."
   },
   {
-    "EventCode": "10014",
+    "EventCode": "0x10014",
     "EventName": "PM_LSU_ST4_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST4 port."
   },
   {
-    "EventCode": "10018",
+    "EventCode": "0x10018",
     "EventName": "PM_IC_DEMAND_CYC",
     "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss."
   },
   {
-    "EventCode": "10022",
+    "EventCode": "0x10022",
     "EventName": "PM_PMC2_SAVED",
     "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged."
   },
   {
-    "EventCode": "10024",
+    "EventCode": "0x10024",
     "EventName": "PM_PMC5_OVERFLOW",
     "BriefDescription": "The event selected for PMC5 caused the event counter to overflow."
   },
   {
-    "EventCode": "10058",
+    "EventCode": "0x10058",
     "EventName": "PM_EXEC_STALL_FIN_AT_DISP",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU."
   },
   {
-    "EventCode": "1005A",
+    "EventCode": "0x1005A",
     "EventName": "PM_FLUSH_MPRED",
     "BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction."
   },
   {
-    "EventCode": "1C05A",
+    "EventCode": "0x1C05A",
     "EventName": "PM_DERAT_MISS_2M",
     "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches."
   },
   {
-    "EventCode": "10064",
-    "EventName": "PM_DISP_STALL_IC_L2",
-    "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2."
+    "EventCode": "0x1E05A",
+    "EventName": "PM_CMPL_STALL_LWSYNC",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete."
   },
   {
-    "EventCode": "10068",
+    "EventCode": "0x10068",
     "EventName": "PM_BR_FIN",
     "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional."
   },
   {
-    "EventCode": "1006A",
+    "EventCode": "0x1006A",
     "EventName": "PM_FX_LSU_FIN",
     "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time."
   },
   {
-    "EventCode": "1006C",
+    "EventCode": "0x1006C",
     "EventName": "PM_RUN_CYC_ST_MODE",
     "BriefDescription": "Cycles when the run latch is set and the core is in ST mode."
   },
   {
-    "EventCode": "20004",
+    "EventCode": "0x20004",
     "EventName": "PM_ISSUE_STALL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet."
   },
   {
-    "EventCode": "2000A",
+    "EventCode": "0x2000A",
     "EventName": "PM_HYPERVISOR_CYC",
     "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010."
   },
   {
-    "EventCode": "2000E",
+    "EventCode": "0x2000E",
     "EventName": "PM_LSU_LD1_FIN",
     "BriefDescription": "LSU Finished an internal operation in LD1 port."
   },
   {
-    "EventCode": "2C014",
+    "EventCode": "0x2C014",
     "EventName": "PM_CMPL_STALL_SPECIAL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing."
   },
   {
-    "EventCode": "2C018",
+    "EventCode": "0x2C018",
     "EventName": "PM_EXEC_STALL_DMISS_L3MISS",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3."
   },
   {
-    "EventCode": "2D010",
+    "EventCode": "0x2D010",
     "EventName": "PM_LSU_ST1_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST1 port."
   },
   {
-    "EventCode": "2D012",
+    "EventCode": "0x2D012",
     "EventName": "PM_VSU1_ISSUE",
     "BriefDescription": "VSU instructions issued to VSU pipe 1."
   },
   {
-    "EventCode": "2D018",
+    "EventCode": "0x2D018",
     "EventName": "PM_EXEC_STALL_VSU",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)."
   },
   {
-    "EventCode": "2E01E",
+    "EventCode": "0x2D01C",
+    "EventName": "PM_CMPL_STALL_STCX",
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing."
+  },
+  {
+    "EventCode": "0x2E01E",
     "EventName": "PM_EXEC_STALL_NTC_FLUSH",
-    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children."
+    "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children. This event will also count cycles when the previous NTF instruction is still completing and the new NTF instruction is stalled at dispatch."
   },
   {
-    "EventCode": "2013C",
+    "EventCode": "0x2013C",
     "EventName": "PM_MRK_FX_LSU_FIN",
     "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time."
   },
   {
-    "EventCode": "2405A",
+    "EventCode": "0x2405A",
     "EventName": "PM_NTC_FIN",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status."
   },
   {
-    "EventCode": "201E2",
+    "EventCode": "0x201E2",
     "EventName": "PM_MRK_LD_MISS_L1",
     "BriefDescription": "Marked DL1 Demand Miss counted at finish time."
   },
   {
-    "EventCode": "200F4",
+    "EventCode": "0x200F4",
     "EventName": "PM_RUN_CYC",
     "BriefDescription": "Processor cycles gated by the run latch."
   },
   {
-    "EventCode": "30004",
-    "EventName": "PM_DISP_STALL_FLUSH",
-    "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC."
-  },
-  {
-    "EventCode": "30008",
+    "EventCode": "0x30008",
     "EventName": "PM_EXEC_STALL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category."
   },
   {
-    "EventCode": "3001A",
+    "EventCode": "0x3001A",
     "EventName": "PM_LSU_ST2_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST2 port."
   },
   {
-    "EventCode": "30020",
+    "EventCode": "0x30020",
     "EventName": "PM_PMC2_REWIND",
     "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged."
   },
   {
-    "EventCode": "30022",
+    "EventCode": "0x30022",
     "EventName": "PM_PMC4_SAVED",
     "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged."
   },
   {
-    "EventCode": "30024",
+    "EventCode": "0x30024",
     "EventName": "PM_PMC6_OVERFLOW",
     "BriefDescription": "The event selected for PMC6 caused the event counter to overflow."
   },
   {
-    "EventCode": "30028",
+    "EventCode": "0x30028",
     "EventName": "PM_CMPL_STALL_MEM_ECC",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC."
   },
   {
-    "EventCode": "30036",
+    "EventCode": "0x30036",
     "EventName": "PM_EXEC_STALL_SIMPLE_FX",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "3003A",
+    "EventCode": "0x3003A",
     "EventName": "PM_CMPL_STALL_EXCEPTION",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete."
   },
   {
-    "EventCode": "3F044",
+    "EventCode": "0x3F044",
     "EventName": "PM_VSU2_ISSUE",
     "BriefDescription": "VSU instructions issued to VSU pipe 2."
   },
   {
-    "EventCode": "30058",
+    "EventCode": "0x30058",
     "EventName": "PM_TLBIE_FIN",
     "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted."
   },
   {
-    "EventCode": "3D058",
+    "EventCode": "0x3D058",
     "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE",
     "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)."
   },
   {
-    "EventCode": "30066",
+    "EventCode": "0x30066",
     "EventName": "PM_LSU_FIN",
     "BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)."
   },
   {
-    "EventCode": "40004",
+    "EventCode": "0x40004",
     "EventName": "PM_FXU_ISSUE",
     "BriefDescription": "A fixed point instruction was issued to the VSU."
   },
   {
-    "EventCode": "40008",
+    "EventCode": "0x40008",
     "EventName": "PM_NTC_ALL_FIN",
     "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline."
   },
   {
-    "EventCode": "40010",
+    "EventCode": "0x40010",
     "EventName": "PM_PMC3_OVERFLOW",
     "BriefDescription": "The event selected for PMC3 caused the event counter to overflow."
   },
   {
-    "EventCode": "4C012",
+    "EventCode": "0x4C012",
     "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve."
   },
   {
-    "EventCode": "4C018",
+    "EventCode": "0x4C018",
     "EventName": "PM_CMPL_STALL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason."
   },
   {
-    "EventCode": "4C01E",
+    "EventCode": "0x4C01E",
     "EventName": "PM_LSU_ST3_FIN",
     "BriefDescription": "LSU Finished an internal operation in ST3 port."
   },
   {
-    "EventCode": "4D018",
+    "EventCode": "0x4D018",
     "EventName": "PM_EXEC_STALL_BRU",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit."
   },
   {
-    "EventCode": "4D01A",
+    "EventCode": "0x4D01A",
     "EventName": "PM_CMPL_STALL_HWSYNC",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing."
   },
   {
-    "EventCode": "4D01C",
+    "EventCode": "0x4D01C",
     "EventName": "PM_EXEC_STALL_TLBIEL",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest."
   },
   {
-    "EventCode": "4E012",
+    "EventCode": "0x4E012",
     "EventName": "PM_EXEC_STALL_UNKNOWN",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together."
   },
   {
-    "EventCode": "4D020",
+    "EventCode": "0x4D020",
     "EventName": "PM_VSU3_ISSUE",
     "BriefDescription": "VSU instruction was issued to VSU pipe 3."
   },
   {
-    "EventCode": "40132",
+    "EventCode": "0x40132",
     "EventName": "PM_MRK_LSU_FIN",
     "BriefDescription": "LSU marked instruction finish."
   },
   {
-    "EventCode": "45058",
+    "EventCode": "0x45058",
     "EventName": "PM_IC_MISS_CMPL",
     "BriefDescription": "Non-speculative icache miss, counted at completion."
   },
   {
-    "EventCode": "4D050",
+    "EventCode": "0x4D050",
     "EventName": "PM_VSU_NON_FLOP_CMPL",
     "BriefDescription": "Non-floating point VSU instructions completed."
   },
   {
-    "EventCode": "4D052",
+    "EventCode": "0x4D052",
     "EventName": "PM_2FLOP_CMPL",
     "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed."
   },
   {
-    "EventCode": "400F2",
+    "EventCode": "0x400F2",
     "EventName": "PM_1PLUS_PPC_DISP",
     "BriefDescription": "Cycles at least one Instr Dispatched."
   },
   {
-    "EventCode": "400F8",
+    "EventCode": "0x400F8",
     "EventName": "PM_FLUSH",
     "BriefDescription": "Flush (any type)."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
index ea122a91ceb0f..b5d1bd39cfb22 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json
@@ -1,21 +1,21 @@
 [
   {
-    "EventCode": "301E8",
+    "EventCode": "0x301E8",
     "EventName": "PM_THRESH_EXC_64",
     "BriefDescription": "Threshold counter exceeded a value of 64."
   },
   {
-    "EventCode": "45050",
+    "EventCode": "0x45050",
     "EventName": "PM_1FLOP_CMPL",
     "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
   },
   {
-    "EventCode": "45052",
+    "EventCode": "0x45052",
     "EventName": "PM_4FLOP_CMPL",
     "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)."
   },
   {
-    "EventCode": "4D054",
+    "EventCode": "0x4D054",
     "EventName": "PM_8FLOP_CMPL",
     "BriefDescription": "Four Double Precision vector instructions completed."
   }
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
index 5a714e3dd71ac..db3766dca07c5 100644
--- a/tools/perf/pmu-events/arch/powerpc/power10/translation.json
+++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json
@@ -1,56 +1,56 @@
 [
   {
-    "EventCode": "1F15E",
+    "EventCode": "0x1F15E",
     "EventName": "PM_MRK_START_PROBE_NOP_CMPL",
     "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed."
   },
   {
-    "EventCode": "20016",
+    "EventCode": "0x20016",
     "EventName": "PM_ST_FIN",
     "BriefDescription": "Store finish count. Includes speculative activity."
   },
   {
-    "EventCode": "20018",
+    "EventCode": "0x20018",
     "EventName": "PM_ST_FWD",
     "BriefDescription": "Store forwards that finished."
   },
   {
-    "EventCode": "2011C",
+    "EventCode": "0x2011C",
     "EventName": "PM_MRK_NTF_CYC",
     "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)."
   },
   {
-    "EventCode": "2E01C",
+    "EventCode": "0x2E01C",
     "EventName": "PM_EXEC_STALL_TLBIE",
     "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit."
   },
   {
-    "EventCode": "201E6",
+    "EventCode": "0x201E6",
     "EventName": "PM_THRESH_EXC_32",
     "BriefDescription": "Threshold counter exceeded a value of 32."
   },
   {
-    "EventCode": "200F0",
+    "EventCode": "0x200F0",
     "EventName": "PM_ST_CMPL",
     "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)."
   },
   {
-    "EventCode": "200FE",
+    "EventCode": "0x200FE",
     "EventName": "PM_DATA_FROM_L2MISS",
     "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss."
   },
   {
-    "EventCode": "30010",
+    "EventCode": "0x30010",
     "EventName": "PM_PMC2_OVERFLOW",
     "BriefDescription": "The event selected for PMC2 caused the event counter to overflow."
   },
   {
-    "EventCode": "4D010",
+    "EventCode": "0x4D010",
     "EventName": "PM_PMC1_SAVED",
     "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged."
   },
   {
-    "EventCode": "4D05C",
+    "EventCode": "0x4D05C",
     "EventName": "PM_DPP_FLOP_CMPL",
     "BriefDescription": "Double-Precision or Quad-Precision instructions completed."
   }
-- 
GitLab


From 5362a4b6ee6136018558ef6b2c4701aa15ebc602 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 26 May 2021 22:00:05 +1000
Subject: [PATCH 1809/3804] powerpc: Fix reverse map real-mode address lookup
 with huge vmalloc

real_vmalloc_addr() does not currently work for huge vmalloc, which is
what the reverse map can be allocated with for radix host, hash guest.

Extract the hugepage aware equivalent from eeh code into a helper, and
convert existing sites including this one to use it.

Fixes: 8abddd968a30 ("powerpc/64s/radix: Enable huge vmalloc mappings")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210526120005.3432222-1-npiggin@gmail.com
---
 arch/powerpc/include/asm/pte-walk.h  | 29 ++++++++++++++++++++++++++++
 arch/powerpc/kernel/eeh.c            | 23 +---------------------
 arch/powerpc/kernel/io-workarounds.c | 16 +++------------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  | 15 ++------------
 4 files changed, 35 insertions(+), 48 deletions(-)

diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 33fa5dd8ee6a7..714a35f0d425b 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -31,6 +31,35 @@ static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
 	pgd_t *pgdir = init_mm.pgd;
 	return __find_linux_pte(pgdir, ea, NULL, hshift);
 }
+
+/*
+ * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
+ * physical address, without taking locks. This can be used in real-mode.
+ */
+static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
+{
+	pte_t *ptep;
+	phys_addr_t pa;
+	int hugepage_shift;
+
+	/*
+	 * init_mm does not free page tables, and does not do THP. It may
+	 * have huge pages from huge vmalloc / ioremap etc.
+	 */
+	ptep = find_init_mm_pte(addr, &hugepage_shift);
+	if (WARN_ON(!ptep))
+		return 0;
+
+	pa = PFN_PHYS(pte_pfn(*ptep));
+
+	if (!hugepage_shift)
+		hugepage_shift = PAGE_SHIFT;
+
+	pa |= addr & ((1ul << hugepage_shift) - 1);
+
+	return pa;
+}
+
 /*
  * This is what we should always use. Any other lockless page table lookup needs
  * careful audit against THP split.
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f24cd53ff26e2..3bbdcc86d01ba 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -346,28 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
  */
 static inline unsigned long eeh_token_to_phys(unsigned long token)
 {
-	pte_t *ptep;
-	unsigned long pa;
-	int hugepage_shift;
-
-	/*
-	 * We won't find hugepages here(this is iomem). Hence we are not
-	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
-	 * page table free, because of init_mm.
-	 */
-	ptep = find_init_mm_pte(token, &hugepage_shift);
-	if (!ptep)
-		return token;
-
-	pa = pte_pfn(*ptep);
-
-	/* On radix we can do hugepage mappings for io, so handle that */
-	if (!hugepage_shift)
-		hugepage_shift = PAGE_SHIFT;
-
-	pa <<= PAGE_SHIFT;
-	pa |= token & ((1ul << hugepage_shift) - 1);
-	return pa;
+	return ppc_find_vmap_phys(token);
 }
 
 /*
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index 51bbaae94cccf..c877f074d1749 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -55,7 +55,6 @@ static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
 #ifdef CONFIG_PPC_INDIRECT_MMIO
 struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 {
-	unsigned hugepage_shift;
 	struct iowa_bus *bus;
 	int token;
 
@@ -65,22 +64,13 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 		bus = &iowa_busses[token - 1];
 	else {
 		unsigned long vaddr, paddr;
-		pte_t *ptep;
 
 		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
 		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
 			return NULL;
-		/*
-		 * We won't find huge pages here (iomem). Also can't hit
-		 * a page table free due to init_mm
-		 */
-		ptep = find_init_mm_pte(vaddr, &hugepage_shift);
-		if (ptep == NULL)
-			paddr = 0;
-		else {
-			WARN_ON(hugepage_shift);
-			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
-		}
+
+		paddr = ppc_find_vmap_phys(vaddr);
+
 		bus = iowa_pci_find(vaddr, paddr);
 
 		if (bus == NULL)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7af7c70f14680..7a0f12404e0ee 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -23,20 +23,9 @@
 #include <asm/pte-walk.h>
 
 /* Translate address of a vmalloc'd thing to a linear map address */
-static void *real_vmalloc_addr(void *x)
+static void *real_vmalloc_addr(void *addr)
 {
-	unsigned long addr = (unsigned long) x;
-	pte_t *p;
-	/*
-	 * assume we don't have huge pages in vmalloc space...
-	 * So don't worry about THP collapse/split. Called
-	 * Only in realmode with MSR_EE = 0, hence won't need irq_save/restore.
-	 */
-	p = find_init_mm_pte(addr, NULL);
-	if (!p || !pte_present(*p))
-		return NULL;
-	addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
-	return __va(addr);
+	return __va(ppc_find_vmap_phys((unsigned long)addr));
 }
 
 /* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-- 
GitLab


From 1438709e6328925ef496dafd467dbd0353137434 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 26 May 2021 22:58:51 +1000
Subject: [PATCH 1810/3804] KVM: PPC: Book3S HV: Save host FSCR in the P7/8
 path

Similar to commit 25edcc50d76c ("KVM: PPC: Book3S HV: Save and restore
FSCR in the P9 path"), ensure the P7/8 path saves and restores the host
FSCR. The logic explained in that patch actually applies to the old
path as well: a context switch can be made before kvmppc_vcpu_run_hv
restores the host FSCR and returns.

Now that both the p9 and the p7/8 paths save and restore their FSCR, it
no longer needs to be restored at the end of kvmppc_vcpu_run_hv.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs")
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210526125851.3436735-1-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c            | 1 -
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a80d240b764..13728495ac660 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 		mtspr(SPRN_EBBRR, ebb_regs[1]);
 		mtspr(SPRN_BESCR, ebb_regs[2]);
 		mtspr(SPRN_TAR, user_tar);
-		mtspr(SPRN_FSCR, current->thread.fscr);
 	}
 	mtspr(SPRN_VRSAVE, user_vrsave);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5e634db4809bf..004f0d4e665f8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_UAMOR	(SFS-88)
 #define STACK_SLOT_DAWR1	(SFS-96)
 #define STACK_SLOT_DAWRX1	(SFS-104)
+#define STACK_SLOT_FSCR		(SFS-112)
 /* the following is used by the P9 short path */
 #define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
 
@@ -686,6 +687,8 @@ BEGIN_FTR_SECTION
 	std	r6, STACK_SLOT_DAWR0(r1)
 	std	r7, STACK_SLOT_DAWRX0(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
+	mfspr	r5, SPRN_FSCR
+	std	r5, STACK_SLOT_FSCR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	mfspr	r6, SPRN_DAWR1
@@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE
 	ld	r7, STACK_SLOT_HFSCR(r1)
 	mtspr	SPRN_HFSCR, r7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_FSCR(r1)
+	mtspr	SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	/*
 	 * Restore various registers to 0, where non-zero values
 	 * set by the guest could disrupt the host.
-- 
GitLab


From 8aa0ae439966364da86fc6437375e32f2890c4c3 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 19 Apr 2021 08:18:09 +0200
Subject: [PATCH 1811/3804] MAINTAINERS: adjust to removing i2c designware
 platform data

Commit 5a517b5bf687 ("i2c: designware: Get rid of legacy platform data")
removes ./include/linux/platform_data/i2c-designware.h, but misses to
adjust the SYNOPSYS DESIGNWARE I2C DRIVER section in MAINTAINERS.

Hence, ./scripts/get_maintainer.pl --self-test=patterns complains:

  warning: no file matches F: include/linux/platform_data/i2c-designware.h

Remove the file entry to this removed file as well.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 81e1edeceae40..e686cf6142627 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17662,7 +17662,6 @@ R:	Mika Westerberg <mika.westerberg@linux.intel.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-designware-*
-F:	include/linux/platform_data/i2c-designware.h
 
 SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
 M:	Jaehoon Chung <jh80.chung@samsung.com>
-- 
GitLab


From 40cd0aae5957ec175b73dc17dce6079d33fa74f6 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 27 May 2021 15:28:46 -0700
Subject: [PATCH 1812/3804] x86/mce: Include a MCi_MISC value in faked mce logs

When BIOS reports memory errors to Linux using the ACPI/APEI
error reporting method Linux creates a "struct mce" to pass
to the normal reporting code path.

The constructed record doesn't include a value for the "misc"
field of the structure, and so mce_usable_address() says this
record doesn't include a valid address.

Net result is that functions like uc_decode_notifier() will
just ignore this record instead of taking action to offline
a page.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210527222846.931851-1-tony.luck@intel.com
---
 arch/x86/kernel/cpu/mce/apei.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index b58b85380ddb5..0e3ae64d3b76b 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -36,7 +36,8 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 	mce_setup(&m);
 	m.bank = -1;
 	/* Fake a memory read error with unknown channel */
-	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
+	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | MCI_STATUS_MISCV | 0x9f;
+	m.misc = (MCI_MISC_ADDR_PHYS << 6) | PAGE_SHIFT;
 
 	if (severity >= GHES_SEV_RECOVERABLE)
 		m.status |= MCI_STATUS_UC;
-- 
GitLab


From aac902925ea646e461c95edc98a8a57eb0def917 Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Mon, 17 May 2021 12:39:05 -0700
Subject: [PATCH 1813/3804] Documentation: seccomp: Fix user notification
 documentation

The documentation had some previously incorrect information about how
userspace notifications (and responses) were handled due to a change
from a previously proposed patchset.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210517193908.3113-2-sargun@sargun.me
---
 Documentation/userspace-api/seccomp_filter.rst | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index bd9165241b6c8..6efb41cc80725 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -250,14 +250,14 @@ Users can read via ``ioctl(SECCOMP_IOCTL_NOTIF_RECV)``  (or ``poll()``) on a
 seccomp notification fd to receive a ``struct seccomp_notif``, which contains
 five members: the input length of the structure, a unique-per-filter ``id``,
 the ``pid`` of the task which triggered this request (which may be 0 if the
-task is in a pid ns not visible from the listener's pid namespace), a ``flags``
-member which for now only has ``SECCOMP_NOTIF_FLAG_SIGNALED``, representing
-whether or not the notification is a result of a non-fatal signal, and the
-``data`` passed to seccomp. Userspace can then make a decision based on this
-information about what to do, and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a
-response, indicating what should be returned to userspace. The ``id`` member of
-``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct
-seccomp_notif``.
+task is in a pid ns not visible from the listener's pid namespace). The
+notification also contains the ``data`` passed to seccomp, and a filters flag.
+The structure should be zeroed out prior to calling the ioctl.
+
+Userspace can then make a decision based on this information about what to do,
+and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
+returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
+be the same ``id`` as in ``struct seccomp_notif``.
 
 It is worth noting that ``struct seccomp_data`` contains the values of register
 arguments to the syscall, but does not contain pointers to memory. The task's
-- 
GitLab


From e87e46d5f3182f82d997641d95db01a7feacef92 Mon Sep 17 00:00:00 2001
From: Yuan Yao <yuan.yao@intel.com>
Date: Wed, 26 May 2021 14:38:28 +0800
Subject: [PATCH 1814/3804] KVM: X86: Use kvm_get_linear_rip() in single-step
 and #DB/#BP interception

The kvm_get_linear_rip() handles x86/long mode cases well and has
better readability, __kvm_set_rflags() also uses the paired
function kvm_is_linear_rip() to check the vcpu->arch.singlestep_rip
set in kvm_arch_vcpu_ioctl_set_guest_debug(), so change the
"CS.BASE + RIP" code in kvm_arch_vcpu_ioctl_set_guest_debug() and
handle_exception_nmi() to this one.

Signed-off-by: Yuan Yao <yuan.yao@intel.com>
Message-Id: <20210526063828.1173-1-yuan.yao@linux.intel.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 5 ++---
 arch/x86/kvm/x86.c     | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 639ec3eba9b80..50b42d7a8a117 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4843,7 +4843,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_run *kvm_run = vcpu->run;
 	u32 intr_info, ex_no, error_code;
-	unsigned long cr2, rip, dr6;
+	unsigned long cr2, dr6;
 	u32 vect_info;
 
 	vect_info = vmx->idt_vectoring_info;
@@ -4933,8 +4933,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		vmx->vcpu.arch.event_exit_inst_len =
 			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 		kvm_run->exit_reason = KVM_EXIT_DEBUG;
-		rip = kvm_rip_read(vcpu);
-		kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
+		kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 		kvm_run->debug.arch.exception = ex_no;
 		break;
 	case AC_VECTOR:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe464b66898ff..2d725567961f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10120,8 +10120,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	kvm_update_dr7(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
-		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
-			get_segment_base(vcpu, VCPU_SREG_CS);
+		vcpu->arch.singlestep_rip = kvm_get_linear_rip(vcpu);
 
 	/*
 	 * Trigger an rflags update that will inject or remove the trace
-- 
GitLab


From da6393cdd8aaa354b3a2437cd73ebb34cac958e3 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Thu, 27 May 2021 17:01:36 -0700
Subject: [PATCH 1815/3804] KVM: X86: Fix warning caused by stale emulation
 context

Reported by syzkaller:

  WARNING: CPU: 7 PID: 10526 at linux/arch/x86/kvm//x86.c:7621 x86_emulate_instruction+0x41b/0x510 [kvm]
  RIP: 0010:x86_emulate_instruction+0x41b/0x510 [kvm]
  Call Trace:
   kvm_mmu_page_fault+0x126/0x8f0 [kvm]
   vmx_handle_exit+0x11e/0x680 [kvm_intel]
   vcpu_enter_guest+0xd95/0x1b40 [kvm]
   kvm_arch_vcpu_ioctl_run+0x377/0x6a0 [kvm]
   kvm_vcpu_ioctl+0x389/0x630 [kvm]
   __x64_sys_ioctl+0x8e/0xd0
   do_syscall_64+0x3c/0xb0
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Commit 4a1e10d5b5d8 ("KVM: x86: handle hardware breakpoints during emulation")
adds hardware breakpoints check before emulating the instruction and parts of
emulation context initialization, actually we don't have the EMULTYPE_NO_DECODE flag
here and the emulation context will not be reused. Commit c8848cee74ff ("KVM: x86:
set ctxt->have_exception in x86_decode_insn()") triggers the warning because it
catches the stale emulation context has #UD, however, it is not during instruction
decoding which should result in EMULATION_FAILED. This patch fixes it by moving
the second part emulation context initialization into init_emulate_ctxt() and
before hardware breakpoints check. The ctxt->ud will be dropped by a follow-up
patch.

syzkaller source: https://syzkaller.appspot.com/x/repro.c?x=134683fdd00000

Reported-by: syzbot+71271244f206d17f6441@syzkaller.appspotmail.com
Fixes: 4a1e10d5b5d8 (KVM: x86: handle hardware breakpoints during emulation)
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <1622160097-37633-1-git-send-email-wanpengli@tencent.com>
---
 arch/x86/kvm/x86.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2d725567961f5..622cba2ed6997 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7228,6 +7228,11 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
 	BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
 
+	ctxt->interruptibility = 0;
+	ctxt->have_exception = false;
+	ctxt->exception.vector = -1;
+	ctxt->perm_ok = false;
+
 	init_decode_cache(ctxt);
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
@@ -7563,11 +7568,6 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
 	    kvm_vcpu_check_breakpoint(vcpu, &r))
 		return r;
 
-	ctxt->interruptibility = 0;
-	ctxt->have_exception = false;
-	ctxt->exception.vector = -1;
-	ctxt->perm_ok = false;
-
 	ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
 
 	r = x86_decode_insn(ctxt, insn, insn_len);
-- 
GitLab


From b35491e66c87946f380ebf8ab10a7e1f795e5ece Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Thu, 27 May 2021 17:01:37 -0700
Subject: [PATCH 1816/3804] KVM: X86: Kill off ctxt->ud

ctxt->ud is consumed only by x86_decode_insn(), we can kill it off by
passing emulation_type to x86_decode_insn() and dropping ctxt->ud
altogether. Tracking that info in ctxt for literally one call is silly.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <1622160097-37633-2-git-send-email-wanpengli@tencent.com>
---
 arch/x86/kvm/emulate.c     | 5 +++--
 arch/x86/kvm/kvm_emulate.h | 3 +--
 arch/x86/kvm/x86.c         | 4 +---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8a0ccdb560766..5e5de05a8fbfa 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5111,7 +5111,7 @@ done:
 	return rc;
 }
 
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type)
 {
 	int rc = X86EMUL_CONTINUE;
 	int mode = ctxt->mode;
@@ -5322,7 +5322,8 @@ done_prefixes:
 
 	ctxt->execute = opcode.u.execute;
 
-	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+	if (unlikely(emulation_type & EMULTYPE_TRAP_UD) &&
+	    likely(!(ctxt->d & EmulateOnUD)))
 		return EMULATION_FAILED;
 
 	if (unlikely(ctxt->d &
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index f016838faedd6..3e870bf9ca4d5 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -314,7 +314,6 @@ struct x86_emulate_ctxt {
 	int interruptibility;
 
 	bool perm_ok; /* do not check permissions if true */
-	bool ud;	/* inject an #UD if host doesn't support insn */
 	bool tf;	/* TF value before instruction (after for syscall/sysret) */
 
 	bool have_exception;
@@ -491,7 +490,7 @@ enum x86_intercept {
 #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
 #endif
 
-int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len);
+int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len, int emulation_type);
 bool x86_page_table_writing_insn(struct x86_emulate_ctxt *ctxt);
 #define EMULATION_FAILED -1
 #define EMULATION_OK 0
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 622cba2ed6997..1cd6d4685932a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7568,9 +7568,7 @@ int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
 	    kvm_vcpu_check_breakpoint(vcpu, &r))
 		return r;
 
-	ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
-
-	r = x86_decode_insn(ctxt, insn, insn_len);
+	r = x86_decode_insn(ctxt, insn, insn_len, emulation_type);
 
 	trace_kvm_emulate_insn_start(vcpu);
 	++vcpu->stat.insn_emulation;
-- 
GitLab


From 5e6b8a50a7cec5686ee2c4bda1d49899c79a7eae Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 26 May 2021 22:38:05 +0800
Subject: [PATCH 1817/3804] cred: add missing return error code when
 set_cred_ucounts() failed

If set_cred_ucounts() failed, we need to return the error code.

Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lkml.kernel.org/r/20210526143805.2549649-1-yangyingliang@huawei.com
Reviewed-by: Alexey Gladkov <legion@kernel.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/cred.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/cred.c b/kernel/cred.c
index dcfa30b337c5a..5a1d9702658ea 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -372,7 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 		ret = create_user_ns(new);
 		if (ret < 0)
 			goto error_put;
-		if (set_cred_ucounts(new) < 0)
+		ret = set_cred_ucounts(new);
+		if (ret < 0)
 			goto error_put;
 	}
 
-- 
GitLab


From b5941f066b4ca331db225a976dae1d6ca8cf0ae3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 May 2021 16:31:37 -0700
Subject: [PATCH 1818/3804] mptcp: fix sk_forward_memory corruption on
 retransmission

MPTCP sk_forward_memory handling is a bit special, as such field
is protected by the msk socket spin_lock, instead of the plain
socket lock.

Currently we have a code path updating such field without handling
the relevant lock:

__mptcp_retrans() -> __mptcp_clean_una_wakeup()

Several helpers in __mptcp_clean_una_wakeup() will update
sk_forward_alloc, possibly causing such field corruption, as reported
by Matthieu.

Address the issue providing and using a new variant of blamed function
which explicitly acquires the msk spin lock.

Fixes: 64b9cea7a0af ("mptcp: fix spurious retransmissions")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/172
Reported-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Tested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2bc199549a887..5edc686faff15 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -947,6 +947,10 @@ static void __mptcp_update_wmem(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
 	if (!msk->wmem_reserved)
 		return;
 
@@ -1085,10 +1089,20 @@ out:
 
 static void __mptcp_clean_una_wakeup(struct sock *sk)
 {
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
 	__mptcp_clean_una(sk);
 	mptcp_write_space(sk);
 }
 
+static void mptcp_clean_una_wakeup(struct sock *sk)
+{
+	mptcp_data_lock(sk);
+	__mptcp_clean_una_wakeup(sk);
+	mptcp_data_unlock(sk);
+}
+
 static void mptcp_enter_memory_pressure(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -2299,7 +2313,7 @@ static void __mptcp_retrans(struct sock *sk)
 	struct sock *ssk;
 	int ret;
 
-	__mptcp_clean_una_wakeup(sk);
+	mptcp_clean_una_wakeup(sk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {
-- 
GitLab


From 06f9a435b3aa12f4de6da91f11fdce8ce7b46205 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 May 2021 16:31:38 -0700
Subject: [PATCH 1819/3804] mptcp: always parse mptcp options for MPC reqsk

In subflow_syn_recv_sock() we currently skip options parsing
for OoO packet, given that such packets may not carry the relevant
MPC option.

If the peer generates an MPC+data TSO packet and some of the early
segments are lost or get reordered, the server will ignore the peer key,
causing transient, unexpected fallback to TCP.

The solution is always parsing the incoming MPTCP options, and
do the fallback only for in-order packets. This actually cleans
the existing code a bit.

Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
Reported-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index bde6be77ea73b..c6ee811498290 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -630,21 +630,20 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
 	/* if the sk is MP_CAPABLE, we try to fetch the client key */
 	if (subflow_req->mp_capable) {
-		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
-			/* here we can receive and accept an in-window,
-			 * out-of-order pkt, which will not carry the MP_CAPABLE
-			 * opt even on mptcp enabled paths
-			 */
-			goto create_msk;
-		}
-
+		/* we can receive and accept an in-window, out-of-order pkt,
+		 * which may not carry the MP_CAPABLE opt even on mptcp enabled
+		 * paths: always try to extract the peer key, and fallback
+		 * for packets missing it.
+		 * Even OoO DSS packets coming legitly after dropped or
+		 * reordered MPC will cause fallback, but we don't have other
+		 * options.
+		 */
 		mptcp_get_options(skb, &mp_opt);
 		if (!mp_opt.mp_capable) {
 			fallback = true;
 			goto create_child;
 		}
 
-create_msk:
 		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
 		if (!new_msk)
 			fallback = true;
-- 
GitLab


From dea2b1ea9c705c5ba351a9174403fd83dbb68fc3 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 May 2021 16:31:39 -0700
Subject: [PATCH 1820/3804] mptcp: do not reset MP_CAPABLE subflow on mapping
 errors

When some mapping related errors occur we close the main
MPC subflow with a RST. We should instead fallback gracefully
to TCP, and do the reset only for MPJ subflows.

Fixes: d22f4988ffec ("mptcp: process MP_CAPABLE data option")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/192
Reported-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 62 +++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 30 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index c6ee811498290..ef3d037f984a9 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1011,21 +1011,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
 		status = get_mapping_status(ssk, msk);
 		trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
-		if (status == MAPPING_INVALID) {
-			ssk->sk_err = EBADMSG;
-			goto fatal;
-		}
-		if (status == MAPPING_DUMMY) {
-			__mptcp_do_fallback(msk);
-			skb = skb_peek(&ssk->sk_receive_queue);
-			subflow->map_valid = 1;
-			subflow->map_seq = READ_ONCE(msk->ack_seq);
-			subflow->map_data_len = skb->len;
-			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
-						   subflow->ssn_offset;
-			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-			return true;
-		}
+		if (unlikely(status == MAPPING_INVALID))
+			goto fallback;
+
+		if (unlikely(status == MAPPING_DUMMY))
+			goto fallback;
 
 		if (status != MAPPING_OK)
 			goto no_data;
@@ -1038,10 +1028,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		 * MP_CAPABLE-based mapping
 		 */
 		if (unlikely(!READ_ONCE(msk->can_ack))) {
-			if (!subflow->mpc_map) {
-				ssk->sk_err = EBADMSG;
-				goto fatal;
-			}
+			if (!subflow->mpc_map)
+				goto fallback;
 			WRITE_ONCE(msk->remote_key, subflow->remote_key);
 			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
 			WRITE_ONCE(msk->can_ack, true);
@@ -1069,17 +1057,31 @@ static bool subflow_check_data_avail(struct sock *ssk)
 no_data:
 	subflow_sched_work_if_closed(msk, ssk);
 	return false;
-fatal:
-	/* fatal protocol error, close the socket */
-	/* This barrier is coupled with smp_rmb() in tcp_poll() */
-	smp_wmb();
-	ssk->sk_error_report(ssk);
-	tcp_set_state(ssk, TCP_CLOSE);
-	subflow->reset_transient = 0;
-	subflow->reset_reason = MPTCP_RST_EMPTCP;
-	tcp_send_active_reset(ssk, GFP_ATOMIC);
-	subflow->data_avail = 0;
-	return false;
+
+fallback:
+	/* RFC 8684 section 3.7. */
+	if (subflow->mp_join || subflow->fully_established) {
+		/* fatal protocol error, close the socket.
+		 * subflow_error_report() will introduce the appropriate barriers
+		 */
+		ssk->sk_err = EBADMSG;
+		ssk->sk_error_report(ssk);
+		tcp_set_state(ssk, TCP_CLOSE);
+		subflow->reset_transient = 0;
+		subflow->reset_reason = MPTCP_RST_EMPTCP;
+		tcp_send_active_reset(ssk, GFP_ATOMIC);
+		subflow->data_avail = 0;
+		return false;
+	}
+
+	__mptcp_do_fallback(msk);
+	skb = skb_peek(&ssk->sk_receive_queue);
+	subflow->map_valid = 1;
+	subflow->map_seq = READ_ONCE(msk->ack_seq);
+	subflow->map_data_len = skb->len;
+	subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+	subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+	return true;
 }
 
 bool mptcp_subflow_data_available(struct sock *sk)
-- 
GitLab


From 69ca3d29a75554122b998e8dfa20117766f52f48 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 May 2021 16:31:40 -0700
Subject: [PATCH 1821/3804] mptcp: update selftest for fallback due to OoO

The previous commit noted that we can have a fallback
scenario due to OoO (or packet drop). Update the self-tests
accordingly.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 3c4cb72ed8a4a..9ca5f1ba461ec 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -501,6 +501,7 @@ do_transfer()
 	local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
 	local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
 	local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+	local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
 
 	expect_synrx=$((stat_synrx_last_l))
 	expect_ackrx=$((stat_ackrx_last_l))
@@ -518,10 +519,14 @@ do_transfer()
 			"${stat_synrx_now_l}" "${expect_synrx}" 1>&2
 		retc=1
 	fi
-	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
-		printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
-			"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
-		rets=1
+	if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then
+		if [ ${stat_ooo_now} -eq 0 ]; then
+			printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
+				"${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+			rets=1
+		else
+			printf "[ Note ] fallback due to TCP OoO"
+		fi
 	fi
 
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
-- 
GitLab


From 7d65f9e80646c595e8c853640a9d0768a33e204c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 25 May 2021 13:08:41 +0200
Subject: [PATCH 1822/3804] x86/apic: Mark _all_ legacy interrupts when IO/APIC
 is missing

PIC interrupts do not support affinity setting and they can end up on
any online CPU. Therefore, it's required to mark the associated vectors
as system-wide reserved. Otherwise, the corresponding irq descriptors
are copied to the secondary CPUs but the vectors are not marked as
assigned or reserved. This works correctly for the IO/APIC case.

When the IO/APIC is disabled via config, kernel command line or lack of
enumeration then all legacy interrupts are routed through the PIC, but
nothing marks them as system-wide reserved vectors.

As a consequence, a subsequent allocation on a secondary CPU can result in
allocating one of these vectors, which triggers the BUG() in
apic_update_vector() because the interrupt descriptor slot is not empty.

Imran tried to work around that by marking those interrupts as allocated
when a CPU comes online. But that's wrong in case that the IO/APIC is
available and one of the legacy interrupts, e.g. IRQ0, has been switched to
PIC mode because then marking them as allocated will fail as they are
already marked as system vectors.

Stay consistent and update the legacy vectors after attempting IO/APIC
initialization and mark them as system vectors in case that no IO/APIC is
available.

Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment")
Reported-by: Imran Khan <imran.f.khan@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210519233928.2157496-1-imran.f.khan@oracle.com
---
 arch/x86/include/asm/apic.h   |  1 +
 arch/x86/kernel/apic/apic.c   |  1 +
 arch/x86/kernel/apic/vector.c | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 412b51e059c80..48067af946785 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void)
 extern int setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask);
 extern void lapic_assign_system_vectors(void);
 extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace);
+extern void lapic_update_legacy_vectors(void);
 extern void lapic_online(void);
 extern void lapic_offline(void);
 extern bool apic_needs_pit(void);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4a39fb429f15b..d262811ce14b5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2604,6 +2604,7 @@ static void __init apic_bsp_setup(bool upmode)
 	end_local_APIC_setup();
 	irq_remap_enable_fault_handling();
 	setup_IO_APIC();
+	lapic_update_legacy_vectors();
 }
 
 #ifdef CONFIG_UP_LATE_INIT
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 6dbdc7c22bb75..fb67ed5e7e6a8 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -738,6 +738,26 @@ void lapic_assign_legacy_vector(unsigned int irq, bool replace)
 	irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace);
 }
 
+void __init lapic_update_legacy_vectors(void)
+{
+	unsigned int i;
+
+	if (IS_ENABLED(CONFIG_X86_IO_APIC) && nr_ioapics > 0)
+		return;
+
+	/*
+	 * If the IO/APIC is disabled via config, kernel command line or
+	 * lack of enumeration then all legacy interrupts are routed
+	 * through the PIC. Make sure that they are marked as legacy
+	 * vectors. PIC_CASCADE_IR has already been marked in
+	 * lapic_assign_system_vectors().
+	 */
+	for (i = 0; i < nr_legacy_irqs(); i++) {
+		if (i != PIC_CASCADE_IR)
+			lapic_assign_legacy_vector(i, true);
+	}
+}
+
 void __init lapic_assign_system_vectors(void)
 {
 	unsigned int i, vector = 0;
-- 
GitLab


From 000ac42953395a4f0a63d5db640c5e4c88a548c5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 May 2021 15:10:58 -0400
Subject: [PATCH 1823/3804] selftests: kvm: fix overlapping addresses in
 memslot_perf_test

vm_create allocates memory and maps it close to GPA.  This memory
is separate from what is allocated in subsequent calls to
vm_userspace_mem_region_add, so it is incorrect to pass the
test memory size to vm_create_default.  Just pass a small
fixed amount of memory which can be used later for page table,
otherwise GPAs are already allocated at MEM_GPA and the
test aborts.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/memslot_perf_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 11239652d8057..9307f25d8130e 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -267,7 +267,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
-	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+	data->vm = vm_create_default(VCPU_ID, 1024, guest_code);
 
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 		max_mem_slots - 1, data->pages_per_slot, rempages);
-- 
GitLab


From ddc473916955f7710d1eb17c1273d91c8622a9fe Mon Sep 17 00:00:00 2001
From: Sargun Dhillon <sargun@sargun.me>
Date: Mon, 17 May 2021 12:39:06 -0700
Subject: [PATCH 1824/3804] seccomp: Refactor notification handler to prepare
 for new semantics

This refactors the user notification code to have a do / while loop around
the completion condition. This is a small change in semantics: previously
we ignored addfd calls upon wakeup if the notification had already been
responded to, whereas with this change we check for outstanding addfd
calls prior to returning to userspace.

Rodrigo Campos also identified a bug that can result in addfd causing
an early return, when the supervisor didn't actually handle the
syscall [1].

[1]: https://lore.kernel.org/lkml/20210413160151.3301-1-rodrigo@kinvolk.io/

Fixes: 7cf97b125455 ("seccomp: Introduce addfd ioctl to seccomp user notifier")
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Rodrigo Campos <rodrigo@kinvolk.io>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210517193908.3113-3-sargun@sargun.me
---
 kernel/seccomp.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 6ecd3f3a52b5b..9f58049ac16d9 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1105,28 +1105,30 @@ static int seccomp_do_user_notification(int this_syscall,
 
 	up(&match->notif->request);
 	wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
-	mutex_unlock(&match->notify_lock);
 
 	/*
 	 * This is where we wait for a reply from userspace.
 	 */
-wait:
-	err = wait_for_completion_interruptible(&n.ready);
-	mutex_lock(&match->notify_lock);
-	if (err == 0) {
-		/* Check if we were woken up by a addfd message */
+	do {
+		mutex_unlock(&match->notify_lock);
+		err = wait_for_completion_interruptible(&n.ready);
+		mutex_lock(&match->notify_lock);
+		if (err != 0)
+			goto interrupted;
+
 		addfd = list_first_entry_or_null(&n.addfd,
 						 struct seccomp_kaddfd, list);
-		if (addfd && n.state != SECCOMP_NOTIFY_REPLIED) {
+		/* Check if we were woken up by a addfd message */
+		if (addfd)
 			seccomp_handle_addfd(addfd);
-			mutex_unlock(&match->notify_lock);
-			goto wait;
-		}
-		ret = n.val;
-		err = n.error;
-		flags = n.flags;
-	}
 
+	}  while (n.state != SECCOMP_NOTIFY_REPLIED);
+
+	ret = n.val;
+	err = n.error;
+	flags = n.flags;
+
+interrupted:
 	/* If there were any pending addfd calls, clear them out */
 	list_for_each_entry_safe(addfd, tmp, &n.addfd, list) {
 		/* The process went away before we got a chance to handle it */
-- 
GitLab


From ec3a5cb61146c91f0f7dcec8b7e7157a4879a9ee Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Fri, 14 May 2021 14:37:41 -0700
Subject: [PATCH 1825/3804] riscv: Use -mno-relax when using lld linker

lld does not implement the RISC-V linker relaxation optimizations that
GNU ld does, so disable relaxation when building with lld. Also pass
-mno-relax to the assembler when the external GNU assembler is used
(LLVM_IAS != 1), so that the matching assembler option is enabled as
well. If these options are not used, we see the following relocations
in objects:

0000000000000000 R_RISCV_ALIGN     *ABS*+0x0000000000000002

These are then rejected by lld
ld.lld: error: capability.c:(.fixup+0x0): relocation R_RISCV_ALIGN requires unimplemented linker relaxation; recompile with -mno-relax but the .o is already compiled with -mno-relax

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Makefile | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 3eb9590a07759..4be0206954289 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -38,6 +38,15 @@ else
 	KBUILD_LDFLAGS += -melf32lriscv
 endif
 
+ifeq ($(CONFIG_LD_IS_LLD),y)
+	KBUILD_CFLAGS += -mno-relax
+	KBUILD_AFLAGS += -mno-relax
+ifneq ($(LLVM_IAS),1)
+	KBUILD_CFLAGS += -Wa,-mno-relax
+	KBUILD_AFLAGS += -Wa,-mno-relax
+endif
+endif
+
 # ISA string setting
 riscv-march-$(CONFIG_ARCH_RV32I)	:= rv32ima
 riscv-march-$(CONFIG_ARCH_RV64I)	:= rv64ima
-- 
GitLab


From 216e5835966a709bb87a4d94a7343dd90ab0bd64 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Sat, 29 May 2021 12:01:02 +0100
Subject: [PATCH 1826/3804] io_uring: fix misaccounting fix buf pinned pages

As Andres reports "... io_sqe_buffer_register() doesn't initialize imu.
io_buffer_account_pin() does imu->acct_pages++, before calling
io_account_mem(ctx, imu->acct_pages).", leading to eventual -ENOMEM.

Initialise the field.

Reported-by: Andres Freund <andres@anarazel.de>
Fixes: 41edf1a5ec967 ("io_uring: keep table of pointers to ubufs")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/438a6f46739ae5e05d9c75a0c8fa235320ff367c.1622285901.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 903458afd56c1..42380ed563c41 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8228,6 +8228,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 {
 	int i, ret;
 
+	imu->acct_pages = 0;
 	for (i = 0; i < nr_pages; i++) {
 		if (!PageCompound(pages[i])) {
 			imu->acct_pages++;
-- 
GitLab


From 4ad7935df6a566225c3d51900bde8f2f0f8b6de3 Mon Sep 17 00:00:00 2001
From: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Date: Fri, 28 May 2021 21:51:23 +0300
Subject: [PATCH 1827/3804] ALSA: hda: Add AlderLake-M PCI ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add HD Audio PCI ID for Intel AlderLake-M. Add rules to
snd_intel_dsp_find_config() to choose SOF driver for ADL-M systems with
PCH-DMIC or Soundwire codecs, and legacy driver for the rest.

Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20210528185123.48332-1-kai.vehmanen@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/hda/intel-dsp-config.c | 4 ++++
 sound/pci/hda/hda_intel.c    | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index ab5ff7867eb99..d8be146793eee 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -331,6 +331,10 @@ static const struct config_entry config_table[] = {
 		.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
 		.device = 0x51c8,
 	},
+	{
+		.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+		.device = 0x51cc,
+	},
 #endif
 
 };
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 79ade335c8a09..470753b36c8a1 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2485,6 +2485,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Alderlake-P */
 	{ PCI_DEVICE(0x8086, 0x51c8),
 	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+	/* Alderlake-M */
+	{ PCI_DEVICE(0x8086, 0x51cc),
+	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
 	/* Elkhart Lake */
 	{ PCI_DEVICE(0x8086, 0x4b55),
 	  .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
-- 
GitLab


From 02de318afa7a06216570ab69e028751590636a0e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 15 May 2021 15:26:12 +0200
Subject: [PATCH 1828/3804] mt76: mt7615: do not set MT76_STATE_PM at bootstrap

Remove MT76_STATE_PM in mt7615_init_device() and introduce
__mt7663s_mcu_drv_pmctrl() for fw loading in mt7663s.
This patch fixes a crash at bootstrap for devices (e.g. mt7622) that do
not support runtime-pm.

Fixes: 7f2bc8ba11a0 ("mt76: connac: introduce wake counter for fw_pmctrl synchronization")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/e5a2618574007113d844874420f7855891abf167.1621085028.git.lorenzo@kernel.org
---
 .../net/wireless/mediatek/mt76/mt7615/init.c  |  1 -
 .../wireless/mediatek/mt76/mt7615/sdio_mcu.c  | 19 ++++++++++++-------
 .../wireless/mediatek/mt76/mt7615/usb_mcu.c   |  3 ---
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 86341d1f82f36..d20f05a7717d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -510,7 +510,6 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	mutex_init(&dev->pm.mutex);
 	init_waitqueue_head(&dev->pm.wait);
 	spin_lock_init(&dev->pm.txq_lock);
-	set_bit(MT76_STATE_PM, &dev->mphy.state);
 	INIT_DELAYED_WORK(&dev->mphy.mac_work, mt7615_mac_work);
 	INIT_DELAYED_WORK(&dev->phy.scan_work, mt7615_scan_work);
 	INIT_DELAYED_WORK(&dev->coredump.work, mt7615_coredump_work);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
index 17fe4187d1de0..d1be78b0711c9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/sdio_mcu.c
@@ -51,16 +51,13 @@ mt7663s_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
 	return ret;
 }
 
-static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
+static int __mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 {
 	struct sdio_func *func = dev->mt76.sdio.func;
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	u32 status;
 	int ret;
 
-	if (!test_and_clear_bit(MT76_STATE_PM, &mphy->state))
-		goto out;
-
 	sdio_claim_host(func);
 
 	sdio_writel(func, WHLPCR_FW_OWN_REQ_CLR, MCR_WHLPCR, NULL);
@@ -76,13 +73,21 @@ static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
 	}
 
 	sdio_release_host(func);
-
-out:
 	dev->pm.last_activity = jiffies;
 
 	return 0;
 }
 
+static int mt7663s_mcu_drv_pmctrl(struct mt7615_dev *dev)
+{
+	struct mt76_phy *mphy = &dev->mt76.phy;
+
+	if (test_and_clear_bit(MT76_STATE_PM, &mphy->state))
+		return __mt7663s_mcu_drv_pmctrl(dev);
+
+	return 0;
+}
+
 static int mt7663s_mcu_fw_pmctrl(struct mt7615_dev *dev)
 {
 	struct sdio_func *func = dev->mt76.sdio.func;
@@ -123,7 +128,7 @@ int mt7663s_mcu_init(struct mt7615_dev *dev)
 	struct mt7615_mcu_ops *mcu_ops;
 	int ret;
 
-	ret = mt7663s_mcu_drv_pmctrl(dev);
+	ret = __mt7663s_mcu_drv_pmctrl(dev);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
index c55698f9c49af..028ff432d811f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
@@ -55,10 +55,7 @@ int mt7663u_mcu_init(struct mt7615_dev *dev)
 
 	dev->mt76.mcu_ops = &mt7663u_mcu_ops,
 
-	/* usb does not support runtime-pm */
-	clear_bit(MT76_STATE_PM, &dev->mphy.state);
 	mt76_set(dev, MT_UDMA_TX_QSEL, MT_FW_DL_EN);
-
 	if (test_and_clear_bit(MT76_STATE_POWER_OFF, &dev->mphy.state)) {
 		mt7615_mcu_restart(&dev->mt76);
 		if (!mt76_poll_msec(dev, MT_CONN_ON_MISC,
-- 
GitLab


From d4826d17b3931cf0d8351d8f614332dd4b71efc4 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 28 May 2021 14:03:04 +0200
Subject: [PATCH 1829/3804] mt76: mt7921: remove leftover 80+80 HE capability

Fixes interop issues with some APs that disable HE Tx if this is present

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210528120304.34751-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt7921/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index f4c27aa410486..97a0ef331ac32 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -74,8 +74,7 @@ mt7921_init_he_caps(struct mt7921_phy *phy, enum nl80211_band band,
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
 		else if (band == NL80211_BAND_5GHZ)
 			he_cap_elem->phy_cap_info[0] =
-				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
-				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G;
 
 		he_cap_elem->phy_cap_info[1] =
 			IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
-- 
GitLab


From 593f555fbc6091bbaec8dd2a38b47ee643412e61 Mon Sep 17 00:00:00 2001
From: Sriranjani P <sriranjani.p@samsung.com>
Date: Fri, 28 May 2021 12:40:56 +0530
Subject: [PATCH 1830/3804] net: stmmac: fix kernel panic due to NULL pointer
 dereference of mdio_bus_data

A fixed link does not need an mdio bus, and in that case mdio_bus_data
will not be allocated. Before using mdio_bus_data we should check for NULL.

This patch fixes the kernel panic due to a NULL pointer dereference of
mdio_bus_data when it is not allocated.

Without this patch we see the following kernel crash, caused by a kernel
NULL pointer dereference:

Call trace:
stmmac_dvr_probe+0x3c/0x10b0
dwc_eth_dwmac_probe+0x224/0x378
platform_probe+0x68/0xe0
really_probe+0x130/0x3d8
driver_probe_device+0x68/0xd0
device_driver_attach+0x74/0x80
__driver_attach+0x58/0xf8
bus_for_each_dev+0x7c/0xd8
driver_attach+0x24/0x30
bus_add_driver+0x148/0x1f0
driver_register+0x64/0x120
__platform_driver_register+0x28/0x38
dwc_eth_dwmac_driver_init+0x1c/0x28
do_one_initcall+0x78/0x158
kernel_init_freeable+0x1f0/0x244
kernel_init+0x14/0x118
ret_from_fork+0x10/0x30
Code: f9002bfb 9113e2d9 910e6273 aa0003f7 (f9405c78)
---[ end trace 32d9d41562ddc081 ]---

Fixes: e5e5b771f684 ("net: stmmac: make in-band AN mode parsing is supported for non-DT")
Signed-off-by: Sriranjani P <sriranjani.p@samsung.com>
Signed-off-by: Pankaj Dubey <pankaj.dubey@samsung.com>
Link: https://lore.kernel.org/r/20210528071056.35252-1-sriranjani.p@samsung.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5d956a5534345..342bdefcb8b4e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1240,8 +1240,9 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 	priv->phylink_config.dev = &priv->dev->dev;
 	priv->phylink_config.type = PHYLINK_NETDEV;
 	priv->phylink_config.pcs_poll = true;
-	priv->phylink_config.ovr_an_inband =
-		priv->plat->mdio_bus_data->xpcs_an_inband;
+	if (priv->plat->mdio_bus_data)
+		priv->phylink_config.ovr_an_inband =
+			priv->plat->mdio_bus_data->xpcs_an_inband;
 
 	if (!fwnode)
 		fwnode = dev_fwnode(priv->device);
-- 
GitLab


From 8124c8a6b35386f73523d27eacb71b5364a68c4c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 30 May 2021 11:58:25 -1000
Subject: [PATCH 1831/3804] Linux 5.13-rc4

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 82ef373caf4d8..b79e0e8acbe33 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
GitLab


From 8982d48af36d2562c0f904736b0fc80efc9f2532 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Mon, 17 May 2021 16:18:26 +0800
Subject: [PATCH 1832/3804] dmaengine: zynqmp_dma: Fix PM reference leak in
 zynqmp_dma_alloc_chan_resources()

pm_runtime_get_sync() increments the PM usage counter even when it fails.
Forgetting the matching put operation results in a reference leak here.
Fix it by replacing it with pm_runtime_resume_and_get() to keep the usage
counter balanced.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20210517081826.1564698-4-yukuai3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/zynqmp_dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index d8419565b92cc..5fecf5aa6e858 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
 	struct zynqmp_dma_desc_sw *desc;
 	int i, ret;
 
-	ret = pm_runtime_get_sync(chan->dev);
+	ret = pm_runtime_resume_and_get(chan->dev);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 83eb4868d325b86e18509d0874e911497667cb54 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Mon, 17 May 2021 16:18:24 +0800
Subject: [PATCH 1833/3804] dmaengine: stm32-mdma: fix PM reference leak in
 stm32_mdma_alloc_chan_resources()

pm_runtime_get_sync() increments the PM usage counter even when it fails.
Forgetting the matching put operation results in a reference leak here.
Fix it by replacing it with pm_runtime_resume_and_get() to keep the usage
counter balanced.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20210517081826.1564698-2-yukuai3@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/stm32-mdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 36ba8b43e78de..18cbd1e43c2e8 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -1452,7 +1452,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
 		return -ENOMEM;
 	}
 
-	ret = pm_runtime_get_sync(dmadev->ddev.dev);
+	ret = pm_runtime_resume_and_get(dmadev->ddev.dev);
 	if (ret < 0)
 		return ret;
 
@@ -1718,7 +1718,7 @@ static int stm32_mdma_pm_suspend(struct device *dev)
 	u32 ccr, id;
 	int ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 32828b82fb875b06511918b139d3a3cd93d34262 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Thu, 20 May 2021 18:24:17 +0300
Subject: [PATCH 1834/3804] dmaengine: xilinx: dpdma: Add missing dependencies
 to Kconfig

The driver depends on both OF and IOMEM support, express those
dependencies in Kconfig. This fixes a build failure on S390 reported by
the 0day bot.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Jianqiang Chen <jianqiang.chen@xilinx.com>
Reviewed-by: Jianqiang Chen <jianqiang.chen@xilinx.com>
Link: https://lore.kernel.org/r/20210520152420.23986-2-laurent.pinchart@ideasonboard.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 6ab9d9a488a6e..e47d4efbe7c53 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -701,6 +701,7 @@ config XILINX_ZYNQMP_DMA
 
 config XILINX_ZYNQMP_DPDMA
 	tristate "Xilinx DPDMA Engine"
+	depends on HAS_IOMEM && OF
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-- 
GitLab


From 9f007e7b6643799e2a6538a5fe04f51c371c6657 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Thu, 20 May 2021 18:24:20 +0300
Subject: [PATCH 1835/3804] dmaengine: xilinx: dpdma: Limit descriptor IDs to
 16 bits

While the descriptor ID is stored in a 32-bit field in the hardware
descriptor, only 16 bits are used by the hardware and are reported
through the XILINX_DPDMA_CH_DESC_ID register. Failure to handle the
wrap-around results in a descriptor ID mismatch after 65536 frames. Fix
it.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Tested-by: Jianqiang Chen <jianqiang.chen@xilinx.com>
Reviewed-by: Jianqiang Chen <jianqiang.chen@xilinx.com>
Link: https://lore.kernel.org/r/20210520152420.23986-5-laurent.pinchart@ideasonboard.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dpdma.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index ff7dfb3fdeb47..6c709803203ad 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -113,6 +113,7 @@
 #define XILINX_DPDMA_CH_VDO				0x020
 #define XILINX_DPDMA_CH_PYLD_SZ				0x024
 #define XILINX_DPDMA_CH_DESC_ID				0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK			GENMASK(15, 0)
 
 /* DPDMA descriptor fields */
 #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE		0xa5
@@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
 	 * will be used, but it should be enough.
 	 */
 	list_for_each_entry(sw_desc, &desc->descriptors, node)
-		sw_desc->hw.desc_id = desc->vdesc.tx.cookie;
+		sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+				    & XILINX_DPDMA_CH_DESC_ID_MASK;
 
 	sw_desc = list_first_entry(&desc->descriptors,
 				   struct xilinx_dpdma_sw_desc, node);
@@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct  xilinx_dpdma_chan *chan)
 	if (!chan->running || !pending)
 		goto out;
 
-	desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID);
+	desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+		& XILINX_DPDMA_CH_DESC_ID_MASK;
 
 	/* If the retrigger raced with vsync, retry at the next frame. */
 	sw_desc = list_first_entry(&pending->descriptors,
-- 
GitLab


From ddf742d4f3f12a6ba1b8e6ecbbf3ae736942f970 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 25 May 2021 12:23:37 -0700
Subject: [PATCH 1836/3804] dmaengine: idxd: Add missing cleanup for early
 error out in probe call

The probe call stack is missing some cleanup when things fail in the
middle. Add the appropriate cleanup routines to make sure we exit
gracefully.

Fixes: a39c7cd0438e ("dmaengine: idxd: removal of pcim managed mmio mapping")
Reported-by: Nikhil Rao <nikhil.rao@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/162197061707.392656.15760573520817310791.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/init.c | 61 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 6201f52f13f5d..2286232ebc7be 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -168,6 +168,32 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	return rc;
 }
 
+static void idxd_cleanup_interrupts(struct idxd_device *idxd)
+{
+	struct pci_dev *pdev = idxd->pdev;
+	struct idxd_irq_entry *irq_entry;
+	int i, msixcnt;
+
+	msixcnt = pci_msix_vec_count(pdev);
+	if (msixcnt <= 0)
+		return;
+
+	irq_entry = &idxd->irq_entries[0];
+	free_irq(irq_entry->vector, irq_entry);
+
+	for (i = 1; i < msixcnt; i++) {
+
+		irq_entry = &idxd->irq_entries[i];
+		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
+			idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+						       IDXD_IRQ_MSIX);
+		free_irq(irq_entry->vector, irq_entry);
+	}
+
+	idxd_mask_error_interrupts(idxd);
+	pci_free_irq_vectors(pdev);
+}
+
 static int idxd_setup_wqs(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
@@ -304,6 +330,19 @@ static int idxd_setup_groups(struct idxd_device *idxd)
 	return rc;
 }
 
+static void idxd_cleanup_internals(struct idxd_device *idxd)
+{
+	int i;
+
+	for (i = 0; i < idxd->max_groups; i++)
+		put_device(&idxd->groups[i]->conf_dev);
+	for (i = 0; i < idxd->max_engines; i++)
+		put_device(&idxd->engines[i]->conf_dev);
+	for (i = 0; i < idxd->max_wqs; i++)
+		put_device(&idxd->wqs[i]->conf_dev);
+	destroy_workqueue(idxd->wq);
+}
+
 static int idxd_setup_internals(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
@@ -532,12 +571,12 @@ static int idxd_probe(struct idxd_device *idxd)
 		dev_dbg(dev, "Loading RO device config\n");
 		rc = idxd_device_load_config(idxd);
 		if (rc < 0)
-			goto err;
+			goto err_config;
 	}
 
 	rc = idxd_setup_interrupts(idxd);
 	if (rc)
-		goto err;
+		goto err_config;
 
 	dev_dbg(dev, "IDXD interrupt setup complete.\n");
 
@@ -550,6 +589,8 @@ static int idxd_probe(struct idxd_device *idxd)
 	dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
 	return 0;
 
+ err_config:
+	idxd_cleanup_internals(idxd);
  err:
 	if (device_pasid_enabled(idxd))
 		idxd_disable_system_pasid(idxd);
@@ -557,6 +598,18 @@ static int idxd_probe(struct idxd_device *idxd)
 	return rc;
 }
 
+static void idxd_cleanup(struct idxd_device *idxd)
+{
+	struct device *dev = &idxd->pdev->dev;
+
+	perfmon_pmu_remove(idxd);
+	idxd_cleanup_interrupts(idxd);
+	idxd_cleanup_internals(idxd);
+	if (device_pasid_enabled(idxd))
+		idxd_disable_system_pasid(idxd);
+	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+}
+
 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct device *dev = &pdev->dev;
@@ -609,7 +662,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	rc = idxd_register_devices(idxd);
 	if (rc) {
 		dev_err(dev, "IDXD sysfs setup failed\n");
-		goto err;
+		goto err_dev_register;
 	}
 
 	idxd->state = IDXD_DEV_CONF_READY;
@@ -619,6 +672,8 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	return 0;
 
+ err_dev_register:
+	idxd_cleanup(idxd);
  err:
 	pci_iounmap(pdev, idxd->reg_base);
  err_iomap:
-- 
GitLab


From 253697b93c2a1c237d34d3ae326e394aeb0ca7b3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 21 May 2021 19:13:10 -0700
Subject: [PATCH 1837/3804] dmaengine: ALTERA_MSGDMA depends on HAS_IOMEM

When CONFIG_HAS_IOMEM is not set/enabled, certain iomap() family
functions [including ioremap(), devm_ioremap(), etc.] are not
available.
Drivers that use these functions should depend on HAS_IOMEM so that
they do not cause build errors.

Repairs this build error:
s390-linux-ld: drivers/dma/altera-msgdma.o: in function `request_and_map':
altera-msgdma.c:(.text+0x14b0): undefined reference to `devm_ioremap'

Fixes: a85c6f1b2921 ("dmaengine: Add driver for Altera / Intel mSGDMA IP core")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Stefan Roese <sr@denx.de>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Reviewed-by: Stefan Roese <sr@denx.de>
Phone: (+49)-8142-66989-51 Fax: (+49)-8142-66989-80 Email: sr@denx.de
Link: https://lore.kernel.org/r/20210522021313.16405-2-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index e47d4efbe7c53..39b5b46e880f2 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -59,6 +59,7 @@ config DMA_OF
 #devices
 config ALTERA_MSGDMA
 	tristate "Altera / Intel mSGDMA Engine"
+	depends on HAS_IOMEM
 	select DMA_ENGINE
 	help
 	  Enable support for Altera / Intel mSGDMA controller.
-- 
GitLab


From 0cfbb589d67f16fa55b26ae02b69c31b52e344b1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 21 May 2021 19:13:11 -0700
Subject: [PATCH 1838/3804] dmaengine: QCOM_HIDMA_MGMT depends on HAS_IOMEM

When CONFIG_HAS_IOMEM is not set/enabled, certain iomap() family
functions [including ioremap(), devm_ioremap(), etc.] are not
available.
Drivers that use these functions should depend on HAS_IOMEM so that
they do not cause build errors.

Rectifies these build errors:
s390-linux-ld: drivers/dma/qcom/hidma_mgmt.o: in function `hidma_mgmt_probe':
hidma_mgmt.c:(.text+0x780): undefined reference to `devm_ioremap_resource'
s390-linux-ld: drivers/dma/qcom/hidma_mgmt.o: in function `hidma_mgmt_init':
hidma_mgmt.c:(.init.text+0x126): undefined reference to `of_address_to_resource'
s390-linux-ld: hidma_mgmt.c:(.init.text+0x16e): undefined reference to `of_address_to_resource'

Fixes: 67a2003e0607 ("dmaengine: add Qualcomm Technologies HIDMA channel driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Sinan Kaya <okaya@codeaurora.org>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Link: https://lore.kernel.org/r/20210522021313.16405-3-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig
index 365f94eb3b081..3f926a653bd88 100644
--- a/drivers/dma/qcom/Kconfig
+++ b/drivers/dma/qcom/Kconfig
@@ -33,6 +33,7 @@ config QCOM_GPI_DMA
 
 config QCOM_HIDMA_MGMT
 	tristate "Qualcomm Technologies HIDMA Management support"
+	depends on HAS_IOMEM
 	select DMA_ENGINE
 	help
 	  Enable support for the Qualcomm Technologies HIDMA Management.
-- 
GitLab


From 8e2e4f3c58528c6040b5762b666734f8cceba568 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 21 May 2021 19:13:12 -0700
Subject: [PATCH 1839/3804] dmaengine: SF_PDMA depends on HAS_IOMEM

When CONFIG_HAS_IOMEM is not set/enabled, certain iomap() family
functions [including ioremap(), devm_ioremap(), etc.] are not
available.
Drivers that use these functions should depend on HAS_IOMEM so that
they do not cause build errors.

Mends this build error:
s390-linux-ld: drivers/dma/sf-pdma/sf-pdma.o: in function `sf_pdma_probe':
sf-pdma.c:(.text+0x1668): undefined reference to `devm_ioremap_resource'

Fixes: 6973886ad58e ("dmaengine: sf-pdma: add platform DMA support for HiFive Unleashed A00")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Green Wan <green.wan@sifive.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Link: https://lore.kernel.org/r/20210522021313.16405-4-rdunlap@infradead.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sf-pdma/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/sf-pdma/Kconfig b/drivers/dma/sf-pdma/Kconfig
index f8ffa02e279ff..ba46a0a15a936 100644
--- a/drivers/dma/sf-pdma/Kconfig
+++ b/drivers/dma/sf-pdma/Kconfig
@@ -1,5 +1,6 @@
 config SF_PDMA
 	tristate "Sifive PDMA controller driver"
+	depends on HAS_IOMEM
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-- 
GitLab


From fffdaba402cea79b8d219355487d342ec23f91c6 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 18 May 2021 22:11:08 +0800
Subject: [PATCH 1840/3804] dmaengine: stedma40: add missing iounmap() on error
 in d40_probe()

Add the missing iounmap() before return from d40_probe()
in the error handling case.

Fixes: 8d318a50b3d7 ("DMAENGINE: Support for ST-Ericssons DMA40 block v3")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210518141108.1324127-1-yangyingliang@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ste_dma40.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 265d7c07b348e..e1827393143f1 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -3675,6 +3675,9 @@ static int __init d40_probe(struct platform_device *pdev)
 
 	kfree(base->lcla_pool.base_unaligned);
 
+	if (base->lcpa_base)
+		iounmap(base->lcpa_base);
+
 	if (base->phy_lcpa)
 		release_mem_region(base->phy_lcpa,
 				   base->lcpa_size);
-- 
GitLab


From 12b2aaadb6d5ef77434e8db21f469f46fe2d392e Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 27 May 2021 18:16:38 -0700
Subject: [PATCH 1841/3804] nvme-rdma: fix in-capsule data send for chained sgls

We have only 2 inline sg entries and we allow 4 sg entries for the send
wr sge. Larger sgls will be chained. However, when we build the
in-capsule send wr sge, we iterate without taking into account that the
sgl may be chained and still fit in-capsule (which can happen if the sgl
has more than 2 entries, but at most 4).

Fix in-capsule data mapping to correctly iterate chained sgls.

Fixes: 38e1800275d3 ("nvme-rdma: Avoid preallocating big SGL for data")
Reported-by: Walker, Benjamin <benjamin.walker@intel.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 37943dc4c2c11..4697a94c09459 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1320,16 +1320,17 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
 		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
-	struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
 	struct ib_sge *sge = &req->sge[1];
+	struct scatterlist *sgl;
 	u32 len = 0;
 	int i;
 
-	for (i = 0; i < count; i++, sgl++, sge++) {
+	for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) {
 		sge->addr = sg_dma_address(sgl);
 		sge->length = sg_dma_len(sgl);
 		sge->lkey = queue->device->pd->local_dma_lkey;
 		len += sge->length;
+		sge++;
 	}
 
 	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
-- 
GitLab


From a06bc96902617e93920fea4ce376b8aca9dd3326 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Fri, 9 Apr 2021 19:02:43 +0800
Subject: [PATCH 1842/3804] soc: amlogic: meson-clk-measure: remove redundant
 dev_err call in meson_msr_probe()

There is an error message within devm_ioremap_resource
already, so remove the dev_err call to avoid redundant
error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/20210409110243.41-1-linqiheng@huawei.com
---
 drivers/soc/amlogic/meson-clk-measure.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/soc/amlogic/meson-clk-measure.c b/drivers/soc/amlogic/meson-clk-measure.c
index e1957476a0068..6dd190270123f 100644
--- a/drivers/soc/amlogic/meson-clk-measure.c
+++ b/drivers/soc/amlogic/meson-clk-measure.c
@@ -626,10 +626,8 @@ static int meson_msr_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base)) {
-		dev_err(&pdev->dev, "io resource mapping failed\n");
+	if (IS_ERR(base))
 		return PTR_ERR(base);
-	}
 
 	priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
 					     &meson_clk_msr_regmap_config);
-- 
GitLab


From 4cce442ffe5448ef572adc8b3abe7001b398e709 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 29 Apr 2021 10:38:23 +0200
Subject: [PATCH 1843/3804] arm64: meson: select COMMON_CLK

This fixes the recent removal of clock drivers selection.
While it is not necessary to select the clock drivers themselves, we need
to select a proper implementation of the clock API, which for meson is
the CCF.

Fixes: ba66a25536dd ("arm64: meson: ship only the necessary clock controllers")
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/20210429083823.59546-1-jbrunet@baylibre.com
---
 arch/arm64/Kconfig.platforms | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 6409b47b73e4a..7336c1fd0ddac 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -165,6 +165,7 @@ config ARCH_MEDIATEK
 
 config ARCH_MESON
 	bool "Amlogic Platforms"
+	select COMMON_CLK
 	select MESON_IRQ_GPIO
 	help
 	  This enables support for the arm64 based Amlogic SoCs
-- 
GitLab


From 7c7ad626d9a0ff0a36c1e2a3cfbbc6a13828d5eb Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 27 May 2021 14:29:15 +0200
Subject: [PATCH 1844/3804] sched/fair: Keep load_avg and load_sum synced

When removing a cfs_rq from the list, we only check the _sum values, so we
must ensure that _avg and _sum stay synced: load_sum must not be null while
load_avg is not, after propagating load in the cgroup hierarchy.

Use load_avg to compute load_sum similarly to what is done for util_sum
and runnable_sum.

Fixes: 0e2d2aaaae52 ("sched/fair: Rewrite PELT migration propagation")
Reported-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Odin Ugedal <odin@uged.al>
Link: https://lkml.kernel.org/r/20210527122916.27683-2-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3248e24a90b0f..f4795b8008415 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3499,10 +3499,9 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 static inline void
 update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
 	unsigned long load_avg;
 	u64 load_sum = 0;
-	s64 delta_sum;
 	u32 divider;
 
 	if (!runnable_sum)
@@ -3549,13 +3548,13 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, divider);
 
-	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
-	delta_avg = load_avg - se->avg.load_avg;
+	delta = load_avg - se->avg.load_avg;
 
 	se->avg.load_sum = runnable_sum;
 	se->avg.load_avg = load_avg;
-	add_positive(&cfs_rq->avg.load_avg, delta_avg);
-	add_positive(&cfs_rq->avg.load_sum, delta_sum);
+
+	add_positive(&cfs_rq->avg.load_avg, delta);
+	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
-- 
GitLab


From 02da26ad5ed6ea8680e5d01f20661439611ed776 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Thu, 27 May 2021 14:29:16 +0200
Subject: [PATCH 1845/3804] sched/fair: Make sure to update tg contrib for
 blocked load

During the update of fair blocked load (__update_blocked_fair()), we
update the contribution of the cfs in tg->load_avg if cfs_rq's pelt
has decayed.  Nevertheless, the pelt values of a cfs_rq could have
been recently updated while propagating the change of a child. In this
case, cfs_rq's pelt will not be decayed because it has already been
updated and we don't update tg->load_avg.

__update_blocked_fair
  ...
  for_each_leaf_cfs_rq_safe: child cfs_rq
    update cfs_rq_load_avg() for child cfs_rq
    ...
    update_load_avg(cfs_rq_of(se), se, 0)
      ...
      update cfs_rq_load_avg() for parent cfs_rq
		-propagation of child's load makes parent cfs_rq->load_sum
		 becoming null
        -UPDATE_TG is not set so it doesn't update parent
		 cfs_rq->tg_load_avg_contrib
  ..
  for_each_leaf_cfs_rq_safe: parent cfs_rq
    update cfs_rq_load_avg() for parent cfs_rq
      - nothing to do because parent cfs_rq has already been updated
		recently so cfs_rq->tg_load_avg_contrib is not updated
    ...
    parent cfs_rq is decayed
      list_del_leaf_cfs_rq parent cfs_rq
	  - but it still contributes to tg->load_avg

We must set the UPDATE_TG flag when propagating pending load to the parent.

Fixes: 039ae8bcf7a5 ("sched/fair: Fix O(nr_cgroups) in the load balancing path")
Reported-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Odin Ugedal <odin@uged.al>
Link: https://lkml.kernel.org/r/20210527122916.27683-3-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f4795b8008415..e7c8277e3d54a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8029,7 +8029,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
 		/* Propagate pending load changes to the parent, if any: */
 		se = cfs_rq->tg->se[cpu];
 		if (se && !skip_blocked_update(se))
-			update_load_avg(cfs_rq_of(se), se, 0);
+			update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
 
 		/*
 		 * There can be a lot of idle CPU cgroups.  Don't let fully
-- 
GitLab


From f268c3737ecaefcfeecfb4cb5e44958a8976f067 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Thu, 27 May 2021 13:34:41 +0200
Subject: [PATCH 1846/3804] tick/nohz: Only check for RCU deferred wakeup on
 user/guest entry when needed

Checking for and processing RCU-nocb deferred wakeup upon user/guest
entry is only relevant when nohz_full runs on the local CPU, otherwise
the periodic tick should take care of it.

Make sure we don't needlessly pollute these fast-paths as a -3%
performance regression on a will-it-scale.per_process_ops has been
reported so far.

Fixes: 47b8ff194c1f (entry: Explicitly flush pending rcuog wakeup before last rescheduling point)
Fixes: 4ae7dc97f726 (entry/kvm: Explicitly flush pending rcuog wakeup before last rescheduling point)
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Paul E. McKenney <paulmck@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210527113441.465489-1-frederic@kernel.org
---
 include/linux/entry-kvm.h | 3 ++-
 include/linux/tick.h      | 7 +++++++
 kernel/entry/common.c     | 5 +++--
 kernel/time/tick-sched.c  | 1 +
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 8b2b1d68b9545..136b8d97d8c01 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -3,6 +3,7 @@
 #define __LINUX_ENTRYKVM_H
 
 #include <linux/entry-common.h>
+#include <linux/tick.h>
 
 /* Transfer to guest mode work */
 #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
@@ -57,7 +58,7 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
 static inline void xfer_to_guest_mode_prepare(void)
 {
 	lockdep_assert_irqs_disabled();
-	rcu_nocb_flush_deferred_wakeup();
+	tick_nohz_user_enter_prepare();
 }
 
 /**
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 7340613c7eff7..1a0ff88fa107b 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -11,6 +11,7 @@
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern void __init tick_init(void);
@@ -300,4 +301,10 @@ static inline void tick_nohz_task_switch(void)
 		__tick_nohz_task_switch();
 }
 
+static inline void tick_nohz_user_enter_prepare(void)
+{
+	if (tick_nohz_full_cpu(smp_processor_id()))
+		rcu_nocb_flush_deferred_wakeup();
+}
+
 #endif
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index a0b3b04fb5965..bf16395b9e135 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -5,6 +5,7 @@
 #include <linux/highmem.h>
 #include <linux/livepatch.h>
 #include <linux/audit.h>
+#include <linux/tick.h>
 
 #include "common.h"
 
@@ -186,7 +187,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		local_irq_disable_exit_to_user();
 
 		/* Check if any of the above work has queued a deferred wakeup */
-		rcu_nocb_flush_deferred_wakeup();
+		tick_nohz_user_enter_prepare();
 
 		ti_work = READ_ONCE(current_thread_info()->flags);
 	}
@@ -202,7 +203,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 	lockdep_assert_irqs_disabled();
 
 	/* Flush pending rcuog wakeup before the last need_resched() check */
-	rcu_nocb_flush_deferred_wakeup();
+	tick_nohz_user_enter_prepare();
 
 	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 		ti_work = exit_to_user_mode_loop(regs, ti_work);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 828b091501ca4..6784f27a30993 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -230,6 +230,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 
 #ifdef CONFIG_NO_HZ_FULL
 cpumask_var_t tick_nohz_full_mask;
+EXPORT_SYMBOL_GPL(tick_nohz_full_mask);
 bool tick_nohz_full_running;
 EXPORT_SYMBOL_GPL(tick_nohz_full_running);
 static atomic_t tick_dep_mask;
-- 
GitLab


From 6c605f8371159432ec61cbb1488dcf7ad24ad19a Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 27 May 2021 12:47:11 +0200
Subject: [PATCH 1847/3804] perf: Fix data race between pin_count
 increment/decrement

KCSAN reports a data race between increment and decrement of pin_count:

  write to 0xffff888237c2d4e0 of 4 bytes by task 15740 on cpu 1:
   find_get_context		kernel/events/core.c:4617
   __do_sys_perf_event_open	kernel/events/core.c:12097 [inline]
   __se_sys_perf_event_open	kernel/events/core.c:11933
   ...
  read to 0xffff888237c2d4e0 of 4 bytes by task 15743 on cpu 0:
   perf_unpin_context		kernel/events/core.c:1525 [inline]
   __do_sys_perf_event_open	kernel/events/core.c:12328 [inline]
   __se_sys_perf_event_open	kernel/events/core.c:11933
   ...

Because neither read-modify-write here is atomic, this can lead to one
of the operations being lost, resulting in an inconsistent pin_count.
Fix it by adding the missing locking in the CPU-event case.

Fixes: fe4b04fa31a6 ("perf: Cure task_oncpu_function_call() races")
Reported-by: syzbot+142c9018f5962db69c7e@syzkaller.appspotmail.com
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210527104711.2671610-1-elver@google.com
---
 kernel/events/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6fee4a7e88d7d..fe88d6eea3c2c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4609,7 +4609,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
+		raw_spin_lock_irqsave(&ctx->lock, flags);
 		++ctx->pin_count;
+		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
 		return ctx;
 	}
-- 
GitLab


From 4a0e3ff30980b7601b13dd3b7ee275212b852843 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Wed, 26 May 2021 06:58:47 -0700
Subject: [PATCH 1848/3804] perf/x86/intel/uncore: Fix a kernel WARNING
 triggered by maxcpus=1

A kernel WARNING may be triggered when setting maxcpus=1.

The uncore counters are Die-scope. When probing a PCI device, only the
BUS information can be retrieved. The uncore driver has to maintain a
mapping table used to calculate the logical Die ID from a given BUS#.

Before the patch ba9506be4e40, the mapping table stores the mapping
information from the BUS# -> a Physical Socket ID. To calculate the
logical die ID, perf does,
- In snbep_pci2phy_map_init(), retrieve the BUS# -> a Physical Socket ID
  from the UBOX PCI configure space.
- Calculate the mapping information (a BUS# -> a Physical Socket ID) for
  the other PCI BUS.
- In the uncore_pci_probe(), get the physical Socket ID from a given BUS
  and the mapping table.
- Calculate the logical Die ID

Since only the logical Die ID is required, with the patch ba9506be4e40,
the mapping table stores the mapping information from the BUS# -> a
logical Die ID. Now perf does,
- In snbep_pci2phy_map_init(), retrieve the BUS# -> a Physical Socket ID
  from the UBOX PCI configure space.
- Calculate the logical Die ID
- Calculate the mapping information (a BUS# -> a logical Die ID) for the
  other PCI BUS.
- In the uncore_pci_probe(), get the logical die ID from a given BUS and
  the mapping table.

When calculating the logical Die ID, -1 may be returned, especially when
maxcpus=1. Here, -1 means the logical Die ID is not found. But when
calculating the mapping information for the other PCI BUS, -1 indicates
that it's the other PCI BUS that requires the calculation of the
mapping. The driver will mistakenly do the calculation.

Use -ENODEV to indicate the case in which the logical Die ID is not
found. The driver will not mess up the mapping table anymore.

Fixes: ba9506be4e40 ("perf/x86/intel/uncore: Store the logical die id instead of the physical die id.")
Reported-by: John Donnelly <john.p.donnelly@oracle.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: John Donnelly <john.p.donnelly@oracle.com>
Tested-by: John Donnelly <john.p.donnelly@oracle.com>
Link: https://lkml.kernel.org/r/1622037527-156028-1-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 63f097289a84c..1587d32897430 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
 						die_id = i;
 					else
 						die_id = topology_phys_to_logical_pkg(i);
+					if (die_id < 0)
+						die_id = -ENODEV;
 					map->pbus_to_dieid[bus] = die_id;
 					break;
 				}
@@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
 			i = -1;
 			if (reverse) {
 				for (bus = 255; bus >= 0; bus--) {
-					if (map->pbus_to_dieid[bus] >= 0)
+					if (map->pbus_to_dieid[bus] != -1)
 						i = map->pbus_to_dieid[bus];
 					else
 						map->pbus_to_dieid[bus] = i;
 				}
 			} else {
 				for (bus = 0; bus <= 255; bus++) {
-					if (map->pbus_to_dieid[bus] >= 0)
+					if (map->pbus_to_dieid[bus] != -1)
 						i = map->pbus_to_dieid[bus];
 					else
 						map->pbus_to_dieid[bus] = i;
-- 
GitLab


From fab6216fafdd74cd84de929ffe7b787976d32cff Mon Sep 17 00:00:00 2001
From: Xiongwei Song <sxwjean@gmail.com>
Date: Mon, 24 May 2021 23:05:45 +0800
Subject: [PATCH 1849/3804] locking/lockdep,doc: Improve readability of the
 block matrix

The block condition matrix uses 'E' as the writer notation, whereas
the writer reminder below the matrix uses 'W'. To make them consistent
and the matrix more readable, use 'W' to represent the writer.

Suggested-by: Waiman Long <llong@redhat.com>
Suggested-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lkml.kernel.org/r/1621868745-23311-1-git-send-email-sxwjean@me.com
---
 Documentation/locking/lockdep-design.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/locking/lockdep-design.rst b/Documentation/locking/lockdep-design.rst
index 9f3cfca9f8a45..82f36cab61bdd 100644
--- a/Documentation/locking/lockdep-design.rst
+++ b/Documentation/locking/lockdep-design.rst
@@ -453,9 +453,9 @@ There are simply four block conditions:
 Block condition matrix, Y means the row blocks the column, and N means otherwise.
 
 	+---+---+---+---+
-	|   | E | r | R |
+	|   | W | r | R |
 	+---+---+---+---+
-	| E | Y | Y | Y |
+	| W | Y | Y | Y |
 	+---+---+---+---+
 	| r | Y | Y | N |
 	+---+---+---+---+
-- 
GitLab


From b8e00abe7d9fe21dd13609e2e3a707e38902b105 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 24 May 2021 15:41:50 -0700
Subject: [PATCH 1850/3804] locking/lockdep: Reduce LOCKDEP dependency list

Some arches (um, sparc64, riscv, xtensa) cause a Kconfig warning for
LOCKDEP.
These arch-es select LOCKDEP_SUPPORT but they are not listed as one
of the arch-es that LOCKDEP depends on.

Since (16) arch-es define the Kconfig symbol LOCKDEP_SUPPORT if they
intend to have LOCKDEP support, replace the awkward list of
arch-es that LOCKDEP depends on with the LOCKDEP_SUPPORT symbol.

But wait. LOCKDEP_SUPPORT is included in LOCK_DEBUGGING_SUPPORT,
which is already a dependency here, so LOCKDEP_SUPPORT is redundant
and not needed.
That leaves the FRAME_POINTER dependency, but it is part of an
expression like this:
	depends on (A && B) && (FRAME_POINTER || B')
where B' is a dependency of B so if B is true then B' is true
and the value of FRAME_POINTER does not matter.
Thus we can also delete the FRAME_POINTER dependency.

Fixes this kconfig warning: (for um, sparc64, riscv, xtensa)

WARNING: unmet direct dependencies detected for LOCKDEP
  Depends on [n]: DEBUG_KERNEL [=y] && LOCK_DEBUGGING_SUPPORT [=y] && (FRAME_POINTER [=n] || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86)
  Selected by [y]:
  - PROVE_LOCKING [=y] && DEBUG_KERNEL [=y] && LOCK_DEBUGGING_SUPPORT [=y]
  - LOCK_STAT [=y] && DEBUG_KERNEL [=y] && LOCK_DEBUGGING_SUPPORT [=y]
  - DEBUG_LOCK_ALLOC [=y] && DEBUG_KERNEL [=y] && LOCK_DEBUGGING_SUPPORT [=y]

Fixes: 7d37cb2c912d ("lib: fix kconfig dependency on ARCH_WANT_FRAME_POINTERS")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Waiman Long <longman@redhat.com>
Link: https://lkml.kernel.org/r/20210524224150.8009-1-rdunlap@infradead.org
---
 lib/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 678c13967580e..1e1bd6f4a13de 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1372,7 +1372,6 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
 	select STACKTRACE
-	depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86
 	select KALLSYMS
 	select KALLSYMS_ALL
 
-- 
GitLab


From 6411e386db0a477217607015e7d2910d02f75426 Mon Sep 17 00:00:00 2001
From: Wang Wensheng <wangwensheng4@huawei.com>
Date: Mon, 17 May 2021 01:57:49 +0000
Subject: [PATCH 1851/3804] phy: cadence: Sierra: Fix error return code in
 cdns_sierra_phy_probe()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: a43f72ae136a ("phy: cadence: Sierra: Change MAX_LANES of Sierra to 16")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Link: https://lore.kernel.org/r/20210517015749.127799-1-wangwensheng4@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/cadence/phy-cadence-sierra.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c
index 5c68e31c59399..e93818e3991fd 100644
--- a/drivers/phy/cadence/phy-cadence-sierra.c
+++ b/drivers/phy/cadence/phy-cadence-sierra.c
@@ -940,6 +940,7 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev)
 	sp->nsubnodes = node;
 
 	if (sp->num_lanes > SIERRA_MAX_LANES) {
+		ret = -EINVAL;
 		dev_err(dev, "Invalid lane configuration\n");
 		goto put_child2;
 	}
-- 
GitLab


From aaac9a1bd370338ce372669eb9a6059d16b929aa Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Wed, 19 May 2021 18:37:39 +0800
Subject: [PATCH 1852/3804] phy: phy-mtk-tphy: Fix some resource leaks in
 mtk_phy_init()

Use clk_disable_unprepare() in the error path of mtk_phy_init() to fix
some resource leaks.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/1621420659-15858-1-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/mediatek/phy-mtk-tphy.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c
index cdbcc49f71152..731c483a04dea 100644
--- a/drivers/phy/mediatek/phy-mtk-tphy.c
+++ b/drivers/phy/mediatek/phy-mtk-tphy.c
@@ -949,6 +949,8 @@ static int mtk_phy_init(struct phy *phy)
 		break;
 	default:
 		dev_err(tphy->dev, "incompatible PHY type\n");
+		clk_disable_unprepare(instance->ref_clk);
+		clk_disable_unprepare(instance->da_ref_clk);
 		return -EINVAL;
 	}
 
-- 
GitLab


From b8203ec7f58ae925e10fadd3d136073ae7503a6e Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 25 May 2021 18:50:32 +0800
Subject: [PATCH 1853/3804] phy: ti: Fix an error code in wiz_probe()

When the code executes this if statement, the value of ret is 0.
However, we can see from the dev_err() log that the value of
ret should be -EINVAL.

Clean up smatch warning:

drivers/phy/ti/phy-j721e-wiz.c:1216 wiz_probe() warn: missing error code 'ret'

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: c9f9eba06629 ("phy: ti: j721e-wiz: Manage typec-gpio-dir")
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1621939832-65535-1-git-send-email-yang.lee@linux.alibaba.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-j721e-wiz.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
index 9eb6d37c907ea..126f5b8735cc1 100644
--- a/drivers/phy/ti/phy-j721e-wiz.c
+++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -1212,6 +1212,7 @@ static int wiz_probe(struct platform_device *pdev)
 
 		if (wiz->typec_dir_delay < WIZ_TYPEC_DIR_DEBOUNCE_MIN ||
 		    wiz->typec_dir_delay > WIZ_TYPEC_DIR_DEBOUNCE_MAX) {
+			ret = -EINVAL;
 			dev_err(dev, "Invalid typec-dir-debounce property\n");
 			goto err_addr_to_resource;
 		}
-- 
GitLab


From 280b68a3b3b96b027fcdeb5a3916a8e2aaf84d03 Mon Sep 17 00:00:00 2001
From: Pu Wen <puwen@hygon.cn>
Date: Fri, 28 May 2021 16:14:17 +0800
Subject: [PATCH 1854/3804] x86/cstate: Allow ACPI C1 FFH MWAIT use on Hygon
 systems

Hygon systems support the MONITOR/MWAIT instructions and these can be
used for ACPI C1 in the same way as on AMD and Intel systems.

The BIOS declares a C1 state in _CST to use FFH and CPUID_Fn00000005_EDX
is non-zero on Hygon systems.

Allow ffh_cstate_init() to succeed on Hygon systems to default using FFH
MWAIT instead of HALT for ACPI C1.

Signed-off-by: Pu Wen <puwen@hygon.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210528081417.31474-1-puwen@hygon.cn
---
 arch/x86/kernel/acpi/cstate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 49ae4e1ac9cd8..7de599eba7f04 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -197,7 +197,8 @@ static int __init ffh_cstate_init(void)
 	struct cpuinfo_x86 *c = &boot_cpu_data;
 
 	if (c->x86_vendor != X86_VENDOR_INTEL &&
-	    c->x86_vendor != X86_VENDOR_AMD)
+	    c->x86_vendor != X86_VENDOR_AMD &&
+	    c->x86_vendor != X86_VENDOR_HYGON)
 		return -1;
 
 	cpu_cstate_entry = alloc_percpu(struct cstate_entry);
-- 
GitLab


From 9a51ebd7e5b6b8aa826d3c24f2077adf3b5df129 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Wed, 26 May 2021 10:27:27 +1000
Subject: [PATCH 1855/3804] MAINTAINERS, .mailmap: Update Finn Thain's email
 address

Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Link: https://lore.kernel.org/r/fc397a7074d627e22974ef8927910ad08744db5c.1621988847.git.fthain@linux-m68k.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 .mailmap    | 1 +
 MAINTAINERS | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 3e2bff9137e91..f0a0c72e5058b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,7 @@ Felipe W Damasio <felipewd@terra.com.br>
 Felix Kuhling <fxkuehl@gmx.de>
 Felix Moeller <felix@derklecks.de>
 Filipe Lautert <filipe@icewall.org>
+Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
 Franck Bui-Huu <vagabon.xyz@gmail.com>
 Frank Rowand <frowand.list@gmail.com> <frank.rowand@am.sony.com>
 Frank Rowand <frowand.list@gmail.com> <frank.rowand@sonymobile.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index bd7aff0c120f2..0dcf8b505da2a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12592,7 +12592,7 @@ S:	Orphan
 F:	drivers/net/ethernet/natsemi/natsemi.c
 
 NCR 5380 SCSI DRIVERS
-M:	Finn Thain <fthain@telegraphics.com.au>
+M:	Finn Thain <fthain@linux-m68k.org>
 M:	Michael Schmitz <schmitzmic@gmail.com>
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
@@ -13064,7 +13064,7 @@ F:	Documentation/filesystems/ntfs.rst
 F:	fs/ntfs/
 
 NUBUS SUBSYSTEM
-M:	Finn Thain <fthain@telegraphics.com.au>
+M:	Finn Thain <fthain@linux-m68k.org>
 L:	linux-m68k@lists.linux-m68k.org
 S:	Maintained
 F:	arch/*/include/asm/nubus.h
-- 
GitLab


From b44bf9410a6921a232679124ca48daa2a8a15303 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 10 May 2021 11:53:02 +0200
Subject: [PATCH 1856/3804] m68k: defconfig: Update defconfigs for v5.13-rc1

  - Drop CONFIG_NF_LOG_NETDEV=m (removed in commit 1510618e45cb9fb7
    ("netfilter: nf_log_netdev: merge with nf_log_syslog")),
  - Enable modular build of IPv4 packet logging (no longer auto-enabled
    since commit db3187ae21bb0cff ("netfilter: nf_log_ipv4: rename to
    nf_log_syslog")),
  - Drop CONFIG_NF_LOG_BRIDGE=m (removed in commit 77ccee96a67422ac
    ("netfilter: nf_log_bridge: merge with nf_log_syslog")),
  - Enable modular build of ECDSA crypto algorithm,
  - Enable modular build of 64bit/32bit division and modulo test.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20210510095302.4125561-1-geert@linux-m68k.org
---
 arch/m68k/configs/amiga_defconfig    | 5 +++--
 arch/m68k/configs/apollo_defconfig   | 5 +++--
 arch/m68k/configs/atari_defconfig    | 5 +++--
 arch/m68k/configs/bvme6000_defconfig | 5 +++--
 arch/m68k/configs/hp300_defconfig    | 5 +++--
 arch/m68k/configs/mac_defconfig      | 5 +++--
 arch/m68k/configs/multi_defconfig    | 5 +++--
 arch/m68k/configs/mvme147_defconfig  | 5 +++--
 arch/m68k/configs/mvme16x_defconfig  | 5 +++--
 arch/m68k/configs/q40_defconfig      | 5 +++--
 arch/m68k/configs/sun3_defconfig     | 5 +++--
 arch/m68k/configs/sun3x_defconfig    | 5 +++--
 12 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 59b727b693575..75e8e9b6551dd 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -85,7 +85,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -207,6 +206,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -253,7 +253,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -563,6 +562,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -627,6 +627,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 8d4ddcebe7b8d..4dc6dcfaf28ab 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -81,7 +81,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -203,6 +202,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -249,7 +249,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -519,6 +518,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -583,6 +583,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 9cc9f1a065164..0a133e896bdcb 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -88,7 +88,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -210,6 +209,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -256,7 +256,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -541,6 +540,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -605,6 +605,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index c3f3f462e6ce6..2c3f428338469 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -78,7 +78,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -200,6 +199,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -246,7 +246,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -512,6 +511,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -576,6 +576,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 8c908fc5c1910..5b1898d4b249a 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -80,7 +80,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -202,6 +201,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -248,7 +248,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -521,6 +520,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -585,6 +585,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 4e68b72d9c50f..30c61f518c181 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -79,7 +79,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -201,6 +200,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -247,7 +247,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -544,6 +543,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -608,6 +608,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d31896293c394..0ee0e61289056 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -99,7 +99,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -221,6 +220,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -267,7 +267,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -630,6 +629,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -694,6 +694,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index c7442f9dd469a..793085f00c99f 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -77,7 +77,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -199,6 +198,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -245,7 +245,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -511,6 +510,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -575,6 +575,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 233b82ea103a4..56fbac7943b2e 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -78,7 +78,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -200,6 +199,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -246,7 +246,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -512,6 +511,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -576,6 +576,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 664025a0f6a41..ebfd1cb3a9b6f 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -79,7 +79,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -201,6 +200,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -247,7 +247,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -530,6 +529,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -594,6 +594,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 73293a0b3dc86..3490a05f29b82 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -75,7 +75,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -197,6 +196,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -243,7 +243,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -514,6 +513,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -577,6 +577,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index bca8a6f3e92f5..4e92c8c332fc5 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -75,7 +75,6 @@ CONFIG_IPV6_VTI=m
 CONFIG_IPV6_GRE=m
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_ZONES=y
 # CONFIG_NF_CONNTRACK_PROCFS is not set
 # CONFIG_NF_CT_PROTO_DCCP is not set
@@ -197,6 +196,7 @@ CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_NF_FLOW_TABLE_IPV4=m
 CONFIG_NF_LOG_ARP=m
+CONFIG_NF_LOG_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -243,7 +243,6 @@ CONFIG_IP6_NF_TARGET_NPT=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NFT_BRIDGE_META=m
 CONFIG_NFT_BRIDGE_REJECT=m
-CONFIG_NF_LOG_BRIDGE=m
 CONFIG_NF_CONNTRACK_BRIDGE=m
 CONFIG_BRIDGE_NF_EBTABLES=m
 CONFIG_BRIDGE_EBT_BROUTE=m
@@ -513,6 +512,7 @@ CONFIG_CRYPTO_TEST=m
 CONFIG_CRYPTO_RSA=m
 CONFIG_CRYPTO_DH=m
 CONFIG_CRYPTO_ECDH=m
+CONFIG_CRYPTO_ECDSA=m
 CONFIG_CRYPTO_ECRDSA=m
 CONFIG_CRYPTO_SM2=m
 CONFIG_CRYPTO_CURVE25519=m
@@ -577,6 +577,7 @@ CONFIG_KUNIT_ALL_TESTS=m
 CONFIG_TEST_LIST_SORT=m
 CONFIG_TEST_MIN_HEAP=m
 CONFIG_TEST_SORT=m
+CONFIG_TEST_DIV64=m
 CONFIG_REED_SOLOMON_TEST=m
 CONFIG_ATOMIC64_SELFTEST=m
 CONFIG_ASYNC_RAID6_TEST=m
-- 
GitLab


From 1ab19c5de4c537ec0d9b21020395a5b5a6c059b2 Mon Sep 17 00:00:00 2001
From: Hillf Danton <hdanton@sina.com>
Date: Tue, 18 May 2021 16:46:25 +0800
Subject: [PATCH 1857/3804] gfs2: Fix use-after-free in gfs2_glock_shrink_scan

The GLF_LRU flag is checked under lru_lock in gfs2_glock_remove_from_lru() to
remove the glock from the lru list in __gfs2_glock_put().

On the shrink scan path, the same flag is cleared under lru_lock but because
of cond_resched_lock(&lru_lock) in gfs2_dispose_glock_lru(), progress on the
put side can be made without deleting the glock from the lru list.

Keep GLF_LRU across the race window opened by cond_resched_lock(&lru_lock) to
ensure correct behavior on both sides - clear GLF_LRU after list_del under
lru_lock.

Reported-by: syzbot <syzbot+34ba7ddbf3021981a228@syzkaller.appspotmail.com>
Signed-off-by: Hillf Danton <hdanton@sina.com>
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 797949e784ccd..d9cb261f55b06 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1795,6 +1795,7 @@ __acquires(&lru_lock)
 	while(!list_empty(list)) {
 		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
 		list_del_init(&gl->gl_lru);
+		clear_bit(GLF_LRU, &gl->gl_flags);
 		if (!spin_trylock(&gl->gl_lockref.lock)) {
 add_back_to_lru:
 			list_add(&gl->gl_lru, &lru_list);
@@ -1840,7 +1841,6 @@ static long gfs2_scan_glock_lru(int nr)
 		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
 			list_move(&gl->gl_lru, &dispose);
 			atomic_dec(&lru_count);
-			clear_bit(GLF_LRU, &gl->gl_flags);
 			freed++;
 			continue;
 		}
-- 
GitLab


From e4dfe108371214500ee10c2cf19268f53acaa803 Mon Sep 17 00:00:00 2001
From: Erik Kaneda <erik.kaneda@intel.com>
Date: Fri, 21 May 2021 15:28:08 -0700
Subject: [PATCH 1858/3804] ACPICA: Clean up context mutex during object
 deletion

ACPICA commit bc43c878fd4ff27ba75b1d111b97ee90d4a82707

Fixes: c27f3d011b08 ("Fix race in GenericSerialBus (I2C) and GPIO OpRegion parameter handling")
Link: https://github.com/acpica/acpica/commit/bc43c878
Reported-by: John Garry <john.garry@huawei.com>
Reported-by: Xiang Chen <chenxiang66@hisilicon.com>
Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Erik Kaneda <erik.kaneda@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utdelete.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c
index 624a26794d558..e5ba9795ec696 100644
--- a/drivers/acpi/acpica/utdelete.c
+++ b/drivers/acpi/acpica/utdelete.c
@@ -285,6 +285,14 @@ static void acpi_ut_delete_internal_obj(union acpi_operand_object *object)
 		}
 		break;
 
+	case ACPI_TYPE_LOCAL_ADDRESS_HANDLER:
+
+		ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS,
+				  "***** Address handler %p\n", object));
+
+		acpi_os_delete_mutex(object->address_space.context_mutex);
+		break;
+
 	default:
 
 		break;
-- 
GitLab


From c58e7ed28b4534ed073371843d03c433d6a9fe34 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 26 May 2021 12:22:51 -0400
Subject: [PATCH 1859/3804] PM: runtime: document common mistake with
 pm_runtime_get_sync()

pm_runtime_get_sync(), contrary to intuition, does not drop the runtime
PM usage counter on errors, which has led to several wrong usages in
drivers (missing the put).  pm_runtime_resume_and_get() was added as a
better implementation, so document the preference for using it, hoping
it will stop bad patterns.

Suggested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
[ rjw: Documentation change edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.rst | 6 +++++-
 include/linux/pm_runtime.h         | 3 +++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index 18ae21bf7f925..b48cac5f90489 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -378,7 +378,11 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h:
 
   `int pm_runtime_get_sync(struct device *dev);`
     - increment the device's usage counter, run pm_runtime_resume(dev) and
-      return its result
+      return its result;
+      note that it does not drop the device's usage counter on errors, so
+      consider using pm_runtime_resume_and_get() instead of it, especially
+      if its return value is checked by the caller, as this is likely to
+      result in cleaner code.
 
   `int pm_runtime_get_if_in_use(struct device *dev);`
     - return -EINVAL if 'power.disable_depth' is nonzero; otherwise, if the
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 6c08a085367bf..aab8b35e9f8ac 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -380,6 +380,9 @@ static inline int pm_runtime_get(struct device *dev)
  * The possible return values of this function are the same as for
  * pm_runtime_resume() and the runtime PM usage counter of @dev remains
  * incremented in all cases, even if it returns an error code.
+ * Consider using pm_runtime_resume_and_get() instead of it, especially
+ * if its return value is checked by the caller, as this is likely to result
+ * in cleaner code.
  */
 static inline int pm_runtime_get_sync(struct device *dev)
 {
-- 
GitLab


From 671cc352acd3e2b2832b59787ed8027d9f80ccc9 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sun, 30 May 2021 22:55:06 +0300
Subject: [PATCH 1860/3804] drm/tegra: Correct
 DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT

The format modifier is 64bit, while DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT
uses BIT() macro that is 32bit on ARM32.

The (modifier &= ~DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) doesn't work as
expected on ARM32 and tegra_fb_get_tiling() fails for the tiled formats
on 32bit Tegra because modifier mask isn't applied properly. Use the
BIT_ULL() macro to fix this trouble.

Fixes: 7b6f846785f4 ("drm/tegra: Support sector layout on Tegra194")
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 87df251c1fcf5..0cb8680653483 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -25,7 +25,7 @@
 #include "trace.h"
 
 /* XXX move to include/uapi/drm/drm_fourcc.h? */
-#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT(22)
+#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT_ULL(22)
 
 struct reset_control;
 
-- 
GitLab


From e305509e678b3a4af2b3cfd410f409f7cdaabb52 Mon Sep 17 00:00:00 2001
From: Lin Ma <linma@zju.edu.cn>
Date: Sun, 30 May 2021 21:37:43 +0800
Subject: [PATCH 1861/3804] Bluetooth: use correct lock to prevent UAF of hdev
 object

The hci_sock_dev_event() function will clean up the hdev object for
sockets even if this object may still be in use within the
hci_sock_bound_ioctl() function, resulting in a UAF vulnerability.

This patch replaces the BH context lock with lock_sock() to serialize
these operations and prevent the race condition.

Signed-off-by: Lin Ma <linma@zju.edu.cn>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 251b9128f530a..eed0dd066e12c 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -762,7 +762,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 		/* Detach sockets from device */
 		read_lock(&hci_sk_list.lock);
 		sk_for_each(sk, &hci_sk_list.head) {
-			bh_lock_sock_nested(sk);
+			lock_sock(sk);
 			if (hci_pi(sk)->hdev == hdev) {
 				hci_pi(sk)->hdev = NULL;
 				sk->sk_err = EPIPE;
@@ -771,7 +771,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
 
 				hci_dev_put(hdev);
 			}
-			bh_unlock_sock(sk);
+			release_sock(sk);
 		}
 		read_unlock(&hci_sk_list.lock);
 	}
-- 
GitLab


From 1fa98d96ea0ff6c8770eeba90417aab4b4e07f52 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 23 May 2021 14:58:25 +0800
Subject: [PATCH 1862/3804] clockevents: Use DEVICE_ATTR_[RO|WO] macros

Use the DEVICE_ATTR_[RO|WO] helpers instead of plain DEVICE_ATTR, which
makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210523065825.19684-1-yuehaibing@huawei.com
---
 kernel/time/clockevents.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index f5490222e134a..0056d2bed53ed 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -668,9 +668,9 @@ static struct bus_type clockevents_subsys = {
 static DEFINE_PER_CPU(struct device, tick_percpu_dev);
 static struct tick_device *tick_get_tick_dev(struct device *dev);
 
-static ssize_t sysfs_show_current_tick_dev(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
+static ssize_t current_device_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
 {
 	struct tick_device *td;
 	ssize_t count = 0;
@@ -682,12 +682,12 @@ static ssize_t sysfs_show_current_tick_dev(struct device *dev,
 	raw_spin_unlock_irq(&clockevents_lock);
 	return count;
 }
-static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
+static DEVICE_ATTR_RO(current_device);
 
 /* We don't support the abomination of removable broadcast devices */
-static ssize_t sysfs_unbind_tick_dev(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t count)
+static ssize_t unbind_device_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t count)
 {
 	char name[CS_NAME_LEN];
 	ssize_t ret = sysfs_get_uname(buf, name, count);
@@ -714,7 +714,7 @@ static ssize_t sysfs_unbind_tick_dev(struct device *dev,
 	mutex_unlock(&clockevents_mutex);
 	return ret ? ret : count;
 }
-static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
+static DEVICE_ATTR_WO(unbind_device);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 static struct device tick_bc_dev = {
-- 
GitLab


From c2d4fee3f6d170dee5ee7c337a0ba5e92fad7a64 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:14 +0100
Subject: [PATCH 1863/3804] tick/broadcast: Drop unneeded
 CONFIG_GENERIC_CLOCKEVENTS_BROADCAST guard

tick-broadcast.o is only built if CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
so remove the redundant #ifdef guards around the definition of
tick_receive_broadcast().

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-2-will@kernel.org
---
 kernel/time/tick-broadcast.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index a440552287969..fb794ff4855ec 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -253,7 +253,6 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
 	return ret;
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 int tick_receive_broadcast(void)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
@@ -268,7 +267,6 @@ int tick_receive_broadcast(void)
 	evt->event_handler(evt);
 	return 0;
 }
-#endif
 
 /*
  * Broadcast the event to the cpus, which are set in the mask (mangled).
-- 
GitLab


From e5007c288e7981e0b0cf8ea3dea443f0b8c34345 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:15 +0100
Subject: [PATCH 1864/3804] tick/broadcast: Split
 __tick_broadcast_oneshot_control() into a helper

In preparation for adding support for per-cpu wakeup timers, split
__tick_broadcast_oneshot_control() into a helper function which deals
only with the broadcast timer management across idle transitions.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-3-will@kernel.org
---
 kernel/time/tick-broadcast.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index fb794ff4855ec..f3f2f4ba43219 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -717,24 +717,16 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
 	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
+					     struct tick_device *td,
+					     int cpu)
 {
-	struct clock_event_device *bc, *dev;
-	int cpu, ret = 0;
+	struct clock_event_device *bc, *dev = td->evtdev;
+	int ret = 0;
 	ktime_t now;
 
-	/*
-	 * If there is no broadcast device, tell the caller not to go
-	 * into deep idle.
-	 */
-	if (!tick_broadcast_device.evtdev)
-		return -EBUSY;
-
-	dev = this_cpu_ptr(&tick_cpu_device)->evtdev;
-
 	raw_spin_lock(&tick_broadcast_lock);
 	bc = tick_broadcast_device.evtdev;
-	cpu = smp_processor_id();
 
 	if (state == TICK_BROADCAST_ENTER) {
 		/*
@@ -863,6 +855,21 @@ out:
 	return ret;
 }
 
+int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
+{
+	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
+	int cpu = smp_processor_id();
+
+	if (tick_broadcast_device.evtdev)
+		return ___tick_broadcast_oneshot_control(state, td, cpu);
+
+	/*
+	 * If there is no broadcast device, tell the caller not
+	 * to go into deep idle.
+	 */
+	return -EBUSY;
+}
+
 /*
  * Reset the one shot broadcast for a cpu
  *
-- 
GitLab


From c94a8537df12708cc03da9120c3c3561ae744ce1 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:16 +0100
Subject: [PATCH 1865/3804] tick/broadcast: Prefer per-cpu oneshot wakeup
 timers to broadcast

Some SoCs have two per-cpu timer implementations where the timer with the
higher rating stops in deep idle (i.e. suffers from CLOCK_EVT_FEAT_C3STOP)
but is otherwise preferable to the timer with the lower rating. In such a
design, selecting the higher rated devices relies on a global broadcast
timer and IPIs to wake up from deep idle states.

To avoid the reliance on a global broadcast timer and also to reduce the
overhead associated with the IPI wakeups, extend
tick_install_broadcast_device() to manage per-cpu wakeup timers separately
from the broadcast device.

For now, these timers remain unused.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-4-will@kernel.org
---
 kernel/time/tick-broadcast.c | 59 +++++++++++++++++++++++++++++++++++-
 kernel/time/tick-common.c    |  2 +-
 kernel/time/tick-internal.h  |  4 +--
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f3f2f4ba43219..0e9e06d6cc5cb 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -33,6 +33,8 @@ static int tick_broadcast_forced;
 static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
 
 #ifdef CONFIG_TICK_ONESHOT
+static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);
+
 static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
 static void tick_broadcast_clear_oneshot(int cpu);
 static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
@@ -88,13 +90,65 @@ static bool tick_check_broadcast_device(struct clock_event_device *curdev,
 	return !curdev || newdev->rating > curdev->rating;
 }
 
+#ifdef CONFIG_TICK_ONESHOT
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
+{
+	return per_cpu(tick_oneshot_wakeup_device, cpu);
+}
+
+static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
+					   int cpu)
+{
+	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);
+
+	if (!newdev)
+		goto set_device;
+
+	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
+	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
+		 return false;
+
+	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
+	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
+		return false;
+
+	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
+		return false;
+
+	if (curdev && newdev->rating <= curdev->rating)
+		return false;
+
+	if (!try_module_get(newdev->owner))
+		return false;
+
+set_device:
+	clockevents_exchange_device(curdev, newdev);
+	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
+	return true;
+}
+#else
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
+{
+	return NULL;
+}
+
+static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
+					   int cpu)
+{
+	return false;
+}
+#endif
+
 /*
  * Conditionally install/replace broadcast device
  */
-void tick_install_broadcast_device(struct clock_event_device *dev)
+void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
 {
 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
 
+	if (tick_set_oneshot_wakeup_device(dev, cpu))
+		return;
+
 	if (!tick_check_broadcast_device(cur, dev))
 		return;
 
@@ -996,6 +1050,9 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu)
  */
 static void tick_broadcast_oneshot_offline(unsigned int cpu)
 {
+	if (tick_get_oneshot_wakeup_device(cpu))
+		tick_set_oneshot_wakeup_device(NULL, cpu);
+
 	/*
 	 * Clear the broadcast masks for the dead cpu, but do not stop
 	 * the broadcast device!
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index e15bc0ef19128..d663249652efb 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -373,7 +373,7 @@ out_bc:
 	/*
 	 * Can the new device be used as a broadcast device ?
 	 */
-	tick_install_broadcast_device(newdev);
+	tick_install_broadcast_device(newdev, cpu);
 }
 
 /**
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7a981c9e87a4a..30c89639e305f 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -61,7 +61,7 @@ extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 /* Broadcasting support */
 # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern void tick_install_broadcast_device(struct clock_event_device *dev);
+extern void tick_install_broadcast_device(struct clock_event_device *dev, int cpu);
 extern int tick_is_broadcast_device(struct clock_event_device *dev);
 extern void tick_suspend_broadcast(void);
 extern void tick_resume_broadcast(void);
@@ -72,7 +72,7 @@ extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
 extern struct tick_device *tick_get_broadcast_device(void);
 extern struct cpumask *tick_get_broadcast_mask(void);
 # else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
-static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
+static inline void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) { }
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
 static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
 static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-- 
GitLab


From ea5c7f1b9aa1a7c9d1bb9440084ac1256789fadb Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:17 +0100
Subject: [PATCH 1866/3804] tick/broadcast: Program wakeup timer when entering
 idle if required

When configuring the broadcast timer on entry to and exit from deep idle
states, prefer a per-CPU wakeup timer if one exists.

On entry to idle, stop the tick device and transfer the next event into
the oneshot wakeup device, which will serve as the wakeup from idle. To
avoid the overhead of additional hardware accesses on exit from idle,
leave the timer armed and treat the inevitable interrupt as a (possibly
spurious) tick event.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-5-will@kernel.org
---
 kernel/time/tick-broadcast.c | 44 +++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 0e9e06d6cc5cb..9b845212430bc 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -96,6 +96,15 @@ static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
 	return per_cpu(tick_oneshot_wakeup_device, cpu);
 }
 
+static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
+{
+	/*
+	 * If we woke up early and the tick was reprogrammed in the
+	 * meantime then this may be spurious but harmless.
+	 */
+	tick_receive_broadcast();
+}
+
 static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 					   int cpu)
 {
@@ -121,6 +130,7 @@ static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
 	if (!try_module_get(newdev->owner))
 		return false;
 
+	newdev->event_handler = tick_oneshot_wakeup_handler;
 set_device:
 	clockevents_exchange_device(curdev, newdev);
 	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
@@ -909,16 +919,48 @@ out:
 	return ret;
 }
 
+static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
+				       struct tick_device *td,
+				       int cpu)
+{
+	struct clock_event_device *dev, *wd;
+
+	dev = td->evtdev;
+	if (td->mode != TICKDEV_MODE_ONESHOT)
+		return -EINVAL;
+
+	wd = tick_get_oneshot_wakeup_device(cpu);
+	if (!wd)
+		return -ENODEV;
+
+	switch (state) {
+	case TICK_BROADCAST_ENTER:
+		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
+		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
+		clockevents_program_event(wd, dev->next_event, 1);
+		break;
+	case TICK_BROADCAST_EXIT:
+		/* We may have transitioned to oneshot mode while idle */
+		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
+			return -ENODEV;
+	}
+
+	return 0;
+}
+
 int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 	int cpu = smp_processor_id();
 
+	if (!tick_oneshot_wakeup_control(state, td, cpu))
+		return 0;
+
 	if (tick_broadcast_device.evtdev)
 		return ___tick_broadcast_oneshot_control(state, td, cpu);
 
 	/*
-	 * If there is no broadcast device, tell the caller not
+	 * If there is no broadcast or wakeup device, tell the caller not
 	 * to go into deep idle.
 	 */
 	return -EBUSY;
-- 
GitLab


From 245a057fee18be08d6ac12357463579d06bea077 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 24 May 2021 23:18:18 +0100
Subject: [PATCH 1867/3804] timer_list: Print name of per-cpu wakeup device

With the introduction of per-cpu wakeup devices that can be used in
preference to the broadcast timer, print the name of such devices when
they are available.

Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210524221818.15850-6-will@kernel.org
---
 kernel/time/tick-broadcast.c |  7 +++++++
 kernel/time/tick-internal.h  |  1 +
 kernel/time/timer_list.c     | 10 +++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9b845212430bc..f7fe6fe361731 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -63,6 +63,13 @@ struct cpumask *tick_get_broadcast_mask(void)
 	return tick_broadcast_mask;
 }
 
+static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);
+
+const struct clock_event_device *tick_get_wakeup_device(int cpu)
+{
+	return tick_get_oneshot_wakeup_device(cpu);
+}
+
 /*
  * Start the device in periodic mode
  */
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 30c89639e305f..6a742a29e545f 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -71,6 +71,7 @@ extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadc
 extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
 extern struct tick_device *tick_get_broadcast_device(void);
 extern struct cpumask *tick_get_broadcast_mask(void);
+extern const struct clock_event_device *tick_get_wakeup_device(int cpu);
 # else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
 static inline void tick_install_broadcast_device(struct clock_event_device *dev, int cpu) { }
 static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 6939140ab7c54..ed7d6ad694fba 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,14 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	SEQ_printf(m, " event_handler:  %ps\n", dev->event_handler);
 	SEQ_printf(m, "\n");
 	SEQ_printf(m, " retries:        %lu\n", dev->retries);
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+	if (cpu >= 0) {
+		const struct clock_event_device *wd = tick_get_wakeup_device(cpu);
+
+		SEQ_printf(m, "Wakeup Device: %s\n", wd ? wd->name : "<NULL>");
+	}
+#endif
 	SEQ_printf(m, "\n");
 }
 
@@ -248,7 +256,7 @@ static void timer_list_show_tickdevices_header(struct seq_file *m)
 
 static inline void timer_list_header(struct seq_file *m, u64 now)
 {
-	SEQ_printf(m, "Timer List Version: v0.8\n");
+	SEQ_printf(m, "Timer List Version: v0.9\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
 	SEQ_printf(m, "\n");
-- 
GitLab


From 08a4b904a2a90246aadd6aa2e4f26abca9037385 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 31 May 2021 20:06:33 +0200
Subject: [PATCH 1868/3804] ALSA: hda: Fix a regression in Capture Switch mixer
 read

The recent commit to drop the HDA-specific mute-LED control,
e65bf99718b5 ("ALSA: HDA - remove the custom implementation for the
audio LED trigger"), caused a regression on the mixer element read for
"Capture Switch" when it's built from bind controls.  The function
create_bind_cap_vol_ctl() creates the snd_kcontrol_new object directly
via snd_hda_gen_add_kctl() instead of add_control().  Although the
commit above added a workaround for the SNDRV_CTL_ACCESS_READWRITE in
add_control() as default, this code path fell out from the radar.  As
a result, now the driver gives -EPERM error because of the lack of the
proper access bit at reading "Capture Switch" element value.

Fix the regression by setting the access bit properly.

Fixes: e65bf99718b5 ("ALSA: HDA - remove the custom implementation for the audio LED trigger")
BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1186634
Link: https://lore.kernel.org/r/20210531180633.27831-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_generic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index b638fc2ef6f72..1f8018f9ce57a 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3520,6 +3520,7 @@ static int cap_sw_put(struct snd_kcontrol *kcontrol,
 static const struct snd_kcontrol_new cap_sw_temp = {
 	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
 	.name = "Capture Switch",
+	.access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
 	.info = cap_sw_info,
 	.get = cap_sw_get,
 	.put = cap_sw_put,
-- 
GitLab


From 0ee4d55534f82a0624701d0bb9fc2304d4529086 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:47:17 +0200
Subject: [PATCH 1869/3804] mac80211: remove warning in ieee80211_get_sband()

Syzbot reports that it's possible to hit this from userspace,
by trying to add a station before any other connection setup
has been done. Instead of trying to catch this in some other
way simply remove the warning, that will appropriately reject
the call from userspace.

Reported-by: syzbot+7716dbc401d9a437890d@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/20210517164715.f537da276d17.Id05f40ec8761d6a8cc2df87f1aa09c651988a586@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 214404a558fb6..648696b49f897 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1442,7 +1442,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
 	rcu_read_lock();
 	chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
 
-	if (WARN_ON_ONCE(!chanctx_conf)) {
+	if (!chanctx_conf) {
 		rcu_read_unlock();
 		return NULL;
 	}
-- 
GitLab


From bd18de517923903a177508fc8813f44e717b1c00 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 17:04:31 +0200
Subject: [PATCH 1870/3804] mac80211_hwsim: drop pending frames on stop

Syzbot reports that we may be able to get into a situation where
mac80211 has pending ACK frames on shutdown with hwsim. It appears
that the reason for this is that syzbot uses the wmediumd hooks to
intercept/inject frames, and may shut down hwsim, removing the
radio(s), while frames are pending in the air simulation.

Clean out the pending queue when the interface is stopped, after
this the frames can't be reported back to mac80211 properly anyway.

Reported-by: syzbot+a063bbf0b15737362592@syzkaller.appspotmail.com
Link: https://lore.kernel.org/r/20210517170429.b0f85ab0eda1.Ie42a6ec6b940c971f3441286aeaaae2fe368e29a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 51ce767eaf88e..7a6fd46d0c6e8 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1693,8 +1693,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw)
 static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
 {
 	struct mac80211_hwsim_data *data = hw->priv;
+
 	data->started = false;
 	hrtimer_cancel(&data->beacon_timer);
+
+	while (!skb_queue_empty(&data->pending))
+		ieee80211_free_txskb(hw, skb_dequeue(&data->pending));
+
 	wiphy_dbg(hw->wiphy, "%s\n", __func__);
 }
 
-- 
GitLab


From 34fb4db5abc1fe6708522cbf13f637e0eefb1a50 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 5 May 2021 13:28:29 -0700
Subject: [PATCH 1871/3804] mac80211: correct
 ieee80211_iterate_active_interfaces_mtx() locking comments

Commit a05829a7222e ("cfg80211: avoid holding the RTNL when calling the
driver") dropped usage of RTNL here and replaced it with
hw->wiphy->mutex. But we didn't update the comments.

Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Signed-off-by: Brian Norris <briannorris@chromium.org>
Link: https://lore.kernel.org/r/20210505202829.1039400-1-briannorris@chromium.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 445b66c6eb7e5..e7c59b4e2c44d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5537,7 +5537,7 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
  *
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
- * This version can only be used while holding the RTNL.
+ * This version can only be used while holding the wiphy mutex.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
  * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags
-- 
GitLab


From a64b6a25dd9f984ed05fade603a00e2eae787d2f Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Wed, 28 Apr 2021 14:39:41 +0800
Subject: [PATCH 1872/3804] cfg80211: call cfg80211_leave_ocb when switching
 away from OCB

If the userland switches back-and-forth between NL80211_IFTYPE_OCB and
NL80211_IFTYPE_ADHOC via send_msg(NL80211_CMD_SET_INTERFACE), there is a
chance that the cleanup cfg80211_leave_ocb() is not called. This leads
to initialization of in-use memory (e.g. init u.ibss while in-use by
u.ocb) due to a shared struct/union within ieee80211_sub_if_data:

struct ieee80211_sub_if_data {
    ...
    union {
        struct ieee80211_if_ap ap;
        struct ieee80211_if_vlan vlan;
        struct ieee80211_if_managed mgd;
        struct ieee80211_if_ibss ibss; // <- shares address
        struct ieee80211_if_mesh mesh;
        struct ieee80211_if_ocb ocb; // <- shares address
        struct ieee80211_if_mntr mntr;
        struct ieee80211_if_nan nan;
    } u;
    ...
}

Therefore add handling of otype == NL80211_IFTYPE_OCB, during
cfg80211_change_iface() to perform cleanup when leaving OCB mode.

link to syzkaller bug:
https://syzkaller.appspot.com/bug?id=0612dbfa595bf4b9b680ff7b4948257b8e3732d5

Reported-by: syzbot+105896fac213f26056f9@syzkaller.appspotmail.com
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Link: https://lore.kernel.org/r/20210428063941.105161-1-ducheng2@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 7ec021a610aeb..18dba3d7c638b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		case NL80211_IFTYPE_MESH_POINT:
 			/* mesh should be handled? */
 			break;
+		case NL80211_IFTYPE_OCB:
+			cfg80211_leave_ocb(rdev, dev);
+			break;
 		default:
 			break;
 		}
-- 
GitLab


From b90f51e8e1f5014c01c82a7bf4c611643d0a8bcb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 26 Apr 2021 21:28:02 +0200
Subject: [PATCH 1873/3804] staging: rtl8723bs: fix monitor netdev
 register/unregister

Due to the locking changes and callbacks happening inside
cfg80211, we need to use cfg80211 versions of the register
and unregister functions if called within cfg80211 methods,
otherwise deadlocks occur.

Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20210426212801.3d902cc9e6f4.Ie0b1e0c545920c61400a4b7d0f384ea61feb645a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index a6d731e959a28..36a1319ec4bf2 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -2284,7 +2284,7 @@ static int rtw_cfg80211_add_monitor_if(struct adapter *padapter, char *name, str
 	mon_wdev->iftype = NL80211_IFTYPE_MONITOR;
 	mon_ndev->ieee80211_ptr = mon_wdev;
 
-	ret = register_netdevice(mon_ndev);
+	ret = cfg80211_register_netdevice(mon_ndev);
 	if (ret) {
 		goto out;
 	}
@@ -2360,7 +2360,7 @@ static int cfg80211_rtw_del_virtual_intf(struct wiphy *wiphy,
 	adapter = rtw_netdev_priv(ndev);
 	pwdev_priv = adapter_wdev_data(adapter);
 
-	unregister_netdevice(ndev);
+	cfg80211_unregister_netdevice(ndev);
 
 	if (ndev == pwdev_priv->pmon_ndev) {
 		pwdev_priv->pmon_ndev = NULL;
-- 
GitLab


From e298aa358f0ca658406d524b6639fe389cb6e11e Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Mon, 10 May 2021 12:16:49 +0800
Subject: [PATCH 1874/3804] mac80211: fix skb length check in
 ieee80211_scan_rx()

Replace hard-coded compile-time constants for header length check
with dynamic determination based on the frame type. Otherwise, we
hit a validation WARN_ON in cfg80211 later.

Fixes: cd418ba63f0c ("mac80211: convert S1G beacon to scan results")
Reported-by: syzbot+405843667e93b9790fc1@syzkaller.appspotmail.com
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Link: https://lore.kernel.org/r/20210510041649.589754-1-ducheng2@gmail.com
[style fixes, reword commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/scan.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index d4cc9ac2d7033..6b50cb5e0e3cc 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -251,13 +251,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 	struct ieee80211_mgmt *mgmt = (void *)skb->data;
 	struct ieee80211_bss *bss;
 	struct ieee80211_channel *channel;
+	size_t min_hdr_len = offsetof(struct ieee80211_mgmt,
+				      u.probe_resp.variable);
+
+	if (!ieee80211_is_probe_resp(mgmt->frame_control) &&
+	    !ieee80211_is_beacon(mgmt->frame_control) &&
+	    !ieee80211_is_s1g_beacon(mgmt->frame_control))
+		return;
 
 	if (ieee80211_is_s1g_beacon(mgmt->frame_control)) {
-		if (skb->len < 15)
-			return;
-	} else if (skb->len < 24 ||
-		 (!ieee80211_is_probe_resp(mgmt->frame_control) &&
-		  !ieee80211_is_beacon(mgmt->frame_control)))
+		if (ieee80211_is_s1g_short_beacon(mgmt->frame_control))
+			min_hdr_len = offsetof(struct ieee80211_ext,
+					       u.s1g_short_beacon.variable);
+		else
+			min_hdr_len = offsetof(struct ieee80211_ext,
+					       u.s1g_beacon);
+	}
+
+	if (skb->len < min_hdr_len)
 		return;
 
 	sdata1 = rcu_dereference(local->scan_sdata);
-- 
GitLab


From bddc0c411a45d3718ac535a070f349be8eca8d48 Mon Sep 17 00:00:00 2001
From: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
Date: Sun, 30 May 2021 15:32:26 +0200
Subject: [PATCH 1875/3804] mac80211: Fix NULL ptr deref for injected rate info

The commit cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx
queue") moved the code to validate the radiotap header from
ieee80211_monitor_start_xmit to ieee80211_parse_tx_radiotap. This made it
possible to share more code with the new Tx queue selection code for
injected frames. But at the same time, it now required the call of
ieee80211_parse_tx_radiotap at the beginning of functions which wanted to
handle the radiotap header. And this broke the rate parsing in the radiotap
header parser.

The radiotap parser for rates is operating most of the time only on the
data in the actual radiotap header. But for the 802.11a/b/g rates, it must
also know the selected band from the chandef information. But this
information is only written to the ieee80211_tx_info at the end of the
ieee80211_monitor_start_xmit - long after ieee80211_parse_tx_radiotap was
already called. The info->band information was therefore always 0
(NL80211_BAND_2GHZ) when the parser code tried to access it.

For a 5GHz only device, injecting a frame with 802.11a rates would cause a
NULL pointer dereference because local->hw.wiphy->bands[NL80211_BAND_2GHZ]
would most likely have been NULL when the radiotap parser searched for the
correct rate index of the driver.

Cc: stable@vger.kernel.org
Reported-by: Ben Greear <greearb@candelatech.com>
Fixes: cb17ed29a7a5 ("mac80211: parse radiotap header when selecting Tx queue")
Signed-off-by: Mathy Vanhoef <Mathy.Vanhoef@kuleuven.be>
[sven@narfation.org: added commit message]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Link: https://lore.kernel.org/r/20210530133226.40587-1-sven@narfation.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  7 +++++-
 net/mac80211/tx.c      | 52 +++++++++++++++++++++++++++++-------------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e7c59b4e2c44d..e89530d0d9c61 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6392,7 +6392,12 @@ bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
 
 /**
  * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header
- *				 of injected frames
+ *				 of injected frames.
+ *
+ * To accurately parse and take into account rate and retransmission fields,
+ * you must initialize the chandef field in the ieee80211_tx_info structure
+ * of the skb before calling this function.
+ *
  * @skb: packet injected by userspace
  * @dev: the &struct device of this 802.11 device
  */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 0b719f3d2dec7..2651498d05e8e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2014,6 +2014,26 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
 	ieee80211_tx(sdata, sta, skb, false);
 }
 
+static bool ieee80211_validate_radiotap_len(struct sk_buff *skb)
+{
+	struct ieee80211_radiotap_header *rthdr =
+		(struct ieee80211_radiotap_header *)skb->data;
+
+	/* check for not even having the fixed radiotap header part */
+	if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
+		return false; /* too short to be possibly valid */
+
+	/* is it a header version we can trust to find length from? */
+	if (unlikely(rthdr->it_version))
+		return false; /* only version 0 is supported */
+
+	/* does the skb contain enough to deliver on the alleged length? */
+	if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
+		return false; /* skb too short for claimed rt header extent */
+
+	return true;
+}
+
 bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
 				 struct net_device *dev)
 {
@@ -2022,8 +2042,6 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
 	struct ieee80211_radiotap_header *rthdr =
 		(struct ieee80211_radiotap_header *) skb->data;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-	struct ieee80211_supported_band *sband =
-		local->hw.wiphy->bands[info->band];
 	int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len,
 						   NULL);
 	u16 txflags;
@@ -2036,17 +2054,8 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
 	u8 vht_mcs = 0, vht_nss = 0;
 	int i;
 
-	/* check for not even having the fixed radiotap header part */
-	if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
-		return false; /* too short to be possibly valid */
-
-	/* is it a header version we can trust to find length from? */
-	if (unlikely(rthdr->it_version))
-		return false; /* only version 0 is supported */
-
-	/* does the skb contain enough to deliver on the alleged length? */
-	if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data)))
-		return false; /* skb too short for claimed rt header extent */
+	if (!ieee80211_validate_radiotap_len(skb))
+		return false;
 
 	info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT |
 		       IEEE80211_TX_CTL_DONTFRAG;
@@ -2186,6 +2195,9 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
 		return false;
 
 	if (rate_found) {
+		struct ieee80211_supported_band *sband =
+			local->hw.wiphy->bands[info->band];
+
 		info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT;
 
 		for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
@@ -2199,7 +2211,7 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
 		} else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) {
 			ieee80211_rate_set_vht(info->control.rates, vht_mcs,
 					       vht_nss);
-		} else {
+		} else if (sband) {
 			for (i = 0; i < sband->n_bitrates; i++) {
 				if (rate * 5 != sband->bitrates[i].bitrate)
 					continue;
@@ -2236,8 +2248,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
 		      IEEE80211_TX_CTL_INJECTED;
 
-	/* Sanity-check and process the injection radiotap header */
-	if (!ieee80211_parse_tx_radiotap(skb, dev))
+	/* Sanity-check the length of the radiotap header */
+	if (!ieee80211_validate_radiotap_len(skb))
 		goto fail;
 
 	/* we now know there is a radiotap header with a length we can use */
@@ -2351,6 +2363,14 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
 	ieee80211_select_queue_80211(sdata, skb, hdr);
 	skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority));
 
+	/*
+	 * Process the radiotap header. This will now take into account the
+	 * selected chandef above to accurately set injection rates and
+	 * retransmissions.
+	 */
+	if (!ieee80211_parse_tx_radiotap(skb, dev))
+		goto fail_rcu;
+
 	/* remove the injection radiotap header */
 	skb_pull(skb, len_rthdr);
 
-- 
GitLab


From 9a90ed065a155d13db0d0ffeaad5cc54e51c90c6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 27 May 2021 11:02:26 +0200
Subject: [PATCH 1876/3804] x86/thermal: Fix LVT thermal setup for SMI delivery
 mode

There are machines out there with added value crap^WBIOS which provide an
SMI handler for the local APIC thermal sensor interrupt. Out of reset,
the BSP on those machines has something like 0x200 in that APIC register
(timestamps left in because this whole issue is timing sensitive):

  [    0.033858] read lvtthmr: 0x330, val: 0x200

which means:

 - bit 16 - the interrupt mask bit is clear and thus that interrupt is enabled
 - bits [10:8] have 010b which means SMI delivery mode.

Now, later during boot, when the kernel programs the local APIC, it
soft-disables it temporarily through the spurious vector register:

  setup_local_APIC:

  	...

	/*
	 * If this comes from kexec/kcrash the APIC might be enabled in
	 * SPIV. Soft disable it before doing further initialization.
	 */
	value = apic_read(APIC_SPIV);
	value &= ~APIC_SPIV_APIC_ENABLED;
	apic_write(APIC_SPIV, value);

which means (from the SDM):

"10.4.7.2 Local APIC State After It Has Been Software Disabled

...

* The mask bits for all the LVT entries are set. Attempts to reset these
bits will be ignored."

And this happens too:

  [    0.124111] APIC: Switch to symmetric I/O mode setup
  [    0.124117] lvtthmr 0x200 before write 0xf to APIC 0xf0
  [    0.124118] lvtthmr 0x10200 after write 0xf to APIC 0xf0

This results in CPU 0 soft lockups depending on the placement in time
when the APIC soft-disable happens. Those soft lockups are not 100%
reproducible and the reason for that can only be speculated as no one
tells you what SMM does. Likely, it confuses the SMM code that the APIC
is disabled and the thermal interrupt doesn't fire at all,
leading to CPU 0 stuck in SMM forever...

Now, before

  4f432e8bb15b ("x86/mce: Get rid of mcheck_intel_therm_init()")

due to how the APIC_LVTTHMR was read before APIC initialization in
mcheck_intel_therm_init(), it would read the value with the mask bit 16
clear and then intel_init_thermal() would replicate it onto the APs and
all would be peachy - the thermal interrupt would remain enabled.

But that commit moved that reading to a later moment in
intel_init_thermal(), resulting in reading APIC_LVTTHMR on the BSP too
late and with its interrupt mask bit set.

Thus, revert back to the old behavior of reading the thermal LVT
register before the APIC gets initialized.

Fixes: 4f432e8bb15b ("x86/mce: Get rid of mcheck_intel_therm_init()")
Reported-by: James Feeney <james@nurealm.net>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Link: https://lkml.kernel.org/r/YKIqDdFNaXYd39wz@zn.tnic
---
 arch/x86/include/asm/thermal.h      |  4 +++-
 arch/x86/kernel/setup.c             |  9 +++++++++
 drivers/thermal/intel/therm_throt.c | 15 +++++++++++----
 3 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/thermal.h b/arch/x86/include/asm/thermal.h
index ddbdefd5b94f1..91a7b6687c3b9 100644
--- a/arch/x86/include/asm/thermal.h
+++ b/arch/x86/include/asm/thermal.h
@@ -3,11 +3,13 @@
 #define _ASM_X86_THERMAL_H
 
 #ifdef CONFIG_X86_THERMAL_VECTOR
+void therm_lvt_init(void);
 void intel_init_thermal(struct cpuinfo_x86 *c);
 bool x86_thermal_enabled(void);
 void intel_thermal_interrupt(void);
 #else
-static inline void intel_init_thermal(struct cpuinfo_x86 *c) { }
+static inline void therm_lvt_init(void)				{ }
+static inline void intel_init_thermal(struct cpuinfo_x86 *c)	{ }
 #endif
 
 #endif /* _ASM_X86_THERMAL_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 72920af0b3c01..ff653d608d5f7 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -44,6 +44,7 @@
 #include <asm/pci-direct.h>
 #include <asm/prom.h>
 #include <asm/proto.h>
+#include <asm/thermal.h>
 #include <asm/unwind.h>
 #include <asm/vsyscall.h>
 #include <linux/vmalloc.h>
@@ -1226,6 +1227,14 @@ void __init setup_arch(char **cmdline_p)
 
 	x86_init.timers.wallclock_init();
 
+	/*
+	 * This needs to run before setup_local_APIC() which soft-disables the
+	 * local APIC temporarily and that masks the thermal LVT interrupt,
+	 * leading to softlockups on machines which have configured SMI
+	 * interrupt delivery.
+	 */
+	therm_lvt_init();
+
 	mcheck_init();
 
 	register_refined_jiffies(CLOCK_TICK_RATE);
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index f8e882592ba5d..99abdc03c44ce 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -621,6 +621,17 @@ bool x86_thermal_enabled(void)
 	return atomic_read(&therm_throt_en);
 }
 
+void __init therm_lvt_init(void)
+{
+	/*
+	 * This function is only called on boot CPU. Save the init thermal
+	 * LVT value on BSP and use that value to restore APs' thermal LVT
+	 * entry BIOS programmed later
+	 */
+	if (intel_thermal_supported(&boot_cpu_data))
+		lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
 void intel_init_thermal(struct cpuinfo_x86 *c)
 {
 	unsigned int cpu = smp_processor_id();
@@ -630,10 +641,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	if (!intel_thermal_supported(c))
 		return;
 
-	/* On the BSP? */
-	if (c == &boot_cpu_data)
-		lvtthmr_init = apic_read(APIC_LVTTHMR);
-
 	/*
 	 * First check if its enabled already, in which case there might
 	 * be some SMM goo which handles it, so we can't even put a handler
-- 
GitLab


From 59cc84c802eb923805e7bba425976a3df5ce35d8 Mon Sep 17 00:00:00 2001
From: Frederic Barrat <fbarrat@linux.ibm.com>
Date: Wed, 26 May 2021 16:45:40 +0200
Subject: [PATCH 1877/3804] Revert "powerpc/kernel/iommu: Align size for
 IOMMU_PAGE_SIZE() to save TCEs"

This reverts commit 3c0468d4451eb6b4f6604370639f163f9637a479.

That commit was breaking alignment guarantees for the DMA address when
allocating coherent mappings, as described in
Documentation/core-api/dma-api-howto.rst

It was also noticed by Mellanox' driver:
[ 1515.763621] mlx5_core c002:01:00.0: mlx5_frag_buf_alloc_node:146:(pid 13402): unexpected map alignment: 0x0800000000c61000, page_shift=16
[ 1515.763635] mlx5_core c002:01:00.0: mlx5_cqwq_create:181:(pid
13402): mlx5_frag_buf_alloc_node() failed, -12

Fixes: 3c0468d4451e ("powerpc/kernel/iommu: Align size for  IOMMU_PAGE_SIZE() to save TCEs")
Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210526144540.117795-1-fbarrat@linux.ibm.com
---
 arch/powerpc/kernel/iommu.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 57d6b85e9b964..2af89a5e379f2 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -898,7 +898,6 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	unsigned int order;
 	unsigned int nio_pages, io_order;
 	struct page *page;
-	size_t size_io = size;
 
 	size = PAGE_ALIGN(size);
 	order = get_order(size);
@@ -925,9 +924,8 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
 	memset(ret, 0, size);
 
 	/* Set up tces to cover the allocated range */
-	size_io = IOMMU_PAGE_ALIGN(size_io, tbl);
-	nio_pages = size_io >> tbl->it_page_shift;
-	io_order = get_iommu_order(size_io, tbl);
+	nio_pages = size >> tbl->it_page_shift;
+	io_order = get_iommu_order(size, tbl);
 	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
 			      mask >> tbl->it_page_shift, io_order, 0);
 	if (mapping == DMA_MAPPING_ERROR) {
@@ -942,9 +940,10 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
 			 void *vaddr, dma_addr_t dma_handle)
 {
 	if (tbl) {
-		size_t size_io = IOMMU_PAGE_ALIGN(size, tbl);
-		unsigned int nio_pages = size_io >> tbl->it_page_shift;
+		unsigned int nio_pages;
 
+		size = PAGE_ALIGN(size);
+		nio_pages = size >> tbl->it_page_shift;
 		iommu_free(tbl, dma_handle, nio_pages);
 		size = PAGE_ALIGN(size);
 		free_pages((unsigned long)vaddr, get_order(size));
-- 
GitLab


From 515da6f4295c2c42b8c54572cce3d2dd1167c41e Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Mon, 31 May 2021 14:13:26 +0200
Subject: [PATCH 1878/3804] scsi: target: core: Fix warning on realtime kernels

On realtime kernels, spin_lock_irq*(spinlock_t) do not disable the
interrupts, a call to irqs_disabled() will return false thus firing a
warning in __transport_wait_for_tasks().

Remove the warning and also replace assert_spin_locked() with
lockdep_assert_held()

Link: https://lore.kernel.org/r/20210531121326.3649-1-mlombard@redhat.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 05d7ffd59df65..7e35eddd9eb70 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3121,9 +3121,7 @@ __transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop,
 	__releases(&cmd->t_state_lock)
 	__acquires(&cmd->t_state_lock)
 {
-
-	assert_spin_locked(&cmd->t_state_lock);
-	WARN_ON_ONCE(!irqs_disabled());
+	lockdep_assert_held(&cmd->t_state_lock);
 
 	if (fabric_stop)
 		cmd->transport_state |= CMD_T_FABRIC_STOP;
-- 
GitLab


From 696770e72f2b42b92ea0a4a98087fb2ba376417a Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 28 May 2021 14:22:40 -0700
Subject: [PATCH 1879/3804] scsi: lpfc: Fix failure to transmit ABTS on FC link

The abort_cmd_ia flag in an abort wqe describes whether an ABTS basic link
service should be transmitted on the FC link or not.  Code added in
lpfc_sli4_issue_abort_iotag() set the abort_cmd_ia flag incorrectly,
suppressing ABTS transmission.

A previous LPFC change to build an abort wqe inverted prior logic that
determined whether an ABTS was to be issued on the FC link.

Revert this logic to its proper state.

Link: https://lore.kernel.org/r/20210528212240.11387-1-jsmart2021@gmail.com
Fixes: db7531d2b377 ("scsi: lpfc: Convert abort handling to SLI-3 and SLI-4 handlers")
Cc: <stable@vger.kernel.org> # v5.11+
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 573c8599d71c2..fc3682f15f509 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -20589,10 +20589,8 @@ lpfc_sli4_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	abtswqe = &abtsiocb->wqe;
 	memset(abtswqe, 0, sizeof(*abtswqe));
 
-	if (lpfc_is_link_up(phba))
+	if (!lpfc_is_link_up(phba))
 		bf_set(abort_cmd_ia, &abtswqe->abort_cmd, 1);
-	else
-		bf_set(abort_cmd_ia, &abtswqe->abort_cmd, 0);
 	bf_set(abort_cmd_criteria, &abtswqe->abort_cmd, T_XRI_TAG);
 	abtswqe->abort_cmd.rsrvd5 = 0;
 	abtswqe->abort_cmd.wqe_com.abort_tag = xritag;
-- 
GitLab


From 4ac06a1e013cf5fdd963317ffd3b968560f33bba Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Mon, 31 May 2021 09:21:38 +0200
Subject: [PATCH 1880/3804] nfc: fix NULL ptr dereference in
 llcp_sock_getname() after failed connect

It's possible to trigger NULL pointer dereference by local unprivileged
user, when calling getsockname() after failed bind() (e.g. the bind
fails because LLCP_SAP_MAX used as SAP):

  BUG: kernel NULL pointer dereference, address: 0000000000000000
  CPU: 1 PID: 426 Comm: llcp_sock_getna Not tainted 5.13.0-rc2-next-20210521+ #9
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1 04/01/2014
  Call Trace:
   llcp_sock_getname+0xb1/0xe0
   __sys_getpeername+0x95/0xc0
   ? lockdep_hardirqs_on_prepare+0xd5/0x180
   ? syscall_enter_from_user_mode+0x1c/0x40
   __x64_sys_getpeername+0x11/0x20
   do_syscall_64+0x36/0x70
   entry_SYSCALL_64_after_hwframe+0x44/0xae

This can be reproduced with Syzkaller C repro (bind followed by
getpeername):
https://syzkaller.appspot.com/x/repro.c?x=14def446e00000

Cc: <stable@vger.kernel.org>
Fixes: d646960f7986 ("NFC: Initial LLCP support")
Reported-by: syzbot+80fb126e7f7d8b1a5914@syzkaller.appspotmail.com
Reported-by: butt3rflyh4ck <butterflyhuangxx@gmail.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210531072138.5219-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 53dbe733f9981..6cfd30fc07985 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -110,6 +110,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 	if (!llcp_sock->service_name) {
 		nfc_llcp_local_put(llcp_sock->local);
 		llcp_sock->local = NULL;
+		llcp_sock->dev = NULL;
 		ret = -ENOMEM;
 		goto put_dev;
 	}
@@ -119,6 +120,7 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 		llcp_sock->local = NULL;
 		kfree(llcp_sock->service_name);
 		llcp_sock->service_name = NULL;
+		llcp_sock->dev = NULL;
 		ret = -EADDRINUSE;
 		goto put_dev;
 	}
-- 
GitLab


From 527ff9550682a3d08066a000435ffd8330bdd729 Mon Sep 17 00:00:00 2001
From: Stefan Binding <sbinding@opensource.cirrus.com>
Date: Mon, 31 May 2021 17:37:54 +0100
Subject: [PATCH 1881/3804] ALSA: hda/cirrus: Set Initial DMIC volume to -26 dB

Previously this fix was applied only to Bullseye variant laptops,
and should be applied to Cyborg and Warlock variants.

Fixes: 45b14fe200ba ("ALSA: hda/cirrus: Use CS8409 filter to fix abnormal sounds on Bullseye")
Signed-off-by: Stefan Binding <sbinding@opensource.cirrus.com>
Signed-off-by: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210531163754.136736-1-vitalyr@opensource.cirrus.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_cirrus.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 726507d0b04ce..8629e84fef23d 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -2206,10 +2206,9 @@ static void cs8409_cs42l42_fixups(struct hda_codec *codec,
 		break;
 	case HDA_FIXUP_ACT_PROBE:
 
-		/* Set initial volume on Bullseye to -26 dB */
-		if (codec->fixup_id == CS8409_BULLSEYE)
-			snd_hda_codec_amp_init_stereo(codec, CS8409_CS42L42_DMIC_ADC_PIN_NID,
-					HDA_INPUT, 0, 0xff, 0x19);
+		/* Set initial DMIC volume to -26 dB */
+		snd_hda_codec_amp_init_stereo(codec, CS8409_CS42L42_DMIC_ADC_PIN_NID,
+				HDA_INPUT, 0, 0xff, 0x19);
 		snd_hda_gen_add_kctl(&spec->gen,
 			NULL, &cs8409_cs42l42_hp_volume_mixer);
 		snd_hda_gen_add_kctl(&spec->gen,
-- 
GitLab


From 901be145a46eb79879367d853194346a549e623d Mon Sep 17 00:00:00 2001
From: Carlos M <carlos.marr.pz@gmail.com>
Date: Mon, 31 May 2021 22:20:26 +0200
Subject: [PATCH 1882/3804] ALSA: hda: Fix for mute key LED for HP Pavilion
 15-CK0xx

For the HP Pavilion 15-CK0xx, with audio subsystem ID 0x103c:0x841c,
adding a line in patch_realtek.c to apply the ALC269_FIXUP_HP_MUTE_LED_MIC3
fix activates the mute key LED.

Signed-off-by: Carlos M <carlos.marr.pz@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210531202026.35427-1-carlos.marr.pz@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 61a60c420f6fd..43e37145eb5d9 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8303,6 +8303,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+	SND_PCI_QUIRK(0x103c, 0x841c, "HP Pavilion 15-CK0xx", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
 	SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
-- 
GitLab


From 0e5cb7770684b4c81bcc63f4675e488f9a0e31eb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 27 Feb 2021 10:23:45 +0000
Subject: [PATCH 1883/3804] irqchip/gic: Split vGIC probing information from
 the GIC code

The vGIC advertising code is unsurprisingly very much tied to
the GIC implementations. However, we are about to extend the
support to lesser implementations.

Let's dissociate the vgic registration from the GIC code and
move it into KVM, where it makes a bit more sense. This also
allows us to mark the gic_kvm_info structures as __initdata.

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-init.c        | 18 +++++++++--
 drivers/irqchip/irq-gic-common.c       | 13 --------
 drivers/irqchip/irq-gic-common.h       |  2 --
 drivers/irqchip/irq-gic-v3.c           |  6 ++--
 drivers/irqchip/irq-gic.c              |  6 ++--
 include/linux/irqchip/arm-gic-common.h | 25 +---------------
 include/linux/irqchip/arm-vgic-info.h  | 41 ++++++++++++++++++++++++++
 7 files changed, 63 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/irqchip/arm-vgic-info.h

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 58cbda00e56d9..2fdb65529594d 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static struct gic_kvm_info *gic_kvm_info;
+
+void __init vgic_set_kvm_info(const struct gic_kvm_info *info)
+{
+	BUG_ON(gic_kvm_info != NULL);
+	gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (gic_kvm_info)
+		*gic_kvm_info = *info;
+}
+
 /**
  * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
  *
@@ -509,10 +519,8 @@ void kvm_vgic_init_cpu_hardware(void)
  */
 int kvm_vgic_hyp_init(void)
 {
-	const struct gic_kvm_info *gic_kvm_info;
 	int ret;
 
-	gic_kvm_info = gic_get_kvm_info();
 	if (!gic_kvm_info)
 		return -ENODEV;
 
@@ -536,10 +544,14 @@ int kvm_vgic_hyp_init(void)
 		ret = -ENODEV;
 	}
 
+	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+
+	kfree(gic_kvm_info);
+	gic_kvm_info = NULL;
+
 	if (ret)
 		return ret;
 
-	kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
 	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
 				 vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
index f47b41dfd0238..a610821c8ff2a 100644
--- a/drivers/irqchip/irq-gic-common.c
+++ b/drivers/irqchip/irq-gic-common.c
@@ -12,19 +12,6 @@
 
 static DEFINE_RAW_SPINLOCK(irq_controller_lock);
 
-static const struct gic_kvm_info *gic_kvm_info;
-
-const struct gic_kvm_info *gic_get_kvm_info(void)
-{
-	return gic_kvm_info;
-}
-
-void gic_set_kvm_info(const struct gic_kvm_info *info)
-{
-	BUG_ON(gic_kvm_info != NULL);
-	gic_kvm_info = info;
-}
-
 void gic_enable_of_quirks(const struct device_node *np,
 			  const struct gic_quirk *quirks, void *data)
 {
diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h
index ccba8b0fe0f58..27e3d4ed4f328 100644
--- a/drivers/irqchip/irq-gic-common.h
+++ b/drivers/irqchip/irq-gic-common.h
@@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
 void gic_enable_of_quirks(const struct device_node *np,
 			  const struct gic_quirk *quirks, void *data);
 
-void gic_set_kvm_info(const struct gic_kvm_info *info);
-
 #endif /* _IRQ_GIC_COMMON_H */
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 37a23aa6de37c..453fc425eede2 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities);
 /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
 static refcount_t *ppi_nmi_refs;
 
-static struct gic_kvm_info gic_v3_kvm_info;
+static struct gic_kvm_info gic_v3_kvm_info __initdata;
 static DEFINE_PER_CPU(bool, has_rss);
 
 #define MPIDR_RS(mpidr)			(((mpidr) & 0xF0UL) >> 4)
@@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
 	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
-	gic_set_kvm_info(&gic_v3_kvm_info);
+	vgic_set_kvm_info(&gic_v3_kvm_info);
 }
 
 static int __init gic_of_init(struct device_node *node, struct device_node *parent)
@@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void)
 
 	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
 	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
-	gic_set_kvm_info(&gic_v3_kvm_info);
+	vgic_set_kvm_info(&gic_v3_kvm_info);
 }
 
 static int __init
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b1d9c22caf2e6..2de9ec8ece0c2 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
 static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly;
 
-static struct gic_kvm_info gic_v2_kvm_info;
+static struct gic_kvm_info gic_v2_kvm_info __initdata;
 
 static DEFINE_PER_CPU(u32, sgi_intid);
 
@@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node)
 		return;
 
 	if (static_branch_likely(&supports_deactivate_key))
-		gic_set_kvm_info(&gic_v2_kvm_info);
+		vgic_set_kvm_info(&gic_v2_kvm_info);
 }
 
 int __init
@@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void)
 
 	gic_v2_kvm_info.maint_irq = irq;
 
-	gic_set_kvm_info(&gic_v2_kvm_info);
+	vgic_set_kvm_info(&gic_v2_kvm_info);
 }
 
 static int __init gic_v2_acpi_init(union acpi_subtable_headers *header,
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index fa8c0455c3523..1177f3a1aed5d 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -7,8 +7,7 @@
 #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H
 #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H
 
-#include <linux/types.h>
-#include <linux/ioport.h>
+#include <linux/irqchip/arm-vgic-info.h>
 
 #define GICD_INT_DEF_PRI		0xa0
 #define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
@@ -16,28 +15,6 @@
 					(GICD_INT_DEF_PRI << 8) |\
 					GICD_INT_DEF_PRI)
 
-enum gic_type {
-	GIC_V2,
-	GIC_V3,
-};
-
-struct gic_kvm_info {
-	/* GIC type */
-	enum gic_type	type;
-	/* Virtual CPU interface */
-	struct resource vcpu;
-	/* Interrupt number */
-	unsigned int	maint_irq;
-	/* Virtual control interface */
-	struct resource vctrl;
-	/* vlpi support */
-	bool		has_v4;
-	/* rvpeid support */
-	bool		has_v4_1;
-};
-
-const struct gic_kvm_info *gic_get_kvm_info(void);
-
 struct irq_domain;
 struct fwnode_handle;
 int gicv2m_init(struct fwnode_handle *parent_handle,
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
new file mode 100644
index 0000000000000..a25d4da5697d4
--- /dev/null
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * include/linux/irqchip/arm-vgic-info.h
+ *
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ */
+#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H
+
+#include <linux/types.h>
+#include <linux/ioport.h>
+
+enum gic_type {
+	/* Full GICv2 */
+	GIC_V2,
+	/* Full GICv3, optionally with v2 compat */
+	GIC_V3,
+};
+
+struct gic_kvm_info {
+	/* GIC type */
+	enum gic_type	type;
+	/* Virtual CPU interface */
+	struct resource vcpu;
+	/* Interrupt number */
+	unsigned int	maint_irq;
+	/* Virtual control interface */
+	struct resource vctrl;
+	/* vlpi support */
+	bool		has_v4;
+	/* rvpeid support */
+	bool		has_v4_1;
+};
+
+#ifdef CONFIG_KVM
+void vgic_set_kvm_info(const struct gic_kvm_info *info);
+#else
+static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {}
+#endif
+
+#endif
-- 
GitLab


From 74501499d4e0d4ba59ab2bc6be1873716549169d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 19 Feb 2021 16:39:31 +0000
Subject: [PATCH 1884/3804] KVM: arm64: Handle physical FIQ as an IRQ while
 running a guest

As we now entertain the possibility of FIQ being used on the host,
treat the signalling of a FIQ while running a guest as an IRQ,
causing an exit instead of a HYP panic.

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/hyp-entry.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5f49df4ffdd86..9aa9b73475c95 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -76,6 +76,7 @@ el1_trap:
 	b	__guest_exit
 
 el1_irq:
+el1_fiq:
 	get_vcpu_ptr	x1, x0
 	mov	x0, #ARM_EXCEPTION_IRQ
 	b	__guest_exit
@@ -131,7 +132,6 @@ SYM_CODE_END(\label)
 	invalid_vector	el2t_error_invalid
 	invalid_vector	el2h_irq_invalid
 	invalid_vector	el2h_fiq_invalid
-	invalid_vector	el1_fiq_invalid
 
 	.ltorg
 
@@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector)
 
 	valid_vect	el1_sync		// Synchronous 64-bit EL1
 	valid_vect	el1_irq			// IRQ 64-bit EL1
-	invalid_vect	el1_fiq_invalid		// FIQ 64-bit EL1
+	valid_vect	el1_fiq			// FIQ 64-bit EL1
 	valid_vect	el1_error		// Error 64-bit EL1
 
 	valid_vect	el1_sync		// Synchronous 32-bit EL1
 	valid_vect	el1_irq			// IRQ 32-bit EL1
-	invalid_vect	el1_fiq_invalid		// FIQ 32-bit EL1
+	valid_vect	el1_fiq			// FIQ 32-bit EL1
 	valid_vect	el1_error		// Error 32-bit EL1
 SYM_CODE_END(__kvm_hyp_vector)
 
-- 
GitLab


From 669062d2a1aa36661b490683fe17810aa24a9cfb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 28 Feb 2021 11:09:59 +0000
Subject: [PATCH 1885/3804] KVM: arm64: vgic: Be tolerant to the lack of
 maintenance interrupt masking

As it turns out, not all the interrupt controllers are able to
expose a vGIC maintenance interrupt that can be independently
enabled/disabled.

And to be fair, it doesn't really matter as all we require is
for the interrupt to kick us out of guest mode one way or another.

To that effect, add gic_kvm_info.no_maint_irq_mask for an interrupt
controller to advertise the lack of masking.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-init.c       | 8 +++++++-
 include/linux/irqchip/arm-vgic-info.h | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 2fdb65529594d..6752d084934d4 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -519,12 +519,15 @@ void kvm_vgic_init_cpu_hardware(void)
  */
 int kvm_vgic_hyp_init(void)
 {
+	bool has_mask;
 	int ret;
 
 	if (!gic_kvm_info)
 		return -ENODEV;
 
-	if (!gic_kvm_info->maint_irq) {
+	has_mask = !gic_kvm_info->no_maint_irq_mask;
+
+	if (has_mask && !gic_kvm_info->maint_irq) {
 		kvm_err("No vgic maintenance irq\n");
 		return -ENXIO;
 	}
@@ -552,6 +555,9 @@ int kvm_vgic_hyp_init(void)
 	if (ret)
 		return ret;
 
+	if (!has_mask)
+		return 0;
+
 	ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
 				 vgic_maintenance_handler,
 				 "vgic", kvm_get_running_vcpus());
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index a25d4da5697d4..7c0d08ebb82c7 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -24,6 +24,8 @@ struct gic_kvm_info {
 	struct resource vcpu;
 	/* Interrupt number */
 	unsigned int	maint_irq;
+	/* No interrupt mask, no need to use the above field */
+	bool		no_maint_irq_mask;
 	/* Virtual control interface */
 	struct resource vctrl;
 	/* vlpi support */
-- 
GitLab


From f6c3e24fb721dda247f6691c809d6e6c413f22c7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 15 Mar 2021 21:56:47 +0000
Subject: [PATCH 1886/3804] KVM: arm64: vgic: Let an interrupt controller
 advertise lack of HW deactivation

The vGIC, as architected by ARM, allows a virtual interrupt to
trigger the deactivation of a physical interrupt. This allows
the following interrupt to be delivered without requiring an exit.

However, some implementations have chosen not to implement this,
meaning that we will need some unsavoury workarounds to deal with this.

On detecting such a case, taint the kernel and spit a nastygram.
We'll deal with this in later patches.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-init.c       | 10 ++++++++++
 include/kvm/arm_vgic.h                |  3 +++
 include/linux/irqchip/arm-vgic-info.h |  2 ++
 3 files changed, 15 insertions(+)

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 6752d084934d4..340c51d87677c 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -532,6 +532,16 @@ int kvm_vgic_hyp_init(void)
 		return -ENXIO;
 	}
 
+	/*
+	 * If we get one of these oddball non-GICs, taint the kernel,
+	 * as we have no idea of how they *really* behave.
+	 */
+	if (gic_kvm_info->no_hw_deactivation) {
+		kvm_info("Non-architectural vgic, tainting kernel\n");
+		add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+		kvm_vgic_global_state.no_hw_deactivation = true;
+	}
+
 	switch (gic_kvm_info->type) {
 	case GIC_V2:
 		ret = vgic_v2_probe(gic_kvm_info);
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ec621180ef094..e45b26e8d4799 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -72,6 +72,9 @@ struct vgic_global {
 	bool			has_gicv4;
 	bool			has_gicv4_1;
 
+	/* Pseudo GICv3 from outer space */
+	bool			no_hw_deactivation;
+
 	/* GIC system register CPU interface */
 	struct static_key_false gicv3_cpuif;
 
diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h
index 7c0d08ebb82c7..a75b2c7de69d0 100644
--- a/include/linux/irqchip/arm-vgic-info.h
+++ b/include/linux/irqchip/arm-vgic-info.h
@@ -32,6 +32,8 @@ struct gic_kvm_info {
 	bool		has_v4;
 	/* rvpeid support */
 	bool		has_v4_1;
+	/* Deactivation impared, subpar stuff */
+	bool		no_hw_deactivation;
 };
 
 #ifdef CONFIG_KVM
-- 
GitLab


From db75f1a33f82ad332b6e139c5960e01999969d2c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 1 Mar 2021 17:39:39 +0000
Subject: [PATCH 1887/3804] KVM: arm64: vgic: move irq->get_input_level into an
 ops structure

We already have the option to attach a callback to an interrupt
to retrieve its pending state. As we are planning to expand this
facility, move this callback into its own data structure.

This will limit the size of individual interrupts as the ops
structures can be shared across multiple interrupts.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arch_timer.c |  8 ++++++--
 arch/arm64/kvm/vgic/vgic.c  | 14 +++++++-------
 include/kvm/arm_vgic.h      | 28 +++++++++++++++++-----------
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 74e0699661e90..e2288b6bf435c 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -1116,6 +1116,10 @@ bool kvm_arch_timer_get_input_level(int vintid)
 	return kvm_timer_should_fire(timer);
 }
 
+static struct irq_ops arch_timer_irq_ops = {
+	.get_input_level = kvm_arch_timer_get_input_level,
+};
+
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -1143,7 +1147,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	ret = kvm_vgic_map_phys_irq(vcpu,
 				    map.direct_vtimer->host_timer_irq,
 				    map.direct_vtimer->irq.irq,
-				    kvm_arch_timer_get_input_level);
+				    &arch_timer_irq_ops);
 	if (ret)
 		return ret;
 
@@ -1151,7 +1155,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		ret = kvm_vgic_map_phys_irq(vcpu,
 					    map.direct_ptimer->host_timer_irq,
 					    map.direct_ptimer->irq.irq,
-					    kvm_arch_timer_get_input_level);
+					    &arch_timer_irq_ops);
 	}
 
 	if (ret)
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 15b666200f0b4..111bff47e4710 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq)
 
 	BUG_ON(!irq->hw);
 
-	if (irq->get_input_level)
-		return irq->get_input_level(irq->intid);
+	if (irq->ops && irq->ops->get_input_level)
+		return irq->ops->get_input_level(irq->intid);
 
 	WARN_ON(irq_get_irqchip_state(irq->host_irq,
 				      IRQCHIP_STATE_PENDING,
@@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 /* @irq->irq_lock must be held */
 static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 			    unsigned int host_irq,
-			    bool (*get_input_level)(int vindid))
+			    struct irq_ops *ops)
 {
 	struct irq_desc *desc;
 	struct irq_data *data;
@@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
 	irq->hw = true;
 	irq->host_irq = host_irq;
 	irq->hwintid = data->hwirq;
-	irq->get_input_level = get_input_level;
+	irq->ops = ops;
 	return 0;
 }
 
@@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
 {
 	irq->hw = false;
 	irq->hwintid = 0;
-	irq->get_input_level = NULL;
+	irq->ops = NULL;
 }
 
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-			  u32 vintid, bool (*get_input_level)(int vindid))
+			  u32 vintid, struct irq_ops *ops)
 {
 	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	unsigned long flags;
@@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
 	BUG_ON(!irq);
 
 	raw_spin_lock_irqsave(&irq->irq_lock, flags);
-	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level);
+	ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops);
 	raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
 	vgic_put_irq(vcpu->kvm, irq);
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index e45b26e8d4799..e5f06df000f23 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -92,6 +92,21 @@ enum vgic_irq_config {
 	VGIC_CONFIG_LEVEL
 };
 
+/*
+ * Per-irq ops overriding some common behavious.
+ *
+ * Always called in non-preemptible section and the functions can use
+ * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs.
+ */
+struct irq_ops {
+	/*
+	 * Callback function pointer to in-kernel devices that can tell us the
+	 * state of the input level of mapped level-triggered IRQ faster than
+	 * peaking into the physical GIC.
+	 */
+	bool (*get_input_level)(int vintid);
+};
+
 struct vgic_irq {
 	raw_spinlock_t irq_lock;	/* Protects the content of the struct */
 	struct list_head lpi_list;	/* Used to link all LPIs together */
@@ -129,16 +144,7 @@ struct vgic_irq {
 	u8 group;			/* 0 == group 0, 1 == group 1 */
 	enum vgic_irq_config config;	/* Level or edge */
 
-	/*
-	 * Callback function pointer to in-kernel devices that can tell us the
-	 * state of the input level of mapped level-triggered IRQ faster than
-	 * peaking into the physical GIC.
-	 *
-	 * Always called in non-preemptible section and the functions can use
-	 * kvm_arm_get_running_vcpu() to get the vcpu pointer for private
-	 * IRQs.
-	 */
-	bool (*get_input_level)(int vintid);
+	struct irq_ops *ops;
 
 	void *owner;			/* Opaque pointer to reserve an interrupt
 					   for in-kernel devices. */
@@ -355,7 +361,7 @@ void kvm_vgic_init_cpu_hardware(void);
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 			bool level, void *owner);
 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
-			  u32 vintid, bool (*get_input_level)(int vindid));
+			  u32 vintid, struct irq_ops *ops);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
 
-- 
GitLab


From 354920e79441c8a53ac73008b06d3b70ed06eb34 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 15 Mar 2021 13:11:58 +0000
Subject: [PATCH 1888/3804] KVM: arm64: vgic: Implement SW-driven deactivation

In order to deal with these systems that do not offer HW-based
deactivation of interrupts, let's implement a SW-based approach:

- When the irq is queued into a LR, treat it as a pure virtual
  interrupt and set the EOI flag in the LR.

- When the interrupt state is read back from the LR, force a
  deactivation when the state is invalid (neither active nor
  pending)

Interrupts requiring such treatment get the VGIC_IRQ_SW_RESAMPLE flag.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/vgic/vgic-v2.c | 19 +++++++++++++++----
 arch/arm64/kvm/vgic/vgic-v3.c | 19 +++++++++++++++----
 include/kvm/arm_vgic.h        | 10 ++++++++++
 3 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 11934c2af2f42..2c580204f1dc9 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 		 * If this causes us to lower the level, we have to also clear
 		 * the physical active state, since we will otherwise never be
 		 * told when the interrupt becomes asserted again.
+		 *
+		 * Another case is when the interrupt requires a helping hand
+		 * on deactivation (no HW deactivation, for example).
 		 */
-		if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) {
-			irq->line_level = vgic_get_phys_line_level(irq);
+		if (vgic_irq_is_mapped_level(irq)) {
+			bool resample = false;
+
+			if (val & GICH_LR_PENDING_BIT) {
+				irq->line_level = vgic_get_phys_line_level(irq);
+				resample = !irq->line_level;
+			} else if (vgic_irq_needs_resampling(irq) &&
+				   !(irq->active || irq->pending_latch)) {
+				resample = true;
+			}
 
-			if (!irq->line_level)
+			if (resample)
 				vgic_irq_set_phys_active(irq, false);
 		}
 
@@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 	if (irq->group)
 		val |= GICH_LR_GROUP1;
 
-	if (irq->hw) {
+	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
 		val |= GICH_LR_HW;
 		val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
 		/*
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 41ecf219c3334..66004f61cd83d 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		 * If this causes us to lower the level, we have to also clear
 		 * the physical active state, since we will otherwise never be
 		 * told when the interrupt becomes asserted again.
+		 *
+		 * Another case is when the interrupt requires a helping hand
+		 * on deactivation (no HW deactivation, for example).
 		 */
-		if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) {
-			irq->line_level = vgic_get_phys_line_level(irq);
+		if (vgic_irq_is_mapped_level(irq)) {
+			bool resample = false;
+
+			if (val & ICH_LR_PENDING_BIT) {
+				irq->line_level = vgic_get_phys_line_level(irq);
+				resample = !irq->line_level;
+			} else if (vgic_irq_needs_resampling(irq) &&
+				   !(irq->active || irq->pending_latch)) {
+				resample = true;
+			}
 
-			if (!irq->line_level)
+			if (resample)
 				vgic_irq_set_phys_active(irq, false);
 		}
 
@@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
 		}
 	}
 
-	if (irq->hw) {
+	if (irq->hw && !vgic_irq_needs_resampling(irq)) {
 		val |= ICH_LR_HW;
 		val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
 		/*
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index e5f06df000f23..e602d848fc1ab 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -99,6 +99,11 @@ enum vgic_irq_config {
  * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs.
  */
 struct irq_ops {
+	/* Per interrupt flags for special-cased interrupts */
+	unsigned long flags;
+
+#define VGIC_IRQ_SW_RESAMPLE	BIT(0)	/* Clear the active state for resampling */
+
 	/*
 	 * Callback function pointer to in-kernel devices that can tell us the
 	 * state of the input level of mapped level-triggered IRQ faster than
@@ -150,6 +155,11 @@ struct vgic_irq {
 					   for in-kernel devices. */
 };
 
+static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq)
+{
+	return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE);
+}
+
 struct vgic_register_region;
 struct vgic_its;
 
-- 
GitLab


From 2f2f7e39dbb31aa1db13c490a4e47502497510fe Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 15 Mar 2021 14:05:21 +0000
Subject: [PATCH 1889/3804] KVM: arm64: timer: Refactor IRQ configuration

As we are about to add some more things to the timer IRQ
configuration, move this code out of the main timer init code
into its own set of functions.

No functional changes.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arch_timer.c | 57 +++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index e2288b6bf435c..3cd170388d884 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -973,6 +973,35 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
+static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
+{
+	*flags = irq_get_trigger_type(virq);
+	if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
+		kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
+			virq);
+		*flags = IRQF_TRIGGER_LOW;
+	}
+}
+
+static int kvm_irq_init(struct arch_timer_kvm_info *info)
+{
+	if (info->virtual_irq <= 0) {
+		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+			info->virtual_irq);
+		return -ENODEV;
+	}
+
+	host_vtimer_irq = info->virtual_irq;
+	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
+
+	if (info->physical_irq > 0) {
+		host_ptimer_irq = info->physical_irq;
+		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+	}
+
+	return 0;
+}
+
 int kvm_timer_hyp_init(bool has_gic)
 {
 	struct arch_timer_kvm_info *info;
@@ -986,22 +1015,11 @@ int kvm_timer_hyp_init(bool has_gic)
 		return -ENODEV;
 	}
 
-	/* First, do the virtual EL1 timer irq */
-
-	if (info->virtual_irq <= 0) {
-		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
-			info->virtual_irq);
-		return -ENODEV;
-	}
-	host_vtimer_irq = info->virtual_irq;
+	err = kvm_irq_init(info);
+	if (err)
+		return err;
 
-	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
-	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
-	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-		kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
-			host_vtimer_irq);
-		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
-	}
+	/* First, do the virtual EL1 timer irq */
 
 	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 				 "kvm guest vtimer", kvm_get_running_vcpus());
@@ -1027,15 +1045,6 @@ int kvm_timer_hyp_init(bool has_gic)
 	/* Now let's do the physical EL1 timer irq */
 
 	if (info->physical_irq > 0) {
-		host_ptimer_irq = info->physical_irq;
-		host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
-		if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
-		    host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
-			kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
-				host_ptimer_irq);
-			host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
-		}
-
 		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
 					 "kvm guest ptimer", kvm_get_running_vcpus());
 		if (err) {
-- 
GitLab


From 5f59229680f70078ac4c11db2ae89be087474144 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 15 Mar 2021 14:21:21 +0000
Subject: [PATCH 1890/3804] KVM: arm64: timer: Add support for SW-based
 deactivation

In order to deal with the lack of active state, we need to use
the mask/unmask primitives (after all, the active state is just an
additional mask on top of the normal one).

To avoid adding a bunch of ugly conditionals in the timer and vgic
code, let's use a timer-specific irqdomain to deal with the state
conversion. Yes, this is an unexpected use of irqdomains, but
there is no reason not to be just as creative as the designers
of the HW...

This involves overloading the vcpu_affinity, set_irqchip_state
and eoi callbacks so that the rest of the KVM code can continue
ignoring the oddities of the underlying platform.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/arch_timer.c | 105 ++++++++++++++++++++++++++++++++++--
 1 file changed, 101 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 3cd170388d884..3df67c1274898 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -9,6 +9,7 @@
 #include <linux/kvm_host.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/uaccess.h>
 
 #include <clocksource/arm_arch_timer.h>
@@ -973,6 +974,77 @@ static int kvm_timer_dying_cpu(unsigned int cpu)
 	return 0;
 }
 
+static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
+{
+	if (vcpu)
+		irqd_set_forwarded_to_vcpu(d);
+	else
+		irqd_clr_forwarded_to_vcpu(d);
+
+	return 0;
+}
+
+static int timer_irq_set_irqchip_state(struct irq_data *d,
+				       enum irqchip_irq_state which, bool val)
+{
+	if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
+		return irq_chip_set_parent_state(d, which, val);
+
+	if (val)
+		irq_chip_mask_parent(d);
+	else
+		irq_chip_unmask_parent(d);
+
+	return 0;
+}
+
+static void timer_irq_eoi(struct irq_data *d)
+{
+	if (!irqd_is_forwarded_to_vcpu(d))
+		irq_chip_eoi_parent(d);
+}
+
+static void timer_irq_ack(struct irq_data *d)
+{
+	d = d->parent_data;
+	if (d->chip->irq_ack)
+		d->chip->irq_ack(d);
+}
+
+static struct irq_chip timer_chip = {
+	.name			= "KVM",
+	.irq_ack		= timer_irq_ack,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= timer_irq_eoi,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_set_vcpu_affinity	= timer_irq_set_vcpu_affinity,
+	.irq_set_irqchip_state	= timer_irq_set_irqchip_state,
+};
+
+static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	irq_hw_number_t hwirq = (uintptr_t)arg;
+
+	return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					     &timer_chip, NULL);
+}
+
+static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+}
+
+static const struct irq_domain_ops timer_domain_ops = {
+	.alloc	= timer_irq_domain_alloc,
+	.free	= timer_irq_domain_free,
+};
+
+static struct irq_ops arch_timer_irq_ops = {
+	.get_input_level = kvm_arch_timer_get_input_level,
+};
+
 static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
 {
 	*flags = irq_get_trigger_type(virq);
@@ -985,6 +1057,8 @@ static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
 
 static int kvm_irq_init(struct arch_timer_kvm_info *info)
 {
+	struct irq_domain *domain = NULL;
+
 	if (info->virtual_irq <= 0) {
 		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
 			info->virtual_irq);
@@ -994,9 +1068,36 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
 	host_vtimer_irq = info->virtual_irq;
 	kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
 
+	if (kvm_vgic_global_state.no_hw_deactivation) {
+		struct fwnode_handle *fwnode;
+		struct irq_data *data;
+
+		fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
+		if (!fwnode)
+			return -ENOMEM;
+
+		/* Assume both vtimer and ptimer in the same parent */
+		data = irq_get_irq_data(host_vtimer_irq);
+		domain = irq_domain_create_hierarchy(data->domain, 0,
+						     NR_KVM_TIMERS, fwnode,
+						     &timer_domain_ops, NULL);
+		if (!domain) {
+			irq_domain_free_fwnode(fwnode);
+			return -ENOMEM;
+		}
+
+		arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
+		WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
+					    (void *)TIMER_VTIMER));
+	}
+
 	if (info->physical_irq > 0) {
 		host_ptimer_irq = info->physical_irq;
 		kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
+
+		if (domain)
+			WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
+						    (void *)TIMER_PTIMER));
 	}
 
 	return 0;
@@ -1125,10 +1226,6 @@ bool kvm_arch_timer_get_input_level(int vintid)
 	return kvm_timer_should_fire(timer);
 }
 
-static struct irq_ops arch_timer_irq_ops = {
-	.get_input_level = kvm_arch_timer_get_input_level,
-};
-
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-- 
GitLab


From b6ca556c352979d09659027dc1559fad15b72649 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 28 Feb 2021 11:11:47 +0000
Subject: [PATCH 1891/3804] irqchip/apple-aic: Advertise some level of vGICv3
 compatibility

The CPUs in the Apple M1 SoC partially implement a virtual GICv3
CPU interface, although one that is capable of neither HW deactivation
of interrupts nor masking the maintenance interrupt.

Advertise the support to KVM.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-apple-aic.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index c179e27062fd5..b8c06bd8659e9 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -50,6 +50,7 @@
 #include <linux/cpuhotplug.h>
 #include <linux/io.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/arm-vgic-info.h>
 #include <linux/irqdomain.h>
 #include <linux/limits.h>
 #include <linux/of_address.h>
@@ -787,6 +788,12 @@ static int aic_init_cpu(unsigned int cpu)
 	return 0;
 }
 
+static struct gic_kvm_info vgic_info __initdata = {
+	.type			= GIC_V3,
+	.no_maint_irq_mask	= true,
+	.no_hw_deactivation	= true,
+};
+
 static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
 {
 	int i;
@@ -843,6 +850,8 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
 			  "irqchip/apple-aic/ipi:starting",
 			  aic_init_cpu, NULL);
 
+	vgic_set_kvm_info(&vgic_info);
+
 	pr_info("Initialized with %d IRQs, %d FIQs, %d vIPIs\n",
 		irqc->nr_hw, AIC_NR_FIQ, AIC_NR_SWIPI);
 
-- 
GitLab


From 380d2b2d5a0491e47dfa250b40e3d849a922871d Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Fri, 28 May 2021 02:54:00 +0300
Subject: [PATCH 1892/3804] regulator: core: Add regulator_sync_voltage_rdev()

Some NVIDIA Tegra devices use a CPU soft-reset method for the reboot and
in this case we need to restore the coupled voltages to the state that is
suitable for hardware during boot. Add a new regulator_sync_voltage_rdev()
helper which is needed by regulator drivers in order to sync the voltage of
coupled regulators.

Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/regulator/core.c         | 23 +++++++++++++++++++++++
 include/linux/regulator/driver.h |  1 +
 2 files changed, 24 insertions(+)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f192bf19492ed..ead0b6d2af45f 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -4105,6 +4105,29 @@ int regulator_set_voltage_time_sel(struct regulator_dev *rdev,
 }
 EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel);
 
+int regulator_sync_voltage_rdev(struct regulator_dev *rdev)
+{
+	int ret;
+
+	regulator_lock(rdev);
+
+	if (!rdev->desc->ops->set_voltage &&
+	    !rdev->desc->ops->set_voltage_sel) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* balance only, if regulator is coupled */
+	if (rdev->coupling_desc.n_coupled > 1)
+		ret = regulator_balance_voltage(rdev, PM_SUSPEND_ON);
+	else
+		ret = -EOPNOTSUPP;
+
+out:
+	regulator_unlock(rdev);
+	return ret;
+}
+
 /**
  * regulator_sync_voltage - re-apply last regulator output voltage
  * @regulator: regulator source
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 4ea520c248e9e..35e5a611db816 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -540,6 +540,7 @@ int regulator_set_current_limit_regmap(struct regulator_dev *rdev,
 int regulator_get_current_limit_regmap(struct regulator_dev *rdev);
 void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data);
 int regulator_set_ramp_delay_regmap(struct regulator_dev *rdev, int ramp_delay);
+int regulator_sync_voltage_rdev(struct regulator_dev *rdev);
 
 /*
  * Helper functions intended to be used by regulator drivers prior registering
-- 
GitLab


From fd6f17bade2147b31198ad00b22d3acf5a398aec Mon Sep 17 00:00:00 2001
From: Keqian Zhu <zhukeqian1@huawei.com>
Date: Fri, 7 May 2021 19:03:21 +0800
Subject: [PATCH 1893/3804] KVM: arm64: Remove the creation time's mapping of
 MMIO regions

The MMIO regions may be unmapped for many reasons and can be remapped
by the stage2 fault path. Mapping MMIO regions at creation time is only a
minor optimization and makes these two mapping paths hard to keep in sync.

Remove the mapping code while keeping the useful sanity check.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210507110322.23348-2-zhukeqian1@huawei.com
---
 arch/arm64/kvm/mmu.c | 38 +++-----------------------------------
 1 file changed, 3 insertions(+), 35 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f36..e982178c8c72b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1346,7 +1346,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 {
 	hva_t hva = mem->userspace_addr;
 	hva_t reg_end = hva + mem->memory_size;
-	bool writable = !(mem->flags & KVM_MEM_READONLY);
 	int ret = 0;
 
 	if (change != KVM_MR_CREATE && change != KVM_MR_MOVE &&
@@ -1363,8 +1362,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	mmap_read_lock(current->mm);
 	/*
 	 * A memory region could potentially cover multiple VMAs, and any holes
-	 * between them, so iterate over all of them to find out if we can map
-	 * any of them right now.
+	 * between them, so iterate over all of them.
 	 *
 	 *     +--------------------------------------------+
 	 * +---------------+----------------+   +----------------+
@@ -1375,51 +1373,21 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 */
 	do {
 		struct vm_area_struct *vma;
-		hva_t vm_start, vm_end;
 
 		vma = find_vma_intersection(current->mm, hva, reg_end);
 		if (!vma)
 			break;
 
-		/*
-		 * Take the intersection of this VMA with the memory region
-		 */
-		vm_start = max(hva, vma->vm_start);
-		vm_end = min(reg_end, vma->vm_end);
-
 		if (vma->vm_flags & VM_PFNMAP) {
-			gpa_t gpa = mem->guest_phys_addr +
-				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa;
-
-			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
-			pa += vm_start - vma->vm_start;
-
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
 				ret = -EINVAL;
-				goto out;
-			}
-
-			ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
-						    vm_end - vm_start,
-						    writable);
-			if (ret)
 				break;
+			}
 		}
-		hva = vm_end;
+		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);
 
-	if (change == KVM_MR_FLAGS_ONLY)
-		goto out;
-
-	spin_lock(&kvm->mmu_lock);
-	if (ret)
-		unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size);
-	else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
-		stage2_flush_memslot(kvm, memslot);
-	spin_unlock(&kvm->mmu_lock);
-out:
 	mmap_read_unlock(current->mm);
 	return ret;
 }
-- 
GitLab


From 2aa53d68cee6603931f73b28ef6b51ff3fde9397 Mon Sep 17 00:00:00 2001
From: Keqian Zhu <zhukeqian1@huawei.com>
Date: Fri, 7 May 2021 19:03:22 +0800
Subject: [PATCH 1894/3804] KVM: arm64: Try stage2 block mapping for host
 device MMIO

The MMIO region of a device may be huge (GB level), so try to use
block mapping in stage2 to speed up both map and unmap.

Compared to normal memory mapping, we should consider two more
points when trying block mapping for an MMIO region:

1. For normal memory mapping, the PA (host physical address) and
HVA have the same alignment within PUD_SIZE or PMD_SIZE when we use
the HVA to request a hugepage, so we don't need to consider PA
alignment when verifying block mapping. But for device memory
mapping, the PA and HVA may have different alignment.

2. For normal memory mapping, we are sure the hugepage size properly
fits into the vma, so we don't check whether the mapping size exceeds
the boundary of the vma. But for device memory mapping, we should pay
attention to this.

This adds get_vma_page_shift() to get the page shift for both normal
memory and device MMIO regions, and checks these two points when
selecting the block mapping size for an MMIO region.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210507110322.23348-3-zhukeqian1@huawei.com
---
 arch/arm64/kvm/mmu.c | 61 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 51 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index e982178c8c72b..5742ba765ff95 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	return PAGE_SIZE;
 }
 
+static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
+{
+	unsigned long pa;
+
+	if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP))
+		return huge_page_shift(hstate_vma(vma));
+
+	if (!(vma->vm_flags & VM_PFNMAP))
+		return PAGE_SHIFT;
+
+	VM_BUG_ON(is_vm_hugetlb_page(vma));
+
+	pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PUD_SIZE) <= vma->vm_end)
+		return PUD_SHIFT;
+#endif
+
+	if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) &&
+	    ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
+	    ALIGN(hva, PMD_SIZE) <= vma->vm_end)
+		return PMD_SHIFT;
+
+	return PAGE_SHIFT;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	/* Let's check if we will get back a huge page backed by hugetlbfs */
+	/*
+	 * Let's check if we will get back a huge page backed by hugetlbfs, or
+	 * get block mapping for device MMIO region.
+	 */
 	mmap_read_lock(current->mm);
 	vma = find_vma_intersection(current->mm, hva, hva + 1);
 	if (unlikely(!vma)) {
@@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 	}
 
-	if (is_vm_hugetlb_page(vma))
-		vma_shift = huge_page_shift(hstate_vma(vma));
-	else
-		vma_shift = PAGE_SHIFT;
-
-	if (logging_active ||
-	    (vma->vm_flags & VM_PFNMAP)) {
+	/*
+	 * logging_active is guaranteed to never be true for VM_PFNMAP
+	 * memslots.
+	 */
+	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
+	} else {
+		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
 	switch (vma_shift) {
@@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 
 	if (kvm_is_device_pfn(pfn)) {
+		/*
+		 * If the page was identified as device early by looking at
+		 * the VMA flags, vma_pagesize is already representing the
+		 * largest quantity we can map.  If instead it was mapped
+		 * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE
+		 * and must not be upgraded.
+		 *
+		 * In both cases, we don't let transparent_hugepage_adjust()
+		 * change things at the last minute.
+		 */
 		device = true;
-		force_pte = true;
 	} else if (logging_active && !write_fault) {
 		/*
 		 * Only actually map the page as writable if this was a write
@@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * If we are not forced to use page mapping, check if we are
 	 * backed by a THP and thus use block mapping if possible.
 	 */
-	if (vma_pagesize == PAGE_SIZE && !force_pte)
+	if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
 		vma_pagesize = transparent_hugepage_adjust(memslot, hva,
 							   &pfn, &fault_ipa);
 	if (writable)
-- 
GitLab


From ce1f25718b2520d0210c24f1e4145d75c5620c9f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 1 Jun 2021 11:35:06 +0100
Subject: [PATCH 1895/3804] ASoC: topology: Fix spelling mistake "vesion" ->
 "version"

There are spelling mistakes in comments. Fix them.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210601103506.9477-1-colin.king@canonical.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-topology.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 73076d425efb3..4893a56208e08 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1901,7 +1901,7 @@ static void stream_caps_new_ver(struct snd_soc_tplg_stream_caps *dest,
  * @src: older version of pcm as a source
  * @pcm: latest version of pcm created from the source
  *
- * Support from vesion 4. User should free the returned pcm manually.
+ * Support from version 4. User should free the returned pcm manually.
  */
 static int pcm_new_ver(struct soc_tplg *tplg,
 		       struct snd_soc_tplg_pcm *src,
@@ -2089,7 +2089,7 @@ static void set_link_hw_format(struct snd_soc_dai_link *link,
  * @src: old version of phyical link config as a source
  * @link: latest version of physical link config created from the source
  *
- * Support from vesion 4. User need free the returned link config manually.
+ * Support from version 4. User need free the returned link config manually.
  */
 static int link_new_ver(struct soc_tplg *tplg,
 			struct snd_soc_tplg_link_config *src,
@@ -2400,7 +2400,7 @@ static int soc_tplg_dai_elems_load(struct soc_tplg *tplg,
  * @src: old version of manifest as a source
  * @manifest: latest version of manifest created from the source
  *
- * Support from vesion 4. Users need free the returned manifest manually.
+ * Support from version 4. Users need free the returned manifest manually.
  */
 static int manifest_new_ver(struct soc_tplg *tplg,
 			    struct snd_soc_tplg_manifest *src,
-- 
GitLab


From a8437f05384cb472518ec21bf4fffbe8f0a47378 Mon Sep 17 00:00:00 2001
From: Nicolas Cavallari <nicolas.cavallari@green-communications.fr>
Date: Thu, 27 May 2021 18:34:09 +0200
Subject: [PATCH 1896/3804] ASoC: fsl-asoc-card: Set .owner attribute when
 registering card.

Otherwise, when compiled as module, a WARN_ON is triggered:

WARNING: CPU: 0 PID: 5 at sound/core/init.c:208 snd_card_new+0x310/0x39c [snd]
[...]
CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.10.39 #1
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
Workqueue: events deferred_probe_work_func
[<c0111988>] (unwind_backtrace) from [<c010c8ac>] (show_stack+0x10/0x14)
[<c010c8ac>] (show_stack) from [<c092784c>] (dump_stack+0xdc/0x104)
[<c092784c>] (dump_stack) from [<c0129710>] (__warn+0xd8/0x114)
[<c0129710>] (__warn) from [<c0922a48>] (warn_slowpath_fmt+0x5c/0xc4)
[<c0922a48>] (warn_slowpath_fmt) from [<bf0496f8>] (snd_card_new+0x310/0x39c [snd])
[<bf0496f8>] (snd_card_new [snd]) from [<bf1d7df8>] (snd_soc_bind_card+0x334/0x9c4 [snd_soc_core])
[<bf1d7df8>] (snd_soc_bind_card [snd_soc_core]) from [<bf1e9cd8>] (devm_snd_soc_register_card+0x30/0x6c [snd_soc_core])
[<bf1e9cd8>] (devm_snd_soc_register_card [snd_soc_core]) from [<bf22d964>] (fsl_asoc_card_probe+0x550/0xcc8 [snd_soc_fsl_asoc_card])
[<bf22d964>] (fsl_asoc_card_probe [snd_soc_fsl_asoc_card]) from [<c060c930>] (platform_drv_probe+0x48/0x98)
[...]

Signed-off-by: Nicolas Cavallari <nicolas.cavallari@green-communications.fr>
Acked-by: Shengjiu Wang <shengjiu.wang@gmail.com>
Link: https://lore.kernel.org/r/20210527163409.22049-1-nicolas.cavallari@green-communications.fr
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/fsl-asoc-card.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c
index c62bfd1c3ac7c..4f55b316cf0fb 100644
--- a/sound/soc/fsl/fsl-asoc-card.c
+++ b/sound/soc/fsl/fsl-asoc-card.c
@@ -744,6 +744,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev)
 	/* Initialize sound card */
 	priv->pdev = pdev;
 	priv->card.dev = &pdev->dev;
+	priv->card.owner = THIS_MODULE;
 	ret = snd_soc_of_parse_card_name(&priv->card, "model");
 	if (ret) {
 		snprintf(priv->name, sizeof(priv->name), "%s-audio",
-- 
GitLab


From b640e8a4bd24e17ce24a064d704aba14831651a8 Mon Sep 17 00:00:00 2001
From: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Date: Fri, 28 May 2021 17:43:30 +0300
Subject: [PATCH 1897/3804] ASoC: SOF: reset enabled_cores state at suspend

The recent changes to use common code to power up/down DSP cores also
removed the reset of the core state at suspend. It turns out this is
still needed. When the firmware state is reset to
SOF_FW_BOOT_NOT_STARTED, also enabled_cores should be reset, and
existing DSP drivers depend on this.

BugLink: https://github.com/thesofproject/linux/issues/2824
Fixes: 42077f08b3 ("ASoC: SOF: update dsp core power status in common APIs")
Signed-off-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/20210528144330.2551-1-kai.vehmanen@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/sof/pm.c b/sound/soc/sof/pm.c
index fd265803f7bc7..c83fb62559616 100644
--- a/sound/soc/sof/pm.c
+++ b/sound/soc/sof/pm.c
@@ -256,6 +256,7 @@ suspend:
 
 	/* reset FW state */
 	sdev->fw_state = SOF_FW_BOOT_NOT_STARTED;
+	sdev->enabled_cores_mask = 0;
 
 	return ret;
 }
-- 
GitLab


From 86ab21cc39e6b99b7065ab9008c90bec5dec535a Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 30 May 2021 20:41:00 +0800
Subject: [PATCH 1898/3804] regulator: rtmv20: Fix
 .set_current_limit/.get_current_limit callbacks

Current code does not set .curr_table and .n_linear_ranges settings,
so it cannot use the regulator_get/set_current_limit_regmap helpers.
If we set up the curr_table, it will have 200 entries.
Implement customized .set_current_limit/.get_current_limit callbacks
instead.

Fixes: b8c054a5eaf0 ("regulator: rtmv20: Adds support for Richtek RTMV20 load switch regulator")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/20210530124101.477727-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rtmv20-regulator.c | 42 ++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/rtmv20-regulator.c b/drivers/regulator/rtmv20-regulator.c
index 852fb2596ffda..5adc552dffd58 100644
--- a/drivers/regulator/rtmv20-regulator.c
+++ b/drivers/regulator/rtmv20-regulator.c
@@ -103,9 +103,47 @@ static int rtmv20_lsw_disable(struct regulator_dev *rdev)
 	return 0;
 }
 
+static int rtmv20_lsw_set_current_limit(struct regulator_dev *rdev, int min_uA,
+					int max_uA)
+{
+	int sel;
+
+	if (min_uA > RTMV20_LSW_MAXUA || max_uA < RTMV20_LSW_MINUA)
+		return -EINVAL;
+
+	if (max_uA > RTMV20_LSW_MAXUA)
+		max_uA = RTMV20_LSW_MAXUA;
+
+	sel = (max_uA - RTMV20_LSW_MINUA) / RTMV20_LSW_STEPUA;
+
+	/* Ensure the selected setting is still in range */
+	if ((sel * RTMV20_LSW_STEPUA + RTMV20_LSW_MINUA) < min_uA)
+		return -EINVAL;
+
+	sel <<= ffs(rdev->desc->csel_mask) - 1;
+
+	return regmap_update_bits(rdev->regmap, rdev->desc->csel_reg,
+				  rdev->desc->csel_mask, sel);
+}
+
+static int rtmv20_lsw_get_current_limit(struct regulator_dev *rdev)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, rdev->desc->csel_reg, &val);
+	if (ret)
+		return ret;
+
+	val &= rdev->desc->csel_mask;
+	val >>= ffs(rdev->desc->csel_mask) - 1;
+
+	return val * RTMV20_LSW_STEPUA + RTMV20_LSW_MINUA;
+}
+
 static const struct regulator_ops rtmv20_regulator_ops = {
-	.set_current_limit = regulator_set_current_limit_regmap,
-	.get_current_limit = regulator_get_current_limit_regmap,
+	.set_current_limit = rtmv20_lsw_set_current_limit,
+	.get_current_limit = rtmv20_lsw_get_current_limit,
 	.enable = rtmv20_lsw_enable,
 	.disable = rtmv20_lsw_disable,
 	.is_enabled = regulator_is_enabled_regmap,
-- 
GitLab


From 5f01de6ffae2b00d3795a399d8d630bdae3c8997 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 30 May 2021 20:41:01 +0800
Subject: [PATCH 1899/3804] regulator: rtmv20: Add Richtek to Kconfig text

The other Richtek drivers have the Richtek prefix; make this one consistent.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210530124101.477727-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 77c43134bc9e7..6120c5cf6ccc0 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1011,7 +1011,7 @@ config REGULATOR_RT5033
 	  current source, LDO and Buck.
 
 config REGULATOR_RTMV20
-	tristate "RTMV20 Laser Diode Regulator"
+	tristate "Richtek RTMV20 Laser Diode Regulator"
 	depends on I2C
 	select REGMAP_I2C
 	help
-- 
GitLab


From 89082179ec5028bcd58c87171e08ada035689542 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 30 May 2021 10:21:09 +0800
Subject: [PATCH 1900/3804] regulator: mt6315: Fix function prototype for
 mt6315_map_mode

The .of_map_mode callback should have the following function prototype:
	unsigned int (*of_map_mode)(unsigned int mode);

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210530022109.425054-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6315-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c
index 9edc34981ee0a..6b8be52c3772a 100644
--- a/drivers/regulator/mt6315-regulator.c
+++ b/drivers/regulator/mt6315-regulator.c
@@ -59,7 +59,7 @@ static const struct linear_range mt_volt_range1[] = {
 	REGULATOR_LINEAR_RANGE(0, 0, 0xbf, 6250),
 };
 
-static unsigned int mt6315_map_mode(u32 mode)
+static unsigned int mt6315_map_mode(unsigned int mode)
 {
 	switch (mode) {
 	case MT6315_BUCK_MODE_AUTO:
-- 
GitLab


From 46639a5e684edd0b80ae9dff220f193feb356277 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Tue, 1 Jun 2021 18:09:15 +0800
Subject: [PATCH 1901/3804] regulator: rtmv20: Fix to make regcache value first
 reading back from HW

- Fix the regcache so that its values are first read back from HW.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1622542155-6373-1-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rtmv20-regulator.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/regulator/rtmv20-regulator.c b/drivers/regulator/rtmv20-regulator.c
index 5adc552dffd58..4bca64de0f672 100644
--- a/drivers/regulator/rtmv20-regulator.c
+++ b/drivers/regulator/rtmv20-regulator.c
@@ -27,6 +27,7 @@
 #define RTMV20_REG_LDIRQ	0x30
 #define RTMV20_REG_LDSTAT	0x40
 #define RTMV20_REG_LDMASK	0x50
+#define RTMV20_MAX_REGS		(RTMV20_REG_LDMASK + 1)
 
 #define RTMV20_VID_MASK		GENMASK(7, 4)
 #define RICHTEK_VID		0x80
@@ -313,6 +314,7 @@ static const struct regmap_config rtmv20_regmap_config = {
 	.val_bits = 8,
 	.cache_type = REGCACHE_RBTREE,
 	.max_register = RTMV20_REG_LDMASK,
+	.num_reg_defaults_raw = RTMV20_MAX_REGS,
 
 	.writeable_reg = rtmv20_is_accessible_reg,
 	.readable_reg = rtmv20_is_accessible_reg,
-- 
GitLab


From 1963fa67d78674a110bc9b2a8b1e226967692f05 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 29 May 2021 07:01:47 +0800
Subject: [PATCH 1902/3804] regulator: atc260x: Fix n_voltages and min_sel for
 pickable linear ranges

The .n_voltages was missing for pickable linear ranges, fix it.
The min_sel for each pickable range should start from 0.
Also fix atc260x_ldo_voltage_range_sel setting (bit 5 - LDO<N>_VOL_SEL
in datasheet).

Fixes: 3b15ccac161a ("regulator: Add regulator driver for ATC260x PMICs")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Cristian Ciocaltea <cristian.ciocaltea@gmail.com>
Link: https://lore.kernel.org/r/20210528230147.363974-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/atc260x-regulator.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/regulator/atc260x-regulator.c b/drivers/regulator/atc260x-regulator.c
index d8b429955d33f..05147d2c38428 100644
--- a/drivers/regulator/atc260x-regulator.c
+++ b/drivers/regulator/atc260x-regulator.c
@@ -28,16 +28,16 @@ static const struct linear_range atc2609a_dcdc_voltage_ranges[] = {
 
 static const struct linear_range atc2609a_ldo_voltage_ranges0[] = {
 	REGULATOR_LINEAR_RANGE(700000, 0, 15, 100000),
-	REGULATOR_LINEAR_RANGE(2100000, 16, 28, 100000),
+	REGULATOR_LINEAR_RANGE(2100000, 0, 12, 100000),
 };
 
 static const struct linear_range atc2609a_ldo_voltage_ranges1[] = {
 	REGULATOR_LINEAR_RANGE(850000, 0, 15, 100000),
-	REGULATOR_LINEAR_RANGE(2100000, 16, 27, 100000),
+	REGULATOR_LINEAR_RANGE(2100000, 0, 11, 100000),
 };
 
 static const unsigned int atc260x_ldo_voltage_range_sel[] = {
-	0x0, 0x1,
+	0x0, 0x20,
 };
 
 static int atc260x_dcdc_set_voltage_time_sel(struct regulator_dev *rdev,
@@ -411,7 +411,7 @@ enum atc2609a_reg_ids {
 	.owner = THIS_MODULE, \
 }
 
-#define atc2609a_reg_desc_ldo_range_pick(num, n_range) { \
+#define atc2609a_reg_desc_ldo_range_pick(num, n_range, n_volt) { \
 	.name = "LDO"#num, \
 	.supply_name = "ldo"#num, \
 	.of_match = of_match_ptr("ldo"#num), \
@@ -421,6 +421,7 @@ enum atc2609a_reg_ids {
 	.type = REGULATOR_VOLTAGE, \
 	.linear_ranges = atc2609a_ldo_voltage_ranges##n_range, \
 	.n_linear_ranges = ARRAY_SIZE(atc2609a_ldo_voltage_ranges##n_range), \
+	.n_voltages = n_volt, \
 	.vsel_reg = ATC2609A_PMU_LDO##num##_CTL0, \
 	.vsel_mask = GENMASK(4, 1), \
 	.vsel_range_reg = ATC2609A_PMU_LDO##num##_CTL0, \
@@ -458,12 +459,12 @@ static const struct regulator_desc atc2609a_reg[] = {
 	atc2609a_reg_desc_ldo_bypass(0),
 	atc2609a_reg_desc_ldo_bypass(1),
 	atc2609a_reg_desc_ldo_bypass(2),
-	atc2609a_reg_desc_ldo_range_pick(3, 0),
-	atc2609a_reg_desc_ldo_range_pick(4, 0),
+	atc2609a_reg_desc_ldo_range_pick(3, 0, 29),
+	atc2609a_reg_desc_ldo_range_pick(4, 0, 29),
 	atc2609a_reg_desc_ldo(5),
-	atc2609a_reg_desc_ldo_range_pick(6, 1),
-	atc2609a_reg_desc_ldo_range_pick(7, 0),
-	atc2609a_reg_desc_ldo_range_pick(8, 0),
+	atc2609a_reg_desc_ldo_range_pick(6, 1, 28),
+	atc2609a_reg_desc_ldo_range_pick(7, 0, 29),
+	atc2609a_reg_desc_ldo_range_pick(8, 0, 29),
 	atc2609a_reg_desc_ldo_fixed(9),
 };
 
-- 
GitLab


From 2ec6f20b33eb4f62ab90bdcd620436c883ec3af6 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 27 May 2021 23:10:56 +0200
Subject: [PATCH 1903/3804] spi: Cleanup on failure of initial setup

Commit c7299fea6769 ("spi: Fix spi device unregister flow") changed the
SPI core's behavior if the ->setup() hook returns an error upon adding
an spi_device:  Before, the ->cleanup() hook was invoked to free any
allocations that were made by ->setup().  With the commit, that's no
longer the case, so the ->setup() hook is expected to free the
allocations itself.

I've identified 5 drivers which depend on the old behavior and am fixing
them up hereinafter: spi-bitbang.c spi-fsl-spi.c spi-omap-uwire.c
spi-omap2-mcspi.c spi-pxa2xx.c

Importantly, ->setup() is not only invoked on spi_device *addition*:
It may subsequently be called to *change* SPI parameters.  If changing
these SPI parameters fails, freeing memory allocations would be wrong.
That should only be done if the spi_device is finally destroyed.
I am therefore using a bool "initial_setup" in 4 of the affected drivers
to differentiate between the invocation on *adding* the spi_device and
any subsequent invocations: spi-bitbang.c spi-fsl-spi.c spi-omap-uwire.c
spi-omap2-mcspi.c

In spi-pxa2xx.c, it seems the ->setup() hook can only fail on spi_device
addition, not any subsequent calls.  It therefore doesn't need the bool.

It's worth noting that 5 other drivers already perform a cleanup if the
->setup() hook fails.  Before c7299fea6769, they caused a double-free
if ->setup() failed on spi_device addition.  Since the commit, they're
fine.  These drivers are: spi-mpc512x-psc.c spi-pl022.c spi-s3c64xx.c
spi-st-ssc4.c spi-tegra114.c

(spi-pxa2xx.c also already performs a cleanup, but only in one of
several error paths.)

Fixes: c7299fea6769 ("spi: Fix spi device unregister flow")
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Saravana Kannan <saravanak@google.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> # pxa2xx
Link: https://lore.kernel.org/r/f76a0599469f265b69c371538794101fa37b5536.1622149321.git.lukas@wunner.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bitbang.c     | 18 ++++++++++++++----
 drivers/spi/spi-fsl-spi.c     |  4 ++++
 drivers/spi/spi-omap-uwire.c  |  9 ++++++++-
 drivers/spi/spi-omap2-mcspi.c | 33 ++++++++++++++++++++-------------
 drivers/spi/spi-pxa2xx.c      |  9 ++++++++-
 5 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 6a6af85aebfd8..27d0087f86884 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -184,6 +184,8 @@ int spi_bitbang_setup(struct spi_device *spi)
 {
 	struct spi_bitbang_cs	*cs = spi->controller_state;
 	struct spi_bitbang	*bitbang;
+	bool			initial_setup = false;
+	int			retval;
 
 	bitbang = spi_master_get_devdata(spi->master);
 
@@ -192,22 +194,30 @@ int spi_bitbang_setup(struct spi_device *spi)
 		if (!cs)
 			return -ENOMEM;
 		spi->controller_state = cs;
+		initial_setup = true;
 	}
 
 	/* per-word shift register access, in hardware or bitbanging */
 	cs->txrx_word = bitbang->txrx_word[spi->mode & (SPI_CPOL|SPI_CPHA)];
-	if (!cs->txrx_word)
-		return -EINVAL;
+	if (!cs->txrx_word) {
+		retval = -EINVAL;
+		goto err_free;
+	}
 
 	if (bitbang->setup_transfer) {
-		int retval = bitbang->setup_transfer(spi, NULL);
+		retval = bitbang->setup_transfer(spi, NULL);
 		if (retval < 0)
-			return retval;
+			goto err_free;
 	}
 
 	dev_dbg(&spi->dev, "%s, %u nsec/bit\n", __func__, 2 * cs->nsecs);
 
 	return 0;
+
+err_free:
+	if (initial_setup)
+		kfree(cs);
+	return retval;
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_setup);
 
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index d0e5aa18b7bad..bdf94cc7be1af 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -440,6 +440,7 @@ static int fsl_spi_setup(struct spi_device *spi)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi;
 	struct fsl_spi_reg __iomem *reg_base;
+	bool initial_setup = false;
 	int retval;
 	u32 hw_mode;
 	struct spi_mpc8xxx_cs *cs = spi_get_ctldata(spi);
@@ -452,6 +453,7 @@ static int fsl_spi_setup(struct spi_device *spi)
 		if (!cs)
 			return -ENOMEM;
 		spi_set_ctldata(spi, cs);
+		initial_setup = true;
 	}
 	mpc8xxx_spi = spi_master_get_devdata(spi->master);
 
@@ -475,6 +477,8 @@ static int fsl_spi_setup(struct spi_device *spi)
 	retval = fsl_spi_setup_transfer(spi, NULL);
 	if (retval < 0) {
 		cs->hw_mode = hw_mode; /* Restore settings */
+		if (initial_setup)
+			kfree(cs);
 		return retval;
 	}
 
diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 71402f71ddd85..df28c6664aba6 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -424,15 +424,22 @@ done:
 static int uwire_setup(struct spi_device *spi)
 {
 	struct uwire_state *ust = spi->controller_state;
+	bool initial_setup = false;
+	int status;
 
 	if (ust == NULL) {
 		ust = kzalloc(sizeof(*ust), GFP_KERNEL);
 		if (ust == NULL)
 			return -ENOMEM;
 		spi->controller_state = ust;
+		initial_setup = true;
 	}
 
-	return uwire_setup_transfer(spi, NULL);
+	status = uwire_setup_transfer(spi, NULL);
+	if (status && initial_setup)
+		kfree(ust);
+
+	return status;
 }
 
 static void uwire_cleanup(struct spi_device *spi)
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 999c227364164..ede7f05e5ced7 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1032,8 +1032,22 @@ static void omap2_mcspi_release_dma(struct spi_master *master)
 	}
 }
 
+static void omap2_mcspi_cleanup(struct spi_device *spi)
+{
+	struct omap2_mcspi_cs	*cs;
+
+	if (spi->controller_state) {
+		/* Unlink controller state from context save list */
+		cs = spi->controller_state;
+		list_del(&cs->node);
+
+		kfree(cs);
+	}
+}
+
 static int omap2_mcspi_setup(struct spi_device *spi)
 {
+	bool			initial_setup = false;
 	int			ret;
 	struct omap2_mcspi	*mcspi = spi_master_get_devdata(spi->master);
 	struct omap2_mcspi_regs	*ctx = &mcspi->ctx;
@@ -1051,35 +1065,28 @@ static int omap2_mcspi_setup(struct spi_device *spi)
 		spi->controller_state = cs;
 		/* Link this to context save list */
 		list_add_tail(&cs->node, &ctx->cs);
+		initial_setup = true;
 	}
 
 	ret = pm_runtime_get_sync(mcspi->dev);
 	if (ret < 0) {
 		pm_runtime_put_noidle(mcspi->dev);
+		if (initial_setup)
+			omap2_mcspi_cleanup(spi);
 
 		return ret;
 	}
 
 	ret = omap2_mcspi_setup_transfer(spi, NULL);
+	if (ret && initial_setup)
+		omap2_mcspi_cleanup(spi);
+
 	pm_runtime_mark_last_busy(mcspi->dev);
 	pm_runtime_put_autosuspend(mcspi->dev);
 
 	return ret;
 }
 
-static void omap2_mcspi_cleanup(struct spi_device *spi)
-{
-	struct omap2_mcspi_cs	*cs;
-
-	if (spi->controller_state) {
-		/* Unlink controller state from context save list */
-		cs = spi->controller_state;
-		list_del(&cs->node);
-
-		kfree(cs);
-	}
-}
-
 static irqreturn_t omap2_mcspi_irq_handler(int irq, void *data)
 {
 	struct omap2_mcspi *mcspi = data;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 5e59ba075bc7a..8ee0cc0717774 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1254,6 +1254,8 @@ static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 		chip->gpio_cs_inverted = spi->mode & SPI_CS_HIGH;
 
 		err = gpiod_direction_output(gpiod, !chip->gpio_cs_inverted);
+		if (err)
+			gpiod_put(chip->gpiod_cs);
 	}
 
 	return err;
@@ -1267,6 +1269,7 @@ static int setup(struct spi_device *spi)
 	struct driver_data *drv_data =
 		spi_controller_get_devdata(spi->controller);
 	uint tx_thres, tx_hi_thres, rx_thres;
+	int err;
 
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
@@ -1413,7 +1416,11 @@ static int setup(struct spi_device *spi)
 	if (drv_data->ssp_type == CE4100_SSP)
 		return 0;
 
-	return setup_cs(spi, chip, chip_info);
+	err = setup_cs(spi, chip, chip_info);
+	if (err)
+		kfree(chip);
+
+	return err;
 }
 
 static void cleanup(struct spi_device *spi)
-- 
GitLab


From 4c4fce171c4ca08cd98be7db350e6950630b046a Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 26 May 2021 20:24:08 +0800
Subject: [PATCH 1904/3804] regulator: pca9450: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210526122408.78156-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/pca9450-regulator.c | 51 +++++++++++++--------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c
index 2f7ee212cb8c9..64e5f5f0cc841 100644
--- a/drivers/regulator/pca9450-regulator.c
+++ b/drivers/regulator/pca9450-regulator.c
@@ -65,32 +65,9 @@ static const struct regmap_config pca9450_regmap_config = {
  * 10: 25mV/4usec
  * 11: 25mV/8usec
  */
-static int pca9450_dvs_set_ramp_delay(struct regulator_dev *rdev,
-				      int ramp_delay)
-{
-	int id = rdev_get_id(rdev);
-	unsigned int ramp_value;
-
-	switch (ramp_delay) {
-	case 1 ... 3125:
-		ramp_value = BUCK1_RAMP_3P125MV;
-		break;
-	case 3126 ... 6250:
-		ramp_value = BUCK1_RAMP_6P25MV;
-		break;
-	case 6251 ... 12500:
-		ramp_value = BUCK1_RAMP_12P5MV;
-		break;
-	case 12501 ... 25000:
-		ramp_value = BUCK1_RAMP_25MV;
-		break;
-	default:
-		ramp_value = BUCK1_RAMP_25MV;
-	}
-
-	return regmap_update_bits(rdev->regmap, PCA9450_REG_BUCK1CTRL + id * 3,
-				  BUCK1_RAMP_MASK, ramp_value << 6);
-}
+static const unsigned int pca9450_dvs_buck_ramp_table[] = {
+	25000, 12500, 6250, 3125
+};
 
 static const struct regulator_ops pca9450_dvs_buck_regulator_ops = {
 	.enable = regulator_enable_regmap,
@@ -100,7 +77,7 @@ static const struct regulator_ops pca9450_dvs_buck_regulator_ops = {
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
-	.set_ramp_delay = pca9450_dvs_set_ramp_delay,
+	.set_ramp_delay	= regulator_set_ramp_delay_regmap,
 };
 
 static const struct regulator_ops pca9450_buck_regulator_ops = {
@@ -251,6 +228,10 @@ static const struct pca9450_regulator_desc pca9450a_regulators[] = {
 			.vsel_mask = BUCK1OUT_DVS0_MASK,
 			.enable_reg = PCA9450_REG_BUCK1CTRL,
 			.enable_mask = BUCK1_ENMODE_MASK,
+			.ramp_reg = PCA9450_REG_BUCK1CTRL,
+			.ramp_mask = BUCK1_RAMP_MASK,
+			.ramp_delay_table = pca9450_dvs_buck_ramp_table,
+			.n_ramp_values = ARRAY_SIZE(pca9450_dvs_buck_ramp_table),
 			.owner = THIS_MODULE,
 			.of_parse_cb = pca9450_set_dvs_levels,
 		},
@@ -276,6 +257,10 @@ static const struct pca9450_regulator_desc pca9450a_regulators[] = {
 			.vsel_mask = BUCK2OUT_DVS0_MASK,
 			.enable_reg = PCA9450_REG_BUCK2CTRL,
 			.enable_mask = BUCK1_ENMODE_MASK,
+			.ramp_reg = PCA9450_REG_BUCK2CTRL,
+			.ramp_mask = BUCK2_RAMP_MASK,
+			.ramp_delay_table = pca9450_dvs_buck_ramp_table,
+			.n_ramp_values = ARRAY_SIZE(pca9450_dvs_buck_ramp_table),
 			.owner = THIS_MODULE,
 			.of_parse_cb = pca9450_set_dvs_levels,
 		},
@@ -301,6 +286,10 @@ static const struct pca9450_regulator_desc pca9450a_regulators[] = {
 			.vsel_mask = BUCK3OUT_DVS0_MASK,
 			.enable_reg = PCA9450_REG_BUCK3CTRL,
 			.enable_mask = BUCK3_ENMODE_MASK,
+			.ramp_reg = PCA9450_REG_BUCK3CTRL,
+			.ramp_mask = BUCK3_RAMP_MASK,
+			.ramp_delay_table = pca9450_dvs_buck_ramp_table,
+			.n_ramp_values = ARRAY_SIZE(pca9450_dvs_buck_ramp_table),
 			.owner = THIS_MODULE,
 			.of_parse_cb = pca9450_set_dvs_levels,
 		},
@@ -477,6 +466,10 @@ static const struct pca9450_regulator_desc pca9450bc_regulators[] = {
 			.vsel_mask = BUCK1OUT_DVS0_MASK,
 			.enable_reg = PCA9450_REG_BUCK1CTRL,
 			.enable_mask = BUCK1_ENMODE_MASK,
+			.ramp_reg = PCA9450_REG_BUCK1CTRL,
+			.ramp_mask = BUCK1_RAMP_MASK,
+			.ramp_delay_table = pca9450_dvs_buck_ramp_table,
+			.n_ramp_values = ARRAY_SIZE(pca9450_dvs_buck_ramp_table),
 			.owner = THIS_MODULE,
 			.of_parse_cb = pca9450_set_dvs_levels,
 		},
@@ -502,6 +495,10 @@ static const struct pca9450_regulator_desc pca9450bc_regulators[] = {
 			.vsel_mask = BUCK2OUT_DVS0_MASK,
 			.enable_reg = PCA9450_REG_BUCK2CTRL,
 			.enable_mask = BUCK1_ENMODE_MASK,
+			.ramp_reg = PCA9450_REG_BUCK2CTRL,
+			.ramp_mask = BUCK2_RAMP_MASK,
+			.ramp_delay_table = pca9450_dvs_buck_ramp_table,
+			.n_ramp_values = ARRAY_SIZE(pca9450_dvs_buck_ramp_table),
 			.owner = THIS_MODULE,
 			.of_parse_cb = pca9450_set_dvs_levels,
 		},
-- 
GitLab


From 7f8c8394425fd5e1449bf0a81ab6ec718cd4346b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 30 May 2021 10:05:43 +0800
Subject: [PATCH 1905/3804] regulator: mt6315: Don't ignore
 devm_regulator_register failure

Also use dev_err instead of dev_notice for messages in error conditions.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210530020543.418634-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6315-regulator.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c
index 9edc34981ee0a..8c5d72869c2be 100644
--- a/drivers/regulator/mt6315-regulator.c
+++ b/drivers/regulator/mt6315-regulator.c
@@ -84,7 +84,7 @@ static unsigned int mt6315_regulator_get_mode(struct regulator_dev *rdev)
 	modeset_mask = init->modeset_mask[rdev_get_id(rdev)];
 	ret = regmap_read(rdev->regmap, MT6315_BUCK_TOP_4PHASE_ANA_CON42, &regval);
 	if (ret != 0) {
-		dev_notice(&rdev->dev, "Failed to get mode: %d\n", ret);
+		dev_err(&rdev->dev, "Failed to get mode: %d\n", ret);
 		return ret;
 	}
 
@@ -93,7 +93,7 @@ static unsigned int mt6315_regulator_get_mode(struct regulator_dev *rdev)
 
 	ret = regmap_read(rdev->regmap, MT6315_BUCK_TOP_CON1, &regval);
 	if (ret != 0) {
-		dev_notice(&rdev->dev, "Failed to get lp mode: %d\n", ret);
+		dev_err(&rdev->dev, "Failed to get lp mode: %d\n", ret);
 		return ret;
 	}
 
@@ -147,12 +147,12 @@ static int mt6315_regulator_set_mode(struct regulator_dev *rdev,
 		break;
 	default:
 		ret = -EINVAL;
-		dev_notice(&rdev->dev, "Unsupported mode: %d\n", mode);
+		dev_err(&rdev->dev, "Unsupported mode: %d\n", mode);
 		break;
 	}
 
 	if (ret != 0) {
-		dev_notice(&rdev->dev, "Failed to set mode: %d\n", ret);
+		dev_err(&rdev->dev, "Failed to set mode: %d\n", ret);
 		return ret;
 	}
 
@@ -168,7 +168,7 @@ static int mt6315_get_status(struct regulator_dev *rdev)
 	info = container_of(rdev->desc, struct mt6315_regulator_info, desc);
 	ret = regmap_read(rdev->regmap, info->status_reg, &regval);
 	if (ret < 0) {
-		dev_notice(&rdev->dev, "Failed to get enable reg: %d\n", ret);
+		dev_err(&rdev->dev, "Failed to get enable reg: %d\n", ret);
 		return ret;
 	}
 
@@ -260,8 +260,9 @@ static int mt6315_regulator_probe(struct spmi_device *pdev)
 		config.driver_data = init_data;
 		rdev = devm_regulator_register(dev, &mt6315_regulators[i].desc, &config);
 		if (IS_ERR(rdev)) {
-			dev_notice(dev, "Failed to register %s\n", mt6315_regulators[i].desc.name);
-			continue;
+			dev_err(dev, "Failed to register %s\n",
+				mt6315_regulators[i].desc.name);
+			return PTR_ERR(rdev);
 		}
 	}
 
@@ -279,7 +280,7 @@ static void mt6315_regulator_shutdown(struct spmi_device *pdev)
 	ret |= regmap_write(chip->regmap, MT6315_TOP_TMA_KEY, 0);
 	ret |= regmap_write(chip->regmap, MT6315_TOP_TMA_KEY_H, 0);
 	if (ret < 0)
-		dev_notice(&pdev->dev, "[%#x] Failed to enable power off sequence. %d\n",
+		dev_err(&pdev->dev, "[%#x] Failed to enable power off sequence. %d\n",
 			   pdev->usid, ret);
 }
 
-- 
GitLab


From fbd168cd76e4ea80fc22d361b08267664db4d905 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 27 May 2021 22:42:48 +0800
Subject: [PATCH 1906/3804] regulator: lp8755: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210527144248.247992-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp8755.c | 55 ++++++--------------------------------
 1 file changed, 8 insertions(+), 47 deletions(-)

diff --git a/drivers/regulator/lp8755.c b/drivers/regulator/lp8755.c
index 13c535711265c..321bec6e3f8df 100644
--- a/drivers/regulator/lp8755.c
+++ b/drivers/regulator/lp8755.c
@@ -136,52 +136,9 @@ err_i2c:
 	return 0;
 }
 
-static int lp8755_buck_set_ramp(struct regulator_dev *rdev, int ramp)
-{
-	int ret;
-	unsigned int regval = 0x00;
-	enum lp8755_bucks id = rdev_get_id(rdev);
-
-	/* uV/us */
-	switch (ramp) {
-	case 0 ... 230:
-		regval = 0x07;
-		break;
-	case 231 ... 470:
-		regval = 0x06;
-		break;
-	case 471 ... 940:
-		regval = 0x05;
-		break;
-	case 941 ... 1900:
-		regval = 0x04;
-		break;
-	case 1901 ... 3800:
-		regval = 0x03;
-		break;
-	case 3801 ... 7500:
-		regval = 0x02;
-		break;
-	case 7501 ... 15000:
-		regval = 0x01;
-		break;
-	case 15001 ... 30000:
-		regval = 0x00;
-		break;
-	default:
-		dev_err(&rdev->dev,
-			"Not supported ramp value %d %s\n", ramp, __func__);
-		return -EINVAL;
-	}
-
-	ret = regmap_update_bits(rdev->regmap, 0x07 + id, 0x07, regval);
-	if (ret < 0)
-		goto err_i2c;
-	return ret;
-err_i2c:
-	dev_err(&rdev->dev, "i2c access error %s\n", __func__);
-	return ret;
-}
+static const unsigned int lp8755_buck_ramp_table[] = {
+	30000, 15000, 7500, 3800, 1900, 940, 470, 230
+};
 
 static const struct regulator_ops lp8755_buck_ops = {
 	.map_voltage = regulator_map_voltage_linear,
@@ -194,7 +151,7 @@ static const struct regulator_ops lp8755_buck_ops = {
 	.enable_time = lp8755_buck_enable_time,
 	.set_mode = lp8755_buck_set_mode,
 	.get_mode = lp8755_buck_get_mode,
-	.set_ramp_delay = lp8755_buck_set_ramp,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 };
 
 #define lp8755_rail(_id) "lp8755_buck"#_id
@@ -269,6 +226,10 @@ out_i2c_error:
 	.enable_mask = LP8755_BUCK_EN_M,\
 	.vsel_reg = LP8755_REG_BUCK##_id,\
 	.vsel_mask = LP8755_BUCK_VOUT_M,\
+	.ramp_reg = (LP8755_BUCK##_id) + 0x7,\
+	.ramp_mask = 0x7,\
+	.ramp_delay_table = lp8755_buck_ramp_table,\
+	.n_ramp_values = ARRAY_SIZE(lp8755_buck_ramp_table),\
 }
 
 static const struct regulator_desc lp8755_regulators[] = {
-- 
GitLab


From 71de5d6e63c992abe037c43bc581cff432a5a1c4 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 27 May 2021 14:59:29 +0300
Subject: [PATCH 1907/3804] regulator: bd70528: Drop BD70528 support

The only known BD70528 use-cases are such that the PMIC is controlled
from separate MCU which is not running Linux. I am not aware of
any Linux driver users. Furthermore, it seems there is no demand for
this IC. Let's ease the maintenance burden and drop the driver. We can
always add it back if there is sudden need for it.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/d7271362129edceebc512b49efed9ee7c3efcb6a.1622116622.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig             |  11 -
 drivers/regulator/Makefile            |   1 -
 drivers/regulator/bd70528-regulator.c | 278 --------------------------
 3 files changed, 290 deletions(-)
 delete mode 100644 drivers/regulator/bd70528-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 9aeb32c320aab..bc02ea3ea2eff 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -193,17 +193,6 @@ config REGULATOR_BCM590XX
 	  BCM590xx PMUs. This will enable support for the software
 	  controllable LDO/Switching regulators.
 
-config REGULATOR_BD70528
-	tristate "ROHM BD70528 Power Regulator"
-	depends on MFD_ROHM_BD70528
-	help
-	  This driver supports voltage regulators on ROHM BD70528 PMIC.
-	  This will enable support for the software controllable buck
-	  and LDO regulators.
-
-	  This driver can also be built as a module. If so, the module
-	  will be called bd70528-regulator.
-
 config REGULATOR_BD71815
 	tristate "ROHM BD71815 Power Regulator"
 	depends on MFD_ROHM_BD71828
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 580b015296ea2..6a5d55e209d30 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_REGULATOR_AS3722) += as3722-regulator.o
 obj-$(CONFIG_REGULATOR_ATC260X) += atc260x-regulator.o
 obj-$(CONFIG_REGULATOR_AXP20X) += axp20x-regulator.o
 obj-$(CONFIG_REGULATOR_BCM590XX) += bcm590xx-regulator.o
-obj-$(CONFIG_REGULATOR_BD70528) += bd70528-regulator.o
 obj-$(CONFIG_REGULATOR_BD71815)	+= bd71815-regulator.o
 obj-$(CONFIG_REGULATOR_BD71828) += bd71828-regulator.o
 obj-$(CONFIG_REGULATOR_BD718XX) += bd718x7-regulator.o
diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c
deleted file mode 100644
index e6fec70fabfa3..0000000000000
--- a/drivers/regulator/bd70528-regulator.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 ROHM Semiconductors
-// bd70528-regulator.c ROHM BD70528MWV regulator driver
-
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/mfd/rohm-bd70528.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/regmap.h>
-#include <linux/regulator/driver.h>
-#include <linux/regulator/machine.h>
-#include <linux/regulator/of_regulator.h>
-#include <linux/slab.h>
-
-static const struct linear_range bd70528_buck1_volts[] = {
-	REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x1, 600000),
-	REGULATOR_LINEAR_RANGE(2750000, 0x2, 0xf, 50000),
-};
-static const struct linear_range bd70528_buck2_volts[] = {
-	REGULATOR_LINEAR_RANGE(1200000, 0x00, 0x1, 300000),
-	REGULATOR_LINEAR_RANGE(1550000, 0x2, 0xd, 50000),
-	REGULATOR_LINEAR_RANGE(3000000, 0xe, 0xf, 300000),
-};
-static const struct linear_range bd70528_buck3_volts[] = {
-	REGULATOR_LINEAR_RANGE(800000, 0x00, 0xd, 50000),
-	REGULATOR_LINEAR_RANGE(1800000, 0xe, 0xf, 0),
-};
-
-/* All LDOs have same voltage ranges */
-static const struct linear_range bd70528_ldo_volts[] = {
-	REGULATOR_LINEAR_RANGE(1650000, 0x0, 0x07, 50000),
-	REGULATOR_LINEAR_RANGE(2100000, 0x8, 0x0f, 100000),
-	REGULATOR_LINEAR_RANGE(2850000, 0x10, 0x19, 50000),
-	REGULATOR_LINEAR_RANGE(3300000, 0x19, 0x1f, 0),
-};
-
-/* Also both LEDs support same voltages */
-static const unsigned int led_volts[] = {
-	20000, 30000
-};
-
-static const unsigned int bd70528_buck_ramp_table[] = {
-	250, 125
-};
-
-static int bd70528_led_set_voltage_sel(struct regulator_dev *rdev,
-				       unsigned int sel)
-{
-	int ret;
-
-	ret = regulator_is_enabled_regmap(rdev);
-	if (ret < 0)
-		return ret;
-
-	if (ret == 0)
-		return regulator_set_voltage_sel_regmap(rdev, sel);
-
-	dev_err(&rdev->dev,
-		"LED voltage change not allowed when led is enabled\n");
-
-	return -EBUSY;
-}
-
-static const struct regulator_ops bd70528_buck_ops = {
-	.enable = regulator_enable_regmap,
-	.disable = regulator_disable_regmap,
-	.is_enabled = regulator_is_enabled_regmap,
-	.list_voltage = regulator_list_voltage_linear_range,
-	.set_voltage_sel = regulator_set_voltage_sel_regmap,
-	.get_voltage_sel = regulator_get_voltage_sel_regmap,
-	.set_voltage_time_sel = regulator_set_voltage_time_sel,
-	.set_ramp_delay = regulator_set_ramp_delay_regmap,
-};
-
-static const struct regulator_ops bd70528_ldo_ops = {
-	.enable = regulator_enable_regmap,
-	.disable = regulator_disable_regmap,
-	.is_enabled = regulator_is_enabled_regmap,
-	.list_voltage = regulator_list_voltage_linear_range,
-	.set_voltage_sel = regulator_set_voltage_sel_regmap,
-	.get_voltage_sel = regulator_get_voltage_sel_regmap,
-	.set_voltage_time_sel = regulator_set_voltage_time_sel,
-};
-
-static const struct regulator_ops bd70528_led_ops = {
-	.enable = regulator_enable_regmap,
-	.disable = regulator_disable_regmap,
-	.is_enabled = regulator_is_enabled_regmap,
-	.list_voltage = regulator_list_voltage_table,
-	.set_voltage_sel = bd70528_led_set_voltage_sel,
-	.get_voltage_sel = regulator_get_voltage_sel_regmap,
-};
-
-static const struct regulator_desc bd70528_desc[] = {
-	{
-		.name = "buck1",
-		.of_match = of_match_ptr("BUCK1"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_BUCK1,
-		.ops = &bd70528_buck_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_buck1_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_buck1_volts),
-		.n_voltages = BD70528_BUCK_VOLTS,
-		.enable_reg = BD70528_REG_BUCK1_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_BUCK1_VOLT,
-		.vsel_mask = BD70528_MASK_BUCK_VOLT,
-		.ramp_reg = BD70528_REG_BUCK1_VOLT,
-		.ramp_mask = BD70528_MASK_BUCK_RAMP,
-		.ramp_delay_table = bd70528_buck_ramp_table,
-		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "buck2",
-		.of_match = of_match_ptr("BUCK2"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_BUCK2,
-		.ops = &bd70528_buck_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_buck2_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_buck2_volts),
-		.n_voltages = BD70528_BUCK_VOLTS,
-		.enable_reg = BD70528_REG_BUCK2_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_BUCK2_VOLT,
-		.vsel_mask = BD70528_MASK_BUCK_VOLT,
-		.ramp_reg = BD70528_REG_BUCK2_VOLT,
-		.ramp_mask = BD70528_MASK_BUCK_RAMP,
-		.ramp_delay_table = bd70528_buck_ramp_table,
-		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "buck3",
-		.of_match = of_match_ptr("BUCK3"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_BUCK3,
-		.ops = &bd70528_buck_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_buck3_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_buck3_volts),
-		.n_voltages = BD70528_BUCK_VOLTS,
-		.enable_reg = BD70528_REG_BUCK3_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_BUCK3_VOLT,
-		.vsel_mask = BD70528_MASK_BUCK_VOLT,
-		.ramp_reg = BD70528_REG_BUCK3_VOLT,
-		.ramp_mask = BD70528_MASK_BUCK_RAMP,
-		.ramp_delay_table = bd70528_buck_ramp_table,
-		.n_ramp_values = ARRAY_SIZE(bd70528_buck_ramp_table),
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "ldo1",
-		.of_match = of_match_ptr("LDO1"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_LDO1,
-		.ops = &bd70528_ldo_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_ldo_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_ldo_volts),
-		.n_voltages = BD70528_LDO_VOLTS,
-		.enable_reg = BD70528_REG_LDO1_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_LDO1_VOLT,
-		.vsel_mask = BD70528_MASK_LDO_VOLT,
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "ldo2",
-		.of_match = of_match_ptr("LDO2"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_LDO2,
-		.ops = &bd70528_ldo_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_ldo_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_ldo_volts),
-		.n_voltages = BD70528_LDO_VOLTS,
-		.enable_reg = BD70528_REG_LDO2_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_LDO2_VOLT,
-		.vsel_mask = BD70528_MASK_LDO_VOLT,
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "ldo3",
-		.of_match = of_match_ptr("LDO3"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_LDO3,
-		.ops = &bd70528_ldo_ops,
-		.type = REGULATOR_VOLTAGE,
-		.linear_ranges = bd70528_ldo_volts,
-		.n_linear_ranges = ARRAY_SIZE(bd70528_ldo_volts),
-		.n_voltages = BD70528_LDO_VOLTS,
-		.enable_reg = BD70528_REG_LDO3_EN,
-		.enable_mask = BD70528_MASK_RUN_EN,
-		.vsel_reg = BD70528_REG_LDO3_VOLT,
-		.vsel_mask = BD70528_MASK_LDO_VOLT,
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "ldo_led1",
-		.of_match = of_match_ptr("LDO_LED1"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_LED1,
-		.ops = &bd70528_led_ops,
-		.type = REGULATOR_VOLTAGE,
-		.volt_table = &led_volts[0],
-		.n_voltages = ARRAY_SIZE(led_volts),
-		.enable_reg = BD70528_REG_LED_EN,
-		.enable_mask = BD70528_MASK_LED1_EN,
-		.vsel_reg = BD70528_REG_LED_VOLT,
-		.vsel_mask = BD70528_MASK_LED1_VOLT,
-		.owner = THIS_MODULE,
-	},
-	{
-		.name = "ldo_led2",
-		.of_match = of_match_ptr("LDO_LED2"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD70528_LED2,
-		.ops = &bd70528_led_ops,
-		.type = REGULATOR_VOLTAGE,
-		.volt_table = &led_volts[0],
-		.n_voltages = ARRAY_SIZE(led_volts),
-		.enable_reg = BD70528_REG_LED_EN,
-		.enable_mask = BD70528_MASK_LED2_EN,
-		.vsel_reg = BD70528_REG_LED_VOLT,
-		.vsel_mask = BD70528_MASK_LED2_VOLT,
-		.owner = THIS_MODULE,
-	},
-
-};
-
-static int bd70528_probe(struct platform_device *pdev)
-{
-	int i;
-	struct regulator_config config = {
-		.dev = pdev->dev.parent,
-	};
-
-	config.regmap = dev_get_regmap(pdev->dev.parent, NULL);
-	if (!config.regmap)
-		return -ENODEV;
-
-	for (i = 0; i < ARRAY_SIZE(bd70528_desc); i++) {
-		struct regulator_dev *rdev;
-
-		rdev = devm_regulator_register(&pdev->dev, &bd70528_desc[i],
-					       &config);
-		if (IS_ERR(rdev)) {
-			dev_err(&pdev->dev,
-				"failed to register %s regulator\n",
-				bd70528_desc[i].name);
-			return PTR_ERR(rdev);
-		}
-	}
-	return 0;
-}
-
-static struct platform_driver bd70528_regulator = {
-	.driver = {
-		.name = "bd70528-pmic"
-	},
-	.probe = bd70528_probe,
-};
-
-module_platform_driver(bd70528_regulator);
-
-MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
-MODULE_DESCRIPTION("BD70528 voltage regulator driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:bd70528-pmic");
-- 
GitLab


From 1d15b3e6f9d95865450c8856401b3166ed074c83 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 26 May 2021 20:50:25 +0800
Subject: [PATCH 1908/3804] regulator: mcp16502: Convert to use .probe_new

Use the new .probe_new for mcp16502.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210526125026.82549-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mcp16502.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/mcp16502.c b/drivers/regulator/mcp16502.c
index 88c6bd5b6c78e..ac1b129e33d98 100644
--- a/drivers/regulator/mcp16502.c
+++ b/drivers/regulator/mcp16502.c
@@ -522,8 +522,7 @@ static const struct regmap_config mcp16502_regmap_config = {
 	.wr_table	= &mcp16502_yes_reg_table,
 };
 
-static int mcp16502_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int mcp16502_probe(struct i2c_client *client)
 {
 	struct regulator_config config = { };
 	struct regulator_dev *rdev;
@@ -606,7 +605,7 @@ static const struct i2c_device_id mcp16502_i2c_id[] = {
 MODULE_DEVICE_TABLE(i2c, mcp16502_i2c_id);
 
 static struct i2c_driver mcp16502_drv = {
-	.probe		= mcp16502_probe,
+	.probe_new	= mcp16502_probe,
 	.driver		= {
 		.name	= "mcp16502-regulator",
 		.of_match_table	= of_match_ptr(mcp16502_ids),
-- 
GitLab


From 96ec5afeb3001dcb432b9c9e8738aa537c6cdb12 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 26 May 2021 20:50:26 +0800
Subject: [PATCH 1909/3804] regulator: mcp16502: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coded.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210526125026.82549-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mcp16502.c | 74 +++++++++++++-----------------------
 1 file changed, 26 insertions(+), 48 deletions(-)

diff --git a/drivers/regulator/mcp16502.c b/drivers/regulator/mcp16502.c
index ac1b129e33d98..0426683856783 100644
--- a/drivers/regulator/mcp16502.c
+++ b/drivers/regulator/mcp16502.c
@@ -90,10 +90,14 @@ enum mcp16502_reg {
 };
 
 /* Ramp delay (uV/us) for buck1, ldo1, ldo2. */
-static const int mcp16502_ramp_b1l12[] = { 6250, 3125, 2083, 1563 };
+static const unsigned int mcp16502_ramp_b1l12[] = {
+	6250, 3125, 2083, 1563
+};
 
 /* Ramp delay (uV/us) for buck2, buck3, buck4. */
-static const int mcp16502_ramp_b234[] = { 3125, 1563, 1042, 781 };
+static const unsigned int mcp16502_ramp_b234[] = {
+	3125, 1563, 1042, 781
+};
 
 static unsigned int mcp16502_of_map_mode(unsigned int mode)
 {
@@ -103,7 +107,7 @@ static unsigned int mcp16502_of_map_mode(unsigned int mode)
 	return REGULATOR_MODE_INVALID;
 }
 
-#define MCP16502_REGULATOR(_name, _id, _ranges, _ops)			\
+#define MCP16502_REGULATOR(_name, _id, _ranges, _ops, _ramp_table)	\
 	[_id] = {							\
 		.name			= _name,			\
 		.regulators_node	= of_match_ptr("regulators"),	\
@@ -121,6 +125,10 @@ static unsigned int mcp16502_of_map_mode(unsigned int mode)
 		.vsel_mask		= MCP16502_VSEL,		\
 		.enable_reg		= (((_id) + 1) << 4),		\
 		.enable_mask		= MCP16502_EN,			\
+		.ramp_reg		= MCP16502_REG_BASE(_id, CFG),	\
+		.ramp_mask		= MCP16502_DVSR,		\
+		.ramp_delay_table	= _ramp_table,			\
+		.n_ramp_values		= ARRAY_SIZE(_ramp_table),	\
 	}
 
 enum {
@@ -314,42 +322,6 @@ static int mcp16502_set_voltage_time_sel(struct regulator_dev *rdev,
 	return ret;
 }
 
-static int mcp16502_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	const int *ramp;
-	int id = rdev_get_id(rdev);
-	unsigned int i, size;
-
-	switch (id) {
-	case BUCK1:
-	case LDO1:
-	case LDO2:
-		ramp = mcp16502_ramp_b1l12;
-		size = ARRAY_SIZE(mcp16502_ramp_b1l12);
-		break;
-
-	case BUCK2:
-	case BUCK3:
-	case BUCK4:
-		ramp = mcp16502_ramp_b234;
-		size = ARRAY_SIZE(mcp16502_ramp_b234);
-		break;
-
-	default:
-		return -EINVAL;
-	}
-
-	for (i = 0; i < size; i++) {
-		if (ramp[i] == ramp_delay)
-			break;
-	}
-	if (i == size)
-		return -EINVAL;
-
-	return regmap_update_bits(rdev->regmap, MCP16502_REG_BASE(id, CFG),
-				  MCP16502_DVSR, (i << 2));
-}
-
 #ifdef CONFIG_SUSPEND
 /*
  * mcp16502_suspend_get_target_reg() - get the reg of the target suspend PMIC
@@ -445,7 +417,7 @@ static const struct regulator_ops mcp16502_buck_ops = {
 	.is_enabled			= regulator_is_enabled_regmap,
 	.get_status			= mcp16502_get_status,
 	.set_voltage_time_sel		= mcp16502_set_voltage_time_sel,
-	.set_ramp_delay			= mcp16502_set_ramp_delay,
+	.set_ramp_delay			= regulator_set_ramp_delay_regmap,
 
 	.set_mode			= mcp16502_set_mode,
 	.get_mode			= mcp16502_get_mode,
@@ -471,7 +443,7 @@ static const struct regulator_ops mcp16502_ldo_ops = {
 	.is_enabled			= regulator_is_enabled_regmap,
 	.get_status			= mcp16502_get_status,
 	.set_voltage_time_sel		= mcp16502_set_voltage_time_sel,
-	.set_ramp_delay			= mcp16502_set_ramp_delay,
+	.set_ramp_delay			= regulator_set_ramp_delay_regmap,
 
 #ifdef CONFIG_SUSPEND
 	.set_suspend_voltage		= mcp16502_set_suspend_voltage,
@@ -495,13 +467,19 @@ static const struct linear_range b234_ranges[] = {
 };
 
 static const struct regulator_desc mcp16502_desc[] = {
-	/* MCP16502_REGULATOR(_name, _id, ranges, regulator_ops) */
-	MCP16502_REGULATOR("VDD_IO", BUCK1, b1l12_ranges, mcp16502_buck_ops),
-	MCP16502_REGULATOR("VDD_DDR", BUCK2, b234_ranges, mcp16502_buck_ops),
-	MCP16502_REGULATOR("VDD_CORE", BUCK3, b234_ranges, mcp16502_buck_ops),
-	MCP16502_REGULATOR("VDD_OTHER", BUCK4, b234_ranges, mcp16502_buck_ops),
-	MCP16502_REGULATOR("LDO1", LDO1, b1l12_ranges, mcp16502_ldo_ops),
-	MCP16502_REGULATOR("LDO2", LDO2, b1l12_ranges, mcp16502_ldo_ops)
+	/* MCP16502_REGULATOR(_name, _id, ranges, regulator_ops, ramp_table) */
+	MCP16502_REGULATOR("VDD_IO", BUCK1, b1l12_ranges, mcp16502_buck_ops,
+			   mcp16502_ramp_b1l12),
+	MCP16502_REGULATOR("VDD_DDR", BUCK2, b234_ranges, mcp16502_buck_ops,
+			   mcp16502_ramp_b234),
+	MCP16502_REGULATOR("VDD_CORE", BUCK3, b234_ranges, mcp16502_buck_ops,
+			   mcp16502_ramp_b234),
+	MCP16502_REGULATOR("VDD_OTHER", BUCK4, b234_ranges, mcp16502_buck_ops,
+			   mcp16502_ramp_b234),
+	MCP16502_REGULATOR("LDO1", LDO1, b1l12_ranges, mcp16502_ldo_ops,
+			   mcp16502_ramp_b1l12),
+	MCP16502_REGULATOR("LDO2", LDO2, b1l12_ranges, mcp16502_ldo_ops,
+			   mcp16502_ramp_b1l12)
 };
 
 static const struct regmap_range mcp16502_ranges[] = {
-- 
GitLab


From 6c1ced2f701618e912be6c549139d58c180419ea Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Tue, 1 Jun 2021 19:53:56 +0800
Subject: [PATCH 1910/3804] perf tools: Copy uapi/asm/perf_regs.h from the
 kernel for MIPS

To allow the build to complete on older systems, where those files are
either not up to date, lacking some recent additions or not present at
all.

And check if the copy drifts from the kernel.

This commit is similar to
commit 12f020338a2c ("tools: Copy uapi/asm/perf_regs.h from the kernel")

With this commit, we can avoid the following build error in any case:

tools/perf/arch/mips/include/perf_regs.h:7:10:
fatal error: asm/perf_regs.h: No such file or directory
 #include <asm/perf_regs.h>
          ^~~~~~~~~~~~~~~~~
compilation terminated.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Link: http://lore.kernel.org/lkml/1622548436-12472-1-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/mips/include/uapi/asm/perf_regs.h | 40 ++++++++++++++++++++
 tools/perf/Makefile.config                   |  1 -
 tools/perf/check-headers.sh                  |  1 +
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 tools/arch/mips/include/uapi/asm/perf_regs.h

diff --git a/tools/arch/mips/include/uapi/asm/perf_regs.h b/tools/arch/mips/include/uapi/asm/perf_regs.h
new file mode 100644
index 0000000000000..d0f4ecd616cfc
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/perf_regs.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_MIPS_PERF_REGS_H
+#define _ASM_MIPS_PERF_REGS_H
+
+enum perf_event_mips_regs {
+	PERF_REG_MIPS_PC,
+	PERF_REG_MIPS_R1,
+	PERF_REG_MIPS_R2,
+	PERF_REG_MIPS_R3,
+	PERF_REG_MIPS_R4,
+	PERF_REG_MIPS_R5,
+	PERF_REG_MIPS_R6,
+	PERF_REG_MIPS_R7,
+	PERF_REG_MIPS_R8,
+	PERF_REG_MIPS_R9,
+	PERF_REG_MIPS_R10,
+	PERF_REG_MIPS_R11,
+	PERF_REG_MIPS_R12,
+	PERF_REG_MIPS_R13,
+	PERF_REG_MIPS_R14,
+	PERF_REG_MIPS_R15,
+	PERF_REG_MIPS_R16,
+	PERF_REG_MIPS_R17,
+	PERF_REG_MIPS_R18,
+	PERF_REG_MIPS_R19,
+	PERF_REG_MIPS_R20,
+	PERF_REG_MIPS_R21,
+	PERF_REG_MIPS_R22,
+	PERF_REG_MIPS_R23,
+	PERF_REG_MIPS_R24,
+	PERF_REG_MIPS_R25,
+	PERF_REG_MIPS_R26,
+	PERF_REG_MIPS_R27,
+	PERF_REG_MIPS_R28,
+	PERF_REG_MIPS_R29,
+	PERF_REG_MIPS_R30,
+	PERF_REG_MIPS_R31,
+	PERF_REG_MIPS_MAX = PERF_REG_MIPS_R31 + 1,
+};
+#endif /* _ASM_MIPS_PERF_REGS_H */
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 406a9519145e5..73df23dd664c1 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -90,7 +90,6 @@ endif
 ifeq ($(ARCH),mips)
   NO_PERF_REGS := 0
   CFLAGS += -I$(OUTPUT)arch/mips/include/generated
-  CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi
   LIBUNWIND_LIBS = -lunwind -lunwind-mips
 endif
 
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index dd8ff287e9302..c783558332b85 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -39,6 +39,7 @@ arch/x86/lib/x86-opcode-map.txt
 arch/x86/tools/gen-insn-attr-x86.awk
 arch/arm/include/uapi/asm/perf_regs.h
 arch/arm64/include/uapi/asm/perf_regs.h
+arch/mips/include/uapi/asm/perf_regs.h
 arch/powerpc/include/uapi/asm/perf_regs.h
 arch/s390/include/uapi/asm/perf_regs.h
 arch/x86/include/uapi/asm/perf_regs.h
-- 
GitLab


From 3cb17cce1e76ccc5499915a4d7e095a1ad6bf7ff Mon Sep 17 00:00:00 2001
From: Li Huafei <lihuafei1@huawei.com>
Date: Tue, 1 Jun 2021 17:27:50 +0800
Subject: [PATCH 1911/3804] perf probe: Fix NULL pointer dereference in
 convert_variable_location()

If we just check whether the variable can be converted, 'tvar' should be
a null pointer. However, the null pointer check is missing in the
'Constant value' execution path.

The following cases can trigger this problem:

	$ cat test.c
	#include <stdio.h>

	void main(void)
	{
	        int a;
	        const int b = 1;

	        asm volatile("mov %1, %0" : "=r"(a): "i"(b));
	        printf("a: %d\n", a);
	}

	$ gcc test.c -o test -O -g
	$ sudo ./perf probe -x ./test -L "main"
	<main@/home/lhf/test.c:0>
	      0  void main(void)
	         {
	      2          int a;
	                 const int b = 1;

	                 asm volatile("mov %1, %0" : "=r"(a): "i"(b));
	      6          printf("a: %d\n", a);
	         }

	$ sudo ./perf probe -x ./test -V "main:6"
	Segmentation fault

The check on 'tvar' is added. If 'tvar' is a null pointer, we return 0
to indicate that the variable can be converted. Now, we can successfully
show the variables that can be accessed.

	$ sudo ./perf probe -x ./test -V "main:6"
	Available variables at main:6
	        @<main+13>
	                char*   __fmt
	                int     a
	                int     b

However, the variable 'b' cannot be tracked.

	$ sudo ./perf probe -x ./test -D "main:6 b"
	Failed to find the location of the 'b' variable at this address.
	 Perhaps it has been optimized out.
	 Use -V with the --range option to show 'b' location range.
	  Error: Failed to add events.

This is because __die_find_variable_cb() did not successfully match
variable 'b', which has the DW_AT_const_value attribute instead of
DW_AT_location. We added support for DW_AT_const_value in
__die_find_variable_cb(). With this modification, we can successfully
track the variable 'b'.

	$ sudo ./perf probe -x ./test -D "main:6 b"
	p:probe_test/main_L6 /home/lhf/test:0x1156 b=\1:s32

Fixes: 66f69b219716 ("perf probe: Support DW_AT_const_value constant value")
Signed-off-by: Li Huafei <lihuafei1@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Jianlin Lv <jianlin.lv@arm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Zhang Jinhao <zhangjinhao2@huawei.com>
http://lore.kernel.org/lkml/20210601092750.169601-1-lihuafei1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 8 ++++++--
 tools/perf/util/probe-finder.c | 3 +++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index b2f4920e19a6d..7d2ba8419b0c4 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -975,9 +975,13 @@ static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
 	if ((tag == DW_TAG_formal_parameter ||
 	     tag == DW_TAG_variable) &&
 	    die_compare_name(die_mem, fvp->name) &&
-	/* Does the DIE have location information or external instance? */
+	/*
+	 * Does the DIE have location information or const value
+	 * or external instance?
+	 */
 	    (dwarf_attr(die_mem, DW_AT_external, &attr) ||
-	     dwarf_attr(die_mem, DW_AT_location, &attr)))
+	     dwarf_attr(die_mem, DW_AT_location, &attr) ||
+	     dwarf_attr(die_mem, DW_AT_const_value, &attr)))
 		return DIE_FIND_CB_END;
 	if (dwarf_haspc(die_mem, fvp->addr))
 		return DIE_FIND_CB_CONTINUE;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 866f2d514d729..b029c29ce227a 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -190,6 +190,9 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
 	    immediate_value_is_supported()) {
 		Dwarf_Sword snum;
 
+		if (!tvar)
+			return 0;
+
 		dwarf_formsdata(&attr, &snum);
 		ret = asprintf(&tvar->value, "\\%ld", (long)snum);
 
-- 
GitLab


From fdbef8c4e68ad423416aa6cc93d1616d6f8ac5b3 Mon Sep 17 00:00:00 2001
From: Yang Jihong <yangjihong1@huawei.com>
Date: Fri, 30 Apr 2021 09:26:59 +0800
Subject: [PATCH 1912/3804] arm_pmu: Fix write counter incorrect in ARMv7
 big-endian mode

Commit 3a95200d3f89 ("arm_pmu: Change API to support 64bit counter values")
changes the input "value" type from 32-bit to 64-bit, which introduces the
following problem: ARMv7 PMU counters are 32 bits wide, so in big-endian
mode the counter write uses the high 32 bits, which writes an incorrect value.

Before:

 Performance counter stats for 'ls':

              2.22 msec task-clock                #    0.675 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                49      page-faults               #    0.022 M/sec
        2150476593      cycles                    #  966.663 GHz
        2148588788      instructions              #    1.00  insn per cycle
        2147745484      branches                  # 965435.074 M/sec
        2147508540      branch-misses             #   99.99% of all branches

None of the above hw event counters are correct.

Solution:

"value" is forcibly converted to a 32-bit type before being written to the PMU register.

After:

 Performance counter stats for 'ls':

              2.09 msec task-clock                #    0.681 CPUs utilized
                 0      context-switches          #    0.000 K/sec
                 0      cpu-migrations            #    0.000 K/sec
                46      page-faults               #    0.022 M/sec
           2807301      cycles                    #    1.344 GHz
           1060159      instructions              #    0.38  insn per cycle
            250496      branches                  #  119.914 M/sec
             23192      branch-misses             #    9.26% of all branches

Fixes: 3a95200d3f89 ("arm_pmu: Change API to support 64bit counter values")
Cc: <stable@vger.kernel.org>
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210430012659.232110-1-yangjihong1@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm/kernel/perf_event_v7.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 2924d7910b106..eb2190477da10 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -773,10 +773,10 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
 		pr_err("CPU%u writing wrong counter %d\n",
 			smp_processor_id(), idx);
 	} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
-		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
+		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
 	} else {
 		armv7_pmnc_select_counter(idx);
-		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
+		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
 	}
 }
 
-- 
GitLab


From a5740e955540181f4ab8f076cc9795c6bbe4d730 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 20 May 2021 15:59:45 +0800
Subject: [PATCH 1913/3804] arm64: perf: Convert snprintf to sysfs_emit

Use sysfs_emit instead of snprintf to avoid buf overrun, because
sysfs_emit strictly checks whether buf is null or whether buf is
page-size aligned; otherwise it returns an error.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1621497585-30887-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index f594957e29bd1..44b6eda69a81a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -312,7 +312,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
 	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
 	u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
 
-	return snprintf(page, PAGE_SIZE, "0x%08x\n", slots);
+	return sysfs_emit(page, "0x%08x\n", slots);
 }
 
 static DEVICE_ATTR_RO(slots);
-- 
GitLab


From 2db5223731b79cf5c617dc391ceb21dd5cb93237 Mon Sep 17 00:00:00 2001
From: Hao Fang <fanghao11@huawei.com>
Date: Sat, 22 May 2021 18:23:57 +0800
Subject: [PATCH 1914/3804] drivers/perf: hisi: use the correct HiSilicon
 copyright

s/Hisilicon/HiSilicon/.
It should use capital S, according to the official website
https://www.hisilicon.com/en.

Signed-off-by: Hao Fang <fanghao11@huawei.com>
Link: https://lore.kernel.org/r/1621679037-15323-1-git-send-email-fanghao11@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +-
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c  | 2 +-
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c  | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.c      | 2 +-
 drivers/perf/hisilicon/hisi_uncore_pmu.h      | 2 +-
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index 0c7777bf1542d..62299ab5a9be9 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SoC DDRC uncore Hardware event counters support
  *
- * Copyright (C) 2017 Hisilicon Limited
+ * Copyright (C) 2017 HiSilicon Limited
  * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
  *         Anurup M <anurup.m@huawei.com>
  *
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 12eb41ab1b8af..12b2c5e6d488d 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SoC HHA uncore Hardware event counters support
  *
- * Copyright (C) 2017 Hisilicon Limited
+ * Copyright (C) 2017 HiSilicon Limited
  * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
  *         Anurup M <anurup.m@huawei.com>
  *
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 773f69538090d..560ab964c8b59 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SoC L3C uncore Hardware event counters support
  *
- * Copyright (C) 2017 Hisilicon Limited
+ * Copyright (C) 2017 HiSilicon Limited
  * Author: Anurup M <anurup.m@huawei.com>
  *         Shaokun Zhang <zhangshaokun@hisilicon.com>
  *
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 5842593632e43..a738aeab5c049 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SoC Hardware event counters support
  *
- * Copyright (C) 2017 Hisilicon Limited
+ * Copyright (C) 2017 HiSilicon Limited
  * Author: Anurup M <anurup.m@huawei.com>
  *         Shaokun Zhang <zhangshaokun@hisilicon.com>
  *
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.h b/drivers/perf/hisilicon/hisi_uncore_pmu.h
index ea9d89bbc1ea0..7f5841d6f5924 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.h
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.h
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SoC Hardware event counters support
  *
- * Copyright (C) 2017 Hisilicon Limited
+ * Copyright (C) 2017 HiSilicon Limited
  * Author: Anurup M <anurup.m@huawei.com>
  *         Shaokun Zhang <zhangshaokun@hisilicon.com>
  *
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 835ec3e2178fe..08e028d9a4065 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -2,7 +2,7 @@
 /*
  * HiSilicon SLLC uncore Hardware event counters support
  *
- * Copyright (C) 2020 Hisilicon Limited
+ * Copyright (C) 2020 HiSilicon Limited
  * Author: Shaokun Zhang <zhangshaokun@hisilicon.com>
  *
  * This code is based on the uncore PMUs like arm-cci and arm-ccn.
-- 
GitLab


From 29c043760eea902f170b6485c6e88a5ef33a9908 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 28 May 2021 09:41:30 +0800
Subject: [PATCH 1915/3804] perf: arm_pmu: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210528014130.7708-1-yuehaibing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index e57b348c1628b..a64e254a731b1 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -563,14 +563,14 @@ static int armpmu_filter_match(struct perf_event *event)
 	return ret;
 }
 
-static ssize_t armpmu_cpumask_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t cpus_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
 	return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
 }
 
-static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
+static DEVICE_ATTR_RO(cpus);
 
 static struct attribute *armpmu_common_attrs[] = {
 	&dev_attr_cpus.attr,
-- 
GitLab


From ccbe14ce88289ede522318ef3205e46f8455bbf2 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 28 May 2021 09:47:49 +0800
Subject: [PATCH 1916/3804] perf: qcom: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(),
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210528014749.24068-1-yuehaibing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/qcom_l3_pmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index bba078077c93c..081273543c6bc 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -670,15 +670,15 @@ static const struct attribute_group qcom_l3_cache_pmu_events_group = {
 
 /* cpumask */
 
-static ssize_t qcom_l3_cache_pmu_cpumask_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct l3cache_pmu *l3pmu = to_l3cache_pmu(dev_get_drvdata(dev));
 
 	return cpumap_print_to_pagebuf(true, buf, &l3pmu->cpumask);
 }
 
-static DEVICE_ATTR(cpumask, 0444, qcom_l3_cache_pmu_cpumask_show, NULL);
+static DEVICE_ATTR_RO(cpumask);
 
 static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = {
 	&dev_attr_cpumask.attr,
-- 
GitLab


From 21ad02e6b4c822d453faead4c96f0a86c4541b62 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 28 May 2021 09:49:40 +0800
Subject: [PATCH 1917/3804] perf: xgene_pmu: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(),
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210528014940.4184-1-yuehaibing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/xgene_pmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index ffe3bdeec8459..62d942534a6be 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -604,15 +604,15 @@ static const struct attribute_group mc_pmu_v3_events_attr_group = {
 /*
  * sysfs cpumask attributes
  */
-static ssize_t xgene_pmu_cpumask_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct xgene_pmu_dev *pmu_dev = to_pmu_dev(dev_get_drvdata(dev));
 
 	return cpumap_print_to_pagebuf(true, buf, &pmu_dev->parent->cpu);
 }
 
-static DEVICE_ATTR(cpumask, S_IRUGO, xgene_pmu_cpumask_show, NULL);
+static DEVICE_ATTR_RO(cpumask);
 
 static struct attribute *xgene_pmu_cpumask_attrs[] = {
 	&dev_attr_cpumask.attr,
-- 
GitLab


From f9e36b388a325eee74fae3b545f64449c13f090a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 28 May 2021 14:17:38 +0800
Subject: [PATCH 1918/3804] perf: arm_spe: use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(),
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210528061738.23392-1-yuehaibing@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_spe_pmu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index e3711cb4c1b55..d44bcc29d99c8 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -231,15 +231,14 @@ static const struct attribute_group arm_spe_pmu_format_group = {
 	.attrs	= arm_spe_pmu_formats_attr,
 };
 
-static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev,
-					    struct device_attribute *attr,
-					    char *buf)
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
 {
 	struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);
 
 	return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus);
 }
-static DEVICE_ATTR(cpumask, S_IRUGO, arm_spe_pmu_get_attr_cpumask, NULL);
+static DEVICE_ATTR_RO(cpumask);
 
 static struct attribute *arm_spe_pmu_attrs[] = {
 	&dev_attr_cpumask.attr,
-- 
GitLab


From 4f2abe91922ba02bb419d91d92a518e4c805220b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 27 May 2021 11:28:35 -0700
Subject: [PATCH 1919/3804] perf record: Move probing cgroup sampling support

I found that checking cgroup sampling support using the missing features
doesn't work on old kernels.  Because it added both attr.cgroup bit and
PERF_SAMPLE_CGROUP bit, it needs to check whichever comes first (usually
the actual event, not dummy).

But it only checks the attr.cgroup bit which is set only in the dummy
event so cannot detect failures due to the sample bits.  Also we don't
ignore the missing feature and retry, it'd be better checking it with
the API probing logic.

Committer notes:

Extracted the minimal part to check using the new cgroup API probe
routine, the part that removes the cgroup member can be left for further
discussion.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210527182835.1634339-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c      |  6 ++++++
 tools/perf/util/perf_api_probe.c | 10 ++++++++++
 tools/perf/util/perf_api_probe.h |  1 +
 3 files changed, 17 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3337b5f93336d..84803abeb9425 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2714,6 +2714,12 @@ int cmd_record(int argc, const char **argv)
 		rec->no_buildid = true;
 	}
 
+	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
+		pr_err("Kernel has no cgroup sampling support.\n");
+		err = -EINVAL;
+		goto out_opts;
+	}
+
 	if (rec->opts.kcore)
 		rec->data.is_dir = true;
 
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 829af17a0867b..020411682a3cb 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -103,6 +103,11 @@ static void perf_probe_build_id(struct evsel *evsel)
 	evsel->core.attr.build_id = 1;
 }
 
+static void perf_probe_cgroup(struct evsel *evsel)
+{
+	evsel->core.attr.cgroup = 1;
+}
+
 bool perf_can_sample_identifier(void)
 {
 	return perf_probe_api(perf_probe_sample_identifier);
@@ -182,3 +187,8 @@ bool perf_can_record_build_id(void)
 {
 	return perf_probe_api(perf_probe_build_id);
 }
+
+bool perf_can_record_cgroup(void)
+{
+	return perf_probe_api(perf_probe_cgroup);
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index f12ca55f509a8..b104168efb154 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -12,5 +12,6 @@ bool perf_can_record_switch_events(void);
 bool perf_can_record_text_poke_events(void);
 bool perf_can_sample_identifier(void);
 bool perf_can_record_build_id(void);
+bool perf_can_record_cgroup(void);
 
 #endif // __PERF_API_PROBE_H
-- 
GitLab


From d3fddc355a4a4415e8d43d1faae1be713d65cf5e Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Mon, 17 May 2021 16:12:54 +0800
Subject: [PATCH 1920/3804] perf stat: Fix error return code in bperf__load()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Committer notes:

Added the missing {} for the now multiline 'if' block, fixing this error:

    CC      /tmp/build/perf/util/bpf_counter.o
  util/bpf_counter.c: In function ‘bperf__load’:
  util/bpf_counter.c:523:9: error: this ‘if’ clause does not guard... [-Werror=misleading-indentation]
    523 |         if (evsel->bperf_leader_link_fd < 0 &&
        |         ^~
  util/bpf_counter.c:526:17: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
    526 |                 goto out;
        |                 ^~~~
  cc1: all warnings being treated as errors

Fixes: 7fac83aaf2eecc9e ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Yu Kuai <yukuai3@huawei.com>
Cc: Zhang Yi <yi.zhang@huawei.com>
Link: http://lore.kernel.org/lkml/20210517081254.1561564-1-yukuai3@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_counter.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 974f10e356f06..5ed674a2f55e8 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -521,9 +521,10 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 
 	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id);
 	if (evsel->bperf_leader_link_fd < 0 &&
-	    bperf_reload_leader_program(evsel, attr_map_fd, &entry))
+	    bperf_reload_leader_program(evsel, attr_map_fd, &entry)) {
+		err = -1;
 		goto out;
-
+	}
 	/*
 	 * The bpf_link holds reference to the leader program, and the
 	 * leader program holds reference to the maps. Therefore, if
@@ -550,6 +551,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	/* Step 2: load the follower skeleton */
 	evsel->follower_skel = bperf_follower_bpf__open();
 	if (!evsel->follower_skel) {
+		err = -1;
 		pr_err("Failed to open follower skeleton\n");
 		goto out;
 	}
-- 
GitLab


From f677ec94f6fb9d895f40403bd54236f7763c29db Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Fri, 28 May 2021 11:10:50 +0200
Subject: [PATCH 1921/3804] perf test: Test 17 fails with make LIBPFM4=1 on
 s390 z/VM

This test case fails on s390 virtual machine z/VM which has no PMU support
when the perf tool is built with LIBPFM4=1.

Using make LIBPFM4=1 builds the perf tool with support for libpfm
event notation. The command line flag --pfm-events is valid:
 # ./perf record --pfm-events cycles -- true
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.001 MB perf.data (2 samples) ]
 #

However the command 'perf test -Fv 17' fails on s390 z/VM virtual machine
with LIBPFM4=1:
  # perf test -Fv 17
  17: Setup struct perf_event_attr                                    :
  --- start ---
  .....
  running './tests/attr/test-record-group2'
  unsupp  './tests/attr/test-record-group2'
  running './tests/attr/test-record-pfm-period'
  expected exclude_hv=0, got 1
 FAILED './tests/attr/test-record-pfm-period' - match failure
 ---- end ----
 Setup struct perf_event_attr: FAILED!

When --pfm-event system is not supported, the test returns unsupported
and continues. Here is an example using a virtual machine on x86 and
Fedora 34:
 [root@f33 perf]# perf test -Fv 17
 17: Setup struct perf_event_attr                                    :
 --- start ---
 .....
 running './tests/attr/test-record-group2'
 unsupp  './tests/attr/test-record-group2'
 running './tests/attr/test-record-pfm-period'
 unsupp  './tests/attr/test-record-pfm-period'
 ....

The issue is file ./tests/attr/test-record-pfm-period
which requires perf event attribute member exclude_hv to be zero.
This is not the case on s390 where the value of exclude_hv is one when
executing on a z/VM virtual machine without PMU hardware support.

Fix this by allowing the value of exclude_hv to be zero or one.

Output before:
 # /usr/bin/python ./tests/attr.py -d ./tests/attr/ -t \
	test-record-pfm-period -p ./perf  -vvv 2>&1| fgrep match
    matching [event:base-record]
    match: [event:base-record] matches []
 FAILED './tests/attr//test-record-pfm-period' - match failure
 #

Output after:
 # /usr/bin/python ./tests/attr.py -d ./tests/attr/ -t \
	test-record-pfm-period -p ./perf  -vvv 2>&1| fgrep match
    matching [event:base-record]
    match: [event:base-record] matches ['event-1-0-6', 'event-1-0-5']
  matched

Background:
Using libpfm library ends up in this function call sequence

pfm_get_perf_event_encoding()
+-- pfm_get_os_event_encoding()
    +-- pfmlib_perf_event_encode()

is called when no hardware specific PMU unit can be detected
as in the s390 z/VM virtual machine case. This uses the
"perf_events generic PMU" data structure which sets exclude_hv
to 1 per default.  Using this PMU that test case always fails.

That is the reason why exclude_hv attribute setting varies.

Version 2:

   As suggested by Ian Rogers make perf_event_attribute member
   exclude_hv more robust and accept value 0 or 1 to handle more
   test cases which might fail on s390 virtual machine z/VM.

Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Link: http://lore.kernel.org/lkml/20210528091050.245838-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr/base-record | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 4a7b8deef3fdd..8c10955eff939 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -16,7 +16,7 @@ pinned=0
 exclusive=0
 exclude_user=0
 exclude_kernel=0|1
-exclude_hv=0
+exclude_hv=0|1
 exclude_idle=0
 mmap=1
 comm=1
-- 
GitLab


From 848ff3768684701a4ce73a2ec0e5d438d4e2b0da Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Tue, 1 Jun 2021 06:09:03 -0700
Subject: [PATCH 1922/3804] perf/x86/intel/uncore: Fix M2M event umask for Ice
 Lake server

Perf tool errors out with the latest event list for the Ice Lake server.

event syntax error: 'unc_m2m_imc_reads.to_pmm'
                           \___ value too big for format, maximum is 255

The same as the Snow Ridge server, the M2M uncore unit in the Ice Lake
server has the unit mask extension field as well.

Fixes: 2b3b76b5ec67 ("perf/x86/intel/uncore: Add Ice Lake server uncore support")
Reported-by: Jin Yao <yao.jin@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1622552943-119174-1-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/uncore_snbep.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 1587d32897430..3a75a2c601c2a 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -5099,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = {
 	.perf_ctr	= SNR_M2M_PCI_PMON_CTR0,
 	.event_ctl	= SNR_M2M_PCI_PMON_CTL0,
 	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.event_mask_ext	= SNR_M2M_PCI_PMON_UMASK_EXT,
 	.box_ctl	= SNR_M2M_PCI_PMON_BOX_CTL,
 	.ops		= &snr_m2m_uncore_pci_ops,
-	.format_group	= &skx_uncore_format_group,
+	.format_group	= &snr_m2m_uncore_format_group,
 };
 
 static struct attribute *icx_upi_uncore_formats_attr[] = {
-- 
GitLab


From 9ce4d216fe8b581e4da4406461a4cfc9acbfa679 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Mon, 24 May 2021 04:14:11 +0000
Subject: [PATCH 1923/3804] uprobes: Update uprobe_write_opcode() kernel-doc
 comment

commit 6d43743e9079 ("Uprobe: Additional argument arch_uprobe to
uprobe_write_opcode()") added the parameter @auprobe.

Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210524041411.157027-1-hqjagain@gmail.com
---
 kernel/events/uprobes.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6addc97803198..a481ef696143c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -453,6 +453,7 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
  * that have fixed length instructions.
  *
  * uprobe_write_opcode - write the opcode at a given virtual address.
+ * @auprobe: arch specific probepoint information.
  * @mm: the probed process address space.
  * @vaddr: the virtual address to store the opcode.
  * @opcode: opcode to be written at @vaddr.
-- 
GitLab


From ec6aba3d2be1ed75b3f4c894bb64a36d40db1f55 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 May 2021 09:25:19 +0200
Subject: [PATCH 1924/3804] kprobes: Remove kprobe::fault_handler

The reason for kprobe::fault_handler(), as given by their comment:

 * We come here because instructions in the pre/post
 * handler caused the page_fault, this could happen
 * if handler tries to access user space by
 * copy_from_user(), get_user() etc. Let the
 * user-specified handler try to fix it first.

Is just plain bad. Those other handlers are run from non-preemptible
context and had better use _nofault() functions. Also, there is no
upstream usage of this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20210525073213.561116662@infradead.org
---
 Documentation/trace/kprobes.rst    | 24 +++++-------------------
 arch/arc/kernel/kprobes.c          | 10 ----------
 arch/arm/probes/kprobes/core.c     |  9 ---------
 arch/arm64/kernel/probes/kprobes.c | 10 ----------
 arch/csky/kernel/probes/kprobes.c  | 10 ----------
 arch/ia64/kernel/kprobes.c         |  9 ---------
 arch/mips/kernel/kprobes.c         |  3 ---
 arch/powerpc/kernel/kprobes.c      | 10 ----------
 arch/riscv/kernel/probes/kprobes.c | 10 ----------
 arch/s390/kernel/kprobes.c         | 10 ----------
 arch/sh/kernel/kprobes.c           | 10 ----------
 arch/sparc/kernel/kprobes.c        | 10 ----------
 arch/x86/kernel/kprobes/core.c     | 10 ----------
 include/linux/kprobes.h            |  8 --------
 kernel/kprobes.c                   | 19 -------------------
 samples/kprobes/kprobe_example.c   | 15 ---------------
 16 files changed, 5 insertions(+), 172 deletions(-)

diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst
index b757b6dfd3d4d..998149ce2fd95 100644
--- a/Documentation/trace/kprobes.rst
+++ b/Documentation/trace/kprobes.rst
@@ -362,14 +362,11 @@ register_kprobe
 	#include <linux/kprobes.h>
 	int register_kprobe(struct kprobe *kp);
 
-Sets a breakpoint at the address kp->addr.  When the breakpoint is
-hit, Kprobes calls kp->pre_handler.  After the probed instruction
-is single-stepped, Kprobe calls kp->post_handler.  If a fault
-occurs during execution of kp->pre_handler or kp->post_handler,
-or during single-stepping of the probed instruction, Kprobes calls
-kp->fault_handler.  Any or all handlers can be NULL. If kp->flags
-is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
-so, its handlers aren't hit until calling enable_kprobe(kp).
+Sets a breakpoint at the address kp->addr.  When the breakpoint is hit, Kprobes
+calls kp->pre_handler.  After the probed instruction is single-stepped, Kprobe
+calls kp->post_handler.  Any or all handlers can be NULL. If kp->flags is set
+KPROBE_FLAG_DISABLED, that kp will be registered but disabled, so, its handlers
+aren't hit until calling enable_kprobe(kp).
 
 .. note::
 
@@ -415,17 +412,6 @@ User's post-handler (kp->post_handler)::
 p and regs are as described for the pre_handler.  flags always seems
 to be zero.
 
-User's fault-handler (kp->fault_handler)::
-
-	#include <linux/kprobes.h>
-	#include <linux/ptrace.h>
-	int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
-
-p and regs are as described for the pre_handler.  trapnr is the
-architecture-specific trap number associated with the fault (e.g.,
-on i386, 13 for a general protection fault or 14 for a page fault).
-Returns 1 if it successfully handled the exception.
-
 register_kretprobe
 ------------------
 
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index cabef45f11df5..9f5b39f387362 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -323,16 +323,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned zero,
 		 * try to fix up.
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a9653117ca0dd..7b9b9a5a409bb 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -358,15 +358,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
-			return 1;
 		break;
 
 	default:
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d607c99120252..f6b088e9fa70e 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -283,16 +283,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, fsr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 589f090f48b99..e0e973e497703 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -301,16 +301,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index fc1ff8a4d7de6..6efed4ecff9e9 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -850,15 +850,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index 54dfba8fa77c8..75bff0f773198 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -403,9 +403,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
-	if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-		return 1;
-
 	if (kcb->kprobe_status & KPROBE_HIT_SS) {
 		resume_execution(cur, regs, kcb);
 		regs->cp0_status |= kcb->kprobe_old_SR;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 01ab2163659e4..75b4e874269d4 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -508,16 +508,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 10b965c345366..923b5ea396eab 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -283,16 +283,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index aae24dc75df61..ad631e33df24f 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -452,16 +452,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 		 */
 		kprobes_inc_nmissed_count(p);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (p->fault_handler && p->fault_handler(p, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 756100b01e846..58263420ad2a5 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -389,16 +389,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 217c21a6986ad..db4e341b4b6ea 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -352,16 +352,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 */
 		kprobes_inc_nmissed_count(cur);
 
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index d3d65545cb8b7..cfcdf4b8a306f 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1110,16 +1110,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		 * these specific fault cases.
 		 */
 		kprobes_inc_nmissed_count(cur);
-
-		/*
-		 * We come here because instructions in the pre/post
-		 * handler caused the page_fault, this could happen
-		 * if handler tries to access user space by
-		 * copy_from_user(), get_user() etc. Let the
-		 * user-specified handler try to fix it first.
-		 */
-		if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
-			return 1;
 	}
 
 	return 0;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 1883a4a9f16a7..523ffc7bc3a8b 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -54,8 +54,6 @@ struct kretprobe_instance;
 typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *);
 typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *,
 				       unsigned long flags);
-typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *,
-				       int trapnr);
 typedef int (*kretprobe_handler_t) (struct kretprobe_instance *,
 				    struct pt_regs *);
 
@@ -83,12 +81,6 @@ struct kprobe {
 	/* Called after addr is executed, unless... */
 	kprobe_post_handler_t post_handler;
 
-	/*
-	 * ... called if executing addr causes a fault (eg. page fault).
-	 * Return 1 if it handled fault, otherwise kernel will see it.
-	 */
-	kprobe_fault_handler_t fault_handler;
-
 	/* Saved opcode (which has been replaced with breakpoint) */
 	kprobe_opcode_t opcode;
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 745f08fdd7a69..e41385afe79dc 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1183,23 +1183,6 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 }
 NOKPROBE_SYMBOL(aggr_post_handler);
 
-static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
-			      int trapnr)
-{
-	struct kprobe *cur = __this_cpu_read(kprobe_instance);
-
-	/*
-	 * if we faulted "during" the execution of a user specified
-	 * probe handler, invoke just that probe's fault handler
-	 */
-	if (cur && cur->fault_handler) {
-		if (cur->fault_handler(cur, regs, trapnr))
-			return 1;
-	}
-	return 0;
-}
-NOKPROBE_SYMBOL(aggr_fault_handler);
-
 /* Walks the list and increments nmissed count for multiprobe case */
 void kprobes_inc_nmissed_count(struct kprobe *p)
 {
@@ -1330,7 +1313,6 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 	ap->addr = p->addr;
 	ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED;
 	ap->pre_handler = aggr_pre_handler;
-	ap->fault_handler = aggr_fault_handler;
 	/* We don't care the kprobe which has gone. */
 	if (p->post_handler && !kprobe_gone(p))
 		ap->post_handler = aggr_post_handler;
@@ -2014,7 +1996,6 @@ int register_kretprobe(struct kretprobe *rp)
 
 	rp->kp.pre_handler = pre_handler_kretprobe;
 	rp->kp.post_handler = NULL;
-	rp->kp.fault_handler = NULL;
 
 	/* Pre-allocate memory for max kretprobe instances */
 	if (rp->maxactive <= 0) {
diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c
index c495664c0a9b3..4b2f318289517 100644
--- a/samples/kprobes/kprobe_example.c
+++ b/samples/kprobes/kprobe_example.c
@@ -94,26 +94,11 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs,
 #endif
 }
 
-/*
- * fault_handler: this is called if an exception is generated for any
- * instruction within the pre- or post-handler, or when Kprobes
- * single-steps the probed instruction.
- */
-static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
-{
-	pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr);
-	/* Return 0 because we don't handle the fault. */
-	return 0;
-}
-/* NOKPROBE_SYMBOL() is also available */
-NOKPROBE_SYMBOL(handler_fault);
-
 static int __init kprobe_init(void)
 {
 	int ret;
 	kp.pre_handler = handler_pre;
 	kp.post_handler = handler_post;
-	kp.fault_handler = handler_fault;
 
 	ret = register_kprobe(&kp);
 	if (ret < 0) {
-- 
GitLab


From 00afe83098f59d3091a800d0db188ca495b2bc02 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 May 2021 09:25:20 +0200
Subject: [PATCH 1925/3804] x86,kprobes: WARN if kprobes tries to handle a
 fault

With the removal of kprobe::fault_handler there is no reason left that
kprobe_page_fault() would ever return true on x86; make sure it
doesn't happen by accident.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20210525073213.660594073@infradead.org
---
 arch/x86/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1c548ad007520..362255bfc9a89 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1186,7 +1186,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
 		return;
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (kprobe_page_fault(regs, X86_TRAP_PF))
+	if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
 		return;
 
 	/*
@@ -1239,7 +1239,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	}
 
 	/* kprobes don't want to hook the spurious faults: */
-	if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
+	if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF)))
 		return;
 
 	/*
-- 
GitLab


From 7b419f47facd286c6723daca6ad69ec355473f78 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 May 2021 08:53:28 +0200
Subject: [PATCH 1926/3804] sched: Add CONFIG_SCHED_CORE help text

Hugh noted that the SCHED_CORE Kconfig option could do with a help
text.

Requested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Hugh Dickins <hughd@google.com>
Link: https://lkml.kernel.org/r/YKyhtwhEgvtUDOyl@hirez.programming.kicks-ass.net
---
 kernel/Kconfig.preempt | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index ea1e3331c0ba3..bd7c4147b9a81 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -104,4 +104,18 @@ config SCHED_CORE
 	bool "Core Scheduling for SMT"
 	default y
 	depends on SCHED_SMT
+	help
+	  This option permits Core Scheduling, a means of coordinated task
+	  selection across SMT siblings. When enabled -- see
+	  prctl(PR_SCHED_CORE) -- task selection ensures that all SMT siblings
+	  will execute a task from the same 'core group', forcing idle when no
+	  matching task is found.
+
+	  Use of this feature includes:
+	   - mitigation of some (not all) SMT side channels;
+	   - limiting SMT interference to improve determinism and/or performance.
+
+	  SCHED_CORE is default enabled when SCHED_SMT is enabled -- when
+	  unused there should be no impact on performance.
+
 
-- 
GitLab


From 15faafc6b449777a85c0cf82dd8286c293fed4eb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 31 May 2021 12:21:13 +0200
Subject: [PATCH 1927/3804] sched,init: Fix DEBUG_PREEMPT vs early boot

Extend 8fb12156b8db ("init: Pin init task to the boot CPU, initially")
to cover the new PF_NO_SETAFFINITY requirement.

While there, move wait_for_completion(&kthreadd_done) into kernel_init()
to make it absolutely clear it is the very first thing done by the init
thread.

Fixes: 570a752b7a9b ("lib/smp_processor_id: Use is_percpu_thread() instead of nr_cpus_allowed")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Tested-by: Borislav Petkov <bp@alien8.de>
Link: https://lkml.kernel.org/r/YLS4mbKUrA3Gnb4t@hirez.programming.kicks-ass.net
---
 init/main.c         | 11 ++++++-----
 kernel/sched/core.c |  1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/init/main.c b/init/main.c
index 7b027d9c5c89b..e945ec82b8a54 100644
--- a/init/main.c
+++ b/init/main.c
@@ -692,6 +692,7 @@ noinline void __ref rest_init(void)
 	 */
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+	tsk->flags |= PF_NO_SETAFFINITY;
 	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
 	rcu_read_unlock();
 
@@ -1440,6 +1441,11 @@ static int __ref kernel_init(void *unused)
 {
 	int ret;
 
+	/*
+	 * Wait until kthreadd is all set-up.
+	 */
+	wait_for_completion(&kthreadd_done);
+
 	kernel_init_freeable();
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
@@ -1520,11 +1526,6 @@ void __init console_on_rootfs(void)
 
 static noinline void __init kernel_init_freeable(void)
 {
-	/*
-	 * Wait until kthreadd is all set-up.
-	 */
-	wait_for_completion(&kthreadd_done);
-
 	/* Now the scheduler is fully set up and can do blocking allocations */
 	gfp_allowed_mask = __GFP_BITS_MASK;
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3d2527239c3ed..e205c191b7fba 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8862,6 +8862,7 @@ void __init sched_init_smp(void)
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
 		BUG();
+	current->flags &= ~PF_NO_SETAFFINITY;
 	sched_init_granularity();
 
 	init_sched_rt_class();
-- 
GitLab


From 08f7c2f4d0e9f4283f5796b8168044c034a1bfcb Mon Sep 17 00:00:00 2001
From: Odin Ugedal <odin@uged.al>
Date: Tue, 18 May 2021 14:52:02 +0200
Subject: [PATCH 1928/3804] sched/fair: Fix ascii art by replacing tabs

When using something other than 8 spaces per tab, this ascii art
makes no sense, and the reader might end up wondering what this
advanced equation "is".

Signed-off-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210518125202.78658-4-odin@uged.al
---
 kernel/sched/fair.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 161b92aa1c797..a2c30e52de768 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3093,7 +3093,7 @@ void reweight_task(struct task_struct *p, int prio)
  *
  *                     tg->weight * grq->load.weight
  *   ge->load.weight = -----------------------------               (1)
- *			  \Sum grq->load.weight
+ *                       \Sum grq->load.weight
  *
  * Now, because computing that sum is prohibitively expensive to compute (been
  * there, done that) we approximate it with this average stuff. The average
@@ -3107,7 +3107,7 @@ void reweight_task(struct task_struct *p, int prio)
  *
  *                     tg->weight * grq->avg.load_avg
  *   ge->load.weight = ------------------------------              (3)
- *				tg->load_avg
+ *                             tg->load_avg
  *
  * Where: tg->load_avg ~= \Sum grq->avg.load_avg
  *
@@ -3123,7 +3123,7 @@ void reweight_task(struct task_struct *p, int prio)
  *
  *                     tg->weight * grq->load.weight
  *   ge->load.weight = ----------------------------- = tg->weight   (4)
- *			    grp->load.weight
+ *                         grp->load.weight
  *
  * That is, the sum collapses because all other CPUs are idle; the UP scenario.
  *
@@ -3142,7 +3142,7 @@ void reweight_task(struct task_struct *p, int prio)
  *
  *                     tg->weight * grq->load.weight
  *   ge->load.weight = -----------------------------		   (6)
- *				tg_load_avg'
+ *                             tg_load_avg'
  *
  * Where:
  *
-- 
GitLab


From 475ea6c60279e9f2ddf7e4cf2648cd8ae0608361 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 26 May 2021 21:57:50 +0100
Subject: [PATCH 1929/3804] sched: Don't defer CPU pick to migration_cpu_stop()

Will reported that the 'XXX __migrate_task() can fail' in migration_cpu_stop()
can happen, and it *is* sort of a big deal. Looking at it some more, one
will note there is a glaring hole in the deferred CPU selection:

  (w/ CONFIG_CPUSET=n, so that the affinity mask passed via taskset doesn't
  get AND'd with cpu_online_mask)

  $ taskset -pc 0-2 $PID
  # offline CPUs 3-4
  $ taskset -pc 3-5 $PID
    `\
      $PID may stay on 0-2 due to the cpumask_any_distribute() picking an
      offline CPU and __migrate_task() refusing to do anything due to
      cpu_is_allowed().

set_cpus_allowed_ptr() goes to some length to pick a dest_cpu that matches
the right constraints vs affinity and the online/active state of the
CPUs. Reuse that instead of discarding it in the affine_move_task() case.

Fixes: 6d337eab041d ("sched: Fix migrate_disable() vs set_cpus_allowed_ptr()")
Reported-by: Will Deacon <will@kernel.org>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210526205751.842360-2-valentin.schneider@arm.com
---
 kernel/sched/core.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e205c191b7fba..7e59466987112 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2273,7 +2273,6 @@ static int migration_cpu_stop(void *data)
 	struct migration_arg *arg = data;
 	struct set_affinity_pending *pending = arg->pending;
 	struct task_struct *p = arg->task;
-	int dest_cpu = arg->dest_cpu;
 	struct rq *rq = this_rq();
 	bool complete = false;
 	struct rq_flags rf;
@@ -2311,19 +2310,15 @@ static int migration_cpu_stop(void *data)
 		if (pending) {
 			p->migration_pending = NULL;
 			complete = true;
-		}
 
-		if (dest_cpu < 0) {
 			if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
 				goto out;
-
-			dest_cpu = cpumask_any_distribute(&p->cpus_mask);
 		}
 
 		if (task_on_rq_queued(p))
-			rq = __migrate_task(rq, &rf, p, dest_cpu);
+			rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
 		else
-			p->wake_cpu = dest_cpu;
+			p->wake_cpu = arg->dest_cpu;
 
 		/*
 		 * XXX __migrate_task() can fail, at which point we might end
@@ -2606,7 +2601,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 			init_completion(&my_pending.done);
 			my_pending.arg = (struct migration_arg) {
 				.task = p,
-				.dest_cpu = -1,		/* any */
+				.dest_cpu = dest_cpu,
 				.pending = &my_pending,
 			};
 
@@ -2614,6 +2609,15 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 		} else {
 			pending = p->migration_pending;
 			refcount_inc(&pending->refs);
+			/*
+			 * Affinity has changed, but we've already installed a
+			 * pending. migration_cpu_stop() *must* see this, else
+			 * we risk a completion of the pending despite having a
+			 * task on a disallowed CPU.
+			 *
+			 * Serialized by p->pi_lock, so this is safe.
+			 */
+			pending->arg.dest_cpu = dest_cpu;
 		}
 	}
 	pending = p->migration_pending;
-- 
GitLab


From 0b78f8bcf4951af30b0ae83ea4fad27d641ab617 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 1 Jun 2021 15:30:30 +0100
Subject: [PATCH 1930/3804] Revert "fb_defio: Remove custom
 address_space_operations"

Commit ccf953d8f3d6 makes framebuffers which use deferred I/O stop
displaying updates after the first one.  This is because the pages
handled by fb_defio no longer have a page_mapping().  That prevents
page_mkclean() from marking the PTEs as clean, and so writes are only
noticed the first time.

Reported-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/YLZEhv0cpZp8uVE3@casper.infradead.org
---
 drivers/video/fbdev/core/fb_defio.c | 35 +++++++++++++++++++++++++++++
 drivers/video/fbdev/core/fbmem.c    |  4 ++++
 include/linux/fb.h                  |  3 +++
 3 files changed, 42 insertions(+)

diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index b292887a24815..a591d291b231a 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -52,6 +52,13 @@ static vm_fault_t fb_deferred_io_fault(struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	get_page(page);
+
+	if (vmf->vma->vm_file)
+		page->mapping = vmf->vma->vm_file->f_mapping;
+	else
+		printk(KERN_ERR "no mapping available\n");
+
+	BUG_ON(!page->mapping);
 	page->index = vmf->pgoff;
 
 	vmf->page = page;
@@ -144,6 +151,17 @@ static const struct vm_operations_struct fb_deferred_io_vm_ops = {
 	.page_mkwrite	= fb_deferred_io_mkwrite,
 };
 
+static int fb_deferred_io_set_page_dirty(struct page *page)
+{
+	if (!PageDirty(page))
+		SetPageDirty(page);
+	return 0;
+}
+
+static const struct address_space_operations fb_deferred_io_aops = {
+	.set_page_dirty = fb_deferred_io_set_page_dirty,
+};
+
 int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	vma->vm_ops = &fb_deferred_io_vm_ops;
@@ -194,12 +212,29 @@ void fb_deferred_io_init(struct fb_info *info)
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_init);
 
+void fb_deferred_io_open(struct fb_info *info,
+			 struct inode *inode,
+			 struct file *file)
+{
+	file->f_mapping->a_ops = &fb_deferred_io_aops;
+}
+EXPORT_SYMBOL_GPL(fb_deferred_io_open);
+
 void fb_deferred_io_cleanup(struct fb_info *info)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
+	struct page *page;
+	int i;
 
 	BUG_ON(!fbdefio);
 	cancel_delayed_work_sync(&info->deferred_work);
+
+	/* clear out the mapping that we setup */
+	for (i = 0 ; i < info->fix.smem_len; i += PAGE_SIZE) {
+		page = fb_deferred_io_page(info, i);
+		page->mapping = NULL;
+	}
+
 	mutex_destroy(&fbdefio->lock);
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_cleanup);
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 072780b0e5702..98f193078c05a 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1415,6 +1415,10 @@ __releases(&info->lock)
 		if (res)
 			module_put(info->fbops->owner);
 	}
+#ifdef CONFIG_FB_DEFERRED_IO
+	if (info->fbdefio)
+		fb_deferred_io_open(info, inode, file);
+#endif
 out:
 	unlock_fb_info(info);
 	if (res)
diff --git a/include/linux/fb.h b/include/linux/fb.h
index a8dccd23c2499..ecfbcc0553a59 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -659,6 +659,9 @@ static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
 /* drivers/video/fb_defio.c */
 int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma);
 extern void fb_deferred_io_init(struct fb_info *info);
+extern void fb_deferred_io_open(struct fb_info *info,
+				struct inode *inode,
+				struct file *file);
 extern void fb_deferred_io_cleanup(struct fb_info *info);
 extern int fb_deferred_io_fsync(struct file *file, loff_t start,
 				loff_t end, int datasync);
-- 
GitLab


From d8570c182f56ca52c98734732fb9a331f7c23f9a Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:00 +0800
Subject: [PATCH 1931/3804] mfd: mt6358: Refine interrupt code

This patch refines the interrupt related code to support new chips.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mt6358-irq.c        | 65 +++++++++++++++++++--------------
 include/linux/mfd/mt6358/core.h |  8 ++--
 2 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/drivers/mfd/mt6358-irq.c b/drivers/mfd/mt6358-irq.c
index db734f2831ff0..4b094e5e51cc3 100644
--- a/drivers/mfd/mt6358-irq.c
+++ b/drivers/mfd/mt6358-irq.c
@@ -13,7 +13,9 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 
-static struct irq_top_t mt6358_ints[] = {
+#define MTK_PMIC_REG_WIDTH 16
+
+static const struct irq_top_t mt6358_ints[] = {
 	MT6358_TOP_GEN(BUCK),
 	MT6358_TOP_GEN(LDO),
 	MT6358_TOP_GEN(PSC),
@@ -24,6 +26,13 @@ static struct irq_top_t mt6358_ints[] = {
 	MT6358_TOP_GEN(MISC),
 };
 
+static struct pmic_irq_data mt6358_irqd = {
+	.num_top = ARRAY_SIZE(mt6358_ints),
+	.num_pmic_irqs = MT6358_IRQ_NR,
+	.top_int_status_reg = MT6358_TOP_INT_STATUS0,
+	.pmic_ints = mt6358_ints,
+};
+
 static void pmic_irq_enable(struct irq_data *data)
 {
 	unsigned int hwirq = irqd_to_hwirq(data);
@@ -62,15 +71,15 @@ static void pmic_irq_sync_unlock(struct irq_data *data)
 		/* Find out the IRQ group */
 		top_gp = 0;
 		while ((top_gp + 1) < irqd->num_top &&
-		       i >= mt6358_ints[top_gp + 1].hwirq_base)
+		       i >= irqd->pmic_ints[top_gp + 1].hwirq_base)
 			top_gp++;
 
 		/* Find the IRQ registers */
-		gp_offset = i - mt6358_ints[top_gp].hwirq_base;
-		int_regs = gp_offset / MT6358_REG_WIDTH;
-		shift = gp_offset % MT6358_REG_WIDTH;
-		en_reg = mt6358_ints[top_gp].en_reg +
-			 (mt6358_ints[top_gp].en_reg_shift * int_regs);
+		gp_offset = i - irqd->pmic_ints[top_gp].hwirq_base;
+		int_regs = gp_offset / MTK_PMIC_REG_WIDTH;
+		shift = gp_offset % MTK_PMIC_REG_WIDTH;
+		en_reg = irqd->pmic_ints[top_gp].en_reg +
+			 (irqd->pmic_ints[top_gp].en_reg_shift * int_regs);
 
 		regmap_update_bits(chip->regmap, en_reg, BIT(shift),
 				   irqd->enable_hwirq[i] << shift);
@@ -95,10 +104,11 @@ static void mt6358_irq_sp_handler(struct mt6397_chip *chip,
 	unsigned int irq_status, sta_reg, status;
 	unsigned int hwirq, virq;
 	int i, j, ret;
+	struct pmic_irq_data *irqd = chip->irq_data;
 
-	for (i = 0; i < mt6358_ints[top_gp].num_int_regs; i++) {
-		sta_reg = mt6358_ints[top_gp].sta_reg +
-			mt6358_ints[top_gp].sta_reg_shift * i;
+	for (i = 0; i < irqd->pmic_ints[top_gp].num_int_regs; i++) {
+		sta_reg = irqd->pmic_ints[top_gp].sta_reg +
+			irqd->pmic_ints[top_gp].sta_reg_shift * i;
 
 		ret = regmap_read(chip->regmap, sta_reg, &irq_status);
 		if (ret) {
@@ -114,8 +124,8 @@ static void mt6358_irq_sp_handler(struct mt6397_chip *chip,
 		do {
 			j = __ffs(status);
 
-			hwirq = mt6358_ints[top_gp].hwirq_base +
-				MT6358_REG_WIDTH * i + j;
+			hwirq = irqd->pmic_ints[top_gp].hwirq_base +
+				MTK_PMIC_REG_WIDTH * i + j;
 
 			virq = irq_find_mapping(chip->irq_domain, hwirq);
 			if (virq)
@@ -131,12 +141,12 @@ static void mt6358_irq_sp_handler(struct mt6397_chip *chip,
 static irqreturn_t mt6358_irq_handler(int irq, void *data)
 {
 	struct mt6397_chip *chip = data;
-	struct pmic_irq_data *mt6358_irq_data = chip->irq_data;
+	struct pmic_irq_data *irqd = chip->irq_data;
 	unsigned int bit, i, top_irq_status = 0;
 	int ret;
 
 	ret = regmap_read(chip->regmap,
-			  mt6358_irq_data->top_int_status_reg,
+			  irqd->top_int_status_reg,
 			  &top_irq_status);
 	if (ret) {
 		dev_err(chip->dev,
@@ -144,8 +154,8 @@ static irqreturn_t mt6358_irq_handler(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	for (i = 0; i < mt6358_irq_data->num_top; i++) {
-		bit = BIT(mt6358_ints[i].top_offset);
+	for (i = 0; i < irqd->num_top; i++) {
+		bit = BIT(irqd->pmic_ints[i].top_offset);
 		if (top_irq_status & bit) {
 			mt6358_irq_sp_handler(chip, i);
 			top_irq_status &= ~bit;
@@ -180,17 +190,18 @@ int mt6358_irq_init(struct mt6397_chip *chip)
 	int i, j, ret;
 	struct pmic_irq_data *irqd;
 
-	irqd = devm_kzalloc(chip->dev, sizeof(*irqd), GFP_KERNEL);
-	if (!irqd)
-		return -ENOMEM;
+	switch (chip->chip_id) {
+	case MT6358_CHIP_ID:
+		chip->irq_data = &mt6358_irqd;
+		break;
 
-	chip->irq_data = irqd;
+	default:
+		dev_err(chip->dev, "unsupported chip: 0x%x\n", chip->chip_id);
+		return -ENODEV;
+	}
 
 	mutex_init(&chip->irqlock);
-	irqd->top_int_status_reg = MT6358_TOP_INT_STATUS0;
-	irqd->num_pmic_irqs = MT6358_IRQ_NR;
-	irqd->num_top = ARRAY_SIZE(mt6358_ints);
-
+	irqd = chip->irq_data;
 	irqd->enable_hwirq = devm_kcalloc(chip->dev,
 					  irqd->num_pmic_irqs,
 					  sizeof(*irqd->enable_hwirq),
@@ -207,10 +218,10 @@ int mt6358_irq_init(struct mt6397_chip *chip)
 
 	/* Disable all interrupts for initializing */
 	for (i = 0; i < irqd->num_top; i++) {
-		for (j = 0; j < mt6358_ints[i].num_int_regs; j++)
+		for (j = 0; j < irqd->pmic_ints[i].num_int_regs; j++)
 			regmap_write(chip->regmap,
-				     mt6358_ints[i].en_reg +
-				     mt6358_ints[i].en_reg_shift * j, 0);
+				     irqd->pmic_ints[i].en_reg +
+				     irqd->pmic_ints[i].en_reg_shift * j, 0);
 	}
 
 	chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
diff --git a/include/linux/mfd/mt6358/core.h b/include/linux/mfd/mt6358/core.h
index c5a11b7458d42..68578e2019b08 100644
--- a/include/linux/mfd/mt6358/core.h
+++ b/include/linux/mfd/mt6358/core.h
@@ -6,12 +6,9 @@
 #ifndef __MFD_MT6358_CORE_H__
 #define __MFD_MT6358_CORE_H__
 
-#define MT6358_REG_WIDTH 16
-
 struct irq_top_t {
 	int hwirq_base;
 	unsigned int num_int_regs;
-	unsigned int num_int_bits;
 	unsigned int en_reg;
 	unsigned int en_reg_shift;
 	unsigned int sta_reg;
@@ -25,6 +22,7 @@ struct pmic_irq_data {
 	unsigned short top_int_status_reg;
 	bool *enable_hwirq;
 	bool *cache_hwirq;
+	const struct irq_top_t *pmic_ints;
 };
 
 enum mt6358_irq_top_status_shift {
@@ -146,8 +144,8 @@ enum mt6358_irq_numbers {
 {	\
 	.hwirq_base = MT6358_IRQ_##sp##_BASE,	\
 	.num_int_regs =	\
-		((MT6358_IRQ_##sp##_BITS - 1) / MT6358_REG_WIDTH) + 1,	\
-	.num_int_bits = MT6358_IRQ_##sp##_BITS, \
+		((MT6358_IRQ_##sp##_BITS - 1) /	\
+		MTK_PMIC_REG_WIDTH) + 1,	\
 	.en_reg = MT6358_##sp##_TOP_INT_CON0,	\
 	.en_reg_shift = 0x6,	\
 	.sta_reg = MT6358_##sp##_TOP_INT_STATUS0,	\
-- 
GitLab


From be60652f0260c2f371670ec90f1ac55e2671f793 Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:01 +0800
Subject: [PATCH 1932/3804] rtc: mt6397: refine RTC_TC_MTH

This patch adds RTC_TC_MTH_MASK to support new chips.

Signed-off-by: Yuchen Huang <yuchen.huang@mediatek.com>
Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/rtc/rtc-mt6397.c       | 2 +-
 include/linux/mfd/mt6397/rtc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c
index 6655035e5164d..80dc479a6ff02 100644
--- a/drivers/rtc/rtc-mt6397.c
+++ b/drivers/rtc/rtc-mt6397.c
@@ -75,7 +75,7 @@ static int __mtk_rtc_read_time(struct mt6397_rtc *rtc,
 	tm->tm_min = data[RTC_OFFSET_MIN];
 	tm->tm_hour = data[RTC_OFFSET_HOUR];
 	tm->tm_mday = data[RTC_OFFSET_DOM];
-	tm->tm_mon = data[RTC_OFFSET_MTH];
+	tm->tm_mon = data[RTC_OFFSET_MTH] & RTC_TC_MTH_MASK;
 	tm->tm_year = data[RTC_OFFSET_YEAR];
 
 	ret = regmap_read(rtc->regmap, rtc->addr_base + RTC_TC_SEC, sec);
diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h
index c3748b53bf7dd..068ae1c0f0e8e 100644
--- a/include/linux/mfd/mt6397/rtc.h
+++ b/include/linux/mfd/mt6397/rtc.h
@@ -36,6 +36,7 @@
 #define RTC_AL_MASK_DOW                BIT(4)
 
 #define RTC_TC_SEC             0x000a
+#define RTC_TC_MTH_MASK        0x000f
 /* Min, Hour, Dom... register offset to RTC_TC_SEC */
 #define RTC_OFFSET_SEC         0
 #define RTC_OFFSET_MIN         1
-- 
GitLab


From 65c1d05325b71b592688590d85c5ef6b360ca3fe Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:02 +0800
Subject: [PATCH 1933/3804] dt-bindings: mfd: Add compatible for the MediaTek
 MT6359 PMIC

This adds a compatible string for the MediaTek MT6359 PMIC.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/mt6397.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mfd/mt6397.txt b/Documentation/devicetree/bindings/mfd/mt6397.txt
index 2661775a38251..99a84b69a29fe 100644
--- a/Documentation/devicetree/bindings/mfd/mt6397.txt
+++ b/Documentation/devicetree/bindings/mfd/mt6397.txt
@@ -21,6 +21,7 @@ Required properties:
 compatible:
 	"mediatek,mt6323" for PMIC MT6323
 	"mediatek,mt6358" for PMIC MT6358
+	"mediatek,mt6359" for PMIC MT6359
 	"mediatek,mt6397" for PMIC MT6397
 
 Optional subnodes:
-- 
GitLab


From 8771456635d595707307210d5aa9f8ce41598f94 Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:03 +0800
Subject: [PATCH 1934/3804] dt-bindings: regulator: Add document for MT6359
 regulator

Add a dt-binding document for the MediaTek MT6359 PMIC regulators.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../bindings/regulator/mt6359-regulator.yaml  | 385 ++++++++++++++++++
 1 file changed, 385 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml

diff --git a/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
new file mode 100644
index 0000000000000..8cc413eb482dd
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/mt6359-regulator.yaml
@@ -0,0 +1,385 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/mt6359-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MT6359 Regulator from MediaTek Integrated
+
+maintainers:
+  - Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
+
+description: |
+  List of regulators provided by this controller. It is named
+  according to its regulator type, buck_<name> and ldo_<name>.
+  MT6359 regulators node should be sub node of the MT6397 MFD node.
+
+patternProperties:
+  "^buck_v(s1|gpu11|modem|pu|core|s2|pa|proc2|proc1|core_sshub)$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^v(s1|gpu11|modem|pu|core|s2|pa|proc2|proc1|core_sshub)$"
+
+    unevaluatedProperties: false
+
+  "^ldo_v(ibr|rf12|usb|camio|efuse|xo22)$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^v(ibr|rf12|usb|camio|efuse|xo22)$"
+
+    unevaluatedProperties: false
+
+  "^ldo_v(rfck|emc|a12|a09|ufs|bbck)$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^v(rfck|emc|a12|a09|ufs|bbck)$"
+
+    unevaluatedProperties: false
+
+  "^ldo_vcn(18|13|33_1_bt|33_1_wifi|33_2_bt|33_2_wifi)$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^vcn(18|13|33_1_bt|33_1_wifi|33_2_bt|33_2_wifi)$"
+
+    unevaluatedProperties: false
+
+  "^ldo_vsram_(proc2|others|md|proc1|others_sshub)$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^vsram_(proc2|others|md|proc1|others_sshub)$"
+
+    unevaluatedProperties: false
+
+  "^ldo_v(fe|bif|io)28$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^v(fe|bif|io)28$"
+
+    unevaluatedProperties: false
+
+  "^ldo_v(aud|io|aux|rf|m)18$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^v(aud|io|aux|rf|m)18$"
+
+    unevaluatedProperties: false
+
+  "^ldo_vsim[12]$":
+    type: object
+    $ref: "regulator.yaml#"
+
+    properties:
+      regulator-name:
+        pattern: "^vsim[12]$"
+
+    required:
+      - regulator-name
+
+    unevaluatedProperties: false
+
+additionalProperties: false
+
+examples:
+  - |
+    pmic {
+      regulators {
+        mt6359_vs1_buck_reg: buck_vs1 {
+          regulator-name = "vs1";
+          regulator-min-microvolt = <800000>;
+          regulator-max-microvolt = <2200000>;
+          regulator-enable-ramp-delay = <0>;
+          regulator-always-on;
+        };
+        mt6359_vgpu11_buck_reg: buck_vgpu11 {
+          regulator-name = "vgpu11";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+          regulator-ramp-delay = <5000>;
+          regulator-enable-ramp-delay = <200>;
+          regulator-allowed-modes = <0 1 2>;
+        };
+        mt6359_vmodem_buck_reg: buck_vmodem {
+          regulator-name = "vmodem";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1100000>;
+          regulator-ramp-delay = <10760>;
+          regulator-enable-ramp-delay = <200>;
+        };
+        mt6359_vpu_buck_reg: buck_vpu {
+          regulator-name = "vpu";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+          regulator-ramp-delay = <5000>;
+          regulator-enable-ramp-delay = <200>;
+          regulator-allowed-modes = <0 1 2>;
+        };
+        mt6359_vcore_buck_reg: buck_vcore {
+          regulator-name = "vcore";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1300000>;
+          regulator-ramp-delay = <5000>;
+          regulator-enable-ramp-delay = <200>;
+          regulator-allowed-modes = <0 1 2>;
+        };
+        mt6359_vs2_buck_reg: buck_vs2 {
+          regulator-name = "vs2";
+          regulator-min-microvolt = <800000>;
+          regulator-max-microvolt = <1600000>;
+          regulator-enable-ramp-delay = <0>;
+          regulator-always-on;
+        };
+        mt6359_vpa_buck_reg: buck_vpa {
+          regulator-name = "vpa";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <3650000>;
+          regulator-enable-ramp-delay = <300>;
+        };
+        mt6359_vproc2_buck_reg: buck_vproc2 {
+          regulator-name = "vproc2";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+          regulator-ramp-delay = <7500>;
+          regulator-enable-ramp-delay = <200>;
+          regulator-allowed-modes = <0 1 2>;
+        };
+        mt6359_vproc1_buck_reg: buck_vproc1 {
+          regulator-name = "vproc1";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+          regulator-ramp-delay = <7500>;
+          regulator-enable-ramp-delay = <200>;
+          regulator-allowed-modes = <0 1 2>;
+        };
+        mt6359_vcore_sshub_buck_reg: buck_vcore_sshub {
+          regulator-name = "vcore_sshub";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+        };
+        mt6359_vgpu11_sshub_buck_reg: buck_vgpu11_sshub {
+          regulator-name = "vgpu11_sshub";
+          regulator-min-microvolt = <400000>;
+          regulator-max-microvolt = <1193750>;
+        };
+        mt6359_vaud18_ldo_reg: ldo_vaud18 {
+          regulator-name = "vaud18";
+          regulator-min-microvolt = <1800000>;
+          regulator-max-microvolt = <1800000>;
+          regulator-enable-ramp-delay = <240>;
+        };
+        mt6359_vsim1_ldo_reg: ldo_vsim1 {
+          regulator-name = "vsim1";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <3100000>;
+        };
+        mt6359_vibr_ldo_reg: ldo_vibr {
+          regulator-name = "vibr";
+          regulator-min-microvolt = <1200000>;
+          regulator-max-microvolt = <3300000>;
+        };
+        mt6359_vrf12_ldo_reg: ldo_vrf12 {
+          regulator-name = "vrf12";
+          regulator-min-microvolt = <1100000>;
+          regulator-max-microvolt = <1300000>;
+        };
+        mt6359_vusb_ldo_reg: ldo_vusb {
+          regulator-name = "vusb";
+          regulator-min-microvolt = <3000000>;
+          regulator-max-microvolt = <3000000>;
+          regulator-enable-ramp-delay = <960>;
+          regulator-always-on;
+        };
+        mt6359_vsram_proc2_ldo_reg: ldo_vsram_proc2 {
+          regulator-name = "vsram_proc2";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <1293750>;
+          regulator-ramp-delay = <7500>;
+          regulator-enable-ramp-delay = <240>;
+          regulator-always-on;
+        };
+        mt6359_vio18_ldo_reg: ldo_vio18 {
+          regulator-name = "vio18";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <1900000>;
+          regulator-enable-ramp-delay = <960>;
+          regulator-always-on;
+        };
+        mt6359_vcamio_ldo_reg: ldo_vcamio {
+          regulator-name = "vcamio";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <1900000>;
+        };
+        mt6359_vcn18_ldo_reg: ldo_vcn18 {
+          regulator-name = "vcn18";
+          regulator-min-microvolt = <1800000>;
+          regulator-max-microvolt = <1800000>;
+          regulator-enable-ramp-delay = <240>;
+        };
+        mt6359_vfe28_ldo_reg: ldo_vfe28 {
+          regulator-name = "vfe28";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <2800000>;
+          regulator-enable-ramp-delay = <120>;
+        };
+        mt6359_vcn13_ldo_reg: ldo_vcn13 {
+          regulator-name = "vcn13";
+          regulator-min-microvolt = <900000>;
+          regulator-max-microvolt = <1300000>;
+        };
+        mt6359_vcn33_1_bt_ldo_reg: ldo_vcn33_1_bt {
+          regulator-name = "vcn33_1_bt";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <3500000>;
+        };
+        mt6359_vcn33_1_wifi_ldo_reg: ldo_vcn33_1_wifi {
+          regulator-name = "vcn33_1_wifi";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <3500000>;
+        };
+        mt6359_vaux18_ldo_reg: ldo_vaux18 {
+          regulator-name = "vaux18";
+          regulator-min-microvolt = <1800000>;
+          regulator-max-microvolt = <1800000>;
+          regulator-enable-ramp-delay = <240>;
+          regulator-always-on;
+        };
+        mt6359_vsram_others_ldo_reg: ldo_vsram_others {
+          regulator-name = "vsram_others";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <1293750>;
+          regulator-ramp-delay = <5000>;
+          regulator-enable-ramp-delay = <240>;
+        };
+        mt6359_vefuse_ldo_reg: ldo_vefuse {
+          regulator-name = "vefuse";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <2000000>;
+        };
+        mt6359_vxo22_ldo_reg: ldo_vxo22 {
+          regulator-name = "vxo22";
+          regulator-min-microvolt = <1800000>;
+          regulator-max-microvolt = <2200000>;
+          regulator-always-on;
+        };
+        mt6359_vrfck_ldo_reg: ldo_vrfck {
+          regulator-name = "vrfck";
+          regulator-min-microvolt = <1500000>;
+          regulator-max-microvolt = <1700000>;
+        };
+        mt6359_vrfck_1_ldo_reg: ldo_vrfck_1 {
+          regulator-name = "vrfck";
+          regulator-min-microvolt = <1240000>;
+          regulator-max-microvolt = <1600000>;
+        };
+        mt6359_vbif28_ldo_reg: ldo_vbif28 {
+          regulator-name = "vbif28";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <2800000>;
+          regulator-enable-ramp-delay = <240>;
+        };
+        mt6359_vio28_ldo_reg: ldo_vio28 {
+          regulator-name = "vio28";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <3300000>;
+          regulator-always-on;
+        };
+        mt6359_vemc_ldo_reg: ldo_vemc {
+          regulator-name = "vemc";
+          regulator-min-microvolt = <2900000>;
+          regulator-max-microvolt = <3300000>;
+        };
+        mt6359_vemc_1_ldo_reg: ldo_vemc_1 {
+          regulator-name = "vemc";
+          regulator-min-microvolt = <2500000>;
+          regulator-max-microvolt = <3300000>;
+        };
+        mt6359_vcn33_2_bt_ldo_reg: ldo_vcn33_2_bt {
+          regulator-name = "vcn33_2_bt";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <3500000>;
+        };
+        mt6359_vcn33_2_wifi_ldo_reg: ldo_vcn33_2_wifi {
+          regulator-name = "vcn33_2_wifi";
+          regulator-min-microvolt = <2800000>;
+          regulator-max-microvolt = <3500000>;
+        };
+        mt6359_va12_ldo_reg: ldo_va12 {
+          regulator-name = "va12";
+          regulator-min-microvolt = <1200000>;
+          regulator-max-microvolt = <1300000>;
+          regulator-always-on;
+        };
+        mt6359_va09_ldo_reg: ldo_va09 {
+          regulator-name = "va09";
+          regulator-min-microvolt = <800000>;
+          regulator-max-microvolt = <1200000>;
+        };
+        mt6359_vrf18_ldo_reg: ldo_vrf18 {
+          regulator-name = "vrf18";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <1810000>;
+        };
+        mt6359_vsram_md_ldo_reg: ldo_vsram_md {
+          regulator-name = "vsram_md";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <1293750>;
+          regulator-ramp-delay = <10760>;
+          regulator-enable-ramp-delay = <240>;
+        };
+        mt6359_vufs_ldo_reg: ldo_vufs {
+          regulator-name = "vufs";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <1900000>;
+        };
+        mt6359_vm18_ldo_reg: ldo_vm18 {
+          regulator-name = "vm18";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <1900000>;
+          regulator-always-on;
+        };
+        mt6359_vbbck_ldo_reg: ldo_vbbck {
+          regulator-name = "vbbck";
+          regulator-min-microvolt = <1100000>;
+          regulator-max-microvolt = <1200000>;
+        };
+        mt6359_vsram_proc1_ldo_reg: ldo_vsram_proc1 {
+          regulator-name = "vsram_proc1";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <1293750>;
+          regulator-ramp-delay = <7500>;
+          regulator-enable-ramp-delay = <240>;
+          regulator-always-on;
+        };
+        mt6359_vsim2_ldo_reg: ldo_vsim2 {
+          regulator-name = "vsim2";
+          regulator-min-microvolt = <1700000>;
+          regulator-max-microvolt = <3100000>;
+        };
+        mt6359_vsram_others_sshub_ldo: ldo_vsram_others_sshub {
+          regulator-name = "vsram_others_sshub";
+          regulator-min-microvolt = <500000>;
+          regulator-max-microvolt = <1293750>;
+        };
+      };
+    };
+...
-- 
GitLab


From e545b8f380a96174df40db4203d09156e096ee89 Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:04 +0800
Subject: [PATCH 1935/3804] mfd: Add support for the MediaTek MT6359 PMIC

This adds support for the MediaTek MT6359 PMIC. This is a
multifunction device with the following sub modules:

- Codec
- Interrupt
- Regulator
- RTC

It is interfaced to the host controller over SPI through a
proprietary hardware block called the PMIC wrapper (pwrap).
The MT6359 MFD is a child device of the pwrap.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mt6358-irq.c             |  24 ++
 drivers/mfd/mt6397-core.c            |  24 ++
 include/linux/mfd/mt6359/core.h      | 133 +++++++
 include/linux/mfd/mt6359/registers.h | 529 +++++++++++++++++++++++++++
 include/linux/mfd/mt6397/core.h      |   1 +
 5 files changed, 711 insertions(+)
 create mode 100644 include/linux/mfd/mt6359/core.h
 create mode 100644 include/linux/mfd/mt6359/registers.h

diff --git a/drivers/mfd/mt6358-irq.c b/drivers/mfd/mt6358-irq.c
index 4b094e5e51cc3..83f3ffbdbb4ca 100644
--- a/drivers/mfd/mt6358-irq.c
+++ b/drivers/mfd/mt6358-irq.c
@@ -5,6 +5,8 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/mt6358/core.h>
 #include <linux/mfd/mt6358/registers.h>
+#include <linux/mfd/mt6359/core.h>
+#include <linux/mfd/mt6359/registers.h>
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -26,6 +28,17 @@ static const struct irq_top_t mt6358_ints[] = {
 	MT6358_TOP_GEN(MISC),
 };
 
+static const struct irq_top_t mt6359_ints[] = {
+	MT6359_TOP_GEN(BUCK),
+	MT6359_TOP_GEN(LDO),
+	MT6359_TOP_GEN(PSC),
+	MT6359_TOP_GEN(SCK),
+	MT6359_TOP_GEN(BM),
+	MT6359_TOP_GEN(HK),
+	MT6359_TOP_GEN(AUD),
+	MT6359_TOP_GEN(MISC),
+};
+
 static struct pmic_irq_data mt6358_irqd = {
 	.num_top = ARRAY_SIZE(mt6358_ints),
 	.num_pmic_irqs = MT6358_IRQ_NR,
@@ -33,6 +46,13 @@ static struct pmic_irq_data mt6358_irqd = {
 	.pmic_ints = mt6358_ints,
 };
 
+static struct pmic_irq_data mt6359_irqd = {
+	.num_top = ARRAY_SIZE(mt6359_ints),
+	.num_pmic_irqs = MT6359_IRQ_NR,
+	.top_int_status_reg = MT6359_TOP_INT_STATUS0,
+	.pmic_ints = mt6359_ints,
+};
+
 static void pmic_irq_enable(struct irq_data *data)
 {
 	unsigned int hwirq = irqd_to_hwirq(data);
@@ -195,6 +215,10 @@ int mt6358_irq_init(struct mt6397_chip *chip)
 		chip->irq_data = &mt6358_irqd;
 		break;
 
+	case MT6359_CHIP_ID:
+		chip->irq_data = &mt6359_irqd;
+		break;
+
 	default:
 		dev_err(chip->dev, "unsupported chip: 0x%x\n", chip->chip_id);
 		return -ENODEV;
diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c
index 7518d74c3b4c3..9a615f75fbde8 100644
--- a/drivers/mfd/mt6397-core.c
+++ b/drivers/mfd/mt6397-core.c
@@ -13,9 +13,11 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/mt6323/core.h>
 #include <linux/mfd/mt6358/core.h>
+#include <linux/mfd/mt6359/core.h>
 #include <linux/mfd/mt6397/core.h>
 #include <linux/mfd/mt6323/registers.h>
 #include <linux/mfd/mt6358/registers.h>
+#include <linux/mfd/mt6359/registers.h>
 #include <linux/mfd/mt6397/registers.h>
 
 #define MT6323_RTC_BASE		0x8000
@@ -99,6 +101,17 @@ static const struct mfd_cell mt6358_devs[] = {
 	},
 };
 
+static const struct mfd_cell mt6359_devs[] = {
+	{ .name = "mt6359-regulator", },
+	{
+		.name = "mt6359-rtc",
+		.num_resources = ARRAY_SIZE(mt6358_rtc_resources),
+		.resources = mt6358_rtc_resources,	/* MT6358 RTC resource table is reused */
+		.of_compatible = "mediatek,mt6358-rtc",	/* reuses MT6358 RTC compatible */
+	},
+	{ .name = "mt6359-sound", },
+};
+
 static const struct mfd_cell mt6397_devs[] = {
 	{
 		.name = "mt6397-rtc",
@@ -149,6 +162,14 @@ static const struct chip_data mt6358_core = {
 	.irq_init = mt6358_irq_init,
 };
 
+static const struct chip_data mt6359_core = {
+	.cid_addr = MT6359_SWCID,
+	.cid_shift = 8,
+	.cells = mt6359_devs,
+	.cell_size = ARRAY_SIZE(mt6359_devs),
+	.irq_init = mt6358_irq_init,
+};
+
 static const struct chip_data mt6397_core = {
 	.cid_addr = MT6397_CID,
 	.cid_shift = 0,
@@ -218,6 +239,9 @@ static const struct of_device_id mt6397_of_match[] = {
 	}, {
 		.compatible = "mediatek,mt6358",
 		.data = &mt6358_core,
+	}, {
+		.compatible = "mediatek,mt6359",
+		.data = &mt6359_core,
 	}, {
 		.compatible = "mediatek,mt6397",
 		.data = &mt6397_core,
diff --git a/include/linux/mfd/mt6359/core.h b/include/linux/mfd/mt6359/core.h
new file mode 100644
index 0000000000000..8d298868126de
--- /dev/null
+++ b/include/linux/mfd/mt6359/core.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#ifndef __MFD_MT6359_CORE_H__
+#define __MFD_MT6359_CORE_H__
+
+enum mt6359_irq_top_status_shift {	/* consumed as .top_offset by MT6359_TOP_GEN() */
+	MT6359_BUCK_TOP = 0,
+	MT6359_LDO_TOP,
+	MT6359_PSC_TOP,
+	MT6359_SCK_TOP,
+	MT6359_BM_TOP,
+	MT6359_HK_TOP,
+	MT6359_AUD_TOP = 7,	/* note: value 6 is not assigned to any group */
+	MT6359_MISC_TOP,
+};
+
+enum mt6359_irq_numbers {
+	MT6359_IRQ_VCORE_OC = 1,
+	MT6359_IRQ_VGPU11_OC,
+	MT6359_IRQ_VGPU12_OC,
+	MT6359_IRQ_VMODEM_OC,
+	MT6359_IRQ_VPROC1_OC,
+	MT6359_IRQ_VPROC2_OC,
+	MT6359_IRQ_VS1_OC,
+	MT6359_IRQ_VS2_OC,
+	MT6359_IRQ_VPA_OC = 9,
+	MT6359_IRQ_VFE28_OC = 16,
+	MT6359_IRQ_VXO22_OC,
+	MT6359_IRQ_VRF18_OC,
+	MT6359_IRQ_VRF12_OC,
+	MT6359_IRQ_VEFUSE_OC,
+	MT6359_IRQ_VCN33_1_OC,
+	MT6359_IRQ_VCN33_2_OC,
+	MT6359_IRQ_VCN13_OC,
+	MT6359_IRQ_VCN18_OC,
+	MT6359_IRQ_VA09_OC,
+	MT6359_IRQ_VCAMIO_OC,
+	MT6359_IRQ_VA12_OC,
+	MT6359_IRQ_VAUX18_OC,
+	MT6359_IRQ_VAUD18_OC,
+	MT6359_IRQ_VIO18_OC,
+	MT6359_IRQ_VSRAM_PROC1_OC,
+	MT6359_IRQ_VSRAM_PROC2_OC,
+	MT6359_IRQ_VSRAM_OTHERS_OC,
+	MT6359_IRQ_VSRAM_MD_OC,
+	MT6359_IRQ_VEMC_OC,
+	MT6359_IRQ_VSIM1_OC,
+	MT6359_IRQ_VSIM2_OC,
+	MT6359_IRQ_VUSB_OC,
+	MT6359_IRQ_VRFCK_OC,
+	MT6359_IRQ_VBBCK_OC,
+	MT6359_IRQ_VBIF28_OC,
+	MT6359_IRQ_VIBR_OC,
+	MT6359_IRQ_VIO28_OC,
+	MT6359_IRQ_VM18_OC,
+	MT6359_IRQ_VUFS_OC = 45,
+	MT6359_IRQ_PWRKEY = 48,
+	MT6359_IRQ_HOMEKEY,
+	MT6359_IRQ_PWRKEY_R,
+	MT6359_IRQ_HOMEKEY_R,
+	MT6359_IRQ_NI_LBAT_INT,
+	MT6359_IRQ_CHRDET_EDGE = 53,
+	MT6359_IRQ_RTC = 64,
+	MT6359_IRQ_FG_BAT_H = 80,
+	MT6359_IRQ_FG_BAT_L,
+	MT6359_IRQ_FG_CUR_H,
+	MT6359_IRQ_FG_CUR_L,
+	MT6359_IRQ_FG_ZCV = 84,
+	MT6359_IRQ_FG_N_CHARGE_L = 87,
+	MT6359_IRQ_FG_IAVG_H,
+	MT6359_IRQ_FG_IAVG_L = 89,
+	MT6359_IRQ_FG_DISCHARGE = 91,
+	MT6359_IRQ_FG_CHARGE,
+	MT6359_IRQ_BATON_LV = 96,
+	MT6359_IRQ_BATON_BAT_IN = 98,
+	MT6359_IRQ_BATON_BAT_OU,
+	MT6359_IRQ_BIF = 100,
+	MT6359_IRQ_BAT_H = 112,
+	MT6359_IRQ_BAT_L,
+	MT6359_IRQ_BAT2_H,
+	MT6359_IRQ_BAT2_L,
+	MT6359_IRQ_BAT_TEMP_H,
+	MT6359_IRQ_BAT_TEMP_L,
+	MT6359_IRQ_THR_H,
+	MT6359_IRQ_THR_L,
+	MT6359_IRQ_AUXADC_IMP,
+	MT6359_IRQ_NAG_C_DLTV = 121,
+	MT6359_IRQ_AUDIO = 128,
+	MT6359_IRQ_ACCDET = 133,
+	MT6359_IRQ_ACCDET_EINT0,
+	MT6359_IRQ_ACCDET_EINT1,
+	MT6359_IRQ_SPI_CMD_ALERT = 144,
+	MT6359_IRQ_NR,
+};
+
+#define MT6359_IRQ_BUCK_BASE MT6359_IRQ_VCORE_OC
+#define MT6359_IRQ_LDO_BASE MT6359_IRQ_VFE28_OC
+#define MT6359_IRQ_PSC_BASE MT6359_IRQ_PWRKEY
+#define MT6359_IRQ_SCK_BASE MT6359_IRQ_RTC
+#define MT6359_IRQ_BM_BASE MT6359_IRQ_FG_BAT_H
+#define MT6359_IRQ_HK_BASE MT6359_IRQ_BAT_H
+#define MT6359_IRQ_AUD_BASE MT6359_IRQ_AUDIO
+#define MT6359_IRQ_MISC_BASE MT6359_IRQ_SPI_CMD_ALERT
+
+#define MT6359_IRQ_BUCK_BITS (MT6359_IRQ_VPA_OC - MT6359_IRQ_BUCK_BASE + 1)
+#define MT6359_IRQ_LDO_BITS (MT6359_IRQ_VUFS_OC - MT6359_IRQ_LDO_BASE + 1)
+#define MT6359_IRQ_PSC_BITS	\
+	(MT6359_IRQ_CHRDET_EDGE - MT6359_IRQ_PSC_BASE + 1)
+#define MT6359_IRQ_SCK_BITS (MT6359_IRQ_RTC - MT6359_IRQ_SCK_BASE + 1)
+#define MT6359_IRQ_BM_BITS (MT6359_IRQ_BIF - MT6359_IRQ_BM_BASE + 1)
+#define MT6359_IRQ_HK_BITS (MT6359_IRQ_NAG_C_DLTV - MT6359_IRQ_HK_BASE + 1)
+#define MT6359_IRQ_AUD_BITS	\
+	(MT6359_IRQ_ACCDET_EINT1 - MT6359_IRQ_AUD_BASE + 1)
+#define MT6359_IRQ_MISC_BITS	\
+	(MT6359_IRQ_SPI_CMD_ALERT - MT6359_IRQ_MISC_BASE + 1)
+
+#define MT6359_TOP_GEN(sp)	\
+{	\
+	.hwirq_base = MT6359_IRQ_##sp##_BASE,	\
+	.num_int_regs =	\
+		((MT6359_IRQ_##sp##_BITS - 1) /	\
+		MTK_PMIC_REG_WIDTH) + 1,	\
+	.en_reg = MT6359_##sp##_TOP_INT_CON0,	\
+	.en_reg_shift = 0x6,	\
+	.sta_reg = MT6359_##sp##_TOP_INT_STATUS0,	\
+	.sta_reg_shift = 0x2,	\
+	.top_offset = MT6359_##sp##_TOP,	\
+}
+
+#endif /* __MFD_MT6359_CORE_H__ */
diff --git a/include/linux/mfd/mt6359/registers.h b/include/linux/mfd/mt6359/registers.h
new file mode 100644
index 0000000000000..2135c9695918c
--- /dev/null
+++ b/include/linux/mfd/mt6359/registers.h
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#ifndef __MFD_MT6359_REGISTERS_H__
+#define __MFD_MT6359_REGISTERS_H__
+
+/* PMIC Registers */
+#define MT6359_SWCID                         0xa
+#define MT6359_MISC_TOP_INT_CON0             0x188
+#define MT6359_MISC_TOP_INT_STATUS0          0x194
+#define MT6359_TOP_INT_STATUS0               0x19e
+#define MT6359_SCK_TOP_INT_CON0              0x528
+#define MT6359_SCK_TOP_INT_STATUS0           0x534
+#define MT6359_EOSC_CALI_CON0                0x53a
+#define MT6359_EOSC_CALI_CON1                0x53c
+#define MT6359_RTC_MIX_CON0                  0x53e
+#define MT6359_RTC_MIX_CON1                  0x540
+#define MT6359_RTC_MIX_CON2                  0x542
+#define MT6359_RTC_DSN_ID                    0x580
+#define MT6359_RTC_DSN_REV0                  0x582
+#define MT6359_RTC_DBI                       0x584
+#define MT6359_RTC_DXI                       0x586
+#define MT6359_RTC_BBPU                      0x588
+#define MT6359_RTC_IRQ_STA                   0x58a
+#define MT6359_RTC_IRQ_EN                    0x58c
+#define MT6359_RTC_CII_EN                    0x58e
+#define MT6359_RTC_AL_MASK                   0x590
+#define MT6359_RTC_TC_SEC                    0x592
+#define MT6359_RTC_TC_MIN                    0x594
+#define MT6359_RTC_TC_HOU                    0x596
+#define MT6359_RTC_TC_DOM                    0x598
+#define MT6359_RTC_TC_DOW                    0x59a
+#define MT6359_RTC_TC_MTH                    0x59c
+#define MT6359_RTC_TC_YEA                    0x59e
+#define MT6359_RTC_AL_SEC                    0x5a0
+#define MT6359_RTC_AL_MIN                    0x5a2
+#define MT6359_RTC_AL_HOU                    0x5a4
+#define MT6359_RTC_AL_DOM                    0x5a6
+#define MT6359_RTC_AL_DOW                    0x5a8
+#define MT6359_RTC_AL_MTH                    0x5aa
+#define MT6359_RTC_AL_YEA                    0x5ac
+#define MT6359_RTC_OSC32CON                  0x5ae
+#define MT6359_RTC_POWERKEY1                 0x5b0
+#define MT6359_RTC_POWERKEY2                 0x5b2
+#define MT6359_RTC_PDN1                      0x5b4
+#define MT6359_RTC_PDN2                      0x5b6
+#define MT6359_RTC_SPAR0                     0x5b8
+#define MT6359_RTC_SPAR1                     0x5ba
+#define MT6359_RTC_PROT                      0x5bc
+#define MT6359_RTC_DIFF                      0x5be
+#define MT6359_RTC_CALI                      0x5c0
+#define MT6359_RTC_WRTGR                     0x5c2
+#define MT6359_RTC_CON                       0x5c4
+#define MT6359_RTC_SEC_CTRL                  0x5c6
+#define MT6359_RTC_INT_CNT                   0x5c8
+#define MT6359_RTC_SEC_DAT0                  0x5ca
+#define MT6359_RTC_SEC_DAT1                  0x5cc
+#define MT6359_RTC_SEC_DAT2                  0x5ce
+#define MT6359_RTC_SEC_DSN_ID                0x600
+#define MT6359_RTC_SEC_DSN_REV0              0x602
+#define MT6359_RTC_SEC_DBI                   0x604
+#define MT6359_RTC_SEC_DXI                   0x606
+#define MT6359_RTC_TC_SEC_SEC                0x608
+#define MT6359_RTC_TC_MIN_SEC                0x60a
+#define MT6359_RTC_TC_HOU_SEC                0x60c
+#define MT6359_RTC_TC_DOM_SEC                0x60e
+#define MT6359_RTC_TC_DOW_SEC                0x610
+#define MT6359_RTC_TC_MTH_SEC                0x612
+#define MT6359_RTC_TC_YEA_SEC                0x614
+#define MT6359_RTC_SEC_CK_PDN                0x616
+#define MT6359_RTC_SEC_WRTGR                 0x618
+#define MT6359_PSC_TOP_INT_CON0              0x910
+#define MT6359_PSC_TOP_INT_STATUS0           0x91c
+#define MT6359_BM_TOP_INT_CON0               0xc32
+#define MT6359_BM_TOP_INT_CON1               0xc38
+#define MT6359_BM_TOP_INT_STATUS0            0xc4a
+#define MT6359_BM_TOP_INT_STATUS1            0xc4c
+#define MT6359_HK_TOP_INT_CON0               0xf92
+#define MT6359_HK_TOP_INT_STATUS0            0xf9e
+#define MT6359_BUCK_TOP_INT_CON0             0x1418
+#define MT6359_BUCK_TOP_INT_STATUS0          0x1424
+#define MT6359_BUCK_VPU_CON0                 0x1488
+#define MT6359_BUCK_VPU_DBG0                 0x14a6
+#define MT6359_BUCK_VPU_DBG1                 0x14a8
+#define MT6359_BUCK_VPU_ELR0                 0x14ac
+#define MT6359_BUCK_VCORE_CON0               0x1508
+#define MT6359_BUCK_VCORE_DBG0               0x1526
+#define MT6359_BUCK_VCORE_DBG1               0x1528
+#define MT6359_BUCK_VCORE_SSHUB_CON0         0x152a
+#define MT6359_BUCK_VCORE_ELR0               0x1534
+#define MT6359_BUCK_VGPU11_CON0              0x1588
+#define MT6359_BUCK_VGPU11_DBG0              0x15a6
+#define MT6359_BUCK_VGPU11_DBG1              0x15a8
+#define MT6359_BUCK_VGPU11_ELR0              0x15ac
+#define MT6359_BUCK_VMODEM_CON0              0x1688
+#define MT6359_BUCK_VMODEM_DBG0              0x16a6
+#define MT6359_BUCK_VMODEM_DBG1              0x16a8
+#define MT6359_BUCK_VMODEM_ELR0              0x16ae
+#define MT6359_BUCK_VPROC1_CON0              0x1708
+#define MT6359_BUCK_VPROC1_DBG0              0x1726
+#define MT6359_BUCK_VPROC1_DBG1              0x1728
+#define MT6359_BUCK_VPROC1_ELR0              0x172e
+#define MT6359_BUCK_VPROC2_CON0              0x1788
+#define MT6359_BUCK_VPROC2_DBG0              0x17a6
+#define MT6359_BUCK_VPROC2_DBG1              0x17a8
+#define MT6359_BUCK_VPROC2_ELR0              0x17b2
+#define MT6359_BUCK_VS1_CON0                 0x1808
+#define MT6359_BUCK_VS1_DBG0                 0x1826
+#define MT6359_BUCK_VS1_DBG1                 0x1828
+#define MT6359_BUCK_VS1_ELR0                 0x1834
+#define MT6359_BUCK_VS2_CON0                 0x1888
+#define MT6359_BUCK_VS2_DBG0                 0x18a6
+#define MT6359_BUCK_VS2_DBG1                 0x18a8
+#define MT6359_BUCK_VS2_ELR0                 0x18b4
+#define MT6359_BUCK_VPA_CON0                 0x1908
+#define MT6359_BUCK_VPA_CON1                 0x190e
+#define MT6359_BUCK_VPA_CFG0                 0x1910
+#define MT6359_BUCK_VPA_CFG1                 0x1912
+#define MT6359_BUCK_VPA_DBG0                 0x1914
+#define MT6359_BUCK_VPA_DBG1                 0x1916
+#define MT6359_VGPUVCORE_ANA_CON2            0x198e
+#define MT6359_VGPUVCORE_ANA_CON13           0x19a4
+#define MT6359_VPROC1_ANA_CON3               0x19b2
+#define MT6359_VPROC2_ANA_CON3               0x1a0e
+#define MT6359_VMODEM_ANA_CON3               0x1a1a
+#define MT6359_VPU_ANA_CON3                  0x1a26
+#define MT6359_VS1_ANA_CON0                  0x1a2c
+#define MT6359_VS2_ANA_CON0                  0x1a34
+#define MT6359_VPA_ANA_CON0                  0x1a3c
+#define MT6359_LDO_TOP_INT_CON0              0x1b14
+#define MT6359_LDO_TOP_INT_CON1              0x1b1a
+#define MT6359_LDO_TOP_INT_STATUS0           0x1b28
+#define MT6359_LDO_TOP_INT_STATUS1           0x1b2a
+#define MT6359_LDO_VSRAM_PROC1_ELR           0x1b40
+#define MT6359_LDO_VSRAM_PROC2_ELR           0x1b42
+#define MT6359_LDO_VSRAM_OTHERS_ELR          0x1b44
+#define MT6359_LDO_VSRAM_MD_ELR              0x1b46
+#define MT6359_LDO_VFE28_CON0                0x1b88
+#define MT6359_LDO_VFE28_MON                 0x1b8a
+#define MT6359_LDO_VXO22_CON0                0x1b98
+#define MT6359_LDO_VXO22_MON                 0x1b9a
+#define MT6359_LDO_VRF18_CON0                0x1ba8
+#define MT6359_LDO_VRF18_MON                 0x1baa
+#define MT6359_LDO_VRF12_CON0                0x1bb8
+#define MT6359_LDO_VRF12_MON                 0x1bba
+#define MT6359_LDO_VEFUSE_CON0               0x1bc8
+#define MT6359_LDO_VEFUSE_MON                0x1bca
+#define MT6359_LDO_VCN33_1_CON0              0x1bd8
+#define MT6359_LDO_VCN33_1_MON               0x1bda
+#define MT6359_LDO_VCN33_1_MULTI_SW          0x1be8
+#define MT6359_LDO_VCN33_2_CON0              0x1c08
+#define MT6359_LDO_VCN33_2_MON               0x1c0a
+#define MT6359_LDO_VCN33_2_MULTI_SW          0x1c18
+#define MT6359_LDO_VCN13_CON0                0x1c1a
+#define MT6359_LDO_VCN13_MON                 0x1c1c
+#define MT6359_LDO_VCN18_CON0                0x1c2a
+#define MT6359_LDO_VCN18_MON                 0x1c2c
+#define MT6359_LDO_VA09_CON0                 0x1c3a
+#define MT6359_LDO_VA09_MON                  0x1c3c
+#define MT6359_LDO_VCAMIO_CON0               0x1c4a
+#define MT6359_LDO_VCAMIO_MON                0x1c4c
+#define MT6359_LDO_VA12_CON0                 0x1c5a
+#define MT6359_LDO_VA12_MON                  0x1c5c
+#define MT6359_LDO_VAUX18_CON0               0x1c88
+#define MT6359_LDO_VAUX18_MON                0x1c8a
+#define MT6359_LDO_VAUD18_CON0               0x1c98
+#define MT6359_LDO_VAUD18_MON                0x1c9a
+#define MT6359_LDO_VIO18_CON0                0x1ca8
+#define MT6359_LDO_VIO18_MON                 0x1caa
+#define MT6359_LDO_VEMC_CON0                 0x1cb8
+#define MT6359_LDO_VEMC_MON                  0x1cba
+#define MT6359_LDO_VSIM1_CON0                0x1cc8
+#define MT6359_LDO_VSIM1_MON                 0x1cca
+#define MT6359_LDO_VSIM2_CON0                0x1cd8
+#define MT6359_LDO_VSIM2_MON                 0x1cda
+#define MT6359_LDO_VUSB_CON0                 0x1d08
+#define MT6359_LDO_VUSB_MON                  0x1d0a
+#define MT6359_LDO_VUSB_MULTI_SW             0x1d18
+#define MT6359_LDO_VRFCK_CON0                0x1d1a
+#define MT6359_LDO_VRFCK_MON                 0x1d1c
+#define MT6359_LDO_VBBCK_CON0                0x1d2a
+#define MT6359_LDO_VBBCK_MON                 0x1d2c
+#define MT6359_LDO_VBIF28_CON0               0x1d3a
+#define MT6359_LDO_VBIF28_MON                0x1d3c
+#define MT6359_LDO_VIBR_CON0                 0x1d4a
+#define MT6359_LDO_VIBR_MON                  0x1d4c
+#define MT6359_LDO_VIO28_CON0                0x1d5a
+#define MT6359_LDO_VIO28_MON                 0x1d5c
+#define MT6359_LDO_VM18_CON0                 0x1d88
+#define MT6359_LDO_VM18_MON                  0x1d8a
+#define MT6359_LDO_VUFS_CON0                 0x1d98
+#define MT6359_LDO_VUFS_MON                  0x1d9a
+#define MT6359_LDO_VSRAM_PROC1_CON0          0x1e88
+#define MT6359_LDO_VSRAM_PROC1_MON           0x1e8a
+#define MT6359_LDO_VSRAM_PROC1_VOSEL1        0x1e8e
+#define MT6359_LDO_VSRAM_PROC2_CON0          0x1ea6
+#define MT6359_LDO_VSRAM_PROC2_MON           0x1ea8
+#define MT6359_LDO_VSRAM_PROC2_VOSEL1        0x1eac
+#define MT6359_LDO_VSRAM_OTHERS_CON0         0x1f08
+#define MT6359_LDO_VSRAM_OTHERS_MON          0x1f0a
+#define MT6359_LDO_VSRAM_OTHERS_VOSEL1       0x1f0e
+#define MT6359_LDO_VSRAM_OTHERS_SSHUB        0x1f26
+#define MT6359_LDO_VSRAM_MD_CON0             0x1f2c
+#define MT6359_LDO_VSRAM_MD_MON              0x1f2e
+#define MT6359_LDO_VSRAM_MD_VOSEL1           0x1f32
+#define MT6359_VFE28_ANA_CON0                0x1f88
+#define MT6359_VAUX18_ANA_CON0               0x1f8c
+#define MT6359_VUSB_ANA_CON0                 0x1f90
+#define MT6359_VBIF28_ANA_CON0               0x1f94
+#define MT6359_VCN33_1_ANA_CON0              0x1f98
+#define MT6359_VCN33_2_ANA_CON0              0x1f9c
+#define MT6359_VEMC_ANA_CON0                 0x1fa0
+#define MT6359_VSIM1_ANA_CON0                0x1fa4
+#define MT6359_VSIM2_ANA_CON0                0x1fa8
+#define MT6359_VIO28_ANA_CON0                0x1fac
+#define MT6359_VIBR_ANA_CON0                 0x1fb0
+#define MT6359_VRF18_ANA_CON0                0x2008
+#define MT6359_VEFUSE_ANA_CON0               0x200c
+#define MT6359_VCN18_ANA_CON0                0x2010
+#define MT6359_VCAMIO_ANA_CON0               0x2014
+#define MT6359_VAUD18_ANA_CON0               0x2018
+#define MT6359_VIO18_ANA_CON0                0x201c
+#define MT6359_VM18_ANA_CON0                 0x2020
+#define MT6359_VUFS_ANA_CON0                 0x2024
+#define MT6359_VRF12_ANA_CON0                0x202a
+#define MT6359_VCN13_ANA_CON0                0x202e
+#define MT6359_VA09_ANA_CON0                 0x2032
+#define MT6359_VA12_ANA_CON0                 0x2036
+#define MT6359_VXO22_ANA_CON0                0x2088
+#define MT6359_VRFCK_ANA_CON0                0x208c
+#define MT6359_VBBCK_ANA_CON0                0x2094
+#define MT6359_AUD_TOP_INT_CON0              0x2328
+#define MT6359_AUD_TOP_INT_STATUS0           0x2334
+
+#define MT6359_RG_BUCK_VPU_EN_ADDR             MT6359_BUCK_VPU_CON0
+#define MT6359_RG_BUCK_VPU_LP_ADDR             MT6359_BUCK_VPU_CON0
+#define MT6359_RG_BUCK_VPU_LP_SHIFT            1
+#define MT6359_DA_VPU_VOSEL_ADDR               MT6359_BUCK_VPU_DBG0
+#define MT6359_DA_VPU_VOSEL_MASK               0x7F
+#define MT6359_DA_VPU_VOSEL_SHIFT              0
+#define MT6359_DA_VPU_EN_ADDR                  MT6359_BUCK_VPU_DBG1
+#define MT6359_RG_BUCK_VPU_VOSEL_ADDR          MT6359_BUCK_VPU_ELR0
+#define MT6359_RG_BUCK_VPU_VOSEL_MASK          0x7F
+#define MT6359_RG_BUCK_VPU_VOSEL_SHIFT         0
+#define MT6359_RG_BUCK_VCORE_EN_ADDR           MT6359_BUCK_VCORE_CON0
+#define MT6359_RG_BUCK_VCORE_LP_ADDR           MT6359_BUCK_VCORE_CON0
+#define MT6359_RG_BUCK_VCORE_LP_SHIFT          1
+#define MT6359_DA_VCORE_VOSEL_ADDR             MT6359_BUCK_VCORE_DBG0
+#define MT6359_DA_VCORE_VOSEL_MASK             0x7F
+#define MT6359_DA_VCORE_VOSEL_SHIFT            0
+#define MT6359_DA_VCORE_EN_ADDR                MT6359_BUCK_VCORE_DBG1
+#define MT6359_RG_BUCK_VCORE_SSHUB_EN_ADDR     MT6359_BUCK_VCORE_SSHUB_CON0
+#define MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_ADDR  MT6359_BUCK_VCORE_SSHUB_CON0
+#define MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_MASK  0x7F
+#define MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_SHIFT 4
+#define MT6359_RG_BUCK_VCORE_VOSEL_ADDR        MT6359_BUCK_VCORE_ELR0
+#define MT6359_RG_BUCK_VCORE_VOSEL_MASK        0x7F
+#define MT6359_RG_BUCK_VCORE_VOSEL_SHIFT       0
+#define MT6359_RG_BUCK_VGPU11_EN_ADDR          MT6359_BUCK_VGPU11_CON0
+#define MT6359_RG_BUCK_VGPU11_LP_ADDR          MT6359_BUCK_VGPU11_CON0
+#define MT6359_RG_BUCK_VGPU11_LP_SHIFT         1
+#define MT6359_DA_VGPU11_VOSEL_ADDR            MT6359_BUCK_VGPU11_DBG0
+#define MT6359_DA_VGPU11_VOSEL_MASK            0x7F
+#define MT6359_DA_VGPU11_VOSEL_SHIFT           0
+#define MT6359_DA_VGPU11_EN_ADDR               MT6359_BUCK_VGPU11_DBG1
+#define MT6359_RG_BUCK_VGPU11_VOSEL_ADDR       MT6359_BUCK_VGPU11_ELR0
+#define MT6359_RG_BUCK_VGPU11_VOSEL_MASK       0x7F
+#define MT6359_RG_BUCK_VGPU11_VOSEL_SHIFT      0
+#define MT6359_RG_BUCK_VMODEM_EN_ADDR          MT6359_BUCK_VMODEM_CON0
+#define MT6359_RG_BUCK_VMODEM_LP_ADDR          MT6359_BUCK_VMODEM_CON0
+#define MT6359_RG_BUCK_VMODEM_LP_SHIFT         1
+#define MT6359_DA_VMODEM_VOSEL_ADDR            MT6359_BUCK_VMODEM_DBG0
+#define MT6359_DA_VMODEM_VOSEL_MASK            0x7F
+#define MT6359_DA_VMODEM_VOSEL_SHIFT           0
+#define MT6359_DA_VMODEM_EN_ADDR               MT6359_BUCK_VMODEM_DBG1
+#define MT6359_RG_BUCK_VMODEM_VOSEL_ADDR       MT6359_BUCK_VMODEM_ELR0
+#define MT6359_RG_BUCK_VMODEM_VOSEL_MASK       0x7F
+#define MT6359_RG_BUCK_VMODEM_VOSEL_SHIFT      0
+#define MT6359_RG_BUCK_VPROC1_EN_ADDR          MT6359_BUCK_VPROC1_CON0
+#define MT6359_RG_BUCK_VPROC1_LP_ADDR          MT6359_BUCK_VPROC1_CON0
+#define MT6359_RG_BUCK_VPROC1_LP_SHIFT         1
+#define MT6359_DA_VPROC1_VOSEL_ADDR            MT6359_BUCK_VPROC1_DBG0
+#define MT6359_DA_VPROC1_VOSEL_MASK            0x7F
+#define MT6359_DA_VPROC1_VOSEL_SHIFT           0
+#define MT6359_DA_VPROC1_EN_ADDR               MT6359_BUCK_VPROC1_DBG1
+#define MT6359_RG_BUCK_VPROC1_VOSEL_ADDR       MT6359_BUCK_VPROC1_ELR0
+#define MT6359_RG_BUCK_VPROC1_VOSEL_MASK       0x7F
+#define MT6359_RG_BUCK_VPROC1_VOSEL_SHIFT      0
+#define MT6359_RG_BUCK_VPROC2_EN_ADDR          MT6359_BUCK_VPROC2_CON0
+#define MT6359_RG_BUCK_VPROC2_LP_ADDR          MT6359_BUCK_VPROC2_CON0
+#define MT6359_RG_BUCK_VPROC2_LP_SHIFT         1
+#define MT6359_DA_VPROC2_VOSEL_ADDR            MT6359_BUCK_VPROC2_DBG0
+#define MT6359_DA_VPROC2_VOSEL_MASK            0x7F
+#define MT6359_DA_VPROC2_VOSEL_SHIFT           0
+#define MT6359_DA_VPROC2_EN_ADDR               MT6359_BUCK_VPROC2_DBG1
+#define MT6359_RG_BUCK_VPROC2_VOSEL_ADDR       MT6359_BUCK_VPROC2_ELR0
+#define MT6359_RG_BUCK_VPROC2_VOSEL_MASK       0x7F
+#define MT6359_RG_BUCK_VPROC2_VOSEL_SHIFT      0
+#define MT6359_RG_BUCK_VS1_EN_ADDR             MT6359_BUCK_VS1_CON0
+#define MT6359_RG_BUCK_VS1_LP_ADDR             MT6359_BUCK_VS1_CON0
+#define MT6359_RG_BUCK_VS1_LP_SHIFT            1
+#define MT6359_DA_VS1_VOSEL_ADDR               MT6359_BUCK_VS1_DBG0
+#define MT6359_DA_VS1_VOSEL_MASK               0x7F
+#define MT6359_DA_VS1_VOSEL_SHIFT              0
+#define MT6359_DA_VS1_EN_ADDR                  MT6359_BUCK_VS1_DBG1
+#define MT6359_RG_BUCK_VS1_VOSEL_ADDR          MT6359_BUCK_VS1_ELR0
+#define MT6359_RG_BUCK_VS1_VOSEL_MASK          0x7F
+#define MT6359_RG_BUCK_VS1_VOSEL_SHIFT         0
+#define MT6359_RG_BUCK_VS2_EN_ADDR             MT6359_BUCK_VS2_CON0
+#define MT6359_RG_BUCK_VS2_LP_ADDR             MT6359_BUCK_VS2_CON0
+#define MT6359_RG_BUCK_VS2_LP_SHIFT            1
+#define MT6359_DA_VS2_VOSEL_ADDR               MT6359_BUCK_VS2_DBG0
+#define MT6359_DA_VS2_VOSEL_MASK               0x7F
+#define MT6359_DA_VS2_VOSEL_SHIFT              0
+#define MT6359_DA_VS2_EN_ADDR                  MT6359_BUCK_VS2_DBG1
+#define MT6359_RG_BUCK_VS2_VOSEL_ADDR          MT6359_BUCK_VS2_ELR0
+#define MT6359_RG_BUCK_VS2_VOSEL_MASK          0x7F
+#define MT6359_RG_BUCK_VS2_VOSEL_SHIFT         0
+#define MT6359_RG_BUCK_VPA_EN_ADDR             MT6359_BUCK_VPA_CON0
+#define MT6359_RG_BUCK_VPA_LP_ADDR             MT6359_BUCK_VPA_CON0
+#define MT6359_RG_BUCK_VPA_LP_SHIFT            1
+#define MT6359_RG_BUCK_VPA_VOSEL_ADDR          MT6359_BUCK_VPA_CON1
+#define MT6359_RG_BUCK_VPA_VOSEL_MASK          0x3F
+#define MT6359_RG_BUCK_VPA_VOSEL_SHIFT         0
+#define MT6359_DA_VPA_VOSEL_ADDR               MT6359_BUCK_VPA_DBG0
+#define MT6359_DA_VPA_VOSEL_MASK               0x3F
+#define MT6359_DA_VPA_VOSEL_SHIFT              0
+#define MT6359_DA_VPA_EN_ADDR                  MT6359_BUCK_VPA_DBG1
+#define MT6359_RG_VGPU11_FCCM_ADDR             MT6359_VGPUVCORE_ANA_CON2
+#define MT6359_RG_VGPU11_FCCM_SHIFT            9
+#define MT6359_RG_VCORE_FCCM_ADDR              MT6359_VGPUVCORE_ANA_CON13
+#define MT6359_RG_VCORE_FCCM_SHIFT             5
+#define MT6359_RG_VPROC1_FCCM_ADDR             MT6359_VPROC1_ANA_CON3
+#define MT6359_RG_VPROC1_FCCM_SHIFT            1
+#define MT6359_RG_VPROC2_FCCM_ADDR             MT6359_VPROC2_ANA_CON3
+#define MT6359_RG_VPROC2_FCCM_SHIFT            1
+#define MT6359_RG_VMODEM_FCCM_ADDR             MT6359_VMODEM_ANA_CON3
+#define MT6359_RG_VMODEM_FCCM_SHIFT            1
+#define MT6359_RG_VPU_FCCM_ADDR                MT6359_VPU_ANA_CON3
+#define MT6359_RG_VPU_FCCM_SHIFT               1
+#define MT6359_RG_VS1_FPWM_ADDR                MT6359_VS1_ANA_CON0
+#define MT6359_RG_VS1_FPWM_SHIFT               3
+#define MT6359_RG_VS2_FPWM_ADDR                MT6359_VS2_ANA_CON0
+#define MT6359_RG_VS2_FPWM_SHIFT               3
+#define MT6359_RG_VPA_MODESET_ADDR             MT6359_VPA_ANA_CON0
+#define MT6359_RG_VPA_MODESET_SHIFT            1
+#define MT6359_RG_LDO_VSRAM_PROC1_VOSEL_ADDR   MT6359_LDO_VSRAM_PROC1_ELR
+#define MT6359_RG_LDO_VSRAM_PROC1_VOSEL_MASK   0x7F
+#define MT6359_RG_LDO_VSRAM_PROC1_VOSEL_SHIFT  0
+#define MT6359_RG_LDO_VSRAM_PROC2_VOSEL_ADDR   MT6359_LDO_VSRAM_PROC2_ELR
+#define MT6359_RG_LDO_VSRAM_PROC2_VOSEL_MASK   0x7F
+#define MT6359_RG_LDO_VSRAM_PROC2_VOSEL_SHIFT  0
+#define MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR  MT6359_LDO_VSRAM_OTHERS_ELR
+#define MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_MASK  0x7F
+#define MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_SHIFT 0
+#define MT6359_RG_LDO_VSRAM_MD_VOSEL_ADDR      MT6359_LDO_VSRAM_MD_ELR
+#define MT6359_RG_LDO_VSRAM_MD_VOSEL_MASK      0x7F
+#define MT6359_RG_LDO_VSRAM_MD_VOSEL_SHIFT     0
+#define MT6359_RG_LDO_VFE28_EN_ADDR            MT6359_LDO_VFE28_CON0
+#define MT6359_DA_VFE28_B_EN_ADDR              MT6359_LDO_VFE28_MON
+#define MT6359_RG_LDO_VXO22_EN_ADDR            MT6359_LDO_VXO22_CON0
+#define MT6359_RG_LDO_VXO22_EN_SHIFT           0
+#define MT6359_DA_VXO22_B_EN_ADDR              MT6359_LDO_VXO22_MON
+#define MT6359_RG_LDO_VRF18_EN_ADDR            MT6359_LDO_VRF18_CON0
+#define MT6359_RG_LDO_VRF18_EN_SHIFT           0
+#define MT6359_DA_VRF18_B_EN_ADDR              MT6359_LDO_VRF18_MON
+#define MT6359_RG_LDO_VRF12_EN_ADDR            MT6359_LDO_VRF12_CON0
+#define MT6359_RG_LDO_VRF12_EN_SHIFT           0
+#define MT6359_DA_VRF12_B_EN_ADDR              MT6359_LDO_VRF12_MON
+#define MT6359_RG_LDO_VEFUSE_EN_ADDR           MT6359_LDO_VEFUSE_CON0
+#define MT6359_RG_LDO_VEFUSE_EN_SHIFT          0
+#define MT6359_DA_VEFUSE_B_EN_ADDR             MT6359_LDO_VEFUSE_MON
+#define MT6359_RG_LDO_VCN33_1_EN_0_ADDR        MT6359_LDO_VCN33_1_CON0
+#define MT6359_RG_LDO_VCN33_1_EN_0_MASK        0x1
+#define MT6359_RG_LDO_VCN33_1_EN_0_SHIFT       0
+#define MT6359_DA_VCN33_1_B_EN_ADDR            MT6359_LDO_VCN33_1_MON
+#define MT6359_RG_LDO_VCN33_1_EN_1_ADDR        MT6359_LDO_VCN33_1_MULTI_SW
+#define MT6359_RG_LDO_VCN33_1_EN_1_SHIFT       15
+#define MT6359_RG_LDO_VCN33_2_EN_0_ADDR        MT6359_LDO_VCN33_2_CON0
+#define MT6359_RG_LDO_VCN33_2_EN_0_SHIFT       0
+#define MT6359_DA_VCN33_2_B_EN_ADDR            MT6359_LDO_VCN33_2_MON
+#define MT6359_RG_LDO_VCN33_2_EN_1_ADDR        MT6359_LDO_VCN33_2_MULTI_SW
+#define MT6359_RG_LDO_VCN33_2_EN_1_MASK        0x1
+#define MT6359_RG_LDO_VCN33_2_EN_1_SHIFT       15
+#define MT6359_RG_LDO_VCN13_EN_ADDR            MT6359_LDO_VCN13_CON0
+#define MT6359_RG_LDO_VCN13_EN_SHIFT           0
+#define MT6359_DA_VCN13_B_EN_ADDR              MT6359_LDO_VCN13_MON
+#define MT6359_RG_LDO_VCN18_EN_ADDR            MT6359_LDO_VCN18_CON0
+#define MT6359_DA_VCN18_B_EN_ADDR              MT6359_LDO_VCN18_MON
+#define MT6359_RG_LDO_VA09_EN_ADDR             MT6359_LDO_VA09_CON0
+#define MT6359_RG_LDO_VA09_EN_SHIFT            0
+#define MT6359_DA_VA09_B_EN_ADDR               MT6359_LDO_VA09_MON
+#define MT6359_RG_LDO_VCAMIO_EN_ADDR           MT6359_LDO_VCAMIO_CON0
+#define MT6359_RG_LDO_VCAMIO_EN_SHIFT          0
+#define MT6359_DA_VCAMIO_B_EN_ADDR             MT6359_LDO_VCAMIO_MON
+#define MT6359_RG_LDO_VA12_EN_ADDR             MT6359_LDO_VA12_CON0
+#define MT6359_RG_LDO_VA12_EN_SHIFT            0
+#define MT6359_DA_VA12_B_EN_ADDR               MT6359_LDO_VA12_MON
+#define MT6359_RG_LDO_VAUX18_EN_ADDR           MT6359_LDO_VAUX18_CON0
+#define MT6359_DA_VAUX18_B_EN_ADDR             MT6359_LDO_VAUX18_MON
+#define MT6359_RG_LDO_VAUD18_EN_ADDR           MT6359_LDO_VAUD18_CON0
+#define MT6359_DA_VAUD18_B_EN_ADDR             MT6359_LDO_VAUD18_MON
+#define MT6359_RG_LDO_VIO18_EN_ADDR            MT6359_LDO_VIO18_CON0
+#define MT6359_RG_LDO_VIO18_EN_SHIFT           0
+#define MT6359_DA_VIO18_B_EN_ADDR              MT6359_LDO_VIO18_MON
+#define MT6359_RG_LDO_VEMC_EN_ADDR             MT6359_LDO_VEMC_CON0
+#define MT6359_RG_LDO_VEMC_EN_SHIFT            0
+#define MT6359_DA_VEMC_B_EN_ADDR               MT6359_LDO_VEMC_MON
+#define MT6359_RG_LDO_VSIM1_EN_ADDR            MT6359_LDO_VSIM1_CON0
+#define MT6359_RG_LDO_VSIM1_EN_SHIFT           0
+#define MT6359_DA_VSIM1_B_EN_ADDR              MT6359_LDO_VSIM1_MON
+#define MT6359_RG_LDO_VSIM2_EN_ADDR            MT6359_LDO_VSIM2_CON0
+#define MT6359_RG_LDO_VSIM2_EN_SHIFT           0
+#define MT6359_DA_VSIM2_B_EN_ADDR              MT6359_LDO_VSIM2_MON
+#define MT6359_RG_LDO_VUSB_EN_0_ADDR           MT6359_LDO_VUSB_CON0
+#define MT6359_RG_LDO_VUSB_EN_0_MASK           0x1
+#define MT6359_RG_LDO_VUSB_EN_0_SHIFT          0
+#define MT6359_DA_VUSB_B_EN_ADDR               MT6359_LDO_VUSB_MON
+#define MT6359_RG_LDO_VUSB_EN_1_ADDR           MT6359_LDO_VUSB_MULTI_SW
+#define MT6359_RG_LDO_VUSB_EN_1_MASK           0x1
+#define MT6359_RG_LDO_VUSB_EN_1_SHIFT          15
+#define MT6359_RG_LDO_VRFCK_EN_ADDR            MT6359_LDO_VRFCK_CON0
+#define MT6359_RG_LDO_VRFCK_EN_SHIFT           0
+#define MT6359_DA_VRFCK_B_EN_ADDR              MT6359_LDO_VRFCK_MON
+#define MT6359_RG_LDO_VBBCK_EN_ADDR            MT6359_LDO_VBBCK_CON0
+#define MT6359_RG_LDO_VBBCK_EN_SHIFT           0
+#define MT6359_DA_VBBCK_B_EN_ADDR              MT6359_LDO_VBBCK_MON
+#define MT6359_RG_LDO_VBIF28_EN_ADDR           MT6359_LDO_VBIF28_CON0
+#define MT6359_DA_VBIF28_B_EN_ADDR             MT6359_LDO_VBIF28_MON
+#define MT6359_RG_LDO_VIBR_EN_ADDR             MT6359_LDO_VIBR_CON0
+#define MT6359_RG_LDO_VIBR_EN_SHIFT            0
+#define MT6359_DA_VIBR_B_EN_ADDR               MT6359_LDO_VIBR_MON
+#define MT6359_RG_LDO_VIO28_EN_ADDR            MT6359_LDO_VIO28_CON0
+#define MT6359_RG_LDO_VIO28_EN_SHIFT           0
+#define MT6359_DA_VIO28_B_EN_ADDR              MT6359_LDO_VIO28_MON
+#define MT6359_RG_LDO_VM18_EN_ADDR             MT6359_LDO_VM18_CON0
+#define MT6359_RG_LDO_VM18_EN_SHIFT            0
+#define MT6359_DA_VM18_B_EN_ADDR               MT6359_LDO_VM18_MON
+#define MT6359_RG_LDO_VUFS_EN_ADDR             MT6359_LDO_VUFS_CON0
+#define MT6359_RG_LDO_VUFS_EN_SHIFT            0
+#define MT6359_DA_VUFS_B_EN_ADDR               MT6359_LDO_VUFS_MON
+#define MT6359_RG_LDO_VSRAM_PROC1_EN_ADDR      MT6359_LDO_VSRAM_PROC1_CON0
+#define MT6359_DA_VSRAM_PROC1_B_EN_ADDR        MT6359_LDO_VSRAM_PROC1_MON
+#define MT6359_DA_VSRAM_PROC1_VOSEL_ADDR       MT6359_LDO_VSRAM_PROC1_VOSEL1
+#define MT6359_DA_VSRAM_PROC1_VOSEL_MASK       0x7F
+#define MT6359_DA_VSRAM_PROC1_VOSEL_SHIFT      8
+#define MT6359_RG_LDO_VSRAM_PROC2_EN_ADDR      MT6359_LDO_VSRAM_PROC2_CON0
+#define MT6359_DA_VSRAM_PROC2_B_EN_ADDR        MT6359_LDO_VSRAM_PROC2_MON
+#define MT6359_DA_VSRAM_PROC2_VOSEL_ADDR       MT6359_LDO_VSRAM_PROC2_VOSEL1
+#define MT6359_DA_VSRAM_PROC2_VOSEL_MASK       0x7F
+#define MT6359_DA_VSRAM_PROC2_VOSEL_SHIFT      8
+#define MT6359_RG_LDO_VSRAM_OTHERS_EN_ADDR     MT6359_LDO_VSRAM_OTHERS_CON0
+#define MT6359_DA_VSRAM_OTHERS_B_EN_ADDR       MT6359_LDO_VSRAM_OTHERS_MON
+#define MT6359_DA_VSRAM_OTHERS_VOSEL_ADDR      MT6359_LDO_VSRAM_OTHERS_VOSEL1
+#define MT6359_DA_VSRAM_OTHERS_VOSEL_MASK      0x7F
+#define MT6359_DA_VSRAM_OTHERS_VOSEL_SHIFT     8
+#define MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR     MT6359_LDO_VSRAM_OTHERS_SSHUB
+#define MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR  MT6359_LDO_VSRAM_OTHERS_SSHUB
+#define MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_MASK  0x7F
+#define MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_SHIFT 1
+#define MT6359_RG_LDO_VSRAM_MD_EN_ADDR         MT6359_LDO_VSRAM_MD_CON0
+#define MT6359_DA_VSRAM_MD_B_EN_ADDR           MT6359_LDO_VSRAM_MD_MON
+#define MT6359_DA_VSRAM_MD_VOSEL_ADDR          MT6359_LDO_VSRAM_MD_VOSEL1
+#define MT6359_DA_VSRAM_MD_VOSEL_MASK          0x7F
+#define MT6359_DA_VSRAM_MD_VOSEL_SHIFT         8
+#define MT6359_RG_VCN33_1_VOSEL_ADDR           MT6359_VCN33_1_ANA_CON0
+#define MT6359_RG_VCN33_1_VOSEL_MASK           0xF
+#define MT6359_RG_VCN33_1_VOSEL_SHIFT          8
+#define MT6359_RG_VCN33_2_VOSEL_ADDR           MT6359_VCN33_2_ANA_CON0
+#define MT6359_RG_VCN33_2_VOSEL_MASK           0xF
+#define MT6359_RG_VCN33_2_VOSEL_SHIFT          8
+#define MT6359_RG_VEMC_VOSEL_ADDR              MT6359_VEMC_ANA_CON0
+#define MT6359_RG_VEMC_VOSEL_MASK              0xF
+#define MT6359_RG_VEMC_VOSEL_SHIFT             8
+#define MT6359_RG_VSIM1_VOSEL_ADDR             MT6359_VSIM1_ANA_CON0
+#define MT6359_RG_VSIM1_VOSEL_MASK             0xF
+#define MT6359_RG_VSIM1_VOSEL_SHIFT            8
+#define MT6359_RG_VSIM2_VOSEL_ADDR             MT6359_VSIM2_ANA_CON0
+#define MT6359_RG_VSIM2_VOSEL_MASK             0xF
+#define MT6359_RG_VSIM2_VOSEL_SHIFT            8
+#define MT6359_RG_VIO28_VOSEL_ADDR             MT6359_VIO28_ANA_CON0
+#define MT6359_RG_VIO28_VOSEL_MASK             0xF
+#define MT6359_RG_VIO28_VOSEL_SHIFT            8
+#define MT6359_RG_VIBR_VOSEL_ADDR              MT6359_VIBR_ANA_CON0
+#define MT6359_RG_VIBR_VOSEL_MASK              0xF
+#define MT6359_RG_VIBR_VOSEL_SHIFT             8
+#define MT6359_RG_VRF18_VOSEL_ADDR             MT6359_VRF18_ANA_CON0
+#define MT6359_RG_VRF18_VOSEL_MASK             0xF
+#define MT6359_RG_VRF18_VOSEL_SHIFT            8
+#define MT6359_RG_VEFUSE_VOSEL_ADDR            MT6359_VEFUSE_ANA_CON0
+#define MT6359_RG_VEFUSE_VOSEL_MASK            0xF
+#define MT6359_RG_VEFUSE_VOSEL_SHIFT           8
+#define MT6359_RG_VCAMIO_VOSEL_ADDR            MT6359_VCAMIO_ANA_CON0
+#define MT6359_RG_VCAMIO_VOSEL_MASK            0xF
+#define MT6359_RG_VCAMIO_VOSEL_SHIFT           8
+#define MT6359_RG_VIO18_VOSEL_ADDR             MT6359_VIO18_ANA_CON0
+#define MT6359_RG_VIO18_VOSEL_MASK             0xF
+#define MT6359_RG_VIO18_VOSEL_SHIFT            8
+#define MT6359_RG_VM18_VOSEL_ADDR              MT6359_VM18_ANA_CON0
+#define MT6359_RG_VM18_VOSEL_MASK              0xF
+#define MT6359_RG_VM18_VOSEL_SHIFT             8
+#define MT6359_RG_VUFS_VOSEL_ADDR              MT6359_VUFS_ANA_CON0
+#define MT6359_RG_VUFS_VOSEL_MASK              0xF
+#define MT6359_RG_VUFS_VOSEL_SHIFT             8
+#define MT6359_RG_VRF12_VOSEL_ADDR             MT6359_VRF12_ANA_CON0
+#define MT6359_RG_VRF12_VOSEL_MASK             0xF
+#define MT6359_RG_VRF12_VOSEL_SHIFT            8
+#define MT6359_RG_VCN13_VOSEL_ADDR             MT6359_VCN13_ANA_CON0
+#define MT6359_RG_VCN13_VOSEL_MASK             0xF
+#define MT6359_RG_VCN13_VOSEL_SHIFT            8
+#define MT6359_RG_VA09_VOSEL_ADDR              MT6359_VA09_ANA_CON0
+#define MT6359_RG_VA09_VOSEL_MASK              0xF
+#define MT6359_RG_VA09_VOSEL_SHIFT             8
+#define MT6359_RG_VA12_VOSEL_ADDR              MT6359_VA12_ANA_CON0
+#define MT6359_RG_VA12_VOSEL_MASK              0xF
+#define MT6359_RG_VA12_VOSEL_SHIFT             8
+#define MT6359_RG_VXO22_VOSEL_ADDR             MT6359_VXO22_ANA_CON0
+#define MT6359_RG_VXO22_VOSEL_MASK             0xF
+#define MT6359_RG_VXO22_VOSEL_SHIFT            8
+#define MT6359_RG_VRFCK_VOSEL_ADDR             MT6359_VRFCK_ANA_CON0
+#define MT6359_RG_VRFCK_VOSEL_MASK             0xF
+#define MT6359_RG_VRFCK_VOSEL_SHIFT            8
+#define MT6359_RG_VBBCK_VOSEL_ADDR             MT6359_VBBCK_ANA_CON0
+#define MT6359_RG_VBBCK_VOSEL_MASK             0xF
+#define MT6359_RG_VBBCK_VOSEL_SHIFT            8
+
+#endif /* __MFD_MT6359_REGISTERS_H__ */
diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h
index 949268581b369..56f210eebc541 100644
--- a/include/linux/mfd/mt6397/core.h
+++ b/include/linux/mfd/mt6397/core.h
@@ -13,6 +13,7 @@
 enum chip_id {
 	MT6323_CHIP_ID = 0x23,
 	MT6358_CHIP_ID = 0x58,
+	MT6359_CHIP_ID = 0x59,
 	MT6391_CHIP_ID = 0x91,
 	MT6397_CHIP_ID = 0x97,
 };
-- 
GitLab


From d7a58decc7049e8ca9707b63fcc2556cde3d26c5 Mon Sep 17 00:00:00 2001
From: Wen Su <wen.su@mediatek.com>
Date: Wed, 26 May 2021 14:52:05 +0800
Subject: [PATCH 1936/3804] regulator: mt6359: Add support for MT6359 regulator

The MT6359 is a regulator found on boards based on MediaTek MT6779 and
probably other SoCs. It is a so-called PMIC and connects as a slave to
the SoC using SPI, wrapped inside the pmic-wrapper.

Signed-off-by: Wen Su <wen.su@mediatek.com>
Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/regulator/Kconfig                  |   9 +
 drivers/regulator/Makefile                 |   1 +
 drivers/regulator/mt6359-regulator.c       | 669 +++++++++++++++++++++
 include/linux/regulator/mt6359-regulator.h |  58 ++
 4 files changed, 737 insertions(+)
 create mode 100644 drivers/regulator/mt6359-regulator.c
 create mode 100644 include/linux/regulator/mt6359-regulator.h

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 9d84d9245490e..1ef47c9fb336c 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -779,6 +779,15 @@ config REGULATOR_MT6358
 	  This driver supports the control of different power rails of device
 	  through regulator interface.
 
+config REGULATOR_MT6359
+	tristate "MediaTek MT6359 PMIC"
+	depends on MFD_MT6397
+	help
+	  Say y here to select this option to enable the power regulator of
+	  MediaTek MT6359 PMIC.
+	  This driver supports the control of different power rails of device
+	  through regulator interface.
+
 config REGULATOR_MT6360
 	tristate "MT6360 SubPMIC Regulator"
 	depends on MFD_MT6360
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 580b015296ea2..4f4d99622db83 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -94,6 +94,7 @@ obj-$(CONFIG_REGULATOR_MT6311) += mt6311-regulator.o
 obj-$(CONFIG_REGULATOR_MT6315) += mt6315-regulator.o
 obj-$(CONFIG_REGULATOR_MT6323)	+= mt6323-regulator.o
 obj-$(CONFIG_REGULATOR_MT6358)	+= mt6358-regulator.o
+obj-$(CONFIG_REGULATOR_MT6359)	+= mt6359-regulator.o
 obj-$(CONFIG_REGULATOR_MT6360) += mt6360-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380)	+= mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397)	+= mt6397-regulator.o
diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
new file mode 100644
index 0000000000000..994d3f67f73dc
--- /dev/null
+++ b/drivers/regulator/mt6359-regulator.c
@@ -0,0 +1,669 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2021 MediaTek Inc.
+
+#include <linux/platform_device.h>
+#include <linux/mfd/mt6359/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/mt6359-regulator.h>
+#include <linux/regulator/of_regulator.h>
+
+#define MT6359_BUCK_MODE_AUTO		0	/* modeset field: PWM not forced */
+#define MT6359_BUCK_MODE_FORCE_PWM	1	/* modeset field: forced PWM */
+#define MT6359_BUCK_MODE_NORMAL		0	/* lp_mode field: bit cleared */
+#define MT6359_BUCK_MODE_LP		2	/* written as LP >> 1 to lp_mode */
+
+/*
+ * MT6359 regulators' information
+ * @desc: standard fields of regulator description.
+ * @status_reg: register read to query the on/off status of the regulator.
+ * @qi: mask of the enable-status bit(s) within @status_reg.
+ * @modeset_reg: register selecting AUTO or forced-PWM operation.
+ * @modeset_mask, @modeset_shift: position of the mode field in @modeset_reg.
+ * @lp_mode_reg: register holding the low-power (idle) mode bit.
+ * @lp_mode_mask, @lp_mode_shift: position of that bit in @lp_mode_reg.
+ */
+struct mt6359_regulator_info {
+	struct regulator_desc desc;
+	u32 status_reg;
+	u32 qi;
+	u32 modeset_reg;
+	u32 modeset_mask;
+	u32 modeset_shift;
+	u32 lp_mode_reg;
+	u32 lp_mode_mask;
+	u32 lp_mode_shift;
+};
+
+#define MT6359_BUCK(match, _name, min, max, step, min_sel,	\
+	volt_ranges, _enable_reg, _status_reg,			\
+	_vsel_reg, _vsel_mask,					\
+	_lp_mode_reg, _lp_mode_shift,				\
+	_modeset_reg, _modeset_shift)				\
+[MT6359_ID_##_name] = {						\
+	.desc = {						\
+		.name = #_name,					\
+		.of_match = of_match_ptr(match),		\
+		.regulators_node = of_match_ptr("regulators"),	\
+		.ops = &mt6359_volt_range_ops,			\
+		.type = REGULATOR_VOLTAGE,			\
+		.id = MT6359_ID_##_name,			\
+		.owner = THIS_MODULE,				\
+		.uV_step = (step),				\
+		.linear_min_sel = (min_sel),			\
+		.n_voltages = ((max) - (min)) / (step) + 1,	\
+		.min_uV = (min),				\
+		.linear_ranges = volt_ranges,			\
+		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
+		.vsel_reg = _vsel_reg,				\
+		.vsel_mask = _vsel_mask,			\
+		.enable_reg = _enable_reg,			\
+		.enable_mask = BIT(0),				\
+		.of_map_mode = mt6359_map_mode,			\
+	},							\
+	.status_reg = _status_reg,				\
+	.qi = BIT(0),						\
+	.lp_mode_reg = _lp_mode_reg,				\
+	.lp_mode_mask = BIT(_lp_mode_shift),			\
+	.lp_mode_shift = _lp_mode_shift,			\
+	.modeset_reg = _modeset_reg,				\
+	.modeset_mask = BIT(_modeset_shift),			\
+	.modeset_shift = _modeset_shift				\
+}
+
+#define MT6359_LDO_LINEAR(match, _name, min, max, step, min_sel,\
+	volt_ranges, _enable_reg, _status_reg,			\
+	_vsel_reg, _vsel_mask)					\
+[MT6359_ID_##_name] = {						\
+	.desc = {						\
+		.name = #_name,					\
+		.of_match = of_match_ptr(match),		\
+		.regulators_node = of_match_ptr("regulators"),	\
+		.ops = &mt6359_volt_range_ops,			\
+		.type = REGULATOR_VOLTAGE,			\
+		.id = MT6359_ID_##_name,			\
+		.owner = THIS_MODULE,				\
+		.uV_step = (step),				\
+		.linear_min_sel = (min_sel),			\
+		.n_voltages = ((max) - (min)) / (step) + 1,	\
+		.min_uV = (min),				\
+		.linear_ranges = volt_ranges,			\
+		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
+		.vsel_reg = _vsel_reg,				\
+		.vsel_mask = _vsel_mask,			\
+		.enable_reg = _enable_reg,			\
+		.enable_mask = BIT(0),				\
+	},							\
+	.status_reg = _status_reg,				\
+	.qi = BIT(0),						\
+}
+
+/* Table-based LDO; _enable_shift is the bit index of the enable bit. */
+#define MT6359_LDO(match, _name, _volt_table, _enable_reg, _enable_shift, \
+	_status_reg, _vsel_reg, _vsel_mask, _en_delay)			\
+[MT6359_ID_##_name] = {						\
+	.desc = {						\
+		.name = #_name,					\
+		.of_match = of_match_ptr(match),		\
+		.regulators_node = of_match_ptr("regulators"),	\
+		.ops = &mt6359_volt_table_ops,			\
+		.type = REGULATOR_VOLTAGE,			\
+		.id = MT6359_ID_##_name,			\
+		.owner = THIS_MODULE,				\
+		.n_voltages = ARRAY_SIZE(_volt_table),		\
+		.volt_table = _volt_table,			\
+		.vsel_reg = _vsel_reg,				\
+		.vsel_mask = _vsel_mask,			\
+		.enable_reg = _enable_reg,			\
+		.enable_mask = BIT(_enable_shift),		\
+		.enable_time = _en_delay,			\
+	},							\
+	.status_reg = _status_reg,				\
+	.qi = BIT(0),						\
+}
+
+#define MT6359_REG_FIXED(match, _name, _enable_reg,	\
+	_status_reg, _fixed_volt)			\
+[MT6359_ID_##_name] = {					\
+	.desc = {					\
+		.name = #_name,				\
+		.of_match = of_match_ptr(match),	\
+		.regulators_node = of_match_ptr("regulators"),	\
+		.ops = &mt6359_volt_fixed_ops,		\
+		.type = REGULATOR_VOLTAGE,		\
+		.id = MT6359_ID_##_name,		\
+		.owner = THIS_MODULE,			\
+		.n_voltages = 1,			\
+		.enable_reg = _enable_reg,		\
+		.enable_mask = BIT(0),			\
+		.fixed_uV = (_fixed_volt),		\
+	},						\
+	.status_reg = _status_reg,			\
+	.qi = BIT(0),					\
+}
+
+static const struct linear_range mt_volt_range1[] = {
+	REGULATOR_LINEAR_RANGE(800000, 0, 0x70, 12500),
+};
+
+static const struct linear_range mt_volt_range2[] = {
+	REGULATOR_LINEAR_RANGE(400000, 0, 0x7f, 6250),
+};
+
+static const struct linear_range mt_volt_range3[] = {
+	REGULATOR_LINEAR_RANGE(400000, 0, 0x70, 6250),
+};
+
+static const struct linear_range mt_volt_range4[] = {
+	REGULATOR_LINEAR_RANGE(800000, 0, 0x40, 12500),
+};
+
+static const struct linear_range mt_volt_range5[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x3F, 50000),
+};
+
+static const struct linear_range mt_volt_range6[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
+};
+
+static const struct linear_range mt_volt_range7[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
+};
+
+static const u32 vsim1_voltages[] = {
+	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
+};
+
+static const u32 vibr_voltages[] = {
+	1200000, 1300000, 1500000, 0, 1800000, 2000000, 0, 0, 2700000, 2800000,
+	0, 3000000, 0, 3300000,
+};
+
+static const u32 vrf12_voltages[] = {
+	0, 0, 1100000, 1200000,	1300000,
+};
+
+static const u32 volt18_voltages[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1700000, 1800000, 1900000,
+};
+
+static const u32 vcn13_voltages[] = {
+	900000, 1000000, 0, 1200000, 1300000,
+};
+
+static const u32 vcn33_voltages[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 2800000, 0, 0, 0, 3300000, 3400000, 3500000,
+};
+
+static const u32 vefuse_voltages[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1700000, 1800000, 1900000, 2000000,
+};
+
+static const u32 vxo22_voltages[] = {
+	1800000, 0, 0, 0, 2200000,
+};
+
+static const u32 vrfck_voltages[] = {
+	0, 0, 1500000, 0, 0, 0, 0, 1600000, 0, 0, 0, 0, 1700000,
+};
+
+static const u32 vio28_voltages[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 2800000, 2900000, 3000000, 3100000, 3300000,
+};
+
+static const u32 vemc_voltages[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2900000, 3000000, 0, 3300000,
+};
+
+static const u32 va12_voltages[] = {
+	0, 0, 0, 0, 0, 0, 1200000, 1300000,
+};
+
+static const u32 va09_voltages[] = {
+	0, 0, 800000, 900000, 0, 0, 1200000,
+};
+
+static const u32 vrf18_voltages[] = {
+	0, 0, 0, 0, 0, 1700000, 1800000, 1810000,
+};
+
+static const u32 vbbck_voltages[] = {
+	0, 0, 0, 0, 1100000, 0, 0, 0, 1150000, 0, 0, 0, 1200000,
+};
+
+static const u32 vsim2_voltages[] = {
+	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
+};
+
+/* Translate an MT6359_BUCK_MODE_* value into a REGULATOR_MODE_* constant. */
+static inline unsigned int mt6359_map_mode(unsigned int mode)
+{
+	if (mode == MT6359_BUCK_MODE_NORMAL)
+		return REGULATOR_MODE_NORMAL;
+	if (mode == MT6359_BUCK_MODE_FORCE_PWM)
+		return REGULATOR_MODE_FAST;
+	if (mode == MT6359_BUCK_MODE_LP)
+		return REGULATOR_MODE_IDLE;
+
+	/* Any other value is not a mode this driver knows. */
+	return REGULATOR_MODE_INVALID;
+}
+
+/* Report ON or OFF from the qi bit(s) of the regulator's status register. */
+static int mt6359_get_status(struct regulator_dev *rdev)
+{
+	struct mt6359_regulator_info *info = rdev_get_drvdata(rdev);
+	u32 status;
+	int err;
+
+	err = regmap_read(rdev->regmap, info->status_reg, &status);
+	if (err) {
+		dev_err(&rdev->dev, "Failed to get enable reg: %d\n", err);
+		return err;
+	}
+
+	if (!(status & info->qi))
+		return REGULATOR_STATUS_OFF;
+	return REGULATOR_STATUS_ON;
+}
+
+/* Read the buck's mode; REGULATOR_MODE_INVALID if a register read fails. */
+static unsigned int mt6359_regulator_get_mode(struct regulator_dev *rdev)
+{
+	struct mt6359_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned int regval;	/* regmap_read() takes an unsigned int pointer */
+	int ret;
+
+	ret = regmap_read(rdev->regmap, info->modeset_reg, &regval);
+	if (ret != 0) {
+		dev_err(&rdev->dev,
+			"Failed to get mt6359 buck mode: %d\n", ret);
+		return REGULATOR_MODE_INVALID;	/* unsigned return: no errno */
+	}
+
+	if ((regval & info->modeset_mask) >> info->modeset_shift ==
+		MT6359_BUCK_MODE_FORCE_PWM)
+		return REGULATOR_MODE_FAST;
+
+	ret = regmap_read(rdev->regmap, info->lp_mode_reg, &regval);
+	if (ret != 0) {
+		dev_err(&rdev->dev,
+			"Failed to get mt6359 buck lp mode: %d\n", ret);
+		return REGULATOR_MODE_INVALID;
+	}
+
+	return (regval & info->lp_mode_mask) ? REGULATOR_MODE_IDLE :
+					       REGULATOR_MODE_NORMAL;
+}
+
+/* Put a buck into FAST (forced PWM), NORMAL or IDLE (low power) mode. */
+static int mt6359_regulator_set_mode(struct regulator_dev *rdev,
+				     unsigned int mode)
+{
+	struct mt6359_regulator_info *info = rdev_get_drvdata(rdev);
+	int previous = mt6359_regulator_get_mode(rdev);
+	int leaving_lp = 0;
+	u32 reg, mask;
+	int bits, err;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		reg = info->modeset_reg;
+		mask = info->modeset_mask;
+		bits = MT6359_BUCK_MODE_FORCE_PWM << info->modeset_shift;
+		break;
+	case REGULATOR_MODE_IDLE:
+		reg = info->lp_mode_reg;
+		mask = info->lp_mode_mask;
+		bits = (MT6359_BUCK_MODE_LP >> 1) << info->lp_mode_shift;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		if (previous == REGULATOR_MODE_FAST) {
+			/* Leave forced PWM: write the AUTO value. */
+			reg = info->modeset_reg;
+			mask = info->modeset_mask;
+			bits = MT6359_BUCK_MODE_AUTO << info->modeset_shift;
+		} else if (previous == REGULATOR_MODE_IDLE) {
+			/* Clear the lp_mode bit; 100us delay follows. */
+			reg = info->lp_mode_reg;
+			mask = info->lp_mode_mask;
+			bits = MT6359_BUCK_MODE_NORMAL << info->lp_mode_shift;
+			leaving_lp = 1;
+		} else {
+			/* Not FAST or IDLE: nothing is written. */
+			return 0;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* One read-modify-write serves every case selected above. */
+	err = regmap_update_bits(rdev->regmap, reg, mask, bits);
+	if (leaving_lp)
+		udelay(100);
+
+	if (err != 0) {
+		dev_err(&rdev->dev,
+			"Failed to set mt6359 buck mode: %d\n", err);
+	}
+
+	return err;
+}
+
+static const struct regulator_ops mt6359_volt_range_ops = {
+	.list_voltage = regulator_list_voltage_linear_range,
+	.map_voltage = regulator_map_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6359_get_status,
+	.set_mode = mt6359_regulator_set_mode,
+	.get_mode = mt6359_regulator_get_mode,
+};
+
+static const struct regulator_ops mt6359_volt_table_ops = {
+	.list_voltage = regulator_list_voltage_table,
+	.map_voltage = regulator_map_voltage_iterate,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6359_get_status,
+};
+
+static const struct regulator_ops mt6359_volt_fixed_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6359_get_status,
+};
+
+/* The array is indexed by id(MT6359_ID_XXX) */
+static struct mt6359_regulator_info mt6359_regulators[] = {
+	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500, 0,
+		    mt_volt_range1, MT6359_RG_BUCK_VS1_EN_ADDR,
+		    MT6359_DA_VS1_EN_ADDR, MT6359_RG_BUCK_VS1_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VS1_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VS1_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VS1_LP_ADDR, MT6359_RG_BUCK_VS1_LP_SHIFT,
+		    MT6359_RG_VS1_FPWM_ADDR, MT6359_RG_VS1_FPWM_SHIFT),
+	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VGPU11_EN_ADDR,
+		    MT6359_DA_VGPU11_EN_ADDR, MT6359_RG_BUCK_VGPU11_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VGPU11_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VGPU11_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VGPU11_LP_ADDR,
+		    MT6359_RG_BUCK_VGPU11_LP_SHIFT,
+		    MT6359_RG_VGPU11_FCCM_ADDR, MT6359_RG_VGPU11_FCCM_SHIFT),
+	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250, 0,
+		    mt_volt_range3, MT6359_RG_BUCK_VMODEM_EN_ADDR,
+		    MT6359_DA_VMODEM_EN_ADDR, MT6359_RG_BUCK_VMODEM_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VMODEM_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VMODEM_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VMODEM_LP_ADDR,
+		    MT6359_RG_BUCK_VMODEM_LP_SHIFT,
+		    MT6359_RG_VMODEM_FCCM_ADDR, MT6359_RG_VMODEM_FCCM_SHIFT),
+	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPU_EN_ADDR,
+		    MT6359_DA_VPU_EN_ADDR, MT6359_RG_BUCK_VPU_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPU_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPU_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPU_LP_ADDR, MT6359_RG_BUCK_VPU_LP_SHIFT,
+		    MT6359_RG_VPU_FCCM_ADDR, MT6359_RG_VPU_FCCM_SHIFT),
+	MT6359_BUCK("buck_vcore", VCORE, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VCORE_EN_ADDR,
+		    MT6359_DA_VCORE_EN_ADDR, MT6359_RG_BUCK_VCORE_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VCORE_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VCORE_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VCORE_LP_ADDR, MT6359_RG_BUCK_VCORE_LP_SHIFT,
+		    MT6359_RG_VCORE_FCCM_ADDR, MT6359_RG_VCORE_FCCM_SHIFT),
+	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500, 0,
+		    mt_volt_range4, MT6359_RG_BUCK_VS2_EN_ADDR,
+		    MT6359_DA_VS2_EN_ADDR, MT6359_RG_BUCK_VS2_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VS2_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VS2_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VS2_LP_ADDR, MT6359_RG_BUCK_VS2_LP_SHIFT,
+		    MT6359_RG_VS2_FPWM_ADDR, MT6359_RG_VS2_FPWM_SHIFT),
+	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, 0,
+		    mt_volt_range5, MT6359_RG_BUCK_VPA_EN_ADDR,
+		    MT6359_DA_VPA_EN_ADDR, MT6359_RG_BUCK_VPA_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPA_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPA_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPA_LP_ADDR, MT6359_RG_BUCK_VPA_LP_SHIFT,
+		    MT6359_RG_VPA_MODESET_ADDR, MT6359_RG_VPA_MODESET_SHIFT),
+	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPROC2_EN_ADDR,
+		    MT6359_DA_VPROC2_EN_ADDR, MT6359_RG_BUCK_VPROC2_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPROC2_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPROC2_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPROC2_LP_ADDR,
+		    MT6359_RG_BUCK_VPROC2_LP_SHIFT,
+		    MT6359_RG_VPROC2_FCCM_ADDR, MT6359_RG_VPROC2_FCCM_SHIFT),
+	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPROC1_EN_ADDR,
+		    MT6359_DA_VPROC1_EN_ADDR, MT6359_RG_BUCK_VPROC1_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPROC1_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPROC1_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPROC1_LP_ADDR,
+		    MT6359_RG_BUCK_VPROC1_LP_SHIFT,
+		    MT6359_RG_VPROC1_FCCM_ADDR, MT6359_RG_VPROC1_FCCM_SHIFT),
+	MT6359_BUCK("buck_vcore_sshub", VCORE_SSHUB, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VCORE_SSHUB_EN_ADDR,
+		    MT6359_DA_VCORE_EN_ADDR,
+		    MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VCORE_LP_ADDR, MT6359_RG_BUCK_VCORE_LP_SHIFT,
+		    MT6359_RG_VCORE_FCCM_ADDR, MT6359_RG_VCORE_FCCM_SHIFT),
+	MT6359_REG_FIXED("ldo_vaud18", VAUD18, MT6359_RG_LDO_VAUD18_EN_ADDR,
+			 MT6359_DA_VAUD18_B_EN_ADDR, 1800000),
+	MT6359_LDO("ldo_vsim1", VSIM1, vsim1_voltages,
+		   MT6359_RG_LDO_VSIM1_EN_ADDR, MT6359_RG_LDO_VSIM1_EN_SHIFT,
+		   MT6359_DA_VSIM1_B_EN_ADDR, MT6359_RG_VSIM1_VOSEL_ADDR,
+		   MT6359_RG_VSIM1_VOSEL_MASK << MT6359_RG_VSIM1_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO("ldo_vibr", VIBR, vibr_voltages,
+		   MT6359_RG_LDO_VIBR_EN_ADDR, MT6359_RG_LDO_VIBR_EN_SHIFT,
+		   MT6359_DA_VIBR_B_EN_ADDR, MT6359_RG_VIBR_VOSEL_ADDR,
+		   MT6359_RG_VIBR_VOSEL_MASK << MT6359_RG_VIBR_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vrf12", VRF12, vrf12_voltages,
+		   MT6359_RG_LDO_VRF12_EN_ADDR, MT6359_RG_LDO_VRF12_EN_SHIFT,
+		   MT6359_DA_VRF12_B_EN_ADDR, MT6359_RG_VRF12_VOSEL_ADDR,
+		   MT6359_RG_VRF12_VOSEL_MASK << MT6359_RG_VRF12_VOSEL_SHIFT,
+		   120),
+	MT6359_REG_FIXED("ldo_vusb", VUSB, MT6359_RG_LDO_VUSB_EN_0_ADDR,
+			 MT6359_DA_VUSB_B_EN_ADDR, 3000000),
+	MT6359_LDO_LINEAR("ldo_vsram_proc2", VSRAM_PROC2, 500000, 1293750, 6250,
+			  0, mt_volt_range6, MT6359_RG_LDO_VSRAM_PROC2_EN_ADDR,
+			  MT6359_DA_VSRAM_PROC2_B_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vio18", VIO18, volt18_voltages,
+		   MT6359_RG_LDO_VIO18_EN_ADDR, MT6359_RG_LDO_VIO18_EN_SHIFT,
+		   MT6359_DA_VIO18_B_EN_ADDR, MT6359_RG_VIO18_VOSEL_ADDR,
+		   MT6359_RG_VIO18_VOSEL_MASK << MT6359_RG_VIO18_VOSEL_SHIFT,
+		   960),
+	MT6359_LDO("ldo_vcamio", VCAMIO, volt18_voltages,
+		   MT6359_RG_LDO_VCAMIO_EN_ADDR, MT6359_RG_LDO_VCAMIO_EN_SHIFT,
+		   MT6359_DA_VCAMIO_B_EN_ADDR, MT6359_RG_VCAMIO_VOSEL_ADDR,
+		   MT6359_RG_VCAMIO_VOSEL_MASK << MT6359_RG_VCAMIO_VOSEL_SHIFT,
+		   1290),
+	MT6359_REG_FIXED("ldo_vcn18", VCN18, MT6359_RG_LDO_VCN18_EN_ADDR,
+			 MT6359_DA_VCN18_B_EN_ADDR, 1800000),
+	MT6359_REG_FIXED("ldo_vfe28", VFE28, MT6359_RG_LDO_VFE28_EN_ADDR,
+			 MT6359_DA_VFE28_B_EN_ADDR, 2800000),
+	MT6359_LDO("ldo_vcn13", VCN13, vcn13_voltages,
+		   MT6359_RG_LDO_VCN13_EN_ADDR, MT6359_RG_LDO_VCN13_EN_SHIFT,
+		   MT6359_DA_VCN13_B_EN_ADDR, MT6359_RG_VCN13_VOSEL_ADDR,
+		   MT6359_RG_VCN13_VOSEL_MASK << MT6359_RG_VCN13_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vcn33_1_bt", VCN33_1_BT, vcn33_voltages,
+		   MT6359_RG_LDO_VCN33_1_EN_0_ADDR,
+		   MT6359_RG_LDO_VCN33_1_EN_0_SHIFT,
+		   MT6359_DA_VCN33_1_B_EN_ADDR, MT6359_RG_VCN33_1_VOSEL_ADDR,
+		   MT6359_RG_VCN33_1_VOSEL_MASK <<
+		   MT6359_RG_VCN33_1_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_vcn33_1_wifi", VCN33_1_WIFI, vcn33_voltages,
+		   MT6359_RG_LDO_VCN33_1_EN_1_ADDR,
+		   MT6359_RG_LDO_VCN33_1_EN_1_SHIFT,
+		   MT6359_DA_VCN33_1_B_EN_ADDR, MT6359_RG_VCN33_1_VOSEL_ADDR,
+		   MT6359_RG_VCN33_1_VOSEL_MASK <<
+		   MT6359_RG_VCN33_1_VOSEL_SHIFT, 240),
+	MT6359_REG_FIXED("ldo_vaux18", VAUX18, MT6359_RG_LDO_VAUX18_EN_ADDR,
+			 MT6359_DA_VAUX18_B_EN_ADDR, 1800000),
+	MT6359_LDO_LINEAR("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750,
+			  6250, 0, mt_volt_range6,
+			  MT6359_RG_LDO_VSRAM_OTHERS_EN_ADDR,
+			  MT6359_DA_VSRAM_OTHERS_B_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vefuse", VEFUSE, vefuse_voltages,
+		   MT6359_RG_LDO_VEFUSE_EN_ADDR, MT6359_RG_LDO_VEFUSE_EN_SHIFT,
+		   MT6359_DA_VEFUSE_B_EN_ADDR, MT6359_RG_VEFUSE_VOSEL_ADDR,
+		   MT6359_RG_VEFUSE_VOSEL_MASK << MT6359_RG_VEFUSE_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vxo22", VXO22, vxo22_voltages,
+		   MT6359_RG_LDO_VXO22_EN_ADDR, MT6359_RG_LDO_VXO22_EN_SHIFT,
+		   MT6359_DA_VXO22_B_EN_ADDR, MT6359_RG_VXO22_VOSEL_ADDR,
+		   MT6359_RG_VXO22_VOSEL_MASK << MT6359_RG_VXO22_VOSEL_SHIFT,
+		   120),
+	MT6359_LDO("ldo_vrfck", VRFCK, vrfck_voltages,
+		   MT6359_RG_LDO_VRFCK_EN_ADDR, MT6359_RG_LDO_VRFCK_EN_SHIFT,
+		   MT6359_DA_VRFCK_B_EN_ADDR, MT6359_RG_VRFCK_VOSEL_ADDR,
+		   MT6359_RG_VRFCK_VOSEL_MASK << MT6359_RG_VRFCK_VOSEL_SHIFT,
+		   480),
+	MT6359_REG_FIXED("ldo_vbif28", VBIF28, MT6359_RG_LDO_VBIF28_EN_ADDR,
+			 MT6359_DA_VBIF28_B_EN_ADDR, 2800000),
+	MT6359_LDO("ldo_vio28", VIO28, vio28_voltages,
+		   MT6359_RG_LDO_VIO28_EN_ADDR, MT6359_RG_LDO_VIO28_EN_SHIFT,
+		   MT6359_DA_VIO28_B_EN_ADDR, MT6359_RG_VIO28_VOSEL_ADDR,
+		   MT6359_RG_VIO28_VOSEL_MASK << MT6359_RG_VIO28_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vemc", VEMC, vemc_voltages,
+		   MT6359_RG_LDO_VEMC_EN_ADDR, MT6359_RG_LDO_VEMC_EN_SHIFT,
+		   MT6359_DA_VEMC_B_EN_ADDR, MT6359_RG_VEMC_VOSEL_ADDR,
+		   MT6359_RG_VEMC_VOSEL_MASK << MT6359_RG_VEMC_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vcn33_2_bt", VCN33_2_BT, vcn33_voltages,
+		   MT6359_RG_LDO_VCN33_2_EN_0_ADDR,
+		   MT6359_RG_LDO_VCN33_2_EN_0_SHIFT,
+		   MT6359_DA_VCN33_2_B_EN_ADDR, MT6359_RG_VCN33_2_VOSEL_ADDR,
+		   MT6359_RG_VCN33_2_VOSEL_MASK <<
+		   MT6359_RG_VCN33_2_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_vcn33_2_wifi", VCN33_2_WIFI, vcn33_voltages,
+		   MT6359_RG_LDO_VCN33_2_EN_1_ADDR,
+		   MT6359_RG_LDO_VCN33_2_EN_1_SHIFT,
+		   MT6359_DA_VCN33_2_B_EN_ADDR, MT6359_RG_VCN33_2_VOSEL_ADDR,
+		   MT6359_RG_VCN33_2_VOSEL_MASK <<
+		   MT6359_RG_VCN33_2_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_va12", VA12, va12_voltages,
+		   MT6359_RG_LDO_VA12_EN_ADDR, MT6359_RG_LDO_VA12_EN_SHIFT,
+		   MT6359_DA_VA12_B_EN_ADDR, MT6359_RG_VA12_VOSEL_ADDR,
+		   MT6359_RG_VA12_VOSEL_MASK << MT6359_RG_VA12_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_va09", VA09, va09_voltages,
+		   MT6359_RG_LDO_VA09_EN_ADDR, MT6359_RG_LDO_VA09_EN_SHIFT,
+		   MT6359_DA_VA09_B_EN_ADDR, MT6359_RG_VA09_VOSEL_ADDR,
+		   MT6359_RG_VA09_VOSEL_MASK << MT6359_RG_VA09_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vrf18", VRF18, vrf18_voltages,
+		   MT6359_RG_LDO_VRF18_EN_ADDR, MT6359_RG_LDO_VRF18_EN_SHIFT,
+		   MT6359_DA_VRF18_B_EN_ADDR, MT6359_RG_VRF18_VOSEL_ADDR,
+		   MT6359_RG_VRF18_VOSEL_MASK << MT6359_RG_VRF18_VOSEL_SHIFT,
+		   120),
+	MT6359_LDO_LINEAR("ldo_vsram_md", VSRAM_MD, 500000, 1100000, 6250,
+			  0, mt_volt_range7, MT6359_RG_LDO_VSRAM_MD_EN_ADDR,
+			  MT6359_DA_VSRAM_MD_B_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_MD_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_MD_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_MD_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vufs", VUFS, volt18_voltages,
+		   MT6359_RG_LDO_VUFS_EN_ADDR, MT6359_RG_LDO_VUFS_EN_SHIFT,
+		   MT6359_DA_VUFS_B_EN_ADDR, MT6359_RG_VUFS_VOSEL_ADDR,
+		   MT6359_RG_VUFS_VOSEL_MASK << MT6359_RG_VUFS_VOSEL_SHIFT,
+		   1920),
+	MT6359_LDO("ldo_vm18", VM18, volt18_voltages,
+		   MT6359_RG_LDO_VM18_EN_ADDR, MT6359_RG_LDO_VM18_EN_SHIFT,
+		   MT6359_DA_VM18_B_EN_ADDR, MT6359_RG_VM18_VOSEL_ADDR,
+		   MT6359_RG_VM18_VOSEL_MASK << MT6359_RG_VM18_VOSEL_SHIFT,
+		   1920),
+	MT6359_LDO("ldo_vbbck", VBBCK, vbbck_voltages,
+		   MT6359_RG_LDO_VBBCK_EN_ADDR, MT6359_RG_LDO_VBBCK_EN_SHIFT,
+		   MT6359_DA_VBBCK_B_EN_ADDR, MT6359_RG_VBBCK_VOSEL_ADDR,
+		   MT6359_RG_VBBCK_VOSEL_MASK << MT6359_RG_VBBCK_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO_LINEAR("ldo_vsram_proc1", VSRAM_PROC1, 500000, 1293750, 6250,
+			  0, mt_volt_range6, MT6359_RG_LDO_VSRAM_PROC1_EN_ADDR,
+			  MT6359_DA_VSRAM_PROC1_B_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vsim2", VSIM2, vsim2_voltages,
+		   MT6359_RG_LDO_VSIM2_EN_ADDR, MT6359_RG_LDO_VSIM2_EN_SHIFT,
+		   MT6359_DA_VSIM2_B_EN_ADDR, MT6359_RG_VSIM2_VOSEL_ADDR,
+		   MT6359_RG_VSIM2_VOSEL_MASK << MT6359_RG_VSIM2_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO_LINEAR("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB,
+			  500000, 1293750, 6250, 0, mt_volt_range6,
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR,
+			  MT6359_DA_VSRAM_OTHERS_B_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_SHIFT),
+};
+
+static int mt6359_regulator_probe(struct platform_device *pdev)
+{
+	struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent);
+	struct regulator_config config = {};
+	struct regulator_dev *rdev;
+	int i;
+
+	config.dev = mt6397->dev;
+	config.regmap = mt6397->regmap;
+	for (i = 0; i < MT6359_MAX_REGULATOR; i++) {
+		config.driver_data = &mt6359_regulators[i];
+		rdev = devm_regulator_register(&pdev->dev, &mt6359_regulators[i].desc, &config);
+		if (IS_ERR(rdev)) {
+			dev_err(&pdev->dev, "failed to register %s\n",
+				mt6359_regulators[i].desc.name);
+			return PTR_ERR(rdev);
+		}
+	}
+
+	return 0;
+}
+
+static const struct platform_device_id mt6359_platform_ids[] = {
+	{"mt6359-regulator", 0},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(platform, mt6359_platform_ids);
+
+static struct platform_driver mt6359_regulator_driver = {
+	.driver = {
+		.name = "mt6359-regulator",
+	},
+	.probe = mt6359_regulator_probe,
+	.id_table = mt6359_platform_ids,
+};
+
+module_platform_driver(mt6359_regulator_driver);
+
+MODULE_AUTHOR("Wen Su <wen.su@mediatek.com>");
+MODULE_DESCRIPTION("Regulator Driver for MediaTek MT6359 PMIC");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regulator/mt6359-regulator.h b/include/linux/regulator/mt6359-regulator.h
new file mode 100644
index 0000000000000..14c4b715613ec
--- /dev/null
+++ b/include/linux/regulator/mt6359-regulator.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#ifndef __LINUX_REGULATOR_MT6359_H
+#define __LINUX_REGULATOR_MT6359_H
+
+enum {
+	MT6359_ID_VS1 = 0,
+	MT6359_ID_VGPU11,
+	MT6359_ID_VMODEM,
+	MT6359_ID_VPU,
+	MT6359_ID_VCORE,
+	MT6359_ID_VS2,
+	MT6359_ID_VPA,
+	MT6359_ID_VPROC2,
+	MT6359_ID_VPROC1,
+	MT6359_ID_VCORE_SSHUB,
+	MT6359_ID_VAUD18 = 10,
+	MT6359_ID_VSIM1,
+	MT6359_ID_VIBR,
+	MT6359_ID_VRF12,
+	MT6359_ID_VUSB,
+	MT6359_ID_VSRAM_PROC2,
+	MT6359_ID_VIO18,
+	MT6359_ID_VCAMIO,
+	MT6359_ID_VCN18,
+	MT6359_ID_VFE28,
+	MT6359_ID_VCN13,
+	MT6359_ID_VCN33_1_BT,
+	MT6359_ID_VCN33_1_WIFI,
+	MT6359_ID_VAUX18,
+	MT6359_ID_VSRAM_OTHERS,
+	MT6359_ID_VEFUSE,
+	MT6359_ID_VXO22,
+	MT6359_ID_VRFCK,
+	MT6359_ID_VBIF28,
+	MT6359_ID_VIO28,
+	MT6359_ID_VEMC,
+	MT6359_ID_VCN33_2_BT,
+	MT6359_ID_VCN33_2_WIFI,
+	MT6359_ID_VA12,
+	MT6359_ID_VA09,
+	MT6359_ID_VRF18,
+	MT6359_ID_VSRAM_MD,
+	MT6359_ID_VUFS,
+	MT6359_ID_VM18,
+	MT6359_ID_VBBCK,
+	MT6359_ID_VSRAM_PROC1,
+	MT6359_ID_VSIM2,
+	MT6359_ID_VSRAM_OTHERS_SSHUB,
+	MT6359_ID_RG_MAX,
+};
+
+#define MT6359_MAX_REGULATOR	MT6359_ID_RG_MAX
+
+#endif /* __LINUX_REGULATOR_MT6359_H */
-- 
GitLab


From 4cfc965475124c4eed2b7b5d8b6fc5048a21ecfd Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 26 May 2021 14:52:06 +0800
Subject: [PATCH 1937/3804] regulator: mt6359: Add support for MT6359P
 regulator

The MT6359P is an eco version of the MT6359 regulator.
Add support for it on top of the existing MT6359 regulator driver.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/regulator/mt6359-regulator.c       | 379 ++++++++++++++++++++-
 include/linux/mfd/mt6359p/registers.h      | 249 ++++++++++++++
 include/linux/regulator/mt6359-regulator.h |   1 +
 3 files changed, 623 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/mfd/mt6359p/registers.h

diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 994d3f67f73dc..4f517c9fd6c4a 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -4,6 +4,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/mfd/mt6359/registers.h>
+#include <linux/mfd/mt6359p/registers.h>
 #include <linux/mfd/mt6397/core.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -147,6 +148,29 @@ struct mt6359_regulator_info {
 	.qi = BIT(0),					\
 }
 
+#define MT6359P_LDO1(match, _name, _ops, _volt_table,	\
+	_enable_reg, _enable_mask, _status_reg,		\
+	_vsel_reg, _vsel_mask)				\
+[MT6359_ID_##_name] = {					\
+	.desc = {					\
+		.name = #_name,				\
+		.of_match = of_match_ptr(match),	\
+		.regulators_node = of_match_ptr("regulators"),	\
+		.ops = &_ops,				\
+		.type = REGULATOR_VOLTAGE,		\
+		.id = MT6359_ID_##_name,		\
+		.owner = THIS_MODULE,			\
+		.n_voltages = ARRAY_SIZE(_volt_table),	\
+		.volt_table = _volt_table,		\
+		.vsel_reg = _vsel_reg,			\
+		.vsel_mask = _vsel_mask,		\
+		.enable_reg = _enable_reg,		\
+		.enable_mask = BIT(_enable_mask),	\
+	},						\
+	.status_reg = _status_reg,			\
+	.qi = BIT(0),					\
+}
+
 static const struct linear_range mt_volt_range1[] = {
 	REGULATOR_LINEAR_RANGE(800000, 0, 0x70, 12500),
 };
@@ -175,6 +199,10 @@ static const struct linear_range mt_volt_range7[] = {
 	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
 };
 
+static const struct linear_range mt_volt_range8[] = {
+	REGULATOR_LINEAR_RANGE(506250, 0, 0x7f, 6250),
+};
+
 static const u32 vsim1_voltages[] = {
 	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
 };
@@ -212,6 +240,10 @@ static const u32 vrfck_voltages[] = {
 	0, 0, 1500000, 0, 0, 0, 0, 1600000, 0, 0, 0, 0, 1700000,
 };
 
+static const u32 vrfck_voltages_1[] = {
+	1240000, 1600000,
+};
+
 static const u32 vio28_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 2800000, 2900000, 3000000, 3100000, 3300000,
 };
@@ -220,6 +252,11 @@ static const u32 vemc_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2900000, 3000000, 0, 3300000,
 };
 
+static const u32 vemc_voltages_1[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 2500000, 2800000, 2900000, 3000000, 3100000,
+	3300000,
+};
+
 static const u32 va12_voltages[] = {
 	0, 0, 0, 0, 0, 0, 1200000, 1300000,
 };
@@ -356,6 +393,78 @@ static int mt6359_regulator_set_mode(struct regulator_dev *rdev,
 	return ret;
 }
 
+/* Set the VEMC selector in the VOSEL register chosen by the HW trapping. */
+static int mt6359p_vemc_set_voltage_sel(struct regulator_dev *rdev, u32 sel)
+{
+	struct mt6359_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret, clear_ret;
+	u32 val = 0;
+
+	sel <<= ffs(info->desc.vsel_mask) - 1;
+	ret = regmap_write(rdev->regmap, MT6359P_TMA_KEY_ADDR, TMA_KEY);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(rdev->regmap, MT6359P_VM_MODE_ADDR, &val);
+	if (ret)
+		goto clear_key;
+
+	switch (val) {
+	case 0:
+		/* If HW trapping is 0, use VEMC_VOSEL_0 */
+		ret = regmap_update_bits(rdev->regmap,
+					 info->desc.vsel_reg,
+					 info->desc.vsel_mask, sel);
+		break;
+	case 1:
+		/* If HW trapping is 1, use VEMC_VOSEL_1 */
+		ret = regmap_update_bits(rdev->regmap,
+					 info->desc.vsel_reg + 0x2,
+					 info->desc.vsel_mask, sel);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+clear_key:
+	/* Clear the TMA key on every exit path; report the first failure. */
+	clear_ret = regmap_write(rdev->regmap, MT6359P_TMA_KEY_ADDR, 0);
+	return ret ? ret : clear_ret;
+}
+
+/*
+ * Read back the VEMC voltage selector.
+ *
+ * The value read from MT6359P_VM_MODE_ADDR (the HW trapping) picks which
+ * VOSEL register holds the selector: 0 -> VEMC_VOSEL_0, 1 -> VEMC_VOSEL_1
+ * (vsel_reg + 0x2). Any other value yields -EINVAL; regmap errors are
+ * passed through unchanged.
+ */
+static int mt6359p_vemc_get_voltage_sel(struct regulator_dev *rdev)
+{
+	struct mt6359_regulator_info *info = rdev_get_drvdata(rdev);
+	unsigned int vosel_reg = info->desc.vsel_reg;
+	u32 trap = 0, raw = 0;
+	int err;
+
+	err = regmap_read(rdev->regmap, MT6359P_VM_MODE_ADDR, &trap);
+	if (err)
+		return err;
+
+	if (trap > 1)
+		return -EINVAL;
+	if (trap == 1)
+		vosel_reg += 0x2;
+
+	err = regmap_read(rdev->regmap, vosel_reg, &raw);
+	if (err)
+		return err;
+
+	/* Isolate the selector field and shift it down to bit 0. */
+	return (raw & info->desc.vsel_mask) >> (ffs(info->desc.vsel_mask) - 1);
+}
+
 static const struct regulator_ops mt6359_volt_range_ops = {
 	.list_voltage = regulator_list_voltage_linear_range,
 	.map_voltage = regulator_map_voltage_linear_range,
@@ -389,6 +498,18 @@ static const struct regulator_ops mt6359_volt_fixed_ops = {
 	.get_status = mt6359_get_status,
 };
 
+static const struct regulator_ops mt6359p_vemc_ops = {
+	.list_voltage = regulator_list_voltage_table,
+	.map_voltage = regulator_map_voltage_iterate,
+	.set_voltage_sel = mt6359p_vemc_set_voltage_sel,
+	.get_voltage_sel = mt6359p_vemc_get_voltage_sel,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6359_get_status,
+};
+
 /* The array is indexed by id(MT6359_ID_XXX) */
 static struct mt6359_regulator_info mt6359_regulators[] = {
 	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500, 0,
@@ -626,21 +747,267 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_SHIFT),
 };
 
+static struct mt6359_regulator_info mt6359p_regulators[] = {
+	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500, 0,
+		    mt_volt_range1, MT6359_RG_BUCK_VS1_EN_ADDR,
+		    MT6359_DA_VS1_EN_ADDR, MT6359_RG_BUCK_VS1_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VS1_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VS1_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VS1_LP_ADDR, MT6359_RG_BUCK_VS1_LP_SHIFT,
+		    MT6359_RG_VS1_FPWM_ADDR, MT6359_RG_VS1_FPWM_SHIFT),
+	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VGPU11_EN_ADDR,
+		    MT6359_DA_VGPU11_EN_ADDR, MT6359P_RG_BUCK_VGPU11_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VGPU11_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VGPU11_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VGPU11_LP_ADDR,
+		    MT6359_RG_BUCK_VGPU11_LP_SHIFT,
+		    MT6359_RG_VGPU11_FCCM_ADDR, MT6359_RG_VGPU11_FCCM_SHIFT),
+	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250, 0,
+		    mt_volt_range3, MT6359_RG_BUCK_VMODEM_EN_ADDR,
+		    MT6359_DA_VMODEM_EN_ADDR, MT6359_RG_BUCK_VMODEM_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VMODEM_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VMODEM_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VMODEM_LP_ADDR,
+		    MT6359_RG_BUCK_VMODEM_LP_SHIFT,
+		    MT6359_RG_VMODEM_FCCM_ADDR, MT6359_RG_VMODEM_FCCM_SHIFT),
+	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPU_EN_ADDR,
+		    MT6359_DA_VPU_EN_ADDR, MT6359_RG_BUCK_VPU_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPU_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPU_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPU_LP_ADDR, MT6359_RG_BUCK_VPU_LP_SHIFT,
+		    MT6359_RG_VPU_FCCM_ADDR, MT6359_RG_VPU_FCCM_SHIFT),
+	MT6359_BUCK("buck_vcore", VCORE, 506250, 1300000, 6250, 0,
+		    mt_volt_range8, MT6359_RG_BUCK_VCORE_EN_ADDR,
+		    MT6359_DA_VCORE_EN_ADDR, MT6359P_RG_BUCK_VCORE_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VCORE_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VCORE_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VCORE_LP_ADDR, MT6359_RG_BUCK_VCORE_LP_SHIFT,
+		    MT6359_RG_VCORE_FCCM_ADDR, MT6359_RG_VCORE_FCCM_SHIFT),
+	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500, 0,
+		    mt_volt_range4, MT6359_RG_BUCK_VS2_EN_ADDR,
+		    MT6359_DA_VS2_EN_ADDR, MT6359_RG_BUCK_VS2_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VS2_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VS2_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VS2_LP_ADDR, MT6359_RG_BUCK_VS2_LP_SHIFT,
+		    MT6359_RG_VS2_FPWM_ADDR, MT6359_RG_VS2_FPWM_SHIFT),
+	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, 0,
+		    mt_volt_range5, MT6359_RG_BUCK_VPA_EN_ADDR,
+		    MT6359_DA_VPA_EN_ADDR, MT6359_RG_BUCK_VPA_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPA_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPA_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPA_LP_ADDR, MT6359_RG_BUCK_VPA_LP_SHIFT,
+		    MT6359_RG_VPA_MODESET_ADDR, MT6359_RG_VPA_MODESET_SHIFT),
+	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPROC2_EN_ADDR,
+		    MT6359_DA_VPROC2_EN_ADDR, MT6359_RG_BUCK_VPROC2_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPROC2_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPROC2_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPROC2_LP_ADDR,
+		    MT6359_RG_BUCK_VPROC2_LP_SHIFT,
+		    MT6359_RG_VPROC2_FCCM_ADDR, MT6359_RG_VPROC2_FCCM_SHIFT),
+	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359_RG_BUCK_VPROC1_EN_ADDR,
+		    MT6359_DA_VPROC1_EN_ADDR, MT6359_RG_BUCK_VPROC1_VOSEL_ADDR,
+		    MT6359_RG_BUCK_VPROC1_VOSEL_MASK <<
+		    MT6359_RG_BUCK_VPROC1_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VPROC1_LP_ADDR,
+		    MT6359_RG_BUCK_VPROC1_LP_SHIFT,
+		    MT6359_RG_VPROC1_FCCM_ADDR, MT6359_RG_VPROC1_FCCM_SHIFT),
+	MT6359_BUCK("buck_vgpu11_sshub", VGPU11_SSHUB, 400000, 1193750, 6250, 0,
+		    mt_volt_range2, MT6359P_RG_BUCK_VGPU11_SSHUB_EN_ADDR,
+		    MT6359_DA_VGPU11_EN_ADDR,
+		    MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_ADDR,
+		    MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_MASK <<
+		    MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_SHIFT,
+		    MT6359_RG_BUCK_VGPU11_LP_ADDR,
+		    MT6359_RG_BUCK_VGPU11_LP_SHIFT,
+		    MT6359_RG_VGPU11_FCCM_ADDR, MT6359_RG_VGPU11_FCCM_SHIFT),
+	MT6359_REG_FIXED("ldo_vaud18", VAUD18, MT6359P_RG_LDO_VAUD18_EN_ADDR,
+			 MT6359P_DA_VAUD18_B_EN_ADDR, 1800000),
+	MT6359_LDO("ldo_vsim1", VSIM1, vsim1_voltages,
+		   MT6359P_RG_LDO_VSIM1_EN_ADDR, MT6359P_RG_LDO_VSIM1_EN_SHIFT,
+		   MT6359P_DA_VSIM1_B_EN_ADDR, MT6359P_RG_VSIM1_VOSEL_ADDR,
+		   MT6359_RG_VSIM1_VOSEL_MASK << MT6359_RG_VSIM1_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO("ldo_vibr", VIBR, vibr_voltages,
+		   MT6359P_RG_LDO_VIBR_EN_ADDR, MT6359P_RG_LDO_VIBR_EN_SHIFT,
+		   MT6359P_DA_VIBR_B_EN_ADDR, MT6359P_RG_VIBR_VOSEL_ADDR,
+		   MT6359_RG_VIBR_VOSEL_MASK << MT6359_RG_VIBR_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vrf12", VRF12, vrf12_voltages,
+		   MT6359P_RG_LDO_VRF12_EN_ADDR, MT6359P_RG_LDO_VRF12_EN_SHIFT,
+		   MT6359P_DA_VRF12_B_EN_ADDR, MT6359P_RG_VRF12_VOSEL_ADDR,
+		   MT6359_RG_VRF12_VOSEL_MASK << MT6359_RG_VRF12_VOSEL_SHIFT,
+		   480),
+	MT6359_REG_FIXED("ldo_vusb", VUSB, MT6359P_RG_LDO_VUSB_EN_0_ADDR,
+			 MT6359P_DA_VUSB_B_EN_ADDR, 3000000),
+	MT6359_LDO_LINEAR("ldo_vsram_proc2", VSRAM_PROC2, 500000, 1293750, 6250,
+			  0, mt_volt_range6, MT6359P_RG_LDO_VSRAM_PROC2_EN_ADDR,
+			  MT6359P_DA_VSRAM_PROC2_B_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_PROC2_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vio18", VIO18, volt18_voltages,
+		   MT6359P_RG_LDO_VIO18_EN_ADDR, MT6359P_RG_LDO_VIO18_EN_SHIFT,
+		   MT6359P_DA_VIO18_B_EN_ADDR, MT6359P_RG_VIO18_VOSEL_ADDR,
+		   MT6359_RG_VIO18_VOSEL_MASK << MT6359_RG_VIO18_VOSEL_SHIFT,
+		   960),
+	MT6359_LDO("ldo_vcamio", VCAMIO, volt18_voltages,
+		   MT6359P_RG_LDO_VCAMIO_EN_ADDR,
+		   MT6359P_RG_LDO_VCAMIO_EN_SHIFT,
+		   MT6359P_DA_VCAMIO_B_EN_ADDR, MT6359P_RG_VCAMIO_VOSEL_ADDR,
+		   MT6359_RG_VCAMIO_VOSEL_MASK << MT6359_RG_VCAMIO_VOSEL_SHIFT,
+		   1290),
+	MT6359_REG_FIXED("ldo_vcn18", VCN18, MT6359P_RG_LDO_VCN18_EN_ADDR,
+			 MT6359P_DA_VCN18_B_EN_ADDR, 1800000),
+	MT6359_REG_FIXED("ldo_vfe28", VFE28, MT6359P_RG_LDO_VFE28_EN_ADDR,
+			 MT6359P_DA_VFE28_B_EN_ADDR, 2800000),
+	MT6359_LDO("ldo_vcn13", VCN13, vcn13_voltages,
+		   MT6359P_RG_LDO_VCN13_EN_ADDR, MT6359P_RG_LDO_VCN13_EN_SHIFT,
+		   MT6359P_DA_VCN13_B_EN_ADDR, MT6359P_RG_VCN13_VOSEL_ADDR,
+		   MT6359_RG_VCN13_VOSEL_MASK << MT6359_RG_VCN13_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vcn33_1_bt", VCN33_1_BT, vcn33_voltages,
+		   MT6359P_RG_LDO_VCN33_1_EN_0_ADDR,
+		   MT6359_RG_LDO_VCN33_1_EN_0_SHIFT,
+		   MT6359P_DA_VCN33_1_B_EN_ADDR, MT6359P_RG_VCN33_1_VOSEL_ADDR,
+		   MT6359_RG_VCN33_1_VOSEL_MASK <<
+		   MT6359_RG_VCN33_1_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_vcn33_1_wifi", VCN33_1_WIFI, vcn33_voltages,
+		   MT6359P_RG_LDO_VCN33_1_EN_1_ADDR,
+		   MT6359P_RG_LDO_VCN33_1_EN_1_SHIFT,
+		   MT6359P_DA_VCN33_1_B_EN_ADDR, MT6359P_RG_VCN33_1_VOSEL_ADDR,
+		   MT6359_RG_VCN33_1_VOSEL_MASK <<
+		   MT6359_RG_VCN33_1_VOSEL_SHIFT, 240),
+	MT6359_REG_FIXED("ldo_vaux18", VAUX18, MT6359P_RG_LDO_VAUX18_EN_ADDR,
+			 MT6359P_DA_VAUX18_B_EN_ADDR, 1800000),
+	MT6359_LDO_LINEAR("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750,
+			  6250, 0, mt_volt_range6,
+			  MT6359P_RG_LDO_VSRAM_OTHERS_EN_ADDR,
+			  MT6359P_DA_VSRAM_OTHERS_B_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vefuse", VEFUSE, vefuse_voltages,
+		   MT6359P_RG_LDO_VEFUSE_EN_ADDR,
+		   MT6359P_RG_LDO_VEFUSE_EN_SHIFT,
+		   MT6359P_DA_VEFUSE_B_EN_ADDR, MT6359P_RG_VEFUSE_VOSEL_ADDR,
+		   MT6359_RG_VEFUSE_VOSEL_MASK << MT6359_RG_VEFUSE_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO("ldo_vxo22", VXO22, vxo22_voltages,
+		   MT6359P_RG_LDO_VXO22_EN_ADDR, MT6359P_RG_LDO_VXO22_EN_SHIFT,
+		   MT6359P_DA_VXO22_B_EN_ADDR, MT6359P_RG_VXO22_VOSEL_ADDR,
+		   MT6359_RG_VXO22_VOSEL_MASK << MT6359_RG_VXO22_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO("ldo_vrfck_1", VRFCK, vrfck_voltages_1,
+		   MT6359P_RG_LDO_VRFCK_EN_ADDR, MT6359P_RG_LDO_VRFCK_EN_SHIFT,
+		   MT6359P_DA_VRFCK_B_EN_ADDR, MT6359P_RG_VRFCK_VOSEL_ADDR,
+		   MT6359_RG_VRFCK_VOSEL_MASK << MT6359_RG_VRFCK_VOSEL_SHIFT,
+		   480),
+	MT6359_REG_FIXED("ldo_vbif28", VBIF28, MT6359P_RG_LDO_VBIF28_EN_ADDR,
+			 MT6359P_DA_VBIF28_B_EN_ADDR, 2800000),
+	MT6359_LDO("ldo_vio28", VIO28, vio28_voltages,
+		   MT6359P_RG_LDO_VIO28_EN_ADDR, MT6359P_RG_LDO_VIO28_EN_SHIFT,
+		   MT6359P_DA_VIO28_B_EN_ADDR, MT6359P_RG_VIO28_VOSEL_ADDR,
+		   MT6359_RG_VIO28_VOSEL_MASK << MT6359_RG_VIO28_VOSEL_SHIFT,
+		   1920),
+	MT6359P_LDO1("ldo_vemc_1", VEMC, mt6359p_vemc_ops, vemc_voltages_1,
+		     MT6359P_RG_LDO_VEMC_EN_ADDR, MT6359P_RG_LDO_VEMC_EN_SHIFT,
+		     MT6359P_DA_VEMC_B_EN_ADDR,
+		     MT6359P_RG_LDO_VEMC_VOSEL_0_ADDR,
+		     MT6359P_RG_LDO_VEMC_VOSEL_0_MASK <<
+		     MT6359P_RG_LDO_VEMC_VOSEL_0_SHIFT),
+	MT6359_LDO("ldo_vcn33_2_bt", VCN33_2_BT, vcn33_voltages,
+		   MT6359P_RG_LDO_VCN33_2_EN_0_ADDR,
+		   MT6359P_RG_LDO_VCN33_2_EN_0_SHIFT,
+		   MT6359P_DA_VCN33_2_B_EN_ADDR, MT6359P_RG_VCN33_2_VOSEL_ADDR,
+		   MT6359_RG_VCN33_2_VOSEL_MASK <<
+		   MT6359_RG_VCN33_2_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_vcn33_2_wifi", VCN33_2_WIFI, vcn33_voltages,
+		   MT6359P_RG_LDO_VCN33_2_EN_1_ADDR,
+		   MT6359_RG_LDO_VCN33_2_EN_1_SHIFT,
+		   MT6359P_DA_VCN33_2_B_EN_ADDR, MT6359P_RG_VCN33_2_VOSEL_ADDR,
+		   MT6359_RG_VCN33_2_VOSEL_MASK <<
+		   MT6359_RG_VCN33_2_VOSEL_SHIFT, 240),
+	MT6359_LDO("ldo_va12", VA12, va12_voltages,
+		   MT6359P_RG_LDO_VA12_EN_ADDR, MT6359P_RG_LDO_VA12_EN_SHIFT,
+		   MT6359P_DA_VA12_B_EN_ADDR, MT6359P_RG_VA12_VOSEL_ADDR,
+		   MT6359_RG_VA12_VOSEL_MASK << MT6359_RG_VA12_VOSEL_SHIFT,
+		   960),
+	MT6359_LDO("ldo_va09", VA09, va09_voltages,
+		   MT6359P_RG_LDO_VA09_EN_ADDR, MT6359P_RG_LDO_VA09_EN_SHIFT,
+		   MT6359P_DA_VA09_B_EN_ADDR, MT6359P_RG_VA09_VOSEL_ADDR,
+		   MT6359_RG_VA09_VOSEL_MASK << MT6359_RG_VA09_VOSEL_SHIFT,
+		   960),
+	MT6359_LDO("ldo_vrf18", VRF18, vrf18_voltages,
+		   MT6359P_RG_LDO_VRF18_EN_ADDR, MT6359P_RG_LDO_VRF18_EN_SHIFT,
+		   MT6359P_DA_VRF18_B_EN_ADDR, MT6359P_RG_VRF18_VOSEL_ADDR,
+		   MT6359_RG_VRF18_VOSEL_MASK << MT6359_RG_VRF18_VOSEL_SHIFT,
+		   240),
+	MT6359_LDO_LINEAR("ldo_vsram_md", VSRAM_MD, 500000, 1293750, 6250,
+			  0, mt_volt_range7, MT6359P_RG_LDO_VSRAM_MD_EN_ADDR,
+			  MT6359P_DA_VSRAM_MD_B_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_MD_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_MD_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_MD_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vufs", VUFS, volt18_voltages,
+		   MT6359P_RG_LDO_VUFS_EN_ADDR, MT6359P_RG_LDO_VUFS_EN_SHIFT,
+		   MT6359P_DA_VUFS_B_EN_ADDR, MT6359P_RG_VUFS_VOSEL_ADDR,
+		   MT6359_RG_VUFS_VOSEL_MASK << MT6359_RG_VUFS_VOSEL_SHIFT,
+		   1920),
+	MT6359_LDO("ldo_vm18", VM18, volt18_voltages,
+		   MT6359P_RG_LDO_VM18_EN_ADDR, MT6359P_RG_LDO_VM18_EN_SHIFT,
+		   MT6359P_DA_VM18_B_EN_ADDR, MT6359P_RG_VM18_VOSEL_ADDR,
+		   MT6359_RG_VM18_VOSEL_MASK << MT6359_RG_VM18_VOSEL_SHIFT,
+		   1920),
+	MT6359_LDO("ldo_vbbck", VBBCK, vbbck_voltages,
+		   MT6359P_RG_LDO_VBBCK_EN_ADDR, MT6359P_RG_LDO_VBBCK_EN_SHIFT,
+		   MT6359P_DA_VBBCK_B_EN_ADDR, MT6359P_RG_VBBCK_VOSEL_ADDR,
+		   MT6359P_RG_VBBCK_VOSEL_MASK << MT6359P_RG_VBBCK_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO_LINEAR("ldo_vsram_proc1", VSRAM_PROC1, 500000, 1293750, 6250,
+			  0, mt_volt_range6, MT6359P_RG_LDO_VSRAM_PROC1_EN_ADDR,
+			  MT6359P_DA_VSRAM_PROC1_B_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_PROC1_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_SHIFT),
+	MT6359_LDO("ldo_vsim2", VSIM2, vsim2_voltages,
+		   MT6359P_RG_LDO_VSIM2_EN_ADDR, MT6359P_RG_LDO_VSIM2_EN_SHIFT,
+		   MT6359P_DA_VSIM2_B_EN_ADDR, MT6359P_RG_VSIM2_VOSEL_ADDR,
+		   MT6359_RG_VSIM2_VOSEL_MASK << MT6359_RG_VSIM2_VOSEL_SHIFT,
+		   480),
+	MT6359_LDO_LINEAR("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB,
+			  500000, 1293750, 6250, 0, mt_volt_range6,
+			  MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR,
+			  MT6359P_DA_VSRAM_OTHERS_B_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR,
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_MASK <<
+			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_SHIFT),
+};
+
 static int mt6359_regulator_probe(struct platform_device *pdev)
 {
 	struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent);
 	struct regulator_config config = {};
 	struct regulator_dev *rdev;
-	int i;
+	struct mt6359_regulator_info *mt6359_info = mt6359_regulators;
+	int i, ret, hw_ver;
+
+	ret = regmap_read(mt6397->regmap, MT6359P_HWCID, &hw_ver);
+	if (ret)
+		return ret;	/* cannot tell which chip variant this is */
+	if (hw_ver >= MT6359P_CHIP_VER)
+		mt6359_info = mt6359p_regulators;
 
 	config.dev = mt6397->dev;
 	config.regmap = mt6397->regmap;
-	for (i = 0; i < MT6359_MAX_REGULATOR; i++) {
-		config.driver_data = &mt6359_regulators[i];
-		rdev = devm_regulator_register(&pdev->dev, &mt6359_regulators[i].desc, &config);
+	for (i = 0; i < MT6359_MAX_REGULATOR; i++, mt6359_info++) {
+		config.driver_data = mt6359_info;
+		rdev = devm_regulator_register(&pdev->dev, &mt6359_info->desc, &config);
 		if (IS_ERR(rdev)) {
-			dev_err(&pdev->dev, "failed to register %s\n",
-				mt6359_regulators[i].desc.name);
+			dev_err(&pdev->dev, "failed to register %s\n", mt6359_info->desc.name);
 			return PTR_ERR(rdev);
 		}
 	}
diff --git a/include/linux/mfd/mt6359p/registers.h b/include/linux/mfd/mt6359p/registers.h
new file mode 100644
index 0000000000000..3d97c1885171e
--- /dev/null
+++ b/include/linux/mfd/mt6359p/registers.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#ifndef __MFD_MT6359P_REGISTERS_H__
+#define __MFD_MT6359P_REGISTERS_H__
+
+#define MT6359P_CHIP_VER 0x5930
+
+/* PMIC Registers */
+#define MT6359P_HWCID                         0x8
+#define MT6359P_TOP_TRAP                      0x50
+#define MT6359P_TOP_TMA_KEY                   0x3a8
+#define MT6359P_BUCK_VCORE_ELR_NUM            0x152a
+#define MT6359P_BUCK_VCORE_ELR0               0x152c
+#define MT6359P_BUCK_VGPU11_SSHUB_CON0        0x15aa
+#define MT6359P_BUCK_VGPU11_ELR0              0x15b4
+#define MT6359P_LDO_VSRAM_PROC1_ELR           0x1b44
+#define MT6359P_LDO_VSRAM_PROC2_ELR           0x1b46
+#define MT6359P_LDO_VSRAM_OTHERS_ELR          0x1b48
+#define MT6359P_LDO_VSRAM_MD_ELR              0x1b4a
+#define MT6359P_LDO_VEMC_ELR_0                0x1b4c
+#define MT6359P_LDO_VFE28_CON0                0x1b88
+#define MT6359P_LDO_VFE28_MON                 0x1b8c
+#define MT6359P_LDO_VXO22_CON0                0x1b9a
+#define MT6359P_LDO_VXO22_MON                 0x1b9e
+#define MT6359P_LDO_VRF18_CON0                0x1bac
+#define MT6359P_LDO_VRF18_MON                 0x1bb0
+#define MT6359P_LDO_VRF12_CON0                0x1bbe
+#define MT6359P_LDO_VRF12_MON                 0x1bc2
+#define MT6359P_LDO_VEFUSE_CON0               0x1bd0
+#define MT6359P_LDO_VEFUSE_MON                0x1bd4
+#define MT6359P_LDO_VCN33_1_CON0              0x1be2
+#define MT6359P_LDO_VCN33_1_MON               0x1be6
+#define MT6359P_LDO_VCN33_1_MULTI_SW          0x1bf4
+#define MT6359P_LDO_VCN33_2_CON0              0x1c08
+#define MT6359P_LDO_VCN33_2_MON               0x1c0c
+#define MT6359P_LDO_VCN33_2_MULTI_SW          0x1c1a
+#define MT6359P_LDO_VCN13_CON0                0x1c1c
+#define MT6359P_LDO_VCN13_MON                 0x1c20
+#define MT6359P_LDO_VCN18_CON0                0x1c2e
+#define MT6359P_LDO_VCN18_MON                 0x1c32
+#define MT6359P_LDO_VA09_CON0                 0x1c40
+#define MT6359P_LDO_VA09_MON                  0x1c44
+#define MT6359P_LDO_VCAMIO_CON0               0x1c52
+#define MT6359P_LDO_VCAMIO_MON                0x1c56
+#define MT6359P_LDO_VA12_CON0                 0x1c64
+#define MT6359P_LDO_VA12_MON                  0x1c68
+#define MT6359P_LDO_VAUX18_CON0               0x1c88
+#define MT6359P_LDO_VAUX18_MON                0x1c8c
+#define MT6359P_LDO_VAUD18_CON0               0x1c9a
+#define MT6359P_LDO_VAUD18_MON                0x1c9e
+#define MT6359P_LDO_VIO18_CON0                0x1cac
+#define MT6359P_LDO_VIO18_MON                 0x1cb0
+#define MT6359P_LDO_VEMC_CON0                 0x1cbe
+#define MT6359P_LDO_VEMC_MON                  0x1cc2
+#define MT6359P_LDO_VSIM1_CON0                0x1cd0
+#define MT6359P_LDO_VSIM1_MON                 0x1cd4
+#define MT6359P_LDO_VSIM2_CON0                0x1ce2
+#define MT6359P_LDO_VSIM2_MON                 0x1ce6
+#define MT6359P_LDO_VUSB_CON0                 0x1d08
+#define MT6359P_LDO_VUSB_MON                  0x1d0c
+#define MT6359P_LDO_VUSB_MULTI_SW             0x1d1a
+#define MT6359P_LDO_VRFCK_CON0                0x1d1c
+#define MT6359P_LDO_VRFCK_MON                 0x1d20
+#define MT6359P_LDO_VBBCK_CON0                0x1d2e
+#define MT6359P_LDO_VBBCK_MON                 0x1d32
+#define MT6359P_LDO_VBIF28_CON0               0x1d40
+#define MT6359P_LDO_VBIF28_MON                0x1d44
+#define MT6359P_LDO_VIBR_CON0                 0x1d52
+#define MT6359P_LDO_VIBR_MON                  0x1d56
+#define MT6359P_LDO_VIO28_CON0                0x1d64
+#define MT6359P_LDO_VIO28_MON                 0x1d68
+#define MT6359P_LDO_VM18_CON0                 0x1d88
+#define MT6359P_LDO_VM18_MON                  0x1d8c
+#define MT6359P_LDO_VUFS_CON0                 0x1d9a
+#define MT6359P_LDO_VUFS_MON                  0x1d9e
+#define MT6359P_LDO_VSRAM_PROC1_CON0          0x1e88
+#define MT6359P_LDO_VSRAM_PROC1_MON           0x1e8c
+#define MT6359P_LDO_VSRAM_PROC1_VOSEL1        0x1e90
+#define MT6359P_LDO_VSRAM_PROC2_CON0          0x1ea8
+#define MT6359P_LDO_VSRAM_PROC2_MON           0x1eac
+#define MT6359P_LDO_VSRAM_PROC2_VOSEL1        0x1eb0
+#define MT6359P_LDO_VSRAM_OTHERS_CON0         0x1f08
+#define MT6359P_LDO_VSRAM_OTHERS_MON          0x1f0c
+#define MT6359P_LDO_VSRAM_OTHERS_VOSEL1       0x1f10
+#define MT6359P_LDO_VSRAM_OTHERS_SSHUB        0x1f28
+#define MT6359P_LDO_VSRAM_MD_CON0             0x1f2e
+#define MT6359P_LDO_VSRAM_MD_MON              0x1f32
+#define MT6359P_LDO_VSRAM_MD_VOSEL1           0x1f36
+#define MT6359P_VFE28_ANA_CON0                0x1f88
+#define MT6359P_VAUX18_ANA_CON0               0x1f8c
+#define MT6359P_VUSB_ANA_CON0                 0x1f90
+#define MT6359P_VBIF28_ANA_CON0               0x1f94
+#define MT6359P_VCN33_1_ANA_CON0              0x1f98
+#define MT6359P_VCN33_2_ANA_CON0              0x1f9c
+#define MT6359P_VEMC_ANA_CON0                 0x1fa0
+#define MT6359P_VSIM1_ANA_CON0                0x1fa2
+#define MT6359P_VSIM2_ANA_CON0                0x1fa6
+#define MT6359P_VIO28_ANA_CON0                0x1faa
+#define MT6359P_VIBR_ANA_CON0                 0x1fae
+#define MT6359P_VFE28_ELR_4                   0x1fc0
+#define MT6359P_VRF18_ANA_CON0                0x2008
+#define MT6359P_VEFUSE_ANA_CON0               0x200c
+#define MT6359P_VCN18_ANA_CON0                0x2010
+#define MT6359P_VCAMIO_ANA_CON0               0x2014
+#define MT6359P_VAUD18_ANA_CON0               0x2018
+#define MT6359P_VIO18_ANA_CON0                0x201c
+#define MT6359P_VM18_ANA_CON0                 0x2020
+#define MT6359P_VUFS_ANA_CON0                 0x2024
+#define MT6359P_VRF12_ANA_CON0                0x202a
+#define MT6359P_VCN13_ANA_CON0                0x202e
+#define MT6359P_VA09_ANA_CON0                 0x2032
+#define MT6359P_VRF18_ELR_3                   0x204e
+#define MT6359P_VXO22_ANA_CON0                0x2088
+#define MT6359P_VRFCK_ANA_CON0                0x208c
+#define MT6359P_VBBCK_ANA_CON0                0x2096
+
+#define MT6359P_RG_BUCK_VCORE_VOSEL_ADDR         MT6359P_BUCK_VCORE_ELR0
+#define MT6359P_RG_BUCK_VGPU11_SSHUB_EN_ADDR     MT6359P_BUCK_VGPU11_SSHUB_CON0
+#define MT6359P_RG_BUCK_VGPU11_VOSEL_ADDR        MT6359P_BUCK_VGPU11_ELR0
+#define MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_ADDR  MT6359P_BUCK_VGPU11_SSHUB_CON0
+#define MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_MASK  0x7F
+#define MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_SHIFT 4
+#define MT6359P_RG_LDO_VSRAM_PROC1_VOSEL_ADDR    MT6359P_LDO_VSRAM_PROC1_ELR
+#define MT6359P_RG_LDO_VSRAM_PROC2_VOSEL_ADDR    MT6359P_LDO_VSRAM_PROC2_ELR
+#define MT6359P_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR   MT6359P_LDO_VSRAM_OTHERS_ELR
+#define MT6359P_RG_LDO_VSRAM_MD_VOSEL_ADDR       MT6359P_LDO_VSRAM_MD_ELR
+#define MT6359P_RG_LDO_VEMC_VOSEL_0_ADDR         MT6359P_LDO_VEMC_ELR_0
+#define MT6359P_RG_LDO_VEMC_VOSEL_0_MASK         0xF
+#define MT6359P_RG_LDO_VEMC_VOSEL_0_SHIFT        0
+#define MT6359P_RG_LDO_VFE28_EN_ADDR             MT6359P_LDO_VFE28_CON0
+#define MT6359P_DA_VFE28_B_EN_ADDR               MT6359P_LDO_VFE28_MON
+#define MT6359P_RG_LDO_VXO22_EN_ADDR             MT6359P_LDO_VXO22_CON0
+#define MT6359P_RG_LDO_VXO22_EN_SHIFT            0
+#define MT6359P_DA_VXO22_B_EN_ADDR               MT6359P_LDO_VXO22_MON
+#define MT6359P_RG_LDO_VRF18_EN_ADDR             MT6359P_LDO_VRF18_CON0
+#define MT6359P_RG_LDO_VRF18_EN_SHIFT            0
+#define MT6359P_DA_VRF18_B_EN_ADDR               MT6359P_LDO_VRF18_MON
+#define MT6359P_RG_LDO_VRF12_EN_ADDR             MT6359P_LDO_VRF12_CON0
+#define MT6359P_RG_LDO_VRF12_EN_SHIFT            0
+#define MT6359P_DA_VRF12_B_EN_ADDR               MT6359P_LDO_VRF12_MON
+#define MT6359P_RG_LDO_VEFUSE_EN_ADDR            MT6359P_LDO_VEFUSE_CON0
+#define MT6359P_RG_LDO_VEFUSE_EN_SHIFT           0
+#define MT6359P_DA_VEFUSE_B_EN_ADDR              MT6359P_LDO_VEFUSE_MON
+#define MT6359P_RG_LDO_VCN33_1_EN_0_ADDR         MT6359P_LDO_VCN33_1_CON0
+#define MT6359P_DA_VCN33_1_B_EN_ADDR             MT6359P_LDO_VCN33_1_MON
+#define MT6359P_RG_LDO_VCN33_1_EN_1_ADDR         MT6359P_LDO_VCN33_1_MULTI_SW
+#define MT6359P_RG_LDO_VCN33_1_EN_1_SHIFT        15
+#define MT6359P_RG_LDO_VCN33_2_EN_0_ADDR         MT6359P_LDO_VCN33_2_CON0
+#define MT6359P_RG_LDO_VCN33_2_EN_0_SHIFT        0
+#define MT6359P_DA_VCN33_2_B_EN_ADDR             MT6359P_LDO_VCN33_2_MON
+#define MT6359P_RG_LDO_VCN33_2_EN_1_ADDR         MT6359P_LDO_VCN33_2_MULTI_SW
+#define MT6359P_RG_LDO_VCN13_EN_ADDR             MT6359P_LDO_VCN13_CON0
+#define MT6359P_RG_LDO_VCN13_EN_SHIFT            0
+#define MT6359P_DA_VCN13_B_EN_ADDR               MT6359P_LDO_VCN13_MON
+#define MT6359P_RG_LDO_VCN18_EN_ADDR             MT6359P_LDO_VCN18_CON0
+#define MT6359P_DA_VCN18_B_EN_ADDR               MT6359P_LDO_VCN18_MON
+#define MT6359P_RG_LDO_VA09_EN_ADDR              MT6359P_LDO_VA09_CON0
+#define MT6359P_RG_LDO_VA09_EN_SHIFT             0
+#define MT6359P_DA_VA09_B_EN_ADDR                MT6359P_LDO_VA09_MON
+#define MT6359P_RG_LDO_VCAMIO_EN_ADDR            MT6359P_LDO_VCAMIO_CON0
+#define MT6359P_RG_LDO_VCAMIO_EN_SHIFT           0
+#define MT6359P_DA_VCAMIO_B_EN_ADDR              MT6359P_LDO_VCAMIO_MON
+#define MT6359P_RG_LDO_VA12_EN_ADDR              MT6359P_LDO_VA12_CON0
+#define MT6359P_RG_LDO_VA12_EN_SHIFT             0
+#define MT6359P_DA_VA12_B_EN_ADDR                MT6359P_LDO_VA12_MON
+#define MT6359P_RG_LDO_VAUX18_EN_ADDR            MT6359P_LDO_VAUX18_CON0
+#define MT6359P_DA_VAUX18_B_EN_ADDR              MT6359P_LDO_VAUX18_MON
+#define MT6359P_RG_LDO_VAUD18_EN_ADDR            MT6359P_LDO_VAUD18_CON0
+#define MT6359P_DA_VAUD18_B_EN_ADDR              MT6359P_LDO_VAUD18_MON
+#define MT6359P_RG_LDO_VIO18_EN_ADDR             MT6359P_LDO_VIO18_CON0
+#define MT6359P_RG_LDO_VIO18_EN_SHIFT            0
+#define MT6359P_DA_VIO18_B_EN_ADDR               MT6359P_LDO_VIO18_MON
+#define MT6359P_RG_LDO_VEMC_EN_ADDR              MT6359P_LDO_VEMC_CON0
+#define MT6359P_RG_LDO_VEMC_EN_SHIFT             0
+#define MT6359P_DA_VEMC_B_EN_ADDR                MT6359P_LDO_VEMC_MON
+#define MT6359P_RG_LDO_VSIM1_EN_ADDR             MT6359P_LDO_VSIM1_CON0
+#define MT6359P_RG_LDO_VSIM1_EN_SHIFT            0
+#define MT6359P_DA_VSIM1_B_EN_ADDR               MT6359P_LDO_VSIM1_MON
+#define MT6359P_RG_LDO_VSIM2_EN_ADDR             MT6359P_LDO_VSIM2_CON0
+#define MT6359P_RG_LDO_VSIM2_EN_SHIFT            0
+#define MT6359P_DA_VSIM2_B_EN_ADDR               MT6359P_LDO_VSIM2_MON
+#define MT6359P_RG_LDO_VUSB_EN_0_ADDR            MT6359P_LDO_VUSB_CON0
+#define MT6359P_DA_VUSB_B_EN_ADDR                MT6359P_LDO_VUSB_MON
+#define MT6359P_RG_LDO_VUSB_EN_1_ADDR            MT6359P_LDO_VUSB_MULTI_SW
+#define MT6359P_RG_LDO_VRFCK_EN_ADDR             MT6359P_LDO_VRFCK_CON0
+#define MT6359P_RG_LDO_VRFCK_EN_SHIFT            0
+#define MT6359P_DA_VRFCK_B_EN_ADDR               MT6359P_LDO_VRFCK_MON
+#define MT6359P_RG_LDO_VBBCK_EN_ADDR             MT6359P_LDO_VBBCK_CON0
+#define MT6359P_RG_LDO_VBBCK_EN_SHIFT            0
+#define MT6359P_DA_VBBCK_B_EN_ADDR               MT6359P_LDO_VBBCK_MON
+#define MT6359P_RG_LDO_VBIF28_EN_ADDR            MT6359P_LDO_VBIF28_CON0
+#define MT6359P_DA_VBIF28_B_EN_ADDR              MT6359P_LDO_VBIF28_MON
+#define MT6359P_RG_LDO_VIBR_EN_ADDR              MT6359P_LDO_VIBR_CON0
+#define MT6359P_RG_LDO_VIBR_EN_SHIFT             0
+#define MT6359P_DA_VIBR_B_EN_ADDR                MT6359P_LDO_VIBR_MON
+#define MT6359P_RG_LDO_VIO28_EN_ADDR             MT6359P_LDO_VIO28_CON0
+#define MT6359P_RG_LDO_VIO28_EN_SHIFT            0
+#define MT6359P_DA_VIO28_B_EN_ADDR               MT6359P_LDO_VIO28_MON
+#define MT6359P_RG_LDO_VM18_EN_ADDR              MT6359P_LDO_VM18_CON0
+#define MT6359P_RG_LDO_VM18_EN_SHIFT             0
+#define MT6359P_DA_VM18_B_EN_ADDR                MT6359P_LDO_VM18_MON
+#define MT6359P_RG_LDO_VUFS_EN_ADDR              MT6359P_LDO_VUFS_CON0
+#define MT6359P_RG_LDO_VUFS_EN_SHIFT             0
+#define MT6359P_DA_VUFS_B_EN_ADDR                MT6359P_LDO_VUFS_MON
+#define MT6359P_RG_LDO_VSRAM_PROC1_EN_ADDR       MT6359P_LDO_VSRAM_PROC1_CON0
+#define MT6359P_DA_VSRAM_PROC1_B_EN_ADDR         MT6359P_LDO_VSRAM_PROC1_MON
+#define MT6359P_DA_VSRAM_PROC1_VOSEL_ADDR        MT6359P_LDO_VSRAM_PROC1_VOSEL1
+#define MT6359P_RG_LDO_VSRAM_PROC2_EN_ADDR       MT6359P_LDO_VSRAM_PROC2_CON0
+#define MT6359P_DA_VSRAM_PROC2_B_EN_ADDR         MT6359P_LDO_VSRAM_PROC2_MON
+#define MT6359P_DA_VSRAM_PROC2_VOSEL_ADDR        MT6359P_LDO_VSRAM_PROC2_VOSEL1
+#define MT6359P_RG_LDO_VSRAM_OTHERS_EN_ADDR      MT6359P_LDO_VSRAM_OTHERS_CON0
+#define MT6359P_DA_VSRAM_OTHERS_B_EN_ADDR        MT6359P_LDO_VSRAM_OTHERS_MON
+#define MT6359P_DA_VSRAM_OTHERS_VOSEL_ADDR       MT6359P_LDO_VSRAM_OTHERS_VOSEL1
+#define MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR    MT6359P_LDO_VSRAM_OTHERS_SSHUB
+#define MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR MT6359P_LDO_VSRAM_OTHERS_SSHUB
+#define MT6359P_RG_LDO_VSRAM_MD_EN_ADDR          MT6359P_LDO_VSRAM_MD_CON0
+#define MT6359P_DA_VSRAM_MD_B_EN_ADDR            MT6359P_LDO_VSRAM_MD_MON
+#define MT6359P_DA_VSRAM_MD_VOSEL_ADDR           MT6359P_LDO_VSRAM_MD_VOSEL1
+#define MT6359P_RG_VCN33_1_VOSEL_ADDR            MT6359P_VCN33_1_ANA_CON0
+#define MT6359P_RG_VCN33_2_VOSEL_ADDR            MT6359P_VCN33_2_ANA_CON0
+#define MT6359P_RG_VEMC_VOSEL_ADDR               MT6359P_VEMC_ANA_CON0
+#define MT6359P_RG_VSIM1_VOSEL_ADDR              MT6359P_VSIM1_ANA_CON0
+#define MT6359P_RG_VSIM2_VOSEL_ADDR              MT6359P_VSIM2_ANA_CON0
+#define MT6359P_RG_VIO28_VOSEL_ADDR              MT6359P_VIO28_ANA_CON0
+#define MT6359P_RG_VIBR_VOSEL_ADDR               MT6359P_VIBR_ANA_CON0
+#define MT6359P_RG_VRF18_VOSEL_ADDR              MT6359P_VRF18_ANA_CON0
+#define MT6359P_RG_VEFUSE_VOSEL_ADDR             MT6359P_VEFUSE_ANA_CON0
+#define MT6359P_RG_VCAMIO_VOSEL_ADDR             MT6359P_VCAMIO_ANA_CON0
+#define MT6359P_RG_VIO18_VOSEL_ADDR              MT6359P_VIO18_ANA_CON0
+#define MT6359P_RG_VM18_VOSEL_ADDR               MT6359P_VM18_ANA_CON0
+#define MT6359P_RG_VUFS_VOSEL_ADDR               MT6359P_VUFS_ANA_CON0
+#define MT6359P_RG_VRF12_VOSEL_ADDR              MT6359P_VRF12_ANA_CON0
+#define MT6359P_RG_VCN13_VOSEL_ADDR              MT6359P_VCN13_ANA_CON0
+#define MT6359P_RG_VA09_VOSEL_ADDR               MT6359P_VRF18_ELR_3
+#define MT6359P_RG_VA12_VOSEL_ADDR               MT6359P_VFE28_ELR_4
+#define MT6359P_RG_VXO22_VOSEL_ADDR              MT6359P_VXO22_ANA_CON0
+#define MT6359P_RG_VRFCK_VOSEL_ADDR              MT6359P_VRFCK_ANA_CON0
+#define MT6359P_RG_VBBCK_VOSEL_ADDR              MT6359P_VBBCK_ANA_CON0
+#define MT6359P_RG_VBBCK_VOSEL_MASK              0xF
+#define MT6359P_RG_VBBCK_VOSEL_SHIFT             4
+#define MT6359P_VM_MODE_ADDR                     MT6359P_TOP_TRAP
+#define MT6359P_TMA_KEY_ADDR                     MT6359P_TOP_TMA_KEY
+
+#define TMA_KEY 0x9CA6
+
+#endif /* __MFD_MT6359P_REGISTERS_H__ */
diff --git a/include/linux/regulator/mt6359-regulator.h b/include/linux/regulator/mt6359-regulator.h
index 14c4b715613ec..6d6e5a58f482b 100644
--- a/include/linux/regulator/mt6359-regulator.h
+++ b/include/linux/regulator/mt6359-regulator.h
@@ -17,6 +17,7 @@ enum {
 	MT6359_ID_VPROC2,
 	MT6359_ID_VPROC1,
 	MT6359_ID_VCORE_SSHUB,
+	MT6359_ID_VGPU11_SSHUB = MT6359_ID_VCORE_SSHUB,
 	MT6359_ID_VAUD18 = 10,
 	MT6359_ID_VSIM1,
 	MT6359_ID_VIBR,
-- 
GitLab


From 1886ab01a3fb98ee7f7739ae50eb9492f5df3641 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:49 +0200
Subject: [PATCH 1938/3804] evm: Allow setxattr() and setattr() for unmodified
 metadata

With the patch to allow xattr/attr operations if a portable signature
verification fails, cp and tar can copy all xattrs/attrs so that at the
end of the process verification succeeds.

However, it might happen that the xattrs/attrs are already set to the
correct value (taken at signing time) and signature verification succeeds
before the copy has completed. For example, an archive might contain files
owned by root and the archive is extracted by root.

Then, since portable signatures are immutable, all subsequent operations
fail (e.g. fchown()), even if the operation is legitimate (does not alter
the current value).

This patch avoids this problem by reporting successful operation to user
space when that operation does not alter the current value of xattrs/attrs.

With this patch, the one that introduces evm_hmac_disabled() and the one
that allows a metadata operation on the INTEGRITY_FAIL_IMMUTABLE error, EVM
portable signatures can be used without disabling metadata verification
(by setting EVM_ALLOW_METADATA_WRITES). Due to keeping metadata
verification enabled, altering immutable metadata protected with a portable
signature that was successfully verified will be denied (existing
behavior).

Reported-by: kernel test robot <lkp@intel.com> [implicit declaration of function]
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_main.c | 113 +++++++++++++++++++++++++++++-
 1 file changed, 112 insertions(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 300df6906e05d..0196168aeb7d5 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -18,6 +18,7 @@
 #include <linux/integrity.h>
 #include <linux/evm.h>
 #include <linux/magic.h>
+#include <linux/posix_acl_xattr.h>
 
 #include <crypto/hash.h>
 #include <crypto/hash_info.h>
@@ -330,6 +331,92 @@ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
 	return evm_verify_hmac(dentry, NULL, NULL, 0, NULL);
 }
 
+/*
+ * evm_xattr_acl_change - check if passed ACL changes the inode mode
+ * @mnt_userns: user namespace of the idmapped mount
+ * @dentry: pointer to the affected dentry
+ * @xattr_name: requested xattr
+ * @xattr_value: requested xattr value
+ * @xattr_value_len: requested xattr value length
+ *
+ * Check if passed ACL changes the inode mode, which is protected by EVM.
+ *
+ * Returns 1 if passed ACL causes inode mode change, 0 otherwise.
+ */
+static int evm_xattr_acl_change(struct user_namespace *mnt_userns,
+				struct dentry *dentry, const char *xattr_name,
+				const void *xattr_value, size_t xattr_value_len)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+	umode_t mode;
+	struct posix_acl *acl = NULL, *acl_res;
+	struct inode *inode = d_backing_inode(dentry);
+	int rc;
+
+	/*
+	 * user_ns is not relevant here, ACL_USER/ACL_GROUP don't have impact
+	 * on the inode mode (see posix_acl_equiv_mode()).
+	 */
+	acl = posix_acl_from_xattr(&init_user_ns, xattr_value, xattr_value_len);
+	if (IS_ERR_OR_NULL(acl))
+		return 1;
+
+	acl_res = acl;
+	/*
+	 * Passing mnt_userns is necessary to correctly determine the GID in
+	 * an idmapped mount, as the GID is used to clear the setgid bit in
+	 * the inode mode.
+	 */
+	rc = posix_acl_update_mode(mnt_userns, inode, &mode, &acl_res);
+
+	posix_acl_release(acl);
+
+	if (rc)
+		return 1;
+
+	if (inode->i_mode != mode)
+		return 1;
+#endif
+	return 0;
+}
+
+/*
+ * evm_xattr_change - check if passed xattr value differs from current value
+ * @mnt_userns: user namespace of the idmapped mount
+ * @dentry: pointer to the affected dentry
+ * @xattr_name: requested xattr
+ * @xattr_value: requested xattr value
+ * @xattr_value_len: requested xattr value length
+ *
+ * Check if passed xattr value differs from current value.
+ *
+ * Returns 1 if passed xattr value differs from current value, 0 otherwise.
+ */
+static int evm_xattr_change(struct user_namespace *mnt_userns,
+			    struct dentry *dentry, const char *xattr_name,
+			    const void *xattr_value, size_t xattr_value_len)
+{
+	char *xattr_data = NULL;
+	int rc = 0;
+
+	if (posix_xattr_acl(xattr_name))
+		return evm_xattr_acl_change(mnt_userns, dentry, xattr_name,
+					    xattr_value, xattr_value_len);
+
+	rc = vfs_getxattr_alloc(&init_user_ns, dentry, xattr_name, &xattr_data,
+				0, GFP_NOFS);
+	if (rc < 0)
+		return 1;
+
+	if (rc == xattr_value_len)
+		rc = !!memcmp(xattr_value, xattr_data, rc);
+	else
+		rc = 1;
+
+	kfree(xattr_data);
+	return rc;
+}
+
 /*
  * evm_protect_xattr - protect the EVM extended attribute
  *
@@ -397,7 +484,13 @@ out:
 	if (evm_status == INTEGRITY_FAIL_IMMUTABLE)
 		return 0;
 
-	if (evm_status != INTEGRITY_PASS)
+	if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
+	    !evm_xattr_change(mnt_userns, dentry, xattr_name, xattr_value,
+			      xattr_value_len))
+		return 0;
+
+	if (evm_status != INTEGRITY_PASS &&
+	    evm_status != INTEGRITY_PASS_IMMUTABLE)
 		integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
 				    dentry->d_name.name, "appraise_metadata",
 				    integrity_status_msg[evm_status],
@@ -553,6 +646,19 @@ void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
 	evm_update_evmxattr(dentry, xattr_name, NULL, 0);
 }
 
+static int evm_attr_change(struct dentry *dentry, struct iattr *attr)
+{
+	struct inode *inode = d_backing_inode(dentry);
+	unsigned int ia_valid = attr->ia_valid;
+
+	if ((!(ia_valid & ATTR_UID) || uid_eq(attr->ia_uid, inode->i_uid)) &&
+	    (!(ia_valid & ATTR_GID) || gid_eq(attr->ia_gid, inode->i_gid)) &&
+	    (!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode))
+		return 0;
+
+	return 1;
+}
+
 /**
  * evm_inode_setattr - prevent updating an invalid EVM extended attribute
  * @dentry: pointer to the affected dentry
@@ -584,6 +690,11 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
 	    (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL ||
 	     evm_status == INTEGRITY_UNKNOWN)))
 		return 0;
+
+	if (evm_status == INTEGRITY_PASS_IMMUTABLE &&
+	    !evm_attr_change(dentry, attr))
+		return 0;
+
 	integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
 			    dentry->d_name.name, "appraise_metadata",
 			    integrity_status_msg[evm_status], -EPERM, 0);
-- 
GitLab


From 1434c6a1d32a3a1a77f58a03197b802b1724c740 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:50 +0200
Subject: [PATCH 1939/3804] evm: Deprecate EVM_ALLOW_METADATA_WRITES

This patch deprecates the usage of EVM_ALLOW_METADATA_WRITES, as it is no
longer necessary. All the issues that prevent the usage of EVM portable
signatures just with a public key loaded have been solved.

This flag will remain available for a short time to ensure that users are
able to use EVM without it.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/ABI/testing/evm | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
index 2243b72e41107..553fd8a33e567 100644
--- a/Documentation/ABI/testing/evm
+++ b/Documentation/ABI/testing/evm
@@ -24,7 +24,7 @@ Description:
 		1	  Enable digital signature validation
 		2	  Permit modification of EVM-protected metadata at
 			  runtime. Not supported if HMAC validation and
-			  creation is enabled.
+			  creation is enabled (deprecated).
 		31	  Disable further runtime modification of EVM policy
 		===	  ==================================================
 
@@ -47,7 +47,13 @@ Description:
 
 		will enable digital signature validation, permit
 		modification of EVM-protected metadata and
-		disable all further modification of policy
+		disable all further modification of policy. This option is now
+		deprecated in favor of::
+
+		  echo 0x80000002 ><securityfs>/evm
+
+		as the outstanding issues that prevent the usage of EVM portable
+		signatures have been solved.
 
 		Echoing a value is additive, the new value is added to the
 		existing initialization flags.
-- 
GitLab


From 7aa5783d95646f924b99d245338d5b7aa7a2b3c0 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:51 +0200
Subject: [PATCH 1940/3804] ima: Allow imasig requirement to be satisfied by
 EVM portable signatures

System administrators can require that all accessed files have a signature
by specifying appraise_type=imasig in a policy rule.

Currently, IMA signatures satisfy this requirement. Appended signatures may
also satisfy this requirement, but are not applicable as IMA signatures.
IMA/appended signatures ensure data source authentication for file content
and prevent any change. EVM signatures instead ensure data source
authentication for file metadata. Given that the digest or signature of the
file content must be included in the metadata, EVM signatures provide the
same file data guarantees of IMA signatures, as well as providing file
metadata guarantees.

This patch lets systems protected with EVM signatures pass appraisal
verification if the appraise_type=imasig requirement is specified in the
policy. This facilitates deployment in the scenarios where only EVM
signatures are available.

The patch makes the following changes:

file xattr types:
security.ima: IMA_XATTR_DIGEST/IMA_XATTR_DIGEST_NG
security.evm: EVM_XATTR_PORTABLE_DIGSIG

execve(), mmap(), open() behavior (with appraise_type=imasig):
before: denied (file without IMA signature, imasig requirement not met)
after: allowed (file with EVM portable signature, imasig requirement met)

open(O_WRONLY) behavior (without appraise_type=imasig):
before: allowed (file without IMA signature, not immutable)
after: denied (file with EVM portable signature, immutable)

In addition, similarly to IMA signatures, this patch temporarily allows
new files without or with incomplete metadata to be opened so that content
can be written.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_appraise.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 9bb351b933fbc..d9a627de39303 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -242,12 +242,16 @@ static int xattr_verify(enum ima_hooks func, struct integrity_iint_cache *iint,
 		hash_start = 1;
 		fallthrough;
 	case IMA_XATTR_DIGEST:
-		if (iint->flags & IMA_DIGSIG_REQUIRED) {
-			*cause = "IMA-signature-required";
-			*status = INTEGRITY_FAIL;
-			break;
+		if (*status != INTEGRITY_PASS_IMMUTABLE) {
+			if (iint->flags & IMA_DIGSIG_REQUIRED) {
+				*cause = "IMA-signature-required";
+				*status = INTEGRITY_FAIL;
+				break;
+			}
+			clear_bit(IMA_DIGSIG, &iint->atomic_flags);
+		} else {
+			set_bit(IMA_DIGSIG, &iint->atomic_flags);
 		}
-		clear_bit(IMA_DIGSIG, &iint->atomic_flags);
 		if (xattr_len - sizeof(xattr_value->type) - hash_start >=
 				iint->ima_hash->length)
 			/*
@@ -417,6 +421,7 @@ int ima_appraise_measurement(enum ima_hooks func,
 		cause = "missing-HMAC";
 		goto out;
 	case INTEGRITY_FAIL_IMMUTABLE:
+		set_bit(IMA_DIGSIG, &iint->atomic_flags);
 		fallthrough;
 	case INTEGRITY_FAIL:		/* Invalid HMAC/signature. */
 		cause = "invalid-HMAC";
@@ -461,9 +466,12 @@ out:
 				status = INTEGRITY_PASS;
 		}
 
-		/* Permit new files with file signatures, but without data. */
+		/*
+		 * Permit new files with file/EVM portable signatures, but
+		 * without data.
+		 */
 		if (inode->i_size == 0 && iint->flags & IMA_NEW_FILE &&
-		    xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG) {
+		    test_bit(IMA_DIGSIG, &iint->atomic_flags)) {
 			status = INTEGRITY_PASS;
 		}
 
@@ -581,6 +589,8 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
 		if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
 			return -EINVAL;
 		digsig = (xvalue->type == EVM_IMA_XATTR_DIGSIG);
+	} else if (!strcmp(xattr_name, XATTR_NAME_EVM) && xattr_value_len > 0) {
+		digsig = (xvalue->type == EVM_XATTR_PORTABLE_DIGSIG);
 	}
 	if (result == 1 || evm_revalidate_status(xattr_name)) {
 		ima_reset_appraise_flags(d_backing_inode(dentry), digsig);
-- 
GitLab


From 026d7fc92a9d629630779c999fe49ecae93f9d63 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:52 +0200
Subject: [PATCH 1941/3804] ima: Introduce template field evmsig and write to
 field sig as fallback

With the patch to accept EVM portable signatures when the
appraise_type=imasig requirement is specified in the policy, appraisal can
be successfully done even if the file does not have an IMA signature.

However, remote attestation would not see that a different signature type
was used, as only IMA signatures can be included in the measurement list.
This patch solves the issue by introducing the new template field 'evmsig'
to show EVM portable signatures and by including its value in the existing
field 'sig' if the IMA signature is not found.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst  |  4 ++-
 security/integrity/ima/ima_template.c     |  2 ++
 security/integrity/ima/ima_template_lib.c | 33 ++++++++++++++++++++++-
 security/integrity/ima/ima_template_lib.h |  2 ++
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index c5a8432972ef1..9f3e86ab028a4 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -70,9 +70,11 @@ descriptors by adding their identifier to the format string
    prefix is shown only if the hash algorithm is not SHA1 or MD5);
  - 'd-modsig': the digest of the event without the appended modsig;
  - 'n-ng': the name of the event, without size limitations;
- - 'sig': the file signature;
+ - 'sig': the file signature, or the EVM portable signature if the file
+   signature is not found;
  - 'modsig' the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
+ - 'evmsig': the EVM portable signature;
 
 
 Below, there is the list of defined template descriptors:
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 4e081e6500476..7a60848c04a54 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -45,6 +45,8 @@ static const struct ima_template_field supported_fields[] = {
 	 .field_show = ima_show_template_digest_ng},
 	{.field_id = "modsig", .field_init = ima_eventmodsig_init,
 	 .field_show = ima_show_template_sig},
+	{.field_id = "evmsig", .field_init = ima_eventevmsig_init,
+	 .field_show = ima_show_template_sig},
 };
 
 /*
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index c022ee9e2a4e6..4314d9a3514c1 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -10,6 +10,7 @@
  */
 
 #include "ima_template_lib.h"
+#include <linux/xattr.h>
 
 static bool ima_template_hash_algo_allowed(u8 algo)
 {
@@ -438,7 +439,7 @@ int ima_eventsig_init(struct ima_event_data *event_data,
 	struct evm_ima_xattr_data *xattr_value = event_data->xattr_value;
 
 	if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
-		return 0;
+		return ima_eventevmsig_init(event_data, field_data);
 
 	return ima_write_template_field_data(xattr_value, event_data->xattr_len,
 					     DATA_FMT_HEX, field_data);
@@ -484,3 +485,33 @@ int ima_eventmodsig_init(struct ima_event_data *event_data,
 	return ima_write_template_field_data(data, data_len, DATA_FMT_HEX,
 					     field_data);
 }
+
+/*
+ *  ima_eventevmsig_init - include the EVM portable signature as part of the
+ *  template data
+ */
+int ima_eventevmsig_init(struct ima_event_data *event_data,
+			 struct ima_field_data *field_data)
+{
+	struct evm_ima_xattr_data *xattr_data = NULL;
+	int rc = 0;
+
+	if (!event_data->file)
+		return 0;
+
+	rc = vfs_getxattr_alloc(&init_user_ns, file_dentry(event_data->file),
+				XATTR_NAME_EVM, (char **)&xattr_data, 0,
+				GFP_NOFS);
+	if (rc <= 0)
+		return 0;
+
+	if (xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) {
+		kfree(xattr_data);
+		return 0;
+	}
+
+	rc = ima_write_template_field_data((char *)xattr_data, rc, DATA_FMT_HEX,
+					   field_data);
+	kfree(xattr_data);
+	return rc;
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index 6b3b880637a0c..f4b2a2056d1d5 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -46,4 +46,6 @@ int ima_eventbuf_init(struct ima_event_data *event_data,
 		      struct ima_field_data *field_data);
 int ima_eventmodsig_init(struct ima_event_data *event_data,
 			 struct ima_field_data *field_data);
+int ima_eventevmsig_init(struct ima_event_data *event_data,
+			 struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
-- 
GitLab


From ed1b472fc15aeaa20ddeeb93fd25190014e50d17 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 14 May 2021 17:27:53 +0200
Subject: [PATCH 1942/3804] ima: Don't remove security.ima if file must not be
 appraised

Files might come from a remote source and might have xattrs, including
security.ima. It should not be IMA's task to decide whether security.ima
should be kept or not. This patch removes the __vfs_removexattr() call
in ima_inode_post_setattr().

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_appraise.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index d9a627de39303..940695e7b5356 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -532,8 +532,6 @@ void ima_inode_post_setattr(struct user_namespace *mnt_userns,
 		return;
 
 	action = ima_must_appraise(mnt_userns, inode, MAY_ACCESS, POST_SETATTR);
-	if (!action)
-		__vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_IMA);
 	iint = integrity_iint_find(inode);
 	if (iint) {
 		set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags);
-- 
GitLab


From f8849e206ef52b584cd9227255f4724f0cc900bb Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Wed, 19 May 2021 17:15:10 -0400
Subject: [PATCH 1943/3804] NFSv4: nfs4_proc_set_acl needs to restore
 NFS_CAP_UIDGID_NOMAP on error.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently if __nfs4_proc_set_acl fails with NFS4ERR_BADOWNER it
re-enables the idmapper by clearing NFS_CAP_UIDGID_NOMAP before
retrying again. The NFS_CAP_UIDGID_NOMAP remains cleared even if
the retry fails. This causes problems for subsequent setattr
requests to a v4 server that does not have idmapping configured.

This patch modifies nfs4_proc_set_acl to detect NFS4ERR_BADOWNER
and NFS4ERR_BADNAME, skip the retry since the kernel isn't
involved in encoding the ACEs, and return -EINVAL.

Steps to reproduce the problem:

 # mount -o vers=4.1,sec=sys server:/export/test /tmp/mnt
 # touch /tmp/mnt/file1
 # chown 99 /tmp/mnt/file1
 # nfs4_setfacl -a A::unknown.user@xyz.com:wrtncy /tmp/mnt/file1
 Failed setxattr operation: Invalid argument
 # chown 99 /tmp/mnt/file1
 chown: changing ownership of ‘/tmp/mnt/file1’: Invalid argument
 # umount /tmp/mnt
 # mount -o vers=4.1,sec=sys server:/export/test /tmp/mnt
 # chown 99 /tmp/mnt/file1
 #

v2: detect NFS4ERR_BADOWNER and NFS4ERR_BADNAME and skip retry
       in nfs4_proc_set_acl.
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0cd9658822329..d671b2884d5ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5968,6 +5968,14 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 	do {
 		err = __nfs4_proc_set_acl(inode, buf, buflen);
 		trace_nfs4_set_acl(inode, err);
+		if (err == -NFS4ERR_BADOWNER || err == -NFS4ERR_BADNAME) {
+			/*
+			 * no need to retry since the kernel
+			 * isn't involved in encoding the ACEs.
+			 */
+			err = -EINVAL;
+			break;
+		}
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
-- 
GitLab


From 43de30d36742dbbde22f2ad526c3e5a403c271e2 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <sam.tebbs@arm.com>
Date: Thu, 27 May 2021 16:34:41 +0100
Subject: [PATCH 1944/3804] arm64: Import latest version of Cortex Strings'
 memcmp

Import the latest version of the former Cortex Strings - now
Arm Optimized Routines - memcmp function based on the upstream
code of string/aarch64/memcmp.S at commit e823e3a from
https://github.com/ARM-software/optimized-routines

Note that for simplicity Arm have chosen to contribute this code
to Linux under GPLv2 rather than the original MIT license.

Signed-off-by: Sam Tebbs <sam.tebbs@arm.com>
[ rm: update attribution and commit message ]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/2889de2d41054f3f508fb3addad784a3606ef383.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/memcmp.S | 346 ++++++++++++++--------------------------
 1 file changed, 119 insertions(+), 227 deletions(-)

diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index c0671e793ea91..498f0d9941d91 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -1,247 +1,139 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013-2020, Arm Limited.
  *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/memcmp.S
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
-* compare memory areas(when two memory areas' offset are different,
-* alignment handled by the hardware)
-*
-* Parameters:
-*  x0 - const memory area 1 pointer
-*  x1 - const memory area 2 pointer
-*  x2 - the maximal compare byte length
-* Returns:
-*  x0 - a compare result, maybe less than, equal to, or greater than ZERO
-*/
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+#define L(label) .L ## label
 
 /* Parameters and result.  */
-src1		.req	x0
-src2		.req	x1
-limit		.req	x2
-result		.req	x0
+#define src1		x0
+#define src2		x1
+#define limit		x2
+#define result		w0
 
 /* Internal variables.  */
-data1		.req	x3
-data1w		.req	w3
-data2		.req	x4
-data2w		.req	w4
-has_nul		.req	x5
-diff		.req	x6
-endloop		.req	x7
-tmp1		.req	x8
-tmp2		.req	x9
-tmp3		.req	x10
-pos		.req	x11
-limit_wd	.req	x12
-mask		.req	x13
+#define data1		x3
+#define data1w		w3
+#define data1h		x4
+#define data2		x5
+#define data2w		w5
+#define data2h		x6
+#define tmp1		x7
+#define tmp2		x8
 
 SYM_FUNC_START_WEAK_PI(memcmp)
-	cbz	limit, .Lret0
-	eor	tmp1, src1, src2
-	tst	tmp1, #7
-	b.ne	.Lmisaligned8
-	ands	tmp1, src1, #7
-	b.ne	.Lmutual_align
-	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #3 /* Convert to Dwords.  */
-	/*
-	* The input source addresses are at alignment boundary.
-	* Directly compare eight bytes each time.
-	*/
-.Lloop_aligned:
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-.Lstart_realigned:
-	subs	limit_wd, limit_wd, #1
-	eor	diff, data1, data2	/* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, cs	/* Last Dword or differences.  */
-	cbz	endloop, .Lloop_aligned
-
-	/* Not reached the limit, must have found a diff.  */
-	tbz	limit_wd, #63, .Lnot_limit
-
-	/* Limit % 8 == 0 => the diff is in the last 8 bytes. */
-	ands	limit, limit, #7
-	b.eq	.Lnot_limit
-	/*
-	* The remained bytes less than 8. It is needed to extract valid data
-	* from last eight bytes of the intended memory range.
-	*/
-	lsl	limit, limit, #3	/* bytes-> bits.  */
-	mov	mask, #~0
-CPU_BE( lsr	mask, mask, limit )
-CPU_LE( lsl	mask, mask, limit )
-	bic	data1, data1, mask
-	bic	data2, data2, mask
-
-	orr	diff, diff, mask
-	b	.Lnot_limit
-
-.Lmutual_align:
-	/*
-	* Sources are mutually aligned, but are not currently at an
-	* alignment boundary. Round down the addresses and then mask off
-	* the bytes that precede the start point.
-	*/
-	bic	src1, src1, #7
-	bic	src2, src2, #7
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	/*
-	* We can not add limit with alignment offset(tmp1) here. Since the
-	* addition probably make the limit overflown.
-	*/
-	sub	limit_wd, limit, #1/*limit != 0, so no underflow.*/
-	and	tmp3, limit_wd, #7
-	lsr	limit_wd, limit_wd, #3
-	add	tmp3, tmp3, tmp1
-	add	limit_wd, limit_wd, tmp3, lsr #3
-	add	limit, limit, tmp1/* Adjust the limit for the extra.  */
-
-	lsl	tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
-	neg	tmp1, tmp1/* Bits to alignment -64.  */
-	mov	tmp2, #~0
-	/*mask off the non-intended bytes before the start address.*/
-CPU_BE( lsl	tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
-	/* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr	tmp2, tmp2, tmp1 )
-
-	orr	data1, data1, tmp2
-	orr	data2, data2, tmp2
-	b	.Lstart_realigned
-
-	/*src1 and src2 have different alignment offset.*/
-.Lmisaligned8:
-	cmp	limit, #8
-	b.lo	.Ltiny8proc /*limit < 8: compare byte by byte*/
-
-	and	tmp1, src1, #7
-	neg	tmp1, tmp1
-	add	tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
-	and	tmp2, src2, #7
-	neg	tmp2, tmp2
-	add	tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
-	subs	tmp3, tmp1, tmp2
-	csel	pos, tmp1, tmp2, hi /*Choose the maximum.*/
-
-	sub	limit, limit, pos
-	/*compare the proceeding bytes in the first 8 byte segment.*/
-.Ltinycmp:
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	subs	pos, pos, #1
-	ccmp	data1w, data2w, #0, ne  /* NZCV = 0b0000.  */
-	b.eq	.Ltinycmp
-	cbnz	pos, 1f /*diff occurred before the last byte.*/
-	cmp	data1w, data2w
-	b.eq	.Lstart_align
-1:
-	sub	result, data1, data2
+	subs	limit, limit, 8
+	b.lo	L(less8)
+
+	ldr	data1, [src1], 8
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	b.ne	L(return)
+
+	subs	limit, limit, 8
+	b.gt	L(more16)
+
+	ldr	data1, [src1, limit]
+	ldr	data2, [src2, limit]
+	b	L(return)
+
+L(more16):
+	ldr	data1, [src1], 8
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+	   strings.  */
+	subs	limit, limit, 16
+	b.ls	L(last_bytes)
+
+	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
+	   try to align, so limit it only to strings larger than 128 bytes.  */
+	cmp	limit, 96
+	b.ls	L(loop16)
+
+	/* Align src1 and adjust src2 with bytes not yet done.  */
+	and	tmp1, src1, 15
+	add	limit, limit, tmp1
+	sub	src1, src1, tmp1
+	sub	src2, src2, tmp1
+
+	/* Loop performing 16 bytes per iteration using aligned src1.
+	   Limit is pre-decremented by 16 and must be larger than zero.
+	   Exit if <= 16 bytes left to do or if the data is not equal.  */
+	.p2align 4
+L(loop16):
+	ldp	data1, data1h, [src1], 16
+	ldp	data2, data2h, [src2], 16
+	subs	limit, limit, 16
+	ccmp	data1, data2, 0, hi
+	ccmp	data1h, data2h, 0, eq
+	b.eq	L(loop16)
+
+	cmp	data1, data2
+	bne	L(return)
+	mov	data1, data1h
+	mov	data2, data2h
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Compare last 1-16 bytes using unaligned access.  */
+L(last_bytes):
+	add	src1, src1, limit
+	add	src2, src2, limit
+	ldp	data1, data1h, [src1]
+	ldp	data2, data2h, [src2]
+	cmp	data1, data2
+	bne	L(return)
+	mov	data1, data1h
+	mov	data2, data2h
+	cmp	data1, data2
+
+	/* Compare data bytes and set return value to 0, -1 or 1.  */
+L(return):
+#ifndef __AARCH64EB__
+	rev	data1, data1
+	rev	data2, data2
+#endif
+	cmp	data1, data2
+L(ret_eq):
+	cset	result, ne
+	cneg	result, result, lo
 	ret
 
-.Lstart_align:
-	lsr	limit_wd, limit, #3
-	cbz	limit_wd, .Lremain8
-
-	ands	xzr, src1, #7
-	b.eq	.Lrecal_offset
-	/*process more leading bytes to make src1 aligned...*/
-	add	src1, src1, tmp3 /*backwards src1 to alignment boundary*/
-	add	src2, src2, tmp3
-	sub	limit, limit, tmp3
-	lsr	limit_wd, limit, #3
-	cbz	limit_wd, .Lremain8
-	/*load 8 bytes from aligned SRC1..*/
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-
-	subs	limit_wd, limit_wd, #1
-	eor	diff, data1, data2  /*Non-zero if differences found.*/
-	csinv	endloop, diff, xzr, ne
-	cbnz	endloop, .Lunequal_proc
-	/*How far is the current SRC2 from the alignment boundary...*/
-	and	tmp3, tmp3, #7
-
-.Lrecal_offset:/*src1 is aligned now..*/
-	neg	pos, tmp3
-.Lloopcmp_proc:
-	/*
-	* Divide the eight bytes into two parts. First,backwards the src2
-	* to an alignment boundary,load eight bytes and compare from
-	* the SRC2 alignment boundary. If all 8 bytes are equal,then start
-	* the second part's comparison. Otherwise finish the comparison.
-	* This special handle can garantee all the accesses are in the
-	* thread/task space in avoid to overrange access.
-	*/
-	ldr	data1, [src1,pos]
-	ldr	data2, [src2,pos]
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	cbnz	diff, .Lnot_limit
-
-	/*The second part process*/
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	subs	limit_wd, limit_wd, #1
-	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-	cbz	endloop, .Lloopcmp_proc
-.Lunequal_proc:
-	cbz	diff, .Lremain8
-
-/* There is difference occurred in the latest comparison. */
-.Lnot_limit:
-/*
-* For little endian,reverse the low significant equal bits into MSB,then
-* following CLZ can find how many equal bits exist.
-*/
-CPU_LE( rev	diff, diff )
-CPU_LE( rev	data1, data1 )
-CPU_LE( rev	data2, data2 )
-
-	/*
-	* The MS-non-zero bit of DIFF marks either the first bit
-	* that is different, or the end of the significant data.
-	* Shifting left now will bring the critical information into the
-	* top bits.
-	*/
-	clz	pos, diff
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/*
-	* We need to zero-extend (char is unsigned) the value and then
-	* perform a signed subtraction.
-	*/
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
+	.p2align 4
+	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
+L(less8):
+	adds	limit, limit, 4
+	b.lo	L(less4)
+	ldr	data1w, [src1], 4
+	ldr	data2w, [src2], 4
+	cmp	data1w, data2w
+	b.ne	L(return)
+	sub	limit, limit, 4
+L(less4):
+	adds	limit, limit, 4
+	beq	L(ret_eq)
+L(byte_loop):
+	ldrb	data1w, [src1], 1
+	ldrb	data2w, [src2], 1
+	subs	limit, limit, 1
+	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
+	b.eq	L(byte_loop)
+	sub	result, data1w, data2w
 	ret
 
-.Lremain8:
-	/* Limit % 8 == 0 =>. all data are equal.*/
-	ands	limit, limit, #7
-	b.eq	.Lret0
-
-.Ltiny8proc:
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	subs	limit, limit, #1
-
-	ccmp	data1w, data2w, #0, ne  /* NZCV = 0b0000. */
-	b.eq	.Ltiny8proc
-	sub	result, data1, data2
-	ret
-.Lret0:
-	mov	result, #0
-	ret
 SYM_FUNC_END_PI(memcmp)
 EXPORT_SYMBOL_NOKASAN(memcmp)
-- 
GitLab


From 758602c04409d8c5a092cef570b2de125ce0f2ae Mon Sep 17 00:00:00 2001
From: Sam Tebbs <sam.tebbs@arm.com>
Date: Thu, 27 May 2021 16:34:42 +0100
Subject: [PATCH 1945/3804] arm64: Import latest version of Cortex Strings'
 strcmp

Import the latest version of the former Cortex Strings - now
Arm Optimized Routines - strcmp function based on the upstream
code of string/aarch64/strcmp.S at commit afd6244 from
https://github.com/ARM-software/optimized-routines

Note that for simplicity Arm have chosen to contribute this code
to Linux under GPLv2 rather than the original MIT license.

Signed-off-by: Sam Tebbs <sam.tebbs@arm.com>
[ rm: update attribution and commit message ]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/0fe90c90b96b569fbdfd46e47bd1298abb02079e.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/strcmp.S | 289 +++++++++++++++++-----------------------
 1 file changed, 121 insertions(+), 168 deletions(-)

diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 4e79566726c87..e82ccb6c2f931 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,84 +1,123 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2020, Arm Limited.
  *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strcmp.S
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * compare two strings
+/* Assumptions:
  *
- * Parameters:
- *	x0 - const string 1 pointer
- *    x1 - const string 2 pointer
- * Returns:
- * x0 - an integer less than, equal to, or greater than zero
- * if  s1  is  found, respectively, to be less than, to match,
- * or be greater than s2.
+ * ARMv8-a, AArch64
  */
 
+#define L(label) .L ## label
+
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
 /* Parameters and result.  */
-src1		.req	x0
-src2		.req	x1
-result		.req	x0
+#define src1		x0
+#define src2		x1
+#define result		x0
 
 /* Internal variables.  */
-data1		.req	x2
-data1w		.req	w2
-data2		.req	x3
-data2w		.req	w3
-has_nul		.req	x4
-diff		.req	x5
-syndrome	.req	x6
-tmp1		.req	x7
-tmp2		.req	x8
-tmp3		.req	x9
-zeroones	.req	x10
-pos		.req	x11
-
+#define data1		x2
+#define data1w		w2
+#define data2		x3
+#define data2w		w3
+#define has_nul		x4
+#define diff		x5
+#define syndrome	x6
+#define tmp1		x7
+#define tmp2		x8
+#define tmp3		x9
+#define zeroones	x10
+#define pos		x11
+
+	/* Start of performance-critical section  -- one 64B cache line.  */
+	.align 6
 SYM_FUNC_START_WEAK_PI(strcmp)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
-	b.ne	.Lmisaligned8
+	b.ne	L(misaligned8)
 	ands	tmp1, src1, #7
-	b.ne	.Lmutual_align
-
-	/*
-	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	* can be done in parallel across the entire word.
-	*/
-.Lloop_aligned:
+	b.ne	L(mutual_align)
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+L(loop_aligned):
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
 	eor	diff, data1, data2	/* Non-zero if differences found.  */
 	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, .Lloop_aligned
-	b	.Lcal_cmpresult
+	cbz	syndrome, L(loop_aligned)
+	/* End of performance-critical section  -- one 64B cache line.  */
+
+L(end):
+#ifndef	__AARCH64EB__
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne
+	cneg	result, result, lo
+	ret
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#endif
 
-.Lmutual_align:
-	/*
-	* Sources are mutually aligned, but are not currently at an
-	* alignment boundary.  Round down the addresses and then mask off
-	* the bytes that preceed the start point.
-	*/
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that preceed the start point.  */
 	bic	src1, src1, #7
 	bic	src2, src2, #7
 	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
@@ -86,138 +125,52 @@ SYM_FUNC_START_WEAK_PI(strcmp)
 	neg	tmp1, tmp1		/* Bits to alignment -64.  */
 	ldr	data2, [src2], #8
 	mov	tmp2, #~0
+#ifdef __AARCH64EB__
 	/* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#else
 	/* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
-
+	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+#endif
 	orr	data1, data1, tmp2
 	orr	data2, data2, tmp2
-	b	.Lstart_realigned
-
-.Lmisaligned8:
-	/*
-	* Get the align offset length to compare per byte first.
-	* After this process, one string's address will be aligned.
-	*/
-	and	tmp1, src1, #7
-	neg	tmp1, tmp1
-	add	tmp1, tmp1, #8
-	and	tmp2, src2, #7
-	neg	tmp2, tmp2
-	add	tmp2, tmp2, #8
-	subs	tmp3, tmp1, tmp2
-	csel	pos, tmp1, tmp2, hi /*Choose the maximum. */
-.Ltinycmp:
+	b	L(start_realigned)
+
+L(misaligned8):
+	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
+	   checking to make sure that we don't access beyond page boundary in
+	   SRC2.  */
+	tst	src1, #7
+	b.eq	L(loop_misaligned)
+L(do_misaligned):
 	ldrb	data1w, [src1], #1
 	ldrb	data2w, [src2], #1
-	subs	pos, pos, #1
-	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-	b.eq	.Ltinycmp
-	cbnz	pos, 1f /*find the null or unequal...*/
 	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs
-	b.eq	.Lstart_align /*the last bytes are equal....*/
-1:
-	sub	result, data1, data2
-	ret
-
-.Lstart_align:
-	ands	xzr, src1, #7
-	b.eq	.Lrecal_offset
-	/*process more leading bytes to make str1 aligned...*/
-	add	src1, src1, tmp3
-	add	src2, src2, tmp3
-	/*load 8 bytes from aligned str1 and non-aligned str2..*/
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.ne	L(done)
+	tst	src1, #7
+	b.ne	L(do_misaligned)
+
+L(loop_misaligned):
+	/* Test if we are within the last dword of the end of a 4K page.  If
+	   yes then jump back to the misaligned loop to copy a byte at a time.  */
+	and	tmp1, src2, #0xff8
+	eor	tmp1, tmp1, #0xff8
+	cbz	tmp1, L(do_misaligned)
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
 
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	eor	diff, data1, data2 /* Non-zero if differences found.  */
-	orr	syndrome, diff, has_nul
-	cbnz	syndrome, .Lcal_cmpresult
-	/*How far is the current str2 from the alignment boundary...*/
-	and	tmp3, tmp3, #7
-.Lrecal_offset:
-	neg	pos, tmp3
-.Lloopcmp_proc:
-	/*
-	* Divide the eight bytes into two parts. First,backwards the src2
-	* to an alignment boundary,load eight bytes from the SRC2 alignment
-	* boundary,then compare with the relative bytes from SRC1.
-	* If all 8 bytes are equal,then start the second part's comparison.
-	* Otherwise finish the comparison.
-	* This special handle can garantee all the accesses are in the
-	* thread/task space in avoid to overrange access.
-	*/
-	ldr	data1, [src1,pos]
-	ldr	data2, [src2,pos]
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	orr	syndrome, diff, has_nul
-	cbnz	syndrome, .Lcal_cmpresult
-
-	/*The second part process*/
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	bic	has_nul, tmp1, tmp2
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
 	orr	syndrome, diff, has_nul
-	cbz	syndrome, .Lloopcmp_proc
+	cbz	syndrome, L(loop_misaligned)
+	b	L(end)
 
-.Lcal_cmpresult:
-	/*
-	* reversed the byte-order as big-endian,then CLZ can find the most
-	* significant zero bits.
-	*/
-CPU_LE( rev	syndrome, syndrome )
-CPU_LE( rev	data1, data1 )
-CPU_LE( rev	data2, data2 )
-
-	/*
-	* For big-endian we cannot use the trick with the syndrome value
-	* as carry-propagation can corrupt the upper bits if the trailing
-	* bytes in the string contain 0x01.
-	* However, if there is no NUL byte in the dword, we can generate
-	* the result directly.  We cannot just subtract the bytes as the
-	* MSB might be significant.
-	*/
-CPU_BE( cbnz	has_nul, 1f )
-CPU_BE( cmp	data1, data2 )
-CPU_BE( cset	result, ne )
-CPU_BE( cneg	result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
-	/*Re-compute the NUL-byte detection, using a byte-reversed value. */
-CPU_BE(	rev	tmp3, data1 )
-CPU_BE(	sub	tmp1, tmp3, zeroones )
-CPU_BE(	orr	tmp2, tmp3, #REP8_7f )
-CPU_BE(	bic	has_nul, tmp1, tmp2 )
-CPU_BE(	rev	has_nul, has_nul )
-CPU_BE(	orr	syndrome, diff, has_nul )
-
-	clz	pos, syndrome
-	/*
-	* The MS-non-zero bit of the syndrome marks either the first bit
-	* that is different, or the top bit of the first zero byte.
-	* Shifting left now will bring the critical information into the
-	* top bits.
-	*/
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/*
-	* But we need to zero-extend (char is unsigned) the value and then
-	* perform a signed 32-bit subtraction.
-	*/
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
+L(done):
+	sub	result, data1, data2
 	ret
+
 SYM_FUNC_END_PI(strcmp)
 EXPORT_SYMBOL_NOKASAN(strcmp)
-- 
GitLab


From 325a1de81287a3d4ea2b8e6528a534c6c3a7c608 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <sam.tebbs@arm.com>
Date: Thu, 27 May 2021 16:34:43 +0100
Subject: [PATCH 1946/3804] arm64: Import updated version of Cortex Strings'
 strlen

Import an updated version of the former Cortex Strings - now Arm
Optimized Routines - strlen function. The latest version introduces
Advanced SIMD usage which rules it out for our purposes, but we can
still pick an intermediate improvement from the previous version,
namely string/aarch64/strlen.S at commit 98e4d6a from
https://github.com/ARM-software/optimized-routines

Note that for simplicity Arm have chosen to contribute this code
to Linux under GPLv2 rather than the original MIT license.

Signed-off-by: Sam Tebbs <sam.tebbs@arm.com>
[ rm: update attribution and commit message ]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/32e3489398a24b23ae6e996935ac4818f8fd9dfd.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/strlen.S | 258 +++++++++++++++++++++++++++-------------
 1 file changed, 173 insertions(+), 85 deletions(-)

diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index ee3ed882dd79f..b557185b54a59 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -1,115 +1,203 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013, Arm Limited.
  *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strlen.S
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * calculate the length of a string
+/* Assumptions:
  *
- * Parameters:
- *	x0 - const string pointer
- * Returns:
- *	x0 - the return length of specific string
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
  */
 
+#define L(label) .L ## label
+
 /* Arguments and results.  */
-srcin		.req	x0
-len		.req	x0
+#define srcin		x0
+#define len		x0
 
 /* Locals and temporaries.  */
-src		.req	x1
-data1		.req	x2
-data2		.req	x3
-data2a		.req	x4
-has_nul1	.req	x5
-has_nul2	.req	x6
-tmp1		.req	x7
-tmp2		.req	x8
-tmp3		.req	x9
-tmp4		.req	x10
-zeroones	.req	x11
-pos		.req	x12
+#define src		x1
+#define data1		x2
+#define data2		x3
+#define has_nul1	x4
+#define has_nul2	x5
+#define tmp1		x4
+#define tmp2		x5
+#define tmp3		x6
+#define tmp4		x7
+#define zeroones	x8
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word. A faster check
+	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+	   false hits for characters 129..255.	*/
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
+#define MIN_PAGE_SIZE 4096
+
+	/* Since strings are short on average, we check the first 16 bytes
+	   of the string for a NUL character.  In order to do an unaligned ldp
+	   safely we have to do a page cross check first.  If there is a NUL
+	   byte we calculate the length from the 2 8-byte words using
+	   conditional select to reduce branch mispredictions (it is unlikely
+	   strlen will be repeatedly called on strings with the same length).
+
+	   If the string is longer than 16 bytes, we align src so don't need
+	   further page cross checks, and process 32 bytes per iteration
+	   using the fast NUL check.  If we encounter non-ASCII characters,
+	   fallback to a second loop using the full NUL check.
+
+	   If the page cross check fails, we read 16 bytes from an aligned
+	   address, remove any characters before the string, and continue
+	   in the main loop using aligned loads.  Since strings crossing a
+	   page in the first 16 bytes are rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+	   AArch64 systems have a minimum page size of 4k.  We don't bother
+	   checking for larger page sizes - the cost of setting up the correct
+	   page size is just not worth the extra gain from a small reduction in
+	   the cases taking the slow path.  Note that we only care about
+	   whether the first fetch, which may be misaligned, crosses a page
+	   boundary.  */
+
 SYM_FUNC_START_WEAK_PI(strlen)
-	mov	zeroones, #REP8_01
-	bic	src, srcin, #15
-	ands	tmp1, srcin, #15
-	b.ne	.Lmisaligned
-	/*
-	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	* can be done in parallel across the entire word.
-	*/
-	/*
-	* The inner loop deals with two Dwords at a time. This has a
-	* slightly higher start-up cost, but we should win quite quickly,
-	* especially on cores with a high number of issue slots per
-	* cycle, as we get much better parallelism out of the operations.
-	*/
-.Lloop:
-	ldp	data1, data2, [src], #16
-.Lrealigned:
+	and	tmp1, srcin, MIN_PAGE_SIZE - 1
+	mov	zeroones, REP8_01
+	cmp	tmp1, MIN_PAGE_SIZE - 16
+	b.gt	L(page_cross)
+	ldp	data1, data2, [srcin]
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.
+	   Since we expect strings to be small and early-exit,
+	   byte-swap the data now so has_nul1/2 will be correct.  */
+	rev	data1, data1
+	rev	data2, data2
+#endif
 	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
+	orr	tmp2, data1, REP8_7f
 	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, #REP8_7f
-	bic	has_nul1, tmp1, tmp2
-	bics	has_nul2, tmp3, tmp4
-	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
-	b.eq	.Lloop
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(main_loop_entry)
+
+	/* Enter with C = has_nul1 == 0.  */
+	csel	has_nul1, has_nul1, has_nul2, cc
+	mov	len, 8
+	rev	has_nul1, has_nul1
+	clz	tmp1, has_nul1
+	csel	len, xzr, len, cc
+	add	len, len, tmp1, lsr 3
+	ret
 
+	/* The inner loop processes 32 bytes per iteration and uses the fast
+	   NUL check.  If we encounter non-ASCII characters, use a second
+	   loop with the accurate NUL check.  */
+	.p2align 4
+L(main_loop_entry):
+	bic	src, srcin, 15
+	sub	src, src, 16
+L(main_loop):
+	ldp	data1, data2, [src, 32]!
+L(page_cross_entry):
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	bne	1f
+	ldp	data1, data2, [src, 16]
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	beq	L(main_loop)
+	add	src, src, 16
+1:
+	/* The fast check failed, so do the slower, accurate NUL check.	 */
+	orr	tmp2, data1, REP8_7f
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+
+	/* Enter with C = has_nul1 == 0.  */
+L(tail):
+#ifdef __AARCH64EB__
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, cc
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, cc
+#endif
 	sub	len, src, srcin
-	cbz	has_nul1, .Lnul_in_data2
-CPU_BE(	mov	data2, data1 )	/*prepare data to re-calculate the syndrome*/
-	sub	len, len, #8
-	mov	has_nul2, has_nul1
-.Lnul_in_data2:
-	/*
-	* For big-endian, carry propagation (if the final byte in the
-	* string is 0x01) means we cannot use has_nul directly.  The
-	* easiest way to get the correct byte is to byte-swap the data
-	* and calculate the syndrome a second time.
-	*/
-CPU_BE( rev	data2, data2 )
-CPU_BE( sub	tmp1, data2, zeroones )
-CPU_BE( orr	tmp2, data2, #REP8_7f )
-CPU_BE( bic	has_nul2, tmp1, tmp2 )
-
-	sub	len, len, #8
-	rev	has_nul2, has_nul2
-	clz	pos, has_nul2
-	add	len, len, pos, lsr #3		/* Bits to bytes.  */
+	rev	has_nul1, has_nul1
+	add	tmp2, len, 8
+	clz	tmp1, has_nul1
+	csel	len, len, tmp2, cc
+	add	len, len, tmp1, lsr 3
 	ret
 
-.Lmisaligned:
-	cmp	tmp1, #8
-	neg	tmp1, tmp1
-	ldp	data1, data2, [src], #16
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	mov	tmp2, #~0
-	/* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+L(nonascii_loop):
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	bne	L(tail)
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+	b	L(tail)
+
+	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+	   srcin to 0x7f, so we ignore any NUL bytes before the string.
+	   Then continue in the aligned loop.  */
+L(page_cross):
+	bic	src, srcin, 15
+	ldp	data1, data2, [src]
+	lsl	tmp1, srcin, 3
+	mov	tmp4, -1
+#ifdef __AARCH64EB__
+	/* Big-endian.	Early bytes are at MSB.	 */
+	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#else
 	/* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr	tmp2, tmp2, tmp1 )	/* Shift (tmp1 & 63).  */
+	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
+#endif
+	orr	tmp1, tmp1, REP8_80
+	orn	data1, data1, tmp1
+	orn	tmp2, data2, tmp1
+	tst	srcin, 8
+	csel	data1, data1, tmp4, eq
+	csel	data2, data2, tmp2, eq
+	b	L(page_cross_entry)
 
-	orr	data1, data1, tmp2
-	orr	data2a, data2, tmp2
-	csinv	data1, data1, xzr, le
-	csel	data2, data2, data2a, le
-	b	.Lrealigned
 SYM_FUNC_END_PI(strlen)
 EXPORT_SYMBOL_NOKASAN(strlen)
-- 
GitLab


From 020b199bc70d98d92e1bbc6a71358d7293ebc5ea Mon Sep 17 00:00:00 2001
From: Sam Tebbs <sam.tebbs@arm.com>
Date: Thu, 27 May 2021 16:34:44 +0100
Subject: [PATCH 1947/3804] arm64: Import latest version of Cortex Strings'
 strncmp

Import the latest version of the former Cortex Strings - now
Arm Optimized Routines - strncmp function based on the upstream
code of string/aarch64/strncmp.S at commit e823e3a from
https://github.com/ARM-software/optimized-routines

Note that for simplicity Arm have chosen to contribute this code
to Linux under GPLv2 rather than the original MIT license.

Signed-off-by: Sam Tebbs <sam.tebbs@arm.com>
[ rm: update attribution and commit message ]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/26110bee02ad360596c9a7536af7eaaf6890d0e8.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/strncmp.S | 406 ++++++++++++++++++---------------------
 1 file changed, 184 insertions(+), 222 deletions(-)

diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 2a7ee949ed471..0c0bf5462de05 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,299 +1,261 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2013, Arm Limited.
  *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strncmp.S
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * compare two strings
+/* Assumptions:
  *
- * Parameters:
- *  x0 - const string 1 pointer
- *  x1 - const string 2 pointer
- *  x2 - the maximal length to be compared
- * Returns:
- *  x0 - an integer less than, equal to, or greater than zero if s1 is found,
- *     respectively, to be less than, to match, or be greater than s2.
+ * ARMv8-a, AArch64
  */
 
+#define L(label) .L ## label
+
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
 /* Parameters and result.  */
-src1		.req	x0
-src2		.req	x1
-limit		.req	x2
-result		.req	x0
+#define src1		x0
+#define src2		x1
+#define limit		x2
+#define result		x0
 
 /* Internal variables.  */
-data1		.req	x3
-data1w		.req	w3
-data2		.req	x4
-data2w		.req	w4
-has_nul		.req	x5
-diff		.req	x6
-syndrome	.req	x7
-tmp1		.req	x8
-tmp2		.req	x9
-tmp3		.req	x10
-zeroones	.req	x11
-pos		.req	x12
-limit_wd	.req	x13
-mask		.req	x14
-endloop		.req	x15
+#define data1		x3
+#define data1w		w3
+#define data2		x4
+#define data2w		w4
+#define has_nul		x5
+#define diff		x6
+#define syndrome	x7
+#define tmp1		x8
+#define tmp2		x9
+#define tmp3		x10
+#define zeroones	x11
+#define pos		x12
+#define limit_wd	x13
+#define mask		x14
+#define endloop		x15
+#define count		mask
 
 SYM_FUNC_START_WEAK_PI(strncmp)
-	cbz	limit, .Lret0
+	cbz	limit, L(ret0)
 	eor	tmp1, src1, src2
 	mov	zeroones, #REP8_01
 	tst	tmp1, #7
-	b.ne	.Lmisaligned8
-	ands	tmp1, src1, #7
-	b.ne	.Lmutual_align
+	and	count, src1, #7
+	b.ne	L(misaligned8)
+	cbnz	count, L(mutual_align)
 	/* Calculate the number of full and partial words -1.  */
-	/*
-	* when limit is mulitply of 8, if not sub 1,
-	* the judgement of last dword will wrong.
-	*/
-	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
-	lsr	limit_wd, limit_wd, #3  /* Convert to Dwords.  */
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+	lsr	limit_wd, limit_wd, #3	/* Convert to Dwords.  */
 
-	/*
-	* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	* (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	* can be done in parallel across the entire word.
-	*/
-.Lloop_aligned:
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word.  */
+	.p2align 4
+L(loop_aligned):
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
 	subs	limit_wd, limit_wd, #1
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, pl  /* Last Dword or differences.*/
-	bics	has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	csinv	endloop, diff, xzr, pl	/* Last Dword or differences.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
 	ccmp	endloop, #0, #0, eq
-	b.eq	.Lloop_aligned
+	b.eq	L(loop_aligned)
+	/* End of main loop */
 
-	/*Not reached the limit, must have found the end or a diff.  */
-	tbz	limit_wd, #63, .Lnot_limit
+	/* Not reached the limit, must have found the end or a diff.  */
+	tbz	limit_wd, #63, L(not_limit)
 
 	/* Limit % 8 == 0 => all bytes significant.  */
 	ands	limit, limit, #7
-	b.eq	.Lnot_limit
+	b.eq	L(not_limit)
 
-	lsl	limit, limit, #3    /* Bits -> bytes.  */
+	lsl	limit, limit, #3	/* Bits -> bytes.  */
 	mov	mask, #~0
-CPU_BE( lsr	mask, mask, limit )
-CPU_LE( lsl	mask, mask, limit )
+#ifdef __AARCH64EB__
+	lsr	mask, mask, limit
+#else
+	lsl	mask, mask, limit
+#endif
 	bic	data1, data1, mask
 	bic	data2, data2, mask
 
 	/* Make sure that the NUL byte is marked in the syndrome.  */
 	orr	has_nul, has_nul, mask
 
-.Lnot_limit:
+L(not_limit):
 	orr	syndrome, diff, has_nul
-	b	.Lcal_cmpresult
 
-.Lmutual_align:
-	/*
-	* Sources are mutually aligned, but are not currently at an
-	* alignment boundary.  Round down the addresses and then mask off
-	* the bytes that precede the start point.
-	* We also need to adjust the limit calculations, but without
-	* overflowing if the limit is near ULONG_MAX.
-	*/
+#ifndef	__AARCH64EB__
+	rev	syndrome, syndrome
+	rev	data1, data1
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	clz	pos, syndrome
+	rev	data2, data2
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#else
+	/* For big-endian we cannot use the trick with the syndrome value
+	   as carry-propagation can corrupt the upper bits if the trailing
+	   bytes in the string contain 0x01.  */
+	/* However, if there is no NUL byte in the dword, we can generate
+	   the result directly.  We can't just subtract the bytes as the
+	   MSB might be significant.  */
+	cbnz	has_nul, 1f
+	cmp	data1, data2
+	cset	result, ne
+	cneg	result, result, lo
+	ret
+1:
+	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
+	rev	tmp3, data1
+	sub	tmp1, tmp3, zeroones
+	orr	tmp2, tmp3, #REP8_7f
+	bic	has_nul, tmp1, tmp2
+	rev	has_nul, has_nul
+	orr	syndrome, diff, has_nul
+	clz	pos, syndrome
+	/* The MS-non-zero bit of the syndrome marks either the first bit
+	   that is different, or the top bit of the first zero byte.
+	   Shifting left now will bring the critical information into the
+	   top bits.  */
+	lsl	data1, data1, pos
+	lsl	data2, data2, pos
+	/* But we need to zero-extend (char is unsigned) the value and then
+	   perform a signed 32-bit subtraction.  */
+	lsr	data1, data1, #56
+	sub	result, data1, data2, lsr #56
+	ret
+#endif
+
+L(mutual_align):
+	/* Sources are mutually aligned, but are not currently at an
+	   alignment boundary.  Round down the addresses and then mask off
+	   the bytes that precede the start point.
+	   We also need to adjust the limit calculations, but without
+	   overflowing if the limit is near ULONG_MAX.  */
 	bic	src1, src1, #7
 	bic	src2, src2, #7
 	ldr	data1, [src1], #8
-	neg	tmp3, tmp1, lsl #3  /* 64 - bits(bytes beyond align). */
+	neg	tmp3, count, lsl #3	/* 64 - bits(bytes beyond align). */
 	ldr	data2, [src2], #8
 	mov	tmp2, #~0
-	sub	limit_wd, limit, #1 /* limit != 0, so no underflow.  */
+	sub	limit_wd, limit, #1	/* limit != 0, so no underflow.  */
+#ifdef __AARCH64EB__
 	/* Big-endian.  Early bytes are at MSB.  */
-CPU_BE( lsl	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */
+	lsl	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+#else
 	/* Little-endian.  Early bytes are at LSB.  */
-CPU_LE( lsr	tmp2, tmp2, tmp3 )	/* Shift (tmp1 & 63).  */
-
+	lsr	tmp2, tmp2, tmp3	/* Shift (count & 63).  */
+#endif
 	and	tmp3, limit_wd, #7
 	lsr	limit_wd, limit_wd, #3
-	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.*/
-	add	limit, limit, tmp1
-	add	tmp3, tmp3, tmp1
+	/* Adjust the limit. Only low 3 bits used, so overflow irrelevant.  */
+	add	limit, limit, count
+	add	tmp3, tmp3, count
 	orr	data1, data1, tmp2
 	orr	data2, data2, tmp2
 	add	limit_wd, limit_wd, tmp3, lsr #3
-	b	.Lstart_realigned
+	b	L(start_realigned)
+
+	.p2align 4
+	/* Don't bother with dwords for up to 16 bytes.  */
+L(misaligned8):
+	cmp	limit, #16
+	b.hs	L(try_misaligned_words)
 
-/*when src1 offset is not equal to src2 offset...*/
-.Lmisaligned8:
-	cmp	limit, #8
-	b.lo	.Ltiny8proc /*limit < 8... */
-	/*
-	* Get the align offset length to compare per byte first.
-	* After this process, one string's address will be aligned.*/
-	and	tmp1, src1, #7
-	neg	tmp1, tmp1
-	add	tmp1, tmp1, #8
-	and	tmp2, src2, #7
-	neg	tmp2, tmp2
-	add	tmp2, tmp2, #8
-	subs	tmp3, tmp1, tmp2
-	csel	pos, tmp1, tmp2, hi /*Choose the maximum. */
-	/*
-	* Here, limit is not less than 8, so directly run .Ltinycmp
-	* without checking the limit.*/
-	sub	limit, limit, pos
-.Ltinycmp:
+L(byte_loop):
+	/* Perhaps we can do better than this.  */
 	ldrb	data1w, [src1], #1
 	ldrb	data2w, [src2], #1
-	subs	pos, pos, #1
-	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-	b.eq	.Ltinycmp
-	cbnz	pos, 1f /*find the null or unequal...*/
-	cmp	data1w, #1
-	ccmp	data1w, data2w, #0, cs
-	b.eq	.Lstart_align /*the last bytes are equal....*/
-1:
+	subs	limit, limit, #1
+	ccmp	data1w, #1, #0, hi	/* NZCV = 0b0000.  */
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.eq	L(byte_loop)
+L(done):
 	sub	result, data1, data2
 	ret
-
-.Lstart_align:
+	/* Align the SRC1 to a dword by doing a bytewise compare and then do
+	   the dword loop.  */
+L(try_misaligned_words):
 	lsr	limit_wd, limit, #3
-	cbz	limit_wd, .Lremain8
-	/*process more leading bytes to make str1 aligned...*/
-	ands	xzr, src1, #7
-	b.eq	.Lrecal_offset
-	add	src1, src1, tmp3	/*tmp3 is positive in this branch.*/
-	add	src2, src2, tmp3
-	ldr	data1, [src1], #8
-	ldr	data2, [src2], #8
+	cbz	count, L(do_misaligned)
 
-	sub	limit, limit, tmp3
+	neg	count, count
+	and	count, count, #7
+	sub	limit, limit, count
 	lsr	limit_wd, limit, #3
-	subs	limit_wd, limit_wd, #1
 
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-	bics	has_nul, tmp1, tmp2
-	ccmp	endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
-	b.ne	.Lunequal_proc
-	/*How far is the current str2 from the alignment boundary...*/
-	and	tmp3, tmp3, #7
-.Lrecal_offset:
-	neg	pos, tmp3
-.Lloopcmp_proc:
-	/*
-	* Divide the eight bytes into two parts. First,backwards the src2
-	* to an alignment boundary,load eight bytes from the SRC2 alignment
-	* boundary,then compare with the relative bytes from SRC1.
-	* If all 8 bytes are equal,then start the second part's comparison.
-	* Otherwise finish the comparison.
-	* This special handle can garantee all the accesses are in the
-	* thread/task space in avoid to overrange access.
-	*/
-	ldr	data1, [src1,pos]
-	ldr	data2, [src2,pos]
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	bics	has_nul, tmp1, tmp2 /* Non-zero if NUL terminator.  */
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, eq
-	cbnz	endloop, .Lunequal_proc
+L(page_end_loop):
+	ldrb	data1w, [src1], #1
+	ldrb	data2w, [src2], #1
+	cmp	data1w, #1
+	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
+	b.ne	L(done)
+	subs	count, count, #1
+	b.hi	L(page_end_loop)
+
+L(do_misaligned):
+	/* Prepare ourselves for the next page crossing.  Unlike the aligned
+	   loop, we fetch 1 less dword because we risk crossing bounds on
+	   SRC2.  */
+	mov	count, #8
+	subs	limit_wd, limit_wd, #1
+	b.lo	L(done_loop)
+L(loop_misaligned):
+	and	tmp2, src2, #0xff8
+	eor	tmp2, tmp2, #0xff8
+	cbz	tmp2, L(page_end_loop)
 
-	/*The second part process*/
 	ldr	data1, [src1], #8
 	ldr	data2, [src2], #8
-	subs	limit_wd, limit_wd, #1
 	sub	tmp1, data1, zeroones
 	orr	tmp2, data1, #REP8_7f
-	eor	diff, data1, data2  /* Non-zero if differences found.  */
-	csinv	endloop, diff, xzr, ne/*if limit_wd is 0,will finish the cmp*/
-	bics	has_nul, tmp1, tmp2
-	ccmp	endloop, #0, #0, eq /*has_null is ZERO: no null byte*/
-	b.eq	.Lloopcmp_proc
-
-.Lunequal_proc:
-	orr	syndrome, diff, has_nul
-	cbz	syndrome, .Lremain8
-.Lcal_cmpresult:
-	/*
-	* reversed the byte-order as big-endian,then CLZ can find the most
-	* significant zero bits.
-	*/
-CPU_LE( rev	syndrome, syndrome )
-CPU_LE( rev	data1, data1 )
-CPU_LE( rev	data2, data2 )
-	/*
-	* For big-endian we cannot use the trick with the syndrome value
-	* as carry-propagation can corrupt the upper bits if the trailing
-	* bytes in the string contain 0x01.
-	* However, if there is no NUL byte in the dword, we can generate
-	* the result directly.  We can't just subtract the bytes as the
-	* MSB might be significant.
-	*/
-CPU_BE( cbnz	has_nul, 1f )
-CPU_BE( cmp	data1, data2 )
-CPU_BE( cset	result, ne )
-CPU_BE( cneg	result, result, lo )
-CPU_BE( ret )
-CPU_BE( 1: )
-	/* Re-compute the NUL-byte detection, using a byte-reversed value.*/
-CPU_BE( rev	tmp3, data1 )
-CPU_BE( sub	tmp1, tmp3, zeroones )
-CPU_BE( orr	tmp2, tmp3, #REP8_7f )
-CPU_BE( bic	has_nul, tmp1, tmp2 )
-CPU_BE( rev	has_nul, has_nul )
-CPU_BE( orr	syndrome, diff, has_nul )
-	/*
-	* The MS-non-zero bit of the syndrome marks either the first bit
-	* that is different, or the top bit of the first zero byte.
-	* Shifting left now will bring the critical information into the
-	* top bits.
-	*/
-	clz	pos, syndrome
-	lsl	data1, data1, pos
-	lsl	data2, data2, pos
-	/*
-	* But we need to zero-extend (char is unsigned) the value and then
-	* perform a signed 32-bit subtraction.
-	*/
-	lsr	data1, data1, #56
-	sub	result, data1, data2, lsr #56
-	ret
-
-.Lremain8:
-	/* Limit % 8 == 0 => all bytes significant.  */
-	ands	limit, limit, #7
-	b.eq	.Lret0
-.Ltiny8proc:
-	ldrb	data1w, [src1], #1
-	ldrb	data2w, [src2], #1
-	subs	limit, limit, #1
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	diff, #0, #0, eq
+	b.ne	L(not_limit)
+	subs	limit_wd, limit_wd, #1
+	b.pl	L(loop_misaligned)
 
-	ccmp	data1w, #1, #0, ne  /* NZCV = 0b0000.  */
-	ccmp	data1w, data2w, #0, cs  /* NZCV = 0b0000.  */
-	b.eq	.Ltiny8proc
-	sub	result, data1, data2
-	ret
+L(done_loop):
+	/* We found a difference or a NULL before the limit was reached.  */
+	and	limit, limit, #7
+	cbz	limit, L(not_limit)
+	/* Read the last word.  */
+	sub	src1, src1, 8
+	sub	src2, src2, 8
+	ldr	data1, [src1, limit]
+	ldr	data2, [src2, limit]
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, #REP8_7f
+	eor	diff, data1, data2	/* Non-zero if differences found.  */
+	bics	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
+	ccmp	diff, #0, #0, eq
+	b.ne	L(not_limit)
 
-.Lret0:
+L(ret0):
 	mov	result, #0
 	ret
+
 SYM_FUNC_END_PI(strncmp)
 EXPORT_SYMBOL_NOKASAN(strncmp)
-- 
GitLab


From b6c4ea48415d26ec08fb67fbbd3eefdb1f96ffa6 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 27 May 2021 16:34:45 +0100
Subject: [PATCH 1948/3804] arm64: Add assembly annotations for weak-PI-alias
 madness

Add yet another set of assembly symbol annotations, this time for the
borderline-absurd situation of a function aliasing to a weak symbol
which itself also wants a position-independent alias.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/75545b3c4129b20b887474bb58a9cf302bf2132b.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/linkage.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h
index ba89a9af820ab..9906541a68619 100644
--- a/arch/arm64/include/asm/linkage.h
+++ b/arch/arm64/include/asm/linkage.h
@@ -56,8 +56,16 @@
 		SYM_FUNC_START_ALIAS(__pi_##x);	\
 		SYM_FUNC_START_WEAK(x)
 
+#define SYM_FUNC_START_WEAK_ALIAS_PI(x)		\
+		SYM_FUNC_START_ALIAS(__pi_##x);	\
+		SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN)
+
 #define SYM_FUNC_END_PI(x)			\
 		SYM_FUNC_END(x);		\
 		SYM_FUNC_END_ALIAS(__pi_##x)
 
+#define SYM_FUNC_END_ALIAS_PI(x)		\
+		SYM_FUNC_END_ALIAS(x);		\
+		SYM_FUNC_END_ALIAS(__pi_##x)
+
 #endif
-- 
GitLab


From 285133040e6ce0e6f37db962f2b4dad10ea46da0 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 27 May 2021 16:34:46 +0100
Subject: [PATCH 1949/3804] arm64: Import latest memcpy()/memmove()
 implementation

Import the latest implementation of memcpy(), based on the
upstream code of string/aarch64/memcpy.S at commit afd6244 from
https://github.com/ARM-software/optimized-routines, and subsuming
memmove() in the process.

Note that for simplicity Arm have chosen to contribute this code
to Linux under GPLv2 rather than the original MIT license.

Note also that the needs of the usercopy routines vs. regular memcpy()
have now diverged so far that we abandon the shared template idea
and the damage which that incurred to the tuning of LDP/STP loops.
We'll be back to tackle those routines separately in future.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/3c953af43506581b2422f61952261e76949ba711.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/Makefile  |   2 +-
 arch/arm64/lib/memcpy.S  | 272 ++++++++++++++++++++++++++++++++-------
 arch/arm64/lib/memmove.S | 189 ---------------------------
 3 files changed, 230 insertions(+), 233 deletions(-)
 delete mode 100644 arch/arm64/lib/memmove.S

diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index d31e1169d9b8e..01c596aa539c5 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 lib-y		:= clear_user.o delay.o copy_from_user.o		\
 		   copy_to_user.o copy_in_user.o copy_page.o		\
-		   clear_page.o csum.o memchr.o memcpy.o memmove.o	\
+		   clear_page.o csum.o memchr.o memcpy.o		\
 		   memset.o memcmp.o strcmp.o strncmp.o strlen.o	\
 		   strnlen.o strchr.o strrchr.o tishift.o
 
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index dc8d2a216a6e6..31073a8304fb6 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,66 +1,252 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
+ * Copyright (c) 2012-2020, Arm Limited.
  *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
+ * Adapted from the original at:
+ * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/memcpy.S
  */
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
-#include <asm/cache.h>
 
-/*
- * Copy a buffer from src to dest (alignment handled by the hardware)
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
  *
- * Parameters:
- *	x0 - dest
- *	x1 - src
- *	x2 - n
- * Returns:
- *	x0 - dest
  */
-	.macro ldrb1 reg, ptr, val
-	ldrb  \reg, [\ptr], \val
-	.endm
-
-	.macro strb1 reg, ptr, val
-	strb \reg, [\ptr], \val
-	.endm
 
-	.macro ldrh1 reg, ptr, val
-	ldrh  \reg, [\ptr], \val
-	.endm
+#define L(label) .L ## label
 
-	.macro strh1 reg, ptr, val
-	strh \reg, [\ptr], \val
-	.endm
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define B_l	x8
+#define B_lw	w8
+#define B_h	x9
+#define C_l	x10
+#define C_lw	w10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	x14
+#define E_h	x15
+#define F_l	x16
+#define F_h	x17
+#define G_l	count
+#define G_h	dst
+#define H_l	src
+#define H_h	srcend
+#define tmp1	x14
 
-	.macro ldr1 reg, ptr, val
-	ldr \reg, [\ptr], \val
-	.endm
+/* This implementation handles overlaps and supports both memcpy and memmove
+   from a single entry point.  It uses unaligned accesses and branchless
+   sequences to keep the code small, simple and improve performance.
 
-	.macro str1 reg, ptr, val
-	str \reg, [\ptr], \val
-	.endm
+   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+   copies of up to 128 bytes, and large copies.  The overhead of the overlap
+   check is negligible since it is only required for large copies.
 
-	.macro ldp1 reg1, reg2, ptr, val
-	ldp \reg1, \reg2, [\ptr], \val
-	.endm
-
-	.macro stp1 reg1, reg2, ptr, val
-	stp \reg1, \reg2, [\ptr], \val
-	.endm
+   Large copies use a software pipelined loop processing 64 bytes per iteration.
+   The destination pointer is 16-byte aligned to minimize unaligned accesses.
+   The loop tail is handled by always copying 64 bytes from the end.
+*/
 
+SYM_FUNC_START_ALIAS(__memmove)
+SYM_FUNC_START_WEAK_ALIAS_PI(memmove)
 SYM_FUNC_START_ALIAS(__memcpy)
 SYM_FUNC_START_WEAK_PI(memcpy)
-#include "copy_template.S"
+	add	srcend, src, count
+	add	dstend, dstin, count
+	cmp	count, 128
+	b.hi	L(copy_long)
+	cmp	count, 32
+	b.hi	L(copy32_128)
+
+	/* Small copies: 0..32 bytes.  */
+	cmp	count, 16
+	b.lo	L(copy16)
+	ldp	A_l, A_h, [src]
+	ldp	D_l, D_h, [srcend, -16]
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	/* Copy 8-15 bytes.  */
+L(copy16):
+	tbz	count, 3, L(copy8)
+	ldr	A_l, [src]
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+
+	.p2align 3
+	/* Copy 4-7 bytes.  */
+L(copy8):
+	tbz	count, 2, L(copy4)
+	ldr	A_lw, [src]
+	ldr	B_lw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	B_lw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes using a branchless sequence.  */
+L(copy4):
+	cbz	count, L(copy0)
+	lsr	tmp1, count, 1
+	ldrb	A_lw, [src]
+	ldrb	C_lw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	C_lw, [dstend, -1]
+L(copy0):
+	ret
+
+	.p2align 4
+	/* Medium copies: 33..128 bytes.  */
+L(copy32_128):
+	ldp	A_l, A_h, [src]
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	ldp	D_l, D_h, [srcend, -16]
+	cmp	count, 64
+	b.hi	L(copy128)
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+	stp	D_l, D_h, [dstend, -16]
 	ret
+
+	.p2align 4
+	/* Copy 65..128 bytes.  */
+L(copy128):
+	ldp	E_l, E_h, [src, 32]
+	ldp	F_l, F_h, [src, 48]
+	cmp	count, 96
+	b.ls	L(copy96)
+	ldp	G_l, G_h, [srcend, -64]
+	ldp	H_l, H_h, [srcend, -48]
+	stp	G_l, G_h, [dstend, -64]
+	stp	H_l, H_h, [dstend, -48]
+L(copy96):
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	E_l, E_h, [dstin, 32]
+	stp	F_l, F_h, [dstin, 48]
+	stp	C_l, C_h, [dstend, -32]
+	stp	D_l, D_h, [dstend, -16]
+	ret
+
+	.p2align 4
+	/* Copy more than 128 bytes.  */
+L(copy_long):
+	/* Use backwards copy if there is an overlap.  */
+	sub	tmp1, dstin, src
+	cbz	tmp1, L(copy0)
+	cmp	tmp1, count
+	b.lo	L(copy_long_backwards)
+
+	/* Copy 16 bytes and then align dst to 16-byte alignment.  */
+
+	ldp	D_l, D_h, [src]
+	and	tmp1, dstin, 15
+	bic	dst, dstin, 15
+	sub	src, src, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	L(copy64_from_end)
+
+L(loop64):
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	L(loop64)
+
+	/* Write the last iteration and copy 64 bytes from the end.  */
+L(copy64_from_end):
+	ldp	E_l, E_h, [srcend, -64]
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [srcend, -16]
+	stp	D_l, D_h, [dst, 64]
+	stp	E_l, E_h, [dstend, -64]
+	stp	A_l, A_h, [dstend, -48]
+	stp	B_l, B_h, [dstend, -32]
+	stp	C_l, C_h, [dstend, -16]
+	ret
+
+	.p2align 4
+
+	/* Large backwards copy for overlapping copies.
+	   Copy 16 bytes and then align dst to 16-byte alignment.  */
+L(copy_long_backwards):
+	ldp	D_l, D_h, [srcend, -16]
+	and	tmp1, dstend, 15
+	sub	srcend, srcend, tmp1
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]
+	stp	D_l, D_h, [dstend, -16]
+	ldp	B_l, B_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -48]
+	ldp	D_l, D_h, [srcend, -64]!
+	sub	dstend, dstend, tmp1
+	subs	count, count, 128
+	b.ls	L(copy64_from_start)
+
+L(loop64_backwards):
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [srcend, -48]
+	stp	D_l, D_h, [dstend, -64]!
+	ldp	D_l, D_h, [srcend, -64]!
+	subs	count, count, 64
+	b.hi	L(loop64_backwards)
+
+	/* Write the last iteration and copy 64 bytes from the start.  */
+L(copy64_from_start):
+	ldp	G_l, G_h, [src, 48]
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [src, 32]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [src, 16]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [src]
+	stp	D_l, D_h, [dstend, -64]
+	stp	G_l, G_h, [dstin, 48]
+	stp	A_l, A_h, [dstin, 32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin]
+	ret
+
 SYM_FUNC_END_PI(memcpy)
 EXPORT_SYMBOL(memcpy)
 SYM_FUNC_END_ALIAS(__memcpy)
 EXPORT_SYMBOL(__memcpy)
+SYM_FUNC_END_ALIAS_PI(memmove)
+EXPORT_SYMBOL(memmove)
+SYM_FUNC_END_ALIAS(__memmove)
+EXPORT_SYMBOL(__memmove)
\ No newline at end of file
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
deleted file mode 100644
index 1035dce4bdaf4..0000000000000
--- a/arch/arm64/lib/memmove.S
+++ /dev/null
@@ -1,189 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2013 ARM Ltd.
- * Copyright (C) 2013 Linaro.
- *
- * This code is based on glibc cortex strings work originally authored by Linaro
- * be found @
- *
- * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
- * files/head:/src/aarch64/
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/cache.h>
-
-/*
- * Move a buffer from src to test (alignment handled by the hardware).
- * If dest <= src, call memcpy, otherwise copy in reverse order.
- *
- * Parameters:
- *	x0 - dest
- *	x1 - src
- *	x2 - n
- * Returns:
- *	x0 - dest
- */
-dstin	.req	x0
-src	.req	x1
-count	.req	x2
-tmp1	.req	x3
-tmp1w	.req	w3
-tmp2	.req	x4
-tmp2w	.req	w4
-tmp3	.req	x5
-tmp3w	.req	w5
-dst	.req	x6
-
-A_l	.req	x7
-A_h	.req	x8
-B_l	.req	x9
-B_h	.req	x10
-C_l	.req	x11
-C_h	.req	x12
-D_l	.req	x13
-D_h	.req	x14
-
-SYM_FUNC_START_ALIAS(__memmove)
-SYM_FUNC_START_WEAK_PI(memmove)
-	cmp	dstin, src
-	b.lo	__memcpy
-	add	tmp1, src, count
-	cmp	dstin, tmp1
-	b.hs	__memcpy		/* No overlap.  */
-
-	add	dst, dstin, count
-	add	src, src, count
-	cmp	count, #16
-	b.lo	.Ltail15  /*probably non-alignment accesses.*/
-
-	ands	tmp2, src, #15     /* Bytes to reach alignment.  */
-	b.eq	.LSrcAligned
-	sub	count, count, tmp2
-	/*
-	* process the aligned offset length to make the src aligned firstly.
-	* those extra instructions' cost is acceptable. It also make the
-	* coming accesses are based on aligned address.
-	*/
-	tbz	tmp2, #0, 1f
-	ldrb	tmp1w, [src, #-1]!
-	strb	tmp1w, [dst, #-1]!
-1:
-	tbz	tmp2, #1, 2f
-	ldrh	tmp1w, [src, #-2]!
-	strh	tmp1w, [dst, #-2]!
-2:
-	tbz	tmp2, #2, 3f
-	ldr	tmp1w, [src, #-4]!
-	str	tmp1w, [dst, #-4]!
-3:
-	tbz	tmp2, #3, .LSrcAligned
-	ldr	tmp1, [src, #-8]!
-	str	tmp1, [dst, #-8]!
-
-.LSrcAligned:
-	cmp	count, #64
-	b.ge	.Lcpy_over64
-
-	/*
-	* Deal with small copies quickly by dropping straight into the
-	* exit block.
-	*/
-.Ltail63:
-	/*
-	* Copy up to 48 bytes of data. At this point we only need the
-	* bottom 6 bits of count to be accurate.
-	*/
-	ands	tmp1, count, #0x30
-	b.eq	.Ltail15
-	cmp	tmp1w, #0x20
-	b.eq	1f
-	b.lt	2f
-	ldp	A_l, A_h, [src, #-16]!
-	stp	A_l, A_h, [dst, #-16]!
-1:
-	ldp	A_l, A_h, [src, #-16]!
-	stp	A_l, A_h, [dst, #-16]!
-2:
-	ldp	A_l, A_h, [src, #-16]!
-	stp	A_l, A_h, [dst, #-16]!
-
-.Ltail15:
-	tbz	count, #3, 1f
-	ldr	tmp1, [src, #-8]!
-	str	tmp1, [dst, #-8]!
-1:
-	tbz	count, #2, 2f
-	ldr	tmp1w, [src, #-4]!
-	str	tmp1w, [dst, #-4]!
-2:
-	tbz	count, #1, 3f
-	ldrh	tmp1w, [src, #-2]!
-	strh	tmp1w, [dst, #-2]!
-3:
-	tbz	count, #0, .Lexitfunc
-	ldrb	tmp1w, [src, #-1]
-	strb	tmp1w, [dst, #-1]
-
-.Lexitfunc:
-	ret
-
-.Lcpy_over64:
-	subs	count, count, #128
-	b.ge	.Lcpy_body_large
-	/*
-	* Less than 128 bytes to copy, so handle 64 bytes here and then jump
-	* to the tail.
-	*/
-	ldp	A_l, A_h, [src, #-16]
-	stp	A_l, A_h, [dst, #-16]
-	ldp	B_l, B_h, [src, #-32]
-	ldp	C_l, C_h, [src, #-48]
-	stp	B_l, B_h, [dst, #-32]
-	stp	C_l, C_h, [dst, #-48]
-	ldp	D_l, D_h, [src, #-64]!
-	stp	D_l, D_h, [dst, #-64]!
-
-	tst	count, #0x3f
-	b.ne	.Ltail63
-	ret
-
-	/*
-	* Critical loop. Start at a new cache line boundary. Assuming
-	* 64 bytes per line this ensures the entire loop is in one line.
-	*/
-	.p2align	L1_CACHE_SHIFT
-.Lcpy_body_large:
-	/* pre-load 64 bytes data. */
-	ldp	A_l, A_h, [src, #-16]
-	ldp	B_l, B_h, [src, #-32]
-	ldp	C_l, C_h, [src, #-48]
-	ldp	D_l, D_h, [src, #-64]!
-1:
-	/*
-	* interlace the load of next 64 bytes data block with store of the last
-	* loaded 64 bytes data.
-	*/
-	stp	A_l, A_h, [dst, #-16]
-	ldp	A_l, A_h, [src, #-16]
-	stp	B_l, B_h, [dst, #-32]
-	ldp	B_l, B_h, [src, #-32]
-	stp	C_l, C_h, [dst, #-48]
-	ldp	C_l, C_h, [src, #-48]
-	stp	D_l, D_h, [dst, #-64]!
-	ldp	D_l, D_h, [src, #-64]!
-	subs	count, count, #64
-	b.ge	1b
-	stp	A_l, A_h, [dst, #-16]
-	stp	B_l, B_h, [dst, #-32]
-	stp	C_l, C_h, [dst, #-48]
-	stp	D_l, D_h, [dst, #-64]!
-
-	tst	count, #0x3f
-	b.ne	.Ltail63
-	ret
-SYM_FUNC_END_PI(memmove)
-EXPORT_SYMBOL(memmove)
-SYM_FUNC_END_ALIAS(__memmove)
-EXPORT_SYMBOL(__memmove)
-- 
GitLab


From 9e51cafd783b22018fb15bfb06d65f69349223a9 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 27 May 2021 16:34:47 +0100
Subject: [PATCH 1950/3804] arm64: Better optimised memchr()

Although we implement our own assembly version of memchr(), it turns
out to be barely any better than what GCC can generate for the generic
C version (and would go wrong if the size_t argument were ever large
enough to be interpreted as negative). Unfortunately we can't import the
tuned implementation from the Arm optimized-routines library, since that
has some Advanced SIMD parts which are not really viable for general
kernel library code. What we can do, however, is pep things up with some
relatively straightforward word-at-a-time logic for larger calls.

Adding some timing to optimized-routines' memchr() test for a simple
benchmark, overall this version comes in around half as fast as the SIMD
code, but still nearly 4x faster than our existing implementation.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/58471b42f9287e039dafa9e5e7035077152438fd.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/memchr.S | 65 +++++++++++++++++++++++++++++++++--------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index edf6b970a2774..7c2276fdab543 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -1,9 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Based on arch/arm/lib/memchr.S
- *
- * Copyright (C) 1995-2000 Russell King
- * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
  */
 
 #include <linux/linkage.h>
@@ -19,16 +16,60 @@
  * Returns:
  *	x0 - address of first occurrence of 'c' or 0
  */
+
+#define L(label) .L ## label
+
+#define REP8_01 0x0101010101010101
+#define REP8_7f 0x7f7f7f7f7f7f7f7f
+
+#define srcin		x0
+#define chrin		w1
+#define cntin		x2
+
+#define result		x0
+
+#define wordcnt		x3
+#define rep01		x4
+#define repchr		x5
+#define cur_word	x6
+#define cur_byte	w6
+#define tmp		x7
+#define tmp2		x8
+
+	.p2align 4
+	nop
 SYM_FUNC_START_WEAK_PI(memchr)
-	and	w1, w1, #0xff
-1:	subs	x2, x2, #1
-	b.mi	2f
-	ldrb	w3, [x0], #1
-	cmp	w3, w1
-	b.ne	1b
-	sub	x0, x0, #1
+	and	chrin, chrin, #0xff
+	lsr	wordcnt, cntin, #3
+	cbz	wordcnt, L(byte_loop)
+	mov	rep01, #REP8_01
+	mul	repchr, x1, rep01
+	and	cntin, cntin, #7
+L(word_loop):
+	ldr	cur_word, [srcin], #8
+	sub	wordcnt, wordcnt, #1
+	eor	cur_word, cur_word, repchr
+	sub	tmp, cur_word, rep01
+	orr	tmp2, cur_word, #REP8_7f
+	bics	tmp, tmp, tmp2
+	b.ne	L(found_word)
+	cbnz	wordcnt, L(word_loop)
+L(byte_loop):
+	cbz	cntin, L(not_found)
+	ldrb	cur_byte, [srcin], #1
+	sub	cntin, cntin, #1
+	cmp	cur_byte, chrin
+	b.ne	L(byte_loop)
+	sub	srcin, srcin, #1
+	ret
+L(found_word):
+CPU_LE(	rev	tmp, tmp)
+	clz	tmp, tmp
+	sub	tmp, tmp, #64
+	add	result, srcin, tmp, asr #3
 	ret
-2:	mov	x0, #0
+L(not_found):
+	mov	result, #0
 	ret
 SYM_FUNC_END_PI(memchr)
 EXPORT_SYMBOL_NOKASAN(memchr)
-- 
GitLab


From 344323e0428b9911406bede6cff23d1482c19eae Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 27 May 2021 16:34:48 +0100
Subject: [PATCH 1951/3804] arm64: Rewrite __arch_clear_user()

Now that we're always using STTR variants rather than abstracting two
different addressing modes, the user_ldst macro here is frankly more
obfuscating than helpful. Rewrite __arch_clear_user() with regular
USER() annotations so that it's clearer what's going on, and take the
opportunity to minimise the branchiness in the most common paths, while
also allowing the exception fixup to return an accurate result.

Apparently some folks examine large reads from /dev/zero closely enough
to notice the loop being hot, so align it per the other critical loops
(presumably around a typical instruction fetch granularity).

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/1cbd78b12c076a8ad4656a345811cfb9425df0b3.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/clear_user.S | 47 +++++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index af9afcbec92cd..a7efb2ad2a1c1 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -1,12 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Based on arch/arm/lib/clear_user.S
- *
- * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2021 Arm Ltd.
  */
-#include <linux/linkage.h>
 
-#include <asm/asm-uaccess.h>
+#include <linux/linkage.h>
 #include <asm/assembler.h>
 
 	.text
@@ -19,25 +16,33 @@
  *
  * Alignment fixed up by hardware.
  */
+
+	.p2align 4
+	// Alignment is for the loop, but since the prologue (including BTI)
+	// is also 16 bytes we can keep any padding outside the function
 SYM_FUNC_START(__arch_clear_user)
-	mov	x2, x1			// save the size for fixup return
+	add	x2, x0, x1
 	subs	x1, x1, #8
 	b.mi	2f
 1:
-user_ldst 9f, sttr, xzr, x0, 8
+USER(9f, sttr	xzr, [x0])
+	add	x0, x0, #8
 	subs	x1, x1, #8
-	b.pl	1b
-2:	adds	x1, x1, #4
-	b.mi	3f
-user_ldst 9f, sttr, wzr, x0, 4
-	sub	x1, x1, #4
-3:	adds	x1, x1, #2
-	b.mi	4f
-user_ldst 9f, sttrh, wzr, x0, 2
-	sub	x1, x1, #2
-4:	adds	x1, x1, #1
-	b.mi	5f
-user_ldst 9f, sttrb, wzr, x0, 0
+	b.hi	1b
+USER(9f, sttr	xzr, [x2, #-8])
+	mov	x0, #0
+	ret
+
+2:	tbz	x1, #2, 3f
+USER(9f, sttr	wzr, [x0])
+USER(8f, sttr	wzr, [x2, #-4])
+	mov	x0, #0
+	ret
+
+3:	tbz	x1, #1, 4f
+USER(9f, sttrh	wzr, [x0])
+4:	tbz	x1, #0, 5f
+USER(7f, sttrb	wzr, [x2, #-1])
 5:	mov	x0, #0
 	ret
 SYM_FUNC_END(__arch_clear_user)
@@ -45,6 +50,8 @@ EXPORT_SYMBOL(__arch_clear_user)
 
 	.section .fixup,"ax"
 	.align	2
-9:	mov	x0, x2			// return the original size
+7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
+8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
+9:	sub	x0, x2, x0
 	ret
 	.previous
-- 
GitLab


From 5ae632ed356c2f2e42a3e7ea447e98a9e684539c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Sat, 29 May 2021 19:15:10 +0800
Subject: [PATCH 1952/3804] arm64: mm: Use better bitmap_zalloc()

Use bitmap_zalloc() rather than an open-coded kcalloc() of
BITS_TO_LONGS() words to allocate the ASID bitmaps.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Link: https://lore.kernel.org/r/20210529111510.186355-1-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/context.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 001737a8f309b..cd72576ae2b76 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -402,14 +402,12 @@ static int asids_init(void)
 {
 	asid_bits = get_cpu_asid_bits();
 	atomic64_set(&asid_generation, ASID_FIRST_VERSION);
-	asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS), sizeof(*asid_map),
-			   GFP_KERNEL);
+	asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
 	if (!asid_map)
 		panic("Failed to allocate bitmap for %lu ASIDs\n",
 		      NUM_USER_ASIDS);
 
-	pinned_asid_map = kcalloc(BITS_TO_LONGS(NUM_USER_ASIDS),
-				  sizeof(*pinned_asid_map), GFP_KERNEL);
+	pinned_asid_map = bitmap_zalloc(NUM_USER_ASIDS, GFP_KERNEL);
 	nr_pinned_asids = 0;
 
 	/*
-- 
GitLab


From 58cc6b72a2127475296502fcb4d2b5006b7f4742 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 27 May 2021 12:03:17 +0100
Subject: [PATCH 1953/3804] arm64: mm: Remove unused support for Device-GRE
 memory type

The Device-GRE memory type is unused, so remove it and reclaim a MAIR.

Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210505180228.GA3874@arm.com
Link: https://lore.kernel.org/r/20210527110319.22157-2-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/memory.h | 1 -
 arch/arm64/include/asm/sysreg.h | 1 -
 arch/arm64/mm/proc.S            | 1 -
 arch/arm64/mm/ptdump.c          | 4 ----
 4 files changed, 7 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 87b90dc27a432..1e025e3b655ef 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -138,7 +138,6 @@
 #define MT_NORMAL_WT		3
 #define MT_DEVICE_nGnRnE	4
 #define MT_DEVICE_nGnRE		5
-#define MT_DEVICE_GRE		6
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a1685..baeb33cd7685f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -703,7 +703,6 @@
 /* MAIR_ELx memory attributes (used by Linux) */
 #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
 #define MAIR_ATTR_DEVICE_nGnRE		UL(0x04)
-#define MAIR_ATTR_DEVICE_GRE		UL(0x0c)
 #define MAIR_ATTR_NORMAL_NC		UL(0x44)
 #define MAIR_ATTR_NORMAL_WT		UL(0xbb)
 #define MAIR_ATTR_NORMAL_TAGGED		UL(0xf0)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 97d7bcd8d4f26..add026fcc88ca 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,7 +58,6 @@
 #define MAIR_EL1_SET							\
 	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |	\
 	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |	\
-	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |			\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) |		\
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index a1937dfff31c3..1c403536c9bb0 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -157,10 +157,6 @@ static const struct prot_bits pte_bits[] = {
 		.mask	= PTE_ATTRINDX_MASK,
 		.val	= PTE_ATTRINDX(MT_DEVICE_nGnRE),
 		.set	= "DEVICE/nGnRE",
-	}, {
-		.mask	= PTE_ATTRINDX_MASK,
-		.val	= PTE_ATTRINDX(MT_DEVICE_GRE),
-		.set	= "DEVICE/GRE",
 	}, {
 		.mask	= PTE_ATTRINDX_MASK,
 		.val	= PTE_ATTRINDX(MT_NORMAL_NC),
-- 
GitLab


From ee67c1103a1b50467969cf2cdb182c096c144459 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 27 May 2021 12:03:18 +0100
Subject: [PATCH 1954/3804] arm64: acpi: Map EFI_MEMORY_WT memory as Normal-NC

The only user we have of Normal Write-Through memory is in the ACPI code
when mapping memory regions advertised as EFI_MEMORY_WT. Since most (all?)
CPUs treat write-through as non-cacheable under the hood, don't bother
with the extra memory type here and just treat EFI_MEMORY_WT the same way
as EFI_MEMORY_WC by mapping it to the Normal-NC memory type instead and
emitting a warning if we have failed to find an alternative EFI memory
type.

Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210527110319.22157-3-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/acpi.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index cada0b816c8a3..f3851724fe356 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -239,6 +239,18 @@ done:
 	}
 }
 
+static pgprot_t __acpi_get_writethrough_mem_attribute(void)
+{
+	/*
+	 * Although UEFI specifies the use of Normal Write-through for
+	 * EFI_MEMORY_WT, it is seldom used in practice and not implemented
+	 * by most (all?) CPUs. Rather than allocate a MAIR just for this
+	 * purpose, emit a warning and use Normal Non-cacheable instead.
+	 */
+	pr_warn_once("No MAIR allocation for EFI_MEMORY_WT; treating as Normal Non-cacheable\n");
+	return __pgprot(PROT_NORMAL_NC);
+}
+
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
 {
 	/*
@@ -246,7 +258,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
 	 * types" of UEFI 2.5 section 2.3.6.1, each EFI memory type is
 	 * mapped to a corresponding MAIR attribute encoding.
 	 * The EFI memory attribute advises all possible capabilities
-	 * of a memory region. We use the most efficient capability.
+	 * of a memory region.
 	 */
 
 	u64 attr;
@@ -254,10 +266,10 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr)
 	attr = efi_mem_attributes(addr);
 	if (attr & EFI_MEMORY_WB)
 		return PAGE_KERNEL;
-	if (attr & EFI_MEMORY_WT)
-		return __pgprot(PROT_NORMAL_WT);
 	if (attr & EFI_MEMORY_WC)
 		return __pgprot(PROT_NORMAL_NC);
+	if (attr & EFI_MEMORY_WT)
+		return __acpi_get_writethrough_mem_attribute();
 	return __pgprot(PROT_DEVICE_nGnRnE);
 }
 
@@ -340,10 +352,10 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
 		default:
 			if (region->attribute & EFI_MEMORY_WB)
 				prot = PAGE_KERNEL;
-			else if (region->attribute & EFI_MEMORY_WT)
-				prot = __pgprot(PROT_NORMAL_WT);
 			else if (region->attribute & EFI_MEMORY_WC)
 				prot = __pgprot(PROT_NORMAL_NC);
+			else if (region->attribute & EFI_MEMORY_WT)
+				prot = __acpi_get_writethrough_mem_attribute();
 		}
 	}
 	return __ioremap(phys, size, prot);
-- 
GitLab


From 21cfe6edbadb703b674ae2ddf78862d00d24bfc5 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 27 May 2021 12:03:19 +0100
Subject: [PATCH 1955/3804] arm64: mm: Remove unused support for Normal-WT
 memory type

The Normal-WT memory type is unused, so remove it and reclaim a MAIR.

Cc: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210527110319.22157-4-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/memory.h       | 5 ++---
 arch/arm64/include/asm/pgtable-prot.h | 1 -
 arch/arm64/include/asm/sysreg.h       | 1 -
 arch/arm64/mm/proc.S                  | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 1e025e3b655ef..7b360960cc35a 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -135,9 +135,8 @@
 #define MT_NORMAL		0
 #define MT_NORMAL_TAGGED	1
 #define MT_NORMAL_NC		2
-#define MT_NORMAL_WT		3
-#define MT_DEVICE_nGnRnE	4
-#define MT_DEVICE_nGnRE		5
+#define MT_DEVICE_nGnRnE	3
+#define MT_DEVICE_nGnRE		4
 
 /*
  * Memory types for Stage-2 translation
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 938092df76cfe..7032f04c8ac6e 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -55,7 +55,6 @@ extern bool arm64_use_ng_mappings;
 #define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
 #define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
 #define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 #define PROT_NORMAL_TAGGED	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_TAGGED))
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index baeb33cd7685f..9ea84bcddf85e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -704,7 +704,6 @@
 #define MAIR_ATTR_DEVICE_nGnRnE		UL(0x00)
 #define MAIR_ATTR_DEVICE_nGnRE		UL(0x04)
 #define MAIR_ATTR_NORMAL_NC		UL(0x44)
-#define MAIR_ATTR_NORMAL_WT		UL(0xbb)
 #define MAIR_ATTR_NORMAL_TAGGED		UL(0xf0)
 #define MAIR_ATTR_NORMAL		UL(0xff)
 #define MAIR_ATTR_MASK			UL(0xff)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index add026fcc88ca..6e640fa9788e2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -60,7 +60,6 @@
 	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |	\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |			\
-	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT) |		\
 	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL_TAGGED))
 
 #ifdef CONFIG_CPU_PM
-- 
GitLab


From 65688d2a05deb9f0671a7e2301eadbfe7e27c9e9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 27 May 2021 13:43:56 +0100
Subject: [PATCH 1956/3804] arm64: cache: Lower ARCH_DMA_MINALIGN to 64
 (L1_CACHE_BYTES)

Back in 97303480753e ("arm64: Increase the max granular size"),
ARCH_DMA_MINALIGN was effectively increased to 128 bytes thanks to an
increase in L1_CACHE_BYTES due to an unsubstantiated performance claim
on the now obsolete ThunderX-1. Although this was reverted in
d93277b9839b, ARCH_DMA_MINALIGN was kept at 128 bytes by ebc7e21e0fa2
("arm64: Increase ARCH_DMA_MINALIGN to 128").

During discussion of the original patch, it was reported that the change
also prevented a warning during boot on (again, now obsolete) Qualcomm
server hardware where the cache writeback granule was larger than 64
bytes. The reason for this warning was because non-coherent DMA could
lead to data corruption due to unexpected writeback from the CPU where a
cacheline is shared with other allocations.

Since then, systems have appeared with larger cachelines still, and so
commit 8f5c9037a55b ("arm64/mm: Correct the cache line size warning with
non coherent device") reworked the warning so that it only appears on
systems where non-coherent DMA is actually required and taints the
kernel with TAINT_CPU_OUT_OF_SPEC. We are not aware of any systems, even
including the aforementioned obsolete machines, which have a CWG larger
than 64 bytes and require non-coherent DMA.

More recently, it has been reported that an ARCH_DMA_MINALIGN of 128
bytes wastes considerable memory (~6% immediately after boot on one
system).

Reduce ARCH_DMA_MINALIGN to 64 bytes and allow the warning/taint to
indicate if there are machines that unknowingly rely on this.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/linux-arm-kernel/1442944788-17254-1-git-send-email-rric@kernel.org/
Link: https://lore.kernel.org/linux-arm-kernel/CAOZdJXUiRMAguDV+HEJqPg57MyBNqEcTyaH+ya=U93NHb-pdJA@mail.gmail.com/
Link: https://lore.kernel.org/linux-arm-kernel/20190614131141.4428-1-msys.mizuma@gmail.com/
Link: https://lore.kernel.org/r/20210517074332.28280-1-vincent.whitchurch@axis.com
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20210527124356.22367-1-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index a074459f8f2fb..a9c0716e74405 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -47,7 +47,7 @@
  * cache before the transfer is done, causing old data to be seen by
  * the CPU.
  */
-#define ARCH_DMA_MINALIGN	(128)
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
 
 #ifdef CONFIG_KASAN_SW_TAGS
 #define ARCH_SLAB_MINALIGN	(1ULL << KASAN_SHADOW_SCALE_SHIFT)
-- 
GitLab


From cbcddaa33d7e11a053cb80a4a635c023b4f8b906 Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Fri, 14 May 2021 14:59:20 +0100
Subject: [PATCH 1957/3804] perf/x86/rapl: Use CPUID bit on AMD and Hygon parts

AMD and Hygon CPUs have a CPUID bit for RAPL.  Drop the fam17h suffix as
it is stale already.

Make use of this instead of a model check to work more nicely in virtual
environments where RAPL typically isn't available.

 [ bp: drop the ../cpu/powerflags.c hunk which is superfluous as the
   "rapl" bit name appears already in flags. ]

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210514135920.16093-1-andrew.cooper3@citrix.com
---
 arch/x86/events/rapl.c             | 6 ++----
 arch/x86/include/asm/cpufeatures.h | 2 +-
 arch/x86/kernel/cpu/amd.c          | 4 ++++
 arch/x86/kernel/cpu/hygon.c        | 4 ++++
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 84a1042c3b01e..85feafacc445d 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -764,13 +764,14 @@ static struct rapl_model model_spr = {
 	.rapl_msrs      = intel_rapl_spr_msrs,
 };
 
-static struct rapl_model model_amd_fam17h = {
+static struct rapl_model model_amd_hygon = {
 	.events		= BIT(PERF_RAPL_PKG),
 	.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
 	.rapl_msrs      = amd_rapl_msrs,
 };
 
 static const struct x86_cpu_id rapl_model_match[] __initconst = {
+	X86_MATCH_FEATURE(X86_FEATURE_RAPL,		&model_amd_hygon),
 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&model_snb),
 	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&model_snbep),
 	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&model_snb),
@@ -803,9 +804,6 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,		&model_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,		&model_skl),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&model_spr),
-	X86_MATCH_VENDOR_FAM(AMD,	0x17,		&model_amd_fam17h),
-	X86_MATCH_VENDOR_FAM(HYGON,	0x18,		&model_amd_fam17h),
-	X86_MATCH_VENDOR_FAM(AMD,	0x19,		&model_amd_fam17h),
 	{},
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ac37830ae9412..81269c73a0dc9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -108,7 +108,7 @@
 #define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
 #define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-/* free					( 3*32+29) */
+#define X86_FEATURE_RAPL		( 3*32+29) /* AMD/Hygon RAPL interface */
 #define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 2d11384dc9ab4..da57b96fafbe0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -646,6 +646,10 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & BIT(12))
 		set_cpu_cap(c, X86_FEATURE_ACC_POWER);
 
+	/* Bit 14 indicates the Runtime Average Power Limit interface. */
+	if (c->x86_power & BIT(14))
+		set_cpu_cap(c, X86_FEATURE_RAPL);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 #else
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index 0bd6c74e3ba15..6d50136f7ab98 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -260,6 +260,10 @@ static void early_init_hygon(struct cpuinfo_x86 *c)
 	if (c->x86_power & BIT(12))
 		set_cpu_cap(c, X86_FEATURE_ACC_POWER);
 
+	/* Bit 14 indicates the Runtime Average Power Limit interface. */
+	if (c->x86_power & BIT(14))
+		set_cpu_cap(c, X86_FEATURE_RAPL);
+
 #ifdef CONFIG_X86_64
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 #endif
-- 
GitLab


From cde1391a0b4014b0e8fc09cd316272f478b54c0f Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 28 May 2021 09:38:06 +0200
Subject: [PATCH 1958/3804] ima: Add ima_show_template_uint() template library
 function

This patch introduces the new function ima_show_template_uint(). This can
be used for showing integers of different sizes in ASCII format. The
function ima_show_template_data_ascii() automatically determines how to
print a stored integer by checking the integer size.

If integers have been written in canonical format,
ima_show_template_data_ascii() calls the appropriate leXX_to_cpu() function
to correctly display the value.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_template_lib.c | 38 ++++++++++++++++++++++-
 security/integrity/ima/ima_template_lib.h |  2 ++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 4314d9a3514c1..f23296c33da14 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -24,7 +24,8 @@ enum data_formats {
 	DATA_FMT_DIGEST = 0,
 	DATA_FMT_DIGEST_WITH_ALGO,
 	DATA_FMT_STRING,
-	DATA_FMT_HEX
+	DATA_FMT_HEX,
+	DATA_FMT_UINT
 };
 
 static int ima_write_template_field_data(const void *data, const u32 datalen,
@@ -88,6 +89,37 @@ static void ima_show_template_data_ascii(struct seq_file *m,
 	case DATA_FMT_STRING:
 		seq_printf(m, "%s", buf_ptr);
 		break;
+	case DATA_FMT_UINT:
+		/* The stored field length selects the integer width to print. */
+		switch (field_data->len) {
+		case sizeof(u8):
+			seq_printf(m, "%u", *(u8 *)buf_ptr);
+			break;
+		case sizeof(u16):
+			if (ima_canonical_fmt)
+				seq_printf(m, "%u",
+					   le16_to_cpu(*(u16 *)buf_ptr));
+			else
+				seq_printf(m, "%u", *(u16 *)buf_ptr);
+			break;
+		case sizeof(u32):
+			if (ima_canonical_fmt)
+				seq_printf(m, "%u",
+					   le32_to_cpu(*(u32 *)buf_ptr));
+			else
+				seq_printf(m, "%u", *(u32 *)buf_ptr);
+			break;
+		case sizeof(u64):
+			if (ima_canonical_fmt)
+				seq_printf(m, "%llu",
+					   le64_to_cpu(*(u64 *)buf_ptr));
+			else
+				seq_printf(m, "%llu", *(u64 *)buf_ptr);
+			break;
+		default:
+			break;
+		}
+		break;
 	default:
 		break;
 	}
@@ -163,6 +193,12 @@ void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
 	ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data);
 }
 
+void ima_show_template_uint(struct seq_file *m, enum ima_show_type show,
+			    struct ima_field_data *field_data)
+{
+	ima_show_template_field_data(m, show, DATA_FMT_UINT, field_data);
+}
+
 /**
  * ima_parse_buf() - Parses lengths and data from an input buffer
  * @bufstartp:       Buffer start address.
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index f4b2a2056d1d5..54b67c80b3155 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -27,6 +27,8 @@ void ima_show_template_sig(struct seq_file *m, enum ima_show_type show,
 			   struct ima_field_data *field_data);
 void ima_show_template_buf(struct seq_file *m, enum ima_show_type show,
 			   struct ima_field_data *field_data);
+void ima_show_template_uint(struct seq_file *m, enum ima_show_type show,
+			    struct ima_field_data *field_data);
 int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
 		  int maxfields, struct ima_field_data *fields, int *curfields,
 		  unsigned long *len_mask, int enforce_mask, char *bufname);
-- 
GitLab


From 7dcfeacc5a9d0c130160b86de23279793a8732c8 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 28 May 2021 09:38:07 +0200
Subject: [PATCH 1959/3804] ima: Define new template fields iuid and igid

This patch defines the new template fields iuid and igid, which include
the inode UID and GID respectively. For idmapped mounts, the original
UID and GID are still provided.

These fields can be used to verify the EVM portable signature, if it was
included with the template fields sig or evmsig.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst  |  2 +
 security/integrity/ima/ima_template.c     |  4 ++
 security/integrity/ima/ima_template_lib.c | 45 +++++++++++++++++++++++
 security/integrity/ima/ima_template_lib.h |  4 ++
 4 files changed, 55 insertions(+)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 9f3e86ab028a4..bf8ce4cf5878a 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -75,6 +75,8 @@ descriptors by adding their identifier to the format string
  - 'modsig' the appended file signature;
  - 'buf': the buffer data that was used to generate the hash without size limitations;
  - 'evmsig': the EVM portable signature;
+ - 'iuid': the inode UID;
+ - 'igid': the inode GID;
 
 
 Below, there is the list of defined template descriptors:
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 7a60848c04a54..a5ecd9e2581bd 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -47,6 +47,10 @@ static const struct ima_template_field supported_fields[] = {
 	 .field_show = ima_show_template_sig},
 	{.field_id = "evmsig", .field_init = ima_eventevmsig_init,
 	 .field_show = ima_show_template_sig},
+	{.field_id = "iuid", .field_init = ima_eventinodeuid_init,
+	 .field_show = ima_show_template_uint},
+	{.field_id = "igid", .field_init = ima_eventinodegid_init,
+	 .field_show = ima_show_template_uint},
 };
 
 /*
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index f23296c33da14..87b40f391739f 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -551,3 +551,48 @@ int ima_eventevmsig_init(struct ima_event_data *event_data,
 	kfree(xattr_data);
 	return rc;
 }
+
+static int ima_eventinodedac_init_common(struct ima_event_data *event_data,
+					 struct ima_field_data *field_data,
+					 bool get_uid)
+{
+	unsigned int id;
+
+	if (!event_data->file)
+		return 0;
+
+	if (get_uid)
+		id = i_uid_read(file_inode(event_data->file));
+	else
+		id = i_gid_read(file_inode(event_data->file));
+
+	if (ima_canonical_fmt) {
+		if (sizeof(id) == sizeof(u16))
+			id = cpu_to_le16(id);
+		else
+			id = cpu_to_le32(id);
+	}
+
+	return ima_write_template_field_data((void *)&id, sizeof(id),
+					     DATA_FMT_UINT, field_data);
+}
+
+/*
+ *  ima_eventinodeuid_init - include the inode UID as part of the template
+ *  data
+ */
+int ima_eventinodeuid_init(struct ima_event_data *event_data,
+			   struct ima_field_data *field_data)
+{
+	return ima_eventinodedac_init_common(event_data, field_data, true);
+}
+
+/*
+ *  ima_eventinodegid_init - include the inode GID as part of the template
+ *  data
+ */
+int ima_eventinodegid_init(struct ima_event_data *event_data,
+			   struct ima_field_data *field_data)
+{
+	return ima_eventinodedac_init_common(event_data, field_data, false);
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index 54b67c80b3155..b0aaf109f386e 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -50,4 +50,8 @@ int ima_eventmodsig_init(struct ima_event_data *event_data,
 			 struct ima_field_data *field_data);
 int ima_eventevmsig_init(struct ima_event_data *event_data,
 			 struct ima_field_data *field_data);
+int ima_eventinodeuid_init(struct ima_event_data *event_data,
+			   struct ima_field_data *field_data);
+int ima_eventinodegid_init(struct ima_event_data *event_data,
+			   struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
-- 
GitLab


From f8216f6b957f5657c5f4c97f4b037120c6f236bc Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 28 May 2021 09:38:08 +0200
Subject: [PATCH 1960/3804] ima: Define new template field imode

This patch defines the new template field imode, which includes the
inode mode. It can be used by a remote verifier to verify the EVM portable
signature, if it was included with the template fields sig or evmsig.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst  |  1 +
 security/integrity/ima/ima_template.c     |  2 ++
 security/integrity/ima/ima_template_lib.c | 22 ++++++++++++++++++++++
 security/integrity/ima/ima_template_lib.h |  2 ++
 4 files changed, 27 insertions(+)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index bf8ce4cf5878a..65c1ce451d083 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -77,6 +77,7 @@ descriptors by adding their identifier to the format string
  - 'evmsig': the EVM portable signature;
  - 'iuid': the inode UID;
  - 'igid': the inode GID;
+ - 'imode': the inode mode;
 
 
 Below, there is the list of defined template descriptors:
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index a5ecd9e2581bd..43784f2bf8bd6 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -51,6 +51,8 @@ static const struct ima_template_field supported_fields[] = {
 	 .field_show = ima_show_template_uint},
 	{.field_id = "igid", .field_init = ima_eventinodegid_init,
 	 .field_show = ima_show_template_uint},
+	{.field_id = "imode", .field_init = ima_eventinodemode_init,
+	 .field_show = ima_show_template_uint},
 };
 
 /*
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 87b40f391739f..3156fb34b1afa 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -596,3 +596,25 @@ int ima_eventinodegid_init(struct ima_event_data *event_data,
 {
 	return ima_eventinodedac_init_common(event_data, field_data, false);
 }
+
+/*
+ *  ima_eventinodemode_init - include the inode mode as part of the template
+ *  data
+ */
+int ima_eventinodemode_init(struct ima_event_data *event_data,
+			    struct ima_field_data *field_data)
+{
+	struct inode *inode;
+	umode_t mode;
+
+	if (!event_data->file)
+		return 0;
+
+	inode = file_inode(event_data->file);
+	mode = inode->i_mode;
+	if (ima_canonical_fmt)
+		mode = cpu_to_le16(mode);
+
+	return ima_write_template_field_data((char *)&mode, sizeof(mode),
+					     DATA_FMT_UINT, field_data);
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index b0aaf109f386e..6509af4a97ee5 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -54,4 +54,6 @@ int ima_eventinodeuid_init(struct ima_event_data *event_data,
 			   struct ima_field_data *field_data);
 int ima_eventinodegid_init(struct ima_event_data *event_data,
 			   struct ima_field_data *field_data);
+int ima_eventinodemode_init(struct ima_event_data *event_data,
+			    struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
-- 
GitLab


From 8c7a703ec9787a1b45b024e9acd253328422dcbd Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 28 May 2021 09:38:09 +0200
Subject: [PATCH 1961/3804] evm: Verify portable signatures against all
 protected xattrs

Currently, the evm_config_default_xattrnames array contains xattr names
only related to LSMs which are enabled in the kernel configuration.
However, EVM portable signatures do not depend on local information and a
vendor might include in the signature calculation xattrs that are not
enabled in the target platform.

Just including all xattrs names in evm_config_default_xattrnames is not a
safe approach, because a target system might have already calculated
signatures or HMACs based only on the enabled xattrs. After applying this
patch, EVM would verify those signatures and HMACs with all xattrs instead.
The non-enabled ones, which could possibly exist, would cause a
verification error.

Thus, this patch adds a new field named enabled to the xattr_list
structure, which is set to true if the LSM associated to a given xattr name
is enabled in the kernel configuration. The non-enabled xattrs are taken
into account only in evm_calc_hmac_or_hash(), if the passed security.evm
type is EVM_XATTR_PORTABLE_DIGSIG.

The new function evm_protected_xattr_if_enabled() has been defined so that
IMA can include all protected xattrs and not only the enabled ones in the
measurement list, if the new template fields xattrnames, xattrlengths or
xattrvalues have been included in the template format.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/evm.h                 |  6 ++++
 security/integrity/evm/evm.h        |  1 +
 security/integrity/evm/evm_crypto.c |  7 ++++
 security/integrity/evm/evm_main.c   | 56 +++++++++++++++++++++++------
 security/integrity/evm/evm_secfs.c  | 16 +++++++--
 5 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/include/linux/evm.h b/include/linux/evm.h
index 31ef1dbbb3acd..5011a299c2511 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -38,6 +38,7 @@ extern int evm_inode_init_security(struct inode *inode,
 				   const struct xattr *xattr_array,
 				   struct xattr *evm);
 extern bool evm_revalidate_status(const char *xattr_name);
+extern int evm_protected_xattr_if_enabled(const char *req_xattr_name);
 #ifdef CONFIG_FS_POSIX_ACL
 extern int posix_xattr_acl(const char *xattrname);
 #else
@@ -114,5 +115,10 @@ static inline bool evm_revalidate_status(const char *xattr_name)
 	return false;
 }
 
+static inline int evm_protected_xattr_if_enabled(const char *req_xattr_name)
+{
+	return false;
+}
+
 #endif /* CONFIG_EVM */
 #endif /* LINUX_EVM_H */
diff --git a/security/integrity/evm/evm.h b/security/integrity/evm/evm.h
index f2fef2b5ed51b..0d44f41d16f87 100644
--- a/security/integrity/evm/evm.h
+++ b/security/integrity/evm/evm.h
@@ -29,6 +29,7 @@
 struct xattr_list {
 	struct list_head list;
 	char *name;
+	bool enabled;
 };
 
 extern int evm_initialized;
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index d76b006cbcc4c..1628e2ca98623 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -216,6 +216,13 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
 		if (strcmp(xattr->name, XATTR_NAME_IMA) == 0)
 			is_ima = true;
 
+		/*
+		 * Skip non-enabled xattrs for locally calculated
+		 * signatures/HMACs.
+		 */
+		if (type != EVM_XATTR_PORTABLE_DIGSIG && !xattr->enabled)
+			continue;
+
 		if ((req_xattr_name && req_xattr_value)
 		    && !strcmp(xattr->name, req_xattr_name)) {
 			error = 0;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 0196168aeb7d5..ee4e17a790fba 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -34,24 +34,44 @@ static const char * const integrity_status_msg[] = {
 int evm_hmac_attrs;
 
 static struct xattr_list evm_config_default_xattrnames[] = {
+	{.name = XATTR_NAME_SELINUX,
 #ifdef CONFIG_SECURITY_SELINUX
-	{.name = XATTR_NAME_SELINUX},
+	 .enabled = true
 #endif
+	},
+	{.name = XATTR_NAME_SMACK,
 #ifdef CONFIG_SECURITY_SMACK
-	{.name = XATTR_NAME_SMACK},
+	 .enabled = true
+#endif
+	},
+	{.name = XATTR_NAME_SMACKEXEC,
+#ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS
+	 .enabled = true
+#endif
+	},
+	{.name = XATTR_NAME_SMACKTRANSMUTE,
 #ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS
-	{.name = XATTR_NAME_SMACKEXEC},
-	{.name = XATTR_NAME_SMACKTRANSMUTE},
-	{.name = XATTR_NAME_SMACKMMAP},
+	 .enabled = true
 #endif
+	},
+	{.name = XATTR_NAME_SMACKMMAP,
+#ifdef CONFIG_EVM_EXTRA_SMACK_XATTRS
+	 .enabled = true
 #endif
+	},
+	{.name = XATTR_NAME_APPARMOR,
 #ifdef CONFIG_SECURITY_APPARMOR
-	{.name = XATTR_NAME_APPARMOR},
+	 .enabled = true
 #endif
+	},
+	{.name = XATTR_NAME_IMA,
 #ifdef CONFIG_IMA_APPRAISE
-	{.name = XATTR_NAME_IMA},
+	 .enabled = true
 #endif
-	{.name = XATTR_NAME_CAPS},
+	},
+	{.name = XATTR_NAME_CAPS,
+	 .enabled = true
+	},
 };
 
 LIST_HEAD(evm_config_xattrnames);
@@ -76,7 +96,9 @@ static void __init evm_init_config(void)
 
 	pr_info("Initialising EVM extended attributes:\n");
 	for (i = 0; i < xattrs; i++) {
-		pr_info("%s\n", evm_config_default_xattrnames[i].name);
+		pr_info("%s%s\n", evm_config_default_xattrnames[i].name,
+			!evm_config_default_xattrnames[i].enabled ?
+			" (disabled)" : "");
 		list_add_tail(&evm_config_default_xattrnames[i].list,
 			      &evm_config_xattrnames);
 	}
@@ -257,7 +279,8 @@ out:
 	return evm_status;
 }
 
-static int evm_protected_xattr(const char *req_xattr_name)
+static int evm_protected_xattr_common(const char *req_xattr_name,
+				      bool all_xattrs)
 {
 	int namelen;
 	int found = 0;
@@ -265,6 +288,9 @@ static int evm_protected_xattr(const char *req_xattr_name)
 
 	namelen = strlen(req_xattr_name);
 	list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) {
+		if (!all_xattrs && !xattr->enabled)
+			continue;
+
 		if ((strlen(xattr->name) == namelen)
 		    && (strncmp(req_xattr_name, xattr->name, namelen) == 0)) {
 			found = 1;
@@ -281,6 +307,16 @@ static int evm_protected_xattr(const char *req_xattr_name)
 	return found;
 }
 
+static int evm_protected_xattr(const char *req_xattr_name)
+{
+	return evm_protected_xattr_common(req_xattr_name, false);
+}
+
+int evm_protected_xattr_if_enabled(const char *req_xattr_name)
+{
+	return evm_protected_xattr_common(req_xattr_name, true);
+}
+
 /**
  * evm_verifyxattr - verify the integrity of the requested xattr
  * @dentry: object of the verify xattr
diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index 5f0da41bccd07..a99676eb7f414 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -139,8 +139,12 @@ static ssize_t evm_read_xattrs(struct file *filp, char __user *buf,
 	if (rc)
 		return -ERESTARTSYS;
 
-	list_for_each_entry(xattr, &evm_config_xattrnames, list)
+	list_for_each_entry(xattr, &evm_config_xattrnames, list) {
+		if (!xattr->enabled)
+			continue;
+
 		size += strlen(xattr->name) + 1;
+	}
 
 	temp = kmalloc(size + 1, GFP_KERNEL);
 	if (!temp) {
@@ -149,6 +153,9 @@ static ssize_t evm_read_xattrs(struct file *filp, char __user *buf,
 	}
 
 	list_for_each_entry(xattr, &evm_config_xattrnames, list) {
+		if (!xattr->enabled)
+			continue;
+
 		sprintf(temp + offset, "%s\n", xattr->name);
 		offset += strlen(xattr->name) + 1;
 	}
@@ -199,6 +206,7 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
 		goto out;
 	}
 
+	xattr->enabled = true;
 	xattr->name = memdup_user_nul(buf, count);
 	if (IS_ERR(xattr->name)) {
 		err = PTR_ERR(xattr->name);
@@ -245,6 +253,10 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
 	list_for_each_entry(tmp, &evm_config_xattrnames, list) {
 		if (strcmp(xattr->name, tmp->name) == 0) {
 			err = -EEXIST;
+			if (!tmp->enabled) {
+				tmp->enabled = true;
+				err = count;
+			}
 			mutex_unlock(&xattr_list_mutex);
 			goto out;
 		}
@@ -256,7 +268,7 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
 	audit_log_end(ab);
 	return count;
 out:
-	audit_log_format(ab, " res=%d", err);
+	audit_log_format(ab, " res=%d", (err < 0) ? err : 0);
 	audit_log_end(ab);
 	if (xattr) {
 		kfree(xattr->name);
-- 
GitLab


From d5b8145455c629e7f157d2da46a9b2fba483f235 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 1 Jun 2021 22:53:27 +0200
Subject: [PATCH 1962/3804] Revert "gfs2: Fix mmap locking for write faults"

This reverts commit b7f55d928e75557295c1ac280c291b738905b6fb.

As explained by Linus in [*], write faults on a mmap region are reads
from a filesystem point of view, so taking the inode glock exclusively
on write faults is incorrect.

Instead, when a page is marked writable, the .page_mkwrite vm operation
will be called, which is where the exclusive lock taking needs to
happen.  I got this wrong because of a broken test case that made me
believe .page_mkwrite isn't getting called when it actually is.

[*] https://lore.kernel.org/lkml/CAHk-=wj8EWr_D65i4oRSj2FTbrc6RdNydNNCGxeabRnwtoU=3Q@mail.gmail.com/

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8a35a0196b6da..493a83e3f5906 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -540,11 +540,9 @@ static vm_fault_t gfs2_fault(struct vm_fault *vmf)
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_holder gh;
 	vm_fault_t ret;
-	u16 state;
 	int err;
 
-	state = (vmf->flags & FAULT_FLAG_WRITE) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
-	gfs2_holder_init(ip->i_gl, state, 0, &gh);
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
 	err = gfs2_glock_nq(&gh);
 	if (err) {
 		ret = block_page_mkwrite_return(err);
-- 
GitLab


From 4ef8d857b5f494e62bce9085031563fda35f9563 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 31 May 2021 13:20:45 +0300
Subject: [PATCH 1963/3804] net: dsa: tag_8021q: fix the VLAN IDs used for
 encoding sub-VLANs

When using sub-VLANs in the range of 1-7, the resulting value from:

	rx_vid = dsa_8021q_rx_vid_subvlan(ds, port, subvlan);

is wrong according to the description from tag_8021q.c:

 | 11  | 10  |  9  |  8  |  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
 +-----------+-----+-----------------+-----------+-----------------------+
 |    DIR    | SVL |    SWITCH_ID    |  SUBVLAN  |          PORT         |
 +-----------+-----+-----------------+-----------+-----------------------+

For example, when ds->index == 0, port == 3 and subvlan == 1,
dsa_8021q_rx_vid_subvlan() returns 1027, same as it returns for
subvlan == 0, but it should have returned 1043.

This is because the low portion of the subvlan bits are not masked
properly when writing into the 12-bit VLAN value. They are masked into
bits 4:3, but they should be masked into bits 5:4.

Fixes: 3eaae1d05f2b ("net: dsa: tag_8021q: support up to 8 VLANs per port using sub-VLANs")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_8021q.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 008c1ec6e20c1..122ad5833fb1c 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -64,7 +64,7 @@
 #define DSA_8021Q_SUBVLAN_HI_SHIFT	9
 #define DSA_8021Q_SUBVLAN_HI_MASK	GENMASK(9, 9)
 #define DSA_8021Q_SUBVLAN_LO_SHIFT	4
-#define DSA_8021Q_SUBVLAN_LO_MASK	GENMASK(4, 3)
+#define DSA_8021Q_SUBVLAN_LO_MASK	GENMASK(5, 4)
 #define DSA_8021Q_SUBVLAN_HI(x)		(((x) & GENMASK(2, 2)) >> 2)
 #define DSA_8021Q_SUBVLAN_LO(x)		((x) & GENMASK(1, 0))
 #define DSA_8021Q_SUBVLAN(x)		\
-- 
GitLab


From dd9082f4a9f94280fbbece641bf8fc0a25f71f7a Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Mon, 31 May 2021 17:00:30 -0400
Subject: [PATCH 1964/3804] net: sock: fix in-kernel mark setting

This patch fixes the in-kernel mark setting by doing an additional
sk_dst_reset() which was introduced by commit 50254256f382 ("sock: Reset
dst when changing sk_mark via setsockopt"). The code is now shared to
avoid any further surprises when changing the socket mark value.

Fixes: 84d1c617402e ("net: sock: add sock_set_mark")
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 958614ea16edb..946888afef880 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -815,10 +815,18 @@ void sock_set_rcvbuf(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(sock_set_rcvbuf);
 
+static void __sock_set_mark(struct sock *sk, u32 val)
+{
+	if (val != sk->sk_mark) {
+		sk->sk_mark = val;
+		sk_dst_reset(sk);
+	}
+}
+
 void sock_set_mark(struct sock *sk, u32 val)
 {
 	lock_sock(sk);
-	sk->sk_mark = val;
+	__sock_set_mark(sk, val);
 	release_sock(sk);
 }
 EXPORT_SYMBOL(sock_set_mark);
@@ -1126,10 +1134,10 @@ set_sndbuf:
 	case SO_MARK:
 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
-		} else if (val != sk->sk_mark) {
-			sk->sk_mark = val;
-			sk_dst_reset(sk);
+			break;
 		}
+
+		__sock_set_mark(sk, val);
 		break;
 
 	case SO_RXQ_OVFL:
-- 
GitLab


From 5c37711d9f27bdc83fd5980446be7f4aa2106230 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Tue, 1 Jun 2021 14:39:59 +0800
Subject: [PATCH 1965/3804] virtio-net: fix for unable to handle page fault for
 address

In merge mode, when xdp is enabled, if the headroom of buf is smaller
than virtnet_get_headroom(), xdp_linearize_page() will be called but the
variable of "headroom" is still 0, which leads to wrong logic after
entering page_to_skb().

[   16.600944] BUG: unable to handle page fault for address: ffffecbfff7b43c8
[   16.602175] #PF: supervisor read access in kernel mode
[   16.603350] #PF: error_code(0x0000) - not-present page
[   16.604200] PGD 0 P4D 0
[   16.604686] Oops: 0000 [#1] SMP PTI
[   16.605306] CPU: 4 PID: 715 Comm: sh Tainted: G    B             5.12.0+ #312
[   16.606429] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/04
[   16.608217] RIP: 0010:unmap_page_range+0x947/0xde0
[   16.609014] Code: 00 00 08 00 48 83 f8 01 45 19 e4 41 f7 d4 41 83 e4 03 e9 a4 fd ff ff e8 b7 63 ed ff 4c 89 e0 48 c1 e0 065
[   16.611863] RSP: 0018:ffffc90002503c58 EFLAGS: 00010286
[   16.612720] RAX: ffffecbfff7b43c0 RBX: 00007f19f7203000 RCX: ffffffff812ff359
[   16.613853] RDX: ffff888107778000 RSI: 0000000000000000 RDI: 0000000000000005
[   16.614976] RBP: ffffea000425e000 R08: 0000000000000000 R09: 3030303030303030
[   16.616124] R10: ffffffff82ed7d94 R11: 6637303030302052 R12: 7c00000afffded0f
[   16.617276] R13: 0000000000000001 R14: ffff888119ee7010 R15: 00007f19f7202000
[   16.618423] FS:  0000000000000000(0000) GS:ffff88842fd00000(0000) knlGS:0000000000000000
[   16.619738] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.620670] CR2: ffffecbfff7b43c8 CR3: 0000000103220005 CR4: 0000000000370ee0
[   16.621792] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   16.622920] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   16.624047] Call Trace:
[   16.624525]  ? release_pages+0x24d/0x730
[   16.625209]  unmap_single_vma+0xa9/0x130
[   16.625885]  unmap_vmas+0x76/0xf0
[   16.626480]  exit_mmap+0xa0/0x210
[   16.627129]  mmput+0x67/0x180
[   16.627673]  do_exit+0x3d1/0xf10
[   16.628259]  ? do_user_addr_fault+0x231/0x840
[   16.629000]  do_group_exit+0x53/0xd0
[   16.629631]  __x64_sys_exit_group+0x1d/0x20
[   16.630354]  do_syscall_64+0x3c/0x80
[   16.630988]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   16.631828] RIP: 0033:0x7f1a043d0191
[   16.632464] Code: Unable to access opcode bytes at RIP 0x7f1a043d0167.
[   16.633502] RSP: 002b:00007ffe3d993308 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7
[   16.634737] RAX: ffffffffffffffda RBX: 00007f1a044c9490 RCX: 00007f1a043d0191
[   16.635857] RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000
[   16.636986] RBP: 0000000000000000 R08: ffffffffffffff88 R09: 0000000000000001
[   16.638120] R10: 0000000000000008 R11: 0000000000000246 R12: 00007f1a044c9490
[   16.639245] R13: 0000000000000001 R14: 00007f1a044c9968 R15: 0000000000000000
[   16.640408] Modules linked in:
[   16.640958] CR2: ffffecbfff7b43c8
[   16.641557] ---[ end trace bc4891c6ce46354c ]---
[   16.642335] RIP: 0010:unmap_page_range+0x947/0xde0
[   16.643135] Code: 00 00 08 00 48 83 f8 01 45 19 e4 41 f7 d4 41 83 e4 03 e9 a4 fd ff ff e8 b7 63 ed ff 4c 89 e0 48 c1 e0 065
[   16.645983] RSP: 0018:ffffc90002503c58 EFLAGS: 00010286
[   16.646845] RAX: ffffecbfff7b43c0 RBX: 00007f19f7203000 RCX: ffffffff812ff359
[   16.647970] RDX: ffff888107778000 RSI: 0000000000000000 RDI: 0000000000000005
[   16.649091] RBP: ffffea000425e000 R08: 0000000000000000 R09: 3030303030303030
[   16.650250] R10: ffffffff82ed7d94 R11: 6637303030302052 R12: 7c00000afffded0f
[   16.651394] R13: 0000000000000001 R14: ffff888119ee7010 R15: 00007f19f7202000
[   16.652529] FS:  0000000000000000(0000) GS:ffff88842fd00000(0000) knlGS:0000000000000000
[   16.653887] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.654841] CR2: ffffecbfff7b43c8 CR3: 0000000103220005 CR4: 0000000000370ee0
[   16.655992] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   16.657150] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   16.658290] Kernel panic - not syncing: Fatal exception
[   16.659613] Kernel Offset: disabled
[   16.660234] ---[ end Kernel panic - not syncing: Fatal exception ]---

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9b6a4a875c553..6b929aca155ad 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -958,7 +958,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page, offset,
 						       len, PAGE_SIZE, false,
-						       metasize, headroom);
+						       metasize,
+						       VIRTIO_XDP_HEADROOM);
 				return head_skb;
 			}
 			break;
-- 
GitLab


From 8fb7da9e990793299c89ed7a4281c235bfdd31f8 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Tue, 1 Jun 2021 14:40:00 +0800
Subject: [PATCH 1966/3804] virtio_net: get build_skb() buf by data ptr

In the case of merge, the page passed into page_to_skb() may be a head
page, not the page where the current data is located. So when trying to
get the buf where the data is located, we should get buf based on
headroom instead of offset.

This patch solves this problem. Without this patch the original code
still works, because if the page is not the page holding the current
data, the calculated tailroom will be less than 0 and the build_skb()
path will not be taken. The point of this patch is to fix this logic
problem, allowing build_skb() to be used in more situations.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6b929aca155ad..fa407eb8b457a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -401,18 +401,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	/* If headroom is not 0, there is an offset between the beginning of the
 	 * data and the allocated space, otherwise the data and the allocated
 	 * space are aligned.
+	 *
+	 * Buffers with headroom use PAGE_SIZE as alloc size, see
+	 * add_recvbuf_mergeable() + get_mergeable_buf_len()
 	 */
-	if (headroom) {
-		/* Buffers with headroom use PAGE_SIZE as alloc size,
-		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
-		 */
-		truesize = PAGE_SIZE;
-		tailroom = truesize - len - offset;
-		buf = page_address(page);
-	} else {
-		tailroom = truesize - len;
-		buf = p;
-	}
+	truesize = headroom ? PAGE_SIZE : truesize;
+	tailroom = truesize - len - headroom;
+	buf = p - headroom;
 
 	len -= hdr_len;
 	offset += hdr_padded_len;
-- 
GitLab


From e50899122f3204946bb3559da23700c2e5b9568b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 25 May 2021 15:27:27 +0200
Subject: [PATCH 1967/3804] scripts: sphinx-pre-install: rework the sphinx
 install logic

The sphinx-pre-install supports installing sphinx via a virtual
environment using pip/pypi or directly from the distribution's
package, when --no-virtualenv is used.

However, even when --no-virtualenv is used, the current logic
still recommends installing a virtual env, due to a regression.

It turns out that the logic there is complex, as it depends on
several different conditions.

Split the code which recommends Sphinx into two separate
functions, in order to clean up the code.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/9dedaec201803017b7a7dc24a074f3a4f040b72a.1621949137.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/sphinx-pre-install | 208 +++++++++++++++++++++++++------------
 1 file changed, 140 insertions(+), 68 deletions(-)

diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index fe92020d67e3f..b5fec149f4739 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -24,7 +24,6 @@ my $need_symlink = 0;
 my $need_sphinx = 0;
 my $need_venv = 0;
 my $need_virtualenv = 0;
-my $rec_sphinx_upgrade = 0;
 my $install = "";
 my $virtenv_dir = "";
 my $python_cmd = "";
@@ -33,6 +32,7 @@ my $cur_version;
 my $rec_version = "1.7.9";	# PDF won't build here
 my $min_pdf_version = "2.4.4";	# Min version where pdf builds
 
+
 #
 # Command line arguments
 #
@@ -319,10 +319,7 @@ sub check_sphinx()
 		return;
 	}
 
-	if ($cur_version lt $rec_version) {
-		$rec_sphinx_upgrade = 1;
-		return;
-	}
+	return if ($cur_version lt $rec_version);
 
 	# On version check mode, just assume Sphinx has all mandatory deps
 	exit (0) if ($version_check);
@@ -701,6 +698,141 @@ sub deactivate_help()
 	printf "\tdeactivate\n";
 }
 
+sub get_virtenv()
+{
+	my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate";
+	my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate";
+
+	@activates = sort {$b cmp $a} @activates;
+	my ($activate, $ver);
+	foreach my $f (@activates) {
+		next if ($f lt $min_activate);
+
+		my $sphinx_cmd = $f;
+		$sphinx_cmd =~ s/activate/sphinx-build/;
+		next if (! -f $sphinx_cmd);
+
+		my $ver = get_sphinx_version($sphinx_cmd);
+		if ($need_sphinx && ($ver ge $min_version)) {
+			return ($f, $ver);
+		} elsif ($ver gt $cur_version) {
+			return ($f, $ver);
+		}
+	}
+	return ("", "");
+}
+
+#
+# The logic here is complex, as it have to deal with different versions:
+#	- minimal supported version;
+#	- minimal PDF version;
+#	- recommended version.
+# It also needs to work fine with both distro's package and venv/virtualenv
+sub recommend_sphinx_version($)
+{
+	my $virtualenv_cmd = shift;
+
+	# Avoid running sphinx-builds from venv if $cur_version is good
+	return if ($cur_version && ($cur_version ge $rec_version));
+
+	my $latest_avail_ver;
+	my $rec_sphinx_upgrade = 0;
+
+	# Get the highest version from sphinx_*/bin/sphinx-build and the
+	# corresponding command to activate the venv/virtenv
+	my ($activate, $venv_ver) = get_virtenv();
+
+	if (($activate ne "") && ($venv_ver gt $cur_version)) {
+		$latest_avail_ver = $venv_ver;
+	} else {
+		$latest_avail_ver = $cur_version if ($cur_version);
+	}
+
+	if (!$need_sphinx) {
+		# sphinx-build is present and its version is >= $min_version
+
+		#only recommend enabling a newer virtenv version if makes sense.
+		if ($latest_avail_ver gt $cur_version) {
+			printf "\nYou may also use the newer Sphinx version $venv_ver with:\n";
+			printf "\tdeactivate\n"  if ($ENV{'PWD'} =~ /${virtenv_prefix}/);
+			printf "\t. $activate\n";
+			deactivate_help();
+
+			return;
+		}
+		return if ($latest_avail_ver ge $rec_version);
+	}
+
+	if (!$virtualenv) {
+		# No sphinx either via package or via virtenv. As we can't
+		# Compare the versions here, just return, recommending the
+		# user to install it from the package distro.
+		return if (!$latest_avail_ver);
+
+		# User doesn't want a virtenv recommendation, but he already
+		# installed one via virtenv with a newer version.
+		# So, print commands to enable it
+		if ($latest_avail_ver gt $cur_version) {
+			printf "\nYou may also use the Sphinx virtualenv version $venv_ver with:\n";
+			printf "\tdeactivate\n"  if ($ENV{'PWD'} =~ /${virtenv_prefix}/);
+			printf "\t. $activate\n";
+			deactivate_help();
+
+			return;
+		}
+		print "\n";
+	} else {
+		$need++ if ($need_sphinx);
+	}
+
+	# Suggest newer versions if current ones are too old
+	if ($latest_avail_ver && $cur_version ge $min_version) {
+		# If there's a good enough version, ask the user to enable it
+		if ($latest_avail_ver ge $rec_version) {
+			printf "\nNeed to activate Sphinx (version $venv_ver) on virtualenv with:\n";
+			printf "\t. $activate\n";
+			deactivate_help();
+
+			return;
+		}
+
+		# Version is above the minimal required one, but may be
+		# below the recommended one. So, print warnings/notes
+
+		if ($latest_avail_ver lt $rec_version) {
+			print "Warning: It is recommended at least Sphinx version $rec_version.\n";
+			$rec_sphinx_upgrade = 1;
+		}
+		if ($latest_avail_ver lt $min_pdf_version) {
+			print "note: If you want pdf, you need at least $min_pdf_version.\n";
+		}
+	}
+
+	# At this point, either it needs Sphinx or upgrade is recommended,
+	# both via pip
+
+	if ($rec_sphinx_upgrade) {
+		if (!$virtualenv) {
+			print "Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n";
+		} else {
+			print "To upgrade Sphinx, use:\n\n";
+		}
+	} else {
+		print "Sphinx needs to be installed either as a package or via pip/pypi with:\n";
+	}
+
+	$python_cmd = find_python_no_venv();
+
+	if ($need_venv) {
+		printf "\t$python_cmd -m venv $virtenv_dir\n";
+	} else {
+		printf "\t$virtualenv_cmd $virtenv_dir\n";
+	}
+	printf "\t. $virtenv_dir/bin/activate\n";
+	printf "\tpip install -r $requirement_file\n";
+	deactivate_help();
+}
+
 sub check_needs()
 {
 	# Check if Sphinx is already accessible from current environment
@@ -763,8 +895,8 @@ sub check_needs()
 	check_program("rsvg-convert", 2) if ($pdf);
 	check_program("latexmk", 2) if ($pdf);
 
-	if ($need_sphinx || $rec_sphinx_upgrade) {
-		check_python_module("ensurepip", 0) if ($need_venv);
+	if ($need_venv) {
+		check_python_module("ensurepip", 0);
 	}
 
 	# Do distro-specific checks and output distro-install commands
@@ -784,67 +916,7 @@ sub check_needs()
 		       which("sphinx-build-3");
 	}
 
-	# NOTE: if the system has a too old Sphinx version installed,
-	# it will recommend installing a newer version using virtualenv
-
-	if ($need_sphinx || $rec_sphinx_upgrade) {
-		my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate";
-		my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate";
-
-		if ($cur_version lt $rec_version) {
-			print "Warning: It is recommended at least Sphinx version $rec_version.\n";
-			print "         If you want pdf, you need at least $min_pdf_version.\n";
-		}
-		if ($cur_version lt $min_pdf_version) {
-			print "Note: It is recommended at least Sphinx version $min_pdf_version if you need PDF support.\n";
-		}
-		@activates = sort {$b cmp $a} @activates;
-		my ($activate, $ver);
-		foreach my $f (@activates) {
-			next if ($f lt $min_activate);
-
-			my $sphinx_cmd = $f;
-			$sphinx_cmd =~ s/activate/sphinx-build/;
-			next if (! -f $sphinx_cmd);
-
-			$ver = get_sphinx_version($sphinx_cmd);
-			if ($need_sphinx && ($ver ge $min_version)) {
-				$activate = $f;
-				last;
-			} elsif ($ver gt $cur_version) {
-				$activate = $f;
-				last;
-			}
-		}
-		if ($activate ne "") {
-			if ($need_sphinx) {
-				printf "\nNeed to activate Sphinx (version $ver) on virtualenv with:\n";
-				printf "\t. $activate\n";
-				deactivate_help();
-				exit (1);
-			} else {
-				printf "\nYou may also use a newer Sphinx (version $ver) with:\n";
-				printf "\tdeactivate && . $activate\n";
-			}
-		} else {
-			my $rec_activate = "$virtenv_dir/bin/activate";
-
-			print "To upgrade Sphinx, use:\n\n" if ($rec_sphinx_upgrade);
-
-			$python_cmd = find_python_no_venv();
-
-			if ($need_venv) {
-				printf "\t$python_cmd -m venv $virtenv_dir\n";
-			} else {
-				printf "\t$virtualenv_cmd $virtenv_dir\n";
-			}
-			printf "\t. $rec_activate\n";
-			printf "\tpip install -r $requirement_file\n";
-			deactivate_help();
-
-			$need++ if (!$rec_sphinx_upgrade);
-		}
-	}
+	recommend_sphinx_version($virtualenv_cmd);
 	printf "\n";
 
 	print "All optional dependencies are met.\n" if (!$optional);
-- 
GitLab


From a5f785f1021857a889b1f5b7cc1d83efd4404336 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 25 May 2021 15:27:28 +0200
Subject: [PATCH 1968/3804] scripts: sphinx-pre-install: fix the need of
 virtenv packages

The pip packages are only needed when the distro-provided
Sphinx version is not good enough.

Don't recommend installing it if not needed.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/04ce53b77b37f1e495c3abc39c2d3dc407895dc0.1621949137.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/sphinx-pre-install | 122 ++++++++++++++++++++++---------------
 1 file changed, 74 insertions(+), 48 deletions(-)

diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install
index b5fec149f4739..288e86a9d1e58 100755
--- a/scripts/sphinx-pre-install
+++ b/scripts/sphinx-pre-install
@@ -22,16 +22,18 @@ my $need = 0;
 my $optional = 0;
 my $need_symlink = 0;
 my $need_sphinx = 0;
-my $need_venv = 0;
+my $need_pip = 0;
 my $need_virtualenv = 0;
+my $rec_sphinx_upgrade = 0;
 my $install = "";
 my $virtenv_dir = "";
 my $python_cmd = "";
+my $activate_cmd;
 my $min_version;
 my $cur_version;
 my $rec_version = "1.7.9";	# PDF won't build here
 my $min_pdf_version = "2.4.4";	# Min version where pdf builds
-
+my $latest_avail_ver;
 
 #
 # Command line arguments
@@ -700,11 +702,12 @@ sub deactivate_help()
 
 sub get_virtenv()
 {
+	my $ver;
 	my $min_activate = "$ENV{'PWD'}/${virtenv_prefix}${min_version}/bin/activate";
 	my @activates = glob "$ENV{'PWD'}/${virtenv_prefix}*/bin/activate";
 
 	@activates = sort {$b cmp $a} @activates;
-	my ($activate, $ver);
+
 	foreach my $f (@activates) {
 		next if ($f lt $min_activate);
 
@@ -722,40 +725,67 @@ sub get_virtenv()
 	return ("", "");
 }
 
-#
-# The logic here is complex, as it have to deal with different versions:
-#	- minimal supported version;
-#	- minimal PDF version;
-#	- recommended version.
-# It also needs to work fine with both distro's package and venv/virtualenv
-sub recommend_sphinx_version($)
+sub recommend_sphinx_upgrade()
 {
-	my $virtualenv_cmd = shift;
+	my $venv_ver;
 
 	# Avoid running sphinx-builds from venv if $cur_version is good
-	return if ($cur_version && ($cur_version ge $rec_version));
-
-	my $latest_avail_ver;
-	my $rec_sphinx_upgrade = 0;
+	if ($cur_version && ($cur_version ge $rec_version)) {
+		$latest_avail_ver = $cur_version;
+		return;
+	}
 
 	# Get the highest version from sphinx_*/bin/sphinx-build and the
 	# corresponding command to activate the venv/virtenv
-	my ($activate, $venv_ver) = get_virtenv();
+	$activate_cmd = get_virtenv();
 
-	if (($activate ne "") && ($venv_ver gt $cur_version)) {
+	# Store the highest version from Sphinx existing virtualenvs
+	if (($activate_cmd ne "") && ($venv_ver gt $cur_version)) {
 		$latest_avail_ver = $venv_ver;
 	} else {
 		$latest_avail_ver = $cur_version if ($cur_version);
 	}
 
+	# As we don't know package version of Sphinx, and there's no
+	# virtual environments, don't check if upgrades are needed
+	if (!$virtualenv) {
+		return if (!$latest_avail_ver);
+	}
+
+	# Either there are already a virtual env or a new one should be created
+	$need_pip = 1;
+
+	# Return if the reason is due to an upgrade or not
+	if ($latest_avail_ver lt $rec_version) {
+		$rec_sphinx_upgrade = 1;
+	}
+}
+
+#
+# The logic here is complex, as it have to deal with different versions:
+#	- minimal supported version;
+#	- minimal PDF version;
+#	- recommended version.
+# It also needs to work fine with both distro's package and venv/virtualenv
+sub recommend_sphinx_version($)
+{
+	my $virtualenv_cmd = shift;
+
+	if ($latest_avail_ver lt $min_pdf_version) {
+		print "note: If you want pdf, you need at least Sphinx $min_pdf_version.\n";
+	}
+
+	# Version is OK. Nothing to do.
+	return if ($cur_version && ($cur_version ge $rec_version));
+
 	if (!$need_sphinx) {
 		# sphinx-build is present and its version is >= $min_version
 
 		#only recommend enabling a newer virtenv version if makes sense.
 		if ($latest_avail_ver gt $cur_version) {
-			printf "\nYou may also use the newer Sphinx version $venv_ver with:\n";
+			printf "\nYou may also use the newer Sphinx version $latest_avail_ver with:\n";
 			printf "\tdeactivate\n"  if ($ENV{'PWD'} =~ /${virtenv_prefix}/);
-			printf "\t. $activate\n";
+			printf "\t. $activate_cmd\n";
 			deactivate_help();
 
 			return;
@@ -773,9 +803,9 @@ sub recommend_sphinx_version($)
 		# installed one via virtenv with a newer version.
 		# So, print commands to enable it
 		if ($latest_avail_ver gt $cur_version) {
-			printf "\nYou may also use the Sphinx virtualenv version $venv_ver with:\n";
+			printf "\nYou may also use the Sphinx virtualenv version $latest_avail_ver with:\n";
 			printf "\tdeactivate\n"  if ($ENV{'PWD'} =~ /${virtenv_prefix}/);
-			printf "\t. $activate\n";
+			printf "\t. $activate_cmd\n";
 			deactivate_help();
 
 			return;
@@ -789,8 +819,8 @@ sub recommend_sphinx_version($)
 	if ($latest_avail_ver && $cur_version ge $min_version) {
 		# If there's a good enough version, ask the user to enable it
 		if ($latest_avail_ver ge $rec_version) {
-			printf "\nNeed to activate Sphinx (version $venv_ver) on virtualenv with:\n";
-			printf "\t. $activate\n";
+			printf "\nNeed to activate Sphinx (version $latest_avail_ver) on virtualenv with:\n";
+			printf "\t. $activate_cmd\n";
 			deactivate_help();
 
 			return;
@@ -801,10 +831,6 @@ sub recommend_sphinx_version($)
 
 		if ($latest_avail_ver lt $rec_version) {
 			print "Warning: It is recommended at least Sphinx version $rec_version.\n";
-			$rec_sphinx_upgrade = 1;
-		}
-		if ($latest_avail_ver lt $min_pdf_version) {
-			print "note: If you want pdf, you need at least $min_pdf_version.\n";
 		}
 	}
 
@@ -823,11 +849,8 @@ sub recommend_sphinx_version($)
 
 	$python_cmd = find_python_no_venv();
 
-	if ($need_venv) {
-		printf "\t$python_cmd -m venv $virtenv_dir\n";
-	} else {
-		printf "\t$virtualenv_cmd $virtenv_dir\n";
-	}
+	printf "\t$virtualenv_cmd $virtenv_dir\n";
+
 	printf "\t. $virtenv_dir/bin/activate\n";
 	printf "\tpip install -r $requirement_file\n";
 	deactivate_help();
@@ -854,15 +877,14 @@ sub check_needs()
 		if ($virtualenv) {
 			my $tmp = qx($python_cmd --version 2>&1);
 			if ($tmp =~ m/(\d+\.)(\d+\.)/) {
-				if ($1 >= 3 && $2 >= 3) {
-					$need_venv = 1;		# python 3.3 or upper
-				} else {
-					$need_virtualenv = 1;
-				}
 				if ($1 < 3) {
 					# Fail if it finds python2 (or worse)
 					die "Python 3 is required to build the kernel docs\n";
 				}
+				if ($1 == 3 && $2 < 3) {
+					# Need Python 3.3 or upper for venv
+					$need_virtualenv = 1;
+				}
 			} else {
 				die "Warning: couldn't identify $python_cmd version!";
 			}
@@ -871,14 +893,22 @@ sub check_needs()
 		}
 	}
 
-	# Set virtualenv command line, if python < 3.3
+	recommend_sphinx_upgrade();
+
 	my $virtualenv_cmd;
-	if ($need_virtualenv) {
-		$virtualenv_cmd = findprog("virtualenv-3");
-		$virtualenv_cmd = findprog("virtualenv-3.5") if (!$virtualenv_cmd);
-		if (!$virtualenv_cmd) {
-			check_program("virtualenv", 0);
-			$virtualenv_cmd = "virtualenv";
+
+	if ($need_pip) {
+		# Set virtualenv command line, if python < 3.3
+		if ($need_virtualenv) {
+			$virtualenv_cmd = findprog("virtualenv-3");
+			$virtualenv_cmd = findprog("virtualenv-3.5") if (!$virtualenv_cmd);
+			if (!$virtualenv_cmd) {
+				check_program("virtualenv", 0);
+				$virtualenv_cmd = "virtualenv";
+			}
+		} else {
+			$virtualenv_cmd = "$python_cmd -m venv";
+			check_python_module("ensurepip", 0);
 		}
 	}
 
@@ -895,10 +925,6 @@ sub check_needs()
 	check_program("rsvg-convert", 2) if ($pdf);
 	check_program("latexmk", 2) if ($pdf);
 
-	if ($need_venv) {
-		check_python_module("ensurepip", 0);
-	}
-
 	# Do distro-specific checks and output distro-install commands
 	check_distros();
 
-- 
GitLab


From 51568befea2aba3c75a5a929f41909c50176ca6e Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Thu, 27 May 2021 14:43:47 +0800
Subject: [PATCH 1969/3804] docs/zh_CN: add core-api symbol-namespaces.rst
 translation

Translates Documentation/core-api/symbol-namespaces.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210527064347.3936694-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 -
 .../zh_CN/core-api/symbol-namespaces.rst      | 142 ++++++++++++++++++
 2 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/symbol-namespaces.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 4b7efb7edb183..93162b04624d2 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -26,8 +26,6 @@
    printk-basics
    printk-formats
    workqueue
-
-Todolist:
    symbol-namespaces
 
 数据结构和低级实用程序
diff --git a/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
new file mode 100644
index 0000000000000..ce05c29c76972
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/symbol-namespaces.rst
@@ -0,0 +1,142 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/symbol-namespaces.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_symbol-namespaces.rst:
+
+
+=================================
+符号命名空间（Symbol Namespaces）
+=================================
+
+本文档描述了如何使用符号命名空间来构造通过EXPORT_SYMBOL()系列宏导出的内核内符号的导出面。
+
+.. 目录
+
+       === 1 简介
+       === 2 如何定义符号命名空间
+          --- 2.1 使用EXPORT_SYMBOL宏
+          --- 2.2 使用DEFAULT_SYMBOL_NAMESPACE定义
+       === 3 如何使用命名空间中导出的符号
+       === 4 加载使用命名空间符号的模块
+       === 5 自动创建MODULE_IMPORT_NS声明
+
+1. 简介
+=======
+
+符号命名空间已经被引入，作为构造内核内API的导出面的一种手段。它允许子系统维护者将
+他们导出的符号划分进独立的命名空间。这对于文档的编写非常有用（想想SUBSYSTEM_DEBUG
+命名空间），也可以限制一组符号在内核其他部分的使用。今后，使用导出到命名空间的符号
+的模块必须导入命名空间。否则，内核将根据其配置，拒绝加载该模块或警告说缺少
+导入。
+
+2. 如何定义符号命名空间
+=======================
+
+符号可以用不同的方法导出到命名空间。所有这些都在改变 EXPORT_SYMBOL 和与之类似的那些宏
+被检测到的方式，以创建 ksymtab 条目。
+
+2.1 使用EXPORT_SYMBOL宏
+=======================
+
+除了允许将内核符号导出到内核符号表的宏EXPORT_SYMBOL()和EXPORT_SYMBOL_GPL()之外，
+这些宏的变体还可以将符号导出到某个命名空间：EXPORT_SYMBOL_NS() 和 EXPORT_SYMBOL_NS_GPL()。
+它们需要一个额外的参数：命名空间（the namespace）。请注意，由于宏扩展，该参数需
+要是一个预处理器符号。例如，要把符号 ``usb_stor_suspend`` 导出到命名空间 ``USB_STORAGE``，
+请使用::
+
+       EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
+
+相应的 ksymtab 条目结构体 ``kernel_symbol`` 的成员 ``namespace`` 将被相应地设置。
+导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间，则默认没有。
+``modpost`` 和kernel/module.c分别在构建时或模块加载时使用命名空间。
+
+2.2 使用DEFAULT_SYMBOL_NAMESPACE定义
+====================================
+
+为一个子系统的所有符号定义命名空间可能会非常冗长，并可能变得难以维护。因此，我
+们提供了一个默认定义（DEFAULT_SYMBOL_NAMESPACE），如果设置了这个定义， 它将成
+为所有没有指定命名空间的 EXPORT_SYMBOL() 和 EXPORT_SYMBOL_GPL() 宏扩展的默认
+定义。
+
+有多种方法来指定这个定义，使用哪种方法取决于子系统和维护者的喜好。第一种方法是在
+子系统的 ``Makefile`` 中定义默认命名空间。例如，如果要将usb-common中定义的所有符号导
+出到USB_COMMON命名空间，可以在drivers/usb/common/Makefile中添加这样一行::
+
+       ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON
+
+这将影响所有 EXPORT_SYMBOL() 和 EXPORT_SYMBOL_GPL() 语句。当这个定义存在时，
+用EXPORT_SYMBOL_NS()导出的符号仍然会被导出到作为命名空间参数传递的命名空间中，
+因为这个参数优先于默认的符号命名空间。
+
+定义默认命名空间的第二个选项是直接在编译单元中作为预处理声明。上面的例子就会变
+成::
+
+       #undef  DEFAULT_SYMBOL_NAMESPACE
+       #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON
+
+应置于相关编译单元中任何 EXPORT_SYMBOL 宏之前。
+
+3. 如何使用命名空间中导出的符号
+===============================
+
+为了使用被导出到命名空间的符号，内核模块需要明确地导入这些命名空间。
+否则内核可能会拒绝加载该模块。模块代码需要使用宏MODULE_IMPORT_NS来
+表示它所使用的命名空间的符号。例如，一个使用usb_stor_suspend符号的
+模块，需要使用如下语句导入命名空间USB_STORAGE::
+
+       MODULE_IMPORT_NS(USB_STORAGE);
+
+这将在模块中为每个导入的命名空间创建一个 ``modinfo`` 标签。这也顺带
+使得可以用modinfo检查模块已导入的命名空间::
+
+       $ modinfo drivers/usb/storage/ums-karma.ko
+       [...]
+       import_ns:      USB_STORAGE
+       [...]
+
+
+建议将 MODULE_IMPORT_NS() 语句添加到靠近其他模块元数据定义的地方，
+如 MODULE_AUTHOR() 或 MODULE_LICENSE() 。关于自动创建缺失的导入
+语句的方法，请参考第5节。
+
+4. 加载使用命名空间符号的模块
+=============================
+
+在模块加载时（比如 ``insmod`` ），内核将检查每个从模块中引用的符号是否可
+用，以及它可能被导出到的命名空间是否被模块导入。内核的默认行为是拒绝
+加载那些没有指明足以导入的模块。此错误会被记录下来，并且加载将以
+EINVAL方式失败。要允许加载不满足这个前提条件的模块，可以使用此配置选项：
+设置 MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS=y 将使加载不受影响，但会
+发出警告。
+
+5. 自动创建MODULE_IMPORT_NS声明
+===============================
+
+缺少命名空间的导入可以在构建时很容易被检测到。事实上，如果一个模块
+使用了一个命名空间的符号而没有导入它，modpost会发出警告。
+MODULE_IMPORT_NS()语句通常会被添加到一个明确的位置（和其他模块元
+数据一起）。为了使模块作者（和子系统维护者）的生活更加轻松，我们提
+供了一个脚本和make目标来修复丢失的导入。修复丢失的导入可以用::
+
+       $ make nsdeps
+
+对模块作者来说，以下情况可能很典型::
+
+       - 编写依赖未导入命名空间的符号的代码
+       - ``make``
+       - 注意 ``modpost`` 的警告，提醒你有一个丢失的导入。
+       - 运行 ``make nsdeps``将导入添加到正确的代码位置。
+
+对于引入命名空间的子系统维护者来说，其步骤非常相似。同样，make nsdeps最终将
+为树内模块添加缺失的命名空间导入::
+
+       - 向命名空间转移或添加符号（例如，使用EXPORT_SYMBOL_NS()）。
+       - ``make`` （最好是用allmodconfig来覆盖所有的内核模块）。
+       - 注意 ``modpost`` 的警告，提醒你有一个丢失的导入。
+       - 运行 ``make nsdeps``将导入添加到正确的代码位置。
+
+你也可以为外部模块的构建运行nsdeps。典型的用法是::
+
+       $ make -C <path_to_kernel_src> M=$PWD nsdeps
-- 
GitLab


From cbae918b2c4b6d1c4577d35659196b4f75b5c376 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Mon, 31 May 2021 20:41:05 +0800
Subject: [PATCH 1970/3804] docs/zh_CN:add core-api padata translation

Translate Documentation/core-api/padata.rst into Chinese.

Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Link: https://lore.kernel.org/r/20210531124105.946859-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/index.rst     |   2 +-
 .../translations/zh_CN/core-api/padata.rst    | 158 ++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/padata.rst

diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index 93162b04624d2..a8b2afcbf5bcc 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -65,10 +65,10 @@ Linux如何让一切同时发生。 详情请参阅
    irq/index
    refcount-vs-atomic
    local_ops
+   padata
 
 Todolist:
 
-   padata
    ../RCU/index
 
 低级硬件管理
diff --git a/Documentation/translations/zh_CN/core-api/padata.rst b/Documentation/translations/zh_CN/core-api/padata.rst
new file mode 100644
index 0000000000000..c627f8f131f9a
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/padata.rst
@@ -0,0 +1,158 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/padata.rst
+:Translator: Yanteng Si <siyanteng@loongson.cn>
+
+.. _cn_core_api_padata.rst:
+
+==================
+padata并行执行机制
+==================
+
+:日期: 2020年5月
+
+Padata是一种机制，内核可以通过此机制将工作分散到多个CPU上并行完成，同时
+可以选择保持它们的顺序。
+
+它最初是为IPsec开发的，它需要在不对这些数据包重新排序的前提下，为大量的数
+据包进行加密和解密。这是目前padata的序列化作业支持的唯一用途。
+
+Padata还支持多线程作业，将作业平均分割，同时在线程之间进行负载均衡和协调。
+
+执行序列化作业
+==============
+
+初始化
+------
+
+使用padata执行序列化作业的第一步是建立一个padata_instance结构体，以全面
+控制作业的运行方式::
+
+    #include <linux/padata.h>
+
+    struct padata_instance *padata_alloc(const char *name);
+
+'name'即标识了这个实例。
+
+然后，通过分配一个padata_shell来完成padata的初始化::
+
+   struct padata_shell *padata_alloc_shell(struct padata_instance *pinst);
+
+一个padata_shell用于向padata提交一个作业，并允许一系列这样的作业被独立地
+序列化。一个padata_instance可以有一个或多个padata_shell与之相关联，每个
+都允许一系列独立的作业。
+
+修改cpumasks
+------------
+
+用于运行作业的CPU可以通过两种方式改变，通过padata_set_cpumask()编程或通
+过sysfs。前者的定义是::
+
+    int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+			   cpumask_var_t cpumask);
+
+这里cpumask_type是PADATA_CPU_PARALLEL（并行）或PADATA_CPU_SERIAL（串行）之一，其中并
+行cpumask描述了哪些处理器将被用来并行执行提交给这个实例的作业，串行cpumask
+定义了哪些处理器被允许用作串行化回调处理器。 cpumask指定了要使用的新cpumask。
+
+一个实例的cpumasks可能有sysfs文件。例如，pcrypt的文件在
+/sys/kernel/pcrypt/<instance-name>。在一个实例的目录中，有两个文件，parallel_cpumask
+和serial_cpumask，任何一个cpumask都可以通过在文件中回显（echo）一个bitmask
+来改变，比如说::
+
+    echo f > /sys/kernel/pcrypt/pencrypt/parallel_cpumask
+
+读取其中一个文件会显示用户提供的cpumask，它可能与“可用”的cpumask不同。
+
+Padata内部维护着两对cpumask，用户提供的cpumask和“可用的”cpumask(每一对由一个
+并行和一个串行cpumask组成)。用户提供的cpumasks在实例分配时默认为所有可能的CPU，
+并且可以如上所述进行更改。可用的cpumasks总是用户提供的cpumasks的一个子集，只包
+含用户提供的掩码中的在线CPU；这些是padata实际使用的cpumasks。因此，向padata提
+供一个包含离线CPU的cpumask是合法的。一旦用户提供的cpumask中的一个离线CPU上线，
+padata就会使用它。
+
+改变CPU掩码的操作代价很高，所以不应频繁更改。
+
+运行一个作业
+-------------
+
+实际上向padata实例提交工作需要创建一个padata_priv结构体，它代表一个作业::
+
+    struct padata_priv {
+        /* Other stuff here... */
+	void                    (*parallel)(struct padata_priv *padata);
+	void                    (*serial)(struct padata_priv *padata);
+    };
+
+这个结构体几乎肯定会被嵌入到一些针对要做的工作的大结构体中。它的大部分字段对
+padata来说是私有的，但是这个结构在初始化时应该被清零，并且应该提供parallel()和
+serial()函数。在完成工作的过程中，这些函数将被调用，我们马上就会遇到。
+
+工作的提交是通过::
+
+    int padata_do_parallel(struct padata_shell *ps,
+		           struct padata_priv *padata, int *cb_cpu);
+
+ps和padata结构体必须如上所述进行设置；cb_cpu指向作业完成后用于最终回调的首选CPU；
+它必须在当前实例的CPU掩码中（如果不是，cb_cpu指针将被更新为指向实际选择的CPU）。
+padata_do_parallel()的返回值在成功时为0，表示工作正在进行中。-EBUSY意味着有人
+在其他地方正在搞乱实例的CPU掩码，而当cb_cpu不在串行cpumask中、并行或串行cpumasks
+中无在线CPU，或实例停止时，则会出现-EINVAL反馈。
+
+每个提交给padata_do_parallel()的作业将依次传递给一个CPU上的上述parallel()函数
+的一个调用，所以真正的并行是通过提交多个作业来实现的。parallel()在运行时禁用软
+件中断，因此不能睡眠。parallel()函数把获得的padata_priv结构体指针作为其唯一的参
+数；关于实际要做的工作的信息可能是通过使用container_of()找到封装结构体来获得的。
+
+请注意，parallel()没有返回值；padata子系统假定parallel()将从此时开始负责这项工
+作。作业不需要在这次调用中完成，但是，如果parallel()留下了未完成的工作，它应该准
+备在前一个作业完成之前，被以新的作业再次调用。
+
+序列化作业
+----------
+
+当一个作业完成时，parallel()（或任何实际完成该工作的函数）应该通过调用通知padata此
+事::
+
+    void padata_do_serial(struct padata_priv *padata);
+
+在未来的某个时刻，padata_do_serial()将触发对padata_priv结构体中serial()函数的调
+用。这个调用将发生在最初要求调用padata_do_parallel()的CPU上；它也是在本地软件中断
+被禁用的情况下运行的。
+请注意，这个调用可能会被推迟一段时间，因为padata代码会努力确保作业按照提交的顺序完
+成。
+
+销毁
+----
+
+清理一个padata实例时，可以预见的是调用两个free函数，这两个函数对应于分配的逆过程::
+
+    void padata_free_shell(struct padata_shell *ps);
+    void padata_free(struct padata_instance *pinst);
+
+用户有责任确保在调用上述任何一项之前，所有未完成的工作都已完成。
+
+运行多线程作业
+==============
+
+一个多线程作业有一个主线程和零个或多个辅助线程，主线程参与作业，然后等待所有辅助线
+程完成。padata将作业分割成称为chunk的单元，其中chunk是一个线程在一次调用线程函数
+中完成的作业片段。
+
+用户必须做三件事来运行一个多线程作业。首先，通过定义一个padata_mt_job结构体来描述
+作业，这在接口部分有解释。这包括一个指向线程函数的指针，padata每次将作业块分配给线
+程时都会调用这个函数。然后，定义线程函数，它接受三个参数： ``start`` 、 ``end`` 和 ``arg`` ，
+其中前两个参数限定了线程操作的范围，最后一个是指向作业共享状态的指针，如果有的话。
+准备好共享状态，它通常被分配在主线程的堆栈中。最后，调用padata_do_multithreaded()，
+它将在作业完成后返回。
+
+接口
+====
+
+该API在以下内核代码中:
+
+include/linux/padata.h
+
+kernel/padata.c
-- 
GitLab


From 0afd4df0d16a5ae894b087562ffef4e5ec43fe24 Mon Sep 17 00:00:00 2001
From: Akira Yokosawa <akiyks@gmail.com>
Date: Sun, 30 May 2021 00:19:14 +0900
Subject: [PATCH 1971/3804] docs: pdfdocs: Prevent column squeezing by tabulary

Setting a reasonable width to \tymin prevents column squeezing
by tabulary.
Width of 20em works well in almost all the tables still in the
ascii-art format.

Excerpt from tabulary package documentation at [1]:

    To stop very narrow columns being too 'squeezed' by this process
    any columns that are narrower than \tymin are set to their natural
    width.

[1]: https://mirrors.ctan.org/macros/latex/contrib/tabulary/tabulary.pdf

Note: Sphinx has its own default value of \tymin set in
sphinxlatextables.sty (Sphinx 4.0.2) and sphinxmulticell.sty
(Sphinx 2.4.4) as follows:

    \setlength{\tymin}{3\fontcharwd\font`0 }

, which is not sufficient for kernel-doc.

Tested against Sphinx versions 2.4.4 and 4.0.2.

Signed-off-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/r/277d68fa-c96a-0ccb-6ce0-4d314851d9fe@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index 25aa00c707b02..a05225056e086 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -353,6 +353,8 @@ latex_elements = {
 
     # Additional stuff for the LaTeX preamble.
     'preamble': '''
+	% Prevent column squeezing of tabulary.
+	\\setlength{\\tymin}{20em}
         % Use some font with UTF-8 support with XeLaTeX
         \\usepackage{fontspec}
         \\setsansfont{DejaVu Sans}
-- 
GitLab


From 6ad1800071e80ade38b6287792a6ad678e6085ed Mon Sep 17 00:00:00 2001
From: Haocheng Xie <xiehaocheng.cn@gmail.com>
Date: Mon, 31 May 2021 16:39:05 +0800
Subject: [PATCH 1972/3804] docs: Fix typos in Documentation/trace/ftrace.rst

Fix the usage of "a/the" and improve the readability.

Signed-off-by: Haocheng Xie <xiehaocheng.cn@gmail.com>
Link: https://lore.kernel.org/r/20210531083905.25763-1-xiehaocheng.cn@gmail.com
[jc: tweaked wording slightly]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/ftrace.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index 62c98e9bbdd9d..b88c6b79db3ee 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -354,8 +354,8 @@ of ftrace. Here is a list of some of the key files:
 	is being directly called by the function. If the count is greater
 	than 1 it most likely will be ftrace_ops_list_func().
 
-	If the callback of the function jumps to a trampoline that is
-	specific to a the callback and not the standard trampoline,
+	If the callback of a function jumps to a trampoline that is
+	specific to the callback and which is not the standard trampoline,
 	its address will be printed as well as the function that the
 	trampoline calls.
 
-- 
GitLab


From f336d0b93ae978f12c5e27199f828da89b91e56a Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 1 Jun 2021 19:04:51 +0800
Subject: [PATCH 1973/3804] ethernet: myri10ge: Fix missing error code in
 myri10ge_probe()

The error code is missing in this code scenario, add the error code
'-EINVAL' to the return value 'status'.

Eliminate the following smatch warning:

drivers/net/ethernet/myricom/myri10ge/myri10ge.c:3818 myri10ge_probe()
warn: missing error code 'status'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index c84c8bf2bc20e..fc99ad8e4a388 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -3815,6 +3815,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_err(&pdev->dev,
 			"invalid sram_size %dB or board span %ldB\n",
 			mgp->sram_size, mgp->board_span);
+		status = -EINVAL;
 		goto abort_with_ioremap;
 	}
 	memcpy_fromio(mgp->eeprom_strings,
-- 
GitLab


From 05fc8b6cbd4f979a6f25759c4a17dd5f657f7ecd Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Tue, 1 Jun 2021 15:07:59 +0300
Subject: [PATCH 1974/3804] net/tls: Replace TLS_RX_SYNC_RUNNING with RCU

RCU synchronization is guaranteed to finish in finite time, unlike a
busy loop that polls a flag. This patch is a preparation for the bugfix
in the next patch, where the same synchronize_net() call will also be
used to sync with the TX datapath.

Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    |  1 -
 net/tls/tls_device.c | 10 +++-------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 3eccb525e8f79..6531ace2a68bd 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -193,7 +193,6 @@ struct tls_offload_context_tx {
 	(sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
 
 enum tls_context_flags {
-	TLS_RX_SYNC_RUNNING = 0,
 	/* Unlike RX where resync is driven entirely by the core in TX only
 	 * the driver knows when things went out of sync, so we need the flag
 	 * to be atomic.
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 76a6f8c2eec4b..171752cd69102 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -680,15 +680,13 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
 	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
 	struct net_device *netdev;
 
-	if (WARN_ON(test_and_set_bit(TLS_RX_SYNC_RUNNING, &tls_ctx->flags)))
-		return;
-
 	trace_tls_device_rx_resync_send(sk, seq, rcd_sn, rx_ctx->resync_type);
+	rcu_read_lock();
 	netdev = READ_ONCE(tls_ctx->netdev);
 	if (netdev)
 		netdev->tlsdev_ops->tls_dev_resync(netdev, sk, seq, rcd_sn,
 						   TLS_OFFLOAD_CTX_DIR_RX);
-	clear_bit_unlock(TLS_RX_SYNC_RUNNING, &tls_ctx->flags);
+	rcu_read_unlock();
 	TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICERESYNC);
 }
 
@@ -1300,9 +1298,7 @@ static int tls_device_down(struct net_device *netdev)
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_RX);
 		WRITE_ONCE(ctx->netdev, NULL);
-		smp_mb__before_atomic(); /* pairs with test_and_set_bit() */
-		while (test_bit(TLS_RX_SYNC_RUNNING, &ctx->flags))
-			usleep_range(10, 200);
+		synchronize_net();
 		dev_put(netdev);
 		list_del_init(&ctx->list);
 
-- 
GitLab


From c55dcdd435aa6c6ad6ccac0a4c636d010ee367a4 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Tue, 1 Jun 2021 15:08:00 +0300
Subject: [PATCH 1975/3804] net/tls: Fix use-after-free after the TLS device
 goes down and up

When a netdev with active TLS offload goes down, tls_device_down is
called to stop the offload and tear down the TLS context. However, the
socket stays alive, and it still points to the TLS context, which is now
deallocated. If a netdev goes up, while the connection is still active,
and the data flow resumes after a number of TCP retransmissions, it will
lead to a use-after-free of the TLS context.

This commit addresses this bug by keeping the context alive until its
normal destruction, and implements the necessary fallbacks, so that the
connection can resume in software (non-offloaded) kTLS mode.

On the TX side tls_sw_fallback is used to encrypt all packets. The RX
side already has all the necessary fallbacks, because receiving
non-decrypted packets is supported. The thing needed on the RX side is
to block resync requests, which are normally produced after receiving
non-decrypted packets.

The necessary synchronization is implemented for a graceful teardown:
first the fallbacks are deployed, then the driver resources are released
(it used to be possible to have a tls_dev_resync after tls_dev_del).

A new flag called TLS_RX_DEV_DEGRADED is added to indicate the fallback
mode. It's used to skip the RX resync logic completely, as it becomes
useless, and some objects may be released (for example, resync_async,
which is allocated and freed by the driver).

Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h             |  9 ++++++
 net/tls/tls_device.c          | 52 +++++++++++++++++++++++++++++++----
 net/tls/tls_device_fallback.c |  7 +++++
 net/tls/tls_main.c            |  1 +
 4 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 6531ace2a68bd..8341a8d1e8073 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -193,6 +193,11 @@ struct tls_offload_context_tx {
 	(sizeof(struct tls_offload_context_tx) + TLS_DRIVER_STATE_SIZE_TX)
 
 enum tls_context_flags {
+	/* tls_device_down was called after the netdev went down, device state
+	 * was released, and kTLS works in software, even though rx_conf is
+	 * still TLS_HW (needed for transition).
+	 */
+	TLS_RX_DEV_DEGRADED = 0,
 	/* Unlike RX where resync is driven entirely by the core in TX only
 	 * the driver knows when things went out of sync, so we need the flag
 	 * to be atomic.
@@ -265,6 +270,7 @@ struct tls_context {
 
 	/* cache cold stuff */
 	struct proto *sk_proto;
+	struct sock *sk;
 
 	void (*sk_destruct)(struct sock *sk);
 
@@ -447,6 +453,9 @@ static inline u16 tls_user_config(struct tls_context *ctx, bool tx)
 struct sk_buff *
 tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
 		      struct sk_buff *skb);
+struct sk_buff *
+tls_validate_xmit_skb_sw(struct sock *sk, struct net_device *dev,
+			 struct sk_buff *skb);
 
 static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk)
 {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 171752cd69102..bd9f1567aa392 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -50,6 +50,7 @@ static void tls_device_gc_task(struct work_struct *work);
 static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task);
 static LIST_HEAD(tls_device_gc_list);
 static LIST_HEAD(tls_device_list);
+static LIST_HEAD(tls_device_down_list);
 static DEFINE_SPINLOCK(tls_device_lock);
 
 static void tls_device_free_ctx(struct tls_context *ctx)
@@ -759,6 +760,8 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 
 	if (tls_ctx->rx_conf != TLS_HW)
 		return;
+	if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags)))
+		return;
 
 	prot = &tls_ctx->prot_info;
 	rx_ctx = tls_offload_ctx_rx(tls_ctx);
@@ -961,6 +964,17 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
 
 	ctx->sw.decrypted |= is_decrypted;
 
+	if (unlikely(test_bit(TLS_RX_DEV_DEGRADED, &tls_ctx->flags))) {
+		if (likely(is_encrypted || is_decrypted))
+			return 0;
+
+		/* After tls_device_down disables the offload, the next SKB will
+		 * likely have initial fragments decrypted, and final ones not
+		 * decrypted. We need to reencrypt that single SKB.
+		 */
+		return tls_device_reencrypt(sk, skb);
+	}
+
 	/* Return immediately if the record is either entirely plaintext or
 	 * entirely ciphertext. Otherwise handle reencrypt partially decrypted
 	 * record.
@@ -1290,6 +1304,26 @@ static int tls_device_down(struct net_device *netdev)
 	spin_unlock_irqrestore(&tls_device_lock, flags);
 
 	list_for_each_entry_safe(ctx, tmp, &list, list)	{
+		/* Stop offloaded TX and switch to the fallback.
+		 * tls_is_sk_tx_device_offloaded will return false.
+		 */
+		WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
+
+		/* Stop the RX and TX resync.
+		 * tls_dev_resync must not be called after tls_dev_del.
+		 */
+		WRITE_ONCE(ctx->netdev, NULL);
+
+		/* Start skipping the RX resync logic completely. */
+		set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
+
+		/* Sync with inflight packets. After this point:
+		 * TX: no non-encrypted packets will be passed to the driver.
+		 * RX: resync requests from the driver will be ignored.
+		 */
+		synchronize_net();
+
+		/* Release the offload context on the driver side. */
 		if (ctx->tx_conf == TLS_HW)
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_TX);
@@ -1297,13 +1331,21 @@ static int tls_device_down(struct net_device *netdev)
 		    !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
 							TLS_OFFLOAD_CTX_DIR_RX);
-		WRITE_ONCE(ctx->netdev, NULL);
-		synchronize_net();
+
 		dev_put(netdev);
-		list_del_init(&ctx->list);
 
-		if (refcount_dec_and_test(&ctx->refcount))
-			tls_device_free_ctx(ctx);
+		/* Move the context to a separate list for two reasons:
+		 * 1. When the context is deallocated, list_del is called.
+		 * 2. It's no longer an offloaded context, so we don't want to
+		 *    run offload-specific code on this context.
+		 */
+		spin_lock_irqsave(&tls_device_lock, flags);
+		list_move_tail(&ctx->list, &tls_device_down_list);
+		spin_unlock_irqrestore(&tls_device_lock, flags);
+
+		/* Device contexts for RX and TX will be freed on sk_destruct
+		 * by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
+		 */
 	}
 
 	up_write(&device_offload_lock);
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index cacf040872c74..e40bedd112b68 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -431,6 +431,13 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(tls_validate_xmit_skb);
 
+struct sk_buff *tls_validate_xmit_skb_sw(struct sock *sk,
+					 struct net_device *dev,
+					 struct sk_buff *skb)
+{
+	return tls_sw_fallback(sk, skb);
+}
+
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb)
 {
 	return tls_sw_fallback(skb->sk, skb);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 47b7c5334c346..fde56ff491637 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -636,6 +636,7 @@ struct tls_context *tls_ctx_create(struct sock *sk)
 	mutex_init(&ctx->tx_lock);
 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
 	ctx->sk_proto = READ_ONCE(sk->sk_prot);
+	ctx->sk = sk;
 	return ctx;
 }
 
-- 
GitLab


From b000372627ce9dbbe641dafbf40db0718276ab77 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 1 Jun 2021 09:38:58 -0700
Subject: [PATCH 1976/3804] MAINTAINERS: nfc mailing lists are subscribers-only

It looks as if the MAINTAINERS entries for the nfc mailing list
should be updated as I just got a "rejected" bounce from the nfc list.

-------
Your message to the Linux-nfc mailing-list was rejected for the following
reasons:

The message is not from a list member
-------

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8696ead914809..bfb3d0931cbaa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12903,7 +12903,7 @@ F:	net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
 M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+L:	linux-nfc@lists.01.org (subscribers-only)
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/
@@ -12916,7 +12916,7 @@ F:	net/nfc/
 NFC VIRTUAL NCI DEVICE DRIVER
 M:	Bongsu Jeon <bongsu.jeon@samsung.com>
 L:	netdev@vger.kernel.org
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+L:	linux-nfc@lists.01.org (subscribers-only)
 S:	Supported
 F:	drivers/nfc/virtual_ncidev.c
 F:	tools/testing/selftests/nci/
@@ -13214,7 +13214,7 @@ F:	sound/soc/codecs/tfa9879*
 
 NXP-NCI NFC DRIVER
 R:	Charles Gorand <charles.gorand@effinnov.com>
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+L:	linux-nfc@lists.01.org (subscribers-only)
 S:	Supported
 F:	drivers/nfc/nxp-nci
 
@@ -16141,7 +16141,7 @@ F:	include/media/drv-intf/s3c_camif.h
 SAMSUNG S3FWRN5 NFC DRIVER
 M:	Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
 M:	Krzysztof Opasiak <k.opasiak@samsung.com>
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+L:	linux-nfc@lists.01.org (subscribers-only)
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
 F:	drivers/nfc/s3fwrn5
@@ -18332,7 +18332,7 @@ F:	sound/soc/codecs/tas571x*
 TI TRF7970A NFC DRIVER
 M:	Mark Greer <mgreer@animalcreek.com>
 L:	linux-wireless@vger.kernel.org
-L:	linux-nfc@lists.01.org (moderated for non-subscribers)
+L:	linux-nfc@lists.01.org (subscribers-only)
 S:	Supported
 F:	Documentation/devicetree/bindings/net/nfc/trf7970a.txt
 F:	drivers/nfc/trf7970a.c
-- 
GitLab


From d8ec92005f806dfa7524e9171eca707c0bb1267e Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Tue, 25 May 2021 15:35:25 +0300
Subject: [PATCH 1977/3804] net/mlx5e: Fix incompatible casting

Device supports setting of a single fec mode at a time, enforce this
by bitmap_weight == 1. Input from fec command is in u32, avoid cast to
unsigned long and use bitmap_from_arr32 to populate bitmap safely.

Fixes: 4bd9d5070b92 ("net/mlx5e: Enforce setting of a single FEC mode")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 8360289813f0d..c4724742eef1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1624,12 +1624,13 @@ static int mlx5e_set_fecparam(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
+	unsigned long fec_bitmap;
 	u16 fec_policy = 0;
 	int mode;
 	int err;
 
-	if (bitmap_weight((unsigned long *)&fecparam->fec,
-			  ETHTOOL_FEC_LLRS_BIT + 1) > 1)
+	bitmap_from_arr32(&fec_bitmap, &fecparam->fec, sizeof(fecparam->fec) * BITS_PER_BYTE);
+	if (bitmap_weight(&fec_bitmap, ETHTOOL_FEC_LLRS_BIT + 1) > 1)
 		return -EOPNOTSUPP;
 
 	for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
-- 
GitLab


From b38742e41177c339e891b74f3925862fa36debb1 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Thu, 29 Apr 2021 12:13:35 +0300
Subject: [PATCH 1978/3804] net/mlx5e: Disable TLS offload for uplink
 representor

TLS offload is not supported in switchdev mode.

Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ad0f69480b9ca..8eed2dcc8898d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3858,6 +3858,16 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
 			netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
 	}
 
+	if (mlx5e_is_uplink_rep(priv)) {
+		features &= ~NETIF_F_HW_TLS_RX;
+		if (netdev->features & NETIF_F_HW_TLS_RX)
+			netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
+
+		features &= ~NETIF_F_HW_TLS_TX;
+		if (netdev->features & NETIF_F_HW_TLS_TX)
+			netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
+	}
+
 	mutex_unlock(&priv->state_lock);
 
 	return features;
-- 
GitLab


From 5940e64281c09976ce2b560244217e610bf9d029 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Thu, 8 Apr 2021 07:30:57 +0300
Subject: [PATCH 1979/3804] net/mlx5: Check firmware sync reset requested is
 set before trying to abort it

In case driver sent NACK to firmware on sync reset request, it will get
sync reset abort event while it didn't set sync reset requested mode.
Thus, on abort sync reset event handler, driver should check reset
requested is set before trying to stop sync reset poll.

Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index d5d57630015ff..106b50e42b464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -349,6 +349,9 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
 						      reset_abort_work);
 	struct mlx5_core_dev *dev = fw_reset->dev;
 
+	if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+		return;
+
 	mlx5_sync_reset_clear_reset_requested(dev, true);
 	mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
 }
-- 
GitLab


From afe93f71b5d3cdae7209213ec8ef25210b837b93 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Tue, 13 Apr 2021 14:35:22 +0300
Subject: [PATCH 1980/3804] net/mlx5e: Check for needed capability for cvlan
 matching

If the capability is not supported, show an error and return instead of
trying to offload to the hardware and failing.

Fixes: 699e96ddf47f ("net/mlx5e: Support offloading tc double vlan headers match")
Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2c776e7a7692a..dd64878e5b381 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2015,11 +2015,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 				    misc_parameters_3);
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 	struct flow_dissector *dissector = rule->match.dissector;
+	enum fs_flow_table_type fs_type;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 	u8 *match_level;
 	int err;
 
+	fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
 	match_level = outer_match_level;
 
 	if (dissector->used_keys &
@@ -2145,6 +2147,13 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		if (match.mask->vlan_id ||
 		    match.mask->vlan_priority ||
 		    match.mask->vlan_tpid) {
+			if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
+						     fs_type)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Matching on CVLAN is not supported");
+				return -EOPNOTSUPP;
+			}
+
 			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
 				MLX5_SET(fte_match_set_misc, misc_c,
 					 outer_second_svlan_tag, 1);
-- 
GitLab


From 2a2c84facd4af661d71be6e81fd9d490ac7fdc53 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@nvidia.com>
Date: Wed, 19 May 2021 10:00:27 +0300
Subject: [PATCH 1981/3804] net/mlx5e: Fix adding encap rules to slow path

On some devices the ignore flow level cap is not supported and we
shouldn't use it. Setting the dest ft with mlx5_chains_get_tc_end_ft()
already gives the correct end ft if ignore flow level cap is supported
or not.

Fixes: 39ac237ce009 ("net/mlx5: E-Switch, Refactor chains and priorities")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c    | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h    | 5 +++++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index db1e74280e570..d18a28a6e9a63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -219,7 +219,8 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest,
 			 struct mlx5_fs_chains *chains,
 			 int i)
 {
-	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+	if (mlx5_chains_ignore_flow_level_supported(chains))
+		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 	dest[i].ft = mlx5_chains_get_tc_end_ft(chains);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index 00ef10a1a9f86..20a4047f2737d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -107,7 +107,7 @@ bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
 	return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
 }
 
-static bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
 {
 	return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
index e96f345e7dae7..d50bdb226cef8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.h
@@ -28,6 +28,7 @@ struct mlx5_chains_attr {
 
 bool
 mlx5_chains_prios_supported(struct mlx5_fs_chains *chains);
+bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains);
 bool
 mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains);
 u32
@@ -70,6 +71,10 @@ mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
 
 #else /* CONFIG_MLX5_CLS_ACT */
 
+static inline bool
+mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
+{ return false; }
+
 static inline struct mlx5_flow_table *
 mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
 		      u32 level) { return ERR_PTR(-EOPNOTSUPP); }
-- 
GitLab


From 256f79d13c1d1fe53b2b31ab2089b615bbfcd361 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 3 May 2021 16:59:55 +0300
Subject: [PATCH 1982/3804] net/mlx5e: Fix HW TS with CQE compression according
 to profile

When the driver's profile doesn't support a dedicated PTP-RQ, the PTP
accuracy of HW TS is affected by the CQE compression. In this case,
turn off CQE compression. Otherwise, the driver crashes:

BUG: kernel NULL pointer dereference, address:0000000000000018
...
...
RIP: 0010:mlx5e_ptp_rx_set_fs+0x25/0x1a0 [mlx5_core]
...
...
Call Trace:
 mlx5e_ptp_activate_channel+0xb2/0xf0 [mlx5_core]
 mlx5e_activate_priv_channels+0x3b9/0x8c0 [mlx5_core]
 ? __mutex_unlock_slowpath+0x45/0x2a0
 ? mlx5e_refresh_tirs+0x151/0x1e0 [mlx5_core]
 mlx5e_switch_priv_channels+0x1cd/0x2d0 [mlx5_core]
 ? mlx5e_xdp_allowed+0x150/0x150 [mlx5_core]
 mlx5e_safe_switch_params+0x118/0x3c0 [mlx5_core]
 ? __mutex_lock+0x6e/0x8e0
 ? mlx5e_hwstamp_set+0xa9/0x300 [mlx5_core]
 mlx5e_hwstamp_set+0x194/0x300 [mlx5_core]
 ? dev_ioctl+0x9b/0x3d0
 mlx5i_ioctl+0x37/0x60 [mlx5_core]
 mlx5i_pkey_ioctl+0x12/0x20 [mlx5_core]
 dev_ioctl+0xa9/0x3d0
 sock_ioctl+0x268/0x420
 __x64_sys_ioctl+0x3d8/0x790
 ? lockdep_hardirqs_on_prepare+0xe4/0x190
 do_syscall_64+0x2d/0x40
entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 960fbfe222a4 ("net/mlx5e: Allow coexistence of CQE compression and HW TS PTP")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 67 ++++++++++++++-----
 1 file changed, 52 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8eed2dcc8898d..ec6bafe7a2e59 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3984,11 +3984,45 @@ int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
 	return mlx5e_ptp_rx_manage_fs(priv, set);
 }
 
-int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
+{
+	bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
+	int err;
+
+	if (!rx_filter)
+		/* Reset CQE compression to Admin default */
+		return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def);
+
+	if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
+		return 0;
+
+	/* Disable CQE compression */
+	netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
+	err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+	if (err)
+		netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+
+	return err;
+}
+
+static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
 {
 	struct mlx5e_params new_params;
+
+	if (ptp_rx == priv->channels.params.ptp_rx)
+		return 0;
+
+	new_params = priv->channels.params;
+	new_params.ptp_rx = ptp_rx;
+	return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
+					&new_params.ptp_rx, true);
+}
+
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
 	struct hwtstamp_config config;
 	bool rx_cqe_compress_def;
+	bool ptp_rx;
 	int err;
 
 	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
@@ -4008,13 +4042,12 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	}
 
 	mutex_lock(&priv->state_lock);
-	new_params = priv->channels.params;
 	rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
 
 	/* RX HW timestamp */
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		new_params.ptp_rx = false;
+		ptp_rx = false;
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_SOME:
@@ -4031,24 +4064,25 @@ int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 	case HWTSTAMP_FILTER_NTP_ALL:
-		new_params.ptp_rx = rx_cqe_compress_def;
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		/* ptp_rx is set if both HW TS is set and CQE
+		 * compression is set
+		 */
+		ptp_rx = rx_cqe_compress_def;
 		break;
 	default:
-		mutex_unlock(&priv->state_lock);
-		return -ERANGE;
+		err = -ERANGE;
+		goto err_unlock;
 	}
 
-	if (new_params.ptp_rx == priv->channels.params.ptp_rx)
-		goto out;
+	if (!priv->profile->rx_ptp_support)
+		err = mlx5e_hwstamp_config_no_ptp_rx(priv,
+						     config.rx_filter != HWTSTAMP_FILTER_NONE);
+	else
+		err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
+	if (err)
+		goto err_unlock;
 
-	err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
-				       &new_params.ptp_rx, true);
-	if (err) {
-		mutex_unlock(&priv->state_lock);
-		return err;
-	}
-out:
 	memcpy(&priv->tstamp, &config, sizeof(config));
 	mutex_unlock(&priv->state_lock);
 
@@ -4057,6 +4091,9 @@ out:
 
 	return copy_to_user(ifr->ifr_data, &config,
 			    sizeof(config)) ? -EFAULT : 0;
+err_unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
 }
 
 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
-- 
GitLab


From 5349cbba754ee54f6cca8b946aa9172f1ac60b8c Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 3 May 2021 17:16:44 +0300
Subject: [PATCH 1983/3804] net/mlx5e: Fix conflict with HW TS and CQE
 compression

When a driver's profile doesn't support a dedicated PTP-RQ,
configuration of CQE compression while HW TS is configured should fail.

Fixes: 885b8cfb161e ("net/mlx5e: Update ethtool setting of CQE compression")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c4724742eef1f..d6513aef5cd45 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1894,6 +1894,13 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
 	if (curr_val == new_val)
 		return 0;
 
+	if (new_val && !priv->profile->rx_ptp_support &&
+	    priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
+		netdev_err(priv->netdev,
+			   "Profile doesn't support enabling of CQE compression while hardware time-stamping is enabled.\n");
+		return -EINVAL;
+	}
+
 	new_params = priv->channels.params;
 	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
-- 
GitLab


From 216214c64a8c1cb9078c2c0aec7bb4a2f8e75397 Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@nvidia.com>
Date: Wed, 9 Dec 2020 16:40:38 +0200
Subject: [PATCH 1984/3804] net/mlx5: DR, Create multi-destination flow table
 with level less than 64

A flow table that contains a flow pointing to multiple flow tables or multiple
TIRs must have a level lower than 64. In our case this applies to the multi-
destination flow table.
Fix the level of the created table to comply with HW Spec definitions, and
still make sure that its level is lower than SW-owned tables, so that it
would be possible to point from the multi-destination FW table to SW
tables.

Fixes: 34583beea4b7 ("net/mlx5: DR, Create multi-destination table for SW-steering use")
Signed-off-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c | 3 ++-
 include/linux/mlx5/mlx5_ifc.h                            | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 1fbcd012bb855..7ccfd40586cee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -112,7 +112,8 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
 	int ret;
 
 	ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
-	ft_attr.level = dmn->info.caps.max_ft_level - 2;
+	ft_attr.level = min_t(int, dmn->info.caps.max_ft_level - 2,
+			      MLX5_FT_MAX_MULTIPATH_LEVEL);
 	ft_attr.reformat_en = reformat_req;
 	ft_attr.decap_en = reformat_req;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6d16eed6850e5..eb86e80e4643f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1289,6 +1289,8 @@ enum mlx5_fc_bulk_alloc_bitmask {
 
 #define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
 
+#define MLX5_FT_MAX_MULTIPATH_LEVEL 63
+
 enum {
 	MLX5_STEERING_FORMAT_CONNECTX_5   = 0,
 	MLX5_STEERING_FORMAT_CONNECTX_6DX = 1,
-- 
GitLab


From 79c932cd6af9829432888c4a0001d01793a09f12 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Fri, 21 May 2021 16:34:40 +0200
Subject: [PATCH 1985/3804] scsi: qedf: Do not put host in qedf_vport_create()
 unconditionally

Do not drop reference count on vn_port->host in qedf_vport_create()
unconditionally. Instead drop the reference count in qedf_vport_destroy().

Link: https://lore.kernel.org/r/20210521143440.84816-1-dwagner@suse.de
Reported-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 7562311518824..b92570a7c309d 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -1827,22 +1827,20 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
 		fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf));
 		QEDF_WARN(&(base_qedf->dbg_ctx), "Failed to create vport, "
 			   "WWPN (0x%s) already exists.\n", buf);
-		goto err1;
+		return rc;
 	}
 
 	if (atomic_read(&base_qedf->link_state) != QEDF_LINK_UP) {
 		QEDF_WARN(&(base_qedf->dbg_ctx), "Cannot create vport "
 			   "because link is not up.\n");
-		rc = -EIO;
-		goto err1;
+		return -EIO;
 	}
 
 	vn_port = libfc_vport_create(vport, sizeof(struct qedf_ctx));
 	if (!vn_port) {
 		QEDF_WARN(&(base_qedf->dbg_ctx), "Could not create lport "
 			   "for vport.\n");
-		rc = -ENOMEM;
-		goto err1;
+		return -ENOMEM;
 	}
 
 	fcoe_wwn_to_str(vport->port_name, buf, sizeof(buf));
@@ -1866,7 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
 	if (rc) {
 		QEDF_ERR(&(base_qedf->dbg_ctx), "Could not allocate memory "
 		    "for lport stats.\n");
-		goto err2;
+		goto err;
 	}
 
 	fc_set_wwnn(vn_port, vport->node_name);
@@ -1884,7 +1882,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
 	if (rc) {
 		QEDF_WARN(&base_qedf->dbg_ctx,
 			  "Error adding Scsi_Host rc=0x%x.\n", rc);
-		goto err2;
+		goto err;
 	}
 
 	/* Set default dev_loss_tmo based on module parameter */
@@ -1925,9 +1923,10 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled)
 	vport_qedf->dbg_ctx.host_no = vn_port->host->host_no;
 	vport_qedf->dbg_ctx.pdev = base_qedf->pdev;
 
-err2:
+	return 0;
+
+err:
 	scsi_host_put(vn_port->host);
-err1:
 	return rc;
 }
 
@@ -1968,8 +1967,7 @@ static int qedf_vport_destroy(struct fc_vport *vport)
 	fc_lport_free_stats(vn_port);
 
 	/* Release Scsi_Host */
-	if (vn_port->host)
-		scsi_host_put(vn_port->host);
+	scsi_host_put(vn_port->host);
 
 out:
 	return 0;
-- 
GitLab


From 2c89e41326b16e0a3eb41063e6f585aae5baf4f7 Mon Sep 17 00:00:00 2001
From: Stanley Chu <stanley.chu@mediatek.com>
Date: Mon, 31 May 2021 14:26:42 +0800
Subject: [PATCH 1986/3804] scsi: ufs: ufs-mediatek: Fix HCI version in some
 platforms

Some MediaTek SoC platforms with UFSHCI version below 3.0 have incorrect
UFSHCI versions shown in the register map.

Fix the version by referring to UniPro version which is always correct.

Link: https://lore.kernel.org/r/20210531062642.12642-1-stanley.chu@mediatek.com
Reviewed-by: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Stanley Chu <stanley.chu@mediatek.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-mediatek.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c
index aee3cfc7142a4..0a84ec9e7cea0 100644
--- a/drivers/scsi/ufs/ufs-mediatek.c
+++ b/drivers/scsi/ufs/ufs-mediatek.c
@@ -603,11 +603,23 @@ static void ufs_mtk_get_controller_version(struct ufs_hba *hba)
 
 	ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_LOCALVERINFO), &ver);
 	if (!ret) {
-		if (ver >= UFS_UNIPRO_VER_1_8)
+		if (ver >= UFS_UNIPRO_VER_1_8) {
 			host->hw_ver.major = 3;
+			/*
+			 * Fix HCI version for some platforms with
+			 * incorrect version
+			 */
+			if (hba->ufs_version < ufshci_version(3, 0))
+				hba->ufs_version = ufshci_version(3, 0);
+		}
 	}
 }
 
+static u32 ufs_mtk_get_ufs_hci_version(struct ufs_hba *hba)
+{
+	return hba->ufs_version;
+}
+
 /**
  * ufs_mtk_init - find other essential mmio bases
  * @hba: host controller instance
@@ -1048,6 +1060,7 @@ static void ufs_mtk_event_notify(struct ufs_hba *hba,
 static const struct ufs_hba_variant_ops ufs_hba_mtk_vops = {
 	.name                = "mediatek.ufshci",
 	.init                = ufs_mtk_init,
+	.get_ufs_hci_version = ufs_mtk_get_ufs_hci_version,
 	.setup_clocks        = ufs_mtk_setup_clocks,
 	.hce_enable_notify   = ufs_mtk_hce_enable_notify,
 	.link_startup_notify = ufs_mtk_link_startup_notify,
-- 
GitLab


From 4d96d3b0efee6416ef0d61b76aaac6f4a2e15b12 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 1 Jun 2021 14:04:18 -0500
Subject: [PATCH 1987/3804] Bluetooth: Add a new USB ID for RTL8822CE

Some models of the RTL8822ce utilize a different USB ID. Add this
new one to the Bluetooth driver.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/bluetooth/btusb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 5d603ef39bad9..7a8e1d240f156 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -388,6 +388,8 @@ static const struct usb_device_id blacklist_table[] = {
 	/* Realtek 8822CE Bluetooth devices */
 	{ USB_DEVICE(0x0bda, 0xb00c), .driver_info = BTUSB_REALTEK |
 						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x0bda, 0xc822), .driver_info = BTUSB_REALTEK |
+						     BTUSB_WIDEBAND_SPEECH },
 
 	/* Realtek 8852AE Bluetooth devices */
 	{ USB_DEVICE(0x0bda, 0xc852), .driver_info = BTUSB_REALTEK |
-- 
GitLab


From e57f5cd99ca60cddf40201b0f4ced9f1938e299c Mon Sep 17 00:00:00 2001
From: "Ewan D. Milne" <emilne@redhat.com>
Date: Tue, 1 Jun 2021 13:52:14 -0400
Subject: [PATCH 1988/3804] scsi: scsi_devinfo: Add blacklist entry for HPE
 OPEN-V

Apparently some arrays are now returning "HPE" as the vendor.

Link: https://lore.kernel.org/r/20210601175214.25719-1-emilne@redhat.com
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_devinfo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index d92cec12454cb..d33355ab6e145 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -184,6 +184,7 @@ static struct {
 	{"HP", "C3323-300", "4269", BLIST_NOTQ},
 	{"HP", "C5713A", NULL, BLIST_NOREPORTLUN},
 	{"HP", "DISK-SUBSYSTEM", "*", BLIST_REPORTLUN2},
+	{"HPE", "OPEN-", "*", BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES},
 	{"IBM", "AuSaV1S2", NULL, BLIST_FORCELUN},
 	{"IBM", "ProFibre 4000R", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
 	{"IBM", "2105", NULL, BLIST_RETRY_HWERROR},
-- 
GitLab


From 8a4102a0cf07cc76a18f373f6b49485258cc6af4 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Sun, 16 May 2021 17:00:38 +0800
Subject: [PATCH 1989/3804] riscv: mm: Fix W+X mappings at boot

When the kernel mapping was moved to the last 2GB of the address space,
(__va(PFN_PHYS(max_low_pfn))) became much smaller than the .data section
start address, so the last set_memory_nx() in protect_kernel_text_data()
fails and the .data section is still mapped as W+X. This results in the
W+X mapping warning below at boot. Fix it by passing the correct .data
section page count to set_memory_nx().

[    0.396516] ------------[ cut here ]------------
[    0.396889] riscv/mm: Found insecure W+X mapping at address (____ptrval____)/0xffffffff80c00000
[    0.398347] WARNING: CPU: 0 PID: 1 at arch/riscv/mm/ptdump.c:258 note_page+0x244/0x24a
[    0.398964] Modules linked in:
[    0.399459] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc1+ #14
[    0.400003] Hardware name: riscv-virtio,qemu (DT)
[    0.400591] epc : note_page+0x244/0x24a
[    0.401368]  ra : note_page+0x244/0x24a
[    0.401772] epc : ffffffff80007c86 ra : ffffffff80007c86 sp : ffffffe000e7bc30
[    0.402304]  gp : ffffffff80caae88 tp : ffffffe000e70000 t0 : ffffffff80cb80cf
[    0.402800]  t1 : ffffffff80cb80c0 t2 : 0000000000000000 s0 : ffffffe000e7bc80
[    0.403310]  s1 : ffffffe000e7bde8 a0 : 0000000000000053 a1 : ffffffff80c83ff0
[    0.403805]  a2 : 0000000000000010 a3 : 0000000000000000 a4 : 6c7e7a5137233100
[    0.404298]  a5 : 6c7e7a5137233100 a6 : 0000000000000030 a7 : ffffffffffffffff
[    0.404849]  s2 : ffffffff80e00000 s3 : 0000000040000000 s4 : 0000000000000000
[    0.405393]  s5 : 0000000000000000 s6 : 0000000000000003 s7 : ffffffe000e7bd48
[    0.405935]  s8 : ffffffff81000000 s9 : ffffffffc0000000 s10: ffffffe000e7bd48
[    0.406476]  s11: 0000000000001000 t3 : 0000000000000072 t4 : ffffffffffffffff
[    0.407016]  t5 : 0000000000000002 t6 : ffffffe000e7b978
[    0.407435] status: 0000000000000120 badaddr: 0000000000000000 cause: 0000000000000003
[    0.408052] Call Trace:
[    0.408343] [<ffffffff80007c86>] note_page+0x244/0x24a
[    0.408855] [<ffffffff8010c5a6>] ptdump_hole+0x14/0x1e
[    0.409263] [<ffffffff800f65c6>] walk_pgd_range+0x2a0/0x376
[    0.409690] [<ffffffff800f6828>] walk_page_range_novma+0x4e/0x6e
[    0.410146] [<ffffffff8010c5f8>] ptdump_walk_pgd+0x48/0x78
[    0.410570] [<ffffffff80007d66>] ptdump_check_wx+0xb4/0xf8
[    0.410990] [<ffffffff80006738>] mark_rodata_ro+0x26/0x2e
[    0.411407] [<ffffffff8031961e>] kernel_init+0x44/0x108
[    0.411814] [<ffffffff80002312>] ret_from_exception+0x0/0xc
[    0.412309] ---[ end trace 7ec3459f2547ea83 ]---
[    0.413141] Checked W+X mappings: failed, 512 W+X pages found

Fixes: 2bfc6cd81bd17e43 ("riscv: Move kernel mapping outside of linear mapping")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/mm/init.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 4faf8bd157eaa..4c4c92ce0bb81 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -746,14 +746,18 @@ void __init protect_kernel_text_data(void)
 	unsigned long init_data_start = (unsigned long)__init_data_begin;
 	unsigned long rodata_start = (unsigned long)__start_rodata;
 	unsigned long data_start = (unsigned long)_data;
-	unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+#if defined(CONFIG_64BIT) && defined(CONFIG_MMU)
+	unsigned long end_va = kernel_virt_addr + load_sz;
+#else
+	unsigned long end_va = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+#endif
 
 	set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
 	set_memory_ro(init_text_start, (init_data_start - init_text_start) >> PAGE_SHIFT);
 	set_memory_nx(init_data_start, (rodata_start - init_data_start) >> PAGE_SHIFT);
 	/* rodata section is marked readonly in mark_rodata_ro */
 	set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
-	set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+	set_memory_nx(data_start, (end_va - data_start) >> PAGE_SHIFT);
 }
 
 void mark_rodata_ro(void)
-- 
GitLab


From b75db25c416b9f0edae7cd86c4901c216a52e7a0 Mon Sep 17 00:00:00 2001
From: Vincent <vincent.chen@sifive.com>
Date: Sat, 22 May 2021 07:40:15 +0800
Subject: [PATCH 1990/3804] riscv: skip errata_cip_453.o if
 CONFIG_ERRATA_SIFIVE_CIP_453 is disabled

The errata_cip_453.o should be built only when the Kconfig
CONFIG_ERRATA_SIFIVE_CIP_453 is enabled.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Vincent <vincent.chen@sifive.com>
Fixes: 0e0d4992517f ("riscv: enable SiFive errata CIP-453 and CIP-1200 Kconfig only if CONFIG_64BIT=y")
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/errata/sifive/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile
index bdd5fc843b8ee..2fde48db0619a 100644
--- a/arch/riscv/errata/sifive/Makefile
+++ b/arch/riscv/errata/sifive/Makefile
@@ -1,2 +1,2 @@
-obj-y += errata_cip_453.o
+obj-$(CONFIG_ERRATA_SIFIVE_CIP_453) += errata_cip_453.o
 obj-y += errata.o
-- 
GitLab


From da2d48808fbd1eddefefe245c6c0e92a9195df8b Mon Sep 17 00:00:00 2001
From: Wende Tan <twd2.me@gmail.com>
Date: Sat, 22 May 2021 17:49:51 +0000
Subject: [PATCH 1991/3804] RISC-V: Fix memblock_free() usages in
 init_resources()

`memblock_free()` takes a physical address as its first argument.
Fix the wrong usages in `init_resources()`.

Fixes: ffe0e526126884cf036a6f724220f1f9b4094fd2 ("RISC-V: Improve init_resources()")
Fixes: 797f0375dd2ef5cdc68ac23450cbae9a5c67a74e ("RISC-V: Do not allocate memblock while iterating reserved memblocks")
Signed-off-by: Wende Tan <twd2.me@gmail.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/setup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 03901d3a8b027..9a1b7a0603b28 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -231,13 +231,13 @@ static void __init init_resources(void)
 
 	/* Clean-up any unused pre-allocated resources */
 	mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res);
-	memblock_free((phys_addr_t) mem_res, mem_res_sz);
+	memblock_free(__pa(mem_res), mem_res_sz);
 	return;
 
  error:
 	/* Better an empty resource tree than an inconsistent one */
 	release_child_resources(&iomem_resource);
-	memblock_free((phys_addr_t) mem_res, mem_res_sz);
+	memblock_free(__pa(mem_res), mem_res_sz);
 }
 
 
-- 
GitLab


From a6c144f3d2e230f2b3ac5ed8c51e0f0391556197 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 26 May 2021 17:23:15 +0200
Subject: [PATCH 1992/3804] nvme-loop: reset queue count to 1 in
 nvme_loop_destroy_io_queues()

The queue count is increased in nvme_loop_init_io_queues(), so we
need to reset it to 1 at the end of nvme_loop_destroy_io_queues().
Otherwise the function is not re-entrant safe, and a crash will happen
during concurrent reset and remove calls.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index cb30cb942e1d1..93fca31e50430 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -299,6 +299,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 		clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
 	}
+	ctrl->ctrl.queue_count = 1;
 }
 
 static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
-- 
GitLab


From 1c5f8e882a05de5c011e8c3fbeceb0d1c590eb53 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 26 May 2021 17:23:16 +0200
Subject: [PATCH 1993/3804] nvme-loop: clear NVME_LOOP_Q_LIVE when
 nvme_loop_configure_admin_queue() fails

When the call to nvme_enable_ctrl() in nvme_loop_configure_admin_queue()
fails the NVME_LOOP_Q_LIVE flag is not cleared.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 93fca31e50430..8643c71953ad8 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -406,6 +406,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	return 0;
 
 out_cleanup_queue:
+	clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_cleanup_fabrics_q:
 	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
-- 
GitLab


From 4237de2f73a669e4f89ac0aa2b44fb1a1d9ec583 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 26 May 2021 17:23:17 +0200
Subject: [PATCH 1994/3804] nvme-loop: check for NVME_LOOP_Q_LIVE in
 nvme_loop_destroy_admin_queue()

We need to check the NVME_LOOP_Q_LIVE flag in
nvme_loop_destroy_admin_queue() to protect against duplicate
invocations, e.g. during concurrent reset and remove calls.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 8643c71953ad8..209ad4bc2695e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -263,7 +263,8 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 
 static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
-	clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
+	if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
+		return;
 	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_cleanup_queue(ctrl->ctrl.fabrics_q);
-- 
GitLab


From 6622f9acd29cd4f6272720e827e6406f5a970cb0 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 26 May 2021 17:23:18 +0200
Subject: [PATCH 1995/3804] nvme-loop: do not warn for deleted controllers
 during reset

During concurrent reset and delete calls the reset workqueue is
flushed, causing nvme_loop_reset_ctrl_work() to be executed when
the controller is in state DELETING or DELETING_NOIO.
But this is expected, so we shouldn't issue a WARN_ON here.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 209ad4bc2695e..a5c4a18650263 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -465,8 +465,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
 	nvme_loop_shutdown_ctrl(ctrl);
 
 	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
-		/* state change failure should never happen */
-		WARN_ON_ONCE(1);
+		if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
+		    ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
+			/* state change failure for non-deleted ctrl? */
+			WARN_ON_ONCE(1);
 		return;
 	}
 
-- 
GitLab


From bcd9a0797d73eeff659582f23277e7ab6e5f18f3 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Tue, 1 Jun 2021 19:22:05 +0300
Subject: [PATCH 1996/3804] nvmet: fix freeing unallocated p2pmem

In case a p2p device was found but the p2p pool is empty, the nvme target
still tries to free the sgl from the p2p pool instead of the regular sgl
pool, causing a crash (BUG() is called). Instead, assign the p2p_dev for
the request only if the sgl was allocated from the p2p pool.

This is the crash that was caused:

[Sun May 30 19:13:53 2021] ------------[ cut here ]------------
[Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518!
[Sun May 30 19:13:53 2021] invalid opcode: 0000 [#1] SMP PTI
...
[Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518!
...
[Sun May 30 19:13:53 2021] RIP: 0010:gen_pool_free_owner+0xa8/0xb0
...
[Sun May 30 19:13:53 2021] Call Trace:
[Sun May 30 19:13:53 2021] ------------[ cut here ]------------
[Sun May 30 19:13:53 2021]  pci_free_p2pmem+0x2b/0x70
[Sun May 30 19:13:53 2021]  pci_p2pmem_free_sgl+0x4f/0x80
[Sun May 30 19:13:53 2021]  nvmet_req_free_sgls+0x1e/0x80 [nvmet]
[Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518!
[Sun May 30 19:13:53 2021]  nvmet_rdma_release_rsp+0x4e/0x1f0 [nvmet_rdma]
[Sun May 30 19:13:53 2021]  nvmet_rdma_send_done+0x1c/0x60 [nvmet_rdma]

Fixes: c6e3f1339812 ("nvmet: add metadata support for block devices")
Reviewed-by: Israel Rukshin <israelr@nvidia.com>
Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4b29a5bac8969..b20b8d0a11441 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1005,19 +1005,23 @@ static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
 	return req->transfer_len - req->metadata_len;
 }
 
-static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
+static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev,
+		struct nvmet_req *req)
 {
-	req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
+	req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
 			nvmet_data_transfer_len(req));
 	if (!req->sg)
 		goto out_err;
 
 	if (req->metadata_len) {
-		req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
+		req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev,
 				&req->metadata_sg_cnt, req->metadata_len);
 		if (!req->metadata_sg)
 			goto out_free_sg;
 	}
+
+	req->p2p_dev = p2p_dev;
+
 	return 0;
 out_free_sg:
 	pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
@@ -1025,25 +1029,19 @@ out_err:
 	return -ENOMEM;
 }
 
-static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
+static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req)
 {
-	if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
-		return false;
-
-	if (req->sq->ctrl && req->sq->qid && req->ns) {
-		req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
-						 req->ns->nsid);
-		if (req->p2p_dev)
-			return true;
-	}
-
-	req->p2p_dev = NULL;
-	return false;
+	if (!IS_ENABLED(CONFIG_PCI_P2PDMA) ||
+	    !req->sq->ctrl || !req->sq->qid || !req->ns)
+		return NULL;
+	return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid);
 }
 
 int nvmet_req_alloc_sgls(struct nvmet_req *req)
 {
-	if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
+	struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req);
+
+	if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req))
 		return 0;
 
 	req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
@@ -1072,6 +1070,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req)
 		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
 		if (req->metadata_sg)
 			pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+		req->p2p_dev = NULL;
 	} else {
 		sgl_free(req->sg);
 		if (req->metadata_sg)
-- 
GitLab


From 3ae72f6ab9c1f688bd578cdc252dabce65fdaf57 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Wed, 2 Jun 2021 11:41:36 +0800
Subject: [PATCH 1997/3804] ALSA: control led: fix memory leak in
 snd_ctl_led_register

snd_ctl_led_sysfs_add and snd_ctl_led_sysfs_remove should perform their
refcount operations in pairs. However, snd_ctl_led_sysfs_remove fails
to decrease the refcount to zero, so device_release is never invoked.
This leaks some resources, such as struct device_private. In addition,
fix similar memory leaks in snd_ctl_led_init/snd_ctl_led_exit.

Fix this by replacing device_del with device_unregister
in snd_ctl_led_sysfs_remove/snd_ctl_led_init/snd_ctl_led_exit.

Note that, when CONFIG_DEBUG_KOBJECT_RELEASE is enabled, put_device will
call kobject_release and delay the release of kobject, which will cause
use-after-free when the memory backing the kobject is freed at once.

Reported-by: syzbot+08a7d8b51ea048a74ffb@syzkaller.appspotmail.com
Fixes: a135dfb5de15 ("ALSA: led control - add sysfs kcontrol LED marking layer")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Link: https://lore.kernel.org/r/20210602034136.2762497-1-mudongliangabcd@gmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/control_led.c | 33 ++++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/sound/core/control_led.c b/sound/core/control_led.c
index 25f57c14f294f..a90e31dbde61f 100644
--- a/sound/core/control_led.c
+++ b/sound/core/control_led.c
@@ -17,6 +17,9 @@ MODULE_LICENSE("GPL");
 #define MAX_LED (((SNDRV_CTL_ELEM_ACCESS_MIC_LED - SNDRV_CTL_ELEM_ACCESS_SPK_LED) \
 			>> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) + 1)
 
+#define to_led_card_dev(_dev) \
+	container_of(_dev, struct snd_ctl_led_card, dev)
+
 enum snd_ctl_led_mode {
 	 MODE_FOLLOW_MUTE = 0,
 	 MODE_FOLLOW_ROUTE,
@@ -371,6 +374,21 @@ static void snd_ctl_led_disconnect(struct snd_card *card)
 	snd_ctl_led_refresh();
 }
 
+static void snd_ctl_led_card_release(struct device *dev)
+{
+	struct snd_ctl_led_card *led_card = to_led_card_dev(dev);
+
+	kfree(led_card);
+}
+
+static void snd_ctl_led_release(struct device *dev)
+{
+}
+
+static void snd_ctl_led_dev_release(struct device *dev)
+{
+}
+
 /*
  * sysfs
  */
@@ -663,6 +681,7 @@ static void snd_ctl_led_sysfs_add(struct snd_card *card)
 		led_card->number = card->number;
 		led_card->led = led;
 		device_initialize(&led_card->dev);
+		led_card->dev.release = snd_ctl_led_card_release;
 		if (dev_set_name(&led_card->dev, "card%d", card->number) < 0)
 			goto cerr;
 		led_card->dev.parent = &led->dev;
@@ -681,7 +700,6 @@ cerr:
 		put_device(&led_card->dev);
 cerr2:
 		printk(KERN_ERR "snd_ctl_led: unable to add card%d", card->number);
-		kfree(led_card);
 	}
 }
 
@@ -700,8 +718,7 @@ static void snd_ctl_led_sysfs_remove(struct snd_card *card)
 		snprintf(link_name, sizeof(link_name), "led-%s", led->name);
 		sysfs_remove_link(&card->ctl_dev.kobj, link_name);
 		sysfs_remove_link(&led_card->dev.kobj, "card");
-		device_del(&led_card->dev);
-		kfree(led_card);
+		device_unregister(&led_card->dev);
 		led->cards[card->number] = NULL;
 	}
 }
@@ -723,6 +740,7 @@ static int __init snd_ctl_led_init(void)
 
 	device_initialize(&snd_ctl_led_dev);
 	snd_ctl_led_dev.class = sound_class;
+	snd_ctl_led_dev.release = snd_ctl_led_dev_release;
 	dev_set_name(&snd_ctl_led_dev, "ctl-led");
 	if (device_add(&snd_ctl_led_dev)) {
 		put_device(&snd_ctl_led_dev);
@@ -733,15 +751,16 @@ static int __init snd_ctl_led_init(void)
 		INIT_LIST_HEAD(&led->controls);
 		device_initialize(&led->dev);
 		led->dev.parent = &snd_ctl_led_dev;
+		led->dev.release = snd_ctl_led_release;
 		led->dev.groups = snd_ctl_led_dev_attr_groups;
 		dev_set_name(&led->dev, led->name);
 		if (device_add(&led->dev)) {
 			put_device(&led->dev);
 			for (; group > 0; group--) {
 				led = &snd_ctl_leds[group - 1];
-				device_del(&led->dev);
+				device_unregister(&led->dev);
 			}
-			device_del(&snd_ctl_led_dev);
+			device_unregister(&snd_ctl_led_dev);
 			return -ENOMEM;
 		}
 	}
@@ -767,9 +786,9 @@ static void __exit snd_ctl_led_exit(void)
 	}
 	for (group = 0; group < MAX_LED; group++) {
 		led = &snd_ctl_leds[group];
-		device_del(&led->dev);
+		device_unregister(&led->dev);
 	}
-	device_del(&snd_ctl_led_dev);
+	device_unregister(&snd_ctl_led_dev);
 	snd_ctl_led_clean(NULL);
 }
 
-- 
GitLab


From de646852cdadf7da2267e06297f7f6fe22dfb899 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 31 May 2021 17:05:45 +0200
Subject: [PATCH 1998/3804] media: move ttpci-eeprom to common

The ttpci-eeprom is actually an independent driver that
doesn't depend on the stuff under drivers/media/pci/ttpci/.

Also, it is used by a USB driver (pctv452e).

So, move it to the common directory.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/Kconfig                       | 4 ++++
 drivers/media/common/Makefile                      | 1 +
 drivers/media/{pci/ttpci => common}/ttpci-eeprom.c | 0
 drivers/media/{pci/ttpci => common}/ttpci-eeprom.h | 0
 drivers/media/pci/ttpci/Makefile                   | 2 +-
 drivers/media/usb/Kconfig                          | 5 -----
 drivers/media/usb/dvb-usb/Makefile                 | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)
 rename drivers/media/{pci/ttpci => common}/ttpci-eeprom.c (100%)
 rename drivers/media/{pci/ttpci => common}/ttpci-eeprom.h (100%)

diff --git a/drivers/media/common/Kconfig b/drivers/media/common/Kconfig
index 4ea03b7899a8f..0f6bde0f793ee 100644
--- a/drivers/media/common/Kconfig
+++ b/drivers/media/common/Kconfig
@@ -13,6 +13,10 @@ config VIDEO_TVEEPROM
 	tristate
 	depends on I2C
 
+config TTPCI_EEPROM
+        tristate
+        depends on I2C
+
 config CYPRESS_FIRMWARE
 	tristate
 	depends on USB
diff --git a/drivers/media/common/Makefile b/drivers/media/common/Makefile
index b71e4b62eea5e..55b5a19001248 100644
--- a/drivers/media/common/Makefile
+++ b/drivers/media/common/Makefile
@@ -3,3 +3,4 @@ obj-y += b2c2/ saa7146/ siano/ v4l2-tpg/ videobuf2/
 obj-$(CONFIG_VIDEO_CX2341X) += cx2341x.o
 obj-$(CONFIG_VIDEO_TVEEPROM) += tveeprom.o
 obj-$(CONFIG_CYPRESS_FIRMWARE) += cypress_firmware.o
+obj-$(CONFIG_TTPCI_EEPROM) += ttpci-eeprom.o
diff --git a/drivers/media/pci/ttpci/ttpci-eeprom.c b/drivers/media/common/ttpci-eeprom.c
similarity index 100%
rename from drivers/media/pci/ttpci/ttpci-eeprom.c
rename to drivers/media/common/ttpci-eeprom.c
diff --git a/drivers/media/pci/ttpci/ttpci-eeprom.h b/drivers/media/common/ttpci-eeprom.h
similarity index 100%
rename from drivers/media/pci/ttpci/ttpci-eeprom.h
rename to drivers/media/common/ttpci-eeprom.h
diff --git a/drivers/media/pci/ttpci/Makefile b/drivers/media/pci/ttpci/Makefile
index 9b44c479fcdd9..61001fa5a93e1 100644
--- a/drivers/media/pci/ttpci/Makefile
+++ b/drivers/media/pci/ttpci/Makefile
@@ -10,7 +10,6 @@ ifdef CONFIG_DVB_AV7110_IR
 dvb-ttpci-objs += av7110_ir.o
 endif
 
-obj-$(CONFIG_TTPCI_EEPROM) += ttpci-eeprom.o
 obj-$(CONFIG_DVB_BUDGET_CORE) += budget-core.o
 obj-$(CONFIG_DVB_BUDGET) += budget.o
 obj-$(CONFIG_DVB_BUDGET_AV) += budget-av.o
@@ -20,3 +19,4 @@ obj-$(CONFIG_DVB_AV7110) += dvb-ttpci.o
 
 ccflags-y += -I $(srctree)/drivers/media/dvb-frontends/
 ccflags-y += -I $(srctree)/drivers/media/tuners
+ccflags-y += -I $(srctree)/drivers/media/common
diff --git a/drivers/media/usb/Kconfig b/drivers/media/usb/Kconfig
index 00feadb217d8c..f97153df3c848 100644
--- a/drivers/media/usb/Kconfig
+++ b/drivers/media/usb/Kconfig
@@ -1,10 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-# This Kconfig option is also used by the legacy av7110 driver
-config TTPCI_EEPROM
-	tristate
-	depends on I2C
-
 if USB && MEDIA_SUPPORT
 
 menuconfig MEDIA_USB_SUPPORT
diff --git a/drivers/media/usb/dvb-usb/Makefile b/drivers/media/usb/dvb-usb/Makefile
index 28e4806a87cd3..c22514948db28 100644
--- a/drivers/media/usb/dvb-usb/Makefile
+++ b/drivers/media/usb/dvb-usb/Makefile
@@ -83,4 +83,4 @@ obj-$(CONFIG_DVB_USB_TECHNISAT_USB2) += dvb-usb-technisat-usb2.o
 ccflags-y += -I$(srctree)/drivers/media/dvb-frontends/
 # due to tuner-xc3028
 ccflags-y += -I$(srctree)/drivers/media/tuners
-ccflags-y += -I$(srctree)/drivers/media/pci/ttpci
+ccflags-y += -I$(srctree)/drivers/media/common
-- 
GitLab


From 989cf18ed08f8b6efd1d1592d1d0108fa09b98f5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 31 May 2021 17:27:09 +0200
Subject: [PATCH 1999/3804] media: av7110: move driver to staging

This driver is really old, for devices that haven't been
manufactured for more than a decade.

Also, the decoder supports only MPEG2, which is not compatible
with several modern DVB streams.

It is also the only upstream driver relying on the DVB
"full-featured" API.

Some changes at the frontend drivers seem to have broken it
without anybody noticing.

Due to that, it sounds like it is time to retire the driver for good.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/ttpci/Kconfig               | 74 -------------------
 drivers/media/pci/ttpci/Makefile              |  9 ---
 drivers/media/pci/ttpci/budget.h              |  2 +-
 drivers/staging/media/Kconfig                 |  2 +
 drivers/staging/media/Makefile                |  1 +
 drivers/staging/media/av7110/Kconfig          | 74 +++++++++++++++++++
 drivers/staging/media/av7110/Makefile         | 20 +++++
 drivers/staging/media/av7110/TODO             |  3 +
 .../ttpci => staging/media/av7110}/av7110.c   |  0
 .../ttpci => staging/media/av7110}/av7110.h   |  0
 .../media/av7110}/av7110_av.c                 |  0
 .../media/av7110}/av7110_av.h                 |  0
 .../media/av7110}/av7110_ca.c                 |  0
 .../media/av7110}/av7110_ca.h                 |  0
 .../media/av7110}/av7110_hw.c                 |  0
 .../media/av7110}/av7110_hw.h                 |  0
 .../media/av7110}/av7110_ipack.c              |  0
 .../media/av7110}/av7110_ipack.h              |  0
 .../media/av7110}/av7110_ir.c                 |  0
 .../media/av7110}/av7110_v4l.c                |  0
 .../media/av7110}/budget-patch.c              |  0
 .../media/av7110}/dvb_filter.c                |  0
 .../media/av7110}/dvb_filter.h                |  0
 23 files changed, 101 insertions(+), 84 deletions(-)
 create mode 100644 drivers/staging/media/av7110/Kconfig
 create mode 100644 drivers/staging/media/av7110/Makefile
 create mode 100644 drivers/staging/media/av7110/TODO
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110.h (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_av.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_av.h (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_ca.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_ca.h (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_hw.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_hw.h (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_ipack.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_ipack.h (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_ir.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/av7110_v4l.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/budget-patch.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/dvb_filter.c (100%)
 rename drivers/{media/pci/ttpci => staging/media/av7110}/dvb_filter.h (100%)

diff --git a/drivers/media/pci/ttpci/Kconfig b/drivers/media/pci/ttpci/Kconfig
index 8a362ee9105f0..65a6832a6b963 100644
--- a/drivers/media/pci/ttpci/Kconfig
+++ b/drivers/media/pci/ttpci/Kconfig
@@ -1,56 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config DVB_AV7110_IR
-	bool
-	depends on RC_CORE=y || RC_CORE = DVB_AV7110
-	default DVB_AV7110
-
-config DVB_AV7110
-	tristate "AV7110 cards"
-	depends on DVB_CORE && PCI && I2C
-	select TTPCI_EEPROM
-	select VIDEO_SAA7146_VV
-	depends on VIDEO_DEV	# dependencies of VIDEO_SAA7146_VV
-	select DVB_VES1820 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_VES1X93 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_TDA8083 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_SP8870 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_STV0297 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_L64781 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_LNBP21 if MEDIA_SUBDRV_AUTOSELECT
-	help
-	  Support for SAA7146 and AV7110 based DVB cards as produced
-	  by Fujitsu-Siemens, Technotrend, Hauppauge and others.
-
-	  This driver only supports the fullfeatured cards with
-	  onboard MPEG2 decoder.
-
-	  This driver needs an external firmware. Please use the script
-	  "<kerneldir>/scripts/get_dvb_firmware av7110" to
-	  download/extract it, and then copy it to /usr/lib/hotplug/firmware
-	  or /lib/firmware (depending on configuration of firmware hotplug).
-
-	  Alternatively, you can download the file and use the kernel's
-	  EXTRA_FIRMWARE configuration option to build it into your
-	  kernel image by adding the filename to the EXTRA_FIRMWARE
-	  configuration option string.
-
-	  Say Y if you own such a card and want to use it.
-
-config DVB_AV7110_OSD
-	bool "AV7110 OSD support"
-	depends on DVB_AV7110
-	default y if DVB_AV7110=y || DVB_AV7110=m
-	help
-	  The AV7110 firmware provides some code to generate an OnScreenDisplay
-	  on the video output. This is kind of nonstandard and not guaranteed to
-	  be maintained.
-
-	  Anyway, some popular DVB software like VDR uses this OSD to render
-	  its menus, so say Y if you want to use this software.
-
-	  All other people say N.
-
 config DVB_BUDGET_CORE
 	tristate "SAA7146 DVB cards (aka Budget, Nova-PCI)"
 	depends on DVB_CORE && PCI && I2C
@@ -136,25 +84,3 @@ config DVB_BUDGET_AV
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called budget-av.
-
-config DVB_BUDGET_PATCH
-	tristate "AV7110 cards with Budget Patch"
-	depends on DVB_BUDGET_CORE && I2C
-	depends on DVB_AV7110
-	select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_VES1X93 if MEDIA_SUBDRV_AUTOSELECT
-	select DVB_TDA8083 if MEDIA_SUBDRV_AUTOSELECT
-	help
-	  Support for Budget Patch (full TS) modification on
-	  SAA7146+AV7110 based cards (DVB-S cards). This
-	  driver doesn't use onboard MPEG2 decoder. The
-	  card is driven in Budget-only mode. Card is
-	  required to have loaded firmware to tune properly.
-	  Firmware can be loaded by insertion and removal of
-	  standard AV7110 driver prior to loading this
-	  driver.
-
-	  Say Y if you own such a card and want to use it.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called budget-patch.
diff --git a/drivers/media/pci/ttpci/Makefile b/drivers/media/pci/ttpci/Makefile
index 61001fa5a93e1..b0708f6e40cc0 100644
--- a/drivers/media/pci/ttpci/Makefile
+++ b/drivers/media/pci/ttpci/Makefile
@@ -1,21 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the kernel SAA7146 FULL TS DVB device driver
-# and the AV7110 DVB device driver
 #
 
-dvb-ttpci-objs := av7110_hw.o av7110_v4l.o av7110_av.o av7110_ca.o av7110.o av7110_ipack.o dvb_filter.o
-
-ifdef CONFIG_DVB_AV7110_IR
-dvb-ttpci-objs += av7110_ir.o
-endif
-
 obj-$(CONFIG_DVB_BUDGET_CORE) += budget-core.o
 obj-$(CONFIG_DVB_BUDGET) += budget.o
 obj-$(CONFIG_DVB_BUDGET_AV) += budget-av.o
 obj-$(CONFIG_DVB_BUDGET_CI) += budget-ci.o
-obj-$(CONFIG_DVB_BUDGET_PATCH) += budget-patch.o
-obj-$(CONFIG_DVB_AV7110) += dvb-ttpci.o
 
 ccflags-y += -I $(srctree)/drivers/media/dvb-frontends/
 ccflags-y += -I $(srctree)/drivers/media/tuners
diff --git a/drivers/media/pci/ttpci/budget.h b/drivers/media/pci/ttpci/budget.h
index a7463daf39f15..bd87432e6cde0 100644
--- a/drivers/media/pci/ttpci/budget.h
+++ b/drivers/media/pci/ttpci/budget.h
@@ -8,7 +8,6 @@
 #include <media/demux.h>
 #include <media/dvb_demux.h>
 #include <media/dmxdev.h>
-#include "dvb_filter.h"
 #include <media/dvb_net.h>
 
 #include <linux/module.h>
@@ -28,6 +27,7 @@ extern int budget_debug;
 		       __func__, ##arg);				\
 } while (0)
 
+#define TS_SIZE        188
 
 struct budget_info {
 	char *name;
diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index ca59986b20f8a..e3aaae920847c 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig
@@ -42,4 +42,6 @@ source "drivers/staging/media/tegra-video/Kconfig"
 
 source "drivers/staging/media/ipu3/Kconfig"
 
+source "drivers/staging/media/av7110/Kconfig"
+
 endif
diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index 716929a1a3130..5b5afc5b03a02 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_TEGRA_VDE)		+= tegra-vde/
 obj-$(CONFIG_VIDEO_HANTRO)	+= hantro/
 obj-$(CONFIG_VIDEO_IPU3_IMGU)	+= ipu3/
 obj-$(CONFIG_VIDEO_ZORAN)	+= zoran/
+obj-$(CONFIG_DVB_AV7110)	+= av7110/
diff --git a/drivers/staging/media/av7110/Kconfig b/drivers/staging/media/av7110/Kconfig
new file mode 100644
index 0000000000000..e19d24bf2eb47
--- /dev/null
+++ b/drivers/staging/media/av7110/Kconfig
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config DVB_AV7110_IR
+	bool
+	depends on RC_CORE=y || RC_CORE = DVB_AV7110
+	default DVB_AV7110
+
+config DVB_AV7110
+	tristate "AV7110 cards"
+	depends on DVB_CORE && PCI && I2C
+	select TTPCI_EEPROM
+	select VIDEO_SAA7146_VV
+	depends on VIDEO_DEV	# dependencies of VIDEO_SAA7146_VV
+	select DVB_VES1820 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_VES1X93 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_TDA8083 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_SP8870 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_STV0297 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_L64781 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_LNBP21 if MEDIA_SUBDRV_AUTOSELECT
+	help
+	  Support for SAA7146 and AV7110 based DVB cards as produced
+	  by Fujitsu-Siemens, Technotrend, Hauppauge and others.
+
+	  This driver only supports the fullfeatured cards with
+	  onboard MPEG2 decoder.
+
+	  This driver needs an external firmware. Please use the script
+	  "<kerneldir>/scripts/get_dvb_firmware av7110" to
+	  download/extract it, and then copy it to /usr/lib/hotplug/firmware
+	  or /lib/firmware (depending on configuration of firmware hotplug).
+
+	  Alternatively, you can download the file and use the kernel's
+	  EXTRA_FIRMWARE configuration option to build it into your
+	  kernel image by adding the filename to the EXTRA_FIRMWARE
+	  configuration option string.
+
+	  Say Y if you own such a card and want to use it.
+
+config DVB_AV7110_OSD
+	bool "AV7110 OSD support"
+	depends on DVB_AV7110
+	default y if DVB_AV7110=y || DVB_AV7110=m
+	help
+	  The AV7110 firmware provides some code to generate an OnScreenDisplay
+	  on the video output. This is kind of nonstandard and not guaranteed to
+	  be maintained.
+
+	  Anyway, some popular DVB software like VDR uses this OSD to render
+	  its menus, so say Y if you want to use this software.
+
+	  All other people say N.
+
+config DVB_BUDGET_PATCH
+	tristate "AV7110 cards with Budget Patch"
+	depends on DVB_BUDGET_CORE && I2C
+	depends on DVB_AV7110
+	select DVB_STV0299 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_VES1X93 if MEDIA_SUBDRV_AUTOSELECT
+	select DVB_TDA8083 if MEDIA_SUBDRV_AUTOSELECT
+	help
+	  Support for Budget Patch (full TS) modification on
+	  SAA7146+AV7110 based cards (DVB-S cards). This
+	  driver doesn't use onboard MPEG2 decoder. The
+	  card is driven in Budget-only mode. Card is
+	  required to have loaded firmware to tune properly.
+	  Firmware can be loaded by insertion and removal of
+	  standard AV7110 driver prior to loading this
+	  driver.
+
+	  Say Y if you own such a card and want to use it.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called budget-patch.
diff --git a/drivers/staging/media/av7110/Makefile b/drivers/staging/media/av7110/Makefile
new file mode 100644
index 0000000000000..dcabecf1abdeb
--- /dev/null
+++ b/drivers/staging/media/av7110/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the AV7110 DVB device driver
+#
+
+dvb-ttpci-objs := av7110_hw.o av7110_v4l.o av7110_av.o av7110_ca.o av7110.o \
+		  av7110_ipack.o dvb_filter.o
+
+ifdef CONFIG_DVB_AV7110_IR
+dvb-ttpci-objs += av7110_ir.o
+endif
+
+obj-$(CONFIG_DVB_BUDGET_PATCH) += budget-patch.o
+
+obj-$(CONFIG_DVB_AV7110) += dvb-ttpci.o
+
+ccflags-y += -I $(srctree)/drivers/media/dvb-frontends
+ccflags-y += -I $(srctree)/drivers/media/tuners
+ccflags-y += -I $(srctree)/drivers/media/pci/ttpci
+ccflags-y += -I $(srctree)/drivers/media/common
diff --git a/drivers/staging/media/av7110/TODO b/drivers/staging/media/av7110/TODO
new file mode 100644
index 0000000000000..60062d8441b34
--- /dev/null
+++ b/drivers/staging/media/av7110/TODO
@@ -0,0 +1,3 @@
+- This driver is too old and relies on a different API.
+  Drop it from Kernel on a couple of versions.
+- Cleanup patches for the drivers here won't be accepted.
diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/staging/media/av7110/av7110.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110.c
rename to drivers/staging/media/av7110/av7110.c
diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/staging/media/av7110/av7110.h
similarity index 100%
rename from drivers/media/pci/ttpci/av7110.h
rename to drivers/staging/media/av7110/av7110.h
diff --git a/drivers/media/pci/ttpci/av7110_av.c b/drivers/staging/media/av7110/av7110_av.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_av.c
rename to drivers/staging/media/av7110/av7110_av.c
diff --git a/drivers/media/pci/ttpci/av7110_av.h b/drivers/staging/media/av7110/av7110_av.h
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_av.h
rename to drivers/staging/media/av7110/av7110_av.h
diff --git a/drivers/media/pci/ttpci/av7110_ca.c b/drivers/staging/media/av7110/av7110_ca.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_ca.c
rename to drivers/staging/media/av7110/av7110_ca.c
diff --git a/drivers/media/pci/ttpci/av7110_ca.h b/drivers/staging/media/av7110/av7110_ca.h
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_ca.h
rename to drivers/staging/media/av7110/av7110_ca.h
diff --git a/drivers/media/pci/ttpci/av7110_hw.c b/drivers/staging/media/av7110/av7110_hw.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_hw.c
rename to drivers/staging/media/av7110/av7110_hw.c
diff --git a/drivers/media/pci/ttpci/av7110_hw.h b/drivers/staging/media/av7110/av7110_hw.h
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_hw.h
rename to drivers/staging/media/av7110/av7110_hw.h
diff --git a/drivers/media/pci/ttpci/av7110_ipack.c b/drivers/staging/media/av7110/av7110_ipack.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_ipack.c
rename to drivers/staging/media/av7110/av7110_ipack.c
diff --git a/drivers/media/pci/ttpci/av7110_ipack.h b/drivers/staging/media/av7110/av7110_ipack.h
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_ipack.h
rename to drivers/staging/media/av7110/av7110_ipack.h
diff --git a/drivers/media/pci/ttpci/av7110_ir.c b/drivers/staging/media/av7110/av7110_ir.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_ir.c
rename to drivers/staging/media/av7110/av7110_ir.c
diff --git a/drivers/media/pci/ttpci/av7110_v4l.c b/drivers/staging/media/av7110/av7110_v4l.c
similarity index 100%
rename from drivers/media/pci/ttpci/av7110_v4l.c
rename to drivers/staging/media/av7110/av7110_v4l.c
diff --git a/drivers/media/pci/ttpci/budget-patch.c b/drivers/staging/media/av7110/budget-patch.c
similarity index 100%
rename from drivers/media/pci/ttpci/budget-patch.c
rename to drivers/staging/media/av7110/budget-patch.c
diff --git a/drivers/media/pci/ttpci/dvb_filter.c b/drivers/staging/media/av7110/dvb_filter.c
similarity index 100%
rename from drivers/media/pci/ttpci/dvb_filter.c
rename to drivers/staging/media/av7110/dvb_filter.c
diff --git a/drivers/media/pci/ttpci/dvb_filter.h b/drivers/staging/media/av7110/dvb_filter.h
similarity index 100%
rename from drivers/media/pci/ttpci/dvb_filter.h
rename to drivers/staging/media/av7110/dvb_filter.h
-- 
GitLab


From b998a59f82f1152605eae4f7617778020549e81a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 31 May 2021 22:34:24 +0200
Subject: [PATCH 2000/3804] media: sp8870: move it to staging

This driver is used only by av7110, which is preparing for
its retirement. So, move this ancillary driver to stay together
with av7110.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-frontends/Kconfig           | 12 -----------
 drivers/media/dvb-frontends/Makefile          |  1 -
 drivers/staging/media/av7110/Kconfig          | 20 +++++++++++++++++++
 drivers/staging/media/av7110/Makefile         |  2 ++
 .../media/av7110}/sp8870.c                    |  0
 .../media/av7110}/sp8870.h                    |  0
 6 files changed, 22 insertions(+), 13 deletions(-)
 rename drivers/{media/dvb-frontends => staging/media/av7110}/sp8870.c (100%)
 rename drivers/{media/dvb-frontends => staging/media/av7110}/sp8870.h (100%)

diff --git a/drivers/media/dvb-frontends/Kconfig b/drivers/media/dvb-frontends/Kconfig
index 3468b07b62fe5..2c1ed98d43c55 100644
--- a/drivers/media/dvb-frontends/Kconfig
+++ b/drivers/media/dvb-frontends/Kconfig
@@ -323,18 +323,6 @@ config DVB_TDA10071
 comment "DVB-T (terrestrial) frontends"
 	depends on DVB_CORE
 
-config DVB_SP8870
-	tristate "Spase sp8870 based"
-	depends on DVB_CORE && I2C
-	default m if !MEDIA_SUBDRV_AUTOSELECT
-	help
-	  A DVB-T tuner module. Say Y when you want to support this frontend.
-
-	  This driver needs external firmware. Please use the command
-	  "<kerneldir>/scripts/get_dvb_firmware sp8870" to
-	  download/extract it, and then copy it to /usr/lib/hotplug/firmware
-	  or /lib/firmware (depending on configuration of firmware hotplug).
-
 config DVB_SP887X
 	tristate "Spase sp887x based"
 	depends on DVB_CORE && I2C
diff --git a/drivers/media/dvb-frontends/Makefile b/drivers/media/dvb-frontends/Makefile
index b9f47d68e14ec..d32e4c0be5769 100644
--- a/drivers/media/dvb-frontends/Makefile
+++ b/drivers/media/dvb-frontends/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_DVB_PLL) += dvb-pll.o
 obj-$(CONFIG_DVB_STV0299) += stv0299.o
 obj-$(CONFIG_DVB_STB0899) += stb0899.o
 obj-$(CONFIG_DVB_STB6100) += stb6100.o
-obj-$(CONFIG_DVB_SP8870) += sp8870.o
 obj-$(CONFIG_DVB_CX22700) += cx22700.o
 obj-$(CONFIG_DVB_S5H1432) += s5h1432.o
 obj-$(CONFIG_DVB_CX24110) += cx24110.o
diff --git a/drivers/staging/media/av7110/Kconfig b/drivers/staging/media/av7110/Kconfig
index e19d24bf2eb47..9faf9d2d40010 100644
--- a/drivers/staging/media/av7110/Kconfig
+++ b/drivers/staging/media/av7110/Kconfig
@@ -72,3 +72,23 @@ config DVB_BUDGET_PATCH
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called budget-patch.
+
+if DVB_AV7110
+
+# Frontend driver that it is used only by AV7110 driver
+# While technically independent, it doesn't make sense to keep
+# it if we drop support for AV7110, as no other driver will use it.
+
+config DVB_SP8870
+	tristate "Spase sp8870 based"
+	depends on DVB_CORE && I2C
+	default m if !MEDIA_SUBDRV_AUTOSELECT
+	help
+	  A DVB-T tuner module. Say Y when you want to support this frontend.
+
+	  This driver needs external firmware. Please use the command
+	  "<kerneldir>/scripts/get_dvb_firmware sp8870" to
+	  download/extract it, and then copy it to /usr/lib/hotplug/firmware
+	  or /lib/firmware (depending on configuration of firmware hotplug).
+
+endif
diff --git a/drivers/staging/media/av7110/Makefile b/drivers/staging/media/av7110/Makefile
index dcabecf1abdeb..307b267598ea6 100644
--- a/drivers/staging/media/av7110/Makefile
+++ b/drivers/staging/media/av7110/Makefile
@@ -14,6 +14,8 @@ obj-$(CONFIG_DVB_BUDGET_PATCH) += budget-patch.o
 
 obj-$(CONFIG_DVB_AV7110) += dvb-ttpci.o
 
+obj-$(CONFIG_DVB_SP8870) += sp8870.o
+
 ccflags-y += -I $(srctree)/drivers/media/dvb-frontends
 ccflags-y += -I $(srctree)/drivers/media/tuners
 ccflags-y += -I $(srctree)/drivers/media/pci/ttpci
diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/staging/media/av7110/sp8870.c
similarity index 100%
rename from drivers/media/dvb-frontends/sp8870.c
rename to drivers/staging/media/av7110/sp8870.c
diff --git a/drivers/media/dvb-frontends/sp8870.h b/drivers/staging/media/av7110/sp8870.h
similarity index 100%
rename from drivers/media/dvb-frontends/sp8870.h
rename to drivers/staging/media/av7110/sp8870.h
-- 
GitLab


From 1cb13613735a15b994b680ae5ef18aaf79108b95 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 2 Jun 2021 10:42:28 +0200
Subject: [PATCH 2001/3804] media: mc-request.c: allow object_bind in QUEUED
 state

If a request was queued without a control handler object, and
a control handler object is then created and bound to the request
when copying controls on request completion, then a WARN_ON in
mc-request.c is triggered since at that time the request is in
state QUEUED, and not UPDATING.

But this is too strict, and in this case it must also allow
binding objects when in state QUEUED.

This patch was unfortunately lost when the "always copy the controls
on completion" patch was posted; it should have been part of that
commit.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Fixes: c3bf5129f339 ("media: v4l2-ctrls: always copy the controls on completion")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/mc/mc-request.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/mc/mc-request.c b/drivers/media/mc/mc-request.c
index c0782fd96c591..addb8f2d8939e 100644
--- a/drivers/media/mc/mc-request.c
+++ b/drivers/media/mc/mc-request.c
@@ -414,7 +414,8 @@ int media_request_object_bind(struct media_request *req,
 
 	spin_lock_irqsave(&req->lock, flags);
 
-	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING))
+	if (WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING &&
+		    req->state != MEDIA_REQUEST_STATE_QUEUED))
 		goto unlock;
 
 	obj->req = req;
-- 
GitLab


From ff3cc65cadb5d7333fde557b38cbb60b3a6cf496 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Fri, 5 Mar 2021 18:38:39 +0100
Subject: [PATCH 2002/3804] media: v4l: async, fwnode: Improve module
 organisation

The V4L2 async framework is generally used with the V4L2 fwnode, which
also depends on the former. There are a few exceptions but they are
relatively few.

At the same time there is a vast number of systems that need videodev
module, but have no use for v4l2-async that's now part of videodev.

To improve on this, split v4l2-async into its own module. Selecting
V4L2_FWNODE also selects V4L2_ASYNC.

This also moves the initialisation of the debugfs entries for async subdevs
to the loading of the v4l2-async module. The debugfs directory is named
"v4l2-async".

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/Kconfig            | 11 +++++++++++
 drivers/media/v4l2-core/Kconfig      |  5 +++++
 drivers/media/v4l2-core/Makefile     |  5 +++--
 drivers/media/v4l2-core/v4l2-async.c | 23 +++++++++++++++++++++--
 drivers/media/v4l2-core/v4l2-dev.c   |  5 -----
 5 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index 462c0e0597546..4f1dafc648160 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -217,6 +217,7 @@ config VIDEO_ADV7180
 	depends on GPIOLIB && VIDEO_V4L2 && I2C
 	select MEDIA_CONTROLLER
 	select VIDEO_V4L2_SUBDEV_API
+	select V4L2_ASYNC
 	help
 	  Support for the Analog Devices ADV7180 video decoder.
 
@@ -534,6 +535,7 @@ config VIDEO_ADV7175
 config VIDEO_ADV7343
 	tristate "ADV7343 video encoder"
 	depends on I2C
+	select V4L2_ASYNC
 	help
 	  Support for Analog Devices I2C bus based ADV7343 encoder.
 
@@ -652,6 +654,7 @@ config SDR_MAX2175
 	tristate "Maxim 2175 RF to Bits tuner"
 	depends on VIDEO_V4L2 && MEDIA_SDR_SUPPORT && I2C
 	select REGMAP_I2C
+	select V4L2_ASYNC
 	help
 	  Support for Maxim 2175 tuner. It is an advanced analog/digital
 	  radio receiver with RF-to-Bits front-end designed for SDR solutions.
@@ -668,6 +671,7 @@ menu "Miscellaneous helper chips"
 config VIDEO_THS7303
 	tristate "THS7303/53 Video Amplifier"
 	depends on VIDEO_V4L2 && I2C
+	select V4L2_ASYNC
 	help
 	  Support for TI THS7303/53 video amplifier
 
@@ -1341,6 +1345,7 @@ config VIDEO_AD5820
 	tristate "AD5820 lens voice coil support"
 	depends on GPIOLIB && I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
+	select V4L2_ASYNC
 	help
 	  This is a driver for the AD5820 camera lens voice coil.
 	  It is used for example in Nokia N900 (RX-51).
@@ -1350,6 +1355,7 @@ config VIDEO_AK7375
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select VIDEO_V4L2_SUBDEV_API
+	select V4L2_ASYNC
 	help
 	  This is a driver for the AK7375 camera lens voice coil.
 	  AK7375 is a 12 bit DAC with 120mA output current sink
@@ -1361,6 +1367,7 @@ config VIDEO_DW9714
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select VIDEO_V4L2_SUBDEV_API
+	select V4L2_ASYNC
 	help
 	  This is a driver for the DW9714 camera lens voice coil.
 	  DW9714 is a 10 bit DAC with 120mA output current sink
@@ -1384,6 +1391,7 @@ config VIDEO_DW9807_VCM
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select VIDEO_V4L2_SUBDEV_API
+	select V4L2_ASYNC
 	help
 	  This is a driver for the DW9807 camera lens voice coil.
 	  DW9807 is a 10 bit DAC with 100mA output current sink
@@ -1399,6 +1407,7 @@ config VIDEO_ADP1653
 	tristate "ADP1653 flash support"
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
+	select V4L2_ASYNC
 	help
 	  This is a driver for the ADP1653 flash controller. It is used for
 	  example in Nokia N900.
@@ -1408,6 +1417,7 @@ config VIDEO_LM3560
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select REGMAP_I2C
+	select V4L2_ASYNC
 	help
 	  This is a driver for the lm3560 dual flash controllers. It controls
 	  flash, torch LEDs.
@@ -1417,6 +1427,7 @@ config VIDEO_LM3646
 	depends on I2C && VIDEO_V4L2
 	select MEDIA_CONTROLLER
 	select REGMAP_I2C
+	select V4L2_ASYNC
 	help
 	  This is a driver for the lm3646 dual flash controllers. It controls
 	  flash, torch LEDs.
diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
index bf49f83cb86f8..02dc1787e9535 100644
--- a/drivers/media/v4l2-core/Kconfig
+++ b/drivers/media/v4l2-core/Kconfig
@@ -62,6 +62,7 @@ config V4L2_FLASH_LED_CLASS
 	tristate "V4L2 flash API for LED flash class devices"
 	depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
 	depends on LEDS_CLASS_FLASH
+	select V4L2_ASYNC
 	help
 	  Say Y here to enable V4L2 flash API support for LED flash
 	  class drivers.
@@ -70,6 +71,10 @@ config V4L2_FLASH_LED_CLASS
 
 config V4L2_FWNODE
 	tristate
+	select V4L2_ASYNC
+
+config V4L2_ASYNC
+	tristate
 
 # Used by drivers that need Videobuf modules
 config VIDEOBUF_GEN
diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index ad967b72fb5d0..66a78c556c989 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -6,7 +6,7 @@
 tuner-objs	:=	tuner-core.o
 
 videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-fh.o \
-			v4l2-event.o v4l2-subdev.o v4l2-async.o v4l2-common.o \
+			v4l2-event.o v4l2-subdev.o v4l2-common.o \
 			v4l2-ctrls-core.o v4l2-ctrls-api.o \
 			v4l2-ctrls-request.o v4l2-ctrls-defs.o
 videodev-$(CONFIG_COMPAT) += v4l2-compat-ioctl32.o
@@ -15,8 +15,9 @@ videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
 videodev-$(CONFIG_SPI) += v4l2-spi.o
 videodev-$(CONFIG_VIDEO_V4L2_I2C) += v4l2-i2c.o
 
-obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
 obj-$(CONFIG_VIDEO_V4L2) += videodev.o
+obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
+obj-$(CONFIG_V4L2_ASYNC) += v4l2-async.o
 obj-$(CONFIG_VIDEO_V4L2) += v4l2-dv-timings.o
 
 obj-$(CONFIG_VIDEO_TUNER) += tuner.o
diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
index e638aa8aecb79..cd9e78c63791b 100644
--- a/drivers/media/v4l2-core/v4l2-async.c
+++ b/drivers/media/v4l2-core/v4l2-async.c
@@ -854,8 +854,27 @@ static int pending_subdevs_show(struct seq_file *s, void *data)
 }
 DEFINE_SHOW_ATTRIBUTE(pending_subdevs);
 
-void v4l2_async_debug_init(struct dentry *debugfs_dir)
+static struct dentry *v4l2_async_debugfs_dir;
+
+static int __init v4l2_async_init(void)
 {
-	debugfs_create_file("pending_async_subdevices", 0444, debugfs_dir, NULL,
+	v4l2_async_debugfs_dir = debugfs_create_dir("v4l2-async", NULL);
+	debugfs_create_file("pending_async_subdevices", 0444,
+			    v4l2_async_debugfs_dir, NULL,
 			    &pending_subdevs_fops);
+
+	return 0;
+}
+
+static void __exit v4l2_async_exit(void)
+{
+	debugfs_remove_recursive(v4l2_async_debugfs_dir);
 }
+
+subsys_initcall(v4l2_async_init);
+module_exit(v4l2_async_exit);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_AUTHOR("Sakari Ailus <sakari.ailus@linux.intel.com>");
+MODULE_AUTHOR("Ezequiel Garcia <ezequiel@collabora.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 7d0edf3530be3..4aa8fcd674d75 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -39,8 +39,6 @@
 		       __func__, ##arg);				\
 } while (0)
 
-static struct dentry *v4l2_debugfs_dir;
-
 /*
  *	sysfs stuff
  */
@@ -1121,8 +1119,6 @@ static int __init videodev_init(void)
 		return -EIO;
 	}
 
-	v4l2_debugfs_dir = debugfs_create_dir("video4linux", NULL);
-	v4l2_async_debug_init(v4l2_debugfs_dir);
 	return 0;
 }
 
@@ -1130,7 +1126,6 @@ static void __exit videodev_exit(void)
 {
 	dev_t dev = MKDEV(VIDEO_MAJOR, 0);
 
-	debugfs_remove_recursive(v4l2_debugfs_dir);
 	class_unregister(&video_class);
 	unregister_chrdev_region(dev, VIDEO_NUM_DEVICES);
 }
-- 
GitLab


From dc794d3d24246588d4db88c9d2c2ad67273027fd Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 12 Apr 2021 13:04:33 +0200
Subject: [PATCH 2003/3804] media: staging: ipu3-imgu: Move the UAPI header
 from include under include/uapi

The header defines the user space interface but may be mistaken as
kernel-only header due to its location. Add "uapi" directory under
driver's include directory and move the header there.

Suggested-by: Greg KH <gregkh@linuxfoundation.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/admin-guide/media/ipu3.rst      | 35 ++++++++++---------
 .../media/v4l/pixfmt-meta-intel-ipu3.rst      |  2 +-
 .../ipu3/include/{ => uapi}/intel-ipu3.h      |  0
 drivers/staging/media/ipu3/ipu3-abi.h         |  2 +-
 4 files changed, 20 insertions(+), 19 deletions(-)
 rename drivers/staging/media/ipu3/include/{ => uapi}/intel-ipu3.h (100%)

diff --git a/Documentation/admin-guide/media/ipu3.rst b/Documentation/admin-guide/media/ipu3.rst
index f59697c7b3740..d6454f637ff4f 100644
--- a/Documentation/admin-guide/media/ipu3.rst
+++ b/Documentation/admin-guide/media/ipu3.rst
@@ -234,22 +234,23 @@ The IPU3 ImgU pipelines can be configured using the Media Controller, defined at
 Running mode and firmware binary selection
 ------------------------------------------
 
-ImgU works based on firmware, currently the ImgU firmware support run 2 pipes in
-time-sharing with single input frame data. Each pipe can run at certain mode -
-"VIDEO" or "STILL", "VIDEO" mode is commonly used for video frames capture, and
-"STILL" is used for still frame capture. However, you can also select "VIDEO" to
-capture still frames if you want to capture images with less system load and
-power. For "STILL" mode, ImgU will try to use smaller BDS factor and output
-larger bayer frame for further YUV processing than "VIDEO" mode to get high
-quality images. Besides, "STILL" mode need XNR3 to do noise reduction, hence
-"STILL" mode will need more power and memory bandwidth than "VIDEO" mode. TNR
-will be enabled in "VIDEO" mode and bypassed by "STILL" mode. ImgU is running at
-“VIDEO” mode by default, the user can use v4l2 control V4L2_CID_INTEL_IPU3_MODE
-(currently defined in drivers/staging/media/ipu3/include/intel-ipu3.h) to query
-and set the running mode. For user, there is no difference for buffer queueing
-between the "VIDEO" and "STILL" mode, mandatory input and main output node
-should be enabled and buffers need be queued, the statistics and the view-finder
-queues are optional.
+ImgU works based on firmware, currently the ImgU firmware support run 2 pipes
+in time-sharing with single input frame data. Each pipe can run at certain mode
+- "VIDEO" or "STILL", "VIDEO" mode is commonly used for video frames capture,
+and "STILL" is used for still frame capture. However, you can also select
+"VIDEO" to capture still frames if you want to capture images with less system
+load and power. For "STILL" mode, ImgU will try to use smaller BDS factor and
+output larger bayer frame for further YUV processing than "VIDEO" mode to get
+high quality images. Besides, "STILL" mode need XNR3 to do noise reduction,
+hence "STILL" mode will need more power and memory bandwidth than "VIDEO" mode.
+TNR will be enabled in "VIDEO" mode and bypassed by "STILL" mode. ImgU is
+running at “VIDEO” mode by default, the user can use v4l2 control
+V4L2_CID_INTEL_IPU3_MODE (currently defined in
+drivers/staging/media/ipu3/include/uapi/intel-ipu3.h) to query and set the
+running mode. For user, there is no difference for buffer queueing between the
+"VIDEO" and "STILL" mode, mandatory input and main output node should be
+enabled and buffers need be queued, the statistics and the view-finder queues
+are optional.
 
 The firmware binary will be selected according to current running mode, such log
 "using binary if_to_osys_striped " or "using binary if_to_osys_primary_striped"
@@ -586,7 +587,7 @@ preserved.
 References
 ==========
 
-.. [#f5] drivers/staging/media/ipu3/include/intel-ipu3.h
+.. [#f5] drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
 
 .. [#f1] https://github.com/intel/nvt
 
diff --git a/Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst b/Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
index 5f33d35532ef4..84d81dd7a7b5e 100644
--- a/Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
+++ b/Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
@@ -78,4 +78,4 @@ hardware and algorithm details.
 Intel IPU3 ImgU uAPI data types
 ===============================
 
-.. kernel-doc:: drivers/staging/media/ipu3/include/intel-ipu3.h
+.. kernel-doc:: drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
similarity index 100%
rename from drivers/staging/media/ipu3/include/intel-ipu3.h
rename to drivers/staging/media/ipu3/include/uapi/intel-ipu3.h
diff --git a/drivers/staging/media/ipu3/ipu3-abi.h b/drivers/staging/media/ipu3/ipu3-abi.h
index e1185602c7fd8..c76935b436d78 100644
--- a/drivers/staging/media/ipu3/ipu3-abi.h
+++ b/drivers/staging/media/ipu3/ipu3-abi.h
@@ -4,7 +4,7 @@
 #ifndef __IPU3_ABI_H
 #define __IPU3_ABI_H
 
-#include "include/intel-ipu3.h"
+#include "include/uapi/intel-ipu3.h"
 
 /******************* IMGU Hardware information *******************/
 
-- 
GitLab


From caad79405086151dec128f78274a999f15d947ed Mon Sep 17 00:00:00 2001
From: Bernhard Wimmer <be.wimm@gmail.com>
Date: Wed, 21 Apr 2021 23:33:19 +0200
Subject: [PATCH 2004/3804] media: Documentation: ccs: Fix the
 op_pll_multiplier address

According to the CCS spec the op_pll_multiplier address is 0x030e,
not 0x031e.

Signed-off-by: Bernhard Wimmer <be.wimm@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/driver-api/media/drivers/ccs/ccs-regs.asc | 2 +-
 Documentation/driver-api/media/drivers/ccs/mk-ccs-regs  | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc b/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc
index f2042acc8a45f..bbf9213c33885 100644
--- a/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc
+++ b/Documentation/driver-api/media/drivers/ccs/ccs-regs.asc
@@ -210,7 +210,7 @@ pll_multiplier				0x0306	16
 op_pix_clk_div				0x0308	16
 op_sys_clk_div				0x030a	16
 op_pre_pll_clk_div			0x030c	16
-op_pll_multiplier			0x031e	16
+op_pll_multiplier			0x030e	16
 pll_mode				0x0310	8
 - f					0	0
 - e	single				0
diff --git a/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs b/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs
index 6668deaf2f192..2a4edc7e051af 100755
--- a/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs
+++ b/Documentation/driver-api/media/drivers/ccs/mk-ccs-regs
@@ -72,13 +72,14 @@ $uc_header =~ s/[^A-Z0-9]/_/g;
 
 my $copyright = "/* Copyright (C) 2019--2020 Intel Corporation */\n";
 my $license = "SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause";
+my $note = "/*\n * Generated by $0;\n * do not modify.\n */\n";
 
 for my $fh ($A, $LC) {
-	print $fh "// $license\n$copyright\n" if defined $fh;
+	print $fh "// $license\n$copyright$note\n" if defined $fh;
 }
 
 for my $fh ($H, $LH) {
-	print $fh "/* $license */\n$copyright\n";
+	print $fh "/* $license */\n$copyright$note\n";
 }
 
 sub bit_def($) {
-- 
GitLab


From 0e3e0c9369c822b7f1dd11504eeb98cfd4aabf24 Mon Sep 17 00:00:00 2001
From: Bernhard Wimmer <be.wimm@gmail.com>
Date: Wed, 21 Apr 2021 23:33:20 +0200
Subject: [PATCH 2005/3804] media: ccs: Fix the op_pll_multiplier address

According to the CCS spec the op_pll_multiplier address is 0x030e,
not 0x031e.

Signed-off-by: Bernhard Wimmer <be.wimm@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: stable@vger.kernel.org
Fixes: 6493c4b777c2 ("media: smiapp: Import CCS definitions")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ccs/ccs-limits.c | 4 ++++
 drivers/media/i2c/ccs/ccs-limits.h | 4 ++++
 drivers/media/i2c/ccs/ccs-regs.h   | 6 +++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/media/i2c/ccs/ccs-limits.c b/drivers/media/i2c/ccs/ccs-limits.c
index f5511789ac837..4969fa425317d 100644
--- a/drivers/media/i2c/ccs/ccs-limits.c
+++ b/drivers/media/i2c/ccs/ccs-limits.c
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause
 /* Copyright (C) 2019--2020 Intel Corporation */
+/*
+ * Generated by Documentation/driver-api/media/drivers/ccs/mk-ccs-regs;
+ * do not modify.
+ */
 
 #include "ccs-limits.h"
 #include "ccs-regs.h"
diff --git a/drivers/media/i2c/ccs/ccs-limits.h b/drivers/media/i2c/ccs/ccs-limits.h
index 1efa43c23a2eb..551d3ee9d04e1 100644
--- a/drivers/media/i2c/ccs/ccs-limits.h
+++ b/drivers/media/i2c/ccs/ccs-limits.h
@@ -1,5 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
 /* Copyright (C) 2019--2020 Intel Corporation */
+/*
+ * Generated by Documentation/driver-api/media/drivers/ccs/mk-ccs-regs;
+ * do not modify.
+ */
 
 #ifndef __CCS_LIMITS_H__
 #define __CCS_LIMITS_H__
diff --git a/drivers/media/i2c/ccs/ccs-regs.h b/drivers/media/i2c/ccs/ccs-regs.h
index 4b3e5df2121f8..6ce84c5ecf207 100644
--- a/drivers/media/i2c/ccs/ccs-regs.h
+++ b/drivers/media/i2c/ccs/ccs-regs.h
@@ -1,5 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */
 /* Copyright (C) 2019--2020 Intel Corporation */
+/*
+ * Generated by Documentation/driver-api/media/drivers/ccs/mk-ccs-regs;
+ * do not modify.
+ */
 
 #ifndef __CCS_REGS_H__
 #define __CCS_REGS_H__
@@ -202,7 +206,7 @@
 #define CCS_R_OP_PIX_CLK_DIV					(0x0308 | CCS_FL_16BIT)
 #define CCS_R_OP_SYS_CLK_DIV					(0x030a | CCS_FL_16BIT)
 #define CCS_R_OP_PRE_PLL_CLK_DIV				(0x030c | CCS_FL_16BIT)
-#define CCS_R_OP_PLL_MULTIPLIER					(0x031e | CCS_FL_16BIT)
+#define CCS_R_OP_PLL_MULTIPLIER					(0x030e | CCS_FL_16BIT)
 #define CCS_R_PLL_MODE						0x0310
 #define CCS_PLL_MODE_SHIFT					0U
 #define CCS_PLL_MODE_MASK					0x1
-- 
GitLab


From 2cb2705cf7ffe41dc5bd81290e4241bfb7f031cc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Sun, 4 Apr 2021 20:14:09 +0200
Subject: [PATCH 2006/3804] media: ipu3-cio2: Fix reference counting when
 looping over ACPI devices

When we continue the loop because the device is disabled, we have to
drop the reference count. When the array is already full of devices, we
also have to drop the reference count. Note that in this case
cio2_bridge_unregister_sensors() is called by the caller.

Fixes: 803abec64ef9 ("media: ipu3-cio2: Add cio2-bridge to ipu3-cio2 driver")
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/intel/ipu3/cio2-bridge.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/media/pci/intel/ipu3/cio2-bridge.c b/drivers/media/pci/intel/ipu3/cio2-bridge.c
index e8511787c1e43..4657e99df0339 100644
--- a/drivers/media/pci/intel/ipu3/cio2-bridge.c
+++ b/drivers/media/pci/intel/ipu3/cio2-bridge.c
@@ -173,14 +173,15 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg,
 	int ret;
 
 	for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) {
-		if (!adev->status.enabled)
+		if (!adev->status.enabled) {
+			acpi_dev_put(adev);
 			continue;
+		}
 
 		if (bridge->n_sensors >= CIO2_NUM_PORTS) {
+			acpi_dev_put(adev);
 			dev_err(&cio2->dev, "Exceeded available CIO2 ports\n");
-			cio2_bridge_unregister_sensors(bridge);
-			ret = -EINVAL;
-			goto err_out;
+			return -EINVAL;
 		}
 
 		sensor = &bridge->sensors[bridge->n_sensors];
@@ -228,7 +229,6 @@ err_free_swnodes:
 	software_node_unregister_nodes(sensor->swnodes);
 err_put_adev:
 	acpi_dev_put(sensor->adev);
-err_out:
 	return ret;
 }
 
-- 
GitLab


From 24786ccd9c80fdb05494aa4d90fcb8f34295c193 Mon Sep 17 00:00:00 2001
From: Dillon Min <dillon.minfei@gmail.com>
Date: Tue, 4 May 2021 07:09:53 +0200
Subject: [PATCH 2007/3804] media: i2c: ov2659: Use
 clk_{prepare_enable,disable_unprepare}() to set xvclk on/off

On some platforms (e.g. imx6q), xvclk might not be switched on in
advance. Also, to save power, xvclk should not be always on.
So add clk_prepare_enable() and clk_disable_unprepare() calls to the
driver to switch xvclk on and off at the proper stage.

Add the following changes:
- add 'struct clk *clk;' in 'struct ov2659 {}'
- enable xvclk in ov2659_power_on()
- disable xvclk in ov2659_power_off()

Signed-off-by: Dillon Min <dillon.minfei@gmail.com>
Acked-by: Lad Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov2659.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
index a3c8eae684865..7c1781f646cea 100644
--- a/drivers/media/i2c/ov2659.c
+++ b/drivers/media/i2c/ov2659.c
@@ -204,6 +204,7 @@ struct ov2659 {
 	struct i2c_client *client;
 	struct v4l2_ctrl_handler ctrls;
 	struct v4l2_ctrl *link_frequency;
+	struct clk *clk;
 	const struct ov2659_framesize *frame_size;
 	struct sensor_register *format_ctrl_regs;
 	struct ov2659_pll_ctrl pll;
@@ -1268,6 +1269,8 @@ static int ov2659_power_off(struct device *dev)
 
 	gpiod_set_value(ov2659->pwdn_gpio, 1);
 
+	clk_disable_unprepare(ov2659->clk);
+
 	return 0;
 }
 
@@ -1276,9 +1279,17 @@ static int ov2659_power_on(struct device *dev)
 	struct i2c_client *client = to_i2c_client(dev);
 	struct v4l2_subdev *sd = i2c_get_clientdata(client);
 	struct ov2659 *ov2659 = to_ov2659(sd);
+	int ret;
 
 	dev_dbg(&client->dev, "%s:\n", __func__);
 
+	ret = clk_prepare_enable(ov2659->clk);
+	if (ret) {
+		dev_err(&client->dev, "%s: failed to enable clock\n",
+			__func__);
+		return ret;
+	}
+
 	gpiod_set_value(ov2659->pwdn_gpio, 0);
 
 	if (ov2659->resetb_gpio) {
@@ -1423,7 +1434,6 @@ static int ov2659_probe(struct i2c_client *client)
 	const struct ov2659_platform_data *pdata = ov2659_get_pdata(client);
 	struct v4l2_subdev *sd;
 	struct ov2659 *ov2659;
-	struct clk *clk;
 	int ret;
 
 	if (!pdata) {
@@ -1438,11 +1448,11 @@ static int ov2659_probe(struct i2c_client *client)
 	ov2659->pdata = pdata;
 	ov2659->client = client;
 
-	clk = devm_clk_get(&client->dev, "xvclk");
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
+	ov2659->clk = devm_clk_get(&client->dev, "xvclk");
+	if (IS_ERR(ov2659->clk))
+		return PTR_ERR(ov2659->clk);
 
-	ov2659->xvclk_frequency = clk_get_rate(clk);
+	ov2659->xvclk_frequency = clk_get_rate(ov2659->clk);
 	if (ov2659->xvclk_frequency < 6000000 ||
 	    ov2659->xvclk_frequency > 27000000)
 		return -EINVAL;
@@ -1504,7 +1514,9 @@ static int ov2659_probe(struct i2c_client *client)
 	ov2659->frame_size = &ov2659_framesizes[2];
 	ov2659->format_ctrl_regs = ov2659_formats[0].format_ctrl_regs;
 
-	ov2659_power_on(&client->dev);
+	ret = ov2659_power_on(&client->dev);
+	if (ret < 0)
+		goto error;
 
 	ret = ov2659_detect(sd);
 	if (ret < 0)
-- 
GitLab


From c492ec9ae9ede77dd794b14913b0382376da2bff Mon Sep 17 00:00:00 2001
From: Shawn Tu <shawnx.tu@intel.com>
Date: Fri, 16 Apr 2021 11:58:58 +0200
Subject: [PATCH 2008/3804] media: ov8856: Add support for 2 data lanes

The OV8856 sensor can output frames with 2 or 4 CSI-2 data lanes.
This commit adds support for 2-lane mode in addition to 4-lane mode,
and configures the data lane settings in the driver based on the
system configuration.

- Fix Bayer order output in 1640x1232 binning registers
- supported data lanes
  + 3280x2464 on 2 & 4 lanes
  + 1640x1232 on 2 & 4 lanes
  + 3264x2448 on 4 lanes
  + 1632x1224 on 4 lanes

Signed-off-by: Shawn Tu <shawnx.tu@intel.com>
Acked-by: Andrey Konovalov <andrey.konovalov@linaro.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov8856.c | 2331 ++++++++++++++++++++++--------------
 1 file changed, 1430 insertions(+), 901 deletions(-)

diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c
index 2875f8e4ddcba..d145f004fd8d8 100644
--- a/drivers/media/i2c/ov8856.c
+++ b/drivers/media/i2c/ov8856.c
@@ -18,8 +18,6 @@
 #define OV8856_REG_VALUE_16BIT		2
 #define OV8856_REG_VALUE_24BIT		3
 
-#define OV8856_LINK_FREQ_360MHZ		360000000ULL
-#define OV8856_LINK_FREQ_180MHZ		180000000ULL
 #define OV8856_SCLK			144000000ULL
 #define OV8856_XVCLK_19_2		19200000
 #define OV8856_DATA_LANES		4
@@ -78,6 +76,10 @@
 #define OV8856_TEST_PATTERN_ENABLE	BIT(7)
 #define OV8856_TEST_PATTERN_BAR_SHIFT	2
 
+#define NUM_REGS				7
+#define NUM_MODE_REGS				187
+#define NUM_MODE_REGS_2				200
+
 #define to_ov8856(_sd)			container_of(_sd, struct ov8856, sd)
 
 static const char * const ov8856_supply_names[] = {
@@ -86,11 +88,6 @@ static const char * const ov8856_supply_names[] = {
 	"dvdd",		/* Digital core power */
 };
 
-enum {
-	OV8856_LINK_FREQ_720MBPS,
-	OV8856_LINK_FREQ_360MBPS,
-};
-
 struct ov8856_reg {
 	u16 address;
 	u8 val;
@@ -126,891 +123,1242 @@ struct ov8856_mode {
 
 	/* Sensor register settings for this resolution */
 	const struct ov8856_reg_list reg_list;
+
+	/* Number of data lanes */
+	u8 data_lanes;
 };
 
-static const struct ov8856_reg mipi_data_rate_720mbps[] = {
-	{0x0103, 0x01},
-	{0x0100, 0x00},
-	{0x0302, 0x4b},
-	{0x0303, 0x01},
-	{0x030b, 0x02},
-	{0x030d, 0x4b},
-	{0x031e, 0x0c},
+struct ov8856_mipi_data_rates {
+	const struct ov8856_reg regs_0[NUM_REGS];
+	const struct ov8856_reg regs_1[NUM_REGS];
 };
 
-static const struct ov8856_reg mipi_data_rate_360mbps[] = {
-	{0x0103, 0x01},
-	{0x0100, 0x00},
-	{0x0302, 0x4b},
-	{0x0303, 0x03},
-	{0x030b, 0x02},
-	{0x030d, 0x4b},
-	{0x031e, 0x0c},
+static const struct ov8856_mipi_data_rates mipi_data_rate_lane_2 = {
+	//mipi_data_rate_1440mbps
+	{
+		{0x0103, 0x01},
+		{0x0100, 0x00},
+		{0x0302, 0x43},
+		{0x0303, 0x00},
+		{0x030b, 0x02},
+		{0x030d, 0x4b},
+		{0x031e, 0x0c}
+	},
+	//mipi_data_rate_720mbps
+	{
+		{0x0103, 0x01},
+		{0x0100, 0x00},
+		{0x0302, 0x4b},
+		{0x0303, 0x01},
+		{0x030b, 0x02},
+		{0x030d, 0x4b},
+		{0x031e, 0x0c}
+	}
 };
 
-static const struct ov8856_reg mode_3280x2464_regs[] = {
-	{0x3000, 0x20},
-	{0x3003, 0x08},
-	{0x300e, 0x20},
-	{0x3010, 0x00},
-	{0x3015, 0x84},
-	{0x3018, 0x72},
-	{0x3021, 0x23},
-	{0x3033, 0x24},
-	{0x3500, 0x00},
-	{0x3501, 0x9a},
-	{0x3502, 0x20},
-	{0x3503, 0x08},
-	{0x3505, 0x83},
-	{0x3508, 0x01},
-	{0x3509, 0x80},
-	{0x350c, 0x00},
-	{0x350d, 0x80},
-	{0x350e, 0x04},
-	{0x350f, 0x00},
-	{0x3510, 0x00},
-	{0x3511, 0x02},
-	{0x3512, 0x00},
-	{0x3600, 0x72},
-	{0x3601, 0x40},
-	{0x3602, 0x30},
-	{0x3610, 0xc5},
-	{0x3611, 0x58},
-	{0x3612, 0x5c},
-	{0x3613, 0xca},
-	{0x3614, 0x20},
-	{0x3628, 0xff},
-	{0x3629, 0xff},
-	{0x362a, 0xff},
-	{0x3633, 0x10},
-	{0x3634, 0x10},
-	{0x3635, 0x10},
-	{0x3636, 0x10},
-	{0x3663, 0x08},
-	{0x3669, 0x34},
-	{0x366e, 0x10},
-	{0x3706, 0x86},
-	{0x370b, 0x7e},
-	{0x3714, 0x23},
-	{0x3730, 0x12},
-	{0x3733, 0x10},
-	{0x3764, 0x00},
-	{0x3765, 0x00},
-	{0x3769, 0x62},
-	{0x376a, 0x2a},
-	{0x376b, 0x30},
-	{0x3780, 0x00},
-	{0x3781, 0x24},
-	{0x3782, 0x00},
-	{0x3783, 0x23},
-	{0x3798, 0x2f},
-	{0x37a1, 0x60},
-	{0x37a8, 0x6a},
-	{0x37ab, 0x3f},
-	{0x37c2, 0x04},
-	{0x37c3, 0xf1},
-	{0x37c9, 0x80},
-	{0x37cb, 0x16},
-	{0x37cc, 0x16},
-	{0x37cd, 0x16},
-	{0x37ce, 0x16},
-	{0x3800, 0x00},
-	{0x3801, 0x00},
-	{0x3802, 0x00},
-	{0x3803, 0x06},
-	{0x3804, 0x0c},
-	{0x3805, 0xdf},
-	{0x3806, 0x09},
-	{0x3807, 0xa7},
-	{0x3808, 0x0c},
-	{0x3809, 0xd0},
-	{0x380a, 0x09},
-	{0x380b, 0xa0},
-	{0x380c, 0x07},
-	{0x380d, 0x88},
-	{0x380e, 0x09},
-	{0x380f, 0xb8},
-	{0x3810, 0x00},
-	{0x3811, 0x00},
-	{0x3812, 0x00},
-	{0x3813, 0x01},
-	{0x3814, 0x01},
-	{0x3815, 0x01},
-	{0x3816, 0x00},
-	{0x3817, 0x00},
-	{0x3818, 0x00},
-	{0x3819, 0x10},
-	{0x3820, 0x80},
-	{0x3821, 0x46},
-	{0x382a, 0x01},
-	{0x382b, 0x01},
-	{0x3830, 0x06},
-	{0x3836, 0x02},
-	{0x3862, 0x04},
-	{0x3863, 0x08},
-	{0x3cc0, 0x33},
-	{0x3d85, 0x17},
-	{0x3d8c, 0x73},
-	{0x3d8d, 0xde},
-	{0x4001, 0xe0},
-	{0x4003, 0x40},
-	{0x4008, 0x00},
-	{0x4009, 0x0b},
-	{0x400a, 0x00},
-	{0x400b, 0x84},
-	{0x400f, 0x80},
-	{0x4010, 0xf0},
-	{0x4011, 0xff},
-	{0x4012, 0x02},
-	{0x4013, 0x01},
-	{0x4014, 0x01},
-	{0x4015, 0x01},
-	{0x4042, 0x00},
-	{0x4043, 0x80},
-	{0x4044, 0x00},
-	{0x4045, 0x80},
-	{0x4046, 0x00},
-	{0x4047, 0x80},
-	{0x4048, 0x00},
-	{0x4049, 0x80},
-	{0x4041, 0x03},
-	{0x404c, 0x20},
-	{0x404d, 0x00},
-	{0x404e, 0x20},
-	{0x4203, 0x80},
-	{0x4307, 0x30},
-	{0x4317, 0x00},
-	{0x4503, 0x08},
-	{0x4601, 0x80},
-	{0x4800, 0x44},
-	{0x4816, 0x53},
-	{0x481b, 0x58},
-	{0x481f, 0x27},
-	{0x4837, 0x16},
-	{0x483c, 0x0f},
-	{0x484b, 0x05},
-	{0x5000, 0x57},
-	{0x5001, 0x0a},
-	{0x5004, 0x04},
-	{0x502e, 0x03},
-	{0x5030, 0x41},
-	{0x5780, 0x14},
-	{0x5781, 0x0f},
-	{0x5782, 0x44},
-	{0x5783, 0x02},
-	{0x5784, 0x01},
-	{0x5785, 0x01},
-	{0x5786, 0x00},
-	{0x5787, 0x04},
-	{0x5788, 0x02},
-	{0x5789, 0x0f},
-	{0x578a, 0xfd},
-	{0x578b, 0xf5},
-	{0x578c, 0xf5},
-	{0x578d, 0x03},
-	{0x578e, 0x08},
-	{0x578f, 0x0c},
-	{0x5790, 0x08},
-	{0x5791, 0x04},
-	{0x5792, 0x00},
-	{0x5793, 0x52},
-	{0x5794, 0xa3},
-	{0x5795, 0x02},
-	{0x5796, 0x20},
-	{0x5797, 0x20},
-	{0x5798, 0xd5},
-	{0x5799, 0xd5},
-	{0x579a, 0x00},
-	{0x579b, 0x50},
-	{0x579c, 0x00},
-	{0x579d, 0x2c},
-	{0x579e, 0x0c},
-	{0x579f, 0x40},
-	{0x57a0, 0x09},
-	{0x57a1, 0x40},
-	{0x59f8, 0x3d},
-	{0x5a08, 0x02},
-	{0x5b00, 0x02},
-	{0x5b01, 0x10},
-	{0x5b02, 0x03},
-	{0x5b03, 0xcf},
-	{0x5b05, 0x6c},
-	{0x5e00, 0x00}
+static const struct ov8856_mipi_data_rates mipi_data_rate_lane_4 = {
+	//mipi_data_rate_720mbps
+	{
+		{0x0103, 0x01},
+		{0x0100, 0x00},
+		{0x0302, 0x4b},
+		{0x0303, 0x01},
+		{0x030b, 0x02},
+		{0x030d, 0x4b},
+		{0x031e, 0x0c}
+	},
+	//mipi_data_rate_360mbps
+	{
+		{0x0103, 0x01},
+		{0x0100, 0x00},
+		{0x0302, 0x4b},
+		{0x0303, 0x03},
+		{0x030b, 0x02},
+		{0x030d, 0x4b},
+		{0x031e, 0x0c}
+	}
 };
 
-static const struct ov8856_reg mode_3264x2448_regs[] = {
-	{0x0103, 0x01},
-	{0x0302, 0x3c},
-	{0x0303, 0x01},
-	{0x031e, 0x0c},
-	{0x3000, 0x20},
-	{0x3003, 0x08},
-	{0x300e, 0x20},
-	{0x3010, 0x00},
-	{0x3015, 0x84},
-	{0x3018, 0x72},
-	{0x3021, 0x23},
-	{0x3033, 0x24},
-	{0x3500, 0x00},
-	{0x3501, 0x9a},
-	{0x3502, 0x20},
-	{0x3503, 0x08},
-	{0x3505, 0x83},
-	{0x3508, 0x01},
-	{0x3509, 0x80},
-	{0x350c, 0x00},
-	{0x350d, 0x80},
-	{0x350e, 0x04},
-	{0x350f, 0x00},
-	{0x3510, 0x00},
-	{0x3511, 0x02},
-	{0x3512, 0x00},
-	{0x3600, 0x72},
-	{0x3601, 0x40},
-	{0x3602, 0x30},
-	{0x3610, 0xc5},
-	{0x3611, 0x58},
-	{0x3612, 0x5c},
-	{0x3613, 0xca},
-	{0x3614, 0x60},
-	{0x3628, 0xff},
-	{0x3629, 0xff},
-	{0x362a, 0xff},
-	{0x3633, 0x10},
-	{0x3634, 0x10},
-	{0x3635, 0x10},
-	{0x3636, 0x10},
-	{0x3663, 0x08},
-	{0x3669, 0x34},
-	{0x366d, 0x00},
-	{0x366e, 0x10},
-	{0x3706, 0x86},
-	{0x370b, 0x7e},
-	{0x3714, 0x23},
-	{0x3730, 0x12},
-	{0x3733, 0x10},
-	{0x3764, 0x00},
-	{0x3765, 0x00},
-	{0x3769, 0x62},
-	{0x376a, 0x2a},
-	{0x376b, 0x30},
-	{0x3780, 0x00},
-	{0x3781, 0x24},
-	{0x3782, 0x00},
-	{0x3783, 0x23},
-	{0x3798, 0x2f},
-	{0x37a1, 0x60},
-	{0x37a8, 0x6a},
-	{0x37ab, 0x3f},
-	{0x37c2, 0x04},
-	{0x37c3, 0xf1},
-	{0x37c9, 0x80},
-	{0x37cb, 0x16},
-	{0x37cc, 0x16},
-	{0x37cd, 0x16},
-	{0x37ce, 0x16},
-	{0x3800, 0x00},
-	{0x3801, 0x00},
-	{0x3802, 0x00},
-	{0x3803, 0x0c},
-	{0x3804, 0x0c},
-	{0x3805, 0xdf},
-	{0x3806, 0x09},
-	{0x3807, 0xa3},
-	{0x3808, 0x0c},
-	{0x3809, 0xc0},
-	{0x380a, 0x09},
-	{0x380b, 0x90},
-	{0x380c, 0x07},
-	{0x380d, 0x8c},
-	{0x380e, 0x09},
-	{0x380f, 0xb2},
-	{0x3810, 0x00},
-	{0x3811, 0x04},
-	{0x3812, 0x00},
-	{0x3813, 0x01},
-	{0x3814, 0x01},
-	{0x3815, 0x01},
-	{0x3816, 0x00},
-	{0x3817, 0x00},
-	{0x3818, 0x00},
-	{0x3819, 0x10},
-	{0x3820, 0x80},
-	{0x3821, 0x46},
-	{0x382a, 0x01},
-	{0x382b, 0x01},
-	{0x3830, 0x06},
-	{0x3836, 0x02},
-	{0x3862, 0x04},
-	{0x3863, 0x08},
-	{0x3cc0, 0x33},
-	{0x3d85, 0x17},
-	{0x3d8c, 0x73},
-	{0x3d8d, 0xde},
-	{0x4001, 0xe0},
-	{0x4003, 0x40},
-	{0x4008, 0x00},
-	{0x4009, 0x0b},
-	{0x400a, 0x00},
-	{0x400b, 0x84},
-	{0x400f, 0x80},
-	{0x4010, 0xf0},
-	{0x4011, 0xff},
-	{0x4012, 0x02},
-	{0x4013, 0x01},
-	{0x4014, 0x01},
-	{0x4015, 0x01},
-	{0x4042, 0x00},
-	{0x4043, 0x80},
-	{0x4044, 0x00},
-	{0x4045, 0x80},
-	{0x4046, 0x00},
-	{0x4047, 0x80},
-	{0x4048, 0x00},
-	{0x4049, 0x80},
-	{0x4041, 0x03},
-	{0x404c, 0x20},
-	{0x404d, 0x00},
-	{0x404e, 0x20},
-	{0x4203, 0x80},
-	{0x4307, 0x30},
-	{0x4317, 0x00},
-	{0x4502, 0x50},
-	{0x4503, 0x08},
-	{0x4601, 0x80},
-	{0x4800, 0x44},
-	{0x4816, 0x53},
-	{0x481b, 0x50},
-	{0x481f, 0x27},
-	{0x4823, 0x3c},
-	{0x482b, 0x00},
-	{0x4831, 0x66},
-	{0x4837, 0x16},
-	{0x483c, 0x0f},
-	{0x484b, 0x05},
-	{0x5000, 0x77},
-	{0x5001, 0x0a},
-	{0x5003, 0xc8},
-	{0x5004, 0x04},
-	{0x5006, 0x00},
-	{0x5007, 0x00},
-	{0x502e, 0x03},
-	{0x5030, 0x41},
-	{0x5780, 0x14},
-	{0x5781, 0x0f},
-	{0x5782, 0x44},
-	{0x5783, 0x02},
-	{0x5784, 0x01},
-	{0x5785, 0x01},
-	{0x5786, 0x00},
-	{0x5787, 0x04},
-	{0x5788, 0x02},
-	{0x5789, 0x0f},
-	{0x578a, 0xfd},
-	{0x578b, 0xf5},
-	{0x578c, 0xf5},
-	{0x578d, 0x03},
-	{0x578e, 0x08},
-	{0x578f, 0x0c},
-	{0x5790, 0x08},
-	{0x5791, 0x04},
-	{0x5792, 0x00},
-	{0x5793, 0x52},
-	{0x5794, 0xa3},
-	{0x5795, 0x02},
-	{0x5796, 0x20},
-	{0x5797, 0x20},
-	{0x5798, 0xd5},
-	{0x5799, 0xd5},
-	{0x579a, 0x00},
-	{0x579b, 0x50},
-	{0x579c, 0x00},
-	{0x579d, 0x2c},
-	{0x579e, 0x0c},
-	{0x579f, 0x40},
-	{0x57a0, 0x09},
-	{0x57a1, 0x40},
-	{0x59f8, 0x3d},
-	{0x5a08, 0x02},
-	{0x5b00, 0x02},
-	{0x5b01, 0x10},
-	{0x5b02, 0x03},
-	{0x5b03, 0xcf},
-	{0x5b05, 0x6c},
-	{0x5e00, 0x00},
-	{0x5e10, 0xfc}
+static const struct ov8856_reg lane_2_mode_3280x2464[] = {
+	/* 3280x2464 resolution */
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x32},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x9a},
+		{0x3502, 0x20},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x50},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366e, 0x10},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x23},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x04},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x06},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xa7},
+		{0x3808, 0x0c},
+		{0x3809, 0xd0},
+		{0x380a, 0x09},
+		{0x380b, 0xa0},
+		{0x380c, 0x07},
+		{0x380d, 0x88},
+		{0x380e, 0x09},
+		{0x380f, 0xb8},
+		{0x3810, 0x00},
+		{0x3811, 0x00},
+		{0x3812, 0x00},
+		{0x3813, 0x01},
+		{0x3814, 0x01},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x00},
+		{0x3820, 0x80},
+		{0x3821, 0x46},
+		{0x382a, 0x01},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3837, 0x10},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x14},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x0b},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x58},
+		{0x481f, 0x27},
+		{0x4837, 0x0c},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x57},
+		{0x5001, 0x0a},
+		{0x5004, 0x04},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5795, 0x02},
+		{0x5796, 0x20},
+		{0x5797, 0x20},
+		{0x5798, 0xd5},
+		{0x5799, 0xd5},
+		{0x579a, 0x00},
+		{0x579b, 0x50},
+		{0x579c, 0x00},
+		{0x579d, 0x2c},
+		{0x579e, 0x0c},
+		{0x579f, 0x40},
+		{0x57a0, 0x09},
+		{0x57a1, 0x40},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00}
 };
 
-static const struct ov8856_reg mode_1640x1232_regs[] = {
-	{0x3000, 0x20},
-	{0x3003, 0x08},
-	{0x300e, 0x20},
-	{0x3010, 0x00},
-	{0x3015, 0x84},
-	{0x3018, 0x72},
-	{0x3021, 0x23},
-	{0x3033, 0x24},
-	{0x3500, 0x00},
-	{0x3501, 0x4c},
-	{0x3502, 0xe0},
-	{0x3503, 0x08},
-	{0x3505, 0x83},
-	{0x3508, 0x01},
-	{0x3509, 0x80},
-	{0x350c, 0x00},
-	{0x350d, 0x80},
-	{0x350e, 0x04},
-	{0x350f, 0x00},
-	{0x3510, 0x00},
-	{0x3511, 0x02},
-	{0x3512, 0x00},
-	{0x3600, 0x72},
-	{0x3601, 0x40},
-	{0x3602, 0x30},
-	{0x3610, 0xc5},
-	{0x3611, 0x58},
-	{0x3612, 0x5c},
-	{0x3613, 0xca},
-	{0x3614, 0x20},
-	{0x3628, 0xff},
-	{0x3629, 0xff},
-	{0x362a, 0xff},
-	{0x3633, 0x10},
-	{0x3634, 0x10},
-	{0x3635, 0x10},
-	{0x3636, 0x10},
-	{0x3663, 0x08},
-	{0x3669, 0x34},
-	{0x366e, 0x08},
-	{0x3706, 0x86},
-	{0x370b, 0x7e},
-	{0x3714, 0x27},
-	{0x3730, 0x12},
-	{0x3733, 0x10},
-	{0x3764, 0x00},
-	{0x3765, 0x00},
-	{0x3769, 0x62},
-	{0x376a, 0x2a},
-	{0x376b, 0x30},
-	{0x3780, 0x00},
-	{0x3781, 0x24},
-	{0x3782, 0x00},
-	{0x3783, 0x23},
-	{0x3798, 0x2f},
-	{0x37a1, 0x60},
-	{0x37a8, 0x6a},
-	{0x37ab, 0x3f},
-	{0x37c2, 0x14},
-	{0x37c3, 0xf1},
-	{0x37c9, 0x80},
-	{0x37cb, 0x16},
-	{0x37cc, 0x16},
-	{0x37cd, 0x16},
-	{0x37ce, 0x16},
-	{0x3800, 0x00},
-	{0x3801, 0x00},
-	{0x3802, 0x00},
-	{0x3803, 0x06},
-	{0x3804, 0x0c},
-	{0x3805, 0xdf},
-	{0x3806, 0x09},
-	{0x3807, 0xa7},
-	{0x3808, 0x06},
-	{0x3809, 0x68},
-	{0x380a, 0x04},
-	{0x380b, 0xd0},
-	{0x380c, 0x0e},
-	{0x380d, 0xec},
-	{0x380e, 0x04},
-	{0x380f, 0xe8},
-	{0x3810, 0x00},
-	{0x3811, 0x00},
-	{0x3812, 0x00},
-	{0x3813, 0x01},
-	{0x3814, 0x03},
-	{0x3815, 0x01},
-	{0x3816, 0x00},
-	{0x3817, 0x00},
-	{0x3818, 0x00},
-	{0x3819, 0x10},
-	{0x3820, 0x90},
-	{0x3821, 0x67},
-	{0x382a, 0x03},
-	{0x382b, 0x01},
-	{0x3830, 0x06},
-	{0x3836, 0x02},
-	{0x3862, 0x04},
-	{0x3863, 0x08},
-	{0x3cc0, 0x33},
-	{0x3d85, 0x17},
-	{0x3d8c, 0x73},
-	{0x3d8d, 0xde},
-	{0x4001, 0xe0},
-	{0x4003, 0x40},
-	{0x4008, 0x00},
-	{0x4009, 0x05},
-	{0x400a, 0x00},
-	{0x400b, 0x84},
-	{0x400f, 0x80},
-	{0x4010, 0xf0},
-	{0x4011, 0xff},
-	{0x4012, 0x02},
-	{0x4013, 0x01},
-	{0x4014, 0x01},
-	{0x4015, 0x01},
-	{0x4042, 0x00},
-	{0x4043, 0x80},
-	{0x4044, 0x00},
-	{0x4045, 0x80},
-	{0x4046, 0x00},
-	{0x4047, 0x80},
-	{0x4048, 0x00},
-	{0x4049, 0x80},
-	{0x4041, 0x03},
-	{0x404c, 0x20},
-	{0x404d, 0x00},
-	{0x404e, 0x20},
-	{0x4203, 0x80},
-	{0x4307, 0x30},
-	{0x4317, 0x00},
-	{0x4503, 0x08},
-	{0x4601, 0x80},
-	{0x4800, 0x44},
-	{0x4816, 0x53},
-	{0x481b, 0x58},
-	{0x481f, 0x27},
-	{0x4837, 0x16},
-	{0x483c, 0x0f},
-	{0x484b, 0x05},
-	{0x5000, 0x57},
-	{0x5001, 0x0a},
-	{0x5004, 0x04},
-	{0x502e, 0x03},
-	{0x5030, 0x41},
-	{0x5780, 0x14},
-	{0x5781, 0x0f},
-	{0x5782, 0x44},
-	{0x5783, 0x02},
-	{0x5784, 0x01},
-	{0x5785, 0x01},
-	{0x5786, 0x00},
-	{0x5787, 0x04},
-	{0x5788, 0x02},
-	{0x5789, 0x0f},
-	{0x578a, 0xfd},
-	{0x578b, 0xf5},
-	{0x578c, 0xf5},
-	{0x578d, 0x03},
-	{0x578e, 0x08},
-	{0x578f, 0x0c},
-	{0x5790, 0x08},
-	{0x5791, 0x04},
-	{0x5792, 0x00},
-	{0x5793, 0x52},
-	{0x5794, 0xa3},
-	{0x5795, 0x00},
-	{0x5796, 0x10},
-	{0x5797, 0x10},
-	{0x5798, 0x73},
-	{0x5799, 0x73},
-	{0x579a, 0x00},
-	{0x579b, 0x28},
-	{0x579c, 0x00},
-	{0x579d, 0x16},
-	{0x579e, 0x06},
-	{0x579f, 0x20},
-	{0x57a0, 0x04},
-	{0x57a1, 0xa0},
-	{0x59f8, 0x3d},
-	{0x5a08, 0x02},
-	{0x5b00, 0x02},
-	{0x5b01, 0x10},
-	{0x5b02, 0x03},
-	{0x5b03, 0xcf},
-	{0x5b05, 0x6c},
-	{0x5e00, 0x00}
+static const struct ov8856_reg lane_2_mode_1640x1232[] = {
+	/* 1640x1232 resolution */
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x32},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x4c},
+		{0x3502, 0xe0},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x50},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366e, 0x08},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x27},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x14},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x00},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xaf},
+		{0x3808, 0x06},
+		{0x3809, 0x68},
+		{0x380a, 0x04},
+		{0x380b, 0xd0},
+		{0x380c, 0x0c},
+		{0x380d, 0x60},
+		{0x380e, 0x05},
+		{0x380f, 0xea},
+		{0x3810, 0x00},
+		{0x3811, 0x04},
+		{0x3812, 0x00},
+		{0x3813, 0x05},
+		{0x3814, 0x03},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x00},
+		{0x3820, 0x90},
+		{0x3821, 0x67},
+		{0x382a, 0x03},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3837, 0x10},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x14},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x05},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x58},
+		{0x481f, 0x27},
+		{0x4837, 0x16},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x57},
+		{0x5001, 0x0a},
+		{0x5004, 0x04},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5795, 0x00},
+		{0x5796, 0x10},
+		{0x5797, 0x10},
+		{0x5798, 0x73},
+		{0x5799, 0x73},
+		{0x579a, 0x00},
+		{0x579b, 0x28},
+		{0x579c, 0x00},
+		{0x579d, 0x16},
+		{0x579e, 0x06},
+		{0x579f, 0x20},
+		{0x57a0, 0x04},
+		{0x57a1, 0xa0},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00}
 };
 
-static const struct ov8856_reg mode_1632x1224_regs[] = {
-	{0x0103, 0x01},
-	{0x0302, 0x3c},
-	{0x0303, 0x01},
-	{0x031e, 0x0c},
-	{0x3000, 0x20},
-	{0x3003, 0x08},
-	{0x300e, 0x20},
-	{0x3010, 0x00},
-	{0x3015, 0x84},
-	{0x3018, 0x72},
-	{0x3021, 0x23},
-	{0x3033, 0x24},
-	{0x3500, 0x00},
-	{0x3501, 0x4c},
-	{0x3502, 0xe0},
-	{0x3503, 0x08},
-	{0x3505, 0x83},
-	{0x3508, 0x01},
-	{0x3509, 0x80},
-	{0x350c, 0x00},
-	{0x350d, 0x80},
-	{0x350e, 0x04},
-	{0x350f, 0x00},
-	{0x3510, 0x00},
-	{0x3511, 0x02},
-	{0x3512, 0x00},
-	{0x3600, 0x72},
-	{0x3601, 0x40},
-	{0x3602, 0x30},
-	{0x3610, 0xc5},
-	{0x3611, 0x58},
-	{0x3612, 0x5c},
-	{0x3613, 0xca},
-	{0x3614, 0x60},
-	{0x3628, 0xff},
-	{0x3629, 0xff},
-	{0x362a, 0xff},
-	{0x3633, 0x10},
-	{0x3634, 0x10},
-	{0x3635, 0x10},
-	{0x3636, 0x10},
-	{0x3663, 0x08},
-	{0x3669, 0x34},
-	{0x366d, 0x00},
-	{0x366e, 0x08},
-	{0x3706, 0x86},
-	{0x370b, 0x7e},
-	{0x3714, 0x27},
-	{0x3730, 0x12},
-	{0x3733, 0x10},
-	{0x3764, 0x00},
-	{0x3765, 0x00},
-	{0x3769, 0x62},
-	{0x376a, 0x2a},
-	{0x376b, 0x30},
-	{0x3780, 0x00},
-	{0x3781, 0x24},
-	{0x3782, 0x00},
-	{0x3783, 0x23},
-	{0x3798, 0x2f},
-	{0x37a1, 0x60},
-	{0x37a8, 0x6a},
-	{0x37ab, 0x3f},
-	{0x37c2, 0x14},
-	{0x37c3, 0xf1},
-	{0x37c9, 0x80},
-	{0x37cb, 0x16},
-	{0x37cc, 0x16},
-	{0x37cd, 0x16},
-	{0x37ce, 0x16},
-	{0x3800, 0x00},
-	{0x3801, 0x00},
-	{0x3802, 0x00},
-	{0x3803, 0x0c},
-	{0x3804, 0x0c},
-	{0x3805, 0xdf},
-	{0x3806, 0x09},
-	{0x3807, 0xa3},
-	{0x3808, 0x06},
-	{0x3809, 0x60},
-	{0x380a, 0x04},
-	{0x380b, 0xc8},
-	{0x380c, 0x07},
-	{0x380d, 0x8c},
-	{0x380e, 0x09},
-	{0x380f, 0xb2},
-	{0x3810, 0x00},
-	{0x3811, 0x02},
-	{0x3812, 0x00},
-	{0x3813, 0x01},
-	{0x3814, 0x03},
-	{0x3815, 0x01},
-	{0x3816, 0x00},
-	{0x3817, 0x00},
-	{0x3818, 0x00},
-	{0x3819, 0x10},
-	{0x3820, 0x80},
-	{0x3821, 0x47},
-	{0x382a, 0x03},
-	{0x382b, 0x01},
-	{0x3830, 0x06},
-	{0x3836, 0x02},
-	{0x3862, 0x04},
-	{0x3863, 0x08},
-	{0x3cc0, 0x33},
-	{0x3d85, 0x17},
-	{0x3d8c, 0x73},
-	{0x3d8d, 0xde},
-	{0x4001, 0xe0},
-	{0x4003, 0x40},
-	{0x4008, 0x00},
-	{0x4009, 0x05},
-	{0x400a, 0x00},
-	{0x400b, 0x84},
-	{0x400f, 0x80},
-	{0x4010, 0xf0},
-	{0x4011, 0xff},
-	{0x4012, 0x02},
-	{0x4013, 0x01},
-	{0x4014, 0x01},
-	{0x4015, 0x01},
-	{0x4042, 0x00},
-	{0x4043, 0x80},
-	{0x4044, 0x00},
-	{0x4045, 0x80},
-	{0x4046, 0x00},
-	{0x4047, 0x80},
-	{0x4048, 0x00},
-	{0x4049, 0x80},
-	{0x4041, 0x03},
-	{0x404c, 0x20},
-	{0x404d, 0x00},
-	{0x404e, 0x20},
-	{0x4203, 0x80},
-	{0x4307, 0x30},
-	{0x4317, 0x00},
-	{0x4502, 0x50},
-	{0x4503, 0x08},
-	{0x4601, 0x80},
-	{0x4800, 0x44},
-	{0x4816, 0x53},
-	{0x481b, 0x50},
-	{0x481f, 0x27},
-	{0x4823, 0x3c},
-	{0x482b, 0x00},
-	{0x4831, 0x66},
-	{0x4837, 0x16},
-	{0x483c, 0x0f},
-	{0x484b, 0x05},
-	{0x5000, 0x77},
-	{0x5001, 0x0a},
-	{0x5003, 0xc8},
-	{0x5004, 0x04},
-	{0x5006, 0x00},
-	{0x5007, 0x00},
-	{0x502e, 0x03},
-	{0x5030, 0x41},
-	{0x5795, 0x00},
-	{0x5796, 0x10},
-	{0x5797, 0x10},
-	{0x5798, 0x73},
-	{0x5799, 0x73},
-	{0x579a, 0x00},
-	{0x579b, 0x28},
-	{0x579c, 0x00},
-	{0x579d, 0x16},
-	{0x579e, 0x06},
-	{0x579f, 0x20},
-	{0x57a0, 0x04},
-	{0x57a1, 0xa0},
-	{0x5780, 0x14},
-	{0x5781, 0x0f},
-	{0x5782, 0x44},
-	{0x5783, 0x02},
-	{0x5784, 0x01},
-	{0x5785, 0x01},
-	{0x5786, 0x00},
-	{0x5787, 0x04},
-	{0x5788, 0x02},
-	{0x5789, 0x0f},
-	{0x578a, 0xfd},
-	{0x578b, 0xf5},
-	{0x578c, 0xf5},
-	{0x578d, 0x03},
-	{0x578e, 0x08},
-	{0x578f, 0x0c},
-	{0x5790, 0x08},
-	{0x5791, 0x04},
-	{0x5792, 0x00},
-	{0x5793, 0x52},
-	{0x5794, 0xa3},
-	{0x59f8, 0x3d},
-	{0x5a08, 0x02},
-	{0x5b00, 0x02},
-	{0x5b01, 0x10},
-	{0x5b02, 0x03},
-	{0x5b03, 0xcf},
-	{0x5b05, 0x6c},
-	{0x5e00, 0x00},
-	{0x5e10, 0xfc}
+static const struct ov8856_reg lane_4_mode_3280x2464[] = {
+	/* 3280x2464 resolution */
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x72},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x9a},
+		{0x3502, 0x20},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x20},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366e, 0x10},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x23},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x04},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x06},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xa7},
+		{0x3808, 0x0c},
+		{0x3809, 0xd0},
+		{0x380a, 0x09},
+		{0x380b, 0xa0},
+		{0x380c, 0x07},
+		{0x380d, 0x88},
+		{0x380e, 0x09},
+		{0x380f, 0xb8},
+		{0x3810, 0x00},
+		{0x3811, 0x00},
+		{0x3812, 0x00},
+		{0x3813, 0x01},
+		{0x3814, 0x01},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x10},
+		{0x3820, 0x80},
+		{0x3821, 0x46},
+		{0x382a, 0x01},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x17},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x0b},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x58},
+		{0x481f, 0x27},
+		{0x4837, 0x16},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x57},
+		{0x5001, 0x0a},
+		{0x5004, 0x04},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x5795, 0x02},
+		{0x5796, 0x20},
+		{0x5797, 0x20},
+		{0x5798, 0xd5},
+		{0x5799, 0xd5},
+		{0x579a, 0x00},
+		{0x579b, 0x50},
+		{0x579c, 0x00},
+		{0x579d, 0x2c},
+		{0x579e, 0x0c},
+		{0x579f, 0x40},
+		{0x57a0, 0x09},
+		{0x57a1, 0x40},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00}
 };
 
-static const char * const ov8856_test_pattern_menu[] = {
-	"Disabled",
-	"Standard Color Bar",
-	"Top-Bottom Darker Color Bar",
-	"Right-Left Darker Color Bar",
-	"Bottom-Top Darker Color Bar"
+static const struct ov8856_reg lane_4_mode_1640x1232[] = {
+	/* 1640x1232 resolution */
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x72},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x4c},
+		{0x3502, 0xe0},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x20},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366e, 0x08},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x27},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x14},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x00},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xaf},
+		{0x3808, 0x06},
+		{0x3809, 0x68},
+		{0x380a, 0x04},
+		{0x380b, 0xd0},
+		{0x380c, 0x0e},
+		{0x380d, 0xec},
+		{0x380e, 0x04},
+		{0x380f, 0xe8},
+		{0x3810, 0x00},
+		{0x3811, 0x04},
+		{0x3812, 0x00},
+		{0x3813, 0x05},
+		{0x3814, 0x03},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x10},
+		{0x3820, 0x90},
+		{0x3821, 0x67},
+		{0x382a, 0x03},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x17},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x05},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x58},
+		{0x481f, 0x27},
+		{0x4837, 0x16},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x57},
+		{0x5001, 0x0a},
+		{0x5004, 0x04},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x5795, 0x00},
+		{0x5796, 0x10},
+		{0x5797, 0x10},
+		{0x5798, 0x73},
+		{0x5799, 0x73},
+		{0x579a, 0x00},
+		{0x579b, 0x28},
+		{0x579c, 0x00},
+		{0x579d, 0x16},
+		{0x579e, 0x06},
+		{0x579f, 0x20},
+		{0x57a0, 0x04},
+		{0x57a1, 0xa0},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00}
 };
 
-static const s64 link_freq_menu_items[] = {
-	OV8856_LINK_FREQ_360MHZ,
-	OV8856_LINK_FREQ_180MHZ
+static const struct ov8856_reg lane_4_mode_3264x2448[] = {
+	/* 3264x2448 resolution */
+		{0x0103, 0x01},
+		{0x0302, 0x3c},
+		{0x0303, 0x01},
+		{0x031e, 0x0c},
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x72},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x9a},
+		{0x3502, 0x20},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x60},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366d, 0x00},
+		{0x366e, 0x10},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x23},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x04},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x0c},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xa3},
+		{0x3808, 0x0c},
+		{0x3809, 0xc0},
+		{0x380a, 0x09},
+		{0x380b, 0x90},
+		{0x380c, 0x07},
+		{0x380d, 0x8c},
+		{0x380e, 0x09},
+		{0x380f, 0xb2},
+		{0x3810, 0x00},
+		{0x3811, 0x04},
+		{0x3812, 0x00},
+		{0x3813, 0x01},
+		{0x3814, 0x01},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x10},
+		{0x3820, 0x80},
+		{0x3821, 0x46},
+		{0x382a, 0x01},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x17},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x0b},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4502, 0x50},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x50},
+		{0x481f, 0x27},
+		{0x4823, 0x3c},
+		{0x482b, 0x00},
+		{0x4831, 0x66},
+		{0x4837, 0x16},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x77},
+		{0x5001, 0x0a},
+		{0x5003, 0xc8},
+		{0x5004, 0x04},
+		{0x5006, 0x00},
+		{0x5007, 0x00},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x5795, 0x02},
+		{0x5796, 0x20},
+		{0x5797, 0x20},
+		{0x5798, 0xd5},
+		{0x5799, 0xd5},
+		{0x579a, 0x00},
+		{0x579b, 0x50},
+		{0x579c, 0x00},
+		{0x579d, 0x2c},
+		{0x579e, 0x0c},
+		{0x579f, 0x40},
+		{0x57a0, 0x09},
+		{0x57a1, 0x40},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00},
+		{0x5e10, 0xfc}
 };
 
-static const struct ov8856_link_freq_config link_freq_configs[] = {
-	[OV8856_LINK_FREQ_720MBPS] = {
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mipi_data_rate_720mbps),
-			.regs = mipi_data_rate_720mbps,
-		}
-	},
-	[OV8856_LINK_FREQ_360MBPS] = {
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mipi_data_rate_360mbps),
-			.regs = mipi_data_rate_360mbps,
-		}
-	}
+static const struct ov8856_reg lane_4_mode_1632x1224[] = {
+	/* 1632x1224 resolution */
+		{0x0103, 0x01},
+		{0x0302, 0x3c},
+		{0x0303, 0x01},
+		{0x031e, 0x0c},
+		{0x3000, 0x20},
+		{0x3003, 0x08},
+		{0x300e, 0x20},
+		{0x3010, 0x00},
+		{0x3015, 0x84},
+		{0x3018, 0x72},
+		{0x3021, 0x23},
+		{0x3033, 0x24},
+		{0x3500, 0x00},
+		{0x3501, 0x4c},
+		{0x3502, 0xe0},
+		{0x3503, 0x08},
+		{0x3505, 0x83},
+		{0x3508, 0x01},
+		{0x3509, 0x80},
+		{0x350c, 0x00},
+		{0x350d, 0x80},
+		{0x350e, 0x04},
+		{0x350f, 0x00},
+		{0x3510, 0x00},
+		{0x3511, 0x02},
+		{0x3512, 0x00},
+		{0x3600, 0x72},
+		{0x3601, 0x40},
+		{0x3602, 0x30},
+		{0x3610, 0xc5},
+		{0x3611, 0x58},
+		{0x3612, 0x5c},
+		{0x3613, 0xca},
+		{0x3614, 0x60},
+		{0x3628, 0xff},
+		{0x3629, 0xff},
+		{0x362a, 0xff},
+		{0x3633, 0x10},
+		{0x3634, 0x10},
+		{0x3635, 0x10},
+		{0x3636, 0x10},
+		{0x3663, 0x08},
+		{0x3669, 0x34},
+		{0x366d, 0x00},
+		{0x366e, 0x08},
+		{0x3706, 0x86},
+		{0x370b, 0x7e},
+		{0x3714, 0x27},
+		{0x3730, 0x12},
+		{0x3733, 0x10},
+		{0x3764, 0x00},
+		{0x3765, 0x00},
+		{0x3769, 0x62},
+		{0x376a, 0x2a},
+		{0x376b, 0x30},
+		{0x3780, 0x00},
+		{0x3781, 0x24},
+		{0x3782, 0x00},
+		{0x3783, 0x23},
+		{0x3798, 0x2f},
+		{0x37a1, 0x60},
+		{0x37a8, 0x6a},
+		{0x37ab, 0x3f},
+		{0x37c2, 0x14},
+		{0x37c3, 0xf1},
+		{0x37c9, 0x80},
+		{0x37cb, 0x16},
+		{0x37cc, 0x16},
+		{0x37cd, 0x16},
+		{0x37ce, 0x16},
+		{0x3800, 0x00},
+		{0x3801, 0x00},
+		{0x3802, 0x00},
+		{0x3803, 0x0c},
+		{0x3804, 0x0c},
+		{0x3805, 0xdf},
+		{0x3806, 0x09},
+		{0x3807, 0xa3},
+		{0x3808, 0x06},
+		{0x3809, 0x60},
+		{0x380a, 0x04},
+		{0x380b, 0xc8},
+		{0x380c, 0x07},
+		{0x380d, 0x8c},
+		{0x380e, 0x09},
+		{0x380f, 0xb2},
+		{0x3810, 0x00},
+		{0x3811, 0x02},
+		{0x3812, 0x00},
+		{0x3813, 0x01},
+		{0x3814, 0x03},
+		{0x3815, 0x01},
+		{0x3816, 0x00},
+		{0x3817, 0x00},
+		{0x3818, 0x00},
+		{0x3819, 0x10},
+		{0x3820, 0x80},
+		{0x3821, 0x47},
+		{0x382a, 0x03},
+		{0x382b, 0x01},
+		{0x3830, 0x06},
+		{0x3836, 0x02},
+		{0x3862, 0x04},
+		{0x3863, 0x08},
+		{0x3cc0, 0x33},
+		{0x3d85, 0x17},
+		{0x3d8c, 0x73},
+		{0x3d8d, 0xde},
+		{0x4001, 0xe0},
+		{0x4003, 0x40},
+		{0x4008, 0x00},
+		{0x4009, 0x05},
+		{0x400a, 0x00},
+		{0x400b, 0x84},
+		{0x400f, 0x80},
+		{0x4010, 0xf0},
+		{0x4011, 0xff},
+		{0x4012, 0x02},
+		{0x4013, 0x01},
+		{0x4014, 0x01},
+		{0x4015, 0x01},
+		{0x4042, 0x00},
+		{0x4043, 0x80},
+		{0x4044, 0x00},
+		{0x4045, 0x80},
+		{0x4046, 0x00},
+		{0x4047, 0x80},
+		{0x4048, 0x00},
+		{0x4049, 0x80},
+		{0x4041, 0x03},
+		{0x404c, 0x20},
+		{0x404d, 0x00},
+		{0x404e, 0x20},
+		{0x4203, 0x80},
+		{0x4307, 0x30},
+		{0x4317, 0x00},
+		{0x4502, 0x50},
+		{0x4503, 0x08},
+		{0x4601, 0x80},
+		{0x4800, 0x44},
+		{0x4816, 0x53},
+		{0x481b, 0x50},
+		{0x481f, 0x27},
+		{0x4823, 0x3c},
+		{0x482b, 0x00},
+		{0x4831, 0x66},
+		{0x4837, 0x16},
+		{0x483c, 0x0f},
+		{0x484b, 0x05},
+		{0x5000, 0x77},
+		{0x5001, 0x0a},
+		{0x5003, 0xc8},
+		{0x5004, 0x04},
+		{0x5006, 0x00},
+		{0x5007, 0x00},
+		{0x502e, 0x03},
+		{0x5030, 0x41},
+		{0x5795, 0x00},
+		{0x5796, 0x10},
+		{0x5797, 0x10},
+		{0x5798, 0x73},
+		{0x5799, 0x73},
+		{0x579a, 0x00},
+		{0x579b, 0x28},
+		{0x579c, 0x00},
+		{0x579d, 0x16},
+		{0x579e, 0x06},
+		{0x579f, 0x20},
+		{0x57a0, 0x04},
+		{0x57a1, 0xa0},
+		{0x5780, 0x14},
+		{0x5781, 0x0f},
+		{0x5782, 0x44},
+		{0x5783, 0x02},
+		{0x5784, 0x01},
+		{0x5785, 0x01},
+		{0x5786, 0x00},
+		{0x5787, 0x04},
+		{0x5788, 0x02},
+		{0x5789, 0x0f},
+		{0x578a, 0xfd},
+		{0x578b, 0xf5},
+		{0x578c, 0xf5},
+		{0x578d, 0x03},
+		{0x578e, 0x08},
+		{0x578f, 0x0c},
+		{0x5790, 0x08},
+		{0x5791, 0x04},
+		{0x5792, 0x00},
+		{0x5793, 0x52},
+		{0x5794, 0xa3},
+		{0x59f8, 0x3d},
+		{0x5a08, 0x02},
+		{0x5b00, 0x02},
+		{0x5b01, 0x10},
+		{0x5b02, 0x03},
+		{0x5b03, 0xcf},
+		{0x5b05, 0x6c},
+		{0x5e00, 0x00},
+		{0x5e10, 0xfc}
 };
 
-static const struct ov8856_mode supported_modes[] = {
-	{
-		.width = 3280,
-		.height = 2464,
-		.hts = 1928,
-		.vts_def = 2488,
-		.vts_min = 2488,
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mode_3280x2464_regs),
-			.regs = mode_3280x2464_regs,
-		},
-		.link_freq_index = OV8856_LINK_FREQ_720MBPS,
-	},
-	{
-		.width = 3264,
-		.height = 2448,
-		.hts = 1932,
-		.vts_def = 2482,
-		.vts_min = 2482,
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mode_3264x2448_regs),
-			.regs = mode_3264x2448_regs,
-		},
-		.link_freq_index = OV8856_LINK_FREQ_720MBPS,
-	},
-	{
-		.width = 1640,
-		.height = 1232,
-		.hts = 3820,
-		.vts_def = 1256,
-		.vts_min = 1256,
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mode_1640x1232_regs),
-			.regs = mode_1640x1232_regs,
-		},
-		.link_freq_index = OV8856_LINK_FREQ_360MBPS,
-	},
-	{
-		.width = 1632,
-		.height = 1224,
-		.hts = 1932,
-		.vts_def = 2482,
-		.vts_min = 2482,
-		.reg_list = {
-			.num_of_regs = ARRAY_SIZE(mode_1632x1224_regs),
-			.regs = mode_1632x1224_regs,
-		},
-		.link_freq_index = OV8856_LINK_FREQ_360MBPS,
-	}
+static const char * const ov8856_test_pattern_menu[] = {
+	"Disabled",
+	"Standard Color Bar",
+	"Top-Bottom Darker Color Bar",
+	"Right-Left Darker Color Bar",
+	"Bottom-Top Darker Color Bar"
 };
 
 struct ov8856 {
@@ -1037,20 +1385,173 @@ struct ov8856 {
 
 	/* Streaming on/off */
 	bool streaming;
+
+	/* lanes index */
+	u8 nlanes;
+
+	const struct ov8856_lane_cfg *priv_lane;
+	u8 modes_size;
+};
+
+struct ov8856_lane_cfg {
+	const s64 link_freq_menu_items[2];
+	const struct ov8856_link_freq_config link_freq_configs[2];
+	const struct ov8856_mode supported_modes[4];
+};
+
+static const struct ov8856_lane_cfg lane_cfg_2 = {
+	{
+		720000000,
+		360000000,
+	},
+	{{
+		.reg_list = {
+			.num_of_regs =
+			ARRAY_SIZE(mipi_data_rate_lane_2.regs_0),
+			.regs = mipi_data_rate_lane_2.regs_0,
+		}
+	},
+	{
+		.reg_list = {
+			.num_of_regs =
+			ARRAY_SIZE(mipi_data_rate_lane_2.regs_1),
+			.regs = mipi_data_rate_lane_2.regs_1,
+		}
+	}},
+	{{
+		.width = 3280,
+		.height = 2464,
+		.hts = 1928,
+		.vts_def = 2488,
+		.vts_min = 2488,
+		.reg_list = {
+			.num_of_regs =
+			ARRAY_SIZE(lane_2_mode_3280x2464),
+			.regs = lane_2_mode_3280x2464,
+		},
+		.link_freq_index = 0,
+		.data_lanes = 2,
+	},
+	{
+		.width = 1640,
+		.height = 1232,
+		.hts = 3168,
+		.vts_def = 1514,
+		.vts_min = 1514,
+		.reg_list = {
+			.num_of_regs =
+			ARRAY_SIZE(lane_2_mode_1640x1232),
+			.regs = lane_2_mode_1640x1232,
+		},
+		.link_freq_index = 1,
+		.data_lanes = 2,
+	}}
 };
 
-static u64 to_pixel_rate(u32 f_index)
+static const struct ov8856_lane_cfg lane_cfg_4 = {
+		{
+			360000000,
+			180000000,
+		},
+		{{
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(mipi_data_rate_lane_4.regs_0),
+				.regs = mipi_data_rate_lane_4.regs_0,
+			}
+		},
+		{
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(mipi_data_rate_lane_4.regs_1),
+				.regs = mipi_data_rate_lane_4.regs_1,
+			}
+		}},
+		{{
+			.width = 3280,
+			.height = 2464,
+			.hts = 1928,
+			.vts_def = 2488,
+			.vts_min = 2488,
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(lane_4_mode_3280x2464),
+				.regs = lane_4_mode_3280x2464,
+			},
+			.link_freq_index = 0,
+			.data_lanes = 4,
+		},
+		{
+			.width = 1640,
+			.height = 1232,
+			.hts = 3820,
+			.vts_def = 1256,
+			.vts_min = 1256,
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(lane_4_mode_1640x1232),
+				.regs = lane_4_mode_1640x1232,
+			},
+			.link_freq_index = 1,
+			.data_lanes = 4,
+		},
+		{
+			.width = 3264,
+			.height = 2448,
+			.hts = 1932,
+			.vts_def = 2482,
+			.vts_min = 2482,
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(lane_4_mode_3264x2448),
+				.regs = lane_4_mode_3264x2448,
+			},
+			.link_freq_index = 0,
+			.data_lanes = 4,
+		},
+		{
+			.width = 1632,
+			.height = 1224,
+			.hts = 1932,
+			.vts_def = 2482,
+			.vts_min = 2482,
+			.reg_list = {
+				.num_of_regs =
+				 ARRAY_SIZE(lane_4_mode_1632x1224),
+				.regs = lane_4_mode_1632x1224,
+			},
+			.link_freq_index = 1,
+			.data_lanes = 4,
+		}}
+};
+
+static unsigned int ov8856_modes_num(const struct ov8856 *ov8856)
 {
-	u64 pixel_rate = link_freq_menu_items[f_index] * 2 * OV8856_DATA_LANES;
+	unsigned int i, count = 0;
+
+	for (i = 0; i < ARRAY_SIZE(ov8856->priv_lane->supported_modes); i++) {
+		if (ov8856->priv_lane->supported_modes[i].width == 0)
+			break;
+		count++;
+	}
+
+	return count;
+}
+
+static u64 to_rate(const s64 *link_freq_menu_items,
+		   u32 f_index, u8 nlanes)
+{
+	u64 pixel_rate = link_freq_menu_items[f_index] * 2 * nlanes;
 
 	do_div(pixel_rate, OV8856_RGB_DEPTH);
 
 	return pixel_rate;
 }
 
-static u64 to_pixels_per_line(u32 hts, u32 f_index)
+static u64 to_pixels_per_line(const s64 *link_freq_menu_items, u32 hts,
+			      u32 f_index, u8 nlanes)
 {
-	u64 ppl = hts * to_pixel_rate(f_index);
+	u64 ppl = hts * to_rate(link_freq_menu_items, f_index, nlanes);
 
 	do_div(ppl, OV8856_SCLK);
 
@@ -1229,23 +1730,32 @@ static int ov8856_init_controls(struct ov8856 *ov8856)
 	ctrl_hdlr->lock = &ov8856->mutex;
 	ov8856->link_freq = v4l2_ctrl_new_int_menu(ctrl_hdlr, &ov8856_ctrl_ops,
 					   V4L2_CID_LINK_FREQ,
-					   ARRAY_SIZE(link_freq_menu_items) - 1,
-					   0, link_freq_menu_items);
+					   ARRAY_SIZE
+					   (ov8856->priv_lane->link_freq_menu_items)
+					   - 1,
+					   0, ov8856->priv_lane->link_freq_menu_items);
 	if (ov8856->link_freq)
 		ov8856->link_freq->flags |= V4L2_CTRL_FLAG_READ_ONLY;
 
 	ov8856->pixel_rate = v4l2_ctrl_new_std(ctrl_hdlr, &ov8856_ctrl_ops,
 				       V4L2_CID_PIXEL_RATE, 0,
-				       to_pixel_rate(OV8856_LINK_FREQ_720MBPS),
-				       1,
-				       to_pixel_rate(OV8856_LINK_FREQ_720MBPS));
+				       to_rate(ov8856->priv_lane->link_freq_menu_items,
+					       0,
+					       ov8856->cur_mode->data_lanes), 1,
+				       to_rate(ov8856->priv_lane->link_freq_menu_items,
+					       0,
+					       ov8856->cur_mode->data_lanes));
 	ov8856->vblank = v4l2_ctrl_new_std(ctrl_hdlr, &ov8856_ctrl_ops,
 			  V4L2_CID_VBLANK,
 			  ov8856->cur_mode->vts_min - ov8856->cur_mode->height,
 			  OV8856_VTS_MAX - ov8856->cur_mode->height, 1,
-			  ov8856->cur_mode->vts_def - ov8856->cur_mode->height);
-	h_blank = to_pixels_per_line(ov8856->cur_mode->hts,
-		  ov8856->cur_mode->link_freq_index) - ov8856->cur_mode->width;
+			  ov8856->cur_mode->vts_def -
+			  ov8856->cur_mode->height);
+	h_blank = to_pixels_per_line(ov8856->priv_lane->link_freq_menu_items,
+				     ov8856->cur_mode->hts,
+				     ov8856->cur_mode->link_freq_index,
+				     ov8856->cur_mode->data_lanes) -
+				     ov8856->cur_mode->width;
 	ov8856->hblank = v4l2_ctrl_new_std(ctrl_hdlr, &ov8856_ctrl_ops,
 					   V4L2_CID_HBLANK, h_blank, h_blank, 1,
 					   h_blank);
@@ -1292,7 +1802,8 @@ static int ov8856_start_streaming(struct ov8856 *ov8856)
 	int link_freq_index, ret;
 
 	link_freq_index = ov8856->cur_mode->link_freq_index;
-	reg_list = &link_freq_configs[link_freq_index].reg_list;
+	reg_list = &ov8856->priv_lane->link_freq_configs[link_freq_index].reg_list;
+
 	ret = ov8856_write_reg_list(ov8856, reg_list);
 	if (ret) {
 		dev_err(&client->dev, "failed to set plls");
@@ -1461,9 +1972,9 @@ static int ov8856_set_format(struct v4l2_subdev *sd,
 	const struct ov8856_mode *mode;
 	s32 vblank_def, h_blank;
 
-	mode = v4l2_find_nearest_size(supported_modes,
-				      ARRAY_SIZE(supported_modes), width,
-				      height, fmt->format.width,
+	mode = v4l2_find_nearest_size(ov8856->priv_lane->supported_modes,
+				      ov8856->modes_size,
+				      width, height, fmt->format.width,
 				      fmt->format.height);
 
 	mutex_lock(&ov8856->mutex);
@@ -1474,7 +1985,9 @@ static int ov8856_set_format(struct v4l2_subdev *sd,
 		ov8856->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov8856->link_freq, mode->link_freq_index);
 		__v4l2_ctrl_s_ctrl_int64(ov8856->pixel_rate,
-					 to_pixel_rate(mode->link_freq_index));
+					 to_rate(ov8856->priv_lane->link_freq_menu_items,
+						 mode->link_freq_index,
+						 ov8856->cur_mode->data_lanes));
 
 		/* Update limits and set FPS to default */
 		vblank_def = mode->vts_def - mode->height;
@@ -1483,8 +1996,11 @@ static int ov8856_set_format(struct v4l2_subdev *sd,
 					 OV8856_VTS_MAX - mode->height, 1,
 					 vblank_def);
 		__v4l2_ctrl_s_ctrl(ov8856->vblank, vblank_def);
-		h_blank = to_pixels_per_line(mode->hts, mode->link_freq_index) -
-			  mode->width;
+		h_blank = to_pixels_per_line(ov8856->priv_lane->link_freq_menu_items,
+					     mode->hts,
+					     mode->link_freq_index,
+					     ov8856->cur_mode->data_lanes)
+					     - mode->width;
 		__v4l2_ctrl_modify_range(ov8856->hblank, h_blank, h_blank, 1,
 					 h_blank);
 	}
@@ -1529,15 +2045,17 @@ static int ov8856_enum_frame_size(struct v4l2_subdev *sd,
 				  struct v4l2_subdev_pad_config *cfg,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
-	if (fse->index >= ARRAY_SIZE(supported_modes))
+	struct ov8856 *ov8856 = to_ov8856(sd);
+
+	if (fse->index >= ov8856->modes_size)
 		return -EINVAL;
 
 	if (fse->code != MEDIA_BUS_FMT_SGRBG10_1X10)
 		return -EINVAL;
 
-	fse->min_width = supported_modes[fse->index].width;
+	fse->min_width = ov8856->priv_lane->supported_modes[fse->index].width;
 	fse->max_width = fse->min_width;
-	fse->min_height = supported_modes[fse->index].height;
+	fse->min_height = ov8856->priv_lane->supported_modes[fse->index].height;
 	fse->max_height = fse->min_height;
 
 	return 0;
@@ -1548,7 +2066,7 @@ static int ov8856_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	struct ov8856 *ov8856 = to_ov8856(sd);
 
 	mutex_lock(&ov8856->mutex);
-	ov8856_update_pad_format(&supported_modes[0],
+	ov8856_update_pad_format(&ov8856->priv_lane->supported_modes[0],
 				 v4l2_subdev_get_try_format(sd, fh->pad, 0));
 	mutex_unlock(&ov8856->mutex);
 
@@ -1695,29 +2213,40 @@ static int ov8856_get_hwcfg(struct ov8856 *ov8856, struct device *dev)
 	if (ret)
 		return ret;
 
-	if (bus_cfg.bus.mipi_csi2.num_data_lanes != OV8856_DATA_LANES) {
+	/* Get number of data lanes */
+	if (bus_cfg.bus.mipi_csi2.num_data_lanes != 2 &&
+	    bus_cfg.bus.mipi_csi2.num_data_lanes != 4) {
 		dev_err(dev, "number of CSI2 data lanes %d is not supported",
 			bus_cfg.bus.mipi_csi2.num_data_lanes);
 		ret = -EINVAL;
 		goto check_hwcfg_error;
 	}
 
+	dev_dbg(dev, "Using %u data lanes\n", ov8856->cur_mode->data_lanes);
+
+	if (bus_cfg.bus.mipi_csi2.num_data_lanes == 2)
+		ov8856->priv_lane = &lane_cfg_2;
+	else
+		ov8856->priv_lane = &lane_cfg_4;
+
+	ov8856->modes_size = ov8856_modes_num(ov8856);
+
 	if (!bus_cfg.nr_of_link_frequencies) {
 		dev_err(dev, "no link frequencies defined");
 		ret = -EINVAL;
 		goto check_hwcfg_error;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(link_freq_menu_items); i++) {
+	for (i = 0; i < ARRAY_SIZE(ov8856->priv_lane->link_freq_menu_items); i++) {
 		for (j = 0; j < bus_cfg.nr_of_link_frequencies; j++) {
-			if (link_freq_menu_items[i] ==
-				bus_cfg.link_frequencies[j])
+			if (ov8856->priv_lane->link_freq_menu_items[i] ==
+			    bus_cfg.link_frequencies[j])
 				break;
 		}
 
 		if (j == bus_cfg.nr_of_link_frequencies) {
 			dev_err(dev, "no link frequency %lld supported",
-				link_freq_menu_items[i]);
+				ov8856->priv_lane->link_freq_menu_items[i]);
 			ret = -EINVAL;
 			goto check_hwcfg_error;
 		}
@@ -1776,7 +2305,7 @@ static int ov8856_probe(struct i2c_client *client)
 	}
 
 	mutex_init(&ov8856->mutex);
-	ov8856->cur_mode = &supported_modes[0];
+	ov8856->cur_mode = &ov8856->priv_lane->supported_modes[0];
 	ret = ov8856_init_controls(ov8856);
 	if (ret) {
 		dev_err(&client->dev, "failed to init controls: %d", ret);
-- 
GitLab


From c19b93a69c8ea6d672b786d1e130e9b4260b4e71 Mon Sep 17 00:00:00 2001
From: Shawn Tu <shawnx.tu@intel.com>
Date: Fri, 16 Apr 2021 11:58:59 +0200
Subject: [PATCH 2009/3804] media: ov8856: add vflip/hflip control support

Add V4L2 controls for horizontal and vertical flip. The SGRBG10 Bayer
order of the output is kept when hflip/vflip is changed.

Signed-off-by: Shawn Tu <shawnx.tu@intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov8856.c | 118 +++++++++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c
index d145f004fd8d8..a6bc665a64309 100644
--- a/drivers/media/i2c/ov8856.c
+++ b/drivers/media/i2c/ov8856.c
@@ -80,6 +80,25 @@
 #define NUM_MODE_REGS				187
 #define NUM_MODE_REGS_2				200
 
+/* Flip Mirror Controls from sensor */
+#define OV8856_REG_FORMAT1			0x3820
+#define OV8856_REG_FORMAT2			0x3821
+#define OV8856_REG_FORMAT1_OP_1			BIT(1)
+#define OV8856_REG_FORMAT1_OP_2			BIT(2)
+#define OV8856_REG_FORMAT1_OP_3			BIT(6)
+#define OV8856_REG_FORMAT2_OP_1			BIT(1)
+#define OV8856_REG_FORMAT2_OP_2			BIT(2)
+#define OV8856_REG_FORMAT2_OP_3			BIT(6)
+#define OV8856_REG_FLIP_OPT_1			0x376b
+#define OV8856_REG_FLIP_OPT_2			0x5001
+#define OV8856_REG_FLIP_OPT_3			0x502e
+#define OV8856_REG_MIRROR_OPT_1			0x5004
+#define OV8856_REG_FLIP_OP_0			BIT(0)
+#define OV8856_REG_FLIP_OP_1			BIT(1)
+#define OV8856_REG_FLIP_OP_2			BIT(2)
+#define OV8856_REG_MIRROR_OP_1			BIT(1)
+#define OV8856_REG_MIRROR_OP_2			BIT(2)
+
 #define to_ov8856(_sd)			container_of(_sd, struct ov8856, sd)
 
 static const char * const ov8856_supply_names[] = {
@@ -1653,6 +1672,93 @@ static int ov8856_test_pattern(struct ov8856 *ov8856, u32 pattern)
 				OV8856_REG_VALUE_08BIT, pattern);
 }
 
+static int ov8856_set_ctrl_hflip(struct ov8856 *ov8856, u32 ctrl_val)
+{
+	int ret;
+	u32 val;
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_MIRROR_OPT_1,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	ret = ov8856_write_reg(ov8856, OV8856_REG_MIRROR_OPT_1,
+			       OV8856_REG_VALUE_08BIT,
+			       ctrl_val ? val & ~OV8856_REG_MIRROR_OP_2 :
+			       val | OV8856_REG_MIRROR_OP_2);
+
+	if (ret)
+		return ret;
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_FORMAT2,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	return ov8856_write_reg(ov8856, OV8856_REG_FORMAT2,
+				OV8856_REG_VALUE_08BIT,
+				ctrl_val ? val & ~OV8856_REG_FORMAT2_OP_1 &
+				~OV8856_REG_FORMAT2_OP_2 &
+				~OV8856_REG_FORMAT2_OP_3 :
+				val | OV8856_REG_FORMAT2_OP_1 |
+				OV8856_REG_FORMAT2_OP_2 |
+				OV8856_REG_FORMAT2_OP_3);
+}
+
+static int ov8856_set_ctrl_vflip(struct ov8856 *ov8856, u8 ctrl_val)
+{
+	int ret;
+	u32 val;
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_FLIP_OPT_1,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	ret = ov8856_write_reg(ov8856, OV8856_REG_FLIP_OPT_1,
+			       OV8856_REG_VALUE_08BIT,
+			       ctrl_val ? val | OV8856_REG_FLIP_OP_1 |
+			       OV8856_REG_FLIP_OP_2 :
+			       val & ~OV8856_REG_FLIP_OP_1 &
+			       ~OV8856_REG_FLIP_OP_2);
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_FLIP_OPT_2,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	ret = ov8856_write_reg(ov8856, OV8856_REG_FLIP_OPT_2,
+			       OV8856_REG_VALUE_08BIT,
+			       ctrl_val ? val | OV8856_REG_FLIP_OP_2 :
+			       val & ~OV8856_REG_FLIP_OP_2);
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_FLIP_OPT_3,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	ret = ov8856_write_reg(ov8856, OV8856_REG_FLIP_OPT_3,
+			       OV8856_REG_VALUE_08BIT,
+			       ctrl_val ? val & ~OV8856_REG_FLIP_OP_0 &
+			       ~OV8856_REG_FLIP_OP_1 :
+			       val | OV8856_REG_FLIP_OP_0 |
+			       OV8856_REG_FLIP_OP_1);
+
+	ret = ov8856_read_reg(ov8856, OV8856_REG_FORMAT1,
+			      OV8856_REG_VALUE_08BIT, &val);
+	if (ret)
+		return ret;
+
+	return ov8856_write_reg(ov8856, OV8856_REG_FORMAT1,
+			       OV8856_REG_VALUE_08BIT,
+			       ctrl_val ? val | OV8856_REG_FORMAT1_OP_1 |
+			       OV8856_REG_FORMAT1_OP_3 |
+			       OV8856_REG_FORMAT1_OP_2 :
+			       val & ~OV8856_REG_FORMAT1_OP_1 &
+			       ~OV8856_REG_FORMAT1_OP_3 &
+			       ~OV8856_REG_FORMAT1_OP_2);
+}
+
 static int ov8856_set_ctrl(struct v4l2_ctrl *ctrl)
 {
 	struct ov8856 *ov8856 = container_of(ctrl->handler,
@@ -1702,6 +1808,14 @@ static int ov8856_set_ctrl(struct v4l2_ctrl *ctrl)
 		ret = ov8856_test_pattern(ov8856, ctrl->val);
 		break;
 
+	case V4L2_CID_HFLIP:
+		ret = ov8856_set_ctrl_hflip(ov8856, ctrl->val);
+		break;
+
+	case V4L2_CID_VFLIP:
+		ret = ov8856_set_ctrl_vflip(ov8856, ctrl->val);
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -1778,6 +1892,10 @@ static int ov8856_init_controls(struct ov8856 *ov8856)
 				     V4L2_CID_TEST_PATTERN,
 				     ARRAY_SIZE(ov8856_test_pattern_menu) - 1,
 				     0, 0, ov8856_test_pattern_menu);
+	v4l2_ctrl_new_std(ctrl_hdlr, &ov8856_ctrl_ops,
+			  V4L2_CID_HFLIP, 0, 1, 1, 0);
+	v4l2_ctrl_new_std(ctrl_hdlr, &ov8856_ctrl_ops,
+			  V4L2_CID_VFLIP, 0, 1, 1, 0);
 	if (ctrl_hdlr->error)
 		return ctrl_hdlr->error;
 
-- 
GitLab


From cef944c8f5ae192636f53682797d62bd61859646 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Thu, 29 Apr 2021 22:59:19 +0200
Subject: [PATCH 2010/3804] media: staging: ipu3-imgu: Document pages field

The documentation for the pages field in struct imgu_css_map was missing.
Add it.

Reported-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/ipu3/ipu3-css-pool.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/media/ipu3/ipu3-css-pool.h b/drivers/staging/media/ipu3/ipu3-css-pool.h
index 35519a08c08c9..3f9e32e0e9a7c 100644
--- a/drivers/staging/media/ipu3/ipu3-css-pool.h
+++ b/drivers/staging/media/ipu3/ipu3-css-pool.h
@@ -15,6 +15,7 @@ struct imgu_device;
  * @size:		size of the buffer in bytes.
  * @vaddr:		kernel virtual address.
  * @daddr:		iova dma address to access IPU3.
+ * @pages:		pages mapped to this buffer
  */
 struct imgu_css_map {
 	size_t size;
-- 
GitLab


From 45dbd70c35d6a5fec4b7b45cde75b1341ede52a2 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Date: Mon, 12 Apr 2021 09:48:31 +0200
Subject: [PATCH 2011/3804] media: i2c: ov8865: remove unnecessary NULL check

The check on mode_index is sufficient to ensure that we have a
valid mode. Remove the explicit mode check similarly to
commit 38a50230292f ("media: i2c: ov5648: remove unnecessary NULL check")

Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov8865.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c
index 3bf6ee4898a91..b16c825598005 100644
--- a/drivers/media/i2c/ov8865.c
+++ b/drivers/media/i2c/ov8865.c
@@ -2689,7 +2689,7 @@ static int ov8865_enum_frame_interval(struct v4l2_subdev *subdev,
 		}
 	}
 
-	if (mode_index == ARRAY_SIZE(ov8865_modes) || !mode)
+	if (mode_index == ARRAY_SIZE(ov8865_modes))
 		return -EINVAL;
 
 	interval_enum->interval = mode->frame_interval;
-- 
GitLab


From d953e3cb4adf66322862d459451435a2eb1b7770 Mon Sep 17 00:00:00 2001
From: Shawn Tu <shawnx.tu@intel.com>
Date: Fri, 30 Apr 2021 16:05:49 +0200
Subject: [PATCH 2012/3804] media: imx208: Add imx208 camera sensor driver

Add a V4L2 sub-device driver for the Sony IMX208 image sensor.
This is a camera sensor using the I2C bus for control and the
CSI-2 bus for data.

[Sakari Ailus: Rename sensor async register function to make it compile,
	       use exposure_max and wrap a few long lines.]

Signed-off-by: Ping-Chung Chen <ping-chung.chen@intel.com>
Signed-off-by: Yeh, Andy <andy.yeh@intel.com>
Signed-off-by: Shawn Tu <shawnx.tu@intel.com>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 MAINTAINERS                |    7 +
 drivers/media/i2c/Kconfig  |   11 +
 drivers/media/i2c/Makefile |    1 +
 drivers/media/i2c/imx208.c | 1087 ++++++++++++++++++++++++++++++++++++
 4 files changed, 1106 insertions(+)
 create mode 100644 drivers/media/i2c/imx208.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 768f4ba6b349f..0fee01ceb1515 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17002,6 +17002,13 @@ S:	Maintained
 F:	drivers/ssb/
 F:	include/linux/ssb/
 
+SONY IMX208 SENSOR DRIVER
+M:	Sakari Ailus <sakari.ailus@linux.intel.com>
+L:	linux-media@vger.kernel.org
+S:	Maintained
+T:	git git://linuxtv.org/media_tree.git
+F:	drivers/media/i2c/imx208.c
+
 SONY IMX214 SENSOR DRIVER
 M:	Ricardo Ribalda <ribalda@kernel.org>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index 4f1dafc648160..588f8eb959844 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig
@@ -742,6 +742,17 @@ config VIDEO_HI556
 	  To compile this driver as a module, choose M here: the
 	  module will be called hi556.
 
+config VIDEO_IMX208
+	tristate "Sony IMX208 sensor support"
+	depends on I2C && VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
+	depends on MEDIA_CAMERA_SUPPORT
+	help
+	  This is a Video4Linux2 sensor driver for the Sony
+	  IMX208 camera.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called imx208.
+
 config VIDEO_IMX214
 	tristate "Sony IMX214 sensor support"
 	depends on GPIOLIB && I2C && VIDEO_V4L2
diff --git a/drivers/media/i2c/Makefile b/drivers/media/i2c/Makefile
index 0c067beca0666..1168fa6b84ed8 100644
--- a/drivers/media/i2c/Makefile
+++ b/drivers/media/i2c/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_VIDEO_ML86V7667)	+= ml86v7667.o
 obj-$(CONFIG_VIDEO_OV2659)	+= ov2659.o
 obj-$(CONFIG_VIDEO_TC358743)	+= tc358743.o
 obj-$(CONFIG_VIDEO_HI556)	+= hi556.o
+obj-$(CONFIG_VIDEO_IMX208)	+= imx208.o
 obj-$(CONFIG_VIDEO_IMX214)	+= imx214.o
 obj-$(CONFIG_VIDEO_IMX219)	+= imx219.o
 obj-$(CONFIG_VIDEO_IMX258)	+= imx258.o
diff --git a/drivers/media/i2c/imx208.c b/drivers/media/i2c/imx208.c
new file mode 100644
index 0000000000000..9ed261ea72553
--- /dev/null
+++ b/drivers/media/i2c/imx208.c
@@ -0,0 +1,1087 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2021 Intel Corporation
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <asm/unaligned.h>
+
+#define IMX208_REG_MODE_SELECT		0x0100
+#define IMX208_MODE_STANDBY		0x00
+#define IMX208_MODE_STREAMING		0x01
+
+/* Chip ID */
+#define IMX208_REG_CHIP_ID		0x0000
+#define IMX208_CHIP_ID			0x0208
+
+/* V_TIMING internal */
+#define IMX208_REG_VTS			0x0340
+#define IMX208_VTS_60FPS		0x0472
+#define IMX208_VTS_BINNING		0x0239
+#define IMX208_VTS_60FPS_MIN		0x0458
+#define IMX208_VTS_BINNING_MIN		0x0230
+#define IMX208_VTS_MAX			0xffff
+
+/* HBLANK control - read only */
+#define IMX208_PPL_384MHZ		2248
+#define IMX208_PPL_96MHZ		2248
+
+/* Exposure control */
+#define IMX208_REG_EXPOSURE		0x0202
+#define IMX208_EXPOSURE_MIN		4
+#define IMX208_EXPOSURE_STEP		1
+#define IMX208_EXPOSURE_DEFAULT		0x190
+#define IMX208_EXPOSURE_MAX		65535
+
+/* Analog gain control */
+#define IMX208_REG_ANALOG_GAIN		0x0204
+#define IMX208_ANA_GAIN_MIN		0
+#define IMX208_ANA_GAIN_MAX		0x00e0
+#define IMX208_ANA_GAIN_STEP		1
+#define IMX208_ANA_GAIN_DEFAULT		0x0
+
+/* Digital gain control */
+#define IMX208_REG_GR_DIGITAL_GAIN	0x020e
+#define IMX208_REG_R_DIGITAL_GAIN	0x0210
+#define IMX208_REG_B_DIGITAL_GAIN	0x0212
+#define IMX208_REG_GB_DIGITAL_GAIN	0x0214
+#define IMX208_DIGITAL_GAIN_SHIFT	8
+
+/* Orientation */
+#define IMX208_REG_ORIENTATION_CONTROL	0x0101
+
+/* Test Pattern Control */
+#define IMX208_REG_TEST_PATTERN_MODE	0x0600
+#define IMX208_TEST_PATTERN_DISABLE	0x0
+#define IMX208_TEST_PATTERN_SOLID_COLOR	0x1
+#define IMX208_TEST_PATTERN_COLOR_BARS	0x2
+#define IMX208_TEST_PATTERN_GREY_COLOR	0x3
+#define IMX208_TEST_PATTERN_PN9		0x4
+#define IMX208_TEST_PATTERN_FIX_1	0x100
+#define IMX208_TEST_PATTERN_FIX_2	0x101
+#define IMX208_TEST_PATTERN_FIX_3	0x102
+#define IMX208_TEST_PATTERN_FIX_4	0x103
+#define IMX208_TEST_PATTERN_FIX_5	0x104
+#define IMX208_TEST_PATTERN_FIX_6	0x105
+
+/* OTP Access */
+#define IMX208_OTP_BASE			0x3500
+#define IMX208_OTP_SIZE			40
+
+struct imx208_reg {	/* one entry of a register table */
+	u16 address;	/* 16-bit register address */
+	u8 val;		/* 8-bit value written to that address */
+};
+
+struct imx208_reg_list {
+	u32 num_of_regs;	/* number of entries in regs */
+	const struct imx208_reg *regs;	/* table written by imx208_write_regs() */
+};
+
+/* Per link frequency configuration, indexed like link_freq_menu_items */
+struct imx208_link_freq_config {
+	u32 pixels_per_line;	/* HBLANK is this value minus the mode width */
+
+	/* PLL registers for this link frequency */
+	struct imx208_reg_list reg_list;
+};
+
+/* Mode: frame size plus the timing, link frequency and registers it needs */
+struct imx208_mode {
+	/* Frame width */
+	u32 width;
+	/* Frame height */
+	u32 height;
+
+	/* Default and minimum VTS; VBLANK limits are these minus height */
+	u32 vts_def;
+	u32 vts_min;
+
+	/* Index into link_freq_configs / link_freq_menu_items */
+	u32 link_freq_index;
+	/* Register values written for this mode when streaming starts */
+	struct imx208_reg_list reg_list;
+};
+
+static const struct imx208_reg pll_ctrl_reg[] = {
+	{0x0305, 0x02},
+	{0x0307, 0x50},
+	{0x303C, 0x3C},
+};
+
+static const struct imx208_reg mode_1936x1096_60fps_regs[] = {
+	{0x0340, 0x04},
+	{0x0341, 0x72},
+	{0x0342, 0x04},
+	{0x0343, 0x64},
+	{0x034C, 0x07},
+	{0x034D, 0x90},
+	{0x034E, 0x04},
+	{0x034F, 0x48},
+	{0x0381, 0x01},
+	{0x0383, 0x01},
+	{0x0385, 0x01},
+	{0x0387, 0x01},
+	{0x3048, 0x00},
+	{0x3050, 0x01},
+	{0x30D5, 0x00},
+	{0x3301, 0x00},
+	{0x3318, 0x62},
+	{0x0202, 0x01},
+	{0x0203, 0x90},
+	{0x0205, 0x00},
+};
+
+static const struct imx208_reg mode_968_548_60fps_regs[] = {
+	{0x0340, 0x02},
+	{0x0341, 0x39},
+	{0x0342, 0x08},
+	{0x0343, 0xC8},
+	{0x034C, 0x03},
+	{0x034D, 0xC8},
+	{0x034E, 0x02},
+	{0x034F, 0x24},
+	{0x0381, 0x01},
+	{0x0383, 0x03},
+	{0x0385, 0x01},
+	{0x0387, 0x03},
+	{0x3048, 0x01},
+	{0x3050, 0x02},
+	{0x30D5, 0x03},
+	{0x3301, 0x10},
+	{0x3318, 0x75},
+	{0x0202, 0x01},
+	{0x0203, 0x90},
+	{0x0205, 0x00},
+};
+
+static const s64 imx208_discrete_digital_gain[] = {
+	1, 2, 4, 8, 16,
+};
+
+static const char * const imx208_test_pattern_menu[] = {
+	"Disabled",
+	"Solid Color",
+	"100% Color Bar",
+	"Fade to Grey Color Bar",
+	"PN9",
+	"Fixed Pattern1",
+	"Fixed Pattern2",
+	"Fixed Pattern3",
+	"Fixed Pattern4",
+	"Fixed Pattern5",
+	"Fixed Pattern6"
+};
+
+static const int imx208_test_pattern_val[] = {
+	IMX208_TEST_PATTERN_DISABLE,
+	IMX208_TEST_PATTERN_SOLID_COLOR,
+	IMX208_TEST_PATTERN_COLOR_BARS,
+	IMX208_TEST_PATTERN_GREY_COLOR,
+	IMX208_TEST_PATTERN_PN9,
+	IMX208_TEST_PATTERN_FIX_1,
+	IMX208_TEST_PATTERN_FIX_2,
+	IMX208_TEST_PATTERN_FIX_3,
+	IMX208_TEST_PATTERN_FIX_4,
+	IMX208_TEST_PATTERN_FIX_5,
+	IMX208_TEST_PATTERN_FIX_6,
+};
+
+/* Configurations for supported link frequencies */
+#define IMX208_MHZ			(1000 * 1000ULL)
+#define IMX208_LINK_FREQ_384MHZ		(384ULL * IMX208_MHZ)
+#define IMX208_LINK_FREQ_96MHZ		(96ULL * IMX208_MHZ)
+
+#define IMX208_DATA_RATE_DOUBLE		2
+#define IMX208_NUM_OF_LANES		2
+#define IMX208_PIXEL_BITS		10
+
+enum {
+	IMX208_LINK_FREQ_384MHZ_INDEX,
+	IMX208_LINK_FREQ_96MHZ_INDEX,
+};
+
+/*
+ * Convert a link frequency into a pixel rate:
+ * f * 2 (double data rate) * 2 (lanes) / 10 (bits per pixel).
+ */
+static u64 link_freq_to_pixel_rate(u64 f)
+{
+	u64 rate = f * (IMX208_DATA_RATE_DOUBLE * IMX208_NUM_OF_LANES);
+
+	do_div(rate, IMX208_PIXEL_BITS);
+	return rate;
+}
+
+/* Menu items for LINK_FREQ V4L2 control */
+static const s64 link_freq_menu_items[] = {
+	[IMX208_LINK_FREQ_384MHZ_INDEX] = IMX208_LINK_FREQ_384MHZ,
+	[IMX208_LINK_FREQ_96MHZ_INDEX] = IMX208_LINK_FREQ_96MHZ,
+};
+
+/* Link frequency configs */
+static const struct imx208_link_freq_config link_freq_configs[] = {
+	[IMX208_LINK_FREQ_384MHZ_INDEX] = {
+		.pixels_per_line = IMX208_PPL_384MHZ,
+		.reg_list = {
+			.num_of_regs = ARRAY_SIZE(pll_ctrl_reg),
+			.regs = pll_ctrl_reg,
+		}
+	},
+	[IMX208_LINK_FREQ_96MHZ_INDEX] = {
+		.pixels_per_line = IMX208_PPL_96MHZ,
+		.reg_list = {
+			.num_of_regs = ARRAY_SIZE(pll_ctrl_reg),
+			.regs = pll_ctrl_reg,
+		}
+	},
+};
+
+/* Mode configs */
+static const struct imx208_mode supported_modes[] = {
+	{
+		.width = 1936,
+		.height = 1096,
+		.vts_def = IMX208_VTS_60FPS,
+		.vts_min = IMX208_VTS_60FPS_MIN,
+		.reg_list = {
+			.num_of_regs = ARRAY_SIZE(mode_1936x1096_60fps_regs),
+			.regs = mode_1936x1096_60fps_regs,
+		},
+		.link_freq_index = IMX208_LINK_FREQ_384MHZ_INDEX,
+	},
+	{
+		.width = 968,
+		.height = 548,
+		.vts_def = IMX208_VTS_BINNING,
+		.vts_min = IMX208_VTS_BINNING_MIN,
+		.reg_list = {
+			.num_of_regs = ARRAY_SIZE(mode_968_548_60fps_regs),
+			.regs = mode_968_548_60fps_regs,
+		},
+		.link_freq_index = IMX208_LINK_FREQ_96MHZ_INDEX,
+	},
+};
+
+struct imx208 {
+	struct v4l2_subdev sd;
+	struct media_pad pad;
+
+	struct v4l2_ctrl_handler ctrl_handler;
+	/* Controls the driver reads or updates after creating them */
+	struct v4l2_ctrl *link_freq;
+	struct v4l2_ctrl *pixel_rate;
+	struct v4l2_ctrl *vblank;
+	struct v4l2_ctrl *hblank;
+	struct v4l2_ctrl *vflip;
+	struct v4l2_ctrl *hflip;
+
+	/* Currently selected entry of supported_modes */
+	const struct imx208_mode *cur_mode;
+
+	/*
+	 * Mutex for serialized access:
+	 * Protects set pad format, start/stop streaming and the OTP read.
+	 * Also installed as the lock of ctrl_handler.
+	 */
+	struct mutex imx208_mx;
+
+	/* Streaming on/off */
+	bool streaming;
+
+	/* OTP cache: otp_read is set once otp_data holds a completed read */
+	bool otp_read;
+	char otp_data[IMX208_OTP_SIZE];
+};
+
+static inline struct imx208 *to_imx208(struct v4l2_subdev *_sd)
+{
+	return container_of(_sd, struct imx208, sd);	/* sd is embedded in imx208 */
+}
+
+/* Return the media bus code matching the current flip controls. */
+static u32 imx208_get_format_code(struct imx208 *imx208)
+{
+	/* Bayer order lookup, indexed as [vflip][hflip]. */
+	static const u32 bayer_order[2][2] = {
+		{ MEDIA_BUS_FMT_SRGGB10_1X10, MEDIA_BUS_FMT_SGRBG10_1X10, },
+		{ MEDIA_BUS_FMT_SGBRG10_1X10, MEDIA_BUS_FMT_SBGGR10_1X10, },
+	};
+	const s32 vflip = imx208->vflip->val;
+	const s32 hflip = imx208->hflip->val;
+
+	/* Only one order is reported for any given flip combination. */
+	return bayer_order[vflip][hflip];
+}
+
+/* Read up to 4 consecutive registers as one big-endian value */
+static int imx208_read_reg(struct imx208 *imx208, u16 reg, u32 len, u32 *val)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	u8 reg_be[2] = { reg >> 8, reg & 0xff };
+	u8 rx[4] = { 0, };
+	struct i2c_msg xfer[2];
+	int nmsgs;
+
+	if (len > 4)
+		return -EINVAL;
+
+	/* First message: send the 16-bit register address */
+	xfer[0].addr = client->addr;
+	xfer[0].flags = 0;
+	xfer[0].len = ARRAY_SIZE(reg_be);
+	xfer[0].buf = reg_be;
+
+	/* Second message: read len bytes, right-aligned in rx */
+	xfer[1].addr = client->addr;
+	xfer[1].flags = I2C_M_RD;
+	xfer[1].len = len;
+	xfer[1].buf = rx + (4 - len);
+
+	nmsgs = i2c_transfer(client->adapter, xfer, ARRAY_SIZE(xfer));
+	if (nmsgs != ARRAY_SIZE(xfer))
+		return -EIO;
+
+	*val = get_unaligned_be32(rx);
+
+	return 0;
+}
+
+/* Write 1 to 4 consecutive registers; val is sent big-endian, len bytes */
+static int imx208_write_reg(struct imx208 *imx208, u16 reg, u32 len, u32 val)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	u8 buf[6];
+
+	if (!len || len > 4)	/* len == 0 would shift val by 32 bits below */
+		return -EINVAL;
+
+	put_unaligned_be16(reg, buf);
+	put_unaligned_be32(val << (8 * (4 - len)), buf + 2);
+	if (i2c_master_send(client, buf, len + 2) != len + 2)
+		return -EIO;
+
+	return 0;
+}
+
+/* Write a register table in order, stopping at the first failure */
+static int imx208_write_regs(struct imx208 *imx208,
+			     const struct imx208_reg *regs, u32 len)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	const struct imx208_reg *entry = regs;
+	const struct imx208_reg *end = regs + len;
+	int err = 0;
+
+	while (entry < end) {
+		err = imx208_write_reg(imx208, entry->address, 1, entry->val);
+		if (err)
+			break;
+		entry++;
+	}
+
+	if (err)
+		dev_err_ratelimited(&client->dev,
+				    "Failed to write reg 0x%4.4x. error = %d\n",
+				    entry->address, err);
+	return err;
+}
+
+/* Open sub-device: seed the file handle's try format for pad 0 */
+static int imx208_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
+{
+	struct v4l2_mbus_framefmt *try_fmt =
+		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+
+	/* Default mode; the bayer code follows the flips like get/set_fmt */
+	try_fmt->width = supported_modes[0].width;
+	try_fmt->height = supported_modes[0].height;
+	try_fmt->code = imx208_get_format_code(to_imx208(sd));
+	try_fmt->field = V4L2_FIELD_NONE;
+
+	return 0;
+}
+
+static int imx208_update_digital_gain(struct imx208 *imx208, u32 len, u32 val)
+{
+	/* Per-channel gain registers, programmed in this order */
+	static const u16 gain_regs[] = {
+		IMX208_REG_GR_DIGITAL_GAIN,
+		IMX208_REG_GB_DIGITAL_GAIN,
+		IMX208_REG_R_DIGITAL_GAIN,
+		IMX208_REG_B_DIGITAL_GAIN,
+	};
+	u32 gain = imx208_discrete_digital_gain[val] << IMX208_DIGITAL_GAIN_SHIFT;
+	unsigned int i;
+	int err = 0;
+
+	/* val is a menu index; every channel gets the same 2-byte gain */
+	for (i = 0; i < ARRAY_SIZE(gain_regs) && !err; i++)
+		err = imx208_write_reg(imx208, gain_regs[i], 2, gain);
+
+	/* len is accepted but not used */
+	return err;
+}
+
+static int imx208_set_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct imx208 *imx208 =
+		container_of(ctrl->handler, struct imx208, ctrl_handler);
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	int ret;
+
+	/*
+	 * Only write to the sensor while runtime PM reports it in use;
+	 * otherwise return success without touching the hardware.
+	 */
+	if (!pm_runtime_get_if_in_use(&client->dev))
+		return 0;
+
+	switch (ctrl->id) {
+	case V4L2_CID_ANALOGUE_GAIN:
+		ret = imx208_write_reg(imx208, IMX208_REG_ANALOG_GAIN,
+				       2, ctrl->val);
+		break;
+	case V4L2_CID_EXPOSURE:
+		ret = imx208_write_reg(imx208, IMX208_REG_EXPOSURE,
+				       2, ctrl->val);
+		break;
+	case V4L2_CID_DIGITAL_GAIN:	/* ctrl->val is a menu index */
+		ret = imx208_update_digital_gain(imx208, 2, ctrl->val);
+		break;
+	case V4L2_CID_VBLANK:
+		/* VTS is programmed as frame height plus vertical blanking */
+		ret = imx208_write_reg(imx208, IMX208_REG_VTS, 2,
+				       imx208->cur_mode->height + ctrl->val);
+		break;
+	case V4L2_CID_TEST_PATTERN:
+		ret = imx208_write_reg(imx208, IMX208_REG_TEST_PATTERN_MODE,
+				       2, imx208_test_pattern_val[ctrl->val]);
+		break;
+	case V4L2_CID_HFLIP:
+	case V4L2_CID_VFLIP:	/* both flips share one register: bit 0 h, bit 1 v */
+		ret = imx208_write_reg(imx208, IMX208_REG_ORIENTATION_CONTROL,
+				       1,
+				       imx208->hflip->val |
+				       imx208->vflip->val << 1);
+		break;
+	default:
+		ret = -EINVAL;
+		dev_err(&client->dev,
+			"ctrl(id:0x%x,val:0x%x) is not handled\n",
+			ctrl->id, ctrl->val);
+		break;
+	}
+
+	pm_runtime_put(&client->dev);	/* pairs with get_if_in_use above */
+
+	return ret;
+}
+
+static const struct v4l2_ctrl_ops imx208_ctrl_ops = {
+	.s_ctrl = imx208_set_ctrl,
+};
+
+static const struct v4l2_ctrl_config imx208_digital_gain_control = {
+	.ops = &imx208_ctrl_ops,
+	.id = V4L2_CID_DIGITAL_GAIN,
+	.name = "Digital Gain",
+	.type = V4L2_CTRL_TYPE_INTEGER_MENU,
+	.min = 0,
+	.max = ARRAY_SIZE(imx208_discrete_digital_gain) - 1,
+	.step = 0,
+	.def = 0,
+	.menu_skip_mask = 0,
+	.qmenu_int = imx208_discrete_digital_gain,
+};
+
+static int imx208_enum_mbus_code(struct v4l2_subdev *sd,
+				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_mbus_code_enum *code)
+{
+	struct imx208 *sensor = to_imx208(sd);
+
+	/* Exactly one code is enumerated: the flip-dependent bayer order */
+	if (code->index != 0)
+		return -EINVAL;
+
+	code->code = imx208_get_format_code(sensor);
+	return 0;
+}
+
+static int imx208_enum_frame_size(struct v4l2_subdev *sd,
+				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_frame_size_enum *fse)
+{
+	const struct imx208_mode *mode;
+
+	/* Reject indices past the mode table and any other bus code */
+	if (fse->index >= ARRAY_SIZE(supported_modes) ||
+	    fse->code != imx208_get_format_code(to_imx208(sd)))
+		return -EINVAL;
+
+	/* Each mode is a single discrete size, so min == max */
+	mode = &supported_modes[fse->index];
+	fse->min_width = mode->width;
+	fse->max_width = mode->width;
+	fse->min_height = mode->height;
+	fse->max_height = mode->height;
+	return 0;
+}
+
+static void imx208_mode_to_pad_format(struct imx208 *imx208,
+				      const struct imx208_mode *mode,
+				      struct v4l2_subdev_format *fmt)
+{
+	fmt->format.width = mode->width;	/* size comes from the mode */
+	fmt->format.height = mode->height;
+	fmt->format.code = imx208_get_format_code(imx208);	/* follows flips */
+	fmt->format.field = V4L2_FIELD_NONE;
+}
+
+static int __imx208_get_pad_format(struct imx208 *imx208,
+				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_format *fmt)
+{
+	/* Non-TRY requests are answered from the current mode */
+	if (fmt->which != V4L2_SUBDEV_FORMAT_TRY) {
+		imx208_mode_to_pad_format(imx208, imx208->cur_mode, fmt);
+		return 0;
+	}
+	fmt->format = *v4l2_subdev_get_try_format(&imx208->sd, cfg, fmt->pad);
+	return 0;
+}
+
+static int imx208_get_pad_format(struct v4l2_subdev *sd,
+				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_format *fmt)
+{
+	struct imx208 *sensor = to_imx208(sd);
+	int err;
+
+	/* Hold imx208_mx so the format cannot change while it is read */
+	mutex_lock(&sensor->imx208_mx);
+	err = __imx208_get_pad_format(sensor, cfg, fmt);
+	mutex_unlock(&sensor->imx208_mx);
+	return err;
+}
+
+static int imx208_set_pad_format(struct v4l2_subdev *sd,
+				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_format *fmt)
+{
+	struct imx208 *imx208 = to_imx208(sd);
+	const struct imx208_mode *mode;
+	s32 vblank_def;
+	s32 vblank_min;
+	s64 h_blank;
+	s64 pixel_rate;
+	s64 link_freq;
+
+	mutex_lock(&imx208->imx208_mx);
+
+	fmt->format.code = imx208_get_format_code(imx208);
+	mode = v4l2_find_nearest_size(supported_modes,
+				      ARRAY_SIZE(supported_modes), width, height,
+				      fmt->format.width, fmt->format.height);
+	imx208_mode_to_pad_format(imx208, mode, fmt);	/* snap to that mode */
+	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
+		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+	} else {	/* not TRY: switch mode and refresh dependent controls */
+		imx208->cur_mode = mode;
+		__v4l2_ctrl_s_ctrl(imx208->link_freq, mode->link_freq_index);
+		link_freq = link_freq_menu_items[mode->link_freq_index];
+		pixel_rate = link_freq_to_pixel_rate(link_freq);
+		__v4l2_ctrl_s_ctrl_int64(imx208->pixel_rate, pixel_rate);
+		/* Re-range VBLANK for the new mode and reset it to default */
+		vblank_def = imx208->cur_mode->vts_def -
+			     imx208->cur_mode->height;
+		vblank_min = imx208->cur_mode->vts_min -
+			     imx208->cur_mode->height;
+		__v4l2_ctrl_modify_range(imx208->vblank, vblank_min,
+					 IMX208_VTS_MAX - imx208->cur_mode->height,
+					 1, vblank_def);
+		__v4l2_ctrl_s_ctrl(imx208->vblank, vblank_def);
+		h_blank =
+			link_freq_configs[mode->link_freq_index].pixels_per_line
+			 - imx208->cur_mode->width;
+		__v4l2_ctrl_modify_range(imx208->hblank, h_blank,
+					 h_blank, 1, h_blank);	/* single value */
+	}
+
+	mutex_unlock(&imx208->imx208_mx);
+
+	return 0;
+}
+
+/* Program PLL, mode and control values, then switch to streaming mode */
+static int imx208_start_streaming(struct imx208 *imx208)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	const struct imx208_reg_list *reg_list;
+	int ret, link_freq_index;
+
+	/* PLL registers for the current mode's link frequency */
+	link_freq_index = imx208->cur_mode->link_freq_index;
+	reg_list = &link_freq_configs[link_freq_index].reg_list;
+	ret = imx208_write_regs(imx208, reg_list->regs, reg_list->num_of_regs);
+	if (ret) {
+		dev_err(&client->dev, "%s failed to set plls\n", __func__);
+		return ret;
+	}
+
+	/* Register defaults of the current mode */
+	reg_list = &imx208->cur_mode->reg_list;
+	ret = imx208_write_regs(imx208, reg_list->regs, reg_list->num_of_regs);
+	if (ret) {
+		dev_err(&client->dev, "%s failed to set mode\n", __func__);
+		return ret;
+	}
+
+	/* Re-apply the control values through the control handler */
+	ret =  __v4l2_ctrl_handler_setup(imx208->sd.ctrl_handler);
+	if (ret)
+		return ret;
+
+	/* Finally write the streaming value to the mode select register */
+	return imx208_write_reg(imx208, IMX208_REG_MODE_SELECT,
+				1, IMX208_MODE_STREAMING);
+}
+
+/* Put the sensor back into standby; failures are logged, never returned */
+static int imx208_stop_streaming(struct imx208 *imx208)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+
+	/* Request standby through the mode select register */
+	if (imx208_write_reg(imx208, IMX208_REG_MODE_SELECT, 1,
+			     IMX208_MODE_STANDBY) != 0)
+		dev_err(&client->dev, "%s failed to set stream\n", __func__);
+
+	/*
+	 * A failed stop leaves the caller with nothing to retry or
+	 * undo.  The write error has therefore only been logged above
+	 * and is deliberately not propagated: this function always
+	 * reports success.
+	 */
+	return 0;
+}
+
+static int imx208_set_stream(struct v4l2_subdev *sd, int enable)
+{
+	struct imx208 *imx208 = to_imx208(sd);
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+	int ret = 0;
+
+	mutex_lock(&imx208->imx208_mx);
+	if (imx208->streaming == enable) {	/* already in requested state */
+		mutex_unlock(&imx208->imx208_mx);
+		return 0;
+	}
+
+	if (enable) {
+		ret = pm_runtime_get_sync(&client->dev);
+		if (ret < 0)
+			goto err_rpm_put;
+
+		/*
+		 * Write PLL, mode and control values to the sensor,
+		 * then start streaming.
+		 */
+		ret = imx208_start_streaming(imx208);
+		if (ret)
+			goto err_rpm_put;
+	} else {
+		imx208_stop_streaming(imx208);
+		pm_runtime_put(&client->dev);	/* pairs with the get on enable */
+	}
+
+	imx208->streaming = enable;
+	mutex_unlock(&imx208->imx208_mx);
+
+	/* Flips are grabbed while streaming; done after dropping imx208_mx */
+	v4l2_ctrl_grab(imx208->vflip, enable);
+	v4l2_ctrl_grab(imx208->hflip, enable);
+
+	return ret;
+
+err_rpm_put:	/* enable path failed: drop the runtime PM reference again */
+	pm_runtime_put(&client->dev);
+	mutex_unlock(&imx208->imx208_mx);
+
+	return ret;
+}
+
+static int __maybe_unused imx208_suspend(struct device *dev)
+{
+	struct v4l2_subdev *sd = i2c_get_clientdata(to_i2c_client(dev));
+	struct imx208 *sensor = to_imx208(sd);
+
+	/* The streaming flag stays set: resume uses it to restart */
+	if (!sensor->streaming)
+		return 0;
+	imx208_stop_streaming(sensor);
+	return 0;
+}
+
+static int __maybe_unused imx208_resume(struct device *dev)
+{
+	struct v4l2_subdev *sd = i2c_get_clientdata(to_i2c_client(dev));
+	struct imx208 *sensor = to_imx208(sd);
+	int err;
+
+	if (!sensor->streaming)
+		return 0;
+
+	err = imx208_start_streaming(sensor);
+	if (!err)
+		return 0;
+
+	/*
+	 * Restart failed: ask the sensor to go back to standby and
+	 * clear the streaming flag before reporting the error.
+	 */
+	imx208_stop_streaming(sensor);
+	sensor->streaming = false;
+	return err;
+}
+
+/* Read the 2-byte chip ID register and compare it with IMX208_CHIP_ID */
+static int imx208_identify_module(struct imx208 *imx208)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	struct device *dev = &client->dev;
+	u32 chip_id;
+	int err;
+
+	err = imx208_read_reg(imx208, IMX208_REG_CHIP_ID, 2, &chip_id);
+	if (err) {
+		dev_err(dev, "failed to read chip id %x\n", IMX208_CHIP_ID);
+		return err;
+	}
+
+	/* Any other ID is reported as -EIO */
+	if (chip_id == IMX208_CHIP_ID)
+		return 0;
+
+	dev_err(dev, "chip id mismatch: %x!=%x\n",
+		IMX208_CHIP_ID, chip_id);
+
+	return -EIO;
+}
+
+static const struct v4l2_subdev_video_ops imx208_video_ops = {
+	.s_stream = imx208_set_stream,
+};
+
+static const struct v4l2_subdev_pad_ops imx208_pad_ops = {
+	.enum_mbus_code = imx208_enum_mbus_code,
+	.get_fmt = imx208_get_pad_format,
+	.set_fmt = imx208_set_pad_format,
+	.enum_frame_size = imx208_enum_frame_size,
+};
+
+static const struct v4l2_subdev_ops imx208_subdev_ops = {
+	.video = &imx208_video_ops,
+	.pad = &imx208_pad_ops,
+};
+
+static const struct v4l2_subdev_internal_ops imx208_internal_ops = {
+	.open = imx208_open,
+};
+
+static int imx208_read_otp(struct imx208 *imx208)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	struct i2c_msg msgs[2];
+	u8 addr_buf[2] = { IMX208_OTP_BASE >> 8, IMX208_OTP_BASE & 0xff };
+	int ret = 0;
+
+	mutex_lock(&imx208->imx208_mx);
+	/* Fill otp_data from the sensor once; later calls reuse the cache */
+	if (imx208->otp_read)
+		goto out_unlock;
+
+	ret = pm_runtime_get_sync(&client->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(&client->dev);
+		goto out_unlock;
+	}
+
+	/* Write register address */
+	msgs[0].addr = client->addr;
+	msgs[0].flags = 0;
+	msgs[0].len = ARRAY_SIZE(addr_buf);
+	msgs[0].buf = addr_buf;
+
+	/* Read data from registers */
+	msgs[1].addr = client->addr;
+	msgs[1].flags = I2C_M_RD;
+	msgs[1].len = sizeof(imx208->otp_data);
+	msgs[1].buf = imx208->otp_data;
+
+	ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	imx208->otp_read = (ret == ARRAY_SIZE(msgs));
+	/* Short transfers become -EIO; negative errnos pass through */
+	if (ret >= 0)
+		ret = imx208->otp_read ? 0 : -EIO;
+
+	pm_runtime_put(&client->dev);
+
+out_unlock:
+	mutex_unlock(&imx208->imx208_mx);
+
+	return ret;
+}
+
+static ssize_t otp_read(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr,
+			char *buf, loff_t off, size_t count)
+{
+	struct i2c_client *client = to_i2c_client(kobj_to_dev(kobj));
+	struct imx208 *sensor = to_imx208(i2c_get_clientdata(client));
+	int err = imx208_read_otp(sensor);
+
+	if (err)
+		return err;
+
+	/* Serve the request from the driver's OTP copy */
+	memcpy(buf, sensor->otp_data + off, count);
+
+	return count;
+}
+
+static const BIN_ATTR_RO(otp, IMX208_OTP_SIZE);
+
+/* Create all V4L2 controls; on failure the handler and mutex are torn down */
+static int imx208_init_controls(struct imx208 *imx208)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(&imx208->sd);
+	struct v4l2_ctrl_handler *ctrl_hdlr = &imx208->ctrl_handler;
+	s64 exposure_max;
+	s64 vblank_def;
+	s64 vblank_min;
+	s64 pixel_rate_min;
+	s64 pixel_rate_max;
+	int ret;
+
+	ret = v4l2_ctrl_handler_init(ctrl_hdlr, 8);
+	if (ret)
+		return ret;
+
+	mutex_init(&imx208->imx208_mx);
+	ctrl_hdlr->lock = &imx208->imx208_mx;	/* controls share the driver mutex */
+	imx208->link_freq =
+		v4l2_ctrl_new_int_menu(ctrl_hdlr,
+				       &imx208_ctrl_ops,
+				       V4L2_CID_LINK_FREQ,
+				       ARRAY_SIZE(link_freq_menu_items) - 1,
+				       0, link_freq_menu_items);
+
+	if (imx208->link_freq)
+		imx208->link_freq->flags |= V4L2_CTRL_FLAG_READ_ONLY;
+
+	pixel_rate_max = link_freq_to_pixel_rate(link_freq_menu_items[0]);
+	pixel_rate_min =
+		link_freq_to_pixel_rate(link_freq_menu_items[ARRAY_SIZE(link_freq_menu_items) - 1]);
+	/* By default, PIXEL_RATE is read only */
+	imx208->pixel_rate = v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops,
+					       V4L2_CID_PIXEL_RATE,
+					       pixel_rate_min, pixel_rate_max,
+					       1, pixel_rate_max);
+
+	vblank_def = imx208->cur_mode->vts_def - imx208->cur_mode->height;
+	vblank_min = imx208->cur_mode->vts_min - imx208->cur_mode->height;
+	imx208->vblank =
+		v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops, V4L2_CID_VBLANK,
+				  vblank_min,
+				  IMX208_VTS_MAX - imx208->cur_mode->height, 1,
+				  vblank_def);
+
+	imx208->hblank =
+		v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops, V4L2_CID_HBLANK,
+				  IMX208_PPL_384MHZ - imx208->cur_mode->width,
+				  IMX208_PPL_384MHZ - imx208->cur_mode->width,
+				  1,
+				  IMX208_PPL_384MHZ - imx208->cur_mode->width);
+
+	if (imx208->hblank)
+		imx208->hblank->flags |= V4L2_CTRL_FLAG_READ_ONLY;
+
+	exposure_max = imx208->cur_mode->vts_def - 8;	/* default VTS - 8 */
+	v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops, V4L2_CID_EXPOSURE,
+			  IMX208_EXPOSURE_MIN, exposure_max,
+			  IMX208_EXPOSURE_STEP, IMX208_EXPOSURE_DEFAULT);
+
+	imx208->hflip = v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops,
+					  V4L2_CID_HFLIP, 0, 1, 1, 0);
+	imx208->vflip = v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops,
+					  V4L2_CID_VFLIP, 0, 1, 1, 0);
+
+	v4l2_ctrl_new_std(ctrl_hdlr, &imx208_ctrl_ops, V4L2_CID_ANALOGUE_GAIN,
+			  IMX208_ANA_GAIN_MIN, IMX208_ANA_GAIN_MAX,
+			  IMX208_ANA_GAIN_STEP, IMX208_ANA_GAIN_DEFAULT);
+
+	v4l2_ctrl_new_custom(ctrl_hdlr, &imx208_digital_gain_control, NULL);
+
+	v4l2_ctrl_new_std_menu_items(ctrl_hdlr, &imx208_ctrl_ops,
+				     V4L2_CID_TEST_PATTERN,
+				     ARRAY_SIZE(imx208_test_pattern_menu) - 1,
+				     0, 0, imx208_test_pattern_menu);
+
+	if (ctrl_hdlr->error) {	/* checked once, after all controls were added */
+		ret = ctrl_hdlr->error;
+		dev_err(&client->dev, "%s control init failed (%d)\n",
+			__func__, ret);
+		goto error;
+	}
+
+	imx208->sd.ctrl_handler = ctrl_hdlr;
+
+	return 0;
+
+error:
+	v4l2_ctrl_handler_free(ctrl_hdlr);
+	mutex_destroy(&imx208->imx208_mx);
+
+	return ret;
+}
+
+static void imx208_free_controls(struct imx208 *imx208)
+{
+	v4l2_ctrl_handler_free(imx208->sd.ctrl_handler);	/* set by init_controls */
+}
+
+/* Probe: check clock and chip ID, then set up controls, pad and subdev */
+static int imx208_probe(struct i2c_client *client)
+{
+	struct imx208 *imx208;
+	int ret;
+	u32 val = 0;
+
+	device_property_read_u32(&client->dev, "clock-frequency", &val);
+	if (val != 19200000) {
+		dev_err(&client->dev,
+			"Unsupported clock-frequency %u. Expected 19200000.\n",
+			val);
+		return -EINVAL;
+	}
+
+	imx208 = devm_kzalloc(&client->dev, sizeof(*imx208), GFP_KERNEL);
+	if (!imx208)
+		return -ENOMEM;
+
+	/* Initialize subdev */
+	v4l2_i2c_subdev_init(&imx208->sd, client, &imx208_subdev_ops);
+
+	/* Check module identity */
+	ret = imx208_identify_module(imx208);
+	if (ret) {
+		dev_err(&client->dev, "failed to find sensor: %d\n", ret);
+		return ret;	/* nothing to undo: mutex not initialized yet */
+	}
+
+	/* Set default mode to max resolution */
+	imx208->cur_mode = &supported_modes[0];
+
+	ret = imx208_init_controls(imx208);
+	if (ret) {
+		dev_err(&client->dev, "failed to init controls: %d\n", ret);
+		return ret;	/* init_controls already cleaned up after itself */
+	}
+
+	/* Initialize subdev */
+	imx208->sd.internal_ops = &imx208_internal_ops;
+	imx208->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+	imx208->sd.entity.function = MEDIA_ENT_F_CAM_SENSOR;
+
+	/* Initialize source pad */
+	imx208->pad.flags = MEDIA_PAD_FL_SOURCE;
+	ret = media_entity_pads_init(&imx208->sd.entity, 1, &imx208->pad);
+	if (ret) {
+		dev_err(&client->dev, "%s failed:%d\n", __func__, ret);
+		goto error_handler_free;
+	}
+
+	ret = v4l2_async_register_subdev_sensor(&imx208->sd);
+	if (ret < 0)
+		goto error_media_entity;
+
+	ret = device_create_bin_file(&client->dev, &bin_attr_otp);
+	if (ret) {
+		dev_err(&client->dev, "sysfs otp creation failed\n");
+		goto error_async_subdev;
+	}
+
+	/* Mark the device active, enable runtime PM and let it go idle */
+	pm_runtime_set_active(&client->dev);
+	pm_runtime_enable(&client->dev);
+	pm_runtime_idle(&client->dev);
+
+	return 0;
+
+error_async_subdev:
+	v4l2_async_unregister_subdev(&imx208->sd);
+
+error_media_entity:
+	media_entity_cleanup(&imx208->sd.entity);
+
+error_handler_free:
+	imx208_free_controls(imx208);
+	mutex_destroy(&imx208->imx208_mx);
+
+	return ret;
+}
+
+static int imx208_remove(struct i2c_client *client)
+{
+	struct v4l2_subdev *sd = i2c_get_clientdata(client);
+	struct imx208 *imx208 = to_imx208(sd);
+
+	device_remove_bin_file(&client->dev, &bin_attr_otp);	/* sysfs otp file */
+	v4l2_async_unregister_subdev(sd);
+	media_entity_cleanup(&sd->entity);
+	imx208_free_controls(imx208);
+
+	pm_runtime_disable(&client->dev);
+	pm_runtime_set_suspended(&client->dev);
+
+	mutex_destroy(&imx208->imx208_mx);	/* initialized in init_controls */
+
+	return 0;
+}
+
+static const struct dev_pm_ops imx208_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(imx208_suspend, imx208_resume)
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id imx208_acpi_ids[] = {
+	{ "INT3478" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(acpi, imx208_acpi_ids);
+#endif
+
+static struct i2c_driver imx208_i2c_driver = {
+	.driver = {
+		.name = "imx208",
+		.pm = &imx208_pm_ops,
+		.acpi_match_table = ACPI_PTR(imx208_acpi_ids),
+	},
+	.probe_new = imx208_probe,
+	.remove = imx208_remove,
+};
+
+module_i2c_driver(imx208_i2c_driver);
+
+MODULE_AUTHOR("Yeh, Andy <andy.yeh@intel.com>");
+MODULE_AUTHOR("Chen, Ping-chung <ping-chung.chen@intel.com>");
+MODULE_AUTHOR("Shawn Tu <shawnx.tu@intel.com>");
+MODULE_DESCRIPTION("Sony IMX208 sensor driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 47926106af78d5fe6817c8db966213801950eed3 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 5 May 2021 22:17:17 +0200
Subject: [PATCH 2013/3804] media: i2c: ov2659: Fix an error message

'ret' is known to be 0 here and printing -ENODEV wouldn't be really
helpful. So remove it from the error message.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Lad Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov2659.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
index 7c1781f646cea..befef14aa86bc 100644
--- a/drivers/media/i2c/ov2659.c
+++ b/drivers/media/i2c/ov2659.c
@@ -1377,8 +1377,7 @@ static int ov2659_detect(struct v4l2_subdev *sd)
 		id = OV265X_ID(pid, ver);
 		if (id != OV2659_ID) {
 			dev_err(&client->dev,
-				"Sensor detection failed (%04X, %d)\n",
-				id, ret);
+				"Sensor detection failed (%04X)\n", id);
 			ret = -ENODEV;
 		} else {
 			dev_info(&client->dev, "Found OV%04X sensor\n", id);
-- 
GitLab


From 92fbe0323d1b6f596643bb5c91b886789bb90228 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 5 May 2021 22:20:37 +0200
Subject: [PATCH 2014/3804] media: i2c: ov9650: Fix an error message

'ret' is known to be 0 here and printing -ENODEV wouldn't be really
helpful. So remove it from the error message.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/ov9650.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/ov9650.c b/drivers/media/i2c/ov9650.c
index 4fe68aa557899..a9f13dc2f053b 100644
--- a/drivers/media/i2c/ov9650.c
+++ b/drivers/media/i2c/ov9650.c
@@ -1479,8 +1479,8 @@ static int ov965x_detect_sensor(struct v4l2_subdev *sd)
 		if (ov965x->id == OV9650_ID || ov965x->id == OV9652_ID) {
 			v4l2_info(sd, "Found OV%04X sensor\n", ov965x->id);
 		} else {
-			v4l2_err(sd, "Sensor detection failed (%04X, %d)\n",
-				 ov965x->id, ret);
+			v4l2_err(sd, "Sensor detection failed (%04X)\n",
+				 ov965x->id);
 			ret = -ENODEV;
 		}
 	}
-- 
GitLab


From d443d838f6d76c8e1acbd4e27583cb2948066f0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Tue, 11 May 2021 16:23:20 +0200
Subject: [PATCH 2015/3804] media: dt-bindings: media: renesas,isp: Add
 bindings for ISP Channel Selector
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add bindings for Renesas R-Car ISP Channel Selector IP. The ISP is
responsible for filtering the MIPI CSI-2 bus and directing the different
CSI-2 virtual channels to different R-Car VIN instances (DMA engines)
for capture.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/renesas,isp.yaml           | 196 ++++++++++++++++++
 MAINTAINERS                                   |   1 +
 2 files changed, 197 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/renesas,isp.yaml

diff --git a/Documentation/devicetree/bindings/media/renesas,isp.yaml b/Documentation/devicetree/bindings/media/renesas,isp.yaml
new file mode 100644
index 0000000000000..514857d36f6b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/renesas,isp.yaml
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+# Copyright (C) 2021 Renesas Electronics Corp.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/renesas,isp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas R-Car ISP Channel Selector
+
+maintainers:
+  - Niklas Söderlund <niklas.soderlund@ragnatech.se>
+
+description:
+  The R-Car ISP Channel Selector provides MIPI CSI-2 VC and DT filtering
+  capabilities for the Renesas R-Car family of devices. It is used in
+  conjunction with the R-Car VIN and CSI-2 modules, which provide the video
+  capture capabilities.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r8a779a0-isp # V3U
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/ports
+
+    properties:
+      port@0:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Input port node, multiple endpoints describing the connected R-Car
+          CSI-2 receivers.
+
+      port@1:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 0.
+
+      port@2:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 1.
+
+      port@3:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 2.
+
+      port@4:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 3.
+
+      port@5:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 4.
+
+      port@6:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 5.
+
+      port@7:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 6.
+
+      port@8:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Single endpoint describing the R-Car VIN connected to output port 7.
+
+    required:
+      - port@0
+      - port@1
+      - port@2
+      - port@3
+      - port@4
+      - port@5
+      - port@6
+      - port@7
+      - port@8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - power-domains
+  - resets
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a779a0-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a779a0-sysc.h>
+
+    isp1: isp@fed20000 {
+            compatible = "renesas,r8a779a0-isp";
+            reg = <0xfed20000 0x10000>;
+            interrupts = <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&cpg CPG_MOD 613>;
+            power-domains = <&sysc R8A779A0_PD_A3ISP01>;
+            resets = <&cpg 613>;
+
+            ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                            #address-cells = <1>;
+                            #size-cells = <0>;
+
+                            reg = <0>;
+                            isp1csi41: endpoint@1 {
+                                    reg = <1>;
+                                    remote-endpoint = <&csi41isp1>;
+                            };
+                    };
+
+                    port@1 {
+                            reg = <1>;
+                            isp1vin08: endpoint {
+                                    remote-endpoint = <&vin08isp1>;
+                            };
+                    };
+
+                    port@2 {
+                            reg = <2>;
+                            isp1vin09: endpoint {
+                                    remote-endpoint = <&vin09isp1>;
+                            };
+                    };
+
+                    port@3 {
+                            reg = <3>;
+                            isp1vin10: endpoint {
+                                    remote-endpoint = <&vin10isp1>;
+                            };
+                    };
+
+                    port@4 {
+                            reg = <4>;
+                            isp1vin11: endpoint {
+                                    remote-endpoint = <&vin11isp1>;
+                            };
+                    };
+
+                    port@5 {
+                            reg = <5>;
+                            isp1vin12: endpoint {
+                                    remote-endpoint = <&vin12isp1>;
+                            };
+                    };
+
+                    port@6 {
+                            reg = <6>;
+                            isp1vin13: endpoint {
+                                    remote-endpoint = <&vin13isp1>;
+                            };
+                    };
+
+                    port@7 {
+                            reg = <7>;
+                            isp1vin14: endpoint {
+                                    remote-endpoint = <&vin14isp1>;
+                            };
+                    };
+
+                    port@8 {
+                            reg = <8>;
+                            isp1vin15: endpoint {
+                                    remote-endpoint = <&vin15isp1>;
+                            };
+                    };
+            };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 0fee01ceb1515..5e1bbb39a68e0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11400,6 +11400,7 @@ L:	linux-renesas-soc@vger.kernel.org
 S:	Supported
 T:	git git://linuxtv.org/media_tree.git
 F:	Documentation/devicetree/bindings/media/renesas,csi2.yaml
+F:	Documentation/devicetree/bindings/media/renesas,isp.yaml
 F:	Documentation/devicetree/bindings/media/renesas,vin.yaml
 F:	drivers/media/platform/rcar-vin/
 
-- 
GitLab


From 8f6a0eabb1f21a23a570b0986c8abe9fded3ad6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Tue, 11 May 2021 16:33:32 +0200
Subject: [PATCH 2016/3804] media: dt-bindings: media: renesas,vin: Add
 r8a779a0 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document support for the VIN module in the Renesas V3U (r8a779a0) SoC.
The V3U is different from other SoCs as it has 32 instead of 8 (most of
Gen3) or 16 (V3H) VIN instances. The VIN instances are also connected to
a new IP the R-Car ISP Channel Selector.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/renesas,vin.yaml           | 26 ++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/media/renesas,vin.yaml b/Documentation/devicetree/bindings/media/renesas,vin.yaml
index dd1a5ce5896ce..5ba06b0f030b4 100644
--- a/Documentation/devicetree/bindings/media/renesas,vin.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,vin.yaml
@@ -51,6 +51,7 @@ properties:
               - renesas,vin-r8a77980 # R-Car V3H
               - renesas,vin-r8a77990 # R-Car E3
               - renesas,vin-r8a77995 # R-Car D3
+              - renesas,vin-r8a779a0 # R-Car V3U
 
   reg:
     maxItems: 1
@@ -111,7 +112,7 @@ properties:
     description: VIN channel number
     $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 0
-    maximum: 15
+    maximum: 31
 
   ports:
     $ref: /schemas/graph.yaml#/properties/ports
@@ -187,6 +188,29 @@ properties:
           - required:
               - endpoint@3
 
+      port@2:
+        $ref: /schemas/graph.yaml#/properties/port
+        description:
+          Input port node, multiple endpoints describing all the R-Car ISP
+          modules connected to the VIN.
+
+        properties:
+          endpoint@0:
+            $ref: /schemas/graph.yaml#/properties/endpoint
+            description: Endpoint connected to ISP0.
+
+          endpoint@1:
+            $ref: /schemas/graph.yaml#/properties/endpoint
+            description: Endpoint connected to ISP1.
+
+          endpoint@2:
+            $ref: /schemas/graph.yaml#/properties/endpoint
+            description: Endpoint connected to ISP2.
+
+          endpoint@3:
+            $ref: /schemas/graph.yaml#/properties/endpoint
+            description: Endpoint connected to ISP3.
+
 required:
   - compatible
   - reg
-- 
GitLab


From 6e2202ca1ee034920b029124151754aec67b61ba Mon Sep 17 00:00:00 2001
From: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Date: Wed, 28 Apr 2021 23:50:20 +0200
Subject: [PATCH 2017/3804] media: venus: hfi_cmds: Fix conceal color property

The conceal color property used for Venus v4 and v6 has the same
payload structure. But currently v4 falls through to the payload
structure for v1. Correct this by moving set_property to v4.

Fixes: 4ef6039fad8f ("media: venus: vdec: Add support for conceal control")
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/hfi_cmds.c | 22 ++++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c
index 11a8347e5f5c8..4b9dea7f6940e 100644
--- a/drivers/media/platform/qcom/venus/hfi_cmds.c
+++ b/drivers/media/platform/qcom/venus/hfi_cmds.c
@@ -1226,6 +1226,17 @@ pkt_session_set_property_4xx(struct hfi_session_set_property_pkt *pkt,
 		pkt->shdr.hdr.size += sizeof(u32) + sizeof(*hdr10);
 		break;
 	}
+	case HFI_PROPERTY_PARAM_VDEC_CONCEAL_COLOR: {
+		struct hfi_conceal_color_v4 *color = prop_data;
+		u32 *in = pdata;
+
+		color->conceal_color_8bit = *in & 0xff;
+		color->conceal_color_8bit |= ((*in >> 10) & 0xff) << 8;
+		color->conceal_color_8bit |= ((*in >> 20) & 0xff) << 16;
+		color->conceal_color_10bit = *in;
+		pkt->shdr.hdr.size += sizeof(u32) + sizeof(*color);
+		break;
+	}
 
 	case HFI_PROPERTY_CONFIG_VENC_MAX_BITRATE:
 	case HFI_PROPERTY_CONFIG_VDEC_POST_LOOP_DEBLOCKER:
@@ -1279,17 +1290,6 @@ pkt_session_set_property_6xx(struct hfi_session_set_property_pkt *pkt,
 		pkt->shdr.hdr.size += sizeof(u32) + sizeof(*cq);
 		break;
 	}
-	case HFI_PROPERTY_PARAM_VDEC_CONCEAL_COLOR: {
-		struct hfi_conceal_color_v4 *color = prop_data;
-		u32 *in = pdata;
-
-		color->conceal_color_8bit = *in & 0xff;
-		color->conceal_color_8bit |= ((*in >> 10) & 0xff) << 8;
-		color->conceal_color_8bit |= ((*in >> 20) & 0xff) << 16;
-		color->conceal_color_10bit = *in;
-		pkt->shdr.hdr.size += sizeof(u32) + sizeof(*color);
-		break;
-	}
 	default:
 		return pkt_session_set_property_4xx(pkt, cookie, ptype, pdata);
 	}
-- 
GitLab


From 0394360eafa08766424c194d9096c535e6f2833f Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Sun, 14 Mar 2021 17:34:07 +0100
Subject: [PATCH 2018/3804] media: venus: Convert to use resource-managed OPP
 API

Use resource-managed OPP API to simplify code.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/core.h      |  1 -
 .../media/platform/qcom/venus/pm_helpers.c    | 42 +++++--------------
 2 files changed, 11 insertions(+), 32 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 745f226a523ff..56054c3db8caf 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -155,7 +155,6 @@ struct venus_core {
 	struct clk *vcodec1_clks[VIDC_VCODEC_CLKS_NUM_MAX];
 	struct icc_path *video_path;
 	struct icc_path *cpucfg_path;
-	struct opp_table *opp_table;
 	bool has_opp_table;
 	struct device *pmdomains[VIDC_PMDOMAINS_NUM_MAX];
 	struct device_link *opp_dl_venus;
diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c
index d0fddf5e9a69f..fc204e4046aa9 100644
--- a/drivers/media/platform/qcom/venus/pm_helpers.c
+++ b/drivers/media/platform/qcom/venus/pm_helpers.c
@@ -300,16 +300,15 @@ static int core_get_v1(struct venus_core *core)
 	if (ret)
 		return ret;
 
-	core->opp_table = dev_pm_opp_set_clkname(core->dev, "core");
-	if (IS_ERR(core->opp_table))
-		return PTR_ERR(core->opp_table);
+	ret = devm_pm_opp_set_clkname(core->dev, "core");
+	if (ret)
+		return ret;
 
 	return 0;
 }
 
 static void core_put_v1(struct venus_core *core)
 {
-	dev_pm_opp_put_clkname(core->opp_table);
 }
 
 static int core_power_v1(struct venus_core *core, int on)
@@ -788,7 +787,6 @@ static int venc_power_v4(struct device *dev, int on)
 static int vcodec_domains_get(struct venus_core *core)
 {
 	int ret;
-	struct opp_table *opp_table;
 	struct device **opp_virt_dev;
 	struct device *dev = core->dev;
 	const struct venus_resources *res = core->res;
@@ -811,11 +809,9 @@ skip_pmdomains:
 		return 0;
 
 	/* Attach the power domain for setting performance state */
-	opp_table = dev_pm_opp_attach_genpd(dev, res->opp_pmdomain, &opp_virt_dev);
-	if (IS_ERR(opp_table)) {
-		ret = PTR_ERR(opp_table);
+	ret = devm_pm_opp_attach_genpd(dev, res->opp_pmdomain, &opp_virt_dev);
+	if (ret)
 		goto opp_attach_err;
-	}
 
 	core->opp_pmdomain = *opp_virt_dev;
 	core->opp_dl_venus = device_link_add(dev, core->opp_pmdomain,
@@ -824,13 +820,11 @@ skip_pmdomains:
 					     DL_FLAG_STATELESS);
 	if (!core->opp_dl_venus) {
 		ret = -ENODEV;
-		goto opp_dl_add_err;
+		goto opp_attach_err;
 	}
 
 	return 0;
 
-opp_dl_add_err:
-	dev_pm_opp_detach_genpd(core->opp_table);
 opp_attach_err:
 	for (i = 0; i < res->vcodec_pmdomains_num; i++) {
 		if (IS_ERR_OR_NULL(core->pmdomains[i]))
@@ -861,8 +855,6 @@ skip_pmdomains:
 
 	if (core->opp_dl_venus)
 		device_link_del(core->opp_dl_venus);
-
-	dev_pm_opp_detach_genpd(core->opp_table);
 }
 
 static int core_resets_reset(struct venus_core *core)
@@ -941,45 +933,33 @@ static int core_get_v4(struct venus_core *core)
 	if (legacy_binding)
 		return 0;
 
-	core->opp_table = dev_pm_opp_set_clkname(dev, "core");
-	if (IS_ERR(core->opp_table))
-		return PTR_ERR(core->opp_table);
+	ret = devm_pm_opp_set_clkname(dev, "core");
+	if (ret)
+		return ret;
 
 	if (core->res->opp_pmdomain) {
-		ret = dev_pm_opp_of_add_table(dev);
+		ret = devm_pm_opp_of_add_table(dev);
 		if (!ret) {
 			core->has_opp_table = true;
 		} else if (ret != -ENODEV) {
 			dev_err(dev, "invalid OPP table in device tree\n");
-			dev_pm_opp_put_clkname(core->opp_table);
 			return ret;
 		}
 	}
 
 	ret = vcodec_domains_get(core);
-	if (ret) {
-		if (core->has_opp_table)
-			dev_pm_opp_of_remove_table(dev);
-		dev_pm_opp_put_clkname(core->opp_table);
+	if (ret)
 		return ret;
-	}
 
 	return 0;
 }
 
 static void core_put_v4(struct venus_core *core)
 {
-	struct device *dev = core->dev;
-
 	if (legacy_binding)
 		return;
 
 	vcodec_domains_put(core);
-
-	if (core->has_opp_table)
-		dev_pm_opp_of_remove_table(dev);
-	dev_pm_opp_put_clkname(core->opp_table);
-
 }
 
 static int core_power_v4(struct venus_core *core, int on)
-- 
GitLab


From 51bb3989c2a1c49b8cebdb753a0ab28d5a546b52 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 10 Feb 2021 23:57:20 +0100
Subject: [PATCH 2019/3804] media: venus: hfi_cmds.h: Replace one-element array
 with flexible-array member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having
a dynamically sized set of trailing elements in a structure. Kernel code
should always use “flexible array members”[1] for these cases. The older
style of one-element or zero-length arrays should no longer be used[2].

Use flexible-array member in struct hfi_sys_set_property_pkt instead of
one-element array.

Also, this helps with the ongoing efforts to enable -Warray-bounds and
fix the following warnings:

drivers/media/platform/qcom/venus/hfi_cmds.c: In function ‘pkt_sys_coverage_config’:
drivers/media/platform/qcom/venus/hfi_cmds.c:57:11: warning: array subscript 1 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
   57 |  pkt->data[1] = mode;
      |  ~~~~~~~~~^~~

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays

Link: https://github.com/KSPP/linux/issues/79
Link: https://github.com/KSPP/linux/issues/109
Build-tested-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/602416da.iZqae7Dbk7nyl6OY%25lkp@intel.com/

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/hfi_cmds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.h b/drivers/media/platform/qcom/venus/hfi_cmds.h
index 83705e237f1cf..327ed90a27888 100644
--- a/drivers/media/platform/qcom/venus/hfi_cmds.h
+++ b/drivers/media/platform/qcom/venus/hfi_cmds.h
@@ -68,7 +68,7 @@ struct hfi_sys_release_resource_pkt {
 struct hfi_sys_set_property_pkt {
 	struct hfi_pkt_hdr hdr;
 	u32 num_properties;
-	u32 data[1];
+	u32 data[];
 };
 
 struct hfi_sys_get_property_pkt {
-- 
GitLab


From 3cfe5815ce0ee87f4979787cc7af23404a02edc1 Mon Sep 17 00:00:00 2001
From: Dikshita Agarwal <dikshita@codeaurora.org>
Date: Mon, 12 Apr 2021 08:58:43 +0200
Subject: [PATCH 2020/3804] media: venus: Enable low power setting for encoder

Set the FW to run in low power mode for the encoder
to accommodate more sessions without losing much quality.

Signed-off-by: Dikshita Agarwal <dikshita@codeaurora.org>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/core.h      |   6 +
 drivers/media/platform/qcom/venus/helpers.c   |   2 +
 .../media/platform/qcom/venus/hfi_helper.h    |  10 +-
 .../media/platform/qcom/venus/hfi_platform.c  |  16 +++
 .../media/platform/qcom/venus/hfi_platform.h  |   4 +
 .../platform/qcom/venus/hfi_platform_v4.c     |  28 +++--
 .../platform/qcom/venus/hfi_platform_v6.c     |  28 +++--
 .../media/platform/qcom/venus/pm_helpers.c    | 108 +++++++++++++++---
 8 files changed, 167 insertions(+), 35 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h
index 56054c3db8caf..8df2d497d7067 100644
--- a/drivers/media/platform/qcom/venus/core.h
+++ b/drivers/media/platform/qcom/venus/core.h
@@ -292,6 +292,7 @@ struct clock_data {
 	unsigned long freq;
 	unsigned long vpp_freq;
 	unsigned long vsp_freq;
+	unsigned long low_power_freq;
 };
 
 #define to_venus_buffer(ptr)	container_of(ptr, struct venus_buffer, vb)
@@ -315,6 +316,10 @@ struct venus_ts_metadata {
 	struct v4l2_timecode tc;
 };
 
+enum venus_inst_modes {
+	VENUS_LOW_POWER = BIT(0),
+};
+
 /**
  * struct venus_inst - holds per instance parameters
  *
@@ -444,6 +449,7 @@ struct venus_inst {
 	unsigned int pic_struct;
 	bool next_buf_last;
 	bool drain_active;
+	enum venus_inst_modes flags;
 };
 
 #define IS_V1(core)	((core)->res->hfi_version == HFI_VERSION_1XX)
diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index b813d6dba4817..b691215a3bf2d 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -1627,6 +1627,8 @@ int venus_helper_session_init(struct venus_inst *inst)
 								  session_type);
 	inst->clk_data.vsp_freq = hfi_platform_get_codec_vsp_freq(version, codec,
 								  session_type);
+	inst->clk_data.low_power_freq = hfi_platform_get_codec_lp_freq(version, codec,
+								       session_type);
 
 	return 0;
 }
diff --git a/drivers/media/platform/qcom/venus/hfi_helper.h b/drivers/media/platform/qcom/venus/hfi_helper.h
index 63cd347a62da0..b0a9beb4163c4 100644
--- a/drivers/media/platform/qcom/venus/hfi_helper.h
+++ b/drivers/media/platform/qcom/venus/hfi_helper.h
@@ -415,9 +415,6 @@
 #define HFI_BUFFER_MODE_RING			0x1000002
 #define HFI_BUFFER_MODE_DYNAMIC			0x1000003
 
-#define HFI_VENC_PERFMODE_MAX_QUALITY		0x1
-#define HFI_VENC_PERFMODE_POWER_SAVE		0x2
-
 /*
  * HFI_PROPERTY_SYS_COMMON_START
  * HFI_DOMAIN_BASE_COMMON + HFI_ARCH_COMMON_OFFSET + 0x0000
@@ -848,6 +845,13 @@ struct hfi_framesize {
 	u32 height;
 };
 
+#define HFI_VENC_PERFMODE_MAX_QUALITY		0x1
+#define HFI_VENC_PERFMODE_POWER_SAVE		0x2
+
+struct hfi_perf_mode {
+	u32 video_perf_mode;
+};
+
 #define VIDC_CORE_ID_DEFAULT	0
 #define VIDC_CORE_ID_1		1
 #define VIDC_CORE_ID_2		2
diff --git a/drivers/media/platform/qcom/venus/hfi_platform.c b/drivers/media/platform/qcom/venus/hfi_platform.c
index 8f47804e973f7..f5b4e1f4764fc 100644
--- a/drivers/media/platform/qcom/venus/hfi_platform.c
+++ b/drivers/media/platform/qcom/venus/hfi_platform.c
@@ -50,6 +50,22 @@ hfi_platform_get_codec_vsp_freq(enum hfi_version version, u32 codec, u32 session
 	return freq;
 }
 
+unsigned long
+hfi_platform_get_codec_lp_freq(enum hfi_version version, u32 codec, u32 session_type)
+{
+	const struct hfi_platform *plat;
+	unsigned long freq = 0;
+
+	plat = hfi_platform_get(version);
+	if (!plat)
+		return 0;
+
+	if (plat->codec_lp_freq)
+		freq = plat->codec_lp_freq(session_type, codec);
+
+	return freq;
+}
+
 u8 hfi_platform_num_vpp_pipes(enum hfi_version version)
 {
 	const struct hfi_platform *plat;
diff --git a/drivers/media/platform/qcom/venus/hfi_platform.h b/drivers/media/platform/qcom/venus/hfi_platform.h
index 3819bb2b36bdf..2dbe608c53af5 100644
--- a/drivers/media/platform/qcom/venus/hfi_platform.h
+++ b/drivers/media/platform/qcom/venus/hfi_platform.h
@@ -43,11 +43,13 @@ struct hfi_platform_codec_freq_data {
 	u32 session_type;
 	unsigned long vpp_freq;
 	unsigned long vsp_freq;
+	unsigned long low_power_freq;
 };
 
 struct hfi_platform {
 	unsigned long (*codec_vpp_freq)(u32 session_type, u32 codec);
 	unsigned long (*codec_vsp_freq)(u32 session_type, u32 codec);
+	unsigned long (*codec_lp_freq)(u32 session_type, u32 codec);
 	void (*codecs)(u32 *enc_codecs, u32 *dec_codecs, u32 *count);
 	const struct hfi_plat_caps *(*capabilities)(unsigned int *entries);
 	u8 (*num_vpp_pipes)(void);
@@ -63,5 +65,7 @@ unsigned long hfi_platform_get_codec_vpp_freq(enum hfi_version version, u32 code
 					      u32 session_type);
 unsigned long hfi_platform_get_codec_vsp_freq(enum hfi_version version, u32 codec,
 					      u32 session_type);
+unsigned long hfi_platform_get_codec_lp_freq(enum hfi_version version, u32 codec,
+					     u32 session_type);
 u8 hfi_platform_num_vpp_pipes(enum hfi_version version);
 #endif
diff --git a/drivers/media/platform/qcom/venus/hfi_platform_v4.c b/drivers/media/platform/qcom/venus/hfi_platform_v4.c
index 3848bb6d74084..3f7f5277a50e1 100644
--- a/drivers/media/platform/qcom/venus/hfi_platform_v4.c
+++ b/drivers/media/platform/qcom/venus/hfi_platform_v4.c
@@ -262,14 +262,14 @@ static void get_codecs(u32 *enc_codecs, u32 *dec_codecs, u32 *count)
 }
 
 static const struct hfi_platform_codec_freq_data codec_freq_data[] =  {
-	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10 },
-	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10 },
-	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10 },
-	{ V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10 },
-	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10 },
-	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10 },
-	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10 },
-	{ V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10 },
+	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 10, 320 },
+	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 10, 320 },
+	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 10, 320 },
+	{ V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 10, 200 },
+	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 10, 200 },
+	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 10, 200 },
+	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 10, 200 },
+	{ V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 10, 200 },
 };
 
 static const struct hfi_platform_codec_freq_data *
@@ -311,9 +311,21 @@ static unsigned long codec_vsp_freq(u32 session_type, u32 codec)
 	return 0;
 }
 
+static unsigned long codec_lp_freq(u32 session_type, u32 codec)
+{
+	const struct hfi_platform_codec_freq_data *data;
+
+	data = get_codec_freq_data(session_type, codec);
+	if (data)
+		return data->low_power_freq;
+
+	return 0;
+}
+
 const struct hfi_platform hfi_plat_v4 = {
 	.codec_vpp_freq = codec_vpp_freq,
 	.codec_vsp_freq = codec_vsp_freq,
+	.codec_lp_freq = codec_lp_freq,
 	.codecs = get_codecs,
 	.capabilities = get_capabilities,
 };
diff --git a/drivers/media/platform/qcom/venus/hfi_platform_v6.c b/drivers/media/platform/qcom/venus/hfi_platform_v6.c
index dd1a03911b6cf..d8243b22568ae 100644
--- a/drivers/media/platform/qcom/venus/hfi_platform_v6.c
+++ b/drivers/media/platform/qcom/venus/hfi_platform_v6.c
@@ -262,14 +262,14 @@ static void get_codecs(u32 *enc_codecs, u32 *dec_codecs, u32 *count)
 }
 
 static const struct hfi_platform_codec_freq_data codec_freq_data[] = {
-	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 25 },
-	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 25 },
-	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 60 },
-	{ V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 25 },
-	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 25 },
-	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 25 },
-	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 60 },
-	{ V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 60 },
+	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_ENC, 675, 25, 320 },
+	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_ENC, 675, 25, 320 },
+	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_ENC, 675, 60, 320 },
+	{ V4L2_PIX_FMT_MPEG2, VIDC_SESSION_TYPE_DEC, 200, 25, 200 },
+	{ V4L2_PIX_FMT_H264, VIDC_SESSION_TYPE_DEC, 200, 25, 200 },
+	{ V4L2_PIX_FMT_HEVC, VIDC_SESSION_TYPE_DEC, 200, 25, 200 },
+	{ V4L2_PIX_FMT_VP8, VIDC_SESSION_TYPE_DEC, 200, 60, 200 },
+	{ V4L2_PIX_FMT_VP9, VIDC_SESSION_TYPE_DEC, 200, 60, 200 },
 };
 
 static const struct hfi_platform_codec_freq_data *
@@ -311,6 +311,17 @@ static unsigned long codec_vsp_freq(u32 session_type, u32 codec)
 	return 0;
 }
 
+static unsigned long codec_lp_freq(u32 session_type, u32 codec)
+{
+	const struct hfi_platform_codec_freq_data *data;
+
+	data = get_codec_freq_data(session_type, codec);
+	if (data)
+		return data->low_power_freq;
+
+	return 0;
+}
+
 static u8 num_vpp_pipes(void)
 {
 	return 4;
@@ -319,6 +330,7 @@ static u8 num_vpp_pipes(void)
 const struct hfi_platform hfi_plat_v6 = {
 	.codec_vpp_freq = codec_vpp_freq,
 	.codec_vsp_freq = codec_vsp_freq,
+	.codec_lp_freq = codec_lp_freq,
 	.codecs = get_codecs,
 	.capabilities = get_capabilities,
 	.num_vpp_pipes = num_vpp_pipes,
diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c
index fc204e4046aa9..3e2345eb47f7c 100644
--- a/drivers/media/platform/qcom/venus/pm_helpers.c
+++ b/drivers/media/platform/qcom/venus/pm_helpers.c
@@ -523,8 +523,50 @@ static int poweron_coreid(struct venus_core *core, unsigned int coreid_mask)
 	return 0;
 }
 
+static inline int power_save_mode_enable(struct venus_inst *inst,
+					 bool enable)
+{
+	struct venc_controls *enc_ctr = &inst->controls.enc;
+	const u32 ptype = HFI_PROPERTY_CONFIG_VENC_PERF_MODE;
+	u32 venc_mode;
+	int ret = 0;
+
+	if (inst->session_type != VIDC_SESSION_TYPE_ENC)
+		return 0;
+
+	if (enc_ctr->bitrate_mode == V4L2_MPEG_VIDEO_BITRATE_MODE_CQ)
+		enable = false;
+
+	venc_mode = enable ? HFI_VENC_PERFMODE_POWER_SAVE :
+		HFI_VENC_PERFMODE_MAX_QUALITY;
+
+	ret = hfi_session_set_property(inst, ptype, &venc_mode);
+	if (ret)
+		return ret;
+
+	inst->flags = enable ? inst->flags | VENUS_LOW_POWER :
+		inst->flags & ~VENUS_LOW_POWER;
+
+	return ret;
+}
+
+static int move_core_to_power_save_mode(struct venus_core *core,
+					u32 core_id)
+{
+	struct venus_inst *inst = NULL;
+
+	mutex_lock(&core->lock);
+	list_for_each_entry(inst, &core->instances, list) {
+		if (inst->clk_data.core_id == core_id &&
+		    inst->session_type == VIDC_SESSION_TYPE_ENC)
+			power_save_mode_enable(inst, true);
+	}
+	mutex_unlock(&core->lock);
+	return 0;
+}
+
 static void
-min_loaded_core(struct venus_inst *inst, u32 *min_coreid, u32 *min_load)
+min_loaded_core(struct venus_inst *inst, u32 *min_coreid, u32 *min_load, bool low_power)
 {
 	u32 mbs_per_sec, load, core1_load = 0, core2_load = 0;
 	u32 cores_max = core_num_max(inst);
@@ -542,7 +584,14 @@ min_loaded_core(struct venus_inst *inst, u32 *min_coreid, u32 *min_load)
 		if (inst_pos->state != INST_START)
 			continue;
 
-		vpp_freq = inst_pos->clk_data.vpp_freq;
+		if (inst->session_type == VIDC_SESSION_TYPE_DEC)
+			vpp_freq = inst_pos->clk_data.vpp_freq;
+		else if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+			vpp_freq = low_power ? inst_pos->clk_data.vpp_freq :
+				inst_pos->clk_data.low_power_freq;
+		else
+			continue;
+
 		coreid = inst_pos->clk_data.core_id;
 
 		mbs_per_sec = load_per_instance(inst_pos);
@@ -574,9 +623,11 @@ static int decide_core(struct venus_inst *inst)
 {
 	const u32 ptype = HFI_PROPERTY_CONFIG_VIDEOCORES_USAGE;
 	struct venus_core *core = inst->core;
-	u32 min_coreid, min_load, inst_load;
+	u32 min_coreid, min_load, cur_inst_load;
+	u32 min_lp_coreid, min_lp_load, cur_inst_lp_load;
 	struct hfi_videocores_usage_type cu;
 	unsigned long max_freq;
+	int ret = 0;
 
 	if (legacy_binding) {
 		if (inst->session_type == VIDC_SESSION_TYPE_DEC)
@@ -590,23 +641,43 @@ static int decide_core(struct venus_inst *inst)
 	if (inst->clk_data.core_id != VIDC_CORE_ID_DEFAULT)
 		return 0;
 
-	inst_load = load_per_instance(inst);
-	inst_load *= inst->clk_data.vpp_freq;
-	max_freq = core->res->freq_tbl[0].freq;
+	cur_inst_load = load_per_instance(inst);
+	cur_inst_load *= inst->clk_data.vpp_freq;
+	/*TODO : divide this inst->load by work_route */
 
-	min_loaded_core(inst, &min_coreid, &min_load);
+	cur_inst_lp_load = load_per_instance(inst);
+	cur_inst_lp_load *= inst->clk_data.low_power_freq;
+	/*TODO : divide this inst->load by work_route */
 
-	if ((inst_load + min_load) > max_freq) {
-		dev_warn(core->dev, "HW is overloaded, needed: %u max: %lu\n",
-			 inst_load, max_freq);
+	max_freq = core->res->freq_tbl[0].freq;
+
+	min_loaded_core(inst, &min_coreid, &min_load, false);
+	min_loaded_core(inst, &min_lp_coreid, &min_lp_load, true);
+
+	if (cur_inst_load + min_load <= max_freq) {
+		inst->clk_data.core_id = min_coreid;
+		cu.video_core_enable_mask = min_coreid;
+	} else if (cur_inst_lp_load + min_load <= max_freq) {
+		/* Move current instance to LP and return */
+		inst->clk_data.core_id = min_coreid;
+		cu.video_core_enable_mask = min_coreid;
+		power_save_mode_enable(inst, true);
+	} else if (cur_inst_lp_load + min_lp_load <= max_freq) {
+		/* Move all instances to LP mode and return */
+		inst->clk_data.core_id = min_lp_coreid;
+		cu.video_core_enable_mask = min_lp_coreid;
+		move_core_to_power_save_mode(core, min_lp_coreid);
+	} else {
+		dev_warn(core->dev, "HW can't support this load");
 		return -EINVAL;
 	}
 
-	inst->clk_data.core_id = min_coreid;
-	cu.video_core_enable_mask = min_coreid;
-
 done:
-	return hfi_session_set_property(inst, ptype, &cu);
+	ret = hfi_session_set_property(inst, ptype, &cu);
+	if (ret)
+		return ret;
+
+	return ret;
 }
 
 static int acquire_core(struct venus_inst *inst)
@@ -1005,7 +1076,7 @@ static int core_power_v4(struct venus_core *core, int on)
 static unsigned long calculate_inst_freq(struct venus_inst *inst,
 					 unsigned long filled_len)
 {
-	unsigned long vpp_freq = 0, vsp_freq = 0;
+	unsigned long vpp_freq_per_mb = 0, vpp_freq = 0, vsp_freq = 0;
 	u32 fps = (u32)inst->fps;
 	u32 mbs_per_sec;
 
@@ -1014,7 +1085,12 @@ static unsigned long calculate_inst_freq(struct venus_inst *inst,
 	if (inst->state != INST_START)
 		return 0;
 
-	vpp_freq = mbs_per_sec * inst->clk_data.vpp_freq;
+	if (inst->session_type == VIDC_SESSION_TYPE_ENC)
+		vpp_freq_per_mb = inst->flags & VENUS_LOW_POWER ?
+			inst->clk_data.low_power_freq :
+			inst->clk_data.vpp_freq;
+
+	vpp_freq = mbs_per_sec * vpp_freq_per_mb;
 	/* 21 / 20 is overhead factor */
 	vpp_freq += vpp_freq / 20;
 	vsp_freq = mbs_per_sec * inst->clk_data.vsp_freq;
-- 
GitLab


From 6fc46680520f38af8425a447de5e0f84106512eb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Mon, 10 May 2021 13:57:52 +0200
Subject: [PATCH 2021/3804] media: venus: helpers: Delete an unneeded bool
 conversion

The result of an expression consisting of a single relational operator is
already of the bool type and does not need to be converted explicitly.

No functional change.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/helpers.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/helpers.c b/drivers/media/platform/qcom/venus/helpers.c
index b691215a3bf2d..1fe6d463dc993 100644
--- a/drivers/media/platform/qcom/venus/helpers.c
+++ b/drivers/media/platform/qcom/venus/helpers.c
@@ -595,8 +595,7 @@ static int platform_get_bufreq(struct venus_inst *inst, u32 buftype,
 		params.dec.is_secondary_output =
 			inst->opb_buftype == HFI_BUFFER_OUTPUT2;
 		params.dec.is_interlaced =
-			inst->pic_struct != HFI_INTERLACE_FRAME_PROGRESSIVE ?
-				true : false;
+			inst->pic_struct != HFI_INTERLACE_FRAME_PROGRESSIVE;
 	} else {
 		params.width = inst->out_width;
 		params.height = inst->out_height;
-- 
GitLab


From 83df8dfd57be041669e6dc365caf1d5f1b2791b8 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 2 Mar 2021 15:42:35 +0100
Subject: [PATCH 2022/3804] media: dt-bindings: media: Document RDA5807 FM
 radio bindings

Add documentation for the devicetree bindings of the RDA5807 FM radio
I2C chip from Unisoc.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/i2c/rda,rda5807.yaml       | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/i2c/rda,rda5807.yaml

diff --git a/Documentation/devicetree/bindings/media/i2c/rda,rda5807.yaml b/Documentation/devicetree/bindings/media/i2c/rda,rda5807.yaml
new file mode 100644
index 0000000000000..f50e54a722ebe
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/rda,rda5807.yaml
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/rda,rda5807.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Unisoc Communications RDA5807 FM radio receiver
+
+maintainers:
+  - Paul Cercueil <paul@crapouillou.net>
+
+properties:
+  compatible:
+    enum:
+      - rda,rda5807
+
+  reg:
+    description: I2C address.
+    maxItems: 1
+
+  power-supply: true
+
+  rda,lnan:
+    description: Use LNAN input port.
+    type: boolean
+
+  rda,lnap:
+    description: Use LNAP input port.
+    type: boolean
+
+  rda,analog-out:
+    description: Enable analog audio output.
+    type: boolean
+
+  rda,i2s-out:
+    description: Enable I2S digital audio output.
+    type: boolean
+
+  rda,lna-microamp:
+    description: LNA working current, in micro-amperes.
+    default: 2500
+    enum: [1800, 2100, 2500, 3000]
+
+required:
+  - compatible
+  - reg
+  - power-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      radio@11 {
+        compatible = "rda,rda5807";
+        reg = <0x11>;
+
+        power-supply = <&ldo6>;
+
+        rda,lnan;
+        rda,lnap;
+        rda,analog-out;
+      };
+    };
-- 
GitLab


From 90c3493e4d9e2e1450b5d3ffd314ff350f5132a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Fri, 12 Mar 2021 14:03:30 +0100
Subject: [PATCH 2023/3804] media: dt-bindings: media: renesas,vin: Add
 r8a77961 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the compatible string for M3-W+ (r8a77961) to the list of supported
SoCs.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/devicetree/bindings/media/renesas,vin.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/media/renesas,vin.yaml b/Documentation/devicetree/bindings/media/renesas,vin.yaml
index 5ba06b0f030b4..39bb6db2fb32d 100644
--- a/Documentation/devicetree/bindings/media/renesas,vin.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,vin.yaml
@@ -46,6 +46,7 @@ properties:
               - renesas,vin-r8a7779  # R-Car H1
               - renesas,vin-r8a7795  # R-Car H3
               - renesas,vin-r8a7796  # R-Car M3-W
+              - renesas,vin-r8a77961 # R-Car M3-W+
               - renesas,vin-r8a77965 # R-Car M3-N
               - renesas,vin-r8a77970 # R-Car V3M
               - renesas,vin-r8a77980 # R-Car V3H
-- 
GitLab


From be6cdcf2c9c97c5a702adb95520d0268c8ecc1ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Fri, 12 Mar 2021 14:04:21 +0100
Subject: [PATCH 2024/3804] media: dt-bindings: media: renesas,csi2: Add
 r8a77961 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the compatible string for M3-W+ (r8a77961) to the list of supported
SoCs.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/devicetree/bindings/media/renesas,csi2.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/media/renesas,csi2.yaml b/Documentation/devicetree/bindings/media/renesas,csi2.yaml
index 20396f1be9993..23703b767f5b6 100644
--- a/Documentation/devicetree/bindings/media/renesas,csi2.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,csi2.yaml
@@ -25,6 +25,7 @@ properties:
           - renesas,r8a774e1-csi2 # RZ/G2H
           - renesas,r8a7795-csi2  # R-Car H3
           - renesas,r8a7796-csi2  # R-Car M3-W
+          - renesas,r8a77961-csi2 # R-Car M3-W+
           - renesas,r8a77965-csi2 # R-Car M3-N
           - renesas,r8a77970-csi2 # R-Car V3M
           - renesas,r8a77980-csi2 # R-Car V3H
-- 
GitLab


From 1710eb913bdcda3917f44d383c32de6bdabfc836 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 27 May 2021 21:54:42 +0200
Subject: [PATCH 2025/3804] netfilter: nft_ct: skip expectations for confirmed
 conntrack

nft_ct_expect_obj_eval() calls nf_ct_ext_add() for a confirmed
conntrack entry. However, nf_ct_ext_add() can only be called for
!nf_ct_is_confirmed().

[ 1825.349056] WARNING: CPU: 0 PID: 1279 at net/netfilter/nf_conntrack_extend.c:48 nf_ct_ext_add+0x18e/0x1a0 [nf_conntrack]
[ 1825.351391] RIP: 0010:nf_ct_ext_add+0x18e/0x1a0 [nf_conntrack]
[ 1825.351493] Code: 41 5c 41 5d 41 5e 41 5f c3 41 bc 0a 00 00 00 e9 15 ff ff ff ba 09 00 00 00 31 f6 4c 89 ff e8 69 6c 3d e9 eb 96 45 31 ed eb cd <0f> 0b e9 b1 fe ff ff e8 86 79 14 e9 eb bf 0f 1f 40 00 0f 1f 44 00
[ 1825.351721] RSP: 0018:ffffc90002e1f1e8 EFLAGS: 00010202
[ 1825.351790] RAX: 000000000000000e RBX: ffff88814f5783c0 RCX: ffffffffc0e4f887
[ 1825.351881] RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffff88814f578440
[ 1825.351971] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff88814f578447
[ 1825.352060] R10: ffffed1029eaf088 R11: 0000000000000001 R12: ffff88814f578440
[ 1825.352150] R13: ffff8882053f3a00 R14: 0000000000000000 R15: 0000000000000a20
[ 1825.352240] FS:  00007f992261c900(0000) GS:ffff889faec00000(0000) knlGS:0000000000000000
[ 1825.352343] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1825.352417] CR2: 000056070a4d1158 CR3: 000000015efe0000 CR4: 0000000000350ee0
[ 1825.352508] Call Trace:
[ 1825.352544]  nf_ct_helper_ext_add+0x10/0x60 [nf_conntrack]
[ 1825.352641]  nft_ct_expect_obj_eval+0x1b8/0x1e0 [nft_ct]
[ 1825.352716]  nft_do_chain+0x232/0x850 [nf_tables]

Add the ct helper extension only for unconfirmed conntrack. Skip rule
evaluation if the ct helper extension does not exist. Thus, you can
only create expectations from the first packet.

It should be possible to remove this limitation by adding a new action
to attach a generic ct helper to the first packet. Then, use this ct
helper extension from follow up packets to create the ct expectation.

While at it, add the missing check to skip the template conntrack too,
and remove the check for IP_CT_UNTRACKED, which is implied by !ct.

Fixes: 857b46027d6f ("netfilter: nft_ct: add ct expectations support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 0592a94560843..337e22d8b40b1 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1217,7 +1217,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
 	struct nf_conn *ct;
 
 	ct = nf_ct_get(pkt->skb, &ctinfo);
-	if (!ct || ctinfo == IP_CT_UNTRACKED) {
+	if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct)) {
 		regs->verdict.code = NFT_BREAK;
 		return;
 	}
-- 
GitLab


From 8971ee8b087750a23f3cd4dc55bff2d0303fd267 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 28 May 2021 13:45:16 +0200
Subject: [PATCH 2026/3804] netfilter: nfnetlink_cthelper: hit EBUSY on updates
 if size mismatches

The private helper data size cannot be updated. However, updates that
contain NFCTH_PRIV_DATA_LEN might bogusly hit EBUSY even if the size is
the same.

Fixes: 12f7a505331e ("netfilter: add user-space connection tracking helper infrastructure")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cthelper.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 322ac5dd54022..752b10cae5242 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -380,10 +380,14 @@ static int
 nfnl_cthelper_update(const struct nlattr * const tb[],
 		     struct nf_conntrack_helper *helper)
 {
+	u32 size;
 	int ret;
 
-	if (tb[NFCTH_PRIV_DATA_LEN])
-		return -EBUSY;
+	if (tb[NFCTH_PRIV_DATA_LEN]) {
+		size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+		if (size != helper->data_len)
+			return -EBUSY;
+	}
 
 	if (tb[NFCTH_POLICY]) {
 		ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
-- 
GitLab


From 14480e8df8b511bb904ad79b61bc0b6c29f989a2 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 7 Apr 2021 08:24:39 +0200
Subject: [PATCH 2027/3804] media: camss: move to use request_irq by
 IRQF_NO_AUTOEN flag

Calling disable_irq() after request_irq() still leaves a time gap in which
interrupts can arrive. Passing the IRQF_NO_AUTOEN flag to request_irq()
keeps the IRQ from being auto-enabled when it is requested.

This patch is based on "add IRQF_NO_AUTOEN for request_irq", which
is being merged: https://lore.kernel.org/patchwork/patch/1388765/

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Robert Foss <robert.foss@linaro.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/camss/camss-csid.c   | 5 ++---
 drivers/media/platform/qcom/camss/camss-csiphy.c | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c
index 7e2490ca1ad16..251f4c4afe196 100644
--- a/drivers/media/platform/qcom/camss/camss-csid.c
+++ b/drivers/media/platform/qcom/camss/camss-csid.c
@@ -581,14 +581,13 @@ int msm_csid_subdev_init(struct camss *camss, struct csid_device *csid,
 	snprintf(csid->irq_name, sizeof(csid->irq_name), "%s_%s%d",
 		 dev_name(dev), MSM_CSID_NAME, csid->id);
 	ret = devm_request_irq(dev, csid->irq, csid->ops->isr,
-			       IRQF_TRIGGER_RISING, csid->irq_name, csid);
+			       IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN,
+			       csid->irq_name, csid);
 	if (ret < 0) {
 		dev_err(dev, "request_irq failed: %d\n", ret);
 		return ret;
 	}
 
-	disable_irq(csid->irq);
-
 	/* Clocks */
 
 	csid->nclocks = 0;
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c
index b623e007aec67..35470cbaea863 100644
--- a/drivers/media/platform/qcom/camss/camss-csiphy.c
+++ b/drivers/media/platform/qcom/camss/camss-csiphy.c
@@ -617,14 +617,13 @@ int msm_csiphy_subdev_init(struct camss *camss,
 		 dev_name(dev), MSM_CSIPHY_NAME, csiphy->id);
 
 	ret = devm_request_irq(dev, csiphy->irq, csiphy->ops->isr,
-			       IRQF_TRIGGER_RISING, csiphy->irq_name, csiphy);
+			       IRQF_TRIGGER_RISING | IRQF_NO_AUTOEN,
+			       csiphy->irq_name, csiphy);
 	if (ret < 0) {
 		dev_err(dev, "request_irq failed: %d\n", ret);
 		return ret;
 	}
 
-	disable_irq(csiphy->irq);
-
 	/* Clocks */
 
 	csiphy->nclocks = 0;
-- 
GitLab


From a3a54bf4bddaecda8b5767209cfc703f0be2841d Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Thu, 29 Apr 2021 00:12:26 +0200
Subject: [PATCH 2028/3804] media: bt878: do not schedule tasklet when it is
 not setup

There is a problem with the tasklet in bt878. bt->tasklet is set by
dvb-bt8xx.ko, and bt878.ko can be loaded independently.
In this case, if an interrupt arrives, it may cause a NULL pointer dereference.
To solve this issue, we check if the tasklet is actually set before
calling tasklet_schedule.

[    1.750438] bt878(0): irq FDSR FBUS risc_pc=
[    1.750728] BUG: kernel NULL pointer dereference, address: 0000000000000000
[    1.752969] RIP: 0010:0x0
[    1.757526] Call Trace:
[    1.757659]  <IRQ>
[    1.757770]  tasklet_action_common.isra.0+0x107/0x110
[    1.758041]  tasklet_action+0x22/0x30
[    1.758237]  __do_softirq+0xe0/0x29b
[    1.758430]  irq_exit_rcu+0xa4/0xb0
[    1.758618]  common_interrupt+0x8d/0xa0
[    1.758824]  </IRQ>

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/bt8xx/bt878.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c
index 78dd35c9b65d7..7ca309121fb53 100644
--- a/drivers/media/pci/bt8xx/bt878.c
+++ b/drivers/media/pci/bt8xx/bt878.c
@@ -300,7 +300,8 @@ static irqreturn_t bt878_irq(int irq, void *dev_id)
 		}
 		if (astat & BT878_ARISCI) {
 			bt->finished_block = (stat & BT878_ARISCS) >> 28;
-			tasklet_schedule(&bt->tasklet);
+			if (bt->tasklet.callback)
+				tasklet_schedule(&bt->tasklet);
 			break;
 		}
 		count++;
-- 
GitLab


From 19a0aa9b04c5ab9a063b6ceaf7211ee7d9a9d24d Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Mon, 31 May 2021 10:55:02 -0400
Subject: [PATCH 2029/3804] ASoC: AMD Renoir - add DMI entry for Lenovo 2020
 AMD platforms

More laptops identified where the AMD ACP bridge needs to be blocked
or the microphone will not work when connected to HDMI.

Use DMI to block the microphone PCM device for these platforms.

Suggested-by: Gabriel Craciunescu <nix.or.die@gmail.com>
Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/20210531145502.6079-1-markpearson@lenovo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/renoir/rn-pci-acp3x.c | 35 +++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c
index 19438da5dfa5d..c9fb1c8fbf8c6 100644
--- a/sound/soc/amd/renoir/rn-pci-acp3x.c
+++ b/sound/soc/amd/renoir/rn-pci-acp3x.c
@@ -199,6 +199,41 @@ static const struct dmi_system_id rn_acp_quirk_table[] = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "20NLCTO1WW"),
 		}
 	},
+	{
+		/* Lenovo ThinkPad P14s Gen 1 (20Y1) */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_BOARD_NAME, "20Y1"),
+		}
+	},
+	{
+		/* Lenovo ThinkPad T14s Gen1 */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_BOARD_NAME, "20UH"),
+		}
+	},
+	{
+		/* Lenovo ThinkPad T14s Gen1 Campus */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_BOARD_NAME, "20UJ"),
+		}
+	},
+	{
+		/* Lenovo ThinkPad T14 Gen 1*/
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_BOARD_NAME, "20UD"),
+		}
+	},
+	{
+		/* Lenovo ThinkPad X13 Gen 1*/
+		.matches = {
+			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+			DMI_MATCH(DMI_BOARD_NAME, "20UF"),
+		}
+	},
 	{}
 };
 
-- 
GitLab


From bc537e65b09a05923f98a31920d1ab170e648dba Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 2 Jun 2021 08:45:58 +0300
Subject: [PATCH 2030/3804] regulator: bd718x7: Fix the BUCK7 voltage setting
 on BD71837

Changing the BD71837 voltages for regulators other than the first 4 BUCKs
should be forbidden when the regulator is enabled. There may be out-of-spec
voltage spikes if the voltage of these "non DVS" bucks is changed when
enabled. This restriction was accidentally removed when the LDO voltage
change was allowed for BD71847. (It was not noticed that the BD71837
BUCK7 used the same voltage setting function as the LDOs.)

Additionally this bug causes incorrect voltage monitoring register access.
The voltage change function accidentally used for bd71837 BUCK7 is
intended to only handle LDO voltage changes. A BD71847 LDO specific
voltage monitoring disabling code gets executed on BD71837 and register
offsets are wrongly calculated as regulator is assumed to be an LDO.

Prevent the BD71837 BUCK7 voltage change when BUCK7 is enabled by using
the correct voltage setting operation.

Fixes: 9bcbabafa19b ("regulator: bd718x7: remove voltage change restriction from BD71847 LDOs")
Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/bd8c00931421fafa57e3fdf46557a83075b7cc17.1622610103.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd718x7-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index 8ff47ea522d69..5b6f4d3d1a146 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -364,7 +364,7 @@ BD718XX_OPS(bd71837_buck_regulator_ops, regulator_list_voltage_linear_range,
 	    NULL);
 
 BD718XX_OPS(bd71837_buck_regulator_nolinear_ops, regulator_list_voltage_table,
-	    regulator_map_voltage_ascend, bd718xx_set_voltage_sel_restricted,
+	    regulator_map_voltage_ascend, bd71837_set_voltage_sel_restricted,
 	    regulator_get_voltage_sel_regmap, regulator_set_voltage_time_sel,
 	    NULL);
 /*
-- 
GitLab


From f131767eefc47de2f8afb7950cdea78397997d66 Mon Sep 17 00:00:00 2001
From: zpershuai <zpershuai@gmail.com>
Date: Thu, 27 May 2021 18:20:57 +0800
Subject: [PATCH 2031/3804] spi: spi-zynq-qspi: Fix some wrong goto jumps &
 missing error code

In zynq_qspi_probe(), once the device clock has been enabled, every
subsequent error path should jump to the clk_dis_all label.

If num_cs exceeds the supported maximum, the probe should return a
negative error code, but currently it returns success.

Signed-off-by: zpershuai <zpershuai@gmail.com>
Link: https://lore.kernel.org/r/1622110857-21812-1-git-send-email-zpershuai@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-zynq-qspi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 5a3d81c31d040..9262c6418463b 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -678,14 +678,14 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 	xqspi->irq = platform_get_irq(pdev, 0);
 	if (xqspi->irq <= 0) {
 		ret = -ENXIO;
-		goto remove_master;
+		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq,
 			       0, pdev->name, xqspi);
 	if (ret != 0) {
 		ret = -ENXIO;
 		dev_err(&pdev->dev, "request_irq failed\n");
-		goto remove_master;
+		goto clk_dis_all;
 	}
 
 	ret = of_property_read_u32(np, "num-cs",
@@ -693,8 +693,9 @@ static int zynq_qspi_probe(struct platform_device *pdev)
 	if (ret < 0) {
 		ctlr->num_chipselect = 1;
 	} else if (num_cs > ZYNQ_QSPI_MAX_NUM_CS) {
+		ret = -EINVAL;
 		dev_err(&pdev->dev, "only 2 chip selects are available\n");
-		goto remove_master;
+		goto clk_dis_all;
 	} else {
 		ctlr->num_chipselect = num_cs;
 	}
-- 
GitLab


From a277a2622ca9609de09c18f660f0d10f1ddbb379 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 29 May 2021 19:52:26 +0800
Subject: [PATCH 2032/3804] regulator: core: Use DEVICE_ATTR_RO macro

Use DEVICE_ATTR_RO() helper instead of plain DEVICE_ATTR(),
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20210529115226.25376-1-yuehaibing@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c | 130 ++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 70 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f192bf19492ed..47f2d9a3707c9 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -591,8 +591,8 @@ regulator_get_suspend_state_check(struct regulator_dev *rdev, suspend_state_t st
 	return rstate;
 }
 
-static ssize_t regulator_uV_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t microvolts_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	int uV;
@@ -605,16 +605,16 @@ static ssize_t regulator_uV_show(struct device *dev,
 		return uV;
 	return sprintf(buf, "%d\n", uV);
 }
-static DEVICE_ATTR(microvolts, 0444, regulator_uV_show, NULL);
+static DEVICE_ATTR_RO(microvolts);
 
-static ssize_t regulator_uA_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t microamps_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n", _regulator_get_current_limit(rdev));
 }
-static DEVICE_ATTR(microamps, 0444, regulator_uA_show, NULL);
+static DEVICE_ATTR_RO(microamps);
 
 static ssize_t name_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
@@ -645,14 +645,14 @@ static ssize_t regulator_print_opmode(char *buf, int mode)
 	return sprintf(buf, "%s\n", regulator_opmode_to_str(mode));
 }
 
-static ssize_t regulator_opmode_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static ssize_t opmode_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_opmode(buf, _regulator_get_mode(rdev));
 }
-static DEVICE_ATTR(opmode, 0444, regulator_opmode_show, NULL);
+static DEVICE_ATTR_RO(opmode);
 
 static ssize_t regulator_print_state(char *buf, int state)
 {
@@ -664,8 +664,8 @@ static ssize_t regulator_print_state(char *buf, int state)
 		return sprintf(buf, "unknown\n");
 }
 
-static ssize_t regulator_state_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t state_show(struct device *dev,
+			  struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	ssize_t ret;
@@ -676,10 +676,10 @@ static ssize_t regulator_state_show(struct device *dev,
 
 	return ret;
 }
-static DEVICE_ATTR(state, 0444, regulator_state_show, NULL);
+static DEVICE_ATTR_RO(state);
 
-static ssize_t regulator_status_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	int status;
@@ -723,10 +723,10 @@ static ssize_t regulator_status_show(struct device *dev,
 
 	return sprintf(buf, "%s\n", label);
 }
-static DEVICE_ATTR(status, 0444, regulator_status_show, NULL);
+static DEVICE_ATTR_RO(status);
 
-static ssize_t regulator_min_uA_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static ssize_t min_microamps_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
@@ -735,10 +735,10 @@ static ssize_t regulator_min_uA_show(struct device *dev,
 
 	return sprintf(buf, "%d\n", rdev->constraints->min_uA);
 }
-static DEVICE_ATTR(min_microamps, 0444, regulator_min_uA_show, NULL);
+static DEVICE_ATTR_RO(min_microamps);
 
-static ssize_t regulator_max_uA_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static ssize_t max_microamps_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
@@ -747,10 +747,10 @@ static ssize_t regulator_max_uA_show(struct device *dev,
 
 	return sprintf(buf, "%d\n", rdev->constraints->max_uA);
 }
-static DEVICE_ATTR(max_microamps, 0444, regulator_max_uA_show, NULL);
+static DEVICE_ATTR_RO(max_microamps);
 
-static ssize_t regulator_min_uV_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static ssize_t min_microvolts_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
@@ -759,10 +759,10 @@ static ssize_t regulator_min_uV_show(struct device *dev,
 
 	return sprintf(buf, "%d\n", rdev->constraints->min_uV);
 }
-static DEVICE_ATTR(min_microvolts, 0444, regulator_min_uV_show, NULL);
+static DEVICE_ATTR_RO(min_microvolts);
 
-static ssize_t regulator_max_uV_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+static ssize_t max_microvolts_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
@@ -771,10 +771,10 @@ static ssize_t regulator_max_uV_show(struct device *dev,
 
 	return sprintf(buf, "%d\n", rdev->constraints->max_uV);
 }
-static DEVICE_ATTR(max_microvolts, 0444, regulator_max_uV_show, NULL);
+static DEVICE_ATTR_RO(max_microvolts);
 
-static ssize_t regulator_total_uA_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
+static ssize_t requested_microamps_show(struct device *dev,
+					struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	struct regulator *regulator;
@@ -788,7 +788,7 @@ static ssize_t regulator_total_uA_show(struct device *dev,
 	regulator_unlock(rdev);
 	return sprintf(buf, "%d\n", uA);
 }
-static DEVICE_ATTR(requested_microamps, 0444, regulator_total_uA_show, NULL);
+static DEVICE_ATTR_RO(requested_microamps);
 
 static ssize_t num_users_show(struct device *dev, struct device_attribute *attr,
 			      char *buf)
@@ -813,104 +813,95 @@ static ssize_t type_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(type);
 
-static ssize_t regulator_suspend_mem_uV_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_mem_microvolts_show(struct device *dev,
+					   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n", rdev->constraints->state_mem.uV);
 }
-static DEVICE_ATTR(suspend_mem_microvolts, 0444,
-		regulator_suspend_mem_uV_show, NULL);
+static DEVICE_ATTR_RO(suspend_mem_microvolts);
 
-static ssize_t regulator_suspend_disk_uV_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_disk_microvolts_show(struct device *dev,
+					    struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n", rdev->constraints->state_disk.uV);
 }
-static DEVICE_ATTR(suspend_disk_microvolts, 0444,
-		regulator_suspend_disk_uV_show, NULL);
+static DEVICE_ATTR_RO(suspend_disk_microvolts);
 
-static ssize_t regulator_suspend_standby_uV_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_standby_microvolts_show(struct device *dev,
+					       struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%d\n", rdev->constraints->state_standby.uV);
 }
-static DEVICE_ATTR(suspend_standby_microvolts, 0444,
-		regulator_suspend_standby_uV_show, NULL);
+static DEVICE_ATTR_RO(suspend_standby_microvolts);
 
-static ssize_t regulator_suspend_mem_mode_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_mem_mode_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_opmode(buf,
 		rdev->constraints->state_mem.mode);
 }
-static DEVICE_ATTR(suspend_mem_mode, 0444,
-		regulator_suspend_mem_mode_show, NULL);
+static DEVICE_ATTR_RO(suspend_mem_mode);
 
-static ssize_t regulator_suspend_disk_mode_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_disk_mode_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_opmode(buf,
 		rdev->constraints->state_disk.mode);
 }
-static DEVICE_ATTR(suspend_disk_mode, 0444,
-		regulator_suspend_disk_mode_show, NULL);
+static DEVICE_ATTR_RO(suspend_disk_mode);
 
-static ssize_t regulator_suspend_standby_mode_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
+static ssize_t suspend_standby_mode_show(struct device *dev,
+					 struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_opmode(buf,
 		rdev->constraints->state_standby.mode);
 }
-static DEVICE_ATTR(suspend_standby_mode, 0444,
-		regulator_suspend_standby_mode_show, NULL);
+static DEVICE_ATTR_RO(suspend_standby_mode);
 
-static ssize_t regulator_suspend_mem_state_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t suspend_mem_state_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_state(buf,
 			rdev->constraints->state_mem.enabled);
 }
-static DEVICE_ATTR(suspend_mem_state, 0444,
-		regulator_suspend_mem_state_show, NULL);
+static DEVICE_ATTR_RO(suspend_mem_state);
 
-static ssize_t regulator_suspend_disk_state_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t suspend_disk_state_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_state(buf,
 			rdev->constraints->state_disk.enabled);
 }
-static DEVICE_ATTR(suspend_disk_state, 0444,
-		regulator_suspend_disk_state_show, NULL);
+static DEVICE_ATTR_RO(suspend_disk_state);
 
-static ssize_t regulator_suspend_standby_state_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+static ssize_t suspend_standby_state_show(struct device *dev,
+					  struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 
 	return regulator_print_state(buf,
 			rdev->constraints->state_standby.enabled);
 }
-static DEVICE_ATTR(suspend_standby_state, 0444,
-		regulator_suspend_standby_state_show, NULL);
+static DEVICE_ATTR_RO(suspend_standby_state);
 
-static ssize_t regulator_bypass_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
+static ssize_t bypass_show(struct device *dev,
+			   struct device_attribute *attr, char *buf)
 {
 	struct regulator_dev *rdev = dev_get_drvdata(dev);
 	const char *report;
@@ -928,8 +919,7 @@ static ssize_t regulator_bypass_show(struct device *dev,
 
 	return sprintf(buf, "%s\n", report);
 }
-static DEVICE_ATTR(bypass, 0444,
-		   regulator_bypass_show, NULL);
+static DEVICE_ATTR_RO(bypass);
 
 /* Calculate the new optimum regulator operating mode based on the new total
  * consumer load. All locks held by caller
-- 
GitLab


From bce18e52c866ff6ded13ac8ac37e9271f786c005 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Wed, 2 Jun 2021 13:31:45 +0800
Subject: [PATCH 2033/3804] regulator: rt6160: Add DT binding document for
 Richtek RT6160

Add DT binding document for Richtek RT6160 voltage regulator.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/1622611906-2403-1-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/richtek,rt6160-regulator.yaml   | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rt6160-regulator.yaml

diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt6160-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt6160-regulator.yaml
new file mode 100644
index 0000000000000..0534b0d683593
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt6160-regulator.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt6160-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT6160 BuckBoost converter
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+  The RT6160 is a high-efficiency buck-boost converter that can provide
+  up to 3A output current from 2025mV to 5200mV. It supports a wide
+  input voltage range from 2200mV to 5500mV.
+
+  Datasheet is available at
+  https://www.richtek.com/assets/product_file/RT6160A/DS6160A-00.pdf
+
+allOf:
+  - $ref: regulator.yaml#
+
+properties:
+  compatible:
+    enum:
+      - richtek,rt6160
+
+  reg:
+    maxItems: 1
+
+  enable-gpios:
+    description: A connection of the 'enable' gpio line.
+    maxItems: 1
+
+  richtek,vsel-active-low:
+    description: |
+      Used to indicate the 'vsel' pin active level. If not specified, use
+      high active level as the default.
+    type: boolean
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      rt6160@75 {
+        compatible = "richtek,rt6160";
+        reg = <0x75>;
+        enable-gpios = <&gpio26 2 0>;
+        regulator-name = "rt6160-buckboost";
+        regulator-min-microvolt = <2025000>;
+        regulator-max-microvolt = <5200000>;
+      };
+    };
-- 
GitLab


From de20b747c5836ffc6768914b95d7617139fac4f4 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Wed, 2 Jun 2021 13:31:46 +0800
Subject: [PATCH 2034/3804] regulator: rt6160: Add support for Richtek RT6160

Add support for Richtek RT6160 voltage regulator. It can provide up
to 3A output current within the adjustable voltage from 2025mV
to 5200mV. It integrates a buck-boost converter to support a wide input
voltage range from 2200mV to 5500mV.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1622611906-2403-2-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig            |  11 +
 drivers/regulator/Makefile           |   1 +
 drivers/regulator/rt6160-regulator.c | 332 +++++++++++++++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 drivers/regulator/rt6160-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index bc02ea3ea2eff..50c608e6d0065 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1020,6 +1020,17 @@ config REGULATOR_RT5033
 	  RT5033 PMIC. The device supports multiple regulators like
 	  current source, LDO and Buck.
 
+config REGULATOR_RT6160
+	tristate "Richtek RT6160 BuckBoost voltage regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This adds support for voltage regulator in Richtek RT6160.
+	  This device automatically changes voltage output mode between
+	  Buck and Boost. The mode transition depends on the input source voltage.
+	  The wide output range is from 2025mV to 5200mV and can be used in most
+	  common application scenarios.
+
 config REGULATOR_RTMV20
 	tristate "RTMV20 Laser Diode Regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 6a5d55e209d30..4aa5533bd8ee5 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -123,6 +123,7 @@ obj-$(CONFIG_REGULATOR_ROHM)	+= rohm-regulator.o
 obj-$(CONFIG_REGULATOR_RT4801)	+= rt4801-regulator.o
 obj-$(CONFIG_REGULATOR_RT4831)	+= rt4831-regulator.o
 obj-$(CONFIG_REGULATOR_RT5033)	+= rt5033-regulator.o
+obj-$(CONFIG_REGULATOR_RT6160)	+= rt6160-regulator.o
 obj-$(CONFIG_REGULATOR_RTMV20)	+= rtmv20-regulator.o
 obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o
 obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o
diff --git a/drivers/regulator/rt6160-regulator.c b/drivers/regulator/rt6160-regulator.c
new file mode 100644
index 0000000000000..4588ae0748a54
--- /dev/null
+++ b/drivers/regulator/rt6160-regulator.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+
+#define RT6160_MODE_AUTO	0
+#define RT6160_MODE_FPWM	1
+
+#define RT6160_REG_CNTL		0x01
+#define RT6160_REG_STATUS	0x02
+#define RT6160_REG_DEVID	0x03
+#define RT6160_REG_VSELL	0x04
+#define RT6160_REG_VSELH	0x05
+#define RT6160_NUM_REGS		(RT6160_REG_VSELH + 1)
+
+#define RT6160_FPWM_MASK	BIT(3)
+#define RT6160_RAMPRATE_MASK	GENMASK(1, 0)
+#define RT6160_VID_MASK		GENMASK(7, 4)
+#define RT6160_VSEL_MASK	GENMASK(6, 0)
+#define RT6160_HDSTAT_MASK	BIT(4)
+#define RT6160_UVSTAT_MASK	BIT(3)
+#define RT6160_OCSTAT_MASK	BIT(2)
+#define RT6160_TSDSTAT_MASK	BIT(1)
+#define RT6160_PGSTAT_MASK	BIT(0)
+
+#define RT6160_VENDOR_ID	0xA0
+#define RT6160_VOUT_MINUV	2025000
+#define RT6160_VOUT_MAXUV	5200000
+#define RT6160_VOUT_STPUV	25000
+#define RT6160_N_VOUTS		((RT6160_VOUT_MAXUV - RT6160_VOUT_MINUV) / RT6160_VOUT_STPUV + 1)
+
+#define RT6160_I2CRDY_TIMEUS	100
+
+struct rt6160_priv {
+	struct regulator_desc desc;
+	struct gpio_desc *enable_gpio;
+	struct regmap *regmap;
+	bool vsel_active_low;
+	bool enable_state;
+};
+
+static int rt6160_enable(struct regulator_dev *rdev)
+{
+	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
+
+	if (!priv->enable_gpio)
+		return 0;
+
+	gpiod_set_value_cansleep(priv->enable_gpio, 1);
+	priv->enable_state = true;
+
+	usleep_range(RT6160_I2CRDY_TIMEUS, RT6160_I2CRDY_TIMEUS + 100);
+
+	regcache_cache_only(priv->regmap, false);
+	return regcache_sync(priv->regmap);
+}
+
+static int rt6160_disable(struct regulator_dev *rdev)
+{
+	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
+
+	if (!priv->enable_gpio)
+		return -EINVAL;
+
+	/* Mark regcache as dirty and cache only before HW disabled */
+	regcache_cache_only(priv->regmap, true);
+	regcache_mark_dirty(priv->regmap);
+
+	priv->enable_state = false;
+	gpiod_set_value_cansleep(priv->enable_gpio, 0);
+
+	return 0;
+
+}
+
+static int rt6160_is_enabled(struct regulator_dev *rdev)
+{
+	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
+
+	return priv->enable_state ? 1 : 0;
+}
+
+static int rt6160_set_mode(struct regulator_dev *rdev, unsigned int mode)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	unsigned int mode_val;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		mode_val = RT6160_FPWM_MASK;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		mode_val = 0;
+		break;
+	default:
+		dev_err(&rdev->dev, "mode not supported\n");
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(regmap, RT6160_REG_CNTL, RT6160_FPWM_MASK, mode_val);
+}
+
+static unsigned int rt6160_get_mode(struct regulator_dev *rdev)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(regmap, RT6160_REG_CNTL, &val);
+	if (ret) /* unsigned return type: never leak a negative errno as a mode */
+		return REGULATOR_MODE_INVALID;
+
+	if (val & RT6160_FPWM_MASK) /* FPWM bit set in CNTL -> fast mode */
+		return REGULATOR_MODE_FAST;
+
+	return REGULATOR_MODE_NORMAL;
+}
+
+static int rt6160_set_suspend_voltage(struct regulator_dev *rdev, int uV)
+{
+	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	unsigned int reg = RT6160_REG_VSELH;
+	int vsel;
+
+	vsel = regulator_map_voltage_linear(rdev, uV, uV);
+	if (vsel < 0)
+		return vsel;
+
+	if (priv->vsel_active_low)
+		reg = RT6160_REG_VSELL;
+
+	return regmap_update_bits(regmap, reg, RT6160_VSEL_MASK, vsel);
+}
+
+static int rt6160_set_ramp_delay(struct regulator_dev *rdev, int target)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	const int ramp_tables[] = { 1000, 2500, 5000, 10000 };
+	unsigned int i, sel;
+
+	/* Find closest larger or equal */
+	for (i = 0; i < ARRAY_SIZE(ramp_tables); i++) {
+		sel = i;
+
+		/* If ramp delay is equal to 0, directly set ramp speed to fastest */
+		if (target == 0) {
+			sel = ARRAY_SIZE(ramp_tables) - 1;
+			break;
+		}
+
+		if (target <= ramp_tables[i])
+			break;
+	}
+
+	sel <<= ffs(RT6160_RAMPRATE_MASK) - 1;
+
+	return regmap_update_bits(regmap, RT6160_REG_CNTL, RT6160_RAMPRATE_MASK, sel);
+}
+
+static int rt6160_get_error_flags(struct regulator_dev *rdev, unsigned int *flags)
+{
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	unsigned int val, events = 0;
+	int ret;
+
+	ret = regmap_read(regmap, RT6160_REG_STATUS, &val);
+	if (ret)
+		return ret;
+
+	if (val & (RT6160_HDSTAT_MASK | RT6160_TSDSTAT_MASK))
+		events |= REGULATOR_ERROR_OVER_TEMP;
+
+	if (val & RT6160_UVSTAT_MASK)
+		events |= REGULATOR_ERROR_UNDER_VOLTAGE;
+
+	if (val & RT6160_OCSTAT_MASK)
+		events |= REGULATOR_ERROR_OVER_CURRENT;
+
+	if (val & RT6160_PGSTAT_MASK)
+		events |= REGULATOR_ERROR_FAIL;
+
+	*flags = events;
+	return 0;
+}
+
+static const struct regulator_ops rt6160_regulator_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+
+	.enable = rt6160_enable,
+	.disable = rt6160_disable,
+	.is_enabled = rt6160_is_enabled,
+
+	.set_mode = rt6160_set_mode,
+	.get_mode = rt6160_get_mode,
+	.set_suspend_voltage = rt6160_set_suspend_voltage,
+	.set_ramp_delay = rt6160_set_ramp_delay,
+	.get_error_flags = rt6160_get_error_flags,
+};
+
+static unsigned int rt6160_of_map_mode(unsigned int mode)
+{
+	switch (mode) {
+	case RT6160_MODE_FPWM:
+		return REGULATOR_MODE_FAST;
+	case RT6160_MODE_AUTO:
+		return REGULATOR_MODE_NORMAL;
+	}
+
+	return REGULATOR_MODE_INVALID;
+}
+
+static bool rt6160_is_accessible_reg(struct device *dev, unsigned int reg)
+{
+	if (reg >= RT6160_REG_CNTL && reg <= RT6160_REG_VSELH)
+		return true;
+	return false;
+}
+
+static bool rt6160_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	if (reg == RT6160_REG_STATUS)
+		return true;
+	return false;
+}
+
+static const struct regmap_config rt6160_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = RT6160_REG_VSELH,
+	.num_reg_defaults_raw = RT6160_NUM_REGS,
+	.cache_type = REGCACHE_FLAT,
+
+	.writeable_reg = rt6160_is_accessible_reg,
+	.readable_reg = rt6160_is_accessible_reg,
+	.volatile_reg = rt6160_is_volatile_reg,
+};
+
+static int rt6160_probe(struct i2c_client *i2c)
+{
+	struct rt6160_priv *priv;
+	struct regulator_config regulator_cfg = {};
+	struct regulator_dev *rdev;
+	unsigned int devid;
+	int ret;
+
+	priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->vsel_active_low = device_property_present(&i2c->dev, "richtek,vsel-active-low");
+
+	priv->enable_gpio = devm_gpiod_get_optional(&i2c->dev, "enable", GPIOD_OUT_HIGH);
+	if (IS_ERR(priv->enable_gpio)) {
+		dev_err(&i2c->dev, "Failed to get 'enable' gpio\n");
+		return PTR_ERR(priv->enable_gpio);
+	}
+	priv->enable_state = true;
+
+	usleep_range(RT6160_I2CRDY_TIMEUS, RT6160_I2CRDY_TIMEUS + 100);
+
+	priv->regmap = devm_regmap_init_i2c(i2c, &rt6160_regmap_config);
+	if (IS_ERR(priv->regmap)) {
+		ret = PTR_ERR(priv->regmap);
+		dev_err(&i2c->dev, "Failed to init regmap (%d)\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(priv->regmap, RT6160_REG_DEVID, &devid);
+	if (ret)
+		return ret;
+
+	if ((devid & RT6160_VID_MASK) != RT6160_VENDOR_ID) {
+		dev_err(&i2c->dev, "VID not correct [0x%02x]\n", devid);
+		return -ENODEV;
+	}
+
+	priv->desc.name = "rt6160-buckboost";
+	priv->desc.type = REGULATOR_VOLTAGE;
+	priv->desc.owner = THIS_MODULE;
+	priv->desc.min_uV = RT6160_VOUT_MINUV;
+	priv->desc.uV_step = RT6160_VOUT_STPUV;
+	priv->desc.vsel_reg = RT6160_REG_VSELH;
+	priv->desc.vsel_mask = RT6160_VSEL_MASK;
+	priv->desc.n_voltages = RT6160_N_VOUTS;
+	priv->desc.of_map_mode = rt6160_of_map_mode;
+	priv->desc.ops = &rt6160_regulator_ops;
+	if (priv->vsel_active_low)
+		priv->desc.vsel_reg = RT6160_REG_VSELL;
+
+	regulator_cfg.dev = &i2c->dev;
+	regulator_cfg.of_node = i2c->dev.of_node;
+	regulator_cfg.regmap = priv->regmap;
+	regulator_cfg.driver_data = priv;
+	regulator_cfg.init_data = of_get_regulator_init_data(&i2c->dev, i2c->dev.of_node,
+							     &priv->desc);
+
+	rdev = devm_regulator_register(&i2c->dev, &priv->desc, &regulator_cfg);
+	if (IS_ERR(rdev)) {
+		dev_err(&i2c->dev, "Failed to register regulator\n");
+		return PTR_ERR(rdev);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id __maybe_unused rt6160_of_match_table[] = {
+	{ .compatible = "richtek,rt6160", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rt6160_of_match_table);
+
+static struct i2c_driver rt6160_driver = {
+	.driver = {
+		.name = "rt6160",
+		.of_match_table = rt6160_of_match_table,
+	},
+	.probe_new = rt6160_probe,
+};
+module_i2c_driver(rt6160_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From ba499a50ce5846dd6f7a6df92c1f01d4201b5cce Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 2 Jun 2021 16:05:26 +0800
Subject: [PATCH 2035/3804] regulator: userspace-consumer: use
 DEVICE_ATTR_RO/RW macro

Use DEVICE_ATTR_RO/RW macro helper instead of plain DEVICE_ATTR, which
makes the code a bit shorter and easier to read.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210602080526.11117-1-thunder.leizhen@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/userspace-consumer.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/regulator/userspace-consumer.c b/drivers/regulator/userspace-consumer.c
index 8e3b5a67cfd84..8ca28664776eb 100644
--- a/drivers/regulator/userspace-consumer.c
+++ b/drivers/regulator/userspace-consumer.c
@@ -29,15 +29,15 @@ struct userspace_consumer_data {
 	struct regulator_bulk_data *supplies;
 };
 
-static ssize_t reg_show_name(struct device *dev,
-			  struct device_attribute *attr, char *buf)
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
 {
 	struct userspace_consumer_data *data = dev_get_drvdata(dev);
 
 	return sprintf(buf, "%s\n", data->name);
 }
 
-static ssize_t reg_show_state(struct device *dev,
+static ssize_t state_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
 	struct userspace_consumer_data *data = dev_get_drvdata(dev);
@@ -48,8 +48,8 @@ static ssize_t reg_show_state(struct device *dev,
 	return sprintf(buf, "disabled\n");
 }
 
-static ssize_t reg_set_state(struct device *dev, struct device_attribute *attr,
-			 const char *buf, size_t count)
+static ssize_t state_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
 {
 	struct userspace_consumer_data *data = dev_get_drvdata(dev);
 	bool enabled;
@@ -87,8 +87,8 @@ static ssize_t reg_set_state(struct device *dev, struct device_attribute *attr,
 	return count;
 }
 
-static DEVICE_ATTR(name, 0444, reg_show_name, NULL);
-static DEVICE_ATTR(state, 0644, reg_show_state, reg_set_state);
+static DEVICE_ATTR_RO(name);
+static DEVICE_ATTR_RW(state);
 
 static struct attribute *attributes[] = {
 	&dev_attr_name.attr,
-- 
GitLab


From ec679bda639fe84b78d473526ae27c74dea383fb Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 27 May 2021 23:32:00 +0200
Subject: [PATCH 2036/3804] spi: bcm2835: Allow arbitrary number of slaves

Since commit 571e31fa60b3 ("spi: bcm2835: Cache CS register value for
->prepare_message()"), the number of slaves has been limited by a
compile-time constant.  This was necessitated by statically-sized
arrays in the driver private data which contain per-slave register
values.

As suggested by Mark, move those register values to a per-slave
controller_state which is allocated on ->setup and freed on ->cleanup.
The limitation on the number of slaves is thus lifted.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Joe Burmeister <joe.burmeister@devtank.co.uk>
Cc: Phil Elwell <phil@raspberrypi.com>
Link: https://lore.kernel.org/r/a847c01f09400801e74e0630bf5a0197591554da.1622150204.git.lukas@wunner.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 204 ++++++++++++++++++++++----------------
 1 file changed, 119 insertions(+), 85 deletions(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index fe40626e45aa8..5f8771fe1a31d 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -68,7 +68,6 @@
 #define BCM2835_SPI_FIFO_SIZE		64
 #define BCM2835_SPI_FIFO_SIZE_3_4	48
 #define BCM2835_SPI_DMA_MIN_LENGTH	96
-#define BCM2835_SPI_NUM_CS		24  /* raise as necessary */
 #define BCM2835_SPI_MODE_BITS	(SPI_CPOL | SPI_CPHA | SPI_CS_HIGH \
 				| SPI_NO_CS | SPI_3WIRE)
 
@@ -96,8 +95,6 @@ MODULE_PARM_DESC(polling_limit_us,
  * @rx_prologue: bytes received without DMA if first RX sglist entry's
  *	length is not a multiple of 4 (to overcome hardware limitation)
  * @tx_spillover: whether @tx_prologue spills over to second TX sglist entry
- * @prepare_cs: precalculated CS register value for ->prepare_message()
- *	(uses slave-specific clock polarity and phase settings)
  * @debugfs_dir: the debugfs directory - neede to remove debugfs when
  *      unloading the module
  * @count_transfer_polling: count of how often polling mode is used
@@ -107,7 +104,7 @@ MODULE_PARM_DESC(polling_limit_us,
  *      These are counted as well in @count_transfer_polling and
  *      @count_transfer_irq
  * @count_transfer_dma: count how often dma mode is used
- * @chip_select: SPI slave currently selected
+ * @slv: SPI slave currently selected
  *	(used by bcm2835_spi_dma_tx_done() to write @clear_rx_cs)
  * @tx_dma_active: whether a TX DMA descriptor is in progress
  * @rx_dma_active: whether a RX DMA descriptor is in progress
@@ -115,11 +112,6 @@ MODULE_PARM_DESC(polling_limit_us,
  * @fill_tx_desc: preallocated TX DMA descriptor used for RX-only transfers
  *	(cyclically copies from zero page to TX FIFO)
  * @fill_tx_addr: bus address of zero page
- * @clear_rx_desc: preallocated RX DMA descriptor used for TX-only transfers
- *	(cyclically clears RX FIFO by writing @clear_rx_cs to CS register)
- * @clear_rx_addr: bus address of @clear_rx_cs
- * @clear_rx_cs: precalculated CS register value to clear RX FIFO
- *	(uses slave-specific clock polarity and phase settings)
  */
 struct bcm2835_spi {
 	void __iomem *regs;
@@ -134,7 +126,6 @@ struct bcm2835_spi {
 	int tx_prologue;
 	int rx_prologue;
 	unsigned int tx_spillover;
-	u32 prepare_cs[BCM2835_SPI_NUM_CS];
 
 	struct dentry *debugfs_dir;
 	u64 count_transfer_polling;
@@ -142,14 +133,28 @@ struct bcm2835_spi {
 	u64 count_transfer_irq_after_polling;
 	u64 count_transfer_dma;
 
-	u8 chip_select;
+	struct bcm2835_spidev *slv;
 	unsigned int tx_dma_active;
 	unsigned int rx_dma_active;
 	struct dma_async_tx_descriptor *fill_tx_desc;
 	dma_addr_t fill_tx_addr;
-	struct dma_async_tx_descriptor *clear_rx_desc[BCM2835_SPI_NUM_CS];
+};
+
+/**
+ * struct bcm2835_spidev - BCM2835 SPI slave
+ * @prepare_cs: precalculated CS register value for ->prepare_message()
+ *	(uses slave-specific clock polarity and phase settings)
+ * @clear_rx_desc: preallocated RX DMA descriptor used for TX-only transfers
+ *	(cyclically clears RX FIFO by writing @clear_rx_cs to CS register)
+ * @clear_rx_addr: bus address of @clear_rx_cs
+ * @clear_rx_cs: precalculated CS register value to clear RX FIFO
+ *	(uses slave-specific clock polarity and phase settings)
+ */
+struct bcm2835_spidev {
+	u32 prepare_cs;
+	struct dma_async_tx_descriptor *clear_rx_desc;
 	dma_addr_t clear_rx_addr;
-	u32 clear_rx_cs[BCM2835_SPI_NUM_CS] ____cacheline_aligned;
+	u32 clear_rx_cs ____cacheline_aligned;
 };
 
 #if defined(CONFIG_DEBUG_FS)
@@ -624,8 +629,7 @@ static void bcm2835_spi_dma_tx_done(void *data)
 
 	/* busy-wait for TX FIFO to empty */
 	while (!(bcm2835_rd(bs, BCM2835_SPI_CS) & BCM2835_SPI_CS_DONE))
-		bcm2835_wr(bs, BCM2835_SPI_CS,
-			   bs->clear_rx_cs[bs->chip_select]);
+		bcm2835_wr(bs, BCM2835_SPI_CS, bs->slv->clear_rx_cs);
 
 	bs->tx_dma_active = false;
 	smp_wmb();
@@ -646,18 +650,18 @@ static void bcm2835_spi_dma_tx_done(void *data)
 /**
  * bcm2835_spi_prepare_sg() - prepare and submit DMA descriptor for sglist
  * @ctlr: SPI master controller
- * @spi: SPI slave
  * @tfr: SPI transfer
  * @bs: BCM2835 SPI controller
+ * @slv: BCM2835 SPI slave
  * @is_tx: whether to submit DMA descriptor for TX or RX sglist
  *
  * Prepare and submit a DMA descriptor for the TX or RX sglist of @tfr.
  * Return 0 on success or a negative error number.
  */
 static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
-				  struct spi_device *spi,
 				  struct spi_transfer *tfr,
 				  struct bcm2835_spi *bs,
+				  struct bcm2835_spidev *slv,
 				  bool is_tx)
 {
 	struct dma_chan *chan;
@@ -697,7 +701,7 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
 	} else if (!tfr->rx_buf) {
 		desc->callback = bcm2835_spi_dma_tx_done;
 		desc->callback_param = ctlr;
-		bs->chip_select = spi->chip_select;
+		bs->slv = slv;
 	}
 
 	/* submit it to DMA-engine */
@@ -709,8 +713,8 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
 /**
  * bcm2835_spi_transfer_one_dma() - perform SPI transfer using DMA engine
  * @ctlr: SPI master controller
- * @spi: SPI slave
  * @tfr: SPI transfer
+ * @slv: BCM2835 SPI slave
  * @cs: CS register
  *
  * For *bidirectional* transfers (both tx_buf and rx_buf are non-%NULL), set up
@@ -754,8 +758,8 @@ static int bcm2835_spi_prepare_sg(struct spi_controller *ctlr,
  * performed at the end of an RX-only transfer.
  */
 static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
-					struct spi_device *spi,
 					struct spi_transfer *tfr,
+					struct bcm2835_spidev *slv,
 					u32 cs)
 {
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
@@ -773,7 +777,7 @@ static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 
 	/* setup tx-DMA */
 	if (bs->tx_buf) {
-		ret = bcm2835_spi_prepare_sg(ctlr, spi, tfr, bs, true);
+		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, true);
 	} else {
 		cookie = dmaengine_submit(bs->fill_tx_desc);
 		ret = dma_submit_error(cookie);
@@ -799,9 +803,9 @@ static int bcm2835_spi_transfer_one_dma(struct spi_controller *ctlr,
 	 * this saves 10us or more.
 	 */
 	if (bs->rx_buf) {
-		ret = bcm2835_spi_prepare_sg(ctlr, spi, tfr, bs, false);
+		ret = bcm2835_spi_prepare_sg(ctlr, tfr, bs, slv, false);
 	} else {
-		cookie = dmaengine_submit(bs->clear_rx_desc[spi->chip_select]);
+		cookie = dmaengine_submit(slv->clear_rx_desc);
 		ret = dma_submit_error(cookie);
 	}
 	if (ret) {
@@ -850,8 +854,6 @@ static bool bcm2835_spi_can_dma(struct spi_controller *ctlr,
 static void bcm2835_dma_release(struct spi_controller *ctlr,
 				struct bcm2835_spi *bs)
 {
-	int i;
-
 	if (ctlr->dma_tx) {
 		dmaengine_terminate_sync(ctlr->dma_tx);
 
@@ -870,17 +872,6 @@ static void bcm2835_dma_release(struct spi_controller *ctlr,
 
 	if (ctlr->dma_rx) {
 		dmaengine_terminate_sync(ctlr->dma_rx);
-
-		for (i = 0; i < BCM2835_SPI_NUM_CS; i++)
-			if (bs->clear_rx_desc[i])
-				dmaengine_desc_free(bs->clear_rx_desc[i]);
-
-		if (bs->clear_rx_addr)
-			dma_unmap_single(ctlr->dma_rx->device->dev,
-					 bs->clear_rx_addr,
-					 sizeof(bs->clear_rx_cs),
-					 DMA_TO_DEVICE);
-
 		dma_release_channel(ctlr->dma_rx);
 		ctlr->dma_rx = NULL;
 	}
@@ -892,7 +883,7 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
 	struct dma_slave_config slave_config;
 	const __be32 *addr;
 	dma_addr_t dma_reg_base;
-	int ret, i;
+	int ret;
 
 	/* base address in dma-space */
 	addr = of_get_address(ctlr->dev.of_node, 0, NULL, NULL);
@@ -972,35 +963,6 @@ static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
 	if (ret)
 		goto err_config;
 
-	bs->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
-					   bs->clear_rx_cs,
-					   sizeof(bs->clear_rx_cs),
-					   DMA_TO_DEVICE);
-	if (dma_mapping_error(ctlr->dma_rx->device->dev, bs->clear_rx_addr)) {
-		dev_err(dev, "cannot map clear_rx_cs - not using DMA mode\n");
-		bs->clear_rx_addr = 0;
-		ret = -ENOMEM;
-		goto err_release;
-	}
-
-	for (i = 0; i < BCM2835_SPI_NUM_CS; i++) {
-		bs->clear_rx_desc[i] = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
-					   bs->clear_rx_addr + i * sizeof(u32),
-					   sizeof(u32), 0,
-					   DMA_MEM_TO_DEV, 0);
-		if (!bs->clear_rx_desc[i]) {
-			dev_err(dev, "cannot prepare clear_rx_desc - not using DMA mode\n");
-			ret = -ENOMEM;
-			goto err_release;
-		}
-
-		ret = dmaengine_desc_set_reuse(bs->clear_rx_desc[i]);
-		if (ret) {
-			dev_err(dev, "cannot reuse clear_rx_desc - not using DMA mode\n");
-			goto err_release;
-		}
-	}
-
 	/* all went well, so set can_dma */
 	ctlr->can_dma = bcm2835_spi_can_dma;
 
@@ -1082,9 +1044,10 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
 				    struct spi_transfer *tfr)
 {
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
 	unsigned long spi_hz, clk_hz, cdiv;
 	unsigned long hz_per_byte, byte_limit;
-	u32 cs = bs->prepare_cs[spi->chip_select];
+	u32 cs = slv->prepare_cs;
 
 	/* set clock */
 	spi_hz = tfr->speed_hz;
@@ -1133,7 +1096,7 @@ static int bcm2835_spi_transfer_one(struct spi_controller *ctlr,
 	 * this 1 idle clock cycle pattern but runs the spi clock without gaps
 	 */
 	if (ctlr->can_dma && bcm2835_spi_can_dma(ctlr, spi, tfr))
-		return bcm2835_spi_transfer_one_dma(ctlr, spi, tfr, cs);
+		return bcm2835_spi_transfer_one_dma(ctlr, tfr, slv, cs);
 
 	/* run in interrupt-mode */
 	return bcm2835_spi_transfer_one_irq(ctlr, spi, tfr, cs, true);
@@ -1144,6 +1107,7 @@ static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
 {
 	struct spi_device *spi = msg->spi;
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
 	int ret;
 
 	if (ctlr->can_dma) {
@@ -1162,7 +1126,7 @@ static int bcm2835_spi_prepare_message(struct spi_controller *ctlr,
 	 * Set up clock polarity before spi_transfer_one_message() asserts
 	 * chip select to avoid a gratuitous clock signal edge.
 	 */
-	bcm2835_wr(bs, BCM2835_SPI_CS, bs->prepare_cs[spi->chip_select]);
+	bcm2835_wr(bs, BCM2835_SPI_CS, slv->prepare_cs);
 
 	return 0;
 }
@@ -1188,17 +1152,81 @@ static int chip_match_name(struct gpio_chip *chip, void *data)
 	return !strcmp(chip->label, data);
 }
 
+static void bcm2835_spi_cleanup(struct spi_device *spi)
+{
+	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
+	struct spi_controller *ctlr = spi->controller;
+
+	if (slv->clear_rx_desc)
+		dmaengine_desc_free(slv->clear_rx_desc);
+
+	if (slv->clear_rx_addr)
+		dma_unmap_single(ctlr->dma_rx->device->dev,
+				 slv->clear_rx_addr,
+				 sizeof(u32),
+				 DMA_TO_DEVICE);
+
+	kfree(slv);
+}
+
+static int bcm2835_spi_setup_dma(struct spi_controller *ctlr,
+				 struct spi_device *spi,
+				 struct bcm2835_spi *bs,
+				 struct bcm2835_spidev *slv)
+{
+	int ret;
+
+	if (!ctlr->dma_rx)
+		return 0;
+
+	slv->clear_rx_addr = dma_map_single(ctlr->dma_rx->device->dev,
+					    &slv->clear_rx_cs,
+					    sizeof(u32),
+					    DMA_TO_DEVICE);
+	if (dma_mapping_error(ctlr->dma_rx->device->dev, slv->clear_rx_addr)) {
+		dev_err(&spi->dev, "cannot map clear_rx_cs\n");
+		slv->clear_rx_addr = 0;
+		return -ENOMEM;
+	}
+
+	slv->clear_rx_desc = dmaengine_prep_dma_cyclic(ctlr->dma_rx,
+						       slv->clear_rx_addr,
+						       sizeof(u32), 0,
+						       DMA_MEM_TO_DEV, 0);
+	if (!slv->clear_rx_desc) {
+		dev_err(&spi->dev, "cannot prepare clear_rx_desc\n");
+		return -ENOMEM;
+	}
+
+	ret = dmaengine_desc_set_reuse(slv->clear_rx_desc);
+	if (ret) {
+		dev_err(&spi->dev, "cannot reuse clear_rx_desc\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 static int bcm2835_spi_setup(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	struct bcm2835_spidev *slv = spi_get_ctldata(spi);
 	struct gpio_chip *chip;
+	int ret;
 	u32 cs;
 
-	if (spi->chip_select >= BCM2835_SPI_NUM_CS) {
-		dev_err(&spi->dev, "only %d chip-selects supported\n",
-			BCM2835_SPI_NUM_CS - 1);
-		return -EINVAL;
+	if (!slv) {
+		slv = kzalloc(ALIGN(sizeof(*slv), dma_get_cache_alignment()),
+			      GFP_KERNEL);
+		if (!slv)
+			return -ENOMEM;
+
+		spi_set_ctldata(spi, slv);
+
+		ret = bcm2835_spi_setup_dma(ctlr, spi, bs, slv);
+		if (ret)
+			goto err_cleanup;
 	}
 
 	/*
@@ -1212,20 +1240,19 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 		cs |= BCM2835_SPI_CS_CPOL;
 	if (spi->mode & SPI_CPHA)
 		cs |= BCM2835_SPI_CS_CPHA;
-	bs->prepare_cs[spi->chip_select] = cs;
+	slv->prepare_cs = cs;
 
 	/*
 	 * Precalculate SPI slave's CS register value to clear RX FIFO
 	 * in case of a TX-only DMA transfer.
 	 */
 	if (ctlr->dma_rx) {
-		bs->clear_rx_cs[spi->chip_select] = cs |
-						    BCM2835_SPI_CS_TA |
-						    BCM2835_SPI_CS_DMAEN |
-						    BCM2835_SPI_CS_CLEAR_RX;
+		slv->clear_rx_cs = cs | BCM2835_SPI_CS_TA |
+					BCM2835_SPI_CS_DMAEN |
+					BCM2835_SPI_CS_CLEAR_RX;
 		dma_sync_single_for_device(ctlr->dma_rx->device->dev,
-					   bs->clear_rx_addr,
-					   sizeof(bs->clear_rx_cs),
+					   slv->clear_rx_addr,
+					   sizeof(u32),
 					   DMA_TO_DEVICE);
 	}
 
@@ -1247,7 +1274,8 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 		 */
 		dev_err(&spi->dev,
 			"setup: only two native chip-selects are supported\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_cleanup;
 	}
 
 	/*
@@ -1268,14 +1296,20 @@ static int bcm2835_spi_setup(struct spi_device *spi)
 						  DRV_NAME,
 						  GPIO_LOOKUP_FLAGS_DEFAULT,
 						  GPIOD_OUT_LOW);
-	if (IS_ERR(spi->cs_gpiod))
-		return PTR_ERR(spi->cs_gpiod);
+	if (IS_ERR(spi->cs_gpiod)) {
+		ret = PTR_ERR(spi->cs_gpiod);
+		goto err_cleanup;
+	}
 
 	/* and set up the "mode" and level */
 	dev_info(&spi->dev, "setting up native-CS%i to use GPIO\n",
 		 spi->chip_select);
 
 	return 0;
+
+err_cleanup:
+	bcm2835_spi_cleanup(spi);
+	return ret;
 }
 
 static int bcm2835_spi_probe(struct platform_device *pdev)
@@ -1284,8 +1318,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	struct bcm2835_spi *bs;
 	int err;
 
-	ctlr = devm_spi_alloc_master(&pdev->dev, ALIGN(sizeof(*bs),
-						  dma_get_cache_alignment()));
+	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*bs));
 	if (!ctlr)
 		return -ENOMEM;
 
@@ -1296,6 +1329,7 @@ static int bcm2835_spi_probe(struct platform_device *pdev)
 	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
 	ctlr->num_chipselect = 3;
 	ctlr->setup = bcm2835_spi_setup;
+	ctlr->cleanup = bcm2835_spi_cleanup;
 	ctlr->transfer_one = bcm2835_spi_transfer_one;
 	ctlr->handle_err = bcm2835_spi_handle_err;
 	ctlr->prepare_message = bcm2835_spi_prepare_message;
-- 
GitLab


From ac5688637144644f06ed1f3c6d4dd8bb7db96020 Mon Sep 17 00:00:00 2001
From: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Date: Tue, 4 May 2021 20:32:49 +0200
Subject: [PATCH 2037/3804] media: em28xx: Fix possible memory leak of em28xx
 struct

The em28xx struct kref isn't being decreased after an error in the
em28xx_ir_init, leading to a possible memory leak.

A kref_put and em28xx_shutdown_buttons are added to the error handler code.

Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/em28xx/em28xx-input.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/em28xx/em28xx-input.c b/drivers/media/usb/em28xx/em28xx-input.c
index 5aa15a7a49def..59529cbf9cd0b 100644
--- a/drivers/media/usb/em28xx/em28xx-input.c
+++ b/drivers/media/usb/em28xx/em28xx-input.c
@@ -720,7 +720,8 @@ static int em28xx_ir_init(struct em28xx *dev)
 			dev->board.has_ir_i2c = 0;
 			dev_warn(&dev->intf->dev,
 				 "No i2c IR remote control device found.\n");
-			return -ENODEV;
+			err = -ENODEV;
+			goto ref_put;
 		}
 	}
 
@@ -735,7 +736,7 @@ static int em28xx_ir_init(struct em28xx *dev)
 
 	ir = kzalloc(sizeof(*ir), GFP_KERNEL);
 	if (!ir)
-		return -ENOMEM;
+		goto ref_put;
 	rc = rc_allocate_device(RC_DRIVER_SCANCODE);
 	if (!rc)
 		goto error;
@@ -839,6 +840,9 @@ error:
 	dev->ir = NULL;
 	rc_free_device(rc);
 	kfree(ir);
+ref_put:
+	em28xx_shutdown_buttons(dev);
+	kref_put(&dev->ref, em28xx_free_device);
 	return err;
 }
 
-- 
GitLab


From ba1ed4ae760a81caf39f54232e089d95157a0dba Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Wed, 5 May 2021 14:23:45 +0200
Subject: [PATCH 2038/3804] media: rkvdec: Fix .buf_prepare

The driver should only set the payload on .buf_prepare if the
buffer is CAPTURE type. If an OUTPUT buffer has a zero bytesused
set by userspace then v4l2-core will set it to buffer length.

If we overwrite bytesused for OUTPUT buffers, too, then
vb2_get_plane_payload() will return incorrect value which might be then
written to hw registers by the driver in rkvdec-h264.c.

[Changed the comment and used V4L2_TYPE_IS_CAPTURE macro]

Fixes: cd33c830448ba ("media: rkvdec: Add the rkvdec driver")
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/rkvdec/rkvdec.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c
index 8c17615f3a7ab..7131156c1f2cf 100644
--- a/drivers/staging/media/rkvdec/rkvdec.c
+++ b/drivers/staging/media/rkvdec/rkvdec.c
@@ -481,7 +481,15 @@ static int rkvdec_buf_prepare(struct vb2_buffer *vb)
 		if (vb2_plane_size(vb, i) < sizeimage)
 			return -EINVAL;
 	}
-	vb2_set_plane_payload(vb, 0, f->fmt.pix_mp.plane_fmt[0].sizeimage);
+
+	/*
+	 * Buffer's bytesused must be written by driver for CAPTURE buffers.
+	 * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets
+	 * it to buffer length).
+	 */
+	if (V4L2_TYPE_IS_CAPTURE(vq->type))
+		vb2_set_plane_payload(vb, 0, f->fmt.pix_mp.plane_fmt[0].sizeimage);
+
 	return 0;
 }
 
-- 
GitLab


From 082aaecff35fbe1937531057911b1dd1fc6b496e Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Date: Wed, 5 May 2021 14:23:46 +0200
Subject: [PATCH 2039/3804] media: hantro: Fix .buf_prepare

The driver should only set the payload on .buf_prepare if the
buffer is CAPTURE type. If an OUTPUT buffer has a zero bytesused
set by userspace then v4l2-core will set it to buffer length.

If we overwrite bytesused for OUTPUT buffers, too, then
vb2_get_plane_payload() will return incorrect value which might be then
written to hw registers by the driver in hantro_g1_h264_dec.c.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_v4l2.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
index 1bc118e375a12..7ccc6405036ae 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.c
+++ b/drivers/staging/media/hantro/hantro_v4l2.c
@@ -639,7 +639,14 @@ static int hantro_buf_prepare(struct vb2_buffer *vb)
 	ret = hantro_buf_plane_check(vb, pix_fmt);
 	if (ret)
 		return ret;
-	vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
+	/*
+	 * Buffer's bytesused must be written by driver for CAPTURE buffers.
+	 * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets
+	 * it to buffer length).
+	 */
+	if (V4L2_TYPE_IS_CAPTURE(vq->type))
+		vb2_set_plane_payload(vb, 0, pix_fmt->plane_fmt[0].sizeimage);
+
 	return 0;
 }
 
-- 
GitLab


From d84b9202d712309840f8b5abee0ed272506563bd Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Date: Wed, 5 May 2021 14:23:47 +0200
Subject: [PATCH 2040/3804] media: cedrus: Fix .buf_prepare

The driver should only set the payload on .buf_prepare if the
buffer is CAPTURE type. If an OUTPUT buffer has a zero bytesused
set by userspace then v4l2-core will set it to buffer length.

If we overwrite bytesused for OUTPUT buffers, too, then
vb2_get_plane_payload() will return incorrect value which might be then
written to hw registers by the driver in cedrus_h264.c or cedrus_vp8.c.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/sunxi/cedrus/cedrus_video.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
index 9ddd789d0b1f2..32c13ecb22d83 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
@@ -457,7 +457,13 @@ static int cedrus_buf_prepare(struct vb2_buffer *vb)
 	if (vb2_plane_size(vb, 0) < pix_fmt->sizeimage)
 		return -EINVAL;
 
-	vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage);
+	/*
+	 * Buffer's bytesused must be written by driver for CAPTURE buffers.
+	 * (for OUTPUT buffers, if userspace passes 0 bytesused, v4l2-core sets
+	 * it to buffer length).
+	 */
+	if (V4L2_TYPE_IS_CAPTURE(vq->type))
+		vb2_set_plane_payload(vb, 0, pix_fmt->sizeimage);
 
 	return 0;
 }
-- 
GitLab


From ef677df92e450b90688828a5e44b94c8dc156e62 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 6 May 2021 14:06:57 +0200
Subject: [PATCH 2041/3804] media: adv7842: support EDIDs up to 4 blocks

The adv7842 driver didn't support EDIDs of 3 or 4 blocks, even though the
hardware supports this.

It is a bit more complicated due to the fact that the adv7842 can expose
two EDIDs: one digital, one analog, for DVI-I connectors. In that case the
VGA_EDID_ENABLE bit is set and blocks 0 and 1 of the EDID eeprom are used
for the DVI-D part and block 2 is used for the DVI-A part of the DVI-I
connector.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/adv7842.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c
index ff10af757b997..78e61fe6f2f0c 100644
--- a/drivers/media/i2c/adv7842.c
+++ b/drivers/media/i2c/adv7842.c
@@ -98,12 +98,12 @@ struct adv7842_state {
 
 	v4l2_std_id norm;
 	struct {
-		u8 edid[256];
+		u8 edid[512];
 		u32 blocks;
 		u32 present;
 	} hdmi_edid;
 	struct {
-		u8 edid[256];
+		u8 edid[128];
 		u32 blocks;
 		u32 present;
 	} vga_edid;
@@ -720,6 +720,9 @@ static int edid_write_vga_segment(struct v4l2_subdev *sd)
 
 	v4l2_dbg(2, debug, sd, "%s: write EDID on VGA port\n", __func__);
 
+	if (!state->vga_edid.present)
+		return 0;
+
 	/* HPA disable on port A and B */
 	io_write_and_or(sd, 0x20, 0xcf, 0x00);
 
@@ -763,7 +766,7 @@ static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port)
 	struct adv7842_state *state = to_state(sd);
 	const u8 *edid = state->hdmi_edid.edid;
 	u32 blocks = state->hdmi_edid.blocks;
-	int spa_loc;
+	unsigned int spa_loc;
 	u16 pa, parent_pa;
 	int err = 0;
 	int i;
@@ -796,12 +799,14 @@ static int edid_write_hdmi_segment(struct v4l2_subdev *sd, u8 port)
 		pa = (edid[spa_loc] << 8) | edid[spa_loc + 1];
 	}
 
-	/* edid segment pointer '0' for HDMI ports */
-	rep_write_and_or(sd, 0x77, 0xef, 0x00);
 
-	for (i = 0; !err && i < blocks * 128; i += I2C_SMBUS_BLOCK_MAX)
+	for (i = 0; !err && i < blocks * 128; i += I2C_SMBUS_BLOCK_MAX) {
+		/* set edid segment pointer for HDMI ports */
+		if (i % 256 == 0)
+			rep_write_and_or(sd, 0x77, 0xef, i >= 256 ? 0x10 : 0x00);
 		err = i2c_smbus_write_i2c_block_data(state->i2c_edid, i,
 						     I2C_SMBUS_BLOCK_MAX, edid + i);
+	}
 	if (err)
 		return err;
 
@@ -2491,9 +2496,17 @@ static int adv7842_get_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid)
 	return 0;
 }
 
+/*
+ * If the VGA_EDID_ENABLE bit is set (Repeater Map 0x7f, bit 7), then
+ * the first two blocks of the EDID are for the HDMI, and the first block
+ * of segment 1 (i.e. the third block of the EDID) is for VGA.
+ * So if a VGA EDID is installed, then the maximum size of the HDMI EDID
+ * is 2 blocks.
+ */
 static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 {
 	struct adv7842_state *state = to_state(sd);
+	unsigned int max_blocks = e->pad == ADV7842_EDID_PORT_VGA ? 1 : 4;
 	int err = 0;
 
 	memset(e->reserved, 0, sizeof(e->reserved));
@@ -2502,8 +2515,12 @@ static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 		return -EINVAL;
 	if (e->start_block != 0)
 		return -EINVAL;
-	if (e->blocks > 2) {
-		e->blocks = 2;
+	if (e->pad < ADV7842_EDID_PORT_VGA && state->vga_edid.blocks)
+		max_blocks = 2;
+	if (e->pad == ADV7842_EDID_PORT_VGA && state->hdmi_edid.blocks > 2)
+		return -EBUSY;
+	if (e->blocks > max_blocks) {
+		e->blocks = max_blocks;
 		return -E2BIG;
 	}
 
@@ -2514,7 +2531,7 @@ static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 
 	switch (e->pad) {
 	case ADV7842_EDID_PORT_VGA:
-		memset(&state->vga_edid.edid, 0, 256);
+		memset(&state->vga_edid.edid, 0, sizeof(state->vga_edid.edid));
 		state->vga_edid.blocks = e->blocks;
 		state->vga_edid.present = e->blocks ? 0x1 : 0x0;
 		if (e->blocks)
@@ -2523,7 +2540,7 @@ static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 		break;
 	case ADV7842_EDID_PORT_A:
 	case ADV7842_EDID_PORT_B:
-		memset(&state->hdmi_edid.edid, 0, 256);
+		memset(&state->hdmi_edid.edid, 0, sizeof(state->hdmi_edid.edid));
 		state->hdmi_edid.blocks = e->blocks;
 		if (e->blocks) {
 			state->hdmi_edid.present |= 0x04 << e->pad;
-- 
GitLab


From f9c2fd3bb85768f35e1d2bb6b357a214db3b7817 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 6 May 2021 23:06:34 +0200
Subject: [PATCH 2042/3804] media: ttpci: switch from 'pci_' to 'dma_' API

The wrappers in include/linux/pci-dma-compat.h should go away.

The patch has been generated with the coccinelle script below and has been
hand modified to replace GFP_ with a correct flag.
It has been compile tested.

When memory is allocated in 'ace_allocate_descriptors()' and
'ace_init()' GFP_KERNEL can be used because both functions are called from
the probe function and no lock is acquired.

@@
@@
-    PCI_DMA_BIDIRECTIONAL
+    DMA_BIDIRECTIONAL

@@
@@
-    PCI_DMA_TODEVICE
+    DMA_TO_DEVICE

@@
@@
-    PCI_DMA_FROMDEVICE
+    DMA_FROM_DEVICE

@@
@@
-    PCI_DMA_NONE
+    DMA_NONE

@@
expression e1, e2, e3;
@@
-    pci_alloc_consistent(e1, e2, e3)
+    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)

@@
expression e1, e2, e3;
@@
-    pci_zalloc_consistent(e1, e2, e3)
+    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)

@@
expression e1, e2, e3, e4;
@@
-    pci_free_consistent(e1, e2, e3, e4)
+    dma_free_coherent(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_map_single(e1, e2, e3, e4)
+    dma_map_single(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_single(e1, e2, e3, e4)
+    dma_unmap_single(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4, e5;
@@
-    pci_map_page(e1, e2, e3, e4, e5)
+    dma_map_page(&e1->dev, e2, e3, e4, e5)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_page(e1, e2, e3, e4)
+    dma_unmap_page(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_map_sg(e1, e2, e3, e4)
+    dma_map_sg(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_sg(e1, e2, e3, e4)
+    dma_unmap_sg(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
+    dma_sync_single_for_cpu(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_single_for_device(e1, e2, e3, e4)
+    dma_sync_single_for_device(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
+    dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_sg_for_device(e1, e2, e3, e4)
+    dma_sync_sg_for_device(&e1->dev, e2, e3, e4)

@@
expression e1, e2;
@@
-    pci_dma_mapping_error(e1, e2)
+    dma_mapping_error(&e1->dev, e2)

@@
expression e1, e2;
@@
-    pci_set_dma_mask(e1, e2)
+    dma_set_mask(&e1->dev, e2)

@@
expression e1, e2;
@@
-    pci_set_consistent_dma_mask(e1, e2)
+    dma_set_coherent_mask(&e1->dev, e2)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/ttpci/budget-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/ttpci/budget-core.c b/drivers/media/pci/ttpci/budget-core.c
index d405eea5c37f4..5d5796f244695 100644
--- a/drivers/media/pci/ttpci/budget-core.c
+++ b/drivers/media/pci/ttpci/budget-core.c
@@ -180,7 +180,8 @@ static void vpeirq(struct tasklet_struct *t)
 	u32 count;
 
 	/* Ensure streamed PCI data is synced to CPU */
-	pci_dma_sync_sg_for_cpu(budget->dev->pci, budget->pt.slist, budget->pt.nents, PCI_DMA_FROMDEVICE);
+	dma_sync_sg_for_cpu(&budget->dev->pci->dev, budget->pt.slist,
+			    budget->pt.nents, DMA_FROM_DEVICE);
 
 	/* nearest lower position divisible by 188 */
 	newdma -= newdma % 188;
-- 
GitLab


From 01fe904c9afd26e79c1f73aa0ca2e3d785e5e319 Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Sun, 9 May 2021 10:12:31 +0200
Subject: [PATCH 2043/3804] media: exynos4-is: Fix a use after free in
 isp_video_release

In isp_video_release, file->private_data is freed via
_vb2_fop_release()->v4l2_fh_release(). But the freed
file->private_data is still used in v4l2_fh_is_singular_file()
->v4l2_fh_is_singular(file->private_data), which is a use
after free bug.

My patch uses a variable 'is_singular_file' to avoid the uaf.
v3: https://lore.kernel.org/patchwork/patch/1419058/

Fixes: 34947b8aebe3f ("[media] exynos4-is: Add the FIMC-IS ISP capture DMA driver")
Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/exynos4-is/fimc-isp-video.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c
index 8d9dc597deaaf..83688a7982f70 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp-video.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c
@@ -305,17 +305,20 @@ static int isp_video_release(struct file *file)
 	struct fimc_is_video *ivc = &isp->video_capture;
 	struct media_entity *entity = &ivc->ve.vdev.entity;
 	struct media_device *mdev = entity->graph_obj.mdev;
+	bool is_singular_file;
 
 	mutex_lock(&isp->video_lock);
 
-	if (v4l2_fh_is_singular_file(file) && ivc->streaming) {
+	is_singular_file = v4l2_fh_is_singular_file(file);
+
+	if (is_singular_file && ivc->streaming) {
 		media_pipeline_stop(entity);
 		ivc->streaming = 0;
 	}
 
 	_vb2_fop_release(file, NULL);
 
-	if (v4l2_fh_is_singular_file(file)) {
+	if (is_singular_file) {
 		fimc_pipeline_call(&ivc->ve, close);
 
 		mutex_lock(&mdev->graph_mutex);
-- 
GitLab


From 7dd0c9e547b6924e18712b6b51aa3cba1896ee2c Mon Sep 17 00:00:00 2001
From: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Date: Sun, 9 May 2021 10:24:02 +0200
Subject: [PATCH 2044/3804] media: v4l2-core: Avoid the dangling pointer in
 v4l2_fh_release

A use-after-free bug is caused by the dangling pointer
filp->private_data in v4l2_fh_release.
See https://lore.kernel.org/patchwork/patch/1419058/.

My patch sets the dangling pointer to NULL to provide
robustness.

Signed-off-by: Lv Yunlong <lyl2019@mail.ustc.edu.cn>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-fh.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/v4l2-core/v4l2-fh.c b/drivers/media/v4l2-core/v4l2-fh.c
index 684574f58e82d..90eec79ee995a 100644
--- a/drivers/media/v4l2-core/v4l2-fh.c
+++ b/drivers/media/v4l2-core/v4l2-fh.c
@@ -96,6 +96,7 @@ int v4l2_fh_release(struct file *filp)
 		v4l2_fh_del(fh);
 		v4l2_fh_exit(fh);
 		kfree(fh);
+		filp->private_data = NULL;
 	}
 	return 0;
 }
-- 
GitLab


From d2a0f8d6afdabf5d03a1b2fce73326bf0666ec18 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 11 May 2021 13:55:24 +0200
Subject: [PATCH 2045/3804] media: saa7134: Remove unnecessary INIT_LIST_HEAD()

The list_head saa7134_devlist is initialized statically.
It is unnecessary to initialize by INIT_LIST_HEAD().

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/saa7134/saa7134-core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
index efb757d5168a6..ec8dd41f9ebb9 100644
--- a/drivers/media/pci/saa7134/saa7134-core.c
+++ b/drivers/media/pci/saa7134/saa7134-core.c
@@ -1524,7 +1524,6 @@ static struct pci_driver saa7134_pci_driver = {
 
 static int __init saa7134_init(void)
 {
-	INIT_LIST_HEAD(&saa7134_devlist);
 	pr_info("saa7130/34: v4l2 driver version %s loaded\n",
 	       SAA7134_VERSION);
 	return pci_register_driver(&saa7134_pci_driver);
-- 
GitLab


From 1a4520090681853e6b850cbe54b27247a013e0e5 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Wed, 12 May 2021 17:18:36 +0200
Subject: [PATCH 2046/3804] media: bt8xx: Fix a missing check bug in
 bt878_probe

In 'bt878_irq', the driver calls 'tasklet_schedule', but this tasklet is
set in 'dvb_bt8xx_load_card' of another driver 'dvb-bt8xx'.
However, this two drivers are separate. The user may not load the
'dvb-bt8xx' driver when loading the 'bt8xx' driver, that is, the tasklet
has not been initialized when 'tasklet_schedule' is called, so it is
necessary to check whether the tasklet is initialized in 'bt878_probe'.

Fix this by adding a check at the end of bt878_probe.

The KASAN report reveals it:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
PGD 800000006aab2067 P4D 800000006aab2067 PUD 6b2ea067 PMD 0
Oops: 0010 [#1] PREEMPT SMP KASAN PTI
CPU: 2 PID: 8724 Comm: syz-executor.0 Not tainted 4.19.177-
gdba4159c14ef-dirty #40
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-
gc9ba5276e321-prebuilt.qemu.org 04/01/2014
RIP: 0010:          (null)
Code: Bad RIP value.
RSP: 0018:ffff88806c287ea0 EFLAGS: 00010246
RAX: fffffbfff1b01774 RBX: dffffc0000000000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 1ffffffff1b01775 RDI: 0000000000000000
RBP: ffff88806c287f00 R08: fffffbfff1b01774 R09: fffffbfff1b01774
R10: 0000000000000001 R11: fffffbfff1b01773 R12: 0000000000000000
R13: ffff88806c29f530 R14: ffffffff8d80bb88 R15: ffffffff8d80bb90
FS:  00007f6b550e6700(0000) GS:ffff88806c280000(0000) knlGS:
0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffffffffd6 CR3: 000000005ec98000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <IRQ>
 tasklet_action_common.isra.17+0x141/0x420 kernel/softirq.c:522
 tasklet_action+0x50/0x70 kernel/softirq.c:540
 __do_softirq+0x224/0x92c kernel/softirq.c:292
 invoke_softirq kernel/softirq.c:372 [inline]
 irq_exit+0x15a/0x180 kernel/softirq.c:412
 exiting_irq arch/x86/include/asm/apic.h:535 [inline]
 do_IRQ+0x123/0x1e0 arch/x86/kernel/irq.c:260
 common_interrupt+0xf/0xf arch/x86/entry/entry_64.S:670
 </IRQ>
RIP: 0010:__do_sys_interrupt kernel/sys.c:2593 [inline]
RIP: 0010:__se_sys_interrupt kernel/sys.c:2584 [inline]
RIP: 0010:__x64_sys_interrupt+0x5b/0x80 kernel/sys.c:2584
Code: ba 00 04 00 00 48 c7 c7 c0 99 31 8c e8 ae 76 5e 01 48 85 c0 75 21 e8
14 ae 24 00 48 c7 c3 c0 99 31 8c b8 0c 00 00 00 0f 01 c1 <31> db e8 fe ad
24 00 48 89 d8 5b 5d c3 48 c7 c3 ea ff ff ff eb ec
RSP: 0018:ffff888054167f10 EFLAGS: 00000212 ORIG_RAX: ffffffffffffffde
RAX: 000000000000000c RBX: ffffffff8c3199c0 RCX: ffffc90001ca6000
RDX: 000000000000001a RSI: ffffffff813478fc RDI: ffffffff8c319dc0
RBP: ffff888054167f18 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000080 R11: fffffbfff18633b7 R12: ffff888054167f58
R13: ffff88805f638000 R14: 0000000000000000 R15: 0000000000000000
 do_syscall_64+0xb0/0x4e0 arch/x86/entry/common.c:293
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x4692a9
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f6b550e5c48 EFLAGS: 00000246 ORIG_RAX: 000000000000014f
RAX: ffffffffffffffda RBX: 000000000077bf60 RCX: 00000000004692a9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000020000140
RBP: 00000000004cf7eb R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000077bf60
R13: 0000000000000000 R14: 000000000077bf60 R15: 00007fff55a1dca0
Modules linked in:
Dumping ftrace buffer:
   (ftrace buffer empty)
CR2: 0000000000000000
---[ end trace 68e5849c3f77cbb6 ]---
RIP: 0010:          (null)
Code: Bad RIP value.
RSP: 0018:ffff88806c287ea0 EFLAGS: 00010246
RAX: fffffbfff1b01774 RBX: dffffc0000000000 RCX: 0000000000000000
RDX: 0000000000000000 RSI: 1ffffffff1b01775 RDI: 0000000000000000
RBP: ffff88806c287f00 R08: fffffbfff1b01774 R09: fffffbfff1b01774
R10: 0000000000000001 R11: fffffbfff1b01773 R12: 0000000000000000
R13: ffff88806c29f530 R14: ffffffff8d80bb88 R15: ffffffff8d80bb90
FS:  00007f6b550e6700(0000) GS:ffff88806c280000(0000) knlGS:
0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffffffffd6 CR3: 000000005ec98000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

Reported-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/bt8xx/bt878.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c
index 7ca309121fb53..90972d6952f1c 100644
--- a/drivers/media/pci/bt8xx/bt878.c
+++ b/drivers/media/pci/bt8xx/bt878.c
@@ -478,6 +478,9 @@ static int bt878_probe(struct pci_dev *dev, const struct pci_device_id *pci_id)
 	btwrite(0, BT878_AINT_MASK);
 	bt878_num++;
 
+	if (!bt->tasklet.func)
+		tasklet_disable(&bt->tasklet);
+
 	return 0;
 
       fail2:
-- 
GitLab


From 6cf16148899fc021dbd352d0177ff015ab12823b Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Wed, 12 May 2021 19:35:14 +0200
Subject: [PATCH 2047/3804] media: radio: si4713: constify static struct
 v4l2_ioctl_ops

The only usage of radio_si4713_ioctl_ops is to assign its address to the
ioctl_ops field in the video_device struct, which is a pointer to const.
Make it const to allow the compiler to put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/radio/si4713/radio-platform-si4713.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/radio/si4713/radio-platform-si4713.c b/drivers/media/radio/si4713/radio-platform-si4713.c
index a7dfe5f55c187..433f9642786dd 100644
--- a/drivers/media/radio/si4713/radio-platform-si4713.c
+++ b/drivers/media/radio/si4713/radio-platform-si4713.c
@@ -110,7 +110,7 @@ static long radio_si4713_default(struct file *file, void *p,
 					  ioctl, cmd, arg);
 }
 
-static struct v4l2_ioctl_ops radio_si4713_ioctl_ops = {
+static const struct v4l2_ioctl_ops radio_si4713_ioctl_ops = {
 	.vidioc_querycap	= radio_si4713_querycap,
 	.vidioc_g_modulator	= radio_si4713_g_modulator,
 	.vidioc_s_modulator	= radio_si4713_s_modulator,
-- 
GitLab


From 0909f4acb916f4ce0217f01ff31a9e0296b536da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 13 May 2021 14:47:15 +0200
Subject: [PATCH 2048/3804] media: rcar-vin: Enable support for r8a77961
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable support for M3-W+ (r8a77961).

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Tested-by: LUU HOAI <hoai.luu.ub@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar-vin/rcar-core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/media/platform/rcar-vin/rcar-core.c b/drivers/media/platform/rcar-vin/rcar-core.c
index cb3025992817d..33957cc9118ca 100644
--- a/drivers/media/platform/rcar-vin/rcar-core.c
+++ b/drivers/media/platform/rcar-vin/rcar-core.c
@@ -1362,6 +1362,10 @@ static const struct of_device_id rvin_of_id_table[] = {
 		.compatible = "renesas,vin-r8a7796",
 		.data = &rcar_info_r8a7796,
 	},
+	{
+		.compatible = "renesas,vin-r8a77961",
+		.data = &rcar_info_r8a7796,
+	},
 	{
 		.compatible = "renesas,vin-r8a77965",
 		.data = &rcar_info_r8a77965,
-- 
GitLab


From 4c6178f31e7d33c87f9f046e3bcbaa15a1802ff9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 13 May 2021 16:09:14 +0200
Subject: [PATCH 2049/3804] media: rcar-csi2: Enable support for r8a77961
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Enable support for M3-W+ (r8a77961).

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Tested-by: LUU HOAI <hoai.luu.ub@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar-vin/rcar-csi2.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
index 99bf814eb2a78..b87d5453e4188 100644
--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
+++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
@@ -1121,6 +1121,11 @@ static const struct rcar_csi2_info rcar_csi2_info_r8a7796 = {
 	.num_channels = 4,
 };
 
+static const struct rcar_csi2_info rcar_csi2_info_r8a77961 = {
+	.hsfreqrange = hsfreqrange_m3w_h3es1,
+	.num_channels = 4,
+};
+
 static const struct rcar_csi2_info rcar_csi2_info_r8a77965 = {
 	.init_phtw = rcsi2_init_phtw_h3_v3h_m3n,
 	.hsfreqrange = hsfreqrange_h3_v3h_m3n,
@@ -1173,6 +1178,10 @@ static const struct of_device_id rcar_csi2_of_table[] = {
 		.compatible = "renesas,r8a7796-csi2",
 		.data = &rcar_csi2_info_r8a7796,
 	},
+	{
+		.compatible = "renesas,r8a77961-csi2",
+		.data = &rcar_csi2_info_r8a77961,
+	},
 	{
 		.compatible = "renesas,r8a77965-csi2",
 		.data = &rcar_csi2_info_r8a77965,
-- 
GitLab


From 2c1e75f5baac5432749b90174a7a1f50a97327b2 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 14 May 2021 00:03:17 +0200
Subject: [PATCH 2050/3804] media: meson: vdec: remove redundant initialization
 of variable reg_cur

The variable reg_cur is being initialized with a value that is never
read, it is being updated later on. The assignment is redundant and
can be removed.

Addresses-Coverity: ("Unused value")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/meson/vdec/vdec_helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/meson/vdec/vdec_helpers.c b/drivers/staging/media/meson/vdec/vdec_helpers.c
index 7f07a9175815f..b9125c295d1d3 100644
--- a/drivers/staging/media/meson/vdec/vdec_helpers.c
+++ b/drivers/staging/media/meson/vdec/vdec_helpers.c
@@ -183,7 +183,7 @@ int amvdec_set_canvases(struct amvdec_session *sess,
 	u32 pixfmt = sess->pixfmt_cap;
 	u32 width = ALIGN(sess->width, 32);
 	u32 height = ALIGN(sess->height, 32);
-	u32 reg_cur = reg_base[0];
+	u32 reg_cur;
 	u32 reg_num_cur = 0;
 	u32 reg_base_cur = 0;
 	int i = 0;
-- 
GitLab


From 1fcbeeb506fd785025a37d1a874108756abbef6b Mon Sep 17 00:00:00 2001
From: Ding Senjie <dingsenjie@yulong.com>
Date: Fri, 14 May 2021 14:35:21 +0200
Subject: [PATCH 2051/3804] media: mtk-vpu: Use
 devm_platform_ioremap_resource_byname

Use the devm_platform_ioremap_resource_byname() helper instead of
calling platform_get_resource_byname() and devm_ioremap_resource()
separately.

Signed-off-by: Ding Senjie <dingsenjie@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-vpu/mtk_vpu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
index c8a56271b259e..ef458b417fa73 100644
--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -821,13 +821,11 @@ static int mtk_vpu_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	vpu->dev = &pdev->dev;
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "tcm");
-	vpu->reg.tcm = devm_ioremap_resource(dev, res);
+	vpu->reg.tcm = devm_platform_ioremap_resource_byname(pdev, "tcm");
 	if (IS_ERR((__force void *)vpu->reg.tcm))
 		return PTR_ERR((__force void *)vpu->reg.tcm);
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg_reg");
-	vpu->reg.cfg = devm_ioremap_resource(dev, res);
+	vpu->reg.cfg = devm_platform_ioremap_resource_byname(pdev, "cfg_reg");
 	if (IS_ERR((__force void *)vpu->reg.cfg))
 		return PTR_ERR((__force void *)vpu->reg.cfg);
 
-- 
GitLab


From 8f2e452730d2bcd59fe05246f0e19a4c52e0012d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 May 2021 16:20:38 +0200
Subject: [PATCH 2052/3804] media: au0828: fix a NULL vs IS_ERR() check

The media_device_usb_allocate() function returns error pointers when
it's enabled and something goes wrong.  It can return NULL as well, but
only if CONFIG_MEDIA_CONTROLLER is disabled so that doesn't apply here.

Fixes: 812658d88d26 ("media: change au0828 to use Media Device Allocator API")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/au0828/au0828-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c
index a8a72d5fbd129..caefac07af927 100644
--- a/drivers/media/usb/au0828/au0828-core.c
+++ b/drivers/media/usb/au0828/au0828-core.c
@@ -199,8 +199,8 @@ static int au0828_media_device_init(struct au0828_dev *dev,
 	struct media_device *mdev;
 
 	mdev = media_device_usb_allocate(udev, KBUILD_MODNAME, THIS_MODULE);
-	if (!mdev)
-		return -ENOMEM;
+	if (IS_ERR(mdev))
+		return PTR_ERR(mdev);
 
 	dev->media_dev = mdev;
 #endif
-- 
GitLab


From d67fa04ce41f7b5d92563734d76c55a676846cc4 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 14 May 2021 17:27:34 +0200
Subject: [PATCH 2053/3804] media: media/test_drivers: Drop unnecessary NULL
 check after container_of

The result of container_of() operations is never NULL unless the embedded
element is the first element of the structure. This is not the case here.
The NULL check is therefore unnecessary and misleading. Remove it.

This change was made automatically with the following Coccinelle script.

@@
type t;
identifier v;
statement s;
@@

<+...
(
  t v = container_of(...);
|
  v = container_of(...);
)
  ...
  when != v
- if (\( !v \| v == NULL \) ) s
...+>

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/test-drivers/vim2m.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/media/test-drivers/vim2m.c b/drivers/media/test-drivers/vim2m.c
index a24624353f9ed..d714fe50afe5c 100644
--- a/drivers/media/test-drivers/vim2m.c
+++ b/drivers/media/test-drivers/vim2m.c
@@ -624,11 +624,6 @@ static void device_work(struct work_struct *w)
 
 	curr_ctx = container_of(w, struct vim2m_ctx, work_run.work);
 
-	if (!curr_ctx) {
-		pr_err("Instance released before the end of transaction\n");
-		return;
-	}
-
 	vim2m_dev = curr_ctx->dev;
 
 	src_vb = v4l2_m2m_src_buf_remove(curr_ctx->fh.m2m_ctx);
-- 
GitLab


From a6b1e7093f0a099571fc8836ab4a589633f956a8 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 15 May 2021 08:58:30 +0200
Subject: [PATCH 2054/3804] media: tc358743: Fix error return code in
 tc358743_probe_of()

When the CSI bps per lane is not in the valid range, an appropriate error
code -EINVAL should be returned. However, we currently do not explicitly
assign this error code to 'ret'. As a result, 0 was incorrectly returned.

Fixes: 256148246852 ("[media] tc358743: support probe from device tree")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/tc358743.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index 1b309bb743c7b..f21da11caf224 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -1974,6 +1974,7 @@ static int tc358743_probe_of(struct tc358743_state *state)
 	bps_pr_lane = 2 * endpoint.link_frequencies[0];
 	if (bps_pr_lane < 62500000U || bps_pr_lane > 1000000000U) {
 		dev_err(dev, "unsupported bps per lane: %u bps\n", bps_pr_lane);
+		ret = -EINVAL;
 		goto disable_clk;
 	}
 
-- 
GitLab


From dd706623fcab3ba808a2c48855e5e8aa2c6e8fbf Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 17 May 2021 05:11:23 +0200
Subject: [PATCH 2055/3804] media: bdisp: remove redundant dev_err call in
 bdisp_probe()

There is already an error message within devm_ioremap_resource(),
so remove the dev_err() call to avoid a redundant error message.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
index 85288da9d2ae6..6413cd2791251 100644
--- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
+++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c
@@ -1318,7 +1318,6 @@ static int bdisp_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bdisp->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(bdisp->regs)) {
-		dev_err(dev, "failed to get regs\n");
 		ret = PTR_ERR(bdisp->regs);
 		goto err_wq;
 	}
-- 
GitLab


From c75f11fbe4de0d4ccba14e7125607fd5ca12e294 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 17 May 2021 12:07:48 +0200
Subject: [PATCH 2056/3804] media: atmel: atmel-isc: Remove redundant
 assignment to i

Variable i is assigned a value that is never read, so this
redundant assignment can be removed.

Clean up the following clang-analyzer warning:

drivers/media/platform/atmel/atmel-isc-base.c:975:2: warning: Value
stored to 'i' is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index ce8e1351fa532..a017572c870cc 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -972,7 +972,6 @@ static int isc_enum_fmt_vid_cap(struct file *file, void *priv,
 
 	index -= ARRAY_SIZE(controller_formats);
 
-	i = 0;
 	supported_index = 0;
 
 	for (i = 0; i < ARRAY_SIZE(formats_list); i++) {
-- 
GitLab


From 8610b3a2abfd0a043df91ac2754a406d7d42b207 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 17 May 2021 12:09:20 +0200
Subject: [PATCH 2057/3804] media: st-delta: Remove redundant assignment to ret

Variable ret is assigned a value that is never read, so this
redundant assignment can be removed.

Clean up the following clang-analyzer warning:

drivers/media/platform/sti/delta/delta-v4l2.c:1010:4: warning: Value
stored to 'ret' is never read [clang-analyzer-deadcode.DeadStores].

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/delta/delta-v4l2.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c
index 064a00a3084a0..c887a31ebb540 100644
--- a/drivers/media/platform/sti/delta/delta-v4l2.c
+++ b/drivers/media/platform/sti/delta/delta-v4l2.c
@@ -1007,7 +1007,6 @@ static void delta_run_work(struct work_struct *work)
 			dev_err(delta->dev,
 				"%s  NULL decoded frame\n",
 				ctx->name);
-			ret = -EIO;
 			goto out;
 		}
 
-- 
GitLab


From e6001f6922cfda7b76f594595ebb38351c313da2 Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov@ispras.ru>
Date: Mon, 17 May 2021 14:49:18 +0200
Subject: [PATCH 2058/3804] media: v4l: cadence: Handle errors of
 clk_prepare_enable()

Handle errors of clk_prepare_enable() in csi2tx_get_resources().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Evgeny Novikov <novikov@ispras.ru>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/cadence/cdns-csi2tx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/cadence/cdns-csi2tx.c b/drivers/media/platform/cadence/cdns-csi2tx.c
index e4d08acfbb49f..765ae408970a5 100644
--- a/drivers/media/platform/cadence/cdns-csi2tx.c
+++ b/drivers/media/platform/cadence/cdns-csi2tx.c
@@ -436,6 +436,7 @@ static int csi2tx_get_resources(struct csi2tx_priv *csi2tx,
 	struct resource *res;
 	unsigned int i;
 	u32 dev_cfg;
+	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	csi2tx->base = devm_ioremap_resource(&pdev->dev, res);
@@ -454,7 +455,12 @@ static int csi2tx_get_resources(struct csi2tx_priv *csi2tx,
 		return PTR_ERR(csi2tx->esc_clk);
 	}
 
-	clk_prepare_enable(csi2tx->p_clk);
+	ret = clk_prepare_enable(csi2tx->p_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't prepare and enable p_clk\n");
+		return ret;
+	}
+
 	dev_cfg = readl(csi2tx->base + CSI2TX_DEVICE_CONFIG_REG);
 	clk_disable_unprepare(csi2tx->p_clk);
 
-- 
GitLab


From 0a045eac8d0427b64577a24d74bb8347c905ac65 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 17 May 2021 21:18:14 +0200
Subject: [PATCH 2059/3804] media: zr364xx: fix memory leak in
 zr364xx_start_readpipe

syzbot reported a memory leak in the zr364xx driver.
The problem was that the urb was not freed when
usb_submit_urb() failed.

backtrace:
  [<ffffffff82baedf6>] kmalloc include/linux/slab.h:561 [inline]
  [<ffffffff82baedf6>] usb_alloc_urb+0x66/0xe0 drivers/usb/core/urb.c:74
  [<ffffffff82f7cce8>] zr364xx_start_readpipe+0x78/0x130 drivers/media/usb/zr364xx/zr364xx.c:1022
  [<ffffffff84251dfc>] zr364xx_board_init drivers/media/usb/zr364xx/zr364xx.c:1383 [inline]
  [<ffffffff84251dfc>] zr364xx_probe+0x6a3/0x851 drivers/media/usb/zr364xx/zr364xx.c:1516
  [<ffffffff82bb6507>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
  [<ffffffff826018a9>] really_probe+0x159/0x500 drivers/base/dd.c:576

Fixes: ccbf035ae5de ("V4L/DVB (12278): zr364xx: implement V4L2_CAP_STREAMING")
Cc: stable@vger.kernel.org
Reported-by: syzbot+af4fa391ef18efdd5f69@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/zr364xx/zr364xx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index 1ef611e083237..538a330046ec9 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -1032,6 +1032,7 @@ static int zr364xx_start_readpipe(struct zr364xx_camera *cam)
 	DBG("submitting URB %p\n", pipe_info->stream_urb);
 	retval = usb_submit_urb(pipe_info->stream_urb, GFP_KERNEL);
 	if (retval) {
+		usb_free_urb(pipe_info->stream_urb);
 		printk(KERN_ERR KBUILD_MODNAME ": start read pipe failed\n");
 		return retval;
 	}
-- 
GitLab


From b75a44de44f4921cb84e855f54419e812badc325 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Tue, 18 May 2021 13:49:08 +0200
Subject: [PATCH 2060/3804] media: staging: media: zoran: fix some formatting
 issues

Fix "WARNING: Possible repeated word: 'in'" reported for the text
"in in a VIDIOCSFBUF ioctl", and limit the number of words per line.

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zoran_card.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/media/zoran/zoran_card.c b/drivers/staging/media/zoran/zoran_card.c
index dfc60e2e9dd7a..f259585b06897 100644
--- a/drivers/staging/media/zoran/zoran_card.c
+++ b/drivers/staging/media/zoran/zoran_card.c
@@ -37,9 +37,10 @@ module_param_array(card, int, NULL, 0444);
 MODULE_PARM_DESC(card, "Card type");
 
 /*
- * The video mem address of the video card. The driver has a little database for some videocards
- * to determine it from there. If your video card is not in there you have either to give it to
- * the driver as a parameter or set in in a VIDIOCSFBUF ioctl
+ * The video mem address of the video card. The driver has a little database
+ * for some videocards to determine it from there. If your video card is not
+ * in there you have either to give it to the driver as a parameter or set
+ * in a VIDIOCSFBUF ioctl
  */
 
 static unsigned long vidmem;	/* default = 0 - Video memory base address */
-- 
GitLab


From 3c1f2eb5475a4031d9555a38de2467d80019c66a Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 2 Jun 2021 09:00:41 +0800
Subject: [PATCH 2061/3804] arm_pmu: move to use request_irq by IRQF_NO_AUTOEN
 flag

request_irq() after setting IRQ_NOAUTOEN as below
irq_set_status_flags(irq, IRQ_NOAUTOEN);
request_irq(dev, irq...);
can be replaced by request_irq() with IRQF_NO_AUTOEN flag.

This patch is based on "add IRQF_NO_AUTOEN for request_irq", which
is being merged: https://lore.kernel.org/patchwork/patch/1388765/

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1622595642-61678-2-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_pmu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index a64e254a731b1..3cbc3baf087f3 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -644,11 +644,9 @@ int armpmu_request_irq(int irq, int cpu)
 		}
 
 		irq_flags = IRQF_PERCPU |
-			    IRQF_NOBALANCING |
+			    IRQF_NOBALANCING | IRQF_NO_AUTOEN |
 			    IRQF_NO_THREAD;
 
-		irq_set_status_flags(irq, IRQ_NOAUTOEN);
-
 		err = request_nmi(irq, handler, irq_flags, "arm-pmu",
 				  per_cpu_ptr(&cpu_armpmu, cpu));
 
-- 
GitLab


From 0d0f144a8f5f9815a180d16ef7d08b6269016897 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 2 Jun 2021 09:00:42 +0800
Subject: [PATCH 2062/3804] perf: qcom_l2_pmu: move to use request_irq by
 IRQF_NO_AUTOEN flag

request_irq() after setting IRQ_NOAUTOEN as below
irq_set_status_flags(irq, IRQ_NOAUTOEN); request_irq(dev, irq...); can
be replaced by request_irq() with IRQF_NO_AUTOEN flag.

This patch is based on "add IRQF_NO_AUTOEN for request_irq", which
is being merged: https://lore.kernel.org/patchwork/patch/1388765/

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/1622595642-61678-3-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/qcom_l2_pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index fc54a80f9c5cf..b60e30141583e 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -869,14 +869,14 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data)
 	irq = platform_get_irq(sdev, 0);
 	if (irq < 0)
 		return irq;
-	irq_set_status_flags(irq, IRQ_NOAUTOEN);
 	cluster->irq = irq;
 
 	cluster->l2cache_pmu = l2cache_pmu;
 	cluster->on_cpu = -1;
 
 	err = devm_request_irq(&pdev->dev, irq, l2_cache_handle_irq,
-			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD |
+			       IRQF_NO_AUTOEN,
 			       "l2-cache-pmu", cluster);
 	if (err) {
 		dev_err(&pdev->dev,
-- 
GitLab


From efdd0d42e27695ade6eff777bd416973a631b71c Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 18 May 2021 14:41:09 +0200
Subject: [PATCH 2063/3804] media: staging: media: zoran: remove
 detect_guest_activity

The detect_guest_activity function is no longer used, so let's remove it.

[hverkuil: remove dump_guests() as well as that too is now unused]

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zoran_device.c | 65 ----------------------
 drivers/staging/media/zoran/zoran_device.h |  2 -
 2 files changed, 67 deletions(-)

diff --git a/drivers/staging/media/zoran/zoran_device.c b/drivers/staging/media/zoran/zoran_device.c
index cf788d9cd1dfe..5b12a730a2290 100644
--- a/drivers/staging/media/zoran/zoran_device.c
+++ b/drivers/staging/media/zoran/zoran_device.c
@@ -147,71 +147,6 @@ int post_office_read(struct zoran *zr, unsigned int guest, unsigned int reg)
 	return btread(ZR36057_POR) & 0xFF;
 }
 
-/*
- * detect guests
- */
-
-static void dump_guests(struct zoran *zr)
-{
-	if (zr36067_debug > 2) {
-		int i, guest[8];
-
-		/* do not print random data */
-		guest[0] = 0;
-
-		for (i = 1; i < 8; i++) /* Don't read jpeg codec here */
-			guest[i] = post_office_read(zr, i, 0);
-
-		pci_info(zr->pci_dev, "Guests: %*ph\n", 8, guest);
-	}
-}
-
-void detect_guest_activity(struct zoran *zr)
-{
-	int timeout, i, j, res, guest[8], guest0[8], change[8][3];
-	ktime_t t0, t1;
-
-	/* do not print random data */
-	guest[0] = 0;
-	guest0[0] = 0;
-
-	dump_guests(zr);
-	pci_info(zr->pci_dev, "Detecting guests activity, please wait...\n");
-	for (i = 1; i < 8; i++) /* Don't read jpeg codec here */
-		guest0[i] = guest[i] = post_office_read(zr, i, 0);
-
-	timeout = 0;
-	j = 0;
-	t0 = ktime_get();
-	while (timeout < 10000) {
-		udelay(10);
-		timeout++;
-		for (i = 1; (i < 8) && (j < 8); i++) {
-			res = post_office_read(zr, i, 0);
-			if (res != guest[i]) {
-				t1 = ktime_get();
-				change[j][0] = ktime_to_us(ktime_sub(t1, t0));
-				t0 = t1;
-				change[j][1] = i;
-				change[j][2] = res;
-				j++;
-				guest[i] = res;
-			}
-		}
-		if (j >= 8)
-			break;
-	}
-
-	pci_info(zr->pci_dev, "Guests: %*ph\n", 8, guest0);
-
-	if (j == 0) {
-		pci_info(zr->pci_dev, "No activity detected.\n");
-		return;
-	}
-	for (i = 0; i < j; i++)
-		pci_info(zr->pci_dev, "%6d: %d => 0x%02x\n", change[i][0], change[i][1], change[i][2]);
-}
-
 /*
  * JPEG Codec access
  */
diff --git a/drivers/staging/media/zoran/zoran_device.h b/drivers/staging/media/zoran/zoran_device.h
index 24be19a61b6d3..6c5d70238228b 100644
--- a/drivers/staging/media/zoran/zoran_device.h
+++ b/drivers/staging/media/zoran/zoran_device.h
@@ -20,8 +20,6 @@ extern int post_office_wait(struct zoran *zr);
 extern int post_office_write(struct zoran *zr, unsigned int guest, unsigned int reg, unsigned int value);
 extern int post_office_read(struct zoran *zr, unsigned int guest, unsigned int reg);
 
-extern void detect_guest_activity(struct zoran *zr);
-
 extern void jpeg_codec_sleep(struct zoran *zr, int sleep);
 extern int jpeg_codec_reset(struct zoran *zr);
 
-- 
GitLab


From 4283d387d9cbf5deb464675e050b17f34a9a8c02 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 18 May 2021 14:41:10 +0200
Subject: [PATCH 2064/3804] media: staging: media: zoran: multiple assignments
 should be avoided

Remove all multiple assignments.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zoran_driver.c | 6 ++++--
 drivers/staging/media/zoran/zr36016.c      | 3 ++-
 drivers/staging/media/zoran/zr36050.c      | 3 ++-
 drivers/staging/media/zoran/zr36060.c      | 3 ++-
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/media/zoran/zoran_driver.c b/drivers/staging/media/zoran/zoran_driver.c
index e8902f824d6c4..46382e43f1bf7 100644
--- a/drivers/staging/media/zoran/zoran_driver.c
+++ b/drivers/staging/media/zoran/zoran_driver.c
@@ -678,12 +678,14 @@ static int zoran_g_selection(struct file *file, void *__fh, struct v4l2_selectio
 		sel->r.height = zr->jpg_settings.img_height;
 		break;
 	case V4L2_SEL_TGT_CROP_DEFAULT:
-		sel->r.top = sel->r.left = 0;
+		sel->r.top = 0;
+		sel->r.left = 0;
 		sel->r.width = BUZ_MIN_WIDTH;
 		sel->r.height = BUZ_MIN_HEIGHT;
 		break;
 	case V4L2_SEL_TGT_CROP_BOUNDS:
-		sel->r.top = sel->r.left = 0;
+		sel->r.top = 0;
+		sel->r.left = 0;
 		sel->r.width = BUZ_MAX_WIDTH;
 		sel->r.height = BUZ_MAX_HEIGHT;
 		break;
diff --git a/drivers/staging/media/zoran/zr36016.c b/drivers/staging/media/zoran/zr36016.c
index 2d7dc7abde793..82702a13b05fd 100644
--- a/drivers/staging/media/zoran/zr36016.c
+++ b/drivers/staging/media/zoran/zr36016.c
@@ -361,7 +361,8 @@ static int zr36016_setup(struct videocodec *codec)
 		return -ENOSPC;
 	}
 	//mem structure init
-	codec->data = ptr = kzalloc(sizeof(struct zr36016), GFP_KERNEL);
+	ptr = kzalloc(sizeof(struct zr36016), GFP_KERNEL);
+	codec->data = ptr;
 	if (!ptr)
 		return -ENOMEM;
 
diff --git a/drivers/staging/media/zoran/zr36050.c b/drivers/staging/media/zoran/zr36050.c
index 2826f4e5d37ba..a78862852a477 100644
--- a/drivers/staging/media/zoran/zr36050.c
+++ b/drivers/staging/media/zoran/zr36050.c
@@ -754,7 +754,8 @@ static int zr36050_setup(struct videocodec *codec)
 		return -ENOSPC;
 	}
 	//mem structure init
-	codec->data = ptr = kzalloc(sizeof(struct zr36050), GFP_KERNEL);
+	ptr = kzalloc(sizeof(struct zr36050), GFP_KERNEL);
+	codec->data = ptr;
 	if (!ptr)
 		return -ENOMEM;
 
diff --git a/drivers/staging/media/zoran/zr36060.c b/drivers/staging/media/zoran/zr36060.c
index 4f9eb9ff2c429..1c3af11b5f24d 100644
--- a/drivers/staging/media/zoran/zr36060.c
+++ b/drivers/staging/media/zoran/zr36060.c
@@ -790,7 +790,8 @@ static int zr36060_setup(struct videocodec *codec)
 		return -ENOSPC;
 	}
 	//mem structure init
-	codec->data = ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
+	codec->data = ptr;
 	if (!ptr)
 		return -ENOMEM;
 
-- 
GitLab


From 87c5d693f94975a262fa891fbc944957ea041603 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 18 May 2021 14:41:11 +0200
Subject: [PATCH 2065/3804] media: staging: media: zoran: remove blank line

Minor style fix by removing useless blank line.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zoran.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/media/zoran/zoran.h b/drivers/staging/media/zoran/zoran.h
index e7fe8da7732c7..b1ad2a2b914cd 100644
--- a/drivers/staging/media/zoran/zoran.h
+++ b/drivers/staging/media/zoran/zoran.h
@@ -158,7 +158,6 @@ struct zoran_jpg_settings {
 	struct v4l2_jpegcompression jpg_comp;	/* JPEG-specific capture settings */
 };
 
-
 struct zoran;
 
 /* zoran_fh contains per-open() settings */
-- 
GitLab


From b8c8c4959ce372820575f28981b7a033243363e5 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 18 May 2021 14:41:12 +0200
Subject: [PATCH 2066/3804] media: staging: media: zoran: fix kzalloc style

Prefer kzalloc(sizeof(*ptr)...) over kzalloc(sizeof(struct ...)...).

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zr36016.c | 2 +-
 drivers/staging/media/zoran/zr36050.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/zoran/zr36016.c b/drivers/staging/media/zoran/zr36016.c
index 82702a13b05fd..9b350a885879f 100644
--- a/drivers/staging/media/zoran/zr36016.c
+++ b/drivers/staging/media/zoran/zr36016.c
@@ -361,7 +361,7 @@ static int zr36016_setup(struct videocodec *codec)
 		return -ENOSPC;
 	}
 	//mem structure init
-	ptr = kzalloc(sizeof(struct zr36016), GFP_KERNEL);
+	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 	codec->data = ptr;
 	if (!ptr)
 		return -ENOMEM;
diff --git a/drivers/staging/media/zoran/zr36050.c b/drivers/staging/media/zoran/zr36050.c
index a78862852a477..8bb101fa18bc4 100644
--- a/drivers/staging/media/zoran/zr36050.c
+++ b/drivers/staging/media/zoran/zr36050.c
@@ -754,7 +754,7 @@ static int zr36050_setup(struct videocodec *codec)
 		return -ENOSPC;
 	}
 	//mem structure init
-	ptr = kzalloc(sizeof(struct zr36050), GFP_KERNEL);
+	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
 	codec->data = ptr;
 	if (!ptr)
 		return -ENOMEM;
-- 
GitLab


From 5ef8a20af18716f97875714a32266256f6aa6f60 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 18 May 2021 14:41:13 +0200
Subject: [PATCH 2067/3804] media: staging: media: zoran: change asm header

As asked by checkpatch, convert an asm/xxx header to a linux one.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/zoran/zr36050.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/media/zoran/zr36050.c b/drivers/staging/media/zoran/zr36050.c
index 8bb101fa18bc4..c62af27f2683b 100644
--- a/drivers/staging/media/zoran/zr36050.c
+++ b/drivers/staging/media/zoran/zr36050.c
@@ -16,7 +16,7 @@
 #include <linux/wait.h>
 
 /* I/O commands, error codes */
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* headerfile of this module */
 #include "zr36050.h"
-- 
GitLab


From cca65f64045523f923380171bf6d329bfd79970f Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov@ispras.ru>
Date: Tue, 18 May 2021 20:57:22 +0200
Subject: [PATCH 2068/3804] media: v4l: cadence: Handle errors of
 clk_prepare_enable()

Handle errors of clk_prepare_enable() in csi2rx_get_resources().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Evgeny Novikov <novikov@ispras.ru>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/cadence/cdns-csi2rx.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/cadence/cdns-csi2rx.c b/drivers/media/platform/cadence/cdns-csi2rx.c
index c68a3eac62cdd..f2b4ddd31177b 100644
--- a/drivers/media/platform/cadence/cdns-csi2rx.c
+++ b/drivers/media/platform/cadence/cdns-csi2rx.c
@@ -282,6 +282,7 @@ static int csi2rx_get_resources(struct csi2rx_priv *csi2rx,
 	struct resource *res;
 	unsigned char i;
 	u32 dev_cfg;
+	int ret;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	csi2rx->base = devm_ioremap_resource(&pdev->dev, res);
@@ -315,7 +316,12 @@ static int csi2rx_get_resources(struct csi2rx_priv *csi2rx,
 		return -EINVAL;
 	}
 
-	clk_prepare_enable(csi2rx->p_clk);
+	ret = clk_prepare_enable(csi2rx->p_clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't prepare and enable P clock\n");
+		return ret;
+	}
+
 	dev_cfg = readl(csi2rx->base + CSI2RX_DEVICE_CFG_REG);
 	clk_disable_unprepare(csi2rx->p_clk);
 
-- 
GitLab


From b7fdd208687ba59ebfb09b2199596471c63b69e3 Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov@ispras.ru>
Date: Wed, 19 May 2021 14:04:49 +0200
Subject: [PATCH 2069/3804] media: st-hva: Fix potential NULL pointer
 dereferences

When ctx_id >= HVA_MAX_INSTANCES in hva_hw_its_irq_thread() it tries to
access fields of ctx that is NULL at that point. The patch gets rid of
these accesses.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Evgeny Novikov <novikov@ispras.ru>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sti/hva/hva-hw.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/media/platform/sti/hva/hva-hw.c b/drivers/media/platform/sti/hva/hva-hw.c
index 77b8bfa5e0c5e..30fb1aa4a3512 100644
--- a/drivers/media/platform/sti/hva/hva-hw.c
+++ b/drivers/media/platform/sti/hva/hva-hw.c
@@ -130,8 +130,7 @@ static irqreturn_t hva_hw_its_irq_thread(int irq, void *arg)
 	ctx_id = (hva->sts_reg & 0xFF00) >> 8;
 	if (ctx_id >= HVA_MAX_INSTANCES) {
 		dev_err(dev, "%s     %s: bad context identifier: %d\n",
-			ctx->name, __func__, ctx_id);
-		ctx->hw_err = true;
+			HVA_PREFIX, __func__, ctx_id);
 		goto out;
 	}
 
-- 
GitLab


From 99c2caa64580f999f4552eaeb3ed6f6c5f172d93 Mon Sep 17 00:00:00 2001
From: Herman <herman.yim88@gmail.com>
Date: Thu, 20 May 2021 11:35:53 +0200
Subject: [PATCH 2070/3804] media: drivers/media/usb/em28xx/em28xx-cards.c :
 fix typo issues

change 'Configuare' into 'Configure'
change 'Configuared' into 'Configured'

Signed-off-by: Herman <yanshuaijun@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/em28xx/em28xx-cards.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c
index ba9292e2a5870..c1e0dccb74088 100644
--- a/drivers/media/usb/em28xx/em28xx-cards.c
+++ b/drivers/media/usb/em28xx/em28xx-cards.c
@@ -4065,15 +4065,15 @@ static int em28xx_usb_probe(struct usb_interface *intf,
 		dev->dev_next->dvb_max_pkt_size_isoc = dev->dvb_max_pkt_size_isoc_ts2;
 		dev->dev_next->dvb_alt_isoc = dev->dvb_alt_isoc;
 
-		/* Configuare hardware to support TS2*/
+		/* Configure hardware to support TS2*/
 		if (dev->dvb_xfer_bulk) {
-			/* The ep4 and ep5 are configuared for BULK */
+			/* The ep4 and ep5 are configured for BULK */
 			em28xx_write_reg(dev, 0x0b, 0x96);
 			mdelay(100);
 			em28xx_write_reg(dev, 0x0b, 0x80);
 			mdelay(100);
 		} else {
-			/* The ep4 and ep5 are configuared for ISO */
+			/* The ep4 and ep5 are configured for ISO */
 			em28xx_write_reg(dev, 0x0b, 0x96);
 			mdelay(100);
 			em28xx_write_reg(dev, 0x0b, 0x82);
-- 
GitLab


From 66933f4b90ddd8abaa2e123e09c51ecc25331b40 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 20 May 2021 18:02:49 +0200
Subject: [PATCH 2071/3804] media: hantro: test the correct variable in probe()

This should be testing "vpu->clocks[0].clk" instead of "vpu->clocks".

Fixes: eb4cacdfb998 ("media: hantro: add fallback handling for single irq/clk")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Emil Velikov <emil.velikov@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 2f6b01c7a6a0f..4914987cfd9dd 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -783,8 +783,8 @@ static int hantro_probe(struct platform_device *pdev)
 		 * actual name in the DT bindings.
 		 */
 		vpu->clocks[0].clk = devm_clk_get(&pdev->dev, NULL);
-		if (IS_ERR(vpu->clocks))
-			return PTR_ERR(vpu->clocks);
+		if (IS_ERR(vpu->clocks[0].clk))
+			return PTR_ERR(vpu->clocks[0].clk);
 	}
 
 	num_bases = vpu->variant->num_regs ?: 1;
-- 
GitLab


From 10c1f0cbcea93beec5d3bdc02b1a3b577b4985e7 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 1 Jun 2021 09:19:35 +0000
Subject: [PATCH 2072/3804] drm/i915/selftests: Fix return value check in
 live_breadcrumbs_smoketest()

In case of error, the function live_context() returns ERR_PTR() and never
returns NULL. The NULL test in the return value check should be replaced
with IS_ERR().

Fixes: 52c0fdb25c7c ("drm/i915: Replace global breadcrumbs with per-context interrupt tracking")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/33c46ef24cd547d0ad21dc106441491a@intel.com
[tursulin: Wrap commit text, fix Fixes: tag.]
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
(cherry picked from commit 8f4caef8d5401b42c6367d46c23da5e0e8111516)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/selftests/i915_request.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index ee8e753d98ce2..eae0abd614cbc 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -1592,8 +1592,8 @@ static int live_breadcrumbs_smoketest(void *arg)
 
 	for (n = 0; n < smoke[0].ncontexts; n++) {
 		smoke[0].contexts[n] = live_context(i915, file);
-		if (!smoke[0].contexts[n]) {
-			ret = -ENOMEM;
+		if (IS_ERR(smoke[0].contexts[n])) {
+			ret = PTR_ERR(smoke[0].contexts[n]);
 			goto out_contexts;
 		}
 	}
-- 
GitLab


From b87482dfe800f326f8f5b0093273ee6bd5b5fe9f Mon Sep 17 00:00:00 2001
From: Matthew Auld <matthew.auld@intel.com>
Date: Thu, 27 May 2021 19:51:45 +0100
Subject: [PATCH 2073/3804] Revert "i915: use io_mapping_map_user"

This reverts commit b739f125e4ebd73d10ed30a856574e13649119ed.

We are unfortunately seeing more issues like we did in 293837b9ac8d
("Revert "i915: fix remap_io_sg to verify the pgprot""), except this is
now for the vm_fault_gtt path, where we are now hitting the same
BUG_ON(!pte_none(*pte)):

[10887.466150] kernel BUG at mm/memory.c:2183!
[10887.466162] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[10887.466168] CPU: 0 PID: 7775 Comm: ffmpeg Tainted: G     U            5.13.0-rc3-CI-Nightly #1
[10887.466174] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.40 07/14/2017
[10887.466177] RIP: 0010:remap_pfn_range_notrack+0x30f/0x440
[10887.466188] Code: e8 96 d7 e0 ff 84 c0 0f 84 27 01 00 00 48 ba 00 f0 ff ff ff ff 0f 00 4c 89 e0 48 c1 e0 0c 4d 85 ed 75 96 48 21 d0 31 f6 eb a9 <0f> 0b 48 39 37 0f 85 0e 01 00 00 48 8b 0c 24 48 39 4f 08 0f 85 00
[10887.466193] RSP: 0018:ffffc90006e33c50 EFLAGS: 00010286
[10887.466198] RAX: 800000000000002f RBX: 00007f5e01800000 RCX: 0000000000000028
[10887.466201] RDX: 0000000000000001 RSI: ffffea0000000000 RDI: 0000000000000000
[10887.466204] RBP: ffffea000033fea8 R08: 800000000000002f R09: ffff8881072256e0
[10887.466207] R10: ffffc9000b84fff8 R11: 0000000017dab000 R12: 0000000000089f9f
[10887.466210] R13: 800000000000002f R14: 00007f5e017e4000 R15: ffff88800cffaf20
[10887.466213] FS:  00007f5e04849640(0000) GS:ffff888278000000(0000) knlGS:0000000000000000
[10887.466216] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[10887.466220] CR2: 00007fd9b191a2ac CR3: 00000001829ac000 CR4: 00000000003506f0
[10887.466223] Call Trace:
[10887.466233]  vm_fault_gtt+0x1ca/0x5d0 [i915]
[10887.466381]  ? ktime_get+0x38/0x90
[10887.466389]  __do_fault+0x37/0x90
[10887.466395]  __handle_mm_fault+0xc46/0x1200
[10887.466402]  handle_mm_fault+0xce/0x2a0
[10887.466407]  do_user_addr_fault+0x1c5/0x660

Reverting this commit is reported to fix the issue.

Reported-by: Eero Tamminen <eero.t.tamminen@intel.com>
References: https://gitlab.freedesktop.org/drm/intel/-/issues/3519
Fixes: b739f125e4eb ("i915: use io_mapping_map_user")
Cc: Christoph Hellwig <hch@lst.de>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210527185145.458021-1-matthew.auld@intel.com
(cherry picked from commit 0e4fe0c9f2f981f26e01b73f3c465ca314c4f9c0)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/Kconfig             |  1 -
 drivers/gpu/drm/i915/gem/i915_gem_mman.c |  9 ++---
 drivers/gpu/drm/i915/i915_drv.h          |  3 ++
 drivers/gpu/drm/i915/i915_mm.c           | 44 ++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 93f4d059fc89f..1e1cb245fca77 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -20,7 +20,6 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
-	select IO_MAPPING
 	select SYNC_FILE
 	select IOSF_MBI
 	select CRC32
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index f6fe5cb014382..8598a1c78a4c2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -367,10 +367,11 @@ retry:
 		goto err_unpin;
 
 	/* Finally, remap it using the new GTT offset */
-	ret = io_mapping_map_user(&ggtt->iomap, area, area->vm_start +
-			(vma->ggtt_view.partial.offset << PAGE_SHIFT),
-			(ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
-			min_t(u64, vma->size, area->vm_end - area->vm_start));
+	ret = remap_io_mapping(area,
+			       area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
+			       (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
+			       min_t(u64, vma->size, area->vm_end - area->vm_start),
+			       &ggtt->iomap);
 	if (ret)
 		goto err_fence;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9ec9277539ec1..69e43bf91a153 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1905,6 +1905,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 
 /* i915_mm.c */
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap);
 int remap_io_sg(struct vm_area_struct *vma,
 		unsigned long addr, unsigned long size,
 		struct scatterlist *sgl, resource_size_t iobase);
diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c
index 9a777b0ff59b0..666808cb3a326 100644
--- a/drivers/gpu/drm/i915/i915_mm.c
+++ b/drivers/gpu/drm/i915/i915_mm.c
@@ -37,6 +37,17 @@ struct remap_pfn {
 	resource_size_t iobase;
 };
 
+static int remap_pfn(pte_t *pte, unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+
+	/* Special PTE are not associated with any struct page */
+	set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot)));
+	r->pfn++;
+
+	return 0;
+}
+
 #define use_dma(io) ((io) != -1)
 
 static inline unsigned long sgt_pfn(const struct remap_pfn *r)
@@ -66,7 +77,40 @@ static int remap_sg(pte_t *pte, unsigned long addr, void *data)
 	return 0;
 }
 
+/**
+ * remap_io_mapping - remap an IO mapping to userspace
+ * @vma: user vma to map to
+ * @addr: target user address to start at
+ * @pfn: physical address of kernel memory
+ * @size: size of map area
+ * @iomap: the source io_mapping
+ *
+ *  Note: this is only safe if the mm semaphore is held when called.
+ */
+int remap_io_mapping(struct vm_area_struct *vma,
+		     unsigned long addr, unsigned long pfn, unsigned long size,
+		     struct io_mapping *iomap)
+{
+	struct remap_pfn r;
+	int err;
+
 #define EXPECTED_FLAGS (VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)
+	GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS);
+
+	/* We rely on prevalidation of the io-mapping to skip track_pfn(). */
+	r.mm = vma->vm_mm;
+	r.pfn = pfn;
+	r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) |
+			  (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK));
+
+	err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r);
+	if (unlikely(err)) {
+		zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT);
+		return err;
+	}
+
+	return 0;
+}
 
 /**
  * remap_io_sg - remap an IO mapping to userspace
-- 
GitLab


From 6d0aac74e1e28691e355a7a40bd5961d495982a2 Mon Sep 17 00:00:00 2001
From: Herman <herman.yim88@gmail.com>
Date: Fri, 21 May 2021 04:14:57 +0200
Subject: [PATCH 2074/3804] media: drivers/media/platform/Rcar_jpu.c : fix typo
 issues

change 'requerment' into 'requirement'
change 'quantanization' into 'quantization'
change 'qantization' into 'quantization'

Signed-off-by: Herman <yanshuaijun@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar_jpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/rcar_jpu.c b/drivers/media/platform/rcar_jpu.c
index a7c198c17deb1..f57158bf2b11a 100644
--- a/drivers/media/platform/rcar_jpu.c
+++ b/drivers/media/platform/rcar_jpu.c
@@ -42,7 +42,7 @@
 
 /*
  * Align JPEG header end to cache line to make sure we will not have any issues
- * with cache; additionally to requerment (33.3.27 R01UH0501EJ0100 Rev.1.00)
+ * with cache; additionally to requirement (33.3.27 R01UH0501EJ0100 Rev.1.00)
  */
 #define JPU_JPEG_HDR_SIZE		(ALIGN(0x258, L1_CACHE_BYTES))
 #define JPU_JPEG_MAX_BYTES_PER_PIXEL	2	/* 16 bit precision format */
@@ -121,7 +121,7 @@
 #define JCCMD_JEND	(1 << 2)
 #define JCCMD_JSRT	(1 << 0)
 
-/* JPEG code quantanization table number register */
+/* JPEG code quantization table number register */
 #define JCQTN	0x0c
 #define JCQTN_SHIFT(t)		(((t) - 1) << 1)
 
@@ -1644,7 +1644,7 @@ static int jpu_probe(struct platform_device *pdev)
 		goto device_register_rollback;
 	}
 
-	/* fill in qantization and Huffman tables for encoder */
+	/* fill in quantization and Huffman tables for encoder */
 	for (i = 0; i < JPU_MAX_QUALITY; i++)
 		jpu_generate_hdr(i, (unsigned char *)jpeg_hdrs[i]);
 
-- 
GitLab


From bf950fdc71fe756ea6407f2cbf6ce051b8f5ea07 Mon Sep 17 00:00:00 2001
From: Herman <herman.yim88@gmail.com>
Date: Fri, 21 May 2021 10:36:29 +0200
Subject: [PATCH 2075/3804] media: drivers/media/usb/gspca/cpia1.c : fix
 spelling typo

change 'then' into 'than'

Signed-off-by: Herman <yanshuaijun@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/gspca/cpia1.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c
index d93d384286c16..46ed95483e222 100644
--- a/drivers/media/usb/gspca/cpia1.c
+++ b/drivers/media/usb/gspca/cpia1.c
@@ -365,8 +365,9 @@ struct sd {
 static const struct v4l2_pix_format mode[] = {
 	{160, 120, V4L2_PIX_FMT_CPIA1, V4L2_FIELD_NONE,
 		/* The sizeimage is trial and error, as with low framerates
-		   the camera will pad out usb frames, making the image
-		   data larger then strictly necessary */
+		 *  the camera will pad out usb frames, making the image
+		 *  data larger than strictly necessary
+		 */
 		.bytesperline = 160,
 		.sizeimage = 65536,
 		.colorspace = V4L2_COLORSPACE_SRGB,
-- 
GitLab


From d170ebb00472268410dce80ae4834c98e79315da Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Fri, 21 May 2021 10:45:44 +0200
Subject: [PATCH 2076/3804] media: uapi/linux/cec-funcs.h: set delay to 1 if
 unused

If the audio_out_delay value is unused, then set it to 1, not 0.
The value 0 is reserved, and 1 is a much safer value since it
translates to a delay of (1 - 1) * 2 = 0 ms.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/cec-funcs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h
index 37590027b6046..c3baaea0b8ef6 100644
--- a/include/uapi/linux/cec-funcs.h
+++ b/include/uapi/linux/cec-funcs.h
@@ -1665,7 +1665,7 @@ static inline void cec_ops_report_current_latency(const struct cec_msg *msg,
 	if (*audio_out_compensated == 3 && msg->len >= 7)
 		*audio_out_delay = msg->msg[6];
 	else
-		*audio_out_delay = 0;
+		*audio_out_delay = 1;
 }
 
 static inline void cec_msg_request_current_latency(struct cec_msg *msg,
-- 
GitLab


From ce67eaca95f8ab5c6aae41a10adfe9a6e8efa58c Mon Sep 17 00:00:00 2001
From: Joe Richey <joerichey@google.com>
Date: Fri, 21 May 2021 10:58:46 +0200
Subject: [PATCH 2077/3804] media: vicodec: Use _BITUL() macro in UAPI headers

Replace BIT() in v4l2's UAPI header with _BITUL(). BIT() is not defined
in the UAPI headers and its usage may cause userspace build errors.

Fixes: 206bc0f6fb94 ("media: vicodec: mark the stateless FWHT API as stable")
Signed-off-by: Joe Richey <joerichey@google.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index f96bea19c9916..fdf97a6d7d18a 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -50,6 +50,7 @@
 #ifndef __LINUX_V4L2_CONTROLS_H
 #define __LINUX_V4L2_CONTROLS_H
 
+#include <linux/const.h>
 #include <linux/types.h>
 
 /* Control classes */
@@ -1602,30 +1603,30 @@ struct v4l2_ctrl_h264_decode_params {
 #define V4L2_FWHT_VERSION			3
 
 /* Set if this is an interlaced format */
-#define V4L2_FWHT_FL_IS_INTERLACED		BIT(0)
+#define V4L2_FWHT_FL_IS_INTERLACED		_BITUL(0)
 /* Set if this is a bottom-first (NTSC) interlaced format */
-#define V4L2_FWHT_FL_IS_BOTTOM_FIRST		BIT(1)
+#define V4L2_FWHT_FL_IS_BOTTOM_FIRST		_BITUL(1)
 /* Set if each 'frame' contains just one field */
-#define V4L2_FWHT_FL_IS_ALTERNATE		BIT(2)
+#define V4L2_FWHT_FL_IS_ALTERNATE		_BITUL(2)
 /*
  * If V4L2_FWHT_FL_IS_ALTERNATE was set, then this is set if this
  * 'frame' is the bottom field, else it is the top field.
  */
-#define V4L2_FWHT_FL_IS_BOTTOM_FIELD		BIT(3)
+#define V4L2_FWHT_FL_IS_BOTTOM_FIELD		_BITUL(3)
 /* Set if the Y' plane is uncompressed */
-#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED	BIT(4)
+#define V4L2_FWHT_FL_LUMA_IS_UNCOMPRESSED	_BITUL(4)
 /* Set if the Cb plane is uncompressed */
-#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED		BIT(5)
+#define V4L2_FWHT_FL_CB_IS_UNCOMPRESSED		_BITUL(5)
 /* Set if the Cr plane is uncompressed */
-#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED		BIT(6)
+#define V4L2_FWHT_FL_CR_IS_UNCOMPRESSED		_BITUL(6)
 /* Set if the chroma plane is full height, if cleared it is half height */
-#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT		BIT(7)
+#define V4L2_FWHT_FL_CHROMA_FULL_HEIGHT		_BITUL(7)
 /* Set if the chroma plane is full width, if cleared it is half width */
-#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH		BIT(8)
+#define V4L2_FWHT_FL_CHROMA_FULL_WIDTH		_BITUL(8)
 /* Set if the alpha plane is uncompressed */
-#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED	BIT(9)
+#define V4L2_FWHT_FL_ALPHA_IS_UNCOMPRESSED	_BITUL(9)
 /* Set if this is an I Frame */
-#define V4L2_FWHT_FL_I_FRAME			BIT(10)
+#define V4L2_FWHT_FL_I_FRAME			_BITUL(10)
 
 /* A 4-values flag - the number of components - 1 */
 #define V4L2_FWHT_FL_COMPONENTS_NUM_MSK		GENMASK(18, 16)
-- 
GitLab


From 8c8b9a9be2afa8bd6a72ad1130532baab9fab89d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 21 May 2021 15:28:38 +0200
Subject: [PATCH 2078/3804] media: dtv5100: fix control-request directions

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the control requests which erroneously used usb_rcvctrlpipe().

Fixes: 8466028be792 ("V4L/DVB (8734): Initial support for AME DTV-5100 USB2.0 DVB-T")
Cc: stable@vger.kernel.org      # 2.6.28
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb/dtv5100.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/dtv5100.c b/drivers/media/usb/dvb-usb/dtv5100.c
index fba06932a9e0e..1c13e493322cc 100644
--- a/drivers/media/usb/dvb-usb/dtv5100.c
+++ b/drivers/media/usb/dvb-usb/dtv5100.c
@@ -26,6 +26,7 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
 			   u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen)
 {
 	struct dtv5100_state *st = d->priv;
+	unsigned int pipe;
 	u8 request;
 	u8 type;
 	u16 value;
@@ -34,6 +35,7 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
 	switch (wlen) {
 	case 1:
 		/* write { reg }, read { value } */
+		pipe = usb_rcvctrlpipe(d->udev, 0);
 		request = (addr == DTV5100_DEMOD_ADDR ? DTV5100_DEMOD_READ :
 							DTV5100_TUNER_READ);
 		type = USB_TYPE_VENDOR | USB_DIR_IN;
@@ -41,6 +43,7 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
 		break;
 	case 2:
 		/* write { reg, value } */
+		pipe = usb_sndctrlpipe(d->udev, 0);
 		request = (addr == DTV5100_DEMOD_ADDR ? DTV5100_DEMOD_WRITE :
 							DTV5100_TUNER_WRITE);
 		type = USB_TYPE_VENDOR | USB_DIR_OUT;
@@ -54,7 +57,7 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
 
 	memcpy(st->data, rbuf, rlen);
 	msleep(1); /* avoid I2C errors */
-	return usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), request,
+	return usb_control_msg(d->udev, pipe, request,
 			       type, value, index, st->data, rlen,
 			       DTV5100_USB_TIMEOUT);
 }
@@ -141,7 +144,7 @@ static int dtv5100_probe(struct usb_interface *intf,
 
 	/* initialize non qt1010/zl10353 part? */
 	for (i = 0; dtv5100_init[i].request; i++) {
-		ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
+		ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
 				      dtv5100_init[i].request,
 				      USB_TYPE_VENDOR | USB_DIR_OUT,
 				      dtv5100_init[i].value,
-- 
GitLab


From 53ae298fde7adcc4b1432bce2dbdf8dac54dfa72 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 21 May 2021 15:28:39 +0200
Subject: [PATCH 2079/3804] media: gspca/sq905: fix control-request direction

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the USB_REQ_SYNCH_FRAME request which erroneously used
usb_sndctrlpipe().

Fixes: 27d35fc3fb06 ("V4L/DVB (10639): gspca - sq905: New subdriver.")
Cc: stable@vger.kernel.org      # 2.6.30
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/gspca/sq905.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/gspca/sq905.c b/drivers/media/usb/gspca/sq905.c
index 9491110709718..32504ebcfd4de 100644
--- a/drivers/media/usb/gspca/sq905.c
+++ b/drivers/media/usb/gspca/sq905.c
@@ -116,7 +116,7 @@ static int sq905_command(struct gspca_dev *gspca_dev, u16 index)
 	}
 
 	ret = usb_control_msg(gspca_dev->dev,
-			      usb_sndctrlpipe(gspca_dev->dev, 0),
+			      usb_rcvctrlpipe(gspca_dev->dev, 0),
 			      USB_REQ_SYNCH_FRAME,                /* request */
 			      USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 			      SQ905_PING, 0, gspca_dev->usb_buf, 1,
-- 
GitLab


From 5eabfbdd7d6a473afbbd4916877ee04801ca2c45 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 22 May 2021 05:19:11 +0200
Subject: [PATCH 2080/3804] media: staging: media: tegra-vde: add missing error
 return code in tegra_vde_probe()

Add missing return error code when pm_runtime_resume_and_get() failed.

Fixes: dc8276b78917 ("staging: media: tegra-vde: use pm_runtime_resume_and_get()")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/tegra-vde/vde.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/tegra-vde/vde.c b/drivers/staging/media/tegra-vde/vde.c
index e025b69776f25..ed4c1250b3038 100644
--- a/drivers/staging/media/tegra-vde/vde.c
+++ b/drivers/staging/media/tegra-vde/vde.c
@@ -1071,7 +1071,8 @@ static int tegra_vde_probe(struct platform_device *pdev)
 	 * power-cycle it in order to put hardware into a predictable lower
 	 * power state.
 	 */
-	if (pm_runtime_resume_and_get(dev) < 0)
+	err = pm_runtime_resume_and_get(dev);
+	if (err)
 		goto err_pm_runtime;
 
 	pm_runtime_put(dev);
-- 
GitLab


From 8ed339f23d41e21660a389adf2e7b2966d457ff6 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 13:09:18 +0200
Subject: [PATCH 2081/3804] media: gspca/gl860: fix zero-length control
 requests

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Control transfers without a data stage are treated as OUT requests by
the USB stack and should be using usb_sndctrlpipe(). Failing to do so
will now trigger a warning.

Fix the gl860_RTx() helper so that zero-length control reads fail with
an error message instead. Note that there are no current callers that
would trigger this.

Fixes: 4f7cb8837cec ("V4L/DVB (12954): gspca - gl860: Addition of GL860 based webcams")
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/gspca/gl860/gl860.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/gspca/gl860/gl860.c b/drivers/media/usb/gspca/gl860/gl860.c
index 2c05ea2598e76..ce4ee8bc75c85 100644
--- a/drivers/media/usb/gspca/gl860/gl860.c
+++ b/drivers/media/usb/gspca/gl860/gl860.c
@@ -561,8 +561,8 @@ int gl860_RTx(struct gspca_dev *gspca_dev,
 					len, 400 + 200 * (len > 1));
 			memcpy(pdata, gspca_dev->usb_buf, len);
 		} else {
-			r = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
-					req, pref, val, index, NULL, len, 400);
+			gspca_err(gspca_dev, "zero-length read request\n");
+			r = -EINVAL;
 		}
 	}
 
-- 
GitLab


From b4bb4d425b7b02424afea2dfdcd77b3b4794175e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 13:09:19 +0200
Subject: [PATCH 2082/3804] media: gspca/sunplus: fix zero-length control
 requests

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Control transfers without a data stage are treated as OUT requests by
the USB stack and should be using usb_sndctrlpipe(). Failing to do so
will now trigger a warning.

Fix the single zero-length control request which was using the
read-register helper, and update the helper so that zero-length reads
fail with an error message instead.

Fixes: 6a7eba24e4f0 ("V4L/DVB (8157): gspca: all subdrivers")
Cc: stable@vger.kernel.org      # 2.6.27
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/gspca/sunplus.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/media/usb/gspca/sunplus.c b/drivers/media/usb/gspca/sunplus.c
index ace3da40006e7..971dee0a56dae 100644
--- a/drivers/media/usb/gspca/sunplus.c
+++ b/drivers/media/usb/gspca/sunplus.c
@@ -242,6 +242,10 @@ static void reg_r(struct gspca_dev *gspca_dev,
 		gspca_err(gspca_dev, "reg_r: buffer overflow\n");
 		return;
 	}
+	if (len == 0) {
+		gspca_err(gspca_dev, "reg_r: zero-length read\n");
+		return;
+	}
 	if (gspca_dev->usb_err < 0)
 		return;
 	ret = usb_control_msg(gspca_dev->dev,
@@ -250,7 +254,7 @@ static void reg_r(struct gspca_dev *gspca_dev,
 			USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 			0,		/* value */
 			index,
-			len ? gspca_dev->usb_buf : NULL, len,
+			gspca_dev->usb_buf, len,
 			500);
 	if (ret < 0) {
 		pr_err("reg_r err %d\n", ret);
@@ -727,7 +731,7 @@ static int sd_start(struct gspca_dev *gspca_dev)
 		case MegaImageVI:
 			reg_w_riv(gspca_dev, 0xf0, 0, 0);
 			spca504B_WaitCmdStatus(gspca_dev);
-			reg_r(gspca_dev, 0xf0, 4, 0);
+			reg_w_riv(gspca_dev, 0xf0, 4, 0);
 			spca504B_WaitCmdStatus(gspca_dev);
 			break;
 		default:
-- 
GitLab


From 25d5ce3a606a1eb23a9265d615a92a876ff9cb5f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 13:09:20 +0200
Subject: [PATCH 2083/3804] media: rtl28xxu: fix zero-length control request

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Control transfers without a data stage are treated as OUT requests by
the USB stack and should be using usb_sndctrlpipe(). Failing to do so
will now trigger a warning.

Fix the zero-length i2c-read request used for type detection by
attempting to read a single byte instead.

Reported-by: syzbot+faf11bbadc5a372564da@syzkaller.appspotmail.com
Fixes: d0f232e823af ("[media] rtl28xxu: add heuristic to detect chip type")
Cc: stable@vger.kernel.org      # 4.0
Cc: Antti Palosaari <crope@iki.fi>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 97ed17a141bbf..2c04ed8af0e44 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -612,8 +612,9 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d)
 static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name)
 {
 	struct rtl28xxu_dev *dev = d_to_priv(d);
+	u8 buf[1];
 	int ret;
-	struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL};
+	struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf};
 
 	dev_dbg(&d->intf->dev, "\n");
 
-- 
GitLab


From 80daed70c6dcc79f5ef36b98157062b0f3522732 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 24 May 2021 15:35:51 +0200
Subject: [PATCH 2084/3804] media: imx: imx7_mipi_csis: Fix error return code
 in mipi_csis_async_register()

Fix to return negative error code -EINVAL from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 88fc81388df9 ("media: imx: imx7_mipi_csis: Reject invalid data-lanes settings")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Rui Miguel Silva <rmfrfs@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx7-mipi-csis.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index d573f3475d28f..9cd3c86fee583 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -1175,6 +1175,7 @@ static int mipi_csis_async_register(struct csi_state *state)
 		if (vep.bus.mipi_csi2.data_lanes[i] != i + 1) {
 			dev_err(state->dev,
 				"data lanes reordering is not supported");
+			ret = -EINVAL;
 			goto err_parse;
 		}
 	}
-- 
GitLab


From 35037eab4acae8c2d01612d906d479f7006a733c Mon Sep 17 00:00:00 2001
From: lijian <lijian@yulong.com>
Date: Tue, 25 May 2021 11:41:48 +0200
Subject: [PATCH 2085/3804] media: v4l2-dev.c: Modified the macro
 SET_VALID_IOCTL

Macros whose expansion starts with an 'if' should be enclosed in a
do { } while (0) loop to avoid possible if/else logic defects at the
call site.
So wrap the body of the SET_VALID_IOCTL macro in do { } while (0).

[hverkuil: checkpatch: add parenthesis around 'ops']

Signed-off-by: lijian <lijian@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 4aa8fcd674d75..d03ace324db0d 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -518,9 +518,8 @@ static int get_index(struct video_device *vdev)
 	return find_first_zero_bit(used, VIDEO_NUM_DEVICES);
 }
 
-#define SET_VALID_IOCTL(ops, cmd, op)			\
-	if (ops->op)					\
-		set_bit(_IOC_NR(cmd), valid_ioctls)
+#define SET_VALID_IOCTL(ops, cmd, op) \
+	do { if ((ops)->op) set_bit(_IOC_NR(cmd), valid_ioctls); } while (0)
 
 /* This determines which ioctls are actually implemented in the driver.
    It's a one-time thing which simplifies video_ioctl2 as it can just do
-- 
GitLab


From 2bcfc81147b9266a521e5cfe2d9abbf64a2ceef4 Mon Sep 17 00:00:00 2001
From: lijian <lijian@yulong.com>
Date: Wed, 26 May 2021 11:47:12 +0200
Subject: [PATCH 2086/3804] media: videobuf-dma-sg: void function return
 statements are not generally useful

A return statement at the end of a void function is not generally useful,
so delete the trailing return in videobuf_vm_close().

Signed-off-by: lijian <lijian@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/videobuf-dma-sg.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index 8dd0562de287c..f75e5eedeee05 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -423,7 +423,6 @@ static void videobuf_vm_close(struct vm_area_struct *vma)
 		videobuf_queue_unlock(q);
 		kfree(map);
 	}
-	return;
 }
 
 /*
-- 
GitLab


From 98b9c7890b2d74d2f5342ef23d12c4bcbbec54bf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 15:06:12 +0200
Subject: [PATCH 2087/3804] docs: admin-guide: media: ipu3.rst: replace some
 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+201c ('“'): LEFT DOUBLE QUOTATION MARK
	- U+201d ('”'): RIGHT DOUBLE QUOTATION MARK

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/admin-guide/media/ipu3.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/media/ipu3.rst b/Documentation/admin-guide/media/ipu3.rst
index d6454f637ff4f..52c1c04173dac 100644
--- a/Documentation/admin-guide/media/ipu3.rst
+++ b/Documentation/admin-guide/media/ipu3.rst
@@ -244,7 +244,7 @@ output larger bayer frame for further YUV processing than "VIDEO" mode to get
 high quality images. Besides, "STILL" mode need XNR3 to do noise reduction,
 hence "STILL" mode will need more power and memory bandwidth than "VIDEO" mode.
 TNR will be enabled in "VIDEO" mode and bypassed by "STILL" mode. ImgU is
-running at “VIDEO” mode by default, the user can use v4l2 control
+running at "VIDEO" mode by default, the user can use v4l2 control
 V4L2_CID_INTEL_IPU3_MODE (currently defined in
 drivers/staging/media/ipu3/include/uapi/intel-ipu3.h) to query and set the
 running mode. For user, there is no difference for buffer queueing between the
-- 
GitLab


From 9df4827523bdc4032b1021395e8ee6f880d1e8b1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 15:06:43 +0200
Subject: [PATCH 2088/3804] docs: driver-api: media: zoran: replace SOFT HYPHEN
 character
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the occurrences of the following character:

	- U+00ad ('­'): SOFT HYPHEN
	  as ASCII HYPHEN is preferred over SOFT HYPHEN

At least with some fonts, a SOFT HYPHEN is displayed as
a blank space.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/driver-api/media/drivers/zoran.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/media/drivers/zoran.rst b/Documentation/driver-api/media/drivers/zoran.rst
index 83cbae9cedefc..b205e10c31546 100644
--- a/Documentation/driver-api/media/drivers/zoran.rst
+++ b/Documentation/driver-api/media/drivers/zoran.rst
@@ -319,7 +319,7 @@ Conexant bt866 TV encoder
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
 - is used in AVS6EYES, and
-- can generate: NTSC/PAL, PAL­M, PAL­N
+- can generate: NTSC/PAL, PAL-M, PAL-N
 
 The adv717x, should be able to produce PAL N. But you find nothing PAL N
 specific in the registers. Seem that you have to reuse a other standard
-- 
GitLab


From d4a84f86e9169e07595dd399c42bc7728d077531 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 15:12:17 +0200
Subject: [PATCH 2089/3804] docs: userspace-api: media: fdl-appendix.rst:
 replace some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+201c ('“'): LEFT DOUBLE QUOTATION MARK
	- U+201d ('”'): RIGHT DOUBLE QUOTATION MARK

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/fdl-appendix.rst      | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/userspace-api/media/fdl-appendix.rst b/Documentation/userspace-api/media/fdl-appendix.rst
index 683ebed870178..b1bc725b4ec7d 100644
--- a/Documentation/userspace-api/media/fdl-appendix.rst
+++ b/Documentation/userspace-api/media/fdl-appendix.rst
@@ -13,14 +13,14 @@ GNU Free Documentation License
 ===========
 
 The purpose of this License is to make a manual, textbook, or other
-written document “free” in the sense of freedom: to assure everyone the
+written document "free" in the sense of freedom: to assure everyone the
 effective freedom to copy and redistribute it, with or without modifying
 it, either commercially or noncommercially. Secondarily, this License
 preserves for the author and publisher a way to get credit for their
 work, while not being considered responsible for modifications made by
 others.
 
-This License is a kind of “copyleft”, which means that derivative works
+This License is a kind of "copyleft", which means that derivative works
 of the document must themselves be free in the same sense. It
 complements the GNU General Public License, which is a copyleft license
 designed for free software.
@@ -44,21 +44,21 @@ works whose purpose is instruction or reference.
 
 This License applies to any manual or other work that contains a notice
 placed by the copyright holder saying it can be distributed under the
-terms of this License. The “Document”, below, refers to any such manual
+terms of this License. The "Document", below, refers to any such manual
 or work. Any member of the public is a licensee, and is addressed as
-“you”.
+"you".
 
 
 .. _fdl-modified:
 
-A “Modified Version” of the Document means any work containing the
+A "Modified Version" of the Document means any work containing the
 Document or a portion of it, either copied verbatim, or with
 modifications and/or translated into another language.
 
 
 .. _fdl-secondary:
 
-A “Secondary Section” is a named appendix or a front-matter section of
+A "Secondary Section" is a named appendix or a front-matter section of
 the :ref:`Document <fdl-document>` that deals exclusively with the
 relationship of the publishers or authors of the Document to the
 Document's overall subject (or to related matters) and contains nothing
@@ -72,7 +72,7 @@ regarding them.
 
 .. _fdl-invariant:
 
-The “Invariant Sections” are certain
+The "Invariant Sections" are certain
 :ref:`Secondary Sections <fdl-secondary>` whose titles are designated,
 as being those of Invariant Sections, in the notice that says that the
 :ref:`Document <fdl-document>` is released under this License.
@@ -80,14 +80,14 @@ as being those of Invariant Sections, in the notice that says that the
 
 .. _fdl-cover-texts:
 
-The “Cover Texts” are certain short passages of text that are listed, as
+The "Cover Texts" are certain short passages of text that are listed, as
 Front-Cover Texts or Back-Cover Texts, in the notice that says that the
 :ref:`Document <fdl-document>` is released under this License.
 
 
 .. _fdl-transparent:
 
-A “Transparent” copy of the :ref:`Document <fdl-document>` means a
+A "Transparent" copy of the :ref:`Document <fdl-document>` means a
 machine-readable copy, represented in a format whose specification is
 available to the general public, whose contents can be viewed and edited
 directly and straightforwardly with generic text editors or (for images
@@ -97,7 +97,7 @@ formatters or for automatic translation to a variety of formats suitable
 for input to text formatters. A copy made in an otherwise Transparent
 file format whose markup has been designed to thwart or discourage
 subsequent modification by readers is not Transparent. A copy that is
-not “Transparent” is called “Opaque”.
+not "Transparent" is called "Opaque".
 
 Examples of suitable formats for Transparent copies include plain ASCII
 without markup, Texinfo input format, LaTeX input format, SGML or XML
@@ -111,10 +111,10 @@ word processors for output purposes only.
 
 .. _fdl-title-page:
 
-The “Title Page” means, for a printed book, the title page itself, plus
+The "Title Page" means, for a printed book, the title page itself, plus
 such following pages as are needed to hold, legibly, the material this
 License requires to appear in the title page. For works in formats which
-do not have any title page as such, “Title Page” means the text near the
+do not have any title page as such, "Title Page" means the text near the
 most prominent appearance of the work's title, preceding the beginning
 of the body of the text.
 
@@ -242,11 +242,11 @@ Modified Version:
    Include an unaltered copy of this License.
 
 -  **I.**
-   Preserve the section entitled “History”, and its title, and add to it
+   Preserve the section entitled "History", and its title, and add to it
    an item stating at least the title, year, new authors, and publisher
    of the :ref:`Modified Version <fdl-modified>` as given on the
    :ref:`Title Page <fdl-title-page>`. If there is no section entitled
-   “History” in the :ref:`Document <fdl-document>`, create one stating
+   "History" in the :ref:`Document <fdl-document>`, create one stating
    the title, year, authors, and publisher of the Document as given on
    its Title Page, then add an item describing the Modified Version as
    stated in the previous sentence.
@@ -256,13 +256,13 @@ Modified Version:
    :ref:`Document <fdl-document>` for public access to a
    :ref:`Transparent <fdl-transparent>` copy of the Document, and
    likewise the network locations given in the Document for previous
-   versions it was based on. These may be placed in the “History”
+   versions it was based on. These may be placed in the "History"
    section. You may omit a network location for a work that was
    published at least four years before the Document itself, or if the
    original publisher of the version it refers to gives permission.
 
 -  **K.**
-   In any section entitled “Acknowledgements” or “Dedications”, preserve
+   In any section entitled "Acknowledgements" or "Dedications", preserve
    the section's title, and preserve in the section all the substance
    and tone of each of the contributor acknowledgements and/or
    dedications given therein.
@@ -274,11 +274,11 @@ Modified Version:
    part of the section titles.
 
 -  **M.**
-   Delete any section entitled “Endorsements”. Such a section may not be
+   Delete any section entitled "Endorsements". Such a section may not be
    included in the :ref:`Modified Version <fdl-modified>`.
 
 -  **N.**
-   Do not retitle any existing section as “Endorsements” or to conflict
+   Do not retitle any existing section as "Endorsements" or to conflict
    in title with any :ref:`Invariant Section <fdl-invariant>`.
 
 If the :ref:`Modified Version <fdl-modified>` includes new
@@ -290,7 +290,7 @@ of :ref:`Invariant Sections <fdl-invariant>` in the Modified Version's
 license notice. These titles must be distinct from any other section
 titles.
 
-You may add a section entitled “Endorsements”, provided it contains
+You may add a section entitled "Endorsements", provided it contains
 nothing but endorsements of your
 :ref:`Modified Version <fdl-modified>` by various parties--for
 example, statements of peer review or that the text has been approved by
@@ -337,11 +337,11 @@ the original author or publisher of that section if known, or else a
 unique number. Make the same adjustment to the section titles in the
 list of Invariant Sections in the license notice of the combined work.
 
-In the combination, you must combine any sections entitled “History” in
-the various original documents, forming one section entitled “History”;
-likewise combine any sections entitled “Acknowledgements”, and any
-sections entitled “Dedications”. You must delete all sections entitled
-“Endorsements.”
+In the combination, you must combine any sections entitled "History" in
+the various original documents, forming one section entitled "History";
+likewise combine any sections entitled "Acknowledgements", and any
+sections entitled "Dedications". You must delete all sections entitled
+"Endorsements."
 
 
 .. _fdl-section6:
@@ -372,7 +372,7 @@ with other separate and independent documents or works, in or on a
 volume of a storage or distribution medium, does not as a whole count as
 a :ref:`Modified Version <fdl-modified>` of the Document, provided no
 compilation copyright is claimed for the compilation. Such a compilation
-is called an “aggregate”, and this License does not apply to the other
+is called an "aggregate", and this License does not apply to the other
 self-contained works thus compiled with the Document , on account of
 their being thus compiled, if they are not themselves derivative works
 of the Document. If the :ref:`Cover Text <fdl-cover-texts>`
@@ -429,7 +429,7 @@ concerns. See
 
 Each version of the License is given a distinguishing version number. If
 the :ref:`Document <fdl-document>` specifies that a particular
-numbered version of this License “or any later version” applies to it,
+numbered version of this License "or any later version" applies to it,
 you have the option of following the terms and conditions either of that
 specified version or of any later version that has been published (not
 as a draft) by the Free Software Foundation. If the Document does not
@@ -455,13 +455,13 @@ notices just after the title page:
     being LIST THEIR TITLES, with the
     :ref:`Front-Cover Texts <fdl-cover-texts>` being LIST, and with
     the :ref:`Back-Cover Texts <fdl-cover-texts>` being LIST. A copy
-    of the license is included in the section entitled “GNU Free
-    Documentation License”.
+    of the license is included in the section entitled "GNU Free
+    Documentation License".
 
-If you have no :ref:`Invariant Sections <fdl-invariant>`, write “with
-no Invariant Sections” instead of saying which ones are invariant. If
-you have no :ref:`Front-Cover Texts <fdl-cover-texts>`, write “no
-Front-Cover Texts” instead of “Front-Cover Texts being LIST”; likewise
+If you have no :ref:`Invariant Sections <fdl-invariant>`, write "with
+no Invariant Sections" instead of saying which ones are invariant. If
+you have no :ref:`Front-Cover Texts <fdl-cover-texts>`, write "no
+Front-Cover Texts" instead of "Front-Cover Texts being LIST"; likewise
 for :ref:`Back-Cover Texts <fdl-cover-texts>`.
 
 If your document contains nontrivial examples of program code, we
-- 
GitLab


From eff7d26abc05821fd4ff32f2eef0a37cf977535b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 15:12:18 +0200
Subject: [PATCH 2090/3804] docs: userspace-api: media: v4l: replace some
 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output
	- U+2014 ('—'): EM DASH
	- U+2019 ('’'): RIGHT SINGLE QUOTATION MARK

Note that Sphinx auto-translates '---' into EM DASH.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/biblio.rst      | 8 ++++----
 Documentation/userspace-api/media/v4l/dev-decoder.rst | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/biblio.rst b/Documentation/userspace-api/media/v4l/biblio.rst
index 64d241daf63ce..7b8e6738ff9e1 100644
--- a/Documentation/userspace-api/media/v4l/biblio.rst
+++ b/Documentation/userspace-api/media/v4l/biblio.rst
@@ -51,7 +51,7 @@ ISO 13818-1
 ===========
 
 
-:title:     ITU-T Rec. H.222.0 | ISO/IEC 13818-1 "Information technology — Generic coding of moving pictures and associated audio information: Systems"
+:title:     ITU-T Rec. H.222.0 | ISO/IEC 13818-1 "Information technology --- Generic coding of moving pictures and associated audio information: Systems"
 
 :author:    International Telecommunication Union (http://www.itu.ch), International Organisation for Standardisation (http://www.iso.ch)
 
@@ -61,7 +61,7 @@ ISO 13818-2
 ===========
 
 
-:title:     ITU-T Rec. H.262 | ISO/IEC 13818-2 "Information technology — Generic coding of moving pictures and associated audio information: Video"
+:title:     ITU-T Rec. H.262 | ISO/IEC 13818-2 "Information technology --- Generic coding of moving pictures and associated audio information: Video"
 
 :author:    International Telecommunication Union (http://www.itu.ch), International Organisation for Standardisation (http://www.iso.ch)
 
@@ -150,7 +150,7 @@ ITU-T.81
 ========
 
 
-:title:     ITU-T Recommendation T.81 "Information Technology — Digital Compression and Coding of Continous-Tone Still Images — Requirements and Guidelines"
+:title:     ITU-T Recommendation T.81 "Information Technology --- Digital Compression and Coding of Continous-Tone Still Images --- Requirements and Guidelines"
 
 :author:    International Telecommunication Union (http://www.itu.int)
 
@@ -310,7 +310,7 @@ ISO 12232:2006
 ==============
 
 
-:title:     Photography — Digital still cameras — Determination of exposure index, ISO speed ratings, standard output sensitivity, and recommended exposure index
+:title:     Photography --- Digital still cameras --- Determination of exposure index, ISO speed ratings, standard output sensitivity, and recommended exposure index
 
 :author:    International Organization for Standardization (http://www.iso.org)
 
diff --git a/Documentation/userspace-api/media/v4l/dev-decoder.rst b/Documentation/userspace-api/media/v4l/dev-decoder.rst
index 3d4138a4ba691..5b9b83feecebf 100644
--- a/Documentation/userspace-api/media/v4l/dev-decoder.rst
+++ b/Documentation/userspace-api/media/v4l/dev-decoder.rst
@@ -38,7 +38,7 @@ Conventions and Notations Used in This Document
 6. i = [a..b]: sequence of integers from a to b, inclusive, i.e. i =
    [0..2]: i = 0, 1, 2.
 
-7. Given an ``OUTPUT`` buffer A, then A’ represents a buffer on the ``CAPTURE``
+7. Given an ``OUTPUT`` buffer A, then A' represents a buffer on the ``CAPTURE``
    queue containing data that resulted from processing buffer A.
 
 .. _decoder-glossary:
@@ -288,7 +288,7 @@ Initialization
 
       Changing the ``OUTPUT`` format may change the currently set ``CAPTURE``
       format. How the new ``CAPTURE`` format is determined is up to the decoder
-      and the client must ensure it matches its needs afterwards.
+      and the client must ensure it matches its needs afterwards.
 
 2.  Allocate source (bytestream) buffers via :c:func:`VIDIOC_REQBUFS` on
     ``OUTPUT``.
@@ -874,7 +874,7 @@ it may be affected as per normal decoder operation.
 
    any of the following results on the ``CAPTURE`` queue is allowed:
 
-     {A’, B’, G’, H’}, {A’, G’, H’}, {G’, H’}.
+     {A', B', G', H'}, {A', G', H'}, {G', H'}.
 
    To determine the CAPTURE buffer containing the first decoded frame after the
    seek, the client may observe the timestamps to match the CAPTURE and OUTPUT
-- 
GitLab


From c11669f738f48c7b3cf3b7ec700af33e1566d9c3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 15:12:18 +0200
Subject: [PATCH 2091/3804] docs: userspace-api: media: dvb: replace some
 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output
	- U+2019 ('’'): RIGHT SINGLE QUOTATION MARK
	- U+201c ('“'): LEFT DOUBLE QUOTATION MARK
	- U+201d ('”'): RIGHT DOUBLE QUOTATION MARK

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/dvb/audio-set-bypass-mode.rst       | 2 +-
 Documentation/userspace-api/media/dvb/audio.rst             | 2 +-
 Documentation/userspace-api/media/dvb/dmx-fopen.rst         | 2 +-
 Documentation/userspace-api/media/dvb/dmx-fread.rst         | 2 +-
 Documentation/userspace-api/media/dvb/dmx-set-filter.rst    | 2 +-
 Documentation/userspace-api/media/dvb/intro.rst             | 6 +++---
 Documentation/userspace-api/media/dvb/video.rst             | 2 +-
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst b/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
index ecac02f1b2fcd..80d551a2053a0 100644
--- a/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
+++ b/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
@@ -50,7 +50,7 @@ Description
 
 This ioctl call asks the Audio Device to bypass the Audio decoder and
 forward the stream without decoding. This mode shall be used if streams
-that can’t be handled by the Digital TV system shall be decoded. Dolby
+that can't be handled by the Digital TV system shall be decoded. Dolby
 DigitalTM streams are automatically forwarded by the Digital TV subsystem if
 the hardware can handle it.
 
diff --git a/Documentation/userspace-api/media/dvb/audio.rst b/Documentation/userspace-api/media/dvb/audio.rst
index eaae5675a47d7..aa753336b31f5 100644
--- a/Documentation/userspace-api/media/dvb/audio.rst
+++ b/Documentation/userspace-api/media/dvb/audio.rst
@@ -11,7 +11,7 @@ TV hardware. It can be accessed through ``/dev/dvb/adapter?/audio?``. Data
 types and ioctl definitions can be accessed by including
 ``linux/dvb/audio.h`` in your application.
 
-Please note that some Digital TV cards don’t have their own MPEG decoder, which
+Please note that some Digital TV cards don't have their own MPEG decoder, which
 results in the omission of the audio and video device.
 
 These ioctls were also used by V4L2 to control MPEG decoders implemented
diff --git a/Documentation/userspace-api/media/dvb/dmx-fopen.rst b/Documentation/userspace-api/media/dvb/dmx-fopen.rst
index 8f0a2b831d4a2..50b36eb4371e5 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fopen.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fopen.rst
@@ -82,7 +82,7 @@ appropriately.
     :widths: 1 16
 
     -  -  ``EMFILE``
-       -  “Too many open files”, i.e. no more filters available.
+       -  "Too many open files", i.e. no more filters available.
 
 The generic error codes are described at the
 :ref:`Generic Error Codes <gen-errors>` chapter.
diff --git a/Documentation/userspace-api/media/dvb/dmx-fread.rst b/Documentation/userspace-api/media/dvb/dmx-fread.rst
index 78e9daef595a3..88c4cddf7c307 100644
--- a/Documentation/userspace-api/media/dvb/dmx-fread.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-fread.rst
@@ -34,7 +34,7 @@ Description
 
 This system call returns filtered data, which might be section or Packetized
 Elementary Stream (PES) data. The filtered data is transferred from
-the driver’s internal circular buffer to ``buf``. The maximum amount of data
+the driver's internal circular buffer to ``buf``. The maximum amount of data
 to be transferred is implied by count.
 
 .. note::
diff --git a/Documentation/userspace-api/media/dvb/dmx-set-filter.rst b/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
index f43455b7adae7..1b8c8071b14f3 100644
--- a/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
+++ b/Documentation/userspace-api/media/dvb/dmx-set-filter.rst
@@ -37,7 +37,7 @@ parameters provided. A timeout may be defined stating number of seconds
 to wait for a section to be loaded. A value of 0 means that no timeout
 should be applied. Finally there is a flag field where it is possible to
 state whether a section should be CRC-checked, whether the filter should
-be a ”one-shot” filter, i.e. if the filtering operation should be
+be a "one-shot" filter, i.e. if the filtering operation should be
 stopped after the first section is received, and whether the filtering
 operation should be started immediately (without waiting for a
 :ref:`DMX_START` ioctl call). If a filter was previously set-up, this
diff --git a/Documentation/userspace-api/media/dvb/intro.rst b/Documentation/userspace-api/media/dvb/intro.rst
index a935f3914e562..6784ae79657c3 100644
--- a/Documentation/userspace-api/media/dvb/intro.rst
+++ b/Documentation/userspace-api/media/dvb/intro.rst
@@ -107,7 +107,7 @@ Audio and video decoder
       a Systems on a Chip (SoC) integrated circuit.
 
       It may also not be needed for certain usages (e.g. for data-only
-      uses like “internet over satellite”).
+      uses like "internet over satellite").
 
 :ref:`stb_components` shows a crude schematic of the control and data
 flow between those components.
@@ -148,9 +148,9 @@ individual devices are called:
 
 -  ``/dev/dvb/adapterN/caM``,
 
-where ``N`` enumerates the Digital TV cards in a system starting from 0, and
+where ``N`` enumerates the Digital TV cards in a system starting from 0, and
 ``M`` enumerates the devices of each type within each adapter, starting
-from 0, too. We will omit the “``/dev/dvb/adapterN/``\ ” in the further
+from 0, too. We will omit the "``/dev/dvb/adapterN/``\ " in the further
 discussion of these devices.
 
 More details about the data structures and function calls of all the
diff --git a/Documentation/userspace-api/media/dvb/video.rst b/Documentation/userspace-api/media/dvb/video.rst
index 38a8d39a1d25c..808705b769a10 100644
--- a/Documentation/userspace-api/media/dvb/video.rst
+++ b/Documentation/userspace-api/media/dvb/video.rst
@@ -16,7 +16,7 @@ stream, not its presentation on the TV or computer screen. On PCs this
 is typically handled by an associated video4linux device, e.g.
 **/dev/video**, which allows scaling and defining output windows.
 
-Some Digital TV cards don’t have their own MPEG decoder, which results in the
+Some Digital TV cards don't have their own MPEG decoder, which results in the
 omission of the audio and video device as well as the video4linux
 device.
 
-- 
GitLab


From 8212937305f84ef73ea81036dafb80c557583d4b Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Thu, 20 May 2021 21:23:57 -0700
Subject: [PATCH 2092/3804] usb: dwc3: gadget: Disable gadget IRQ during pullup
 disable

Current sequence utilizes dwc3_gadget_disable_irq() alongside
synchronize_irq() to ensure that no further DWC3 events are generated.
However, the dwc3_gadget_disable_irq() API only disables device
specific events.  Endpoint events can still be generated.  Briefly
disable the interrupt line, so that the cleanup code can run to
prevent device and endpoint events. (i.e. __dwc3_gadget_stop() and
dwc3_stop_active_transfers() respectively)

Without doing so, both the interrupt handler and the pullup disable
routine can end up writing to the GEVNTCOUNT register, which will
cause an incorrect count to be read on future interrupts.

Fixes: ae7e86108b12 ("usb: dwc3: Stop active transfers before halting the controller")
Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Link: https://lore.kernel.org/r/1621571037-1424-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 612825a39f821..2577488456dac 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2261,13 +2261,10 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 	}
 
 	/*
-	 * Synchronize any pending event handling before executing the controller
-	 * halt routine.
+	 * Synchronize and disable any further event handling while controller
+	 * is being enabled/disabled.
 	 */
-	if (!is_on) {
-		dwc3_gadget_disable_irq(dwc);
-		synchronize_irq(dwc->irq_gadget);
-	}
+	disable_irq(dwc->irq_gadget);
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
@@ -2305,6 +2302,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 
 	ret = dwc3_gadget_run_stop(dwc, is_on, false);
 	spin_unlock_irqrestore(&dwc->lock, flags);
+	enable_irq(dwc->irq_gadget);
+
 	pm_runtime_put(dwc->dev);
 
 	return ret;
-- 
GitLab


From 03715ea2e3dbbc56947137ce3b4ac18a726b2f87 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Fri, 28 May 2021 09:04:05 -0700
Subject: [PATCH 2093/3804] usb: dwc3: gadget: Bail from dwc3_gadget_exit() if
 dwc->gadget is NULL

There exists a possible scenario in which dwc3_gadget_init() can fail:
during a host -> peripheral mode switch in dwc3_set_mode(), and
a pending gadget driver fails to bind.  Then, if the DRD undergoes
another mode switch from peripheral->host the resulting
dwc3_gadget_exit() will attempt to reference an invalid and dangling
dwc->gadget pointer as well as call dma_free_coherent() on unmapped
DMA pointers.

The exact scenario can be reproduced as follows:
 - Start DWC3 in peripheral mode
 - Configure ConfigFS gadget with FunctionFS instance (or use g_ffs)
 - Run FunctionFS userspace application (open EPs, write descriptors, etc)
 - Bind gadget driver to DWC3's UDC
 - Switch DWC3 to host mode
   => dwc3_gadget_exit() is called. usb_del_gadget() will put the
	ConfigFS driver instance on the gadget_driver_pending_list
 - Stop FunctionFS application (closes the ep files)
 - Switch DWC3 to peripheral mode
   => dwc3_gadget_init() fails as usb_add_gadget() calls
	check_pending_gadget_drivers() and attempts to rebind the UDC
	to the ConfigFS gadget but fails with -19 (-ENODEV) because the
	FFS instance is not in FFS_ACTIVE state (userspace has not
	re-opened and written the descriptors yet, i.e. desc_ready!=0).
 - Switch DWC3 back to host mode
   => dwc3_gadget_exit() is called again, but this time dwc->gadget
	is invalid.

Although it can be argued that userspace should take responsibility
for ensuring that the FunctionFS application be ready prior to
allowing the composite driver bind to the UDC, failure to do so
should not result in a panic from the kernel driver.

Fix this by setting dwc->gadget to NULL in the failure path of
dwc3_gadget_init() and add a check to dwc3_gadget_exit() to bail out
unless the gadget pointer is valid.

Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically")
Cc: <stable@vger.kernel.org>
Reviewed-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210528160405.17550-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 2577488456dac..88270eee8a48c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4045,6 +4045,7 @@ err5:
 	dwc3_gadget_free_endpoints(dwc);
 err4:
 	usb_put_gadget(dwc->gadget);
+	dwc->gadget = NULL;
 err3:
 	dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce,
 			dwc->bounce_addr);
@@ -4064,6 +4065,9 @@ err0:
 
 void dwc3_gadget_exit(struct dwc3 *dwc)
 {
+	if (!dwc->gadget)
+		return;
+
 	usb_del_gadget(dwc->gadget);
 	dwc3_gadget_free_endpoints(dwc);
 	usb_put_gadget(dwc->gadget);
-- 
GitLab


From b65ba0c362be665192381cc59e3ac3ef6f0dd1e1 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Date: Fri, 28 May 2021 16:04:46 +0200
Subject: [PATCH 2094/3804] usb: musb: fix MUSB_QUIRK_B_DISCONNECT_99 handling

In commit 92af4fc6ec33 ("usb: musb: Fix suspend with devices
connected for a64"), the logic to support the
MUSB_QUIRK_B_DISCONNECT_99 quirk was modified to only conditionally
schedule the musb->irq_work delayed work.

This commit badly breaks ECM Gadget on AM335X. Indeed, with this
commit, one can observe massive packet loss:

$ ping 192.168.0.100
...
15 packets transmitted, 3 received, 80% packet loss, time 14316ms

Reverting this commit brings back a properly functioning ECM
Gadget. An analysis of the commit seems to indicate that a mistake was
made: the previous code was not falling through into the
MUSB_QUIRK_B_INVALID_VBUS_91, but now it is, unless the condition is
taken.

Changing the logic to be as it was before the problematic commit *and*
only conditionally scheduling musb->irq_work resolves the regression:

$ ping 192.168.0.100
...
64 packets transmitted, 64 received, 0% packet loss, time 64475ms

Fixes: 92af4fc6ec33 ("usb: musb: Fix suspend with devices connected for a64")
Cc: stable@vger.kernel.org
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Tested-by: Drew Fustini <drew@beagleboard.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Link: https://lore.kernel.org/r/20210528140446.278076-1-thomas.petazzoni@bootlin.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 8f09a387b7738..4c8f0112481f3 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2009,9 +2009,8 @@ static void musb_pm_runtime_check_session(struct musb *musb)
 			schedule_delayed_work(&musb->irq_work,
 					      msecs_to_jiffies(1000));
 			musb->quirk_retries--;
-			break;
 		}
-		fallthrough;
+		break;
 	case MUSB_QUIRK_B_INVALID_VBUS_91:
 		if (musb->quirk_retries && !musb->flush_irq_work) {
 			musb_dbg(musb,
-- 
GitLab


From 6490fa565534fa83593278267785a694fd378a2b Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Fri, 28 May 2021 16:16:13 +0800
Subject: [PATCH 2095/3804] usb: pd: Set PD_T_SINK_WAIT_CAP to 310ms

Current timer PD_T_SINK_WAIT_CAP is set to 240ms which will violate the
SinkWaitCapTimer (tTypeCSinkWaitCap 310 - 620 ms) defined in the PD
Spec if the port is fast enough when running the state machine. Set it
to the lower bound 310ms to ensure the timeout is in Spec.

Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210528081613.730661-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/pd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h
index bf00259493e07..96b7ff66f074b 100644
--- a/include/linux/usb/pd.h
+++ b/include/linux/usb/pd.h
@@ -460,7 +460,7 @@ static inline unsigned int rdo_max_power(u32 rdo)
 #define PD_T_RECEIVER_RESPONSE	15	/* 15ms max */
 #define PD_T_SOURCE_ACTIVITY	45
 #define PD_T_SINK_ACTIVITY	135
-#define PD_T_SINK_WAIT_CAP	240
+#define PD_T_SINK_WAIT_CAP	310	/* 310 - 620 ms */
 #define PD_T_PS_TRANSITION	500
 #define PD_T_SRC_TRANSITION	35
 #define PD_T_DRP_SNK		40
-- 
GitLab


From 450605c28d571eddca39a65fdbc1338add44c6d9 Mon Sep 17 00:00:00 2001
From: Praveen Kumar <kumarpraveen@linux.microsoft.com>
Date: Mon, 31 May 2021 13:10:46 +0530
Subject: [PATCH 2096/3804] x86/hyperv: fix logical processor creation

Microsoft Hypervisor expects the logical processor index to be the same
as CPU's index during logical processor creation. Using cpu_physical_id
confuses hypervisor's scheduler. That causes the root partition not to boot
when core scheduler is used.

This patch removes the call to cpu_physical_id and uses the CPU index
directly for bringing up logical processor. This scheme works for both
classic scheduler and core scheduler.

Fixes: 333abaf5abb3 (x86/hyperv: implement and use hv_smp_prepare_cpus)
Signed-off-by: Praveen Kumar <kumarpraveen@linux.microsoft.com>
Link: https://lore.kernel.org/r/20210531074046.113452-1-kumarpraveen@linux.microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/kernel/cpu/mshyperv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 22f13343b5da8..4fa0a42808951 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -236,7 +236,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 	for_each_present_cpu(i) {
 		if (i == 0)
 			continue;
-		ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
+		ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i);
 		BUG_ON(ret);
 	}
 
-- 
GitLab


From 9de6655cc5a6a1febc514465c87c24a0e96d8dba Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 25 May 2021 18:58:41 +0800
Subject: [PATCH 2097/3804] drivers: hv: Fix missing error code in
 vmbus_connect()

Eliminate the following smatch warning:

drivers/hv/connection.c:236 vmbus_connect() warn: missing error code
'ret'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1621940321-72353-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/connection.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 311cd005b3be6..5e479d54918cf 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -232,8 +232,10 @@ int vmbus_connect(void)
 	 */
 
 	for (i = 0; ; i++) {
-		if (i == ARRAY_SIZE(vmbus_versions))
+		if (i == ARRAY_SIZE(vmbus_versions)) {
+			ret = -EDOM;
 			goto cleanup;
+		}
 
 		version = vmbus_versions[i];
 		if (version > max_version)
-- 
GitLab


From 6b8f648959e5036695f056a60e3444f4753f643e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 2 Jun 2021 16:13:58 +0100
Subject: [PATCH 2098/3804] arm64: update string routine copyrights and URLs

To make future archaeology easier, let's have the string routine comment
blocks encode the specific upstream commit ID they were imported from.
These are the same commit IDs as listed in the commits importing the
code, expanded to 16 characters. Note that the routines have different
commit IDs, each representing the latest upstream commit which changed
the particular routine.

At the same time, let's consistently include 2021 in the copyright
dates.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210602151358.35571-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/lib/memcmp.S  | 4 ++--
 arch/arm64/lib/memcpy.S  | 6 +++---
 arch/arm64/lib/strcmp.S  | 4 ++--
 arch/arm64/lib/strlen.S  | 4 ++--
 arch/arm64/lib/strncmp.S | 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index 498f0d9941d91..7d956384222ff 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2013-2020, Arm Limited.
+ * Copyright (c) 2013-2021, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/memcmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
  */
 
 #include <linux/linkage.h>
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 31073a8304fb6..b82fd64ee1e1c 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2012-2020, Arm Limited.
+ * Copyright (c) 2012-2021, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/memcpy.S
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/memcpy.S
  */
 
 #include <linux/linkage.h>
@@ -249,4 +249,4 @@ EXPORT_SYMBOL(__memcpy)
 SYM_FUNC_END_ALIAS_PI(memmove)
 EXPORT_SYMBOL(memmove)
 SYM_FUNC_END_ALIAS(__memmove)
-EXPORT_SYMBOL(__memmove)
\ No newline at end of file
+EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index e82ccb6c2f931..d7bee210a798a 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2012-2020, Arm Limited.
+ * Copyright (c) 2012-2021, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strcmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S
  */
 
 #include <linux/linkage.h>
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index b557185b54a59..35fbdb7d6e1a6 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2013, Arm Limited.
+ * Copyright (c) 2013-2021, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strlen.S
+ * https://github.com/ARM-software/optimized-routines/blob/98e4d6a5c13c8e54/string/aarch64/strlen.S
  */
 
 #include <linux/linkage.h>
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 0c0bf5462de05..48d44f7fddb13 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2013, Arm Limited.
+ * Copyright (c) 2013-2021, Arm Limited.
  *
  * Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/master/string/aarch64/strncmp.S
+ * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S
  */
 
 #include <linux/linkage.h>
-- 
GitLab


From a3e74fb9247cd530dca246699d5eb5a691884d32 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Tue, 25 May 2021 18:01:34 +0300
Subject: [PATCH 2099/3804] RDMA/ipoib: Fix warning caused by destroying
 non-initial netns

After the commit 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib
interfaces"), if the IPoIB device is moved to non-initial netns,
destroying that netns lets the device vanish instead of moving it back to
the initial netns. This is happening because default_device_exit() skips
the interfaces due to having rtnl_link_ops set.

Steps to reproduce:
  ip netns add foo
  ip link set mlx5_ib0 netns foo
  ip netns delete foo

WARNING: CPU: 1 PID: 704 at net/core/dev.c:11435 netdev_exit+0x3f/0x50
Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT
nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack
nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun d
 fuse
CPU: 1 PID: 704 Comm: kworker/u64:3 Tainted: G S      W  5.13.0-rc1+ #1
Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.5 04/11/2016
Workqueue: netns cleanup_net
RIP: 0010:netdev_exit+0x3f/0x50
Code: 48 8b bb 30 01 00 00 e8 ef 81 b1 ff 48 81 fb c0 3a 54 a1 74 13 48
8b 83 90 00 00 00 48 81 c3 90 00 00 00 48 39 d8 75 02 5b c3 <0f> 0b 5b
c3 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00
RSP: 0018:ffffb297079d7e08 EFLAGS: 00010206
RAX: ffff8eb542c00040 RBX: ffff8eb541333150 RCX: 000000008010000d
RDX: 000000008010000e RSI: 000000008010000d RDI: ffff8eb440042c00
RBP: ffffb297079d7e48 R08: 0000000000000001 R09: ffffffff9fdeac00
R10: ffff8eb5003be000 R11: 0000000000000001 R12: ffffffffa1545620
R13: ffffffffa1545628 R14: 0000000000000000 R15: ffffffffa1543b20
FS:  0000000000000000(0000) GS:ffff8ed37fa00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005601b5f4c2e8 CR3: 0000001fc8c10002 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ops_exit_list.isra.9+0x36/0x70
 cleanup_net+0x234/0x390
 process_one_work+0x1cb/0x360
 ? process_one_work+0x360/0x360
 worker_thread+0x30/0x370
 ? process_one_work+0x360/0x360
 kthread+0x116/0x130
 ? kthread_park+0x80/0x80
 ret_from_fork+0x22/0x30

To avoid the above warning and later on the kernel panic that could happen
on shutdown due to a NULL pointer dereference, make sure to set the
netns_refund flag that was introduced by commit 3a5ca857079e ("can: dev:
Move device back to init netns on owning netns delete") to properly
restore the IPoIB interfaces to the initial netns.

Fixes: 5ce2dced8e95 ("RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces")
Link: https://lore.kernel.org/r/20210525150134.139342-1-kamalheib1@gmail.com
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index d5a90a66b45cf..5b05cf3837da1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -163,6 +163,7 @@ static size_t ipoib_get_size(const struct net_device *dev)
 
 static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
 	.kind		= "ipoib",
+	.netns_refund   = true,
 	.maxtype	= IFLA_IPOIB_MAX,
 	.policy		= ipoib_policy,
 	.priv_size	= sizeof(struct ipoib_dev_priv),
-- 
GitLab


From d94b93a9101573eb75b819dee94b1417acff631b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 30 Dec 2020 16:54:56 +0100
Subject: [PATCH 2100/3804] ARM: cpuidle: Avoid orphan section warning

Since commit 83109d5d5fba ("x86/build: Warn on orphan section placement"),
we get a warning for objects in orphan sections. The cpuidle implementation
for OMAP causes this when CONFIG_CPU_IDLE is disabled:

arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table'
arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table'
arm-linux-gnueabi-ld: warning: orphan section `__cpuidle_method_of_table' from `arch/arm/mach-omap2/pm33xx-core.o' being placed in section `__cpuidle_method_of_table'

Change the definition of CPUIDLE_METHOD_OF_DECLARE() to silently
drop the table and all code referenced from it when CONFIG_CPU_IDLE
is disabled.

Fixes: 06ee7a950b6a ("ARM: OMAP2+: pm33xx-core: Add cpuidle_ops for am335x/am437x")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201230155506.1085689-1-arnd@kernel.org
---
 arch/arm/include/asm/cpuidle.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 0d67ed682e077..bc4ffa7ca04c7 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -7,9 +7,11 @@
 #ifdef CONFIG_CPU_IDLE
 extern int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index);
+#define __cpuidle_method_section __used __section("__cpuidle_method_of_table")
 #else
 static inline int arm_cpuidle_simple_enter(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index) { return -ENODEV; }
+#define __cpuidle_method_section __maybe_unused /* drop silently */
 #endif
 
 /* Common ARM WFI state */
@@ -42,8 +44,7 @@ struct of_cpuidle_method {
 
 #define CPUIDLE_METHOD_OF_DECLARE(name, _method, _ops)			\
 	static const struct of_cpuidle_method __cpuidle_method_of_table_##name \
-	__used __section("__cpuidle_method_of_table")			\
-	= { .method = _method, .ops = _ops }
+	__cpuidle_method_section = { .method = _method, .ops = _ops }
 
 extern int arm_cpuidle_suspend(int index);
 
-- 
GitLab


From d4c6399900364facd84c9e35ce1540b6046c345f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 5 May 2021 17:14:11 -0700
Subject: [PATCH 2101/3804] vmlinux.lds.h: Avoid orphan section with !SMP

With x86_64_defconfig and the following configs, there is an orphan
section warning:

CONFIG_SMP=n
CONFIG_AMD_MEM_ENCRYPT=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_KVM=y
CONFIG_PARAVIRT=y

ld: warning: orphan section `.data..decrypted' from `arch/x86/kernel/cpu/vmware.o' being placed in section `.data..decrypted'
ld: warning: orphan section `.data..decrypted' from `arch/x86/kernel/kvm.o' being placed in section `.data..decrypted'

These sections are created with DEFINE_PER_CPU_DECRYPTED, which
ultimately turns into __PCPU_ATTRS, which in turn has a section
attribute with a value of PER_CPU_BASE_SECTION + the section name. When
CONFIG_SMP is not set, the base section is .data and that is not
currently handled in any linker script.

Add .data..decrypted to PERCPU_DECRYPTED_SECTION, which is included in
PERCPU_INPUT -> PERCPU_SECTION, which is included in the x86 linker
script when either CONFIG_X86_64 or CONFIG_SMP is unset, taking care of
the warning.

Fixes: ac26963a1175 ("percpu: Introduce DEFINE_PER_CPU_DECRYPTED")
Link: https://github.com/ClangBuiltLinux/linux/issues/1360
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nick Desaulniers <ndesaulniers@google.com> # build
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210506001410.1026691-1-nathan@kernel.org
---
 include/asm-generic/vmlinux.lds.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 40a9c101565eb..17325416e2dee 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -960,6 +960,7 @@
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 #define PERCPU_DECRYPTED_SECTION					\
 	. = ALIGN(PAGE_SIZE);						\
+	*(.data..decrypted)						\
 	*(.data..percpu..decrypted)					\
 	. = ALIGN(PAGE_SIZE);
 #else
-- 
GitLab


From ff40e51043af63715ab413995ff46996ecf9583f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 28 May 2021 09:16:31 +0000
Subject: [PATCH 2102/3804] bpf, lockdown, audit: Fix buggy SELinux lockdown
 permission checks

Commit 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
added an implementation of the locked_down LSM hook to SELinux, with the aim
to restrict which domains are allowed to perform operations that would breach
lockdown. This is indirectly also getting audit subsystem involved to report
events. The latter is problematic, as reported by Ondrej and Serhei, since it
can bring down the whole system via audit:

  1) The audit events that are triggered due to calls to security_locked_down()
     can OOM kill a machine, see below details [0].

  2) It also seems to be causing a deadlock via avc_has_perm()/slow_avc_audit()
     when trying to wake up kauditd, for example, when using trace_sched_switch()
     tracepoint, see details in [1]. Triggering this was not via some hypothetical
     corner case, but with existing tools like runqlat & runqslower from bcc, for
     example, which make use of this tracepoint. Rough call sequence goes like:

     rq_lock(rq) -> -------------------------+
       trace_sched_switch() ->               |
         bpf_prog_xyz() ->                   +-> deadlock
           selinux_lockdown() ->             |
             audit_log_end() ->              |
               wake_up_interruptible() ->    |
                 try_to_wake_up() ->         |
                   rq_lock(rq) --------------+

What's worse is that the intention of 59438b46471a to further restrict lockdown
settings for specific applications in respect to the global lockdown policy is
completely broken for BPF. The SELinux policy rule for the current lockdown check
looks something like this:

  allow <who> <who> : lockdown { <reason> };

However, this doesn't match with the 'current' task where the security_locked_down()
is executed, example: httpd does a syscall. There is a tracing program attached
to the syscall which triggers a BPF program to run, which ends up doing a
bpf_probe_read_kernel{,_str}() helper call. The selinux_lockdown() hook does
the permission check against 'current', that is, httpd in this example. httpd
has literally zero relation to this tracing program, and it would be nonsensical
having to write an SELinux policy rule against httpd to let the tracing helper
pass. The policy in this case needs to be against the entity that is installing
the BPF program. For example, if bpftrace would generate a histogram of syscall
counts by user space application:

  bpftrace -e 'tracepoint:raw_syscalls:sys_enter { @[comm] = count(); }'

bpftrace would then go and generate a BPF program from this internally. One way
of doing it [for the sake of the example] could be to call bpf_get_current_task()
helper and then access current->comm via one of bpf_probe_read_kernel{,_str}()
helpers. So the program itself has nothing to do with httpd or any other random
app doing a syscall here. The BPF program _explicitly initiated_ the lockdown
check. The allow/deny policy belongs in the context of bpftrace: meaning, you
want to grant bpftrace access to use these helpers, but other tracers on the
system like my_random_tracer _not_.

Therefore fix all three issues at the same time by taking a completely different
approach for the security_locked_down() hook, that is, move the check into the
program verification phase where we actually retrieve the BPF func proto. This
also reliably gets the task (current) that is trying to install the BPF tracing
program, e.g. bpftrace/bcc/perf/systemtap/etc, and it also fixes the OOM since
we're moving this out of the BPF helper's fast-path which can be called several
millions of times per second.

The check is then also in line with other security_locked_down() hooks in the
system where the enforcement is performed at open/load time, for example,
open_kcore() for /proc/kcore access or module_sig_check() for module signatures
just to pick a few random ones. What's out of scope in the fix as well as in
other security_locked_down() hook locations /outside/ of BPF subsystem is that
if the lockdown policy changes on the fly there is no retrospective action.
This requires a different discussion, potentially complex infrastructure, and
it's also not clear whether this can be solved generically. Either way, it is
out of scope for a suitable stable fix which this one is targeting. Note that
the breakage is specifically on 59438b46471a where it started to rely on 'current'
as UAPI behavior, and _not_ earlier infrastructure such as 9d1f8be5cf42 ("bpf:
Restrict bpf when kernel lockdown is in confidentiality mode").

[0] https://bugzilla.redhat.com/show_bug.cgi?id=1955585, Jakub Hrozek says:

  I starting seeing this with F-34. When I run a container that is traced with
  BPF to record the syscalls it is doing, auditd is flooded with messages like:

  type=AVC msg=audit(1619784520.593:282387): avc:  denied  { confidentiality }
    for pid=476 comm="auditd" lockdown_reason="use of bpf to read kernel RAM"
      scontext=system_u:system_r:auditd_t:s0 tcontext=system_u:system_r:auditd_t:s0
        tclass=lockdown permissive=0

  This seems to be leading to auditd running out of space in the backlog buffer
  and eventually OOMs the machine.

  [...]
  auditd running at 99% CPU presumably processing all the messages, eventually I get:
  Apr 30 12:20:42 fedora kernel: audit: backlog limit exceeded
  Apr 30 12:20:42 fedora kernel: audit: backlog limit exceeded
  Apr 30 12:20:42 fedora kernel: audit: audit_backlog=2152579 > audit_backlog_limit=64
  Apr 30 12:20:42 fedora kernel: audit: audit_backlog=2152626 > audit_backlog_limit=64
  Apr 30 12:20:42 fedora kernel: audit: audit_backlog=2152694 > audit_backlog_limit=64
  Apr 30 12:20:42 fedora kernel: audit: audit_lost=6878426 audit_rate_limit=0 audit_backlog_limit=64
  Apr 30 12:20:45 fedora kernel: oci-seccomp-bpf invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=-1000
  Apr 30 12:20:45 fedora kernel: CPU: 0 PID: 13284 Comm: oci-seccomp-bpf Not tainted 5.11.12-300.fc34.x86_64 #1
  Apr 30 12:20:45 fedora kernel: Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
  [...]

[1] https://lore.kernel.org/linux-audit/CANYvDQN7H5tVp47fbYcRasv4XF07eUbsDwT_eDCHXJUj43J7jQ@mail.gmail.com/,
    Serhei Makarov says:

  Upstream kernel 5.11.0-rc7 and later was found to deadlock during a
  bpf_probe_read_compat() call within a sched_switch tracepoint. The problem
  is reproducible with the reg_alloc3 testcase from SystemTap's BPF backend
  testsuite on x86_64 as well as the runqlat, runqslower tools from bcc on
  ppc64le. Example stack trace:

  [...]
  [  730.868702] stack backtrace:
  [  730.869590] CPU: 1 PID: 701 Comm: in:imjournal Not tainted, 5.12.0-0.rc2.20210309git144c79ef3353.166.fc35.x86_64 #1
  [  730.871605] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
  [  730.873278] Call Trace:
  [  730.873770]  dump_stack+0x7f/0xa1
  [  730.874433]  check_noncircular+0xdf/0x100
  [  730.875232]  __lock_acquire+0x1202/0x1e10
  [  730.876031]  ? __lock_acquire+0xfc0/0x1e10
  [  730.876844]  lock_acquire+0xc2/0x3a0
  [  730.877551]  ? __wake_up_common_lock+0x52/0x90
  [  730.878434]  ? lock_acquire+0xc2/0x3a0
  [  730.879186]  ? lock_is_held_type+0xa7/0x120
  [  730.880044]  ? skb_queue_tail+0x1b/0x50
  [  730.880800]  _raw_spin_lock_irqsave+0x4d/0x90
  [  730.881656]  ? __wake_up_common_lock+0x52/0x90
  [  730.882532]  __wake_up_common_lock+0x52/0x90
  [  730.883375]  audit_log_end+0x5b/0x100
  [  730.884104]  slow_avc_audit+0x69/0x90
  [  730.884836]  avc_has_perm+0x8b/0xb0
  [  730.885532]  selinux_lockdown+0xa5/0xd0
  [  730.886297]  security_locked_down+0x20/0x40
  [  730.887133]  bpf_probe_read_compat+0x66/0xd0
  [  730.887983]  bpf_prog_250599c5469ac7b5+0x10f/0x820
  [  730.888917]  trace_call_bpf+0xe9/0x240
  [  730.889672]  perf_trace_run_bpf_submit+0x4d/0xc0
  [  730.890579]  perf_trace_sched_switch+0x142/0x180
  [  730.891485]  ? __schedule+0x6d8/0xb20
  [  730.892209]  __schedule+0x6d8/0xb20
  [  730.892899]  schedule+0x5b/0xc0
  [  730.893522]  exit_to_user_mode_prepare+0x11d/0x240
  [  730.894457]  syscall_exit_to_user_mode+0x27/0x70
  [  730.895361]  entry_SYSCALL_64_after_hwframe+0x44/0xae
  [...]

Fixes: 59438b46471a ("security,lockdown,selinux: implement SELinux lockdown")
Reported-by: Ondrej Mosnacek <omosnace@redhat.com>
Reported-by: Jakub Hrozek <jhrozek@redhat.com>
Reported-by: Serhei Makarov <smakarov@redhat.com>
Reported-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: James Morris <jamorris@linux.microsoft.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Frank Eigler <fche@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/bpf/01135120-8bf7-df2e-cff0-1d73f1f841c3@iogearbox.net
---
 kernel/bpf/helpers.c     |  7 +++++--
 kernel/trace/bpf_trace.c | 32 ++++++++++++--------------------
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 73443498d88fc..a2f1f15ce4321 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -14,6 +14,7 @@
 #include <linux/jiffies.h>
 #include <linux/pid_namespace.h>
 #include <linux/proc_ns.h>
+#include <linux/security.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -1069,11 +1070,13 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_probe_read_user:
 		return &bpf_probe_read_user_proto;
 	case BPF_FUNC_probe_read_kernel:
-		return &bpf_probe_read_kernel_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_proto;
 	case BPF_FUNC_probe_read_user_str:
 		return &bpf_probe_read_user_str_proto;
 	case BPF_FUNC_probe_read_kernel_str:
-		return &bpf_probe_read_kernel_str_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_str_proto;
 	case BPF_FUNC_snprintf_btf:
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_snprintf:
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d2d7cf6cfe83e..7a52bc1728414 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -215,16 +215,11 @@ const struct bpf_func_proto bpf_probe_read_user_str_proto = {
 static __always_inline int
 bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
 {
-	int ret = security_locked_down(LOCKDOWN_BPF_READ);
+	int ret;
 
-	if (unlikely(ret < 0))
-		goto fail;
 	ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
-		goto fail;
-	return ret;
-fail:
-	memset(dst, 0, size);
+		memset(dst, 0, size);
 	return ret;
 }
 
@@ -246,10 +241,7 @@ const struct bpf_func_proto bpf_probe_read_kernel_proto = {
 static __always_inline int
 bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
 {
-	int ret = security_locked_down(LOCKDOWN_BPF_READ);
-
-	if (unlikely(ret < 0))
-		goto fail;
+	int ret;
 
 	/*
 	 * The strncpy_from_kernel_nofault() call will likely not fill the
@@ -262,11 +254,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
 	 */
 	ret = strncpy_from_kernel_nofault(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
-		goto fail;
-
-	return ret;
-fail:
-	memset(dst, 0, size);
+		memset(dst, 0, size);
 	return ret;
 }
 
@@ -1011,16 +999,20 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_probe_read_user:
 		return &bpf_probe_read_user_proto;
 	case BPF_FUNC_probe_read_kernel:
-		return &bpf_probe_read_kernel_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_proto;
 	case BPF_FUNC_probe_read_user_str:
 		return &bpf_probe_read_user_str_proto;
 	case BPF_FUNC_probe_read_kernel_str:
-		return &bpf_probe_read_kernel_str_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_kernel_str_proto;
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
 	case BPF_FUNC_probe_read:
-		return &bpf_probe_read_compat_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_compat_proto;
 	case BPF_FUNC_probe_read_str:
-		return &bpf_probe_read_compat_str_proto;
+		return security_locked_down(LOCKDOWN_BPF_READ) < 0 ?
+		       NULL : &bpf_probe_read_compat_str_proto;
 #endif
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
-- 
GitLab


From b508d5fb69c2211a1b860fc058aafbefc3b3c3cd Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Tue, 1 Jun 2021 18:38:41 -0700
Subject: [PATCH 2103/3804] net: ipconfig: Don't override command-line
 hostnames or domains

If the user specifies a hostname or domain name as part of the ip=
command-line option, preserve it and don't overwrite it with one
supplied by DHCP/BOOTP.

For instance, ip=::::myhostname::dhcp will use "myhostname" rather than
ignoring and overwriting it.

Fix the comment on ic_bootp_string that suggests it only copies a string
"if not already set"; it doesn't have any such logic.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index bc2f6ca971520..816d8aad5a684 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -886,7 +886,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
 
 
 /*
- *  Copy BOOTP-supplied string if not already set.
+ *  Copy BOOTP-supplied string
  */
 static int __init ic_bootp_string(char *dest, char *src, int len, int max)
 {
@@ -935,12 +935,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
 		}
 		break;
 	case 12:	/* Host name */
-		ic_bootp_string(utsname()->nodename, ext+1, *ext,
-				__NEW_UTS_LEN);
-		ic_host_name_set = 1;
+		if (!ic_host_name_set) {
+			ic_bootp_string(utsname()->nodename, ext+1, *ext,
+					__NEW_UTS_LEN);
+			ic_host_name_set = 1;
+		}
 		break;
 	case 15:	/* Domain name (DNS) */
-		ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+		if (!ic_domain[0])
+			ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
 		break;
 	case 17:	/* Root path */
 		if (!root_server_path[0])
-- 
GitLab


From ab00f3e051e851a8458f0d0eb1bb426deadb6619 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Date: Wed, 2 Jun 2021 10:31:25 +0800
Subject: [PATCH 2104/3804] net: stmmac: fix issue where clk is being
 unprepared twice

If MDIO bus registration fails because no external PHY device is
connected to the MAC, clk_disable_unprepare() is called in both
stmmac_bus_clks_config() and intel_eth_pci_probe().

The second call, in intel_eth_pci_probe(), causes the following:

[   16.578605] intel-eth-pci 0000:00:1e.5: No PHY found
[   16.583778] intel-eth-pci 0000:00:1e.5: stmmac_dvr_probe: MDIO bus (id: 2) registration failed
[   16.680181] ------------[ cut here ]------------
[   16.684861] stmmac-0000:00:1e.5 already disabled
[   16.689547] WARNING: CPU: 13 PID: 2053 at drivers/clk/clk.c:952 clk_core_disable+0x96/0x1b0
[   16.697963] Modules linked in: dwc3 iTCO_wdt mei_hdcp iTCO_vendor_support udc_core x86_pkg_temp_thermal kvm_intel marvell10g kvm sch_fq_codel nfsd irqbypass dwmac_intel(+) stmmac uio ax88179_178a pcs_xpcs phylink uhid spi_pxa2xx_platform usbnet mei_me pcspkr tpm_crb mii i2c_i801 dw_dmac dwc3_pci thermal dw_dmac_core intel_rapl_msr libphy i2c_smbus mei tpm_tis intel_th_gth tpm_tis_core tpm intel_th_acpi intel_pmc_core intel_th i915 fuse configfs snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hda_core snd_pcm snd_timer snd soundcore
[   16.746785] CPU: 13 PID: 2053 Comm: systemd-udevd Tainted: G     U            5.13.0-rc3-intel-lts #76
[   16.756134] Hardware name: Intel Corporation Alder Lake Client Platform/AlderLake-S ADP-S DRR4 CRB, BIOS ADLIFSI1.R00.1494.B00.2012031421 12/03/2020
[   16.769465] RIP: 0010:clk_core_disable+0x96/0x1b0
[   16.774222] Code: 00 8b 05 45 96 17 01 85 c0 7f 24 48 8b 5b 30 48 85 db 74 a5 8b 43 7c 85 c0 75 93 48 8b 33 48 c7 c7 6e 32 cc b7 e8 b2 5d 52 00 <0f> 0b 5b 5d c3 65 8b 05 76 31 18 49 89 c0 48 0f a3 05 bc 92 1a 01
[   16.793016] RSP: 0018:ffffa44580523aa0 EFLAGS: 00010086
[   16.798287] RAX: 0000000000000000 RBX: ffff8d7d0eb70a00 RCX: 0000000000000000
[   16.805435] RDX: 0000000000000002 RSI: ffffffffb7c62d5f RDI: 00000000ffffffff
[   16.812610] RBP: 0000000000000287 R08: 0000000000000000 R09: ffffa445805238d0
[   16.819759] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8d7d0eb70a00
[   16.826904] R13: ffff8d7d027370c8 R14: 0000000000000006 R15: ffffa44580523ad0
[   16.834047] FS:  00007f9882fa2600(0000) GS:ffff8d80a0940000(0000) knlGS:0000000000000000
[   16.842177] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   16.847966] CR2: 00007f9882bea3d8 CR3: 000000010b126001 CR4: 0000000000370ee0
[   16.855144] Call Trace:
[   16.857614]  clk_core_disable_lock+0x1b/0x30
[   16.861941]  intel_eth_pci_probe.cold+0x11d/0x136 [dwmac_intel]
[   16.867913]  pci_device_probe+0xcf/0x150
[   16.871890]  really_probe+0xf5/0x3e0
[   16.875526]  driver_probe_device+0x64/0x150
[   16.879763]  device_driver_attach+0x53/0x60
[   16.883998]  __driver_attach+0x9f/0x150
[   16.887883]  ? device_driver_attach+0x60/0x60
[   16.892288]  ? device_driver_attach+0x60/0x60
[   16.896698]  bus_for_each_dev+0x77/0xc0
[   16.900583]  bus_add_driver+0x184/0x1f0
[   16.904469]  driver_register+0x6c/0xc0
[   16.908268]  ? 0xffffffffc07ae000
[   16.911598]  do_one_initcall+0x4a/0x210
[   16.915489]  ? kmem_cache_alloc_trace+0x305/0x4e0
[   16.920247]  do_init_module+0x5c/0x230
[   16.924057]  load_module+0x2894/0x2b70
[   16.927857]  ? __do_sys_finit_module+0xb5/0x120
[   16.932441]  __do_sys_finit_module+0xb5/0x120
[   16.936845]  do_syscall_64+0x42/0x80
[   16.940476]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   16.945586] RIP: 0033:0x7f98830e5ccd
[   16.949177] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 93 31 0c 00 f7 d8 64 89 01 48
[   16.967970] RSP: 002b:00007ffc66b60168 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[   16.975583] RAX: ffffffffffffffda RBX: 000055885de35ef0 RCX: 00007f98830e5ccd
[   16.982725] RDX: 0000000000000000 RSI: 00007f98832541e3 RDI: 0000000000000012
[   16.989868] RBP: 0000000000020000 R08: 0000000000000000 R09: 0000000000000000
[   16.997042] R10: 0000000000000012 R11: 0000000000000246 R12: 00007f98832541e3
[   17.004222] R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffc66b60328
[   17.011369] ---[ end trace df06a3dab26b988c ]---
[   17.016062] ------------[ cut here ]------------
[   17.020701] stmmac-0000:00:1e.5 already unprepared

Remove the stmmac_bus_clks_config() call in stmmac_dvr_probe() and let
dwmac-intel handle unpreparing and disabling the clk device.

Fixes: 5ec55823438e ("net: stmmac: add clocks management for gmac driver")
Cc: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@linux.intel.com>
Reviewed-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 342bdefcb8b4e..c87202cbd3d6d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7049,7 +7049,6 @@ error_mdio_register:
 	stmmac_napi_del(ndev);
 error_hw_init:
 	destroy_workqueue(priv->wq);
-	stmmac_bus_clks_config(priv, false);
 	bitmap_free(priv->af_xdp_zc_qps);
 
 	return ret;
-- 
GitLab


From ba8e59773ae59818695d1e20b8939282da80ec8c Mon Sep 17 00:00:00 2001
From: Bindu Ramamurthy <bindu.r@amd.com>
Date: Thu, 20 May 2021 10:06:04 -0400
Subject: [PATCH 2105/3804] drm/amd/display: Allow bandwidth validation for 0
 streams.

[Why]
Bandwidth calculations are triggered for non zero streams, and
in case of 0 streams, these calculations were skipped with
pstate status not being updated.

[How]
As the pstate status is applicable to non-zero streams, add a check
that allows 0 streams, in line with the DCN internal bandwidth
validations.

Signed-off-by: Bindu Ramamurthy <bindu.r@amd.com>
Reviewed-by: Roman Li <Roman.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 527e56c353cb7..8357aa3c41d5a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -3236,7 +3236,7 @@ static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc,
 	voltage_supported = dcn20_validate_bandwidth_internal(dc, context, false);
 	dummy_pstate_supported = context->bw_ctx.bw.dcn.clk.p_state_change_support;
 
-	if (voltage_supported && dummy_pstate_supported) {
+	if (voltage_supported && (dummy_pstate_supported || !(context->stream_count))) {
 		context->bw_ctx.bw.dcn.clk.p_state_change_support = false;
 		goto restore_dml_state;
 	}
-- 
GitLab


From a53085c1d20f914590fe446d01d4546150758983 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Wed, 19 May 2021 16:12:19 -0400
Subject: [PATCH 2106/3804] drm/amd/display: Fix GPU scaling regression by FS
 video support

[Why]
FS video support regressed GPU scaling and the scaled buffer ends up
stuck in the top left of the screen at native size - full, aspect,
center scaling modes do not function.

This is because decide_crtc_timing_for_drm_display_mode() does not
get called when scaling is enabled.

[How]
Split recalculate timing and scaling into two different flags.

We don't want to call drm_mode_set_crtcinfo() for scaling, but we
do want to call it for FS video.

Optimize and move preferred_refresh calculation next to
decide_crtc_timing_for_drm_display_mode() like it used to be since
that's not used for FS video.

We don't need to copy over the VIC or polarity in the case of FS video
modes because those don't change.

Fixes: 6f59f229f8ed7a ("drm/amd/display: Skip modeset for front porch change")

Cc: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 389eff96fcf6c..d6dcbb08e2031 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5500,7 +5500,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	struct drm_display_mode saved_mode;
 	struct drm_display_mode *freesync_mode = NULL;
 	bool native_mode_found = false;
-	bool recalculate_timing = dm_state ? (dm_state->scaling != RMX_OFF) : false;
+	bool recalculate_timing = false;
+	bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
 	int mode_refresh;
 	int preferred_refresh = 0;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
@@ -5563,7 +5564,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		 */
 		DRM_DEBUG_DRIVER("No preferred mode found\n");
 	} else {
-		recalculate_timing |= amdgpu_freesync_vid_mode &&
+		recalculate_timing = amdgpu_freesync_vid_mode &&
 				 is_freesync_video_mode(&mode, aconnector);
 		if (recalculate_timing) {
 			freesync_mode = get_highest_refresh_rate_mode(aconnector, false);
@@ -5571,11 +5572,10 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 			mode = *freesync_mode;
 		} else {
 			decide_crtc_timing_for_drm_display_mode(
-				&mode, preferred_mode,
-				dm_state ? (dm_state->scaling != RMX_OFF) : false);
-		}
+				&mode, preferred_mode, scale);
 
-		preferred_refresh = drm_mode_vrefresh(preferred_mode);
+			preferred_refresh = drm_mode_vrefresh(preferred_mode);
+		}
 	}
 
 	if (recalculate_timing)
@@ -5587,7 +5587,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	* If scaling is enabled and refresh rate didn't change
 	* we copy the vic and polarities of the old timings
 	*/
-	if (!recalculate_timing || mode_refresh != preferred_refresh)
+	if (!scale || mode_refresh != preferred_refresh)
 		fill_stream_properties_from_drm_display_mode(
 			stream, &mode, &aconnector->base, con_state, NULL,
 			requested_bpc);
-- 
GitLab


From 147feb007685cbb765b16a834d4f00675d589bb4 Mon Sep 17 00:00:00 2001
From: Asher Song <Asher.Song@amd.com>
Date: Fri, 21 May 2021 17:11:33 +0800
Subject: [PATCH 2107/3804] drm/amdgpu: add judgement for dc support

Drop DC initialization when DCN is harvested in VBIOS. This
doesn't affect virtual display IP initialization.

Signed-off-by: Likun Gao  <Likun.Gao@amd.com>
Signed-off-by: Asher Song <Asher.Song@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 66ddfe4f58c2e..57ec108b59720 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3118,7 +3118,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
  */
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev) || adev->enable_virtual_display)
+	if (amdgpu_sriov_vf(adev) || 
+	    adev->enable_virtual_display ||
+	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
 		return false;
 
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
-- 
GitLab


From 5cfc912582e13b05d71fb7acc4ec69ddfa9af320 Mon Sep 17 00:00:00 2001
From: Jiansong Chen <Jiansong.Chen@amd.com>
Date: Tue, 25 May 2021 14:14:58 +0800
Subject: [PATCH 2108/3804] drm/amdgpu: refine amdgpu_fru_get_product_info

1. eliminate potential array index out of bounds.
2. return meaningful value for failure.

Signed-off-by: Jiansong Chen <Jiansong.Chen@amd.com>
Reviewed-by: Jack Gui <Jack.Gui@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c    | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index 8f4a8f8d81463..39b6c6bfab453 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -101,7 +101,8 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
 int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 {
 	unsigned char buff[34];
-	int addrptr = 0, size = 0;
+	int addrptr, size;
+	int len;
 
 	if (!is_fru_eeprom_supported(adev))
 		return 0;
@@ -109,7 +110,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 	/* If algo exists, it means that the i2c_adapter's initialized */
 	if (!adev->pm.smu_i2c.algo) {
 		DRM_WARN("Cannot access FRU, EEPROM accessor not initialized");
-		return 0;
+		return -ENODEV;
 	}
 
 	/* There's a lot of repetition here. This is due to the FRU having
@@ -128,7 +129,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 	size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
 	if (size < 1) {
 		DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
-		return size;
+		return -EINVAL;
 	}
 
 	/* Increment the addrptr by the size of the field, and 1 due to the
@@ -138,43 +139,45 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 	size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
 	if (size < 1) {
 		DRM_ERROR("Failed to read FRU product name, ret:%d", size);
-		return size;
+		return -EINVAL;
 	}
 
+	len = size;
 	/* Product name should only be 32 characters. Any more,
 	 * and something could be wrong. Cap it at 32 to be safe
 	 */
-	if (size > 32) {
+	if (len >= sizeof(adev->product_name)) {
 		DRM_WARN("FRU Product Number is larger than 32 characters. This is likely a mistake");
-		size = 32;
+		len = sizeof(adev->product_name) - 1;
 	}
 	/* Start at 2 due to buff using fields 0 and 1 for the address */
-	memcpy(adev->product_name, &buff[2], size);
-	adev->product_name[size] = '\0';
+	memcpy(adev->product_name, &buff[2], len);
+	adev->product_name[len] = '\0';
 
 	addrptr += size + 1;
 	size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
 	if (size < 1) {
 		DRM_ERROR("Failed to read FRU product number, ret:%d", size);
-		return size;
+		return -EINVAL;
 	}
 
+	len = size;
 	/* Product number should only be 16 characters. Any more,
 	 * and something could be wrong. Cap it at 16 to be safe
 	 */
-	if (size > 16) {
+	if (len >= sizeof(adev->product_number)) {
 		DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
-		size = 16;
+		len = sizeof(adev->product_number) - 1;
 	}
-	memcpy(adev->product_number, &buff[2], size);
-	adev->product_number[size] = '\0';
+	memcpy(adev->product_number, &buff[2], len);
+	adev->product_number[len] = '\0';
 
 	addrptr += size + 1;
 	size = amdgpu_fru_read_eeprom(adev, addrptr, buff);
 
 	if (size < 1) {
 		DRM_ERROR("Failed to read FRU product version, ret:%d", size);
-		return size;
+		return -EINVAL;
 	}
 
 	addrptr += size + 1;
@@ -182,18 +185,19 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 
 	if (size < 1) {
 		DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
-		return size;
+		return -EINVAL;
 	}
 
+	len = size;
 	/* Serial number should only be 16 characters. Any more,
 	 * and something could be wrong. Cap it at 16 to be safe
 	 */
-	if (size > 16) {
+	if (len >= sizeof(adev->serial)) {
 		DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
-		size = 16;
+		len = sizeof(adev->serial) - 1;
 	}
-	memcpy(adev->serial, &buff[2], size);
-	adev->serial[size] = '\0';
+	memcpy(adev->serial, &buff[2], len);
+	adev->serial[len] = '\0';
 
 	return 0;
 }
-- 
GitLab


From 33f409e60eb0c59a4d0d06a62ab4642a988e17f7 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Date: Fri, 14 May 2021 07:47:34 -0400
Subject: [PATCH 2109/3804] drm/amd/display: Fix overlay validation by
 considering cursors

A few weeks ago, we saw a two cursor issue in a ChromeOS system. We
fixed it in the commit:

 drm/amd/display: Fix two cursor duplication when using overlay
 (read the commit message for more details)

After this change, we noticed that some IGT subtests related to
kms_plane and kms_plane_scaling started to fail. After investigating
this issue, we noticed that all subtests that fail have a primary plane
covering the overlay plane, which is currently rejected by amdgpu dm.
The failure of those IGT tests highlights that our verification was too
broad and compromises overlay usage in our driver. This patch fixes this
issue by ensuring that we only reject commits where the primary plane is
not fully covered by the overlay when the cursor hardware is enabled.
With this fix, all IGT tests start to pass again, which means our
overlay support works as expected.

Cc: Tianci.Yin <tianci.yin@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Nicholas Choi <nicholas.choi@amd.com>
Cc: Bhawanpreet Lakha <bhawanpreet.lakha@amd.com>
Cc: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Cc: Mark Yacoub <markyacoub@google.com>
Cc: Daniel Wheeler <daniel.wheeler@amd.com>

Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d6dcbb08e2031..2a93a93d24434 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9891,7 +9891,7 @@ static int validate_overlay(struct drm_atomic_state *state)
 	int i;
 	struct drm_plane *plane;
 	struct drm_plane_state *old_plane_state, *new_plane_state;
-	struct drm_plane_state *primary_state, *overlay_state = NULL;
+	struct drm_plane_state *primary_state, *cursor_state, *overlay_state = NULL;
 
 	/* Check if primary plane is contained inside overlay */
 	for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
@@ -9921,6 +9921,14 @@ static int validate_overlay(struct drm_atomic_state *state)
 	if (!primary_state->crtc)
 		return 0;
 
+	/* check if cursor plane is enabled */
+	cursor_state = drm_atomic_get_plane_state(state, overlay_state->crtc->cursor);
+	if (IS_ERR(cursor_state))
+		return PTR_ERR(cursor_state);
+
+	if (drm_atomic_plane_disabling(plane->state, cursor_state))
+		return 0;
+
 	/* Perform the bounds check to ensure the overlay plane covers the primary */
 	if (primary_state->crtc_x < overlay_state->crtc_x ||
 	    primary_state->crtc_y < overlay_state->crtc_y ||
-- 
GitLab


From dce3d8e1d070900e0feeb06787a319ff9379212c Mon Sep 17 00:00:00 2001
From: Luben Tuikov <luben.tuikov@amd.com>
Date: Wed, 12 May 2021 12:33:23 -0400
Subject: [PATCH 2110/3804] drm/amdgpu: Don't query CE and UE errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On QUERY2 IOCTL don't query counts of correctable
and uncorrectable errors, since when RAS is
enabled and supported on Vega20 server boards,
this takes an insurmountably long time, in O(n^3),
which slows the system down to the point of it
being unusable when we have GUI up.

Fixes: ae363a212b14 ("drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2")
Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Reviewed-by: Alexander Deucher <Alexander.Deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 0350205c48974..6819fe5612d9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -337,7 +337,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 {
 	struct amdgpu_ctx *ctx;
 	struct amdgpu_ctx_mgr *mgr;
-	unsigned long ras_counter;
 
 	if (!fpriv)
 		return -EINVAL;
@@ -362,21 +361,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 	if (atomic_read(&ctx->guilty))
 		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
-	/*query ue count*/
-	ras_counter = amdgpu_ras_query_error_count(adev, false);
-	/*ras counter is monotonic increasing*/
-	if (ras_counter != ctx->ras_counter_ue) {
-		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
-		ctx->ras_counter_ue = ras_counter;
-	}
-
-	/*query ce count*/
-	ras_counter = amdgpu_ras_query_error_count(adev, true);
-	if (ras_counter != ctx->ras_counter_ce) {
-		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
-		ctx->ras_counter_ce = ras_counter;
-	}
-
 	mutex_unlock(&mgr->lock);
 	return 0;
 }
-- 
GitLab


From c5699e2d863f58221044efdc3fa712dd32d55cde Mon Sep 17 00:00:00 2001
From: Roman Li <roman.li@amd.com>
Date: Mon, 10 May 2021 11:58:54 -0400
Subject: [PATCH 2111/3804] drm/amd/display: Fix potential memory leak in DMUB
 hw_init

[Why]
On resume we perform DMUB hw_init which allocates memory:
dm_resume->dm_dmub_hw_init->dc_dmub_srv_create->kzalloc
That results in memory leak in suspend/resume scenarios.

[How]
Allocate memory for the DC wrapper to DMUB only if it was not
allocated before.
No need to reallocate it on suspend/resume.

Signed-off-by: Lang Yu <Lang.Yu@amd.com>
Signed-off-by: Roman Li <roman.li@amd.com>
Reviewed-by: Nicholas Kazlauskas <Nicholas.Kazlauskas@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 2a93a93d24434..bb18c9889a0cc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -925,7 +925,8 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
 		abm->dmcu_is_running = dmcu->funcs->is_dmcu_initialized(dmcu);
 	}
 
-	adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv);
+	if (!adev->dm.dc->ctx->dmub_srv)
+		adev->dm.dc->ctx->dmub_srv = dc_dmub_srv_create(adev->dm.dc, dmub_srv);
 	if (!adev->dm.dc->ctx->dmub_srv) {
 		DRM_ERROR("Couldn't allocate DC DMUB server!\n");
 		return -ENOMEM;
@@ -1954,7 +1955,6 @@ static int dm_suspend(void *handle)
 
 	amdgpu_dm_irq_suspend(adev);
 
-
 	dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
 
 	return 0;
-- 
GitLab


From 2370eba9f552eaae3d8aa1f70b8e9eec5c560f9e Mon Sep 17 00:00:00 2001
From: Victor Zhao <Victor.Zhao@amd.com>
Date: Thu, 18 Mar 2021 13:44:35 +0800
Subject: [PATCH 2112/3804] drm/amd/amdgpu:save psp ring wptr to avoid attack

[Why]
When some tools perform a PSP mailbox attack, the readback value
of the register can be a random value, which may break PSP.

[How]
Use a PSP wptr cache mechanism to avoid the change made by the attack.

v2: unify change and add detailed reason

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Signed-off-by: Jingwen Chen <Jingwen.Chen2@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 1 +
 drivers/gpu/drm/amd/amdgpu/psp_v11_0.c  | 3 ++-
 drivers/gpu/drm/amd/amdgpu/psp_v3_1.c   | 3 ++-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 46a5328e00e0b..60aa99a39a743 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -76,6 +76,7 @@ struct psp_ring
 	uint64_t			ring_mem_mc_addr;
 	void				*ring_mem_handle;
 	uint32_t			ring_size;
+	uint32_t			ring_wptr;
 };
 
 /* More registers may will be supported */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 589410c32d095..02bba1f3c42e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -720,7 +720,7 @@ static uint32_t psp_v11_0_ring_get_wptr(struct psp_context *psp)
 	struct amdgpu_device *adev = psp->adev;
 
 	if (amdgpu_sriov_vf(adev))
-		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+		data = psp->km_ring.ring_wptr;
 	else
 		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 
@@ -734,6 +734,7 @@ static void psp_v11_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
 	if (amdgpu_sriov_vf(adev)) {
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, value);
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+		psp->km_ring.ring_wptr = value;
 	} else
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index f2e725f72d2f1..908664a5774bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -379,7 +379,7 @@ static uint32_t psp_v3_1_ring_get_wptr(struct psp_context *psp)
 	struct amdgpu_device *adev = psp->adev;
 
 	if (amdgpu_sriov_vf(adev))
-		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+		data = psp->km_ring.ring_wptr;
 	else
 		data = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 	return data;
@@ -394,6 +394,7 @@ static void psp_v3_1_ring_set_wptr(struct psp_context *psp, uint32_t value)
 		/* send interrupt to PSP for SRIOV ring write pointer update */
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
 			GFX_CTRL_CMD_ID_CONSUME_CMD);
+		psp->km_ring.ring_wptr = value;
 	} else
 		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, value);
 }
-- 
GitLab


From 07438603a07e52f1c6aa731842bd298d2725b7be Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@amd.com>
Date: Fri, 28 May 2021 16:54:16 +0200
Subject: [PATCH 2113/3804] drm/amdgpu: make sure we unpin the UVD BO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Releasing pinned BOs is illegal now. UVD 6 was missing from:
commit 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")

Fixes: 2f40801dc553 ("drm/amdgpu: make sure we unpin the UVD BO")
Cc: stable@vger.kernel.org
Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 2bab9c77952fd..cf3803f8f075d 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -357,6 +357,7 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 
 error:
 	dma_fence_put(fence);
+	amdgpu_bo_unpin(bo);
 	amdgpu_bo_unreserve(bo);
 	amdgpu_bo_unref(&bo);
 	return r;
-- 
GitLab


From e7591a8d56bab89c617dae055446b6337ec32dc9 Mon Sep 17 00:00:00 2001
From: Simon Ser <contact@emersion.fr>
Date: Wed, 26 May 2021 13:55:50 +0000
Subject: [PATCH 2114/3804] amd/display: convert DRM_DEBUG_ATOMIC to
 drm_dbg_atomic

This allows tying the log message to a specific DRM device.

Signed-off-by: Simon Ser <contact@emersion.fr>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Harry Wentland <hwentlan@amd.com>
Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index bb18c9889a0cc..652cc1a0e450f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -9854,7 +9854,7 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state,
 
 	if (cursor_scale_w != primary_scale_w ||
 	    cursor_scale_h != primary_scale_h) {
-		DRM_DEBUG_ATOMIC("Cursor plane scaling doesn't match primary plane\n");
+		drm_dbg_atomic(crtc->dev, "Cursor plane scaling doesn't match primary plane\n");
 		return -EINVAL;
 	}
 
-- 
GitLab


From 8314b6732ae4e600bb933e108f96ce0176acb09c Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 1 Jun 2021 10:23:38 +0200
Subject: [PATCH 2115/3804] ima: Define new template fields xattrnames,
 xattrlengths and xattrvalues

This patch defines the new template fields xattrnames, xattrlengths and
xattrvalues, which contain respectively a list of xattr names (strings,
separated by |), lengths (u32, hex) and values (hex). If an xattr is not
present, the name and length are not displayed in the measurement list.

Reported-by: kernel test robot <lkp@intel.com> (Missing prototype def)
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst  |  4 ++
 include/linux/evm.h                       | 10 ++++
 security/integrity/evm/evm_main.c         | 69 +++++++++++++++++++++++
 security/integrity/ima/ima_template.c     |  9 +++
 security/integrity/ima/ima_template_lib.c | 64 +++++++++++++++++++++
 security/integrity/ima/ima_template_lib.h |  6 ++
 6 files changed, 162 insertions(+)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 65c1ce451d083..6a58760a0a354 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -78,6 +78,10 @@ descriptors by adding their identifier to the format string
  - 'iuid': the inode UID;
  - 'igid': the inode GID;
  - 'imode': the inode mode;
+ - 'xattrnames': a list of xattr names (separated by |), only if the xattr is
+    present;
+ - 'xattrlengths': a list of xattr lengths (u32), only if the xattr is present;
+ - 'xattrvalues': a list of xattr values;
 
 
 Below, there is the list of defined template descriptors:
diff --git a/include/linux/evm.h b/include/linux/evm.h
index 5011a299c2511..4c374be702472 100644
--- a/include/linux/evm.h
+++ b/include/linux/evm.h
@@ -39,6 +39,9 @@ extern int evm_inode_init_security(struct inode *inode,
 				   struct xattr *evm);
 extern bool evm_revalidate_status(const char *xattr_name);
 extern int evm_protected_xattr_if_enabled(const char *req_xattr_name);
+extern int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
+				     int buffer_size, char type,
+				     bool canonical_fmt);
 #ifdef CONFIG_FS_POSIX_ACL
 extern int posix_xattr_acl(const char *xattrname);
 #else
@@ -120,5 +123,12 @@ static inline int evm_protected_xattr_if_enabled(const char *req_xattr_name)
 	return false;
 }
 
+static inline int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
+					    int buffer_size, char type,
+					    bool canonical_fmt)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_EVM */
 #endif /* LINUX_EVM_H */
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index ee4e17a790fba..2c226e634ae97 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -317,6 +317,75 @@ int evm_protected_xattr_if_enabled(const char *req_xattr_name)
 	return evm_protected_xattr_common(req_xattr_name, true);
 }
 
+/**
+ * evm_read_protected_xattrs - read EVM protected xattr names, lengths, values
+ * @dentry: dentry of the read xattrs
+ * @inode: inode of the read xattrs
+ * @buffer: buffer xattr names, lengths or values are copied to
+ * @buffer_size: size of buffer
+ * @type: n: names, l: lengths, v: values
+ * @canonical_fmt: data format (true: little endian, false: native format)
+ *
+ * Read protected xattr names (separated by |), lengths (u32) or values for a
+ * given dentry and return the total size of copied data. If buffer is NULL,
+ * just return the total size.
+ *
+ * Returns the total size on success, a negative value on error.
+ */
+int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
+			      int buffer_size, char type, bool canonical_fmt)
+{
+	struct xattr_list *xattr;
+	int rc, size, total_size = 0;
+
+	list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) {
+		rc = __vfs_getxattr(dentry, d_backing_inode(dentry),
+				    xattr->name, NULL, 0);
+		if (rc < 0 && rc == -ENODATA)
+			continue;
+		else if (rc < 0)
+			return rc;
+
+		switch (type) {
+		case 'n':
+			size = strlen(xattr->name) + 1;
+			if (buffer) {
+				if (total_size)
+					*(buffer + total_size - 1) = '|';
+
+				memcpy(buffer + total_size, xattr->name, size);
+			}
+			break;
+		case 'l':
+			size = sizeof(u32);
+			if (buffer) {
+				if (canonical_fmt)
+					rc = cpu_to_le32(rc);
+
+				*(u32 *)(buffer + total_size) = rc;
+			}
+			break;
+		case 'v':
+			size = rc;
+			if (buffer) {
+				rc = __vfs_getxattr(dentry,
+					d_backing_inode(dentry), xattr->name,
+					buffer + total_size,
+					buffer_size - total_size);
+				if (rc < 0)
+					return rc;
+			}
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		total_size += size;
+	}
+
+	return total_size;
+}
+
 /**
  * evm_verifyxattr - verify the integrity of the requested xattr
  * @dentry: object of the verify xattr
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 43784f2bf8bd6..159a31d2fcdff 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -53,6 +53,15 @@ static const struct ima_template_field supported_fields[] = {
 	 .field_show = ima_show_template_uint},
 	{.field_id = "imode", .field_init = ima_eventinodemode_init,
 	 .field_show = ima_show_template_uint},
+	{.field_id = "xattrnames",
+	 .field_init = ima_eventinodexattrnames_init,
+	 .field_show = ima_show_template_string},
+	{.field_id = "xattrlengths",
+	 .field_init = ima_eventinodexattrlengths_init,
+	 .field_show = ima_show_template_sig},
+	{.field_id = "xattrvalues",
+	 .field_init = ima_eventinodexattrvalues_init,
+	 .field_show = ima_show_template_sig},
 };
 
 /*
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 3156fb34b1afa..518fd50ea48a9 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -11,6 +11,7 @@
 
 #include "ima_template_lib.h"
 #include <linux/xattr.h>
+#include <linux/evm.h>
 
 static bool ima_template_hash_algo_allowed(u8 algo)
 {
@@ -618,3 +619,66 @@ int ima_eventinodemode_init(struct ima_event_data *event_data,
 	return ima_write_template_field_data((char *)&mode, sizeof(mode),
 					     DATA_FMT_UINT, field_data);
 }
+
+static int ima_eventinodexattrs_init_common(struct ima_event_data *event_data,
+					    struct ima_field_data *field_data,
+					    char type)
+{
+	u8 *buffer = NULL;
+	int rc;
+
+	if (!event_data->file)
+		return 0;
+
+	rc = evm_read_protected_xattrs(file_dentry(event_data->file), NULL, 0,
+				       type, ima_canonical_fmt);
+	if (rc < 0)
+		return 0;
+
+	buffer = kmalloc(rc, GFP_KERNEL);
+	if (!buffer)
+		return 0;
+
+	rc = evm_read_protected_xattrs(file_dentry(event_data->file), buffer,
+				       rc, type, ima_canonical_fmt);
+	if (rc < 0) {
+		rc = 0;
+		goto out;
+	}
+
+	rc = ima_write_template_field_data((char *)buffer, rc, DATA_FMT_HEX,
+					   field_data);
+out:
+	kfree(buffer);
+	return rc;
+}
+
+/*
+ *  ima_eventinodexattrnames_init - include a list of xattr names as part of the
+ *  template data
+ */
+int ima_eventinodexattrnames_init(struct ima_event_data *event_data,
+				  struct ima_field_data *field_data)
+{
+	return ima_eventinodexattrs_init_common(event_data, field_data, 'n');
+}
+
+/*
+ *  ima_eventinodexattrlengths_init - include a list of xattr lengths as part of
+ *  the template data
+ */
+int ima_eventinodexattrlengths_init(struct ima_event_data *event_data,
+				    struct ima_field_data *field_data)
+{
+	return ima_eventinodexattrs_init_common(event_data, field_data, 'l');
+}
+
+/*
+ *  ima_eventinodexattrvalues_init - include a list of xattr values as part of
+ *  the template data
+ */
+int ima_eventinodexattrvalues_init(struct ima_event_data *event_data,
+				   struct ima_field_data *field_data)
+{
+	return ima_eventinodexattrs_init_common(event_data, field_data, 'v');
+}
diff --git a/security/integrity/ima/ima_template_lib.h b/security/integrity/ima/ima_template_lib.h
index 6509af4a97ee5..c71f1de95753d 100644
--- a/security/integrity/ima/ima_template_lib.h
+++ b/security/integrity/ima/ima_template_lib.h
@@ -56,4 +56,10 @@ int ima_eventinodegid_init(struct ima_event_data *event_data,
 			   struct ima_field_data *field_data);
 int ima_eventinodemode_init(struct ima_event_data *event_data,
 			    struct ima_field_data *field_data);
+int ima_eventinodexattrnames_init(struct ima_event_data *event_data,
+				  struct ima_field_data *field_data);
+int ima_eventinodexattrlengths_init(struct ima_event_data *event_data,
+				    struct ima_field_data *field_data);
+int ima_eventinodexattrvalues_init(struct ima_event_data *event_data,
+				   struct ima_field_data *field_data);
 #endif /* __LINUX_IMA_TEMPLATE_LIB_H */
-- 
GitLab


From b45f189a19b38e01676628db79cd3eeb1333516e Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Thu, 29 Apr 2021 16:13:44 +0530
Subject: [PATCH 2116/3804] ext4: fix accessing uninit percpu counter variable
 with fast_commit

When running generic/527 with fast_commit configuration, the following
issue is seen on Power.  With fast_commit, during ext4_fc_replay()
(which can be called from ext4_fill_super()), if inode eviction
happens then it can access an uninitialized percpu counter variable.

This patch adds the check before accessing the counters in
ext4_free_inode() path.

[  321.165371] run fstests generic/527 at 2021-04-29 08:38:43
[  323.027786] EXT4-fs (dm-0): mounted filesystem with ordered data mode. Opts: block_validity. Quota mode: none.
[  323.618772] BUG: Unable to handle kernel data access on read at 0x1fbd80000
[  323.619767] Faulting instruction address: 0xc000000000bae78c
cpu 0x1: Vector: 300 (Data Access) at [c000000010706ef0]
    pc: c000000000bae78c: percpu_counter_add_batch+0x3c/0x100
    lr: c0000000006d0bb0: ext4_free_inode+0x780/0xb90
    pid   = 5593, comm = mount
	ext4_free_inode+0x780/0xb90
	ext4_evict_inode+0xa8c/0xc60
	evict+0xfc/0x1e0
	ext4_fc_replay+0xc50/0x20f0
	do_one_pass+0xfe0/0x1350
	jbd2_journal_recover+0x184/0x2e0
	jbd2_journal_load+0x1c0/0x4a0
	ext4_fill_super+0x2458/0x4200
	mount_bdev+0x1dc/0x290
	ext4_mount+0x28/0x40
	legacy_get_tree+0x4c/0xa0
	vfs_get_tree+0x4c/0x120
	path_mount+0xcf8/0xd70
	do_mount+0x80/0xd0
	sys_mount+0x3fc/0x490
	system_call_exception+0x384/0x3d0
	system_call_common+0xec/0x278

Cc: stable@kernel.org
Fixes: 8016e29f4362 ("ext4: fast commit recovery path")
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/6cceb9a75c54bef8fa9696c1b08c8df5ff6169e2.1619692410.git.riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ialloc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 71d321b3b9844..edbaed073ac5c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -322,14 +322,16 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 	if (is_directory) {
 		count = ext4_used_dirs_count(sb, gdp) - 1;
 		ext4_used_dirs_set(sb, gdp, count);
-		percpu_counter_dec(&sbi->s_dirs_counter);
+		if (percpu_counter_initialized(&sbi->s_dirs_counter))
+			percpu_counter_dec(&sbi->s_dirs_counter);
 	}
 	ext4_inode_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
 				   EXT4_INODES_PER_GROUP(sb) / 8);
 	ext4_group_desc_csum_set(sb, block_group, gdp);
 	ext4_unlock_group(sb, block_group);
 
-	percpu_counter_inc(&sbi->s_freeinodes_counter);
+	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
+		percpu_counter_inc(&sbi->s_freeinodes_counter);
 	if (sbi->s_log_groups_per_flex) {
 		struct flex_groups *fg;
 
-- 
GitLab


From d6e9e8e5dd53419814eb54803b4ab3682b55cebe Mon Sep 17 00:00:00 2001
From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Date: Thu, 3 Jun 2021 06:32:19 +0200
Subject: [PATCH 2117/3804] phy: ralink: phy-mt7621-pci: drop 'of_match_ptr' to
 fix -Wunused-const-variable

The of_device_id is included unconditionally by of.h header and used
in the driver as well. Remove of_match_ptr to fix W=1 compile test
warning with !CONFIG_OF:

drivers/phy/ralink/phy-mt7621-pci.c:341:34: warning: unused variable 'mt7621_pci_phy_ids' [-Wunused-const-variable]

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Link: https://lore.kernel.org/r/20210603043219.32646-1-sergio.paracuellos@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ralink/phy-mt7621-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/ralink/phy-mt7621-pci.c b/drivers/phy/ralink/phy-mt7621-pci.c
index 753cb5bab9308..2a9465f4bb3a9 100644
--- a/drivers/phy/ralink/phy-mt7621-pci.c
+++ b/drivers/phy/ralink/phy-mt7621-pci.c
@@ -341,7 +341,7 @@ static struct platform_driver mt7621_pci_phy_driver = {
 	.probe = mt7621_pci_phy_probe,
 	.driver = {
 		.name = "mt7621-pci-phy",
-		.of_match_table = of_match_ptr(mt7621_pci_phy_ids),
+		.of_match_table = mt7621_pci_phy_ids,
 	},
 };
 
-- 
GitLab


From d1ce245fe409241ed6168c835a5b55ef52bdb6a9 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 3 Jun 2021 13:10:14 +0800
Subject: [PATCH 2118/3804] phy: Sparx5 Eth SerDes: check return value after
 calling platform_get_resource()

It will cause a null-ptr-deref if platform_get_resource() returns NULL,
so we need to check the return value.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210603051014.2674744-1-yangyingliang@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/microchip/sparx5_serdes.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/phy/microchip/sparx5_serdes.c b/drivers/phy/microchip/sparx5_serdes.c
index c8a7d0927ced1..4076580fc2cd9 100644
--- a/drivers/phy/microchip/sparx5_serdes.c
+++ b/drivers/phy/microchip/sparx5_serdes.c
@@ -2470,6 +2470,10 @@ static int sparx5_serdes_probe(struct platform_device *pdev)
 	priv->coreclock = clock;
 
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iores) {
+		dev_err(priv->dev, "Invalid resource\n");
+		return -EINVAL;
+	}
 	iomem = devm_ioremap(priv->dev, iores->start, resource_size(iores));
 	if (IS_ERR(iomem)) {
 		dev_err(priv->dev, "Unable to get serdes registers: %s\n",
-- 
GitLab


From 99b18e88a1cf737ae924123d63b46d9a3d17b1af Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 2 Jun 2021 18:07:26 +0800
Subject: [PATCH 2119/3804] dmaengine: idxd: Fix missing error code in
 idxd_cdev_open()

The error code is missing in this code scenario, add the error code
'-EINVAL' to the return value 'rc'.

Eliminate the following smatch warning:

drivers/dma/idxd/cdev.c:113 idxd_cdev_open() warn: missing error code
'rc'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/1622628446-87909-1-git-send-email-jiapeng.chong@linux.alibaba.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/cdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 302cba5ff779d..d4419bf1fedef 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -110,6 +110,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
 		pasid = iommu_sva_get_pasid(sva);
 		if (pasid == IOMMU_PASID_INVALID) {
 			iommu_sva_unbind_device(sva);
+			rc = -EINVAL;
 			goto failed;
 		}
 
-- 
GitLab


From 9c1fe96bded935369f8340c2ac2e9e189f697d5d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 2 Jun 2021 13:38:23 +0200
Subject: [PATCH 2120/3804] ALSA: timer: Fix master timer notification

snd_timer_notify1() calls the notification to each slave for a master
event, but it passes a wrong event number.  It should use a +10 offset,
corresponding to SNDRV_TIMER_EVENT_MXXX, but it incorrectly uses a
+100 offset.  This was spotted incidentally by a UBSAN check via syzkaller.

Reported-by: syzbot+d102fa5b35335a7e544e@syzkaller.appspotmail.com
Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/000000000000e5560e05c3bd1d63@google.com
Link: https://lore.kernel.org/r/20210602113823.23777-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/timer.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/core/timer.c b/sound/core/timer.c
index 6898b1ac0d7f4..92b7008fcdb86 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -520,9 +520,10 @@ static void snd_timer_notify1(struct snd_timer_instance *ti, int event)
 		return;
 	if (timer->hw.flags & SNDRV_TIMER_HW_SLAVE)
 		return;
+	event += 10; /* convert to SNDRV_TIMER_EVENT_MXXX */
 	list_for_each_entry(ts, &ti->slave_active_head, active_list)
 		if (ts->ccallback)
-			ts->ccallback(ts, event + 100, &tstamp, resolution);
+			ts->ccallback(ts, event, &tstamp, resolution);
 }
 
 /* start/continue a master timer */
-- 
GitLab


From b8b90c17602689eeaa5b219d104bbc215d1225cc Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 2 Jun 2021 22:54:24 +0800
Subject: [PATCH 2121/3804] ALSA: hda: update the power_state during the
 direct-complete

patch_realtek.c needs to check whether power_state.event equals
PM_EVENT_SUSPEND. With direct-complete, suspend() and resume() are
skipped if the codec is already rt_suspended; in this case,
patch_realtek.c will always get PM_EVENT_ON even if the system has
really resumed from S3.

We could set power_state to PMSG_SUSPEND in the prepare(), if other
PM functions are called before complete(), those functions will
override power_state; if no other PM functions are called before
complete(), we could know the suspend() and resume() are skipped since
only S3 pm functions could be skipped by direct-complete, in this case
set power_state to PMSG_RESUME in the complete(). This could guarantee
the first time of calling hda_codec_runtime_resume() after complete()
has the correct power_state.

Fixes: 215a22ed31a1 ("ALSA: hda: Refactor codec PM to use direct-complete optimization")
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20210602145424.3132-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_codec.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index a31009afc025f..5462f771c2f90 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2917,6 +2917,7 @@ static int hda_codec_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM_SLEEP
 static int hda_codec_pm_prepare(struct device *dev)
 {
+	dev->power.power_state = PMSG_SUSPEND;
 	return pm_runtime_suspended(dev);
 }
 
@@ -2924,6 +2925,10 @@ static void hda_codec_pm_complete(struct device *dev)
 {
 	struct hda_codec *codec = dev_to_hda_codec(dev);
 
+	/* If no other pm-functions are called between prepare() and complete() */
+	if (dev->power.power_state.event == PM_EVENT_SUSPEND)
+		dev->power.power_state = PMSG_RESUME;
+
 	if (pm_runtime_suspended(dev) && (codec->jackpoll_interval ||
 	    hda_codec_need_resume(codec) || codec->forced_resume))
 		pm_request_resume(dev);
-- 
GitLab


From ad6f5cc5f6c261f881e44ecd750f17952df2b496 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Sun, 25 Apr 2021 18:24:59 +0800
Subject: [PATCH 2122/3804] net/ieee802154: drop unneeded assignment in
 llsec_iter_devkeys()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To keep the code style consistent across the whole file, the
redundant return value ‘rc’ and its assignments should be deleted.

The clang_analyzer complains as follows:
net/ieee802154/nl-mac.c:1203:12: warning: Although the value stored to
'rc' is used in the enclosing expression, the value is never actually
read from 'rc'

No functional change, only more efficient.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Link: https://lore.kernel.org/r/1619346299-40237-1-git-send-email-yang.lee@linux.alibaba.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/ieee802154/nl-mac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 0c1b0770c59ea..a6a8cf62924ce 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -1184,7 +1184,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
 {
 	struct ieee802154_llsec_device *dpos;
 	struct ieee802154_llsec_device_key *kpos;
-	int rc = 0, idx = 0, idx2;
+	int idx = 0, idx2;
 
 	list_for_each_entry(dpos, &data->table->devices, list) {
 		if (idx++ < data->s_idx)
@@ -1200,7 +1200,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
 						      data->nlmsg_seq,
 						      dpos->hwaddr, kpos,
 						      data->dev)) {
-				return rc = -EMSGSIZE;
+				return -EMSGSIZE;
 			}
 
 			data->s_idx2++;
@@ -1209,7 +1209,7 @@ static int llsec_iter_devkeys(struct llsec_dump_data *data)
 		data->s_idx++;
 	}
 
-	return rc;
+	return 0;
 }
 
 int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
-- 
GitLab


From aab53e6756caadeb908a70d5bcdf5a24baf34ad8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 31 May 2021 16:22:26 +0300
Subject: [PATCH 2123/3804] net: ieee802154: mrf24j40: Drop unneeded
 of_match_ptr()

Driver can be used in different environments and moreover, when compiled
with !OF, the compiler may issue a warning due to unused mrf24j40_of_match
variable. Hence drop unneeded of_match_ptr() call.

While at it, update headers block to reflect above changes.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210531132226.47081-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 drivers/net/ieee802154/mrf24j40.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c
index b9be530b285f9..ff83e00b77af7 100644
--- a/drivers/net/ieee802154/mrf24j40.c
+++ b/drivers/net/ieee802154/mrf24j40.c
@@ -8,8 +8,8 @@
 
 #include <linux/spi/spi.h>
 #include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of.h>
 #include <linux/regmap.h>
 #include <linux/ieee802154.h>
 #include <linux/irq.h>
@@ -1388,7 +1388,7 @@ MODULE_DEVICE_TABLE(spi, mrf24j40_ids);
 
 static struct spi_driver mrf24j40_driver = {
 	.driver = {
-		.of_match_table = of_match_ptr(mrf24j40_of_match),
+		.of_match_table = mrf24j40_of_match,
 		.name = "mrf24j40",
 	},
 	.id_table = mrf24j40_ids,
-- 
GitLab


From 79c6b8ed30e54b401c873dbad2511f2a1c525fd5 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 14:25:17 +0800
Subject: [PATCH 2124/3804] ieee802154: fix error return code in
 ieee802154_add_iface()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: be51da0f3e34 ("ieee802154: Stop using NLA_PUT*().")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210508062517.2574-1-thunder.leizhen@huawei.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/ieee802154/nl-phy.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 2cdc7e63fe172..88215b5c93aa4 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -241,8 +241,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
-	    nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+	    nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) {
+		rc = -EMSGSIZE;
 		goto nla_put_failure;
+	}
 	dev_put(dev);
 
 	wpan_phy_put(phy);
-- 
GitLab


From 281e44f5fd4f82d86a2b86f0592c698f7311a674 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 3 Jun 2021 15:15:02 +0800
Subject: [PATCH 2125/3804] arm64: perf: Add more support on caps under sysfs

Armv8.7 has introduced BUS_SLOTS and BUS_WIDTH in PMMIR_EL1 register,
add two entries in caps for bus_slots and bus_width under sysfs. It
will return the true slots and width if the information is available,
otherwise it will return 0.

Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1622704502-63951-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/perf_event.h |  5 +++++
 arch/arm64/kernel/perf_event.c      | 33 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 60731f602d3ef..4ef6f19331f98 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -239,6 +239,11 @@
 /* PMMIR_EL1.SLOTS mask */
 #define ARMV8_PMU_SLOTS_MASK	0xff
 
+#define ARMV8_PMU_BUS_SLOTS_SHIFT 8
+#define ARMV8_PMU_BUS_SLOTS_MASK 0xff
+#define ARMV8_PMU_BUS_WIDTH_SHIFT 16
+#define ARMV8_PMU_BUS_WIDTH_MASK 0xf
+
 #ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 44b6eda69a81a..a661010308c05 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -317,8 +317,41 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(slots);
 
+static ssize_t bus_slots_show(struct device *dev, struct device_attribute *attr,
+			      char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 bus_slots = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_SLOTS_SHIFT)
+			& ARMV8_PMU_BUS_SLOTS_MASK;
+
+	return sysfs_emit(page, "0x%08x\n", bus_slots);
+}
+
+static DEVICE_ATTR_RO(bus_slots);
+
+static ssize_t bus_width_show(struct device *dev, struct device_attribute *attr,
+			      char *page)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
+	u32 bus_width = (cpu_pmu->reg_pmmir >> ARMV8_PMU_BUS_WIDTH_SHIFT)
+			& ARMV8_PMU_BUS_WIDTH_MASK;
+	u32 val = 0;
+
+	/* Encoded as Log2(number of bytes), plus one */
+	if (bus_width > 2 && bus_width < 13)
+		val = 1 << (bus_width - 1);
+
+	return sysfs_emit(page, "0x%08x\n", val);
+}
+
+static DEVICE_ATTR_RO(bus_width);
+
 static struct attribute *armv8_pmuv3_caps_attrs[] = {
 	&dev_attr_slots.attr,
+	&dev_attr_bus_slots.attr,
+	&dev_attr_bus_width.attr,
 	NULL,
 };
 
-- 
GitLab


From 373e864cf52403b0974c2f23ca8faf9104234555 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 19 May 2021 14:16:14 +0000
Subject: [PATCH 2126/3804] ieee802154: fix error return code in
 ieee802154_llsec_getparams()

Fix to return negative error code -ENOBUFS from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 3e9c156e2c21 ("ieee802154: add netlink interfaces for llsec")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20210519141614.3040055-1-weiyongjun1@huawei.com
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
---
 net/ieee802154/nl-mac.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index a6a8cf62924ce..29bf976401664 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -680,8 +680,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
 	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
 	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
 			be32_to_cpu(params.frame_counter)) ||
-	    ieee802154_llsec_fill_key_id(msg, &params.out_key))
+	    ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+		rc = -ENOBUFS;
 		goto out_free;
+	}
 
 	dev_put(dev);
 
-- 
GitLab


From dad7b9896a5dbac5da8275d5a6147c65c81fb5f2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 14 May 2021 11:26:37 +0100
Subject: [PATCH 2127/3804] ARM: 9081/1: fix gcc-10 thumb2-kernel regression

When building the kernel with gcc-10 or higher using the
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y flag, the compiler picks a slightly
different set of registers for the inline assembly in cpu_init() that
subsequently results in a corrupt kernel stack as well as remaining in
FIQ mode. If a banked register is used for the last argument, the wrong
version of that register gets loaded into CPSR_c.  When building in Arm
mode, the arguments are passed as immediate values and the bug cannot
happen.

This got introduced when Daniel reworked the FIQ handling and was
technically always broken, but happened to work with both clang and gcc
before gcc-10 as long as they picked one of the lower registers.
This is probably an indication that still very few people build the
kernel in Thumb2 mode.

Marek pointed out the problem on IRC, Arnd narrowed it down to this
inline assembly and Russell pinpointed the exact bug.

Change the constraints to force the final mode switch to use a non-banked
register for the argument to ensure that the correct constant gets loaded.
Another alternative would be to always use registers for the constant
arguments to avoid the #ifdef that has now become more complex.

Cc: <stable@vger.kernel.org> # v3.18+
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Reported-by: Marek Vasut <marek.vasut@gmail.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Fixes: c0e7f7ee717e ("ARM: 8150/3: fiq: Replace default FIQ handler")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/setup.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 1a5edf562e85e..73ca7797b92f6 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -545,9 +545,11 @@ void notrace cpu_init(void)
 	 * In Thumb-2, msr with an immediate value is not allowed.
 	 */
 #ifdef CONFIG_THUMB2_KERNEL
-#define PLC	"r"
+#define PLC_l	"l"
+#define PLC_r	"r"
 #else
-#define PLC	"I"
+#define PLC_l	"I"
+#define PLC_r	"I"
 #endif
 
 	/*
@@ -569,15 +571,15 @@ void notrace cpu_init(void)
 	"msr	cpsr_c, %9"
 	    :
 	    : "r" (stk),
-	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
 	      "I" (offsetof(struct stack, irq[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE),
 	      "I" (offsetof(struct stack, abt[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE),
 	      "I" (offsetof(struct stack, und[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+	      PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
 	      "I" (offsetof(struct stack, fiq[0])),
-	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
+	      PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
 #endif
 }
-- 
GitLab


From fcf6631f3736985ec89bdd76392d3c7bfb60119f Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Tue, 1 Jun 2021 10:58:32 +0200
Subject: [PATCH 2128/3804] sched/pelt: Ensure that *_sum is always synced with
 *_avg

Rounding in the PELT calculation that happens when entities are attached to
or detached from a cfs_rq can result in situations where util/runnable_avg
is not null but util/runnable_sum is. This is normally not possible, so we
need to ensure that util/runnable_sum stays synced with util/runnable_avg.

detach_entity_load_avg() is the last place where we don't sync
util/runnable_sum with util/runnable_avg when moving some sched_entities.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210601085832.12626-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e7c8277e3d54a..7b98fb37330a6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3765,11 +3765,17 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	/*
+	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+	 * See ___update_load_avg() for details.
+	 */
+	u32 divider = get_pelt_divider(&cfs_rq->avg);
+
 	dequeue_load_avg(cfs_rq, se);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
 	sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-	sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
 
 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
 
-- 
GitLab


From b430e1d65ef6eeee42c4e53028f8dfcc6abc728b Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 3 Jun 2021 02:06:36 +0200
Subject: [PATCH 2129/3804] platform/surface: aggregator: Fix event disable
 function

Disabling events silently fails due to the wrong command ID being used.
Instead of the command ID for the disable call, the command ID for the
enable call was being used. This causes the disable call to enable the
event instead. As the event is already enabled when we call this
function, the EC silently drops this command and does nothing.

Use the correct command ID for disabling the event to fix this.

Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210603000636.568846-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/aggregator/controller.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c
index 8a70df60142c2..a06964aa96e77 100644
--- a/drivers/platform/surface/aggregator/controller.c
+++ b/drivers/platform/surface/aggregator/controller.c
@@ -1907,7 +1907,7 @@ static int ssam_ssh_event_disable(struct ssam_controller *ctrl,
 {
 	int status;
 
-	status = __ssam_ssh_event_request(ctrl, reg, reg.cid_enable, id, flags);
+	status = __ssam_ssh_event_request(ctrl, reg, reg.cid_disable, id, flags);
 
 	if (status < 0 && status != -EINVAL) {
 		ssam_err(ctrl,
-- 
GitLab


From dea8464ddf553803382efb753b6727dbf3931d06 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Mon, 31 May 2021 14:36:03 +0800
Subject: [PATCH 2130/3804] dmaengine: rcar-dmac: Fix PM reference leak in
 rcar_dmac_probe()

pm_runtime_get_sync() increments the PM usage counter even when it fails.
Forgetting the matching put operation results in a reference leak here.
Fix it by replacing it with pm_runtime_resume_and_get() to keep the usage
counter balanced.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/1622442963-54095-1-git-send-email-zou_wei@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sh/rcar-dmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index d530c1bf11d97..6885b3dcd7a97 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1913,7 +1913,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 
 	/* Enable runtime PM and initialize the device. */
 	pm_runtime_enable(&pdev->dev);
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
 		return ret;
-- 
GitLab


From e7662cb9e99ef0fd15b8a0dcb3e5d7b32f9812d4 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Mon, 24 May 2021 17:47:08 +0800
Subject: [PATCH 2131/3804] crypto: hisilicon - switch to memdup_user_nul()

Use memdup_user_nul() helper instead of open-coding to
simplify the code.

v1-->v2:
   fixed patch title error
v2-->v3:
   return the actual error

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index fe35ea949a5bb..7c1f8ab28f995 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1603,16 +1603,9 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
 	if (count > QM_DBG_WRITE_LEN)
 		return -ENOSPC;
 
-	cmd_buf = kzalloc(count + 1, GFP_KERNEL);
-	if (!cmd_buf)
-		return -ENOMEM;
-
-	if (copy_from_user(cmd_buf, buffer, count)) {
-		kfree(cmd_buf);
-		return -EFAULT;
-	}
-
-	cmd_buf[count] = '\0';
+	cmd_buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(cmd_buf))
+		return PTR_ERR(cmd_buf);
 
 	cmd_buf_tmp = strchr(cmd_buf, '\n');
 	if (cmd_buf_tmp) {
-- 
GitLab


From 5d0421d65be8c02bdde7a44f153babeaf004db7a Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 24 May 2021 19:53:38 +0800
Subject: [PATCH 2132/3804] hwrng: exynos - Use pm_runtime_resume_and_get() to
 replace open coding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and
pm_runtime_put_noidle(). This change just simplifies the code; there are
no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Łukasz Stelmach <l.stelmach@samsung.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/exynos-trng.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index c8db62bc5ff72..9cc3d542dd0f4 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -196,10 +196,9 @@ static int __maybe_unused exynos_trng_resume(struct device *dev)
 {
 	int ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
 		dev_err(dev, "Could not get runtime PM.\n");
-		pm_runtime_put_noidle(dev);
 		return ret;
 	}
 
-- 
GitLab


From b21d14d9885ace8587a5b5b36cdcda9d8814f313 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 24 May 2021 20:20:57 +0800
Subject: [PATCH 2133/3804] hwrng: omap - Use pm_runtime_resume_and_get() to
 replace open coding

Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and
pm_runtime_put_noidle(). This change just simplifies the code; there are
no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/omap-rng.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index cede9f1591029..00ff96703dd25 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -454,10 +454,9 @@ static int omap_rng_probe(struct platform_device *pdev)
 	}
 
 	pm_runtime_enable(&pdev->dev);
-	ret = pm_runtime_get_sync(&pdev->dev);
+	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
-		pm_runtime_put_noidle(&pdev->dev);
 		goto err_ioremap;
 	}
 
@@ -543,10 +542,9 @@ static int __maybe_unused omap_rng_resume(struct device *dev)
 	struct omap_rng_dev *priv = dev_get_drvdata(dev);
 	int ret;
 
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
 		dev_err(dev, "Failed to runtime_get device: %d\n", ret);
-		pm_runtime_put_noidle(dev);
 		return ret;
 	}
 
-- 
GitLab


From e9009fb227fa66a66cef02a36fb51c288f411e0d Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Mon, 24 May 2021 20:28:38 +0800
Subject: [PATCH 2134/3804] hwrng: ks-sa - Use pm_runtime_resume_and_get() to
 replace open coding

Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and
pm_runtime_put_noidle(). This change just simplifies the code; there are
no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/ks-sa-rng.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c
index 8f1d47ff97996..2f2f21f1b659e 100644
--- a/drivers/char/hw_random/ks-sa-rng.c
+++ b/drivers/char/hw_random/ks-sa-rng.c
@@ -241,10 +241,9 @@ static int ks_sa_rng_probe(struct platform_device *pdev)
 	}
 
 	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
+	ret = pm_runtime_resume_and_get(dev);
 	if (ret < 0) {
 		dev_err(dev, "Failed to enable SA power-domain\n");
-		pm_runtime_put_noidle(dev);
 		pm_runtime_disable(dev);
 		return ret;
 	}
-- 
GitLab


From 7551a074700a4093f5556a5ae51c1f83ea6b96ba Mon Sep 17 00:00:00 2001
From: Wu Bo <wubo40@huawei.com>
Date: Tue, 25 May 2021 16:15:19 +0800
Subject: [PATCH 2135/3804] crypto: af_alg - use DIV_ROUND_UP helper macro for
 calculations

Replace open coded divisor calculations with the DIV_ROUND_UP kernel
macro for better readability.

Signed-off-by: Wu Bo <wubo40@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 18cc82dc4a42f..8bd288d2b089b 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -411,7 +411,7 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
 	if (n < 0)
 		return n;
 
-	npages = (off + n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	npages = DIV_ROUND_UP(off + n, PAGE_SIZE);
 	if (WARN_ON(npages == 0))
 		return -EINVAL;
 	/* Add one extra for linking */
-- 
GitLab


From f5a6bf077126a1ac8a5c489022531e72a088603e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 25 May 2021 10:30:46 +0200
Subject: [PATCH 2136/3804] crypto: ixp4xx - convert to platform driver

The ixp4xx_crypto driver traditionally registers a bare platform
device without attaching it to a driver, and detects the hardware
at module init time by reading an SoC specific hardware register.

Change this to the conventional method of registering the platform
device from the platform code itself when the device is present,
turning the module_init/module_exit functions into probe/release
driver callbacks.

This enables compile-testing as well as potentially having ixp4xx
coexist with other ARMv5 platforms in the same kernel in the future.

Cc: Corentin Labbe <clabbe@baylibre.com>
Tested-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/mach-ixp4xx/common.c  | 26 ++++++++++++++++++++++++
 drivers/crypto/ixp4xx_crypto.c | 37 ++++++++++++----------------------
 2 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 000f672a94c97..007a44412e240 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -233,12 +233,38 @@ static struct platform_device *ixp46x_devices[] __initdata = {
 unsigned long ixp4xx_exp_bus_size;
 EXPORT_SYMBOL(ixp4xx_exp_bus_size);
 
+static struct platform_device_info ixp_dev_info __initdata = {
+	.name		= "ixp4xx_crypto",
+	.id		= 0,
+	.dma_mask	= DMA_BIT_MASK(32),
+};
+
+static int __init ixp_crypto_register(void)
+{
+	struct platform_device *pdev;
+
+	if (!(~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH |
+				IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) {
+		printk(KERN_ERR "ixp_crypto: No HW crypto available\n");
+		return -ENODEV;
+	}
+
+	pdev = platform_device_register_full(&ixp_dev_info);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	return 0;
+}
+
 void __init ixp4xx_sys_init(void)
 {
 	ixp4xx_exp_bus_size = SZ_16M;
 
 	platform_add_devices(ixp4xx_devices, ARRAY_SIZE(ixp4xx_devices));
 
+	if (IS_ENABLED(CONFIG_CRYPTO_DEV_IXP4XX))
+		ixp_crypto_register();
+
 	if (cpu_is_ixp46x()) {
 		int region;
 
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index b38650b0fea10..76099d6cfff9a 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -229,8 +229,6 @@ static dma_addr_t crypt_phys;
 
 static int support_aes = 1;
 
-#define DRIVER_NAME "ixp4xx_crypto"
-
 static struct platform_device *pdev;
 
 static inline dma_addr_t crypt_virt2phys(struct crypt_ctl *virt)
@@ -453,11 +451,6 @@ static int init_ixp_crypto(struct device *dev)
 	int ret = -ENODEV;
 	u32 msg[2] = { 0, 0 };
 
-	if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH |
-				IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) {
-		dev_err(dev, "ixp_crypto: No HW crypto available\n");
-		return ret;
-	}
 	npe_c = npe_request(NPE_ID);
 	if (!npe_c)
 		return ret;
@@ -1441,26 +1434,17 @@ static struct ixp_aead_alg ixp4xx_aeads[] = {
 
 #define IXP_POSTFIX "-ixp4xx"
 
-static const struct platform_device_info ixp_dev_info __initdata = {
-	.name		= DRIVER_NAME,
-	.id		= 0,
-	.dma_mask	= DMA_BIT_MASK(32),
-};
-
-static int __init ixp_module_init(void)
+static int ixp_crypto_probe(struct platform_device *_pdev)
 {
 	int num = ARRAY_SIZE(ixp4xx_algos);
 	int i, err;
 
-	pdev = platform_device_register_full(&ixp_dev_info);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
+	pdev = _pdev;
 
 	err = init_ixp_crypto(&pdev->dev);
-	if (err) {
-		platform_device_unregister(pdev);
+	if (err)
 		return err;
-	}
+
 	for (i = 0; i < num; i++) {
 		struct skcipher_alg *cra = &ixp4xx_algos[i].crypto;
 
@@ -1531,7 +1515,7 @@ static int __init ixp_module_init(void)
 	return 0;
 }
 
-static void __exit ixp_module_exit(void)
+static int ixp_crypto_remove(struct platform_device *pdev)
 {
 	int num = ARRAY_SIZE(ixp4xx_algos);
 	int i;
@@ -1546,11 +1530,16 @@ static void __exit ixp_module_exit(void)
 			crypto_unregister_skcipher(&ixp4xx_algos[i].crypto);
 	}
 	release_ixp_crypto(&pdev->dev);
-	platform_device_unregister(pdev);
+
+	return 0;
 }
 
-module_init(ixp_module_init);
-module_exit(ixp_module_exit);
+static struct platform_driver ixp_crypto_driver = {
+	.probe = ixp_crypto_probe,
+	.remove = ixp_crypto_remove,
+	.driver = { .name = "ixp4xx_crypto" },
+};
+module_platform_driver(ixp_crypto_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Christian Hohnstaedt <chohnstaedt@innominate.com>");
-- 
GitLab


From 937264905aa21655cb1142146997f211153e6e27 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 25 May 2021 10:48:46 +0200
Subject: [PATCH 2137/3804] crypto: ixp4xx - Add DT bindings

This adds device tree bindings for the ixp4xx crypto engine.

Cc: Corentin Labbe <clabbe@baylibre.com>
Cc: devicetree@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../bindings/crypto/intel,ixp4xx-crypto.yaml  | 47 +++++++++++++++++++
 ...ntel,ixp4xx-network-processing-engine.yaml | 22 +++++++--
 2 files changed, 65 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml

diff --git a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
new file mode 100644
index 0000000000000..9c53c27bd20ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/crypto/intel,ixp4xx-crypto.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx cryptographic engine
+
+maintainers:
+  - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+  The Intel IXP4xx cryptographic engine makes use of the IXP4xx NPE
+  (Network Processing Engine). Since it is not a device on its own
+  it is defined as a subnode of the NPE, if crypto support is
+  available on the platform.
+
+properties:
+  compatible:
+    const: intel,ixp4xx-crypto
+
+  intel,npe-handle:
+    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    maxItems: 1
+    description: phandle to the NPE this crypto engine is using, the cell
+      describing the NPE instance to be used.
+
+  queue-rx:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    maxItems: 1
+    description: phandle to the RX queue on the NPE, the cell describing
+      the queue instance to be used.
+
+  queue-txready:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    maxItems: 1
+    description: phandle to the TX READY queue on the NPE, the cell describing
+      the queue instance to be used.
+
+required:
+  - compatible
+  - intel,npe-handle
+  - queue-rx
+  - queue-txready
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
index 1bd2870c3a9c9..c435c9f369a41 100644
--- a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
+++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
@@ -26,9 +26,16 @@ properties:
 
   reg:
     items:
-      - description: NPE0 register range
-      - description: NPE1 register range
-      - description: NPE2 register range
+      - description: NPE0 (NPE-A) register range
+      - description: NPE1 (NPE-B) register range
+      - description: NPE2 (NPE-C) register range
+
+  crypto:
+    $ref: /schemas/crypto/intel,ixp4xx-crypto.yaml#
+    type: object
+    description: Optional node for the embedded crypto engine, the node
+      should be named with the instance number of the NPE engine used for
+      the crypto engine.
 
 required:
   - compatible
@@ -38,8 +45,15 @@ additionalProperties: false
 
 examples:
   - |
-    npe@c8006000 {
+    npe: npe@c8006000 {
          compatible = "intel,ixp4xx-network-processing-engine";
          reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+
+         crypto {
+             compatible = "intel,ixp4xx-crypto";
+             intel,npe-handle = <&npe 2>;
+             queue-rx = <&qmgr 30>;
+             queue-txready = <&qmgr 29>;
+         };
     };
 ...
-- 
GitLab


From 76f24b4f46b8ca380d6e2c91bd84e0e47a9f4bcd Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 25 May 2021 10:50:56 +0200
Subject: [PATCH 2138/3804] crypto: ixp4xx - Add device tree support

This makes the IXP4xx driver probe from the device tree
and retrieve the NPE and the two queue manager handles used
to process crypto from the device tree.

As the crypto engine is topologically a part of the NPE
hardware, we augment the NPE driver to spawn the
crypto engine as a child.

The platform data probe path is going away in due time,
for now it is an isolated else clause.

Cc: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c  | 107 +++++++++++++++++++++++---------
 drivers/soc/ixp4xx/ixp4xx-npe.c |   7 +++
 2 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 76099d6cfff9a..35fc5ee704915 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/gfp.h>
 #include <linux/module.h>
+#include <linux/of.h>
 
 #include <crypto/ctr.h>
 #include <crypto/internal/des.h>
@@ -71,15 +72,11 @@
 #define MOD_AES256  (0x0a00 | KEYLEN_256)
 
 #define MAX_IVLEN   16
-#define NPE_ID      2  /* NPE C */
 #define NPE_QLEN    16
 /* Space for registering when the first
  * NPE_QLEN crypt_ctl are busy */
 #define NPE_QLEN_TOTAL 64
 
-#define SEND_QID    29
-#define RECV_QID    30
-
 #define CTL_FLAG_UNUSED		0x0000
 #define CTL_FLAG_USED		0x1000
 #define CTL_FLAG_PERFORM_ABLK	0x0001
@@ -221,6 +218,9 @@ static const struct ix_hash_algo hash_alg_sha1 = {
 };
 
 static struct npe *npe_c;
+
+static unsigned int send_qid;
+static unsigned int recv_qid;
 static struct dma_pool *buffer_pool;
 static struct dma_pool *ctx_pool;
 
@@ -437,8 +437,7 @@ static void crypto_done_action(unsigned long arg)
 	int i;
 
 	for (i = 0; i < 4; i++) {
-		dma_addr_t phys = qmgr_get_entry(RECV_QID);
-
+		dma_addr_t phys = qmgr_get_entry(recv_qid);
 		if (!phys)
 			return;
 		one_packet(phys);
@@ -448,10 +447,52 @@ static void crypto_done_action(unsigned long arg)
 
 static int init_ixp_crypto(struct device *dev)
 {
-	int ret = -ENODEV;
+	struct device_node *np = dev->of_node;
 	u32 msg[2] = { 0, 0 };
+	int ret = -ENODEV;
+	u32 npe_id;
+
+	dev_info(dev, "probing...\n");
+
+	/* Locate the NPE and queue manager to use from device tree */
+	if (IS_ENABLED(CONFIG_OF) && np) {
+		struct of_phandle_args queue_spec;
+		struct of_phandle_args npe_spec;
+
+		ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle",
+						       1, 0, &npe_spec);
+		if (ret) {
+			dev_err(dev, "no NPE engine specified\n");
+			return -ENODEV;
+		}
+		npe_id = npe_spec.args[0];
 
-	npe_c = npe_request(NPE_ID);
+		ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
+						       &queue_spec);
+		if (ret) {
+			dev_err(dev, "no rx queue phandle\n");
+			return -ENODEV;
+		}
+		recv_qid = queue_spec.args[0];
+
+		ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0,
+						       &queue_spec);
+		if (ret) {
+			dev_err(dev, "no txready queue phandle\n");
+			return -ENODEV;
+		}
+		send_qid = queue_spec.args[0];
+	} else {
+		/*
+		 * Hardcoded engine when using platform data, this goes away
+		 * when we switch to using DT only.
+		 */
+		npe_id = 2;
+		send_qid = 29;
+		recv_qid = 30;
+	}
+
+	npe_c = npe_request(npe_id);
 	if (!npe_c)
 		return ret;
 
@@ -497,20 +538,20 @@ static int init_ixp_crypto(struct device *dev)
 	if (!ctx_pool)
 		goto err;
 
-	ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0,
+	ret = qmgr_request_queue(send_qid, NPE_QLEN_TOTAL, 0, 0,
 				 "ixp_crypto:out", NULL);
 	if (ret)
 		goto err;
-	ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0,
+	ret = qmgr_request_queue(recv_qid, NPE_QLEN, 0, 0,
 				 "ixp_crypto:in", NULL);
 	if (ret) {
-		qmgr_release_queue(SEND_QID);
+		qmgr_release_queue(send_qid);
 		goto err;
 	}
-	qmgr_set_irq(RECV_QID, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL);
+	qmgr_set_irq(recv_qid, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL);
 	tasklet_init(&crypto_done_tasklet, crypto_done_action, 0);
 
-	qmgr_enable_irq(RECV_QID);
+	qmgr_enable_irq(recv_qid);
 	return 0;
 
 npe_error:
@@ -526,11 +567,11 @@ npe_release:
 
 static void release_ixp_crypto(struct device *dev)
 {
-	qmgr_disable_irq(RECV_QID);
+	qmgr_disable_irq(recv_qid);
 	tasklet_kill(&crypto_done_tasklet);
 
-	qmgr_release_queue(SEND_QID);
-	qmgr_release_queue(RECV_QID);
+	qmgr_release_queue(send_qid);
+	qmgr_release_queue(recv_qid);
 
 	dma_pool_destroy(ctx_pool);
 	dma_pool_destroy(buffer_pool);
@@ -682,8 +723,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
 	buf->phys_addr = pad_phys;
 
 	atomic_inc(&ctx->configuring);
-	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
-	BUG_ON(qmgr_stat_overflow(SEND_QID));
+	qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+	BUG_ON(qmgr_stat_overflow(send_qid));
 	return 0;
 }
 
@@ -757,8 +798,8 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
 	crypt->ctl_flags |= CTL_FLAG_GEN_REVAES;
 
 	atomic_inc(&ctx->configuring);
-	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
-	BUG_ON(qmgr_stat_overflow(SEND_QID));
+	qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+	BUG_ON(qmgr_stat_overflow(send_qid));
 	return 0;
 }
 
@@ -943,7 +984,7 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 	if (sg_nents(req->src) > 1 || sg_nents(req->dst) > 1)
 		return ixp4xx_cipher_fallback(req, encrypt);
 
-	if (qmgr_stat_full(SEND_QID))
+	if (qmgr_stat_full(send_qid))
 		return -EAGAIN;
 	if (atomic_read(&ctx->configuring))
 		return -EAGAIN;
@@ -993,8 +1034,8 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
 	req_ctx->src = src_hook.next;
 	crypt->src_buf = src_hook.phys_next;
 	crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK;
-	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
-	BUG_ON(qmgr_stat_overflow(SEND_QID));
+	qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+	BUG_ON(qmgr_stat_overflow(send_qid));
 	return -EINPROGRESS;
 
 free_buf_src:
@@ -1057,7 +1098,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
 	unsigned int lastlen;
 
-	if (qmgr_stat_full(SEND_QID))
+	if (qmgr_stat_full(send_qid))
 		return -EAGAIN;
 	if (atomic_read(&ctx->configuring))
 		return -EAGAIN;
@@ -1141,8 +1182,8 @@ static int aead_perform(struct aead_request *req, int encrypt,
 	}
 
 	crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD;
-	qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
-	BUG_ON(qmgr_stat_overflow(SEND_QID));
+	qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+	BUG_ON(qmgr_stat_overflow(send_qid));
 	return -EINPROGRESS;
 
 free_buf_dst:
@@ -1436,12 +1477,13 @@ static struct ixp_aead_alg ixp4xx_aeads[] = {
 
 static int ixp_crypto_probe(struct platform_device *_pdev)
 {
+	struct device *dev = &_pdev->dev;
 	int num = ARRAY_SIZE(ixp4xx_algos);
 	int i, err;
 
 	pdev = _pdev;
 
-	err = init_ixp_crypto(&pdev->dev);
+	err = init_ixp_crypto(dev);
 	if (err)
 		return err;
 
@@ -1533,11 +1575,20 @@ static int ixp_crypto_remove(struct platform_device *pdev)
 
 	return 0;
 }
+static const struct of_device_id ixp4xx_crypto_of_match[] = {
+	{
+		.compatible = "intel,ixp4xx-crypto",
+	},
+	{},
+};
 
 static struct platform_driver ixp_crypto_driver = {
 	.probe = ixp_crypto_probe,
 	.remove = ixp_crypto_remove,
-	.driver = { .name = "ixp4xx_crypto" },
+	.driver = {
+		.name = "ixp4xx_crypto",
+		.of_match_table = ixp4xx_crypto_of_match,
+	},
 };
 module_platform_driver(ixp_crypto_driver);
 
diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c
index ec90b44fa0cd3..3c158251a58b6 100644
--- a/drivers/soc/ixp4xx/ixp4xx-npe.c
+++ b/drivers/soc/ixp4xx/ixp4xx-npe.c
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/soc/ixp4xx/npe.h>
 
@@ -679,6 +680,7 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
 {
 	int i, found = 0;
 	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 	struct resource *res;
 
 	for (i = 0; i < NPE_COUNT; i++) {
@@ -711,6 +713,11 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
 
 	if (!found)
 		return -ENODEV;
+
+	/* Spawn crypto subdevice if using device tree */
+	if (IS_ENABLED(CONFIG_OF) && np)
+		devm_of_platform_populate(dev);
+
 	return 0;
 }
 
-- 
GitLab


From 4cd8c3152edeb0a580e0552317606a1f90bc59ab Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 May 2021 16:57:15 +0530
Subject: [PATCH 2139/3804] crypto: octeontx2 - Add mailbox support for CN10K

Mailbox region configuration has some changes on CN10K platform
from OcteonTX2(CN9XX) platform.

On CN10K platform:
The DRAM region allocated to PF is enumerated as PF BAR4 memory.
PF BAR4 contains AF-PF mbox region followed by its VFs mbox region.
AF-PF mbox region base address is configured at RVU_AF_PFX_BAR4_ADDR
PF-VF mailbox base address is configured at
RVU_PF(x)_VF_MBOX_ADDR = RVU_AF_PF()_BAR4_ADDR+64KB. PF accesses its
mbox region via BAR4, whereas VF accesses PF-VF DRAM mailboxes via
BAR2 indirect access.

On CN9XX platform:
The mailbox region in DRAM is divided into two parts, the AF-PF mbox region
and the PF-VF mbox region, i.e. the mbox regions of all PFs are contiguous,
and similarly those of all VFs.
The base address of the AF-PF mbox region is configured at
RVU_AF_PF_BAR4_ADDR.
AF-PF1 mbox address can be calculated as RVU_AF_PF_BAR4_ADDR + mbox
size.

This patch changes mbox initialization to support both CN9XX and CN10K
platform.
This patch also removes platform specific name from the PF/VF driver name
to make it appropriate for all supported platforms.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/Makefile     | 12 ++---
 .../marvell/octeontx2/otx2_cpt_common.h       | 20 +++++++++
 .../marvell/octeontx2/otx2_cpt_hw_types.h     |  3 ++
 drivers/crypto/marvell/octeontx2/otx2_cptpf.h |  1 +
 .../marvell/octeontx2/otx2_cptpf_main.c       | 35 +++++++++------
 drivers/crypto/marvell/octeontx2/otx2_cptvf.h |  3 ++
 .../marvell/octeontx2/otx2_cptvf_main.c       | 45 +++++++++++++------
 .../marvell/octeontx2/otx2_cptvf_mbox.c       | 43 ++++++++++++++++++
 8 files changed, 129 insertions(+), 33 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile
index b9c6201019e0c..10e1fe056a9e5 100644
--- a/drivers/crypto/marvell/octeontx2/Makefile
+++ b/drivers/crypto/marvell/octeontx2/Makefile
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o
+obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o
 
-octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
-		      otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
-octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
-			otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \
-			otx2_cptvf_algs.o
+rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
+		  otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
+rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
+		  otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \
+		  otx2_cptvf_algs.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index ecedd91a8d859..414427dcfa61b 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -25,6 +25,9 @@
 #define OTX2_CPT_NAME_LENGTH 64
 #define OTX2_CPT_DMA_MINALIGN 128
 
+/* HW capability flags */
+#define CN10K_MBOX  0
+
 #define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES
 
 enum otx2_cpt_eng_type {
@@ -116,6 +119,23 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot,
 			     OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs));
 }
 
+static inline bool is_dev_otx2(struct pci_dev *pdev)
+{
+	if (pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID ||
+	    pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID)
+		return true;
+
+	return false;
+}
+
+static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev,
+					unsigned long *cap_flag)
+{
+	if (!is_dev_otx2(pdev))
+		__set_bit(CN10K_MBOX, cap_flag);
+}
+
+
 int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev);
 int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev);
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
index ecafc42f37a26..391a457f71163 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
@@ -10,6 +10,8 @@
 /* Device IDs */
 #define OTX2_CPT_PCI_PF_DEVICE_ID 0xA0FD
 #define OTX2_CPT_PCI_VF_DEVICE_ID 0xA0FE
+#define CN10K_CPT_PCI_PF_DEVICE_ID 0xA0F2
+#define CN10K_CPT_PCI_VF_DEVICE_ID 0xA0F3
 
 /* Mailbox interrupts offset */
 #define OTX2_CPT_PF_MBOX_INT	6
@@ -25,6 +27,7 @@
  */
 #define OTX2_CPT_VF_MSIX_VECTORS 1
 #define OTX2_CPT_VF_INTR_MBOX_MASK BIT(0)
+#define CN10K_CPT_VF_MBOX_REGION  (0xC0000)
 
 /* CPT LF MSIX vectors */
 #define OTX2_CPT_LF_MSIX_VECTORS 2
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
index e19af1356f123..5ebba86c65d93 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
@@ -47,6 +47,7 @@ struct otx2_cptpf_dev {
 	struct workqueue_struct	*flr_wq;
 	struct cptpf_flr_work   *flr_work;
 
+	unsigned long cap_flag;
 	u8 pf_id;               /* RVU PF number */
 	u8 max_vfs;		/* Maximum number of VFs supported by CPT */
 	u8 enabled_vfs;		/* Number of enabled VFs */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 58f47e3ab62e7..d341aecd3dd2f 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -8,8 +8,8 @@
 #include "otx2_cptpf.h"
 #include "rvu_reg.h"
 
-#define OTX2_CPT_DRV_NAME    "octeontx2-cpt"
-#define OTX2_CPT_DRV_STRING  "Marvell OcteonTX2 CPT Physical Function Driver"
+#define OTX2_CPT_DRV_NAME    "rvu_cptpf"
+#define OTX2_CPT_DRV_STRING  "Marvell RVU CPT Physical Function Driver"
 
 static void cptpf_enable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf,
 					int num_vfs)
@@ -284,7 +284,11 @@ static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs)
 		return -ENOMEM;
 
 	/* Map VF-PF mailbox memory */
-	vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR);
+	if (test_bit(CN10K_MBOX, &cptpf->cap_flag))
+		vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_MBOX_ADDR);
+	else
+		vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR);
+
 	if (!vfpf_mbox_base) {
 		dev_err(dev, "VF-PF mailbox address not configured\n");
 		err = -ENOMEM;
@@ -365,6 +369,8 @@ static int cptpf_register_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf)
 
 static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
 {
+	struct pci_dev *pdev = cptpf->pdev;
+	resource_size_t offset;
 	int err;
 
 	cptpf->afpf_mbox_wq = alloc_workqueue("cpt_afpf_mailbox",
@@ -373,8 +379,17 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
 	if (!cptpf->afpf_mbox_wq)
 		return -ENOMEM;
 
+	offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+	/* Map AF-PF mailbox memory */
+	cptpf->afpf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, MBOX_SIZE);
+	if (!cptpf->afpf_mbox_base) {
+		dev_err(&pdev->dev, "Unable to map BAR4\n");
+		err = -ENOMEM;
+		goto error;
+	}
+
 	err = otx2_mbox_init(&cptpf->afpf_mbox, cptpf->afpf_mbox_base,
-			     cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1);
+			     pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1);
 	if (err)
 		goto error;
 
@@ -607,7 +622,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
-	resource_size_t offset, size;
 	struct otx2_cptpf_dev *cptpf;
 	int err;
 
@@ -644,15 +658,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
 	if (err)
 		goto clear_drvdata;
 
-	offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
-	size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
-	/* Map AF-PF mailbox memory */
-	cptpf->afpf_mbox_base = devm_ioremap_wc(dev, offset, size);
-	if (!cptpf->afpf_mbox_base) {
-		dev_err(&pdev->dev, "Unable to map BAR4\n");
-		err = -ENODEV;
-		goto clear_drvdata;
-	}
 	err = pci_alloc_irq_vectors(pdev, RVU_PF_INT_VEC_CNT,
 				    RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX);
 	if (err < 0) {
@@ -660,6 +665,7 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
 			RVU_PF_INT_VEC_CNT);
 		goto clear_drvdata;
 	}
+	otx2_cpt_set_hw_caps(pdev, &cptpf->cap_flag);
 	/* Initialize AF-PF mailbox */
 	err = cptpf_afpf_mbox_init(cptpf);
 	if (err)
@@ -719,6 +725,7 @@ static void otx2_cptpf_remove(struct pci_dev *pdev)
 /* Supported devices */
 static const struct pci_device_id otx2_cpt_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX2_CPT_PCI_PF_DEVICE_ID) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CN10K_CPT_PCI_PF_DEVICE_ID) },
 	{ 0, }  /* end of table */
 };
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
index 4f0a169fddbd0..4207e2236903e 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
@@ -19,11 +19,14 @@ struct otx2_cptvf_dev {
 	struct otx2_mbox	pfvf_mbox;
 	struct work_struct	pfvf_mbox_work;
 	struct workqueue_struct *pfvf_mbox_wq;
+	void *bbuf_base;
+	unsigned long cap_flag;
 };
 
 irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg);
 void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work);
 int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type);
 int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf);
+int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev);
 
 #endif /* __OTX2_CPTVF_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
index 47f3787310243..5178e0688d755 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
@@ -7,7 +7,7 @@
 #include "otx2_cptvf_algs.h"
 #include <rvu_reg.h>
 
-#define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf"
+#define OTX2_CPTVF_DRV_NAME "rvu_cptvf"
 
 static void cptvf_enable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf)
 {
@@ -70,6 +70,8 @@ static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf)
 
 static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf)
 {
+	struct pci_dev *pdev = cptvf->pdev;
+	resource_size_t offset, size;
 	int ret;
 
 	cptvf->pfvf_mbox_wq = alloc_workqueue("cpt_pfvf_mailbox",
@@ -78,14 +80,39 @@ static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf)
 	if (!cptvf->pfvf_mbox_wq)
 		return -ENOMEM;
 
+	if (test_bit(CN10K_MBOX, &cptvf->cap_flag)) {
+		/* For cn10k platform, VF mailbox region is in its BAR2
+		 * register space
+		 */
+		cptvf->pfvf_mbox_base = cptvf->reg_base +
+					CN10K_CPT_VF_MBOX_REGION;
+	} else {
+		offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+		size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+		/* Map PF-VF mailbox memory */
+		cptvf->pfvf_mbox_base = devm_ioremap_wc(&pdev->dev, offset,
+							size);
+		if (!cptvf->pfvf_mbox_base) {
+			dev_err(&pdev->dev, "Unable to map BAR4\n");
+			ret = -ENOMEM;
+			goto free_wqe;
+		}
+	}
+
 	ret = otx2_mbox_init(&cptvf->pfvf_mbox, cptvf->pfvf_mbox_base,
-			     cptvf->pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1);
+			     pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1);
 	if (ret)
 		goto free_wqe;
 
+	ret = otx2_cpt_mbox_bbuf_init(cptvf, pdev);
+	if (ret)
+		goto destroy_mbox;
+
 	INIT_WORK(&cptvf->pfvf_mbox_work, otx2_cptvf_pfvf_mbox_handler);
 	return 0;
 
+destroy_mbox:
+	otx2_mbox_destroy(&cptvf->pfvf_mbox);
 free_wqe:
 	destroy_workqueue(cptvf->pfvf_mbox_wq);
 	return ret;
@@ -305,7 +332,6 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *ent)
 {
 	struct device *dev = &pdev->dev;
-	resource_size_t offset, size;
 	struct otx2_cptvf_dev *cptvf;
 	int ret;
 
@@ -337,15 +363,7 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
 
 	cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM];
 
-	offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
-	size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
-	/* Map PF-VF mailbox memory */
-	cptvf->pfvf_mbox_base = devm_ioremap_wc(dev, offset, size);
-	if (!cptvf->pfvf_mbox_base) {
-		dev_err(&pdev->dev, "Unable to map BAR4\n");
-		ret = -ENODEV;
-		goto clear_drvdata;
-	}
+	otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag);
 	/* Initialize PF<=>VF mailbox */
 	ret = cptvf_pfvf_mbox_init(cptvf);
 	if (ret)
@@ -392,6 +410,7 @@ static void otx2_cptvf_remove(struct pci_dev *pdev)
 /* Supported devices */
 static const struct pci_device_id otx2_cptvf_id_table[] = {
 	{PCI_VDEVICE(CAVIUM, OTX2_CPT_PCI_VF_DEVICE_ID), 0},
+	{PCI_VDEVICE(CAVIUM, CN10K_CPT_PCI_VF_DEVICE_ID), 0},
 	{ 0, }  /* end of table */
 };
 
@@ -405,6 +424,6 @@ static struct pci_driver otx2_cptvf_pci_driver = {
 module_pci_driver(otx2_cptvf_pci_driver);
 
 MODULE_AUTHOR("Marvell");
-MODULE_DESCRIPTION("Marvell OcteonTX2 CPT Virtual Function Driver");
+MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(pci, otx2_cptvf_id_table);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
index 5d73b711cba61..02cb9e44afd81 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
@@ -5,6 +5,48 @@
 #include "otx2_cptvf.h"
 #include <rvu_reg.h>
 
+int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev)
+{
+	struct otx2_mbox_dev *mdev;
+	struct otx2_mbox *otx2_mbox;
+
+	cptvf->bbuf_base = devm_kmalloc(&pdev->dev, MBOX_SIZE, GFP_KERNEL);
+	if (!cptvf->bbuf_base)
+		return -ENOMEM;
+	/*
+	 * Overwrite mbox mbase to point to bounce buffer, so that PF/VF
+	 * prepare all mbox messages in bounce buffer instead of directly
+	 * in hw mbox memory.
+	 */
+	otx2_mbox = &cptvf->pfvf_mbox;
+	mdev = &otx2_mbox->dev[0];
+	mdev->mbase = cptvf->bbuf_base;
+
+	return 0;
+}
+
+static void otx2_cpt_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid)
+{
+	u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+	void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE);
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+	struct mbox_hdr *hdr;
+	u64 msg_size;
+
+	if (mdev->mbase == hw_mbase)
+		return;
+
+	hdr = hw_mbase + mbox->rx_start;
+	msg_size = hdr->msg_size;
+
+	if (msg_size > mbox->rx_size - msgs_offset)
+		msg_size = mbox->rx_size - msgs_offset;
+
+	/* Copy mbox messages from mbox memory to bounce buffer */
+	memcpy(mdev->mbase + mbox->rx_start,
+	       hw_mbase + mbox->rx_start, msg_size + msgs_offset);
+}
+
 irqreturn_t otx2_cptvf_pfvf_mbox_intr(int __always_unused irq, void *arg)
 {
 	struct otx2_cptvf_dev *cptvf = arg;
@@ -106,6 +148,7 @@ void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work)
 
 	cptvf = container_of(work, struct otx2_cptvf_dev, pfvf_mbox_work);
 	pfvf_mbox = &cptvf->pfvf_mbox;
+	otx2_cpt_sync_mbox_bbuf(pfvf_mbox, 0);
 	mdev = &pfvf_mbox->dev[0];
 	rsp_hdr = (struct mbox_hdr *)(mdev->mbase + pfvf_mbox->rx_start);
 	if (rsp_hdr->num_msgs == 0)
-- 
GitLab


From eb33cd9116b2f1d193352c77bd829b61b1249b00 Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 May 2021 16:57:16 +0530
Subject: [PATCH 2140/3804] crypto: octeontx2 - add support to map LMTST region
 for CN10K

On the CN10K platform, transmit/receive buffer alloc and free from/to
hardware has changed to support burst operation, whereas previous
silicon only supports freeing a single buffer at a time.
To support this, firmware allocates a DRAM region for each PF/VF for
storing LMTLINES. These LMTLINES are used to send CPT commands to HW.
PF/VF LMTST region is accessed via BAR4. PFs LMTST region is followed
by its VFs mbox memory. The size of region varies from 2KB to 256KB
based on number of LMTLINES configured.

This patch adds support for mapping of PF/VF LMTST region.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/Makefile     |  5 +-
 drivers/crypto/marvell/octeontx2/cn10k_cpt.c  | 53 +++++++++++++++++++
 drivers/crypto/marvell/octeontx2/cn10k_cpt.h  | 13 +++++
 .../marvell/octeontx2/otx2_cpt_common.h       |  5 +-
 drivers/crypto/marvell/octeontx2/otx2_cptlf.h |  2 +
 .../marvell/octeontx2/otx2_cptpf_main.c       |  5 ++
 .../marvell/octeontx2/otx2_cptvf_main.c       |  6 +++
 7 files changed, 86 insertions(+), 3 deletions(-)
 create mode 100644 drivers/crypto/marvell/octeontx2/cn10k_cpt.c
 create mode 100644 drivers/crypto/marvell/octeontx2/cn10k_cpt.h

diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile
index 10e1fe056a9e5..c242d22008c33 100644
--- a/drivers/crypto/marvell/octeontx2/Makefile
+++ b/drivers/crypto/marvell/octeontx2/Makefile
@@ -2,9 +2,10 @@
 obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o
 
 rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
-		  otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
+		  otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o \
+		  cn10k_cpt.o
 rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
 		  otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \
-		  otx2_cptvf_algs.o
+		  otx2_cptvf_algs.o cn10k_cpt.o
 
 ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
new file mode 100644
index 0000000000000..57cf156934ab1
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Marvell. */
+
+#include "otx2_cptpf.h"
+#include "otx2_cptvf.h"
+#include "otx2_cptlf.h"
+#include "cn10k_cpt.h"
+
+int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf)
+{
+	struct pci_dev *pdev = cptpf->pdev;
+	resource_size_t size;
+	u64 lmt_base;
+
+	if (!test_bit(CN10K_LMTST, &cptpf->cap_flag))
+		return 0;
+
+	lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR);
+	if (!lmt_base) {
+		dev_err(&pdev->dev, "PF LMTLINE address not configured\n");
+		return -ENOMEM;
+	}
+	size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+	size -= ((1 + cptpf->max_vfs) * MBOX_SIZE);
+	cptpf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, lmt_base, size);
+	if (!cptpf->lfs.lmt_base) {
+		dev_err(&pdev->dev,
+			"Mapping of PF LMTLINE address failed\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf)
+{
+	struct pci_dev *pdev = cptvf->pdev;
+	resource_size_t offset, size;
+
+	if (!test_bit(CN10K_LMTST, &cptvf->cap_flag))
+		return 0;
+
+	offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+	size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+	/* Map VF LMILINE region */
+	cptvf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, offset, size);
+	if (!cptvf->lfs.lmt_base) {
+		dev_err(&pdev->dev, "Unable to map BAR4\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
new file mode 100644
index 0000000000000..b9a8c463eaf37
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (C) 2021 Marvell.
+ */
+#ifndef __CN10K_CPT_H
+#define __CN10K_CPT_H
+
+#include "otx2_cptpf.h"
+#include "otx2_cptvf.h"
+
+int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf);
+int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf);
+
+#endif /* __CN10K_CPTLF_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index 414427dcfa61b..c5445b05f53c2 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -27,6 +27,7 @@
 
 /* HW capability flags */
 #define CN10K_MBOX  0
+#define CN10K_LMTST 1
 
 #define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES
 
@@ -131,8 +132,10 @@ static inline bool is_dev_otx2(struct pci_dev *pdev)
 static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev,
 					unsigned long *cap_flag)
 {
-	if (!is_dev_otx2(pdev))
+	if (!is_dev_otx2(pdev)) {
 		__set_bit(CN10K_MBOX, cap_flag);
+		__set_bit(CN10K_LMTST, cap_flag);
+	}
 }
 
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index ab1678fc564d6..c87c18e311710 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -87,6 +87,8 @@ struct otx2_cptlf_info {
 struct otx2_cptlfs_info {
 	/* Registers start address of VF/PF LFs are attached to */
 	void __iomem *reg_base;
+#define LMTLINE_SIZE  128
+	void __iomem *lmt_base;
 	struct pci_dev *pdev;   /* Device LFs are attached to */
 	struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM];
 	struct otx2_mbox *mbox;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index d341aecd3dd2f..4ec3a4613e74a 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -6,6 +6,7 @@
 #include "otx2_cpt_common.h"
 #include "otx2_cptpf_ucode.h"
 #include "otx2_cptpf.h"
+#include "cn10k_cpt.h"
 #include "rvu_reg.h"
 
 #define OTX2_CPT_DRV_NAME    "rvu_cptpf"
@@ -677,6 +678,10 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
 
 	cptpf->max_vfs = pci_sriov_get_totalvfs(pdev);
 
+	err = cn10k_cptpf_lmtst_init(cptpf);
+	if (err)
+		goto unregister_intr;
+
 	/* Initialize CPT PF device */
 	err = cptpf_device_init(cptpf);
 	if (err)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
index 5178e0688d755..3411e664cf50c 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
@@ -5,6 +5,7 @@
 #include "otx2_cptvf.h"
 #include "otx2_cptlf.h"
 #include "otx2_cptvf_algs.h"
+#include "cn10k_cpt.h"
 #include <rvu_reg.h>
 
 #define OTX2_CPTVF_DRV_NAME "rvu_cptvf"
@@ -364,6 +365,11 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
 	cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM];
 
 	otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag);
+
+	ret = cn10k_cptvf_lmtst_init(cptvf);
+	if (ret)
+		goto clear_drvdata;
+
 	/* Initialize PF<=>VF mailbox */
 	ret = cptvf_pfvf_mbox_init(cptvf);
 	if (ret)
-- 
GitLab


From 40a645f753b32346f1ab3953e769479561a19b8d Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 May 2021 16:57:17 +0530
Subject: [PATCH 2141/3804] crypto: octeontx2 - add support for CPT operations
 on CN10K

The CPT result format has changed for CN10K HW to accommodate more
fields. This patch adds support for using the new result format and
the new LMTST lines for CPT operations on the CN10K platform.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx2/cn10k_cpt.c  | 44 ++++++++++++++++++-
 drivers/crypto/marvell/octeontx2/cn10k_cpt.h  | 23 ++++++++++
 .../marvell/octeontx2/otx2_cpt_hw_types.h     | 13 +++++-
 drivers/crypto/marvell/octeontx2/otx2_cptlf.c |  9 +++-
 drivers/crypto/marvell/octeontx2/otx2_cptlf.h |  8 ++++
 .../marvell/octeontx2/otx2_cptpf_main.c       |  2 +-
 .../marvell/octeontx2/otx2_cptpf_ucode.c      | 32 +++++++++++---
 .../marvell/octeontx2/otx2_cptpf_ucode.h      |  8 ++--
 .../marvell/octeontx2/otx2_cptvf_reqmgr.c     | 17 ++++---
 9 files changed, 134 insertions(+), 22 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
index 57cf156934ab1..1499ef75b5c22 100644
--- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
@@ -1,20 +1,57 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright (C) 2021 Marvell. */
 
+#include <linux/soc/marvell/octeontx2/asm.h>
 #include "otx2_cptpf.h"
 #include "otx2_cptvf.h"
 #include "otx2_cptlf.h"
 #include "cn10k_cpt.h"
 
+static struct cpt_hw_ops otx2_hw_ops = {
+	.send_cmd = otx2_cpt_send_cmd,
+	.cpt_get_compcode = otx2_cpt_get_compcode,
+	.cpt_get_uc_compcode = otx2_cpt_get_uc_compcode,
+};
+
+static struct cpt_hw_ops cn10k_hw_ops = {
+	.send_cmd = cn10k_cpt_send_cmd,
+	.cpt_get_compcode = cn10k_cpt_get_compcode,
+	.cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode,
+};
+
+void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+			struct otx2_cptlf_info *lf)
+{
+	void __iomem *lmtline = lf->lmtline;
+	u64 val = (lf->slot & 0x7FF);
+	u64 tar_addr = 0;
+
+	/* tar_addr<6:4> = Size of first LMTST - 1 in units of 128b. */
+	tar_addr |= (__force u64)lf->ioreg |
+		    (((OTX2_CPT_INST_SIZE/16) - 1) & 0x7) << 4;
+	/*
+	 * Make sure memory areas pointed in CPT_INST_S
+	 * are flushed before the instruction is sent to CPT
+	 */
+	dma_wmb();
+
+	/* Copy CPT command to LMTLINE */
+	memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE);
+	cn10k_lmt_flush(val, tar_addr);
+}
+
 int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf)
 {
 	struct pci_dev *pdev = cptpf->pdev;
 	resource_size_t size;
 	u64 lmt_base;
 
-	if (!test_bit(CN10K_LMTST, &cptpf->cap_flag))
+	if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) {
+		cptpf->lfs.ops = &otx2_hw_ops;
 		return 0;
+	}
 
+	cptpf->lfs.ops = &cn10k_hw_ops;
 	lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR);
 	if (!lmt_base) {
 		dev_err(&pdev->dev, "PF LMTLINE address not configured\n");
@@ -37,9 +74,12 @@ int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf)
 	struct pci_dev *pdev = cptvf->pdev;
 	resource_size_t offset, size;
 
-	if (!test_bit(CN10K_LMTST, &cptvf->cap_flag))
+	if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) {
+		cptvf->lfs.ops = &otx2_hw_ops;
 		return 0;
+	}
 
+	cptvf->lfs.ops = &cn10k_hw_ops;
 	offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
 	size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
 	/* Map VF LMILINE region */
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
index b9a8c463eaf37..c091392b47e0f 100644
--- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
@@ -4,9 +4,32 @@
 #ifndef __CN10K_CPT_H
 #define __CN10K_CPT_H
 
+#include "otx2_cpt_common.h"
 #include "otx2_cptpf.h"
 #include "otx2_cptvf.h"
 
+static inline u8 cn10k_cpt_get_compcode(union otx2_cpt_res_s *result)
+{
+	return ((struct cn10k_cpt_res_s *)result)->compcode;
+}
+
+static inline u8 cn10k_cpt_get_uc_compcode(union otx2_cpt_res_s *result)
+{
+	return ((struct cn10k_cpt_res_s *)result)->uc_compcode;
+}
+
+static inline u8 otx2_cpt_get_compcode(union otx2_cpt_res_s *result)
+{
+	return ((struct cn9k_cpt_res_s *)result)->compcode;
+}
+
+static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result)
+{
+	return ((struct cn9k_cpt_res_s *)result)->uc_compcode;
+}
+
+void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+			struct otx2_cptlf_info *lf);
 int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf);
 int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf);
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
index 391a457f71163..6f947978e4e89 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
@@ -138,7 +138,7 @@ enum otx2_cpt_comp_e {
 	OTX2_CPT_COMP_E_FAULT = 0x02,
 	OTX2_CPT_COMP_E_HWERR = 0x04,
 	OTX2_CPT_COMP_E_INSTERR = 0x05,
-	OTX2_CPT_COMP_E_LAST_ENTRY = 0x06
+	OTX2_CPT_COMP_E_WARN = 0x06
 };
 
 /*
@@ -269,13 +269,22 @@ union otx2_cpt_inst_s {
 union otx2_cpt_res_s {
 	u64 u[2];
 
-	struct {
+	struct cn9k_cpt_res_s {
 		u64 compcode:8;
 		u64 uc_compcode:8;
 		u64 doneint:1;
 		u64 reserved_17_63:47;
 		u64 reserved_64_127;
 	} s;
+
+	struct cn10k_cpt_res_s {
+		u64 compcode:7;
+		u64 doneint:1;
+		u64 uc_compcode:8;
+		u64 rlen:16;
+		u64 spi:32;
+		u64 esn;
+	} cn10k;
 };
 
 /*
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
index 34aba15327612..c8350fcd60fab 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
@@ -379,9 +379,14 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri,
 	for (slot = 0; slot < lfs->lfs_num; slot++) {
 		lfs->lf[slot].lfs = lfs;
 		lfs->lf[slot].slot = slot;
-		lfs->lf[slot].lmtline = lfs->reg_base +
-			OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot,
+		if (lfs->lmt_base)
+			lfs->lf[slot].lmtline = lfs->lmt_base +
+						(slot * LMTLINE_SIZE);
+		else
+			lfs->lf[slot].lmtline = lfs->reg_base +
+				OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot,
 						 OTX2_CPT_LMT_LF_LMTLINEX(0));
+
 		lfs->lf[slot].ioreg = lfs->reg_base +
 			OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_CPT0, slot,
 						 OTX2_CPT_LF_NQX(0));
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index c87c18e311710..b691b6c1d5c45 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -84,6 +84,13 @@ struct otx2_cptlf_info {
 	struct otx2_cptlf_wqe *wqe;       /* Tasklet work info */
 };
 
+struct cpt_hw_ops {
+	void (*send_cmd)(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+			 struct otx2_cptlf_info *lf);
+	u8 (*cpt_get_compcode)(union otx2_cpt_res_s *result);
+	u8 (*cpt_get_uc_compcode)(union otx2_cpt_res_s *result);
+};
+
 struct otx2_cptlfs_info {
 	/* Registers start address of VF/PF LFs are attached to */
 	void __iomem *reg_base;
@@ -92,6 +99,7 @@ struct otx2_cptlfs_info {
 	struct pci_dev *pdev;   /* Device LFs are attached to */
 	struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM];
 	struct otx2_mbox *mbox;
+	struct cpt_hw_ops *ops;
 	u8 are_lfs_attached;	/* Whether CPT LFs are attached */
 	u8 lfs_num;		/* Number of CPT LFs */
 	u8 kcrypto_eng_grp_num;	/* Kernel crypto engine group number */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 4ec3a4613e74a..1fb04f9bb7ac4 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -586,7 +586,7 @@ static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs)
 	if (ret)
 		goto disable_intr;
 
-	ret = otx2_cpt_create_eng_grps(cptpf->pdev, &cptpf->eng_grps);
+	ret = otx2_cpt_create_eng_grps(cptpf, &cptpf->eng_grps);
 	if (ret)
 		goto disable_intr;
 
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index a531f4c8b4414..dff34b3ec09e1 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -16,6 +16,8 @@
 #define LOADFVC_MAJOR_OP 0x01
 #define LOADFVC_MINOR_OP 0x08
 
+#define CTX_FLUSH_TIMER_CNT 0xFFFFFF
+
 struct fw_info_t {
 	struct list_head ucodes;
 };
@@ -666,7 +668,8 @@ static int reserve_engines(struct device *dev,
 static void ucode_unload(struct device *dev, struct otx2_cpt_ucode *ucode)
 {
 	if (ucode->va) {
-		dma_free_coherent(dev, ucode->size, ucode->va, ucode->dma);
+		dma_free_coherent(dev, OTX2_CPT_UCODE_SZ, ucode->va,
+				  ucode->dma);
 		ucode->va = NULL;
 		ucode->dma = 0;
 		ucode->size = 0;
@@ -685,7 +688,7 @@ static int copy_ucode_to_dma_mem(struct device *dev,
 	u32 i;
 
 	/*  Allocate DMAable space */
-	ucode->va = dma_alloc_coherent(dev, ucode->size, &ucode->dma,
+	ucode->va = dma_alloc_coherent(dev, OTX2_CPT_UCODE_SZ, &ucode->dma,
 				       GFP_KERNEL);
 	if (!ucode->va)
 		return -ENOMEM;
@@ -1100,11 +1103,12 @@ int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type)
 	return eng_grp_num;
 }
 
-int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
+int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 			     struct otx2_cpt_eng_grps *eng_grps)
 {
 	struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = {  };
 	struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} };
+	struct pci_dev *pdev = cptpf->pdev;
 	struct fw_info_t fw_info;
 	int ret;
 
@@ -1180,6 +1184,23 @@ int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
 	eng_grps->is_grps_created = true;
 
 	cpt_ucode_release_fw(&fw_info);
+
+	if (is_dev_otx2(pdev))
+		return 0;
+	/*
+	 * Configure engine group mask to allow context prefetching
+	 * for the groups.
+	 */
+	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL,
+			      OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16),
+			      BLKADDR_CPT0);
+	/*
+	 * Set interval to periodically flush dirty data for the next
+	 * CTX cache entry. Set the interval count to maximum supported
+	 * value.
+	 */
+	otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER,
+			      CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0);
 	return 0;
 
 delete_eng_grp:
@@ -1460,9 +1481,10 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
 		iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
 							 etype);
 		otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
-		otx2_cpt_send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
+		lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
 
-		while (result->s.compcode == OTX2_CPT_COMPLETION_CODE_INIT)
+		while (lfs->ops->cpt_get_compcode(result) ==
+						OTX2_CPT_COMPLETION_CODE_INIT)
 			cpu_relax();
 
 		cptpf->eng_caps[etype].u = be64_to_cpup(rptr);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
index 6b0d432de0afa..fe019ab730b2d 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
@@ -23,11 +23,13 @@
 /* Microcode version string length */
 #define OTX2_CPT_UCODE_VER_STR_SZ   44
 
-/* Maximum number of supported engines/cores on OcteonTX2 platform */
-#define OTX2_CPT_MAX_ENGINES        128
+/* Maximum number of supported engines/cores on OcteonTX2/CN10K platform */
+#define OTX2_CPT_MAX_ENGINES        144
 
 #define OTX2_CPT_ENGS_BITMASK_LEN   BITS_TO_LONGS(OTX2_CPT_MAX_ENGINES)
 
+#define OTX2_CPT_UCODE_SZ           (64 * 1024)
+
 /* Microcode types */
 enum otx2_cpt_ucode_type {
 	OTX2_CPT_AE_UC_TYPE = 1,  /* AE-MAIN */
@@ -153,7 +155,7 @@ int otx2_cpt_init_eng_grps(struct pci_dev *pdev,
 			   struct otx2_cpt_eng_grps *eng_grps);
 void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev,
 			       struct otx2_cpt_eng_grps *eng_grps);
-int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
+int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
 			     struct otx2_cpt_eng_grps *eng_grps);
 int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf);
 int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
index d5c1c1b7c7e4b..811ded72ce5fb 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
@@ -320,7 +320,7 @@ static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 			     cpt_req->dlen, false);
 
 	/* Send CPT command */
-	otx2_cpt_send_cmd(&cptinst, 1, lf);
+	lf->lfs->ops->send_cmd(&cptinst, 1, lf);
 
 	/*
 	 * We allocate and prepare pending queue entry in critical section
@@ -349,13 +349,14 @@ int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
 			       &lfs->lf[cpu_num]);
 }
 
-static int cpt_process_ccode(struct pci_dev *pdev,
+static int cpt_process_ccode(struct otx2_cptlfs_info *lfs,
 			     union otx2_cpt_res_s *cpt_status,
 			     struct otx2_cpt_inst_info *info,
 			     u32 *res_code)
 {
-	u8 uc_ccode = cpt_status->s.uc_compcode;
-	u8 ccode = cpt_status->s.compcode;
+	u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status);
+	u8 ccode = lfs->ops->cpt_get_compcode(cpt_status);
+	struct pci_dev *pdev = lfs->pdev;
 
 	switch (ccode) {
 	case OTX2_CPT_COMP_E_FAULT:
@@ -389,6 +390,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
 		return 1;
 
 	case OTX2_CPT_COMP_E_GOOD:
+	case OTX2_CPT_COMP_E_WARN:
 		/*
 		 * Check microcode completion code, it is only valid
 		 * when completion code is CPT_COMP_E::GOOD
@@ -426,7 +428,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
 	return 0;
 }
 
-static inline void process_pending_queue(struct pci_dev *pdev,
+static inline void process_pending_queue(struct otx2_cptlfs_info *lfs,
 					 struct otx2_cpt_pending_queue *pqueue)
 {
 	struct otx2_cpt_pending_entry *resume_pentry = NULL;
@@ -436,6 +438,7 @@ static inline void process_pending_queue(struct pci_dev *pdev,
 	struct otx2_cpt_inst_info *info = NULL;
 	struct otx2_cpt_req_info *req = NULL;
 	struct crypto_async_request *areq;
+	struct pci_dev *pdev = lfs->pdev;
 	u32 res_code, resume_index;
 
 	while (1) {
@@ -476,7 +479,7 @@ static inline void process_pending_queue(struct pci_dev *pdev,
 			goto process_pentry;
 		}
 
-		if (cpt_process_ccode(pdev, cpt_status, info, &res_code)) {
+		if (cpt_process_ccode(lfs, cpt_status, info, &res_code)) {
 			spin_unlock_bh(&pqueue->lock);
 			return;
 		}
@@ -529,7 +532,7 @@ process_pentry:
 
 void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe)
 {
-	process_pending_queue(wqe->lfs->pdev,
+	process_pending_queue(wqe->lfs,
 			      &wqe->lfs->lf[wqe->lf_num].pqueue);
 }
 
-- 
GitLab


From 76c1f4e0efd8abeaa3c7789d10ef9c82d950bedd Mon Sep 17 00:00:00 2001
From: Srujana Challa <schalla@marvell.com>
Date: Tue, 25 May 2021 16:57:18 +0530
Subject: [PATCH 2142/3804] crypto: octeontx2 - enable and handle ME interrupts

Add a master enable (ME) interrupt handler in the PF. Upon
receiving an ME interrupt for a VF, the PF clears that VF's
transaction pending bit.

Signed-off-by: Srujana Challa <schalla@marvell.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../marvell/octeontx2/otx2_cptpf_main.c       | 118 ++++++++++++++----
 1 file changed, 95 insertions(+), 23 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 1fb04f9bb7ac4..146a55ac4b9b0 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -63,45 +63,66 @@ static void cptpf_disable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf,
 	}
 }
 
-static void cptpf_enable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf)
+static void cptpf_enable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf,
+					 int num_vfs)
 {
-	/* Clear interrupt if any */
+	/* Clear FLR interrupt if any */
 	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0),
-			~0x0ULL);
-	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
-			~0x0ULL);
+			 INTR_MASK(num_vfs));
 
 	/* Enable VF FLR interrupts */
 	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1SX(0), ~0x0ULL);
+			 RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(num_vfs));
+	/* Clear ME interrupt if any */
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(0),
+			 INTR_MASK(num_vfs));
+	/* Enable VF ME interrupts */
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+			 RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(num_vfs));
+
+	if (num_vfs <= 64)
+		return;
+
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
+			 INTR_MASK(num_vfs - 64));
 	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1SX(1), ~0x0ULL);
+			 RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64));
+
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(1),
+			 INTR_MASK(num_vfs - 64));
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+			 RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64));
 }
 
-static void cptpf_disable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf,
+static void cptpf_disable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf,
 				       int num_vfs)
 {
 	int vector;
 
 	/* Disable VF FLR interrupts */
 	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1CX(0), ~0x0ULL);
+			 RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(num_vfs));
+	vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0);
+	free_irq(vector, cptpf);
+
+	/* Disable VF ME interrupts */
 	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
-			 RVU_PF_VFFLR_INT_ENA_W1CX(1), ~0x0ULL);
+			 RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(num_vfs));
+	vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME0);
+	free_irq(vector, cptpf);
 
-	/* Clear interrupt if any */
-	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0),
-			 ~0x0ULL);
-	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
-			 ~0x0ULL);
+	if (num_vfs <= 64)
+		return;
 
-	vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0);
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+			 RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64));
+	vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1);
 	free_irq(vector, cptpf);
 
-	if (num_vfs > 64) {
-		vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1);
-		free_irq(vector, cptpf);
-	}
+	otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+			 RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64));
+	vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME1);
+	free_irq(vector, cptpf);
 }
 
 static void cptpf_flr_wq_handler(struct work_struct *work)
@@ -173,11 +194,38 @@ static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t cptpf_vf_me_intr(int __always_unused irq, void *arg)
+{
+	struct otx2_cptpf_dev *cptpf = arg;
+	int reg, vf, num_reg = 1;
+	u64 intr;
+
+	if (cptpf->max_vfs > 64)
+		num_reg = 2;
+
+	for (reg = 0; reg < num_reg; reg++) {
+		intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0,
+				       RVU_PF_VFME_INTX(reg));
+		if (!intr)
+			continue;
+		for (vf = 0; vf < 64; vf++) {
+			if (!(intr & BIT_ULL(vf)))
+				continue;
+			otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+					 RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+			/* Clear interrupt */
+			otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+					 RVU_PF_VFME_INTX(reg), BIT_ULL(vf));
+		}
+	}
+	return IRQ_HANDLED;
+}
+
 static void cptpf_unregister_vfpf_intr(struct otx2_cptpf_dev *cptpf,
 				       int num_vfs)
 {
 	cptpf_disable_vfpf_mbox_intr(cptpf, num_vfs);
-	cptpf_disable_vf_flr_intrs(cptpf, num_vfs);
+	cptpf_disable_vf_flr_me_intrs(cptpf, num_vfs);
 }
 
 static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
@@ -203,6 +251,15 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
 			"IRQ registration failed for VFFLR0 irq\n");
 		goto free_mbox0_irq;
 	}
+	vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0);
+	/* Register VF ME interrupt handler */
+	ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME0", cptpf);
+	if (ret) {
+		dev_err(dev,
+			"IRQ registration failed for VFME0 irq\n");
+		goto free_flr0_irq;
+	}
+
 	if (num_vfs > 64) {
 		vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1);
 		ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0,
@@ -210,7 +267,7 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
 		if (ret) {
 			dev_err(dev,
 				"IRQ registration failed for PFVF mbox1 irq\n");
-			goto free_flr0_irq;
+			goto free_me0_irq;
 		}
 		vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1);
 		/* Register VF FLR interrupt handler */
@@ -221,15 +278,30 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
 				"IRQ registration failed for VFFLR1 irq\n");
 			goto free_mbox1_irq;
 		}
+		vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME1);
+		/* Register VF ME interrupt handler */
+		ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME1",
+				  cptpf);
+		if (ret) {
+			dev_err(dev,
+				"IRQ registration failed for VFME1 irq\n");
+			goto free_flr1_irq;
+		}
 	}
 	cptpf_enable_vfpf_mbox_intr(cptpf, num_vfs);
-	cptpf_enable_vf_flr_intrs(cptpf);
+	cptpf_enable_vf_flr_me_intrs(cptpf, num_vfs);
 
 	return 0;
 
+free_flr1_irq:
+	vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1);
+	free_irq(vector, cptpf);
 free_mbox1_irq:
 	vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1);
 	free_irq(vector, cptpf);
+free_me0_irq:
+	vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0);
+	free_irq(vector, cptpf);
 free_flr0_irq:
 	vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0);
 	free_irq(vector, cptpf);
-- 
GitLab


From d5c1477b2f39173a988c01694d9bfafc771fa6ef Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 28 May 2021 18:26:13 +0800
Subject: [PATCH 2143/3804] crypto: hisilicon/sec - add new type of SQE

Add new type of sqe for Kunpeng930, which is the next generation
of SEC accelerator hardware. The hardware adds a new SQE data
structure.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |   5 +-
 drivers/crypto/hisilicon/sec2/sec_crypto.h | 174 +++++++++++++++++++++
 2 files changed, 178 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index dfdce2f21e658..28679cf4e4683 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -38,7 +38,10 @@ struct sec_aead_req {
 
 /* SEC request of Crypto */
 struct sec_req {
-	struct sec_sqe sec_sqe;
+	union {
+		struct sec_sqe sec_sqe;
+		struct sec_sqe3 sec_sqe3;
+	};
 	struct sec_ctx *ctx;
 	struct sec_qp_ctx *qp_ctx;
 
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 9c78edac56a4b..3b64e1705479f 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -44,6 +44,7 @@ enum sec_ckey_type {
 enum sec_bd_type {
 	SEC_BD_TYPE1 = 0x1,
 	SEC_BD_TYPE2 = 0x2,
+	SEC_BD_TYPE3 = 0x3,
 };
 
 enum sec_auth {
@@ -63,6 +64,16 @@ enum sec_addr_type {
 	SEC_PRP  = 0x2,
 };
 
+enum {
+	AUTHPAD_PAD,
+	AUTHPAD_NOPAD,
+};
+
+enum {
+	AIGEN_GEN,
+	AIGEN_NOGEN,
+};
+
 struct sec_sqe_type2 {
 	/*
 	 * mac_len: 0~4 bits
@@ -209,6 +220,169 @@ struct sec_sqe {
 	struct sec_sqe_type2 type2;
 };
 
+struct bd3_auth_ivin {
+	__le64 a_ivin_addr;
+	__le32 rsvd0;
+	__le32 rsvd1;
+} __packed __aligned(4);
+
+struct bd3_skip_data {
+	__le32 rsvd0;
+
+	/*
+	 * gran_num: 0~15 bits
+	 * reserved: 16~31 bits
+	 */
+	__le32 gran_num;
+
+	/*
+	 * src_skip_data_len: 0~24 bits
+	 * reserved: 25~31 bits
+	 */
+	__le32 src_skip_data_len;
+
+	/*
+	 * dst_skip_data_len: 0~24 bits
+	 * reserved: 25~31 bits
+	 */
+	__le32 dst_skip_data_len;
+};
+
+struct bd3_stream_scene {
+	__le64 c_ivin_addr;
+	__le64 long_a_data_len;
+
+	/*
+	 * auth_pad: 0~1 bits
+	 * stream_protocol: 2~4 bits
+	 * reserved: 5~7 bits
+	 */
+	__u8 stream_auth_pad;
+	__u8 plaintext_type;
+	__le16 pad_len_1p3;
+} __packed __aligned(4);
+
+struct bd3_no_scene {
+	__le64 c_ivin_addr;
+	__le32 rsvd0;
+	__le32 rsvd1;
+	__le32 rsvd2;
+} __packed __aligned(4);
+
+struct bd3_check_sum {
+	__u8 rsvd0;
+	__u8 hac_sva_status;
+	__le16 check_sum_i;
+};
+
+struct bd3_tls_type_back {
+	__u8 tls_1p3_type_back;
+	__u8 hac_sva_status;
+	__le16 pad_len_1p3_back;
+};
+
+struct sec_sqe3 {
+	/*
+	 * type: 0~3 bit
+	 * bd_invalid: 4 bit
+	 * scene: 5~8 bit
+	 * de: 9~10 bit
+	 * src_addr_type: 11~13 bit
+	 * dst_addr_type: 14~16 bit
+	 * mac_addr_type: 17~19 bit
+	 * reserved: 20~31 bits
+	 */
+	__le32 bd_param;
+
+	/*
+	 * cipher: 0~1 bits
+	 * ci_gen: 2~3 bit
+	 * c_icv_len: 4~9 bit
+	 * c_width: 10~12 bits
+	 * c_key_len: 13~15 bits
+	 */
+	__le16 c_icv_key;
+
+	/*
+	 * c_mode : 0~3 bits
+	 * c_alg : 4~7 bits
+	 */
+	__u8 c_mode_alg;
+
+	/*
+	 * nonce_len : 0~3 bits
+	 * huk : 4 bits
+	 * cal_iv_addr_en : 5 bits
+	 * seq : 6 bits
+	 * reserved : 7 bits
+	 */
+	__u8 huk_iv_seq;
+
+	__le64 tag;
+	__le64 data_src_addr;
+	__le64 a_key_addr;
+	union {
+		struct bd3_auth_ivin auth_ivin;
+		struct bd3_skip_data skip_data;
+	};
+
+	__le64 c_key_addr;
+
+	/*
+	 * auth: 0~1 bits
+	 * ai_gen: 2~3 bits
+	 * mac_len: 4~8 bits
+	 * akey_len: 9~14 bits
+	 * a_alg: 15~20 bits
+	 * key_sel: 21~24 bits
+	 * updata_key: 25 bits
+	 * reserved: 26~31 bits
+	 */
+	__le32 auth_mac_key;
+	__le32 salt;
+	__le16 auth_src_offset;
+	__le16 cipher_src_offset;
+
+	/*
+	 * auth_len: 0~23 bit
+	 * auth_key_offset: 24~31 bits
+	 */
+	__le32 a_len_key;
+
+	/*
+	 * cipher_len: 0~23 bit
+	 * auth_ivin_offset: 24~31 bits
+	 */
+	__le32 c_len_ivin;
+	__le64 data_dst_addr;
+	__le64 mac_addr;
+	union {
+		struct bd3_stream_scene stream_scene;
+		struct bd3_no_scene no_scene;
+	};
+
+	/*
+	 * done: 0 bit
+	 * icv: 1~3 bit
+	 * csc: 4~6 bit
+	 * flag: 7~10 bit
+	 * reserved: 11~15 bit
+	 */
+	__le16 done_flag;
+	__u8 error_type;
+	__u8 warning_type;
+	union {
+		__le32 mac_i;
+		__le32 kek_key_addr_l;
+	};
+	union {
+		__le32 kek_key_addr_h;
+		struct bd3_check_sum check_sum;
+		struct bd3_tls_type_back tls_type_back;
+	};
+	__le32 counter;
+} __packed __aligned(4);
+
 int sec_register_to_crypto(struct hisi_qm *qm);
 void sec_unregister_from_crypto(struct hisi_qm *qm);
 #endif
-- 
GitLab


From adc3f65a7806dda12894870731509b6778735319 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 28 May 2021 18:26:14 +0800
Subject: [PATCH 2144/3804] crypto: hisilicon/sec - driver adapt to new SQE

Because Kunpeng930 adds a new SQE data structure, the SEC driver needs
to be updated. The changes mainly cover the BD parsing process and the
BD filling process.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |   1 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 291 ++++++++++++++++++---
 drivers/crypto/hisilicon/sec2/sec_crypto.h |   7 +
 3 files changed, 256 insertions(+), 43 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 28679cf4e4683..14ba66da75855 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -140,6 +140,7 @@ struct sec_ctx {
 	bool pbuf_supported;
 	struct sec_cipher_ctx c_ctx;
 	struct sec_auth_ctx a_ctx;
+	u8 type_supported;
 	struct device *dev;
 };
 
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 133aede8bf078..f4b1c8cbb4d51 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -37,10 +37,22 @@
 #define SEC_AEAD_ALG_OFFSET     11
 #define SEC_AUTH_OFFSET		6
 
+#define SEC_DE_OFFSET_V3		9
+#define SEC_SCENE_OFFSET_V3	5
+#define SEC_CKEY_OFFSET_V3	13
+#define SEC_SRC_SGL_OFFSET_V3	11
+#define SEC_DST_SGL_OFFSET_V3	14
+#define SEC_CALG_OFFSET_V3	4
+#define SEC_AKEY_OFFSET_V3	9
+#define SEC_MAC_OFFSET_V3	4
+#define SEC_AUTH_ALG_OFFSET_V3	15
+#define SEC_CIPHER_AUTH_V3	0xbf
+#define SEC_AUTH_CIPHER_V3	0x40
 #define SEC_FLAG_OFFSET		7
 #define SEC_FLAG_MASK		0x0780
 #define SEC_TYPE_MASK		0x0F
 #define SEC_DONE_MASK		0x0001
+#define SEC_SQE_LEN_RATE_MASK	0x3
 
 #define SEC_TOTAL_IV_SZ		(SEC_IV_SIZE * QM_Q_DEPTH)
 #define SEC_SGL_SGE_NR		128
@@ -145,44 +157,90 @@ static int sec_aead_verify(struct sec_req *req)
 	return 0;
 }
 
+static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
+{
+	struct sec_sqe *bd = resp;
+
+	status->done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
+	status->flag = (le16_to_cpu(bd->type2.done_flag) &
+					SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
+	status->tag = le16_to_cpu(bd->type2.tag);
+	status->err_type = bd->type2.error_type;
+
+	return bd->type_cipher_auth & SEC_TYPE_MASK;
+}
+
+static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp)
+{
+	struct sec_sqe3 *bd3 = resp;
+
+	status->done = le16_to_cpu(bd3->done_flag) & SEC_DONE_MASK;
+	status->flag = (le16_to_cpu(bd3->done_flag) &
+					SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
+	status->tag = le64_to_cpu(bd3->tag);
+	status->err_type = bd3->error_type;
+
+	return le32_to_cpu(bd3->bd_param) & SEC_TYPE_MASK;
+}
+
+static int sec_cb_status_check(struct sec_req *req,
+			       struct bd_status *status)
+{
+	struct sec_ctx *ctx = req->ctx;
+
+	if (unlikely(req->err_type || status->done != SEC_SQE_DONE)) {
+		dev_err_ratelimited(ctx->dev, "err_type[%d], done[%u]\n",
+				    req->err_type, status->done);
+		return -EIO;
+	}
+
+	if (unlikely(ctx->alg_type == SEC_SKCIPHER)) {
+		if (unlikely(status->flag != SEC_SQE_CFLAG)) {
+			dev_err_ratelimited(ctx->dev, "flag[%u]\n",
+					    status->flag);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
 static void sec_req_cb(struct hisi_qp *qp, void *resp)
 {
 	struct sec_qp_ctx *qp_ctx = qp->qp_ctx;
 	struct sec_dfx *dfx = &qp_ctx->ctx->sec->debug.dfx;
-	struct sec_sqe *bd = resp;
+	u8 type_supported = qp_ctx->ctx->type_supported;
+	struct bd_status status;
 	struct sec_ctx *ctx;
 	struct sec_req *req;
-	u16 done, flag;
-	int err = 0;
+	int err;
 	u8 type;
 
-	type = bd->type_cipher_auth & SEC_TYPE_MASK;
-	if (unlikely(type != SEC_BD_TYPE2)) {
+	if (type_supported == SEC_BD_TYPE2) {
+		type = pre_parse_finished_bd(&status, resp);
+		req = qp_ctx->req_list[status.tag];
+	} else {
+		type = pre_parse_finished_bd3(&status, resp);
+		req = (void *)(uintptr_t)status.tag;
+	}
+
+	if (unlikely(type != type_supported)) {
 		atomic64_inc(&dfx->err_bd_cnt);
 		pr_err("err bd type [%d]\n", type);
 		return;
 	}
 
-	req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)];
 	if (unlikely(!req)) {
 		atomic64_inc(&dfx->invalid_req_cnt);
 		atomic_inc(&qp->qp_status.used);
 		return;
 	}
-	req->err_type = bd->type2.error_type;
+
+	req->err_type = status.err_type;
 	ctx = req->ctx;
-	done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
-	flag = (le16_to_cpu(bd->type2.done_flag) &
-		SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
-	if (unlikely(req->err_type || done != SEC_SQE_DONE ||
-	    (ctx->alg_type == SEC_SKCIPHER && flag != SEC_SQE_CFLAG) ||
-	    (ctx->alg_type == SEC_AEAD && flag != SEC_SQE_AEAD_FLAG))) {
-		dev_err_ratelimited(ctx->dev,
-			"err_type[%d],done[%d],flag[%d]\n",
-			req->err_type, done, flag);
-		err = -EIO;
+	err = sec_cb_status_check(req, &status);
+	if (err)
 		atomic64_inc(&dfx->done_flag_cnt);
-	}
 
 	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
 		err = sec_aead_verify(req);
@@ -382,10 +440,11 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
 	qp = ctx->qps[qp_ctx_id];
 	qp->req_type = 0;
 	qp->qp_ctx = qp_ctx;
-	qp->req_cb = sec_req_cb;
 	qp_ctx->qp = qp;
 	qp_ctx->ctx = ctx;
 
+	qp->req_cb = sec_req_cb;
+
 	mutex_init(&qp_ctx->req_lock);
 	idr_init(&qp_ctx->req_idr);
 	INIT_LIST_HEAD(&qp_ctx->backlog);
@@ -615,19 +674,25 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx,
 			return -EINVAL;
 		}
 	} else {
-		switch (keylen) {
-		case AES_KEYSIZE_128:
-			c_ctx->c_key_len = SEC_CKEY_128BIT;
-			break;
-		case AES_KEYSIZE_192:
-			c_ctx->c_key_len = SEC_CKEY_192BIT;
-			break;
-		case AES_KEYSIZE_256:
-			c_ctx->c_key_len = SEC_CKEY_256BIT;
-			break;
-		default:
-			pr_err("hisi_sec2: aes key error!\n");
+		if (c_ctx->c_alg == SEC_CALG_SM4 &&
+		    keylen != AES_KEYSIZE_128) {
+			pr_err("hisi_sec2: sm4 key error!\n");
 			return -EINVAL;
+		} else {
+			switch (keylen) {
+			case AES_KEYSIZE_128:
+				c_ctx->c_key_len = SEC_CKEY_128BIT;
+				break;
+			case AES_KEYSIZE_192:
+				c_ctx->c_key_len = SEC_CKEY_192BIT;
+				break;
+			case AES_KEYSIZE_256:
+				c_ctx->c_key_len = SEC_CKEY_256BIT;
+				break;
+			default:
+				pr_err("hisi_sec2: aes key error!\n");
+				return -EINVAL;
+			}
 		}
 	}
 
@@ -915,6 +980,12 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 		goto bad_key;
 	}
 
+	if ((ctx->a_ctx.mac_len & SEC_SQE_LEN_RATE_MASK)  ||
+	    (ctx->a_ctx.a_key_len & SEC_SQE_LEN_RATE_MASK)) {
+		dev_err(dev, "MAC or AUTH key length error!\n");
+		goto bad_key;
+	}
+
 	return 0;
 
 bad_key:
@@ -1014,29 +1085,75 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 		cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
 	sec_sqe->type_cipher_auth = bd_type | cipher;
 
-	if (req->use_pbuf)
+	/* Set destination and source address type */
+	if (req->use_pbuf) {
 		sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
-	else
+		da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
+	} else {
 		sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+		da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+	}
+
+	sec_sqe->sdm_addr_type |= da_type;
 	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
 	if (c_req->c_in_dma != c_req->c_out_dma)
 		de = 0x1 << SEC_DE_OFFSET;
 
 	sec_sqe->sds_sa_type = (de | scene | sa_type);
 
-	/* Just set DST address type */
-	if (req->use_pbuf)
-		da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
-	else
-		da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
-	sec_sqe->sdm_addr_type |= da_type;
-
 	sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
 	sec_sqe->type2.tag = cpu_to_le16((u16)req->req_id);
 
 	return 0;
 }
 
+static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3;
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct sec_cipher_req *c_req = &req->c_req;
+	u32 bd_param = 0;
+	u16 cipher;
+
+	memset(sec_sqe3, 0, sizeof(struct sec_sqe3));
+
+	sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
+	sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
+	sec_sqe3->data_src_addr = cpu_to_le64(c_req->c_in_dma);
+	sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma);
+
+	sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) |
+						c_ctx->c_mode;
+	sec_sqe3->c_icv_key |= cpu_to_le16(((u16)c_ctx->c_key_len) <<
+						SEC_CKEY_OFFSET_V3);
+
+	if (c_req->encrypt)
+		cipher = SEC_CIPHER_ENC;
+	else
+		cipher = SEC_CIPHER_DEC;
+	sec_sqe3->c_icv_key |= cpu_to_le16(cipher);
+
+	if (req->use_pbuf) {
+		bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3;
+		bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3;
+	} else {
+		bd_param |= SEC_SGL << SEC_SRC_SGL_OFFSET_V3;
+		bd_param |= SEC_SGL << SEC_DST_SGL_OFFSET_V3;
+	}
+
+	bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3;
+	if (c_req->c_in_dma != c_req->c_out_dma)
+		bd_param |= 0x1 << SEC_DE_OFFSET_V3;
+
+	bd_param |= SEC_BD_TYPE3;
+	sec_sqe3->bd_param = cpu_to_le32(bd_param);
+
+	sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len);
+	sec_sqe3->tag = cpu_to_le64((uintptr_t)req);
+
+	return 0;
+}
+
 static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
 {
 	struct aead_request *aead_req = req->aead_req.aead_req;
@@ -1170,6 +1287,57 @@ static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 	return 0;
 }
 
+static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir,
+				   struct sec_req *req, struct sec_sqe3 *sqe3)
+{
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct aead_request *aq = a_req->aead_req;
+
+	sqe3->a_key_addr = cpu_to_le64(ctx->a_key_dma);
+
+	sqe3->auth_mac_key |=
+			cpu_to_le32((u32)(ctx->mac_len /
+			SEC_SQE_LEN_RATE) << SEC_MAC_OFFSET_V3);
+
+	sqe3->auth_mac_key |=
+			cpu_to_le32((u32)(ctx->a_key_len /
+			SEC_SQE_LEN_RATE) << SEC_AKEY_OFFSET_V3);
+
+	sqe3->auth_mac_key |=
+			cpu_to_le32((u32)(ctx->a_alg) << SEC_AUTH_ALG_OFFSET_V3);
+
+	if (dir) {
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+		sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
+	} else {
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+		sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
+	}
+	sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen);
+
+	sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+
+	sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma);
+}
+
+static int sec_aead_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_auth_ctx *auth_ctx = &ctx->a_ctx;
+	struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3;
+	int ret;
+
+	ret = sec_skcipher_bd_fill_v3(ctx, req);
+	if (unlikely(ret)) {
+		dev_err(ctx->dev, "skcipher bd3 fill is error!\n");
+		return ret;
+	}
+
+	sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt, req, sec_sqe3);
+
+	return 0;
+}
+
 static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err)
 {
 	struct aead_request *a_req = req->aead_req.aead_req;
@@ -1303,13 +1471,44 @@ static const struct sec_req_op sec_aead_req_ops = {
 	.process	= sec_process,
 };
 
+static const struct sec_req_op sec_skcipher_req_ops_v3 = {
+	.buf_map	= sec_skcipher_sgl_map,
+	.buf_unmap	= sec_skcipher_sgl_unmap,
+	.do_transfer	= sec_skcipher_copy_iv,
+	.bd_fill	= sec_skcipher_bd_fill_v3,
+	.bd_send	= sec_bd_send,
+	.callback	= sec_skcipher_callback,
+	.process	= sec_process,
+};
+
+static const struct sec_req_op sec_aead_req_ops_v3 = {
+	.buf_map	= sec_aead_sgl_map,
+	.buf_unmap	= sec_aead_sgl_unmap,
+	.do_transfer	= sec_aead_copy_iv,
+	.bd_fill	= sec_aead_bd_fill_v3,
+	.bd_send	= sec_bd_send,
+	.callback	= sec_aead_callback,
+	.process	= sec_process,
+};
+
 static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm)
 {
 	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int ret;
 
-	ctx->req_op = &sec_skcipher_req_ops;
+	ret = sec_skcipher_init(tfm);
+	if (ret)
+		return ret;
+
+	if (ctx->sec->qm.ver < QM_HW_V3) {
+		ctx->type_supported = SEC_BD_TYPE2;
+		ctx->req_op = &sec_skcipher_req_ops;
+	} else {
+		ctx->type_supported = SEC_BD_TYPE3;
+		ctx->req_op = &sec_skcipher_req_ops_v3;
+	}
 
-	return sec_skcipher_init(tfm);
+	return ret;
 }
 
 static void sec_skcipher_ctx_exit(struct crypto_skcipher *tfm)
@@ -1330,10 +1529,16 @@ static int sec_aead_init(struct crypto_aead *tfm)
 		return -EINVAL;
 	}
 
-	ctx->req_op = &sec_aead_req_ops;
 	ret = sec_ctx_base_init(ctx);
 	if (ret)
 		return ret;
+	if (ctx->sec->qm.ver < QM_HW_V3) {
+		ctx->type_supported = SEC_BD_TYPE2;
+		ctx->req_op = &sec_aead_req_ops;
+	} else {
+		ctx->type_supported = SEC_BD_TYPE3;
+		ctx->req_op = &sec_aead_req_ops_v3;
+	}
 
 	ret = sec_auth_init(ctx);
 	if (ret)
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 3b64e1705479f..163e8134bb3d5 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -64,6 +64,13 @@ enum sec_addr_type {
 	SEC_PRP  = 0x2,
 };
 
+struct bd_status {
+	u64 tag;
+	u8 done;
+	u8 err_type;
+	u16 flag;
+};
+
 enum {
 	AUTHPAD_PAD,
 	AUTHPAD_NOPAD,
-- 
GitLab


From 7b44c0eecd6ade576bfb7a104dcdae5580237420 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 28 May 2021 19:42:04 +0800
Subject: [PATCH 2145/3804] crypto: hisilicon/sec - add new skcipher mode for
 SEC

Add new skcipher algorithms for Kunpeng930 SEC:
OFB(AES), CFB(AES), CTR(AES),
OFB(SM4), CFB(SM4), CTR(SM4).

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 103 ++++++++++++++++++---
 drivers/crypto/hisilicon/sec2/sec_crypto.h |   3 +
 2 files changed, 94 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f4b1c8cbb4d51..f4b77d0ce8f3e 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -78,6 +78,9 @@
 #define SEC_SQE_CFLAG		2
 #define SEC_SQE_AEAD_FLAG	3
 #define SEC_SQE_DONE		0x1
+#define MAX_INPUT_DATA_LEN	0xFFFE00
+#define BITS_MASK		0xFF
+#define BYTE_BITS		0x8
 
 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
 static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
@@ -751,12 +754,16 @@ static int sec_setkey_##name(struct crypto_skcipher *tfm, const u8 *key,\
 GEN_SEC_SETKEY_FUNC(aes_ecb, SEC_CALG_AES, SEC_CMODE_ECB)
 GEN_SEC_SETKEY_FUNC(aes_cbc, SEC_CALG_AES, SEC_CMODE_CBC)
 GEN_SEC_SETKEY_FUNC(aes_xts, SEC_CALG_AES, SEC_CMODE_XTS)
-
+GEN_SEC_SETKEY_FUNC(aes_ofb, SEC_CALG_AES, SEC_CMODE_OFB)
+GEN_SEC_SETKEY_FUNC(aes_cfb, SEC_CALG_AES, SEC_CMODE_CFB)
+GEN_SEC_SETKEY_FUNC(aes_ctr, SEC_CALG_AES, SEC_CMODE_CTR)
 GEN_SEC_SETKEY_FUNC(3des_ecb, SEC_CALG_3DES, SEC_CMODE_ECB)
 GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
-
 GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
 GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
+GEN_SEC_SETKEY_FUNC(sm4_ofb, SEC_CALG_SM4, SEC_CMODE_OFB)
+GEN_SEC_SETKEY_FUNC(sm4_cfb, SEC_CALG_SM4, SEC_CMODE_CFB)
+GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR)
 
 static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
 			struct scatterlist *src)
@@ -1154,6 +1161,17 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 	return 0;
 }
 
+/* increment counter (128-bit int) */
+static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums)
+{
+	do {
+		--bits;
+		nums += counter[bits];
+		counter[bits] = nums & BITS_MASK;
+		nums >>= BYTE_BITS;
+	} while (bits && nums);
+}
+
 static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
 {
 	struct aead_request *aead_req = req->aead_req.aead_req;
@@ -1177,10 +1195,17 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
 		cryptlen = aead_req->cryptlen;
 	}
 
-	sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size,
-				cryptlen - iv_size);
-	if (unlikely(sz != iv_size))
-		dev_err(req->ctx->dev, "copy output iv error!\n");
+	if (req->ctx->c_ctx.c_mode == SEC_CMODE_CBC) {
+		sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size,
+					cryptlen - iv_size);
+		if (unlikely(sz != iv_size))
+			dev_err(req->ctx->dev, "copy output iv error!\n");
+	} else {
+		sz = cryptlen / iv_size;
+		if (cryptlen % iv_size)
+			sz += 1;
+		ctr_iv_inc(iv, iv_size, sz);
+	}
 }
 
 static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
@@ -1211,8 +1236,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
 
 	sec_free_req_id(req);
 
-	/* IV output at encrypto of CBC mode */
-	if (!err && ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt)
+	/* IV output at encrypto of CBC/CTR mode */
+	if (!err && (ctx->c_ctx.c_mode == SEC_CMODE_CBC ||
+	    ctx->c_ctx.c_mode == SEC_CMODE_CTR) && req->c_req.encrypt)
 		sec_update_iv(req, SEC_SKCIPHER);
 
 	while (1) {
@@ -1422,7 +1448,8 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
 		goto err_uninit_req;
 
 	/* Output IV as decrypto */
-	if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt)
+	if (!req->c_req.encrypt && (ctx->c_ctx.c_mode == SEC_CMODE_CBC ||
+	    ctx->c_ctx.c_mode == SEC_CMODE_CTR))
 		sec_update_iv(req, ctx->alg_type);
 
 	ret = ctx->req_op->bd_send(ctx, req);
@@ -1634,6 +1661,14 @@ static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx,
 			ret = -EINVAL;
 		}
 		break;
+	case SEC_CMODE_CFB:
+	case SEC_CMODE_OFB:
+	case SEC_CMODE_CTR:
+		if (unlikely(ctx->sec->qm.ver < QM_HW_V3)) {
+			dev_err(dev, "skcipher HW version error!\n");
+			ret = -EINVAL;
+		}
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -1647,7 +1682,8 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	struct device *dev = ctx->dev;
 	u8 c_alg = ctx->c_ctx.c_alg;
 
-	if (unlikely(!sk_req->src || !sk_req->dst)) {
+	if (unlikely(!sk_req->src || !sk_req->dst ||
+		     sk_req->cryptlen > MAX_INPUT_DATA_LEN)) {
 		dev_err(dev, "skcipher input param error!\n");
 		return -EINVAL;
 	}
@@ -1762,6 +1798,32 @@ static struct skcipher_alg sec_skciphers[] = {
 			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
 };
 
+static struct skcipher_alg sec_skciphers_v3[] = {
+	SEC_SKCIPHER_ALG("ofb(aes)", sec_setkey_aes_ofb,
+			 AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("cfb(aes)", sec_setkey_aes_cfb,
+			 AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("ctr(aes)", sec_setkey_aes_ctr,
+			 AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("ofb(sm4)", sec_setkey_sm4_ofb,
+			 AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("cfb(sm4)", sec_setkey_sm4_cfb,
+			 AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+	SEC_SKCIPHER_ALG("ctr(sm4)", sec_setkey_sm4_ctr,
+			 AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+};
+
 static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 {
 	struct aead_request *req = sreq->aead_req.aead_req;
@@ -1878,15 +1940,32 @@ int sec_register_to_crypto(struct hisi_qm *qm)
 	if (ret)
 		return ret;
 
+	if (qm->ver > QM_HW_V2) {
+		ret = crypto_register_skciphers(sec_skciphers_v3,
+						ARRAY_SIZE(sec_skciphers_v3));
+		if (ret)
+			goto reg_skcipher_fail;
+	}
 	ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
 	if (ret)
-		crypto_unregister_skciphers(sec_skciphers,
-					    ARRAY_SIZE(sec_skciphers));
+		goto reg_aead_fail;
+	return ret;
+
+reg_aead_fail:
+	if (qm->ver > QM_HW_V2)
+		crypto_unregister_skciphers(sec_skciphers_v3,
+					    ARRAY_SIZE(sec_skciphers_v3));
+reg_skcipher_fail:
+	crypto_unregister_skciphers(sec_skciphers,
+				    ARRAY_SIZE(sec_skciphers));
 	return ret;
 }
 
 void sec_unregister_from_crypto(struct hisi_qm *qm)
 {
+	if (qm->ver > QM_HW_V2)
+		crypto_unregister_skciphers(sec_skciphers_v3,
+					    ARRAY_SIZE(sec_skciphers_v3));
 	crypto_unregister_skciphers(sec_skciphers,
 				    ARRAY_SIZE(sec_skciphers));
 	crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 163e8134bb3d5..c9bfe75d32e36 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -7,6 +7,7 @@
 #define SEC_IV_SIZE		24
 #define SEC_MAX_KEY_SIZE	64
 #define SEC_COMM_SCENE		0
+#define SEC_MIN_BLOCK_SZ	1
 
 enum sec_calg {
 	SEC_CALG_3DES = 0x1,
@@ -29,6 +30,8 @@ enum sec_mac_len {
 enum sec_cmode {
 	SEC_CMODE_ECB    = 0x0,
 	SEC_CMODE_CBC    = 0x1,
+	SEC_CMODE_CFB    = 0x2,
+	SEC_CMODE_OFB    = 0x3,
 	SEC_CMODE_CTR    = 0x4,
 	SEC_CMODE_XTS    = 0x7,
 };
-- 
GitLab


From 5652d55a76f6f59f0c1cfc7b90050742738cd227 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 28 May 2021 19:42:05 +0800
Subject: [PATCH 2146/3804] crypto: hisilicon/sec - add fallback tfm supporting
 for XTS mode

Add fallback tfm support to the hisi_sec driver. The hardware does not
support 192-bit key lengths in XTS mode, so the driver needs to set up a
software fallback skcipher tfm for the user.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |  4 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 85 +++++++++++++++++++++-
 2 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 14ba66da75855..935d8d95dcb91 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -97,6 +97,10 @@ struct sec_cipher_ctx {
 	u8 c_mode;
 	u8 c_alg;
 	u8 c_key_len;
+
+	/* add software support */
+	bool fallback;
+	struct crypto_sync_skcipher *fbtfm;
 };
 
 /* SEC queue context which defines queue's relatives */
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f4b77d0ce8f3e..b91cf2b33b987 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -21,6 +21,7 @@
 
 #define SEC_PRIORITY		4001
 #define SEC_XTS_MIN_KEY_SIZE	(2 * AES_MIN_KEY_SIZE)
+#define SEC_XTS_MID_KEY_SIZE	(3 * AES_MIN_KEY_SIZE)
 #define SEC_XTS_MAX_KEY_SIZE	(2 * AES_MAX_KEY_SIZE)
 #define SEC_DES3_2KEY_SIZE	(2 * DES_KEY_SIZE)
 #define SEC_DES3_3KEY_SIZE	(3 * DES_KEY_SIZE)
@@ -81,6 +82,7 @@
 #define MAX_INPUT_DATA_LEN	0xFFFE00
 #define BITS_MASK		0xFF
 #define BYTE_BITS		0x8
+#define SEC_XTS_NAME_SZ		0x3
 
 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
 static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
@@ -598,6 +600,26 @@ static void sec_auth_uninit(struct sec_ctx *ctx)
 			  a_ctx->a_key, a_ctx->a_key_dma);
 }
 
+static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm)
+{
+	const char *alg = crypto_tfm_alg_name(&tfm->base);
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+
+	c_ctx->fallback = false;
+	if (likely(strncmp(alg, "xts", SEC_XTS_NAME_SZ)))
+		return 0;
+
+	c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg, 0,
+						  CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(c_ctx->fbtfm)) {
+		pr_err("failed to alloc fallback tfm!\n");
+		return PTR_ERR(c_ctx->fbtfm);
+	}
+
+	return 0;
+}
+
 static int sec_skcipher_init(struct crypto_skcipher *tfm)
 {
 	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -619,8 +641,14 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm)
 	if (ret)
 		goto err_cipher_init;
 
+	ret = sec_skcipher_fbtfm_init(tfm);
+	if (ret)
+		goto err_fbtfm_init;
+
 	return 0;
 
+err_fbtfm_init:
+	sec_cipher_uninit(ctx);
 err_cipher_init:
 	sec_ctx_base_uninit(ctx);
 	return ret;
@@ -630,6 +658,9 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
 {
 	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
 
+	if (ctx->c_ctx.fbtfm)
+		crypto_free_sync_skcipher(ctx->c_ctx.fbtfm);
+
 	sec_cipher_uninit(ctx);
 	sec_ctx_base_uninit(ctx);
 }
@@ -669,6 +700,9 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx,
 		case SEC_XTS_MIN_KEY_SIZE:
 			c_ctx->c_key_len = SEC_CKEY_128BIT;
 			break;
+		case SEC_XTS_MID_KEY_SIZE:
+			c_ctx->fallback = true;
+			break;
 		case SEC_XTS_MAX_KEY_SIZE:
 			c_ctx->c_key_len = SEC_CKEY_256BIT;
 			break;
@@ -740,7 +774,13 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	}
 
 	memcpy(c_ctx->c_key, key, keylen);
-
+	if (c_ctx->fallback) {
+		ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
+		if (ret) {
+			dev_err(dev, "failed to set fallback skcipher key!\n");
+			return ret;
+		}
+	}
 	return 0;
 }
 
@@ -1709,6 +1749,37 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	return -EINVAL;
 }
 
+static int sec_skcipher_soft_crypto(struct sec_ctx *ctx,
+				    struct skcipher_request *sreq, bool encrypt)
+{
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct device *dev = ctx->dev;
+	int ret;
+
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, c_ctx->fbtfm);
+
+	if (!c_ctx->fbtfm) {
+		dev_err(dev, "failed to check fallback tfm\n");
+		return -EINVAL;
+	}
+
+	skcipher_request_set_sync_tfm(subreq, c_ctx->fbtfm);
+
+	/* software need sync mode to do crypto */
+	skcipher_request_set_callback(subreq, sreq->base.flags,
+				      NULL, NULL);
+	skcipher_request_set_crypt(subreq, sreq->src, sreq->dst,
+				   sreq->cryptlen, sreq->iv);
+	if (encrypt)
+		ret = crypto_skcipher_encrypt(subreq);
+	else
+		ret = crypto_skcipher_decrypt(subreq);
+
+	skcipher_request_zero(subreq);
+
+	return ret;
+}
+
 static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(sk_req);
@@ -1716,8 +1787,11 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
 	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
 
-	if (!sk_req->cryptlen)
+	if (!sk_req->cryptlen) {
+		if (ctx->c_ctx.c_mode == SEC_CMODE_XTS)
+			return -EINVAL;
 		return 0;
+	}
 
 	req->flag = sk_req->base.flags;
 	req->c_req.sk_req = sk_req;
@@ -1728,6 +1802,9 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
 	if (unlikely(ret))
 		return -EINVAL;
 
+	if (unlikely(ctx->c_ctx.fallback))
+		return sec_skcipher_soft_crypto(ctx, sk_req, encrypt);
+
 	return ctx->req_op->process(ctx, req);
 }
 
@@ -1748,7 +1825,9 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
 		.cra_name = sec_cra_name,\
 		.cra_driver_name = "hisi_sec_"sec_cra_name,\
 		.cra_priority = SEC_PRIORITY,\
-		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
+		.cra_flags = CRYPTO_ALG_ASYNC |\
+		 CRYPTO_ALG_ALLOCATES_MEMORY |\
+		 CRYPTO_ALG_NEED_FALLBACK,\
 		.cra_blocksize = blk_size,\
 		.cra_ctxsize = sizeof(struct sec_ctx),\
 		.cra_module = THIS_MODULE,\
-- 
GitLab


From 6161f40c630bd7ced5f236cd5fbabec06e47afae Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 28 May 2021 19:42:06 +0800
Subject: [PATCH 2147/3804] crypto: hisilicon/sec - fixup 3des minimum key size
 declaration

Fix up the 3des algorithm minimum key size declaration.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index b91cf2b33b987..5926b64d0d989 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -1861,11 +1861,11 @@ static struct skcipher_alg sec_skciphers[] = {
 			 AES_BLOCK_SIZE, AES_BLOCK_SIZE)
 
 	SEC_SKCIPHER_ALG("ecb(des3_ede)", sec_setkey_3des_ecb,
-			 SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+			 SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE,
 			 DES3_EDE_BLOCK_SIZE, 0)
 
 	SEC_SKCIPHER_ALG("cbc(des3_ede)", sec_setkey_3des_cbc,
-			 SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+			 SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE,
 			 DES3_EDE_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE)
 
 	SEC_SKCIPHER_ALG("xts(sm4)", sec_setkey_sm4_xts,
-- 
GitLab


From 1e609f5fb73b6b17af369a031f3a4c2b9b405854 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 29 May 2021 16:57:37 +0800
Subject: [PATCH 2148/3804] crypto: hisilicon/hpre - fix ecdh self test issue

When the key length is zero, use stdrng to generate a private key so
that the crypto ecdh-nist-p256 self test passes on vector 2.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 34 +++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 294c3688aabb0..6ba5d8af38755 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -5,6 +5,7 @@
 #include <crypto/dh.h>
 #include <crypto/ecc_curve.h>
 #include <crypto/ecdh.h>
+#include <crypto/rng.h>
 #include <crypto/internal/akcipher.h>
 #include <crypto/internal/kpp.h>
 #include <crypto/internal/rsa.h>
@@ -38,6 +39,9 @@ struct hpre_ctx;
 #define HPRE_DFX_SEC_TO_US	1000000
 #define HPRE_DFX_US_TO_NS	1000
 
+/* due to nist p521  */
+#define HPRE_ECC_MAX_KSZ	66
+
 /* size in bytes of the n prime */
 #define HPRE_ECC_NIST_P192_N_SIZE	24
 #define HPRE_ECC_NIST_P256_N_SIZE	32
@@ -1333,11 +1337,32 @@ static bool hpre_key_is_zero(char *key, unsigned short key_sz)
 	return true;
 }
 
+static int ecdh_gen_privkey(struct hpre_ctx *ctx, struct ecdh *params)
+{
+	struct device *dev = ctx->dev;
+	int ret;
+
+	ret = crypto_get_default_rng();
+	if (ret) {
+		dev_err(dev, "failed to get default rng, ret = %d!\n", ret);
+		return ret;
+	}
+
+	ret = crypto_rng_get_bytes(crypto_default_rng, (u8 *)params->key,
+				   params->key_size);
+	crypto_put_default_rng();
+	if (ret)
+		dev_err(dev, "failed to get rng, ret = %d!\n", ret);
+
+	return ret;
+}
+
 static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 				unsigned int len)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
+	char key[HPRE_ECC_MAX_KSZ];
 	unsigned int sz, sz_shift;
 	struct ecdh params;
 	int ret;
@@ -1347,6 +1372,15 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
 		return -EINVAL;
 	}
 
+	/* Use stdrng to generate private key */
+	if (!params.key || !params.key_size) {
+		params.key = key;
+		params.key_size = hpre_ecdh_get_curvesz(ctx->curve_id);
+		ret = ecdh_gen_privkey(ctx, &params);
+		if (ret)
+			return ret;
+	}
+
 	if (hpre_key_is_zero(params.key, params.key_size)) {
 		dev_err(dev, "Invalid hpre key!\n");
 		return -EINVAL;
-- 
GitLab


From 9612581fc10919ef70aae1fa4dcf6e20d85a14a7 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 29 May 2021 16:58:19 +0800
Subject: [PATCH 2149/3804] crypto: hisilicon/hpre - add check before gx modulo
 p

The result of gx modulo p is zero if gx is equal to p, so return
error immediately if gx is equal to p.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 6ba5d8af38755..323418bf66ab7 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -1841,8 +1841,12 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
 	 * When src_data equals (2^255 - 19) ~  (2^255 - 1), it is out of p,
 	 * we get its modulus to p, and then use it.
 	 */
-	if (memcmp(ptr, p, ctx->key_sz) >= 0)
+	if (memcmp(ptr, p, ctx->key_sz) == 0) {
+		dev_err(dev, "gx is p!\n");
+		return -EINVAL;
+	} else if (memcmp(ptr, p, ctx->key_sz) > 0) {
 		hpre_curve25519_src_modulo_p(ptr);
+	}
 
 	hpre_req->src = ptr;
 	msg->in = cpu_to_le64(dma);
-- 
GitLab


From b981f7990e1ae61d9a48d717868df8f00f52bc08 Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Sat, 29 May 2021 16:58:47 +0800
Subject: [PATCH 2150/3804] crypto: hisilicon/hpre - register ecdh NIST P384

Register ecdh NIST P384 curve and add the tfm initialization.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 56 +++++++++++++++++++--
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 323418bf66ab7..a032c192ef1d6 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -45,9 +45,11 @@ struct hpre_ctx;
 /* size in bytes of the n prime */
 #define HPRE_ECC_NIST_P192_N_SIZE	24
 #define HPRE_ECC_NIST_P256_N_SIZE	32
+#define HPRE_ECC_NIST_P384_N_SIZE	48
 
 /* size in bytes */
 #define HPRE_ECC_HW256_KSZ_B	32
+#define HPRE_ECC_HW384_KSZ_B	48
 
 typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe);
 
@@ -1211,12 +1213,21 @@ static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all,
 	hpre_ctx_clear(ctx, is_clear_all);
 }
 
+/*
+ * The bits of 192/224/256/384/521 are supported by HPRE,
+ * and convert the bits like:
+ * bits<=256, bits=256; 256<bits<=384, bits=384; 384<bits<=576, bits=576;
+ * If the parameter bit width is insufficient, then we fill in the
+ * high-order zeros by soft, so TASK_LENGTH1 is 0x3/0x5/0x8;
+ */
 static unsigned int hpre_ecdh_supported_curve(unsigned short id)
 {
 	switch (id) {
 	case ECC_CURVE_NIST_P192:
 	case ECC_CURVE_NIST_P256:
 		return HPRE_ECC_HW256_KSZ_B;
+	case ECC_CURVE_NIST_P384:
+		return HPRE_ECC_HW384_KSZ_B;
 	default:
 		break;
 	}
@@ -1281,6 +1292,8 @@ static unsigned int hpre_ecdh_get_curvesz(unsigned short id)
 		return HPRE_ECC_NIST_P192_N_SIZE;
 	case ECC_CURVE_NIST_P256:
 		return HPRE_ECC_NIST_P256_N_SIZE;
+	case ECC_CURVE_NIST_P384:
+		return HPRE_ECC_NIST_P384_N_SIZE;
 	default:
 		break;
 	}
@@ -1613,6 +1626,15 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
 
+static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
+{
+	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
+
+	ctx->curve_id = ECC_CURVE_NIST_P384;
+
+	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
+}
+
 static void hpre_ecdh_exit_tfm(struct crypto_kpp *tfm)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
@@ -2017,6 +2039,23 @@ static struct kpp_alg ecdh_nist_p256 = {
 	},
 };
 
+static struct kpp_alg ecdh_nist_p384 = {
+	.set_secret = hpre_ecdh_set_secret,
+	.generate_public_key = hpre_ecdh_compute_value,
+	.compute_shared_secret = hpre_ecdh_compute_value,
+	.max_size = hpre_ecdh_max_size,
+	.init = hpre_ecdh_nist_p384_init_tfm,
+	.exit = hpre_ecdh_exit_tfm,
+	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
+	.base = {
+		.cra_ctxsize = sizeof(struct hpre_ctx),
+		.cra_priority = HPRE_CRYPTO_ALG_PRI,
+		.cra_name = "ecdh-nist-p384",
+		.cra_driver_name = "hpre-ecdh-nist-p384",
+		.cra_module = THIS_MODULE,
+	},
+};
+
 static struct kpp_alg curve25519_alg = {
 	.set_secret = hpre_curve25519_set_secret,
 	.generate_public_key = hpre_curve25519_compute_value,
@@ -2044,16 +2083,25 @@ static int hpre_register_ecdh(void)
 		return ret;
 
 	ret = crypto_register_kpp(&ecdh_nist_p256);
-	if (ret) {
-		crypto_unregister_kpp(&ecdh_nist_p192);
-		return ret;
-	}
+	if (ret)
+		goto unregister_ecdh_p192;
+
+	ret = crypto_register_kpp(&ecdh_nist_p384);
+	if (ret)
+		goto unregister_ecdh_p256;
 
 	return 0;
+
+unregister_ecdh_p256:
+	crypto_unregister_kpp(&ecdh_nist_p256);
+unregister_ecdh_p192:
+	crypto_unregister_kpp(&ecdh_nist_p192);
+	return ret;
 }
 
 static void hpre_unregister_ecdh(void)
 {
+	crypto_unregister_kpp(&ecdh_nist_p384);
 	crypto_unregister_kpp(&ecdh_nist_p256);
 	crypto_unregister_kpp(&ecdh_nist_p192);
 }
-- 
GitLab


From 38cd3968bf284929162665b002891de5c60d027a Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 29 May 2021 22:15:34 +0800
Subject: [PATCH 2151/3804] crypto: hisilicon/qm - adjust reset interface

Kunpeng930 hardware supports PF/VF communication. When the device is
reset, the PF can send messages to the VFs to stop and restart them.

This patch adjusts the reset interface to support sending message through
PF/VF communication.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 140 +++++++++++++++++++++-------------
 1 file changed, 89 insertions(+), 51 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 7c1f8ab28f995..4af0650d4b187 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1796,6 +1796,11 @@ static int qm_check_dev_error(struct hisi_qm *qm)
 	       (dev_val & (~qm->err_info.dev_ce_mask));
 }
 
+static int qm_wait_vf_prepare_finish(struct hisi_qm *qm)
+{
+	return 0;
+}
+
 static int qm_stop_qp(struct hisi_qp *qp)
 {
 	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -3806,14 +3811,27 @@ stop_fail:
 	return ret;
 }
 
-static int qm_reset_prepare_ready(struct hisi_qm *qm)
+static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason)
 {
 	struct pci_dev *pdev = qm->pdev;
-	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+	int ret;
+
+	if (!qm->vfs_num)
+		return 0;
+
+	ret = qm_vf_reset_prepare(qm, stop_reason);
+	if (ret)
+		pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret);
+
+	return ret;
+}
+
+static int qm_wait_reset_finish(struct hisi_qm *qm)
+{
 	int delay = 0;
 
 	/* All reset requests need to be queued for processing */
-	while (test_and_set_bit(QM_RESETTING, &pf_qm->misc_ctl)) {
+	while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
 		msleep(++delay);
 		if (delay > QM_RESET_WAIT_TIMEOUT)
 			return -EBUSY;
@@ -3822,6 +3840,22 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm)
 	return 0;
 }
 
+static int qm_reset_prepare_ready(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+
+	return qm_wait_reset_finish(pf_qm);
+}
+
+static void qm_reset_bit_clear(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+
+	clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+}
+
 static int qm_controller_reset_prepare(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -3833,22 +3867,21 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm)
 		return ret;
 	}
 
-	if (qm->vfs_num) {
-		ret = qm_vf_reset_prepare(qm, QM_SOFT_RESET);
-		if (ret) {
-			pci_err(pdev, "Fails to stop VFs!\n");
-			clear_bit(QM_RESETTING, &qm->misc_ctl);
-			return ret;
-		}
-	}
+	ret = qm_try_stop_vfs(qm, QM_SOFT_RESET);
+	if (ret)
+		pci_err(pdev, "failed to stop vfs by pf in soft reset.\n");
 
 	ret = hisi_qm_stop(qm, QM_SOFT_RESET);
 	if (ret) {
 		pci_err(pdev, "Fails to stop QM!\n");
-		clear_bit(QM_RESETTING, &qm->misc_ctl);
+		qm_reset_bit_clear(qm);
 		return ret;
 	}
 
+	ret = qm_wait_vf_prepare_finish(qm);
+	if (ret)
+		pci_err(pdev, "failed to stop by vfs in soft reset!\n");
+
 	clear_bit(QM_RST_SCHED, &qm->misc_ctl);
 
 	return 0;
@@ -3983,6 +4016,27 @@ restart_fail:
 	return ret;
 }
 
+static int qm_try_start_vfs(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	int ret;
+
+	if (!qm->vfs_num)
+		return 0;
+
+	ret = qm_vf_q_assign(qm, qm->vfs_num);
+	if (ret) {
+		pci_err(pdev, "failed to assign VFs, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = qm_vf_reset_done(qm);
+	if (ret)
+		pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret);
+
+	return ret;
+}
+
 static int qm_dev_hw_init(struct hisi_qm *qm)
 {
 	return qm->err_ini->hw_init(qm);
@@ -4082,23 +4136,17 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 		return ret;
 	}
 
-	if (qm->vfs_num) {
-		ret = qm_vf_q_assign(qm, qm->vfs_num);
-		if (ret) {
-			pci_err(pdev, "Failed to assign queue!\n");
-			return ret;
-		}
-	}
+	ret = qm_try_start_vfs(qm);
+	if (ret)
+		pci_err(pdev, "failed to start vfs by pf in soft reset.\n");
 
-	ret = qm_vf_reset_done(qm);
-	if (ret) {
-		pci_err(pdev, "Failed to start VFs!\n");
-		return -EPERM;
-	}
+	ret = qm_wait_vf_prepare_finish(qm);
+	if (ret)
+		pci_err(pdev, "failed to start by vfs in soft reset!\n");
 
 	qm_restart_done(qm);
 
-	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	qm_reset_bit_clear(qm);
 
 	return 0;
 }
@@ -4119,13 +4167,13 @@ static int qm_controller_reset(struct hisi_qm *qm)
 	ret = qm_soft_reset(qm);
 	if (ret) {
 		pci_err(pdev, "Controller reset failed (%d)\n", ret);
-		clear_bit(QM_RESETTING, &qm->misc_ctl);
+		qm_reset_bit_clear(qm);
 		return ret;
 	}
 
 	ret = qm_controller_reset_done(qm);
 	if (ret) {
-		clear_bit(QM_RESETTING, &qm->misc_ctl);
+		qm_reset_bit_clear(qm);
 		return ret;
 	}
 
@@ -4187,14 +4235,9 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
 		return;
 	}
 
-	if (qm->vfs_num) {
-		ret = qm_vf_reset_prepare(qm, QM_FLR);
-		if (ret) {
-			pci_err(pdev, "Failed to prepare reset, ret = %d.\n",
-				ret);
-			return;
-		}
-	}
+	ret = qm_try_stop_vfs(qm, QM_SOFT_RESET);
+	if (ret)
+		pci_err(pdev, "failed to stop vfs by pf in FLR.\n");
 
 	ret = hisi_qm_stop(qm, QM_FLR);
 	if (ret) {
@@ -4202,6 +4245,10 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
 		return;
 	}
 
+	ret = qm_wait_vf_prepare_finish(qm);
+	if (ret)
+		pci_err(pdev, "failed to stop by vfs in FLR!\n");
+
 	pci_info(pdev, "FLR resetting...\n");
 }
 EXPORT_SYMBOL_GPL(hisi_qm_reset_prepare);
@@ -4243,28 +4290,19 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
 		goto flr_done;
 	}
 
-	if (qm->fun_type == QM_HW_PF) {
-		if (!qm->vfs_num)
-			goto flr_done;
-
-		ret = qm_vf_q_assign(qm, qm->vfs_num);
-		if (ret) {
-			pci_err(pdev, "Failed to assign VFs, ret = %d.\n", ret);
-			goto flr_done;
-		}
+	ret = qm_try_start_vfs(qm);
+	if (ret)
+		pci_err(pdev, "failed to start vfs by pf in FLR.\n");
 
-		ret = qm_vf_reset_done(qm);
-		if (ret) {
-			pci_err(pdev, "Failed to start VFs, ret = %d.\n", ret);
-			goto flr_done;
-		}
-	}
+	ret = qm_wait_vf_prepare_finish(qm);
+	if (ret)
+		pci_err(pdev, "failed to start by vfs in FLR!\n");
 
 flr_done:
 	if (qm_flr_reset_complete(pdev))
 		pci_info(pdev, "FLR reset complete\n");
 
-	clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+	qm_reset_bit_clear(qm);
 }
 EXPORT_SYMBOL_GPL(hisi_qm_reset_done);
 
-- 
GitLab


From e3ac4d20e93664755ccea87ad1c71f264a6c9d74 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 29 May 2021 22:15:35 +0800
Subject: [PATCH 2152/3804] crypto: hisilicon/qm - enable PF and VFs
 communication

Kunpeng930 hardware supports the communication between PF and VFs.

This patch enables communication between PF and VFs by writing hardware
registers, and requests an irq for communication.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 113 +++++++++++++++++++++++++++++++---
 drivers/crypto/hisilicon/qm.h |   1 +
 2 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 4af0650d4b187..cd25f1fdd40b5 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -25,9 +25,11 @@
 #define QM_IRQ_NUM_V1			1
 #define QM_IRQ_NUM_PF_V2		4
 #define QM_IRQ_NUM_VF_V2		2
+#define QM_IRQ_NUM_VF_V3		3
 
 #define QM_EQ_EVENT_IRQ_VECTOR		0
 #define QM_AEQ_EVENT_IRQ_VECTOR		1
+#define QM_CMD_EVENT_IRQ_VECTOR		2
 #define QM_ABNORMAL_EVENT_IRQ_VECTOR	3
 
 /* mailbox */
@@ -177,6 +179,16 @@
 #define ACC_ROB_ECC_ERR_MULTPL		BIT(1)
 #define QM_MSI_CAP_ENABLE		BIT(16)
 
+/* interfunction communication */
+#define QM_IFC_INT_SOURCE_P		0x100138
+#define QM_IFC_INT_SOURCE_V		0x0020
+#define QM_IFC_INT_MASK			0x0024
+#define QM_IFC_INT_STATUS		0x0028
+#define QM_IFC_INT_SOURCE_CLR		GENMASK(63, 0)
+#define QM_IFC_INT_SOURCE_MASK		BIT(0)
+#define QM_IFC_INT_DISABLE		BIT(0)
+#define QM_IFC_INT_STATUS_MASK		BIT(0)
+
 #define QM_DFX_MB_CNT_VF		0x104010
 #define QM_DFX_DB_CNT_VF		0x104020
 #define QM_DFX_SQE_CNT_VF_SQN		0x104030
@@ -633,6 +645,14 @@ static u32 qm_get_irq_num_v2(struct hisi_qm *qm)
 		return QM_IRQ_NUM_VF_V2;
 }
 
+static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
+{
+	if (qm->fun_type == QM_HW_PF)
+		return QM_IRQ_NUM_PF_V2;
+
+	return QM_IRQ_NUM_VF_V3;
+}
+
 static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
 {
 	u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
@@ -737,6 +757,21 @@ static irqreturn_t qm_irq(int irq, void *data)
 	return IRQ_NONE;
 }
 
+static irqreturn_t qm_mb_cmd_irq(int irq, void *data)
+{
+	struct hisi_qm *qm = data;
+	u32 val;
+
+	val = readl(qm->io_base + QM_IFC_INT_STATUS);
+	val &= QM_IFC_INT_STATUS_MASK;
+	if (!val)
+		return IRQ_NONE;
+
+	schedule_work(&qm->cmd_process);
+
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t qm_aeq_irq(int irq, void *data)
 {
 	struct hisi_qm *qm = data;
@@ -777,14 +812,16 @@ static void qm_irq_unregister(struct hisi_qm *qm)
 
 	free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
 
-	if (qm->ver == QM_HW_V1)
-		return;
+	if (qm->ver > QM_HW_V1) {
+		free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
 
-	free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+		if (qm->fun_type == QM_HW_PF)
+			free_irq(pci_irq_vector(pdev,
+				 QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
+	}
 
-	if (qm->fun_type == QM_HW_PF)
-		free_irq(pci_irq_vector(pdev,
-			 QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
+	if (qm->ver > QM_HW_V2)
+		free_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR), qm);
 }
 
 static void qm_init_qp_status(struct hisi_qp *qp)
@@ -1796,6 +1833,18 @@ static int qm_check_dev_error(struct hisi_qm *qm)
 	       (dev_val & (~qm->err_info.dev_ce_mask));
 }
 
+static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask)
+{
+	u32 val;
+
+	if (qm->fun_type == QM_HW_PF)
+		writeq(vf_mask, qm->io_base + QM_IFC_INT_SOURCE_P);
+
+	val = readl(qm->io_base + QM_IFC_INT_SOURCE_V);
+	val |= QM_IFC_INT_SOURCE_MASK;
+	writel(val, qm->io_base + QM_IFC_INT_SOURCE_V);
+}
+
 static int qm_wait_vf_prepare_finish(struct hisi_qm *qm)
 {
 	return 0;
@@ -1913,7 +1962,7 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
 static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
 	.get_vft = qm_get_vft_v2,
 	.qm_db = qm_db_v2,
-	.get_irq_num = qm_get_irq_num_v2,
+	.get_irq_num = qm_get_irq_num_v3,
 	.hw_error_init = qm_hw_error_init_v3,
 	.hw_error_uninit = qm_hw_error_uninit_v3,
 	.hw_error_handle = qm_hw_error_handle_v2,
@@ -2777,6 +2826,34 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
 	qm->misc_ctl = false;
 }
 
+static void qm_cmd_uninit(struct hisi_qm *qm)
+{
+	u32 val;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	val = readl(qm->io_base + QM_IFC_INT_MASK);
+	val |= QM_IFC_INT_DISABLE;
+	writel(val, qm->io_base + QM_IFC_INT_MASK);
+}
+
+static void qm_cmd_init(struct hisi_qm *qm)
+{
+	u32 val;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Clear communication interrupt source */
+	qm_clear_cmd_interrupt(qm, QM_IFC_INT_SOURCE_CLR);
+
+	/* Enable pf to vf communication reg. */
+	val = readl(qm->io_base + QM_IFC_INT_MASK);
+	val &= ~QM_IFC_INT_DISABLE;
+	writel(val, qm->io_base + QM_IFC_INT_MASK);
+}
+
 static void qm_put_pci_res(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -2808,6 +2885,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 	struct pci_dev *pdev = qm->pdev;
 	struct device *dev = &pdev->dev;
 
+	qm_cmd_uninit(qm);
 	down_write(&qm->qps_lock);
 
 	if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -4331,7 +4409,7 @@ static int qm_irq_register(struct hisi_qm *qm)
 	if (ret)
 		return ret;
 
-	if (qm->ver != QM_HW_V1) {
+	if (qm->ver > QM_HW_V1) {
 		ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
 				  qm_aeq_irq, 0, qm->dev_name, qm);
 		if (ret)
@@ -4346,8 +4424,18 @@ static int qm_irq_register(struct hisi_qm *qm)
 		}
 	}
 
+	if (qm->ver > QM_HW_V2) {
+		ret = request_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR),
+				qm_mb_cmd_irq, 0, qm->dev_name, qm);
+		if (ret)
+			goto err_mb_cmd_irq;
+	}
+
 	return 0;
 
+err_mb_cmd_irq:
+	if (qm->fun_type == QM_HW_PF)
+		free_irq(pci_irq_vector(pdev, QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
 err_abonormal_irq:
 	free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
 err_aeq_irq:
@@ -4384,6 +4472,11 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
 
 }
 
+static void qm_cmd_process(struct work_struct *cmd_process)
+{
+	/* handling messages sent by communication source */
+}
+
 /**
  * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list.
  * @qm: The qm needs add.
@@ -4615,6 +4708,10 @@ int hisi_qm_init(struct hisi_qm *qm)
 	if (qm->fun_type == QM_HW_PF)
 		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
 
+	if (qm->ver >= QM_HW_V3)
+		INIT_WORK(&qm->cmd_process, qm_cmd_process);
+
+	qm_cmd_init(qm);
 	atomic_set(&qm->status.flags, QM_INIT);
 
 	return 0;
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 9048aa6e5f8ab..8a36bade103d8 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -250,6 +250,7 @@ struct hisi_qm {
 	struct workqueue_struct *wq;
 	struct work_struct work;
 	struct work_struct rst_work;
+	struct work_struct cmd_process;
 
 	const char *algs;
 	bool use_sva;
-- 
GitLab


From 3cd53a27c2fc58da9dcf6f22f4ed5705e398a1b9 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 29 May 2021 22:15:36 +0800
Subject: [PATCH 2153/3804] crypto: hisilicon/qm - add callback to support
 communication

This patch adds the 'ping_all_vfs' callback, which lets the PF send a message
to all VFs, and the 'ping_pf' callback, which lets a VF send a message to the
PF. After receiving the interrupt, the communication destination gets the
message by sending a mailbox command.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 217 +++++++++++++++++++++++++++++-----
 1 file changed, 190 insertions(+), 27 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index cd25f1fdd40b5..04560c3cdd78e 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -41,6 +41,8 @@
 #define QM_MB_CMD_CQC_BT		0x5
 #define QM_MB_CMD_SQC_VFT_V2		0x6
 #define QM_MB_CMD_STOP_QP		0x8
+#define QM_MB_CMD_SRC			0xc
+#define QM_MB_CMD_DST			0xd
 
 #define QM_MB_CMD_SEND_BASE		0x300
 #define QM_MB_EVENT_SHIFT		8
@@ -48,6 +50,8 @@
 #define QM_MB_OP_SHIFT			14
 #define QM_MB_CMD_DATA_ADDR_L		0x304
 #define QM_MB_CMD_DATA_ADDR_H		0x308
+#define QM_MB_PING_ALL_VFS		0xffff
+#define QM_MB_CMD_DATA_MASK		GENMASK(31, 0)
 
 /* sqc shift */
 #define QM_SQ_HOP_NUM_SHIFT		0
@@ -180,14 +184,24 @@
 #define QM_MSI_CAP_ENABLE		BIT(16)
 
 /* interfunction communication */
+#define QM_IFC_READY_STATUS		0x100128
+#define QM_IFC_INT_SET_P		0x100130
+#define QM_IFC_INT_CFG			0x100134
 #define QM_IFC_INT_SOURCE_P		0x100138
 #define QM_IFC_INT_SOURCE_V		0x0020
 #define QM_IFC_INT_MASK			0x0024
 #define QM_IFC_INT_STATUS		0x0028
+#define QM_IFC_INT_SET_V		0x002C
+#define QM_IFC_SEND_ALL_VFS		GENMASK(6, 0)
 #define QM_IFC_INT_SOURCE_CLR		GENMASK(63, 0)
 #define QM_IFC_INT_SOURCE_MASK		BIT(0)
 #define QM_IFC_INT_DISABLE		BIT(0)
 #define QM_IFC_INT_STATUS_MASK		BIT(0)
+#define QM_IFC_INT_SET_MASK		BIT(0)
+#define QM_WAIT_DST_ACK			10
+#define QM_MAX_PF_WAIT_COUNT		10
+#define QM_MAX_VF_WAIT_COUNT		40
+
 
 #define QM_DFX_MB_CNT_VF		0x104010
 #define QM_DFX_DB_CNT_VF		0x104020
@@ -370,6 +384,8 @@ struct hisi_qm_hw_ops {
 	enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm);
 	int (*stop_qp)(struct hisi_qp *qp);
 	int (*set_msi)(struct hisi_qm *qm, bool set);
+	int (*ping_all_vfs)(struct hisi_qm *qm, u64 cmd);
+	int (*ping_pf)(struct hisi_qm *qm, u64 cmd);
 };
 
 struct qm_dfx_item {
@@ -510,6 +526,18 @@ static bool qm_qp_avail_state(struct hisi_qm *qm, struct hisi_qp *qp,
 	return avail;
 }
 
+static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
+			   u64 base, u16 queue, bool op)
+{
+	mailbox->w0 = cpu_to_le16((cmd) |
+		((op) ? 0x1 << QM_MB_OP_SHIFT : 0) |
+		(0x1 << QM_MB_BUSY_SHIFT));
+	mailbox->queue_num = cpu_to_le16(queue);
+	mailbox->base_l = cpu_to_le32(lower_32_bits(base));
+	mailbox->base_h = cpu_to_le32(upper_32_bits(base));
+	mailbox->rsvd = 0;
+}
+
 /* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
 static int qm_wait_mb_ready(struct hisi_qm *qm)
 {
@@ -542,44 +570,42 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
 		     : "memory");
 }
 
-static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
-		 bool op)
+static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
 {
-	struct qm_mailbox mailbox;
-	int ret = 0;
-
-	dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
-		queue, cmd, (unsigned long long)dma_addr);
-
-	mailbox.w0 = cpu_to_le16(cmd |
-		     (op ? 0x1 << QM_MB_OP_SHIFT : 0) |
-		     (0x1 << QM_MB_BUSY_SHIFT));
-	mailbox.queue_num = cpu_to_le16(queue);
-	mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr));
-	mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr));
-	mailbox.rsvd = 0;
-
-	mutex_lock(&qm->mailbox_lock);
-
 	if (unlikely(qm_wait_mb_ready(qm))) {
-		ret = -EBUSY;
 		dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
-		goto busy_unlock;
+		goto mb_busy;
 	}
 
-	qm_mb_write(qm, &mailbox);
+	qm_mb_write(qm, mailbox);
 
 	if (unlikely(qm_wait_mb_ready(qm))) {
-		ret = -EBUSY;
 		dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
-		goto busy_unlock;
+		goto mb_busy;
 	}
 
-busy_unlock:
+	return 0;
+
+mb_busy:
+	atomic64_inc(&qm->debug.dfx.mb_err_cnt);
+	return -EBUSY;
+}
+
+static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+		 bool op)
+{
+	struct qm_mailbox mailbox;
+	int ret;
+
+	dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
+		queue, cmd, (unsigned long long)dma_addr);
+
+	qm_mb_pre_init(&mailbox, cmd, dma_addr, queue, op);
+
+	mutex_lock(&qm->mailbox_lock);
+	ret = qm_mb_nolock(qm, &mailbox);
 	mutex_unlock(&qm->mailbox_lock);
 
-	if (ret)
-		atomic64_inc(&qm->debug.dfx.mb_err_cnt);
 	return ret;
 }
 
@@ -1833,6 +1859,25 @@ static int qm_check_dev_error(struct hisi_qm *qm)
 	       (dev_val & (~qm->err_info.dev_ce_mask));
 }
 
+static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num)
+{
+	struct qm_mailbox mailbox;
+	int ret;
+
+	qm_mb_pre_init(&mailbox, QM_MB_CMD_DST, 0, fun_num, 0);
+	mutex_lock(&qm->mailbox_lock);
+	ret = qm_mb_nolock(qm, &mailbox);
+	if (ret)
+		goto err_unlock;
+
+	*msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		  ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
+
+err_unlock:
+	mutex_unlock(&qm->mailbox_lock);
+	return ret;
+}
+
 static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask)
 {
 	u32 val;
@@ -1850,6 +1895,108 @@ static int qm_wait_vf_prepare_finish(struct hisi_qm *qm)
 	return 0;
 }
 
+static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num)
+{
+	u32 val;
+
+	val = readl(qm->io_base + QM_IFC_INT_CFG);
+	val &= ~QM_IFC_SEND_ALL_VFS;	/* clear bits [6:0] first */
+	val |= fun_num;
+	writel(val, qm->io_base + QM_IFC_INT_CFG);
+
+	val = readl(qm->io_base + QM_IFC_INT_SET_P);
+	val |= QM_IFC_INT_SET_MASK;
+	writel(val, qm->io_base + QM_IFC_INT_SET_P);
+}
+
+static void qm_trigger_pf_interrupt(struct hisi_qm *qm)
+{
+	u32 val;
+
+	val = readl(qm->io_base + QM_IFC_INT_SET_V);
+	val |= QM_IFC_INT_SET_MASK;
+	writel(val, qm->io_base + QM_IFC_INT_SET_V);
+}
+
+static int qm_ping_all_vfs(struct hisi_qm *qm, u64 cmd)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 vfs_num = qm->vfs_num;
+	struct qm_mailbox mailbox;
+	u64 val = 0;
+	int cnt = 0;
+	int ret;
+	u32 i;
+
+	qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, QM_MB_PING_ALL_VFS, 0);
+	mutex_lock(&qm->mailbox_lock);
+	/* PF sends command to all VFs by mailbox */
+	ret = qm_mb_nolock(qm, &mailbox);
+	if (ret) {
+		dev_err(dev, "failed to send command to VFs!\n");
+		mutex_unlock(&qm->mailbox_lock);
+		return ret;
+	}
+
+	qm_trigger_vf_interrupt(qm, QM_IFC_SEND_ALL_VFS);
+	while (true) {
+		msleep(QM_WAIT_DST_ACK);
+		val = readq(qm->io_base + QM_IFC_READY_STATUS);
+		/* If all VFs acked, PF notifies VFs successfully. */
+		if (!(val & GENMASK(vfs_num, 1))) {
+			mutex_unlock(&qm->mailbox_lock);
+			return 0;
+		}
+
+		if (++cnt > QM_MAX_PF_WAIT_COUNT)
+			break;
+	}
+
+	mutex_unlock(&qm->mailbox_lock);
+
+	/* Check which vf respond timeout. */
+	for (i = 1; i <= vfs_num; i++) {
+		if (val & BIT(i))
+			dev_err(dev, "failed to get response from VF(%u)!\n", i);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int qm_ping_pf(struct hisi_qm *qm, u64 cmd)
+{
+	struct qm_mailbox mailbox;
+	int cnt = 0;
+	u32 val;
+	int ret;
+
+	qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, 0, 0);
+	mutex_lock(&qm->mailbox_lock);
+	ret = qm_mb_nolock(qm, &mailbox);
+	if (ret) {
+		dev_err(&qm->pdev->dev, "failed to send command to PF!\n");
+		goto unlock;
+	}
+
+	qm_trigger_pf_interrupt(qm);
+	/* Waiting for PF response */
+	while (true) {
+		msleep(QM_WAIT_DST_ACK);
+		val = readl(qm->io_base + QM_IFC_INT_SET_V);
+		if (!(val & QM_IFC_INT_STATUS_MASK))
+			break;
+
+		if (++cnt > QM_MAX_VF_WAIT_COUNT) {
+			ret = -ETIMEDOUT;
+			break;
+		}
+	}
+
+unlock:
+	mutex_unlock(&qm->mailbox_lock);
+	return ret;
+}
+
 static int qm_stop_qp(struct hisi_qp *qp)
 {
 	return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -1968,6 +2115,8 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
 	.hw_error_handle = qm_hw_error_handle_v2,
 	.stop_qp = qm_stop_qp,
 	.set_msi = qm_set_msi_v3,
+	.ping_all_vfs = qm_ping_all_vfs,
+	.ping_pf = qm_ping_pf,
 };
 
 static void *qm_get_avail_sqe(struct hisi_qp *qp)
@@ -4474,7 +4623,21 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
 
 static void qm_cmd_process(struct work_struct *cmd_process)
 {
-	/* handling messages sent by communication source */
+	struct hisi_qm *qm = container_of(cmd_process,
+					struct hisi_qm, cmd_process);
+	struct device *dev = &qm->pdev->dev;
+	u64 msg;
+	int ret;
+
+	/*
+	 * Get the msg from source by sending mailbox. Whether message is got
+	 * successfully, destination needs to ack source by clearing the interrupt.
+	 */
+	ret = qm_get_mb_cmd(qm, &msg, 0);
+	qm_clear_cmd_interrupt(qm, 0);
+	if (ret)
+		dev_err(dev, "failed to get msg from source!\n");
+
 }
 
 /**
-- 
GitLab


From 760fe22cf5e9f5d0212aa4c9aef555625c167627 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 29 May 2021 22:15:37 +0800
Subject: [PATCH 2154/3804] crypto: hisilicon/qm - update reset flow

This patch updates the reset flow based on PF/VF communication. VFs are
stopped after receiving the reset message from the PF, and then wait for the
reset to finish before restarting.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 279 +++++++++++++++++++++++++++++++---
 1 file changed, 262 insertions(+), 17 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 04560c3cdd78e..efa14c9ee9763 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -201,7 +201,10 @@
 #define QM_WAIT_DST_ACK			10
 #define QM_MAX_PF_WAIT_COUNT		10
 #define QM_MAX_VF_WAIT_COUNT		40
-
+#define QM_VF_RESET_WAIT_US            20000
+#define QM_VF_RESET_WAIT_CNT           3000
+#define QM_VF_RESET_WAIT_TIMEOUT_US    \
+	(QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT)
 
 #define QM_DFX_MB_CNT_VF		0x104010
 #define QM_DFX_DB_CNT_VF		0x104020
@@ -285,6 +288,16 @@ enum acc_err_result {
 	ACC_ERR_RECOVERED,
 };
 
+enum qm_mb_cmd {
+	QM_PF_FLR_PREPARE = 0x01,
+	QM_PF_SRST_PREPARE,
+	QM_PF_RESET_DONE,
+	QM_VF_PREPARE_DONE,
+	QM_VF_PREPARE_FAIL,
+	QM_VF_START_DONE,
+	QM_VF_START_FAIL,
+};
+
 struct qm_cqe {
 	__le32 rsvd0;
 	__le16 cmd_id;
@@ -1890,9 +1903,74 @@ static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask)
 	writel(val, qm->io_base + QM_IFC_INT_SOURCE_V);
 }
 
+static void qm_handle_vf_msg(struct hisi_qm *qm, u32 vf_id)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 cmd;
+	u64 msg;
+	int ret;
+
+	ret = qm_get_mb_cmd(qm, &msg, vf_id);
+	if (ret) {
+		dev_err(dev, "failed to get msg from VF(%u)!\n", vf_id);
+		return;
+	}
+
+	cmd = msg & QM_MB_CMD_DATA_MASK;
+	switch (cmd) {
+	case QM_VF_PREPARE_FAIL:
+		dev_err(dev, "failed to stop VF(%u)!\n", vf_id);
+		break;
+	case QM_VF_START_FAIL:
+		dev_err(dev, "failed to start VF(%u)!\n", vf_id);
+		break;
+	case QM_VF_PREPARE_DONE:
+	case QM_VF_START_DONE:
+		break;
+	default:
+		dev_err(dev, "unsupported cmd %u sent by VF(%u)!\n", cmd, vf_id);
+		break;
+	}
+}
+
 static int qm_wait_vf_prepare_finish(struct hisi_qm *qm)
 {
-	return 0;
+	struct device *dev = &qm->pdev->dev;
+	u32 vfs_num = qm->vfs_num;
+	int cnt = 0;
+	int ret = 0;
+	u64 val;
+	u32 i;
+
+	if (!qm->vfs_num || qm->ver < QM_HW_V3)
+		return 0;
+
+	while (true) {
+		val = readq(qm->io_base + QM_IFC_INT_SOURCE_P);
+		/* All VFs send command to PF, break */
+		if ((val & GENMASK(vfs_num, 1)) == GENMASK(vfs_num, 1))
+			break;
+
+		if (++cnt > QM_MAX_PF_WAIT_COUNT) {
+			ret = -EBUSY;
+			break;
+		}
+
+		msleep(QM_WAIT_DST_ACK);
+	}
+
+	/* PF check VFs msg */
+	for (i = 1; i <= vfs_num; i++) {
+		if (val & BIT(i))
+			qm_handle_vf_msg(qm, i);
+		else
+			dev_err(dev, "VF(%u) not ping PF!\n", i);
+	}
+
+	/* PF clear interrupt to ack VFs */
+	qm_clear_cmd_interrupt(qm, val);
+
+	return ret;
 }
 
 static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num)
@@ -4038,7 +4116,8 @@ stop_fail:
 	return ret;
 }
 
-static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason)
+static int qm_try_stop_vfs(struct hisi_qm *qm, u64 cmd,
+			   enum qm_stop_reason stop_reason)
 {
 	struct pci_dev *pdev = qm->pdev;
 	int ret;
@@ -4046,9 +4125,16 @@ static int qm_try_stop_vfs(struct hisi_qm *qm, enum qm_stop_reason stop_reason)
 	if (!qm->vfs_num)
 		return 0;
 
-	ret = qm_vf_reset_prepare(qm, stop_reason);
-	if (ret)
-		pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret);
+	/* Kunpeng930 supports to notify VFs to stop before PF reset */
+	if (qm->ops->ping_all_vfs) {
+		ret = qm->ops->ping_all_vfs(qm, cmd);
+		if (ret)
+			pci_err(pdev, "failed to send cmd to all VFs before PF reset!\n");
+	} else {
+		ret = qm_vf_reset_prepare(qm, stop_reason);
+		if (ret)
+			pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret);
+	}
 
 	return ret;
 }
@@ -4072,7 +4158,14 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm)
 	struct pci_dev *pdev = qm->pdev;
 	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
 
-	return qm_wait_reset_finish(pf_qm);
+	/*
+	 * PF and VF on host do not support resetting at the
+	 * same time on Kunpeng920.
+	 */
+	if (qm->ver < QM_HW_V3)
+		return qm_wait_reset_finish(pf_qm);
+
+	return qm_wait_reset_finish(qm);
 }
 
 static void qm_reset_bit_clear(struct hisi_qm *qm)
@@ -4080,7 +4173,10 @@ static void qm_reset_bit_clear(struct hisi_qm *qm)
 	struct pci_dev *pdev = qm->pdev;
 	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
 
-	clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+	if (qm->ver < QM_HW_V3)
+		clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
 }
 
 static int qm_controller_reset_prepare(struct hisi_qm *qm)
@@ -4094,7 +4190,11 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm)
 		return ret;
 	}
 
-	ret = qm_try_stop_vfs(qm, QM_SOFT_RESET);
+	/* PF obtains the information of VF by querying the register. */
+	qm_cmd_uninit(qm);
+
+	/* Whether VFs stop successfully, soft reset will continue. */
+	ret = qm_try_stop_vfs(qm, QM_PF_SRST_PREPARE, QM_SOFT_RESET);
 	if (ret)
 		pci_err(pdev, "failed to stop vfs by pf in soft reset.\n");
 
@@ -4243,7 +4343,7 @@ restart_fail:
 	return ret;
 }
 
-static int qm_try_start_vfs(struct hisi_qm *qm)
+static int qm_try_start_vfs(struct hisi_qm *qm, enum qm_mb_cmd cmd)
 {
 	struct pci_dev *pdev = qm->pdev;
 	int ret;
@@ -4257,9 +4357,16 @@ static int qm_try_start_vfs(struct hisi_qm *qm)
 		return ret;
 	}
 
-	ret = qm_vf_reset_done(qm);
-	if (ret)
-		pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret);
+	/* Kunpeng930 supports to notify VFs to start after PF reset. */
+	if (qm->ops->ping_all_vfs) {
+		ret = qm->ops->ping_all_vfs(qm, cmd);
+		if (ret)
+			pci_warn(pdev, "failed to send cmd to all VFs after PF reset!\n");
+	} else {
+		ret = qm_vf_reset_done(qm);
+		if (ret)
+			pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret);
+	}
 
 	return ret;
 }
@@ -4363,7 +4470,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 		return ret;
 	}
 
-	ret = qm_try_start_vfs(qm);
+	ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE);
 	if (ret)
 		pci_err(pdev, "failed to start vfs by pf in soft reset.\n");
 
@@ -4371,6 +4478,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 	if (ret)
 		pci_err(pdev, "failed to start by vfs in soft reset!\n");
 
+	qm_cmd_init(qm);
 	qm_restart_done(qm);
 
 	qm_reset_bit_clear(qm);
@@ -4462,7 +4570,11 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
 		return;
 	}
 
-	ret = qm_try_stop_vfs(qm, QM_SOFT_RESET);
+	/* PF obtains the information of VF by querying the register. */
+	if (qm->fun_type == QM_HW_PF)
+		qm_cmd_uninit(qm);
+
+	ret = qm_try_stop_vfs(qm, QM_PF_FLR_PREPARE, QM_FLR);
 	if (ret)
 		pci_err(pdev, "failed to stop vfs by pf in FLR.\n");
 
@@ -4517,7 +4629,7 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
 		goto flr_done;
 	}
 
-	ret = qm_try_start_vfs(qm);
+	ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE);
 	if (ret)
 		pci_err(pdev, "failed to start vfs by pf in FLR.\n");
 
@@ -4526,6 +4638,9 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
 		pci_err(pdev, "failed to start by vfs in FLR!\n");
 
 flr_done:
+	if (qm->fun_type == QM_HW_PF)
+		qm_cmd_init(qm);
+
 	if (qm_flr_reset_complete(pdev))
 		pci_info(pdev, "FLR reset complete\n");
 
@@ -4621,12 +4736,128 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
 
 }
 
+static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
+				   enum qm_stop_reason stop_reason)
+{
+	enum qm_mb_cmd cmd = QM_VF_PREPARE_DONE;
+	struct pci_dev *pdev = qm->pdev;
+	int ret;
+
+	ret = qm_reset_prepare_ready(qm);
+	if (ret) {
+		dev_err(&pdev->dev, "reset prepare not ready!\n");
+		atomic_set(&qm->status.flags, QM_STOP);
+		cmd = QM_VF_PREPARE_FAIL;
+		goto err_prepare;
+	}
+
+	ret = hisi_qm_stop(qm, stop_reason);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to stop QM, ret = %d.\n", ret);
+		atomic_set(&qm->status.flags, QM_STOP);
+		cmd = QM_VF_PREPARE_FAIL;
+		goto err_prepare;
+	}
+
+err_prepare:
+	pci_save_state(pdev);
+	ret = qm->ops->ping_pf(qm, cmd);
+	if (ret)
+		dev_warn(&pdev->dev, "PF responds timeout in reset prepare!\n");
+}
+
+static void qm_pf_reset_vf_done(struct hisi_qm *qm)
+{
+	enum qm_mb_cmd cmd = QM_VF_START_DONE;
+	struct pci_dev *pdev = qm->pdev;
+	int ret;
+
+	pci_restore_state(pdev);
+	ret = hisi_qm_start(qm);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to start QM, ret = %d.\n", ret);
+		cmd = QM_VF_START_FAIL;
+	}
+
+	ret = qm->ops->ping_pf(qm, cmd);
+	if (ret)
+		dev_warn(&pdev->dev, "PF responds timeout in reset done!\n");
+
+	qm_reset_bit_clear(qm);
+}
+
+static int qm_wait_pf_reset_finish(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 val, cmd;
+	u64 msg;
+	int ret;
+
+	/* Wait for reset to finish */
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_IFC_INT_SOURCE_V, val,
+					 val == BIT(0), QM_VF_RESET_WAIT_US,
+					 QM_VF_RESET_WAIT_TIMEOUT_US);
+	/* hardware completion status should be available by this time */
+	if (ret) {
+		dev_err(dev, "couldn't get reset done status from PF, timeout!\n");
+		return -ETIMEDOUT;
+	}
+
+	/*
+	 * Whether message is got successfully,
+	 * VF needs to ack PF by clearing the interrupt.
+	 */
+	ret = qm_get_mb_cmd(qm, &msg, 0);
+	qm_clear_cmd_interrupt(qm, 0);
+	if (ret) {
+		dev_err(dev, "failed to get msg from PF in reset done!\n");
+		return ret;
+	}
+
+	cmd = msg & QM_MB_CMD_DATA_MASK;
+	if (cmd != QM_PF_RESET_DONE) {
+		dev_err(dev, "the cmd(%u) is not reset done!\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static void qm_pf_reset_vf_process(struct hisi_qm *qm,
+				   enum qm_stop_reason stop_reason)
+{
+	struct device *dev = &qm->pdev->dev;
+	int ret;
+
+	dev_info(dev, "device reset start...\n");
+
+	/* The message is obtained by querying the register during resetting */
+	qm_cmd_uninit(qm);
+	qm_pf_reset_vf_prepare(qm, stop_reason);
+
+	ret = qm_wait_pf_reset_finish(qm);
+	if (ret)
+		goto err_get_status;
+
+	qm_pf_reset_vf_done(qm);
+	qm_cmd_init(qm);
+
+	dev_info(dev, "device reset done.\n");
+
+	return;
+
+err_get_status:
+	qm_cmd_init(qm);
+	qm_reset_bit_clear(qm);
+}
+
 static void qm_cmd_process(struct work_struct *cmd_process)
 {
 	struct hisi_qm *qm = container_of(cmd_process,
 					struct hisi_qm, cmd_process);
 	struct device *dev = &qm->pdev->dev;
 	u64 msg;
+	u32 cmd;
 	int ret;
 
 	/*
@@ -4635,9 +4866,23 @@ static void qm_cmd_process(struct work_struct *cmd_process)
 	 */
 	ret = qm_get_mb_cmd(qm, &msg, 0);
 	qm_clear_cmd_interrupt(qm, 0);
-	if (ret)
+	if (ret) {
 		dev_err(dev, "failed to get msg from source!\n");
+		return;
+	}
 
+	cmd = msg & QM_MB_CMD_DATA_MASK;
+	switch (cmd) {
+	case QM_PF_FLR_PREPARE:
+		qm_pf_reset_vf_process(qm, QM_FLR);
+		break;
+	case QM_PF_SRST_PREPARE:
+		qm_pf_reset_vf_process(qm, QM_SOFT_RESET);
+		break;
+	default:
+		dev_err(dev, "unsupported cmd %u sent by PF!\n", cmd);
+		break;
+	}
 }
 
 /**
-- 
GitLab


From 7ee0e638a526b2d1f09c714f86d82dfd7628f322 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 1 Jun 2021 21:30:56 +0200
Subject: [PATCH 2155/3804] x86/alternative: Align insn bytes vertically

For easier inspection which bytes have changed.

For example:

  feat: 7*32+12, old: (__x86_indirect_thunk_r10+0x0/0x20 (ffffffff81c02480) len: 17), repl: (ffffffff897813aa, len: 17)
  ffffffff81c02480:   old_insn: 41 ff e2 90 90 90 90 90 90 90 90 90 90 90 90 90 90
  ffffffff897813aa:   rpl_insn: e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 14 24 c3
  ffffffff81c02480: final_insn: e8 07 00 00 00 f3 90 0f ae e8 eb f9 4c 89 14 24 c3

No functional changes.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210601193713.16190-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 75c752b0628c1..227c4a8b145a3 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -273,8 +273,8 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 			instr, instr, a->instrlen,
 			replacement, a->replacementlen);
 
-		DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
-		DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
+		DUMP_BYTES(instr, a->instrlen, "%px:   old_insn: ", instr);
+		DUMP_BYTES(replacement, a->replacementlen, "%px:   rpl_insn: ", replacement);
 
 		memcpy(insn_buff, replacement, a->replacementlen);
 		insn_buff_sz = a->replacementlen;
-- 
GitLab


From acbef0922c7db4f5ca57d6b5573f104baa485e88 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 3 Jun 2021 15:24:25 +0800
Subject: [PATCH 2156/3804] dmaengine: ipu: fix doc warning in ipu_irq.c

Fix the following make W=1 warning and correct description:

  drivers/dma/ipu/ipu_irq.c:238: warning: expecting prototype for ipu_irq_map(). Prototype was for ipu_irq_unmap() instead

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210603072425.2973570-1-yangyingliang@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ipu/ipu_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index 0d5c42f7bfa4f..97d9a6f04f2a2 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -230,7 +230,7 @@ out:
 }
 
 /**
- * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * ipu_irq_unmap() - unmap an IPU interrupt source
  * @source:	interrupt source bit position (see ipu_irq_map())
  * @return:	0 or negative error code
  */
-- 
GitLab


From 320232caf1d8febea17312dab4b2dfe02e033520 Mon Sep 17 00:00:00 2001
From: Mark Pearson <markpearson@lenovo.com>
Date: Wed, 2 Jun 2021 13:12:51 -0400
Subject: [PATCH 2157/3804] ASoC: AMD Renoir: Remove fix for DMI entry on
 Lenovo 2020 platforms

Unfortunately the previous patch to fix issues using the AMD ACP bridge
has the side effect of breaking the dmic in other cases and needs to be
reverted.

Removing the changes while we revisit the fix and find something better.
Apologies for the churn.

Suggested-by: Gabriel Craciunescu <unix.or.die@gmail.com>
Signed-off-by: Mark Pearson <markpearson@lenovo.com>
Link: https://lore.kernel.org/r/20210602171251.3243-1-markpearson@lenovo.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/amd/renoir/rn-pci-acp3x.c | 35 -----------------------------
 1 file changed, 35 deletions(-)

diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c
index c9fb1c8fbf8c6..19438da5dfa5d 100644
--- a/sound/soc/amd/renoir/rn-pci-acp3x.c
+++ b/sound/soc/amd/renoir/rn-pci-acp3x.c
@@ -199,41 +199,6 @@ static const struct dmi_system_id rn_acp_quirk_table[] = {
 			DMI_EXACT_MATCH(DMI_BOARD_NAME, "20NLCTO1WW"),
 		}
 	},
-	{
-		/* Lenovo ThinkPad P14s Gen 1 (20Y1) */
-		.matches = {
-			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_BOARD_NAME, "20Y1"),
-		}
-	},
-	{
-		/* Lenovo ThinkPad T14s Gen1 */
-		.matches = {
-			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_BOARD_NAME, "20UH"),
-		}
-	},
-	{
-		/* Lenovo ThinkPad T14s Gen1 Campus */
-		.matches = {
-			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_BOARD_NAME, "20UJ"),
-		}
-	},
-	{
-		/* Lenovo ThinkPad T14 Gen 1*/
-		.matches = {
-			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_BOARD_NAME, "20UD"),
-		}
-	},
-	{
-		/* Lenovo ThinkPad X13 Gen 1*/
-		.matches = {
-			DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-			DMI_MATCH(DMI_BOARD_NAME, "20UF"),
-		}
-	},
 	{}
 };
 
-- 
GitLab


From d031d99b02eaf7363c33f5b27b38086cc8104082 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 24 May 2021 11:34:48 +0200
Subject: [PATCH 2158/3804] ASoC: meson: gx-card: fix sound-dai dt schema

There is a fair amount of warnings when running 'make dtbs_check' with
amlogic,gx-sound-card.yaml.

Ex:
arch/arm64/boot/dts/amlogic/meson-gxm-q200.dt.yaml: sound: dai-link-0:sound-dai:0:1: missing phandle tag in 0
arch/arm64/boot/dts/amlogic/meson-gxm-q200.dt.yaml: sound: dai-link-0:sound-dai:0:2: missing phandle tag in 0
arch/arm64/boot/dts/amlogic/meson-gxm-q200.dt.yaml: sound: dai-link-0:sound-dai:0: [66, 0, 0] is too long

The reason is that the sound-dai phandle provided has cells, and in such
case the schema should use 'phandle-array' instead of 'phandle'.

Fixes: fd00366b8e41 ("ASoC: meson: gx: add sound card dt-binding documentation")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lore.kernel.org/r/20210524093448.357140-1-jbrunet@baylibre.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/sound/amlogic,gx-sound-card.yaml      | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
index db61f0731a203..2e35aeaa8781d 100644
--- a/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
+++ b/Documentation/devicetree/bindings/sound/amlogic,gx-sound-card.yaml
@@ -57,7 +57,7 @@ patternProperties:
           rate
 
       sound-dai:
-        $ref: /schemas/types.yaml#/definitions/phandle
+        $ref: /schemas/types.yaml#/definitions/phandle-array
         description: phandle of the CPU DAI
 
     patternProperties:
@@ -71,7 +71,7 @@ patternProperties:
 
         properties:
           sound-dai:
-            $ref: /schemas/types.yaml#/definitions/phandle
+            $ref: /schemas/types.yaml#/definitions/phandle-array
             description: phandle of the codec DAI
 
         required:
-- 
GitLab


From 8bef925e37bdc9b6554b85eda16ced9a8e3c135f Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Sun, 30 May 2021 22:34:46 +0200
Subject: [PATCH 2159/3804] ASoC: tas2562: Fix TDM_CFG0_SAMPRATE values

TAS2562_TDM_CFG0_SAMPRATE_MASK starts at bit 1, not 0.
So all values need to be left shifted by 1.

Signed-off-by: Richard Weinberger <richard@nod.at>
Link: https://lore.kernel.org/r/20210530203446.19022-1-richard@nod.at
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/tas2562.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sound/soc/codecs/tas2562.h b/sound/soc/codecs/tas2562.h
index 81866aeb3fbfa..55b2a1f52ca37 100644
--- a/sound/soc/codecs/tas2562.h
+++ b/sound/soc/codecs/tas2562.h
@@ -57,13 +57,13 @@
 #define TAS2562_TDM_CFG0_RAMPRATE_MASK		BIT(5)
 #define TAS2562_TDM_CFG0_RAMPRATE_44_1		BIT(5)
 #define TAS2562_TDM_CFG0_SAMPRATE_MASK		GENMASK(3, 1)
-#define TAS2562_TDM_CFG0_SAMPRATE_7305_8KHZ	0x0
-#define TAS2562_TDM_CFG0_SAMPRATE_14_7_16KHZ	0x1
-#define TAS2562_TDM_CFG0_SAMPRATE_22_05_24KHZ	0x2
-#define TAS2562_TDM_CFG0_SAMPRATE_29_4_32KHZ	0x3
-#define TAS2562_TDM_CFG0_SAMPRATE_44_1_48KHZ	0x4
-#define TAS2562_TDM_CFG0_SAMPRATE_88_2_96KHZ	0x5
-#define TAS2562_TDM_CFG0_SAMPRATE_176_4_192KHZ	0x6
+#define TAS2562_TDM_CFG0_SAMPRATE_7305_8KHZ	(0x0 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_14_7_16KHZ	(0x1 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_22_05_24KHZ	(0x2 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_29_4_32KHZ	(0x3 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_44_1_48KHZ	(0x4 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_88_2_96KHZ	(0x5 << 1)
+#define TAS2562_TDM_CFG0_SAMPRATE_176_4_192KHZ	(0x6 << 1)
 
 #define TAS2562_TDM_CFG2_RIGHT_JUSTIFY	BIT(6)
 
-- 
GitLab


From 50bec7fb4cb1bcf9d387046b6dec7186590791ec Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 29 May 2021 09:32:36 +0800
Subject: [PATCH 2160/3804] regulator: hi6421v600: Fix .vsel_mask setting

Take ldo3_voltages as example, the ARRAY_SIZE(ldo3_voltages) is 16.
i.e. the valid selector is 0 ~ 0xF.
But in current code the vsel_mask is "(1 << 15) - 1", i.e. 0x7FFF. Fix it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210529013236.373847-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/staging/hikey9xx/hi6421v600-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/hikey9xx/hi6421v600-regulator.c b/drivers/staging/hikey9xx/hi6421v600-regulator.c
index 612b964b13406..d6340bb492967 100644
--- a/drivers/staging/hikey9xx/hi6421v600-regulator.c
+++ b/drivers/staging/hikey9xx/hi6421v600-regulator.c
@@ -83,7 +83,7 @@ static const unsigned int ldo34_voltages[] = {
 			.owner		= THIS_MODULE,			       \
 			.volt_table	= vtable,			       \
 			.n_voltages	= ARRAY_SIZE(vtable),		       \
-			.vsel_mask	= (1 << (ARRAY_SIZE(vtable) - 1)) - 1, \
+			.vsel_mask	= ARRAY_SIZE(vtable) - 1,	       \
 			.vsel_reg	= vreg,				       \
 			.enable_reg	= ereg,				       \
 			.enable_mask	= emask,			       \
-- 
GitLab


From d38fa9a155b2829b7e2cfcf8a4171b6dd3672808 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Thu, 3 Jun 2021 09:34:21 +0200
Subject: [PATCH 2161/3804] spi: stm32-qspi: Always wait BUSY bit to be cleared
 in stm32_qspi_wait_cmd()

On the U-Boot side, an issue was encountered when the QSPI source clock
runs at a low frequency (24 MHz for example): waiting for the TCF bit to
be set did not ensure that all data had been sent out of the FIFO, so we
should also wait for the BUSY bit to be cleared.

To prevent a similar issue in the kernel driver, implement the same
behavior by always waiting for the BUSY bit to be cleared.

Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Link: https://lore.kernel.org/r/20210603073421.8441-1-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32-qspi.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 7e640ccc7e774..594f641362086 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -294,7 +294,7 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
 	int err = 0;
 
 	if (!op->data.nbytes)
-		return stm32_qspi_wait_nobusy(qspi);
+		goto wait_nobusy;
 
 	if (readl_relaxed(qspi->io_base + QSPI_SR) & SR_TCF)
 		goto out;
@@ -315,6 +315,9 @@ static int stm32_qspi_wait_cmd(struct stm32_qspi *qspi,
 out:
 	/* clear flags */
 	writel_relaxed(FCR_CTCF | FCR_CTEF, qspi->io_base + QSPI_FCR);
+wait_nobusy:
+	if (!err)
+		err = stm32_qspi_wait_nobusy(qspi);
 
 	return err;
 }
-- 
GitLab


From 8f4ef0788c68bf99370a91df5cb83f90d707583e Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 23 May 2021 15:23:19 +0800
Subject: [PATCH 2162/3804] regulator: max77802: Remove .set_ramp_delay from
 max77802_buck_dvs_ops

max77802_set_ramp_delay_2bit() returns -EINVAL when id > MAX77802_BUCK4.
This was a leftover in commit b0615f1da543
("regulator: max77802: Split regulator operations for BUCKs").

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210523072320.2174443-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77802-regulator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index 7b8ec8c0bd151..903c6fc10efbc 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -345,7 +345,6 @@ static const struct regulator_ops max77802_buck_dvs_ops = {
 	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
 	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
 	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
-	.set_ramp_delay		= max77802_set_ramp_delay_2bit,
 	.set_suspend_disable	= max77802_set_suspend_disable,
 };
 
-- 
GitLab


From 8cdded982a6cf95d5ed7e3a014fb3d8dde6b3a94 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 23 May 2021 15:23:20 +0800
Subject: [PATCH 2163/3804] regulator: max77802: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210523072320.2174443-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77802-regulator.c | 69 +++++---------------------
 1 file changed, 12 insertions(+), 57 deletions(-)

diff --git a/drivers/regulator/max77802-regulator.c b/drivers/regulator/max77802-regulator.c
index 903c6fc10efbc..21e0eb0f43f94 100644
--- a/drivers/regulator/max77802-regulator.c
+++ b/drivers/regulator/max77802-regulator.c
@@ -43,15 +43,14 @@
 #define MAX77802_OFF_PWRREQ		0x1
 #define MAX77802_LP_PWRREQ		0x2
 
-/* MAX77802 has two register formats: 2-bit and 4-bit */
-static const unsigned int ramp_table_77802_2bit[] = {
+static const unsigned int max77802_buck234_ramp_table[] = {
 	12500,
 	25000,
 	50000,
 	100000,
 };
 
-static unsigned int ramp_table_77802_4bit[] = {
+static const unsigned int max77802_buck16_ramp_table[] = {
 	1000,	2000,	3030,	4000,
 	5000,	5880,	7140,	8330,
 	9090,	10000,	11110,	12500,
@@ -221,58 +220,6 @@ static int max77802_enable(struct regulator_dev *rdev)
 				  max77802->opmode[id] << shift);
 }
 
-static int max77802_find_ramp_value(struct regulator_dev *rdev,
-				    const unsigned int limits[], int size,
-				    unsigned int ramp_delay)
-{
-	int i;
-
-	for (i = 0; i < size; i++) {
-		if (ramp_delay <= limits[i])
-			return i;
-	}
-
-	/* Use maximum value for no ramp control */
-	dev_warn(&rdev->dev, "%s: ramp_delay: %d not supported, setting 100000\n",
-		 rdev->desc->name, ramp_delay);
-	return size - 1;
-}
-
-/* Used for BUCKs 2-4 */
-static int max77802_set_ramp_delay_2bit(struct regulator_dev *rdev,
-					int ramp_delay)
-{
-	int id = rdev_get_id(rdev);
-	unsigned int ramp_value;
-
-	if (id > MAX77802_BUCK4) {
-		dev_warn(&rdev->dev,
-			 "%s: regulator: ramp delay not supported\n",
-			 rdev->desc->name);
-		return -EINVAL;
-	}
-	ramp_value = max77802_find_ramp_value(rdev, ramp_table_77802_2bit,
-				ARRAY_SIZE(ramp_table_77802_2bit), ramp_delay);
-
-	return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
-				  MAX77802_RAMP_RATE_MASK_2BIT,
-				  ramp_value << MAX77802_RAMP_RATE_SHIFT_2BIT);
-}
-
-/* For BUCK1, 6 */
-static int max77802_set_ramp_delay_4bit(struct regulator_dev *rdev,
-					    int ramp_delay)
-{
-	unsigned int ramp_value;
-
-	ramp_value = max77802_find_ramp_value(rdev, ramp_table_77802_4bit,
-				ARRAY_SIZE(ramp_table_77802_4bit), ramp_delay);
-
-	return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
-				  MAX77802_RAMP_RATE_MASK_4BIT,
-				  ramp_value << MAX77802_RAMP_RATE_SHIFT_4BIT);
-}
-
 /*
  * LDOs 2, 4-19, 22-35
  */
@@ -316,7 +263,7 @@ static const struct regulator_ops max77802_buck_16_dvs_ops = {
 	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
 	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
 	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
-	.set_ramp_delay		= max77802_set_ramp_delay_4bit,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 	.set_suspend_disable	= max77802_set_suspend_disable,
 };
 
@@ -330,7 +277,7 @@ static const struct regulator_ops max77802_buck_234_ops = {
 	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
 	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
 	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
-	.set_ramp_delay		= max77802_set_ramp_delay_2bit,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 	.set_suspend_disable	= max77802_set_suspend_disable,
 	.set_suspend_mode	= max77802_set_suspend_mode,
 };
@@ -408,6 +355,10 @@ static const struct regulator_ops max77802_buck_dvs_ops = {
 	.vsel_mask	= MAX77802_DVS_VSEL_MASK,			\
 	.enable_reg	= MAX77802_REG_BUCK ## num ## CTRL,		\
 	.enable_mask	= MAX77802_OPMODE_MASK,				\
+	.ramp_reg	= MAX77802_REG_BUCK ## num ## CTRL,		\
+	.ramp_mask	= MAX77802_RAMP_RATE_MASK_4BIT,			\
+	.ramp_delay_table = max77802_buck16_ramp_table,			\
+	.n_ramp_values	= ARRAY_SIZE(max77802_buck16_ramp_table),	\
 	.of_map_mode	= max77802_map_mode,				\
 }
 
@@ -430,6 +381,10 @@ static const struct regulator_ops max77802_buck_dvs_ops = {
 	.enable_reg	= MAX77802_REG_BUCK ## num ## CTRL1,		\
 	.enable_mask	= MAX77802_OPMODE_MASK <<			\
 				MAX77802_OPMODE_BUCK234_SHIFT,		\
+	.ramp_reg	= MAX77802_REG_BUCK ## num ## CTRL1,		\
+	.ramp_mask	= MAX77802_RAMP_RATE_MASK_2BIT,			\
+	.ramp_delay_table = max77802_buck234_ramp_table,		\
+	.n_ramp_values	= ARRAY_SIZE(max77802_buck234_ramp_table),	\
 	.of_map_mode	= max77802_map_mode,				\
 }
 
-- 
GitLab


From 30b38b805b36c03db3703ef62397111c783b5f3b Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 25 May 2021 20:40:16 +0800
Subject: [PATCH 2164/3804] regulator: fan53555: Fix missing
 slew_reg/mask/shift settings for FAN53526

The di->slew_reg/di->slew_mask/di->slew_shift fields were not set in the
current code. Fix it.

Fixes: f2a9eb975ab2 ("regulator: fan53555: Add support for FAN53526")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210525124017.2550029-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index f3f49cf3731b7..9770a4df83d48 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -296,6 +296,9 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di)
 		return -EINVAL;
 	}
 
+	di->slew_reg = FAN53555_CONTROL;
+	di->slew_mask = CTL_SLEW_MASK;
+	di->slew_shift = CTL_SLEW_SHIFT;
 	di->vsel_count = FAN53526_NVOLTAGES;
 
 	return 0;
-- 
GitLab


From b61ac767db4d62540732cdac9f1820e56b9a5008 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 25 May 2021 20:40:17 +0800
Subject: [PATCH 2165/3804] regulator: fan53555: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210525124017.2550029-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 63 ++++++++++--------------------------
 1 file changed, 17 insertions(+), 46 deletions(-)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index 9770a4df83d48..eb67500ad279e 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -126,7 +126,8 @@ struct fan53555_device_info {
 	/* Slew rate */
 	unsigned int slew_reg;
 	unsigned int slew_mask;
-	unsigned int slew_shift;
+	const unsigned int *ramp_delay_table;
+	unsigned int n_ramp_values;
 	unsigned int slew_rate;
 };
 
@@ -200,7 +201,7 @@ static unsigned int fan53555_get_mode(struct regulator_dev *rdev)
 		return REGULATOR_MODE_NORMAL;
 }
 
-static const int slew_rates[] = {
+static const unsigned int slew_rates[] = {
 	64000,
 	32000,
 	16000,
@@ -211,51 +212,13 @@ static const int slew_rates[] = {
 	  500,
 };
 
-static const int tcs_slew_rates[] = {
+static const unsigned int tcs_slew_rates[] = {
 	18700,
 	 9300,
 	 4600,
 	 2300,
 };
 
-static int fan53555_set_ramp(struct regulator_dev *rdev, int ramp)
-{
-	struct fan53555_device_info *di = rdev_get_drvdata(rdev);
-	int regval = -1, i;
-	const int *slew_rate_t;
-	int slew_rate_n;
-
-	switch (di->vendor) {
-	case FAN53526_VENDOR_FAIRCHILD:
-	case FAN53555_VENDOR_FAIRCHILD:
-	case FAN53555_VENDOR_SILERGY:
-		slew_rate_t = slew_rates;
-		slew_rate_n = ARRAY_SIZE(slew_rates);
-		break;
-	case FAN53526_VENDOR_TCS:
-		slew_rate_t = tcs_slew_rates;
-		slew_rate_n = ARRAY_SIZE(tcs_slew_rates);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	for (i = 0; i < slew_rate_n; i++) {
-		if (ramp <= slew_rate_t[i])
-			regval = i;
-		else
-			break;
-	}
-
-	if (regval < 0) {
-		dev_err(di->dev, "unsupported ramp value %d\n", ramp);
-		return -EINVAL;
-	}
-
-	return regmap_update_bits(rdev->regmap, di->slew_reg,
-				  di->slew_mask, regval << di->slew_shift);
-}
-
 static const struct regulator_ops fan53555_regulator_ops = {
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
@@ -268,7 +231,7 @@ static const struct regulator_ops fan53555_regulator_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.set_mode = fan53555_set_mode,
 	.get_mode = fan53555_get_mode,
-	.set_ramp_delay = fan53555_set_ramp,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 	.set_suspend_enable = fan53555_set_suspend_enable,
 	.set_suspend_disable = fan53555_set_suspend_disable,
 };
@@ -298,7 +261,8 @@ static int fan53526_voltages_setup_fairchild(struct fan53555_device_info *di)
 
 	di->slew_reg = FAN53555_CONTROL;
 	di->slew_mask = CTL_SLEW_MASK;
-	di->slew_shift = CTL_SLEW_SHIFT;
+	di->ramp_delay_table = slew_rates;
+	di->n_ramp_values = ARRAY_SIZE(slew_rates);
 	di->vsel_count = FAN53526_NVOLTAGES;
 
 	return 0;
@@ -343,7 +307,8 @@ static int fan53555_voltages_setup_fairchild(struct fan53555_device_info *di)
 	}
 	di->slew_reg = FAN53555_CONTROL;
 	di->slew_mask = CTL_SLEW_MASK;
-	di->slew_shift = CTL_SLEW_SHIFT;
+	di->ramp_delay_table = slew_rates;
+	di->n_ramp_values = ARRAY_SIZE(slew_rates);
 	di->vsel_count = FAN53555_NVOLTAGES;
 
 	return 0;
@@ -365,7 +330,8 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di)
 	}
 	di->slew_reg = FAN53555_CONTROL;
 	di->slew_mask = CTL_SLEW_MASK;
-	di->slew_shift = CTL_SLEW_SHIFT;
+	di->ramp_delay_table = slew_rates;
+	di->n_ramp_values = ARRAY_SIZE(slew_rates);
 	di->vsel_count = FAN53555_NVOLTAGES;
 
 	return 0;
@@ -377,7 +343,8 @@ static int fan53526_voltages_setup_tcs(struct fan53555_device_info *di)
 	case TCS4525_CHIP_ID_12:
 		di->slew_reg = TCS4525_TIME;
 		di->slew_mask = TCS_SLEW_MASK;
-		di->slew_shift = TCS_SLEW_SHIFT;
+		di->ramp_delay_table = tcs_slew_rates;
+		di->n_ramp_values = ARRAY_SIZE(tcs_slew_rates);
 
 		/* Init voltage range and step */
 		di->vsel_min = 600000;
@@ -516,6 +483,10 @@ static int fan53555_regulator_register(struct fan53555_device_info *di,
 	rdesc->uV_step = di->vsel_step;
 	rdesc->vsel_reg = di->vol_reg;
 	rdesc->vsel_mask = di->vsel_count - 1;
+	rdesc->ramp_reg = di->slew_reg;
+	rdesc->ramp_mask = di->slew_mask;
+	rdesc->ramp_delay_table = di->ramp_delay_table;
+	rdesc->n_ramp_values = di->n_ramp_values;
 	rdesc->owner = THIS_MODULE;
 
 	rdev = devm_regulator_register(di->dev, &di->desc, config);
-- 
GitLab


From 6041d5fe512cd6ceaf730cdfa1786f2bc9b5b1b5 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 25 May 2021 18:04:05 +0800
Subject: [PATCH 2166/3804] regulator: bd9576: Constify the voltage tables

Also use unsigned int instead of int for the voltage tables.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210525100405.2506483-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd9576-regulator.c | 34 ++++++++++++++++------------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c
index 204a2da054f53..8e63169eebae3 100644
--- a/drivers/regulator/bd9576-regulator.c
+++ b/drivers/regulator/bd9576-regulator.c
@@ -21,20 +21,26 @@
 #define BD957X_VOUTS4_BASE_VOLT	1030000
 #define BD957X_VOUTS34_NUM_VOLT	32
 
-static int vout1_volt_table[] = {5000000, 4900000, 4800000, 4700000, 4600000,
-				 4500000, 4500000, 4500000, 5000000, 5100000,
-				 5200000, 5300000, 5400000, 5500000, 5500000,
-				 5500000};
-
-static int vout2_volt_table[] = {1800000, 1780000, 1760000, 1740000, 1720000,
-				 1700000, 1680000, 1660000, 1800000, 1820000,
-				 1840000, 1860000, 1880000, 1900000, 1920000,
-				 1940000};
-
-static int voutl1_volt_table[] = {2500000, 2540000, 2580000, 2620000, 2660000,
-				  2700000, 2740000, 2780000, 2500000, 2460000,
-				  2420000, 2380000, 2340000, 2300000, 2260000,
-				  2220000};
+static const unsigned int vout1_volt_table[] = {
+	5000000, 4900000, 4800000, 4700000, 4600000,
+	4500000, 4500000, 4500000, 5000000, 5100000,
+	5200000, 5300000, 5400000, 5500000, 5500000,
+	5500000
+};
+
+static const unsigned int vout2_volt_table[] = {
+	1800000, 1780000, 1760000, 1740000, 1720000,
+	1700000, 1680000, 1660000, 1800000, 1820000,
+	1840000, 1860000, 1880000, 1900000, 1920000,
+	1940000
+};
+
+static const unsigned int voutl1_volt_table[] = {
+	2500000, 2540000, 2580000, 2620000, 2660000,
+	2700000, 2740000, 2780000, 2500000, 2460000,
+	2420000, 2380000, 2340000, 2300000, 2260000,
+	2220000
+};
 
 struct bd957x_regulator_data {
 	struct regulator_desc desc;
-- 
GitLab


From c955a0cc8a286e5da1ebb88c19201e9bab8c2422 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Tue, 18 May 2021 18:27:52 +0200
Subject: [PATCH 2167/3804] spi: spi-mem: add automatic poll status functions

With STM32 QSPI, it is possible to poll the status register of the device.
This could be done to offload the CPU during an operation (erase or
program a SPI NAND for example).

The spi_mem_poll_status() API has been added to handle this feature.
This new function takes care of both the offload and non-offload cases.

For the non-offload case, use read_poll_timeout() to poll the status in
order to release the CPU during this phase.
For example, when erasing a large area in the non-offload case, the CPU
load could previously reach ~50%; it now decreases to ~35%.

Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Christophe Kerello <christophe.kerello@foss.st.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Link: https://lore.kernel.org/r/20210518162754.15940-2-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mem.c       | 86 +++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi-mem.h | 16 +++++++
 2 files changed, 102 insertions(+)

diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 1513553e40805..177b3e21febfd 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -6,6 +6,7 @@
  * Author: Boris Brezillon <boris.brezillon@bootlin.com>
  */
 #include <linux/dmaengine.h>
+#include <linux/iopoll.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
@@ -743,6 +744,91 @@ static inline struct spi_mem_driver *to_spi_mem_drv(struct device_driver *drv)
 	return container_of(drv, struct spi_mem_driver, spidrv.driver);
 }
 
+static int spi_mem_read_status(struct spi_mem *mem,
+			       const struct spi_mem_op *op,
+			       u16 *status)
+{
+	const u8 *bytes = (u8 *)op->data.buf.in;
+	int ret;
+
+	ret = spi_mem_exec_op(mem, op);
+	if (ret)
+		return ret;
+
+	if (op->data.nbytes > 1)
+		*status = ((u16)bytes[0] << 8) | bytes[1];
+	else
+		*status = bytes[0];
+
+	return 0;
+}
+
+/**
+ * spi_mem_poll_status() - Poll memory device status
+ * @mem: SPI memory device
+ * @op: the memory operation to execute
+ * @mask: status bitmask to check
+ * @match: (status & mask) expected value
+ * @initial_delay_us: delay in us before starting to poll
+ * @polling_delay_us: time to sleep between reads in us
+ * @timeout_ms: timeout in milliseconds
+ *
+ * This function polls a status register and returns when
+ * (status & mask) == match or when the timeout has expired.
+ *
+ * Return: 0 in case of success, -ETIMEDOUT in case of error,
+ *         -EOPNOTSUPP if not supported.
+ */
+int spi_mem_poll_status(struct spi_mem *mem,
+			const struct spi_mem_op *op,
+			u16 mask, u16 match,
+			unsigned long initial_delay_us,
+			unsigned long polling_delay_us,
+			u16 timeout_ms)
+{
+	struct spi_controller *ctlr = mem->spi->controller;
+	int ret = -EOPNOTSUPP;
+	int read_status_ret;
+	u16 status;
+
+	if (op->data.nbytes < 1 || op->data.nbytes > 2 ||
+	    op->data.dir != SPI_MEM_DATA_IN)
+		return -EINVAL;
+
+	if (ctlr->mem_ops && ctlr->mem_ops->poll_status) {
+		ret = spi_mem_access_start(mem);
+		if (ret)
+			return ret;
+
+		ret = ctlr->mem_ops->poll_status(mem, op, mask, match,
+						 initial_delay_us, polling_delay_us,
+						 timeout_ms);
+
+		spi_mem_access_end(mem);
+	}
+
+	if (ret == -EOPNOTSUPP) {
+		if (!spi_mem_supports_op(mem, op))
+			return ret;
+
+		if (initial_delay_us < 10)
+			udelay(initial_delay_us);
+		else
+			usleep_range((initial_delay_us >> 2) + 1,
+				     initial_delay_us);
+
+		ret = read_poll_timeout(spi_mem_read_status, read_status_ret,
+					(read_status_ret || ((status) & mask) == match),
+					polling_delay_us, timeout_ms * 1000, false, mem,
+					op, &status);
+		if (read_status_ret)
+			return read_status_ret;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(spi_mem_poll_status);
+
 static int spi_mem_probe(struct spi_device *spi)
 {
 	struct spi_mem_driver *memdrv = to_spi_mem_drv(spi->dev.driver);
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 2b65c9edc34e7..85e2ff7b840de 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -250,6 +250,9 @@ static inline void *spi_mem_get_drvdata(struct spi_mem *mem)
  *		  the currently mapped area), and the caller of
  *		  spi_mem_dirmap_write() is responsible for calling it again in
  *		  this case.
+ * @poll_status: poll memory device status until (status & mask) == match or
+ *               when the timeout has expired. It fills the data buffer with
+ *               the last status value.
  *
  * This interface should be implemented by SPI controllers providing an
  * high-level interface to execute SPI memory operation, which is usually the
@@ -274,6 +277,12 @@ struct spi_controller_mem_ops {
 			       u64 offs, size_t len, void *buf);
 	ssize_t (*dirmap_write)(struct spi_mem_dirmap_desc *desc,
 				u64 offs, size_t len, const void *buf);
+	int (*poll_status)(struct spi_mem *mem,
+			   const struct spi_mem_op *op,
+			   u16 mask, u16 match,
+			   unsigned long initial_delay_us,
+			   unsigned long polling_rate_us,
+			   unsigned long timeout_ms);
 };
 
 /**
@@ -369,6 +378,13 @@ devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem,
 void devm_spi_mem_dirmap_destroy(struct device *dev,
 				 struct spi_mem_dirmap_desc *desc);
 
+int spi_mem_poll_status(struct spi_mem *mem,
+			const struct spi_mem_op *op,
+			u16 mask, u16 match,
+			unsigned long initial_delay_us,
+			unsigned long polling_delay_us,
+			u16 timeout_ms);
+
 int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
 				       struct module *owner);
 
-- 
GitLab


From 8941cd8d295e40f8ea1c0a5045d6d068b8e33eec Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Tue, 18 May 2021 18:27:53 +0200
Subject: [PATCH 2168/3804] mtd: spinand: use the spi-mem poll status APIs

Make use of spi-mem poll status APIs to let advanced controllers
optimize wait operations.
This should also fix the high CPU usage on systems that don't have
dedicated STATUS polling logic.

Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Christophe Kerello <christophe.kerello@foss.st.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/r/20210518162754.15940-3-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mtd/nand/spi/core.c | 45 ++++++++++++++++++++++++++-----------
 include/linux/mtd/spinand.h | 22 ++++++++++++++++++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index 17f63f95f4a28..3131fae0c7152 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -473,20 +473,26 @@ static int spinand_erase_op(struct spinand_device *spinand,
 	return spi_mem_exec_op(spinand->spimem, &op);
 }
 
-static int spinand_wait(struct spinand_device *spinand, u8 *s)
+static int spinand_wait(struct spinand_device *spinand,
+			unsigned long initial_delay_us,
+			unsigned long poll_delay_us,
+			u8 *s)
 {
-	unsigned long timeo =  jiffies + msecs_to_jiffies(400);
+	struct spi_mem_op op = SPINAND_GET_FEATURE_OP(REG_STATUS,
+						      spinand->scratchbuf);
 	u8 status;
 	int ret;
 
-	do {
-		ret = spinand_read_status(spinand, &status);
-		if (ret)
-			return ret;
+	ret = spi_mem_poll_status(spinand->spimem, &op, STATUS_BUSY, 0,
+				  initial_delay_us,
+				  poll_delay_us,
+				  SPINAND_WAITRDY_TIMEOUT_MS);
+	if (ret)
+		return ret;
 
-		if (!(status & STATUS_BUSY))
-			goto out;
-	} while (time_before(jiffies, timeo));
+	status = *spinand->scratchbuf;
+	if (!(status & STATUS_BUSY))
+		goto out;
 
 	/*
 	 * Extra read, just in case the STATUS_READY bit has changed
@@ -526,7 +532,10 @@ static int spinand_reset_op(struct spinand_device *spinand)
 	if (ret)
 		return ret;
 
-	return spinand_wait(spinand, NULL);
+	return spinand_wait(spinand,
+			    SPINAND_RESET_INITIAL_DELAY_US,
+			    SPINAND_RESET_POLL_DELAY_US,
+			    NULL);
 }
 
 static int spinand_lock_block(struct spinand_device *spinand, u8 lock)
@@ -549,7 +558,10 @@ static int spinand_read_page(struct spinand_device *spinand,
 	if (ret)
 		return ret;
 
-	ret = spinand_wait(spinand, &status);
+	ret = spinand_wait(spinand,
+			   SPINAND_READ_INITIAL_DELAY_US,
+			   SPINAND_READ_POLL_DELAY_US,
+			   &status);
 	if (ret < 0)
 		return ret;
 
@@ -585,7 +597,10 @@ static int spinand_write_page(struct spinand_device *spinand,
 	if (ret)
 		return ret;
 
-	ret = spinand_wait(spinand, &status);
+	ret = spinand_wait(spinand,
+			   SPINAND_WRITE_INITIAL_DELAY_US,
+			   SPINAND_WRITE_POLL_DELAY_US,
+			   &status);
 	if (!ret && (status & STATUS_PROG_FAILED))
 		return -EIO;
 
@@ -768,7 +783,11 @@ static int spinand_erase(struct nand_device *nand, const struct nand_pos *pos)
 	if (ret)
 		return ret;
 
-	ret = spinand_wait(spinand, &status);
+	ret = spinand_wait(spinand,
+			   SPINAND_ERASE_INITIAL_DELAY_US,
+			   SPINAND_ERASE_POLL_DELAY_US,
+			   &status);
+
 	if (!ret && (status & STATUS_ERASE_FAILED))
 		ret = -EIO;
 
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 6bb92f26833e7..6988956b84929 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -170,6 +170,28 @@ struct spinand_op;
 struct spinand_device;
 
 #define SPINAND_MAX_ID_LEN	4
+/*
+ * For erase, write and read operations, we have the following timings:
+ * tBERS (erase) 1ms to 4ms
+ * tPROG 300us to 400us
+ * tREAD 25us to 100us
+ * In order to minimize latency, the min value is divided by 4 for the
+ * initial delay, and divided by 20 for the poll delay.
+ * For reset, 5us/10us/500us if the device is respectively
+ * reading/programming/erasing when the RESET occurs. Since we always
+ * issue a RESET when the device is IDLE, 5us is selected for both initial
+ * and poll delay.
+ */
+#define SPINAND_READ_INITIAL_DELAY_US	6
+#define SPINAND_READ_POLL_DELAY_US	5
+#define SPINAND_RESET_INITIAL_DELAY_US	5
+#define SPINAND_RESET_POLL_DELAY_US	5
+#define SPINAND_WRITE_INITIAL_DELAY_US	75
+#define SPINAND_WRITE_POLL_DELAY_US	15
+#define SPINAND_ERASE_INITIAL_DELAY_US	250
+#define SPINAND_ERASE_POLL_DELAY_US	50
+
+#define SPINAND_WAITRDY_TIMEOUT_MS	400
 
 /**
  * struct spinand_id - SPI NAND id structure
-- 
GitLab


From 86d1c6bbae32122c5f703b2d8acccf5d4258f2bb Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Tue, 18 May 2021 18:27:54 +0200
Subject: [PATCH 2169/3804] spi: stm32-qspi: add automatic poll status feature

STM32 QSPI is able to automatically poll a specified register inside the
memory and relieve the CPU from this task.

As an example, when erasing a large memory area, the CPU load was
around 50%. With this patch, the same operation is performed with a
CPU load of around 2%.

Signed-off-by: Christophe Kerello <christophe.kerello@foss.st.com>
Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Link: https://lore.kernel.org/r/20210518162754.15940-4-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32-qspi.c | 86 ++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 8 deletions(-)

diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 7e640ccc7e774..69fd220a28944 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -36,6 +36,7 @@
 #define CR_FTIE			BIT(18)
 #define CR_SMIE			BIT(19)
 #define CR_TOIE			BIT(20)
+#define CR_APMS			BIT(22)
 #define CR_PRESC_MASK		GENMASK(31, 24)
 
 #define QSPI_DCR		0x04
@@ -53,6 +54,7 @@
 #define QSPI_FCR		0x0c
 #define FCR_CTEF		BIT(0)
 #define FCR_CTCF		BIT(1)
+#define FCR_CSMF		BIT(3)
 
 #define QSPI_DLR		0x10
 
@@ -107,6 +109,7 @@ struct stm32_qspi {
 	u32 clk_rate;
 	struct stm32_qspi_flash flash[STM32_QSPI_MAX_NORCHIP];
 	struct completion data_completion;
+	struct completion match_completion;
 	u32 fmode;
 
 	struct dma_chan *dma_chtx;
@@ -115,6 +118,7 @@ struct stm32_qspi {
 
 	u32 cr_reg;
 	u32 dcr_reg;
+	unsigned long status_timeout;
 
 	/*
 	 * to protect device configuration, could be different between
@@ -128,11 +132,20 @@ static irqreturn_t stm32_qspi_irq(int irq, void *dev_id)
 	struct stm32_qspi *qspi = (struct stm32_qspi *)dev_id;
 	u32 cr, sr;
 
+	cr = readl_relaxed(qspi->io_base + QSPI_CR);
 	sr = readl_relaxed(qspi->io_base + QSPI_SR);
 
+	if (cr & CR_SMIE && sr & SR_SMF) {
+		/* disable irq */
+		cr &= ~CR_SMIE;
+		writel_relaxed(cr, qspi->io_base + QSPI_CR);
+		complete(&qspi->match_completion);
+
+		return IRQ_HANDLED;
+	}
+
 	if (sr & (SR_TEF | SR_TCF)) {
 		/* disable irq */
-		cr = readl_relaxed(qspi->io_base + QSPI_CR);
 		cr &= ~CR_TCIE & ~CR_TEIE;
 		writel_relaxed(cr, qspi->io_base + QSPI_CR);
 		complete(&qspi->data_completion);
@@ -319,6 +332,24 @@ out:
 	return err;
 }
 
+static int stm32_qspi_wait_poll_status(struct stm32_qspi *qspi,
+				       const struct spi_mem_op *op)
+{
+	u32 cr;
+
+	reinit_completion(&qspi->match_completion);
+	cr = readl_relaxed(qspi->io_base + QSPI_CR);
+	writel_relaxed(cr | CR_SMIE, qspi->io_base + QSPI_CR);
+
+	if (!wait_for_completion_timeout(&qspi->match_completion,
+				msecs_to_jiffies(qspi->status_timeout)))
+		return -ETIMEDOUT;
+
+	writel_relaxed(FCR_CSMF, qspi->io_base + QSPI_FCR);
+
+	return 0;
+}
+
 static int stm32_qspi_get_mode(struct stm32_qspi *qspi, u8 buswidth)
 {
 	if (buswidth == 4)
@@ -332,7 +363,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
 	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
 	struct stm32_qspi_flash *flash = &qspi->flash[mem->spi->chip_select];
 	u32 ccr, cr;
-	int timeout, err = 0;
+	int timeout, err = 0, err_poll_status = 0;
 
 	dev_dbg(qspi->dev, "cmd:%#x mode:%d.%d.%d.%d addr:%#llx len:%#x\n",
 		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
@@ -378,6 +409,9 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
 	if (op->addr.nbytes && qspi->fmode != CCR_FMODE_MM)
 		writel_relaxed(op->addr.val, qspi->io_base + QSPI_AR);
 
+	if (qspi->fmode == CCR_FMODE_APM)
+		err_poll_status = stm32_qspi_wait_poll_status(qspi, op);
+
 	err = stm32_qspi_tx(qspi, op);
 
 	/*
@@ -387,7 +421,7 @@ static int stm32_qspi_send(struct spi_mem *mem, const struct spi_mem_op *op)
 	 *  byte of device (device size - fifo size). like device size is not
 	 *  knows, the prefetching is always stop.
 	 */
-	if (err || qspi->fmode == CCR_FMODE_MM)
+	if (err || err_poll_status || qspi->fmode == CCR_FMODE_MM)
 		goto abort;
 
 	/* wait end of tx in indirect mode */
@@ -406,15 +440,49 @@ abort:
 						    cr, !(cr & CR_ABORT), 1,
 						    STM32_ABT_TIMEOUT_US);
 
-	writel_relaxed(FCR_CTCF, qspi->io_base + QSPI_FCR);
+	writel_relaxed(FCR_CTCF | FCR_CSMF, qspi->io_base + QSPI_FCR);
 
-	if (err || timeout)
-		dev_err(qspi->dev, "%s err:%d abort timeout:%d\n",
-			__func__, err, timeout);
+	if (err || err_poll_status || timeout)
+		dev_err(qspi->dev, "%s err:%d err_poll_status:%d abort timeout:%d\n",
+			__func__, err, err_poll_status, timeout);
 
 	return err;
 }
 
+static int stm32_qspi_poll_status(struct spi_mem *mem, const struct spi_mem_op *op,
+				  u16 mask, u16 match,
+				  unsigned long initial_delay_us,
+				  unsigned long polling_rate_us,
+				  unsigned long timeout_ms)
+{
+	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	if (!spi_mem_supports_op(mem, op))
+		return -EOPNOTSUPP;
+
+	ret = pm_runtime_get_sync(qspi->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(qspi->dev);
+		return ret;
+	}
+
+	mutex_lock(&qspi->lock);
+
+	writel_relaxed(mask, qspi->io_base + QSPI_PSMKR);
+	writel_relaxed(match, qspi->io_base + QSPI_PSMAR);
+	qspi->fmode = CCR_FMODE_APM;
+	qspi->status_timeout = timeout_ms;
+
+	ret = stm32_qspi_send(mem, op);
+	mutex_unlock(&qspi->lock);
+
+	pm_runtime_mark_last_busy(qspi->dev);
+	pm_runtime_put_autosuspend(qspi->dev);
+
+	return ret;
+}
+
 static int stm32_qspi_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
 {
 	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
@@ -527,7 +595,7 @@ static int stm32_qspi_setup(struct spi_device *spi)
 	flash->presc = presc;
 
 	mutex_lock(&qspi->lock);
-	qspi->cr_reg = 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
+	qspi->cr_reg = CR_APMS | 3 << CR_FTHRES_SHIFT | CR_SSHIFT | CR_EN;
 	writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
 
 	/* set dcr fsize to max address */
@@ -607,6 +675,7 @@ static const struct spi_controller_mem_ops stm32_qspi_mem_ops = {
 	.exec_op	= stm32_qspi_exec_op,
 	.dirmap_create	= stm32_qspi_dirmap_create,
 	.dirmap_read	= stm32_qspi_dirmap_read,
+	.poll_status	= stm32_qspi_poll_status,
 };
 
 static int stm32_qspi_probe(struct platform_device *pdev)
@@ -661,6 +730,7 @@ static int stm32_qspi_probe(struct platform_device *pdev)
 	}
 
 	init_completion(&qspi->data_completion);
+	init_completion(&qspi->match_completion);
 
 	qspi->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(qspi->clk)) {
-- 
GitLab


From 5fa5e6dec762305a783e918a90a05369fc10e346 Mon Sep 17 00:00:00 2001
From: Dan Sneddon <dan.sneddon@microchip.com>
Date: Wed, 2 Jun 2021 09:08:14 -0700
Subject: [PATCH 2170/3804] spi: atmel: Switch to transfer_one transfer method

Switch from using our own transfer_one_message routine to using the one
provided by the SPI core.

Signed-off-by: Dan Sneddon <dan.sneddon@microchip.com>
Link: https://lore.kernel.org/r/20210602160816.4890-1-dan.sneddon@microchip.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-atmel.c | 124 +++++++++++-----------------------------
 1 file changed, 33 insertions(+), 91 deletions(-)

diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 7cd5fe00dfc11..0e693e72c9229 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -863,7 +863,6 @@ static int atmel_spi_set_xfer_speed(struct atmel_spi *as,
  * lock is held, spi irq is blocked
  */
 static void atmel_spi_pdc_next_xfer(struct spi_master *master,
-					struct spi_message *msg,
 					struct spi_transfer *xfer)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
@@ -879,12 +878,12 @@ static void atmel_spi_pdc_next_xfer(struct spi_master *master,
 	spi_writel(as, RPR, rx_dma);
 	spi_writel(as, TPR, tx_dma);
 
-	if (msg->spi->bits_per_word > 8)
+	if (xfer->bits_per_word > 8)
 		len >>= 1;
 	spi_writel(as, RCR, len);
 	spi_writel(as, TCR, len);
 
-	dev_dbg(&msg->spi->dev,
+	dev_dbg(&master->dev,
 		"  start xfer %p: len %u tx %p/%08llx rx %p/%08llx\n",
 		xfer, xfer->len, xfer->tx_buf,
 		(unsigned long long)xfer->tx_dma, xfer->rx_buf,
@@ -898,12 +897,12 @@ static void atmel_spi_pdc_next_xfer(struct spi_master *master,
 		spi_writel(as, RNPR, rx_dma);
 		spi_writel(as, TNPR, tx_dma);
 
-		if (msg->spi->bits_per_word > 8)
+		if (xfer->bits_per_word > 8)
 			len >>= 1;
 		spi_writel(as, RNCR, len);
 		spi_writel(as, TNCR, len);
 
-		dev_dbg(&msg->spi->dev,
+		dev_dbg(&master->dev,
 			"  next xfer %p: len %u tx %p/%08llx rx %p/%08llx\n",
 			xfer, xfer->len, xfer->tx_buf,
 			(unsigned long long)xfer->tx_dma, xfer->rx_buf,
@@ -1273,12 +1272,28 @@ static int atmel_spi_setup(struct spi_device *spi)
 	return 0;
 }
 
+static void atmel_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct atmel_spi *as = spi_master_get_devdata(spi->master);
+	/* the core doesn't really pass us enable/disable, but CS HIGH vs CS LOW
+	 * since we already have routines for activate/deactivate translate
+	 * high/low to active/inactive
+	 */
+	enable = (!!(spi->mode & SPI_CS_HIGH) == enable);
+
+	if (enable) {
+		cs_activate(as, spi);
+	} else {
+		cs_deactivate(as, spi);
+	}
+
+}
+
 static int atmel_spi_one_transfer(struct spi_master *master,
-					struct spi_message *msg,
+					struct spi_device *spi,
 					struct spi_transfer *xfer)
 {
 	struct atmel_spi	*as;
-	struct spi_device	*spi = msg->spi;
 	u8			bits;
 	u32			len;
 	struct atmel_spi_device	*asd;
@@ -1287,11 +1302,8 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	unsigned long		dma_timeout;
 
 	as = spi_master_get_devdata(master);
-
-	if (!(xfer->tx_buf || xfer->rx_buf) && xfer->len) {
-		dev_dbg(&spi->dev, "missing rx or tx buf\n");
-		return -EINVAL;
-	}
+	/* This lock was orignally taken in atmel_spi_trasfer_one_message */
+	atmel_spi_lock(as);
 
 	asd = spi->controller_state;
 	bits = (asd->csr >> 4) & 0xf;
@@ -1305,13 +1317,13 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	 * DMA map early, for performance (empties dcache ASAP) and
 	 * better fault reporting.
 	 */
-	if ((!msg->is_dma_mapped)
+	if ((!master->cur_msg_mapped)
 		&& as->use_pdc) {
 		if (atmel_spi_dma_map_xfer(as, xfer) < 0)
 			return -ENOMEM;
 	}
 
-	atmel_spi_set_xfer_speed(as, msg->spi, xfer);
+	atmel_spi_set_xfer_speed(as, spi, xfer);
 
 	as->done_status = 0;
 	as->current_transfer = xfer;
@@ -1320,7 +1332,7 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 		reinit_completion(&as->xfer_completion);
 
 		if (as->use_pdc) {
-			atmel_spi_pdc_next_xfer(master, msg, xfer);
+			atmel_spi_pdc_next_xfer(master, xfer);
 		} else if (atmel_spi_use_dma(as, xfer)) {
 			len = as->current_remaining_bytes;
 			ret = atmel_spi_next_xfer_dma_submit(master,
@@ -1328,7 +1340,8 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 			if (ret) {
 				dev_err(&spi->dev,
 					"unable to use DMA, fallback to PIO\n");
-				atmel_spi_next_xfer_pio(master, xfer);
+				as->done_status = ret;
+				break;
 			} else {
 				as->current_remaining_bytes -= len;
 				if (as->current_remaining_bytes < 0)
@@ -1381,90 +1394,18 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 		} else if (atmel_spi_use_dma(as, xfer)) {
 			atmel_spi_stop_dma(master);
 		}
-
-		if (!msg->is_dma_mapped
-			&& as->use_pdc)
-			atmel_spi_dma_unmap_xfer(master, xfer);
-
-		return 0;
-
-	} else {
-		/* only update length if no error */
-		msg->actual_length += xfer->len;
 	}
 
-	if (!msg->is_dma_mapped
+	if (!master->cur_msg_mapped
 		&& as->use_pdc)
 		atmel_spi_dma_unmap_xfer(master, xfer);
 
-	spi_transfer_delay_exec(xfer);
-
-	if (xfer->cs_change) {
-		if (list_is_last(&xfer->transfer_list,
-				 &msg->transfers)) {
-			as->keep_cs = true;
-		} else {
-			cs_deactivate(as, msg->spi);
-			udelay(10);
-			cs_activate(as, msg->spi);
-		}
-	}
-
-	return 0;
-}
-
-static int atmel_spi_transfer_one_message(struct spi_master *master,
-						struct spi_message *msg)
-{
-	struct atmel_spi *as;
-	struct spi_transfer *xfer;
-	struct spi_device *spi = msg->spi;
-	int ret = 0;
-
-	as = spi_master_get_devdata(master);
-
-	dev_dbg(&spi->dev, "new message %p submitted for %s\n",
-					msg, dev_name(&spi->dev));
-
-	atmel_spi_lock(as);
-	cs_activate(as, spi);
-
-	as->keep_cs = false;
-
-	msg->status = 0;
-	msg->actual_length = 0;
-
-	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		trace_spi_transfer_start(msg, xfer);
-
-		ret = atmel_spi_one_transfer(master, msg, xfer);
-		if (ret)
-			goto msg_done;
-
-		trace_spi_transfer_stop(msg, xfer);
-	}
-
 	if (as->use_pdc)
 		atmel_spi_disable_pdc_transfer(as);
 
-	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
-		dev_dbg(&spi->dev,
-			"  xfer %p: len %u tx %p/%pad rx %p/%pad\n",
-			xfer, xfer->len,
-			xfer->tx_buf, &xfer->tx_dma,
-			xfer->rx_buf, &xfer->rx_dma);
-	}
-
-msg_done:
-	if (!as->keep_cs)
-		cs_deactivate(as, msg->spi);
-
 	atmel_spi_unlock(as);
 
-	msg->status = as->done_status;
-	spi_finalize_current_message(spi->master);
-
-	return ret;
+	return as->done_status;
 }
 
 static void atmel_spi_cleanup(struct spi_device *spi)
@@ -1554,7 +1495,8 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	master->num_chipselect = 4;
 	master->setup = atmel_spi_setup;
 	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
-	master->transfer_one_message = atmel_spi_transfer_one_message;
+	master->transfer_one = atmel_spi_one_transfer;
+	master->set_cs = atmel_spi_set_cs;
 	master->cleanup = atmel_spi_cleanup;
 	master->auto_runtime_pm = true;
 	master->max_dma_len = SPI_MAX_DMA_XFER;
-- 
GitLab


From 4abd641501663493764949f045ebf9f8c0da0307 Mon Sep 17 00:00:00 2001
From: Dan Sneddon <dan.sneddon@microchip.com>
Date: Wed, 2 Jun 2021 09:08:15 -0700
Subject: [PATCH 2171/3804] spi: atmel: Reduce spin lock usage

The current implementation of the driver holds a spin lock for the
duration of the transfer, releasing it only to enable interrupts for
short periods of time.  As this would prevent any interrupt from
happening, this could cause system performance issues every time a SPI
message is sent.  Since the spi core now handles message synchronization,
we can reduce the amount of time the spin-lock is held to the regions
where both the calling thread and the interrupt might interact.

Signed-off-by: Dan Sneddon <dan.sneddon@microchip.com>
Link: https://lore.kernel.org/r/20210602160816.4890-2-dan.sneddon@microchip.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-atmel.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 0e693e72c9229..2ef74885ffa2f 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -700,7 +700,6 @@ static void atmel_spi_next_xfer_pio(struct spi_master *master,
 static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 				struct spi_transfer *xfer,
 				u32 *plen)
-	__must_hold(&as->lock)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
 	struct dma_chan		*rxchan = master->dma_rx;
@@ -716,8 +715,6 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 	if (!rxchan || !txchan)
 		return -ENODEV;
 
-	/* release lock for DMA operations */
-	atmel_spi_unlock(as);
 
 	*plen = xfer->len;
 
@@ -786,15 +783,12 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 	rxchan->device->device_issue_pending(rxchan);
 	txchan->device->device_issue_pending(txchan);
 
-	/* take back lock */
-	atmel_spi_lock(as);
 	return 0;
 
 err_dma:
 	spi_writel(as, IDR, SPI_BIT(OVRES));
 	atmel_spi_stop_dma(master);
 err_exit:
-	atmel_spi_lock(as);
 	return -ENOMEM;
 }
 
@@ -1053,8 +1047,6 @@ atmel_spi_pump_pio_data(struct atmel_spi *as, struct spi_transfer *xfer)
 
 /* Interrupt
  *
- * No need for locking in this Interrupt handler: done_status is the
- * only information modified.
  */
 static irqreturn_t
 atmel_spi_pio_interrupt(int irq, void *dev_id)
@@ -1302,8 +1294,6 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	unsigned long		dma_timeout;
 
 	as = spi_master_get_devdata(master);
-	/* This lock was orignally taken in atmel_spi_trasfer_one_message */
-	atmel_spi_lock(as);
 
 	asd = spi->controller_state;
 	bits = (asd->csr >> 4) & 0xf;
@@ -1332,7 +1322,9 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 		reinit_completion(&as->xfer_completion);
 
 		if (as->use_pdc) {
+			atmel_spi_lock(as);
 			atmel_spi_pdc_next_xfer(master, xfer);
+			atmel_spi_unlock(as);
 		} else if (atmel_spi_use_dma(as, xfer)) {
 			len = as->current_remaining_bytes;
 			ret = atmel_spi_next_xfer_dma_submit(master,
@@ -1348,14 +1340,13 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 					as->current_remaining_bytes = 0;
 			}
 		} else {
+			atmel_spi_lock(as);
 			atmel_spi_next_xfer_pio(master, xfer);
+			atmel_spi_unlock(as);
 		}
 
-		/* interrupts are disabled, so free the lock for schedule */
-		atmel_spi_unlock(as);
 		dma_timeout = wait_for_completion_timeout(&as->xfer_completion,
 							  SPI_DMA_TIMEOUT);
-		atmel_spi_lock(as);
 		if (WARN_ON(dma_timeout == 0)) {
 			dev_err(&spi->dev, "spi transfer timeout\n");
 			as->done_status = -EIO;
@@ -1403,8 +1394,6 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	if (as->use_pdc)
 		atmel_spi_disable_pdc_transfer(as);
 
-	atmel_spi_unlock(as);
-
 	return as->done_status;
 }
 
-- 
GitLab


From 1623d767c7ec563d6e52ab76426377bfdde68f97 Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Thu, 3 Jun 2021 13:57:23 +0800
Subject: [PATCH 2172/3804] regulator: rt6245: Add the binding document for
 Richtek RT6245

Add the binding document for Richtek RT6245.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/1622699844-19203-1-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../regulator/richtek,rt6245-regulator.yaml   | 89 +++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml

diff --git a/Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml b/Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml
new file mode 100644
index 0000000000000..796ceac87445c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/richtek,rt6245-regulator.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/richtek,rt6245-regulator.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Richtek RT6245 High Current Voltage Regulator
+
+maintainers:
+  - ChiYuan Huang <cy_huang@richtek.com>
+
+description: |
+  The RT6245 is a high-performance, synchronous step-down converter
+  that can deliver up to 14A output current with an input supply
+  voltage range of 4.5V to 17V.
+
+allOf:
+  - $ref: regulator.yaml#
+
+properties:
+  compatible:
+    enum:
+      - richtek,rt6245
+
+  reg:
+    maxItems: 1
+
+  enable-gpios:
+    description: |
+      A connection of the chip 'enable' gpio line. If not provided,
+      it will be treat as a default-on power.
+    maxItems: 1
+
+  richtek,oc-level-select:
+    $ref: "/schemas/types.yaml#/definitions/uint8"
+    enum: [0, 1, 2, 3]
+    description: |
+      Over current level selection. Each respective value means the current
+      limit 8A, 14A, 12A, 10A. If this property is missing then keep in
+      in chip default.
+
+  richtek,ot-level-select:
+    $ref: "/schemas/types.yaml#/definitions/uint8"
+    enum: [0, 1, 2]
+    description: |
+      Over temperature level selection. Each respective value means the degree
+      150'c, 130'c, 170'c. If this property is missing then keep in chip
+      default.
+
+  richtek,pgdly-time-select:
+    $ref: "/schemas/types.yaml#/definitions/uint8"
+    enum: [0, 1, 2, 3]
+    description: |
+      Power good signal delay time selection. Each respective value means the
+      delay time 0us, 10us, 20us, 40us. If this property is missing then keep
+      in chip default.
+
+
+  richtek,switch-freq-select:
+    $ref: "/schemas/types.yaml#/definitions/uint8"
+    enum: [0, 1, 2]
+    description: |
+      Buck switch frequency selection. Each respective value means 400KHz,
+      800KHz, 1200KHz. If this property is missing then keep in chip default.
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      rt6245@34 {
+        compatible = "richtek,rt6245";
+        status = "okay";
+        reg = <0x34>;
+        enable-gpios = <&gpio26 2 0>;
+
+        regulator-name = "rt6245-regulator";
+        regulator-min-microvolt = <437500>;
+        regulator-max-microvolt = <1387500>;
+        regulator-boot-on;
+      };
+    };
-- 
GitLab


From 503d1acb01826b42e5afb496dfcc32751bec9478 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 3 Jun 2021 15:36:52 +0200
Subject: [PATCH 2173/3804] MAINTAINERS: add btrfs IRC link

We haven't had an IRC link before but now it's a good time to announce
where to reach the community.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9450e052f1b13..d32d454cd0257 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3803,6 +3803,7 @@ L:	linux-btrfs@vger.kernel.org
 S:	Maintained
 W:	http://btrfs.wiki.kernel.org/
 Q:	http://patchwork.kernel.org/project/linux-btrfs/list/
+C:	irc://irc.libera.chat/btrfs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
 F:	Documentation/filesystems/btrfs.rst
 F:	fs/btrfs/
-- 
GitLab


From 68d7a190682aa4eb02db477328088ebad15acc83 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Wed, 2 Jun 2021 16:58:08 +0200
Subject: [PATCH 2174/3804] sched/fair: Fix util_est UTIL_AVG_UNCHANGED
 handling

The util_est internal UTIL_AVG_UNCHANGED flag which is used to prevent
unnecessary util_est updates uses the LSB of util_est.enqueued. It is
exposed via _task_util_est() (and task_util_est()).

Commit 92a801e5d5b7 ("sched/fair: Mask UTIL_AVG_UNCHANGED usages")
mentions that the LSB is lost for util_est resolution but
find_energy_efficient_cpu() checks if task_util_est() returns 0 to
return prev_cpu early.

_task_util_est() returns the max value of util_est.ewma and
util_est.enqueued or'ed w/ UTIL_AVG_UNCHANGED.
So task_util_est() returning the max of task_util() and
_task_util_est() will never return 0 under the default
SCHED_FEAT(UTIL_EST, true).

To fix this use the MSB of util_est.enqueued instead and keep the flag
util_est internal, i.e. don't export it via _task_util_est().

The maximal possible util_avg value for a task is 1024 so the MSB of
'unsigned int util_est.enqueued' isn't used to store a util value.

As a caveat the code behind the util_est_se trace point has to filter
UTIL_AVG_UNCHANGED to see the real util_est.enqueued value which should
be easy to do.

This also fixes an issue reported by Xuewen Yan: util_est_update() only
applied UTIL_AVG_UNCHANGED to the subtrahend of the equation:

  last_enqueued_diff = ue.enqueued - (task_util() | UTIL_AVG_UNCHANGED)

Fixes: b89997aa88f0b ("sched/pelt: Fix task util_est update filtering")
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Xuewen Yan <xuewen.yan@unisoc.com>
Reviewed-by: Vincent Donnefort <vincent.donnefort@arm.com>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20210602145808.1562603-1-dietmar.eggemann@arm.com
---
 include/linux/sched.h |  8 ++++++++
 kernel/sched/debug.c  |  3 ++-
 kernel/sched/fair.c   |  5 +++--
 kernel/sched/pelt.h   | 11 +----------
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2c881384517b..28a98fc4ded4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -350,11 +350,19 @@ struct load_weight {
  * Only for tasks we track a moving average of the past instantaneous
  * estimated utilization. This allows to absorb sporadic drops in utilization
  * of an otherwise almost periodic task.
+ *
+ * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
+ * updates. When a task is dequeued, its util_est should not be updated if its
+ * util_avg has not been updated in the meantime.
+ * This information is mapped into the MSB bit of util_est.enqueued at dequeue
+ * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
+ * for a task) it is safe to use MSB.
  */
 struct util_est {
 	unsigned int			enqueued;
 	unsigned int			ewma;
 #define UTIL_EST_WEIGHT_SHIFT		2
+#define UTIL_AVG_UNCHANGED		0x80000000
 } __attribute__((__aligned__(sizeof(u64))));
 
 /*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9c882f20803e0..c5aacbd492a19 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -885,6 +885,7 @@ static const struct seq_operations sched_debug_sops = {
 #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
 #define __P(F) __PS(#F, F)
 #define   P(F) __PS(#F, p->F)
+#define   PM(F, M) __PS(#F, p->F & (M))
 #define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
 #define __PN(F) __PSN(#F, F)
 #define   PN(F) __PSN(#F, p->F)
@@ -1011,7 +1012,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 	P(se.avg.util_avg);
 	P(se.avg.last_update_time);
 	P(se.avg.util_est.ewma);
-	P(se.avg.util_est.enqueued);
+	PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
 #endif
 #ifdef CONFIG_UCLAMP_TASK
 	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7b98fb37330a6..2c8a9352590d9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3907,7 +3907,7 @@ static inline unsigned long _task_util_est(struct task_struct *p)
 {
 	struct util_est ue = READ_ONCE(p->se.avg.util_est);
 
-	return (max(ue.ewma, ue.enqueued) | UTIL_AVG_UNCHANGED);
+	return max(ue.ewma, (ue.enqueued & ~UTIL_AVG_UNCHANGED));
 }
 
 static inline unsigned long task_util_est(struct task_struct *p)
@@ -4007,7 +4007,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
 	 * Reset EWMA on utilization increases, the moving average is used only
 	 * to smooth utilization decreases.
 	 */
-	ue.enqueued = (task_util(p) | UTIL_AVG_UNCHANGED);
+	ue.enqueued = task_util(p);
 	if (sched_feat(UTIL_EST_FASTUP)) {
 		if (ue.ewma < ue.enqueued) {
 			ue.ewma = ue.enqueued;
@@ -4056,6 +4056,7 @@ static inline void util_est_update(struct cfs_rq *cfs_rq,
 	ue.ewma  += last_ewma_diff;
 	ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
 done:
+	ue.enqueued |= UTIL_AVG_UNCHANGED;
 	WRITE_ONCE(p->se.avg.util_est, ue);
 
 	trace_sched_util_est_se_tp(&p->se);
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 1462846d244e3..cfe94ffd2b382 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -42,15 +42,6 @@ static inline u32 get_pelt_divider(struct sched_avg *avg)
 	return LOAD_AVG_MAX - 1024 + avg->period_contrib;
 }
 
-/*
- * When a task is dequeued, its estimated utilization should not be update if
- * its util_avg has not been updated at least once.
- * This flag is used to synchronize util_avg updates with util_est updates.
- * We map this information into the LSB bit of the utilization saved at
- * dequeue time (i.e. util_est.dequeued).
- */
-#define UTIL_AVG_UNCHANGED 0x1
-
 static inline void cfs_se_util_change(struct sched_avg *avg)
 {
 	unsigned int enqueued;
@@ -58,7 +49,7 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
 	if (!sched_feat(UTIL_EST))
 		return;
 
-	/* Avoid store if the flag has been already set */
+	/* Avoid store if the flag has been already reset */
 	enqueued = avg->util_est.enqueued;
 	if (!(enqueued & UTIL_AVG_UNCHANGED))
 		return;
-- 
GitLab


From 2e38eb04c95e5546b71bb86ee699a891c7d212b5 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 1 Jun 2021 17:31:50 +0530
Subject: [PATCH 2175/3804] kprobes: Do not increment probe miss count in the
 fault handler

Kprobes has a counter 'nmissed', that is used to count the number of
times a probe handler was not called. This generally happens when we hit
a kprobe while handling another kprobe.

However, if one of the probe handlers causes a fault, we are currently
incrementing 'nmissed'. The comment in fault handler indicates that this
can be used to account faults taken by the probe handlers. But, this has
never been the intention as is evident from the comment above 'nmissed'
in 'struct kprobe':

	/*count the number of times this probe was temporarily disarmed */
	unsigned long nmissed;

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lkml.kernel.org/r/20210601120150.672652-1-naveen.n.rao@linux.vnet.ibm.com
---
 arch/arc/kernel/kprobes.c          |  6 ------
 arch/arm/probes/kprobes/core.c     | 14 --------------
 arch/arm64/kernel/probes/kprobes.c |  7 -------
 arch/csky/kernel/probes/kprobes.c  |  7 -------
 arch/ia64/kernel/kprobes.c         |  7 -------
 arch/powerpc/kernel/kprobes.c      |  7 -------
 arch/riscv/kernel/probes/kprobes.c |  7 -------
 arch/s390/kernel/kprobes.c         |  7 -------
 arch/sh/kernel/kprobes.c           |  7 -------
 arch/sparc/kernel/kprobes.c        |  7 -------
 arch/x86/kernel/kprobes/core.c     |  8 --------
 11 files changed, 84 deletions(-)

diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 9f5b39f387362..5f0415fc73287 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -317,12 +317,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr)
 		 * caused the fault.
 		 */
 
-		/* We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned zero,
 		 * try to fix up.
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 7b9b9a5a409bb..27e0af78e88b0 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -348,20 +348,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 			reset_current_kprobe();
 		}
 		break;
-
-	case KPROBE_HIT_ACTIVE:
-	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
-		break;
-
-	default:
-		break;
 	}
 
 	return 0;
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index f6b088e9fa70e..004b86eff9c2d 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -276,13 +276,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index e0e973e497703..68b22b499aebf 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -294,13 +294,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 6efed4ecff9e9..441ed04b10378 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -843,13 +843,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 75b4e874269d4..3f700830169fa 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -501,13 +501,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 923b5ea396eab..9b71a6363bdaa 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -276,13 +276,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ad631e33df24f..74b0bd2c24d4c 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -445,13 +445,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(p);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 58263420ad2a5..1c7f358ef0be1 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -382,13 +382,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index db4e341b4b6ea..4c05a4ee6a0e7 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -345,13 +345,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
-
 		/*
 		 * In case the user-specified fault handler returned
 		 * zero, try to fix up.
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index cfcdf4b8a306f..1b3fe0edd3299 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1102,14 +1102,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 			restore_previous_kprobe(kcb);
 		else
 			reset_current_kprobe();
-	} else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
-		   kcb->kprobe_status == KPROBE_HIT_SSDONE) {
-		/*
-		 * We increment the nmissed count for accounting,
-		 * we can also use npre/npostfault count for accounting
-		 * these specific fault cases.
-		 */
-		kprobes_inc_nmissed_count(cur);
 	}
 
 	return 0;
-- 
GitLab


From 88016de3ab075790e1f1bf047576e9b557c22d19 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Thu, 3 Jun 2021 15:17:05 +0200
Subject: [PATCH 2176/3804] ima: Define new template evm-sig

With the recent introduction of the evmsig template field, remote verifiers
can obtain the EVM portable signature instead of the IMA signature, to
verify file metadata.

After introducing the new fields to include file metadata in the
measurement list, this patch finally defines the evm-sig template, whose
format is:

d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode

xattrnames, xattrlengths and xattrvalues are populated only from defined
EVM protected xattrs, i.e. the ones that EVM considers to verify the
portable signature. xattrnames and xattrlengths are populated only if the
xattr is present.

xattrnames and xattrlengths are not necessary for verifying the EVM
portable signature, but they are included for completeness of information,
if a remote verifier wants to infer more from file metadata.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst | 1 +
 security/integrity/ima/ima_template.c    | 5 ++++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 6a58760a0a354..5adc22f994960 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -91,6 +91,7 @@ Below, there is the list of defined template descriptors:
  - "ima-sig": its format is ``d-ng|n-ng|sig``;
  - "ima-buf": its format is ``d-ng|n-ng|buf``;
  - "ima-modsig": its format is ``d-ng|n-ng|sig|d-modsig|modsig``;
+ - "evm-sig": its format is ``d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode``;
 
 
 Use
diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index 159a31d2fcdff..a85963853a91a 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -22,6 +22,8 @@ static struct ima_template_desc builtin_templates[] = {
 	{.name = "ima-sig", .fmt = "d-ng|n-ng|sig"},
 	{.name = "ima-buf", .fmt = "d-ng|n-ng|buf"},
 	{.name = "ima-modsig", .fmt = "d-ng|n-ng|sig|d-modsig|modsig"},
+	{.name = "evm-sig",
+	 .fmt = "d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode"},
 	{.name = "", .fmt = ""},	/* placeholder for a custom format */
 };
 
@@ -69,7 +71,8 @@ static const struct ima_template_field supported_fields[] = {
  * need to be accounted for since they shouldn't be defined in the same template
  * description as 'd-ng' and 'n-ng' respectively.
  */
-#define MAX_TEMPLATE_NAME_LEN sizeof("d-ng|n-ng|sig|buf|d-modisg|modsig")
+#define MAX_TEMPLATE_NAME_LEN \
+	sizeof("d-ng|n-ng|evmsig|xattrnames|xattrlengths|xattrvalues|iuid|igid|imode")
 
 static struct ima_template_desc *ima_template;
 static struct ima_template_desc *ima_buf_template;
-- 
GitLab


From d721c15fd519c08819fbc6de39b713e2ed1d9894 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Fri, 28 May 2021 09:38:12 +0200
Subject: [PATCH 2177/3804] evm: Don't return an error in evm_write_xattrs() if
 audit is not enabled

This patch prevents evm_write_xattrs() from returning an error when audit
is not enabled. The ab variable can be NULL and still be passed to the
other audit_log_*() functions, as those functions are no-ops in that case.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_secfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_secfs.c b/security/integrity/evm/evm_secfs.c
index a99676eb7f414..8a9db7dfca7ef 100644
--- a/security/integrity/evm/evm_secfs.c
+++ b/security/integrity/evm/evm_secfs.c
@@ -197,7 +197,7 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
 
 	ab = audit_log_start(audit_context(), GFP_KERNEL,
 			     AUDIT_INTEGRITY_EVM_XATTR);
-	if (!ab)
+	if (!ab && IS_ENABLED(CONFIG_AUDIT))
 		return -ENOMEM;
 
 	xattr = kmalloc(sizeof(struct xattr_list), GFP_KERNEL);
-- 
GitLab


From 0b4f132b15f988831dfca8f96af272e437eacf05 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Wed, 2 Jun 2021 13:13:11 -0400
Subject: [PATCH 2178/3804] NFS: Ensure the NFS_CAP_SECURITY_LABEL capability
 is set when appropriate

Commit ce62b114bbad ("NFS: Split attribute support out from the server
capabilities") removed the logic from _nfs4_server_capabilities() that
sets the NFS_CAP_SECURITY_LABEL capability based on the presence of
FATTR4_WORD2_SECURITY_LABEL in the attr_bitmask of the server's response.
Now NFS_CAP_SECURITY_LABEL is never set, which breaks labelled NFS.

This was replaced with logic that clears the NFS_ATTR_FATTR_V4_SECURITY_LABEL
bit in the newly added fattr_valid field based on the absence of
FATTR4_WORD2_SECURITY_LABEL in the attr_bitmask of the server's response.
This essentially has no effect since nothing looks for that bit
in fattr_supported.

So revert that part of the commit, but add the logic that sets
NFS_CAP_SECURITY_LABEL near where the other capabilities are set in
_nfs4_server_capabilities().

Fixes: ce62b114bbad ("NFS: Split attribute support out from the server capabilities")
Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d671b2884d5ac..7e63fbe253f07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3878,6 +3878,10 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			server->caps |= NFS_CAP_HARDLINKS;
 		if (res.has_symlinks != 0)
 			server->caps |= NFS_CAP_SYMLINKS;
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+		if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
+			server->caps |= NFS_CAP_SECURITY_LABEL;
+#endif
 		if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
 			server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
 		if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@@ -3898,10 +3902,6 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
 		if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
 			server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
-#ifdef CONFIG_NFS_V4_SECURITY_LABEL
-		if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
-			server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
-#endif
 		memcpy(server->attr_bitmask_nl, res.attr_bitmask,
 				sizeof(server->attr_bitmask));
 		server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
-- 
GitLab


From 476bdb04c501fc64bf3b8464ffddefc8dbe01577 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Wed, 2 Jun 2021 14:31:20 -0400
Subject: [PATCH 2179/3804] NFS: Fix use-after-free in nfs4_init_client()

KASAN reports a use-after-free when attempting to mount two different
exports through two different NICs that belong to the same server.

Olga was able to hit this with kernels starting somewhere between 5.7
and 5.10, but I traced the patch that introduced the clear_bit() call to
4.13. So something must have changed in the refcounting of the clp
pointer to make this call to nfs_put_client() the very last one.

Fixes: 8dcbec6d20 ("NFSv41: Handle EXCHID4_FLAG_CONFIRMED_R during NFSv4.1 migration")
Cc: stable@vger.kernel.org # 4.13+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 889a9f4c0310d..42719384e25fe 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -435,8 +435,8 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 		 */
 		nfs_mark_client_ready(clp, -EPERM);
 	}
-	nfs_put_client(clp);
 	clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
+	nfs_put_client(clp);
 	return old;
 
 error:
-- 
GitLab


From 09226e8303beeec10f2ff844d2e46d1371dc58e0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 3 Jun 2021 15:37:53 +0300
Subject: [PATCH 2180/3804] NFS: Fix a potential NULL dereference in
 nfs_get_client()

None of the callers are expecting NULL returns from nfs_get_client() so
this code will lead to an Oops.  It's better to return an error
pointer.  I expect that this is dead code so hopefully no one is
affected.

Fixes: 31434f496abb ("nfs: check hostname in nfs_get_client")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index cfeaadf56bf06..330f65727c454 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -406,7 +406,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 
 	if (cl_init->hostname == NULL) {
 		WARN_ON(1);
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 
 	/* see if the client already exists */
-- 
GitLab


From d1b5c230e9cb6dddeab23f0f0c808e2b1c28d1b6 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 3 Jun 2021 10:12:43 -0400
Subject: [PATCH 2181/3804] NFS: FMODE_READ and friends are C macros, not enum
 types

Address a sparse warning:

  CHECK   fs/nfs/nfstrace.c
fs/nfs/nfstrace.c: note: in included file (through /home/cel/src/linux/rpc-over-tls/include/trace/trace_events.h, /home/cel/src/linux/rpc-over-tls/include/trace/define_trace.h, ...):
fs/nfs/./nfstrace.h:424:1: warning: incorrect type in initializer (different base types)
fs/nfs/./nfstrace.h:424:1:    expected unsigned long eval_value
fs/nfs/./nfstrace.h:424:1:    got restricted fmode_t [usertype]
fs/nfs/./nfstrace.h:425:1: warning: incorrect type in initializer (different base types)
fs/nfs/./nfstrace.h:425:1:    expected unsigned long eval_value
fs/nfs/./nfstrace.h:425:1:    got restricted fmode_t [usertype]
fs/nfs/./nfstrace.h:426:1: warning: incorrect type in initializer (different base types)
fs/nfs/./nfstrace.h:426:1:    expected unsigned long eval_value
fs/nfs/./nfstrace.h:426:1:    got restricted fmode_t [usertype]

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfstrace.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index eb1ef3462e842..ccef43e02b481 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -430,10 +430,6 @@ TRACE_DEFINE_ENUM(O_CLOEXEC);
 		{ O_NOATIME, "O_NOATIME" }, \
 		{ O_CLOEXEC, "O_CLOEXEC" })
 
-TRACE_DEFINE_ENUM(FMODE_READ);
-TRACE_DEFINE_ENUM(FMODE_WRITE);
-TRACE_DEFINE_ENUM(FMODE_EXEC);
-
 #define show_fmode_flags(mode) \
 	__print_flags(mode, "|", \
 		{ ((__force unsigned long)FMODE_READ), "READ" }, \
-- 
GitLab


From dfe1fe75e00e4c724ede7b9e593f6f680e446c5f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 1 Jun 2021 11:10:05 -0400
Subject: [PATCH 2182/3804] NFSv4: Fix deadlock between nfs4_evict_inode() and
 nfs4_opendata_get_inode()

If the inode is being evicted, but has to return a delegation first,
then it can cause a deadlock in the corner case where the server reboots
before the delegreturn completes, but while the call to iget5_locked() in
nfs4_opendata_get_inode() is waiting for the inode free to complete.
Since the open call still holds a session slot, the reboot recovery
cannot proceed.

In order to break the logjam, we can turn the delegation return into a
privileged operation for the case where we're evicting the inode. We
know that in that case, there can be no other state recovery operation
that conflicts.

Reported-by: zhangxiaoxu (A) <zhangxiaoxu5@huawei.com>
Fixes: 5fcdfacc01f3 ("NFSv4: Return delegations synchronously in evict_inode")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4_fs.h  |  1 +
 fs/nfs/nfs4proc.c | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 065cb04222a1b..543d916f79abb 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -205,6 +205,7 @@ struct nfs4_exception {
 	struct inode *inode;
 	nfs4_stateid *stateid;
 	long timeout;
+	unsigned char task_is_privileged : 1;
 	unsigned char delay : 1,
 		      recovering : 1,
 		      retry : 1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7e63fbe253f07..cf85f0ed1e065 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -589,6 +589,8 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
 		goto out_retry;
 	}
 	if (exception->recovering) {
+		if (exception->task_is_privileged)
+			return -EDEADLOCK;
 		ret = nfs4_wait_clnt_recover(clp);
 		if (test_bit(NFS_MIG_FAILED, &server->mig_status))
 			return -EIO;
@@ -614,6 +616,8 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
 		goto out_retry;
 	}
 	if (exception->recovering) {
+		if (exception->task_is_privileged)
+			return -EDEADLOCK;
 		rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
 		if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
 			rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
@@ -6417,6 +6421,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	struct nfs4_exception exception = {
 		.inode = data->inode,
 		.stateid = &data->stateid,
+		.task_is_privileged = data->args.seq_args.sa_privileged,
 	};
 
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
@@ -6540,7 +6545,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
 	data = kzalloc(sizeof(*data), GFP_NOFS);
 	if (data == NULL)
 		return -ENOMEM;
-	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
 
 	nfs4_state_protect(server->nfs_client,
 			NFS_SP4_MACH_CRED_CLEANUP,
@@ -6571,6 +6575,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
 		}
 	}
 
+	if (!data->inode)
+		nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
+				   1);
+	else
+		nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1,
+				   0);
 	task_setup_data.callback_data = data;
 	msg.rpc_argp = &data->args;
 	msg.rpc_resp = &data->res;
-- 
GitLab


From c3aba897c6e67fa464ec02b1f17911577d619713 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 1 Jun 2021 11:35:56 -0400
Subject: [PATCH 2183/3804] NFSv4: Fix second deadlock in nfs4_evict_inode()

If the inode is being evicted but has to return a layout first, then
that too can cause a deadlock in the corner case where the server
reboots.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf85f0ed1e065..e653654c10bcd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9658,15 +9658,20 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync)
 			&task_setup_data.rpc_client, &msg);
 
 	dprintk("--> %s\n", __func__);
+	lrp->inode = nfs_igrab_and_active(lrp->args.inode);
 	if (!sync) {
-		lrp->inode = nfs_igrab_and_active(lrp->args.inode);
 		if (!lrp->inode) {
 			nfs4_layoutreturn_release(lrp);
 			return -EAGAIN;
 		}
 		task_setup_data.flags |= RPC_TASK_ASYNC;
 	}
-	nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1, 0);
+	if (!lrp->inode)
+		nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
+				   1);
+	else
+		nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1,
+				   0);
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-- 
GitLab


From 74b2fc882d380d8fafc2a26f01d401c2a7beeadb Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 2 Jun 2021 12:07:52 +0200
Subject: [PATCH 2184/3804] dmaengine: idxd: Use cpu_feature_enabled()

When testing x86 feature bits, use cpu_feature_enabled() so that
build-disabled features can remain off, regardless of what CPUID says.

Fixes: 8e50d392652f ("dmaengine: idxd: Add shared workqueue support")
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-By: Vinod Koul <vkoul@kernel.org>
Cc: <stable@vger.kernel.org>
---
 drivers/dma/idxd/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 2a926bef87f2a..776fd44aff5ff 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -745,12 +745,12 @@ static int __init idxd_init_module(void)
 	 * If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
 	 * enumerating the device. We can not utilize it.
 	 */
-	if (!boot_cpu_has(X86_FEATURE_MOVDIR64B)) {
+	if (!cpu_feature_enabled(X86_FEATURE_MOVDIR64B)) {
 		pr_warn("idxd driver failed to load without MOVDIR64B.\n");
 		return -ENODEV;
 	}
 
-	if (!boot_cpu_has(X86_FEATURE_ENQCMD))
+	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
 		pr_warn("Platform does not have ENQCMD(S) support.\n");
 	else
 		support_enqcmd = true;
-- 
GitLab


From 9bfecd05833918526cc7357d55e393393440c5fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 29 May 2021 11:17:30 +0200
Subject: [PATCH 2185/3804] x86/cpufeatures: Force disable X86_FEATURE_ENQCMD
 and remove update_pasid()

While digesting the XSAVE-related horrors which got introduced with
the supervisor/user split, the recent addition of ENQCMD-related
functionality got on the radar and turned out to be similarly broken.

update_pasid(), which is only required when X86_FEATURE_ENQCMD is
available, is invoked from two places:

 1) From switch_to() for the incoming task

 2) Via a SMP function call from the IOMMU/SVM code

#1 is half-ways correct as it hacks around the brokenness of get_xsave_addr()
   by enforcing the state to be 'present', but all the conditionals in that
   code are completely pointless for that.

   Also the invocation is just useless overhead because at that point
   it's guaranteed that TIF_NEED_FPU_LOAD is set on the incoming task
   and all of this can be handled at return to user space.

#2 is broken beyond repair. The comment in the code claims that it is safe
   to invoke this in an IPI, but that's just wishful thinking.

   FPU state of a running task is protected by fregs_lock() which is
   nothing else than a local_bh_disable(). As BH-disabled regions run
   usually with interrupts enabled the IPI can hit a code section which
   modifies FPU state and there is absolutely no guarantee that any of the
   assumptions which are made for the IPI case is true.

   Also the IPI is sent to all CPUs in mm_cpumask(mm), but the IPI is
   invoked with a NULL pointer argument, so it can hit a completely
   unrelated task and unconditionally force an update for nothing.
   Worse, it can hit a kernel thread which operates on a user space
   address space and set a random PASID for it.

The offending commit does not cleanly revert, but it's sufficient to
force disable X86_FEATURE_ENQCMD and to remove the broken update_pasid()
code to make this dysfunctional all over the place. Anything more
complex would require more surgery and none of the related functions
outside of the x86 core code are blatantly wrong, so removing those
would be overkill.

As nothing enables the PASID bit in the IA32_XSS MSR yet, which is
required to make this actually work, this cannot result in a regression
except for related out of tree train-wrecks, but they are broken already
today.

Fixes: 20f0afd1fb3d ("x86/mmu: Allocate/free a PASID")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/87mtsd6gr9.ffs@nanos.tec.linutronix.de
---
 arch/x86/include/asm/disabled-features.h |  7 +--
 arch/x86/include/asm/fpu/api.h           |  6 +--
 arch/x86/include/asm/fpu/internal.h      |  7 ---
 arch/x86/kernel/fpu/xstate.c             | 57 ------------------------
 4 files changed, 3 insertions(+), 74 deletions(-)

diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index b7dd944dc8673..8f28fafa98b32 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD	0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
 
 #ifdef CONFIG_X86_SGX
 # define DISABLE_SGX	0
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index ed33a14188f66..23bef08a83880 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -106,10 +106,6 @@ extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
  */
 #define PASID_DISABLED	0
 
-#ifdef CONFIG_IOMMU_SUPPORT
-/* Update current's PASID MSR/state by mm's PASID. */
-void update_pasid(void);
-#else
 static inline void update_pasid(void) { }
-#endif
+
 #endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 8d33ad80704f2..ceeba9f631722 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -584,13 +584,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 			pkru_val = pk->pkru;
 	}
 	__write_pkru(pkru_val);
-
-	/*
-	 * Expensive PASID MSR write will be avoided in update_pasid() because
-	 * TIF_NEED_FPU_LOAD was set. And the PASID state won't be updated
-	 * unless it's different from mm->pasid to reduce overhead.
-	 */
-	update_pasid();
 }
 
 #endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index a85c640002189..d0eef963aad13 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -1402,60 +1402,3 @@ int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
 	return 0;
 }
 #endif /* CONFIG_PROC_PID_ARCH_STATUS */
-
-#ifdef CONFIG_IOMMU_SUPPORT
-void update_pasid(void)
-{
-	u64 pasid_state;
-	u32 pasid;
-
-	if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
-		return;
-
-	if (!current->mm)
-		return;
-
-	pasid = READ_ONCE(current->mm->pasid);
-	/* Set the valid bit in the PASID MSR/state only for valid pasid. */
-	pasid_state = pasid == PASID_DISABLED ?
-		      pasid : pasid | MSR_IA32_PASID_VALID;
-
-	/*
-	 * No need to hold fregs_lock() since the task's fpstate won't
-	 * be changed by others (e.g. ptrace) while the task is being
-	 * switched to or is in IPI.
-	 */
-	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
-		/* The MSR is active and can be directly updated. */
-		wrmsrl(MSR_IA32_PASID, pasid_state);
-	} else {
-		struct fpu *fpu = &current->thread.fpu;
-		struct ia32_pasid_state *ppasid_state;
-		struct xregs_state *xsave;
-
-		/*
-		 * The CPU's xstate registers are not currently active. Just
-		 * update the PASID state in the memory buffer here. The
-		 * PASID MSR will be loaded when returning to user mode.
-		 */
-		xsave = &fpu->state.xsave;
-		xsave->header.xfeatures |= XFEATURE_MASK_PASID;
-		ppasid_state = get_xsave_addr(xsave, XFEATURE_PASID);
-		/*
-		 * Since XFEATURE_MASK_PASID is set in xfeatures, ppasid_state
-		 * won't be NULL and no need to check its value.
-		 *
-		 * Only update the task's PASID state when it's different
-		 * from the mm's pasid.
-		 */
-		if (ppasid_state->pasid != pasid_state) {
-			/*
-			 * Invalid fpregs so that state restoring will pick up
-			 * the PASID state.
-			 */
-			__fpu_invalidate_fpregs_state(fpu);
-			ppasid_state->pasid = pasid_state;
-		}
-	}
-}
-#endif /* CONFIG_IOMMU_SUPPORT */
-- 
GitLab


From 2b31e8ed96b260ce2c22bd62ecbb9458399e3b62 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 1 Jun 2021 17:51:22 +0200
Subject: [PATCH 2186/3804] x86/alternative: Optimize single-byte NOPs at an
 arbitrary position

Up until now the assumption was that an alternative patching site would
have some instructions at the beginning and trailing single-byte NOPs
(0x90) padding. Therefore, the patching machinery would go and optimize
those single-byte NOPs into longer ones.

However, this assumption is broken on 32-bit when code like
hv_do_hypercall() in hyperv_init() would use the retpoline speculation
killer CALL_NOSPEC. The 32-bit version of that macro would align certain
insns to 16 bytes, leading to the compiler issuing one or more
single-byte NOPs, depending on the holes it needs to fill for alignment.

That would lead to the warning in optimize_nops() to fire:

  ------------[ cut here ]------------
  Not a NOP at 0xc27fb598
   WARNING: CPU: 0 PID: 0 at arch/x86/kernel/alternative.c:211 optimize_nops.isra.13

due to that function verifying whether all of the following bytes really
are single-byte NOPs.

Therefore, carve out the NOP padding into a separate function and call
it for each NOP range beginning with a single-byte NOP.

Fixes: 23c1ad538f4f ("x86/alternatives: Optimize optimize_nops()")
Reported-by: Richard Narron <richard@aaazen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213301
Link: https://lkml.kernel.org/r/20210601212125.17145-1-bp@alien8.de
---
 arch/x86/kernel/alternative.c | 64 +++++++++++++++++++++++++----------
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 6974b51744955..6fe5b44fcbc9f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -182,42 +182,70 @@ done:
 		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
 }
 
+/*
+ * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
+ *
+ * @instr: instruction byte stream
+ * @instrlen: length of the above
+ * @off: offset within @instr where the first NOP has been detected
+ *
+ * Return: number of NOPs found (and replaced).
+ */
+static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
+{
+	unsigned long flags;
+	int i = off, nnops;
+
+	while (i < instrlen) {
+		if (instr[i] != 0x90)
+			break;
+
+		i++;
+	}
+
+	nnops = i - off;
+
+	if (nnops <= 1)
+		return nnops;
+
+	local_irq_save(flags);
+	add_nops(instr + off, nnops);
+	local_irq_restore(flags);
+
+	DUMP_BYTES(instr, instrlen, "%px: [%d:%d) optimized NOPs: ", instr, off, i);
+
+	return nnops;
+}
+
 /*
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
 static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr)
 {
-	unsigned long flags;
 	struct insn insn;
-	int nop, i = 0;
+	int i = 0;
 
 	/*
-	 * Jump over the non-NOP insns, the remaining bytes must be single-byte
-	 * NOPs, optimize them.
+	 * Jump over the non-NOP insns and optimize single-byte NOPs into bigger
+	 * ones.
 	 */
 	for (;;) {
 		if (insn_decode_kernel(&insn, &instr[i]))
 			return;
 
+		/*
+		 * See if this and any potentially following NOPs can be
+		 * optimized.
+		 */
 		if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
-			break;
-
-		if ((i += insn.length) >= a->instrlen)
-			return;
-	}
+			i += optimize_nops_range(instr, a->instrlen, i);
+		else
+			i += insn.length;
 
-	for (nop = i; i < a->instrlen; i++) {
-		if (WARN_ONCE(instr[i] != 0x90, "Not a NOP at 0x%px\n", &instr[i]))
+		if (i >= a->instrlen)
 			return;
 	}
-
-	local_irq_save(flags);
-	add_nops(instr + nop, i - nop);
-	local_irq_restore(flags);
-
-	DUMP_BYTES(instr, a->instrlen, "%px: [%d:%d) optimized NOPs: ",
-		   instr, nop, a->instrlen);
 }
 
 /*
-- 
GitLab


From 5379260852b013902abbca691926b3ac1cac36d5 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Mon, 3 May 2021 09:28:00 +0200
Subject: [PATCH 2187/3804] igb: Fix XDP with PTP enabled

When using native XDP with the igb driver, the XDP frame data doesn't point to
the beginning of the packet. It's off by 16 bytes. Everything works as expected
with XDP skb mode.

Actually these 16 bytes are used to store the packet timestamps. Therefore, pull
the timestamp before executing any XDP operations and adjust all other code
accordingly. The igc driver does it like that as well.

Tested with Intel i210 card and AF_XDP sockets.

Fixes: 9cbc948b5a20 ("igb: add XDP support")
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: Sandeep Penigalapati <sandeep.penigalapati@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h      |  2 +-
 drivers/net/ethernet/intel/igb/igb_main.c | 45 +++++++++++++----------
 drivers/net/ethernet/intel/igb/igb_ptp.c  | 23 +++++-------
 3 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 7bda8c5edea5d..2d3daf022651c 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -749,7 +749,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter);
 void igb_ptp_tx_hang(struct igb_adapter *adapter);
 void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
 int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-			struct sk_buff *skb);
+			ktime_t *timestamp);
 int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
 void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 038a9fd1af44d..0123285029fa3 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8280,7 +8280,7 @@ static void igb_add_rx_frag(struct igb_ring *rx_ring,
 static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 					 struct igb_rx_buffer *rx_buffer,
 					 struct xdp_buff *xdp,
-					 union e1000_adv_rx_desc *rx_desc)
+					 ktime_t timestamp)
 {
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
@@ -8300,12 +8300,8 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 	if (unlikely(!skb))
 		return NULL;
 
-	if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
-		if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) {
-			xdp->data += IGB_TS_HDR_LEN;
-			size -= IGB_TS_HDR_LEN;
-		}
-	}
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
 
 	/* Determine available headroom for copy */
 	headlen = size;
@@ -8336,7 +8332,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
 static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
 				     struct igb_rx_buffer *rx_buffer,
 				     struct xdp_buff *xdp,
-				     union e1000_adv_rx_desc *rx_desc)
+				     ktime_t timestamp)
 {
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = igb_rx_pg_size(rx_ring) / 2;
@@ -8363,11 +8359,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
 	if (metasize)
 		skb_metadata_set(skb, metasize);
 
-	/* pull timestamp out of packet data */
-	if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-		if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb))
-			__skb_pull(skb, IGB_TS_HDR_LEN);
-	}
+	if (timestamp)
+		skb_hwtstamps(skb)->hwtstamp = timestamp;
 
 	/* update buffer offset */
 #if (PAGE_SIZE < 8192)
@@ -8682,7 +8675,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 	while (likely(total_packets < budget)) {
 		union e1000_adv_rx_desc *rx_desc;
 		struct igb_rx_buffer *rx_buffer;
+		ktime_t timestamp = 0;
+		int pkt_offset = 0;
 		unsigned int size;
+		void *pktbuf;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
@@ -8702,14 +8698,24 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 		dma_rmb();
 
 		rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
+		pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
+
+		/* pull rx packet timestamp if available and valid */
+		if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+			int ts_hdr_len;
+
+			ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+							 pktbuf, &timestamp);
+
+			pkt_offset += ts_hdr_len;
+			size -= ts_hdr_len;
+		}
 
 		/* retrieve a buffer from the ring */
 		if (!skb) {
-			unsigned int offset = igb_rx_offset(rx_ring);
-			unsigned char *hard_start;
+			unsigned char *hard_start = pktbuf - igb_rx_offset(rx_ring);
+			unsigned int offset = pkt_offset + igb_rx_offset(rx_ring);
 
-			hard_start = page_address(rx_buffer->page) +
-				     rx_buffer->page_offset - offset;
 			xdp_prepare_buff(&xdp, hard_start, offset, size, true);
 #if (PAGE_SIZE > 4096)
 			/* At larger PAGE_SIZE, frame_sz depend on len size */
@@ -8732,10 +8738,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 		} else if (skb)
 			igb_add_rx_frag(rx_ring, rx_buffer, skb, size);
 		else if (ring_uses_build_skb(rx_ring))
-			skb = igb_build_skb(rx_ring, rx_buffer, &xdp, rx_desc);
+			skb = igb_build_skb(rx_ring, rx_buffer, &xdp,
+					    timestamp);
 		else
 			skb = igb_construct_skb(rx_ring, rx_buffer,
-						&xdp, rx_desc);
+						&xdp, timestamp);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index ba61fe9bfaf4e..d68cd4466a546 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -856,30 +856,28 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
 	dev_kfree_skb_any(skb);
 }
 
-#define IGB_RET_PTP_DISABLED 1
-#define IGB_RET_PTP_INVALID 2
-
 /**
  * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
  * @q_vector: Pointer to interrupt specific structure
  * @va: Pointer to address containing Rx buffer
- * @skb: Buffer containing timestamp and packet
+ * @timestamp: Pointer where timestamp will be stored
  *
  * This function is meant to retrieve a timestamp from the first buffer of an
  * incoming frame.  The value is stored in little endian format starting on
  * byte 8
  *
- * Returns: 0 if success, nonzero if failure
+ * Returns: The timestamp header length or 0 if not available
  **/
 int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-			struct sk_buff *skb)
+			ktime_t *timestamp)
 {
 	struct igb_adapter *adapter = q_vector->adapter;
+	struct skb_shared_hwtstamps ts;
 	__le64 *regval = (__le64 *)va;
 	int adjust = 0;
 
 	if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
-		return IGB_RET_PTP_DISABLED;
+		return 0;
 
 	/* The timestamp is recorded in little endian format.
 	 * DWORD: 0        1        2        3
@@ -888,10 +886,9 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
 
 	/* check reserved dwords are zero, be/le doesn't matter for zero */
 	if (regval[0])
-		return IGB_RET_PTP_INVALID;
+		return 0;
 
-	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
-				   le64_to_cpu(regval[1]));
+	igb_ptp_systim_to_hwtstamp(adapter, &ts, le64_to_cpu(regval[1]));
 
 	/* adjust timestamp for the RX latency based on link speed */
 	if (adapter->hw.mac.type == e1000_i210) {
@@ -907,10 +904,10 @@ int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
 			break;
 		}
 	}
-	skb_hwtstamps(skb)->hwtstamp =
-		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 
-	return 0;
+	*timestamp = ktime_sub_ns(ts.hwtstamp, adjust);
+
+	return IGB_TS_HDR_LEN;
 }
 
 /**
-- 
GitLab


From f6c10b48f8c8da44adaff730d8e700b6272add2b Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:49 +0200
Subject: [PATCH 2188/3804] i40e: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different errors
can occur. The support was only partial. Several errors were not
logged which would confuse the user quite a lot not knowing where and
why the packets disappeared.

Fixes: 74608d17fe29 ("i40e: add support for XDP_TX action")
Fixes: 0a714186d3c0 ("i40e: add AF_XDP zero-copy Rx support")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 7 ++++++-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 8 ++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index de70c16ef619a..b883ab809df30 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2313,15 +2313,20 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		if (result == I40E_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
+		result = I40E_XDP_REDIR;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough; /* handle aborts by dropping packet */
 	case XDP_DROP:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 46d884417c632..68f177a86403f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -162,9 +162,10 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 
 	if (likely(act == XDP_REDIRECT)) {
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
 		rcu_read_unlock();
-		return result;
+		return I40E_XDP_REDIR;
 	}
 
 	switch (act) {
@@ -173,11 +174,14 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
 		result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
+		if (result == I40E_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough; /* handle aborts by dropping packet */
 	case XDP_DROP:
-- 
GitLab


From 89d65df024c59988291f643b4e45d1528c51aef9 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:50 +0200
Subject: [PATCH 2189/3804] ice: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different
errors can occur. The support was only partial. Several errors
were not logged which would confuse the user quite a lot not
knowing where and why the packets disappeared.

Fixes: efc2214b6047 ("ice: Add support for XDP")
Fixes: 2d4238f55697 ("ice: Add support for AF_XDP")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 12 +++++++++---
 drivers/net/ethernet/intel/ice/ice_xsk.c  |  8 ++++++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index e2b4b29ea2075..93e5d9ebfd74c 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -523,7 +523,7 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
 	    struct bpf_prog *xdp_prog)
 {
 	struct ice_ring *xdp_ring;
-	int err;
+	int err, result;
 	u32 act;
 
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
@@ -532,14 +532,20 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp,
 		return ICE_XDP_PASS;
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()];
-		return ice_xmit_xdp_buff(xdp, xdp_ring);
+		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		if (result == ICE_XDP_CONSUMED)
+			goto out_failure;
+		return result;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		return !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
+		return ICE_XDP_REDIR;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index faa7b8d96adb5..7228e4d427bc6 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -473,9 +473,10 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 
 	if (likely(act == XDP_REDIRECT)) {
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
 		rcu_read_unlock();
-		return result;
+		return ICE_XDP_REDIR;
 	}
 
 	switch (act) {
@@ -484,11 +485,14 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 	case XDP_TX:
 		xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index];
 		result = ice_xmit_xdp_buff(xdp, xdp_ring);
+		if (result == ICE_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
-- 
GitLab


From 8281356b1cab1cccc71412eb4cf28b99d6bb2c19 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:51 +0200
Subject: [PATCH 2190/3804] ixgbe: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different
errors can occur. The support was only partial. Several errors
were not logged which would confuse the user quite a lot not
knowing where and why the packets disappeared.

Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action")
Fixes: d0bcacd0a130 ("ixgbe: add AF_XDP zero-copy Rx support")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Vishakha Jambekar <vishakha.jambekar@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 16 ++++++++--------
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c  | 14 ++++++++------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c5ec17d19c59d..2ac5b82676f3b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2213,23 +2213,23 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 		break;
 	case XDP_TX:
 		xdpf = xdp_convert_buff_to_frame(xdp);
-		if (unlikely(!xdpf)) {
-			result = IXGBE_XDP_CONSUMED;
-			break;
-		}
+		if (unlikely(!xdpf))
+			goto out_failure;
 		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		if (result == IXGBE_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
-		if (!err)
-			result = IXGBE_XDP_REDIR;
-		else
-			result = IXGBE_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
+		result = IXGBE_XDP_REDIR;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough; /* handle aborts by dropping packet */
 	case XDP_DROP:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 91ad5b902673c..f72d2978263b9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -106,9 +106,10 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 
 	if (likely(act == XDP_REDIRECT)) {
 		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
-		result = !err ? IXGBE_XDP_REDIR : IXGBE_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
 		rcu_read_unlock();
-		return result;
+		return IXGBE_XDP_REDIR;
 	}
 
 	switch (act) {
@@ -116,16 +117,17 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 		break;
 	case XDP_TX:
 		xdpf = xdp_convert_buff_to_frame(xdp);
-		if (unlikely(!xdpf)) {
-			result = IXGBE_XDP_CONSUMED;
-			break;
-		}
+		if (unlikely(!xdpf))
+			goto out_failure;
 		result = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		if (result == IXGBE_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough; /* handle aborts by dropping packet */
 	case XDP_DROP:
-- 
GitLab


From 74431c40b9c5fa673fff83ec157a76a69efd5c72 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:52 +0200
Subject: [PATCH 2191/3804] igb: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different
errors can occur. The support was only partial. Several errors
were not logged which would confuse the user quite a lot not
knowing where and why the packets disappeared.

Fixes: 9cbc948b5a20 ("igb: add XDP support")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Vishakha Jambekar <vishakha.jambekar@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 0123285029fa3..b2a042f825ff5 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -8394,18 +8394,20 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
 		break;
 	case XDP_TX:
 		result = igb_xdp_xmit_back(adapter, xdp);
+		if (result == IGB_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
-		if (!err)
-			result = IGB_XDP_REDIR;
-		else
-			result = IGB_XDP_CONSUMED;
+		if (err)
+			goto out_failure;
+		result = IGB_XDP_REDIR;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough;
 	case XDP_DROP:
-- 
GitLab


From faae81420d162551b6ef2d804aafc00f4cd68e0e Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:53 +0200
Subject: [PATCH 2192/3804] ixgbevf: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different
errors can occur. The support was only partial. Several errors
were not logged which would confuse the user quite a lot not
knowing where and why the packets disappeared.

Fixes: 21092e9ce8b1 ("ixgbevf: Add support for XDP_TX action")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Vishakha Jambekar <vishakha.jambekar@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ba2ed8a43d2de..0e733cc15c588 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1067,11 +1067,14 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
 	case XDP_TX:
 		xdp_ring = adapter->xdp_ring[rx_ring->queue_index];
 		result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp);
+		if (result == IXGBEVF_XDP_CONSUMED)
+			goto out_failure;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		fallthrough; /* handle aborts by dropping packet */
 	case XDP_DROP:
-- 
GitLab


From 45ce08594ec3a9f81a6dedeccd1ec785e6907405 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 10 May 2021 11:38:54 +0200
Subject: [PATCH 2193/3804] igc: add correct exception tracing for XDP

Add missing exception tracing to XDP when a number of different
errors can occur. The support was only partial. Several errors
were not logged which would confuse the user quite a lot not
knowing where and why the packets disappeared.

Fixes: 73f1071c1d29 ("igc: Add support for XDP_TX action")
Fixes: 4ff320361092 ("igc: Add support for XDP_REDIRECT action")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Tested-by: Dvora Fuxbrumer <dvorax.fuxbrumer@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 069471b7ffb0a..f1adf154ec4ae 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2047,20 +2047,19 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
 		break;
 	case XDP_TX:
 		if (igc_xdp_xmit_back(adapter, xdp) < 0)
-			res = IGC_XDP_CONSUMED;
-		else
-			res = IGC_XDP_TX;
+			goto out_failure;
+		res = IGC_XDP_TX;
 		break;
 	case XDP_REDIRECT:
 		if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
-			res = IGC_XDP_CONSUMED;
-		else
-			res = IGC_XDP_REDIRECT;
+			goto out_failure;
+		res = IGC_XDP_REDIRECT;
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
 	case XDP_ABORTED:
+out_failure:
 		trace_xdp_exception(adapter->netdev, prog, act);
 		fallthrough;
 	case XDP_DROP:
-- 
GitLab


From e102db780e1c14f10c70dafa7684af22a745b51d Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 27 Apr 2021 21:52:09 +0200
Subject: [PATCH 2194/3804] ice: track AF_XDP ZC enabled queues in bitmap

Commit c7a219048e45 ("ice: Remove xsk_buff_pool from VSI structure")
silently introduced a regression and broke the Tx side of AF_XDP in copy
mode. xsk_pool on ice_ring is set only based on the existence of the XDP
prog on the VSI which in turn picks ice_clean_tx_irq_zc to be executed.
That is not something that should happen for copy mode as it should use
the regular data path ice_clean_tx_irq.

This results in the following splat when xdpsock is run in txonly or l2fwd
scenarios in copy mode:

<snip>
[  106.050195] BUG: kernel NULL pointer dereference, address: 0000000000000030
[  106.057269] #PF: supervisor read access in kernel mode
[  106.062493] #PF: error_code(0x0000) - not-present page
[  106.067709] PGD 0 P4D 0
[  106.070293] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  106.074721] CPU: 61 PID: 0 Comm: swapper/61 Not tainted 5.12.0-rc2+ #45
[  106.081436] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
[  106.092027] RIP: 0010:xp_raw_get_dma+0x36/0x50
[  106.096551] Code: 74 14 48 b8 ff ff ff ff ff ff 00 00 48 21 f0 48 c1 ee 30 48 01 c6 48 8b 87 90 00 00 00 48 89 f2 81 e6 ff 0f 00 00 48 c1 ea 0c <48> 8b 04 d0 48 83 e0 fe 48 01 f0 c3 66 66 2e 0f 1f 84 00 00 00 00
[  106.115588] RSP: 0018:ffffc9000d694e50 EFLAGS: 00010206
[  106.120893] RAX: 0000000000000000 RBX: ffff88984b8c8a00 RCX: ffff889852581800
[  106.128137] RDX: 0000000000000006 RSI: 0000000000000000 RDI: ffff88984cd8b800
[  106.135383] RBP: ffff888123b50001 R08: ffff889896800000 R09: 0000000000000800
[  106.142628] R10: 0000000000000000 R11: ffffffff826060c0 R12: 00000000000000ff
[  106.149872] R13: 0000000000000000 R14: 0000000000000040 R15: ffff888123b50018
[  106.157117] FS:  0000000000000000(0000) GS:ffff8897e0f40000(0000) knlGS:0000000000000000
[  106.165332] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  106.171163] CR2: 0000000000000030 CR3: 000000000560a004 CR4: 00000000007706e0
[  106.178408] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  106.185653] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  106.192898] PKRU: 55555554
[  106.195653] Call Trace:
[  106.198143]  <IRQ>
[  106.200196]  ice_clean_tx_irq_zc+0x183/0x2a0 [ice]
[  106.205087]  ice_napi_poll+0x3e/0x590 [ice]
[  106.209356]  __napi_poll+0x2a/0x160
[  106.212911]  net_rx_action+0xd6/0x200
[  106.216634]  __do_softirq+0xbf/0x29b
[  106.220274]  irq_exit_rcu+0x88/0xc0
[  106.223819]  common_interrupt+0x7b/0xa0
[  106.227719]  </IRQ>
[  106.229857]  asm_common_interrupt+0x1e/0x40
</snip>

Fix this by introducing the bitmap of queues that are zero-copy enabled,
where each bit, corresponding to a queue id that xsk pool is being
configured on, will be set/cleared within ice_xsk_pool_{en,dis}able and
checked within ice_xsk_pool(). The latter is a function used for
deciding which napi poll routine is executed.
The idea is taken from our other drivers such as i40e and ixgbe.

Fixes: c7a219048e45 ("ice: Remove xsk_buff_pool from VSI structure")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h     |  8 +++++---
 drivers/net/ethernet/intel/ice/ice_lib.c | 10 ++++++++++
 drivers/net/ethernet/intel/ice/ice_xsk.c |  3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e35db3ff583bc..2924c67567b8a 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -335,6 +335,7 @@ struct ice_vsi {
 	struct ice_tc_cfg tc_cfg;
 	struct bpf_prog *xdp_prog;
 	struct ice_ring **xdp_rings;	 /* XDP ring array */
+	unsigned long *af_xdp_zc_qps;	 /* tracks AF_XDP ZC enabled qps */
 	u16 num_xdp_txq;		 /* Used XDP queues */
 	u8 xdp_mapping_mode;		 /* ICE_MAP_MODE_[CONTIG|SCATTER] */
 
@@ -547,15 +548,16 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
  */
 static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring)
 {
+	struct ice_vsi *vsi = ring->vsi;
 	u16 qid = ring->q_index;
 
 	if (ice_ring_is_xdp(ring))
-		qid -= ring->vsi->num_xdp_txq;
+		qid -= vsi->num_xdp_txq;
 
-	if (!ice_is_xdp_ena_vsi(ring->vsi))
+	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
 		return NULL;
 
-	return xsk_get_pool_from_qid(ring->vsi->netdev, qid);
+	return xsk_get_pool_from_qid(vsi->netdev, qid);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 82e2ce23df3dc..7f7653906fcef 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -105,8 +105,14 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
 	if (!vsi->q_vectors)
 		goto err_vectors;
 
+	vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL);
+	if (!vsi->af_xdp_zc_qps)
+		goto err_zc_qps;
+
 	return 0;
 
+err_zc_qps:
+	devm_kfree(dev, vsi->q_vectors);
 err_vectors:
 	devm_kfree(dev, vsi->rxq_map);
 err_rxq_map:
@@ -288,6 +294,10 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
 
 	dev = ice_pf_to_dev(pf);
 
+	if (vsi->af_xdp_zc_qps) {
+		bitmap_free(vsi->af_xdp_zc_qps);
+		vsi->af_xdp_zc_qps = NULL;
+	}
 	/* free the ring and vector containers */
 	if (vsi->q_vectors) {
 		devm_kfree(dev, vsi->q_vectors);
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 7228e4d427bc6..a1f89ea3c2bdb 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -270,6 +270,7 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid)
 	if (!pool)
 		return -EINVAL;
 
+	clear_bit(qid, vsi->af_xdp_zc_qps);
 	xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR);
 
 	return 0;
@@ -300,6 +301,8 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 	if (err)
 		return err;
 
+	set_bit(qid, vsi->af_xdp_zc_qps);
+
 	return 0;
 }
 
-- 
GitLab


From a0ffb4c12f7fa89163e228e6f27df09b46631db1 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Thu, 3 Jun 2021 16:18:03 +0300
Subject: [PATCH 2195/3804] RDMA/mlx5: Use different doorbell memory for
 different processes

In a fork scenario, the parent and child can have the same virtual address
and also share the uverbs fd.  That causes the list_for_each_entry search
to return the same doorbell physical page for all processes, even though
that page has been COW'ed or copied.

This patch takes the mm_struct into consideration during search, to make
sure that VA's belonging to different processes are not intermixed.

Resolves the malfunction of uverbs after fork in some specific cases.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Link: https://lore.kernel.org/r/feacc23fe0bc6e1088c6824d5583798745e72405.1622726212.git.leonro@nvidia.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/doorbell.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index 61475b5715312..7af4df7a68237 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -41,6 +41,7 @@ struct mlx5_ib_user_db_page {
 	struct ib_umem	       *umem;
 	unsigned long		user_virt;
 	int			refcnt;
+	struct mm_struct	*mm;
 };
 
 int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
@@ -53,7 +54,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
 	mutex_lock(&context->db_page_mutex);
 
 	list_for_each_entry(page, &context->db_page_list, list)
-		if (page->user_virt == (virt & PAGE_MASK))
+		if ((current->mm == page->mm) &&
+		    (page->user_virt == (virt & PAGE_MASK)))
 			goto found;
 
 	page = kmalloc(sizeof(*page), GFP_KERNEL);
@@ -71,6 +73,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
 		kfree(page);
 		goto out;
 	}
+	mmgrab(current->mm);
+	page->mm = current->mm;
 
 	list_add(&page->list, &context->db_page_list);
 
@@ -91,6 +95,7 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
 
 	if (!--db->u.user_page->refcnt) {
 		list_del(&db->u.user_page->list);
+		mmdrop(db->u.user_page->mm);
 		ib_umem_release(db->u.user_page->umem);
 		kfree(db->u.user_page);
 	}
-- 
GitLab


From 404e5a12691fe797486475fe28cc0b80cb8bef2c Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 3 Jun 2021 16:19:39 +0300
Subject: [PATCH 2196/3804] RDMA/mlx4: Do not map the core_clock page to user
 space unless enabled

Currently, when mlx4 maps the hca_core_clock page to user space, there
are read-modifiable registers, one of which is a semaphore, on this page
as well as the clock counter. If the user reads the wrong offset, it can
modify the semaphore and hang the device.

Do not map the hca_core_clock page to the user space unless the device has
been put in a backwards compatibility mode to support this feature.

After this patch, mlx4 core_clock won't be mapped to user space on the
majority of existing devices and the uverbs device time feature in
ibv_query_rt_values_ex() will be disabled.

Fixes: 52033cfb5aab ("IB/mlx4: Add mmap call to map the hardware clock")
Link: https://lore.kernel.org/r/9632304e0d6790af84b3b706d8c18732bc0d5e27.1622726305.git.leonro@nvidia.com
Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx4/main.c         | 5 +----
 drivers/net/ethernet/mellanox/mlx4/fw.c   | 3 +++
 drivers/net/ethernet/mellanox/mlx4/fw.h   | 1 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 6 ++++++
 include/linux/mlx4/device.h               | 1 +
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 22898d97ecbda..16704262fc3a8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -581,12 +581,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->cq_caps.max_cq_moderation_count = MLX4_MAX_CQ_COUNT;
 	props->cq_caps.max_cq_moderation_period = MLX4_MAX_CQ_PERIOD;
 
-	if (!mlx4_is_slave(dev->dev))
-		err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
-
 	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
 		resp.response_length += sizeof(resp.hca_core_clock_offset);
-		if (!err && !mlx4_is_slave(dev->dev)) {
+		if (!mlx4_get_internal_clock_params(dev->dev, &clock_params)) {
 			resp.comp_mask |= MLX4_IB_QUERY_DEV_RESP_MASK_CORE_CLOCK_OFFSET;
 			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f6cfec81ccc3b..dc4ac1a2b6b67 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -823,6 +823,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAD_DEMUX_OFFSET		0xb0
 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_BASE_OFFSET	0xa8
 #define QUERY_DEV_CAP_DMFS_HIGH_RATE_QPN_RANGE_OFFSET	0xac
+#define QUERY_DEV_CAP_MAP_CLOCK_TO_USER 0xc1
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_NUM_OFFSET	0xcc
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MAX_OFFSET	0xd0
 #define QUERY_DEV_CAP_QP_RATE_LIMIT_MIN_OFFSET	0xd2
@@ -841,6 +842,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 	if (mlx4_is_mfunc(dev))
 		disable_unsupported_roce_caps(outbox);
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAP_CLOCK_TO_USER);
+	dev_cap->map_clock_to_user = field & 0x80;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
 	dev_cap->reserved_qps = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 8f020f26ebf5f..cf64e54eecb05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -131,6 +131,7 @@ struct mlx4_dev_cap {
 	u32 health_buffer_addrs;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
 	bool wol_port[MLX4_MAX_PORTS + 1];
+	bool map_clock_to_user;
 };
 
 struct mlx4_func_cap {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index c326b434734e1..00c84656b2e7e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -498,6 +498,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 	}
 
+	dev->caps.map_clock_to_user  = dev_cap->map_clock_to_user;
 	dev->caps.uar_page_size	     = PAGE_SIZE;
 	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
 	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
@@ -1948,6 +1949,11 @@ int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
 	if (mlx4_is_slave(dev))
 		return -EOPNOTSUPP;
 
+	if (!dev->caps.map_clock_to_user) {
+		mlx4_dbg(dev, "Map clock to user is not supported.\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (!params)
 		return -EINVAL;
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 236a7d04f891e..30bb59fe970cb 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -630,6 +630,7 @@ struct mlx4_caps {
 	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
 	u32			health_buffer_addrs;
+	bool			map_clock_to_user;
 };
 
 struct mlx4_buf_list {
-- 
GitLab


From f1d4d47c5851b348b7713007e152bc68b94d728b Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Tue, 1 Jun 2021 10:53:52 +0300
Subject: [PATCH 2197/3804] x86/setup: Always reserve the first 1M of RAM

There are BIOSes that are known to corrupt the memory under 1M, or more
precisely under 640K because the memory above 640K is anyway reserved
for the EGA/VGA frame buffer and BIOS.

To prevent the kernel from using memory that may be clobbered by the
BIOS, the beginning of the memory is always reserved. The exact size of
the reserved area is determined by the CONFIG_X86_RESERVE_LOW build-time
option and the "reservelow=" command line option. The reserved range may be
from 4K to 640K with the default of 64K. There are also configurations
that reserve the entire 1M range, like machines with SandyBridge graphic
devices or systems that enable crash kernel.

In addition to the potentially clobbered memory, EBDA of unknown size may
be as low as 128K and the memory above that EBDA start is also reserved
early.

It would have been possible to reserve the entire range under 1M if not for
the real mode trampoline, which must reside in that area.

To accommodate placement of the real mode trampoline and keep the memory
safe from being clobbered by BIOS, reserve the first 64K of RAM before
memory allocations are possible and then, after the real mode trampoline
is allocated, reserve the entire range from 0 to 1M.

Update trim_snb_memory() and reserve_real_mode() to avoid redundant
reservations of the same memory range.

Also make sure the memory under 1M is not getting freed by
efi_free_boot_services().

 [ bp: Massage commit message and comments. ]

Fixes: a799c2bd29d1 ("x86/setup: Consolidate early memory reservations")
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Hugh Dickins <hughd@google.com>
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213177
Link: https://lkml.kernel.org/r/20210601075354.5149-2-rppt@kernel.org
---
 arch/x86/kernel/setup.c        | 35 ++++++++++++++++++++--------------
 arch/x86/platform/efi/quirks.c | 12 ++++++++++++
 arch/x86/realmode/init.c       | 14 ++++++++------
 3 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ff653d608d5f7..1e720626069a3 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -638,11 +638,11 @@ static void __init trim_snb_memory(void)
 	 * them from accessing certain memory ranges, namely anything below
 	 * 1M and in the pages listed in bad_pages[] above.
 	 *
-	 * To avoid these pages being ever accessed by SNB gfx devices
-	 * reserve all memory below the 1 MB mark and bad_pages that have
-	 * not already been reserved at boot time.
+	 * To avoid these pages being ever accessed by SNB gfx devices reserve
+	 * bad_pages that have not already been reserved at boot time.
+	 * All memory below the 1 MB mark is anyway reserved later during
+	 * setup_arch(), so there is no need to reserve it here.
 	 */
-	memblock_reserve(0, 1<<20);
 
 	for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
 		if (memblock_reserve(bad_pages[i], PAGE_SIZE))
@@ -734,14 +734,14 @@ static void __init early_reserve_memory(void)
 	 * The first 4Kb of memory is a BIOS owned area, but generally it is
 	 * not listed as such in the E820 table.
 	 *
-	 * Reserve the first memory page and typically some additional
-	 * memory (64KiB by default) since some BIOSes are known to corrupt
-	 * low memory. See the Kconfig help text for X86_RESERVE_LOW.
+	 * Reserve the first 64K of memory since some BIOSes are known to
+	 * corrupt low memory. After the real mode trampoline is allocated the
+	 * rest of the memory below 640k is reserved.
 	 *
 	 * In addition, make sure page 0 is always reserved because on
 	 * systems with L1TF its contents can be leaked to user processes.
 	 */
-	memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
+	memblock_reserve(0, SZ_64K);
 
 	early_reserve_initrd();
 
@@ -752,6 +752,7 @@ static void __init early_reserve_memory(void)
 
 	reserve_ibft_region();
 	reserve_bios_regions();
+	trim_snb_memory();
 }
 
 /*
@@ -1082,14 +1083,20 @@ void __init setup_arch(char **cmdline_p)
 			(max_pfn_mapped<<PAGE_SHIFT) - 1);
 #endif
 
-	reserve_real_mode();
-
 	/*
-	 * Reserving memory causing GPU hangs on Sandy Bridge integrated
-	 * graphics devices should be done after we allocated memory under
-	 * 1M for the real mode trampoline.
+	 * Find free memory for the real mode trampoline and place it
+	 * there.
+	 * If there is not enough free memory under 1M, on EFI-enabled
+	 * systems there will be additional attempt to reclaim the memory
+	 * for the real mode trampoline at efi_free_boot_services().
+	 *
+	 * Unconditionally reserve the entire first 1M of RAM because
+	 * BIOSes are known to corrupt low memory and several
+	 * hundred kilobytes are not worth complex detection what memory gets
+	 * clobbered. Moreover, on machines with SandyBridge graphics or in
+	 * setups that use crashkernel the entire 1M is reserved anyway.
 	 */
-	trim_snb_memory();
+	reserve_real_mode();
 
 	init_mem_mapping();
 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 7850111008a8b..b15ebfe40a73e 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -450,6 +450,18 @@ void __init efi_free_boot_services(void)
 			size -= rm_size;
 		}
 
+		/*
+		 * Don't free memory under 1M for two reasons:
+		 * - BIOS might clobber it
+		 * - Crash kernel needs it to be reserved
+		 */
+		if (start + size < SZ_1M)
+			continue;
+		if (start < SZ_1M) {
+			size -= (SZ_1M - start);
+			start = SZ_1M;
+		}
+
 		memblock_free_late(start, size);
 	}
 
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 2e1c1bec0f9e7..6534c92d0f83f 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -29,14 +29,16 @@ void __init reserve_real_mode(void)
 
 	/* Has to be under 1M so we can execute real-mode AP code. */
 	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-	if (!mem) {
+	if (!mem)
 		pr_info("No sub-1M memory is available for the trampoline\n");
-		return;
-	}
+	else
+		set_real_mode_mem(mem);
 
-	memblock_reserve(mem, size);
-	set_real_mode_mem(mem);
-	crash_reserve_low_1M();
+	/*
+	 * Unconditionally reserve the entire first 1M, see comment in
+	 * setup_arch().
+	 */
+	memblock_reserve(0, SZ_1M);
 }
 
 static void sme_sev_setup_real_mode(struct trampoline_header *th)
-- 
GitLab


From fcf9dc02f83949b3261eefe03e7bb81c59bfaa9c Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Thu, 3 Jun 2021 20:02:39 +0800
Subject: [PATCH 2198/3804] arm64: mm: Add is_el1_data_abort() helper

We already have is_el1_instruction_abort(), add is_el1_data_abort()
helper and use it.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210603120239.169018-1-wangkefeng.wang@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/fault.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 871c82ab0a309..5c855b2ab93b8 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -232,13 +232,17 @@ static bool is_el1_instruction_abort(unsigned int esr)
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
 }
 
+static bool is_el1_data_abort(unsigned int esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_CUR;
+}
+
 static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr,
 					   struct pt_regs *regs)
 {
-	unsigned int ec       = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-	if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+	if (!is_el1_data_abort(esr) && !is_el1_instruction_abort(esr))
 		return false;
 
 	if (fsc_type == ESR_ELx_FSC_PERM)
@@ -258,7 +262,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
 	unsigned long flags;
 	u64 par, dfsc;
 
-	if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR ||
+	if (!is_el1_data_abort(esr) ||
 	    (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT)
 		return false;
 
@@ -346,10 +350,9 @@ static void do_tag_recovery(unsigned long addr, unsigned int esr,
 
 static bool is_el1_mte_sync_tag_check_fault(unsigned int esr)
 {
-	unsigned int ec = ESR_ELx_EC(esr);
 	unsigned int fsc = esr & ESR_ELx_FSC;
 
-	if (ec != ESR_ELx_EC_DABT_CUR)
+	if (!is_el1_data_abort(esr))
 		return false;
 
 	if (fsc == ESR_ELx_FSC_MTE)
-- 
GitLab


From 8d396bb0a5b62b326f6be7594d8bd46b088296bd Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Sat, 29 May 2021 12:29:32 -0700
Subject: [PATCH 2199/3804] usb: dwc3: debugfs: Add and remove endpoint dirs
 dynamically

The DWC3 DebugFS directory and files are currently created once
during probe.  This includes creation of subdirectories for each
of the gadget's endpoints.  This works fine for peripheral-only
controllers, as dwc3_core_init_mode() calls dwc3_gadget_init()
just prior to calling dwc3_debugfs_init().

However, for dual-role controllers, dwc3_core_init_mode() will
instead call dwc3_drd_init() which is problematic in a few ways.
First, the initial state must be determined, then dwc3_set_mode()
will have to schedule drd_work and by then dwc3_debugfs_init()
could have already been invoked.  Even if the initial mode is
peripheral, dwc3_gadget_init() happens after the DebugFS files
are created, and worse so if the initial state is host and the
controller switches to peripheral much later.  And secondly,
even if the gadget endpoints' debug entries were successfully
created, if the controller exits peripheral mode, its dwc3_eps
are freed so the debug files would now hold stale references.

So it is best if the DebugFS endpoint entries are created and
removed dynamically at the same time the underlying dwc3_eps are.
Do this by calling dwc3_debugfs_create_endpoint_dir() as each
endpoint is created, and conversely remove the DebugFS entry when
the endpoint is freed.

Fixes: 41ce1456e1db ("usb: dwc3: core: make dwc3_set_mode() work properly")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210529192932.22912-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/debug.h   |  3 +++
 drivers/usb/dwc3/debugfs.c | 21 ++-------------------
 drivers/usb/dwc3/gadget.c  |  3 +++
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index d0ac89c5b3172..d223c54115f4a 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -413,9 +413,12 @@ static inline const char *dwc3_gadget_generic_cmd_status_string(int status)
 
 
 #ifdef CONFIG_DEBUG_FS
+extern void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep);
 extern void dwc3_debugfs_init(struct dwc3 *d);
 extern void dwc3_debugfs_exit(struct dwc3 *d);
 #else
+static inline void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep)
+{  }
 static inline void dwc3_debugfs_init(struct dwc3 *d)
 {  }
 static inline void dwc3_debugfs_exit(struct dwc3 *d)
diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c
index 7146ee2ac0576..5dbbe53269d39 100644
--- a/drivers/usb/dwc3/debugfs.c
+++ b/drivers/usb/dwc3/debugfs.c
@@ -886,30 +886,14 @@ static void dwc3_debugfs_create_endpoint_files(struct dwc3_ep *dep,
 	}
 }
 
-static void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep,
-		struct dentry *parent)
+void dwc3_debugfs_create_endpoint_dir(struct dwc3_ep *dep)
 {
 	struct dentry		*dir;
 
-	dir = debugfs_create_dir(dep->name, parent);
+	dir = debugfs_create_dir(dep->name, dep->dwc->root);
 	dwc3_debugfs_create_endpoint_files(dep, dir);
 }
 
-static void dwc3_debugfs_create_endpoint_dirs(struct dwc3 *dwc,
-		struct dentry *parent)
-{
-	int			i;
-
-	for (i = 0; i < dwc->num_eps; i++) {
-		struct dwc3_ep	*dep = dwc->eps[i];
-
-		if (!dep)
-			continue;
-
-		dwc3_debugfs_create_endpoint_dir(dep, parent);
-	}
-}
-
 void dwc3_debugfs_init(struct dwc3 *dwc)
 {
 	struct dentry		*root;
@@ -940,7 +924,6 @@ void dwc3_debugfs_init(struct dwc3 *dwc)
 				&dwc3_testmode_fops);
 		debugfs_create_file("link_state", 0644, root, dwc,
 				    &dwc3_link_state_fops);
-		dwc3_debugfs_create_endpoint_dirs(dwc, root);
 	}
 }
 
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 88270eee8a48c..f14c2aa837598 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2753,6 +2753,8 @@ static int dwc3_gadget_init_endpoint(struct dwc3 *dwc, u8 epnum)
 	INIT_LIST_HEAD(&dep->started_list);
 	INIT_LIST_HEAD(&dep->cancelled_list);
 
+	dwc3_debugfs_create_endpoint_dir(dep);
+
 	return 0;
 }
 
@@ -2796,6 +2798,7 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc)
 			list_del(&dep->endpoint.ep_list);
 		}
 
+		debugfs_remove_recursive(debugfs_lookup(dep->name, dwc->root));
 		kfree(dep);
 	}
 }
-- 
GitLab


From a747070e9b629eeb70118651dfbd500bf8bb5ebe Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Thu, 3 Jun 2021 13:57:24 +0800
Subject: [PATCH 2200/3804] regulator: rt6245: Add support for Richtek RT6245

Richtek RT6245 is a high-performance, synchronous step-down converter
that can deliver up to 14A output current with an input supply voltage
range of 4.5V to 17V.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1622699844-19203-2-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig            |   9 +
 drivers/regulator/Makefile           |   1 +
 drivers/regulator/rt6245-regulator.c | 254 +++++++++++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 drivers/regulator/rt6245-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 50c608e6d0065..ef493f1d712d7 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1031,6 +1031,15 @@ config REGULATOR_RT6160
 	  The wide output range is from 2025mV to 5200mV and can be used on most
 	  common application scenario.
 
+config REGULATOR_RT6245
+	tristate "Richtek RT6245 voltage regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This adds support for Richtek RT6245 voltage regulator.
+	  It can support up to 14A output current and adjustable output voltage
+	  from 0.4375V to 1.3875V, per step 12.5mV.
+
 config REGULATOR_RTMV20
 	tristate "RTMV20 Laser Diode Regulator"
 	depends on I2C
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 4aa5533bd8ee5..4143a418ff646 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -124,6 +124,7 @@ obj-$(CONFIG_REGULATOR_RT4801)	+= rt4801-regulator.o
 obj-$(CONFIG_REGULATOR_RT4831)	+= rt4831-regulator.o
 obj-$(CONFIG_REGULATOR_RT5033)	+= rt5033-regulator.o
 obj-$(CONFIG_REGULATOR_RT6160)	+= rt6160-regulator.o
+obj-$(CONFIG_REGULATOR_RT6245)	+= rt6245-regulator.o
 obj-$(CONFIG_REGULATOR_RTMV20)	+= rtmv20-regulator.o
 obj-$(CONFIG_REGULATOR_S2MPA01) += s2mpa01.o
 obj-$(CONFIG_REGULATOR_S2MPS11) += s2mps11.o
diff --git a/drivers/regulator/rt6245-regulator.c b/drivers/regulator/rt6245-regulator.c
new file mode 100644
index 0000000000000..d3299a72fd10d
--- /dev/null
+++ b/drivers/regulator/rt6245-regulator.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/of_regulator.h>
+
+#define RT6245_VIRT_OCLIMIT	0x00
+#define RT6245_VIRT_OTLEVEL	0x01
+#define RT6245_VIRT_PGDLYTIME	0x02
+#define RT6245_VIRT_SLEWRATE	0x03
+#define RT6245_VIRT_SWFREQ	0x04
+#define RT6245_VIRT_VOUT	0x05
+
+#define RT6245_VOUT_MASK	GENMASK(6, 0)
+#define RT6245_SLEW_MASK	GENMASK(2, 0)
+#define RT6245_CHKSUM_MASK	BIT(7)
+#define RT6245_CODE_MASK	GENMASK(6, 0)
+
+/* HW Enable + Soft start time */
+#define RT6245_ENTIME_IN_US	5000
+
+#define RT6245_VOUT_MINUV	437500
+#define RT6245_VOUT_MAXUV	1387500
+#define RT6245_VOUT_STEPUV	12500
+#define RT6245_NUM_VOUT		((RT6245_VOUT_MAXUV - RT6245_VOUT_MINUV) / RT6245_VOUT_STEPUV + 1)
+
+struct rt6245_priv {
+	struct gpio_desc *enable_gpio;
+	bool enable_state;
+};
+
+static int rt6245_enable(struct regulator_dev *rdev)
+{
+	struct rt6245_priv *priv = rdev_get_drvdata(rdev);
+	struct regmap *regmap = rdev_get_regmap(rdev);
+	int ret;
+
+	if (!priv->enable_gpio)
+		return 0;
+
+	gpiod_direction_output(priv->enable_gpio, 1);
+	usleep_range(RT6245_ENTIME_IN_US, RT6245_ENTIME_IN_US + 1000);
+
+	regcache_cache_only(regmap, false);
+	ret = regcache_sync(regmap);
+	if (ret)
+		return ret;
+
+	priv->enable_state = true;
+	return 0;
+}
+
+static int rt6245_disable(struct regulator_dev *rdev)
+{
+	struct rt6245_priv *priv = rdev_get_drvdata(rdev);
+	struct regmap *regmap = rdev_get_regmap(rdev);
+
+	if (!priv->enable_gpio)
+		return -EINVAL;
+
+	regcache_cache_only(regmap, true);
+	regcache_mark_dirty(regmap);
+
+	gpiod_direction_output(priv->enable_gpio, 0);
+
+	priv->enable_state = false;
+	return 0;
+}
+
+static int rt6245_is_enabled(struct regulator_dev *rdev)
+{
+	struct rt6245_priv *priv = rdev_get_drvdata(rdev);
+
+	return priv->enable_state ? 1 : 0;
+}
+
+static const struct regulator_ops rt6245_regulator_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
+	.enable = rt6245_enable,
+	.disable = rt6245_disable,
+	.is_enabled = rt6245_is_enabled,
+};
+
+/* ramp delay dividend is 12500 uV/uS, and divisor from 1 to 8 */
+static const unsigned int rt6245_ramp_delay_table[] = {
+	12500, 6250, 4167, 3125, 2500, 2083, 1786, 1562
+};
+
+static const struct regulator_desc rt6245_regulator_desc = {
+	.name = "rt6245-regulator",
+	.ops = &rt6245_regulator_ops,
+	.type = REGULATOR_VOLTAGE,
+	.min_uV = RT6245_VOUT_MINUV,
+	.uV_step = RT6245_VOUT_STEPUV,
+	.n_voltages = RT6245_NUM_VOUT,
+	.ramp_delay_table = rt6245_ramp_delay_table,
+	.n_ramp_values = ARRAY_SIZE(rt6245_ramp_delay_table),
+	.owner = THIS_MODULE,
+	.vsel_reg = RT6245_VIRT_VOUT,
+	.vsel_mask = RT6245_VOUT_MASK,
+	.ramp_reg = RT6245_VIRT_SLEWRATE,
+	.ramp_mask = RT6245_SLEW_MASK,
+};
+
+static int rt6245_init_device_properties(struct device *dev)
+{
+	const struct {
+		const char *name;
+		unsigned int reg;
+	} rt6245_props[] = {
+		{ "richtek,oc-level-select",  RT6245_VIRT_OCLIMIT },
+		{ "richtek,ot-level-select", RT6245_VIRT_OTLEVEL },
+		{ "richtek,pgdly-time-select", RT6245_VIRT_PGDLYTIME },
+		{ "richtek,switch-freq-select", RT6245_VIRT_SWFREQ }
+	};
+	struct regmap *regmap = dev_get_regmap(dev, NULL);
+	u8 propval;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(rt6245_props); i++) {
+		ret = device_property_read_u8(dev, rt6245_props[i].name, &propval);
+		if (ret)
+			continue;
+
+		ret = regmap_write(regmap, rt6245_props[i].reg, propval);
+		if (ret) {
+			dev_err(dev, "Fail to apply [%s:%d]\n", rt6245_props[i].name, propval);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rt6245_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct i2c_client *i2c = context;
+	const u8 func_base[] = { 0x6F, 0x73, 0x78, 0x61, 0x7C, 0 };
+	unsigned int code, bit_count;
+
+	code = func_base[reg];
+	code += val;
+
+	/* xor checksum for bit 6 to 0 */
+	bit_count = hweight8(code & RT6245_CODE_MASK);
+	if (bit_count % 2)
+		code |= RT6245_CHKSUM_MASK;
+	else
+		code &= ~RT6245_CHKSUM_MASK;
+
+	return i2c_smbus_write_byte(i2c, code);
+}
+
+static const struct reg_default rt6245_reg_defaults[] = {
+	/* Default over current 14A */
+	{ RT6245_VIRT_OCLIMIT, 2 },
+	/* Default over temperature 150'c */
+	{ RT6245_VIRT_OTLEVEL, 0 },
+	/* Default power good delay time 10us */
+	{ RT6245_VIRT_PGDLYTIME, 1 },
+	/* Default slewrate 12.5mV/uS */
+	{ RT6245_VIRT_SLEWRATE, 0 },
+	/* Default switch frequency 800KHz */
+	{ RT6245_VIRT_SWFREQ, 1 },
+	/* Default voltage 750mV */
+	{ RT6245_VIRT_VOUT, 0x19 }
+};
+
+static const struct regmap_config rt6245_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = RT6245_VIRT_VOUT,
+	.cache_type = REGCACHE_FLAT,
+	.reg_defaults = rt6245_reg_defaults,
+	.num_reg_defaults = ARRAY_SIZE(rt6245_reg_defaults),
+	.reg_write = rt6245_reg_write,
+};
+
+static int rt6245_probe(struct i2c_client *i2c)
+{
+	struct rt6245_priv *priv;
+	struct regmap *regmap;
+	struct regulator_config regulator_cfg = {};
+	struct regulator_dev *rdev;
+	int ret;
+
+	priv = devm_kzalloc(&i2c->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->enable_state = true;
+
+	priv->enable_gpio = devm_gpiod_get_optional(&i2c->dev, "enable", GPIOD_OUT_HIGH);
+	if (IS_ERR(priv->enable_gpio)) {
+		dev_err(&i2c->dev, "Failed to get 'enable' gpio\n");
+		return PTR_ERR(priv->enable_gpio);
+	}
+
+	usleep_range(RT6245_ENTIME_IN_US, RT6245_ENTIME_IN_US + 1000);
+
+	regmap = devm_regmap_init(&i2c->dev, NULL, i2c, &rt6245_regmap_config);
+	if (IS_ERR(regmap)) {
+		dev_err(&i2c->dev, "Failed to initialize the regmap\n");
+		return PTR_ERR(regmap);
+	}
+
+	ret = rt6245_init_device_properties(&i2c->dev);
+	if (ret) {
+		dev_err(&i2c->dev, "Failed to initialize device properties\n");
+		return ret;
+	}
+
+	regulator_cfg.dev = &i2c->dev;
+	regulator_cfg.of_node = i2c->dev.of_node;
+	regulator_cfg.regmap = regmap;
+	regulator_cfg.driver_data = priv;
+	regulator_cfg.init_data = of_get_regulator_init_data(&i2c->dev, i2c->dev.of_node,
+							     &rt6245_regulator_desc);
+	rdev = devm_regulator_register(&i2c->dev, &rt6245_regulator_desc, &regulator_cfg);
+	if (IS_ERR(rdev)) {
+		dev_err(&i2c->dev, "Failed to register regulator\n");
+		return PTR_ERR(rdev);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id __maybe_unused rt6245_of_match_table[] = {
+	{ .compatible = "richtek,rt6245", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rt6245_of_match_table);
+
+static struct i2c_driver rt6245_driver = {
+	.driver = {
+		.name = "rt6245",
+		.of_match_table = rt6245_of_match_table,
+	},
+	.probe_new = rt6245_probe,
+};
+module_i2c_driver(rt6245_driver);
+
+MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
+MODULE_DESCRIPTION("Richtek RT6245 Regulator Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From cb2381cbecb81a8893b2d1e1af29bc2e5531df27 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 3 Jun 2021 17:49:44 +0800
Subject: [PATCH 2201/3804] regulator: rt4801: Fix NULL pointer dereference if
 priv->enable_gpios is NULL

devm_gpiod_get_array_optional may return NULL if no GPIO was assigned.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210603094944.1114156-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt4801-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/rt4801-regulator.c b/drivers/regulator/rt4801-regulator.c
index 2055a9cb13ba5..7a87788d3f092 100644
--- a/drivers/regulator/rt4801-regulator.c
+++ b/drivers/regulator/rt4801-regulator.c
@@ -66,7 +66,7 @@ static int rt4801_enable(struct regulator_dev *rdev)
 	struct gpio_descs *gpios = priv->enable_gpios;
 	int id = rdev_get_id(rdev), ret;
 
-	if (gpios->ndescs <= id) {
+	if (!gpios || gpios->ndescs <= id) {
 		dev_warn(&rdev->dev, "no dedicated gpio can control\n");
 		goto bypass_gpio;
 	}
@@ -88,7 +88,7 @@ static int rt4801_disable(struct regulator_dev *rdev)
 	struct gpio_descs *gpios = priv->enable_gpios;
 	int id = rdev_get_id(rdev);
 
-	if (gpios->ndescs <= id) {
+	if (!gpios || gpios->ndescs <= id) {
 		dev_warn(&rdev->dev, "no dedicated gpio can control\n");
 		goto bypass_gpio;
 	}
-- 
GitLab


From 4f9f4f0f6261e4b162dfcaf91e08824a7c93da07 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Tue, 11 May 2021 15:48:56 +0800
Subject: [PATCH 2202/3804] clocksource/drivers/arm_arch_timer: Remove
 arch_timer_rate1

This variable was added by mistake; it's not used at all.

Fixes: e2bf384d4329 ("clocksource/drivers/arm_arch_timer: Add __ro_after_init and __init")
Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210511154856.6afbcb65@xhacker.debian
---
 drivers/clocksource/arm_arch_timer.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index fe1a82627d570..89a9e0524555f 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -64,7 +64,6 @@ struct arch_timer {
 #define to_arch_timer(e) container_of(e, struct arch_timer, evt)
 
 static u32 arch_timer_rate __ro_after_init;
-u32 arch_timer_rate1 __ro_after_init;
 static int arch_timer_ppi[ARCH_TIMER_MAX_TIMER_PPI] __ro_after_init;
 
 static const char *arch_timer_ppi_names[ARCH_TIMER_MAX_TIMER_PPI] = {
-- 
GitLab


From a4931dcab1dedf67caa231ff2c9b0a6bb40139af Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:00:53 +0100
Subject: [PATCH 2203/3804] i2c: altera: Fix formatting issue in struct and
 demote unworthy kernel-doc headers

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-altera.c:74: warning: cannot understand function prototype: 'struct altr_i2c_dev '
 drivers/i2c/busses/i2c-altera.c:180: warning: Function parameter or member 'idev' not described in 'altr_i2c_transfer'
 drivers/i2c/busses/i2c-altera.c:180: warning: Function parameter or member 'data' not described in 'altr_i2c_transfer'
 drivers/i2c/busses/i2c-altera.c:193: warning: Function parameter or member 'idev' not described in 'altr_i2c_empty_rx_fifo'
 drivers/i2c/busses/i2c-altera.c:209: warning: Function parameter or member 'idev' not described in 'altr_i2c_fill_tx_fifo'

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-altera.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c
index 7d62cbda6e06c..354cf7e45c4a0 100644
--- a/drivers/i2c/busses/i2c-altera.c
+++ b/drivers/i2c/busses/i2c-altera.c
@@ -55,7 +55,7 @@
 #define ALTR_I2C_XFER_TIMEOUT	(msecs_to_jiffies(250))
 
 /**
- * altr_i2c_dev - I2C device context
+ * struct altr_i2c_dev - I2C device context
  * @base: pointer to register struct
  * @msg: pointer to current message
  * @msg_len: number of bytes transferred in msg
@@ -172,7 +172,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev)
 	altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false);
 }
 
-/**
+/*
  * altr_i2c_transfer - On the last byte to be transmitted, send
  * a Stop bit on the last byte.
  */
@@ -185,7 +185,7 @@ static void altr_i2c_transfer(struct altr_i2c_dev *idev, u32 data)
 		writel(data, idev->base + ALTR_I2C_TFR_CMD);
 }
 
-/**
+/*
  * altr_i2c_empty_rx_fifo - Fetch data from RX FIFO until end of
  * transfer. Send a Stop bit on the last byte.
  */
@@ -201,9 +201,8 @@ static void altr_i2c_empty_rx_fifo(struct altr_i2c_dev *idev)
 	}
 }
 
-/**
+/*
  * altr_i2c_fill_tx_fifo - Fill TX FIFO from current message buffer.
- * @return: Number of bytes left to transfer.
  */
 static int altr_i2c_fill_tx_fifo(struct altr_i2c_dev *idev)
 {
-- 
GitLab


From de2646f34a5bdfa04fb079bfaaada992b87c6a55 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 20 May 2021 20:01:04 +0100
Subject: [PATCH 2204/3804] i2c: tegra-bpmp: Demote kernel-doc abuses

Fixes the following W=1 kernel build warning(s):

 drivers/i2c/busses/i2c-tegra-bpmp.c:86: warning: Function parameter or member 'i2c' not described in 'tegra_bpmp_serialize_i2c_msg'
 drivers/i2c/busses/i2c-tegra-bpmp.c:86: warning: Function parameter or member 'request' not described in 'tegra_bpmp_serialize_i2c_msg'
 drivers/i2c/busses/i2c-tegra-bpmp.c:86: warning: Function parameter or member 'msgs' not described in 'tegra_bpmp_serialize_i2c_msg'
 drivers/i2c/busses/i2c-tegra-bpmp.c:86: warning: Function parameter or member 'num' not described in 'tegra_bpmp_serialize_i2c_msg'
 drivers/i2c/busses/i2c-tegra-bpmp.c:86: warning: expecting prototype for The serialized I2C format is simply the following(). Prototype was for tegra_bpmp_serialize_i2c_msg() instead
 drivers/i2c/busses/i2c-tegra-bpmp.c:130: warning: Function parameter or member 'i2c' not described in 'tegra_bpmp_i2c_deserialize'
 drivers/i2c/busses/i2c-tegra-bpmp.c:130: warning: Function parameter or member 'response' not described in 'tegra_bpmp_i2c_deserialize'
 drivers/i2c/busses/i2c-tegra-bpmp.c:130: warning: Function parameter or member 'msgs' not described in 'tegra_bpmp_i2c_deserialize'
 drivers/i2c/busses/i2c-tegra-bpmp.c:130: warning: Function parameter or member 'num' not described in 'tegra_bpmp_i2c_deserialize'
 drivers/i2c/busses/i2c-tegra-bpmp.c:130: warning: expecting prototype for The data in the BPMP(). Prototype was for tegra_bpmp_i2c_deserialize() instead

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-tegra-bpmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-tegra-bpmp.c b/drivers/i2c/busses/i2c-tegra-bpmp.c
index 3680d608698b1..ec0c7cad42401 100644
--- a/drivers/i2c/busses/i2c-tegra-bpmp.c
+++ b/drivers/i2c/busses/i2c-tegra-bpmp.c
@@ -65,7 +65,7 @@ static void tegra_bpmp_xlate_flags(u16 flags, u16 *out)
 		*out |= SERIALI2C_RECV_LEN;
 }
 
-/**
+/*
  * The serialized I2C format is simply the following:
  * [addr little-endian][flags little-endian][len little-endian][data if write]
  * [addr little-endian][flags little-endian][len little-endian][data if write]
@@ -109,7 +109,7 @@ static void tegra_bpmp_serialize_i2c_msg(struct tegra_bpmp_i2c *i2c,
 	request->xfer.data_size = pos;
 }
 
-/**
+/*
  * The data in the BPMP -> CPU direction is composed of sequential blocks for
  * those messages that have I2C_M_RD. So, for example, if you have:
  *
-- 
GitLab


From a8db57c1d285c758adc7fb43d6e2bad2554106e1 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Wed, 2 Jun 2021 18:15:04 +0800
Subject: [PATCH 2205/3804] rtnetlink: Fix missing error code in
 rtnl_bridge_notify()

The error code is missing in this code scenario, add the error code
'-EINVAL' to the return value 'err'.

Eliminate the following smatch warning:

net/core/rtnetlink.c:4834 rtnl_bridge_notify() warn: missing error code
'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 714d5fa385468..3e84279c41236 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4842,8 +4842,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
 	if (err < 0)
 		goto errout;
 
-	if (!skb->len)
+	if (!skb->len) {
+		err = -EINVAL;
 		goto errout;
+	}
 
 	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 	return 0;
-- 
GitLab


From 261ba78cc364ad595cead555a7d2a61471eac165 Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Wed, 2 Jun 2021 18:36:26 +0800
Subject: [PATCH 2206/3804] sit: set name of device back to struct parms

addrconf_set_sit_dstaddr will use parms->name.

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index aa98294a3ad31..f7c8110ece5fb 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -271,6 +271,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
 	if (ipip6_tunnel_create(dev) < 0)
 		goto failed_free;
 
+	if (!parms->name[0])
+		strcpy(parms->name, dev->name);
+
 	return nt;
 
 failed_free:
-- 
GitLab


From a83d958504734f78f42b1e3392d93816297e790a Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 3 Jun 2021 21:20:26 +0200
Subject: [PATCH 2207/3804] Bluetooth: Fix VIRTIO_ID_BT assigned number

It turned out that the VIRTIO_ID_* are not assigned in the virtio_ids.h
file in the upstream kernel. Picking the next free one was wrong and
there is a process that has been followed now.

See https://github.com/oasis-tcs/virtio-spec/issues/108 for details.

Fixes: afd2daa26c7a ("Bluetooth: Add support for virtio transport driver")
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 include/uapi/linux/virtio_ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index f0c35ce8628c9..4fe842c3a3a98 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -54,7 +54,7 @@
 #define VIRTIO_ID_SOUND			25 /* virtio sound */
 #define VIRTIO_ID_FS			26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM			27 /* virtio pmem */
-#define VIRTIO_ID_BT			28 /* virtio bluetooth */
 #define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_BT			40 /* virtio bluetooth */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
GitLab


From 1f14a620f30b01234f8b61df396f513e2ec4887f Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 30 Apr 2021 16:05:01 -0700
Subject: [PATCH 2208/3804] Bluetooth: btusb: Fix failing to init controllers
 with operation firmware

Some firmware, when already operational, may report broken version
information, leading to errors like the following:

[    6.176482] Bluetooth: hci0: Firmware revision 0.0 build 121 week 7 2021
[    6.177906] bluetooth hci0: Direct firmware load for intel/ibt-20-0-0.sfi failed with error -2
[    6.177910] Bluetooth: hci0: Failed to load Intel firmware file intel/ibt-20-0-0.sfi (-2)

Since we load the firmware file only to check whether its version has
changed compared to the one already loaded, we can skip this step when
the firmware is already operational.

Fixes: ac0565462e330 ("Bluetooth: btintel: Check firmware version before download")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 5d603ef39bad9..b88c63fbf7fb6 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2527,10 +2527,17 @@ static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
 	}
 
 	btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
-	err = request_firmware(&fw, fwname, &hdev->dev);
+	err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
 	if (err < 0) {
+		if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+			/* Firmware has already been loaded */
+			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+			return 0;
+		}
+
 		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
 			   fwname, err);
+
 		return err;
 	}
 
@@ -2680,12 +2687,24 @@ download:
 	err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
 						sizeof(fwname), "sfi");
 	if (err < 0) {
+		if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+			/* Firmware has already been loaded */
+			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+			return 0;
+		}
+
 		bt_dev_err(hdev, "Unsupported Intel firmware naming");
 		return -EINVAL;
 	}
 
-	err = request_firmware(&fw, fwname, &hdev->dev);
+	err = firmware_request_nowarn(&fw, fwname, &hdev->dev);
 	if (err < 0) {
+		if (!test_bit(BTUSB_BOOTLOADER, &data->flags)) {
+			/* Firmware has already been loaded */
+			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
+			return 0;
+		}
+
 		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
 			   fwname, err);
 		return err;
-- 
GitLab


From c47cc304990a2813995b1a92bbc11d0bb9a19ea9 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 2 Jun 2021 22:26:40 +0300
Subject: [PATCH 2209/3804] net: kcm: fix memory leak in kcm_sendmsg

Syzbot reported a memory leak in kcm_sendmsg() [1].
The problem was that frag_list was not freed in case of error.

In the while loop:

	if (head == skb)
		skb_shinfo(head)->frag_list = tskb;
	else
		skb->next = tskb;

frag_list is filled with skbs, but nothing frees them.

backtrace:
  [<0000000094c02615>] __alloc_skb+0x5e/0x250 net/core/skbuff.c:198
  [<00000000e5386cbd>] alloc_skb include/linux/skbuff.h:1083 [inline]
  [<00000000e5386cbd>] kcm_sendmsg+0x3b6/0xa50 net/kcm/kcmsock.c:967 [1]
  [<00000000f1613a8a>] sock_sendmsg_nosec net/socket.c:652 [inline]
  [<00000000f1613a8a>] sock_sendmsg+0x4c/0x60 net/socket.c:672

Reported-and-tested-by: syzbot+b039f5699bd82e1fb011@syzkaller.appspotmail.com
Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 6201965bd822f..1c572c8daced0 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1066,6 +1066,11 @@ out_error:
 		goto partial_message;
 	}
 
+	if (skb_has_frag_list(head)) {
+		kfree_skb_list(skb_shinfo(head)->frag_list);
+		skb_shinfo(head)->frag_list = NULL;
+	}
+
 	if (head != kcm->seq_skb)
 		kfree_skb(head);
 
-- 
GitLab


From bce130e7f392ddde8cfcb09927808ebd5f9c8669 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 3 Jun 2021 19:38:12 +0300
Subject: [PATCH 2210/3804] net: caif: added cfserl_release function

Added cfserl_release() function.

Cc: stable@vger.kernel.org
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfserl.h | 1 +
 net/caif/cfserl.c         | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h
index 14a55e03bb3ce..67cce8757175a 100644
--- a/include/net/caif/cfserl.h
+++ b/include/net/caif/cfserl.h
@@ -9,4 +9,5 @@
 #include <net/caif/caif_layer.h>
 
 struct cflayer *cfserl_create(int instance, bool use_stx);
+void cfserl_release(struct cflayer *layer);
 #endif
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index e11725a4bb0ed..40cd57ad0a0f4 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
 static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 			   int phyid);
 
+void cfserl_release(struct cflayer *layer)
+{
+	kfree(layer);
+}
+
 struct cflayer *cfserl_create(int instance, bool use_stx)
 {
 	struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
-- 
GitLab


From a2805dca5107d5603f4bbc027e81e20d93476e96 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 3 Jun 2021 19:38:51 +0300
Subject: [PATCH 2211/3804] net: caif: add proper error handling

caif_enroll_dev() can fail in some cases. Ignoring
these failures can lead to a memory leak, because the
link_support pointer is then not assigned anywhere.

Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/caif_dev.h |  2 +-
 include/net/caif/cfcnfg.h   |  2 +-
 net/caif/caif_dev.c         |  8 +++++---
 net/caif/cfcnfg.c           | 16 +++++++++++-----
 4 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 48ecca8530ffa..b655d8666f555 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -119,7 +119,7 @@ void caif_free_client(struct cflayer *adap_layer);
  * The link_support layer is used to add any Link Layer specific
  * framing.
  */
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 			struct cflayer *link_support, int head_room,
 			struct cflayer **layer, int (**rcv_func)(
 				struct sk_buff *, struct net_device *,
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 2aa5e91d84576..8819ff4db35a6 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -62,7 +62,7 @@ void cfcnfg_remove(struct cfcnfg *cfg);
  * @fcs:	Specify if checksum is used in CAIF Framing Layer.
  * @head_room:	Head space needed by link specific protocol.
  */
-void
+int
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
 		     struct net_device *dev, struct cflayer *phy_layer,
 		     enum cfcnfg_phy_preference pref,
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index c10e5a55758d2..fffbe41440b3d 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -308,7 +308,7 @@ static void dev_flowctrl(struct net_device *dev, int on)
 	caifd_put(caifd);
 }
 
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 		     struct cflayer *link_support, int head_room,
 		     struct cflayer **layer,
 		     int (**rcv_func)(struct sk_buff *, struct net_device *,
@@ -319,11 +319,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	enum cfcnfg_phy_preference pref;
 	struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
 	struct caif_device_entry_list *caifdevs;
+	int res;
 
 	caifdevs = caif_device_list(dev_net(dev));
 	caifd = caif_device_alloc(dev);
 	if (!caifd)
-		return;
+		return -ENOMEM;
 	*layer = &caifd->layer;
 	spin_lock_init(&caifd->flow_lock);
 
@@ -344,7 +345,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	strlcpy(caifd->layer.name, dev->name,
 		sizeof(caifd->layer.name));
 	caifd->layer.transmit = transmit;
-	cfcnfg_add_phy_layer(cfg,
+	res = cfcnfg_add_phy_layer(cfg,
 				dev,
 				&caifd->layer,
 				pref,
@@ -354,6 +355,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
 	mutex_unlock(&caifdevs->lock);
 	if (rcv_func)
 		*rcv_func = receive;
+	return res;
 }
 EXPORT_SYMBOL(caif_enroll_dev);
 
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 399239a14420f..cac30e676ac94 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -450,7 +450,7 @@ unlock:
 	rcu_read_unlock();
 }
 
-void
+int
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
 		     struct net_device *dev, struct cflayer *phy_layer,
 		     enum cfcnfg_phy_preference pref,
@@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
 {
 	struct cflayer *frml;
 	struct cfcnfg_phyinfo *phyinfo = NULL;
-	int i;
+	int i, res = 0;
 	u8 phyid;
 
 	mutex_lock(&cnfg->lock);
@@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
 			goto got_phyid;
 	}
 	pr_warn("Too many CAIF Link Layers (max 6)\n");
+	res = -EEXIST;
 	goto out;
 
 got_phyid:
 	phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
-	if (!phyinfo)
+	if (!phyinfo) {
+		res = -ENOMEM;
 		goto out_err;
+	}
 
 	phy_layer->id = phyid;
 	phyinfo->pref = pref;
@@ -492,8 +495,10 @@ got_phyid:
 
 	frml = cffrml_create(phyid, fcs);
 
-	if (!frml)
+	if (!frml) {
+		res = -ENOMEM;
 		goto out_err;
+	}
 	phyinfo->frm_layer = frml;
 	layer_set_up(frml, cnfg->mux);
 
@@ -511,11 +516,12 @@ got_phyid:
 	list_add_rcu(&phyinfo->node, &cnfg->phys);
 out:
 	mutex_unlock(&cnfg->lock);
-	return;
+	return res;
 
 out_err:
 	kfree(phyinfo);
 	mutex_unlock(&cnfg->lock);
+	return res;
 }
 EXPORT_SYMBOL(cfcnfg_add_phy_layer);
 
-- 
GitLab


From b53558a950a89824938e9811eddfc8efcd94e1bb Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 3 Jun 2021 19:39:11 +0300
Subject: [PATCH 2212/3804] net: caif: fix memory leak in caif_device_notify

If caif_enroll_dev() fails, the allocated
link_support is not assigned to the corresponding
structure. So simply free the allocated pointer in case
of error.

Fixes: 7c18d2205ea7 ("caif: Restructure how link caif link layer enroll")
Cc: stable@vger.kernel.org
Reported-and-tested-by: syzbot+7ec324747ce876a29db6@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index fffbe41440b3d..440139706130a 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -370,6 +370,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 	struct cflayer *layer, *link_support;
 	int head_room = 0;
 	struct caif_device_entry_list *caifdevs;
+	int res;
 
 	cfg = get_cfcnfg(dev_net(dev));
 	caifdevs = caif_device_list(dev_net(dev));
@@ -395,8 +396,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 				break;
 			}
 		}
-		caif_enroll_dev(dev, caifdev, link_support, head_room,
+		res = caif_enroll_dev(dev, caifdev, link_support, head_room,
 				&layer, NULL);
+		if (res)
+			cfserl_release(link_support);
 		caifdev->flowctrl = dev_flowctrl;
 		break;
 
-- 
GitLab


From 7f5d86669fa4d485523ddb1d212e0a2d90bd62bb Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 3 Jun 2021 19:39:35 +0300
Subject: [PATCH 2213/3804] net: caif: fix memory leak in cfusbl_device_notify

If caif_enroll_dev() fails, the allocated
link_support is not assigned to the corresponding
structure. So simply free the allocated pointer in case
of error.

Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_usb.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index a0116b9503d9d..b02e1292f7f19 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -115,6 +115,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
 	return (struct cflayer *) this;
 }
 
+static void cfusbl_release(struct cflayer *layer)
+{
+	kfree(layer);
+}
+
 static struct packet_type caif_usb_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_802_EX1),
 };
@@ -127,6 +132,7 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 	struct cflayer *layer, *link_support;
 	struct usbnet *usbnet;
 	struct usb_device *usbdev;
+	int res;
 
 	/* Check whether we have a NCM device, and find its VID/PID. */
 	if (!(dev->dev.parent && dev->dev.parent->driver &&
@@ -169,8 +175,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 	if (dev->num_tx_queues > 1)
 		pr_warn("USB device uses more than one tx queue\n");
 
-	caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+	res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
 			&layer, &caif_usb_type.func);
+	if (res)
+		goto err;
+
 	if (!pack_added)
 		dev_add_pack(&caif_usb_type);
 	pack_added = true;
@@ -178,6 +187,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
 	strlcpy(layer->name, dev->name, sizeof(layer->name));
 
 	return 0;
+err:
+	cfusbl_release(link_support);
+	return res;
 }
 
 static struct notifier_block caif_device_notifier = {
-- 
GitLab


From a27fb314cba8cb84cd6456a4699c3330a83c326d Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Wed, 2 Jun 2021 19:38:59 +0530
Subject: [PATCH 2214/3804] cxgb4: fix regression with HASH tc prio value
 update

commit db43b30cd89c ("cxgb4: add ethtool n-tuple filter deletion")
has moved searching for next highest priority HASH filter rule to
cxgb4_flow_rule_destroy(), which searches the rhashtable before the
rule is removed from it and hence always finds at least 1 entry.
Fix by removing the rule from rhashtable first before calling
cxgb4_flow_rule_destroy() and hence avoid fetching stale info.

Fixes: db43b30cd89c ("cxgb4: add ethtool n-tuple filter deletion")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c   | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 1b88bd1c2dbe4..dd9be229819a5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -997,20 +997,16 @@ int cxgb4_tc_flower_destroy(struct net_device *dev,
 	if (!ch_flower)
 		return -ENOENT;
 
+	rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node,
+			       adap->flower_ht_params);
+
 	ret = cxgb4_flow_rule_destroy(dev, ch_flower->fs.tc_prio,
 				      &ch_flower->fs, ch_flower->filter_id);
 	if (ret)
-		goto err;
+		netdev_err(dev, "Flow rule destroy failed for tid: %u, ret: %d",
+			   ch_flower->filter_id, ret);
 
-	ret = rhashtable_remove_fast(&adap->flower_tbl, &ch_flower->node,
-				     adap->flower_ht_params);
-	if (ret) {
-		netdev_err(dev, "Flow remove from rhashtable failed");
-		goto err;
-	}
 	kfree_rcu(ch_flower, rcu);
-
-err:
 	return ret;
 }
 
-- 
GitLab


From d7736958668c4facc15f421e622ffd718f5be80a Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 22:06:30 +0800
Subject: [PATCH 2215/3804] net/x25: Return the correct errno code

When kmalloc or kmemdup fails, ENOMEM should be returned rather than ENOBUFS.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 44d6566dd23e4..1816899499ce8 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -536,7 +536,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
 	if (protocol)
 		goto out;
 
-	rc = -ENOBUFS;
+	rc = -ENOMEM;
 	if ((sk = x25_alloc_socket(net, kern)) == NULL)
 		goto out;
 
-- 
GitLab


From 49251cd00228a3c983651f6bb2f33f6a0b8f152e Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 22:06:40 +0800
Subject: [PATCH 2216/3804] net: Return the correct errno code

When kmalloc or kmemdup fails, ENOMEM should be returned rather than ENOBUFS.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/compat.c b/net/compat.c
index ddd15af3a2837..210fc3b4d0d83 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -177,7 +177,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	if (kcmlen > stackbuf_size)
 		kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
 	if (kcmsg == NULL)
-		return -ENOBUFS;
+		return -ENOMEM;
 
 	/* Now copy them over neatly. */
 	memset(kcmsg, 0, kcmlen);
-- 
GitLab


From 59607863c54e9eb3f69afc5257dfe71c38bb751e Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Wed, 2 Jun 2021 22:06:58 +0800
Subject: [PATCH 2217/3804] fib: Return the correct errno code

When kmalloc or kmemdup fails, ENOMEM should be returned rather than ENOBUFS.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index cd80ffed6d267..a9f9379750802 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1168,7 +1168,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
 {
 	struct net *net;
 	struct sk_buff *skb;
-	int err = -ENOBUFS;
+	int err = -ENOMEM;
 
 	net = ops->fro_net;
 	skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
-- 
GitLab


From 821bbf79fe46a8b1d18aa456e8ed0a3c208c3754 Mon Sep 17 00:00:00 2001
From: Coco Li <lixiaoyan@google.com>
Date: Thu, 3 Jun 2021 07:32:58 +0000
Subject: [PATCH 2218/3804] ipv6: Fix KASAN: slab-out-of-bounds Read in
 fib6_nh_flush_exceptions

Reported by syzbot:
HEAD commit:    90c911ad Merge tag 'fixes' of git://git.kernel.org/pub/scm..
git tree:       git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
dashboard link: https://syzkaller.appspot.com/bug?extid=123aa35098fd3c000eb7
compiler:       Debian clang version 11.0.1-2

==================================================================
BUG: KASAN: slab-out-of-bounds in fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline]
BUG: KASAN: slab-out-of-bounds in fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732
Read of size 8 at addr ffff8880145c78f8 by task syz-executor.4/17760

CPU: 0 PID: 17760 Comm: syz-executor.4 Not tainted 5.12.0-rc8-syzkaller #0
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x202/0x31e lib/dump_stack.c:120
 print_address_description+0x5f/0x3b0 mm/kasan/report.c:232
 __kasan_report mm/kasan/report.c:399 [inline]
 kasan_report+0x15c/0x200 mm/kasan/report.c:416
 fib6_nh_get_excptn_bucket net/ipv6/route.c:1604 [inline]
 fib6_nh_flush_exceptions+0xbd/0x360 net/ipv6/route.c:1732
 fib6_nh_release+0x9a/0x430 net/ipv6/route.c:3536
 fib6_info_destroy_rcu+0xcb/0x1c0 net/ipv6/ip6_fib.c:174
 rcu_do_batch kernel/rcu/tree.c:2559 [inline]
 rcu_core+0x8f6/0x1450 kernel/rcu/tree.c:2794
 __do_softirq+0x372/0x7a6 kernel/softirq.c:345
 invoke_softirq kernel/softirq.c:221 [inline]
 __irq_exit_rcu+0x22c/0x260 kernel/softirq.c:422
 irq_exit_rcu+0x5/0x20 kernel/softirq.c:434
 sysvec_apic_timer_interrupt+0x91/0xb0 arch/x86/kernel/apic/apic.c:1100
 </IRQ>
 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632
RIP: 0010:lock_acquire+0x1f6/0x720 kernel/locking/lockdep.c:5515
Code: f6 84 24 a1 00 00 00 02 0f 85 8d 02 00 00 f7 c3 00 02 00 00 49 bd 00 00 00 00 00 fc ff df 74 01 fb 48 c7 44 24 40 0e 36 e0 45 <4b> c7 44 3d 00 00 00 00 00 4b c7 44 3d 09 00 00 00 00 43 c7 44 3d
RSP: 0018:ffffc90009e06560 EFLAGS: 00000206
RAX: 1ffff920013c0cc0 RBX: 0000000000000246 RCX: dffffc0000000000
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffffc90009e066e0 R08: dffffc0000000000 R09: fffffbfff1f992b1
R10: fffffbfff1f992b1 R11: 0000000000000000 R12: 0000000000000000
R13: dffffc0000000000 R14: 0000000000000000 R15: 1ffff920013c0cb4
 rcu_lock_acquire+0x2a/0x30 include/linux/rcupdate.h:267
 rcu_read_lock include/linux/rcupdate.h:656 [inline]
 ext4_get_group_info+0xea/0x340 fs/ext4/ext4.h:3231
 ext4_mb_prefetch+0x123/0x5d0 fs/ext4/mballoc.c:2212
 ext4_mb_regular_allocator+0x8a5/0x28f0 fs/ext4/mballoc.c:2379
 ext4_mb_new_blocks+0xc6e/0x24f0 fs/ext4/mballoc.c:4982
 ext4_ext_map_blocks+0x2be3/0x7210 fs/ext4/extents.c:4238
 ext4_map_blocks+0xab3/0x1cb0 fs/ext4/inode.c:638
 ext4_getblk+0x187/0x6c0 fs/ext4/inode.c:848
 ext4_bread+0x2a/0x1c0 fs/ext4/inode.c:900
 ext4_append+0x1a4/0x360 fs/ext4/namei.c:67
 ext4_init_new_dir+0x337/0xa10 fs/ext4/namei.c:2768
 ext4_mkdir+0x4b8/0xc00 fs/ext4/namei.c:2814
 vfs_mkdir+0x45b/0x640 fs/namei.c:3819
 ovl_do_mkdir fs/overlayfs/overlayfs.h:161 [inline]
 ovl_mkdir_real+0x53/0x1a0 fs/overlayfs/dir.c:146
 ovl_create_real+0x280/0x490 fs/overlayfs/dir.c:193
 ovl_workdir_create+0x425/0x600 fs/overlayfs/super.c:788
 ovl_make_workdir+0xed/0x1140 fs/overlayfs/super.c:1355
 ovl_get_workdir fs/overlayfs/super.c:1492 [inline]
 ovl_fill_super+0x39ee/0x5370 fs/overlayfs/super.c:2035
 mount_nodev+0x52/0xe0 fs/super.c:1413
 legacy_get_tree+0xea/0x180 fs/fs_context.c:592
 vfs_get_tree+0x86/0x270 fs/super.c:1497
 do_new_mount fs/namespace.c:2903 [inline]
 path_mount+0x196f/0x2be0 fs/namespace.c:3233
 do_mount fs/namespace.c:3246 [inline]
 __do_sys_mount fs/namespace.c:3454 [inline]
 __se_sys_mount+0x2f9/0x3b0 fs/namespace.c:3431
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x4665f9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f68f2b87188 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
RAX: ffffffffffffffda RBX: 000000000056bf60 RCX: 00000000004665f9
RDX: 00000000200000c0 RSI: 0000000020000000 RDI: 000000000040000a
RBP: 00000000004bfbb9 R08: 0000000020000100 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000056bf60
R13: 00007ffe19002dff R14: 00007f68f2b87300 R15: 0000000000022000

Allocated by task 17768:
 kasan_save_stack mm/kasan/common.c:38 [inline]
 kasan_set_track mm/kasan/common.c:46 [inline]
 set_alloc_info mm/kasan/common.c:427 [inline]
 ____kasan_kmalloc+0xc2/0xf0 mm/kasan/common.c:506
 kasan_kmalloc include/linux/kasan.h:233 [inline]
 __kmalloc+0xb4/0x380 mm/slub.c:4055
 kmalloc include/linux/slab.h:559 [inline]
 kzalloc include/linux/slab.h:684 [inline]
 fib6_info_alloc+0x2c/0xd0 net/ipv6/ip6_fib.c:154
 ip6_route_info_create+0x55d/0x1a10 net/ipv6/route.c:3638
 ip6_route_add+0x22/0x120 net/ipv6/route.c:3728
 inet6_rtm_newroute+0x2cd/0x2260 net/ipv6/route.c:5352
 rtnetlink_rcv_msg+0xb34/0xe70 net/core/rtnetlink.c:5553
 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2502
 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
 netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1338
 netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1927
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x5a2/0x900 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmsg+0x319/0x400 net/socket.c:2433
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Last potentially related work creation:
 kasan_save_stack+0x27/0x50 mm/kasan/common.c:38
 kasan_record_aux_stack+0xee/0x120 mm/kasan/generic.c:345
 __call_rcu kernel/rcu/tree.c:3039 [inline]
 call_rcu+0x1b1/0xa30 kernel/rcu/tree.c:3114
 fib6_info_release include/net/ip6_fib.h:337 [inline]
 ip6_route_info_create+0x10c4/0x1a10 net/ipv6/route.c:3718
 ip6_route_add+0x22/0x120 net/ipv6/route.c:3728
 inet6_rtm_newroute+0x2cd/0x2260 net/ipv6/route.c:5352
 rtnetlink_rcv_msg+0xb34/0xe70 net/core/rtnetlink.c:5553
 netlink_rcv_skb+0x1f0/0x460 net/netlink/af_netlink.c:2502
 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline]
 netlink_unicast+0x7de/0x9b0 net/netlink/af_netlink.c:1338
 netlink_sendmsg+0xaa6/0xe90 net/netlink/af_netlink.c:1927
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x5a2/0x900 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmsg+0x319/0x400 net/socket.c:2433
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Second to last potentially related work creation:
 kasan_save_stack+0x27/0x50 mm/kasan/common.c:38
 kasan_record_aux_stack+0xee/0x120 mm/kasan/generic.c:345
 insert_work+0x54/0x400 kernel/workqueue.c:1331
 __queue_work+0x981/0xcc0 kernel/workqueue.c:1497
 queue_work_on+0x111/0x200 kernel/workqueue.c:1524
 queue_work include/linux/workqueue.h:507 [inline]
 call_usermodehelper_exec+0x283/0x470 kernel/umh.c:433
 kobject_uevent_env+0x1349/0x1730 lib/kobject_uevent.c:617
 kvm_uevent_notify_change+0x309/0x3b0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4809
 kvm_destroy_vm arch/x86/kvm/../../../virt/kvm/kvm_main.c:877 [inline]
 kvm_put_kvm+0x9c/0xd10 arch/x86/kvm/../../../virt/kvm/kvm_main.c:920
 kvm_vcpu_release+0x53/0x60 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3120
 __fput+0x352/0x7b0 fs/file_table.c:280
 task_work_run+0x146/0x1c0 kernel/task_work.c:140
 tracehook_notify_resume include/linux/tracehook.h:189 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:174 [inline]
 exit_to_user_mode_prepare+0x10b/0x1e0 kernel/entry/common.c:208
 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline]
 syscall_exit_to_user_mode+0x26/0x70 kernel/entry/common.c:301
 entry_SYSCALL_64_after_hwframe+0x44/0xae

The buggy address belongs to the object at ffff8880145c7800
 which belongs to the cache kmalloc-192 of size 192
The buggy address is located 56 bytes to the right of
 192-byte region [ffff8880145c7800, ffff8880145c78c0)
The buggy address belongs to the page:
page:ffffea00005171c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x145c7
flags: 0xfff00000000200(slab)
raw: 00fff00000000200 ffffea00006474c0 0000000200000002 ffff888010c41a00
raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8880145c7780: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
 ffff8880145c7800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff8880145c7880: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
                                                                ^
 ffff8880145c7900: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff8880145c7980: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
==================================================================

In the ip6_route_info_create function, in the case that the nh pointer
is not NULL, the fib6_nh in fib6_info has not been allocated.
Therefore, when trying to free fib6_info in this error case using
fib6_info_release, the function will call fib6_info_destroy_rcu,
which in turn calls fib6_nh_release(f6i->fib6_nh).
However, f6i->fib6_nh doesn't have any refcount yet given the lack of
allocation, which causes the memory issue reported above.
Therefore, the solution is to free the fib6_info directly instead of
going through fib6_info_release.

Fixes: f88d8ea67fbdb ("ipv6: Plumb support for nexthop object in a fib6_info")
Fixes: 706ec91916462 ("ipv6: Fix nexthop refcnt leak when creating ipv6 route info")
Signed-off-by: Coco Li <lixiaoyan@google.com>
Cc: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a22822bdbf39c..d417e514bd52c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3673,11 +3673,11 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 	if (nh) {
 		if (rt->fib6_src.plen) {
 			NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
-			goto out;
+			goto out_free;
 		}
 		if (!nexthop_get(nh)) {
 			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
-			goto out;
+			goto out_free;
 		}
 		rt->nh = nh;
 		fib6_nh = nexthop_fib6_nh(rt->nh);
@@ -3714,6 +3714,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
 out:
 	fib6_info_release(rt);
 	return ERR_PTR(err);
+out_free:
+	ip_fib_metrics_put(rt->fib6_metrics);
+	kfree(rt);
+	return ERR_PTR(err);
 }
 
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
-- 
GitLab


From 1a8024239dacf53fcf39c0f07fbf2712af22864f Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Fri, 4 Jun 2021 01:09:01 +0800
Subject: [PATCH 2219/3804] virtio-net: fix for skb_over_panic inside big mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In virtio-net's large packet mode, there is a hole in the space behind
buf.

    hdr_padded_len - hdr_len

We must take this into account when calculating tailroom.

[   44.544385] skb_put.cold (net/core/skbuff.c:5254 (discriminator 1) net/core/skbuff.c:5252 (discriminator 1))
[   44.544864] page_to_skb (drivers/net/virtio_net.c:485)
[   44.545361] receive_buf (drivers/net/virtio_net.c:849 drivers/net/virtio_net.c:1131)
[   44.545870] ? netif_receive_skb_list_internal (net/core/dev.c:5714)
[   44.546628] ? dev_gro_receive (net/core/dev.c:6103)
[   44.547135] ? napi_complete_done (./include/linux/list.h:35 net/core/dev.c:5867 net/core/dev.c:5862 net/core/dev.c:6565)
[   44.547672] virtnet_poll (drivers/net/virtio_net.c:1427 drivers/net/virtio_net.c:1525)
[   44.548251] __napi_poll (net/core/dev.c:6985)
[   44.548744] net_rx_action (net/core/dev.c:7054 net/core/dev.c:7139)
[   44.549264] __do_softirq (./arch/x86/include/asm/jump_label.h:19 ./include/linux/jump_label.h:200 ./include/trace/events/irq.h:142 kernel/softirq.c:560)
[   44.549762] irq_exit_rcu (kernel/softirq.c:433 kernel/softirq.c:637 kernel/softirq.c:649)
[   44.551384] common_interrupt (arch/x86/kernel/irq.c:240 (discriminator 13))
[   44.551991] ? asm_common_interrupt (./arch/x86/include/asm/idtentry.h:638)
[   44.552654] asm_common_interrupt (./arch/x86/include/asm/idtentry.h:638)

Fixes: fb32856b16ad ("virtio-net: page_to_skb() use build_skb when there's sufficient tailroom")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reported-by: Corentin Noël <corentin.noel@collabora.com>
Tested-by: Corentin Noël <corentin.noel@collabora.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fa407eb8b457a..78a01c71a17cf 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -406,7 +406,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	 * add_recvbuf_mergeable() + get_mergeable_buf_len()
 	 */
 	truesize = headroom ? PAGE_SIZE : truesize;
-	tailroom = truesize - len - headroom;
+	tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len);
 	buf = p - headroom;
 
 	len -= hdr_len;
-- 
GitLab


From f49efb108aa4408feeca51ea4b4486075624017b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:35:33 -0600
Subject: [PATCH 2220/3804] drm/nouveau: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a couple
of warnings by explicitly adding a couple of break statements instead
of letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c        | 1 +
 drivers/gpu/drm/nouveau/nouveau_connector.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 3e09df0472ce4..c3ee3007bc45a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -440,6 +440,7 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t domain, bool contig)
 			break;
 		case TTM_PL_TT:
 			error |= !(domain & NOUVEAU_GEM_DOMAIN_GART);
+			break;
 		default:
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 61e6d7412505a..eb844cdcaec24 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -157,6 +157,7 @@ nouveau_conn_atomic_set_property(struct drm_connector *connector,
 			default:
 				break;
 			}
+			break;
 		case DRM_MODE_SCALE_FULLSCREEN:
 		case DRM_MODE_SCALE_CENTER:
 		case DRM_MODE_SCALE_ASPECT:
-- 
GitLab


From e0e6f9b2a329c2672391fab435240c221d04641c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:35:38 -0600
Subject: [PATCH 2221/3804] drm/nouveau/therm: Fix fall-through warnings for
 Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c
index 2b031d4eaeb68..684aff7437eef 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c
@@ -41,6 +41,7 @@ pwm_info(struct nvkm_therm *therm, int line)
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 0850bf2e5ce411f7c1e2879d72d80253cd8db261 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:35:28 -0600
Subject: [PATCH 2222/3804] drm/nouveau/clk: Fix fall-through warnings for
 Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
index 83067763c0ecf..e1d31c62f9ece 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
@@ -313,6 +313,7 @@ nv50_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From cd40407a8a018d43fdb05c84a76af96f8bce9ac2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 1 Jun 2021 11:27:04 +0200
Subject: [PATCH 2223/3804] media: ivtv: get rid of DVB deprecated ioctls

The ivtv driver gained support for audio and video settings
via the V4L2 API a long time ago.

Let's drop support for the duplicated controls that were
implemented by abusing the DVB API.

Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/ivtv/Kconfig       |  12 --
 drivers/media/pci/ivtv/ivtv-driver.h |   2 -
 drivers/media/pci/ivtv/ivtv-ioctl.c  | 221 ---------------------------
 3 files changed, 235 deletions(-)

diff --git a/drivers/media/pci/ivtv/Kconfig b/drivers/media/pci/ivtv/Kconfig
index c729e54692c47..e70502902b73c 100644
--- a/drivers/media/pci/ivtv/Kconfig
+++ b/drivers/media/pci/ivtv/Kconfig
@@ -29,18 +29,6 @@ config VIDEO_IVTV
 	  To compile this driver as a module, choose M here: the
 	  module will be called ivtv.
 
-config VIDEO_IVTV_DEPRECATED_IOCTLS
-	bool "enable the DVB ioctls abuse on ivtv driver"
-	depends on VIDEO_IVTV
-	help
-	  Enable the usage of the a DVB set of ioctls that were abused by
-	  IVTV driver for a while.
-
-	  Those ioctls were not needed for a long time, as IVTV implements
-	  the proper V4L2 ioctls since kernel 3.3.
-
-	  If unsure, say N.
-
 config VIDEO_IVTV_ALSA
 	tristate "Conexant cx23415/cx23416 ALSA interface for PCM audio capture"
 	depends on VIDEO_IVTV && SND
diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h
index e5efe525ad7bf..4cf92dee65271 100644
--- a/drivers/media/pci/ivtv/ivtv-driver.h
+++ b/drivers/media/pci/ivtv/ivtv-driver.h
@@ -57,8 +57,6 @@
 #include <linux/uaccess.h>
 #include <asm/byteorder.h>
 
-#include <linux/dvb/video.h>
-#include <linux/dvb/audio.h>
 #include <media/v4l2-common.h>
 #include <media/v4l2-ioctl.h>
 #include <media/v4l2-ctrls.h>
diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c
index 35dccb31174c1..da19b2e95e6cf 100644
--- a/drivers/media/pci/ivtv/ivtv-ioctl.c
+++ b/drivers/media/pci/ivtv/ivtv-ioctl.c
@@ -23,11 +23,6 @@
 #include <media/i2c/saa7127.h>
 #include <media/tveeprom.h>
 #include <media/v4l2-event.h>
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-#include <linux/compat.h>
-#include <linux/dvb/audio.h>
-#include <linux/dvb/video.h>
-#endif
 
 u16 ivtv_service2vbi(int type)
 {
@@ -1606,38 +1601,11 @@ static int ivtv_try_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder
 	return ivtv_video_command(itv, id, dec, true);
 }
 
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-static __inline__ void warn_deprecated_ioctl(const char *name)
-{
-	pr_warn_once("warning: the %s ioctl is deprecated. Don't use it, as it will be removed soon\n",
-		     name);
-}
-
-#ifdef CONFIG_COMPAT
-struct compat_video_event {
-	__s32 type;
-	/* unused, make sure to use atomic time for y2038 if it ever gets used */
-	compat_long_t timestamp;
-	union {
-		video_size_t size;
-		unsigned int frame_rate;        /* in frames per 1000sec */
-		unsigned char vsync_field;      /* unknown/odd/even/progressive */
-	} u;
-};
-#define VIDEO_GET_EVENT32 _IOR('o', 28, struct compat_video_event)
-#endif
-
-#endif
-
 static int ivtv_decoder_ioctls(struct file *filp, unsigned int cmd, void *arg)
 {
 	struct ivtv_open_id *id = fh2id(filp->private_data);
 	struct ivtv *itv = id->itv;
 	struct ivtv_stream *s = &itv->streams[id->type];
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-	int nonblocking = filp->f_flags & O_NONBLOCK;
-	unsigned long iarg = (unsigned long)arg;
-#endif
 
 	switch (cmd) {
 	case IVTV_IOC_DMA_FRAME: {
@@ -1669,169 +1637,6 @@ static int ivtv_decoder_ioctls(struct file *filp, unsigned int cmd, void *arg)
 		if (!(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
 			return -EINVAL;
 		return ivtv_passthrough_mode(itv, *(int *)arg != 0);
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-	case VIDEO_GET_PTS: {
-		s64 *pts = arg;
-		s64 frame;
-
-		warn_deprecated_ioctl("VIDEO_GET_PTS");
-		if (s->type < IVTV_DEC_STREAM_TYPE_MPG) {
-			*pts = s->dma_pts;
-			break;
-		}
-		if (!(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
-			return -EINVAL;
-		return ivtv_g_pts_frame(itv, pts, &frame);
-	}
-
-	case VIDEO_GET_FRAME_COUNT: {
-		s64 *frame = arg;
-		s64 pts;
-
-		warn_deprecated_ioctl("VIDEO_GET_FRAME_COUNT");
-		if (s->type < IVTV_DEC_STREAM_TYPE_MPG) {
-			*frame = 0;
-			break;
-		}
-		if (!(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
-			return -EINVAL;
-		return ivtv_g_pts_frame(itv, &pts, frame);
-	}
-
-	case VIDEO_PLAY: {
-		struct v4l2_decoder_cmd dc;
-
-		warn_deprecated_ioctl("VIDEO_PLAY");
-		memset(&dc, 0, sizeof(dc));
-		dc.cmd = V4L2_DEC_CMD_START;
-		return ivtv_video_command(itv, id, &dc, 0);
-	}
-
-	case VIDEO_STOP: {
-		struct v4l2_decoder_cmd dc;
-
-		warn_deprecated_ioctl("VIDEO_STOP");
-		memset(&dc, 0, sizeof(dc));
-		dc.cmd = V4L2_DEC_CMD_STOP;
-		dc.flags = V4L2_DEC_CMD_STOP_TO_BLACK | V4L2_DEC_CMD_STOP_IMMEDIATELY;
-		return ivtv_video_command(itv, id, &dc, 0);
-	}
-
-	case VIDEO_FREEZE: {
-		struct v4l2_decoder_cmd dc;
-
-		warn_deprecated_ioctl("VIDEO_FREEZE");
-		memset(&dc, 0, sizeof(dc));
-		dc.cmd = V4L2_DEC_CMD_PAUSE;
-		return ivtv_video_command(itv, id, &dc, 0);
-	}
-
-	case VIDEO_CONTINUE: {
-		struct v4l2_decoder_cmd dc;
-
-		warn_deprecated_ioctl("VIDEO_CONTINUE");
-		memset(&dc, 0, sizeof(dc));
-		dc.cmd = V4L2_DEC_CMD_RESUME;
-		return ivtv_video_command(itv, id, &dc, 0);
-	}
-
-	case VIDEO_COMMAND:
-	case VIDEO_TRY_COMMAND: {
-		/* Note: struct v4l2_decoder_cmd has the same layout as
-		   struct video_command */
-		struct v4l2_decoder_cmd *dc = arg;
-		int try = (cmd == VIDEO_TRY_COMMAND);
-
-		if (try)
-			warn_deprecated_ioctl("VIDEO_TRY_COMMAND");
-		else
-			warn_deprecated_ioctl("VIDEO_COMMAND");
-		return ivtv_video_command(itv, id, dc, try);
-	}
-
-#ifdef CONFIG_COMPAT
-	case VIDEO_GET_EVENT32:
-#endif
-	case VIDEO_GET_EVENT: {
-#ifdef CONFIG_COMPAT
-		struct compat_video_event *ev32 = arg;
-#endif
-		struct video_event *ev = arg;
-		DEFINE_WAIT(wait);
-
-		warn_deprecated_ioctl("VIDEO_GET_EVENT");
-		if (!(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
-			return -EINVAL;
-		memset(ev, 0, sizeof(*ev));
-		set_bit(IVTV_F_I_EV_VSYNC_ENABLED, &itv->i_flags);
-
-		while (1) {
-			if (test_and_clear_bit(IVTV_F_I_EV_DEC_STOPPED, &itv->i_flags))
-				ev->type = VIDEO_EVENT_DECODER_STOPPED;
-			else if (test_and_clear_bit(IVTV_F_I_EV_VSYNC, &itv->i_flags)) {
-				unsigned char vsync_field;
-
-				ev->type = VIDEO_EVENT_VSYNC;
-				vsync_field = test_bit(IVTV_F_I_EV_VSYNC_FIELD, &itv->i_flags) ?
-					VIDEO_VSYNC_FIELD_ODD : VIDEO_VSYNC_FIELD_EVEN;
-				if (itv->output_mode == OUT_UDMA_YUV &&
-					(itv->yuv_info.lace_mode & IVTV_YUV_MODE_MASK) ==
-								IVTV_YUV_MODE_PROGRESSIVE) {
-					vsync_field = VIDEO_VSYNC_FIELD_PROGRESSIVE;
-				}
-#ifdef CONFIG_COMPAT
-				if (cmd == VIDEO_GET_EVENT32)
-					ev32->u.vsync_field = vsync_field;
-				else
-#endif
-					ev->u.vsync_field = vsync_field;
-			}
-			if (ev->type)
-				return 0;
-			if (nonblocking)
-				return -EAGAIN;
-			/* Wait for event. Note that serialize_lock is locked,
-			   so to allow other processes to access the driver while
-			   we are waiting unlock first and later lock again. */
-			mutex_unlock(&itv->serialize_lock);
-			prepare_to_wait(&itv->event_waitq, &wait, TASK_INTERRUPTIBLE);
-			if (!test_bit(IVTV_F_I_EV_DEC_STOPPED, &itv->i_flags) &&
-			    !test_bit(IVTV_F_I_EV_VSYNC, &itv->i_flags))
-				schedule();
-			finish_wait(&itv->event_waitq, &wait);
-			mutex_lock(&itv->serialize_lock);
-			if (signal_pending(current)) {
-				/* return if a signal was received */
-				IVTV_DEBUG_INFO("User stopped wait for event\n");
-				return -EINTR;
-			}
-		}
-		break;
-	}
-
-	case VIDEO_SELECT_SOURCE:
-		warn_deprecated_ioctl("VIDEO_SELECT_SOURCE");
-		if (!(itv->v4l2_cap & V4L2_CAP_VIDEO_OUTPUT))
-			return -EINVAL;
-		return ivtv_passthrough_mode(itv, iarg == VIDEO_SOURCE_DEMUX);
-
-	case AUDIO_SET_MUTE:
-		warn_deprecated_ioctl("AUDIO_SET_MUTE");
-		itv->speed_mute_audio = iarg;
-		return 0;
-
-	case AUDIO_CHANNEL_SELECT:
-		warn_deprecated_ioctl("AUDIO_CHANNEL_SELECT");
-		if (iarg > AUDIO_STEREO_SWAPPED)
-			return -EINVAL;
-		return v4l2_ctrl_s_ctrl(itv->ctrl_audio_playback, iarg + 1);
-
-	case AUDIO_BILINGUAL_CHANNEL_SELECT:
-		warn_deprecated_ioctl("AUDIO_BILINGUAL_CHANNEL_SELECT");
-		if (iarg > AUDIO_STEREO_SWAPPED)
-			return -EINVAL;
-		return v4l2_ctrl_s_ctrl(itv->ctrl_audio_multilingual_playback, iarg + 1);
-#endif
 	default:
 		return -EINVAL;
 	}
@@ -1846,17 +1651,6 @@ static long ivtv_default(struct file *file, void *fh, bool valid_prio,
 	if (!valid_prio) {
 		switch (cmd) {
 		case IVTV_IOC_PASSTHROUGH_MODE:
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-		case VIDEO_PLAY:
-		case VIDEO_STOP:
-		case VIDEO_FREEZE:
-		case VIDEO_CONTINUE:
-		case VIDEO_COMMAND:
-		case VIDEO_SELECT_SOURCE:
-		case AUDIO_SET_MUTE:
-		case AUDIO_CHANNEL_SELECT:
-		case AUDIO_BILINGUAL_CHANNEL_SELECT:
-#endif
 			return -EBUSY;
 		}
 	}
@@ -1874,21 +1668,6 @@ static long ivtv_default(struct file *file, void *fh, bool valid_prio,
 
 	case IVTV_IOC_DMA_FRAME:
 	case IVTV_IOC_PASSTHROUGH_MODE:
-#ifdef CONFIG_VIDEO_IVTV_DEPRECATED_IOCTLS
-	case VIDEO_GET_PTS:
-	case VIDEO_GET_FRAME_COUNT:
-	case VIDEO_GET_EVENT:
-	case VIDEO_PLAY:
-	case VIDEO_STOP:
-	case VIDEO_FREEZE:
-	case VIDEO_CONTINUE:
-	case VIDEO_COMMAND:
-	case VIDEO_TRY_COMMAND:
-	case VIDEO_SELECT_SOURCE:
-	case AUDIO_SET_MUTE:
-	case AUDIO_CHANNEL_SELECT:
-	case AUDIO_BILINGUAL_CHANNEL_SELECT:
-#endif
 		return ivtv_decoder_ioctls(file, cmd, (void *)arg);
 
 	default:
-- 
GitLab


From 819fbd3d8ef36c09576c2a0ffea503f5c46e9177 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 1 Jun 2021 11:31:30 +0200
Subject: [PATCH 2224/3804] media: dvb header files: move some headers to
 staging

The audio, video and OSD APIs are used upstream only by the
av7110 driver, which was moved to staging.

So, move the corresponding header files to it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../linux/dvb => drivers/staging/media/av7110}/audio.h     | 0
 drivers/staging/media/av7110/av7110.h                      | 7 ++++---
 .../uapi/linux/dvb => drivers/staging/media/av7110}/osd.h  | 0
 .../linux/dvb => drivers/staging/media/av7110}/video.h     | 0
 4 files changed, 4 insertions(+), 3 deletions(-)
 rename {include/uapi/linux/dvb => drivers/staging/media/av7110}/audio.h (100%)
 rename {include/uapi/linux/dvb => drivers/staging/media/av7110}/osd.h (100%)
 rename {include/uapi/linux/dvb => drivers/staging/media/av7110}/video.h (100%)

diff --git a/include/uapi/linux/dvb/audio.h b/drivers/staging/media/av7110/audio.h
similarity index 100%
rename from include/uapi/linux/dvb/audio.h
rename to drivers/staging/media/av7110/audio.h
diff --git a/drivers/staging/media/av7110/av7110.h b/drivers/staging/media/av7110/av7110.h
index 809d938ae1667..b8e8fc8ddbe9c 100644
--- a/drivers/staging/media/av7110/av7110.h
+++ b/drivers/staging/media/av7110/av7110.h
@@ -9,11 +9,12 @@
 #include <linux/input.h>
 #include <linux/time.h>
 
-#include <linux/dvb/video.h>
-#include <linux/dvb/audio.h>
+#include "video.h"
+#include "audio.h"
+#include "osd.h"
+
 #include <linux/dvb/dmx.h>
 #include <linux/dvb/ca.h>
-#include <linux/dvb/osd.h>
 #include <linux/dvb/net.h>
 #include <linux/mutex.h>
 
diff --git a/include/uapi/linux/dvb/osd.h b/drivers/staging/media/av7110/osd.h
similarity index 100%
rename from include/uapi/linux/dvb/osd.h
rename to drivers/staging/media/av7110/osd.h
diff --git a/include/uapi/linux/dvb/video.h b/drivers/staging/media/av7110/video.h
similarity index 100%
rename from include/uapi/linux/dvb/video.h
rename to drivers/staging/media/av7110/video.h
-- 
GitLab


From 793e52d4e77d49737ad83cb11925c98f4907fcb1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 1 Jun 2021 11:41:39 +0200
Subject: [PATCH 2225/3804] media: docs: move DVB audio/video docs to staging

The only upstream driver using the API described there is the
av7110 driver.

As the driver was moved to staging, move the API bits to staging
as well.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/Makefile    | 10 +----
 .../media/audio.h.rst.exceptions              | 19 ---------
 .../userspace-api/media/dvb/headers.rst       |  7 ----
 .../media/dvb/legacy_dvb_apis.rst             |  7 ----
 .../media/video.h.rst.exceptions              | 39 -------------------
 .../audio-bilingual-channel-select.rst        |  0
 .../media/av7110}/audio-channel-select.rst    |  0
 .../media/av7110}/audio-clear-buffer.rst      |  0
 .../staging/media/av7110}/audio-continue.rst  |  0
 .../staging/media/av7110}/audio-fclose.rst    |  0
 .../staging/media/av7110}/audio-fopen.rst     |  0
 .../staging/media/av7110}/audio-fwrite.rst    |  0
 .../media/av7110}/audio-get-capabilities.rst  |  0
 .../media/av7110}/audio-get-status.rst        |  0
 .../staging/media/av7110}/audio-pause.rst     |  0
 .../staging/media/av7110}/audio-play.rst      |  0
 .../media/av7110}/audio-select-source.rst     |  0
 .../media/av7110}/audio-set-av-sync.rst       |  0
 .../media/av7110}/audio-set-bypass-mode.rst   |  0
 .../staging/media/av7110}/audio-set-id.rst    |  0
 .../staging/media/av7110}/audio-set-mixer.rst |  0
 .../staging/media/av7110}/audio-set-mute.rst  |  0
 .../media/av7110}/audio-set-streamtype.rst    |  0
 .../staging/media/av7110}/audio-stop.rst      |  0
 .../staging/media/av7110}/audio.rst           |  0
 .../media/av7110}/audio_data_types.rst        |  0
 .../media/av7110}/audio_function_calls.rst    |  0
 .../media/av7110}/video-clear-buffer.rst      |  0
 .../staging/media/av7110}/video-command.rst   |  0
 .../staging/media/av7110}/video-continue.rst  |  0
 .../media/av7110}/video-fast-forward.rst      |  0
 .../staging/media/av7110}/video-fclose.rst    |  0
 .../staging/media/av7110}/video-fopen.rst     |  0
 .../staging/media/av7110}/video-freeze.rst    |  0
 .../staging/media/av7110}/video-fwrite.rst    |  0
 .../media/av7110}/video-get-capabilities.rst  |  0
 .../staging/media/av7110}/video-get-event.rst |  0
 .../media/av7110}/video-get-frame-count.rst   |  0
 .../staging/media/av7110}/video-get-pts.rst   |  0
 .../staging/media/av7110}/video-get-size.rst  |  0
 .../media/av7110}/video-get-status.rst        |  0
 .../staging/media/av7110}/video-play.rst      |  0
 .../media/av7110}/video-select-source.rst     |  0
 .../staging/media/av7110}/video-set-blank.rst |  0
 .../av7110}/video-set-display-format.rst      |  0
 .../media/av7110}/video-set-format.rst        |  0
 .../media/av7110}/video-set-streamtype.rst    |  0
 .../media/av7110}/video-slowmotion.rst        |  0
 .../media/av7110}/video-stillpicture.rst      |  0
 .../staging/media/av7110}/video-stop.rst      |  0
 .../media/av7110}/video-try-command.rst       |  0
 .../staging/media/av7110}/video.rst           |  0
 .../media/av7110}/video_function_calls.rst    |  0
 .../staging/media/av7110}/video_types.rst     |  0
 54 files changed, 2 insertions(+), 80 deletions(-)
 delete mode 100644 Documentation/userspace-api/media/audio.h.rst.exceptions
 delete mode 100644 Documentation/userspace-api/media/video.h.rst.exceptions
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-bilingual-channel-select.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-channel-select.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-clear-buffer.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-continue.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-fclose.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-fopen.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-fwrite.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-get-capabilities.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-get-status.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-pause.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-play.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-select-source.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-av-sync.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-bypass-mode.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-id.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-mixer.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-mute.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-set-streamtype.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio-stop.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio_data_types.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/audio_function_calls.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-clear-buffer.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-command.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-continue.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-fast-forward.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-fclose.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-fopen.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-freeze.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-fwrite.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-capabilities.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-event.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-frame-count.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-pts.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-size.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-get-status.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-play.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-select-source.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-set-blank.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-set-display-format.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-set-format.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-set-streamtype.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-slowmotion.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-stillpicture.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-stop.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video-try-command.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video_function_calls.rst (100%)
 rename {Documentation/userspace-api/media/dvb => drivers/staging/media/av7110}/video_types.rst (100%)

diff --git a/Documentation/userspace-api/media/Makefile b/Documentation/userspace-api/media/Makefile
index 81a4a1a53bceb..00922aa7efde7 100644
--- a/Documentation/userspace-api/media/Makefile
+++ b/Documentation/userspace-api/media/Makefile
@@ -7,8 +7,8 @@ PARSER = $(srctree)/Documentation/sphinx/parse-headers.pl
 UAPI = $(srctree)/include/uapi/linux
 KAPI = $(srctree)/include/linux
 
-FILES = audio.h.rst ca.h.rst dmx.h.rst frontend.h.rst net.h.rst video.h.rst \
-	  videodev2.h.rst media.h.rst cec.h.rst lirc.h.rst
+FILES = ca.h.rst dmx.h.rst frontend.h.rst net.h.rst \
+	videodev2.h.rst media.h.rst cec.h.rst lirc.h.rst
 
 TARGETS := $(addprefix $(BUILDDIR)/, $(FILES))
 
@@ -21,9 +21,6 @@ quiet_gen_rst = echo '  PARSE   $(patsubst $(srctree)/%,%,$<)'; \
 
 silent_gen_rst = ${gen_rst}
 
-$(BUILDDIR)/audio.h.rst: ${UAPI}/dvb/audio.h ${PARSER} $(SRC_DIR)/audio.h.rst.exceptions
-	@$($(quiet)gen_rst)
-
 $(BUILDDIR)/ca.h.rst: ${UAPI}/dvb/ca.h ${PARSER} $(SRC_DIR)/ca.h.rst.exceptions
 	@$($(quiet)gen_rst)
 
@@ -36,9 +33,6 @@ $(BUILDDIR)/frontend.h.rst: ${UAPI}/dvb/frontend.h ${PARSER} $(SRC_DIR)/frontend
 $(BUILDDIR)/net.h.rst: ${UAPI}/dvb/net.h ${PARSER} $(SRC_DIR)/net.h.rst.exceptions
 	@$($(quiet)gen_rst)
 
-$(BUILDDIR)/video.h.rst: ${UAPI}/dvb/video.h ${PARSER} $(SRC_DIR)/video.h.rst.exceptions
-	@$($(quiet)gen_rst)
-
 $(BUILDDIR)/videodev2.h.rst: ${UAPI}/videodev2.h ${PARSER} $(SRC_DIR)/videodev2.h.rst.exceptions
 	@$($(quiet)gen_rst)
 
diff --git a/Documentation/userspace-api/media/audio.h.rst.exceptions b/Documentation/userspace-api/media/audio.h.rst.exceptions
deleted file mode 100644
index cf6620477f736..0000000000000
--- a/Documentation/userspace-api/media/audio.h.rst.exceptions
+++ /dev/null
@@ -1,19 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-# Ignore header name
-ignore define _DVBAUDIO_H_
-
-# Undocumented audio caps, as this is a deprecated API anyway
-ignore define AUDIO_CAP_DTS
-ignore define AUDIO_CAP_LPCM
-ignore define AUDIO_CAP_MP1
-ignore define AUDIO_CAP_MP2
-ignore define AUDIO_CAP_MP3
-ignore define AUDIO_CAP_AAC
-ignore define AUDIO_CAP_OGG
-ignore define AUDIO_CAP_SDDS
-ignore define AUDIO_CAP_AC3
-
-# some typedefs should point to struct/enums
-replace typedef audio_mixer_t :c:type:`audio_mixer`
-replace typedef audio_status_t :c:type:`audio_status`
diff --git a/Documentation/userspace-api/media/dvb/headers.rst b/Documentation/userspace-api/media/dvb/headers.rst
index 9743ffc350969..88c3eb33a89e6 100644
--- a/Documentation/userspace-api/media/dvb/headers.rst
+++ b/Documentation/userspace-api/media/dvb/headers.rst
@@ -14,10 +14,3 @@ Digital TV uAPI headers
 .. kernel-include:: $BUILDDIR/ca.h.rst
 
 .. kernel-include:: $BUILDDIR/net.h.rst
-
-Legacy uAPI
-***********
-
-.. kernel-include:: $BUILDDIR/audio.h.rst
-
-.. kernel-include:: $BUILDDIR/video.h.rst
diff --git a/Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst b/Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst
index 6104879d728ab..b97d56ee543cf 100644
--- a/Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst
+++ b/Documentation/userspace-api/media/dvb/legacy_dvb_apis.rst
@@ -11,11 +11,6 @@ The APIs described here **should not** be used on new drivers or applications.
 The DVBv3 frontend API has issues with new delivery systems, including
 DVB-S2, DVB-T2, ISDB, etc.
 
-There's just one driver for a very legacy hardware using the Digital TV
-audio and video APIs. No modern drivers should use it. Instead, audio and
-video should be using the V4L2 and ALSA APIs, and the pipelines should
-be set via the Media Controller API.
-
 .. attention::
 
    The APIs described here doesn't necessarily reflect the current
@@ -28,5 +23,3 @@ be set via the Media Controller API.
     :maxdepth: 1
 
     frontend_legacy_dvbv3_api
-    video
-    audio
diff --git a/Documentation/userspace-api/media/video.h.rst.exceptions b/Documentation/userspace-api/media/video.h.rst.exceptions
deleted file mode 100644
index ea9de59ad8b77..0000000000000
--- a/Documentation/userspace-api/media/video.h.rst.exceptions
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-# Ignore header name
-ignore define _UAPI_DVBVIDEO_H_
-
-# This is a deprecated obscure API. Just ignore things we don't know
-ignore define VIDEO_CMD_PLAY
-ignore define VIDEO_CMD_STOP
-ignore define VIDEO_CMD_FREEZE
-ignore define VIDEO_CMD_CONTINUE
-ignore define VIDEO_CMD_FREEZE_TO_BLACK
-ignore define VIDEO_CMD_STOP_TO_BLACK
-ignore define VIDEO_CMD_STOP_IMMEDIATELY
-ignore define VIDEO_PLAY_FMT_NONE
-ignore define VIDEO_PLAY_FMT_GOP
-ignore define VIDEO_VSYNC_FIELD_UNKNOWN
-ignore define VIDEO_VSYNC_FIELD_ODD
-ignore define VIDEO_VSYNC_FIELD_EVEN
-ignore define VIDEO_VSYNC_FIELD_PROGRESSIVE
-ignore define VIDEO_EVENT_SIZE_CHANGED
-ignore define VIDEO_EVENT_FRAME_RATE_CHANGED
-ignore define VIDEO_EVENT_DECODER_STOPPED
-ignore define VIDEO_EVENT_VSYNC
-ignore define VIDEO_CAP_MPEG1
-ignore define VIDEO_CAP_MPEG2
-ignore define VIDEO_CAP_SYS
-ignore define VIDEO_CAP_PROG
-ignore define VIDEO_CAP_SPU
-ignore define VIDEO_CAP_NAVI
-ignore define VIDEO_CAP_CSS
-
-# some typedefs should point to struct/enums
-replace typedef video_format_t :c:type:`video_format`
-replace typedef video_system_t :c:type:`video_system`
-replace typedef video_displayformat_t :c:type:`video_displayformat`
-replace typedef video_size_t :c:type:`video_size`
-replace typedef video_stream_source_t :c:type:`video_stream_source`
-replace typedef video_play_state_t :c:type:`video_play_state`
-replace typedef video_navi_pack_t :c:type:`video_navi_pack`
diff --git a/Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst b/drivers/staging/media/av7110/audio-bilingual-channel-select.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-bilingual-channel-select.rst
rename to drivers/staging/media/av7110/audio-bilingual-channel-select.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-channel-select.rst b/drivers/staging/media/av7110/audio-channel-select.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-channel-select.rst
rename to drivers/staging/media/av7110/audio-channel-select.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-clear-buffer.rst b/drivers/staging/media/av7110/audio-clear-buffer.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-clear-buffer.rst
rename to drivers/staging/media/av7110/audio-clear-buffer.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-continue.rst b/drivers/staging/media/av7110/audio-continue.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-continue.rst
rename to drivers/staging/media/av7110/audio-continue.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-fclose.rst b/drivers/staging/media/av7110/audio-fclose.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-fclose.rst
rename to drivers/staging/media/av7110/audio-fclose.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-fopen.rst b/drivers/staging/media/av7110/audio-fopen.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-fopen.rst
rename to drivers/staging/media/av7110/audio-fopen.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-fwrite.rst b/drivers/staging/media/av7110/audio-fwrite.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-fwrite.rst
rename to drivers/staging/media/av7110/audio-fwrite.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-get-capabilities.rst b/drivers/staging/media/av7110/audio-get-capabilities.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-get-capabilities.rst
rename to drivers/staging/media/av7110/audio-get-capabilities.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-get-status.rst b/drivers/staging/media/av7110/audio-get-status.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-get-status.rst
rename to drivers/staging/media/av7110/audio-get-status.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-pause.rst b/drivers/staging/media/av7110/audio-pause.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-pause.rst
rename to drivers/staging/media/av7110/audio-pause.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-play.rst b/drivers/staging/media/av7110/audio-play.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-play.rst
rename to drivers/staging/media/av7110/audio-play.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-select-source.rst b/drivers/staging/media/av7110/audio-select-source.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-select-source.rst
rename to drivers/staging/media/av7110/audio-select-source.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-av-sync.rst b/drivers/staging/media/av7110/audio-set-av-sync.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-av-sync.rst
rename to drivers/staging/media/av7110/audio-set-av-sync.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst b/drivers/staging/media/av7110/audio-set-bypass-mode.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-bypass-mode.rst
rename to drivers/staging/media/av7110/audio-set-bypass-mode.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-id.rst b/drivers/staging/media/av7110/audio-set-id.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-id.rst
rename to drivers/staging/media/av7110/audio-set-id.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-mixer.rst b/drivers/staging/media/av7110/audio-set-mixer.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-mixer.rst
rename to drivers/staging/media/av7110/audio-set-mixer.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-mute.rst b/drivers/staging/media/av7110/audio-set-mute.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-mute.rst
rename to drivers/staging/media/av7110/audio-set-mute.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-set-streamtype.rst b/drivers/staging/media/av7110/audio-set-streamtype.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-set-streamtype.rst
rename to drivers/staging/media/av7110/audio-set-streamtype.rst
diff --git a/Documentation/userspace-api/media/dvb/audio-stop.rst b/drivers/staging/media/av7110/audio-stop.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio-stop.rst
rename to drivers/staging/media/av7110/audio-stop.rst
diff --git a/Documentation/userspace-api/media/dvb/audio.rst b/drivers/staging/media/av7110/audio.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio.rst
rename to drivers/staging/media/av7110/audio.rst
diff --git a/Documentation/userspace-api/media/dvb/audio_data_types.rst b/drivers/staging/media/av7110/audio_data_types.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio_data_types.rst
rename to drivers/staging/media/av7110/audio_data_types.rst
diff --git a/Documentation/userspace-api/media/dvb/audio_function_calls.rst b/drivers/staging/media/av7110/audio_function_calls.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/audio_function_calls.rst
rename to drivers/staging/media/av7110/audio_function_calls.rst
diff --git a/Documentation/userspace-api/media/dvb/video-clear-buffer.rst b/drivers/staging/media/av7110/video-clear-buffer.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-clear-buffer.rst
rename to drivers/staging/media/av7110/video-clear-buffer.rst
diff --git a/Documentation/userspace-api/media/dvb/video-command.rst b/drivers/staging/media/av7110/video-command.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-command.rst
rename to drivers/staging/media/av7110/video-command.rst
diff --git a/Documentation/userspace-api/media/dvb/video-continue.rst b/drivers/staging/media/av7110/video-continue.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-continue.rst
rename to drivers/staging/media/av7110/video-continue.rst
diff --git a/Documentation/userspace-api/media/dvb/video-fast-forward.rst b/drivers/staging/media/av7110/video-fast-forward.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-fast-forward.rst
rename to drivers/staging/media/av7110/video-fast-forward.rst
diff --git a/Documentation/userspace-api/media/dvb/video-fclose.rst b/drivers/staging/media/av7110/video-fclose.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-fclose.rst
rename to drivers/staging/media/av7110/video-fclose.rst
diff --git a/Documentation/userspace-api/media/dvb/video-fopen.rst b/drivers/staging/media/av7110/video-fopen.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-fopen.rst
rename to drivers/staging/media/av7110/video-fopen.rst
diff --git a/Documentation/userspace-api/media/dvb/video-freeze.rst b/drivers/staging/media/av7110/video-freeze.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-freeze.rst
rename to drivers/staging/media/av7110/video-freeze.rst
diff --git a/Documentation/userspace-api/media/dvb/video-fwrite.rst b/drivers/staging/media/av7110/video-fwrite.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-fwrite.rst
rename to drivers/staging/media/av7110/video-fwrite.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-capabilities.rst b/drivers/staging/media/av7110/video-get-capabilities.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-capabilities.rst
rename to drivers/staging/media/av7110/video-get-capabilities.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-event.rst b/drivers/staging/media/av7110/video-get-event.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-event.rst
rename to drivers/staging/media/av7110/video-get-event.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-frame-count.rst b/drivers/staging/media/av7110/video-get-frame-count.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-frame-count.rst
rename to drivers/staging/media/av7110/video-get-frame-count.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-pts.rst b/drivers/staging/media/av7110/video-get-pts.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-pts.rst
rename to drivers/staging/media/av7110/video-get-pts.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-size.rst b/drivers/staging/media/av7110/video-get-size.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-size.rst
rename to drivers/staging/media/av7110/video-get-size.rst
diff --git a/Documentation/userspace-api/media/dvb/video-get-status.rst b/drivers/staging/media/av7110/video-get-status.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-get-status.rst
rename to drivers/staging/media/av7110/video-get-status.rst
diff --git a/Documentation/userspace-api/media/dvb/video-play.rst b/drivers/staging/media/av7110/video-play.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-play.rst
rename to drivers/staging/media/av7110/video-play.rst
diff --git a/Documentation/userspace-api/media/dvb/video-select-source.rst b/drivers/staging/media/av7110/video-select-source.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-select-source.rst
rename to drivers/staging/media/av7110/video-select-source.rst
diff --git a/Documentation/userspace-api/media/dvb/video-set-blank.rst b/drivers/staging/media/av7110/video-set-blank.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-set-blank.rst
rename to drivers/staging/media/av7110/video-set-blank.rst
diff --git a/Documentation/userspace-api/media/dvb/video-set-display-format.rst b/drivers/staging/media/av7110/video-set-display-format.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-set-display-format.rst
rename to drivers/staging/media/av7110/video-set-display-format.rst
diff --git a/Documentation/userspace-api/media/dvb/video-set-format.rst b/drivers/staging/media/av7110/video-set-format.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-set-format.rst
rename to drivers/staging/media/av7110/video-set-format.rst
diff --git a/Documentation/userspace-api/media/dvb/video-set-streamtype.rst b/drivers/staging/media/av7110/video-set-streamtype.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-set-streamtype.rst
rename to drivers/staging/media/av7110/video-set-streamtype.rst
diff --git a/Documentation/userspace-api/media/dvb/video-slowmotion.rst b/drivers/staging/media/av7110/video-slowmotion.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-slowmotion.rst
rename to drivers/staging/media/av7110/video-slowmotion.rst
diff --git a/Documentation/userspace-api/media/dvb/video-stillpicture.rst b/drivers/staging/media/av7110/video-stillpicture.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-stillpicture.rst
rename to drivers/staging/media/av7110/video-stillpicture.rst
diff --git a/Documentation/userspace-api/media/dvb/video-stop.rst b/drivers/staging/media/av7110/video-stop.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-stop.rst
rename to drivers/staging/media/av7110/video-stop.rst
diff --git a/Documentation/userspace-api/media/dvb/video-try-command.rst b/drivers/staging/media/av7110/video-try-command.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video-try-command.rst
rename to drivers/staging/media/av7110/video-try-command.rst
diff --git a/Documentation/userspace-api/media/dvb/video.rst b/drivers/staging/media/av7110/video.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video.rst
rename to drivers/staging/media/av7110/video.rst
diff --git a/Documentation/userspace-api/media/dvb/video_function_calls.rst b/drivers/staging/media/av7110/video_function_calls.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video_function_calls.rst
rename to drivers/staging/media/av7110/video_function_calls.rst
diff --git a/Documentation/userspace-api/media/dvb/video_types.rst b/drivers/staging/media/av7110/video_types.rst
similarity index 100%
rename from Documentation/userspace-api/media/dvb/video_types.rst
rename to drivers/staging/media/av7110/video_types.rst
-- 
GitLab


From df5ce27d96532844232b16bd0105defc5684e7ce Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:39 +0200
Subject: [PATCH 2226/3804] media: gspca: ov519: replace RIGHT SINGLE QUOTATION
 MARK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the occurrences of the following character:

	- U+2019 ('’'): RIGHT SINGLE QUOTATION MARK

By a normal ASCII apostrophe (single quote) character.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/gspca/ov519.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/gspca/ov519.c b/drivers/media/usb/gspca/ov519.c
index cd6776c3163b1..bffa94e76da5e 100644
--- a/drivers/media/usb/gspca/ov519.c
+++ b/drivers/media/usb/gspca/ov519.c
@@ -614,7 +614,7 @@ static const struct ov_i2c_regvals norm_3620b[] = {
 	/*
 	 * From the datasheet: "Note that after writing to register COMH
 	 * (0x12) to change the sensor mode, registers related to the
-	 * sensor’s cropping window will be reset back to their default
+	 * sensor's cropping window will be reset back to their default
 	 * values."
 	 *
 	 * "wait 4096 external clock ... to make sure the sensor is
-- 
GitLab


From ffcf1b0ae3fa84f5f3f4bd1ee440e60b72f5c840 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:39 +0200
Subject: [PATCH 2227/3804] media: rtl28xxu: replace a NO-BREAK SPACE character
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of using:

	- U+00a0 (' '): NO-BREAK SPACE

Use a normal white space.

This was probably introduced by some cut-and-paste.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/rtl28xxu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
index 2c04ed8af0e44..83705730e37ec 100644
--- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
+++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c
@@ -1777,7 +1777,7 @@ static int rtl2832u_rc_query(struct dvb_usb_device *d)
 		ir_raw_event_store_with_filter(d->rc_dev, &ev);
 	}
 
-	/* 'flush' ir_raw_event_store_with_filter() */
+	/* 'flush' ir_raw_event_store_with_filter() */
 	ir_raw_event_handle(d->rc_dev);
 exit:
 	return ret;
-- 
GitLab


From a4c3793e71f3322b910d5ac46882120bd149b08b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:42 +0200
Subject: [PATCH 2228/3804] media: allegro-dvt: avoid EN DASH char

While there's nothing wrong with an EN DASH in C code, this probably
came from some cut-and-paste from an ITU-T table.
It sounds better to just use a HYPHEN here.

Reviewed-by: Michael Tretter <m.tretter@pengutronix.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/allegro-dvt/nal-h264.c | 2 +-
 drivers/media/platform/allegro-dvt/nal-hevc.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/allegro-dvt/nal-h264.c b/drivers/media/platform/allegro-dvt/nal-h264.c
index 94dd9266d8506..0ab2fcbee1b93 100644
--- a/drivers/media/platform/allegro-dvt/nal-h264.c
+++ b/drivers/media/platform/allegro-dvt/nal-h264.c
@@ -25,7 +25,7 @@
 #include "nal-rbsp.h"
 
 /*
- * See Rec. ITU-T H.264 (04/2017) Table 7-1 – NAL unit type codes, syntax
+ * See Rec. ITU-T H.264 (04/2017) Table 7-1 - NAL unit type codes, syntax
  * element categories, and NAL unit type classes
  */
 enum nal_unit_type {
diff --git a/drivers/media/platform/allegro-dvt/nal-hevc.c b/drivers/media/platform/allegro-dvt/nal-hevc.c
index 5db540c69bfe6..15a352e45831b 100644
--- a/drivers/media/platform/allegro-dvt/nal-hevc.c
+++ b/drivers/media/platform/allegro-dvt/nal-hevc.c
@@ -25,7 +25,7 @@
 #include "nal-rbsp.h"
 
 /*
- * See Rec. ITU-T H.265 (02/2018) Table 7-1 – NAL unit type codes and NAL unit
+ * See Rec. ITU-T H.265 (02/2018) Table 7-1 - NAL unit type codes and NAL unit
  * type classes
  */
 enum nal_unit_type {
-- 
GitLab


From 35c47f8d9a34cfa4b17109501526411d74341c8b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:45 +0200
Subject: [PATCH 2229/3804] media: saa7134: drop a NO-BREAK SPACE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are two spaces in a comment there, one of them being
an U+00a0 (' '): NO-BREAK SPACE.

Drop it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/saa7134/saa7134-tvaudio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/pci/saa7134/saa7134-tvaudio.c b/drivers/media/pci/saa7134/saa7134-tvaudio.c
index aa0895d2d7358..9e0c442abc761 100644
--- a/drivers/media/pci/saa7134/saa7134-tvaudio.c
+++ b/drivers/media/pci/saa7134/saa7134-tvaudio.c
@@ -871,7 +871,7 @@ void saa7134_enable_i2s(struct saa7134_dev *dev)
 	switch (dev->pci->device) {
 	case PCI_DEVICE_ID_PHILIPS_SAA7133:
 	case PCI_DEVICE_ID_PHILIPS_SAA7135:
-		/* Set I2S format (SONY)  */
+		/* Set I2S format (SONY) */
 		saa_writeb(SAA7133_I2S_AUDIO_CONTROL, 0x00);
 		/* Start I2S */
 		saa_writeb(SAA7134_I2S_AUDIO_OUTPUT, 0x11);
-- 
GitLab


From 730f055666a30b8224d639110eb9b25eaa87883a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:49 +0200
Subject: [PATCH 2230/3804] media: rc: ite-cir: replace some an EN DASH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of using U+2013 ('–'): EN DASH, let's just use a
hyphen there, as this was probably introduced by some cut-and-paste
from some other place.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/rc/ite-cir.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/rc/ite-cir.h b/drivers/media/rc/ite-cir.h
index ce7a40b108284..4b4294d77555d 100644
--- a/drivers/media/rc/ite-cir.h
+++ b/drivers/media/rc/ite-cir.h
@@ -167,7 +167,7 @@ struct ite_dev {
  * hardware data obtained from:
  *
  * IT8712F
- * Environment Control – Low Pin Count Input / Output
+ * Environment Control - Low Pin Count Input / Output
  * (EC - LPC I/O)
  * Preliminary Specification V0. 81
  */
-- 
GitLab


From 5b448065febe1c6bb6693735844f2fb2b7b654dc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 2 Jun 2021 16:42:46 +0200
Subject: [PATCH 2231/3804] media: pci: tw5864: avoid usage of some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are several comments on this driver using those chars:

	- U+2013 ('–'): EN DASH
	- U+2018 ('‘'): LEFT SINGLE QUOTATION MARK
	- U+2019 ('’'): RIGHT SINGLE QUOTATION MARK

They probably came from cut-and-pasting some texts found
elsewhere.

While there's nothing wrong with having those in comments in
C, it is better to use ASCII chars for those specific cases,
as the current variant doesn't really add any value.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/tw5864/tw5864-reg.h | 62 +++++++++++++--------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/media/pci/tw5864/tw5864-reg.h b/drivers/media/pci/tw5864/tw5864-reg.h
index a74f30f2f78ed..a26a439c4dc08 100644
--- a/drivers/media/pci/tw5864/tw5864-reg.h
+++ b/drivers/media/pci/tw5864/tw5864-reg.h
@@ -289,13 +289,13 @@
 /* OSD enable bit for each channel */
 #define TW5864_DSP_OSD_ENABLE 0x0228
 
-/* 0x0280 ~ 0x029c – Motion Vector for 1st 4x4 Block, e.g., 80 (X), 84 (Y) */
+/* 0x0280 ~ 0x029c - Motion Vector for 1st 4x4 Block, e.g., 80 (X), 84 (Y) */
 #define TW5864_ME_MV_VEC1 0x0280
-/* 0x02a0 ~ 0x02bc – Motion Vector for 2nd 4x4 Block, e.g., A0 (X), A4 (Y) */
+/* 0x02a0 ~ 0x02bc - Motion Vector for 2nd 4x4 Block, e.g., A0 (X), A4 (Y) */
 #define TW5864_ME_MV_VEC2 0x02a0
-/* 0x02c0 ~ 0x02dc – Motion Vector for 3rd 4x4 Block, e.g., C0 (X), C4 (Y) */
+/* 0x02c0 ~ 0x02dc - Motion Vector for 3rd 4x4 Block, e.g., C0 (X), C4 (Y) */
 #define TW5864_ME_MV_VEC3 0x02c0
-/* 0x02e0 ~ 0x02fc – Motion Vector for 4th 4x4 Block, e.g., E0 (X), E4 (Y) */
+/* 0x02e0 ~ 0x02fc - Motion Vector for 4th 4x4 Block, e.g., E0 (X), E4 (Y) */
 #define TW5864_ME_MV_VEC4 0x02e0
 
 /*
@@ -462,13 +462,13 @@
 
 #define TW5864_VLC_BUF 0x100c
 /* Define controls in register TW5864_VLC_BUF */
-/* VLC BK0 full status, write ‘1’ to clear */
+/* VLC BK0 full status, write '1' to clear */
 #define TW5864_VLC_BK0_FULL BIT(0)
-/* VLC BK1 full status, write ‘1’ to clear */
+/* VLC BK1 full status, write '1' to clear */
 #define TW5864_VLC_BK1_FULL BIT(1)
-/* VLC end slice status, write ‘1’ to clear */
+/* VLC end slice status, write '1' to clear */
 #define TW5864_VLC_END_SLICE BIT(2)
-/* VLC Buffer overflow status, write ‘1’ to clear */
+/* VLC Buffer overflow status, write '1' to clear */
 #define TW5864_DSP_RD_OF BIT(3)
 /* VLC string length in either buffer 0 or 1 at end of frame */
 #define TW5864_VLC_STREAM_LEN_SHIFT 4
@@ -476,7 +476,7 @@
 
 /* [15:0] Total coefficient number in a frame */
 #define TW5864_TOTAL_COEF_NO 0x1010
-/* [0] VLC Encoder Interrupt. Write ‘1’ to clear */
+/* [0] VLC Encoder Interrupt. Write '1' to clear */
 #define TW5864_VLC_DSP_INTR 0x1014
 /* [31:0] VLC stream CRC checksum */
 #define TW5864_VLC_STREAM_CRC 0x1018
@@ -494,7 +494,7 @@
  */
 #define TW5864_VLC_RD_BRST BIT(1)
 
-/* 0x2000 ~ 0x2ffc -- H264 Stream Memory Map */
+/* 0x2000 ~ 0x2ffc - H264 Stream Memory Map */
 /*
  * A word is 4 bytes. I.e.,
  * VLC_STREAM_MEM[0] address: 0x2000
@@ -506,7 +506,7 @@
 #define TW5864_VLC_STREAM_MEM_MAX_OFFSET 0x3ff
 #define TW5864_VLC_STREAM_MEM(offset) (TW5864_VLC_STREAM_MEM_START + 4 * offset)
 
-/* 0x4000 ~ 0x4ffc -- Audio Register Map */
+/* 0x4000 ~ 0x4ffc - Audio Register Map */
 /* [31:0] config 1ms cnt = Realtime clk/1000 */
 #define TW5864_CFG_1MS_CNT 0x4000
 
@@ -688,10 +688,10 @@
 
 /*
  * [1:0]
- * 2’b00 phase set to 180 degree
- * 2’b01 phase set to 270 degree
- * 2’b10 phase set to 0 degree
- * 2’b11 phase set to 90 degree
+ * 2'b00 phase set to 180 degree
+ * 2'b01 phase set to 270 degree
+ * 2'b10 phase set to 0 degree
+ * 2'b11 phase set to 90 degree
  */
 #define TW5864_I2C_PHASE_CFG 0x800c
 
@@ -826,7 +826,7 @@
 /* SPLL_IREF, SPLL_LPX4, SPLL_CPX4, SPLL_PD, SPLL_DBG */
 #define TW5864_SPLL 0x8028
 
-/* 0x8800 ~ 0x88fc -- Interrupt Register Map */
+/* 0x8800 ~ 0x88fc - Interrupt Register Map */
 /*
  * Trigger mode of interrupt source 0 ~ 15
  * 1 Edge trigger mode
@@ -909,7 +909,7 @@
 #define TW5864_INTR_I2C_DONE BIT(25)
 #define TW5864_INTR_AD BIT(26)
 
-/* 0x9000 ~ 0x920c -- Video Capture (VIF) Register Map */
+/* 0x9000 ~ 0x920c - Video Capture (VIF) Register Map */
 /*
  * H264EN_CH_STATUS[n] Status of Vsync synchronized H264EN_CH_EN (Read Only)
  * 1 Channel Enabled
@@ -1009,7 +1009,7 @@
 /* GPIO Output Enable of Group n */
 #define TW5864_GPIO_OEN (0xff << 8)
 
-/* 0xa000 ~ 0xa8ff – DDR Controller Register Map */
+/* 0xa000 ~ 0xa8ff - DDR Controller Register Map */
 /* DDR Controller A */
 /*
  * [2:0] Data valid counter after read command to DDR. This is the delay value
@@ -1111,7 +1111,7 @@
  */
 #define TW5864_DDR_B_OFFSET 0x0800
 
-/* 0xb004 ~ 0xb018 – HW version/ARB12 Register Map */
+/* 0xb004 ~ 0xb018 - HW version/ARB12 Register Map */
 /* [15:0] Default is C013 */
 #define TW5864_HW_VERSION 0xb004
 
@@ -1145,7 +1145,7 @@
 /* ARB12 maximum value of time out counter (default 15"h1FF) */
 #define TW5864_ARB12_TIME_OUT_CNT 0x7fff
 
-/* 0xb800 ~ 0xb80c -- Indirect Access Register Map */
+/* 0xb800 ~ 0xb80c - Indirect Access Register Map */
 /*
  * Spec says:
  * In order to access the indirect register space, the following procedure is
@@ -1177,7 +1177,7 @@
 /* [31:0] Data used to read/write indirect register space */
 #define TW5864_IND_DATA 0xb804
 
-/* 0xc000 ~ 0xc7fc -- Preview Register Map */
+/* 0xc000 ~ 0xc7fc - Preview Register Map */
 /* Mostly skipped this section. */
 /*
  * [15:0] Status of Vsync Synchronized PCI_PV_CH_EN (Read Only)
@@ -1192,12 +1192,12 @@
  */
 #define TW5864_PCI_PV_CH_EN 0xc004
 
-/* 0xc800 ~ 0xc804 -- JPEG Capture Register Map */
+/* 0xc800 ~ 0xc804 - JPEG Capture Register Map */
 /* Skipped. */
-/* 0xd000 ~ 0xd0fc -- JPEG Control Register Map */
+/* 0xd000 ~ 0xd0fc - JPEG Control Register Map */
 /* Skipped. */
 
-/* 0xe000 ~ 0xfc04 – Motion Vector Register Map */
+/* 0xe000 ~ 0xfc04 - Motion Vector Register Map */
 
 /* ME Motion Vector data (Four Byte Each) 0xe000 ~ 0xe7fc */
 #define TW5864_ME_MV_VEC_START 0xe000
@@ -1231,7 +1231,7 @@
  */
 #define TW5864_MPI_DDR_SEL2 BIT(15)
 
-/* 0x18000 ~ 0x181fc – PCI Master/Slave Control Map */
+/* 0x18000 ~ 0x181fc - PCI Master/Slave Control Map */
 #define TW5864_PCI_INTR_STATUS 0x18000
 /* Define controls in register TW5864_PCI_INTR_STATUS */
 /* vlc done */
@@ -1400,11 +1400,11 @@
 #define TW5864_VLC_STREAM_BASE_ADDR 0x18080
 /* MV stream base address */
 #define TW5864_MV_STREAM_BASE_ADDR 0x18084
-/* 0x180a0 – 0x180bc: audio burst base address. Skipped. */
-/* 0x180c0 ~ 0x180dc – JPEG Push Mode Buffer Base Address. Skipped. */
-/* 0x18100 – 0x1817c: preview burst base address. Skipped. */
+/* 0x180a0 ~ 0x180bc: audio burst base address. Skipped. */
+/* 0x180c0 ~ 0x180dc: JPEG Push Mode Buffer Base Address. Skipped. */
+/* 0x18100 ~ 0x1817c: preview burst base address. Skipped. */
 
-/* 0x80000 ~ 0x87fff -- DDR Burst RW Register Map */
+/* 0x80000 ~ 0x87fff - DDR Burst RW Register Map */
 #define TW5864_DDR_CTL 0x80000
 /* Define controls in register TW5864_DDR_CTL */
 #define TW5864_BRST_LENGTH_SHIFT 2
@@ -1516,7 +1516,7 @@
  * Vertical Sharpness Control. Writable.
  * 0 = None (default)
  * 7 = Highest
- * **Note: VSHP must be set to ‘0’ if COMB = 0
+ * **Note: VSHP must be set to '0' if COMB = 0
  */
 #define TW5864_INDIR_VIN_1_VSHP 0x07
 
@@ -1595,7 +1595,7 @@
 #define TW5864_INDIR_VIN_9_CNTRST(channel) (0x009 + channel * 0x010)
 
 /*
- * These bits control the brightness. They have value of –128 to 127 in 2's
+ * These bits control the brightness. They have value of -128 to 127 in 2's
  * complement form. Positive value increases brightness. A value 0 has no
  * effect on the data. The default is 00h.
  */
-- 
GitLab


From a0143f5ac0594d73ef91c2336d8172217ff9cd72 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:25 -0400
Subject: [PATCH 2232/3804] clocksource/drivers/samsung_pwm: Minor whitespace
 cleanup

Clean up the code to be slightly more readable and follow coding
convention - only whitespace.  This fixes checkpatch warnings:

  WARNING: Block comments should align the * on each line
  WARNING: please, no space before tabs
  WARNING: Missing a blank line after declarations
  CHECK: Alignment should match open parenthesis

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-1-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 19 +++++++++++--------
 include/clocksource/samsung_pwm.h       |  3 ++-
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index f760229d0c7f3..69bf79c7f462f 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -4,7 +4,7 @@
  *		http://www.samsung.com/
  *
  * samsung - Common hr-timer support (s3c and s5p)
-*/
+ */
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
@@ -22,7 +22,6 @@
 
 #include <clocksource/samsung_pwm.h>
 
-
 /*
  * Clocksource driver
  */
@@ -38,8 +37,8 @@
 #define TCFG0_PRESCALER_MASK		0xff
 #define TCFG0_PRESCALER1_SHIFT		8
 
-#define TCFG1_SHIFT(x)	  		((x) * 4)
-#define TCFG1_MUX_MASK	  		0xf
+#define TCFG1_SHIFT(x)			((x) * 4)
+#define TCFG1_MUX_MASK			0xf
 
 /*
  * Each channel occupies 4 bits in TCON register, but there is a gap of 4
@@ -183,7 +182,7 @@ static void samsung_time_start(unsigned int channel, bool periodic)
 }
 
 static int samsung_set_next_event(unsigned long cycles,
-				struct clock_event_device *evt)
+				  struct clock_event_device *evt)
 {
 	/*
 	 * This check is needed to account for internal rounding
@@ -225,6 +224,7 @@ static void samsung_clockevent_resume(struct clock_event_device *cev)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 }
@@ -248,6 +248,7 @@ static irqreturn_t samsung_clock_event_isr(int irq, void *dev_id)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 
@@ -272,7 +273,7 @@ static void __init samsung_clockevent_init(void)
 
 	time_event_device.cpumask = cpumask_of(0);
 	clockevents_config_and_register(&time_event_device,
-						clock_rate, 1, pwm.tcnt_max);
+					clock_rate, 1, pwm.tcnt_max);
 
 	irq_number = pwm.irq[pwm.event_id];
 	if (request_irq(irq_number, samsung_clock_event_isr,
@@ -282,6 +283,7 @@ static void __init samsung_clockevent_init(void)
 
 	if (pwm.variant.has_tint_cstat) {
 		u32 mask = (1 << pwm.event_id);
+
 		writel(mask | (mask << 5), pwm.base + REG_TINT_CSTAT);
 	}
 }
@@ -347,7 +349,7 @@ static int __init samsung_clocksource_init(void)
 		pwm.source_reg = pwm.base + pwm.source_id * 0x0c + 0x14;
 
 	sched_clock_register(samsung_read_sched_clock,
-						pwm.variant.bits, clock_rate);
+			     pwm.variant.bits, clock_rate);
 
 	samsung_clocksource.mask = CLOCKSOURCE_MASK(pwm.variant.bits);
 	return clocksource_register_hz(&samsung_clocksource, clock_rate);
@@ -398,7 +400,8 @@ static int __init _samsung_pwm_clocksource_init(void)
 }
 
 void __init samsung_pwm_clocksource_init(void __iomem *base,
-			unsigned int *irqs, struct samsung_pwm_variant *variant)
+					 unsigned int *irqs,
+					 struct samsung_pwm_variant *variant)
 {
 	pwm.base = base;
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h
index c395238d09222..76341988fb4f2 100644
--- a/include/clocksource/samsung_pwm.h
+++ b/include/clocksource/samsung_pwm.h
@@ -27,6 +27,7 @@ struct samsung_pwm_variant {
 };
 
 void samsung_pwm_clocksource_init(void __iomem *base,
-		unsigned int *irqs, struct samsung_pwm_variant *variant);
+				  unsigned int *irqs,
+				  struct samsung_pwm_variant *variant);
 
 #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */
-- 
GitLab


From bb08e96575dbbd49acb49999dd0d7ffedb5c1608 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:26 -0400
Subject: [PATCH 2233/3804] clocksource/drivers/samsung_pwm: Constify passed
 structure

The 'struct samsung_pwm_variant' argument passed to initialization
functions is not modified, so it can be made const for safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-2-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 2 +-
 include/clocksource/samsung_pwm.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 69bf79c7f462f..bfad61b509f93 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -401,7 +401,7 @@ static int __init _samsung_pwm_clocksource_init(void)
 
 void __init samsung_pwm_clocksource_init(void __iomem *base,
 					 unsigned int *irqs,
-					 struct samsung_pwm_variant *variant)
+					 const struct samsung_pwm_variant *variant)
 {
 	pwm.base = base;
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
diff --git a/include/clocksource/samsung_pwm.h b/include/clocksource/samsung_pwm.h
index 76341988fb4f2..9b435caa95fef 100644
--- a/include/clocksource/samsung_pwm.h
+++ b/include/clocksource/samsung_pwm.h
@@ -28,6 +28,6 @@ struct samsung_pwm_variant {
 
 void samsung_pwm_clocksource_init(void __iomem *base,
 				  unsigned int *irqs,
-				  struct samsung_pwm_variant *variant);
+				  const struct samsung_pwm_variant *variant);
 
 #endif /* __CLOCKSOURCE_SAMSUNG_PWM_H */
-- 
GitLab


From 63e83bd8cd848a3d1b4777d90635a309fa9cb2c7 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:27 -0400
Subject: [PATCH 2234/3804] clocksource/drivers/samsung_pwm: Cleanup on init
 error

Failure of timer initialization is likely to be fatal for the system, so
cleanup in such a case is not strictly necessary.  However, the code might
be refactored or reused, so it is better not to rely on the assumption
that the system won't continue after an init failure.

Unmap the IO memory and put the clock on initialization failures from
devicetree.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-3-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index bfad61b509f93..55e2f9fa2a15e 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -421,7 +421,7 @@ static int __init samsung_pwm_alloc(struct device_node *np,
 	struct property *prop;
 	const __be32 *cur;
 	u32 val;
-	int i;
+	int i, ret;
 
 	memcpy(&pwm.variant, variant, sizeof(pwm.variant));
 	for (i = 0; i < SAMSUNG_PWM_NUM; ++i)
@@ -444,10 +444,24 @@ static int __init samsung_pwm_alloc(struct device_node *np,
 	pwm.timerclk = of_clk_get_by_name(np, "timers");
 	if (IS_ERR(pwm.timerclk)) {
 		pr_crit("failed to get timers clock for timer\n");
-		return PTR_ERR(pwm.timerclk);
+		ret = PTR_ERR(pwm.timerclk);
+		goto err_clk;
 	}
 
-	return _samsung_pwm_clocksource_init();
+	ret = _samsung_pwm_clocksource_init();
+	if (ret)
+		goto err_clocksource;
+
+	return 0;
+
+err_clocksource:
+	clk_put(pwm.timerclk);
+	pwm.timerclk = NULL;
+err_clk:
+	iounmap(pwm.base);
+	pwm.base = NULL;
+
+	return ret;
 }
 
 static const struct samsung_pwm_variant s3c24xx_variant = {
-- 
GitLab


From b4318ce203db8f8b7004e7ab82a957f894660c88 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Thu, 6 May 2021 16:27:28 -0400
Subject: [PATCH 2235/3804] clocksource/drivers/samsung_pwm: Constify source IO
 memory

The 'source_reg' IO memory is only read, so the pointer can point to
const for safety.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210506202729.157260-4-krzysztof.kozlowski@canonical.com
---
 drivers/clocksource/samsung_pwm_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/samsung_pwm_timer.c b/drivers/clocksource/samsung_pwm_timer.c
index 55e2f9fa2a15e..6e46781bc9acf 100644
--- a/drivers/clocksource/samsung_pwm_timer.c
+++ b/drivers/clocksource/samsung_pwm_timer.c
@@ -61,7 +61,7 @@ EXPORT_SYMBOL(samsung_pwm_lock);
 
 struct samsung_pwm_clocksource {
 	void __iomem *base;
-	void __iomem *source_reg;
+	const void __iomem *source_reg;
 	unsigned int irq[SAMSUNG_PWM_NUM];
 	struct samsung_pwm_variant variant;
 
-- 
GitLab


From 9257bd80b917cc7908abd27ed5a5211964563f62 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Thu, 27 May 2021 20:10:29 +0800
Subject: [PATCH 2236/3804] dt-bindings: connector: Replace BIT macro with
 generic bit ops

BIT macro is not defined. Replace it with generic bit operations.

Fixes: 630dce2810b9 ("dt-bindings: connector: Add SVDM VDO properties")
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210527121029.583611-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/dt-bindings/usb/pd.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/dt-bindings/usb/pd.h b/include/dt-bindings/usb/pd.h
index fef3ef65967fa..cb70b4ceeddef 100644
--- a/include/dt-bindings/usb/pd.h
+++ b/include/dt-bindings/usb/pd.h
@@ -163,10 +163,10 @@
 #define UFP_VDO_VER1_2		2
 
 /* Device Capability */
-#define DEV_USB2_CAPABLE	BIT(0)
-#define DEV_USB2_BILLBOARD	BIT(1)
-#define DEV_USB3_CAPABLE	BIT(2)
-#define DEV_USB4_CAPABLE	BIT(3)
+#define DEV_USB2_CAPABLE	(1 << 0)
+#define DEV_USB2_BILLBOARD	(1 << 1)
+#define DEV_USB3_CAPABLE	(1 << 2)
+#define DEV_USB4_CAPABLE	(1 << 3)
 
 /* Connector Type */
 #define UFP_RECEPTACLE		2
@@ -191,9 +191,9 @@
 
 /* Alternate Modes */
 #define UFP_ALTMODE_NOT_SUPP	0
-#define UFP_ALTMODE_TBT3	BIT(0)
-#define UFP_ALTMODE_RECFG	BIT(1)
-#define UFP_ALTMODE_NO_RECFG	BIT(2)
+#define UFP_ALTMODE_TBT3	(1 << 0)
+#define UFP_ALTMODE_RECFG	(1 << 1)
+#define UFP_ALTMODE_NO_RECFG	(1 << 2)
 
 /* USB Highest Speed */
 #define UFP_USB2_ONLY		0
@@ -217,9 +217,9 @@
  * <4:0>   :: Port number
  */
 #define DFP_VDO_VER1_1		1
-#define HOST_USB2_CAPABLE	BIT(0)
-#define HOST_USB3_CAPABLE	BIT(1)
-#define HOST_USB4_CAPABLE	BIT(2)
+#define HOST_USB2_CAPABLE	(1 << 0)
+#define HOST_USB3_CAPABLE	(1 << 1)
+#define HOST_USB4_CAPABLE	(1 << 2)
 #define DFP_RECEPTACLE		2
 #define DFP_CAPTIVE		3
 
-- 
GitLab


From 8f11fe7e40683f8986aff8f1a46361ceca8f42ec Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Thu, 3 Jun 2021 16:17:42 +0100
Subject: [PATCH 2237/3804] Revert "usb: dwc3: core: Add shutdown callback for
 dwc3"

This reverts commit 568262bf5492a9bb2fcc4c204b8d38fd6be64e28.

The commit causes the following panic when shutting down a rockpro64-v2
board:

[..]
[   41.684569] xhci-hcd xhci-hcd.2.auto: USB bus 1 deregistered
[   41.686301] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a0
[   41.687096] Mem abort info:
[   41.687345]   ESR = 0x96000004
[   41.687615]   EC = 0x25: DABT (current EL), IL = 32 bits
[   41.688082]   SET = 0, FnV = 0
[   41.688352]   EA = 0, S1PTW = 0
[   41.688628] Data abort info:
[   41.688882]   ISV = 0, ISS = 0x00000004
[   41.689219]   CM = 0, WnR = 0
[   41.689481] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000073b2000
[   41.690046] [00000000000000a0] pgd=0000000000000000, p4d=0000000000000000
[   41.690654] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[   41.691143] Modules linked in:
[   41.691416] CPU: 5 PID: 1 Comm: shutdown Not tainted 5.13.0-rc4 #43
[   41.691966] Hardware name: Pine64 RockPro64 v2.0 (DT)
[   41.692409] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--)
[   41.692937] pc : down_read_interruptible+0xec/0x200
[   41.693373] lr : simple_recursive_removal+0x48/0x280
[   41.693815] sp : ffff800011fab910
[   41.694107] x29: ffff800011fab910 x28: ffff0000008fe480 x27: ffff0000008fe4d8
[   41.694736] x26: ffff800011529a90 x25: 00000000000000a0 x24: ffff800011edd030
[   41.695364] x23: 0000000000000080 x22: 0000000000000000 x21: ffff800011f23994
[   41.695992] x20: ffff800011f23998 x19: ffff0000008fe480 x18: ffffffffffffffff
[   41.696620] x17: 000c0400bb44ffff x16: 0000000000000009 x15: ffff800091faba3d
[   41.697248] x14: 0000000000000004 x13: 0000000000000000 x12: 0000000000000020
[   41.697875] x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f x9 : 6f6c746364716e62
[   41.698502] x8 : 7f7f7f7f7f7f7f7f x7 : fefefeff6364626d x6 : 0000000000000440
[   41.699130] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 00000000000000a0
[   41.699758] x2 : 0000000000000001 x1 : 0000000000000000 x0 : 00000000000000a0
[   41.700386] Call trace:
[   41.700602]  down_read_interruptible+0xec/0x200
[   41.701003]  debugfs_remove+0x5c/0x80
[   41.701328]  dwc3_debugfs_exit+0x1c/0x6c
[   41.701676]  dwc3_remove+0x34/0x1a0
[   41.701988]  platform_remove+0x28/0x60
[   41.702322]  __device_release_driver+0x188/0x22c
[   41.702730]  device_release_driver+0x2c/0x44
[   41.703106]  bus_remove_device+0x124/0x130
[   41.703468]  device_del+0x16c/0x424
[   41.703777]  platform_device_del.part.0+0x1c/0x90
[   41.704193]  platform_device_unregister+0x28/0x44
[   41.704608]  of_platform_device_destroy+0xe8/0x100
[   41.705031]  device_for_each_child_reverse+0x64/0xb4
[   41.705470]  of_platform_depopulate+0x40/0x84
[   41.705853]  __dwc3_of_simple_teardown+0x20/0xd4
[   41.706260]  dwc3_of_simple_shutdown+0x14/0x20
[   41.706652]  platform_shutdown+0x28/0x40
[   41.706998]  device_shutdown+0x158/0x330
[   41.707344]  kernel_power_off+0x38/0x7c
[   41.707684]  __do_sys_reboot+0x16c/0x2a0
[   41.708029]  __arm64_sys_reboot+0x28/0x34
[   41.708383]  invoke_syscall+0x48/0x114
[   41.708716]  el0_svc_common.constprop.0+0x44/0xdc
[   41.709131]  do_el0_svc+0x28/0x90
[   41.709426]  el0_svc+0x2c/0x54
[   41.709698]  el0_sync_handler+0xa4/0x130
[   41.710045]  el0_sync+0x198/0x1c0
[   41.710342] Code: c8047c62 35ffff84 17fffe5f f9800071 (c85ffc60)
[   41.710881] ---[ end trace 406377df5178f75c ]---
[   41.711299] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[   41.712084] Kernel Offset: disabled
[   41.712391] CPU features: 0x10001031,20000846
[   41.712775] Memory Limit: none
[   41.713049] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

As Felipe explained: "dwc3_shutdown() is just called dwc3_remove()
directly, then we end up calling debugfs_remove_recursive() twice."

Reverting the commit fixes the panic.

Fixes: 568262bf5492 ("usb: dwc3: core: Add shutdown callback for dwc3")
Acked-by: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Link: https://lore.kernel.org/r/20210603151742.298243-1-alexandru.elisei@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index b6e53d8212cd8..21129d357f295 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1690,11 +1690,6 @@ static int dwc3_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static void dwc3_shutdown(struct platform_device *pdev)
-{
-	dwc3_remove(pdev);
-}
-
 #ifdef CONFIG_PM
 static int dwc3_core_init_for_resume(struct dwc3 *dwc)
 {
@@ -2012,7 +2007,6 @@ MODULE_DEVICE_TABLE(acpi, dwc3_acpi_match);
 static struct platform_driver dwc3_driver = {
 	.probe		= dwc3_probe,
 	.remove		= dwc3_remove,
-	.shutdown   = dwc3_shutdown,
 	.driver		= {
 		.name	= "dwc3",
 		.of_match_table	= of_match_ptr(of_dwc3_match),
-- 
GitLab


From f41bfc7e9c7c1d721c8752f1853cde43e606ad43 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 1 Jun 2021 20:31:48 +0800
Subject: [PATCH 2238/3804] usb: typec: tcpm: Correct the responses in SVDM
 Version 2.0 DFP

In USB PD Spec Rev 3.1 Ver 1.0, section "6.12.5 Applicability of
Structured VDM Commands", DFP is allowed and recommended to respond to
Discovery Identity with ACK. And in section "6.4.4.2.5.1 Commands other
than Attention", NAK should be returned only when receiving Messages
with invalid fields, Messages in the wrong situation, or unrecognized
Messages.

Still keep the original design for SVDM Version 1.0 for backward
compatibility.

Fixes: 193a68011fdc ("staging: typec: tcpm: Respond to Discover Identity commands")
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210601123151.3441914-2-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 9ce8c9af4da58..a1bf0dc5babfe 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1547,19 +1547,25 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 			if (PD_VDO_VID(p[0]) != USB_SID_PD)
 				break;
 
-			if (PD_VDO_SVDM_VER(p[0]) < svdm_version)
+			if (PD_VDO_SVDM_VER(p[0]) < svdm_version) {
 				typec_partner_set_svdm_version(port->partner,
 							       PD_VDO_SVDM_VER(p[0]));
+				svdm_version = PD_VDO_SVDM_VER(p[0]);
+			}
 
 			tcpm_ams_start(port, DISCOVER_IDENTITY);
-			/* 6.4.4.3.1: Only respond as UFP (device) */
-			if (port->data_role == TYPEC_DEVICE &&
+			/*
+			 * PD2.0 Spec 6.10.3: respond with NAK as DFP (data host)
+			 * PD3.1 Spec 6.4.4.2.5.1: respond with NAK if "invalid field" or
+			 * "wrong configuation" or "Unrecognized"
+			 */
+			if ((port->data_role == TYPEC_DEVICE || svdm_version >= SVDM_VER_2_0) &&
 			    port->nr_snk_vdo) {
 				/*
 				 * Product Type DFP and Connector Type are not defined in SVDM
 				 * version 1.0 and shall be set to zero.
 				 */
-				if (typec_get_negotiated_svdm_version(typec) < SVDM_VER_2_0)
+				if (svdm_version < SVDM_VER_2_0)
 					response[1] = port->snk_vdo[0] & ~IDH_DFP_MASK
 						      & ~IDH_CONN_MASK;
 				else
-- 
GitLab


From 55b54c269beef13d88dc30544df34763a90dc815 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 1 Jun 2021 20:31:49 +0800
Subject: [PATCH 2239/3804] dt-bindings: connector: Add PD rev 2.0 VDO
 definition

Add the VDO definition for USB PD rev 2.0 in the bindings and define a
new property sink-vdos-v1 containing legacy VDOs as the responses to the
port partner which only supports PD rev 2.0.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210601123151.3441914-3-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/connector/usb-connector.yaml     | 15 ++++
 include/dt-bindings/usb/pd.h                  | 69 ++++++++++++++++++-
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/connector/usb-connector.yaml b/Documentation/devicetree/bindings/connector/usb-connector.yaml
index 32509b98142e9..92b49bc379394 100644
--- a/Documentation/devicetree/bindings/connector/usb-connector.yaml
+++ b/Documentation/devicetree/bindings/connector/usb-connector.yaml
@@ -149,6 +149,17 @@ properties:
     maxItems: 6
     $ref: /schemas/types.yaml#/definitions/uint32-array
 
+  sink-vdos-v1:
+    description: An array of u32 with each entry, a Vendor Defined Message Object (VDO),
+      providing additional information corresponding to the product, the detailed bit
+      definitions and the order of each VDO can be found in
+      "USB Power Delivery Specification Revision 2.0, Version 1.3" chapter 6.4.4.3.1 Discover
+      Identity. User can specify the VDO array via VDO_IDH/_CERT/_PRODUCT/_CABLE/_AMA defined in
+      dt-bindings/usb/pd.h.
+    minItems: 3
+    maxItems: 6
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+
   op-sink-microwatt:
     description: Sink required operating power in microwatt, if source can't
       offer the power, Capability Mismatch is set. Required for power sink and
@@ -207,6 +218,10 @@ properties:
       SNK_READY for non-pd link.
     type: boolean
 
+dependencies:
+  sink-vdos-v1: [ 'sink-vdos' ]
+  sink-vdos: [ 'sink-vdos-v1' ]
+
 required:
   - compatible
 
diff --git a/include/dt-bindings/usb/pd.h b/include/dt-bindings/usb/pd.h
index cb70b4ceeddef..e6526b1381745 100644
--- a/include/dt-bindings/usb/pd.h
+++ b/include/dt-bindings/usb/pd.h
@@ -106,6 +106,10 @@
  * <20:16>  :: Reserved, Shall be set to zero
  * <15:0>   :: USB-IF assigned VID for this cable vendor
  */
+
+/* PD Rev2.0 definition */
+#define IDH_PTYPE_UNDEF		0
+
 /* SOP Product Type (UFP) */
 #define IDH_PTYPE_NOT_UFP       0
 #define IDH_PTYPE_HUB           1
@@ -228,7 +232,25 @@
 	 | ((pnum) & 0x1f))
 
 /*
- * Passive Cable VDO
+ * Cable VDO (for both Passive and Active Cable VDO in PD Rev2.0)
+ * ---------
+ * <31:28> :: Cable HW version
+ * <27:24> :: Cable FW version
+ * <23:20> :: Reserved, Shall be set to zero
+ * <19:18> :: type-C to Type-A/B/C/Captive (00b == A, 01 == B, 10 == C, 11 == Captive)
+ * <17>    :: Reserved, Shall be set to zero
+ * <16:13> :: cable latency (0001 == <10ns(~1m length))
+ * <12:11> :: cable termination type (11b == both ends active VCONN req)
+ * <10>    :: SSTX1 Directionality support (0b == fixed, 1b == cfgable)
+ * <9>     :: SSTX2 Directionality support
+ * <8>     :: SSRX1 Directionality support
+ * <7>     :: SSRX2 Directionality support
+ * <6:5>   :: Vbus current handling capability (01b == 3A, 10b == 5A)
+ * <4>     :: Vbus through cable (0b == no, 1b == yes)
+ * <3>     :: SOP" controller present? (0b == no, 1b == yes)
+ * <2:0>   :: USB SS Signaling support
+ *
+ * Passive Cable VDO (PD Rev3.0+)
  * ---------
  * <31:28> :: Cable HW version
  * <27:24> :: Cable FW version
@@ -244,7 +266,7 @@
  * <4:3>   :: Reserved, Shall be set to zero
  * <2:0>   :: USB highest speed
  *
- * Active Cable VDO 1
+ * Active Cable VDO 1 (PD Rev3.0+)
  * ---------
  * <31:28> :: Cable HW version
  * <27:24> :: Cable FW version
@@ -266,7 +288,9 @@
 #define CABLE_VDO_VER1_0	0
 #define CABLE_VDO_VER1_3	3
 
-/* Connector Type */
+/* Connector Type (_ATYPE and _BTYPE are for PD Rev2.0 only) */
+#define CABLE_ATYPE		0
+#define CABLE_BTYPE		1
 #define CABLE_CTYPE		2
 #define CABLE_CAPTIVE		3
 
@@ -303,12 +327,22 @@
 #define CABLE_CURR_3A		1
 #define CABLE_CURR_5A		2
 
+/* USB SuperSpeed Signaling Support (PD Rev2.0) */
+#define CABLE_USBSS_U2_ONLY	0
+#define CABLE_USBSS_U31_GEN1	1
+#define CABLE_USBSS_U31_GEN2	2
+
 /* USB Highest Speed */
 #define CABLE_USB2_ONLY		0
 #define CABLE_USB32_GEN1	1
 #define CABLE_USB32_4_GEN2	2
 #define CABLE_USB4_GEN3		3
 
+#define VDO_CABLE(hw, fw, cbl, lat, term, tx1d, tx2d, rx1d, rx2d, cur, vps, sopp, usbss) \
+	(((hw) & 0x7) << 28 | ((fw) & 0x7) << 24 | ((cbl) & 0x3) << 18		\
+	 | ((lat) & 0x7) << 13 | ((term) & 0x3) << 11 | (tx1d) << 10		\
+	 | (tx2d) << 9 | (rx1d) << 8 | (rx2d) << 7 | ((cur) & 0x3) << 5		\
+	 | (vps) << 4 | (sopp) << 3 | ((usbss) & 0x7))
 #define VDO_PCABLE(hw, fw, ver, conn, lat, term, vbm, cur, spd)			\
 	(((hw) & 0xf) << 28 | ((fw) & 0xf) << 24 | ((ver) & 0x7) << 21		\
 	 | ((conn) & 0x3) << 18 | ((lat) & 0xf) << 13 | ((term) & 0x3) << 11	\
@@ -373,6 +407,35 @@
 	 | ((hops) & 0x3) << 6 | (u2) << 5 | (u32) << 4 | (lane) << 3		\
 	 | (iso) << 2 | (gen))
 
+/*
+ * AMA VDO (PD Rev2.0)
+ * ---------
+ * <31:28> :: Cable HW version
+ * <27:24> :: Cable FW version
+ * <23:12> :: Reserved, Shall be set to zero
+ * <11>    :: SSTX1 Directionality support (0b == fixed, 1b == cfgable)
+ * <10>    :: SSTX2 Directionality support
+ * <9>     :: SSRX1 Directionality support
+ * <8>     :: SSRX2 Directionality support
+ * <7:5>   :: Vconn power
+ * <4>     :: Vconn power required
+ * <3>     :: Vbus power required
+ * <2:0>   :: USB SS Signaling support
+ */
+#define VDO_AMA(hw, fw, tx1d, tx2d, rx1d, rx2d, vcpwr, vcr, vbr, usbss) \
+	(((hw) & 0x7) << 28 | ((fw) & 0x7) << 24			\
+	 | (tx1d) << 11 | (tx2d) << 10 | (rx1d) << 9 | (rx2d) << 8	\
+	 | ((vcpwr) & 0x7) << 5 | (vcr) << 4 | (vbr) << 3		\
+	 | ((usbss) & 0x7))
+
+#define PD_VDO_AMA_VCONN_REQ(vdo)	(((vdo) >> 4) & 1)
+#define PD_VDO_AMA_VBUS_REQ(vdo)	(((vdo) >> 3) & 1)
+
+#define AMA_USBSS_U2_ONLY	0
+#define AMA_USBSS_U31_GEN1	1
+#define AMA_USBSS_U31_GEN2	2
+#define AMA_USBSS_BBONLY	3
+
 /*
  * VPD VDO
  * ---------
-- 
GitLab


From 7ac505103572548fd8a50a49b2c22e1588901731 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 1 Jun 2021 20:31:50 +0800
Subject: [PATCH 2240/3804] usb: typec: tcpm: Introduce snk_vdo_v1 for SVDM
 version 1.0

The ID Header VDO and Product VDOs defined in USB PD Spec rev 2.0 and
rev 3.1 are quite different. Add an additional array snk_vdo_v1 and
send it as the response to the port partner if it only supports SVDM
version 1.0.

Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210601123151.3441914-4-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 40 ++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index a1bf0dc5babfe..7b4345a6fe971 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -401,6 +401,8 @@ struct tcpm_port {
 	unsigned int nr_src_pdo;
 	u32 snk_pdo[PDO_MAX_OBJECTS];
 	unsigned int nr_snk_pdo;
+	u32 snk_vdo_v1[VDO_MAX_OBJECTS];
+	unsigned int nr_snk_vdo_v1;
 	u32 snk_vdo[VDO_MAX_OBJECTS];
 	unsigned int nr_snk_vdo;
 
@@ -1561,18 +1563,16 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 			 */
 			if ((port->data_role == TYPEC_DEVICE || svdm_version >= SVDM_VER_2_0) &&
 			    port->nr_snk_vdo) {
-				/*
-				 * Product Type DFP and Connector Type are not defined in SVDM
-				 * version 1.0 and shall be set to zero.
-				 */
-				if (svdm_version < SVDM_VER_2_0)
-					response[1] = port->snk_vdo[0] & ~IDH_DFP_MASK
-						      & ~IDH_CONN_MASK;
-				else
-					response[1] = port->snk_vdo[0];
-				for (i = 1; i <  port->nr_snk_vdo; i++)
-					response[i + 1] = port->snk_vdo[i];
-				rlen = port->nr_snk_vdo + 1;
+				if (svdm_version < SVDM_VER_2_0) {
+					for (i = 0; i < port->nr_snk_vdo_v1; i++)
+						response[i + 1] = port->snk_vdo_v1[i];
+					rlen = port->nr_snk_vdo_v1 + 1;
+
+				} else {
+					for (i = 0; i < port->nr_snk_vdo; i++)
+						response[i + 1] = port->snk_vdo[i];
+					rlen = port->nr_snk_vdo + 1;
+				}
 			}
 			break;
 		case CMD_DISCOVER_SVID:
@@ -5953,6 +5953,22 @@ sink:
 			return ret;
 	}
 
+	/* If sink-vdos is found, sink-vdos-v1 is expected for backward compatibility. */
+	if (port->nr_snk_vdo) {
+		ret = fwnode_property_count_u32(fwnode, "sink-vdos-v1");
+		if (ret < 0)
+			return ret;
+		else if (ret == 0)
+			return -ENODATA;
+
+		port->nr_snk_vdo_v1 = min(ret, VDO_MAX_OBJECTS);
+		ret = fwnode_property_read_u32_array(fwnode, "sink-vdos-v1",
+						     port->snk_vdo_v1,
+						     port->nr_snk_vdo_v1);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
-- 
GitLab


From 80137c18737c30d20ee630e442405236d96898a7 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 1 Jun 2021 20:31:51 +0800
Subject: [PATCH 2241/3804] usb: typec: tcpm: Fix misuses of AMS invocation

tcpm_ams_start is used to initiate an AMS as well as checking Collision
Avoidance conditions but not for flagging passive AMS (initiated by the
port partner). Fix the misuses of tcpm_ams_start in tcpm_pd_svdm.

ATTENTION doesn't need responses so the AMS flag is not needed here.

Fixes: 0bc3ee92880d ("usb: typec: tcpm: Properly interrupt VDM AMS")
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210601123151.3441914-5-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 7b4345a6fe971..6161a0c1dc0e6 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1555,7 +1555,7 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 				svdm_version = PD_VDO_SVDM_VER(p[0]);
 			}
 
-			tcpm_ams_start(port, DISCOVER_IDENTITY);
+			port->ams = DISCOVER_IDENTITY;
 			/*
 			 * PD2.0 Spec 6.10.3: respond with NAK as DFP (data host)
 			 * PD3.1 Spec 6.4.4.2.5.1: respond with NAK if "invalid field" or
@@ -1576,19 +1576,18 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev,
 			}
 			break;
 		case CMD_DISCOVER_SVID:
-			tcpm_ams_start(port, DISCOVER_SVIDS);
+			port->ams = DISCOVER_SVIDS;
 			break;
 		case CMD_DISCOVER_MODES:
-			tcpm_ams_start(port, DISCOVER_MODES);
+			port->ams = DISCOVER_MODES;
 			break;
 		case CMD_ENTER_MODE:
-			tcpm_ams_start(port, DFP_TO_UFP_ENTER_MODE);
+			port->ams = DFP_TO_UFP_ENTER_MODE;
 			break;
 		case CMD_EXIT_MODE:
-			tcpm_ams_start(port, DFP_TO_UFP_EXIT_MODE);
+			port->ams = DFP_TO_UFP_EXIT_MODE;
 			break;
 		case CMD_ATTENTION:
-			tcpm_ams_start(port, ATTENTION);
 			/* Attention command does not have response */
 			*adev_action = ADEV_ATTENTION;
 			return 0;
-- 
GitLab


From 1d0d3d818eafe1963ec1eaf302175cd14938188e Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 21 May 2021 18:55:50 +0200
Subject: [PATCH 2242/3804] usb: dwc3: meson-g12a: Disable the regulator in the
 error handling path of the probe

If an error occurs after a successful 'regulator_enable()' call,
'regulator_disable()' must be called.

Fix the error handling path of the probe accordingly.

The remove function doesn't need to be fixed, because the
'regulator_disable()' call is already hidden in 'dwc3_meson_g12a_suspend()'
which is called via 'pm_runtime_set_suspended()' in the remove function.

Fixes: c99993376f72 ("usb: dwc3: Add Amlogic G12A DWC3 glue")
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/79df054046224bbb0716a8c5c2082650290eec86.1621616013.git.christophe.jaillet@wanadoo.fr
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-meson-g12a.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c
index bdf1f98dfad8c..8049575251306 100644
--- a/drivers/usb/dwc3/dwc3-meson-g12a.c
+++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
@@ -772,13 +772,13 @@ static int dwc3_meson_g12a_probe(struct platform_device *pdev)
 
 	ret = priv->drvdata->usb_init(priv);
 	if (ret)
-		goto err_disable_clks;
+		goto err_disable_regulator;
 
 	/* Init PHYs */
 	for (i = 0 ; i < PHY_COUNT ; ++i) {
 		ret = phy_init(priv->phys[i]);
 		if (ret)
-			goto err_disable_clks;
+			goto err_disable_regulator;
 	}
 
 	/* Set PHY Power */
@@ -816,6 +816,10 @@ err_phys_exit:
 	for (i = 0 ; i < PHY_COUNT ; ++i)
 		phy_exit(priv->phys[i]);
 
+err_disable_regulator:
+	if (priv->vbus)
+		regulator_disable(priv->vbus);
+
 err_disable_clks:
 	clk_bulk_disable_unprepare(priv->drvdata->num_clks,
 				   priv->drvdata->clks);
-- 
GitLab


From 4d2aa178d2ad2fb156711113790dde13e9aa2376 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 1 Jun 2021 10:48:30 +0200
Subject: [PATCH 2243/3804] usb: dwc3-meson-g12a: fix usb2 PHY glue init when
 phy0 is disabled

When only PHY1 is used (for example on Odroid-HC4), the regmap init code
loops over the number of usb2 ports and so doesn't initialize the PHY1
regmap entry.

This fixes:
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020
...
pc : regmap_update_bits_base+0x40/0xa0
lr : dwc3_meson_g12a_usb2_init_phy+0x4c/0xf8
...
Call trace:
regmap_update_bits_base+0x40/0xa0
dwc3_meson_g12a_usb2_init_phy+0x4c/0xf8
dwc3_meson_g12a_usb2_init+0x7c/0xc8
dwc3_meson_g12a_usb_init+0x28/0x48
dwc3_meson_g12a_probe+0x298/0x540
platform_probe+0x70/0xe0
really_probe+0xf0/0x4d8
driver_probe_device+0xfc/0x168
...

Fixes: 013af227f58a97 ("usb: dwc3: meson-g12a: handle the phy and glue registers separately")
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210601084830.260196-1-narmstrong@baylibre.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-meson-g12a.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-meson-g12a.c b/drivers/usb/dwc3/dwc3-meson-g12a.c
index 8049575251306..ffe301d6ea359 100644
--- a/drivers/usb/dwc3/dwc3-meson-g12a.c
+++ b/drivers/usb/dwc3/dwc3-meson-g12a.c
@@ -651,7 +651,7 @@ static int dwc3_meson_g12a_setup_regmaps(struct dwc3_meson_g12a *priv,
 		return PTR_ERR(priv->usb_glue_regmap);
 
 	/* Create a regmap for each USB2 PHY control register set */
-	for (i = 0; i < priv->usb2_ports; i++) {
+	for (i = 0; i < priv->drvdata->num_phys; i++) {
 		struct regmap_config u2p_regmap_config = {
 			.reg_bits = 8,
 			.val_bits = 32,
@@ -659,6 +659,9 @@ static int dwc3_meson_g12a_setup_regmaps(struct dwc3_meson_g12a *priv,
 			.max_register = U2P_R1,
 		};
 
+		if (!strstr(priv->drvdata->phy_names[i], "usb2"))
+			continue;
+
 		u2p_regmap_config.name = devm_kasprintf(priv->dev, GFP_KERNEL,
 							"u2p-%d", i);
 		if (!u2p_regmap_config.name)
-- 
GitLab


From 165ea85f14831f27fc6fe3b02b35e42e50b9ed94 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 19 May 2021 17:15:53 -0400
Subject: [PATCH 2244/3804] btrfs: do not write supers if we have an fs error

Error injection testing uncovered a pretty severe problem where we could
end up committing a super that pointed to the wrong tree roots,
resulting in transid mismatch errors.

The way we commit the transaction is we update the super copy with the
current generations and bytenrs of the important roots, and then copy
that into our super_for_commit.  Then we allow transactions to continue
again, we write out the dirty pages for the transaction, and then we
write the super.  If the write out fails we'll bail and skip writing the
supers.

However since we've allowed a new transaction to start, we can have a
log attempting to sync at this point, which would be blocked on
fs_info->tree_log_mutex.  Once the commit fails we're allowed to do the
log tree commit, which uses super_for_commit, which now points at fs
trees that were not written out.

Fix this by checking BTRFS_FS_STATE_ERROR once we acquire the
tree_log_mutex.  This way if the transaction commit fails we're sure to
see this bit set and we can skip writing the super out.  This patch
fixes this specific transid mismatch error I was seeing with this
particular error path.

CC: stable@vger.kernel.org # 5.12+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e4820e88cba03..324a9a078b6ae 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3302,6 +3302,22 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	 *    begins and releases it only after writing its superblock.
 	 */
 	mutex_lock(&fs_info->tree_log_mutex);
+
+	/*
+	 * The previous transaction writeout phase could have failed, and thus
+	 * marked the fs in an error state.  We must not commit here, as we
+	 * could have updated our generation in the super_for_commit and
+	 * writing the super here would result in transid mismatches.  If there
+	 * is an error here just bail.
+	 */
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+		ret = -EIO;
+		btrfs_set_log_full_commit(trans);
+		btrfs_abort_transaction(trans, ret);
+		mutex_unlock(&fs_info->tree_log_mutex);
+		goto out_wake_log_root;
+	}
+
 	btrfs_set_super_log_root(fs_info->super_for_commit, log_root_start);
 	btrfs_set_super_log_root_level(fs_info->super_for_commit, log_root_level);
 	ret = write_all_supers(fs_info, 1);
-- 
GitLab


From 5b434df8778771d181bc19fb4593bca114d1c4eb Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Thu, 27 May 2021 15:27:32 +0900
Subject: [PATCH 2245/3804] btrfs: zoned: fix zone number to sector/physical
 calculation

In btrfs_get_dev_zone_info(), we have "u32 sb_zone" and calculate "sector_t
sector" by shifting it. But, this "sector" is calculated in 32bit, leading
it to be 0 for the 2nd superblock copy.

Since zone number is u32, shifting it to sector (sector_t) or physical
address (u64) can easily trigger a missing cast bug like this.

This commit introduces helpers to convert zone number to sector/LBA, so we
won't fall into the same pitfall again.

Reported-by: Dmitry Fomichev <Dmitry.Fomichev@wdc.com>
Fixes: 12659251ca5d ("btrfs: implement log-structured superblock for ZONED mode")
CC: stable@vger.kernel.org # 5.11+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 1bb8ee97aae09..f1f3b10d1dbbe 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -150,6 +150,18 @@ static inline u32 sb_zone_number(int shift, int mirror)
 	return (u32)zone;
 }
 
+static inline sector_t zone_start_sector(u32 zone_number,
+					 struct block_device *bdev)
+{
+	return (sector_t)zone_number << ilog2(bdev_zone_sectors(bdev));
+}
+
+static inline u64 zone_start_physical(u32 zone_number,
+				      struct btrfs_zoned_device_info *zone_info)
+{
+	return (u64)zone_number << zone_info->zone_size_shift;
+}
+
 /*
  * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
  * device into static sized chunks and fake a conventional zone on each of
@@ -405,8 +417,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
 		if (sb_zone + 1 >= zone_info->nr_zones)
 			continue;
 
-		sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT);
-		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT,
+		ret = btrfs_get_dev_zones(device,
+					  zone_start_physical(sb_zone, zone_info),
 					  &zone_info->sb_zones[sb_pos],
 					  &nr_zones);
 		if (ret)
@@ -721,7 +733,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
 	if (sb_zone + 1 >= nr_zones)
 		return -ENOENT;
 
-	ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift,
+	ret = blkdev_report_zones(bdev, zone_start_sector(sb_zone, bdev),
 				  BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb,
 				  zones);
 	if (ret < 0)
@@ -826,7 +838,7 @@ int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
 		return -ENOENT;
 
 	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
-				sb_zone << zone_sectors_shift,
+				zone_start_sector(sb_zone, bdev),
 				zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
 }
 
@@ -878,7 +890,8 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
 			if (!(end <= sb_zone ||
 			      sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) {
 				have_sb = true;
-				pos = ((u64)sb_zone + BTRFS_NR_SB_LOG_ZONES) << shift;
+				pos = zone_start_physical(
+					sb_zone + BTRFS_NR_SB_LOG_ZONES, zinfo);
 				break;
 			}
 
-- 
GitLab


From e7b2ec3d3d4ebeb4cff7ae45cf430182fa6a49fb Mon Sep 17 00:00:00 2001
From: Ritesh Harjani <riteshh@linux.ibm.com>
Date: Sun, 30 May 2021 20:24:05 +0530
Subject: [PATCH 2246/3804] btrfs: return value from
 btrfs_mark_extent_written() in case of error

We always return 0 even in case of an error in btrfs_mark_extent_written().
Fix it to return proper error value in case of a failure. All callers
handle it.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3b10d98b4ebb3..55f68422061d1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1094,7 +1094,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 	int del_nr = 0;
 	int del_slot = 0;
 	int recow;
-	int ret;
+	int ret = 0;
 	u64 ino = btrfs_ino(inode);
 
 	path = btrfs_alloc_path();
@@ -1315,7 +1315,7 @@ again:
 	}
 out:
 	btrfs_free_path(path);
-	return 0;
+	return ret;
 }
 
 /*
-- 
GitLab


From aefd7f7065567a4666f42c0fc8cdb379d2e036bf Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 31 May 2021 12:26:01 +0300
Subject: [PATCH 2247/3804] btrfs: promote debugging asserts to full-fledged
 checks in validate_super

Syzbot managed to trigger this assert while performing its fuzzing.
Turns out it's better to have those asserts turned into full-fledged
checks so that in case buggy btrfs images are mounted the user gets
an error and mounting is stopped. Alternatively with CONFIG_BTRFS_ASSERT
disabled such image would have been erroneously allowed to be mounted.

Reported-by: syzbot+a6bf271c02e4fe66b4e4@syzkaller.appspotmail.com
CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ add uuids to the messages ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c9a3036c23bfc..8d386a5587ee9 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2648,6 +2648,24 @@ static int validate_super(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}
 
+	if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
+		   BTRFS_FSID_SIZE)) {
+		btrfs_err(fs_info,
+		"superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
+			fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+		ret = -EINVAL;
+	}
+
+	if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
+	    memcmp(fs_info->fs_devices->metadata_uuid,
+		   fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+		btrfs_err(fs_info,
+"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
+			fs_info->super_copy->metadata_uuid,
+			fs_info->fs_devices->metadata_uuid);
+		ret = -EINVAL;
+	}
+
 	if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
 		   BTRFS_FSID_SIZE) != 0) {
 		btrfs_err(fs_info,
@@ -3279,14 +3297,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
 	disk_super = fs_info->super_copy;
 
-	ASSERT(!memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
-		       BTRFS_FSID_SIZE));
-
-	if (btrfs_fs_incompat(fs_info, METADATA_UUID)) {
-		ASSERT(!memcmp(fs_info->fs_devices->metadata_uuid,
-				fs_info->super_copy->metadata_uuid,
-				BTRFS_FSID_SIZE));
-	}
 
 	features = btrfs_super_flags(disk_super);
 	if (features & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
-- 
GitLab


From 063933f47a7af01650af9c4fbcc5831f1c4eb7d9 Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Tue, 1 Jun 2021 00:49:28 +0800
Subject: [PATCH 2248/3804] usb: typec: tcpm: Properly handle Alert and Status
 Messages

When receiving an Alert Message, if it is not unexpected but is
unsupported for some reason, the port should return a Not_Supported
Message response.

Also, according to PD3.0 Spec 6.5.2.1.4 Event Flags Field, the
OTP/OVP/OCP flags in the Event Flags field in Status Message no longer
require Get_PPS_Status Message to clear them. Thus remove it when
receiving Status Message with those flags being set.

In addition, add the missing AMS operations for Status Message.

Fixes: 64f7c494a3c0 ("typec: tcpm: Add support for sink PPS related messages")
Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance")
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210531164928.2368606-1-kyletso@google.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c  | 52 ++++++++++++++++++----------------
 include/linux/usb/pd_ext_sdb.h |  4 ---
 2 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 6161a0c1dc0e6..938a1afd43ecb 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -2188,20 +2188,25 @@ static void tcpm_handle_alert(struct tcpm_port *port, const __le32 *payload,
 
 	if (!type) {
 		tcpm_log(port, "Alert message received with no type");
+		tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
 		return;
 	}
 
 	/* Just handling non-battery alerts for now */
 	if (!(type & USB_PD_ADO_TYPE_BATT_STATUS_CHANGE)) {
-		switch (port->state) {
-		case SRC_READY:
-		case SNK_READY:
+		if (port->pwr_role == TYPEC_SOURCE) {
+			port->upcoming_state = GET_STATUS_SEND;
+			tcpm_ams_start(port, GETTING_SOURCE_SINK_STATUS);
+		} else {
+			/*
+			 * Do not check SinkTxOk here in case the Source doesn't set its Rp to
+			 * SinkTxOk in time.
+			 */
+			port->ams = GETTING_SOURCE_SINK_STATUS;
 			tcpm_set_state(port, GET_STATUS_SEND, 0);
-			break;
-		default:
-			tcpm_queue_message(port, PD_MSG_CTRL_WAIT);
-			break;
 		}
+	} else {
+		tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP);
 	}
 }
 
@@ -2445,7 +2450,12 @@ static void tcpm_pd_data_request(struct tcpm_port *port,
 		tcpm_pd_handle_state(port, BIST_RX, BIST, 0);
 		break;
 	case PD_DATA_ALERT:
-		tcpm_handle_alert(port, msg->payload, cnt);
+		if (port->state != SRC_READY && port->state != SNK_READY)
+			tcpm_pd_handle_state(port, port->pwr_role == TYPEC_SOURCE ?
+					     SRC_SOFT_RESET_WAIT_SNK_TX : SNK_SOFT_RESET,
+					     NONE_AMS, 0);
+		else
+			tcpm_handle_alert(port, msg->payload, cnt);
 		break;
 	case PD_DATA_BATT_STATUS:
 	case PD_DATA_GET_COUNTRY_INFO:
@@ -2769,24 +2779,16 @@ static void tcpm_pd_ext_msg_request(struct tcpm_port *port,
 
 	switch (type) {
 	case PD_EXT_STATUS:
-		/*
-		 * If PPS related events raised then get PPS status to clear
-		 * (see USB PD 3.0 Spec, 6.5.2.4)
-		 */
-		if (msg->ext_msg.data[USB_PD_EXT_SDB_EVENT_FLAGS] &
-		    USB_PD_EXT_SDB_PPS_EVENTS)
-			tcpm_pd_handle_state(port, GET_PPS_STATUS_SEND,
-					     GETTING_SOURCE_SINK_STATUS, 0);
-
-		else
-			tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0);
-		break;
 	case PD_EXT_PPS_STATUS:
-		/*
-		 * For now the PPS status message is used to clear events
-		 * and nothing more.
-		 */
-		tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0);
+		if (port->ams == GETTING_SOURCE_SINK_STATUS) {
+			tcpm_ams_finish(port);
+			tcpm_set_state(port, ready_state(port), 0);
+		} else {
+			/* unexpected Status or PPS_Status Message */
+			tcpm_pd_handle_state(port, port->pwr_role == TYPEC_SOURCE ?
+					     SRC_SOFT_RESET_WAIT_SNK_TX : SNK_SOFT_RESET,
+					     NONE_AMS, 0);
+		}
 		break;
 	case PD_EXT_SOURCE_CAP_EXT:
 	case PD_EXT_GET_BATT_CAP:
diff --git a/include/linux/usb/pd_ext_sdb.h b/include/linux/usb/pd_ext_sdb.h
index 0eb83ce195970..b517ebc8f0ff2 100644
--- a/include/linux/usb/pd_ext_sdb.h
+++ b/include/linux/usb/pd_ext_sdb.h
@@ -24,8 +24,4 @@ enum usb_pd_ext_sdb_fields {
 #define USB_PD_EXT_SDB_EVENT_OVP		BIT(3)
 #define USB_PD_EXT_SDB_EVENT_CF_CV_MODE		BIT(4)
 
-#define USB_PD_EXT_SDB_PPS_EVENTS	(USB_PD_EXT_SDB_EVENT_OCP |	\
-					 USB_PD_EXT_SDB_EVENT_OTP |	\
-					 USB_PD_EXT_SDB_EVENT_OVP)
-
 #endif /* __LINUX_USB_PD_EXT_SDB_H */
-- 
GitLab


From 3a13ff7ef4349d70d1d18378d661117dd5af8efe Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 2 Jun 2021 17:57:07 +0800
Subject: [PATCH 2249/3804] usb: typec: tcpm: cancel vdm and state machine
 hrtimer when unregister tcpm port

A pending hrtimer may expire after the kthread_worker of the tcpm port
is destroyed; see the kernel dump below, captured during module unload.
Fix it by cancelling the two hrtimers.

[  111.517018] Unable to handle kernel paging request at virtual address ffff8000118cb880
[  111.518786] blk_update_request: I/O error, dev sda, sector 60061185 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[  111.526594] Mem abort info:
[  111.526597]   ESR = 0x96000047
[  111.526600]   EC = 0x25: DABT (current EL), IL = 32 bits
[  111.526604]   SET = 0, FnV = 0
[  111.526607]   EA = 0, S1PTW = 0
[  111.526610] Data abort info:
[  111.526612]   ISV = 0, ISS = 0x00000047
[  111.526615]   CM = 0, WnR = 1
[  111.526619] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000041d75000
[  111.526623] [ffff8000118cb880] pgd=10000001bffff003, p4d=10000001bffff003, pud=10000001bfffe003, pmd=10000001bfffa003, pte=0000000000000000
[  111.526642] Internal error: Oops: 96000047 [#1] PREEMPT SMP
[  111.526647] Modules linked in: dwc3_imx8mp dwc3 phy_fsl_imx8mq_usb [last unloaded: tcpci]
[  111.526663] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc4-00927-gebbe9dbd802c-dirty #36
[  111.526670] Hardware name: NXP i.MX8MPlus EVK board (DT)
[  111.526674] pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO BTYPE=--)
[  111.526681] pc : queued_spin_lock_slowpath+0x1a0/0x390
[  111.526695] lr : _raw_spin_lock_irqsave+0x88/0xb4
[  111.526703] sp : ffff800010003e20
[  111.526706] x29: ffff800010003e20 x28: ffff00017f380180
[  111.537156] buffer_io_error: 6 callbacks suppressed
[  111.537162] Buffer I/O error on dev sda1, logical block 60040704, async page read
[  111.539932]  x27: ffff00017f3801c0
[  111.539938] x26: ffff800010ba2490 x25: 0000000000000000 x24: 0000000000000001
[  111.543025] blk_update_request: I/O error, dev sda, sector 60061186 op 0x0:(READ) flags 0x0 phys_seg 7 prio class 0
[  111.548304]
[  111.548306] x23: 00000000000000c0 x22: ffff0000c2a9f184 x21: ffff00017f380180
[  111.551374] Buffer I/O error on dev sda1, logical block 60040705, async page read
[  111.554499]
[  111.554503] x20: ffff0000c5f14210 x19: 00000000000000c0 x18: 0000000000000000
[  111.557391] Buffer I/O error on dev sda1, logical block 60040706, async page read
[  111.561218]
[  111.561222] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[  111.564205] Buffer I/O error on dev sda1, logical block 60040707, async page read
[  111.570887] x14: 00000000000000f5 x13: 0000000000000001 x12: 0000000000000040
[  111.570902] x11: ffff0000c05ac6d8
[  111.583420] Buffer I/O error on dev sda1, logical block 60040708, async page read
[  111.588978]  x10: 0000000000000000 x9 : 0000000000040000
[  111.588988] x8 : 0000000000000000
[  111.597173] Buffer I/O error on dev sda1, logical block 60040709, async page read
[  111.605766]  x7 : ffff00017f384880 x6 : ffff8000118cb880
[  111.605777] x5 : ffff00017f384880
[  111.611094] Buffer I/O error on dev sda1, logical block 60040710, async page read
[  111.617086]  x4 : 0000000000000000 x3 : ffff0000c2a9f184
[  111.617096] x2 : ffff8000118cb880
[  111.622242] Buffer I/O error on dev sda1, logical block 60040711, async page read
[  111.626927]  x1 : ffff8000118cb880 x0 : ffff00017f384888
[  111.626938] Call trace:
[  111.626942]  queued_spin_lock_slowpath+0x1a0/0x390
[  111.795809]  kthread_queue_work+0x30/0xc0
[  111.799828]  state_machine_timer_handler+0x20/0x30
[  111.804624]  __hrtimer_run_queues+0x140/0x1e0
[  111.808990]  hrtimer_interrupt+0xec/0x2c0
[  111.813004]  arch_timer_handler_phys+0x38/0x50
[  111.817456]  handle_percpu_devid_irq+0x88/0x150
[  111.821991]  __handle_domain_irq+0x80/0xe0
[  111.826093]  gic_handle_irq+0xc0/0x140
[  111.829848]  el1_irq+0xbc/0x154
[  111.832991]  arch_cpu_idle+0x1c/0x2c
[  111.836572]  default_idle_call+0x24/0x6c
[  111.840497]  do_idle+0x238/0x2ac
[  111.843729]  cpu_startup_entry+0x2c/0x70
[  111.847657]  rest_init+0xdc/0xec
[  111.850890]  arch_call_rest_init+0x14/0x20
[  111.854988]  start_kernel+0x508/0x540
[  111.858659] Code: 910020e0 8b0200c2 f861d884 aa0203e1 (f8246827)
[  111.864760] ---[ end trace 308b9a4a3dcb73ac ]---
[  111.869381] Kernel panic - not syncing: Oops: Fatal exception in interrupt
[  111.876258] SMP: stopping secondary CPUs
[  111.880185] Kernel Offset: disabled
[  111.883673] CPU features: 0x00001001,20000846
[  111.888031] Memory Limit: none
[  111.891090] ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]---

Fixes: 3ed8e1c2ac99 ("usb: typec: tcpm: Migrate workqueue to RT priority for processing events")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1622627829-11070-1-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 938a1afd43ecb..7ca452016e03a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -6335,6 +6335,9 @@ void tcpm_unregister_port(struct tcpm_port *port)
 {
 	int i;
 
+	hrtimer_cancel(&port->vdm_state_machine_timer);
+	hrtimer_cancel(&port->state_machine_timer);
+
 	tcpm_reset_port(port);
 	for (i = 0; i < ARRAY_SIZE(port->port_altmode); i++)
 		typec_unregister_altmode(port->port_altmode[i]);
-- 
GitLab


From 7ade4805e296c8d1e40c842395bbe478c7210555 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 2 Jun 2021 17:57:08 +0800
Subject: [PATCH 2250/3804] usb: typec: tcpm: cancel frs hrtimer when
 unregister tcpm port

Like the state_machine_timer, we should also cancel a possibly pending
frs hrtimer when unregistering the tcpm port.

Fixes: 8dc4bd073663 ("usb: typec: tcpm: Add support for Sink Fast Role SWAP(FRS)")
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1622627829-11070-2-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 7ca452016e03a..a1382e878127f 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -6335,6 +6335,7 @@ void tcpm_unregister_port(struct tcpm_port *port)
 {
 	int i;
 
+	hrtimer_cancel(&port->enable_frs_timer);
 	hrtimer_cancel(&port->vdm_state_machine_timer);
 	hrtimer_cancel(&port->state_machine_timer);
 
-- 
GitLab


From 024236abeba8194c23affedaaa8b1aee7b943890 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 2 Jun 2021 17:57:09 +0800
Subject: [PATCH 2251/3804] usb: typec: tcpm: cancel send discover hrtimer when
 unregister tcpm port

Like the state_machine_timer, we should also cancel a possibly pending
send discover identity hrtimer when unregistering the tcpm port.

Fixes: c34e85fa69b9 ("usb: typec: tcpm: Send DISCOVER_IDENTITY from dedicated work")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1622627829-11070-3-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index a1382e878127f..a7c336f56849c 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -6335,6 +6335,7 @@ void tcpm_unregister_port(struct tcpm_port *port)
 {
 	int i;
 
+	hrtimer_cancel(&port->send_discover_timer);
 	hrtimer_cancel(&port->enable_frs_timer);
 	hrtimer_cancel(&port->vdm_state_machine_timer);
 	hrtimer_cancel(&port->state_machine_timer);
-- 
GitLab


From 6fc1db5e6211e30fbb1cee8d7925d79d4ed2ae14 Mon Sep 17 00:00:00 2001
From: Wesley Cheng <wcheng@codeaurora.org>
Date: Fri, 21 May 2021 17:44:21 -0700
Subject: [PATCH 2252/3804] usb: gadget: f_fs: Ensure io_completion_wq is idle
 during unbind

During unbind, ffs_func_eps_disable() will be executed, resulting in
completion callbacks for any pending USB requests.  When using AIO,
irrespective of the completion status, io_data work is queued to
io_completion_wq to evaluate and handle the completed requests.  Since
work runs asynchronously to the unbind() routine, there can be a
scenario where the work runs after the USB gadget has been fully
removed, resulting in accessing of a resource which has been already
freed. (i.e. usb_ep_free_request() accessing the USB ep structure)

Explicitly drain the io_completion_wq, instead of relying on the
destroy_workqueue() (in ffs_data_put()) to make sure no pending
completion work items are running.

Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/1621644261-1236-1-git-send-email-wcheng@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index bf109191659a5..d4844afeaffc2 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3567,6 +3567,9 @@ static void ffs_func_unbind(struct usb_configuration *c,
 		ffs->func = NULL;
 	}
 
+	/* Drain any pending AIO completions */
+	drain_workqueue(ffs->io_completion_wq);
+
 	if (!--opts->refcnt)
 		functionfs_unbind(ffs);
 
-- 
GitLab


From 49783c6f4a4f49836b5a109ae0daf2f90b0d7713 Mon Sep 17 00:00:00 2001
From: Oder Chiou <oder_chiou@realtek.com>
Date: Fri, 4 Jun 2021 14:31:50 +0800
Subject: [PATCH 2253/3804] ASoC: rt5682: Fix the fast discharge for headset
 unplugging in soundwire mode

Based on commit 5a15cd7fce20b1fd4aece6a0240e2b58cd6a225d, the setting
should also be applied in soundwire mode.

Signed-off-by: Oder Chiou <oder_chiou@realtek.com>
Link: https://lore.kernel.org/r/20210604063150.29925-1-oder_chiou@realtek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/rt5682-sdw.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
index fed80c8f994fd..e78ba3b064c4f 100644
--- a/sound/soc/codecs/rt5682-sdw.c
+++ b/sound/soc/codecs/rt5682-sdw.c
@@ -462,7 +462,8 @@ static int rt5682_io_init(struct device *dev, struct sdw_slave *slave)
 
 	regmap_update_bits(rt5682->regmap, RT5682_CBJ_CTRL_2,
 		RT5682_EXT_JD_SRC, RT5682_EXT_JD_SRC_MANUAL);
-	regmap_write(rt5682->regmap, RT5682_CBJ_CTRL_1, 0xd042);
+	regmap_write(rt5682->regmap, RT5682_CBJ_CTRL_1, 0xd142);
+	regmap_update_bits(rt5682->regmap, RT5682_CBJ_CTRL_5, 0x0700, 0x0600);
 	regmap_update_bits(rt5682->regmap, RT5682_CBJ_CTRL_3,
 		RT5682_CBJ_IN_BUF_EN, RT5682_CBJ_IN_BUF_EN);
 	regmap_update_bits(rt5682->regmap, RT5682_SAR_IL_CMD_1,
-- 
GitLab


From 6ba53317d497dec029bfb040b1daf38328fa00ab Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 26 May 2021 22:58:51 +1000
Subject: [PATCH 2254/3804] KVM: PPC: Book3S HV: Save host FSCR in the P7/8
 path

Similar to commit 25edcc50d76c ("KVM: PPC: Book3S HV: Save and restore
FSCR in the P9 path"), ensure the P7/8 path saves and restores the host
FSCR. The logic explained in that patch applies to the old path as
well: a context switch can be made before kvmppc_vcpu_run_hv
restores the host FSCR and returns.

Now that both the p9 and the p7/8 paths save and restore their FSCR, it
no longer needs to be restored at the end of kvmppc_vcpu_run_hv.

Fixes: b005255e12a3 ("KVM: PPC: Book3S HV: Context-switch new POWER8 SPRs")
Cc: stable@vger.kernel.org # v3.14+
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210526125851.3436735-1-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c            | 1 -
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a80d240b764..13728495ac660 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4455,7 +4455,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 		mtspr(SPRN_EBBRR, ebb_regs[1]);
 		mtspr(SPRN_BESCR, ebb_regs[2]);
 		mtspr(SPRN_TAR, user_tar);
-		mtspr(SPRN_FSCR, current->thread.fscr);
 	}
 	mtspr(SPRN_VRSAVE, user_vrsave);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 5e634db4809bf..004f0d4e665f8 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -59,6 +59,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_UAMOR	(SFS-88)
 #define STACK_SLOT_DAWR1	(SFS-96)
 #define STACK_SLOT_DAWRX1	(SFS-104)
+#define STACK_SLOT_FSCR		(SFS-112)
 /* the following is used by the P9 short path */
 #define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
 
@@ -686,6 +687,8 @@ BEGIN_FTR_SECTION
 	std	r6, STACK_SLOT_DAWR0(r1)
 	std	r7, STACK_SLOT_DAWRX0(r1)
 	std	r8, STACK_SLOT_IAMR(r1)
+	mfspr	r5, SPRN_FSCR
+	std	r5, STACK_SLOT_FSCR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 BEGIN_FTR_SECTION
 	mfspr	r6, SPRN_DAWR1
@@ -1663,6 +1666,10 @@ FTR_SECTION_ELSE
 	ld	r7, STACK_SLOT_HFSCR(r1)
 	mtspr	SPRN_HFSCR, r7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
+BEGIN_FTR_SECTION
+	ld	r5, STACK_SLOT_FSCR(r1)
+	mtspr	SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	/*
 	 * Restore various registers to 0, where non-zero values
 	 * set by the guest could disrupt the host.
-- 
GitLab


From f501b6a2312e27fffe671d461770426fe5162184 Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Thu, 27 May 2021 11:11:05 +0200
Subject: [PATCH 2255/3804] debugfs: Fix debugfs_read_file_str()

Read the entire size of the buffer, including the trailing new line
character.
Discovered while reading the sched domain names of CPU0:

before:

cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
SMTMCDIE

after:

cat /sys/kernel/debug/sched/domains/cpu0/domain*/name
SMT
MC
DIE

Fixes: 9af0440ec86eb ("debugfs: Implement debugfs_create_str()")
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20210527091105.258457-1-dietmar.eggemann@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index e813acfaa6e8a..ba7c01cd9a5d2 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -893,7 +893,7 @@ ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
 
 	copy[copy_len] = '\n';
 
-	ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len);
+	ret = simple_read_from_buffer(user_buf, count, ppos, copy, len);
 	kfree(copy);
 
 	return ret;
-- 
GitLab


From 2dc065eae56df804e4da5f8a9e4139033f7ea605 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 2 Jun 2021 14:22:40 -0700
Subject: [PATCH 2256/3804] perf evsel: Add missing cloning of
 evsel->use_config_name

The evsel__clone() should copy all fields in the evsel which are set
during the event parsing.  But it missed the use_config_name field.

Fixes: 12279429d862 ("perf stat: Uniquify hybrid event name")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210602212241.2175005-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 1 +
 tools/perf/util/evsel.h | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 4a3cd1b5bb33e..a8d8463f8ee5d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -428,6 +428,7 @@ struct evsel *evsel__clone(struct evsel *orig)
 	evsel->auto_merge_stats = orig->auto_merge_stats;
 	evsel->collect_stat = orig->collect_stat;
 	evsel->weak_group = orig->weak_group;
+	evsel->use_config_name = orig->use_config_name;
 
 	if (evsel__copy_config_terms(evsel, orig) < 0)
 		goto out_err;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 75cf5dbfe2080..bdad52a064381 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -83,8 +83,10 @@ struct evsel {
 		bool			collect_stat;
 		bool			weak_group;
 		bool			bpf_counter;
+		bool			use_config_name;
 		int			bpf_fd;
 		struct bpf_object	*bpf_obj;
+		struct list_head	config_terms;
 	};
 
 	/*
@@ -116,10 +118,8 @@ struct evsel {
 	bool			merged_stat;
 	bool			reset_group;
 	bool			errored;
-	bool			use_config_name;
 	struct hashmap		*per_pkg_mask;
 	struct evsel		*leader;
-	struct list_head	config_terms;
 	int			err;
 	int			cpu_iter;
 	struct {
-- 
GitLab


From 3cc84399e9b60463bc39cf352ffd8bccb92e02bd Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 2 Jun 2021 14:22:41 -0700
Subject: [PATCH 2257/3804] perf stat: Honor event config name on --no-merge

If user gave an event name explicitly, it should be displayed in the
output as is.  But with --no-merge option it adds a pmu name at the
end so might confuse users.

Actually this is true for hybrid pmus, I think we should do the same
for others.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210602212241.2175005-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a76fff5e7d830..ca326f98c7a2a 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -541,7 +541,7 @@ static void uniquify_event_name(struct evsel *counter)
 	char *config;
 	int ret = 0;
 
-	if (counter->uniquified_name ||
+	if (counter->uniquified_name || counter->use_config_name ||
 	    !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
 					   strlen(counter->pmu_name)))
 		return;
@@ -555,10 +555,8 @@ static void uniquify_event_name(struct evsel *counter)
 		}
 	} else {
 		if (perf_pmu__has_hybrid()) {
-			if (!counter->use_config_name) {
-				ret = asprintf(&new_name, "%s/%s/",
-					       counter->pmu_name, counter->name);
-			}
+			ret = asprintf(&new_name, "%s/%s/",
+				       counter->pmu_name, counter->name);
 		} else {
 			ret = asprintf(&new_name, "%s [%s]",
 				       counter->name, counter->pmu_name);
-- 
GitLab


From 69c9ffed6cede9c11697861f654946e3ae95a930 Mon Sep 17 00:00:00 2001
From: Riccardo Mancini <rickyman7@gmail.com>
Date: Thu, 3 Jun 2021 00:08:33 +0200
Subject: [PATCH 2258/3804] perf symbol-elf: Fix memory leak by freeing
 sdt_note.args

Reported by ASan.

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Fabian Hemmer <copy@copy.sh>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Remi Bernon <rbernon@codeweavers.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Link: http://lore.kernel.org/lkml/20210602220833.285226-1-rickyman7@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4c56aa8374344..a73345730ba90 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -2412,6 +2412,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes)
 
 	list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
 		list_del_init(&pos->note_list);
+		zfree(&pos->args);
 		zfree(&pos->name);
 		zfree(&pos->provider);
 		free(pos);
-- 
GitLab


From 5405b42c2f08efe67b531799ba2fdb35bac93e70 Mon Sep 17 00:00:00 2001
From: Jiashuo Liang <liangjs@pku.edu.cn>
Date: Tue, 1 Jun 2021 16:52:03 +0800
Subject: [PATCH 2259/3804] x86/fault: Don't send SIGSEGV twice on SEGV_PKUERR

__bad_area_nosemaphore() calls both force_sig_pkuerr() and
force_sig_fault() when handling SEGV_PKUERR. This does not cause
problems because the second signal is filtered by the legacy_queue()
check in __send_signal() because in both cases, the signal is SIGSEGV,
the second one seeing that the first one is already pending.

This causes the kernel to do unnecessary work so send the signal only
once for SEGV_PKUERR.

 [ bp: Massage commit message. ]

Fixes: 9db812dbb29d ("signal/x86: Call force_sig_pkuerr from __bad_area_nosemaphore")
Suggested-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Jiashuo Liang <liangjs@pku.edu.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Link: https://lkml.kernel.org/r/20210601085203.40214-1-liangjs@pku.edu.cn
---
 arch/x86/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1c548ad007520..6bda7f67d737e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -836,8 +836,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 
 	if (si_code == SEGV_PKUERR)
 		force_sig_pkuerr((void __user *)address, pkey);
-
-	force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+	else
+		force_sig_fault(SIGSEGV, si_code, (void __user *)address);
 
 	local_irq_disable();
 }
-- 
GitLab


From 67069a1f0fe5f9eeca86d954fff2087f5542a008 Mon Sep 17 00:00:00 2001
From: Riccardo Mancini <rickyman7@gmail.com>
Date: Thu, 3 Jun 2021 00:40:23 +0200
Subject: [PATCH 2260/3804] perf env: Fix memory leak of bpf_prog_info_linear
 member

ASan reported a memory leak caused by info_linear not being deallocated.

The info_linear was allocated in perf_event__synthesize_one_bpf_prog().

This patch adds the corresponding free() when bpf_prog_info_node
is freed in perf_env__purge_bpf().

  $ sudo ./perf record -- sleep 5
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.025 MB perf.data (8 samples) ]

  =================================================================
  ==297735==ERROR: LeakSanitizer: detected memory leaks

  Direct leak of 7688 byte(s) in 19 object(s) allocated from:
      #0 0x4f420f in malloc (/home/user/linux/tools/perf/perf+0x4f420f)
      #1 0xc06a74 in bpf_program__get_prog_info_linear /home/user/linux/tools/lib/bpf/libbpf.c:11113:16
      #2 0xb426fe in perf_event__synthesize_one_bpf_prog /home/user/linux/tools/perf/util/bpf-event.c:191:16
      #3 0xb42008 in perf_event__synthesize_bpf_events /home/user/linux/tools/perf/util/bpf-event.c:410:9
      #4 0x594596 in record__synthesize /home/user/linux/tools/perf/builtin-record.c:1490:8
      #5 0x58c9ac in __cmd_record /home/user/linux/tools/perf/builtin-record.c:1798:8
      #6 0x58990b in cmd_record /home/user/linux/tools/perf/builtin-record.c:2901:8
      #7 0x7b2a20 in run_builtin /home/user/linux/tools/perf/perf.c:313:11
      #8 0x7b12ff in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8
      #9 0x7b2583 in run_argv /home/user/linux/tools/perf/perf.c:409:2
      #10 0x7b0d79 in main /home/user/linux/tools/perf/perf.c:539:3
      #11 0x7fa357ef6b74 in __libc_start_main /usr/src/debug/glibc-2.33-8.fc34.x86_64/csu/../csu/libc-start.c:332:16

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Link: http://lore.kernel.org/lkml/20210602224024.300485-1-rickyman7@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/env.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 9130f6fad8d54..bc5e4f294e9e9 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -144,6 +144,7 @@ static void perf_env__purge_bpf(struct perf_env *env)
 		node = rb_entry(next, struct bpf_prog_info_node, rb_node);
 		next = rb_next(&node->rb_node);
 		rb_erase(&node->rb_node, root);
+		free(node->info_linear);
 		free(node);
 	}
 
-- 
GitLab


From 1faa491a49d53f5d1c8c23bdf01763cfc00a2b19 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 2 Jun 2021 04:21:08 -0700
Subject: [PATCH 2261/3804] sched/debug: Remove obsolete init_schedstats()

Revert commit 4698f88c06b8 ("sched/debug: Fix 'schedstats=enable'
cmdline option").

After commit 6041186a3258 ("init: initialize jump labels before
command line option parsing") we can rely on jump label infra being
ready for use when setup_schedstats() is called.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Link: https://lkml.kernel.org/r/20210602112108.1709635-1-eric.dumazet@gmail.com
---
 kernel/sched/core.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7e59466987112..9e9a5be35cde9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4009,7 +4009,6 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #ifdef CONFIG_SCHEDSTATS
 
 DEFINE_STATIC_KEY_FALSE(sched_schedstats);
-static bool __initdata __sched_schedstats = false;
 
 static void set_schedstats(bool enabled)
 {
@@ -4033,16 +4032,11 @@ static int __init setup_schedstats(char *str)
 	if (!str)
 		goto out;
 
-	/*
-	 * This code is called before jump labels have been set up, so we can't
-	 * change the static branch directly just yet.  Instead set a temporary
-	 * variable so init_schedstats() can do it later.
-	 */
 	if (!strcmp(str, "enable")) {
-		__sched_schedstats = true;
+		set_schedstats(true);
 		ret = 1;
 	} else if (!strcmp(str, "disable")) {
-		__sched_schedstats = false;
+		set_schedstats(false);
 		ret = 1;
 	}
 out:
@@ -4053,11 +4047,6 @@ out:
 }
 __setup("schedstats=", setup_schedstats);
 
-static void __init init_schedstats(void)
-{
-	set_schedstats(__sched_schedstats);
-}
-
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
 		size_t *lenp, loff_t *ppos)
@@ -4079,8 +4068,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
 	return err;
 }
 #endif /* CONFIG_PROC_SYSCTL */
-#else  /* !CONFIG_SCHEDSTATS */
-static inline void init_schedstats(void) {}
 #endif /* CONFIG_SCHEDSTATS */
 
 /*
@@ -9089,8 +9076,6 @@ void __init sched_init(void)
 #endif
 	init_sched_fair_class();
 
-	init_schedstats();
-
 	psi_init();
 
 	init_uclamp();
-- 
GitLab


From 429b2ba70812fc8ce7c591e787ec0f2b48d13319 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 3 Jun 2021 11:33:49 +0100
Subject: [PATCH 2262/3804] EDAC/mce_amd: Fix typo "FIfo" -> "Fifo"

There is an uppercase letter I in one of the MCE error descriptions
instead of a lowercase one. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lkml.kernel.org/r/20210603103349.79117-1-colin.king@canonical.com
---
 drivers/edac/mce_amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 43ba0f931629f..27d56920b4690 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -431,7 +431,7 @@ static const char * const smca_xgmipcs_mce_desc[] = {
 	"Replay Buffer Parity Error",
 	"Data Parity Error",
 	"Replay Fifo Overflow Error",
-	"Replay FIfo Underflow Error",
+	"Replay Fifo Underflow Error",
 	"Elastic Fifo Overflow Error",
 	"Deskew Error",
 	"Flow Control CRC Error",
-- 
GitLab


From 5eee5eced95f1b35c8567688ed52932b7e58deee Mon Sep 17 00:00:00 2001
From: Rudi Heitbaum <rudi@heitbaum.com>
Date: Wed, 2 Jun 2021 11:29:47 +0000
Subject: [PATCH 2263/3804] regulator: fan53555: add tcs4526

For rk3399pro boards the tcs4526 regulator supports the vdd_gpu
regulator. The tcs4526 regulator has a chip id of <0>.
Add the compatible tcs,tcs4526

without this patch, the dmesg output is:
  fan53555-regulator 0-0010: Chip ID 0 not supported!
  fan53555-regulator 0-0010: Failed to setup device!
  fan53555-regulator: probe of 0-0010 failed with error -22
with this patch, the dmesg output is:
  vdd_gpu: supplied by vcc5v0_sys

The regulators are described as:
- Dedicated power management IC TCS4525
- Lithium battery protection chip TCS4526

This has been tested with a Radxa Rock Pi N10.

Signed-off-by: Rudi Heitbaum <rudi@heitbaum.com>
Link: https://lore.kernel.org/r/20210602112943.GA119@5f9be87369f8
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53555.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c
index eb67500ad279e..dac1fb584fa35 100644
--- a/drivers/regulator/fan53555.c
+++ b/drivers/regulator/fan53555.c
@@ -92,6 +92,10 @@ enum {
 	TCS4525_CHIP_ID_12 = 12,
 };
 
+enum {
+	TCS4526_CHIP_ID_00 = 0,
+};
+
 /* IC mask revision */
 enum {
 	FAN53555_CHIP_REV_00 = 0x3,
@@ -341,6 +345,7 @@ static int fan53526_voltages_setup_tcs(struct fan53555_device_info *di)
 {
 	switch (di->chip_id) {
 	case TCS4525_CHIP_ID_12:
+	case TCS4526_CHIP_ID_00:
 		di->slew_reg = TCS4525_TIME;
 		di->slew_mask = TCS_SLEW_MASK;
 		di->ramp_delay_table = tcs_slew_rates;
@@ -536,6 +541,9 @@ static const struct of_device_id __maybe_unused fan53555_dt_ids[] = {
 	}, {
 		.compatible = "tcs,tcs4525",
 		.data = (void *)FAN53526_VENDOR_TCS
+	}, {
+		.compatible = "tcs,tcs4526",
+		.data = (void *)FAN53526_VENDOR_TCS
 	},
 	{ }
 };
@@ -644,6 +652,9 @@ static const struct i2c_device_id fan53555_id[] = {
 	}, {
 		.name = "tcs4525",
 		.driver_data = FAN53526_VENDOR_TCS
+	}, {
+		.name = "tcs4526",
+		.driver_data = FAN53526_VENDOR_TCS
 	},
 	{ },
 };
-- 
GitLab


From 6829222b408f5aa5222c18ea3f492cac19fa9405 Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Fri, 4 Jun 2021 09:50:09 +0200
Subject: [PATCH 2264/3804] spi: stm32-qspi: Fix W=1 build warning

Fix the following compilation warning using W=1 build:
arm-linux-gnueabi-ld: drivers/spi/spi-stm32-qspi.o: in function `stm32_qspi_poll_status':

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Link: https://lore.kernel.org/r/20210604075009.25914-1-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index f4481fe48bf06..e71a4c514f7bd 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -806,6 +806,7 @@ config SPI_STM32_QSPI
 	tristate "STMicroelectronics STM32 QUAD SPI controller"
 	depends on ARCH_STM32 || COMPILE_TEST
 	depends on OF
+	depends on SPI_MEM
 	help
 	  This enables support for the Quad SPI controller in master mode.
 	  This driver does not support generic SPI. The implementation only
-- 
GitLab


From f0457690af56673cb0c47af6e25430389a149225 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 26 Feb 2021 13:19:20 -0800
Subject: [PATCH 2265/3804] ice: Fix allowing VF to request more/less queues
 via virtchnl

Commit 12bb018c538c ("ice: Refactor VF reset") caused a regression
that removes the ability for a VF to request a different amount of
queues via VIRTCHNL_OP_REQUEST_QUEUES. This prevents VF drivers from
either increasing or decreasing the number of queue pairs they are
allocated. Fix this by using the variable vf->num_req_qs when
determining the vf->num_vf_qs during VF VSI creation.

Fixes: 12bb018c538c ("ice: Refactor VF reset")
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7f7653906fcef..d70ee573fde5b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -200,6 +200,8 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 		break;
 	case ICE_VSI_VF:
 		vf = &pf->vf[vsi->vf_id];
+		if (vf->num_req_qs)
+			vf->num_vf_qs = vf->num_req_qs;
 		vsi->alloc_txq = vf->num_vf_qs;
 		vsi->alloc_rxq = vf->num_vf_qs;
 		/* pf->num_msix_per_vf includes (VF miscellaneous vector +
-- 
GitLab


From 8679f07a9922068b9b6be81b632f52cac45d1b91 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 26 Feb 2021 13:19:21 -0800
Subject: [PATCH 2266/3804] ice: Fix VFR issues for AVF drivers that expect
 ATQLEN cleared

Some AVF drivers expect the VF_MBX_ATQLEN register to be cleared for any
type of VFR/VFLR. Fix this by clearing the VF_MBX_ATQLEN register at the
same time as VF_MBX_ARQLEN.

Fixes: 82ba01282cf8 ("ice: clear VF ARQLEN register on reset")
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h  |  1 +
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index de38a0fc9665d..9b8300d4a2674 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -31,6 +31,7 @@
 #define PF_FW_ATQLEN_ATQOVFL_M			BIT(29)
 #define PF_FW_ATQLEN_ATQCRIT_M			BIT(30)
 #define VF_MBX_ARQLEN(_VF)			(0x0022BC00 + ((_VF) * 4))
+#define VF_MBX_ATQLEN(_VF)			(0x0022A800 + ((_VF) * 4))
 #define PF_FW_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_FW_ATQT				0x00080400
 #define PF_MBX_ARQBAH				0x0022E400
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a1d22d2aa0bdd..944d861c85797 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -713,13 +713,15 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
 
-	/* VF_MBX_ARQLEN is cleared by PFR, so the driver needs to clear it
-	 * in the case of VFR. If this is done for PFR, it can mess up VF
-	 * resets because the VF driver may already have started cleanup
-	 * by the time we get here.
+	/* VF_MBX_ARQLEN and VF_MBX_ATQLEN are cleared by PFR, so the driver
+	 * needs to clear them in the case of VFR/VFLR. If this is done for
+	 * PFR, it can mess up VF resets because the VF driver may already
+	 * have started cleanup by the time we get here.
 	 */
-	if (!is_pfr)
+	if (!is_pfr) {
 		wr32(hw, VF_MBX_ARQLEN(vf->vf_id), 0);
+		wr32(hw, VF_MBX_ATQLEN(vf->vf_id), 0);
+	}
 
 	/* In the case of a VFLR, the HW has already reset the VF and we
 	 * just need to clean up, so don't hit the VFRTRIG register.
-- 
GitLab


From c7ee6ce1cf60b7fcdbdd2354d377d00bae3fa2d2 Mon Sep 17 00:00:00 2001
From: Haiyue Wang <haiyue.wang@intel.com>
Date: Fri, 26 Feb 2021 13:19:31 -0800
Subject: [PATCH 2267/3804] ice: handle the VF VSI rebuild failure

VSI rebuild can fail for LAN queue config, in which case the VF's VSI
will be NULL. The VF reset should then be stopped, with the VF entering
the disabled state.

Fixes: 12bb018c538c ("ice: Refactor VF reset")
Signed-off-by: Haiyue Wang <haiyue.wang@intel.com>
Tested-by: Konrad Jankowski <konrad0.jankowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 944d861c85797..97a46c616aca7 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1700,7 +1700,12 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 		ice_vf_ctrl_vsi_release(vf);
 
 	ice_vf_pre_vsi_rebuild(vf);
-	ice_vf_rebuild_vsi_with_release(vf);
+
+	if (ice_vf_rebuild_vsi_with_release(vf)) {
+		dev_err(dev, "Failed to release and setup the VF%u's VSI\n", vf->vf_id);
+		return false;
+	}
+
 	ice_vf_post_vsi_rebuild(vf);
 
 	/* if the VF has been reset allow it to come up again */
-- 
GitLab


From 5cd349c349d6ec52862e550d3576893d35ab8ac2 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Wed, 5 May 2021 14:17:58 -0700
Subject: [PATCH 2268/3804] ice: report supported and advertised autoneg using
 PHY capabilities

Ethtool incorrectly reported supported and advertised auto-negotiation
settings for a backplane PHY image which did not support auto-negotiation.
This can occur when using media or PHY type for reporting ethtool
supported and advertised auto-negotiation settings.

Remove setting supported and advertised auto-negotiation settings based
on PHY type in ice_phy_type_to_ethtool(), and MAC type in
ice_get_link_ksettings().

Ethtool supported and advertised auto-negotiation settings should be
based on the PHY image using the AQ command get PHY capabilities with
media. Add setting supported and advertised auto-negotiation settings
based on get PHY capabilities with media in ice_get_link_ksettings().

Fixes: 48cb27f2fd18 ("ice: Implement handlers for ethtool PHY/link operations")
Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 51 +++-----------------
 1 file changed, 6 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index d9ddd0bcf65f8..99301ad95290d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1773,49 +1773,6 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
 		ice_ethtool_advertise_link_mode(ICE_AQ_LINK_SPEED_100GB,
 						100000baseKR4_Full);
 	}
-
-	/* Autoneg PHY types */
-	if (phy_types_low & ICE_PHY_TYPE_LOW_100BASE_TX ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_T ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_T ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_T ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_T ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR_S ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_CR1 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR_S ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_CR4 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
-		ethtool_link_ksettings_add_link_mode(ks, supported,
-						     Autoneg);
-		ethtool_link_ksettings_add_link_mode(ks, advertising,
-						     Autoneg);
-	}
-	if (phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CR2 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR2 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_CP ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
-		ethtool_link_ksettings_add_link_mode(ks, supported,
-						     Autoneg);
-		ethtool_link_ksettings_add_link_mode(ks, advertising,
-						     Autoneg);
-	}
-	if (phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CR4 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR4 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_KR_PAM4 ||
-	    phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_CP2) {
-		ethtool_link_ksettings_add_link_mode(ks, supported,
-						     Autoneg);
-		ethtool_link_ksettings_add_link_mode(ks, advertising,
-						     Autoneg);
-	}
 }
 
 #define TEST_SET_BITS_TIMEOUT	50
@@ -1972,9 +1929,7 @@ ice_get_link_ksettings(struct net_device *netdev,
 		ks->base.port = PORT_TP;
 		break;
 	case ICE_MEDIA_BACKPLANE:
-		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
 		ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
-		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
 		ethtool_link_ksettings_add_link_mode(ks, advertising,
 						     Backplane);
 		ks->base.port = PORT_NONE;
@@ -2049,6 +2004,12 @@ ice_get_link_ksettings(struct net_device *netdev,
 	if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
 		ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
 
+	/* Set supported and advertised autoneg */
+	if (ice_is_phy_caps_an_enabled(caps)) {
+		ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+	}
+
 done:
 	kfree(caps);
 	return err;
-- 
GitLab


From f9f83202b7263ac371d616d6894a2c9ed79158ef Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Wed, 5 May 2021 14:17:59 -0700
Subject: [PATCH 2269/3804] ice: Allow all LLDP packets from PF to Tx

Currently in the ice driver, the check whether to
allow a LLDP packet to egress the interface from the
PF_VSI is being based on the SKB's priority field.
It checks to see if the packet's priority is equal to
TC_PRIO_CONTROL.  Injected LLDP packets do not always
meet this condition.

SCAPY defaults to a sk_buff->protocol value of ETH_P_ALL
(0x0003) and does not set the priority field.  There will
be other injection methods (even ones used by end users)
that will not correctly configure the socket so that
SKB fields are correctly populated.

The ethernet header has to have the correct value for
the protocol though.

Add a check to also allow packets whose ethhdr->h_proto
matches ETH_P_LLDP (0x88CC).

Fixes: 0c3a6101ff2d ("ice: Allow egress control packets from PF_VSI")
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Tony Brelinski <tonyx.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 93e5d9ebfd74c..04748aa4c7c8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2149,6 +2149,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 	struct ice_tx_offload_params offload = { 0 };
 	struct ice_vsi *vsi = tx_ring->vsi;
 	struct ice_tx_buf *first;
+	struct ethhdr *eth;
 	unsigned int count;
 	int tso, csum;
 
@@ -2195,7 +2196,9 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 		goto out_drop;
 
 	/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
-	if (unlikely(skb->priority == TC_PRIO_CONTROL &&
+	eth = (struct ethhdr *)skb_mac_header(skb);
+	if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+		      eth->h_proto == htons(ETH_P_LLDP)) &&
 		     vsi->type == ICE_VSI_PF &&
 		     vsi->port_info->qos_cfg.is_sw_lldp))
 		offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
-- 
GitLab


From 519d8ab17682da5f2fae5941d906d85b9fd3593a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 19 May 2021 21:43:50 +0200
Subject: [PATCH 2270/3804] virtchnl: Add missing padding to
 virtchnl_proto_hdrs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On m68k (Coldfire M547x):

      CC      drivers/net/ethernet/intel/i40e/i40e_main.o
    In file included from drivers/net/ethernet/intel/i40e/i40e_prototype.h:9,
		     from drivers/net/ethernet/intel/i40e/i40e.h:41,
		     from drivers/net/ethernet/intel/i40e/i40e_main.c:12:
    include/linux/avf/virtchnl.h:153:36: warning: division by zero [-Wdiv-by-zero]
      153 |  { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
	  |                                    ^
    include/linux/avf/virtchnl.h:844:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
      844 | VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
	  | ^~~~~~~~~~~~~~~~~~~~~~~~~
    include/linux/avf/virtchnl.h:844:33: error: enumerator value for ‘virtchnl_static_assert_virtchnl_proto_hdrs’ is not an integer constant
      844 | VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
	  |                                 ^~~~~~~~~~~~~~~~~~~

On m68k, integers are aligned on addresses that are multiples of two,
not four, bytes.  Hence the size of a structure containing integers may
not be divisible by 4.

Fix this by adding explicit padding.

Fixes: 1f7ea1cd6a374842 ("ice: Enable FDIR Configure for AVF")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/avf/virtchnl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 565deea6ffe88..8612f8fc86c1d 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -830,6 +830,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
 
 struct virtchnl_proto_hdrs {
 	u8 tunnel_level;
+	u8 pad[3];
 	/**
 	 * specify where protocol header start from.
 	 * 0 - from the outer layer
-- 
GitLab


From 314a1e1eabea5b86532e90e0d4e217fa88471e3b Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Thu, 3 Jun 2021 16:08:10 -0700
Subject: [PATCH 2271/3804] x86/pkeys: Skip 'init_pkru' debugfs file creation
 when pkeys not supported

The PKRU hardware is permissive by default: all reads and writes are
allowed.  The in-kernel policy is restrictive by default: deny all
unnecessary access until explicitly requested.

That policy can be modified with a debugfs file: "x86/init_pkru".
This file is created unconditionally, regardless of PKRU support in
the hardware, which is a little silly.

Avoid creating the file when pkeys are not available.  This also
removes the need to check for pkey support at runtime, which would be
required once the new pkey modification infrastructure is put in place
later in this series.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210603230810.113FF3F2@viggo.jf.intel.com
---
 arch/x86/mm/pkeys.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index a2332eef66e9f..4a67b922bce1e 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -192,6 +192,10 @@ static const struct file_operations fops_init_pkru = {
 
 static int __init create_init_pkru_value(void)
 {
+	/* Do not expose the file if pkeys are not supported. */
+	if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+		return 0;
+
 	debugfs_create_file("init_pkru", S_IRUSR | S_IWUSR,
 			arch_debugfs_dir, NULL, &fops_init_pkru);
 	return 0;
-- 
GitLab


From 009767dbf42ac0dbe3cf48c1ee224f6b778aa85a Mon Sep 17 00:00:00 2001
From: Pu Wen <puwen@hygon.cn>
Date: Wed, 2 Jun 2021 15:02:07 +0800
Subject: [PATCH 2272/3804] x86/sev: Check SME/SEV support in CPUID first

The first two bits of the CPUID leaf 0x8000001F EAX indicate whether SEV
or SME is supported, respectively. It's better to check whether SEV or
SME is actually supported before accessing the MSR_AMD64_SEV to check
whether SEV or SME is enabled.

This is both a bare-metal issue and a guest/VM issue. Since the first
generation Hygon Dhyana CPU doesn't support the MSR_AMD64_SEV, reading that
MSR results in a #GP - either directly from hardware in the bare-metal
case or via the hypervisor (because the RDMSR is actually intercepted)
in the guest/VM case, resulting in a failed boot. And since this is very
early in the boot phase, rdmsrl_safe()/native_read_msr_safe() can't be
used.

So check the CPUID bits first, before accessing the MSR.

 [ tlendacky: Expand and improve commit message. ]
 [ bp: Massage commit message. ]

Fixes: eab696d8e8b9 ("x86/sev: Do not require Hypervisor CPUID bit for SEV guests")
Signed-off-by: Pu Wen <puwen@hygon.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@vger.kernel.org> # v5.10+
Link: https://lkml.kernel.org/r/20210602070207.2480-1-puwen@hygon.cn
---
 arch/x86/mm/mem_encrypt_identity.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index a9639f663d25f..470b202084306 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -504,10 +504,6 @@ void __init sme_enable(struct boot_params *bp)
 #define AMD_SME_BIT	BIT(0)
 #define AMD_SEV_BIT	BIT(1)
 
-	/* Check the SEV MSR whether SEV or SME is enabled */
-	sev_status   = __rdmsr(MSR_AMD64_SEV);
-	feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
-
 	/*
 	 * Check for the SME/SEV feature:
 	 *   CPUID Fn8000_001F[EAX]
@@ -519,11 +515,16 @@ void __init sme_enable(struct boot_params *bp)
 	eax = 0x8000001f;
 	ecx = 0;
 	native_cpuid(&eax, &ebx, &ecx, &edx);
-	if (!(eax & feature_mask))
+	/* Check whether SEV or SME is supported */
+	if (!(eax & (AMD_SEV_BIT | AMD_SME_BIT)))
 		return;
 
 	me_mask = 1UL << (ebx & 0x3f);
 
+	/* Check the SEV MSR whether SEV or SME is enabled */
+	sev_status   = __rdmsr(MSR_AMD64_SEV);
+	feature_mask = (sev_status & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
+
 	/* Check if memory encryption is enabled */
 	if (feature_mask == AMD_SME_BIT) {
 		/*
-- 
GitLab


From 52ea62e74ecf3dd60e6df0479320213470e2ae7f Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng@loongson.cn>
Date: Fri, 4 Jun 2021 17:06:55 +0800
Subject: [PATCH 2273/3804] docs/zh_CN: add core api cachetlb translation

Translate Documentation/core-api/cachetlb.rst into Chinese.

Reviewed-by: Wu XiangCheng <bobwxc@email.cn>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20210604090655.1971227-1-siyanteng@loongson.cn
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/core-api/cachetlb.rst  | 336 ++++++++++++++++++
 .../translations/zh_CN/core-api/index.rst     |   7 +-
 2 files changed, 342 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/core-api/cachetlb.rst

diff --git a/Documentation/translations/zh_CN/core-api/cachetlb.rst b/Documentation/translations/zh_CN/core-api/cachetlb.rst
new file mode 100644
index 0000000000000..8376485a534d1
--- /dev/null
+++ b/Documentation/translations/zh_CN/core-api/cachetlb.rst
@@ -0,0 +1,336 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/core-api/cachetlb.rst
+
+:翻译:
+
+ 司延腾 Yanteng Si <siyanteng@loongson.cn>
+
+:校译:
+
+ 吴想成 Wu XiangCheng <bobwxc@email.cn>
+
+.. _cn_core-api_cachetlb:
+
+======================
+Linux下的缓存和TLB刷新
+======================
+
+:作者: David S. Miller <davem@redhat.com>
+
+*译注：TLB，Translation Lookaside Buffer，页表缓存/变换旁查缓冲器*
+
+本文描述了由Linux虚拟内存子系统调用的缓存/TLB刷新接口。它列举了每个接
+口，描述了它的预期目的，以及接口被调用后的预期副作用。
+
+下面描述的副作用是针对单处理器的实现，以及在单个处理器上发生的情况。若
+为SMP，则只需将定义简单地扩展一下，使发生在某个特定接口的副作用扩展到系
+统的所有处理器上。不要被这句话吓到，以为SMP的缓存/tlb刷新一定是很低
+效的，事实上，这是一个可以进行很多优化的领域。例如，如果可以证明一个用
+户地址空间从未在某个cpu上执行过（见mm_cpumask()），那么就不需要在该
+cpu上对这个地址空间进行刷新。
+
+首先是TLB刷新接口，因为它们是最简单的。在Linux下，TLB被抽象为cpu
+用来缓存从软件页表获得的虚拟->物理地址转换的东西。这意味着，如果软件页
+表发生变化，这个“TLB”缓存中就有可能出现过时（脏）的翻译。因此，当软件页表
+发生变化时，内核会在页表发生 *变化后* 调用以下一种刷新方法：
+
+1) ``void flush_tlb_all(void)``
+
+	最严格的刷新。在这个接口运行后，任何以前的页表修改都会对cpu可见。
+
+	这通常是在内核页表被改变时调用的，因为这种转换在本质上是“全局”的。
+
+2) ``void flush_tlb_mm(struct mm_struct *mm)``
+
+	这个接口从TLB中刷新整个用户地址空间。在运行后，这个接口必须确保
+	以前对地址空间‘mm’的任何页表修改对cpu来说是可见的。也就是说，在
+	运行后，TLB中不会有‘mm’的页表项。
+
+	这个接口被用来处理整个地址空间的页表操作，比如在fork和exec过程
+	中发生的事情。
+
+3) ``void flush_tlb_range(struct vm_area_struct *vma,
+   unsigned long start, unsigned long end)``
+
+	这里我们要从TLB中刷新一个特定范围的（用户）虚拟地址转换。在运行后，
+	这个接口必须确保以前对‘start’到‘end-1’范围内的地址空间‘vma->vm_mm’
+	的任何页表修改对cpu来说是可见的。也就是说，在运行后，TLB中不会有
+	‘mm’的页表项用于‘start’到‘end-1’范围内的虚拟地址。
+
+	“vma”是用于该区域的备份存储。主要是用于munmap()类型的操作。
+
+	提供这个接口是希望移植能够找到一个合适的有效方法来从TLB中删除多
+	个页面大小的转换，而不是让内核为每个可能被修改的页表项调用
+	flush_tlb_page(见下文)。
+
+4) ``void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)``
+
+	这一次我们需要从TLB中删除PAGE_SIZE大小的转换。‘vma’是Linux用来跟
+	踪进程的mmap区域的支持结构体，地址空间可以通过vma->vm_mm获得。另
+	外，可以通过测试（vma->vm_flags & VM_EXEC）来查看这个区域是否是
+	可执行的（因此在split-tlb类型的设置中可能在“指令TLB”中）。
+
+	在运行后，这个接口必须确保之前对用户虚拟地址“addr”的地址空间
+	“vma->vm_mm”的页表修改对cpu来说是可见的。也就是说，在运行后，TLB
+	中不会有虚拟地址‘addr’的‘vma->vm_mm’的页表项。
+
+	这主要是在故障处理时使用。
+
+5) ``void update_mmu_cache(struct vm_area_struct *vma,
+   unsigned long address, pte_t *ptep)``
+
+	在每个页面故障结束时，这个程序被调用，以告诉体系结构特定的代码，在
+	软件页表中，在地址空间“vma->vm_mm”的虚拟地址“address”处，现在存在
+	一个转换。
+
+	移植可以用它所选择的任何方式来使用这个信息。例如，它可以使用这
+	个事件来为软件管理的TLB配置预装TLB转换。目前sparc64移植就是这么干
+	的。
+
+接下来，我们有缓存刷新接口。一般来说，当Linux将现有的虚拟->物理映射
+改变为新的值时，其顺序将是以下形式之一::
+
+	1) flush_cache_mm(mm);
+		change_all_page_tables_of(mm);
+		flush_tlb_mm(mm);
+
+	2) flush_cache_range(vma, start, end);
+		change_range_of_page_tables(mm, start, end);
+		flush_tlb_range(vma, start, end);
+
+	3) flush_cache_page(vma, addr, pfn);
+		set_pte(pte_pointer, new_pte_val);
+		flush_tlb_page(vma, addr);
+
+缓存级别的刷新将永远是第一位的，因为这允许我们正确处理那些缓存严格，
+且在虚拟地址被从缓存中刷新时要求一个虚拟地址的虚拟->物理转换存在的系统。
+HyperSparc cpu就是这样一个具有这种属性的cpu。
+
+下面的缓存刷新程序只需要在特定的cpu需要的范围内处理缓存刷新。大多数
+情况下，这些程序必须为cpu实现，这些cpu有虚拟索引的缓存，当虚拟->物
+理转换被改变或移除时，必须被刷新。因此，例如，IA32处理器的物理索引
+的物理标记的缓存没有必要实现这些接口，因为这些缓存是完全同步的，并
+且不依赖于翻译信息。
+
+下面逐个列出这些程序:
+
+1) ``void flush_cache_mm(struct mm_struct *mm)``
+
+	这个接口将整个用户地址空间从高速缓存中刷掉。也就是说，在运行后，
+	将没有与‘mm’相关的缓存行。
+
+	这个接口被用来处理整个地址空间的页表操作，比如在退出和执行过程
+	中发生的事情。
+
+2) ``void flush_cache_dup_mm(struct mm_struct *mm)``
+
+	这个接口将整个用户地址空间从高速缓存中刷新掉。也就是说，在运行
+	后，将没有与‘mm’相关的缓存行。
+
+	这个接口被用来处理整个地址空间的页表操作，比如在fork过程中发生
+	的事情。
+
+	这个选项与flush_cache_mm分开，以允许对VIPT缓存进行一些优化。
+
+3) ``void flush_cache_range(struct vm_area_struct *vma,
+   unsigned long start, unsigned long end)``
+
+	在这里，我们要从缓存中刷新一个特定范围的（用户）虚拟地址。运行
+	后，在“start”到“end-1”范围内的虚拟地址的“vma->vm_mm”的缓存中
+	将没有任何缓存行。
+
+	“vma”是被用于该区域的备份存储。主要是用于munmap()类型的操作。
+
+	提供这个接口是希望移植能够找到一个合适的有效方法来从缓存中删
+	除多个页面大小的区域， 而不是让内核为每个可能被修改的页表项调
+	用 flush_cache_page (见下文)。
+
+4) ``void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)``
+
+	这一次我们需要从缓存中删除一个PAGE_SIZE大小的区域。“vma”是
+	Linux用来跟踪进程的mmap区域的支持结构体，地址空间可以通过
+	vma->vm_mm获得。另外，我们可以通过测试（vma->vm_flags &
+	VM_EXEC）来查看这个区域是否是可执行的（因此在“Harvard”类
+	型的缓存布局中可能是在“指令缓存”中）。
+
+	“pfn”表示“addr”所对应的物理页框（通过PAGE_SHIFT左移这个
+	值来获得物理地址）。正是这个映射应该从缓存中删除。
+
+	在运行之后，对于‘vma->vm_mm’中转换为‘pfn’的虚拟地址‘addr’，
+	在缓存中不会有任何缓存行。
+
+	这主要是在故障处理过程中使用。
+
+5) ``void flush_cache_kmaps(void)``
+
+	只有在平台使用高位内存的情况下才需要实现这个程序。它将在所有的
+	kmaps失效之前被调用。
+
+	运行后，内核虚拟地址范围PKMAP_ADDR(0)到PKMAP_ADDR(LAST_PKMAP)
+	的缓存中将没有任何缓存行。
+
+	这个程序应该在asm/highmem.h中实现。
+
+6) ``void flush_cache_vmap(unsigned long start, unsigned long end)``
+   ``void flush_cache_vunmap(unsigned long start, unsigned long end)``
+
+	在这里，在这两个接口中，我们从缓存中刷新一个特定范围的（内核）
+	虚拟地址。运行后，在“start”到“end-1”范围内的虚拟地址的内核地
+	址空间的缓存中不会有任何缓存行。
+
+	这两个程序中的第一个是在vmap_range()安装了页表项之后调用的。
+	第二个是在vunmap_range()删除页表项之前调用的。
+
+还有一类cpu缓存问题，目前需要一套完全不同的接口来正确处理。最大
+的问题是处理器的数据缓存中的虚拟别名。
+
+.. note::
+
+	这段内容有些晦涩，为了减轻中文阅读压力，特作此译注。
+
+	别名（alias）属于缓存一致性问题，当不同的虚拟地址映射相同的
+	物理地址，而这些虚拟地址的index不同，此时就发生了别名现象(多
+	个虚拟地址被称为别名)。通俗点来说就是指同一个物理地址的数据被
+	加载到不同的cacheline中就会出现别名现象。
+
+	常见的解决方法有两种：第一种是硬件维护一致性，设计特定的cpu电
+	路来解决问题（例如设计为PIPT的cache）；第二种是软件维护一致性，
+	就是下面介绍的sparc的解决方案——页面染色，涉及的技术细节太多，
+	译者不便展开，请读者自行查阅相关资料。
+
+您的移植是否容易在其D-cache中出现虚拟别名？嗯，如果您的D-cache
+是虚拟索引的，且cache大于PAGE_SIZE（页大小），并且不能防止同一
+物理地址的多个cache行同时存在，您就会遇到这个问题。
+
+如果你的D-cache有这个问题，首先正确定义asm/shmparam.h SHMLBA，
+它基本上应该是你的虚拟寻址D-cache的大小（或者如果大小是可变的，
+则是最大的可能大小）。这个设置将迫使SYSv IPC层只允许用户进程在
+这个值的倍数的地址上对共享内存进行映射。
+
+.. note::
+
+	这并不能解决共享mmaps的问题，请查看sparc64移植解决
+	这个问题的一个方法（特别是 SPARC_FLAG_MMAPSHARED）。
+
+接下来，你必须解决所有其他情况下的D-cache别名问题。请记住这个事
+实，对于一个给定的页面映射到某个用户地址空间，总是至少还有一个映
+射，那就是内核在其线性映射中从PAGE_OFFSET开始。因此，一旦第一个
+用户将一个给定的物理页映射到它的地址空间，就意味着D-cache的别名
+问题有可能存在，因为内核已经将这个页映射到它的虚拟地址。
+
+  ``void copy_user_page(void *to, void *from, unsigned long addr, struct page *page)``
+  ``void clear_user_page(void *to, unsigned long addr, struct page *page)``
+
+	这两个程序在用户匿名或COW页中存储数据。它允许一个移植有效地
+	避免用户空间和内核之间的D-cache别名问题。
+
+	例如，一个移植可以在复制过程中把“from”和“to”暂时映射到内核
+	的虚拟地址上。这两个页面的虚拟地址的选择方式是，内核的加载/存
+	储指令发生在虚拟地址上，而这些虚拟地址与用户的页面映射是相同
+	的“颜色”。例如，Sparc64就使用这种技术。
+
+	“addr”参数告诉了用户最终要映射这个页面的虚拟地址，“page”参
+	数给出了一个指向目标页结构体的指针。
+
+	如果D-cache别名不是问题，这两个程序可以简单地直接调用
+	memcpy/memset而不做其他事情。
+
+  ``void flush_dcache_page(struct page *page)``
+
+	任何时候，当内核写到一个页面缓存页，或者内核要从一个页面缓存
+	页中读出，并且这个页面的用户空间共享/可写映射可能存在时，
+	这个程序就会被调用。
+
+	.. note::
+
+			这个程序只需要为有可能被映射到用户进程的地址空间的
+			页面缓存调用。因此，例如，处理页面缓存中vfs符号链
+			接的VFS层代码根本不需要调用这个接口。
+
+	“内核写入页面缓存的页面”这句话的意思是，具体来说，内核执行存
+	储指令，在该页面的页面->虚拟映射处弄脏该页面的数据。在这里，通
+	过刷新的手段处理D-cache的别名是很重要的，以确保这些内核存储对
+	该页的用户空间映射是可见的。
+
+	推论的情况也同样重要，如果有用户对这个文件有共享+可写的映射，
+	我们必须确保内核对这些页面的读取会看到用户所做的最新的存储。
+
+	如果D-cache别名不是一个问题，这个程序可以简单地定义为该架构上
+	的nop。
+
+	在page->flags (PG_arch_1)中有一个位是“架构私有”。内核保证，
+	对于分页缓存的页面，当这样的页面第一次进入分页缓存时，它将清除
+	这个位。
+
+	这使得这些接口可以更有效地被实现。如果目前没有用户进程映射这个
+	页面，它允许我们“推迟”（也许是无限期）实际的刷新过程。请看
+	sparc64的flush_dcache_page和update_mmu_cache实现，以了解如
+	何做到这一点。
+
+	这个想法是，首先在flush_dcache_page()时，如果page->mapping->i_mmap
+	是一个空树，只需标记架构私有页标志位。之后，在update_mmu_cache()
+	中，会对这个标志位进行检查，如果设置了，就进行刷新，并清除标志位。
+
+	.. important::
+
+				如果你推迟刷新，通常很重要的一点是：实际的刷新必须发生在
+				执行了弄脏该页面的存储操作的那个CPU上。同样，请看
+				sparc64关于如何处理这个问题的例子。
+
+  ``void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long user_vaddr, void *dst, void *src, int len)``
+  ``void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long user_vaddr, void *dst, void *src, int len)``
+
+	当内核需要复制任意的数据进出任意的用户页时（比如ptrace()），它将使
+	用这两个程序。
+
+	任何必要的缓存刷新或其他需要发生的一致性操作都应该在这里发生。如果
+	处理器的指令缓存没有对cpu存储进行窥探，那么你很可能需要为
+	copy_to_user_page()刷新指令缓存。
+
+  ``void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+  unsigned long vmaddr)``
+
+	当内核需要访问一个匿名页的内容时，它会调用这个函数（目前只有
+	get_user_pages()）。注意：flush_dcache_page()故意对匿名页不起作
+	用。默认的实现是nop（对于所有相干的架构应该保持这样）。对于不一致性
+	的架构，它应该刷新vmaddr处的页面缓存。
+
+  ``void flush_kernel_dcache_page(struct page *page)``
+
+	当内核需要修改一个用kmap获得的用户页时，它会在所有修改完成后（但在
+	kunmapping之前）调用这个函数，以使底层页面达到最新状态。这里假定用
+	户没有不一致性的缓存副本（即原始页面是从类似get_user_pages()的机制
+	中获得的）。默认的实现是一个nop，在所有相干的架构上都应该如此。在不
+	一致性的架构上，这应该刷新内核缓存中的页面（使用page_address(page)）。
+
+
+  ``void flush_icache_range(unsigned long start, unsigned long end)``
+
+	当内核存储到它将执行的地址中时（例如在加载模块时），这个函数被调用。
+
+	如果icache不对存储进行窥探，那么这个程序将需要对其进行刷新。
+
+  ``void flush_icache_page(struct vm_area_struct *vma, struct page *page)``
+
+	flush_icache_page的所有功能都可以在flush_dcache_page和update_mmu_cache
+	中实现。在未来，我们希望能够完全删除这个接口。
+
+最后一类API是用于I/O到内核内特意设置的别名地址范围。这种别名是通过使用
+vmap/vmalloc API设置的。由于内核I/O是通过物理页进行的，I/O子系统假定用户
+映射和内核偏移映射是唯一的别名。这对vmap别名来说是不正确的，所以内核中任何
+试图对vmap区域进行I/O的东西都必须手动管理一致性。它必须在做I/O之前刷新vmap
+范围，并在I/O返回后使其失效。
+
+  ``void flush_kernel_vmap_range(void *vaddr, int size)``
+
+	刷新vmap区域中指定的虚拟地址范围的内核缓存。这是为了确保内核在vmap范围
+	内修改的任何数据对物理页是可见的。这个设计是为了使这个区域可以安全地执
+	行I/O。注意，这个API并 *没有* 刷新该区域的偏移映射别名。
+
+  ``void invalidate_kernel_vmap_range(void *vaddr, int size)``
+
+	使vmap区域中一个给定的虚拟地址范围的缓存失效，这可以防止处理器在物理页的I/O
+	发生时通过投机性地读取数据而使缓存变脏。这只对读入vmap区域的数据是必要的。
diff --git a/Documentation/translations/zh_CN/core-api/index.rst b/Documentation/translations/zh_CN/core-api/index.rst
index a8b2afcbf5bcc..b4bde9396339f 100644
--- a/Documentation/translations/zh_CN/core-api/index.rst
+++ b/Documentation/translations/zh_CN/core-api/index.rst
@@ -76,9 +76,14 @@ Todolist:
 
 缓存管理，CPU热插拔管理等。
 
-Todolist:
+.. toctree::
+   :maxdepth: 1
 
    cachetlb
+
+Todolist:
+
+
    cpu_hotplug
    memory-hotplug
    genericirq
-- 
GitLab


From b0cbba2e44c629f1b4efb31701b1d3f3ade6926e Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Thu, 3 Jun 2021 22:52:27 +0800
Subject: [PATCH 2274/3804] docs/zh_CN: Add
 zh_CN/admin-guide/lockup-watchdogs.rst

Add translation zh_CN/admin-guide/lockup-watchdogs.rst and link it to
zh_CN/admin-guide/index.rst while clean its todo entry.

Reviewed-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Link: https://lore.kernel.org/r/20210603145227.30956-1-liuhailongg6@163.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/admin-guide/index.rst  |  2 +-
 .../zh_CN/admin-guide/lockup-watchdogs.rst    | 66 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/admin-guide/lockup-watchdogs.rst

diff --git a/Documentation/translations/zh_CN/admin-guide/index.rst b/Documentation/translations/zh_CN/admin-guide/index.rst
index be835ec8e632a..460034cbc2ab8 100644
--- a/Documentation/translations/zh_CN/admin-guide/index.rst
+++ b/Documentation/translations/zh_CN/admin-guide/index.rst
@@ -65,6 +65,7 @@ Todolist:
 
    clearing-warn-once
    cpu-load
+   lockup-watchdogs
    unicode
 
 Todolist:
@@ -100,7 +101,6 @@ Todolist:
    laptops/index
    lcd-panel-cgram
    ldm
-   lockup-watchdogs
    LSM/index
    md
    media/index
diff --git a/Documentation/translations/zh_CN/admin-guide/lockup-watchdogs.rst b/Documentation/translations/zh_CN/admin-guide/lockup-watchdogs.rst
new file mode 100644
index 0000000000000..55ed3f4af4422
--- /dev/null
+++ b/Documentation/translations/zh_CN/admin-guide/lockup-watchdogs.rst
@@ -0,0 +1,66 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/admin-guide/lockup-watchdogs.rst
+:Translator: Hailong Liu <liu.hailong6@zte.com.cn>
+
+.. _cn_lockup-watchdogs:
+
+
+=================================================
+Softlockup与hardlockup检测机制(又名:nmi_watchdog)
+=================================================
+
+Linux中内核实现了一种用以检测系统发生softlockup和hardlockup的看门狗机制。
+
+Softlockup是一种会引发系统在内核态中一直循环超过20秒（详见下面“实现”小节）导致
+其他任务没有机会得到运行的BUG。一旦检测到'softlockup'发生，默认情况下系统会打
+印当前堆栈跟踪信息并进入锁定状态。也可配置使其在检测到'softlockup'后进入panic
+状态；通过sysctl命令设置“kernel.softlockup_panic”、使用内核启动参数
+“softlockup_panic”（详见Documentation/admin-guide/kernel-parameters.rst）以及使
+能内核编译选项“BOOTPARAM_SOFTLOCKUP_PANIC”都可实现这种配置。
+
+而'hardlockup'是一种会引发系统在内核态一直循环超过10秒钟（详见"实现"小节）导致其
+他中断没有机会运行的缺陷。与'softlockup'情况类似，除了使用sysctl命令设置
+'hardlockup_panic'、使能内核选项“BOOTPARAM_HARDLOCKUP_PANIC”以及使用内核参数
+"nmi_watchdog"(详见:“Documentation/admin-guide/kernel-parameters.rst”)外，一旦检
+测到'hardlockup'默认情况下系统打印当前堆栈跟踪信息，然后进入锁定状态。
+
+这个panic选项也可以与panic_timeout结合使用（这个panic_timeout是通过稍具迷惑性的
+sysctl命令"kernel.panic"来设置），使系统在panic指定时间后自动重启。
+
+实现
+====
+
+Softlockup和hardlockup分别建立在hrtimer(高精度定时器)和perf两个子系统上而实现。
+这也就意味着理论上任何架构只要实现了这两个子系统就支持这两种检测机制。
+
+Hrtimer用于周期性产生中断并唤醒watchdog线程；NMI perf事件则以“watchdog_thresh”
+(编译时默认初始化为10秒，也可通过“watchdog_thresh”这个sysctl接口来进行配置修改)
+为间隔周期产生以检测 hardlockups。如果一个CPU在这个时间段内没有检测到hrtimer中
+断发生，'hardlockup 检测器'(即NMI perf事件处理函数)将会视系统配置而选择产生内核
+警告或者直接panic。
+
+而watchdog线程本质上是一个高优先级内核线程，每调度一次就对时间戳进行一次更新。
+如果时间戳在2*watchdog_thresh(这个是softlockup的触发门限)这段时间都未更新,那么
+"softlockup 检测器"(内部hrtimer定时器回调函数)会将相关的调试信息打印到系统日志中，
+然后如果系统配置了进入panic流程则进入panic，否则内核继续执行。
+
+Hrtimer定时器的周期是2*watchdog_thresh/5，也就是说在hardlockup被触发前hrtimer有
+2~3次机会产生时钟中断。
+
+如上所述,内核相当于为系统管理员提供了一个可调节hrtimer定时器和perf事件周期长度
+的调节旋钮。如何通过这个旋钮为特定使用场景配置一个合理的周期值要对lockups检测的
+响应速度和lockups检测开销这二者之间进行权衡。
+
+默认情况下所有在线cpu上都会运行一个watchdog线程。不过在内核配置了“NO_HZ_FULL”的
+情况下watchdog线程默认只会运行在管家(housekeeping)cpu上，而“nohz_full”启动参数指
+定的cpu上则不会有watchdog线程运行。试想，如果我们允许watchdog线程在“nohz_full”指
+定的cpu上运行，这些cpu上必须得运行时钟定时器来激发watchdog线程调度；这样一来就会
+使“nohz_full”保护用户程序免受内核干扰的功能失效。当然，副作用就是“nohz_full”指定
+的cpu即使在内核产生了lockup问题我们也无法检测到。不过，至少我们可以允许watchdog
+线程在管家(non-tickless)核上继续运行以便我们能继续正常的监测这些cpus上的lockups
+事件。
+
+不论哪种情况都可以通过sysctl命令kernel.watchdog_cpumask来对没有运行watchdog线程
+的cpu集合进行调节。对于nohz_full而言,如果nohz_full cpu上有异常挂住的情况，通过
+这种方式打开这些cpu上的watchdog进行调试可能会有所作用。
-- 
GitLab


From c003555a026f56dae1d6b522045a7917150ceabb Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Thu, 3 Jun 2021 22:11:17 +0800
Subject: [PATCH 2275/3804] docs/zh_CN: add translations in
 zh_CN/dev-tools/kasan

Add new zh translations
* zh_CN/dev-tools/kasan.rst
and link it to zh_CN/dev-tools/index.rst

Reviewed-by: Fangrui Song <maskray@google.com>
Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Link: https://lore.kernel.org/r/20210603141127.101689-1-wanjiabing@vivo.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../translations/zh_CN/dev-tools/index.rst    |   2 +-
 .../translations/zh_CN/dev-tools/kasan.rst    | 417 ++++++++++++++++++
 2 files changed, 418 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/translations/zh_CN/dev-tools/kasan.rst

diff --git a/Documentation/translations/zh_CN/dev-tools/index.rst b/Documentation/translations/zh_CN/dev-tools/index.rst
index fd73c479917b2..e6c99f2f543fe 100644
--- a/Documentation/translations/zh_CN/dev-tools/index.rst
+++ b/Documentation/translations/zh_CN/dev-tools/index.rst
@@ -19,13 +19,13 @@
    :maxdepth: 2
 
    gcov
+   kasan
 
 Todolist:
 
  - coccinelle
  - sparse
  - kcov
- - kasan
  - ubsan
  - kmemleak
  - kcsan
diff --git a/Documentation/translations/zh_CN/dev-tools/kasan.rst b/Documentation/translations/zh_CN/dev-tools/kasan.rst
new file mode 100644
index 0000000000000..23db9d419047a
--- /dev/null
+++ b/Documentation/translations/zh_CN/dev-tools/kasan.rst
@@ -0,0 +1,417 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/dev-tools/kasan.rst
+:Translator: 万家兵 Wan Jiabing <wanjiabing@vivo.com>
+
+内核地址消毒剂(KASAN)
+=====================
+
+概述
+----
+
+KernelAddressSANitizer(KASAN)是一种动态内存安全错误检测工具，主要功能是
+检查内存越界访问和使用已释放内存的问题。KASAN有三种模式:
+
+1. 通用KASAN（与用户空间的ASan类似）
+2. 基于软件标签的KASAN（与用户空间的HWASan类似）
+3. 基于硬件标签的KASAN（基于硬件内存标签）
+
+由于通用KASAN的内存开销较大，通用KASAN主要用于调试。基于软件标签的KASAN
+可用于dogfood测试，因为它具有较低的内存开销，并允许将其用于实际工作量。
+基于硬件标签的KASAN具有较低的内存和性能开销，因此可用于生产。同时可用于
+检测现场内存问题或作为安全缓解措施。
+
+软件KASAN模式（#1和#2）使用编译时工具在每次内存访问之前插入有效性检查，
+因此需要一个支持它的编译器版本。
+
+通用KASAN在GCC和Clang受支持。GCC需要8.3.0或更高版本。任何受支持的Clang
+版本都是兼容的，但从Clang 11才开始支持检测全局变量的越界访问。
+
+基于软件标签的KASAN模式仅在Clang中受支持。
+
+硬件KASAN模式（#3）依赖硬件来执行检查，但仍需要支持内存标签指令的编译器
+版本。GCC 10+和Clang 11+支持此模式。
+
+两种软件KASAN模式都适用于SLUB和SLAB内存分配器，而基于硬件标签的KASAN目前
+仅支持SLUB。
+
+目前x86_64、arm、arm64、xtensa、s390、riscv架构支持通用KASAN模式，仅
+arm64架构支持基于标签的KASAN模式。
+
+用法
+----
+
+要启用KASAN，请使用以下命令配置内核::
+
+	  CONFIG_KASAN=y
+
+同时在 ``CONFIG_KASAN_GENERIC`` (启用通用KASAN模式)， ``CONFIG_KASAN_SW_TAGS``
+(启用基于软件标签的KASAN模式)，和 ``CONFIG_KASAN_HW_TAGS`` (启用基于硬件标签
+的KASAN模式)之间进行选择。
+
+对于软件模式，还可以在 ``CONFIG_KASAN_OUTLINE`` 和 ``CONFIG_KASAN_INLINE``
+之间进行选择。outline和inline是编译器插桩类型。前者产生较小的二进制文件，
+而后者快1.1-2倍。
+
+要将受影响的slab对象的alloc和free堆栈跟踪包含到报告中，请启用
+``CONFIG_STACKTRACE`` 。要包括受影响物理页面的分配和释放堆栈跟踪的话，
+请启用 ``CONFIG_PAGE_OWNER`` 并使用 ``page_owner=on`` 进行引导。
+
+错误报告
+~~~~~~~~
+
+典型的KASAN报告如下所示::
+
+    ==================================================================
+    BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan]
+    Write of size 1 at addr ffff8801f44ec37b by task insmod/2760
+
+    CPU: 1 PID: 2760 Comm: insmod Not tainted 4.19.0-rc3+ #698
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
+    Call Trace:
+     dump_stack+0x94/0xd8
+     print_address_description+0x73/0x280
+     kasan_report+0x144/0x187
+     __asan_report_store1_noabort+0x17/0x20
+     kmalloc_oob_right+0xa8/0xbc [test_kasan]
+     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     do_one_initcall+0xa5/0x3ae
+     do_init_module+0x1b6/0x547
+     load_module+0x75df/0x8070
+     __do_sys_init_module+0x1c6/0x200
+     __x64_sys_init_module+0x6e/0xb0
+     do_syscall_64+0x9f/0x2c0
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+    RIP: 0033:0x7f96443109da
+    RSP: 002b:00007ffcf0b51b08 EFLAGS: 00000202 ORIG_RAX: 00000000000000af
+    RAX: ffffffffffffffda RBX: 000055dc3ee521a0 RCX: 00007f96443109da
+    RDX: 00007f96445cff88 RSI: 0000000000057a50 RDI: 00007f9644992000
+    RBP: 000055dc3ee510b0 R08: 0000000000000003 R09: 0000000000000000
+    R10: 00007f964430cd0a R11: 0000000000000202 R12: 00007f96445cff88
+    R13: 000055dc3ee51090 R14: 0000000000000000 R15: 0000000000000000
+
+    Allocated by task 2760:
+     save_stack+0x43/0xd0
+     kasan_kmalloc+0xa7/0xd0
+     kmem_cache_alloc_trace+0xe1/0x1b0
+     kmalloc_oob_right+0x56/0xbc [test_kasan]
+     kmalloc_tests_init+0x16/0x700 [test_kasan]
+     do_one_initcall+0xa5/0x3ae
+     do_init_module+0x1b6/0x547
+     load_module+0x75df/0x8070
+     __do_sys_init_module+0x1c6/0x200
+     __x64_sys_init_module+0x6e/0xb0
+     do_syscall_64+0x9f/0x2c0
+     entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+    Freed by task 815:
+     save_stack+0x43/0xd0
+     __kasan_slab_free+0x135/0x190
+     kasan_slab_free+0xe/0x10
+     kfree+0x93/0x1a0
+     umh_complete+0x6a/0xa0
+     call_usermodehelper_exec_async+0x4c3/0x640
+     ret_from_fork+0x35/0x40
+
+    The buggy address belongs to the object at ffff8801f44ec300
+     which belongs to the cache kmalloc-128 of size 128
+    The buggy address is located 123 bytes inside of
+     128-byte region [ffff8801f44ec300, ffff8801f44ec380)
+    The buggy address belongs to the page:
+    page:ffffea0007d13b00 count:1 mapcount:0 mapping:ffff8801f7001640 index:0x0
+    flags: 0x200000000000100(slab)
+    raw: 0200000000000100 ffffea0007d11dc0 0000001a0000001a ffff8801f7001640
+    raw: 0000000000000000 0000000080150015 00000001ffffffff 0000000000000000
+    page dumped because: kasan: bad access detected
+
+    Memory state around the buggy address:
+     ffff8801f44ec200: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+     ffff8801f44ec280: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+    >ffff8801f44ec300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 03
+                                                                    ^
+     ffff8801f44ec380: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
+     ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
+    ==================================================================
+
+报告标题总结了发生的错误类型以及导致该错误的访问类型。紧随其后的是错误访问的
+堆栈跟踪、所访问内存分配位置的堆栈跟踪（对于访问了slab对象的情况）以及对象
+被释放的位置的堆栈跟踪（对于访问已释放内存的问题报告）。接下来是对访问的
+slab对象的描述以及关于访问的内存页的信息。
+
+最后，报告展示了访问地址周围的内存状态。在内部，KASAN单独跟踪每个内存颗粒的
+内存状态，根据KASAN模式分为8或16个对齐字节。报告的内存状态部分中的每个数字
+都显示了围绕访问地址的其中一个内存颗粒的状态。
+
+对于通用KASAN，每个内存颗粒的大小为8个字节。每个颗粒的状态被编码在一个影子字节
+中。这8个字节可以是可访问的，部分访问的，已释放的或成为Redzone的一部分。KASAN
+对每个影子字节使用以下编码:00表示对应内存区域的所有8个字节都可以访问；数字N
+(1 <= N <= 7)表示前N个字节可访问，其他(8 - N)个字节不可访问；任何负值都表示
+无法访问整个8字节。KASAN使用不同的负值来区分不同类型的不可访问内存，如redzones
+或已释放的内存（参见 mm/kasan/kasan.h）。
+
+在上面的报告中，箭头指向影子字节 ``03`` ，表示访问的地址是部分可访问的。
+
+对于基于标签的KASAN模式，报告最后的部分显示了访问地址周围的内存标签
+(参考 `实施细则`_ 章节)。
+
+请注意，KASAN错误标题（如 ``slab-out-of-bounds`` 或 ``use-after-free`` ）
+是尽量接近的:KASAN根据其拥有的有限信息打印出最可能的错误类型。错误的实际类型
+可能会有所不同。
+
+通用KASAN还报告两个辅助调用堆栈跟踪。这些堆栈跟踪指向代码中与对象交互但不直接
+出现在错误访问堆栈跟踪中的位置。目前，这包括 call_rcu() 和排队的工作队列。
+
+启动参数
+~~~~~~~~
+
+KASAN受通用 ``panic_on_warn`` 命令行参数的影响。启用该功能后，KASAN在打印错误
+报告后会引起内核恐慌。
+
+默认情况下，KASAN只为第一次无效内存访问打印错误报告。使用 ``kasan_multi_shot`` ，
+KASAN会针对每个无效访问打印报告。这有效地禁用了KASAN报告的 ``panic_on_warn`` 。
+
+基于硬件标签的KASAN模式（请参阅下面有关各种模式的部分）旨在在生产中用作安全缓解
+措施。因此，它支持允许禁用KASAN或控制其功能的引导参数。
+
+- ``kasan=off`` 或 ``=on`` 控制KASAN是否启用 (默认: ``on`` )。
+
+- ``kasan.mode=sync`` 或 ``=async`` 控制KASAN是否配置为同步或异步执行模式(默认:
+  ``sync`` )。同步模式：当标签检查错误发生时，立即检测到错误访问。异步模式：
+  延迟错误访问检测。当标签检查错误发生时，信息存储在硬件中（在arm64的
+  TFSR_EL1寄存器中）。内核会定期检查硬件，并且仅在这些检查期间报告标签错误。
+
+- ``kasan.stacktrace=off`` 或 ``=on`` 禁用或启用alloc和free堆栈跟踪收集
+  (默认: ``on`` )。
+
+- ``kasan.fault=report`` 或 ``=panic`` 控制是只打印KASAN报告还是同时使内核恐慌
+  (默认: ``report`` )。即使启用了 ``kasan_multi_shot`` ，也会发生内核恐慌。
+
+实施细则
+--------
+
+通用KASAN
+~~~~~~~~~
+
+软件KASAN模式使用影子内存来记录每个内存字节是否可以安全访问，并使用编译时工具
+在每次内存访问之前插入影子内存检查。
+
+通用KASAN将1/8的内核内存专用于其影子内存（16TB以覆盖x86_64上的128TB），并使用
+具有比例和偏移量的直接映射将内存地址转换为其相应的影子地址。
+
+这是将地址转换为其相应影子地址的函数::
+
+    static inline void *kasan_mem_to_shadow(const void *addr)
+    {
+	return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+		+ KASAN_SHADOW_OFFSET;
+    }
+
+在这里 ``KASAN_SHADOW_SCALE_SHIFT = 3`` 。
+
+编译时工具用于插入内存访问检查。编译器在每次访问大小为1、2、4、8或16的内存之前
+插入函数调用( ``__asan_load*(addr)`` , ``__asan_store*(addr)``)。这些函数通过
+检查相应的影子内存来检查内存访问是否有效。
+
+使用inline插桩，编译器不进行函数调用，而是直接插入代码来检查影子内存。此选项
+显著地增大了内核体积，但与outline插桩内核相比，它提供了x1.1-x2的性能提升。
+
+通用KASAN是唯一一种通过隔离延迟重新使用已释放对象的模式
+（参见 mm/kasan/quarantine.c 以了解实现）。
+
+基于软件标签的KASAN模式
+~~~~~~~~~~~~~~~~~~~~~~~
+
+基于软件标签的KASAN使用软件内存标签方法来检查访问有效性。目前仅针对arm64架构实现。
+
+基于软件标签的KASAN使用arm64 CPU的顶部字节忽略(TBI)特性在内核指针的顶部字节中
+存储一个指针标签。它使用影子内存来存储与每个16字节内存单元相关的内存标签(因此，
+它将内核内存的1/16专用于影子内存)。
+
+在每次内存分配时，基于软件标签的KASAN都会生成一个随机标签，用这个标签标记分配
+的内存，并将相同的标签嵌入到返回的指针中。
+
+基于软件标签的KASAN使用编译时工具在每次内存访问之前插入检查。这些检查确保正在
+访问的内存的标签等于用于访问该内存的指针的标签。如果标签不匹配，基于软件标签
+的KASAN会打印错误报告。
+
+基于软件标签的KASAN也有两种插桩模式（outline，发出回调来检查内存访问；inline，
+执行内联的影子内存检查）。使用outline插桩模式，会从执行访问检查的函数打印错误
+报告。使用inline插桩，编译器会发出 ``brk`` 指令，并使用专用的 ``brk`` 处理程序
+来打印错误报告。
+
+基于软件标签的KASAN使用0xFF作为匹配所有指针标签（不检查通过带有0xFF指针标签
+的指针进行的访问）。值0xFE当前保留用于标记已释放的内存区域。
+
+基于软件标签的KASAN目前仅支持对Slab和page_alloc内存进行标记。
+
+基于硬件标签的KASAN模式
+~~~~~~~~~~~~~~~~~~~~~~~
+
+基于硬件标签的KASAN在概念上类似于软件模式，但它是使用硬件内存标签作为支持而
+不是编译器插桩和影子内存。
+
+基于硬件标签的KASAN目前仅针对arm64架构实现，并且基于ARMv8.5指令集架构中引入
+的arm64内存标记扩展(MTE)和最高字节忽略(TBI)。
+
+特殊的arm64指令用于为每次内存分配指定内存标签。相同的标签被指定给指向这些分配
+的指针。在每次内存访问时，硬件确保正在访问的内存的标签等于用于访问该内存的指针
+的标签。如果标签不匹配，则会生成故障并打印报告。
+
+基于硬件标签的KASAN使用0xFF作为匹配所有指针标签（不检查通过带有0xFF指针标签的
+指针进行的访问）。值0xFE当前保留用于标记已释放的内存区域。
+
+基于硬件标签的KASAN目前仅支持对Slab和page_alloc内存进行标记。
+
+如果硬件不支持MTE（ARMv8.5之前），则不会启用基于硬件标签的KASAN。在这种情况下，
+所有KASAN引导参数都将被忽略。
+
+请注意，启用CONFIG_KASAN_HW_TAGS始终会导致启用内核中的TBI。即使提供了
+``kasan.mode=off`` 或硬件不支持MTE（但支持TBI）。
+
+基于硬件标签的KASAN只报告第一个发现的错误。之后，MTE标签检查将被禁用。
+
+影子内存
+--------
+
+内核将内存映射到地址空间的几个不同部分。内核虚拟地址的范围很大：没有足够的真实
+内存来支持内核可以访问的每个地址的真实影子区域。因此，KASAN只为地址空间的某些
+部分映射真实的影子。
+
+默认行为
+~~~~~~~~
+
+默认情况下，体系结构仅将实际内存映射到用于线性映射的阴影区域（以及可能的其他
+小区域）。对于所有其他区域 —— 例如vmalloc和vmemmap空间 —— 一个只读页面被映射
+到阴影区域上。这个只读的影子页面声明所有内存访问都是允许的。
+
+这给模块带来了一个问题：它们不存在于线性映射中，而是存在于专用的模块空间中。
+通过连接模块分配器，KASAN临时映射真实的影子内存以覆盖它们。例如，这允许检测
+对模块全局变量的无效访问。
+
+这也造成了与 ``VMAP_STACK`` 的不兼容：如果堆栈位于vmalloc空间中，它将被分配
+只读页面的影子内存，并且内核在尝试为堆栈变量设置影子数据时会出错。
+
+CONFIG_KASAN_VMALLOC
+~~~~~~~~~~~~~~~~~~~~
+
+使用 ``CONFIG_KASAN_VMALLOC`` ，KASAN可以以更大的内存使用为代价覆盖vmalloc
+空间。目前，这在x86、riscv、s390和powerpc上受支持。
+
+这通过连接到vmalloc和vmap并动态分配真实的影子内存来支持映射。
+
+vmalloc空间中的大多数映射都很小，需要不到一整页的阴影空间。因此，为每个映射
+分配一个完整的影子页面将是一种浪费。此外，为了确保不同的映射使用不同的影子
+页面，映射必须与 ``KASAN_GRANULE_SIZE * PAGE_SIZE`` 对齐。
+
+相反，KASAN跨多个映射共享后备空间。当vmalloc空间中的映射使用影子区域的特定
+页面时，它会分配一个后备页面。此页面稍后可以由其他vmalloc映射共享。
+
+KASAN连接到vmap基础架构以懒清理未使用的影子内存。
+
+为了避免交换映射的困难，KASAN要求覆盖vmalloc空间的阴影区域部分不被早期
+的阴影页面覆盖，而是保持未映射状态。这将需要更改特定于arch的代码。
+
+这允许在x86上支持 ``VMAP_STACK`` ，并且可以简化对没有固定模块区域的架构的支持。
+
+对于开发者
+----------
+
+忽略访问
+~~~~~~~~
+
+软件KASAN模式使用编译器插桩来插入有效性检查。此类检测可能与内核的某些部分
+不兼容，因此需要禁用。
+
+内核的其他部分可能会访问已分配对象的元数据。通常，KASAN会检测并报告此类访问，
+但在某些情况下（例如，在内存分配器中），这些访问是有效的。
+
+对于软件KASAN模式，要禁用特定文件或目录的检测，请将 ``KASAN_SANITIZE`` 添加
+到相应的内核Makefile中:
+
+- 对于单个文件(例如，main.o)::
+
+    KASAN_SANITIZE_main.o := n
+
+- 对于一个目录下的所有文件::
+
+    KASAN_SANITIZE := n
+
+对于软件KASAN模式，要在每个函数的基础上禁用检测，请使用KASAN特定的
+``__no_sanitize_address`` 函数属性或通用的 ``noinstr`` 。
+
+请注意，禁用编译器插桩（基于每个文件或每个函数）会使KASAN忽略在软件KASAN模式
+的代码中直接发生的访问。当访问是间接发生的（通过调用检测函数）或使用没有编译器
+插桩的基于硬件标签的模式时，它没有帮助。
+
+对于软件KASAN模式，要在当前任务的一部分内核代码中禁用KASAN报告，请使用
+``kasan_disable_current()``/``kasan_enable_current()`` 部分注释这部分代码。
+这也会禁用通过函数调用发生的间接访问的报告。
+
+对于基于标签的KASAN模式（包括硬件模式），要禁用访问检查，请使用
+``kasan_reset_tag()`` 或 ``page_kasan_tag_reset()`` 。请注意，通过
+``page_kasan_tag_reset()`` 临时禁用访问检查需要通过 ``page_kasan_tag``
+/ ``page_kasan_tag_set`` 保存和恢复每页KASAN标签。
+
+测试
+~~~~
+
+有一些KASAN测试可以验证KASAN是否正常工作并可以检测某些类型的内存损坏。
+测试由两部分组成:
+
+1. 与KUnit测试框架集成的测试。使用 ``CONFIG_KASAN_KUNIT_TEST`` 启用。
+   这些测试可以通过几种不同的方式自动运行和部分验证；请参阅下面的说明。
+
+2. 与KUnit不兼容的测试。使用 ``CONFIG_KASAN_MODULE_TEST`` 启用并且只能作为模块
+   运行。这些测试只能通过加载内核模块并检查内核日志以获取KASAN报告来手动验证。
+
+如果检测到错误，每个KUnit兼容的KASAN测试都会打印多个KASAN报告之一，然后测试打印
+其编号和状态。
+
+当测试通过::
+
+        ok 28 - kmalloc_double_kzfree
+
+当由于 ``kmalloc`` 失败而导致测试失败时::
+
+        # kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
+        Expected ptr is not null, but is
+        not ok 4 - kmalloc_large_oob_right
+
+当由于缺少KASAN报告而导致测试失败时::
+
+        # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
+        Expected kasan_data->report_expected == kasan_data->report_found, but
+        kasan_data->report_expected == 1
+        kasan_data->report_found == 0
+        not ok 28 - kmalloc_double_kzfree
+
+最后打印所有KASAN测试的累积状态。成功::
+
+        ok 1 - kasan
+
+或者，如果其中一项测试失败::
+
+        not ok 1 - kasan
+
+有几种方法可以运行与KUnit兼容的KASAN测试。
+
+1. 可加载模块
+
+   启用 ``CONFIG_KUNIT`` 后，KASAN-KUnit测试可以构建为可加载模块，并通过使用
+   ``insmod`` 或 ``modprobe`` 加载 ``test_kasan.ko`` 来运行。
+
+2. 内置
+
+   通过内置 ``CONFIG_KUNIT`` ，也可以内置KASAN-KUnit测试。在这种情况下，
+   测试将在启动时作为后期初始化调用运行。
+
+3. 使用kunit_tool
+
+   通过内置 ``CONFIG_KUNIT`` 和 ``CONFIG_KASAN_KUNIT_TEST`` ，还可以使用
+   ``kunit_tool`` 以更易读的方式查看KUnit测试结果。这不会打印通过测试
+   的KASAN报告。有关 ``kunit_tool`` 更多最新信息，请参阅
+   `KUnit文档 <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_ 。
+
+.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
-- 
GitLab


From 709dedfdf3daa8719240ecff1c0b70b278005386 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 2 Jun 2021 20:28:06 -0500
Subject: [PATCH 2276/3804] documentation-file-ref-check: Make git check work
 for multiple working directories

With multiple git working directories, '.git' may also be a text file
linking to the actual git tree instead of a directory.

Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Link: https://lore.kernel.org/r/20210603012806.331132-1-robh@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 scripts/documentation-file-ref-check | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/documentation-file-ref-check b/scripts/documentation-file-ref-check
index c71832b2312b9..7187ea5e5149b 100755
--- a/scripts/documentation-file-ref-check
+++ b/scripts/documentation-file-ref-check
@@ -24,7 +24,7 @@ my $help = 0;
 my $fix = 0;
 my $warn = 0;
 
-if (! -d ".git") {
+if (! -e ".git") {
 	printf "Warning: can't check if file exists, as this is not a git tree\n";
 	exit 0;
 }
-- 
GitLab


From f9ce26c56d37fa6d32f700dfc77f4ceb445ce215 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 2 Jun 2021 13:29:14 -0700
Subject: [PATCH 2277/3804] docs: networking: Replace strncpy() with strscpy()

Replace example code's use of strncpy() with strscpy() functions. Using
strncpy() is considered deprecated:
https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/20210602202914.4079123-1-keescook@chromium.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/input/joydev/joystick-api.rst | 2 +-
 Documentation/networking/packet_mmap.rst    | 2 +-
 Documentation/networking/tuntap.rst         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/input/joydev/joystick-api.rst b/Documentation/input/joydev/joystick-api.rst
index af5934c10c1c9..5db6dc6fe1c58 100644
--- a/Documentation/input/joydev/joystick-api.rst
+++ b/Documentation/input/joydev/joystick-api.rst
@@ -263,7 +263,7 @@ possible overrun should the name be too long::
 
 	char name[128];
 	if (ioctl(fd, JSIOCGNAME(sizeof(name)), name) < 0)
-		strncpy(name, "Unknown", sizeof(name));
+		strscpy(name, "Unknown", sizeof(name));
 	printf("Name: %s\n", name);
 
 
diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
index 500ef60b1b821..c5da1a5d93de8 100644
--- a/Documentation/networking/packet_mmap.rst
+++ b/Documentation/networking/packet_mmap.rst
@@ -153,7 +153,7 @@ As capture, each frame contains two parts::
     struct ifreq s_ifr;
     ...
 
-    strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
+    strscpy_pad (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
 
     /* get interface index of eth0 */
     ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
index a59d1dd6fdcc2..4d7087f727be5 100644
--- a/Documentation/networking/tuntap.rst
+++ b/Documentation/networking/tuntap.rst
@@ -107,7 +107,7 @@ Note that the character pointer becomes overwritten with the real device name
        */
       ifr.ifr_flags = IFF_TUN;
       if( *dev )
-	 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+	 strscpy_pad(ifr.ifr_name, dev, IFNAMSIZ);
 
       if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
 	 close(fd);
-- 
GitLab


From e53eeac9a9d78dc550b889897f5315424bb63e10 Mon Sep 17 00:00:00 2001
From: Gao Mingfei <g199209@gmail.com>
Date: Tue, 1 Jun 2021 08:51:45 +0000
Subject: [PATCH 2278/3804] docs: block: fix stat.rst document error

Update the description of the device stat files to include the proper
number of fields.

Signed-off-by: Gao Mingfei <g199209@gmail.com>
Link: https://lore.kernel.org/r/20210601085145.3273-1-g199209@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/block/stat.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/block/stat.rst b/Documentation/block/stat.rst
index 77311335c08ba..a1cd9db2058f9 100644
--- a/Documentation/block/stat.rst
+++ b/Documentation/block/stat.rst
@@ -18,7 +18,7 @@ A.
    each, it would be impossible to guarantee that a set of readings
    represent a single point in time.
 
-The stat file consists of a single line of text containing 11 decimal
+The stat file consists of a single line of text containing 17 decimal
 values separated by whitespace.  The fields are summarized in the
 following table, and described in more detail below.
 
-- 
GitLab


From fb7b26a8b1d0b82c79e93deb12d624011c7a4e0e Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Mon, 31 May 2021 22:42:35 +0900
Subject: [PATCH 2279/3804] docs: Fix typo in Documentation/arm/marvell.rst

Fix a typo in the documentation: change 'comatible' to
'compatible'.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Link: https://lore.kernel.org/r/20210531134235.720351-1-iwamatsu@nigauri.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/arm/marvell.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst
index c50be711ec728..db2246493d182 100644
--- a/Documentation/arm/marvell.rst
+++ b/Documentation/arm/marvell.rst
@@ -259,7 +259,7 @@ Storage family
 	https://web.archive.org/web/20191129073953/http://www.marvell.com/storage/armada-sp/
 
   Core:
-	Sheeva ARMv7 comatible Quad-core PJ4C
+	Sheeva ARMv7 compatible Quad-core PJ4C
 
   (not supported in upstream Linux kernel)
 
-- 
GitLab


From acda97acb2e98c97895d81d20494bf6a4bc67c6c Mon Sep 17 00:00:00 2001
From: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Date: Mon, 31 May 2021 10:05:15 -0300
Subject: [PATCH 2280/3804] docs: convert dax.txt to rst

Change the file extension and add the rst constructs to integrate this
doc to the documentation infrastructure and take advantage of rst
features.

Signed-off-by: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Link: https://lore.kernel.org/r/20210531130515.10309-1-igormtorrente@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/dax.rst   | 291 ++++++++++++++++++++++++++++
 Documentation/filesystems/dax.txt   | 257 ------------------------
 Documentation/filesystems/index.rst |   1 +
 3 files changed, 292 insertions(+), 257 deletions(-)
 create mode 100644 Documentation/filesystems/dax.rst
 delete mode 100644 Documentation/filesystems/dax.txt

diff --git a/Documentation/filesystems/dax.rst b/Documentation/filesystems/dax.rst
new file mode 100644
index 0000000000000..9a1b8fd9e82b1
--- /dev/null
+++ b/Documentation/filesystems/dax.rst
@@ -0,0 +1,291 @@
+=======================
+Direct Access for files
+=======================
+
+Motivation
+----------
+
+The page cache is usually used to buffer reads and writes to files.
+It is also used to provide the pages which are mapped into userspace
+by a call to mmap.
+
+For block devices that are memory-like, the page cache pages would be
+unnecessary copies of the original storage.  The `DAX` code removes the
+extra copy by performing reads and writes directly to the storage device.
+For file mappings, the storage device is mapped directly into userspace.
+
+
+Usage
+-----
+
+If you have a block device which supports `DAX`, you can make a filesystem
+on it as usual.  The `DAX` code currently only supports files with a block
+size equal to your kernel's `PAGE_SIZE`, so you may need to specify a block
+size when creating the filesystem.
+
+Currently 3 filesystems support `DAX`: ext2, ext4 and xfs.  Enabling `DAX` on them
+is different.
+
+Enabling DAX on ext2
+--------------------
+
+When mounting the filesystem, use the ``-o dax`` option on the command line or
+add 'dax' to the options in ``/etc/fstab``.  This works to enable `DAX` on all files
+within the filesystem.  It is equivalent to the ``-o dax=always`` behavior below.
+
+
+Enabling DAX on xfs and ext4
+----------------------------
+
+Summary
+-------
+
+ 1. There exists an in-kernel file access mode flag `S_DAX` that corresponds to
+    the statx flag `STATX_ATTR_DAX`.  See the manpage for statx(2) for details
+    about this access mode.
+
+ 2. There exists a persistent flag `FS_XFLAG_DAX` that can be applied to regular
+    files and directories. This advisory flag can be set or cleared at any
+    time, but doing so does not immediately affect the `S_DAX` state.
+
+ 3. If the persistent `FS_XFLAG_DAX` flag is set on a directory, this flag will
+    be inherited by all regular files and subdirectories that are subsequently
+    created in this directory. Files and subdirectories that exist at the time
+    this flag is set or cleared on the parent directory are not modified by
+    this modification of the parent directory.
+
+ 4. There exist dax mount options which can override `FS_XFLAG_DAX` in the
+    setting of the `S_DAX` flag.  Given underlying storage which supports `DAX` the
+    following hold:
+
+    ``-o dax=inode``  means "follow `FS_XFLAG_DAX`" and is the default.
+
+    ``-o dax=never``  means "never set `S_DAX`, ignore `FS_XFLAG_DAX`."
+
+    ``-o dax=always`` means "always set `S_DAX`, ignore `FS_XFLAG_DAX`."
+
+    ``-o dax``      is a legacy option which is an alias for ``dax=always``.
+
+    .. warning::
+
+      The option ``-o dax`` may be removed in the future so ``-o dax=always`` is
+      the preferred method for specifying this behavior.
+
+    .. note::
+
+      Modifications to and the inheritance behavior of `FS_XFLAG_DAX` remain
+      the same even when the filesystem is mounted with a dax option.  However,
+      in-core inode state (`S_DAX`) will be overridden until the filesystem is
+      remounted with dax=inode and the inode is evicted from kernel memory.
+
+ 5. The `S_DAX` policy can be changed via:
+
+    a) Setting the parent directory `FS_XFLAG_DAX` as needed before files are
+       created
+
+    b) Setting the appropriate dax="foo" mount option
+
+    c) Changing the `FS_XFLAG_DAX` flag on existing regular files and
+       directories.  This has runtime constraints and limitations that are
+       described in 6) below.
+
+ 6. When changing the `S_DAX` policy via toggling the persistent `FS_XFLAG_DAX`
+    flag, the change to existing regular files won't take effect until the
+    files are closed by all processes.
+
+
+Details
+-------
+
+There are 2 per-file dax flags.  One is a persistent inode setting (`FS_XFLAG_DAX`)
+and the other is a volatile flag indicating the active state of the feature
+(`S_DAX`).
+
+`FS_XFLAG_DAX` is preserved within the filesystem.  This persistent config
+setting can be set, cleared and/or queried using the ``FS_IOC_FS[GS]ETXATTR`` ioctl
+(see ioctl_xfs_fsgetxattr(2)) or a utility such as 'xfs_io'.
+
+New files and directories automatically inherit `FS_XFLAG_DAX` from
+their parent directory **when created**.  Therefore, setting `FS_XFLAG_DAX` at
+directory creation time can be used to set a default behavior for an entire
+sub-tree.
+
+To clarify inheritance, here are 3 examples:
+
+Example A:
+
+.. code-block:: shell
+
+  mkdir -p a/b/c
+  xfs_io -c 'chattr +x' a
+  mkdir a/b/c/d
+  mkdir a/e
+
+  ------[outcome]------
+
+  dax: a,e
+  no dax: b,c,d
+
+Example B:
+
+.. code-block:: shell
+
+  mkdir a
+  xfs_io -c 'chattr +x' a
+  mkdir -p a/b/c/d
+
+  ------[outcome]------
+
+  dax: a,b,c,d
+  no dax:
+
+Example C:
+
+.. code-block:: shell
+
+  mkdir -p a/b/c
+  xfs_io -c 'chattr +x' c
+  mkdir a/b/c/d
+
+  ------[outcome]------
+
+  dax: c,d
+  no dax: a,b
+
+The current enabled state (`S_DAX`) is set when a file inode is instantiated in
+memory by the kernel.  It is set based on the underlying media support, the
+value of `FS_XFLAG_DAX` and the filesystem's dax mount option.
+
+statx can be used to query `S_DAX`.
+
+.. note::
+
+  Only regular files will ever have `S_DAX` set, and therefore statx
+  will never indicate that `S_DAX` is set on directories.
+
+Setting the `FS_XFLAG_DAX` flag (specifically or through inheritance) occurs even
+if the underlying media does not support dax and/or the filesystem is
+overridden with a mount option.
+
+
+Implementation Tips for Block Driver Writers
+--------------------------------------------
+
+To support `DAX` in your block driver, implement the 'direct_access'
+block device operation.  It is used to translate the sector number
+(expressed in units of 512-byte sectors) to a page frame number (pfn)
+that identifies the physical page for the memory.  It also returns a
+kernel virtual address that can be used to access the memory.
+
+The direct_access method takes a 'size' parameter that indicates the
+number of bytes being requested.  The function should return the number
+of bytes that can be contiguously accessed at that offset.  It may also
+return a negative errno if an error occurs.
+
+In order to support this method, the storage must be byte-accessible by
+the CPU at all times.  If your device uses paging techniques to expose
+a large amount of memory through a smaller window, then you cannot
+implement direct_access.  Equally, if your device can occasionally
+stall the CPU for an extended period, you should also not attempt to
+implement direct_access.
+
+These block devices may be used for inspiration:
+- brd: RAM backed block device driver
+- dcssblk: s390 dcss block device driver
+- pmem: NVDIMM persistent memory driver
+
+
+Implementation Tips for Filesystem Writers
+------------------------------------------
+
+Filesystem support consists of:
+
+* Adding support to mark inodes as being `DAX` by setting the `S_DAX` flag in
+  i_flags
+* Implementing ->read_iter and ->write_iter operations which use
+  :c:func:`dax_iomap_rw()` when inode has `S_DAX` flag set
+* Implementing an mmap file operation for `DAX` files which sets the
+  `VM_MIXEDMAP` and `VM_HUGEPAGE` flags on the `VMA`, and setting the vm_ops to
+  include handlers for fault, pmd_fault, page_mkwrite, pfn_mkwrite. These
+  handlers should probably call :c:func:`dax_iomap_fault()` passing the
+  appropriate fault size and iomap operations.
+* Calling :c:func:`iomap_zero_range()` passing appropriate iomap operations
+  instead of :c:func:`block_truncate_page()` for `DAX` files
+* Ensuring that there is sufficient locking between reads, writes,
+  truncates and page faults
+
+The iomap handlers for allocating blocks must make sure that allocated blocks
+are zeroed out and converted to written extents before being returned to avoid
+exposure of uninitialized data through mmap.
+
+These filesystems may be used for inspiration:
+
+.. seealso::
+
+  ext2: see Documentation/filesystems/ext2.rst
+
+.. seealso::
+
+  xfs:  see Documentation/admin-guide/xfs.rst
+
+.. seealso::
+
+  ext4: see Documentation/filesystems/ext4/
+
+
+Handling Media Errors
+---------------------
+
+The libnvdimm subsystem stores a record of known media error locations for
+each pmem block device (in gendisk->badblocks). If we fault at such location,
+or one with a latent error not yet discovered, the application can expect
+to receive a `SIGBUS`. Libnvdimm also allows clearing of these errors by simply
+writing the affected sectors (through the pmem driver, and if the underlying
+NVDIMM supports the clear_poison DSM defined by ACPI).
+
+Since `DAX` IO normally doesn't go through the ``driver/bio`` path, applications or
+sysadmins have an option to restore the lost data from a prior ``backup/inbuilt``
+redundancy in the following ways:
+
+1. Delete the affected file, and restore from a backup (sysadmin route):
+   This will free the filesystem blocks that were being used by the file,
+   and the next time they're allocated, they will be zeroed first, which
+   happens through the driver, and will clear bad sectors.
+
+2. Truncate or hole-punch the part of the file that has a bad-block (at least
+   an entire aligned sector has to be hole-punched, but not necessarily an
+   entire filesystem block).
+
+These are the two basic paths that allow `DAX` filesystems to continue operating
+in the presence of media errors. More robust error recovery mechanisms can be
+built on top of this in the future, for example, involving redundancy/mirroring
+provided at the block layer through DM, or additionally, at the filesystem
+level. These would have to rely on the above two tenets, that error clearing
+can happen either by sending an IO through the driver, or zeroing (also through
+the driver).
+
+
+Shortcomings
+------------
+
+Even if the kernel or its modules are stored on a filesystem that supports
+`DAX` on a block device that supports `DAX`, they will still be copied into RAM.
+
+The DAX code does not work correctly on architectures which have virtually
+mapped caches such as ARM, MIPS and SPARC.
+
+Calling :c:func:`get_user_pages()` on a range of user memory that has been
+mmaped from a `DAX` file will fail when there are no 'struct page' to describe
+those pages.  This problem has been addressed in some device drivers
+by adding optional struct page support for pages under the control of
+the driver (see `CONFIG_NVDIMM_PFN` in ``drivers/nvdimm`` for an example of
+how to do this). In the non struct page cases `O_DIRECT` reads/writes to
+those memory ranges from a non-`DAX` file will fail.
+
+
+.. note::
+
+  `O_DIRECT` reads/writes of a `DAX` file do work; it is the memory that
+  is being accessed that is key here.  Other things that will not work in
+  the non struct page case include RDMA, :c:func:`sendfile()` and
+  :c:func:`splice()`.
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
deleted file mode 100644
index e03c20564f3a7..0000000000000
--- a/Documentation/filesystems/dax.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-Direct Access for files
------------------------
-
-Motivation
-----------
-
-The page cache is usually used to buffer reads and writes to files.
-It is also used to provide the pages which are mapped into userspace
-by a call to mmap.
-
-For block devices that are memory-like, the page cache pages would be
-unnecessary copies of the original storage.  The DAX code removes the
-extra copy by performing reads and writes directly to the storage device.
-For file mappings, the storage device is mapped directly into userspace.
-
-
-Usage
------
-
-If you have a block device which supports DAX, you can make a filesystem
-on it as usual.  The DAX code currently only supports files with a block
-size equal to your kernel's PAGE_SIZE, so you may need to specify a block
-size when creating the filesystem.
-
-Currently 3 filesystems support DAX: ext2, ext4 and xfs.  Enabling DAX on them
-is different.
-
-Enabling DAX on ext2
------------------------------
-
-When mounting the filesystem, use the "-o dax" option on the command line or
-add 'dax' to the options in /etc/fstab.  This works to enable DAX on all files
-within the filesystem.  It is equivalent to the '-o dax=always' behavior below.
-
-
-Enabling DAX on xfs and ext4
-----------------------------
-
-Summary
--------
-
- 1. There exists an in-kernel file access mode flag S_DAX that corresponds to
-    the statx flag STATX_ATTR_DAX.  See the manpage for statx(2) for details
-    about this access mode.
-
- 2. There exists a persistent flag FS_XFLAG_DAX that can be applied to regular
-    files and directories. This advisory flag can be set or cleared at any
-    time, but doing so does not immediately affect the S_DAX state.
-
- 3. If the persistent FS_XFLAG_DAX flag is set on a directory, this flag will
-    be inherited by all regular files and subdirectories that are subsequently
-    created in this directory. Files and subdirectories that exist at the time
-    this flag is set or cleared on the parent directory are not modified by
-    this modification of the parent directory.
-
- 4. There exist dax mount options which can override FS_XFLAG_DAX in the
-    setting of the S_DAX flag.  Given underlying storage which supports DAX the
-    following hold:
-
-    "-o dax=inode"  means "follow FS_XFLAG_DAX" and is the default.
-
-    "-o dax=never"  means "never set S_DAX, ignore FS_XFLAG_DAX."
-
-    "-o dax=always" means "always set S_DAX ignore FS_XFLAG_DAX."
-
-    "-o dax"        is a legacy option which is an alias for "dax=always".
-		    This may be removed in the future so "-o dax=always" is
-		    the preferred method for specifying this behavior.
-
-    NOTE: Modifications to and the inheritance behavior of FS_XFLAG_DAX remain
-    the same even when the filesystem is mounted with a dax option.  However,
-    in-core inode state (S_DAX) will be overridden until the filesystem is
-    remounted with dax=inode and the inode is evicted from kernel memory.
-
- 5. The S_DAX policy can be changed via:
-
-    a) Setting the parent directory FS_XFLAG_DAX as needed before files are
-       created
-
-    b) Setting the appropriate dax="foo" mount option
-
-    c) Changing the FS_XFLAG_DAX flag on existing regular files and
-       directories.  This has runtime constraints and limitations that are
-       described in 6) below.
-
- 6. When changing the S_DAX policy via toggling the persistent FS_XFLAG_DAX
-    flag, the change to existing regular files won't take effect until the
-    files are closed by all processes.
-
-
-Details
--------
-
-There are 2 per-file dax flags.  One is a persistent inode setting (FS_XFLAG_DAX)
-and the other is a volatile flag indicating the active state of the feature
-(S_DAX).
-
-FS_XFLAG_DAX is preserved within the filesystem.  This persistent config
-setting can be set, cleared and/or queried using the FS_IOC_FS[GS]ETXATTR ioctl
-(see ioctl_xfs_fsgetxattr(2)) or an utility such as 'xfs_io'.
-
-New files and directories automatically inherit FS_XFLAG_DAX from
-their parent directory _when_ _created_.  Therefore, setting FS_XFLAG_DAX at
-directory creation time can be used to set a default behavior for an entire
-sub-tree.
-
-To clarify inheritance, here are 3 examples:
-
-Example A:
-
-mkdir -p a/b/c
-xfs_io -c 'chattr +x' a
-mkdir a/b/c/d
-mkdir a/e
-
-	dax: a,e
-	no dax: b,c,d
-
-Example B:
-
-mkdir a
-xfs_io -c 'chattr +x' a
-mkdir -p a/b/c/d
-
-	dax: a,b,c,d
-	no dax:
-
-Example C:
-
-mkdir -p a/b/c
-xfs_io -c 'chattr +x' c
-mkdir a/b/c/d
-
-	dax: c,d
-	no dax: a,b
-
-
-The current enabled state (S_DAX) is set when a file inode is instantiated in
-memory by the kernel.  It is set based on the underlying media support, the
-value of FS_XFLAG_DAX and the filesystem's dax mount option.
-
-statx can be used to query S_DAX.  NOTE that only regular files will ever have
-S_DAX set and therefore statx will never indicate that S_DAX is set on
-directories.
-
-Setting the FS_XFLAG_DAX flag (specifically or through inheritance) occurs even
-if the underlying media does not support dax and/or the filesystem is
-overridden with a mount option.
-
-
-
-Implementation Tips for Block Driver Writers
---------------------------------------------
-
-To support DAX in your block driver, implement the 'direct_access'
-block device operation.  It is used to translate the sector number
-(expressed in units of 512-byte sectors) to a page frame number (pfn)
-that identifies the physical page for the memory.  It also returns a
-kernel virtual address that can be used to access the memory.
-
-The direct_access method takes a 'size' parameter that indicates the
-number of bytes being requested.  The function should return the number
-of bytes that can be contiguously accessed at that offset.  It may also
-return a negative errno if an error occurs.
-
-In order to support this method, the storage must be byte-accessible by
-the CPU at all times.  If your device uses paging techniques to expose
-a large amount of memory through a smaller window, then you cannot
-implement direct_access.  Equally, if your device can occasionally
-stall the CPU for an extended period, you should also not attempt to
-implement direct_access.
-
-These block devices may be used for inspiration:
-- brd: RAM backed block device driver
-- dcssblk: s390 dcss block device driver
-- pmem: NVDIMM persistent memory driver
-
-
-Implementation Tips for Filesystem Writers
-------------------------------------------
-
-Filesystem support consists of
-- adding support to mark inodes as being DAX by setting the S_DAX flag in
-  i_flags
-- implementing ->read_iter and ->write_iter operations which use dax_iomap_rw()
-  when inode has S_DAX flag set
-- implementing an mmap file operation for DAX files which sets the
-  VM_MIXEDMAP and VM_HUGEPAGE flags on the VMA, and setting the vm_ops to
-  include handlers for fault, pmd_fault, page_mkwrite, pfn_mkwrite. These
-  handlers should probably call dax_iomap_fault() passing the appropriate
-  fault size and iomap operations.
-- calling iomap_zero_range() passing appropriate iomap operations instead of
-  block_truncate_page() for DAX files
-- ensuring that there is sufficient locking between reads, writes,
-  truncates and page faults
-
-The iomap handlers for allocating blocks must make sure that allocated blocks
-are zeroed out and converted to written extents before being returned to avoid
-exposure of uninitialized data through mmap.
-
-These filesystems may be used for inspiration:
-- ext2: see Documentation/filesystems/ext2.rst
-- ext4: see Documentation/filesystems/ext4/
-- xfs:  see Documentation/admin-guide/xfs.rst
-
-
-Handling Media Errors
----------------------
-
-The libnvdimm subsystem stores a record of known media error locations for
-each pmem block device (in gendisk->badblocks). If we fault at such location,
-or one with a latent error not yet discovered, the application can expect
-to receive a SIGBUS. Libnvdimm also allows clearing of these errors by simply
-writing the affected sectors (through the pmem driver, and if the underlying
-NVDIMM supports the clear_poison DSM defined by ACPI).
-
-Since DAX IO normally doesn't go through the driver/bio path, applications or
-sysadmins have an option to restore the lost data from a prior backup/inbuilt
-redundancy in the following ways:
-
-1. Delete the affected file, and restore from a backup (sysadmin route):
-   This will free the filesystem blocks that were being used by the file,
-   and the next time they're allocated, they will be zeroed first, which
-   happens through the driver, and will clear bad sectors.
-
-2. Truncate or hole-punch the part of the file that has a bad-block (at least
-   an entire aligned sector has to be hole-punched, but not necessarily an
-   entire filesystem block).
-
-These are the two basic paths that allow DAX filesystems to continue operating
-in the presence of media errors. More robust error recovery mechanisms can be
-built on top of this in the future, for example, involving redundancy/mirroring
-provided at the block layer through DM, or additionally, at the filesystem
-level. These would have to rely on the above two tenets, that error clearing
-can happen either by sending an IO through the driver, or zeroing (also through
-the driver).
-
-
-Shortcomings
-------------
-
-Even if the kernel or its modules are stored on a filesystem that supports
-DAX on a block device that supports DAX, they will still be copied into RAM.
-
-The DAX code does not work correctly on architectures which have virtually
-mapped caches such as ARM, MIPS and SPARC.
-
-Calling get_user_pages() on a range of user memory that has been mmaped
-from a DAX file will fail when there are no 'struct page' to describe
-those pages.  This problem has been addressed in some device drivers
-by adding optional struct page support for pages under the control of
-the driver (see CONFIG_NVDIMM_PFN in drivers/nvdimm for an example of
-how to do this). In the non struct page cases O_DIRECT reads/writes to
-those memory ranges from a non-DAX file will fail (note that O_DIRECT
-reads/writes _of a DAX file_ do work, it is the memory that is being
-accessed that is key here).  Other things that will not work in the
-non struct page case include RDMA, sendfile() and splice().
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index d4853cb919d29..246af51b277a6 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -77,6 +77,7 @@ Documentation for filesystem implementations.
    coda
    configfs
    cramfs
+   dax
    debugfs
    dlmfs
    ecryptfs
-- 
GitLab


From 814be609baae62aaa6c02fa6f3ad66cff32a6d15 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 3 Jun 2021 16:34:51 +0800
Subject: [PATCH 2281/3804] drivers/perf: hisi: Fix data source control

'Data source' is a new function for HHA PMU and config / clear
interface was wrong by mistake. 'HHA_DATSRC_CTRL' register is
mainly used for data source configuration, if we enable bit0
as driver, it will go on count the event and we didn't check
it carefully. So fix the issue and do as the initial purpose.

Fixes: 932f6a99f9b0 ("drivers/perf: hisi: Add new functions for HHA PMU")
Reported-by: kernel test robot <lkp@intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Link: https://lore.kernel.org/r/1622709291-37996-1-git-send-email-zhangshaokun@hisilicon.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index 12b2c5e6d488d..393513150106a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -90,7 +90,7 @@ static void hisi_hha_pmu_config_ds(struct perf_event *event)
 
 		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
 		val |= HHA_DATSRC_SKT_EN;
-		writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL);
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
 	}
 }
 
@@ -104,7 +104,7 @@ static void hisi_hha_pmu_clear_ds(struct perf_event *event)
 
 		val = readl(hha_pmu->base + HHA_DATSRC_CTRL);
 		val &= ~HHA_DATSRC_SKT_EN;
-		writel(ds_skt, hha_pmu->base + HHA_DATSRC_CTRL);
+		writel(val, hha_pmu->base + HHA_DATSRC_CTRL);
 	}
 }
 
-- 
GitLab


From 92638b4e1b47f97d7269e74465dedf73096f777d Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 2 Jun 2021 16:52:27 -0700
Subject: [PATCH 2282/3804] mm: arch: remove indirection level in
 alloc_zeroed_user_highpage_movable()

In an upcoming change we would like to add a flag to
GFP_HIGHUSER_MOVABLE so that it would no longer be an OR
of GFP_HIGHUSER and __GFP_MOVABLE. This poses a problem for
alloc_zeroed_user_highpage_movable() which passes __GFP_MOVABLE
into an arch-specific __alloc_zeroed_user_highpage() hook which ORs
in GFP_HIGHUSER.

Since __alloc_zeroed_user_highpage() is only ever called from
alloc_zeroed_user_highpage_movable(), we can remove one level
of indirection here. Remove __alloc_zeroed_user_highpage(),
make alloc_zeroed_user_highpage_movable() the hook, and use
GFP_HIGHUSER_MOVABLE in the hook implementations so that they will
pick up the new flag that we are going to add.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Ic6361c657b2cdcd896adbe0cf7cb5a7fbb1ed7bf
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210602235230.3928842-2-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/alpha/include/asm/page.h   |  6 +++---
 arch/arm64/include/asm/page.h   |  6 +++---
 arch/ia64/include/asm/page.h    |  6 +++---
 arch/m68k/include/asm/page_no.h |  6 +++---
 arch/s390/include/asm/page.h    |  6 +++---
 arch/x86/include/asm/page.h     |  6 +++---
 include/linux/highmem.h         | 35 ++++++++-------------------------
 7 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/arch/alpha/include/asm/page.h b/arch/alpha/include/asm/page.h
index 268f99b4602bd..18f48a6f2ff6d 100644
--- a/arch/alpha/include/asm/page.h
+++ b/arch/alpha/include/asm/page.h
@@ -17,9 +17,9 @@
 extern void clear_page(void *page);
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vmaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vmaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 extern void copy_page(void * _to, void * _from);
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e89..e1fc0f60e79f8 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -28,9 +28,9 @@ void copy_user_highpage(struct page *to, struct page *from,
 void copy_highpage(struct page *to, struct page *from);
 #define __HAVE_ARCH_COPY_HIGHPAGE
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index f4dc81fa71462..1b990466d5404 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -82,16 +82,16 @@ do {						\
 } while (0)
 
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr)		\
+#define alloc_zeroed_user_highpage_movable(vma, vaddr)			\
 ({									\
 	struct page *page = alloc_page_vma(				\
-		GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr);	\
+		GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr);		\
 	if (page)							\
  		flush_dcache_page(page);				\
 	page;								\
 })
 
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 #define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 8d0f862ee9d79..c9d0d84158a4e 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -13,9 +13,9 @@ extern unsigned long memory_end;
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 #define __pa(vaddr)		((unsigned long)(vaddr))
 #define __va(paddr)		((void *)((unsigned long)(paddr)))
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index cc98f9b78fd4b..479dc76e0eca1 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -68,9 +68,9 @@ static inline void copy_page(void *to, void *from)
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 /*
  * These are used to make use of C type-checking..
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 7555b48803a8c..4d5810c8fab74 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -34,9 +34,9 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
 	copy_page(to, from);
 }
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
+	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
 #ifndef __pa
 #define __pa(x)		__phys_addr((unsigned long)(x))
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 832b49b50c7bf..54d0643b8fcfb 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -152,28 +152,24 @@ static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
 }
 #endif
 
-#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 /**
- * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
- * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE
+ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
  * @vma: The VMA the page is to be allocated for
  * @vaddr: The virtual address the page will be inserted into
  *
- * This function will allocate a page for a VMA but the caller is expected
- * to specify via movableflags whether the page will be movable in the
- * future or not
+ * This function will allocate a page for a VMA that the caller knows will
+ * be able to migrate in the future using move_pages() or reclaimed
  *
  * An architecture may override this function by defining
- * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own
+ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE and providing their own
  * implementation.
  */
 static inline struct page *
-__alloc_zeroed_user_highpage(gfp_t movableflags,
-			struct vm_area_struct *vma,
-			unsigned long vaddr)
+alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+				   unsigned long vaddr)
 {
-	struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
-			vma, vaddr);
+	struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
 
 	if (page)
 		clear_user_highpage(page, vaddr);
@@ -182,21 +178,6 @@ __alloc_zeroed_user_highpage(gfp_t movableflags,
 }
 #endif
 
-/**
- * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
- * @vma: The VMA the page is to be allocated for
- * @vaddr: The virtual address the page will be inserted into
- *
- * This function will allocate a page for a VMA that the caller knows will
- * be able to migrate in the future using move_pages() or reclaimed
- */
-static inline struct page *
-alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
-					unsigned long vaddr)
-{
-	return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
-}
-
 static inline void clear_highpage(struct page *page)
 {
 	void *kaddr = kmap_atomic(page);
-- 
GitLab


From 7a3b835371883558eb63e069d891bd87f562380d Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 2 Jun 2021 16:52:28 -0700
Subject: [PATCH 2283/3804] kasan: use separate (un)poison implementation for
 integrated init

Currently, with integrated init, page_alloc.c needs to know whether
kasan_alloc_pages() will zero initialize memory, but this will start
becoming more complicated once we start adding tag initialization
support for user pages. To avoid page_alloc.c needing to know more
details of what integrated init will do, move the unpoisoning logic
for integrated init into the HW tags implementation. Currently the
logic is identical but it will diverge in subsequent patches.

For symmetry do the same for poisoning although this logic will
be unaffected by subsequent patches.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://linux-review.googlesource.com/id/I2c550234c6c4a893c48c18ff0c6ce658c7c67056
Link: https://lore.kernel.org/r/20210602235230.3928842-3-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/kasan.h | 64 +++++++++++++++++++++++++------------------
 mm/kasan/common.c     |  4 +--
 mm/kasan/hw_tags.c    | 22 +++++++++++++++
 mm/mempool.c          |  6 ++--
 mm/page_alloc.c       | 55 +++++++++++++++++++------------------
 5 files changed, 95 insertions(+), 56 deletions(-)

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index b1678a61e6a76..a1c7ce5f3e4f2 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_KASAN_H
 #define _LINUX_KASAN_H
 
+#include <linux/bug.h>
 #include <linux/static_key.h>
 #include <linux/types.h>
 
@@ -79,14 +80,6 @@ static inline void kasan_disable_current(void) {}
 
 #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
-#ifdef CONFIG_KASAN
-
-struct kasan_cache {
-	int alloc_meta_offset;
-	int free_meta_offset;
-	bool is_kmalloc;
-};
-
 #ifdef CONFIG_KASAN_HW_TAGS
 
 DECLARE_STATIC_KEY_FALSE(kasan_flag_enabled);
@@ -101,11 +94,14 @@ static inline bool kasan_has_integrated_init(void)
 	return kasan_enabled();
 }
 
+void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags);
+void kasan_free_pages(struct page *page, unsigned int order);
+
 #else /* CONFIG_KASAN_HW_TAGS */
 
 static inline bool kasan_enabled(void)
 {
-	return true;
+	return IS_ENABLED(CONFIG_KASAN);
 }
 
 static inline bool kasan_has_integrated_init(void)
@@ -113,8 +109,30 @@ static inline bool kasan_has_integrated_init(void)
 	return false;
 }
 
+static __always_inline void kasan_alloc_pages(struct page *page,
+					      unsigned int order, gfp_t flags)
+{
+	/* Only available for integrated init. */
+	BUILD_BUG();
+}
+
+static __always_inline void kasan_free_pages(struct page *page,
+					     unsigned int order)
+{
+	/* Only available for integrated init. */
+	BUILD_BUG();
+}
+
 #endif /* CONFIG_KASAN_HW_TAGS */
 
+#ifdef CONFIG_KASAN
+
+struct kasan_cache {
+	int alloc_meta_offset;
+	int free_meta_offset;
+	bool is_kmalloc;
+};
+
 slab_flags_t __kasan_never_merge(void);
 static __always_inline slab_flags_t kasan_never_merge(void)
 {
@@ -130,20 +148,20 @@ static __always_inline void kasan_unpoison_range(const void *addr, size_t size)
 		__kasan_unpoison_range(addr, size);
 }
 
-void __kasan_alloc_pages(struct page *page, unsigned int order, bool init);
-static __always_inline void kasan_alloc_pages(struct page *page,
+void __kasan_poison_pages(struct page *page, unsigned int order, bool init);
+static __always_inline void kasan_poison_pages(struct page *page,
 						unsigned int order, bool init)
 {
 	if (kasan_enabled())
-		__kasan_alloc_pages(page, order, init);
+		__kasan_poison_pages(page, order, init);
 }
 
-void __kasan_free_pages(struct page *page, unsigned int order, bool init);
-static __always_inline void kasan_free_pages(struct page *page,
-						unsigned int order, bool init)
+void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init);
+static __always_inline void kasan_unpoison_pages(struct page *page,
+						 unsigned int order, bool init)
 {
 	if (kasan_enabled())
-		__kasan_free_pages(page, order, init);
+		__kasan_unpoison_pages(page, order, init);
 }
 
 void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
@@ -285,21 +303,15 @@ void kasan_restore_multi_shot(bool enabled);
 
 #else /* CONFIG_KASAN */
 
-static inline bool kasan_enabled(void)
-{
-	return false;
-}
-static inline bool kasan_has_integrated_init(void)
-{
-	return false;
-}
 static inline slab_flags_t kasan_never_merge(void)
 {
 	return 0;
 }
 static inline void kasan_unpoison_range(const void *address, size_t size) {}
-static inline void kasan_alloc_pages(struct page *page, unsigned int order, bool init) {}
-static inline void kasan_free_pages(struct page *page, unsigned int order, bool init) {}
+static inline void kasan_poison_pages(struct page *page, unsigned int order,
+				      bool init) {}
+static inline void kasan_unpoison_pages(struct page *page, unsigned int order,
+					bool init) {}
 static inline void kasan_cache_create(struct kmem_cache *cache,
 				      unsigned int *size,
 				      slab_flags_t *flags) {}
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 6bb87f2acd4eb..0ecd293af3443 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -97,7 +97,7 @@ slab_flags_t __kasan_never_merge(void)
 	return 0;
 }
 
-void __kasan_alloc_pages(struct page *page, unsigned int order, bool init)
+void __kasan_unpoison_pages(struct page *page, unsigned int order, bool init)
 {
 	u8 tag;
 	unsigned long i;
@@ -111,7 +111,7 @@ void __kasan_alloc_pages(struct page *page, unsigned int order, bool init)
 	kasan_unpoison(page_address(page), PAGE_SIZE << order, init);
 }
 
-void __kasan_free_pages(struct page *page, unsigned int order, bool init)
+void __kasan_poison_pages(struct page *page, unsigned int order, bool init)
 {
 	if (likely(!PageHighMem(page)))
 		kasan_poison(page_address(page), PAGE_SIZE << order,
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 4004388b4e4bc..9d0f6f9340167 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -238,6 +238,28 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 	return &alloc_meta->free_track[0];
 }
 
+void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
+{
+	/*
+	 * This condition should match the one in post_alloc_hook() in
+	 * page_alloc.c.
+	 */
+	bool init = !want_init_on_free() && want_init_on_alloc(flags);
+
+	kasan_unpoison_pages(page, order, init);
+}
+
+void kasan_free_pages(struct page *page, unsigned int order)
+{
+	/*
+	 * This condition should match the one in free_pages_prepare() in
+	 * page_alloc.c.
+	 */
+	bool init = want_init_on_free();
+
+	kasan_poison_pages(page, order, init);
+}
+
 #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
 
 void kasan_set_tagging_report_once(bool state)
diff --git a/mm/mempool.c b/mm/mempool.c
index a258cf4de5756..0b8afbec3e358 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -106,7 +106,8 @@ static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
 	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
 		kasan_slab_free_mempool(element);
 	else if (pool->alloc == mempool_alloc_pages)
-		kasan_free_pages(element, (unsigned long)pool->pool_data, false);
+		kasan_poison_pages(element, (unsigned long)pool->pool_data,
+				   false);
 }
 
 static void kasan_unpoison_element(mempool_t *pool, void *element)
@@ -114,7 +115,8 @@ static void kasan_unpoison_element(mempool_t *pool, void *element)
 	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
 		kasan_unpoison_range(element, __ksize(element));
 	else if (pool->alloc == mempool_alloc_pages)
-		kasan_alloc_pages(element, (unsigned long)pool->pool_data, false);
+		kasan_unpoison_pages(element, (unsigned long)pool->pool_data,
+				     false);
 }
 
 static __always_inline void add_element(mempool_t *pool, void *element)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aaa1655cf6820..4fddb7cac3c69 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -382,7 +382,7 @@ int page_group_by_mobility_disabled __read_mostly;
 static DEFINE_STATIC_KEY_TRUE(deferred_pages);
 
 /*
- * Calling kasan_free_pages() only after deferred memory initialization
+ * Calling kasan_poison_pages() only after deferred memory initialization
  * has completed. Poisoning pages during deferred memory init will greatly
  * lengthen the process and cause problem in large memory systems as the
  * deferred pages initialization is done with interrupt disabled.
@@ -394,15 +394,11 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
  * on-demand allocation and then freed again before the deferred pages
  * initialization is done, but this is not likely to happen.
  */
-static inline void kasan_free_nondeferred_pages(struct page *page, int order,
-						bool init, fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
 {
-	if (static_branch_unlikely(&deferred_pages))
-		return;
-	if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-			(fpi_flags & FPI_SKIP_KASAN_POISON))
-		return;
-	kasan_free_pages(page, order, init);
+	return static_branch_unlikely(&deferred_pages) ||
+	       (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+		(fpi_flags & FPI_SKIP_KASAN_POISON));
 }
 
 /* Returns true if the struct page for the pfn is uninitialised */
@@ -453,13 +449,10 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	return false;
 }
 #else
-static inline void kasan_free_nondeferred_pages(struct page *page, int order,
-						bool init, fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
 {
-	if (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-			(fpi_flags & FPI_SKIP_KASAN_POISON))
-		return;
-	kasan_free_pages(page, order, init);
+	return (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
+		(fpi_flags & FPI_SKIP_KASAN_POISON));
 }
 
 static inline bool early_page_uninitialised(unsigned long pfn)
@@ -1245,7 +1238,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 			unsigned int order, bool check_free, fpi_t fpi_flags)
 {
 	int bad = 0;
-	bool init;
+	bool skip_kasan_poison = should_skip_kasan_poison(fpi_flags);
 
 	VM_BUG_ON_PAGE(PageTail(page), page);
 
@@ -1314,10 +1307,17 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	 * With hardware tag-based KASAN, memory tags must be set before the
 	 * page becomes unavailable via debug_pagealloc or arch_free_page.
 	 */
-	init = want_init_on_free();
-	if (init && !kasan_has_integrated_init())
-		kernel_init_free_pages(page, 1 << order);
-	kasan_free_nondeferred_pages(page, order, init, fpi_flags);
+	if (kasan_has_integrated_init()) {
+		if (!skip_kasan_poison)
+			kasan_free_pages(page, order);
+	} else {
+		bool init = want_init_on_free();
+
+		if (init)
+			kernel_init_free_pages(page, 1 << order);
+		if (!skip_kasan_poison)
+			kasan_poison_pages(page, order, init);
+	}
 
 	/*
 	 * arch_free_page() can make the page's contents inaccessible.  s390
@@ -2324,8 +2324,6 @@ static bool check_new_pages(struct page *page, unsigned int order)
 inline void post_alloc_hook(struct page *page, unsigned int order,
 				gfp_t gfp_flags)
 {
-	bool init;
-
 	set_page_private(page, 0);
 	set_page_refcounted(page);
 
@@ -2344,10 +2342,15 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	 * kasan_alloc_pages and kernel_init_free_pages must be
 	 * kept together to avoid discrepancies in behavior.
 	 */
-	init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
-	kasan_alloc_pages(page, order, init);
-	if (init && !kasan_has_integrated_init())
-		kernel_init_free_pages(page, 1 << order);
+	if (kasan_has_integrated_init()) {
+		kasan_alloc_pages(page, order, gfp_flags);
+	} else {
+		bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags);
+
+		kasan_unpoison_pages(page, order, init);
+		if (init)
+			kernel_init_free_pages(page, 1 << order);
+	}
 
 	set_page_owner(page, order, gfp_flags);
 }
-- 
GitLab


From 013bb59dbb7cf876449df860946458a595a96d51 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 2 Jun 2021 16:52:29 -0700
Subject: [PATCH 2284/3804] arm64: mte: handle tags zeroing at page allocation
 time

Currently, on an anonymous page fault, the kernel allocates a zeroed
page and maps it in user space. If the mapping is tagged (PROT_MTE),
set_pte_at() additionally clears the tags. It is, however, more
efficient to clear the tags at the same time as zeroing the data on
allocation. To avoid clearing the tags on any page (which may not be
mapped as tagged), only do this if the vma flags contain VM_MTE. This
requires introducing a new GFP flag that is used to determine whether
to clear the tags.

The DC GZVA instruction with a 0 top byte (and 0 tag) requires
top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
whether CONFIG_KASAN_HW_TAGS is enabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20210602235230.3928842-4-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mte.h  |  4 ++++
 arch/arm64/include/asm/page.h |  8 ++++++--
 arch/arm64/lib/mte.S          | 20 ++++++++++++++++++++
 arch/arm64/mm/fault.c         | 26 ++++++++++++++++++++++++++
 arch/arm64/mm/proc.S          | 10 +++++++---
 include/linux/gfp.h           |  9 +++++++--
 include/linux/highmem.h       |  8 ++++++++
 mm/kasan/hw_tags.c            |  9 ++++++++-
 mm/page_alloc.c               | 13 ++++++++++---
 9 files changed, 96 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7e..67bf259ae7689 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,6 +37,7 @@ void mte_free_tag_storage(char *storage);
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged	PG_arch_2
 
+void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
@@ -53,6 +54,9 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged	0
 
+static inline void mte_zero_clear_page_tags(void *addr)
+{
+}
 static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 {
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index e1fc0f60e79f8..ed1b9dcf12b29 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -13,6 +13,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/types.h> /* for gfp_t */
 #include <asm/pgtable-types.h>
 
 struct page;
@@ -28,10 +29,13 @@ void copy_user_highpage(struct page *to, struct page *from,
 void copy_highpage(struct page *to, struct page *from);
 #define __HAVE_ARCH_COPY_HIGHPAGE
 
-#define alloc_zeroed_user_highpage_movable(vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, vma, vaddr)
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+						unsigned long vaddr);
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE_MOVABLE
 
+void tag_clear_highpage(struct page *to);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 351537c12f36e..e83643b3995f4 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -36,6 +36,26 @@ SYM_FUNC_START(mte_clear_page_tags)
 	ret
 SYM_FUNC_END(mte_clear_page_tags)
 
+/*
+ * Zero the page and tags at the same time
+ *
+ * Parameters:
+ *	x0 - address to the beginning of the page
+ */
+SYM_FUNC_START(mte_zero_clear_page_tags)
+	mrs	x1, dczid_el0
+	and	w1, w1, #0xf
+	mov	x2, #4
+	lsl	x1, x2, x1
+	and	x0, x0, #(1 << MTE_TAG_SHIFT) - 1	// clear the tag
+
+1:	dc	gzva, x0
+	add	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+SYM_FUNC_END(mte_zero_clear_page_tags)
+
 /*
  * Copy the tags from the source page to the destination one
  *   x0 - address of the destination page
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 871c82ab0a309..180c0343d82a2 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -921,3 +921,29 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
 	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);
+
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+						unsigned long vaddr)
+{
+	gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_ZERO;
+
+	/*
+	 * If the page is mapped with PROT_MTE, initialise the tags at the
+	 * point of allocation and page zeroing as this is usually faster than
+	 * separate DC ZVA and STGM.
+	 */
+	if (vma->vm_flags & VM_MTE)
+		flags |= __GFP_ZEROTAGS;
+
+	return alloc_page_vma(flags, vma, vaddr);
+}
+
+void tag_clear_highpage(struct page *page)
+{
+	mte_zero_clear_page_tags(page_address(page));
+	page_kasan_tag_reset(page);
+	set_bit(PG_mte_tagged, &page->flags);
+}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 97d7bcd8d4f26..48fd1df3d05a2 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,9 +46,13 @@
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
 #else
-#define TCR_KASAN_HW_FLAGS 0
+/*
+ * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
+ * TBI being enabled at EL1.
+ */
+#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
 #endif
 
 /*
@@ -464,7 +468,7 @@ SYM_FUNC_START(__cpu_setup)
 	msr_s	SYS_TFSRE0_EL1, xzr
 
 	/* set the TCR_EL1 bits */
-	mov_q	x10, TCR_KASAN_HW_FLAGS
+	mov_q	x10, TCR_MTE_FLAGS
 	orr	tcr, tcr, x10
 1:
 #endif
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 11da8af067040..68ba237365dcc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -53,8 +53,9 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x100000u
 #define ___GFP_THISNODE		0x200000u
 #define ___GFP_ACCOUNT		0x400000u
+#define ___GFP_ZEROTAGS		0x800000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x800000u
+#define ___GFP_NOLOCKDEP	0x1000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
@@ -229,16 +230,20 @@ struct vm_area_struct;
  * %__GFP_COMP address compound page metadata.
  *
  * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
+ * __GFP_ZERO is set.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
+#define __GFP_ZEROTAGS	((__force gfp_t)___GFP_ZEROTAGS)
 
 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 54d0643b8fcfb..8c6e8e996c875 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -185,6 +185,14 @@ static inline void clear_highpage(struct page *page)
 	kunmap_atomic(kaddr);
 }
 
+#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+
+static inline void tag_clear_highpage(struct page *page)
+{
+}
+
+#endif
+
 /*
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 9d0f6f9340167..41fd5326ee0ab 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -246,7 +246,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
 	 */
 	bool init = !want_init_on_free() && want_init_on_alloc(flags);
 
-	kasan_unpoison_pages(page, order, init);
+	if (flags & __GFP_ZEROTAGS) {
+		int i;
+
+		for (i = 0; i != 1 << order; ++i)
+			tag_clear_highpage(page + i);
+	} else {
+		kasan_unpoison_pages(page, order, init);
+	}
 }
 
 void kasan_free_pages(struct page *page, unsigned int order)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4fddb7cac3c69..13937e793fdaf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1219,10 +1219,16 @@ out:
 	return ret;
 }
 
-static void kernel_init_free_pages(struct page *page, int numpages)
+static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
 {
 	int i;
 
+	if (zero_tags) {
+		for (i = 0; i < numpages; i++)
+			tag_clear_highpage(page + i);
+		return;
+	}
+
 	/* s390's use of memset() could override KASAN redzones. */
 	kasan_disable_current();
 	for (i = 0; i < numpages; i++) {
@@ -1314,7 +1320,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 		bool init = want_init_on_free();
 
 		if (init)
-			kernel_init_free_pages(page, 1 << order);
+			kernel_init_free_pages(page, 1 << order, false);
 		if (!skip_kasan_poison)
 			kasan_poison_pages(page, order, init);
 	}
@@ -2349,7 +2355,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 
 		kasan_unpoison_pages(page, order, init);
 		if (init)
-			kernel_init_free_pages(page, 1 << order);
+			kernel_init_free_pages(page, 1 << order,
+					       gfp_flags & __GFP_ZEROTAGS);
 	}
 
 	set_page_owner(page, order, gfp_flags);
-- 
GitLab


From c275c5c6d50a0518cdb0584e85905d10e7cefc6e Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Wed, 2 Jun 2021 16:52:30 -0700
Subject: [PATCH 2285/3804] kasan: disable freed user page poisoning with HW
 tags

Poisoning freed pages protects against kernel use-after-free. The
likelihood of such a bug involving kernel pages is significantly higher
than that for user pages. At the same time, poisoning freed pages can
impose a significant performance cost, which cannot always be justified
for user pages given the lower probability of finding a bug. Therefore,
disable freed user page poisoning when using HW tags. We identify
"user" pages via the flag set GFP_HIGHUSER_MOVABLE, which indicates
a strong likelihood of not being directly accessible to the kernel.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://linux-review.googlesource.com/id/I716846e2de8ef179f44e835770df7e6307be96c9
Link: https://lore.kernel.org/r/20210602235230.3928842-5-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/gfp.h            | 13 ++++++++++---
 include/linux/page-flags.h     |  9 +++++++++
 include/trace/events/mmflags.h |  9 ++++++++-
 mm/kasan/hw_tags.c             |  3 +++
 mm/page_alloc.c                | 12 +++++++-----
 5 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 68ba237365dcc..e6102dfa4faad 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -54,8 +54,9 @@ struct vm_area_struct;
 #define ___GFP_THISNODE		0x200000u
 #define ___GFP_ACCOUNT		0x400000u
 #define ___GFP_ZEROTAGS		0x800000u
+#define ___GFP_SKIP_KASAN_POISON	0x1000000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x1000000u
+#define ___GFP_NOLOCKDEP	0x2000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
@@ -233,17 +234,22 @@ struct vm_area_struct;
  *
  * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
  * __GFP_ZERO is set.
+ *
+ * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned
+ * on deallocation. Typically used for userspace pages. Currently only has an
+ * effect in HW tags mode.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
 #define __GFP_ZEROTAGS	((__force gfp_t)___GFP_ZEROTAGS)
+#define __GFP_SKIP_KASAN_POISON	((__force gfp_t)___GFP_SKIP_KASAN_POISON)
 
 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
@@ -324,7 +330,8 @@ struct vm_area_struct;
 #define GFP_DMA		__GFP_DMA
 #define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
+#define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE | \
+			 __GFP_SKIP_KASAN_POISON)
 #define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
 #define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 04a34c08e0a65..40e2c5000585f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -137,6 +137,9 @@ enum pageflags {
 #endif
 #ifdef CONFIG_64BIT
 	PG_arch_2,
+#endif
+#ifdef CONFIG_KASAN_HW_TAGS
+	PG_skip_kasan_poison,
 #endif
 	__NR_PAGEFLAGS,
 
@@ -443,6 +446,12 @@ TESTCLEARFLAG(Young, young, PF_ANY)
 PAGEFLAG(Idle, idle, PF_ANY)
 #endif
 
+#ifdef CONFIG_KASAN_HW_TAGS
+PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
+#else
+PAGEFLAG_FALSE(SkipKASanPoison)
+#endif
+
 /*
  * PageReported() is used to track reported free pages within the Buddy
  * allocator. We can use the non-atomic version of the test and set
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 629c7a0eaff23..390270e00a1d1 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -85,6 +85,12 @@
 #define IF_HAVE_PG_ARCH_2(flag,string)
 #endif
 
+#ifdef CONFIG_KASAN_HW_TAGS
+#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) ,{1UL << flag, string}
+#else
+#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string)
+#endif
+
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
 	{1UL << PG_waiters,		"waiters"	},		\
@@ -112,7 +118,8 @@ IF_HAVE_PG_UNCACHED(PG_uncached,	"uncached"	)		\
 IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)		\
-IF_HAVE_PG_ARCH_2(PG_arch_2,		"arch_2"	)
+IF_HAVE_PG_ARCH_2(PG_arch_2,		"arch_2"	)		\
+IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
 
 #define show_page_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",				\
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 41fd5326ee0ab..ed5e5b833d610 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -246,6 +246,9 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
 	 */
 	bool init = !want_init_on_free() && want_init_on_alloc(flags);
 
+	if (flags & __GFP_SKIP_KASAN_POISON)
+		SetPageSkipKASanPoison(page);
+
 	if (flags & __GFP_ZEROTAGS) {
 		int i;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 13937e793fdaf..5ad76e540a226 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -394,11 +394,12 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
  * on-demand allocation and then freed again before the deferred pages
  * initialization is done, but this is not likely to happen.
  */
-static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
 {
 	return static_branch_unlikely(&deferred_pages) ||
 	       (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-		(fpi_flags & FPI_SKIP_KASAN_POISON));
+		(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
+	       PageSkipKASanPoison(page);
 }
 
 /* Returns true if the struct page for the pfn is uninitialised */
@@ -449,10 +450,11 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
 	return false;
 }
 #else
-static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
 {
 	return (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-		(fpi_flags & FPI_SKIP_KASAN_POISON));
+		(fpi_flags & FPI_SKIP_KASAN_POISON)) ||
+	       PageSkipKASanPoison(page);
 }
 
 static inline bool early_page_uninitialised(unsigned long pfn)
@@ -1244,7 +1246,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 			unsigned int order, bool check_free, fpi_t fpi_flags)
 {
 	int bad = 0;
-	bool skip_kasan_poison = should_skip_kasan_poison(fpi_flags);
+	bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
 
 	VM_BUG_ON_PAGE(PageTail(page), page);
 
-- 
GitLab


From 6325ce1542bcee2813558e12055794b7a40d4615 Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Fri, 4 Jun 2021 15:25:40 +0200
Subject: [PATCH 2286/3804] platform/surface: dtx: Add missing mutex_destroy()
 call in failure path

When we fail to open the device file due to DTX being shut down, the
mutex is initialized but never destroyed. We are destroying it when
releasing the file, so add the missing call in the failure path as well.

Fixes: 1d609992832e ("platform/surface: Add DTX driver")
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Link: https://lore.kernel.org/r/20210604132540.533036-1-luzmaximilian@gmail.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/surface/surface_dtx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/surface/surface_dtx.c b/drivers/platform/surface/surface_dtx.c
index 5d9b758a99bbc..1203b9a829939 100644
--- a/drivers/platform/surface/surface_dtx.c
+++ b/drivers/platform/surface/surface_dtx.c
@@ -427,6 +427,7 @@ static int surface_dtx_open(struct inode *inode, struct file *file)
 	 */
 	if (test_bit(SDTX_DEVICE_SHUTDOWN_BIT, &ddev->flags)) {
 		up_write(&ddev->client_lock);
+		mutex_destroy(&client->read_lock);
 		sdtx_device_put(client->ddev);
 		kfree(client);
 		return -ENODEV;
-- 
GitLab


From 701b54bcb7d0d72ee3f032afc900608708409be0 Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Thu, 3 Jun 2021 20:28:27 +0300
Subject: [PATCH 2287/3804] platform/mellanox: mlxreg-hotplug: Revert "move to
 use request_irq by IRQF_NO_AUTOEN flag"

The reverted commit causes mlxreg-hotplug probing to fail:
request_threaded_irq() returns -EINVAL because this condition is true:
((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN))
after flag "IRQF_NO_AUTOEN" has been added to:
	err = devm_request_irq(&pdev->dev, priv->irq,
			       mlxreg_hotplug_irq_handler, IRQF_TRIGGER_FALLING
			       | IRQF_SHARED | IRQF_NO_AUTOEN,
			       "mlxreg-hotplug", priv);

This reverts commit bee3ecfed0fc ("platform/mellanox: mlxreg-hotplug: move
to use request_irq by IRQF_NO_AUTOEN flag").

Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20210603172827.2599908-1-c_mykolak@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/mellanox/mlxreg-hotplug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index a9db2f32658f2..b013445147dd5 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -683,13 +683,13 @@ static int mlxreg_hotplug_probe(struct platform_device *pdev)
 
 	err = devm_request_irq(&pdev->dev, priv->irq,
 			       mlxreg_hotplug_irq_handler, IRQF_TRIGGER_FALLING
-			       | IRQF_SHARED | IRQF_NO_AUTOEN,
-			       "mlxreg-hotplug", priv);
+			       | IRQF_SHARED, "mlxreg-hotplug", priv);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to request irq: %d\n", err);
 		return err;
 	}
 
+	disable_irq(priv->irq);
 	spin_lock_init(&priv->lock);
 	INIT_DELAYED_WORK(&priv->dwork_irq, mlxreg_hotplug_work_handler);
 	dev_set_drvdata(&pdev->dev, priv);
-- 
GitLab


From 9f78c607600ce4f2a952560de26534715236f612 Mon Sep 17 00:00:00 2001
From: Roja Rani Yarubandi <rojay@codeaurora.org>
Date: Tue, 25 May 2021 18:40:50 +0530
Subject: [PATCH 2288/3804] i2c: qcom-geni: Add shutdown callback for i2c

If the hardware is still accessing memory after SMMU translation
is disabled (as part of smmu shutdown callback), then the
IOVAs (I/O virtual addresses) which it was using will go on the bus
as physical addresses, which will result in unknown crashes
like NoC/interconnect errors.

So, implement shutdown callback for i2c driver to suspend the bus
during system "reboot" or "shutdown".

Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller")
Signed-off-by: Roja Rani Yarubandi <rojay@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 07b710a774df4..8f67ebe6096ce 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -650,6 +650,14 @@ static int geni_i2c_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void geni_i2c_shutdown(struct platform_device *pdev)
+{
+	struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
+
+	/* Make client i2c transfers start failing */
+	i2c_mark_adapter_suspended(&gi2c->adap);
+}
+
 static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
 {
 	int ret;
@@ -714,6 +722,7 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
 static struct platform_driver geni_i2c_driver = {
 	.probe  = geni_i2c_probe,
 	.remove = geni_i2c_remove,
+	.shutdown = geni_i2c_shutdown,
 	.driver = {
 		.name = "geni_i2c",
 		.pm = &geni_i2c_pm_ops,
-- 
GitLab


From 57648e860485de39c800a89f849fdd03c2d31d15 Mon Sep 17 00:00:00 2001
From: Roja Rani Yarubandi <rojay@codeaurora.org>
Date: Tue, 25 May 2021 18:40:51 +0530
Subject: [PATCH 2289/3804] i2c: qcom-geni: Suspend and resume the bus during
 SYSTEM_SLEEP_PM ops

Mark bus as suspended during system suspend to block the future
transfers. Implement geni_i2c_resume_noirq() to resume the bus.

Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller")
Signed-off-by: Roja Rani Yarubandi <rojay@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 8f67ebe6096ce..6d635a7c104ce 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -698,6 +698,8 @@ static int __maybe_unused geni_i2c_suspend_noirq(struct device *dev)
 {
 	struct geni_i2c_dev *gi2c = dev_get_drvdata(dev);
 
+	i2c_mark_adapter_suspended(&gi2c->adap);
+
 	if (!gi2c->suspended) {
 		geni_i2c_runtime_suspend(dev);
 		pm_runtime_disable(dev);
@@ -707,8 +709,16 @@ static int __maybe_unused geni_i2c_suspend_noirq(struct device *dev)
 	return 0;
 }
 
+static int __maybe_unused geni_i2c_resume_noirq(struct device *dev)
+{
+	struct geni_i2c_dev *gi2c = dev_get_drvdata(dev);
+
+	i2c_mark_adapter_resumed(&gi2c->adap);
+	return 0;
+}
+
 static const struct dev_pm_ops geni_i2c_pm_ops = {
-	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(geni_i2c_suspend_noirq, NULL)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(geni_i2c_suspend_noirq, geni_i2c_resume_noirq)
 	SET_RUNTIME_PM_OPS(geni_i2c_runtime_suspend, geni_i2c_runtime_resume,
 									NULL)
 };
-- 
GitLab


From acf2492b51c9a3c4dfb947f4d3477a86d315150f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:30 +0200
Subject: [PATCH 2290/3804] wireguard: selftests: remove old conntrack kconfig
 value

On recent kernels, this config symbol is no longer used.

Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/wireguard/qemu/kernel.config | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 4eecb432a66c1..74db83a0aedd8 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y
 CONFIG_NETFILTER_XT_NAT=y
 CONFIG_NETFILTER_XT_MATCH_LENGTH=y
 CONFIG_NETFILTER_XT_MARK=y
-CONFIG_NF_CONNTRACK_IPV4=y
 CONFIG_NF_NAT_IPV4=y
 CONFIG_IP_NF_IPTABLES=y
 CONFIG_IP_NF_FILTER=y
-- 
GitLab


From f8873d11d4121aad35024f9379e431e0c83abead Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:31 +0200
Subject: [PATCH 2291/3804] wireguard: selftests: make sure rp_filter is
 disabled on vethc

Some distros may enable strict rp_filter by default, which will prevent
vethc from receiving the packets with an unrouteable reverse path address.

Reported-by: Hangbin Liu <liuhangbin@gmail.com>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/wireguard/netns.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 7ed7cd95e58fe..ebc4ee0fe179f 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_prefixlength 0
 ip1 -4 route add default dev wg0 table 51820
 ip1 -4 rule add not fwmark 51820 table 51820
 ip1 -4 rule add table main suppress_prefixlength 0
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
 # Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
 n1 ping -W 1 -c 100 -f 192.168.99.7
 n1 ping -W 1 -c 100 -f abab::1111
-- 
GitLab


From cc5060ca0285efe2728bced399a1955a7ce808b2 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:32 +0200
Subject: [PATCH 2292/3804] wireguard: do not use -O3

Apparently, various versions of gcc have O3-related miscompiles. Looking
at the difference between -O2 and -O3 for gcc 11 doesn't indicate
miscompiles, but the difference also doesn't seem so significant for
performance that it's worth risking.

Link: https://lore.kernel.org/lkml/CAHk-=wjuoGyxDhAF8SsrTkN0-YfCx7E6jUN3ikC_tn2AKWTTsA@mail.gmail.com/
Link: https://lore.kernel.org/lkml/CAHmME9otB5Wwxp7H8bR_i2uH2esEMvoBMC8uEXBMH9p0q1s6Bw@mail.gmail.com/
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile
index fc52b2cb500b3..dbe1f8514efc3 100644
--- a/drivers/net/wireguard/Makefile
+++ b/drivers/net/wireguard/Makefile
@@ -1,5 +1,4 @@
-ccflags-y := -O3
-ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
 ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
 wireguard-y := main.o
 wireguard-y += noise.o
-- 
GitLab


From 24b70eeeb4f46c09487f8155239ebfb1f875774a Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:33 +0200
Subject: [PATCH 2293/3804] wireguard: use synchronize_net rather than
 synchronize_rcu

Many of the synchronization points are sometimes called under the rtnl
lock, which means we should use synchronize_net rather than
synchronize_rcu. Under the hood, this expands to using the expedited
flavor of function in the event that rtnl is held, in order to not stall
other concurrent changes.

This fixes some very, very long delays when removing multiple peers at
once, which would cause some operations to take several minutes.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/peer.c   | 6 +++---
 drivers/net/wireguard/socket.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index cd5cb0292cb67..3a042d28eb2ea 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -88,7 +88,7 @@ static void peer_make_dead(struct wg_peer *peer)
 	/* Mark as dead, so that we don't allow jumping contexts after. */
 	WRITE_ONCE(peer->is_dead, true);
 
-	/* The caller must now synchronize_rcu() for this to take effect. */
+	/* The caller must now synchronize_net() for this to take effect. */
 }
 
 static void peer_remove_after_dead(struct wg_peer *peer)
@@ -160,7 +160,7 @@ void wg_peer_remove(struct wg_peer *peer)
 	lockdep_assert_held(&peer->device->device_update_lock);
 
 	peer_make_dead(peer);
-	synchronize_rcu();
+	synchronize_net();
 	peer_remove_after_dead(peer);
 }
 
@@ -178,7 +178,7 @@ void wg_peer_remove_all(struct wg_device *wg)
 		peer_make_dead(peer);
 		list_add_tail(&peer->peer_list, &dead_peers);
 	}
-	synchronize_rcu();
+	synchronize_net();
 	list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
 		peer_remove_after_dead(peer);
 }
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index d9ad850daa793..8c496b7471082 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
 	if (new4)
 		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
 	mutex_unlock(&wg->socket_update_lock);
-	synchronize_rcu();
+	synchronize_net();
 	sock_free(old4);
 	sock_free(old6);
 }
-- 
GitLab


From a4e9f8e3287c9eb6bf70df982870980dd3341863 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:34 +0200
Subject: [PATCH 2294/3804] wireguard: peer: allocate in kmem_cache

With deployments having upwards of 600k peers now, this somewhat heavy
structure could benefit from more fine-grained allocations.
Specifically, instead of using a 2048-byte slab for a 1544-byte object,
we can now use 1544-byte objects directly, thus saving almost 25%
per-peer, or with 600k peers, that's a savings of 303 MiB. This also
makes wireguard's memory usage more transparent in tools like slabtop
and /proc/slabinfo.

Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/main.c |  7 +++++++
 drivers/net/wireguard/peer.c | 21 +++++++++++++++++----
 drivers/net/wireguard/peer.h |  3 +++
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
index 7a7d5f1a80fc7..0a3ebfdac7947 100644
--- a/drivers/net/wireguard/main.c
+++ b/drivers/net/wireguard/main.c
@@ -28,6 +28,10 @@ static int __init mod_init(void)
 #endif
 	wg_noise_init();
 
+	ret = wg_peer_init();
+	if (ret < 0)
+		goto err_peer;
+
 	ret = wg_device_init();
 	if (ret < 0)
 		goto err_device;
@@ -44,6 +48,8 @@ static int __init mod_init(void)
 err_netlink:
 	wg_device_uninit();
 err_device:
+	wg_peer_uninit();
+err_peer:
 	return ret;
 }
 
@@ -51,6 +57,7 @@ static void __exit mod_exit(void)
 {
 	wg_genetlink_uninit();
 	wg_device_uninit();
+	wg_peer_uninit();
 }
 
 module_init(mod_init);
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
index 3a042d28eb2ea..1acd00ab2fbcb 100644
--- a/drivers/net/wireguard/peer.c
+++ b/drivers/net/wireguard/peer.c
@@ -15,6 +15,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 
+static struct kmem_cache *peer_cache;
 static atomic64_t peer_counter = ATOMIC64_INIT(0);
 
 struct wg_peer *wg_peer_create(struct wg_device *wg,
@@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
 	if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
 		return ERR_PTR(ret);
 
-	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+	peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL);
 	if (unlikely(!peer))
 		return ERR_PTR(ret);
-	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+	if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)))
 		goto err;
 
 	peer->device = wg;
@@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
 	return peer;
 
 err:
-	kfree(peer);
+	kmem_cache_free(peer_cache, peer);
 	return ERR_PTR(ret);
 }
 
@@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head *rcu)
 	/* The final zeroing takes care of clearing any remaining handshake key
 	 * material and other potentially sensitive information.
 	 */
-	kfree_sensitive(peer);
+	memzero_explicit(peer, sizeof(*peer));
+	kmem_cache_free(peer_cache, peer);
 }
 
 static void kref_release(struct kref *refcount)
@@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer)
 		return;
 	kref_put(&peer->refcount, kref_release);
 }
+
+int __init wg_peer_init(void)
+{
+	peer_cache = KMEM_CACHE(wg_peer, 0);
+	return peer_cache ? 0 : -ENOMEM;
+}
+
+void wg_peer_uninit(void)
+{
+	kmem_cache_destroy(peer_cache);
+}
diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h
index 8d53b687a1d16..76e4d3128ad4e 100644
--- a/drivers/net/wireguard/peer.h
+++ b/drivers/net/wireguard/peer.h
@@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer);
 void wg_peer_remove(struct wg_peer *peer);
 void wg_peer_remove_all(struct wg_device *wg);
 
+int wg_peer_init(void);
+void wg_peer_uninit(void);
+
 #endif /* _WG_PEER_H */
-- 
GitLab


From 46cfe8eee285cde465b420637507884551f5d7ca Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:35 +0200
Subject: [PATCH 2295/3804] wireguard: allowedips: initialize list head in
 selftest

The randomized trie tests weren't initializing the dummy peer list head,
resulting in a NULL pointer dereference when used. Fix this by
initializing it in the randomized trie test, just like we do for the
static unit test.

While we're at it, all of the other strings like this have the word
"self-test", so add it to the missing place here.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/selftest/allowedips.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
index 846db14cb046b..0d2a43a2d4008 100644
--- a/drivers/net/wireguard/selftest/allowedips.c
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -296,6 +296,7 @@ static __init bool randomized_test(void)
 			goto free;
 		}
 		kref_init(&peers[i]->refcount);
+		INIT_LIST_HEAD(&peers[i]->allowedips_list);
 	}
 
 	mutex_lock(&mutex);
@@ -333,7 +334,7 @@ static __init bool randomized_test(void)
 			if (wg_allowedips_insert_v4(&t,
 						    (struct in_addr *)mutated,
 						    cidr, peer, &mutex) < 0) {
-				pr_err("allowedips random malloc: FAIL\n");
+				pr_err("allowedips random self-test malloc: FAIL\n");
 				goto free_locked;
 			}
 			if (horrible_allowedips_insert_v4(&h,
-- 
GitLab


From f634f418c227c912e7ea95a3299efdc9b10e4022 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:36 +0200
Subject: [PATCH 2296/3804] wireguard: allowedips: remove nodes in O(1)

Previously, deleting peers would require traversing the entire trie in
order to rebalance nodes and safely free them. This meant that removing
1000 peers from a trie with a half million nodes would take an extremely
long time, during which we're holding the rtnl lock. Large-scale users
were reporting 200ms latencies added to the networking stack as a whole
every time their userspace software would queue up significant removals.
That's a serious situation.

This commit fixes that by maintaining a double pointer to the parent's
bit pointer for each node, and then using the already existing node list
belonging to each peer to go directly to the node, fix up its pointers,
and free it with RCU. This means removal is O(1) instead of O(n), and we
don't use gobs of stack.

The removal algorithm has the same downside as the code that it fixes:
it won't collapse needlessly long runs of fillers.  We can enhance that
in the future if it ever becomes a problem. This commit documents that
limitation with a TODO comment in code, a small but meaningful
improvement over the prior situation.

Currently the biggest flaw, which the next commit addresses, is that
this increases the node size on 64-bit machines from 60 bytes to
68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up
using twice as much memory per node, because of power-of-two
allocations, which is a big bummer. We'll need to figure something out
there.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/allowedips.c | 132 ++++++++++++-----------------
 drivers/net/wireguard/allowedips.h |   9 +-
 2 files changed, 57 insertions(+), 84 deletions(-)

diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 3725e9cd85f4f..2785cfd3a2212 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struct allowedips_node *root)
 	}
 }
 
-static void walk_remove_by_peer(struct allowedips_node __rcu **top,
-				struct wg_peer *peer, struct mutex *lock)
-{
-#define REF(p) rcu_access_pointer(p)
-#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
-#define PUSH(p) ({                                                             \
-		WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
-		stack[len++] = p;                                              \
-	})
-
-	struct allowedips_node __rcu **stack[128], **nptr;
-	struct allowedips_node *node, *prev;
-	unsigned int len;
-
-	if (unlikely(!peer || !REF(*top)))
-		return;
-
-	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
-		nptr = stack[len - 1];
-		node = DEREF(nptr);
-		if (!node) {
-			--len;
-			continue;
-		}
-		if (!prev || REF(prev->bit[0]) == node ||
-		    REF(prev->bit[1]) == node) {
-			if (REF(node->bit[0]))
-				PUSH(&node->bit[0]);
-			else if (REF(node->bit[1]))
-				PUSH(&node->bit[1]);
-		} else if (REF(node->bit[0]) == prev) {
-			if (REF(node->bit[1]))
-				PUSH(&node->bit[1]);
-		} else {
-			if (rcu_dereference_protected(node->peer,
-				lockdep_is_held(lock)) == peer) {
-				RCU_INIT_POINTER(node->peer, NULL);
-				list_del_init(&node->peer_list);
-				if (!node->bit[0] || !node->bit[1]) {
-					rcu_assign_pointer(*nptr, DEREF(
-					       &node->bit[!REF(node->bit[0])]));
-					kfree_rcu(node, rcu);
-					node = DEREF(nptr);
-				}
-			}
-			--len;
-		}
-	}
-
-#undef REF
-#undef DEREF
-#undef PUSH
-}
-
 static unsigned int fls128(u64 a, u64 b)
 {
 	return a ? fls64(a) + 64U : fls64(b);
@@ -224,6 +170,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		RCU_INIT_POINTER(node->peer, peer);
 		list_add_tail(&node->peer_list, &peer->allowedips_list);
 		copy_and_assign_cidr(node, key, cidr, bits);
+		rcu_assign_pointer(node->parent_bit, trie);
 		rcu_assign_pointer(*trie, node);
 		return 0;
 	}
@@ -243,9 +190,9 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	if (!node) {
 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
 	} else {
-		down = rcu_dereference_protected(CHOOSE_NODE(node, key),
-						 lockdep_is_held(lock));
+		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
 		if (!down) {
+			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
 			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
 			return 0;
 		}
@@ -254,29 +201,37 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	parent = node;
 
 	if (newnode->cidr == cidr) {
+		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
 		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
-		if (!parent)
+		if (!parent) {
+			rcu_assign_pointer(newnode->parent_bit, trie);
 			rcu_assign_pointer(*trie, newnode);
-		else
-			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
-					   newnode);
-	} else {
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
-		if (unlikely(!node)) {
-			list_del(&newnode->peer_list);
-			kfree(newnode);
-			return -ENOMEM;
+		} else {
+			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
+			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
 		}
-		INIT_LIST_HEAD(&node->peer_list);
-		copy_and_assign_cidr(node, newnode->bits, cidr, bits);
-
-		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-		if (!parent)
-			rcu_assign_pointer(*trie, node);
-		else
-			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
-					   node);
+		return 0;
+	}
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (unlikely(!node)) {
+		list_del(&newnode->peer_list);
+		kfree(newnode);
+		return -ENOMEM;
+	}
+	INIT_LIST_HEAD(&node->peer_list);
+	copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+
+	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
+	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
+	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
+	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
+	if (!parent) {
+		rcu_assign_pointer(node->parent_bit, trie);
+		rcu_assign_pointer(*trie, node);
+	} else {
+		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
+		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
 	}
 	return 0;
 }
@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
 void wg_allowedips_remove_by_peer(struct allowedips *table,
 				  struct wg_peer *peer, struct mutex *lock)
 {
+	struct allowedips_node *node, *child, *tmp;
+
+	if (list_empty(&peer->allowedips_list))
+		return;
 	++table->seq;
-	walk_remove_by_peer(&table->root4, peer, lock);
-	walk_remove_by_peer(&table->root6, peer, lock);
+	list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
+		list_del_init(&node->peer_list);
+		RCU_INIT_POINTER(node->peer, NULL);
+		if (node->bit[0] && node->bit[1])
+			continue;
+		child = rcu_dereference_protected(
+				node->bit[!rcu_access_pointer(node->bit[0])],
+				lockdep_is_held(lock));
+		if (child)
+			child->parent_bit = node->parent_bit;
+		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
+		kfree_rcu(node, rcu);
+
+		/* TODO: Note that we currently don't walk up and down in order to
+		 * free any potential filler nodes. This means that this function
+		 * doesn't free up as much as it could, which could be revisited
+		 * at some point.
+		 */
+	}
 }
 
 int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
index e5c83cafcef4c..f08f552e68529 100644
--- a/drivers/net/wireguard/allowedips.h
+++ b/drivers/net/wireguard/allowedips.h
@@ -15,14 +15,11 @@ struct wg_peer;
 struct allowedips_node {
 	struct wg_peer __rcu *peer;
 	struct allowedips_node __rcu *bit[2];
-	/* While it may seem scandalous that we waste space for v4,
-	 * we're alloc'ing to the nearest power of 2 anyway, so this
-	 * doesn't actually make a difference.
-	 */
-	u8 bits[16] __aligned(__alignof(u64));
 	u8 cidr, bit_at_a, bit_at_b, bitlen;
+	u8 bits[16] __aligned(__alignof(u64));
 
-	/* Keep rarely used list at bottom to be beyond cache line. */
+	/* Keep rarely used members at bottom to be beyond cache line. */
+	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
 	union {
 		struct list_head peer_list;
 		struct rcu_head rcu;
-- 
GitLab


From dc680de28ca849dfe589dc15ac56d22505f0ef11 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:37 +0200
Subject: [PATCH 2297/3804] wireguard: allowedips: allocate nodes in kmem_cache

The previous commit moved from O(n) to O(1) for removal, but in the
process introduced an additional pointer member to a struct that
increased the size from 60 to 68 bytes, putting nodes in the 128-byte
slab. With deployed systems having as many as 2 million nodes, this
represents a significant doubling in memory usage (128 MiB -> 256 MiB).
Fix this by using our own kmem_cache, that's sized exactly right. This
also makes wireguard's memory usage more transparent in tools like
slabtop and /proc/slabinfo.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------
 drivers/net/wireguard/allowedips.h |  5 ++++-
 drivers/net/wireguard/main.c       | 10 +++++++++-
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 2785cfd3a2212..c540dce8d224e 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -6,6 +6,8 @@
 #include "allowedips.h"
 #include "peer.h"
 
+static struct kmem_cache *node_cache;
+
 static void swap_endian(u8 *dst, const u8 *src, u8 bits)
 {
 	if (bits == 32) {
@@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_node **stack,
 	}
 }
 
+static void node_free_rcu(struct rcu_head *rcu)
+{
+	kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu));
+}
+
 static void root_free_rcu(struct rcu_head *rcu)
 {
 	struct allowedips_node *node, *stack[128] = {
@@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_head *rcu)
 	while (len > 0 && (node = stack[--len])) {
 		push_rcu(stack, node->bit[0], &len);
 		push_rcu(stack, node->bit[1], &len);
-		kfree(node);
+		kmem_cache_free(node_cache, node);
 	}
 }
 
@@ -164,7 +171,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		return -EINVAL;
 
 	if (!rcu_access_pointer(*trie)) {
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
+		node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 		if (unlikely(!node))
 			return -ENOMEM;
 		RCU_INIT_POINTER(node->peer, peer);
@@ -180,7 +187,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		return 0;
 	}
 
-	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+	newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 	if (unlikely(!newnode))
 		return -ENOMEM;
 	RCU_INIT_POINTER(newnode->peer, peer);
@@ -213,10 +220,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		return 0;
 	}
 
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	node = kmem_cache_zalloc(node_cache, GFP_KERNEL);
 	if (unlikely(!node)) {
 		list_del(&newnode->peer_list);
-		kfree(newnode);
+		kmem_cache_free(node_cache, newnode);
 		return -ENOMEM;
 	}
 	INIT_LIST_HEAD(&node->peer_list);
@@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct allowedips *table,
 		if (child)
 			child->parent_bit = node->parent_bit;
 		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
-		kfree_rcu(node, rcu);
+		call_rcu(&node->rcu, node_free_rcu);
 
 		/* TODO: Note that we currently don't walk up and down in order to
 		 * free any potential filler nodes. This means that this function
@@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
 	return NULL;
 }
 
+int __init wg_allowedips_slab_init(void)
+{
+	node_cache = KMEM_CACHE(allowedips_node, 0);
+	return node_cache ? 0 : -ENOMEM;
+}
+
+void wg_allowedips_slab_uninit(void)
+{
+	rcu_barrier();
+	kmem_cache_destroy(node_cache);
+}
+
 #include "selftest/allowedips.c"
diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
index f08f552e68529..32d611aaf3cc2 100644
--- a/drivers/net/wireguard/allowedips.h
+++ b/drivers/net/wireguard/allowedips.h
@@ -19,7 +19,7 @@ struct allowedips_node {
 	u8 bits[16] __aligned(__alignof(u64));
 
 	/* Keep rarely used members at bottom to be beyond cache line. */
-	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
+	struct allowedips_node *__rcu *parent_bit;
 	union {
 		struct list_head peer_list;
 		struct rcu_head rcu;
@@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
 bool wg_allowedips_selftest(void);
 #endif
 
+int wg_allowedips_slab_init(void);
+void wg_allowedips_slab_uninit(void);
+
 #endif /* _WG_ALLOWEDIPS_H */
diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
index 0a3ebfdac7947..75dbe77b0b4b4 100644
--- a/drivers/net/wireguard/main.c
+++ b/drivers/net/wireguard/main.c
@@ -21,10 +21,15 @@ static int __init mod_init(void)
 {
 	int ret;
 
+	ret = wg_allowedips_slab_init();
+	if (ret < 0)
+		goto err_allowedips;
+
 #ifdef DEBUG
+	ret = -ENOTRECOVERABLE;
 	if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
 	    !wg_ratelimiter_selftest())
-		return -ENOTRECOVERABLE;
+		goto err_peer;
 #endif
 	wg_noise_init();
 
@@ -50,6 +55,8 @@ err_netlink:
 err_device:
 	wg_peer_uninit();
 err_peer:
+	wg_allowedips_slab_uninit();
+err_allowedips:
 	return ret;
 }
 
@@ -58,6 +65,7 @@ static void __exit mod_exit(void)
 	wg_genetlink_uninit();
 	wg_device_uninit();
 	wg_peer_uninit();
+	wg_allowedips_slab_uninit();
 }
 
 module_init(mod_init);
-- 
GitLab


From bf7b042dc62a31f66d3a41dd4dfc7806f267b307 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 4 Jun 2021 17:17:38 +0200
Subject: [PATCH 2298/3804] wireguard: allowedips: free empty intermediate
 nodes when removing single node

When removing single nodes, it's possible that that node's parent is an
empty intermediate node, in which case, it too should be removed.
Otherwise the trie fills up and never is fully emptied, leading to
gradual memory leaks over time for tries that are modified often. There
was originally code to do this, but was removed during refactoring in
2016 and never reworked. Now that we have proper parent pointers from
the previous commits, we can implement this properly.

In order to reduce branching and expensive comparisons, we want to keep
the double pointer for parent assignment (which lets us easily chain up
to the root), but we still need to actually get the parent's base
address. So encode the bit number into the last two bits of the pointer,
and pack and unpack it as needed. This is a little bit clumsy but is the
fastest and least memory-wasteful of the compromises. Note that we align
the root struct here to a minimum of 4, because it's embedded into a
larger struct, and we're relying on having the bottom two bits for our
flag, which would only be 16-bit aligned on m68k.

The existing macro-based helpers were a bit unwieldy for adding the bit
packing to, so this commit replaces them with safer and clearer ordinary
functions.

We add a test to the randomized/fuzzer part of the selftests, to free
the randomized tries by-peer, refuzz it, and repeat, until it's supposed
to be empty, and then see if that actually resulted in the whole
thing being emptied. That combined with kmemcheck should hopefully make
sure this commit is doing what it should. Along the way this resulted in
various other cleanups of the tests and fixes for recent graphviz.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Cc: stable@vger.kernel.org
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireguard/allowedips.c          | 102 ++++++------
 drivers/net/wireguard/allowedips.h          |   4 +-
 drivers/net/wireguard/selftest/allowedips.c | 162 ++++++++++----------
 3 files changed, 137 insertions(+), 131 deletions(-)

diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index c540dce8d224e..b7197e80f2264 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
 	node->bitlen = bits;
 	memcpy(node->bits, src, bits / 8U);
 }
-#define CHOOSE_NODE(parent, key) \
-	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static inline u8 choose(struct allowedips_node *node, const u8 *key)
+{
+	return (key[node->bit_at_a] >> node->bit_at_b) & 1;
+}
 
 static void push_rcu(struct allowedips_node **stack,
 		     struct allowedips_node __rcu *p, unsigned int *len)
@@ -112,7 +115,7 @@ static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
 			found = node;
 		if (node->cidr == bits)
 			break;
-		node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+		node = rcu_dereference_bh(node->bit[choose(node, key)]);
 	}
 	return found;
 }
@@ -144,8 +147,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
 			   u8 cidr, u8 bits, struct allowedips_node **rnode,
 			   struct mutex *lock)
 {
-	struct allowedips_node *node = rcu_dereference_protected(trie,
-						lockdep_is_held(lock));
+	struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
 	struct allowedips_node *parent = NULL;
 	bool exact = false;
 
@@ -155,13 +157,24 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
 			exact = true;
 			break;
 		}
-		node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
-						 lockdep_is_held(lock));
+		node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock));
 	}
 	*rnode = parent;
 	return exact;
 }
 
+static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node)
+{
+	node->parent_bit_packed = (unsigned long)parent | bit;
+	rcu_assign_pointer(*parent, node);
+}
+
+static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node)
+{
+	u8 bit = choose(parent, node->bits);
+	connect_node(&parent->bit[bit], bit, node);
+}
+
 static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	       u8 cidr, struct wg_peer *peer, struct mutex *lock)
 {
@@ -177,8 +190,7 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 		RCU_INIT_POINTER(node->peer, peer);
 		list_add_tail(&node->peer_list, &peer->allowedips_list);
 		copy_and_assign_cidr(node, key, cidr, bits);
-		rcu_assign_pointer(node->parent_bit, trie);
-		rcu_assign_pointer(*trie, node);
+		connect_node(trie, 2, node);
 		return 0;
 	}
 	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
@@ -197,10 +209,10 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	if (!node) {
 		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
 	} else {
-		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
+		const u8 bit = choose(node, key);
+		down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock));
 		if (!down) {
-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
-			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+			connect_node(&node->bit[bit], bit, newnode);
 			return 0;
 		}
 	}
@@ -208,15 +220,11 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	parent = node;
 
 	if (newnode->cidr == cidr) {
-		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
-		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
-		if (!parent) {
-			rcu_assign_pointer(newnode->parent_bit, trie);
-			rcu_assign_pointer(*trie, newnode);
-		} else {
-			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
-			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
-		}
+		choose_and_connect_node(newnode, down);
+		if (!parent)
+			connect_node(trie, 2, newnode);
+		else
+			choose_and_connect_node(parent, newnode);
 		return 0;
 	}
 
@@ -229,17 +237,12 @@ static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
 	INIT_LIST_HEAD(&node->peer_list);
 	copy_and_assign_cidr(node, newnode->bits, cidr, bits);
 
-	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
-	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
-	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-	if (!parent) {
-		rcu_assign_pointer(node->parent_bit, trie);
-		rcu_assign_pointer(*trie, node);
-	} else {
-		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
-		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
-	}
+	choose_and_connect_node(node, down);
+	choose_and_connect_node(node, newnode);
+	if (!parent)
+		connect_node(trie, 2, node);
+	else
+		choose_and_connect_node(parent, node);
 	return 0;
 }
 
@@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
 void wg_allowedips_remove_by_peer(struct allowedips *table,
 				  struct wg_peer *peer, struct mutex *lock)
 {
-	struct allowedips_node *node, *child, *tmp;
+	struct allowedips_node *node, *child, **parent_bit, *parent, *tmp;
+	bool free_parent;
 
 	if (list_empty(&peer->allowedips_list))
 		return;
@@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct allowedips *table,
 		RCU_INIT_POINTER(node->peer, NULL);
 		if (node->bit[0] && node->bit[1])
 			continue;
-		child = rcu_dereference_protected(
-				node->bit[!rcu_access_pointer(node->bit[0])],
-				lockdep_is_held(lock));
+		child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])],
+						  lockdep_is_held(lock));
 		if (child)
-			child->parent_bit = node->parent_bit;
-		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
+			child->parent_bit_packed = node->parent_bit_packed;
+		parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL);
+		*parent_bit = child;
+		parent = (void *)parent_bit -
+			 offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]);
+		free_parent = !rcu_access_pointer(node->bit[0]) &&
+			      !rcu_access_pointer(node->bit[1]) &&
+			      (node->parent_bit_packed & 3) <= 1 &&
+			      !rcu_access_pointer(parent->peer);
+		if (free_parent)
+			child = rcu_dereference_protected(
+					parent->bit[!(node->parent_bit_packed & 1)],
+					lockdep_is_held(lock));
 		call_rcu(&node->rcu, node_free_rcu);
-
-		/* TODO: Note that we currently don't walk up and down in order to
-		 * free any potential filler nodes. This means that this function
-		 * doesn't free up as much as it could, which could be revisited
-		 * at some point.
-		 */
+		if (!free_parent)
+			continue;
+		if (child)
+			child->parent_bit_packed = parent->parent_bit_packed;
+		*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child;
+		call_rcu(&parent->rcu, node_free_rcu);
 	}
 }
 
diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
index 32d611aaf3cc2..2346c797eb4d8 100644
--- a/drivers/net/wireguard/allowedips.h
+++ b/drivers/net/wireguard/allowedips.h
@@ -19,7 +19,7 @@ struct allowedips_node {
 	u8 bits[16] __aligned(__alignof(u64));
 
 	/* Keep rarely used members at bottom to be beyond cache line. */
-	struct allowedips_node *__rcu *parent_bit;
+	unsigned long parent_bit_packed;
 	union {
 		struct list_head peer_list;
 		struct rcu_head rcu;
@@ -30,7 +30,7 @@ struct allowedips {
 	struct allowedips_node __rcu *root4;
 	struct allowedips_node __rcu *root6;
 	u64 seq;
-};
+} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */
 
 void wg_allowedips_init(struct allowedips *table);
 void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
index 0d2a43a2d4008..e173204ae7d78 100644
--- a/drivers/net/wireguard/selftest/allowedips.c
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -19,32 +19,22 @@
 
 #include <linux/siphash.h>
 
-static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
-					      u8 cidr)
-{
-	swap_endian(dst, src, bits);
-	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
-	if (cidr)
-		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
-}
-
 static __init void print_node(struct allowedips_node *node, u8 bits)
 {
 	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
-	char *fmt_declaration = KERN_DEBUG
-		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+	char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+	u8 ip1[16], ip2[16], cidr1, cidr2;
 	char *style = "dotted";
-	u8 ip1[16], ip2[16];
 	u32 color = 0;
 
+	if (node == NULL)
+		return;
 	if (bits == 32) {
 		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
-		fmt_declaration = KERN_DEBUG
-			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+		fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
 	} else if (bits == 128) {
 		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
-		fmt_declaration = KERN_DEBUG
-			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+		fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
 	}
 	if (node->peer) {
 		hsiphash_key_t key = { { 0 } };
@@ -55,24 +45,20 @@ static __init void print_node(struct allowedips_node *node, u8 bits)
 			hsiphash_1u32(0xabad1dea, &key) % 200;
 		style = "bold";
 	}
-	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
-	printk(fmt_declaration, ip1, node->cidr, style, color);
+	wg_allowedips_read_node(node, ip1, &cidr1);
+	printk(fmt_declaration, ip1, cidr1, style, color);
 	if (node->bit[0]) {
-		swap_endian_and_apply_cidr(ip2,
-				rcu_dereference_raw(node->bit[0])->bits, bits,
-				node->cidr);
-		printk(fmt_connection, ip1, node->cidr, ip2,
-		       rcu_dereference_raw(node->bit[0])->cidr);
-		print_node(rcu_dereference_raw(node->bit[0]), bits);
+		wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2);
+		printk(fmt_connection, ip1, cidr1, ip2, cidr2);
 	}
 	if (node->bit[1]) {
-		swap_endian_and_apply_cidr(ip2,
-				rcu_dereference_raw(node->bit[1])->bits,
-				bits, node->cidr);
-		printk(fmt_connection, ip1, node->cidr, ip2,
-		       rcu_dereference_raw(node->bit[1])->cidr);
-		print_node(rcu_dereference_raw(node->bit[1]), bits);
+		wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2);
+		printk(fmt_connection, ip1, cidr1, ip2, cidr2);
 	}
+	if (node->bit[0])
+		print_node(rcu_dereference_raw(node->bit[0]), bits);
+	if (node->bit[1])
+		print_node(rcu_dereference_raw(node->bit[1]), bits);
 }
 
 static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
@@ -121,8 +107,8 @@ static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
 {
 	union nf_inet_addr mask;
 
-	memset(&mask, 0x00, 128 / 8);
-	memset(&mask, 0xff, cidr / 8);
+	memset(&mask, 0, sizeof(mask));
+	memset(&mask.all, 0xff, cidr / 8);
 	if (cidr % 32)
 		mask.all[cidr / 32] = (__force u32)htonl(
 			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
@@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allowedips_node *node)
 }
 
 static __init inline bool
-horrible_match_v4(const struct horrible_allowedips_node *node,
-		  struct in_addr *ip)
+horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip)
 {
 	return (ip->s_addr & node->mask.ip) == node->ip.ip;
 }
 
 static __init inline bool
-horrible_match_v6(const struct horrible_allowedips_node *node,
-		  struct in6_addr *ip)
+horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip)
 {
-	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
-		       node->ip.ip6[0] &&
-	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
-		       node->ip.ip6[1] &&
-	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
-		       node->ip.ip6[2] &&
+	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] &&
+	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] &&
+	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] &&
 	       (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
 }
 
 static __init void
-horrible_insert_ordered(struct horrible_allowedips *table,
-			struct horrible_allowedips_node *node)
+horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node)
 {
 	struct horrible_allowedips_node *other = NULL, *where = NULL;
 	u8 my_cidr = horrible_mask_to_cidr(node->mask);
 
 	hlist_for_each_entry(other, &table->head, table) {
-		if (!memcmp(&other->mask, &node->mask,
-			    sizeof(union nf_inet_addr)) &&
-		    !memcmp(&other->ip, &node->ip,
-			    sizeof(union nf_inet_addr)) &&
-		    other->ip_version == node->ip_version) {
+		if (other->ip_version == node->ip_version &&
+		    !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) &&
+		    !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) {
 			other->value = node->value;
 			kfree(node);
 			return;
 		}
+	}
+	hlist_for_each_entry(other, &table->head, table) {
 		where = other;
 		if (horrible_mask_to_cidr(other->mask) <= my_cidr)
 			break;
@@ -201,8 +181,7 @@ static __init int
 horrible_allowedips_insert_v4(struct horrible_allowedips *table,
 			      struct in_addr *ip, u8 cidr, void *value)
 {
-	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-							GFP_KERNEL);
+	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
 	if (unlikely(!node))
 		return -ENOMEM;
@@ -219,8 +198,7 @@ static __init int
 horrible_allowedips_insert_v6(struct horrible_allowedips *table,
 			      struct in6_addr *ip, u8 cidr, void *value)
 {
-	struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
-							GFP_KERNEL);
+	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 
 	if (unlikely(!node))
 		return -ENOMEM;
@@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct horrible_allowedips *table,
 }
 
 static __init void *
-horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
-			      struct in_addr *ip)
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip)
 {
 	struct horrible_allowedips_node *node;
-	void *ret = NULL;
 
 	hlist_for_each_entry(node, &table->head, table) {
-		if (node->ip_version != 4)
-			continue;
-		if (horrible_match_v4(node, ip)) {
-			ret = node->value;
-			break;
-		}
+		if (node->ip_version == 4 && horrible_match_v4(node, ip))
+			return node->value;
 	}
-	return ret;
+	return NULL;
 }
 
 static __init void *
-horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
-			      struct in6_addr *ip)
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip)
 {
 	struct horrible_allowedips_node *node;
-	void *ret = NULL;
 
 	hlist_for_each_entry(node, &table->head, table) {
-		if (node->ip_version != 6)
+		if (node->ip_version == 6 && horrible_match_v6(node, ip))
+			return node->value;
+	}
+	return NULL;
+}
+
+
+static __init void
+horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value)
+{
+	struct horrible_allowedips_node *node;
+	struct hlist_node *h;
+
+	hlist_for_each_entry_safe(node, h, &table->head, table) {
+		if (node->value != value)
 			continue;
-		if (horrible_match_v6(node, ip)) {
-			ret = node->value;
-			break;
-		}
+		hlist_del(&node->table);
+		kfree(node);
 	}
-	return ret;
+
 }
 
 static __init bool randomized_test(void)
@@ -397,23 +379,33 @@ static __init bool randomized_test(void)
 		print_tree(t.root6, 128);
 	}
 
-	for (i = 0; i < NUM_QUERIES; ++i) {
-		prandom_bytes(ip, 4);
-		if (lookup(t.root4, 32, ip) !=
-		    horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
-			pr_err("allowedips random self-test: FAIL\n");
-			goto free;
+	for (j = 0;; ++j) {
+		for (i = 0; i < NUM_QUERIES; ++i) {
+			prandom_bytes(ip, 4);
+			if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+				horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip);
+				pr_err("allowedips random v4 self-test: FAIL\n");
+				goto free;
+			}
+			prandom_bytes(ip, 16);
+			if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+				pr_err("allowedips random v6 self-test: FAIL\n");
+				goto free;
+			}
 		}
+		if (j >= NUM_PEERS)
+			break;
+		mutex_lock(&mutex);
+		wg_allowedips_remove_by_peer(&t, peers[j], &mutex);
+		mutex_unlock(&mutex);
+		horrible_allowedips_remove_by_value(&h, peers[j]);
 	}
 
-	for (i = 0; i < NUM_QUERIES; ++i) {
-		prandom_bytes(ip, 16);
-		if (lookup(t.root6, 128, ip) !=
-		    horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
-			pr_err("allowedips random self-test: FAIL\n");
-			goto free;
-		}
+	if (t.root4 || t.root6) {
+		pr_err("allowedips random self-test removal: FAIL\n");
+		goto free;
 	}
+
 	ret = true;
 
 free:
-- 
GitLab


From 944d671d5faa0d78980a3da5c0f04960ef1ad893 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Fri, 4 Jun 2021 19:03:18 +0800
Subject: [PATCH 2299/3804] sch_htb: fix refcount leak in
 htb_parent_to_leaf_offload

The commit ae81feb7338c ("sch_htb: fix null pointer dereference
on a null new_q") fixes a NULL pointer dereference bug, but it
is not correct.

htb_graft_helper properly handles the case when new_q is NULL,
but the previous patch skips this call in that case, which
creates an inconsistency: dev_queue->qdisc will still
point to the old qdisc, but cl->parent->leaf.q will point to
the new one (which will be noop_qdisc, because new_q was NULL).
The code is based on an assumption that these two pointers are
the same, so it can lead to refcount leaks.

The correct fix is to add a NULL pointer check to protect
qdisc_refcount_inc inside htb_parent_to_leaf_offload.

Fixes: ae81feb7338c ("sch_htb: fix null pointer dereference on a null new_q")
Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Suggested-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 081c11d5717c4..8827987ba9034 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1488,7 +1488,8 @@ static void htb_parent_to_leaf_offload(struct Qdisc *sch,
 	struct Qdisc *old_q;
 
 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
-	qdisc_refcount_inc(new_q);
+	if (new_q)
+		qdisc_refcount_inc(new_q);
 	old_q = htb_graft_helper(dev_queue, new_q);
 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
 }
@@ -1675,10 +1676,9 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg,
 					  cl->parent->common.classid,
 					  NULL);
 		if (q->offload) {
-			if (new_q) {
+			if (new_q)
 				htb_set_lockdep_class_child(new_q);
-				htb_parent_to_leaf_offload(sch, dev_queue, new_q);
-			}
+			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
 		}
 	}
 
-- 
GitLab


From 3822d0670c9d4342794d73e0d0e615322b40438e Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 4 Jun 2021 16:48:18 +0530
Subject: [PATCH 2300/3804] cxgb4: avoid link re-train during TC-MQPRIO
 configuration

When configuring TC-MQPRIO offload, only turn off netdev carrier and
don't bring physical link down in hardware. Otherwise, when the
physical link is brought up again after configuration, it gets
re-trained and stalls ongoing traffic.

Also, when firmware is no longer accessible or has crashed, avoid sending
FLOWC and waiting for a reply that will never come.

Fix following hung_task_timeout_secs trace seen in these cases.

INFO: task tc:20807 blocked for more than 122 seconds.
      Tainted: G S                5.13.0-rc3+ #122
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:tc   state:D stack:14768 pid:20807 ppid: 19366 flags:0x00000000
Call Trace:
 __schedule+0x27b/0x6a0
 schedule+0x37/0xa0
 schedule_preempt_disabled+0x5/0x10
 __mutex_lock.isra.14+0x2a0/0x4a0
 ? netlink_lookup+0x120/0x1a0
 ? rtnl_fill_ifinfo+0x10f0/0x10f0
 __netlink_dump_start+0x70/0x250
 rtnetlink_rcv_msg+0x28b/0x380
 ? rtnl_fill_ifinfo+0x10f0/0x10f0
 ? rtnl_calcit.isra.42+0x120/0x120
 netlink_rcv_skb+0x4b/0xf0
 netlink_unicast+0x1a0/0x280
 netlink_sendmsg+0x216/0x440
 sock_sendmsg+0x56/0x60
 __sys_sendto+0xe9/0x150
 ? handle_mm_fault+0x6d/0x1b0
 ? do_user_addr_fault+0x1c5/0x620
 __x64_sys_sendto+0x1f/0x30
 do_syscall_64+0x3c/0x80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f7f73218321
RSP: 002b:00007ffd19626208 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 000055b7c0a8b240 RCX: 00007f7f73218321
RDX: 0000000000000028 RSI: 00007ffd19626210 RDI: 0000000000000003
RBP: 000055b7c08680ff R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000055b7c085f5f6
R13: 000055b7c085f60a R14: 00007ffd19636470 R15: 00007ffd196262a0

Fixes: b1396c2bd675 ("cxgb4: parse and configure TC-MQPRIO offload")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h           | 2 --
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c      | 4 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c | 9 ++++++---
 drivers/net/ethernet/chelsio/cxgb4/sge.c             | 6 ++++++
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 314f8d8067231..9058f09f921ee 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -2177,8 +2177,6 @@ int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid,
 			  bool persistent, u8 *smt_idx);
 int cxgb4_get_msix_idx_from_bmap(struct adapter *adap);
 void cxgb4_free_msix_idx_in_bmap(struct adapter *adap, u32 msix_idx);
-int cxgb_open(struct net_device *dev);
-int cxgb_close(struct net_device *dev);
 void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
 void cxgb4_quiesce_rx(struct sge_rspq *q);
 int cxgb4_port_mirror_alloc(struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 421bd9b88028d..1f601de02e706 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2834,7 +2834,7 @@ static void cxgb_down(struct adapter *adapter)
 /*
  * net_device operations
  */
-int cxgb_open(struct net_device *dev)
+static int cxgb_open(struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
@@ -2882,7 +2882,7 @@ out_unlock:
 	return err;
 }
 
-int cxgb_close(struct net_device *dev)
+static int cxgb_close(struct net_device *dev)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index 6c259de96f969..338b04f339b3d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -589,7 +589,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
 	 * down before configuring tc params.
 	 */
 	if (netif_running(dev)) {
-		cxgb_close(dev);
+		netif_tx_stop_all_queues(dev);
+		netif_carrier_off(dev);
 		needs_bring_up = true;
 	}
 
@@ -615,8 +616,10 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
 	}
 
 out:
-	if (needs_bring_up)
-		cxgb_open(dev);
+	if (needs_bring_up) {
+		netif_tx_start_all_queues(dev);
+		netif_carrier_on(dev);
+	}
 
 	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 	return ret;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 1e5f2edb70cf4..6a099cb34b122 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2556,6 +2556,12 @@ int cxgb4_ethofld_send_flowc(struct net_device *dev, u32 eotid, u32 tc)
 	if (!eosw_txq)
 		return -ENOMEM;
 
+	if (!(adap->flags & CXGB4_FW_OK)) {
+		/* Don't stall caller when access to FW is lost */
+		complete(&eosw_txq->completion);
+		return -EIO;
+	}
+
 	skb = alloc_skb(len, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
-- 
GitLab


From 40e67c120093a918037b6ec589bafd5d96b522a3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 20 Nov 2020 12:39:10 -0600
Subject: [PATCH 2301/3804] rxrpc: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 net/rxrpc/af_rxrpc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 41671af6b33f9..2b5f89713e365 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -471,6 +471,7 @@ static int rxrpc_connect(struct socket *sock, struct sockaddr *addr,
 	switch (rx->sk.sk_state) {
 	case RXRPC_UNBOUND:
 		rx->sk.sk_state = RXRPC_CLIENT_UNBOUND;
+		break;
 	case RXRPC_CLIENT_UNBOUND:
 	case RXRPC_CLIENT_BOUND:
 		break;
-- 
GitLab


From 77f30bfcfcf484da7208affd6a9e63406420bf91 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 27 May 2021 16:52:36 -0700
Subject: [PATCH 2302/3804] fscrypt: don't ignore minor_hash when hash is 0

When initializing a no-key name, fscrypt_fname_disk_to_usr() sets the
minor_hash to 0 if the (major) hash is 0.

This doesn't make sense because 0 is a valid hash code, so we shouldn't
ignore the filesystem-provided minor_hash in that case.  Fix this by
removing the special case for 'hash == 0'.

This is an old bug that appears to have originated when the encryption
code in ext4 and f2fs was moved into fs/crypto/.  The original ext4 and
f2fs code passed the hash by pointer instead of by value.  So
'if (hash)' actually made sense then, as it was checking whether a
pointer was NULL.  But now the hashes are passed by value, and
filesystems just pass 0 for any hashes they don't have.  There is no
need to handle this any differently from the hashes actually being 0.

It is difficult to reproduce this bug, as it only made a difference in
the case where a filename's 32-bit major hash happened to be 0.
However, it probably had the largest chance of causing problems on
ubifs, since ubifs uses minor_hash to do lookups of no-key names, in
addition to using it as a readdir cookie.  ext4 only uses minor_hash as
a readdir cookie, and f2fs doesn't use minor_hash at all.

Fixes: 0b81d0779072 ("fs crypto: move per-file encryption from f2fs tree to fs/crypto")
Cc: <stable@vger.kernel.org> # v4.6+
Link: https://lore.kernel.org/r/20210527235236.2376556-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fname.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 6ca7d16593ff6..d00455440d087 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -344,13 +344,9 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
 		     offsetof(struct fscrypt_nokey_name, sha256));
 	BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX);
 
-	if (hash) {
-		nokey_name.dirhash[0] = hash;
-		nokey_name.dirhash[1] = minor_hash;
-	} else {
-		nokey_name.dirhash[0] = 0;
-		nokey_name.dirhash[1] = 0;
-	}
+	nokey_name.dirhash[0] = hash;
+	nokey_name.dirhash[1] = minor_hash;
+
 	if (iname->len <= sizeof(nokey_name.bytes)) {
 		memcpy(nokey_name.bytes, iname->name, iname->len);
 		size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]);
-- 
GitLab


From 2fc2b430f559fdf32d5d1dd5ceaa40e12fb77bdf Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 5 Jun 2021 00:50:33 -0700
Subject: [PATCH 2303/3804] fscrypt: fix derivation of SipHash keys on big
 endian CPUs

Typically, the cryptographic APIs that fscrypt uses take keys as byte
arrays, which avoids endianness issues.  However, siphash_key_t is an
exception.  It is defined as 'u64 key[2];', i.e. the 128-bit key is
expected to be given directly as two 64-bit words in CPU endianness.

fscrypt_derive_dirhash_key() and fscrypt_setup_iv_ino_lblk_32_key()
forgot to take this into account.  Therefore, the SipHash keys used to
index encrypted+casefolded directories differ on big endian vs. little
endian platforms, as do the SipHash keys used to hash inode numbers for
IV_INO_LBLK_32-encrypted directories.  This makes such directories
non-portable between these platforms.

Fix this by always using the little endian order.  This is a breaking
change for big endian platforms, but this should be fine in practice
since these features (encrypt+casefold support, and the IV_INO_LBLK_32
flag) aren't known to actually be used on any big endian platforms yet.

Fixes: aa408f835d02 ("fscrypt: derive dirhash key for casefolded directories")
Fixes: e3b1078bedd3 ("fscrypt: add support for IV_INO_LBLK_32 policies")
Cc: <stable@vger.kernel.org> # v5.6+
Link: https://lore.kernel.org/r/20210605075033.54424-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/keysetup.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 261293fb70974..bca9c6658a7c5 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -210,15 +210,40 @@ out_unlock:
 	return err;
 }
 
+/*
+ * Derive a SipHash key from the given fscrypt master key and the given
+ * application-specific information string.
+ *
+ * Note that the KDF produces a byte array, but the SipHash APIs expect the key
+ * as a pair of 64-bit words.  Therefore, on big endian CPUs we have to do an
+ * endianness swap in order to get the same results as on little endian CPUs.
+ */
+static int fscrypt_derive_siphash_key(const struct fscrypt_master_key *mk,
+				      u8 context, const u8 *info,
+				      unsigned int infolen, siphash_key_t *key)
+{
+	int err;
+
+	err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, context, info, infolen,
+				  (u8 *)key, sizeof(*key));
+	if (err)
+		return err;
+
+	BUILD_BUG_ON(sizeof(*key) != 16);
+	BUILD_BUG_ON(ARRAY_SIZE(key->key) != 2);
+	le64_to_cpus(&key->key[0]);
+	le64_to_cpus(&key->key[1]);
+	return 0;
+}
+
 int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
 			       const struct fscrypt_master_key *mk)
 {
 	int err;
 
-	err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY,
-				  ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE,
-				  (u8 *)&ci->ci_dirhash_key,
-				  sizeof(ci->ci_dirhash_key));
+	err = fscrypt_derive_siphash_key(mk, HKDF_CONTEXT_DIRHASH_KEY,
+					 ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE,
+					 &ci->ci_dirhash_key);
 	if (err)
 		return err;
 	ci->ci_dirhash_key_initialized = true;
@@ -253,10 +278,9 @@ static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_info *ci,
 		if (mk->mk_ino_hash_key_initialized)
 			goto unlock;
 
-		err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
-					  HKDF_CONTEXT_INODE_HASH_KEY, NULL, 0,
-					  (u8 *)&mk->mk_ino_hash_key,
-					  sizeof(mk->mk_ino_hash_key));
+		err = fscrypt_derive_siphash_key(mk,
+						 HKDF_CONTEXT_INODE_HASH_KEY,
+						 NULL, 0, &mk->mk_ino_hash_key);
 		if (err)
 			goto unlock;
 		/* pairs with smp_load_acquire() above */
-- 
GitLab


From a4d7e8ae4a541557d7a2c815835b786c18c3613c Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Wed, 2 Jun 2021 14:36:44 -0700
Subject: [PATCH 2304/3804] Drivers: hv: Move Hyper-V extended capability check
 to arch neutral code

The extended capability query code is currently under arch/x86, but it
is architecture neutral, and is used by arch neutral code in the Hyper-V
balloon driver. Hence the balloon driver fails to build on other
architectures.

Fix by moving the ext cap code out from arch/x86.  Because it is also
called from built-in architecture specific code, it can't be in a module,
so the Makefile treats it as built-in even when CONFIG_HYPERV is "m".  Also
drivers/Makefile is tweaked because this is the first occurrence of a
Hyper-V file that is built-in even when CONFIG_HYPERV is "m".

While here, update the hypercall status check to use the new helper
function instead of open coding. No functional change.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Link: https://lore.kernel.org/r/1622669804-2016-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/hyperv/hv_init.c | 47 ----------------------------
 drivers/Makefile          |  2 +-
 drivers/hv/Makefile       |  3 ++
 drivers/hv/hv_common.c    | 66 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 48 deletions(-)
 create mode 100644 drivers/hv/hv_common.c

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index bb0ae4b5c00f1..6952e219cba36 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -614,50 +614,3 @@ bool hv_is_isolation_supported(void)
 	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
-
-/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
-bool hv_query_ext_cap(u64 cap_query)
-{
-	/*
-	 * The address of the 'hv_extended_cap' variable will be used as an
-	 * output parameter to the hypercall below and so it should be
-	 * compatible with 'virt_to_phys'. Which means, it's address should be
-	 * directly mapped. Use 'static' to keep it compatible; stack variables
-	 * can be virtually mapped, making them imcompatible with
-	 * 'virt_to_phys'.
-	 * Hypercall input/output addresses should also be 8-byte aligned.
-	 */
-	static u64 hv_extended_cap __aligned(8);
-	static bool hv_extended_cap_queried;
-	u64 status;
-
-	/*
-	 * Querying extended capabilities is an extended hypercall. Check if the
-	 * partition supports extended hypercall, first.
-	 */
-	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
-		return false;
-
-	/* Extended capabilities do not change at runtime. */
-	if (hv_extended_cap_queried)
-		return hv_extended_cap & cap_query;
-
-	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
-				 &hv_extended_cap);
-
-	/*
-	 * The query extended capabilities hypercall should not fail under
-	 * any normal circumstances. Avoid repeatedly making the hypercall, on
-	 * error.
-	 */
-	hv_extended_cap_queried = true;
-	status &= HV_HYPERCALL_RESULT_MASK;
-	if (status != HV_STATUS_SUCCESS) {
-		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
-		       status);
-		return false;
-	}
-
-	return hv_extended_cap & cap_query;
-}
-EXPORT_SYMBOL_GPL(hv_query_ext_cap);
diff --git a/drivers/Makefile b/drivers/Makefile
index 5a6d613e868d3..1c2e1acbd098a 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -161,7 +161,7 @@ obj-$(CONFIG_SOUNDWIRE)		+= soundwire/
 
 # Virtualization drivers
 obj-$(CONFIG_VIRT_DRIVERS)	+= virt/
-obj-$(CONFIG_HYPERV)		+= hv/
+obj-$(subst m,y,$(CONFIG_HYPERV))	+= hv/
 
 obj-$(CONFIG_PM_DEVFREQ)	+= devfreq/
 obj-$(CONFIG_EXTCON)		+= extcon/
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 94daf8240c959..d76df5c8c2a91 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -11,3 +11,6 @@ hv_vmbus-y := vmbus_drv.o \
 		 channel_mgmt.o ring_buffer.o hv_trace.o
 hv_vmbus-$(CONFIG_HYPERV_TESTING)	+= hv_debugfs.o
 hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_fcopy.o hv_utils_transport.o
+
+# Code that must be built-in
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
new file mode 100644
index 0000000000000..f0053c7868917
--- /dev/null
+++ b/drivers/hv/hv_common.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Architecture neutral utility routines for interacting with
+ * Hyper-V. This file is specifically for code that must be
+ * built-in to the kernel image when CONFIG_HYPERV is set
+ * (vs. being in a module) because it is called from architecture
+ * specific code under arch/.
+ *
+ * Copyright (C) 2021, Microsoft, Inc.
+ *
+ * Author : Michael Kelley <mikelley@microsoft.com>
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/bitfield.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
+
+
+/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
+bool hv_query_ext_cap(u64 cap_query)
+{
+	/*
+	 * The address of the 'hv_extended_cap' variable will be used as an
+	 * output parameter to the hypercall below and so it should be
+	 * compatible with 'virt_to_phys'. Which means, it's address should be
+	 * directly mapped. Use 'static' to keep it compatible; stack variables
+	 * can be virtually mapped, making them imcompatible with
+	 * 'virt_to_phys'.
+	 * Hypercall input/output addresses should also be 8-byte aligned.
+	 */
+	static u64 hv_extended_cap __aligned(8);
+	static bool hv_extended_cap_queried;
+	u64 status;
+
+	/*
+	 * Querying extended capabilities is an extended hypercall. Check if the
+	 * partition supports extended hypercall, first.
+	 */
+	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
+		return false;
+
+	/* Extended capabilities do not change at runtime. */
+	if (hv_extended_cap_queried)
+		return hv_extended_cap & cap_query;
+
+	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
+				 &hv_extended_cap);
+
+	/*
+	 * The query extended capabilities hypercall should not fail under
+	 * any normal circumstances. Avoid repeatedly making the hypercall, on
+	 * error.
+	 */
+	hv_extended_cap_queried = true;
+	if (!hv_result_success(status)) {
+		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
+		       status);
+		return false;
+	}
+
+	return hv_extended_cap & cap_query;
+}
+EXPORT_SYMBOL_GPL(hv_query_ext_cap);
-- 
GitLab


From bc96c72df33ee81b24d87eab953c73f7bcc04f29 Mon Sep 17 00:00:00 2001
From: George McCollister <george.mccollister@gmail.com>
Date: Thu, 3 Jun 2021 19:32:08 -0500
Subject: [PATCH 2305/3804] USB: serial: ftdi_sio: add NovaTech OrionMX product
 ID

Add PID for the NovaTech OrionMX so it can be automatically detected.

Signed-off-by: George McCollister <george.mccollister@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/ftdi_sio.c     | 1 +
 drivers/usb/serial/ftdi_sio_ids.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 369ef140df78a..4a1f3a95d0177 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -611,6 +611,7 @@ static const struct usb_device_id id_table_combined[] = {
 		.driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
 	{ USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_NT_ORIONMX_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index d854e04a4286e..add602bebd820 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -581,6 +581,7 @@
 #define FTDI_NT_ORIONLXM_PID		0x7c90	/* OrionLXm Substation Automation Platform */
 #define FTDI_NT_ORIONLX_PLUS_PID	0x7c91	/* OrionLX+ Substation Automation Platform */
 #define FTDI_NT_ORION_IO_PID		0x7c92	/* Orion I/O */
+#define FTDI_NT_ORIONMX_PID		0x7c93	/* OrionMX */
 
 /*
  * Synapse Wireless product ids (FTDI_VID)
-- 
GitLab


From 15d295b560e6dd45f839a53ae69e4f63b54eb32f Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Sat, 5 Jun 2021 16:25:36 +0800
Subject: [PATCH 2306/3804] ALSA: hda/realtek: fix mute/micmute LEDs and
 speaker for HP Elite Dragonfly G2

The HP Elite Dragonfly G2 uses the ALC285 codec, which uses 0x04 to control
the mute LED and 0x01 to control the micmute LED.
On the other hand, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210605082539.41797-1-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 43e37145eb5d9..9f65171a902dd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8310,6 +8310,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360),
 	SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
+	SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
-- 
GitLab


From 61d3e87468fad82dc8e8cb6de7db563ada64b532 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Sat, 5 Jun 2021 16:25:37 +0800
Subject: [PATCH 2307/3804] ALSA: hda/realtek: fix mute/micmute LEDs and
 speaker for HP EliteBook x360 1040 G8

The HP EliteBook x360 1040 G8 uses the ALC285 codec, which uses 0x04 to
control the mute LED and 0x01 to control the micmute LED.
On the other hand, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210605082539.41797-2-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 9f65171a902dd..11324163ebe1c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8311,6 +8311,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
 	SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+	SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
-- 
GitLab


From dfb06401b4cdfc71e2fc3e19b877ab845cc9f7f7 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Sat, 5 Jun 2021 16:25:38 +0800
Subject: [PATCH 2308/3804] ALSA: hda/realtek: fix mute/micmute LEDs for HP
 EliteBook 840 Aero G8

The HP EliteBook 840 Aero G8 uses the ALC285 codec, which uses 0x04 to
control the mute LED and 0x01 to control the micmute LED.
On the other hand, there is no output from the right channel of the speaker.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210605082539.41797-3-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 11324163ebe1c..215beb3ac678b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8330,6 +8330,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
 	SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
-- 
GitLab


From 9981b20a5e3694f4625ab5a1ddc98ce7503f6d12 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sat, 5 Jun 2021 18:10:54 +0900
Subject: [PATCH 2309/3804] ALSA: firewire-lib: fix the context to call
 snd_pcm_stop_xrun()

In the workqueue to queue wake-up event, isochronous context is not
processed, thus it's useless to check context for the workqueue to switch
status of runtime for PCM substream to XRUN. On the other hand, in
software IRQ context of 1394 OHCI, it's needed.

This commit fixes the bug introduced when tasklet was replaced with
workqueue.

Cc: <stable@vger.kernel.org>
Fixes: 2b3d2987d800 ("ALSA: firewire: Replace tasklet with work")
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20210605091054.68866-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index e0faa6601966c..5805c5de39fbf 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -804,7 +804,7 @@ static void generate_pkt_descs(struct amdtp_stream *s, struct pkt_desc *descs,
 static inline void cancel_stream(struct amdtp_stream *s)
 {
 	s->packet_index = -1;
-	if (current_work() == &s->period_work)
+	if (in_interrupt())
 		amdtp_stream_pcm_abort(s);
 	WRITE_ONCE(s->pcm_buffer_pointer, SNDRV_PCM_POS_XRUN);
 }
-- 
GitLab


From 50c25ee97cf6ab011542167ab590c17012cea4ed Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Fri, 4 Jun 2021 20:01:08 -0700
Subject: [PATCH 2310/3804] Revert "MIPS: make userspace mapping young by
 default"

This reverts commit f685a533a7fab35c5d069dcd663f59c8e4171a75.

The MIPS cache flush logic needs to know whether the mapping was already
established to decide how to flush caches.  This is done by checking the
valid bit in the PTE.  The commit above breaks this logic by setting the
valid in the PTE in new mappings, which causes kernel crashes.

Link: https://lkml.kernel.org/r/20210526094335.92948-1-tsbogend@alpha.franken.de
Fixes: f685a533a7f ("MIPS: make userspace mapping young by default")
Reported-by: Zhou Yanjie <zhouyanjie@wanyeetech.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Huang Pei <huangpei@loongson.cn>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/mm/cache.c    | 30 ++++++++++++++----------------
 include/linux/pgtable.h |  8 ++++++++
 mm/memory.c             |  4 ++++
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index a7bf0c80371cd..830ab91e574f4 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -158,31 +158,29 @@ unsigned long _page_cachable_default;
 EXPORT_SYMBOL(_page_cachable_default);
 
 #define PM(p)	__pgprot(_page_cachable_default | (p))
-#define PVA(p)	PM(_PAGE_VALID | _PAGE_ACCESSED | (p))
 
 static inline void setup_protection_map(void)
 {
 	protection_map[0]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[1]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[2]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[3]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[4]  = PVA(_PAGE_PRESENT);
-	protection_map[5]  = PVA(_PAGE_PRESENT);
-	protection_map[6]  = PVA(_PAGE_PRESENT);
-	protection_map[7]  = PVA(_PAGE_PRESENT);
+	protection_map[1]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[2]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[3]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[4]  = PM(_PAGE_PRESENT);
+	protection_map[5]  = PM(_PAGE_PRESENT);
+	protection_map[6]  = PM(_PAGE_PRESENT);
+	protection_map[7]  = PM(_PAGE_PRESENT);
 
 	protection_map[8]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[9]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+	protection_map[9]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
 				_PAGE_NO_READ);
-	protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-	protection_map[12] = PVA(_PAGE_PRESENT);
-	protection_map[13] = PVA(_PAGE_PRESENT);
-	protection_map[14] = PVA(_PAGE_PRESENT);
-	protection_map[15] = PVA(_PAGE_PRESENT);
+	protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+	protection_map[12] = PM(_PAGE_PRESENT);
+	protection_map[13] = PM(_PAGE_PRESENT);
+	protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+	protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
 }
 
-#undef _PVA
 #undef PM
 
 void cpu_cache_init(void)
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 46b13780c2c8c..a43047b1030dc 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -432,6 +432,14 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
  * To be differentiate with macro pte_mkyoung, this macro is used on platforms
  * where software maintains page access bit.
  */
+#ifndef pte_sw_mkyoung
+static inline pte_t pte_sw_mkyoung(pte_t pte)
+{
+	return pte;
+}
+#define pte_sw_mkyoung	pte_sw_mkyoung
+#endif
+
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 730daa00952ba..f3ffab9b9e391 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2939,6 +2939,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
+		entry = pte_sw_mkyoung(entry);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
 		/*
@@ -3602,6 +3603,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	__SetPageUptodate(page);
 
 	entry = mk_pte(page, vma->vm_page_prot);
+	entry = pte_sw_mkyoung(entry);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3786,6 +3788,8 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 
 	if (prefault && arch_wants_old_prefaulted_pte())
 		entry = pte_mkold(entry);
+	else
+		entry = pte_sw_mkyoung(entry);
 
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-- 
GitLab


From 8fd0e995cc7b6a7a8a40bc03d52a2cd445beeff4 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 4 Jun 2021 20:01:11 -0700
Subject: [PATCH 2311/3804] kfence: use TASK_IDLE when awaiting allocation

Since wait_event() uses TASK_UNINTERRUPTIBLE by default, waiting for an
allocation counts towards load.  However, for KFENCE, this does not make
any sense, since there is no busy work we're awaiting.

Instead, use TASK_IDLE via wait_event_idle() to not count towards load.

BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1185565
Link: https://lkml.kernel.org/r/20210521083209.3740269-1-elver@google.com
Fixes: 407f1d8c1b5f ("kfence: await for allocation using wait_event")
Signed-off-by: Marco Elver <elver@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Hillf Danton <hdanton@sina.com>
Cc: <stable@vger.kernel.org>	[5.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kfence/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index e18fbbd5d9b48..4d21ac44d5d35 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -627,10 +627,10 @@ static void toggle_allocation_gate(struct work_struct *work)
 		 * During low activity with no allocations we might wait a
 		 * while; let's avoid the hung task warning.
 		 */
-		wait_event_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
-				   sysctl_hung_task_timeout_secs * HZ / 2);
+		wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
+					sysctl_hung_task_timeout_secs * HZ / 2);
 	} else {
-		wait_event(allocation_wait, atomic_read(&kfence_allocation_gate));
+		wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
 	}
 
 	/* Disable static key and reset timer. */
-- 
GitLab


From 0711f0d7050b9e07c44bc159bbc64ac0a1022c7f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 4 Jun 2021 20:01:14 -0700
Subject: [PATCH 2312/3804] pid: take a reference when initializing `cad_pid`

During boot, kernel_init_freeable() initializes `cad_pid` to the init
task's struct pid.  Later on, we may change `cad_pid` via a sysctl, and
when this happens proc_do_cad_pid() will increment the refcount on the
new pid via get_pid(), and will decrement the refcount on the old pid
via put_pid().  As we never called get_pid() when we initialized
`cad_pid`, we decrement a reference we never incremented, and can therefore
free the init task's struct pid early.  As there can be dangling
references to the struct pid, we can later encounter a use-after-free
(e.g.  when delivering signals).

This was spotted when fuzzing v5.13-rc3 with Syzkaller, but seems to
have been around since the conversion of `cad_pid` to struct pid in
commit 9ec52099e4b8 ("[PATCH] replace cad_pid by a struct pid") from the
pre-KASAN stone age of v2.6.19.

Fix this by getting a reference to the init task's struct pid when we
assign it to `cad_pid`.

Full KASAN splat below.

   ==================================================================
   BUG: KASAN: use-after-free in ns_of_pid include/linux/pid.h:153 [inline]
   BUG: KASAN: use-after-free in task_active_pid_ns+0xc0/0xc8 kernel/pid.c:509
   Read of size 4 at addr ffff23794dda0004 by task syz-executor.0/273

   CPU: 1 PID: 273 Comm: syz-executor.0 Not tainted 5.12.0-00001-g9aef892b2d15 #1
   Hardware name: linux,dummy-virt (DT)
   Call trace:
    ns_of_pid include/linux/pid.h:153 [inline]
    task_active_pid_ns+0xc0/0xc8 kernel/pid.c:509
    do_notify_parent+0x308/0xe60 kernel/signal.c:1950
    exit_notify kernel/exit.c:682 [inline]
    do_exit+0x2334/0x2bd0 kernel/exit.c:845
    do_group_exit+0x108/0x2c8 kernel/exit.c:922
    get_signal+0x4e4/0x2a88 kernel/signal.c:2781
    do_signal arch/arm64/kernel/signal.c:882 [inline]
    do_notify_resume+0x300/0x970 arch/arm64/kernel/signal.c:936
    work_pending+0xc/0x2dc

   Allocated by task 0:
    slab_post_alloc_hook+0x50/0x5c0 mm/slab.h:516
    slab_alloc_node mm/slub.c:2907 [inline]
    slab_alloc mm/slub.c:2915 [inline]
    kmem_cache_alloc+0x1f4/0x4c0 mm/slub.c:2920
    alloc_pid+0xdc/0xc00 kernel/pid.c:180
    copy_process+0x2794/0x5e18 kernel/fork.c:2129
    kernel_clone+0x194/0x13c8 kernel/fork.c:2500
    kernel_thread+0xd4/0x110 kernel/fork.c:2552
    rest_init+0x44/0x4a0 init/main.c:687
    arch_call_rest_init+0x1c/0x28
    start_kernel+0x520/0x554 init/main.c:1064
    0x0

   Freed by task 270:
    slab_free_hook mm/slub.c:1562 [inline]
    slab_free_freelist_hook+0x98/0x260 mm/slub.c:1600
    slab_free mm/slub.c:3161 [inline]
    kmem_cache_free+0x224/0x8e0 mm/slub.c:3177
    put_pid.part.4+0xe0/0x1a8 kernel/pid.c:114
    put_pid+0x30/0x48 kernel/pid.c:109
    proc_do_cad_pid+0x190/0x1b0 kernel/sysctl.c:1401
    proc_sys_call_handler+0x338/0x4b0 fs/proc/proc_sysctl.c:591
    proc_sys_write+0x34/0x48 fs/proc/proc_sysctl.c:617
    call_write_iter include/linux/fs.h:1977 [inline]
    new_sync_write+0x3ac/0x510 fs/read_write.c:518
    vfs_write fs/read_write.c:605 [inline]
    vfs_write+0x9c4/0x1018 fs/read_write.c:585
    ksys_write+0x124/0x240 fs/read_write.c:658
    __do_sys_write fs/read_write.c:670 [inline]
    __se_sys_write fs/read_write.c:667 [inline]
    __arm64_sys_write+0x78/0xb0 fs/read_write.c:667
    __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline]
    invoke_syscall arch/arm64/kernel/syscall.c:49 [inline]
    el0_svc_common.constprop.1+0x16c/0x388 arch/arm64/kernel/syscall.c:129
    do_el0_svc+0xf8/0x150 arch/arm64/kernel/syscall.c:168
    el0_svc+0x28/0x38 arch/arm64/kernel/entry-common.c:416
    el0_sync_handler+0x134/0x180 arch/arm64/kernel/entry-common.c:432
    el0_sync+0x154/0x180 arch/arm64/kernel/entry.S:701

   The buggy address belongs to the object at ffff23794dda0000
    which belongs to the cache pid of size 224
   The buggy address is located 4 bytes inside of
    224-byte region [ffff23794dda0000, ffff23794dda00e0)
   The buggy address belongs to the page:
   page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x4dda0
   head:(____ptrval____) order:1 compound_mapcount:0
   flags: 0x3fffc0000010200(slab|head)
   raw: 03fffc0000010200 dead000000000100 dead000000000122 ffff23794d40d080
   raw: 0000000000000000 0000000000190019 00000001ffffffff 0000000000000000
   page dumped because: kasan: bad access detected

   Memory state around the buggy address:
    ffff23794dd9ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
    ffff23794dd9ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
   >ffff23794dda0000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                      ^
    ffff23794dda0080: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
    ffff23794dda0100: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
   ==================================================================

Link: https://lkml.kernel.org/r/20210524172230.38715-1-mark.rutland@arm.com
Fixes: 9ec52099e4b8678a ("[PATCH] replace cad_pid by a struct pid")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Cedric Le Goater <clg@fr.ibm.com>
Cc: Christian Brauner <christian@brauner.io>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/main.c b/init/main.c
index eb01e121d2f15..e9c42a183e339 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1537,7 +1537,7 @@ static noinline void __init kernel_init_freeable(void)
 	 */
 	set_mems_allowed(node_states[N_MEMORY]);
 
-	cad_pid = task_pid(current);
+	cad_pid = get_pid(task_pid(current));
 
 	smp_prepare_cpus(setup_max_cpus);
 
-- 
GitLab


From 04f7ce3f07ce39b1a3ca03a56b238a53acc52cfd Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Date: Fri, 4 Jun 2021 20:01:18 -0700
Subject: [PATCH 2313/3804] mm/debug_vm_pgtable: fix alignment for
 pmd/pud_advanced_tests()

In pmd/pud_advanced_tests(), the vaddr is aligned up to the next pmd/pud
entry, and so it does not match the given pmdp/pudp and (aligned down)
pfn any more.

For s390, this results in memory corruption, because the IDTE
instruction used e.g.  in xxx_get_and_clear() will take the vaddr for
some calculations, in combination with the given pmdp.  It will then end
up with a wrong table origin, ending on ...ff8, and some of those
wrongly set low-order bits will also select a wrong pagetable level for
the index addition.  IDTE could therefore invalidate (or 0x20) something
outside of the page tables, depending on the wrongly picked index, which
in turn depends on the random vaddr.

As a result, we sometimes see "BUG task_struct (Not tainted): Padding
overwritten" on s390, where one 0x5a padding value got overwritten with
0x7a.

Fix this by aligning down, similar to how the pmd/pud_aligned pfns are
calculated.

Link: https://lkml.kernel.org/r/20210525130043.186290-2-gerald.schaefer@linux.ibm.com
Fixes: a5c3b9ffb0f40 ("mm/debug_vm_pgtable: add tests validating advanced arch page table helpers")
Signed-off-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: <stable@vger.kernel.org>	[5.9+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/debug_vm_pgtable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 05efe98a9ac2c..297d1b349c197 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -192,7 +192,7 @@ static void __init pmd_advanced_tests(struct mm_struct *mm,
 
 	pr_debug("Validating PMD advanced\n");
 	/* Align the address wrt HPAGE_PMD_SIZE */
-	vaddr = (vaddr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE;
+	vaddr &= HPAGE_PMD_MASK;
 
 	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 
@@ -330,7 +330,7 @@ static void __init pud_advanced_tests(struct mm_struct *mm,
 
 	pr_debug("Validating PUD advanced\n");
 	/* Align the address wrt HPAGE_PUD_SIZE */
-	vaddr = (vaddr & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE;
+	vaddr &= HPAGE_PUD_MASK;
 
 	set_pud_at(mm, vaddr, pudp, pud);
 	pudp_set_wrprotect(mm, vaddr, pudp);
-- 
GitLab


From bac9c6fa1f929213bbd0ac9cdf21e8e2f0916828 Mon Sep 17 00:00:00 2001
From: Ding Hui <dinghui@sangfor.com.cn>
Date: Fri, 4 Jun 2021 20:01:21 -0700
Subject: [PATCH 2314/3804] mm/page_alloc: fix counting of free pages after
 take off from buddy

Recently we found that a lot of MemFree is left in /proc/meminfo after
soft offlining a lot of pages, which is not quite correct.

Before Oscar's rework of soft offline for free pages [1], if we soft
offline free pages, these pages are left in buddy with the HWPoison flag,
and NR_FREE_PAGES is not updated immediately.  So the difference between
NR_FREE_PAGES and the real number of available free pages is also quite
large at the beginning.

However, with the workload running, when we subsequently catch a HWPoison
page in any of the alloc functions, we remove it from buddy, update
NR_FREE_PAGES and try again, so NR_FREE_PAGES gets closer and closer to
the real number of available free pages.
(regardless of unpoison_memory())

Now, for offlined free pages, after a successful call to
take_page_off_buddy(), the page no longer belongs to the buddy allocator
and will not be used any more, but we missed accounting NR_FREE_PAGES in
this situation, and there is no chance for it to be updated later.

Do the update in take_page_off_buddy() like rmqueue() does, but avoid
double counting if someone has already called set_migratetype_isolate()
on the page.

[1]: commit 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages")

Link: https://lkml.kernel.org/r/20210526075247.11130-1-dinghui@sangfor.com.cn
Fixes: 06be6ff3d2ec ("mm,hwpoison: rework soft offline for free pages")
Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
Suggested-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index aaa1655cf6820..d1f5de1c1283b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -9158,6 +9158,8 @@ bool take_page_off_buddy(struct page *page)
 			del_page_from_free_list(page_head, zone, page_order);
 			break_down_buddy_pages(zone, page_head, page, 0,
 						page_order, migratetype);
+			if (!is_migrate_isolate(migratetype))
+				__mod_zone_freepage_state(zone, -1, migratetype);
 			ret = true;
 			break;
 		}
-- 
GitLab


From 928130532e19f2f920840e41bd6b1cae742ea63b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 4 Jun 2021 20:01:24 -0700
Subject: [PATCH 2315/3804] drivers/base/memory: fix trying offlining memory
 blocks with memory holes on aarch64

offline_pages() properly checks for memory holes and bails out.
However, we do a page_zone(pfn_to_page(start_pfn)) before calling
offline_pages() when offlining a memory block.

We should not unconditionally call page_zone(pfn_to_page(start_pfn)) on
aarch64 in offlining code, otherwise we can trigger a BUG when hitting a
memory hole:

   kernel BUG at include/linux/mm.h:1383!
   Internal error: Oops - BUG: 0 [#1] SMP
   Modules linked in: loop processor efivarfs ip_tables x_tables ext4 mbcache jbd2 dm_mod igb nvme i2c_algo_bit mlx5_core i2c_core nvme_core firmware_class
   CPU: 13 PID: 1694 Comm: ranbug Not tainted 5.12.0-next-20210524+ #4
   Hardware name: MiTAC RAPTOR EV-883832-X3-0001/RAPTOR, BIOS 1.6 06/28/2020
   pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--)
   pc : memory_subsys_offline+0x1f8/0x250
   lr : memory_subsys_offline+0x1f8/0x250
   Call trace:
     memory_subsys_offline+0x1f8/0x250
     device_offline+0x154/0x1d8
     online_store+0xa4/0x118
     dev_attr_store+0x44/0x78
     sysfs_kf_write+0xe8/0x138
     kernfs_fop_write_iter+0x26c/0x3d0
     new_sync_write+0x2bc/0x4f8
     vfs_write+0x718/0xc88
     ksys_write+0xf8/0x1e0
     __arm64_sys_write+0x74/0xa8
     invoke_syscall.constprop.0+0x78/0x1e8
     do_el0_svc+0xe4/0x298
     el0_svc+0x20/0x30
     el0_sync_handler+0xb0/0xb8
     el0_sync+0x178/0x180
   Kernel panic - not syncing: Oops - BUG: Fatal exception
   SMP: stopping secondary CPUs
   Kernel Offset: disabled
   CPU features: 0x00000251,20000846
   Memory Limit: none

If nr_vmemmap_pages is set, we know that we are dealing with hotplugged
memory that doesn't have any holes.  So call
page_zone(pfn_to_page(start_pfn)) only when really necessary -- when
nr_vmemmap_pages is set and we actually adjust the present pages.

Link: https://lkml.kernel.org/r/20210526075226.5572-1-david@redhat.com
Fixes: a08a2ae34613 ("mm,memory_hotplug: allocate memmap from the added memory range")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: Qian Cai (QUIC) <quic_qiancai@quicinc.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b31b3af5c490f..d5ffaab3cb61c 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -218,14 +218,14 @@ static int memory_block_offline(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
-	zone = page_zone(pfn_to_page(start_pfn));
-
 	/*
 	 * Unaccount before offlining, such that unpopulated zone and kthreads
 	 * can properly be torn down in offline_pages().
 	 */
-	if (nr_vmemmap_pages)
+	if (nr_vmemmap_pages) {
+		zone = page_zone(pfn_to_page(start_pfn));
 		adjust_present_page_count(zone, -nr_vmemmap_pages);
+	}
 
 	ret = offline_pages(start_pfn + nr_vmemmap_pages,
 			    nr_pages - nr_vmemmap_pages);
-- 
GitLab


From 0c5da35723a961d8c02ea516da2bcfeb007d7d2c Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Fri, 4 Jun 2021 20:01:27 -0700
Subject: [PATCH 2316/3804] hugetlb: pass head page to remove_hugetlb_page()

When memory_failure() or soft_offline_page() is called on a tail page of
some hugetlb page, "BUG: unable to handle page fault" error can be
triggered.

remove_hugetlb_page() dereferences page->lru, so it's assumed that the
page points to a head page, but one of the callers,
dissolve_free_huge_page(), provides remove_hugetlb_page() with 'page'
which could be a tail page.  So pass 'head' to it, instead.

Link: https://lkml.kernel.org/r/20210526235257.2769473-1-nao.horiguchi@gmail.com
Fixes: 6eb4e88a6d27 ("hugetlb: create remove_hugetlb_page() to separate functionality")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 95918f410c0f8..470f7b5b437e6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1793,7 +1793,7 @@ retry:
 			SetPageHWPoison(page);
 			ClearPageHWPoison(head);
 		}
-		remove_hugetlb_page(h, page, false);
+		remove_hugetlb_page(h, head, false);
 		h->max_huge_pages--;
 		spin_unlock_irq(&hugetlb_lock);
 		update_and_free_page(h, head);
-- 
GitLab


From 263e88d678baa1a2e3f2d5afbdcd9fd3feb80a4d Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Fri, 4 Jun 2021 20:01:30 -0700
Subject: [PATCH 2317/3804] proc: add .gitignore for proc-subset-pid selftest

This new selftest needs an entry in the .gitignore file otherwise git
will try to track the binary.

Link: https://lkml.kernel.org/r/20210601164305.11776-1-dmatlack@google.com
Fixes: 268af17ada5855 ("selftests: proc: test subset=pid")
Signed-off-by: David Matlack <dmatlack@google.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Alexey Gladkov <gladkov.alexey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/proc/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index bed4b5318a86c..8f3e72e626fa7 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -10,6 +10,7 @@
 /proc-self-map-files-002
 /proc-self-syscall
 /proc-self-wchan
+/proc-subset-pid
 /proc-uptime-001
 /proc-uptime-002
 /read
-- 
GitLab


From 7b6889f54a3c8c4139137a24a3ca12fe52a91dba Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Fri, 4 Jun 2021 20:01:33 -0700
Subject: [PATCH 2318/3804] mm/kasan/init.c: fix doc warning

Fix gcc W=1 warning:

  mm/kasan/init.c:228: warning: Function parameter or member 'shadow_start' not described in 'kasan_populate_early_shadow'
  mm/kasan/init.c:228: warning: Function parameter or member 'shadow_end' not described in 'kasan_populate_early_shadow'

Link: https://lkml.kernel.org/r/20210603140700.3045298-1-yukuai3@huawei.com
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Zhang Yi <yi.zhang@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kasan/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/kasan/init.c b/mm/kasan/init.c
index c4605ac9837b0..348f31d15a971 100644
--- a/mm/kasan/init.c
+++ b/mm/kasan/init.c
@@ -220,8 +220,8 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
 /**
  * kasan_populate_early_shadow - populate shadow memory region with
  *                               kasan_early_shadow_page
- * @shadow_start - start of the memory range to populate
- * @shadow_end   - end of the memory range to populate
+ * @shadow_start: start of the memory range to populate
+ * @shadow_end: end of the memory range to populate
  */
 int __ref kasan_populate_early_shadow(const void *shadow_start,
 					const void *shadow_end)
-- 
GitLab


From d84cf06e3dd8c5c5b547b5d8931015fc536678e5 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Fri, 4 Jun 2021 20:01:36 -0700
Subject: [PATCH 2319/3804] mm, hugetlb: fix simple resv_huge_pages underflow
 on UFFDIO_COPY

The userfaultfd hugetlb tests cause a resv_huge_pages underflow.  This
happens when hugetlb_mcopy_atomic_pte() is called with !is_continue on
an index for which we already have a page in the cache.  When this
happens, we allocate a second page, double consuming the reservation,
and then fail to insert the page into the cache and return -EEXIST.

To fix this, we first check if there is a page in the cache which
already consumed the reservation, and return -EEXIST immediately if so.

There is still a rare condition where we fail to copy the page contents
AND race with a call for hugetlb_no_page() for this index and again we
will underflow resv_huge_pages.  That is fixed in a more complicated
patch not targeted for -stable.

Test:

  Hacked the code locally such that resv_huge_pages underflows produce a
  warning, then:

  ./tools/testing/selftests/vm/userfaultfd hugetlb_shared 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success
  ./tools/testing/selftests/vm/userfaultfd hugetlb 10
	2 /tmp/kokonut_test/huge/userfaultfd_test && echo test success

Both tests succeed and produce no warnings.  After the test runs number
of free/resv hugepages is correct.

[mike.kravetz@oracle.com: changelog fixes]

Link: https://lkml.kernel.org/r/20210528004649.85298-1-almasrymina@google.com
Fixes: 8fb5debc5fcd ("userfaultfd: hugetlbfs: add hugetlb_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 470f7b5b437e6..5560b50876fb7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4889,10 +4889,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 		if (!page)
 			goto out;
 	} else if (!*pagep) {
-		ret = -ENOMEM;
+		/* If a page already exists, then it's UFFDIO_COPY for
+		 * a non-missing case. Return -EEXIST.
+		 */
+		if (vm_shared &&
+		    hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
+			ret = -EEXIST;
+			goto out;
+		}
+
 		page = alloc_huge_page(dst_vma, dst_addr, 0);
-		if (IS_ERR(page))
+		if (IS_ERR(page)) {
+			ret = -ENOMEM;
 			goto out;
+		}
 
 		ret = copy_huge_page_from_user(page,
 						(const void __user *) src_addr,
-- 
GitLab


From 415f0c835ba799e47ce077b01876568431da1ff3 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 4 Jun 2021 20:01:39 -0700
Subject: [PATCH 2320/3804] lib: crc64: fix kernel-doc warning

Fix W=1 kernel build warning:

  lib/crc64.c:40: warning:
   bad line:         or the previous crc64 value if computing incrementally.

Link: https://lkml.kernel.org/r/20210601135851.15444-1-yuehaibing@huawei.com
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Coly Li <colyli@suse.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/crc64.c b/lib/crc64.c
index 47cfa054827f3..9f852a89ee2a1 100644
--- a/lib/crc64.c
+++ b/lib/crc64.c
@@ -37,7 +37,7 @@ MODULE_LICENSE("GPL v2");
 /**
  * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
  * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
-	or the previous crc64 value if computing incrementally.
+ *       or the previous crc64 value if computing incrementally.
  * @p: pointer to buffer over which CRC64 is run
  * @len: length of buffer @p
  */
-- 
GitLab


From 6bba4471f0cc1296fe3c2089b9e52442d3074b2e Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Fri, 4 Jun 2021 20:01:42 -0700
Subject: [PATCH 2321/3804] ocfs2: fix data corruption by fallocate

When fallocate punches holes beyond the inode size and the original isize
is in the middle of the last cluster, the part from isize to the end of
the cluster is zeroed with a buffer write.  At that time isize is not yet
updated to match the new size, so if writeback kicks in, it will invoke
ocfs2_writepage()->block_write_full_page(), where the pages beyond the
inode size are dropped.  That causes file corruption.  Fix this by
zeroing out the eof blocks when extending the inode size.

Running the following command with qemu-image 4.2.1 can easily produce a
corrupted converted image file.

    qemu-img convert -p -t none -T none -f qcow2 $qcow_image \
             -O qcow2 -o compat=1.1 $qcow_image.conv

qemu uses fallocate like this: it first punches holes beyond the inode
size, then extends the inode size.

    fallocate(11, FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE, 2276196352, 65536) = 0
    fallocate(11, 0, 2276196352, 65536) = 0

v1: https://www.spinics.net/lists/linux-fsdevel/msg193999.html
v2: https://lore.kernel.org/linux-fsdevel/20210525093034.GB4112@quack2.suse.cz/T/

Link: https://lkml.kernel.org/r/20210528210648.9124-1-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/file.c | 55 ++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 5 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f17c3d33fb18a..7756579430578 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1855,6 +1855,45 @@ out:
 	return ret;
 }
 
+/*
+ * zero out partial blocks of one cluster.
+ *
+ * start: file offset where zero starts, will be made upper block aligned.
+ * len: it will be trimmed to the end of current cluster if "start + len"
+ *      is bigger than it.
+ */
+static int ocfs2_zeroout_partial_cluster(struct inode *inode,
+					u64 start, u64 len)
+{
+	int ret;
+	u64 start_block, end_block, nr_blocks;
+	u64 p_block, offset;
+	u32 cluster, p_cluster, nr_clusters;
+	struct super_block *sb = inode->i_sb;
+	u64 end = ocfs2_align_bytes_to_clusters(sb, start);
+
+	if (start + len < end)
+		end = start + len;
+
+	start_block = ocfs2_blocks_for_bytes(sb, start);
+	end_block = ocfs2_blocks_for_bytes(sb, end);
+	nr_blocks = end_block - start_block;
+	if (!nr_blocks)
+		return 0;
+
+	cluster = ocfs2_bytes_to_clusters(sb, start);
+	ret = ocfs2_get_clusters(inode, cluster, &p_cluster,
+				&nr_clusters, NULL);
+	if (ret)
+		return ret;
+	if (!p_cluster)
+		return 0;
+
+	offset = start_block - ocfs2_clusters_to_blocks(sb, cluster);
+	p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset;
+	return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS);
+}
+
 /*
  * Parts of this function taken from xfs_change_file_space()
  */
@@ -1865,7 +1904,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 {
 	int ret;
 	s64 llen;
-	loff_t size;
+	loff_t size, orig_isize;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *di_bh = NULL;
 	handle_t *handle;
@@ -1896,6 +1935,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		goto out_inode_unlock;
 	}
 
+	orig_isize = i_size_read(inode);
 	switch (sr->l_whence) {
 	case 0: /*SEEK_SET*/
 		break;
@@ -1903,7 +1943,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		sr->l_start += f_pos;
 		break;
 	case 2: /*SEEK_END*/
-		sr->l_start += i_size_read(inode);
+		sr->l_start += orig_isize;
 		break;
 	default:
 		ret = -EINVAL;
@@ -1957,6 +1997,14 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 	default:
 		ret = -EINVAL;
 	}
+
+	/* zeroout eof blocks in the cluster. */
+	if (!ret && change_size && orig_isize < size) {
+		ret = ocfs2_zeroout_partial_cluster(inode, orig_isize,
+					size - orig_isize);
+		if (!ret)
+			i_size_write(inode, size);
+	}
 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
 	if (ret) {
 		mlog_errno(ret);
@@ -1973,9 +2021,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 		goto out_inode_unlock;
 	}
 
-	if (change_size && i_size_read(inode) < size)
-		i_size_write(inode, size);
-
 	inode->i_ctime = inode->i_mtime = current_time(inode);
 	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
 	if (ret < 0)
-- 
GitLab


From 2eff0573e0d5a50a42eea41e4d23d5029d4b24fc Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <michel@lespinasse.org>
Date: Fri, 4 Jun 2021 20:01:45 -0700
Subject: [PATCH 2322/3804] mailmap: use private address for Michel Lespinasse

Link: https://lkml.kernel.org/r/20210602221225.49446-1-michel@lespinasse.org
Signed-off-by: Michel Lespinasse <michel@lespinasse.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.mailmap b/.mailmap
index ce6c497767e22..c79a78766c07f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -243,6 +243,9 @@ Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
 Mayuresh Janorkar <mayur@ti.com>
 Michael Buesch <m@bues.ch>
 Michel Dänzer <michel@tungstengraphics.com>
+Michel Lespinasse <michel@lespinasse.org>
+Michel Lespinasse <michel@lespinasse.org> <walken@google.com>
+Michel Lespinasse <michel@lespinasse.org> <walken@zoy.org>
 Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
 Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
 Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
-- 
GitLab


From 8e11d62e2e8769fe29d1ae98b44b23c7233eb8a2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Sat, 5 Jun 2021 08:56:09 +0000
Subject: [PATCH 2323/3804] powerpc/mem: Add back missing header to fix 'no
 previous prototype' error

Commit b26e8f27253a ("powerpc/mem: Move cache flushing functions into
mm/cacheflush.c") removed asm/sparsemem.h which is required when
CONFIG_MEMORY_HOTPLUG is selected to get the declaration of
create_section_mapping().

Add it back.

Fixes: b26e8f27253a ("powerpc/mem: Move cache flushing functions into mm/cacheflush.c")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/3e5b63bb3daab54a1eb9c20221c2e9528c4db9b3.1622883330.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/mm/mem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 043bbeaf407cb..a6b36a40897ae 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -20,6 +20,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/kasan.h>
+#include <asm/sparsemem.h>
 #include <asm/svm.h>
 
 #include <mm/mmu_decl.h>
-- 
GitLab


From e22808071d4d23596e6cc8f62588225515789031 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 1 Jun 2021 13:31:55 +0200
Subject: [PATCH 2324/3804] dt-bindings: irqchip: renesas-irqc: Add R-Car M3-W+
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document support for the Interrupt Controller for External Devices
(INT-EC) in the Renesas R-Car M3-W+ (r8a77961) SoC.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/55d2c30cb14b2e10193a7fd4aa7670c70f360037.1622546880.git.geert+renesas@glider.be
---
 .../devicetree/bindings/interrupt-controller/renesas,irqc.yaml   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
index b67b8cbd33fcb..abb22db3bb28b 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
@@ -29,6 +29,7 @@ properties:
           - renesas,intc-ex-r8a774c0    # RZ/G2E
           - renesas,intc-ex-r8a7795     # R-Car H3
           - renesas,intc-ex-r8a7796     # R-Car M3-W
+          - renesas,intc-ex-r8a77961    # R-Car M3-W+
           - renesas,intc-ex-r8a77965    # R-Car M3-N
           - renesas,intc-ex-r8a77970    # R-Car V3M
           - renesas,intc-ex-r8a77980    # R-Car V3H
-- 
GitLab


From 4acd8a4be614a6c191273f2247aff7374a92f318 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Tue, 18 May 2021 21:19:21 +0000
Subject: [PATCH 2325/3804] irqchip/qcom-pdc: Switch to IRQCHIP_PLATFORM_DRIVER
 and allow as a module

This patch revives changes from Saravana Kannan to switch the
qcom-pdc driver to use IRQCHIP_PLATFORM_DRIVER helper macros,
and allows qcom-pdc driver to be loaded as a permanent module.

Earlier attempts at this ran into trouble with loading
dependencies, but with Saravana's fw_devlink=on set by default
now we should avoid those.

[jstultz: Folded in with my changes to allow the driver to be
 loadable as a permanent module]

Cc: Andy Gross <agross@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Maulik Shah <mkshah@codeaurora.org>
Cc: Lina Iyer <ilina@codeaurora.org>
Cc: Saravana Kannan <saravanak@google.com>
Cc: Todd Kjos <tkjos@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-arm-msm@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-gpio@vger.kernel.org
Signed-off-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210518211922.3474368-1-john.stultz@linaro.org
---
 drivers/irqchip/Kconfig    | 2 +-
 drivers/irqchip/qcom-pdc.c | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 62543a4eccc08..4d5924e9f7666 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -415,7 +415,7 @@ config GOLDFISH_PIC
          for Goldfish based virtual platforms.
 
 config QCOM_PDC
-	bool "QCOM PDC"
+	tristate "QCOM PDC"
 	depends on ARCH_QCOM
 	select IRQ_DOMAIN_HIERARCHY
 	help
diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c
index 5dc63c20b67ea..32d59202d408d 100644
--- a/drivers/irqchip/qcom-pdc.c
+++ b/drivers/irqchip/qcom-pdc.c
@@ -11,9 +11,11 @@
 #include <linux/irqdomain.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 #include <linux/soc/qcom/irq.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
@@ -459,4 +461,8 @@ fail:
 	return ret;
 }
 
-IRQCHIP_DECLARE(qcom_pdc, "qcom,pdc", qcom_pdc_init);
+IRQCHIP_PLATFORM_DRIVER_BEGIN(qcom_pdc)
+IRQCHIP_MATCH("qcom,pdc", qcom_pdc_init)
+IRQCHIP_PLATFORM_DRIVER_END(qcom_pdc)
+MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Power Domain Controller");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From c96d6abbec52d6723bef6b50846f40f7fb27e93c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 19 May 2021 13:04:55 +0800
Subject: [PATCH 2326/3804] irqchip/mbigen: Fix compile warning when
 CONFIG_ACPI is disabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following compile warning:

  drivers/irqchip/irq-mbigen.c:372:36: warning: ‘mbigen_acpi_match’ defined but not used [-Wunused-const-variable=]
   static const struct acpi_device_id mbigen_acpi_match[] = {

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210519050455.1693953-1-yangyingliang@huawei.com
---
 drivers/irqchip/irq-mbigen.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c
index 2cb45c6b85011..f565317a3da3c 100644
--- a/drivers/irqchip/irq-mbigen.c
+++ b/drivers/irqchip/irq-mbigen.c
@@ -273,6 +273,12 @@ static int mbigen_of_create_domain(struct platform_device *pdev,
 }
 
 #ifdef CONFIG_ACPI
+static const struct acpi_device_id mbigen_acpi_match[] = {
+	{ "HISI0152", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
+
 static int mbigen_acpi_create_domain(struct platform_device *pdev,
 				     struct mbigen_device *mgn_chip)
 {
@@ -369,12 +375,6 @@ static const struct of_device_id mbigen_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, mbigen_of_match);
 
-static const struct acpi_device_id mbigen_acpi_match[] = {
-	{ "HISI0152", 0 },
-	{}
-};
-MODULE_DEVICE_TABLE(acpi, mbigen_acpi_match);
-
 static struct platform_driver mbigen_platform_driver = {
 	.driver = {
 		.name		= "Hisilicon MBIGEN-V2",
-- 
GitLab


From 082cd4ec240b8734a82a89ffb890216ac98fec68 Mon Sep 17 00:00:00 2001
From: Ye Bin <yebin10@huawei.com>
Date: Thu, 6 May 2021 22:10:42 +0800
Subject: [PATCH 2327/3804] ext4: fix bug on in ext4_es_cache_extent as
 ext4_split_extent_at failed

We got the following bug_on when running fsstress with IO fault injection:
[130747.323114] kernel BUG at fs/ext4/extents_status.c:762!
[130747.323117] Internal error: Oops - BUG: 0 [#1] SMP
......
[130747.334329] Call trace:
[130747.334553]  ext4_es_cache_extent+0x150/0x168 [ext4]
[130747.334975]  ext4_cache_extents+0x64/0xe8 [ext4]
[130747.335368]  ext4_find_extent+0x300/0x330 [ext4]
[130747.335759]  ext4_ext_map_blocks+0x74/0x1178 [ext4]
[130747.336179]  ext4_map_blocks+0x2f4/0x5f0 [ext4]
[130747.336567]  ext4_mpage_readpages+0x4a8/0x7a8 [ext4]
[130747.336995]  ext4_readpage+0x54/0x100 [ext4]
[130747.337359]  generic_file_buffered_read+0x410/0xae8
[130747.337767]  generic_file_read_iter+0x114/0x190
[130747.338152]  ext4_file_read_iter+0x5c/0x140 [ext4]
[130747.338556]  __vfs_read+0x11c/0x188
[130747.338851]  vfs_read+0x94/0x150
[130747.339110]  ksys_read+0x74/0xf0

This patch's modification is according to Jan Kara's suggestion in:
https://patchwork.ozlabs.org/project/linux-ext4/patch/20210428085158.3728201-1-yebin10@huawei.com/
"I see. Now I understand your patch. Honestly, seeing how fragile is trying
to fix extent tree after split has failed in the middle, I would probably
go even further and make sure we fix the tree properly in case of ENOSPC
and EDQUOT (those are easily user triggerable).  Anything else indicates a
HW problem or fs corruption so I'd rather leave the extent tree as is and
don't try to fix it (which also means we will not create overlapping
extents)."

Cc: stable@kernel.org
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210506141042.3298679-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/extents.c | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 77c84d6f1af6b..cbf37b2cf871e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3206,7 +3206,10 @@ static int ext4_split_extent_at(handle_t *handle,
 		ext4_ext_mark_unwritten(ex2);
 
 	err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
-	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (err != -ENOSPC && err != -EDQUOT)
+		goto out;
+
+	if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
 		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
 			if (split_flag & EXT4_EXT_DATA_VALID1) {
 				err = ext4_ext_zeroout(inode, ex2);
@@ -3232,25 +3235,22 @@ static int ext4_split_extent_at(handle_t *handle,
 					      ext4_ext_pblock(&orig_ex));
 		}
 
-		if (err)
-			goto fix_extent_len;
-		/* update the extent length and mark as initialized */
-		ex->ee_len = cpu_to_le16(ee_len);
-		ext4_ext_try_to_merge(handle, inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
-		if (err)
-			goto fix_extent_len;
-
-		/* update extent status tree */
-		err = ext4_zeroout_es(inode, &zero_ex);
-
-		goto out;
-	} else if (err)
-		goto fix_extent_len;
-
-out:
-	ext4_ext_show_leaf(inode, path);
-	return err;
+		if (!err) {
+			/* update the extent length and mark as initialized */
+			ex->ee_len = cpu_to_le16(ee_len);
+			ext4_ext_try_to_merge(handle, inode, path, ex);
+			err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+			if (!err)
+				/* update extent status tree */
+				err = ext4_zeroout_es(inode, &zero_ex);
+			/* If we failed at this point, we don't know in which
+			 * state the extent tree exactly is so don't try to fix
+			 * length of the original extent as it may do even more
+			 * damage.
+			 */
+			goto out;
+		}
+	}
 
 fix_extent_len:
 	ex->ee_len = orig_ex.ee_len;
@@ -3260,6 +3260,9 @@ fix_extent_len:
 	 */
 	ext4_ext_dirty(handle, inode, path + path->p_depth);
 	return err;
+out:
+	ext4_ext_show_leaf(inode, path);
+	return err;
 }
 
 /*
-- 
GitLab


From a7ba36bc94f20b6c77f16364b9a23f582ea8faac Mon Sep 17 00:00:00 2001
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Date: Wed, 19 May 2021 14:59:20 -0700
Subject: [PATCH 2328/3804] ext4: fix fast commit alignment issues

Fast commit recovery data on disk may not be aligned. So, when the
recovery code reads it, this patch makes sure that fast commit info
found on-disk is first memcpy-ed into an aligned variable before
accessing it. As a consequence, we also remove some macros that
could have resulted in unaligned accesses.

Cc: stable@kernel.org
Fixes: 8016e29f4362 ("ext4: fast commit recovery path")
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Link: https://lore.kernel.org/r/20210519215920.2037527-1-harshads@google.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/fast_commit.c | 170 ++++++++++++++++++++++--------------------
 fs/ext4/fast_commit.h |  19 -----
 2 files changed, 90 insertions(+), 99 deletions(-)

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f98ca4f37ef61..e8195229c2529 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -1288,28 +1288,29 @@ struct dentry_info_args {
 };
 
 static inline void tl_to_darg(struct dentry_info_args *darg,
-				struct  ext4_fc_tl *tl)
+			      struct  ext4_fc_tl *tl, u8 *val)
 {
-	struct ext4_fc_dentry_info *fcd;
+	struct ext4_fc_dentry_info fcd;
 
-	fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
+	memcpy(&fcd, val, sizeof(fcd));
 
-	darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
-	darg->ino = le32_to_cpu(fcd->fc_ino);
-	darg->dname = fcd->fc_dname;
-	darg->dname_len = ext4_fc_tag_len(tl) -
-			sizeof(struct ext4_fc_dentry_info);
+	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
+	darg->ino = le32_to_cpu(fcd.fc_ino);
+	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
+	darg->dname_len = le16_to_cpu(tl->fc_len) -
+		sizeof(struct ext4_fc_dentry_info);
 }
 
 /* Unlink replay function */
-static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
+static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
+				 u8 *val)
 {
 	struct inode *inode, *old_parent;
 	struct qstr entry;
 	struct dentry_info_args darg;
 	int ret = 0;
 
-	tl_to_darg(&darg, tl);
+	tl_to_darg(&darg, tl, val);
 
 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
 			darg.parent_ino, darg.dname_len);
@@ -1399,13 +1400,14 @@ out:
 }
 
 /* Link replay function */
-static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
+static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
+			       u8 *val)
 {
 	struct inode *inode;
 	struct dentry_info_args darg;
 	int ret = 0;
 
-	tl_to_darg(&darg, tl);
+	tl_to_darg(&darg, tl, val);
 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
 			darg.parent_ino, darg.dname_len);
 
@@ -1450,9 +1452,10 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
 /*
  * Inode replay function
  */
-static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
+static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
+				u8 *val)
 {
-	struct ext4_fc_inode *fc_inode;
+	struct ext4_fc_inode fc_inode;
 	struct ext4_inode *raw_inode;
 	struct ext4_inode *raw_fc_inode;
 	struct inode *inode = NULL;
@@ -1460,9 +1463,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
 	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
 	struct ext4_extent_header *eh;
 
-	fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
+	memcpy(&fc_inode, val, sizeof(fc_inode));
 
-	ino = le32_to_cpu(fc_inode->fc_ino);
+	ino = le32_to_cpu(fc_inode.fc_ino);
 	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
 
 	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
@@ -1474,12 +1477,13 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
 
 	ext4_fc_record_modified_inode(sb, ino);
 
-	raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
+	raw_fc_inode = (struct ext4_inode *)
+		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
 	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
 	if (ret)
 		goto out;
 
-	inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
+	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
 	raw_inode = ext4_raw_inode(&iloc);
 
 	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
@@ -1547,14 +1551,15 @@ out:
  * inode for which we are trying to create a dentry here, should already have
  * been replayed before we start here.
  */
-static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
+static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
+				 u8 *val)
 {
 	int ret = 0;
 	struct inode *inode = NULL;
 	struct inode *dir = NULL;
 	struct dentry_info_args darg;
 
-	tl_to_darg(&darg, tl);
+	tl_to_darg(&darg, tl, val);
 
 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
 			darg.parent_ino, darg.dname_len);
@@ -1633,9 +1638,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino,
 
 /* Replay add range tag */
 static int ext4_fc_replay_add_range(struct super_block *sb,
-				struct ext4_fc_tl *tl)
+				    struct ext4_fc_tl *tl, u8 *val)
 {
-	struct ext4_fc_add_range *fc_add_ex;
+	struct ext4_fc_add_range fc_add_ex;
 	struct ext4_extent newex, *ex;
 	struct inode *inode;
 	ext4_lblk_t start, cur;
@@ -1645,15 +1650,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
 	struct ext4_ext_path *path = NULL;
 	int ret;
 
-	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
-	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
+	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
+	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
 
 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
-		le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
+		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
 		ext4_ext_get_actual_len(ex));
 
-	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
-				EXT4_IGET_NORMAL);
+	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
 	if (IS_ERR(inode)) {
 		jbd_debug(1, "Inode not found.");
 		return 0;
@@ -1762,32 +1766,33 @@ next:
 
 /* Replay DEL_RANGE tag */
 static int
-ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
+ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
+			 u8 *val)
 {
 	struct inode *inode;
-	struct ext4_fc_del_range *lrange;
+	struct ext4_fc_del_range lrange;
 	struct ext4_map_blocks map;
 	ext4_lblk_t cur, remaining;
 	int ret;
 
-	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
-	cur = le32_to_cpu(lrange->fc_lblk);
-	remaining = le32_to_cpu(lrange->fc_len);
+	memcpy(&lrange, val, sizeof(lrange));
+	cur = le32_to_cpu(lrange.fc_lblk);
+	remaining = le32_to_cpu(lrange.fc_len);
 
 	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
-		le32_to_cpu(lrange->fc_ino), cur, remaining);
+		le32_to_cpu(lrange.fc_ino), cur, remaining);
 
-	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
+	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
 	if (IS_ERR(inode)) {
-		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
+		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
 		return 0;
 	}
 
 	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
 
 	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
-			inode->i_ino, le32_to_cpu(lrange->fc_lblk),
-			le32_to_cpu(lrange->fc_len));
+			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
+			le32_to_cpu(lrange.fc_len));
 	while (remaining > 0) {
 		map.m_lblk = cur;
 		map.m_len = remaining;
@@ -1808,8 +1813,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
 	}
 
 	ret = ext4_punch_hole(inode,
-		le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
-		le32_to_cpu(lrange->fc_len) <<  sb->s_blocksize_bits);
+		le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits,
+		le32_to_cpu(lrange.fc_len) <<  sb->s_blocksize_bits);
 	if (ret)
 		jbd_debug(1, "ext4_punch_hole returned %d", ret);
 	ext4_ext_replay_shrink_inode(inode,
@@ -1925,11 +1930,11 @@ static int ext4_fc_replay_scan(journal_t *journal,
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_fc_replay_state *state;
 	int ret = JBD2_FC_REPLAY_CONTINUE;
-	struct ext4_fc_add_range *ext;
-	struct ext4_fc_tl *tl;
-	struct ext4_fc_tail *tail;
-	__u8 *start, *end;
-	struct ext4_fc_head *head;
+	struct ext4_fc_add_range ext;
+	struct ext4_fc_tl tl;
+	struct ext4_fc_tail tail;
+	__u8 *start, *end, *cur, *val;
+	struct ext4_fc_head head;
 	struct ext4_extent *ex;
 
 	state = &sbi->s_fc_replay_state;
@@ -1956,15 +1961,17 @@ static int ext4_fc_replay_scan(journal_t *journal,
 	}
 
 	state->fc_replay_expected_off++;
-	fc_for_each_tl(start, end, tl) {
+	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
+		memcpy(&tl, cur, sizeof(tl));
+		val = cur + sizeof(tl);
 		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
-			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
-		switch (le16_to_cpu(tl->fc_tag)) {
+			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
+		switch (le16_to_cpu(tl.fc_tag)) {
 		case EXT4_FC_TAG_ADD_RANGE:
-			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
-			ex = (struct ext4_extent *)&ext->fc_ex;
+			memcpy(&ext, val, sizeof(ext));
+			ex = (struct ext4_extent *)&ext.fc_ex;
 			ret = ext4_fc_record_regions(sb,
-				le32_to_cpu(ext->fc_ino),
+				le32_to_cpu(ext.fc_ino),
 				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
 				ext4_ext_get_actual_len(ex));
 			if (ret < 0)
@@ -1978,18 +1985,18 @@ static int ext4_fc_replay_scan(journal_t *journal,
 		case EXT4_FC_TAG_INODE:
 		case EXT4_FC_TAG_PAD:
 			state->fc_cur_tag++;
-			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
-					sizeof(*tl) + ext4_fc_tag_len(tl));
+			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+					sizeof(tl) + le16_to_cpu(tl.fc_len));
 			break;
 		case EXT4_FC_TAG_TAIL:
 			state->fc_cur_tag++;
-			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
-			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
-						sizeof(*tl) +
+			memcpy(&tail, val, sizeof(tail));
+			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+						sizeof(tl) +
 						offsetof(struct ext4_fc_tail,
 						fc_crc));
-			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
-				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
+			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
+				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
 				state->fc_replay_num_tags = state->fc_cur_tag;
 				state->fc_regions_valid =
 					state->fc_regions_used;
@@ -2000,19 +2007,19 @@ static int ext4_fc_replay_scan(journal_t *journal,
 			state->fc_crc = 0;
 			break;
 		case EXT4_FC_TAG_HEAD:
-			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
-			if (le32_to_cpu(head->fc_features) &
+			memcpy(&head, val, sizeof(head));
+			if (le32_to_cpu(head.fc_features) &
 				~EXT4_FC_SUPPORTED_FEATURES) {
 				ret = -EOPNOTSUPP;
 				break;
 			}
-			if (le32_to_cpu(head->fc_tid) != expected_tid) {
+			if (le32_to_cpu(head.fc_tid) != expected_tid) {
 				ret = JBD2_FC_REPLAY_STOP;
 				break;
 			}
 			state->fc_cur_tag++;
-			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
-					sizeof(*tl) + ext4_fc_tag_len(tl));
+			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
+					    sizeof(tl) + le16_to_cpu(tl.fc_len));
 			break;
 		default:
 			ret = state->fc_replay_num_tags ?
@@ -2036,11 +2043,11 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 {
 	struct super_block *sb = journal->j_private;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_fc_tl *tl;
-	__u8 *start, *end;
+	struct ext4_fc_tl tl;
+	__u8 *start, *end, *cur, *val;
 	int ret = JBD2_FC_REPLAY_CONTINUE;
 	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
-	struct ext4_fc_tail *tail;
+	struct ext4_fc_tail tail;
 
 	if (pass == PASS_SCAN) {
 		state->fc_current_pass = PASS_SCAN;
@@ -2067,49 +2074,52 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
 	start = (u8 *)bh->b_data;
 	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
 
-	fc_for_each_tl(start, end, tl) {
+	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
+		memcpy(&tl, cur, sizeof(tl));
+		val = cur + sizeof(tl);
+
 		if (state->fc_replay_num_tags == 0) {
 			ret = JBD2_FC_REPLAY_STOP;
 			ext4_fc_set_bitmaps_and_counters(sb);
 			break;
 		}
 		jbd_debug(3, "Replay phase, tag:%s\n",
-				tag2str(le16_to_cpu(tl->fc_tag)));
+				tag2str(le16_to_cpu(tl.fc_tag)));
 		state->fc_replay_num_tags--;
-		switch (le16_to_cpu(tl->fc_tag)) {
+		switch (le16_to_cpu(tl.fc_tag)) {
 		case EXT4_FC_TAG_LINK:
-			ret = ext4_fc_replay_link(sb, tl);
+			ret = ext4_fc_replay_link(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_UNLINK:
-			ret = ext4_fc_replay_unlink(sb, tl);
+			ret = ext4_fc_replay_unlink(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_ADD_RANGE:
-			ret = ext4_fc_replay_add_range(sb, tl);
+			ret = ext4_fc_replay_add_range(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_CREAT:
-			ret = ext4_fc_replay_create(sb, tl);
+			ret = ext4_fc_replay_create(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_DEL_RANGE:
-			ret = ext4_fc_replay_del_range(sb, tl);
+			ret = ext4_fc_replay_del_range(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_INODE:
-			ret = ext4_fc_replay_inode(sb, tl);
+			ret = ext4_fc_replay_inode(sb, &tl, val);
 			break;
 		case EXT4_FC_TAG_PAD:
 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
-				ext4_fc_tag_len(tl), 0);
+					     le16_to_cpu(tl.fc_len), 0);
 			break;
 		case EXT4_FC_TAG_TAIL:
 			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
-				ext4_fc_tag_len(tl), 0);
-			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
-			WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
+					     le16_to_cpu(tl.fc_len), 0);
+			memcpy(&tail, val, sizeof(tail));
+			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
 			break;
 		case EXT4_FC_TAG_HEAD:
 			break;
 		default:
-			trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
-				ext4_fc_tag_len(tl), 0);
+			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
+					     le16_to_cpu(tl.fc_len), 0);
 			ret = -ECANCELED;
 			break;
 		}
diff --git a/fs/ext4/fast_commit.h b/fs/ext4/fast_commit.h
index b77f70f55a622..937c381b4c85e 100644
--- a/fs/ext4/fast_commit.h
+++ b/fs/ext4/fast_commit.h
@@ -153,13 +153,6 @@ struct ext4_fc_replay_state {
 #define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
 #endif
 
-#define fc_for_each_tl(__start, __end, __tl)				\
-	for (tl = (struct ext4_fc_tl *)(__start);			\
-	     (__u8 *)tl < (__u8 *)(__end);				\
-		tl = (struct ext4_fc_tl *)((__u8 *)tl +			\
-					sizeof(struct ext4_fc_tl) +	\
-					+ le16_to_cpu(tl->fc_len)))
-
 static inline const char *tag2str(__u16 tag)
 {
 	switch (tag) {
@@ -186,16 +179,4 @@ static inline const char *tag2str(__u16 tag)
 	}
 }
 
-/* Get length of a particular tlv */
-static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
-{
-	return le16_to_cpu(tl->fc_len);
-}
-
-/* Get a pointer to "value" of a tlv */
-static inline __u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
-{
-	return (__u8 *)tl + sizeof(*tl);
-}
-
 #endif /* __FAST_COMMIT_H__ */
-- 
GitLab


From afd09b617db3786b6ef3dc43e28fe728cfea84df Mon Sep 17 00:00:00 2001
From: Alexey Makhalov <amakhalov@vmware.com>
Date: Fri, 21 May 2021 07:55:33 +0000
Subject: [PATCH 2329/3804] ext4: fix memory leak in ext4_fill_super

Buffer head references must be released before calling kill_bdev();
otherwise the buffer head (and its page referenced by b_data) will not
be freed by kill_bdev, and subsequently that bh will be leaked.

If blocksizes differ, sb_set_blocksize() will kill current buffers and
page cache by using kill_bdev(). And then super block will be reread
again but using correct blocksize this time. sb_set_blocksize() didn't
fully free superblock page and buffer head, and being busy, they were
not freed and instead leaked.

This can easily be reproduced by calling an infinite loop of:

  systemctl start <ext4_on_lvm>.mount, and
  systemctl stop <ext4_on_lvm>.mount

... since systemd creates a cgroup for each slice which it mounts, and
the bh leak gets amplified by a dying memory cgroup that also never
gets freed, and memory consumption is much more easily noticed.

Fixes: ce40733ce93d ("ext4: Check for return value from sb_set_blocksize")
Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3")
Link: https://lore.kernel.org/r/20210521075533.95732-1-amakhalov@vmware.com
Signed-off-by: Alexey Makhalov <amakhalov@vmware.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
---
 fs/ext4/super.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 886e0d518668f..f66c7301b53a9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4462,14 +4462,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 
 	if (sb->s_blocksize != blocksize) {
+		/*
+		 * bh must be released before kill_bdev(), otherwise
+		 * it won't be freed and its page also. kill_bdev()
+		 * is called by sb_set_blocksize().
+		 */
+		brelse(bh);
 		/* Validate the filesystem blocksize */
 		if (!sb_set_blocksize(sb, blocksize)) {
 			ext4_msg(sb, KERN_ERR, "bad block size %d",
 					blocksize);
+			bh = NULL;
 			goto failed_mount;
 		}
 
-		brelse(bh);
 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
 		offset = do_div(logical_sb_block, blocksize);
 		bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
@@ -5202,8 +5208,9 @@ failed_mount:
 		kfree(get_qf_name(sb, sbi, i));
 #endif
 	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
-	ext4_blkdev_remove(sbi);
+	/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
 	brelse(bh);
+	ext4_blkdev_remove(sbi);
 out_fail:
 	sb->s_fs_info = NULL;
 	kfree(sbi->s_blockgroup_lock);
-- 
GitLab


From 63e7f1289389c8dff3c766f01ac1cc1c874b2ba5 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Sat, 22 May 2021 00:41:32 +0000
Subject: [PATCH 2330/3804] ext4: fix no-key deletion for encrypt+casefold

commit 471fbbea7ff7 ("ext4: handle casefolding with encryption") is
missing a few checks for the encryption key which are needed to
support deleting encrypted casefolded files when the key is not
present.

This bug made it impossible to delete encrypted+casefolded directories
without the encryption key, due to errors like:

    W         : EXT4-fs warning (device vdc): __ext4fs_dirhash:270: inode #49202: comm Binder:378_4: Siphash requires key

Repro steps in kvm-xfstests test appliance:
      mkfs.ext4 -F -E encoding=utf8 -O encrypt /dev/vdc
      mount /vdc
      mkdir /vdc/dir
      chattr +F /vdc/dir
      keyid=$(head -c 64 /dev/zero | xfs_io -c add_enckey /vdc | awk '{print $NF}')
      xfs_io -c "set_encpolicy $keyid" /vdc/dir
      for i in `seq 1 100`; do
          mkdir /vdc/dir/$i
      done
      xfs_io -c "rm_enckey $keyid" /vdc
      rm -rf /vdc/dir # fails with the bug

Fixes: 471fbbea7ff7 ("ext4: handle casefolding with encryption")
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Link: https://lore.kernel.org/r/20210522004132.2142563-1-drosen@google.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/namei.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 8b46a17a85c19..2531f9e9e90e6 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1376,7 +1376,8 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
 	struct dx_hash_info *hinfo = &name->hinfo;
 	int len;
 
-	if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
+	if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
+	    (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
 		cf_name->name = NULL;
 		return 0;
 	}
@@ -1427,7 +1428,8 @@ static bool ext4_match(struct inode *parent,
 #endif
 
 #ifdef CONFIG_UNICODE
-	if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent)) {
+	if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
+	    (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
 		if (fname->cf_name.name) {
 			struct qstr cf = {.name = fname->cf_name.name,
 					  .len = fname->cf_name.len};
-- 
GitLab


From e71f99f2dfb45f4e7203a0732e85f71ef1d04dab Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Thu, 3 Jun 2021 09:48:49 +0000
Subject: [PATCH 2331/3804] ext4: Only advertise encrypted_casefold when
 encryption and unicode are enabled

Encrypted casefolding is only supported when encryption and
casefolding are both enabled in the config.

Fixes: 471fbbea7ff7 ("ext4: handle casefolding with encryption")
Cc: stable@vger.kernel.org # 5.13+
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Link: https://lore.kernel.org/r/20210603094849.314342-1-drosen@google.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/sysfs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6f825dedc3d42..55fcab60a59a5 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -315,7 +315,9 @@ EXT4_ATTR_FEATURE(verity);
 #endif
 EXT4_ATTR_FEATURE(metadata_csum_seed);
 EXT4_ATTR_FEATURE(fast_commit);
+#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
 EXT4_ATTR_FEATURE(encrypted_casefold);
+#endif
 
 static struct attribute *ext4_feat_attrs[] = {
 	ATTR_LIST(lazy_itable_init),
@@ -333,7 +335,9 @@ static struct attribute *ext4_feat_attrs[] = {
 #endif
 	ATTR_LIST(metadata_csum_seed),
 	ATTR_LIST(fast_commit),
+#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
 	ATTR_LIST(encrypted_casefold),
+#endif
 	NULL,
 };
 ATTRIBUTE_GROUPS(ext4_feat);
-- 
GitLab


From 0e4bf265b11a00bde9fef6b791bd8ee2d8059701 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 26 May 2021 13:58:57 +0530
Subject: [PATCH 2332/3804] pinctrl: qcom: Fix duplication in gpio_groups

"gpio52" and "gpio53" are duplicated in gpio_groups, fix them!

Fixes: ac43c44a7a37 ("pinctrl: qcom: Add SDX55 pincontrol driver")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210526082857.174682-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/pinctrl-sdx55.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-sdx55.c b/drivers/pinctrl/qcom/pinctrl-sdx55.c
index 5aaf57b40407f..0bb4931cec59e 100644
--- a/drivers/pinctrl/qcom/pinctrl-sdx55.c
+++ b/drivers/pinctrl/qcom/pinctrl-sdx55.c
@@ -410,15 +410,15 @@ static const char * const gpio_groups[] = {
 	"gpio29", "gpio30", "gpio31", "gpio32", "gpio33", "gpio34", "gpio35",
 	"gpio36", "gpio37", "gpio38", "gpio39", "gpio40", "gpio41", "gpio42",
 	"gpio43", "gpio44", "gpio45", "gpio46", "gpio47", "gpio48", "gpio49",
-	"gpio50", "gpio51", "gpio52", "gpio52", "gpio53", "gpio53", "gpio54",
-	"gpio55", "gpio56", "gpio57", "gpio58", "gpio59", "gpio60", "gpio61",
-	"gpio62", "gpio63", "gpio64", "gpio65", "gpio66", "gpio67", "gpio68",
-	"gpio69", "gpio70", "gpio71", "gpio72", "gpio73", "gpio74", "gpio75",
-	"gpio76", "gpio77", "gpio78", "gpio79", "gpio80", "gpio81", "gpio82",
-	"gpio83", "gpio84", "gpio85", "gpio86", "gpio87", "gpio88", "gpio89",
-	"gpio90", "gpio91", "gpio92", "gpio93", "gpio94", "gpio95", "gpio96",
-	"gpio97", "gpio98", "gpio99", "gpio100", "gpio101", "gpio102",
-	"gpio103", "gpio104", "gpio105", "gpio106", "gpio107",
+	"gpio50", "gpio51", "gpio52", "gpio53", "gpio54", "gpio55", "gpio56",
+	"gpio57", "gpio58", "gpio59", "gpio60", "gpio61", "gpio62", "gpio63",
+	"gpio64", "gpio65", "gpio66", "gpio67", "gpio68", "gpio69", "gpio70",
+	"gpio71", "gpio72", "gpio73", "gpio74", "gpio75", "gpio76", "gpio77",
+	"gpio78", "gpio79", "gpio80", "gpio81", "gpio82", "gpio83", "gpio84",
+	"gpio85", "gpio86", "gpio87", "gpio88", "gpio89", "gpio90", "gpio91",
+	"gpio92", "gpio93", "gpio94", "gpio95", "gpio96", "gpio97", "gpio98",
+	"gpio99", "gpio100", "gpio101", "gpio102", "gpio103", "gpio104",
+	"gpio105", "gpio106", "gpio107",
 };
 
 static const char * const qdss_stm_groups[] = {
-- 
GitLab


From 614124bea77e452aa6df7a8714e8bc820b489922 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 6 Jun 2021 15:47:27 -0700
Subject: [PATCH 2333/3804] Linux 5.13-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b79e0e8acbe33..144d4f8b7eb32 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
GitLab


From 0a2ff58f9f8f95526ecb0ccd7517fefceb96f661 Mon Sep 17 00:00:00 2001
From: Guillaume Ranquet <granquet@baylibre.com>
Date: Thu, 13 May 2021 21:26:40 +0200
Subject: [PATCH 2334/3804] dmaengine: mediatek: free the proper desc in
 desc_free handler

The desc_free handler assumed that the desc we want to free was always
 the current one associated with the channel.

This is seldom the case and this is causing use after free crashes in
 multiple places (tx/rx/terminate...).

  BUG: KASAN: use-after-free in mtk_uart_apdma_rx_handler+0x120/0x304

  Call trace:
   dump_backtrace+0x0/0x1b0
   show_stack+0x24/0x34
   dump_stack+0xe0/0x150
   print_address_description+0x8c/0x55c
   __kasan_report+0x1b8/0x218
   kasan_report+0x14/0x20
   __asan_load4+0x98/0x9c
   mtk_uart_apdma_rx_handler+0x120/0x304
   mtk_uart_apdma_irq_handler+0x50/0x80
   __handle_irq_event_percpu+0xe0/0x210
   handle_irq_event+0x8c/0x184
   handle_fasteoi_irq+0x1d8/0x3ac
   __handle_domain_irq+0xb0/0x110
   gic_handle_irq+0x50/0xb8
   el0_irq_naked+0x60/0x6c

  Allocated by task 3541:
   __kasan_kmalloc+0xf0/0x1b0
   kasan_kmalloc+0x10/0x1c
   kmem_cache_alloc_trace+0x90/0x2dc
   mtk_uart_apdma_prep_slave_sg+0x6c/0x1a0
   mtk8250_dma_rx_complete+0x220/0x2e4
   vchan_complete+0x290/0x340
   tasklet_action_common+0x220/0x298
   tasklet_action+0x28/0x34
   __do_softirq+0x158/0x35c

  Freed by task 3541:
   __kasan_slab_free+0x154/0x224
   kasan_slab_free+0x14/0x24
   slab_free_freelist_hook+0xf8/0x15c
   kfree+0xb4/0x278
   mtk_uart_apdma_desc_free+0x34/0x44
   vchan_complete+0x1bc/0x340
   tasklet_action_common+0x220/0x298
   tasklet_action+0x28/0x34
   __do_softirq+0x158/0x35c

  The buggy address belongs to the object at ffff000063606800
   which belongs to the cache kmalloc-256 of size 256
  The buggy address is located 176 bytes inside of
   256-byte region [ffff000063606800, ffff000063606900)
  The buggy address belongs to the page:
  page:fffffe00016d8180 refcount:1 mapcount:0 mapping:ffff00000302f600 index:0x0 compound_mapcount: 0
  flags: 0xffff00000010200(slab|head)
  raw: 0ffff00000010200 dead000000000100 dead000000000122 ffff00000302f600
  raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000
  page dumped because: kasan: bad access detected

Signed-off-by: Guillaume Ranquet <granquet@baylibre.com>

Link: https://lore.kernel.org/r/20210513192642.29446-2-granquet@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/mediatek/mtk-uart-apdma.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index 27c07350971dd..e38b67fc0c0ca 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
 
 static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd)
 {
-	struct dma_chan *chan = vd->tx.chan;
-	struct mtk_chan *c = to_mtk_uart_apdma_chan(chan);
-
-	kfree(c->desc);
+	kfree(container_of(vd, struct mtk_uart_apdma_desc, vd));
 }
 
 static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
-- 
GitLab


From 2537b40b0a4f61d2c83900744fe89b09076be9c6 Mon Sep 17 00:00:00 2001
From: Guillaume Ranquet <granquet@baylibre.com>
Date: Thu, 13 May 2021 21:26:41 +0200
Subject: [PATCH 2335/3804] dmaengine: mediatek: do not issue a new desc if one
 is still current

Avoid issuing a new desc if one is still being processed as this can
lead to some desc never being marked as completed.

Signed-off-by: Guillaume Ranquet <granquet@baylibre.com>

Link: https://lore.kernel.org/r/20210513192642.29446-3-granquet@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/mediatek/mtk-uart-apdma.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index e38b67fc0c0ca..a09ab2dd3b464 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -204,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
 
 static void mtk_uart_apdma_tx_handler(struct mtk_chan *c)
 {
-	struct mtk_uart_apdma_desc *d = c->desc;
-
 	mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B);
 	mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B);
 	mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B);
-
-	list_del(&d->vd.node);
-	vchan_cookie_complete(&d->vd);
 }
 
 static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
@@ -242,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
 
 	c->rx_status = d->avail_len - cnt;
 	mtk_uart_apdma_write(c, VFF_RPT, wg);
+}
 
-	list_del(&d->vd.node);
-	vchan_cookie_complete(&d->vd);
+static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c)
+{
+	struct mtk_uart_apdma_desc *d = c->desc;
+
+	if (d) {
+		list_del(&d->vd.node);
+		vchan_cookie_complete(&d->vd);
+		c->desc = NULL;
+	}
 }
 
 static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
@@ -258,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id)
 		mtk_uart_apdma_rx_handler(c);
 	else if (c->dir == DMA_MEM_TO_DEV)
 		mtk_uart_apdma_tx_handler(c);
+	mtk_uart_apdma_chan_complete_handler(c);
 	spin_unlock_irqrestore(&c->vc.lock, flags);
 
 	return IRQ_HANDLED;
@@ -363,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan)
 	unsigned long flags;
 
 	spin_lock_irqsave(&c->vc.lock, flags);
-	if (vchan_issue_pending(&c->vc)) {
+	if (vchan_issue_pending(&c->vc) && !c->desc) {
 		vd = vchan_next_desc(&c->vc);
 		c->desc = to_mtk_uart_apdma_desc(&vd->tx);
 
-- 
GitLab


From 9041575348b21ade1fb74d790f1aac85d68198c7 Mon Sep 17 00:00:00 2001
From: Guillaume Ranquet <granquet@baylibre.com>
Date: Thu, 13 May 2021 21:26:42 +0200
Subject: [PATCH 2336/3804] dmaengine: mediatek: use GFP_NOWAIT instead of
 GFP_ATOMIC in prep_dma

As recommended by the doc in:
Documentation/driver-api/dmaengine/provider.rst

Use GFP_NOWAIT to not deplete the emergency pool.

Signed-off-by: Guillaume Ranquet <granquet@baylibre.com>

Link: https://lore.kernel.org/r/20210513192642.29446-4-granquet@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/mediatek/mtk-uart-apdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c
index a09ab2dd3b464..375e7e647df6b 100644
--- a/drivers/dma/mediatek/mtk-uart-apdma.c
+++ b/drivers/dma/mediatek/mtk-uart-apdma.c
@@ -349,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg
 		return NULL;
 
 	/* Now allocate and setup the descriptor */
-	d = kzalloc(sizeof(*d), GFP_ATOMIC);
+	d = kzalloc(sizeof(*d), GFP_NOWAIT);
 	if (!d)
 		return NULL;
 
-- 
GitLab


From eb367d875f94a228c17c8538e3f2efcf2eb07ead Mon Sep 17 00:00:00 2001
From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Date: Fri, 4 Jun 2021 07:53:37 +0200
Subject: [PATCH 2337/3804] pinctrl: ralink: rt2880: avoid to error in calls if
 pin is already enabled

In 'rt2880_pmx_group_enable' the driver prints an error and returns
-EBUSY if a pin has already been enabled. This causes annoying messages
in the caller when this happens, like the following:

rt2880-pinmux pinctrl: pcie is already enabled
mt7621-pci 1e140000.pcie: Error applying setting, reverse things back

To avoid this, just print the already enabled message in the pinctrl
driver and return 0 instead, so the user is not misled into thinking
there is a real problem.

Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Link: https://lore.kernel.org/r/20210604055337.20407-1-sergio.paracuellos@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/ralink/pinctrl-rt2880.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/ralink/pinctrl-rt2880.c b/drivers/pinctrl/ralink/pinctrl-rt2880.c
index 1f4bca854add5..a9b511c7e8500 100644
--- a/drivers/pinctrl/ralink/pinctrl-rt2880.c
+++ b/drivers/pinctrl/ralink/pinctrl-rt2880.c
@@ -127,7 +127,7 @@ static int rt2880_pmx_group_enable(struct pinctrl_dev *pctrldev,
 	if (p->groups[group].enabled) {
 		dev_err(p->dev, "%s is already enabled\n",
 			p->groups[group].name);
-		return -EBUSY;
+		return 0;
 	}
 
 	p->groups[group].enabled = 1;
-- 
GitLab


From 1a6a9044b96729abacede172d7591c714a5b81d1 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Tue, 1 Jun 2021 10:53:53 +0300
Subject: [PATCH 2338/3804] x86/setup: Remove CONFIG_X86_RESERVE_LOW and
 reservelow= options

The CONFIG_X86_RESERVE_LOW build time and reservelow= command line options
allowed controlling the amount of memory under 1M that would be reserved at
boot to avoid using memory that can be potentially clobbered by BIOS.

Since the entire range under 1M is always reserved there is no need for
these options anymore and they can be removed.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210601075354.5149-3-rppt@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |  5 ----
 arch/x86/Kconfig                              | 29 -------------------
 arch/x86/kernel/setup.c                       | 24 ---------------
 3 files changed, 58 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc463..d7d813032c513 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4775,11 +4775,6 @@
 			Reserves a hole at the top of the kernel virtual
 			address space.
 
-	reservelow=	[X86]
-			Format: nn[K]
-			Set the amount of memory to reserve for BIOS at
-			the bottom of the address space.
-
 	reset_devices	[KNL] Force drivers to reset the underlying device
 			during initialization.
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0045e1b441902..86dae426798b7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1693,35 +1693,6 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
 	  Set whether the default state of memory_corruption_check is
 	  on or off.
 
-config X86_RESERVE_LOW
-	int "Amount of low memory, in kilobytes, to reserve for the BIOS"
-	default 64
-	range 4 640
-	help
-	  Specify the amount of low memory to reserve for the BIOS.
-
-	  The first page contains BIOS data structures that the kernel
-	  must not use, so that page must always be reserved.
-
-	  By default we reserve the first 64K of physical RAM, as a
-	  number of BIOSes are known to corrupt that memory range
-	  during events such as suspend/resume or monitor cable
-	  insertion, so it must not be used by the kernel.
-
-	  You can set this to 4 if you are absolutely sure that you
-	  trust the BIOS to get all its memory reservations and usages
-	  right.  If you know your BIOS have problems beyond the
-	  default 64K area, you can set this to 640 to avoid using the
-	  entire low memory range.
-
-	  If you have doubts about the BIOS (e.g. suspend/resume does
-	  not work or there's kernel crashes after certain hardware
-	  hotplug events) then you might want to enable
-	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
-	  typical corruption patterns.
-
-	  Leave this to the default value of 64 if you are unsure.
-
 config MATH_EMULATION
 	bool
 	depends on MODIFY_LDT_SYSCALL
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 1e720626069a3..7638ac6c3d80a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -695,30 +695,6 @@ static void __init e820_add_kernel_range(void)
 	e820__range_add(start, size, E820_TYPE_RAM);
 }
 
-static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
-
-static int __init parse_reservelow(char *p)
-{
-	unsigned long long size;
-
-	if (!p)
-		return -EINVAL;
-
-	size = memparse(p, &p);
-
-	if (size < 4096)
-		size = 4096;
-
-	if (size > 640*1024)
-		size = 640*1024;
-
-	reserve_low = size;
-
-	return 0;
-}
-
-early_param("reservelow", parse_reservelow);
-
 static void __init early_reserve_memory(void)
 {
 	/*
-- 
GitLab


From 5bcbe3285fb614c49db6b238253f7daff7e66312 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Mon, 17 May 2021 08:18:11 +0200
Subject: [PATCH 2339/3804] s390/mcck: fix calculation of SIE critical section
 size

The size of the SIE critical section is calculated wrongly
as a result of a missed subtraction in commit 0b0ed657fe00
("s390: remove critical section cleanup from entry.S")

Fixes: 0b0ed657fe00 ("s390: remove critical section cleanup from entry.S")
Cc: <stable@vger.kernel.org>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 12de7a9c85b35..3a81e38c95e74 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -651,7 +651,7 @@ ENDPROC(stack_overflow)
 .Lcleanup_sie_mcck:
 	larl	%r13,.Lsie_entry
 	slgr	%r9,%r13
-	larl	%r13,.Lsie_skip
+	lghi	%r13,.Lsie_skip - .Lsie_entry
 	clgr	%r9,%r13
 	jh	.Lcleanup_sie_int
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
-- 
GitLab


From 1874cb13d5d7cafa61ce93a760093ebc5485b6ab Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Mon, 17 May 2021 08:18:12 +0200
Subject: [PATCH 2340/3804] s390/mcck: fix invalid KVM guest condition check

The wrong condition check is used to decide if a machine check hit
while in a KVM guest. As a result of this check the instruction
following the SIE critical section might be considered as still
in the KVM guest and the _CIF_MCCK_GUEST CPU flag mistakenly
set.

Fixes: c929500d7a5a ("s390/nmi: s390: New low level handling for machine check happening in guest")
Cc: <stable@vger.kernel.org>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3a81e38c95e74..9cc71ca9a88f9 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -653,7 +653,7 @@ ENDPROC(stack_overflow)
 	slgr	%r9,%r13
 	lghi	%r13,.Lsie_skip - .Lsie_entry
 	clgr	%r9,%r13
-	jh	.Lcleanup_sie_int
+	jhe	.Lcleanup_sie_int
 	oi	__LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
 .Lcleanup_sie_int:
 	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
-- 
GitLab


From 23721c8e92f73f9f89e7362c50c2996a5c9ad483 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Tue, 1 Jun 2021 10:53:54 +0300
Subject: [PATCH 2341/3804] x86/crash: Remove crash_reserve_low_1M()

The entire memory range under 1M is unconditionally reserved in
setup_arch(), so there is no need for crash_reserve_low_1M() anymore.

Remove this function.

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210601075354.5149-4-rppt@kernel.org
---
 arch/x86/include/asm/crash.h |  6 ------
 arch/x86/kernel/crash.c      | 13 -------------
 2 files changed, 19 deletions(-)

diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index f58de66091e5e..8b6bd63530dcc 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -9,10 +9,4 @@ int crash_setup_memmap_entries(struct kimage *image,
 		struct boot_params *params);
 void crash_smp_send_stop(void);
 
-#ifdef CONFIG_KEXEC_CORE
-void __init crash_reserve_low_1M(void);
-#else
-static inline void __init crash_reserve_low_1M(void) { }
-#endif
-
 #endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 54ce999ed3212..e8326a8d1c5dc 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -70,19 +70,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
 	rcu_read_unlock();
 }
 
-/*
- * When the crashkernel option is specified, only use the low
- * 1M for the real mode trampoline.
- */
-void __init crash_reserve_low_1M(void)
-{
-	if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0)
-		return;
-
-	memblock_reserve(0, 1<<20);
-	pr_info("Reserving the low 1M of memory for crashkernel\n");
-}
-
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 
 static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
-- 
GitLab


From 382dcdd66ce86491ddd390b39224468c82a47892 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:05 +0100
Subject: [PATCH 2342/3804] arm64: remove redundant local_daif_mask() in
 bad_mode()

Upon taking an exception, the CPU sets all the DAIF bits. We never
clear any of these bits prior to calling bad_mode(), and bad_mode()
itself never clears any of these bits, so there's no need to call
local_daif_mask().

This patch removes the redundant call.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/traps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index a05d34f0e82a7..41f0aa92022a1 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -765,7 +765,6 @@ asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int
 		esr_get_class_string(esr));
 
 	__show_regs(regs);
-	local_daif_mask();
 	panic("bad mode");
 }
 
-- 
GitLab


From f7c706f0391d7894d1ae2d28cb2d5446f5ec59ad Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:06 +0100
Subject: [PATCH 2343/3804] arm64: entry: unmask IRQ+FIQ after EL0 handling

For non-fatal exceptions taken from EL0, we expect that at some point
during exception handling it is possible to return to a regular process
context with all exceptions unmasked (e.g. as we do in
do_notify_resume()), and we generally aim to unmask exceptions wherever
possible.

While handling SError and debug exceptions from EL0, we need to leave
some exceptions masked during handling. Handling SError requires us to
mask SError (which also requires masking IRQ+FIQ), and handling debug
exceptions requires us to mask debug (which also requires masking
SError+IRQ+FIQ).

Once do_serror() or do_debug_exception() has returned, we no longer need
to mask exceptions, and can unmask them all, which is what we did prior
to commit:

  9034f6251572a474 ("arm64: Do not enable IRQs for ct_user_exit")

... where we had to mask IRQs as context_tracking_user_exit()
expected IRQs to be masked.

Since then, we realised that our context tracking wasn't entirely
correct, and reworked the entry code to fix this. As of commit:

  23529049c6842382 ("arm64: entry: fix non-NMI user<->kernel transitions")

... we replaced the call to context_tracking_user_exit() with a call to
user_exit_irqoff() as part of enter_from_user_mode(), which occurs
earlier, before we run the body of the handler and unmask exceptions in
DAIF.

When we return to userspace, we go via ret_to_user(), which masks
exceptions in DAIF prior to calling user_enter_irqoff() as part of
exit_to_user_mode().

Thus, there's no longer a reason to leave IRQs or FIQs masked at the end
of the EL0 debug or error handlers, as neither the user exit context
tracking nor the user entry context tracking requires this. Let's bring
these into line with other EL0 exception handlers and ensure that IRQ
and FIQ are unmasked in DAIF at some point during the handler.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 2 +-
 arch/arm64/kernel/entry.S        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 340d04e136179..02be1517e08f5 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -398,7 +398,7 @@ static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr)
 
 	enter_from_user_mode();
 	do_debug_exception(far, esr, regs);
-	local_daif_restore(DAIF_PROCCTX_NOIRQ);
+	local_daif_restore(DAIF_PROCCTX);
 }
 
 static void noinstr el0_svc(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3513984a88bd1..6b2f6f5c5bb8c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -794,7 +794,7 @@ el0_error_naked:
 	mov	x0, sp
 	mov	x1, x25
 	bl	do_serror
-	enable_da
+	enable_daif
 	b	ret_to_user
 SYM_CODE_END(el0_error)
 
-- 
GitLab


From bb8e93a287a5f5f10fe7a9d8f612f6105c9622ef Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:07 +0100
Subject: [PATCH 2344/3804] arm64: entry: convert SError handlers to C

For various reasons we'd like to convert the bulk of arm64's exception
triage logic to C. As a step towards that, this patch converts the EL1
and EL0 SError triage logic to C.

Separate C functions are added for the native and compat cases so that
in subsequent patches we can handle native/compat differences in C.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-4-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  4 ++++
 arch/arm64/kernel/entry-common.c   | 32 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/entry.S          | 16 +++++----------
 arch/arm64/kernel/traps.c          |  6 +-----
 4 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 6546158d2f2d4..3a859d4e8b59e 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -32,8 +32,11 @@ static inline u32 disr_to_esr(u64 disr)
 }
 
 asmlinkage void el1_sync_handler(struct pt_regs *regs);
+asmlinkage void el1_error_handler(struct pt_regs *regs);
 asmlinkage void el0_sync_handler(struct pt_regs *regs);
+asmlinkage void el0_error_handler(struct pt_regs *regs);
 asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
+asmlinkage void el0_error_compat_handler(struct pt_regs *regs);
 
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
@@ -57,4 +60,5 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs);
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
+void do_serror(struct pt_regs *regs, unsigned int esr);
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 02be1517e08f5..3b79437210776 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -279,6 +279,16 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 	}
 }
 
+asmlinkage void noinstr el1_error_handler(struct pt_regs *regs)
+{
+	unsigned long esr = read_sysreg(esr_el1);
+
+	local_daif_restore(DAIF_ERRCTX);
+	arm64_enter_nmi(regs);
+	do_serror(regs, esr);
+	arm64_exit_nmi(regs);
+}
+
 asmlinkage void noinstr enter_from_user_mode(void)
 {
 	lockdep_hardirqs_off(CALLER_ADDR0);
@@ -468,6 +478,23 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 	}
 }
 
+static void __el0_error_handler_common(struct pt_regs *regs)
+{
+	unsigned long esr = read_sysreg(esr_el1);
+
+	enter_from_user_mode();
+	local_daif_restore(DAIF_ERRCTX);
+	arm64_enter_nmi(regs);
+	do_serror(regs, esr);
+	arm64_exit_nmi(regs);
+	local_daif_restore(DAIF_PROCCTX);
+}
+
+asmlinkage void noinstr el0_error_handler(struct pt_regs *regs)
+{
+	__el0_error_handler_common(regs);
+}
+
 #ifdef CONFIG_COMPAT
 static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr)
 {
@@ -526,4 +553,9 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
 		el0_inv(regs, esr);
 	}
 }
+
+asmlinkage void noinstr el0_error_compat_handler(struct pt_regs *regs)
+{
+	__el0_error_handler_common(regs);
+}
 #endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6b2f6f5c5bb8c..656f3129bfefe 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -757,7 +757,9 @@ SYM_CODE_END(el0_fiq_compat)
 
 SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
 	kernel_entry 0, 32
-	b	el0_error_naked
+	mov	x0, sp
+	bl	el0_error_compat_handler
+	b	ret_to_user
 SYM_CODE_END(el0_error_compat)
 #endif
 
@@ -778,23 +780,15 @@ SYM_CODE_END(el0_fiq)
 
 SYM_CODE_START_LOCAL(el1_error)
 	kernel_entry 1
-	mrs	x1, esr_el1
-	enable_dbg
 	mov	x0, sp
-	bl	do_serror
+	bl	el1_error_handler
 	kernel_exit 1
 SYM_CODE_END(el1_error)
 
 SYM_CODE_START_LOCAL(el0_error)
 	kernel_entry 0
-el0_error_naked:
-	mrs	x25, esr_el1
-	user_exit_irqoff
-	enable_dbg
 	mov	x0, sp
-	mov	x1, x25
-	bl	do_serror
-	enable_daif
+	bl	el0_error_handler
 	b	ret_to_user
 SYM_CODE_END(el0_error)
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 41f0aa92022a1..5fd12d19ef4be 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -869,15 +869,11 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
 	}
 }
 
-asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr)
+void do_serror(struct pt_regs *regs, unsigned int esr)
 {
-	arm64_enter_nmi(regs);
-
 	/* non-RAS errors are not containable */
 	if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
 		arm64_serror_panic(regs, esr);
-
-	arm64_exit_nmi(regs);
 }
 
 /* GENERIC_BUG traps */
-- 
GitLab


From 33a3581a76f3a36c7dcc9864120ce681bcfbcff1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:08 +0100
Subject: [PATCH 2345/3804] arm64: entry: move arm64_preempt_schedule_irq to
 entry-common.c

Subsequent patches will pull more of the IRQ entry handling into C. To
keep this in one place, let's move arm64_preempt_schedule_irq() into
entry-common.c along with the other entry management functions.

We no longer need to include <linux/lockdep.h> in process.c, so the
include directive is removed.

There should be no functional change as a result of this patch.

Reviewed-by: Joey Gouly <joey.gouly@arm.com>

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-5-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 20 ++++++++++++++++++++
 arch/arm64/kernel/process.c      | 17 -----------------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 3b79437210776..1fe60578e5566 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -6,7 +6,11 @@
  */
 
 #include <linux/context_tracking.h>
+#include <linux/linkage.h>
+#include <linux/lockdep.h>
 #include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
 #include <linux/thread_info.h>
 
 #include <asm/cpufeature.h>
@@ -113,6 +117,22 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 		exit_to_kernel_mode(regs);
 }
 
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * Preempting a task from an IRQ means we leave copies of PSTATE
+	 * on the stack. cpufeature's enable calls may modify PSTATE, but
+	 * resuming one of these preempted tasks would undo those changes.
+	 *
+	 * Only allow a task to be preempted once cpufeatures have been
+	 * enabled.
+	 */
+	if (system_capabilities_finalized())
+		preempt_schedule_irq();
+}
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2ca..2e73377091552 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -18,7 +18,6 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/kernel.h>
-#include <linux/lockdep.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/nospec.h>
@@ -724,22 +723,6 @@ static int __init tagged_addr_init(void)
 core_initcall(tagged_addr_init);
 #endif	/* CONFIG_ARM64_TAGGED_ADDR_ABI */
 
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
-{
-	lockdep_assert_irqs_disabled();
-
-	/*
-	 * Preempting a task from an IRQ means we leave copies of PSTATE
-	 * on the stack. cpufeature's enable calls may modify PSTATE, but
-	 * resuming one of these preempted tasks would undo those changes.
-	 *
-	 * Only allow a task to be preempted once cpufeatures have been
-	 * enabled.
-	 */
-	if (system_capabilities_finalized())
-		preempt_schedule_irq();
-}
-
 #ifdef CONFIG_BINFMT_ELF
 int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
 			 bool has_interp, bool is_interp)
-- 
GitLab


From 101a5b665dcdff169ae7ad90556604c483d9027e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:09 +0100
Subject: [PATCH 2346/3804] arm64: entry: move NMI preempt logic to C

Currently portions of our preempt logic are written in C while other
parts are written in assembly. Let's clean this up a little bit by
moving the NMI preempt checks to C. For now, the preempt count (and
need_resched) checking is left in assembly, and will be converted
with the body of the IRQ handler in subsequent patches.

Other than the increased lockdep coverage there should be no functional
change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-6-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c |  9 +++++++++
 arch/arm64/kernel/entry.S        | 12 +-----------
 2 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 1fe60578e5566..08d17eb0ce13a 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -121,6 +121,15 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void)
 {
 	lockdep_assert_irqs_disabled();
 
+	/*
+	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
+	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
+	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
+	 * DAIF we must have handled an NMI, so skip preemption.
+	 */
+	if (system_uses_irq_prio_masking() && read_sysreg(daif))
+		return;
+
 	/*
 	 * Preempting a task from an IRQ means we leave copies of PSTATE
 	 * on the stack. cpufeature's enable calls may modify PSTATE, but
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 656f3129bfefe..449628290ce8f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -562,17 +562,7 @@ tsk	.req	x28		// current thread_info
 
 #ifdef CONFIG_PREEMPTION
 	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
-alternative_if ARM64_HAS_IRQ_PRIO_MASKING
-	/*
-	 * DA were cleared at start of handling, and IF are cleared by
-	 * the GIC irqchip driver using gic_arch_enable_irqs() for
-	 * normal IRQs. If anything is set, it means we come back from
-	 * an NMI instead of a normal IRQ, so skip preemption
-	 */
-	mrs	x0, daif
-	orr	x24, x24, x0
-alternative_else_nop_endif
-	cbnz	x24, 1f				// preempt count != 0 || NMI return path
+	cbnz	x24, 1f				// preempt count != 0
 	bl	arm64_preempt_schedule_irq	// irq en/disable is done inside
 1:
 #endif
-- 
GitLab


From f8049488e7d37b0a0e438ee449e83b3e46958743 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:10 +0100
Subject: [PATCH 2347/3804] arm64: entry: add a call_on_irq_stack helper

When handling IRQ/FIQ exceptions the entry assembly may transition from
a task's stack to a CPU's IRQ stack (and IRQ shadow call stack).

In subsequent patches we want to migrate the IRQ/FIQ triage logic to C,
and as we want to perform some actions on the task stack (e.g. EL1
preemption), we need to switch stacks within the C handler. So that we
can do so, this patch adds a helper to call a function on a CPU's IRQ
stack (and shadow stack as appropriate).

Subsequent patches will make use of the new helper function.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-7-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  2 ++
 arch/arm64/kernel/entry.S          | 36 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 3a859d4e8b59e..c24b69c0c5899 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -40,6 +40,8 @@ asmlinkage void el0_error_compat_handler(struct pt_regs *regs);
 
 asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+				  void (*func)(struct pt_regs *));
 asmlinkage void enter_from_user_mode(void);
 asmlinkage void exit_to_user_mode(void);
 void arm64_enter_nmi(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 449628290ce8f..8ca74ce115eed 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -982,6 +982,42 @@ SYM_CODE_START(ret_from_fork)
 SYM_CODE_END(ret_from_fork)
 NOKPROBE(ret_from_fork)
 
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ * 		          void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using this CPU's irq stack and shadow irq stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+#ifdef CONFIG_SHADOW_CALL_STACK
+	stp	scs_sp, xzr, [sp, #-16]!
+	ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x17
+#endif
+	/* Create a frame record to save our LR and SP (implicit in FP) */
+	stp	x29, x30, [sp, #-16]!
+	mov	x29, sp
+
+	ldr_this_cpu x16, irq_stack_ptr, x17
+	mov	x15, #IRQ_STACK_SIZE
+	add	x16, x16, x15
+
+	/* Move to the new stack and call the function there */
+	mov	sp, x16
+	blr	x1
+
+	/*
+	 * Restore the SP from the FP, and restore the FP and LR from the frame
+	 * record.
+	 */
+	mov	sp, x29
+	ldp	x29, x30, [sp], #16
+#ifdef CONFIG_SHADOW_CALL_STACK
+	ldp	scs_sp, xzr, [sp], #16
+#endif
+	ret
+SYM_FUNC_END(call_on_irq_stack)
+NOKPROBE(call_on_irq_stack)
+
 #ifdef CONFIG_ARM_SDE_INTERFACE
 
 #include <asm/sdei.h>
-- 
GitLab


From 064dbfb4169141943ec7d9dbfd02974dd008f2ce Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:11 +0100
Subject: [PATCH 2348/3804] arm64: entry: convert IRQ+FIQ handlers to C

For various reasons we'd like to convert the bulk of arm64's exception
triage logic to C. As a step towards that, this patch converts the EL1
and EL0 IRQ+FIQ triage logic to C.

Separate C functions are added for the native and compat cases so that
in subsequent patches we can handle native/compat differences in C.

Since the triage functions can now call arm64_apply_bp_hardening()
directly, the do_el0_irq_bp_hardening() wrapper function is removed.

Since the user_exit_irqoff macro is now unused, it is removed. The
user_enter_irqoff macro is still used by the ret_to_user code, and
cannot be removed at this time.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-8-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |   8 ++-
 arch/arm64/include/asm/processor.h |   2 -
 arch/arm64/kernel/entry-common.c   |  93 ++++++++++++++++++++++++-
 arch/arm64/kernel/entry.S          | 108 ++++-------------------------
 arch/arm64/mm/fault.c              |   7 --
 5 files changed, 110 insertions(+), 108 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index c24b69c0c5899..4284ee57a9a59 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -32,14 +32,18 @@ static inline u32 disr_to_esr(u64 disr)
 }
 
 asmlinkage void el1_sync_handler(struct pt_regs *regs);
+asmlinkage void el1_irq_handler(struct pt_regs *regs);
+asmlinkage void el1_fiq_handler(struct pt_regs *regs);
 asmlinkage void el1_error_handler(struct pt_regs *regs);
 asmlinkage void el0_sync_handler(struct pt_regs *regs);
+asmlinkage void el0_irq_handler(struct pt_regs *regs);
+asmlinkage void el0_fiq_handler(struct pt_regs *regs);
 asmlinkage void el0_error_handler(struct pt_regs *regs);
 asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
+asmlinkage void el0_irq_compat_handler(struct pt_regs *regs);
+asmlinkage void el0_fiq_compat_handler(struct pt_regs *regs);
 asmlinkage void el0_error_compat_handler(struct pt_regs *regs);
 
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs);
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs);
 asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
 asmlinkage void enter_from_user_mode(void);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee8909..2f21c76324bb7 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -257,8 +257,6 @@ void set_task_sctlr_el1(u64 sctlr);
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
 					 struct task_struct *next);
 
-asmlinkage void arm64_preempt_schedule_irq(void);
-
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
 
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 08d17eb0ce13a..ae1b6d7c00e1b 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -19,6 +19,8 @@
 #include <asm/exception.h>
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
+#include <asm/processor.h>
+#include <asm/stacktrace.h>
 #include <asm/sysreg.h>
 
 /*
@@ -101,7 +103,7 @@ void noinstr arm64_exit_nmi(struct pt_regs *regs)
 	__nmi_exit();
 }
 
-asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
 		arm64_enter_nmi(regs);
@@ -109,7 +111,7 @@ asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs)
 		enter_from_kernel_mode(regs);
 }
 
-asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
+static void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs))
 		arm64_exit_nmi(regs);
@@ -117,7 +119,7 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs)
 		exit_to_kernel_mode(regs);
 }
 
-asmlinkage void __sched arm64_preempt_schedule_irq(void)
+static void __sched arm64_preempt_schedule_irq(void)
 {
 	lockdep_assert_irqs_disabled();
 
@@ -142,6 +144,18 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void)
 		preempt_schedule_irq();
 }
 
+static void do_interrupt_handler(struct pt_regs *regs,
+				 void (*handler)(struct pt_regs *))
+{
+	if (on_thread_stack())
+		call_on_irq_stack(regs, handler);
+	else
+		handler(regs);
+}
+
+extern void (*handle_arch_irq)(struct pt_regs *);
+extern void (*handle_arch_fiq)(struct pt_regs *);
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
@@ -308,6 +322,36 @@ asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
 	}
 }
 
+static void noinstr el1_interrupt(struct pt_regs *regs,
+				  void (*handler)(struct pt_regs *))
+{
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+	enter_el1_irq_or_nmi(regs);
+	do_interrupt_handler(regs, handler);
+
+	/*
+	 * Note: thread_info::preempt_count includes both thread_info::count
+	 * and thread_info::need_resched, and is not equivalent to
+	 * preempt_count().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPTION) &&
+	    READ_ONCE(current_thread_info()->preempt_count) == 0)
+		arm64_preempt_schedule_irq();
+
+	exit_el1_irq_or_nmi(regs);
+}
+
+asmlinkage void noinstr el1_irq_handler(struct pt_regs *regs)
+{
+	el1_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el1_fiq_handler(struct pt_regs *regs)
+{
+	el1_interrupt(regs, handle_arch_fiq);
+}
+
 asmlinkage void noinstr el1_error_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
@@ -507,6 +551,39 @@ asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 	}
 }
 
+static void noinstr el0_interrupt(struct pt_regs *regs,
+				  void (*handler)(struct pt_regs *))
+{
+	enter_from_user_mode();
+
+	write_sysreg(DAIF_PROCCTX_NOIRQ, daif);
+
+	if (regs->pc & BIT(55))
+		arm64_apply_bp_hardening();
+
+	do_interrupt_handler(regs, handler);
+}
+
+static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
+{
+	el0_interrupt(regs, handle_arch_irq);
+}
+
+asmlinkage void noinstr el0_irq_handler(struct pt_regs *regs)
+{
+	__el0_irq_handler_common(regs);
+}
+
+static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
+{
+	el0_interrupt(regs, handle_arch_fiq);
+}
+
+asmlinkage void noinstr el0_fiq_handler(struct pt_regs *regs)
+{
+	__el0_fiq_handler_common(regs);
+}
+
 static void __el0_error_handler_common(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
@@ -583,6 +660,16 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
 	}
 }
 
+asmlinkage void noinstr el0_irq_compat_handler(struct pt_regs *regs)
+{
+	__el0_irq_handler_common(regs);
+}
+
+asmlinkage void noinstr el0_fiq_compat_handler(struct pt_regs *regs)
+{
+	__el0_fiq_handler_common(regs);
+}
+
 asmlinkage void noinstr el0_error_compat_handler(struct pt_regs *regs)
 {
 	__el0_error_handler_common(regs);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8ca74ce115eed..8eb3a0a514130 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -33,12 +33,6 @@
  * Context tracking and irqflag tracing need to instrument transitions between
  * user and kernel mode.
  */
-	.macro user_exit_irqoff
-#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
-	bl	enter_from_user_mode
-#endif
-	.endm
-
 	.macro user_enter_irqoff
 #if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS)
 	bl	exit_to_user_mode
@@ -486,63 +480,12 @@ SYM_CODE_START_LOCAL(__swpan_exit_el0)
 SYM_CODE_END(__swpan_exit_el0)
 #endif
 
-	.macro	irq_stack_entry
-	mov	x19, sp			// preserve the original sp
-#ifdef CONFIG_SHADOW_CALL_STACK
-	mov	x24, scs_sp		// preserve the original shadow stack
-#endif
-
-	/*
-	 * Compare sp with the base of the task stack.
-	 * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
-	 * and should switch to the irq stack.
-	 */
-	ldr	x25, [tsk, TSK_STACK]
-	eor	x25, x25, x19
-	and	x25, x25, #~(THREAD_SIZE - 1)
-	cbnz	x25, 9998f
-
-	ldr_this_cpu x25, irq_stack_ptr, x26
-	mov	x26, #IRQ_STACK_SIZE
-	add	x26, x25, x26
-
-	/* switch to the irq stack */
-	mov	sp, x26
-
-#ifdef CONFIG_SHADOW_CALL_STACK
-	/* also switch to the irq shadow stack */
-	ldr_this_cpu scs_sp, irq_shadow_call_stack_ptr, x26
-#endif
-
-9998:
-	.endm
-
-	/*
-	 * The callee-saved regs (x19-x29) should be preserved between
-	 * irq_stack_entry and irq_stack_exit, but note that kernel_entry
-	 * uses x20-x23 to store data for later use.
-	 */
-	.macro	irq_stack_exit
-	mov	sp, x19
-#ifdef CONFIG_SHADOW_CALL_STACK
-	mov	scs_sp, x24
-#endif
-	.endm
-
 /* GPRs used by entry code */
 tsk	.req	x28		// current thread_info
 
 /*
  * Interrupt handling.
  */
-	.macro	irq_handler, handler:req
-	ldr_l	x1, \handler
-	mov	x0, sp
-	irq_stack_entry
-	blr	x1
-	irq_stack_exit
-	.endm
-
 	.macro	gic_prio_kentry_setup, tmp:req
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 	alternative_if ARM64_HAS_IRQ_PRIO_MASKING
@@ -552,35 +495,6 @@ tsk	.req	x28		// current thread_info
 #endif
 	.endm
 
-	.macro el1_interrupt_handler, handler:req
-	enable_da
-
-	mov	x0, sp
-	bl	enter_el1_irq_or_nmi
-
-	irq_handler	\handler
-
-#ifdef CONFIG_PREEMPTION
-	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
-	cbnz	x24, 1f				// preempt count != 0
-	bl	arm64_preempt_schedule_irq	// irq en/disable is done inside
-1:
-#endif
-
-	mov	x0, sp
-	bl	exit_el1_irq_or_nmi
-	.endm
-
-	.macro el0_interrupt_handler, handler:req
-	user_exit_irqoff
-	enable_da
-
-	tbz	x22, #55, 1f
-	bl	do_el0_irq_bp_hardening
-1:
-	irq_handler	\handler
-	.endm
-
 	.text
 
 /*
@@ -704,13 +618,15 @@ SYM_CODE_END(el1_sync)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
 	kernel_entry 1
-	el1_interrupt_handler handle_arch_irq
+	mov	x0, sp
+	bl	el1_irq_handler
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
 
 SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
 	kernel_entry 1
-	el1_interrupt_handler handle_arch_fiq
+	mov	x0, sp
+	bl	el1_fiq_handler
 	kernel_exit 1
 SYM_CODE_END(el1_fiq)
 
@@ -737,12 +653,16 @@ SYM_CODE_END(el0_sync_compat)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
 	kernel_entry 0, 32
-	b	el0_irq_naked
+	mov	x0, sp
+	bl	el0_irq_compat_handler
+	b	ret_to_user
 SYM_CODE_END(el0_irq_compat)
 
 SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
 	kernel_entry 0, 32
-	b	el0_fiq_naked
+	mov	x0, sp
+	bl	el0_fiq_compat_handler
+	b	ret_to_user
 SYM_CODE_END(el0_fiq_compat)
 
 SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
@@ -756,15 +676,15 @@ SYM_CODE_END(el0_error_compat)
 	.align	6
 SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
 	kernel_entry 0
-el0_irq_naked:
-	el0_interrupt_handler handle_arch_irq
+	mov	x0, sp
+	bl	el0_irq_handler
 	b	ret_to_user
 SYM_CODE_END(el0_irq)
 
 SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
 	kernel_entry 0
-el0_fiq_naked:
-	el0_interrupt_handler handle_arch_fiq
+	mov	x0, sp
+	bl	el0_fiq_handler
 	b	ret_to_user
 SYM_CODE_END(el0_fiq)
 
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 871c82ab0a309..3b4a4adfddfd3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -836,13 +836,6 @@ void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_mem_abort);
 
-void do_el0_irq_bp_hardening(void)
-{
-	/* PC has already been checked in entry.S */
-	arm64_apply_bp_hardening();
-}
-NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-
 void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
 {
 	arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN,
-- 
GitLab


From 2f2bbaa4eda027d0bf0f3f23d0c206b2b76e2180 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:12 +0100
Subject: [PATCH 2349/3804] arm64: entry: organise entry handlers consistently

In entry.S we have two comments which distinguish EL0 and EL1 exception
handlers, but the code isn't actually laid out to match, and there are a
few other inconsistencies that would be good to clear up.

This patch organizes the entry handlers consistently:

* The handlers are laid out in order of the vectors, to make them easier
  to navigate.

* The inconsistently-applied alignment is removed

* The handlers are consistently marked with SYM_CODE_START_LOCAL()
  rather than SYM_CODE_START_LOCAL_NOALIGN(), giving them the same
  default alignment as other assembly code snippets.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-9-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 78 ++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 42 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8eb3a0a514130..ed7c55d57afeb 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -607,65 +607,88 @@ SYM_CODE_END(el1_error_invalid)
 /*
  * EL1 mode handlers.
  */
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el1_sync)
+SYM_CODE_START_LOCAL(el1_sync)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_sync_handler
 	kernel_exit 1
 SYM_CODE_END(el1_sync)
 
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
+SYM_CODE_START_LOCAL(el1_irq)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_irq_handler
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
 
-SYM_CODE_START_LOCAL_NOALIGN(el1_fiq)
+SYM_CODE_START_LOCAL(el1_fiq)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_fiq_handler
 	kernel_exit 1
 SYM_CODE_END(el1_fiq)
 
+SYM_CODE_START_LOCAL(el1_error)
+	kernel_entry 1
+	mov	x0, sp
+	bl	el1_error_handler
+	kernel_exit 1
+SYM_CODE_END(el1_error)
+
 /*
  * EL0 mode handlers.
  */
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
+SYM_CODE_START_LOCAL(el0_sync)
 	kernel_entry 0
 	mov	x0, sp
 	bl	el0_sync_handler
 	b	ret_to_user
 SYM_CODE_END(el0_sync)
 
+SYM_CODE_START_LOCAL(el0_irq)
+	kernel_entry 0
+	mov	x0, sp
+	bl	el0_irq_handler
+	b	ret_to_user
+SYM_CODE_END(el0_irq)
+
+SYM_CODE_START_LOCAL(el0_fiq)
+	kernel_entry 0
+	mov	x0, sp
+	bl	el0_fiq_handler
+	b	ret_to_user
+SYM_CODE_END(el0_fiq)
+
+SYM_CODE_START_LOCAL(el0_error)
+	kernel_entry 0
+	mov	x0, sp
+	bl	el0_error_handler
+	b	ret_to_user
+SYM_CODE_END(el0_error)
+
 #ifdef CONFIG_COMPAT
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el0_sync_compat)
+SYM_CODE_START_LOCAL(el0_sync_compat)
 	kernel_entry 0, 32
 	mov	x0, sp
 	bl	el0_sync_compat_handler
 	b	ret_to_user
 SYM_CODE_END(el0_sync_compat)
 
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq_compat)
+SYM_CODE_START_LOCAL(el0_irq_compat)
 	kernel_entry 0, 32
 	mov	x0, sp
 	bl	el0_irq_compat_handler
 	b	ret_to_user
 SYM_CODE_END(el0_irq_compat)
 
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq_compat)
+SYM_CODE_START_LOCAL(el0_fiq_compat)
 	kernel_entry 0, 32
 	mov	x0, sp
 	bl	el0_fiq_compat_handler
 	b	ret_to_user
 SYM_CODE_END(el0_fiq_compat)
 
-SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
+SYM_CODE_START_LOCAL(el0_error_compat)
 	kernel_entry 0, 32
 	mov	x0, sp
 	bl	el0_error_compat_handler
@@ -673,35 +696,6 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_error_compat)
 SYM_CODE_END(el0_error_compat)
 #endif
 
-	.align	6
-SYM_CODE_START_LOCAL_NOALIGN(el0_irq)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_irq_handler
-	b	ret_to_user
-SYM_CODE_END(el0_irq)
-
-SYM_CODE_START_LOCAL_NOALIGN(el0_fiq)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_fiq_handler
-	b	ret_to_user
-SYM_CODE_END(el0_fiq)
-
-SYM_CODE_START_LOCAL(el1_error)
-	kernel_entry 1
-	mov	x0, sp
-	bl	el1_error_handler
-	kernel_exit 1
-SYM_CODE_END(el1_error)
-
-SYM_CODE_START_LOCAL(el0_error)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_error_handler
-	b	ret_to_user
-SYM_CODE_END(el0_error)
-
 /*
  * "slow" syscall return path.
  */
-- 
GitLab


From e931fa03c6bf525babc9a41b951eb2311b055abb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:13 +0100
Subject: [PATCH 2350/3804] arm64: entry: organise entry vectors consistently

In subsequent patches we'll rename the entry handlers based on their
original EL, register width, and exception class. To do so, we need to
make all 3 arguments to the `kernel_ventry` macro mandatory, and
distinguish EL1h from EL1t.

In preparation for this, let's make the current set of arguments
mandatory, and move the `regsize` column before the branch label suffix,
making the vectors easier to read column-wise.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-10-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 42 +++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ed7c55d57afeb..e29d0fb773589 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -54,7 +54,7 @@
 #define BAD_FIQ		2
 #define BAD_ERROR	3
 
-	.macro kernel_ventry, el, label, regsize = 64
+	.macro kernel_ventry, el:req, regsize:req, label:req
 	.align 7
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	.if	\el == 0
@@ -504,31 +504,31 @@ tsk	.req	x28		// current thread_info
 
 	.align	11
 SYM_CODE_START(vectors)
-	kernel_ventry	1, sync_invalid			// Synchronous EL1t
-	kernel_ventry	1, irq_invalid			// IRQ EL1t
-	kernel_ventry	1, fiq_invalid			// FIQ EL1t
-	kernel_ventry	1, error_invalid		// Error EL1t
+	kernel_ventry	1, 64, sync_invalid		// Synchronous EL1t
+	kernel_ventry	1, 64, irq_invalid		// IRQ EL1t
+	kernel_ventry	1, 64, fiq_invalid		// FIQ EL1t
+	kernel_ventry	1, 64, error_invalid		// Error EL1t
 
-	kernel_ventry	1, sync				// Synchronous EL1h
-	kernel_ventry	1, irq				// IRQ EL1h
-	kernel_ventry	1, fiq				// FIQ EL1h
-	kernel_ventry	1, error			// Error EL1h
+	kernel_ventry	1, 64, sync			// Synchronous EL1h
+	kernel_ventry	1, 64, irq			// IRQ EL1h
+	kernel_ventry	1, 64, fiq			// FIQ EL1h
+	kernel_ventry	1, 64, error			// Error EL1h
 
-	kernel_ventry	0, sync				// Synchronous 64-bit EL0
-	kernel_ventry	0, irq				// IRQ 64-bit EL0
-	kernel_ventry	0, fiq				// FIQ 64-bit EL0
-	kernel_ventry	0, error			// Error 64-bit EL0
+	kernel_ventry	0, 64, sync			// Synchronous 64-bit EL0
+	kernel_ventry	0, 64, irq			// IRQ 64-bit EL0
+	kernel_ventry	0, 64, fiq			// FIQ 64-bit EL0
+	kernel_ventry	0, 64, error			// Error 64-bit EL0
 
 #ifdef CONFIG_COMPAT
-	kernel_ventry	0, sync_compat, 32		// Synchronous 32-bit EL0
-	kernel_ventry	0, irq_compat, 32		// IRQ 32-bit EL0
-	kernel_ventry	0, fiq_compat, 32		// FIQ 32-bit EL0
-	kernel_ventry	0, error_compat, 32		// Error 32-bit EL0
+	kernel_ventry	0, 32, sync_compat		// Synchronous 32-bit EL0
+	kernel_ventry	0, 32, irq_compat		// IRQ 32-bit EL0
+	kernel_ventry	0, 32, fiq_compat		// FIQ 32-bit EL0
+	kernel_ventry	0, 32, error_compat		// Error 32-bit EL0
 #else
-	kernel_ventry	0, sync_invalid, 32		// Synchronous 32-bit EL0
-	kernel_ventry	0, irq_invalid, 32		// IRQ 32-bit EL0
-	kernel_ventry	0, fiq_invalid, 32		// FIQ 32-bit EL0
-	kernel_ventry	0, error_invalid, 32		// Error 32-bit EL0
+	kernel_ventry	0, 32, sync_invalid		// Synchronous 32-bit EL0
+	kernel_ventry	0, 32, irq_invalid		// IRQ 32-bit EL0
+	kernel_ventry	0, 32, fiq_invalid		// FIQ 32-bit EL0
+	kernel_ventry	0, 32, error_invalid		// Error 32-bit EL0
 #endif
 SYM_CODE_END(vectors)
 
-- 
GitLab


From af541cbbf9c646d2eaa8b3ee3836d5b16435e848 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:14 +0100
Subject: [PATCH 2351/3804] arm64: entry: consolidate EL1 exception returns

Following the example of ret_to_user, let's consolidate all the EL1
return paths with a ret_to_kernel helper, rather than each entry point
having its own copy of the return code.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-11-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e29d0fb773589..54986d488983b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -611,30 +611,34 @@ SYM_CODE_START_LOCAL(el1_sync)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_sync_handler
-	kernel_exit 1
+	b	ret_to_kernel
 SYM_CODE_END(el1_sync)
 
 SYM_CODE_START_LOCAL(el1_irq)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_irq_handler
-	kernel_exit 1
+	b	ret_to_kernel
 SYM_CODE_END(el1_irq)
 
 SYM_CODE_START_LOCAL(el1_fiq)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_fiq_handler
-	kernel_exit 1
+	b	ret_to_kernel
 SYM_CODE_END(el1_fiq)
 
 SYM_CODE_START_LOCAL(el1_error)
 	kernel_entry 1
 	mov	x0, sp
 	bl	el1_error_handler
-	kernel_exit 1
+	b	ret_to_kernel
 SYM_CODE_END(el1_error)
 
+SYM_CODE_START_LOCAL(ret_to_kernel)
+	kernel_exit 1
+SYM_CODE_END(ret_to_kernel)
+
 /*
  * EL0 mode handlers.
  */
-- 
GitLab


From cbed5f8d3feb5ecc84c998b81db7e004b3fb2135 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:15 +0100
Subject: [PATCH 2352/3804] arm64: entry: move bad_mode() to entry-common.c

In subsequent patches we'll rework the way bad_mode() is called by
exception entry code. In preparation for this, let's move bad_mode()
itself into entry-common.c.

Let's also mark it as noinstr (e.g. to prevent it being kprobed), and
let's also make the `handler` array a local variable, as this is only
used by bad_mode(), and will be removed entirely in a subsequent patch.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-12-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 27 +++++++++++++++++++++++++++
 arch/arm64/kernel/traps.c        | 25 -------------------------
 2 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ae1b6d7c00e1b..74d09fd3dafaf 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -22,6 +22,7 @@
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 #include <asm/sysreg.h>
+#include <asm/system_misc.h>
 
 /*
  * This is intended to match the logic in irqentry_enter(), handling the kernel
@@ -156,6 +157,32 @@ static void do_interrupt_handler(struct pt_regs *regs,
 extern void (*handle_arch_irq)(struct pt_regs *);
 extern void (*handle_arch_fiq)(struct pt_regs *);
 
+/*
+ * bad_mode handles the impossible case in the exception vector. This is always
+ * fatal.
+ */
+asmlinkage void noinstr bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+{
+	const char *handler[] = {
+		"Synchronous Abort",
+		"IRQ",
+		"FIQ",
+		"Error"
+	};
+
+	arm64_enter_nmi(regs);
+
+	console_verbose();
+
+	pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
+		handler[reason], smp_processor_id(), esr,
+		esr_get_class_string(esr));
+
+	__show_regs(regs);
+	panic("bad mode");
+}
+
+
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 5fd12d19ef4be..7def18ff02e25 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -45,13 +45,6 @@
 #include <asm/system_misc.h>
 #include <asm/sysreg.h>
 
-static const char *handler[] = {
-	"Synchronous Abort",
-	"IRQ",
-	"FIQ",
-	"Error"
-};
-
 int show_unhandled_signals = 0;
 
 static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
@@ -750,24 +743,6 @@ const char *esr_get_class_string(u32 esr)
 	return esr_class_str[ESR_ELx_EC(esr)];
 }
 
-/*
- * bad_mode handles the impossible case in the exception vector. This is always
- * fatal.
- */
-asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
-{
-	arm64_enter_nmi(regs);
-
-	console_verbose();
-
-	pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
-		handler[reason], smp_processor_id(), esr,
-		esr_get_class_string(esr));
-
-	__show_regs(regs);
-	panic("bad mode");
-}
-
 /*
  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
  * exceptions taken from EL0. Unlike bad_mode, this returns.
-- 
GitLab


From ca0c2647f54c34000b4026c6632268d2dc304c67 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:16 +0100
Subject: [PATCH 2353/3804] arm64: entry: improve bad_mode()

Our use of bad_mode() has a few rough edges:

* AArch64 doesn't use the term "mode", and refers to "Execution
  states", "Exception levels", and "Selected stack pointer".

* We log the exception type (SYNC/IRQ/FIQ/SError), but not the actual
  "mode" (though this can be decoded from the SPSR value).

* We use bad_mode() as a second-level handler for unexpected synchronous
  exceptions, where the "mode" is legitimate, but the specific exception
  is not.

* We dump the ESR value, but call this "code", and so it's not clear to
  all readers that this is the ESR.

... and all of this can be somewhat opaque to those who aren't extremely
familiar with the code.

Let's make this a bit clearer by having bad_mode() log "Unhandled
${TYPE} exception" rather than "Bad mode in ${TYPE} handler", using
"ESR" rather than "code", and having the final panic() log "Unhandled
exception" rather than "Bad mode".

In future we'd like to log the specific architectural vector rather than
just the type of exception, so we also split the core of bad_mode() out
into a helper called __panic_unhandled(), which takes the vector as a
string argument.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-13-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 74d09fd3dafaf..d0f9a63940675 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -157,31 +157,32 @@ static void do_interrupt_handler(struct pt_regs *regs,
 extern void (*handle_arch_irq)(struct pt_regs *);
 extern void (*handle_arch_fiq)(struct pt_regs *);
 
-/*
- * bad_mode handles the impossible case in the exception vector. This is always
- * fatal.
- */
-asmlinkage void noinstr bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
+				      unsigned int esr)
 {
-	const char *handler[] = {
-		"Synchronous Abort",
-		"IRQ",
-		"FIQ",
-		"Error"
-	};
-
 	arm64_enter_nmi(regs);
 
 	console_verbose();
 
-	pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n",
-		handler[reason], smp_processor_id(), esr,
+	pr_crit("Unhandled %s exception on CPU%d, ESR 0x%08x -- %s\n",
+		vector, smp_processor_id(), esr,
 		esr_get_class_string(esr));
 
 	__show_regs(regs);
-	panic("bad mode");
+	panic("Unhandled exception");
 }
 
+asmlinkage void noinstr bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
+{
+	const char *handler[] = {
+		"Synchronous Abort",
+		"IRQ",
+		"FIQ",
+		"Error"
+	};
+
+	__panic_unhandled(regs, handler[reason], esr);
+}
 
 #ifdef CONFIG_ARM64_ERRATUM_1463225
 static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-- 
GitLab


From a5b43a87a7609d49ed4a453a2b99b6d36ab1e5d0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:17 +0100
Subject: [PATCH 2354/3804] arm64: entry: template the entry asm functions

Now that the majority of the exception triage logic has been converted
to C, the entry assembly functions all have a uniform structure.

Let's generate them all with an assembly macro to reduce the amount of
code and to ensure they all remain in sync if we make changes in future.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-14-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 113 +++++++++-----------------------------
 1 file changed, 27 insertions(+), 86 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 54986d488983b..b719ac26f7d13 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -604,102 +604,43 @@ SYM_CODE_START_LOCAL(el1_error_invalid)
 	inv_entry 1, BAD_ERROR
 SYM_CODE_END(el1_error_invalid)
 
-/*
- * EL1 mode handlers.
- */
-SYM_CODE_START_LOCAL(el1_sync)
-	kernel_entry 1
-	mov	x0, sp
-	bl	el1_sync_handler
-	b	ret_to_kernel
-SYM_CODE_END(el1_sync)
-
-SYM_CODE_START_LOCAL(el1_irq)
-	kernel_entry 1
-	mov	x0, sp
-	bl	el1_irq_handler
-	b	ret_to_kernel
-SYM_CODE_END(el1_irq)
-
-SYM_CODE_START_LOCAL(el1_fiq)
-	kernel_entry 1
-	mov	x0, sp
-	bl	el1_fiq_handler
-	b	ret_to_kernel
-SYM_CODE_END(el1_fiq)
-
-SYM_CODE_START_LOCAL(el1_error)
-	kernel_entry 1
+	.macro entry_handler el:req, regsize:req, label:req
+SYM_CODE_START_LOCAL(el\el\()_\label)
+	kernel_entry \el, \regsize
 	mov	x0, sp
-	bl	el1_error_handler
+	bl	el\el\()_\label\()_handler
+	.if \el == 0
+	b	ret_to_user
+	.else
 	b	ret_to_kernel
-SYM_CODE_END(el1_error)
-
-SYM_CODE_START_LOCAL(ret_to_kernel)
-	kernel_exit 1
-SYM_CODE_END(ret_to_kernel)
+	.endif
+SYM_CODE_END(el\el\()_\label)
+	.endm
 
 /*
- * EL0 mode handlers.
+ * Early exception handlers
  */
-SYM_CODE_START_LOCAL(el0_sync)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_sync_handler
-	b	ret_to_user
-SYM_CODE_END(el0_sync)
+	entry_handler	1, 64, sync
+	entry_handler	1, 64, irq
+	entry_handler	1, 64, fiq
+	entry_handler	1, 64, error
 
-SYM_CODE_START_LOCAL(el0_irq)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_irq_handler
-	b	ret_to_user
-SYM_CODE_END(el0_irq)
-
-SYM_CODE_START_LOCAL(el0_fiq)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_fiq_handler
-	b	ret_to_user
-SYM_CODE_END(el0_fiq)
-
-SYM_CODE_START_LOCAL(el0_error)
-	kernel_entry 0
-	mov	x0, sp
-	bl	el0_error_handler
-	b	ret_to_user
-SYM_CODE_END(el0_error)
+	entry_handler	0, 64, sync
+	entry_handler	0, 64, irq
+	entry_handler	0, 64, fiq
+	entry_handler	0, 64, error
 
 #ifdef CONFIG_COMPAT
-SYM_CODE_START_LOCAL(el0_sync_compat)
-	kernel_entry 0, 32
-	mov	x0, sp
-	bl	el0_sync_compat_handler
-	b	ret_to_user
-SYM_CODE_END(el0_sync_compat)
-
-SYM_CODE_START_LOCAL(el0_irq_compat)
-	kernel_entry 0, 32
-	mov	x0, sp
-	bl	el0_irq_compat_handler
-	b	ret_to_user
-SYM_CODE_END(el0_irq_compat)
-
-SYM_CODE_START_LOCAL(el0_fiq_compat)
-	kernel_entry 0, 32
-	mov	x0, sp
-	bl	el0_fiq_compat_handler
-	b	ret_to_user
-SYM_CODE_END(el0_fiq_compat)
-
-SYM_CODE_START_LOCAL(el0_error_compat)
-	kernel_entry 0, 32
-	mov	x0, sp
-	bl	el0_error_compat_handler
-	b	ret_to_user
-SYM_CODE_END(el0_error_compat)
+	entry_handler	0, 32, sync_compat
+	entry_handler	0, 32, irq_compat
+	entry_handler	0, 32, fiq_compat
+	entry_handler	0, 32, error_compat
 #endif
 
+SYM_CODE_START_LOCAL(ret_to_kernel)
+	kernel_exit 1
+SYM_CODE_END(ret_to_kernel)
+
 /*
  * "slow" syscall return path.
  */
-- 
GitLab


From ec841aab8d3cdd23decdcf0c47292e14627446c1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:18 +0100
Subject: [PATCH 2355/3804] arm64: entry: handle all vectors with C

We have 16 architectural exception vectors, and depending on kernel
configuration we handle 8 or 12 of these with C code, with the remaining
8 or 4 of these handled as special cases in the entry assembly.

It would be nicer if the entry assembly were uniform for all exceptions,
and we deferred any specific handling of the exceptions to C code. This
way the entry assembly can be more easily templated without ifdeffery or
special cases, and it's easier to modify the handling of these cases in
future (e.g. to dump additional registers or other context).

This patch reworks the entry code so that we always have a C handler for
every architectural exception vector, with the entry assembly being
completely uniform. We now have to handle exceptions from EL1t and EL1h,
and also have to handle exceptions from AArch32 even when the kernel is
built without CONFIG_COMPAT. To make this clear and to simplify
templating, we rename the top-level exception handlers with a consistent
naming scheme:

  asm: <el+sp>_<regsize>_<type>
  c:   <el+sp>_<regsize>_<type>_handler

... where:

  <el+sp> is `el1t`, `el1h`, or `el0t`
  <regsize> is `64` or `32`
  <type> is `sync`, `irq`, `fiq`, or `error`

... e.g.

  asm: el1h_64_sync
  c:   el1h_64_sync_handler

... with lower-level handlers simply using "el1" and "compat" as today.

For unexpected exceptions, this information is passed to
__panic_unhandled(), so it can report the specific vector an unexpected
exception was taken from, e.g.

| Unhandled 64-bit el1t sync exception

For vectors we never expect to enter legitimately, the C code is
generated using a macro to avoid code duplication. The exceptions are
handled via __panic_unhandled(), replacing bad_mode() (which is
removed).

The `kernel_ventry` and `entry_handler` assembly macros are updated to
handle the new naming scheme. In theory it should be possible to
generate the entry functions at the same time as the vectors using a
single table, but this will require reworking the linker script to split
the two into separate sections, so for now we have separate tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-15-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  32 ++++---
 arch/arm64/kernel/entry-common.c   |  51 +++++-----
 arch/arm64/kernel/entry.S          | 146 +++++++++--------------------
 arch/arm64/kernel/traps.c          |   2 +-
 4 files changed, 93 insertions(+), 138 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 4284ee57a9a59..ad30a5a1d2bfa 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -31,18 +31,25 @@ static inline u32 disr_to_esr(u64 disr)
 	return esr;
 }
 
-asmlinkage void el1_sync_handler(struct pt_regs *regs);
-asmlinkage void el1_irq_handler(struct pt_regs *regs);
-asmlinkage void el1_fiq_handler(struct pt_regs *regs);
-asmlinkage void el1_error_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_handler(struct pt_regs *regs);
-asmlinkage void el0_irq_handler(struct pt_regs *regs);
-asmlinkage void el0_fiq_handler(struct pt_regs *regs);
-asmlinkage void el0_error_handler(struct pt_regs *regs);
-asmlinkage void el0_sync_compat_handler(struct pt_regs *regs);
-asmlinkage void el0_irq_compat_handler(struct pt_regs *regs);
-asmlinkage void el0_fiq_compat_handler(struct pt_regs *regs);
-asmlinkage void el0_error_compat_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el1h_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el1h_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_64_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_64_error_handler(struct pt_regs *regs);
+
+asmlinkage void el0t_32_sync_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_irq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_fiq_handler(struct pt_regs *regs);
+asmlinkage void el0t_32_error_handler(struct pt_regs *regs);
 
 asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
@@ -53,7 +60,6 @@ void arm64_exit_nmi(struct pt_regs *regs);
 void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
-asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
 void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
 			struct pt_regs *regs);
 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index d0f9a63940675..dd6403b748f2c 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -172,16 +172,11 @@ static void noinstr __panic_unhandled(struct pt_regs *regs, const char *vector,
 	panic("Unhandled exception");
 }
 
-asmlinkage void noinstr bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
-{
-	const char *handler[] = {
-		"Synchronous Abort",
-		"IRQ",
-		"FIQ",
-		"Error"
-	};
-
-	__panic_unhandled(regs, handler[reason], esr);
+#define UNHANDLED(el, regsize, vector)							\
+asmlinkage void noinstr el##_##regsize##_##vector##_handler(struct pt_regs *regs)	\
+{											\
+	const char *desc = #regsize "-bit " #el " " #vector;				\
+	__panic_unhandled(regs, desc, read_sysreg(esr_el1));				\
 }
 
 #ifdef CONFIG_ARM64_ERRATUM_1463225
@@ -233,6 +228,11 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
 
+UNHANDLED(el1t, 64, sync)
+UNHANDLED(el1t, 64, irq)
+UNHANDLED(el1t, 64, fiq)
+UNHANDLED(el1t, 64, error)
+
 static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr)
 {
 	unsigned long far = read_sysreg(far_el1);
@@ -268,7 +268,7 @@ static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
 {
 	enter_from_kernel_mode(regs);
 	local_daif_inherit(regs);
-	bad_mode(regs, 0, esr);
+	__panic_unhandled(regs, "64-bit el1h sync", esr);
 	local_daif_mask();
 	exit_to_kernel_mode(regs);
 }
@@ -316,7 +316,7 @@ static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr)
 	exit_to_kernel_mode(regs);
 }
 
-asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -370,17 +370,17 @@ static void noinstr el1_interrupt(struct pt_regs *regs,
 	exit_el1_irq_or_nmi(regs);
 }
 
-asmlinkage void noinstr el1_irq_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_irq_handler(struct pt_regs *regs)
 {
 	el1_interrupt(regs, handle_arch_irq);
 }
 
-asmlinkage void noinstr el1_fiq_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_fiq_handler(struct pt_regs *regs)
 {
 	el1_interrupt(regs, handle_arch_fiq);
 }
 
-asmlinkage void noinstr el1_error_handler(struct pt_regs *regs)
+asmlinkage void noinstr el1h_64_error_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -526,7 +526,7 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 	do_ptrauth_fault(regs, esr);
 }
 
-asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -597,7 +597,7 @@ static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
 	el0_interrupt(regs, handle_arch_irq);
 }
 
-asmlinkage void noinstr el0_irq_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
 {
 	__el0_irq_handler_common(regs);
 }
@@ -607,7 +607,7 @@ static void noinstr __el0_fiq_handler_common(struct pt_regs *regs)
 	el0_interrupt(regs, handle_arch_fiq);
 }
 
-asmlinkage void noinstr el0_fiq_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_fiq_handler(struct pt_regs *regs)
 {
 	__el0_fiq_handler_common(regs);
 }
@@ -624,7 +624,7 @@ static void __el0_error_handler_common(struct pt_regs *regs)
 	local_daif_restore(DAIF_PROCCTX);
 }
 
-asmlinkage void noinstr el0_error_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_64_error_handler(struct pt_regs *regs)
 {
 	__el0_error_handler_common(regs);
 }
@@ -644,7 +644,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
 	do_el0_svc_compat(regs);
 }
 
-asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
 
@@ -688,18 +688,23 @@ asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs)
 	}
 }
 
-asmlinkage void noinstr el0_irq_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_irq_handler(struct pt_regs *regs)
 {
 	__el0_irq_handler_common(regs);
 }
 
-asmlinkage void noinstr el0_fiq_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_fiq_handler(struct pt_regs *regs)
 {
 	__el0_fiq_handler_common(regs);
 }
 
-asmlinkage void noinstr el0_error_compat_handler(struct pt_regs *regs)
+asmlinkage void noinstr el0t_32_error_handler(struct pt_regs *regs)
 {
 	__el0_error_handler_common(regs);
 }
+#else /* CONFIG_COMPAT */
+UNHANDLED(el0t, 32, sync)
+UNHANDLED(el0t, 32, irq)
+UNHANDLED(el0t, 32, fiq)
+UNHANDLED(el0t, 32, error)
 #endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b719ac26f7d13..d43a12dfd189e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -45,16 +45,7 @@
 	.endr
 	.endm
 
-/*
- * Bad Abort numbers
- *-----------------
- */
-#define BAD_SYNC	0
-#define BAD_IRQ		1
-#define BAD_FIQ		2
-#define BAD_ERROR	3
-
-	.macro kernel_ventry, el:req, regsize:req, label:req
+	.macro kernel_ventry, el:req, ht:req, regsize:req, label:req
 	.align 7
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	.if	\el == 0
@@ -81,7 +72,7 @@ alternative_else_nop_endif
 	tbnz	x0, #THREAD_SHIFT, 0f
 	sub	x0, sp, x0			// x0'' = sp' - x0' = (sp + x0) - sp = x0
 	sub	sp, sp, x0			// sp'' = sp' - x0 = (sp + x0) - x0 = sp
-	b	el\()\el\()_\label
+	b	el\el\ht\()_\regsize\()_\label
 
 0:
 	/*
@@ -113,7 +104,7 @@ alternative_else_nop_endif
 	sub	sp, sp, x0
 	mrs	x0, tpidrro_el0
 #endif
-	b	el\()\el\()_\label
+	b	el\el\ht\()_\regsize\()_\label
 	.endm
 
 	.macro tramp_alias, dst, sym
@@ -504,32 +495,25 @@ tsk	.req	x28		// current thread_info
 
 	.align	11
 SYM_CODE_START(vectors)
-	kernel_ventry	1, 64, sync_invalid		// Synchronous EL1t
-	kernel_ventry	1, 64, irq_invalid		// IRQ EL1t
-	kernel_ventry	1, 64, fiq_invalid		// FIQ EL1t
-	kernel_ventry	1, 64, error_invalid		// Error EL1t
-
-	kernel_ventry	1, 64, sync			// Synchronous EL1h
-	kernel_ventry	1, 64, irq			// IRQ EL1h
-	kernel_ventry	1, 64, fiq			// FIQ EL1h
-	kernel_ventry	1, 64, error			// Error EL1h
-
-	kernel_ventry	0, 64, sync			// Synchronous 64-bit EL0
-	kernel_ventry	0, 64, irq			// IRQ 64-bit EL0
-	kernel_ventry	0, 64, fiq			// FIQ 64-bit EL0
-	kernel_ventry	0, 64, error			// Error 64-bit EL0
-
-#ifdef CONFIG_COMPAT
-	kernel_ventry	0, 32, sync_compat		// Synchronous 32-bit EL0
-	kernel_ventry	0, 32, irq_compat		// IRQ 32-bit EL0
-	kernel_ventry	0, 32, fiq_compat		// FIQ 32-bit EL0
-	kernel_ventry	0, 32, error_compat		// Error 32-bit EL0
-#else
-	kernel_ventry	0, 32, sync_invalid		// Synchronous 32-bit EL0
-	kernel_ventry	0, 32, irq_invalid		// IRQ 32-bit EL0
-	kernel_ventry	0, 32, fiq_invalid		// FIQ 32-bit EL0
-	kernel_ventry	0, 32, error_invalid		// Error 32-bit EL0
-#endif
+	kernel_ventry	1, t, 64, sync		// Synchronous EL1t
+	kernel_ventry	1, t, 64, irq		// IRQ EL1t
+	kernel_ventry	1, t, 64, fiq		// FIQ EL1t
+	kernel_ventry	1, t, 64, error		// Error EL1t
+
+	kernel_ventry	1, h, 64, sync		// Synchronous EL1h
+	kernel_ventry	1, h, 64, irq		// IRQ EL1h
+	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
+	kernel_ventry	1, h, 64, error		// Error EL1h
+
+	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
+	kernel_ventry	0, t, 64, irq		// IRQ 64-bit EL0
+	kernel_ventry	0, t, 64, fiq		// FIQ 64-bit EL0
+	kernel_ventry	0, t, 64, error		// Error 64-bit EL0
+
+	kernel_ventry	0, t, 32, sync		// Synchronous 32-bit EL0
+	kernel_ventry	0, t, 32, irq		// IRQ 32-bit EL0
+	kernel_ventry	0, t, 32, fiq		// FIQ 32-bit EL0
+	kernel_ventry	0, t, 32, error		// Error 32-bit EL0
 SYM_CODE_END(vectors)
 
 #ifdef CONFIG_VMAP_STACK
@@ -560,82 +544,42 @@ __bad_stack:
 	ASM_BUG()
 #endif /* CONFIG_VMAP_STACK */
 
-/*
- * Invalid mode handlers
- */
-	.macro	inv_entry, el, reason, regsize = 64
-	kernel_entry \el, \regsize
-	mov	x0, sp
-	mov	x1, #\reason
-	mrs	x2, esr_el1
-	bl	bad_mode
-	ASM_BUG()
-	.endm
-
-SYM_CODE_START_LOCAL(el0_sync_invalid)
-	inv_entry 0, BAD_SYNC
-SYM_CODE_END(el0_sync_invalid)
-
-SYM_CODE_START_LOCAL(el0_irq_invalid)
-	inv_entry 0, BAD_IRQ
-SYM_CODE_END(el0_irq_invalid)
-
-SYM_CODE_START_LOCAL(el0_fiq_invalid)
-	inv_entry 0, BAD_FIQ
-SYM_CODE_END(el0_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el0_error_invalid)
-	inv_entry 0, BAD_ERROR
-SYM_CODE_END(el0_error_invalid)
 
-SYM_CODE_START_LOCAL(el1_sync_invalid)
-	inv_entry 1, BAD_SYNC
-SYM_CODE_END(el1_sync_invalid)
-
-SYM_CODE_START_LOCAL(el1_irq_invalid)
-	inv_entry 1, BAD_IRQ
-SYM_CODE_END(el1_irq_invalid)
-
-SYM_CODE_START_LOCAL(el1_fiq_invalid)
-	inv_entry 1, BAD_FIQ
-SYM_CODE_END(el1_fiq_invalid)
-
-SYM_CODE_START_LOCAL(el1_error_invalid)
-	inv_entry 1, BAD_ERROR
-SYM_CODE_END(el1_error_invalid)
-
-	.macro entry_handler el:req, regsize:req, label:req
-SYM_CODE_START_LOCAL(el\el\()_\label)
+	.macro entry_handler el:req, ht:req, regsize:req, label:req
+SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
 	kernel_entry \el, \regsize
 	mov	x0, sp
-	bl	el\el\()_\label\()_handler
+	bl	el\el\ht\()_\regsize\()_\label\()_handler
 	.if \el == 0
 	b	ret_to_user
 	.else
 	b	ret_to_kernel
 	.endif
-SYM_CODE_END(el\el\()_\label)
+SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
 	.endm
 
 /*
  * Early exception handlers
  */
-	entry_handler	1, 64, sync
-	entry_handler	1, 64, irq
-	entry_handler	1, 64, fiq
-	entry_handler	1, 64, error
-
-	entry_handler	0, 64, sync
-	entry_handler	0, 64, irq
-	entry_handler	0, 64, fiq
-	entry_handler	0, 64, error
-
-#ifdef CONFIG_COMPAT
-	entry_handler	0, 32, sync_compat
-	entry_handler	0, 32, irq_compat
-	entry_handler	0, 32, fiq_compat
-	entry_handler	0, 32, error_compat
-#endif
+	entry_handler	1, t, 64, sync
+	entry_handler	1, t, 64, irq
+	entry_handler	1, t, 64, fiq
+	entry_handler	1, t, 64, error
+
+	entry_handler	1, h, 64, sync
+	entry_handler	1, h, 64, irq
+	entry_handler	1, h, 64, fiq
+	entry_handler	1, h, 64, error
+
+	entry_handler	0, t, 64, sync
+	entry_handler	0, t, 64, irq
+	entry_handler	0, t, 64, fiq
+	entry_handler	0, t, 64, error
+
+	entry_handler	0, t, 32, sync
+	entry_handler	0, t, 32, irq
+	entry_handler	0, t, 32, fiq
+	entry_handler	0, t, 32, error
 
 SYM_CODE_START_LOCAL(ret_to_kernel)
 	kernel_exit 1
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 7def18ff02e25..47d423f7ac81f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -745,7 +745,7 @@ const char *esr_get_class_string(u32 esr)
 
 /*
  * bad_el0_sync handles unexpected, but potentially recoverable synchronous
- * exceptions taken from EL0. Unlike bad_mode, this returns.
+ * exceptions taken from EL0.
  */
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 {
-- 
GitLab


From afd05e28c9115d01f01d934962634789d069d3fe Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:19 +0100
Subject: [PATCH 2356/3804] arm64: entry: fold el1_inv() into
 el1h_64_sync_handler()

An unexpected synchronous exception from EL1h could happen at any time,
and for robustness we should treat this as an NMI, making minimal
assumptions about the context the exception was taken from.

Currently el1_inv() assumes we can use enter_from_kernel_mode(), and
also assumes that we should inherit the original DAIF value. Neither of
these is desirable when we take an unexpected exception. Further,
after el1_inv() calls __panic_unhandled(), the remainder of the function
is unreachable, and therefore superfluous.

Let's address this and simplify things by having el1h_64_sync_handler()
call __panic_unhandled() directly, without any of the redundant logic.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reported-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-16-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry-common.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index dd6403b748f2c..ce5c8af91d311 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -264,15 +264,6 @@ static void noinstr el1_undef(struct pt_regs *regs)
 	exit_to_kernel_mode(regs);
 }
 
-static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr)
-{
-	enter_from_kernel_mode(regs);
-	local_daif_inherit(regs);
-	__panic_unhandled(regs, "64-bit el1h sync", esr);
-	local_daif_mask();
-	exit_to_kernel_mode(regs);
-}
-
 static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
 {
 	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
@@ -346,7 +337,7 @@ asmlinkage void noinstr el1h_64_sync_handler(struct pt_regs *regs)
 		el1_fpac(regs, esr);
 		break;
 	default:
-		el1_inv(regs, esr);
+		__panic_unhandled(regs, "64-bit el1h sync", esr);
 	}
 }
 
-- 
GitLab


From 8168f098867f6584295ea408c683f61e945c6ff1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:20 +0100
Subject: [PATCH 2357/3804] arm64: entry: split bad stack entry

We'd like to keep all the entry sequencing in entry-common.c, as this
will allow us to ensure this is consistent, and free from any unsound
instrumentation.

Currently handle_bad_stack() performs the NMI entry sequence in traps.c.
Let's split the low-level entry sequence from the reporting, moving the
former to entry-common.c and keeping the latter in traps.c. To make it
clear that the reporting function never returns, it is renamed to
panic_bad_stack().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-17-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h |  4 ++++
 arch/arm64/kernel/entry-common.c   | 11 +++++++++++
 arch/arm64/kernel/traps.c          |  6 +-----
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index ad30a5a1d2bfa..0113b9242b67e 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -31,6 +31,8 @@ static inline u32 disr_to_esr(u64 disr)
 	return esr;
 }
 
+asmlinkage void handle_bad_stack(struct pt_regs *regs);
+
 asmlinkage void el1t_64_sync_handler(struct pt_regs *regs);
 asmlinkage void el1t_64_irq_handler(struct pt_regs *regs);
 asmlinkage void el1t_64_fiq_handler(struct pt_regs *regs);
@@ -73,4 +75,6 @@ void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_ptrauth_fault(struct pt_regs *regs, unsigned int esr);
 void do_serror(struct pt_regs *regs, unsigned int esr);
+
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far);
 #endif	/* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index ce5c8af91d311..efe95edf10c0c 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -699,3 +699,14 @@ UNHANDLED(el0t, 32, irq)
 UNHANDLED(el0t, 32, fiq)
 UNHANDLED(el0t, 32, error)
 #endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_VMAP_STACK
+asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+{
+	unsigned int esr = read_sysreg(esr_el1);
+	unsigned long far = read_sysreg(far_el1);
+
+	arm64_enter_nmi(regs);
+	panic_bad_stack(regs, esr, far);
+}
+#endif /* CONFIG_VMAP_STACK */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 47d423f7ac81f..af941996eb5f8 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -763,15 +763,11 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
 DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)
 	__aligned(16);
 
-asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
+void panic_bad_stack(struct pt_regs *regs, unsigned int esr, unsigned long far)
 {
 	unsigned long tsk_stk = (unsigned long)current->stack;
 	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
 	unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
-	unsigned int esr = read_sysreg(esr_el1);
-	unsigned long far = read_sysreg(far_el1);
-
-	arm64_enter_nmi(regs);
 
 	console_verbose();
 	pr_emerg("Insufficient stack space to handle exception!");
-- 
GitLab


From d60b228fd19985a903b8e8c599be0538a875d505 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:21 +0100
Subject: [PATCH 2358/3804] arm64: entry: split SDEI entry

We'd like to keep all the entry sequencing in entry-common.c, as this
will allow us to ensure this is consistent, and free from any unsound
instrumentation.

Currently __sdei_handler() performs the NMI entry/exit sequences in
sdei.c. Let's split the low-level entry sequence from the event
handling, moving the former to entry-common.c and keeping the latter in
sdei.c. The event handling function is renamed to do_sdei_event(),
matching the do_${FOO}() pattern used for other exception handlers.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-18-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/sdei.h    |  3 ++
 arch/arm64/kernel/entry-common.c | 37 ++++++++++++++++++++++++
 arch/arm64/kernel/sdei.c         | 48 ++------------------------------
 3 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index 63e0b92a5fbb0..03d619a49d4a4 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -37,6 +37,9 @@ struct sdei_registered_event;
 asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
 					struct sdei_registered_event *arg);
 
+unsigned long do_sdei_event(struct pt_regs *regs,
+			    struct sdei_registered_event *arg);
+
 unsigned long sdei_arch_get_entry_point(int conduit);
 #define sdei_arch_get_entry_point(x)	sdei_arch_get_entry_point(x)
 
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index efe95edf10c0c..1b32ca3848f53 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -20,6 +20,7 @@
 #include <asm/kprobes.h>
 #include <asm/mmu.h>
 #include <asm/processor.h>
+#include <asm/sdei.h>
 #include <asm/stacktrace.h>
 #include <asm/sysreg.h>
 #include <asm/system_misc.h>
@@ -710,3 +711,39 @@ asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs)
 	panic_bad_stack(regs, esr, far);
 }
 #endif /* CONFIG_VMAP_STACK */
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+asmlinkage noinstr unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+	unsigned long ret;
+
+	/*
+	 * We didn't take an exception to get here, so the HW hasn't
+	 * set/cleared bits in PSTATE that we may rely on.
+	 *
+	 * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
+	 * whether PSTATE bits are inherited unchanged or generated from
+	 * scratch, and the TF-A implementation always clears PAN and always
+	 * clears UAO. There are no other known implementations.
+	 *
+	 * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
+	 * PSTATE is modified upon architectural exceptions, and so PAN is
+	 * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
+	 * cleared.
+	 *
+	 * We must explicitly reset PAN to the expected state, including
+	 * clearing it when the host isn't using it, in case a VM had it set.
+	 */
+	if (system_uses_hw_pan())
+		set_pstate_pan(1);
+	else if (cpu_has_pan())
+		set_pstate_pan(0);
+
+	arm64_enter_nmi(regs);
+	ret = do_sdei_event(regs, arg);
+	arm64_exit_nmi(regs);
+
+	return ret;
+}
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index 2c7ca449dd511..e729539927433 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -231,13 +231,13 @@ out_err:
 }
 
 /*
- * __sdei_handler() returns one of:
+ * do_sdei_event() returns one of:
  *  SDEI_EV_HANDLED -  success, return to the interrupted context.
  *  SDEI_EV_FAILED  -  failure, return this error code to firmare.
  *  virtual-address -  success, return to this address.
  */
-static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
-					     struct sdei_registered_event *arg)
+unsigned long __kprobes do_sdei_event(struct pt_regs *regs,
+				      struct sdei_registered_event *arg)
 {
 	u32 mode;
 	int i, err = 0;
@@ -292,45 +292,3 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
 
 	return vbar + 0x480;
 }
-
-static void __kprobes notrace __sdei_pstate_entry(void)
-{
-	/*
-	 * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to
-	 * whether PSTATE bits are inherited unchanged or generated from
-	 * scratch, and the TF-A implementation always clears PAN and always
-	 * clears UAO. There are no other known implementations.
-	 *
-	 * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how
-	 * PSTATE is modified upon architectural exceptions, and so PAN is
-	 * either inherited or set per SCTLR_ELx.SPAN, and UAO is always
-	 * cleared.
-	 *
-	 * We must explicitly reset PAN to the expected state, including
-	 * clearing it when the host isn't using it, in case a VM had it set.
-	 */
-	if (system_uses_hw_pan())
-		set_pstate_pan(1);
-	else if (cpu_has_pan())
-		set_pstate_pan(0);
-}
-
-asmlinkage noinstr unsigned long
-__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
-{
-	unsigned long ret;
-
-	/*
-	 * We didn't take an exception to get here, so the HW hasn't
-	 * set/cleared bits in PSTATE that we may rely on. Initialize PAN.
-	 */
-	__sdei_pstate_entry();
-
-	arm64_enter_nmi(regs);
-
-	ret = _sdei_handler(regs, arg);
-
-	arm64_exit_nmi(regs);
-
-	return ret;
-}
-- 
GitLab


From 6ecbc78c3d06a3e7a4676f348a52f1c533d88464 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:22 +0100
Subject: [PATCH 2359/3804] arm64: entry: make NMI entry/exit functions static

Now that we only call arm64_enter_nmi() and arm64_exit_nmi() from within
entry-common.c, let's make these static to ensure this remains the case.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Joey Gouly <joey.gouly@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-19-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/exception.h | 2 --
 arch/arm64/kernel/entry-common.c   | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 0113b9242b67e..4afbc45b8bb0e 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -57,8 +57,6 @@ asmlinkage void call_on_irq_stack(struct pt_regs *regs,
 				  void (*func)(struct pt_regs *));
 asmlinkage void enter_from_user_mode(void);
 asmlinkage void exit_to_user_mode(void);
-void arm64_enter_nmi(struct pt_regs *regs);
-void arm64_exit_nmi(struct pt_regs *regs);
 void do_mem_abort(unsigned long far, unsigned int esr, struct pt_regs *regs);
 void do_undefinstr(struct pt_regs *regs);
 void do_bti(struct pt_regs *regs);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 1b32ca3848f53..12ce14a98b7c4 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -75,7 +75,7 @@ static void noinstr exit_to_kernel_mode(struct pt_regs *regs)
 	}
 }
 
-void noinstr arm64_enter_nmi(struct pt_regs *regs)
+static void noinstr arm64_enter_nmi(struct pt_regs *regs)
 {
 	regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
 
@@ -88,7 +88,7 @@ void noinstr arm64_enter_nmi(struct pt_regs *regs)
 	ftrace_nmi_enter();
 }
 
-void noinstr arm64_exit_nmi(struct pt_regs *regs)
+static void noinstr arm64_exit_nmi(struct pt_regs *regs)
 {
 	bool restore = regs->lockdep_hardirqs;
 
-- 
GitLab


From bf6fa2c0dda751863c3446aa64d733013bec4a19 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:23 +0100
Subject: [PATCH 2360/3804] arm64: entry: don't instrument entry code with KCOV

The code in entry-common.c runs at exception entry and return
boundaries, where portions of the kernel environment aren't available.
For example, RCU may not be watching, and lockdep state may be
out-of-sync with the hardware. Due to this, it is not sound to
instrument this code.

We generally avoid instrumentation by marking the entry functions as
`noinstr`, but currently this doesn't inhibit KCOV instrumentation.
Prevent this by disabling KCOV for the entire compilation unit.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-20-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/Makefile | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 6cc97730790e7..787c3c83edd7a 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_syscall.o	 = -fstack-protector -fstack-protector-strong
 CFLAGS_syscall.o	+= -fno-stack-protector
 
+# It's not safe to invoke KCOV when portions of the kernel environment aren't
+# available or are out-of-sync with HW state. Since `noinstr` doesn't always
+# inhibit KCOV instrumentation, disable it for the entire compilation unit.
+KCOV_INSTRUMENT_entry.o := n
+
 # Object file lists.
 obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-common.o entry-fpsimd.o process.o ptrace.o	\
-- 
GitLab


From b5df5b8307b1db6d168ffac29eff3974779bb34b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 7 Jun 2021 10:46:24 +0100
Subject: [PATCH 2361/3804] arm64: idle: don't instrument idle code with KCOV

The low-level idle code in arch_cpu_idle() and its callees runs at a
time when portions of the kernel environment aren't available.
For example, RCU may not be watching, and lockdep state may be
out-of-sync with the hardware. Due to this, it is not sound to
instrument this code.

We generally avoid instrumentation by marking the entry functions as
`noinstr`, but currently this doesn't inhibit KCOV instrumentation.
Prevent this by factoring these functions into a new idle.c so that we
can disable KCOV for the entire compilation unit, as is done for the
core idle code in kernel/sched/idle.c.

We'd like to keep instrumentation of the rest of process.c, and for the
existing code in cpuidle.c, so a new compilation unit is preferable. The
arch_cpu_idle_dead() function in process.c is a cpu hotplug function
that is safe to instrument, so it is left as-is in process.c.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210607094624.34689-21-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/Makefile  |  3 +-
 arch/arm64/kernel/idle.c    | 69 +++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c | 57 ------------------------------
 3 files changed, 71 insertions(+), 58 deletions(-)
 create mode 100644 arch/arm64/kernel/idle.c

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 787c3c83edd7a..de434204d7234 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -18,6 +18,7 @@ CFLAGS_syscall.o	+= -fno-stack-protector
 # available or are out-of-sync with HW state. Since `noinstr` doesn't always
 # inhibit KCOV instrumentation, disable it for the entire compilation unit.
 KCOV_INSTRUMENT_entry.o := n
+KCOV_INSTRUMENT_idle.o := n
 
 # Object file lists.
 obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
@@ -27,7 +28,7 @@ obj-y			:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   return_address.o cpuinfo.o cpu_errata.o		\
 			   cpufeature.o alternative.o cacheinfo.o		\
 			   smp.o smp_spin_table.o topology.o smccc-call.o	\
-			   syscall.o proton-pack.o idreg-override.o
+			   syscall.o proton-pack.o idreg-override.o idle.o
 
 targets			+= efi-entry.o
 
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
new file mode 100644
index 0000000000000..45c79204dc40c
--- /dev/null
+++ b/arch/arm64/kernel/idle.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Low-level idle sequences
+ */
+
+#include <linux/cpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+static void noinstr __cpu_do_idle(void)
+{
+	dsb(sy);
+	wfi();
+}
+
+static void noinstr __cpu_do_idle_irqprio(void)
+{
+	unsigned long pmr;
+	unsigned long daif_bits;
+
+	daif_bits = read_sysreg(daif);
+	write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
+
+	/*
+	 * Unmask PMR before going idle to make sure interrupts can
+	 * be raised.
+	 */
+	pmr = gic_read_pmr();
+	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+	__cpu_do_idle();
+
+	gic_write_pmr(pmr);
+	write_sysreg(daif_bits, daif);
+}
+
+/*
+ *	cpu_do_idle()
+ *
+ *	Idle the processor (wait for interrupt).
+ *
+ *	If the CPU supports priority masking we must do additional work to
+ *	ensure that interrupts are not masked at the PMR (because the core will
+ *	not wake up if we block the wake up signal in the interrupt controller).
+ */
+void noinstr cpu_do_idle(void)
+{
+	if (system_uses_irq_prio_masking())
+		__cpu_do_idle_irqprio();
+	else
+		__cpu_do_idle();
+}
+
+/*
+ * This is our default idle handler.
+ */
+void noinstr arch_cpu_idle(void)
+{
+	/*
+	 * This should do all the clock switching and wait for interrupt
+	 * tricks
+	 */
+	cpu_do_idle();
+	raw_local_irq_enable();
+}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 2e73377091552..72c5d80f03fa8 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -73,63 +73,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);
 
 void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
-static void noinstr __cpu_do_idle(void)
-{
-	dsb(sy);
-	wfi();
-}
-
-static void noinstr __cpu_do_idle_irqprio(void)
-{
-	unsigned long pmr;
-	unsigned long daif_bits;
-
-	daif_bits = read_sysreg(daif);
-	write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
-
-	/*
-	 * Unmask PMR before going idle to make sure interrupts can
-	 * be raised.
-	 */
-	pmr = gic_read_pmr();
-	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
-	__cpu_do_idle();
-
-	gic_write_pmr(pmr);
-	write_sysreg(daif_bits, daif);
-}
-
-/*
- *	cpu_do_idle()
- *
- *	Idle the processor (wait for interrupt).
- *
- *	If the CPU supports priority masking we must do additional work to
- *	ensure that interrupts are not masked at the PMR (because the core will
- *	not wake up if we block the wake up signal in the interrupt controller).
- */
-void noinstr cpu_do_idle(void)
-{
-	if (system_uses_irq_prio_masking())
-		__cpu_do_idle_irqprio();
-	else
-		__cpu_do_idle();
-}
-
-/*
- * This is our default idle handler.
- */
-void noinstr arch_cpu_idle(void)
-{
-	/*
-	 * This should do all the clock switching and wait for interrupt
-	 * tricks
-	 */
-	cpu_do_idle();
-	raw_local_irq_enable();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 void arch_cpu_idle_dead(void)
 {
-- 
GitLab


From 9bf3797796f570b34438235a6a537df85832bdad Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Mon, 7 Jun 2021 10:58:36 +0200
Subject: [PATCH 2362/3804] drm/sun4i: dw-hdmi: Make HDMI PHY into a platform
 device

On sunxi boards that use HDMI output, HDMI device probe keeps being
deferred indefinitely with these repeated messages in dmesg:

  platform 1ee0000.hdmi: probe deferral - supplier 1ef0000.hdmi-phy
    not ready

There's a fwnode_link being created with fw_devlink=on between hdmi
and hdmi-phy nodes, because both nodes have 'compatible' property set.

Fw_devlink code assumes that nodes that have compatible property
set will also have a device associated with them by some driver
eventually. This is not the case with the current sun8i-hdmi
driver.

This commit makes sun8i-hdmi-phy into a proper platform device
and fixes the display pipeline probe on sunxi boards that use HDMI.

More context: https://lkml.org/lkml/2021/5/16/203

Signed-off-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: Ondrej Jirman <megous@megous.com>
Tested-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20210607085836.2827429-1-megous@megous.com
---
 drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c  | 31 ++++++++++++++++---
 drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h  |  5 ++--
 drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c | 41 ++++++++++++++++++++++----
 3 files changed, 66 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
index bbdfd5e26ec88..f75fb157f2ff7 100644
--- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
+++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c
@@ -209,7 +209,7 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master,
 		goto err_disable_clk_tmds;
 	}
 
-	ret = sun8i_hdmi_phy_probe(hdmi, phy_node);
+	ret = sun8i_hdmi_phy_get(hdmi, phy_node);
 	of_node_put(phy_node);
 	if (ret) {
 		dev_err(dev, "Couldn't get the HDMI PHY\n");
@@ -242,7 +242,6 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master,
 
 cleanup_encoder:
 	drm_encoder_cleanup(encoder);
-	sun8i_hdmi_phy_remove(hdmi);
 err_disable_clk_tmds:
 	clk_disable_unprepare(hdmi->clk_tmds);
 err_assert_ctrl_reset:
@@ -263,7 +262,6 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master,
 	struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev);
 
 	dw_hdmi_unbind(hdmi->hdmi);
-	sun8i_hdmi_phy_remove(hdmi);
 	clk_disable_unprepare(hdmi->clk_tmds);
 	reset_control_assert(hdmi->rst_ctrl);
 	gpiod_set_value(hdmi->ddc_en, 0);
@@ -320,7 +318,32 @@ static struct platform_driver sun8i_dw_hdmi_pltfm_driver = {
 		.of_match_table = sun8i_dw_hdmi_dt_ids,
 	},
 };
-module_platform_driver(sun8i_dw_hdmi_pltfm_driver);
+
+static int __init sun8i_dw_hdmi_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&sun8i_dw_hdmi_pltfm_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&sun8i_hdmi_phy_driver);
+	if (ret) {
+		platform_driver_unregister(&sun8i_dw_hdmi_pltfm_driver);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit sun8i_dw_hdmi_exit(void)
+{
+	platform_driver_unregister(&sun8i_dw_hdmi_pltfm_driver);
+	platform_driver_unregister(&sun8i_hdmi_phy_driver);
+}
+
+module_init(sun8i_dw_hdmi_init);
+module_exit(sun8i_dw_hdmi_exit);
 
 MODULE_AUTHOR("Jernej Skrabec <jernej.skrabec@siol.net>");
 MODULE_DESCRIPTION("Allwinner DW HDMI bridge");
diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
index d4b55af0592f8..74f6ed0e25709 100644
--- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
+++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h
@@ -195,14 +195,15 @@ struct sun8i_dw_hdmi {
 	struct gpio_desc		*ddc_en;
 };
 
+extern struct platform_driver sun8i_hdmi_phy_driver;
+
 static inline struct sun8i_dw_hdmi *
 encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder)
 {
 	return container_of(encoder, struct sun8i_dw_hdmi, encoder);
 }
 
-int sun8i_hdmi_phy_probe(struct sun8i_dw_hdmi *hdmi, struct device_node *node);
-void sun8i_hdmi_phy_remove(struct sun8i_dw_hdmi *hdmi);
+int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node);
 
 void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy);
 void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy,
diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
index 9994edf675096..c9239708d398c 100644
--- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
+++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c
@@ -5,6 +5,7 @@
 
 #include <linux/delay.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
 
 #include "sun8i_dw_hdmi.h"
 
@@ -597,10 +598,30 @@ static const struct of_device_id sun8i_hdmi_phy_of_table[] = {
 	{ /* sentinel */ }
 };
 
-int sun8i_hdmi_phy_probe(struct sun8i_dw_hdmi *hdmi, struct device_node *node)
+int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node)
+{
+	struct platform_device *pdev = of_find_device_by_node(node);
+	struct sun8i_hdmi_phy *phy;
+
+	if (!pdev)
+		return -EPROBE_DEFER;
+
+	phy = platform_get_drvdata(pdev);
+	if (!phy)
+		return -EPROBE_DEFER;
+
+	hdmi->phy = phy;
+
+	put_device(&pdev->dev);
+
+	return 0;
+}
+
+static int sun8i_hdmi_phy_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
-	struct device *dev = hdmi->dev;
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
 	struct sun8i_hdmi_phy *phy;
 	struct resource res;
 	void __iomem *regs;
@@ -704,7 +725,7 @@ int sun8i_hdmi_phy_probe(struct sun8i_dw_hdmi *hdmi, struct device_node *node)
 		clk_prepare_enable(phy->clk_phy);
 	}
 
-	hdmi->phy = phy;
+	platform_set_drvdata(pdev, phy);
 
 	return 0;
 
@@ -728,9 +749,9 @@ err_put_clk_bus:
 	return ret;
 }
 
-void sun8i_hdmi_phy_remove(struct sun8i_dw_hdmi *hdmi)
+static int sun8i_hdmi_phy_remove(struct platform_device *pdev)
 {
-	struct sun8i_hdmi_phy *phy = hdmi->phy;
+	struct sun8i_hdmi_phy *phy = platform_get_drvdata(pdev);
 
 	clk_disable_unprepare(phy->clk_mod);
 	clk_disable_unprepare(phy->clk_bus);
@@ -744,4 +765,14 @@ void sun8i_hdmi_phy_remove(struct sun8i_dw_hdmi *hdmi)
 	clk_put(phy->clk_pll1);
 	clk_put(phy->clk_mod);
 	clk_put(phy->clk_bus);
+	return 0;
 }
+
+struct platform_driver sun8i_hdmi_phy_driver = {
+	.probe  = sun8i_hdmi_phy_probe,
+	.remove = sun8i_hdmi_phy_remove,
+	.driver = {
+		.name = "sun8i-hdmi-phy",
+		.of_match_table = sun8i_hdmi_phy_of_table,
+	},
+};
-- 
GitLab


From 8df71a7dc5e1e0d8f1bb13145e00bf375fa2082e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 26 May 2021 19:30:58 +0200
Subject: [PATCH 2363/3804] cpufreq: intel_pstate: hybrid: Fix build with
 CONFIG_ACPI unset

One of the previous commits introducing hybrid processor support to
intel_pstate broke build with CONFIG_ACPI unset.

Fix that and while at it make empty stubs of two functions related
to ACPI CPPC static inline and fix a spelling mistake in the name of
one of them.

Fixes: eb3693f0521e ("cpufreq: intel_pstate: hybrid: CPU-specific scaling factor")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
---
 drivers/cpufreq/intel_pstate.c | 91 ++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 03d8516e653e8..6012964df51ba 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -369,7 +369,7 @@ static void intel_pstate_set_itmt_prio(int cpu)
 	}
 }
 
-static int intel_pstate_get_cppc_guranteed(int cpu)
+static int intel_pstate_get_cppc_guaranteed(int cpu)
 {
 	struct cppc_perf_caps cppc_perf;
 	int ret;
@@ -385,7 +385,7 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
 }
 
 #else /* CONFIG_ACPI_CPPC_LIB */
-static void intel_pstate_set_itmt_prio(int cpu)
+static inline void intel_pstate_set_itmt_prio(int cpu)
 {
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
@@ -470,6 +470,20 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
 	acpi_processor_unregister_performance(policy->cpu);
 }
+
+static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
+{
+	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
+}
+
+static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
+					struct cppc_perf_caps *caps)
+{
+	if (cppc_get_perf_caps(cpu->cpu, caps))
+		return false;
+
+	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
+}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -486,26 +500,12 @@ static inline bool intel_pstate_acpi_pm_profile_server(void)
 #endif /* CONFIG_ACPI */
 
 #ifndef CONFIG_ACPI_CPPC_LIB
-static int intel_pstate_get_cppc_guranteed(int cpu)
+static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 {
 	return -ENOTSUPP;
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-	return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-					struct cppc_perf_caps *caps)
-{
-	if (cppc_get_perf_caps(cpu->cpu, caps))
-		return false;
-
-	return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
-
 static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
 {
 	pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
@@ -530,7 +530,6 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
  */
 static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
 {
-	struct cppc_perf_caps caps;
 	int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
 	int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
 	int perf_ctl_turbo = pstate_funcs.get_turbo();
@@ -548,33 +547,39 @@ static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
 	pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
 	pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
 
-	if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-		if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-			pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
-
-			/*
-			 * If the CPPC nominal performance is valid, it can be
-			 * assumed to correspond to cpu_khz.
-			 */
-			if (caps.nominal_perf == perf_ctl_max_phys) {
-				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-				return;
-			}
-			scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-		} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-			pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
-
-			/*
-			 * If the CPPC guaranteed performance is valid, it can
-			 * be assumed to correspond to max_freq.
-			 */
-			if (caps.guaranteed_perf == perf_ctl_max) {
-				intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-				return;
+#ifdef CONFIG_ACPI
+	if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
+		struct cppc_perf_caps caps;
+
+		if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
+			if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
+				pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
+
+				/*
+				 * If the CPPC nominal performance is valid, it
+				 * can be assumed to correspond to cpu_khz.
+				 */
+				if (caps.nominal_perf == perf_ctl_max_phys) {
+					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+					return;
+				}
+				scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
+			} else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
+				pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
+
+				/*
+				 * If the CPPC guaranteed performance is valid,
+				 * it can be assumed to correspond to max_freq.
+				 */
+				if (caps.guaranteed_perf == perf_ctl_max) {
+					intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
+					return;
+				}
+				scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
 			}
-			scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
 		}
 	}
+#endif
 	/*
 	 * If using the CPPC data to compute the HWP-to-frequency scaling factor
 	 * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
@@ -944,7 +949,7 @@ static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 	int ratio, freq;
 
-	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
+	ratio = intel_pstate_get_cppc_guaranteed(policy->cpu);
 	if (ratio <= 0) {
 		u64 cap;
 
-- 
GitLab


From 5de1262500708bcf6eef753f5eb9d8adb3d32d33 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 31 May 2021 15:16:07 +0800
Subject: [PATCH 2364/3804] cpufreq: stats: Clean up local variable in
 cpufreq_stats_create_table()

Local variable 'count' is assigned before it is used, and 'ret' is not
required either, so remove the redundant initialization of 'count' and
get rid of 'ret'.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_stats.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index da717f7cd9a93..1570d6f3e75d3 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -211,7 +211,7 @@ void cpufreq_stats_free_table(struct cpufreq_policy *policy)
 
 void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 {
-	unsigned int i = 0, count = 0, ret = -ENOMEM;
+	unsigned int i = 0, count;
 	struct cpufreq_stats *stats;
 	unsigned int alloc_size;
 	struct cpufreq_frequency_table *pos;
@@ -253,8 +253,7 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 	stats->last_index = freq_table_get_index(stats, policy->cur);
 
 	policy->stats = stats;
-	ret = sysfs_create_group(&policy->kobj, &stats_attr_group);
-	if (!ret)
+	if (!sysfs_create_group(&policy->kobj, &stats_attr_group))
 		return;
 
 	/* We failed, release resources */
-- 
GitLab


From f1ffa9d4cccc8fdf6c03fb1b3429154d22037988 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Thu, 3 Jun 2021 10:34:14 +0800
Subject: [PATCH 2365/3804] Revert "ACPI: sleep: Put the FACS table after using
 it"

Commit 95722237cb2a ("ACPI: sleep: Put the FACS table after using it")
puts the FACS table during initialization.

But the hardware signature bits in the FACS table need to be accessed,
after every hibernation, to compare with the original hardware
signature.

So there is no reason to release the FACS table mapping after
initialization.

This reverts commit 95722237cb2ae4f7b73471058cdb19e8f4057c93.

An alternative solution is to use acpi_gbl_FACS variable instead, which
is mapped by the ACPICA core and never released.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=212277
Reported-by: Stephan Hohe <sth.dev@tejp.de>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Cc: 5.8+ <stable@vger.kernel.org> # 5.8+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index df386571da98b..3bb2adef8490a 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -1009,10 +1009,8 @@ static void acpi_sleep_hibernate_setup(void)
 		return;
 
 	acpi_get_table(ACPI_SIG_FACS, 1, (struct acpi_table_header **)&facs);
-	if (facs) {
+	if (facs)
 		s4_hardware_signature = facs->hardware_signature;
-		acpi_put_table((struct acpi_table_header *)facs);
-	}
 }
 #else /* !CONFIG_HIBERNATION */
 static inline void acpi_sleep_hibernate_setup(void) {}
-- 
GitLab


From 107866a8eb0b664675a260f1ba0655010fac1e08 Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger.pau@citrix.com>
Date: Mon, 7 Jun 2021 15:13:15 +0200
Subject: [PATCH 2366/3804] xen-netback: take a reference to the RX task thread
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do this in order to prevent the task from being freed if the thread
returns (which can be triggered by the frontend) before the call to
kthread_stop done as part of the backend tear down. Not taking the
reference will lead to a use-after-free in that scenario. Such
reference was taken before but dropped as part of the rework done in
2ac061ce97f4.

Reintroduce the reference taking and add a comment this time
explaining why it's needed.

This is XSA-374 / CVE-2021-28691.

Fixes: 2ac061ce97f4 ('xen/netback: cleanup init and deinit code')
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
Cc: stable@vger.kernel.org
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/net/xen-netback/interface.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 193b723fe3bd7..c58996c1e2309 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -684,6 +684,7 @@ static void xenvif_disconnect_queue(struct xenvif_queue *queue)
 {
 	if (queue->task) {
 		kthread_stop(queue->task);
+		put_task_struct(queue->task);
 		queue->task = NULL;
 	}
 
@@ -745,6 +746,11 @@ int xenvif_connect_data(struct xenvif_queue *queue,
 	if (IS_ERR(task))
 		goto kthread_err;
 	queue->task = task;
+	/*
+	 * Take a reference to the task in order to prevent it from being freed
+	 * if the thread function returns before kthread_stop is called.
+	 */
+	get_task_struct(task);
 
 	task = kthread_run(xenvif_dealloc_kthread, queue,
 			   "%s-dealloc", queue->name);
-- 
GitLab


From c1367ee016e3550745315fb9a2dd1e4ce02cdcf6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 26 May 2021 17:12:51 -0700
Subject: [PATCH 2367/3804] m68k: atari: Fix ATARI_KBD_CORE kconfig unmet
 dependency warning

Since the code for ATARI_KBD_CORE does not use drivers/input/keyboard/
code, just move ATARI_KBD_CORE to arch/m68k/Kconfig.machine to remove
the dependency on INPUT_KEYBOARD.

Removes this kconfig warning:

    WARNING: unmet direct dependencies detected for ATARI_KBD_CORE
      Depends on [n]: !UML && INPUT [=y] && INPUT_KEYBOARD [=n]
      Selected by [y]:
      - MOUSE_ATARI [=y] && !UML && INPUT [=y] && INPUT_MOUSE [=y] && ATARI [=y]

Fixes: c04cb856e20a ("m68k: Atari keyboard and mouse support.")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Suggested-by: Michael Schmitz <schmitzmic@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Link: https://lore.kernel.org/r/20210527001251.8529-1-rdunlap@infradead.org
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/Kconfig.machine      | 3 +++
 drivers/input/keyboard/Kconfig | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine
index 4d59ec2f5b8d6..d964c1f273995 100644
--- a/arch/m68k/Kconfig.machine
+++ b/arch/m68k/Kconfig.machine
@@ -25,6 +25,9 @@ config ATARI
 	  this kernel on an Atari, say Y here and browse the material
 	  available in <file:Documentation/m68k>; otherwise say N.
 
+config ATARI_KBD_CORE
+	bool
+
 config MAC
 	bool "Macintosh support"
 	depends on MMU
diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 32d15809ae586..40a070a2e7f5b 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -67,9 +67,6 @@ config KEYBOARD_AMIGA
 	  To compile this driver as a module, choose M here: the
 	  module will be called amikbd.
 
-config ATARI_KBD_CORE
-	bool
-
 config KEYBOARD_APPLESPI
 	tristate "Apple SPI keyboard and trackpad"
 	depends on ACPI && EFI
-- 
GitLab


From c8a4556d98510ca05bad8d02265a4918b03a8c0b Mon Sep 17 00:00:00 2001
From: Srinivasa Rao Mandadapu <srivasam@codeaurora.org>
Date: Fri, 4 Jun 2021 23:45:45 +0800
Subject: [PATCH 2368/3804] ASoC: qcom: lpass-cpu: Fix pop noise during audio
 capture begin

This patch fixes pop noise of around 15ms observed at the beginning of
audio capture.
It enables BCLK and LRCLK in the snd_soc_dai_ops prepare call to
introduce some delay before capture starts.

(am from https://patchwork.kernel.org/patch/12276369/)
(also found at https://lore.kernel.org/r/20210524142114.18676-1-srivasam@codeaurora.org)

Co-developed-by: Judy Hsiao <judyhsiao@chromium.org>
Signed-off-by: Judy Hsiao <judyhsiao@chromium.org>
Signed-off-by: Srinivasa Rao Mandadapu <srivasam@codeaurora.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20210604154545.1198337-1-judyhsiao@chromium.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/lpass-cpu.c | 79 ++++++++++++++++++++++++++++++++++++++
 sound/soc/qcom/lpass.h     |  4 ++
 2 files changed, 83 insertions(+)

diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c
index 28c7497344e33..a6e95db6b3fbc 100644
--- a/sound/soc/qcom/lpass-cpu.c
+++ b/sound/soc/qcom/lpass-cpu.c
@@ -93,8 +93,30 @@ static void lpass_cpu_daiops_shutdown(struct snd_pcm_substream *substream,
 		struct snd_soc_dai *dai)
 {
 	struct lpass_data *drvdata = snd_soc_dai_get_drvdata(dai);
+	struct lpaif_i2sctl *i2sctl = drvdata->i2sctl;
+	unsigned int id = dai->driver->id;
 
 	clk_disable_unprepare(drvdata->mi2s_osr_clk[dai->driver->id]);
+	/*
+	 * Ensure LRCLK is disabled even in device node validation.
+	 * Will not impact if disabled in lpass_cpu_daiops_trigger()
+	 * suspend.
+	 */
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		regmap_fields_write(i2sctl->spken, id, LPAIF_I2SCTL_SPKEN_DISABLE);
+	else
+		regmap_fields_write(i2sctl->micen, id, LPAIF_I2SCTL_MICEN_DISABLE);
+
+	/*
+	 * BCLK may not be enabled if lpass_cpu_daiops_prepare is called before
+	 * lpass_cpu_daiops_shutdown. It's paired with the clk_enable in
+	 * lpass_cpu_daiops_prepare.
+	 */
+	if (drvdata->mi2s_was_prepared[dai->driver->id]) {
+		drvdata->mi2s_was_prepared[dai->driver->id] = false;
+		clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]);
+	}
+
 	clk_unprepare(drvdata->mi2s_bit_clk[dai->driver->id]);
 }
 
@@ -275,6 +297,18 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
 	case SNDRV_PCM_TRIGGER_START:
 	case SNDRV_PCM_TRIGGER_RESUME:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+		/*
+		 * Ensure lpass BCLK/LRCLK is enabled during
+		 * device resume as lpass_cpu_daiops_prepare() is not called
+		 * after the device resumes. We don't check mi2s_was_prepared before
+		 * enable/disable BCLK in trigger events because:
+		 *  1. These trigger events are paired, so the BCLK
+		 *     enable_count is balanced.
+		 *  2. the BCLK can be shared (ex: headset and headset mic),
+		 *     we need to increase the enable_count so that we don't
+		 *     turn off the shared BCLK while other devices are using
+		 *     it.
+		 */
 		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 			ret = regmap_fields_write(i2sctl->spken, id,
 						 LPAIF_I2SCTL_SPKEN_ENABLE);
@@ -296,6 +330,10 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_SUSPEND:
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+		/*
+		 * To ensure lpass BCLK/LRCLK is disabled during
+		 * device suspend.
+		 */
 		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 			ret = regmap_fields_write(i2sctl->spken, id,
 						 LPAIF_I2SCTL_SPKEN_DISABLE);
@@ -315,12 +353,53 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream,
 	return ret;
 }
 
+static int lpass_cpu_daiops_prepare(struct snd_pcm_substream *substream,
+		struct snd_soc_dai *dai)
+{
+	struct lpass_data *drvdata = snd_soc_dai_get_drvdata(dai);
+	struct lpaif_i2sctl *i2sctl = drvdata->i2sctl;
+	unsigned int id = dai->driver->id;
+	int ret;
+
+	/*
+	 * Ensure lpass BCLK/LRCLK is enabled a bit before playback/capture
+	 * data flow starts. This allows other codecs to have some delay before
+	 * the data flow.
+	 * (ex: to drop start up pop noise before capture starts).
+	 */
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
+		ret = regmap_fields_write(i2sctl->spken, id, LPAIF_I2SCTL_SPKEN_ENABLE);
+	else
+		ret = regmap_fields_write(i2sctl->micen, id, LPAIF_I2SCTL_MICEN_ENABLE);
+
+	if (ret) {
+		dev_err(dai->dev, "error writing to i2sctl reg: %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Check mi2s_was_prepared before enabling BCLK as lpass_cpu_daiops_prepare can
+	 * be called multiple times. It's paired with the clk_disable in
+	 * lpass_cpu_daiops_shutdown.
+	 */
+	if (!drvdata->mi2s_was_prepared[dai->driver->id]) {
+		ret = clk_enable(drvdata->mi2s_bit_clk[id]);
+		if (ret) {
+			dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret);
+			return ret;
+		}
+		drvdata->mi2s_was_prepared[dai->driver->id] = true;
+	}
+	return 0;
+}
+
 const struct snd_soc_dai_ops asoc_qcom_lpass_cpu_dai_ops = {
 	.set_sysclk	= lpass_cpu_daiops_set_sysclk,
 	.startup	= lpass_cpu_daiops_startup,
 	.shutdown	= lpass_cpu_daiops_shutdown,
 	.hw_params	= lpass_cpu_daiops_hw_params,
 	.trigger	= lpass_cpu_daiops_trigger,
+	.prepare	= lpass_cpu_daiops_prepare,
 };
 EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_dai_ops);
 
diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h
index 83b2e08ade060..7f72214404baf 100644
--- a/sound/soc/qcom/lpass.h
+++ b/sound/soc/qcom/lpass.h
@@ -67,6 +67,10 @@ struct lpass_data {
 	/* MI2S SD lines to use for playback/capture */
 	unsigned int mi2s_playback_sd_mode[LPASS_MAX_MI2S_PORTS];
 	unsigned int mi2s_capture_sd_mode[LPASS_MAX_MI2S_PORTS];
+
+	/* Whether the MI2S prepare dai_ops callback has been called */
+	bool mi2s_was_prepared[LPASS_MAX_MI2S_PORTS];
+
 	int hdmi_port_enable;
 
 	/* low-power audio interface (LPAIF) registers */
-- 
GitLab


From bcd23f93d3984a94d64ce0b6bbfa3789c0e8ebaf Mon Sep 17 00:00:00 2001
From: Maxim Kochetkov <fido_max@inbox.ru>
Date: Tue, 25 May 2021 06:42:03 +0300
Subject: [PATCH 2369/3804] regmap-irq: Introduce inverted status registers
 support

Some interrupt controllers have an inverted status register:
cleared bits are active interrupts and set bits are inactive interrupts,
so add inverted status support to the framework.

Signed-off-by: Maxim Kochetkov <fido_max@inbox.ru>
Link: https://lore.kernel.org/r/20210525034204.5272-1-fido_max@inbox.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 7 +++++++
 include/linux/regmap.h           | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 760296a4b6062..d2656581a6085 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -531,6 +531,10 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
 		}
 	}
 
+	if (chip->status_invert)
+		for (i = 0; i < data->chip->num_regs; i++)
+			data->status_buf[i] = ~data->status_buf[i];
+
 	/*
 	 * Ignore masked IRQs and ack if we need to; we ack early so
 	 * there is no race between handling and acknowleding the
@@ -800,6 +804,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
 			goto err_alloc;
 		}
 
+		if (chip->status_invert)
+			d->status_buf[i] = ~d->status_buf[i];
+
 		if (d->status_buf[i] && (chip->ack_base || chip->use_ack)) {
 			reg = sub_irq_reg(d, d->chip->ack_base, i);
 			if (chip->ack_invert)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index bf5a834d1774b..f5f08dd0a1163 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1449,6 +1449,7 @@ struct regmap_irq_sub_irq_map {
  * @not_fixed_stride: Used when chip peripherals are not laid out with fixed
  * 		      stride. Must be used with sub_reg_offsets containing the
  * 		      offsets to each peripheral.
+ * @status_invert: Inverted status register: cleared bits are active interrupts.
  * @runtime_pm:  Hold a runtime PM lock on the device when accessing it.
  *
  * @num_regs:    Number of registers in each control bank.
@@ -1501,6 +1502,7 @@ struct regmap_irq_chip {
 	bool type_in_mask:1;
 	bool clear_on_unmask:1;
 	bool not_fixed_stride:1;
+	bool status_invert:1;
 
 	int num_regs;
 
-- 
GitLab


From 3c37d899ef8bd9940f9eee551c6c0b16b36aa1e2 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 5 Jun 2021 09:34:24 +0800
Subject: [PATCH 2370/3804] regulator: max77686: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210605013424.1298773-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/max77686-regulator.c | 42 ++++++--------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/drivers/regulator/max77686-regulator.c b/drivers/regulator/max77686-regulator.c
index 9089ec608fccb..55a07d3f3ee29 100644
--- a/drivers/regulator/max77686-regulator.c
+++ b/drivers/regulator/max77686-regulator.c
@@ -67,13 +67,6 @@
 #define MAX77686_REGULATORS	MAX77686_REG_MAX
 #define MAX77686_LDOS		26
 
-enum max77686_ramp_rate {
-	RAMP_RATE_13P75MV,
-	RAMP_RATE_27P5MV,
-	RAMP_RATE_55MV,
-	RAMP_RATE_NO_CTRL,	/* 100mV/us */
-};
-
 struct max77686_data {
 	struct device *dev;
 	DECLARE_BITMAP(gpio_enabled, MAX77686_REGULATORS);
@@ -220,31 +213,6 @@ static int max77686_enable(struct regulator_dev *rdev)
 				  max77686->opmode[id] << shift);
 }
 
-static int max77686_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	unsigned int ramp_value = RAMP_RATE_NO_CTRL;
-
-	switch (ramp_delay) {
-	case 1 ... 13750:
-		ramp_value = RAMP_RATE_13P75MV;
-		break;
-	case 13751 ... 27500:
-		ramp_value = RAMP_RATE_27P5MV;
-		break;
-	case 27501 ... 55000:
-		ramp_value = RAMP_RATE_55MV;
-		break;
-	case 55001 ... 100000:
-		break;
-	default:
-		pr_warn("%s: ramp_delay: %d not supported, setting 100000\n",
-			rdev->desc->name, ramp_delay);
-	}
-
-	return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
-				  MAX77686_RAMP_RATE_MASK, ramp_value << 6);
-}
-
 static int max77686_of_parse_cb(struct device_node *np,
 		const struct regulator_desc *desc,
 		struct regulator_config *config)
@@ -284,6 +252,10 @@ static int max77686_of_parse_cb(struct device_node *np,
 	return 0;
 }
 
+static const unsigned int max77686_buck_dvs_ramp_table[] = {
+	13750, 27500, 55000, 100000
+};
+
 static const struct regulator_ops max77686_ops = {
 	.list_voltage		= regulator_list_voltage_linear,
 	.map_voltage		= regulator_map_voltage_linear,
@@ -330,7 +302,7 @@ static const struct regulator_ops max77686_buck_dvs_ops = {
 	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
 	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
 	.set_voltage_time_sel	= regulator_set_voltage_time_sel,
-	.set_ramp_delay		= max77686_set_ramp_delay,
+	.set_ramp_delay		= regulator_set_ramp_delay_regmap,
 	.set_suspend_disable	= max77686_set_suspend_disable,
 };
 
@@ -462,6 +434,10 @@ static const struct regulator_ops max77686_buck_dvs_ops = {
 	.enable_reg	= MAX77686_REG_BUCK2CTRL1 + (num - 2) * 10,	\
 	.enable_mask	= MAX77686_OPMODE_MASK				\
 			<< MAX77686_OPMODE_BUCK234_SHIFT,		\
+	.ramp_reg	= MAX77686_REG_BUCK2CTRL1 + (num - 2) * 10,	\
+	.ramp_mask	= MAX77686_RAMP_RATE_MASK,			\
+	.ramp_delay_table = max77686_buck_dvs_ramp_table,		\
+	.n_ramp_values	= ARRAY_SIZE(max77686_buck_dvs_ramp_table),	\
 }
 
 static const struct regulator_desc regulators[] = {
-- 
GitLab


From 431ea63b647fb35a0ff7f40f19c2bbd22abeb564 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 6 Jun 2021 14:50:50 +0800
Subject: [PATCH 2371/3804] regulator: mt6358: Use unsigned int for volt_tables

The regulator_desc expects "const unsigned int *volt_table", thus use
unsigned int instead of u32.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210606065052.1417111-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6358-regulator.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index 13cb6ac9a8929..7c68d27d52bb7 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -153,50 +153,50 @@ static const struct linear_range buck_volt_range4[] = {
 	REGULATOR_LINEAR_RANGE(1000000, 0, 0x7f, 12500),
 };
 
-static const u32 vdram2_voltages[] = {
+static const unsigned int vdram2_voltages[] = {
 	600000, 1800000,
 };
 
-static const u32 vsim_voltages[] = {
+static const unsigned int vsim_voltages[] = {
 	1700000, 1800000, 2700000, 3000000, 3100000,
 };
 
-static const u32 vibr_voltages[] = {
+static const unsigned int vibr_voltages[] = {
 	1200000, 1300000, 1500000, 1800000,
 	2000000, 2800000, 3000000, 3300000,
 };
 
-static const u32 vusb_voltages[] = {
+static const unsigned int vusb_voltages[] = {
 	3000000, 3100000,
 };
 
-static const u32 vcamd_voltages[] = {
+static const unsigned int vcamd_voltages[] = {
 	900000, 1000000, 1100000, 1200000,
 	1300000, 1500000, 1800000,
 };
 
-static const u32 vefuse_voltages[] = {
+static const unsigned int vefuse_voltages[] = {
 	1700000, 1800000, 1900000,
 };
 
-static const u32 vmch_vemc_voltages[] = {
+static const unsigned int vmch_vemc_voltages[] = {
 	2900000, 3000000, 3300000,
 };
 
-static const u32 vcama_voltages[] = {
+static const unsigned int vcama_voltages[] = {
 	1800000, 2500000, 2700000,
 	2800000, 2900000, 3000000,
 };
 
-static const u32 vcn33_bt_wifi_voltages[] = {
+static const unsigned int vcn33_bt_wifi_voltages[] = {
 	3300000, 3400000, 3500000,
 };
 
-static const u32 vmc_voltages[] = {
+static const unsigned int vmc_voltages[] = {
 	1800000, 2900000, 3000000, 3300000,
 };
 
-static const u32 vldo28_voltages[] = {
+static const unsigned int vldo28_voltages[] = {
 	2800000, 3000000,
 };
 
-- 
GitLab


From 5a5e31150c9ecdb76d3f87ed852f5c650d70c7d2 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 6 Jun 2021 14:50:51 +0800
Subject: [PATCH 2372/3804] regulator: mt6359: Use unsigned int for volt_tables

The regulator_desc expects "const unsigned int *volt_table", thus use
unsigned int instead of u32.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210606065052.1417111-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6359-regulator.c | 36 ++++++++++++++--------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 4f517c9fd6c4a..8003c363cdfdd 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -203,77 +203,77 @@ static const struct linear_range mt_volt_range8[] = {
 	REGULATOR_LINEAR_RANGE(506250, 0, 0x7f, 6250),
 };
 
-static const u32 vsim1_voltages[] = {
+static const unsigned int vsim1_voltages[] = {
 	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
 };
 
-static const u32 vibr_voltages[] = {
+static const unsigned int vibr_voltages[] = {
 	1200000, 1300000, 1500000, 0, 1800000, 2000000, 0, 0, 2700000, 2800000,
 	0, 3000000, 0, 3300000,
 };
 
-static const u32 vrf12_voltages[] = {
+static const unsigned int vrf12_voltages[] = {
 	0, 0, 1100000, 1200000,	1300000,
 };
 
-static const u32 volt18_voltages[] = {
+static const unsigned int volt18_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1700000, 1800000, 1900000,
 };
 
-static const u32 vcn13_voltages[] = {
+static const unsigned int vcn13_voltages[] = {
 	900000, 1000000, 0, 1200000, 1300000,
 };
 
-static const u32 vcn33_voltages[] = {
+static const unsigned int vcn33_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 2800000, 0, 0, 0, 3300000, 3400000, 3500000,
 };
 
-static const u32 vefuse_voltages[] = {
+static const unsigned int vefuse_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1700000, 1800000, 1900000, 2000000,
 };
 
-static const u32 vxo22_voltages[] = {
+static const unsigned int vxo22_voltages[] = {
 	1800000, 0, 0, 0, 2200000,
 };
 
-static const u32 vrfck_voltages[] = {
+static const unsigned int vrfck_voltages[] = {
 	0, 0, 1500000, 0, 0, 0, 0, 1600000, 0, 0, 0, 0, 1700000,
 };
 
-static const u32 vrfck_voltages_1[] = {
+static const unsigned int vrfck_voltages_1[] = {
 	1240000, 1600000,
 };
 
-static const u32 vio28_voltages[] = {
+static const unsigned int vio28_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 2800000, 2900000, 3000000, 3100000, 3300000,
 };
 
-static const u32 vemc_voltages[] = {
+static const unsigned int vemc_voltages[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2900000, 3000000, 0, 3300000,
 };
 
-static const u32 vemc_voltages_1[] = {
+static const unsigned int vemc_voltages_1[] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 2500000, 2800000, 2900000, 3000000, 3100000,
 	3300000,
 };
 
-static const u32 va12_voltages[] = {
+static const unsigned int va12_voltages[] = {
 	0, 0, 0, 0, 0, 0, 1200000, 1300000,
 };
 
-static const u32 va09_voltages[] = {
+static const unsigned int va09_voltages[] = {
 	0, 0, 800000, 900000, 0, 0, 1200000,
 };
 
-static const u32 vrf18_voltages[] = {
+static const unsigned int vrf18_voltages[] = {
 	0, 0, 0, 0, 0, 1700000, 1800000, 1810000,
 };
 
-static const u32 vbbck_voltages[] = {
+static const unsigned int vbbck_voltages[] = {
 	0, 0, 0, 0, 1100000, 0, 0, 0, 1150000, 0, 0, 0, 1200000,
 };
 
-static const u32 vsim2_voltages[] = {
+static const unsigned int vsim2_voltages[] = {
 	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
 };
 
-- 
GitLab


From 73d4ae57f82ec1e5fc92b9acd25bb1db2f8cf8e3 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 6 Jun 2021 14:50:52 +0800
Subject: [PATCH 2373/3804] regulator: mt6359: Get rid of linear_range tables

The MT6359_BUCK and MT6359_LDO_LINEAR macros set up both
linear settings (min_uV, uV_step, linear_min_sel) and linear_range
(linear_ranges, n_linear_ranges) settings.
All the linear range tables actually only have one entry.
Let's simplify it by using linear instead of linear range.

The linear_min_sel setting is 0 for all cases, remove it
from the macros.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210606065052.1417111-3-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6359-regulator.c | 157 ++++++++++-----------------
 1 file changed, 59 insertions(+), 98 deletions(-)

diff --git a/drivers/regulator/mt6359-regulator.c b/drivers/regulator/mt6359-regulator.c
index 8003c363cdfdd..7ce0bd377a089 100644
--- a/drivers/regulator/mt6359-regulator.c
+++ b/drivers/regulator/mt6359-regulator.c
@@ -41,8 +41,8 @@ struct mt6359_regulator_info {
 	u32 lp_mode_shift;
 };
 
-#define MT6359_BUCK(match, _name, min, max, step, min_sel,	\
-	volt_ranges, _enable_reg, _status_reg,			\
+#define MT6359_BUCK(match, _name, min, max, step,		\
+	_enable_reg, _status_reg,				\
 	_vsel_reg, _vsel_mask,					\
 	_lp_mode_reg, _lp_mode_shift,				\
 	_modeset_reg, _modeset_shift)				\
@@ -51,16 +51,13 @@ struct mt6359_regulator_info {
 		.name = #_name,					\
 		.of_match = of_match_ptr(match),		\
 		.regulators_node = of_match_ptr("regulators"),	\
-		.ops = &mt6359_volt_range_ops,			\
+		.ops = &mt6359_volt_linear_ops,			\
 		.type = REGULATOR_VOLTAGE,			\
 		.id = MT6359_ID_##_name,			\
 		.owner = THIS_MODULE,				\
 		.uV_step = (step),				\
-		.linear_min_sel = (min_sel),			\
 		.n_voltages = ((max) - (min)) / (step) + 1,	\
 		.min_uV = (min),				\
-		.linear_ranges = volt_ranges,			\
-		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
 		.vsel_reg = _vsel_reg,				\
 		.vsel_mask = _vsel_mask,			\
 		.enable_reg = _enable_reg,			\
@@ -77,24 +74,20 @@ struct mt6359_regulator_info {
 	.modeset_shift = _modeset_shift				\
 }
 
-#define MT6359_LDO_LINEAR(match, _name, min, max, step, min_sel,\
-	volt_ranges, _enable_reg, _status_reg,			\
-	_vsel_reg, _vsel_mask)					\
+#define MT6359_LDO_LINEAR(match, _name, min, max, step,		\
+	_enable_reg, _status_reg, _vsel_reg, _vsel_mask)	\
 [MT6359_ID_##_name] = {						\
 	.desc = {						\
 		.name = #_name,					\
 		.of_match = of_match_ptr(match),		\
 		.regulators_node = of_match_ptr("regulators"),	\
-		.ops = &mt6359_volt_range_ops,			\
+		.ops = &mt6359_volt_linear_ops,			\
 		.type = REGULATOR_VOLTAGE,			\
 		.id = MT6359_ID_##_name,			\
 		.owner = THIS_MODULE,				\
 		.uV_step = (step),				\
-		.linear_min_sel = (min_sel),			\
 		.n_voltages = ((max) - (min)) / (step) + 1,	\
 		.min_uV = (min),				\
-		.linear_ranges = volt_ranges,			\
-		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
 		.vsel_reg = _vsel_reg,				\
 		.vsel_mask = _vsel_mask,			\
 		.enable_reg = _enable_reg,			\
@@ -171,38 +164,6 @@ struct mt6359_regulator_info {
 	.qi = BIT(0),					\
 }
 
-static const struct linear_range mt_volt_range1[] = {
-	REGULATOR_LINEAR_RANGE(800000, 0, 0x70, 12500),
-};
-
-static const struct linear_range mt_volt_range2[] = {
-	REGULATOR_LINEAR_RANGE(400000, 0, 0x7f, 6250),
-};
-
-static const struct linear_range mt_volt_range3[] = {
-	REGULATOR_LINEAR_RANGE(400000, 0, 0x70, 6250),
-};
-
-static const struct linear_range mt_volt_range4[] = {
-	REGULATOR_LINEAR_RANGE(800000, 0, 0x40, 12500),
-};
-
-static const struct linear_range mt_volt_range5[] = {
-	REGULATOR_LINEAR_RANGE(500000, 0, 0x3F, 50000),
-};
-
-static const struct linear_range mt_volt_range6[] = {
-	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
-};
-
-static const struct linear_range mt_volt_range7[] = {
-	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
-};
-
-static const struct linear_range mt_volt_range8[] = {
-	REGULATOR_LINEAR_RANGE(506250, 0, 0x7f, 6250),
-};
-
 static const unsigned int vsim1_voltages[] = {
 	0, 0, 0, 1700000, 1800000, 0, 0, 0, 2700000, 0, 0, 3000000, 3100000,
 };
@@ -465,9 +426,9 @@ static int mt6359p_vemc_get_voltage_sel(struct regulator_dev *rdev)
 	return val;
 }
 
-static const struct regulator_ops mt6359_volt_range_ops = {
-	.list_voltage = regulator_list_voltage_linear_range,
-	.map_voltage = regulator_map_voltage_linear_range,
+static const struct regulator_ops mt6359_volt_linear_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.map_voltage = regulator_map_voltage_linear,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
@@ -512,75 +473,75 @@ static const struct regulator_ops mt6359p_vemc_ops = {
 
 /* The array is indexed by id(MT6359_ID_XXX) */
 static struct mt6359_regulator_info mt6359_regulators[] = {
-	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500, 0,
-		    mt_volt_range1, MT6359_RG_BUCK_VS1_EN_ADDR,
+	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500,
+		    MT6359_RG_BUCK_VS1_EN_ADDR,
 		    MT6359_DA_VS1_EN_ADDR, MT6359_RG_BUCK_VS1_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VS1_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VS1_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VS1_LP_ADDR, MT6359_RG_BUCK_VS1_LP_SHIFT,
 		    MT6359_RG_VS1_FPWM_ADDR, MT6359_RG_VS1_FPWM_SHIFT),
-	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VGPU11_EN_ADDR,
+	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VGPU11_EN_ADDR,
 		    MT6359_DA_VGPU11_EN_ADDR, MT6359_RG_BUCK_VGPU11_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VGPU11_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VGPU11_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VGPU11_LP_ADDR,
 		    MT6359_RG_BUCK_VGPU11_LP_SHIFT,
 		    MT6359_RG_VGPU11_FCCM_ADDR, MT6359_RG_VGPU11_FCCM_SHIFT),
-	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250, 0,
-		    mt_volt_range3, MT6359_RG_BUCK_VMODEM_EN_ADDR,
+	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250,
+		    MT6359_RG_BUCK_VMODEM_EN_ADDR,
 		    MT6359_DA_VMODEM_EN_ADDR, MT6359_RG_BUCK_VMODEM_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VMODEM_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VMODEM_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VMODEM_LP_ADDR,
 		    MT6359_RG_BUCK_VMODEM_LP_SHIFT,
 		    MT6359_RG_VMODEM_FCCM_ADDR, MT6359_RG_VMODEM_FCCM_SHIFT),
-	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPU_EN_ADDR,
+	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPU_EN_ADDR,
 		    MT6359_DA_VPU_EN_ADDR, MT6359_RG_BUCK_VPU_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPU_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPU_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPU_LP_ADDR, MT6359_RG_BUCK_VPU_LP_SHIFT,
 		    MT6359_RG_VPU_FCCM_ADDR, MT6359_RG_VPU_FCCM_SHIFT),
-	MT6359_BUCK("buck_vcore", VCORE, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VCORE_EN_ADDR,
+	MT6359_BUCK("buck_vcore", VCORE, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VCORE_EN_ADDR,
 		    MT6359_DA_VCORE_EN_ADDR, MT6359_RG_BUCK_VCORE_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VCORE_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VCORE_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VCORE_LP_ADDR, MT6359_RG_BUCK_VCORE_LP_SHIFT,
 		    MT6359_RG_VCORE_FCCM_ADDR, MT6359_RG_VCORE_FCCM_SHIFT),
-	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500, 0,
-		    mt_volt_range4, MT6359_RG_BUCK_VS2_EN_ADDR,
+	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500,
+		    MT6359_RG_BUCK_VS2_EN_ADDR,
 		    MT6359_DA_VS2_EN_ADDR, MT6359_RG_BUCK_VS2_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VS2_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VS2_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VS2_LP_ADDR, MT6359_RG_BUCK_VS2_LP_SHIFT,
 		    MT6359_RG_VS2_FPWM_ADDR, MT6359_RG_VS2_FPWM_SHIFT),
-	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, 0,
-		    mt_volt_range5, MT6359_RG_BUCK_VPA_EN_ADDR,
+	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
+		    MT6359_RG_BUCK_VPA_EN_ADDR,
 		    MT6359_DA_VPA_EN_ADDR, MT6359_RG_BUCK_VPA_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPA_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPA_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPA_LP_ADDR, MT6359_RG_BUCK_VPA_LP_SHIFT,
 		    MT6359_RG_VPA_MODESET_ADDR, MT6359_RG_VPA_MODESET_SHIFT),
-	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPROC2_EN_ADDR,
+	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPROC2_EN_ADDR,
 		    MT6359_DA_VPROC2_EN_ADDR, MT6359_RG_BUCK_VPROC2_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPROC2_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPROC2_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPROC2_LP_ADDR,
 		    MT6359_RG_BUCK_VPROC2_LP_SHIFT,
 		    MT6359_RG_VPROC2_FCCM_ADDR, MT6359_RG_VPROC2_FCCM_SHIFT),
-	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPROC1_EN_ADDR,
+	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPROC1_EN_ADDR,
 		    MT6359_DA_VPROC1_EN_ADDR, MT6359_RG_BUCK_VPROC1_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPROC1_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPROC1_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPROC1_LP_ADDR,
 		    MT6359_RG_BUCK_VPROC1_LP_SHIFT,
 		    MT6359_RG_VPROC1_FCCM_ADDR, MT6359_RG_VPROC1_FCCM_SHIFT),
-	MT6359_BUCK("buck_vcore_sshub", VCORE_SSHUB, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VCORE_SSHUB_EN_ADDR,
+	MT6359_BUCK("buck_vcore_sshub", VCORE_SSHUB, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VCORE_SSHUB_EN_ADDR,
 		    MT6359_DA_VCORE_EN_ADDR,
 		    MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VCORE_SSHUB_VOSEL_MASK <<
@@ -607,7 +568,7 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 	MT6359_REG_FIXED("ldo_vusb", VUSB, MT6359_RG_LDO_VUSB_EN_0_ADDR,
 			 MT6359_DA_VUSB_B_EN_ADDR, 3000000),
 	MT6359_LDO_LINEAR("ldo_vsram_proc2", VSRAM_PROC2, 500000, 1293750, 6250,
-			  0, mt_volt_range6, MT6359_RG_LDO_VSRAM_PROC2_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC2_EN_ADDR,
 			  MT6359_DA_VSRAM_PROC2_B_EN_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_MASK <<
@@ -646,7 +607,7 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 	MT6359_REG_FIXED("ldo_vaux18", VAUX18, MT6359_RG_LDO_VAUX18_EN_ADDR,
 			 MT6359_DA_VAUX18_B_EN_ADDR, 1800000),
 	MT6359_LDO_LINEAR("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750,
-			  6250, 0, mt_volt_range6,
+			  6250,
 			  MT6359_RG_LDO_VSRAM_OTHERS_EN_ADDR,
 			  MT6359_DA_VSRAM_OTHERS_B_EN_ADDR,
 			  MT6359_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR,
@@ -707,7 +668,7 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 		   MT6359_RG_VRF18_VOSEL_MASK << MT6359_RG_VRF18_VOSEL_SHIFT,
 		   120),
 	MT6359_LDO_LINEAR("ldo_vsram_md", VSRAM_MD, 500000, 1100000, 6250,
-			  0, mt_volt_range7, MT6359_RG_LDO_VSRAM_MD_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_MD_EN_ADDR,
 			  MT6359_DA_VSRAM_MD_B_EN_ADDR,
 			  MT6359_RG_LDO_VSRAM_MD_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_MD_VOSEL_MASK <<
@@ -728,7 +689,7 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 		   MT6359_RG_VBBCK_VOSEL_MASK << MT6359_RG_VBBCK_VOSEL_SHIFT,
 		   240),
 	MT6359_LDO_LINEAR("ldo_vsram_proc1", VSRAM_PROC1, 500000, 1293750, 6250,
-			  0, mt_volt_range6, MT6359_RG_LDO_VSRAM_PROC1_EN_ADDR,
+			  MT6359_RG_LDO_VSRAM_PROC1_EN_ADDR,
 			  MT6359_DA_VSRAM_PROC1_B_EN_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_MASK <<
@@ -739,7 +700,7 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 		   MT6359_RG_VSIM2_VOSEL_MASK << MT6359_RG_VSIM2_VOSEL_SHIFT,
 		   480),
 	MT6359_LDO_LINEAR("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB,
-			  500000, 1293750, 6250, 0, mt_volt_range6,
+			  500000, 1293750, 6250,
 			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR,
 			  MT6359_DA_VSRAM_OTHERS_B_EN_ADDR,
 			  MT6359_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR,
@@ -748,75 +709,75 @@ static struct mt6359_regulator_info mt6359_regulators[] = {
 };
 
 static struct mt6359_regulator_info mt6359p_regulators[] = {
-	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500, 0,
-		    mt_volt_range1, MT6359_RG_BUCK_VS1_EN_ADDR,
+	MT6359_BUCK("buck_vs1", VS1, 800000, 2200000, 12500,
+		    MT6359_RG_BUCK_VS1_EN_ADDR,
 		    MT6359_DA_VS1_EN_ADDR, MT6359_RG_BUCK_VS1_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VS1_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VS1_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VS1_LP_ADDR, MT6359_RG_BUCK_VS1_LP_SHIFT,
 		    MT6359_RG_VS1_FPWM_ADDR, MT6359_RG_VS1_FPWM_SHIFT),
-	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VGPU11_EN_ADDR,
+	MT6359_BUCK("buck_vgpu11", VGPU11, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VGPU11_EN_ADDR,
 		    MT6359_DA_VGPU11_EN_ADDR, MT6359P_RG_BUCK_VGPU11_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VGPU11_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VGPU11_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VGPU11_LP_ADDR,
 		    MT6359_RG_BUCK_VGPU11_LP_SHIFT,
 		    MT6359_RG_VGPU11_FCCM_ADDR, MT6359_RG_VGPU11_FCCM_SHIFT),
-	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250, 0,
-		    mt_volt_range3, MT6359_RG_BUCK_VMODEM_EN_ADDR,
+	MT6359_BUCK("buck_vmodem", VMODEM, 400000, 1100000, 6250,
+		    MT6359_RG_BUCK_VMODEM_EN_ADDR,
 		    MT6359_DA_VMODEM_EN_ADDR, MT6359_RG_BUCK_VMODEM_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VMODEM_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VMODEM_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VMODEM_LP_ADDR,
 		    MT6359_RG_BUCK_VMODEM_LP_SHIFT,
 		    MT6359_RG_VMODEM_FCCM_ADDR, MT6359_RG_VMODEM_FCCM_SHIFT),
-	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPU_EN_ADDR,
+	MT6359_BUCK("buck_vpu", VPU, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPU_EN_ADDR,
 		    MT6359_DA_VPU_EN_ADDR, MT6359_RG_BUCK_VPU_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPU_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPU_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPU_LP_ADDR, MT6359_RG_BUCK_VPU_LP_SHIFT,
 		    MT6359_RG_VPU_FCCM_ADDR, MT6359_RG_VPU_FCCM_SHIFT),
-	MT6359_BUCK("buck_vcore", VCORE, 506250, 1300000, 6250, 0,
-		    mt_volt_range8, MT6359_RG_BUCK_VCORE_EN_ADDR,
+	MT6359_BUCK("buck_vcore", VCORE, 506250, 1300000, 6250,
+		    MT6359_RG_BUCK_VCORE_EN_ADDR,
 		    MT6359_DA_VCORE_EN_ADDR, MT6359P_RG_BUCK_VCORE_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VCORE_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VCORE_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VCORE_LP_ADDR, MT6359_RG_BUCK_VCORE_LP_SHIFT,
 		    MT6359_RG_VCORE_FCCM_ADDR, MT6359_RG_VCORE_FCCM_SHIFT),
-	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500, 0,
-		    mt_volt_range4, MT6359_RG_BUCK_VS2_EN_ADDR,
+	MT6359_BUCK("buck_vs2", VS2, 800000, 1600000, 12500,
+		    MT6359_RG_BUCK_VS2_EN_ADDR,
 		    MT6359_DA_VS2_EN_ADDR, MT6359_RG_BUCK_VS2_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VS2_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VS2_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VS2_LP_ADDR, MT6359_RG_BUCK_VS2_LP_SHIFT,
 		    MT6359_RG_VS2_FPWM_ADDR, MT6359_RG_VS2_FPWM_SHIFT),
-	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000, 0,
-		    mt_volt_range5, MT6359_RG_BUCK_VPA_EN_ADDR,
+	MT6359_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
+		    MT6359_RG_BUCK_VPA_EN_ADDR,
 		    MT6359_DA_VPA_EN_ADDR, MT6359_RG_BUCK_VPA_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPA_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPA_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPA_LP_ADDR, MT6359_RG_BUCK_VPA_LP_SHIFT,
 		    MT6359_RG_VPA_MODESET_ADDR, MT6359_RG_VPA_MODESET_SHIFT),
-	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPROC2_EN_ADDR,
+	MT6359_BUCK("buck_vproc2", VPROC2, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPROC2_EN_ADDR,
 		    MT6359_DA_VPROC2_EN_ADDR, MT6359_RG_BUCK_VPROC2_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPROC2_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPROC2_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPROC2_LP_ADDR,
 		    MT6359_RG_BUCK_VPROC2_LP_SHIFT,
 		    MT6359_RG_VPROC2_FCCM_ADDR, MT6359_RG_VPROC2_FCCM_SHIFT),
-	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359_RG_BUCK_VPROC1_EN_ADDR,
+	MT6359_BUCK("buck_vproc1", VPROC1, 400000, 1193750, 6250,
+		    MT6359_RG_BUCK_VPROC1_EN_ADDR,
 		    MT6359_DA_VPROC1_EN_ADDR, MT6359_RG_BUCK_VPROC1_VOSEL_ADDR,
 		    MT6359_RG_BUCK_VPROC1_VOSEL_MASK <<
 		    MT6359_RG_BUCK_VPROC1_VOSEL_SHIFT,
 		    MT6359_RG_BUCK_VPROC1_LP_ADDR,
 		    MT6359_RG_BUCK_VPROC1_LP_SHIFT,
 		    MT6359_RG_VPROC1_FCCM_ADDR, MT6359_RG_VPROC1_FCCM_SHIFT),
-	MT6359_BUCK("buck_vgpu11_sshub", VGPU11_SSHUB, 400000, 1193750, 6250, 0,
-		    mt_volt_range2, MT6359P_RG_BUCK_VGPU11_SSHUB_EN_ADDR,
+	MT6359_BUCK("buck_vgpu11_sshub", VGPU11_SSHUB, 400000, 1193750, 6250,
+		    MT6359P_RG_BUCK_VGPU11_SSHUB_EN_ADDR,
 		    MT6359_DA_VGPU11_EN_ADDR,
 		    MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_ADDR,
 		    MT6359P_RG_BUCK_VGPU11_SSHUB_VOSEL_MASK <<
@@ -844,7 +805,7 @@ static struct mt6359_regulator_info mt6359p_regulators[] = {
 	MT6359_REG_FIXED("ldo_vusb", VUSB, MT6359P_RG_LDO_VUSB_EN_0_ADDR,
 			 MT6359P_DA_VUSB_B_EN_ADDR, 3000000),
 	MT6359_LDO_LINEAR("ldo_vsram_proc2", VSRAM_PROC2, 500000, 1293750, 6250,
-			  0, mt_volt_range6, MT6359P_RG_LDO_VSRAM_PROC2_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_PROC2_EN_ADDR,
 			  MT6359P_DA_VSRAM_PROC2_B_EN_ADDR,
 			  MT6359P_RG_LDO_VSRAM_PROC2_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC2_VOSEL_MASK <<
@@ -884,7 +845,7 @@ static struct mt6359_regulator_info mt6359p_regulators[] = {
 	MT6359_REG_FIXED("ldo_vaux18", VAUX18, MT6359P_RG_LDO_VAUX18_EN_ADDR,
 			 MT6359P_DA_VAUX18_B_EN_ADDR, 1800000),
 	MT6359_LDO_LINEAR("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750,
-			  6250, 0, mt_volt_range6,
+			  6250,
 			  MT6359P_RG_LDO_VSRAM_OTHERS_EN_ADDR,
 			  MT6359P_DA_VSRAM_OTHERS_B_EN_ADDR,
 			  MT6359P_RG_LDO_VSRAM_OTHERS_VOSEL_ADDR,
@@ -947,7 +908,7 @@ static struct mt6359_regulator_info mt6359p_regulators[] = {
 		   MT6359_RG_VRF18_VOSEL_MASK << MT6359_RG_VRF18_VOSEL_SHIFT,
 		   240),
 	MT6359_LDO_LINEAR("ldo_vsram_md", VSRAM_MD, 500000, 1293750, 6250,
-			  0, mt_volt_range7, MT6359P_RG_LDO_VSRAM_MD_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_MD_EN_ADDR,
 			  MT6359P_DA_VSRAM_MD_B_EN_ADDR,
 			  MT6359P_RG_LDO_VSRAM_MD_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_MD_VOSEL_MASK <<
@@ -968,7 +929,7 @@ static struct mt6359_regulator_info mt6359p_regulators[] = {
 		   MT6359P_RG_VBBCK_VOSEL_MASK << MT6359P_RG_VBBCK_VOSEL_SHIFT,
 		   480),
 	MT6359_LDO_LINEAR("ldo_vsram_proc1", VSRAM_PROC1, 500000, 1293750, 6250,
-			  0, mt_volt_range6, MT6359P_RG_LDO_VSRAM_PROC1_EN_ADDR,
+			  MT6359P_RG_LDO_VSRAM_PROC1_EN_ADDR,
 			  MT6359P_DA_VSRAM_PROC1_B_EN_ADDR,
 			  MT6359P_RG_LDO_VSRAM_PROC1_VOSEL_ADDR,
 			  MT6359_RG_LDO_VSRAM_PROC1_VOSEL_MASK <<
@@ -979,7 +940,7 @@ static struct mt6359_regulator_info mt6359p_regulators[] = {
 		   MT6359_RG_VSIM2_VOSEL_MASK << MT6359_RG_VSIM2_VOSEL_SHIFT,
 		   480),
 	MT6359_LDO_LINEAR("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB,
-			  500000, 1293750, 6250, 0, mt_volt_range6,
+			  500000, 1293750, 6250,
 			  MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_EN_ADDR,
 			  MT6359P_DA_VSRAM_OTHERS_B_EN_ADDR,
 			  MT6359P_RG_LDO_VSRAM_OTHERS_SSHUB_VOSEL_ADDR,
-- 
GitLab


From 2b2142f247ebeef74aaadc1a646261c19627fd7e Mon Sep 17 00:00:00 2001
From: Jay Fang <f.fangjian@huawei.com>
Date: Fri, 4 Jun 2021 14:55:18 +0800
Subject: [PATCH 2374/3804] spi: hisi-kunpeng: Add debugfs support

This patch uses the debugfs_regset32 interface to create the register dump
file, instead of creating a generic debugfs file with a manually written
read callback function.

With these entries, users can check all the SPI controller registers
during run time.

Signed-off-by: Jay Fang <f.fangjian@huawei.com>
Link: https://lore.kernel.org/r/1622789718-13977-1-git-send-email-f.fangjian@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-hisi-kunpeng.c | 51 +++++++++++++++++++++++++++++++++-
 1 file changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c
index 3f986ba1c328c..58b823a16fc4d 100644
--- a/drivers/spi/spi-hisi-kunpeng.c
+++ b/drivers/spi/spi-hisi-kunpeng.c
@@ -9,6 +9,7 @@
 
 #include <linux/acpi.h>
 #include <linux/bitfield.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -126,6 +127,7 @@ struct hisi_spi {
 	void __iomem		*regs;
 	int			irq;
 	u32			fifo_len; /* depth of the FIFO buffer */
+	u16			bus_num;
 
 	/* Current message transfer state info */
 	const void		*tx;
@@ -133,8 +135,49 @@ struct hisi_spi {
 	void			*rx;
 	unsigned int		rx_len;
 	u8			n_bytes; /* current is a 1/2/4 bytes op */
+
+	struct dentry *debugfs;
+	struct debugfs_regset32 regset;
+};
+
+#define HISI_SPI_DBGFS_REG(_name, _off)	\
+{					\
+	.name = _name,			\
+	.offset = _off,			\
+}
+
+static const struct debugfs_reg32 hisi_spi_regs[] = {
+	HISI_SPI_DBGFS_REG("CSCR", HISI_SPI_CSCR),
+	HISI_SPI_DBGFS_REG("CR", HISI_SPI_CR),
+	HISI_SPI_DBGFS_REG("ENR", HISI_SPI_ENR),
+	HISI_SPI_DBGFS_REG("FIFOC", HISI_SPI_FIFOC),
+	HISI_SPI_DBGFS_REG("IMR", HISI_SPI_IMR),
+	HISI_SPI_DBGFS_REG("DIN", HISI_SPI_DIN),
+	HISI_SPI_DBGFS_REG("DOUT", HISI_SPI_DOUT),
+	HISI_SPI_DBGFS_REG("SR", HISI_SPI_SR),
+	HISI_SPI_DBGFS_REG("RISR", HISI_SPI_RISR),
+	HISI_SPI_DBGFS_REG("ISR", HISI_SPI_ISR),
+	HISI_SPI_DBGFS_REG("ICR", HISI_SPI_ICR),
+	HISI_SPI_DBGFS_REG("VERSION", HISI_SPI_VERSION),
 };
 
+static int hisi_spi_debugfs_init(struct hisi_spi *hs)
+{
+	char name[32];
+
+	snprintf(name, 32, "hisi_spi%d", hs->bus_num);
+	hs->debugfs = debugfs_create_dir(name, NULL);
+	if (!hs->debugfs)
+		return -ENOMEM;
+
+	hs->regset.regs = hisi_spi_regs;
+	hs->regset.nregs = ARRAY_SIZE(hisi_spi_regs);
+	hs->regset.base = hs->regs;
+	debugfs_create_regset32("registers", 0400, hs->debugfs, &hs->regset);
+
+	return 0;
+}
+
 static u32 hisi_spi_busy(struct hisi_spi *hs)
 {
 	return readl(hs->regs + HISI_SPI_SR) & SR_BUSY;
@@ -424,6 +467,7 @@ static int hisi_spi_probe(struct platform_device *pdev)
 	hs = spi_controller_get_devdata(master);
 	hs->dev = dev;
 	hs->irq = irq;
+	hs->bus_num = pdev->id;
 
 	hs->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hs->regs))
@@ -446,7 +490,7 @@ static int hisi_spi_probe(struct platform_device *pdev)
 	master->use_gpio_descriptors = true;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
-	master->bus_num = pdev->id;
+	master->bus_num = hs->bus_num;
 	master->setup = hisi_spi_setup;
 	master->cleanup = hisi_spi_cleanup;
 	master->transfer_one = hisi_spi_transfer_one;
@@ -462,6 +506,9 @@ static int hisi_spi_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	if (hisi_spi_debugfs_init(hs))
+		dev_info(dev, "failed to create debugfs dir\n");
+
 	ret = spi_register_controller(master);
 	if (ret) {
 		dev_err(dev, "failed to register spi master, ret=%d\n", ret);
@@ -478,7 +525,9 @@ static int hisi_spi_probe(struct platform_device *pdev)
 static int hisi_spi_remove(struct platform_device *pdev)
 {
 	struct spi_controller *master = platform_get_drvdata(pdev);
+	struct hisi_spi *hs = spi_controller_get_devdata(master);
 
+	debugfs_remove_recursive(hs->debugfs);
 	spi_unregister_controller(master);
 
 	return 0;
-- 
GitLab


From 019694f5c1b9cc444e6a3fd3005f556d0c5a6b14 Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Sun, 6 Jun 2021 19:58:28 +0800
Subject: [PATCH 2375/3804] cpufreq: sh: Remove unused linux/sched.h headers

Since commit 205dcc1ecbc5 ("cpufreq/sh: Replace racy task affinity logic"),
the header <linux/sched.h> is no longer needed in sh-cpufreq.c, so remove it.

Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/sh-cpufreq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 0ac265d47ef0c..1a251e635ebdd 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -23,7 +23,6 @@
 #include <linux/cpumask.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
-#include <linux/sched.h>	/* set_cpus_allowed() */
 #include <linux/clk.h>
 #include <linux/percpu.h>
 #include <linux/sh_clk.h>
-- 
GitLab


From bcc936c5d5159b4d1891d58f89301f74ff61a67d Mon Sep 17 00:00:00 2001
From: Hailong Liu <liu.hailong6@zte.com.cn>
Date: Thu, 3 Jun 2021 21:57:52 +0800
Subject: [PATCH 2376/3804] cpufreq: loongson2: Remove unused linux/sched.h
 headers

Since commit 759f534e93ac ("CPUFREQ: Loongson2: drop
set_cpus_allowed_ptr()"), the header <linux/sched.h> is no longer needed in
loongson2_cpufreq.c, so remove it.

Signed-off-by: Hailong Liu <liu.hailong6@zte.com.cn>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/loongson2_cpufreq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index d05e761d95721..afc59b292153d 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -16,7 +16,6 @@
 #include <linux/cpufreq.h>
 #include <linux/module.h>
 #include <linux/err.h>
-#include <linux/sched.h>	/* set_cpus_allowed() */
 #include <linux/delay.h>
 #include <linux/platform_device.h>
 
-- 
GitLab


From 0508c1ad0f264a24c4643701823a45f6c9bd8146 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 19 May 2021 14:16:57 +0000
Subject: [PATCH 2377/3804] erofs: fix error return code in
 erofs_read_superblock()

'ret' will be overwritten to 0 if erofs_sb_has_sb_chksum() returns true,
thus 0 will be returned in some error handling cases. Fix to return negative
error code -EINVAL instead of 0.

Link: https://lore.kernel.org/r/20210519141657.3062715-1-weiyongjun1@huawei.com
Fixes: b858a4844cfb ("erofs: support superblock checksum")
Cc: stable <stable@vger.kernel.org> # 5.5+
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Gao Xiang <xiang@kernel.org>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 fs/erofs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index bbf3bbd908e08..22991d22af5a2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -285,6 +285,7 @@ static int erofs_read_superblock(struct super_block *sb)
 			goto out;
 	}
 
+	ret = -EINVAL;
 	blkszbits = dsb->blkszbits;
 	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
 	if (blkszbits != LOG_BLOCK_SIZE) {
-- 
GitLab


From 7dea3de7d384f4c8156e8bd93112ba6db1eb276c Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Mon, 19 Apr 2021 18:26:23 +0800
Subject: [PATCH 2378/3804] erofs: remove the occupied parameter from
 z_erofs_pagevec_enqueue()

The variable 'occupied' is never used by z_erofs_attach_page(), which is
the only caller of z_erofs_pagevec_enqueue(), so drop the parameter.

Link: https://lore.kernel.org/r/20210419102623.2015-1-zbestahu@gmail.com
Signed-off-by: Yue Hu <huyue2@yulong.com>
Reviewed-by: Gao Xiang <xiang@kernel.org>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 fs/erofs/zdata.c | 4 +---
 fs/erofs/zpvec.h | 5 +----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 78e4b598eccaf..275fef484f247 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -380,7 +380,6 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
 			       enum z_erofs_page_type type)
 {
 	int ret;
-	bool occupied;
 
 	/* give priority for inplaceio */
 	if (clt->mode >= COLLECT_PRIMARY &&
@@ -388,8 +387,7 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
 	    z_erofs_try_inplace_io(clt, page))
 		return 0;
 
-	ret = z_erofs_pagevec_enqueue(&clt->vector,
-				      page, type, &occupied);
+	ret = z_erofs_pagevec_enqueue(&clt->vector, page, type);
 	clt->cl->vcnt += (unsigned int)ret;
 
 	return ret ? 0 : -EAGAIN;
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
index 1d67cbd387042..95a620739e6a6 100644
--- a/fs/erofs/zpvec.h
+++ b/fs/erofs/zpvec.h
@@ -107,10 +107,8 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
 
 static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
 					   struct page *page,
-					   enum z_erofs_page_type type,
-					   bool *occupied)
+					   enum z_erofs_page_type type)
 {
-	*occupied = false;
 	if (!ctor->next && type)
 		if (ctor->index + 1 == ctor->nr)
 			return false;
@@ -125,7 +123,6 @@ static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
 	/* should remind that collector->next never equal to 1, 2 */
 	if (type == (uintptr_t)ctor->next) {
 		ctor->next = page;
-		*occupied = true;
 	}
 	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
 	return true;
-- 
GitLab


From c5fcb51111b85323cafe3f02784f7f0bf6a7cf07 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 3 Jun 2021 00:06:34 +0800
Subject: [PATCH 2379/3804] erofs: clean up file headers & footers

 - Remove my outdated misleading email address;

 - Get rid of all unnecessary trailing newlines added by accident.

Link: https://lore.kernel.org/r/20210602160634.10757-1-xiang@kernel.org
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/Kconfig        | 1 -
 fs/erofs/compress.h     | 2 --
 fs/erofs/data.c         | 2 --
 fs/erofs/decompressor.c | 2 --
 fs/erofs/dir.c          | 2 --
 fs/erofs/erofs_fs.h     | 2 --
 fs/erofs/inode.c        | 2 --
 fs/erofs/internal.h     | 2 --
 fs/erofs/namei.c        | 2 --
 fs/erofs/super.c        | 2 --
 fs/erofs/tagptr.h       | 3 ---
 fs/erofs/utils.c        | 2 --
 fs/erofs/xattr.c        | 2 --
 fs/erofs/xattr.h        | 1 -
 fs/erofs/zdata.c        | 2 --
 fs/erofs/zdata.h        | 1 -
 fs/erofs/zmap.c         | 2 --
 fs/erofs/zpvec.h        | 2 --
 18 files changed, 34 deletions(-)

diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 858b3339f381f..906af0c1998c1 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -75,4 +75,3 @@ config EROFS_FS_ZIP
 	  Enable fixed-sized output compression for EROFS.
 
 	  If you don't want to enable compression feature, say N.
-
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index aea129ddda74b..3701c72bacb2e 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2019 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_FS_COMPRESS_H
 #define __EROFS_FS_COMPRESS_H
@@ -85,4 +84,3 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq,
 		       struct list_head *pagepool);
 
 #endif
-
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index ebac756cb2a38..3787a5fb0a42b 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "internal.h"
 #include <linux/prefetch.h>
@@ -315,4 +314,3 @@ const struct address_space_operations erofs_raw_access_aops = {
 	.readahead = erofs_raw_access_readahead,
 	.bmap = erofs_bmap,
 };
-
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 88e33addf2298..a5bc4b1b7813e 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2019 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "compress.h"
 #include <linux/module.h>
@@ -407,4 +406,3 @@ int z_erofs_decompress(struct z_erofs_decompress_req *rq,
 		return z_erofs_shifted_transform(rq, pagepool);
 	return z_erofs_decompress_generic(rq, pagepool);
 }
-
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 2776bb832127d..eee9b0b31b639 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "internal.h"
 
@@ -139,4 +138,3 @@ const struct file_operations erofs_dir_fops = {
 	.read		= generic_read_dir,
 	.iterate_shared	= erofs_readdir,
 };
-
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 8739d3adf51f1..0f8da74570b44 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -4,7 +4,6 @@
  *
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_FS_H
 #define __EROFS_FS_H
@@ -348,4 +347,3 @@ static inline void erofs_check_ondisk_layout_definitions(void)
 }
 
 #endif
-
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 7ed2d73916928..aa8a0d770ba34 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "xattr.h"
 
@@ -374,4 +373,3 @@ const struct inode_operations erofs_fast_symlink_iops = {
 	.listxattr = erofs_listxattr,
 	.get_acl = erofs_get_acl,
 };
-
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index f92e3e32b9f48..543c2ff97d30f 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_INTERNAL_H
 #define __EROFS_INTERNAL_H
@@ -469,4 +468,3 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb,
 #define EFSCORRUPTED    EUCLEAN         /* Filesystem is corrupted */
 
 #endif	/* __EROFS_INTERNAL_H */
-
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index 3a81e1f7fc067..a8271ce5e13ff 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "xattr.h"
 
@@ -247,4 +246,3 @@ const struct inode_operations erofs_dir_iops = {
 	.listxattr = erofs_listxattr,
 	.get_acl = erofs_get_acl,
 };
-
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 22991d22af5a2..8fc6c04b54f41 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include <linux/module.h>
 #include <linux/buffer_head.h>
@@ -752,4 +751,3 @@ module_exit(erofs_module_exit);
 MODULE_DESCRIPTION("Enhanced ROM File System");
 MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
 MODULE_LICENSE("GPL");
-
diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h
index a72897c86744c..64ceb7270b5c1 100644
--- a/fs/erofs/tagptr.h
+++ b/fs/erofs/tagptr.h
@@ -1,8 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * A tagged pointer implementation
- *
- * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_FS_TAGPTR_H
 #define __EROFS_FS_TAGPTR_H
@@ -107,4 +105,3 @@ tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
 *ptptr; })
 
 #endif	/* __EROFS_FS_TAGPTR_H */
-
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 6758c5b19f7cf..bd86067a63f7f 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "internal.h"
 #include <linux/pagevec.h>
@@ -278,4 +277,3 @@ void erofs_exit_shrinker(void)
 	unregister_shrinker(&erofs_shrinker_info);
 }
 #endif	/* !CONFIG_EROFS_FS_ZIP */
-
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 47314a26767a8..8dd54b420a1d6 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include <linux/security.h>
 #include "xattr.h"
@@ -709,4 +708,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type)
 	return acl;
 }
 #endif
-
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index 815304bd335f5..366dcb400525f 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2017-2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_XATTR_H
 #define __EROFS_XATTR_H
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 275fef484f247..cb4d0889eca95 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "zdata.h"
 #include "compress.h"
@@ -1469,4 +1468,3 @@ const struct address_space_operations z_erofs_aops = {
 	.readpage = z_erofs_readpage,
 	.readahead = z_erofs_readahead,
 };
-
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
index 942ee69dff6af..3a008f1b9f787 100644
--- a/fs/erofs/zdata.h
+++ b/fs/erofs/zdata.h
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_FS_ZDATA_H
 #define __EROFS_FS_ZDATA_H
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index efaf32596b97f..f68aea4baed74 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2018-2019 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #include "internal.h"
 #include <asm/unaligned.h>
@@ -597,4 +596,3 @@ out:
 	DBG_BUGON(err < 0 && err != -ENOMEM);
 	return err;
 }
-
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
index 95a620739e6a6..dfd7fe0503bb1 100644
--- a/fs/erofs/zpvec.h
+++ b/fs/erofs/zpvec.h
@@ -2,7 +2,6 @@
 /*
  * Copyright (C) 2018 HUAWEI, Inc.
  *             https://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
  */
 #ifndef __EROFS_FS_ZPVEC_H
 #define __EROFS_FS_ZPVEC_H
@@ -151,4 +150,3 @@ z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
 	return tagptr_unfold_ptr(t);
 }
 #endif
-
-- 
GitLab


From 13dfead49db07225335d4f587a560a2210391a1a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 10 Mar 2021 19:40:43 -0600
Subject: [PATCH 2380/3804] media: siano: Fix out-of-bounds warnings in
 smscore_load_firmware_family2()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rename struct sms_msg_data4 to sms_msg_data5 and increase the size of
its msg_data array from 4 to 5 elements. Notice that at some point
the 5th element of msg_data is being accessed in function
smscore_load_firmware_family2():

1006                 trigger_msg->msg_data[4] = 4; /* Task ID */

Also, there is no need for the object _trigger_msg_ of type struct
sms_msg_data *, when _msg_ can be used, directly. Notice that msg_data
in struct sms_msg_data is a one-element array, which causes multiple
out-of-bounds warnings when accessing beyond its first element
in function smscore_load_firmware_family2():

 992                 struct sms_msg_data *trigger_msg =
 993                         (struct sms_msg_data *) msg;
 994
 995                 pr_debug("sending MSG_SMS_SWDOWNLOAD_TRIGGER_REQ\n");
 996                 SMS_INIT_MSG(&msg->x_msg_header,
 997                                 MSG_SMS_SWDOWNLOAD_TRIGGER_REQ,
 998                                 sizeof(struct sms_msg_hdr) +
 999                                 sizeof(u32) * 5);
1000
1001                 trigger_msg->msg_data[0] = firmware->start_address;
1002                                         /* Entry point */
1003                 trigger_msg->msg_data[1] = 6; /* Priority */
1004                 trigger_msg->msg_data[2] = 0x200; /* Stack size */
1005                 trigger_msg->msg_data[3] = 0; /* Parameter */
1006                 trigger_msg->msg_data[4] = 4; /* Task ID */

even when enough dynamic memory is allocated for _msg_:

 929         /* PAGE_SIZE buffer shall be enough and dma aligned */
 930         msg = kmalloc(PAGE_SIZE, GFP_KERNEL | coredev->gfp_buf_flags);

but as _msg_ is cast to (struct sms_msg_data *):

 992                 struct sms_msg_data *trigger_msg =
 993                         (struct sms_msg_data *) msg;

the out-of-bounds warnings are actually valid and should be addressed.

Fix this by declaring object _msg_ of type struct sms_msg_data5 *,
which contains a 5-element array instead of just 4, and use _msg_
directly instead of creating object trigger_msg.

This helps with the ongoing efforts to enable -Warray-bounds by fixing
the following warnings:

  CC [M]  drivers/media/common/siano/smscoreapi.o
drivers/media/common/siano/smscoreapi.c: In function ‘smscore_load_firmware_family2’:
drivers/media/common/siano/smscoreapi.c:1003:24: warning: array subscript 1 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
 1003 |   trigger_msg->msg_data[1] = 6; /* Priority */
      |   ~~~~~~~~~~~~~~~~~~~~~^~~
In file included from drivers/media/common/siano/smscoreapi.c:12:
drivers/media/common/siano/smscoreapi.h:619:6: note: while referencing ‘msg_data’
  619 |  u32 msg_data[1];
      |      ^~~~~~~~
drivers/media/common/siano/smscoreapi.c:1004:24: warning: array subscript 2 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
 1004 |   trigger_msg->msg_data[2] = 0x200; /* Stack size */
      |   ~~~~~~~~~~~~~~~~~~~~~^~~
In file included from drivers/media/common/siano/smscoreapi.c:12:
drivers/media/common/siano/smscoreapi.h:619:6: note: while referencing ‘msg_data’
  619 |  u32 msg_data[1];
      |      ^~~~~~~~
drivers/media/common/siano/smscoreapi.c:1005:24: warning: array subscript 3 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
 1005 |   trigger_msg->msg_data[3] = 0; /* Parameter */
      |   ~~~~~~~~~~~~~~~~~~~~~^~~
In file included from drivers/media/common/siano/smscoreapi.c:12:
drivers/media/common/siano/smscoreapi.h:619:6: note: while referencing ‘msg_data’
  619 |  u32 msg_data[1];
      |      ^~~~~~~~
drivers/media/common/siano/smscoreapi.c:1006:24: warning: array subscript 4 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
 1006 |   trigger_msg->msg_data[4] = 4; /* Task ID */
      |   ~~~~~~~~~~~~~~~~~~~~~^~~
In file included from drivers/media/common/siano/smscoreapi.c:12:
drivers/media/common/siano/smscoreapi.h:619:6: note: while referencing ‘msg_data’
  619 |  u32 msg_data[1];
      |      ^~~~~~~~

Fixes: 018b0c6f8acb ("[media] siano: make load firmware logic to work with newer firmwares")
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/media/common/siano/smscoreapi.c | 22 +++++++++-------------
 drivers/media/common/siano/smscoreapi.h |  4 ++--
 2 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/media/common/siano/smscoreapi.c b/drivers/media/common/siano/smscoreapi.c
index 410cc3ac6f948..bceaf91faa15f 100644
--- a/drivers/media/common/siano/smscoreapi.c
+++ b/drivers/media/common/siano/smscoreapi.c
@@ -908,7 +908,7 @@ static int smscore_load_firmware_family2(struct smscore_device_t *coredev,
 					 void *buffer, size_t size)
 {
 	struct sms_firmware *firmware = (struct sms_firmware *) buffer;
-	struct sms_msg_data4 *msg;
+	struct sms_msg_data5 *msg;
 	u32 mem_address,  calc_checksum = 0;
 	u32 i, *ptr;
 	u8 *payload = firmware->payload;
@@ -989,24 +989,20 @@ static int smscore_load_firmware_family2(struct smscore_device_t *coredev,
 		goto exit_fw_download;
 
 	if (coredev->mode == DEVICE_MODE_NONE) {
-		struct sms_msg_data *trigger_msg =
-			(struct sms_msg_data *) msg;
-
 		pr_debug("sending MSG_SMS_SWDOWNLOAD_TRIGGER_REQ\n");
 		SMS_INIT_MSG(&msg->x_msg_header,
 				MSG_SMS_SWDOWNLOAD_TRIGGER_REQ,
-				sizeof(struct sms_msg_hdr) +
-				sizeof(u32) * 5);
+				sizeof(*msg));
 
-		trigger_msg->msg_data[0] = firmware->start_address;
+		msg->msg_data[0] = firmware->start_address;
 					/* Entry point */
-		trigger_msg->msg_data[1] = 6; /* Priority */
-		trigger_msg->msg_data[2] = 0x200; /* Stack size */
-		trigger_msg->msg_data[3] = 0; /* Parameter */
-		trigger_msg->msg_data[4] = 4; /* Task ID */
+		msg->msg_data[1] = 6; /* Priority */
+		msg->msg_data[2] = 0x200; /* Stack size */
+		msg->msg_data[3] = 0; /* Parameter */
+		msg->msg_data[4] = 4; /* Task ID */
 
-		rc = smscore_sendrequest_and_wait(coredev, trigger_msg,
-					trigger_msg->x_msg_header.msg_length,
+		rc = smscore_sendrequest_and_wait(coredev, msg,
+					msg->x_msg_header.msg_length,
 					&coredev->trigger_done);
 	} else {
 		SMS_INIT_MSG(&msg->x_msg_header, MSG_SW_RELOAD_EXEC_REQ,
diff --git a/drivers/media/common/siano/smscoreapi.h b/drivers/media/common/siano/smscoreapi.h
index 4a6b9f4c44ace..f8789ee0d554e 100644
--- a/drivers/media/common/siano/smscoreapi.h
+++ b/drivers/media/common/siano/smscoreapi.h
@@ -624,9 +624,9 @@ struct sms_msg_data2 {
 	u32 msg_data[2];
 };
 
-struct sms_msg_data4 {
+struct sms_msg_data5 {
 	struct sms_msg_hdr x_msg_header;
-	u32 msg_data[4];
+	u32 msg_data[5];
 };
 
 struct sms_data_download {
-- 
GitLab


From dc2557308ede6bd8a91409fe196ba4b081567809 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Sun, 6 Jun 2021 21:21:27 +0100
Subject: [PATCH 2381/3804] afs: Fix partial writeback of large files on fsync
 and close

In commit e87b03f5830e ("afs: Prepare for use of THPs"), the return
value for afs_write_back_from_locked_page was changed from a number
of pages to a length in bytes.  The loop in afs_writepages_region uses
the return value to compute the index that will be used to find dirty
pages in the next iteration, but treats it as a number of pages and
wrongly multiplies it by PAGE_SIZE.  This gives a very large index value,
potentially skipping any dirty data that was not covered in the first
pass, which is limited to 256M.

This causes fsync(), and indirectly close(), to only do a partial
writeback of a large file's dirty data.  The rest is eventually written
back by background threads after dirty_expire_centisecs.

Fixes: e87b03f5830e ("afs: Prepare for use of THPs")
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/20210604175504.4055-1-marc.c.dionne@gmail.com/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3edb6204b9370..a523bb86915d0 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -730,7 +730,7 @@ static int afs_writepages_region(struct address_space *mapping,
 			return ret;
 		}
 
-		start += ret * PAGE_SIZE;
+		start += ret;
 
 		cond_resched();
 	} while (wbc->nr_to_write > 0);
-- 
GitLab


From 51c96a561f244e25a4a2afc7a48b92b4adf8050d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 6 Jun 2021 17:24:22 +0300
Subject: [PATCH 2382/3804] ethtool: Fix NULL pointer dereference during module
 EEPROM dump

When get_module_eeprom_by_page() is not implemented by the driver, NULL
pointer dereference can occur [1].

Fix by testing if get_module_eeprom_by_page() is implemented instead of
get_module_info().

[1]
 BUG: kernel NULL pointer dereference, address: 0000000000000000
 [...]
 CPU: 0 PID: 251 Comm: ethtool Not tainted 5.13.0-rc3-custom-00940-g3822d0670c9d #989
 Call Trace:
  eeprom_prepare_data+0x101/0x2d0
  ethnl_default_doit+0xc2/0x290
  genl_family_rcv_msg_doit+0xdc/0x140
  genl_rcv_msg+0xd7/0x1d0
  netlink_rcv_skb+0x49/0xf0
  genl_rcv+0x1f/0x30
  netlink_unicast+0x1f6/0x2c0
  netlink_sendmsg+0x1f9/0x400
  __sys_sendto+0xe1/0x130
  __x64_sys_sendto+0x1b/0x20
  do_syscall_64+0x3a/0x70
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: c97a31f66ebc ("ethtool: wire in generic SFP module access")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/eeprom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c
index 2a6733a6449af..5d38e90895ac1 100644
--- a/net/ethtool/eeprom.c
+++ b/net/ethtool/eeprom.c
@@ -95,7 +95,7 @@ static int get_module_eeprom_by_page(struct net_device *dev,
 	if (dev->sfp_bus)
 		return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack);
 
-	if (ops->get_module_info)
+	if (ops->get_module_eeprom_by_page)
 		return ops->get_module_eeprom_by_page(dev, page_data, extack);
 
 	return -EOPNOTSUPP;
-- 
GitLab


From 306b9228c097b4101c150ccd262372ded8348644 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Sun, 6 Jun 2021 11:24:30 +0300
Subject: [PATCH 2383/3804] mlxsw: reg: Spectrum-3: Enforce lowest max-shaper
 burst size of 11

A max-shaper is the HW component responsible for delaying egress traffic
above a configured transmission rate. Burst size is the amount of traffic
that is allowed to pass without accounting. The burst size value needs to
be such that it can be expressed as 2^BS * 512 bits, where BS lies in a
certain ASIC-dependent range. mlxsw enforces that this holds before
attempting to configure the shaper.

The assumption for Spectrum-3 was that the lower limit of BS would be 5,
like for Spectrum-1. But as of now, the limit is still 11. Therefore fix
the driver accordingly, so that incorrect values are rejected early with a
proper message.

Fixes: 23effa2479ba ("mlxsw: reg: Add max_shaper_bs to QoS ETS Element Configuration")
Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 900b4bf5bb5bf..2bc5a9003c6de 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3907,7 +3907,7 @@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, 0x1C, 0, 6);
 #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS	25
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1	5
 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2	11
-#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3	5
+#define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3	11
 
 static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
 				       enum mlxsw_reg_qeec_hr hr, u8 index,
-- 
GitLab


From d566ed04e42bbb7144cf52718b77ca5c791abc09 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@nvidia.com>
Date: Sun, 6 Jun 2021 11:24:31 +0300
Subject: [PATCH 2384/3804] mlxsw: spectrum_qdisc: Pass handle, not band number
 to find_class()

In mlxsw Qdisc offload, find_class() is an operation that yields a qdisc
offload descriptor given a parental qdisc descriptor and a class handle. In
__mlxsw_sp_qdisc_ets_graft() however, a band number is passed to that
function instead of a handle. This can lead to a trigger of a WARN_ON
with the following splat:

 WARNING: CPU: 3 PID: 808 at drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c:1356 __mlxsw_sp_qdisc_ets_graft+0x115/0x130 [mlxsw_spectrum]
 [...]
 Call Trace:
  mlxsw_sp_setup_tc_prio+0xe3/0x100 [mlxsw_spectrum]
  qdisc_offload_graft_helper+0x35/0xa0
  prio_graft+0x176/0x290 [sch_prio]
  qdisc_graft+0xb3/0x540
  tc_modify_qdisc+0x56a/0x8a0
  rtnetlink_rcv_msg+0x12c/0x370
  netlink_rcv_skb+0x49/0xf0
  netlink_unicast+0x1f6/0x2b0
  netlink_sendmsg+0x1fb/0x410
  ____sys_sendmsg+0x1f3/0x220
  ___sys_sendmsg+0x70/0xb0
  __sys_sendmsg+0x54/0xa0
  do_syscall_64+0x3a/0x70
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Since the parent handle is not passed with the offload information, compute
it from the band number and qdisc handle.

Fixes: 28052e618b04 ("mlxsw: spectrum_qdisc: Track children per qdisc")
Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 04672eb5c7f34..9958d503bf0e9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -1332,6 +1332,7 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
 			   u8 band, u32 child_handle)
 {
 	struct mlxsw_sp_qdisc *old_qdisc;
+	u32 parent;
 
 	if (band < mlxsw_sp_qdisc->num_classes &&
 	    mlxsw_sp_qdisc->qdiscs[band].handle == child_handle)
@@ -1352,7 +1353,9 @@ __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (old_qdisc)
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
 
-	mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band);
+	parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1);
+	mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc,
+							 parent);
 	if (!WARN_ON(!mlxsw_sp_qdisc))
 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
 
-- 
GitLab


From 2fd8d84ce3095e8a7b5fe96532c91b1b9e07339c Mon Sep 17 00:00:00 2001
From: Mykola Kostenok <c_mykolak@nvidia.com>
Date: Sun, 6 Jun 2021 11:24:32 +0300
Subject: [PATCH 2385/3804] mlxsw: core: Set thermal zone polling delay
 argument to real value at init

Thermal polling delay argument for modules and gearboxes thermal zones
used to be initialized with zero value, while the actual delay used to
be set by mlxsw_thermal_set_mode() via the thermal operation callback
set_mode(). After operations set_mode()/get_mode() have been removed by
cited commits, modules and gearboxes thermal zones always have polling
time set to zero and do not perform temperature monitoring.

Set non-zero "polling_delay" in thermal_zone_device_register() routine,
thus, the relevant thermal zones will perform thermal monitoring.

Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Fixes: 5d7bd8aa7c35 ("thermal: Simplify or eliminate unnecessary set_mode() methods")
Fixes: 1ee14820fd8e ("thermal: remove get_mode() operation of drivers")
Signed-off-by: Mykola Kostenok <c_mykolak@nvidia.com>
Acked-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index dfea14399607f..85f0ce2851460 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -693,7 +693,8 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
 							MLXSW_THERMAL_TRIP_MASK,
 							module_tz,
 							&mlxsw_thermal_module_ops,
-							NULL, 0, 0);
+							NULL, 0,
+							module_tz->parent->polling_delay);
 	if (IS_ERR(module_tz->tzdev)) {
 		err = PTR_ERR(module_tz->tzdev);
 		return err;
@@ -815,7 +816,8 @@ mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
 						MLXSW_THERMAL_TRIP_MASK,
 						gearbox_tz,
 						&mlxsw_thermal_gearbox_ops,
-						NULL, 0, 0);
+						NULL, 0,
+						gearbox_tz->parent->polling_delay);
 	if (IS_ERR(gearbox_tz->tzdev))
 		return PTR_ERR(gearbox_tz->tzdev);
 
-- 
GitLab


From a47c397bb29fce1751dc755246a2c8deeca5e38f Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 7 Jun 2021 21:46:23 +0300
Subject: [PATCH 2386/3804] revert "net: kcm: fix memory leak in kcm_sendmsg"

In commit c47cc304990a ("net: kcm: fix memory leak in kcm_sendmsg")
I misunderstood the root cause of the memory leak and came up with
a completely broken fix.

So, simply revert this commit to avoid GPF reported by
syzbot.

I'm so sorry for this situation.

Fixes: c47cc304990a ("net: kcm: fix memory leak in kcm_sendmsg")
Reported-by: syzbot+65badd5e74ec62cb67dc@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/kcm/kcmsock.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 1c572c8daced0..6201965bd822f 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1066,11 +1066,6 @@ out_error:
 		goto partial_message;
 	}
 
-	if (skb_has_frag_list(head)) {
-		kfree_skb_list(skb_shinfo(head)->frag_list);
-		skb_shinfo(head)->frag_list = NULL;
-	}
-
 	if (head != kcm->seq_skb)
 		kfree_skb(head);
 
-- 
GitLab


From 7a6b1ab7475fd6478eeaf5c9d1163e7a18125c8f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Mon, 7 Jun 2021 11:35:30 -0600
Subject: [PATCH 2387/3804] neighbour: allow NUD_NOARP entries to be forced
 GCed

IFF_POINTOPOINT interfaces use NUD_NOARP entries for IPv6. It's possible to
fill up the neighbour table with enough entries that it will overflow for
valid connections after that.

This behaviour is more prevalent after commit 58956317c8de ("neighbor:
Improve garbage collection") is applied, as it prevents removal of
entries that are not NUD_FAILED, unless they are more than 5s old.

Fixes: 58956317c8de ("neighbor: Improve garbage collection")
Reported-by: Kasper Dupont <kasperd@gjkwv.06.feb.2021.kasperd.net>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 98f20efbfadf2..bf774575ad716 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
 
 			write_lock(&n->lock);
 			if ((n->nud_state == NUD_FAILED) ||
+			    (n->nud_state == NUD_NOARP) ||
 			    (tbl->is_multicast &&
 			     tbl->is_multicast(n->primary_key)) ||
 			    time_after(tref, n->updated))
-- 
GitLab


From 11fc79fc9f2e395aa39fa5baccae62767c5d8280 Mon Sep 17 00:00:00 2001
From: Kev Jackson <foamdino@gmail.com>
Date: Mon, 7 Jun 2021 14:08:35 +0100
Subject: [PATCH 2388/3804] libbpf: Fixes incorrect rx_ring_setup_done

When calling xsk_socket__create_shared(), the logic at line 1097 marks a
boolean flag true within the xsk_umem structure to track setup progress
in order to support multiple calls to the function.  However, instead of
marking umem->tx_ring_setup_done, the code incorrectly sets
umem->rx_ring_setup_done.  This leads to improper behaviour when
creating and destroying xsk and umem structures.

Multiple calls to this function are documented as supported.

Fixes: ca7a83e2487a ("libbpf: Only create rx and tx XDP rings when necessary")
Signed-off-by: Kev Jackson <foamdino@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/YL4aU4f3Aaik7CN0@linux-dev
---
 tools/lib/bpf/xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 6061431ee04c0..e9b619aa0cdf3 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -1094,7 +1094,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
 			goto out_put_ctx;
 		}
 		if (xsk->fd == umem->fd)
-			umem->rx_ring_setup_done = true;
+			umem->tx_ring_setup_done = true;
 	}
 
 	err = xsk_get_mmap_offsets(xsk->fd, &off);
-- 
GitLab


From 66a834d092930cf41d809c0e989b13cd6f9ca006 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 2 Jun 2021 21:30:26 +0800
Subject: [PATCH 2389/3804] scsi: core: Fix error handling of scsi_host_alloc()

After device is initialized via device_initialize(), or its name is set via
dev_set_name(), the device has to be freed via put_device().  Otherwise
device name will be leaked because it is allocated dynamically in
dev_set_name().

Fix the leak by replacing kfree() with put_device(). Since
scsi_host_dev_release() properly handles IDA and kthread removal, remove
special-casing these from the error handling as well.

Link: https://lore.kernel.org/r/20210602133029.2864069-2-ming.lei@redhat.com
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Hannes Reinecke <hare@suse.de>
Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 697c09ef259b3..cc6c171905bb6 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -388,8 +388,10 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	mutex_init(&shost->scan_mutex);
 
 	index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
-	if (index < 0)
-		goto fail_kfree;
+	if (index < 0) {
+		kfree(shost);
+		return NULL;
+	}
 	shost->host_no = index;
 
 	shost->dma_channel = 0xff;
@@ -481,7 +483,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 		shost_printk(KERN_WARNING, shost,
 			"error handler thread failed to spawn, error = %ld\n",
 			PTR_ERR(shost->ehandler));
-		goto fail_index_remove;
+		goto fail;
 	}
 
 	shost->tmf_work_q = alloc_workqueue("scsi_tmf_%d",
@@ -490,17 +492,18 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	if (!shost->tmf_work_q) {
 		shost_printk(KERN_WARNING, shost,
 			     "failed to create tmf workq\n");
-		goto fail_kthread;
+		goto fail;
 	}
 	scsi_proc_hostdir_add(shost->hostt);
 	return shost;
+ fail:
+	/*
+	 * Host state is still SHOST_CREATED and that is enough to release
+	 * ->shost_gendev. scsi_host_dev_release() will free
+	 * dev_name(&shost->shost_dev).
+	 */
+	put_device(&shost->shost_gendev);
 
- fail_kthread:
-	kthread_stop(shost->ehandler);
- fail_index_remove:
-	ida_simple_remove(&host_index_ida, shost->host_no);
- fail_kfree:
-	kfree(shost);
 	return NULL;
 }
 EXPORT_SYMBOL(scsi_host_alloc);
-- 
GitLab


From 3719f4ff047e20062b8314c23ec3cab84d74c908 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 2 Jun 2021 21:30:27 +0800
Subject: [PATCH 2390/3804] scsi: core: Fix failure handling of
 scsi_add_host_with_dma()

When scsi_add_host_with_dma() returns failure, the caller will call
scsi_host_put(shost) to release everything allocated for this host
instance. Consequently we can't also free allocated stuff in
scsi_add_host_with_dma(), otherwise we will end up with a double free.

Strictly speaking, host resource allocations should have been done in
scsi_host_alloc(). However, the allocations may need information which is
not yet provided by the driver when that function is called. So leave the
allocations where they are but rely on host device's release handler to
free resources.

Link: https://lore.kernel.org/r/20210602133029.2864069-3-ming.lei@redhat.com
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Hannes Reinecke <hare@suse.de>
Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index cc6c171905bb6..d91cfca743d8a 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -278,23 +278,22 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 
 		if (!shost->work_q) {
 			error = -EINVAL;
-			goto out_free_shost_data;
+			goto out_del_dev;
 		}
 	}
 
 	error = scsi_sysfs_add_host(shost);
 	if (error)
-		goto out_destroy_host;
+		goto out_del_dev;
 
 	scsi_proc_host_add(shost);
 	scsi_autopm_put_host(shost);
 	return error;
 
- out_destroy_host:
-	if (shost->work_q)
-		destroy_workqueue(shost->work_q);
- out_free_shost_data:
-	kfree(shost->shost_data);
+	/*
+	 * Any host allocation in this function will be freed in
+	 * scsi_host_dev_release().
+	 */
  out_del_dev:
 	device_del(&shost->shost_dev);
  out_del_gendev:
@@ -304,7 +303,6 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 	pm_runtime_disable(&shost->shost_gendev);
 	pm_runtime_set_suspended(&shost->shost_gendev);
 	pm_runtime_put_noidle(&shost->shost_gendev);
-	scsi_mq_destroy_tags(shost);
  fail:
 	return error;
 }
-- 
GitLab


From 11714026c02d613c30a149c3f4c4a15047744529 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 2 Jun 2021 21:30:28 +0800
Subject: [PATCH 2391/3804] scsi: core: Put .shost_dev in failure path if host
 state changes to RUNNING

scsi_host_dev_release() only frees dev_name when host state is
SHOST_CREATED. After host state has changed to SHOST_RUNNING,
scsi_host_dev_release() no longer cleans up.

Fix this by doing a put_device(&shost->shost_dev) in the failure path when
host state is SHOST_RUNNING. Move get_device(&shost->shost_gendev) before
device_add(&shost->shost_dev) so that scsi_host_cls_release() can do a put
on this reference.

Link: https://lore.kernel.org/r/20210602133029.2864069-4-ming.lei@redhat.com
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: John Garry <john.garry@huawei.com>
Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index d91cfca743d8a..7627a1e1f41ab 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -254,12 +254,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
 
 	device_enable_async_suspend(&shost->shost_dev);
 
+	get_device(&shost->shost_gendev);
 	error = device_add(&shost->shost_dev);
 	if (error)
 		goto out_del_gendev;
 
-	get_device(&shost->shost_gendev);
-
 	if (shost->transportt->host_size) {
 		shost->shost_data = kzalloc(shost->transportt->host_size,
 					 GFP_KERNEL);
@@ -297,6 +296,11 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev,
  out_del_dev:
 	device_del(&shost->shost_dev);
  out_del_gendev:
+	/*
+	 * Host state is SHOST_RUNNING so we have to explicitly release
+	 * ->shost_dev.
+	 */
+	put_device(&shost->shost_dev);
 	device_del(&shost->shost_gendev);
  out_disable_runtime_pm:
 	device_disable_async_suspend(&shost->shost_gendev);
-- 
GitLab


From 1e0d4e6225996f05271de1ebcb1a7c9381af0b27 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 2 Jun 2021 21:30:29 +0800
Subject: [PATCH 2392/3804] scsi: core: Only put parent device if host state
 differs from SHOST_CREATED

get_device(shost->shost_gendev.parent) is called after host state has
switched to SHOST_RUNNING. scsi_host_dev_release() shouldn't release the
parent device if host state is still SHOST_CREATED.

Link: https://lore.kernel.org/r/20210602133029.2864069-5-ming.lei@redhat.com
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Hannes Reinecke <hare@suse.de>
Tested-by: John Garry <john.garry@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 7627a1e1f41ab..cd52664920e1a 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -347,7 +347,7 @@ static void scsi_host_dev_release(struct device *dev)
 
 	ida_simple_remove(&host_index_ida, shost->host_no);
 
-	if (parent)
+	if (shost->shost_state != SHOST_CREATED)
 		put_device(parent);
 	kfree(shost);
 }
-- 
GitLab


From d5befb224edbe53056c2c18999d630dafb4a08b9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 17 May 2021 16:03:23 +0200
Subject: [PATCH 2393/3804] mac80211: fix deadlock in AP/VLAN handling

Syzbot reports that when you have AP_VLAN interfaces that are up
and close the AP interface they belong to, we get a deadlock. No
surprise - since we dev_close() them with the wiphy mutex held,
which goes back into the netdev notifier in cfg80211 and tries to
acquire the wiphy mutex there.

To fix this, we need to do two things:
 1) prevent changing iftype while AP_VLANs are up, we can't
    easily fix this case since cfg80211 already calls us with
    the wiphy mutex held, but change_interface() is relatively
    rare in drivers anyway, so changing iftype isn't used much
    (and userspace has to fall back to down/change/up anyway)
 2) pull the dev_close() loop over VLANs out of the wiphy mutex
    section in the normal stop case

Cc: stable@vger.kernel.org
Reported-by: syzbot+452ea4fbbef700ff0a56@syzkaller.appspotmail.com
Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Link: https://lore.kernel.org/r/20210517160322.9b8f356c0222.I392cb0e2fa5a1a94cf2e637555d702c7e512c1ff@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 2e2f73a4aa734..137fa4c50e07a 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -476,14 +476,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
 				   GFP_KERNEL);
 	}
 
-	/* APs need special treatment */
 	if (sdata->vif.type == NL80211_IFTYPE_AP) {
-		struct ieee80211_sub_if_data *vlan, *tmpsdata;
-
-		/* down all dependent devices, that is VLANs */
-		list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
-					 u.vlan.list)
-			dev_close(vlan->dev);
 		WARN_ON(!list_empty(&sdata->u.ap.vlans));
 	} else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
 		/* remove all packets in parent bc_buf pointing to this dev */
@@ -641,6 +634,15 @@ static int ieee80211_stop(struct net_device *dev)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	/* close all dependent VLAN interfaces before locking wiphy */
+	if (sdata->vif.type == NL80211_IFTYPE_AP) {
+		struct ieee80211_sub_if_data *vlan, *tmpsdata;
+
+		list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
+					 u.vlan.list)
+			dev_close(vlan->dev);
+	}
+
 	wiphy_lock(sdata->local->hw.wiphy);
 	ieee80211_do_stop(sdata, true);
 	wiphy_unlock(sdata->local->hw.wiphy);
@@ -1591,6 +1593,9 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_AP:
+		if (!list_empty(&sdata->u.ap.vlans))
+			return -EBUSY;
+		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_ADHOC:
 	case NL80211_IFTYPE_OCB:
-- 
GitLab


From a810ed0b3370e0b3f448233d526d085effd1f829 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Mon, 12 Apr 2021 13:02:09 +0200
Subject: [PATCH 2394/3804] media: videobuf2-v4l2.c: add
 vb2_queue_change_type() helper

On some platforms a video device can capture either video data or
metadata. The driver can implement vidioc functions for both video and
metadata, and use a single vb2_queue for the buffers. However, vb2_queue
requires choosing a single buffer type, which conflicts with the idea of
capturing either video or metadata.

The buffer type of vb2_queue can be changed, but it's not obvious how
this should be done in the drivers. To help this, add a new helper
function vb2_queue_change_type() which ensures the correct checks and
documents how it can be used.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-v4l2.c | 14 ++++++++++++++
 include/media/videobuf2-v4l2.h                  | 16 ++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 7e96f67c60ba8..2988bb38ceb18 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -939,6 +939,20 @@ void vb2_queue_release(struct vb2_queue *q)
 }
 EXPORT_SYMBOL_GPL(vb2_queue_release);
 
+int vb2_queue_change_type(struct vb2_queue *q, unsigned int type)
+{
+	if (type == q->type)
+		return 0;
+
+	if (vb2_is_busy(q))
+		return -EBUSY;
+
+	q->type = type;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vb2_queue_change_type);
+
 __poll_t vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait)
 {
 	struct video_device *vfd = video_devdata(file);
diff --git a/include/media/videobuf2-v4l2.h b/include/media/videobuf2-v4l2.h
index c203047eb8340..b66585e304e26 100644
--- a/include/media/videobuf2-v4l2.h
+++ b/include/media/videobuf2-v4l2.h
@@ -261,6 +261,22 @@ int __must_check vb2_queue_init_name(struct vb2_queue *q, const char *name);
  */
 void vb2_queue_release(struct vb2_queue *q);
 
+/**
+ * vb2_queue_change_type() - change the type of an inactive vb2_queue
+ * @q:		pointer to &struct vb2_queue with videobuf2 queue.
+ * @type:	the type to change to (V4L2_BUF_TYPE_VIDEO_*)
+ *
+ * This function changes the type of the vb2_queue. This is only possible
+ * if the queue is not busy (i.e. no buffers have been allocated).
+ *
+ * vb2_queue_change_type() can be used to support multiple buffer types using
+ * the same queue. The driver can implement v4l2_ioctl_ops.vidioc_reqbufs and
+ * v4l2_ioctl_ops.vidioc_create_bufs functions and call vb2_queue_change_type()
+ * before calling vb2_ioctl_reqbufs() or vb2_ioctl_create_bufs(), and thus
+ * "lock" the buffer type until the buffers have been released.
+ */
+int vb2_queue_change_type(struct vb2_queue *q, unsigned int type);
+
 /**
  * vb2_poll() - implements poll userspace operation
  * @q:		pointer to &struct vb2_queue with videobuf2 queue.
-- 
GitLab


From 2d8b2a6431b38f4cb4046636117940b0cb0b3ecf Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Mon, 12 Apr 2021 13:02:10 +0200
Subject: [PATCH 2395/3804] media: vivid: remove stream_sliced_vbi_cap field

Vivid tracks the VBI capture mode in vivid_dev->stream_sliced_vbi_cap
field.  We can just look at the buffer type instead, and drop the field.

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/test-drivers/vivid/vivid-core.h        | 1 -
 drivers/media/test-drivers/vivid/vivid-kthread-cap.c | 2 +-
 drivers/media/test-drivers/vivid/vivid-vbi-cap.c     | 6 ++----
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/media/test-drivers/vivid/vivid-core.h b/drivers/media/test-drivers/vivid/vivid-core.h
index cdff6cd264d03..1e3c4f5a9413f 100644
--- a/drivers/media/test-drivers/vivid/vivid-core.h
+++ b/drivers/media/test-drivers/vivid/vivid-core.h
@@ -429,7 +429,6 @@ struct vivid_dev {
 	u32				vbi_cap_seq_start;
 	u32				vbi_cap_seq_count;
 	bool				vbi_cap_streaming;
-	bool				stream_sliced_vbi_cap;
 	u32				meta_cap_seq_start;
 	u32				meta_cap_seq_count;
 	bool				meta_cap_streaming;
diff --git a/drivers/media/test-drivers/vivid/vivid-kthread-cap.c b/drivers/media/test-drivers/vivid/vivid-kthread-cap.c
index c0dc609c13581..9da730ccfa94f 100644
--- a/drivers/media/test-drivers/vivid/vivid-kthread-cap.c
+++ b/drivers/media/test-drivers/vivid/vivid-kthread-cap.c
@@ -752,7 +752,7 @@ static noinline_for_stack void vivid_thread_vid_cap_tick(struct vivid_dev *dev,
 
 		v4l2_ctrl_request_setup(vbi_cap_buf->vb.vb2_buf.req_obj.req,
 					&dev->ctrl_hdl_vbi_cap);
-		if (dev->stream_sliced_vbi_cap)
+		if (vbi_cap_buf->vb.vb2_buf.type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE)
 			vivid_sliced_vbi_cap_process(dev, vbi_cap_buf);
 		else
 			vivid_raw_vbi_cap_process(dev, vbi_cap_buf);
diff --git a/drivers/media/test-drivers/vivid/vivid-vbi-cap.c b/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
index 1a9348eea7817..387df4ff01b08 100644
--- a/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
+++ b/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
@@ -255,9 +255,8 @@ int vidioc_s_fmt_vbi_cap(struct file *file, void *priv,
 
 	if (ret)
 		return ret;
-	if (dev->stream_sliced_vbi_cap && vb2_is_busy(&dev->vb_vbi_cap_q))
+	if (f->type != V4L2_BUF_TYPE_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q))
 		return -EBUSY;
-	dev->stream_sliced_vbi_cap = false;
 	dev->vbi_cap_dev.queue->type = V4L2_BUF_TYPE_VBI_CAPTURE;
 	return 0;
 }
@@ -322,10 +321,9 @@ int vidioc_s_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format
 
 	if (ret)
 		return ret;
-	if (!dev->stream_sliced_vbi_cap && vb2_is_busy(&dev->vb_vbi_cap_q))
+	if (fmt->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q))
 		return -EBUSY;
 	dev->service_set_cap = vbi->service_set;
-	dev->stream_sliced_vbi_cap = true;
 	dev->vbi_cap_dev.queue->type = V4L2_BUF_TYPE_SLICED_VBI_CAPTURE;
 	return 0;
 }
-- 
GitLab


From c9cbf021c82c3fa5b0beaa7d6e7f3f5706aeabfa Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Mon, 12 Apr 2021 13:02:11 +0200
Subject: [PATCH 2396/3804] media: vivid: use vb2_queue_change_type

Use the new vb2_queue_change_type() function in .vidioc_reqbufs and
.vidioc_create_bufs instead of changing the queue type manually in
vidioc_s_fmt_vbi_cap() and vidioc_s_fmt_sliced_vbi_cap().

This allows for more consistent behavior, as .vidioc_reqbufs and
.vidioc_create_bufs are the points at which the queue becomes "busy".

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/test-drivers/vivid/vivid-core.c | 44 ++++++++++++++++++-
 .../media/test-drivers/vivid/vivid-vbi-cap.c  |  2 -
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/media/test-drivers/vivid/vivid-core.c b/drivers/media/test-drivers/vivid/vivid-core.c
index ca0ebf6ad9ccf..d2bd2653cf54d 100644
--- a/drivers/media/test-drivers/vivid/vivid-core.c
+++ b/drivers/media/test-drivers/vivid/vivid-core.c
@@ -656,6 +656,46 @@ static const struct v4l2_file_operations vivid_radio_fops = {
 	.unlocked_ioctl = video_ioctl2,
 };
 
+static int vidioc_reqbufs(struct file *file, void *priv,
+			  struct v4l2_requestbuffers *p)
+{
+	struct video_device *vdev = video_devdata(file);
+	int r;
+
+	/*
+	 * Sliced and raw VBI capture share the same queue so we must
+	 * change the type.
+	 */
+	if (p->type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ||
+	    p->type == V4L2_BUF_TYPE_VBI_CAPTURE) {
+		r = vb2_queue_change_type(vdev->queue, p->type);
+		if (r)
+			return r;
+	}
+
+	return vb2_ioctl_reqbufs(file, priv, p);
+}
+
+static int vidioc_create_bufs(struct file *file, void *priv,
+			      struct v4l2_create_buffers *p)
+{
+	struct video_device *vdev = video_devdata(file);
+	int r;
+
+	/*
+	 * Sliced and raw VBI capture share the same queue so we must
+	 * change the type.
+	 */
+	if (p->format.type == V4L2_BUF_TYPE_SLICED_VBI_CAPTURE ||
+	    p->format.type == V4L2_BUF_TYPE_VBI_CAPTURE) {
+		r = vb2_queue_change_type(vdev->queue, p->format.type);
+		if (r)
+			return r;
+	}
+
+	return vb2_ioctl_create_bufs(file, priv, p);
+}
+
 static const struct v4l2_ioctl_ops vivid_ioctl_ops = {
 	.vidioc_querycap		= vidioc_querycap,
 
@@ -717,8 +757,8 @@ static const struct v4l2_ioctl_ops vivid_ioctl_ops = {
 	.vidioc_g_fbuf			= vidioc_g_fbuf,
 	.vidioc_s_fbuf			= vidioc_s_fbuf,
 
-	.vidioc_reqbufs			= vb2_ioctl_reqbufs,
-	.vidioc_create_bufs		= vb2_ioctl_create_bufs,
+	.vidioc_reqbufs			= vidioc_reqbufs,
+	.vidioc_create_bufs		= vidioc_create_bufs,
 	.vidioc_prepare_buf		= vb2_ioctl_prepare_buf,
 	.vidioc_querybuf		= vb2_ioctl_querybuf,
 	.vidioc_qbuf			= vb2_ioctl_qbuf,
diff --git a/drivers/media/test-drivers/vivid/vivid-vbi-cap.c b/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
index 387df4ff01b08..b65b02eeeb972 100644
--- a/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
+++ b/drivers/media/test-drivers/vivid/vivid-vbi-cap.c
@@ -257,7 +257,6 @@ int vidioc_s_fmt_vbi_cap(struct file *file, void *priv,
 		return ret;
 	if (f->type != V4L2_BUF_TYPE_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q))
 		return -EBUSY;
-	dev->vbi_cap_dev.queue->type = V4L2_BUF_TYPE_VBI_CAPTURE;
 	return 0;
 }
 
@@ -324,7 +323,6 @@ int vidioc_s_fmt_sliced_vbi_cap(struct file *file, void *fh, struct v4l2_format
 	if (fmt->type != V4L2_BUF_TYPE_SLICED_VBI_CAPTURE && vb2_is_busy(&dev->vb_vbi_cap_q))
 		return -EBUSY;
 	dev->service_set_cap = vbi->service_set;
-	dev->vbi_cap_dev.queue->type = V4L2_BUF_TYPE_SLICED_VBI_CAPTURE;
 	return 0;
 }
 
-- 
GitLab


From 269b4dd3e8b34edec44c5bb0016ee96353638618 Mon Sep 17 00:00:00 2001
From: John Cox <jc@kynesim.co.uk>
Date: Fri, 30 Apr 2021 18:48:13 +0200
Subject: [PATCH 2397/3804] media: hevc: Add sps_max_sub_layers_minus1 to
 v4l2_ctrl_hevc_sps

sps_max_sub_layers_minus1 is needed if the driver wishes to determine
whether or not a frame might be used for reference.

Signed-off-by: John Cox <jc@kynesim.co.uk>
Reviewed-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst | 3 +++
 include/media/hevc-ctrls.h                                | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 0b8061666c57c..2b5edab55bb4e 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -2707,6 +2707,9 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     * - __u8
       - ``chroma_format_idc``
       -
+    * - __u8
+      - ``sps_max_sub_layers_minus1``
+      -
     * - __u64
       - ``flags``
       - See :ref:`Sequence Parameter Set Flags <hevc_sps_flags>`
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index 226fcfa0e0261..36e4c93707ae5 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -75,8 +75,7 @@ struct v4l2_ctrl_hevc_sps {
 	__u8	num_short_term_ref_pic_sets;
 	__u8	num_long_term_ref_pics_sps;
 	__u8	chroma_format_idc;
-
-	__u8	padding;
+	__u8	sps_max_sub_layers_minus1;
 
 	__u64	flags;
 };
-- 
GitLab


From f84bc784fa614ae9dba9fb79af2b8f143248c112 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:56:59 +0200
Subject: [PATCH 2398/3804] media: atmel: atmel-isc: specialize gamma table
 into product specific

Separate the gamma table from the isc base file into the specific sama5d2
product file.
Add a pointer to the gamma table and entries count inside the platform
driver specific struct.

[hverkuil: made isc_sama5d2_gamma_table static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 47 ++-----------------
 drivers/media/platform/atmel/atmel-isc.h      | 11 +++--
 .../media/platform/atmel/atmel-sama5d2-isc.c  | 45 ++++++++++++++++++
 3 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index a017572c870cc..46d384332a581 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -176,48 +176,6 @@ struct isc_format formats_list[] = {
 
 };
 
-/* Gamma table with gamma 1/2.2 */
-const u32 isc_gamma_table[GAMMA_MAX + 1][GAMMA_ENTRIES] = {
-	/* 0 --> gamma 1/1.8 */
-	{      0x65,  0x66002F,  0x950025,  0xBB0020,  0xDB001D,  0xF8001A,
-	  0x1130018, 0x12B0017, 0x1420016, 0x1580014, 0x16D0013, 0x1810012,
-	  0x1940012, 0x1A60012, 0x1B80011, 0x1C90010, 0x1DA0010, 0x1EA000F,
-	  0x1FA000F, 0x209000F, 0x218000F, 0x227000E, 0x235000E, 0x243000E,
-	  0x251000E, 0x25F000D, 0x26C000D, 0x279000D, 0x286000D, 0x293000C,
-	  0x2A0000C, 0x2AC000C, 0x2B8000C, 0x2C4000C, 0x2D0000B, 0x2DC000B,
-	  0x2E7000B, 0x2F3000B, 0x2FE000B, 0x309000B, 0x314000B, 0x31F000A,
-	  0x32A000A, 0x334000B, 0x33F000A, 0x349000A, 0x354000A, 0x35E000A,
-	  0x368000A, 0x372000A, 0x37C000A, 0x386000A, 0x3900009, 0x399000A,
-	  0x3A30009, 0x3AD0009, 0x3B60009, 0x3BF000A, 0x3C90009, 0x3D20009,
-	  0x3DB0009, 0x3E40009, 0x3ED0009, 0x3F60009 },
-
-	/* 1 --> gamma 1/2 */
-	{      0x7F,  0x800034,  0xB50028,  0xDE0021, 0x100001E, 0x11E001B,
-	  0x1390019, 0x1520017, 0x16A0015, 0x1800014, 0x1940014, 0x1A80013,
-	  0x1BB0012, 0x1CD0011, 0x1DF0010, 0x1EF0010, 0x200000F, 0x20F000F,
-	  0x21F000E, 0x22D000F, 0x23C000E, 0x24A000E, 0x258000D, 0x265000D,
-	  0x273000C, 0x27F000D, 0x28C000C, 0x299000C, 0x2A5000C, 0x2B1000B,
-	  0x2BC000C, 0x2C8000B, 0x2D3000C, 0x2DF000B, 0x2EA000A, 0x2F5000A,
-	  0x2FF000B, 0x30A000A, 0x314000B, 0x31F000A, 0x329000A, 0x333000A,
-	  0x33D0009, 0x3470009, 0x350000A, 0x35A0009, 0x363000A, 0x36D0009,
-	  0x3760009, 0x37F0009, 0x3880009, 0x3910009, 0x39A0009, 0x3A30009,
-	  0x3AC0008, 0x3B40009, 0x3BD0008, 0x3C60008, 0x3CE0008, 0x3D60009,
-	  0x3DF0008, 0x3E70008, 0x3EF0008, 0x3F70008 },
-
-	/* 2 --> gamma 1/2.2 */
-	{      0x99,  0x9B0038,  0xD4002A,  0xFF0023, 0x122001F, 0x141001B,
-	  0x15D0019, 0x1760017, 0x18E0015, 0x1A30015, 0x1B80013, 0x1CC0012,
-	  0x1DE0011, 0x1F00010, 0x2010010, 0x2110010, 0x221000F, 0x230000F,
-	  0x23F000E, 0x24D000E, 0x25B000D, 0x269000C, 0x276000C, 0x283000C,
-	  0x28F000C, 0x29B000C, 0x2A7000C, 0x2B3000B, 0x2BF000B, 0x2CA000B,
-	  0x2D5000B, 0x2E0000A, 0x2EB000A, 0x2F5000A, 0x2FF000A, 0x30A000A,
-	  0x3140009, 0x31E0009, 0x327000A, 0x3310009, 0x33A0009, 0x3440009,
-	  0x34D0009, 0x3560009, 0x35F0009, 0x3680008, 0x3710008, 0x3790009,
-	  0x3820008, 0x38A0008, 0x3930008, 0x39B0008, 0x3A30008, 0x3AB0008,
-	  0x3B30008, 0x3BB0008, 0x3C30008, 0x3CB0007, 0x3D20008, 0x3DA0007,
-	  0x3E20007, 0x3E90007, 0x3F00008, 0x3F80007 },
-};
-
 #define ISC_IS_FORMAT_RAW(mbus_code) \
 	(((mbus_code) & 0xf000) == 0x3000)
 
@@ -691,7 +649,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 
 	regmap_write(regmap, ISC_CFA_CFG, bay_cfg | ISC_CFA_CFG_EITPOL);
 
-	gamma = &isc_gamma_table[ctrls->gamma_index][0];
+	gamma = &isc->gamma_table[ctrls->gamma_index][0];
 	regmap_bulk_write(regmap, ISC_GAM_BENTRY, gamma, GAMMA_ENTRIES);
 	regmap_bulk_write(regmap, ISC_GAM_GENTRY, gamma, GAMMA_ENTRIES);
 	regmap_bulk_write(regmap, ISC_GAM_RENTRY, gamma, GAMMA_ENTRIES);
@@ -2085,7 +2043,8 @@ static int isc_ctrl_init(struct isc_device *isc)
 
 	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_BRIGHTNESS, -1024, 1023, 1, 0);
 	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_CONTRAST, -2048, 2047, 1, 256);
-	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAMMA, 0, GAMMA_MAX, 1, 2);
+	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAMMA, 0, isc->gamma_max, 1,
+			  isc->gamma_max);
 	isc->awb_ctrl = v4l2_ctrl_new_std(hdl, &isc_awb_ops,
 					  V4L2_CID_AUTO_WHITE_BALANCE,
 					  0, 1, 1, 1);
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index fab8eca58d936..f1df47a4655bc 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -187,6 +187,10 @@ struct isc_ctrls {
  *
  * @current_subdev:	current subdevice: the sensor
  * @subdev_entities:	list of subdevice entitites
+ *
+ * @gamma_table:	pointer to the table with gamma values, has
+ *			gamma_max sets of GAMMA_ENTRIES entries each
+ * @gamma_max:		maximum number of sets of inside the gamma_table
  */
 struct isc_device {
 	struct regmap		*regmap;
@@ -245,16 +249,17 @@ struct isc_device {
 		struct v4l2_ctrl	*gr_off_ctrl;
 		struct v4l2_ctrl	*gb_off_ctrl;
 	};
-};
 
-#define GAMMA_MAX	2
 #define GAMMA_ENTRIES	64
+	/* pointer to the defined gamma table */
+	const u32	(*gamma_table)[GAMMA_ENTRIES];
+	u32		gamma_max;
+};
 
 #define ATMEL_ISC_NAME "atmel-isc"
 
 extern struct isc_format formats_list[];
 extern const struct isc_format controller_formats[];
-extern const u32 isc_gamma_table[GAMMA_MAX + 1][GAMMA_ENTRIES];
 extern const struct regmap_config isc_regmap_config;
 extern const struct v4l2_async_notifier_operations isc_async_ops;
 
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 61d9885765f47..e7156169febe4 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -54,6 +54,48 @@
 
 #define ISC_CLK_MAX_DIV		255
 
+/* Gamma table with gamma 1/2.2 */
+static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
+	/* 0 --> gamma 1/1.8 */
+	{      0x65,  0x66002F,  0x950025,  0xBB0020,  0xDB001D,  0xF8001A,
+	  0x1130018, 0x12B0017, 0x1420016, 0x1580014, 0x16D0013, 0x1810012,
+	  0x1940012, 0x1A60012, 0x1B80011, 0x1C90010, 0x1DA0010, 0x1EA000F,
+	  0x1FA000F, 0x209000F, 0x218000F, 0x227000E, 0x235000E, 0x243000E,
+	  0x251000E, 0x25F000D, 0x26C000D, 0x279000D, 0x286000D, 0x293000C,
+	  0x2A0000C, 0x2AC000C, 0x2B8000C, 0x2C4000C, 0x2D0000B, 0x2DC000B,
+	  0x2E7000B, 0x2F3000B, 0x2FE000B, 0x309000B, 0x314000B, 0x31F000A,
+	  0x32A000A, 0x334000B, 0x33F000A, 0x349000A, 0x354000A, 0x35E000A,
+	  0x368000A, 0x372000A, 0x37C000A, 0x386000A, 0x3900009, 0x399000A,
+	  0x3A30009, 0x3AD0009, 0x3B60009, 0x3BF000A, 0x3C90009, 0x3D20009,
+	  0x3DB0009, 0x3E40009, 0x3ED0009, 0x3F60009 },
+
+	/* 1 --> gamma 1/2 */
+	{      0x7F,  0x800034,  0xB50028,  0xDE0021, 0x100001E, 0x11E001B,
+	  0x1390019, 0x1520017, 0x16A0015, 0x1800014, 0x1940014, 0x1A80013,
+	  0x1BB0012, 0x1CD0011, 0x1DF0010, 0x1EF0010, 0x200000F, 0x20F000F,
+	  0x21F000E, 0x22D000F, 0x23C000E, 0x24A000E, 0x258000D, 0x265000D,
+	  0x273000C, 0x27F000D, 0x28C000C, 0x299000C, 0x2A5000C, 0x2B1000B,
+	  0x2BC000C, 0x2C8000B, 0x2D3000C, 0x2DF000B, 0x2EA000A, 0x2F5000A,
+	  0x2FF000B, 0x30A000A, 0x314000B, 0x31F000A, 0x329000A, 0x333000A,
+	  0x33D0009, 0x3470009, 0x350000A, 0x35A0009, 0x363000A, 0x36D0009,
+	  0x3760009, 0x37F0009, 0x3880009, 0x3910009, 0x39A0009, 0x3A30009,
+	  0x3AC0008, 0x3B40009, 0x3BD0008, 0x3C60008, 0x3CE0008, 0x3D60009,
+	  0x3DF0008, 0x3E70008, 0x3EF0008, 0x3F70008 },
+
+	/* 2 --> gamma 1/2.2 */
+	{      0x99,  0x9B0038,  0xD4002A,  0xFF0023, 0x122001F, 0x141001B,
+	  0x15D0019, 0x1760017, 0x18E0015, 0x1A30015, 0x1B80013, 0x1CC0012,
+	  0x1DE0011, 0x1F00010, 0x2010010, 0x2110010, 0x221000F, 0x230000F,
+	  0x23F000E, 0x24D000E, 0x25B000D, 0x269000C, 0x276000C, 0x283000C,
+	  0x28F000C, 0x29B000C, 0x2A7000C, 0x2B3000B, 0x2BF000B, 0x2CA000B,
+	  0x2D5000B, 0x2E0000A, 0x2EB000A, 0x2F5000A, 0x2FF000A, 0x30A000A,
+	  0x3140009, 0x31E0009, 0x327000A, 0x3310009, 0x33A0009, 0x3440009,
+	  0x34D0009, 0x3560009, 0x35F0009, 0x3680008, 0x3710008, 0x3790009,
+	  0x3820008, 0x38A0008, 0x3930008, 0x39B0008, 0x3A30008, 0x3AB0008,
+	  0x3B30008, 0x3BB0008, 0x3C30008, 0x3CB0007, 0x3D20008, 0x3DA0007,
+	  0x3E20007, 0x3E90007, 0x3F00008, 0x3F80007 },
+};
+
 static int isc_parse_dt(struct device *dev, struct isc_device *isc)
 {
 	struct device_node *np = dev->of_node;
@@ -150,6 +192,9 @@ static int atmel_isc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	isc->gamma_table = isc_sama5d2_gamma_table;
+	isc->gamma_max = 2;
+
 	ret = isc_pipeline_init(isc);
 	if (ret)
 		return ret;
-- 
GitLab


From d5475b3c901a007e74544e7704a1c2107dbcc115 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:00 +0200
Subject: [PATCH 2399/3804] media: atmel: atmel-isc: specialize driver name
 constant

The driver name constant must be defined based on the product driver, thus
move the constant directly to where it is required. This will allow each
ISC-based product to define its own name.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 4 ++--
 drivers/media/platform/atmel/atmel-isc.h         | 2 --
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 46d384332a581..d987a8891bd9e 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -909,7 +909,7 @@ static int isc_querycap(struct file *file, void *priv,
 {
 	struct isc_device *isc = video_drvdata(file);
 
-	strscpy(cap->driver, ATMEL_ISC_NAME, sizeof(cap->driver));
+	strscpy(cap->driver, "microchip-isc", sizeof(cap->driver));
 	strscpy(cap->card, "Atmel Image Sensor Controller", sizeof(cap->card));
 	snprintf(cap->bus_info, sizeof(cap->bus_info),
 		 "platform:%s", isc->v4l2_dev.name);
@@ -2261,7 +2261,7 @@ static int isc_async_complete(struct v4l2_async_notifier *notifier)
 	}
 
 	/* Register video device */
-	strscpy(vdev->name, ATMEL_ISC_NAME, sizeof(vdev->name));
+	strscpy(vdev->name, "microchip-isc", sizeof(vdev->name));
 	vdev->release		= video_device_release_empty;
 	vdev->fops		= &isc_fops;
 	vdev->ioctl_ops		= &isc_ioctl_ops;
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index f1df47a4655bc..8d81d9967ad23 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -256,8 +256,6 @@ struct isc_device {
 	u32		gamma_max;
 };
 
-#define ATMEL_ISC_NAME "atmel-isc"
-
 extern struct isc_format formats_list[];
 extern const struct isc_format controller_formats[];
 extern const struct regmap_config isc_regmap_config;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index e7156169febe4..7e94db04a7962 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -185,7 +185,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 		return irq;
 
 	ret = devm_request_irq(dev, irq, isc_interrupt, 0,
-			       ATMEL_ISC_NAME, isc);
+			       "atmel-sama5d2-isc", isc);
 	if (ret < 0) {
 		dev_err(dev, "can't register ISR for IRQ %u (ret=%i)\n",
 			irq, ret);
@@ -364,7 +364,7 @@ static struct platform_driver atmel_isc_driver = {
 	.probe	= atmel_isc_probe,
 	.remove	= atmel_isc_remove,
 	.driver	= {
-		.name		= ATMEL_ISC_NAME,
+		.name		= "atmel-sama5d2-isc",
 		.pm		= &atmel_isc_dev_pm_ops,
 		.of_match_table = of_match_ptr(atmel_isc_of_match),
 	},
-- 
GitLab


From 5122e8d15a0703b6d8bf6cb703536d29f8aa74cf Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:01 +0200
Subject: [PATCH 2400/3804] media: atmel: atmel-isc: add checks for limiting
 frame sizes

When calling the subdev, certain subdev drivers will overwrite the
frame size, setting sizes which are beyond the ISC's capabilities.
Thus we need to ensure the frame size is cropped to the maximum caps.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index d987a8891bd9e..02f1d1c6b06ef 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -1338,6 +1338,12 @@ static int isc_try_fmt(struct isc_device *isc, struct v4l2_format *f,
 
 	v4l2_fill_pix_format(pixfmt, &format.format);
 
+	/* Limit to Atmel ISC hardware capabilities */
+	if (pixfmt->width > ISC_MAX_SUPPORT_WIDTH)
+		pixfmt->width = ISC_MAX_SUPPORT_WIDTH;
+	if (pixfmt->height > ISC_MAX_SUPPORT_HEIGHT)
+		pixfmt->height = ISC_MAX_SUPPORT_HEIGHT;
+
 	pixfmt->field = V4L2_FIELD_NONE;
 	pixfmt->bytesperline = (pixfmt->width * isc->try_config.bpp) >> 3;
 	pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height;
@@ -1373,6 +1379,12 @@ static int isc_set_fmt(struct isc_device *isc, struct v4l2_format *f)
 	if (ret < 0)
 		return ret;
 
+	/* Limit to Atmel ISC hardware capabilities */
+	if (pixfmt->width > ISC_MAX_SUPPORT_WIDTH)
+		pixfmt->width = ISC_MAX_SUPPORT_WIDTH;
+	if (pixfmt->height > ISC_MAX_SUPPORT_HEIGHT)
+		pixfmt->height = ISC_MAX_SUPPORT_HEIGHT;
+
 	isc->fmt = *f;
 
 	if (isc->try_config.sd_format && isc->config.sd_format &&
-- 
GitLab


From f794bc16a52da70e015dca0093bba9afba7d1b6c Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:02 +0200
Subject: [PATCH 2401/3804] media: atmel: atmel-isc: specialize max width and
 max height

Move the max width and max height constants to the product specific driver
and have them in the device struct.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 28 +++++++++----------
 drivers/media/platform/atmel/atmel-isc.h      |  9 ++++--
 .../media/platform/atmel/atmel-sama5d2-isc.c  |  7 +++--
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 02f1d1c6b06ef..ed0048e79f3b4 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -1216,8 +1216,8 @@ static void isc_try_fse(struct isc_device *isc,
 	 * just use the maximum ISC can receive.
 	 */
 	if (ret) {
-		pad_cfg->try_crop.width = ISC_MAX_SUPPORT_WIDTH;
-		pad_cfg->try_crop.height = ISC_MAX_SUPPORT_HEIGHT;
+		pad_cfg->try_crop.width = isc->max_width;
+		pad_cfg->try_crop.height = isc->max_height;
 	} else {
 		pad_cfg->try_crop.width = fse.max_width;
 		pad_cfg->try_crop.height = fse.max_height;
@@ -1294,10 +1294,10 @@ static int isc_try_fmt(struct isc_device *isc, struct v4l2_format *f,
 	isc->try_config.sd_format = sd_fmt;
 
 	/* Limit to Atmel ISC hardware capabilities */
-	if (pixfmt->width > ISC_MAX_SUPPORT_WIDTH)
-		pixfmt->width = ISC_MAX_SUPPORT_WIDTH;
-	if (pixfmt->height > ISC_MAX_SUPPORT_HEIGHT)
-		pixfmt->height = ISC_MAX_SUPPORT_HEIGHT;
+	if (pixfmt->width > isc->max_width)
+		pixfmt->width = isc->max_width;
+	if (pixfmt->height > isc->max_height)
+		pixfmt->height = isc->max_height;
 
 	/*
 	 * The mbus format is the one the subdev outputs.
@@ -1339,10 +1339,10 @@ static int isc_try_fmt(struct isc_device *isc, struct v4l2_format *f,
 	v4l2_fill_pix_format(pixfmt, &format.format);
 
 	/* Limit to Atmel ISC hardware capabilities */
-	if (pixfmt->width > ISC_MAX_SUPPORT_WIDTH)
-		pixfmt->width = ISC_MAX_SUPPORT_WIDTH;
-	if (pixfmt->height > ISC_MAX_SUPPORT_HEIGHT)
-		pixfmt->height = ISC_MAX_SUPPORT_HEIGHT;
+	if (pixfmt->width > isc->max_width)
+		pixfmt->width = isc->max_width;
+	if (pixfmt->height > isc->max_height)
+		pixfmt->height = isc->max_height;
 
 	pixfmt->field = V4L2_FIELD_NONE;
 	pixfmt->bytesperline = (pixfmt->width * isc->try_config.bpp) >> 3;
@@ -1380,10 +1380,10 @@ static int isc_set_fmt(struct isc_device *isc, struct v4l2_format *f)
 		return ret;
 
 	/* Limit to Atmel ISC hardware capabilities */
-	if (pixfmt->width > ISC_MAX_SUPPORT_WIDTH)
-		pixfmt->width = ISC_MAX_SUPPORT_WIDTH;
-	if (pixfmt->height > ISC_MAX_SUPPORT_HEIGHT)
-		pixfmt->height = ISC_MAX_SUPPORT_HEIGHT;
+	if (f->fmt.pix.width > isc->max_width)
+		f->fmt.pix.width = isc->max_width;
+	if (f->fmt.pix.height > isc->max_height)
+		f->fmt.pix.height = isc->max_height;
 
 	isc->fmt = *f;
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 8d81d9967ad23..6becc6c3aaf03 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -10,9 +10,6 @@
  */
 #ifndef _ATMEL_ISC_H_
 
-#define ISC_MAX_SUPPORT_WIDTH   2592
-#define ISC_MAX_SUPPORT_HEIGHT  1944
-
 #define ISC_CLK_MAX_DIV		255
 
 enum isc_clk_id {
@@ -191,6 +188,9 @@ struct isc_ctrls {
  * @gamma_table:	pointer to the table with gamma values, has
  *			gamma_max sets of GAMMA_ENTRIES entries each
  * @gamma_max:		maximum number of sets of inside the gamma_table
+ *
+ * @max_width:		maximum frame width, dependent on the internal RAM
+ * @max_height:		maximum frame height, dependent on the internal RAM
  */
 struct isc_device {
 	struct regmap		*regmap;
@@ -254,6 +254,9 @@ struct isc_device {
 	/* pointer to the defined gamma table */
 	const u32	(*gamma_table)[GAMMA_ENTRIES];
 	u32		gamma_max;
+
+	u32		max_width;
+	u32		max_height;
 };
 
 extern struct isc_format formats_list[];
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 7e94db04a7962..72112e0256906 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -49,8 +49,8 @@
 #include "atmel-isc-regs.h"
 #include "atmel-isc.h"
 
-#define ISC_MAX_SUPPORT_WIDTH   2592
-#define ISC_MAX_SUPPORT_HEIGHT  1944
+#define ISC_SAMA5D2_MAX_SUPPORT_WIDTH   2592
+#define ISC_SAMA5D2_MAX_SUPPORT_HEIGHT  1944
 
 #define ISC_CLK_MAX_DIV		255
 
@@ -195,6 +195,9 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->gamma_table = isc_sama5d2_gamma_table;
 	isc->gamma_max = 2;
 
+	isc->max_width = ISC_SAMA5D2_MAX_SUPPORT_WIDTH;
+	isc->max_height = ISC_SAMA5D2_MAX_SUPPORT_HEIGHT;
+
 	ret = isc_pipeline_init(isc);
 	if (ret)
 		return ret;
-- 
GitLab


From cd5af39467bdc768387d841186a71bb2d947b29c Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:03 +0200
Subject: [PATCH 2402/3804] media: atmel: atmel-isc: specialize dma cfg

The DMA configuration (DCFG) is specific to the product.
Move this configuration into the product specific driver, and add the
field inside the driver struct.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 3 +--
 drivers/media/platform/atmel/atmel-isc.h         | 2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 3 +++
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index ed0048e79f3b4..07ba439eb7e90 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -724,8 +724,7 @@ static int isc_configure(struct isc_device *isc)
 	rlp_mode = isc->config.rlp_cfg_mode;
 	pipeline = isc->config.bits_pipeline;
 
-	dcfg = isc->config.dcfg_imode |
-		       ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
+	dcfg = isc->config.dcfg_imode | isc->dcfg;
 
 	pfe_cfg0  |= subdev->pfe_cfg0 | ISC_PFE_CFG0_MODE_PROGRESSIVE;
 	mask = ISC_PFE_CFG0_BPS_MASK | ISC_PFE_CFG0_HPOL_LOW |
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 6becc6c3aaf03..d14ae096fbf65 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -150,6 +150,7 @@ struct isc_ctrls {
  * @hclock:		Hclock clock input (refer datasheet)
  * @ispck:		iscpck clock (refer datasheet)
  * @isc_clks:		ISC clocks
+ * @dcfg:		DMA master configuration, architecture dependent
  *
  * @dev:		Registered device driver
  * @v4l2_dev:		v4l2 registered device
@@ -197,6 +198,7 @@ struct isc_device {
 	struct clk		*hclock;
 	struct clk		*ispck;
 	struct isc_clk		isc_clks[2];
+	u32			dcfg;
 
 	struct device		*dev;
 	struct v4l2_device	v4l2_dev;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 72112e0256906..4fa5e86f42441 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -198,6 +198,9 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->max_width = ISC_SAMA5D2_MAX_SUPPORT_WIDTH;
 	isc->max_height = ISC_SAMA5D2_MAX_SUPPORT_HEIGHT;
 
+	/* sama5d2-isc - 8 bits per beat */
+	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
+
 	ret = isc_pipeline_init(isc);
 	if (ret)
 		return ret;
-- 
GitLab


From 2ede3975c0a8530663de38b485abbaa18ede0bad Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:04 +0200
Subject: [PATCH 2403/3804] media: atmel: atmel-isc: extract CSC submodule
 config into separate function

The CSC submodule is a part of the atmel-isc pipeline, and stands for
Color Space Conversion. It is used to apply a matrix transformation to
RGB pixels to convert them to the YUV components.
The CSC submodule should be initialized in the product specific driver
as it's product specific. Other products can implement it differently.

[hverkuil: made isc_sama5d2_config_csc static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    |  8 +-------
 drivers/media/platform/atmel/atmel-isc.h         |  7 +++++++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 15 +++++++++++++++
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 07ba439eb7e90..6c709f6a408cf 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -654,13 +654,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 	regmap_bulk_write(regmap, ISC_GAM_GENTRY, gamma, GAMMA_ENTRIES);
 	regmap_bulk_write(regmap, ISC_GAM_RENTRY, gamma, GAMMA_ENTRIES);
 
-	/* Convert RGB to YUV */
-	regmap_write(regmap, ISC_CSC_YR_YG, 0x42 | (0x81 << 16));
-	regmap_write(regmap, ISC_CSC_YB_OY, 0x19 | (0x10 << 16));
-	regmap_write(regmap, ISC_CSC_CBR_CBG, 0xFDA | (0xFB6 << 16));
-	regmap_write(regmap, ISC_CSC_CBB_OCB, 0x70 | (0x80 << 16));
-	regmap_write(regmap, ISC_CSC_CRR_CRG, 0x70 | (0xFA2 << 16));
-	regmap_write(regmap, ISC_CSC_CRB_OCR, 0xFEE | (0x80 << 16));
+	isc->config_csc(isc);
 
 	regmap_write(regmap, ISC_CBC_BRIGHT, ctrls->brightness);
 	regmap_write(regmap, ISC_CBC_CONTRAST, ctrls->contrast);
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index d14ae096fbf65..bb0b4419deff4 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -192,6 +192,9 @@ struct isc_ctrls {
  *
  * @max_width:		maximum frame width, dependent on the internal RAM
  * @max_height:		maximum frame height, dependent on the internal RAM
+ *
+ * @config_csc:		pointer to a function that initializes product
+ *			specific CSC module
  */
 struct isc_device {
 	struct regmap		*regmap;
@@ -259,6 +262,10 @@ struct isc_device {
 
 	u32		max_width;
 	u32		max_height;
+
+	struct {
+		void (*config_csc)(struct isc_device *isc);
+	};
 };
 
 extern struct isc_format formats_list[];
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 4fa5e86f42441..9ea1cec7bdae1 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -54,6 +54,19 @@
 
 #define ISC_CLK_MAX_DIV		255
 
+static void isc_sama5d2_config_csc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	/* Convert RGB to YUV */
+	regmap_write(regmap, ISC_CSC_YR_YG, 0x42 | (0x81 << 16));
+	regmap_write(regmap, ISC_CSC_YB_OY, 0x19 | (0x10 << 16));
+	regmap_write(regmap, ISC_CSC_CBR_CBG, 0xFDA | (0xFB6 << 16));
+	regmap_write(regmap, ISC_CSC_CBB_OCB, 0x70 | (0x80 << 16));
+	regmap_write(regmap, ISC_CSC_CRR_CRG, 0x70 | (0xFA2 << 16));
+	regmap_write(regmap, ISC_CSC_CRB_OCR, 0xFEE | (0x80 << 16));
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -198,6 +211,8 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->max_width = ISC_SAMA5D2_MAX_SUPPORT_WIDTH;
 	isc->max_height = ISC_SAMA5D2_MAX_SUPPORT_HEIGHT;
 
+	isc->config_csc = isc_sama5d2_config_csc;
+
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
 
-- 
GitLab


From c59744de8a536130eba7916a010bba00bccee74b Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:05 +0200
Subject: [PATCH 2404/3804] media: atmel: atmel-isc-base: add id to clock debug
 message

Add the clock id to the debug message regarding clock setup

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 6c709f6a408cf..f9190fccb4827 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -281,8 +281,8 @@ static int isc_clk_enable(struct clk_hw *hw)
 	unsigned long flags;
 	unsigned int status;
 
-	dev_dbg(isc_clk->dev, "ISC CLK: %s, div = %d, parent id = %d\n",
-		__func__, isc_clk->div, isc_clk->parent_id);
+	dev_dbg(isc_clk->dev, "ISC CLK: %s, id = %d, div = %d, parent id = %d\n",
+		__func__, id, isc_clk->div, isc_clk->parent_id);
 
 	spin_lock_irqsave(&isc_clk->lock, flags);
 	regmap_update_bits(regmap, ISC_CLKCFG,
-- 
GitLab


From ffeeb01d11397bdeac0f5a1e1462eba440c23dc3 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:06 +0200
Subject: [PATCH 2405/3804] media: atmel: atmel-isc: create register offsets
 struct

Create a struct that holds register offsets that are product specific.
Add initially the CSC register.
This allows each product that contains a variant of the ISC to add their
own register offset.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c |  2 +-
 drivers/media/platform/atmel/atmel-isc-regs.h |  3 +++
 drivers/media/platform/atmel/atmel-isc.h      | 12 +++++++++++
 .../media/platform/atmel/atmel-sama5d2-isc.c  | 20 +++++++++++++------
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index f9190fccb4827..18136e58a7548 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -2326,7 +2326,7 @@ int isc_pipeline_init(struct isc_device *isc)
 		REG_FIELD(ISC_GAM_CTRL, 1, 1),
 		REG_FIELD(ISC_GAM_CTRL, 2, 2),
 		REG_FIELD(ISC_GAM_CTRL, 3, 3),
-		REG_FIELD(ISC_CSC_CTRL, 0, 0),
+		REG_FIELD(ISC_CSC_CTRL + isc->offsets.csc, 0, 0),
 		REG_FIELD(ISC_CBC_CTRL, 0, 0),
 		REG_FIELD(ISC_SUB422_CTRL, 0, 0),
 		REG_FIELD(ISC_SUB420_CTRL, 0, 0),
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index f1e160ed43512..5a65600c5f883 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -153,6 +153,9 @@
 /* ISC_Gamma Correction Green Entry Register */
 #define ISC_GAM_RENTRY	0x00000298
 
+/* Offset for CSC register specific to sama5d2 product */
+#define ISC_SAMA5D2_CSC_OFFSET	0
+
 /* Color Space Conversion Control Register */
 #define ISC_CSC_CTRL    0x00000398
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index bb0b4419deff4..ef3a0451192d6 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -144,6 +144,14 @@ struct isc_ctrls {
 
 #define ISC_PIPE_LINE_NODE_NUM	11
 
+/*
+ * struct isc_reg_offsets - ISC device register offsets
+ * @csc:		Offset for the CSC register
+ */
+struct isc_reg_offsets {
+	u32 csc;
+};
+
 /*
  * struct isc_device - ISC device driver data/config struct
  * @regmap:		Register map
@@ -195,6 +203,8 @@ struct isc_ctrls {
  *
  * @config_csc:		pointer to a function that initializes product
  *			specific CSC module
+ *
+ * @offsets:		struct holding the product specific register offsets
  */
 struct isc_device {
 	struct regmap		*regmap;
@@ -266,6 +276,8 @@ struct isc_device {
 	struct {
 		void (*config_csc)(struct isc_device *isc);
 	};
+
+	struct isc_reg_offsets		offsets;
 };
 
 extern struct isc_format formats_list[];
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 9ea1cec7bdae1..607b9e306f9db 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -59,12 +59,18 @@ static void isc_sama5d2_config_csc(struct isc_device *isc)
 	struct regmap *regmap = isc->regmap;
 
 	/* Convert RGB to YUV */
-	regmap_write(regmap, ISC_CSC_YR_YG, 0x42 | (0x81 << 16));
-	regmap_write(regmap, ISC_CSC_YB_OY, 0x19 | (0x10 << 16));
-	regmap_write(regmap, ISC_CSC_CBR_CBG, 0xFDA | (0xFB6 << 16));
-	regmap_write(regmap, ISC_CSC_CBB_OCB, 0x70 | (0x80 << 16));
-	regmap_write(regmap, ISC_CSC_CRR_CRG, 0x70 | (0xFA2 << 16));
-	regmap_write(regmap, ISC_CSC_CRB_OCR, 0xFEE | (0x80 << 16));
+	regmap_write(regmap, ISC_CSC_YR_YG + isc->offsets.csc,
+		     0x42 | (0x81 << 16));
+	regmap_write(regmap, ISC_CSC_YB_OY + isc->offsets.csc,
+		     0x19 | (0x10 << 16));
+	regmap_write(regmap, ISC_CSC_CBR_CBG + isc->offsets.csc,
+		     0xFDA | (0xFB6 << 16));
+	regmap_write(regmap, ISC_CSC_CBB_OCB + isc->offsets.csc,
+		     0x70 | (0x80 << 16));
+	regmap_write(regmap, ISC_CSC_CRR_CRG + isc->offsets.csc,
+		     0x70 | (0xFA2 << 16));
+	regmap_write(regmap, ISC_CSC_CRB_OCR + isc->offsets.csc,
+		     0xFEE | (0x80 << 16));
 }
 
 /* Gamma table with gamma 1/2.2 */
@@ -213,6 +219,8 @@ static int atmel_isc_probe(struct platform_device *pdev)
 
 	isc->config_csc = isc_sama5d2_config_csc;
 
+	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
+
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
 
-- 
GitLab


From d3b2ee5478c8569d32c4726c5920b96a9855419c Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:07 +0200
Subject: [PATCH 2406/3804] media: atmel: atmel-isc: extract CBC submodule
 config into separate function

The CBC submodule is a part of the atmel-isc pipeline, and stands for
Contrast Brightness Control. It is used to apply gains and offsets to the
luma (Y) and chroma (U, V) components of the YUV elements.
The CBC submodule should be initialized in the product specific driver
as it's product specific. Other products can implement it differently.

[hverkuil: made isc_sama5d2_config_cbc static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 4 +---
 drivers/media/platform/atmel/atmel-isc.h         | 3 +++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 9 +++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 18136e58a7548..865410e10e70a 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -655,9 +655,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 	regmap_bulk_write(regmap, ISC_GAM_RENTRY, gamma, GAMMA_ENTRIES);
 
 	isc->config_csc(isc);
-
-	regmap_write(regmap, ISC_CBC_BRIGHT, ctrls->brightness);
-	regmap_write(regmap, ISC_CBC_CONTRAST, ctrls->contrast);
+	isc->config_cbc(isc);
 }
 
 static int isc_update_profile(struct isc_device *isc)
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index ef3a0451192d6..cb47932197b15 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -203,6 +203,8 @@ struct isc_reg_offsets {
  *
  * @config_csc:		pointer to a function that initializes product
  *			specific CSC module
+ * @config_cbc:		pointer to a function that initializes product
+ *			specific CBC module
  *
  * @offsets:		struct holding the product specific register offsets
  */
@@ -275,6 +277,7 @@ struct isc_device {
 
 	struct {
 		void (*config_csc)(struct isc_device *isc);
+		void (*config_cbc)(struct isc_device *isc);
 	};
 
 	struct isc_reg_offsets		offsets;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 607b9e306f9db..b5f654f263cc6 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -73,6 +73,14 @@ static void isc_sama5d2_config_csc(struct isc_device *isc)
 		     0xFEE | (0x80 << 16));
 }
 
+static void isc_sama5d2_config_cbc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	regmap_write(regmap, ISC_CBC_BRIGHT, isc->ctrls.brightness);
+	regmap_write(regmap, ISC_CBC_CONTRAST, isc->ctrls.contrast);
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -218,6 +226,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->max_height = ISC_SAMA5D2_MAX_SUPPORT_HEIGHT;
 
 	isc->config_csc = isc_sama5d2_config_csc;
+	isc->config_cbc = isc_sama5d2_config_cbc;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
 
-- 
GitLab


From 4fc9e8a775d4b3630d1bab6ad58a02dae943787f Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:08 +0200
Subject: [PATCH 2407/3804] media: atmel: atmel-isc: add CBC to the reg offsets
 struct

The CBC submodule is a part of the atmel-isc pipeline, and stands for
Contrast Brightness Control. It is used to apply gains and offsets to the
luma (Y) and chroma (U, V) components of the YUV elements.
Add cbc to the reg offsets struct. This will allow different products
to have a different reg offset for this particular module.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 2 +-
 drivers/media/platform/atmel/atmel-isc-regs.h    | 3 +++
 drivers/media/platform/atmel/atmel-isc.h         | 2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 7 +++++--
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 865410e10e70a..b7728914fda8f 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -2325,7 +2325,7 @@ int isc_pipeline_init(struct isc_device *isc)
 		REG_FIELD(ISC_GAM_CTRL, 2, 2),
 		REG_FIELD(ISC_GAM_CTRL, 3, 3),
 		REG_FIELD(ISC_CSC_CTRL + isc->offsets.csc, 0, 0),
-		REG_FIELD(ISC_CBC_CTRL, 0, 0),
+		REG_FIELD(ISC_CBC_CTRL + isc->offsets.cbc, 0, 0),
 		REG_FIELD(ISC_SUB422_CTRL, 0, 0),
 		REG_FIELD(ISC_SUB420_CTRL, 0, 0),
 	};
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 5a65600c5f883..a5e2fe01ba9fd 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -177,6 +177,9 @@
 /* Color Space Conversion CRB OCR Register */
 #define ISC_CSC_CRB_OCR	0x000003b0
 
+/* Offset for CBC register specific to sama5d2 product */
+#define ISC_SAMA5D2_CBC_OFFSET	0
+
 /* Contrast And Brightness Control Register */
 #define ISC_CBC_CTRL    0x000003b4
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index cb47932197b15..b1fe93c93c613 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -147,9 +147,11 @@ struct isc_ctrls {
 /*
  * struct isc_reg_offsets - ISC device register offsets
  * @csc:		Offset for the CSC register
+ * @cbc:		Offset for the CBC register
  */
 struct isc_reg_offsets {
 	u32 csc;
+	u32 cbc;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index b5f654f263cc6..657d50ae9fa81 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -77,8 +77,10 @@ static void isc_sama5d2_config_cbc(struct isc_device *isc)
 {
 	struct regmap *regmap = isc->regmap;
 
-	regmap_write(regmap, ISC_CBC_BRIGHT, isc->ctrls.brightness);
-	regmap_write(regmap, ISC_CBC_CONTRAST, isc->ctrls.contrast);
+	regmap_write(regmap, ISC_CBC_BRIGHT + isc->offsets.cbc,
+		     isc->ctrls.brightness);
+	regmap_write(regmap, ISC_CBC_CONTRAST + isc->offsets.cbc,
+		     isc->ctrls.contrast);
 }
 
 /* Gamma table with gamma 1/2.2 */
@@ -229,6 +231,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->config_cbc = isc_sama5d2_config_cbc;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
+	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From 87b62b6d55dd78597b95f3df8111e3d533357b89 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:09 +0200
Subject: [PATCH 2408/3804] media: atmel: atmel-isc: add SUB422 and SUB420 to
 register offsets

The SUB submodules are a part of the atmel-isc pipeline, and stand for
Subsampling. They are used to subsample the original YUV 4:4:4 pixel data
down to either 4:2:2 or 4:2:0.
Add sub420 and sub422 to the reg offsets struct.
This will allow different products to have a different reg offset for these
particular modules.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 4 ++--
 drivers/media/platform/atmel/atmel-isc-regs.h    | 4 ++++
 drivers/media/platform/atmel/atmel-isc.h         | 4 ++++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 2 ++
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index b7728914fda8f..b398cdfdc2c98 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -2326,8 +2326,8 @@ int isc_pipeline_init(struct isc_device *isc)
 		REG_FIELD(ISC_GAM_CTRL, 3, 3),
 		REG_FIELD(ISC_CSC_CTRL + isc->offsets.csc, 0, 0),
 		REG_FIELD(ISC_CBC_CTRL + isc->offsets.cbc, 0, 0),
-		REG_FIELD(ISC_SUB422_CTRL, 0, 0),
-		REG_FIELD(ISC_SUB420_CTRL, 0, 0),
+		REG_FIELD(ISC_SUB422_CTRL + isc->offsets.sub422, 0, 0),
+		REG_FIELD(ISC_SUB420_CTRL + isc->offsets.sub420, 0, 0),
 	};
 
 	for (i = 0; i < ISC_PIPE_LINE_NODE_NUM; i++) {
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index a5e2fe01ba9fd..04839def6ef66 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -194,9 +194,13 @@
 #define ISC_CBC_CONTRAST	0x000003c0
 #define ISC_CBC_CONTRAST_MASK	GENMASK(11, 0)
 
+/* Offset for SUB422 register specific to sama5d2 product */
+#define ISC_SAMA5D2_SUB422_OFFSET	0
 /* Subsampling 4:4:4 to 4:2:2 Control Register */
 #define ISC_SUB422_CTRL 0x000003c4
 
+/* Offset for SUB420 register specific to sama5d2 product */
+#define ISC_SAMA5D2_SUB420_OFFSET	0
 /* Subsampling 4:2:2 to 4:2:0 Control Register */
 #define ISC_SUB420_CTRL 0x000003cc
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index b1fe93c93c613..fb7257872e7cc 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -148,10 +148,14 @@ struct isc_ctrls {
  * struct isc_reg_offsets - ISC device register offsets
  * @csc:		Offset for the CSC register
  * @cbc:		Offset for the CBC register
+ * @sub422:		Offset for the SUB422 register
+ * @sub420:		Offset for the SUB420 register
  */
 struct isc_reg_offsets {
 	u32 csc;
 	u32 cbc;
+	u32 sub422;
+	u32 sub420;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 657d50ae9fa81..8211ee9bd8b99 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -232,6 +232,8 @@ static int atmel_isc_probe(struct platform_device *pdev)
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
 	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
+	isc->offsets.sub422 = ISC_SAMA5D2_SUB422_OFFSET;
+	isc->offsets.sub420 = ISC_SAMA5D2_SUB420_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From 40ee17d1b41ccc8c65f831d37008e25d3ae03646 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:10 +0200
Subject: [PATCH 2409/3804] media: atmel: atmel-isc: add RLP to register
 offsets

The RLP submodule is a part of the atmel-isc pipeline, and stands for
Rounding, Limiting and Packing. It is used to extract specific data from the
ISC pipeline. For example if we want to output greyscale 8 bit, we would
use limiting to 8 bits, and packing to Luma component only.
Add rlp to the reg offsets struct.
This will allow different products to have a different reg offset for this
particular module.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 4 ++--
 drivers/media/platform/atmel/atmel-isc-regs.h    | 2 ++
 drivers/media/platform/atmel/atmel-isc.h         | 2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index b398cdfdc2c98..25c90b821067c 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -726,8 +726,8 @@ static int isc_configure(struct isc_device *isc)
 
 	regmap_update_bits(regmap, ISC_PFE_CFG0, mask, pfe_cfg0);
 
-	regmap_update_bits(regmap, ISC_RLP_CFG, ISC_RLP_CFG_MODE_MASK,
-			   rlp_mode);
+	regmap_update_bits(regmap, ISC_RLP_CFG + isc->offsets.rlp,
+			   ISC_RLP_CFG_MODE_MASK, rlp_mode);
 
 	regmap_write(regmap, ISC_DCFG, dcfg);
 
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 04839def6ef66..2205484e04fc3 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -204,6 +204,8 @@
 /* Subsampling 4:2:2 to 4:2:0 Control Register */
 #define ISC_SUB420_CTRL 0x000003cc
 
+/* Offset for RLP register specific to sama5d2 product */
+#define ISC_SAMA5D2_RLP_OFFSET	0
 /* Rounding, Limiting and Packing Configuration Register */
 #define ISC_RLP_CFG     0x000003d0
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index fb7257872e7cc..b7d4e7fab5705 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -150,12 +150,14 @@ struct isc_ctrls {
  * @cbc:		Offset for the CBC register
  * @sub422:		Offset for the SUB422 register
  * @sub420:		Offset for the SUB420 register
+ * @rlp:		Offset for the RLP register
  */
 struct isc_reg_offsets {
 	u32 csc;
 	u32 cbc;
 	u32 sub422;
 	u32 sub420;
+	u32 rlp;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 8211ee9bd8b99..06257806c8ae7 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -234,6 +234,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
 	isc->offsets.sub422 = ISC_SAMA5D2_SUB422_OFFSET;
 	isc->offsets.sub420 = ISC_SAMA5D2_SUB420_OFFSET;
+	isc->offsets.rlp = ISC_SAMA5D2_RLP_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From 1a3ac5d51541b6a816380bafd7c3e240ff9542d9 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:11 +0200
Subject: [PATCH 2410/3804] media: atmel: atmel-isc: add HIS to register
 offsets

The HIS submodule is a part of the atmel-isc pipeline, and stands for
Histogram. This module performs a color histogram that can be read and used
by the main processor.
Add his to the reg offsets struct.
This will allow different products to have a different reg offset for this
particular module.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 11 +++++++----
 drivers/media/platform/atmel/atmel-isc-regs.h    |  2 ++
 drivers/media/platform/atmel/atmel-isc.h         |  2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c |  1 +
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 25c90b821067c..5c95aa45cf6ca 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -686,12 +686,13 @@ static void isc_set_histogram(struct isc_device *isc, bool enable)
 	struct isc_ctrls *ctrls = &isc->ctrls;
 
 	if (enable) {
-		regmap_write(regmap, ISC_HIS_CFG,
+		regmap_write(regmap, ISC_HIS_CFG + isc->offsets.his,
 			     ISC_HIS_CFG_MODE_GR |
 			     (isc->config.sd_format->cfa_baycfg
 					<< ISC_HIS_CFG_BAYSEL_SHIFT) |
 					ISC_HIS_CFG_RAR);
-		regmap_write(regmap, ISC_HIS_CTRL, ISC_HIS_CTRL_EN);
+		regmap_write(regmap, ISC_HIS_CTRL + isc->offsets.his,
+			     ISC_HIS_CTRL_EN);
 		regmap_write(regmap, ISC_INTEN, ISC_INT_HISDONE);
 		ctrls->hist_id = ISC_HIS_CFG_MODE_GR;
 		isc_update_profile(isc);
@@ -700,7 +701,8 @@ static void isc_set_histogram(struct isc_device *isc, bool enable)
 		ctrls->hist_stat = HIST_ENABLED;
 	} else {
 		regmap_write(regmap, ISC_INTDIS, ISC_INT_HISDONE);
-		regmap_write(regmap, ISC_HIS_CTRL, ISC_HIS_CTRL_DIS);
+		regmap_write(regmap, ISC_HIS_CTRL + isc->offsets.his,
+			     ISC_HIS_CTRL_DIS);
 
 		ctrls->hist_stat = HIST_DISABLED;
 	}
@@ -1836,7 +1838,8 @@ static void isc_awb_work(struct work_struct *w)
 			ctrls->awb = ISC_WB_NONE;
 		}
 	}
-	regmap_write(regmap, ISC_HIS_CFG, hist_id | baysel | ISC_HIS_CFG_RAR);
+	regmap_write(regmap, ISC_HIS_CFG + isc->offsets.his,
+		     hist_id | baysel | ISC_HIS_CFG_RAR);
 	isc_update_profile(isc);
 	/* if awb has been disabled, we don't need to start another histogram */
 	if (ctrls->awb)
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 2205484e04fc3..0ab280ab59ecc 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -224,6 +224,8 @@
 #define ISC_RLP_CFG_MODE_YYCC_LIMITED   0xc
 #define ISC_RLP_CFG_MODE_MASK           GENMASK(3, 0)
 
+/* Offset for HIS register specific to sama5d2 product */
+#define ISC_SAMA5D2_HIS_OFFSET	0
 /* Histogram Control Register */
 #define ISC_HIS_CTRL	0x000003d4
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index b7d4e7fab5705..652285dc9f528 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -151,6 +151,7 @@ struct isc_ctrls {
  * @sub422:		Offset for the SUB422 register
  * @sub420:		Offset for the SUB420 register
  * @rlp:		Offset for the RLP register
+ * @his:		Offset for the HIS related registers
  */
 struct isc_reg_offsets {
 	u32 csc;
@@ -158,6 +159,7 @@ struct isc_reg_offsets {
 	u32 sub422;
 	u32 sub420;
 	u32 rlp;
+	u32 his;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 06257806c8ae7..77be62a070dbe 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -235,6 +235,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.sub422 = ISC_SAMA5D2_SUB422_OFFSET;
 	isc->offsets.sub420 = ISC_SAMA5D2_SUB420_OFFSET;
 	isc->offsets.rlp = ISC_SAMA5D2_RLP_OFFSET;
+	isc->offsets.his = ISC_SAMA5D2_HIS_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From e891009857716e17129899fe6345e7968010917f Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:12 +0200
Subject: [PATCH 2411/3804] media: atmel: atmel-isc: add DMA to register
 offsets

The DMA submodule is a part of the atmel-isc pipeline, and stands for
Direct Memory Access. It acts like a master on the AXI bus of the SoC, and
can directly write the RAM area with the pixel data from the ISC internal
sram.
Add dma to the reg offsets struct.
This will allow different products to have a different reg offset for this
particular module.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 19 ++++++++++++-------
 drivers/media/platform/atmel/atmel-isc-regs.h |  3 +++
 drivers/media/platform/atmel/atmel-isc.h      |  2 ++
 .../media/platform/atmel/atmel-sama5d2-isc.c  |  1 +
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 5c95aa45cf6ca..e010429fc44de 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -601,16 +601,20 @@ static void isc_start_dma(struct isc_device *isc)
 			   ISC_PFE_CFG0_COLEN | ISC_PFE_CFG0_ROWEN);
 
 	addr0 = vb2_dma_contig_plane_dma_addr(&isc->cur_frm->vb.vb2_buf, 0);
-	regmap_write(regmap, ISC_DAD0, addr0);
+	regmap_write(regmap, ISC_DAD0 + isc->offsets.dma, addr0);
 
 	switch (isc->config.fourcc) {
 	case V4L2_PIX_FMT_YUV420:
-		regmap_write(regmap, ISC_DAD1, addr0 + (sizeimage * 2) / 3);
-		regmap_write(regmap, ISC_DAD2, addr0 + (sizeimage * 5) / 6);
+		regmap_write(regmap, ISC_DAD1 + isc->offsets.dma,
+			     addr0 + (sizeimage * 2) / 3);
+		regmap_write(regmap, ISC_DAD2 + isc->offsets.dma,
+			     addr0 + (sizeimage * 5) / 6);
 		break;
 	case V4L2_PIX_FMT_YUV422P:
-		regmap_write(regmap, ISC_DAD1, addr0 + sizeimage / 2);
-		regmap_write(regmap, ISC_DAD2, addr0 + (sizeimage * 3) / 4);
+		regmap_write(regmap, ISC_DAD1 + isc->offsets.dma,
+			     addr0 + sizeimage / 2);
+		regmap_write(regmap, ISC_DAD2 + isc->offsets.dma,
+			     addr0 + (sizeimage * 3) / 4);
 		break;
 	default:
 		break;
@@ -618,7 +622,8 @@ static void isc_start_dma(struct isc_device *isc)
 
 	dctrl_dview = isc->config.dctrl_dview;
 
-	regmap_write(regmap, ISC_DCTRL, dctrl_dview | ISC_DCTRL_IE_IS);
+	regmap_write(regmap, ISC_DCTRL + isc->offsets.dma,
+		     dctrl_dview | ISC_DCTRL_IE_IS);
 	spin_lock(&isc->awb_lock);
 	regmap_write(regmap, ISC_CTRLEN, ISC_CTRL_CAPTURE);
 	spin_unlock(&isc->awb_lock);
@@ -731,7 +736,7 @@ static int isc_configure(struct isc_device *isc)
 	regmap_update_bits(regmap, ISC_RLP_CFG + isc->offsets.rlp,
 			   ISC_RLP_CFG_MODE_MASK, rlp_mode);
 
-	regmap_write(regmap, ISC_DCFG, dcfg);
+	regmap_write(regmap, ISC_DCFG + isc->offsets.dma, dcfg);
 
 	/* Set the pipeline */
 	isc_set_pipeline(isc, pipeline);
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 0ab280ab59ecc..4940998c82a2c 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -247,6 +247,9 @@
 
 #define ISC_HIS_CFG_RAR			BIT(8)
 
+/* Offset for DMA register specific to sama5d2 product */
+#define ISC_SAMA5D2_DMA_OFFSET	0
+
 /* DMA Configuration Register */
 #define ISC_DCFG        0x000003e0
 #define ISC_DCFG_IMODE_PACKED8          0x0
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 652285dc9f528..52a9199d65ee1 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -152,6 +152,7 @@ struct isc_ctrls {
  * @sub420:		Offset for the SUB420 register
  * @rlp:		Offset for the RLP register
  * @his:		Offset for the HIS related registers
+ * @dma:		Offset for the DMA related registers
  */
 struct isc_reg_offsets {
 	u32 csc;
@@ -160,6 +161,7 @@ struct isc_reg_offsets {
 	u32 sub420;
 	u32 rlp;
 	u32 his;
+	u32 dma;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 77be62a070dbe..0b31dcf1443b3 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -236,6 +236,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.sub420 = ISC_SAMA5D2_SUB420_OFFSET;
 	isc->offsets.rlp = ISC_SAMA5D2_RLP_OFFSET;
 	isc->offsets.his = ISC_SAMA5D2_HIS_OFFSET;
+	isc->offsets.dma = ISC_SAMA5D2_DMA_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From d51470069eb169d54f2b14d07d613b69f62615b8 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:13 +0200
Subject: [PATCH 2412/3804] media: atmel: atmel-isc: add support for version
 register

Add support for version register and print it at probe time.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-regs.h    | 5 +++++
 drivers/media/platform/atmel/atmel-isc.h         | 2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 5 +++++
 3 files changed, 12 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 4940998c82a2c..344668dcfcf4c 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -295,6 +295,11 @@
 /* DMA Address 2 Register */
 #define ISC_DAD2        0x000003fc
 
+/* Offset for version register specific to sama5d2 product */
+#define ISC_SAMA5D2_VERSION_OFFSET	0
+/* Version Register */
+#define ISC_VERSION	0x0000040c
+
 /* Histogram Entry */
 #define ISC_HIS_ENTRY	0x00000410
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 52a9199d65ee1..676a5be1ee8ca 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -153,6 +153,7 @@ struct isc_ctrls {
  * @rlp:		Offset for the RLP register
  * @his:		Offset for the HIS related registers
  * @dma:		Offset for the DMA related registers
+ * @version:		Offset for the version register
  */
 struct isc_reg_offsets {
 	u32 csc;
@@ -162,6 +163,7 @@ struct isc_reg_offsets {
 	u32 rlp;
 	u32 his;
 	u32 dma;
+	u32 version;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 0b31dcf1443b3..059dc5d3c6647 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -189,6 +189,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	struct isc_subdev_entity *subdev_entity;
 	int irq;
 	int ret;
+	u32 ver;
 
 	isc = devm_kzalloc(dev, sizeof(*isc), GFP_KERNEL);
 	if (!isc)
@@ -237,6 +238,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.rlp = ISC_SAMA5D2_RLP_OFFSET;
 	isc->offsets.his = ISC_SAMA5D2_HIS_OFFSET;
 	isc->offsets.dma = ISC_SAMA5D2_DMA_OFFSET;
+	isc->offsets.version = ISC_SAMA5D2_VERSION_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
@@ -332,6 +334,9 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	pm_runtime_enable(dev);
 	pm_request_idle(dev);
 
+	regmap_read(isc->regmap, ISC_VERSION + isc->offsets.version, &ver);
+	dev_info(dev, "Microchip ISC version %x\n", ver);
+
 	return 0;
 
 cleanup_subdev:
-- 
GitLab


From 629de518e6f3b81bc1d7486a9b2e0a8fb100e18e Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:14 +0200
Subject: [PATCH 2413/3804] media: atmel: atmel-isc: add his_entry to register
 offsets

Add his_entry to the reg offsets struct.
This will allow different products to have a different reg offset for this
particular module.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 3 ++-
 drivers/media/platform/atmel/atmel-isc-regs.h    | 2 ++
 drivers/media/platform/atmel/atmel-isc.h         | 2 ++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 1 +
 4 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index e010429fc44de..cfe60b2882acf 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -1684,7 +1684,8 @@ static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max)
 	*min = 0;
 	*max = HIST_ENTRIES;
 
-	regmap_bulk_read(regmap, ISC_HIS_ENTRY, hist_entry, HIST_ENTRIES);
+	regmap_bulk_read(regmap, ISC_HIS_ENTRY + isc->offsets.his_entry,
+			 hist_entry, HIST_ENTRIES);
 
 	*hist_count = 0;
 	/*
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 344668dcfcf4c..a15c13e1a833f 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -300,6 +300,8 @@
 /* Version Register */
 #define ISC_VERSION	0x0000040c
 
+/* Offset for version register specific to sama5d2 product */
+#define ISC_SAMA5D2_HIS_ENTRY_OFFSET	0
 /* Histogram Entry */
 #define ISC_HIS_ENTRY	0x00000410
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 676a5be1ee8ca..c5d956fd20a0d 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -154,6 +154,7 @@ struct isc_ctrls {
  * @his:		Offset for the HIS related registers
  * @dma:		Offset for the DMA related registers
  * @version:		Offset for the version register
+ * @his_entry:		Offset for the HIS entries registers
  */
 struct isc_reg_offsets {
 	u32 csc;
@@ -164,6 +165,7 @@ struct isc_reg_offsets {
 	u32 his;
 	u32 dma;
 	u32 version;
+	u32 his_entry;
 };
 
 /*
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 059dc5d3c6647..72cb91b4b8baa 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -239,6 +239,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.his = ISC_SAMA5D2_HIS_OFFSET;
 	isc->offsets.dma = ISC_SAMA5D2_DMA_OFFSET;
 	isc->offsets.version = ISC_SAMA5D2_VERSION_OFFSET;
+	isc->offsets.his_entry = ISC_SAMA5D2_HIS_ENTRY_OFFSET;
 
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
-- 
GitLab


From 2d2ddb589d5925ec7f2d1b17d88a2b36bf536105 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 28 May 2021 14:34:38 +0200
Subject: [PATCH 2414/3804] drm/ttm: fix deref of bo->ttm without holding the
 lock v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to grab the resv lock first before doing that check.

v2 (chk): simplify the change for -fixes

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210528130041.1683-1-christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c     | 5 ++++-
 drivers/gpu/drm/ttm/ttm_device.c | 8 +-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cfd0b92923973..ebcffe794adb8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1172,7 +1172,10 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
 	if (!ttm_bo_evict_swapout_allowable(bo, ctx, &locked, NULL))
 		return -EBUSY;
 
-	if (!ttm_bo_get_unless_zero(bo)) {
+	if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
+	    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED ||
+	    !ttm_bo_get_unless_zero(bo)) {
 		if (locked)
 			dma_resv_unlock(bo->base.resv);
 		return -EBUSY;
diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index a1dcf7d55c903..3d9c62b93e299 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -143,14 +143,8 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 
 		for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
 			list_for_each_entry(bo, &man->lru[j], lru) {
-				uint32_t num_pages;
+				uint32_t num_pages = PFN_UP(bo->base.size);
 
-				if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SG ||
-				    bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
-					continue;
-
-				num_pages = bo->ttm->num_pages;
 				ret = ttm_bo_swapout(bo, ctx, gfp_flags);
 				/* ttm_bo_swapout has dropped the lru_lock */
 				if (!ret)
-- 
GitLab


From 5ca54404e68de8560ca15e8d0e6b625fd05ceeaf Mon Sep 17 00:00:00 2001
From: ChenXiaoSong <chenxiaosong2@huawei.com>
Date: Tue, 8 Jun 2021 16:48:16 +0800
Subject: [PATCH 2415/3804] perf: qcom: Remove redundant dev_err call in
 qcom_l3_cache_pmu_probe()

There is already an error message within devm_ioremap_resource(),
so remove the dev_err() call to avoid a redundant error message.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
Link: https://lore.kernel.org/r/20210608084816.1046485-1-chenxiaosong2@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/qcom_l3_pmu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index 081273543c6bc..c76f6f21d2a80 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -767,10 +767,8 @@ static int qcom_l3_cache_pmu_probe(struct platform_device *pdev)
 
 	memrc = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	l3pmu->regs = devm_ioremap_resource(&pdev->dev, memrc);
-	if (IS_ERR(l3pmu->regs)) {
-		dev_err(&pdev->dev, "Can't map PMU @%pa\n", &memrc->start);
+	if (IS_ERR(l3pmu->regs))
 		return PTR_ERR(l3pmu->regs);
-	}
 
 	qcom_l3_cache__init(l3pmu);
 
-- 
GitLab


From 59d697a99daa4723b62f9b07f41191cca1e44f3f Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sun, 6 Jun 2021 00:15:14 +0200
Subject: [PATCH 2416/3804] perf/hisi: Constify static attribute_group structs

These are only put in an array of pointers to const attribute_group
structs. Make them const like the other static attribute_group structs
to allow the compiler to put them in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Link: https://lore.kernel.org/r/20210605221514.73449-1-rikard.falkeborn@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/hisilicon/hisi_uncore_pa_pmu.c   | 2 +-
 drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
index e1f71eab56409..83264ec0a9573 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c
@@ -333,7 +333,7 @@ static struct attribute *hisi_pa_pmu_identifier_attrs[] = {
 	NULL
 };
 
-static struct attribute_group hisi_pa_pmu_identifier_group = {
+static const struct attribute_group hisi_pa_pmu_identifier_group = {
 	.attrs = hisi_pa_pmu_identifier_attrs,
 };
 
diff --git a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
index 08e028d9a4065..6aedc303ff56a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_sllc_pmu.c
@@ -366,7 +366,7 @@ static struct attribute *hisi_sllc_pmu_identifier_attrs[] = {
 	NULL
 };
 
-static struct attribute_group hisi_sllc_pmu_identifier_group = {
+static const struct attribute_group hisi_sllc_pmu_identifier_group = {
 	.attrs = hisi_sllc_pmu_identifier_attrs,
 };
 
-- 
GitLab


From 57c9e21a49b1c196cda28f54de9a5d556ac93f20 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Tue, 8 Jun 2021 10:46:00 +0800
Subject: [PATCH 2417/3804] ALSA: hda/realtek: headphone and mic don't work on
 an Acer laptop

There are two issues on this machine. The first is that the mic's
plug/unplug can't be detected, because the mic is set to manual
detection mode; apply ALC255_FIXUP_XIAOMI_HEADSET_MIC to set it to
auto detection mode. The second is that the headphone's plug/unplug
can't be detected by pulseaudio, because pulseaudio uses
ucm2/sof-hda-dsp on this machine and the ucm2 only handles
'Headphone Jack'. On this machine the headphone's pincfg sets the
location to Front, so the alsa mixer name is "Front Headphone Jack"
instead of "Headphone Jack"; override the pincfg to change the
location to Left.

BugLink: http://bugs.launchpad.net/bugs/1930188
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20210608024600.6198-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 215beb3ac678b..11ba8e351ad41 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6568,6 +6568,7 @@ enum {
 	ALC285_FIXUP_HP_SPECTRE_X360,
 	ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP,
 	ALC623_FIXUP_LENOVO_THINKSTATION_P340,
+	ALC255_FIXUP_ACER_HEADPHONE_AND_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -8146,6 +8147,15 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC283_FIXUP_HEADSET_MIC,
 	},
+	[ALC255_FIXUP_ACER_HEADPHONE_AND_MIC] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x21, 0x03211030 }, /* Change the Headphone location to Left */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC255_FIXUP_XIAOMI_HEADSET_MIC
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -8182,6 +8192,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
 	SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
 	SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
@@ -8740,6 +8751,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 	{.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"},
 	{.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"},
 	{.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"},
+	{.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"},
 	{}
 };
 #define ALC225_STANDARD_PINS \
-- 
GitLab


From 600dd2a7e8b62170d177381cc1303861f48f9780 Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Tue, 8 Jun 2021 19:47:48 +0800
Subject: [PATCH 2418/3804] ALSA: hda/realtek: fix mute/micmute LEDs for HP
 ZBook Power G8

The HP ZBook Power G8 uses the ALC236 codec, which uses 0x02 to
control the mute LED and 0x01 to control the micmute LED.
Therefore, add a quirk to make it work.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210608114750.32009-1-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 11ba8e351ad41..ab5113cccffae 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -8346,6 +8346,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
 	SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+	SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
 	SND_PCI_QUIRK(0x103c, 0x8898, "HP EliteBook 845 G8 Notebook PC", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
-- 
GitLab


From 27f2a4db76e8d8a8b601fc1c6a7a17f88bd907ab Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 21 May 2021 18:26:24 -0700
Subject: [PATCH 2419/3804] Makefile: fix GDB warning with CONFIG_RELR

GDB produces the following warning when debugging kernels built with
CONFIG_RELR:

BFD: /android0/linux-next/vmlinux: unknown type [0x13] section `.relr.dyn'

The warning is printed when such a kernel is loaded into GDB. It can
also prevent debugging symbols using such relocations.

Peter suggests:
  [That flag] means that lld will use dynamic tags and section type
  numbers in the OS-specific range rather than the generic range. The
  kernel itself doesn't care about these numbers; it determines the
  location of the RELR section using symbols defined by a linker script.

Link: https://github.com/ClangBuiltLinux/linux/issues/1057
Suggested-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20210522012626.2811297-1-ndesaulniers@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Makefile                      | 2 +-
 scripts/tools-support-relr.sh | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index e4468353425a6..e38c74d0433c2 100644
--- a/Makefile
+++ b/Makefile
@@ -1031,7 +1031,7 @@ LDFLAGS_vmlinux	+= $(call ld-option, -X,)
 endif
 
 ifeq ($(CONFIG_RELR),y)
-LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr
+LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr --use-android-relr-tags
 endif
 
 # We never want expected sections to be placed heuristically by the
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh
index 45e8aa360b457..cb55878bd5b81 100755
--- a/scripts/tools-support-relr.sh
+++ b/scripts/tools-support-relr.sh
@@ -7,7 +7,8 @@ trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
 cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1
 void *p = &p;
 END
-$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr \
+  --use-android-relr-tags -o $tmp_file
 
 # Despite printing an error message, GNU nm still exits with exit code 0 if it
 # sees a relr section. So we need to check that nothing is printed to stderr.
-- 
GitLab


From a911e927443477d67f4c577bfb68b0d41680f4a0 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:15 +0200
Subject: [PATCH 2420/3804] media: atmel: atmel-isc: add register description
 for additional modules

Add register descriptions for additional pipeline modules: the
Defective Pixel Correction (DPC) and the Vertical and Horizontal Scaler (VHXS).

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-regs.h | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index a15c13e1a833f..457eed74cda9f 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -90,6 +90,46 @@
 #define ISC_INT_DDONE		BIT(8)
 #define ISC_INT_HISDONE		BIT(12)
 
+/* ISC DPC Control Register */
+#define ISC_DPC_CTRL	0x40
+
+#define ISC_DPC_CTRL_DPCEN	BIT(0)
+#define ISC_DPC_CTRL_GDCEN	BIT(1)
+#define ISC_DPC_CTRL_BLCEN	BIT(2)
+
+/* ISC DPC Config Register */
+#define ISC_DPC_CFG	0x44
+
+#define ISC_DPC_CFG_BAYSEL_SHIFT	0
+
+#define ISC_DPC_CFG_EITPOL		BIT(4)
+
+#define ISC_DPC_CFG_TA_ENABLE		BIT(14)
+#define ISC_DPC_CFG_TC_ENABLE		BIT(13)
+#define ISC_DPC_CFG_TM_ENABLE		BIT(12)
+
+#define ISC_DPC_CFG_RE_MODE		BIT(17)
+
+#define ISC_DPC_CFG_GDCCLP_SHIFT	20
+#define ISC_DPC_CFG_GDCCLP_MASK		GENMASK(22, 20)
+
+#define ISC_DPC_CFG_BLOFF_SHIFT		24
+#define ISC_DPC_CFG_BLOFF_MASK		GENMASK(31, 24)
+
+#define ISC_DPC_CFG_BAYCFG_SHIFT	0
+#define ISC_DPC_CFG_BAYCFG_MASK		GENMASK(1, 0)
+/* ISC DPC Threshold Median Register */
+#define ISC_DPC_THRESHM	0x48
+
+/* ISC DPC Threshold Closest Register */
+#define ISC_DPC_THRESHC	0x4C
+
+/* ISC DPC Threshold Average Register */
+#define ISC_DPC_THRESHA	0x50
+
+/* ISC DPC STatus Register */
+#define ISC_DPC_SR	0x54
+
 /* ISC White Balance Control Register */
 #define ISC_WB_CTRL     0x00000058
 
@@ -153,6 +193,33 @@
 /* ISC_Gamma Correction Green Entry Register */
 #define ISC_GAM_RENTRY	0x00000298
 
+/* ISC VHXS Control Register */
+#define ISC_VHXS_CTRL	0x398
+
+/* ISC VHXS Source Size Register */
+#define ISC_VHXS_SS	0x39C
+
+/* ISC VHXS Destination Size Register */
+#define ISC_VHXS_DS	0x3A0
+
+/* ISC Vertical Factor Register */
+#define ISC_VXS_FACT	0x3a4
+
+/* ISC Horizontal Factor Register */
+#define ISC_HXS_FACT	0x3a8
+
+/* ISC Vertical Config Register */
+#define ISC_VXS_CFG	0x3ac
+
+/* ISC Horizontal Config Register */
+#define ISC_HXS_CFG	0x3b0
+
+/* ISC Vertical Tap Register */
+#define ISC_VXS_TAP	0x3b4
+
+/* ISC Horizontal Tap Register */
+#define ISC_HXS_TAP	0x434
+
 /* Offset for CSC register specific to sama5d2 product */
 #define ISC_SAMA5D2_CSC_OFFSET	0
 
-- 
GitLab


From 5507b10109253a19765880fffff6e9fff3810868 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:16 +0200
Subject: [PATCH 2421/3804] media: atmel: atmel-isc: extend pipeline with extra
 modules

Newer ISC pipelines have the additional modules of
Defective Pixel Correction -> DPC itself,
Defective Pixel Correction -> Green Disparity Correction (DPC_GDC)
Defective Pixel Correction -> Black Level Correction (DPC_BLC)
Vertical and Horizontal Scaler -> VHXS

Some products have this full pipeline (sama7g5), other products do not (sama5d2)

Add the modules to the isc base, and also extend the register range to include
the modules.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 11 ++++++--
 drivers/media/platform/atmel/atmel-isc.h      | 28 +++++++++++--------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index cfe60b2882acf..a6b62e009c383 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -2324,8 +2324,14 @@ int isc_pipeline_init(struct isc_device *isc)
 	struct regmap_field *regs;
 	unsigned int i;
 
-	/* WB-->CFA-->CC-->GAM-->CSC-->CBC-->SUB422-->SUB420 */
+	/*
+	 * DPCEN-->GDCEN-->BLCEN-->WB-->CFA-->CC-->
+	 * GAM-->VHXS-->CSC-->CBC-->SUB422-->SUB420
+	 */
 	const struct reg_field regfields[ISC_PIPE_LINE_NODE_NUM] = {
+		REG_FIELD(ISC_DPC_CTRL, 0, 0),
+		REG_FIELD(ISC_DPC_CTRL, 1, 1),
+		REG_FIELD(ISC_DPC_CTRL, 2, 2),
 		REG_FIELD(ISC_WB_CTRL, 0, 0),
 		REG_FIELD(ISC_CFA_CTRL, 0, 0),
 		REG_FIELD(ISC_CC_CTRL, 0, 0),
@@ -2333,6 +2339,7 @@ int isc_pipeline_init(struct isc_device *isc)
 		REG_FIELD(ISC_GAM_CTRL, 1, 1),
 		REG_FIELD(ISC_GAM_CTRL, 2, 2),
 		REG_FIELD(ISC_GAM_CTRL, 3, 3),
+		REG_FIELD(ISC_VHXS_CTRL, 0, 0),
 		REG_FIELD(ISC_CSC_CTRL + isc->offsets.csc, 0, 0),
 		REG_FIELD(ISC_CBC_CTRL + isc->offsets.cbc, 0, 0),
 		REG_FIELD(ISC_SUB422_CTRL + isc->offsets.sub422, 0, 0),
@@ -2351,7 +2358,7 @@ int isc_pipeline_init(struct isc_device *isc)
 }
 
 /* regmap configuration */
-#define ATMEL_ISC_REG_MAX    0xbfc
+#define ATMEL_ISC_REG_MAX    0xd5c
 const struct regmap_config isc_regmap_config = {
 	.reg_bits       = 32,
 	.reg_stride     = 4,
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index c5d956fd20a0d..f574bcc3ba67a 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -68,17 +68,21 @@ struct isc_format {
 };
 
 /* Pipeline bitmap */
-#define WB_ENABLE	BIT(0)
-#define CFA_ENABLE	BIT(1)
-#define CC_ENABLE	BIT(2)
-#define GAM_ENABLE	BIT(3)
-#define GAM_BENABLE	BIT(4)
-#define GAM_GENABLE	BIT(5)
-#define GAM_RENABLE	BIT(6)
-#define CSC_ENABLE	BIT(7)
-#define CBC_ENABLE	BIT(8)
-#define SUB422_ENABLE	BIT(9)
-#define SUB420_ENABLE	BIT(10)
+#define DPC_DPCENABLE	BIT(0)
+#define DPC_GDCENABLE	BIT(1)
+#define DPC_BLCENABLE	BIT(2)
+#define WB_ENABLE	BIT(3)
+#define CFA_ENABLE	BIT(4)
+#define CC_ENABLE	BIT(5)
+#define GAM_ENABLE	BIT(6)
+#define GAM_BENABLE	BIT(7)
+#define GAM_GENABLE	BIT(8)
+#define GAM_RENABLE	BIT(9)
+#define VHXS_ENABLE	BIT(10)
+#define CSC_ENABLE	BIT(11)
+#define CBC_ENABLE	BIT(12)
+#define SUB422_ENABLE	BIT(13)
+#define SUB420_ENABLE	BIT(14)
 
 #define GAM_ENABLES	(GAM_RENABLE | GAM_GENABLE | GAM_BENABLE | GAM_ENABLE)
 
@@ -142,7 +146,7 @@ struct isc_ctrls {
 	u32 hist_minmax[HIST_BAYER][2];
 };
 
-#define ISC_PIPE_LINE_NODE_NUM	11
+#define ISC_PIPE_LINE_NODE_NUM	15
 
 /*
  * struct isc_reg_offsets - ISC device register offsets
-- 
GitLab


From 2873f85bd318bfc3f453fa78facb2b77632b36d8 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:17 +0200
Subject: [PATCH 2422/3804] media: atmel: atmel-isc: add CC initialization
 function

The CC submodule is a part of the atmel-isc pipeline, and stands for
Color Correction. It is used to apply gains and offsets to the
chroma (U, V) components of the YUV elements.
Implement the CC submodule initialization, as a product
specific function, which currently configures the neutral point in color
correction.

[hverkuil: made isc_sama5d2_config_cc static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    |  1 +
 drivers/media/platform/atmel/atmel-isc.h         |  3 +++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 14 ++++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index a6b62e009c383..ffce8de2cf4d8 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -661,6 +661,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 
 	isc->config_csc(isc);
 	isc->config_cbc(isc);
+	isc->config_cc(isc);
 }
 
 static int isc_update_profile(struct isc_device *isc)
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index f574bcc3ba67a..a5f8d50013811 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -225,6 +225,8 @@ struct isc_reg_offsets {
  *			specific CSC module
  * @config_cbc:		pointer to a function that initializes product
  *			specific CBC module
+ * @config_cc:		pointer to a function that initializes product
+ *			specific CC module
  *
  * @offsets:		struct holding the product specific register offsets
  */
@@ -298,6 +300,7 @@ struct isc_device {
 	struct {
 		void (*config_csc)(struct isc_device *isc);
 		void (*config_cbc)(struct isc_device *isc);
+		void (*config_cc)(struct isc_device *isc);
 	};
 
 	struct isc_reg_offsets		offsets;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 72cb91b4b8baa..3d2e6e68015aa 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -83,6 +83,19 @@ static void isc_sama5d2_config_cbc(struct isc_device *isc)
 		     isc->ctrls.contrast);
 }
 
+static void isc_sama5d2_config_cc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	/* Configure each register at the neutral fixed point 1.0 or 0.0 */
+	regmap_write(regmap, ISC_CC_RR_RG, (1 << 8));
+	regmap_write(regmap, ISC_CC_RB_OR, 0);
+	regmap_write(regmap, ISC_CC_GR_GG, (1 << 8) << 16);
+	regmap_write(regmap, ISC_CC_GB_OG, 0);
+	regmap_write(regmap, ISC_CC_BR_BG, 0);
+	regmap_write(regmap, ISC_CC_BB_OB, (1 << 8));
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -230,6 +243,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 
 	isc->config_csc = isc_sama5d2_config_csc;
 	isc->config_cbc = isc_sama5d2_config_cbc;
+	isc->config_cc = isc_sama5d2_config_cc;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
 	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
-- 
GitLab


From 8f1b451c87ee054f3f5238ac00593e7adaf96152 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:18 +0200
Subject: [PATCH 2423/3804] media: atmel: atmel-isc: create product specific
 v4l2 controls config

Create product specific callback for initializing v4l2 controls.
Call this from v4l2 controls init function.

[hverkuil: made isc_sama5d2_config_ctrls static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    |  5 +++--
 drivers/media/platform/atmel/atmel-isc.h         |  5 +++++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 12 ++++++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index ffce8de2cf4d8..8ed8b8a4840cf 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -2051,11 +2051,12 @@ static int isc_ctrl_init(struct isc_device *isc)
 	if (ret < 0)
 		return ret;
 
+	/* Initialize product specific controls. For example, contrast */
+	isc->config_ctrls(isc, ops);
+
 	ctrls->brightness = 0;
-	ctrls->contrast = 256;
 
 	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_BRIGHTNESS, -1024, 1023, 1, 0);
-	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_CONTRAST, -2048, 2047, 1, 256);
 	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_GAMMA, 0, isc->gamma_max, 1,
 			  isc->gamma_max);
 	isc->awb_ctrl = v4l2_ctrl_new_std(hdl, &isc_awb_ops,
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index a5f8d50013811..eb549fadb1a81 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -227,6 +227,8 @@ struct isc_reg_offsets {
  *			specific CBC module
  * @config_cc:		pointer to a function that initializes product
  *			specific CC module
+ * @config_ctrls:	pointer to a functoin that initializes product
+ *			specific v4l2 controls.
  *
  * @offsets:		struct holding the product specific register offsets
  */
@@ -301,6 +303,9 @@ struct isc_device {
 		void (*config_csc)(struct isc_device *isc);
 		void (*config_cbc)(struct isc_device *isc);
 		void (*config_cc)(struct isc_device *isc);
+
+		void (*config_ctrls)(struct isc_device *isc,
+				     const struct v4l2_ctrl_ops *ops);
 	};
 
 	struct isc_reg_offsets		offsets;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 3d2e6e68015aa..6c8555e22bbeb 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -96,6 +96,17 @@ static void isc_sama5d2_config_cc(struct isc_device *isc)
 	regmap_write(regmap, ISC_CC_BB_OB, (1 << 8));
 }
 
+static void isc_sama5d2_config_ctrls(struct isc_device *isc,
+				     const struct v4l2_ctrl_ops *ops)
+{
+	struct isc_ctrls *ctrls = &isc->ctrls;
+	struct v4l2_ctrl_handler *hdl = &ctrls->handler;
+
+	ctrls->contrast = 256;
+
+	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_CONTRAST, -2048, 2047, 1, 256);
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -244,6 +255,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->config_csc = isc_sama5d2_config_csc;
 	isc->config_cbc = isc_sama5d2_config_cbc;
 	isc->config_cc = isc_sama5d2_config_cc;
+	isc->config_ctrls = isc_sama5d2_config_ctrls;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
 	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
-- 
GitLab


From 883285556388affe1273a50d1af8772c30aa6d89 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:19 +0200
Subject: [PATCH 2424/3804] media: atmel: atmel-isc: create callback for DPC
 submodule product specific

The DPC submodule is a part of the atmel-isc pipeline, and stands for
Defective Pixel Correction. Its purpose is to detect defective pixels and
correct them if possible with the help of adjacent pixels.
Create a product specific callback for initializing the DPC submodule
of the pipeline.
For sama5d2 product, this module does not exist, thus this function is a noop.

[hverkuil: made isc_sama5d2_config_dpc static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 1 +
 drivers/media/platform/atmel/atmel-isc.h         | 3 +++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 8ed8b8a4840cf..777a5dc19d6e3 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -659,6 +659,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 	regmap_bulk_write(regmap, ISC_GAM_GENTRY, gamma, GAMMA_ENTRIES);
 	regmap_bulk_write(regmap, ISC_GAM_RENTRY, gamma, GAMMA_ENTRIES);
 
+	isc->config_dpc(isc);
 	isc->config_csc(isc);
 	isc->config_cbc(isc);
 	isc->config_cc(isc);
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index eb549fadb1a81..d6cd85a4c3e93 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -221,6 +221,8 @@ struct isc_reg_offsets {
  * @max_width:		maximum frame width, dependent on the internal RAM
  * @max_height:		maximum frame height, dependent on the internal RAM
  *
+ * @config_dpc:		pointer to a function that initializes product
+ *			specific DPC module
  * @config_csc:		pointer to a function that initializes product
  *			specific CSC module
  * @config_cbc:		pointer to a function that initializes product
@@ -300,6 +302,7 @@ struct isc_device {
 	u32		max_height;
 
 	struct {
+		void (*config_dpc)(struct isc_device *isc);
 		void (*config_csc)(struct isc_device *isc);
 		void (*config_cbc)(struct isc_device *isc);
 		void (*config_cc)(struct isc_device *isc);
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 6c8555e22bbeb..b6b7dbdbfd1dc 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -107,6 +107,11 @@ static void isc_sama5d2_config_ctrls(struct isc_device *isc,
 	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_CONTRAST, -2048, 2047, 1, 256);
 }
 
+static void isc_sama5d2_config_dpc(struct isc_device *isc)
+{
+	/* This module is not present on sama5d2 pipeline */
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -252,6 +257,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->max_width = ISC_SAMA5D2_MAX_SUPPORT_WIDTH;
 	isc->max_height = ISC_SAMA5D2_MAX_SUPPORT_HEIGHT;
 
+	isc->config_dpc = isc_sama5d2_config_dpc;
 	isc->config_csc = isc_sama5d2_config_csc;
 	isc->config_cbc = isc_sama5d2_config_cbc;
 	isc->config_cc = isc_sama5d2_config_cc;
-- 
GitLab


From e48848a6af150ed09d9761167aad2a7cd023470b Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:20 +0200
Subject: [PATCH 2425/3804] media: atmel: atmel-isc: create callback for GAM
 submodule product specific

The GAM submodule is a part of the atmel-isc pipeline, and stands for
Gamma Correction. It is used to apply the gamma curve to the incoming pixels.
Create a product specific callback for initializing the GAM submodule
of the pipeline.
For sama5d2 product, there is no special configuration at this moment,
thus this function is a noop.

[hverkuil: made isc_sama5d2_config_gam static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    | 1 +
 drivers/media/platform/atmel/atmel-isc.h         | 3 +++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 6 ++++++
 3 files changed, 10 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 777a5dc19d6e3..aef0d6570d395 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -663,6 +663,7 @@ static void isc_set_pipeline(struct isc_device *isc, u32 pipeline)
 	isc->config_csc(isc);
 	isc->config_cbc(isc);
 	isc->config_cc(isc);
+	isc->config_gam(isc);
 }
 
 static int isc_update_profile(struct isc_device *isc)
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index d6cd85a4c3e93..1e6988f1876ea 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -229,6 +229,8 @@ struct isc_reg_offsets {
  *			specific CBC module
  * @config_cc:		pointer to a function that initializes product
  *			specific CC module
+ * @config_gam:		pointer to a function that initializes product
+ *			specific GAMMA module
  * @config_ctrls:	pointer to a functoin that initializes product
  *			specific v4l2 controls.
  *
@@ -306,6 +308,7 @@ struct isc_device {
 		void (*config_csc)(struct isc_device *isc);
 		void (*config_cbc)(struct isc_device *isc);
 		void (*config_cc)(struct isc_device *isc);
+		void (*config_gam)(struct isc_device *isc);
 
 		void (*config_ctrls)(struct isc_device *isc,
 				     const struct v4l2_ctrl_ops *ops);
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index b6b7dbdbfd1dc..6973c65822d8c 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -112,6 +112,11 @@ static void isc_sama5d2_config_dpc(struct isc_device *isc)
 	/* This module is not present on sama5d2 pipeline */
 }
 
+static void isc_sama5d2_config_gam(struct isc_device *isc)
+{
+	/* No specific gamma configuration */
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -261,6 +266,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->config_csc = isc_sama5d2_config_csc;
 	isc->config_cbc = isc_sama5d2_config_cbc;
 	isc->config_cc = isc_sama5d2_config_cc;
+	isc->config_gam = isc_sama5d2_config_gam;
 	isc->config_ctrls = isc_sama5d2_config_ctrls;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
-- 
GitLab


From 2c9017d0b5d3fbf17e69577a42d9e610ca122810 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 2 Jun 2021 09:34:35 +0200
Subject: [PATCH 2426/3804] mmc: renesas_sdhi: abort tuning when timeout
 detected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have to bring the eMMC from sending-data state back to transfer state
once we detected a CRC error (timeout) during tuning. So, send a stop
command via mmc_abort_tuning().

Fixes: 4f11997773b6 ("mmc: tmio: Add tuning support")
Reported-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Tested-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/20210602073435.5955-1-wsa+renesas@sang-engineering.com
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 635bf31a67359..9029308c4a0f0 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -692,14 +692,19 @@ static int renesas_sdhi_execute_tuning(struct mmc_host *mmc, u32 opcode)
 
 	/* Issue CMD19 twice for each tap */
 	for (i = 0; i < 2 * priv->tap_num; i++) {
+		int cmd_error = 0;
+
 		/* Set sampling clock position */
 		sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET, i % priv->tap_num);
 
-		if (mmc_send_tuning(mmc, opcode, NULL) == 0)
+		if (mmc_send_tuning(mmc, opcode, &cmd_error) == 0)
 			set_bit(i, priv->taps);
 
 		if (sd_scc_read32(host, priv, SH_MOBILE_SDHI_SCC_SMPCMP) == 0)
 			set_bit(i, priv->smpcmp);
+
+		if (cmd_error)
+			mmc_abort_tuning(mmc, opcode);
 	}
 
 	ret = renesas_sdhi_select_tuning(host);
-- 
GitLab


From dff404deb8493e6154ad75a62ce7c4e37ff8fccd Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Thu, 3 Jun 2021 20:25:09 +0200
Subject: [PATCH 2427/3804] regmap: mdio: Clean up invalid clause-22 addresses

Currently a regmap configuration for regmap-mdio must have a register
address width of 5 bits (cf. clause-22 register access). This is not
enforced on the provided register addresses, which would enable
clause-45 MDIO bus access, if the right bit packing is used.

Prevent clause-45 access, and other invalid addresses, by masking the
provided register address.

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://lore.kernel.org/r/f7013f67e6d6ff56ec98660f18320f6ffcc1a777.1622743333.git.sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mdio.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
index 5ec208279913f..aee34bf2400e5 100644
--- a/drivers/base/regmap/regmap-mdio.c
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -5,16 +5,19 @@
 #include <linux/module.h>
 #include <linux/regmap.h>
 
+#define REGVAL_MASK		GENMASK(15, 0)
+#define REGNUM_C22_MASK		GENMASK(4, 0)
+
 static int regmap_mdio_read(void *context, unsigned int reg, unsigned int *val)
 {
 	struct mdio_device *mdio_dev = context;
 	int ret;
 
-	ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg);
+	ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg & REGNUM_C22_MASK);
 	if (ret < 0)
 		return ret;
 
-	*val = ret & 0xffff;
+	*val = ret & REGVAL_MASK;
 	return 0;
 }
 
@@ -22,7 +25,7 @@ static int regmap_mdio_write(void *context, unsigned int reg, unsigned int val)
 {
 	struct mdio_device *mdio_dev = context;
 
-	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val);
+	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg & REGNUM_C22_MASK, val);
 }
 
 static const struct regmap_bus regmap_mdio_bus = {
-- 
GitLab


From f083be9db060fbac09123d80bdffb2c001ac0e2b Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Thu, 3 Jun 2021 20:25:10 +0200
Subject: [PATCH 2428/3804] regmap: mdio: Add clause-45 support

Modern ethernet phys support the so-called clause-45 register access
mode, which allows for register address widths of 16 bit.

Also allow for 16-bit register address widths, and return a regmap for
clause-45 access in that case.

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://lore.kernel.org/r/9cc263e3e7d5865edd90453b4183f1cf363cb636.1622743333.git.sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mdio.c | 70 ++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
index aee34bf2400e5..cfb23afb19ebf 100644
--- a/drivers/base/regmap/regmap-mdio.c
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -7,13 +7,14 @@
 
 #define REGVAL_MASK		GENMASK(15, 0)
 #define REGNUM_C22_MASK		GENMASK(4, 0)
+/* Clause-45 mask includes the device type (5 bit) and actual register number (16 bit) */
+#define REGNUM_C45_MASK		GENMASK(20, 0)
 
-static int regmap_mdio_read(void *context, unsigned int reg, unsigned int *val)
+static int regmap_mdio_read(struct mdio_device *mdio_dev, u32 reg, unsigned int *val)
 {
-	struct mdio_device *mdio_dev = context;
 	int ret;
 
-	ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg & REGNUM_C22_MASK);
+	ret = mdiobus_read(mdio_dev->bus, mdio_dev->addr, reg);
 	if (ret < 0)
 		return ret;
 
@@ -21,27 +22,63 @@ static int regmap_mdio_read(void *context, unsigned int reg, unsigned int *val)
 	return 0;
 }
 
-static int regmap_mdio_write(void *context, unsigned int reg, unsigned int val)
+static int regmap_mdio_write(struct mdio_device *mdio_dev, u32 reg, unsigned int val)
+{
+	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val);
+}
+
+static int regmap_mdio_c22_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct mdio_device *mdio_dev = context;
+
+	return regmap_mdio_read(mdio_dev, reg & REGNUM_C22_MASK, val);
+}
+
+static int regmap_mdio_c22_write(void *context, unsigned int reg, unsigned int val)
 {
 	struct mdio_device *mdio_dev = context;
 
-	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg & REGNUM_C22_MASK, val);
+	return regmap_mdio_write(mdio_dev, reg & REGNUM_C22_MASK, val);
 }
 
-static const struct regmap_bus regmap_mdio_bus = {
-	.reg_write = regmap_mdio_write,
-	.reg_read = regmap_mdio_read,
+static const struct regmap_bus regmap_mdio_c22_bus = {
+	.reg_write = regmap_mdio_c22_write,
+	.reg_read = regmap_mdio_c22_read,
+};
+
+static int regmap_mdio_c45_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct mdio_device *mdio_dev = context;
+
+	return regmap_mdio_read(mdio_dev, MII_ADDR_C45 | (reg & REGNUM_C45_MASK), val);
+}
+
+static int regmap_mdio_c45_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct mdio_device *mdio_dev = context;
+
+	return regmap_mdio_write(mdio_dev, MII_ADDR_C45 | (reg & REGNUM_C45_MASK), val);
+}
+
+static const struct regmap_bus regmap_mdio_c45_bus = {
+	.reg_write = regmap_mdio_c45_write,
+	.reg_read = regmap_mdio_c45_read,
 };
 
 struct regmap *__regmap_init_mdio(struct mdio_device *mdio_dev,
 	const struct regmap_config *config, struct lock_class_key *lock_key,
 	const char *lock_name)
 {
-	if (config->reg_bits != 5 || config->val_bits != 16)
+	const struct regmap_bus *bus;
+
+	if (config->reg_bits == 5 && config->val_bits == 16)
+		bus = &regmap_mdio_c22_bus;
+	else if (config->reg_bits == 21 && config->val_bits == 16)
+		bus = &regmap_mdio_c45_bus;
+	else
 		return ERR_PTR(-EOPNOTSUPP);
 
-	return __regmap_init(&mdio_dev->dev, &regmap_mdio_bus, mdio_dev, config,
-		lock_key, lock_name);
+	return __regmap_init(&mdio_dev->dev, bus, mdio_dev, config, lock_key, lock_name);
 }
 EXPORT_SYMBOL_GPL(__regmap_init_mdio);
 
@@ -49,11 +86,16 @@ struct regmap *__devm_regmap_init_mdio(struct mdio_device *mdio_dev,
 	const struct regmap_config *config, struct lock_class_key *lock_key,
 	const char *lock_name)
 {
-	if (config->reg_bits != 5 || config->val_bits != 16)
+	const struct regmap_bus *bus;
+
+	if (config->reg_bits == 5 && config->val_bits == 16)
+		bus = &regmap_mdio_c22_bus;
+	else if (config->reg_bits == 21 && config->val_bits == 16)
+		bus = &regmap_mdio_c45_bus;
+	else
 		return ERR_PTR(-EOPNOTSUPP);
 
-	return __devm_regmap_init(&mdio_dev->dev, &regmap_mdio_bus, mdio_dev,
-		config, lock_key, lock_name);
+	return __devm_regmap_init(&mdio_dev->dev, bus, mdio_dev, config, lock_key, lock_name);
 }
 EXPORT_SYMBOL_GPL(__devm_regmap_init_mdio);
 
-- 
GitLab


From ba6622c43381e7045f8bc5438089ae53054fdf83 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 4 Jun 2021 19:58:03 +0800
Subject: [PATCH 2429/3804] regulator: ltc3589: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of the open-coded implementation.

LTC3589_LINEAR_REG() no longer calls LTC3589_REG().
Only LTC3589_LINEAR_REG() needs to set go_bit, thus remove go_bit
parameter from LTC3589_REG() macro.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210604115803.1260976-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/ltc3589.c | 73 +++++++++++++++++++------------------
 1 file changed, 38 insertions(+), 35 deletions(-)

diff --git a/drivers/regulator/ltc3589.c b/drivers/regulator/ltc3589.c
index 38f7ccb63b528..5e0b669c3a014 100644
--- a/drivers/regulator/ltc3589.c
+++ b/drivers/regulator/ltc3589.c
@@ -54,6 +54,11 @@
 #define LTC3589_VCCR_SW3_GO		BIT(4)
 #define LTC3589_VCCR_LDO2_GO		BIT(6)
 
+#define LTC3589_VRRCR_SW1_RAMP_MASK	GENMASK(1, 0)
+#define LTC3589_VRRCR_SW2_RAMP_MASK	GENMASK(3, 2)
+#define LTC3589_VRRCR_SW3_RAMP_MASK	GENMASK(5, 4)
+#define LTC3589_VRRCR_LDO2_RAMP_MASK	GENMASK(7, 6)
+
 enum ltc3589_variant {
 	LTC3589,
 	LTC3589_1,
@@ -88,27 +93,9 @@ static const int ltc3589_12_ldo4[] = {
 	1200000, 1800000, 2500000, 3200000,
 };
 
-static int ltc3589_set_ramp_delay(struct regulator_dev *rdev, int ramp_delay)
-{
-	struct ltc3589 *ltc3589 = rdev_get_drvdata(rdev);
-	int sel, shift;
-
-	if (unlikely(ramp_delay <= 0))
-		return -EINVAL;
-
-	/* VRRCR slew rate offsets are the same as VCCR go bit offsets */
-	shift = ffs(rdev->desc->apply_bit) - 1;
-
-	/* The slew rate can be set to 0.88, 1.75, 3.5, or 7 mV/uS */
-	for (sel = 0; sel < 4; sel++) {
-		if ((880 << sel) >= ramp_delay) {
-			return regmap_update_bits(ltc3589->regmap,
-						  LTC3589_VRRCR,
-						  0x3 << shift, sel << shift);
-		}
-	}
-	return -EINVAL;
-}
+static const unsigned int ltc3589_ramp_table[] = {
+	880, 1750, 3500, 7000
+};
 
 static int ltc3589_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 {
@@ -149,7 +136,7 @@ static const struct regulator_ops ltc3589_linear_regulator_ops = {
 	.list_voltage = regulator_list_voltage_linear,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
-	.set_ramp_delay = ltc3589_set_ramp_delay,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
 	.set_suspend_voltage = ltc3589_set_suspend_voltage,
 	.set_suspend_mode = ltc3589_set_suspend_mode,
@@ -218,16 +205,13 @@ static int ltc3589_of_parse_cb(struct device_node *np,
 	return 0;
 }
 
-#define LTC3589_REG(_name, _of_name, _ops, en_bit, dtv1_reg, dtv_mask, go_bit)\
+#define LTC3589_REG(_name, _of_name, _ops, en_bit, dtv1_reg, dtv_mask)	\
 	[LTC3589_ ## _name] = {						\
 		.name = #_name,						\
 		.of_match = of_match_ptr(#_of_name),			\
 		.regulators_node = of_match_ptr("regulators"),		\
 		.of_parse_cb = ltc3589_of_parse_cb,			\
 		.n_voltages = (dtv_mask) + 1,				\
-		.min_uV = (go_bit) ? 362500 : 0,			\
-		.uV_step = (go_bit) ? 12500 : 0,			\
-		.ramp_delay = (go_bit) ? 1750 : 0,			\
 		.fixed_uV = (dtv_mask) ? 0 : 800000,			\
 		.ops = &ltc3589_ ## _ops ## _regulator_ops,		\
 		.type = REGULATOR_VOLTAGE,				\
@@ -235,30 +219,49 @@ static int ltc3589_of_parse_cb(struct device_node *np,
 		.owner = THIS_MODULE,					\
 		.vsel_reg = (dtv1_reg),					\
 		.vsel_mask = (dtv_mask),				\
-		.apply_reg = (go_bit) ? LTC3589_VCCR : 0,		\
-		.apply_bit = (go_bit),					\
 		.enable_reg = (en_bit) ? LTC3589_OVEN : 0,		\
 		.enable_mask = (en_bit),				\
 	}
 
 #define LTC3589_LINEAR_REG(_name, _of_name, _dtv1)			\
-	LTC3589_REG(_name, _of_name, linear, LTC3589_OVEN_ ## _name,	\
-		    LTC3589_ ## _dtv1, 0x1f,				\
-		    LTC3589_VCCR_ ## _name ## _GO)
+	[LTC3589_ ## _name] = {						\
+		.name = #_name,						\
+		.of_match = of_match_ptr(#_of_name),			\
+		.regulators_node = of_match_ptr("regulators"),		\
+		.of_parse_cb = ltc3589_of_parse_cb,			\
+		.n_voltages = 32,					\
+		.min_uV = 362500,					\
+		.uV_step = 12500,					\
+		.ramp_delay = 1750,					\
+		.ops = &ltc3589_linear_regulator_ops,			\
+		.type = REGULATOR_VOLTAGE,				\
+		.id = LTC3589_ ## _name,				\
+		.owner = THIS_MODULE,					\
+		.vsel_reg = LTC3589_ ## _dtv1,				\
+		.vsel_mask = 0x1f,					\
+		.apply_reg = LTC3589_VCCR,				\
+		.apply_bit = LTC3589_VCCR_ ## _name ## _GO,		\
+		.enable_reg = LTC3589_OVEN,				\
+		.enable_mask = (LTC3589_OVEN_ ## _name),		\
+		.ramp_reg = LTC3589_VRRCR,				\
+		.ramp_mask = LTC3589_VRRCR_ ## _name ## _RAMP_MASK,	\
+		.ramp_delay_table = ltc3589_ramp_table,			\
+		.n_ramp_values = ARRAY_SIZE(ltc3589_ramp_table),	\
+	}
+
 
 #define LTC3589_FIXED_REG(_name, _of_name)				\
-	LTC3589_REG(_name, _of_name, fixed, LTC3589_OVEN_ ## _name, 0, 0, 0)
+	LTC3589_REG(_name, _of_name, fixed, LTC3589_OVEN_ ## _name, 0, 0)
 
 static const struct regulator_desc ltc3589_regulators[] = {
 	LTC3589_LINEAR_REG(SW1, sw1, B1DTV1),
 	LTC3589_LINEAR_REG(SW2, sw2, B2DTV1),
 	LTC3589_LINEAR_REG(SW3, sw3, B3DTV1),
 	LTC3589_FIXED_REG(BB_OUT, bb-out),
-	LTC3589_REG(LDO1, ldo1, fixed_standby, 0, 0, 0, 0),
+	LTC3589_REG(LDO1, ldo1, fixed_standby, 0, 0, 0),
 	LTC3589_LINEAR_REG(LDO2, ldo2, L2DTV1),
 	LTC3589_FIXED_REG(LDO3, ldo3),
-	LTC3589_REG(LDO4, ldo4, table, LTC3589_OVEN_LDO4, LTC3589_L2DTV2,
-		    0x60, 0),
+	LTC3589_REG(LDO4, ldo4, table, LTC3589_OVEN_LDO4, LTC3589_L2DTV2, 0x60),
 };
 
 static bool ltc3589_writeable_reg(struct device *dev, unsigned int reg)
-- 
GitLab


From 0ea461b4f229739345870a086aa4647a16ff42ff Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 7 Jun 2021 22:30:02 +0800
Subject: [PATCH 2430/3804] regulator: bd71815: Get rid of struct bd71815_pmic

The content of bd71815_regulators is never changed, no need to duplicate
it, thus remove descs[BD71815_REGULATOR_CNT].
The *regmap, *dev and *rdev[BD71815_REGULATOR_CNT] are not really needed.
The *gps is unused.

Thus the struct bd71815_pmic can be removed.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210607143002.1600017-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71815-regulator.c | 57 +++++++++------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

diff --git a/drivers/regulator/bd71815-regulator.c b/drivers/regulator/bd71815-regulator.c
index 4dd21ac24ddf8..16edd9062ca91 100644
--- a/drivers/regulator/bd71815-regulator.c
+++ b/drivers/regulator/bd71815-regulator.c
@@ -28,14 +28,6 @@ struct bd71815_regulator {
 	const struct rohm_dvs_config *dvs;
 };
 
-struct bd71815_pmic {
-	struct bd71815_regulator descs[BD71815_REGULATOR_CNT];
-	struct regmap *regmap;
-	struct device *dev;
-	struct gpio_descs *gps;
-	struct regulator_dev *rdev[BD71815_REGULATOR_CNT];
-};
-
 static const int bd7181x_wled_currents[] = {
 	10, 20, 30, 50, 70, 100, 200, 300, 500, 700, 1000, 2000, 3000, 4000,
 	5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000,
@@ -302,14 +294,13 @@ static int bd7181x_led_set_current_limit(struct regulator_dev *rdev,
 
 static int bd7181x_buck12_get_voltage_sel(struct regulator_dev *rdev)
 {
-	struct bd71815_pmic *pmic = rdev_get_drvdata(rdev);
 	int rid = rdev_get_id(rdev);
 	int ret, regh, regl, val;
 
 	regh = BD71815_REG_BUCK1_VOLT_H + rid * 0x2;
 	regl = BD71815_REG_BUCK1_VOLT_L + rid * 0x2;
 
-	ret = regmap_read(pmic->regmap, regh, &val);
+	ret = regmap_read(rdev->regmap, regh, &val);
 	if (ret)
 		return ret;
 
@@ -321,7 +312,7 @@ static int bd7181x_buck12_get_voltage_sel(struct regulator_dev *rdev)
 	 * by BD71815_BUCK_DVSSEL bit
 	 */
 	if ((!(val & BD71815_BUCK_STBY_DVS)) && (!(val & BD71815_BUCK_DVSSEL)))
-		ret = regmap_read(pmic->regmap, regl, &val);
+		ret = regmap_read(rdev->regmap, regl, &val);
 
 	if (ret)
 		return ret;
@@ -335,14 +326,13 @@ static int bd7181x_buck12_get_voltage_sel(struct regulator_dev *rdev)
 static int bd7181x_buck12_set_voltage_sel(struct regulator_dev *rdev,
 					  unsigned int sel)
 {
-	struct bd71815_pmic *pmic = rdev_get_drvdata(rdev);
 	int rid = rdev_get_id(rdev);
 	int ret, val, reg, regh, regl;
 
 	regh = BD71815_REG_BUCK1_VOLT_H + rid*0x2;
 	regl = BD71815_REG_BUCK1_VOLT_L + rid*0x2;
 
-	ret = regmap_read(pmic->regmap, regh, &val);
+	ret = regmap_read(rdev->regmap, regh, &val);
 	if (ret)
 		return ret;
 
@@ -352,7 +342,7 @@ static int bd7181x_buck12_set_voltage_sel(struct regulator_dev *rdev,
 	 * voltages at runtime is not supported by this driver.
 	 */
 	if (((val & BD71815_BUCK_STBY_DVS))) {
-		return regmap_update_bits(pmic->regmap, regh, BD71815_VOLT_MASK,
+		return regmap_update_bits(rdev->regmap, regh, BD71815_VOLT_MASK,
 					  sel);
 	}
 	/* Update new voltage to the register which is not selected now */
@@ -361,12 +351,13 @@ static int bd7181x_buck12_set_voltage_sel(struct regulator_dev *rdev,
 	else
 		reg = regh;
 
-	ret = regmap_update_bits(pmic->regmap, reg, BD71815_VOLT_MASK, sel);
+	ret = regmap_update_bits(rdev->regmap, reg, BD71815_VOLT_MASK, sel);
 	if (ret)
 		return ret;
 
 	/* Select the other DVS register to be used */
-	return regmap_update_bits(pmic->regmap, regh, BD71815_BUCK_DVSSEL, ~val);
+	return regmap_update_bits(rdev->regmap, regh, BD71815_BUCK_DVSSEL,
+				  ~val);
 }
 
 static const struct regulator_ops bd7181x_ldo_regulator_ops = {
@@ -524,7 +515,7 @@ static const struct regulator_ops bd7181x_led_regulator_ops = {
 		.dvs = (_dvs),						\
 	}
 
-static struct bd71815_regulator bd71815_regulators[] = {
+static const struct bd71815_regulator bd71815_regulators[] = {
 	BD71815_BUCK12_REG(buck1, BD71815_BUCK1, BD71815_REG_BUCK1_VOLT_H,
 			   BD71815_REG_BUCK1_MODE, 800000, 2000000, 25000,
 			   &buck1_dvs),
@@ -570,24 +561,16 @@ static struct bd71815_regulator bd71815_regulators[] = {
 
 static int bd7181x_probe(struct platform_device *pdev)
 {
-	struct bd71815_pmic *pmic;
 	struct regulator_config config = {};
 	int i, ret;
 	struct gpio_desc *ldo4_en;
+	struct regmap *regmap;
 
-	pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
-	if (!pmic)
-		return -ENOMEM;
-
-	memcpy(pmic->descs, bd71815_regulators,	sizeof(pmic->descs));
-
-	pmic->dev = &pdev->dev;
-	pmic->regmap = dev_get_regmap(pdev->dev.parent, NULL);
-	if (!pmic->regmap) {
-		dev_err(pmic->dev, "No parent regmap\n");
+	regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!regmap) {
+		dev_err(&pdev->dev, "No parent regmap\n");
 		return -ENODEV;
 	}
-	platform_set_drvdata(pdev, pmic);
 	ldo4_en = devm_gpiod_get_from_of_node(&pdev->dev,
 					      pdev->dev.parent->of_node,
 						 "rohm,vsel-gpios", 0,
@@ -601,23 +584,23 @@ static int bd7181x_probe(struct platform_device *pdev)
 	}
 
 	/* Disable to go to ship-mode */
-	ret = regmap_update_bits(pmic->regmap, BD71815_REG_PWRCTRL,
-				 RESTARTEN, 0);
+	ret = regmap_update_bits(regmap, BD71815_REG_PWRCTRL, RESTARTEN, 0);
 	if (ret)
 		return ret;
 
 	config.dev = pdev->dev.parent;
-	config.regmap = pmic->regmap;
+	config.regmap = regmap;
 
 	for (i = 0; i < BD71815_REGULATOR_CNT; i++) {
-		struct regulator_desc *desc;
+		const struct regulator_desc *desc;
 		struct regulator_dev *rdev;
 
-		desc = &pmic->descs[i].desc;
+		desc = &bd71815_regulators[i].desc;
+
 		if (i == BD71815_LDO4)
 			config.ena_gpiod = ldo4_en;
-
-		config.driver_data = pmic;
+		else
+			config.ena_gpiod = NULL;
 
 		rdev = devm_regulator_register(&pdev->dev, desc, &config);
 		if (IS_ERR(rdev)) {
@@ -626,8 +609,6 @@ static int bd7181x_probe(struct platform_device *pdev)
 				desc->name);
 			return PTR_ERR(rdev);
 		}
-		config.ena_gpiod = NULL;
-		pmic->rdev[i] = rdev;
 	}
 	return 0;
 }
-- 
GitLab


From 2e11737a772b95c6587df73f216eec1762431432 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Mon, 7 Jun 2021 22:29:07 +0800
Subject: [PATCH 2431/3804] regulator: fan53880: Fix vsel_mask setting for
 FAN53880_BUCK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the datasheet:
REGISTER DETAILS − 0x02 BUCK, BUCK_OUT is BIT0 ~ BIT7.

So vsel_mask for FAN53880_BUCK should be 0xFF.

Fixes: e6dea51e2d41 ("regulator: fan53880: Add initial support")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210607142907.1599905-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fan53880.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/fan53880.c b/drivers/regulator/fan53880.c
index d49f10ac222d5..d7e34e9188c7d 100644
--- a/drivers/regulator/fan53880.c
+++ b/drivers/regulator/fan53880.c
@@ -77,7 +77,7 @@ static const struct regulator_desc fan53880_regulators[] = {
 		},
 		.n_linear_ranges = 2,
 		.vsel_reg =	   FAN53880_BUCKVOUT,
-		.vsel_mask =	   0x7f,
+		.vsel_mask =	   0xff,
 		.enable_reg =	   FAN53880_ENABLE,
 		.enable_mask =	   0x10,
 		.enable_time =	   480,
-- 
GitLab


From aceda401e84115bf9121454828f9da63c2a94482 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Tue, 8 Jun 2021 08:15:18 +0100
Subject: [PATCH 2432/3804] spi: tegra20-slink: Ensure SPI controller reset is
 deasserted

Commit 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling
clocks") removed some legacy code for handling resets on Tegra from
within the Tegra clock code. This exposed an issue in the Tegra20 slink
driver where the SPI controller reset was not being deasserted as needed
during probe. This is causing the Tegra30 Cardhu platform to hang on
boot. Fix this by ensuring the SPI controller reset is deasserted during
probe.

Fixes: 4782c0a5dd88 ("clk: tegra: Don't deassert reset on enabling clocks")
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/r/20210608071518.93037-1-jonathanh@nvidia.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-tegra20-slink.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index f7c832fd40036..6a726c95ac7a8 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -1118,6 +1118,11 @@ static int tegra_slink_probe(struct platform_device *pdev)
 		pm_runtime_put_noidle(&pdev->dev);
 		goto exit_pm_disable;
 	}
+
+	reset_control_assert(tspi->rst);
+	udelay(2);
+	reset_control_deassert(tspi->rst);
+
 	tspi->def_command_reg  = SLINK_M_S;
 	tspi->def_command2_reg = SLINK_CS_ACTIVE_BETWEEN;
 	tegra_slink_writel(tspi, tspi->def_command_reg, SLINK_COMMAND);
-- 
GitLab


From 763663c9715f5f1cc0d065d2b020f12cd37417d2 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 12 May 2021 15:25:15 +0800
Subject: [PATCH 2433/3804] PM: domains: fix some kernel-doc issues

Fix the following make W=1 kernel build warnings:

  drivers/base/power/domain_governor.c:259: warning: Function parameter or member 'now' not described in '_default_power_down_ok'
  drivers/base/power/domain.c:581: warning: Function parameter or member 'depth' not described in 'genpd_power_off'
  drivers/base/power/domain.c:2520: warning: Function parameter or member 'np' not described in 'of_genpd_remove_last'
  drivers/base/power/domain.c:2520: warning: Excess function parameter 'provider' description in 'of_genpd_remove_last'

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c          | 3 ++-
 drivers/base/power/domain_governor.c | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b6a782c316138..5695a641efd36 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -572,6 +572,7 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd)
  * RPM status of the releated device is in an intermediate state, not yet turned
  * into RPM_SUSPENDED. This means genpd_power_off() must allow one device to not
  * be RPM_SUSPENDED, while it tries to power off the PM domain.
+ * @depth: nesting count for lockdep.
  *
  * If all of the @genpd's devices have been suspended and all of its subdomains
  * have been powered down, remove power from @genpd.
@@ -2505,7 +2506,7 @@ EXPORT_SYMBOL_GPL(of_genpd_remove_subdomain);
 
 /**
  * of_genpd_remove_last - Remove the last PM domain registered for a provider
- * @provider: Pointer to device structure associated with provider
+ * @np: Pointer to device node associated with provider
  *
  * Find the last PM domain that was added by a particular provider and
  * remove this PM domain from the list of PM domains. The provider is
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index c6c218758f0b0..cd08c58851905 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -252,6 +252,7 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd,
 /**
  * _default_power_down_ok - Default generic PM domain power off governor routine.
  * @pd: PM domain to check.
+ * @now: current ktime.
  *
  * This routine must be executed under the PM domain's lock.
  */
-- 
GitLab


From 6687cd72aa9112a454a4646986e0402dd1b07d0e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 4 Jun 2021 14:59:43 +0200
Subject: [PATCH 2434/3804] mmc: renesas_sdhi: Fix HS400 on R-Car M3-W+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

R-Car M3-W ES3.0 is marketed as R-Car M3-W+ (R8A77961), and has its own
compatible value "renesas,r8a77961".

Hence using soc_device_match() with soc_id = "r8a7796" and revision =
"ES3.*" does not actually match running on an R-Car M3-W+ SoC.

Fix this by matching with soc_id = "r8a77961" instead.

Fixes: a38c078fea0b1393 ("mmc: renesas_sdhi: Avoid bad TAP in HS400")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/ee8af5d631f5331139ffea714539030d97352e93.1622811525.git.geert+renesas@glider.be
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/renesas_sdhi_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index 9029308c4a0f0..baab4c2e1b533 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -944,7 +944,7 @@ static const struct soc_device_attribute sdhi_quirks_match[]  = {
 	{ .soc_id = "r8a7795", .revision = "ES3.*", .data = &sdhi_quirks_bad_taps2367 },
 	{ .soc_id = "r8a7796", .revision = "ES1.[012]", .data = &sdhi_quirks_4tap_nohs400 },
 	{ .soc_id = "r8a7796", .revision = "ES1.*", .data = &sdhi_quirks_r8a7796_es13 },
-	{ .soc_id = "r8a7796", .revision = "ES3.*", .data = &sdhi_quirks_bad_taps1357 },
+	{ .soc_id = "r8a77961", .data = &sdhi_quirks_bad_taps1357 },
 	{ .soc_id = "r8a77965", .data = &sdhi_quirks_r8a77965 },
 	{ .soc_id = "r8a77980", .data = &sdhi_quirks_nohs400 },
 	{ .soc_id = "r8a77990", .data = &sdhi_quirks_r8a77990 },
-- 
GitLab


From cfa7ff959a789a953eac40c8ac793e2cfc2db931 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 3 Jun 2021 19:41:18 +0100
Subject: [PATCH 2435/3804] arm64: smccc: Support SMCCC v1.3 SVE register
 saving hint

SMCCC v1.2 requires that all SVE state be preserved over SMC calls, which
introduces substantial overhead in the common case where there is no SVE
state in the registers. To avoid this SMCCC v1.3 introduces a flag which
allows the caller to say that there is no state that needs to be preserved
in the registers. Make use of this flag, setting it if the SMCCC version
indicates support for it and the TIF_ flags indicate that there is no live
SVE state in the registers. This avoids placing any constraints on when
SMCCC calls can be done or triggering extra saving and reloading of SVE
register state in the kernel.

This would be straightforward enough except for the rather entertaining
inline assembly we use to do SMCCC v1.1 calls to allow us to take advantage
of the limited number of registers it clobbers. Deal with this by having a
function which we call immediately before issuing the SMCCC call to make
our checks and set the flag. Using alternatives the overhead if SVE is
supported but not detected at runtime can be reduced to a single NOP.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210603184118.15090-1-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/smccc-call.S | 26 ++++++++++++++++++++++++++
 drivers/firmware/smccc/smccc.c |  4 ++++
 include/linux/arm-smccc.h      | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 2def9d0dd3ddb..d3d37f932b97a 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -7,8 +7,34 @@
 
 #include <asm/asm-offsets.h>
 #include <asm/assembler.h>
+#include <asm/thread_info.h>
+
+/*
+ * If we have SMCCC v1.3 and (as is likely) no SVE state in
+ * the registers then set the SMCCC hint bit to say there's no
+ * need to preserve it.  Do this by directly adjusting the SMCCC
+ * function value which is already stored in x0 ready to be called.
+ */
+SYM_FUNC_START(__arm_smccc_sve_check)
+
+	ldr_l	x16, smccc_has_sve_hint
+	cbz	x16, 2f
+
+	get_current_task x16
+	ldr	x16, [x16, #TSK_TI_FLAGS]
+	tbnz	x16, #TIF_FOREIGN_FPSTATE, 1f	// Any live FP state?
+	tbnz	x16, #TIF_SVE, 2f		// Does that state include SVE?
+
+1:	orr	x0, x0, ARM_SMCCC_1_3_SVE_HINT
+
+2:	ret
+SYM_FUNC_END(__arm_smccc_sve_check)
+EXPORT_SYMBOL(__arm_smccc_sve_check)
 
 	.macro SMCCC instr
+alternative_if ARM64_SVE
+	bl	__arm_smccc_sve_check
+alternative_else_nop_endif
 	\instr	#0
 	ldr	x4, [sp]
 	stp	x0, x1, [x4, #ARM_SMCCC_RES_X0_OFFS]
diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c
index 028f81d702cc5..9f937b125ab07 100644
--- a/drivers/firmware/smccc/smccc.c
+++ b/drivers/firmware/smccc/smccc.c
@@ -15,6 +15,7 @@ static u32 smccc_version = ARM_SMCCC_VERSION_1_0;
 static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE;
 
 bool __ro_after_init smccc_trng_available = false;
+u64 __ro_after_init smccc_has_sve_hint = false;
 
 void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
 {
@@ -22,6 +23,9 @@ void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit)
 	smccc_conduit = conduit;
 
 	smccc_trng_available = smccc_probe_trng();
+	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
+	    smccc_version >= ARM_SMCCC_VERSION_1_3)
+		smccc_has_sve_hint = true;
 }
 
 enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 5cef2b8b0479e..7d1cabe152622 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -63,6 +63,9 @@
 #define ARM_SMCCC_VERSION_1_0		0x10000
 #define ARM_SMCCC_VERSION_1_1		0x10001
 #define ARM_SMCCC_VERSION_1_2		0x10002
+#define ARM_SMCCC_VERSION_1_3		0x10003
+
+#define ARM_SMCCC_1_3_SVE_HINT		0x10000
 
 #define ARM_SMCCC_VERSION_FUNC_ID					\
 	ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL,				\
@@ -216,6 +219,8 @@ u32 arm_smccc_get_version(void);
 
 void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
 
+extern u64 smccc_has_sve_hint;
+
 /**
  * struct arm_smccc_res - Result from SMC/HVC call
  * @a0-a3 result values from registers 0 to 3
@@ -295,6 +300,15 @@ struct arm_smccc_quirk {
 	} state;
 };
 
+/**
+ * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
+ *
+ * Sets the SMCCC hint bit when there is no live state in the SVE
+ * registers. This modifies x0 in place and should never be called from C
+ * code.
+ */
+asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
+
 /**
  * __arm_smccc_smc() - make SMC calls
  * @a0-a7: arguments passed in registers 0 to 7
@@ -352,6 +366,20 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 
 #endif
 
+/* nVHE hypervisor doesn't have a current thread so needs separate checks */
+#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
+
+#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n",  "bl __arm_smccc_sve_check \n", \
+				    ARM64_SVE)
+#define smccc_sve_clobbers "x16", "x30", "cc",
+
+#else
+
+#define SMCCC_SVE_CHECK
+#define smccc_sve_clobbers
+
+#endif
+
 #define ___count_args(_0, _1, _2, _3, _4, _5, _6, _7, _8, x, ...) x
 
 #define __count_args(...)						\
@@ -419,7 +447,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 
 #define ___constraints(count)						\
 	: __constraint_read_ ## count					\
-	: "memory"
+	: smccc_sve_clobbers "memory"
 #define __constraints(count)	___constraints(count)
 
 /*
@@ -434,7 +462,8 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
 		register unsigned long r2 asm("r2");			\
 		register unsigned long r3 asm("r3"); 			\
 		__declare_args(__count_args(__VA_ARGS__), __VA_ARGS__);	\
-		asm volatile(inst "\n" :				\
+		asm volatile(SMCCC_SVE_CHECK				\
+			     inst "\n" :				\
 			     "=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3)	\
 			     __constraints(__count_args(__VA_ARGS__)));	\
 		if (___res)						\
-- 
GitLab


From ef9b7779688b2d4a772a5089aba2eacbe336779e Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:21 +0200
Subject: [PATCH 2436/3804] media: atmel: atmel-isc: create callback for RLP
 submodule product specific

The RLP submodule is a part of the atmel-isc pipeline, and stands for
Rounding, Limiting and Packaging. It is used to extract specific data from
the ISC pipeline. For example, if we want to output greyscale 8 bit, we would
use limiting to 8 bits, and packaging to Luma component only.

Create a product specific callback for initializing the RLP submodule
of the pipeline.

[hverkuil: made isc_sama5d2_config_rlp static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    |  6 ++----
 drivers/media/platform/atmel/atmel-isc.h         |  3 +++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 10 ++++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index aef0d6570d395..67c16ca176720 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -719,11 +719,10 @@ static void isc_set_histogram(struct isc_device *isc, bool enable)
 static int isc_configure(struct isc_device *isc)
 {
 	struct regmap *regmap = isc->regmap;
-	u32 pfe_cfg0, rlp_mode, dcfg, mask, pipeline;
+	u32 pfe_cfg0, dcfg, mask, pipeline;
 	struct isc_subdev_entity *subdev = isc->current_subdev;
 
 	pfe_cfg0 = isc->config.sd_format->pfe_cfg0_bps;
-	rlp_mode = isc->config.rlp_cfg_mode;
 	pipeline = isc->config.bits_pipeline;
 
 	dcfg = isc->config.dcfg_imode | isc->dcfg;
@@ -736,8 +735,7 @@ static int isc_configure(struct isc_device *isc)
 
 	regmap_update_bits(regmap, ISC_PFE_CFG0, mask, pfe_cfg0);
 
-	regmap_update_bits(regmap, ISC_RLP_CFG + isc->offsets.rlp,
-			   ISC_RLP_CFG_MODE_MASK, rlp_mode);
+	isc->config_rlp(isc);
 
 	regmap_write(regmap, ISC_DCFG + isc->offsets.dma, dcfg);
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 1e6988f1876ea..abeef7b2ab27f 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -231,6 +231,8 @@ struct isc_reg_offsets {
  *			specific CC module
  * @config_gam:		pointer to a function that initializes product
  *			specific GAMMA module
+ * @config_rlp:		pointer to a function that initializes product
+ *			specific RLP module
  * @config_ctrls:	pointer to a functoin that initializes product
  *			specific v4l2 controls.
  *
@@ -309,6 +311,7 @@ struct isc_device {
 		void (*config_cbc)(struct isc_device *isc);
 		void (*config_cc)(struct isc_device *isc);
 		void (*config_gam)(struct isc_device *isc);
+		void (*config_rlp)(struct isc_device *isc);
 
 		void (*config_ctrls)(struct isc_device *isc,
 				     const struct v4l2_ctrl_ops *ops);
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 6973c65822d8c..292532da26dad 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -117,6 +117,15 @@ static void isc_sama5d2_config_gam(struct isc_device *isc)
 	/* No specific gamma configuration */
 }
 
+static void isc_sama5d2_config_rlp(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+	u32 rlp_mode = isc->config.rlp_cfg_mode;
+
+	regmap_update_bits(regmap, ISC_RLP_CFG + isc->offsets.rlp,
+			   ISC_RLP_CFG_MODE_MASK, rlp_mode);
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -267,6 +276,7 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->config_cbc = isc_sama5d2_config_cbc;
 	isc->config_cc = isc_sama5d2_config_cc;
 	isc->config_gam = isc_sama5d2_config_gam;
+	isc->config_rlp = isc_sama5d2_config_rlp;
 	isc->config_ctrls = isc_sama5d2_config_ctrls;
 
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
-- 
GitLab


From 415dbe4efafa29896a9567c3054dd25a749b8857 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:22 +0200
Subject: [PATCH 2437/3804] media: atmel: atmel-isc: move the formats list into
 product specific code

The list of input and output formats has to be product specific.
Move this list into the product specific code.
Have pointers to these arrays inside the device struct.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 167 ++----------------
 drivers/media/platform/atmel/atmel-isc.h      |  12 +-
 .../media/platform/atmel/atmel-sama5d2-isc.c  | 136 ++++++++++++++
 3 files changed, 165 insertions(+), 150 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 67c16ca176720..90a62d43fdb17 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -45,137 +45,6 @@ module_param(sensor_preferred, uint, 0644);
 MODULE_PARM_DESC(sensor_preferred,
 		 "Sensor is preferred to output the specified format (1-on 0-off), default 1");
 
-/* This is a list of the formats that the ISC can *output* */
-const struct isc_format controller_formats[] = {
-	{
-		.fourcc		= V4L2_PIX_FMT_ARGB444,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_ARGB555,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_RGB565,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_ABGR32,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_XBGR32,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_YUV420,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_YUYV,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_YUV422P,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_GREY,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_Y10,
-	},
-};
-
-/* This is a list of formats that the ISC can receive as *input* */
-struct isc_format formats_list[] = {
-	{
-		.fourcc		= V4L2_PIX_FMT_SBGGR8,
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGBRG8,
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG8_1X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGRBG8,
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SRGGB8,
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB8_1X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SBGGR10,
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR10_1X10,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
-		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGBRG10,
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG10_1X10,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
-		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGRBG10,
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
-		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SRGGB10,
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB10_1X10,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
-		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SBGGR12,
-		.mbus_code	= MEDIA_BUS_FMT_SBGGR12_1X12,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
-		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGBRG12,
-		.mbus_code	= MEDIA_BUS_FMT_SGBRG12_1X12,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
-		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SGRBG12,
-		.mbus_code	= MEDIA_BUS_FMT_SGRBG12_1X12,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
-		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_SRGGB12,
-		.mbus_code	= MEDIA_BUS_FMT_SRGGB12_1X12,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
-		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_GREY,
-		.mbus_code	= MEDIA_BUS_FMT_Y8_1X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_YUYV,
-		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_RGB565,
-		.mbus_code	= MEDIA_BUS_FMT_RGB565_2X8_LE,
-		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
-	},
-	{
-		.fourcc		= V4L2_PIX_FMT_Y10,
-		.mbus_code	= MEDIA_BUS_FMT_Y10_1X10,
-		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
-	},
-
-};
-
 #define ISC_IS_FORMAT_RAW(mbus_code) \
 	(((mbus_code) & 0xf000) == 0x3000)
 
@@ -919,24 +788,25 @@ static int isc_querycap(struct file *file, void *priv,
 static int isc_enum_fmt_vid_cap(struct file *file, void *priv,
 				 struct v4l2_fmtdesc *f)
 {
+	struct isc_device *isc = video_drvdata(file);
 	u32 index = f->index;
 	u32 i, supported_index;
 
-	if (index < ARRAY_SIZE(controller_formats)) {
-		f->pixelformat = controller_formats[index].fourcc;
+	if (index < isc->controller_formats_size) {
+		f->pixelformat = isc->controller_formats[index].fourcc;
 		return 0;
 	}
 
-	index -= ARRAY_SIZE(controller_formats);
+	index -= isc->controller_formats_size;
 
 	supported_index = 0;
 
-	for (i = 0; i < ARRAY_SIZE(formats_list); i++) {
-		if (!ISC_IS_FORMAT_RAW(formats_list[i].mbus_code) ||
-		    !formats_list[i].sd_support)
+	for (i = 0; i < isc->formats_list_size; i++) {
+		if (!ISC_IS_FORMAT_RAW(isc->formats_list[i].mbus_code) ||
+		    !isc->formats_list[i].sd_support)
 			continue;
 		if (supported_index == index) {
-			f->pixelformat = formats_list[i].fourcc;
+			f->pixelformat = isc->formats_list[i].fourcc;
 			return 0;
 		}
 		supported_index++;
@@ -1477,8 +1347,8 @@ static int isc_enum_framesizes(struct file *file, void *fh,
 		if (isc->user_formats[i]->fourcc == fsize->pixel_format)
 			ret = 0;
 
-	for (i = 0; i < ARRAY_SIZE(controller_formats); i++)
-		if (controller_formats[i].fourcc == fsize->pixel_format)
+	for (i = 0; i < isc->controller_formats_size; i++)
+		if (isc->controller_formats[i].fourcc == fsize->pixel_format)
 			ret = 0;
 
 	if (ret)
@@ -1514,8 +1384,8 @@ static int isc_enum_frameintervals(struct file *file, void *fh,
 		if (isc->user_formats[i]->fourcc == fival->pixel_format)
 			ret = 0;
 
-	for (i = 0; i < ARRAY_SIZE(controller_formats); i++)
-		if (controller_formats[i].fourcc == fival->pixel_format)
+	for (i = 0; i < isc->controller_formats_size; i++)
+		if (isc->controller_formats[i].fourcc == fival->pixel_format)
 			ret = 0;
 
 	if (ret)
@@ -2126,12 +1996,13 @@ static void isc_async_unbind(struct v4l2_async_notifier *notifier,
 	v4l2_ctrl_handler_free(&isc->ctrls.handler);
 }
 
-static struct isc_format *find_format_by_code(unsigned int code, int *index)
+static struct isc_format *find_format_by_code(struct isc_device *isc,
+					      unsigned int code, int *index)
 {
-	struct isc_format *fmt = &formats_list[0];
+	struct isc_format *fmt = &isc->formats_list[0];
 	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(formats_list); i++) {
+	for (i = 0; i < isc->formats_list_size; i++) {
 		if (fmt->mbus_code == code) {
 			*index = i;
 			return fmt;
@@ -2148,7 +2019,7 @@ static int isc_formats_init(struct isc_device *isc)
 	struct isc_format *fmt;
 	struct v4l2_subdev *subdev = isc->current_subdev->sd;
 	unsigned int num_fmts, i, j;
-	u32 list_size = ARRAY_SIZE(formats_list);
+	u32 list_size = isc->formats_list_size;
 	struct v4l2_subdev_mbus_code_enum mbus_code = {
 		.which = V4L2_SUBDEV_FORMAT_ACTIVE,
 	};
@@ -2158,7 +2029,7 @@ static int isc_formats_init(struct isc_device *isc)
 	       NULL, &mbus_code)) {
 		mbus_code.index++;
 
-		fmt = find_format_by_code(mbus_code.code, &i);
+		fmt = find_format_by_code(isc, mbus_code.code, &i);
 		if (!fmt) {
 			v4l2_warn(&isc->v4l2_dev, "Mbus code %x not supported\n",
 				  mbus_code.code);
@@ -2179,7 +2050,7 @@ static int isc_formats_init(struct isc_device *isc)
 	if (!isc->user_formats)
 		return -ENOMEM;
 
-	fmt = &formats_list[0];
+	fmt = &isc->formats_list[0];
 	for (i = 0, j = 0; i < list_size; i++) {
 		if (fmt->sd_support)
 			isc->user_formats[j++] = fmt;
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index abeef7b2ab27f..14e318a7373c5 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -237,6 +237,12 @@ struct isc_reg_offsets {
  *			specific v4l2 controls.
  *
  * @offsets:		struct holding the product specific register offsets
+ * @controller_formats:	pointer to the array of possible formats that the
+ *			controller can output
+ * @formats_list:	pointer to the array of possible formats that can
+ *			be used as an input to the controller
+ * @controller_formats_size:	size of controller_formats array
+ * @formats_list_size:	size of formats_list array
  */
 struct isc_device {
 	struct regmap		*regmap;
@@ -318,10 +324,12 @@ struct isc_device {
 	};
 
 	struct isc_reg_offsets		offsets;
+	const struct isc_format		*controller_formats;
+	struct isc_format		*formats_list;
+	u32				controller_formats_size;
+	u32				formats_list_size;
 };
 
-extern struct isc_format formats_list[];
-extern const struct isc_format controller_formats[];
 extern const struct regmap_config isc_regmap_config;
 extern const struct v4l2_async_notifier_operations isc_async_ops;
 
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 292532da26dad..95cad167a264a 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -54,6 +54,137 @@
 
 #define ISC_CLK_MAX_DIV		255
 
+/* This is a list of the formats that the ISC can *output* */
+static const struct isc_format sama5d2_controller_formats[] = {
+	{
+		.fourcc		= V4L2_PIX_FMT_ARGB444,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_ARGB555,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_RGB565,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_ABGR32,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_XBGR32,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUV420,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUYV,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUV422P,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_GREY,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_Y10,
+	},
+};
+
+/* This is a list of formats that the ISC can receive as *input* */
+static struct isc_format sama5d2_formats_list[] = {
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR8,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG8,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG8,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB8,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR10,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG10,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG10,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB10,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR12,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG12,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG12,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB12,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_GREY,
+		.mbus_code	= MEDIA_BUS_FMT_Y8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUYV,
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_RGB565,
+		.mbus_code	= MEDIA_BUS_FMT_RGB565_2X8_LE,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_Y10,
+		.mbus_code	= MEDIA_BUS_FMT_Y10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+	},
+
+};
+
 static void isc_sama5d2_config_csc(struct isc_device *isc)
 {
 	struct regmap *regmap = isc->regmap;
@@ -289,6 +420,11 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->offsets.version = ISC_SAMA5D2_VERSION_OFFSET;
 	isc->offsets.his_entry = ISC_SAMA5D2_HIS_ENTRY_OFFSET;
 
+	isc->controller_formats = sama5d2_controller_formats;
+	isc->controller_formats_size = ARRAY_SIZE(sama5d2_controller_formats);
+	isc->formats_list = sama5d2_formats_list;
+	isc->formats_list_size = ARRAY_SIZE(sama5d2_formats_list);
+
 	/* sama5d2-isc - 8 bits per beat */
 	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS8 | ISC_DCFG_CMBSIZE_BEATS8;
 
-- 
GitLab


From 049a38fc9681b3c6103496104b22d49b60660a64 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:23 +0200
Subject: [PATCH 2438/3804] media: atmel: atmel-isc: create an adapt pipeline
 callback for product specific

Once the pipeline is set in the base code, create a callback that will adapt
the ISC pipeline to each product.
Create the adapt_pipeline callback that will be used in this fashion.

[hverkuil: made isc_sama5d2_adapt_pipeline static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c    |  4 ++++
 drivers/media/platform/atmel/atmel-isc.h         |  5 +++++
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 11 +++++++++++
 3 files changed, 20 insertions(+)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 90a62d43fdb17..7862d6bf850d0 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -1059,6 +1059,10 @@ static int isc_try_configure_pipeline(struct isc_device *isc)
 	default:
 		isc->try_config.bits_pipeline = 0x0;
 	}
+
+	/* Tune the pipeline to product specific */
+	isc->adapt_pipeline(isc);
+
 	return 0;
 }
 
diff --git a/drivers/media/platform/atmel/atmel-isc.h b/drivers/media/platform/atmel/atmel-isc.h
index 14e318a7373c5..19cc60dfcbe0f 100644
--- a/drivers/media/platform/atmel/atmel-isc.h
+++ b/drivers/media/platform/atmel/atmel-isc.h
@@ -236,6 +236,9 @@ struct isc_reg_offsets {
  * @config_ctrls:	pointer to a functoin that initializes product
  *			specific v4l2 controls.
  *
+ * @adapt_pipeline:	pointer to a function that adapts the pipeline bits
+ *			to the product specific pipeline
+ *
  * @offsets:		struct holding the product specific register offsets
  * @controller_formats:	pointer to the array of possible formats that the
  *			controller can output
@@ -321,6 +324,8 @@ struct isc_device {
 
 		void (*config_ctrls)(struct isc_device *isc,
 				     const struct v4l2_ctrl_ops *ops);
+
+		void (*adapt_pipeline)(struct isc_device *isc);
 	};
 
 	struct isc_reg_offsets		offsets;
diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 95cad167a264a..8579f27800ae6 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -54,6 +54,10 @@
 
 #define ISC_CLK_MAX_DIV		255
 
+#define ISC_SAMA5D2_PIPELINE \
+	(WB_ENABLE | CFA_ENABLE | CC_ENABLE | GAM_ENABLES | CSC_ENABLE | \
+	CBC_ENABLE | SUB422_ENABLE | SUB420_ENABLE)
+
 /* This is a list of the formats that the ISC can *output* */
 static const struct isc_format sama5d2_controller_formats[] = {
 	{
@@ -257,6 +261,11 @@ static void isc_sama5d2_config_rlp(struct isc_device *isc)
 			   ISC_RLP_CFG_MODE_MASK, rlp_mode);
 }
 
+static void isc_sama5d2_adapt_pipeline(struct isc_device *isc)
+{
+	isc->try_config.bits_pipeline &= ISC_SAMA5D2_PIPELINE;
+}
+
 /* Gamma table with gamma 1/2.2 */
 static const u32 isc_sama5d2_gamma_table[][GAMMA_ENTRIES] = {
 	/* 0 --> gamma 1/1.8 */
@@ -410,6 +419,8 @@ static int atmel_isc_probe(struct platform_device *pdev)
 	isc->config_rlp = isc_sama5d2_config_rlp;
 	isc->config_ctrls = isc_sama5d2_config_ctrls;
 
+	isc->adapt_pipeline = isc_sama5d2_adapt_pipeline;
+
 	isc->offsets.csc = ISC_SAMA5D2_CSC_OFFSET;
 	isc->offsets.cbc = ISC_SAMA5D2_CBC_OFFSET;
 	isc->offsets.sub422 = ISC_SAMA5D2_SUB422_OFFSET;
-- 
GitLab


From 0baf7a3241ac5d41de833f3b3df0961f40802b44 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:24 +0200
Subject: [PATCH 2439/3804] media: atmel: atmel-isc-regs: add additional fields
 for sama7g5 type pipeline

Add additional fields for registers present in sama7g5 type pipeline.
Extend register masks for additional bits in sama7g5 type pipeline registers.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-regs.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 457eed74cda9f..5f99bf7717c1f 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -289,8 +289,18 @@
 #define ISC_RLP_CFG_MODE_ARGB32         0xa
 #define ISC_RLP_CFG_MODE_YYCC           0xb
 #define ISC_RLP_CFG_MODE_YYCC_LIMITED   0xc
+#define ISC_RLP_CFG_MODE_YCYC           0xd
 #define ISC_RLP_CFG_MODE_MASK           GENMASK(3, 0)
 
+#define ISC_RLP_CFG_LSH			BIT(5)
+
+#define ISC_RLP_CFG_YMODE_YUYV		(3 << 6)
+#define ISC_RLP_CFG_YMODE_YVYU		(2 << 6)
+#define ISC_RLP_CFG_YMODE_VYUY		(0 << 6)
+#define ISC_RLP_CFG_YMODE_UYVY		(1 << 6)
+
+#define ISC_RLP_CFG_YMODE_MASK		GENMASK(7, 6)
+
 /* Offset for HIS register specific to sama5d2 product */
 #define ISC_SAMA5D2_HIS_OFFSET	0
 /* Histogram Control Register */
@@ -332,13 +342,15 @@
 #define ISC_DCFG_YMBSIZE_BEATS4         (0x1 << 4)
 #define ISC_DCFG_YMBSIZE_BEATS8         (0x2 << 4)
 #define ISC_DCFG_YMBSIZE_BEATS16        (0x3 << 4)
-#define ISC_DCFG_YMBSIZE_MASK           GENMASK(5, 4)
+#define ISC_DCFG_YMBSIZE_BEATS32        (0x4 << 4)
+#define ISC_DCFG_YMBSIZE_MASK           GENMASK(6, 4)
 
 #define ISC_DCFG_CMBSIZE_SINGLE         (0x0 << 8)
 #define ISC_DCFG_CMBSIZE_BEATS4         (0x1 << 8)
 #define ISC_DCFG_CMBSIZE_BEATS8         (0x2 << 8)
 #define ISC_DCFG_CMBSIZE_BEATS16        (0x3 << 8)
-#define ISC_DCFG_CMBSIZE_MASK           GENMASK(9, 8)
+#define ISC_DCFG_CMBSIZE_BEATS32        (0x4 << 8)
+#define ISC_DCFG_CMBSIZE_MASK           GENMASK(10, 8)
 
 /* DMA Control Register */
 #define ISC_DCTRL       0x000003e4
-- 
GitLab


From debfa496871c181b658def0f2b200302bd9b1216 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:25 +0200
Subject: [PATCH 2440/3804] media: atmel: atmel-isc-base: add support for more
 formats and additional pipeline modules

Add support for additional formats supported by newer pipelines, and for
additional pipeline modules.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-isc-base.c | 48 +++++++++++++++----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 7862d6bf850d0..dcb321ad10b85 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -855,6 +855,8 @@ static int isc_try_validate_formats(struct isc_device *isc)
 	case V4L2_PIX_FMT_YUV420:
 	case V4L2_PIX_FMT_YUV422P:
 	case V4L2_PIX_FMT_YUYV:
+	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_VYUY:
 		ret = 0;
 		yuv = true;
 		break;
@@ -869,6 +871,7 @@ static int isc_try_validate_formats(struct isc_device *isc)
 		break;
 	case V4L2_PIX_FMT_GREY:
 	case V4L2_PIX_FMT_Y10:
+	case V4L2_PIX_FMT_Y16:
 		ret = 0;
 		grey = true;
 		break;
@@ -899,6 +902,8 @@ static int isc_try_validate_formats(struct isc_device *isc)
  */
 static int isc_try_configure_rlp_dma(struct isc_device *isc, bool direct_dump)
 {
+	isc->try_config.rlp_cfg_mode = 0;
+
 	switch (isc->try_config.fourcc) {
 	case V4L2_PIX_FMT_SBGGR8:
 	case V4L2_PIX_FMT_SGBRG8:
@@ -965,7 +970,19 @@ static int isc_try_configure_rlp_dma(struct isc_device *isc, bool direct_dump)
 		isc->try_config.bpp = 16;
 		break;
 	case V4L2_PIX_FMT_YUYV:
-		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_YYCC;
+		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_YCYC | ISC_RLP_CFG_YMODE_YUYV;
+		isc->try_config.dcfg_imode = ISC_DCFG_IMODE_PACKED32;
+		isc->try_config.dctrl_dview = ISC_DCTRL_DVIEW_PACKED;
+		isc->try_config.bpp = 16;
+		break;
+	case V4L2_PIX_FMT_UYVY:
+		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_YCYC | ISC_RLP_CFG_YMODE_UYVY;
+		isc->try_config.dcfg_imode = ISC_DCFG_IMODE_PACKED32;
+		isc->try_config.dctrl_dview = ISC_DCTRL_DVIEW_PACKED;
+		isc->try_config.bpp = 16;
+		break;
+	case V4L2_PIX_FMT_VYUY:
+		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_YCYC | ISC_RLP_CFG_YMODE_VYUY;
 		isc->try_config.dcfg_imode = ISC_DCFG_IMODE_PACKED32;
 		isc->try_config.dctrl_dview = ISC_DCTRL_DVIEW_PACKED;
 		isc->try_config.bpp = 16;
@@ -976,8 +993,11 @@ static int isc_try_configure_rlp_dma(struct isc_device *isc, bool direct_dump)
 		isc->try_config.dctrl_dview = ISC_DCTRL_DVIEW_PACKED;
 		isc->try_config.bpp = 8;
 		break;
+	case V4L2_PIX_FMT_Y16:
+		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_DATY10 | ISC_RLP_CFG_LSH;
+		fallthrough;
 	case V4L2_PIX_FMT_Y10:
-		isc->try_config.rlp_cfg_mode = ISC_RLP_CFG_MODE_DATY10;
+		isc->try_config.rlp_cfg_mode |= ISC_RLP_CFG_MODE_DATY10;
 		isc->try_config.dcfg_imode = ISC_DCFG_IMODE_PACKED16;
 		isc->try_config.dctrl_dview = ISC_DCTRL_DVIEW_PACKED;
 		isc->try_config.bpp = 16;
@@ -1011,7 +1031,8 @@ static int isc_try_configure_pipeline(struct isc_device *isc)
 		/* if sensor format is RAW, we convert inside ISC */
 		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
 			isc->try_config.bits_pipeline = CFA_ENABLE |
-				WB_ENABLE | GAM_ENABLES;
+				WB_ENABLE | GAM_ENABLES | DPC_BLCENABLE |
+				CC_ENABLE;
 		} else {
 			isc->try_config.bits_pipeline = 0x0;
 		}
@@ -1020,8 +1041,9 @@ static int isc_try_configure_pipeline(struct isc_device *isc)
 		/* if sensor format is RAW, we convert inside ISC */
 		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
 			isc->try_config.bits_pipeline = CFA_ENABLE |
-				CSC_ENABLE | WB_ENABLE | GAM_ENABLES |
-				SUB420_ENABLE | SUB422_ENABLE | CBC_ENABLE;
+				CSC_ENABLE | GAM_ENABLES | WB_ENABLE |
+				SUB420_ENABLE | SUB422_ENABLE | CBC_ENABLE |
+				DPC_BLCENABLE;
 		} else {
 			isc->try_config.bits_pipeline = 0x0;
 		}
@@ -1031,33 +1053,39 @@ static int isc_try_configure_pipeline(struct isc_device *isc)
 		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
 			isc->try_config.bits_pipeline = CFA_ENABLE |
 				CSC_ENABLE | WB_ENABLE | GAM_ENABLES |
-				SUB422_ENABLE | CBC_ENABLE;
+				SUB422_ENABLE | CBC_ENABLE | DPC_BLCENABLE;
 		} else {
 			isc->try_config.bits_pipeline = 0x0;
 		}
 		break;
 	case V4L2_PIX_FMT_YUYV:
+	case V4L2_PIX_FMT_UYVY:
+	case V4L2_PIX_FMT_VYUY:
 		/* if sensor format is RAW, we convert inside ISC */
 		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
 			isc->try_config.bits_pipeline = CFA_ENABLE |
 				CSC_ENABLE | WB_ENABLE | GAM_ENABLES |
-				SUB422_ENABLE | CBC_ENABLE;
+				SUB422_ENABLE | CBC_ENABLE | DPC_BLCENABLE;
 		} else {
 			isc->try_config.bits_pipeline = 0x0;
 		}
 		break;
 	case V4L2_PIX_FMT_GREY:
-		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
+	case V4L2_PIX_FMT_Y16:
 		/* if sensor format is RAW, we convert inside ISC */
+		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code)) {
 			isc->try_config.bits_pipeline = CFA_ENABLE |
 				CSC_ENABLE | WB_ENABLE | GAM_ENABLES |
-				CBC_ENABLE;
+				CBC_ENABLE | DPC_BLCENABLE;
 		} else {
 			isc->try_config.bits_pipeline = 0x0;
 		}
 		break;
 	default:
-		isc->try_config.bits_pipeline = 0x0;
+		if (ISC_IS_FORMAT_RAW(isc->try_config.sd_format->mbus_code))
+			isc->try_config.bits_pipeline = WB_ENABLE | DPC_BLCENABLE;
+		else
+			isc->try_config.bits_pipeline = 0x0;
 	}
 
 	/* Tune the pipeline to product specific */
-- 
GitLab


From 2672a9397221d6ded067a205211897f3e3d712dc Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:26 +0200
Subject: [PATCH 2441/3804] media: atmel: atmel-isc-sama5d2: remove duplicate
 define

Remove a duplicate definition of the clock max divider (ISC_CLK_MAX_DIV).

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/atmel/atmel-sama5d2-isc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-sama5d2-isc.c b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
index 8579f27800ae6..925aa80a139b2 100644
--- a/drivers/media/platform/atmel/atmel-sama5d2-isc.c
+++ b/drivers/media/platform/atmel/atmel-sama5d2-isc.c
@@ -52,8 +52,6 @@
 #define ISC_SAMA5D2_MAX_SUPPORT_WIDTH   2592
 #define ISC_SAMA5D2_MAX_SUPPORT_HEIGHT  1944
 
-#define ISC_CLK_MAX_DIV		255
-
 #define ISC_SAMA5D2_PIPELINE \
 	(WB_ENABLE | CFA_ENABLE | CC_ENABLE | GAM_ENABLES | CSC_ENABLE | \
 	CBC_ENABLE | SUB422_ENABLE | SUB420_ENABLE)
-- 
GitLab


From 54203301d02a3afff13a002f3c2cffb30f59a2fb Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Thu, 15 Apr 2021 10:55:30 +0200
Subject: [PATCH 2442/3804] media: dt-bindings: media: atmel-isc: convert to
 yaml

Convert the Atmel ISC to yaml binding format.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/atmel,isc.yaml  | 114 ++++++++++++++++++
 .../devicetree/bindings/media/atmel-isc.txt   |  65 ----------
 2 files changed, 114 insertions(+), 65 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/media/atmel,isc.yaml
 delete mode 100644 Documentation/devicetree/bindings/media/atmel-isc.txt

diff --git a/Documentation/devicetree/bindings/media/atmel,isc.yaml b/Documentation/devicetree/bindings/media/atmel,isc.yaml
new file mode 100644
index 0000000000000..3e4bb8892d941
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/atmel,isc.yaml
@@ -0,0 +1,114 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2016-2021 Microchip Technology, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/atmel,isc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Atmel Image Sensor Controller (ISC)
+
+maintainers:
+  - Eugen Hristev <eugen.hristev@microchip.com>
+
+description: |
+  The Image Sensor Controller (ISC) device provides the video input capabilities for the
+  Atmel/Microchip AT91 SAMA family of devices.
+
+  The ISC has a single parallel input that supports RAW Bayer, RGB or YUV video,
+  with both external synchronization and BT.656 synchronization for the latter.
+
+properties:
+  compatible:
+    const: atmel,sama5d2-isc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    maxItems: 3
+
+  clock-names:
+    items:
+      - const: hclock
+      - const: iscck
+      - const: gck
+
+  '#clock-cells':
+    const: 0
+
+  clock-output-names:
+    const: isc-mck
+
+  port:
+    $ref: /schemas/graph.yaml#/properties/port
+    description:
+      Input port node, single endpoint describing the input pad.
+
+    properties:
+      endpoint:
+        $ref: video-interfaces.yaml#
+
+        properties:
+          remote-endpoint: true
+
+          bus-width:
+            enum: [8, 9, 10, 11, 12]
+            default: 12
+
+          hsync-active:
+            enum: [0, 1]
+            default: 1
+
+          vsync-active:
+            enum: [0, 1]
+            default: 1
+
+          pclk-sample:
+            enum: [0, 1]
+            default: 1
+
+        required:
+          - remote-endpoint
+
+        additionalProperties: false
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+  - clock-output-names
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    isc: isc@f0008000 {
+        compatible = "atmel,sama5d2-isc";
+        reg = <0xf0008000 0x4000>;
+        interrupts = <46 IRQ_TYPE_LEVEL_HIGH 5>;
+        clocks = <&isc_clk>, <&iscck>, <&isc_gclk>;
+        clock-names = "hclock", "iscck", "gck";
+        #clock-cells = <0>;
+        clock-output-names = "isc-mck";
+
+        port {
+                isc_0: endpoint {
+                       remote-endpoint = <&ov7740_0>;
+                       hsync-active = <1>;
+                       vsync-active = <0>;
+                       pclk-sample = <1>;
+                       bus-width = <8>;
+                };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/media/atmel-isc.txt b/Documentation/devicetree/bindings/media/atmel-isc.txt
deleted file mode 100644
index bbe0e87c6188d..0000000000000
--- a/Documentation/devicetree/bindings/media/atmel-isc.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Atmel Image Sensor Controller (ISC)
-----------------------------------------------
-
-Required properties for ISC:
-- compatible
-	Must be "atmel,sama5d2-isc".
-- reg
-	Physical base address and length of the registers set for the device.
-- interrupts
-	Should contain IRQ line for the ISC.
-- clocks
-	List of clock specifiers, corresponding to entries in
-	the clock-names property;
-	Please refer to clock-bindings.txt.
-- clock-names
-	Required elements: "hclock", "iscck", "gck".
-- #clock-cells
-	Should be 0.
-- clock-output-names
-	Should be "isc-mck".
-- pinctrl-names, pinctrl-0
-	Please refer to pinctrl-bindings.txt.
-
-ISC supports a single port node with parallel bus. It should contain one
-'port' child node with child 'endpoint' node. Please refer to the bindings
-defined in Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-isc: isc@f0008000 {
-	compatible = "atmel,sama5d2-isc";
-	reg = <0xf0008000 0x4000>;
-	interrupts = <46 IRQ_TYPE_LEVEL_HIGH 5>;
-	clocks = <&isc_clk>, <&iscck>, <&isc_gclk>;
-	clock-names = "hclock", "iscck", "gck";
-	#clock-cells = <0>;
-	clock-output-names = "isc-mck";
-	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_isc_base &pinctrl_isc_data_8bit &pinctrl_isc_data_9_10 &pinctrl_isc_data_11_12>;
-
-	port {
-		isc_0: endpoint {
-			remote-endpoint = <&ov7740_0>;
-			hsync-active = <1>;
-			vsync-active = <0>;
-			pclk-sample = <1>;
-		};
-	};
-};
-
-i2c1: i2c@fc028000 {
-	ov7740: camera@21 {
-		compatible = "ovti,ov7740";
-		reg = <0x21>;
-		clocks = <&isc>;
-		clock-names = "xvclk";
-		assigned-clocks = <&isc>;
-		assigned-clock-rates = <24000000>;
-
-		port {
-			ov7740_0: endpoint {
-				remote-endpoint = <&isc_0>;
-			};
-		};
-	};
-};
-- 
GitLab


From 7b8d3d03df83aae74519b34022e95dec577af1df Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Thu, 15 Apr 2021 20:45:00 +0200
Subject: [PATCH 2443/3804] media: dt-bindings: media: add microchip,xisc
 device bindings

Add bindings for the Microchip eXtended Image Sensor Controller.
Based on the atmel,isc.yaml binding.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/microchip,xisc.yaml        | 129 ++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/microchip,xisc.yaml

diff --git a/Documentation/devicetree/bindings/media/microchip,xisc.yaml b/Documentation/devicetree/bindings/media/microchip,xisc.yaml
new file mode 100644
index 0000000000000..41afe2e5f133d
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/microchip,xisc.yaml
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2021 Microchip Technology, Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/microchip,xisc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip eXtended Image Sensor Controller (XISC)
+
+maintainers:
+  - Eugen Hristev <eugen.hristev@microchip.com>
+
+description: |
+  The eXtended Image Sensor Controller (XISC) device provides the video input capabilities for the
+  Microchip AT91 SAM family of devices.
+
+  The XISC has a single internal parallel input that supports RAW Bayer, RGB or YUV video.
+  The source can be either a demuxer from a CSI2 type of bus, or a simple direct bridge to a
+  parallel sensor.
+
+  The XISC provides one clock output that is used to clock the demuxer/bridge.
+
+properties:
+  compatible:
+    const: microchip,sama7g5-isc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: hclock
+
+  '#clock-cells':
+    const: 0
+
+  clock-output-names:
+    const: isc-mck
+
+  microchip,mipi-mode:
+    type: boolean
+    description:
+      As the XISC is usually connected to a demux/bridge, it always receives
+      the same type of input. However, it must be aware of the type of
+      signals received. The mipi-mode enables different internal handling
+      of the data and clock lines.
+
+  port:
+    $ref: /schemas/graph.yaml#/properties/port
+    description:
+      Input port node, single endpoint describing the input pad.
+
+    properties:
+      endpoint:
+        $ref: video-interfaces.yaml#
+
+        properties:
+          bus-type:
+            enum: [5, 6]
+
+          remote-endpoint: true
+
+          bus-width:
+            enum: [8, 9, 10, 11, 12]
+            default: 12
+
+          hsync-active:
+            enum: [0, 1]
+            default: 1
+
+          vsync-active:
+            enum: [0, 1]
+            default: 1
+
+          pclk-sample:
+            enum: [0, 1]
+            default: 1
+
+        required:
+          - remote-endpoint
+          - bus-type
+
+        additionalProperties: false
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+  - clock-output-names
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/at91.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    xisc: xisc@e1408000 {
+        compatible = "microchip,sama7g5-isc";
+        reg = <0xe1408000 0x2000>;
+        interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+        clocks = <&pmc PMC_TYPE_PERIPHERAL 56>;
+        clock-names = "hclock";
+        #clock-cells = <0>;
+        clock-output-names = "isc-mck";
+
+        port {
+                xisc_in: endpoint {
+                       bus-type = <5>; /* Parallel */
+                       remote-endpoint = <&csi2dc_out>;
+                       hsync-active = <1>;
+                       vsync-active = <1>;
+                       bus-width = <12>;
+                };
+        };
+    };
+
-- 
GitLab


From c9aa973884a163ecb6d5d4d3be9137058adcaf8c Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:29 +0200
Subject: [PATCH 2444/3804] media: atmel: atmel-isc: add microchip-xisc driver

Add a driver for the extended variant of the ISC, the Microchip XISC
present on the sama7g5 product.

[hverkuil: drop MODULE_SUPPORTED_DEVICE, no longer exists]
[hverkuil: made isc_sama7g5_config_csc et al static]
[hverkuil: made sama7g5_controller_formats et al static]

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/Makefile               |   1 +
 drivers/media/platform/atmel/Kconfig          |  11 +
 drivers/media/platform/atmel/Makefile         |   2 +
 drivers/media/platform/atmel/atmel-isc-base.c |   2 +-
 drivers/media/platform/atmel/atmel-isc-regs.h |  26 +
 .../media/platform/atmel/atmel-sama7g5-isc.c  | 630 ++++++++++++++++++
 6 files changed, 671 insertions(+), 1 deletion(-)
 create mode 100644 drivers/media/platform/atmel/atmel-sama7g5-isc.c

diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index eedc14aafb32c..73ce083c2fc6b 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_VIDEO_RCAR_VIN)		+= rcar-vin/
 
 obj-$(CONFIG_VIDEO_ATMEL_ISC)		+= atmel/
 obj-$(CONFIG_VIDEO_ATMEL_ISI)		+= atmel/
+obj-$(CONFIG_VIDEO_ATMEL_XISC)		+= atmel/
 
 obj-$(CONFIG_VIDEO_STM32_DCMI)		+= stm32/
 
diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig
index 1850fe7f9360b..99b51213f8712 100644
--- a/drivers/media/platform/atmel/Kconfig
+++ b/drivers/media/platform/atmel/Kconfig
@@ -12,6 +12,17 @@ config VIDEO_ATMEL_ISC
 	   This module makes the ATMEL Image Sensor Controller available
 	   as a v4l2 device.
 
+config VIDEO_ATMEL_XISC
+	tristate "ATMEL eXtended Image Sensor Controller (XISC) support"
+	depends on VIDEO_V4L2 && COMMON_CLK && VIDEO_V4L2_SUBDEV_API
+	depends on ARCH_AT91 || COMPILE_TEST
+	select VIDEOBUF2_DMA_CONTIG
+	select REGMAP_MMIO
+	select V4L2_FWNODE
+	help
+	   This module makes the ATMEL eXtended Image Sensor Controller
+	   available as a v4l2 device.
+
 config VIDEO_ATMEL_ISI
 	tristate "ATMEL Image Sensor Interface (ISI) support"
 	depends on VIDEO_V4L2 && OF
diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile
index 2dba38994a701..c5c01556c6538 100644
--- a/drivers/media/platform/atmel/Makefile
+++ b/drivers/media/platform/atmel/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o
+atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o
 
 obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o
 obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o
+obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o
diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index dcb321ad10b85..46c6e3e20f33f 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -600,7 +600,7 @@ static int isc_configure(struct isc_device *isc)
 	mask = ISC_PFE_CFG0_BPS_MASK | ISC_PFE_CFG0_HPOL_LOW |
 	       ISC_PFE_CFG0_VPOL_LOW | ISC_PFE_CFG0_PPOL_LOW |
 	       ISC_PFE_CFG0_MODE_MASK | ISC_PFE_CFG0_CCIR_CRC |
-		   ISC_PFE_CFG0_CCIR656;
+	       ISC_PFE_CFG0_CCIR656 | ISC_PFE_CFG0_MIPI;
 
 	regmap_update_bits(regmap, ISC_PFE_CFG0, mask, pfe_cfg0);
 
diff --git a/drivers/media/platform/atmel/atmel-isc-regs.h b/drivers/media/platform/atmel/atmel-isc-regs.h
index 5f99bf7717c1f..d06b72228d4fa 100644
--- a/drivers/media/platform/atmel/atmel-isc-regs.h
+++ b/drivers/media/platform/atmel/atmel-isc-regs.h
@@ -26,6 +26,7 @@
 #define ISC_PFE_CFG0_PPOL_LOW   BIT(2)
 #define ISC_PFE_CFG0_CCIR656    BIT(9)
 #define ISC_PFE_CFG0_CCIR_CRC   BIT(10)
+#define ISC_PFE_CFG0_MIPI	BIT(14)
 
 #define ISC_PFE_CFG0_MODE_PROGRESSIVE   (0x0 << 4)
 #define ISC_PFE_CFG0_MODE_MASK          GENMASK(6, 4)
@@ -184,6 +185,8 @@
 /* ISC Gamma Correction Control Register */
 #define ISC_GAM_CTRL    0x00000094
 
+#define ISC_GAM_CTRL_BIPART	BIT(4)
+
 /* ISC_Gamma Correction Blue Entry Register */
 #define ISC_GAM_BENTRY	0x00000098
 
@@ -222,6 +225,8 @@
 
 /* Offset for CSC register specific to sama5d2 product */
 #define ISC_SAMA5D2_CSC_OFFSET	0
+/* Offset for CSC register specific to sama7g5 product */
+#define ISC_SAMA7G5_CSC_OFFSET	0x11c
 
 /* Color Space Conversion Control Register */
 #define ISC_CSC_CTRL    0x00000398
@@ -246,6 +251,8 @@
 
 /* Offset for CBC register specific to sama5d2 product */
 #define ISC_SAMA5D2_CBC_OFFSET	0
+/* Offset for CBC register specific to sama7g5 product */
+#define ISC_SAMA7G5_CBC_OFFSET	0x11c
 
 /* Contrast And Brightness Control Register */
 #define ISC_CBC_CTRL    0x000003b4
@@ -261,18 +268,30 @@
 #define ISC_CBC_CONTRAST	0x000003c0
 #define ISC_CBC_CONTRAST_MASK	GENMASK(11, 0)
 
+/* Hue Register */
+#define ISC_CBCHS_HUE	0x4e0
+/* Saturation Register */
+#define ISC_CBCHS_SAT	0x4e4
+
 /* Offset for SUB422 register specific to sama5d2 product */
 #define ISC_SAMA5D2_SUB422_OFFSET	0
+/* Offset for SUB422 register specific to sama7g5 product */
+#define ISC_SAMA7G5_SUB422_OFFSET	0x124
+
 /* Subsampling 4:4:4 to 4:2:2 Control Register */
 #define ISC_SUB422_CTRL 0x000003c4
 
 /* Offset for SUB420 register specific to sama5d2 product */
 #define ISC_SAMA5D2_SUB420_OFFSET	0
+/* Offset for SUB420 register specific to sama7g5 product */
+#define ISC_SAMA7G5_SUB420_OFFSET	0x124
 /* Subsampling 4:2:2 to 4:2:0 Control Register */
 #define ISC_SUB420_CTRL 0x000003cc
 
 /* Offset for RLP register specific to sama5d2 product */
 #define ISC_SAMA5D2_RLP_OFFSET	0
+/* Offset for RLP register specific to sama7g5 product */
+#define ISC_SAMA7G5_RLP_OFFSET	0x124
 /* Rounding, Limiting and Packing Configuration Register */
 #define ISC_RLP_CFG     0x000003d0
 
@@ -303,6 +322,8 @@
 
 /* Offset for HIS register specific to sama5d2 product */
 #define ISC_SAMA5D2_HIS_OFFSET	0
+/* Offset for HIS register specific to sama7g5 product */
+#define ISC_SAMA7G5_HIS_OFFSET	0x124
 /* Histogram Control Register */
 #define ISC_HIS_CTRL	0x000003d4
 
@@ -326,6 +347,8 @@
 
 /* Offset for DMA register specific to sama5d2 product */
 #define ISC_SAMA5D2_DMA_OFFSET	0
+/* Offset for DMA register specific to sama7g5 product */
+#define ISC_SAMA7G5_DMA_OFFSET	0x13c
 
 /* DMA Configuration Register */
 #define ISC_DCFG        0x000003e0
@@ -376,11 +399,14 @@
 
 /* Offset for version register specific to sama5d2 product */
 #define ISC_SAMA5D2_VERSION_OFFSET	0
+#define ISC_SAMA7G5_VERSION_OFFSET	0x13c
 /* Version Register */
 #define ISC_VERSION	0x0000040c
 
 /* Offset for version register specific to sama5d2 product */
 #define ISC_SAMA5D2_HIS_ENTRY_OFFSET	0
+/* Offset for histogram entry register specific to sama7g5 product */
+#define ISC_SAMA7G5_HIS_ENTRY_OFFSET	0x14c
 /* Histogram Entry */
 #define ISC_HIS_ENTRY	0x00000410
 
diff --git a/drivers/media/platform/atmel/atmel-sama7g5-isc.c b/drivers/media/platform/atmel/atmel-sama7g5-isc.c
new file mode 100644
index 0000000000000..f2785131ff569
--- /dev/null
+++ b/drivers/media/platform/atmel/atmel-sama7g5-isc.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip eXtended Image Sensor Controller (XISC) driver
+ *
+ * Copyright (C) 2019-2021 Microchip Technology, Inc. and its subsidiaries
+ *
+ * Author: Eugen Hristev <eugen.hristev@microchip.com>
+ *
+ * Sensor-->PFE-->DPC-->WB-->CFA-->CC-->GAM-->VHXS-->CSC-->CBHS-->SUB-->RLP-->DMA-->HIS
+ *
+ * ISC video pipeline integrates the following submodules:
+ * PFE: Parallel Front End to sample the camera sensor input stream
+ * DPC: Defective Pixel Correction with black offset correction, green disparity
+ *      correction and defective pixel correction (3 modules total)
+ *  WB: Programmable white balance in the Bayer domain
+ * CFA: Color filter array interpolation module
+ *  CC: Programmable color correction
+ * GAM: Gamma correction
+ *VHXS: Vertical and Horizontal Scaler
+ * CSC: Programmable color space conversion
+ *CBHS: Contrast Brightness Hue and Saturation control
+ * SUB: This module performs YCbCr444 to YCbCr420 chrominance subsampling
+ * RLP: This module performs rounding, range limiting
+ *      and packing of the incoming data
+ * DMA: This module performs DMA master accesses to write frames to external RAM
+ * HIS: Histogram module performs statistic counters on the frames
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/videodev2.h>
+
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-image-sizes.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-fwnode.h>
+#include <media/v4l2-subdev.h>
+#include <media/videobuf2-dma-contig.h>
+
+#include "atmel-isc-regs.h"
+#include "atmel-isc.h"
+
+#define ISC_SAMA7G5_MAX_SUPPORT_WIDTH   3264
+#define ISC_SAMA7G5_MAX_SUPPORT_HEIGHT  2464
+
+#define ISC_SAMA7G5_PIPELINE \
+	(WB_ENABLE | CFA_ENABLE | CC_ENABLE | GAM_ENABLES | CSC_ENABLE | \
+	CBC_ENABLE | SUB422_ENABLE | SUB420_ENABLE)
+
+/* This is a list of the formats that the ISC can *output* */
+static const struct isc_format sama7g5_controller_formats[] = {
+	{
+		.fourcc		= V4L2_PIX_FMT_ARGB444,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_ARGB555,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_RGB565,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_ABGR32,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_XBGR32,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUV420,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_UYVY,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_VYUY,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUYV,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUV422P,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_GREY,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_Y10,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_Y16,
+	},
+};
+
+/* This is a list of formats that the ISC can receive as *input* */
+static struct isc_format sama7g5_formats_list[] = {
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR8,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG8,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG8,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB8,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR10,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG10,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG10,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB10,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SBGGR12,
+		.mbus_code	= MEDIA_BUS_FMT_SBGGR12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_BGBG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGBRG12,
+		.mbus_code	= MEDIA_BUS_FMT_SGBRG12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_GBGB,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SGRBG12,
+		.mbus_code	= MEDIA_BUS_FMT_SGRBG12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_GRGR,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_SRGGB12,
+		.mbus_code	= MEDIA_BUS_FMT_SRGGB12_1X12,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TWELVE,
+		.cfa_baycfg	= ISC_BAY_CFG_RGRG,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_GREY,
+		.mbus_code	= MEDIA_BUS_FMT_Y8_1X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_YUYV,
+		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_UYVY,
+		.mbus_code	= MEDIA_BUS_FMT_UYVY8_2X8,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_RGB565,
+		.mbus_code	= MEDIA_BUS_FMT_RGB565_2X8_LE,
+		.pfe_cfg0_bps	= ISC_PFE_CFG0_BPS_EIGHT,
+	},
+	{
+		.fourcc		= V4L2_PIX_FMT_Y10,
+		.mbus_code	= MEDIA_BUS_FMT_Y10_1X10,
+		.pfe_cfg0_bps	= ISC_PFG_CFG0_BPS_TEN,
+	},
+
+};
+
+static void isc_sama7g5_config_csc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	/* Convert RGB to YUV */
+	regmap_write(regmap, ISC_CSC_YR_YG + isc->offsets.csc,
+		     0x42 | (0x81 << 16));
+	regmap_write(regmap, ISC_CSC_YB_OY + isc->offsets.csc,
+		     0x19 | (0x10 << 16));
+	regmap_write(regmap, ISC_CSC_CBR_CBG + isc->offsets.csc,
+		     0xFDA | (0xFB6 << 16));
+	regmap_write(regmap, ISC_CSC_CBB_OCB + isc->offsets.csc,
+		     0x70 | (0x80 << 16));
+	regmap_write(regmap, ISC_CSC_CRR_CRG + isc->offsets.csc,
+		     0x70 | (0xFA2 << 16));
+	regmap_write(regmap, ISC_CSC_CRB_OCR + isc->offsets.csc,
+		     0xFEE | (0x80 << 16));
+}
+
+static void isc_sama7g5_config_cbc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	/* Configure what is set via v4l2 ctrls */
+	regmap_write(regmap, ISC_CBC_BRIGHT + isc->offsets.cbc, isc->ctrls.brightness);
+	regmap_write(regmap, ISC_CBC_CONTRAST + isc->offsets.cbc, isc->ctrls.contrast);
+	/* Configure Hue and Saturation as neutral midpoint */
+	regmap_write(regmap, ISC_CBCHS_HUE, 0);
+	regmap_write(regmap, ISC_CBCHS_SAT, (1 << 4));
+}
+
+static void isc_sama7g5_config_cc(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	/* Configure each register at the neutral fixed point 1.0 or 0.0 */
+	regmap_write(regmap, ISC_CC_RR_RG, (1 << 8));
+	regmap_write(regmap, ISC_CC_RB_OR, 0);
+	regmap_write(regmap, ISC_CC_GR_GG, (1 << 8) << 16);
+	regmap_write(regmap, ISC_CC_GB_OG, 0);
+	regmap_write(regmap, ISC_CC_BR_BG, 0);
+	regmap_write(regmap, ISC_CC_BB_OB, (1 << 8));
+}
+
+static void isc_sama7g5_config_ctrls(struct isc_device *isc,
+				     const struct v4l2_ctrl_ops *ops)
+{
+	struct isc_ctrls *ctrls = &isc->ctrls;
+	struct v4l2_ctrl_handler *hdl = &ctrls->handler;
+
+	ctrls->contrast = 16;
+
+	v4l2_ctrl_new_std(hdl, ops, V4L2_CID_CONTRAST, -2048, 2047, 1, 16);
+}
+
+static void isc_sama7g5_config_dpc(struct isc_device *isc)
+{
+	u32 bay_cfg = isc->config.sd_format->cfa_baycfg;
+	struct regmap *regmap = isc->regmap;
+
+	regmap_update_bits(regmap, ISC_DPC_CFG, ISC_DPC_CFG_BLOFF_MASK,
+			   (64 << ISC_DPC_CFG_BLOFF_SHIFT));
+	regmap_update_bits(regmap, ISC_DPC_CFG, ISC_DPC_CFG_BAYCFG_MASK,
+			   (bay_cfg << ISC_DPC_CFG_BAYCFG_SHIFT));
+}
+
+static void isc_sama7g5_config_gam(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+
+	regmap_update_bits(regmap, ISC_GAM_CTRL, ISC_GAM_CTRL_BIPART,
+			   ISC_GAM_CTRL_BIPART);
+}
+
+static void isc_sama7g5_config_rlp(struct isc_device *isc)
+{
+	struct regmap *regmap = isc->regmap;
+	u32 rlp_mode = isc->config.rlp_cfg_mode;
+
+	regmap_update_bits(regmap, ISC_RLP_CFG + isc->offsets.rlp,
+			   ISC_RLP_CFG_MODE_MASK | ISC_RLP_CFG_LSH |
+			   ISC_RLP_CFG_YMODE_MASK, rlp_mode);
+}
+
+static void isc_sama7g5_adapt_pipeline(struct isc_device *isc)
+{
+	isc->try_config.bits_pipeline &= ISC_SAMA7G5_PIPELINE;
+}
+
+/* Gamma table with gamma 1/2.2 */
+static const u32 isc_sama7g5_gamma_table[][GAMMA_ENTRIES] = {
+	/* index 0 --> gamma bipartite */
+	{
+	      0x980,  0x4c0320,  0x650260,  0x7801e0,  0x8701a0,  0x940180,
+	   0xa00160,  0xab0120,  0xb40120,  0xbd0120,  0xc60100,  0xce0100,
+	   0xd600e0,  0xdd00e0,  0xe400e0,  0xeb00c0,  0xf100c0,  0xf700c0,
+	   0xfd00c0, 0x10300a0, 0x10800c0, 0x10e00a0, 0x11300a0, 0x11800a0,
+	  0x11d00a0, 0x12200a0, 0x12700a0, 0x12c0080, 0x13000a0, 0x1350080,
+	  0x13900a0, 0x13e0080, 0x1420076, 0x17d0062, 0x1ae0054, 0x1d8004a,
+	  0x1fd0044, 0x21f003e, 0x23e003a, 0x25b0036, 0x2760032, 0x28f0030,
+	  0x2a7002e, 0x2be002c, 0x2d4002c, 0x2ea0028, 0x2fe0028, 0x3120026,
+	  0x3250024, 0x3370024, 0x3490022, 0x35a0022, 0x36b0020, 0x37b0020,
+	  0x38b0020, 0x39b001e, 0x3aa001e, 0x3b9001c, 0x3c7001c, 0x3d5001c,
+	  0x3e3001c, 0x3f1001c, 0x3ff001a, 0x40c001a },
+};
+
+static int xisc_parse_dt(struct device *dev, struct isc_device *isc)
+{
+	struct device_node *np = dev->of_node;
+	struct device_node *epn = NULL;
+	struct isc_subdev_entity *subdev_entity;
+	unsigned int flags;
+	int ret;
+	bool mipi_mode;
+
+	INIT_LIST_HEAD(&isc->subdev_entities);
+
+	mipi_mode = of_property_read_bool(np, "microchip,mipi-mode");
+
+	while (1) {
+		struct v4l2_fwnode_endpoint v4l2_epn = { .bus_type = 0 };
+
+		epn = of_graph_get_next_endpoint(np, epn);
+		if (!epn)
+			return 0;
+
+		ret = v4l2_fwnode_endpoint_parse(of_fwnode_handle(epn),
+						 &v4l2_epn);
+		if (ret) {
+			ret = -EINVAL;
+			dev_err(dev, "Could not parse the endpoint\n");
+			break;
+		}
+
+		subdev_entity = devm_kzalloc(dev, sizeof(*subdev_entity),
+					     GFP_KERNEL);
+		if (!subdev_entity) {
+			ret = -ENOMEM;
+			break;
+		}
+		subdev_entity->epn = epn;
+
+		flags = v4l2_epn.bus.parallel.flags;
+
+		if (flags & V4L2_MBUS_HSYNC_ACTIVE_LOW)
+			subdev_entity->pfe_cfg0 = ISC_PFE_CFG0_HPOL_LOW;
+
+		if (flags & V4L2_MBUS_VSYNC_ACTIVE_LOW)
+			subdev_entity->pfe_cfg0 |= ISC_PFE_CFG0_VPOL_LOW;
+
+		if (flags & V4L2_MBUS_PCLK_SAMPLE_FALLING)
+			subdev_entity->pfe_cfg0 |= ISC_PFE_CFG0_PPOL_LOW;
+
+		if (v4l2_epn.bus_type == V4L2_MBUS_BT656)
+			subdev_entity->pfe_cfg0 |= ISC_PFE_CFG0_CCIR_CRC |
+					ISC_PFE_CFG0_CCIR656;
+
+		if (mipi_mode)
+			subdev_entity->pfe_cfg0 |= ISC_PFE_CFG0_MIPI;
+
+		list_add_tail(&subdev_entity->list, &isc->subdev_entities);
+	}
+	of_node_put(epn);
+
+	return ret;
+}
+
+static int microchip_xisc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct isc_device *isc;
+	struct resource *res;
+	void __iomem *io_base;
+	struct isc_subdev_entity *subdev_entity;
+	int irq;
+	int ret;
+	u32 ver;
+
+	isc = devm_kzalloc(dev, sizeof(*isc), GFP_KERNEL);
+	if (!isc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, isc);
+	isc->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	io_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(io_base))
+		return PTR_ERR(io_base);
+
+	isc->regmap = devm_regmap_init_mmio(dev, io_base, &isc_regmap_config);
+	if (IS_ERR(isc->regmap)) {
+		ret = PTR_ERR(isc->regmap);
+		dev_err(dev, "failed to init register map: %d\n", ret);
+		return ret;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	ret = devm_request_irq(dev, irq, isc_interrupt, 0,
+			       "microchip-sama7g5-xisc", isc);
+	if (ret < 0) {
+		dev_err(dev, "can't register ISR for IRQ %u (ret=%i)\n",
+			irq, ret);
+		return ret;
+	}
+
+	isc->gamma_table = isc_sama7g5_gamma_table;
+	isc->gamma_max = 0;
+
+	isc->max_width = ISC_SAMA7G5_MAX_SUPPORT_WIDTH;
+	isc->max_height = ISC_SAMA7G5_MAX_SUPPORT_HEIGHT;
+
+	isc->config_dpc = isc_sama7g5_config_dpc;
+	isc->config_csc = isc_sama7g5_config_csc;
+	isc->config_cbc = isc_sama7g5_config_cbc;
+	isc->config_cc = isc_sama7g5_config_cc;
+	isc->config_gam = isc_sama7g5_config_gam;
+	isc->config_rlp = isc_sama7g5_config_rlp;
+	isc->config_ctrls = isc_sama7g5_config_ctrls;
+
+	isc->adapt_pipeline = isc_sama7g5_adapt_pipeline;
+
+	isc->offsets.csc = ISC_SAMA7G5_CSC_OFFSET;
+	isc->offsets.cbc = ISC_SAMA7G5_CBC_OFFSET;
+	isc->offsets.sub422 = ISC_SAMA7G5_SUB422_OFFSET;
+	isc->offsets.sub420 = ISC_SAMA7G5_SUB420_OFFSET;
+	isc->offsets.rlp = ISC_SAMA7G5_RLP_OFFSET;
+	isc->offsets.his = ISC_SAMA7G5_HIS_OFFSET;
+	isc->offsets.dma = ISC_SAMA7G5_DMA_OFFSET;
+	isc->offsets.version = ISC_SAMA7G5_VERSION_OFFSET;
+	isc->offsets.his_entry = ISC_SAMA7G5_HIS_ENTRY_OFFSET;
+
+	isc->controller_formats = sama7g5_controller_formats;
+	isc->controller_formats_size = ARRAY_SIZE(sama7g5_controller_formats);
+	isc->formats_list = sama7g5_formats_list;
+	isc->formats_list_size = ARRAY_SIZE(sama7g5_formats_list);
+
+	/* sama7g5-isc RAM access port is full AXI4 - 32 bits per beat */
+	isc->dcfg = ISC_DCFG_YMBSIZE_BEATS32 | ISC_DCFG_CMBSIZE_BEATS32;
+
+	ret = isc_pipeline_init(isc);
+	if (ret)
+		return ret;
+
+	isc->hclock = devm_clk_get(dev, "hclock");
+	if (IS_ERR(isc->hclock)) {
+		ret = PTR_ERR(isc->hclock);
+		dev_err(dev, "failed to get hclock: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(isc->hclock);
+	if (ret) {
+		dev_err(dev, "failed to enable hclock: %d\n", ret);
+		return ret;
+	}
+
+	ret = isc_clk_init(isc);
+	if (ret) {
+		dev_err(dev, "failed to init isc clock: %d\n", ret);
+		goto unprepare_hclk;
+	}
+
+	isc->ispck = isc->isc_clks[ISC_ISPCK].clk;
+
+	ret = clk_prepare_enable(isc->ispck);
+	if (ret) {
+		dev_err(dev, "failed to enable ispck: %d\n", ret);
+		goto unprepare_hclk;
+	}
+
+	/* ispck should be greater or equal to hclock */
+	ret = clk_set_rate(isc->ispck, clk_get_rate(isc->hclock));
+	if (ret) {
+		dev_err(dev, "failed to set ispck rate: %d\n", ret);
+		goto unprepare_clk;
+	}
+
+	ret = v4l2_device_register(dev, &isc->v4l2_dev);
+	if (ret) {
+		dev_err(dev, "unable to register v4l2 device.\n");
+		goto unprepare_clk;
+	}
+
+	ret = xisc_parse_dt(dev, isc);
+	if (ret) {
+		dev_err(dev, "fail to parse device tree\n");
+		goto unregister_v4l2_device;
+	}
+
+	if (list_empty(&isc->subdev_entities)) {
+		dev_err(dev, "no subdev found\n");
+		ret = -ENODEV;
+		goto unregister_v4l2_device;
+	}
+
+	list_for_each_entry(subdev_entity, &isc->subdev_entities, list) {
+		struct v4l2_async_subdev *asd;
+
+		v4l2_async_notifier_init(&subdev_entity->notifier);
+
+		asd = v4l2_async_notifier_add_fwnode_remote_subdev(
+					&subdev_entity->notifier,
+					of_fwnode_handle(subdev_entity->epn),
+					struct v4l2_async_subdev);
+
+		of_node_put(subdev_entity->epn);
+		subdev_entity->epn = NULL;
+
+		if (IS_ERR(asd)) {
+			ret = PTR_ERR(asd);
+			goto cleanup_subdev;
+		}
+
+		subdev_entity->notifier.ops = &isc_async_ops;
+
+		ret = v4l2_async_notifier_register(&isc->v4l2_dev,
+						   &subdev_entity->notifier);
+		if (ret) {
+			dev_err(dev, "fail to register async notifier\n");
+			goto cleanup_subdev;
+		}
+
+		if (video_is_registered(&isc->video_dev))
+			break;
+	}
+
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+	pm_request_idle(dev);
+
+	regmap_read(isc->regmap, ISC_VERSION + isc->offsets.version, &ver);
+	dev_info(dev, "Microchip XISC version %x\n", ver);
+
+	return 0;
+
+cleanup_subdev:
+	isc_subdev_cleanup(isc);
+
+unregister_v4l2_device:
+	v4l2_device_unregister(&isc->v4l2_dev);
+
+unprepare_clk:
+	clk_disable_unprepare(isc->ispck);
+unprepare_hclk:
+	clk_disable_unprepare(isc->hclock);
+
+	isc_clk_cleanup(isc);
+
+	return ret;
+}
+
+static int microchip_xisc_remove(struct platform_device *pdev)
+{
+	struct isc_device *isc = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+
+	isc_subdev_cleanup(isc);
+
+	v4l2_device_unregister(&isc->v4l2_dev);
+
+	clk_disable_unprepare(isc->ispck);
+	clk_disable_unprepare(isc->hclock);
+
+	isc_clk_cleanup(isc);
+
+	return 0;
+}
+
+static int __maybe_unused xisc_runtime_suspend(struct device *dev)
+{
+	struct isc_device *isc = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(isc->ispck);
+	clk_disable_unprepare(isc->hclock);
+
+	return 0;
+}
+
+static int __maybe_unused xisc_runtime_resume(struct device *dev)
+{
+	struct isc_device *isc = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(isc->hclock);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(isc->ispck);
+	if (ret)
+		clk_disable_unprepare(isc->hclock);
+
+	return ret;
+}
+
+static const struct dev_pm_ops microchip_xisc_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(xisc_runtime_suspend, xisc_runtime_resume, NULL)
+};
+
+static const struct of_device_id microchip_xisc_of_match[] = {
+	{ .compatible = "microchip,sama7g5-isc" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, microchip_xisc_of_match);
+
+static struct platform_driver microchip_xisc_driver = {
+	.probe	= microchip_xisc_probe,
+	.remove	= microchip_xisc_remove,
+	.driver	= {
+		.name		= "microchip-sama7g5-xisc",
+		.pm		= &microchip_xisc_dev_pm_ops,
+		.of_match_table = of_match_ptr(microchip_xisc_of_match),
+	},
+};
+
+module_platform_driver(microchip_xisc_driver);
+
+MODULE_AUTHOR("Eugen Hristev <eugen.hristev@microchip.com>");
+MODULE_DESCRIPTION("The V4L2 driver for Microchip-XISC");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 671d07658531422eaba9ef0a399532b39361abf3 Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:30 +0200
Subject: [PATCH 2445/3804] media: MAINTAINERS: update ISC driver bindings file

ISC driver was converted to yaml. Update maintainers file.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5e1bbb39a68e0..ffe537b3f732b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11990,7 +11990,7 @@ MICROCHIP ISC DRIVER
 M:	Eugen Hristev <eugen.hristev@microchip.com>
 L:	linux-media@vger.kernel.org
 S:	Supported
-F:	Documentation/devicetree/bindings/media/atmel-isc.txt
+F:	Documentation/devicetree/bindings/media/atmel,isc.yaml
 F:	drivers/media/platform/atmel/atmel-isc-base.c
 F:	drivers/media/platform/atmel/atmel-isc-regs.h
 F:	drivers/media/platform/atmel/atmel-isc.h
-- 
GitLab


From 038cc978777378884a40d1517c88eace13ddc49d Mon Sep 17 00:00:00 2001
From: Eugen Hristev <eugen.hristev@microchip.com>
Date: Tue, 13 Apr 2021 12:57:31 +0200
Subject: [PATCH 2446/3804] media: MAINTAINERS: add xisc files to isc driver
 entry

Add XISC driver and binding files to the ISC driver entry.

Signed-off-by: Eugen Hristev <eugen.hristev@microchip.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ffe537b3f732b..f086d2c305b51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11991,10 +11991,12 @@ M:	Eugen Hristev <eugen.hristev@microchip.com>
 L:	linux-media@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/media/atmel,isc.yaml
+F:	Documentation/devicetree/bindings/media/microchip,xisc.yaml
 F:	drivers/media/platform/atmel/atmel-isc-base.c
 F:	drivers/media/platform/atmel/atmel-isc-regs.h
 F:	drivers/media/platform/atmel/atmel-isc.h
 F:	drivers/media/platform/atmel/atmel-sama5d2-isc.c
+F:	drivers/media/platform/atmel/atmel-sama7g5-isc.c
 F:	include/linux/atmel-isc-media.h
 
 MICROCHIP ISI DRIVER
-- 
GitLab


From bc4f21fcc03ddd816dac1db00918680bf7bf9d86 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 5 May 2021 21:06:18 +0200
Subject: [PATCH 2447/3804] media: mxl692: make a const array static, makes
 object smaller

Don't populate the const array fw_hdr on the stack but instead make it
static. Makes the object code smaller by 5 bytes:

Before:
   text    data    bss     dec    hex filename
  31948   12072     64   44084   ac34 drivers/media/dvb-frontends/mxl692.o

After:
   text    data    bss     dec    hex filename
  31879   12136     64   44079   ac2f drivers/media/dvb-frontends/mxl692.o

(gcc version 10.3.0)

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-frontends/mxl692.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/dvb-frontends/mxl692.c b/drivers/media/dvb-frontends/mxl692.c
index 83030643aba70..a246db683cdfc 100644
--- a/drivers/media/dvb-frontends/mxl692.c
+++ b/drivers/media/dvb-frontends/mxl692.c
@@ -224,7 +224,9 @@ static int mxl692_validate_fw_header(struct mxl692_dev *dev,
 	u32 ix, temp;
 	__be32 *local_buf = NULL;
 	u8 temp_cksum = 0;
-	const u8 fw_hdr[] = { 0x4D, 0x31, 0x10, 0x02, 0x40, 0x00, 0x00, 0x80 };
+	static const u8 fw_hdr[] = {
+		0x4D, 0x31, 0x10, 0x02, 0x40, 0x00, 0x00, 0x80
+	};
 
 	if (memcmp(buffer, fw_hdr, 8) != 0) {
 		status = -EINVAL;
-- 
GitLab


From 321c0d383dc3aa1b00a1a1e0957f1543fc84a028 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 21 May 2021 14:18:27 +0200
Subject: [PATCH 2448/3804] media: cxd2880-spi: Fix some error messages

Fix some erroneous function names in some error messages.
Remove some spurious or useless trailing and leading characters in some
messages.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/spi/cxd2880-spi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c
index 931ec0727cd38..e5094fff04c5a 100644
--- a/drivers/media/spi/cxd2880-spi.c
+++ b/drivers/media/spi/cxd2880-spi.c
@@ -147,7 +147,7 @@ static int cxd2880_spi_read_ts(struct spi_device *spi,
 
 	ret = spi_sync(spi, &message);
 	if (ret)
-		pr_err("spi_write_then_read failed\n");
+		pr_err("spi_sync failed\n");
 
 	return ret;
 }
@@ -401,7 +401,7 @@ static int cxd2880_start_feed(struct dvb_demux_feed *feed)
 							      dvb_spi,
 							      "cxd2880_ts_read");
 		if (IS_ERR(dvb_spi->cxd2880_ts_read_thread)) {
-			pr_err("kthread_run failed/\n");
+			pr_err("kthread_run failed\n");
 			kfree(dvb_spi->ts_buf);
 			dvb_spi->ts_buf = NULL;
 			memset(&dvb_spi->filter_config, 0,
@@ -448,7 +448,7 @@ static int cxd2880_stop_feed(struct dvb_demux_feed *feed)
 		 * in dvb_spi->all_pid_feed_count.
 		 */
 		if (dvb_spi->all_pid_feed_count <= 0) {
-			pr_err("PID %d not found.\n", feed->pid);
+			pr_err("PID %d not found\n", feed->pid);
 			return -EINVAL;
 		}
 		dvb_spi->all_pid_feed_count--;
@@ -485,7 +485,7 @@ static int cxd2880_stop_feed(struct dvb_demux_feed *feed)
 
 		ret_stop = kthread_stop(dvb_spi->cxd2880_ts_read_thread);
 		if (ret_stop) {
-			pr_err("'kthread_stop failed. (%d)\n", ret_stop);
+			pr_err("kthread_stop failed. (%d)\n", ret_stop);
 			ret = ret_stop;
 		}
 		kfree(dvb_spi->ts_buf);
@@ -512,7 +512,7 @@ cxd2880_spi_probe(struct spi_device *spi)
 	struct cxd2880_config config;
 
 	if (!spi) {
-		pr_err("invalid arg.\n");
+		pr_err("invalid arg\n");
 		return -EINVAL;
 	}
 
@@ -596,7 +596,7 @@ cxd2880_spi_probe(struct spi_device *spi)
 	ret = dvb_spi->demux.dmx.connect_frontend(&dvb_spi->demux.dmx,
 						  &dvb_spi->dmx_fe);
 	if (ret < 0) {
-		pr_err("dvb_register_frontend() failed\n");
+		pr_err("connect_frontend() failed\n");
 		goto fail_fe_conn;
 	}
 
-- 
GitLab


From 9ad1efee086e0e913914fa2b2173efb830bad68c Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Tue, 25 May 2021 15:06:52 +0200
Subject: [PATCH 2449/3804] media: dvd_usb: memory leak in cinergyt2_fe_attach

When the driver fails to talk with the hardware with dvb_usb_generic_rw,
it will return an error to dvb_usb_adapter_frontend_init. However, the
driver forgets to free the resource (e.g., struct cinergyt2_fe_state),
which leads to a memory leak.

Fix this by freeing struct cinergyt2_fe_state when dvb_usb_generic_rw
fails in cinergyt2_frontend_attach.

backtrace:
  [<0000000056e17b1a>] kmalloc include/linux/slab.h:552 [inline]
  [<0000000056e17b1a>] kzalloc include/linux/slab.h:682 [inline]
  [<0000000056e17b1a>] cinergyt2_fe_attach+0x21/0x80 drivers/media/usb/dvb-usb/cinergyT2-fe.c:271
  [<00000000ae0b1711>] cinergyt2_frontend_attach+0x21/0x70 drivers/media/usb/dvb-usb/cinergyT2-core.c:74
  [<00000000d0254861>] dvb_usb_adapter_frontend_init+0x11b/0x1b0 drivers/media/usb/dvb-usb/dvb-usb-dvb.c:290
  [<0000000002e08ac6>] dvb_usb_adapter_init drivers/media/usb/dvb-usb/dvb-usb-init.c:84 [inline]
  [<0000000002e08ac6>] dvb_usb_init drivers/media/usb/dvb-usb/dvb-usb-init.c:173 [inline]
  [<0000000002e08ac6>] dvb_usb_device_init.cold+0x4d0/0x6ae drivers/media/usb/dvb-usb/dvb-usb-init.c:287

Reported-by: syzbot+e1de8986786b3722050e@syzkaller.appspotmail.com
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb/cinergyT2-core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c
index 969a7ec71dff7..4116ba5c45fcb 100644
--- a/drivers/media/usb/dvb-usb/cinergyT2-core.c
+++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c
@@ -78,6 +78,8 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap)
 
 	ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 3, 0);
 	if (ret < 0) {
+		if (adap->fe_adap[0].fe)
+			adap->fe_adap[0].fe->ops.release(adap->fe_adap[0].fe);
 		deb_rc("cinergyt2_power_ctrl() Failed to retrieve sleep state info\n");
 	}
 	mutex_unlock(&d->data_mutex);
-- 
GitLab


From da9a805b1249be685c2bee110eb1260d610bd5d0 Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Tue, 1 Jun 2021 13:07:46 +0200
Subject: [PATCH 2450/3804] media: cinergyt2: make properties const

The dvb_usb_device_properties can be const. This makes it clear that
the static can be shared across threads.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/dvb-usb/cinergyT2-core.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c
index 4116ba5c45fcb..23f1093d28f88 100644
--- a/drivers/media/usb/dvb-usb/cinergyT2-core.c
+++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c
@@ -29,10 +29,8 @@ struct cinergyt2_state {
 	unsigned char data[64];
 };
 
-/* We are missing a release hook with usb_device data */
-static struct dvb_usb_device *cinergyt2_usb_device;
-
-static struct dvb_usb_device_properties cinergyt2_properties;
+/* Forward declaration */
+static const struct dvb_usb_device_properties cinergyt2_properties;
 
 static int cinergyt2_streaming_ctrl(struct dvb_usb_adapter *adap, int enable)
 {
@@ -84,9 +82,6 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap)
 	}
 	mutex_unlock(&d->data_mutex);
 
-	/* Copy this pointer as we are gonna need it in the release phase */
-	cinergyt2_usb_device = adap->dev;
-
 	return ret;
 }
 
@@ -205,7 +200,7 @@ static struct usb_device_id cinergyt2_usb_table[] = {
 
 MODULE_DEVICE_TABLE(usb, cinergyt2_usb_table);
 
-static struct dvb_usb_device_properties cinergyt2_properties = {
+static const struct dvb_usb_device_properties cinergyt2_properties = {
 	.size_of_priv = sizeof(struct cinergyt2_state),
 	.num_adapters = 1,
 	.adapter = {
-- 
GitLab


From 04297b00dfb45277b8b661d48a7a5e29876fc6ae Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov@ispras.ru>
Date: Tue, 1 Jun 2021 14:56:43 +0200
Subject: [PATCH 2451/3804] media: st_rc: Handle errors of clk_prepare_enable()

Handle errors of clk_prepare_enable() in st_rc_hardware_init() and its
callers.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Evgeny Novikov <novikov@ispras.ru>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/rc/st_rc.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/media/rc/st_rc.c b/drivers/media/rc/st_rc.c
index 3237fef5d502c..d79d1e3996b25 100644
--- a/drivers/media/rc/st_rc.c
+++ b/drivers/media/rc/st_rc.c
@@ -157,8 +157,9 @@ static irqreturn_t st_rc_rx_interrupt(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void st_rc_hardware_init(struct st_rc_device *dev)
+static int st_rc_hardware_init(struct st_rc_device *dev)
 {
+	int ret;
 	int baseclock, freqdiff;
 	unsigned int rx_max_symbol_per = MAX_SYMB_TIME;
 	unsigned int rx_sampling_freq_div;
@@ -166,7 +167,12 @@ static void st_rc_hardware_init(struct st_rc_device *dev)
 	/* Enable the IP */
 	reset_control_deassert(dev->rstc);
 
-	clk_prepare_enable(dev->sys_clock);
+	ret = clk_prepare_enable(dev->sys_clock);
+	if (ret) {
+		dev_err(dev->dev, "Failed to prepare/enable system clock\n");
+		return ret;
+	}
+
 	baseclock = clk_get_rate(dev->sys_clock);
 
 	/* IRB input pins are inverted internally from high to low. */
@@ -184,6 +190,8 @@ static void st_rc_hardware_init(struct st_rc_device *dev)
 	}
 
 	writel(rx_max_symbol_per, dev->rx_base + IRB_MAX_SYM_PERIOD);
+
+	return 0;
 }
 
 static int st_rc_remove(struct platform_device *pdev)
@@ -287,7 +295,9 @@ static int st_rc_probe(struct platform_device *pdev)
 
 	rc_dev->dev = dev;
 	platform_set_drvdata(pdev, rc_dev);
-	st_rc_hardware_init(rc_dev);
+	ret = st_rc_hardware_init(rc_dev);
+	if (ret)
+		goto err;
 
 	rdev->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER;
 	/* rx sampling rate is 10Mhz */
@@ -359,6 +369,7 @@ static int st_rc_suspend(struct device *dev)
 
 static int st_rc_resume(struct device *dev)
 {
+	int ret;
 	struct st_rc_device *rc_dev = dev_get_drvdata(dev);
 	struct rc_dev	*rdev = rc_dev->rdev;
 
@@ -367,7 +378,10 @@ static int st_rc_resume(struct device *dev)
 		rc_dev->irq_wake = 0;
 	} else {
 		pinctrl_pm_select_default_state(dev);
-		st_rc_hardware_init(rc_dev);
+		ret = st_rc_hardware_init(rc_dev);
+		if (ret)
+			return ret;
+
 		if (rdev->users) {
 			writel(IRB_RX_INTS, rc_dev->rx_base + IRB_RX_INT_EN);
 			writel(0x01, rc_dev->rx_base + IRB_RX_EN);
-- 
GitLab


From 53a370f621a04a06bd2402c13580d7e4eb172c98 Mon Sep 17 00:00:00 2001
From: Alexander Voronov <avv.0@ya.ru>
Date: Tue, 1 Jun 2021 22:28:12 +0200
Subject: [PATCH 2452/3804] media: rc: add keymap for Toshiba CT-90405 remote

This is an NEC remote control device shipped with some Toshiba TVs.

Signed-off-by: Alexander Voronov <avv.0@ya.ru>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/rc.yaml         |  1 +
 drivers/media/rc/keymaps/Makefile             |  1 +
 drivers/media/rc/keymaps/rc-ct-90405.c        | 86 +++++++++++++++++++
 include/media/rc-map.h                        |  1 +
 4 files changed, 89 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-ct-90405.c

diff --git a/Documentation/devicetree/bindings/media/rc.yaml b/Documentation/devicetree/bindings/media/rc.yaml
index 12d838b05632e..d4c541c4b164c 100644
--- a/Documentation/devicetree/bindings/media/rc.yaml
+++ b/Documentation/devicetree/bindings/media/rc.yaml
@@ -45,6 +45,7 @@ properties:
       - rc-cec
       - rc-cinergy
       - rc-cinergy-1400
+      - rc-ct-90405
       - rc-d680-dmb
       - rc-delock-61959
       - rc-dib0700-nec
diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index f609dfe7fd76f..5fe5c9e1a46d7 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-budget-ci-old.o \
 			rc-cinergy-1400.o \
 			rc-cinergy.o \
+			rc-ct-90405.o \
 			rc-d680-dmb.o \
 			rc-delock-61959.o \
 			rc-dib0700-nec.o \
diff --git a/drivers/media/rc/keymaps/rc-ct-90405.c b/drivers/media/rc/keymaps/rc-ct-90405.c
new file mode 100644
index 0000000000000..8914c83c9d9f1
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-ct-90405.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Toshiba CT-90405 remote controller keytable
+ *
+ * Copyright (C) 2021 Alexander Voronov <avv.0@ya.ru>
+ */
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+static struct rc_map_table ct_90405[] = {
+	{ 0x4014, KEY_SWITCHVIDEOMODE },
+	{ 0x4012, KEY_POWER },
+	{ 0x4044, KEY_TV },
+	{ 0x40be43, KEY_3D_MODE },
+	{ 0x400c, KEY_SUBTITLE },
+	{ 0x4001, KEY_NUMERIC_1 },
+	{ 0x4002, KEY_NUMERIC_2 },
+	{ 0x4003, KEY_NUMERIC_3 },
+	{ 0x4004, KEY_NUMERIC_4 },
+	{ 0x4005, KEY_NUMERIC_5 },
+	{ 0x4006, KEY_NUMERIC_6 },
+	{ 0x4007, KEY_NUMERIC_7 },
+	{ 0x4008, KEY_NUMERIC_8 },
+	{ 0x4009, KEY_NUMERIC_9 },
+	{ 0x4062, KEY_AUDIO_DESC },
+	{ 0x4000, KEY_NUMERIC_0 },
+	{ 0x401a, KEY_VOLUMEUP },
+	{ 0x401e, KEY_VOLUMEDOWN },
+	{ 0x4016, KEY_INFO },
+	{ 0x4010, KEY_MUTE },
+	{ 0x401b, KEY_CHANNELUP },
+	{ 0x401f, KEY_CHANNELDOWN },
+	{ 0x40da, KEY_VENDOR },
+	{ 0x4066, KEY_PLAYER },
+	{ 0x4017, KEY_TEXT },
+	{ 0x4047, KEY_LIST },
+	{ 0x4073, KEY_PAGEUP },
+	{ 0x4045, KEY_PROGRAM },
+	{ 0x4043, KEY_EXIT },
+	{ 0x4074, KEY_PAGEDOWN },
+	{ 0x4064, KEY_BACK },
+	{ 0x405b, KEY_MENU },
+	{ 0x4019, KEY_UP },
+	{ 0x4040, KEY_RIGHT },
+	{ 0x401d, KEY_DOWN },
+	{ 0x4042, KEY_LEFT },
+	{ 0x4021, KEY_OK },
+	{ 0x4053, KEY_REWIND },
+	{ 0x4067, KEY_PLAY },
+	{ 0x400d, KEY_FASTFORWARD },
+	{ 0x4054, KEY_PREVIOUS },
+	{ 0x4068, KEY_STOP },
+	{ 0x406a, KEY_PAUSE },
+	{ 0x4015, KEY_NEXT },
+	{ 0x4048, KEY_RED },
+	{ 0x4049, KEY_GREEN },
+	{ 0x404a, KEY_YELLOW },
+	{ 0x404b, KEY_BLUE },
+	{ 0x406f, KEY_RECORD }
+};
+
+static struct rc_map_list ct_90405_map = {
+	.map = {
+		.scan     = ct_90405,
+		.size     = ARRAY_SIZE(ct_90405),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_CT_90405,
+	}
+};
+
+static int __init init_rc_map_ct_90405(void)
+{
+	return rc_map_register(&ct_90405_map);
+}
+
+static void __exit exit_rc_map_ct_90405(void)
+{
+	rc_map_unregister(&ct_90405_map);
+}
+
+module_init(init_rc_map_ct_90405)
+module_exit(exit_rc_map_ct_90405)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alexander Voronov <avv.0@ya.ru>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index b50443d6fd776..793b54342dffe 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -231,6 +231,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_CEC                       "rc-cec"
 #define RC_MAP_CINERGY                   "rc-cinergy"
 #define RC_MAP_CINERGY_1400              "rc-cinergy-1400"
+#define RC_MAP_CT_90405                  "rc-ct-90405"
 #define RC_MAP_D680_DMB                  "rc-d680-dmb"
 #define RC_MAP_DELOCK_61959              "rc-delock-61959"
 #define RC_MAP_DIB0700_NEC_TABLE         "rc-dib0700-nec"
-- 
GitLab


From f1d9f315924f02ed8aabada04a04b20a0c6cc9be Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 3 Jun 2021 09:02:30 +0200
Subject: [PATCH 2453/3804] media: imon: use DEVICE_ATTR_RW() helper macro

Use DEVICE_ATTR_RW() helper macro instead of DEVICE_ATTR(), which is
simpler and more readable.

Because the names of the read and write functions of the sysfs attribute
are normalized, there is a natural association between them.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/rc/imon.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index a7962ca2ac8e3..2ca4e86c7b9f1 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -780,7 +780,7 @@ static int send_set_imon_clock(struct imon_context *ictx,
 /*
  * These are the sysfs functions to handle the association on the iMON 2.4G LT.
  */
-static ssize_t show_associate_remote(struct device *d,
+static ssize_t associate_remote_show(struct device *d,
 				     struct device_attribute *attr,
 				     char *buf)
 {
@@ -800,7 +800,7 @@ static ssize_t show_associate_remote(struct device *d,
 	return strlen(buf);
 }
 
-static ssize_t store_associate_remote(struct device *d,
+static ssize_t associate_remote_store(struct device *d,
 				      struct device_attribute *attr,
 				      const char *buf, size_t count)
 {
@@ -822,7 +822,7 @@ static ssize_t store_associate_remote(struct device *d,
 /*
  * sysfs functions to control internal imon clock
  */
-static ssize_t show_imon_clock(struct device *d,
+static ssize_t imon_clock_show(struct device *d,
 			       struct device_attribute *attr, char *buf)
 {
 	struct imon_context *ictx = dev_get_drvdata(d);
@@ -848,7 +848,7 @@ static ssize_t show_imon_clock(struct device *d,
 	return len;
 }
 
-static ssize_t store_imon_clock(struct device *d,
+static ssize_t imon_clock_store(struct device *d,
 				struct device_attribute *attr,
 				const char *buf, size_t count)
 {
@@ -895,11 +895,8 @@ exit:
 }
 
 
-static DEVICE_ATTR(imon_clock, S_IWUSR | S_IRUGO, show_imon_clock,
-		   store_imon_clock);
-
-static DEVICE_ATTR(associate_remote, S_IWUSR | S_IRUGO, show_associate_remote,
-		   store_associate_remote);
+static DEVICE_ATTR_RW(imon_clock);
+static DEVICE_ATTR_RW(associate_remote);
 
 static struct attribute *imon_display_sysfs_entries[] = {
 	&dev_attr_imon_clock.attr,
-- 
GitLab


From 4dd0f63b51c24afd2f34afbae2e728cf00c390e6 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:49:56 +0200
Subject: [PATCH 2454/3804] media: hevc: Add fields and flags for hevc PPS

Add fields and flags as they are defined in
7.4.3.3.1 "General picture parameter set RBSP semantics" of the
H.265 ITU specification.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/ext-ctrls-codec.rst    | 14 ++++++++++++++
 include/media/hevc-ctrls.h                         |  4 ++++
 2 files changed, 18 insertions(+)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 2b5edab55bb4e..15468dcfaf08f 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -2786,6 +2786,12 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     * - __u8
       - ``num_extra_slice_header_bits``
       -
+    * - __u8
+      - ``num_ref_idx_l0_default_active_minus1``
+      - Specifies the inferred value of num_ref_idx_l0_active_minus1
+    * - __u8
+      - ``num_ref_idx_l1_default_active_minus1``
+      - Specifies the inferred value of num_ref_idx_l1_active_minus1
     * - __s8
       - ``init_qp_minus26``
       -
@@ -2896,6 +2902,14 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     * - ``V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT``
       - 0x00040000
       -
+    * - ``V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT``
+      - 0x00080000
+      - Specifies the presence of deblocking filter control syntax elements in
+        the PPS
+    * - ``V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING``
+      - 0x00100000
+      - Specifies that tile column boundaries and likewise tile row boundaries
+        are distributed uniformly across the picture
 
 .. raw:: latex
 
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index 36e4c93707ae5..3b525fd6e6188 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -99,10 +99,14 @@ struct v4l2_ctrl_hevc_sps {
 #define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
 #define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
 #define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
+#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
+#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
 
 struct v4l2_ctrl_hevc_pps {
 	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
 	__u8	num_extra_slice_header_bits;
+	__u8	num_ref_idx_l0_default_active_minus1;
+	__u8	num_ref_idx_l1_default_active_minus1;
 	__s8	init_qp_minus26;
 	__u8	diff_cu_qp_delta_depth;
 	__s8	pps_cb_qp_offset;
-- 
GitLab


From d395a78db9eabd12633b39e05c80e803543b6590 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:49:57 +0200
Subject: [PATCH 2455/3804] media: hevc: Add decode params control

Add decode params control and the associated structure to group
all the information that is needed to decode a reference frame, as
described in ITU-T Rec. H.265 section "8.3.2 Decoding process
for reference picture set".

Adapt Cedrus driver to these changes.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/v4l/ext-ctrls-codec.rst             | 94 +++++++++++++++----
 .../media/v4l/vidioc-queryctrl.rst            |  6 ++
 drivers/media/v4l2-core/v4l2-ctrls-core.c     | 21 +++--
 drivers/media/v4l2-core/v4l2-ctrls-defs.c     |  4 +
 drivers/staging/media/sunxi/cedrus/cedrus.c   |  6 ++
 drivers/staging/media/sunxi/cedrus/cedrus.h   |  1 +
 .../staging/media/sunxi/cedrus/cedrus_dec.c   |  2 +
 .../staging/media/sunxi/cedrus/cedrus_h265.c  | 12 ++-
 include/media/hevc-ctrls.h                    | 29 ++++--
 9 files changed, 136 insertions(+), 39 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 15468dcfaf08f..8c6e2a11ed95d 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -3000,9 +3000,6 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     * - __u8
       - ``pic_struct``
       -
-    * - __u8
-      - ``num_active_dpb_entries``
-      - The number of entries in ``dpb``.
     * - __u8
       - ``ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
       - The list of L0 reference elements as indices in the DPB.
@@ -3010,22 +3007,8 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
       - ``ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
       - The list of L1 reference elements as indices in the DPB.
     * - __u8
-      - ``num_rps_poc_st_curr_before``
-      - The number of reference pictures in the short-term set that come before
-        the current frame.
-    * - __u8
-      - ``num_rps_poc_st_curr_after``
-      - The number of reference pictures in the short-term set that come after
-        the current frame.
-    * - __u8
-      - ``num_rps_poc_lt_curr``
-      - The number of reference pictures in the long-term set.
-    * - __u8
-      - ``padding[7]``
+      - ``padding``
       - Applications and drivers must set this to zero.
-    * - struct :c:type:`v4l2_hevc_dpb_entry`
-      - ``dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
-      - The decoded picture buffer, for meta-data about reference frames.
     * - struct :c:type:`v4l2_hevc_pred_weight_table`
       - ``pred_weight_table``
       - The prediction weight coefficients for inter-picture prediction.
@@ -3281,3 +3264,78 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
     encoding the next frame queued after setting this control.
     This provides a bitmask which consists of bits [0, LTR_COUNT-1].
     This is applicable to the H264 and HEVC encoders.
+
+``V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS (struct)``
+    Specifies various decode parameters, especially the reference picture order
+    count (POC) for all the lists (short, long, before, current, after) and the
+    number of entries for each of them.
+    These parameters are defined according to :ref:`hevc`.
+    They are described in section 8.3 "Slice decoding process" of the
+    specification.
+
+.. c:type:: v4l2_ctrl_hevc_decode_params
+
+.. cssclass:: longtable
+
+.. flat-table:: struct v4l2_ctrl_hevc_decode_params
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __s32
+      - ``pic_order_cnt_val``
+      - PicOrderCntVal as described in section 8.3.1 "Decoding process
+        for picture order count" of the specification.
+    * - __u8
+      - ``num_active_dpb_entries``
+      - The number of entries in ``dpb``.
+    * - struct :c:type:`v4l2_hevc_dpb_entry`
+      - ``dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - The decoded picture buffer, for meta-data about reference frames.
+    * - __u8
+      - ``num_poc_st_curr_before``
+      - The number of reference pictures in the short-term set that come before
+        the current frame.
+    * - __u8
+      - ``num_poc_st_curr_after``
+      - The number of reference pictures in the short-term set that come after
+        the current frame.
+    * - __u8
+      - ``num_poc_lt_curr``
+      - The number of reference pictures in the long-term set.
+    * - __u8
+      - ``poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - PocStCurrBefore as described in section 8.3.2 "Decoding process for reference
+        picture set".
+    * - __u8
+      - ``poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - PocStCurrAfter as described in section 8.3.2 "Decoding process for reference
+        picture set".
+    * - __u8
+      - ``poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX]``
+      - PocLtCurr as described in section 8.3.2 "Decoding process for reference
+        picture set".
+    * - __u64
+      - ``flags``
+      - See :ref:`Decode Parameters Flags <hevc_decode_params_flags>`
+
+.. _hevc_decode_params_flags:
+
+``Decode Parameters Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC``
+      - 0x00000001
+      -
+    * - ``V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC``
+      - 0x00000002
+      -
+    * - ``V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR``
+      - 0x00000004
+      -
diff --git a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
index 07e54029e1e91..f9ecf62761296 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-queryctrl.rst
@@ -501,6 +501,12 @@ See also the examples in :ref:`control`.
       - n/a
       - A struct :c:type:`v4l2_ctrl_vp8_frame`, containing VP8
 	frame parameters for stateless video decoders.
+    * - ``V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS``
+      - n/a
+      - n/a
+      - n/a
+      - A struct :c:type:`v4l2_ctrl_hevc_decode_params`, containing HEVC
+	decoding parameters for stateless video decoders.
 
 .. raw:: latex
 
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c b/drivers/media/v4l2-core/v4l2-ctrls-core.c
index 0814392243572..c4b5082849b66 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
@@ -337,6 +337,7 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 	struct v4l2_ctrl_hevc_pps *p_hevc_pps;
 	struct v4l2_ctrl_hevc_slice_params *p_hevc_slice_params;
 	struct v4l2_ctrl_hdr10_mastering_display *p_hdr10_mastering;
+	struct v4l2_ctrl_hevc_decode_params *p_hevc_decode_params;
 	struct v4l2_area *area;
 	void *p = ptr.p + idx * ctrl->elem_size;
 	unsigned int i;
@@ -616,23 +617,26 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 		zero_padding(*p_hevc_pps);
 		break;
 
-	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
-		p_hevc_slice_params = p;
+	case V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS:
+		p_hevc_decode_params = p;
 
-		if (p_hevc_slice_params->num_active_dpb_entries >
+		if (p_hevc_decode_params->num_active_dpb_entries >
 		    V4L2_HEVC_DPB_ENTRIES_NUM_MAX)
 			return -EINVAL;
 
-		zero_padding(p_hevc_slice_params->pred_weight_table);
-
-		for (i = 0; i < p_hevc_slice_params->num_active_dpb_entries;
+		for (i = 0; i < p_hevc_decode_params->num_active_dpb_entries;
 		     i++) {
 			struct v4l2_hevc_dpb_entry *dpb_entry =
-				&p_hevc_slice_params->dpb[i];
+				&p_hevc_decode_params->dpb[i];
 
 			zero_padding(*dpb_entry);
 		}
+		break;
 
+	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
+		p_hevc_slice_params = p;
+
+		zero_padding(p_hevc_slice_params->pred_weight_table);
 		zero_padding(*p_hevc_slice_params);
 		break;
 
@@ -1236,6 +1240,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS:
 		elem_size = sizeof(struct v4l2_ctrl_hevc_slice_params);
 		break;
+	case V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS:
+		elem_size = sizeof(struct v4l2_ctrl_hevc_decode_params);
+		break;
 	case V4L2_CTRL_TYPE_HDR10_CLL_INFO:
 		elem_size = sizeof(struct v4l2_ctrl_hdr10_cll_info);
 		break;
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
index 7963c7b434504..b6344bbf1e006 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
@@ -996,6 +996,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_HEVC_SPS:			return "HEVC Sequence Parameter Set";
 	case V4L2_CID_MPEG_VIDEO_HEVC_PPS:			return "HEVC Picture Parameter Set";
 	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:		return "HEVC Slice Parameters";
+	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS:		return "HEVC Decode Parameters";
 	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE:		return "HEVC Decode Mode";
 	case V4L2_CID_MPEG_VIDEO_HEVC_START_CODE:		return "HEVC Start Code";
 
@@ -1487,6 +1488,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS:
 		*type = V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS;
 		break;
+	case V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS:
+		*type = V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS;
+		break;
 	case V4L2_CID_UNIT_CELL_SIZE:
 		*type = V4L2_CTRL_TYPE_AREA;
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index fa348c09f8444..c0d005dafc6c0 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -157,6 +157,12 @@ static const struct cedrus_control cedrus_controls[] = {
 		},
 		.codec		= CEDRUS_CODEC_VP8,
 	},
+	{
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
+		},
+		.codec		= CEDRUS_CODEC_H265,
+	},
 };
 
 #define CEDRUS_CONTROLS_COUNT	ARRAY_SIZE(cedrus_controls)
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index bbcdcd0787cf7..88afba17b78bd 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -77,6 +77,7 @@ struct cedrus_h265_run {
 	const struct v4l2_ctrl_hevc_sps			*sps;
 	const struct v4l2_ctrl_hevc_pps			*pps;
 	const struct v4l2_ctrl_hevc_slice_params	*slice_params;
+	const struct v4l2_ctrl_hevc_decode_params	*decode_params;
 };
 
 struct cedrus_vp8_run {
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 97e410d925068..40e8c4123f76a 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -70,6 +70,8 @@ void cedrus_device_run(void *priv)
 			V4L2_CID_MPEG_VIDEO_HEVC_PPS);
 		run.h265.slice_params = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS);
+		run.h265.decode_params = cedrus_find_control_data(ctx,
+			V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
 		break;
 
 	case V4L2_PIX_FMT_VP8_FRAME:
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
index 10744fab7ceaa..6821e3d05d346 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c
@@ -245,6 +245,7 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 	const struct v4l2_ctrl_hevc_sps *sps;
 	const struct v4l2_ctrl_hevc_pps *pps;
 	const struct v4l2_ctrl_hevc_slice_params *slice_params;
+	const struct v4l2_ctrl_hevc_decode_params *decode_params;
 	const struct v4l2_hevc_pred_weight_table *pred_weight_table;
 	dma_addr_t src_buf_addr;
 	dma_addr_t src_buf_end_addr;
@@ -256,6 +257,7 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 	sps = run->h265.sps;
 	pps = run->h265.pps;
 	slice_params = run->h265.slice_params;
+	decode_params = run->h265.decode_params;
 	pred_weight_table = &slice_params->pred_weight_table;
 
 	/* MV column buffer size and allocation. */
@@ -487,7 +489,7 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 
 	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) |
 	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) |
-	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(slice_params->num_rps_poc_st_curr_after == 0) |
+	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(decode_params->num_poc_st_curr_after == 0) |
 	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) |
 	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) |
 	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta);
@@ -527,8 +529,8 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 	cedrus_write(dev, VE_DEC_H265_NEIGHBOR_INFO_ADDR, reg);
 
 	/* Write decoded picture buffer in pic list. */
-	cedrus_h265_frame_info_write_dpb(ctx, slice_params->dpb,
-					 slice_params->num_active_dpb_entries);
+	cedrus_h265_frame_info_write_dpb(ctx, decode_params->dpb,
+					 decode_params->num_active_dpb_entries);
 
 	/* Output frame. */
 
@@ -545,7 +547,7 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 
 	/* Reference picture list 0 (for P/B frames). */
 	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
-		cedrus_h265_ref_pic_list_write(dev, slice_params->dpb,
+		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
 					       slice_params->ref_idx_l0,
 					       slice_params->num_ref_idx_l0_active_minus1 + 1,
 					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0);
@@ -564,7 +566,7 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx,
 
 	/* Reference picture list 1 (for B frames). */
 	if (slice_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) {
-		cedrus_h265_ref_pic_list_write(dev, slice_params->dpb,
+		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
 					       slice_params->ref_idx_l1,
 					       slice_params->num_ref_idx_l1_active_minus1 + 1,
 					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1);
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index 3b525fd6e6188..1b702c3230fbd 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -19,6 +19,7 @@
 #define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_CODEC_BASE + 1008)
 #define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_CODEC_BASE + 1009)
 #define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_BASE + 1010)
+#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_BASE + 1012)
 #define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_CODEC_BASE + 1015)
 #define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_CODEC_BASE + 1016)
 
@@ -26,6 +27,7 @@
 #define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
 #define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
 #define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
+#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
 
 enum v4l2_mpeg_video_hevc_decode_mode {
 	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
@@ -194,18 +196,10 @@ struct v4l2_ctrl_hevc_slice_params {
 	__u8	pic_struct;
 
 	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-	__u8	num_active_dpb_entries;
 	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
 	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
 
-	__u8	num_rps_poc_st_curr_before;
-	__u8	num_rps_poc_st_curr_after;
-	__u8	num_rps_poc_lt_curr;
-
-	__u8	padding;
-
-	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
-	struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+	__u8	padding[5];
 
 	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
 	struct v4l2_hevc_pred_weight_table pred_weight_table;
@@ -213,4 +207,21 @@ struct v4l2_ctrl_hevc_slice_params {
 	__u64	flags;
 };
 
+#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
+#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
+#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
+
+struct v4l2_ctrl_hevc_decode_params {
+	__s32	pic_order_cnt_val;
+	__u8	num_active_dpb_entries;
+	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+	__u8	num_poc_st_curr_before;
+	__u8	num_poc_st_curr_after;
+	__u8	num_poc_lt_curr;
+	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
+	__u64	flags;
+};
+
 #endif
-- 
GitLab


From 42cb2a8f27d284b6c73dfc23bed4d6991f3bc1a3 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:49:58 +0200
Subject: [PATCH 2456/3804] media: hantro: change hantro_codec_ops run
 prototype to return errors

Change hantro_codec_ops run prototype from 'void' to 'int'.
This allows the driver to cancel the job if an error occurs while configuring
the hardware.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c     |  4 +++-
 .../staging/media/hantro/hantro_g1_h264_dec.c | 10 +++++++---
 .../media/hantro/hantro_g1_mpeg2_dec.c        |  4 +++-
 .../staging/media/hantro/hantro_g1_vp8_dec.c  |  6 ++++--
 .../staging/media/hantro/hantro_h1_jpeg_enc.c |  4 +++-
 drivers/staging/media/hantro/hantro_hw.h      | 19 ++++++++++---------
 .../media/hantro/rk3399_vpu_hw_jpeg_enc.c     |  4 +++-
 .../media/hantro/rk3399_vpu_hw_mpeg2_dec.c    |  4 +++-
 .../media/hantro/rk3399_vpu_hw_vp8_dec.c      |  6 ++++--
 9 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 4914987cfd9dd..e255756dfd9e3 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -170,7 +170,9 @@ static void device_run(void *priv)
 
 	v4l2_m2m_buf_copy_metadata(src, dst, true);
 
-	ctx->codec_ops->run(ctx);
+	if (ctx->codec_ops->run(ctx))
+		goto err_cancel_job;
+
 	return;
 
 err_cancel_job:
diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
index 845bef73d2184..5c792b7bcb79e 100644
--- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
@@ -273,13 +273,15 @@ static void set_buffers(struct hantro_ctx *ctx)
 	vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE);
 }
 
-void hantro_g1_h264_dec_run(struct hantro_ctx *ctx)
+int hantro_g1_h264_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
+	int ret;
 
 	/* Prepare the H264 decoder context. */
-	if (hantro_h264_dec_prepare_run(ctx))
-		return;
+	ret = hantro_h264_dec_prepare_run(ctx);
+	if (ret)
+		return ret;
 
 	/* Configure hardware registers. */
 	set_params(ctx);
@@ -301,4 +303,6 @@ void hantro_g1_h264_dec_run(struct hantro_ctx *ctx)
 			   G1_REG_CONFIG_DEC_CLK_GATE_E,
 			   G1_REG_CONFIG);
 	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+	return 0;
 }
diff --git a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
index 6ee1a19d189b8..9aea331e1a3c9 100644
--- a/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_mpeg2_dec.c
@@ -145,7 +145,7 @@ hantro_g1_mpeg2_dec_set_buffers(struct hantro_dev *vpu, struct hantro_ctx *ctx,
 	vdpu_write_relaxed(vpu, backward_addr, G1_REG_REFER3_BASE);
 }
 
-void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
+int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -235,4 +235,6 @@ void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx)
 	hantro_end_prepare_run(ctx);
 
 	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+	return 0;
 }
diff --git a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c
index 57002ba701768..96622a7f8279e 100644
--- a/drivers/staging/media/hantro/hantro_g1_vp8_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_vp8_dec.c
@@ -425,7 +425,7 @@ static void cfg_buffers(struct hantro_ctx *ctx,
 	vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST);
 }
 
-void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx)
+int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx)
 {
 	const struct v4l2_ctrl_vp8_frame *hdr;
 	struct hantro_dev *vpu = ctx->dev;
@@ -438,7 +438,7 @@ void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx)
 
 	hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME);
 	if (WARN_ON(!hdr))
-		return;
+		return -EINVAL;
 
 	/* Reset segment_map buffer in keyframe */
 	if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu)
@@ -498,4 +498,6 @@ void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx)
 	hantro_end_prepare_run(ctx);
 
 	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_E, G1_REG_INTERRUPT);
+
+	return 0;
 }
diff --git a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
index b88dc4ed06db7..56cf261a8e958 100644
--- a/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
+++ b/drivers/staging/media/hantro/hantro_h1_jpeg_enc.c
@@ -88,7 +88,7 @@ hantro_h1_jpeg_enc_set_qtable(struct hantro_dev *vpu,
 	}
 }
 
-void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
+int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -136,6 +136,8 @@ void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx)
 	hantro_end_prepare_run(ctx);
 
 	vepu_write(vpu, reg, H1_REG_ENC_CTRL);
+
+	return 0;
 }
 
 void hantro_jpeg_enc_done(struct hantro_ctx *ctx)
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 3d8b53567f16b..4b73c8011b255 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -133,14 +133,15 @@ struct hantro_postproc_ctx {
  *		Optional and called from process context.
  * @run:	Start single {en,de)coding job. Called from atomic context
  *		to indicate that a pair of buffers is ready and the hardware
- *		should be programmed and started.
+ *		should be programmed and started. Returns zero if OK, a
+ *		negative value in error cases.
  * @done:	Read back processing results and additional data from hardware.
  * @reset:	Reset the hardware in case of a timeout.
  */
 struct hantro_codec_ops {
 	int (*init)(struct hantro_ctx *ctx);
 	void (*exit)(struct hantro_ctx *ctx);
-	void (*run)(struct hantro_ctx *ctx);
+	int (*run)(struct hantro_ctx *ctx);
 	void (*done)(struct hantro_ctx *ctx);
 	void (*reset)(struct hantro_ctx *ctx);
 };
@@ -180,8 +181,8 @@ void hantro_end_prepare_run(struct hantro_ctx *ctx);
 irqreturn_t hantro_g1_irq(int irq, void *dev_id);
 void hantro_g1_reset(struct hantro_ctx *ctx);
 
-void hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
-void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx);
+int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
+int rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx);
 int hantro_jpeg_enc_init(struct hantro_ctx *ctx);
 void hantro_jpeg_enc_exit(struct hantro_ctx *ctx);
 void hantro_jpeg_enc_done(struct hantro_ctx *ctx);
@@ -189,7 +190,7 @@ void hantro_jpeg_enc_done(struct hantro_ctx *ctx);
 dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
 				   unsigned int dpb_idx);
 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx);
-void hantro_g1_h264_dec_run(struct hantro_ctx *ctx);
+int hantro_g1_h264_dec_run(struct hantro_ctx *ctx);
 int hantro_h264_dec_init(struct hantro_ctx *ctx);
 void hantro_h264_dec_exit(struct hantro_ctx *ctx);
 
@@ -220,15 +221,15 @@ hantro_h264_mv_size(unsigned int width, unsigned int height)
 	return 64 * MB_WIDTH(width) * MB_WIDTH(height) + 32;
 }
 
-void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
-void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx);
+int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
+int rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
 				  const struct v4l2_ctrl_mpeg2_quantisation *ctrl);
 int hantro_mpeg2_dec_init(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx);
 
-void hantro_g1_vp8_dec_run(struct hantro_ctx *ctx);
-void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx);
+int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx);
+int rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx);
 int hantro_vp8_dec_init(struct hantro_ctx *ctx);
 void hantro_vp8_dec_exit(struct hantro_ctx *ctx);
 void hantro_vp8_prob_update(struct hantro_ctx *ctx,
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c b/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c
index 3498e6124acdb..3a27ebef4f388 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c
@@ -118,7 +118,7 @@ rk3399_vpu_jpeg_enc_set_qtable(struct hantro_dev *vpu,
 	}
 }
 
-void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx)
+int rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -168,4 +168,6 @@ void rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx)
 	/* Kick the watchdog and start encoding */
 	hantro_end_prepare_run(ctx);
 	vepu_write(vpu, reg, VEPU_REG_ENCODE_START);
+
+	return 0;
 }
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
index 2527dce7eb18f..683982c24c2dd 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
@@ -148,7 +148,7 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 	vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER3_BASE);
 }
 
-void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
+int rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -244,4 +244,6 @@ void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 
 	reg = vdpu_read(vpu, VDPU_SWREG(57)) | VDPU_REG_DEC_E(1);
 	vdpu_write(vpu, reg, VDPU_SWREG(57));
+
+	return 0;
 }
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c b/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c
index 8661a3cc1e6b5..e5d20fe5b0070 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c
+++ b/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c
@@ -503,7 +503,7 @@ static void cfg_buffers(struct hantro_ctx *ctx,
 	vdpu_write_relaxed(vpu, dst_dma, VDPU_REG_ADDR_DST);
 }
 
-void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx)
+int rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx)
 {
 	const struct v4l2_ctrl_vp8_frame *hdr;
 	struct hantro_dev *vpu = ctx->dev;
@@ -516,7 +516,7 @@ void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx)
 
 	hdr = hantro_get_ctrl(ctx, V4L2_CID_STATELESS_VP8_FRAME);
 	if (WARN_ON(!hdr))
-		return;
+		return -EINVAL;
 
 	/* Reset segment_map buffer in keyframe */
 	if (V4L2_VP8_FRAME_IS_KEY_FRAME(hdr) && ctx->vp8_dec.segment_map.cpu)
@@ -589,4 +589,6 @@ void rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx)
 	hantro_end_prepare_run(ctx);
 
 	hantro_reg_write(vpu, &vp8_dec_start_dec, 1);
+
+	return 0;
 }
-- 
GitLab


From 8968cfc282955c3f853b34d9ceaaa1ba33943e94 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:49:59 +0200
Subject: [PATCH 2457/3804] media: hantro: Define HEVC codec profiles and
 supported features

Define which HEVC profiles (up to level 5.1) and features
(no scaling, no 10 bits) are supported by the driver.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro.h     |  3 ++
 drivers/staging/media/hantro/hantro_drv.c | 58 +++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h
index 6c1b888abe759..4368c0962768f 100644
--- a/drivers/staging/media/hantro/hantro.h
+++ b/drivers/staging/media/hantro/hantro.h
@@ -34,6 +34,7 @@ struct hantro_codec_ops;
 #define HANTRO_MPEG2_DECODER	BIT(16)
 #define HANTRO_VP8_DECODER	BIT(17)
 #define HANTRO_H264_DECODER	BIT(18)
+#define HANTRO_HEVC_DECODER	BIT(19)
 #define HANTRO_DECODERS		0xffff0000
 
 /**
@@ -99,6 +100,7 @@ struct hantro_variant {
  * @HANTRO_MODE_H264_DEC: H264 decoder.
  * @HANTRO_MODE_MPEG2_DEC: MPEG-2 decoder.
  * @HANTRO_MODE_VP8_DEC: VP8 decoder.
+ * @HANTRO_MODE_HEVC_DEC: HEVC decoder.
  */
 enum hantro_codec_mode {
 	HANTRO_MODE_NONE = -1,
@@ -106,6 +108,7 @@ enum hantro_codec_mode {
 	HANTRO_MODE_H264_DEC,
 	HANTRO_MODE_MPEG2_DEC,
 	HANTRO_MODE_VP8_DEC,
+	HANTRO_MODE_HEVC_DEC,
 };
 
 /*
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index e255756dfd9e3..0e25d377f077c 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -254,6 +254,18 @@ static int hantro_try_ctrl(struct v4l2_ctrl *ctrl)
 		if (sps->bit_depth_luma_minus8 != 0)
 			/* Only 8-bit is supported */
 			return -EINVAL;
+	} else if (ctrl->id == V4L2_CID_MPEG_VIDEO_HEVC_SPS) {
+		const struct v4l2_ctrl_hevc_sps *sps = ctrl->p_new.p_hevc_sps;
+
+		if (sps->bit_depth_luma_minus8 != sps->bit_depth_chroma_minus8)
+			/* Luma and chroma bit depth mismatch */
+			return -EINVAL;
+		if (sps->bit_depth_luma_minus8 != 0)
+			/* Only 8-bit is supported */
+			return -EINVAL;
+		if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED)
+			/* No scaling support */
+			return -EINVAL;
 	}
 	return 0;
 }
@@ -365,6 +377,52 @@ static const struct hantro_ctrl controls[] = {
 			.def = V4L2_MPEG_VIDEO_H264_PROFILE_MAIN,
 		}
 	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE,
+			.min = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
+			.max = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
+			.def = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE,
+			.min = V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
+			.max = V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
+			.def = V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_PROFILE,
+			.min = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN,
+			.max = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN_10,
+			.def = V4L2_MPEG_VIDEO_HEVC_PROFILE_MAIN,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_LEVEL,
+			.min = V4L2_MPEG_VIDEO_HEVC_LEVEL_1,
+			.max = V4L2_MPEG_VIDEO_HEVC_LEVEL_5_1,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
+			.ops = &hantro_ctrl_ops,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
+		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
+		},
 	},
 };
 
-- 
GitLab


From 31ad15e688e58a94779971f428c414b7a3f882d1 Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:50:00 +0200
Subject: [PATCH 2458/3804] media: hantro: Only use postproc when post
 processed formats are defined

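If the variant does not define any post-processed formats (postproc_fmts
is NULL), never use the post-processor: hantro_needs_postproc() returns
false and no post-processed formats are enumerated.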

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro.h          |  8 ++------
 drivers/staging/media/hantro/hantro_postproc.c | 14 ++++++++++++++
 drivers/staging/media/hantro/hantro_v4l2.c     |  4 +++-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h
index 4368c0962768f..e50d39b51902a 100644
--- a/drivers/staging/media/hantro/hantro.h
+++ b/drivers/staging/media/hantro/hantro.h
@@ -413,12 +413,8 @@ hantro_get_dst_buf(struct hantro_ctx *ctx)
 	return v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx);
 }
 
-static inline bool
-hantro_needs_postproc(const struct hantro_ctx *ctx,
-		      const struct hantro_fmt *fmt)
-{
-	return !ctx->is_encoder && fmt->fourcc != V4L2_PIX_FMT_NV12;
-}
+bool hantro_needs_postproc(const struct hantro_ctx *ctx,
+			   const struct hantro_fmt *fmt);
 
 static inline dma_addr_t
 hantro_get_dec_buf_addr(struct hantro_ctx *ctx, struct vb2_buffer *vb)
diff --git a/drivers/staging/media/hantro/hantro_postproc.c b/drivers/staging/media/hantro/hantro_postproc.c
index 6d2a8f2a8f0bb..ed8916c950a4f 100644
--- a/drivers/staging/media/hantro/hantro_postproc.c
+++ b/drivers/staging/media/hantro/hantro_postproc.c
@@ -50,6 +50,20 @@ const struct hantro_postproc_regs hantro_g1_postproc_regs = {
 	.display_width = {G1_REG_PP_DISPLAY_WIDTH, 0, 0xfff},
 };
 
+bool hantro_needs_postproc(const struct hantro_ctx *ctx,
+			   const struct hantro_fmt *fmt)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	if (ctx->is_encoder)
+		return false;
+
+	if (!vpu->variant->postproc_fmts)
+		return false;
+
+	return fmt->fourcc != V4L2_PIX_FMT_NV12;
+}
+
 void hantro_postproc_enable(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
index 7ccc6405036ae..aca92cebb2af2 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.c
+++ b/drivers/staging/media/hantro/hantro_v4l2.c
@@ -55,7 +55,9 @@ static const struct hantro_fmt *
 hantro_get_postproc_formats(const struct hantro_ctx *ctx,
 			    unsigned int *num_fmts)
 {
-	if (ctx->is_encoder) {
+	struct hantro_dev *vpu = ctx->dev;
+
+	if (ctx->is_encoder || !vpu->variant->postproc_fmts) {
 		*num_fmts = 0;
 		return NULL;
 	}
-- 
GitLab


From 35f51f6091bcf2cb90d9ac2f41465c415a34632e Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:50:01 +0200
Subject: [PATCH 2459/3804] media: uapi: Add a control for HANTRO driver

The HEVC HANTRO driver needs to know the number of bits to skip at
the beginning of the slice header.
That is a hardware specific requirement so create a dedicated control
for this purpose.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/drivers/hantro.rst    | 19 +++++++++++++++++++
 .../userspace-api/media/drivers/index.rst     |  1 +
 include/media/hevc-ctrls.h                    | 13 +++++++++++++
 3 files changed, 33 insertions(+)
 create mode 100644 Documentation/userspace-api/media/drivers/hantro.rst

diff --git a/Documentation/userspace-api/media/drivers/hantro.rst b/Documentation/userspace-api/media/drivers/hantro.rst
new file mode 100644
index 0000000000000..cd9754b4e005a
--- /dev/null
+++ b/Documentation/userspace-api/media/drivers/hantro.rst
@@ -0,0 +1,19 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Hantro video decoder driver
+===========================
+
+The Hantro video decoder driver implements the following driver-specific controls:
+
+``V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP (integer)``
+    Specifies to Hantro HEVC video decoder driver the number of data (in bits) to
+    skip in the slice segment header.
+    If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
+    to before syntax element "slice_temporal_mvp_enabled_flag".
+    If IDR, the skipped bits are just "pic_output_flag"
+    (separate_colour_plane_flag is not supported).
+
+.. note::
+
+        This control is not yet part of the public kernel API and
+        it is expected to change.
diff --git a/Documentation/userspace-api/media/drivers/index.rst b/Documentation/userspace-api/media/drivers/index.rst
index 1a9038f5f9fae..12e3c512d7185 100644
--- a/Documentation/userspace-api/media/drivers/index.rst
+++ b/Documentation/userspace-api/media/drivers/index.rst
@@ -33,6 +33,7 @@ For more details see the file COPYING in the source distribution of Linux.
 
 	ccs
 	cx2341x-uapi
+        hantro
 	imx-uapi
 	max2175
 	meye-uapi
diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h
index 1b702c3230fbd..53c0038c792bc 100644
--- a/include/media/hevc-ctrls.h
+++ b/include/media/hevc-ctrls.h
@@ -224,4 +224,17 @@ struct v4l2_ctrl_hevc_decode_params {
 	__u64	flags;
 };
 
+/*  MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
+#define V4L2_CID_CODEC_HANTRO_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1200)
+/*
+ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
+ * the number of data (in bits) to skip in the
+ * slice segment header.
+ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
+ * to before syntax element "slice_temporal_mvp_enabled_flag".
+ * If IDR, the skipped bits are just "pic_output_flag"
+ * (separate_colour_plane_flag is not supported).
+ */
+#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
+
 #endif
-- 
GitLab


From b7782b34a76615f8199daf1bce544aa73e35f44d Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:50:02 +0200
Subject: [PATCH 2460/3804] media: hantro: handle V4L2_PIX_FMT_HEVC_SLICE
 control

Set requires_requests on the output queue when V4L2_PIX_FMT_HEVC_SLICE is
selected, as is already done for the MPEG-2, VP8 and H.264 formats.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_v4l2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c
index aca92cebb2af2..bcb0bdff4a9a6 100644
--- a/drivers/staging/media/hantro/hantro_v4l2.c
+++ b/drivers/staging/media/hantro/hantro_v4l2.c
@@ -392,6 +392,7 @@ hantro_update_requires_request(struct hantro_ctx *ctx, u32 fourcc)
 	case V4L2_PIX_FMT_MPEG2_SLICE:
 	case V4L2_PIX_FMT_VP8_FRAME:
 	case V4L2_PIX_FMT_H264_SLICE:
+	case V4L2_PIX_FMT_HEVC_SLICE:
 		ctx->fh.m2m_ctx->out_q_ctx.q.requires_requests = true;
 		break;
 	default:
-- 
GitLab


From cb5dd5a0fa518dff14ff2b90837c3c8f98f4dd5c Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:50:03 +0200
Subject: [PATCH 2461/3804] media: hantro: Introduce G2/HEVC decoder

Implement all the logic needed to get the G2 hardware decoding HEVC frames.
It supports HEVC streams up to level 5.1.
It does not yet support 10-bit formats or scaling lists.

Add a dedicated Hantro HEVC control to skip some bits at the beginning
of the slice header. This is very specific to this hardware, so it can't
go into the uAPI structures. Computing the needed value is complex and
requires information from the stream that only userspace knows, so let
userspace provide the correct value to the driver.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Co-developed-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Co-developed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/Makefile         |   2 +
 drivers/staging/media/hantro/hantro.h         |   2 +
 drivers/staging/media/hantro/hantro_drv.c     |  36 ++
 .../staging/media/hantro/hantro_g2_hevc_dec.c | 586 ++++++++++++++++++
 drivers/staging/media/hantro/hantro_g2_regs.h | 198 ++++++
 drivers/staging/media/hantro/hantro_hevc.c    | 333 ++++++++++
 drivers/staging/media/hantro/hantro_hw.h      |  51 ++
 7 files changed, 1208 insertions(+)
 create mode 100644 drivers/staging/media/hantro/hantro_g2_hevc_dec.c
 create mode 100644 drivers/staging/media/hantro/hantro_g2_regs.h
 create mode 100644 drivers/staging/media/hantro/hantro_hevc.c

diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile
index f4b99901eeeec..23bfc423b23c3 100644
--- a/drivers/staging/media/hantro/Makefile
+++ b/drivers/staging/media/hantro/Makefile
@@ -10,12 +10,14 @@ hantro-vpu-y += \
 		hantro_g1.o \
 		hantro_g1_h264_dec.o \
 		hantro_g1_mpeg2_dec.o \
+		hantro_g2_hevc_dec.o \
 		hantro_g1_vp8_dec.o \
 		rk3399_vpu_hw_jpeg_enc.o \
 		rk3399_vpu_hw_mpeg2_dec.o \
 		rk3399_vpu_hw_vp8_dec.o \
 		hantro_jpeg.o \
 		hantro_h264.o \
+		hantro_hevc.o \
 		hantro_mpeg2.o \
 		hantro_vp8.o
 
diff --git a/drivers/staging/media/hantro/hantro.h b/drivers/staging/media/hantro/hantro.h
index e50d39b51902a..a70c386de6f19 100644
--- a/drivers/staging/media/hantro/hantro.h
+++ b/drivers/staging/media/hantro/hantro.h
@@ -221,6 +221,7 @@ struct hantro_dev {
  * @jpeg_enc:		JPEG-encoding context.
  * @mpeg2_dec:		MPEG-2-decoding context.
  * @vp8_dec:		VP8-decoding context.
+ * @hevc_dec:		HEVC-decoding context.
  */
 struct hantro_ctx {
 	struct hantro_dev *dev;
@@ -247,6 +248,7 @@ struct hantro_ctx {
 		struct hantro_jpeg_enc_hw_ctx jpeg_enc;
 		struct hantro_mpeg2_dec_hw_ctx mpeg2_dec;
 		struct hantro_vp8_dec_hw_ctx vp8_dec;
+		struct hantro_hevc_dec_hw_ctx hevc_dec;
 	};
 };
 
diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 0e25d377f077c..d448cdff59eac 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -290,6 +290,26 @@ static int hantro_jpeg_s_ctrl(struct v4l2_ctrl *ctrl)
 	return 0;
 }
 
+static int hantro_hevc_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct hantro_ctx *ctx;
+
+	ctx = container_of(ctrl->handler,
+			   struct hantro_ctx, ctrl_handler);
+
+	vpu_debug(1, "s_ctrl: id = %d, val = %d\n", ctrl->id, ctrl->val);
+
+	switch (ctrl->id) {
+	case V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP:
+		ctx->hevc_dec.ctrls.hevc_hdr_skip_length = ctrl->val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct v4l2_ctrl_ops hantro_ctrl_ops = {
 	.try_ctrl = hantro_try_ctrl,
 };
@@ -298,6 +318,10 @@ static const struct v4l2_ctrl_ops hantro_jpeg_ctrl_ops = {
 	.s_ctrl = hantro_jpeg_s_ctrl,
 };
 
+static const struct v4l2_ctrl_ops hantro_hevc_ctrl_ops = {
+	.s_ctrl = hantro_hevc_s_ctrl,
+};
+
 static const struct hantro_ctrl controls[] = {
 	{
 		.codec = HANTRO_JPEG_ENCODER,
@@ -423,6 +447,18 @@ static const struct hantro_ctrl controls[] = {
 		.cfg = {
 			.id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
 		},
+	}, {
+		.codec = HANTRO_HEVC_DECODER,
+		.cfg = {
+			.id = V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP,
+			.name = "Hantro HEVC slice header skip bytes",
+			.type = V4L2_CTRL_TYPE_INTEGER,
+			.min = 0,
+			.def = 0,
+			.max = 0x100,
+			.step = 1,
+			.ops = &hantro_hevc_ctrl_ops,
+		},
 	},
 };
 
diff --git a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c
new file mode 100644
index 0000000000000..340efb57fd185
--- /dev/null
+++ b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c
@@ -0,0 +1,586 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU HEVC codec driver
+ *
+ * Copyright (C) 2020 Safran Passenger Innovations LLC
+ */
+
+#include "hantro_hw.h"
+#include "hantro_g2_regs.h"
+
+#define HEVC_DEC_MODE	0xC
+
+#define BUS_WIDTH_32		0
+#define BUS_WIDTH_64		1
+#define BUS_WIDTH_128		2
+#define BUS_WIDTH_256		3
+
+static inline void hantro_write_addr(struct hantro_dev *vpu,
+				     unsigned long offset,
+				     dma_addr_t addr)
+{
+	vdpu_write(vpu, addr & 0xffffffff, offset);
+}
+
+static void prepare_tile_info_buffer(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+	u16 *p = (u16 *)((u8 *)ctx->hevc_dec.tile_sizes.cpu);
+	unsigned int num_tile_rows = pps->num_tile_rows_minus1 + 1;
+	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
+	unsigned int pic_width_in_ctbs, pic_height_in_ctbs;
+	unsigned int max_log2_ctb_size, ctb_size;
+	bool tiles_enabled, uniform_spacing;
+	u32 no_chroma = 0;
+
+	tiles_enabled = !!(pps->flags & V4L2_HEVC_PPS_FLAG_TILES_ENABLED);
+	uniform_spacing = !!(pps->flags & V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING);
+
+	hantro_reg_write(vpu, &g2_tile_e, tiles_enabled);
+
+	max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 +
+			    sps->log2_diff_max_min_luma_coding_block_size;
+	pic_width_in_ctbs = (sps->pic_width_in_luma_samples +
+			    (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size;
+	pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1)
+			     >> max_log2_ctb_size;
+	ctb_size = 1 << max_log2_ctb_size;
+
+	vpu_debug(1, "Preparing tile sizes buffer for %dx%d CTBs (CTB size %d)\n",
+		  pic_width_in_ctbs, pic_height_in_ctbs, ctb_size);
+
+	if (tiles_enabled) {
+		unsigned int i, j, h;
+
+		vpu_debug(1, "Tiles enabled! %dx%d\n", num_tile_cols, num_tile_rows);
+
+		hantro_reg_write(vpu, &g2_num_tile_rows, num_tile_rows);
+		hantro_reg_write(vpu, &g2_num_tile_cols, num_tile_cols);
+
+		/* write width + height for each tile in pic */
+		if (!uniform_spacing) {
+			u32 tmp_w = 0, tmp_h = 0;
+
+			for (i = 0; i < num_tile_rows; i++) {
+				if (i == num_tile_rows - 1)
+					h = pic_height_in_ctbs - tmp_h;
+				else
+					h = pps->row_height_minus1[i] + 1;
+				tmp_h += h;
+				if (i == 0 && h == 1 && ctb_size == 16)
+					no_chroma = 1;
+				for (j = 0, tmp_w = 0; j < num_tile_cols - 1; j++) {
+					tmp_w += pps->column_width_minus1[j] + 1;
+					*p++ = pps->column_width_minus1[j + 1];
+					*p++ = h;
+					if (i == 0 && h == 1 && ctb_size == 16)
+						no_chroma = 1;
+				}
+				/* last column */
+				*p++ = pic_width_in_ctbs - tmp_w;
+				*p++ = h;
+			}
+		} else { /* uniform spacing */
+			u32 tmp, prev_h, prev_w;
+
+			for (i = 0, prev_h = 0; i < num_tile_rows; i++) {
+				tmp = (i + 1) * pic_height_in_ctbs / num_tile_rows;
+				h = tmp - prev_h;
+				prev_h = tmp;
+				if (i == 0 && h == 1 && ctb_size == 16)
+					no_chroma = 1;
+				for (j = 0, prev_w = 0; j < num_tile_cols; j++) {
+					tmp = (j + 1) * pic_width_in_ctbs / num_tile_cols;
+					*p++ = tmp - prev_w;
+					*p++ = h;
+					if (j == 0 &&
+					    (pps->column_width_minus1[0] + 1) == 1 &&
+					    ctb_size == 16)
+						no_chroma = 1;
+					prev_w = tmp;
+				}
+			}
+		}
+	} else {
+		hantro_reg_write(vpu, &g2_num_tile_rows, 1);
+		hantro_reg_write(vpu, &g2_num_tile_cols, 1);
+
+		/* There's one tile, with dimensions equal to pic size. */
+		p[0] = pic_width_in_ctbs;
+		p[1] = pic_height_in_ctbs;
+	}
+
+	if (no_chroma)
+		vpu_debug(1, "%s: no chroma!\n", __func__);
+}
+
+static void set_params(struct hantro_ctx *ctx)
+{
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+	const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+	struct hantro_dev *vpu = ctx->dev;
+	u32 min_log2_cb_size, max_log2_ctb_size, min_cb_size, max_ctb_size;
+	u32 pic_width_in_min_cbs, pic_height_in_min_cbs;
+	u32 pic_width_aligned, pic_height_aligned;
+	u32 partial_ctb_x, partial_ctb_y;
+
+	hantro_reg_write(vpu, &g2_bit_depth_y_minus8, sps->bit_depth_luma_minus8);
+	hantro_reg_write(vpu, &g2_bit_depth_c_minus8, sps->bit_depth_chroma_minus8);
+
+	hantro_reg_write(vpu, &g2_output_8_bits, 0);
+
+	hantro_reg_write(vpu, &g2_hdr_skip_length, ctrls->hevc_hdr_skip_length);
+
+	min_log2_cb_size = sps->log2_min_luma_coding_block_size_minus3 + 3;
+	max_log2_ctb_size = min_log2_cb_size + sps->log2_diff_max_min_luma_coding_block_size;
+
+	hantro_reg_write(vpu, &g2_min_cb_size, min_log2_cb_size);
+	hantro_reg_write(vpu, &g2_max_cb_size, max_log2_ctb_size);
+
+	min_cb_size = 1 << min_log2_cb_size;
+	max_ctb_size = 1 << max_log2_ctb_size;
+
+	pic_width_in_min_cbs = sps->pic_width_in_luma_samples / min_cb_size;
+	pic_height_in_min_cbs = sps->pic_height_in_luma_samples / min_cb_size;
+	pic_width_aligned = ALIGN(sps->pic_width_in_luma_samples, max_ctb_size);
+	pic_height_aligned = ALIGN(sps->pic_height_in_luma_samples, max_ctb_size);
+
+	partial_ctb_x = !!(sps->pic_width_in_luma_samples != pic_width_aligned);
+	partial_ctb_y = !!(sps->pic_height_in_luma_samples != pic_height_aligned);
+
+	hantro_reg_write(vpu, &g2_partial_ctb_x, partial_ctb_x);
+	hantro_reg_write(vpu, &g2_partial_ctb_y, partial_ctb_y);
+
+	hantro_reg_write(vpu, &g2_pic_width_in_cbs, pic_width_in_min_cbs);
+	hantro_reg_write(vpu, &g2_pic_height_in_cbs, pic_height_in_min_cbs);
+
+	hantro_reg_write(vpu, &g2_pic_width_4x4,
+			 (pic_width_in_min_cbs * min_cb_size) / 4);
+	hantro_reg_write(vpu, &g2_pic_height_4x4,
+			 (pic_height_in_min_cbs * min_cb_size) / 4);
+
+	hantro_reg_write(vpu, &hevc_max_inter_hierdepth,
+			 sps->max_transform_hierarchy_depth_inter);
+	hantro_reg_write(vpu, &hevc_max_intra_hierdepth,
+			 sps->max_transform_hierarchy_depth_intra);
+	hantro_reg_write(vpu, &hevc_min_trb_size,
+			 sps->log2_min_luma_transform_block_size_minus2 + 2);
+	hantro_reg_write(vpu, &hevc_max_trb_size,
+			 sps->log2_min_luma_transform_block_size_minus2 + 2 +
+			 sps->log2_diff_max_min_luma_transform_block_size);
+
+	hantro_reg_write(vpu, &g2_tempor_mvp_e,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED) &&
+			 !(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC));
+	hantro_reg_write(vpu, &g2_strong_smooth_e,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED));
+	hantro_reg_write(vpu, &g2_asym_pred_e,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_AMP_ENABLED));
+	hantro_reg_write(vpu, &g2_sao_e,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET));
+	hantro_reg_write(vpu, &g2_sign_data_hide,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED));
+
+	if (pps->flags & V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED) {
+		hantro_reg_write(vpu, &g2_cu_qpd_e, 1);
+		hantro_reg_write(vpu, &g2_max_cu_qpd_depth, pps->diff_cu_qp_delta_depth);
+	} else {
+		hantro_reg_write(vpu, &g2_cu_qpd_e, 0);
+		hantro_reg_write(vpu, &g2_max_cu_qpd_depth, 0);
+	}
+
+	if (pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT) {
+		hantro_reg_write(vpu, &g2_cb_qp_offset, pps->pps_cb_qp_offset);
+		hantro_reg_write(vpu, &g2_cr_qp_offset, pps->pps_cr_qp_offset);
+	} else {
+		hantro_reg_write(vpu, &g2_cb_qp_offset, 0);
+		hantro_reg_write(vpu, &g2_cr_qp_offset, 0);
+	}
+
+	hantro_reg_write(vpu, &g2_filt_offset_beta, pps->pps_beta_offset_div2);
+	hantro_reg_write(vpu, &g2_filt_offset_tc, pps->pps_tc_offset_div2);
+	hantro_reg_write(vpu, &g2_slice_hdr_ext_e,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT));
+	hantro_reg_write(vpu, &g2_slice_hdr_ext_bits, pps->num_extra_slice_header_bits);
+	hantro_reg_write(vpu, &g2_slice_chqp_present,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT));
+	hantro_reg_write(vpu, &g2_weight_bipr_idc,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED));
+	hantro_reg_write(vpu, &g2_transq_bypass,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED));
+	hantro_reg_write(vpu, &g2_list_mod_e,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT));
+	hantro_reg_write(vpu, &g2_entropy_sync_e,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED));
+	hantro_reg_write(vpu, &g2_cabac_init_present,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT));
+	hantro_reg_write(vpu, &g2_idr_pic_e,
+			 !!(decode_params->flags & V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC));
+	hantro_reg_write(vpu, &hevc_parallel_merge,
+			 pps->log2_parallel_merge_level_minus2 + 2);
+	hantro_reg_write(vpu, &g2_pcm_filt_d,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED));
+	hantro_reg_write(vpu, &g2_pcm_e,
+			 !!(sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED));
+	if (sps->flags & V4L2_HEVC_SPS_FLAG_PCM_ENABLED) {
+		hantro_reg_write(vpu, &g2_max_pcm_size,
+				 sps->log2_diff_max_min_pcm_luma_coding_block_size +
+				 sps->log2_min_pcm_luma_coding_block_size_minus3 + 3);
+		hantro_reg_write(vpu, &g2_min_pcm_size,
+				 sps->log2_min_pcm_luma_coding_block_size_minus3 + 3);
+		hantro_reg_write(vpu, &g2_bit_depth_pcm_y,
+				 sps->pcm_sample_bit_depth_luma_minus1 + 1);
+		hantro_reg_write(vpu, &g2_bit_depth_pcm_c,
+				 sps->pcm_sample_bit_depth_chroma_minus1 + 1);
+	} else {
+		hantro_reg_write(vpu, &g2_max_pcm_size, 0);
+		hantro_reg_write(vpu, &g2_min_pcm_size, 0);
+		hantro_reg_write(vpu, &g2_bit_depth_pcm_y, 0);
+		hantro_reg_write(vpu, &g2_bit_depth_pcm_c, 0);
+	}
+
+	hantro_reg_write(vpu, &g2_start_code_e, 1);
+	hantro_reg_write(vpu, &g2_init_qp, pps->init_qp_minus26 + 26);
+	hantro_reg_write(vpu, &g2_weight_pred_e,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED));
+	hantro_reg_write(vpu, &g2_cabac_init_present,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT));
+	hantro_reg_write(vpu, &g2_const_intra_e,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED));
+	hantro_reg_write(vpu, &g2_transform_skip,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED));
+	hantro_reg_write(vpu, &g2_out_filtering_dis,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER));
+	hantro_reg_write(vpu, &g2_filt_ctrl_pres,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT));
+	hantro_reg_write(vpu, &g2_dependent_slice,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED));
+	hantro_reg_write(vpu, &g2_filter_override,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED));
+	hantro_reg_write(vpu, &g2_refidx0_active,
+			 pps->num_ref_idx_l0_default_active_minus1 + 1);
+	hantro_reg_write(vpu, &g2_refidx1_active,
+			 pps->num_ref_idx_l1_default_active_minus1 + 1);
+	hantro_reg_write(vpu, &g2_apf_threshold, 8);
+}
+
+static int find_ref_pic_index(const struct v4l2_hevc_dpb_entry *dpb, int pic_order_cnt)
+{
+	int i;
+
+	for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+		if (dpb[i].pic_order_cnt[0] == pic_order_cnt)
+			return i;
+	}
+
+	return 0x0;
+}
+
+static void set_ref_pic_list(struct hantro_ctx *ctx)
+{
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	struct hantro_dev *vpu = ctx->dev;
+	const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+	const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
+	u32 list0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+	u32 list1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX] = {};
+	static const struct hantro_reg ref_pic_regs0[] = {
+		hevc_rlist_f0,
+		hevc_rlist_f1,
+		hevc_rlist_f2,
+		hevc_rlist_f3,
+		hevc_rlist_f4,
+		hevc_rlist_f5,
+		hevc_rlist_f6,
+		hevc_rlist_f7,
+		hevc_rlist_f8,
+		hevc_rlist_f9,
+		hevc_rlist_f10,
+		hevc_rlist_f11,
+		hevc_rlist_f12,
+		hevc_rlist_f13,
+		hevc_rlist_f14,
+		hevc_rlist_f15,
+	};
+	static const struct hantro_reg ref_pic_regs1[] = {
+		hevc_rlist_b0,
+		hevc_rlist_b1,
+		hevc_rlist_b2,
+		hevc_rlist_b3,
+		hevc_rlist_b4,
+		hevc_rlist_b5,
+		hevc_rlist_b6,
+		hevc_rlist_b7,
+		hevc_rlist_b8,
+		hevc_rlist_b9,
+		hevc_rlist_b10,
+		hevc_rlist_b11,
+		hevc_rlist_b12,
+		hevc_rlist_b13,
+		hevc_rlist_b14,
+		hevc_rlist_b15,
+	};
+	unsigned int i, j;
+
+	/* List 0 contains: short term before, short term after and long term */
+	j = 0;
+	for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list0); i++)
+		list0[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_before[i]);
+	for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list0); i++)
+		list0[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_after[i]);
+	for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list0); i++)
+		list0[j++] = find_ref_pic_index(dpb, decode_params->poc_lt_curr[i]);
+
+	/* Fill the list, copying over and over */
+	i = 0;
+	while (j < ARRAY_SIZE(list0))
+		list0[j++] = list0[i++];
+
+	j = 0;
+	for (i = 0; i < decode_params->num_poc_st_curr_after && j < ARRAY_SIZE(list1); i++)
+		list1[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_after[i]);
+	for (i = 0; i < decode_params->num_poc_st_curr_before && j < ARRAY_SIZE(list1); i++)
+		list1[j++] = find_ref_pic_index(dpb, decode_params->poc_st_curr_before[i]);
+	for (i = 0; i < decode_params->num_poc_lt_curr && j < ARRAY_SIZE(list1); i++)
+		list1[j++] = find_ref_pic_index(dpb, decode_params->poc_lt_curr[i]);
+
+	i = 0;
+	while (j < ARRAY_SIZE(list1))
+		list1[j++] = list1[i++];
+
+	for (i = 0; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+		hantro_reg_write(vpu, &ref_pic_regs0[i], list0[i]);
+		hantro_reg_write(vpu, &ref_pic_regs1[i], list1[i]);
+	}
+}
+
+static int set_ref(struct hantro_ctx *ctx)
+{
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+	const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
+	const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
+	dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
+	struct hantro_dev *vpu = ctx->dev;
+	size_t cr_offset = hantro_hevc_chroma_offset(sps);
+	size_t mv_offset = hantro_hevc_motion_vectors_offset(sps);
+	u32 max_ref_frames;
+	u16 dpb_longterm_e;
+	static const struct hantro_reg cur_poc[] = {
+		hevc_cur_poc_00,
+		hevc_cur_poc_01,
+		hevc_cur_poc_02,
+		hevc_cur_poc_03,
+		hevc_cur_poc_04,
+		hevc_cur_poc_05,
+		hevc_cur_poc_06,
+		hevc_cur_poc_07,
+		hevc_cur_poc_08,
+		hevc_cur_poc_09,
+		hevc_cur_poc_10,
+		hevc_cur_poc_11,
+		hevc_cur_poc_12,
+		hevc_cur_poc_13,
+		hevc_cur_poc_14,
+		hevc_cur_poc_15,
+	};
+	unsigned int i;
+
+	max_ref_frames = decode_params->num_poc_lt_curr +
+		decode_params->num_poc_st_curr_before +
+		decode_params->num_poc_st_curr_after;
+	/*
+	 * Set max_ref_frames to non-zero to avoid HW hang when decoding
+	 * badly marked I-frames.
+	 */
+	max_ref_frames = max_ref_frames ? max_ref_frames : 1;
+	hantro_reg_write(vpu, &g2_num_ref_frames, max_ref_frames);
+	hantro_reg_write(vpu, &g2_filter_over_slices,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED));
+	hantro_reg_write(vpu, &g2_filter_over_tiles,
+			 !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED));
+
+	/*
+	 * Write POC count diff from current pic. For frame decoding only compute
+	 * pic_order_cnt[0] and ignore pic_order_cnt[1] used in field-coding.
+	 */
+	for (i = 0; i < decode_params->num_active_dpb_entries && i < ARRAY_SIZE(cur_poc); i++) {
+		char poc_diff = decode_params->pic_order_cnt_val - dpb[i].pic_order_cnt[0];
+
+		hantro_reg_write(vpu, &cur_poc[i], poc_diff);
+	}
+
+	if (i < ARRAY_SIZE(cur_poc)) {
+		/*
+		 * After the references, fill one entry pointing to itself,
+		 * i.e. difference is zero.
+		 */
+		hantro_reg_write(vpu, &cur_poc[i], 0);
+		i++;
+	}
+
+	/* Fill the rest with the current picture */
+	for (; i < ARRAY_SIZE(cur_poc); i++)
+		hantro_reg_write(vpu, &cur_poc[i], decode_params->pic_order_cnt_val);
+
+	set_ref_pic_list(ctx);
+
+	/* We will only keep the references picture that are still used */
+	ctx->hevc_dec.ref_bufs_used = 0;
+
+	/* Set up addresses of DPB buffers */
+	dpb_longterm_e = 0;
+	for (i = 0; i < decode_params->num_active_dpb_entries &&
+	     i < (V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1); i++) {
+		luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt[0]);
+		if (!luma_addr)
+			return -ENOMEM;
+
+		chroma_addr = luma_addr + cr_offset;
+		mv_addr = luma_addr + mv_offset;
+
+		if (dpb[i].rps == V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR)
+			dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i);
+
+		hantro_write_addr(vpu, G2_REG_ADDR_REF(i), luma_addr);
+		hantro_write_addr(vpu, G2_REG_CHR_REF(i), chroma_addr);
+		hantro_write_addr(vpu, G2_REG_DMV_REF(i), mv_addr);
+	}
+
+	luma_addr = hantro_hevc_get_ref_buf(ctx, decode_params->pic_order_cnt_val);
+	if (!luma_addr)
+		return -ENOMEM;
+
+	chroma_addr = luma_addr + cr_offset;
+	mv_addr = luma_addr + mv_offset;
+
+	hantro_write_addr(vpu, G2_REG_ADDR_REF(i), luma_addr);
+	hantro_write_addr(vpu, G2_REG_CHR_REF(i), chroma_addr);
+	hantro_write_addr(vpu, G2_REG_DMV_REF(i++), mv_addr);
+
+	hantro_write_addr(vpu, G2_ADDR_DST, luma_addr);
+	hantro_write_addr(vpu, G2_ADDR_DST_CHR, chroma_addr);
+	hantro_write_addr(vpu, G2_ADDR_DST_MV, mv_addr);
+
+	hantro_hevc_ref_remove_unused(ctx);
+
+	for (; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
+		hantro_write_addr(vpu, G2_REG_ADDR_REF(i), 0);
+		hantro_write_addr(vpu, G2_REG_CHR_REF(i), 0);
+		hantro_write_addr(vpu, G2_REG_DMV_REF(i), 0);
+	}
+
+	hantro_reg_write(vpu, &g2_refer_lterm_e, dpb_longterm_e);
+
+	return 0;
+}
+
+static void set_buffers(struct hantro_ctx *ctx)
+{
+	struct vb2_v4l2_buffer *src_buf, *dst_buf;
+	struct hantro_dev *vpu = ctx->dev;
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+	size_t cr_offset = hantro_hevc_chroma_offset(sps);
+	dma_addr_t src_dma, dst_dma;
+	u32 src_len, src_buf_len;
+
+	src_buf = hantro_get_src_buf(ctx);
+	dst_buf = hantro_get_dst_buf(ctx);
+
+	/* Source (stream) buffer. */
+	src_dma = vb2_dma_contig_plane_dma_addr(&src_buf->vb2_buf, 0);
+	src_len = vb2_get_plane_payload(&src_buf->vb2_buf, 0);
+	src_buf_len = vb2_plane_size(&src_buf->vb2_buf, 0);
+
+	hantro_write_addr(vpu, G2_ADDR_STR, src_dma);
+	hantro_reg_write(vpu, &g2_stream_len, src_len);
+	hantro_reg_write(vpu, &g2_strm_buffer_len, src_buf_len);
+	hantro_reg_write(vpu, &g2_strm_start_offset, 0);
+	hantro_reg_write(vpu, &g2_write_mvs_e, 1);
+
+	/* Destination (decoded frame) buffer. */
+	dst_dma = hantro_get_dec_buf_addr(ctx, &dst_buf->vb2_buf);
+
+	hantro_write_addr(vpu, G2_RASTER_SCAN, dst_dma);
+	hantro_write_addr(vpu, G2_RASTER_SCAN_CHR, dst_dma + cr_offset);
+	hantro_write_addr(vpu, G2_ADDR_TILE_SIZE, ctx->hevc_dec.tile_sizes.dma);
+	hantro_write_addr(vpu, G2_TILE_FILTER, ctx->hevc_dec.tile_filter.dma);
+	hantro_write_addr(vpu, G2_TILE_SAO, ctx->hevc_dec.tile_sao.dma);
+	hantro_write_addr(vpu, G2_TILE_BSD, ctx->hevc_dec.tile_bsd.dma);
+}
+
+static void hantro_g2_check_idle(struct hantro_dev *vpu)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		u32 status;
+
+		/* Make sure the VPU is idle */
+		status = vdpu_read(vpu, G2_REG_INTERRUPT);
+		if (status & G2_REG_INTERRUPT_DEC_E) {
+			dev_warn(vpu->dev, "device still running, aborting");
+			status |= G2_REG_INTERRUPT_DEC_ABORT_E | G2_REG_INTERRUPT_DEC_IRQ_DIS;
+			vdpu_write(vpu, status, G2_REG_INTERRUPT);
+		}
+	}
+}
+
+int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	int ret;
+
+	hantro_g2_check_idle(vpu);
+
+	/* Prepare HEVC decoder context. */
+	ret = hantro_hevc_dec_prepare_run(ctx);
+	if (ret)
+		return ret;
+
+	/* Configure hardware registers. */
+	set_params(ctx);
+
+	/* set reference pictures */
+	ret = set_ref(ctx);
+	if (ret)
+		return ret;
+
+	set_buffers(ctx);
+	prepare_tile_info_buffer(ctx);
+
+	hantro_end_prepare_run(ctx);
+
+	hantro_reg_write(vpu, &g2_mode, HEVC_DEC_MODE);
+	hantro_reg_write(vpu, &g2_clk_gate_e, 1);
+
+	/* Don't disable output */
+	hantro_reg_write(vpu, &g2_out_dis, 0);
+
+	/* Don't compress buffers */
+	hantro_reg_write(vpu, &g2_ref_compress_bypass, 1);
+
+	/* use NV12 as output format */
+	hantro_reg_write(vpu, &g2_out_rs_e, 1);
+
+	/* Bus width and max burst */
+	hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128);
+	hantro_reg_write(vpu, &g2_max_burst, 16);
+
+	/* Swap */
+	hantro_reg_write(vpu, &g2_strm_swap, 0xf);
+	hantro_reg_write(vpu, &g2_dirmv_swap, 0xf);
+	hantro_reg_write(vpu, &g2_compress_swap, 0xf);
+
+	/* Start decoding! */
+	vdpu_write(vpu, G2_REG_INTERRUPT_DEC_E, G2_REG_INTERRUPT);
+
+	return 0;
+}
diff --git a/drivers/staging/media/hantro/hantro_g2_regs.h b/drivers/staging/media/hantro/hantro_g2_regs.h
new file mode 100644
index 0000000000000..bb22fa921914e
--- /dev/null
+++ b/drivers/staging/media/hantro/hantro_g2_regs.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, Collabora
+ *
+ * Author: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+ */
+
+#ifndef HANTRO_G2_REGS_H_
+#define HANTRO_G2_REGS_H_
+
+#include "hantro.h"
+
+#define G2_SWREG(nr)	((nr) * 4)
+
+#define G2_DEC_REG(b, s, m) \
+	((const struct hantro_reg) { \
+		.base = G2_SWREG(b), \
+		.shift = s, \
+		.mask = m, \
+	})
+
+#define G2_REG_VERSION			G2_SWREG(0)
+
+#define G2_REG_INTERRUPT		G2_SWREG(1)
+#define G2_REG_INTERRUPT_DEC_RDY_INT	BIT(12)
+#define G2_REG_INTERRUPT_DEC_ABORT_E	BIT(5)
+#define G2_REG_INTERRUPT_DEC_IRQ_DIS	BIT(4)
+#define G2_REG_INTERRUPT_DEC_E		BIT(0)
+
+#define g2_strm_swap		G2_DEC_REG(2, 28, 0xf)
+#define g2_dirmv_swap		G2_DEC_REG(2, 20, 0xf)
+
+#define g2_mode			G2_DEC_REG(3, 27, 0x1f)
+#define g2_compress_swap	G2_DEC_REG(3, 20, 0xf)
+#define g2_ref_compress_bypass	G2_DEC_REG(3, 17, 0x1)
+#define g2_out_rs_e		G2_DEC_REG(3, 16, 0x1)
+#define g2_out_dis		G2_DEC_REG(3, 15, 0x1)
+#define g2_out_filtering_dis	G2_DEC_REG(3, 14, 0x1)
+#define g2_write_mvs_e		G2_DEC_REG(3, 12, 0x1)
+
+#define g2_pic_width_in_cbs	G2_DEC_REG(4, 19, 0x1fff)
+#define g2_pic_height_in_cbs	G2_DEC_REG(4, 6,  0x1fff)
+#define g2_num_ref_frames	G2_DEC_REG(4, 0,  0x1f)
+
+#define g2_scaling_list_e	G2_DEC_REG(5, 24, 0x1)
+#define g2_cb_qp_offset		G2_DEC_REG(5, 19, 0x1f)
+#define g2_cr_qp_offset		G2_DEC_REG(5, 14, 0x1f)
+#define g2_sign_data_hide	G2_DEC_REG(5, 12, 0x1)
+#define g2_tempor_mvp_e		G2_DEC_REG(5, 11, 0x1)
+#define g2_max_cu_qpd_depth	G2_DEC_REG(5, 5,  0x3f)
+#define g2_cu_qpd_e		G2_DEC_REG(5, 4,  0x1)
+
+#define g2_stream_len		G2_DEC_REG(6, 0,  0xffffffff)
+
+#define g2_cabac_init_present	G2_DEC_REG(7, 31, 0x1)
+#define g2_weight_pred_e	G2_DEC_REG(7, 28, 0x1)
+#define g2_weight_bipr_idc	G2_DEC_REG(7, 26, 0x3)
+#define g2_filter_over_slices	G2_DEC_REG(7, 25, 0x1)
+#define g2_filter_over_tiles	G2_DEC_REG(7, 24, 0x1)
+#define g2_asym_pred_e		G2_DEC_REG(7, 23, 0x1)
+#define g2_sao_e		G2_DEC_REG(7, 22, 0x1)
+#define g2_pcm_filt_d		G2_DEC_REG(7, 21, 0x1)
+#define g2_slice_chqp_present	G2_DEC_REG(7, 20, 0x1)
+#define g2_dependent_slice	G2_DEC_REG(7, 19, 0x1)
+#define g2_filter_override	G2_DEC_REG(7, 18, 0x1)
+#define g2_strong_smooth_e	G2_DEC_REG(7, 17, 0x1)
+#define g2_filt_offset_beta	G2_DEC_REG(7, 12, 0x1f)
+#define g2_filt_offset_tc	G2_DEC_REG(7, 7,  0x1f)
+#define g2_slice_hdr_ext_e	G2_DEC_REG(7, 6,  0x1)
+#define g2_slice_hdr_ext_bits	G2_DEC_REG(7, 3,  0x7)
+
+#define g2_const_intra_e	G2_DEC_REG(8, 31, 0x1)
+#define g2_filt_ctrl_pres	G2_DEC_REG(8, 30, 0x1)
+#define g2_idr_pic_e		G2_DEC_REG(8, 16, 0x1)
+#define g2_bit_depth_pcm_y	G2_DEC_REG(8, 12, 0xf)
+#define g2_bit_depth_pcm_c	G2_DEC_REG(8, 8,  0xf)
+#define g2_bit_depth_y_minus8	G2_DEC_REG(8, 6,  0x3)
+#define g2_bit_depth_c_minus8	G2_DEC_REG(8, 4,  0x3)
+#define g2_output_8_bits	G2_DEC_REG(8, 3,  0x1)
+
+#define g2_refidx1_active	G2_DEC_REG(9, 19, 0x1f)
+#define g2_refidx0_active	G2_DEC_REG(9, 14, 0x1f)
+#define g2_hdr_skip_length	G2_DEC_REG(9, 0,  0x3fff)
+
+#define g2_start_code_e		G2_DEC_REG(10, 31, 0x1)
+#define g2_init_qp		G2_DEC_REG(10, 24, 0x3f)
+#define g2_num_tile_cols	G2_DEC_REG(10, 19, 0x1f)
+#define g2_num_tile_rows	G2_DEC_REG(10, 14, 0x1f)
+#define g2_tile_e		G2_DEC_REG(10, 1,  0x1)
+#define g2_entropy_sync_e	G2_DEC_REG(10, 0,  0x1)
+
+#define g2_refer_lterm_e	G2_DEC_REG(12, 16, 0xffff)
+#define g2_min_cb_size		G2_DEC_REG(12, 13, 0x7)
+#define g2_max_cb_size		G2_DEC_REG(12, 10, 0x7)
+#define g2_min_pcm_size		G2_DEC_REG(12, 7,  0x7)
+#define g2_max_pcm_size		G2_DEC_REG(12, 4,  0x7)
+#define g2_pcm_e		G2_DEC_REG(12, 3,  0x1)
+#define g2_transform_skip	G2_DEC_REG(12, 2,  0x1)
+#define g2_transq_bypass	G2_DEC_REG(12, 1,  0x1)
+#define g2_list_mod_e		G2_DEC_REG(12, 0,  0x1)
+
+#define hevc_min_trb_size		G2_DEC_REG(13, 13, 0x7)
+#define hevc_max_trb_size		G2_DEC_REG(13, 10, 0x7)
+#define hevc_max_intra_hierdepth	G2_DEC_REG(13, 7,  0x7)
+#define hevc_max_inter_hierdepth	G2_DEC_REG(13, 4,  0x7)
+#define hevc_parallel_merge		G2_DEC_REG(13, 0,  0xf)
+
+#define hevc_rlist_f0		G2_DEC_REG(14, 0,  0x1f)
+#define hevc_rlist_f1		G2_DEC_REG(14, 10, 0x1f)
+#define hevc_rlist_f2		G2_DEC_REG(14, 20, 0x1f)
+#define hevc_rlist_b0		G2_DEC_REG(14, 5,  0x1f)
+#define hevc_rlist_b1		G2_DEC_REG(14, 15, 0x1f)
+#define hevc_rlist_b2		G2_DEC_REG(14, 25, 0x1f)
+
+#define hevc_rlist_f3		G2_DEC_REG(15, 0,  0x1f)
+#define hevc_rlist_f4		G2_DEC_REG(15, 10, 0x1f)
+#define hevc_rlist_f5		G2_DEC_REG(15, 20, 0x1f)
+#define hevc_rlist_b3		G2_DEC_REG(15, 5,  0x1f)
+#define hevc_rlist_b4		G2_DEC_REG(15, 15, 0x1f)
+#define hevc_rlist_b5		G2_DEC_REG(15, 25, 0x1f)
+
+#define hevc_rlist_f6		G2_DEC_REG(16, 0,  0x1f)
+#define hevc_rlist_f7		G2_DEC_REG(16, 10, 0x1f)
+#define hevc_rlist_f8		G2_DEC_REG(16, 20, 0x1f)
+#define hevc_rlist_b6		G2_DEC_REG(16, 5,  0x1f)
+#define hevc_rlist_b7		G2_DEC_REG(16, 15, 0x1f)
+#define hevc_rlist_b8		G2_DEC_REG(16, 25, 0x1f)
+
+#define hevc_rlist_f9		G2_DEC_REG(17, 0,  0x1f)
+#define hevc_rlist_f10		G2_DEC_REG(17, 10, 0x1f)
+#define hevc_rlist_f11		G2_DEC_REG(17, 20, 0x1f)
+#define hevc_rlist_b9		G2_DEC_REG(17, 5,  0x1f)
+#define hevc_rlist_b10		G2_DEC_REG(17, 15, 0x1f)
+#define hevc_rlist_b11		G2_DEC_REG(17, 25, 0x1f)
+
+#define hevc_rlist_f12		G2_DEC_REG(18, 0,  0x1f)
+#define hevc_rlist_f13		G2_DEC_REG(18, 10, 0x1f)
+#define hevc_rlist_f14		G2_DEC_REG(18, 20, 0x1f)
+#define hevc_rlist_b12		G2_DEC_REG(18, 5,  0x1f)
+#define hevc_rlist_b13		G2_DEC_REG(18, 15, 0x1f)
+#define hevc_rlist_b14		G2_DEC_REG(18, 25, 0x1f)
+
+#define hevc_rlist_f15		G2_DEC_REG(19, 0,  0x1f)
+#define hevc_rlist_b15		G2_DEC_REG(19, 5,  0x1f)
+
+#define g2_partial_ctb_x	G2_DEC_REG(20, 31, 0x1)
+#define g2_partial_ctb_y	G2_DEC_REG(20, 30, 0x1)
+#define g2_pic_width_4x4	G2_DEC_REG(20, 16, 0xfff)
+#define g2_pic_height_4x4	G2_DEC_REG(20, 0,  0xfff)
+#define hevc_cur_poc_00		G2_DEC_REG(46, 24, 0xff)
+#define hevc_cur_poc_01		G2_DEC_REG(46, 16, 0xff)
+#define hevc_cur_poc_02		G2_DEC_REG(46, 8,  0xff)
+#define hevc_cur_poc_03		G2_DEC_REG(46, 0,  0xff)
+
+#define hevc_cur_poc_04		G2_DEC_REG(47, 24, 0xff)
+#define hevc_cur_poc_05		G2_DEC_REG(47, 16, 0xff)
+#define hevc_cur_poc_06		G2_DEC_REG(47, 8,  0xff)
+#define hevc_cur_poc_07		G2_DEC_REG(47, 0,  0xff)
+
+#define hevc_cur_poc_08		G2_DEC_REG(48, 24, 0xff)
+#define hevc_cur_poc_09		G2_DEC_REG(48, 16, 0xff)
+#define hevc_cur_poc_10		G2_DEC_REG(48, 8,  0xff)
+#define hevc_cur_poc_11		G2_DEC_REG(48, 0,  0xff)
+
+#define hevc_cur_poc_12		G2_DEC_REG(49, 24, 0xff)
+#define hevc_cur_poc_13		G2_DEC_REG(49, 16, 0xff)
+#define hevc_cur_poc_14		G2_DEC_REG(49, 8,  0xff)
+#define hevc_cur_poc_15		G2_DEC_REG(49, 0,  0xff)
+
+#define g2_apf_threshold	G2_DEC_REG(55, 0, 0xffff)
+
+#define g2_clk_gate_e		G2_DEC_REG(58, 16, 0x1)
+#define g2_buswidth		G2_DEC_REG(58, 8,  0x7)
+#define g2_max_burst		G2_DEC_REG(58, 0,  0xff)
+
+#define G2_REG_CONFIG				G2_SWREG(58)
+#define G2_REG_CONFIG_DEC_CLK_GATE_E		BIT(16)
+#define G2_REG_CONFIG_DEC_CLK_GATE_IDLE_E	BIT(17)
+
+#define G2_ADDR_DST		(G2_SWREG(65))
+#define G2_REG_ADDR_REF(i)	(G2_SWREG(67)  + ((i) * 0x8))
+#define G2_ADDR_DST_CHR		(G2_SWREG(99))
+#define G2_REG_CHR_REF(i)	(G2_SWREG(101) + ((i) * 0x8))
+#define G2_ADDR_DST_MV		(G2_SWREG(133))
+#define G2_REG_DMV_REF(i)	(G2_SWREG(135) + ((i) * 0x8))
+#define G2_ADDR_TILE_SIZE	(G2_SWREG(167))
+#define G2_ADDR_STR		(G2_SWREG(169))
+#define HEVC_SCALING_LIST	(G2_SWREG(171))
+#define G2_RASTER_SCAN		(G2_SWREG(175))
+#define G2_RASTER_SCAN_CHR	(G2_SWREG(177))
+#define G2_TILE_FILTER		(G2_SWREG(179))
+#define G2_TILE_SAO		(G2_SWREG(181))
+#define G2_TILE_BSD		(G2_SWREG(183))
+
+#define g2_strm_buffer_len	G2_DEC_REG(258, 0, 0xffffffff)
+#define g2_strm_start_offset	G2_DEC_REG(259, 0, 0xffffffff)
+
+#endif
diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c
new file mode 100644
index 0000000000000..5347f5a41c2ac
--- /dev/null
+++ b/drivers/staging/media/hantro/hantro_hevc.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU HEVC codec driver
+ *
+ * Copyright (C) 2020 Safran Passenger Innovations LLC
+ */
+
+#include <linux/types.h>
+#include <media/v4l2-mem2mem.h>
+
+#include "hantro.h"
+#include "hantro_hw.h"
+
+#define VERT_FILTER_RAM_SIZE 8 /* bytes per pixel row */
+/*
+ * BSD control data of current picture at tile border
+ * 128 bits per 4x4 tile = 128/(8*4) bytes per row
+ */
+#define BSD_CTRL_RAM_SIZE 4 /* bytes per pixel row */
+/* tile border coefficients of filter */
+#define VERT_SAO_RAM_SIZE 48 /* bytes per pixel */
+
+#define MAX_TILE_COLS 20
+#define MAX_TILE_ROWS 22
+
+#define UNUSED_REF	-1
+
+#define G2_ALIGN		16
+
+size_t hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps *sps)
+{
+	int bytes_per_pixel = sps->bit_depth_luma_minus8 == 0 ? 1 : 2;
+
+	return sps->pic_width_in_luma_samples *
+	       sps->pic_height_in_luma_samples * bytes_per_pixel;
+}
+
+size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps)
+{
+	size_t cr_offset = hantro_hevc_chroma_offset(sps);
+
+	return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
+}
+
+static size_t hantro_hevc_mv_size(const struct v4l2_ctrl_hevc_sps *sps)
+{
+	u32 min_cb_log2_size_y = sps->log2_min_luma_coding_block_size_minus3 + 3;
+	u32 ctb_log2_size_y = min_cb_log2_size_y + sps->log2_diff_max_min_luma_coding_block_size;
+	u32 pic_width_in_ctbs_y = (sps->pic_width_in_luma_samples + (1 << ctb_log2_size_y) - 1)
+				  >> ctb_log2_size_y;
+	u32 pic_height_in_ctbs_y = (sps->pic_height_in_luma_samples + (1 << ctb_log2_size_y) - 1)
+				   >> ctb_log2_size_y;
+	size_t mv_size;
+
+	mv_size = pic_width_in_ctbs_y * pic_height_in_ctbs_y *
+		  (1 << (2 * (ctb_log2_size_y - 4))) * 16;
+
+	vpu_debug(4, "%dx%d (CTBs) %zu MV bytes\n",
+		  pic_width_in_ctbs_y, pic_height_in_ctbs_y, mv_size);
+
+	return mv_size;
+}
+
+static size_t hantro_hevc_ref_size(struct hantro_ctx *ctx)
+{
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+
+	return hantro_hevc_motion_vectors_offset(sps) + hantro_hevc_mv_size(sps);
+}
+
+static void hantro_hevc_ref_free(struct hantro_ctx *ctx)
+{
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	struct hantro_dev *vpu = ctx->dev;
+	int i;
+
+	for (i = 0;  i < NUM_REF_PICTURES; i++) {
+		if (hevc_dec->ref_bufs[i].cpu)
+			dma_free_coherent(vpu->dev, hevc_dec->ref_bufs[i].size,
+					  hevc_dec->ref_bufs[i].cpu,
+					  hevc_dec->ref_bufs[i].dma);
+	}
+}
+
+static void hantro_hevc_ref_init(struct hantro_ctx *ctx)
+{
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	int i;
+
+	for (i = 0;  i < NUM_REF_PICTURES; i++)
+		hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
+}
+
+dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx,
+				   int poc)
+{
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	int i;
+
+	/* Find the reference buffer among the already known ones */
+	for (i = 0;  i < NUM_REF_PICTURES; i++) {
+		if (hevc_dec->ref_bufs_poc[i] == poc) {
+			hevc_dec->ref_bufs_used |= 1 << i;
+			return hevc_dec->ref_bufs[i].dma;
+		}
+	}
+
+	/* Allocate a new reference buffer */
+	for (i = 0; i < NUM_REF_PICTURES; i++) {
+		if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF) {
+			if (!hevc_dec->ref_bufs[i].cpu) {
+				struct hantro_dev *vpu = ctx->dev;
+
+				/*
+				 * Allocate the space needed for the raw data +
+				 * motion vector data. Optimizations could be to
+				 * allocate raw data in non coherent memory and only
+				 * clear the motion vector data.
+				 */
+				hevc_dec->ref_bufs[i].cpu =
+					dma_alloc_coherent(vpu->dev,
+							   hantro_hevc_ref_size(ctx),
+							   &hevc_dec->ref_bufs[i].dma,
+							   GFP_KERNEL);
+				if (!hevc_dec->ref_bufs[i].cpu)
+					return 0;
+
+				hevc_dec->ref_bufs[i].size = hantro_hevc_ref_size(ctx);
+			}
+			hevc_dec->ref_bufs_used |= 1 << i;
+			memset(hevc_dec->ref_bufs[i].cpu, 0, hantro_hevc_ref_size(ctx));
+			hevc_dec->ref_bufs_poc[i] = poc;
+
+			return hevc_dec->ref_bufs[i].dma;
+		}
+	}
+
+	return 0;
+}
+
+void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx)
+{
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	int i;
+
+	/* Just tag the buffers as unused, do not free them */
+	for (i = 0;  i < NUM_REF_PICTURES; i++) {
+		if (hevc_dec->ref_bufs_poc[i] == UNUSED_REF)
+			continue;
+
+		if (hevc_dec->ref_bufs_used & (1 << i))
+			continue;
+
+		hevc_dec->ref_bufs_poc[i] = UNUSED_REF;
+	}
+}
+
+static int tile_buffer_reallocate(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+	const struct v4l2_ctrl_hevc_pps *pps = ctrls->pps;
+	const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+	unsigned int num_tile_cols = pps->num_tile_columns_minus1 + 1;
+	unsigned int height64 = (sps->pic_height_in_luma_samples + 63) & ~63;
+	unsigned int size;
+
+	if (num_tile_cols <= 1 ||
+	    num_tile_cols <= hevc_dec->num_tile_cols_allocated)
+		return 0;
+
+	/* Need to reallocate due to tiles passed via PPS */
+	if (hevc_dec->tile_filter.cpu) {
+		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+				  hevc_dec->tile_filter.cpu,
+				  hevc_dec->tile_filter.dma);
+		hevc_dec->tile_filter.cpu = NULL;
+	}
+
+	if (hevc_dec->tile_sao.cpu) {
+		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+				  hevc_dec->tile_sao.cpu,
+				  hevc_dec->tile_sao.dma);
+		hevc_dec->tile_sao.cpu = NULL;
+	}
+
+	if (hevc_dec->tile_bsd.cpu) {
+		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+				  hevc_dec->tile_bsd.cpu,
+				  hevc_dec->tile_bsd.dma);
+		hevc_dec->tile_bsd.cpu = NULL;
+	}
+
+	size = VERT_FILTER_RAM_SIZE * height64 * (num_tile_cols - 1);
+	hevc_dec->tile_filter.cpu = dma_alloc_coherent(vpu->dev, size,
+						       &hevc_dec->tile_filter.dma,
+						       GFP_KERNEL);
+	if (!hevc_dec->tile_filter.cpu)
+		goto err_free_tile_buffers;
+	hevc_dec->tile_filter.size = size;
+
+	size = VERT_SAO_RAM_SIZE * height64 * (num_tile_cols - 1);
+	hevc_dec->tile_sao.cpu = dma_alloc_coherent(vpu->dev, size,
+						    &hevc_dec->tile_sao.dma,
+						    GFP_KERNEL);
+	if (!hevc_dec->tile_sao.cpu)
+		goto err_free_tile_buffers;
+	hevc_dec->tile_sao.size = size;
+
+	size = BSD_CTRL_RAM_SIZE * height64 * (num_tile_cols - 1);
+	hevc_dec->tile_bsd.cpu = dma_alloc_coherent(vpu->dev, size,
+						    &hevc_dec->tile_bsd.dma,
+						    GFP_KERNEL);
+	if (!hevc_dec->tile_bsd.cpu)
+		goto err_free_tile_buffers;
+	hevc_dec->tile_bsd.size = size;
+
+	hevc_dec->num_tile_cols_allocated = num_tile_cols;
+
+	return 0;
+
+err_free_tile_buffers:
+	if (hevc_dec->tile_filter.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+				  hevc_dec->tile_filter.cpu,
+				  hevc_dec->tile_filter.dma);
+	hevc_dec->tile_filter.cpu = NULL;
+
+	if (hevc_dec->tile_sao.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+				  hevc_dec->tile_sao.cpu,
+				  hevc_dec->tile_sao.dma);
+	hevc_dec->tile_sao.cpu = NULL;
+
+	if (hevc_dec->tile_bsd.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+				  hevc_dec->tile_bsd.cpu,
+				  hevc_dec->tile_bsd.dma);
+	hevc_dec->tile_bsd.cpu = NULL;
+
+	return -ENOMEM;
+}
+
+int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx)
+{
+	struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec;
+	struct hantro_hevc_dec_ctrls *ctrls = &hevc_ctx->ctrls;
+	int ret;
+
+	hantro_start_prepare_run(ctx);
+
+	ctrls->decode_params =
+		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS);
+	if (WARN_ON(!ctrls->decode_params))
+		return -EINVAL;
+
+	ctrls->sps =
+		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_SPS);
+	if (WARN_ON(!ctrls->sps))
+		return -EINVAL;
+
+	ctrls->pps =
+		hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_PPS);
+	if (WARN_ON(!ctrls->pps))
+		return -EINVAL;
+
+	ret = tile_buffer_reallocate(ctx);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void hantro_hevc_dec_exit(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+
+	if (hevc_dec->tile_sizes.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_sizes.size,
+				  hevc_dec->tile_sizes.cpu,
+				  hevc_dec->tile_sizes.dma);
+	hevc_dec->tile_sizes.cpu = NULL;
+
+	if (hevc_dec->tile_filter.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_filter.size,
+				  hevc_dec->tile_filter.cpu,
+				  hevc_dec->tile_filter.dma);
+	hevc_dec->tile_filter.cpu = NULL;
+
+	if (hevc_dec->tile_sao.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_sao.size,
+				  hevc_dec->tile_sao.cpu,
+				  hevc_dec->tile_sao.dma);
+	hevc_dec->tile_sao.cpu = NULL;
+
+	if (hevc_dec->tile_bsd.cpu)
+		dma_free_coherent(vpu->dev, hevc_dec->tile_bsd.size,
+				  hevc_dec->tile_bsd.cpu,
+				  hevc_dec->tile_bsd.dma);
+	hevc_dec->tile_bsd.cpu = NULL;
+
+	hantro_hevc_ref_free(ctx);
+}
+
+int hantro_hevc_dec_init(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+	struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
+	unsigned int size;
+
+	memset(hevc_dec, 0, sizeof(*hevc_dec));
+
+	/*
+	 * Maximum number of tiles times width and height (2 bytes each),
+	 * rounding up to next 16 bytes boundary + one extra 16 byte
+	 * chunk (HW guys wanted to have this).
+	 */
+	size = round_up(MAX_TILE_COLS * MAX_TILE_ROWS * 4 * sizeof(u16) + 16, 16);
+	hevc_dec->tile_sizes.cpu = dma_alloc_coherent(vpu->dev, size,
+						      &hevc_dec->tile_sizes.dma,
+						      GFP_KERNEL);
+	if (!hevc_dec->tile_sizes.cpu)
+		return -ENOMEM;
+
+	hevc_dec->tile_sizes.size = size;
+
+	hantro_hevc_ref_init(ctx);
+
+	return 0;
+}
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 4b73c8011b255..a4aef5fa03bac 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -20,6 +20,8 @@
 #define MB_WIDTH(w)		DIV_ROUND_UP(w, MB_DIM)
 #define MB_HEIGHT(h)		DIV_ROUND_UP(h, MB_DIM)
 
+#define NUM_REF_PICTURES	(V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1)
+
 struct hantro_dev;
 struct hantro_ctx;
 struct hantro_buf;
@@ -95,6 +97,46 @@ struct hantro_h264_dec_hw_ctx {
 	struct hantro_h264_dec_ctrls ctrls;
 };
 
+/**
+ * struct hantro_hevc_dec_ctrls
+ * @decode_params: Decode params
+ * @sps:	SPS info
+ * @pps:	PPS info
+ * @hevc_hdr_skip_length: the number of data (in bits) to skip in the
+ *			  slice segment header syntax after 'slice type'
+ *			  token
+ */
+struct hantro_hevc_dec_ctrls {
+	const struct v4l2_ctrl_hevc_decode_params *decode_params;
+	const struct v4l2_ctrl_hevc_sps *sps;
+	const struct v4l2_ctrl_hevc_pps *pps;
+	u32 hevc_hdr_skip_length;
+};
+
+/**
+ * struct hantro_hevc_dec_hw_ctx
+ * @tile_sizes:		Tile sizes buffer
+ * @tile_filter:	Tile vertical filter buffer
+ * @tile_sao:		Tile SAO buffer
+ * @tile_bsd:		Tile BSD control buffer
+ * @ref_bufs:		Internal reference buffers
+ * @ref_bufs_poc:	Internal reference buffers picture order count
+ * @ref_bufs_used:	Bitfield of used reference buffers
+ * @ctrls:		V4L2 controls attached to a run
+ * @num_tile_cols_allocated: number of tile columns the tile buffers are sized for
+ */
+struct hantro_hevc_dec_hw_ctx {
+	struct hantro_aux_buf tile_sizes;
+	struct hantro_aux_buf tile_filter;
+	struct hantro_aux_buf tile_sao;
+	struct hantro_aux_buf tile_bsd;
+	struct hantro_aux_buf ref_bufs[NUM_REF_PICTURES];
+	int ref_bufs_poc[NUM_REF_PICTURES];
+	u32 ref_bufs_used;
+	struct hantro_hevc_dec_ctrls ctrls;
+	unsigned int num_tile_cols_allocated;
+};
+
 /**
  * struct hantro_mpeg2_dec_hw_ctx
  *
@@ -194,6 +236,15 @@ int hantro_g1_h264_dec_run(struct hantro_ctx *ctx);
 int hantro_h264_dec_init(struct hantro_ctx *ctx);
 void hantro_h264_dec_exit(struct hantro_ctx *ctx);
 
+int hantro_hevc_dec_init(struct hantro_ctx *ctx);
+void hantro_hevc_dec_exit(struct hantro_ctx *ctx);
+int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx);
+int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx);
+dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, int poc);
+void hantro_hevc_ref_remove_unused(struct hantro_ctx *ctx);
+size_t hantro_hevc_chroma_offset(const struct v4l2_ctrl_hevc_sps *sps);
+size_t hantro_hevc_motion_vectors_offset(const struct v4l2_ctrl_hevc_sps *sps);
+
 static inline size_t
 hantro_h264_mv_size(unsigned int width, unsigned int height)
 {
-- 
GitLab


From 45040f675041956ad763f9ef139ecee3647aa8bb Mon Sep 17 00:00:00 2001
From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Date: Thu, 3 Jun 2021 13:50:04 +0200
Subject: [PATCH 2462/3804] media: hantro: IMX8M: add variant for G2/HEVC codec

Add variant to IMX8M to enable G2/HEVC codec.
Define the capabilities for the hardware up to 3840x2160.
G2 doesn't have a postprocessor, uses the same clocks and has its
own interrupt.

Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c   |  1 +
 drivers/staging/media/hantro/hantro_hw.h    |  1 +
 drivers/staging/media/hantro/imx8m_vpu_hw.c | 96 ++++++++++++++++++++-
 3 files changed, 96 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index d448cdff59eac..dbc69ee0b562b 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -588,6 +588,7 @@ static const struct of_device_id of_hantro_match[] = {
 #endif
 #ifdef CONFIG_VIDEO_HANTRO_IMX8M
 	{ .compatible = "nxp,imx8mq-vpu", .data = &imx8mq_vpu_variant, },
+	{ .compatible = "nxp,imx8mq-vpu-g2", .data = &imx8mq_vpu_g2_variant },
 #endif
 #ifdef CONFIG_VIDEO_HANTRO_SAMA5D4
 	{ .compatible = "microchip,sama5d4-vdec", .data = &sama5d4_vdec_variant, },
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index a4aef5fa03bac..5737a7707944a 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -208,6 +208,7 @@ extern const struct hantro_variant rk3328_vpu_variant;
 extern const struct hantro_variant rk3288_vpu_variant;
 extern const struct hantro_variant imx8mq_vpu_variant;
 extern const struct hantro_variant sama5d4_vdec_variant;
+extern const struct hantro_variant imx8mq_vpu_g2_variant;
 
 extern const struct hantro_postproc_regs hantro_g1_postproc_regs;
 
diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c
index 9eb556460e522..ea919bfb9891a 100644
--- a/drivers/staging/media/hantro/imx8m_vpu_hw.c
+++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c
@@ -9,6 +9,9 @@
 #include <linux/delay.h>
 
 #include "hantro.h"
+#include "hantro_jpeg.h"
+#include "hantro_g1_regs.h"
+#include "hantro_g2_regs.h"
 
 #define CTRL_SOFT_RESET		0x00
 #define RESET_G1		BIT(1)
@@ -128,6 +131,62 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = {
 	},
 };
 
+static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_HEVC_SLICE,
+		.codec_mode = HANTRO_MODE_HEVC_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 3840,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 2160,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
+static irqreturn_t imx8m_vpu_g1_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vdpu_read(vpu, G1_REG_INTERRUPT);
+	state = (status & G1_REG_INTERRUPT_DEC_RDY_INT) ?
+		 VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vdpu_write(vpu, 0, G1_REG_INTERRUPT);
+	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t imx8m_vpu_g2_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vdpu_read(vpu, G2_REG_INTERRUPT);
+	state = (status & G2_REG_INTERRUPT_DEC_RDY_INT) ?
+		 VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vdpu_write(vpu, 0, G2_REG_INTERRUPT);
+	vdpu_write(vpu, G2_REG_CONFIG_DEC_CLK_GATE_E, G2_REG_CONFIG);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
 static int imx8mq_vpu_hw_init(struct hantro_dev *vpu)
 {
 	vpu->ctrl_base = vpu->reg_bases[vpu->variant->num_regs - 1];
@@ -142,6 +201,13 @@ static void imx8m_vpu_g1_reset(struct hantro_ctx *ctx)
 	imx8m_soft_reset(vpu, RESET_G1);
 }
 
+static void imx8m_vpu_g2_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	imx8m_soft_reset(vpu, RESET_G2);
+}
+
 /*
  * Supported codec ops.
  */
@@ -167,13 +233,25 @@ static const struct hantro_codec_ops imx8mq_vpu_codec_ops[] = {
 	},
 };
 
+static const struct hantro_codec_ops imx8mq_vpu_g2_codec_ops[] = {
+	[HANTRO_MODE_HEVC_DEC] = {
+		.run = hantro_g2_hevc_dec_run,
+		.reset = imx8m_vpu_g2_reset,
+		.init = hantro_hevc_dec_init,
+		.exit = hantro_hevc_dec_exit,
+	},
+};
+
 /*
  * VPU variants.
  */
 
 static const struct hantro_irq imx8mq_irqs[] = {
-	{ "g1", hantro_g1_irq },
-	{ "g2", NULL /* TODO: imx8m_vpu_g2_irq */ },
+	{ "g1", imx8m_vpu_g1_irq },
+};
+
+static const struct hantro_irq imx8mq_g2_irqs[] = {
+	{ "g2", imx8m_vpu_g2_irq },
 };
 
 static const char * const imx8mq_clk_names[] = { "g1", "g2", "bus" };
@@ -197,3 +275,17 @@ const struct hantro_variant imx8mq_vpu_variant = {
 	.reg_names = imx8mq_reg_names,
 	.num_regs = ARRAY_SIZE(imx8mq_reg_names)
 };
+
+const struct hantro_variant imx8mq_vpu_g2_variant = {
+	.dec_offset = 0x0,
+	.dec_fmts = imx8m_vpu_g2_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(imx8m_vpu_g2_dec_fmts),
+	.codec = HANTRO_HEVC_DECODER,
+	.codec_ops = imx8mq_vpu_g2_codec_ops,
+	.init = imx8mq_vpu_hw_init,
+	.runtime_resume = imx8mq_runtime_resume,
+	.irqs = imx8mq_g2_irqs,
+	.num_irqs = ARRAY_SIZE(imx8mq_g2_irqs),
+	.clk_names = imx8mq_clk_names,
+	.num_clocks = ARRAY_SIZE(imx8mq_clk_names),
+};
-- 
GitLab


From 8d651ee9c71bb12fc0c8eb2786b66cbe5aa3e43b Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Tue, 8 Jun 2021 11:54:33 +0200
Subject: [PATCH 2463/3804] x86/ioremap: Map EFI-reserved memory as encrypted
 for SEV

Some drivers require memory that is marked as EFI boot services
data. In order for this memory to not be re-used by the kernel
after ExitBootServices(), efi_mem_reserve() is used to preserve it
by inserting a new EFI memory descriptor and marking it with the
EFI_MEMORY_RUNTIME attribute.

Under SEV, memory marked with the EFI_MEMORY_RUNTIME attribute needs to
be mapped encrypted by Linux, otherwise the kernel might crash at boot
like below:

  EFI Variables Facility v0.08 2004-May-17
  general protection fault, probably for non-canonical address 0x3597688770a868b2: 0000 [#1] SMP NOPTI
  CPU: 13 PID: 1 Comm: swapper/0 Not tainted 5.12.4-2-default #1 openSUSE Tumbleweed
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:efi_mokvar_entry_next
  [...]
  Call Trace:
   efi_mokvar_sysfs_init
   ? efi_mokvar_table_init
   do_one_initcall
   ? __kmalloc
   kernel_init_freeable
   ? rest_init
   kernel_init
   ret_from_fork

Expand the __ioremap_check_other() function to additionally check for
this other type of boot data reserved at runtime and indicate that it
should be mapped encrypted for an SEV guest.

 [ bp: Massage commit message. ]

Fixes: 58c909022a5a ("efi: Support for MOK variable config table")
Reported-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Joerg Roedel <jroedel@suse.de>
Cc: <stable@vger.kernel.org> # 5.10+
Link: https://lkml.kernel.org/r/20210608095439.12668-2-joro@8bytes.org
---
 arch/x86/mm/ioremap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 12c686c65ea99..60ade7dd71bd9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -118,7 +118,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des
 	if (!IS_ENABLED(CONFIG_EFI))
 		return;
 
-	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+	if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+	    (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+	     efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
 		desc->flags |= IORES_MAP_ENCRYPTED;
 }
 
-- 
GitLab


From 8a11e84b8056c9daa0ea9d6dbb4d75382fb4a8e0 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 8 Jun 2021 09:55:12 +0100
Subject: [PATCH 2464/3804] drm/vc4: fix vc4_atomic_commit_tail() logic

In vc4_atomic_commit_tail() we iterate over the set of old CRTCs, and
attempt to wait on any channels which are still in use. When we iterate
over the CRTCs, we have:

* `i` - the index of the CRTC
* `channel` - the channel a CRTC is using

When we check the channel state, we consult:

  old_hvs_state->fifo_state[channel].in_use

... but when we wait for the channel, we erroneously wait on:

  old_hvs_state->fifo_state[i].pending_commit

... rather than:

   old_hvs_state->fifo_state[channel].pending_commit

... and this bogus access has been observed to result in boot-time hangs
on some arm64 configurations, and can be detected using KASAN. Fix this
by using the correct index.

I've tested this on a Raspberry Pi 3 model B v1.2 with KASAN.

Trimmed KASAN splat:

| ==================================================================
| BUG: KASAN: slab-out-of-bounds in vc4_atomic_commit_tail+0x1cc/0x910
| Read of size 8 at addr ffff000007360440 by task kworker/u8:0/7
| CPU: 2 PID: 7 Comm: kworker/u8:0 Not tainted 5.13.0-rc3-00009-g694c523e7267 #3
|
| Hardware name: Raspberry Pi 3 Model B (DT)
| Workqueue: events_unbound deferred_probe_work_func
| Call trace:
|  dump_backtrace+0x0/0x2b4
|  show_stack+0x1c/0x30
|  dump_stack+0xfc/0x168
|  print_address_description.constprop.0+0x2c/0x2c0
|  kasan_report+0x1dc/0x240
|  __asan_load8+0x98/0xd4
|  vc4_atomic_commit_tail+0x1cc/0x910
|  commit_tail+0x100/0x210
| ...
|
| Allocated by task 7:
|  kasan_save_stack+0x2c/0x60
|  __kasan_kmalloc+0x90/0xb4
|  vc4_hvs_channels_duplicate_state+0x60/0x1a0
|  drm_atomic_get_private_obj_state+0x144/0x230
|  vc4_atomic_check+0x40/0x73c
|  drm_atomic_check_only+0x998/0xe60
|  drm_atomic_commit+0x34/0x94
|  drm_client_modeset_commit_atomic+0x2f4/0x3a0
|  drm_client_modeset_commit_locked+0x8c/0x230
|  drm_client_modeset_commit+0x38/0x60
|  drm_fb_helper_set_par+0x104/0x17c
|  fbcon_init+0x43c/0x970
|  visual_init+0x14c/0x1e4
| ...
|
| The buggy address belongs to the object at ffff000007360400
|  which belongs to the cache kmalloc-128 of size 128
| The buggy address is located 64 bytes inside of
|  128-byte region [ffff000007360400, ffff000007360480)
| The buggy address belongs to the page:
| page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x7360
| flags: 0x3fffc0000000200(slab|node=0|zone=0|lastcpupid=0xffff)
| raw: 03fffc0000000200 dead000000000100 dead000000000122 ffff000004c02300
| raw: 0000000000000000 0000000000100010 00000001ffffffff 0000000000000000
| page dumped because: kasan: bad access detected
|
| Memory state around the buggy address:
|  ffff000007360300: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
|  ffff000007360380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
| >ffff000007360400: 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc
|                                            ^
|  ffff000007360480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
|  ffff000007360500: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
| ==================================================================

Link: https://lore.kernel.org/r/4d0c8318-bad8-2be7-e292-fc8f70c198de@samsung.com
Link: https://lore.kernel.org/linux-arm-kernel/20210607151740.moncryl5zv3ahq4s@gilmour
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Emma Anholt <emma@anholt.net>
Cc: Maxime Ripard <maxime@cerno.tech>
Cc: Will Deacon <will@kernel.org>
Cc: dri-devel@lists.freedesktop.org
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20210608085513.2069-1-mark.rutland@arm.com
---
 drivers/gpu/drm/vc4/vc4_kms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index bb5529a7a9c28..948b3a58aad16 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -372,7 +372,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state)
 		if (!old_hvs_state->fifo_state[channel].in_use)
 			continue;
 
-		ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[i].pending_commit);
+		ret = drm_crtc_commit_wait(old_hvs_state->fifo_state[channel].pending_commit);
 		if (ret)
 			drm_err(dev, "Timed out waiting for commit\n");
 	}
-- 
GitLab


From b436acd1cf7fac0ba987abd22955d98025c80c2b Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Tue, 8 Jun 2021 19:04:36 +0800
Subject: [PATCH 2465/3804] drm: Fix use-after-free read in drm_getunique()

There is a time-of-check-to-time-of-use error in drm_getunique() due
to retrieving file_priv->master prior to locking the device's master
mutex.

An example can be seen in the crash report of the use-after-free error
found by Syzbot:
https://syzkaller.appspot.com/bug?id=148d2f1dfac64af52ffd27b661981a540724f803

In the report, the master pointer was used after being freed. This is
because another process had acquired the device's master mutex in
drm_setmaster_ioctl(), then overwrote fpriv->master in
drm_new_set_master(). The old value of fpriv->master was subsequently
freed before the mutex was unlocked.

To fix this, we lock the device's master mutex before retrieving the
pointer from fpriv->master. This patch passes the Syzbot
reproducer test.

Reported-by: syzbot+c3a706cec1ea99e1c693@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210608110436.239583-1-desmondcheongzx@gmail.com
---
 drivers/gpu/drm/drm_ioctl.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index d273d1a8603a9..495a4767a4430 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -118,17 +118,18 @@ int drm_getunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_unique *u = data;
-	struct drm_master *master = file_priv->master;
+	struct drm_master *master;
 
-	mutex_lock(&master->dev->master_mutex);
+	mutex_lock(&dev->master_mutex);
+	master = file_priv->master;
 	if (u->unique_len >= master->unique_len) {
 		if (copy_to_user(u->unique, master->unique, master->unique_len)) {
-			mutex_unlock(&master->dev->master_mutex);
+			mutex_unlock(&dev->master_mutex);
 			return -EFAULT;
 		}
 	}
 	u->unique_len = master->unique_len;
-	mutex_unlock(&master->dev->master_mutex);
+	mutex_unlock(&dev->master_mutex);
 
 	return 0;
 }
-- 
GitLab


From 4f13d471e5d11034d56161af56d0f9396bc0b384 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra@amd.com>
Date: Mon, 7 Jun 2021 06:15:32 +0000
Subject: [PATCH 2466/3804] KVM: SVM: Fix SEV SEND_START session length &
 SEND_UPDATE_DATA query length after commit 238eca821cee

Commit 238eca821cee ("KVM: SVM: Allocate SEV command structures on local stack")
uses the local stack to allocate the structures used to communicate with the PSP,
which were earlier being kzalloced. This breaks SEV live migration for
computing the SEND_START session length and SEND_UPDATE_DATA query length as
session_len and trans_len and hdr_len fields are not zeroed respectively for
the above commands before issuing the SEV Firmware API call, hence the
firmware returns incorrect session length and update data header or trans length.

Also, the SEV Firmware API returns the SEV_RET_INVALID_LEN firmware error
for these length query API calls, and the return value and the
firmware error need to be passed to userspace as they are, so
remove the return check in the KVM code.

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Message-Id: <20210607061532.27459-1-Ashish.Kalra@amd.com>
Fixes: 238eca821cee ("KVM: SVM: Allocate SEV command structures on local stack")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/sev.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5bc887e9a9860..e0ce5da97fc2f 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1103,10 +1103,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	struct sev_data_send_start data;
 	int ret;
 
+	memset(&data, 0, sizeof(data));
 	data.handle = sev->handle;
 	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
-	if (ret < 0)
-		return ret;
 
 	params->session_len = data.session_len;
 	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
@@ -1215,10 +1214,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
 	struct sev_data_send_update_data data;
 	int ret;
 
+	memset(&data, 0, sizeof(data));
 	data.handle = sev->handle;
 	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
-	if (ret < 0)
-		return ret;
 
 	params->hdr_len = data.hdr_len;
 	params->trans_len = data.trans_len;
-- 
GitLab


From e898da784aed0ea65f7672d941c01dc9b79e6299 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Mon, 7 Jun 2021 00:19:43 -0700
Subject: [PATCH 2467/3804] KVM: LAPIC: Write 0 to TMICT should also cancel
 vmx-preemption timer

According to the SDM 10.5.4.1:

  A write of 0 to the initial-count register effectively stops the local
  APIC timer, in both one-shot and periodic mode.

However, the lapic timer oneshot/periodic mode which is emulated by vmx-preemption
timer doesn't stop by writing 0 to TMICT since vmx->hv_deadline_tsc is still
programmed and the guest will receive the spurious timer interrupt later. This
patch fixes it by also cancelling the vmx-preemption timer when writing 0 to
the initial-count register.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1623050385-100988-1-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8120e8614b92d..6d72d8f433107 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1494,6 +1494,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
 
 static void cancel_hv_timer(struct kvm_lapic *apic);
 
+static void cancel_apic_timer(struct kvm_lapic *apic)
+{
+	hrtimer_cancel(&apic->lapic_timer.timer);
+	preempt_disable();
+	if (apic->lapic_timer.hv_timer_in_use)
+		cancel_hv_timer(apic);
+	preempt_enable();
+}
+
 static void apic_update_lvtt(struct kvm_lapic *apic)
 {
 	u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1502,11 +1511,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
 	if (apic->lapic_timer.timer_mode != timer_mode) {
 		if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
 				APIC_LVT_TIMER_TSCDEADLINE)) {
-			hrtimer_cancel(&apic->lapic_timer.timer);
-			preempt_disable();
-			if (apic->lapic_timer.hv_timer_in_use)
-				cancel_hv_timer(apic);
-			preempt_enable();
+			cancel_apic_timer(apic);
 			kvm_lapic_set_reg(apic, APIC_TMICT, 0);
 			apic->lapic_timer.period = 0;
 			apic->lapic_timer.tscdeadline = 0;
@@ -2092,7 +2097,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		if (apic_lvtt_tscdeadline(apic))
 			break;
 
-		hrtimer_cancel(&apic->lapic_timer.timer);
+		cancel_apic_timer(apic);
 		kvm_lapic_set_reg(apic, APIC_TMICT, val);
 		start_apic_timer(apic);
 		break;
-- 
GitLab


From b1bd5cba3306691c771d558e94baa73e8b0b96b7 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Thu, 3 Jun 2021 13:24:55 +0800
Subject: [PATCH 2468/3804] KVM: X86: MMU: Use the correct inherited
 permissions to get shadow page

When computing the access permissions of a shadow page, use the effective
permissions of the walk up to that point, i.e. the logical AND of its parents'
permissions.  Two guest PxE entries that point at the same table gfn need to
be shadowed with different shadow pages if their parents' permissions are
different.  KVM currently uses the effective permissions of the last
non-leaf entry for all non-leaf entries.  Because all non-leaf SPTEs have
full ("uwx") permissions, and the effective permissions are recorded only
in role.access and merged into the leaves, this can lead to incorrect
reuse of a shadow page and eventually to a missing guest protection page
fault.

For example, here is a shared pagetable:

   pgd[]   pud[]        pmd[]            virtual address pointers
                     /->pmd1(u--)->pte1(uw-)->page1 <- ptr1 (u--)
        /->pud1(uw-)--->pmd2(uw-)->pte2(uw-)->page2 <- ptr2 (uw-)
   pgd-|           (shared pmd[] as above)
        \->pud2(u--)--->pmd1(u--)->pte1(uw-)->page1 <- ptr3 (u--)
                     \->pmd2(uw-)->pte2(uw-)->page2 <- ptr4 (u--)

  pud1 and pud2 point to the same pmd table, so:
  - ptr1 and ptr3 points to the same page.
  - ptr2 and ptr4 points to the same page.

(pud1 and pud2 here are pud entries, while pmd1 and pmd2 here are pmd entries)

- First, the guest reads from ptr1 and KVM prepares a shadow
  page table with role.access=u--, from ptr1's pud1 and ptr1's pmd1.
  "u--" comes from the effective permissions of pgd, pud1 and
  pmd1, which are stored in pt->access.  "u--" is used also to get
  the pagetable for pud1, instead of "uw-".

- Then the guest writes to ptr2 and KVM reuses pud1 which is present.
  The hypervisor sets up a shadow page for ptr2 with pt->access "uw-"
  even though the pud1 pmd (because of the incorrect argument to
  kvm_mmu_get_page in the previous step) has role.access="u--".

- Then the guest reads from ptr3.  The hypervisor reuses pud1's
  shadow pmd for pud2, because both use "u--" for their permissions.
  Thus, the shadow pmd already includes entries for both pmd1 and pmd2.

- At last, the guest writes to ptr4.  This causes no vmexit or pagefault,
  because pud1's shadow page structures included an "uw-" page even though
  its role.access was "u--".

Any kind of shared pagetable might have a similar problem in a
virtual machine without TDP enabled if the permissions inherited
from different ancestors differ.

In order to fix the problem, we change pt->access to be an array, and
any access in it will not include permissions ANDed from child ptes.

The test code is: https://lore.kernel.org/kvm/20210603050537.19605-1-jiangshanlai@gmail.com/
Remember to test it with TDP disabled.

The problem had existed long before the commit 41074d07c78b ("KVM: MMU:
Fix inherited permissions for emulated guest pte updates"), and it
is hard to find which is the culprit.  So there is no fixes tag here.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Message-Id: <20210603052455.21023-1-jiangshanlai@gmail.com>
Cc: stable@vger.kernel.org
Fixes: cea0f0e7ea54 ("[PATCH] KVM: MMU: Shadow page table caching")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/mmu.rst |  4 ++--
 arch/x86/kvm/mmu/paging_tmpl.h | 14 +++++++++-----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index 5bfe28b0728e8..20d85daed395e 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -171,8 +171,8 @@ Shadow pages contain the following information:
     shadow pages) so role.quadrant takes values in the range 0..3.  Each
     quadrant maps 1GB virtual address space.
   role.access:
-    Inherited guest access permissions in the form uwx.  Note execute
-    permission is positive, not negative.
+    Inherited guest access permissions from the parent ptes in the form uwx.
+    Note execute permission is positive, not negative.
   role.invalid:
     The page is invalid and should not be used.  It is a root page that is
     currently pinned (by a cpu hardware register pointing to it); once it is
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 70b7e44e30352..823a5919f9fa0 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -90,8 +90,8 @@ struct guest_walker {
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
 	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	bool pte_writable[PT_MAX_FULL_LEVELS];
-	unsigned pt_access;
-	unsigned pte_access;
+	unsigned int pt_access[PT_MAX_FULL_LEVELS];
+	unsigned int pte_access;
 	gfn_t gfn;
 	struct x86_exception fault;
 };
@@ -418,13 +418,15 @@ retry_walk:
 		}
 
 		walker->ptes[walker->level - 1] = pte;
+
+		/* Convert to ACC_*_MASK flags for struct guest_walker.  */
+		walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
 	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
 
 	/* Convert to ACC_*_MASK flags for struct guest_walker.  */
-	walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
 	walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
 	errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
 	if (unlikely(errcode))
@@ -463,7 +465,8 @@ retry_walk:
 	}
 
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __func__, (u64)pte, walker->pte_access, walker->pt_access);
+		 __func__, (u64)pte, walker->pte_access,
+		 walker->pt_access[walker->level - 1]);
 	return 1;
 
 error:
@@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 	bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
 	struct kvm_mmu_page *sp = NULL;
 	struct kvm_shadow_walk_iterator it;
-	unsigned direct_access, access = gw->pt_access;
+	unsigned int direct_access, access;
 	int top_level, level, req_level, ret;
 	gfn_t base_gfn = gw->gfn;
 
@@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 		sp = NULL;
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
+			access = gw->pt_access[it.level - 2];
 			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
 					      false, access);
 		}
-- 
GitLab


From e0e3903f83d5e41ab7e7737ebe41ef36f578dc0a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 8 Jun 2021 13:37:42 +0100
Subject: [PATCH 2469/3804] arm64: mm: decode xFSC in mem_abort_decode()

It would be helpful if mem_abort_decode() could decode the DFSC/IFSC, as
this can make it easier to identify common bugs (e.g. accesses which
trigger alignment faults) without having to manually decode the xFSC
value.

Decode the xFSC in mem_abort_decode().

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210608123742.11921-1-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/fault.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5c855b2ab93b8..6786cf152666c 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -99,6 +99,8 @@ static void mem_abort_decode(unsigned int esr)
 	pr_alert("  EA = %lu, S1PTW = %lu\n",
 		 (esr & ESR_ELx_EA) >> ESR_ELx_EA_SHIFT,
 		 (esr & ESR_ELx_S1PTW) >> ESR_ELx_S1PTW_SHIFT);
+	pr_alert("  FSC = 0x%02x: %s\n", (esr & ESR_ELx_FSC),
+		 esr_to_fault_info(esr)->name);
 
 	if (esr_is_data_abort(esr))
 		data_abort_decode(esr);
-- 
GitLab


From 45f56690051c108e3e9a50e34b61aac05d55583d Mon Sep 17 00:00:00 2001
From: Alexey Minnekhanov <alexeymin@postmarketos.org>
Date: Tue, 18 May 2021 13:26:24 +0300
Subject: [PATCH 2470/3804] drm/msm: Init mm_list before accessing it for
 use_vram path

Fix NULL pointer dereference caused by update_inactive()
trying to list_del() an uninitialized mm_list whose
prev/next pointers are NULL.

Fixes: 64fcbde772c7 ("drm/msm: Track potentially evictable objects")
Signed-off-by: Alexey Minnekhanov <alexeymin@postmarketos.org>
Link: https://lore.kernel.org/r/20210518102624.1193955-1-alexeymin@postmarketos.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 56df86e5f7400..369d91e6361ec 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -1241,6 +1241,13 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev,
 
 		to_msm_bo(obj)->vram_node = &vma->node;
 
+		/* Call chain get_pages() -> update_inactive() tries to
+		 * access msm_obj->mm_list, but it is not initialized yet.
+		 * To avoid NULL pointer dereference error, initialize
+		 * mm_list to be empty.
+		 */
+		INIT_LIST_HEAD(&msm_obj->mm_list);
+
 		msm_gem_lock(obj);
 		pages = get_pages(obj);
 		msm_gem_unlock(obj);
-- 
GitLab


From af3511ff7fa2107d6410831f3d71030f5e8d2b25 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Tue, 1 Jun 2021 01:46:28 +0800
Subject: [PATCH 2471/3804] KVM: x86: Ensure PV TLB flush tracepoint reflects
 KVM behavior

In record_steal_time(), st->preempted is read twice, and
trace_kvm_pv_tlb_flush() might output an inconsistent result if
kvm_vcpu_flush_tlb_guest() sees a different st->preempted later.

It is a very trivial problem that hardly causes actual harm and can be
avoided by resetting and reading st->preempted atomically via xchg().

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>

Message-Id: <20210531174628.10265-1-jiangshanlai@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1cd6d4685932a..e2144eedaf799 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3101,9 +3101,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	 * expensive IPIs.
 	 */
 	if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+		u8 st_preempted = xchg(&st->preempted, 0);
+
 		trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
-				       st->preempted & KVM_VCPU_FLUSH_TLB);
-		if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+				       st_preempted & KVM_VCPU_FLUSH_TLB);
+		if (st_preempted & KVM_VCPU_FLUSH_TLB)
 			kvm_vcpu_flush_tlb_guest(vcpu);
 	} else {
 		st->preempted = 0;
-- 
GitLab


From 1bc603af73dd8fb2934306e861009c54f973dcc2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 8 Jun 2021 14:39:54 +0200
Subject: [PATCH 2472/3804] KVM: selftests: introduce P47V64 for s390x

s390x can have up to 47 bits of guest physical address and 64 bits of
virtual address. Add a new address mode to avoid errors from testcases
going beyond 47 bits.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <20210608123954.10991-1-borntraeger@de.ibm.com>
Fixes: ef4c9f4f6546 ("KVM: selftests: Fix 32-bit truncation of vm_get_max_gfn()")
Cc: stable@vger.kernel.org
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h | 3 ++-
 tools/testing/selftests/kvm/lib/kvm_util.c     | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index fcd8e3855111c..b602552b1ed09 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -43,6 +43,7 @@ enum vm_guest_mode {
 	VM_MODE_P40V48_4K,
 	VM_MODE_P40V48_64K,
 	VM_MODE_PXXV48_4K,	/* For 48bits VA but ANY bits PA */
+	VM_MODE_P47V64_4K,
 	NUM_VM_MODES,
 };
 
@@ -60,7 +61,7 @@ enum vm_guest_mode {
 
 #elif defined(__s390x__)
 
-#define VM_MODE_DEFAULT			VM_MODE_P52V48_4K
+#define VM_MODE_DEFAULT			VM_MODE_P47V64_4K
 #define MIN_PAGE_SHIFT			12U
 #define ptes_per_page(page_size)	((page_size) / 16)
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 28e528c19d285..b126fab6c4e1e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -175,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i)
 		[VM_MODE_P40V48_4K]	= "PA-bits:40,  VA-bits:48,  4K pages",
 		[VM_MODE_P40V48_64K]	= "PA-bits:40,  VA-bits:48, 64K pages",
 		[VM_MODE_PXXV48_4K]	= "PA-bits:ANY, VA-bits:48,  4K pages",
+		[VM_MODE_P47V64_4K]	= "PA-bits:47,  VA-bits:64,  4K pages",
 	};
 	_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
 		       "Missing new mode strings?");
@@ -192,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 	{ 40, 48,  0x1000, 12 },
 	{ 40, 48, 0x10000, 16 },
 	{  0,  0,  0x1000, 12 },
+	{ 47, 64,  0x1000, 12 },
 };
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
@@ -277,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 		TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
 #endif
 		break;
+	case VM_MODE_P47V64_4K:
+		vm->pgtable_levels = 5;
+		break;
 	default:
 		TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
 	}
-- 
GitLab


From 591a22c14d3f45cc38bd1931c593c221df2f1881 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 8 Jun 2021 10:12:21 -0700
Subject: [PATCH 2473/3804] proc: Track /proc/$pid/attr/ opener mm_struct

Commit bfb819ea20ce ("proc: Check /proc/$pid/attr/ writes against file opener")
tried to make sure that there could not be a confusion between the opener of
a /proc/$pid/attr/ file and the writer. It used struct cred to make sure
the privileges didn't change. However, there were existing cases where a more
privileged thread was passing the opened fd to a differently privileged thread
(during container setup). Instead, use mm_struct to track whether the opener
and writer are still the same process. (This is what several other proc files
already do, though for different reasons.)

Reported-by: Christian Brauner <christian.brauner@ubuntu.com>
Reported-by: Andrea Righi <andrea.righi@canonical.com>
Tested-by: Andrea Righi <andrea.righi@canonical.com>
Fixes: bfb819ea20ce ("proc: Check /proc/$pid/attr/ writes against file opener")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58bbf334265b7..7118ebe38fa62 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2674,6 +2674,11 @@ out:
 }
 
 #ifdef CONFIG_SECURITY
+static int proc_pid_attr_open(struct inode *inode, struct file *file)
+{
+	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+}
+
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
@@ -2704,7 +2709,7 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	int rv;
 
 	/* A task may only write when it was the opener. */
-	if (file->f_cred != current_real_cred())
+	if (file->private_data != current->mm)
 		return -EPERM;
 
 	rcu_read_lock();
@@ -2754,9 +2759,11 @@ out:
 }
 
 static const struct file_operations proc_pid_attr_operations = {
+	.open		= proc_pid_attr_open,
 	.read		= proc_pid_attr_read,
 	.write		= proc_pid_attr_write,
 	.llseek		= generic_file_llseek,
+	.release	= mem_release,
 };
 
 #define LSM_DIR_OPS(LSM) \
-- 
GitLab


From f53b16ad64408b5376836708f8cf42dbf1cf6098 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@intel.com>
Date: Wed, 9 Jun 2021 07:38:16 +0800
Subject: [PATCH 2474/3804] selftests: kvm: Add support for customized slot0
 memory size

Until commit 39fe2fc96694 ("selftests: kvm: make allocation of extra
memory take effect", 2021-05-27), parameter extra_mem_pages was used
only to calculate the page table size for all the memory chunks,
because real memory allocation happened with calls of
vm_userspace_mem_region_add() after vm_create_default().

Commit 39fe2fc96694 however changed the meaning of extra_mem_pages to
the size of memory slot 0.  This makes the memory allocation more
flexible, but makes it harder to account for the number of
pages needed for the page tables.  For example, memslot_perf_test
has a small amount of memory in slot 0 but a lot in other slots,
and adding that memory twice (both in slot 0 and with later
calls to vm_userspace_mem_region_add()) causes an error that
was fixed in commit 000ac4295339 ("selftests: kvm: fix overlapping
addresses in memslot_perf_test", 2021-05-29)

Since both uses are sensible, add a new parameter slot0_mem_pages
to vm_create_with_vcpus() and some comments to clarify the meaning of
slot0_mem_pages and extra_mem_pages.  With this change,
memslot_perf_test can go back to passing the number of memory
pages as extra_mem_pages.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Message-Id: <20210608233816.423958-4-zhenzhong.duan@intel.com>
[Squashed in a single patch and rewrote the commit message. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h  |  7 +--
 .../selftests/kvm/kvm_page_table_test.c       |  2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c    | 47 +++++++++++++++----
 .../selftests/kvm/lib/perf_test_util.c        |  2 +-
 .../testing/selftests/kvm/memslot_perf_test.c |  2 +-
 5 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index b602552b1ed09..35739567189e0 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -286,10 +286,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
 					    uint32_t num_percpu_pages, void *guest_code,
 					    uint32_t vcpuids[]);
 
-/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
 struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
-				    uint64_t extra_mem_pages, uint32_t num_percpu_pages,
-				    void *guest_code, uint32_t vcpuids[]);
+				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+				    uint32_t num_percpu_pages, void *guest_code,
+				    uint32_t vcpuids[]);
 
 /*
  * Adds a vCPU with reasonable defaults (e.g. a stack)
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 1c4753fff19e0..82171f17c1d7f 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 
 	/* Create a VM with enough guest pages */
 	guest_num_pages = test_mem_size / guest_page_size;
-	vm = vm_create_with_vcpus(mode, nr_vcpus,
+	vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
 				  guest_num_pages, 0, guest_code, NULL);
 
 	/* Align down GPA of the testing memslot */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index b126fab6c4e1e..5c70596dd1b98 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -313,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
 	return vm;
 }
 
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ *   mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ *   nr_vcpus - VCPU count
+ *   slot0_mem_pages - Slot0 physical memory size
+ *   extra_mem_pages - Non-slot0 physical memory total size
+ *   num_percpu_pages - Per-cpu physical memory pages
+ *   guest_code - Guest entry point
+ *   vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
+ * with customized slot0 memory size, at least 512 pages currently.
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
 struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
-				    uint64_t extra_mem_pages, uint32_t num_percpu_pages,
-				    void *guest_code, uint32_t vcpuids[])
+				    uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+				    uint32_t num_percpu_pages, void *guest_code,
+				    uint32_t vcpuids[])
 {
+	uint64_t vcpu_pages, extra_pg_pages, pages;
+	struct kvm_vm *vm;
+	int i;
+
+	/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
+	if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
+		slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
+
 	/* The maximum page table size for a memory region will be when the
 	 * smallest pages are used. Considering each page contains x page
 	 * table descriptors, the total extra size for page tables (for extra
 	 * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
 	 * than N/x*2.
 	 */
-	uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
-	uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
-	uint64_t pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages + vcpu_pages + extra_pg_pages;
-	struct kvm_vm *vm;
-	int i;
+	vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
+	extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
+	pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
 
 	TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
 		    "nr_vcpus = %d too large for host, max-vcpus = %d",
@@ -359,8 +388,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
 					    uint32_t num_percpu_pages, void *guest_code,
 					    uint32_t vcpuids[])
 {
-	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
-				    num_percpu_pages, guest_code, vcpuids);
+	return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
+				    extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
 }
 
 struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index abf381800a590..7397ca2998358 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -69,7 +69,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
 		    "Guest memory size is not guest page size aligned.");
 
-	vm = vm_create_with_vcpus(mode, vcpus,
+	vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
 				  (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
 				  0, guest_code, NULL);
 
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 9307f25d8130e..11239652d8057 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -267,7 +267,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
-	data->vm = vm_create_default(VCPU_ID, 1024, guest_code);
+	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
 
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 		max_mem_slots - 1, data->pages_per_slot, rempages);
-- 
GitLab


From f31500b0d437a2464ca5972d8f5439e156b74960 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 7 Jun 2021 10:57:48 -0700
Subject: [PATCH 2475/3804] KVM: x86: Ensure liveliness of nested VM-Enter fail
 tracepoint message

Use the __string() machinery provided by the tracing subsystem to make a
copy of the string literals consumed by the "nested VM-Enter failed"
tracepoint.  A complete copy is necessary to ensure that the tracepoint
can't outlive the data/memory it consumes and dereference stale memory.

Because the tracepoint itself is defined by kvm, if kvm-intel and/or
kvm-amd are built as modules, the memory holding the string literals
defined by the vendor modules will be freed when the module is unloaded,
whereas the tracepoint and its data in the ring buffer will live until
kvm is unloaded (or "indefinitely" if kvm is built-in).

This bug has existed since the tracepoint was added, but was recently
exposed by a new check in tracing to detect exactly this type of bug.

  fmt: '%s%s
  ' current_buffer: ' vmx_dirty_log_t-140127  [003] ....  kvm_nested_vmenter_failed: '
  WARNING: CPU: 3 PID: 140134 at kernel/trace/trace.c:3759 trace_check_vprintf+0x3be/0x3e0
  CPU: 3 PID: 140134 Comm: less Not tainted 5.13.0-rc1-ce2e73ce600a-req #184
  Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014
  RIP: 0010:trace_check_vprintf+0x3be/0x3e0
  Code: <0f> 0b 44 8b 4c 24 1c e9 a9 fe ff ff c6 44 02 ff 00 49 8b 97 b0 20
  RSP: 0018:ffffa895cc37bcb0 EFLAGS: 00010282
  RAX: 0000000000000000 RBX: ffffa895cc37bd08 RCX: 0000000000000027
  RDX: 0000000000000027 RSI: 00000000ffffdfff RDI: ffff9766cfad74f8
  RBP: ffffffffc0a041d4 R08: ffff9766cfad74f0 R09: ffffa895cc37bad8
  R10: 0000000000000001 R11: 0000000000000001 R12: ffffffffc0a041d4
  R13: ffffffffc0f4dba8 R14: 0000000000000000 R15: ffff976409f2c000
  FS:  00007f92fa200740(0000) GS:ffff9766cfac0000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000559bd11b0000 CR3: 000000019fbaa002 CR4: 00000000001726e0
  Call Trace:
   trace_event_printf+0x5e/0x80
   trace_raw_output_kvm_nested_vmenter_failed+0x3a/0x60 [kvm]
   print_trace_line+0x1dd/0x4e0
   s_show+0x45/0x150
   seq_read_iter+0x2d5/0x4c0
   seq_read+0x106/0x150
   vfs_read+0x98/0x180
   ksys_read+0x5f/0xe0
   do_syscall_64+0x40/0xb0
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Cc: Steven Rostedt <rostedt@goodmis.org>
Fixes: 380e0055bc7e ("KVM: nVMX: trace nested VM-Enter failures detected by H/W")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Message-Id: <20210607175748.674002-1-seanjc@google.com>
---
 arch/x86/kvm/trace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a61c015870e33..4f839148948bc 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
 	TP_ARGS(msg, err),
 
 	TP_STRUCT__entry(
-		__field(const char *, msg)
+		__string(msg, msg)
 		__field(u32, err)
 	),
 
 	TP_fast_assign(
-		__entry->msg = msg;
+		__assign_str(msg, msg);
 		__entry->err = err;
 	),
 
-	TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
+	TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
 		__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
 );
 
-- 
GitLab


From 2a48b5911cf2e111a271bffbe5cac443231a4384 Mon Sep 17 00:00:00 2001
From: Changfeng <Changfeng.Zhu@amd.com>
Date: Wed, 2 Jun 2021 21:25:56 +0800
Subject: [PATCH 2476/3804] drm/amdgpu: switch kzalloc to kvzalloc in
 amdgpu_bo_create
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allocating more than 128KB of memory in amdgpu_bo_create->kzalloc causes
an error, so kzalloc needs to be switched to kvzalloc.

Call Trace:
   alloc_pages_current+0x6a/0xe0
   kmalloc_order+0x32/0xb0
   kmalloc_order_trace+0x1e/0x80
   __kmalloc+0x249/0x2d0
   amdgpu_bo_create+0x102/0x500 [amdgpu]
   ? xas_create+0x264/0x3e0
   amdgpu_bo_create_vm+0x32/0x60 [amdgpu]
   amdgpu_vm_pt_create+0xf5/0x260 [amdgpu]
   amdgpu_vm_init+0x1fd/0x4d0 [amdgpu]

Signed-off-by: Changfeng <Changfeng.Zhu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1345f7eba011b..f9434bc2f9b21 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -100,7 +100,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
 		kfree(ubo->metadata);
 	}
 
-	kfree(bo);
+	kvfree(bo);
 }
 
 /**
@@ -552,7 +552,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
 	BUG_ON(bp->bo_ptr_size < sizeof(struct amdgpu_bo));
 
 	*bo_ptr = NULL;
-	bo = kzalloc(bp->bo_ptr_size, GFP_KERNEL);
+	bo = kvzalloc(bp->bo_ptr_size, GFP_KERNEL);
 	if (bo == NULL)
 		return -ENOMEM;
 	drm_gem_private_object_init(adev_to_drm(adev), &bo->tbo.base, size);
-- 
GitLab


From b71a52f44725a3efab9591621c9dd5f8f9f1b522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <mdaenzer@redhat.com>
Date: Wed, 2 Jun 2021 11:53:05 +0200
Subject: [PATCH 2477/3804] drm/amdgpu: Use drm_dbg_kms for reporting failure
 to get a GEM FB
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drm_err meant broken user space could spam dmesg.

Fixes: f258907fdd835e "drm/amdgpu: Verify bo size can fit framebuffer size on init."
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 8a1fb8b6606e5..c13985fb35bed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1057,7 +1057,7 @@ int amdgpu_display_gem_fb_init(struct drm_device *dev,
 
 	return 0;
 err:
-	drm_err(dev, "Failed to init gem fb: %d\n", ret);
+	drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret);
 	rfb->base.obj[0] = NULL;
 	return ret;
 }
@@ -1094,7 +1094,7 @@ int amdgpu_display_gem_fb_verify_and_init(
 
 	return 0;
 err:
-	drm_err(dev, "Failed to verify and init gem fb: %d\n", ret);
+	drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
 	rfb->base.obj[0] = NULL;
 	return ret;
 }
-- 
GitLab


From c247c021b13a2ce40dd9ed06f1e18044dcaefd37 Mon Sep 17 00:00:00 2001
From: Rohit Khaire <rohit.khaire@amd.com>
Date: Fri, 4 Jun 2021 11:02:56 -0400
Subject: [PATCH 2478/3804] drm/amdgpu: Fix incorrect register offsets for
 Sienna Cichlid

RLC_CP_SCHEDULERS and RLC_SPARE_INT0 have different
offsets for Sienna Cichlid

Signed-off-by: Rohit Khaire <rohit.khaire@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 7ce76a6b3a350..0597aeb5f0e89 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -173,6 +173,9 @@
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid              0x2030
 #define mmGC_THROTTLE_CTRL_Sienna_Cichlid_BASE_IDX     0
 
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid               0x4ca5
+#define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX      1
+
 #define GFX_RLCG_GC_WRITE_OLD	(0x8 << 28)
 #define GFX_RLCG_GC_WRITE	(0x0 << 28)
 #define GFX_RLCG_GC_READ	(0x1 << 28)
@@ -1480,8 +1483,15 @@ static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32
 		       (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4;
 	scratch_reg3 = adev->rmmio +
 		       (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4;
-	spare_int = adev->rmmio +
-		    (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
+
+	if (adev->asic_type >= CHIP_SIENNA_CICHLID) {
+		spare_int = adev->rmmio +
+			    (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX]
+			     + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4;
+	} else {
+		spare_int = adev->rmmio +
+			    (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4;
+	}
 
 	grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL;
 	grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX;
@@ -7349,9 +7359,15 @@ static int gfx_v10_0_hw_fini(void *handle)
 	if (amdgpu_sriov_vf(adev)) {
 		gfx_v10_0_cp_gfx_enable(adev, false);
 		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
-		tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
-		tmp &= 0xffffff00;
-		WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+		if (adev->asic_type >= CHIP_SIENNA_CICHLID) {
+			tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid);
+			tmp &= 0xffffff00;
+			WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp);
+		} else {
+			tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
+			tmp &= 0xffffff00;
+			WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
+		}
 
 		return 0;
 	}
-- 
GitLab


From 149876d96877eedce0ae3ffbd64edb56360b8926 Mon Sep 17 00:00:00 2001
From: Huilong Deng <denghuilong@cdjrlc.com>
Date: Sat, 5 Jun 2021 12:53:02 +0800
Subject: [PATCH 2479/3804] seqlock: Remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Signed-off-by: Huilong Deng <denghuilong@cdjrlc.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210605045302.37154-1-denghuilong@cdjrlc.com
---
 include/linux/seqlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index f61e34fbaaea4..37ded6b8fee61 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -182,9 +182,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
 
 #define seqcount_raw_spinlock_init(s, lock)	seqcount_LOCKNAME_init(s, lock, raw_spinlock)
 #define seqcount_spinlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, spinlock)
-#define seqcount_rwlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, rwlock);
-#define seqcount_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, mutex);
-#define seqcount_ww_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, ww_mutex);
+#define seqcount_rwlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, rwlock)
+#define seqcount_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, mutex)
+#define seqcount_ww_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, ww_mutex)
 
 /*
  * SEQCOUNT_LOCKNAME()	- Instantiate seqcount_LOCKNAME_t and helpers
-- 
GitLab


From 924f41e52fd10f6e573137eef1afea9e9ad09212 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 4 Jun 2021 00:06:13 -0500
Subject: [PATCH 2480/3804] drm/amd/pm: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a warning
by explicitly adding a break statement instead of letting the code fall
through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
index f5fe540cd5366..27cf227167939 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
@@ -810,6 +810,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 		break;
 	case AMD_DPM_FORCED_LEVEL_MANUAL:
 		data->fine_grain_enabled = 1;
+		break;
 	case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
 	default:
 		break;
-- 
GitLab


From ab8363d3875a83f4901eb1cc00ce8afd24de6c85 Mon Sep 17 00:00:00 2001
From: Chen Li <chenli@uniontech.com>
Date: Fri, 4 Jun 2021 16:43:02 +0800
Subject: [PATCH 2481/3804] radeon: use memcpy_to/fromio for UVD fw upload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I met a gpu addr bug recently and the kernel log
tells me the pc is memcpy/memset and link register is
radeon_uvd_resume.

As we know, in some architectures, optimized memcpy/memset
may not work well on device memory. Trivial memcpy_toio/memset_io
can fix this problem.

BTW, amdgpu has already done it in:
commit ba0b2275a678 ("drm/amdgpu: use memcpy_to/fromio for UVD fw upload"),
which is why it does not have this issue on the same gpu and platform.

Signed-off-by: Chen Li <chenli@uniontech.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/radeon_uvd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index dfa9fdbe98da2..06bb24d7a9fee 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -286,7 +286,7 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 	if (rdev->uvd.vcpu_bo == NULL)
 		return -EINVAL;
 
-	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+	memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
 
 	size = radeon_bo_size(rdev->uvd.vcpu_bo);
 	size -= rdev->uvd_fw->size;
@@ -294,7 +294,7 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 	ptr = rdev->uvd.cpu_addr;
 	ptr += rdev->uvd_fw->size;
 
-	memset(ptr, 0, size);
+	memset_io((void __iomem *)ptr, 0, size);
 
 	return 0;
 }
-- 
GitLab


From 408434036958699a7f50ddec984f7ba33e11a8f5 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Thu, 13 May 2021 13:13:59 -0400
Subject: [PATCH 2482/3804] drm/msm/a6xx: update/fix CP_PROTECT initialization

Update CP_PROTECT register programming based on downstream.

A6XX_PROTECT_RW is renamed to A6XX_PROTECT_NORDWR to make things aligned
and also be more clear about what it does.

Note that this required switching to use the CP_ALWAYS_ON_COUNTER as the
GMU counter is not accessible from the cmdstream.  Which also means
using the CPU counter for the msm_gpu_submit_flush() tracepoint (as
catapult depends on being able to compare this to the start/end values
captured in cmdstream).  This may need to be revisited when IFPC is
enabled.

Also, compared to downstream, this opens up CP_PERFCTR_CP_SEL as the
userspace performance tooling (fdperf and pps-producer) expect to be
able to configure the CP counters.

Fixes: 4b565ca5a2cb ("drm/msm: Add A6XX device support")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>
Link: https://lore.kernel.org/r/20210513171431.18632-5-jonathan@marek.ca
[switch to CP_ALWAYS_ON_COUNTER, open up CP_PERFCNTR_CP_SEL, and spiff
 up commit msg]
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 151 +++++++++++++++++++-------
 drivers/gpu/drm/msm/adreno/a6xx_gpu.h |   2 +-
 2 files changed, 113 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index b4d8e1b01ee4f..9702bec143100 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -157,7 +157,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	 * GPU registers so we need to add 0x1a800 to the register value on A630
 	 * to get the right value from PM4.
 	 */
-	get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 		rbmemptr_stats(ring, index, alwayson_start));
 
 	/* Invalidate CCU depth and color */
@@ -187,7 +187,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 
 	get_stats_counter(ring, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
 		rbmemptr_stats(ring, index, cpcycles_end));
-	get_stats_counter(ring, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L + 0x1a800,
+	get_stats_counter(ring, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
 		rbmemptr_stats(ring, index, alwayson_end));
 
 	/* Write the fence to the scratch register */
@@ -206,8 +206,8 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	OUT_RING(ring, submit->seqno);
 
 	trace_msm_gpu_submit_flush(submit,
-		gmu_read64(&a6xx_gpu->gmu, REG_A6XX_GMU_ALWAYS_ON_COUNTER_L,
-			REG_A6XX_GMU_ALWAYS_ON_COUNTER_H));
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
 
 	a6xx_flush(gpu, ring);
 }
@@ -462,6 +462,113 @@ static void a6xx_set_hwcg(struct msm_gpu *gpu, bool state)
 	gpu_write(gpu, REG_A6XX_RBBM_CLOCK_CNTL, state ? clock_cntl_on : 0);
 }
 
+/* For a615, a616, a618, A619, a630, a640 and a680 */
+static const u32 a6xx_protect[] = {
+	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+	A6XX_PROTECT_NORDWR(0x09e70, 0x0001),
+	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x11c00, 0x0000), /* note: infinite range */
+};
+
+/* These are for a620 and a650 */
+static const u32 a650_protect[] = {
+	A6XX_PROTECT_RDONLY(0x00000, 0x04ff),
+	A6XX_PROTECT_RDONLY(0x00501, 0x0005),
+	A6XX_PROTECT_RDONLY(0x0050b, 0x02f4),
+	A6XX_PROTECT_NORDWR(0x0050e, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00510, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00534, 0x0000),
+	A6XX_PROTECT_NORDWR(0x00800, 0x0082),
+	A6XX_PROTECT_NORDWR(0x008a0, 0x0008),
+	A6XX_PROTECT_NORDWR(0x008ab, 0x0024),
+	A6XX_PROTECT_RDONLY(0x008de, 0x00ae),
+	A6XX_PROTECT_NORDWR(0x00900, 0x004d),
+	A6XX_PROTECT_NORDWR(0x0098d, 0x0272),
+	A6XX_PROTECT_NORDWR(0x00e00, 0x0001),
+	A6XX_PROTECT_NORDWR(0x00e03, 0x000c),
+	A6XX_PROTECT_NORDWR(0x03c00, 0x00c3),
+	A6XX_PROTECT_RDONLY(0x03cc4, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x08630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x08e00, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e08, 0x0000),
+	A6XX_PROTECT_NORDWR(0x08e50, 0x001f),
+	A6XX_PROTECT_NORDWR(0x08e80, 0x027f),
+	A6XX_PROTECT_NORDWR(0x09624, 0x01db),
+	A6XX_PROTECT_NORDWR(0x09e60, 0x0011),
+	A6XX_PROTECT_NORDWR(0x09e78, 0x0187),
+	A6XX_PROTECT_NORDWR(0x0a630, 0x01cf),
+	A6XX_PROTECT_NORDWR(0x0ae02, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0ae50, 0x032f),
+	A6XX_PROTECT_NORDWR(0x0b604, 0x0000),
+	A6XX_PROTECT_NORDWR(0x0b608, 0x0007),
+	A6XX_PROTECT_NORDWR(0x0be02, 0x0001),
+	A6XX_PROTECT_NORDWR(0x0be20, 0x17df),
+	A6XX_PROTECT_NORDWR(0x0f000, 0x0bff),
+	A6XX_PROTECT_RDONLY(0x0fc00, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x18400, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x1a800, 0x1fff),
+	A6XX_PROTECT_NORDWR(0x1f400, 0x0443),
+	A6XX_PROTECT_RDONLY(0x1f844, 0x007b),
+	A6XX_PROTECT_NORDWR(0x1f887, 0x001b),
+	A6XX_PROTECT_NORDWR(0x1f8c0, 0x0000), /* note: infinite range */
+};
+
+static void a6xx_set_cp_protect(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	const u32 *regs = a6xx_protect;
+	unsigned i, count = ARRAY_SIZE(a6xx_protect), count_max = 32;
+
+	BUILD_BUG_ON(ARRAY_SIZE(a6xx_protect) > 32);
+	BUILD_BUG_ON(ARRAY_SIZE(a650_protect) > 48);
+
+	if (adreno_is_a650(adreno_gpu)) {
+		regs = a650_protect;
+		count = ARRAY_SIZE(a650_protect);
+		count_max = 48;
+	}
+
+	/*
+	 * Enable access protection to privileged registers, fault on an access
+	 * protect violation and select the last span to protect from the start
+	 * address all the way to the end of the register address space
+	 */
+	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, BIT(0) | BIT(1) | BIT(3));
+
+	for (i = 0; i < count - 1; i++)
+		gpu_write(gpu, REG_A6XX_CP_PROTECT(i), regs[i]);
+	/* last CP_PROTECT to have "infinite" length on the last entry */
+	gpu_write(gpu, REG_A6XX_CP_PROTECT(count_max - 1), regs[i]);
+}
+
 static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -776,41 +883,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu)
 	}
 
 	/* Protect registers from the CP */
-	gpu_write(gpu, REG_A6XX_CP_PROTECT_CNTL, 0x00000003);
-
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(0),
-		A6XX_PROTECT_RDONLY(0x600, 0x51));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(1), A6XX_PROTECT_RW(0xae50, 0x2));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(2), A6XX_PROTECT_RW(0x9624, 0x13));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(3), A6XX_PROTECT_RW(0x8630, 0x8));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(4), A6XX_PROTECT_RW(0x9e70, 0x1));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(5), A6XX_PROTECT_RW(0x9e78, 0x187));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(6), A6XX_PROTECT_RW(0xf000, 0x810));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(7),
-		A6XX_PROTECT_RDONLY(0xfc00, 0x3));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(8), A6XX_PROTECT_RW(0x50e, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(9), A6XX_PROTECT_RDONLY(0x50f, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(10), A6XX_PROTECT_RW(0x510, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(11),
-		A6XX_PROTECT_RDONLY(0x0, 0x4f9));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(12),
-		A6XX_PROTECT_RDONLY(0x501, 0xa));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(13),
-		A6XX_PROTECT_RDONLY(0x511, 0x44));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(14), A6XX_PROTECT_RW(0xe00, 0xe));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(15), A6XX_PROTECT_RW(0x8e00, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(16), A6XX_PROTECT_RW(0x8e50, 0xf));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(17), A6XX_PROTECT_RW(0xbe02, 0x0));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(18),
-		A6XX_PROTECT_RW(0xbe20, 0x11f3));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(19), A6XX_PROTECT_RW(0x800, 0x82));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(20), A6XX_PROTECT_RW(0x8a0, 0x8));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(21), A6XX_PROTECT_RW(0x8ab, 0x19));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(22), A6XX_PROTECT_RW(0x900, 0x4d));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(23), A6XX_PROTECT_RW(0x98d, 0x76));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(24),
-			A6XX_PROTECT_RDONLY(0x980, 0x4));
-	gpu_write(gpu, REG_A6XX_CP_PROTECT(25), A6XX_PROTECT_RW(0xa630, 0x0));
+	a6xx_set_cp_protect(gpu);
 
 	/* Enable expanded apriv for targets that support it */
 	if (gpu->hw_apriv) {
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
index ce0610c5256f7..bb544dfe57379 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
@@ -44,7 +44,7 @@ struct a6xx_gpu {
  * REG_CP_PROTECT_REG(n) - this will block both reads and writes for _len
  * registers starting at _reg.
  */
-#define A6XX_PROTECT_RW(_reg, _len) \
+#define A6XX_PROTECT_NORDWR(_reg, _len) \
 	((1 << 31) | \
 	(((_len) & 0x3FFF) << 18) | ((_reg) & 0x3FFFF))
 
-- 
GitLab


From b4387eaf3821a4c4241ac3a556e13244eb1fdaa5 Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Thu, 13 May 2021 13:13:58 -0400
Subject: [PATCH 2483/3804] drm/msm/a6xx: fix incorrectly set uavflagprd_inv
 field for A650

Value was shifted in the wrong direction, resulting in the field always
being zero, which is incorrect for A650.

Fixes: d0bac4e9cd66 ("drm/msm/a6xx: set ubwc config for A640 and A650")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>
Link: https://lore.kernel.org/r/20210513171431.18632-4-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 9702bec143100..37ba111601cdc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -596,7 +596,7 @@ static void a6xx_set_ubwc_config(struct msm_gpu *gpu)
 		rgb565_predicator << 11 | amsbc << 4 | lower_bit << 1);
 	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL, lower_bit << 1);
 	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
-		uavflagprd_inv >> 4 | lower_bit << 1);
+		uavflagprd_inv << 4 | lower_bit << 1);
 	gpu_write(gpu, REG_A6XX_UCHE_MODE_CNTL, lower_bit << 21);
 }
 
-- 
GitLab


From ce86c239e4d218ae6040bec18e6d19a58edb8b7c Mon Sep 17 00:00:00 2001
From: Jonathan Marek <jonathan@marek.ca>
Date: Thu, 13 May 2021 13:14:00 -0400
Subject: [PATCH 2484/3804] drm/msm/a6xx: avoid shadow NULL reference in
 failure path

If a6xx_hw_init() fails before creating the shadow_bo, the a6xx_pm_suspend
code referencing it will crash. Change the condition to one that avoids
this problem (note: creation of shadow_bo is behind this same condition)

Fixes: e8b0b994c3a5 ("drm/msm/a6xx: Clear shadow on suspend")
Signed-off-by: Jonathan Marek <jonathan@marek.ca>
Reviewed-by: Akhil P Oommen <akhilpo@codeaurora.org>
Link: https://lore.kernel.org/r/20210513171431.18632-6-jonathan@marek.ca
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index 37ba111601cdc..f6c1b62b901e2 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1284,7 +1284,7 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)
 	if (ret)
 		return ret;
 
-	if (adreno_gpu->base.hw_apriv || a6xx_gpu->has_whereami)
+	if (a6xx_gpu->shadow_bo)
 		for (i = 0; i < gpu->nr_rings; i++)
 			a6xx_gpu->shadow[i] = 0;
 
-- 
GitLab


From d612c3f3fae221e7ea736d196581c2217304bbbc Mon Sep 17 00:00:00 2001
From: Nanyong Sun <sunnanyong@huawei.com>
Date: Tue, 8 Jun 2021 09:51:58 +0800
Subject: [PATCH 2485/3804] net: ipv4: fix memory leak in
 netlbl_cipsov4_add_std

Reported by syzkaller:
BUG: memory leak
unreferenced object 0xffff888105df7000 (size 64):
comm "syz-executor842", pid 360, jiffies 4294824824 (age 22.546s)
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<00000000e67ed558>] kmalloc include/linux/slab.h:590 [inline]
[<00000000e67ed558>] kzalloc include/linux/slab.h:720 [inline]
[<00000000e67ed558>] netlbl_cipsov4_add_std net/netlabel/netlabel_cipso_v4.c:145 [inline]
[<00000000e67ed558>] netlbl_cipsov4_add+0x390/0x2340 net/netlabel/netlabel_cipso_v4.c:416
[<0000000006040154>] genl_family_rcv_msg_doit.isra.0+0x20e/0x320 net/netlink/genetlink.c:739
[<00000000204d7a1c>] genl_family_rcv_msg net/netlink/genetlink.c:783 [inline]
[<00000000204d7a1c>] genl_rcv_msg+0x2bf/0x4f0 net/netlink/genetlink.c:800
[<00000000c0d6a995>] netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2504
[<00000000d78b9d2c>] genl_rcv+0x24/0x40 net/netlink/genetlink.c:811
[<000000009733081b>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline]
[<000000009733081b>] netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1340
[<00000000d5fd43b8>] netlink_sendmsg+0x789/0xc70 net/netlink/af_netlink.c:1929
[<000000000a2d1e40>] sock_sendmsg_nosec net/socket.c:654 [inline]
[<000000000a2d1e40>] sock_sendmsg+0x139/0x170 net/socket.c:674
[<00000000321d1969>] ____sys_sendmsg+0x658/0x7d0 net/socket.c:2350
[<00000000964e16bc>] ___sys_sendmsg+0xf8/0x170 net/socket.c:2404
[<000000001615e288>] __sys_sendmsg+0xd3/0x190 net/socket.c:2433
[<000000004ee8b6a5>] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:47
[<00000000171c7cee>] entry_SYSCALL_64_after_hwframe+0x44/0xae

The memory that doi_def->map.std points to is allocated in
netlbl_cipsov4_add_std, but it is never freed. It should be
freed in cipso_v4_doi_free, which frees the cipso DOI resource.

Fixes: 96cb8e3313c7a ("[NetLabel]: CIPSOv4 and Unlabeled packet integration")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/cipso_ipv4.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bfaf327e9d121..e0480c6cebaad 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -472,6 +472,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
 		kfree(doi_def->map.std->lvl.local);
 		kfree(doi_def->map.std->cat.cipso);
 		kfree(doi_def->map.std->cat.local);
+		kfree(doi_def->map.std);
 		break;
 	}
 	kfree(doi_def);
-- 
GitLab


From 5ac6b198d7e312bd10ebe7d58c64690dc59cc49a Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 8 Jun 2021 09:53:15 +0800
Subject: [PATCH 2486/3804] net: ipv4: Remove unneed BUG() function

When 'nla_parse_nested_deprecated' fails, there is no need to
BUG() here; returning -EINVAL is sufficient.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/devinet.c  | 2 +-
 net/ipv6/addrconf.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2e35f68da40a7..1c6429c353a96 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla,
 		return -EAFNOSUPPORT;
 
 	if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0)
-		BUG();
+		return -EINVAL;
 
 	if (tb[IFLA_INET_CONF]) {
 		nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b0ef65eb9bd21..701eb82acd1c5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5827,7 +5827,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
 		return -EAFNOSUPPORT;
 
 	if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0)
-		BUG();
+		return -EINVAL;
 
 	if (tb[IFLA_INET6_TOKEN]) {
 		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
-- 
GitLab


From d439aa33a9b917cfbca8a528f13367aff974aeb7 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Mon, 7 Jun 2021 19:19:32 -0700
Subject: [PATCH 2487/3804] net: appletalk: fix the usage of preposition

The preposition "for" should be changed to preposition "of".

Signed-off-by: gushengxian <gushengxian@yulong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/aarp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index be18af481d7d5..c7236daa24152 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -768,7 +768,7 @@ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (a && a->status & ATIF_PROBE) {
 		a->status |= ATIF_PROBE_FAIL;
 		/*
-		 * we do not respond to probe or request packets for
+		 * we do not respond to probe or request packets of
 		 * this address while we are probing this address
 		 */
 		goto unlock;
-- 
GitLab


From 9bb392f62447d73cc7dd7562413a2cd9104c82f8 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 8 Jun 2021 16:59:51 +0200
Subject: [PATCH 2488/3804] vrf: fix maximum MTU

My initial goal was to fix the default MTU, which is set to 65536, ie above
the maximum defined in the driver: 65535 (ETH_MAX_MTU).

In fact, it seems more consistent, wrt min_mtu, to set the max_mtu to
IP6_MAX_MTU (65535 + sizeof(struct ipv6hdr)) and use it by default.

Let's also, for consistency, set the mtu in vrf_setup(). This function
calls ether_setup(), which sets the mtu to 1500. Thus, the whole mtu config
is done in the same function.

Before the patch:
$ ip link add blue type vrf table 1234
$ ip link list blue
9: blue: <NOARP,MASTER> mtu 65536 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether fa:f5:27:70:24:2a brd ff:ff:ff:ff:ff:ff
$ ip link set dev blue mtu 65535
$ ip link set dev blue mtu 65536
Error: mtu greater than device maximum.

Fixes: 5055376a3b44 ("net: vrf: Fix ping failed when vrf mtu is set to 0")
CC: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 503e2fd7ce518..28a6c4cfe9b8c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1183,9 +1183,6 @@ static int vrf_dev_init(struct net_device *dev)
 
 	dev->flags = IFF_MASTER | IFF_NOARP;
 
-	/* MTU is irrelevant for VRF device; set to 64k similar to lo */
-	dev->mtu = 64 * 1024;
-
 	/* similarly, oper state is irrelevant; set to up to avoid confusion */
 	dev->operstate = IF_OPER_UP;
 	netdev_lockdep_set_classes(dev);
@@ -1685,7 +1682,8 @@ static void vrf_setup(struct net_device *dev)
 	 * which breaks networking.
 	 */
 	dev->min_mtu = IPV6_MIN_MTU;
-	dev->max_mtu = ETH_MAX_MTU;
+	dev->max_mtu = IP6_MAX_MTU;
+	dev->mtu = dev->max_mtu;
 }
 
 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
-- 
GitLab


From edc0b0bccc9c80d9a44d3002dcca94984b25e7cf Mon Sep 17 00:00:00 2001
From: Mark Bloch <mbloch@nvidia.com>
Date: Mon, 7 Jun 2021 11:03:12 +0300
Subject: [PATCH 2489/3804] RDMA/mlx5: Block FDB rules when not in switchdev
 mode

Allow creating FDB steering rules only when in switchdev mode.

The only software model where a userspace application can manipulate
FDB entries is when it manages the eswitch. This is only possible in
switchdev mode where we expose a single RDMA device with representors
for all the vports that are connected to the eswitch.

Fixes: 52438be44112 ("RDMA/mlx5: Allow inserting a steering rule to the FDB")
Link: https://lore.kernel.org/r/e928ae7c58d07f104716a2a8d730963d1bd01204.1623052923.git.leonro@nvidia.com
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/fs.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 2fc6a60c4e775..f84441ff0c81a 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2134,6 +2134,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
 	if (err)
 		goto end;
 
+	if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+	    mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) {
+		err = -EINVAL;
+		goto end;
+	}
+
 	uobj->object = obj;
 	obj->mdev = dev->mdev;
 	atomic_set(&obj->usecnt, 0);
-- 
GitLab


From ec35d1d93bf8976f0668cb1026ea8c7d7bcad3c1 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 8 Jun 2021 22:17:10 +0200
Subject: [PATCH 2490/3804] x86/setup: Document that Windows reserves the first
 MiB

It does so unconditionally too, on Intel and AMD machines, to work
around BIOS bugs, as confirmed by Microsoft folks (see Link for full
details).

Reflow the paragraph, while at it.

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/MWHPR21MB159330952629D36EEDE706B3D7379@MWHPR21MB1593.namprd21.prod.outlook.com
---
 arch/x86/kernel/setup.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 7638ac6c3d80a..85acd22f8022c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1060,17 +1060,18 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	/*
-	 * Find free memory for the real mode trampoline and place it
-	 * there.
-	 * If there is not enough free memory under 1M, on EFI-enabled
-	 * systems there will be additional attempt to reclaim the memory
-	 * for the real mode trampoline at efi_free_boot_services().
+	 * Find free memory for the real mode trampoline and place it there. If
+	 * there is not enough free memory under 1M, on EFI-enabled systems
+	 * there will be additional attempt to reclaim the memory for the real
+	 * mode trampoline at efi_free_boot_services().
 	 *
-	 * Unconditionally reserve the entire first 1M of RAM because
-	 * BIOSes are know to corrupt low memory and several
-	 * hundred kilobytes are not worth complex detection what memory gets
-	 * clobbered. Moreover, on machines with SandyBridge graphics or in
-	 * setups that use crashkernel the entire 1M is reserved anyway.
+	 * Unconditionally reserve the entire first 1M of RAM because BIOSes
+	 * are known to corrupt low memory and several hundred kilobytes are not
+	 * worth complex detection what memory gets clobbered. Windows does the
+	 * same thing for very similar reasons.
+	 *
+	 * Moreover, on machines with SandyBridge graphics or in setups that use
+	 * crashkernel the entire 1M is reserved anyway.
 	 */
 	reserve_real_mode();
 
-- 
GitLab


From 22a558f567ab40b6ea779d0f535d3e32c35c099a Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 8 Jun 2021 14:31:20 +0200
Subject: [PATCH 2491/3804] doc: Fix warning in
 Documentation/security/IMA-templates.rst

This patch fixes the warning:

Documentation/security/IMA-templates.rst:81: WARNING: Inline
substitution_reference start-string without end-string.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 Documentation/security/IMA-templates.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/security/IMA-templates.rst b/Documentation/security/IMA-templates.rst
index 5adc22f994960..1a91d92950a79 100644
--- a/Documentation/security/IMA-templates.rst
+++ b/Documentation/security/IMA-templates.rst
@@ -78,7 +78,7 @@ descriptors by adding their identifier to the format string
  - 'iuid': the inode UID;
  - 'igid': the inode GID;
  - 'imode': the inode mode;
- - 'xattrnames': a list of xattr names (separated by |), only if the xattr is
+ - 'xattrnames': a list of xattr names (separated by ``|``), only if the xattr is
     present;
  - 'xattrlengths': a list of xattr lengths (u32), only if the xattr is present;
  - 'xattrvalues': a list of xattr values;
-- 
GitLab


From 24c9ae23bdfa0642228e747849dd052fd4295c6c Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 8 Jun 2021 14:31:21 +0200
Subject: [PATCH 2492/3804] ima: Set correct casting types

The code expects that the values being parsed from a buffer when the
ima_canonical_fmt global variable is true are in little endian. Thus, this
patch sets the casting types accordingly.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_template.c     |  8 ++++----
 security/integrity/ima/ima_template_lib.c | 11 ++++++-----
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c
index a85963853a91a..694560396be05 100644
--- a/security/integrity/ima/ima_template.c
+++ b/security/integrity/ima/ima_template.c
@@ -423,9 +423,9 @@ int ima_restore_measurement_list(loff_t size, void *buf)
 		return 0;
 
 	if (ima_canonical_fmt) {
-		khdr->version = le16_to_cpu(khdr->version);
-		khdr->count = le64_to_cpu(khdr->count);
-		khdr->buffer_size = le64_to_cpu(khdr->buffer_size);
+		khdr->version = le16_to_cpu((__force __le16)khdr->version);
+		khdr->count = le64_to_cpu((__force __le64)khdr->count);
+		khdr->buffer_size = le64_to_cpu((__force __le64)khdr->buffer_size);
 	}
 
 	if (khdr->version != 1) {
@@ -515,7 +515,7 @@ int ima_restore_measurement_list(loff_t size, void *buf)
 		}
 
 		entry->pcr = !ima_canonical_fmt ? *(u32 *)(hdr[HDR_PCR].data) :
-			     le32_to_cpu(*(u32 *)(hdr[HDR_PCR].data));
+			     le32_to_cpu(*(__le32 *)(hdr[HDR_PCR].data));
 		ret = ima_restore_measurement_entry(entry);
 		if (ret < 0)
 			break;
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 518fd50ea48a9..3f8d53a036120 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -98,21 +98,21 @@ static void ima_show_template_data_ascii(struct seq_file *m,
 		case sizeof(u16):
 			if (ima_canonical_fmt)
 				seq_printf(m, "%u",
-					   le16_to_cpu(*(u16 *)buf_ptr));
+					   le16_to_cpu(*(__le16 *)buf_ptr));
 			else
 				seq_printf(m, "%u", *(u16 *)buf_ptr);
 			break;
 		case sizeof(u32):
 			if (ima_canonical_fmt)
 				seq_printf(m, "%u",
-					   le32_to_cpu(*(u32 *)buf_ptr));
+					   le32_to_cpu(*(__le32 *)buf_ptr));
 			else
 				seq_printf(m, "%u", *(u32 *)buf_ptr);
 			break;
 		case sizeof(u64):
 			if (ima_canonical_fmt)
 				seq_printf(m, "%llu",
-					   le64_to_cpu(*(u64 *)buf_ptr));
+					   le64_to_cpu(*(__le64 *)buf_ptr));
 			else
 				seq_printf(m, "%llu", *(u64 *)buf_ptr);
 			break;
@@ -226,9 +226,10 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp,
 			if (bufp > (bufendp - sizeof(u32)))
 				break;
 
-			fields[i].len = *(u32 *)bufp;
 			if (ima_canonical_fmt)
-				fields[i].len = le32_to_cpu(fields[i].len);
+				fields[i].len = le32_to_cpu(*(__le32 *)bufp);
+			else
+				fields[i].len = *(u32 *)bufp;
 
 			bufp += sizeof(u32);
 		}
-- 
GitLab


From 6b26285f44c9306747c609cb304f787f1933594c Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 8 Jun 2021 14:31:22 +0200
Subject: [PATCH 2493/3804] ima/evm: Fix type mismatch

The endianness of a variable written to the measurement list cannot be
determined at compile time, as it depends on the value of the
ima_canonical_fmt global variable (set through a kernel option with the
same name if the machine is big endian).

If ima_canonical_fmt is false, the endianness of a variable is the same as
the machine; if ima_canonical_fmt is true, the endianness is little endian.
The warning arises due to this type of instruction:

var = cpu_to_leXX(var)

which tries to assign a value in little endian to a variable with native
endianness (little or big endian).

Given that the variables set with this instruction are not used in any
operation but just written to a buffer, it is safe to force the type of the
value being set to be the same of the type of the variable with:

var = (__force <var type>)cpu_to_leXX(var)

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_main.c         |  2 +-
 security/integrity/ima/ima_crypto.c       |  4 ++--
 security/integrity/ima/ima_fs.c           |  6 +++---
 security/integrity/ima/ima_template_lib.c | 11 ++++++-----
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 2c226e634ae97..977208aecd066 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -360,7 +360,7 @@ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer,
 			size = sizeof(u32);
 			if (buffer) {
 				if (canonical_fmt)
-					rc = cpu_to_le32(rc);
+					rc = (__force int)cpu_to_le32(rc);
 
 				*(u32 *)(buffer + total_size) = rc;
 			}
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index f6a7e9643b546..a7206cc1d7d19 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -598,8 +598,8 @@ static int ima_calc_field_array_hash_tfm(struct ima_field_data *field_data,
 		u8 buffer[IMA_EVENT_NAME_LEN_MAX + 1] = { 0 };
 		u8 *data_to_hash = field_data[i].data;
 		u32 datalen = field_data[i].len;
-		u32 datalen_to_hash =
-		    !ima_canonical_fmt ? datalen : cpu_to_le32(datalen);
+		u32 datalen_to_hash = !ima_canonical_fmt ?
+				datalen : (__force u32)cpu_to_le32(datalen);
 
 		if (strcmp(td->name, IMA_TEMPLATE_IMA_NAME) != 0) {
 			rc = crypto_shash_update(shash,
diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c
index ea8ff8a07b36b..3d8e9d5db5aa5 100644
--- a/security/integrity/ima/ima_fs.c
+++ b/security/integrity/ima/ima_fs.c
@@ -147,7 +147,7 @@ int ima_measurements_show(struct seq_file *m, void *v)
 	 * PCR used defaults to the same (config option) in
 	 * little-endian format, unless set in policy
 	 */
-	pcr = !ima_canonical_fmt ? e->pcr : cpu_to_le32(e->pcr);
+	pcr = !ima_canonical_fmt ? e->pcr : (__force u32)cpu_to_le32(e->pcr);
 	ima_putc(m, &pcr, sizeof(e->pcr));
 
 	/* 2nd: template digest */
@@ -155,7 +155,7 @@ int ima_measurements_show(struct seq_file *m, void *v)
 
 	/* 3rd: template name size */
 	namelen = !ima_canonical_fmt ? strlen(template_name) :
-		cpu_to_le32(strlen(template_name));
+		(__force u32)cpu_to_le32(strlen(template_name));
 	ima_putc(m, &namelen, sizeof(namelen));
 
 	/* 4th:  template name */
@@ -167,7 +167,7 @@ int ima_measurements_show(struct seq_file *m, void *v)
 
 	if (!is_ima_template) {
 		template_data_len = !ima_canonical_fmt ? e->template_data_len :
-			cpu_to_le32(e->template_data_len);
+			(__force u32)cpu_to_le32(e->template_data_len);
 		ima_putc(m, &template_data_len, sizeof(e->template_data_len));
 	}
 
diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 3f8d53a036120..8e2a121af5e11 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -133,7 +133,8 @@ static void ima_show_template_data_binary(struct seq_file *m,
 	    strlen(field_data->data) : field_data->len;
 
 	if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) {
-		u32 field_len = !ima_canonical_fmt ? len : cpu_to_le32(len);
+		u32 field_len = !ima_canonical_fmt ?
+				len : (__force u32)cpu_to_le32(len);
 
 		ima_putc(m, &field_len, sizeof(field_len));
 	}
@@ -570,9 +571,9 @@ static int ima_eventinodedac_init_common(struct ima_event_data *event_data,
 
 	if (ima_canonical_fmt) {
 		if (sizeof(id) == sizeof(u16))
-			id = cpu_to_le16(id);
+			id = (__force u16)cpu_to_le16(id);
 		else
-			id = cpu_to_le32(id);
+			id = (__force u32)cpu_to_le32(id);
 	}
 
 	return ima_write_template_field_data((void *)&id, sizeof(id),
@@ -607,7 +608,7 @@ int ima_eventinodemode_init(struct ima_event_data *event_data,
 			    struct ima_field_data *field_data)
 {
 	struct inode *inode;
-	umode_t mode;
+	u16 mode;
 
 	if (!event_data->file)
 		return 0;
@@ -615,7 +616,7 @@ int ima_eventinodemode_init(struct ima_event_data *event_data,
 	inode = file_inode(event_data->file);
 	mode = inode->i_mode;
 	if (ima_canonical_fmt)
-		mode = cpu_to_le16(mode);
+		mode = (__force u16)cpu_to_le16(mode);
 
 	return ima_write_template_field_data((char *)&mode, sizeof(mode),
 					     DATA_FMT_UINT, field_data);
-- 
GitLab


From 8c559415f66a42721fcfdf321cb7a58df01a4c74 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 8 Jun 2021 14:31:23 +0200
Subject: [PATCH 2494/3804] ima: Include header defining
 ima_post_key_create_or_update()

This patch fixes the sparse warning for ima_post_key_create_or_update() by
adding the header file that defines the prototype (linux/ima.h).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_asymmetric_keys.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/ima/ima_asymmetric_keys.c b/security/integrity/ima/ima_asymmetric_keys.c
index 1fb0b0e09559b..c985418698a44 100644
--- a/security/integrity/ima/ima_asymmetric_keys.c
+++ b/security/integrity/ima/ima_asymmetric_keys.c
@@ -11,6 +11,7 @@
 
 #include <keys/asymmetric-type.h>
 #include <linux/user_namespace.h>
+#include <linux/ima.h>
 #include "ima.h"
 
 /**
-- 
GitLab


From 531bf6a88d9bd6c13d4fc3f05d2de799d627de3b Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 8 Jun 2021 14:31:24 +0200
Subject: [PATCH 2495/3804] ima: Pass NULL instead of 0 to ima_get_action() in
 ima_file_mprotect()

This patch fixes the sparse warning:

sparse: warning: Using plain integer as NULL pointer

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 906c1d8e0b71c..287b905090066 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -433,7 +433,7 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
 	inode = file_inode(vma->vm_file);
 	action = ima_get_action(file_mnt_user_ns(vma->vm_file), inode,
 				current_cred(), secid, MAY_EXEC, MMAP_CHECK,
-				&pcr, &template, 0);
+				&pcr, &template, NULL);
 
 	/* Is the mmap'ed file in policy? */
 	if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK)))
-- 
GitLab


From e8ba0b2b64126381643bb50df3556b139a60545a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 11:42:16 +0800
Subject: [PATCH 2496/3804] tools/bootconfig: Fix error return code in
 apply_xbc()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Link: https://lkml.kernel.org/r/20210508034216.2277-1-thunder.leizhen@huawei.com

Fixes: a995e6bc0524 ("tools/bootconfig: Fix to check the write failure correctly")
Reported-by: Hulk Robot <hulkci@huawei.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 7362bef1a3683..6cd6080cac04c 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -399,6 +399,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	}
 	/* TODO: Ensure the @path is initramfs/initrd image */
 	if (fstat(fd, &stat) < 0) {
+		ret = -errno;
 		pr_err("Failed to get the size of %s\n", path);
 		goto out;
 	}
-- 
GitLab


From 824afd55e95c3cb12c55d297a0ae408be1779cc8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 13 May 2021 12:06:33 +0900
Subject: [PATCH 2497/3804] tools/bootconfig: Fix a build error caused by
 undefined fallthrough

Since the "fallthrough" is defined only in the kernel, building
lib/bootconfig.c as a part of user-space tools causes a build
error.

Add a dummy fallthrough to avoid the build error.

Link: https://lkml.kernel.org/r/162087519356.442660.11385099982318160180.stgit@devnote2

Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Fixes: 4c1ca831adb1 ("Revert "lib: Revert use of fallthrough pseudo-keyword in lib/"")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/include/linux/bootconfig.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h
index 078cbd2ba651d..de7f30f99af38 100644
--- a/tools/bootconfig/include/linux/bootconfig.h
+++ b/tools/bootconfig/include/linux/bootconfig.h
@@ -4,4 +4,8 @@
 
 #include "../../../../include/linux/bootconfig.h"
 
+#ifndef fallthrough
+# define fallthrough
+#endif
+
 #endif
-- 
GitLab


From 6c14133d2d3f768e0a35128faac8aa6ed4815051 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 7 Jun 2021 21:39:08 -0400
Subject: [PATCH 2498/3804] ftrace: Do not blindly read the ip address in
 ftrace_bug()

It was reported that a bug on arm64 caused a bad ip address to be used for
updating into a nop in ftrace_init(), but the error path (rightfully)
returned -EINVAL and not -EFAULT, as the bug caused more than one error to
occur. But because -EINVAL was returned, the ftrace_bug() tried to report
what was at the location of the ip address, and read it directly. This
caused the machine to panic, as the ip was not pointing to a valid memory
address.

Instead, read the ip address with copy_from_kernel_nofault() to safely
access the memory, and if it faults, report that the address faulted,
otherwise report what was in that location.

Link: https://lore.kernel.org/lkml/20210607032329.28671-1-mark-pk.tsai@mediatek.com/

Cc: stable@vger.kernel.org
Fixes: 05736a427f7e1 ("ftrace: warn on failure to disable mcount callers")
Reported-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2e8a3fde71044..72ef4dccbcc47 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1967,12 +1967,18 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
 
 static void print_ip_ins(const char *fmt, const unsigned char *p)
 {
+	char ins[MCOUNT_INSN_SIZE];
 	int i;
 
+	if (copy_from_kernel_nofault(ins, p, MCOUNT_INSN_SIZE)) {
+		printk(KERN_CONT "%s[FAULT] %px\n", fmt, p);
+		return;
+	}
+
 	printk(KERN_CONT "%s", fmt);
 
 	for (i = 0; i < MCOUNT_INSN_SIZE; i++)
-		printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+		printk(KERN_CONT "%s%02x", i ? ":" : "", ins[i]);
 }
 
 enum ftrace_bug_type ftrace_bug_type;
-- 
GitLab


From 3e08a9f9760f4a70d633c328a76408e62d6f80a3 Mon Sep 17 00:00:00 2001
From: Liangyan <liangyan.peng@linux.alibaba.com>
Date: Mon, 7 Jun 2021 20:57:34 +0800
Subject: [PATCH 2499/3804] tracing: Correct the length check which causes
 memory corruption

We've suffered from severe kernel crashes due to memory corruption on
our production environment, like,

Call Trace:
[1640542.554277] general protection fault: 0000 [#1] SMP PTI
[1640542.554856] CPU: 17 PID: 26996 Comm: python Kdump: loaded Tainted:G
[1640542.556629] RIP: 0010:kmem_cache_alloc+0x90/0x190
[1640542.559074] RSP: 0018:ffffb16faa597df8 EFLAGS: 00010286
[1640542.559587] RAX: 0000000000000000 RBX: 0000000000400200 RCX:
0000000006e931bf
[1640542.560323] RDX: 0000000006e931be RSI: 0000000000400200 RDI:
ffff9a45ff004300
[1640542.560996] RBP: 0000000000400200 R08: 0000000000023420 R09:
0000000000000000
[1640542.561670] R10: 0000000000000000 R11: 0000000000000000 R12:
ffffffff9a20608d
[1640542.562366] R13: ffff9a45ff004300 R14: ffff9a45ff004300 R15:
696c662f65636976
[1640542.563128] FS:  00007f45d7c6f740(0000) GS:ffff9a45ff840000(0000)
knlGS:0000000000000000
[1640542.563937] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[1640542.564557] CR2: 00007f45d71311a0 CR3: 000000189d63e004 CR4:
00000000003606e0
[1640542.565279] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[1640542.566069] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400
[1640542.566742] Call Trace:
[1640542.567009]  anon_vma_clone+0x5d/0x170
[1640542.567417]  __split_vma+0x91/0x1a0
[1640542.567777]  do_munmap+0x2c6/0x320
[1640542.568128]  vm_munmap+0x54/0x70
[1640542.569990]  __x64_sys_munmap+0x22/0x30
[1640542.572005]  do_syscall_64+0x5b/0x1b0
[1640542.573724]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[1640542.575642] RIP: 0033:0x7f45d6e61e27

James Wang has reproduced it stably on the latest 4.19 LTS.
After some debugging, we finally proved that it's due to ftrace
buffer out-of-bound access using a debug tool as follows:
[   86.775200] BUG: Out-of-bounds write at addr 0xffff88aefe8b7000
[   86.780806]  no_context+0xdf/0x3c0
[   86.784327]  __do_page_fault+0x252/0x470
[   86.788367]  do_page_fault+0x32/0x140
[   86.792145]  page_fault+0x1e/0x30
[   86.795576]  strncpy_from_unsafe+0x66/0xb0
[   86.799789]  fetch_memory_string+0x25/0x40
[   86.804002]  fetch_deref_string+0x51/0x60
[   86.808134]  kprobe_trace_func+0x32d/0x3a0
[   86.812347]  kprobe_dispatcher+0x45/0x50
[   86.816385]  kprobe_ftrace_handler+0x90/0xf0
[   86.820779]  ftrace_ops_assist_func+0xa1/0x140
[   86.825340]  0xffffffffc00750bf
[   86.828603]  do_sys_open+0x5/0x1f0
[   86.832124]  do_syscall_64+0x5b/0x1b0
[   86.835900]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

commit b220c049d519 ("tracing: Check length before giving out
the filter buffer") adds a length check to protect against the trace
data overflow introduced in 0fc1b09ff1ff, but this fix can't prevent
the overflow entirely: the length check should also take the sizeof
entry->array[0] into account, since array[0] is filled with the
length of the trace data and occupies additional space, risking
overflow.

Link: https://lkml.kernel.org/r/20210607125734.1770447-1-liangyan.peng@linux.alibaba.com

Cc: stable@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Xunlei Pang <xlpang@linux.alibaba.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fixes: b220c049d519 ("tracing: Check length before giving out the filter buffer")
Reviewed-by: Xunlei Pang <xlpang@linux.alibaba.com>
Reviewed-by: yinbinbin <yinbinbin@alibabacloud.com>
Reviewed-by: Wetp Zhang <wetp.zy@linux.alibaba.com>
Tested-by: James Wang <jnwang@linux.alibaba.com>
Signed-off-by: Liangyan <liangyan.peng@linux.alibaba.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a21ef9cd2aae2..9299057feb56f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2736,7 +2736,7 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
 	    (entry = this_cpu_read(trace_buffered_event))) {
 		/* Try to use the per cpu buffer first */
 		val = this_cpu_inc_return(trace_buffered_event_cnt);
-		if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
+		if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
 			trace_event_setup(entry, type, trace_ctx);
 			entry->array[0] = len;
 			return entry;
-- 
GitLab


From 1616a4c2ab1a80893b6890ae93da40a2b1d0c691 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 7 Jun 2021 20:50:51 +0800
Subject: [PATCH 2500/3804] bcache: remove bcache device self-defined readahead

For read cache misses, bcache defines a readahead size for the read I/O
request to the backing device for the missing data. This readahead size
is initialized to 0, and almost no one uses it, to avoid unnecessary read
amplification on the backing device and write amplification on the cache
device. Considering that the upper layer file system code already has
readahead logic that works fine with the readahead_cache_policy sysfs
file interface, we don't have to keep the bcache self-defined readahead
anymore.

This patch removes the bcache self-defined readahead for cache missing
request for backing device, and the readahead sysfs file interfaces are
removed as well.

This is the preparation for next patch to fix potential kernel panic due
to oversized request in a simpler method.

Reported-by: Alexander Ullrich <ealex1979@gmail.com>
Reported-by: Diego Ercolani <diego.ercolani@gmail.com>
Reported-by: Jan Szubiak <jan.szubiak@linuxpolska.pl>
Reported-by: Marco Rebhan <me@dblsaiko.net>
Reported-by: Matthias Ferdinand <bcache@mfedv.net>
Reported-by: Victor Westerhuis <victor@westerhu.is>
Reported-by: Vojtech Pavlik <vojtech@suse.cz>
Reported-and-tested-by: Rolf Fokkens <rolf@rolffokkens.nl>
Reported-and-tested-by: Thorsten Knabe <linux@thorsten-knabe.de>
Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Nix <nix@esperi.org.uk>
Cc: Takashi Iwai <tiwai@suse.com>
Link: https://lore.kernel.org/r/20210607125052.21277-2-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/bcache.h  |  1 -
 drivers/md/bcache/request.c | 13 +------------
 drivers/md/bcache/stats.c   | 14 --------------
 drivers/md/bcache/stats.h   |  1 -
 drivers/md/bcache/sysfs.c   |  4 ----
 5 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 0a4551e165abf..5fc989a6d4528 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -364,7 +364,6 @@ struct cached_dev {
 
 	/* The rest of this all shows up in sysfs */
 	unsigned int		sequential_cutoff;
-	unsigned int		readahead;
 
 	unsigned int		io_disable:1;
 	unsigned int		verify:1;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 29c231758293e..ab8ff18df32af 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -880,7 +880,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 				 struct bio *bio, unsigned int sectors)
 {
 	int ret = MAP_CONTINUE;
-	unsigned int reada = 0;
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 	struct bio *miss, *cache_bio;
 
@@ -892,14 +891,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		goto out_submit;
 	}
 
-	if (!(bio->bi_opf & REQ_RAHEAD) &&
-	    !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
-	    s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
-		reada = min_t(sector_t, dc->readahead >> 9,
-			      get_capacity(bio->bi_bdev->bd_disk) -
-			      bio_end_sector(bio));
-
-	s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
+	s->insert_bio_sectors = min(sectors, bio_sectors(bio));
 
 	s->iop.replace_key = KEY(s->iop.inode,
 				 bio->bi_iter.bi_sector + s->insert_bio_sectors,
@@ -933,9 +925,6 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	if (bch_bio_alloc_pages(cache_bio, __GFP_NOWARN|GFP_NOIO))
 		goto out_put;
 
-	if (reada)
-		bch_mark_cache_readahead(s->iop.c, s->d);
-
 	s->cache_miss	= miss;
 	s->iop.bio	= cache_bio;
 	bio_get(cache_bio);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 503aafe188dce..4c7ee5fedb9dc 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -46,7 +46,6 @@ read_attribute(cache_misses);
 read_attribute(cache_bypass_hits);
 read_attribute(cache_bypass_misses);
 read_attribute(cache_hit_ratio);
-read_attribute(cache_readaheads);
 read_attribute(cache_miss_collisions);
 read_attribute(bypassed);
 
@@ -64,7 +63,6 @@ SHOW(bch_stats)
 		    DIV_SAFE(var(cache_hits) * 100,
 			     var(cache_hits) + var(cache_misses)));
 
-	var_print(cache_readaheads);
 	var_print(cache_miss_collisions);
 	sysfs_hprint(bypassed,	var(sectors_bypassed) << 9);
 #undef var
@@ -86,7 +84,6 @@ static struct attribute *bch_stats_files[] = {
 	&sysfs_cache_bypass_hits,
 	&sysfs_cache_bypass_misses,
 	&sysfs_cache_hit_ratio,
-	&sysfs_cache_readaheads,
 	&sysfs_cache_miss_collisions,
 	&sysfs_bypassed,
 	NULL
@@ -113,7 +110,6 @@ void bch_cache_accounting_clear(struct cache_accounting *acc)
 	acc->total.cache_misses = 0;
 	acc->total.cache_bypass_hits = 0;
 	acc->total.cache_bypass_misses = 0;
-	acc->total.cache_readaheads = 0;
 	acc->total.cache_miss_collisions = 0;
 	acc->total.sectors_bypassed = 0;
 }
@@ -145,7 +141,6 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at)
 		scale_stat(&stats->cache_misses);
 		scale_stat(&stats->cache_bypass_hits);
 		scale_stat(&stats->cache_bypass_misses);
-		scale_stat(&stats->cache_readaheads);
 		scale_stat(&stats->cache_miss_collisions);
 		scale_stat(&stats->sectors_bypassed);
 	}
@@ -168,7 +163,6 @@ static void scale_accounting(struct timer_list *t)
 	move_stat(cache_misses);
 	move_stat(cache_bypass_hits);
 	move_stat(cache_bypass_misses);
-	move_stat(cache_readaheads);
 	move_stat(cache_miss_collisions);
 	move_stat(sectors_bypassed);
 
@@ -209,14 +203,6 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
 	mark_cache_stats(&c->accounting.collector, hit, bypass);
 }
 
-void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
-{
-	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
-
-	atomic_inc(&dc->accounting.collector.cache_readaheads);
-	atomic_inc(&c->accounting.collector.cache_readaheads);
-}
-
 void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d)
 {
 	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index abfaabf7e7fcf..ca4f435f7216a 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -7,7 +7,6 @@ struct cache_stat_collector {
 	atomic_t cache_misses;
 	atomic_t cache_bypass_hits;
 	atomic_t cache_bypass_misses;
-	atomic_t cache_readaheads;
 	atomic_t cache_miss_collisions;
 	atomic_t sectors_bypassed;
 };
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index cc89f3156d1aa..05ac1d6fbbf35 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -137,7 +137,6 @@ rw_attribute(io_disable);
 rw_attribute(discard);
 rw_attribute(running);
 rw_attribute(label);
-rw_attribute(readahead);
 rw_attribute(errors);
 rw_attribute(io_error_limit);
 rw_attribute(io_error_halflife);
@@ -260,7 +259,6 @@ SHOW(__bch_cached_dev)
 	var_printf(partial_stripes_expensive,	"%u");
 
 	var_hprint(sequential_cutoff);
-	var_hprint(readahead);
 
 	sysfs_print(running,		atomic_read(&dc->running));
 	sysfs_print(state,		states[BDEV_STATE(&dc->sb)]);
@@ -365,7 +363,6 @@ STORE(__cached_dev)
 	sysfs_strtoul_clamp(sequential_cutoff,
 			    dc->sequential_cutoff,
 			    0, UINT_MAX);
-	d_strtoi_h(readahead);
 
 	if (attr == &sysfs_clear_stats)
 		bch_cache_accounting_clear(&dc->accounting);
@@ -538,7 +535,6 @@ static struct attribute *bch_cached_dev_files[] = {
 	&sysfs_running,
 	&sysfs_state,
 	&sysfs_label,
-	&sysfs_readahead,
 #ifdef CONFIG_BCACHE_DEBUG
 	&sysfs_verify,
 	&sysfs_bypass_torture_test,
-- 
GitLab


From 41fe8d088e96472f63164e213de44ec77be69478 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Mon, 7 Jun 2021 20:50:52 +0800
Subject: [PATCH 2501/3804] bcache: avoid oversized read request in cache
 missing code path

In the cache missing code path of cached device, if a proper location
from the internal B+ tree is matched for a cache miss range, function
cached_dev_cache_miss() will be called in cache_lookup_fn() in the
following code block,
[code block 1]
  526         unsigned int sectors = KEY_INODE(k) == s->iop.inode
  527                 ? min_t(uint64_t, INT_MAX,
  528                         KEY_START(k) - bio->bi_iter.bi_sector)
  529                 : INT_MAX;
  530         int ret = s->d->cache_miss(b, s, bio, sectors);

Here s->d->cache_miss() is the callback function pointer initialized as
cached_dev_cache_miss(), the last parameter 'sectors' is an important
hint to calculate the size of read request to backing device of the
missing cache data.

Current calculation in above code block may generate oversized value of
'sectors', which consequently may trigger 2 different potential kernel
panics by BUG() or BUG_ON() as listed below,

1) BUG_ON() inside bch_btree_insert_key(),
[code block 2]
   886         BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
2) BUG() inside biovec_slab(),
[code block 3]
   51         default:
   52                 BUG();
   53                 return NULL;

All the above panics originate from cached_dev_cache_miss() and are
caused by the oversized parameter 'sectors'.

Inside cached_dev_cache_miss(), parameter 'sectors' is used to calculate
the size of data read from backing device for the cache missing. This
size is stored in s->insert_bio_sectors by the following lines of code,
[code block 4]
  909    s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);

Then the actual key inserting to the internal B+ tree is generated and
stored in s->iop.replace_key by the following lines of code,
[code block 5]
  911   s->iop.replace_key = KEY(s->iop.inode,
  912                    bio->bi_iter.bi_sector + s->insert_bio_sectors,
  913                    s->insert_bio_sectors);
The oversized parameter 'sectors' may trigger panic 1) by BUG_ON() from
the above code block.

And the bio sending to backing device for the missing data is allocated
with hint from s->insert_bio_sectors by the following lines of code,
[code block 6]
  926    cache_bio = bio_alloc_bioset(GFP_NOWAIT,
  927                 DIV_ROUND_UP(s->insert_bio_sectors, PAGE_SECTORS),
  928                 &dc->disk.bio_split);
The oversized parameter 'sectors' may trigger panic 2) by BUG() from the
above code block.

Now let me explain how the panics happen with the oversized 'sectors'.
In code block 5, replace_key is generated by macro KEY(). From the
definition of macro KEY(),
[code block 7]
  71 #define KEY(inode, offset, size)                                  \
  72 ((struct bkey) {                                                  \
  73      .high = (1ULL << 63) | ((__u64) (size) << 20) | (inode),     \
  74      .low = (offset)                                              \
  75 })

Here 'size' is a 16-bit wide field embedded in the 64-bit member 'high'
of struct bkey. But in code block 1, "KEY_START(k) -
bio->bi_iter.bi_sector" can very probably be larger than (1<<16) - 1,
which makes the bkey size calculation in code block 5 overflow. In one
bug report the value of parameter 'sectors' is 131072 (= 1 << 17); the
overflowed 'sectors' results in an overflowed s->insert_bio_sectors in
code block 4, which then makes the size field of s->iop.replace_key 0 in
code block 5. Then the 0-sized s->iop.replace_key is inserted into the
internal B+ tree as the cache missing check key (a special key to detect
and avoid a race between a normal write request and a cache missing read
request) as,
[code block 8]
  915   ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);

Then the 0-sized s->iop.replace_key as 3rd parameter triggers the bkey
size check BUG_ON() in code block 2, and causes the kernel panic 1).

Another kernel panic comes from code block 6, and is caused by an
oversized bvecs number derived from s->insert_bio_sectors in code block 4,
        min(sectors, bio_sectors(bio) + reada)
There are two possibilities for an oversized result,
- bio_sectors(bio) is valid, but bio_sectors(bio) + reada is oversized.
- sectors < bio_sectors(bio) + reada, but sectors is oversized.

From a bug report the result of "DIV_ROUND_UP(s->insert_bio_sectors,
PAGE_SECTORS)" from code block 6 can be 344, 282, 946, 342 and many
other values which are larger than BIO_MAX_VECS (a.k.a 256). When calling
bio_alloc_bioset() with such larger-than-256 value as the 2nd parameter,
this value will eventually be sent to biovec_slab() as parameter
'nr_vecs' in following code path,
   bio_alloc_bioset() ==> bvec_alloc() ==> biovec_slab()
Because parameter 'nr_vecs' is larger-than-256 value, the panic by BUG()
in code block 3 is triggered inside biovec_slab().

From the above analysis, we know that the 4th parameter 'sector' sent
into cached_dev_cache_miss() may cause overflow in code block 5 and 6,
and finally cause kernel panic in code block 2 and 3. And if result of
bio_sectors(bio) + reada exceeds valid bvecs number, it may also trigger
kernel panic in code block 3 from code block 6.

Now that the almost-useless readahead size for cache missing requests to
the backing device is removed, this patch can fix the oversized issue
with a simpler method.
- add a local variable size_limit,  set it by the minimum value from
  the max bkey size and max bio bvecs number.
- set s->insert_bio_sectors by the minimum value from size_limit,
  sectors, and the sectors size of bio.
- replace sectors by s->insert_bio_sectors to do bio_next_split.

By the above method with size_limit, s->insert_bio_sectors will never
result in an oversized replace_key size or bio bvecs number. And the split
bio 'miss' from bio_next_split() will always match the size of 'cache_bio',
that is the current maximum bio size we can send to the backing device for
fetching the cache missing data.

The current problematic code can be partially found since Linux v3.13-rc1,
therefore all maintained stable kernels should try to apply this fix.

Reported-by: Alexander Ullrich <ealex1979@gmail.com>
Reported-by: Diego Ercolani <diego.ercolani@gmail.com>
Reported-by: Jan Szubiak <jan.szubiak@linuxpolska.pl>
Reported-by: Marco Rebhan <me@dblsaiko.net>
Reported-by: Matthias Ferdinand <bcache@mfedv.net>
Reported-by: Victor Westerhuis <victor@westerhu.is>
Reported-by: Vojtech Pavlik <vojtech@suse.cz>
Reported-and-tested-by: Rolf Fokkens <rolf@rolffokkens.nl>
Reported-and-tested-by: Thorsten Knabe <linux@thorsten-knabe.de>
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org
Cc: Christoph Hellwig <hch@lst.de>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Nix <nix@esperi.org.uk>
Cc: Takashi Iwai <tiwai@suse.com>
Link: https://lore.kernel.org/r/20210607125052.21277-3-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/request.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ab8ff18df32af..6d1de889baeb1 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -882,6 +882,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 	int ret = MAP_CONTINUE;
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 	struct bio *miss, *cache_bio;
+	unsigned int size_limit;
 
 	s->cache_missed = 1;
 
@@ -891,7 +892,10 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 		goto out_submit;
 	}
 
-	s->insert_bio_sectors = min(sectors, bio_sectors(bio));
+	/* Limitation for valid replace key size and cache_bio bvecs number */
+	size_limit = min_t(unsigned int, BIO_MAX_VECS * PAGE_SECTORS,
+			   (1 << KEY_SIZE_BITS) - 1);
+	s->insert_bio_sectors = min3(size_limit, sectors, bio_sectors(bio));
 
 	s->iop.replace_key = KEY(s->iop.inode,
 				 bio->bi_iter.bi_sector + s->insert_bio_sectors,
@@ -903,7 +907,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
 
 	s->iop.replace = true;
 
-	miss = bio_next_split(bio, sectors, GFP_NOIO, &s->d->bio_split);
+	miss = bio_next_split(bio, s->insert_bio_sectors, GFP_NOIO,
+			      &s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
-- 
GitLab


From b53e84eed08b88fd3ff59e5c2a7f1a69d4004e32 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Tue, 1 Jun 2021 01:22:56 +0800
Subject: [PATCH 2502/3804] KVM: x86: Unload MMU on guest TLB flush if TDP
 disabled to force MMU sync

When using shadow paging, unload the guest MMU when emulating a guest TLB
flush to ensure all roots are synchronized.  From the guest's perspective,
flushing the TLB ensures any and all modifications to its PTEs will be
recognized by the CPU.

Note, unloading the MMU is overkill, but is done to mirror KVM's existing
handling of INVPCID(all) and ensure the bug is squashed.  Future cleanup
can be done to more precisely synchronize roots when servicing a guest
TLB flush.

If TDP is enabled, synchronizing the MMU is unnecessary even if nested
TDP is in play, as a "legacy" TLB flush from L1 does not invalidate L1's
TDP mappings.  For EPT, an explicit INVEPT is required to invalidate
guest-physical mappings; for NPT, guest mappings are always tagged with
an ASID and thus can only be invalidated via the VMCB's ASID control.

This bug has existed since the introduction of KVM_VCPU_FLUSH_TLB.
It was only recently exposed after Linux guests stopped flushing the
local CPU's TLB prior to flushing remote TLBs (see commit 4ce94eabac16,
"x86/mm/tlb: Flush remote and local TLBs concurrently"), but is also
visible in Windows 10 guests.

Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Fixes: f38a7b75267f ("KVM: X86: support paravirtualized help for TLB shootdowns")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
[sean: massaged comment and changelog]
Message-Id: <20210531172256.2908-1-jiangshanlai@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e2144eedaf799..9dd23bdfc6cc1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
 static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.tlb_flush;
+
+	if (!tdp_enabled) {
+               /*
+		 * A TLB flush on behalf of the guest is equivalent to
+		 * INVPCID(all), toggling CR4.PGE, etc., which requires
+		 * a forced sync of the shadow page tables.  Unload the
+		 * entire MMU here and the subsequent load will sync the
+		 * shadow page tables, and also flush the TLB.
+		 */
+		kvm_mmu_unload(vcpu);
+		return;
+	}
+
 	static_call(kvm_x86_tlb_flush_guest)(vcpu);
 }
 
-- 
GitLab


From da27a83fd6cc7780fea190e1f5c19e87019da65c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 8 Jun 2021 15:31:42 -0400
Subject: [PATCH 2503/3804] kvm: avoid speculation-based attacks from
 out-of-range memslot accesses

KVM's mechanism for accessing guest memory translates a guest physical
address (gpa) to a host virtual address using the right-shifted gpa
(also known as gfn) and a struct kvm_memory_slot.  The translation is
performed in __gfn_to_hva_memslot using the following formula:

      hva = slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE

It is expected that gfn falls within the boundaries of the guest's
physical memory.  However, a guest can access invalid physical addresses
in such a way that the gfn is invalid.

__gfn_to_hva_memslot is called from kvm_vcpu_gfn_to_hva_prot, which first
retrieves a memslot through __gfn_to_memslot.  While __gfn_to_memslot
does check that the gfn falls within the boundaries of the guest's
physical memory or not, a CPU can speculate the result of the check and
continue execution speculatively using an illegal gfn. The speculation
can result in calculating an out-of-bounds hva.  If the resulting host
virtual address is used to load another guest physical address, this
is effectively a Spectre gadget consisting of two consecutive reads,
the second of which is data dependent on the first.

Right now it's not clear if there are any cases in which this is
exploitable.  One interesting case was reported by the original author
of this patch, and involves visiting guest page tables on x86.  Right
now these are not vulnerable because the hva read goes through get_user(),
which contains an LFENCE speculation barrier.  However, there are
patches in progress for x86 uaccess.h to mask kernel addresses instead of
using LFENCE; once these land, a guest could use speculation to read
from the VMM's ring 3 address space.  Other architectures such as ARM
already use the address masking method, and would be susceptible to
this same kind of data-dependent access gadgets.  Therefore, this patch
proactively protects from these attacks by masking out-of-bounds gfns
in __gfn_to_hva_memslot, which blocks speculation of invalid hvas.

Sean Christopherson noted that this patch does not cover
kvm_read_guest_offset_cached.  This however is limited to a few bytes
past the end of the cache, and therefore it is unlikely to be useful in
the context of building a chain of data dependent accesses.

Reported-by: Artemiy Margaritov <artemiy.margaritov@gmail.com>
Co-developed-by: Artemiy Margaritov <artemiy.margaritov@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 76102efbf0796..74995f0a2a3cb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1185,7 +1185,15 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
 static inline unsigned long
 __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+	/*
+	 * The index was checked originally in search_memslots.  To avoid
+	 * that a malicious guest builds a Spectre gadget out of e.g. page
+	 * table walks, do not let the processor speculate loads outside
+	 * the guest's registered memslots.
+	 */
+	unsigned long offset = array_index_nospec(gfn - slot->base_gfn,
+						  slot->npages);
+	return slot->userspace_addr + offset * PAGE_SIZE;
 }
 
 static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
-- 
GitLab


From 49bfcbfd989a8f1f23e705759a6bb099de2cff9f Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Tue, 8 Jun 2021 11:06:41 +0300
Subject: [PATCH 2504/3804] net: rds: fix memory leak in rds_recvmsg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Syzbot reported a memory leak in rds. The problem
was a refcount that was not put in case of error.

int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
		int msg_flags)
{
...

	if (!rds_next_incoming(rs, &inc)) {
		...
	}

After this "if" inc refcount incremented and

	if (rds_cmsg_recv(inc, msg, rs)) {
		ret = -EFAULT;
		goto out;
	}
...
out:
	return ret;
}

If rds_cmsg_recv() fails, the refcount won't be
decremented. It's easy to see from the ftrace log that
rds_inc_addref() doesn't have a matching rds_inc_put() in
rds_recvmsg() after rds_cmsg_recv()

 1)               |  rds_recvmsg() {
 1)   3.721 us    |    rds_inc_addref();
 1)   3.853 us    |    rds_message_inc_copy_to_user();
 1) + 10.395 us   |    rds_cmsg_recv();
 1) + 34.260 us   |  }

Fixes: bdbe6fbc6a2f ("RDS: recv.c")
Reported-and-tested-by: syzbot+5134cdf021c4ed5aaa5f@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/recv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/recv.c b/net/rds/recv.c
index 4db109fb6ec28..5b426dc3634d1 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -714,7 +714,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 		if (rds_cmsg_recv(inc, msg, rs)) {
 			ret = -EFAULT;
-			goto out;
+			break;
 		}
 		rds_recvmsg_zcookie(rs, msg);
 
-- 
GitLab


From 1650bdb1c516c248fb06f6d076559ff6437a5853 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 8 Jun 2021 14:15:35 +0300
Subject: [PATCH 2505/3804] net: dsa: felix: re-enable TX flow control in
 ocelot_port_flush()

Because flow control is set up statically in ocelot_init_port(), and not
in phylink_mac_link_up(), what happens is that after the blamed commit,
the flow control remains disabled after the port flushing procedure.

Fixes: eb4733d7cffc ("net: dsa: felix: implement port flushing on .phylink_mac_link_down")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 0c4283319d7f4..adfb9781799ee 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -379,6 +379,7 @@ static u32 ocelot_read_eq_avail(struct ocelot *ocelot, int port)
 
 int ocelot_port_flush(struct ocelot *ocelot, int port)
 {
+	unsigned int pause_ena;
 	int err, val;
 
 	/* Disable dequeuing from the egress queues */
@@ -387,6 +388,7 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
 		       QSYS_PORT_MODE, port);
 
 	/* Disable flow control */
+	ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena);
 	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0);
 
 	/* Disable priority flow control */
@@ -422,6 +424,9 @@ int ocelot_port_flush(struct ocelot *ocelot, int port)
 	/* Clear flushing again. */
 	ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port);
 
+	/* Re-enable flow control */
+	ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena);
+
 	return err;
 }
 EXPORT_SYMBOL(ocelot_port_flush);
-- 
GitLab


From 504fd6a5390c30b1b7670768e314dd5d473da06a Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Tue, 8 Jun 2021 19:42:54 +0300
Subject: [PATCH 2506/3804] net: ena: fix DMA mapping function issues in XDP

This patch fixes several bugs found when (DMA/LLQ) mapping a packet for
transmission. The mapping procedure makes the transmitted packet
accessible by the device.
When using LLQ, this requires copying the packet's header to push header
(which would be passed to LLQ) and creating DMA mapping for the payload
(if the packet doesn't fit the maximum push length).
When not using LLQ, we map the whole packet with DMA.

The following bugs are fixed in the code:
    1. Add support for non-LLQ machines:
       The ena_xdp_tx_map_frame() function assumed that LLQ is
       supported, and never mapped the whole packet using DMA. On some
       instances, which don't support LLQ, this causes loss of traffic.

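    2. Wrong DMA buffer length passed to device:
       When using LLQ, the first 'tx_max_header_size' bytes of the
       packet would be copied to push header. The rest of the packet
       would be copied to a DMA'd buffer. However, the buffer length
       reported to the device was the whole packet length instead of
       the length of the remaining (DMA mapped) part.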

    3. Freeing the XDP buffer twice in case of a mapping error:
       In case a buffer DMA mapping fails, the function uses
       xdp_return_frame_rx_napi() to free the RX buffer and returns from
       the function with an error. XDP frames that fail to xmit get
       freed by the kernel and so there is no need for this call.

Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action")
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 54 ++++++++++----------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 881f88754bf6b..52571486705ee 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -236,36 +236,48 @@ static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
 static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
 				struct ena_tx_buffer *tx_info,
 				struct xdp_frame *xdpf,
-				void **push_hdr,
-				u32 *push_len)
+				struct ena_com_tx_ctx *ena_tx_ctx)
 {
 	struct ena_adapter *adapter = xdp_ring->adapter;
 	struct ena_com_buf *ena_buf;
-	dma_addr_t dma = 0;
+	int push_len = 0;
+	dma_addr_t dma;
+	void *data;
 	u32 size;
 
 	tx_info->xdpf = xdpf;
+	data = tx_info->xdpf->data;
 	size = tx_info->xdpf->len;
-	ena_buf = tx_info->bufs;
 
-	/* llq push buffer */
-	*push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
-	*push_hdr = tx_info->xdpf->data;
+	if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		/* Designate part of the packet for LLQ */
+		push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+
+		ena_tx_ctx->push_header = data;
+
+		size -= push_len;
+		data += push_len;
+	}
+
+	ena_tx_ctx->header_len = push_len;
 
-	if (size - *push_len > 0) {
+	if (size > 0) {
 		dma = dma_map_single(xdp_ring->dev,
-				     *push_hdr + *push_len,
-				     size - *push_len,
+				     data,
+				     size,
 				     DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
 			goto error_report_dma_error;
 
-		tx_info->map_linear_data = 1;
-		tx_info->num_of_bufs = 1;
-	}
+		tx_info->map_linear_data = 0;
 
-	ena_buf->paddr = dma;
-	ena_buf->len = size;
+		ena_buf = tx_info->bufs;
+		ena_buf->paddr = dma;
+		ena_buf->len = size;
+
+		ena_tx_ctx->ena_bufs = ena_buf;
+		ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1;
+	}
 
 	return 0;
 
@@ -274,10 +286,6 @@ error_report_dma_error:
 			  &xdp_ring->syncp);
 	netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
 
-	xdp_return_frame_rx_napi(tx_info->xdpf);
-	tx_info->xdpf = NULL;
-	tx_info->num_of_bufs = 0;
-
 	return -EINVAL;
 }
 
@@ -289,8 +297,6 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 	struct ena_com_tx_ctx ena_tx_ctx = {};
 	struct ena_tx_buffer *tx_info;
 	u16 next_to_use, req_id;
-	void *push_hdr;
-	u32 push_len;
 	int rc;
 
 	next_to_use = xdp_ring->next_to_use;
@@ -298,15 +304,11 @@ static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
 	tx_info = &xdp_ring->tx_buffer_info[req_id];
 	tx_info->num_of_bufs = 0;
 
-	rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len);
+	rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx);
 	if (unlikely(rc))
 		return rc;
 
-	ena_tx_ctx.ena_bufs = tx_info->bufs;
-	ena_tx_ctx.push_header = push_hdr;
-	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
 	ena_tx_ctx.req_id = req_id;
-	ena_tx_ctx.header_len = push_len;
 
 	rc = ena_xmit_common(dev,
 			     xdp_ring,
-- 
GitLab


From 8929ef8d4dfd53a05913e22561784ece5f6419c7 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Date: Thu, 8 Apr 2021 21:24:36 +0100
Subject: [PATCH 2507/3804] media: dt-bindings: media: renesas,drif: Fix fck
 definition

dt_binding_check reports the below error with the latest schema:

Documentation/devicetree/bindings/media/renesas,drif.yaml:
  properties:clock-names:maxItems: False schema does not allow 1
Documentation/devicetree/bindings/media/renesas,drif.yaml:
  ignoring, error in schema: properties: clock-names: maxItems

This patch fixes the problem.

Signed-off-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210408202436.3706-1-fabrizio.castro.jz@renesas.com
---
 Documentation/devicetree/bindings/media/renesas,drif.yaml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/renesas,drif.yaml b/Documentation/devicetree/bindings/media/renesas,drif.yaml
index ce505a7c006aa..9cd56ff2c316c 100644
--- a/Documentation/devicetree/bindings/media/renesas,drif.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,drif.yaml
@@ -67,9 +67,7 @@ properties:
     maxItems: 1
 
   clock-names:
-    maxItems: 1
-    items:
-      - const: fck
+    const: fck
 
   resets:
     maxItems: 1
-- 
GitLab


From f2386cf7c5f4ff5d7b584f5d92014edd7df6c676 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Tue, 8 Jun 2021 23:21:07 +0200
Subject: [PATCH 2508/3804] net: lantiq: disable interrupt before sheduling
 NAPI

This patch fixes TX hangs with threaded NAPI enabled. The scheduled
NAPI seems to be executed in parallel with the interrupt on second
thread. Sometimes it happens that ltq_dma_disable_irq() is executed
after xrx200_tx_housekeeping(). The symptom is that TX interrupts
are disabled in the DMA controller. As a result, the TX hangs after
a few seconds of the iperf test. Scheduling NAPI after disabling
interrupts fixes this issue.

Tested on Lantiq xRX200 (BT Home Hub 5A).

Fixes: 9423361da523 ("net: lantiq: Disable IRQs only if NAPI gets scheduled ")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 36dc3e5f62189..0e10d8aeffe18 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -352,8 +352,8 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr)
 	struct xrx200_chan *ch = ptr;
 
 	if (napi_schedule_prep(&ch->napi)) {
-		__napi_schedule(&ch->napi);
 		ltq_dma_disable_irq(&ch->dma);
+		__napi_schedule(&ch->napi);
 	}
 
 	ltq_dma_ack_irq(&ch->dma);
-- 
GitLab


From 7d2201d46218df951004fc48897f89c6eb510b69 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 7 Jun 2021 15:49:34 -0500
Subject: [PATCH 2509/3804] ima: Fix fall-through warning for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a
fall-through warning by explicitly adding a break statement instead
of just letting the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_template_lib.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c
index 8e2a121af5e11..ca017cae73eb3 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -119,6 +119,7 @@ static void ima_show_template_data_ascii(struct seq_file *m,
 		default:
 			break;
 		}
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 4422829e8053068e0225e4d0ef42dc41ea7c9ef5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 9 Jun 2021 01:49:13 -0400
Subject: [PATCH 2510/3804] kvm: fix previous commit for 32-bit builds

array_index_nospec does not work for uint64_t on 32-bit builds.
However, the size of a memory slot must be less than 20 bits wide
on those systems, since the memory slot must fit in the user
address space.  So just store it in an unsigned long.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 74995f0a2a3cb..8583ed3ff3447 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1191,8 +1191,8 @@ __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 	 * table walks, do not let the processor speculate loads outside
 	 * the guest's registered memslots.
 	 */
-	unsigned long offset = array_index_nospec(gfn - slot->base_gfn,
-						  slot->npages);
+	unsigned long offset = gfn - slot->base_gfn;
+	offset = array_index_nospec(offset, slot->npages);
 	return slot->userspace_addr + offset * PAGE_SIZE;
 }
 
-- 
GitLab


From 484cea4f362e1eeb5c869abbfb5f90eae6421b38 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 8 Jun 2021 16:36:18 +0200
Subject: [PATCH 2511/3804] x86/fpu: Prevent state corruption in
 __fpu__restore_sig()

The non-compacted slowpath uses __copy_from_user() and copies the entire
user buffer into the kernel buffer, verbatim.  This means that the kernel
buffer may now contain entirely invalid state on which XRSTOR will #GP.
validate_user_xstate_header() can detect some of that corruption, but that
leaves the onus on callers to clear the buffer.

Prior to XSAVES support, it was possible just to reinitialize the buffer,
completely, but with supervisor states that is no longer possible as the
buffer clearing code split got it backwards. Fixing that is possible but
not corrupting the state in the first place is more robust.

Avoid corruption of the kernel XSAVE buffer by using copy_user_to_xstate()
which validates the XSAVE header contents before copying the actual states
to the kernel. copy_user_to_xstate() was previously only called for
compacted-format kernel buffers, but it works for both compacted and
non-compacted forms.

Using it for the non-compacted form is slower because of multiple
__copy_from_user() operations, but that cost is less important than robust
code in an already slow path.

[ Changelog polished by Dave Hansen ]

Fixes: b860eb8dce59 ("x86/fpu/xstate: Define new functions for clearing fpregs and xstates")
Reported-by: syzbot+2067e764dbcd10721e2e@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210608144345.611833074@linutronix.de
---
 arch/x86/kernel/fpu/signal.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a4ec65317a7fa..d5bc96a536c26 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -405,14 +405,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	if (use_xsave() && !fx_only) {
 		u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;
 
-		if (using_compacted_format()) {
-			ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-		} else {
-			ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-			if (!ret && state_size > offsetof(struct xregs_state, header))
-				ret = validate_user_xstate_header(&fpu->state.xsave.header);
-		}
+		ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		if (ret)
 			goto err_out;
 
-- 
GitLab


From d8778e393afa421f1f117471144f8ce6deb6953a Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Tue, 8 Jun 2021 16:36:19 +0200
Subject: [PATCH 2512/3804] x86/fpu: Invalidate FPU state after a failed XRSTOR
 from a user buffer

Both Intel and AMD consider it to be architecturally valid for XRSTOR to
fail with #PF but nonetheless change the register state.  The actual
conditions under which this might occur are unclear [1], but it seems
plausible that this might be triggered if one sibling thread unmaps a page
and invalidates the shared TLB while another sibling thread is executing
XRSTOR on the page in question.

__fpu__restore_sig() can execute XRSTOR while the hardware registers
are preserved on behalf of a different victim task (using the
fpu_fpregs_owner_ctx mechanism), and, in theory, XRSTOR could fail but
modify the registers.

If this happens, then there is a window in which __fpu__restore_sig()
could schedule out and the victim task could schedule back in without
reloading its own FPU registers. This would result in part of the FPU
state that __fpu__restore_sig() was attempting to load leaking into the
victim task's user-visible state.

Invalidate preserved FPU registers on XRSTOR failure to prevent this
situation from corrupting any state.

[1] Frequent readers of the errata lists might imagine "complex
    microarchitectural conditions".

Fixes: 1d731e731c4c ("x86/fpu: Add a fastpath to __fpu__restore_sig()")
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210608144345.758116583@linutronix.de
---
 arch/x86/kernel/fpu/signal.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index d5bc96a536c26..4ab9aeb9a9630 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -369,6 +369,25 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			fpregs_unlock();
 			return 0;
 		}
+
+		/*
+		 * The above did an FPU restore operation, restricted to
+		 * the user portion of the registers, and failed, but the
+		 * microcode might have modified the FPU registers
+		 * nevertheless.
+		 *
+		 * If the FPU registers do not belong to current, then
+		 * invalidate the FPU register state otherwise the task might
+		 * preempt current and return to user space with corrupted
+		 * FPU registers.
+		 *
+		 * In case current owns the FPU registers then no further
+		 * action is required. The fixup below will handle it
+		 * correctly.
+		 */
+		if (test_thread_flag(TIF_NEED_FPU_LOAD))
+			__cpu_invalidate_fpregs_state();
+
 		fpregs_unlock();
 	} else {
 		/*
-- 
GitLab


From 40d9e03f414d8b837926a4460788682e59a8c654 Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rui.silva@linaro.org>
Date: Mon, 7 Jun 2021 18:00:54 +0100
Subject: [PATCH 2513/3804] MAINTAINERS: usb: add entry for isp1760

Adding support for isp1763 brought a little revival to this driver, so add
an entry in the MAINTAINERS file with me as maintainer.

Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Rui Miguel Silva <rui.silva@linaro.org>
Link: https://lore.kernel.org/r/20210607170054.220975-1-rui.silva@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21901f10..28ebc6423cf14 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18869,6 +18869,13 @@ S:	Maintained
 F:	drivers/usb/host/isp116x*
 F:	include/linux/usb/isp116x.h
 
+USB ISP1760 DRIVER
+M:	Rui Miguel Silva <rui.silva@linaro.org>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	drivers/usb/isp1760/*
+F:	Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
+
 USB LAN78XX ETHERNET DRIVER
 M:	Woojung Huh <woojung.huh@microchip.com>
 M:	UNGLinuxDriver@microchip.com
-- 
GitLab


From 3370139745853f7826895293e8ac3aec1430508e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Mon, 7 Jun 2021 17:53:44 -0700
Subject: [PATCH 2514/3804] USB: f_ncm: ncm_bitrate (speed) is unsigned
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[  190.544755] configfs-gadget gadget: notify speed -44967296

This is because 4250000000 - 2**32 is -44967296.

Fixes: 9f6ce4240a2b ("usb: gadget: f_ncm.c added")
Cc: Brooke Basile <brookebasile@gmail.com>
Cc: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@nokia.com>
Cc: Linux USB Mailing List <linux-usb@vger.kernel.org>
Acked-By: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210608005344.3762668-1-zenczykowski@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_ncm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 019bea8e09cce..0d23c6c11a137 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -583,7 +583,7 @@ static void ncm_do_notify(struct f_ncm *ncm)
 		data[0] = cpu_to_le32(ncm_bitrate(cdev->gadget));
 		data[1] = data[0];
 
-		DBG(cdev, "notify speed %d\n", ncm_bitrate(cdev->gadget));
+		DBG(cdev, "notify speed %u\n", ncm_bitrate(cdev->gadget));
 		ncm->notify_state = NCM_NOTIFY_CONNECT;
 		break;
 	}
-- 
GitLab


From 1958ff5ad2d4908b44a72bcf564dfe67c981e7fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 8 Jun 2021 01:54:38 -0700
Subject: [PATCH 2515/3804] usb: f_ncm: only first packet of aggregate needs to
 start timer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reasoning for this change is that if we already had
a packet pending, then we also already had a pending timer,
and as such there is no need to reschedule it.

This also prevents packets getting delayed 60 ms worst case
under a tiny packet every 290us transmit load, by keeping the
timeout always relative to the first queued up packet.
(300us delay * 16KB max aggregation / 80 byte packet =~ 60 ms)

As such the first packet is now at most delayed by 300us.

Under low transmit load, this will simply result in us sending
a shorter aggregate, as originally intended.

This patch has the benefit of greatly reducing (by a factor of ~10
with 1500 byte frames aggregated into 16 kiB) the number of
(potentially pretty costly) updates to the hrtimer.

Cc: Brooke Basile <brookebasile@gmail.com>
Cc: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Link: https://lore.kernel.org/r/20210608085438.813960-1-zenczykowski@gmail.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_ncm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 0d23c6c11a137..855127249f242 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1101,11 +1101,11 @@ static struct sk_buff *ncm_wrap_ntb(struct gether *port,
 			ncm->ndp_dgram_count = 1;
 
 			/* Note: we skip opts->next_ndp_index */
-		}
 
-		/* Delay the timer. */
-		hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS,
-			      HRTIMER_MODE_REL_SOFT);
+			/* Start the timer. */
+			hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS,
+				      HRTIMER_MODE_REL_SOFT);
+		}
 
 		/* Add the datagram position entries */
 		ntb_ndp = skb_put_zero(ncm->skb_tx_ndp, dgram_idx_len);
-- 
GitLab


From d1658268e43980c071dbffc3d894f6f6c4b6732a Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 27 May 2021 10:45:34 -0500
Subject: [PATCH 2516/3804] usb: pci-quirks: disable D3cold on xhci suspend for
 s2idle on AMD Renoir

The XHCI controller is required to enter D3hot rather than D3cold for AMD
s2idle on this hardware generation.

Otherwise, the 'Controller Not Ready' (CNR) bit is not being cleared by
host in resume and eventually this results in xhci resume failures during
the s2idle wakeup.

Link: https://lore.kernel.org/linux-usb/1612527609-7053-1-git-send-email-Prike.Liang@amd.com/
Suggested-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Cc: stable <stable@vger.kernel.org> # 5.11+
Link: https://lore.kernel.org/r/20210527154534.8900-1-mario.limonciello@amd.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 7 ++++++-
 drivers/usb/host/xhci.h     | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7bc18cf8042cc..18c2bbddf080b 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -59,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI		0x1138
 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI		0x461e
 
+#define PCI_DEVICE_ID_AMD_RENOIR_XHCI			0x1639
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4			0x43b9
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3			0x43ba
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_2			0x43bb
@@ -182,6 +183,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		(pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1)))
 		xhci->quirks |= XHCI_U2_DISABLE_WAKE;
 
+	if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+		pdev->device == PCI_DEVICE_ID_AMD_RENOIR_XHCI)
+		xhci->quirks |= XHCI_BROKEN_D3COLD;
+
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
 		xhci->quirks |= XHCI_LPM_SUPPORT;
 		xhci->quirks |= XHCI_INTEL_HOST;
@@ -539,7 +544,7 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
 	 * Systems with the TI redriver that loses port status change events
 	 * need to have the registers polled during D3, so avoid D3cold.
 	 */
-	if (xhci->quirks & XHCI_COMP_MODE_QUIRK)
+	if (xhci->quirks & (XHCI_COMP_MODE_QUIRK | XHCI_BROKEN_D3COLD))
 		pci_d3cold_disable(pdev);
 
 	if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 2595a8f057c43..e417f5ce13d18 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1892,6 +1892,7 @@ struct xhci_hcd {
 #define XHCI_DISABLE_SPARSE	BIT_ULL(38)
 #define XHCI_SG_TRB_CACHE_SIZE_QUIRK	BIT_ULL(39)
 #define XHCI_NO_SOFT_RETRY	BIT_ULL(40)
+#define XHCI_BROKEN_D3COLD	BIT_ULL(41)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
-- 
GitLab


From 90c4d05780d47e14a50e11a7f17373104cd47d25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Mon, 7 Jun 2021 21:41:41 -0700
Subject: [PATCH 2517/3804] usb: fix various gadgets null ptr deref on 10gbps
 cabling.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This avoids a null pointer dereference in
f_{ecm,eem,hid,loopback,printer,rndis,serial,sourcesink,subset,tcm}
by simply reusing the 5gbps config for 10gbps.

Fixes: eaef50c76057 ("usb: gadget: Update usb_assign_descriptors for SuperSpeedPlus")
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Michael R Sweet <msweet@msweet.org>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Pawel Laszczak <pawell@cadence.com>
Cc: Peter Chen <peter.chen@nxp.com>
Cc: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Cc: Wei Ming Chen <jj251510319013@gmail.com>
Cc: Will McVicker <willmcvicker@google.com>
Cc: Zqiang <qiang.zhang@windriver.com>
Reviewed-By: Lorenzo Colitti <lorenzo@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Link: https://lore.kernel.org/r/20210608044141.3898496-1-zenczykowski@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_ecm.c        | 2 +-
 drivers/usb/gadget/function/f_eem.c        | 2 +-
 drivers/usb/gadget/function/f_hid.c        | 3 ++-
 drivers/usb/gadget/function/f_loopback.c   | 2 +-
 drivers/usb/gadget/function/f_printer.c    | 3 ++-
 drivers/usb/gadget/function/f_rndis.c      | 2 +-
 drivers/usb/gadget/function/f_serial.c     | 2 +-
 drivers/usb/gadget/function/f_sourcesink.c | 3 ++-
 drivers/usb/gadget/function/f_subset.c     | 2 +-
 drivers/usb/gadget/function/f_tcm.c        | 3 ++-
 10 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c
index 7f5cf488b2b1e..ffe2486fce71c 100644
--- a/drivers/usb/gadget/function/f_ecm.c
+++ b/drivers/usb/gadget/function/f_ecm.c
@@ -791,7 +791,7 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f)
 		fs_ecm_notify_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, ecm_fs_function, ecm_hs_function,
-			ecm_ss_function, NULL);
+			ecm_ss_function, ecm_ss_function);
 	if (status)
 		goto fail;
 
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index cfcc4e81fb776..e6cb38439c411 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -302,7 +302,7 @@ static int eem_bind(struct usb_configuration *c, struct usb_function *f)
 	eem_ss_out_desc.bEndpointAddress = eem_fs_out_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, eem_fs_function, eem_hs_function,
-			eem_ss_function, NULL);
+			eem_ss_function, eem_ss_function);
 	if (status)
 		goto fail;
 
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 1125f4715830d..e556993081170 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -802,7 +802,8 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
 		hidg_fs_out_ep_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, hidg_fs_descriptors,
-			hidg_hs_descriptors, hidg_ss_descriptors, NULL);
+			hidg_hs_descriptors, hidg_ss_descriptors,
+			hidg_ss_descriptors);
 	if (status)
 		goto fail;
 
diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c
index b56ad7c3838b8..ae41f556eb752 100644
--- a/drivers/usb/gadget/function/f_loopback.c
+++ b/drivers/usb/gadget/function/f_loopback.c
@@ -207,7 +207,7 @@ autoconf_fail:
 	ss_loop_sink_desc.bEndpointAddress = fs_loop_sink_desc.bEndpointAddress;
 
 	ret = usb_assign_descriptors(f, fs_loopback_descs, hs_loopback_descs,
-			ss_loopback_descs, NULL);
+			ss_loopback_descs, ss_loopback_descs);
 	if (ret)
 		return ret;
 
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index f47fdc1fa7f13..59d382fe1bbfc 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -1101,7 +1101,8 @@ autoconf_fail:
 	ss_ep_out_desc.bEndpointAddress = fs_ep_out_desc.bEndpointAddress;
 
 	ret = usb_assign_descriptors(f, fs_printer_function,
-			hs_printer_function, ss_printer_function, NULL);
+			hs_printer_function, ss_printer_function,
+			ss_printer_function);
 	if (ret)
 		return ret;
 
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c
index 0739b05a0ef7b..ee95e8f5f9d48 100644
--- a/drivers/usb/gadget/function/f_rndis.c
+++ b/drivers/usb/gadget/function/f_rndis.c
@@ -789,7 +789,7 @@ rndis_bind(struct usb_configuration *c, struct usb_function *f)
 	ss_notify_desc.bEndpointAddress = fs_notify_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, eth_fs_function, eth_hs_function,
-			eth_ss_function, NULL);
+			eth_ss_function, eth_ss_function);
 	if (status)
 		goto fail;
 
diff --git a/drivers/usb/gadget/function/f_serial.c b/drivers/usb/gadget/function/f_serial.c
index e627138463504..1ed8ff0ac2d31 100644
--- a/drivers/usb/gadget/function/f_serial.c
+++ b/drivers/usb/gadget/function/f_serial.c
@@ -233,7 +233,7 @@ static int gser_bind(struct usb_configuration *c, struct usb_function *f)
 	gser_ss_out_desc.bEndpointAddress = gser_fs_out_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, gser_fs_function, gser_hs_function,
-			gser_ss_function, NULL);
+			gser_ss_function, gser_ss_function);
 	if (status)
 		goto fail;
 	dev_dbg(&cdev->gadget->dev, "generic ttyGS%d: %s speed IN/%s OUT/%s\n",
diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
index 5a201ba7b155b..1abf08e5164af 100644
--- a/drivers/usb/gadget/function/f_sourcesink.c
+++ b/drivers/usb/gadget/function/f_sourcesink.c
@@ -431,7 +431,8 @@ no_iso:
 	ss_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress;
 
 	ret = usb_assign_descriptors(f, fs_source_sink_descs,
-			hs_source_sink_descs, ss_source_sink_descs, NULL);
+			hs_source_sink_descs, ss_source_sink_descs,
+			ss_source_sink_descs);
 	if (ret)
 		return ret;
 
diff --git a/drivers/usb/gadget/function/f_subset.c b/drivers/usb/gadget/function/f_subset.c
index 4d945254905d9..51c1cae162d9b 100644
--- a/drivers/usb/gadget/function/f_subset.c
+++ b/drivers/usb/gadget/function/f_subset.c
@@ -358,7 +358,7 @@ geth_bind(struct usb_configuration *c, struct usb_function *f)
 		fs_subset_out_desc.bEndpointAddress;
 
 	status = usb_assign_descriptors(f, fs_eth_function, hs_eth_function,
-			ss_eth_function, NULL);
+			ss_eth_function, ss_eth_function);
 	if (status)
 		goto fail;
 
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 7acb507946e67..de161ee0b1f9b 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -2057,7 +2057,8 @@ static int tcm_bind(struct usb_configuration *c, struct usb_function *f)
 	uasp_fs_cmd_desc.bEndpointAddress = uasp_ss_cmd_desc.bEndpointAddress;
 
 	ret = usb_assign_descriptors(f, uasp_fs_function_desc,
-			uasp_hs_function_desc, uasp_ss_function_desc, NULL);
+			uasp_hs_function_desc, uasp_ss_function_desc,
+			uasp_ss_function_desc);
 	if (ret)
 		goto ep_fail;
 
-- 
GitLab


From 12f7764ac61200e32c916f038bdc08f884b0b604 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 8 Jun 2021 16:36:20 +0200
Subject: [PATCH 2518/3804] x86/process: Check PF_KTHREAD and not current->mm
 for kernel threads

switch_fpu_finish() checks current->mm as indicator for kernel threads.
That's wrong because kernel threads can temporarily use a mm of a user
process via kthread_use_mm().

Check the task flags for PF_KTHREAD instead.

Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210608144345.912645927@linutronix.de
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index ceeba9f631722..18382ac1ecc44 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -578,7 +578,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 	 * PKRU state is switched eagerly because it needs to be valid before we
 	 * return to userland e.g. for a copy_to_user() operation.
 	 */
-	if (current->mm) {
+	if (!(current->flags & PF_KTHREAD)) {
 		pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
 		if (pk)
 			pkru_val = pk->pkru;
-- 
GitLab


From 032e288097a553db5653af552dd8035cd2a0ba96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 8 Jun 2021 19:44:59 -0700
Subject: [PATCH 2519/3804] usb: fix various gadget panics on 10gbps cabling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

usb_assign_descriptors() is called with 5 parameters,
the last 4 of which are the usb_descriptor_header for:
  full-speed (USB1.1 - 12Mbps [including USB1.0 low-speed @ 1.5Mbps]),
  high-speed (USB2.0 - 480Mbps),
  super-speed (USB3.0 - 5Gbps),
  super-speed-plus (USB3.1 - 10Gbps).

The differences between full/high/super-speed descriptors are usually
substantial (due to changes in the maximum usb block size from 64 to 512
to 1024 bytes and other differences in the specs), while the difference
between 5 and 10Gbps descriptors may be as little as nothing
(in many cases the same tuning is simply good enough).

However if a gadget driver calls usb_assign_descriptors() with
a NULL descriptor for super-speed-plus and is then used on a max 10gbps
configuration, the kernel will crash with a null pointer dereference,
when a 10gbps capable device port + cable + host port combination shows up.
(This wouldn't happen if the gadget max-speed was set to 5gbps, but
it of course defaults to the maximum, and there's no real reason to
artificially limit it)

The fix is to simply use the 5gbps descriptor as the 10gbps descriptor,
if a 10gbps descriptor wasn't provided.

Obviously this won't fix the problem if the 5gbps descriptor is also
NULL, but such cases can't be so trivially solved (and any such gadgets
are unlikely to be used with USB3 ports anyway).

Cc: Felipe Balbi <balbi@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210609024459.1126080-1-zenczykowski@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/config.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/usb/gadget/config.c b/drivers/usb/gadget/config.c
index 8bb25773b61e9..05507606b2b42 100644
--- a/drivers/usb/gadget/config.c
+++ b/drivers/usb/gadget/config.c
@@ -164,6 +164,14 @@ int usb_assign_descriptors(struct usb_function *f,
 {
 	struct usb_gadget *g = f->config->cdev->gadget;
 
+	/* super-speed-plus descriptor falls back to super-speed one,
+	 * if such a descriptor was provided, thus avoiding a NULL
+	 * pointer dereference if a 5gbps capable gadget is used with
+	 * a 10gbps capable config (device port + cable + host port)
+	 */
+	if (!ssp)
+		ssp = ss;
+
 	if (fs) {
 		f->fs_descriptors = usb_copy_descriptors(fs);
 		if (!f->fs_descriptors)
-- 
GitLab


From 5ab14ab1f2db24ffae6c5c39a689660486962e6e Mon Sep 17 00:00:00 2001
From: Kyle Tso <kyletso@google.com>
Date: Sun, 6 Jun 2021 16:14:52 +0800
Subject: [PATCH 2520/3804] usb: typec: tcpm: Do not finish VDM AMS for
 retrying Responses

If the VDM responses couldn't be sent successfully, it doesn't need to
finish the AMS until the retry count reaches the limit.

Fixes: 0908c5aca31e ("usb: typec: tcpm: AMS and Collision Avoidance")
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Cc: stable <stable@vger.kernel.org>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Kyle Tso <kyletso@google.com>
Link: https://lore.kernel.org/r/20210606081452.764032-1-kyletso@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index a7c336f56849c..63470cf7f4cd9 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -1942,6 +1942,9 @@ static void vdm_run_state_machine(struct tcpm_port *port)
 			tcpm_log(port, "VDM Tx error, retry");
 			port->vdm_retries++;
 			port->vdm_state = VDM_STATE_READY;
+			if (PD_VDO_SVDM(vdo_hdr) && PD_VDO_CMDT(vdo_hdr) == CMDT_INIT)
+				tcpm_ams_finish(port);
+		} else {
 			tcpm_ams_finish(port);
 		}
 		break;
-- 
GitLab


From 1a85b350a7741776a406005b943e3dec02c424ed Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 7 Jun 2021 23:50:05 +0300
Subject: [PATCH 2521/3804] usb: typec: intel_pmc_mux: Put fwnode in error case
 during ->probe()

device_get_next_child_node() bumps the reference count of the returned node.
We have to balance it whenever we return to the caller.

Fixes: 6701adfa9693 ("usb: typec: driver for Intel PMC mux control")
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210607205007.71458-1-andy.shevchenko@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux/intel_pmc_mux.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index 46a25b8db72e5..96d8c5a046803 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -636,8 +636,10 @@ static int pmc_usb_probe(struct platform_device *pdev)
 			break;
 
 		ret = pmc_usb_register_port(pmc, i, fwnode);
-		if (ret)
+		if (ret) {
+			fwnode_handle_put(fwnode);
 			goto err_remove_ports;
+		}
 	}
 
 	platform_set_drvdata(pdev, pmc);
-- 
GitLab


From 843fabdd7623271330af07f1b7fbd7fabe33c8de Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 7 Jun 2021 23:50:06 +0300
Subject: [PATCH 2522/3804] usb: typec: intel_pmc_mux: Add missed error check
 for devm_ioremap_resource()

devm_ioremap_resource() can return an error, add missed check for it.

Fixes: 43d596e32276 ("usb: typec: intel_pmc_mux: Check the port status before connect")
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210607205007.71458-2-andy.shevchenko@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux/intel_pmc_mux.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index 96d8c5a046803..31b1b3b555c77 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -586,6 +586,11 @@ static int pmc_usb_probe_iom(struct pmc_usb *pmc)
 		return -ENOMEM;
 	}
 
+	if (IS_ERR(pmc->iom_base)) {
+		put_device(&adev->dev);
+		return PTR_ERR(pmc->iom_base);
+	}
+
 	pmc->iom_adev = adev;
 
 	return 0;
-- 
GitLab


From 184fa76b87ca36c7e98f152df709bf6f492d8e29 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Mon, 7 Jun 2021 23:50:07 +0300
Subject: [PATCH 2523/3804] usb: typec: intel_pmc_mux: Put ACPI device using
 acpi_dev_put()

For ACPI devices we have a symmetric API to put them, so use it in the driver.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210607205007.71458-3-andy.shevchenko@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux/intel_pmc_mux.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/typec/mux/intel_pmc_mux.c b/drivers/usb/typec/mux/intel_pmc_mux.c
index 31b1b3b555c77..ffa8aa12d5f11 100644
--- a/drivers/usb/typec/mux/intel_pmc_mux.c
+++ b/drivers/usb/typec/mux/intel_pmc_mux.c
@@ -582,12 +582,12 @@ static int pmc_usb_probe_iom(struct pmc_usb *pmc)
 	acpi_dev_free_resource_list(&resource_list);
 
 	if (!pmc->iom_base) {
-		put_device(&adev->dev);
+		acpi_dev_put(adev);
 		return -ENOMEM;
 	}
 
 	if (IS_ERR(pmc->iom_base)) {
-		put_device(&adev->dev);
+		acpi_dev_put(adev);
 		return PTR_ERR(pmc->iom_base);
 	}
 
@@ -658,7 +658,7 @@ err_remove_ports:
 		usb_role_switch_unregister(pmc->port[i].usb_sw);
 	}
 
-	put_device(&pmc->iom_adev->dev);
+	acpi_dev_put(pmc->iom_adev);
 
 	return ret;
 }
@@ -674,7 +674,7 @@ static int pmc_usb_remove(struct platform_device *pdev)
 		usb_role_switch_unregister(pmc->port[i].usb_sw);
 	}
 
-	put_device(&pmc->iom_adev->dev);
+	acpi_dev_put(pmc->iom_adev);
 
 	return 0;
 }
-- 
GitLab


From 305f670846a31a261462577dd0b967c4fa796871 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <linyyuan@codeaurora.com>
Date: Wed, 9 Jun 2021 07:35:47 +0800
Subject: [PATCH 2524/3804] usb: gadget: eem: fix wrong eem header operation

When skb_clone() or skb_copy_expand() fails,
it should pull the skb by the length indicated by the header;
otherwise it will read network data and check it as a header.

Cc: <stable@vger.kernel.org>
Signed-off-by: Linyu Yuan <linyyuan@codeaurora.com>
Link: https://lore.kernel.org/r/20210608233547.3767-1-linyyuan@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_eem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index e6cb38439c411..2cd9942707b46 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -495,7 +495,7 @@ static int eem_unwrap(struct gether *port,
 			skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (unlikely(!skb2)) {
 				DBG(cdev, "unable to unframe EEM packet\n");
-				continue;
+				goto next;
 			}
 			skb_trim(skb2, len - ETH_FCS_LEN);
 
@@ -505,7 +505,7 @@ static int eem_unwrap(struct gether *port,
 						GFP_ATOMIC);
 			if (unlikely(!skb3)) {
 				dev_kfree_skb_any(skb2);
-				continue;
+				goto next;
 			}
 			dev_kfree_skb_any(skb2);
 			skb_queue_tail(list, skb3);
-- 
GitLab


From d00889080ab60051627dab1d85831cd9db750e2a Mon Sep 17 00:00:00 2001
From: Marian-Cristian Rotariu <marian.c.rotariu@gmail.com>
Date: Tue, 8 Jun 2021 19:26:50 +0300
Subject: [PATCH 2525/3804] usb: dwc3: ep0: fix NULL pointer exception

There is no validation of the index from dwc3_wIndex_to_dep() and we might
be referring to a non-existing ep and trigger a NULL pointer exception. In
certain configurations we might use fewer eps and the index might wrongly
indicate a larger ep index than existing.

By adding this validation from the patch we can actually report a wrong
index back to the caller.

In our usecase we are using a composite device on an older kernel, but
upstream might use this fix also. Unfortunately, I cannot describe the
hardware for others to reproduce the issue as it is a proprietary
implementation.

[   82.958261] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a4
[   82.966891] Mem abort info:
[   82.969663]   ESR = 0x96000006
[   82.972703]   Exception class = DABT (current EL), IL = 32 bits
[   82.978603]   SET = 0, FnV = 0
[   82.981642]   EA = 0, S1PTW = 0
[   82.984765] Data abort info:
[   82.987631]   ISV = 0, ISS = 0x00000006
[   82.991449]   CM = 0, WnR = 0
[   82.994409] user pgtable: 4k pages, 39-bit VAs, pgdp = 00000000c6210ccc
[   83.000999] [00000000000000a4] pgd=0000000053aa5003, pud=0000000053aa5003, pmd=0000000000000000
[   83.009685] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[   83.026433] Process irq/62-dwc3 (pid: 303, stack limit = 0x000000003985154c)
[   83.033470] CPU: 0 PID: 303 Comm: irq/62-dwc3 Not tainted 4.19.124 #1
[   83.044836] pstate: 60000085 (nZCv daIf -PAN -UAO)
[   83.049628] pc : dwc3_ep0_handle_feature+0x414/0x43c
[   83.054558] lr : dwc3_ep0_interrupt+0x3b4/0xc94

...

[   83.141788] Call trace:
[   83.144227]  dwc3_ep0_handle_feature+0x414/0x43c
[   83.148823]  dwc3_ep0_interrupt+0x3b4/0xc94
[   83.181546] ---[ end trace aac6b5267d84c32f ]---

Signed-off-by: Marian-Cristian Rotariu <marian.c.rotariu@gmail.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210608162650.58426-1-marian.c.rotariu@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/ep0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 8b668ef46f7f1..3cd2942643725 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -292,6 +292,9 @@ static struct dwc3_ep *dwc3_wIndex_to_dep(struct dwc3 *dwc, __le16 wIndex_le)
 		epnum |= 1;
 
 	dep = dwc->eps[epnum];
+	if (dep == NULL)
+		return NULL;
+
 	if (dep->flags & DWC3_EP_ENABLED)
 		return dep;
 
-- 
GitLab


From fbf649cd6d64d40c03c5397ecd6b1ae922ba7afc Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 5 Jun 2021 16:09:14 +0800
Subject: [PATCH 2526/3804] usb: misc: brcmstb-usb-pinmap: check return value
 after calling platform_get_resource()

It will cause a null-ptr-deref if platform_get_resource() returns NULL,
so we need to check the return value.

Fixes: 517c4c44b323 ("usb: Add driver to allow any GPIO to be used for 7211 USB signals")
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210605080914.2057758-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/brcmstb-usb-pinmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/misc/brcmstb-usb-pinmap.c b/drivers/usb/misc/brcmstb-usb-pinmap.c
index b3cfe8666ea7d..336653091e3b3 100644
--- a/drivers/usb/misc/brcmstb-usb-pinmap.c
+++ b/drivers/usb/misc/brcmstb-usb-pinmap.c
@@ -263,6 +263,8 @@ static int __init brcmstb_usb_pinmap_probe(struct platform_device *pdev)
 		return -EINVAL;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r)
+		return -EINVAL;
 
 	pdata = devm_kzalloc(&pdev->dev,
 			     sizeof(*pdata) +
-- 
GitLab


From 43c85d770db80cb135f576f8fde6ff1a08e707a4 Mon Sep 17 00:00:00 2001
From: Wenli Looi <wlooi@ucalgary.ca>
Date: Mon, 7 Jun 2021 23:46:20 -0700
Subject: [PATCH 2527/3804] staging: rtl8723bs: Fix uninitialized variables

The sinfo.pertid and sinfo.generation variables are not initialized and
it causes a crash when we use this as a wireless access point.

[  456.873025] ------------[ cut here ]------------
[  456.878198] kernel BUG at mm/slub.c:3968!
[  456.882680] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM

  [ snip ]

[  457.271004] Backtrace:
[  457.273733] [<c02b7ee4>] (kfree) from [<c0e2a470>] (nl80211_send_station+0x954/0xfc4)
[  457.282481]  r9:eccca0c0 r8:e8edfec0 r7:00000000 r6:00000011 r5:e80a9480 r4:e8edfe00
[  457.291132] [<c0e29b1c>] (nl80211_send_station) from [<c0e2b18c>] (cfg80211_new_sta+0x90/0x1cc)
[  457.300850]  r10:e80a9480 r9:e8edfe00 r8:ea678cca r7:00000a20 r6:00000000 r5:ec46d000
[  457.309586]  r4:ec46d9e0
[  457.312433] [<c0e2b0fc>] (cfg80211_new_sta) from [<bf086684>] (rtw_cfg80211_indicate_sta_assoc+0x80/0x9c [r8723bs])
[  457.324095]  r10:00009930 r9:e85b9d80 r8:bf091050 r7:00000000 r6:00000000 r5:0000001c
[  457.332831]  r4:c1606788
[  457.335692] [<bf086604>] (rtw_cfg80211_indicate_sta_assoc [r8723bs]) from [<bf03df38>] (rtw_stassoc_event_callback+0x1c8/0x1d4 [r8723bs])
[  457.349489]  r7:ea678cc0 r6:000000a1 r5:f1225f84 r4:f086b000
[  457.355845] [<bf03dd70>] (rtw_stassoc_event_callback [r8723bs]) from [<bf048e4c>] (mlme_evt_hdl+0x8c/0xb4 [r8723bs])
[  457.367601]  r7:c1604900 r6:f086c4b8 r5:00000000 r4:f086c000
[  457.373959] [<bf048dc0>] (mlme_evt_hdl [r8723bs]) from [<bf03693c>] (rtw_cmd_thread+0x198/0x3d8 [r8723bs])
[  457.384744]  r5:f086e000 r4:f086c000
[  457.388754] [<bf0367a4>] (rtw_cmd_thread [r8723bs]) from [<c014a214>] (kthread+0x170/0x174)
[  457.398083]  r10:ed7a57e8 r9:bf0367a4 r8:f086b000 r7:e8ede000 r6:00000000 r5:e9975200
[  457.406828]  r4:e8369900
[  457.409653] [<c014a0a4>] (kthread) from [<c01010e8>] (ret_from_fork+0x14/0x2c)
[  457.417718] Exception stack(0xe8edffb0 to 0xe8edfff8)
[  457.423356] ffa0:                                     00000000 00000000 00000000 00000000
[  457.432492] ffc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[  457.441618] ffe0: 00000000 00000000 00000000 00000000 00000013 00000000
[  457.449006]  r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c014a0a4
[  457.457750]  r4:e9975200
[  457.460574] Code: 1a000003 e5953004 e3130001 1a000000 (e7f001f2)
[  457.467381] ---[ end trace 4acbc8c15e9e6aa7 ]---

Link: https://forum.armbian.com/topic/14727-wifi-ap-kernel-bug-in-kernel-5444/
Fixes: 8689c051a201 ("cfg80211: dynamically allocate per-tid stats for station info")
Fixes: f5ea9120be2e ("nl80211: add generation number to all dumps")
Signed-off-by: Wenli Looi <wlooi@ucalgary.ca>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20210608064620.74059-1-wlooi@ucalgary.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index a6d731e959a28..ffa1cf4f9a826 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -2091,7 +2091,7 @@ void rtw_cfg80211_indicate_sta_assoc(struct adapter *padapter, u8 *pmgmt_frame,
 	struct net_device *ndev = padapter->pnetdev;
 
 	{
-		struct station_info sinfo;
+		struct station_info sinfo = {};
 		u8 ie_offset;
 		if (GetFrameSubType(pmgmt_frame) == WIFI_ASSOCREQ)
 			ie_offset = _ASOCREQ_IE_OFFSET_;
-- 
GitLab


From e9de1ecadeab5fbffd873b9110e969c869554a56 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Mon, 7 Jun 2021 12:01:19 +0200
Subject: [PATCH 2528/3804] staging: ralink-gdma: Remove incorrect author
 information

Lars did not write the ralink-gdma driver. Looks like his name just got
copy&pasted from another similar DMA driver. Remove his name from the
copyright and MODULE_AUTHOR.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Link: https://lore.kernel.org/r/20210607100119.26983-1-lars@metafoo.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/ralink-gdma/ralink-gdma.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/ralink-gdma/ralink-gdma.c b/drivers/staging/ralink-gdma/ralink-gdma.c
index 33e28ccf4d855..b5229bc6eae5b 100644
--- a/drivers/staging/ralink-gdma/ralink-gdma.c
+++ b/drivers/staging/ralink-gdma/ralink-gdma.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de>
  *  GDMA4740 DMAC support
  */
 
@@ -914,6 +913,5 @@ static struct platform_driver gdma_dma_driver = {
 };
 module_platform_driver(gdma_dma_driver);
 
-MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
 MODULE_DESCRIPTION("Ralink/MTK DMA driver");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 510b80a6a0f1a0d114c6e33bcea64747d127973c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 8 Jun 2021 16:36:21 +0200
Subject: [PATCH 2529/3804] x86/pkru: Write hardware init value to PKRU when
 xstate is init

When user space brings PKRU into init state, then the kernel handling is
broken:

  T1 user space
     xsave(state)
     state.header.xfeatures &= ~XFEATURE_MASK_PKRU;
     xrstor(state)

  T1 -> kernel
     schedule()
       XSAVE(S) -> T1->xsave.header.xfeatures[PKRU] == 0
       T1->flags |= TIF_NEED_FPU_LOAD;

       wrpkru();

     schedule()
       ...
       pk = get_xsave_addr(&T1->fpu->state.xsave, XFEATURE_PKRU);
       if (pk)
	 wrpkru(pk->pkru);
       else
	 wrpkru(DEFAULT_PKRU);

Because the xfeatures bit is 0 and therefore the value in the xsave
storage is not valid, get_xsave_addr() returns NULL and switch_to()
writes the default PKRU. -> FAIL #1!

So that wrecks any copy_to/from_user() on the way back to user space
which hits memory which is protected by the default PKRU value.

Assuming that this does not fail (pure luck), T1 goes back to user
space and, because TIF_NEED_FPU_LOAD is set, it ends up in

  switch_fpu_return()
      __fpregs_load_activate()
        if (!fpregs_state_valid()) {
  	 load_XSTATE_from_task();
        }

But if nothing touched the FPU between T1 scheduling out and back in,
then the fpregs_state is still valid which means switch_fpu_return()
does nothing and just clears TIF_NEED_FPU_LOAD. Back to user space with
DEFAULT_PKRU loaded. -> FAIL #2!

The fix is simple: if get_xsave_addr() returns NULL then set the
PKRU value to 0 instead of the restrictive default PKRU value in
init_pkru_value.

 [ bp: Massage in minor nitpicks from folks. ]

Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Tested-by: Babu Moger <babu.moger@amd.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210608144346.045616965@linutronix.de
---
 arch/x86/include/asm/fpu/internal.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 18382ac1ecc44..fdee23ea4e173 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -579,9 +579,16 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 	 * return to userland e.g. for a copy_to_user() operation.
 	 */
 	if (!(current->flags & PF_KTHREAD)) {
+		/*
+		 * If the PKRU bit in xsave.header.xfeatures is not set,
+		 * then the PKRU component was in init state, which means
+		 * XRSTOR will set PKRU to 0. If the bit is not set then
+		 * get_xsave_addr() will return NULL because the PKRU value
+		 * in memory is not valid. This means pkru_val has to be
+		 * set to 0 and not to init_pkru_value.
+		 */
 		pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
-		if (pk)
-			pkru_val = pk->pkru;
+		pkru_val = pk ? pk->pkru : 0;
 	}
 	__write_pkru(pkru_val);
 }
-- 
GitLab


From 4ccf359849ce709f4bf0214b4b5b8b6891d38770 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Jun 2021 09:19:18 +0200
Subject: [PATCH 2530/3804] spi: remove spi_set_cs_timing()

No one seems to be using this global and exported function, so remove it
as it is no longer needed.

Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20210609071918.2852069-1-gregkh@linuxfoundation.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 73 -----------------------------------------
 include/linux/spi/spi.h |  5 ---
 2 files changed, 78 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ac892cc831719..a0a232669dc1a 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3472,79 +3472,6 @@ int spi_setup(struct spi_device *spi)
 }
 EXPORT_SYMBOL_GPL(spi_setup);
 
-/**
- * spi_set_cs_timing - configure CS setup, hold, and inactive delays
- * @spi: the device that requires specific CS timing configuration
- * @setup: CS setup time specified via @spi_delay
- * @hold: CS hold time specified via @spi_delay
- * @inactive: CS inactive delay between transfers specified via @spi_delay
- *
- * Return: zero on success, else a negative error code.
- */
-int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
-		      struct spi_delay *hold, struct spi_delay *inactive)
-{
-	struct device *parent = spi->controller->dev.parent;
-	size_t len;
-	int status;
-
-	if (spi->controller->set_cs_timing &&
-	    !(spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) {
-		mutex_lock(&spi->controller->io_mutex);
-
-		if (spi->controller->auto_runtime_pm) {
-			status = pm_runtime_get_sync(parent);
-			if (status < 0) {
-				mutex_unlock(&spi->controller->io_mutex);
-				pm_runtime_put_noidle(parent);
-				dev_err(&spi->controller->dev, "Failed to power device: %d\n",
-					status);
-				return status;
-			}
-
-			status = spi->controller->set_cs_timing(spi, setup,
-								hold, inactive);
-			pm_runtime_mark_last_busy(parent);
-			pm_runtime_put_autosuspend(parent);
-		} else {
-			status = spi->controller->set_cs_timing(spi, setup, hold,
-							      inactive);
-		}
-
-		mutex_unlock(&spi->controller->io_mutex);
-		return status;
-	}
-
-	if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) ||
-	    (hold && hold->unit == SPI_DELAY_UNIT_SCK) ||
-	    (inactive && inactive->unit == SPI_DELAY_UNIT_SCK)) {
-		dev_err(&spi->dev,
-			"Clock-cycle delays for CS not supported in SW mode\n");
-		return -ENOTSUPP;
-	}
-
-	len = sizeof(struct spi_delay);
-
-	/* copy delays to controller */
-	if (setup)
-		memcpy(&spi->controller->cs_setup, setup, len);
-	else
-		memset(&spi->controller->cs_setup, 0, len);
-
-	if (hold)
-		memcpy(&spi->controller->cs_hold, hold, len);
-	else
-		memset(&spi->controller->cs_hold, 0, len);
-
-	if (inactive)
-		memcpy(&spi->controller->cs_inactive, inactive, len);
-	else
-		memset(&spi->controller->cs_inactive, 0, len);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(spi_set_cs_timing);
-
 static int _spi_xfer_word_delay_update(struct spi_transfer *xfer,
 				       struct spi_device *spi)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 74239d65c7fd1..f924160e995f4 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1108,11 +1108,6 @@ static inline void spi_message_free(struct spi_message *m)
 	kfree(m);
 }
 
-extern int spi_set_cs_timing(struct spi_device *spi,
-			     struct spi_delay *setup,
-			     struct spi_delay *hold,
-			     struct spi_delay *inactive);
-
 extern int spi_setup(struct spi_device *spi);
 extern int spi_async(struct spi_device *spi, struct spi_message *message);
 extern int spi_async_locked(struct spi_device *spi,
-- 
GitLab


From 30e9857a134905ac0d03ca244b615cc3ff0a076e Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 8 Jun 2021 11:07:02 -0700
Subject: [PATCH 2531/3804] pinctrl: qcom: Make it possible to select SC8180x
 TLMM

It's currently not possible to select the SC8180x TLMM driver, due to it
selecting PINCTRL_MSM, rather than depending on the same. Fix this.

Fixes: 97423113ec4b ("pinctrl: qcom: Add sc8180x TLMM driver")
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210608180702.2064253-1-bjorn.andersson@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 25d2f7f7f3b68..11e967dbb44bb 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -223,7 +223,7 @@ config PINCTRL_SC7280
 config PINCTRL_SC8180X
 	tristate "Qualcomm Technologies Inc SC8180x pin controller driver"
 	depends on GPIOLIB && (OF || ACPI)
-	select PINCTRL_MSM
+	depends on PINCTRL_MSM
 	help
 	  This is the pinctrl, pinmux, pinconf and gpiolib driver for the
 	  Qualcomm Technologies Inc TLMM block found on the Qualcomm
-- 
GitLab


From c8a570443943304cac2e4186dbce6989b6c2b8b5 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 8 Jun 2021 23:33:18 +0200
Subject: [PATCH 2532/3804] drm/mcde: Fix off by 10^3 in calculation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The calculation of how many bytes we stuff into the
DSI pipeline for video mode panels is off by three
orders of magnitude because we did not account for the
fact that the DRM mode clock is in kilohertz rather
than hertz.

This used to be:
drm_mode_vrefresh(mode) * mode->htotal * mode->vtotal
which would become for example for s6e63m0:
60 x 514 x 831 = 25628040 Hz, but mode->clock is
25628 as it is in kHz.

This affects only the Samsung GT-I8190 "Golden" phone
right now since it is the only MCDE device with a video
mode display.

Curiously some specimens work with this code and wild
settings in the EOL and empty packets at the end of the
display, but I have noticed an eerie flicker until now.
Others were not so lucky and got black screens.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reported-by: Stephan Gerhold <stephan@gerhold.net>
Fixes: 920dd1b1425b ("drm/mcde: Use mode->clock instead of reverse calculating it from the vrefresh")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Stephan Gerhold <stephan@gerhold.net>
Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20210608213318.3897858-1-linus.walleij@linaro.org
---
 drivers/gpu/drm/mcde/mcde_dsi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
index b3fd3501c4127..5275b2723293b 100644
--- a/drivers/gpu/drm/mcde/mcde_dsi.c
+++ b/drivers/gpu/drm/mcde/mcde_dsi.c
@@ -577,7 +577,7 @@ static void mcde_dsi_setup_video_mode(struct mcde_dsi *d,
 	 * porches and sync.
 	 */
 	/* (ps/s) / (pixels/s) = ps/pixels */
-	pclk = DIV_ROUND_UP_ULL(1000000000000, mode->clock);
+	pclk = DIV_ROUND_UP_ULL(1000000000000, (mode->clock * 1000));
 	dev_dbg(d->dev, "picoseconds between two pixels: %llu\n",
 		pclk);
 
-- 
GitLab


From 159d8c274fd92438ca6d7068d7a5eeda157227f4 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Wed, 9 Jun 2021 13:25:33 +0300
Subject: [PATCH 2533/3804] ACPI: Pass the same capabilities to the _OSC
 regardless of the query flag

Commit 719e1f561afb ("ACPI: Execute platform _OSC also with query bit
clear") makes acpi_bus_osc_negotiate_platform_control() not only query
the platform's capabilities but also commit the result back to the
firmware to report which capabilities are supported by the OS.

On certain systems the BIOS loads SSDT tables dynamically based on the
capabilities the OS claims to support. However, on these systems the
_OSC actually clears some of the bits (under certain conditions) so what
happens is that now when we call the _OSC twice, the second time we pass
the cleared values, and that results in errors like the ones below
appearing in the system log:

  ACPI BIOS Error (bug): Could not resolve symbol [\_PR.PR00._CPC], AE_NOT_FOUND (20210105/psargs-330)
  ACPI Error: Aborting method \_PR.PR01._CPC due to previous error (AE_NOT_FOUND) (20210105/psparse-529)

In addition, the ACPI 6.4 spec says the following [1]:

  If the OS declares support of a feature in the Support Field in one
  call to _OSC, then it must preserve the set state of that bit
  (declaring support for that feature) in all subsequent calls.

Based on the above we can fix the issue by passing the same set of
capabilities to the platform wide _OSC in both calls regardless of the
query flag.

While there drop the context.ret.length checks which were wrong to begin
with (as the length is number of bytes not elements). This is already
checked in acpi_run_osc() that also returns an error in that case.

Includes fixes by Hans de Goede.

[1] https://uefi.org/specs/ACPI/6.4/06_Device_Configuration/Device_Configuration.html#sequence-of-osc-calls

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213023
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1963717
Fixes: 719e1f561afb ("ACPI: Execute platform _OSC also with query bit clear")
Cc: 5.12+ <stable@vger.kernel.org> # 5.12+
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index be7da23fad76f..a4bd673934c0a 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -330,32 +330,21 @@ static void acpi_bus_osc_negotiate_platform_control(void)
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
-	capbuf_ret = context.ret.pointer;
-	if (context.ret.length <= OSC_SUPPORT_DWORD) {
-		kfree(context.ret.pointer);
-		return;
-	}
+	kfree(context.ret.pointer);
 
-	/*
-	 * Now run _OSC again with query flag clear and with the caps
-	 * supported by both the OS and the platform.
-	 */
+	/* Now run _OSC again with query flag clear */
 	capbuf[OSC_QUERY_DWORD] = 0;
-	capbuf[OSC_SUPPORT_DWORD] = capbuf_ret[OSC_SUPPORT_DWORD];
-	kfree(context.ret.pointer);
 
 	if (ACPI_FAILURE(acpi_run_osc(handle, &context)))
 		return;
 
 	capbuf_ret = context.ret.pointer;
-	if (context.ret.length > OSC_SUPPORT_DWORD) {
-		osc_sb_apei_support_acked =
-			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
-		osc_pc_lpi_support_confirmed =
-			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
-		osc_sb_native_usb4_support_confirmed =
-			capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
-	}
+	osc_sb_apei_support_acked =
+		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
+	osc_pc_lpi_support_confirmed =
+		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_PCLPI_SUPPORT;
+	osc_sb_native_usb4_support_confirmed =
+		capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_NATIVE_USB4_SUPPORT;
 
 	kfree(context.ret.pointer);
 }
-- 
GitLab


From 7c3e8d9df265bd0bdf6e328174cdfba26eb22f1c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 8 Jun 2021 17:42:37 +0300
Subject: [PATCH 2534/3804] serial: 8250_exar: Avoid NULL pointer dereference
 at ->exit()

It's possible that during ->exit() the private_data is NULL,
for instance when there was no GPIO device instantiated.
Due to this we may not dereference it. Add a respective check.

Note, for now ->exit() only makes sense when GPIO device
was instantiated, that's why we may use the check for entire
function.

Fixes: 81171e7d31a6 ("serial: 8250_exar: Constify the software nodes")
Reported-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20210608144239.12697-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_exar.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c
index 2f49c580139b8..bd4e9f6ac29ce 100644
--- a/drivers/tty/serial/8250/8250_exar.c
+++ b/drivers/tty/serial/8250/8250_exar.c
@@ -553,7 +553,11 @@ static void pci_xr17v35x_exit(struct pci_dev *pcidev)
 {
 	struct exar8250 *priv = pci_get_drvdata(pcidev);
 	struct uart_8250_port *port = serial8250_get_port(priv->line[0]);
-	struct platform_device *pdev = port->port.private_data;
+	struct platform_device *pdev;
+
+	pdev = port->port.private_data;
+	if (!pdev)
+		return;
 
 	device_remove_software_node(&pdev->dev);
 	platform_device_unregister(pdev);
-- 
GitLab


From 3d42c93e5fc9e67e0023b7242097f1c1c2cead01 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 9 Jun 2021 14:43:16 +0200
Subject: [PATCH 2535/3804] media: dmxdev: change the check for problems
 allocing secfeed

While the logic there is right, it tricks static check analyzers,
like smatch:

	drivers/media/dvb-core/dmxdev.c:729 dvb_dmxdev_filter_start() error: we previously assumed '*secfeed' could be null (see line 719)

Because the implementation of the filter itself is made via
a callback, with its real implementation at the
dvbdmx_allocate_section_feed() inside dvb_demux.c.

So, change the check logic to make it clear that the function
will not try to use *secfeed == NULL.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-core/dmxdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index f14a872d12687..5d5a48475a54f 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -720,7 +720,7 @@ static int dvb_dmxdev_filter_start(struct dmxdev_filter *filter)
 			ret = dmxdev->demux->allocate_section_feed(dmxdev->demux,
 								   secfeed,
 								   dvb_dmxdev_section_callback);
-			if (ret < 0) {
+			if (!*secfeed) {
 				pr_err("DVB (%s): could not alloc feed\n",
 				       __func__);
 				return ret;
-- 
GitLab


From adaed1b9daf5a045be71e923e04b5069d2bee664 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 8 Jun 2021 11:32:27 +0200
Subject: [PATCH 2536/3804] mac80211: fix 'reset' debugfs locking

cfg80211 now calls suspend/resume with the wiphy lock
held, and while there's a problem with that needing
to be fixed, we should do the same in debugfs.

Cc: stable@vger.kernel.org
Fixes: a05829a7222e ("cfg80211: avoid holding the RTNL when calling the driver")
Link: https://lore.kernel.org/r/20210608113226.14020430e449.I78e19db0a55a8295a376e15ac4cf77dbb4c6fb51@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 9245c0421bda7..b5ff61b6448ab 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -4,7 +4,7 @@
  *
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2019, 2021 Intel Corporation
  */
 
 #include <linux/debugfs.h>
@@ -389,8 +389,10 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
 	struct ieee80211_local *local = file->private_data;
 
 	rtnl_lock();
+	wiphy_lock(local->hw.wiphy);
 	__ieee80211_suspend(&local->hw, NULL);
 	__ieee80211_resume(&local->hw);
+	wiphy_unlock(local->hw.wiphy);
 	rtnl_unlock();
 
 	return count;
-- 
GitLab


From 43076c1e074359f11c85d7d1b85ede1bbb8ee6b9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 8 Jun 2021 11:32:28 +0200
Subject: [PATCH 2537/3804] cfg80211: fix phy80211 symlink creation

When I moved around the code here, I neglected that we could still
call register_netdev() or similar without the wiphy mutex held,
which then calls cfg80211_register_wdev() - that's also done from
cfg80211_register_netdevice(), but the phy80211 symlink creation
was only there. Now, the symlink isn't needed for a *pure* wdev,
but a netdev not registered via cfg80211_register_wdev() should
still have the symlink, so move the creation to the right place.

Cc: stable@vger.kernel.org
Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
Link: https://lore.kernel.org/r/20210608113226.a5dc4c1e488c.Ia42fe663cefe47b0883af78c98f284c5555bbe5d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 6fbf7537faf53..8d0883e810934 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1340,6 +1340,11 @@ void cfg80211_register_wdev(struct cfg80211_registered_device *rdev,
 	rdev->devlist_generation++;
 	wdev->registered = true;
 
+	if (wdev->netdev &&
+	    sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj,
+			      "phy80211"))
+		pr_err("failed to add phy80211 symlink to netdev!\n");
+
 	nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 }
 
@@ -1365,14 +1370,6 @@ int cfg80211_register_netdevice(struct net_device *dev)
 	if (ret)
 		goto out;
 
-	if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj,
-			      "phy80211")) {
-		pr_err("failed to add phy80211 symlink to netdev!\n");
-		unregister_netdevice(dev);
-		ret = -EINVAL;
-		goto out;
-	}
-
 	cfg80211_register_wdev(rdev, wdev);
 	ret = 0;
 out:
-- 
GitLab


From 65bec836da8394b1d56bdec2c478dcac21cf12a4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 8 Jun 2021 11:32:29 +0200
Subject: [PATCH 2538/3804] cfg80211: shut down interfaces on failed resume

If resume fails, we should shut down all interfaces as the
hardware is probably dead. This was/is already done now in
mac80211, but we need to change that due to locking issues,
so move it here and do it without the wiphy lock held.

Cc: stable@vger.kernel.org
Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
Link: https://lore.kernel.org/r/20210608113226.d564ca69de7c.I2e3c3e5d410b72a4f63bade4fb075df041b3d92f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sysfs.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9b959e3b09c6d..0c3f05c9be27a 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -133,6 +133,10 @@ static int wiphy_resume(struct device *dev)
 	if (rdev->wiphy.registered && rdev->ops->resume)
 		ret = rdev_resume(rdev);
 	wiphy_unlock(&rdev->wiphy);
+
+	if (ret)
+		cfg80211_shutdown_all_interfaces(&rdev->wiphy);
+
 	rtnl_unlock();
 
 	return ret;
-- 
GitLab


From f5baf287f5da5641099ad5c809b3b4ebfc08506d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 8 Jun 2021 11:32:30 +0200
Subject: [PATCH 2539/3804] mac80211: move interface shutdown out of wiphy lock

When reconfiguration fails, we shut down everything, but we
cannot call cfg80211_shutdown_all_interfaces() with the wiphy
mutex held. Since cfg80211 now calls it on resume errors, we
only need to do likewise for where we call reconfig (whether
directly or indirectly), but not under the wiphy lock.

Cc: stable@vger.kernel.org
Fixes: 2fe8ef106238 ("cfg80211: change netdev registration/unregistration semantics")
Link: https://lore.kernel.org/r/20210608113226.78233c80f548.Iecc104aceb89f0568f50e9670a9cb191a1c8887b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs.c | 7 ++++++-
 net/mac80211/main.c    | 7 ++++++-
 net/mac80211/util.c    | 2 --
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b5ff61b6448ab..fc34ae2b604c5 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -387,12 +387,17 @@ static ssize_t reset_write(struct file *file, const char __user *user_buf,
 			   size_t count, loff_t *ppos)
 {
 	struct ieee80211_local *local = file->private_data;
+	int ret;
 
 	rtnl_lock();
 	wiphy_lock(local->hw.wiphy);
 	__ieee80211_suspend(&local->hw, NULL);
-	__ieee80211_resume(&local->hw);
+	ret = __ieee80211_resume(&local->hw);
 	wiphy_unlock(local->hw.wiphy);
+
+	if (ret)
+		cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
 	rtnl_unlock();
 
 	return count;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 62145e5f96286..f33a3acd7f969 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -252,6 +252,7 @@ static void ieee80211_restart_work(struct work_struct *work)
 	struct ieee80211_local *local =
 		container_of(work, struct ieee80211_local, restart_work);
 	struct ieee80211_sub_if_data *sdata;
+	int ret;
 
 	/* wait for scan work complete */
 	flush_workqueue(local->workqueue);
@@ -301,8 +302,12 @@ static void ieee80211_restart_work(struct work_struct *work)
 	/* wait for all packet processing to be done */
 	synchronize_net();
 
-	ieee80211_reconfig(local);
+	ret = ieee80211_reconfig(local);
 	wiphy_unlock(local->hw.wiphy);
+
+	if (ret)
+		cfg80211_shutdown_all_interfaces(local->hw.wiphy);
+
 	rtnl_unlock();
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 0a0481f5af484..93d96a4f9c3e6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2178,8 +2178,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
 	list_for_each_entry(ctx, &local->chanctx_list, list)
 		ctx->driver_present = false;
 	mutex_unlock(&local->chanctx_mtx);
-
-	cfg80211_shutdown_all_interfaces(local->hw.wiphy);
 }
 
 static void ieee80211_assign_chanctx(struct ieee80211_local *local,
-- 
GitLab


From a9799541ca34652d9996e45f80e8e03144c12949 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 9 Jun 2021 16:13:06 +0200
Subject: [PATCH 2540/3804] mac80211: drop multicast fragments

These are not permitted by the spec, just drop them.

Link: https://lore.kernel.org/r/20210609161305.23def022b750.Ibd6dd3cdce573dae262fcdc47f8ac52b883a9c50@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1bb43edd47b6c..af0ef456eb0f8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2240,17 +2240,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (is_multicast_ether_addr(hdr->addr1)) {
-		I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
-		goto out_no_led;
-	}
-
 	if (rx->sta)
 		cache = &rx->sta->frags;
 
 	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
 		goto out;
 
+	if (is_multicast_ether_addr(hdr->addr1))
+		return RX_DROP_MONITOR;
+
 	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
 	if (skb_linearize(rx->skb))
@@ -2376,7 +2374,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 
  out:
 	ieee80211_led_rx(rx->local);
- out_no_led:
 	if (rx->sta)
 		rx->sta->rx_stats.packets++;
 	return RX_CONTINUE;
-- 
GitLab


From 64233338499126c5c31e07165735ab5441c7e45a Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 28 May 2021 11:20:54 +0800
Subject: [PATCH 2541/3804] intel_idle: Adjust the SKX C6 parameters if PC6 is
 disabled

Because cpuidle assumes worst-case C-state parameters, PC6 parameters
are used for describing C6, which is worst-case for requesting CC6.
When PC6 is enabled, this is appropriate. But if PC6 is disabled
in the BIOS, the exit latency and target residency should be adjusted
accordingly.

Exit latency:
Previously the C6 exit latency was measured as the PC6 exit latency.
With PC6 disabled, the C6 exit latency should be the one of CC6.

Target residency:
With PC6 disabled, the idle duration within [CC6, PC6) would make the
idle governor choose C1E over C6. This would cause low energy-efficiency.
We should lower the bar to request C6 when PC6 is disabled.

To fill this gap, check if PC6 is disabled in the BIOS in the
MSR_PKG_CST_CONFIG_CONTROL(0xe2) register. If so, use the CC6 exit latency
for C6 and set target_residency to 3 times of the new exit latency. [This
is consistent with how intel_idle driver uses _CST to calculate the
target_residency.] As a result, the OS would be more likely to choose C6
over C1E when PC6 is disabled, which is reasonable, because if C6 is
enabled, it implies that the user cares about energy, so choosing C6 more
frequently makes sense.

The new CC6 exit latency of 92us was measured with wult[1] on SKX via NIC
wakeup as the 99.99th percentile. Also CLX and CPX both have the same CPU
model number as SKX, but their CC6 exit latencies are similar to the SKX
one, 96us and 89us respectively, so reuse the SKX value for them.

There is a concern that it might be better to use a more generic approach
instead of optimizing every platform. However, if the required code
complexity and different PC6 bit interpretation on different platforms
are taken into account, tuning the code per platform seems to be an
acceptable tradeoff.

Link: https://intel.github.io/wult/ # [1]
Suggested-by: Len Brown <len.brown@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reviewed-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index ec1b9d306ba61..e6c543b5ee1dd 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1484,6 +1484,36 @@ static void __init sklh_idle_state_table_update(void)
 	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
 }
 
+/**
+ * skx_idle_state_table_update - Adjust the Sky Lake/Cascade Lake
+ * idle states table.
+ */
+static void __init skx_idle_state_table_update(void)
+{
+	unsigned long long msr;
+
+	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);
+
+	/*
+	 * 000b: C0/C1 (no package C-state support)
+	 * 001b: C2
+	 * 010b: C6 (non-retention)
+	 * 011b: C6 (retention)
+	 * 111b: No Package C state limits.
+	 */
+	if ((msr & 0x7) < 2) {
+		/*
+		 * Uses the CC6 + PC0 latency and 3 times of
+		 * latency for target_residency if the PC6
+		 * is disabled in BIOS. This is consistent
+		 * with how intel_idle driver uses _CST
+		 * to set the target_residency.
+		 */
+		skx_cstates[2].exit_latency = 92;
+		skx_cstates[2].target_residency = 276;
+	}
+}
+
 static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 {
 	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
@@ -1515,6 +1545,9 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 	case INTEL_FAM6_SKYLAKE:
 		sklh_idle_state_table_update();
 		break;
+	case INTEL_FAM6_SKYLAKE_X:
+		skx_idle_state_table_update();
+		break;
 	}
 
 	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
-- 
GitLab


From c7711c22c6ebc07a19a3dbdf87b05d9aa78f5390 Mon Sep 17 00:00:00 2001
From: Jarvis Jiang <jarvis.w.jiang@gmail.com>
Date: Sun, 6 Jun 2021 21:07:39 +0530
Subject: [PATCH 2542/3804] bus: mhi: pci_generic: T99W175: update channel name
 from AT to DUN

According to the MHI v1.1 specification, change the channel name of T99W175
from "AT" to "DUN" (Dial-up networking) for both channels 32 and 33,
so that the channels can be bound to the Qcom WWAN control driver and a
device node such as /dev/wwan0p3DUN will be generated, which is very useful
for debugging the modem.

Link: https://lore.kernel.org/r/20210429014226.21017-1-jarvis.w.jiang@gmail.com
[mani: changed the dev node to /dev/wwan0p3DUN]
Fixes: aac426562f56 ("bus: mhi: pci_generic: Introduce Foxconn T99W175 support")
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Jarvis Jiang <jarvis.w.jiang@gmail.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20210606153741.20725-2-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/mhi/pci_generic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 7c810f02a2ef4..8c7f6576e421d 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -311,8 +311,8 @@ static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
 	MHI_CHANNEL_CONFIG_DL(5, "DIAG", 32, 1),
 	MHI_CHANNEL_CONFIG_UL(12, "MBIM", 32, 0),
 	MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
-	MHI_CHANNEL_CONFIG_UL(32, "AT", 32, 0),
-	MHI_CHANNEL_CONFIG_DL(33, "AT", 32, 0),
+	MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
+	MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
 	MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
 	MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
 };
-- 
GitLab


From 0b67808ade8893a1b3608ddd74fac7854786c919 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Sun, 6 Jun 2021 21:07:40 +0530
Subject: [PATCH 2543/3804] bus: mhi: pci_generic: Fix possible use-after-free
 in mhi_pci_remove()

This driver's remove path calls del_timer(). However, that function
does not wait until the timer handler finishes. This means that the
timer handler may still be running after the driver's remove function
has finished, which would result in a use-after-free.

Fix by calling del_timer_sync(), which makes sure the timer handler
has finished and is unable to re-schedule itself.

Link: https://lore.kernel.org/r/20210413160318.2003699-1-weiyongjun1@huawei.com
Fixes: 8562d4fe34a3 ("mhi: pci_generic: Add health-check")
Cc: stable <stable@vger.kernel.org>
Reported-by: Hulk Robot <hulkci@huawei.com>
Reviewed-by: Hemant kumar <hemantk@codeaurora.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20210606153741.20725-3-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/mhi/pci_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 8c7f6576e421d..0a6619ad292cd 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -708,7 +708,7 @@ static void mhi_pci_remove(struct pci_dev *pdev)
 	struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
 	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
 
-	del_timer(&mhi_pdev->health_check_timer);
+	del_timer_sync(&mhi_pdev->health_check_timer);
 	cancel_work_sync(&mhi_pdev->recovery_work);
 
 	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
-- 
GitLab


From 5f0c2ee1fe8de700dd0d1cdc63e1a7338e2d3a3d Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Sun, 6 Jun 2021 21:07:41 +0530
Subject: [PATCH 2544/3804] bus: mhi: pci-generic: Fix hibernation

This patch fixes a crash after resuming from hibernation. The issue
occurs when the mhi stack is built in and so is part of the 'restore kernel',
causing the device to be resumed from the 'restored kernel' with a context
that is no longer valid (memory mappings etc.), leading to spurious
crashes.

This patch fixes the issue by implementing proper freeze/restore
callbacks.

Link: https://lore.kernel.org/r/1622571445-4505-1-git-send-email-loic.poulain@linaro.org
Reported-by: Shujun Wang <wsj20369@163.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20210606153741.20725-4-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/mhi/pci_generic.c | 36 ++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 0a6619ad292cd..b3357a8a2fdbc 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -935,9 +935,43 @@ static int __maybe_unused mhi_pci_resume(struct device *dev)
 	return ret;
 }
 
+static int __maybe_unused mhi_pci_freeze(struct device *dev)
+{
+	struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+	struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+	/* We want to stop all operations, hibernation does not guarantee that
+	 * device will be in the same state as before freezing, especially if
+	 * the intermediate restore kernel reinitializes MHI device with new
+	 * context.
+	 */
+	if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+		mhi_power_down(mhi_cntrl, false);
+		mhi_unprepare_after_power_down(mhi_cntrl);
+	}
+
+	return 0;
+}
+
+static int __maybe_unused mhi_pci_restore(struct device *dev)
+{
+	struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+
+	/* Reinitialize the device */
+	queue_work(system_long_wq, &mhi_pdev->recovery_work);
+
+	return 0;
+}
+
 static const struct dev_pm_ops mhi_pci_pm_ops = {
 	SET_RUNTIME_PM_OPS(mhi_pci_runtime_suspend, mhi_pci_runtime_resume, NULL)
-	SET_SYSTEM_SLEEP_PM_OPS(mhi_pci_suspend, mhi_pci_resume)
+#ifdef CONFIG_PM_SLEEP
+	.suspend = mhi_pci_suspend,
+	.resume = mhi_pci_resume,
+	.freeze = mhi_pci_freeze,
+	.thaw = mhi_pci_restore,
+	.restore = mhi_pci_restore,
+#endif
 };
 
 static struct pci_driver mhi_pci_driver = {
-- 
GitLab


From 3df4fce739e2b263120f528c5e0fe6b2f8937b5b Mon Sep 17 00:00:00 2001
From: Ricky Wu <ricky_wu@realtek.com>
Date: Mon, 7 Jun 2021 18:16:34 +0800
Subject: [PATCH 2545/3804] misc: rtsx: separate aspm mode into MODE_REG and
 MODE_CFG

ASPM (Active State Power Management)
rtsx_comm_set_aspm: this function lets the driver make sure that
power saving is not entered while processing init and card_detect
ASPM_MODE_CFG: 8411 5209 5227 5229 5249 5250
Change back to the original way of controlling ASPM
ASPM_MODE_REG: 5227A 524A 5250A 5260 5261 5228
Keep the new way of controlling ASPM

Fixes: 121e9c6b5c4c ("misc: rtsx: modify and fix init_hw function")
Reported-by: Chris Chiu <chris.chiu@canonical.com>
Tested-by: Gordon Lack <gordon.lack@dsl.pipex.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Ricky Wu <ricky_wu@realtek.com>
Link: https://lore.kernel.org/r/20210607101634.4948-1-ricky_wu@realtek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/cardreader/rtl8411.c  |  1 +
 drivers/misc/cardreader/rts5209.c  |  1 +
 drivers/misc/cardreader/rts5227.c  |  2 ++
 drivers/misc/cardreader/rts5228.c  |  1 +
 drivers/misc/cardreader/rts5229.c  |  1 +
 drivers/misc/cardreader/rts5249.c  |  3 ++
 drivers/misc/cardreader/rts5260.c  |  1 +
 drivers/misc/cardreader/rts5261.c  |  1 +
 drivers/misc/cardreader/rtsx_pcr.c | 44 +++++++++++++++++++++---------
 include/linux/rtsx_pci.h           |  2 ++
 10 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/cardreader/rtl8411.c b/drivers/misc/cardreader/rtl8411.c
index a07674ed05965..4c5621b17a6fb 100644
--- a/drivers/misc/cardreader/rtl8411.c
+++ b/drivers/misc/cardreader/rtl8411.c
@@ -468,6 +468,7 @@ static void rtl8411_init_common_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_CFG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(23, 7, 14);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(4, 3, 10);
 	pcr->ic_version = rtl8411_get_ic_version(pcr);
diff --git a/drivers/misc/cardreader/rts5209.c b/drivers/misc/cardreader/rts5209.c
index 39a6a7ecc32e9..29f5414072bf1 100644
--- a/drivers/misc/cardreader/rts5209.c
+++ b/drivers/misc/cardreader/rts5209.c
@@ -255,6 +255,7 @@ void rts5209_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_CFG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 16);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
 
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index 8200af22b529e..4bcfbc9afbac1 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -358,6 +358,7 @@ void rts5227_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_CFG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 7, 7);
 
@@ -483,6 +484,7 @@ void rts522a_init_params(struct rtsx_pcr *pcr)
 
 	rts5227_init_params(pcr);
 	pcr->ops = &rts522a_pcr_ops;
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11);
 	pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3;
 
diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c
index 781a86def59a9..ffc128278613b 100644
--- a/drivers/misc/cardreader/rts5228.c
+++ b/drivers/misc/cardreader/rts5228.c
@@ -718,6 +718,7 @@ void rts5228_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(28, 27, 11);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
 
diff --git a/drivers/misc/cardreader/rts5229.c b/drivers/misc/cardreader/rts5229.c
index 89e6f124ca5ca..c748eaf1ec1f9 100644
--- a/drivers/misc/cardreader/rts5229.c
+++ b/drivers/misc/cardreader/rts5229.c
@@ -246,6 +246,7 @@ void rts5229_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = DRIVER_TYPE_D;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_CFG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 15);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(30, 6, 6);
 
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index b2676e7f50271..53f3a1f45c4a7 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -566,6 +566,7 @@ void rts5249_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_CFG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
 
@@ -729,6 +730,7 @@ static const struct pcr_ops rts524a_pcr_ops = {
 void rts524a_init_params(struct rtsx_pcr *pcr)
 {
 	rts5249_init_params(pcr);
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
 	pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
 	pcr->option.ltr_l1off_snooze_sspwrgate =
@@ -845,6 +847,7 @@ static const struct pcr_ops rts525a_pcr_ops = {
 void rts525a_init_params(struct rtsx_pcr *pcr)
 {
 	rts5249_init_params(pcr);
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11);
 	pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF;
 	pcr->option.ltr_l1off_snooze_sspwrgate =
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 080a7d67a8e1a..9b42b20a3e5ae 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -628,6 +628,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
 	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
 
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index 6c64dade8e1af..1fd4e0e507302 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -783,6 +783,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr)
 	pcr->sd30_drive_sel_1v8 = 0x00;
 	pcr->sd30_drive_sel_3v3 = 0x00;
 	pcr->aspm_en = ASPM_L1_EN;
+	pcr->aspm_mode = ASPM_MODE_REG;
 	pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11);
 	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
 
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 273311184669a..baf83594a01d3 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -85,12 +85,18 @@ static void rtsx_comm_set_aspm(struct rtsx_pcr *pcr, bool enable)
 	if (pcr->aspm_enabled == enable)
 		return;
 
-	if (pcr->aspm_en & 0x02)
-		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 |
-			FORCE_ASPM_CTL1, enable ? 0 : FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1);
-	else
-		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 |
-			FORCE_ASPM_CTL1, FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1);
+	if (pcr->aspm_mode == ASPM_MODE_CFG) {
+		pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL,
+						PCI_EXP_LNKCTL_ASPMC,
+						enable ? pcr->aspm_en : 0);
+	} else if (pcr->aspm_mode == ASPM_MODE_REG) {
+		if (pcr->aspm_en & 0x02)
+			rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 |
+				FORCE_ASPM_CTL1, enable ? 0 : FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1);
+		else
+			rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, FORCE_ASPM_CTL0 |
+				FORCE_ASPM_CTL1, FORCE_ASPM_CTL0 | FORCE_ASPM_CTL1);
+	}
 
 	if (!enable && (pcr->aspm_en & 0x02))
 		mdelay(10);
@@ -1394,7 +1400,8 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 			return err;
 	}
 
-	rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
+	if (pcr->aspm_mode == ASPM_MODE_REG)
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
 
 	/* No CD interrupt if probing driver with card inserted.
 	 * So we need to initialize pcr->card_exist here.
@@ -1410,6 +1417,8 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
 {
 	int err;
+	u16 cfg_val;
+	u8 val;
 
 	spin_lock_init(&pcr->lock);
 	mutex_init(&pcr->pcr_mutex);
@@ -1477,6 +1486,21 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
 	if (!pcr->slots)
 		return -ENOMEM;
 
+	if (pcr->aspm_mode == ASPM_MODE_CFG) {
+		pcie_capability_read_word(pcr->pci, PCI_EXP_LNKCTL, &cfg_val);
+		if (cfg_val & PCI_EXP_LNKCTL_ASPM_L1)
+			pcr->aspm_enabled = true;
+		else
+			pcr->aspm_enabled = false;
+
+	} else if (pcr->aspm_mode == ASPM_MODE_REG) {
+		rtsx_pci_read_register(pcr, ASPM_FORCE_CTL, &val);
+		if (val & FORCE_ASPM_CTL0 && val & FORCE_ASPM_CTL1)
+			pcr->aspm_enabled = false;
+		else
+			pcr->aspm_enabled = true;
+	}
+
 	if (pcr->ops->fetch_vendor_settings)
 		pcr->ops->fetch_vendor_settings(pcr);
 
@@ -1506,7 +1530,6 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
 	struct pcr_handle *handle;
 	u32 base, len;
 	int ret, i, bar = 0;
-	u8 val;
 
 	dev_dbg(&(pcidev->dev),
 		": Realtek PCI-E Card Reader found at %s [%04x:%04x] (rev %x)\n",
@@ -1572,11 +1595,6 @@ static int rtsx_pci_probe(struct pci_dev *pcidev,
 	pcr->host_cmds_addr = pcr->rtsx_resv_buf_addr;
 	pcr->host_sg_tbl_ptr = pcr->rtsx_resv_buf + HOST_CMDS_BUF_LEN;
 	pcr->host_sg_tbl_addr = pcr->rtsx_resv_buf_addr + HOST_CMDS_BUF_LEN;
-	rtsx_pci_read_register(pcr, ASPM_FORCE_CTL, &val);
-	if (val & FORCE_ASPM_CTL0 && val & FORCE_ASPM_CTL1)
-		pcr->aspm_enabled = false;
-	else
-		pcr->aspm_enabled = true;
 	pcr->card_inserted = 0;
 	pcr->card_removed = 0;
 	INIT_DELAYED_WORK(&pcr->carddet_work, rtsx_pci_card_detect);
diff --git a/include/linux/rtsx_pci.h b/include/linux/rtsx_pci.h
index 6f155f99aa160..4ab7bfc675f11 100644
--- a/include/linux/rtsx_pci.h
+++ b/include/linux/rtsx_pci.h
@@ -1109,6 +1109,7 @@ struct pcr_ops {
 };
 
 enum PDEV_STAT  {PDEV_STAT_IDLE, PDEV_STAT_RUN};
+enum ASPM_MODE  {ASPM_MODE_CFG, ASPM_MODE_REG};
 
 #define ASPM_L1_1_EN			BIT(0)
 #define ASPM_L1_2_EN			BIT(1)
@@ -1234,6 +1235,7 @@ struct rtsx_pcr {
 	u8				card_drive_sel;
 #define ASPM_L1_EN			0x02
 	u8				aspm_en;
+	enum ASPM_MODE			aspm_mode;
 	bool				aspm_enabled;
 
 #define PCR_MS_PMOS			(1 << 0)
-- 
GitLab


From faffc5d8576ed827e2e8e4d2a3771dbb52667381 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Mon, 31 May 2021 22:46:55 +0900
Subject: [PATCH 2546/3804] dt-bindings: hwmon: Fix typo in TI ADS7828 bindings

Fix typo in example for DT binding, changed from 'comatible'
to 'compatible'.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Link: https://lore.kernel.org/r/20210531134655.720462-1-iwamatsu@nigauri.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml b/Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml
index 33ee575bb09da..926be9a29044b 100644
--- a/Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ti,ads7828.yaml
@@ -49,7 +49,7 @@ examples:
         #size-cells = <0>;
 
         adc@48 {
-            comatible = "ti,ads7828";
+            compatible = "ti,ads7828";
             reg = <0x48>;
             vref-supply = <&vref>;
             ti,differential-input;
-- 
GitLab


From 7656cd2177612aa7c299b083ecff30a4d3e9a587 Mon Sep 17 00:00:00 2001
From: Wilken Gottwalt <wilken.gottwalt@posteo.net>
Date: Thu, 3 Jun 2021 11:51:02 +0000
Subject: [PATCH 2547/3804] hwmon: (corsair-psu) fix suspend behavior

During standby some PSUs turn off the microcontroller. A re-init is
required during resume or the microcontroller stays unresponsive.

Fixes: d115b51e0e56 ("hwmon: add Corsair PSU HID controller driver")
Signed-off-by: Wilken Gottwalt <wilken.gottwalt@posteo.net>
Link: https://lore.kernel.org/r/YLjCJiVtu5zgTabI@monster.powergraphx.local
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/corsair-psu.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
index 02298b86b57b6..731d5117f9f10 100644
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -771,6 +771,16 @@ static int corsairpsu_raw_event(struct hid_device *hdev, struct hid_report *repo
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int corsairpsu_resume(struct hid_device *hdev)
+{
+	struct corsairpsu_data *priv = hid_get_drvdata(hdev);
+
+	/* some PSUs turn off the microcontroller during standby, so a reinit is required */
+	return corsairpsu_init(priv);
+}
+#endif
+
 static const struct hid_device_id corsairpsu_idtable[] = {
 	{ HID_USB_DEVICE(0x1b1c, 0x1c03) }, /* Corsair HX550i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c04) }, /* Corsair HX650i */
@@ -793,6 +803,10 @@ static struct hid_driver corsairpsu_driver = {
 	.probe		= corsairpsu_probe,
 	.remove		= corsairpsu_remove,
 	.raw_event	= corsairpsu_raw_event,
+#ifdef CONFIG_PM
+	.resume		= corsairpsu_resume,
+	.reset_resume	= corsairpsu_resume,
+#endif
 };
 module_hid_driver(corsairpsu_driver);
 
-- 
GitLab


From 78d13552346289bad4a9bf8eabb5eec5e5a321a5 Mon Sep 17 00:00:00 2001
From: Riwen Lu <luriwen@kylinos.cn>
Date: Fri, 4 Jun 2021 11:09:59 +0800
Subject: [PATCH 2548/3804] hwmon: (scpi-hwmon) shows the negative temperature
 properly

The scpi hwmon shows sub-zero temperatures as unsigned integers,
which would confuse users when the machine works in a low-temperature
environment. This patch shows sub-zero temperatures as signed values so
that users can read them properly from sensors.

Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
Tested-by: Xin Chen <chenxin@kylinos.cn>
Link: https://lore.kernel.org/r/20210604030959.736379-1-luriwen@kylinos.cn
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/scpi-hwmon.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 25aac40f2764a..919877970ae3b 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -99,6 +99,15 @@ scpi_show_sensor(struct device *dev, struct device_attribute *attr, char *buf)
 
 	scpi_scale_reading(&value, sensor);
 
+	/*
+	 * Temperature sensor values are treated as signed values based on
+	 * observation even though that is not explicitly specified, and
+	 * because an unsigned u64 temperature does not really make practical
+	 * sense especially when the temperature is below zero degrees Celsius.
+	 */
+	if (sensor->info.class == TEMPERATURE)
+		return sprintf(buf, "%lld\n", (s64)value);
+
 	return sprintf(buf, "%llu\n", value);
 }
 
-- 
GitLab


From ad9f151e560b016b6ad3280b48e42fa11e1a5440 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 4 Jun 2021 03:07:28 +0200
Subject: [PATCH 2549/3804] netfilter: nf_tables: initialize set before
 expression setup

nft_set_elem_expr_alloc() needs an initialized set if the expression sets
the NFT_EXPR_GC flag. Move set fields initialization before expression
setup.

[4512935.019450] ==================================================================
[4512935.019456] BUG: KASAN: null-ptr-deref in nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
[4512935.019487] Read of size 8 at addr 0000000000000070 by task nft/23532
[4512935.019494] CPU: 1 PID: 23532 Comm: nft Not tainted 5.12.0-rc4+ #48
[...]
[4512935.019502] Call Trace:
[4512935.019505]  dump_stack+0x89/0xb4
[4512935.019512]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
[4512935.019536]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
[4512935.019560]  kasan_report.cold.12+0x5f/0xd8
[4512935.019566]  ? nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
[4512935.019590]  nft_set_elem_expr_alloc+0x84/0xd0 [nf_tables]
[4512935.019615]  nf_tables_newset+0xc7f/0x1460 [nf_tables]

Reported-by: syzbot+ce96ca2b1d0b37c6422d@syzkaller.appspotmail.com
Fixes: 65038428b2c6 ("netfilter: nf_tables: allow to specify stateful expression in set definition")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 85 ++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 42 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 72bc759179efc..bf4d6ec9fc55c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4364,13 +4364,45 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	err = nf_tables_set_alloc_name(&ctx, set, name);
 	kfree(name);
 	if (err < 0)
-		goto err_set_alloc_name;
+		goto err_set_name;
+
+	udata = NULL;
+	if (udlen) {
+		udata = set->data + size;
+		nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
+	}
+
+	INIT_LIST_HEAD(&set->bindings);
+	INIT_LIST_HEAD(&set->catchall_list);
+	set->table = table;
+	write_pnet(&set->net, net);
+	set->ops = ops;
+	set->ktype = ktype;
+	set->klen = desc.klen;
+	set->dtype = dtype;
+	set->objtype = objtype;
+	set->dlen = desc.dlen;
+	set->flags = flags;
+	set->size = desc.size;
+	set->policy = policy;
+	set->udlen = udlen;
+	set->udata = udata;
+	set->timeout = timeout;
+	set->gc_int = gc_int;
+
+	set->field_count = desc.field_count;
+	for (i = 0; i < desc.field_count; i++)
+		set->field_len[i] = desc.field_len[i];
+
+	err = ops->init(set, &desc, nla);
+	if (err < 0)
+		goto err_set_init;
 
 	if (nla[NFTA_SET_EXPR]) {
 		expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
 		if (IS_ERR(expr)) {
 			err = PTR_ERR(expr);
-			goto err_set_alloc_name;
+			goto err_set_expr_alloc;
 		}
 		set->exprs[0] = expr;
 		set->num_exprs++;
@@ -4381,75 +4413,44 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
 		if (!(flags & NFT_SET_EXPR)) {
 			err = -EINVAL;
-			goto err_set_alloc_name;
+			goto err_set_expr_alloc;
 		}
 		i = 0;
 		nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
 			if (i == NFT_SET_EXPR_MAX) {
 				err = -E2BIG;
-				goto err_set_init;
+				goto err_set_expr_alloc;
 			}
 			if (nla_type(tmp) != NFTA_LIST_ELEM) {
 				err = -EINVAL;
-				goto err_set_init;
+				goto err_set_expr_alloc;
 			}
 			expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
 			if (IS_ERR(expr)) {
 				err = PTR_ERR(expr);
-				goto err_set_init;
+				goto err_set_expr_alloc;
 			}
 			set->exprs[i++] = expr;
 			set->num_exprs++;
 		}
 	}
 
-	udata = NULL;
-	if (udlen) {
-		udata = set->data + size;
-		nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen);
-	}
-
-	INIT_LIST_HEAD(&set->bindings);
-	INIT_LIST_HEAD(&set->catchall_list);
-	set->table = table;
-	write_pnet(&set->net, net);
-	set->ops   = ops;
-	set->ktype = ktype;
-	set->klen  = desc.klen;
-	set->dtype = dtype;
-	set->objtype = objtype;
-	set->dlen  = desc.dlen;
-	set->flags = flags;
-	set->size  = desc.size;
-	set->policy = policy;
-	set->udlen  = udlen;
-	set->udata  = udata;
-	set->timeout = timeout;
-	set->gc_int = gc_int;
 	set->handle = nf_tables_alloc_handle(table);
 
-	set->field_count = desc.field_count;
-	for (i = 0; i < desc.field_count; i++)
-		set->field_len[i] = desc.field_len[i];
-
-	err = ops->init(set, &desc, nla);
-	if (err < 0)
-		goto err_set_init;
-
 	err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
 	if (err < 0)
-		goto err_set_trans;
+		goto err_set_expr_alloc;
 
 	list_add_tail_rcu(&set->list, &table->sets);
 	table->use++;
 	return 0;
 
-err_set_trans:
-	ops->destroy(set);
-err_set_init:
+err_set_expr_alloc:
 	for (i = 0; i < set->num_exprs; i++)
 		nft_expr_destroy(&ctx, set->exprs[i]);
-err_set_alloc_name:
+
+	ops->destroy(set);
+err_set_init:
 	kfree(set->name);
 err_set_name:
 	kvfree(set);
-- 
GitLab


From 82944421243e5988898f54266687586ba07d889e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jun 2021 13:48:17 +0200
Subject: [PATCH 2550/3804] selftests: netfilter: add fib test case

There is a bug report on netfilter.org bugzilla pointing to fib
expression dropping ipv6 DAD packets.

Add a test case that demonstrates this problem.

Next patch excludes icmpv6 packets coming from any to linklocal.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/netfilter/Makefile   |   2 +-
 tools/testing/selftests/netfilter/nft_fib.sh | 221 +++++++++++++++++++
 2 files changed, 222 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/netfilter/nft_fib.sh

diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3171069a6b461..cd6430b399820 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
 	conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
 	nft_concat_range.sh nft_conntrack_helper.sh \
 	nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 0000000000000..6caf6ac8c285f
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+	ip netns del ${ns1}
+	ip netns del ${ns2}
+	ip netns del ${nsrouter}
+
+	[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+	dmesg -c | grep ' nft_rpfilter: '
+	echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+	        fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+	}
+}
+EOF
+}
+
+load_ruleset_count() {
+	local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+		ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+		ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+	}
+}
+EOF
+}
+
+check_drops() {
+	dmesg | grep -q ' nft_rpfilter: '
+	if [ $? -eq 0 ]; then
+		dmesg | grep ' nft_rpfilter: '
+		echo "FAIL: rpfilter did drop packets"
+		return 1
+	fi
+
+	return 0
+}
+
+check_fib_counter() {
+	local want=$1
+	local ns=$2
+	local address=$3
+
+	line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+	ret=$?
+
+	if [ $ret -ne 0 ];then
+		echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+		ip netns exec ${ns} nft list table inet filter
+		return 1
+	fi
+
+	if [ $want -gt 0 ]; then
+		echo "PASS: fib expression did drop packets for $address"
+	fi
+
+	return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+  local daddr4=$1
+  local daddr6=$2
+
+  ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+	return 1
+  fi
+
+  ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+  ret=$?
+  if [ $ret -ne 0 ];then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+	return 1
+  fi
+
+  return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+exit 0
-- 
GitLab


From 12f36e9bf678a81d030ca1b693dcda62b55af7c5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jun 2021 13:48:18 +0200
Subject: [PATCH 2551/3804] netfilter: nft_fib_ipv6: skip ipv6 packets from any
 to link-local

The ip6tables rpfilter match has an extra check to skip packets with
"::" source address.

Extend this to ipv6 fib expression.  Else ipv6 duplicate address detection
packets will fail rpf route check -- lookup returns -ENETUNREACH.

While at it, extend the prerouting check to also cover the ingress hook.

Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1543
Fixes: f6d0cbcf09c5 ("netfilter: nf_tables: add fib expression")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nft_fib_ipv6.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index e204163c7036c..92f3235fa2874 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -135,6 +135,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
 }
 EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
 
+static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph)
+{
+	if (likely(next != IPPROTO_ICMPV6))
+		return false;
+
+	if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
+		return false;
+
+	return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
+}
+
 void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		   const struct nft_pktinfo *pkt)
 {
@@ -163,10 +174,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 
 	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);
 
-	if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
-	    nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
-		nft_fib_store_result(dest, priv, nft_in(pkt));
-		return;
+	if (nft_hook(pkt) == NF_INET_PRE_ROUTING ||
+	    nft_hook(pkt) == NF_INET_INGRESS) {
+		if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) ||
+		    nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
+			nft_fib_store_result(dest, priv, nft_in(pkt));
+			return;
+		}
 	}
 
 	*dest = 0;
-- 
GitLab


From ebc5399ea1dfcddac31974091086a3379141899b Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 20 May 2021 08:34:59 +0200
Subject: [PATCH 2552/3804] ice: add ndo_bpf callback for safe mode netdev ops

The ice driver requires a programmable pipeline firmware package in order
to support advanced features. Otherwise, the driver falls back to the
so-called 'safe mode'. For that mode, the ndo_bpf callback is not exposed
and when the user tries to load an XDP program, the following happens:

$ sudo ./xdp1 enp179s0f1
libbpf: Kernel error message: Underlying driver does not support XDP in native mode
link set xdp fd failed

which is sort of confusing, as there is native XDP support, but not in
the current mode. Improve the user experience by providing a specific
ndo_bpf callback dedicated to safe mode, which makes use of extack to
explicitly let the user know that the DDP package is missing and that
this is the reason XDP can't currently be loaded onto the interface.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Fixes: efc2214b6047 ("ice: Add support for XDP")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4ee85a217c6fe..0eb2307325d3b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2555,6 +2555,20 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 	return (ret || xdp_ring_err) ? -ENOMEM : 0;
 }
 
+/**
+ * ice_xdp_safe_mode - XDP handler for safe mode
+ * @dev: netdevice
+ * @xdp: XDP command
+ */
+static int ice_xdp_safe_mode(struct net_device __always_unused *dev,
+			     struct netdev_bpf *xdp)
+{
+	NL_SET_ERR_MSG_MOD(xdp->extack,
+			   "Please provide working DDP firmware package in order to use XDP\n"
+			   "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
+	return -EOPNOTSUPP;
+}
+
 /**
  * ice_xdp - implements XDP handler
  * @dev: netdevice
@@ -6937,6 +6951,7 @@ static const struct net_device_ops ice_netdev_safe_mode_ops = {
 	.ndo_change_mtu = ice_change_mtu,
 	.ndo_get_stats64 = ice_get_stats64,
 	.ndo_tx_timeout = ice_tx_timeout,
+	.ndo_bpf = ice_xdp_safe_mode,
 };
 
 static const struct net_device_ops ice_netdev_ops = {
-- 
GitLab


From 2e84f6b3773f43263124c76499c0c4ec3f40aa9b Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 20 May 2021 08:35:00 +0200
Subject: [PATCH 2553/3804] ice: parameterize functions responsible for Tx ring
 management

Commit ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match
number of Rx queues") tried to address the incorrect setting of XDP
queue count that was based on the Tx queue count, whereas in theory we
should provide the XDP queue per Rx queue. However, the routines that
setup and destroy the set of Tx resources are still based on the
vsi->num_txq.

Ice supports asymmetric Tx/Rx queue counts, so for a setup where
vsi->num_txq > vsi->num_rxq, ice_vsi_stop_tx_rings and ice_vsi_cfg_txqs
will access vsi->xdp_rings out of bounds.

Parameterize the two mentioned functions so they get the size of the Tx
resources array as input.

Fixes: ae15e0ba1b33 ("ice: Change number of XDP Tx queues to match number of Rx queues")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: Kiran Bhandare <kiranx.bhandare@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index d70ee573fde5b..27f9dac8719c1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1717,12 +1717,13 @@ setup_rings:
  * ice_vsi_cfg_txqs - Configure the VSI for Tx
  * @vsi: the VSI being configured
  * @rings: Tx ring array to be configured
+ * @count: number of Tx ring array elements
  *
  * Return 0 on success and a negative value on error
  * Configure the Tx VSI for operation.
  */
 static int
-ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
+ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count)
 {
 	struct ice_aqc_add_tx_qgrp *qg_buf;
 	u16 q_idx = 0;
@@ -1734,7 +1735,7 @@ ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings)
 
 	qg_buf->num_txqs = 1;
 
-	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+	for (q_idx = 0; q_idx < count; q_idx++) {
 		err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf);
 		if (err)
 			goto err_cfg_txqs;
@@ -1754,7 +1755,7 @@ err_cfg_txqs:
  */
 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
 {
-	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings);
+	return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -1769,7 +1770,7 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
 	int ret;
 	int i;
 
-	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
+	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq);
 	if (ret)
 		return ret;
 
@@ -2009,17 +2010,18 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi)
  * @rst_src: reset source
  * @rel_vmvf_num: Relative ID of VF/VM
  * @rings: Tx ring array to be stopped
+ * @count: number of Tx ring array elements
  */
 static int
 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
-		      u16 rel_vmvf_num, struct ice_ring **rings)
+		      u16 rel_vmvf_num, struct ice_ring **rings, u16 count)
 {
 	u16 q_idx;
 
 	if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS)
 		return -EINVAL;
 
-	for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) {
+	for (q_idx = 0; q_idx < count; q_idx++) {
 		struct ice_txq_meta txq_meta = { };
 		int status;
 
@@ -2047,7 +2049,7 @@ int
 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 			  u16 rel_vmvf_num)
 {
-	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings);
+	return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq);
 }
 
 /**
@@ -2056,7 +2058,7 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
  */
 int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi)
 {
-	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings);
+	return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq);
 }
 
 /**
-- 
GitLab


From 80ec82e3d2c1fab42eeb730aaa7985494a963d3f Mon Sep 17 00:00:00 2001
From: Austin Kim <austindh.kim@gmail.com>
Date: Wed, 9 Jun 2021 03:34:25 +0100
Subject: [PATCH 2554/3804] net: ethtool: clear heap allocations for ethtool
 function

Several ethtool functions allocate heap buffers that drivers may
(potentially) leave partly unfilled. The unused portion of the buffer
then keeps its stale heap contents, and the full buffer might be copied
back to userspace.

Signed-off-by: Austin Kim <austindh.kim@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 3fa7a394eabf6..baa5d10043cb0 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
 	if (eeprom.offset + eeprom.len > total_len)
 		return -EINVAL;
 
-	data = kmalloc(PAGE_SIZE, GFP_USER);
+	data = kzalloc(PAGE_SIZE, GFP_USER);
 	if (!data)
 		return -ENOMEM;
 
@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
 	if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
 		return -EINVAL;
 
-	data = kmalloc(PAGE_SIZE, GFP_USER);
+	data = kzalloc(PAGE_SIZE, GFP_USER);
 	if (!data)
 		return -ENOMEM;
 
@@ -1765,7 +1765,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 
 	test.len = test_len;
-	data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+	data = kcalloc(test_len, sizeof(u64), GFP_USER);
 	if (!data)
 		return -ENOMEM;
 
@@ -2293,7 +2293,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
 	ret = ethtool_tunable_valid(&tuna);
 	if (ret)
 		return ret;
-	data = kmalloc(tuna.len, GFP_USER);
+	data = kzalloc(tuna.len, GFP_USER);
 	if (!data)
 		return -ENOMEM;
 	ret = ops->get_tunable(dev, &tuna, data);
@@ -2485,7 +2485,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
 	ret = ethtool_phy_tunable_valid(&tuna);
 	if (ret)
 		return ret;
-	data = kmalloc(tuna.len, GFP_USER);
+	data = kzalloc(tuna.len, GFP_USER);
 	if (!data)
 		return -ENOMEM;
 	if (phy_drv_tunable) {
-- 
GitLab


From dcd01eeac14486b56a790f5cce9b823440ba5b34 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 9 Jun 2021 00:59:45 -0700
Subject: [PATCH 2555/3804] inet: annotate data race in inet_send_prepare() and
 inet_dgram_connect()

Both functions are known to be racy when reading inet_num,
as we do not want to grab locks for the common case where the socket
has already been bound. The race is resolved in inet_autobind()
by reading inet_num again under the socket lock.

syzbot reported:
BUG: KCSAN: data-race in inet_send_prepare / udp_lib_get_port

write to 0xffff88812cba150e of 2 bytes by task 24135 on cpu 0:
 udp_lib_get_port+0x4b2/0xe20 net/ipv4/udp.c:308
 udp_v6_get_port+0x5e/0x70 net/ipv6/udp.c:89
 inet_autobind net/ipv4/af_inet.c:183 [inline]
 inet_send_prepare+0xd0/0x210 net/ipv4/af_inet.c:807
 inet6_sendmsg+0x29/0x80 net/ipv6/af_inet6.c:639
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
 __do_sys_sendmmsg net/socket.c:2519 [inline]
 __se_sys_sendmmsg net/socket.c:2516 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff88812cba150e of 2 bytes by task 24132 on cpu 1:
 inet_send_prepare+0x21/0x210 net/ipv4/af_inet.c:806
 inet6_sendmsg+0x29/0x80 net/ipv6/af_inet6.c:639
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
 __do_sys_sendmmsg net/socket.c:2519 [inline]
 __se_sys_sendmmsg net/socket.c:2516 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x0000 -> 0x9db4

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 24132 Comm: syz-executor.2 Not tainted 5.13.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f17870ee558bb..2f94d221c00e9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -575,7 +575,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
 			return err;
 	}
 
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk))
 		return -EAGAIN;
 	return sk->sk_prot->connect(sk, uaddr, addr_len);
 }
@@ -803,7 +803,7 @@ int inet_send_prepare(struct sock *sk)
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind &&
 	    inet_autobind(sk))
 		return -EAGAIN;
 
-- 
GitLab


From a8b897c7bcd47f4147d066e22cc01d1026d7640e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 9 Jun 2021 11:49:01 +0200
Subject: [PATCH 2556/3804] udp: fix race between close() and udp_abort()

Kaustubh reported and diagnosed a panic in udp_lib_lookup().
The root cause is udp_abort() racing with close(). Both
racing functions acquire the socket lock, but udp{v6}_destroy_sock()
releases it before performing destructive actions.

We can't easily extend the socket lock scope to avoid the race,
instead use the SOCK_DEAD flag to prevent udp_abort from doing
any action when the critical race happens.

Diagnosed-and-tested-by: Kaustubh Pandey <kapandey@codeaurora.org>
Fixes: 5d77dca82839 ("net: diag: support SOCK_DESTROY for UDP sockets")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 10 ++++++++++
 net/ipv6/udp.c |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 15f5504adf5b0..1307ad0d3b9ed 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2607,6 +2607,9 @@ void udp_destroy_sock(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 	bool slow = lock_sock_fast(sk);
+
+	/* protects from races with udp_abort() */
+	sock_set_flag(sk, SOCK_DEAD);
 	udp_flush_pending_frames(sk);
 	unlock_sock_fast(sk, slow);
 	if (static_branch_unlikely(&udp_encap_needed_key)) {
@@ -2857,10 +2860,17 @@ int udp_abort(struct sock *sk, int err)
 {
 	lock_sock(sk);
 
+	/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
+	 * with close()
+	 */
+	if (sock_flag(sk, SOCK_DEAD))
+		goto out;
+
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 	__udp_disconnect(sk, 0);
 
+out:
 	release_sock(sk);
 
 	return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 199b080d418ac..3fcd86f4dfdca 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1598,6 +1598,9 @@ void udpv6_destroy_sock(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 	lock_sock(sk);
+
+	/* protects from races with udp_abort() */
+	sock_set_flag(sk, SOCK_DEAD);
 	udp_v6_flush_pending_frames(sk);
 	release_sock(sk);
 
-- 
GitLab


From 218bf772bddd221489c38dde6ef8e917131161f6 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 2 Jun 2021 13:52:24 -0700
Subject: [PATCH 2557/3804] kvm: LAPIC: Restore guard to prevent illegal APIC
 register access

Per the SDM, "any access that touches bytes 4 through 15 of an APIC
register may cause undefined behavior and must not be executed."
Worse, such an access in kvm_lapic_reg_read can result in a leak of
kernel stack contents. Prior to commit 01402cf81051 ("kvm: LAPIC:
write down valid APIC registers"), such an access was explicitly
disallowed. Restore the guard that was removed in that commit.

Fixes: 01402cf81051 ("kvm: LAPIC: write down valid APIC registers")
Signed-off-by: Jim Mattson <jmattson@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Message-Id: <20210602205224.3189316-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6d72d8f433107..17fa4ab1b8344 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 	if (!apic_x2apic_mode(apic))
 		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
+	if (alignment + len > 4)
+		return 1;
+
 	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
 		return 1;
 
-- 
GitLab


From d2e381c4963663bca6f30c3b996fa4dbafe8fcb5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 9 Jun 2021 14:17:53 +0300
Subject: [PATCH 2558/3804] rtnetlink: Fix regression in bridge VLAN
 configuration

Cited commit started returning errors when notification info is not
filled by the bridge driver, resulting in the following regression:

 # ip link add name br1 type bridge vlan_filtering 1
 # bridge vlan add dev br1 vid 555 self pvid untagged
 RTNETLINK answers: Invalid argument

As long as the bridge driver does not fill notification info for the
bridge device itself, an empty notification should not be considered as
an error. This is explained in commit 59ccaaaa49b5 ("bridge: dont send
notification when skb->len == 0 in rtnl_bridge_notify").

Fix by removing the error and adding a comment to avoid future bugs.

Fixes: a8db57c1d285 ("rtnetlink: Fix missing error code in rtnl_bridge_notify()")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3e84279c41236..ec931b080156d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4842,10 +4842,12 @@ static int rtnl_bridge_notify(struct net_device *dev)
 	if (err < 0)
 		goto errout;
 
-	if (!skb->len) {
-		err = -EINVAL;
+	/* Notification info is only filled for bridge ports, not the bridge
+	 * device itself. Therefore, a zero notification length is valid and
+	 * should not result in an error.
+	 */
+	if (!skb->len)
 		goto errout;
-	}
 
 	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 	return 0;
-- 
GitLab


From 13c62f5371e3eb4fc3400cfa26e64ca75f888008 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 9 Jun 2021 11:23:56 -0300
Subject: [PATCH 2559/3804] net/sched: act_ct: handle DNAT tuple collision

This is the counterpart of 8aa7b526dc0b ("openvswitch: handle DNAT
tuple collision") for act_ct. From that commit changelog:

"""
With multiple DNAT rules it's possible that after destination
translation the resulting tuples collide.

...

Netfilter handles this case by allocating a null binding for SNAT at
egress by default.  Perform the same operation in openvswitch for DNAT
if no explicit SNAT is requested by the user and allocate a null binding
for SNAT for packets in the "original" direction.
"""

Fixes: 95219afbb980 ("act_ct: support asymmetric conntrack")
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ct.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 18edd9ad14109..a656baa321fe1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -904,14 +904,19 @@ static int tcf_ct_act_nat(struct sk_buff *skb,
 	}
 
 	err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
-	if (err == NF_ACCEPT &&
-	    ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
-		if (maniptype == NF_NAT_MANIP_SRC)
-			maniptype = NF_NAT_MANIP_DST;
-		else
-			maniptype = NF_NAT_MANIP_SRC;
-
-		err = ct_nat_execute(skb, ct, ctinfo, range, maniptype);
+	if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) {
+		if (ct->status & IPS_SRC_NAT) {
+			if (maniptype == NF_NAT_MANIP_SRC)
+				maniptype = NF_NAT_MANIP_DST;
+			else
+				maniptype = NF_NAT_MANIP_SRC;
+
+			err = ct_nat_execute(skb, ct, ctinfo, range,
+					     maniptype);
+		} else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+			err = ct_nat_execute(skb, ct, ctinfo, NULL,
+					     NF_NAT_MANIP_SRC);
+		}
 	}
 	return err;
 #else
-- 
GitLab


From 2bf8d2ae3480da06e64dad3b326ebd2e40c0be86 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 4 Jun 2021 18:08:27 +0800
Subject: [PATCH 2560/3804] net/mlx5e: Fix an error code in
 mlx5e_arfs_create_tables()

When the code executes 'if (!priv->fs.arfs->wq)', the value of err is 0.
So, we use -ENOMEM to indicate that the function
create_singlethread_workqueue() returned NULL.

Clean up smatch warning:
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c:373
mlx5e_arfs_create_tables() warn: missing error code 'err'.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Fixes: f6755b80d693 ("net/mlx5e: Dynamic alloc arfs table for netdev when needed")
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 5cd466ec64929..25403af32859e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -356,7 +356,7 @@ err:
 
 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv)
 {
-	int err = 0;
+	int err = -ENOMEM;
 	int i;
 
 	if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))
-- 
GitLab


From fb1a3132ee1ac968316e45d21a48703a6db0b6c3 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Mon, 31 May 2021 16:28:39 +0300
Subject: [PATCH 2561/3804] net/mlx5e: Fix use-after-free of encap entry in
 neigh update handler

Function mlx5e_rep_neigh_update() wasn't updated to accommodate rtnl lock
removal from TC filter update path and properly handle concurrent encap
entry insertion/deletion, which can lead to the following use-after-free:

 [23827.464923] ==================================================================
 [23827.469446] BUG: KASAN: use-after-free in mlx5e_encap_take+0x72/0x140 [mlx5_core]
 [23827.470971] Read of size 4 at addr ffff8881d132228c by task kworker/u20:6/21635
 [23827.472251]
 [23827.472615] CPU: 9 PID: 21635 Comm: kworker/u20:6 Not tainted 5.13.0-rc3+ #5
 [23827.473788] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 [23827.475639] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
 [23827.476731] Call Trace:
 [23827.477260]  dump_stack+0xbb/0x107
 [23827.477906]  print_address_description.constprop.0+0x18/0x140
 [23827.478896]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
 [23827.479879]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
 [23827.480905]  kasan_report.cold+0x7c/0xd8
 [23827.481701]  ? mlx5e_encap_take+0x72/0x140 [mlx5_core]
 [23827.482744]  kasan_check_range+0x145/0x1a0
 [23827.493112]  mlx5e_encap_take+0x72/0x140 [mlx5_core]
 [23827.494054]  ? mlx5e_tc_tun_encap_info_equal_generic+0x140/0x140 [mlx5_core]
 [23827.495296]  mlx5e_rep_neigh_update+0x41e/0x5e0 [mlx5_core]
 [23827.496338]  ? mlx5e_rep_neigh_entry_release+0xb80/0xb80 [mlx5_core]
 [23827.497486]  ? read_word_at_a_time+0xe/0x20
 [23827.498250]  ? strscpy+0xa0/0x2a0
 [23827.498889]  process_one_work+0x8ac/0x14e0
 [23827.499638]  ? lockdep_hardirqs_on_prepare+0x400/0x400
 [23827.500537]  ? pwq_dec_nr_in_flight+0x2c0/0x2c0
 [23827.501359]  ? rwlock_bug.part.0+0x90/0x90
 [23827.502116]  worker_thread+0x53b/0x1220
 [23827.502831]  ? process_one_work+0x14e0/0x14e0
 [23827.503627]  kthread+0x328/0x3f0
 [23827.504254]  ? _raw_spin_unlock_irq+0x24/0x40
 [23827.505065]  ? __kthread_bind_mask+0x90/0x90
 [23827.505912]  ret_from_fork+0x1f/0x30
 [23827.506621]
 [23827.506987] Allocated by task 28248:
 [23827.507694]  kasan_save_stack+0x1b/0x40
 [23827.508476]  __kasan_kmalloc+0x7c/0x90
 [23827.509197]  mlx5e_attach_encap+0xde1/0x1d40 [mlx5_core]
 [23827.510194]  mlx5e_tc_add_fdb_flow+0x397/0xc40 [mlx5_core]
 [23827.511218]  __mlx5e_add_fdb_flow+0x519/0xb30 [mlx5_core]
 [23827.512234]  mlx5e_configure_flower+0x191c/0x4870 [mlx5_core]
 [23827.513298]  tc_setup_cb_add+0x1d5/0x420
 [23827.514023]  fl_hw_replace_filter+0x382/0x6a0 [cls_flower]
 [23827.514975]  fl_change+0x2ceb/0x4a51 [cls_flower]
 [23827.515821]  tc_new_tfilter+0x89a/0x2070
 [23827.516548]  rtnetlink_rcv_msg+0x644/0x8c0
 [23827.517300]  netlink_rcv_skb+0x11d/0x340
 [23827.518021]  netlink_unicast+0x42b/0x700
 [23827.518742]  netlink_sendmsg+0x743/0xc20
 [23827.519467]  sock_sendmsg+0xb2/0xe0
 [23827.520131]  ____sys_sendmsg+0x590/0x770
 [23827.520851]  ___sys_sendmsg+0xd8/0x160
 [23827.521552]  __sys_sendmsg+0xb7/0x140
 [23827.522238]  do_syscall_64+0x3a/0x70
 [23827.522907]  entry_SYSCALL_64_after_hwframe+0x44/0xae
 [23827.523797]
 [23827.524163] Freed by task 25948:
 [23827.524780]  kasan_save_stack+0x1b/0x40
 [23827.525488]  kasan_set_track+0x1c/0x30
 [23827.526187]  kasan_set_free_info+0x20/0x30
 [23827.526968]  __kasan_slab_free+0xed/0x130
 [23827.527709]  slab_free_freelist_hook+0xcf/0x1d0
 [23827.528528]  kmem_cache_free_bulk+0x33a/0x6e0
 [23827.529317]  kfree_rcu_work+0x55f/0xb70
 [23827.530024]  process_one_work+0x8ac/0x14e0
 [23827.530770]  worker_thread+0x53b/0x1220
 [23827.531480]  kthread+0x328/0x3f0
 [23827.532114]  ret_from_fork+0x1f/0x30
 [23827.532785]
 [23827.533147] Last potentially related work creation:
 [23827.534007]  kasan_save_stack+0x1b/0x40
 [23827.534710]  kasan_record_aux_stack+0xab/0xc0
 [23827.535492]  kvfree_call_rcu+0x31/0x7b0
 [23827.536206]  mlx5e_tc_del_fdb_flow+0x577/0xef0 [mlx5_core]
 [23827.537305]  mlx5e_flow_put+0x49/0x80 [mlx5_core]
 [23827.538290]  mlx5e_delete_flower+0x6d1/0xe60 [mlx5_core]
 [23827.539300]  tc_setup_cb_destroy+0x18e/0x2f0
 [23827.540144]  fl_hw_destroy_filter+0x1d2/0x310 [cls_flower]
 [23827.541148]  __fl_delete+0x4dc/0x660 [cls_flower]
 [23827.541985]  fl_delete+0x97/0x160 [cls_flower]
 [23827.542782]  tc_del_tfilter+0x7ab/0x13d0
 [23827.543503]  rtnetlink_rcv_msg+0x644/0x8c0
 [23827.544257]  netlink_rcv_skb+0x11d/0x340
 [23827.544981]  netlink_unicast+0x42b/0x700
 [23827.545700]  netlink_sendmsg+0x743/0xc20
 [23827.546424]  sock_sendmsg+0xb2/0xe0
 [23827.547084]  ____sys_sendmsg+0x590/0x770
 [23827.547850]  ___sys_sendmsg+0xd8/0x160
 [23827.548606]  __sys_sendmsg+0xb7/0x140
 [23827.549303]  do_syscall_64+0x3a/0x70
 [23827.549969]  entry_SYSCALL_64_after_hwframe+0x44/0xae
 [23827.550853]
 [23827.551217] The buggy address belongs to the object at ffff8881d1322200
 [23827.551217]  which belongs to the cache kmalloc-256 of size 256
 [23827.553341] The buggy address is located 140 bytes inside of
 [23827.553341]  256-byte region [ffff8881d1322200, ffff8881d1322300)
 [23827.555747] The buggy address belongs to the page:
 [23827.556847] page:00000000898762aa refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1d1320
 [23827.558651] head:00000000898762aa order:2 compound_mapcount:0 compound_pincount:0
 [23827.559961] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
 [23827.561243] raw: 002ffff800010200 dead000000000100 dead000000000122 ffff888100042b40
 [23827.562653] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
 [23827.564112] page dumped because: kasan: bad access detected
 [23827.565439]
 [23827.565932] Memory state around the buggy address:
 [23827.566917]  ffff8881d1322180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 [23827.568485]  ffff8881d1322200: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 [23827.569818] >ffff8881d1322280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 [23827.571143]                       ^
 [23827.571879]  ffff8881d1322300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 [23827.573283]  ffff8881d1322380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 [23827.574654] ==================================================================

Most of the necessary logic is already correctly implemented by
mlx5e_get_next_valid_encap() helper that is used in neigh stats update
handler. Make the handler generic by renaming it to
mlx5e_get_next_matching_encap() and use callback to test whether flow is
matching instead of hardcoded check for 'valid' flag value. Implement
mlx5e_get_next_valid_encap() by calling mlx5e_get_next_matching_encap()
with callback that tests encap MLX5_ENCAP_ENTRY_VALID flag. Implement new
mlx5e_get_next_init_encap() helper by calling
mlx5e_get_next_matching_encap() with callback that tests encap completion
result to be non-error and use it in mlx5e_rep_neigh_update() to safely
iterate over nhe->encap_list.

Remove encap completion logic from mlx5e_rep_update_flows() since the encap
entries passed to this function are already guaranteed to be properly
initialized by similar code in mlx5e_get_next_init_encap().

Fixes: 2a1f1768fa17 ("net/mlx5e: Refactor neigh update for concurrent execution")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en/rep/neigh.c         | 15 ++++-----
 .../ethernet/mellanox/mlx5/core/en/rep/tc.c   |  6 +---
 .../mellanox/mlx5/core/en/tc_tun_encap.c      | 33 +++++++++++++++++--
 .../net/ethernet/mellanox/mlx5/core/en_tc.h   |  3 ++
 4 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index be0ee03de7217..2e9bee4e5209b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -129,10 +129,9 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
 							     work);
 	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
 	struct neighbour *n = update_work->n;
+	struct mlx5e_encap_entry *e = NULL;
 	bool neigh_connected, same_dev;
-	struct mlx5e_encap_entry *e;
 	unsigned char ha[ETH_ALEN];
-	struct mlx5e_priv *priv;
 	u8 nud_state, dead;
 
 	rtnl_lock();
@@ -156,14 +155,12 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
 	if (!same_dev)
 		goto out;
 
-	list_for_each_entry(e, &nhe->encap_list, encap_list) {
-		if (!mlx5e_encap_take(e))
-			continue;
+	/* mlx5e_get_next_init_encap() releases previous encap before returning
+	 * the next one.
+	 */
+	while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
+		mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);
 
-		priv = netdev_priv(e->out_dev);
-		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
-		mlx5e_encap_put(priv, e);
-	}
 out:
 	rtnl_unlock();
 	mlx5e_release_neigh_update_work(update_work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 3113822618402..85eaadc989df2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -94,13 +94,9 @@ void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 
 	ASSERT_RTNL();
 
-	/* wait for encap to be fully initialized */
-	wait_for_completion(&e->res_ready);
-
 	mutex_lock(&esw->offloads.encap_tbl_lock);
 	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-	if (e->compl_result < 0 || (encap_connected == neigh_connected &&
-				    ether_addr_equal(e->h_dest, ha)))
+	if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha))
 		goto unlock;
 
 	mlx5e_take_all_encap_flows(e, &flow_list);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index f1fb11680d202..490131e06efb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -251,9 +251,12 @@ static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
 		mlx5e_take_tmp_flow(flow, flow_list, 0);
 }
 
+typedef bool (match_cb)(struct mlx5e_encap_entry *);
+
 static struct mlx5e_encap_entry *
-mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
-			   struct mlx5e_encap_entry *e)
+mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
+			      struct mlx5e_encap_entry *e,
+			      match_cb match)
 {
 	struct mlx5e_encap_entry *next = NULL;
 
@@ -288,7 +291,7 @@ retry:
 	/* wait for encap to be fully initialized */
 	wait_for_completion(&next->res_ready);
 	/* continue searching if encap entry is not in valid state after completion */
-	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
+	if (!match(next)) {
 		e = next;
 		goto retry;
 	}
@@ -296,6 +299,30 @@ retry:
 	return next;
 }
 
+static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
+{
+	return e->flags & MLX5_ENCAP_ENTRY_VALID;
+}
+
+static struct mlx5e_encap_entry *
+mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
+			   struct mlx5e_encap_entry *e)
+{
+	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
+}
+
+static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
+{
+	return e->compl_result >= 0;
+}
+
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+			  struct mlx5e_encap_entry *e)
+{
+	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
+}
+
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 25c091795bcd8..17027536efbaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -178,6 +178,9 @@ void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *f
 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
 
 struct mlx5e_neigh_hash_entry;
+struct mlx5e_encap_entry *
+mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
+			  struct mlx5e_encap_entry *e);
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
 
 void mlx5e_tc_reoffload_flows_work(struct work_struct *work);
-- 
GitLab


From 8ad893e516a77209a1818a2072d2027d87db809f Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@nvidia.com>
Date: Fri, 28 May 2021 13:20:32 -0500
Subject: [PATCH 2562/3804] net/mlx5e: Remove dependency in IPsec
 initialization flows

Currently, IPsec feature is disabled because mlx5e_build_nic_netdev
is required to be called after mlx5e_ipsec_init. This requirement is
invalid as mlx5e_build_nic_netdev and mlx5e_ipsec_init initialize
independent resources.

Remove ipsec pointer check in mlx5e_build_nic_netdev so that the
two functions can be called in any order.

Fixes: 547eede070eb ("net/mlx5e: IPSec, Innova IPSec offload infrastructure")
Signed-off-by: Huy Nguyen <huyn@nvidia.com>
Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 3d45341e2216f..26f7fab109d97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -532,9 +532,6 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct net_device *netdev = priv->netdev;
 
-	if (!priv->ipsec)
-		return;
-
 	if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) ||
 	    !MLX5_CAP_ETH(mdev, swp)) {
 		mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");
-- 
GitLab


From a3e5fd9314dfc4314a9567cde96e1aef83a7458a Mon Sep 17 00:00:00 2001
From: Dima Chumak <dchumak@nvidia.com>
Date: Wed, 26 May 2021 13:45:10 +0300
Subject: [PATCH 2563/3804] net/mlx5e: Fix page reclaim for dead peer hairpin

When adding a hairpin flow, a firmware-side send queue is created for
the peer net device, which claims some host memory pages for its
internal ring buffer. If the peer net device is removed/unbound before
the hairpin flow is deleted, then the send queue is not destroyed which
leads to a stack trace on pci device remove:

[ 748.005230] mlx5_core 0000:08:00.2: wait_func:1094:(pid 12985): MANAGE_PAGES(0x108) timeout. Will cause a leak of a command resource
[ 748.005231] mlx5_core 0000:08:00.2: reclaim_pages:514:(pid 12985): failed reclaiming pages: err -110
[ 748.001835] mlx5_core 0000:08:00.2: mlx5_reclaim_root_pages:653:(pid 12985): failed reclaiming pages (-110) for func id 0x0
[ 748.002171] ------------[ cut here ]------------
[ 748.001177] FW pages counter is 4 after reclaiming all pages
[ 748.001186] WARNING: CPU: 1 PID: 12985 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:685 mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]                      [  +0.002771] Modules linked in: cls_flower mlx5_ib mlx5_core ptp pps_core act_mirred sch_ingress openvswitch nsh xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi rdma_cm ib_umad ib_ipoib iw_cm ib_cm ib_uverbs ib_core overlay fuse [last unloaded: pps_core]
[ 748.007225] CPU: 1 PID: 12985 Comm: tee Not tainted 5.12.0+ #1
[ 748.001376] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
[ 748.002315] RIP: 0010:mlx5_reclaim_startup_pages+0x34b/0x460 [mlx5_core]
[ 748.001679] Code: 28 00 00 00 0f 85 22 01 00 00 48 81 c4 b0 00 00 00 31 c0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 48 c7 c7 40 cc 19 a1 e8 9f 71 0e e2 <0f> 0b e9 30 ff ff ff 48 c7 c7 a0 cc 19 a1 e8 8c 71 0e e2 0f 0b e9
[ 748.003781] RSP: 0018:ffff88815220faf8 EFLAGS: 00010286
[ 748.001149] RAX: 0000000000000000 RBX: ffff8881b4900280 RCX: 0000000000000000
[ 748.001445] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ffffed102a441f51
[ 748.001614] RBP: 00000000000032b9 R08: 0000000000000001 R09: ffffed1054a15ee8
[ 748.001446] R10: ffff8882a50af73b R11: ffffed1054a15ee7 R12: fffffbfff07c1e30
[ 748.001447] R13: dffffc0000000000 R14: ffff8881b492cba8 R15: 0000000000000000
[ 748.001429] FS:  00007f58bd08b580(0000) GS:ffff8882a5080000(0000) knlGS:0000000000000000
[ 748.001695] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 748.001309] CR2: 000055a026351740 CR3: 00000001d3b48006 CR4: 0000000000370ea0
[ 748.001506] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 748.001483] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 748.001654] Call Trace:
[ 748.000576]  ? mlx5_satisfy_startup_pages+0x290/0x290 [mlx5_core]
[ 748.001416]  ? mlx5_cmd_teardown_hca+0xa2/0xd0 [mlx5_core]
[ 748.001354]  ? mlx5_cmd_init_hca+0x280/0x280 [mlx5_core]
[ 748.001203]  mlx5_function_teardown+0x30/0x60 [mlx5_core]
[ 748.001275]  mlx5_uninit_one+0xa7/0xc0 [mlx5_core]
[ 748.001200]  remove_one+0x5f/0xc0 [mlx5_core]
[ 748.001075]  pci_device_remove+0x9f/0x1d0
[ 748.000833]  device_release_driver_internal+0x1e0/0x490
[ 748.001207]  unbind_store+0x19f/0x200
[ 748.000942]  ? sysfs_file_ops+0x170/0x170
[ 748.001000]  kernfs_fop_write_iter+0x2bc/0x450
[ 748.000970]  new_sync_write+0x373/0x610
[ 748.001124]  ? new_sync_read+0x600/0x600
[ 748.001057]  ? lock_acquire+0x4d6/0x700
[ 748.000908]  ? lockdep_hardirqs_on_prepare+0x400/0x400
[ 748.001126]  ? fd_install+0x1c9/0x4d0
[ 748.000951]  vfs_write+0x4d0/0x800
[ 748.000804]  ksys_write+0xf9/0x1d0
[ 748.000868]  ? __x64_sys_read+0xb0/0xb0
[ 748.000811]  ? filp_open+0x50/0x50
[ 748.000919]  ? syscall_enter_from_user_mode+0x1d/0x50
[ 748.001223]  do_syscall_64+0x3f/0x80
[ 748.000892]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 748.001026] RIP: 0033:0x7f58bcfb22f7
[ 748.000944] Code: 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[ 748.003925] RSP: 002b:00007fffd7f2aaa8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 748.001732] RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f58bcfb22f7
[ 748.001426] RDX: 000000000000000d RSI: 00007fffd7f2abc0 RDI: 0000000000000003
[ 748.001746] RBP: 00007fffd7f2abc0 R08: 0000000000000000 R09: 0000000000000001
[ 748.001631] R10: 00000000000001b6 R11: 0000000000000246 R12: 000000000000000d
[ 748.001537] R13: 00005597ac2c24a0 R14: 000000000000000d R15: 00007f58bd084700
[ 748.001564] irq event stamp: 0
[ 748.000787] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
[ 748.001399] hardirqs last disabled at (0): [<ffffffff813132cf>] copy_process+0x146f/0x5eb0
[ 748.001854] softirqs last  enabled at (0): [<ffffffff8131330e>] copy_process+0x14ae/0x5eb0
[ 748.013431] softirqs last disabled at (0): [<0000000000000000>] 0x0
[ 748.001492] ---[ end trace a6fabd773d1c51ae ]---

Fix by destroying the send queue of a hairpin peer net device that is
being removed/unbound, which returns the allocated ring buffer pages to
the host.

Fixes: 4d8fcf216c90 ("net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin rules")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   |  2 +-
 .../ethernet/mellanox/mlx5/core/transobj.c    | 30 +++++++++++++++----
 include/linux/mlx5/transobj.h                 |  1 +
 3 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dd64878e5b381..d4b0f270b6bb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4765,7 +4765,7 @@ static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
 	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
 		wait_for_completion(&hpe->res_ready);
 		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
-			hpe->hp->pair->peer_gone = true;
+			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
 
 		mlx5e_hairpin_put(priv, hpe);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index 01cc00ad8acf2..b6931bbe52d29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -424,6 +424,15 @@ err_modify_sq:
 	return err;
 }
 
+static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp)
+{
+	int i;
+
+	for (i = 0; i < hp->num_channels; i++)
+		mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
+				       MLX5_SQC_STATE_RST, 0, 0);
+}
+
 static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
 {
 	int i;
@@ -432,13 +441,9 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
 	for (i = 0; i < hp->num_channels; i++)
 		mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY,
 				       MLX5_RQC_STATE_RST, 0, 0);
-
 	/* unset peer SQs */
-	if (hp->peer_gone)
-		return;
-	for (i = 0; i < hp->num_channels; i++)
-		mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
-				       MLX5_SQC_STATE_RST, 0, 0);
+	if (!hp->peer_gone)
+		mlx5_hairpin_unpair_peer_sq(hp);
 }
 
 struct mlx5_hairpin *
@@ -485,3 +490,16 @@ void mlx5_core_hairpin_destroy(struct mlx5_hairpin *hp)
 	mlx5_hairpin_destroy_queues(hp);
 	kfree(hp);
 }
+
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp)
+{
+	int i;
+
+	mlx5_hairpin_unpair_peer_sq(hp);
+
+	/* destroy peer SQ */
+	for (i = 0; i < hp->num_channels; i++)
+		mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+
+	hp->peer_gone = true;
+}
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 028f442530cf5..60ffeb6b67ae7 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -85,4 +85,5 @@ mlx5_core_hairpin_create(struct mlx5_core_dev *func_mdev,
 			 struct mlx5_hairpin_params *params);
 
 void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair);
+void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp);
 #endif /* __TRANSOBJ_H__ */
-- 
GitLab


From c189716b2a7c1d2d8658e269735273caa1c38b54 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 6 Jun 2021 11:20:46 +0300
Subject: [PATCH 2564/3804] net/mlx5: Consider RoCE cap before init RDMA
 resources

Check if RoCE is supported by the device before enabling it in
the vport context and creating all the RDMA steering objects.

Fixes: 80f09dfc237f ("net/mlx5: Eswitch, enable RoCE loopback traffic")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rdma.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 441b5453acae0..540cf05f63739 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -156,6 +156,9 @@ void mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
 {
 	int err;
 
+	if (!MLX5_CAP_GEN(dev, roce))
+		return;
+
 	err = mlx5_nic_vport_enable_roce(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
-- 
GitLab


From 4aaf96ac8b45d8e2e019b6b53cce65a73c4ace2c Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 6 Jun 2021 11:23:41 +0300
Subject: [PATCH 2565/3804] net/mlx5: DR, Don't use SW steering when RoCE is
 not supported

SW steering uses RC QP to write/read to/from ICM, hence it's not
supported when RoCE is not supported as well.

Fixes: 70605ea545e8 ("net/mlx5: DR, Expose APIs for direct rule managing")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Alex Vesker <valex@nvidia.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h    | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 612b0ac31db23..9737565cd8d43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -124,10 +124,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action);
 static inline bool
 mlx5dr_is_supported(struct mlx5_core_dev *dev)
 {
-	return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
-	       (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
-		(MLX5_CAP_GEN(dev, steering_format_version) <=
-		 MLX5_STEERING_FORMAT_CONNECTX_6DX));
+	return MLX5_CAP_GEN(dev, roce) &&
+	       (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) ||
+		(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) &&
+		 (MLX5_CAP_GEN(dev, steering_format_version) <=
+		  MLX5_STEERING_FORMAT_CONNECTX_6DX)));
 }
 
 /* buddy functions & structure */
-- 
GitLab


From 11f5ac3e05c134d333afe6f84ab10e22bc0a5d5a Mon Sep 17 00:00:00 2001
From: Chris Mi <cmi@nvidia.com>
Date: Wed, 28 Apr 2021 19:39:26 +0800
Subject: [PATCH 2566/3804] net/mlx5e: Verify dev is present in get devlink
 port ndo

When changing eswitch mode, the netdev is detached from the
hardware resources. So verify dev is present in get devlink
port ndo. Otherwise, we will hit the following panic:

[241535.973539] RIP: 0010:__devlink_port_phys_port_name_get+0x13/0x1b0
[241535.976471] RSP: 0018:ffff9eaf0ae1b7c8 EFLAGS: 00010292
[241535.977471] RAX: 000000000002d370 RBX: 000000000002d370 RCX: 0000000000000000
[241535.978479] RDX: 0000000000000010 RSI: ffff9eaf0ae1b858 RDI: 000000000002d370
[241535.979482] RBP: ffff9eaf0ae1b7e0 R08: 000000000000002a R09: ffff8888d54d13da
[241535.980486] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8888e6700000
[241535.981491] R13: ffff9eaf0ae1b858 R14: 0000000000000010 R15: 0000000000000000
[241535.982489] FS:  00007fd374ef3740(0000) GS:ffff88909ea00000(0000) knlGS:0000000000000000
[241535.983494] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[241535.984487] CR2: 000000000002d444 CR3: 000000089fd26006 CR4: 00000000003706e0
[241535.985502] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[241535.986499] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[241535.987477] Call Trace:
[241535.988426]  ? nla_put_64bit+0x71/0xa0
[241535.989368]  devlink_compat_phys_port_name_get+0x50/0xa0
[241535.990312]  dev_get_phys_port_name+0x4b/0x60
[241535.991252]  rtnl_fill_ifinfo+0x57b/0xcb0
[241535.992192]  rtnl_dump_ifinfo+0x58f/0x6d0
[241535.993123]  ? ksize+0x14/0x20
[241535.994033]  ? __alloc_skb+0xe8/0x250
[241535.994935]  netlink_dump+0x17c/0x300
[241535.995821]  netlink_recvmsg+0x1de/0x2c0
[241535.996677]  sock_recvmsg+0x70/0x80
[241535.997518]  ____sys_recvmsg+0x9b/0x1b0
[241535.998360]  ? iovec_from_user+0x82/0x120
[241535.999202]  ? __import_iovec+0x2c/0x130
[241536.000031]  ___sys_recvmsg+0x94/0x130
[241536.000850]  ? __handle_mm_fault+0x56d/0x6e0
[241536.001668]  __sys_recvmsg+0x5f/0xb0
[241536.002464]  ? syscall_enter_from_user_mode+0x2b/0x80
[241536.003242]  __x64_sys_recvmsg+0x1f/0x30
[241536.004008]  do_syscall_64+0x38/0x50
[241536.004767]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[241536.005532] RIP: 0033:0x7fd375014f47

Fixes: 2ff349c5edfe ("net/mlx5e: Verify dev is present in some ndos")
Signed-off-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Chris Mi <cmi@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index 0dd7615e59319..bc33eaada3b9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -64,6 +64,8 @@ struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev)
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct devlink_port *port;
 
+	if (!netif_device_present(dev))
+		return NULL;
 	port = mlx5e_devlink_get_dl_port(priv);
 	if (port->registered)
 		return port;
-- 
GitLab


From 9ae8c18c5e4d8814d3b405a07712fa5464070e3e Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 29 Apr 2021 10:03:20 +0300
Subject: [PATCH 2567/3804] net/mlx5e: Don't update netdev RQs with PTP-RQ

Since the driver opens the PTP-RQ under channel 0, it appears to the
stack as if the SKB was received on rxq0. So from the stack POV there
are still the same number of RX queues.

Fixes: 960fbfe222a4 ("net/mlx5e: Allow coexistence of CQE compression and HW TS PTP")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ec6bafe7a2e59..263adc82b4e16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2705,8 +2705,6 @@ static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 	nch = priv->channels.params.num_channels;
 	ntc = priv->channels.params.num_tc;
 	num_rxqs = nch * priv->profile->rq_groups;
-	if (priv->channels.params.ptp_rx)
-		num_rxqs++;
 
 	mlx5e_netdev_set_tcs(netdev, nch, ntc);
 
-- 
GitLab


From a6ee6f5f1082c416f9bfffbae1a87feff8a6ab3d Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 19 Apr 2021 11:58:31 +0300
Subject: [PATCH 2568/3804] net/mlx5e: Fix select queue to consider
 SKBTX_HW_TSTAMP

Steering packets to PTP-SQ should be done only if the SKB has
SKBTX_HW_TSTAMP set in the tx_flags. While here, take the function into
a header and inline it.
Set the whole condition to select the PTP-SQ to unlikely.

Fixes: 24c22dd0918b ("net/mlx5e: Add states to PTP channel")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/ptp.c  |  1 -
 .../net/ethernet/mellanox/mlx5/core/en/ptp.h  | 22 ++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 25 +++----------------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index d907c1acd4d57..778e229310a93 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 // Copyright (c) 2020 Mellanox Technologies
 
-#include <linux/ptp_classify.h>
 #include "en/ptp.h"
 #include "en/txrx.h"
 #include "en/params.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
index ab935cce952b4..c96668bd701cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
@@ -6,6 +6,7 @@
 
 #include "en.h"
 #include "en_stats.h"
+#include <linux/ptp_classify.h>
 
 struct mlx5e_ptpsq {
 	struct mlx5e_txqsq       txqsq;
@@ -43,6 +44,27 @@ struct mlx5e_ptp {
 	DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES);
 };
 
+static inline bool mlx5e_use_ptpsq(struct sk_buff *skb)
+{
+	struct flow_keys fk;
+
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		return false;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return false;
+
+	if (fk.basic.n_proto == htons(ETH_P_1588))
+		return true;
+
+	if (fk.basic.n_proto != htons(ETH_P_IP) &&
+	    fk.basic.n_proto != htons(ETH_P_IPV6))
+		return false;
+
+	return (fk.basic.ip_proto == IPPROTO_UDP &&
+		fk.ports.dst == htons(PTP_EV_PORT));
+}
+
 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 		   u8 lag_port, struct mlx5e_ptp **cp);
 void mlx5e_ptp_close(struct mlx5e_ptp *c);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 8ba62671f5f10..320fe0cda9177 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -32,7 +32,6 @@
 
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
-#include <linux/ptp_classify.h>
 #include <net/geneve.h>
 #include <net/dsfield.h>
 #include "en.h"
@@ -67,24 +66,6 @@ static inline int mlx5e_get_dscp_up(struct mlx5e_priv *priv, struct sk_buff *skb
 }
 #endif
 
-static bool mlx5e_use_ptpsq(struct sk_buff *skb)
-{
-	struct flow_keys fk;
-
-	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
-		return false;
-
-	if (fk.basic.n_proto == htons(ETH_P_1588))
-		return true;
-
-	if (fk.basic.n_proto != htons(ETH_P_IP) &&
-	    fk.basic.n_proto != htons(ETH_P_IPV6))
-		return false;
-
-	return (fk.basic.ip_proto == IPPROTO_UDP &&
-		fk.ports.dst == htons(PTP_EV_PORT));
-}
-
 static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -145,9 +126,9 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		}
 
 		ptp_channel = READ_ONCE(priv->channels.ptp);
-		if (unlikely(ptp_channel) &&
-		    test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
-		    mlx5e_use_ptpsq(skb))
+		if (unlikely(ptp_channel &&
+			     test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) &&
+			     mlx5e_use_ptpsq(skb)))
 			return mlx5e_select_ptpsq(dev, skb);
 
 		txq_ix = netdev_pick_tx(dev, skb, NULL);
-- 
GitLab


From 7a545077cb6701957e84c7f158630bb5c984e648 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 25 Feb 2021 12:27:53 +0200
Subject: [PATCH 2569/3804] Revert "net/mlx5: Arm only EQs with EQEs"

In the scenario described below, an EQ can remain in FIRED state which
can result in missing an interrupt generation.

The scenario:

device                       mlx5_core driver
------                       ----------------
EQ1.eqe generated
EQ1.MSI-X sent
EQ1.state = FIRED
EQ2.eqe generated
                             mlx5_irq()
                               polls - eq1_eqes()
                               arm eq1
                               polls - eq2_eqes()
                               arm eq2
EQ2.MSI-X sent
EQ2.state = FIRED
                              mlx5_irq()
                              polls - eq2_eqes() -- no eqes found
                              driver skips EQ arming;

->EQ2 remains fired, misses generating interrupt.

Hence, always arm the EQ by reverting the cited commit in fixes tag.

Fixes: d894892dda25 ("net/mlx5: Arm only EQs with EQEs")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 77c0ca6559759..9403334102675 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -136,7 +136,7 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
 
 	eqe = next_eqe_sw(eq);
 	if (!eqe)
-		return 0;
+		goto out;
 
 	do {
 		struct mlx5_core_cq *cq;
@@ -161,6 +161,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
 		++eq->cons_index;
 
 	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
+
+out:
 	eq_update_ci(eq, 1);
 
 	if (cqn != -1)
@@ -248,9 +250,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
 		++eq->cons_index;
 
 	} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq)));
-	eq_update_ci(eq, 1);
 
 out:
+	eq_update_ci(eq, 1);
 	mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
 
 	return unlikely(recovery) ? num_eqes : 0;
-- 
GitLab


From 6d6727dddc7f93fcc155cb8d0c49c29ae0e71122 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 10 May 2021 14:34:58 +0300
Subject: [PATCH 2570/3804] net/mlx5e: Block offload of outer header csum for
 UDP tunnels

The device is able to offload either the outer header csum or inner
header csum. The driver utilizes the inner csum offload. Hence, block
setting of tx-udp_tnl-csum-segmentation and set it to off[fixed].

Fixes: b49663c8fb49 ("net/mlx5e: Add support for UDP tunnel segmentation with outer checksum offload")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 263adc82b4e16..d4167f7be99ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4822,13 +4822,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	}
 
 	if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
-		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
-					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
-		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
-					   NETIF_F_GSO_UDP_TUNNEL_CSUM;
-		netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
-		netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
-					 NETIF_F_GSO_UDP_TUNNEL_CSUM;
+		netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL;
+		netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL;
+		netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL;
 	}
 
 	if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
-- 
GitLab


From 54e1217b90486c94b26f24dcee1ee5ef5372f832 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 26 May 2021 10:40:36 +0300
Subject: [PATCH 2571/3804] net/mlx5e: Block offload of outer header csum for
 GRE tunnel

The device is able to offload either the outer header csum or inner
header csum. The driver utilizes the inner csum offload. So, prohibit
setting of tx-gre-csum-segmentation and let it be: off[fixed].

Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d4167f7be99ca..d26b8ed511959 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4828,12 +4828,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	}
 
 	if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
-		netdev->hw_features     |= NETIF_F_GSO_GRE |
-					   NETIF_F_GSO_GRE_CSUM;
-		netdev->hw_enc_features |= NETIF_F_GSO_GRE |
-					   NETIF_F_GSO_GRE_CSUM;
-		netdev->gso_partial_features |= NETIF_F_GSO_GRE |
-						NETIF_F_GSO_GRE_CSUM;
+		netdev->hw_features     |= NETIF_F_GSO_GRE;
+		netdev->hw_enc_features |= NETIF_F_GSO_GRE;
+		netdev->gso_partial_features |= NETIF_F_GSO_GRE;
 	}
 
 	if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
-- 
GitLab


From efa165504943f2128d50f63de0c02faf6dcceb0d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 9 Jun 2021 21:18:00 +0200
Subject: [PATCH 2572/3804] x86/fpu: Reset state for all signal restore
 failures

If access_ok() or fpregs_soft_set() fails in __fpu__restore_sig() then the
function just returns but does not clear the FPU state as it does for all
other fatal failures.

Clear the FPU state for these failures as well.

Fixes: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/87mtryyhhz.ffs@nanos.tec.linutronix.de
---
 arch/x86/kernel/fpu/signal.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 4ab9aeb9a9630..ec3ae30547920 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -307,13 +307,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		return 0;
 	}
 
-	if (!access_ok(buf, size))
-		return -EACCES;
+	if (!access_ok(buf, size)) {
+		ret = -EACCES;
+		goto out;
+	}
 
-	if (!static_cpu_has(X86_FEATURE_FPU))
-		return fpregs_soft_set(current, NULL,
-				       0, sizeof(struct user_i387_ia32_struct),
-				       NULL, buf) != 0;
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
+		ret = fpregs_soft_set(current, NULL, 0,
+				      sizeof(struct user_i387_ia32_struct),
+				      NULL, buf);
+		goto out;
+	}
 
 	if (use_xsave()) {
 		struct _fpx_sw_bytes fx_sw_user;
@@ -396,7 +400,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		ret = __copy_from_user(&env, buf, sizeof(env));
 		if (ret)
-			goto err_out;
+			goto out;
 		envp = &env;
 	}
 
@@ -426,7 +430,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 		ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
 		if (ret)
-			goto err_out;
+			goto out;
 
 		sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
 					      fx_only);
@@ -446,7 +450,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
 		if (ret) {
 			ret = -EFAULT;
-			goto err_out;
+			goto out;
 		}
 
 		sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
@@ -464,7 +468,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	} else {
 		ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
 		if (ret)
-			goto err_out;
+			goto out;
 
 		fpregs_lock();
 		ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
@@ -475,7 +479,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		fpregs_deactivate(fpu);
 	fpregs_unlock();
 
-err_out:
+out:
 	if (ret)
 		fpu__clear_user_states(fpu);
 	return ret;
-- 
GitLab


From 6f7ec77cc8b64ff5037c1945e4650c65c458037d Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Fri, 28 May 2021 22:39:31 +0200
Subject: [PATCH 2573/3804] USB: serial: cp210x: fix alternate function for
 CP2102N QFN20

The QFN20 part has a different GPIO/port function assignment. The
configuration struct bit field is ordered as TX/RX/RS485/WAKEUP/CLK
which exactly matches GPIO0-3 for QFN24/28. However, QFN20 has a
different GPIO to primary function assignment.

Special case QFN20 to properly detect which GPIOs are
available.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Link: https://lore.kernel.org/r/51830b2b24118eb0f77c5c9ac64ffb2f519dbb1d.1622218300.git.stefan@agner.ch
Fixes: c8acfe0aadbe ("USB: serial: cp210x: implement GPIO support for CP2102N")
Cc: stable@vger.kernel.org	# 4.19
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index ee595d1bea0a9..c9f8ebd341229 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -537,6 +537,12 @@ struct cp210x_single_port_config {
 #define CP210X_2NCONFIG_GPIO_RSTLATCH_IDX	587
 #define CP210X_2NCONFIG_GPIO_CONTROL_IDX	600
 
+/* CP2102N QFN20 port configuration values */
+#define CP2102N_QFN20_GPIO2_TXLED_MODE		BIT(2)
+#define CP2102N_QFN20_GPIO3_RXLED_MODE		BIT(3)
+#define CP2102N_QFN20_GPIO1_RS485_MODE		BIT(4)
+#define CP2102N_QFN20_GPIO0_CLK_MODE		BIT(6)
+
 /* CP210X_VENDOR_SPECIFIC, CP210X_WRITE_LATCH call writes these 0x2 bytes. */
 struct cp210x_gpio_write {
 	u8	mask;
@@ -1733,7 +1739,19 @@ static int cp2102n_gpioconf_init(struct usb_serial *serial)
 	priv->gpio_pushpull = (gpio_pushpull >> 3) & 0x0f;
 
 	/* 0 indicates GPIO mode, 1 is alternate function */
-	priv->gpio_altfunc = (gpio_ctrl >> 2) & 0x0f;
+	if (priv->partnum == CP210X_PARTNUM_CP2102N_QFN20) {
+		/* QFN20 is special... */
+		if (gpio_ctrl & CP2102N_QFN20_GPIO0_CLK_MODE)   /* GPIO 0 */
+			priv->gpio_altfunc |= BIT(0);
+		if (gpio_ctrl & CP2102N_QFN20_GPIO1_RS485_MODE) /* GPIO 1 */
+			priv->gpio_altfunc |= BIT(1);
+		if (gpio_ctrl & CP2102N_QFN20_GPIO2_TXLED_MODE) /* GPIO 2 */
+			priv->gpio_altfunc |= BIT(2);
+		if (gpio_ctrl & CP2102N_QFN20_GPIO3_RXLED_MODE) /* GPIO 3 */
+			priv->gpio_altfunc |= BIT(3);
+	} else {
+		priv->gpio_altfunc = (gpio_ctrl >> 2) & 0x0f;
+	}
 
 	if (priv->partnum == CP210X_PARTNUM_CP2102N_QFN28) {
 		/*
-- 
GitLab


From 0159bb020ca9a43b17aa9149f1199643c1d49426 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Wed, 2 Jun 2021 21:31:36 -0400
Subject: [PATCH 2574/3804] Documentation: Add usecases, design and interface
 for core scheduling

Now that core scheduling is merged, update the documentation.

Co-developed-by: Chris Hyser <chris.hyser@oracle.com>
Signed-off-by: Chris Hyser <chris.hyser@oracle.com>
Co-developed-by: Josh Don <joshdon@google.com>
Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210603013136.370918-1-joel@joelfernandes.org
---
 .../admin-guide/hw-vuln/core-scheduling.rst   | 223 ++++++++++++++++++
 Documentation/admin-guide/hw-vuln/index.rst   |   1 +
 2 files changed, 224 insertions(+)
 create mode 100644 Documentation/admin-guide/hw-vuln/core-scheduling.rst

diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
new file mode 100644
index 0000000000000..7b410aef9c5cb
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst
@@ -0,0 +1,223 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Core Scheduling
+===============
+Core scheduling support allows userspace to define groups of tasks that can
+share a core. These groups can be specified either for security usecases (one
+group of tasks doesn't trust another), or for performance usecases (some
+workloads may benefit from running on the same core as they don't need the same
+hardware resources of the shared core, or may prefer different cores if they
+do share hardware resource needs). This document only describes the security
+usecase.
+
+Security usecase
+----------------
+A cross-HT attack involves the attacker and victim running on different Hyper
+Threads of the same core. MDS and L1TF are examples of such attacks.  The only
+full mitigation of cross-HT attacks is to disable Hyper Threading (HT). Core
+scheduling is a scheduler feature that can mitigate some (not all) cross-HT
+attacks. It allows HT to be turned on safely by ensuring that only tasks in a
+user-designated trusted group can share a core. This increase in core sharing
+can also improve performance, however it is not guaranteed that performance
+will always improve, though that is seen to be the case with a number of real
+world workloads. In theory, core scheduling aims to perform at least as well as
+when Hyper Threading is disabled. In practice, this is mostly the case though
+not always: as synchronizing scheduling decisions across 2 or more CPUs in a
+core involves additional overhead - especially when the system is lightly
+loaded. When ``total_threads <= N_CPUS/2``, the extra overhead may cause core
+scheduling to perform more poorly compared to SMT-disabled, where N_CPUS is the
+total number of CPUs. Please measure the performance of your workloads always.
+
+Usage
+-----
+Core scheduling support is enabled via the ``CONFIG_SCHED_CORE`` config option.
+Using this feature, userspace defines groups of tasks that can be co-scheduled
+on the same core. The core scheduler uses this information to make sure that
+tasks that are not in the same group never run simultaneously on a core, while
+doing its best to satisfy the system's scheduling requirements.
+
+Core scheduling can be enabled via the ``PR_SCHED_CORE`` prctl interface.
+This interface provides support for the creation of core scheduling groups, as
+well as admission and removal of tasks from created groups::
+
+    #include <sys/prctl.h>
+
+    int prctl(int option, unsigned long arg2, unsigned long arg3,
+            unsigned long arg4, unsigned long arg5);
+
+option:
+    ``PR_SCHED_CORE``
+
+arg2:
+    Command for operation, must be one of:
+
+    - ``PR_SCHED_CORE_GET`` -- get core_sched cookie of ``pid``.
+    - ``PR_SCHED_CORE_CREATE`` -- create a new unique cookie for ``pid``.
+    - ``PR_SCHED_CORE_SHARE_TO`` -- push core_sched cookie to ``pid``.
+    - ``PR_SCHED_CORE_SHARE_FROM`` -- pull core_sched cookie from ``pid``.
+
+arg3:
+    ``pid`` of the task for which the operation applies.
+
+arg4:
+    ``pid_type`` for which the operation applies. It is of type ``enum pid_type``.
+    For example, if arg4 is ``PIDTYPE_TGID``, then the operation of this command
+    will be performed for all tasks in the task group of ``pid``.
+
+arg5:
+    userspace pointer to an unsigned long for storing the cookie returned by
+    ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands.
+
+In order for a process to push a cookie to, or pull a cookie from a process, it
+is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the
+process.
+
+Building hierarchies of tasks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The simplest way to build hierarchies of threads/processes which share a
+cookie and thus a core is to rely on the fact that the core-sched cookie is
+inherited across forks/clones and execs, thus setting a cookie for the
+'initial' script/executable/daemon will place every spawned child in the
+same core-sched group.
+
+Cookie Transferral
+~~~~~~~~~~~~~~~~~~
+Transferring a cookie between the current and other tasks is possible using
+PR_SCHED_CORE_SHARE_FROM and PR_SCHED_CORE_SHARE_TO to inherit a cookie from a
+specified task or share a cookie with a task. In combination this allows a
+simple helper program to pull a cookie from a task in an existing core
+scheduling group and share it with already running tasks.
+
+Design/Implementation
+---------------------
+Each task that is tagged is assigned a cookie internally in the kernel. As
+mentioned in `Usage`_, tasks with the same cookie value are assumed to trust
+each other and share a core.
+
+The basic idea is that, every schedule event tries to select tasks for all the
+siblings of a core such that all the selected tasks running on a core are
+trusted (same cookie) at any point in time. Kernel threads are assumed trusted.
+The idle task is considered special, as it trusts everything and everything
+trusts it.
+
+During a schedule() event on any sibling of a core, the highest priority task on
+the sibling's core is picked and assigned to the sibling calling schedule(), if
+the sibling has the task enqueued. For the rest of the siblings in the core,
+highest priority task with the same cookie is selected if there is one runnable
+in their individual run queues. If a task with same cookie is not available,
+the idle task is selected.  Idle task is globally trusted.
+
+Once a task has been selected for all the siblings in the core, an IPI is sent to
+siblings for whom a new task was selected. Siblings on receiving the IPI will
+switch to the new task immediately. If an idle task is selected for a sibling,
+then the sibling is considered to be in a `forced idle` state. I.e., it may
+have tasks on its own runqueue to run, however it will still have to run idle.
+More on this in the next section.
+
+Forced-idling of hyperthreads
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The scheduler tries its best to find tasks that trust each other such that all
+tasks selected to be scheduled are of the highest priority in a core.  However,
+it is possible that some runqueues had tasks that were incompatible with the
+highest priority ones in the core. Favoring security over fairness, one or more
+siblings could be forced to select a lower priority task if the highest
+priority task is not trusted with respect to the core wide highest priority
+task.  If a sibling does not have a trusted task to run, it will be forced idle
+by the scheduler (idle thread is scheduled to run).
+
+When the highest priority task is selected to run, a reschedule-IPI is sent to
+the sibling to force it into idle. This results in 4 cases which need to be
+considered depending on whether a VM or a regular usermode process was running
+on either HT::
+
+          HT1 (attack)            HT2 (victim)
+   A      idle -> user space      user space -> idle
+   B      idle -> user space      guest -> idle
+   C      idle -> guest           user space -> idle
+   D      idle -> guest           guest -> idle
+
+Note that for better performance, we do not wait for the destination CPU
+(victim) to enter idle mode. This is because the sending of the IPI would bring
+the destination CPU immediately into kernel mode from user space, or VMEXIT
+in the case of guests. At best, this would only leak some scheduler metadata
+which may not be worth protecting. It is also possible that the IPI is received
+too late on some architectures, but this has not been observed in the case of
+x86.
+
+Trust model
+~~~~~~~~~~~
+Core scheduling maintains trust relationships amongst groups of tasks by
+assigning them a tag that is the same cookie value.
+When a system with core scheduling boots, all tasks are considered to trust
+each other. This is because the core scheduler does not have information about
+trust relationships until userspace uses the above mentioned interfaces, to
+communicate them. In other words, all tasks have a default cookie value of 0
+and are considered system-wide trusted. The forced-idling of siblings running
+cookie-0 tasks is also avoided.
+
+Once userspace uses the above mentioned interfaces to group sets of tasks, tasks
+within such groups are considered to trust each other, but do not trust those
+outside. Tasks outside the group also don't trust tasks within.
+
+Limitations of core-scheduling
+------------------------------
+Core scheduling tries to guarantee that only trusted tasks run concurrently on a
+core. But there could be a small window of time during which untrusted tasks
+run concurrently, or the kernel could be running concurrently with a task not
+trusted by the kernel.
+
+IPI processing delays
+~~~~~~~~~~~~~~~~~~~~~
+Core scheduling selects only trusted tasks to run together. IPI is used to notify
+the siblings to switch to the new task. But there could be hardware delays in
+receiving of the IPI on some arch (on x86, this has not been observed). This may
+cause an attacker task to start running on a CPU before its siblings receive the
+IPI. Even though cache is flushed on entry to user mode, victim tasks on siblings
+may populate data in the cache and micro architectural buffers after the attacker
+starts to run and this is a possibility for data leak.
+
+Open cross-HT issues that core scheduling does not solve
+--------------------------------------------------------
+1. For MDS
+~~~~~~~~~~
+Core scheduling cannot protect against MDS attacks between an HT running in
+user mode and another running in kernel mode. Even though both HTs run tasks
+which trust each other, kernel memory is still considered untrusted. Such
+attacks are possible for any combination of sibling CPU modes (host or guest mode).
+
+2. For L1TF
+~~~~~~~~~~~
+Core scheduling cannot protect against an L1TF guest attacker exploiting a
+guest or host victim. This is because the guest attacker can craft invalid
+PTEs which are not inverted due to a vulnerable guest kernel. The only
+solution is to disable EPT (Extended Page Tables).
+
+For both MDS and L1TF, if the guest vCPUs are configured to not trust each
+other (by tagging separately), then the guest to guest attacks would go away.
+Or it could be a system admin policy which considers guest to guest attacks as
+a guest problem.
+
+Another approach to resolve these would be to make every untrusted task on the
+system to not trust every other untrusted task. While this could reduce
+parallelism of the untrusted tasks, it would still solve the above issues while
+allowing system processes (trusted tasks) to share a core.
+
+3. Protecting the kernel (IRQ, syscall, VMEXIT)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Unfortunately, core scheduling does not protect kernel contexts running on
+sibling hyperthreads from one another. Prototypes of mitigations have been posted
+to LKML to solve this, but it is debatable whether such windows are practically
+exploitable, and whether the performance overhead of the prototypes is worth
+it (not to mention, the added code complexity).
+
+Other Use cases
+---------------
+The main use case for Core scheduling is mitigating the cross-HT vulnerabilities
+with SMT enabled. There are other use cases where this feature could be used:
+
+- Isolating tasks that need a whole core: Examples include realtime tasks, tasks
+  that use SIMD instructions etc.
+- Gang scheduling: Requirements for a group of tasks that needs to be scheduled
+  together could also be realized using core scheduling. One example is vCPUs of
+  a VM.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index ca4dbdd9016d5..f12cda55538b5 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -15,3 +15,4 @@ are configurable at compile, boot or run time.
    tsx_async_abort
    multihit.rst
    special-register-buffer-data-sampling.rst
+   core-scheduling.rst
-- 
GitLab


From 156172a13ff0626d8e23276e741c7e2cb2f3b572 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 8 Jun 2021 19:54:15 +0200
Subject: [PATCH 2575/3804] irq_work: Make irq_work_queue() NMI-safe again

Someone carelessly put NMI unsafe code in irq_work_queue(), breaking
just about every single user. Also, someone has a terrible comment
style.

Fixes: e2b5bcf9f5ba ("irq_work: record irq_work_queue() call stack")
Reported-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/YL+uBq8LzXXZsYVf@hirez.programming.kicks-ass.net
---
 kernel/irq_work.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 23a7a0ba1388a..db8c248ebc8c8 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -70,9 +70,6 @@ bool irq_work_queue(struct irq_work *work)
 	if (!irq_work_claim(work))
 		return false;
 
-	/*record irq_work call stack in order to print it in KASAN reports*/
-	kasan_record_aux_stack(work);
-
 	/* Queue the entry and raise the IPI if needed. */
 	preempt_disable();
 	__irq_work_queue_local(work);
-- 
GitLab


From a8383dfb2138742a1bb77b481ada047aededa2ba Mon Sep 17 00:00:00 2001
From: CodyYao-oc <CodyYao-oc@zhaoxin.com>
Date: Mon, 7 Jun 2021 10:53:35 +0800
Subject: [PATCH 2576/3804] x86/nmi_watchdog: Fix old-style NMI watchdog
 regression on old Intel CPUs

The following commit:

   3a4ac121c2ca ("x86/perf: Add hardware performance events support for Zhaoxin CPU.")

Got the old-style NMI watchdog logic wrong and broke it for basically every
Intel CPU where it was active. Which is only truly old CPUs, so few people noticed.

On CPUs with perf events support we turn off the old-style NMI watchdog, so it
was pretty pointless to add the logic for X86_VENDOR_ZHAOXIN to begin with ... :-/

Anyway, the fix is to restore the old logic and add a 'break'.

[ mingo: Wrote a new changelog. ]

Fixes: 3a4ac121c2ca ("x86/perf: Add hardware performance events support for Zhaoxin CPU.")
Signed-off-by: CodyYao-oc <CodyYao-oc@zhaoxin.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210607025335.9643-1-CodyYao-oc@zhaoxin.com
---
 arch/x86/kernel/cpu/perfctr-watchdog.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 3ef5868ac588a..7aecb2fc31863 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 		case 15:
 			return msr - MSR_P4_BPU_PERFCTR0;
 		}
-		fallthrough;
+		break;
 	case X86_VENDOR_ZHAOXIN:
 	case X86_VENDOR_CENTAUR:
 		return msr - MSR_ARCH_PERFMON_PERFCTR0;
@@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 		case 15:
 			return msr - MSR_P4_BSU_ESCR0;
 		}
-		fallthrough;
+		break;
 	case X86_VENDOR_ZHAOXIN:
 	case X86_VENDOR_CENTAUR:
 		return msr - MSR_ARCH_PERFMON_EVENTSEL0;
-- 
GitLab


From 584fd3b31889852d0d6f3dd1e3d8e9619b660d2c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Jun 2021 11:45:58 +0200
Subject: [PATCH 2577/3804] objtool: Fix .symtab_shndx handling for
 elf_create_undef_symbol()

When an ELF object uses extended symbol section indexes (IOW it has a
.symtab_shndx section), these must be kept in sync with the regular
symbol table (.symtab).

So for every new symbol we emit, make sure to also emit a
.symtab_shndx value to keep the arrays of equal size.

Note: since we're writing an UNDEF symbol, most GElf_Sym fields will
be 0 and we can repurpose one (st_size) to host the 0 for the xshndx
value.

Fixes: 2f2f7e47f052 ("objtool: Add elf_create_undef_symbol()")
Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Fangrui Song <maskray@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lkml.kernel.org/r/YL3q1qFO9QIRL/BA@hirez.programming.kicks-ass.net
---
 tools/objtool/elf.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 743c2e9d0f564..41bca1d13d8e4 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -717,7 +717,7 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
 
 struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
 {
-	struct section *symtab;
+	struct section *symtab, *symtab_shndx;
 	struct symbol *sym;
 	Elf_Data *data;
 	Elf_Scn *s;
@@ -769,6 +769,29 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
 	symtab->len += data->d_size;
 	symtab->changed = true;
 
+	symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+	if (symtab_shndx) {
+		s = elf_getscn(elf->elf, symtab_shndx->idx);
+		if (!s) {
+			WARN_ELF("elf_getscn");
+			return NULL;
+		}
+
+		data = elf_newdata(s);
+		if (!data) {
+			WARN_ELF("elf_newdata");
+			return NULL;
+		}
+
+		data->d_buf = &sym->sym.st_size; /* conveniently 0 */
+		data->d_size = sizeof(Elf32_Word);
+		data->d_align = 4;
+		data->d_type = ELF_T_WORD;
+
+		symtab_shndx->len += 4;
+		symtab_shndx->changed = true;
+	}
+
 	sym->sec = find_section_by_index(elf, 0);
 
 	elf_add_symbol(elf, sym);
-- 
GitLab


From c336a5ee984708db4826ef9e47d184e638e29717 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Wed, 9 Jun 2021 17:21:19 +0800
Subject: [PATCH 2578/3804] drm: Lock pointer access in drm_master_release()

This patch eliminates the following smatch warning:
drivers/gpu/drm/drm_auth.c:320 drm_master_release() warn: unlocked access 'master' (line 318) expected lock '&dev->master_mutex'

The 'file_priv->master' field should be protected by the mutex lock to
'&dev->master_mutex'. This is because other processes can concurrently
modify this field and free the current 'file_priv->master'
pointer. This could result in a use-after-free error when 'master' is
dereferenced in subsequent function calls to
'drm_legacy_lock_master_cleanup()' or to 'drm_lease_revoke()'.

An example of a scenario that would produce this error can be seen
from a similar bug in 'drm_getunique()' that was reported by Syzbot:
https://syzkaller.appspot.com/bug?id=148d2f1dfac64af52ffd27b661981a540724f803

In the Syzbot report, another process concurrently acquired the
device's master mutex in 'drm_setmaster_ioctl()', then overwrote
'fpriv->master' in 'drm_new_set_master()'. The old value of
'fpriv->master' was subsequently freed before the mutex was unlocked.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210609092119.173590-1-desmondcheongzx@gmail.com
---
 drivers/gpu/drm/drm_auth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index f2d46b7ac6f9f..232abbba36868 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -314,9 +314,10 @@ int drm_master_open(struct drm_file *file_priv)
 void drm_master_release(struct drm_file *file_priv)
 {
 	struct drm_device *dev = file_priv->minor->dev;
-	struct drm_master *master = file_priv->master;
+	struct drm_master *master;
 
 	mutex_lock(&dev->master_mutex);
+	master = file_priv->master;
 	if (file_priv->magic)
 		idr_remove(&file_priv->master->magic_map, file_priv->magic);
 
-- 
GitLab


From 95bf69b400f41fbba7a2dc49b0152dd7bdc9a508 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Thu, 10 Jun 2021 16:54:18 +0800
Subject: [PATCH 2579/3804] KVM: selftests: Fix compiling errors when
 initializing the static structure

Errors like below were produced from test_util.c when compiling the KVM
selftests on my local platform.

lib/test_util.c: In function 'vm_mem_backing_src_alias':
lib/test_util.c:177:12: error: initializer element is not constant
    .flag = anon_flags,
            ^~~~~~~~~~
lib/test_util.c:177:12: note: (near initialization for 'aliases[0].flag')

The reason is that we are using non-const expressions to initialize the
static structure, which will probably trigger a compiling error/warning
on stricter GCC versions. Fix it by converting the two const variables
"anon_flags" and "anon_huge_flags" into more stable macros.

Fixes: b3784bc28ccc0 ("KVM: selftests: refactor vm_mem_backing_src_type flags")
Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Message-Id: <20210610085418.35544-1-wangyanan55@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/test_util.c | 38 ++++++++++-----------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 6ad6c8276b2eb..af1031fed97f7 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -166,75 +166,75 @@ size_t get_def_hugetlb_pagesz(void)
 	return 0;
 }
 
+#define ANON_FLAGS	(MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS	(ANON_FLAGS | MAP_HUGETLB)
+
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
 {
-	static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS;
-	static const int anon_huge_flags = anon_flags | MAP_HUGETLB;
-
 	static const struct vm_mem_backing_src_alias aliases[] = {
 		[VM_MEM_SRC_ANONYMOUS] = {
 			.name = "anonymous",
-			.flag = anon_flags,
+			.flag = ANON_FLAGS,
 		},
 		[VM_MEM_SRC_ANONYMOUS_THP] = {
 			.name = "anonymous_thp",
-			.flag = anon_flags,
+			.flag = ANON_FLAGS,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
 			.name = "anonymous_hugetlb",
-			.flag = anon_huge_flags,
+			.flag = ANON_HUGE_FLAGS,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
 			.name = "anonymous_hugetlb_16kb",
-			.flag = anon_huge_flags | MAP_HUGE_16KB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
 			.name = "anonymous_hugetlb_64kb",
-			.flag = anon_huge_flags | MAP_HUGE_64KB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
 			.name = "anonymous_hugetlb_512kb",
-			.flag = anon_huge_flags | MAP_HUGE_512KB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
 			.name = "anonymous_hugetlb_1mb",
-			.flag = anon_huge_flags | MAP_HUGE_1MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
 			.name = "anonymous_hugetlb_2mb",
-			.flag = anon_huge_flags | MAP_HUGE_2MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
 			.name = "anonymous_hugetlb_8mb",
-			.flag = anon_huge_flags | MAP_HUGE_8MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
 			.name = "anonymous_hugetlb_16mb",
-			.flag = anon_huge_flags | MAP_HUGE_16MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
 			.name = "anonymous_hugetlb_32mb",
-			.flag = anon_huge_flags | MAP_HUGE_32MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
 			.name = "anonymous_hugetlb_256mb",
-			.flag = anon_huge_flags | MAP_HUGE_256MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
 			.name = "anonymous_hugetlb_512mb",
-			.flag = anon_huge_flags | MAP_HUGE_512MB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
 			.name = "anonymous_hugetlb_1gb",
-			.flag = anon_huge_flags | MAP_HUGE_1GB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
 			.name = "anonymous_hugetlb_2gb",
-			.flag = anon_huge_flags | MAP_HUGE_2GB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
 		},
 		[VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
 			.name = "anonymous_hugetlb_16gb",
-			.flag = anon_huge_flags | MAP_HUGE_16GB,
+			.flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
 		},
 		[VM_MEM_SRC_SHMEM] = {
 			.name = "shmem",
-- 
GitLab


From 02ffbe6351f5c88337143bcbc649832ded7445c0 Mon Sep 17 00:00:00 2001
From: ChenXiaoSong <chenxiaosong2@huawei.com>
Date: Wed, 9 Jun 2021 20:22:17 +0800
Subject: [PATCH 2580/3804] KVM: SVM: fix doc warnings

Fix kernel-doc warnings:

arch/x86/kvm/svm/avic.c:233: warning: Function parameter or member 'activate' not described in 'avic_update_access_page'
arch/x86/kvm/svm/avic.c:233: warning: Function parameter or member 'kvm' not described in 'avic_update_access_page'
arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'e' not described in 'get_pi_vcpu_info'
arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'kvm' not described in 'get_pi_vcpu_info'
arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'svm' not described in 'get_pi_vcpu_info'
arch/x86/kvm/svm/avic.c:781: warning: Function parameter or member 'vcpu_info' not described in 'get_pi_vcpu_info'
arch/x86/kvm/svm/avic.c:1009: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
Message-Id: <20210609122217.2967131-1-chenxiaosong2@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/avic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0e62e6a2438cf..5e7e920113f39 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 	return &avic_physical_id_table[index];
 }
 
-/**
+/*
  * Note:
  * AVIC hardware walks the nested page table to check permissions,
  * but does not use the SPA address specified in the leaf page
@@ -764,7 +764,7 @@ out:
 	return ret;
 }
 
-/**
+/*
  * Note:
  * The HW cannot support posting multicast/broadcast
  * interrupts to a vCPU. So, we still use legacy interrupt
@@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
 	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
 }
 
-/**
+/*
  * This function is called during VCPU halt/unhalt.
  */
 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
-- 
GitLab


From 551912d286e940e63abe9e005f434691ee24fd15 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 28 May 2021 15:07:56 -0500
Subject: [PATCH 2581/3804] KVM: x86: Fix fall-through warnings for Clang

In preparation to enable -Wimplicit-fallthrough for Clang, fix a couple
of warnings by explicitly adding break statements instead of just letting
the code fall through to the next case.

Link: https://github.com/KSPP/linux/issues/115
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Message-Id: <20210528200756.GA39320@embeddedor>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c   | 1 +
 arch/x86/kvm/vmx/vmx.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9a48f138832d4..b4da665bb8923 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
 		if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
 			entry->ecx = F(RDPID);
 		++array->nent;
+		break;
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50b42d7a8a117..c2a779b688e64 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 	switch (kvm_get_apic_mode(vcpu)) {
 	case LAPIC_MODE_INVALID:
 		WARN_ONCE(true, "Invalid local APIC state");
+		break;
 	case LAPIC_MODE_DISABLED:
 		break;
 	case LAPIC_MODE_XAPIC:
-- 
GitLab


From 2adcb4c5a52a2623cd2b43efa7041e74d19f3a5e Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Thu, 10 Jun 2021 10:34:25 +0300
Subject: [PATCH 2582/3804] RDMA: Verify port when creating flow rule

Validate port value provided by the user and with that remove no longer
needed validation by the driver.  The missing check in the mlx5_ib driver
could cause the oops below.

Call trace:
  _create_flow_rule+0x2d4/0xf28 [mlx5_ib]
  mlx5_ib_create_flow+0x2d0/0x5b0 [mlx5_ib]
  ib_uverbs_ex_create_flow+0x4cc/0x624 [ib_uverbs]
  ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0xd4/0x150 [ib_uverbs]
  ib_uverbs_cmd_verbs.isra.7+0xb28/0xc50 [ib_uverbs]
  ib_uverbs_ioctl+0x158/0x1d0 [ib_uverbs]
  do_vfs_ioctl+0xd0/0xaf0
  ksys_ioctl+0x84/0xb4
  __arm64_sys_ioctl+0x28/0xc4
  el0_svc_common.constprop.3+0xa4/0x254
  el0_svc_handler+0x84/0xa0
  el0_svc+0x10/0x26c
 Code: b9401260 f9615681 51000400 8b001c20 (f9403c1a)

Fixes: 436f2ad05a0b ("IB/core: Export ib_create/destroy_flow through uverbs")
Link: https://lore.kernel.org/r/faad30dc5219a01727f47db3dc2f029d07c82c00.1623309971.git.leonro@nvidia.com
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 5 +++++
 drivers/infiniband/hw/mlx4/main.c    | 3 ---
 drivers/infiniband/hw/mlx5/fs.c      | 5 ++---
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d5e15a8c870d1..64e4be1cbec7c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3248,6 +3248,11 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
 		goto err_free_attr;
 	}
 
+	if (!rdma_is_port_valid(uobj->context->device, cmd.flow_attr.port)) {
+		err = -EINVAL;
+		goto err_uobj;
+	}
+
 	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
 	if (!qp) {
 		err = -EINVAL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 16704262fc3a8..230a6ae0ab5a0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1699,9 +1699,6 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
-	if (!rdma_is_port_valid(qp->device, flow_attr->port))
-		return ERR_PTR(-EINVAL);
-
 	if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)
 		return ERR_PTR(-EOPNOTSUPP);
 
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index f84441ff0c81a..18ee2f2938250 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -1194,9 +1194,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 		goto free_ucmd;
 	}
 
-	if (flow_attr->port > dev->num_ports ||
-	    (flow_attr->flags &
-	     ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) {
+	if (flow_attr->flags &
+	    ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) {
 		err = -EINVAL;
 		goto free_ucmd;
 	}
-- 
GitLab


From 6466f03fdf98dd78b9453deb8a7cb0d887c09fec Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Thu, 10 Jun 2021 10:34:26 +0300
Subject: [PATCH 2583/3804] RDMA/mlx5: Delete right entry from MR signature
 database

The value mr->sig is stored in the entry upon mr allocation, however, ibmr
is wrongly entered here as "old", therefore, xa_cmpxchg() does not replace
the entry with NULL, which leads to the following trace:

 WARNING: CPU: 28 PID: 2078 at drivers/infiniband/hw/mlx5/main.c:3643 mlx5_ib_stage_init_cleanup+0x4d/0x60 [mlx5_ib]
 Modules linked in: nvme_rdma nvme_fabrics nvme_core 8021q garp mrp bonding bridge stp llc rfkill rpcrdma sunrpc rdma_ucm ib_srpt ib_isert iscsi_tad
 CPU: 28 PID: 2078 Comm: reboot Tainted: G               X --------- ---  5.13.0-0.rc2.19.el9.x86_64 #1
 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 2.9.1 12/07/2018
 RIP: 0010:mlx5_ib_stage_init_cleanup+0x4d/0x60 [mlx5_ib]
 Code: 8d bb 70 1f 00 00 be 00 01 00 00 e8 9d 94 ce da 48 3d 00 01 00 00 75 02 5b c3 0f 0b 5b c3 0f 0b 48 83 bb b0 20 00 00 00 74 d5 <0f> 0b eb d1 4
 RSP: 0018:ffffa8db06d33c90 EFLAGS: 00010282
 RAX: 0000000000000000 RBX: ffff97f890a44000 RCX: ffff97f900ec0160
 RDX: 0000000000000000 RSI: 0000000080080001 RDI: ffff97f890a44000
 RBP: ffffffffc0c189b8 R08: 0000000000000001 R09: 0000000000000000
 R10: 0000000000000001 R11: 0000000000000300 R12: ffff97f890a44000
 R13: ffffffffc0c36030 R14: 00000000fee1dead R15: 0000000000000000
 FS:  00007f0d5a8a3b40(0000) GS:ffff98077fb80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000555acbf4f450 CR3: 00000002a6f56002 CR4: 00000000001706e0
 Call Trace:
  mlx5r_remove+0x39/0x60 [mlx5_ib]
  auxiliary_bus_remove+0x1b/0x30
  __device_release_driver+0x17a/0x230
  device_release_driver+0x24/0x30
  bus_remove_device+0xdb/0x140
  device_del+0x18b/0x3e0
  mlx5_detach_device+0x59/0x90 [mlx5_core]
  mlx5_unload_one+0x22/0x60 [mlx5_core]
  shutdown+0x31/0x3a [mlx5_core]
  pci_device_shutdown+0x34/0x60
  device_shutdown+0x15b/0x1c0
  __do_sys_reboot.cold+0x2f/0x5b
  ? vfs_writev+0xc7/0x140
  ? handle_mm_fault+0xc5/0x290
  ? do_writev+0x6b/0x110
  do_syscall_64+0x40/0x80
  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: e6fb246ccafb ("RDMA/mlx5: Consolidate MR destruction to mlx5_ib_dereg_mr()")
Link: https://lore.kernel.org/r/f3f585ea0db59c2a78f94f65eedeafc5a2374993.1623309971.git.leonro@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9662cd39c7ffc..425423dfac724 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1940,8 +1940,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
 		mlx5r_deref_wait_odp_mkey(&mr->mmkey);
 
 	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
-		xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), ibmr,
-			   NULL, GFP_KERNEL);
+		xa_cmpxchg(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key),
+			   mr->sig, NULL, GFP_KERNEL);
 
 		if (mr->mtt_mr) {
 			rc = mlx5_ib_dereg_mr(&mr->mtt_mr->ibmr, NULL);
-- 
GitLab


From 2ba0aa2feebda680ecfc3c552e867cf4d1b05a3a Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@nvidia.com>
Date: Thu, 10 Jun 2021 10:34:27 +0300
Subject: [PATCH 2584/3804] IB/mlx5: Fix initializing CQ fragments buffer

The function init_cq_frag_buf() can be called to initialize the current CQ
fragments buffer cq->buf, or the temporary cq->resize_buf that is filled
during CQ resize operation.

However, the offending commit started to use function get_cqe() for
getting the CQEs. The issue with this change is that get_cqe() always
returns CQEs from cq->buf, which leads us to initialize the wrong buffer,
and in case of enlarging the CQ we try to access elements beyond the size
of the current cq->buf and eventually hit a kernel panic.

 [exception RIP: init_cq_frag_buf+103]
  [ffff9f799ddcbcd8] mlx5_ib_resize_cq at ffffffffc0835d60 [mlx5_ib]
  [ffff9f799ddcbdb0] ib_resize_cq at ffffffffc05270df [ib_core]
  [ffff9f799ddcbdc0] llt_rdma_setup_qp at ffffffffc0a6a712 [llt]
  [ffff9f799ddcbe10] llt_rdma_cc_event_action at ffffffffc0a6b411 [llt]
  [ffff9f799ddcbe98] llt_rdma_client_conn_thread at ffffffffc0a6bb75 [llt]
  [ffff9f799ddcbec8] kthread at ffffffffa66c5da1
  [ffff9f799ddcbf50] ret_from_fork_nospec_begin at ffffffffa6d95ddd

Fix it by getting the needed CQE by calling mlx5_frag_buf_get_wqe() that
takes the correct source buffer as a parameter.

Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)")
Link: https://lore.kernel.org/r/90a0e8c924093cfa50a482880ad7e7edb73dc19a.1623309971.git.leonro@nvidia.com
Signed-off-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx5/cq.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index eb92cefffd777..9ce01f7296739 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -849,15 +849,14 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
 	ib_umem_release(cq->buf.umem);
 }
 
-static void init_cq_frag_buf(struct mlx5_ib_cq *cq,
-			     struct mlx5_ib_cq_buf *buf)
+static void init_cq_frag_buf(struct mlx5_ib_cq_buf *buf)
 {
 	int i;
 	void *cqe;
 	struct mlx5_cqe64 *cqe64;
 
 	for (i = 0; i < buf->nent; i++) {
-		cqe = get_cqe(cq, i);
+		cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
 		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
 		cqe64->op_own = MLX5_CQE_INVALID << 4;
 	}
@@ -883,7 +882,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (err)
 		goto err_db;
 
-	init_cq_frag_buf(cq, &cq->buf);
+	init_cq_frag_buf(&cq->buf);
 
 	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
@@ -1184,7 +1183,7 @@ static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (err)
 		goto ex;
 
-	init_cq_frag_buf(cq, cq->resize_buf);
+	init_cq_frag_buf(cq->resize_buf);
 
 	return 0;
 
-- 
GitLab


From 9bd1cc4148cbea44ca7d8254b50edb6cb660957a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:59:03 +0100
Subject: [PATCH 2585/3804] nios2: Do not include linux/irqdomain.h from
 asm/irq.h

Including linux/irqdomain.h from asm/irq.h is going to break
as soon as linux/irqdomain.h will include linux/irq.h, so
let's fix this. Code relying on linux/irqdomain.h should include
it directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/nios2/include/asm/irq.h | 1 -
 arch/nios2/kernel/irq.c      | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nios2/include/asm/irq.h b/arch/nios2/include/asm/irq.h
index 13ce372722792..c52c94884e93d 100644
--- a/arch/nios2/include/asm/irq.h
+++ b/arch/nios2/include/asm/irq.h
@@ -10,6 +10,5 @@
 #define NIOS2_CPU_NR_IRQS	32
 
 #include <asm-generic/irq.h>
-#include <linux/irqdomain.h>
 
 #endif
diff --git a/arch/nios2/kernel/irq.c b/arch/nios2/kernel/irq.c
index 5f3555ce48656..c6a1a9f6ac428 100644
--- a/arch/nios2/kernel/irq.c
+++ b/arch/nios2/kernel/irq.c
@@ -11,6 +11,7 @@
 
 #include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 
 static u32 ienable;
-- 
GitLab


From aa5f6a89700700fe6fe7e8727581a21a7d679630 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:31:09 +0100
Subject: [PATCH 2586/3804] staging: octeon-hcd: Directly include linux/of.h

This driver currently obtains linux/of.h by luck and a chain of
bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/staging/octeon-usb/octeon-hcd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c
index f27f20a4aa2d2..a1cd81d4a1144 100644
--- a/drivers/staging/octeon-usb/octeon-hcd.c
+++ b/drivers/staging/octeon-usb/octeon-hcd.c
@@ -50,8 +50,10 @@
 #include <linux/module.h>
 #include <linux/usb/hcd.h>
 #include <linux/prefetch.h>
+#include <linux/irqdomain.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 
 #include <asm/octeon/octeon.h>
 
-- 
GitLab


From c7d49545997eab111aec14be152842f56a0cabc4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 16 May 2021 17:57:15 +0100
Subject: [PATCH 2587/3804] mfd: ioc3: Directly include linux/irqdomain.h

This driver includes linux/irqdomain.h via a bizarre set of
indirections, which we are about to break.

Directly include the required file.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/mfd/ioc3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/ioc3.c b/drivers/mfd/ioc3.c
index c73ec78f255ba..99b9c113f9647 100644
--- a/drivers/mfd/ioc3.c
+++ b/drivers/mfd/ioc3.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-- 
GitLab


From bc9a454a9440e2872ecf71256fb962e4bb35e937 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:30:34 +0100
Subject: [PATCH 2588/3804] watchdog/octeon-wdt: Directly include
 linux/irqdomain.h

This driver currently obtains linux/irqdomain.h by luck and
a chain of bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/watchdog/octeon-wdt-main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/watchdog/octeon-wdt-main.c b/drivers/watchdog/octeon-wdt-main.c
index fde9e739b4361..391c774a1f673 100644
--- a/drivers/watchdog/octeon-wdt-main.c
+++ b/drivers/watchdog/octeon-wdt-main.c
@@ -54,6 +54,7 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 
 #include <asm/mipsregs.h>
 #include <asm/uasm.h>
-- 
GitLab


From 1982752f6ba6a9d74a214b008ae9e336339276e8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:30:34 +0100
Subject: [PATCH 2589/3804] irqchip/mips-gic: Directly include
 linux/irqdomain.h

This driver currently obtains linux/irqdomain.h by luck and
a chain of bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-mips-gic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 215885962bb0a..a2cbf0acff1c6 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqdomain.h>
 #include <linux/of_address.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
-- 
GitLab


From 95af1df6f4e2b121ce33166d61c99250143073b5 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 09:31:09 +0100
Subject: [PATCH 2590/3804] MIPS: lantiq: Directly include linux/of.h in
 xway/dma.c

This driver currently obtains linux/of.h by luck and a chain of
bizarre inclusions, which we're about to fix.

Let's include the required file directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/lantiq/xway/dma.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
index aeb1b989cd4ee..63dccb2ed08b2 100644
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/clk.h>
 #include <linux/err.h>
+#include <linux/of.h>
 
 #include <lantiq_soc.h>
 #include <xway_dma.h>
-- 
GitLab


From 18ca45f5ba1e31704bcca038b8b612e9b1f52b4f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 19 May 2021 09:01:06 +0100
Subject: [PATCH 2591/3804] MIPS: Add missing linux/irqdomain.h includes

A number of MIPS platforms are failing to directly include
irqdomain.h. Fix this so that we can drop unnecessary dependencies.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/pci/pci-rt3883.c       | 1 +
 arch/mips/pci/pci-xtalk-bridge.c | 1 +
 arch/mips/sgi-ip27/ip27-irq.c    | 1 +
 arch/mips/sgi-ip30/ip30-irq.c    | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index aebd4964ea342..c48e23cf5b5ee 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c
index d2216942af188..ab9bedb82b282 100644
--- a/arch/mips/pci/pci-xtalk-bridge.c
+++ b/arch/mips/pci/pci-xtalk-bridge.c
@@ -13,6 +13,7 @@
 #include <linux/platform_data/xtalk-bridge.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/crc16.h>
+#include <linux/irqdomain.h>
 
 #include <asm/pci/bridge.h>
 #include <asm/paccess.h>
diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c
index 42df9fafa943e..95c1bff1ab9f8 100644
--- a/arch/mips/sgi-ip27/ip27-irq.c
+++ b/arch/mips/sgi-ip27/ip27-irq.c
@@ -9,6 +9,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
 #include <linux/bitops.h>
diff --git a/arch/mips/sgi-ip30/ip30-irq.c b/arch/mips/sgi-ip30/ip30-irq.c
index e8374e4c705b9..ba87704073c8f 100644
--- a/arch/mips/sgi-ip30/ip30-irq.c
+++ b/arch/mips/sgi-ip30/ip30-irq.c
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/percpu.h>
 #include <linux/spinlock.h>
 #include <linux/tick.h>
-- 
GitLab


From a12a9c5c03072ec6b1f4f9bd7a554a718ecf234a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 Sep 2018 16:11:20 +0100
Subject: [PATCH 2592/3804] MIPS: Do not include linux/irqdomain.h from
 asm/irq.h

Including linux/irqdomain.h from asm/irq.h is going to break
as soon as linux/irqdomain.h will include linux/irq.h, so
let's fix this. Code relying on linux/irqdomain.h should include
it directly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/mips/include/asm/irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index f021de661c3a6..d1477ecb1af9b 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -11,7 +11,6 @@
 
 #include <linux/linkage.h>
 #include <linux/smp.h>
-#include <linux/irqdomain.h>
 
 #include <asm/mipsmtregs.h>
 
-- 
GitLab


From 13a9a5d17d07cec8181ea0843674ce48c191628e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 17:09:06 +0100
Subject: [PATCH 2593/3804] powerpc: Add missing linux/{of.h,irqdomain.h}
 include directives

A bunch of PPC files are missing the inclusion of linux/of.h and
linux/irqdomain.h, relying on transitive inclusion from another
file.

As we are about to break this dependency, make sure these dependencies
are explicit.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/kernel/mce.c                        | 1 +
 arch/powerpc/kvm/book3s_hv_uvmem.c               | 1 +
 arch/powerpc/kvm/book3s_xive.c                   | 1 +
 arch/powerpc/kvm/book3s_xive_native.c            | 1 +
 arch/powerpc/mm/book3s64/radix_pgtable.c         | 1 +
 arch/powerpc/platforms/cell/pmu.c                | 1 +
 arch/powerpc/platforms/embedded6xx/flipper-pic.c | 1 +
 arch/powerpc/platforms/ps3/interrupt.c           | 1 +
 arch/powerpc/platforms/pseries/ibmebus.c         | 1 +
 arch/powerpc/sysdev/ehv_pic.c                    | 1 +
 arch/powerpc/sysdev/fsl_mpic_err.c               | 1 +
 arch/powerpc/sysdev/xics/icp-hv.c                | 1 +
 arch/powerpc/sysdev/xics/icp-opal.c              | 1 +
 13 files changed, 13 insertions(+)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 9a3c2a84a2aca..15e7b4900689a 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -18,6 +18,7 @@
 #include <linux/extable.h>
 #include <linux/ftrace.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 
 #include <asm/interrupt.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 84e5a2dc8be53..b898a596db427 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -90,6 +90,7 @@
 #include <linux/migrate.h>
 #include <linux/kvm_host.h>
 #include <linux/ksm.h>
+#include <linux/of.h>
 #include <asm/ultravisor.h>
 #include <asm/mman.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a5..08910d44d2cb4 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -14,6 +14,7 @@
 #include <linux/percpu.h>
 #include <linux/cpumask.h>
 #include <linux/uaccess.h>
+#include <linux/irqdomain.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
 #include <asm/hvcall.h>
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a35..30bacf6dd53d3 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -12,6 +12,7 @@
 #include <linux/spinlock.h>
 #include <linux/delay.h>
 #include <linux/file.h>
+#include <linux/irqdomain.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_book3s.h>
 #include <asm/kvm_ppc.h>
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b4634..edadb9e9c9c0f 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/sched/mm.h>
 #include <linux/memblock.h>
+#include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c
index 35bbd15582af0..b207a7f99be50 100644
--- a/arch/powerpc/platforms/cell/pmu.c
+++ b/arch/powerpc/platforms/cell/pmu.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/types.h>
 #include <linux/export.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
index d39a9213a3e69..609bda2ad5dd2 100644
--- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c
+++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <asm/io.h>
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index 78f2339ed5cb1..e68f4fb1c1f40 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 
 #include <asm/machdep.h>
 #include <asm/udbg.h>
diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c
index a15ab33646b30..c6c79ef55e138 100644
--- a/arch/powerpc/platforms/pseries/ibmebus.c
+++ b/arch/powerpc/platforms/pseries/ibmebus.c
@@ -42,6 +42,7 @@
 #include <linux/kobject.h>
 #include <linux/dma-map-ops.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c
index 48866e6c1efb7..00705258ecf96 100644
--- a/arch/powerpc/sysdev/ehv_pic.c
+++ b/arch/powerpc/sysdev/ehv_pic.c
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/sysdev/fsl_mpic_err.c b/arch/powerpc/sysdev/fsl_mpic_err.c
index 13583bbc3e8e8..5fa5fa215541f 100644
--- a/arch/powerpc/sysdev/fsl_mpic_err.c
+++ b/arch/powerpc/sysdev/fsl_mpic_err.c
@@ -8,6 +8,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/arch/powerpc/sysdev/xics/icp-hv.c b/arch/powerpc/sysdev/xics/icp-hv.c
index 21b9d1bf39ff6..6765d9e264a39 100644
--- a/arch/powerpc/sysdev/xics/icp-hv.c
+++ b/arch/powerpc/sysdev/xics/icp-hv.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 68fd2540b0931..675d708863d57 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -7,6 +7,7 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/cpu.h>
 #include <linux/of.h>
 
-- 
GitLab


From 5951be4c9c361242c9f0d7c9b9ef03fe82e45c7b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 14 Sep 2018 09:49:11 +0100
Subject: [PATCH 2594/3804] scsi/ibmvscsi: Directly include
 linux/{of.h,irqdomain.h}

A couple of ibmvscsi files are missing the inclusion of linux/of.h
and linux/irqdomain.h, relying on transitive inclusion from another
file.

As we are about to break this dependency, make sure these dependencies
are explicit.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/scsi/ibmvscsi/ibmvfc.c           | 1 +
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 6540d48eb0e8e..715c34904e3e5 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -13,6 +13,7 @@
 #include <linux/dmapool.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
 #include <linux/of.h>
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index 41ac9477df7ad..10b6c6daaacda 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -22,6 +22,7 @@
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 
 #include <target/target_core_base.h>
 #include <target/target_core_fabric.h>
-- 
GitLab


From 7c576f4d3ce43fa0fc1ac258dc4768d0f3b3b992 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:42:25 +0100
Subject: [PATCH 2595/3804] powerpc: Convert irq_domain_add_legacy_isa use to
 irq_domain_add_legacy

irq_domain_add_legacy_isa is a pain. It only exists for the benefit of
two PPC-specific drivers, and creates an ugly dependency between asm/irq.h
and linux/irqdomain.h

Instead, let's convert these two drivers to irq_domain_add_legacy(),
and stop using NUM_ISA_INTERRUPTS by directly setting NR_IRQS_LEGACY.

The dependency cannot be broken yet as there is a lot of PPC-related
code that depends on it, but that's the first step towards it.

A followup patch will remove irq_domain_add_legacy_isa.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/include/asm/irq.h         | 4 ++--
 arch/powerpc/platforms/ps3/interrupt.c | 4 ++--
 arch/powerpc/sysdev/i8259.c            | 3 ++-
 arch/powerpc/sysdev/mpic.c             | 2 +-
 arch/powerpc/sysdev/tsi108_pci.c       | 3 ++-
 arch/powerpc/sysdev/xics/xics-common.c | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index b2bd588304300..c1eda9199214e 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -23,8 +23,8 @@ extern atomic_t ppc_n_lost_interrupts;
 /* Total number of virq in the platform */
 #define NR_IRQS		CONFIG_NR_IRQS
 
-/* Same thing, used by the generic IRQ code */
-#define NR_IRQS_LEGACY		NUM_ISA_INTERRUPTS
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY		16
 
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
 
diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c
index e68f4fb1c1f40..49871427f599d 100644
--- a/arch/powerpc/platforms/ps3/interrupt.c
+++ b/arch/powerpc/platforms/ps3/interrupt.c
@@ -46,7 +46,7 @@
  * implementation equates HV plug value to Linux virq value, constrains each
  * interrupt to have a system wide unique plug number, and limits the range
  * of the plug values to map into the first dword of the bitmaps.  This
- * gives a usable range of plug values of  {NUM_ISA_INTERRUPTS..63}.  Note
+ * gives a usable range of plug values of  {NR_IRQS_LEGACY..63}.  Note
  * that there is no constraint on how many in this set an individual thread
  * can acquire.
  *
@@ -722,7 +722,7 @@ static unsigned int ps3_get_irq(void)
 	}
 
 #if defined(DEBUG)
-	if (unlikely(plug < NUM_ISA_INTERRUPTS || plug > PS3_PLUG_MAX)) {
+	if (unlikely(plug < NR_IRQS_LEGACY || plug > PS3_PLUG_MAX)) {
 		dump_bmp(&per_cpu(ps3_private, 0));
 		dump_bmp(&per_cpu(ps3_private, 1));
 		BUG();
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index c1d76c344351c..dc1a151c63d7e 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -260,7 +260,8 @@ void i8259_init(struct device_node *node, unsigned long intack_addr)
 	raw_spin_unlock_irqrestore(&i8259_lock, flags);
 
 	/* create a legacy host */
-	i8259_host = irq_domain_add_legacy_isa(node, &i8259_host_ops, NULL);
+	i8259_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					   &i8259_host_ops, NULL);
 	if (i8259_host == NULL) {
 		printk(KERN_ERR "i8259: failed to allocate irq host !\n");
 		return;
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b0426f28946af..995fb2ada507a 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -602,7 +602,7 @@ static void __init mpic_scan_ht_pics(struct mpic *mpic)
 /* Find an mpic associated with a given linux interrupt */
 static struct mpic *mpic_find(unsigned int irq)
 {
-	if (irq < NUM_ISA_INTERRUPTS)
+	if (irq < NR_IRQS_LEGACY)
 		return NULL;
 
 	return irq_get_chip_data(irq);
diff --git a/arch/powerpc/sysdev/tsi108_pci.c b/arch/powerpc/sysdev/tsi108_pci.c
index 49f9541954f8d..042bb38fa5c24 100644
--- a/arch/powerpc/sysdev/tsi108_pci.c
+++ b/arch/powerpc/sysdev/tsi108_pci.c
@@ -404,7 +404,8 @@ void __init tsi108_pci_int_init(struct device_node *node)
 {
 	DBG("Tsi108_pci_int_init: initializing PCI interrupts\n");
 
-	pci_irq_host = irq_domain_add_legacy_isa(node, &pci_irq_domain_ops, NULL);
+	pci_irq_host = irq_domain_add_legacy(node, NR_IRQS_LEGACY, 0, 0,
+					     &pci_irq_domain_ops, NULL);
 	if (pci_irq_host == NULL) {
 		printk(KERN_ERR "pci_irq_host: failed to allocate irq domain!\n");
 		return;
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index 7e4305c01bacd..fdf8db4444b67 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -201,7 +201,7 @@ void xics_migrate_irqs_away(void)
 		struct ics *ics;
 
 		/* We can't set affinity on ISA interrupts */
-		if (virq < NUM_ISA_INTERRUPTS)
+		if (virq < NR_IRQS_LEGACY)
 			continue;
 		/* We only need to migrate enabled IRQS */
 		if (!desc->action)
-- 
GitLab


From 582f5aa1dbb3bd7bd3dd12de7e87f6dafb3f8258 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:15:27 +0100
Subject: [PATCH 2596/3804] powerpc: Drop dependency between asm/irq.h and
 linux/irqdomain.h

Directly including linux/irqdomain.h was hiding all sorts of sins,
which have now been fixed. Drop the spurious include.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/include/asm/irq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index c1eda9199214e..4982f3711fc3f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -6,7 +6,6 @@
 /*
  */
 
-#include <linux/irqdomain.h>
 #include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>
-- 
GitLab


From 405e94e9aed2a38bdcd22efe53c36c6cd53185a6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 13 Sep 2018 10:42:25 +0100
Subject: [PATCH 2597/3804] irqdomain: Kill irq_domain_add_legacy_isa

This helper doesn't have a user anymore, let's remove it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 Documentation/core-api/irq/irq-domain.rst |  1 -
 include/linux/irqdomain.h                 | 11 -----------
 2 files changed, 12 deletions(-)

diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index 8214e215a8bf3..53283b3729a1a 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -146,7 +146,6 @@ Legacy
 
 	irq_domain_add_simple()
 	irq_domain_add_legacy()
-	irq_domain_add_legacy_isa()
 	irq_domain_create_simple()
 	irq_domain_create_legacy()
 
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 62a8e3d238292..9f884c9487399 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -45,9 +45,6 @@ struct cpumask;
 struct seq_file;
 struct irq_affinity_desc;
 
-/* Number of irqs reserved for a legacy isa controller */
-#define NUM_ISA_INTERRUPTS	16
-
 #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16
 
 /**
@@ -355,14 +352,6 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
 }
-static inline struct irq_domain *irq_domain_add_legacy_isa(
-				struct device_node *of_node,
-				const struct irq_domain_ops *ops,
-				void *host_data)
-{
-	return irq_domain_add_legacy(of_node, NUM_ISA_INTERRUPTS, 0, 0, ops,
-				     host_data);
-}
 static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
-- 
GitLab


From 1da027362a7db422243601e895e6f8288389f435 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 2 Apr 2021 12:50:14 +0100
Subject: [PATCH 2598/3804] irqdomain: Reimplement irq_linear_revmap() with
 irq_find_mapping()

irq_linear_revmap() is supposed to be a fast path for domain
lookups, but it only exposes low-level details of the irqdomain
implementation, details which are better kept private.

The *overhead* between the two is only a function call and
a couple of tests, so it is likely that no one can show any
meaningful difference compared to the cost of taking an
interrupt.

Reimplement irq_linear_revmap() with irq_find_mapping()
in order to preserve source code compatibility, and
rename the internal field for good measure.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h | 22 +++++++++-------------
 kernel/irq/irqdomain.c    |  6 +++---
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 9f884c9487399..42b3f7d03a32b 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,9 +151,9 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
  *                         support direct mapping
- * @revmap_size: Size of the linear map table @linear_revmap[]
+ * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
- * @linear_revmap: Linear table of hwirq->virq reverse mappings
+ * @revmap: Linear table of hwirq->virq reverse mappings
  */
 struct irq_domain {
 	struct list_head link;
@@ -177,7 +177,7 @@ struct irq_domain {
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
-	unsigned int linear_revmap[];
+	unsigned int revmap[];
 };
 
 /* Irq domain flags */
@@ -394,24 +394,20 @@ static inline unsigned int irq_create_mapping(struct irq_domain *host,
 	return irq_create_mapping_affinity(host, hwirq, NULL);
 }
 
-
 /**
- * irq_linear_revmap() - Find a linux irq from a hw irq number.
+ * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
- *
- * This is a fast path alternative to irq_find_mapping() that can be
- * called directly by irq controller code to save a handful of
- * instructions. It is always safe to call, but won't find irqs mapped
- * using the radix tree.
  */
+extern unsigned int irq_find_mapping(struct irq_domain *host,
+				     irq_hw_number_t hwirq);
+
 static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 					     irq_hw_number_t hwirq)
 {
-	return hwirq < domain->revmap_size ? domain->linear_revmap[hwirq] : 0;
+	return irq_find_mapping(domain, hwirq);
 }
-extern unsigned int irq_find_mapping(struct irq_domain *host,
-				     irq_hw_number_t hwirq);
+
 extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
 
 extern const struct irq_domain_ops irq_domain_simple_ops;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 6284443b87ecb..8bd0122539899 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -486,7 +486,7 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
 	if (hwirq < domain->revmap_size) {
-		domain->linear_revmap[hwirq] = 0;
+		domain->revmap[hwirq] = 0;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_delete(&domain->revmap_tree, hwirq);
@@ -499,7 +499,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 				   struct irq_data *irq_data)
 {
 	if (hwirq < domain->revmap_size) {
-		domain->linear_revmap[hwirq] = irq_data->irq;
+		domain->revmap[hwirq] = irq_data->irq;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
@@ -885,7 +885,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->linear_revmap[hwirq];
+		return domain->revmap[hwirq];
 
 	rcu_read_lock();
 	data = radix_tree_lookup(&domain->revmap_tree, hwirq);
-- 
GitLab


From e37af8011a9631996e6cd32dd81a152708eee7d4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 4 Apr 2021 13:06:39 +0100
Subject: [PATCH 2599/3804] powerpc: Move the use of irq_domain_add_nomap()
 behind a config option

Only a handful of old PPC systems are still using the old 'nomap'
variant of the irqdomain library. Move the associated definitions
behind a configuration option, which will allow us to make some
more radical changes.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/powerpc/platforms/cell/Kconfig     | 1 +
 arch/powerpc/platforms/powermac/Kconfig | 1 +
 arch/powerpc/platforms/ps3/Kconfig      | 1 +
 arch/powerpc/sysdev/xive/Kconfig        | 1 +
 include/linux/irqdomain.h               | 8 ++++++--
 kernel/irq/Kconfig                      | 5 +++++
 kernel/irq/irqdomain.c                  | 2 ++
 7 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index e7c976bcadffd..cb70c5f25bc6f 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -35,6 +35,7 @@ config PPC_IBM_CELL_BLADE
 config AXON_MSI
 	bool
 	depends on PPC_IBM_CELL_BLADE && PCI_MSI
+	select IRQ_DOMAIN_NOMAP
 	default y
 
 menu "Cell Broadband Engine options"
diff --git a/arch/powerpc/platforms/powermac/Kconfig b/arch/powerpc/platforms/powermac/Kconfig
index c02d8c503b294..b97bf12801eb2 100644
--- a/arch/powerpc/platforms/powermac/Kconfig
+++ b/arch/powerpc/platforms/powermac/Kconfig
@@ -24,6 +24,7 @@ config PPC_PMAC32_PSURGE
 	bool "Support for powersurge upgrade cards" if EXPERT
 	depends on SMP && PPC32 && PPC_PMAC
 	select PPC_SMP_MUXED_IPI
+	select IRQ_DOMAIN_NOMAP
 	default y
 	help
 	  The powersurge cpu boards can be used in the generation
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index e32406e918d09..4d0535cc7946f 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -7,6 +7,7 @@ config PPC_PS3
 	select USB_OHCI_BIG_ENDIAN_MMIO
 	select USB_EHCI_BIG_ENDIAN_MMIO
 	select HAVE_PCI
+	select IRQ_DOMAIN_NOMAP
 	help
 	  This option enables support for the Sony PS3 game console
 	  and other platforms using the PS3 hypervisor.  Enabling this
diff --git a/arch/powerpc/sysdev/xive/Kconfig b/arch/powerpc/sysdev/xive/Kconfig
index 785c292d104b7..97796c6b63f04 100644
--- a/arch/powerpc/sysdev/xive/Kconfig
+++ b/arch/powerpc/sysdev/xive/Kconfig
@@ -3,6 +3,7 @@ config PPC_XIVE
 	bool
 	select PPC_SMP_MUXED_IPI
 	select HARDIRQS_SW_RESEND
+	select IRQ_DOMAIN_NOMAP
 
 config PPC_XIVE_NATIVE
 	bool
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 42b3f7d03a32b..723495ec5a2fa 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -345,6 +345,8 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
 }
+
+#ifdef CONFIG_IRQ_DOMAIN_NOMAP
 static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
 					 unsigned int max_irq,
 					 const struct irq_domain_ops *ops,
@@ -352,6 +354,10 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod
 {
 	return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data);
 }
+
+extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
+#endif
+
 static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
 					 const struct irq_domain_ops *ops,
 					 void *host_data)
@@ -408,8 +414,6 @@ static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 	return irq_find_mapping(domain, hwirq);
 }
 
-extern unsigned int irq_create_direct_mapping(struct irq_domain *host);
-
 extern const struct irq_domain_ops irq_domain_simple_ops;
 
 /* stock xlate functions */
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index d79ef2493a281..fbc54c2a7f239 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -70,6 +70,11 @@ config IRQ_DOMAIN_HIERARCHY
 	bool
 	select IRQ_DOMAIN
 
+# Support for obsolete non-mapping irq domains
+config IRQ_DOMAIN_NOMAP
+	bool
+	select IRQ_DOMAIN
+
 # Support for hierarchical fasteoi+edge and fasteoi+level handlers
 config IRQ_FASTEOI_HIERARCHY_HANDLERS
 	bool
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8bd0122539899..e0143e6406836 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -604,6 +604,7 @@ void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
 }
 EXPORT_SYMBOL_GPL(irq_domain_associate_many);
 
+#ifdef CONFIG_IRQ_DOMAIN_NOMAP
 /**
  * irq_create_direct_mapping() - Allocate an irq for direct mapping
  * @domain: domain to allocate the irq for or NULL for default domain
@@ -644,6 +645,7 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+#endif
 
 /**
  * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
-- 
GitLab


From 4f86a06e2d6ece5316e4c42fbf946ee22acb30f3 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 10 Sep 2018 18:33:46 +0100
Subject: [PATCH 2600/3804] irqdomain: Make normal and nomap irqdomains
 exclusive

Direct mappings are completely exclusive of normal mappings, meaning
that we can refactor the code slightly so that we can get rid of
the revmap_direct_max_irq field and use the revmap_size field
instead, reducing the size of the irqdomain structure.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  6 +++---
 kernel/irq/irqdomain.c    | 45 ++++++++++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 723495ec5a2fa..0916cf9c6e208 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -149,8 +149,6 @@ struct irq_domain_chip_generic;
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
  *
  * Revmap data, used internally by irq_domain
- * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
- *                         support direct mapping
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
  * @revmap: Linear table of hwirq->virq reverse mappings
@@ -173,7 +171,6 @@ struct irq_domain {
 
 	/* reverse map data. The linear map gets appended to the irq_domain */
 	irq_hw_number_t hwirq_max;
-	unsigned int revmap_direct_max_irq;
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
@@ -207,6 +204,9 @@ enum {
 	 */
 	IRQ_DOMAIN_MSI_NOMASK_QUIRK	= (1 << 6),
 
+	/* Irq domain doesn't translate anything */
+	IRQ_DOMAIN_FLAG_NO_MAP		= (1 << 7),
+
 	/*
 	 * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved
 	 * for implementation specific purposes and ignored by the
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index e0143e6406836..fa94c86e47d4a 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -146,6 +146,10 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 
 	static atomic_t unknown_domains;
 
+	if (WARN_ON((size && direct_max) ||
+		    (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
+		return NULL;
+
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
 	if (!domain)
@@ -213,8 +217,14 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 	domain->ops = ops;
 	domain->host_data = host_data;
 	domain->hwirq_max = hwirq_max;
+
+	if (direct_max) {
+		size = direct_max;
+		domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP;
+	}
+
 	domain->revmap_size = size;
-	domain->revmap_direct_max_irq = direct_max;
+
 	irq_domain_check_hierarchy(domain);
 
 	mutex_lock(&irq_domain_mutex);
@@ -482,9 +492,18 @@ struct irq_domain *irq_get_default_host(void)
 	return irq_default_domain;
 }
 
+static bool irq_domain_is_nomap(struct irq_domain *domain)
+{
+	return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) &&
+	       (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP);
+}
+
 static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
+	if (irq_domain_is_nomap(domain))
+		return;
+
 	if (hwirq < domain->revmap_size) {
 		domain->revmap[hwirq] = 0;
 	} else {
@@ -498,6 +517,9 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 				   irq_hw_number_t hwirq,
 				   struct irq_data *irq_data)
 {
+	if (irq_domain_is_nomap(domain))
+		return;
+
 	if (hwirq < domain->revmap_size) {
 		domain->revmap[hwirq] = irq_data->irq;
 	} else {
@@ -629,9 +651,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
 		pr_debug("create_direct virq allocation failed\n");
 		return 0;
 	}
-	if (virq >= domain->revmap_direct_max_irq) {
+	if (virq >= domain->revmap_size) {
 		pr_err("ERROR: no free irqs available below %i maximum\n",
-			domain->revmap_direct_max_irq);
+			domain->revmap_size);
 		irq_free_desc(virq);
 		return 0;
 	}
@@ -879,10 +901,14 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 	if (domain == NULL)
 		return 0;
 
-	if (hwirq < domain->revmap_direct_max_irq) {
-		data = irq_domain_get_irq_data(domain, hwirq);
-		if (data && data->hwirq == hwirq)
-			return hwirq;
+	if (irq_domain_is_nomap(domain)) {
+		if (hwirq < domain->revmap_size) {
+			data = irq_domain_get_irq_data(domain, hwirq);
+			if (data && data->hwirq == hwirq)
+				return hwirq;
+		}
+
+		return 0;
 	}
 
 	/* Check if the hwirq is in the linear revmap. */
@@ -1470,7 +1496,7 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 {
 	void __rcu **slot;
 
-	if (d->hwirq < d->domain->revmap_size)
+	if (irq_domain_is_nomap(d->domain) || d->hwirq < d->domain->revmap_size)
 		return; /* Not using radix tree. */
 
 	/* Fix up the revmap. */
@@ -1830,8 +1856,7 @@ static void
 irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
 {
 	seq_printf(m, "%*sname:   %s\n", ind, "", d->name);
-	seq_printf(m, "%*ssize:   %u\n", ind + 1, "",
-		   d->revmap_size + d->revmap_direct_max_irq);
+	seq_printf(m, "%*ssize:   %u\n", ind + 1, "", d->revmap_size);
 	seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
 	seq_printf(m, "%*sflags:  0x%08x\n", ind +1 , "", d->flags);
 	if (d->ops && d->ops->debug_show)
-- 
GitLab


From 426fa316148bccabf48f9c91a13c387ee911eadc Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:30:51 +0100
Subject: [PATCH 2601/3804] irqdomain: Use struct_size() helper when allocating
 irqdomain

Instead of open-coding the size computation of struct irq_domain,
use the struct_size() helper.

This is going to be handy as we change the type of the revmap
array.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 kernel/irq/irqdomain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index fa94c86e47d4a..cdcb1989cd20c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -150,7 +150,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 		    (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && direct_max)))
 		return NULL;
 
-	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
+	domain = kzalloc_node(struct_size(domain, revmap, size),
 			      GFP_KERNEL, of_node_to_nid(to_of_node(fwnode)));
 	if (!domain)
 		return NULL;
-- 
GitLab


From 48b15a7921d60680babe59f64e127816585a585c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 11:46:53 +0100
Subject: [PATCH 2602/3804] irqdomain: Cache irq_data instead of a virq number
 in the revmap

Caching a virq number in the revmap is pretty inefficient, as
it means we will need to convert it back to either an irq_data
or irq_desc to do anything with it.

It is also a bit odd, as the radix tree does cache irq_data
pointers.

Change the revmap type to be an irq_data pointer instead of
an unsigned int, and preserve the current API for now.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  4 ++--
 kernel/irq/irqdomain.c    | 16 +++++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 0916cf9c6e208..340cc04611dd8 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,7 +151,7 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
- * @revmap: Linear table of hwirq->virq reverse mappings
+ * @revmap: Linear table of irq_data pointers
  */
 struct irq_domain {
 	struct list_head link;
@@ -174,7 +174,7 @@ struct irq_domain {
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
 	struct mutex revmap_tree_mutex;
-	unsigned int revmap[];
+	struct irq_data *revmap[];
 };
 
 /* Irq domain flags */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index cdcb1989cd20c..7a4e38804487d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -505,7 +505,7 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 		return;
 
 	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = 0;
+		domain->revmap[hwirq] = NULL;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_delete(&domain->revmap_tree, hwirq);
@@ -521,7 +521,7 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 		return;
 
 	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = irq_data->irq;
+		domain->revmap[hwirq] = irq_data;
 	} else {
 		mutex_lock(&domain->revmap_tree_mutex);
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
@@ -913,7 +913,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->revmap[hwirq];
+		return domain->revmap[hwirq]->irq;
 
 	rcu_read_lock();
 	data = radix_tree_lookup(&domain->revmap_tree, hwirq);
@@ -1496,8 +1496,14 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 {
 	void __rcu **slot;
 
-	if (irq_domain_is_nomap(d->domain) || d->hwirq < d->domain->revmap_size)
-		return; /* Not using radix tree. */
+	if (irq_domain_is_nomap(d->domain))
+		return;
+
+	if (d->hwirq < d->domain->revmap_size) {
+		/* Not using radix tree */
+		d->domain->revmap[d->hwirq] = d;
+		return;
+	}
 
 	/* Fix up the revmap. */
 	mutex_lock(&d->domain->revmap_tree_mutex);
-- 
GitLab


From d4a45c68dc81f9117ceaff9f058d5fae674181b9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Apr 2021 12:57:27 +0100
Subject: [PATCH 2603/3804] irqdomain: Protect the linear revmap with RCU

It is pretty odd that the radix tree uses RCU while the linear
portion doesn't, leading to potential surprises for the users,
depending on how the irqdomain has been created.

Fix this by moving the update of the linear revmap under
the mutex, and the lookup under the RCU read-side lock.

The mutex name is updated to reflect that it doesn't only
cover the radix-tree anymore.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h |  5 ++--
 kernel/irq/irqdomain.c    | 49 ++++++++++++++++++---------------------
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 340cc04611dd8..2b696c9bcaafd 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -151,6 +151,7 @@ struct irq_domain_chip_generic;
  * Revmap data, used internally by irq_domain
  * @revmap_size: Size of the linear map table @revmap[]
  * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map
+ * @revmap_mutex: Lock for the revmap
  * @revmap: Linear table of irq_data pointers
  */
 struct irq_domain {
@@ -173,8 +174,8 @@ struct irq_domain {
 	irq_hw_number_t hwirq_max;
 	unsigned int revmap_size;
 	struct radix_tree_root revmap_tree;
-	struct mutex revmap_tree_mutex;
-	struct irq_data *revmap[];
+	struct mutex revmap_mutex;
+	struct irq_data __rcu *revmap[];
 };
 
 /* Irq domain flags */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 7a4e38804487d..8fbadeefc814a 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -213,7 +213,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 
 	/* Fill structure */
 	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
-	mutex_init(&domain->revmap_tree_mutex);
+	mutex_init(&domain->revmap_mutex);
 	domain->ops = ops;
 	domain->host_data = host_data;
 	domain->hwirq_max = hwirq_max;
@@ -504,13 +504,12 @@ static void irq_domain_clear_mapping(struct irq_domain *domain,
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = NULL;
-	} else {
-		mutex_lock(&domain->revmap_tree_mutex);
+	mutex_lock(&domain->revmap_mutex);
+	if (hwirq < domain->revmap_size)
+		rcu_assign_pointer(domain->revmap[hwirq], NULL);
+	else
 		radix_tree_delete(&domain->revmap_tree, hwirq);
-		mutex_unlock(&domain->revmap_tree_mutex);
-	}
+	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_set_mapping(struct irq_domain *domain,
@@ -520,13 +519,12 @@ static void irq_domain_set_mapping(struct irq_domain *domain,
 	if (irq_domain_is_nomap(domain))
 		return;
 
-	if (hwirq < domain->revmap_size) {
-		domain->revmap[hwirq] = irq_data;
-	} else {
-		mutex_lock(&domain->revmap_tree_mutex);
+	mutex_lock(&domain->revmap_mutex);
+	if (hwirq < domain->revmap_size)
+		rcu_assign_pointer(domain->revmap[hwirq], irq_data);
+	else
 		radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
-		mutex_unlock(&domain->revmap_tree_mutex);
-	}
+	mutex_unlock(&domain->revmap_mutex);
 }
 
 static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
@@ -911,12 +909,12 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 		return 0;
 	}
 
+	rcu_read_lock();
 	/* Check if the hwirq is in the linear revmap. */
 	if (hwirq < domain->revmap_size)
-		return domain->revmap[hwirq]->irq;
-
-	rcu_read_lock();
-	data = radix_tree_lookup(&domain->revmap_tree, hwirq);
+		data = rcu_dereference(domain->revmap[hwirq]);
+	else
+		data = radix_tree_lookup(&domain->revmap_tree, hwirq);
 	rcu_read_unlock();
 	return data ? data->irq : 0;
 }
@@ -1499,18 +1497,17 @@ static void irq_domain_fix_revmap(struct irq_data *d)
 	if (irq_domain_is_nomap(d->domain))
 		return;
 
+	/* Fix up the revmap. */
+	mutex_lock(&d->domain->revmap_mutex);
 	if (d->hwirq < d->domain->revmap_size) {
 		/* Not using radix tree */
-		d->domain->revmap[d->hwirq] = d;
-		return;
+		rcu_assign_pointer(d->domain->revmap[d->hwirq], d);
+	} else {
+		slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
+		if (slot)
+			radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
 	}
-
-	/* Fix up the revmap. */
-	mutex_lock(&d->domain->revmap_tree_mutex);
-	slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq);
-	if (slot)
-		radix_tree_replace_slot(&d->domain->revmap_tree, slot, d);
-	mutex_unlock(&d->domain->revmap_tree_mutex);
+	mutex_unlock(&d->domain->revmap_mutex);
 }
 
 /**
-- 
GitLab


From d22558dd0a6c888b1829f9d3a0a627e330e27585 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:00:13 +0100
Subject: [PATCH 2604/3804] irqdomain: Introduce irq_resolve_mapping()

Rework irq_find_mapping() to return both an irq_desc pointer and,
optionally, the virtual irq number, and rename the result to
__irq_resolve_mapping(). A new helper called irq_resolve_mapping()
is provided for code that doesn't need the virtual irq number.

irq_find_mapping() is also rewritten in terms of __irq_resolve_mapping().

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h | 23 +++++++++++++++++++++--
 kernel/irq/irqdomain.c    | 28 ++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 2b696c9bcaafd..23e4ee5235769 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -41,6 +41,7 @@ struct fwnode_handle;
 struct irq_domain;
 struct irq_chip;
 struct irq_data;
+struct irq_desc;
 struct cpumask;
 struct seq_file;
 struct irq_affinity_desc;
@@ -401,13 +402,31 @@ static inline unsigned int irq_create_mapping(struct irq_domain *host,
 	return irq_create_mapping_affinity(host, hwirq, NULL);
 }
 
+extern struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+					      irq_hw_number_t hwirq,
+					      unsigned int *irq);
+
+static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain,
+						   irq_hw_number_t hwirq)
+{
+	return __irq_resolve_mapping(domain, hwirq, NULL);
+}
+
 /**
  * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
  */
-extern unsigned int irq_find_mapping(struct irq_domain *host,
-				     irq_hw_number_t hwirq);
+static inline unsigned int irq_find_mapping(struct irq_domain *domain,
+					    irq_hw_number_t hwirq)
+{
+	unsigned int irq;
+
+	if (__irq_resolve_mapping(domain, hwirq, &irq))
+		return irq;
+
+	return 0;
+}
 
 static inline unsigned int irq_linear_revmap(struct irq_domain *domain,
 					     irq_hw_number_t hwirq)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8fbadeefc814a..51c483ce24473 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -884,29 +884,34 @@ void irq_dispose_mapping(unsigned int virq)
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
 /**
- * irq_find_mapping() - Find a linux irq from a hw irq number.
+ * __irq_resolve_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
+ * @irq: optional pointer to return the Linux irq if required
+ *
+ * Returns the interrupt descriptor.
  */
-unsigned int irq_find_mapping(struct irq_domain *domain,
-			      irq_hw_number_t hwirq)
+struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain,
+				       irq_hw_number_t hwirq,
+				       unsigned int *irq)
 {
+	struct irq_desc *desc = NULL;
 	struct irq_data *data;
 
 	/* Look for default domain if necessary */
 	if (domain == NULL)
 		domain = irq_default_domain;
 	if (domain == NULL)
-		return 0;
+		return desc;
 
 	if (irq_domain_is_nomap(domain)) {
 		if (hwirq < domain->revmap_size) {
 			data = irq_domain_get_irq_data(domain, hwirq);
 			if (data && data->hwirq == hwirq)
-				return hwirq;
+				desc = irq_data_to_desc(data);
 		}
 
-		return 0;
+		return desc;
 	}
 
 	rcu_read_lock();
@@ -915,10 +920,17 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 		data = rcu_dereference(domain->revmap[hwirq]);
 	else
 		data = radix_tree_lookup(&domain->revmap_tree, hwirq);
+
+	if (likely(data)) {
+		desc = irq_data_to_desc(data);
+		if (irq)
+			*irq = data->irq;
+	}
+
 	rcu_read_unlock();
-	return data ? data->irq : 0;
+	return desc;
 }
-EXPORT_SYMBOL_GPL(irq_find_mapping);
+EXPORT_SYMBOL_GPL(__irq_resolve_mapping);
 
 /**
  * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings
-- 
GitLab


From a3016b26ee6ee13d5647d701404a7912d4eaea9e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:24:37 +0100
Subject: [PATCH 2605/3804] genirq: Use irq_resolve_mapping() to implement
 __handle_domain_irq() and co

In order to start reaping the benefits of irq_resolve_mapping(),
start using it in __handle_domain_irq() and handle_domain_nmi().

This involves splitting generic_handle_irq() to be able to directly
provide the irq_desc.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h |  1 +
 kernel/irq/irqdesc.c    | 60 ++++++++++++++++++++++++-----------------
 2 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index df46512507855..cdd1cf8207f62 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -158,6 +158,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 	desc->handle_irq(desc);
 }
 
+int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 4a617d7312a47..684c5b7b78323 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -632,14 +632,8 @@ void irq_init_desc(unsigned int irq)
 
 #endif /* !CONFIG_SPARSE_IRQ */
 
-/**
- * generic_handle_irq - Invoke the handler for a particular irq
- * @irq:	The irq number to handle
- *
- */
-int generic_handle_irq(unsigned int irq)
+int handle_irq_desc(struct irq_desc *desc)
 {
-	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_data *data;
 
 	if (!desc)
@@ -652,6 +646,17 @@ int generic_handle_irq(unsigned int irq)
 	generic_handle_irq_desc(desc);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(handle_irq_desc);
+
+/**
+ * generic_handle_irq - Invoke the handler for a particular irq
+ * @irq:	The irq number to handle
+ *
+ */
+int generic_handle_irq(unsigned int irq)
+{
+	return handle_irq_desc(irq_to_desc(irq));
+}
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
@@ -668,27 +673,32 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	unsigned int irq = hwirq;
+	struct irq_desc *desc;
 	int ret = 0;
 
 	irq_enter();
 
-#ifdef CONFIG_IRQ_DOMAIN
-	if (lookup)
-		irq = irq_find_mapping(domain, hwirq);
-#endif
-
-	/*
-	 * Some hardware gives randomly wrong interrupts.  Rather
-	 * than crashing, do something sensible.
-	 */
-	if (unlikely(!irq || irq >= nr_irqs)) {
-		ack_bad_irq(irq);
-		ret = -EINVAL;
+	if (likely(IS_ENABLED(CONFIG_IRQ_DOMAIN) && lookup)) {
+		/* The irqdomain code provides boundary checks */
+		desc = irq_resolve_mapping(domain, hwirq);
 	} else {
-		generic_handle_irq(irq);
+		/*
+		 * Some hardware gives randomly wrong interrupts.  Rather
+		 * than crashing, do something sensible.
+		 */
+		if (unlikely(!hwirq || hwirq >= nr_irqs)) {
+			ack_bad_irq(hwirq);
+			desc = NULL;
+		} else {
+			desc = irq_to_desc(hwirq);
+		}
 	}
 
+	if (likely(desc))
+		handle_irq_desc(desc);
+	else
+		ret = -EINVAL;
+
 	irq_exit();
 	set_irq_regs(old_regs);
 	return ret;
@@ -709,7 +719,7 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 		      struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	unsigned int irq;
+	struct irq_desc *desc;
 	int ret = 0;
 
 	/*
@@ -717,14 +727,14 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 	 */
 	WARN_ON(!in_nmi());
 
-	irq = irq_find_mapping(domain, hwirq);
+	desc = irq_resolve_mapping(domain, hwirq);
 
 	/*
 	 * ack_bad_irq is not NMI-safe, just report
 	 * an invalid interrupt.
 	 */
-	if (likely(irq))
-		generic_handle_irq(irq);
+	if (likely(desc))
+		handle_irq_desc(desc);
 	else
 		ret = -EINVAL;
 
-- 
GitLab


From 9626d18a20e166a864e8d1f6ed6bbb84a0fa4989 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 14:33:24 +0100
Subject: [PATCH 2606/3804] irqdesc: Fix __handle_domain_irq() comment

It appears that the comment about a NULL domain meaning anything
has always been wrong. Fix it.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index cdd1cf8207f62..2971eb7e65f12 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -165,8 +165,7 @@ int generic_handle_irq(unsigned int irq);
 /*
  * Convert a HW interrupt number to a logical one using a IRQ domain,
  * and handle the result interrupt number. Return -EINVAL if
- * conversion failed. Providing a NULL domain indicates that the
- * conversion has already been done.
+ * conversion failed.
  */
 int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs);
-- 
GitLab


From 9e027dd979beca41cd85f4e971d184fe0ffcff3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 13:46:05 +0100
Subject: [PATCH 2607/3804] irqchip/nvic: Convert from handle_IRQ() to
 handle_domain_irq()

Given that the nvic driver is fully irqdomain aware, there is no
reason for it to use the arch-specific handle_IRQ(), and it can
be moved over to handle_domain_irq().

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-nvic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index f747e2209ea99..b31c4cff4d3a5 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -40,9 +40,7 @@ static struct irq_domain *nvic_irq_domain;
 asmlinkage void __exception_irq_entry
 nvic_handle_irq(irq_hw_number_t hwirq, struct pt_regs *regs)
 {
-	unsigned int irq = irq_linear_revmap(nvic_irq_domain, hwirq);
-
-	handle_IRQ(irq, regs);
+	handle_domain_irq(nvic_irq_domain, hwirq, regs);
 }
 
 static int nvic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
-- 
GitLab


From 8240ef50d4864325b346e40bb9d30cda9f22102d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 13:45:52 +0100
Subject: [PATCH 2608/3804] genirq: Add generic_handle_domain_irq() helper

Provide generic_handle_domain_irq() as a counterpart to handle_domain_irq()
for non-root interrupt controllers.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/irqdesc.c    | 19 ++++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 2971eb7e65f12..0f226c6b0c70c 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -170,6 +170,8 @@ int generic_handle_irq(unsigned int irq);
 int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 			bool lookup, struct pt_regs *regs);
 
+int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
+
 static inline int handle_domain_irq(struct irq_domain *domain,
 				    unsigned int hwirq, struct pt_regs *regs)
 {
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 684c5b7b78323..6179d5bde88e9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -661,7 +661,24 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
 
 #ifdef CONFIG_HANDLE_DOMAIN_IRQ
 /**
- * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain
+ * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
+ *                             to a domain, usually for a non-root interrupt
+ *                             controller
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The HW irq number to convert to a logical one
+ *
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ *
+ */
+int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
+{
+	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
+}
+EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
+
+/**
+ * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
+ *                       usually for a root interrupt controller
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
  * @lookup:	Whether to perform the domain lookup or not
-- 
GitLab


From e1c054918c6c7a30a35d2c183ed86600a071cdab Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 12 May 2021 16:18:15 +0100
Subject: [PATCH 2609/3804] genirq: Move non-irqdomain handle_domain_irq()
 handling into ARM's handle_IRQ()

Despite the name, handle_domain_irq() deals with non-irqdomain
handling for the sake of a handful of legacy ARM platforms.

Move such handling into ARM's handle_IRQ(), allowing for better
code generation for everyone else. This allows us to get rid of
some complexity, and to rearrange the guards on the various helpers
in a more logical way.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm/kernel/irq.c   | 22 +++++++++++++++++++++-
 include/linux/irqdesc.h | 14 ++++----------
 kernel/irq/irqdesc.c    | 30 ++++++++----------------------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 698b6f6361566..20ab1e6075225 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -63,7 +63,27 @@ int arch_show_interrupts(struct seq_file *p, int prec)
  */
 void handle_IRQ(unsigned int irq, struct pt_regs *regs)
 {
-	__handle_domain_irq(NULL, irq, false, regs);
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
+
+	irq_enter();
+
+	/*
+	 * Some hardware gives randomly wrong interrupts.  Rather
+	 * than crashing, do something sensible.
+	 */
+	if (unlikely(!irq || irq >= nr_irqs))
+		desc = NULL;
+	else
+		desc = irq_to_desc(irq);
+
+	if (likely(desc))
+		handle_irq_desc(desc);
+	else
+		ack_bad_irq(irq);
+
+	irq_exit();
+	set_irq_regs(old_regs);
 }
 
 /*
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 0f226c6b0c70c..59aea39785bfd 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -161,24 +161,18 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc)
 int handle_irq_desc(struct irq_desc *desc);
 int generic_handle_irq(unsigned int irq);
 
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+#ifdef CONFIG_IRQ_DOMAIN
 /*
  * Convert a HW interrupt number to a logical one using a IRQ domain,
  * and handle the result interrupt number. Return -EINVAL if
  * conversion failed.
  */
-int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
-			bool lookup, struct pt_regs *regs);
-
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
 
-static inline int handle_domain_irq(struct irq_domain *domain,
-				    unsigned int hwirq, struct pt_regs *regs)
-{
-	return __handle_domain_irq(domain, hwirq, true, regs);
-}
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+int handle_domain_irq(struct irq_domain *domain,
+		      unsigned int hwirq, struct pt_regs *regs);
 
-#ifdef CONFIG_IRQ_DOMAIN
 int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
 		      struct pt_regs *regs);
 #endif
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 6179d5bde88e9..f4dd5186858a4 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -659,7 +659,7 @@ int generic_handle_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
 
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
+#ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
  *                             to a domain, usually for a non-root interrupt
@@ -676,9 +676,10 @@ int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
 
+#ifdef CONFIG_HANDLE_DOMAIN_IRQ
 /**
- * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
- *                       usually for a root interrupt controller
+ * handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
+ *                     usually for a root interrupt controller
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
  * @lookup:	Whether to perform the domain lookup or not
@@ -686,8 +687,8 @@ EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
  *
  * Returns:	0 on success, or -EINVAL if conversion has failed
  */
-int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
-			bool lookup, struct pt_regs *regs)
+int handle_domain_irq(struct irq_domain *domain,
+		      unsigned int hwirq, struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	struct irq_desc *desc;
@@ -695,22 +696,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 
 	irq_enter();
 
-	if (likely(IS_ENABLED(CONFIG_IRQ_DOMAIN) && lookup)) {
-		/* The irqdomain code provides boundary checks */
-		desc = irq_resolve_mapping(domain, hwirq);
-	} else {
-		/*
-		 * Some hardware gives randomly wrong interrupts.  Rather
-		 * than crashing, do something sensible.
-		 */
-		if (unlikely(!hwirq || hwirq >= nr_irqs)) {
-			ack_bad_irq(hwirq);
-			desc = NULL;
-		} else {
-			desc = irq_to_desc(hwirq);
-		}
-	}
-
+	/* The irqdomain code provides boundary checks */
+	desc = irq_resolve_mapping(domain, hwirq);
 	if (likely(desc))
 		handle_irq_desc(desc);
 	else
@@ -721,7 +708,6 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	return ret;
 }
 
-#ifdef CONFIG_IRQ_DOMAIN
 /**
  * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
  * @domain:	The domain where to perform the lookup
-- 
GitLab


From 046a6ee2343bb26d85a9973a39ccdb9764236fa4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 4 May 2021 17:42:18 +0100
Subject: [PATCH 2610/3804] irqchip: Bulk conversion to
 generic_handle_domain_irq()

Wherever possible, replace constructs that match either
generic_handle_irq(irq_find_mapping()) or
generic_handle_irq(irq_linear_revmap()) with a single call to
generic_handle_domain_irq().

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/exynos-combiner.c      | 10 ++++------
 drivers/irqchip/irq-al-fic.c           |  7 ++-----
 drivers/irqchip/irq-armada-370-xp.c    | 19 ++++++++-----------
 drivers/irqchip/irq-aspeed-i2c-ic.c    |  8 +++-----
 drivers/irqchip/irq-aspeed-scu-ic.c    |  6 ++----
 drivers/irqchip/irq-ath79-misc.c       |  2 +-
 drivers/irqchip/irq-bcm2835.c          |  2 +-
 drivers/irqchip/irq-bcm2836.c          |  2 +-
 drivers/irqchip/irq-bcm7038-l1.c       |  6 ++----
 drivers/irqchip/irq-bcm7120-l2.c       |  6 ++----
 drivers/irqchip/irq-brcmstb-l2.c       |  2 +-
 drivers/irqchip/irq-dw-apb-ictl.c      |  3 +--
 drivers/irqchip/irq-gic.c              | 13 +++++--------
 drivers/irqchip/irq-goldfish-pic.c     |  5 ++---
 drivers/irqchip/irq-i8259.c            |  4 +---
 drivers/irqchip/irq-idt3243x.c         |  6 ++----
 drivers/irqchip/irq-imgpdc.c           | 11 ++++-------
 drivers/irqchip/irq-imx-intmux.c       |  9 +++------
 drivers/irqchip/irq-imx-irqsteer.c     |  9 +++------
 drivers/irqchip/irq-ingenic-tcu.c      |  2 +-
 drivers/irqchip/irq-ingenic.c          |  3 +--
 drivers/irqchip/irq-keystone.c         | 14 ++++++--------
 drivers/irqchip/irq-loongson-htpic.c   |  2 +-
 drivers/irqchip/irq-loongson-htvec.c   |  4 ++--
 drivers/irqchip/irq-loongson-liointc.c |  2 +-
 drivers/irqchip/irq-lpc32xx.c          |  2 +-
 drivers/irqchip/irq-ls-scfg-msi.c      |  6 ++----
 drivers/irqchip/irq-ls1x.c             |  2 +-
 drivers/irqchip/irq-mips-gic.c         | 20 ++++++++++----------
 drivers/irqchip/irq-mscc-ocelot.c      |  2 +-
 drivers/irqchip/irq-mvebu-pic.c        |  7 ++-----
 drivers/irqchip/irq-mvebu-sei.c        | 13 ++++---------
 drivers/irqchip/irq-orion.c            |  2 +-
 drivers/irqchip/irq-partition-percpu.c |  9 +++------
 drivers/irqchip/irq-pruss-intc.c       |  9 +++------
 drivers/irqchip/irq-realtek-rtl.c      |  2 +-
 drivers/irqchip/irq-renesas-irqc.c     |  2 +-
 drivers/irqchip/irq-sifive-plic.c      |  8 +++-----
 drivers/irqchip/irq-stm32-exti.c       | 10 ++++------
 drivers/irqchip/irq-sunxi-nmi.c        |  3 +--
 drivers/irqchip/irq-tb10x.c            |  2 +-
 drivers/irqchip/irq-ti-sci-inta.c      |  9 +++------
 drivers/irqchip/irq-ts4800.c           |  3 +--
 drivers/irqchip/irq-versatile-fpga.c   |  2 +-
 drivers/irqchip/irq-vic.c              |  2 +-
 drivers/irqchip/irq-xilinx-intc.c      | 23 +++++------------------
 drivers/irqchip/qcom-irq-combiner.c    |  6 +-----
 47 files changed, 111 insertions(+), 190 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index 0b85d9a3fbff8..14106126cbf32 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -66,8 +66,9 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct combiner_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int cascade_irq, combiner_irq;
+	unsigned int combiner_irq;
 	unsigned long status;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 
@@ -80,12 +81,9 @@ static void combiner_handle_cascade_irq(struct irq_desc *desc)
 		goto out;
 
 	combiner_irq = chip_data->hwirq_offset + __ffs(status);
-	cascade_irq = irq_find_mapping(combiner_irq_domain, combiner_irq);
-
-	if (unlikely(!cascade_irq))
+	ret = generic_handle_domain_irq(combiner_irq_domain, combiner_irq);
+	if (unlikely(ret))
 		handle_bad_irq(desc);
-	else
-		generic_handle_irq(cascade_irq);
 
  out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
index 0b0a737397562..886de028a9010 100644
--- a/drivers/irqchip/irq-al-fic.c
+++ b/drivers/irqchip/irq-al-fic.c
@@ -111,7 +111,6 @@ static void al_fic_irq_handler(struct irq_desc *desc)
 	struct irq_chip *irqchip = irq_desc_get_chip(desc);
 	struct irq_chip_generic *gc = irq_get_domain_generic_chip(domain, 0);
 	unsigned long pending;
-	unsigned int irq;
 	u32 hwirq;
 
 	chained_irq_enter(irqchip, desc);
@@ -119,10 +118,8 @@ static void al_fic_irq_handler(struct irq_desc *desc)
 	pending = readl_relaxed(fic->base + AL_FIC_CAUSE);
 	pending &= ~gc->mask_cache;
 
-	for_each_set_bit(hwirq, &pending, NR_FIC_IRQS) {
-		irq = irq_find_mapping(domain, hwirq);
-		generic_handle_irq(irq);
-	}
+	for_each_set_bit(hwirq, &pending, NR_FIC_IRQS)
+		generic_handle_domain_irq(domain, hwirq);
 
 	chained_irq_exit(irqchip, desc);
 }
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 32938dfc0e466..7557ab5512953 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -582,20 +582,19 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained)
 
 	for (msinr = PCI_MSI_DOORBELL_START;
 	     msinr < PCI_MSI_DOORBELL_END; msinr++) {
-		int irq;
+		unsigned int irq;
 
 		if (!(msimask & BIT(msinr)))
 			continue;
 
-		if (is_chained) {
-			irq = irq_find_mapping(armada_370_xp_msi_inner_domain,
-					       msinr - PCI_MSI_DOORBELL_START);
-			generic_handle_irq(irq);
-		} else {
-			irq = msinr - PCI_MSI_DOORBELL_START;
+		irq = msinr - PCI_MSI_DOORBELL_START;
+
+		if (is_chained)
+			generic_handle_domain_irq(armada_370_xp_msi_inner_domain,
+						  irq);
+		else
 			handle_domain_irq(armada_370_xp_msi_inner_domain,
 					  irq, regs);
-		}
 	}
 }
 #else
@@ -606,7 +605,6 @@ static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long irqmap, irqn, irqsrc, cpuid;
-	unsigned int cascade_irq;
 
 	chained_irq_enter(chip, desc);
 
@@ -628,8 +626,7 @@ static void armada_370_xp_mpic_handle_cascade_irq(struct irq_desc *desc)
 			continue;
 		}
 
-		cascade_irq = irq_find_mapping(armada_370_xp_mpic_domain, irqn);
-		generic_handle_irq(cascade_irq);
+		generic_handle_domain_irq(armada_370_xp_mpic_domain, irqn);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-aspeed-i2c-ic.c b/drivers/irqchip/irq-aspeed-i2c-ic.c
index 8d591c179f812..a47db16ff9603 100644
--- a/drivers/irqchip/irq-aspeed-i2c-ic.c
+++ b/drivers/irqchip/irq-aspeed-i2c-ic.c
@@ -34,14 +34,12 @@ static void aspeed_i2c_ic_irq_handler(struct irq_desc *desc)
 	struct aspeed_i2c_ic *i2c_ic = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long bit, status;
-	unsigned int bus_irq;
 
 	chained_irq_enter(chip, desc);
 	status = readl(i2c_ic->base);
-	for_each_set_bit(bit, &status, ASPEED_I2C_IC_NUM_BUS) {
-		bus_irq = irq_find_mapping(i2c_ic->irq_domain, bit);
-		generic_handle_irq(bus_irq);
-	}
+	for_each_set_bit(bit, &status, ASPEED_I2C_IC_NUM_BUS)
+		generic_handle_domain_irq(i2c_ic->irq_domain, bit);
+
 	chained_irq_exit(chip, desc);
 }
 
diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c
index c90a3346b9857..f3c6855a4cefb 100644
--- a/drivers/irqchip/irq-aspeed-scu-ic.c
+++ b/drivers/irqchip/irq-aspeed-scu-ic.c
@@ -44,7 +44,6 @@ struct aspeed_scu_ic {
 
 static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
 {
-	unsigned int irq;
 	unsigned int sts;
 	unsigned long bit;
 	unsigned long enabled;
@@ -74,9 +73,8 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc)
 	max = scu_ic->num_irqs + bit;
 
 	for_each_set_bit_from(bit, &status, max) {
-		irq = irq_find_mapping(scu_ic->irq_domain,
-				       bit - scu_ic->irq_shift);
-		generic_handle_irq(irq);
+		generic_handle_domain_irq(scu_ic->irq_domain,
+					  bit - scu_ic->irq_shift);
 
 		regmap_update_bits(scu_ic->scu, scu_ic->reg, mask,
 				   BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT));
diff --git a/drivers/irqchip/irq-ath79-misc.c b/drivers/irqchip/irq-ath79-misc.c
index 3d641bb6f3f1f..92f001a5ff8dd 100644
--- a/drivers/irqchip/irq-ath79-misc.c
+++ b/drivers/irqchip/irq-ath79-misc.c
@@ -50,7 +50,7 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_linear_revmap(domain, bit));
+		generic_handle_domain_irq(domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index a1e004af23e78..adc1556ed3322 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -254,7 +254,7 @@ static void bcm2836_chained_handle_irq(struct irq_desc *desc)
 	u32 hwirq;
 
 	while ((hwirq = get_next_armctrl_hwirq()) != ~0)
-		generic_handle_irq(irq_linear_revmap(intc.domain, hwirq));
+		generic_handle_domain_irq(intc.domain, hwirq);
 }
 
 IRQCHIP_DECLARE(bcm2835_armctrl_ic, "brcm,bcm2835-armctrl-ic",
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index 25c9a9c06e410..501facdb4570e 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -161,7 +161,7 @@ static void bcm2836_arm_irqchip_handle_ipi(struct irq_desc *desc)
 	mbox_val = readl_relaxed(intc.base + LOCAL_MAILBOX0_CLR0 + 16 * cpu);
 	if (mbox_val) {
 		int hwirq = ffs(mbox_val) - 1;
-		generic_handle_irq(irq_find_mapping(ipi_domain, hwirq));
+		generic_handle_domain_irq(ipi_domain, hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c
index 9dc9bf8cdcc49..a035c385ca7aa 100644
--- a/drivers/irqchip/irq-bcm7038-l1.c
+++ b/drivers/irqchip/irq-bcm7038-l1.c
@@ -145,10 +145,8 @@ static void bcm7038_l1_irq_handle(struct irq_desc *desc)
 			  ~cpu->mask_cache[idx];
 		raw_spin_unlock_irqrestore(&intc->lock, flags);
 
-		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
-			generic_handle_irq(irq_find_mapping(intc->domain,
-							    base + hwirq));
-		}
+		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD)
+			generic_handle_domain_irq(intc->domain, base + hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
index ad59656ccc282..f23d7651ea847 100644
--- a/drivers/irqchip/irq-bcm7120-l2.c
+++ b/drivers/irqchip/irq-bcm7120-l2.c
@@ -74,10 +74,8 @@ static void bcm7120_l2_intc_irq_handle(struct irq_desc *desc)
 					    data->irq_map_mask[idx];
 		irq_gc_unlock(gc);
 
-		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) {
-			generic_handle_irq(irq_find_mapping(b->domain,
-					   base + hwirq));
-		}
+		for_each_set_bit(hwirq, &pending, IRQS_PER_WORD)
+			generic_handle_domain_irq(b->domain, base + hwirq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index cdd6a42d4efa4..8e0911561f2d1 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -110,7 +110,7 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
 	do {
 		irq = ffs(status) - 1;
 		status &= ~(1 << irq);
-		generic_handle_irq(irq_linear_revmap(b->domain, irq));
+		generic_handle_domain_irq(b->domain, irq);
 	} while (status);
 out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c
index 54b09d6c407cd..a67266e44491f 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -62,9 +62,8 @@ static void dw_apb_ictl_handle_irq_cascaded(struct irq_desc *desc)
 
 		while (stat) {
 			u32 hwirq = ffs(stat) - 1;
-			u32 virq = irq_find_mapping(d, gc->irq_base + hwirq);
+			generic_handle_domain_irq(d, gc->irq_base + hwirq);
 
-			generic_handle_irq(virq);
 			stat &= ~BIT(hwirq);
 		}
 	}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index b1d9c22caf2e6..46c9c5fafdbc1 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -375,8 +375,9 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 {
 	struct gic_chip_data *chip_data = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int cascade_irq, gic_irq;
+	unsigned int gic_irq;
 	unsigned long status;
+	int ret;
 
 	chained_irq_enter(chip, desc);
 
@@ -386,14 +387,10 @@ static void gic_handle_cascade_irq(struct irq_desc *desc)
 	if (gic_irq == GICC_INT_SPURIOUS)
 		goto out;
 
-	cascade_irq = irq_find_mapping(chip_data->domain, gic_irq);
-	if (unlikely(gic_irq < 32 || gic_irq > 1020)) {
+	isb();
+	ret = generic_handle_domain_irq(chip_data->domain, gic_irq);
+	if (unlikely(ret))
 		handle_bad_irq(desc);
-	} else {
-		isb();
-		generic_handle_irq(cascade_irq);
-	}
-
  out:
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-goldfish-pic.c b/drivers/irqchip/irq-goldfish-pic.c
index 4f021530e7f31..513f6edbbe953 100644
--- a/drivers/irqchip/irq-goldfish-pic.c
+++ b/drivers/irqchip/irq-goldfish-pic.c
@@ -34,15 +34,14 @@ static void goldfish_pic_cascade(struct irq_desc *desc)
 {
 	struct goldfish_pic_data *gfpic = irq_desc_get_handler_data(desc);
 	struct irq_chip *host_chip = irq_desc_get_chip(desc);
-	u32 pending, hwirq, virq;
+	u32 pending, hwirq;
 
 	chained_irq_enter(host_chip, desc);
 
 	pending = readl(gfpic->base + GFPIC_REG_IRQ_PENDING);
 	while (pending) {
 		hwirq = __fls(pending);
-		virq = irq_linear_revmap(gfpic->irq_domain, hwirq);
-		generic_handle_irq(virq);
+		generic_handle_domain_irq(gfpic->irq_domain, hwirq);
 		pending &= ~(1 << hwirq);
 	}
 
diff --git a/drivers/irqchip/irq-i8259.c b/drivers/irqchip/irq-i8259.c
index b6f6aa7b28625..b70ce0d3c092e 100644
--- a/drivers/irqchip/irq-i8259.c
+++ b/drivers/irqchip/irq-i8259.c
@@ -333,13 +333,11 @@ static void i8259_irq_dispatch(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	int hwirq = i8259_poll();
-	unsigned int irq;
 
 	if (hwirq < 0)
 		return;
 
-	irq = irq_linear_revmap(domain, hwirq);
-	generic_handle_irq(irq);
+	generic_handle_domain_irq(domain, hwirq);
 }
 
 int __init i8259_of_init(struct device_node *node, struct device_node *parent)
diff --git a/drivers/irqchip/irq-idt3243x.c b/drivers/irqchip/irq-idt3243x.c
index f0996820077a9..0732a0e9af629 100644
--- a/drivers/irqchip/irq-idt3243x.c
+++ b/drivers/irqchip/irq-idt3243x.c
@@ -28,7 +28,7 @@ static void idt_irq_dispatch(struct irq_desc *desc)
 {
 	struct idt_pic_data *idtpic = irq_desc_get_handler_data(desc);
 	struct irq_chip *host_chip = irq_desc_get_chip(desc);
-	u32 pending, hwirq, virq;
+	u32 pending, hwirq;
 
 	chained_irq_enter(host_chip, desc);
 
@@ -36,9 +36,7 @@ static void idt_irq_dispatch(struct irq_desc *desc)
 	pending &= ~idtpic->gc->mask_cache;
 	while (pending) {
 		hwirq = __fls(pending);
-		virq = irq_linear_revmap(idtpic->irq_domain, hwirq);
-		if (virq)
-			generic_handle_irq(virq);
+		generic_handle_domain_irq(idtpic->irq_domain, hwirq);
 		pending &= ~(1 << hwirq);
 	}
 
diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 698d07f48fed8..646dfbf4890bd 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -223,7 +223,7 @@ static void pdc_intc_perip_isr(struct irq_desc *desc)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct pdc_intc_priv *priv;
-	unsigned int i, irq_no;
+	unsigned int i;
 
 	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
 
@@ -237,14 +237,13 @@ static void pdc_intc_perip_isr(struct irq_desc *desc)
 found:
 
 	/* pass on the interrupt */
-	irq_no = irq_linear_revmap(priv->domain, i);
-	generic_handle_irq(irq_no);
+	generic_handle_domain_irq(priv->domain, i);
 }
 
 static void pdc_intc_syswake_isr(struct irq_desc *desc)
 {
 	struct pdc_intc_priv *priv;
-	unsigned int syswake, irq_no;
+	unsigned int syswake;
 	unsigned int status;
 
 	priv = (struct pdc_intc_priv *)irq_desc_get_handler_data(desc);
@@ -258,9 +257,7 @@ static void pdc_intc_syswake_isr(struct irq_desc *desc)
 		if (!(status & 1))
 			continue;
 
-		irq_no = irq_linear_revmap(priv->domain,
-					   syswake_to_hwirq(syswake));
-		generic_handle_irq(irq_no);
+		generic_handle_domain_irq(priv->domain, syswake_to_hwirq(syswake));
 	}
 }
 
diff --git a/drivers/irqchip/irq-imx-intmux.c b/drivers/irqchip/irq-imx-intmux.c
index 7709f9712cb3b..e86ff743e98c3 100644
--- a/drivers/irqchip/irq-imx-intmux.c
+++ b/drivers/irqchip/irq-imx-intmux.c
@@ -182,18 +182,15 @@ static void imx_intmux_irq_handler(struct irq_desc *desc)
 	struct intmux_data *data = container_of(irqchip_data, struct intmux_data,
 						irqchip_data[idx]);
 	unsigned long irqstat;
-	int pos, virq;
+	int pos;
 
 	chained_irq_enter(irq_desc_get_chip(desc), desc);
 
 	/* read the interrupt source pending status of this channel */
 	irqstat = readl_relaxed(data->regs + CHANIPR(idx));
 
-	for_each_set_bit(pos, &irqstat, 32) {
-		virq = irq_find_mapping(irqchip_data->domain, pos);
-		if (virq)
-			generic_handle_irq(virq);
-	}
+	for_each_set_bit(pos, &irqstat, 32)
+		generic_handle_domain_irq(irqchip_data->domain, pos);
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
 }
diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c
index 1edf7692a790b..8d91a02593fc2 100644
--- a/drivers/irqchip/irq-imx-irqsteer.c
+++ b/drivers/irqchip/irq-imx-irqsteer.c
@@ -122,7 +122,7 @@ static void imx_irqsteer_irq_handler(struct irq_desc *desc)
 	for (i = 0; i < 2; i++, hwirq += 32) {
 		int idx = imx_irqsteer_get_reg_index(data, hwirq);
 		unsigned long irqmap;
-		int pos, virq;
+		int pos;
 
 		if (hwirq >= data->reg_num * 32)
 			break;
@@ -130,11 +130,8 @@ static void imx_irqsteer_irq_handler(struct irq_desc *desc)
 		irqmap = readl_relaxed(data->regs +
 				       CHANSTATUS(idx, data->reg_num));
 
-		for_each_set_bit(pos, &irqmap, 32) {
-			virq = irq_find_mapping(data->domain, pos + hwirq);
-			if (virq)
-				generic_handle_irq(virq);
-		}
+		for_each_set_bit(pos, &irqmap, 32)
+			generic_handle_domain_irq(data->domain, pos + hwirq);
 	}
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c
index b938d1d04d96e..34a7d261b7103 100644
--- a/drivers/irqchip/irq-ingenic-tcu.c
+++ b/drivers/irqchip/irq-ingenic-tcu.c
@@ -38,7 +38,7 @@ static void ingenic_tcu_intc_cascade(struct irq_desc *desc)
 	irq_reg &= ~irq_mask;
 
 	for_each_set_bit(i, (unsigned long *)&irq_reg, 32)
-		generic_handle_irq(irq_linear_revmap(domain, i));
+		generic_handle_domain_irq(domain, i);
 
 	chained_irq_exit(irq_chip, desc);
 }
diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c
index ea36bb00be80b..cee839ca627ee 100644
--- a/drivers/irqchip/irq-ingenic.c
+++ b/drivers/irqchip/irq-ingenic.c
@@ -49,8 +49,7 @@ static irqreturn_t intc_cascade(int irq, void *data)
 		while (pending) {
 			int bit = __fls(pending);
 
-			irq = irq_linear_revmap(domain, bit + (i * 32));
-			generic_handle_irq(irq);
+			generic_handle_domain_irq(domain, bit + (i * 32));
 			pending &= ~BIT(bit);
 		}
 	}
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index 8118ebe80b09e..d47c8041e5bc1 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -89,7 +89,7 @@ static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq)
 	struct keystone_irq_device *kirq = keystone_irq;
 	unsigned long wa_lock_flags;
 	unsigned long pending;
-	int src, virq;
+	int src, err;
 
 	dev_dbg(kirq->dev, "start irq %d\n", irq);
 
@@ -104,16 +104,14 @@ static irqreturn_t keystone_irq_handler(int irq, void *keystone_irq)
 
 	for (src = 0; src < KEYSTONE_N_IRQ; src++) {
 		if (BIT(src) & pending) {
-			virq = irq_find_mapping(kirq->irqd, src);
-			dev_dbg(kirq->dev, "dispatch bit %d, virq %d\n",
-				src, virq);
-			if (!virq)
-				dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n",
-					 src, virq);
 			raw_spin_lock_irqsave(&kirq->wa_lock, wa_lock_flags);
-			generic_handle_irq(virq);
+			err = generic_handle_domain_irq(kirq->irqd, src);
 			raw_spin_unlock_irqrestore(&kirq->wa_lock,
 						   wa_lock_flags);
+
+			if (err)
+				dev_warn_ratelimited(kirq->dev, "spurious irq detected hwirq %d\n",
+						     src);
 		}
 	}
 
diff --git a/drivers/irqchip/irq-loongson-htpic.c b/drivers/irqchip/irq-loongson-htpic.c
index 1b801c4fb026f..f4abdf156de71 100644
--- a/drivers/irqchip/irq-loongson-htpic.c
+++ b/drivers/irqchip/irq-loongson-htpic.c
@@ -48,7 +48,7 @@ static void htpic_irq_dispatch(struct irq_desc *desc)
 			break;
 		}
 
-		generic_handle_irq(irq_linear_revmap(priv->domain, bit));
+		generic_handle_domain_irq(priv->domain, bit);
 		pending &= ~BIT(bit);
 	}
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c
index 6392aafb9a631..60a335d7e64ed 100644
--- a/drivers/irqchip/irq-loongson-htvec.c
+++ b/drivers/irqchip/irq-loongson-htvec.c
@@ -47,8 +47,8 @@ static void htvec_irq_dispatch(struct irq_desc *desc)
 		while (pending) {
 			int bit = __ffs(pending);
 
-			generic_handle_irq(irq_linear_revmap(priv->htvec_domain, bit +
-							     VEC_COUNT_PER_REG * i));
+			generic_handle_domain_irq(priv->htvec_domain,
+						  bit + VEC_COUNT_PER_REG * i);
 			pending &= ~BIT(bit);
 			handled = true;
 		}
diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c
index 8ccb304218069..649c583916188 100644
--- a/drivers/irqchip/irq-loongson-liointc.c
+++ b/drivers/irqchip/irq-loongson-liointc.c
@@ -73,7 +73,7 @@ static void liointc_chained_handle_irq(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_find_mapping(gc->domain, bit));
+		generic_handle_domain_irq(gc->domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index 7d9b388afe643..5e6f6e25f2aea 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -141,7 +141,7 @@ static void lpc32xx_sic_handler(struct irq_desc *desc)
 	while (hwirq) {
 		irq = __ffs(hwirq);
 		hwirq &= ~BIT(irq);
-		generic_handle_irq(irq_find_mapping(ic->domain, irq));
+		generic_handle_domain_irq(ic->domain, irq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c
index 61dbfda08527e..55322da51c56a 100644
--- a/drivers/irqchip/irq-ls-scfg-msi.c
+++ b/drivers/irqchip/irq-ls-scfg-msi.c
@@ -194,7 +194,7 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc)
 	struct ls_scfg_msir *msir = irq_desc_get_handler_data(desc);
 	struct ls_scfg_msi *msi_data = msir->msi_data;
 	unsigned long val;
-	int pos, size, virq, hwirq;
+	int pos, size, hwirq;
 
 	chained_irq_enter(irq_desc_get_chip(desc), desc);
 
@@ -206,9 +206,7 @@ static void ls_scfg_msi_irq_handler(struct irq_desc *desc)
 	for_each_set_bit_from(pos, &val, size) {
 		hwirq = ((msir->bit_end - pos) << msi_data->cfg->ibs_shift) |
 			msir->srs;
-		virq = irq_find_mapping(msi_data->parent, hwirq);
-		if (virq)
-			generic_handle_irq(virq);
+		generic_handle_domain_irq(msi_data->parent, hwirq);
 	}
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
diff --git a/drivers/irqchip/irq-ls1x.c b/drivers/irqchip/irq-ls1x.c
index 353111a104133..77a3f7dfaaf06 100644
--- a/drivers/irqchip/irq-ls1x.c
+++ b/drivers/irqchip/irq-ls1x.c
@@ -50,7 +50,7 @@ static void ls1x_chained_handle_irq(struct irq_desc *desc)
 	while (pending) {
 		int bit = __ffs(pending);
 
-		generic_handle_irq(irq_find_mapping(priv->domain, bit));
+		generic_handle_domain_irq(priv->domain, bit);
 		pending &= ~BIT(bit);
 	}
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index a2cbf0acff1c6..b146e069bf5ba 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -148,7 +148,7 @@ int gic_get_c0_fdc_int(void)
 
 static void gic_handle_shared_int(bool chained)
 {
-	unsigned int intr, virq;
+	unsigned int intr;
 	unsigned long *pcpu_mask;
 	DECLARE_BITMAP(pending, GIC_MAX_INTRS);
 
@@ -165,12 +165,12 @@ static void gic_handle_shared_int(bool chained)
 	bitmap_and(pending, pending, pcpu_mask, gic_shared_intrs);
 
 	for_each_set_bit(intr, pending, gic_shared_intrs) {
-		virq = irq_linear_revmap(gic_irq_domain,
-					 GIC_SHARED_TO_HWIRQ(intr));
 		if (chained)
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(gic_irq_domain,
+						  GIC_SHARED_TO_HWIRQ(intr));
 		else
-			do_IRQ(virq);
+			do_IRQ(irq_find_mapping(gic_irq_domain,
+						GIC_SHARED_TO_HWIRQ(intr)));
 	}
 }
 
@@ -308,7 +308,7 @@ static struct irq_chip gic_edge_irq_controller = {
 static void gic_handle_local_int(bool chained)
 {
 	unsigned long pending, masked;
-	unsigned int intr, virq;
+	unsigned int intr;
 
 	pending = read_gic_vl_pend();
 	masked = read_gic_vl_mask();
@@ -316,12 +316,12 @@ static void gic_handle_local_int(bool chained)
 	bitmap_and(&pending, &pending, &masked, GIC_NUM_LOCAL_INTRS);
 
 	for_each_set_bit(intr, &pending, GIC_NUM_LOCAL_INTRS) {
-		virq = irq_linear_revmap(gic_irq_domain,
-					 GIC_LOCAL_TO_HWIRQ(intr));
 		if (chained)
-			generic_handle_irq(virq);
+			generic_handle_domain_irq(gic_irq_domain,
+						  GIC_LOCAL_TO_HWIRQ(intr));
 		else
-			do_IRQ(virq);
+			do_IRQ(irq_find_mapping(gic_irq_domain,
+						GIC_LOCAL_TO_HWIRQ(intr)));
 	}
 }
 
diff --git a/drivers/irqchip/irq-mscc-ocelot.c b/drivers/irqchip/irq-mscc-ocelot.c
index 8235d98650c1e..4d0c3532dbe73 100644
--- a/drivers/irqchip/irq-mscc-ocelot.c
+++ b/drivers/irqchip/irq-mscc-ocelot.c
@@ -107,7 +107,7 @@ static void ocelot_irq_handler(struct irq_desc *desc)
 	while (reg) {
 		u32 hwirq = __fls(reg);
 
-		generic_handle_irq(irq_find_mapping(d, hwirq));
+		generic_handle_domain_irq(d, hwirq);
 		reg &= ~(BIT(hwirq));
 	}
 
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
index eec63951129ad..dc1cee4b0fe10 100644
--- a/drivers/irqchip/irq-mvebu-pic.c
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -91,15 +91,12 @@ static void mvebu_pic_handle_cascade_irq(struct irq_desc *desc)
 	struct mvebu_pic *pic = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	unsigned long irqmap, irqn;
-	unsigned int cascade_irq;
 
 	irqmap = readl_relaxed(pic->base + PIC_CAUSE);
 	chained_irq_enter(chip, desc);
 
-	for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) {
-		cascade_irq = irq_find_mapping(pic->domain, irqn);
-		generic_handle_irq(cascade_irq);
-	}
+	for_each_set_bit(irqn, &irqmap, BITS_PER_LONG)
+		generic_handle_domain_irq(pic->domain, irqn);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 3a7b7a7f20ca7..4ecef6d83777a 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -337,17 +337,12 @@ static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
 		irqmap = readl_relaxed(sei->base + GICP_SECR(idx));
 		for_each_set_bit(bit, &irqmap, SEI_IRQ_COUNT_PER_REG) {
 			unsigned long hwirq;
-			unsigned int virq;
+			int err;
 
 			hwirq = idx * SEI_IRQ_COUNT_PER_REG + bit;
-			virq = irq_find_mapping(sei->sei_domain, hwirq);
-			if (likely(virq)) {
-				generic_handle_irq(virq);
-				continue;
-			}
-
-			dev_warn(sei->dev,
-				 "Spurious IRQ detected (hwirq %lu)\n", hwirq);
+			err = generic_handle_domain_irq(sei->sei_domain, hwirq);
+			if (unlikely(err))
+				dev_warn(sei->dev, "Spurious IRQ detected (hwirq %lu)\n", hwirq);
 		}
 	}
 
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index c4b5ffb619546..b6868f7b805aa 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -117,7 +117,7 @@ static void orion_bridge_irq_handler(struct irq_desc *desc)
 	while (stat) {
 		u32 hwirq = __fls(stat);
 
-		generic_handle_irq(irq_find_mapping(d, gc->irq_base + hwirq));
+		generic_handle_domain_irq(d, gc->irq_base + hwirq);
 		stat &= ~(1 << hwirq);
 	}
 }
diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c
index 0c4c8ed7064ee..89c23a1566dcf 100644
--- a/drivers/irqchip/irq-partition-percpu.c
+++ b/drivers/irqchip/irq-partition-percpu.c
@@ -124,13 +124,10 @@ static void partition_handle_irq(struct irq_desc *desc)
 			break;
 	}
 
-	if (unlikely(hwirq == part->nr_parts)) {
+	if (unlikely(hwirq == part->nr_parts))
 		handle_bad_irq(desc);
-	} else {
-		unsigned int irq;
-		irq = irq_find_mapping(part->domain, hwirq);
-		generic_handle_irq(irq);
-	}
+	else
+		generic_handle_domain_irq(part->domain, hwirq);
 
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/irq-pruss-intc.c b/drivers/irqchip/irq-pruss-intc.c
index 92fb5780dc10c..fa8d89b02ec06 100644
--- a/drivers/irqchip/irq-pruss-intc.c
+++ b/drivers/irqchip/irq-pruss-intc.c
@@ -488,8 +488,7 @@ static void pruss_intc_irq_handler(struct irq_desc *desc)
 
 	while (true) {
 		u32 hipir;
-		unsigned int virq;
-		int hwirq;
+		int hwirq, err;
 
 		/* get highest priority pending PRUSS system event */
 		hipir = pruss_intc_read_reg(intc, PRU_INTC_HIPIR(host_irq));
@@ -497,16 +496,14 @@ static void pruss_intc_irq_handler(struct irq_desc *desc)
 			break;
 
 		hwirq = hipir & GENMASK(9, 0);
-		virq = irq_find_mapping(intc->domain, hwirq);
+		err = generic_handle_domain_irq(intc->domain, hwirq);
 
 		/*
 		 * NOTE: manually ACK any system events that do not have a
 		 * handler mapped yet
 		 */
-		if (WARN_ON_ONCE(!virq))
+		if (WARN_ON_ONCE(err))
 			pruss_intc_write_reg(intc, PRU_INTC_SICR, hwirq);
-		else
-			generic_handle_irq(virq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index b57c67dfab5b0..fd9f275592d29 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -85,7 +85,7 @@ static void realtek_irq_dispatch(struct irq_desc *desc)
 		goto out;
 	}
 	domain = irq_desc_get_handler_data(desc);
-	generic_handle_irq(irq_find_mapping(domain, __ffs(pending)));
+	generic_handle_domain_irq(domain, __ffs(pending));
 
 out:
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-renesas-irqc.c b/drivers/irqchip/irq-renesas-irqc.c
index 11abc09ef76c6..07a6d8b42b639 100644
--- a/drivers/irqchip/irq-renesas-irqc.c
+++ b/drivers/irqchip/irq-renesas-irqc.c
@@ -115,7 +115,7 @@ static irqreturn_t irqc_irq_handler(int irq, void *dev_id)
 	if (ioread32(p->iomem + DETECT_STATUS) & bit) {
 		iowrite32(bit, p->iomem + DETECT_STATUS);
 		irqc_dbg(i, "demux2");
-		generic_handle_irq(irq_find_mapping(p->irq_domain, i->hw_irq));
+		generic_handle_domain_irq(p->irq_domain, i->hw_irq);
 		return IRQ_HANDLED;
 	}
 	return IRQ_NONE;
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 97d4d04b0a80e..cf74cfa820453 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -233,13 +233,11 @@ static void plic_handle_irq(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 
 	while ((hwirq = readl(claim))) {
-		int irq = irq_find_mapping(handler->priv->irqdomain, hwirq);
-
-		if (unlikely(irq <= 0))
+		int err = generic_handle_domain_irq(handler->priv->irqdomain,
+						    hwirq);
+		if (unlikely(err))
 			pr_warn_ratelimited("can't find mapping for hwirq %lu\n",
 					hwirq);
-		else
-			generic_handle_irq(irq);
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
index 4704f2ee5797a..33c76710f845f 100644
--- a/drivers/irqchip/irq-stm32-exti.c
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -257,7 +257,7 @@ static void stm32_irq_handler(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int virq, nbanks = domain->gc->num_chips;
+	unsigned int nbanks = domain->gc->num_chips;
 	struct irq_chip_generic *gc;
 	unsigned long pending;
 	int n, i, irq_base = 0;
@@ -268,11 +268,9 @@ static void stm32_irq_handler(struct irq_desc *desc)
 		gc = irq_get_domain_generic_chip(domain, irq_base);
 
 		while ((pending = stm32_exti_pending(gc))) {
-			for_each_set_bit(n, &pending, IRQS_PER_BANK) {
-				virq = irq_find_mapping(domain, irq_base + n);
-				generic_handle_irq(virq);
-			}
-		}
+			for_each_set_bit(n, &pending, IRQS_PER_BANK)
+				generic_handle_domain_irq(domain, irq_base + n);
+		}
 	}
 
 	chained_irq_exit(chip, desc);
diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 9f2bd0c5d2899..21d49791f8552 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -88,10 +88,9 @@ static void sunxi_sc_nmi_handle_irq(struct irq_desc *desc)
 {
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	struct irq_chip *chip = irq_desc_get_chip(desc);
-	unsigned int virq = irq_find_mapping(domain, 0);
 
 	chained_irq_enter(chip, desc);
-	generic_handle_irq(virq);
+	generic_handle_domain_irq(domain, 0);
 	chained_irq_exit(chip, desc);
 }
 
diff --git a/drivers/irqchip/irq-tb10x.c b/drivers/irqchip/irq-tb10x.c
index 9a63b02b81764..8a0e69298e830 100644
--- a/drivers/irqchip/irq-tb10x.c
+++ b/drivers/irqchip/irq-tb10x.c
@@ -91,7 +91,7 @@ static void tb10x_irq_cascade(struct irq_desc *desc)
 	struct irq_domain *domain = irq_desc_get_handler_data(desc);
 	unsigned int irq = irq_desc_get_irq(desc);
 
-	generic_handle_irq(irq_find_mapping(domain, irq));
+	generic_handle_domain_irq(domain, irq);
 }
 
 static int __init of_tb10x_init_irq(struct device_node *ictl,
diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
index ca1f593f4d13a..97f454ec376b0 100644
--- a/drivers/irqchip/irq-ti-sci-inta.c
+++ b/drivers/irqchip/irq-ti-sci-inta.c
@@ -147,7 +147,7 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc)
 	struct ti_sci_inta_vint_desc *vint_desc;
 	struct ti_sci_inta_irq_domain *inta;
 	struct irq_domain *domain;
-	unsigned int virq, bit;
+	unsigned int bit;
 	unsigned long val;
 
 	vint_desc = irq_desc_get_handler_data(desc);
@@ -159,11 +159,8 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc)
 	val = readq_relaxed(inta->base + vint_desc->vint_id * 0x1000 +
 			    VINT_STATUS_MASKED_OFFSET);
 
-	for_each_set_bit(bit, &val, MAX_EVENTS_PER_VINT) {
-		virq = irq_find_mapping(domain, vint_desc->events[bit].hwirq);
-		if (virq)
-			generic_handle_irq(virq);
-	}
+	for_each_set_bit(bit, &val, MAX_EVENTS_PER_VINT)
+		generic_handle_domain_irq(domain, vint_desc->events[bit].hwirq);
 
 	chained_irq_exit(irq_desc_get_chip(desc), desc);
 }
diff --git a/drivers/irqchip/irq-ts4800.c b/drivers/irqchip/irq-ts4800.c
index 2325fb3c482b8..34337a61b1eff 100644
--- a/drivers/irqchip/irq-ts4800.c
+++ b/drivers/irqchip/irq-ts4800.c
@@ -79,10 +79,9 @@ static void ts4800_ic_chained_handle_irq(struct irq_desc *desc)
 
 	do {
 		unsigned int bit = __ffs(status);
-		int irq = irq_find_mapping(data->domain, bit);
 
+		generic_handle_domain_irq(data->domain, bit);
 		status &= ~(1 << bit);
-		generic_handle_irq(irq);
 	} while (status);
 
 out:
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index f1386733d3bc1..75be350cf82f8 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -85,7 +85,7 @@ static void fpga_irq_handle(struct irq_desc *desc)
 		unsigned int irq = ffs(status) - 1;
 
 		status &= ~(1 << irq);
-		generic_handle_irq(irq_find_mapping(f->domain, irq));
+		generic_handle_domain_irq(f->domain, irq);
 	} while (status);
 
 out:
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index 62f3d29f90420..1e1f2d115257b 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -225,7 +225,7 @@ static void vic_handle_irq_cascaded(struct irq_desc *desc)
 
 	while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) {
 		hwirq = ffs(stat) - 1;
-		generic_handle_irq(irq_find_mapping(vic->domain, hwirq));
+		generic_handle_domain_irq(vic->domain, hwirq);
 	}
 
 	chained_irq_exit(host_chip, desc);
diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
index 8cd1bfc730572..356a59755d637 100644
--- a/drivers/irqchip/irq-xilinx-intc.c
+++ b/drivers/irqchip/irq-xilinx-intc.c
@@ -110,20 +110,6 @@ static struct irq_chip intc_dev = {
 	.irq_mask_ack = intc_mask_ack,
 };
 
-static unsigned int xintc_get_irq_local(struct xintc_irq_chip *irqc)
-{
-	unsigned int irq = 0;
-	u32 hwirq;
-
-	hwirq = xintc_read(irqc, IVR);
-	if (hwirq != -1U)
-		irq = irq_find_mapping(irqc->root_domain, hwirq);
-
-	pr_debug("irq-xilinx: hwirq=%d, irq=%d\n", hwirq, irq);
-
-	return irq;
-}
-
 unsigned int xintc_get_irq(void)
 {
 	unsigned int irq = -1;
@@ -164,15 +150,16 @@ static void xil_intc_irq_handler(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct xintc_irq_chip *irqc;
-	u32 pending;
 
 	irqc = irq_data_get_irq_handler_data(&desc->irq_data);
 	chained_irq_enter(chip, desc);
 	do {
-		pending = xintc_get_irq_local(irqc);
-		if (pending == 0)
+		u32 hwirq = xintc_read(irqc, IVR);
+
+		if (hwirq == -1U)
 			break;
-		generic_handle_irq(pending);
+
+		generic_handle_domain_irq(irqc->root_domain, hwirq);
 	} while (true);
 	chained_irq_exit(chip, desc);
 }
diff --git a/drivers/irqchip/qcom-irq-combiner.c b/drivers/irqchip/qcom-irq-combiner.c
index aa54bfcb0433f..18e696dc7f4d6 100644
--- a/drivers/irqchip/qcom-irq-combiner.c
+++ b/drivers/irqchip/qcom-irq-combiner.c
@@ -53,7 +53,6 @@ static void combiner_handle_irq(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 
 	for (reg = 0; reg < combiner->nregs; reg++) {
-		int virq;
 		int hwirq;
 		u32 bit;
 		u32 status;
@@ -70,10 +69,7 @@ static void combiner_handle_irq(struct irq_desc *desc)
 			bit = __ffs(status);
 			status &= ~(1 << bit);
 			hwirq = irq_nr(reg, bit);
-			virq = irq_find_mapping(combiner->domain, hwirq);
-			if (virq > 0)
-				generic_handle_irq(virq);
-
+			generic_handle_domain_irq(combiner->domain, hwirq);
 		}
 	}
 
-- 
GitLab


From f36011569b90b3973f07cea00c5872c4dc0c707f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:21 +1000
Subject: [PATCH 2611/3804] KVM: PPC: Book3S 64: move KVM interrupt entry to a
 common entry point

Rather than bifurcate the call depending on whether or not HV is
possible, and have the HV entry test for PR, just make a single
common point which does the demultiplexing. This makes it simpler
to add another type of exit handler.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Acked-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-2-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S    |  8 +-----
 arch/powerpc/kvm/Makefile               |  1 +
 arch/powerpc/kvm/book3s_64_entry.S      | 36 +++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 ++------
 4 files changed, 40 insertions(+), 16 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_64_entry.S

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index fa8e52a0239ee..868077f7a96f2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -208,7 +208,6 @@ do_define_int n
 .endm
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * All interrupts which set HSRR registers, as well as SRESET and MCE and
  * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -238,13 +237,8 @@ do_define_int n
 
 /*
  * If an interrupt is taken while a guest is running, it is immediately routed
- * to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
- * to kvmppc_interrupt_hv, which handles the PR guest case.
+ * to KVM to handle.
  */
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
 
 .macro KVMTEST name
 	lbz	r10,HSTATE_IN_GUEST(r13)
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 2bfeaa13befb4..bbc071a8a976c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -57,6 +57,7 @@ kvm-pr-y := \
 	book3s_32_mmu.o
 
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_64_entry.o \
 	tm.o
 
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 0000000000000..7a039ea78f158
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/ppc_asm.h>
+#include <asm/reg.h>
+
+/*
+ * This is branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+.global	kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+	/*
+	 * Register contents:
+	 * R12		= (guest CR << 32) | interrupt vector
+	 * R13		= PACA
+	 * guest R12 saved in shadow VCPU SCRATCH0
+	 * guest R13 saved in SPRN_SCRATCH0
+	 */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	std	r9,HSTATE_SCRATCH2(r13)
+	lbz	r9,HSTATE_IN_GUEST(r13)
+	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+	cmpwi	r9,KVM_GUEST_MODE_GUEST
+	ld	r9,HSTATE_SCRATCH2(r13)
+	beq	kvmppc_interrupt_pr
+#endif
+	b	kvmppc_interrupt_hv
+#else
+	b	kvmppc_interrupt_pr
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 004f0d4e665f8..a28b41b1bb38c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1272,16 +1272,8 @@ kvmppc_interrupt_hv:
 	 * R13		= PACA
 	 * guest R12 saved in shadow VCPU SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
+	 * guest R9 saved in HSTATE_SCRATCH2
 	 */
-	std	r9, HSTATE_SCRATCH2(r13)
-	lbz	r9, HSTATE_IN_GUEST(r13)
-	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
-	beq	kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-	cmpwi	r9, KVM_GUEST_MODE_GUEST
-	ld	r9, HSTATE_SCRATCH2(r13)
-	beq	kvmppc_interrupt_pr
-#endif
 	/* We're now back in the host but in guest MMU context */
 	li	r9, KVM_GUEST_MODE_HOST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
@@ -3287,6 +3279,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
  * cfar is saved in HSTATE_CFAR(r13)
  * ppr is saved in HSTATE_PPR(r13)
  */
+.global kvmppc_bad_host_intr
 kvmppc_bad_host_intr:
 	/*
 	 * Switch to the emergency stack, but start half-way down in
-- 
GitLab


From f33e0702d98cc5ff21f44833525b07581862eb57 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:22 +1000
Subject: [PATCH 2612/3804] KVM: PPC: Book3S 64: Move GUEST_MODE_SKIP test into
 KVM

Move the GUEST_MODE_SKIP logic into KVM code. This is quite a KVM
internal detail that has no real need to be in common handlers.

Add a comment explaining the what and why of KVM "skip" interrupts.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-3-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 60 ----------------------------
 arch/powerpc/kvm/book3s_64_entry.S   | 59 ++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 61 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 868077f7a96f2..192b927b429e4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -133,7 +133,6 @@ name:
 #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
 #define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
 #define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */
-#define IKVM_SKIP	.L_IKVM_SKIP_\name\()	/* Generate KVM skip handler */
 #define IKVM_REAL	.L_IKVM_REAL_\name\()	/* Real entry tests KVM */
 #define __IKVM_REAL(name)	.L_IKVM_REAL_ ## name
 #define IKVM_VIRT	.L_IKVM_VIRT_\name\()	/* Virt entry tests KVM */
@@ -190,9 +189,6 @@ do_define_int n
 	.ifndef IMASK
 		IMASK=0
 	.endif
-	.ifndef IKVM_SKIP
-		IKVM_SKIP=0
-	.endif
 	.ifndef IKVM_REAL
 		IKVM_REAL=0
 	.endif
@@ -250,15 +246,10 @@ do_define_int n
 	.balign IFETCH_ALIGN_BYTES
 \name\()_kvm:
 
-	.if IKVM_SKIP
-	cmpwi	r10,KVM_GUEST_MODE_SKIP
-	beq	89f
-	.else
 BEGIN_FTR_SECTION
 	ld	r10,IAREA+EX_CFAR(r13)
 	std	r10,HSTATE_CFAR(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-	.endif
 
 	ld	r10,IAREA+EX_CTR(r13)
 	mtctr	r10
@@ -285,27 +276,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ori	r12,r12,(IVEC)
 	.endif
 	b	kvmppc_interrupt
-
-	.if IKVM_SKIP
-89:	mtocrf	0x80,r9
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	.if IHSRR_IF_HVMODE
-	BEGIN_FTR_SECTION
-	b	kvmppc_skip_Hinterrupt
-	FTR_SECTION_ELSE
-	b	kvmppc_skip_interrupt
-	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-	.elseif IHSRR
-	b	kvmppc_skip_Hinterrupt
-	.else
-	b	kvmppc_skip_interrupt
-	.endif
-	.endif
 .endm
 
 #else
@@ -1064,7 +1034,6 @@ INT_DEFINE_BEGIN(machine_check)
 	ISET_RI=0
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(machine_check)
 
@@ -1336,7 +1305,6 @@ INT_DEFINE_BEGIN(data_access)
 	IVEC=0x300
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access)
 
@@ -1390,7 +1358,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 INT_DEFINE_BEGIN(data_access_slb)
 	IVEC=0x380
 	IDAR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 INT_DEFINE_END(data_access_slb)
 
@@ -2057,7 +2024,6 @@ INT_DEFINE_BEGIN(h_data_storage)
 	IHSRR=1
 	IDAR=1
 	IDSISR=1
-	IKVM_SKIP=1
 	IKVM_REAL=1
 	IKVM_VIRT=1
 INT_DEFINE_END(h_data_storage)
@@ -3003,32 +2969,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
 MASKED_INTERRUPT
 MASKED_INTERRUPT hsrr=1
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_SRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_SRR0, r13
-	GET_SCRATCH0(r13)
-	RFI_TO_KERNEL
-	b	.
-
-kvmppc_skip_Hinterrupt:
-	/*
-	 * Here all GPRs are unchanged from when the interrupt happened
-	 * except for r13, which is saved in SPRG_SCRATCH0.
-	 */
-	mfspr	r13, SPRN_HSRR0
-	addi	r13, r13, 4
-	mtspr	SPRN_HSRR0, r13
-	GET_SCRATCH0(r13)
-	HRFI_TO_KERNEL
-	b	.
-#endif
-
 	/*
 	 * Relocation-on interrupts: A subset of the interrupts can be delivered
 	 * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 7a039ea78f158..bf927e7a06afc 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
+#include <asm/exception-64s.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/ppc_asm.h>
@@ -20,9 +21,12 @@ kvmppc_interrupt:
 	 * guest R12 saved in shadow VCPU SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
 	 */
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	std	r9,HSTATE_SCRATCH2(r13)
 	lbz	r9,HSTATE_IN_GUEST(r13)
+	cmpwi	r9,KVM_GUEST_MODE_SKIP
+	beq-	.Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
 	beq	kvmppc_bad_host_intr
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -32,5 +36,58 @@ kvmppc_interrupt:
 #endif
 	b	kvmppc_interrupt_hv
 #else
+	ld	r9,HSTATE_SCRATCH2(r13)
 	b	kvmppc_interrupt_pr
 #endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses with hash guests to load
+ * the faulting instruction in guest memory from the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+	cmpwi	r12,BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	1f
+	cmpwi	r12,BOOK3S_INTERRUPT_DATA_STORAGE
+	beq	1f
+	cmpwi	r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+	beq	1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/* HSRR interrupts get 2 added to interrupt number */
+	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+	beq	2f
+#endif
+	b	.Lno_skip
+1:	mfspr	r9,SPRN_SRR0
+	addi	r9,r9,4
+	mtspr	SPRN_SRR0,r9
+	ld	r12,HSTATE_SCRATCH0(r13)
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)
+	RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2:	mfspr	r9,SPRN_HSRR0
+	addi	r9,r9,4
+	mtspr	SPRN_HSRR0,r9
+	ld	r12,HSTATE_SCRATCH0(r13)
+	ld	r9,HSTATE_SCRATCH2(r13)
+	GET_SCRATCH0(r13)
+	HRFI_TO_KERNEL
+#endif
-- 
GitLab


From 31c67cfe2a6a5a7364dc1552b877c6b7820dd556 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:23 +1000
Subject: [PATCH 2613/3804] KVM: PPC: Book3S 64: add hcall interrupt handler

Add a separate hcall entry point. This can be used to deal with the
different calling convention.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-4-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 6 +++---
 arch/powerpc/kvm/book3s_64_entry.S   | 6 +++++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 192b927b429e4..4819bf60324c6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1966,16 +1966,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ori	r12,r12,0xc00
 #ifdef CONFIG_RELOCATABLE
 	/*
-	 * Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
+	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
 	 * outside the head section.
 	 */
-	__LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
+	__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
 	mtctr   r10
 	ld	r10,PACA_EXGEN+EX_R10(r13)
 	bctr
 #else
 	ld	r10,PACA_EXGEN+EX_R10(r13)
-	b       kvmppc_interrupt
+	b       kvmppc_hcall
 #endif
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index bf927e7a06afc..c21fa64059ef6 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -8,9 +8,13 @@
 #include <asm/reg.h>
 
 /*
- * This is branched to from interrupt handlers in exception-64s.S which set
+ * These are branched to from interrupt handlers in exceptions-64s.S which set
  * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
  */
+.global	kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
-- 
GitLab


From 04ece7b60b689e1de38b9b0f597f8f94951e4367 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:24 +1000
Subject: [PATCH 2614/3804] KVM: PPC: Book3S 64: Move hcall early register
 setup to KVM

System calls / hcalls have a different calling convention than
other interrupts, so there is code in the KVMTEST to massage these
into the same form as other interrupt handlers.

Move this work into the KVM hcall handler. This means teaching KVM
a little more about the low level interrupt handler setup, PACA save
areas, etc., although that's not obviously worse than the current
approach of coming up with an entirely different interrupt register
/ save convention.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-5-npiggin@gmail.com
---
 arch/powerpc/include/asm/exception-64s.h | 13 ++++++++
 arch/powerpc/kernel/exceptions-64s.S     | 42 +-----------------------
 arch/powerpc/kvm/book3s_64_entry.S       | 30 +++++++++++++++++
 3 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index c1a8aac01cf91..bb6f78fcf981c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -35,6 +35,19 @@
 /* PACA save area size in u64 units (exgen, exmc, etc) */
 #define EX_SIZE		10
 
+/* PACA save area offsets */
+#define EX_R9		0
+#define EX_R10		8
+#define EX_R11		16
+#define EX_R12		24
+#define EX_R13		32
+#define EX_DAR		40
+#define EX_DSISR	48
+#define EX_CCR		52
+#define EX_CFAR		56
+#define EX_PPR		64
+#define EX_CTR		72
+
 /*
  * maximum recursive depth of MCE exceptions
  */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4819bf60324c6..03e2d65d3d3f9 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,22 +21,6 @@
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
 
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9		0
-#define EX_R10		8
-#define EX_R11		16
-#define EX_R12		24
-#define EX_R13		32
-#define EX_DAR		40
-#define EX_DSISR	48
-#define EX_CCR		52
-#define EX_CFAR		56
-#define EX_PPR		64
-#define EX_CTR		72
-.if EX_SIZE != 10
-	.error "EX_SIZE is wrong"
-.endif
-
 /*
  * Following are fixed section helper macros.
  *
@@ -1941,29 +1925,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 TRAMP_REAL_BEGIN(system_call_kvm)
-	/*
-	 * This is a hcall, so register convention is as above, with these
-	 * differences:
-	 * r13 = PACA
-	 * ctr = orig r13
-	 * orig r10 saved in PACA
-	 */
-	 /*
-	  * Save the PPR (on systems that support it) before changing to
-	  * HMT_MEDIUM. That allows the KVM code to save that value into the
-	  * guest state (it is the guest's PPR value).
-	  */
-BEGIN_FTR_SECTION
-	mfspr	r10,SPRN_PPR
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	HMT_MEDIUM
 	mfctr	r10
-	SET_SCRATCH0(r10)
-	mfcr	r10
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r10,32
-	ori	r12,r12,0xc00
+	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
 #ifdef CONFIG_RELOCATABLE
 	/*
 	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
@@ -1971,15 +1934,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 */
 	__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
 	mtctr   r10
-	ld	r10,PACA_EXGEN+EX_R10(r13)
 	bctr
 #else
-	ld	r10,PACA_EXGEN+EX_R10(r13)
 	b       kvmppc_hcall
 #endif
 #endif
 
-
 /**
  * Interrupt 0xd00 - Trace Interrupt.
  * This is a synchronous interrupt in response to instruction step or
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index c21fa64059ef6..f527e16707db1 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -14,6 +14,36 @@
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
+	/*
+	 * This is a hcall, so register convention is as
+	 * Documentation/powerpc/papr_hcalls.rst, with these additions:
+	 * R13		= PACA
+	 * guest R13 saved in SPRN_SCRATCH0
+	 * R10		= free
+	 * guest r10 saved in PACA_EXGEN
+	 *
+	 * This may also be a syscall from PR-KVM userspace that is to be
+	 * reflected to the PR guest kernel, so registers may be set up for
+	 * a system call rather than hcall. We don't currently clobber
+	 * anything here, but the 0xc00 handler has already clobbered CTR
+	 * and CR0, so PR-KVM can not support a guest kernel that preserves
+	 * those registers across its system calls.
+	 */
+	 /*
+	  * Save the PPR (on systems that support it) before changing to
+	  * HMT_MEDIUM. That allows the KVM code to save that value into the
+	  * guest state (it is the guest's PPR value).
+	  */
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_PPR
+	std	r10,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	HMT_MEDIUM
+	mfcr	r10
+	std	r12,HSTATE_SCRATCH0(r13)
+	sldi	r12,r10,32
+	ori	r12,r12,0xc00
+	ld	r10,PACA_EXGEN+EX_R10(r13)
 
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
-- 
GitLab


From 69fdd67499716efca861f7cecabdfeee5e5d7b51 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:25 +1000
Subject: [PATCH 2615/3804] KVM: PPC: Book3S 64: Move interrupt early register
 setup to KVM

Like the earlier patch for hcalls, KVM interrupt entry requires a
different calling convention than the Linux interrupt handlers
set up. Move the code that converts from one to the other into KVM.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-6-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 131 +++++----------------------
 arch/powerpc/kvm/book3s_64_entry.S   |  50 +++++++++-
 2 files changed, 71 insertions(+), 110 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 03e2d65d3d3f9..bf377bfeeb1a4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -187,7 +187,6 @@ do_define_int n
 	.endif
 .endm
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 /*
  * All interrupts which set HSRR registers, as well as SRESET and MCE and
  * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
@@ -220,54 +219,25 @@ do_define_int n
  * to KVM to handle.
  */
 
-.macro KVMTEST name
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 	lbz	r10,HSTATE_IN_GUEST(r13)
 	cmpwi	r10,0
-	bne	\name\()_kvm
-.endm
-
-.macro GEN_KVM name
-	.balign IFETCH_ALIGN_BYTES
-\name\()_kvm:
-
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_CFAR(r13)
-	std	r10,HSTATE_CFAR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
-
-	ld	r10,IAREA+EX_CTR(r13)
-	mtctr	r10
-BEGIN_FTR_SECTION
-	ld	r10,IAREA+EX_PPR(r13)
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	ld	r11,IAREA+EX_R11(r13)
-	ld	r12,IAREA+EX_R12(r13)
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r9,32
-	ld	r9,IAREA+EX_R9(r13)
-	ld	r10,IAREA+EX_R10(r13)
 	/* HSRR variants have the 0x2 bit added to their trap number */
 	.if IHSRR_IF_HVMODE
 	BEGIN_FTR_SECTION
-	ori	r12,r12,(IVEC + 0x2)
+	li	r10,(IVEC + 0x2)
 	FTR_SECTION_ELSE
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	.elseif IHSRR
-	ori	r12,r12,(IVEC+ 0x2)
+	li	r10,(IVEC + 0x2)
 	.else
-	ori	r12,r12,(IVEC)
+	li	r10,(IVEC)
 	.endif
-	b	kvmppc_interrupt
-.endm
-
-#else
-.macro KVMTEST name
-.endm
-.macro GEN_KVM name
-.endm
+	bne	\handler
 #endif
+.endm
 
 /*
  * This is the BOOK3S interrupt entry code macro.
@@ -409,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif
 
 	ld	r10,PACAKMSR(r13)	/* get MSR value for kernel */
@@ -432,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 \name\()_common_virt:
 	.if IKVM_VIRT
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 1:
 	.endif
 	.endif /* IVIRT */
@@ -446,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
 DEFINE_FIXED_SYMBOL(\name\()_common_real)
 \name\()_common_real:
 	.if IKVM_REAL
-		KVMTEST \name
+		KVMTEST \name kvm_interrupt
 	.endif
 .endm
 
@@ -948,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common)
 	EXCEPTION_RESTORE_REGS
 	RFI_TO_USER_OR_KERNEL
 
-	GEN_KVM system_reset
-
 
 /**
  * Interrupt 0x200 - Machine Check Interrupt (MCE).
@@ -1113,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 	/*
 	 * Check if we are coming from guest. If yes, then run the normal
 	 * exception handler which will take the
-	 * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+	 * machine_check_kvm->kvm_interrupt branch to deliver the MC event
 	 * to guest.
 	 */
 	lbz	r11,HSTATE_IN_GUEST(r13)
@@ -1183,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 	bl	machine_check_exception
 	b	interrupt_return
 
-	GEN_KVM machine_check
-
 
 #ifdef CONFIG_PPC_P7_NAP
 /*
@@ -1319,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	REST_NVGPRS(r1)
 	b	interrupt_return
 
-	GEN_KVM data_access
-
 
 /**
  * Interrupt 0x380 - Data Segment Interrupt (DSLB).
@@ -1370,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return
 
-	GEN_KVM data_access_slb
-
 
 /**
  * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
@@ -1408,8 +1370,6 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	b	interrupt_return
 
-	GEN_KVM instruction_access
-
 
 /**
  * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
@@ -1454,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	bl	do_bad_slb_fault
 	b	interrupt_return
 
-	GEN_KVM instruction_access_slb
-
 
 /**
  * Interrupt 0x500 - External Interrupt.
@@ -1500,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
 	bl	do_IRQ
 	b	interrupt_return
 
-	GEN_KVM hardware_interrupt
-
 
 /**
  * Interrupt 0x600 - Alignment Interrupt
@@ -1529,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM alignment
-
 
 /**
  * Interrupt 0x700 - Program Interrupt (program check).
@@ -1638,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM program_check
-
 
 /*
  * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
@@ -1689,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	b	interrupt_return
 #endif
 
-	GEN_KVM fp_unavailable
-
 
 /**
  * Interrupt 0x900 - Decrementer Interrupt.
@@ -1729,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common)
 	bl	timer_interrupt
 	b	interrupt_return
 
-	GEN_KVM decrementer
-
 
 /**
  * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
@@ -1776,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
 	ld	r13,PACA_EXGEN+EX_R13(r13)
 	HRFI_TO_KERNEL
 
-	GEN_KVM hdecrementer
-
 
 /**
  * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
@@ -1817,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM doorbell_super
-
 
 EXC_REAL_NONE(0xb00, 0x100)
 EXC_VIRT_NONE(0x4b00, 0x100)
@@ -1868,7 +1812,7 @@ INT_DEFINE_END(system_call)
 	GET_PACA(r13)
 	std	r10,PACA_EXGEN+EX_R10(r13)
 	INTERRUPT_TO_KERNEL
-	KVMTEST system_call /* uses r10, branch to system_call_kvm */
+	KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
 	mfctr	r9
 #else
 	mr	r9,r13
@@ -1924,7 +1868,7 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
 EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-TRAMP_REAL_BEGIN(system_call_kvm)
+TRAMP_REAL_BEGIN(kvm_hcall)
 	mfctr	r10
 	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
 #ifdef CONFIG_RELOCATABLE
@@ -1964,8 +1908,6 @@ EXC_COMMON_BEGIN(single_step_common)
 	bl	single_step_exception
 	b	interrupt_return
 
-	GEN_KVM single_step
-
 
 /**
  * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
@@ -2004,8 +1946,6 @@ MMU_FTR_SECTION_ELSE
 ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
 	b       interrupt_return
 
-	GEN_KVM h_data_storage
-
 
 /**
  * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
@@ -2031,8 +1971,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
 	bl	unknown_exception
 	b	interrupt_return
 
-	GEN_KVM h_instr_storage
-
 
 /**
  * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
@@ -2057,8 +1995,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM emulation_assist
-
 
 /**
  * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
@@ -2130,16 +2066,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 	EXCEPTION_RESTORE_REGS hsrr=1
 	GEN_INT_ENTRY hmi_exception, virt=0
 
-	GEN_KVM hmi_exception_early
-
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	handle_hmi_exception
 	b	interrupt_return
 
-	GEN_KVM hmi_exception
-
 
 /**
  * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
@@ -2170,8 +2102,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM h_doorbell
-
 
 /**
  * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
@@ -2198,8 +2128,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
 	bl	do_IRQ
 	b	interrupt_return
 
-	GEN_KVM h_virt_irq
-
 
 EXC_REAL_NONE(0xec0, 0x20)
 EXC_VIRT_NONE(0x4ec0, 0x20)
@@ -2243,8 +2171,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
 	bl	performance_monitor_exception
 	b	interrupt_return
 
-	GEN_KVM performance_monitor
-
 
 /**
  * Interrupt 0xf20 - Vector Unavailable Interrupt.
@@ -2294,8 +2220,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	bl	altivec_unavailable_exception
 	b	interrupt_return
 
-	GEN_KVM altivec_unavailable
-
 
 /**
  * Interrupt 0xf40 - VSX Unavailable Interrupt.
@@ -2344,8 +2268,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	bl	vsx_unavailable_exception
 	b	interrupt_return
 
-	GEN_KVM vsx_unavailable
-
 
 /**
  * Interrupt 0xf60 - Facility Unavailable Interrupt.
@@ -2374,8 +2296,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return
 
-	GEN_KVM facility_unavailable
-
 
 /**
  * Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
@@ -2404,8 +2324,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
 	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
 	b	interrupt_return
 
-	GEN_KVM h_facility_unavailable
-
 
 EXC_REAL_NONE(0xfa0, 0x20)
 EXC_VIRT_NONE(0x4fa0, 0x20)
@@ -2435,8 +2353,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
 	bl	cbe_system_error_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_system_error
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
@@ -2468,8 +2384,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
 	bl	instruction_breakpoint_exception
 	b	interrupt_return
 
-	GEN_KVM instruction_breakpoint
-
 
 EXC_REAL_NONE(0x1400, 0x100)
 EXC_VIRT_NONE(0x5400, 0x100)
@@ -2590,8 +2504,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
 	bl	unknown_exception
 	b	interrupt_return
 
-	GEN_KVM denorm_exception
-
 
 #ifdef CONFIG_CBE_RAS
 INT_DEFINE_BEGIN(cbe_maintenance)
@@ -2609,8 +2521,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
 	bl	cbe_maintenance_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_maintenance
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1600, 0x100)
 EXC_VIRT_NONE(0x5600, 0x100)
@@ -2641,8 +2551,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 #endif
 	b	interrupt_return
 
-	GEN_KVM altivec_assist
-
 
 #ifdef CONFIG_CBE_RAS
 INT_DEFINE_BEGIN(cbe_thermal)
@@ -2660,8 +2568,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
 	bl	cbe_thermal_exception
 	b	interrupt_return
 
-	GEN_KVM cbe_thermal
-
 #else /* CONFIG_CBE_RAS */
 EXC_REAL_NONE(0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
@@ -2914,6 +2820,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
 
 USE_TEXT_SECTION()
 
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
+	/*
+	 * The conditional branch in KVMTEST can't reach all the way,
+	 * make a stub.
+	 */
+	b	kvmppc_interrupt
+#endif
+
 _GLOBAL(do_uaccess_flush)
 	UACCESS_FLUSH_FIXUP_SECTION
 	nop
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index f527e16707db1..2c9d106145e8e 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -44,15 +44,61 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	sldi	r12,r10,32
 	ori	r12,r12,0xc00
 	ld	r10,PACA_EXGEN+EX_R10(r13)
+	b	do_kvm_interrupt
 
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13		= PACA
+ * R11		= (H)SRR0
+ * R12		= (H)SRR1
+ * R9		= guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10		= trap vector
+ */
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
+	li	r11,PACA_EXGEN
+	cmpdi	r10,0x200
+	bgt+	1f
+	li	r11,PACA_EXMC
+	beq	1f
+	li	r11,PACA_EXNMI
+1:	add	r11,r11,r13
+
+BEGIN_FTR_SECTION
+	ld	r12,EX_CFAR(r11)
+	std	r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+	ld	r12,EX_CTR(r11)
+	mtctr	r12
+BEGIN_FTR_SECTION
+	ld	r12,EX_PPR(r11)
+	std	r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+	ld	r12,EX_R12(r11)
+	std	r12,HSTATE_SCRATCH0(r13)
+	sldi	r12,r9,32
+	or	r12,r12,r10
+	ld	r9,EX_R9(r11)
+	ld	r10,EX_R10(r11)
+	ld	r11,EX_R11(r11)
+
+do_kvm_interrupt:
 	/*
-	 * Register contents:
+	 * Hcalls and other interrupts come here after normalising register
+	 * contents and save locations:
+	 *
 	 * R12		= (guest CR << 32) | interrupt vector
 	 * R13		= PACA
-	 * guest R12 saved in shadow VCPU SCRATCH0
+	 * guest R12 saved in shadow HSTATE_SCRATCH0
 	 * guest R13 saved in SPRN_SCRATCH0
 	 */
 	std	r9,HSTATE_SCRATCH2(r13)
-- 
GitLab


From 1b5821c630c219e3c6f643ebbefcf08c9fa714d8 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:26 +1000
Subject: [PATCH 2616/3804] KVM: PPC: Book3S 64: move bad_host_intr check to HV
 handler

The bad_host_intr check will never be true with PR KVM, move
it to HV code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-7-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_64_entry.S      | 4 ----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +++-
 arch/powerpc/kvm/book3s_segment.S       | 3 +++
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 2c9d106145e8e..66170ea85bc2a 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -107,16 +107,12 @@ do_kvm_interrupt:
 	beq-	.Lmaybe_skip
 .Lno_skip:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
-	beq	kvmppc_bad_host_intr
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	cmpwi	r9,KVM_GUEST_MODE_GUEST
-	ld	r9,HSTATE_SCRATCH2(r13)
 	beq	kvmppc_interrupt_pr
 #endif
 	b	kvmppc_interrupt_hv
 #else
-	ld	r9,HSTATE_SCRATCH2(r13)
 	b	kvmppc_interrupt_pr
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a28b41b1bb38c..a8abe79bcb995 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1268,6 +1268,7 @@ hdec_soon:
 kvmppc_interrupt_hv:
 	/*
 	 * Register contents:
+	 * R9		= HSTATE_IN_GUEST
 	 * R12		= (guest CR << 32) | interrupt vector
 	 * R13		= PACA
 	 * guest R12 saved in shadow VCPU SCRATCH0
@@ -1275,6 +1276,8 @@ kvmppc_interrupt_hv:
 	 * guest R9 saved in HSTATE_SCRATCH2
 	 */
 	/* We're now back in the host but in guest MMU context */
+	cmpwi	r9,KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
 	li	r9, KVM_GUEST_MODE_HOST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
@@ -3279,7 +3282,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
  * cfar is saved in HSTATE_CFAR(r13)
  * ppr is saved in HSTATE_PPR(r13)
  */
-.global kvmppc_bad_host_intr
 kvmppc_bad_host_intr:
 	/*
 	 * Switch to the emergency stack, but start half-way down in
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index 1f492aa4c8d6c..202046a83fc10 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
 	/* 64-bit entry. Register usage at this point:
 	 *
 	 * SPRG_SCRATCH0   = guest R13
+	 * R9              = HSTATE_IN_GUEST
 	 * R12             = (guest CR << 32) | exit handler id
 	 * R13             = PACA
 	 * HSTATE.SCRATCH0 = guest R12
+	 * HSTATE.SCRATCH2 = guest R9
 	 */
 #ifdef CONFIG_PPC64
 	/* Match 32-bit entry */
+	ld	r9,HSTATE_SCRATCH2(r13)
 	rotldi	r12, r12, 32		  /* Flip R12 halves for stw */
 	stw	r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
 	srdi	r12, r12, 32		  /* shift trap into low half */
-- 
GitLab


From e2762743c6328dde14290cd58ddf2175b068ad80 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:27 +1000
Subject: [PATCH 2617/3804] KVM: PPC: Book3S 64: Minimise hcall handler calling
 convention differences

This sets up the same calling convention from interrupt entry to
KVM interrupt handler for system calls as exists for other interrupt
types.

This is a better API, it uses a save area rather than SPR, and it has
more registers free to use. Using a single common API helps maintain
it, and it becomes easier to use in C in a later patch.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-8-npiggin@gmail.com
---
 arch/powerpc/kernel/exceptions-64s.S | 21 +++++++++-
 arch/powerpc/kvm/book3s_64_entry.S   | 61 ++++++++++++----------------
 2 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index bf377bfeeb1a4..f7fc6e078d4ed 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1869,8 +1869,27 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 TRAMP_REAL_BEGIN(kvm_hcall)
+	std	r9,PACA_EXGEN+EX_R9(r13)
+	std	r11,PACA_EXGEN+EX_R11(r13)
+	std	r12,PACA_EXGEN+EX_R12(r13)
+	mfcr	r9
 	mfctr	r10
-	SET_SCRATCH0(r10) /* Save r13 in SCRATCH0 */
+	std	r10,PACA_EXGEN+EX_R13(r13)
+	li	r10,0
+	std	r10,PACA_EXGEN+EX_CFAR(r13)
+	std	r10,PACA_EXGEN+EX_CTR(r13)
+	 /*
+	  * Save the PPR (on systems that support it) before changing to
+	  * HMT_MEDIUM. That allows the KVM code to save that value into the
+	  * guest state (it is the guest's PPR value).
+	  */
+BEGIN_FTR_SECTION
+	mfspr	r10,SPRN_PPR
+	std	r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	HMT_MEDIUM
+
 #ifdef CONFIG_RELOCATABLE
 	/*
 	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 66170ea85bc2a..a01046202eef4 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -11,40 +11,30 @@
  * These are branched to from interrupt handlers in exception-64s.S which set
  * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
  */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
-	/*
-	 * This is a hcall, so register convention is as
-	 * Documentation/powerpc/papr_hcalls.rst, with these additions:
-	 * R13		= PACA
-	 * guest R13 saved in SPRN_SCRATCH0
-	 * R10		= free
-	 * guest r10 saved in PACA_EXGEN
-	 *
-	 * This may also be a syscall from PR-KVM userspace that is to be
-	 * reflected to the PR guest kernel, so registers may be set up for
-	 * a system call rather than hcall. We don't currently clobber
-	 * anything here, but the 0xc00 handler has already clobbered CTR
-	 * and CR0, so PR-KVM can not support a guest kernel that preserves
-	 * those registers across its system calls.
-	 */
-	 /*
-	  * Save the PPR (on systems that support it) before changing to
-	  * HMT_MEDIUM. That allows the KVM code to save that value into the
-	  * guest state (it is the guest's PPR value).
-	  */
-BEGIN_FTR_SECTION
-	mfspr	r10,SPRN_PPR
-	std	r10,HSTATE_PPR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-	HMT_MEDIUM
-	mfcr	r10
-	std	r12,HSTATE_SCRATCH0(r13)
-	sldi	r12,r10,32
-	ori	r12,r12,0xc00
-	ld	r10,PACA_EXGEN+EX_R10(r13)
-	b	do_kvm_interrupt
+	ld	r10,PACA_EXGEN+EX_R13(r13)
+	SET_SCRATCH0(r10)
+	li	r10,0xc00
+	/* Now we look like kvmppc_interrupt */
+	li	r11,PACA_EXGEN
+	b	.Lgot_save_area
 
 /*
  * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
@@ -67,12 +57,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 kvmppc_interrupt:
 	li	r11,PACA_EXGEN
 	cmpdi	r10,0x200
-	bgt+	1f
+	bgt+	.Lgot_save_area
 	li	r11,PACA_EXMC
-	beq	1f
+	beq	.Lgot_save_area
 	li	r11,PACA_EXNMI
-1:	add	r11,r11,r13
-
+.Lgot_save_area:
+	add	r11,r11,r13
 BEGIN_FTR_SECTION
 	ld	r12,EX_CFAR(r11)
 	std	r12,HSTATE_CFAR(r13)
@@ -91,7 +81,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r10,EX_R10(r11)
 	ld	r11,EX_R11(r11)
 
-do_kvm_interrupt:
 	/*
 	 * Hcalls and other interrupts come here after normalising register
 	 * contents and save locations:
-- 
GitLab


From 023c3c96ca4d196c09d554d5a98900406e4d7ecb Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:28 +1000
Subject: [PATCH 2618/3804] KVM: PPC: Book3S HV P9: implement
 kvmppc_xive_pull_vcpu in C
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is more symmetric with kvmppc_xive_push_vcpu, and has the advantage
that it runs with the MMU on.

The extra test added to the asm will go away with a future change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-9-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_ppc.h      |  2 ++
 arch/powerpc/kvm/book3s_hv.c            |  2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  5 ++++
 arch/powerpc/kvm/book3s_xive.c          | 31 +++++++++++++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 5bf8ae9bb2cc1..8c10c34271666 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -671,6 +671,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 {
@@ -711,6 +712,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
 static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 				      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 13728495ac660..907963b174e19 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3570,6 +3570,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	trap = __kvmhv_vcpu_entry_p9(vcpu);
 
+	kvmppc_xive_pull_vcpu(vcpu);
+
 	/* Advance host PURR/SPURR by the amount used by guest */
 	purr = mfspr(SPRN_PURR);
 	spurr = mfspr(SPRN_SPURR);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a8abe79bcb995..55d4d5495f5d0 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1445,6 +1445,11 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	bl	kvmhv_accumulate_time
 #endif
 #ifdef CONFIG_KVM_XICS
+	/* If we came in through the P9 short path, xive pull is done in C */
+	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
+	cmpwi	r0, 0
+	bne	1f
+
 	/* We are exiting, pull the VP from the XIVE */
 	lbz	r0, VCPU_XIVE_PUSHED(r9)
 	cmpwi	cr0, r0, 0
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index e7219b6f5f9a5..741bf1f4387a5 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -127,6 +127,37 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
 
+/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+	void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+	if (!vcpu->arch.xive_pushed)
+		return;
+
+	/*
+	 * Should not have been pushed if there is no tima
+	 */
+	if (WARN_ON(!tima))
+		return;
+
+	eieio();
+	/* First load to pull the context, we ignore the value */
+	__raw_readl(tima + TM_SPC_PULL_OS_CTX);
+	/* Second load to recover the context state (Words 0 and 1) */
+	vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+	/* Fixup some of the state for the next load */
+	vcpu->arch.xive_saved_state.lsmfb = 0;
+	vcpu->arch.xive_saved_state.ack = 0xff;
+	vcpu->arch.xive_pushed = 0;
+	eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
 /*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
-- 
GitLab


From 413679e73bdfc2720dc2fa2172b65b7411185fa7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:29 +1000
Subject: [PATCH 2619/3804] KVM: PPC: Book3S HV P9: Move setting HDEC after
 switching to guest LPCR

LPCR[HDICE]=0 suppresses hypervisor decrementer exceptions on some
processors, so it must be enabled before HDEC is set.

Rather than setting HDICE in the host LPCR and then setting HDEC, move the
HDEC update to after the guest MMU context (including LPCR) is loaded.
There shouldn't be much concern with delaying HDEC by some 10s or 100s
of nanoseconds by setting it a bit later.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-10-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 907963b174e19..466d62b35b6a5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3502,20 +3502,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		host_dawrx1 = mfspr(SPRN_DAWRX1);
 	}
 
-	/*
-	 * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
-	 * so set HDICE before writing HDEC.
-	 */
-	mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
-	isync();
-
 	hdec = time_limit - mftb();
-	if (hdec < 0) {
-		mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
-		isync();
+	if (hdec < 0)
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
-	}
-	mtspr(SPRN_HDEC, hdec);
 
 	if (vc->tb_offset) {
 		u64 new_tb = mftb() + vc->tb_offset;
@@ -3563,6 +3552,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_LPCR, lpcr);
 	isync();
 
+	/*
+	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+	 * so set guest LPCR (with HDICE) before writing HDEC.
+	 */
+	mtspr(SPRN_HDEC, hdec);
+
 	kvmppc_xive_push_vcpu(vcpu);
 
 	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
-- 
GitLab


From 6ffe2c6e6dcefb971e4046f02086c4adadd0b310 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:30 +1000
Subject: [PATCH 2620/3804] KVM: PPC: Book3S HV P9: Reduce irq_work vs guest
 decrementer races

irq_work's use of the DEC SPR is racy with guest<->host switch and guest
entry which flips the DEC interrupt to guest, which could lose a host
work interrupt.

This patch closes one race, and attempts to comment another class of
races.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-11-npiggin@gmail.com
---
 arch/powerpc/include/asm/time.h | 12 ++++++++++++
 arch/powerpc/kernel/time.c      | 10 ----------
 arch/powerpc/kvm/book3s_hv.c    | 15 +++++++++++++++
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8dd3cdb25338b..8c2c3dd4ddbac 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);
 
+#ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+	unsigned long x;
+
+	asm volatile("lbz %0,%1(13)"
+		: "=r" (x)
+		: "i" (offsetof(struct paca_struct, irq_work_pending)));
+	return x;
+}
+#endif
+
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
 /* Convert timebase ticks to nanoseconds */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2d..da995c5fb97d9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc);
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
-	unsigned long x;
-
-	asm volatile("lbz %0,%1(13)"
-		: "=r" (x)
-		: "i" (offsetof(struct paca_struct, irq_work_pending)));
-	return x;
-}
-
 static inline void set_irq_work_pending_flag(void)
 {
 	asm volatile("stb %0,%1(13)" : :
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 466d62b35b6a5..d82ff7fe8ac71 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3708,6 +3708,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (!(vcpu->arch.ctrl & 1))
 		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
 
+	/*
+	 * When setting DEC, we must always deal with irq_work_raise via NMI vs
+	 * setting DEC. The problem occurs right as we switch into guest mode
+	 * if a NMI hits and sets pending work and sets DEC, then that will
+	 * apply to the guest and not bring us back to the host.
+	 *
+	 * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+	 * example) and set HDEC to 1? That wouldn't solve the nested hv
+	 * case which needs to abort the hcall or zero the time limit.
+	 *
+	 * XXX: Another day's problem.
+	 */
 	mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
 
 	if (kvmhv_on_pseries()) {
@@ -3822,6 +3834,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->in_guest = 0;
 
 	mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+	/* We may have raced with new irq work */
+	if (test_irq_work_pending())
+		set_dec(1);
 	mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
 	kvmhv_load_host_pmu();
-- 
GitLab


From 09512c29167bd3792820caf83bcca4d4e5ac2266 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:31 +1000
Subject: [PATCH 2621/3804] KVM: PPC: Book3S HV P9: Move xive vcpu context
 management into kvmhv_p9_guest_entry

Move the xive management up so the low level register switching can be
pushed further down in a later patch. XIVE MMIO CI operations can run in
higher level code with machine checks, tracing, etc., available.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-12-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d82ff7fe8ac71..bb326cfcf173d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3558,15 +3558,11 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
-	kvmppc_xive_push_vcpu(vcpu);
-
 	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
 	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
 
 	trap = __kvmhv_vcpu_entry_p9(vcpu);
 
-	kvmppc_xive_pull_vcpu(vcpu);
-
 	/* Advance host PURR/SPURR by the amount used by guest */
 	purr = mfspr(SPRN_PURR);
 	spurr = mfspr(SPRN_SPURR);
@@ -3764,7 +3760,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			trap = 0;
 		}
 	} else {
+		kvmppc_xive_push_vcpu(vcpu);
 		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+		kvmppc_xive_pull_vcpu(vcpu);
+
 	}
 
 	vcpu->arch.slb_max = 0;
-- 
GitLab


From 48013cbc504e064d2318f24482cfbe3c53e0a812 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:32 +1000
Subject: [PATCH 2622/3804] KVM: PPC: Book3S HV P9: Move radix MMU switching
 instructions together

Switching the MMU from radix<->radix mode is tricky particularly as the
MMU can remain enabled and requires a certain sequence of SPR updates.
Move these together into their own functions.

This also includes the radix TLB check / flush because it's tied in to
MMU switching due to tlbiel getting LPID from LPIDR.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-13-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 62 ++++++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bb326cfcf173d..3ec1dc1bad160 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3478,12 +3478,49 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	u32 lpid;
+
+	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+	/*
+	 * All the isync()s are overkill but trivially follow the ISA
+	 * requirements. Some can likely be replaced with justification
+	 * comment for why they are not needed.
+	 */
+	isync();
+	mtspr(SPRN_LPID, lpid);
+	isync();
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+	mtspr(SPRN_PID, vcpu->arch.pid);
+	isync();
+
+	/* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
+	kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+}
+
+static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+{
+	isync();
+	mtspr(SPRN_PID, pid);
+	isync();
+	mtspr(SPRN_LPID, kvm->arch.host_lpid);
+	isync();
+	mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+	isync();
+}
+
 /*
  * Load up hypervisor-mode registers on P9.
  */
 static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 				     unsigned long lpcr)
 {
+	struct kvm *kvm = vcpu->kvm;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	s64 hdec;
 	u64 tb, purr, spurr;
@@ -3535,7 +3572,6 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	}
 	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 	mtspr(SPRN_IC, vcpu->arch.ic);
-	mtspr(SPRN_PID, vcpu->arch.pid);
 
 	mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
 	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
@@ -3549,8 +3585,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	mtspr(SPRN_AMOR, ~0UL);
 
-	mtspr(SPRN_LPCR, lpcr);
-	isync();
+	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
 
 	/*
 	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@@ -3593,7 +3628,6 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		mtspr(SPRN_DAWR1, host_dawr1);
 		mtspr(SPRN_DAWRX1, host_dawrx1);
 	}
-	mtspr(SPRN_PID, host_pidr);
 
 	/*
 	 * Since this is radix, do a eieio; tlbsync; ptesync sequence in
@@ -3608,9 +3642,6 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
 		asm volatile(PPC_CP_ABORT);
 
-	mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid);	/* restore host LPID */
-	isync();
-
 	vc->dpdes = mfspr(SPRN_DPDES);
 	vc->vtb = mfspr(SPRN_VTB);
 	mtspr(SPRN_DPDES, 0);
@@ -3627,7 +3658,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	}
 
 	mtspr(SPRN_HDEC, 0x7fffffff);
-	mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
+
+	switch_mmu_to_host_radix(kvm, host_pidr);
 
 	return trap;
 }
@@ -4181,7 +4213,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 {
 	struct kvm_run *run = vcpu->run;
 	int trap, r, pcpu;
-	int srcu_idx, lpid;
+	int srcu_idx;
 	struct kvmppc_vcore *vc;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -4255,13 +4287,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->vcore_state = VCORE_RUNNING;
 	trace_kvmppc_run_core(vc, 0);
 
-	if (cpu_has_feature(CPU_FTR_HVMODE)) {
-		lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
-		mtspr(SPRN_LPID, lpid);
-		isync();
-		kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
-	}
-
 	guest_enter_irqoff();
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
@@ -4280,11 +4305,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
-	if (cpu_has_feature(CPU_FTR_HVMODE)) {
-		mtspr(SPRN_LPID, kvm->arch.host_lpid);
-		isync();
-	}
-
 	set_irq_happened(trap);
 
 	kvmppc_set_host_core(pcpu);
-- 
GitLab


From 9dc2babc185e0a24fbb48098daafd552cac157fa Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:33 +1000
Subject: [PATCH 2623/3804] KVM: PPC: Book3S HV P9: Stop handling hcalls in
 real-mode in the P9 path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the interest of minimising the amount of code that is run in
"real-mode", don't handle hcalls in real mode in the P9 path. This
requires some new handlers for H_CEDE and xics-on-xive to be added
before xive is pulled or cede logic is checked.

This introduces a change in radix guest behaviour where radix guests
that execute 'sc 1' in userspace now get a privilege fault whereas
previously the 'sc 1' would be reflected as a syscall interrupt to the
guest kernel. That reflection is only required for hash guests that run
PR KVM.

Background:

In POWER8 and earlier processors, it is very expensive to exit from the
HV real mode context of a guest hypervisor interrupt, and switch to host
virtual mode. On those processors, guest->HV interrupts reach the
hypervisor with the MMU off because the MMU is loaded with guest context
(LPCR, SDR1, SLB), and the other threads in the sub-core need to be
pulled out of the guest too. Then the primary must save off guest state,
invalidate SLB and ERAT, and load up host state before the MMU can be
enabled to run in host virtual mode (~= regular Linux mode).

Hash guests also require a lot of hcalls to run due to the nature of the
MMU architecture and paravirtualisation design. The XICS interrupt
controller requires hcalls to run.

So KVM traditionally tries hard to avoid the full exit, by handling
hcalls and other interrupts in real mode as much as possible.

By contrast, POWER9 has independent MMU context per-thread, and in radix
mode the hypervisor is in host virtual memory mode when the HV interrupt
is taken. Radix guests do not require significant hcalls to manage their
translations, and xive guests don't need hcalls to handle interrupts. So
it's much less important for performance to handle hcalls in real mode on
POWER9.

One caveat is that the TCE hcalls are performance critical: real-mode
variants were introduced for POWER8 in order to achieve 10GbE performance.
Real mode TCE hcalls were found to be less important on POWER9, which
was able to drive 40GbE networking without them (using the virt mode
hcalls) but performance is still important. These hcalls will benefit
from subsequent guest entry/exit optimisation including possibly a
faster "partial exit" that does not entirely switch to host context to
handle the hcall.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-14-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_ppc.h      |  6 ++
 arch/powerpc/kvm/book3s.c               |  6 ++
 arch/powerpc/kvm/book3s_hv.c            | 79 +++++++++++++++++++++----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  5 ++
 arch/powerpc/kvm/book3s_xive.c          | 64 ++++++++++++++++++++
 5 files changed, 149 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 8c10c34271666..cb9e3c85c6052 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
 extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
 extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
 extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
 extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
 static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 	{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+	{ return 0; }
 #endif
 
 #ifdef CONFIG_KVM_XIVE
@@ -672,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
 			       int level, bool line_status);
 extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
 extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 {
@@ -713,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
 				      int level, bool line_status) { return -ENODEV; }
 static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
 static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
 
 static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 2b691f4d1f26c..d69560d5bf16c 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
 }
 EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
 
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+	kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
 void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
 	/* might as well deliver this straight away */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3ec1dc1bad160..8df02be9be72f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -899,6 +899,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
 	 * have useful work to do and should not confer) so we don't
 	 * recheck that here.
+	 *
+	 * In the case of the P9 single vcpu per vcore case, the real
+	 * mode handler is not called but no other threads are in the
+	 * source vcore.
 	 */
 
 	spin_lock(&vcore->lock);
@@ -1142,12 +1146,13 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Handle H_CEDE in the nested virtualization case where we haven't
- * called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
  * This has to be done early, not in kvmppc_pseries_do_hcall(), so
  * that the cede logic in kvmppc_run_single_vcpu() works properly.
  */
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.shregs.msr |= MSR_EE;
 	vcpu->arch.ceded = 1;
@@ -1400,13 +1405,29 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 	}
 	case BOOK3S_INTERRUPT_SYSCALL:
 	{
-		/* hcall - punt to userspace */
 		int i;
 
-		/* hypercall with MSR_PR has already been handled in rmode,
-		 * and never reaches here.
-		 */
+		if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+			/*
+			 * Guest userspace executed sc 1. This can only be
+			 * reached by the P9 path because the old path
+			 * handles this case in realmode hcall handlers.
+			 *
+			 * Radix guests can not run PR KVM or nested HV hash
+			 * guests which might run PR KVM, so this is always
+			 * a privilege fault. Send a program check to guest
+			 * kernel.
+			 */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+			r = RESUME_GUEST;
+			break;
+		}
 
+		/*
+		 * hcall - gather args and set exit_reason. This will next be
+		 * handled by kvmppc_pseries_do_hcall which may be able to deal
+		 * with it and resume guest, or may punt to userspace.
+		 */
 		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
 		for (i = 0; i < 9; ++i)
 			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -3664,6 +3685,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 	return trap;
 }
 
+static inline bool hcall_is_xics(unsigned long req)
+{
+	return req == H_EOI || req == H_CPPR || req == H_IPI ||
+		req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
+}
+
 /*
  * Virtual-mode guest entry for POWER9 and later when the host and
  * guest are both using the radix MMU.  The LPIDR has already been set.
@@ -3787,15 +3814,36 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		/* H_CEDE has to be handled now, not later */
 		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
 		    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
-			kvmppc_nested_cede(vcpu);
+			kvmppc_cede(vcpu);
 			kvmppc_set_gpr(vcpu, 3, 0);
 			trap = 0;
 		}
 	} else {
 		kvmppc_xive_push_vcpu(vcpu);
 		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+		    !(vcpu->arch.shregs.msr & MSR_PR)) {
+			unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+			/* H_CEDE has to be handled now, not later */
+			if (req == H_CEDE) {
+				kvmppc_cede(vcpu);
+				kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
+				kvmppc_set_gpr(vcpu, 3, 0);
+				trap = 0;
+
+			/* XICS hcalls must be handled before xive is pulled */
+			} else if (hcall_is_xics(req)) {
+				int ret;
+
+				ret = kvmppc_xive_xics_hcall(vcpu, req);
+				if (ret != H_TOO_HARD) {
+					kvmppc_set_gpr(vcpu, 3, ret);
+					trap = 0;
+				}
+			}
+		}
 		kvmppc_xive_pull_vcpu(vcpu);
-
 	}
 
 	vcpu->arch.slb_max = 0;
@@ -4461,8 +4509,17 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 		else
 			r = kvmppc_run_vcpu(vcpu);
 
-		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
-		    !(vcpu->arch.shregs.msr & MSR_PR)) {
+		if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+			if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
+				/*
+				 * These should have been caught and reflected
+				 * into the guest by now. Final sanity check:
+				 * don't allow userspace to execute hcalls in
+				 * the hypervisor.
+				 */
+				r = RESUME_GUEST;
+				continue;
+			}
 			trace_kvm_hcall_enter(vcpu);
 			r = kvmppc_pseries_do_hcall(vcpu);
 			trace_kvm_hcall_exit(vcpu, r);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 55d4d5495f5d0..0637126be21ed 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1400,9 +1400,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mr	r4,r9
 	bge	fast_guest_return
 2:
+	/* If we came in through the P9 short path, no real mode hcalls */
+	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
+	cmpwi	r0, 0
+	bne	no_try_real
 	/* See if this is an hcall we can handle in real mode */
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
+no_try_real:
 
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 741bf1f4387a5..24c07094651a3 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -158,6 +158,40 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
 
+void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+	void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+	if (!esc_vaddr)
+		return;
+
+	/* we are using XIVE with single escalation */
+
+	if (vcpu->arch.xive_esc_on) {
+		/*
+		 * If we still have a pending escalation, abort the cede,
+		 * and we must set PQ to 10 rather than 00 so that we don't
+		 * potentially end up with two entries for the escalation
+		 * interrupt in the XIVE interrupt queue.  In that case
+		 * we also don't want to set xive_esc_on to 1 here in
+		 * case we race with xive_esc_irq().
+		 */
+		vcpu->arch.ceded = 0;
+		/*
+		 * The escalation interrupts are special as we don't EOI them.
+		 * There is no need to use the load-after-store ordering offset
+		 * to set PQ to 10 as we won't use StoreEOI.
+		 */
+		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+	} else {
+		vcpu->arch.xive_esc_on = true;
+		mb();
+		__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+	}
+	mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
 /*
  * This is a simple trigger for a generic XIVE IRQ. This must
  * only be called for interrupts that support a trigger page
@@ -2106,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
 	return 0;
 }
 
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+	/* The VM should have configured XICS mode before doing XICS hcalls. */
+	if (!kvmppc_xics_enabled(vcpu))
+		return H_TOO_HARD;
+
+	switch (req) {
+	case H_XIRR:
+		return xive_vm_h_xirr(vcpu);
+	case H_CPPR:
+		return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_EOI:
+		return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_IPI:
+		return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+					  kvmppc_get_gpr(vcpu, 5));
+	case H_IPOLL:
+		return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+	case H_XIRR_X:
+		xive_vm_h_xirr(vcpu);
+		kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+		return H_SUCCESS;
+	}
+
+	return H_UNSUPPORTED;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
 int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
-- 
GitLab


From 89d35b23910158a9add33a206e973f4227906d3c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:34 +1000
Subject: [PATCH 2624/3804] KVM: PPC: Book3S HV P9: Implement the rest of the
 P9 path in C

Almost all logic is moved to C, by introducing a new in_guest mode for
the P9 path that branches very early in the KVM interrupt handler to P9
exit code.

The main P9 entry and exit assembly is now only about 160 lines of low
level stack setup and register save/restore, plus a bad-interrupt
handler.

There are two motivations for this. The first is just to make the code more
maintainable by being in C. The second is to reduce the amount of code
running in a special KVM mode, "realmode". In quotes because with radix
it is no longer necessarily real-mode in the MMU, but it still has to be
treated specially because it may be in real-mode, and has various
important registers like PID, DEC, TB, etc set to guest. This is hostile
to the rest of Linux and can't use arbitrary kernel functionality or be
instrumented well.

This initial patch is a reasonably faithful conversion of the asm code,
but it does lack any loop to return quickly back into the guest without
switching out of realmode in the case of unimportant or easily handled
interrupts. As explained in previous changes, handling HV interrupts
very quickly in this low level realmode is not so important for P9
performance, and is important to avoid for security, observability,
and debuggability reasons.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-15-npiggin@gmail.com
---
 arch/powerpc/include/asm/asm-prototypes.h |   3 +-
 arch/powerpc/include/asm/kvm_asm.h        |   1 +
 arch/powerpc/include/asm/kvm_book3s_64.h  |   8 +
 arch/powerpc/include/asm/kvm_host.h       |   7 +-
 arch/powerpc/kernel/security.c            |   5 +-
 arch/powerpc/kvm/Makefile                 |   1 +
 arch/powerpc/kvm/book3s_64_entry.S        | 254 ++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c              |   9 +-
 arch/powerpc/kvm/book3s_hv_p9_entry.c     | 207 ++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 125 +----------
 10 files changed, 496 insertions(+), 124 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_p9_entry.c

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 1c7b75834e045..02ee6f5ac9fe9 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3;
 extern s32 patch__flush_count_cache_return;
 extern s32 patch__flush_link_stack_return;
 extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
 extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_branch_caches;
@@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void);
 void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
 void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
 
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
 long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493be..43b1788e1f930 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,6 +147,7 @@
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
 #define KVM_GUEST_MODE_HOST_HV	4
+#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host+guest radix, indep thr */
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53e..c214bcffb441c 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,9 +153,17 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
 	return radix;
 }
 
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
 
+/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an erratum.
+ */
+#define HDSISR_CANARY	0x7fff
+
 /*
  * We use a lock bit in HPTE dword 0 to synchronize updates and
  * accesses to each HPTE, and another bit to indicate non-present
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e83359f286b9..69add9d662df5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -683,7 +683,12 @@ struct kvm_vcpu_arch {
 	ulong fault_dar;
 	u32 fault_dsisr;
 	unsigned long intr_msr;
-	ulong fault_gpa;	/* guest real address of page fault (POWER9) */
+	/*
+	 * POWER9 and later: fault_gpa contains the guest real address of page
+	 * fault for a radix guest, or segment descriptor (equivalent to result
+	 * from slbmfev of SLB entry that translated the EA) for hash guests.
+	 */
+	ulong fault_gpa;
 #endif
 
 #ifdef CONFIG_BOOKE
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0fdfcdd9d880c..c17d1c9362b50 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init);
 
 static void update_branch_cache_flush(void)
 {
-	u32 *site;
+	u32 *site, __maybe_unused *site2;
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	site = &patch__call_kvm_flush_link_stack;
+	site2 = &patch__call_kvm_flush_link_stack_p9;
 	// This controls the branch from guest_exit_cont to kvm_flush_link_stack
 	if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
 		patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+		patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
 	} else {
 		// Could use HW flush, but that could also flush count cache
 		patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+		patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
 	}
 #endif
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index bbc071a8a976c..ab241317481c0 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -87,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
 	book3s_hv_hmi.o \
+	book3s_hv_p9_entry.o \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_hv_ras.o \
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index a01046202eef4..177e8fad5c8dd 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,11 +1,16 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
+#include <asm/code-patching-asm.h>
 #include <asm/exception-64s.h>
+#include <asm/export.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
 #include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
 #include <asm/reg.h>
+#include <asm/ultravisor-api.h>
 
 /*
  * These are branched to from interrupt handlers in exception-64s.S which set
@@ -29,6 +34,11 @@
 .global	kvmppc_hcall
 .balign IFETCH_ALIGN_BYTES
 kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	lbz	r10,HSTATE_IN_GUEST(r13)
+	cmpwi	r10,KVM_GUEST_MODE_HV_FAST
+	beq	kvmppc_p9_exit_hcall
+#endif
 	ld	r10,PACA_EXGEN+EX_R13(r13)
 	SET_SCRATCH0(r10)
 	li	r10,0xc00
@@ -55,6 +65,13 @@ kvmppc_hcall:
 .global	kvmppc_interrupt
 .balign IFETCH_ALIGN_BYTES
 kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	std	r10,HSTATE_SCRATCH0(r13)
+	lbz	r10,HSTATE_IN_GUEST(r13)
+	cmpwi	r10,KVM_GUEST_MODE_HV_FAST
+	beq	kvmppc_p9_exit_interrupt
+	ld	r10,HSTATE_SCRATCH0(r13)
+#endif
 	li	r11,PACA_EXGEN
 	cmpdi	r10,0x200
 	bgt+	.Lgot_save_area
@@ -156,3 +173,240 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	GET_SCRATCH0(r13)
 	HRFI_TO_KERNEL
 #endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS			(144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS	(SFS - 144)	/* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
+ *
+ * Enter the guest on an ISAv3.0 or later system where we have exactly
+ * one vcpu per vcore, and both the host and guest are radix, and threads
+ * are set to "independent mode".
+ */
+.balign	IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+	mflr	r0
+	std	r0,PPC_LR_STKOFF(r1)
+	stdu	r1,-SFS(r1)
+
+	std	r1,HSTATE_HOST_R1(r13)
+
+	mfcr	r4
+	stw	r4,SFS+8(r1)
+
+	reg = 14
+	.rept	18
+	std	reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	ld	r4,VCPU_LR(r3)
+	mtlr	r4
+	ld	r4,VCPU_CTR(r3)
+	mtctr	r4
+	ld	r4,VCPU_XER(r3)
+	mtspr	SPRN_XER,r4
+
+	ld	r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_CFAR(r3)
+	mtspr	SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+	ld	r4,VCPU_PPR(r3)
+	mtspr	SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	reg = 4
+	.rept	28
+	ld	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	ld	r4,VCPU_KVM(r3)
+	lbz	r4,KVM_SECURE_GUEST(r4)
+	cmpdi	r4,0
+	ld	r4,VCPU_GPR(R4)(r3)
+	bne	.Lret_to_ultra
+
+	mtcr	r1
+
+	ld	r0,VCPU_GPR(R0)(r3)
+	ld	r1,VCPU_GPR(R1)(r3)
+	ld	r2,VCPU_GPR(R2)(r3)
+	ld	r3,VCPU_GPR(R3)(r3)
+
+	HRFI_TO_GUEST
+	b	.
+
+	/*
+	 * Use UV_RETURN ultracall to return control back to the Ultravisor
+	 * after processing a hypercall or interrupt that was forwarded
+	 * (a.k.a. reflected) to the Hypervisor.
+	 *
+	 * All registers have already been reloaded except the ucall requires:
+	 *   R0 = hcall result
+	 *   R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+	 *   R3 = UV_RETURN
+	 */
+.Lret_to_ultra:
+	mtcr	r1
+	ld	r1,VCPU_GPR(R1)(r3)
+
+	ld	r0,VCPU_GPR(R3)(r3)
+	mfspr	r2,SPRN_SRR1
+	LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+	sc	2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+	mfspr	r11,SPRN_SRR0
+	mfspr	r12,SPRN_SRR1
+	li	r10,0xc00
+	std	r10,HSTATE_SCRATCH0(r13)
+
+.balign	IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+	/*
+	 * If set to KVM_GUEST_MODE_HV_FAST but we're still in the
+	 * hypervisor, that means we can't return from the entry stack.
+	 */
+	rldicl. r10,r12,64-MSR_HV_LG,63
+	bne-	kvmppc_p9_bad_interrupt
+
+	std     r1,HSTATE_SCRATCH1(r13)
+	std     r3,HSTATE_SCRATCH2(r13)
+	ld	r1,HSTATE_HOST_R1(r13)
+	ld	r3,HSTATE_KVM_VCPU(r13)
+
+	std	r9,VCPU_CR(r3)
+
+1:
+	std	r11,VCPU_PC(r3)
+	std	r12,VCPU_MSR(r3)
+
+	reg = 14
+	.rept	18
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	/* r1, r3, r9-r13 are saved to vcpu by C code */
+	std	r0,VCPU_GPR(R0)(r3)
+	std	r2,VCPU_GPR(R2)(r3)
+	reg = 4
+	.rept	5
+	std	reg,__VCPU_GPR(reg)(r3)
+	reg = reg + 1
+	.endr
+
+	ld	r2,PACATOC(r13)
+
+	mflr	r4
+	std	r4,VCPU_LR(r3)
+	mfspr	r4,SPRN_XER
+	std	r4,VCPU_XER(r3)
+
+	reg = 14
+	.rept	18
+	ld	reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+	reg = reg + 1
+	.endr
+
+	lwz	r4,SFS+8(r1)
+	mtcr	r4
+
+	/*
+	 * Flush the link stack here, before executing the first blr on the
+	 * way out of the guest.
+	 *
+	 * The link stack won't match coming out of the guest anyway so the
+	 * only cost is the flush itself. The call clobbers r0.
+	 */
+1:	nop
+	patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+	addi	r1,r1,SFS
+	ld	r0,PPC_LR_STKOFF(r1)
+	mtlr	r0
+	blr
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+	/*
+	 * Clean up guest registers to give host a chance to run.
+	 */
+	li	r10,0
+	mtspr	SPRN_AMR,r10
+	mtspr	SPRN_IAMR,r10
+	mtspr	SPRN_CIABR,r10
+	mtspr	SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+	mtspr	SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+	mtspr	SPRN_PID,r10
+
+	/*
+	 * Switch to host MMU mode
+	 */
+	ld	r10, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_KVM(r10)
+	lwz	r10, KVM_HOST_LPID(r10)
+	mtspr	SPRN_LPID,r10
+
+	ld	r10, HSTATE_KVM_VCPU(r13)
+	ld	r10, VCPU_KVM(r10)
+	ld	r10, KVM_HOST_LPCR(r10)
+	mtspr	SPRN_LPCR,r10
+
+	/*
+	 * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+	 * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+	 */
+	li	r10,KVM_GUEST_MODE_NONE
+	stb	r10,HSTATE_IN_GUEST(r13)
+	li	r10,MSR_RI
+	andc	r12,r12,r10
+
+	/*
+	 * Go back to interrupt handler. MCE and SRESET have their specific
+	 * PACA save area so they should be used directly. They set up their
+	 * own stack. The other handlers all use EXGEN. They will use the
+	 * guest r1 if it looks like a kernel stack, so just load the
+	 * emergency stack and go to program check for all other interrupts.
+	 */
+	ld	r10,HSTATE_SCRATCH0(r13)
+	cmpwi	r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	machine_check_common
+
+	cmpwi	r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+	beq	system_reset_common
+
+	b	.
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8df02be9be72f..6652799274373 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1445,6 +1445,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 	 */
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
 		r = RESUME_PAGE_FAULT;
+		if (vcpu->arch.fault_dsisr == HDSISR_CANARY)
+			r = RESUME_GUEST; /* Just retry if it's the canary */
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
 		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
@@ -3708,6 +3710,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	u64 tb;
 	int trap, save_pmu;
 
+	WARN_ON_ONCE(vcpu->arch.ceded);
+
 	dec = mfspr(SPRN_DEC);
 	tb = mftb();
 	if (dec < 0)
@@ -3716,8 +3720,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (local_paca->kvm_hstate.dec_expires < time_limit)
 		time_limit = local_paca->kvm_hstate.dec_expires;
 
-	vcpu->arch.ceded = 0;
-
 	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
 
 	kvmppc_subcore_enter_guest();
@@ -3844,9 +3846,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			}
 		}
 		kvmppc_xive_pull_vcpu(vcpu);
+
+		vcpu->arch.slb_max = 0;
 	}
 
-	vcpu->arch.slb_max = 0;
 	dec = mfspr(SPRN_DEC);
 	if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
 		dec = (s32) dec;
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 0000000000000..9db0e031a4434
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	u64 tb = mftb() - vc->tb_offset_applied;
+
+	vcpu->arch.cur_activity = next;
+	vcpu->arch.cur_tb_start = tb;
+}
+
+static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvmhv_tb_accumulator *curr;
+	u64 tb = mftb() - vc->tb_offset_applied;
+	u64 prev_tb;
+	u64 delta;
+	u64 seq;
+
+	curr = vcpu->arch.cur_activity;
+	vcpu->arch.cur_activity = next;
+	prev_tb = vcpu->arch.cur_tb_start;
+	vcpu->arch.cur_tb_start = tb;
+
+	if (!curr)
+		return;
+
+	delta = tb - prev_tb;
+
+	seq = curr->seqcount;
+	curr->seqcount = seq + 1;
+	smp_wmb();
+	curr->tb_total += delta;
+	if (seq == 0 || delta < curr->tb_min)
+		curr->tb_min = delta;
+	if (delta > curr->tb_max)
+		curr->tb_max = delta;
+	smp_wmb();
+	curr->seqcount = seq + 2;
+}
+
+#define start_timing(vcpu, next) __start_timing(vcpu, next)
+#define end_timing(vcpu) __start_timing(vcpu, NULL)
+#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+#else
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#define accumulate_time(vcpu, next) do {} while (0)
+#endif
+
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+	asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+static inline void clear_slb_entry(unsigned int idx)
+{
+	mtslb(idx, 0);
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		clear_slb_entry(i);
+}
+
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
+{
+	u64 *exsave;
+	unsigned long msr = mfmsr();
+	int trap;
+
+	start_timing(vcpu, &vcpu->arch.rm_entry);
+
+	vcpu->arch.ceded = 0;
+
+	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+	mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+	mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+	/*
+	 * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+	 * Interrupt (HDSI) the HDSISR is not updated at all.
+	 *
+	 * To work around this we put a canary value into the HDSISR before
+	 * returning to a guest and then check for this canary when we take a
+	 * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+	 * update the HDSISR. In this case we return to the guest to retake the
+	 * HDSI, which should correctly update the HDSISR on the second HDSI
+	 * entry.
+	 *
+	 * Just do this on all p9 processors for now.
+	 */
+	mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+	accumulate_time(vcpu, &vcpu->arch.guest_time);
+
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
+	kvmppc_p9_enter_guest(vcpu);
+	// Radix host and guest means host never runs with guest MMU state
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+	accumulate_time(vcpu, &vcpu->arch.rm_intr);
+
+	/* XXX: Could get these from r11/12 and paca exsave instead */
+	vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+	vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+	vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+	vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+	/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+	trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+	if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+		exsave = local_paca->exgen;
+	} else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+		exsave = local_paca->exnmi;
+	} else { /* trap == 0x200 */
+		exsave = local_paca->exmc;
+	}
+
+	vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+	vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+	vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+	vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+	vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+	vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+	vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+	vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+	vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+	vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+	vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+	if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+		kvmppc_realmode_machine_check(vcpu);
+
+	} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+		kvmppc_realmode_hmi_handler();
+
+	} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+		vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+		vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+	} else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+		vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * Softpatch interrupt for transactional memory emulation cases
+	 * on POWER9 DD2.2.  This is early in the guest exit path - we
+	 * haven't saved registers or done a treclaim yet.
+	 */
+	} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+		vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+		/*
+		 * The cases we want to handle here are those where the guest
+		 * is in real suspend mode and is trying to transition to
+		 * transactional mode.
+		 */
+		if (local_paca->kvm_hstate.fake_suspend &&
+				(vcpu->arch.shregs.msr & MSR_TS_S)) {
+			if (kvmhv_p9_tm_emulation_early(vcpu)) {
+				/* Prevent it being handled again. */
+				trap = 0;
+			}
+		}
+#endif
+	}
+
+	radix_clear_slb();
+
+	__mtmsrd(msr, 0);
+
+	accumulate_time(vcpu, &vcpu->arch.rm_exit);
+
+	end_timing(vcpu);
+
+	return trap;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0637126be21ed..d742233dec55c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -44,9 +44,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define NAPPING_UNSPLIT	3
 
 /* Stack frame offsets for kvmppc_hv_entry */
-#define SFS			208
+#define SFS			160
 #define STACK_SLOT_TRAP		(SFS-4)
-#define STACK_SLOT_SHORT_PATH	(SFS-8)
 #define STACK_SLOT_TID		(SFS-16)
 #define STACK_SLOT_PSSCR	(SFS-24)
 #define STACK_SLOT_PID		(SFS-32)
@@ -60,8 +59,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_DAWR1	(SFS-96)
 #define STACK_SLOT_DAWRX1	(SFS-104)
 #define STACK_SLOT_FSCR		(SFS-112)
-/* the following is used by the P9 short path */
-#define STACK_SLOT_NVGPRS	(SFS-152)	/* 18 gprs */
 
 /*
  * Call kvmppc_hv_entry in real mode.
@@ -1011,9 +1008,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
-	li	r0, 0
-	stw	r0, STACK_SLOT_SHORT_PATH(r1)
-
 deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0, VCPU_PENDING_EXC(r4)
@@ -1033,7 +1027,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_SRR0, r6
 	mtspr	SPRN_SRR1, r7
 
-fast_guest_entry_c:
 	ld	r10, VCPU_PC(r4)
 	ld	r11, VCPU_MSR(r4)
 	/* r11 = vcpu->arch.msr & ~MSR_HV */
@@ -1138,97 +1131,6 @@ ret_to_ultra:
 	ld	r4, VCPU_GPR(R4)(r4)
 	sc	2
 
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
-	mflr	r0
-	std	r0, PPC_LR_STKOFF(r1)
-	stdu	r1, -SFS(r1)
-
-	li	r0, 1
-	stw	r0, STACK_SLOT_SHORT_PATH(r1)
-
-	std	r3, HSTATE_KVM_VCPU(r13)
-	mfcr	r4
-	stw	r4, SFS+8(r1)
-
-	std	r1, HSTATE_HOST_R1(r13)
-
-	reg = 14
-	.rept	18
-	std	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
-	reg = reg + 1
-	.endr
-
-	reg = 14
-	.rept	18
-	ld	reg, __VCPU_GPR(reg)(r3)
-	reg = reg + 1
-	.endr
-
-	mfmsr	r10
-	std	r10, HSTATE_HOST_MSR(r13)
-
-	mr	r4, r3
-	b	fast_guest_entry_c
-guest_exit_short_path:
-	/*
-	 * Malicious or buggy radix guests may have inserted SLB entries
-	 * (only 0..3 because radix always runs with UPRT=1), so these must
-	 * be cleared here to avoid side-channels. slbmte is used rather
-	 * than slbia, as it won't clear cached translations.
-	 */
-	li	r0,0
-	slbmte	r0,r0
-	li	r4,1
-	slbmte	r0,r4
-	li	r4,2
-	slbmte	r0,r4
-	li	r4,3
-	slbmte	r0,r4
-
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-
-	reg = 14
-	.rept	18
-	std	reg, __VCPU_GPR(reg)(r9)
-	reg = reg + 1
-	.endr
-
-	reg = 14
-	.rept	18
-	ld	reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
-	reg = reg + 1
-	.endr
-
-	lwz	r4, SFS+8(r1)
-	mtcr	r4
-
-	mr	r3, r12		/* trap number */
-
-	addi	r1, r1, SFS
-	ld	r0, PPC_LR_STKOFF(r1)
-	mtlr	r0
-
-	/* If we are in real mode, do a rfid to get back to the caller */
-	mfmsr	r4
-	andi.	r5, r4, MSR_IR
-	bnelr
-	rldicl	r5, r4, 64 - MSR_TS_S_LG, 62	/* extract TS field */
-	mtspr	SPRN_SRR0, r0
-	ld	r10, HSTATE_HOST_MSR(r13)
-	rldimi	r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
-	mtspr	SPRN_SRR1, r10
-	RFI_TO_KERNEL
-	b	.
-
 secondary_too_late:
 	li	r12, 0
 	stw	r12, STACK_SLOT_TRAP(r1)
@@ -1400,14 +1302,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	mr	r4,r9
 	bge	fast_guest_return
 2:
-	/* If we came in through the P9 short path, no real mode hcalls */
-	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
-	cmpwi	r0, 0
-	bne	no_try_real
 	/* See if this is an hcall we can handle in real mode */
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
-no_try_real:
 
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
@@ -1450,11 +1347,6 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	bl	kvmhv_accumulate_time
 #endif
 #ifdef CONFIG_KVM_XICS
-	/* If we came in through the P9 short path, xive pull is done in C */
-	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
-	cmpwi	r0, 0
-	bne	1f
-
 	/* We are exiting, pull the VP from the XIVE */
 	lbz	r0, VCPU_XIVE_PUSHED(r9)
 	cmpwi	cr0, r0, 0
@@ -1494,16 +1386,11 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 
 	/*
 	 * Possibly flush the link stack here, before we do a blr in
-	 * guest_exit_short_path.
+	 * kvmhv_switch_to_host.
 	 */
 1:	nop
 	patch_site 1b patch__call_kvm_flush_link_stack
 
-	/* If we came in through the P9 short path, go back out to C now */
-	lwz	r0, STACK_SLOT_SHORT_PATH(r1)
-	cmpwi	r0, 0
-	bne	guest_exit_short_path
-
 	/* For hash guest, read the guest SLB and save it away */
 	ld	r5, VCPU_KVM(r9)
 	lbz	r0, KVM_RADIX(r5)
@@ -1551,8 +1438,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	b	guest_bypass
 
 0:	/*
-	 * Sanitise radix guest SLB, see guest_exit_short_path comment.
-	 * We clear vcpu->arch.slb_max to match earlier behaviour.
+	 * Malicious or buggy radix guests may have inserted SLB entries
+	 * (only 0..3 because radix always runs with UPRT=1), so these must
+	 * be cleared here to avoid side-channels. slbmte is used rather
+	 * than slbia, as it won't clear cached translations.
 	 */
 	li	r0,0
 	stw	r0,VCPU_SLB_MAX(r9)
@@ -3369,7 +3258,7 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DAWRX1, r0
 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 
-	/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
+	/* Clear hash and radix guest SLB. */
 	slbmte	r0, r0
 	PPC_SLBIA(6)
 
-- 
GitLab


From c00366e2375408e43370cd7981af3354f7c83ed3 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:35 +1000
Subject: [PATCH 2625/3804] KVM: PPC: Book3S HV P9: inline
 kvmhv_load_hv_regs_and_go into __kvmhv_vcpu_entry_p9

Now that the initial C implementation is done, inline more HV code to
make rearranging things easier.

And rename __kvmhv_vcpu_entry_p9 to drop the leading underscores as it's
now C, and is now a more complete vcpu entry.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-16-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   2 +-
 arch/powerpc/kvm/book3s_hv.c             | 190 +----------------------
 arch/powerpc/kvm/book3s_hv_p9_entry.c    | 177 ++++++++++++++++++++-
 3 files changed, 178 insertions(+), 191 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index c214bcffb441c..eaf3a562bf1ed 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,7 +153,7 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
 	return radix;
 }
 
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
 
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6652799274373..777ec786ef711 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3501,192 +3501,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
-static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
-{
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-	struct kvm_nested_guest *nested = vcpu->arch.nested;
-	u32 lpid;
-
-	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
-
-	/*
-	 * All the isync()s are overkill but trivially follow the ISA
-	 * requirements. Some can likely be replaced with justification
-	 * comment for why they are not needed.
-	 */
-	isync();
-	mtspr(SPRN_LPID, lpid);
-	isync();
-	mtspr(SPRN_LPCR, lpcr);
-	isync();
-	mtspr(SPRN_PID, vcpu->arch.pid);
-	isync();
-
-	/* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
-	kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
-}
-
-static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
-{
-	isync();
-	mtspr(SPRN_PID, pid);
-	isync();
-	mtspr(SPRN_LPID, kvm->arch.host_lpid);
-	isync();
-	mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
-	isync();
-}
-
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
-				     unsigned long lpcr)
-{
-	struct kvm *kvm = vcpu->kvm;
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-	s64 hdec;
-	u64 tb, purr, spurr;
-	int trap;
-	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
-	unsigned long host_ciabr = mfspr(SPRN_CIABR);
-	unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
-	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
-	unsigned long host_psscr = mfspr(SPRN_PSSCR);
-	unsigned long host_pidr = mfspr(SPRN_PID);
-	unsigned long host_dawr1 = 0;
-	unsigned long host_dawrx1 = 0;
-
-	if (cpu_has_feature(CPU_FTR_DAWR1)) {
-		host_dawr1 = mfspr(SPRN_DAWR1);
-		host_dawrx1 = mfspr(SPRN_DAWRX1);
-	}
-
-	hdec = time_limit - mftb();
-	if (hdec < 0)
-		return BOOK3S_INTERRUPT_HV_DECREMENTER;
-
-	if (vc->tb_offset) {
-		u64 new_tb = mftb() + vc->tb_offset;
-		mtspr(SPRN_TBU40, new_tb);
-		tb = mftb();
-		if ((tb & 0xffffff) < (new_tb & 0xffffff))
-			mtspr(SPRN_TBU40, new_tb + 0x1000000);
-		vc->tb_offset_applied = vc->tb_offset;
-	}
-
-	if (vc->pcr)
-		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
-	mtspr(SPRN_DPDES, vc->dpdes);
-	mtspr(SPRN_VTB, vc->vtb);
-
-	local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
-	local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
-	mtspr(SPRN_PURR, vcpu->arch.purr);
-	mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
-	if (dawr_enabled()) {
-		mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
-		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
-		if (cpu_has_feature(CPU_FTR_DAWR1)) {
-			mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
-			mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
-		}
-	}
-	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
-	mtspr(SPRN_IC, vcpu->arch.ic);
-
-	mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
-	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-
-	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
-	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
-	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
-	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
-	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
-	mtspr(SPRN_AMOR, ~0UL);
-
-	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
-
-	/*
-	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
-	 * so set guest LPCR (with HDICE) before writing HDEC.
-	 */
-	mtspr(SPRN_HDEC, hdec);
-
-	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
-	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
-	trap = __kvmhv_vcpu_entry_p9(vcpu);
-
-	/* Advance host PURR/SPURR by the amount used by guest */
-	purr = mfspr(SPRN_PURR);
-	spurr = mfspr(SPRN_SPURR);
-	mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
-	      purr - vcpu->arch.purr);
-	mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
-	      spurr - vcpu->arch.spurr);
-	vcpu->arch.purr = purr;
-	vcpu->arch.spurr = spurr;
-
-	vcpu->arch.ic = mfspr(SPRN_IC);
-	vcpu->arch.pid = mfspr(SPRN_PID);
-	vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
-
-	vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
-	vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
-	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
-	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
-
-	/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
-	mtspr(SPRN_PSSCR, host_psscr |
-	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-	mtspr(SPRN_HFSCR, host_hfscr);
-	mtspr(SPRN_CIABR, host_ciabr);
-	mtspr(SPRN_DAWR0, host_dawr0);
-	mtspr(SPRN_DAWRX0, host_dawrx0);
-	if (cpu_has_feature(CPU_FTR_DAWR1)) {
-		mtspr(SPRN_DAWR1, host_dawr1);
-		mtspr(SPRN_DAWRX1, host_dawrx1);
-	}
-
-	/*
-	 * Since this is radix, do a eieio; tlbsync; ptesync sequence in
-	 * case we interrupted the guest between a tlbie and a ptesync.
-	 */
-	asm volatile("eieio; tlbsync; ptesync");
-
-	/*
-	 * cp_abort is required if the processor supports local copy-paste
-	 * to clear the copy buffer that was under control of the guest.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		asm volatile(PPC_CP_ABORT);
-
-	vc->dpdes = mfspr(SPRN_DPDES);
-	vc->vtb = mfspr(SPRN_VTB);
-	mtspr(SPRN_DPDES, 0);
-	if (vc->pcr)
-		mtspr(SPRN_PCR, PCR_MASK);
-
-	if (vc->tb_offset_applied) {
-		u64 new_tb = mftb() - vc->tb_offset_applied;
-		mtspr(SPRN_TBU40, new_tb);
-		tb = mftb();
-		if ((tb & 0xffffff) < (new_tb & 0xffffff))
-			mtspr(SPRN_TBU40, new_tb + 0x1000000);
-		vc->tb_offset_applied = 0;
-	}
-
-	mtspr(SPRN_HDEC, 0x7fffffff);
-
-	switch_mmu_to_host_radix(kvm, host_pidr);
-
-	return trap;
-}
-
 static inline bool hcall_is_xics(unsigned long req)
 {
 	return req == H_EOI || req == H_CPPR || req == H_IPI ||
@@ -3784,7 +3598,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		 * We need to save and restore the guest visible part of the
 		 * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
 		 * doesn't do this for us. Note only required if pseries since
-		 * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+		 * this is done in kvmhv_vcpu_entry_p9() below otherwise.
 		 */
 		unsigned long host_psscr;
 		/* call our hypervisor to load up HV regs and go */
@@ -3822,7 +3636,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 	} else {
 		kvmppc_xive_push_vcpu(vcpu);
-		trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+		trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
 		if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
 			unsigned long req = kvmppc_get_gpr(vcpu, 3);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 9db0e031a4434..d2e659940630d 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -79,11 +79,121 @@ static void radix_clear_slb(void)
 		clear_slb_entry(i);
 }
 
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 {
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
+	u32 lpid;
+
+	lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+	/*
+	 * All the isync()s are overkill but trivially follow the ISA
+	 * requirements. Some can likely be replaced with justification
+	 * comment for why they are not needed.
+	 */
+	isync();
+	mtspr(SPRN_LPID, lpid);
+	isync();
+	mtspr(SPRN_LPCR, lpcr);
+	isync();
+	mtspr(SPRN_PID, vcpu->arch.pid);
+	isync();
+
+	/* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
+	kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+}
+
+static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+{
+	isync();
+	mtspr(SPRN_PID, pid);
+	isync();
+	mtspr(SPRN_LPID, kvm->arch.host_lpid);
+	isync();
+	mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+	isync();
+}
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	s64 hdec;
+	u64 tb, purr, spurr;
 	u64 *exsave;
 	unsigned long msr = mfmsr();
 	int trap;
+	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+	unsigned long host_ciabr = mfspr(SPRN_CIABR);
+	unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
+	unsigned long host_psscr = mfspr(SPRN_PSSCR);
+	unsigned long host_pidr = mfspr(SPRN_PID);
+	unsigned long host_dawr1 = 0;
+	unsigned long host_dawrx1 = 0;
+
+	if (cpu_has_feature(CPU_FTR_DAWR1)) {
+		host_dawr1 = mfspr(SPRN_DAWR1);
+		host_dawrx1 = mfspr(SPRN_DAWRX1);
+	}
+
+	hdec = time_limit - mftb();
+	if (hdec < 0)
+		return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+	if (vc->tb_offset) {
+		u64 new_tb = mftb() + vc->tb_offset;
+		mtspr(SPRN_TBU40, new_tb);
+		tb = mftb();
+		if ((tb & 0xffffff) < (new_tb & 0xffffff))
+			mtspr(SPRN_TBU40, new_tb + 0x1000000);
+		vc->tb_offset_applied = vc->tb_offset;
+	}
+
+	if (vc->pcr)
+		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+	mtspr(SPRN_DPDES, vc->dpdes);
+	mtspr(SPRN_VTB, vc->vtb);
+
+	local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+	local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+	mtspr(SPRN_PURR, vcpu->arch.purr);
+	mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+	if (dawr_enabled()) {
+		mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+		mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+		if (cpu_has_feature(CPU_FTR_DAWR1)) {
+			mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+			mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+		}
+	}
+	mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+	mtspr(SPRN_IC, vcpu->arch.ic);
+
+	mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+	mtspr(SPRN_AMOR, ~0UL);
+
+	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+
+	/*
+	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+	 * so set guest LPCR (with HDICE) before writing HDEC.
+	 */
+	mtspr(SPRN_HDEC, hdec);
+
+	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
 
 	start_timing(vcpu, &vcpu->arch.rm_entry);
 
@@ -202,6 +312,69 @@ int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
 
 	end_timing(vcpu);
 
+	/* Advance host PURR/SPURR by the amount used by guest */
+	purr = mfspr(SPRN_PURR);
+	spurr = mfspr(SPRN_SPURR);
+	mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+	      purr - vcpu->arch.purr);
+	mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+	      spurr - vcpu->arch.spurr);
+	vcpu->arch.purr = purr;
+	vcpu->arch.spurr = spurr;
+
+	vcpu->arch.ic = mfspr(SPRN_IC);
+	vcpu->arch.pid = mfspr(SPRN_PID);
+	vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+	vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+	vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+	vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+	vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+	/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+	mtspr(SPRN_PSSCR, host_psscr |
+	      (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+	mtspr(SPRN_HFSCR, host_hfscr);
+	mtspr(SPRN_CIABR, host_ciabr);
+	mtspr(SPRN_DAWR0, host_dawr0);
+	mtspr(SPRN_DAWRX0, host_dawrx0);
+	if (cpu_has_feature(CPU_FTR_DAWR1)) {
+		mtspr(SPRN_DAWR1, host_dawr1);
+		mtspr(SPRN_DAWRX1, host_dawrx1);
+	}
+
+	/*
+	 * Since this is radix, do a eieio; tlbsync; ptesync sequence in
+	 * case we interrupted the guest between a tlbie and a ptesync.
+	 */
+	asm volatile("eieio; tlbsync; ptesync");
+
+	/*
+	 * cp_abort is required if the processor supports local copy-paste
+	 * to clear the copy buffer that was under control of the guest.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		asm volatile(PPC_CP_ABORT);
+
+	vc->dpdes = mfspr(SPRN_DPDES);
+	vc->vtb = mfspr(SPRN_VTB);
+	mtspr(SPRN_DPDES, 0);
+	if (vc->pcr)
+		mtspr(SPRN_PCR, PCR_MASK);
+
+	if (vc->tb_offset_applied) {
+		u64 new_tb = mftb() - vc->tb_offset_applied;
+		mtspr(SPRN_TBU40, new_tb);
+		tb = mftb();
+		if ((tb & 0xffffff) < (new_tb & 0xffffff))
+			mtspr(SPRN_TBU40, new_tb + 0x1000000);
+		vc->tb_offset_applied = 0;
+	}
+
+	mtspr(SPRN_HDEC, 0x7fffffff);
+
+	switch_mmu_to_host_radix(kvm, host_pidr);
+
 	return trap;
 }
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9);
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
-- 
GitLab


From 6d770e3fe9a120560cda66331ce5faa363400e97 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:36 +1000
Subject: [PATCH 2626/3804] KVM: PPC: Book3S HV P9: Read machine check
 registers while MSR[RI] is 0

SRR0/1, DAR and DSISR must all be protected from machine checks, which
can clobber them. Ensure MSR[RI] is clear while they are live.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-17-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c          | 11 +++++++--
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 33 ++++++++++++++++++++++++---
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 777ec786ef711..6d39e4784af6e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3571,11 +3571,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	mtspr(SPRN_BESCR, vcpu->arch.bescr);
 	mtspr(SPRN_WORT, vcpu->arch.wort);
 	mtspr(SPRN_TIDR, vcpu->arch.tid);
-	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
-	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 	mtspr(SPRN_AMR, vcpu->arch.amr);
 	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
 
+	/*
+	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+	 * clear (or hstate set appropriately to catch those registers
+	 * being clobbered if we take a MCE or SRESET), so those are done
+	 * later.
+	 */
+
 	if (!(vcpu->arch.ctrl & 1))
 		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
 
@@ -3618,6 +3623,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			hvregs.vcpu_token = vcpu->vcpu_id;
 		}
 		hvregs.hdec_expiry = time_limit;
+		mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+		mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 		trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
 					  __pa(&vcpu->arch.regs));
 		kvmhv_restore_hv_return_state(vcpu, &hvregs);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index d2e659940630d..a6f89e30040bc 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -122,6 +122,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	s64 hdec;
 	u64 tb, purr, spurr;
 	u64 *exsave;
+	bool ri_set;
 	unsigned long msr = mfmsr();
 	int trap;
 	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
@@ -192,9 +193,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
-	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
-	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
 	start_timing(vcpu, &vcpu->arch.rm_entry);
 
 	vcpu->arch.ceded = 0;
@@ -220,6 +218,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDSISR, HDSISR_CANARY);
 
+	__mtmsrd(0, 1); /* clear RI */
+
+	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+	mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
 	accumulate_time(vcpu, &vcpu->arch.guest_time);
 
 	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
@@ -237,7 +242,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
 	trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+	/* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
+	ri_set = false;
 	if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+		if (trap != BOOK3S_INTERRUPT_SYSCALL &&
+				(vcpu->arch.shregs.msr & MSR_RI))
+			ri_set = true;
 		exsave = local_paca->exgen;
 	} else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
 		exsave = local_paca->exnmi;
@@ -247,6 +258,22 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
 	vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+	/*
+	 * Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
+	 * and hstate scratch (which we need to move into exsave to make
+	 * re-entrant vs SRESET/MCE)
+	 */
+	if (ri_set) {
+		if (unlikely(!(mfmsr() & MSR_RI))) {
+			__mtmsrd(MSR_RI, 1);
+			WARN_ON_ONCE(1);
+		}
+	} else {
+		WARN_ON_ONCE(mfmsr() & MSR_RI);
+		__mtmsrd(MSR_RI, 1);
+	}
+
 	vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
 	vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
 	vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
-- 
GitLab


From a32ed1bb70723ec7a6c888b6c7071d516cca0e8f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:37 +1000
Subject: [PATCH 2627/3804] KVM: PPC: Book3S HV P9: Improve exit timing
 accounting coverage

The C conversion caused exit timing to become a bit cramped. Expand it
to cover more of the entry and exit code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-18-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index a6f89e30040bc..8a56141214c17 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -143,6 +143,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	if (hdec < 0)
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
 
+	start_timing(vcpu, &vcpu->arch.rm_entry);
+
 	if (vc->tb_offset) {
 		u64 new_tb = mftb() + vc->tb_offset;
 		mtspr(SPRN_TBU40, new_tb);
@@ -193,8 +195,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
-	start_timing(vcpu, &vcpu->arch.rm_entry);
-
 	vcpu->arch.ceded = 0;
 
 	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
@@ -337,8 +337,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	accumulate_time(vcpu, &vcpu->arch.rm_exit);
 
-	end_timing(vcpu);
-
 	/* Advance host PURR/SPURR by the amount used by guest */
 	purr = mfspr(SPRN_PURR);
 	spurr = mfspr(SPRN_SPURR);
@@ -402,6 +400,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	switch_mmu_to_host_radix(kvm, host_pidr);
 
+	end_timing(vcpu);
+
 	return trap;
 }
 EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
-- 
GitLab


From 68e3baaca8c56bbb336d2215f201f4047ce736e5 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:38 +1000
Subject: [PATCH 2628/3804] KVM: PPC: Book3S HV P9: Move SPR loading after
 expiry time check

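Reading the host MSR and SPRs before the expiry time check is wasted
work if the time limit has already been exceeded, so move those reads
after the check.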

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-19-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 36 ++++++++++++++++-----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 8a56141214c17..f24a12632b727 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -123,21 +123,16 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	u64 tb, purr, spurr;
 	u64 *exsave;
 	bool ri_set;
-	unsigned long msr = mfmsr();
 	int trap;
-	unsigned long host_hfscr = mfspr(SPRN_HFSCR);
-	unsigned long host_ciabr = mfspr(SPRN_CIABR);
-	unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
-	unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
-	unsigned long host_psscr = mfspr(SPRN_PSSCR);
-	unsigned long host_pidr = mfspr(SPRN_PID);
-	unsigned long host_dawr1 = 0;
-	unsigned long host_dawrx1 = 0;
-
-	if (cpu_has_feature(CPU_FTR_DAWR1)) {
-		host_dawr1 = mfspr(SPRN_DAWR1);
-		host_dawrx1 = mfspr(SPRN_DAWRX1);
-	}
+	unsigned long msr;
+	unsigned long host_hfscr;
+	unsigned long host_ciabr;
+	unsigned long host_dawr0;
+	unsigned long host_dawrx0;
+	unsigned long host_psscr;
+	unsigned long host_pidr;
+	unsigned long host_dawr1;
+	unsigned long host_dawrx1;
 
 	hdec = time_limit - mftb();
 	if (hdec < 0)
@@ -154,6 +149,19 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		vc->tb_offset_applied = vc->tb_offset;
 	}
 
+	msr = mfmsr();
+
+	host_hfscr = mfspr(SPRN_HFSCR);
+	host_ciabr = mfspr(SPRN_CIABR);
+	host_dawr0 = mfspr(SPRN_DAWR0);
+	host_dawrx0 = mfspr(SPRN_DAWRX0);
+	host_psscr = mfspr(SPRN_PSSCR);
+	host_pidr = mfspr(SPRN_PID);
+	if (cpu_has_feature(CPU_FTR_DAWR1)) {
+		host_dawr1 = mfspr(SPRN_DAWR1);
+		host_dawrx1 = mfspr(SPRN_DAWRX1);
+	}
+
 	if (vc->pcr)
 		mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
 	mtspr(SPRN_DPDES, vc->dpdes);
-- 
GitLab


From edba6aff4f2c3893e168df6a2e9a20f3c39b0b30 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:39 +1000
Subject: [PATCH 2629/3804] KVM: PPC: Book3S HV P9: Add helpers for OS SPR
 handling

This is a first step to wrapping supervisor and user SPR saving and
loading up into helpers, which will then be called independently in
bare metal and nested HV cases in order to optimise SPR access.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-20-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 148 ++++++++++++++++++++++-------------
 1 file changed, 93 insertions(+), 55 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6d39e4784af6e..12c35b0561d37 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3501,6 +3501,93 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	trace_kvmppc_run_core(vc, 1);
 }
 
+static void load_spr_state(struct kvm_vcpu *vcpu)
+{
+	mtspr(SPRN_DSCR, vcpu->arch.dscr);
+	mtspr(SPRN_IAMR, vcpu->arch.iamr);
+	mtspr(SPRN_PSPB, vcpu->arch.pspb);
+	mtspr(SPRN_FSCR, vcpu->arch.fscr);
+	mtspr(SPRN_TAR, vcpu->arch.tar);
+	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+	mtspr(SPRN_BESCR, vcpu->arch.bescr);
+	mtspr(SPRN_WORT, vcpu->arch.wort);
+	mtspr(SPRN_TIDR, vcpu->arch.tid);
+	mtspr(SPRN_AMR, vcpu->arch.amr);
+	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+	/*
+	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+	 * clear (or hstate set appropriately to catch those registers
+	 * being clobbered if we take a MCE or SRESET), so those are done
+	 * later.
+	 */
+
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
+
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+
+	vcpu->arch.iamr = mfspr(SPRN_IAMR);
+	vcpu->arch.pspb = mfspr(SPRN_PSPB);
+	vcpu->arch.fscr = mfspr(SPRN_FSCR);
+	vcpu->arch.tar = mfspr(SPRN_TAR);
+	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+	vcpu->arch.bescr = mfspr(SPRN_BESCR);
+	vcpu->arch.wort = mfspr(SPRN_WORT);
+	vcpu->arch.tid = mfspr(SPRN_TIDR);
+	vcpu->arch.amr = mfspr(SPRN_AMR);
+	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+	vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+	unsigned long dscr;
+	unsigned long tidr;
+	unsigned long iamr;
+	unsigned long amr;
+	unsigned long fscr;
+};
+
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+	host_os_sprs->dscr = mfspr(SPRN_DSCR);
+	host_os_sprs->tidr = mfspr(SPRN_TIDR);
+	host_os_sprs->iamr = mfspr(SPRN_IAMR);
+	host_os_sprs->amr = mfspr(SPRN_AMR);
+	host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
+
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+				    struct p9_host_os_sprs *host_os_sprs)
+{
+	mtspr(SPRN_PSPB, 0);
+	mtspr(SPRN_WORT, 0);
+	mtspr(SPRN_UAMOR, 0);
+
+	mtspr(SPRN_DSCR, host_os_sprs->dscr);
+	mtspr(SPRN_TIDR, host_os_sprs->tidr);
+	mtspr(SPRN_IAMR, host_os_sprs->iamr);
+
+	if (host_os_sprs->amr != vcpu->arch.amr)
+		mtspr(SPRN_AMR, host_os_sprs->amr);
+
+	if (host_os_sprs->fscr != vcpu->arch.fscr)
+		mtspr(SPRN_FSCR, host_os_sprs->fscr);
+
+	/* Save guest CTRL register, set runlatch to 1 */
+	if (!(vcpu->arch.ctrl & 1))
+		mtspr(SPRN_CTRLT, 1);
+}
+
 static inline bool hcall_is_xics(unsigned long req)
 {
 	return req == H_EOI || req == H_CPPR || req == H_IPI ||
@@ -3515,11 +3602,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			 unsigned long lpcr)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
-	unsigned long host_dscr = mfspr(SPRN_DSCR);
-	unsigned long host_tidr = mfspr(SPRN_TIDR);
-	unsigned long host_iamr = mfspr(SPRN_IAMR);
-	unsigned long host_amr = mfspr(SPRN_AMR);
-	unsigned long host_fscr = mfspr(SPRN_FSCR);
+	struct p9_host_os_sprs host_os_sprs;
 	s64 dec;
 	u64 tb;
 	int trap, save_pmu;
@@ -3534,6 +3617,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (local_paca->kvm_hstate.dec_expires < time_limit)
 		time_limit = local_paca->kvm_hstate.dec_expires;
 
+	save_p9_host_os_sprs(&host_os_sprs);
+
 	kvmhv_save_host_pmu();		/* saves it to PACA kvm_hstate */
 
 	kvmppc_subcore_enter_guest();
@@ -3561,28 +3646,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 #endif
 	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
 
-	mtspr(SPRN_DSCR, vcpu->arch.dscr);
-	mtspr(SPRN_IAMR, vcpu->arch.iamr);
-	mtspr(SPRN_PSPB, vcpu->arch.pspb);
-	mtspr(SPRN_FSCR, vcpu->arch.fscr);
-	mtspr(SPRN_TAR, vcpu->arch.tar);
-	mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
-	mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
-	mtspr(SPRN_BESCR, vcpu->arch.bescr);
-	mtspr(SPRN_WORT, vcpu->arch.wort);
-	mtspr(SPRN_TIDR, vcpu->arch.tid);
-	mtspr(SPRN_AMR, vcpu->arch.amr);
-	mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
-	/*
-	 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
-	 * clear (or hstate set appropriately to catch those registers
-	 * being clobbered if we take a MCE or SRESET), so those are done
-	 * later.
-	 */
-
-	if (!(vcpu->arch.ctrl & 1))
-		mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+	load_spr_state(vcpu);
 
 	/*
 	 * When setting DEC, we must always deal with irq_work_raise via NMI vs
@@ -3678,36 +3742,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	vcpu->arch.dec_expires = dec + tb;
 	vcpu->cpu = -1;
 	vcpu->arch.thread_cpu = -1;
-	/* Save guest CTRL register, set runlatch to 1 */
-	vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
-	if (!(vcpu->arch.ctrl & 1))
-		mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
-
-	vcpu->arch.iamr = mfspr(SPRN_IAMR);
-	vcpu->arch.pspb = mfspr(SPRN_PSPB);
-	vcpu->arch.fscr = mfspr(SPRN_FSCR);
-	vcpu->arch.tar = mfspr(SPRN_TAR);
-	vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
-	vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
-	vcpu->arch.bescr = mfspr(SPRN_BESCR);
-	vcpu->arch.wort = mfspr(SPRN_WORT);
-	vcpu->arch.tid = mfspr(SPRN_TIDR);
-	vcpu->arch.amr = mfspr(SPRN_AMR);
-	vcpu->arch.uamor = mfspr(SPRN_UAMOR);
-	vcpu->arch.dscr = mfspr(SPRN_DSCR);
-
-	mtspr(SPRN_PSPB, 0);
-	mtspr(SPRN_WORT, 0);
-	mtspr(SPRN_UAMOR, 0);
-	mtspr(SPRN_DSCR, host_dscr);
-	mtspr(SPRN_TIDR, host_tidr);
-	mtspr(SPRN_IAMR, host_iamr);
 
-	if (host_amr != vcpu->arch.amr)
-		mtspr(SPRN_AMR, host_amr);
+	store_spr_state(vcpu);
 
-	if (host_fscr != vcpu->arch.fscr)
-		mtspr(SPRN_FSCR, host_fscr);
+	restore_p9_host_os_sprs(vcpu, &host_os_sprs);
 
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	store_fp_state(&vcpu->arch.fp);
-- 
GitLab


From 41f779917669fcc28a7f5646d1f7a85043c9d152 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:40 +1000
Subject: [PATCH 2630/3804] KVM: PPC: Book3S HV P9: Switch to guest MMU context
 as late as possible

Move MMU context switch as late as reasonably possible to minimise code
running with guest context switched in. This becomes more important when
this code may run in real-mode, with later changes.

Move WARN_ON as early as possible so program check interrupts are less
likely to tangle everything up.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-21-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 40 +++++++++++++--------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index f24a12632b727..0b5bd00c9d0fc 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -138,8 +138,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	if (hdec < 0)
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
 
+	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
 	start_timing(vcpu, &vcpu->arch.rm_entry);
 
+	vcpu->arch.ceded = 0;
+
 	if (vc->tb_offset) {
 		u64 new_tb = mftb() + vc->tb_offset;
 		mtspr(SPRN_TBU40, new_tb);
@@ -188,26 +193,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
 
-	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
-	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
-	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
-	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
-	mtspr(SPRN_AMOR, ~0UL);
-
-	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
-
-	/*
-	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
-	 * so set guest LPCR (with HDICE) before writing HDEC.
-	 */
-	mtspr(SPRN_HDEC, hdec);
-
-	vcpu->arch.ceded = 0;
-
-	WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
-	WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
-
 	mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
 	mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
 
@@ -226,6 +211,21 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDSISR, HDSISR_CANARY);
 
+	mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+	mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+	mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+	mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+	mtspr(SPRN_AMOR, ~0UL);
+
+	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+
+	/*
+	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+	 * so set guest LPCR (with HDICE) before writing HDEC.
+	 */
+	mtspr(SPRN_HDEC, hdec);
+
 	__mtmsrd(0, 1); /* clear RI */
 
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
-- 
GitLab


From 2e1ae9cd56f8616a707185f3c6cb7ee2a20809e1 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:41 +1000
Subject: [PATCH 2631/3804] KVM: PPC: Book3S HV: Implement radix prefetch
 workaround by disabling MMU

Rather than partition the guest PID space + flush a rogue guest PID to
work around this problem, instead fix it by always disabling the MMU when
switching in or out of guest MMU context in HV mode.

This may be a bit less efficient, but it is a lot less complicated and
allows the P9 path to trivially implement the workaround too. Newer CPUs
are not subject to this issue.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-22-npiggin@gmail.com
---
 arch/powerpc/include/asm/mmu_context.h   |  6 ----
 arch/powerpc/kvm/book3s_hv.c             | 21 +++++++----
 arch/powerpc/kvm/book3s_hv_p9_entry.c    | 14 ++++++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 34 ------------------
 arch/powerpc/mm/book3s64/radix_pgtable.c | 27 +++++---------
 arch/powerpc/mm/book3s64/radix_tlb.c     | 46 ------------------------
 arch/powerpc/mm/mmu_context.c            |  4 +--
 7 files changed, 34 insertions(+), 118 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0e..84e192aa54fdd 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif
 
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
 extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 12c35b0561d37..eb25605e23b9b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -807,7 +807,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
 		 * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
 		 * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
 		 */
-		if (mflags != 0 && mflags != 3)
+		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+				kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
 			return H_UNSUPPORTED_FLAG_START;
 		return H_TOO_HARD;
 	default:
@@ -1677,6 +1678,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
 		lpcr &= ~LPCR_AIL;
 	if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
 		lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+	/*
+	 * On some POWER9s we force AIL off for radix guests to prevent
+	 * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+	 * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+	 * be cached, which the host TLB management does not expect.
+	 */
+	if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		lpcr &= ~LPCR_AIL;
 
 	/*
 	 * On POWER9, allow userspace to enable large decrementer for the
@@ -4360,12 +4369,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
-		/*
-		 * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
-		 * path, which also handles hash and dependent threads mode.
-		 */
-		if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
-		    !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		if (kvm->arch.threads_indep && kvm_is_radix(kvm))
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
 		else
@@ -4995,6 +4999,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 		if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
 			pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
 			kvm->arch.threads_indep = true;
+		} else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+			pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
+			kvm->arch.threads_indep = true;
 		} else {
 			kvm->arch.threads_indep = indep_threads_mode;
 		}
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 0b5bd00c9d0fc..178f771e299c6 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -218,6 +218,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	mtspr(SPRN_AMOR, ~0UL);
 
+	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
 	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
 
 	/*
@@ -226,7 +229,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
-	__mtmsrd(0, 1); /* clear RI */
+	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		__mtmsrd(0, 1); /* clear RI */
 
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
 	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
@@ -341,8 +345,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	radix_clear_slb();
 
-	__mtmsrd(msr, 0);
-
 	accumulate_time(vcpu, &vcpu->arch.rm_exit);
 
 	/* Advance host PURR/SPURR by the amount used by guest */
@@ -408,6 +410,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	switch_mmu_to_host_radix(kvm, host_pidr);
 
+	/*
+	 * If we are in real mode, only switch MMU on after the MMU is
+	 * switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
+	 */
+	__mtmsrd(msr, 0);
+
 	end_timing(vcpu);
 
 	return trap;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d742233dec55c..3b8fd4bd24199 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1717,40 +1717,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	eieio
 	tlbsync
 	ptesync
-
-BEGIN_FTR_SECTION
-	/* Radix: Handle the case where the guest used an illegal PID */
-	LOAD_REG_ADDR(r4, mmu_base_pid)
-	lwz	r3, VCPU_GUEST_PID(r9)
-	lwz	r5, 0(r4)
-	cmpw	cr0,r3,r5
-	blt	2f
-
-	/*
-	 * Illegal PID, the HW might have prefetched and cached in the TLB
-	 * some translations for the  LPID 0 / guest PID combination which
-	 * Linux doesn't know about, so we need to flush that PID out of
-	 * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
-	 * the right context.
-	*/
-	li	r0,0
-	mtspr	SPRN_LPID,r0
-	isync
-
-	/* Then do a congruence class local flush */
-	ld	r6,VCPU_KVM(r9)
-	lwz	r0,KVM_TLB_SETS(r6)
-	mtctr	r0
-	li	r7,0x400		/* IS field = 0b01 */
-	ptesync
-	sldi	r0,r3,32		/* RS has PID */
-1:	PPC_TLBIEL(7,0,2,1,1)		/* RIC=2, PRS=1, R=1 */
-	addi	r7,r7,0x1000
-	bdnz	1b
-	ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
 #endif /* CONFIG_PPC_RADIX_MMU */
 
 	/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 5fef8db3b4634..fe236c38ce00f 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
 	}
 
 	/* Find out how many PID bits are supported */
-	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
-		if (!mmu_pid_bits)
-			mmu_pid_bits = 20;
-		mmu_base_pid = 1;
-	} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
-		if (!mmu_pid_bits)
-			mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
 		/*
-		 * When KVM is possible, we only use the top half of the
-		 * PID space to avoid collisions between host and guest PIDs
-		 * which can cause problems due to prefetch when exiting the
-		 * guest with AIL=3
+		 * Older versions of KVM on these machines prefer if the
+		 * guest only uses the low 19 PID bits.
 		 */
-		mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
-		mmu_base_pid = 1;
-#endif
-	} else {
-		/* The guest uses the bottom half of the PID space */
 		if (!mmu_pid_bits)
 			mmu_pid_bits = 19;
-		mmu_base_pid = 1;
+	} else {
+		if (!mmu_pid_bits)
+			mmu_pid_bits = 20;
 	}
+	mmu_base_pid = 1;
 
 	/*
 	 * Allocate Partition table and process table for the
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e612107892..312236a6b0855 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1336,49 +1336,3 @@ void radix__flush_tlb_all(void)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
-{
-	unsigned long pid = mm->context.id;
-
-	if (unlikely(pid == MMU_NO_CONTEXT))
-		return;
-
-	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-		return;
-
-	/*
-	 * If this context hasn't run on that CPU before and KVM is
-	 * around, there's a slim chance that the guest on another
-	 * CPU just brought in obsolete translation into the TLB of
-	 * this CPU due to a bad prefetch using the guest PID on
-	 * the way into the hypervisor.
-	 *
-	 * We work around this here. If KVM is possible, we check if
-	 * any sibling thread is in KVM. If it is, the window may exist
-	 * and thus we flush that PID from the core.
-	 *
-	 * A potential future improvement would be to mark which PIDs
-	 * have never been used on the system and avoid it if the PID
-	 * is new and the process has no other cpumask bit set.
-	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
-		int cpu = smp_processor_id();
-		int sib = cpu_first_thread_sibling(cpu);
-		bool flush = false;
-
-		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
-			if (sib == cpu)
-				continue;
-			if (!cpu_possible(sib))
-				continue;
-			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
-				flush = true;
-		}
-		if (flush)
-			_tlbiel_pid(pid, RIC_FLUSH_ALL);
-	}
-}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index a857af401738f..74246536b8326 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (cpu_has_feature(CPU_FTR_ALTIVEC))
 		asm volatile ("dssall");
 
-	if (new_on_cpu)
-		radix_kvm_prefetch_workaround(next);
-	else
+	if (!new_on_cpu)
 		membarrier_arch_switch_mm(prev, next, tsk);
 
 	/*
-- 
GitLab


From aaae8c79005846eeafc7a0e5d3eda4e34ea8ca2e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:42 +1000
Subject: [PATCH 2632/3804] KVM: PPC: Book3S HV: Remove support for dependent
 threads mode on P9

Dependent-threads mode is the normal KVM mode for pre-POWER9 SMT
processors, where all threads in a core (or subcore) would run the same
partition at the same time, or they would run the host.

This design was mandated by MMU state that is shared between threads in
a processor, so the synchronisation point is in hypervisor real-mode
that has essentially no shared state, so it's safe for multiple threads
to gather and switch to the correct mode.

It is implemented by having the host unplug all secondary threads and
always run in SMT1 mode, and host QEMU threads essentially represent
virtual cores that wake these secondary threads out of unplug when the
ioctl is called to run the guest. This happens via a side-path that is
mostly invisible to the rest of the Linux host and the secondary threads
still appear to be unplugged.

POWER9 / ISA v3.0 has a more flexible MMU design that is independent
per-thread and allows a much simpler KVM implementation. Before the new
"P9 fast path" was added that began to take advantage of this, POWER9
support was implemented in the existing path which has support to run
in the dependent threads mode. So it was not much work to add support to
run POWER9 in this dependent threads mode.

The mode is not required by the POWER9 MMU (although "mixed-mode" hash /
radix MMU limitations of early processors were worked around using this
mode). But it is one way to run SMT guests without running different
guests or guest and host on different threads of the same core, so it
could avoid or reduce some SMT attack surfaces without turning off SMT
entirely.

This security feature has some real, if indeterminate, value. However
the old path is lagging in features (nested HV), and with this series
the new P9 path adds remaining missing features (radix prefetch bug
and hash support, in later patches), so POWER9 dependent threads mode
support would be the only remaining reason to keep that code in and keep
supporting POWER9/POWER10 in the old path. So here we make the call to
drop this feature.

Remove dependent threads mode support for POWER9 and above processors.
Systems can still achieve this security by disabling SMT entirely, but
that would generally come at a larger performance cost for guests.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-23-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_asm.h  |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  1 -
 arch/powerpc/kvm/book3s_64_entry.S  |  3 +--
 arch/powerpc/kvm/book3s_hv.c        | 27 +++++----------------------
 4 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 43b1788e1f930..f4ae37810aa98 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,7 +147,7 @@
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
 #define KVM_GUEST_MODE_HOST_HV	4
-#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host+guest radix, indep thr */
+#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host+guest radix */
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 69add9d662df5..6904ce9e81901 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -297,7 +297,6 @@ struct kvm_arch {
 	u8 fwnmi_enabled;
 	u8 secure_guest;
 	u8 svm_enabled;
-	bool threads_indep;
 	bool nested_enable;
 	bool dawr1_enabled;
 	pgd_t *pgtable;
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 177e8fad5c8dd..bac664c1a9f7b 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -184,8 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
  *
  * Enter the guest on a ISAv3.0 or later system where we have exactly
- * one vcpu per vcore, and both the host and guest are radix, and threads
- * are set to "indepdent mode".
+ * one vcpu per vcore, and both the host and guest are radix.
  */
 .balign	IFETCH_ALIGN_BYTES
 _GLOBAL(kvmppc_p9_enter_guest)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index eb25605e23b9b..acb0c72ea9007 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -103,13 +103,9 @@ static int target_smt_mode;
 module_param(target_smt_mode, int, 0644);
 MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
 
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
 static bool one_vm_per_core;
 module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
 
 #ifdef CONFIG_KVM_XICS
 static const struct kernel_param_ops module_param_ops = {
@@ -2265,7 +2261,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
  */
 static int threads_per_vcore(struct kvm *kvm)
 {
-	if (kvm->arch.threads_indep)
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
 		return 1;
 	return threads_per_subcore;
 }
@@ -4369,7 +4365,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
-		if (kvm->arch.threads_indep && kvm_is_radix(kvm))
+		if (kvm_is_radix(kvm))
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
 		else
@@ -4992,21 +4988,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
-	 * On POWER9, we only need to do this if the "indep_threads_mode"
-	 * module parameter has been set to N.
 	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-		if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
-			pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
-			kvm->arch.threads_indep = true;
-		} else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
-			pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
-			kvm->arch.threads_indep = true;
-		} else {
-			kvm->arch.threads_indep = indep_threads_mode;
-		}
-	}
-	if (!kvm->arch.threads_indep)
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		kvm_hv_vm_activated();
 
 	/*
@@ -5047,7 +5030,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
 	debugfs_remove_recursive(kvm->arch.debugfs_dir);
 
-	if (!kvm->arch.threads_indep)
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
-- 
GitLab


From 9769a7fd79b65a6a6f8362154ab59c36d0defbf3 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:43 +1000
Subject: [PATCH 2633/3804] KVM: PPC: Book3S HV: Remove radix guest support
 from P7/8 path

The P9 path now runs all supported radix guest combinations, so
remove radix guest support from the P7/8 path.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-24-npiggin@gmail.com
---
 arch/powerpc/kernel/asm-offsets.c       |   1 -
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 103 +-----------------------
 2 files changed, 3 insertions(+), 101 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 28af4efb45870..aa267d173ded0 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -534,7 +534,6 @@ int main(void)
 	OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
 	OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
 	OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
-	OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
 	OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
 	OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
 	OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3b8fd4bd24199..bf441b9b03cb4 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -134,15 +134,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Return the trap number on this thread as the return value */
 	mr	r3, r12
 
-	/*
-	 * If we came back from the guest via a relocation-on interrupt,
-	 * we will be in virtual mode at this point, which makes it a
-	 * little easier to get back to the caller.
-	 */
-	mfmsr	r0
-	andi.	r0, r0, MSR_IR		/* in real mode? */
-	bne	.Lvirt_return
-
 	/* RFI into the highmem handler */
 	mfmsr	r6
 	li	r0, MSR_RI
@@ -152,11 +143,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	mtsrr1	r7
 	RFI_TO_KERNEL
 
-	/* Virtual-mode return */
-.Lvirt_return:
-	mtlr	r8
-	blr
-
 kvmppc_primary_no_guest:
 	/* We handle this much like a ceded vcpu */
 	/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
@@ -902,11 +888,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
-	ld	r6, VCPU_KVM(r4)
-	lbz	r0, KVM_RADIX(r6)
-	cmpwi	r0, 0
-	bne	9f
-
 	/* For hash guest, clear out and reload the SLB */
 BEGIN_MMU_FTR_SECTION
 	/* Radix host won't have populated the SLB, so no need to clear */
@@ -1094,12 +1075,8 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_HDSISR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
-	ld	r6, VCPU_KVM(r4)
-	lbz	r7, KVM_SECURE_GUEST(r6)
-	cmpdi	r7, 0
 	ld	r6, VCPU_GPR(R6)(r4)
 	ld	r7, VCPU_GPR(R7)(r4)
-	bne	ret_to_ultra
 
 	ld	r0, VCPU_CR(r4)
 	mtcr	r0
@@ -1110,26 +1087,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	ld	r4, VCPU_GPR(R4)(r4)
 	HRFI_TO_GUEST
 	b	.
-/*
- * Use UV_RETURN ultracall to return control back to the Ultravisor after
- * processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
- * to the Hypervisor.
- *
- * All registers have already been loaded, except:
- *   R0 = hcall result
- *   R2 = SRR1, so UV can detect a synthesized interrupt (if any)
- *   R3 = UV_RETURN
- */
-ret_to_ultra:
-	ld	r0, VCPU_CR(r4)
-	mtcr	r0
-
-	ld	r0, VCPU_GPR(R3)(r4)
-	mfspr	r2, SPRN_SRR1
-	li	r3, 0
-	ori	r3, r3, UV_RETURN
-	ld	r4, VCPU_GPR(R4)(r4)
-	sc	2
 
 secondary_too_late:
 	li	r12, 0
@@ -1392,11 +1349,7 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	patch_site 1b patch__call_kvm_flush_link_stack
 
 	/* For hash guest, read the guest SLB and save it away */
-	ld	r5, VCPU_KVM(r9)
-	lbz	r0, KVM_RADIX(r5)
 	li	r5, 0
-	cmpwi	r0, 0
-	bne	0f			/* for radix, save 0 entries */
 	lwz	r0,VCPU_SLB_NR(r9)	/* number of entries in SLB */
 	mtctr	r0
 	li	r6,0
@@ -1435,23 +1388,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	slbmte	r6,r5
 1:	addi	r8,r8,16
 	.endr
-	b	guest_bypass
-
-0:	/*
-	 * Malicious or buggy radix guests may have inserted SLB entries
-	 * (only 0..3 because radix always runs with UPRT=1), so these must
-	 * be cleared here to avoid side-channels. slbmte is used rather
-	 * than slbia, as it won't clear cached translations.
-	 */
-	li	r0,0
-	stw	r0,VCPU_SLB_MAX(r9)
-	slbmte	r0,r0
-	li	r4,1
-	slbmte	r0,r4
-	li	r4,2
-	slbmte	r0,r4
-	li	r4,3
-	slbmte	r0,r4
 
 guest_bypass:
 	stw	r12, STACK_SLOT_TRAP(r1)
@@ -1701,24 +1637,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_PID, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
-#ifdef CONFIG_PPC_RADIX_MMU
-	/*
-	 * Are we running hash or radix ?
-	 */
-	ld	r5, VCPU_KVM(r9)
-	lbz	r0, KVM_RADIX(r5)
-	cmpwi	cr2, r0, 0
-	beq	cr2, 2f
-
-	/*
-	 * Radix: do eieio; tlbsync; ptesync sequence in case we
-	 * interrupted the guest between a tlbie and a ptesync.
-	 */
-	eieio
-	tlbsync
-	ptesync
-#endif /* CONFIG_PPC_RADIX_MMU */
-
 	/*
 	 * cp_abort is required if the processor supports local copy-paste
 	 * to clear the copy buffer that was under control of the guest.
@@ -1977,8 +1895,6 @@ kvmppc_tm_emul:
  * reflect the HDSI to the guest as a DSI.
  */
 kvmppc_hdsi:
-	ld	r3, VCPU_KVM(r9)
-	lbz	r0, KVM_RADIX(r3)
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
 BEGIN_FTR_SECTION
@@ -1986,8 +1902,6 @@ BEGIN_FTR_SECTION
 	cmpdi	r6, 0x7fff
 	beq	6f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-	cmpwi	r0, 0
-	bne	.Lradix_hdsi		/* on radix, just save DAR/DSISR/ASDR */
 	/* HPTE not found fault or protection fault? */
 	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
 	beq	1f			/* if not, send it to the guest */
@@ -2064,23 +1978,11 @@ fast_interrupt_c_return:
 	stb	r0, HSTATE_IN_GUEST(r13)
 	b	guest_exit_cont
 
-.Lradix_hdsi:
-	std	r4, VCPU_FAULT_DAR(r9)
-	stw	r6, VCPU_FAULT_DSISR(r9)
-.Lradix_hisi:
-	mfspr	r5, SPRN_ASDR
-	std	r5, VCPU_FAULT_GPA(r9)
-	b	guest_exit_cont
-
 /*
  * Similarly for an HISI, reflect it to the guest as an ISI unless
  * it is an HPTE not found fault for a page that we have paged out.
  */
 kvmppc_hisi:
-	ld	r3, VCPU_KVM(r9)
-	lbz	r0, KVM_RADIX(r3)
-	cmpwi	r0, 0
-	bne	.Lradix_hisi		/* for radix, just save ASDR */
 	andis.	r0, r11, SRR1_ISI_NOPT@h
 	beq	1f
 	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
@@ -3224,15 +3126,16 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DAWRX1, r0
 END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 
-	/* Clear hash and radix guest SLB. */
+	/* Clear guest SLB. */
 	slbmte	r0, r0
 	PPC_SLBIA(6)
+	ptesync
 
 BEGIN_MMU_FTR_SECTION
 	b	4f
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 
-	ptesync
+	/* load host SLB entries */
 	ld	r8, PACA_SLBSHADOWPTR(r13)
 	.rept	SLB_NUM_BOLTED
 	li	r3, SLBSHADOW_SAVEAREA
-- 
GitLab


From dcbac73a5b374873bd6dfd8a0ee5d0b7fc844420 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:44 +1000
Subject: [PATCH 2634/3804] KVM: PPC: Book3S HV: Remove virt mode checks from
 real mode handlers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that the P7/8 path no longer supports radix, real-mode handlers
do not need to deal with being called in virt mode.

This change effectively reverts commit acde25726bc6 ("KVM: PPC: Book3S
HV: Add radix checks in real-mode hypercall handlers").

It removes a few more real-mode tests in rm hcall handlers, which
allows the indirect ops for the xive module to be removed from the
built-in xics rm handlers.

kvmppc_h_random is renamed to kvmppc_rm_h_random to be a bit more
descriptive and consistent with other rm handlers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-25-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_ppc.h      | 10 +--
 arch/powerpc/kvm/book3s.c               | 11 +--
 arch/powerpc/kvm/book3s_64_vio_hv.c     | 12 ----
 arch/powerpc/kvm/book3s_hv_builtin.c    | 91 ++++++-------------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  2 +-
 arch/powerpc/kvm/book3s_xive.c          | 18 -----
 arch/powerpc/kvm/book3s_xive.h          |  7 --
 arch/powerpc/kvm/book3s_xive_native.c   | 10 ---
 8 files changed, 23 insertions(+), 138 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index cb9e3c85c6052..2d88944f9f34f 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -659,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 				u32 *priority);
 extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
 extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
-extern void kvmppc_xive_init_module(void);
-extern void kvmppc_xive_exit_module(void);
 
 extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 				    struct kvm_vcpu *vcpu, u32 cpu);
@@ -686,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 					   struct kvm_vcpu *vcpu, u32 cpu);
 extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
-extern void kvmppc_xive_native_init_module(void);
-extern void kvmppc_xive_native_exit_module(void);
 extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
 				     union kvmppc_one_reg *val);
 extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
@@ -701,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
 				       u32 *priority) { return -1; }
 static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
 static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
-static inline void kvmppc_xive_init_module(void) { }
-static inline void kvmppc_xive_exit_module(void) { }
 
 static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
 					   struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
@@ -725,8 +719,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
 static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
 			  struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
 static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
-static inline void kvmppc_xive_native_init_module(void) { }
-static inline void kvmppc_xive_native_exit_module(void) { }
 static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
 					    union kvmppc_one_reg *val)
 { return 0; }
@@ -762,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 			   unsigned long tce_value, unsigned long npages);
 long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
                             unsigned int yield_count);
-long kvmppc_h_random(struct kvm_vcpu *vcpu);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
 void kvmhv_commence_exit(int trap);
 void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
 void kvmppc_subcore_enter_guest(void);
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index d69560d5bf16c..5e1e1cff0ee38 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1050,13 +1050,10 @@ static int kvmppc_book3s_init(void)
 #ifdef CONFIG_KVM_XICS
 #ifdef CONFIG_KVM_XIVE
 	if (xics_on_xive()) {
-		kvmppc_xive_init_module();
 		kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
-		if (kvmppc_xive_native_supported()) {
-			kvmppc_xive_native_init_module();
+		if (kvmppc_xive_native_supported())
 			kvm_register_device_ops(&kvm_xive_native_ops,
 						KVM_DEV_TYPE_XIVE);
-		}
 	} else
 #endif
 		kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1066,12 +1063,6 @@ static int kvmppc_book3s_init(void)
 
 static void kvmppc_book3s_exit(void)
 {
-#ifdef CONFIG_KVM_XICS
-	if (xics_on_xive()) {
-		kvmppc_xive_exit_module();
-		kvmppc_xive_native_exit_module();
-	}
-#endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	kvmppc_book3s_exit_pr();
 #endif
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 083a4e037718d..dc6591548f0cf 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 	/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
 	/* 	    liobn, ioba, tce); */
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
 	bool prereg = false;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	/*
 	 * used to check for invalidations in progress
 	 */
@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
 	long i, ret;
 	struct kvmppc_spapr_tce_iommu_table *stit;
 
-	/* For radix, we might be in virtual mode, so punt */
-	if (kvm_is_radix(vcpu->kvm))
-		return H_TOO_HARD;
-
 	stt = kvmppc_find_table(vcpu->kvm, liobn);
 	if (!stt)
 		return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7a0e33a9c980d..8d669a0e15f85 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -34,21 +34,6 @@
 #include "book3s_xics.h"
 #include "book3s_xive.h"
 
-/*
- * The XIVE module will populate these when it loads
- */
-unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
-		       unsigned long mfrr);
-int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
-EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
-EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
-EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
-
 /*
  * Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
  * should be power of 2.
@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
 }
 EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
 
-long kvmppc_h_random(struct kvm_vcpu *vcpu)
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
 {
-	int r;
-
-	/* Only need to do the expensive mfmsr() on radix */
-	if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
-		r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
-	else
-		r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
-	if (r)
+	if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
 		return H_SUCCESS;
 
 	return H_HARDWARE;
@@ -541,22 +519,13 @@ static long kvmppc_read_one_intr(bool *again)
 }
 
 #ifdef CONFIG_KVM_XICS
-static inline bool is_rm(void)
-{
-	return !(mfmsr() & MSR_DR);
-}
-
 unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_xirr(vcpu);
-		if (unlikely(!__xive_vm_h_xirr))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_xirr(vcpu);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_xirr(vcpu);
+	else
 		return xics_rm_h_xirr(vcpu);
 }
 
@@ -565,13 +534,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
 	vcpu->arch.regs.gpr[5] = get_tb();
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_xirr(vcpu);
-		if (unlikely(!__xive_vm_h_xirr))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_xirr(vcpu);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_xirr(vcpu);
+	else
 		return xics_rm_h_xirr(vcpu);
 }
 
@@ -579,13 +544,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_ipoll(vcpu, server);
-		if (unlikely(!__xive_vm_h_ipoll))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_ipoll(vcpu, server);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_ipoll(vcpu, server);
+	else
 		return H_TOO_HARD;
 }
 
@@ -594,13 +555,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_ipi(vcpu, server, mfrr);
-		if (unlikely(!__xive_vm_h_ipi))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_ipi(vcpu, server, mfrr);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_ipi(vcpu, server, mfrr);
+	else
 		return xics_rm_h_ipi(vcpu, server, mfrr);
 }
 
@@ -608,13 +565,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_cppr(vcpu, cppr);
-		if (unlikely(!__xive_vm_h_cppr))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_cppr(vcpu, cppr);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_cppr(vcpu, cppr);
+	else
 		return xics_rm_h_cppr(vcpu, cppr);
 }
 
@@ -622,13 +575,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
 	if (!kvmppc_xics_enabled(vcpu))
 		return H_TOO_HARD;
-	if (xics_on_xive()) {
-		if (is_rm())
-			return xive_rm_h_eoi(vcpu, xirr);
-		if (unlikely(!__xive_vm_h_eoi))
-			return H_NOT_AVAILABLE;
-		return __xive_vm_h_eoi(vcpu, xirr);
-	} else
+	if (xics_on_xive())
+		return xive_rm_h_eoi(vcpu, xirr);
+	else
 		return xics_rm_h_eoi(vcpu, xirr);
 }
 #endif /* CONFIG_KVM_XICS */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bf441b9b03cb4..33aa0ef496e50 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2306,7 +2306,7 @@ hcall_real_table:
 #else
 	.long	0		/* 0x2fc - H_XIRR_X*/
 #endif
-	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
+	.long	DOTSYM(kvmppc_rm_h_random) - hcall_real_table
 	.globl	hcall_real_table_end
 hcall_real_table_end:
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 24c07094651a3..9268d386b128a 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -2352,21 +2352,3 @@ struct kvm_device_ops kvm_xive_ops = {
 	.get_attr = xive_get_attr,
 	.has_attr = xive_has_attr,
 };
-
-void kvmppc_xive_init_module(void)
-{
-	__xive_vm_h_xirr = xive_vm_h_xirr;
-	__xive_vm_h_ipoll = xive_vm_h_ipoll;
-	__xive_vm_h_ipi = xive_vm_h_ipi;
-	__xive_vm_h_cppr = xive_vm_h_cppr;
-	__xive_vm_h_eoi = xive_vm_h_eoi;
-}
-
-void kvmppc_xive_exit_module(void)
-{
-	__xive_vm_h_xirr = NULL;
-	__xive_vm_h_ipoll = NULL;
-	__xive_vm_h_ipi = NULL;
-	__xive_vm_h_cppr = NULL;
-	__xive_vm_h_eoi = NULL;
-}
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 86c24a4ad8093..afe9eeac6d56e 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
 extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
 extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
 
-extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
-extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
-extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
-			      unsigned long mfrr);
-extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
-extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
-
 /*
  * Common Xive routines for XICS-over-XIVE and XIVE native
  */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 76800c84f2a35..1253666dd4d80 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -1281,13 +1281,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
 	.has_attr = kvmppc_xive_native_has_attr,
 	.mmap = kvmppc_xive_native_mmap,
 };
-
-void kvmppc_xive_native_init_module(void)
-{
-	;
-}
-
-void kvmppc_xive_native_exit_module(void)
-{
-	;
-}
-- 
GitLab


From 2ce008c8b25467ceacf45bcf0e183d660edb82f2 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:45 +1000
Subject: [PATCH 2635/3804] KVM: PPC: Book3S HV: Remove unused nested HV tests
 in XICS emulation

Commit f3c18e9342a44 ("KVM: PPC: Book3S HV: Use XICS hypercalls when
running as a nested hypervisor") added nested HV tests in XICS
hypercalls, but not all are required.

* icp_eoi is only called by kvmppc_deliver_irq_passthru which is only
  called by kvmppc_check_passthru which is only called by
  kvmppc_read_one_intr.

* kvmppc_read_one_intr is only called by kvmppc_read_intr which is only
  called by the L0 HV rmhandlers code.

* kvmhv_rm_send_ipi is called by:
  - kvmhv_interrupt_vcore which is only called by kvmhv_commence_exit
    which is only called by the L0 HV rmhandlers code.
  - icp_send_hcore_msg which is only called by icp_rm_set_vcpu_irq.
  - icp_rm_set_vcpu_irq which is only called by icp_rm_try_update
  - icp_rm_set_vcpu_irq is not nested HV safe because it writes to
    LPCR directly without a kvmhv_on_pseries test. Nested handlers
    should not in general be using the rm handlers.

The important test seems to be in kvmppc_ipi_thread, which sends the
virt-mode H_IPI handler kick to use smp_call_function rather than
msgsnd.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-26-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 44 +++++-----------------------
 arch/powerpc/kvm/book3s_hv_rm_xics.c | 15 ----------
 2 files changed, 8 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 8d669a0e15f85..259492bb41531 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -199,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
 	void __iomem *xics_phys;
 	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
 
-	/* For a nested hypervisor, use the XICS via hcall */
-	if (kvmhv_on_pseries()) {
-		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-		plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
-				IPI_PRIORITY);
-		return;
-	}
-
 	/* On POWER9 we can use msgsnd for any destination cpu. */
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		msg |= get_hard_smp_processor_id(cpu);
@@ -420,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
 		return 1;
 
 	/* Now read the interrupt from the ICP */
-	if (kvmhv_on_pseries()) {
-		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-		rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
-		xirr = cpu_to_be32(retbuf[0]);
-	} else {
-		xics_phys = local_paca->kvm_hstate.xics_phys;
-		rc = 0;
-		if (!xics_phys)
-			rc = opal_int_get_xirr(&xirr, false);
-		else
-			xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
-	}
+	xics_phys = local_paca->kvm_hstate.xics_phys;
+	rc = 0;
+	if (!xics_phys)
+		rc = opal_int_get_xirr(&xirr, false);
+	else
+		xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
 	if (rc < 0)
 		return 1;
 
@@ -461,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
 	 */
 	if (xisr == XICS_IPI) {
 		rc = 0;
-		if (kvmhv_on_pseries()) {
-			unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-			plpar_hcall_raw(H_IPI, retbuf,
-					hard_smp_processor_id(), 0xff);
-			plpar_hcall_raw(H_EOI, retbuf, h_xirr);
-		} else if (xics_phys) {
+		if (xics_phys) {
 			__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
 			__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
 		} else {
@@ -493,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
 			/* We raced with the host,
 			 * we need to resend that IPI, bummer
 			 */
-			if (kvmhv_on_pseries()) {
-				unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-				plpar_hcall_raw(H_IPI, retbuf,
-						hard_smp_processor_id(),
-						IPI_PRIORITY);
-			} else if (xics_phys)
+			if (xics_phys)
 				__raw_rm_writeb(IPI_PRIORITY,
 						xics_phys + XICS_MFRR);
 			else
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index c2c9c733f3599..0a11ec88a0aef 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
 		return;
 	}
 
-	if (xive_enabled() && kvmhv_on_pseries()) {
-		/* No XICS access or hypercalls available, too hard */
-		this_icp->rm_action |= XICS_RM_KICK_VCPU;
-		this_icp->rm_kick_target = vcpu;
-		return;
-	}
-
 	/*
 	 * Check if the core is loaded,
 	 * if not, find an available host core to post to wake the VCPU,
@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
 	void __iomem *xics_phys;
 	int64_t rc;
 
-	if (kvmhv_on_pseries()) {
-		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-
-		iosync();
-		plpar_hcall_raw(H_EOI, retbuf, hwirq);
-		return;
-	}
-
 	rc = pnv_opal_pci_msi_eoi(c, hwirq);
 
 	if (rc)
-- 
GitLab


From cbcff8b1c53e458ed4e23877048d7268fd13ab8a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:46 +1000
Subject: [PATCH 2636/3804] KVM: PPC: Book3S HV P9: Allow all P9 processors to
 enable nested HV

All radix guests go via the P9 path now, so there is no need to limit
nested HV to processors that support "mixed mode" MMU. Remove the
restriction.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-27-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index acb0c72ea9007..cf403280b199d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5451,7 +5451,7 @@ static int kvmhv_enable_nested(struct kvm *kvm)
 {
 	if (!nested)
 		return -EPERM;
-	if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		return -ENODEV;
 
 	/* kvm == NULL means the caller is testing if the capability exists */
-- 
GitLab


From a9aa86e08b3a0b2c273cdb772283c872e55f14bf Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:47 +1000
Subject: [PATCH 2637/3804] KVM: PPC: Book3S HV: small pseries_do_hcall cleanup

Functionality should not be changed.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-28-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cf403280b199d..9ba77747bf005 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -927,6 +927,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	int yield_count;
@@ -942,7 +943,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		break;
 	case H_PROD:
 		target = kvmppc_get_gpr(vcpu, 4);
-		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+		tvcpu = kvmppc_find_vcpu(kvm, target);
 		if (!tvcpu) {
 			ret = H_PARAMETER;
 			break;
@@ -956,7 +957,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		target = kvmppc_get_gpr(vcpu, 4);
 		if (target == -1)
 			break;
-		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+		tvcpu = kvmppc_find_vcpu(kvm, target);
 		if (!tvcpu) {
 			ret = H_PARAMETER;
 			break;
@@ -972,12 +973,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 					kvmppc_get_gpr(vcpu, 6));
 		break;
 	case H_RTAS:
-		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+		if (list_empty(&kvm->arch.rtas_tokens))
 			return RESUME_HOST;
 
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		idx = srcu_read_lock(&kvm->srcu);
 		rc = kvmppc_rtas_hcall(vcpu);
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		srcu_read_unlock(&kvm->srcu, idx);
 
 		if (rc == -ENOENT)
 			return RESUME_HOST;
@@ -1064,12 +1065,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 	case H_SET_PARTITION_TABLE:
 		ret = H_FUNCTION;
-		if (nesting_enabled(vcpu->kvm))
+		if (nesting_enabled(kvm))
 			ret = kvmhv_set_partition_table(vcpu);
 		break;
 	case H_ENTER_NESTED:
 		ret = H_FUNCTION;
-		if (!nesting_enabled(vcpu->kvm))
+		if (!nesting_enabled(kvm))
 			break;
 		ret = kvmhv_enter_nested_guest(vcpu);
 		if (ret == H_INTERRUPT) {
@@ -1084,12 +1085,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		break;
 	case H_TLB_INVALIDATE:
 		ret = H_FUNCTION;
-		if (nesting_enabled(vcpu->kvm))
+		if (nesting_enabled(kvm))
 			ret = kvmhv_do_nested_tlbie(vcpu);
 		break;
 	case H_COPY_TOFROM_GUEST:
 		ret = H_FUNCTION;
-		if (nesting_enabled(vcpu->kvm))
+		if (nesting_enabled(kvm))
 			ret = kvmhv_copy_tofrom_guest_nested(vcpu);
 		break;
 	case H_PAGE_INIT:
@@ -1100,7 +1101,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	case H_SVM_PAGE_IN:
 		ret = H_UNSUPPORTED;
 		if (kvmppc_get_srr1(vcpu) & MSR_S)
-			ret = kvmppc_h_svm_page_in(vcpu->kvm,
+			ret = kvmppc_h_svm_page_in(kvm,
 						   kvmppc_get_gpr(vcpu, 4),
 						   kvmppc_get_gpr(vcpu, 5),
 						   kvmppc_get_gpr(vcpu, 6));
@@ -1108,7 +1109,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	case H_SVM_PAGE_OUT:
 		ret = H_UNSUPPORTED;
 		if (kvmppc_get_srr1(vcpu) & MSR_S)
-			ret = kvmppc_h_svm_page_out(vcpu->kvm,
+			ret = kvmppc_h_svm_page_out(kvm,
 						    kvmppc_get_gpr(vcpu, 4),
 						    kvmppc_get_gpr(vcpu, 5),
 						    kvmppc_get_gpr(vcpu, 6));
@@ -1116,12 +1117,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	case H_SVM_INIT_START:
 		ret = H_UNSUPPORTED;
 		if (kvmppc_get_srr1(vcpu) & MSR_S)
-			ret = kvmppc_h_svm_init_start(vcpu->kvm);
+			ret = kvmppc_h_svm_init_start(kvm);
 		break;
 	case H_SVM_INIT_DONE:
 		ret = H_UNSUPPORTED;
 		if (kvmppc_get_srr1(vcpu) & MSR_S)
-			ret = kvmppc_h_svm_init_done(vcpu->kvm);
+			ret = kvmppc_h_svm_init_done(kvm);
 		break;
 	case H_SVM_INIT_ABORT:
 		/*
@@ -1131,7 +1132,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		 * Instead the kvm->arch.secure_guest flag is checked inside
 		 * kvmppc_h_svm_init_abort().
 		 */
-		ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+		ret = kvmppc_h_svm_init_abort(kvm);
 		break;
 
 	default:
-- 
GitLab


From 6165d5dd99dbaec7a309491c3951bd81fc89978d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:48 +1000
Subject: [PATCH 2638/3804] KVM: PPC: Book3S HV: add virtual mode handlers for
 HPT hcalls and page faults

In order to support hash guests in the P9 path (which does not do real
mode hcalls or page fault handling), these real-mode hash specific
interrupts need to be implemented in virt mode.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-29-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c        | 145 ++++++++++++++++++++++++++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   8 ++
 2 files changed, 144 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9ba77747bf005..dee740a3ace9e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -939,6 +939,52 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		return RESUME_HOST;
 
 	switch (req) {
+	case H_REMOVE:
+		ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_ENTER:
+		ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6),
+					kvmppc_get_gpr(vcpu, 7));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_READ:
+		ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_CLEAR_MOD:
+		ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_CLEAR_REF:
+		ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_PROTECT:
+		ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+					kvmppc_get_gpr(vcpu, 5),
+					kvmppc_get_gpr(vcpu, 6));
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+	case H_BULK_REMOVE:
+		ret = kvmppc_h_bulk_remove(vcpu);
+		if (ret == H_TOO_HARD)
+			return RESUME_HOST;
+		break;
+
 	case H_CEDE:
 		break;
 	case H_PROD:
@@ -1138,6 +1184,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	default:
 		return RESUME_HOST;
 	}
+	WARN_ON_ONCE(ret == H_TOO_HARD);
 	kvmppc_set_gpr(vcpu, 3, ret);
 	vcpu->arch.hcall_needed = 0;
 	return RESUME_GUEST;
@@ -1438,22 +1485,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 	 * We get these next two if the guest accesses a page which it thinks
 	 * it has mapped but which is not actually present, either because
 	 * it is for an emulated I/O device or because the corresonding
-	 * host page has been paged out.  Any other HDSI/HISI interrupts
-	 * have been handled already.
+	 * host page has been paged out.
+	 *
+	 * Any other HDSI/HISI interrupts have been handled already for P7/8
+	 * guests. For POWER9 hash guests not using rmhandlers, basic hash
+	 * fault handling is done here.
 	 */
-	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
-		r = RESUME_PAGE_FAULT;
-		if (vcpu->arch.fault_dsisr == HDSISR_CANARY)
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+		unsigned long vsid;
+		long err;
+
+		if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
 			r = RESUME_GUEST; /* Just retry if it's the canary */
+			break;
+		}
+
+		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/*
+			 * Radix doesn't require anything, and pre-ISAv3.0 hash
+			 * already attempted to handle this in rmhandlers. The
+			 * hash fault handling below is v3 only (it uses ASDR
+			 * via fault_gpa).
+			 */
+			r = RESUME_PAGE_FAULT;
+			break;
+		}
+
+		if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+			kvmppc_core_queue_data_storage(vcpu,
+				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		if (!(vcpu->arch.shregs.msr & MSR_DR))
+			vsid = vcpu->kvm->arch.vrma_slb_v;
+		else
+			vsid = vcpu->arch.fault_gpa;
+
+		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+				vsid, vcpu->arch.fault_dsisr, true);
+		if (err == 0) {
+			r = RESUME_GUEST;
+		} else if (err == -1 || err == -2) {
+			r = RESUME_PAGE_FAULT;
+		} else {
+			kvmppc_core_queue_data_storage(vcpu,
+				vcpu->arch.fault_dar, err);
+			r = RESUME_GUEST;
+		}
 		break;
-	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+	}
+	case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+		unsigned long vsid;
+		long err;
+
 		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
 		vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
 			DSISR_SRR1_MATCH_64S;
-		if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
-			vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
-		r = RESUME_PAGE_FAULT;
+		if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+			/*
+			 * Radix doesn't require anything, and pre-ISAv3.0 hash
+			 * already attempted to handle this in rmhandlers. The
+			 * hash fault handling below is v3 only (it uses ASDR
+			 * via fault_gpa).
+			 */
+			if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
+				vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+			r = RESUME_PAGE_FAULT;
+			break;
+		}
+
+		if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+			kvmppc_core_queue_inst_storage(vcpu,
+				vcpu->arch.fault_dsisr);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		if (!(vcpu->arch.shregs.msr & MSR_IR))
+			vsid = vcpu->kvm->arch.vrma_slb_v;
+		else
+			vsid = vcpu->arch.fault_gpa;
+
+		err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+				vsid, vcpu->arch.fault_dsisr, false);
+		if (err == 0) {
+			r = RESUME_GUEST;
+		} else if (err == -1) {
+			r = RESUME_PAGE_FAULT;
+		} else {
+			kvmppc_core_queue_inst_storage(vcpu, err);
+			r = RESUME_GUEST;
+		}
 		break;
+	}
+
 	/*
 	 * This occurs if the guest executes an illegal instruction.
 	 * If the guest debug is disabled, generate a program interrupt
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7af7c70f14680..8cc73abbf42b3 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -409,6 +409,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 				 vcpu->arch.pgdir, true,
 				 &vcpu->arch.regs.gpr[4]);
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
 
 #ifdef __BIG_ENDIAN__
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
@@ -553,6 +554,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
 				  &vcpu->arch.regs.gpr[4]);
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
 
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
@@ -671,6 +673,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
 
 long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		      unsigned long pte_index, unsigned long avpn)
@@ -741,6 +744,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
 
 long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 		   unsigned long pte_index)
@@ -781,6 +785,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
 
 long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 			unsigned long pte_index)
@@ -829,6 +834,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
 
 long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 			unsigned long pte_index)
@@ -876,6 +882,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
 	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
 
 static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
 			  unsigned long gpa, int writing, unsigned long *hpa,
@@ -1294,3 +1301,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 
 	return -1;		/* send fault up to host kernel mode */
 }
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
-- 
GitLab


From ac3c8b41c27ea112daed031f852a4b361c11a03e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:49 +1000
Subject: [PATCH 2639/3804] KVM: PPC: Book3S HV P9: Reflect userspace hcalls to
 hash guests to support PR KVM

The reflection of sc 1 interrupts from guest PR=1 to the guest kernel is
required to support a hash guest running PR KVM where its guest is
making hcalls with sc 1.

In preparation for hash guest support, add this hcall reflection to the
P9 path. The P7/8 path does this in its realmode hcall handler
(sc_1_fast_return).

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-30-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index dee740a3ace9e..493f67f27d068 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1457,13 +1457,23 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 			 * Guest userspace executed sc 1. This can only be
 			 * reached by the P9 path because the old path
 			 * handles this case in realmode hcall handlers.
-			 *
-			 * Radix guests can not run PR KVM or nested HV hash
-			 * guests which might run PR KVM, so this is always
-			 * a privilege fault. Send a program check to guest
-			 * kernel.
 			 */
-			kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+			if (!kvmhv_vcpu_is_radix(vcpu)) {
+				/*
+				 * A guest could be running PR KVM, so this
+				 * may be a PR KVM hcall. It must be reflected
+				 * to the guest kernel as a sc interrupt.
+				 */
+				kvmppc_core_queue_syscall(vcpu);
+			} else {
+				/*
+				 * Radix guests can not run PR KVM or nested HV
+				 * hash guests which might run PR KVM, so this
+				 * is always a privilege fault. Send a program
+				 * check to guest kernel.
+				 */
+				kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+			}
 			r = RESUME_GUEST;
 			break;
 		}
-- 
GitLab


From 079a09a500c399f804effcf9bb49214cdfa698e5 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:50 +1000
Subject: [PATCH 2640/3804] KVM: PPC: Book3S HV P9: implement hash guest
 support

Implement hash guest support. Guest entry/exit has to restore and
save/clear the SLB, plus several other bits to accommodate hash guests
in the P9 path. Radix host, hash guest support is removed from the P7/8
path.

The HPT hcalls and faults are not handled in real mode, which is a
performance regression. A worst-case fork/exit microbenchmark takes 3x
longer after this patch. kbuild benchmark performance is in the noise,
but the slowdown is likely to be noticed somewhere.

For now, accept this penalty for the benefit of simplifying the P7/8
paths and unifying P9 hash with the new code, because hash is a less
important configuration than radix on processors that support it. Hash
will benefit from future optimisations to this path, including possibly
a faster path to handle such hcalls and interrupts without doing a full
exit.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-31-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_asm.h      |  2 +-
 arch/powerpc/kvm/book3s_64_entry.S      |  2 +-
 arch/powerpc/kvm/book3s_hv.c            | 20 +++--
 arch/powerpc/kvm/book3s_hv_p9_entry.c   | 97 ++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_rm_mmu.c     |  4 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 +---
 6 files changed, 101 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index f4ae37810aa98..e479487488f47 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,7 +147,7 @@
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
 #define KVM_GUEST_MODE_HOST_HV	4
-#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host+guest radix */
+#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host radix */
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index bac664c1a9f7b..7322fea971e43 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -184,7 +184,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
  *
  * Enter the guest on a ISAv3.0 or later system where we have exactly
- * one vcpu per vcore, and both the host and guest are radix.
+ * one vcpu per vcore, and the host is radix.
  */
 .balign	IFETCH_ALIGN_BYTES
 _GLOBAL(kvmppc_p9_enter_guest)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 493f67f27d068..662f599bdc0e9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3875,7 +3875,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		}
 		kvmppc_xive_pull_vcpu(vcpu);
 
-		vcpu->arch.slb_max = 0;
+		if (kvm_is_radix(vcpu->kvm))
+			vcpu->arch.slb_max = 0;
 	}
 
 	dec = mfspr(SPRN_DEC);
@@ -4110,7 +4111,6 @@ out:
 /*
  * This never fails for a radix guest, as none of the operations it does
  * for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
  */
 static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
 {
@@ -4289,8 +4289,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	vc->runner = vcpu;
 
 	/* See if the MMU is ready to go */
-	if (!kvm->arch.mmu_ready)
-		kvmhv_setup_mmu(vcpu);
+	if (!kvm->arch.mmu_ready) {
+		r = kvmhv_setup_mmu(vcpu);
+		if (r) {
+			run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+			run->fail_entry.hardware_entry_failure_reason = 0;
+			vcpu->arch.ret = r;
+			return r;
+		}
+	}
 
 	if (need_resched())
 		cond_resched();
@@ -4303,7 +4310,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 	preempt_disable();
 	pcpu = smp_processor_id();
 	vc->pcpu = pcpu;
-	kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+	if (kvm_is_radix(kvm))
+		kvmppc_prepare_radix_vcpu(vcpu, pcpu);
 
 	local_irq_disable();
 	hard_irq_disable();
@@ -4503,7 +4511,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
-		if (kvm_is_radix(kvm))
+		if (radix_enabled())
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
 		else
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 178f771e299c6..4460f1c23a9db 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -4,6 +4,7 @@
 #include <asm/asm-prototypes.h>
 #include <asm/dbell.h>
 #include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
@@ -55,6 +56,12 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
 #define accumulate_time(vcpu, next) do {} while (0)
 #endif
 
+static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+{
+	asm volatile("slbmfev  %0,%1" : "=r" (*slbev) : "r" (idx));
+	asm volatile("slbmfee  %0,%1" : "=r" (*slbee) : "r" (idx));
+}
+
 static inline void mtslb(u64 slbee, u64 slbev)
 {
 	asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
@@ -65,6 +72,12 @@ static inline void clear_slb_entry(unsigned int idx)
 	mtslb(idx, 0);
 }
 
+static inline void slb_clear_invalidate_partition(void)
+{
+	clear_slb_entry(0);
+	asm volatile(PPC_SLBIA(6));
+}
+
 /*
  * Malicious or buggy radix guests may have inserted SLB entries
  * (only 0..3 because radix always runs with UPRT=1), so these must
@@ -81,7 +94,6 @@ static void radix_clear_slb(void)
 
 static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
 {
-	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	u32 lpid;
 
@@ -99,9 +111,23 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u6
 	isync();
 	mtspr(SPRN_PID, vcpu->arch.pid);
 	isync();
+}
 
-	/* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
-	kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+	u32 lpid;
+	int i;
+
+	lpid = kvm->arch.lpid;
+
+	mtspr(SPRN_LPID, lpid);
+	mtspr(SPRN_LPCR, lpcr);
+	mtspr(SPRN_PID, vcpu->arch.pid);
+
+	for (i = 0; i < vcpu->arch.slb_max; i++)
+		mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+
+	isync();
 }
 
 static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
@@ -115,9 +141,36 @@ static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
 	isync();
 }
 
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	if (kvm_is_radix(kvm)) {
+		radix_clear_slb();
+	} else {
+		int i;
+		int nr = 0;
+
+		/*
+		 * This must run before switching to host (radix host can't
+		 * access all SLBs).
+		 */
+		for (i = 0; i < vcpu->arch.slb_nr; i++) {
+			u64 slbee, slbev;
+			mfslb(i, &slbee, &slbev);
+			if (slbee & SLB_ESID_V) {
+				vcpu->arch.slb[nr].orige = slbee | i;
+				vcpu->arch.slb[nr].origv = slbev;
+				nr++;
+			}
+		}
+		vcpu->arch.slb_max = nr;
+		slb_clear_invalidate_partition();
+	}
+}
+
 int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
 {
 	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *nested = vcpu->arch.nested;
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	s64 hdec;
 	u64 tb, purr, spurr;
@@ -218,10 +271,21 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	mtspr(SPRN_AMOR, ~0UL);
 
-	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
+	if (kvm_is_radix(kvm)) {
+		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+			__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+		switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+		if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+			__mtmsrd(0, 1); /* clear RI */
+
+	} else {
 		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+		switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+	}
 
-	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+	/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+	kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
 
 	/*
 	 * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@@ -229,9 +293,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	 */
 	mtspr(SPRN_HDEC, hdec);
 
-	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-		__mtmsrd(0, 1); /* clear RI */
-
 	mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
 	mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
 	mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@@ -239,10 +300,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	accumulate_time(vcpu, &vcpu->arch.guest_time);
 
-	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
 	kvmppc_p9_enter_guest(vcpu);
-	// Radix host and guest means host never runs with guest MMU state
-	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
 
 	accumulate_time(vcpu, &vcpu->arch.rm_intr);
 
@@ -343,8 +401,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 #endif
 	}
 
-	radix_clear_slb();
-
 	accumulate_time(vcpu, &vcpu->arch.rm_exit);
 
 	/* Advance host PURR/SPURR by the amount used by guest */
@@ -378,11 +434,14 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 		mtspr(SPRN_DAWRX1, host_dawrx1);
 	}
 
-	/*
-	 * Since this is radix, do a eieio; tlbsync; ptesync sequence in
-	 * case we interrupted the guest between a tlbie and a ptesync.
-	 */
-	asm volatile("eieio; tlbsync; ptesync");
+	if (kvm_is_radix(kvm)) {
+		/*
+		 * Since this is radix, do a eieio; tlbsync; ptesync sequence
+		 * in case we interrupted the guest between a tlbie and a
+		 * ptesync.
+		 */
+		asm volatile("eieio; tlbsync; ptesync");
+	}
 
 	/*
 	 * cp_abort is required if the processor supports local copy-paste
@@ -408,7 +467,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	mtspr(SPRN_HDEC, 0x7fffffff);
 
+	save_clear_guest_mmu(kvm, vcpu);
 	switch_mmu_to_host_radix(kvm, host_pidr);
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
 
 	/*
 	 * If we are in real mode, only switch MMU on after the MMU is
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 8cc73abbf42b3..f487ebb3a70aa 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -57,6 +57,10 @@ static int global_invalidates(struct kvm *kvm)
 	else
 		global = 1;
 
+	/* LPID has been switched to host if in virt mode so can't do local */
+	if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+		global = 1;
+
 	if (!global) {
 		/* any other core might now have stale TLB entries... */
 		smp_wmb();
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 33aa0ef496e50..f7b45c3bff64b 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -888,14 +888,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
-	/* For hash guest, clear out and reload the SLB */
-BEGIN_MMU_FTR_SECTION
-	/* Radix host won't have populated the SLB, so no need to clear */
+	/* Clear out and reload the SLB */
 	li	r6, 0
 	slbmte	r6, r6
 	PPC_SLBIA(6)
 	ptesync
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
 	/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
 	lwz	r5,VCPU_SLB_MAX(r4)
@@ -1373,9 +1370,6 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	stw	r5,VCPU_SLB_MAX(r9)
 
 	/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
-	b	guest_bypass
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
@@ -3131,10 +3125,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 	PPC_SLBIA(6)
 	ptesync
 
-BEGIN_MMU_FTR_SECTION
-	b	4f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-
 	/* load host SLB entries */
 	ld	r8, PACA_SLBSHADOWPTR(r13)
 	.rept	SLB_NUM_BOLTED
@@ -3148,7 +3138,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
 3:	addi	r8, r8, 16
 	.endr
 
-4:	lwz	r7, KVM_HOST_LPID(r10)
+	lwz	r7, KVM_HOST_LPID(r10)
 	mtspr	SPRN_LPID, r7
 	mtspr	SPRN_PID, r0
 	ld	r8, KVM_HOST_LPCR(r10)
-- 
GitLab


From 0bf7e1b2e9a496e1ebca9e3e1f53c7e98add4417 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:51 +1000
Subject: [PATCH 2641/3804] KVM: PPC: Book3S HV P9: implement hash host / hash
 guest support

Implement support for hash guests under hash host. This has to save and
restore the host SLB, and ensure that the MMU is off while switching
into the guest SLB.

POWER9 and later CPUs now always go via the P9 path. The "fast" guest
mode is now renamed to the P9 mode, which is consistent with its
functionality and the rest of the naming.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-32-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_asm.h    |  2 +-
 arch/powerpc/kvm/book3s_64_entry.S    | 15 +++++++----
 arch/powerpc/kvm/book3s_hv.c          |  4 ++-
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 36 ++++++++++++++++++++++-----
 4 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index e479487488f47..fbbf3cec92e90 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -147,7 +147,7 @@
 #define KVM_GUEST_MODE_SKIP	2
 #define KVM_GUEST_MODE_GUEST_HV	3
 #define KVM_GUEST_MODE_HOST_HV	4
-#define KVM_GUEST_MODE_HV_FAST	5 /* ISA >= v3.0 host radix */
+#define KVM_GUEST_MODE_HV_P9	5 /* ISA >= v3.0 path */
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index 7322fea971e43..983b8c18bc31e 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -36,7 +36,7 @@
 kvmppc_hcall:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	lbz	r10,HSTATE_IN_GUEST(r13)
-	cmpwi	r10,KVM_GUEST_MODE_HV_FAST
+	cmpwi	r10,KVM_GUEST_MODE_HV_P9
 	beq	kvmppc_p9_exit_hcall
 #endif
 	ld	r10,PACA_EXGEN+EX_R13(r13)
@@ -68,7 +68,7 @@ kvmppc_interrupt:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	std	r10,HSTATE_SCRATCH0(r13)
 	lbz	r10,HSTATE_IN_GUEST(r13)
-	cmpwi	r10,KVM_GUEST_MODE_HV_FAST
+	cmpwi	r10,KVM_GUEST_MODE_HV_P9
 	beq	kvmppc_p9_exit_interrupt
 	ld	r10,HSTATE_SCRATCH0(r13)
 #endif
@@ -183,8 +183,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 /*
  * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
  *
- * Enter the guest on a ISAv3.0 or later system where we have exactly
- * one vcpu per vcore, and the host is radix.
+ * Enter the guest on a ISAv3.0 or later system.
  */
 .balign	IFETCH_ALIGN_BYTES
 _GLOBAL(kvmppc_p9_enter_guest)
@@ -284,7 +283,7 @@ kvmppc_p9_exit_hcall:
 .balign	IFETCH_ALIGN_BYTES
 kvmppc_p9_exit_interrupt:
 	/*
-	 * If set to KVM_GUEST_MODE_HV_FAST but we're still in the
+	 * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
 	 * hypervisor, that means we can't return from the entry stack.
 	 */
 	rldicl. r10,r12,64-MSR_HV_LG,63
@@ -358,6 +357,12 @@ kvmppc_p9_exit_interrupt:
  * effort for a small bit of code. Lots of other things to do first.
  */
 kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+	/*
+	 * Hash host doesn't try to recover MMU (requires host SLB reload)
+	 */
+	b	.
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 	/*
 	 * Clean up guest registers to give host a chance to run.
 	 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 662f599bdc0e9..045458e7192a4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4511,7 +4511,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
-		if (radix_enabled())
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
 			r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
 						  vcpu->arch.vcore->lpcr);
 		else
@@ -5599,6 +5599,8 @@ static int kvmhv_enable_nested(struct kvm *kvm)
 		return -EPERM;
 	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		return -ENODEV;
+	if (!radix_enabled())
+		return -ENODEV;
 
 	/* kvm == NULL means the caller is testing if the capability exists */
 	if (kvm)
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 4460f1c23a9db..83f592eadcd2d 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -130,7 +130,7 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64
 	isync();
 }
 
-static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
 {
 	isync();
 	mtspr(SPRN_PID, pid);
@@ -139,6 +139,22 @@ static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
 	isync();
 	mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
 	isync();
+
+	if (!radix_enabled())
+		slb_restore_bolted_realmode();
+}
+
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+	if (!radix_enabled()) {
+		/*
+		 * Hash host could save and restore host SLB entries to
+		 * reduce SLB fault overheads of VM exits, but for now the
+		 * existing code clears all entries and restores just the
+		 * bolted ones when switching back to host.
+		 */
+		slb_clear_invalidate_partition();
+	}
 }
 
 static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
@@ -271,16 +287,24 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	mtspr(SPRN_AMOR, ~0UL);
 
-	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_FAST;
+	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
+
+	/*
+	 * Hash host, hash guest, or radix guest with prefetch bug, all have
+	 * to disable the MMU before switching to guest MMU state.
+	 */
+	if (!radix_enabled() || !kvm_is_radix(kvm) ||
+			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+	save_clear_host_mmu(kvm);
+
 	if (kvm_is_radix(kvm)) {
-		if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-			__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
 		switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
 		if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
 			__mtmsrd(0, 1); /* clear RI */
 
 	} else {
-		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
 		switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
 	}
 
@@ -468,7 +492,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 	mtspr(SPRN_HDEC, 0x7fffffff);
 
 	save_clear_guest_mmu(kvm, vcpu);
-	switch_mmu_to_host_radix(kvm, host_pidr);
+	switch_mmu_to_host(kvm, host_pidr);
 	local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
 
 	/*
-- 
GitLab


From fae5c9f3664ba278137e54a2083b39b90c64093a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 28 May 2021 19:07:52 +1000
Subject: [PATCH 2642/3804] KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1
 support from P7/8 path

POWER9 and later processors always go via the P9 guest entry path now.
Remove the remaining support from the P7/8 path.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210528090752.3542186-33-npiggin@gmail.com
---
 arch/powerpc/kvm/book3s_hv.c            |  62 ++--
 arch/powerpc/kvm/book3s_hv_interrupts.S |   9 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 421 +-----------------------
 arch/powerpc/platforms/powernv/idle.c   |  52 +--
 4 files changed, 43 insertions(+), 501 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 045458e7192a4..f4dc4f0c34b5e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -130,9 +130,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
 	return kvm->arch.nested_enable && kvm_is_radix(kvm);
 }
 
-/* If set, the threads on each CPU core have to be in the same MMU mode */
-static bool no_mixing_hpt_and_radix __read_mostly;
-
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
 /*
@@ -3133,9 +3130,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
 	for_each_runnable_thread(i, vcpu, vc) {
 		if (signal_pending(vcpu->arch.run_task))
 			vcpu->arch.ret = -EINTR;
-		else if (no_mixing_hpt_and_radix &&
-			 kvm_is_radix(vc->kvm) != radix_enabled())
-			vcpu->arch.ret = -EINVAL;
 		else if (vcpu->arch.vpa.update_pending ||
 			 vcpu->arch.slb_shadow.update_pending ||
 			 vcpu->arch.dtl.update_pending)
@@ -3342,6 +3336,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	int trap;
 	bool is_power8;
 
+	if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+		return;
+
 	/*
 	 * Remove from the list any threads that have a signal pending
 	 * or need a VPA update done
@@ -3369,9 +3366,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 * Make sure we are running on primary threads, and that secondary
 	 * threads are offline.  Also check if the number of threads in this
 	 * guest are greater than the current system threads per guest.
-	 * On POWER9, we need to be not in independent-threads mode if
-	 * this is a HPT guest on a radix host machine where the
-	 * CPU threads may not be in different MMU modes.
 	 */
 	if ((controlled_threads > 1) &&
 	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3395,18 +3389,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	if (vc->num_threads < target_threads)
 		collect_piggybacks(&core_info, target_threads);
 
-	/*
-	 * On radix, arrange for TLB flushing if necessary.
-	 * This has to be done before disabling interrupts since
-	 * it uses smp_call_function().
-	 */
-	pcpu = smp_processor_id();
-	if (kvm_is_radix(vc->kvm)) {
-		for (sub = 0; sub < core_info.n_subcores; ++sub)
-			for_each_runnable_thread(i, vcpu, core_info.vc[sub])
-				kvmppc_prepare_radix_vcpu(vcpu, pcpu);
-	}
-
 	/*
 	 * Hard-disable interrupts, and check resched flag and signals.
 	 * If we need to reschedule or deliver a signal, clean up
@@ -3439,8 +3421,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	cmd_bit = stat_bit = 0;
 	split = core_info.n_subcores;
 	sip = NULL;
-	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
-		&& !cpu_has_feature(CPU_FTR_ARCH_300);
+	is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
 
 	if (split > 1) {
 		sip = &split_info;
@@ -3738,8 +3719,7 @@ static inline bool hcall_is_xics(unsigned long req)
 }
 
 /*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU.  The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
  */
 static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 			 unsigned long lpcr)
@@ -5762,11 +5742,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
 
 static bool kvmppc_hash_v3_possible(void)
 {
-	if (radix_enabled() && no_mixing_hpt_and_radix)
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
 		return false;
 
-	return cpu_has_feature(CPU_FTR_ARCH_300) &&
-		cpu_has_feature(CPU_FTR_HVMODE);
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
+		return false;
+
+	/*
+	 * POWER9 chips before version 2.02 can't have some threads in
+	 * HPT mode and some in radix mode on the same core.
+	 */
+	if (radix_enabled()) {
+		unsigned int pvr = mfspr(SPRN_PVR);
+		if ((pvr >> 16) == PVR_POWER9 &&
+		    (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
+		     ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
+			return false;
+	}
+
+	return true;
 }
 
 static struct kvmppc_ops kvm_ops_hv = {
@@ -5910,18 +5904,6 @@ static int kvmppc_book3s_init_hv(void)
 	if (kvmppc_radix_possible())
 		r = kvmppc_radix_init();
 
-	/*
-	 * POWER9 chips before version 2.02 can't have some threads in
-	 * HPT mode and some in radix mode on the same core.
-	 */
-	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-		unsigned int pvr = mfspr(SPRN_PVR);
-		if ((pvr >> 16) == PVR_POWER9 &&
-		    (((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
-		     ((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
-			no_mixing_hpt_and_radix = true;
-	}
-
 	r = kvmppc_uvmem_init();
 	if (r < 0)
 		pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 327417d79eac1..4444f83cb1338 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/*
 	 * Put whatever is in the decrementer into the
 	 * hypervisor decrementer.
-	 * Because of a hardware deviation in P8 and P9,
+	 * Because of a hardware deviation in P8,
 	 * we need to set LPCR[HDICE] before writing HDEC.
 	 */
 	ld	r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	ori	r8, r9, LPCR_HDICE
 	mtspr	SPRN_LPCR, r8
 	isync
-	andis.	r0, r9, LPCR_LD@h
 	mfspr	r8,SPRN_DEC
 	mftb	r7
-BEGIN_FTR_SECTION
-	/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
-	bne	32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r8,r8
-32:	mtspr	SPRN_HDEC,r8
+	mtspr	SPRN_HDEC,r8
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f7b45c3bff64b..8dd437d7a2c63 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -25,18 +25,10 @@
 #include <asm/export.h>
 #include <asm/tm.h>
 #include <asm/opal.h>
-#include <asm/xive-regs.h>
 #include <asm/thread_info.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
 #include <asm/cpuidle.h>
-#include <asm/ultravisor-api.h>
-
-/* Sign-extend HDEC if not on POWER9 */
-#define EXTEND_HDEC(reg)			\
-BEGIN_FTR_SECTION;				\
-	extsw	reg, reg;			\
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
@@ -56,9 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 #define STACK_SLOT_HFSCR	(SFS-72)
 #define STACK_SLOT_AMR		(SFS-80)
 #define STACK_SLOT_UAMOR	(SFS-88)
-#define STACK_SLOT_DAWR1	(SFS-96)
-#define STACK_SLOT_DAWRX1	(SFS-104)
-#define STACK_SLOT_FSCR		(SFS-112)
+#define STACK_SLOT_FSCR		(SFS-96)
 
 /*
  * Call kvmppc_hv_entry in real mode.
@@ -229,7 +219,7 @@ kvm_novcpu_wakeup:
 
 	/* See if our timeslice has expired (HDEC is negative) */
 	mfspr	r0, SPRN_HDEC
-	EXTEND_HDEC(r0)
+	extsw	r0, r0
 	li	r12, BOOK3S_INTERRUPT_HV_DECREMENTER
 	cmpdi	r0, 0
 	blt	kvm_novcpu_exit
@@ -331,10 +321,8 @@ kvm_secondary_got_guest:
 	lbz	r4, HSTATE_PTID(r13)
 	cmpwi	r4, 0
 	bne	63f
-	LOAD_REG_ADDR(r6, decrementer_max)
-	ld	r6, 0(r6)
+	lis	r6,0x7fff		/* MAX_INT@h */
 	mtspr	SPRN_HDEC, r6
-BEGIN_FTR_SECTION
 	/* and set per-LPAR registers, if doing dynamic micro-threading */
 	ld	r6, HSTATE_SPLIT_MODE(r13)
 	cmpdi	r6, 0
@@ -346,7 +334,6 @@ BEGIN_FTR_SECTION
 	ld	r0, KVM_SPLIT_LDBAR(r6)
 	mtspr	SPRN_LDBAR, r0
 	isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 63:
 	/* Order load of vcpu after load of vcore */
 	lwsync
@@ -417,7 +404,6 @@ kvm_no_guest:
 	blr
 
 53:
-BEGIN_FTR_SECTION
 	HMT_LOW
 	ld	r5, HSTATE_KVM_VCORE(r13)
 	cmpdi	r5, 0
@@ -432,14 +418,6 @@ BEGIN_FTR_SECTION
 	b	kvm_unsplit_nap
 60:	HMT_MEDIUM
 	b	kvm_secondary_got_guest
-FTR_SECTION_ELSE
-	HMT_LOW
-	ld	r5, HSTATE_KVM_VCORE(r13)
-	cmpdi	r5, 0
-	beq	kvm_no_guest
-	HMT_MEDIUM
-	b	kvm_secondary_got_guest
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 54:	li	r0, KVM_HWTHREAD_IN_KVM
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
@@ -565,13 +543,11 @@ kvmppc_hv_entry:
 	bne	10f
 
 	lwz	r7,KVM_LPID(r9)
-BEGIN_FTR_SECTION
 	ld	r6,KVM_SDR1(r9)
 	li	r0,LPID_RSVD		/* switch to reserved LPID */
 	mtspr	SPRN_LPID,r0
 	ptesync
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPID,r7
 	isync
 
@@ -651,16 +627,6 @@ kvmppc_got_guest:
 	mtspr	SPRN_SPURR,r8
 
 	/* Save host values of some registers */
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_TIDR
-	mfspr	r6, SPRN_PSSCR
-	mfspr	r7, SPRN_PID
-	std	r5, STACK_SLOT_TID(r1)
-	std	r6, STACK_SLOT_PSSCR(r1)
-	std	r7, STACK_SLOT_PID(r1)
-	mfspr	r5, SPRN_HFSCR
-	std	r5, STACK_SLOT_HFSCR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_CIABR
 	mfspr	r6, SPRN_DAWR0
@@ -673,12 +639,6 @@ BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_FSCR
 	std	r5, STACK_SLOT_FSCR(r1)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	mfspr	r6, SPRN_DAWR1
-	mfspr	r7, SPRN_DAWRX1
-	std	r6, STACK_SLOT_DAWR1(r1)
-	std	r7, STACK_SLOT_DAWRX1(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
 
 	mfspr	r5, SPRN_AMR
 	std	r5, STACK_SLOT_AMR(r1)
@@ -696,13 +656,9 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
 BEGIN_FTR_SECTION
 	b	91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
@@ -769,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	ld	r6, VCPU_DAWRX0(r4)
 	mtspr	SPRN_DAWR0, r5
 	mtspr	SPRN_DAWRX0, r6
-BEGIN_FTR_SECTION
-	ld	r5, VCPU_DAWR1(r4)
-	ld	r6, VCPU_DAWRX1(r4)
-	mtspr	SPRN_DAWR1, r5
-	mtspr	SPRN_DAWRX1, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 1:
 	ld	r7, VCPU_CIABR(r4)
 	ld	r8, VCPU_TAR(r4)
@@ -792,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
 	mtspr	SPRN_BESCR, r6
 	mtspr	SPRN_PID, r7
 	mtspr	SPRN_WORT, r8
-BEGIN_FTR_SECTION
 	/* POWER8-only registers */
 	ld	r5, VCPU_TCSCR(r4)
 	ld	r6, VCPU_ACOP(r4)
@@ -803,18 +752,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_CSIGR, r7
 	mtspr	SPRN_TACR, r8
 	nop
-FTR_SECTION_ELSE
-	/* POWER9-only registers */
-	ld	r5, VCPU_TID(r4)
-	ld	r6, VCPU_PSSCR(r4)
-	lbz	r8, HSTATE_FAKE_SUSPEND(r13)
-	oris	r6, r6, PSSCR_EC@h	/* This makes stop trap to HV */
-	rldimi	r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
-	ld	r7, VCPU_HFSCR(r4)
-	mtspr	SPRN_TIDR, r5
-	mtspr	SPRN_PSSCR, r6
-	mtspr	SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 8:
 
 	ld	r5, VCPU_SPRG0(r4)
@@ -884,7 +821,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Check if HDEC expires soon */
 	mfspr	r3, SPRN_HDEC
-	EXTEND_HDEC(r3)
+	extsw	r3, r3
 	cmpdi	r3, 512		/* 1 microsecond */
 	blt	hdec_soon
 
@@ -907,93 +844,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	bdnz	1b
 9:
 
-#ifdef CONFIG_KVM_XICS
-	/* We are entering the guest on that thread, push VCPU to XIVE */
-	ld	r11, VCPU_XIVE_SAVED_STATE(r4)
-	li	r9, TM_QW1_OS
-	lwz	r8, VCPU_XIVE_CAM_WORD(r4)
-	cmpwi	r8, 0
-	beq	no_xive
-	li	r7, TM_QW1_OS + TM_WORD2
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
-	beq	2f
-	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
-	cmpldi	cr1, r10, 0
-	beq     cr1, no_xive
-	eieio
-	stdx	r11,r9,r10
-	stwx	r8,r7,r10
-	b	3f
-2:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
-	cmpldi	cr1, r10, 0
-	beq	cr1, no_xive
-	eieio
-	stdcix	r11,r9,r10
-	stwcix	r8,r7,r10
-3:	li	r9, 1
-	stb	r9, VCPU_XIVE_PUSHED(r4)
-	eieio
-
-	/*
-	 * We clear the irq_pending flag. There is a small chance of a
-	 * race vs. the escalation interrupt happening on another
-	 * processor setting it again, but the only consequence is to
-	 * cause a spurrious wakeup on the next H_CEDE which is not an
-	 * issue.
-	 */
-	li	r0,0
-	stb	r0, VCPU_IRQ_PENDING(r4)
-
-	/*
-	 * In single escalation mode, if the escalation interrupt is
-	 * on, we mask it.
-	 */
-	lbz	r0, VCPU_XIVE_ESC_ON(r4)
-	cmpwi	cr1, r0,0
-	beq	cr1, 1f
-	li	r9, XIVE_ESB_SET_PQ_01
-	beq	4f			/* in real mode? */
-	ld	r10, VCPU_XIVE_ESC_VADDR(r4)
-	ldx	r0, r10, r9
-	b	5f
-4:	ld	r10, VCPU_XIVE_ESC_RADDR(r4)
-	ldcix	r0, r10, r9
-5:	sync
-
-	/* We have a possible subtle race here: The escalation interrupt might
-	 * have fired and be on its way to the host queue while we mask it,
-	 * and if we unmask it early enough (re-cede right away), there is
-	 * a theorical possibility that it fires again, thus landing in the
-	 * target queue more than once which is a big no-no.
-	 *
-	 * Fortunately, solving this is rather easy. If the above load setting
-	 * PQ to 01 returns a previous value where P is set, then we know the
-	 * escalation interrupt is somewhere on its way to the host. In that
-	 * case we simply don't clear the xive_esc_on flag below. It will be
-	 * eventually cleared by the handler for the escalation interrupt.
-	 *
-	 * Then, when doing a cede, we check that flag again before re-enabling
-	 * the escalation interrupt, and if set, we abort the cede.
-	 */
-	andi.	r0, r0, XIVE_ESB_VAL_P
-	bne-	1f
-
-	/* Now P is 0, we can clear the flag */
-	li	r0, 0
-	stb	r0, VCPU_XIVE_ESC_ON(r4)
-1:
-no_xive:
-#endif /* CONFIG_KVM_XICS */
-
 deliver_guest_interrupt:	/* r4 = vcpu, r13 = paca */
 	/* Check if we can deliver an external or decrementer interrupt now */
 	ld	r0, VCPU_PENDING_EXC(r4)
-BEGIN_FTR_SECTION
-	/* On POWER9, also check for emulated doorbell interrupt */
-	lbz	r3, VCPU_DBELL_REQ(r4)
-	or	r0, r0, r3
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	cmpdi	r0, 0
 	beq	71f
 	mr	r3, r4
@@ -1066,12 +919,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_PPR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
-/* Move canary into DSISR to check for later */
-BEGIN_FTR_SECTION
-	li	r0, 0x7fff
-	mtspr	SPRN_HDSISR, r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
 	ld	r6, VCPU_GPR(R6)(r4)
 	ld	r7, VCPU_GPR(R7)(r4)
 
@@ -1251,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	bne	2f
 	mfspr	r3,SPRN_HDEC
-	EXTEND_HDEC(r3)
+	extsw	r3, r3
 	cmpdi	r3,0
 	mr	r4,r9
 	bge	fast_guest_return
@@ -1263,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	/* Hypervisor doorbell - exit only if host IPI flag set */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DOORBELL
 	bne	3f
-BEGIN_FTR_SECTION
-	PPC_MSGSYNC
-	lwsync
-	/* always exit if we're running a nested guest */
-	ld	r0, VCPU_NESTED(r9)
-	cmpdi	r0, 0
-	bne	guest_exit_cont
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	beq	maybe_reenter_guest
@@ -1300,43 +1139,6 @@ guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	mr	r4, r9
 	bl	kvmhv_accumulate_time
 #endif
-#ifdef CONFIG_KVM_XICS
-	/* We are exiting, pull the VP from the XIVE */
-	lbz	r0, VCPU_XIVE_PUSHED(r9)
-	cmpwi	cr0, r0, 0
-	beq	1f
-	li	r7, TM_SPC_PULL_OS_CTX
-	li	r6, TM_QW1_OS
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
-	beq	2f
-	ld	r10, HSTATE_XIVE_TIMA_VIRT(r13)
-	cmpldi	cr0, r10, 0
-	beq	1f
-	/* First load to pull the context, we ignore the value */
-	eieio
-	lwzx	r11, r7, r10
-	/* Second load to recover the context state (Words 0 and 1) */
-	ldx	r11, r6, r10
-	b	3f
-2:	ld	r10, HSTATE_XIVE_TIMA_PHYS(r13)
-	cmpldi	cr0, r10, 0
-	beq	1f
-	/* First load to pull the context, we ignore the value */
-	eieio
-	lwzcix	r11, r7, r10
-	/* Second load to recover the context state (Words 0 and 1) */
-	ldcix	r11, r6, r10
-3:	std	r11, VCPU_XIVE_SAVED_STATE(r9)
-	/* Fixup some of the state for the next load */
-	li	r10, 0
-	li	r0, 0xff
-	stb	r10, VCPU_XIVE_PUSHED(r9)
-	stb	r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
-	stb	r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
-	eieio
-1:
-#endif /* CONFIG_KVM_XICS */
 
 	/*
 	 * Possibly flush the link stack here, before we do a blr in
@@ -1391,12 +1193,6 @@ guest_bypass:
 	ld	r3, HSTATE_KVM_VCORE(r13)
 	mfspr	r5,SPRN_DEC
 	mftb	r6
-	/* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
-	ld	r4, VCORE_LPCR(r3)
-	andis.	r4, r4, LPCR_LD@h
-	bne	16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r5,r5
 16:	add	r5,r5,r6
 	/* r5 is a guest timebase value here, convert to host TB */
@@ -1470,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	std	r6, VCPU_BESCR(r9)
 	stw	r7, VCPU_GUEST_PID(r9)
 	std	r8, VCPU_WORT(r9)
-BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_TCSCR
 	mfspr	r6, SPRN_ACOP
 	mfspr	r7, SPRN_CSIGR
@@ -1479,17 +1274,6 @@ BEGIN_FTR_SECTION
 	std	r6, VCPU_ACOP(r9)
 	std	r7, VCPU_CSIGR(r9)
 	std	r8, VCPU_TACR(r9)
-FTR_SECTION_ELSE
-	mfspr	r5, SPRN_TIDR
-	mfspr	r6, SPRN_PSSCR
-	std	r5, VCPU_TID(r9)
-	rldicl	r6, r6, 4, 50		/* r6 &= PSSCR_GUEST_VIS */
-	rotldi	r6, r6, 60
-	std	r6, VCPU_PSSCR(r9)
-	/* Restore host HFSCR value */
-	ld	r7, STACK_SLOT_HFSCR(r1)
-	mtspr	SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 BEGIN_FTR_SECTION
 	ld	r5, STACK_SLOT_FSCR(r1)
 	mtspr	SPRN_FSCR, r5
@@ -1501,13 +1285,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	li	r0, 0
 	mtspr	SPRN_PSPB, r0
 	mtspr	SPRN_WORT, r0
-BEGIN_FTR_SECTION
 	mtspr	SPRN_TCSCR, r0
 	/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
 	li	r0, 1
 	sldi	r0, r0, 31
 	mtspr	SPRN_MMCRS, r0
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
 	/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
 	ld	r8, STACK_SLOT_IAMR(r1)
@@ -1564,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	bl	kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
 BEGIN_FTR_SECTION
 	b	91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
@@ -1616,28 +1394,6 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_DAWR0, r6
 	mtspr	SPRN_DAWRX0, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
-	ld	r6, STACK_SLOT_DAWR1(r1)
-	ld	r7, STACK_SLOT_DAWRX1(r1)
-	mtspr	SPRN_DAWR1, r6
-	mtspr	SPRN_DAWRX1, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
-BEGIN_FTR_SECTION
-	ld	r5, STACK_SLOT_TID(r1)
-	ld	r6, STACK_SLOT_PSSCR(r1)
-	ld	r7, STACK_SLOT_PID(r1)
-	mtspr	SPRN_TIDR, r5
-	mtspr	SPRN_PSSCR, r6
-	mtspr	SPRN_PID, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
-	/*
-	 * cp_abort is required if the processor supports local copy-paste
-	 * to clear the copy buffer that was under control of the guest.
-	 */
-BEGIN_FTR_SECTION
-	PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 
 	/*
 	 * POWER7/POWER8 guest -> host partition switch code.
@@ -1674,13 +1430,11 @@ kvmhv_switch_to_host:
 
 	/* Primary thread switches back to host partition */
 	lwz	r7,KVM_HOST_LPID(r4)
-BEGIN_FTR_SECTION
 	ld	r6,KVM_HOST_SDR1(r4)
 	li	r8,LPID_RSVD		/* switch to reserved LPID */
 	mtspr	SPRN_LPID,r8
 	ptesync
 	mtspr	SPRN_SDR1,r6		/* switch to host page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPID,r7
 	isync
 
@@ -1891,20 +1645,11 @@ kvmppc_tm_emul:
 kvmppc_hdsi:
 	mfspr	r4, SPRN_HDAR
 	mfspr	r6, SPRN_HDSISR
-BEGIN_FTR_SECTION
-	/* Look for DSISR canary. If we find it, retry instruction */
-	cmpdi	r6, 0x7fff
-	beq	6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	/* HPTE not found fault or protection fault? */
 	andis.	r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
 	beq	1f			/* if not, send it to the guest */
 	andi.	r0, r11, MSR_DR		/* data relocation enabled? */
 	beq	3f
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
-	b	4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	clrrdi	r0, r4, 28
 	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
 	li	r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -1981,10 +1726,6 @@ kvmppc_hisi:
 	beq	1f
 	andi.	r0, r11, MSR_IR		/* instruction relocation enabled? */
 	beq	3f
-BEGIN_FTR_SECTION
-	mfspr	r5, SPRN_ASDR		/* on POWER9, use ASDR to get VSID */
-	b	4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	clrrdi	r0, r10, 28
 	PPC_SLBFEE_DOT(R5, R0)		/* if so, look up SLB */
 	li	r0, BOOK3S_INTERRUPT_INST_SEGMENT
@@ -2032,10 +1773,6 @@ hcall_try_real_mode:
 	andi.	r0,r11,MSR_PR
 	/* sc 1 from userspace - reflect to guest syscall */
 	bne	sc_1_fast_return
-	/* sc 1 from nested guest - give it to L1 to handle */
-	ld	r0, VCPU_NESTED(r9)
-	cmpdi	r0, 0
-	bne	guest_exit_cont
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
 	bge	guest_exit_cont
@@ -2431,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede)		/* r3 = vcpu pointer, r11 = msr, r13 = paca */
 	bl	kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
 BEGIN_FTR_SECTION
 	b	91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
@@ -2457,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
 	mfspr	r3, SPRN_DEC
 	mfspr	r4, SPRN_HDEC
 	mftb	r5
-BEGIN_FTR_SECTION
-	/* On P9 check whether the guest has large decrementer mode enabled */
-	ld	r6, HSTATE_KVM_VCORE(r13)
-	ld	r6, VCORE_LPCR(r6)
-	andis.	r6, r6, LPCR_LD@h
-	bne	68f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	extsw	r3, r3
-68:	EXTEND_HDEC(r4)
+	extsw	r4, r4
 	cmpd	r3, r4
 	ble	67f
 	mtspr	SPRN_DEC, r4
@@ -2510,28 +2236,11 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 kvm_nap_sequence:		/* desired LPCR value in r5 */
-BEGIN_FTR_SECTION
-	/*
-	 * PSSCR bits:	exit criterion = 1 (wakeup based on LPCR at sreset)
-	 *		enable state loss = 1 (allow SMT mode switch)
-	 *		requested level = 0 (just stop dispatching)
-	 */
-	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
-	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
-	li	r4, LPCR_PECE_HVEE@higher
-	sldi	r4, r4, 32
-	or	r5, r5, r4
-FTR_SECTION_ELSE
 	li	r3, PNV_THREAD_NAP
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r5
 	isync
 
-BEGIN_FTR_SECTION
-	bl	isa300_idle_stop_mayloss
-FTR_SECTION_ELSE
 	bl	isa206_idle_insn_mayloss
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 
 	mfspr	r0, SPRN_CTRLF
 	ori	r0, r0, 1
@@ -2550,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	beq	kvm_end_cede
 	cmpwi	r0, NAPPING_NOVCPU
 	beq	kvm_novcpu_wakeup
-BEGIN_FTR_SECTION
 	cmpwi	r0, NAPPING_UNSPLIT
 	beq	kvm_unsplit_wakeup
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	twi	31,0,0 /* Nap state must not be zero */
 
 33:	mr	r4, r3
@@ -2573,13 +2280,9 @@ kvm_end_cede:
 #endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
 BEGIN_FTR_SECTION
 	b	91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 	/*
 	 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
 	 */
@@ -2669,47 +2372,7 @@ kvm_cede_prodded:
 	/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
 	ld	r9, HSTATE_KVM_VCPU(r13)
-#ifdef CONFIG_KVM_XICS
-	/* are we using XIVE with single escalation? */
-	ld	r10, VCPU_XIVE_ESC_VADDR(r9)
-	cmpdi	r10, 0
-	beq	3f
-	li	r6, XIVE_ESB_SET_PQ_00
-	/*
-	 * If we still have a pending escalation, abort the cede,
-	 * and we must set PQ to 10 rather than 00 so that we don't
-	 * potentially end up with two entries for the escalation
-	 * interrupt in the XIVE interrupt queue.  In that case
-	 * we also don't want to set xive_esc_on to 1 here in
-	 * case we race with xive_esc_irq().
-	 */
-	lbz	r5, VCPU_XIVE_ESC_ON(r9)
-	cmpwi	r5, 0
-	beq	4f
-	li	r0, 0
-	stb	r0, VCPU_CEDED(r9)
-	/*
-	 * The escalation interrupts are special as we don't EOI them.
-	 * There is no need to use the load-after-store ordering offset
-	 * to set PQ to 10 as we won't use StoreEOI.
-	 */
-	li	r6, XIVE_ESB_SET_PQ_10
-	b	5f
-4:	li	r0, 1
-	stb	r0, VCPU_XIVE_ESC_ON(r9)
-	/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
-	sync
-5:	/* Enable XIVE escalation */
-	mfmsr	r0
-	andi.	r0, r0, MSR_DR		/* in real mode? */
-	beq	1f
-	ldx	r0, r10, r6
-	b	2f
-1:	ld	r10, VCPU_XIVE_ESC_RADDR(r9)
-	ldcix	r0, r10, r6
-2:	sync
-#endif /* CONFIG_KVM_XICS */
-3:	b	guest_exit_cont
+	b	guest_exit_cont
 
 	/* Try to do machine check recovery in real mode */
 machine_check_realmode:
@@ -2786,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	PPC_MSGCLR(6)
 	/* see if it's a host IPI */
 	li	r3, 1
-BEGIN_FTR_SECTION
-	PPC_MSGSYNC
-	lwsync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 	lbz	r0, HSTATE_HOST_IPI(r13)
 	cmpwi	r0, 0
 	bnelr
@@ -3098,70 +2757,12 @@ kvmppc_bad_host_intr:
 	std	r3, STACK_FRAME_OVERHEAD-16(r1)
 
 	/*
-	 * On POWER9 do a minimal restore of the MMU and call C code,
-	 * which will print a message and panic.
 	 * XXX On POWER7 and POWER8, we just spin here since we don't
 	 * know what the other threads are doing (and we don't want to
 	 * coordinate with them) - but at least we now have register state
 	 * in memory that we might be able to look at from another CPU.
 	 */
-BEGIN_FTR_SECTION
 	b	.
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-	ld	r9, HSTATE_KVM_VCPU(r13)
-	ld	r10, VCPU_KVM(r9)
-
-	li	r0, 0
-	mtspr	SPRN_AMR, r0
-	mtspr	SPRN_IAMR, r0
-	mtspr	SPRN_CIABR, r0
-	mtspr	SPRN_DAWRX0, r0
-BEGIN_FTR_SECTION
-	mtspr	SPRN_DAWRX1, r0
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-
-	/* Clear guest SLB. */
-	slbmte	r0, r0
-	PPC_SLBIA(6)
-	ptesync
-
-	/* load host SLB entries */
-	ld	r8, PACA_SLBSHADOWPTR(r13)
-	.rept	SLB_NUM_BOLTED
-	li	r3, SLBSHADOW_SAVEAREA
-	LDX_BE	r5, r8, r3
-	addi	r3, r3, 8
-	LDX_BE	r6, r8, r3
-	andis.	r7, r5, SLB_ESID_V@h
-	beq	3f
-	slbmte	r6, r5
-3:	addi	r8, r8, 16
-	.endr
-
-	lwz	r7, KVM_HOST_LPID(r10)
-	mtspr	SPRN_LPID, r7
-	mtspr	SPRN_PID, r0
-	ld	r8, KVM_HOST_LPCR(r10)
-	mtspr	SPRN_LPCR, r8
-	isync
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-
-	/*
-	 * Turn on the MMU and jump to C code
-	 */
-	bcl	20, 31, .+4
-5:	mflr	r3
-	addi	r3, r3, 9f - 5b
-	li	r4, -1
-	rldimi	r3, r4, 62, 0	/* ensure 0xc000000000000000 bits are set */
-	ld	r4, PACAKMSR(r13)
-	mtspr	SPRN_SRR0, r3
-	mtspr	SPRN_SRR1, r4
-	RFI_TO_KERNEL
-9:	addi	r3, r1, STACK_FRAME_OVERHEAD
-	bl	kvmppc_bad_interrupt
-	b	9b
 
 /*
  * This mimics the MSR transition on IRQ delivery.  The new guest MSR is taken
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9f..528a7e0cf83aa 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
 	u64 uamor;
 };
 
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
 	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 		/* EC=ESL=0 case */
 
-		BUG_ON(!mmu_on);
-
 		/*
 		 * Wake synchronously. SRESET via xscom may still cause
 		 * a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ core_woken:
 		__slb_restore_bolted_realmode();
 
 out:
-	if (mmu_on)
-		mtmsr(MSR_KERNEL);
+	mtmsr(MSR_KERNEL);
 
 	return srr1;
 }
@@ -895,7 +892,7 @@ struct p10_sprs {
 	 */
 };
 
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
 {
 	int cpu = raw_smp_processor_id();
 	int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
 	if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
 		/* EC=ESL=0 case */
 
-		BUG_ON(!mmu_on);
-
 		/*
 		 * Wake synchronously. SRESET via xscom may still cause
 		 * a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ core_woken:
 		__slb_restore_bolted_realmode();
 
 out:
-	if (mmu_on)
-		mtmsr(MSR_KERNEL);
+	mtmsr(MSR_KERNEL);
 
 	return srr1;
 }
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
 {
 	unsigned long srr1;
 
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	__ppc64_runlatch_off();
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, true);
+		srr1 = power10_idle_stop(psscr);
 	else
-		srr1 = power9_idle_stop(psscr, true);
-	__ppc64_runlatch_on();
-#else
-	/*
-	 * Tell KVM we're entering idle.
-	 * This does not have to be done in real mode because the P9 MMU
-	 * is independent per-thread. Some steppings share radix/hash mode
-	 * between threads, but in that case KVM has a barrier sync in real
-	 * mode before and after switching between radix and hash.
-	 *
-	 * kvm_start_guest must still be called in real mode though, hence
-	 * the false argument.
-	 */
-	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
-	__ppc64_runlatch_off();
-	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, false);
-	else
-		srr1 = power9_idle_stop(psscr, false);
-	__ppc64_runlatch_on();
-
-	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	smp_mb();
-	if (local_paca->kvm_hstate.hwthread_req)
-		srr1 = idle_kvm_start_guest(srr1);
-	mtmsr(MSR_KERNEL);
-#endif
+		srr1 = power9_idle_stop(psscr);
 
 	return srr1;
 }
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
 
 	__ppc64_runlatch_off();
 	if (cpu_has_feature(CPU_FTR_ARCH_31))
-		srr1 = power10_idle_stop(psscr, true);
+		srr1 = power10_idle_stop(psscr);
 	else
-		srr1 = power9_idle_stop(psscr, true);
+		srr1 = power9_idle_stop(psscr);
 	__ppc64_runlatch_on();
 
 	fini_irq_for_idle_irqsoff();
-- 
GitLab


From 78fcb2c91adfec8ce3a2ba6b4d0dda89f2f4a7c6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:11 -0700
Subject: [PATCH 2643/3804] KVM: x86: Immediately reset the MMU context when
 the SMM flag is cleared

Immediately reset the MMU context when the vCPU's SMM flag is cleared so
that the SMM flag in the MMU role is always synchronized with the vCPU's
flag.  If RSM fails (which isn't correctly emulated), KVM will bail
without calling post_leave_smm() and leave the MMU in a bad state.

The bad MMU role can lead to a NULL pointer dereference when grabbing a
shadow page's rmap for a page fault as the initial lookups for the gfn
will happen with the vCPU's SMM flag (=0), whereas the rmap lookup will
use the shadow page's SMM flag, which comes from the MMU (=1).  SMM has
an entirely different set of memslots, and so the initial lookup can find
a memslot (SMM=0) and then explode on the rmap memslot lookup (SMM=1).

  general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN
  KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
  CPU: 1 PID: 8410 Comm: syz-executor382 Not tainted 5.13.0-rc5-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:__gfn_to_rmap arch/x86/kvm/mmu/mmu.c:935 [inline]
  RIP: 0010:gfn_to_rmap+0x2b0/0x4d0 arch/x86/kvm/mmu/mmu.c:947
  Code: <42> 80 3c 20 00 74 08 4c 89 ff e8 f1 79 a9 00 4c 89 fb 4d 8b 37 44
  RSP: 0018:ffffc90000ffef98 EFLAGS: 00010246
  RAX: 0000000000000000 RBX: ffff888015b9f414 RCX: ffff888019669c40
  RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000001
  RBP: 0000000000000001 R08: ffffffff811d9cdb R09: ffffed10065a6002
  R10: ffffed10065a6002 R11: 0000000000000000 R12: dffffc0000000000
  R13: 0000000000000003 R14: 0000000000000001 R15: 0000000000000000
  FS:  000000000124b300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 0000000028e31000 CR4: 00000000001526e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   rmap_add arch/x86/kvm/mmu/mmu.c:965 [inline]
   mmu_set_spte+0x862/0xe60 arch/x86/kvm/mmu/mmu.c:2604
   __direct_map arch/x86/kvm/mmu/mmu.c:2862 [inline]
   direct_page_fault+0x1f74/0x2b70 arch/x86/kvm/mmu/mmu.c:3769
   kvm_mmu_do_page_fault arch/x86/kvm/mmu.h:124 [inline]
   kvm_mmu_page_fault+0x199/0x1440 arch/x86/kvm/mmu/mmu.c:5065
   vmx_handle_exit+0x26/0x160 arch/x86/kvm/vmx/vmx.c:6122
   vcpu_enter_guest+0x3bdd/0x9630 arch/x86/kvm/x86.c:9428
   vcpu_run+0x416/0xc20 arch/x86/kvm/x86.c:9494
   kvm_arch_vcpu_ioctl_run+0x4e8/0xa40 arch/x86/kvm/x86.c:9722
   kvm_vcpu_ioctl+0x70f/0xbb0 arch/x86/kvm/../../../virt/kvm/kvm_main.c:3460
   vfs_ioctl fs/ioctl.c:51 [inline]
   __do_sys_ioctl fs/ioctl.c:1069 [inline]
   __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:1055
   do_syscall_64+0x3f/0xb0 arch/x86/entry/common.c:47
   entry_SYSCALL_64_after_hwframe+0x44/0xae
  RIP: 0033:0x440ce9

Cc: stable@vger.kernel.org
Reported-by: syzbot+fb0b6a7e8713aeb0319c@syzkaller.appspotmail.com
Fixes: 9ec19493fb86 ("KVM: x86: clear SMM flags before loading state while leaving SMM")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9dd23bdfc6cc1..54d212fe9b156 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7106,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 
 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
 {
-	emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+	vcpu->arch.hflags = emul_flags;
+	kvm_mmu_reset_context(vcpu);
 }
 
 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
-- 
GitLab


From b7e24eb1caa5f8da20d405d262dba67943aedc42 Mon Sep 17 00:00:00 2001
From: Alexander Kuznetsov <wwfq@yandex-team.ru>
Date: Wed, 9 Jun 2021 10:17:19 +0300
Subject: [PATCH 2644/3804] cgroup1: don't allow '\n' in renaming

cgroup_mkdir() has a restriction on newline usage in names:
$ mkdir $'/sys/fs/cgroup/cpu/test\ntest2'
mkdir: cannot create directory
'/sys/fs/cgroup/cpu/test\ntest2': Invalid argument

But in cgroup1_rename() such a check is missing.
This allows us to make /proc/<pid>/cgroup unparsable:
$ mkdir /sys/fs/cgroup/cpu/test
$ mv /sys/fs/cgroup/cpu/test $'/sys/fs/cgroup/cpu/test\ntest2'
$ echo $$ > $'/sys/fs/cgroup/cpu/test\ntest2'
$ cat /proc/self/cgroup
11:pids:/
10:freezer:/
9:hugetlb:/
8:cpuset:/
7:blkio:/user.slice
6:memory:/user.slice
5:net_cls,net_prio:/
4:perf_event:/
3:devices:/user.slice
2:cpu,cpuacct:/test
test2
1:name=systemd:/
0::/

Signed-off-by: Alexander Kuznetsov <wwfq@yandex-team.ru>
Reported-by: Andrey Krasichkov <buglloc@yandex-team.ru>
Acked-by: Dmitry Yakunin <zeil@yandex-team.ru>
Cc: stable@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 kernel/cgroup/cgroup-v1.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 8190b6bfc9784..1f274d7fc934e 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -820,6 +820,10 @@ static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent
 	struct cgroup *cgrp = kn->priv;
 	int ret;
 
+	/* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
+	if (strchr(new_name_str, '\n'))
+		return -EINVAL;
+
 	if (kernfs_type(kn) != KERNFS_DIR)
 		return -ENOTDIR;
 	if (kn->parent != new_parent)
-- 
GitLab


From 170b763597d3a0a79f135e4d83a38462c3964fdf Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Tue, 8 Jun 2021 12:55:19 -0700
Subject: [PATCH 2645/3804] drm/msm/dsi: Stash away calculated vco frequency on
 recalc

A problem was reported on CoachZ devices where the display wouldn't come
up, or it would be distorted. It turns out that the PLL code here wasn't
getting called once dsi_pll_10nm_vco_recalc_rate() started returning the
same exact frequency, down to the Hz, that the bootloader was setting
instead of 0 when the clk was registered with the clk framework.

After commit 001d8dc33875 ("drm/msm/dsi: remove temp data from global
pll structure") we use a hardcoded value for the parent clk frequency,
i.e.  VCO_REF_CLK_RATE, and we also hardcode the value for FRAC_BITS,
instead of getting it from the config structure. This combination of
changes to the recalc function allows us to properly calculate the
frequency of the PLL regardless of whether or not the PLL has been
clk_prepare()d or clk_set_rate()d. That's a good improvement.

Unfortunately, this means that now we won't call down into the PLL clk
driver when we call clk_set_rate() because the frequency calculated in
the framework matches the frequency that is set in hardware. If the rate
is the same as what we want it should be OK to not call the set_rate PLL
op. The real problem is that the prepare op in this driver uses a
private struct member to stash away the vco frequency so that it can
call the set_rate op directly during prepare. Once the set_rate op is
never called because recalc_rate told us the rate is the same, we don't
set this private struct member before the prepare op runs, so we try to
call the set_rate function directly with a frequency of 0. This
effectively kills the PLL and configures it for a rate that won't work.
Calling set_rate from prepare is really quite bad and will confuse any
downstream clks about what the rate actually is of their parent. Fixing
that will be a rather large change though so we leave that to later.

For now, let's stash away the rate we calculate during recalc so that
the prepare op knows what frequency to set, instead of 0. This way
things keep working and the display can enable the PLL properly. In the
future, we should remove that code from the prepare op so that it
doesn't even try to call the set rate function.

Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: Abhinav Kumar <abhinavk@codeaurora.org>
Fixes: 001d8dc33875 ("drm/msm/dsi: remove temp data from global pll structure")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20210608195519.125561-1-swboyd@chromium.org
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c | 1 +
 drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
index 34bc93548fcfb..657778889d359 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c
@@ -432,6 +432,7 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw,
 	pll_freq += div_u64(tmp64, multiplier);
 
 	vco_rate = pll_freq;
+	pll_10nm->vco_current_rate = vco_rate;
 
 	DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x",
 	    pll_10nm->phy->id, (unsigned long)vco_rate, dec, frac);
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
index e76ce40a12aba..6f96fbac82824 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c
@@ -460,6 +460,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw,
 	pll_freq += div_u64(tmp64, multiplier);
 
 	vco_rate = pll_freq;
+	pll_7nm->vco_current_rate = vco_rate;
 
 	DBG("DSI PLL%d returning vco rate = %lu, dec = %x, frac = %x",
 	    pll_7nm->phy->id, (unsigned long)vco_rate, dec, frac);
-- 
GitLab


From 63a8eef70ccb5199534dec56fed9759d214bfe55 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 9 Jun 2021 18:15:09 +0200
Subject: [PATCH 2646/3804] USB: serial: cp210x: fix CP2102N-A01 modem control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CP2102N revision A01 (firmware version <= 1.0.4) has a buggy
flow-control implementation that uses the ulXonLimit instead of
ulFlowReplace field of the flow-control settings structure (erratum
CP2102N_E104).

A recent change that set the input software flow-control limits
incidentally broke RTS control for these devices when CRTSCTS is not set
as the new limits would always enable hardware flow control.

Fix this by explicitly disabling flow control for the buggy firmware
versions and only updating the input software flow-control limits when
IXOFF is requested. This makes sure that the terminal settings match
the default zero ulXonLimit (ulFlowReplace) for these devices.

Link: https://lore.kernel.org/r/20210609161509.9459-1-johan@kernel.org
Reported-by: David Frey <dpfrey@gmail.com>
Reported-by: Alex Villacís Lasso <a_villacis@palosanto.com>
Tested-by: Alex Villacís Lasso <a_villacis@palosanto.com>
Fixes: f61309d9c96a ("USB: serial: cp210x: set IXOFF thresholds")
Cc: stable@vger.kernel.org      # 5.12
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 64 ++++++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index c9f8ebd341229..fcb812bc832cc 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -252,9 +252,11 @@ struct cp210x_serial_private {
 	u8			gpio_input;
 #endif
 	u8			partnum;
+	u32			fw_version;
 	speed_t			min_speed;
 	speed_t			max_speed;
 	bool			use_actual_rate;
+	bool			no_flow_control;
 };
 
 enum cp210x_event_state {
@@ -398,6 +400,7 @@ struct cp210x_special_chars {
 
 /* CP210X_VENDOR_SPECIFIC values */
 #define CP210X_READ_2NCONFIG	0x000E
+#define CP210X_GET_FW_VER_2N	0x0010
 #define CP210X_READ_LATCH	0x00C2
 #define CP210X_GET_PARTNUM	0x370B
 #define CP210X_GET_PORTCONFIG	0x370C
@@ -1128,6 +1131,7 @@ static bool cp210x_termios_change(const struct ktermios *a, const struct ktermio
 static void cp210x_set_flow_control(struct tty_struct *tty,
 		struct usb_serial_port *port, struct ktermios *old_termios)
 {
+	struct cp210x_serial_private *priv = usb_get_serial_data(port->serial);
 	struct cp210x_port_private *port_priv = usb_get_serial_port_data(port);
 	struct cp210x_special_chars chars;
 	struct cp210x_flow_ctl flow_ctl;
@@ -1135,6 +1139,15 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 	u32 ctl_hs;
 	int ret;
 
+	/*
+	 * Some CP2102N interpret ulXonLimit as ulFlowReplace (erratum
+	 * CP2102N_E104). Report back that flow control is not supported.
+	 */
+	if (priv->no_flow_control) {
+		tty->termios.c_cflag &= ~CRTSCTS;
+		tty->termios.c_iflag &= ~(IXON | IXOFF);
+	}
+
 	if (old_termios &&
 			C_CRTSCTS(tty) == (old_termios->c_cflag & CRTSCTS) &&
 			I_IXON(tty) == (old_termios->c_iflag & IXON) &&
@@ -1191,19 +1204,20 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 		port_priv->crtscts = false;
 	}
 
-	if (I_IXOFF(tty))
+	if (I_IXOFF(tty)) {
 		flow_repl |= CP210X_SERIAL_AUTO_RECEIVE;
-	else
+
+		flow_ctl.ulXonLimit = cpu_to_le32(128);
+		flow_ctl.ulXoffLimit = cpu_to_le32(128);
+	} else {
 		flow_repl &= ~CP210X_SERIAL_AUTO_RECEIVE;
+	}
 
 	if (I_IXON(tty))
 		flow_repl |= CP210X_SERIAL_AUTO_TRANSMIT;
 	else
 		flow_repl &= ~CP210X_SERIAL_AUTO_TRANSMIT;
 
-	flow_ctl.ulXonLimit = cpu_to_le32(128);
-	flow_ctl.ulXoffLimit = cpu_to_le32(128);
-
 	dev_dbg(&port->dev, "%s - ctrl = 0x%02x, flow = 0x%02x\n", __func__,
 			ctl_hs, flow_repl);
 
@@ -1926,6 +1940,45 @@ static void cp210x_init_max_speed(struct usb_serial *serial)
 	priv->use_actual_rate = use_actual_rate;
 }
 
+static int cp210x_get_fw_version(struct usb_serial *serial, u16 value)
+{
+	struct cp210x_serial_private *priv = usb_get_serial_data(serial);
+	u8 ver[3];
+	int ret;
+
+	ret = cp210x_read_vendor_block(serial, REQTYPE_DEVICE_TO_HOST, value,
+			ver, sizeof(ver));
+	if (ret)
+		return ret;
+
+	dev_dbg(&serial->interface->dev, "%s - %d.%d.%d\n", __func__,
+			ver[0], ver[1], ver[2]);
+
+	priv->fw_version = ver[0] << 16 | ver[1] << 8 | ver[2];
+
+	return 0;
+}
+
+static void cp210x_determine_quirks(struct usb_serial *serial)
+{
+	struct cp210x_serial_private *priv = usb_get_serial_data(serial);
+	int ret;
+
+	switch (priv->partnum) {
+	case CP210X_PARTNUM_CP2102N_QFN28:
+	case CP210X_PARTNUM_CP2102N_QFN24:
+	case CP210X_PARTNUM_CP2102N_QFN20:
+		ret = cp210x_get_fw_version(serial, CP210X_GET_FW_VER_2N);
+		if (ret)
+			break;
+		if (priv->fw_version <= 0x10004)
+			priv->no_flow_control = true;
+		break;
+	default:
+		break;
+	}
+}
+
 static int cp210x_attach(struct usb_serial *serial)
 {
 	int result;
@@ -1946,6 +1999,7 @@ static int cp210x_attach(struct usb_serial *serial)
 
 	usb_set_serial_data(serial, priv);
 
+	cp210x_determine_quirks(serial);
 	cp210x_init_max_speed(serial);
 
 	result = cp210x_gpio_init(serial);
-- 
GitLab


From 83e197a8414c0ba545e7e3916ce05f836f349273 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 10 Jun 2021 17:20:59 +0200
Subject: [PATCH 2647/3804] ALSA: seq: Fix race of snd_seq_timer_open()

The timer instance per queue is exclusive, and snd_seq_timer_open()
should have managed the concurrent accesses.  It looks as if it's
checking the already existing timer instance at the beginning, but
it's not right, because there is no protection, hence any later
concurrent call of snd_seq_timer_open() may override the timer
instance easily.  This may result in UAF, as the leftover timer
instance can keep running while the queue itself gets closed, as
spotted by syzkaller recently.

To avoid the race, check again at the assignment of tmr->timeri, and
return -EBUSY if a timer instance has already been registered.

Reported-by: syzbot+ddc1260a83ed1cbf6fb5@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/000000000000dce34f05c42f110c@google.com
Link: https://lore.kernel.org/r/20210610152059.24633-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/seq/seq_timer.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c
index 1645e4142e302..9863be6fd43e1 100644
--- a/sound/core/seq/seq_timer.c
+++ b/sound/core/seq/seq_timer.c
@@ -297,8 +297,16 @@ int snd_seq_timer_open(struct snd_seq_queue *q)
 		return err;
 	}
 	spin_lock_irq(&tmr->lock);
-	tmr->timeri = t;
+	if (tmr->timeri)
+		err = -EBUSY;
+	else
+		tmr->timeri = t;
 	spin_unlock_irq(&tmr->lock);
+	if (err < 0) {
+		snd_timer_close(t);
+		snd_timer_instance_free(t);
+		return err;
+	}
 	return 0;
 }
 
-- 
GitLab


From fb8543fb863e89baa433b4d716d73395caa1b7f4 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 10 Jun 2021 00:07:26 +0200
Subject: [PATCH 2648/3804] hwmon: (tps23861) define regmap max register

Define the max register address the device supports.
This allows reading the whole register space via
regmap debugfs; without it, only register 0x0 is visible.

This was forgotten in the original driver commit.

Fixes: fff7b8ab2255 ("hwmon: add Texas Instruments TPS23861 driver")
Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210609220728.499879-1-robert.marko@sartura.hr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tps23861.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c
index c2484f15298b0..fd0be88838290 100644
--- a/drivers/hwmon/tps23861.c
+++ b/drivers/hwmon/tps23861.c
@@ -117,6 +117,7 @@ struct tps23861_data {
 static struct regmap_config tps23861_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
+	.max_register = 0x6f,
 };
 
 static int tps23861_read_temp(struct tps23861_data *data, long *val)
-- 
GitLab


From b325d3526e14942d42c392c2ac9fbea59c22894c Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 10 Jun 2021 00:07:27 +0200
Subject: [PATCH 2649/3804] hwmon: (tps23861) set current shunt value

TPS23861 has a configuration bit for setting of the
current shunt value used on the board.
It is bit 0 of the General Mask 1 register.

According to the datasheet bit values are:
0 for 255 mOhm (Default)
1 for 250 mOhm

So, configure the bit before registering the hwmon
device according to the value passed in the DTS or
default one if none is passed.

This caused potentially reading slightly skewed values
due to max current value being 1.02A when 250mOhm shunt
is used instead of 1.0A when 255mOhm is used.

Fixes: fff7b8ab2255 ("hwmon: add Texas Instruments TPS23861 driver")
Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210609220728.499879-2-robert.marko@sartura.hr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tps23861.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c
index fd0be88838290..c3685b7e9e828 100644
--- a/drivers/hwmon/tps23861.c
+++ b/drivers/hwmon/tps23861.c
@@ -99,6 +99,9 @@
 #define POWER_ENABLE			0x19
 #define TPS23861_NUM_PORTS		4
 
+#define TPS23861_GENERAL_MASK_1		0x17
+#define TPS23861_CURRENT_SHUNT_MASK	BIT(0)
+
 #define TEMPERATURE_LSB			652 /* 0.652 degrees Celsius */
 #define VOLTAGE_LSB			3662 /* 3.662 mV */
 #define SHUNT_RESISTOR_DEFAULT		255000 /* 255 mOhm */
@@ -561,6 +564,15 @@ static int tps23861_probe(struct i2c_client *client)
 	else
 		data->shunt_resistor = SHUNT_RESISTOR_DEFAULT;
 
+	if (data->shunt_resistor == SHUNT_RESISTOR_DEFAULT)
+		regmap_clear_bits(data->regmap,
+				  TPS23861_GENERAL_MASK_1,
+				  TPS23861_CURRENT_SHUNT_MASK);
+	else
+		regmap_set_bits(data->regmap,
+				TPS23861_GENERAL_MASK_1,
+				TPS23861_CURRENT_SHUNT_MASK);
+
 	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
 							 data, &tps23861_chip_info,
 							 NULL);
-- 
GitLab


From e13d1127241404f1c3eb1379ac4dd100eaf385b4 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 10 Jun 2021 00:07:28 +0200
Subject: [PATCH 2650/3804] hwmon: (tps23861) correct shunt LSB values

Current shunt LSB values got reversed in the
original driver commit.

So, correct the current shunt LSB values according to
the datasheet.

This caused reading slightly skewed current values.

Fixes: fff7b8ab2255 ("hwmon: add Texas Instruments TPS23861 driver")
Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210609220728.499879-3-robert.marko@sartura.hr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tps23861.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/tps23861.c b/drivers/hwmon/tps23861.c
index c3685b7e9e828..8bd6435c13e82 100644
--- a/drivers/hwmon/tps23861.c
+++ b/drivers/hwmon/tps23861.c
@@ -105,8 +105,8 @@
 #define TEMPERATURE_LSB			652 /* 0.652 degrees Celsius */
 #define VOLTAGE_LSB			3662 /* 3.662 mV */
 #define SHUNT_RESISTOR_DEFAULT		255000 /* 255 mOhm */
-#define CURRENT_LSB_255			62260 /* 62.260 uA */
-#define CURRENT_LSB_250			61039 /* 61.039 uA */
+#define CURRENT_LSB_250			62260 /* 62.260 uA */
+#define CURRENT_LSB_255			61039 /* 61.039 uA */
 #define RESISTANCE_LSB			110966 /* 11.0966 Ohm*/
 #define RESISTANCE_LSB_LOW		157216 /* 15.7216 Ohm*/
 
-- 
GitLab


From 0e5a89dbb49920cea22193044bbbfd76a9b0f458 Mon Sep 17 00:00:00 2001
From: Hubert Jasudowicz <hubert.jasudowicz@gmail.com>
Date: Wed, 9 Jun 2021 23:51:12 +0200
Subject: [PATCH 2651/3804] doc: Remove references to IBM Calgary

The Calgary IOMMU driver has been removed in

  90dc392fc445 ("x86: Remove the calgary IOMMU driver")

Clean up stale docs that refer to it.

Signed-off-by: Hubert Jasudowicz <hubert.jasudowicz@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/1bd2b57dd1db53df09e520b8170ff61418805de4.1623274832.git.hubert.jasudowicz@gmail.com
---
 Documentation/x86/x86_64/boot-options.rst | 31 +----------------------
 1 file changed, 1 insertion(+), 30 deletions(-)

diff --git a/Documentation/x86/x86_64/boot-options.rst b/Documentation/x86/x86_64/boot-options.rst
index 324cefff92e7d..5f62b3b863570 100644
--- a/Documentation/x86/x86_64/boot-options.rst
+++ b/Documentation/x86/x86_64/boot-options.rst
@@ -247,16 +247,11 @@ Multiple x86-64 PCI-DMA mapping implementations exist, for example:
       Kernel boot message: "PCI-DMA: Using software bounce buffering
       for IO (SWIOTLB)"
 
-   4. <arch/x86_64/pci-calgary.c> : IBM Calgary hardware IOMMU. Used in IBM
-      pSeries and xSeries servers. This hardware IOMMU supports DMA address
-      mapping with memory protection, etc.
-      Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
-
 ::
 
   iommu=[<size>][,noagp][,off][,force][,noforce]
   [,memaper[=<order>]][,merge][,fullflush][,nomerge]
-  [,noaperture][,calgary]
+  [,noaperture]
 
 General iommu options:
 
@@ -295,8 +290,6 @@ iommu options only relevant to the AMD GART hardware IOMMU:
       Don't initialize the AGP driver and use full aperture.
     panic
       Always panic when IOMMU overflows.
-    calgary
-      Use the Calgary IOMMU if it is available
 
 iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
 implementation:
@@ -307,28 +300,6 @@ implementation:
       force
         Force all IO through the software TLB.
 
-Settings for the IBM Calgary hardware IOMMU currently found in IBM
-pSeries and xSeries machines
-
-    calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
-      Set the size of each PCI slot's translation table when using the
-      Calgary IOMMU. This is the size of the translation table itself
-      in main memory. The smallest table, 64k, covers an IO space of
-      32MB; the largest, 8MB table, can cover an IO space of 4GB.
-      Normally the kernel will make the right choice by itself.
-    calgary=[translate_empty_slots]
-      Enable translation even on slots that have no devices attached to
-      them, in case a device will be hotplugged in the future.
-    calgary=[disable=<PCI bus number>]
-      Disable translation on a given PHB. For
-      example, the built-in graphics adapter resides on the first bridge
-      (PCI bus number 0); if translation (isolation) is enabled on this
-      bridge, X servers that access the hardware directly from user
-      space might stop working. Use this option if you have devices that
-      are accessed from userspace directly on some PCI host bridge.
-    panic
-      Always panic when IOMMU overflows
-
 
 Miscellaneous
 =============
-- 
GitLab


From 382e6e177bc1c02473e56591fe5083ae1e4904f6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 10 Jun 2021 15:13:46 +0100
Subject: [PATCH 2652/3804] irqchip/gic-v3: Workaround inconsistent PMR setting
 on NMI entry

The arm64 entry code suffers from an annoying issue on taking
a NMI, as it sets PMR to a value that actually allows IRQs
to be acknowledged. This is done for consistency with other parts
of the code, and is in the process of being fixed. This shouldn't
be a problem, as we are not enabling interrupts whilst in NMI
context.

However, in the unfortunate scenario that we took a spurious NMI
(retired before the read of IAR) *and* that there is an IRQ pending
at the same time, we'll ack the IRQ in NMI context. Too bad.

In order to avoid deadlocks while running something like perf,
teach the GICv3 driver about this situation: if we were in
a context where no interrupt should have fired, transiently
set PMR to a value that only allows NMIs before acking the pending
interrupt, and restore the original value after that.

This papers over the core issue for the time being, and makes
NMIs great again. Sort of.

Fixes: 4d6a38da8e79e94c ("arm64: entry: always set GIC_PRIO_PSR_I_SET during entry")
Co-developed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/lkml/20210610145731.1350460-1-maz@kernel.org
---
 drivers/irqchip/irq-gic-v3.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 37a23aa6de37c..66d623f91678a 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -642,11 +642,45 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
 		nmi_exit();
 }
 
+static u32 do_read_iar(struct pt_regs *regs)
+{
+	u32 iar;
+
+	if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) {
+		u64 pmr;
+
+		/*
+		 * We were in a context with IRQs disabled. However, the
+		 * entry code has set PMR to a value that allows any
+		 * interrupt to be acknowledged, and not just NMIs. This can
+		 * lead to surprising effects if the NMI has been retired in
+		 * the meantime, and that there is an IRQ pending. The IRQ
+		 * would then be taken in NMI context, something that nobody
+		 * wants to debug twice.
+		 *
+		 * Until we sort this, drop PMR again to a level that will
+		 * actually only allow NMIs before reading IAR, and then
+		 * restore it to what it was.
+		 */
+		pmr = gic_read_pmr();
+		gic_pmr_mask_irqs();
+		isb();
+
+		iar = gic_read_iar();
+
+		gic_write_pmr(pmr);
+	} else {
+		iar = gic_read_iar();
+	}
+
+	return iar;
+}
+
 static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 {
 	u32 irqnr;
 
-	irqnr = gic_read_iar();
+	irqnr = do_read_iar(regs);
 
 	/* Check for special IDs first */
 	if ((irqnr >= 1020 && irqnr <= 1023))
-- 
GitLab


From d5ab95da2a41567440097c277c5771ad13928dad Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Jun 2021 20:22:02 +0300
Subject: [PATCH 2653/3804] usb: typec: wcove: Use LE to CPU conversion when
 accessing msg->header

As LKP noticed, Sparse is not happy about strict type handling:
   .../typec/tcpm/wcove.c:380:50: sparse:     expected unsigned short [usertype] header
   .../typec/tcpm/wcove.c:380:50: sparse:     got restricted __le16 const [usertype] header

Fix this by switching to use pd_header_cnt_le() instead of pd_header_cnt()
in the affected code.

Fixes: ae8a2ca8a221 ("usb: typec: Group all TCPCI/TCPM code together")
Fixes: 3c4fb9f16921 ("usb: typec: wcove: start using tcpm for USB PD support")
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210609172202.83377-1-andriy.shevchenko@linux.intel.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/wcove.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/wcove.c b/drivers/usb/typec/tcpm/wcove.c
index 79ae63950050c..5d125339687a3 100644
--- a/drivers/usb/typec/tcpm/wcove.c
+++ b/drivers/usb/typec/tcpm/wcove.c
@@ -378,7 +378,7 @@ static int wcove_pd_transmit(struct tcpc_dev *tcpc,
 		const u8 *data = (void *)msg;
 		int i;
 
-		for (i = 0; i < pd_header_cnt(msg->header) * 4 + 2; i++) {
+		for (i = 0; i < pd_header_cnt_le(msg->header) * 4 + 2; i++) {
 			ret = regmap_write(wcove->regmap, USBC_TX_DATA + i,
 					   data[i]);
 			if (ret)
-- 
GitLab


From e0e8b6abe8c862229ba00cdd806e8598cdef00bb Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Thu, 10 Jun 2021 13:19:57 +0930
Subject: [PATCH 2654/3804] usb: gadget: fsl: Re-enable driver for ARM SoCs

The commit a390bef7db1f ("usb: gadget: fsl_mxc_udc: Remove the driver")
dropped the ARCH_MXC dependency from USB_FSL_USB2, leaving it depending
solely on FSL_SOC.

FSL_SOC is powerpc only; it was briefly available on ARM in 2014 but was
removed by commit cfd074ad8600 ("ARM: imx: temporarily remove
CONFIG_SOC_FSL from LS1021A"). Therefore the driver can no longer be
enabled on ARM platforms.

This appears to be a mistake as arm64's ARCH_LAYERSCAPE and arm32
SOC_LS1021A SoCs use this symbol. It's enabled in these defconfigs:

arch/arm/configs/imx_v6_v7_defconfig:CONFIG_USB_FSL_USB2=y
arch/arm/configs/multi_v7_defconfig:CONFIG_USB_FSL_USB2=y
arch/powerpc/configs/mgcoge_defconfig:CONFIG_USB_FSL_USB2=y
arch/powerpc/configs/mpc512x_defconfig:CONFIG_USB_FSL_USB2=y

To fix, expand the dependencies so USB_FSL_USB2 can be enabled on the
ARM platforms, and with COMPILE_TEST.

Fixes: a390bef7db1f ("usb: gadget: fsl_mxc_udc: Remove the driver")
Signed-off-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20210610034957.93376-1-joel@jms.id.au
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 8c614bb86c665..7348acbdc5600 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -90,7 +90,7 @@ config USB_BCM63XX_UDC
 
 config USB_FSL_USB2
 	tristate "Freescale Highspeed USB DR Peripheral Controller"
-	depends on FSL_SOC
+	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A || COMPILE_TEST
 	help
 	   Some of Freescale PowerPC and i.MX processors have a High Speed
 	   Dual-Role(DR) USB controller, which supports device mode.
-- 
GitLab


From f247f0a82a4f8c3bfed178d8fd9e069d1424ee4e Mon Sep 17 00:00:00 2001
From: Mayank Rana <mrana@codeaurora.org>
Date: Wed, 9 Jun 2021 00:35:35 -0700
Subject: [PATCH 2655/3804] usb: typec: ucsi: Clear PPM capability data in
 ucsi_init() error path

If ucsi_init() fails for some reason (e.g. ucsi_register_port()
fails or general communication failure to the PPM), particularly at
any point after the GET_CAPABILITY command had been issued, this
results in unwinding the initialization and returning an error.
However the ucsi structure's ucsi_capability member retains its
current value, including likely a non-zero num_connectors.
And because ucsi_init() itself is done in a workqueue a UCSI
interface driver will be unaware that it failed and may think the
ucsi_register() call was completely successful.  Later, if
ucsi_unregister() is called, due to this stale ucsi->cap value it
would try to access the items in the ucsi->connector array which
might not be in a proper state or not even allocated at all and
results in NULL or invalid pointer dereference.

Fix this by clearing the ucsi->cap value to 0 during the error
path of ucsi_init() in order to prevent a later ucsi_unregister()
from entering the connector cleanup loop.

Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface")
Cc: stable@vger.kernel.org
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Link: https://lore.kernel.org/r/20210609073535.5094-1-jackp@codeaurora.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index b433169ef6fa4..b7d104c80d857 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1253,6 +1253,7 @@ err_unregister:
 	}
 
 err_reset:
+	memset(&ucsi->cap, 0, sizeof(ucsi->cap));
 	ucsi_reset_ppm(ucsi);
 err:
 	return ret;
-- 
GitLab


From 142d0b24c1b17139f1aaaacae7542a38aa85640f Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 9 Jun 2021 17:21:32 -0700
Subject: [PATCH 2656/3804] usb: typec: mux: Fix copy-paste mistake in
 typec_mux_match

Fix the copy-paste mistake in the return path of typec_mux_match(),
where dev is considered a member of struct typec_switch rather than
struct typec_mux.

The two structs are identical in regards to having the struct device as
the first entry, so this provides no functional change.

Fixes: 3370db35193b ("usb: typec: Registering real device entries for the muxes")
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210610002132.3088083-1-bjorn.andersson@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/mux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index 8514bec7e1b89..77dabd306ba8d 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -239,7 +239,7 @@ find_mux:
 	dev = class_find_device(&typec_mux_class, NULL, fwnode,
 				mux_fwnode_match);
 
-	return dev ? to_typec_switch(dev) : ERR_PTR(-EPROBE_DEFER);
+	return dev ? to_typec_mux(dev) : ERR_PTR(-EPROBE_DEFER);
 }
 
 /**
-- 
GitLab


From 55748ac6a6d3e35f8fd0f5c9284df7c7f3b1705a Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Wed, 2 Jun 2021 16:33:39 -0400
Subject: [PATCH 2657/3804] ima: differentiate between EVM failures in the
 audit log

Differentiate between an invalid EVM portable signature failure
from other EVM HMAC/signature failures.

Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_appraise.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 940695e7b5356..ef9dcfce45d45 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -422,7 +422,8 @@ int ima_appraise_measurement(enum ima_hooks func,
 		goto out;
 	case INTEGRITY_FAIL_IMMUTABLE:
 		set_bit(IMA_DIGSIG, &iint->atomic_flags);
-		fallthrough;
+		cause = "invalid-fail-immutable";
+		goto out;
 	case INTEGRITY_FAIL:		/* Invalid HMAC/signature. */
 		cause = "invalid-HMAC";
 		goto out;
-- 
GitLab


From 3bdd5ee0ec8c14131d560da492e6df452c6fdd75 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 9 Jun 2021 18:41:57 -0400
Subject: [PATCH 2658/3804] skbuff: fix incorrect msg_zerocopy copy
 notifications

msg_zerocopy signals if a send operation required copying with a flag
in serr->ee.ee_code.

This field can be incorrect as of the below commit, as a result of
both structs uarg and serr pointing into the same skb->cb[].

uarg->zerocopy must be read before skb->cb[] is reinitialized to hold
serr. Similar to other fields len, hi and lo, use a local variable to
temporarily hold the value.

This was not a problem before, when the value was passed as a function
argument.

Fixes: 75518851a2a0 ("skbuff: Push status and refcounts into sock_zerocopy_callback")
Reported-by: Talal Ahmad <talalahmad@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ad22870298c9..bbc3b4b62032b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1253,6 +1253,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
 	struct sock *sk = skb->sk;
 	struct sk_buff_head *q;
 	unsigned long flags;
+	bool is_zerocopy;
 	u32 lo, hi;
 	u16 len;
 
@@ -1267,6 +1268,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
 	len = uarg->len;
 	lo = uarg->id;
 	hi = uarg->id + len - 1;
+	is_zerocopy = uarg->zerocopy;
 
 	serr = SKB_EXT_ERR(skb);
 	memset(serr, 0, sizeof(*serr));
@@ -1274,7 +1276,7 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
 	serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY;
 	serr->ee.ee_data = hi;
 	serr->ee.ee_info = lo;
-	if (!uarg->zerocopy)
+	if (!is_zerocopy)
 		serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
 
 	q = &sk->sk_error_queue;
-- 
GitLab


From 9d44fa3e50cc91691896934d106c86e4027e61ca Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Thu, 10 Jun 2021 09:41:36 +0800
Subject: [PATCH 2659/3804] ping: Check return value of function
 'ping_queue_rcv_skb'

Function 'ping_queue_rcv_skb' does not always return success; it can
also fail. If its return value is not checked, function `ping_rcv`
returns success even when queueing the skb failed.

Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ping.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1c9f71a372581..95a718397fd12 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -954,6 +954,7 @@ bool ping_rcv(struct sk_buff *skb)
 	struct sock *sk;
 	struct net *net = dev_net(skb->dev);
 	struct icmphdr *icmph = icmp_hdr(skb);
+	bool rc = false;
 
 	/* We assume the packet has already been checked by icmp_rcv */
 
@@ -968,14 +969,15 @@ bool ping_rcv(struct sk_buff *skb)
 		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 
 		pr_debug("rcv on socket %p\n", sk);
-		if (skb2)
-			ping_queue_rcv_skb(sk, skb2);
+		if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+			rc = true;
 		sock_put(sk);
-		return true;
 	}
-	pr_debug("no socket, dropping\n");
 
-	return false;
+	if (!rc)
+		pr_debug("no socket, dropping\n");
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(ping_rcv);
 
-- 
GitLab


From 06af8679449d4ed282df13191fc52d5ba28ec536 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 10 Jun 2021 15:11:11 -0500
Subject: [PATCH 2660/3804] coredump: Limit what can interrupt coredumps

Olivier Langlois has been struggling with coredumps being incompletely written in
processes using io_uring.

Olivier Langlois <olivier@trillion01.com> writes:
> io_uring is a big user of task_work and any event that io_uring made a
> task waiting for that occurs during the core dump generation will
> generate a TIF_NOTIFY_SIGNAL.
>
> Here are the detailed steps of the problem:
> 1. io_uring calls vfs_poll() to install a task to a file wait queue
>    with io_async_wake() as the wakeup function cb from io_arm_poll_handler()
> 2. wakeup function ends up calling task_work_add() with TWA_SIGNAL
> 3. task_work_add() sets the TIF_NOTIFY_SIGNAL bit by calling
>    set_notify_signal()

The coredump code deliberately supports being interrupted by SIGKILL,
and depends upon prepare_signal to filter out all other signals.   Now
that signal_pending includes wake ups for TIF_NOTIFY_SIGNAL this hack
in dump_interrupted used by the coredump code no longer works.

Make the coredump code more robust by explicitly testing for all of
the wakeup conditions the coredump code supports.  This prevents
new wakeup conditions from breaking the coredump code, as well
as fixing the current issue.

The filesystem code that the coredump code uses already limits
itself to only aborting on fatal_signal_pending.  So it should
not develop surprising wake-up reasons either.

v2: Don't remove the now unnecessary code in prepare_signal.

Cc: stable@vger.kernel.org
Fixes: 12db8b690010 ("entry: Add support for TIF_NOTIFY_SIGNAL")
Reported-by: Olivier Langlois <olivier@trillion01.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/coredump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 2868e3e171ae6..c3d8fc14b9930 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -519,7 +519,7 @@ static bool dump_interrupted(void)
 	 * but then we need to teach dump_write() to restart and clear
 	 * TIF_SIGPENDING.
 	 */
-	return signal_pending(current);
+	return fatal_signal_pending(current) || freezing(current);
 }
 
 static void wait_for_dump_helpers(struct file *file)
-- 
GitLab


From 58e2071742e38f29f051b709a5cca014ba51166f Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Thu, 10 Jun 2021 15:04:10 +0300
Subject: [PATCH 2661/3804] net: bridge: fix vlan tunnel dst null pointer
 dereference

This patch fixes a tunnel_dst null pointer dereference due to lockless
access in the tunnel egress path. When deleting a vlan tunnel the
tunnel_dst pointer is set to NULL without waiting a grace period (i.e.
while it's still usable) and packets egressing are dereferencing it
without checking. Use READ/WRITE_ONCE to annotate the lockless use of
tunnel_id, use RCU for accessing tunnel_dst and make sure it is read
only once and checked in the egress path. The dst is already properly RCU
protected so we don't need to do anything fancier than to make sure
tunnel_id and tunnel_dst are read only once and checked in the egress path.

Cc: stable@vger.kernel.org
Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h     |  4 ++--
 net/bridge/br_vlan_tunnel.c | 38 +++++++++++++++++++++++--------------
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 7ce8a77cc6b6b..e013d33f1c7ca 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -90,8 +90,8 @@ struct bridge_mcast_stats {
 #endif
 
 struct br_tunnel_info {
-	__be64			tunnel_id;
-	struct metadata_dst	*tunnel_dst;
+	__be64				tunnel_id;
+	struct metadata_dst __rcu	*tunnel_dst;
 };
 
 /* private vlan flags */
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 0d3a8c01552ee..03de461a0d44a 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -41,26 +41,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
 				      br_vlan_tunnel_rht_params);
 }
 
+static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
+{
+	struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
+
+	WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
+	RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
+	dst_release(&tdst->dst);
+}
+
 void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
 			  struct net_bridge_vlan *vlan)
 {
-	if (!vlan->tinfo.tunnel_dst)
+	if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
 		return;
 	rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
 			       br_vlan_tunnel_rht_params);
-	vlan->tinfo.tunnel_id = 0;
-	dst_release(&vlan->tinfo.tunnel_dst->dst);
-	vlan->tinfo.tunnel_dst = NULL;
+	vlan_tunnel_info_release(vlan);
 }
 
 static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
 				  struct net_bridge_vlan *vlan, u32 tun_id)
 {
-	struct metadata_dst *metadata = NULL;
+	struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
 	__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
 	int err;
 
-	if (vlan->tinfo.tunnel_dst)
+	if (metadata)
 		return -EEXIST;
 
 	metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
@@ -69,8 +76,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
 		return -EINVAL;
 
 	metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
-	vlan->tinfo.tunnel_dst = metadata;
-	vlan->tinfo.tunnel_id = key;
+	rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+	WRITE_ONCE(vlan->tinfo.tunnel_id, key);
 
 	err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
 					    br_vlan_tunnel_rht_params);
@@ -79,9 +86,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
 
 	return 0;
 out:
-	dst_release(&vlan->tinfo.tunnel_dst->dst);
-	vlan->tinfo.tunnel_dst = NULL;
-	vlan->tinfo.tunnel_id = 0;
+	vlan_tunnel_info_release(vlan);
 
 	return err;
 }
@@ -182,12 +187,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
 int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 				 struct net_bridge_vlan *vlan)
 {
+	struct metadata_dst *tunnel_dst;
+	__be64 tunnel_id;
 	int err;
 
-	if (!vlan || !vlan->tinfo.tunnel_id)
+	if (!vlan)
 		return 0;
 
-	if (unlikely(!skb_vlan_tag_present(skb)))
+	tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+	if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
 		return 0;
 
 	skb_dst_drop(skb);
@@ -195,7 +203,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 	if (err)
 		return err;
 
-	skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
+	tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+	if (tunnel_dst)
+		skb_dst_set(skb, dst_clone(&tunnel_dst->dst));
 
 	return 0;
 }
-- 
GitLab


From cfc579f9d89af4ada58c69b03bcaa4887840f3b3 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Thu, 10 Jun 2021 15:04:11 +0300
Subject: [PATCH 2662/3804] net: bridge: fix vlan tunnel dst refcnt when
 egressing

The egress tunnel code uses dst_clone() and directly sets the result
which is wrong because the entry might have 0 refcnt or be already deleted,
causing a number of problems. It also triggers the WARN_ON() in dst_hold()[1]
when a refcnt couldn't be taken. Fix it by using dst_hold_safe() and
checking if a reference was actually taken before setting the dst.

[1] dmesg WARN_ON log and following refcnt errors
 WARNING: CPU: 5 PID: 38 at include/net/dst.h:230 br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
 Modules linked in: 8021q garp mrp bridge stp llc bonding ipv6 virtio_net
 CPU: 5 PID: 38 Comm: ksoftirqd/5 Kdump: loaded Tainted: G        W         5.13.0-rc3+ #360
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
 RIP: 0010:br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
 Code: e8 85 bc 01 e1 45 84 f6 74 90 45 31 f6 85 db 48 c7 c7 a0 02 19 a0 41 0f 94 c6 31 c9 31 d2 44 89 f6 e8 64 bc 01 e1 85 db 75 02 <0f> 0b 31 c9 31 d2 44 89 f6 48 c7 c7 70 02 19 a0 e8 4b bc 01 e1 49
 RSP: 0018:ffff8881003d39e8 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffffa01902a0
 RBP: ffff8881040c6700 R08: 0000000000000000 R09: 0000000000000001
 R10: 2ce93d0054fe0d00 R11: 54fe0d00000e0000 R12: ffff888109515000
 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000401
 FS:  0000000000000000(0000) GS:ffff88822bf40000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f42ba70f030 CR3: 0000000109926000 CR4: 00000000000006e0
 Call Trace:
  br_handle_vlan+0xbc/0xca [bridge]
  __br_forward+0x23/0x164 [bridge]
  deliver_clone+0x41/0x48 [bridge]
  br_handle_frame_finish+0x36f/0x3aa [bridge]
  ? skb_dst+0x2e/0x38 [bridge]
  ? br_handle_ingress_vlan_tunnel+0x3e/0x1c8 [bridge]
  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
  br_handle_frame+0x2c3/0x377 [bridge]
  ? __skb_pull+0x33/0x51
  ? vlan_do_receive+0x4f/0x36a
  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
  __netif_receive_skb_core+0x539/0x7c6
  ? __list_del_entry_valid+0x16e/0x1c2
  __netif_receive_skb_list_core+0x6d/0xd6
  netif_receive_skb_list_internal+0x1d9/0x1fa
  gro_normal_list+0x22/0x3e
  dev_gro_receive+0x55b/0x600
  ? detach_buf_split+0x58/0x140
  napi_gro_receive+0x94/0x12e
  virtnet_poll+0x15d/0x315 [virtio_net]
  __napi_poll+0x2c/0x1c9
  net_rx_action+0xe6/0x1fb
  __do_softirq+0x115/0x2d8
  run_ksoftirqd+0x18/0x20
  smpboot_thread_fn+0x183/0x19c
  ? smpboot_unregister_percpu_thread+0x66/0x66
  kthread+0x10a/0x10f
  ? kthread_mod_delayed_work+0xb6/0xb6
  ret_from_fork+0x22/0x30
 ---[ end trace 49f61b07f775fd2b ]---
 dst_release: dst:00000000c02d677a refcnt:-1
 dst_release underflow

Cc: stable@vger.kernel.org
Fixes: 11538d039ac6 ("bridge: vlan dst_metadata hooks in ingress and egress paths")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_vlan_tunnel.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 03de461a0d44a..01017448ebdef 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -204,8 +204,8 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
 		return err;
 
 	tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
-	if (tunnel_dst)
-		skb_dst_set(skb, dst_clone(&tunnel_dst->dst));
+	if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
+		skb_dst_set(skb, &tunnel_dst->dst);
 
 	return 0;
 }
-- 
GitLab


From f13ef10059ccf5f4ed201cd050176df62ec25bb8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Jun 2021 07:27:37 -0700
Subject: [PATCH 2663/3804] net: annotate data race in sock_error()

sock_error() is known to be racy. The code avoids
an atomic operation if sk_err is zero, and this field
could be changed under us, this is fine.

Syzbot reported:

BUG: KCSAN: data-race in sock_alloc_send_pskb / unix_release_sock

write to 0xffff888131855630 of 4 bytes by task 9365 on cpu 1:
 unix_release_sock+0x2e9/0x6e0 net/unix/af_unix.c:550
 unix_release+0x2f/0x50 net/unix/af_unix.c:859
 __sock_release net/socket.c:599 [inline]
 sock_close+0x6c/0x150 net/socket.c:1258
 __fput+0x25b/0x4e0 fs/file_table.c:280
 ____fput+0x11/0x20 fs/file_table.c:313
 task_work_run+0xae/0x130 kernel/task_work.c:164
 tracehook_notify_resume include/linux/tracehook.h:189 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:174 [inline]
 exit_to_user_mode_prepare+0x156/0x190 kernel/entry/common.c:208
 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline]
 syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:301
 do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888131855630 of 4 bytes by task 9385 on cpu 0:
 sock_error include/net/sock.h:2269 [inline]
 sock_alloc_send_pskb+0xe4/0x4e0 net/core/sock.c:2336
 unix_dgram_sendmsg+0x478/0x1610 net/unix/af_unix.c:1671
 unix_seqpacket_sendmsg+0xc2/0x100 net/unix/af_unix.c:2055
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 __sys_sendmsg_sock+0x25/0x30 net/socket.c:2416
 io_sendmsg fs/io_uring.c:4367 [inline]
 io_issue_sqe+0x231a/0x6750 fs/io_uring.c:6135
 __io_queue_sqe+0xe9/0x360 fs/io_uring.c:6414
 __io_req_task_submit fs/io_uring.c:2039 [inline]
 io_async_task_func+0x312/0x590 fs/io_uring.c:5074
 __tctx_task_work fs/io_uring.c:1910 [inline]
 tctx_task_work+0x1d4/0x3d0 fs/io_uring.c:1924
 task_work_run+0xae/0x130 kernel/task_work.c:164
 tracehook_notify_signal include/linux/tracehook.h:212 [inline]
 handle_signal_work kernel/entry/common.c:145 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
 exit_to_user_mode_prepare+0xf8/0x190 kernel/entry/common.c:208
 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline]
 syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:301
 do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000000 -> 0x00000068

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 9385 Comm: syz-executor.3 Not tainted 5.13.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 0e962d8bc73b1..2fc513aa114c0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2266,8 +2266,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk);
 static inline int sock_error(struct sock *sk)
 {
 	int err;
-	if (likely(!sk->sk_err))
+
+	/* Avoid an atomic operation for the common case.
+	 * This is racy since another cpu/thread can change sk_err under us.
+	 */
+	if (likely(data_race(!sk->sk_err)))
 		return 0;
+
 	err = xchg(&sk->sk_err, 0);
 	return -err;
 }
-- 
GitLab


From b71eaed8c04f72a919a9c44e83e4ee254e69e7f3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Jun 2021 07:44:11 -0700
Subject: [PATCH 2664/3804] inet: annotate data races around sk->sk_txhash

UDP sendmsg() path can be lockless, it is possible for another
thread to re-connect and change sk->sk_txhash under us.

There is no serious impact, but we can use READ_ONCE()/WRITE_ONCE()
pair to document the race.

BUG: KCSAN: data-race in __ip4_datagram_connect / skb_set_owner_w

write to 0xffff88813397920c of 4 bytes by task 30997 on cpu 1:
 sk_set_txhash include/net/sock.h:1937 [inline]
 __ip4_datagram_connect+0x69e/0x710 net/ipv4/datagram.c:75
 __ip6_datagram_connect+0x551/0x840 net/ipv6/datagram.c:189
 ip6_datagram_connect+0x2a/0x40 net/ipv6/datagram.c:272
 inet_dgram_connect+0xfd/0x180 net/ipv4/af_inet.c:580
 __sys_connect_file net/socket.c:1837 [inline]
 __sys_connect+0x245/0x280 net/socket.c:1854
 __do_sys_connect net/socket.c:1864 [inline]
 __se_sys_connect net/socket.c:1861 [inline]
 __x64_sys_connect+0x3d/0x50 net/socket.c:1861
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff88813397920c of 4 bytes by task 31039 on cpu 0:
 skb_set_hash_from_sk include/net/sock.h:2211 [inline]
 skb_set_owner_w+0x118/0x220 net/core/sock.c:2101
 sock_alloc_send_pskb+0x452/0x4e0 net/core/sock.c:2359
 sock_alloc_send_skb+0x2d/0x40 net/core/sock.c:2373
 __ip6_append_data+0x1743/0x21a0 net/ipv6/ip6_output.c:1621
 ip6_make_skb+0x258/0x420 net/ipv6/ip6_output.c:1983
 udpv6_sendmsg+0x160a/0x16b0 net/ipv6/udp.c:1527
 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:642
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
 __do_sys_sendmmsg net/socket.c:2519 [inline]
 __se_sys_sendmmsg net/socket.c:2516 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0xbca3c43d -> 0xfdb309e0

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 31039 Comm: syz-executor.2 Not tainted 5.13.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 2fc513aa114c0..7a7058f4f265c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1934,7 +1934,8 @@ static inline u32 net_tx_rndhash(void)
 
 static inline void sk_set_txhash(struct sock *sk)
 {
-	sk->sk_txhash = net_tx_rndhash();
+	/* This pairs with READ_ONCE() in skb_set_hash_from_sk() */
+	WRITE_ONCE(sk->sk_txhash, net_tx_rndhash());
 }
 
 static inline bool sk_rethink_txhash(struct sock *sk)
@@ -2206,9 +2207,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
 
 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
 {
-	if (sk->sk_txhash) {
+	/* This pairs with WRITE_ONCE() in sk_set_txhash() */
+	u32 txhash = READ_ONCE(sk->sk_txhash);
+
+	if (txhash) {
 		skb->l4_hash = 1;
-		skb->hash = sk->sk_txhash;
+		skb->hash = txhash;
 	}
 }
 
-- 
GitLab


From d1b5bee4c8be01585033be9b3a8878789285285f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Jun 2021 09:00:12 -0700
Subject: [PATCH 2665/3804] net/packet: annotate data race in packet_sendmsg()

There is a known race in packet_sendmsg(), addressed
in commit 32d3182cd2cd ("net/packet: fix race in tpacket_snd()")

Now that we have data_race(), we can use it to avoid a future KCSAN warning,
as syzbot loves stressing af_packet sockets :)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ae906eb4b269e..74e6e45a8e843 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3034,10 +3034,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
 
-	if (po->tx_ring.pg_vec)
+	/* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
+	 * tpacket_snd() will redo the check safely.
+	 */
+	if (data_race(po->tx_ring.pg_vec))
 		return tpacket_snd(po, msg);
-	else
-		return packet_snd(sock, msg, len);
+
+	return packet_snd(sock, msg, len);
 }
 
 /*
-- 
GitLab


From 5fc177ab759418c9537433e63301096e733fb915 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Thu, 10 Jun 2021 19:40:29 +0300
Subject: [PATCH 2666/3804] netfilter: synproxy: Fix out of bounds when parsing
 TCP options

The TCP option parser in synproxy (synproxy_parse_options) could read
one byte out of bounds. When the length is 1, the execution flow gets
into the loop, reads one byte of the opcode, and if the opcode is
neither TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds
the length of 1.

This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
out of bounds when parsing TCP options.").

v2 changes:

Added an early return when length < 0 to avoid calling
skb_header_pointer with negative length.

Cc: Young Xiao <92siuyang@gmail.com>
Fixes: 48b1de4c110a ("netfilter: add SYNPROXY core/target")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_synproxy_core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b100c04a0e435..3d6d49420db8b 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -31,6 +31,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
 	int length = (th->doff * 4) - sizeof(*th);
 	u8 buf[40], *ptr;
 
+	if (unlikely(length < 0))
+		return false;
+
 	ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
 	if (ptr == NULL)
 		return false;
@@ -47,6 +50,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
 			length--;
 			continue;
 		default:
+			if (length < 2)
+				return true;
 			opsize = *ptr++;
 			if (opsize < 2)
 				return true;
-- 
GitLab


From 07718be265680dcf496347d475ce1a5442f55ad7 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Thu, 10 Jun 2021 19:40:30 +0300
Subject: [PATCH 2667/3804] mptcp: Fix out of bounds when parsing TCP options

The TCP option parser in mptcp (mptcp_get_options) could read one byte
out of bounds. When the length is 1, the execution flow gets into the
loop, reads one byte of the opcode, and if the opcode is neither
TCPOPT_EOL nor TCPOPT_NOP, it reads one more byte, which exceeds the
length of 1.

This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
out of bounds when parsing TCP options.").

Cc: Young Xiao <92siuyang@gmail.com>
Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 6b825fb3fa832..9b263f27ce9bd 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -356,6 +356,8 @@ void mptcp_get_options(const struct sk_buff *skb,
 			length--;
 			continue;
 		default:
+			if (length < 2)
+				return;
 			opsize = *ptr++;
 			if (opsize < 2) /* "silly options" */
 				return;
-- 
GitLab


From ba91c49dedbde758ba0b72f57ac90b06ddf8e548 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@nvidia.com>
Date: Thu, 10 Jun 2021 19:40:31 +0300
Subject: [PATCH 2668/3804] sch_cake: Fix out of bounds when parsing TCP
 options and header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TCP option parser in cake qdisc (cake_get_tcpopt and
cake_tcph_may_drop) could read one byte out of bounds. When the length
is 1, the execution flow gets into the loop, reads one byte of the
opcode, and if the opcode is neither TCPOPT_EOL nor TCPOPT_NOP, it reads
one more byte, which exceeds the length of 1.

This fix is inspired by commit 9609dad263f8 ("ipv4: tcp_input: fix stack
out of bounds when parsing TCP options.").

v2 changes:

Added doff validation in cake_get_tcphdr to avoid parsing garbage as TCP
header. Although it wasn't strictly an out-of-bounds access (memory was
allocated), garbage values could be read where CAKE expected the TCP
header if doff was smaller than 5.

Cc: Young Xiao <92siuyang@gmail.com>
Fixes: 8b7138814f29 ("sch_cake: Add optional ACK filter")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cake.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 7d37638ee1c7a..5c15968b5155b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -943,7 +943,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
 	}
 
 	tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
-	if (!tcph)
+	if (!tcph || tcph->doff < 5)
 		return NULL;
 
 	return skb_header_pointer(skb, offset,
@@ -967,6 +967,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
 			length--;
 			continue;
 		}
+		if (length < 2)
+			break;
 		opsize = *ptr++;
 		if (opsize < 2 || opsize > length)
 			break;
@@ -1104,6 +1106,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
 			length--;
 			continue;
 		}
+		if (length < 2)
+			break;
 		opsize = *ptr++;
 		if (opsize < 2 || opsize > length)
 			break;
-- 
GitLab


From cabd10be0b9e4bf043dc828321875b937b2aa7ca Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Thu, 10 Jun 2021 11:15:06 +1000
Subject: [PATCH 2669/3804] m68k/mac: Replace macide driver with generic
 platform drivers

This allows m68k mac systems to switch from the deprecated IDE subsystem
to libata.

This was tested on my Quadra 630. I haven't tested it on my PowerBook 150
because I don't have a RAM adapter board for it. It appears that the
hardware I tested doesn't need macide_clear_irq() or macide_test_irq().
If it did, the generic driver would not have worked. It's possible that
those routines are needed for the PowerBook 150 but we can cross that
bridge if and when we come to it.

BTW, macide_clear_irq() appears to suffer from a race condition. The write
to the interrupt flags register could have unintended side effects as it
may alter other flag bits. Fortunately, all of the other bits are unused
by Linux. When tested on my Quadra 630, the assignment *ide_ifr &= ~0x20
was observed to have no effect on bit 5, so it may be redundant anyway.

Cc: Michael Schmitz <schmitzmic@gmail.com>
Cc: Joshua Thompson <funaho@jurai.org>
Reviewed-by: Michael Schmitz <schmitzmic@gmail.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Link: https://lore.kernel.org/r/11a56b3317df3bb2ddc15fd29b40b6820e9c7444.1623287706.git.fthain@linux-m68k.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/configs/mac_defconfig   |   1 -
 arch/m68k/configs/multi_defconfig |   1 -
 arch/m68k/mac/config.c            |  24 +++--
 drivers/ide/Kconfig               |  14 ---
 drivers/ide/Makefile              |   1 -
 drivers/ide/macide.c              | 161 ------------------------------
 6 files changed, 14 insertions(+), 188 deletions(-)
 delete mode 100644 drivers/ide/macide.c

diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 4e68b72d9c50f..406d3f2a16eae 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -319,7 +319,6 @@ CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_PLATFORM=y
-CONFIG_BLK_DEV_MAC_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d31896293c394..f40093a41c1a2 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -351,7 +351,6 @@ CONFIG_BLK_DEV_PLATFORM=y
 CONFIG_BLK_DEV_GAYLE=y
 CONFIG_BLK_DEV_BUDDHA=y
 CONFIG_BLK_DEV_FALCON_IDE=y
-CONFIG_BLK_DEV_MAC_IDE=y
 CONFIG_BLK_DEV_Q40IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 1cdac959bd914..5d16f9b47aa90 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -933,13 +933,15 @@ static const struct resource mac_scsi_ccl_rsrc[] __initconst = {
 	},
 };
 
-static const struct resource mac_ide_quadra_rsrc[] __initconst = {
-	DEFINE_RES_MEM(0x50F1A000, 0x104),
+static const struct resource mac_pata_quadra_rsrc[] __initconst = {
+	DEFINE_RES_MEM(0x50F1A000, 0x38),
+	DEFINE_RES_MEM(0x50F1A038, 0x04),
 	DEFINE_RES_IRQ(IRQ_NUBUS_F),
 };
 
-static const struct resource mac_ide_pb_rsrc[] __initconst = {
-	DEFINE_RES_MEM(0x50F1A000, 0x104),
+static const struct resource mac_pata_pb_rsrc[] __initconst = {
+	DEFINE_RES_MEM(0x50F1A000, 0x38),
+	DEFINE_RES_MEM(0x50F1A038, 0x04),
 	DEFINE_RES_IRQ(IRQ_NUBUS_C),
 };
 
@@ -949,7 +951,7 @@ static const struct resource mac_pata_baboon_rsrc[] __initconst = {
 	DEFINE_RES_IRQ(IRQ_BABOON_1),
 };
 
-static const struct pata_platform_info mac_pata_baboon_data __initconst = {
+static const struct pata_platform_info mac_pata_data __initconst = {
 	.ioport_shift = 2,
 };
 
@@ -1067,17 +1069,19 @@ int __init mac_platform_init(void)
 
 	switch (macintosh_config->ide_type) {
 	case MAC_IDE_QUADRA:
-		platform_device_register_simple("mac_ide", -1,
-			mac_ide_quadra_rsrc, ARRAY_SIZE(mac_ide_quadra_rsrc));
+		platform_device_register_resndata(NULL, "pata_platform", -1,
+			mac_pata_quadra_rsrc, ARRAY_SIZE(mac_pata_quadra_rsrc),
+			&mac_pata_data, sizeof(mac_pata_data));
 		break;
 	case MAC_IDE_PB:
-		platform_device_register_simple("mac_ide", -1,
-			mac_ide_pb_rsrc, ARRAY_SIZE(mac_ide_pb_rsrc));
+		platform_device_register_resndata(NULL, "pata_platform", -1,
+			mac_pata_pb_rsrc, ARRAY_SIZE(mac_pata_pb_rsrc),
+			&mac_pata_data, sizeof(mac_pata_data));
 		break;
 	case MAC_IDE_BABOON:
 		platform_device_register_resndata(NULL, "pata_platform", -1,
 			mac_pata_baboon_rsrc, ARRAY_SIZE(mac_pata_baboon_rsrc),
-			&mac_pata_baboon_data, sizeof(mac_pata_baboon_data));
+			&mac_pata_data, sizeof(mac_pata_data));
 		break;
 	}
 
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 19abf11c84c8a..8ce4a5878d0c7 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -739,20 +739,6 @@ config BLK_DEV_FALCON_IDE
 	  disks, CD-ROM drives, etc.) that are connected to the on-board IDE
 	  interface.
 
-config BLK_DEV_MAC_IDE
-	tristate "Macintosh Quadra/Powerbook IDE interface support"
-	depends on MAC
-	help
-	  This is the IDE driver for the on-board IDE interface on some m68k
-	  Macintosh models, namely Quadra/Centris 630, Performa 588 and
-	  Powerbook 150. The IDE interface on the Powerbook 190 is not
-	  supported by this driver and requires BLK_DEV_PLATFORM or
-	  PATA_PLATFORM.
-
-	  Say Y if you have such an Macintosh model and want to use IDE
-	  devices (hard disks, CD-ROM drives, etc.) that are connected to the
-	  on-board IDE interface.
-
 config BLK_DEV_Q40IDE
 	tristate "Q40/Q60 IDE interface support"
 	depends on Q40
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 2605b3cdaf475..45a1c0463bed0 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_BLK_DEV_4DRIVES)		+= ide-4drives.o
 
 obj-$(CONFIG_BLK_DEV_GAYLE)		+= gayle.o
 obj-$(CONFIG_BLK_DEV_FALCON_IDE)	+= falconide.o
-obj-$(CONFIG_BLK_DEV_MAC_IDE)		+= macide.o
 obj-$(CONFIG_BLK_DEV_Q40IDE)		+= q40ide.o
 obj-$(CONFIG_BLK_DEV_BUDDHA)		+= buddha.o
 
diff --git a/drivers/ide/macide.c b/drivers/ide/macide.c
deleted file mode 100644
index 8d2bf73bc548d..0000000000000
--- a/drivers/ide/macide.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- *  Macintosh IDE Driver
- *
- *     Copyright (C) 1998 by Michael Schmitz
- *
- *  This driver was written based on information obtained from the MacOS IDE
- *  driver binary by Mikael Forselius
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-
-#include <asm/macintosh.h>
-
-#define DRV_NAME "mac_ide"
-
-#define IDE_BASE 0x50F1A000	/* Base address of IDE controller */
-
-/*
- * Generic IDE registers as offsets from the base
- * These match MkLinux so they should be correct.
- */
-
-#define IDE_CONTROL	0x38	/* control/altstatus */
-
-/*
- * Mac-specific registers
- */
-
-/*
- * this register is odd; it doesn't seem to do much and it's
- * not word-aligned like virtually every other hardware register
- * on the Mac...
- */
-
-#define IDE_IFR		0x101	/* (0x101) IDE interrupt flags on Quadra:
-				 *
-				 * Bit 0+1: some interrupt flags
-				 * Bit 2+3: some interrupt enable
-				 * Bit 4:   ??
-				 * Bit 5:   IDE interrupt flag (any hwif)
-				 * Bit 6:   maybe IDE interrupt enable (any hwif) ??
-				 * Bit 7:   Any interrupt condition
-				 */
-
-volatile unsigned char *ide_ifr = (unsigned char *) (IDE_BASE + IDE_IFR);
-
-int macide_test_irq(ide_hwif_t *hwif)
-{
-	if (*ide_ifr & 0x20)
-		return 1;
-	return 0;
-}
-
-static void macide_clear_irq(ide_drive_t *drive)
-{
-	*ide_ifr &= ~0x20;
-}
-
-static void __init macide_setup_ports(struct ide_hw *hw, unsigned long base,
-				      int irq)
-{
-	int i;
-
-	memset(hw, 0, sizeof(*hw));
-
-	for (i = 0; i < 8; i++)
-		hw->io_ports_array[i] = base + i * 4;
-
-	hw->io_ports.ctl_addr = base + IDE_CONTROL;
-
-	hw->irq = irq;
-}
-
-static const struct ide_port_ops macide_port_ops = {
-	.clear_irq		= macide_clear_irq,
-	.test_irq		= macide_test_irq,
-};
-
-static const struct ide_port_info macide_port_info = {
-	.port_ops		= &macide_port_ops,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
-};
-
-static const char *mac_ide_name[] =
-	{ "Quadra", "Powerbook", "Powerbook Baboon" };
-
-/*
- * Probe for a Macintosh IDE interface
- */
-
-static int mac_ide_probe(struct platform_device *pdev)
-{
-	struct resource *mem, *irq;
-	struct ide_hw hw, *hws[] = { &hw };
-	struct ide_port_info d = macide_port_info;
-	struct ide_host *host;
-	int rc;
-
-	if (!MACH_IS_MAC)
-		return -ENODEV;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!mem)
-		return -ENODEV;
-
-	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!irq)
-		return -ENODEV;
-
-	if (!devm_request_mem_region(&pdev->dev, mem->start,
-				     resource_size(mem), DRV_NAME)) {
-		dev_err(&pdev->dev, "resources busy\n");
-		return -EBUSY;
-	}
-
-	printk(KERN_INFO "ide: Macintosh %s IDE controller\n",
-			 mac_ide_name[macintosh_config->ide_type - 1]);
-
-	macide_setup_ports(&hw, mem->start, irq->start);
-
-	rc = ide_host_add(&d, hws, 1, &host);
-	if (rc)
-		return rc;
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-}
-
-static int mac_ide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = platform_get_drvdata(pdev);
-
-	ide_host_remove(host);
-	return 0;
-}
-
-static struct platform_driver mac_ide_driver = {
-	.driver = {
-		.name = DRV_NAME,
-	},
-	.probe  = mac_ide_probe,
-	.remove = mac_ide_remove,
-};
-
-module_platform_driver(mac_ide_driver);
-
-MODULE_ALIAS("platform:" DRV_NAME);
-MODULE_LICENSE("GPL");
-- 
GitLab


From 44b1fbc0f5f30e66a56d29575349f0b1ebe2b0ee Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Thu, 10 Jun 2021 11:15:06 +1000
Subject: [PATCH 2670/3804] m68k/q40: Replace q40ide driver with pata_falcon
 and falconide

This allows m68k q40 systems to switch from the deprecated IDE subsystem
to libata.

Enhance the byte-swapping falconide and pata_falcon platform drivers to
accept an irq resource, for use on q40. Atari ST-DMA IRQ arrangements seem
to co-exist with q40 IRQ arrangements without too much mess.

The new IO resources were added solely for the purpose of making
request_region() reservations identical to those made by q40ide: these
regions aren't used for actual IO.

Cc: Michael Schmitz <schmitzmic@gmail.com>
Cc: Richard Zidlicky <rz@linux-m68k.org>
Reviewed-and-tested-by: Michael Schmitz <schmitzmic@gmail.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Link: https://lore.kernel.org/r/eefb7e9c2291e09fb4e065ce06bc105f05bb9e06.1623287706.git.fthain@linux-m68k.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/atari/config.c          |  12 +--
 arch/m68k/configs/multi_defconfig |   1 -
 arch/m68k/configs/q40_defconfig   |   2 +-
 arch/m68k/q40/config.c            |  37 +++++--
 drivers/ata/Kconfig               |   6 +-
 drivers/ata/pata_falcon.c         |  62 ++++++++---
 drivers/ide/Kconfig               |  18 +---
 drivers/ide/Makefile              |   1 -
 drivers/ide/falconide.c           |  79 +++++++++-----
 drivers/ide/q40ide.c              | 168 ------------------------------
 10 files changed, 141 insertions(+), 245 deletions(-)
 delete mode 100644 drivers/ide/q40ide.c

diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 44f9b5216ac90..261a0f57cc9ac 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -875,16 +875,8 @@ static const struct resource atari_scsi_tt_rsrc[] __initconst = {
 #define FALCON_IDE_BASE	0xfff00000
 
 static const struct resource atari_falconide_rsrc[] __initconst = {
-	{
-		.flags = IORESOURCE_MEM,
-		.start = FALCON_IDE_BASE,
-		.end   = FALCON_IDE_BASE + 0x39,
-	},
-	{
-		.flags = IORESOURCE_IRQ,
-		.start = IRQ_MFP_FSCSI,
-		.end   = IRQ_MFP_FSCSI,
-	},
+	DEFINE_RES_MEM(FALCON_IDE_BASE, 0x38),
+	DEFINE_RES_MEM(FALCON_IDE_BASE + 0x38, 2),
 };
 
 int __init atari_platform_init(void)
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index f40093a41c1a2..f0992435e9ef7 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -351,7 +351,6 @@ CONFIG_BLK_DEV_PLATFORM=y
 CONFIG_BLK_DEV_GAYLE=y
 CONFIG_BLK_DEV_BUDDHA=y
 CONFIG_BLK_DEV_FALCON_IDE=y
-CONFIG_BLK_DEV_Q40IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 664025a0f6a41..b893163d9f068 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -317,7 +317,7 @@ CONFIG_DUMMY_IRQ=m
 CONFIG_IDE=y
 CONFIG_IDE_GD_ATAPI=y
 CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_Q40IDE=y
+CONFIG_BLK_DEV_FALCON_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index d6a4238752311..5caf1e5be1c2b 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -286,14 +286,39 @@ static int q40_set_rtc_pll(struct rtc_pll_info *pll)
 		return -EINVAL;
 }
 
-static __init int q40_add_kbd_device(void)
-{
-	struct platform_device *pdev;
+#define PCIDE_BASE1	0x1f0
+#define PCIDE_BASE2	0x170
+#define PCIDE_CTL	0x206
+
+static const struct resource q40_pata_rsrc_0[] __initconst = {
+	DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE1 * 4, 0x38),
+	DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE1 + PCIDE_CTL) * 4, 2),
+	DEFINE_RES_IO(PCIDE_BASE1, 8),
+	DEFINE_RES_IO(PCIDE_BASE1 + PCIDE_CTL, 1),
+	DEFINE_RES_IRQ(14),
+};
 
+static const struct resource q40_pata_rsrc_1[] __initconst = {
+	DEFINE_RES_MEM(q40_isa_io_base + PCIDE_BASE2 * 4, 0x38),
+	DEFINE_RES_MEM(q40_isa_io_base + (PCIDE_BASE2 + PCIDE_CTL) * 4, 2),
+	DEFINE_RES_IO(PCIDE_BASE2, 8),
+	DEFINE_RES_IO(PCIDE_BASE2 + PCIDE_CTL, 1),
+	DEFINE_RES_IRQ(15),
+};
+
+static __init int q40_platform_init(void)
+{
 	if (!MACH_IS_Q40)
 		return -ENODEV;
 
-	pdev = platform_device_register_simple("q40kbd", -1, NULL, 0);
-	return PTR_ERR_OR_ZERO(pdev);
+	platform_device_register_simple("q40kbd", -1, NULL, 0);
+
+	platform_device_register_simple("atari-falcon-ide", 0, q40_pata_rsrc_0,
+					ARRAY_SIZE(q40_pata_rsrc_0));
+
+	platform_device_register_simple("atari-falcon-ide", 1, q40_pata_rsrc_1,
+					ARRAY_SIZE(q40_pata_rsrc_1));
+
+	return 0;
 }
-arch_initcall(q40_add_kbd_device);
+arch_initcall(q40_platform_init);
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 030cb32da980f..b7a5abee2147f 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -1015,11 +1015,11 @@ config PATA_CMD640_PCI
 	  If unsure, say N.
 
 config PATA_FALCON
-	tristate "Atari Falcon PATA support"
-	depends on M68K && ATARI
+	tristate "Atari Falcon and Q40/Q60 PATA support"
+	depends on M68K && (ATARI || Q40)
 	help
 	  This option enables support for the on-board IDE
-	  interface on the Atari Falcon.
+	  interface on the Atari Falcon and Q40/Q60.
 
 	  If unsure, say N.
 
diff --git a/drivers/ata/pata_falcon.c b/drivers/ata/pata_falcon.c
index 27b0952fde6b6..9d0dd8f4c21c0 100644
--- a/drivers/ata/pata_falcon.c
+++ b/drivers/ata/pata_falcon.c
@@ -33,8 +33,6 @@
 #define DRV_NAME "pata_falcon"
 #define DRV_VERSION "0.1.0"
 
-#define ATA_HD_CONTROL	0x39
-
 static struct scsi_host_template pata_falcon_sht = {
 	ATA_PIO_SHT(DRV_NAME),
 };
@@ -121,23 +119,42 @@ static struct ata_port_operations pata_falcon_ops = {
 
 static int __init pata_falcon_init_one(struct platform_device *pdev)
 {
-	struct resource *res;
+	struct resource *base_mem_res, *ctl_mem_res;
+	struct resource *base_res, *ctl_res, *irq_res;
 	struct ata_host *host;
 	struct ata_port *ap;
 	void __iomem *base;
+	int irq = 0;
 
-	dev_info(&pdev->dev, "Atari Falcon PATA controller\n");
+	dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
+	base_res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (base_res && !devm_request_region(&pdev->dev, base_res->start,
+					   resource_size(base_res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
+		return -EBUSY;
+	}
 
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), DRV_NAME)) {
+	ctl_res = platform_get_resource(pdev, IORESOURCE_IO, 1);
+	if (ctl_res && !devm_request_region(&pdev->dev, ctl_res->start,
+					    resource_size(ctl_res), DRV_NAME)) {
 		dev_err(&pdev->dev, "resources busy\n");
 		return -EBUSY;
 	}
 
+	base_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!base_mem_res)
+		return -ENODEV;
+	if (!devm_request_mem_region(&pdev->dev, base_mem_res->start,
+				     resource_size(base_mem_res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
+		return -EBUSY;
+	}
+
+	ctl_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!ctl_mem_res)
+		return -ENODEV;
+
 	/* allocate host */
 	host = ata_host_alloc(&pdev->dev, 1);
 	if (!host)
@@ -147,10 +164,10 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
 	ap->ops = &pata_falcon_ops;
 	ap->pio_mask = ATA_PIO4;
 	ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
-	ap->flags |= ATA_FLAG_PIO_POLLING;
 
-	base = (void __iomem *)res->start;
-	ap->ioaddr.data_addr		= base;
+	base = (void __iomem *)base_mem_res->start;
+	/* N.B. this assumes data_addr will be used for word-sized I/O only */
+	ap->ioaddr.data_addr		= base + 0 + 0 * 4;
 	ap->ioaddr.error_addr		= base + 1 + 1 * 4;
 	ap->ioaddr.feature_addr		= base + 1 + 1 * 4;
 	ap->ioaddr.nsect_addr		= base + 1 + 2 * 4;
@@ -161,14 +178,25 @@ static int __init pata_falcon_init_one(struct platform_device *pdev)
 	ap->ioaddr.status_addr		= base + 1 + 7 * 4;
 	ap->ioaddr.command_addr		= base + 1 + 7 * 4;
 
-	ap->ioaddr.altstatus_addr	= base + ATA_HD_CONTROL;
-	ap->ioaddr.ctl_addr		= base + ATA_HD_CONTROL;
+	base = (void __iomem *)ctl_mem_res->start;
+	ap->ioaddr.altstatus_addr	= base + 1;
+	ap->ioaddr.ctl_addr		= base + 1;
 
-	ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx", (unsigned long)base,
-		      (unsigned long)base + ATA_HD_CONTROL);
+	ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
+		      (unsigned long)base_mem_res->start,
+		      (unsigned long)ctl_mem_res->start);
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (irq_res && irq_res->start > 0) {
+		irq = irq_res->start;
+	} else {
+		ap->flags |= ATA_FLAG_PIO_POLLING;
+		ata_port_desc(ap, "no IRQ, using PIO polling");
+	}
 
 	/* activate */
-	return ata_host_activate(host, 0, NULL, 0, &pata_falcon_sht);
+	return ata_host_activate(host, irq, irq ? ata_sff_interrupt : NULL,
+				 IRQF_SHARED, &pata_falcon_sht);
 }
 
 static int __exit pata_falcon_remove_one(struct platform_device *pdev)
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
index 8ce4a5878d0c7..8af1ac69e5f82 100644
--- a/drivers/ide/Kconfig
+++ b/drivers/ide/Kconfig
@@ -731,21 +731,13 @@ config BLK_DEV_BUDDHA
 	  to one of its IDE interfaces.
 
 config BLK_DEV_FALCON_IDE
-	tristate "Falcon IDE interface support"
-	depends on ATARI
+	tristate "Falcon and Q40/Q60 IDE interface support"
+	depends on ATARI || Q40
 	help
 	  This is the IDE driver for the on-board IDE interface on the Atari
-	  Falcon. Say Y if you have a Falcon and want to use IDE devices (hard
-	  disks, CD-ROM drives, etc.) that are connected to the on-board IDE
-	  interface.
-
-config BLK_DEV_Q40IDE
-	tristate "Q40/Q60 IDE interface support"
-	depends on Q40
-	help
-	  Enable the on-board IDE controller in the Q40/Q60.  This should
-	  normally be on; disable it only if you are running a custom hard
-	  drive subsystem through an expansion card.
+	  Falcon and Q40/Q60. Say Y if you have such a machine and want to use
+	  IDE devices (hard disks, CD-ROM drives, etc.) that are connected to
+	  the on-board IDE interface.
 
 config BLK_DEV_PALMCHIP_BK3710
 	tristate "Palmchip bk3710 IDE controller support"
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
index 45a1c0463bed0..991eb72a786bc 100644
--- a/drivers/ide/Makefile
+++ b/drivers/ide/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_BLK_DEV_4DRIVES)		+= ide-4drives.o
 
 obj-$(CONFIG_BLK_DEV_GAYLE)		+= gayle.o
 obj-$(CONFIG_BLK_DEV_FALCON_IDE)	+= falconide.o
-obj-$(CONFIG_BLK_DEV_Q40IDE)		+= q40ide.o
 obj-$(CONFIG_BLK_DEV_BUDDHA)		+= buddha.o
 
 obj-$(CONFIG_BLK_DEV_AEC62XX)		+= aec62xx.o
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index bb86d84558d9a..a73a9dc17e4d8 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -25,12 +25,7 @@
 
 #define DRV_NAME "falconide"
 
-    /*
-     *  Offsets from base address
-     */
-
-#define ATA_HD_CONTROL	0x39
-
+#ifdef CONFIG_ATARI
     /*
      *  falconide_intr_lock is used to obtain access to the IDE interrupt,
      *  which is shared between several drivers.
@@ -55,6 +50,7 @@ static void falconide_get_lock(irq_handler_t handler, void *data)
 		falconide_intr_lock = 1;
 	}
 }
+#endif
 
 static void falconide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
 				 void *buf, unsigned int len)
@@ -98,8 +94,10 @@ static const struct ide_tp_ops falconide_tp_ops = {
 };
 
 static const struct ide_port_info falconide_port_info = {
+#ifdef CONFIG_ATARI
 	.get_lock		= falconide_get_lock,
 	.release_lock		= falconide_release_lock,
+#endif
 	.tp_ops			= &falconide_tp_ops,
 	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
 				  IDE_HFLAG_NO_DMA,
@@ -107,7 +105,8 @@ static const struct ide_port_info falconide_port_info = {
 	.chipset		= ide_generic,
 };
 
-static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base)
+static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base,
+					 unsigned long ctl, int irq)
 {
 	int i;
 
@@ -118,9 +117,9 @@ static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base)
 	for (i = 1; i < 8; i++)
 		hw->io_ports_array[i] = base + 1 + i * 4;
 
-	hw->io_ports.ctl_addr = base + ATA_HD_CONTROL;
+	hw->io_ports.ctl_addr = ctl + 1;
 
-	hw->irq = IRQ_MFP_IDE;
+	hw->irq = irq;
 }
 
     /*
@@ -129,37 +128,69 @@ static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base)
 
 static int __init falconide_init(struct platform_device *pdev)
 {
-	struct resource *res;
+	struct resource *base_mem_res, *ctl_mem_res;
+	struct resource *base_res, *ctl_res, *irq_res;
 	struct ide_host *host;
 	struct ide_hw hw, *hws[] = { &hw };
-	unsigned long base;
 	int rc;
+	int irq;
+
+	dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 IDE controller\n");
+
+	base_res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	if (base_res && !devm_request_region(&pdev->dev, base_res->start,
+					   resource_size(base_res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
+		return -EBUSY;
+	}
 
-	dev_info(&pdev->dev, "Atari Falcon IDE controller\n");
+	ctl_res = platform_get_resource(pdev, IORESOURCE_IO, 1);
+	if (ctl_res && !devm_request_region(&pdev->dev, ctl_res->start,
+					   resource_size(ctl_res), DRV_NAME)) {
+		dev_err(&pdev->dev, "resources busy\n");
+		return -EBUSY;
+	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
+	base_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!base_mem_res)
 		return -ENODEV;
 
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), DRV_NAME)) {
+	if (!devm_request_mem_region(&pdev->dev, base_mem_res->start,
+				     resource_size(base_mem_res), DRV_NAME)) {
 		dev_err(&pdev->dev, "resources busy\n");
 		return -EBUSY;
 	}
 
-	base = (unsigned long)res->start;
+	ctl_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!ctl_mem_res)
+		return -ENODEV;
+
+	if (MACH_IS_ATARI) {
+		irq = IRQ_MFP_IDE;
+	} else {
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+		if (irq_res && irq_res->start > 0)
+			irq = irq_res->start;
+		else
+			return -ENODEV;
+	}
 
-	falconide_setup_ports(&hw, base);
+	falconide_setup_ports(&hw, base_mem_res->start, ctl_mem_res->start, irq);
 
 	host = ide_host_alloc(&falconide_port_info, hws, 1);
-	if (host == NULL) {
-		rc = -ENOMEM;
-		goto err;
+	if (!host)
+		return -ENOMEM;
+
+	if (!MACH_IS_ATARI) {
+		host->get_lock = NULL;
+		host->release_lock = NULL;
 	}
 
-	falconide_get_lock(NULL, NULL);
+	if (host->get_lock)
+		host->get_lock(NULL, NULL);
 	rc = ide_host_register(host, &falconide_port_info, hws);
-	falconide_release_lock();
+	if (host->release_lock)
+		host->release_lock();
 
 	if (rc)
 		goto err_free;
@@ -168,8 +199,6 @@ static int __init falconide_init(struct platform_device *pdev)
 	return 0;
 err_free:
 	ide_host_free(host);
-err:
-	release_mem_region(res->start, resource_size(res));
 	return rc;
 }
 
diff --git a/drivers/ide/q40ide.c b/drivers/ide/q40ide.c
deleted file mode 100644
index ecd0a69245f65..0000000000000
--- a/drivers/ide/q40ide.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *  Q40 I/O port IDE Driver
- *
- *     (c) Richard Zidlicky
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- *
- *
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/module.h>
-
-#include <asm/ide.h>
-
-    /*
-     *  Bases of the IDE interfaces
-     */
-
-#define Q40IDE_NUM_HWIFS	2
-
-#define PCIDE_BASE1	0x1f0
-#define PCIDE_BASE2	0x170
-#define PCIDE_BASE3	0x1e8
-#define PCIDE_BASE4	0x168
-#define PCIDE_BASE5	0x1e0
-#define PCIDE_BASE6	0x160
-
-static const unsigned long pcide_bases[Q40IDE_NUM_HWIFS] = {
-    PCIDE_BASE1, PCIDE_BASE2, /* PCIDE_BASE3, PCIDE_BASE4  , PCIDE_BASE5,
-    PCIDE_BASE6 */
-};
-
-static int q40ide_default_irq(unsigned long base)
-{
-           switch (base) {
-	            case 0x1f0: return 14;
-		    case 0x170: return 15;
-		    case 0x1e8: return 11;
-		    default:
-			return 0;
-	   }
-}
-
-
-/*
- * Addresses are pretranslated for Q40 ISA access.
- */
-static void q40_ide_setup_ports(struct ide_hw *hw, unsigned long base, int irq)
-{
-	memset(hw, 0, sizeof(*hw));
-	/* BIG FAT WARNING: 
-	   assumption: only DATA port is ever used in 16 bit mode */
-	hw->io_ports.data_addr = Q40_ISA_IO_W(base);
-	hw->io_ports.error_addr = Q40_ISA_IO_B(base + 1);
-	hw->io_ports.nsect_addr = Q40_ISA_IO_B(base + 2);
-	hw->io_ports.lbal_addr = Q40_ISA_IO_B(base + 3);
-	hw->io_ports.lbam_addr = Q40_ISA_IO_B(base + 4);
-	hw->io_ports.lbah_addr = Q40_ISA_IO_B(base + 5);
-	hw->io_ports.device_addr = Q40_ISA_IO_B(base + 6);
-	hw->io_ports.status_addr = Q40_ISA_IO_B(base + 7);
-	hw->io_ports.ctl_addr = Q40_ISA_IO_B(base + 0x206);
-
-	hw->irq = irq;
-}
-
-static void q40ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
-			      void *buf, unsigned int len)
-{
-	unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-	if (drive->media == ide_disk && cmd && (cmd->tf_flags & IDE_TFLAG_FS)) {
-		__ide_mm_insw(data_addr, buf, (len + 1) / 2);
-		return;
-	}
-
-	raw_insw_swapw((u16 *)data_addr, buf, (len + 1) / 2);
-}
-
-static void q40ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd,
-			       void *buf, unsigned int len)
-{
-	unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-	if (drive->media == ide_disk && cmd && (cmd->tf_flags & IDE_TFLAG_FS)) {
-		__ide_mm_outsw(data_addr, buf, (len + 1) / 2);
-		return;
-	}
-
-	raw_outsw_swapw((u16 *)data_addr, buf, (len + 1) / 2);
-}
-
-/* Q40 has a byte-swapped IDE interface */
-static const struct ide_tp_ops q40ide_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= q40ide_input_data,
-	.output_data		= q40ide_output_data,
-};
-
-static const struct ide_port_info q40ide_port_info = {
-	.tp_ops			= &q40ide_tp_ops,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
-};
-
-/* 
- * the static array is needed to have the name reported in /proc/ioports,
- * hwif->name unfortunately isn't available yet
- */
-static const char *q40_ide_names[Q40IDE_NUM_HWIFS]={
-	"ide0", "ide1"
-};
-
-/*
- *  Probe for Q40 IDE interfaces
- */
-
-static int __init q40ide_init(void)
-{
-    int i;
-    struct ide_hw hw[Q40IDE_NUM_HWIFS], *hws[] = { NULL, NULL };
-
-    if (!MACH_IS_Q40)
-      return -ENODEV;
-
-    printk(KERN_INFO "ide: Q40 IDE controller\n");
-
-    for (i = 0; i < Q40IDE_NUM_HWIFS; i++) {
-	const char *name = q40_ide_names[i];
-
-	if (!request_region(pcide_bases[i], 8, name)) {
-		printk("could not reserve ports %lx-%lx for %s\n",
-		       pcide_bases[i],pcide_bases[i]+8,name);
-		continue;
-	}
-	if (!request_region(pcide_bases[i]+0x206, 1, name)) {
-		printk("could not reserve port %lx for %s\n",
-		       pcide_bases[i]+0x206,name);
-		release_region(pcide_bases[i], 8);
-		continue;
-	}
-	q40_ide_setup_ports(&hw[i], pcide_bases[i],
-			q40ide_default_irq(pcide_bases[i]));
-
-	hws[i] = &hw[i];
-    }
-
-    return ide_host_add(&q40ide_port_info, hws, Q40IDE_NUM_HWIFS, NULL);
-}
-
-module_init(q40ide_init);
-
-MODULE_LICENSE("GPL");
-- 
GitLab


From 992da01aa932b432ef8dc3885fa76415b5dbe43f Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 10 Jun 2021 16:37:37 +0100
Subject: [PATCH 2671/3804] io_uring: change registration/upd/rsrc tagging ABI

There are ABI moments about recently added rsrc registration/update and
tagging that might become a nuisance in the future. First,
IORING_REGISTER_RSRC[_UPD] hide different types of resources under it,
so breaks fine control over them by restrictions. It works for now, but
once those are wanted under restrictions it would require a rework.

It was also inconvenient trying to fit a new resource not supporting
all the features (e.g. dynamic update) into the interface, so better
to return to IORING_REGISTER_* top level dispatching.

Second, register/update were considered to accept a type of resource,
however that's not a good idea because there might be several ways of
registration of a single resource type, e.g. we may want to add
non-contig buffers or anything more exquisite as dma mapped memory.
So, remove IORING_RSRC_[FILE,BUFFER] out of the ABI, and place them
internally for now to limit changes.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9b554897a7c17ad6e3becc48dfed2f7af9f423d5.1623339162.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 39 ++++++++++++++++++++++++-----------
 include/uapi/linux/io_uring.h | 18 ++++++++--------
 2 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 42380ed563c41..663fef3d56dff 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -783,6 +783,11 @@ struct io_task_work {
 	task_work_func_t	func;
 };
 
+enum {
+	IORING_RSRC_FILE		= 0,
+	IORING_RSRC_BUFFER		= 1,
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -9911,7 +9916,7 @@ static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
 }
 
 static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
-				   unsigned size)
+				   unsigned size, unsigned type)
 {
 	struct io_uring_rsrc_update2 up;
 
@@ -9919,13 +9924,13 @@ static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
 		return -EINVAL;
 	if (copy_from_user(&up, arg, sizeof(up)))
 		return -EFAULT;
-	if (!up.nr)
+	if (!up.nr || up.resv)
 		return -EINVAL;
-	return __io_register_rsrc_update(ctx, up.type, &up, up.nr);
+	return __io_register_rsrc_update(ctx, type, &up, up.nr);
 }
 
 static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
-			    unsigned int size)
+			    unsigned int size, unsigned int type)
 {
 	struct io_uring_rsrc_register rr;
 
@@ -9936,10 +9941,10 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 	memset(&rr, 0, sizeof(rr));
 	if (copy_from_user(&rr, arg, size))
 		return -EFAULT;
-	if (!rr.nr)
+	if (!rr.nr || rr.resv || rr.resv2)
 		return -EINVAL;
 
-	switch (rr.type) {
+	switch (type) {
 	case IORING_RSRC_FILE:
 		return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
 					     rr.nr, u64_to_user_ptr(rr.tags));
@@ -9961,8 +9966,10 @@ static bool io_register_op_must_quiesce(int op)
 	case IORING_REGISTER_PROBE:
 	case IORING_REGISTER_PERSONALITY:
 	case IORING_UNREGISTER_PERSONALITY:
-	case IORING_REGISTER_RSRC:
-	case IORING_REGISTER_RSRC_UPDATE:
+	case IORING_REGISTER_FILES2:
+	case IORING_REGISTER_FILES_UPDATE2:
+	case IORING_REGISTER_BUFFERS2:
+	case IORING_REGISTER_BUFFERS_UPDATE:
 		return false;
 	default:
 		return true;
@@ -10088,11 +10095,19 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_RESTRICTIONS:
 		ret = io_register_restrictions(ctx, arg, nr_args);
 		break;
-	case IORING_REGISTER_RSRC:
-		ret = io_register_rsrc(ctx, arg, nr_args);
+	case IORING_REGISTER_FILES2:
+		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE);
+		break;
+	case IORING_REGISTER_FILES_UPDATE2:
+		ret = io_register_rsrc_update(ctx, arg, nr_args,
+					      IORING_RSRC_FILE);
+		break;
+	case IORING_REGISTER_BUFFERS2:
+		ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER);
 		break;
-	case IORING_REGISTER_RSRC_UPDATE:
-		ret = io_register_rsrc_update(ctx, arg, nr_args);
+	case IORING_REGISTER_BUFFERS_UPDATE:
+		ret = io_register_rsrc_update(ctx, arg, nr_args,
+					      IORING_RSRC_BUFFER);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e1ae466833017..48b4ddcd56ffa 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -298,8 +298,12 @@ enum {
 	IORING_UNREGISTER_PERSONALITY		= 10,
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
-	IORING_REGISTER_RSRC			= 13,
-	IORING_REGISTER_RSRC_UPDATE		= 14,
+
+	/* extended with tagging */
+	IORING_REGISTER_FILES2			= 13,
+	IORING_REGISTER_FILES_UPDATE2		= 14,
+	IORING_REGISTER_BUFFERS2		= 15,
+	IORING_REGISTER_BUFFERS_UPDATE		= 16,
 
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -312,14 +316,10 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
 
-enum {
-	IORING_RSRC_FILE		= 0,
-	IORING_RSRC_BUFFER		= 1,
-};
-
 struct io_uring_rsrc_register {
-	__u32 type;
 	__u32 nr;
+	__u32 resv;
+	__u64 resv2;
 	__aligned_u64 data;
 	__aligned_u64 tags;
 };
@@ -335,8 +335,8 @@ struct io_uring_rsrc_update2 {
 	__u32 resv;
 	__aligned_u64 data;
 	__aligned_u64 tags;
-	__u32 type;
 	__u32 nr;
+	__u32 resv2;
 };
 
 /* Skip updating fd indexes set to this value in the fd table */
-- 
GitLab


From 9690557e22d63f13534fd167d293ac8ed8b104f9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 10 Jun 2021 16:37:38 +0100
Subject: [PATCH 2672/3804] io_uring: add feature flag for rsrc tags

Add IORING_FEAT_RSRC_TAGS indicating that io_uring supports a bunch of
new IORING_REGISTER operations, in particular
IORING_REGISTER_[FILES[,UPDATE]2,BUFFERS[2,UPDATE]] that support rsrc
tagging, and also indicating implemented dynamic fixed buffer updates.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9b995d4045b6c6b4ab7510ca124fd25ac2203af7.1623339162.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 3 ++-
 include/uapi/linux/io_uring.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 663fef3d56dff..fa8794c61af7b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -9676,7 +9676,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
 			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
-			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS;
+			IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
+			IORING_FEAT_RSRC_TAGS;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 48b4ddcd56ffa..162ff99ed2cb2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -280,6 +280,7 @@ struct io_uring_params {
 #define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
 #define IORING_FEAT_EXT_ARG		(1U << 8)
 #define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
+#define IORING_FEAT_RSRC_TAGS		(1U << 10)
 
 /*
  * io_uring_register(2) opcodes and arguments
-- 
GitLab


From 5e63215c2f64079fbd011df5005c8bea63f149c2 Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitaly.wool@konsulko.com>
Date: Mon, 31 May 2021 11:53:42 +0300
Subject: [PATCH 2673/3804] riscv: xip: support runtime trap patching

RISCV_ERRATA_ALTERNATIVE patches text at runtime which is currently
not possible when the kernel is executed from the flash in XIP mode.
Since runtime patching concerns only traps at the moment, let's just
have all the traps reside in RAM anyway if RISCV_ERRATA_ALTERNATIVE
is set. Thus, these functions will be patch-able even when the .text
section is in flash.

Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/traps.c           | 13 +++++++++----
 arch/riscv/kernel/vmlinux-xip.lds.S | 15 ++++++++++++++-
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 0721b97985951..7bc88d8aab97f 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -86,8 +86,13 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
 	}
 }
 
+#if defined (CONFIG_XIP_KERNEL) && defined (CONFIG_RISCV_ERRATA_ALTERNATIVE)
+#define __trap_section		__section(".xip.traps")
+#else
+#define __trap_section
+#endif
 #define DO_ERROR_INFO(name, signo, code, str)				\
-asmlinkage __visible void name(struct pt_regs *regs)			\
+asmlinkage __visible __trap_section void name(struct pt_regs *regs)	\
 {									\
 	do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
 }
@@ -111,7 +116,7 @@ DO_ERROR_INFO(do_trap_store_misaligned,
 int handle_misaligned_load(struct pt_regs *regs);
 int handle_misaligned_store(struct pt_regs *regs);
 
-asmlinkage void do_trap_load_misaligned(struct pt_regs *regs)
+asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs)
 {
 	if (!handle_misaligned_load(regs))
 		return;
@@ -119,7 +124,7 @@ asmlinkage void do_trap_load_misaligned(struct pt_regs *regs)
 		      "Oops - load address misaligned");
 }
 
-asmlinkage void do_trap_store_misaligned(struct pt_regs *regs)
+asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs)
 {
 	if (!handle_misaligned_store(regs))
 		return;
@@ -146,7 +151,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
 	return GET_INSN_LENGTH(insn);
 }
 
-asmlinkage __visible void do_trap_break(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
 {
 #ifdef CONFIG_KPROBES
 	if (kprobe_single_step_handler(regs))
diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S
index 4b29b9917f99d..a3ff09c4c3f91 100644
--- a/arch/riscv/kernel/vmlinux-xip.lds.S
+++ b/arch/riscv/kernel/vmlinux-xip.lds.S
@@ -99,9 +99,22 @@ SECTIONS
 	}
 	PERCPU_SECTION(L1_CACHE_BYTES)
 
-	. = ALIGN(PAGE_SIZE);
+	. = ALIGN(8);
+	.alternative : {
+		__alt_start = .;
+		*(.alternative)
+		__alt_end = .;
+	}
 	__init_end = .;
 
+	. = ALIGN(16);
+	.xip.traps : {
+		__xip_traps_start = .;
+		*(.xip.traps)
+		__xip_traps_end = .;
+	}
+
+	. = ALIGN(PAGE_SIZE);
 	.sdata : {
 		__global_pointer$ = . + 0x800;
 		*(.sdata*)
-- 
GitLab


From 42e0e0b453bc6ead49c573ed512502069627546b Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Tue, 11 May 2021 00:28:38 +0800
Subject: [PATCH 2674/3804] riscv: code patching only works on !XIP_KERNEL

Some features which need code patching such as KPROBES, DYNAMIC_FTRACE,
KGDB can only work on !XIP_KERNEL. Add dependencies for these features
that rely on code patching.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Kconfig | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index c5914e70a0fd7..18ec0f9bb8d5c 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -61,11 +61,11 @@ config RISCV
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
-	select HAVE_ARCH_JUMP_LABEL
-	select HAVE_ARCH_JUMP_LABEL_RELATIVE
+	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
+	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
 	select HAVE_ARCH_KASAN if MMU && 64BIT
 	select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
-	select HAVE_ARCH_KGDB
+	select HAVE_ARCH_KGDB if !XIP_KERNEL
 	select HAVE_ARCH_KGDB_QXFER_PKT
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -80,9 +80,9 @@ config RISCV
 	select HAVE_GCC_PLUGINS
 	select HAVE_GENERIC_VDSO if MMU && 64BIT
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select HAVE_KPROBES
-	select HAVE_KPROBES_ON_FTRACE
-	select HAVE_KRETPROBES
+	select HAVE_KPROBES if !XIP_KERNEL
+	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
+	select HAVE_KRETPROBES if !XIP_KERNEL
 	select HAVE_PCI
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_REGS
@@ -231,11 +231,11 @@ config ARCH_RV64I
 	bool "RV64I"
 	select 64BIT
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
-	select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8)
+	select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8)
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
-	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
 	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
 	select SWIOTLB if MMU
 
 endchoice
-- 
GitLab


From 72f961320d5d15bfcb26dbe3edaa3f7d25fd2c8a Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 10 Jun 2021 15:59:40 -0700
Subject: [PATCH 2675/3804] mptcp: try harder to borrow memory from subflow
 under pressure

If the host is under severe memory pressure, and RX forward
memory allocation for the msk fails, we try to borrow the
required memory from the ingress subflow.

The current attempt is a bit flaky: if skb->truesize is less
than SK_MEM_QUANTUM, the ssk will not release any memory, and
the next schedule will fail again.

Instead, directly move the required amount of pages from the
ssk to the msk, if available

Fixes: 9c3f94e1681b ("mptcp: add missing memory scheduling in the rx path")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5edc686faff15..534cf500521df 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 
 	/* try to fetch required memory from subflow */
 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-		if (ssk->sk_forward_alloc < skb->truesize)
-			goto drop;
-		__sk_mem_reclaim(ssk, skb->truesize);
-		if (!sk_rmem_schedule(sk, skb, skb->truesize))
+		int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+		if (ssk->sk_forward_alloc < amount)
 			goto drop;
+
+		ssk->sk_forward_alloc -= amount;
+		sk->sk_forward_alloc += amount;
 	}
 
 	/* the skb map_seq accounts for the skb offset:
-- 
GitLab


From 99d1055ce2469dca3dd14be0991ff8133e25e3d0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 10 Jun 2021 15:59:41 -0700
Subject: [PATCH 2676/3804] mptcp: wake-up readers only for in sequence data

Currently we rely on the subflow->data_avail field, which is subject to
races:

	ssk1
		skb len = 500 DSS(seq=1, len=1000, off=0)
		# data_avail == MPTCP_SUBFLOW_DATA_AVAIL

	ssk2
		skb len = 500 DSS(seq = 501, len=1000)
		# data_avail == MPTCP_SUBFLOW_DATA_AVAIL

	ssk1
		skb len = 500 DSS(seq = 1, len=1000, off =500)
		# still data_avail == MPTCP_SUBFLOW_DATA_AVAIL,
		# as the skb is covered by a pre-existing map,
		# which was in-sequence at reception time.

Instead we can explicitly check if some data has been received in-sequence,
propagating the info from __mptcp_move_skbs_from_subflow().

Additionally add the 'ONCE' annotation to the 'data_avail' memory
access, as msk will read it outside the subflow socket lock.

Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 33 ++++++++++++---------------------
 net/mptcp/protocol.h |  1 -
 net/mptcp/subflow.c  | 23 +++++++++--------------
 3 files changed, 21 insertions(+), 36 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 534cf500521df..f6e62a6dc9fbd 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -670,15 +670,13 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 /* In most cases we will be able to lock the mptcp socket.  If its already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned int moved = 0;
 
 	if (inet_sk_state_load(sk) == TCP_CLOSE)
-		return;
-
-	mptcp_data_lock(sk);
+		return false;
 
 	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 	__mptcp_ofo_queue(msk);
@@ -690,7 +688,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	 */
 	if (mptcp_pending_data_fin(sk, NULL))
 		mptcp_schedule_work(sk);
-	mptcp_data_unlock(sk);
+	return moved > 0;
 }
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -698,7 +696,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	int sk_rbuf, ssk_rbuf;
-	bool wake;
 
 	/* The peer can send data while we are shutting down this
 	 * subflow at msk destruction time, but we must avoid enqueuing
@@ -707,28 +704,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	if (unlikely(subflow->disposable))
 		return;
 
-	/* move_skbs_to_msk below can legitly clear the data_avail flag,
-	 * but we will need later to properly woke the reader, cache its
-	 * value
-	 */
-	wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
-	if (wake)
-		set_bit(MPTCP_DATA_READY, &msk->flags);
-
 	ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
 	sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
 	if (unlikely(ssk_rbuf > sk_rbuf))
 		sk_rbuf = ssk_rbuf;
 
-	/* over limit? can't append more skbs to msk */
+	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
 	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-		goto wake;
-
-	move_skbs_to_msk(msk, ssk);
+		return;
 
-wake:
-	if (wake)
+	/* Wake-up the reader only for in-sequence data */
+	mptcp_data_lock(sk);
+	if (move_skbs_to_msk(msk, ssk)) {
+		set_bit(MPTCP_DATA_READY, &msk->flags);
 		sk->sk_data_ready(sk);
+	}
+	mptcp_data_unlock(sk);
 }
 
 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -860,7 +851,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
 	sock_owned_by_me(sk);
 
 	mptcp_for_each_subflow(msk, subflow) {
-		if (subflow->data_avail)
+		if (READ_ONCE(subflow->data_avail))
 			return mptcp_subflow_tcp_sock(subflow);
 	}
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 0c6f99c673457..385796f0ef19b 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
 enum mptcp_data_avail {
 	MPTCP_SUBFLOW_NODATA,
 	MPTCP_SUBFLOW_DATA_AVAIL,
-	MPTCP_SUBFLOW_OOO_DATA
 };
 
 struct mptcp_delegated_action {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ef3d037f984a9..ebb898acd65ab 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	struct sk_buff *skb;
 
 	if (!skb_peek(&ssk->sk_receive_queue))
-		subflow->data_avail = 0;
+		WRITE_ONCE(subflow->data_avail, 0);
 	if (subflow->data_avail)
 		return true;
 
@@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
 		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
 			 ack_seq);
-		if (ack_seq == old_ack) {
-			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-			break;
-		} else if (after64(ack_seq, old_ack)) {
-			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
-			break;
+		if (unlikely(before64(ack_seq, old_ack))) {
+			mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+			continue;
 		}
 
-		/* only accept in-sequence mapping. Old values are spurious
-		 * retransmission
-		 */
-		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+		WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+		break;
 	}
 	return true;
 
@@ -1070,7 +1065,7 @@ fallback:
 		subflow->reset_transient = 0;
 		subflow->reset_reason = MPTCP_RST_EMPTCP;
 		tcp_send_active_reset(ssk, GFP_ATOMIC);
-		subflow->data_avail = 0;
+		WRITE_ONCE(subflow->data_avail, 0);
 		return false;
 	}
 
@@ -1080,7 +1075,7 @@ fallback:
 	subflow->map_seq = READ_ONCE(msk->ack_seq);
 	subflow->map_data_len = skb->len;
 	subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
-	subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+	WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
 	return true;
 }
 
@@ -1092,7 +1087,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
 	if (subflow->map_valid &&
 	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
 		subflow->map_valid = 0;
-		subflow->data_avail = 0;
+		WRITE_ONCE(subflow->data_avail, 0);
 
 		pr_debug("Done with mapping: seq=%u data_len=%u",
 			 subflow->map_subflow_seq,
-- 
GitLab


From 61e710227e97172355d5f150d5c78c64175d9fb2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 10 Jun 2021 15:59:42 -0700
Subject: [PATCH 2677/3804] mptcp: do not warn on bad input from the network

warn_bad_map() produces a kernel WARN on bad input coming
from the network. Use pr_debug() to avoid spamming the system
log.

Additionally, when the right bound check fails, warn_bad_map() reports
the wrong ssn value, let's fix it.

Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/107
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index ebb898acd65ab..e05e05ec9687e 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
 	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
 }
 
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
-	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
-		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
+	pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+		 ssn, subflow->map_subflow_seq, subflow->map_data_len);
 }
 
 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 		/* Mapping covers data later in the subflow stream,
 		 * currently unsupported.
 		 */
-		warn_bad_map(subflow, ssn);
+		dbg_bad_map(subflow, ssn);
 		return false;
 	}
 	if (unlikely(!before(ssn, subflow->map_subflow_seq +
 				  subflow->map_data_len))) {
 		/* Mapping does covers past subflow data, invalid */
-		warn_bad_map(subflow, ssn + skb->len);
+		dbg_bad_map(subflow, ssn);
 		return false;
 	}
 	return true;
-- 
GitLab


From 2395da0e17935ce9158cdfae433962bdb6cbfa67 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 10 Jun 2021 15:59:43 -0700
Subject: [PATCH 2678/3804] selftests: mptcp: enable syncookie only in absence
 of reorders

Syncookie validation may fail for OoO packets, causing spurious
resets and self-test failures, so let's force syncookie only
for test iterations with no OoO.

Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/198
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9ca5f1ba461ec..2b495dc8d78ec 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
-
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"
@@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 		exit $ret
 	fi
 
+	# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+	# mptcp syncookie support.
+	if [ $sender = $ns1 ]; then
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+	else
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
+	fi
+
 	run_tests "$ns2" $sender 10.0.1.2
 	run_tests "$ns2" $sender dead:beef:1::2
 	run_tests "$ns2" $sender 10.0.2.1
-- 
GitLab


From 499ada5073361c631f2a3c4a8aed44d53b6f82ec Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 10 Jun 2021 15:59:44 -0700
Subject: [PATCH 2679/3804] mptcp: fix soft lookup in subflow_error_report()

Maxim reported a soft lockup in subflow_error_report():

 watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [swapper/0:0]
 RIP: 0010:native_queued_spin_lock_slowpath
 RSP: 0018:ffffa859c0003bc0 EFLAGS: 00000202
 RAX: 0000000000000101 RBX: 0000000000000001 RCX: 0000000000000000
 RDX: ffff9195c2772d88 RSI: 0000000000000000 RDI: ffff9195c2772d88
 RBP: ffff9195c2772d00 R08: 00000000000067b0 R09: c6e31da9eb1e44f4
 R10: ffff9195ef379700 R11: ffff9195edb50710 R12: ffff9195c2772d88
 R13: ffff9195f500e3d0 R14: ffff9195ef379700 R15: ffff9195ef379700
 FS:  0000000000000000(0000) GS:ffff91961f400000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000000c000407000 CR3: 0000000002988000 CR4: 00000000000006f0
 Call Trace:
  <IRQ>
 _raw_spin_lock_bh
 subflow_error_report
 mptcp_subflow_data_available
 __mptcp_move_skbs_from_subflow
 mptcp_data_ready
 tcp_data_queue
 tcp_rcv_established
 tcp_v4_do_rcv
 tcp_v4_rcv
 ip_protocol_deliver_rcu
 ip_local_deliver_finish
 __netif_receive_skb_one_core
 netif_receive_skb
 rtl8139_poll 8139too
 __napi_poll
 net_rx_action
 __do_softirq
 __irq_exit_rcu
 common_interrupt
  </IRQ>

The calling function - mptcp_subflow_data_available() - can be invoked
from different contexts:
- plain ssk socket lock
- ssk socket lock + mptcp_data_lock
- ssk socket lock + mptcp_data_lock + msk socket lock.

Since subflow_error_report() tries to acquire the mptcp_data_lock, the
latter two call chains will cause a soft lockup.

This change addresses the issue by moving the error reporting call to
the outer functions, where the list of held locks is known and we can
acquire only the needed one.

Reported-by: Maxim Galaganov <max@internet.ru>
Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/199
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c |  9 ++++++
 net/mptcp/subflow.c  | 75 +++++++++++++++++++++++---------------------
 2 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f6e62a6dc9fbd..632350018fb66 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -680,6 +680,12 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 
 	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 	__mptcp_ofo_queue(msk);
+	if (unlikely(ssk->sk_err)) {
+		if (!sock_owned_by_user(sk))
+			__mptcp_error_report(sk);
+		else
+			set_bit(MPTCP_ERROR_REPORT,  &msk->flags);
+	}
 
 	/* If the moves have caught up with the DATA_FIN sequence number
 	 * it's time to ack the DATA_FIN and change socket state, but
@@ -1948,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 		mptcp_data_unlock(sk);
 		tcp_cleanup_rbuf(ssk, moved);
+
+		if (unlikely(ssk->sk_err))
+			__mptcp_error_report(sk);
 		unlock_sock_fast(ssk, slowpath);
 	} while (!done);
 
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index e05e05ec9687e..be1de4084196b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1060,7 +1060,6 @@ fallback:
 		 * subflow_error_report() will introduce the appropriate barriers
 		 */
 		ssk->sk_err = EBADMSG;
-		ssk->sk_error_report(ssk);
 		tcp_set_state(ssk, TCP_CLOSE);
 		subflow->reset_transient = 0;
 		subflow->reset_reason = MPTCP_RST_EMPTCP;
@@ -1115,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
 	*full_space = tcp_full_space(sk);
 }
 
-static void subflow_data_ready(struct sock *sk)
-{
-	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	u16 state = 1 << inet_sk_state_load(sk);
-	struct sock *parent = subflow->conn;
-	struct mptcp_sock *msk;
-
-	msk = mptcp_sk(parent);
-	if (state & TCPF_LISTEN) {
-		/* MPJ subflow are removed from accept queue before reaching here,
-		 * avoid stray wakeups
-		 */
-		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
-			return;
-
-		set_bit(MPTCP_DATA_READY, &msk->flags);
-		parent->sk_data_ready(parent);
-		return;
-	}
-
-	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
-		     !subflow->mp_join && !(state & TCPF_CLOSE));
-
-	if (mptcp_subflow_data_available(sk))
-		mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
-	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
-	mptcp_propagate_sndbuf(sk, ssk);
-	mptcp_write_space(sk);
-}
-
 void __mptcp_error_report(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -1190,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk)
 	mptcp_data_unlock(sk);
 }
 
+static void subflow_data_ready(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	u16 state = 1 << inet_sk_state_load(sk);
+	struct sock *parent = subflow->conn;
+	struct mptcp_sock *msk;
+
+	msk = mptcp_sk(parent);
+	if (state & TCPF_LISTEN) {
+		/* MPJ subflow are removed from accept queue before reaching here,
+		 * avoid stray wakeups
+		 */
+		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+			return;
+
+		set_bit(MPTCP_DATA_READY, &msk->flags);
+		parent->sk_data_ready(parent);
+		return;
+	}
+
+	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+		     !subflow->mp_join && !(state & TCPF_CLOSE));
+
+	if (mptcp_subflow_data_available(sk))
+		mptcp_data_ready(parent, sk);
+	else if (unlikely(sk->sk_err))
+		subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+	mptcp_propagate_sndbuf(sk, ssk);
+	mptcp_write_space(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
@@ -1500,6 +1501,8 @@ static void subflow_state_change(struct sock *sk)
 	 */
 	if (mptcp_subflow_data_available(sk))
 		mptcp_data_ready(parent, sk);
+	else if (unlikely(sk->sk_err))
+		subflow_error_report(sk);
 
 	subflow_sched_work_if_closed(mptcp_sk(parent), sk);
 
-- 
GitLab


From 96f1b00138cb8f04c742c82d0a7c460b2202e887 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Tue, 8 Jun 2021 19:39:25 -0700
Subject: [PATCH 2680/3804] ARCv2: save ABI registers across signal handling

ARCv2 has some configuration dependent registers (r30, r58, r59) which
could be targeted by the compiler. To keep the ABI stable, these were
unconditionally part of the glibc ABI
(sysdeps/unix/sysv/linux/arc/sys/ucontext.h:mcontext_t) however we
missed populating them (by saving/restoring them across signal
handling).

This patch fixes the issue by
 - adding arcv2 ABI regs to kernel struct sigcontext
 - populating them during signal handling

Change to struct sigcontext might seem like a glibc ABI change (although
it primarily uses ucontext_t:mcontext_t) but the fact is
 - it has only been extended (existing fields are not touched)
 - the old sigcontext was ABI incomplete to begin with anyways

Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/53
Cc: <stable@vger.kernel.org>
Tested-by: kernel test robot <lkp@intel.com>
Reported-by: Vladimir Isaev <isaev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/uapi/asm/sigcontext.h |  1 +
 arch/arc/kernel/signal.c               | 43 ++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/arch/arc/include/uapi/asm/sigcontext.h b/arch/arc/include/uapi/asm/sigcontext.h
index 95f8a4380e110..7a5449dfcb290 100644
--- a/arch/arc/include/uapi/asm/sigcontext.h
+++ b/arch/arc/include/uapi/asm/sigcontext.h
@@ -18,6 +18,7 @@
  */
 struct sigcontext {
 	struct user_regs_struct regs;
+	struct user_regs_arcv2 v2abi;
 };
 
 #endif /* _ASM_ARC_SIGCONTEXT_H */
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index b3ccb9e5ffe42..cb2f88502bafe 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -61,6 +61,41 @@ struct rt_sigframe {
 	unsigned int sigret_magic;
 };
 
+static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	v2abi.r30 = regs->r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	v2abi.r58 = regs->r58;
+	v2abi.r59 = regs->r59;
+#else
+	v2abi.r58 = v2abi.r59 = 0;
+#endif
+	err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi));
+#endif
+	return err;
+}
+
+static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs)
+{
+	int err = 0;
+#ifndef CONFIG_ISA_ARCOMPACT
+	struct user_regs_arcv2 v2abi;
+
+	err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi));
+
+	regs->r30 = v2abi.r30;
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+	regs->r58 = v2abi.r58;
+	regs->r59 = v2abi.r59;
+#endif
+#endif
+	return err;
+}
+
 static int
 stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 	       sigset_t *set)
@@ -94,6 +129,10 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
 
 	err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch,
 			     sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
 	err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
 	return err ? -EFAULT : 0;
@@ -109,6 +148,10 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
 	err |= __copy_from_user(&uregs.scratch,
 				&(sf->uc.uc_mcontext.regs.scratch),
 				sizeof(sf->uc.uc_mcontext.regs.scratch));
+
+	if (is_isa_arcv2())
+		err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs);
+
 	if (err)
 		return -EFAULT;
 
-- 
GitLab


From 110febc0148f8ab867344061d5cf95ee1e1ebb3e Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Fri, 26 Feb 2021 10:35:39 -0800
Subject: [PATCH 2681/3804] ARC: fix CONFIG_HARDENED_USERCOPY

Currently enabling this triggers a warning

| usercopy: Kernel memory overwrite attempt detected to kernel text (offset 155633, size 11)!
| usercopy: BUG: failure at mm/usercopy.c:99/usercopy_abort()!
|
|gcc generated __builtin_trap
|Path: /bin/busybox
|CPU: 0 PID: 84 Comm: init Not tainted 5.4.22
|
|[ECR ]: 0x00090005 => gcc generated __builtin_trap
|[EFA ]: 0x9024fcaa
|[BLINK ]: usercopy_abort+0x8a/0x8c
|[ERET ]: memfd_fcntl+0x0/0x470
|[STAT32]: 0x80080802 : IE K
|...
|...
|Stack Trace:
| memfd_fcntl+0x0/0x470
| usercopy_abort+0x8a/0x8c
| __check_object_size+0x10e/0x138
| copy_strings+0x1f4/0x38c
| __do_execve_file+0x352/0x848
| EV_Trap+0xcc/0xd0

The issue is triggered by an allocation in "init reclaimed" region.
ARC _stext encompasses the init region (for historical reasons we wanted
the init.text to be under .text as well). This however trips up
__check_object_size()->check_kernel_text_object() which treats this as
object bleeding into kernel text.

Fix that by rezoning _stext to start from regular kernel .text and leave
out .init altogether.

Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/15
Reported-by: Evgeniy Didin <didin@synopsys.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/vmlinux.lds.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 33ce59d914619..e2146a8da1953 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -57,7 +57,6 @@ SECTIONS
 	.init.ramfs : { INIT_RAM_FS }
 
 	. = ALIGN(PAGE_SIZE);
-	_stext = .;
 
 	HEAD_TEXT_SECTION
 	INIT_TEXT_SECTION(L1_CACHE_BYTES)
@@ -83,6 +82,7 @@ SECTIONS
 
 	.text : {
 		_text = .;
+		_stext = .;
 		TEXT_TEXT
 		SCHED_TEXT
 		CPUIDLE_TEXT
-- 
GitLab


From 9be148e408df7d361ec5afd6299b7736ff3928b0 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Fri, 28 May 2021 14:16:38 +0800
Subject: [PATCH 2682/3804] async_xor: check src_offs is not NULL before
 updating it

When PAGE_SIZE is greater than 4kB, multiple stripes may share the same
page. Thus, src_offs is added to async_xor_offs() with an array of offsets.
However, async_xor() passes NULL src_offs to async_xor_offs(). In such a
case, src_offs should not be updated. Add a check before the update.

Fixes: ceaf2966ab08(async_xor: increase src_offs when dropping destination page)
Cc: stable@vger.kernel.org # v5.10+
Reported-by: Oleksandr Shchirskyi <oleksandr.shchirskyi@linux.intel.com>
Tested-by: Oleksandr Shchirskyi <oleksandr.shchirskyi@intel.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <song@kernel.org>
---
 crypto/async_tx/async_xor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 6cd7f7025df47..d8a91521144e0 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -233,7 +233,8 @@ async_xor_offs(struct page *dest, unsigned int offset,
 		if (submit->flags & ASYNC_TX_XOR_DROP_DST) {
 			src_cnt--;
 			src_list++;
-			src_offs++;
+			if (src_offs)
+				src_offs++;
 		}
 
 		/* wait for any prerequisite operations */
-- 
GitLab


From 858cf860494fab545abfa206d17efcb8bee73e36 Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitaly.wool@konsulko.com>
Date: Mon, 31 May 2021 12:33:10 +0300
Subject: [PATCH 2683/3804] riscv: alternative: fix typo in macro name

alternative-macros.h defines ALT_NEW_CONTENT in its assembly part
and ALT_NEW_CONSTENT in the C part. Most likely it is the latter
that is wrong.

Fixes: 6f4eea90465ad
	(riscv: Introduce alternative mechanism to apply errata solution)
Signed-off-by: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/alternative-macros.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 88c08705f64aa..67406c3763890 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -51,7 +51,7 @@
 	REG_ASM " " newlen "\n" \
 	".word " errata_id "\n"
 
-#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \
+#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \
 	".if " __stringify(enable) " == 1\n"				\
 	".pushsection .alternative, \"a\"\n"				\
 	ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
@@ -69,7 +69,7 @@
 	"886 :\n"	\
 	old_c "\n"	\
 	"887 :\n"	\
-	ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c)
+	ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)
 
 #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k)	\
 	__ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
-- 
GitLab


From 2d49b721dc18c113d5221f4cf5a6104eb66cb7f2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 Jun 2021 09:04:29 +0200
Subject: [PATCH 2684/3804] objtool: Only rewrite unconditional retpoline thunk
 calls

It turns out that the compilers generate conditional branches to the
retpoline thunks like:

  5d5:   0f 85 00 00 00 00       jne    5db <cpuidle_reflect+0x22>
	5d7: R_X86_64_PLT32     __x86_indirect_thunk_r11-0x4

while the rewrite can only handle JMP/CALL to the thunks. The result
is the alternative wrecking the code. Make sure to skip writing the
alternatives for conditional branches.

Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
Reported-by: Lukasz Majczak <lma@semihalf.com>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
---
 tools/objtool/arch/x86/decode.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 24295d39713b2..523aa4157f801 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -747,6 +747,10 @@ int arch_rewrite_retpolines(struct objtool_file *file)
 
 	list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
 
+		if (insn->type != INSN_JUMP_DYNAMIC &&
+		    insn->type != INSN_CALL_DYNAMIC)
+			continue;
+
 		if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
 			continue;
 
-- 
GitLab


From 57c126661f50b884d3812e7db6e00f2e778eccfb Mon Sep 17 00:00:00 2001
From: Tong Tiangen <tongtiangen@huawei.com>
Date: Tue, 1 Jun 2021 18:01:55 +0800
Subject: [PATCH 2685/3804] crypto: nitrox - fix unchecked variable in
 nitrox_register_interrupts

Function nitrox_register_interrupts leaves variable 'nr_vecs' unchecked, which
would be used as a kcalloc parameter later.

Fixes: 5155e118dda9 ("crypto: cavium/nitrox - use pci_alloc_irq_vectors() while enabling MSI-X.")
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_isr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c
index c288c4b51783d..f19e520da6d0c 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_isr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c
@@ -307,6 +307,10 @@ int nitrox_register_interrupts(struct nitrox_device *ndev)
 	 * Entry 192: NPS_CORE_INT_ACTIVE
 	 */
 	nr_vecs = pci_msix_vec_count(pdev);
+	if (nr_vecs < 0) {
+		dev_err(DEV(ndev), "Error in getting vec count %d\n", nr_vecs);
+		return nr_vecs;
+	}
 
 	/* Enable MSI-X */
 	ret = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
-- 
GitLab


From d950cd1b1c204c4a72e08c7c13862451f9d7d902 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 1 Jun 2021 22:51:17 +0800
Subject: [PATCH 2686/3804] crypto: omap-des - using pm_runtime_resume_and_get
 instead of pm_runtime_get_sync

Use pm_runtime_resume_and_get() to replace pm_runtime_get_sync() and
pm_runtime_put_noidle(). This change is just to simplify the code; there
are no actual functional changes.

Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-des.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index c9d38bcfd1c77..bc8631363d725 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -229,9 +229,8 @@ static int omap_des_hw_init(struct omap_des_dev *dd)
 	 * It may be long delays between requests.
 	 * Device might go to off mode to save power.
 	 */
-	err = pm_runtime_get_sync(dd->dev);
+	err = pm_runtime_resume_and_get(dd->dev);
 	if (err < 0) {
-		pm_runtime_put_noidle(dd->dev);
 		dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err);
 		return err;
 	}
@@ -994,9 +993,8 @@ static int omap_des_probe(struct platform_device *pdev)
 	pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
 
 	pm_runtime_enable(dev);
-	err = pm_runtime_get_sync(dev);
+	err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
-		pm_runtime_put_noidle(dev);
 		dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err);
 		goto err_get;
 	}
@@ -1124,9 +1122,8 @@ static int omap_des_resume(struct device *dev)
 {
 	int err;
 
-	err = pm_runtime_get_sync(dev);
+	err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
-		pm_runtime_put_noidle(dev);
 		dev_err(dev, "%s: failed to get_sync(%d)\n", __func__, err);
 		return err;
 	}
-- 
GitLab


From ca323b2c61ec321eb9f2179a405b9c34cdb4f553 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 1 Jun 2021 22:51:18 +0800
Subject: [PATCH 2687/3804] crypto: omap-sham - Fix PM reference leak in omap
 sham ops

pm_runtime_get_sync() increments the PM usage counter
even when it fails. Forgetting the matching put operation
results in a reference leak here. Fix it by replacing
it with pm_runtime_resume_and_get() to keep the usage
counter balanced.

Fixes: 604c31039dae4 ("crypto: omap-sham - Check for return value from pm_runtime_get_sync")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-sham.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index ae0d320d3c60d..dd53ad9987b0d 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -372,7 +372,7 @@ static int omap_sham_hw_init(struct omap_sham_dev *dd)
 {
 	int err;
 
-	err = pm_runtime_get_sync(dd->dev);
+	err = pm_runtime_resume_and_get(dd->dev);
 	if (err < 0) {
 		dev_err(dd->dev, "failed to get sync: %d\n", err);
 		return err;
@@ -2244,7 +2244,7 @@ static int omap_sham_suspend(struct device *dev)
 
 static int omap_sham_resume(struct device *dev)
 {
-	int err = pm_runtime_get_sync(dev);
+	int err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
 		dev_err(dev, "failed to get sync: %d\n", err);
 		return err;
-- 
GitLab


From 124d77c22c6183c76aa4bb71c29ee0c842562a5f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 1 Jun 2021 15:11:28 +0000
Subject: [PATCH 2688/3804] dt-bindings: crypto: Add documentation for
 sl3516-ce

This patch adds documentation for Device-Tree bindings for the
SL3516-ce cryptographic offloader driver.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/cortina,sl3516-crypto.yaml         | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml

diff --git a/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
new file mode 100644
index 0000000000000..b633b8d0e6f05
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/cortina,sl3516-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SL3516 cryptographic offloader driver
+
+maintainers:
+  - Corentin Labbe <clabbe@baylibre.com>
+
+properties:
+  compatible:
+    enum:
+      - cortina,sl3516-crypto
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/clock/cortina,gemini-clock.h>
+    #include <dt-bindings/reset/cortina,gemini-reset.h>
+
+    crypto@62000000 {
+        compatible = "cortina,sl3516-crypto";
+        reg = <0x62000000 0x10000>;
+        interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+        resets = <&syscon GEMINI_RESET_SECURITY>;
+        clocks = <&syscon GEMINI_CLK_GATE_SECURITY>;
+    };
-- 
GitLab


From 46c5338db7bd45b2cf99570560f00389d60fd6b4 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 1 Jun 2021 15:11:29 +0000
Subject: [PATCH 2689/3804] crypto: sl3516 - Add sl3516 crypto engine

The Cortina/Gemini SL3516 SoC has a crypto IP, named either "crypto
engine" or "crypto acceleration engine" in the datasheet.
It supports many algorithms like [AES|DES|3DES][ECB|CBC], SHA1, MD5 and
some HMAC.

This patch adds the core files and support for ecb(aes) and the RNG.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig                   |  19 +
 drivers/crypto/Makefile                  |   1 +
 drivers/crypto/gemini/Makefile           |   2 +
 drivers/crypto/gemini/sl3516-ce-cipher.c | 388 ++++++++++++++++
 drivers/crypto/gemini/sl3516-ce-core.c   | 535 +++++++++++++++++++++++
 drivers/crypto/gemini/sl3516-ce-rng.c    |  61 +++
 drivers/crypto/gemini/sl3516-ce.h        | 347 +++++++++++++++
 7 files changed, 1353 insertions(+)
 create mode 100644 drivers/crypto/gemini/Makefile
 create mode 100644 drivers/crypto/gemini/sl3516-ce-cipher.c
 create mode 100644 drivers/crypto/gemini/sl3516-ce-core.c
 create mode 100644 drivers/crypto/gemini/sl3516-ce-rng.c
 create mode 100644 drivers/crypto/gemini/sl3516-ce.h

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1d5b342e6b424..99b0907901788 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -266,6 +266,25 @@ config CRYPTO_DEV_NIAGARA2
 	  Group, which can perform encryption, decryption, hashing,
 	  checksumming, and raw copies.
 
+config CRYPTO_DEV_SL3516
+	tristate "Stormlink SL3516 crypto offloader"
+	select CRYPTO_SKCIPHER
+	select CRYPTO_ENGINE
+	select CRYPTO_ECB
+	select CRYPTO_AES
+	select HW_RANDOM
+	help
+	  This option allows you to have support for SL3516 crypto offloader.
+
+config CRYPTO_DEV_SL3516_DEBUG
+	bool "Enable SL3516 stats"
+	depends on CRYPTO_DEV_SL3516
+	depends on DEBUG_FS
+	help
+	  Say y to enable SL3516 debug stats.
+	  This will create /sys/kernel/debug/sl3516/stats for displaying
+	  the number of requests per algorithm and other internal stats.
+
 config CRYPTO_DEV_HIFN_795X
 	tristate "Driver HIFN 795x crypto accelerator chips"
 	select CRYPTO_LIB_DES
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index fa22cb19e242a..1fe5120eb9663 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
 obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
+obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
 obj-$(CONFIG_ARCH_STM32) += stm32/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/gemini/Makefile b/drivers/crypto/gemini/Makefile
new file mode 100644
index 0000000000000..c73c8b69260de
--- /dev/null
+++ b/drivers/crypto/gemini/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SL3516) += sl3516-ce.o
+sl3516-ce-y += sl3516-ce-core.o sl3516-ce-cipher.o sl3516-ce-rng.o
diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
new file mode 100644
index 0000000000000..0b34a4971e498
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-cipher.c - hardware cryptographic offloader for Storlink SL3516 SoC
+ *
+ * Copyright (C) 2021 Corentin LABBE <clabbe@baylibre.com>
+ *
+ * This file adds support for AES cipher with 128,192,256 bits keysize in
+ * ECB mode.
+ */
+
+#include <linux/crypto.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
+#include "sl3516-ce.h"
+
+/*
+ * sl3516_ce_sg_unsuitable - scan one scatterlist for entries the CE cannot take
+ *
+ * Every entry must have a length (plain and DMA) which is a multiple of 16 and
+ * an offset aligned on 16 bytes. The matching fallback counter is incremented
+ * for the first offending entry.
+ */
+static bool sl3516_ce_sg_unsuitable(struct sl3516_ce_dev *ce,
+				    struct scatterlist *sgl)
+{
+	struct scatterlist *sg;
+
+	for (sg = sgl; sg; sg = sg_next(sg)) {
+		if (sg->length % 16 || sg_dma_len(sg) % 16) {
+			ce->fallback_mod16++;
+			return true;
+		}
+		if (!IS_ALIGNED(sg->offset, 16)) {
+			ce->fallback_align16++;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * sl3516_ce_need_fallback - check if a request can be handled by the CE
+ *
+ * Returns true when the request must be given to the software fallback: empty
+ * or non 16-byte-multiple cryptlen, too many SG entries for the descriptor
+ * rings, unsuitable SG entries, or source/destination lists whose entries do
+ * not pair up with identical lengths.
+ */
+static bool sl3516_ce_need_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	struct scatterlist *s, *d;
+
+	if (!areq->cryptlen || areq->cryptlen % 16) {
+		ce->fallback_mod16++;
+		return true;
+	}
+
+	/*
+	 * check if we have enough descriptors for TX
+	 * Note: TX need one control desc for each SG
+	 */
+	if (sg_nents(areq->src) > MAXDESC / 2) {
+		ce->fallback_sg_count_tx++;
+		return true;
+	}
+
+	/* check if we have enough descriptors for RX */
+	if (sg_nents(areq->dst) > MAXDESC) {
+		ce->fallback_sg_count_rx++;
+		return true;
+	}
+
+	/* the whole source list is vetted before the destination list */
+	if (sl3516_ce_sg_unsuitable(ce, areq->src))
+		return true;
+	if (sl3516_ce_sg_unsuitable(ce, areq->dst))
+		return true;
+
+	/* need same numbers of SG (with same length) for source and destination */
+	for (s = areq->src, d = areq->dst; s && d; s = sg_next(s), d = sg_next(d)) {
+		if (s->length != d->length) {
+			ce->fallback_not_same_len++;
+			return true;
+		}
+	}
+
+	/* true when one list is longer than the other */
+	return s || d;
+}
+
+/* sl3516_ce_cipher_fallback - run the request on the software fallback tfm */
+static int sl3516_ce_cipher_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_request *fbreq = &rctx->fallback_req;
+	struct sl3516_ce_alg_template *algt;
+
+	algt = container_of(crypto_skcipher_alg(tfm),
+			    struct sl3516_ce_alg_template, alg.skcipher);
+	algt->stat_fb++;
+
+	/* mirror the original request (callback, buffers, IV) onto the fallback */
+	skcipher_request_set_tfm(fbreq, op->fallback_tfm);
+	skcipher_request_set_callback(fbreq, areq->base.flags,
+				      areq->base.complete, areq->base.data);
+	skcipher_request_set_crypt(fbreq, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (rctx->op_dir == CE_DECRYPTION)
+		return crypto_skcipher_decrypt(fbreq);
+	return crypto_skcipher_encrypt(fbreq);
+}
+
+/*
+ * sl3516_ce_cipher - run one skcipher request on the crypto engine
+ *
+ * Maps the source and destination scatterlists for DMA, records the address
+ * and length of each mapped entry in the request context, fills the ECB
+ * control packet and hands the request to sl3516_ce_run_task().
+ *
+ * Returns 0 on success, -EINVAL when the scatterlists cannot be mapped or do
+ * not cover cryptlen, or the error reported by sl3516_ce_run_task().
+ */
+static int sl3516_ce_cipher(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	const bool inplace = areq->src == areq->dst;
+	struct sl3516_ce_alg_template *algt;
+	struct scatterlist *sg;
+	unsigned int todo, len;
+	struct pkt_control_ecb *ecb;
+	int nr_sgs = 0;
+	int nr_sgd = 0;
+	int err = 0;
+	int i;
+
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+
+	dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
+		crypto_tfm_alg_name(areq->base.tfm),
+		areq->cryptlen,
+		rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm),
+		op->keylen);
+
+	algt->stat_req++;
+
+	/* nr_sgs/nr_sgd stay 0 until their dma_map_sg() succeeded: the cleanup relies on it */
+	nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
+			    inplace ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (nr_sgs <= 0 || nr_sgs > MAXDESC / 2) {
+		dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+	if (inplace) {
+		nr_sgd = nr_sgs;
+	} else {
+		nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst),
+				    DMA_FROM_DEVICE);
+		if (nr_sgd <= 0 || nr_sgd > MAXDESC) {
+			dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd);
+			err = -EINVAL;
+			goto theend_sgs;
+		}
+	}
+
+	/* source entries, skipping the ones with an empty DMA length */
+	len = areq->cryptlen;
+	i = 0;
+	for (sg = areq->src; i < nr_sgs && sg && len; sg = sg_next(sg)) {
+		if (sg_dma_len(sg) == 0)
+			continue;
+		rctx->t_src[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_src[i].len = todo;
+		dev_dbg(ce->dev, "%s total=%u SGS(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_src[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %u/%u nr_sgs=%d\n", len, areq->cryptlen, nr_sgs);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	/* same walk for the destination entries */
+	len = areq->cryptlen;
+	i = 0;
+	for (sg = areq->dst; i < nr_sgd && sg && len; sg = sg_next(sg)) {
+		if (sg_dma_len(sg) == 0)
+			continue;
+		rctx->t_dst[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_dst[i].len = todo;
+		dev_dbg(ce->dev, "%s total=%u SGD(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_dst[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %u\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	switch (algt->mode) {
+	case ECB_AES:
+		rctx->pctrllen = sizeof(struct pkt_control_ecb);
+		ecb = (struct pkt_control_ecb *)ce->pctrl;
+
+		rctx->tqflag = TQ0_TYPE_CTRL;
+		rctx->tqflag |= TQ1_CIPHER;
+		ecb->control.op_mode = rctx->op_dir;
+		ecb->control.cipher_algorithm = ECB_AES;
+		ecb->cipher.header_len = 0;
+		ecb->cipher.algorithm_len = areq->cryptlen;
+		cpu_to_be32_array((__be32 *)ecb->key, (u32 *)op->key, op->keylen / 4);
+		rctx->h = &ecb->cipher;
+
+		rctx->tqflag |= TQ4_KEY0;
+		rctx->tqflag |= TQ5_KEY4;
+		rctx->tqflag |= TQ6_KEY6;
+		ecb->control.aesnk = op->keylen / 4;
+		break;
+	default:
+		/* no control packet was built, so there is nothing to run */
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	rctx->nr_sgs = nr_sgs;
+	rctx->nr_sgd = nr_sgd;
+	err = sl3516_ce_run_task(ce, rctx, crypto_tfm_alg_name(areq->base.tfm));
+
+theend_sgs:
+	/* only undo the mappings that were actually established */
+	if (nr_sgs > 0)
+		dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
+			     inplace ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (!inplace && nr_sgd > 0)
+		dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst),
+			     DMA_FROM_DEVICE);
+
+	return err;
+}
+
+/* crypto engine do_one_request hook: run the cipher and report its status */
+static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq)
+{
+	struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+	int ret = sl3516_ce_cipher(req);
+
+	crypto_finalize_skcipher_request(engine, req, ret);
+
+	return 0;
+}
+
+/* sl3516_ce_skdecrypt - decrypt hook: use the fallback or queue on the engine */
+int sl3516_ce_skdecrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+
+	/* start from a clean request context */
+	memset(rctx, 0, sizeof(*rctx));
+	rctx->op_dir = CE_DECRYPTION;
+
+	/* requests the CE cannot take are served by the software fallback */
+	if (sl3516_ce_need_fallback(areq))
+		return sl3516_ce_cipher_fallback(areq);
+
+	return crypto_transfer_skcipher_request_to_engine(op->ce->engine, areq);
+}
+
+/* sl3516_ce_skencrypt - encrypt hook: use the fallback or queue on the engine */
+int sl3516_ce_skencrypt(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+
+	/* start from a clean request context */
+	memset(rctx, 0, sizeof(*rctx));
+	rctx->op_dir = CE_ENCRYPTION;
+
+	/* requests the CE cannot take are served by the software fallback */
+	if (sl3516_ce_need_fallback(areq))
+		return sl3516_ce_cipher_fallback(areq);
+
+	return crypto_transfer_skcipher_request_to_engine(op->ce->engine, areq);
+}
+
+/*
+ * sl3516_ce_cipher_init - allocate the software fallback, size the request
+ * context so that it can embed a fallback request, install the crypto engine
+ * callback and take a runtime PM reference (dropped by sl3516_ce_cipher_exit())
+ */
+int sl3516_ce_cipher_init(struct crypto_tfm *tfm)
+{
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct sl3516_ce_alg_template *algt;
+	const char *name = crypto_tfm_alg_name(tfm);
+	struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(sktfm);
+	int err;
+
+	memset(op, 0, sizeof(struct sl3516_ce_cipher_tfm_ctx));
+
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	op->ce = algt->ce;
+
+	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(op->fallback_tfm)) {
+		dev_err(op->ce->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(op->fallback_tfm));
+		return PTR_ERR(op->fallback_tfm);
+	}
+
+	sktfm->reqsize = sizeof(struct sl3516_ce_cipher_req_ctx) +
+			 crypto_skcipher_reqsize(op->fallback_tfm);
+
+	dev_info(op->ce->dev, "Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&sktfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
+
+	op->enginectx.op.do_one_request = sl3516_ce_handle_cipher_request;
+	op->enginectx.op.prepare_request = NULL;
+	op->enginectx.op.unprepare_request = NULL;
+
+	err = pm_runtime_resume_and_get(op->ce->dev);
+	if (err < 0)
+		crypto_free_skcipher(op->fallback_tfm);
+	return err;
+}
+
+void sl3516_ce_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+
+	kfree_sensitive(op->key);	/* key copy made by sl3516_ce_aes_setkey() */
+	crypto_free_skcipher(op->fallback_tfm);
+	pm_runtime_put_sync_suspend(op->ce->dev); /* pairs with the get of sl3516_ce_cipher_init() */
+}
+
+/*
+ * sl3516_ce_aes_setkey - keep a private copy of the key, then key the fallback
+ */
+int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	void *newkey;
+
+	if (keylen != 128 / 8 && keylen != 192 / 8 && keylen != 256 / 8) {
+		dev_dbg(ce->dev, "ERROR: Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+
+	/* duplicate first: on -ENOMEM the previous key and keylen stay coherent */
+	newkey = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!newkey)
+		return -ENOMEM;
+	kfree_sensitive(op->key);
+	op->key = newkey;
+	op->keylen = keylen;
+
+	crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
diff --git a/drivers/crypto/gemini/sl3516-ce-core.c b/drivers/crypto/gemini/sl3516-ce-core.c
new file mode 100644
index 0000000000000..da6cd529a6c01
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-core.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-core.c - hardware cryptographic offloader for Storlink SL3516 SoC
+ *
+ * Copyright (C) 2021 Corentin Labbe <clabbe@baylibre.com>
+ *
+ * Core file which registers crypto algorithms supported by the CryptoEngine
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <crypto/internal/rng.h>
+#include <crypto/internal/skcipher.h>
+
+#include "sl3516-ce.h"
+
+/* sl3516_ce_desc_init - allocate the descriptor rings and the control packet buffer */
+static int sl3516_ce_desc_init(struct sl3516_ce_dev *ce)
+{
+	const size_t sz = sizeof(struct descriptor) * MAXDESC;
+	int i;
+
+	ce->tx = dma_alloc_coherent(ce->dev, sz, &ce->dtx, GFP_KERNEL);
+	if (!ce->tx)
+		return -ENOMEM;
+	ce->rx = dma_alloc_coherent(ce->dev, sz, &ce->drx, GFP_KERNEL);
+	if (!ce->rx)
+		goto err_rx;
+
+	/* entry i points to entry i + 1, the last one wraps to the first */
+	for (i = 0; i < MAXDESC; i++) {
+		const size_t next = ((i + 1) % MAXDESC) * sizeof(struct descriptor);
+
+		ce->tx[i].frame_ctrl.bits.own = CE_CPU;
+		ce->tx[i].next_desc.next_descriptor = ce->dtx + next;
+		ce->rx[i].frame_ctrl.bits.own = CE_CPU;
+		ce->rx[i].next_desc.next_descriptor = ce->drx + next;
+	}
+
+	/* buffer for the control packet handed to the device with each request */
+	ce->pctrl = dma_alloc_coherent(ce->dev, sizeof(struct pkt_control_ecb),
+				       &ce->dctrl, GFP_KERNEL);
+	if (!ce->pctrl)
+		goto err_pctrl;
+
+	return 0;
+err_pctrl:
+	dma_free_coherent(ce->dev, sz, ce->rx, ce->drx);
+err_rx:
+	dma_free_coherent(ce->dev, sz, ce->tx, ce->dtx);
+	return -ENOMEM;
+}
+
+/* sl3516_ce_free_descs - release everything sl3516_ce_desc_init() allocated */
+static void sl3516_ce_free_descs(struct sl3516_ce_dev *ce)
+{
+	const size_t ring_sz = MAXDESC * sizeof(struct descriptor);
+
+	dma_free_coherent(ce->dev, ring_sz, ce->tx, ce->dtx);
+	dma_free_coherent(ce->dev, ring_sz, ce->rx, ce->drx);
+	dma_free_coherent(ce->dev, sizeof(struct pkt_control_ecb), ce->pctrl, ce->dctrl);
+}
+
+/* start_dma_tx - write the start/chain/continue and irq flags to the TX DMA control register */
+static void start_dma_tx(struct sl3516_ce_dev *ce)
+{
+	const u32 ctrl = TXDMA_CTRL_START | TXDMA_CTRL_CHAIN_MODE |
+			 TXDMA_CTRL_CONTINUE | TXDMA_CTRL_INT_FAIL |
+			 TXDMA_CTRL_INT_PERR | TXDMA_CTRL_BURST_UNK;
+
+	writel(ctrl, ce->base + IPSEC_TXDMA_CTRL);
+}
+
+/* start_dma_rx - write the start/chain/continue and irq flags to the RX DMA control register */
+static void start_dma_rx(struct sl3516_ce_dev *ce)
+{
+	const u32 ctrl = RXDMA_CTRL_START | RXDMA_CTRL_CHAIN_MODE |
+			 RXDMA_CTRL_CONTINUE | RXDMA_CTRL_BURST_UNK |
+			 RXDMA_CTRL_INT_FINISH | RXDMA_CTRL_INT_FAIL |
+			 RXDMA_CTRL_INT_PERR | RXDMA_CTRL_INT_EOD |
+			 RXDMA_CTRL_INT_EOF;
+
+	writel(ctrl, ce->base + IPSEC_RXDMA_CTRL);
+}
+
+/* get_desc_tx - return the current TX descriptor and advance the ring index */
+static struct descriptor *get_desc_tx(struct sl3516_ce_dev *ce)
+{
+	struct descriptor *cur = &ce->tx[ce->ctx];
+
+	/* the index wraps around at the end of the ring */
+	if (++ce->ctx >= MAXDESC)
+		ce->ctx = 0;
+	return cur;
+}
+
+/* get_desc_rx - return the current RX descriptor and advance the ring index */
+static struct descriptor *get_desc_rx(struct sl3516_ce_dev *ce)
+{
+	struct descriptor *cur = &ce->rx[ce->crx];
+
+	/* the index wraps around at the end of the ring */
+	if (++ce->crx >= MAXDESC)
+		ce->crx = 0;
+	return cur;
+}
+
+/* sl3516_ce_fill_tx - set up one TX descriptor, handing it to the DMA last */
+static void sl3516_ce_fill_tx(struct descriptor *dd, dma_addr_t addr, u32 len, u32 tqflag)
+{
+	dd->frame_ctrl.raw = 0;
+	dd->flag_status.raw = 0;
+	dd->frame_ctrl.bits.buffer_size = len;
+	dd->buf_adr = addr;
+	dd->flag_status.tx_flag.tqflag = tqflag;
+	dd->next_desc.bits.eofie = 0;
+	dd->next_desc.bits.dec = 0;
+	dd->next_desc.bits.sof_eof = DESC_FIRST | DESC_LAST;
+	dd->frame_ctrl.bits.own = CE_DMA;
+}
+
+/*
+ * sl3516_ce_run_task - queue the RX then TX descriptors of a request, start
+ * the DMA and wait for completion. Returns 0, or -EFAULT when the interrupt
+ * handler flagged no completion or IPSEC_STATUS_REG has an error bit set.
+ */
+int sl3516_ce_run_task(struct sl3516_ce_dev *ce, struct sl3516_ce_cipher_req_ctx *rctx,
+		       const char *name)
+{
+	struct descriptor *rdd = NULL;
+	u32 v;
+	int i, err = 0;
+
+	ce->stat_req++;
+
+	reinit_completion(&ce->complete);
+	ce->status = 0;
+
+	for (i = 0; i < rctx->nr_sgd; i++) {
+		dev_dbg(ce->dev, "%s handle DST SG %d/%d len=%d\n", __func__,
+			i, rctx->nr_sgd, rctx->t_dst[i].len);
+		rdd = get_desc_rx(ce);
+		rdd->buf_adr = rctx->t_dst[i].addr;
+		rdd->frame_ctrl.bits.buffer_size = rctx->t_dst[i].len;
+		rdd->frame_ctrl.bits.own = CE_DMA;
+	}
+	rdd->next_desc.bits.eofie = 1;
+
+	/* each source entry takes a control descriptor followed by a data one */
+	for (i = 0; i < rctx->nr_sgs; i++) {
+		dev_dbg(ce->dev, "%s handle SRC SG %d/%d len=%d\n", __func__,
+			i, rctx->nr_sgs, rctx->t_src[i].len);
+		rctx->h->algorithm_len = rctx->t_src[i].len;
+
+		sl3516_ce_fill_tx(get_desc_tx(ce), ce->dctrl, rctx->pctrllen, rctx->tqflag);
+		sl3516_ce_fill_tx(get_desc_tx(ce), rctx->t_src[i].addr, rctx->t_src[i].len, 0);
+		start_dma_tx(ce);
+		start_dma_rx(ce);
+	}
+	wait_for_completion_interruptible_timeout(&ce->complete, msecs_to_jiffies(5000));
+	if (ce->status == 0) {
+		dev_err(ce->dev, "DMA timeout for %s\n", name);
+		err = -EFAULT;
+	}
+	v = readl(ce->base + IPSEC_STATUS_REG);
+	if (v & 0xFFF) {
+		dev_err(ce->dev, "IPSEC_STATUS_REG %x\n", v);
+		err = -EFAULT;
+	}
+
+	return err;
+}
+
+/* ce_irq_handler - log DMA errors, complete the request on DMA_STATUS_RS_EOFI */
+static irqreturn_t ce_irq_handler(int irq, void *data)
+{
+	struct sl3516_ce_dev *ce = data;
+	u32 status;
+
+	ce->stat_irq++;
+
+	status = readl(ce->base + IPSEC_DMA_STATUS);
+	writel(status, ce->base + IPSEC_DMA_STATUS);
+
+	if (status & DMA_STATUS_TS_DERR)
+		dev_err(ce->dev, "AHB bus Error While Tx !!!\n");
+	if (status & DMA_STATUS_TS_PERR)
+		dev_err(ce->dev, "Tx Descriptor Protocol Error !!!\n");
+	if (status & DMA_STATUS_RS_DERR)
+		dev_err(ce->dev, "AHB bus Error While Rx !!!\n");
+	if (status & DMA_STATUS_RS_PERR)
+		dev_err(ce->dev, "Rx Descriptor Protocol Error !!!\n");
+
+	if (status & DMA_STATUS_TS_EOFI)
+		ce->stat_irq_tx++;
+	if (status & DMA_STATUS_RS_EOFI) {
+		ce->status = 1;
+		complete(&ce->complete);
+		ce->stat_irq_rx++;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct sl3516_ce_alg_template ce_algs[] = {
+{
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
+	.mode = ECB_AES,	/* selects the control packet built by sl3516_ce_cipher() */
+	.alg.skcipher = {
+		.base = {
+			.cra_name = "ecb(aes)",
+			.cra_driver_name = "ecb-aes-sl3516",
+			.cra_priority = 400,
+			.cra_blocksize = AES_BLOCK_SIZE,
+			.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+				CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+			.cra_ctxsize = sizeof(struct sl3516_ce_cipher_tfm_ctx),
+			.cra_module = THIS_MODULE,
+			.cra_alignmask = 0xf,	/* 16-byte alignment */
+			.cra_init = sl3516_ce_cipher_init,
+			.cra_exit = sl3516_ce_cipher_exit,
+		},
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.setkey		= sl3516_ce_aes_setkey,
+		.encrypt	= sl3516_ce_skencrypt,
+		.decrypt	= sl3516_ce_skdecrypt,
+	}
+},
+};
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+/* sl3516_ce_debugfs_show - dump the device and per-algorithm counters */
+static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v)
+{
+	struct sl3516_ce_dev *ce = seq->private;
+	const struct sl3516_ce_alg_template *t;
+
+	/* device wide counters */
+	seq_printf(seq, "HWRNG %lu %lu\n",
+		   ce->hwrng_stat_req, ce->hwrng_stat_bytes);
+	seq_printf(seq, "IRQ %lu\n", ce->stat_irq);
+	seq_printf(seq, "IRQ TX %lu\n", ce->stat_irq_tx);
+	seq_printf(seq, "IRQ RX %lu\n", ce->stat_irq_rx);
+	seq_printf(seq, "nreq %lu\n", ce->stat_req);
+	seq_printf(seq, "fallback SG count TX %lu\n", ce->fallback_sg_count_tx);
+	seq_printf(seq, "fallback SG count RX %lu\n", ce->fallback_sg_count_rx);
+	seq_printf(seq, "fallback modulo16 %lu\n", ce->fallback_mod16);
+	seq_printf(seq, "fallback align16 %lu\n", ce->fallback_align16);
+	seq_printf(seq, "fallback not same len %lu\n", ce->fallback_not_same_len);
+
+	/* only the skcipher templates bound to a device are listed */
+	for (t = ce_algs; t < ce_algs + ARRAY_SIZE(ce_algs); t++) {
+		if (!t->ce || t->type != CRYPTO_ALG_TYPE_SKCIPHER)
+			continue;
+		seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+			   t->alg.skcipher.base.cra_driver_name,
+			   t->alg.skcipher.base.cra_name,
+			   t->stat_req, t->stat_fb);
+	}
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sl3516_ce_debugfs);
+#endif
+
+/* sl3516_ce_register_algs - bind each template to the device and register it */
+static int sl3516_ce_register_algs(struct sl3516_ce_dev *ce)
+{
+	struct sl3516_ce_alg_template *t;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		t = &ce_algs[i];
+		if (t->type != CRYPTO_ALG_TYPE_SKCIPHER) {
+			/* a NULL ce marks the template as not registered */
+			t->ce = NULL;
+			dev_err(ce->dev, "ERROR: tried to register an unknown algo\n");
+			continue;
+		}
+		t->ce = ce;
+		dev_info(ce->dev, "DEBUG: Register %s\n", t->alg.skcipher.base.cra_name);
+		err = crypto_register_skcipher(&t->alg.skcipher);
+		if (err) {
+			dev_err(ce->dev, "Fail to register %s\n", t->alg.skcipher.base.cra_name);
+			t->ce = NULL;
+			return err;
+		}
+	}
+	return 0;
+}
+
+/* sl3516_ce_unregister_algs - unregister the templates which were registered */
+static void sl3516_ce_unregister_algs(struct sl3516_ce_dev *ce)
+{
+	struct sl3516_ce_alg_template *t;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		t = &ce_algs[i];
+		/* templates without a device were never registered */
+		if (!t->ce || t->type != CRYPTO_ALG_TYPE_SKCIPHER)
+			continue;
+
+		dev_info(ce->dev, "Unregister %d %s\n", i, t->alg.skcipher.base.cra_name);
+		crypto_unregister_skcipher(&t->alg.skcipher);
+	}
+}
+
+static void sl3516_ce_start(struct sl3516_ce_dev *ce)
+{
+	ce->ctx = 0;	/* software TX ring index, see get_desc_tx() */
+	ce->crx = 0;	/* software RX ring index, see get_desc_rx() */
+	writel(ce->dtx, ce->base + IPSEC_TXDMA_CURR_DESC);	/* DMA address of the TX ring */
+	writel(ce->drx, ce->base + IPSEC_RXDMA_CURR_DESC);	/* DMA address of the RX ring */
+	writel(0, ce->base + IPSEC_DMA_STATUS);
+}
+
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver, or the RNG is being read.
+ */
+static int sl3516_ce_pm_suspend(struct device *dev)
+{
+	struct sl3516_ce_dev *ce = dev_get_drvdata(dev);
+
+	reset_control_assert(ce->reset);	/* undone by sl3516_ce_pm_resume() */
+	clk_disable_unprepare(ce->clks);
+	return 0;
+}
+
+/* sl3516_ce_pm_resume - enable the clock, release the reset, reprogram the rings */
+static int sl3516_ce_pm_resume(struct device *dev)
+{
+	struct sl3516_ce_dev *ce = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(ce->clks);
+	if (err) {
+		dev_err(ce->dev, "Cannot prepare_enable\n");
+		return err;	/* the clock is not enabled: nothing to undo */
+	}
+	err = reset_control_deassert(ce->reset);
+	if (err) {
+		dev_err(ce->dev, "Cannot deassert reset control\n");
+		/* put the reset back and disable the clock enabled above */
+		sl3516_ce_pm_suspend(dev);
+		return err;
+	}
+
+	sl3516_ce_start(ce);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sl3516_ce_pm_ops = {
+	SET_RUNTIME_PM_OPS(sl3516_ce_pm_suspend, sl3516_ce_pm_resume, NULL)
+};
+
+/* sl3516_ce_pm_init - enable runtime PM with a 2000 ms autosuspend delay */
+static int sl3516_ce_pm_init(struct sl3516_ce_dev *ce)
+{
+	struct device *dev = ce->dev;
+	int ret;
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 2000);
+	ret = pm_runtime_set_suspended(dev);
+	if (!ret)
+		pm_runtime_enable(dev);
+	return ret;
+}
+
+static void sl3516_ce_pm_exit(struct sl3516_ce_dev *ce)
+{
+	pm_runtime_disable(ce->dev);	/* undoes the pm_runtime_enable() of sl3516_ce_pm_init() */
+}
+
+/* sl3516_ce_probe - get the device resources, then register the algorithms and the RNG */
+static int sl3516_ce_probe(struct platform_device *pdev)
+{
+	struct sl3516_ce_dev *ce;
+	int err, irq;
+	u32 v;
+
+	ce = devm_kzalloc(&pdev->dev, sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return -ENOMEM;
+
+	ce->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ce);
+
+	ce->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ce->base))
+		return PTR_ERR(ce->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, ce_irq_handler, 0, "crypto", ce);
+	if (err) {
+		dev_err(ce->dev, "Cannot request Crypto Engine IRQ (err=%d)\n", err);
+		return err;
+	}
+
+	ce->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(ce->reset))
+		return dev_err_probe(&pdev->dev, PTR_ERR(ce->reset),
+				     "No reset control found\n");
+	ce->clks = devm_clk_get(ce->dev, NULL);
+	if (IS_ERR(ce->clks)) {
+		err = PTR_ERR(ce->clks);
+		dev_err(ce->dev, "Cannot get clock err=%d\n", err);
+		return err;
+	}
+
+	err = sl3516_ce_desc_init(ce);
+	if (err)
+		return err;
+
+	err = sl3516_ce_pm_init(ce);
+	if (err)
+		goto error_pm;
+
+	init_completion(&ce->complete);
+
+	ce->engine = crypto_engine_alloc_init(ce->dev, true);
+	if (!ce->engine) {
+		dev_err(ce->dev, "Cannot allocate engine\n");
+		err = -ENOMEM;
+		goto error_engine;
+	}
+
+	err = crypto_engine_start(ce->engine);
+	if (err) {
+		dev_err(ce->dev, "Cannot start engine\n");
+		/* the engine is allocated at this point and must be released */
+		goto error_alg;
+	}
+
+	err = sl3516_ce_register_algs(ce);
+	if (err)
+		goto error_alg;
+
+	err = sl3516_ce_rng_register(ce);
+	if (err)
+		goto error_rng;
+
+	err = pm_runtime_resume_and_get(ce->dev);
+	if (err < 0)
+		goto error_pmuse;
+
+	v = readl(ce->base + IPSEC_ID);
+	dev_info(ce->dev, "SL3516 dev %lx rev %lx\n",
+		 v & GENMASK(31, 4), v & GENMASK(3, 0));
+	v = readl(ce->base + IPSEC_DMA_DEVICE_ID);
+	dev_info(ce->dev, "SL3516 DMA dev %lx rev %lx\n",
+		 v & GENMASK(15, 4), v & GENMASK(3, 0));
+
+	pm_runtime_put_sync(ce->dev);
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	/* Ignore error of debugfs */
+	ce->dbgfs_dir = debugfs_create_dir("sl3516", NULL);
+	ce->dbgfs_stats = debugfs_create_file("stats", 0444,
+					      ce->dbgfs_dir, ce,
+					      &sl3516_ce_debugfs_fops);
+#endif
+
+	return 0;
+error_pmuse:
+	sl3516_ce_rng_unregister(ce);
+error_rng:
+	sl3516_ce_unregister_algs(ce);
+error_alg:
+	crypto_engine_exit(ce->engine);
+error_engine:
+	sl3516_ce_pm_exit(ce);
+error_pm:
+	sl3516_ce_free_descs(ce);
+	return err;
+}
+
+static int sl3516_ce_remove(struct platform_device *pdev)
+{
+	struct sl3516_ce_dev *ce = platform_get_drvdata(pdev);
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	debugfs_remove_recursive(ce->dbgfs_dir);
+#endif
+	/* undo sl3516_ce_probe() in reverse order: debugfs was created last */
+	sl3516_ce_rng_unregister(ce);
+	sl3516_ce_unregister_algs(ce);
+	crypto_engine_exit(ce->engine);
+	sl3516_ce_pm_exit(ce);
+	sl3516_ce_free_descs(ce);
+
+	return 0;
+}
+
+static const struct of_device_id sl3516_ce_crypto_of_match_table[] = {
+	{ .compatible = "cortina,sl3516-crypto"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, sl3516_ce_crypto_of_match_table);
+
+static struct platform_driver sl3516_ce_driver = {
+	.probe		 = sl3516_ce_probe,
+	.remove		 = sl3516_ce_remove,
+	.driver		 = {
+		.name		= "sl3516-crypto",
+		.pm		= &sl3516_ce_pm_ops,
+		.of_match_table	= sl3516_ce_crypto_of_match_table,
+	},
+};
+
+module_platform_driver(sl3516_ce_driver);
+
+MODULE_DESCRIPTION("SL3516 cryptographic offloader");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin Labbe <clabbe@baylibre.com>");
diff --git a/drivers/crypto/gemini/sl3516-ce-rng.c b/drivers/crypto/gemini/sl3516-ce-rng.c
new file mode 100644
index 0000000000000..76931ec1cec55
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-rng.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-rng.c - hardware cryptographic offloader for SL3516 SoC.
+ *
+ * Copyright (C) 2021 Corentin Labbe <clabbe@baylibre.com>
+ *
+ * This file handle the RNG found in the SL3516 crypto engine
+ */
+#include "sl3516-ce.h"
+#include <linux/pm_runtime.h>
+#include <linux/hw_random.h>
+
+/*
+ * sl3516_ce_rng_read - hwrng read hook
+ *
+ * Fills buf with 32-bit words from IPSEC_RAND_NUM_REG, so up to 3 bytes more
+ * than max are written if max is not a multiple of 4.
+ * Returns the number of bytes written, or the runtime PM error.
+ */
+static int sl3516_ce_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+	struct sl3516_ce_dev *ce = container_of(rng, struct sl3516_ce_dev, trng);
+	u32 *word = buf;
+	size_t done;
+	int err;
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	ce->hwrng_stat_req++;
+	ce->hwrng_stat_bytes += max;
+#endif
+
+	err = pm_runtime_resume_and_get(ce->dev);
+	if (err < 0)
+		return err;
+
+	for (done = 0; done < max; done += 4)
+		*word++ = readl(ce->base + IPSEC_RAND_NUM_REG);
+
+	pm_runtime_put(ce->dev);
+
+	return done;
+}
+
+/* sl3516_ce_rng_register - fill the hwrng description and register it */
+int sl3516_ce_rng_register(struct sl3516_ce_dev *ce)
+{
+	int err;
+
+	ce->trng.name = "SL3516 Crypto Engine RNG";
+	ce->trng.read = sl3516_ce_rng_read;
+	ce->trng.quality = 700;
+	err = hwrng_register(&ce->trng);
+	if (err)
+		dev_err(ce->dev, "Fail to register the RNG\n");
+	return err;
+}
+
+void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce)
+{
+	hwrng_unregister(&ce->trng);	/* registered by sl3516_ce_rng_register() */
+}
diff --git a/drivers/crypto/gemini/sl3516-ce.h b/drivers/crypto/gemini/sl3516-ce.h
new file mode 100644
index 0000000000000..4c0ec6c920d1d
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce.h
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sl3516-ce.h - hardware cryptographic offloader for cortina/gemini SoC
+ *
+ * Copyright (C) 2021 Corentin LABBE <clabbe@baylibre.com>
+ *
+ * General notes on this driver:
+ * Called either Crypto Acceleration Engine Module, Security Acceleration Engine
+ * or IPSEC module in the datasheet, it will be called Crypto Engine for short
+ * in this driver.
+ * The CE was designed to handle IPSEC and wifi(TKIP WEP) protocol.
+ * It can handle AES, DES, 3DES, MD5, WEP, TKIP, SHA1, HMAC(MD5), HMAC(SHA1),
+ * Michael cipher/digest suites.
+ * It acts the same as a network hw, with both RX and TX chained descriptors.
+ */
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/hw_random.h>
+
+#define TQ0_TYPE_DATA 0
+#define TQ0_TYPE_CTRL BIT(0)
+#define TQ1_CIPHER BIT(1)
+#define TQ2_AUTH BIT(2)
+#define TQ3_IV BIT(3)
+#define TQ4_KEY0 BIT(4)
+#define TQ5_KEY4 BIT(5)
+#define TQ6_KEY6 BIT(6)
+#define TQ7_AKEY0 BIT(7)
+#define TQ8_AKEY2 BIT(8)
+#define TQ9_AKEY2 BIT(9)
+
+#define ECB_AES       0x2
+
+#define DESC_LAST 0x01
+#define DESC_FIRST 0x02
+
+#define IPSEC_ID		0x0000
+#define IPSEC_STATUS_REG	0x00a8
+#define IPSEC_RAND_NUM_REG	0x00ac
+#define IPSEC_DMA_DEVICE_ID	0xff00
+#define IPSEC_DMA_STATUS	0xff04
+#define IPSEC_TXDMA_CTRL	0xff08
+#define IPSEC_TXDMA_FIRST_DESC	0xff0c
+#define IPSEC_TXDMA_CURR_DESC	0xff10
+#define IPSEC_RXDMA_CTRL	0xff14
+#define IPSEC_RXDMA_FIRST_DESC	0xff18
+#define IPSEC_RXDMA_CURR_DESC	0xff1c
+#define IPSEC_TXDMA_BUF_ADDR	0xff28
+#define IPSEC_RXDMA_BUF_ADDR	0xff38
+#define IPSEC_RXDMA_BUF_SIZE	0xff30
+
+#define CE_ENCRYPTION		0x01
+#define CE_DECRYPTION		0x03
+
+#define MAXDESC 6
+
+#define DMA_STATUS_RS_EOFI	BIT(22)
+#define DMA_STATUS_RS_PERR	BIT(24)
+#define DMA_STATUS_RS_DERR	BIT(25)
+#define DMA_STATUS_TS_EOFI	BIT(27)
+#define DMA_STATUS_TS_PERR	BIT(29)
+#define DMA_STATUS_TS_DERR	BIT(30)
+
+#define TXDMA_CTRL_START BIT(31)
+#define TXDMA_CTRL_CONTINUE BIT(30)
+#define TXDMA_CTRL_CHAIN_MODE BIT(29)
+/* the burst value is not documented in the datasheet */
+#define TXDMA_CTRL_BURST_UNK BIT(22)
+#define TXDMA_CTRL_INT_FAIL BIT(17)
+#define TXDMA_CTRL_INT_PERR BIT(16)
+
+#define RXDMA_CTRL_START BIT(31)
+#define RXDMA_CTRL_CONTINUE BIT(30)
+#define RXDMA_CTRL_CHAIN_MODE BIT(29)
+/* the burst value is not documented in the datasheet */
+#define RXDMA_CTRL_BURST_UNK BIT(22)
+#define RXDMA_CTRL_INT_FINISH BIT(18)
+#define RXDMA_CTRL_INT_FAIL BIT(17)
+#define RXDMA_CTRL_INT_PERR BIT(16)
+#define RXDMA_CTRL_INT_EOD BIT(15)
+#define RXDMA_CTRL_INT_EOF BIT(14)
+
+#define CE_CPU 0
+#define CE_DMA 1
+
+/*
+ * struct descriptor - descriptor for CE operations
+ * @frame_ctrl:		Information for the current descriptor
+ * @flag_status:	For send packet, describe flag of operations.
+ * @buf_adr:		pointer to a send/recv buffer for data packet
+ * @next_desc:		control linking to other descriptors
+ */
+struct descriptor {
+	union {
+		u32 raw;
+		/*
+		 * struct desc_frame_ctrl - Information for the current descriptor
+		 * @buffer_size:	the size of buffer at buf_adr
+		 * @desc_count:		Upon completion of a DMA operation, DMA
+		 *			write the number of descriptors used
+		 *			for the current frame
+		 * @checksum:		unknown
+		 * @authcomp:		unknown
+		 * @perr:		Protocol error during processing this descriptor
+		 * @derr:		Data error during processing this descriptor
+		 * @own:		0 if owned by CPU, 1 for DMA
+		 */
+		struct desc_frame_ctrl {
+			u32 buffer_size	:16;
+			u32 desc_count	:6;
+			u32 checksum	:6;
+			u32 authcomp	:1;
+			u32 perr	:1;
+			u32 derr	:1;
+			u32 own		:1;
+		} bits;
+	} frame_ctrl;
+
+	union {
+		u32 raw;
+		/*
+		 * struct desc_tx_flag_status - flag for this descriptor
+		 * @tqflag:	list of flag describing the type of operation
+		 *		to be performed.
+		 */
+		struct desc_tx_flag_status {
+			u32 tqflag	:10;
+			u32 unused	:22;
+		} tx_flag;
+	} flag_status;
+
+	u32 buf_adr;
+
+	union {
+		u32 next_descriptor;
+		/*
+		 * struct desc_next - describe chaining of descriptors
+		 * @sof_eof:	whether the descriptor is the first (0b10),
+		 *		the last (0b01), the middle of a chain (0b00)
+		 *		or the only one (0b11)
+		 * @dec:	AHB bus address increase (0), decrease (1)
+		 * @eofie:	End of frame interrupt enable
+		 * @ndar:	Next descriptor address
+		 */
+		struct desc_next {
+			u32 sof_eof	:2;
+			u32 dec		:1;
+			u32 eofie	:1;
+			u32 ndar	:28;
+		} bits;
+	} next_desc;
+};
+
+/*
+ * struct pkt_control_header - The value of this register is used to set the
+ *			operation mode of the IPSec Module.
+ * @process_id:		Used to identify the process. The number will be copied
+ *			to the descriptor status of the received packet.
+ * @auth_check_len:	Number of 32-bit words to be checked or appended by the
+ *			authentication module
+ * @auth_algorithm:
+ * @auth_mode:		0:append 1:Check Authentication Result
+ * @fcs_stream_copy:	0:enable 1:disable authentication stream copy
+ * @mix_key_sel:	0:use rCipherKey0-3  1:use Key Mixer
+ * @aesnk:		AES Key Size
+ * @cipher_algorithm:	choice of CBC/ECB and AES/DES/3DES
+ * @op_mode:		Operation Mode for the IPSec Module
+ */
+struct pkt_control_header {
+	u32 process_id		:8;
+	u32 auth_check_len	:3;
+	u32 un1			:1;
+	u32 auth_algorithm	:3;
+	u32 auth_mode		:1;
+	u32 fcs_stream_copy	:1;
+	u32 un2			:2;
+	u32 mix_key_sel		:1;
+	u32 aesnk		:4;
+	u32 cipher_algorithm	:3;
+	u32 un3			:1;
+	u32 op_mode		:4;
+};
+
+struct pkt_control_cipher {
+	u32 algorithm_len	:16;
+	u32 header_len		:16;
+};
+
+/*
+ * struct pkt_control_ecb - control packet for ECB
+ */
+struct pkt_control_ecb {
+	struct pkt_control_header control;
+	struct pkt_control_cipher cipher;
+	unsigned char key[AES_MAX_KEY_SIZE];
+};
+
+/*
+ * struct sl3516_ce_dev - main container for all this driver information
+ * @base:	base address
+ * @clks:	clocks used
+ * @reset:	pointer to reset controller
+ * @dev:	the platform device
+ * @engine:	ptr to the crypto/crypto_engine
+ * @complete:	completion for the current task on this flow
+ * @status:	set to 1 by interrupt if task is done
+ * @dtx:	base DMA address for TX descriptors
+ * @tx:		base address of TX descriptors
+ * @drx:	base DMA address for RX descriptors
+ * @rx:		base address of RX descriptors
+ * @ctx:	current used TX descriptor
+ * @crx:	current used RX descriptor
+ * @trng:	hw_random structure for RNG
+ * @hwrng_stat_req:	number of HWRNG requests
+ * @hwrng_stat_bytes:	total number of bytes generated by RNG
+ * @stat_irq:	number of IRQ handled by CE
+ * @stat_irq_tx:	number of TX IRQ handled by CE
+ * @stat_irq_rx:	number of RX IRQ handled by CE
+ * @stat_req:	number of requests handled by CE
+ * @fallback_sg_count_tx:	number of fallback due to destination SG count
+ * @fallback_sg_count_rx:	number of fallback due to source SG count
+ * @fallback_not_same_len:	number of fallback due to difference in SG length
+ * @dbgfs_dir:	Debugfs dentry for statistic directory
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ */
+struct sl3516_ce_dev {
+	void __iomem *base;
+	struct clk *clks;
+	struct reset_control *reset;
+	struct device *dev;
+	struct crypto_engine *engine;
+	struct completion complete;
+	int status;
+	dma_addr_t dtx;
+	struct descriptor *tx;
+	dma_addr_t drx;
+	struct descriptor *rx;
+	int ctx;
+	int crx;
+	struct hwrng trng;
+	unsigned long hwrng_stat_req;
+	unsigned long hwrng_stat_bytes;
+	unsigned long stat_irq;
+	unsigned long stat_irq_tx;
+	unsigned long stat_irq_rx;
+	unsigned long stat_req;
+	unsigned long fallback_sg_count_tx;
+	unsigned long fallback_sg_count_rx;
+	unsigned long fallback_not_same_len;
+	unsigned long fallback_mod16;
+	unsigned long fallback_align16;
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_stats;
+#endif
+	void *pctrl;
+	dma_addr_t dctrl;
+};
+
+struct sginfo {
+	u32 addr;
+	u32 len;
+};
+
+/*
+ * struct sl3516_ce_cipher_req_ctx - context for a skcipher request
+ * @t_src:		list of mapped SGs with their size
+ * @t_dst:		list of mapped SGs with their size
+ * @op_dir:		direction (encrypt vs decrypt) for this request
+ * @pctrllen:		the length of the ctrl packet
+ * @tqflag:		the TQflag to set in data packet
+ * @h:			pointer to the pkt_control_cipher header
+ * @nr_sgs:		number of source SG
+ * @nr_sgd:		number of destination SG
+ * @fallback_req:	request struct for invoking the fallback skcipher TFM
+ */
+struct sl3516_ce_cipher_req_ctx {
+	struct sginfo t_src[MAXDESC];
+	struct sginfo t_dst[MAXDESC];
+	u32 op_dir;
+	unsigned int pctrllen;
+	u32 tqflag;
+	struct pkt_control_cipher *h;
+	int nr_sgs;
+	int nr_sgd;
+	struct skcipher_request fallback_req;   // keep at the end
+};
+
+/*
+ * struct sl3516_ce_cipher_tfm_ctx - context for a skcipher TFM
+ * @enginectx:		crypto_engine used by this TFM
+ * @key:		pointer to key data
+ * @keylen:		len of the key
+ * @ce:			pointer to the private data of driver handling this TFM
+ * @fallback_tfm:	pointer to the fallback TFM
+ *
+ * enginectx must be the first element
+ */
+struct sl3516_ce_cipher_tfm_ctx {
+	struct crypto_engine_ctx enginectx;
+	u32 *key;
+	u32 keylen;
+	struct sl3516_ce_dev *ce;
+	struct crypto_skcipher *fallback_tfm;
+};
+
+/*
+ * struct sl3516_ce_alg_template - crypto_alg template
+ * @type:		the CRYPTO_ALG_TYPE for this template
+ * @mode:		value to be used in control packet for this algorithm
+ * @ce:			pointer to the sl3516_ce_dev structure associated with
+ *			this template
+ * @alg:		one of sub struct must be used
+ * @stat_req:		number of request done on this template
+ * @stat_fb:		number of request which has fallbacked
+ * @stat_bytes:		total data size done by this template
+ */
+struct sl3516_ce_alg_template {
+	u32 type;
+	u32 mode;
+	struct sl3516_ce_dev *ce;
+	union {
+		struct skcipher_alg skcipher;
+	} alg;
+	unsigned long stat_req;
+	unsigned long stat_fb;
+	unsigned long stat_bytes;
+};
+
+int sl3516_ce_enqueue(struct crypto_async_request *areq, u32 type);
+
+int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen);
+int sl3516_ce_cipher_init(struct crypto_tfm *tfm);
+void sl3516_ce_cipher_exit(struct crypto_tfm *tfm);
+int sl3516_ce_skdecrypt(struct skcipher_request *areq);
+int sl3516_ce_skencrypt(struct skcipher_request *areq);
+
+int sl3516_ce_run_task(struct sl3516_ce_dev *ce,
+		       struct sl3516_ce_cipher_req_ctx *rctx, const char *name);
+
+int sl3516_ce_rng_register(struct sl3516_ce_dev *ce);
+void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce);
-- 
GitLab


From 2dcf45622481a22ffe108e2f381a929c9132c605 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 1 Jun 2021 15:11:32 +0000
Subject: [PATCH 2690/3804] MAINTAINERS: add gemini crypto sl3516-ce

Add myself as maintainer of gemini sl3516-ce crypto driver.
Add also the driver to the list of files for gemini SoC.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d478f44be7ce6..388924c2d23ac 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1811,6 +1811,7 @@ F:	Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt
 F:	Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt
 F:	Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt
 F:	arch/arm/mach-gemini/
+F:	drivers/crypto/gemini/
 F:	drivers/net/ethernet/cortina/
 F:	drivers/pinctrl/pinctrl-gemini.c
 F:	drivers/rtc/rtc-ftrtc010.c
@@ -7549,6 +7550,12 @@ M:	Kieran Bingham <kbingham@kernel.org>
 S:	Supported
 F:	scripts/gdb/
 
+GEMINI CRYPTO DRIVER
+M:	Corentin Labbe <clabbe@baylibre.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/gemini/
+
 GEMTEK FM RADIO RECEIVER DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
-- 
GitLab


From a8bc4f5e7a72e4067f5afd7e98b61624231713ca Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 2 Jun 2021 11:36:45 +0000
Subject: [PATCH 2691/3804] crypto: qce - fix error return code in
 qce_skcipher_async_req_handle()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 1339a7c3ba05 ("crypto: qce: skcipher: Fix incorrect sg count for dma transfers")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Thara Gopinath <thara.gopinath@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/skcipher.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
index 2594184792272..8ff10928f581d 100644
--- a/drivers/crypto/qce/skcipher.c
+++ b/drivers/crypto/qce/skcipher.c
@@ -124,13 +124,17 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
 	rctx->dst_sg = rctx->dst_tbl.sgl;
 
 	dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
-	if (dst_nents < 0)
+	if (dst_nents < 0) {
+		ret = dst_nents;
 		goto error_free;
+	}
 
 	if (diff_dst) {
 		src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
-		if (src_nents < 0)
+		if (src_nents < 0) {
+			ret = src_nents;
 			goto error_unmap_dst;
+		}
 		rctx->src_sg = req->src;
 	} else {
 		rctx->src_sg = rctx->dst_sg;
-- 
GitLab


From 1b82435d17774f3eaab35dce239d354548aa9da2 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 3 Jun 2021 01:53:40 -0400
Subject: [PATCH 2692/3804] crypto: x86/curve25519 - fix cpu feature checking
 logic in mod_exit

In curve25519_mod_init() the curve25519_alg will be registered only when
(X86_FEATURE_BMI2 && X86_FEATURE_ADX). But curve25519_mod_exit() still
checks (X86_FEATURE_BMI2 || X86_FEATURE_ADX) when unregistering the
algorithm. This will trigger a BUG_ON in crypto_unregister_alg(), as
alg->cra_refcnt is 0, if the CPU supports only one of X86_FEATURE_BMI2
and X86_FEATURE_ADX.

Fixes: 07b586fe0662 ("crypto: x86/curve25519 - replace with formally verified implementation")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/curve25519-x86_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 6706b6cb1d0fc..38caf61cd5b7d 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -1500,7 +1500,7 @@ static int __init curve25519_mod_init(void)
 static void __exit curve25519_mod_exit(void)
 {
 	if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
-	    (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
+	    static_branch_likely(&curve25519_use_bmi2_adx))
 		crypto_unregister_kpp(&curve25519_alg);
 }
 
-- 
GitLab


From c16a70c1f253e70f5d49b8e1054769bc8dbc3848 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 4 Jun 2021 09:31:26 +0800
Subject: [PATCH 2693/3804] crypto: hisilicon/sec - add new algorithm mode for
 AEAD

Add new algorithm mode for AEAD:
CCM(AES), GCM(AES), CCM(SM4), GCM(SM4).

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |   4 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 367 +++++++++++++++++++--
 drivers/crypto/hisilicon/sec2/sec_crypto.h |   8 +
 3 files changed, 345 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 935d8d95dcb91..2960faeea1b3a 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -13,6 +13,8 @@ struct sec_alg_res {
 	dma_addr_t pbuf_dma;
 	u8 *c_ivin;
 	dma_addr_t c_ivin_dma;
+	u8 *a_ivin;
+	dma_addr_t a_ivin_dma;
 	u8 *out_mac;
 	dma_addr_t out_mac_dma;
 };
@@ -33,6 +35,8 @@ struct sec_cipher_req {
 struct sec_aead_req {
 	u8 *out_mac;
 	dma_addr_t out_mac_dma;
+	u8 *a_ivin;
+	dma_addr_t a_ivin_dma;
 	struct aead_request *aead_req;
 };
 
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 5926b64d0d989..f2ab9ffa8f0ee 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -79,10 +79,24 @@
 #define SEC_SQE_CFLAG		2
 #define SEC_SQE_AEAD_FLAG	3
 #define SEC_SQE_DONE		0x1
+#define MIN_MAC_LEN		4
+#define MAC_LEN_MASK		0x1U
 #define MAX_INPUT_DATA_LEN	0xFFFE00
 #define BITS_MASK		0xFF
 #define BYTE_BITS		0x8
 #define SEC_XTS_NAME_SZ		0x3
+#define IV_CM_CAL_NUM		2
+#define IV_CL_MASK		0x7
+#define IV_CL_MIN		2
+#define IV_CL_MID		4
+#define IV_CL_MAX		8
+#define IV_FLAGS_OFFSET	0x6
+#define IV_CM_OFFSET		0x3
+#define IV_LAST_BYTE1		1
+#define IV_LAST_BYTE2		2
+#define IV_LAST_BYTE_MASK	0xFF
+#define IV_CTR_INIT		0x1
+#define IV_BYTE_OFFSET		0x8
 
 /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
 static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
@@ -316,6 +330,30 @@ static void sec_free_civ_resource(struct device *dev, struct sec_alg_res *res)
 				  res->c_ivin, res->c_ivin_dma);
 }
 
+static int sec_alloc_aiv_resource(struct device *dev, struct sec_alg_res *res)
+{
+	int i;
+
+	res->a_ivin = dma_alloc_coherent(dev, SEC_TOTAL_IV_SZ,
+					 &res->a_ivin_dma, GFP_KERNEL);
+	if (!res->a_ivin)
+		return -ENOMEM;
+
+	for (i = 1; i < QM_Q_DEPTH; i++) {
+		res[i].a_ivin_dma = res->a_ivin_dma + i * SEC_IV_SIZE;
+		res[i].a_ivin = res->a_ivin + i * SEC_IV_SIZE;
+	}
+
+	return 0;
+}
+
+static void sec_free_aiv_resource(struct device *dev, struct sec_alg_res *res)
+{
+	if (res->a_ivin)
+		dma_free_coherent(dev, SEC_TOTAL_IV_SZ,
+				  res->a_ivin, res->a_ivin_dma);
+}
+
 static int sec_alloc_mac_resource(struct device *dev, struct sec_alg_res *res)
 {
 	int i;
@@ -398,9 +436,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
 		return ret;
 
 	if (ctx->alg_type == SEC_AEAD) {
+		ret = sec_alloc_aiv_resource(dev, res);
+		if (ret)
+			goto alloc_aiv_fail;
+
 		ret = sec_alloc_mac_resource(dev, res);
 		if (ret)
-			goto alloc_fail;
+			goto alloc_mac_fail;
 	}
 	if (ctx->pbuf_supported) {
 		ret = sec_alloc_pbuf_resource(dev, res);
@@ -415,7 +457,10 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
 alloc_pbuf_fail:
 	if (ctx->alg_type == SEC_AEAD)
 		sec_free_mac_resource(dev, qp_ctx->res);
-alloc_fail:
+alloc_mac_fail:
+	if (ctx->alg_type == SEC_AEAD)
+		sec_free_aiv_resource(dev, res);
+alloc_aiv_fail:
 	sec_free_civ_resource(dev, res);
 	return ret;
 }
@@ -871,6 +916,8 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 		c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
 		c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
 		if (ctx->alg_type == SEC_AEAD) {
+			a_req->a_ivin = res->a_ivin;
+			a_req->a_ivin_dma = res->a_ivin_dma;
 			a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
 			a_req->out_mac_dma = res->pbuf_dma +
 					SEC_PBUF_MAC_OFFSET;
@@ -881,6 +928,8 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 	c_req->c_ivin = res->c_ivin;
 	c_req->c_ivin_dma = res->c_ivin_dma;
 	if (ctx->alg_type == SEC_AEAD) {
+		a_req->a_ivin = res->a_ivin;
+		a_req->a_ivin_dma = res->a_ivin_dma;
 		a_req->out_mac = res->out_mac;
 		a_req->out_mac_dma = res->out_mac_dma;
 	}
@@ -1012,6 +1061,17 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 	ctx->a_ctx.mac_len = mac_len;
 	c_ctx->c_mode = c_mode;
 
+	if (c_mode == SEC_CMODE_CCM || c_mode == SEC_CMODE_GCM) {
+		ret = sec_skcipher_aes_sm4_setkey(c_ctx, keylen, c_mode);
+		if (ret) {
+			dev_err(dev, "set sec aes ccm cipher key err!\n");
+			return ret;
+		}
+		memcpy(c_ctx->c_key, key, keylen);
+
+		return 0;
+	}
+
 	if (crypto_authenc_extractkeys(&keys, key, keylen))
 		goto bad_key;
 
@@ -1054,6 +1114,14 @@ GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha256, SEC_A_HMAC_SHA256,
 			 SEC_CALG_AES, SEC_HMAC_SHA256_MAC, SEC_CMODE_CBC)
 GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha512, SEC_A_HMAC_SHA512,
 			 SEC_CALG_AES, SEC_HMAC_SHA512_MAC, SEC_CMODE_CBC)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_ccm, 0, SEC_CALG_AES,
+			 SEC_HMAC_CCM_MAC, SEC_CMODE_CCM)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_gcm, 0, SEC_CALG_AES,
+			 SEC_HMAC_GCM_MAC, SEC_CMODE_GCM)
+GEN_SEC_AEAD_SETKEY_FUNC(sm4_ccm, 0, SEC_CALG_SM4,
+			 SEC_HMAC_CCM_MAC, SEC_CMODE_CCM)
+GEN_SEC_AEAD_SETKEY_FUNC(sm4_gcm, 0, SEC_CALG_SM4,
+			 SEC_HMAC_GCM_MAC, SEC_CMODE_GCM)
 
 static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
 {
@@ -1295,12 +1363,125 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
 	sk_req->base.complete(&sk_req->base, err);
 }
 
-static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
+static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req)
 {
 	struct aead_request *aead_req = req->aead_req.aead_req;
 	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_aead_req *a_req = &req->aead_req;
+	size_t authsize = ctx->a_ctx.mac_len;
+	u32 data_size = aead_req->cryptlen;
+	u8 flage = 0;
+	u8 cm, cl;
+
+	/* the specification has been checked in aead_iv_demension_check() */
+	cl = c_req->c_ivin[0] + 1;
+	c_req->c_ivin[ctx->c_ctx.ivsize - cl] = 0x00;
+	memset(&c_req->c_ivin[ctx->c_ctx.ivsize - cl], 0, cl);
+	c_req->c_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] = IV_CTR_INIT;
+
+	/* the last 3bit is L' */
+	flage |= c_req->c_ivin[0] & IV_CL_MASK;
+
+	/* the M' is bit3~bit5, the Flags is bit6 */
+	cm = (authsize - IV_CM_CAL_NUM) / IV_CM_CAL_NUM;
+	flage |= cm << IV_CM_OFFSET;
+	if (aead_req->assoclen)
+		flage |= 0x01 << IV_FLAGS_OFFSET;
+
+	memcpy(a_req->a_ivin, c_req->c_ivin, ctx->c_ctx.ivsize);
+	a_req->a_ivin[0] = flage;
+
+	/*
+	 * the last 32bit is counter's initial number,
+	 * but the nonce uses the first 16bit
+	 * the tail 16bit fill with the cipher length
+	 */
+	if (!c_req->encrypt)
+		data_size = aead_req->cryptlen - authsize;
+
+	a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] =
+			data_size & IV_LAST_BYTE_MASK;
+	data_size >>= IV_BYTE_OFFSET;
+	a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE2] =
+			data_size & IV_LAST_BYTE_MASK;
+}
+
+static void sec_aead_set_iv(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
+	size_t authsize = crypto_aead_authsize(tfm);
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_aead_req *a_req = &req->aead_req;
 
 	memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
+
+	if (ctx->c_ctx.c_mode == SEC_CMODE_CCM) {
+		/*
+		 * CCM 16Byte Cipher_IV: {1B_Flag,13B_IV,2B_counter},
+		 * the counter must be set to 0x01
+		 */
+		ctx->a_ctx.mac_len = authsize;
+		/* CCM 16Byte Auth_IV: {1B_AFlage,13B_IV,2B_Ptext_length} */
+		set_aead_auth_iv(ctx, req);
+	}
+
+	/* GCM 12Byte Cipher_IV == Auth_IV */
+	if (ctx->c_ctx.c_mode == SEC_CMODE_GCM) {
+		ctx->a_ctx.mac_len = authsize;
+		memcpy(a_req->a_ivin, c_req->c_ivin, SEC_AIV_SIZE);
+	}
+}
+
+static void sec_auth_bd_fill_xcm(struct sec_auth_ctx *ctx, int dir,
+				 struct sec_req *req, struct sec_sqe *sec_sqe)
+{
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct aead_request *aq = a_req->aead_req;
+
+	/* C_ICV_Len is MAC size, 0x4 ~ 0x10 */
+	sec_sqe->type2.icvw_kmode |= cpu_to_le16((u16)ctx->mac_len);
+
+	/* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */
+	sec_sqe->type2.a_key_addr = sec_sqe->type2.c_key_addr;
+	sec_sqe->type2.a_ivin_addr = cpu_to_le64(a_req->a_ivin_dma);
+	sec_sqe->type_cipher_auth |= SEC_NO_AUTH << SEC_AUTH_OFFSET;
+
+	if (dir)
+		sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH;
+	else
+		sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER;
+
+	sec_sqe->type2.alen_ivllen = cpu_to_le32(aq->assoclen);
+	sec_sqe->type2.auth_src_offset = cpu_to_le16(0x0);
+	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+
+	sec_sqe->type2.mac_addr = cpu_to_le64(a_req->out_mac_dma);
+}
+
+static void sec_auth_bd_fill_xcm_v3(struct sec_auth_ctx *ctx, int dir,
+				    struct sec_req *req, struct sec_sqe3 *sqe3)
+{
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct aead_request *aq = a_req->aead_req;
+
+	/* C_ICV_Len is MAC size, 0x4 ~ 0x10 */
+	sqe3->c_icv_key |= cpu_to_le16((u16)ctx->mac_len << SEC_MAC_OFFSET_V3);
+
+	/* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */
+	sqe3->a_key_addr = sqe3->c_key_addr;
+	sqe3->auth_ivin.a_ivin_addr = cpu_to_le64(a_req->a_ivin_dma);
+	sqe3->auth_mac_key |= SEC_NO_AUTH;
+
+	if (dir)
+		sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
+	else
+		sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
+
+	sqe3->a_len_key = cpu_to_le32(aq->assoclen);
+	sqe3->auth_src_offset = cpu_to_le16(0x0);
+	sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+	sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma);
 }
 
 static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
@@ -1348,7 +1529,11 @@ static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 		return ret;
 	}
 
-	sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+	if (ctx->c_ctx.c_mode == SEC_CMODE_CCM ||
+	    ctx->c_ctx.c_mode == SEC_CMODE_GCM)
+		sec_auth_bd_fill_xcm(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+	else
+		sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe);
 
 	return 0;
 }
@@ -1399,7 +1584,13 @@ static int sec_aead_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 		return ret;
 	}
 
-	sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt, req, sec_sqe3);
+	if (ctx->c_ctx.c_mode == SEC_CMODE_CCM ||
+	    ctx->c_ctx.c_mode == SEC_CMODE_GCM)
+		sec_auth_bd_fill_xcm_v3(auth_ctx, req->c_req.encrypt,
+					req, sec_sqe3);
+	else
+		sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt,
+				       req, sec_sqe3);
 
 	return 0;
 }
@@ -1531,7 +1722,7 @@ static const struct sec_req_op sec_skcipher_req_ops = {
 static const struct sec_req_op sec_aead_req_ops = {
 	.buf_map	= sec_aead_sgl_map,
 	.buf_unmap	= sec_aead_sgl_unmap,
-	.do_transfer	= sec_aead_copy_iv,
+	.do_transfer	= sec_aead_set_iv,
 	.bd_fill	= sec_aead_bd_fill,
 	.bd_send	= sec_bd_send,
 	.callback	= sec_aead_callback,
@@ -1551,7 +1742,7 @@ static const struct sec_req_op sec_skcipher_req_ops_v3 = {
 static const struct sec_req_op sec_aead_req_ops_v3 = {
 	.buf_map	= sec_aead_sgl_map,
 	.buf_unmap	= sec_aead_sgl_unmap,
-	.do_transfer	= sec_aead_copy_iv,
+	.do_transfer	= sec_aead_set_iv,
 	.bd_fill	= sec_aead_bd_fill_v3,
 	.bd_send	= sec_bd_send,
 	.callback	= sec_aead_callback,
@@ -1591,8 +1782,9 @@ static int sec_aead_init(struct crypto_aead *tfm)
 	crypto_aead_set_reqsize(tfm, sizeof(struct sec_req));
 	ctx->alg_type = SEC_AEAD;
 	ctx->c_ctx.ivsize = crypto_aead_ivsize(tfm);
-	if (ctx->c_ctx.ivsize > SEC_IV_SIZE) {
-		dev_err(ctx->dev, "get error aead iv size!\n");
+	if (ctx->c_ctx.ivsize < SEC_AIV_SIZE ||
+	    ctx->c_ctx.ivsize > SEC_IV_SIZE) {
+		pr_err("get error aead iv size!\n");
 		return -EINVAL;
 	}
 
@@ -1663,6 +1855,25 @@ static void sec_aead_ctx_exit(struct crypto_aead *tfm)
 	sec_aead_exit(tfm);
 }
 
+static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
+
+	ret = sec_aead_init(tfm);
+	if (ret) {
+		dev_err(ctx->dev, "hisi_sec2: aead xcm init error!\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void sec_aead_xcm_ctx_exit(struct crypto_aead *tfm)
+{
+	sec_aead_exit(tfm);
+}
+
 static int sec_aead_sha1_ctx_init(struct crypto_aead *tfm)
 {
 	return sec_aead_ctx_init(tfm, "sha1");
@@ -1903,41 +2114,100 @@ static struct skcipher_alg sec_skciphers_v3[] = {
 			 SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
 };
 
-static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
+static int aead_iv_demension_check(struct aead_request *aead_req)
+{
+	u8 cl;
+
+	cl = aead_req->iv[0] + 1;
+	if (cl < IV_CL_MIN || cl > IV_CL_MAX)
+		return -EINVAL;
+
+	if (cl < IV_CL_MID && aead_req->cryptlen >> (BYTE_BITS * cl))
+		return -EOVERFLOW;
+
+	return 0;
+}
+
+static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq)
 {
 	struct aead_request *req = sreq->aead_req.aead_req;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	size_t authsize = crypto_aead_authsize(tfm);
+	u8 c_mode = ctx->c_ctx.c_mode;
 	struct device *dev = ctx->dev;
-	u8 c_alg = ctx->c_ctx.c_alg;
+	int ret;
 
-	if (unlikely(!req->src || !req->dst || !req->cryptlen ||
-		req->assoclen > SEC_MAX_AAD_LEN)) {
-		dev_err(dev, "aead input param error!\n");
+	if (unlikely(req->cryptlen + req->assoclen > MAX_INPUT_DATA_LEN ||
+	    req->assoclen > SEC_MAX_AAD_LEN)) {
+		dev_err(dev, "aead input spec error!\n");
 		return -EINVAL;
 	}
 
-	if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
-		SEC_PBUF_SZ)
-		sreq->use_pbuf = true;
-	else
-		sreq->use_pbuf = false;
-
-	/* Support AES only */
-	if (unlikely(c_alg != SEC_CALG_AES)) {
-		dev_err(dev, "aead crypto alg error!\n");
+	if (unlikely((c_mode == SEC_CMODE_GCM && authsize < DES_BLOCK_SIZE) ||
+	   (c_mode == SEC_CMODE_CCM && (authsize < MIN_MAC_LEN ||
+		authsize & MAC_LEN_MASK)))) {
+		dev_err(dev, "aead input mac length error!\n");
 		return -EINVAL;
 	}
+
+	if (c_mode == SEC_CMODE_CCM) {
+		ret = aead_iv_demension_check(req);
+		if (ret) {
+			dev_err(dev, "aead input iv param error!\n");
+			return ret;
+		}
+	}
+
 	if (sreq->c_req.encrypt)
 		sreq->c_req.c_len = req->cryptlen;
 	else
 		sreq->c_req.c_len = req->cryptlen - authsize;
+	if (c_mode == SEC_CMODE_CBC) {
+		if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) {
+			dev_err(dev, "aead crypto length error!\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
 
-	if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) {
-		dev_err(dev, "aead crypto length error!\n");
+static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
+{
+	struct aead_request *req = sreq->aead_req.aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	size_t authsize = crypto_aead_authsize(tfm);
+	struct device *dev = ctx->dev;
+	u8 c_alg = ctx->c_ctx.c_alg;
+
+	if (unlikely(!req->src || !req->dst)) {
+		dev_err(dev, "aead input param error!\n");
 		return -EINVAL;
 	}
 
+	if (ctx->sec->qm.ver == QM_HW_V2) {
+		if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
+		    req->cryptlen <= authsize))) {
+			dev_err(dev, "Kunpeng920 not support 0 length!\n");
+			return -EINVAL;
+		}
+	}
+
+	/* Support AES or SM4 */
+	if (unlikely(c_alg != SEC_CALG_AES && c_alg != SEC_CALG_SM4)) {
+		dev_err(dev, "aead crypto alg error!\n");
+		return -EINVAL;
+	}
+
+	if (unlikely(sec_aead_spec_check(ctx, sreq)))
+		return -EINVAL;
+
+	if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
+		SEC_PBUF_SZ)
+		sreq->use_pbuf = true;
+	else
+		sreq->use_pbuf = false;
+
 	return 0;
 }
 
@@ -1970,7 +2240,7 @@ static int sec_aead_decrypt(struct aead_request *a_req)
 	return sec_aead_crypto(a_req, false);
 }
 
-#define SEC_AEAD_GEN_ALG(sec_cra_name, sec_set_key, ctx_init,\
+#define SEC_AEAD_ALG(sec_cra_name, sec_set_key, ctx_init,\
 			 ctx_exit, blk_size, iv_size, max_authsize)\
 {\
 	.base = {\
@@ -1991,22 +2261,39 @@ static int sec_aead_decrypt(struct aead_request *a_req)
 	.maxauthsize = max_authsize,\
 }
 
-#define SEC_AEAD_ALG(algname, keyfunc, aead_init, blksize, ivsize, authsize)\
-	SEC_AEAD_GEN_ALG(algname, keyfunc, aead_init,\
-			sec_aead_ctx_exit, blksize, ivsize, authsize)
-
 static struct aead_alg sec_aeads[] = {
 	SEC_AEAD_ALG("authenc(hmac(sha1),cbc(aes))",
 		     sec_setkey_aes_cbc_sha1, sec_aead_sha1_ctx_init,
-		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA1_DIGEST_SIZE),
+		     sec_aead_ctx_exit, AES_BLOCK_SIZE,
+		     AES_BLOCK_SIZE, SHA1_DIGEST_SIZE),
 
 	SEC_AEAD_ALG("authenc(hmac(sha256),cbc(aes))",
 		     sec_setkey_aes_cbc_sha256, sec_aead_sha256_ctx_init,
-		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA256_DIGEST_SIZE),
+		     sec_aead_ctx_exit, AES_BLOCK_SIZE,
+		     AES_BLOCK_SIZE, SHA256_DIGEST_SIZE),
 
 	SEC_AEAD_ALG("authenc(hmac(sha512),cbc(aes))",
 		     sec_setkey_aes_cbc_sha512, sec_aead_sha512_ctx_init,
-		     AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA512_DIGEST_SIZE),
+		     sec_aead_ctx_exit, AES_BLOCK_SIZE,
+		     AES_BLOCK_SIZE, SHA512_DIGEST_SIZE),
+
+	SEC_AEAD_ALG("ccm(aes)", sec_setkey_aes_ccm, sec_aead_xcm_ctx_init,
+		     sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+		     AES_BLOCK_SIZE, AES_BLOCK_SIZE),
+
+	SEC_AEAD_ALG("gcm(aes)", sec_setkey_aes_gcm, sec_aead_xcm_ctx_init,
+		     sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+		     SEC_AIV_SIZE, AES_BLOCK_SIZE)
+};
+
+static struct aead_alg sec_aeads_v3[] = {
+	SEC_AEAD_ALG("ccm(sm4)", sec_setkey_sm4_ccm, sec_aead_xcm_ctx_init,
+		     sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+		     AES_BLOCK_SIZE, AES_BLOCK_SIZE),
+
+	SEC_AEAD_ALG("gcm(sm4)", sec_setkey_sm4_gcm, sec_aead_xcm_ctx_init,
+		     sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+		     SEC_AIV_SIZE, AES_BLOCK_SIZE)
 };
 
 int sec_register_to_crypto(struct hisi_qm *qm)
@@ -2025,11 +2312,19 @@ int sec_register_to_crypto(struct hisi_qm *qm)
 		if (ret)
 			goto reg_skcipher_fail;
 	}
+
 	ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
 	if (ret)
 		goto reg_aead_fail;
+	if (qm->ver > QM_HW_V2) {
+		ret = crypto_register_aeads(sec_aeads_v3, ARRAY_SIZE(sec_aeads_v3));
+		if (ret)
+			goto reg_aead_v3_fail;
+	}
 	return ret;
 
+reg_aead_v3_fail:
+	crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
 reg_aead_fail:
 	if (qm->ver > QM_HW_V2)
 		crypto_unregister_skciphers(sec_skciphers_v3,
@@ -2042,10 +2337,14 @@ reg_skcipher_fail:
 
 void sec_unregister_from_crypto(struct hisi_qm *qm)
 {
+	if (qm->ver > QM_HW_V2)
+		crypto_unregister_aeads(sec_aeads_v3,
+					ARRAY_SIZE(sec_aeads_v3));
+	crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
+
 	if (qm->ver > QM_HW_V2)
 		crypto_unregister_skciphers(sec_skciphers_v3,
 					    ARRAY_SIZE(sec_skciphers_v3));
 	crypto_unregister_skciphers(sec_skciphers,
 				    ARRAY_SIZE(sec_skciphers));
-	crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
 }
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index c9bfe75d32e36..a7bcd3e2affd7 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -4,6 +4,7 @@
 #ifndef __HISI_SEC_V2_CRYPTO_H
 #define __HISI_SEC_V2_CRYPTO_H
 
+#define SEC_AIV_SIZE		12
 #define SEC_IV_SIZE		24
 #define SEC_MAX_KEY_SIZE	64
 #define SEC_COMM_SCENE		0
@@ -22,6 +23,11 @@ enum sec_hash_alg {
 };
 
 enum sec_mac_len {
+	SEC_HMAC_CCM_MAC   = 16,
+	SEC_HMAC_GCM_MAC   = 16,
+	SEC_SM3_MAC        = 32,
+	SEC_HMAC_SM3_MAC   = 32,
+	SEC_HMAC_MD5_MAC   = 16,
 	SEC_HMAC_SHA1_MAC   = 20,
 	SEC_HMAC_SHA256_MAC = 32,
 	SEC_HMAC_SHA512_MAC = 64,
@@ -33,6 +39,8 @@ enum sec_cmode {
 	SEC_CMODE_CFB    = 0x2,
 	SEC_CMODE_OFB    = 0x3,
 	SEC_CMODE_CTR    = 0x4,
+	SEC_CMODE_CCM    = 0x5,
+	SEC_CMODE_GCM    = 0x6,
 	SEC_CMODE_XTS    = 0x7,
 };
 
-- 
GitLab


From 6c46a3297beae4ae2d22b26da5e091f058381c7c Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 4 Jun 2021 09:31:27 +0800
Subject: [PATCH 2694/3804] crypto: hisilicon/sec - add fallback tfm supporting
 for aeads

Add fallback tfm support to the hisi_sec driver. The Kunpeng920's CCM/GCM
algorithms do not support a 0-byte src length, so the driver needs to set
up a software fallback aead tfm.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |  2 +
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 97 ++++++++++++++++++++--
 2 files changed, 94 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 2960faeea1b3a..3fe78754fba2e 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -88,7 +88,9 @@ struct sec_auth_ctx {
 	u8 a_key_len;
 	u8 mac_len;
 	u8 a_alg;
+	bool fallback;
 	struct crypto_shash *hash_tfm;
+	struct crypto_aead *fallback_aead_tfm;
 };
 
 /* SEC cipher context which cipher's relatives */
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f2ab9ffa8f0ee..194a9bca9c5e0 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 HiSilicon Limited. */
 
 #include <crypto/aes.h>
+#include <crypto/aead.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
 #include <crypto/des.h>
@@ -853,12 +854,16 @@ GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR)
 static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
 			struct scatterlist *src)
 {
-	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct aead_request *aead_req = a_req->aead_req;
 	struct sec_cipher_req *c_req = &req->c_req;
 	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
 	struct device *dev = ctx->dev;
 	int copy_size, pbuf_length;
 	int req_id = req->req_id;
+	struct crypto_aead *tfm;
+	size_t authsize;
+	u8 *mac_offset;
 
 	if (ctx->alg_type == SEC_AEAD)
 		copy_size = aead_req->cryptlen + aead_req->assoclen;
@@ -866,12 +871,17 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
 		copy_size = c_req->c_len;
 
 	pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
-							qp_ctx->res[req_id].pbuf,
-							copy_size);
+			qp_ctx->res[req_id].pbuf, copy_size);
 	if (unlikely(pbuf_length != copy_size)) {
 		dev_err(dev, "copy src data to pbuf error!\n");
 		return -EINVAL;
 	}
+	if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) {
+		tfm = crypto_aead_reqtfm(aead_req);
+		authsize = crypto_aead_authsize(tfm);
+		mac_offset = qp_ctx->res[req_id].pbuf + copy_size - authsize;
+		memcpy(a_req->out_mac, mac_offset, authsize);
+	}
 
 	c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
 	c_req->c_out_dma = c_req->c_in_dma;
@@ -1044,6 +1054,28 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx,
 	return 0;
 }
 
+static int sec_aead_setauthsize(struct crypto_aead *aead, unsigned int authsize)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+	struct sec_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+
+	if (unlikely(a_ctx->fallback_aead_tfm))
+		return crypto_aead_setauthsize(a_ctx->fallback_aead_tfm, authsize);
+
+	return 0;
+}
+
+static int sec_aead_fallback_setkey(struct sec_auth_ctx *a_ctx,
+				    struct crypto_aead *tfm, const u8 *key,
+				    unsigned int keylen)
+{
+	crypto_aead_clear_flags(a_ctx->fallback_aead_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_aead_set_flags(a_ctx->fallback_aead_tfm,
+			      crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK);
+	return crypto_aead_setkey(a_ctx->fallback_aead_tfm, key, keylen);
+}
+
 static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 			   const u32 keylen, const enum sec_hash_alg a_alg,
 			   const enum sec_calg c_alg,
@@ -1052,6 +1084,7 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 {
 	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
 	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
 	struct device *dev = ctx->dev;
 	struct crypto_authenc_keys keys;
 	int ret;
@@ -1069,6 +1102,12 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 		}
 		memcpy(c_ctx->c_key, key, keylen);
 
+		if (unlikely(a_ctx->fallback_aead_tfm)) {
+			ret = sec_aead_fallback_setkey(a_ctx, tfm, key, keylen);
+			if (ret)
+				return ret;
+		}
+
 		return 0;
 	}
 
@@ -1857,7 +1896,10 @@ static void sec_aead_ctx_exit(struct crypto_aead *tfm)
 
 static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm)
 {
+	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+	const char *aead_name = alg->base.cra_name;
 	int ret;
 
 	ret = sec_aead_init(tfm);
@@ -1866,11 +1908,24 @@ static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm)
 		return ret;
 	}
 
+	a_ctx->fallback_aead_tfm = crypto_alloc_aead(aead_name, 0,
+						     CRYPTO_ALG_NEED_FALLBACK |
+						     CRYPTO_ALG_ASYNC);
+	if (IS_ERR(a_ctx->fallback_aead_tfm)) {
+		dev_err(ctx->dev, "aead driver alloc fallback tfm error!\n");
+		sec_aead_exit(tfm);
+		return PTR_ERR(a_ctx->fallback_aead_tfm);
+	}
+	a_ctx->fallback = false;
+
 	return 0;
 }
 
 static void sec_aead_xcm_ctx_exit(struct crypto_aead *tfm)
 {
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+
+	crypto_free_aead(ctx->a_ctx.fallback_aead_tfm);
 	sec_aead_exit(tfm);
 }
 
@@ -2189,6 +2244,7 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 		if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
 		    req->cryptlen <= authsize))) {
 			dev_err(dev, "Kunpeng920 not support 0 length!\n");
+			ctx->a_ctx.fallback = true;
 			return -EINVAL;
 		}
 	}
@@ -2211,6 +2267,31 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 	return 0;
 }
 
+static int sec_aead_soft_crypto(struct sec_ctx *ctx,
+				struct aead_request *aead_req,
+				bool encrypt)
+{
+	struct aead_request *subreq = aead_request_ctx(aead_req);
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+	struct device *dev = ctx->dev;
+
+	/* Kunpeng920 aead mode not support input 0 size */
+	if (!a_ctx->fallback_aead_tfm) {
+		dev_err(dev, "aead fallbcak tfm is NULL!\n");
+		return -EINVAL;
+	}
+
+	aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
+	aead_request_set_callback(subreq, aead_req->base.flags,
+				  aead_req->base.complete, aead_req->base.data);
+	aead_request_set_crypt(subreq, aead_req->src, aead_req->dst,
+			       aead_req->cryptlen, aead_req->iv);
+	aead_request_set_ad(subreq, aead_req->assoclen);
+
+	return encrypt ? crypto_aead_encrypt(subreq) :
+		   crypto_aead_decrypt(subreq);
+}
+
 static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(a_req);
@@ -2224,8 +2305,11 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
 	req->ctx = ctx;
 
 	ret = sec_aead_param_check(ctx, req);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
+		if (ctx->a_ctx.fallback)
+			return sec_aead_soft_crypto(ctx, a_req, encrypt);
 		return -EINVAL;
+	}
 
 	return ctx->req_op->process(ctx, req);
 }
@@ -2247,7 +2331,9 @@ static int sec_aead_decrypt(struct aead_request *a_req)
 		.cra_name = sec_cra_name,\
 		.cra_driver_name = "hisi_sec_"sec_cra_name,\
 		.cra_priority = SEC_PRIORITY,\
-		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
+		.cra_flags = CRYPTO_ALG_ASYNC |\
+		 CRYPTO_ALG_ALLOCATES_MEMORY |\
+		 CRYPTO_ALG_NEED_FALLBACK,\
 		.cra_blocksize = blk_size,\
 		.cra_ctxsize = sizeof(struct sec_ctx),\
 		.cra_module = THIS_MODULE,\
@@ -2255,6 +2341,7 @@ static int sec_aead_decrypt(struct aead_request *a_req)
 	.init = ctx_init,\
 	.exit = ctx_exit,\
 	.setkey = sec_set_key,\
+	.setauthsize = sec_aead_setauthsize,\
 	.decrypt = sec_aead_decrypt,\
 	.encrypt = sec_aead_encrypt,\
 	.ivsize = iv_size,\
-- 
GitLab


From 668f1ab70378d836a9df0cc01abf21c40c4d9348 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 4 Jun 2021 09:31:28 +0800
Subject: [PATCH 2695/3804] crypto: hisilicon/sec - add hardware integrity
 check value process

Use the hardware integrity check value (ICV) process instead of the
software verify process when doing aead decryption.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 81 +++++++++++++---------
 drivers/crypto/hisilicon/sec2/sec_crypto.h |  1 +
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 194a9bca9c5e0..75122f020642f 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -54,6 +54,7 @@
 #define SEC_FLAG_MASK		0x0780
 #define SEC_TYPE_MASK		0x0F
 #define SEC_DONE_MASK		0x0001
+#define SEC_ICV_MASK		0x000E
 #define SEC_SQE_LEN_RATE_MASK	0x3
 
 #define SEC_TOTAL_IV_SZ		(SEC_IV_SIZE * QM_Q_DEPTH)
@@ -80,6 +81,7 @@
 #define SEC_SQE_CFLAG		2
 #define SEC_SQE_AEAD_FLAG	3
 #define SEC_SQE_DONE		0x1
+#define SEC_ICV_ERR		0x2
 #define MIN_MAC_LEN		4
 #define MAC_LEN_MASK		0x1U
 #define MAX_INPUT_DATA_LEN	0xFFFE00
@@ -156,32 +158,12 @@ static void sec_free_req_id(struct sec_req *req)
 	mutex_unlock(&qp_ctx->req_lock);
 }
 
-static int sec_aead_verify(struct sec_req *req)
-{
-	struct aead_request *aead_req = req->aead_req.aead_req;
-	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
-	size_t authsize = crypto_aead_authsize(tfm);
-	u8 *mac_out = req->aead_req.out_mac;
-	u8 *mac = mac_out + SEC_MAX_MAC_LEN;
-	struct scatterlist *sgl = aead_req->src;
-	size_t sz;
-
-	sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac, authsize,
-				aead_req->cryptlen + aead_req->assoclen -
-				authsize);
-	if (unlikely(sz != authsize || memcmp(mac_out, mac, sz))) {
-		dev_err(req->ctx->dev, "aead verify failure!\n");
-		return -EBADMSG;
-	}
-
-	return 0;
-}
-
 static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
 {
 	struct sec_sqe *bd = resp;
 
 	status->done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
+	status->icv = (le16_to_cpu(bd->type2.done_flag) & SEC_ICV_MASK) >> 1;
 	status->flag = (le16_to_cpu(bd->type2.done_flag) &
 					SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
 	status->tag = le16_to_cpu(bd->type2.tag);
@@ -195,6 +177,7 @@ static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp)
 	struct sec_sqe3 *bd3 = resp;
 
 	status->done = le16_to_cpu(bd3->done_flag) & SEC_DONE_MASK;
+	status->icv = (le16_to_cpu(bd3->done_flag) & SEC_ICV_MASK) >> 1;
 	status->flag = (le16_to_cpu(bd3->done_flag) &
 					SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
 	status->tag = le64_to_cpu(bd3->tag);
@@ -220,6 +203,14 @@ static int sec_cb_status_check(struct sec_req *req,
 					    status->flag);
 			return -EIO;
 		}
+	} else if (unlikely(ctx->alg_type == SEC_AEAD)) {
+		if (unlikely(status->flag != SEC_SQE_AEAD_FLAG ||
+			     status->icv == SEC_ICV_ERR)) {
+			dev_err_ratelimited(ctx->dev,
+					    "flag[%u], icv[%u]\n",
+					    status->flag, status->icv);
+			return -EBADMSG;
+		}
 	}
 
 	return 0;
@@ -262,9 +253,6 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
 	if (err)
 		atomic64_inc(&dfx->done_flag_cnt);
 
-	if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
-		err = sec_aead_verify(req);
-
 	atomic64_inc(&dfx->recv_cnt);
 
 	ctx->req_op->buf_unmap(ctx, req);
@@ -895,7 +883,6 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
 	struct aead_request *aead_req = req->aead_req.aead_req;
 	struct sec_cipher_req *c_req = &req->c_req;
 	struct sec_qp_ctx *qp_ctx = req->qp_ctx;
-	struct device *dev = ctx->dev;
 	int copy_size, pbuf_length;
 	int req_id = req->req_id;
 
@@ -905,10 +892,29 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
 		copy_size = c_req->c_len;
 
 	pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
-				qp_ctx->res[req_id].pbuf,
-				copy_size);
+			qp_ctx->res[req_id].pbuf, copy_size);
 	if (unlikely(pbuf_length != copy_size))
-		dev_err(dev, "copy pbuf data to dst error!\n");
+		dev_err(ctx->dev, "copy pbuf data to dst error!\n");
+}
+
+static int sec_aead_mac_init(struct sec_aead_req *req)
+{
+	struct aead_request *aead_req = req->aead_req;
+	struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
+	size_t authsize = crypto_aead_authsize(tfm);
+	u8 *mac_out = req->out_mac;
+	struct scatterlist *sgl = aead_req->src;
+	size_t copy_size;
+	off_t skip_size;
+
+	/* Copy input mac */
+	skip_size = aead_req->assoclen + aead_req->cryptlen - authsize;
+	copy_size = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac_out,
+				       authsize, skip_size);
+	if (unlikely(copy_size != authsize))
+		return -EINVAL;
+
+	return 0;
 }
 
 static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
@@ -922,7 +928,6 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 	int ret;
 
 	if (req->use_pbuf) {
-		ret = sec_cipher_pbuf_map(ctx, req, src);
 		c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
 		c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
 		if (ctx->alg_type == SEC_AEAD) {
@@ -932,6 +937,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 			a_req->out_mac_dma = res->pbuf_dma +
 					SEC_PBUF_MAC_OFFSET;
 		}
+		ret = sec_cipher_pbuf_map(ctx, req, src);
 
 		return ret;
 	}
@@ -954,6 +960,13 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 		return PTR_ERR(c_req->c_in);
 	}
 
+	if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) {
+		ret = sec_aead_mac_init(a_req);
+		if (unlikely(ret)) {
+			dev_err(dev, "fail to init mac data for ICV!\n");
+			return ret;
+		}
+	}
 	if (dst == src) {
 		c_req->c_out = c_req->c_in;
 		c_req->c_out_dma = c_req->c_in_dma;
@@ -1542,13 +1555,13 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
 	sec_sqe->type2.mac_key_alg |=
 			cpu_to_le32((u32)(ctx->a_alg) << SEC_AEAD_ALG_OFFSET);
 
-	sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET;
-
-	if (dir)
+	if (dir) {
+		sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET;
 		sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH;
-	else
+	} else {
+		sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE2 << SEC_AUTH_OFFSET;
 		sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER;
-
+	}
 	sec_sqe->type2.alen_ivllen = cpu_to_le32(c_req->c_len + aq->assoclen);
 
 	sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index a7bcd3e2affd7..9f71c358a6d35 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -80,6 +80,7 @@ struct bd_status {
 	u8 done;
 	u8 err_type;
 	u16 flag;
+	u16 icv;
 };
 
 enum {
-- 
GitLab


From 9039878ade5d7ec6ac8db299ab8e7d0d563e3447 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 4 Jun 2021 09:31:29 +0800
Subject: [PATCH 2696/3804] crypto: hisilicon/sec - modify the SEC request
 structure

Modify the SEC request structure by moving the two common input
parameters (c_in and c_in_dma) out of the cipher request and into the
SEC request itself, where they become 'in' and 'in_dma'.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec.h        |  7 +++--
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 34 +++++++++++-----------
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index 3fe78754fba2e..018415b9840a9 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -21,8 +21,6 @@ struct sec_alg_res {
 
 /* Cipher request of SEC private */
 struct sec_cipher_req {
-	struct hisi_acc_hw_sgl *c_in;
-	dma_addr_t c_in_dma;
 	struct hisi_acc_hw_sgl *c_out;
 	dma_addr_t c_out_dma;
 	u8 *c_ivin;
@@ -49,6 +47,11 @@ struct sec_req {
 	struct sec_ctx *ctx;
 	struct sec_qp_ctx *qp_ctx;
 
+	/**
+	 * Common parameter of the SEC request.
+	 */
+	struct hisi_acc_hw_sgl *in;
+	dma_addr_t in_dma;
 	struct sec_cipher_req c_req;
 	struct sec_aead_req aead_req;
 	struct list_head backlog_head;
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 75122f020642f..f23af61661dea 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -871,8 +871,8 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
 		memcpy(a_req->out_mac, mac_offset, authsize);
 	}
 
-	c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
-	c_req->c_out_dma = c_req->c_in_dma;
+	req->in_dma = qp_ctx->res[req_id].pbuf_dma;
+	c_req->c_out_dma = req->in_dma;
 
 	return 0;
 }
@@ -950,14 +950,13 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 		a_req->out_mac_dma = res->out_mac_dma;
 	}
 
-	c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
-						    qp_ctx->c_in_pool,
-						    req->req_id,
-						    &c_req->c_in_dma);
-
-	if (IS_ERR(c_req->c_in)) {
+	req->in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
+						qp_ctx->c_in_pool,
+						req->req_id,
+						&req->in_dma);
+	if (IS_ERR(req->in)) {
 		dev_err(dev, "fail to dma map input sgl buffers!\n");
-		return PTR_ERR(c_req->c_in);
+		return PTR_ERR(req->in);
 	}
 
 	if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) {
@@ -967,9 +966,10 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 			return ret;
 		}
 	}
+
 	if (dst == src) {
-		c_req->c_out = c_req->c_in;
-		c_req->c_out_dma = c_req->c_in_dma;
+		c_req->c_out = req->in;
+		c_req->c_out_dma = req->in_dma;
 	} else {
 		c_req->c_out = hisi_acc_sg_buf_map_to_hw_sgl(dev, dst,
 							     qp_ctx->c_out_pool,
@@ -978,7 +978,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
 
 		if (IS_ERR(c_req->c_out)) {
 			dev_err(dev, "fail to dma map output sgl buffers!\n");
-			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+			hisi_acc_sg_buf_unmap(dev, src, req->in);
 			return PTR_ERR(c_req->c_out);
 		}
 	}
@@ -996,7 +996,7 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
 		sec_cipher_pbuf_unmap(ctx, req, dst);
 	} else {
 		if (dst != src)
-			hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+			hisi_acc_sg_buf_unmap(dev, src, req->in);
 
 		hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
 	}
@@ -1236,7 +1236,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 
 	sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
 	sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
-	sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
+	sec_sqe->type2.data_src_addr = cpu_to_le64(req->in_dma);
 	sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
 
 	sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_mode) <<
@@ -1263,7 +1263,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
 
 	sec_sqe->sdm_addr_type |= da_type;
 	scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
-	if (c_req->c_in_dma != c_req->c_out_dma)
+	if (req->in_dma != c_req->c_out_dma)
 		de = 0x1 << SEC_DE_OFFSET;
 
 	sec_sqe->sds_sa_type = (de | scene | sa_type);
@@ -1286,7 +1286,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 
 	sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
 	sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
-	sec_sqe3->data_src_addr = cpu_to_le64(c_req->c_in_dma);
+	sec_sqe3->data_src_addr = cpu_to_le64(req->in_dma);
 	sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma);
 
 	sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) |
@@ -1309,7 +1309,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
 	}
 
 	bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3;
-	if (c_req->c_in_dma != c_req->c_out_dma)
+	if (req->in_dma != c_req->c_out_dma)
 		bd_param |= 0x1 << SEC_DE_OFFSET_V3;
 
 	bd_param |= SEC_BD_TYPE3;
-- 
GitLab


From 5cd259ca5d466f65ffd21e2e2fa00fb648a8c555 Mon Sep 17 00:00:00 2001
From: Hongbo Li <herberthbli@tencent.com>
Date: Fri, 4 Jun 2021 14:30:35 +0800
Subject: [PATCH 2697/3804] crypto: sm2 - fix a memory leak in sm2

The SM2 module allocates ec->Q in sm2_set_pub_key(). When running the
algorithm test in test_akcipher_one(), the public key is set for every
test vector and the previously allocated ec->Q is never freed. This
causes a memory leak.

This patch allocates ec->Q in sm2_ec_ctx_init() instead.

Fixes: ea7ecb66440b ("crypto: sm2 - introduce OSCCA SM2 asymmetric cipher algorithm")
Signed-off-by: Hongbo Li <herberthbli@tencent.com>
Reviewed-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/sm2.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/crypto/sm2.c b/crypto/sm2.c
index b21addc3ac06a..db8a4a265669d 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -79,10 +79,17 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec)
 		goto free;
 
 	rc = -ENOMEM;
+
+	ec->Q = mpi_point_new(0);
+	if (!ec->Q)
+		goto free;
+
 	/* mpi_ec_setup_elliptic_curve */
 	ec->G = mpi_point_new(0);
-	if (!ec->G)
+	if (!ec->G) {
+		mpi_point_release(ec->Q);
 		goto free;
+	}
 
 	mpi_set(ec->G->x, x);
 	mpi_set(ec->G->y, y);
@@ -91,6 +98,7 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec)
 	rc = -EINVAL;
 	ec->n = mpi_scanval(ecp->n);
 	if (!ec->n) {
+		mpi_point_release(ec->Q);
 		mpi_point_release(ec->G);
 		goto free;
 	}
@@ -386,27 +394,15 @@ static int sm2_set_pub_key(struct crypto_akcipher *tfm,
 	MPI a;
 	int rc;
 
-	ec->Q = mpi_point_new(0);
-	if (!ec->Q)
-		return -ENOMEM;
-
 	/* include the uncompressed flag '0x04' */
-	rc = -ENOMEM;
 	a = mpi_read_raw_data(key, keylen);
 	if (!a)
-		goto error;
+		return -ENOMEM;
 
 	mpi_normalize(a);
 	rc = sm2_ecc_os2ec(ec->Q, a);
 	mpi_free(a);
-	if (rc)
-		goto error;
-
-	return 0;
 
-error:
-	mpi_point_release(ec->Q);
-	ec->Q = NULL;
 	return rc;
 }
 
-- 
GitLab


From 0dc64297c8ac98503a7c7621b3c78e151deb75b6 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 5 Jun 2021 14:55:56 +0200
Subject: [PATCH 2698/3804] crypto: cavium/nitrox - Fix an error handling path
 in 'nitrox_probe()'

If an error occurs after a successful 'ioremap()' call, it must be undone
by a corresponding 'iounmap()' call, as already done in the remove
function.
Add a 'pf_sw_fail' label in the error handling path and add the missing
'iounmap()'.

While at it, also add a 'flr_fail' label in the error handling path and use
it to avoid some code duplication.

Fixes: 14fa93cdcd9b ("crypto: cavium - Add support for CNN55XX adapters.")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_main.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index 6af05df281a98..96bc7b5c6532d 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -424,8 +424,7 @@ static int nitrox_probe(struct pci_dev *pdev,
 	err = nitrox_device_flr(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "FLR failed\n");
-		pci_disable_device(pdev);
-		return err;
+		goto flr_fail;
 	}
 
 	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
@@ -434,17 +433,13 @@ static int nitrox_probe(struct pci_dev *pdev,
 		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
 		if (err) {
 			dev_err(&pdev->dev, "DMA configuration failed\n");
-			pci_disable_device(pdev);
-			return err;
+			goto flr_fail;
 		}
 	}
 
 	err = pci_request_mem_regions(pdev, nitrox_driver_name);
-	if (err) {
-		pci_disable_device(pdev);
-		dev_err(&pdev->dev, "Failed to request mem regions!\n");
-		return err;
-	}
+	if (err)
+		goto flr_fail;
 	pci_set_master(pdev);
 
 	ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
@@ -480,7 +475,7 @@ static int nitrox_probe(struct pci_dev *pdev,
 
 	err = nitrox_pf_sw_init(ndev);
 	if (err)
-		goto ioremap_err;
+		goto pf_sw_fail;
 
 	err = nitrox_pf_hw_init(ndev);
 	if (err)
@@ -510,12 +505,15 @@ crypto_fail:
 	smp_mb__after_atomic();
 pf_hw_fail:
 	nitrox_pf_sw_cleanup(ndev);
+pf_sw_fail:
+	iounmap(ndev->bar_addr);
 ioremap_err:
 	nitrox_remove_from_devlist(ndev);
 	kfree(ndev);
 	pci_set_drvdata(pdev, NULL);
 ndev_fail:
 	pci_release_mem_regions(pdev);
+flr_fail:
 	pci_disable_device(pdev);
 	return err;
 }
-- 
GitLab


From abd062886cd103196b4f26cf735c3a3619dec76b Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 11 Jun 2021 09:18:47 +0200
Subject: [PATCH 2699/3804] Revert "usb: gadget: fsl: Re-enable driver for ARM
 SoCs"

This reverts commit e0e8b6abe8c862229ba00cdd806e8598cdef00bb.

Turns out this breaks the build.  We had numerous reports of problems
from linux-next and 0-day about this not working properly, so revert it
for now until it can be figured out properly.

The build errors are:
	arm-linux-gnueabi-ld: fsl_udc_core.c:(.text+0x29d4): undefined reference to `fsl_udc_clk_finalize'
	arm-linux-gnueabi-ld: fsl_udc_core.c:(.text+0x2ba8): undefined reference to `fsl_udc_clk_release'
	fsl_udc_core.c:(.text+0x2848): undefined reference to `fsl_udc_clk_init'
	fsl_udc_core.c:(.text+0xe88): undefined reference to `fsl_udc_clk_release'

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kernel test robot <lkp@intel.com>
Fixes: e0e8b6abe8c8 ("usb: gadget: fsl: Re-enable driver for ARM SoCs")
Cc: stable <stable@vger.kernel.org>
Cc: Joel Stanley <joel@jms.id.au>
Cc: Leo Li <leoyang.li@nxp.com>
Cc: Peter Chen <peter.chen@nxp.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Felipe Balbi <balbi@kernel.org>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Ran Wang <ran.wang_1@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 7348acbdc5600..8c614bb86c665 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -90,7 +90,7 @@ config USB_BCM63XX_UDC
 
 config USB_FSL_USB2
 	tristate "Freescale Highspeed USB DR Peripheral Controller"
-	depends on FSL_SOC || ARCH_LAYERSCAPE || SOC_LS1021A || COMPILE_TEST
+	depends on FSL_SOC
 	help
 	   Some of Freescale PowerPC and i.MX processors have a High Speed
 	   Dual-Role(DR) USB controller, which supports device mode.
-- 
GitLab


From 1d3156396cf6ea0873145092f4e040374ff1d862 Mon Sep 17 00:00:00 2001
From: ChenXiaoSong <chenxiaosong2@huawei.com>
Date: Wed, 9 Jun 2021 11:55:10 +0800
Subject: [PATCH 2700/3804] x86/sgx: Correct kernel-doc's arg name in
 sgx_encl_release()

Fix the following kernel-doc warning:

  arch/x86/kernel/cpu/sgx/encl.c:392: warning: Function parameter \
    or member 'ref' not described in 'sgx_encl_release'

 [ bp: Massage commit message. ]

Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210609035510.2083694-1-chenxiaosong2@huawei.com
---
 arch/x86/kernel/cpu/sgx/encl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 3be203297988f..001808e3901cc 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -383,7 +383,7 @@ const struct vm_operations_struct sgx_vm_ops = {
 
 /**
  * sgx_encl_release - Destroy an enclave instance
- * @kref:	address of a kref inside &sgx_encl
+ * @ref:	address of a kref inside &sgx_encl
  *
  * Used together with kref_put(). Frees all the resources associated with the
  * enclave and the instance itself.
-- 
GitLab


From 4c6e0976295add7f0ed94d276c04a3d6f1ea8f83 Mon Sep 17 00:00:00 2001
From: Benjamin Drung <bdrung@posteo.de>
Date: Sat, 5 Jun 2021 22:15:36 +0200
Subject: [PATCH 2701/3804] media: uvcvideo: Fix pixel format change for Elgato
 Cam Link 4K

The Elgato Cam Link 4K HDMI video capture card reports to support three
different pixel formats, where the first format depends on the connected
HDMI device.

```
$ v4l2-ctl -d /dev/video0 --list-formats-ext
ioctl: VIDIOC_ENUM_FMT
	Type: Video Capture

	[0]: 'NV12' (Y/CbCr 4:2:0)
		Size: Discrete 3840x2160
			Interval: Discrete 0.033s (29.970 fps)
	[1]: 'NV12' (Y/CbCr 4:2:0)
		Size: Discrete 3840x2160
			Interval: Discrete 0.033s (29.970 fps)
	[2]: 'YU12' (Planar YUV 4:2:0)
		Size: Discrete 3840x2160
			Interval: Discrete 0.033s (29.970 fps)
```

Changing the pixel format to anything besides the first pixel format
does not work:

```
$ v4l2-ctl -d /dev/video0 --try-fmt-video pixelformat=YU12
Format Video Capture:
	Width/Height      : 3840/2160
	Pixel Format      : 'NV12' (Y/CbCr 4:2:0)
	Field             : None
	Bytes per Line    : 3840
	Size Image        : 12441600
	Colorspace        : sRGB
	Transfer Function : Rec. 709
	YCbCr/HSV Encoding: Rec. 709
	Quantization      : Default (maps to Limited Range)
	Flags             :
```

User space applications like VLC might show an error message on the
terminal in that case:

```
libv4l2: error set_fmt gave us a different result than try_fmt!
```

Depending on the error handling of the user space applications, they
might display a distorted video, because they use the wrong pixel format
for decoding the stream.

The Elgato Cam Link 4K responds to the USB video probe
VS_PROBE_CONTROL/VS_COMMIT_CONTROL with a malformed data structure: The
second byte contains bFormatIndex (instead of being the second byte of
bmHint). The first byte is always zero. The third byte is always 1.

The firmware bug was reported to Elgato on 2020-12-01 and it was
forwarded by the support team to the developers as a feature request.
There is no firmware update available since then. The latest firmware
for Elgato Cam Link 4K as of 2021-03-23 has MCU 20.02.19 and FPGA 67.

Therefore correct the malformed data structure for this device. The
change was successfully tested with VLC, OBS, and Chromium using
different pixel formats (YUYV, NV12, YU12), resolutions (3840x2160,
1920x1080), and frame rates (29.970 and 59.940 fps).

Cc: stable@vger.kernel.org
Signed-off-by: Benjamin Drung <bdrung@posteo.de>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/uvc/uvc_video.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c
index a777b389a66ec..e16464606b140 100644
--- a/drivers/media/usb/uvc/uvc_video.c
+++ b/drivers/media/usb/uvc/uvc_video.c
@@ -127,10 +127,37 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit,
 static void uvc_fixup_video_ctrl(struct uvc_streaming *stream,
 	struct uvc_streaming_control *ctrl)
 {
+	static const struct usb_device_id elgato_cam_link_4k = {
+		USB_DEVICE(0x0fd9, 0x0066)
+	};
 	struct uvc_format *format = NULL;
 	struct uvc_frame *frame = NULL;
 	unsigned int i;
 
+	/*
+	 * The response of the Elgato Cam Link 4K is incorrect: The second byte
+	 * contains bFormatIndex (instead of being the second byte of bmHint).
+	 * The first byte is always zero. The third byte is always 1.
+	 *
+	 * The UVC 1.5 class specification defines the first five bits in the
+	 * bmHint bitfield. The remaining bits are reserved and should be zero.
+	 * Therefore a valid bmHint will be less than 32.
+	 *
+	 * Latest Elgato Cam Link 4K firmware as of 2021-03-23 needs this fix.
+	 * MCU: 20.02.19, FPGA: 67
+	 */
+	if (usb_match_one_id(stream->dev->intf, &elgato_cam_link_4k) &&
+	    ctrl->bmHint > 255) {
+		u8 corrected_format_index = ctrl->bmHint >> 8;
+
+		uvc_dbg(stream->dev, VIDEO,
+			"Correct USB video probe response from {bmHint: 0x%04x, bFormatIndex: %u} to {bmHint: 0x%04x, bFormatIndex: %u}\n",
+			ctrl->bmHint, ctrl->bFormatIndex,
+			1, corrected_format_index);
+		ctrl->bmHint = 1;
+		ctrl->bFormatIndex = corrected_format_index;
+	}
+
 	for (i = 0; i < stream->nformats; ++i) {
 		if (stream->format[i].index == ctrl->bFormatIndex) {
 			format = &stream->format[i];
-- 
GitLab


From 4c1daba15c209b99d192f147fea3dade30f72ed2 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 8 Jun 2021 12:55:12 +0100
Subject: [PATCH 2702/3804] perf/smmuv3: Don't trample existing events with
 global filter

With global filtering, we only allow an event to be scheduled if its
filter settings exactly match those of any existing events, therefore
it is pointless to reapply the filter in that case. Much worse, though,
is that in doing that we trample the event type of counter 0 if it's
already active, and never touch the appropriate PMEVTYPERn so the new
event is likely not counting the right thing either. Don't do that.

CC: stable@vger.kernel.org
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/32c80c0e46237f49ad8da0c9f8864e13c4a803aa.1623153312.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_smmuv3_pmu.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 7786ccc6d12f2..c195a9adec320 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -277,7 +277,7 @@ static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
 				       struct perf_event *event, int idx)
 {
 	u32 span, sid;
-	unsigned int num_ctrs = smmu_pmu->num_counters;
+	unsigned int cur_idx, num_ctrs = smmu_pmu->num_counters;
 	bool filter_en = !!get_filter_enable(event);
 
 	span = filter_en ? get_filter_span(event) :
@@ -285,17 +285,19 @@ static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
 	sid = filter_en ? get_filter_stream_id(event) :
 			   SMMU_PMCG_DEFAULT_FILTER_SID;
 
-	/* Support individual filter settings */
-	if (!smmu_pmu->global_filter) {
+	cur_idx = find_first_bit(smmu_pmu->used_counters, num_ctrs);
+	/*
+	 * Per-counter filtering, or scheduling the first globally-filtered
+	 * event into an empty PMU so idx == 0 and it works out equivalent.
+	 */
+	if (!smmu_pmu->global_filter || cur_idx == num_ctrs) {
 		smmu_pmu_set_event_filter(event, idx, span, sid);
 		return 0;
 	}
 
-	/* Requested settings same as current global settings*/
-	idx = find_first_bit(smmu_pmu->used_counters, num_ctrs);
-	if (idx == num_ctrs ||
-	    smmu_pmu_check_global_filter(smmu_pmu->events[idx], event)) {
-		smmu_pmu_set_event_filter(event, 0, span, sid);
+	/* Otherwise, must match whatever's currently scheduled */
+	if (smmu_pmu_check_global_filter(smmu_pmu->events[cur_idx], event)) {
+		smmu_pmu_set_evtyper(smmu_pmu, idx, get_event(event));
 		return 0;
 	}
 
-- 
GitLab


From f8e6d24144d1bfbb8714faa9044e135c0c00bd89 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:40:57 +0800
Subject: [PATCH 2703/3804] perf: Add EVENT_ATTR_ID to simplify event
 attributes

Similar EVENT_ATTR macros are defined in many PMU drivers,
like Arm PMU driver, Arm SMMU PMU driver. So add a generic
macro to simplify code.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-2-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/perf_event.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5a6a2f069ed4..2d510ad750edc 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1576,6 +1576,12 @@ static struct perf_pmu_events_attr _var = {				    \
 	.event_str	= _str,						    \
 };
 
+#define PMU_EVENT_ATTR_ID(_name, _show, _id)				\
+	(&((struct perf_pmu_events_attr[]) {				\
+		{ .attr = __ATTR(_name, 0444, _show, NULL),		\
+		  .id = _id, }						\
+	})[0].attr.attr)
+
 #define PMU_FORMAT_ATTR(_name, _format)					\
 static ssize_t								\
 _name##_show(struct device *dev,					\
-- 
GitLab


From 7ac87a8dfbd9c42fa1920773b09a57586222aad4 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:40:58 +0800
Subject: [PATCH 2704/3804] drivers/perf: Simplify EVENT ATTR macro in SMMU PMU
 driver

Use common macro PMU_EVENT_ATTR_ID to simplify SMMU_EVENT_ATTR

Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-3-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm_smmuv3_pmu.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index c195a9adec320..226348822ab39 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -511,11 +511,8 @@ static ssize_t smmu_pmu_event_show(struct device *dev,
 	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
-#define SMMU_EVENT_ATTR(name, config)					\
-	(&((struct perf_pmu_events_attr) {				\
-		.attr = __ATTR(name, 0444, smmu_pmu_event_show, NULL),	\
-		.id = config,						\
-	}).attr.attr)
+#define SMMU_EVENT_ATTR(name, config)			\
+	PMU_EVENT_ATTR_ID(name, smmu_pmu_event_show, config)
 
 static struct attribute *smmu_pmu_events[] = {
 	SMMU_EVENT_ATTR(cycles, 0),
-- 
GitLab


From 0bf2d7298842afbc28a5413024ebc444a599e980 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:40:59 +0800
Subject: [PATCH 2705/3804] drivers/perf: Simplify EVENT ATTR macro in
 qcom_l2_pmu.c

Use common macro PMU_EVENT_ATTR_ID to simplify L2CACHE_EVENT_ATTR

Cc: Andy Gross <agross@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-4-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/qcom_l2_pmu.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index b60e30141583e..5b093badd0f65 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -679,11 +679,8 @@ static ssize_t l2cache_pmu_event_show(struct device *dev,
 	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
-#define L2CACHE_EVENT_ATTR(_name, _id)					     \
-	(&((struct perf_pmu_events_attr[]) {				     \
-		{ .attr = __ATTR(_name, 0444, l2cache_pmu_event_show, NULL), \
-		  .id = _id, }						     \
-	})[0].attr.attr)
+#define L2CACHE_EVENT_ATTR(_name, _id)			    \
+	PMU_EVENT_ATTR_ID(_name, l2cache_pmu_event_show, _id)
 
 static struct attribute *l2_cache_pmu_events[] = {
 	L2CACHE_EVENT_ATTR(cycles, L2_EVENT_CYCLES),
-- 
GitLab


From 78b1d3c72070bbc9793e63dd6528c1e67ee0d52a Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:41:00 +0800
Subject: [PATCH 2706/3804] drivers/perf: Simplify EVENT ATTR macro in
 qcom_l3_pmu.c

Use common macro PMU_EVENT_ATTR_ID to simplify L3CACHE_EVENT_ATTR

Cc: Andy Gross <agross@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-5-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/qcom_l3_pmu.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c
index c76f6f21d2a80..1ff2ff6582bf8 100644
--- a/drivers/perf/qcom_l3_pmu.c
+++ b/drivers/perf/qcom_l3_pmu.c
@@ -647,10 +647,7 @@ static ssize_t l3cache_pmu_event_show(struct device *dev,
 }
 
 #define L3CACHE_EVENT_ATTR(_name, _id)					     \
-	(&((struct perf_pmu_events_attr[]) {				     \
-		{ .attr = __ATTR(_name, 0444, l3cache_pmu_event_show, NULL), \
-		  .id = _id, }						     \
-	})[0].attr.attr)
+	PMU_EVENT_ATTR_ID(_name, l3cache_pmu_event_show, _id)
 
 static struct attribute *qcom_l3_cache_pmu_events[] = {
 	L3CACHE_EVENT_ATTR(cycles, L3_EVENT_CYCLES),
-- 
GitLab


From b323dfe02e56627e4eaed7cf59dc609da67a1651 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:41:01 +0800
Subject: [PATCH 2707/3804] drivers/perf: Simplify EVENT ATTR macro in
 xgene_pmu.c

Use common macro PMU_EVENT_ATTR_ID to simplify XGENE_PMU_EVENT_ATTR

Cc: Khuong Dinh <khuong@os.amperecomputing.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-6-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/xgene_pmu.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 62d942534a6be..2b6d476bd2137 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -278,17 +278,14 @@ static const struct attribute_group mc_pmu_v3_format_attr_group = {
 static ssize_t xgene_pmu_event_show(struct device *dev,
 				    struct device_attribute *attr, char *buf)
 {
-	struct dev_ext_attribute *eattr;
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
 
-	eattr = container_of(attr, struct dev_ext_attribute, attr);
-	return sysfs_emit(buf, "config=0x%lx\n", (unsigned long) eattr->var);
+	return sysfs_emit(buf, "config=0x%llx\n", pmu_attr->id);
 }
 
 #define XGENE_PMU_EVENT_ATTR(_name, _config)		\
-	(&((struct dev_ext_attribute[]) {		\
-		{ .attr = __ATTR(_name, S_IRUGO, xgene_pmu_event_show, NULL), \
-		  .var = (void *) _config, }		\
-	 })[0].attr.attr)
+	PMU_EVENT_ATTR_ID(_name, xgene_pmu_event_show, _config)
 
 static struct attribute *l3c_pmu_events_attrs[] = {
 	XGENE_PMU_EVENT_ATTR(cycle-count,			0x00),
-- 
GitLab


From 773510f4d2775bda7cec585e8643f4269c4944e5 Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:41:02 +0800
Subject: [PATCH 2708/3804] drivers/perf: Simplify EVENT ATTR macro in
 fsl_imx8_ddr_perf.c

Use common macro PMU_EVENT_ATTR_ID to simplify IMX8_DDR_PMU_EVENT_ATTR

Reviewed-by: Frank Li <Frank.li@nxp.com>

Cc: Frank Li <Frank.li@nxp.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-7-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index df048fe42fc2a..2a1d78794a4e5 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -222,11 +222,8 @@ ddr_pmu_event_show(struct device *dev, struct device_attribute *attr,
 	return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
 }
 
-#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)				\
-	(&((struct perf_pmu_events_attr[]) {				\
-		{ .attr = __ATTR(_name, 0444, ddr_pmu_event_show, NULL),\
-		  .id = _id, }						\
-	})[0].attr.attr)
+#define IMX8_DDR_PMU_EVENT_ATTR(_name, _id)		\
+	PMU_EVENT_ATTR_ID(_name, ddr_pmu_event_show, _id)
 
 static struct attribute *ddr_perf_events_attrs[] = {
 	IMX8_DDR_PMU_EVENT_ATTR(cycles, EVENT_CYCLES_ID),
-- 
GitLab


From 64432f09068a0fa76f20918a3c22ee3484a3762d Mon Sep 17 00:00:00 2001
From: Qi Liu <liuqi115@huawei.com>
Date: Wed, 9 Jun 2021 14:41:03 +0800
Subject: [PATCH 2709/3804] arm64: perf: Simplify EVENT ATTR macro in
 perf_event.c

Use common macro PMU_EVENT_ATTR_ID to simplify ARMV8_EVENT_ATTR

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Qi Liu <liuqi115@huawei.com>
Link: https://lore.kernel.org/r/1623220863-58233-8-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/perf_event.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a661010308c05..d07788dad3882 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -165,10 +165,7 @@ armv8pmu_events_sysfs_show(struct device *dev,
 }
 
 #define ARMV8_EVENT_ATTR(name, config)						\
-	(&((struct perf_pmu_events_attr) {					\
-		.attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL),	\
-		.id = config,							\
-	}).attr.attr)
+	PMU_EVENT_ATTR_ID(name, armv8pmu_events_sysfs_show, config)
 
 static struct attribute *armv8_pmuv3_event_attrs[] = {
 	ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
-- 
GitLab


From 78b92c7337e10519312e8aab64d7a1651206bd61 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Jun 2021 11:23:00 +0100
Subject: [PATCH 2710/3804] arm64: insn: decouple patching from insn code

Currently, <asm/insn.h> includes <asm/patching.h>. We intend that
<asm/insn.h> will be usable from userspace, so it doesn't make sense to
include headers for kernel-only features such as the patching routines,
and we'd intended to restrict <asm/insn.h> to instruction encoding
details.

Let's decouple the patching code from <asm/insn.h>, and explicitly
include <asm/patching.h> where it is needed. Since <asm/patching.h>
isn't included from assembly, we can drop the __ASSEMBLY__ guards.

At the same time, sort the kprobes includes so that it's easier to see
what is and isn't included.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210609102301.17332-2-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/insn.h      |  1 -
 arch/arm64/include/asm/patching.h  |  2 --
 arch/arm64/kernel/ftrace.c         |  1 +
 arch/arm64/kernel/jump_label.c     |  1 +
 arch/arm64/kernel/kgdb.c           |  1 +
 arch/arm64/kernel/patching.c       |  1 +
 arch/arm64/kernel/probes/kprobes.c | 18 ++++++++++--------
 arch/arm64/kernel/traps.c          |  1 +
 8 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 1ea9611545bb4..a6f3f45fc46f0 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -11,7 +11,6 @@
 #include <linux/types.h>
 
 #include <asm/alternative.h>
-#include <asm/patching.h>
 
 #ifndef __ASSEMBLY__
 /*
diff --git a/arch/arm64/include/asm/patching.h b/arch/arm64/include/asm/patching.h
index 5ebab129222f4..6bf5adc562950 100644
--- a/arch/arm64/include/asm/patching.h
+++ b/arch/arm64/include/asm/patching.h
@@ -4,12 +4,10 @@
 
 #include <linux/types.h>
 
-#ifndef __ASSEMBLY__
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
 
 int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
 int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
-#endif /* __ASSEMBLY__ */
 
 #endif	/* __ASM_PATCHING_H */
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index b5d3ddaf69d9e..7f467bd9db7a3 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -15,6 +15,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
+#include <asm/patching.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /*
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index 9a8a0ae1e75f8..fc98037e12205 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/jump_label.h>
 #include <asm/insn.h>
+#include <asm/patching.h>
 
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 1a157ca33262d..2aede780fb80c 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -17,6 +17,7 @@
 
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
+#include <asm/patching.h>
 #include <asm/traps.h>
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 9d050e33901b7..7aa55b33c8c75 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -9,6 +9,7 @@
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
 #include <asm/kprobes.h>
+#include <asm/patching.h>
 #include <asm/sections.h>
 
 static DEFINE_RAW_SPINLOCK(patch_lock);
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index d607c99120252..609edde7a5dde 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -7,26 +7,28 @@
  * Copyright (C) 2013 Linaro Limited.
  * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org>
  */
+#include <linux/extable.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
-#include <linux/extable.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
 #include <linux/sched/debug.h>
 #include <linux/set_memory.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
 #include <linux/stringify.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
-#include <asm/traps.h>
-#include <asm/ptrace.h>
+
 #include <asm/cacheflush.h>
-#include <asm/debug-monitors.h>
 #include <asm/daifflags.h>
-#include <asm/system_misc.h>
+#include <asm/debug-monitors.h>
 #include <asm/insn.h>
-#include <linux/uaccess.h>
 #include <asm/irq.h>
+#include <asm/patching.h>
+#include <asm/ptrace.h>
 #include <asm/sections.h>
+#include <asm/system_misc.h>
+#include <asm/traps.h>
 
 #include "decode-insn.h"
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 9b683b2381cf1..48ff6fb888e0c 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -37,6 +37,7 @@
 #include <asm/exception.h>
 #include <asm/extable.h>
 #include <asm/kprobes.h>
+#include <asm/patching.h>
 #include <asm/traps.h>
 #include <asm/smp.h>
 #include <asm/stack_pointer.h>
-- 
GitLab


From 3e00e39d9dad48360ebd518726ebf81da1b84c10 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Jun 2021 11:23:01 +0100
Subject: [PATCH 2711/3804] arm64: insn: move AARCH64_INSN_SIZE into
 <asm/insn.h>

For historical reasons, we define AARCH64_INSN_SIZE in
<asm/alternative-macros.h>, but it would make more sense to do so in
<asm/insn.h>. Let's move it into <asm/insn.h>, and add the necessary
include directives for this.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210609102301.17332-3-mark.rutland@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/alternative-macros.h | 4 +---
 arch/arm64/include/asm/insn.h               | 3 +++
 arch/arm64/include/asm/kvm_asm.h            | 1 +
 arch/arm64/kernel/cpufeature.c              | 1 +
 arch/arm64/kernel/patching.c                | 1 +
 arch/arm64/kernel/traps.c                   | 1 +
 arch/arm64/net/bpf_jit_comp.c               | 1 +
 7 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 8a078fc662ac5..703fbf310b792 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -3,12 +3,10 @@
 #define __ASM_ALTERNATIVE_MACROS_H
 
 #include <asm/cpucaps.h>
+#include <asm/insn.h>
 
 #define ARM64_CB_PATCH ARM64_NCAPS
 
-/* A64 instructions are always 32 bits. */
-#define	AARCH64_INSN_SIZE		4
-
 #ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index a6f3f45fc46f0..1430b4973039a 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -12,6 +12,9 @@
 
 #include <asm/alternative.h>
 
+/* A64 instructions are always 32 bits. */
+#define	AARCH64_INSN_SIZE		4
+
 #ifndef __ASSEMBLY__
 /*
  * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cf8df032b9c30..894edda8cc85a 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -8,6 +8,7 @@
 #define __ARM_KVM_ASM_H__
 
 #include <asm/hyp_image.h>
+#include <asm/insn.h>
 #include <asm/virt.h>
 
 #define ARM_EXIT_WITH_SERROR_BIT  31
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index efed2830d141f..16d35cfffcea8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -76,6 +76,7 @@
 #include <asm/cpufeature.h>
 #include <asm/cpu_ops.h>
 #include <asm/fpsimd.h>
+#include <asm/insn.h>
 #include <asm/kvm_host.h>
 #include <asm/mmu_context.h>
 #include <asm/mte.h>
diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c
index 7aa55b33c8c75..9a6edb9c48c7d 100644
--- a/arch/arm64/kernel/patching.c
+++ b/arch/arm64/kernel/patching.c
@@ -8,6 +8,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/fixmap.h>
+#include <asm/insn.h>
 #include <asm/kprobes.h>
 #include <asm/patching.h>
 #include <asm/sections.h>
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 48ff6fb888e0c..8f66072fa5cb2 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -36,6 +36,7 @@
 #include <asm/esr.h>
 #include <asm/exception.h>
 #include <asm/extable.h>
+#include <asm/insn.h>
 #include <asm/kprobes.h>
 #include <asm/patching.h>
 #include <asm/traps.h>
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f7b194878a99a..dd5000da18b89 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -16,6 +16,7 @@
 #include <asm/byteorder.h>
 #include <asm/cacheflush.h>
 #include <asm/debug-monitors.h>
+#include <asm/insn.h>
 #include <asm/set_memory.h>
 
 #include "bpf_jit.h"
-- 
GitLab


From 6cbf874e51b68e5b2eb0cc50be3676f5d5601dab Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:12 +0000
Subject: [PATCH 2712/3804] KVM: arm64: Move hyp_pool locking out of refcount
 helpers

The hyp_page refcount helpers currently rely on the hyp_pool lock for
serialization. However, this means the refcounts can't be changed from
the buddy allocator core as it already holds the lock, which means pages
have to go through odd transient states.

For example, when a page is freed, its refcount is set to 0, and the
lock is transiently released before the page can be attached to a free
list in the buddy tree. This is currently harmless as the allocator
checks the list node of each page to see if it is available for
allocation or not, but it means the page refcount can't be trusted to
represent the state of the page even if the pool lock is held.

In order to fix this, remove the pool locking from the refcount helpers,
and move all the logic to the buddy allocator. This will simplify the
removal of the list node from struct hyp_page in a later patch.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-2-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/gfp.h | 35 ----------------------
 arch/arm64/kvm/hyp/nvhe/page_alloc.c  | 43 ++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 18a4494337bdd..f2c84e4fa40fa 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -22,41 +22,6 @@ struct hyp_pool {
 	unsigned int max_order;
 };
 
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	p->refcount++;
-	hyp_spin_unlock(&pool->lock);
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-	int ret;
-
-	hyp_spin_lock(&pool->lock);
-	p->refcount--;
-	ret = (p->refcount == 0);
-	hyp_spin_unlock(&pool->lock);
-
-	return ret;
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	if (p->refcount) {
-		hyp_spin_unlock(&pool->lock);
-		BUG();
-	}
-	p->refcount = 1;
-	hyp_spin_unlock(&pool->lock);
-}
-
 /* Allocation */
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
 void hyp_get_page(void *addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 237e03bf0cb11..d666f4789e319 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -93,15 +93,6 @@ static void __hyp_attach_page(struct hyp_pool *pool,
 	list_add_tail(&p->node, &pool->free_area[order]);
 }
 
-static void hyp_attach_page(struct hyp_page *p)
-{
-	struct hyp_pool *pool = hyp_page_to_pool(p);
-
-	hyp_spin_lock(&pool->lock);
-	__hyp_attach_page(pool, p);
-	hyp_spin_unlock(&pool->lock);
-}
-
 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 					   struct hyp_page *p,
 					   unsigned int order)
@@ -125,19 +116,49 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 	return p;
 }
 
+static inline void hyp_page_ref_inc(struct hyp_page *p)
+{
+	p->refcount++;
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+	p->refcount--;
+	return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+	BUG_ON(p->refcount);
+	p->refcount = 1;
+}
+
+/*
+ * Changes to the buddy tree and page refcounts must be done with the hyp_pool
+ * lock held. If a refcount change requires an update to the buddy tree (e.g.
+ * hyp_put_page()), both operations must be done within the same critical
+ * section to guarantee transient states (e.g. a page with null refcount but
+ * not yet attached to a free list) can't be observed by well-behaved readers.
+ */
 void hyp_put_page(void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
+	struct hyp_pool *pool = hyp_page_to_pool(p);
 
+	hyp_spin_lock(&pool->lock);
 	if (hyp_page_ref_dec_and_test(p))
-		hyp_attach_page(p);
+		__hyp_attach_page(pool, p);
+	hyp_spin_unlock(&pool->lock);
 }
 
 void hyp_get_page(void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
+	struct hyp_pool *pool = hyp_page_to_pool(p);
 
+	hyp_spin_lock(&pool->lock);
 	hyp_page_ref_inc(p);
+	hyp_spin_unlock(&pool->lock);
 }
 
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
@@ -159,8 +180,8 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
 	p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
 	p = __hyp_extract_page(pool, p, order);
 
-	hyp_spin_unlock(&pool->lock);
 	hyp_set_page_refcounted(p);
+	hyp_spin_unlock(&pool->lock);
 
 	return hyp_page_to_virt(p);
 }
-- 
GitLab


From 581982decc635c93934aaeb88d62c21238c63f11 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:13 +0000
Subject: [PATCH 2713/3804] KVM: arm64: Use refcount at hyp to check page
 availability

The hyp buddy allocator currently checks the struct hyp_page list node
to see if a page is available for allocation or not when trying to
coalesce memory. Now that decrementing the refcount and attaching to
the buddy tree is done in the same critical section, we can rely on the
refcount of the buddy page to be in sync, which allows us to replace the
list node check with a refcount check. This will ease removing the list
node from struct hyp_page later on.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-3-qperret@google.com
---
 arch/arm64/kvm/hyp/nvhe/page_alloc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index d666f4789e319..2602577daa000 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -55,7 +55,7 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
 {
 	struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
 
-	if (!buddy || buddy->order != order || list_empty(&buddy->node))
+	if (!buddy || buddy->order != order || buddy->refcount)
 		return NULL;
 
 	return buddy;
@@ -133,6 +133,12 @@ static inline void hyp_set_page_refcounted(struct hyp_page *p)
 	p->refcount = 1;
 }
 
+static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
+{
+	if (hyp_page_ref_dec_and_test(p))
+		__hyp_attach_page(pool, p);
+}
+
 /*
  * Changes to the buddy tree and page refcounts must be done with the hyp_pool
  * lock held. If a refcount change requires an update to the buddy tree (e.g.
@@ -146,8 +152,7 @@ void hyp_put_page(void *addr)
 	struct hyp_pool *pool = hyp_page_to_pool(p);
 
 	hyp_spin_lock(&pool->lock);
-	if (hyp_page_ref_dec_and_test(p))
-		__hyp_attach_page(pool, p);
+	__hyp_put_page(pool, p);
 	hyp_spin_unlock(&pool->lock);
 }
 
@@ -202,15 +207,16 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 
 	/* Init the vmemmap portion */
 	p = hyp_phys_to_page(phys);
-	memset(p, 0, sizeof(*p) * nr_pages);
 	for (i = 0; i < nr_pages; i++) {
 		p[i].pool = pool;
+		p[i].order = 0;
 		INIT_LIST_HEAD(&p[i].node);
+		hyp_set_page_refcounted(&p[i]);
 	}
 
 	/* Attach the unused pages to the buddy tree */
 	for (i = reserved_pages; i < nr_pages; i++)
-		__hyp_attach_page(pool, &p[i]);
+		__hyp_put_page(pool, &p[i]);
 
 	return 0;
 }
-- 
GitLab


From 914cde58a03cc5eef858db34687433e17d0e44be Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:14 +0000
Subject: [PATCH 2714/3804] KVM: arm64: Remove list_head from hyp_page

The list_head member of struct hyp_page is only needed when the page is
attached to a free-list, which by definition implies the page is free.
As such, nothing prevents us from using the page itself to store the
list_head, hence reducing the size of the vmemmap.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-4-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/memory.h |  1 -
 arch/arm64/kvm/hyp/nvhe/page_alloc.c     | 39 ++++++++++++++++++++----
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index fd78bde939ee7..7691ab495eb4c 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -12,7 +12,6 @@ struct hyp_page {
 	unsigned int refcount;
 	unsigned int order;
 	struct hyp_pool *pool;
-	struct list_head node;
 };
 
 extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 2602577daa000..34f0eb026dd24 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -62,6 +62,34 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
 
 }
 
+/*
+ * Pages that are available for allocation are tracked in free-lists, so we use
+ * the pages themselves to store the list nodes to avoid wasting space. As the
+ * allocator always returns zeroed pages (which are zeroed on the hyp_put_page()
+ * path to optimize allocation speed), we also need to clean-up the list node in
+ * each page when we take it out of the list.
+ */
+static inline void page_remove_from_list(struct hyp_page *p)
+{
+	struct list_head *node = hyp_page_to_virt(p);
+
+	__list_del_entry(node);
+	memset(node, 0, sizeof(*node));
+}
+
+static inline void page_add_to_list(struct hyp_page *p, struct list_head *head)
+{
+	struct list_head *node = hyp_page_to_virt(p);
+
+	INIT_LIST_HEAD(node);
+	list_add_tail(node, head);
+}
+
+static inline struct hyp_page *node_to_page(struct list_head *node)
+{
+	return hyp_virt_to_page(node);
+}
+
 static void __hyp_attach_page(struct hyp_pool *pool,
 			      struct hyp_page *p)
 {
@@ -83,14 +111,14 @@ static void __hyp_attach_page(struct hyp_pool *pool,
 			break;
 
 		/* Take the buddy out of its list, and coallesce with @p */
-		list_del_init(&buddy->node);
+		page_remove_from_list(buddy);
 		buddy->order = HYP_NO_ORDER;
 		p = min(p, buddy);
 	}
 
 	/* Mark the new head, and insert it */
 	p->order = order;
-	list_add_tail(&p->node, &pool->free_area[order]);
+	page_add_to_list(p, &pool->free_area[order]);
 }
 
 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
@@ -99,7 +127,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 {
 	struct hyp_page *buddy;
 
-	list_del_init(&p->node);
+	page_remove_from_list(p);
 	while (p->order > order) {
 		/*
 		 * The buddy of order n - 1 currently has HYP_NO_ORDER as it
@@ -110,7 +138,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 		p->order--;
 		buddy = __find_buddy_nocheck(pool, p, p->order);
 		buddy->order = p->order;
-		list_add_tail(&buddy->node, &pool->free_area[buddy->order]);
+		page_add_to_list(buddy, &pool->free_area[buddy->order]);
 	}
 
 	return p;
@@ -182,7 +210,7 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
 	}
 
 	/* Extract it from the tree at the right order */
-	p = list_first_entry(&pool->free_area[i], struct hyp_page, node);
+	p = node_to_page(pool->free_area[i].next);
 	p = __hyp_extract_page(pool, p, order);
 
 	hyp_set_page_refcounted(p);
@@ -210,7 +238,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 	for (i = 0; i < nr_pages; i++) {
 		p[i].pool = pool;
 		p[i].order = 0;
-		INIT_LIST_HEAD(&p[i].node);
 		hyp_set_page_refcounted(&p[i]);
 	}
 
-- 
GitLab


From 7c350ea39e53ade33ca7be00b0947f2b9f53dda0 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:15 +0000
Subject: [PATCH 2715/3804] KVM: arm64: Unify MMIO and mem host stage-2 pools

We currently maintain two separate memory pools for the host stage-2,
one for pages used in the page-table when mapping memory regions, and
the other to map MMIO regions. The former is large enough to map all of
memory with page granularity and the latter can cover an arbitrary
portion of IPA space, but allows pages to be 'recycled'.

However, this split makes accounting difficult to manage as pages at
intermediate levels of the page-table may be used to map both memory and
MMIO regions. Simplify the scheme by merging both pools into one. This
means we can now hit the -ENOMEM case in the memory abort path, but
we're still guaranteed forward-progress in the worst case by unmapping
MMIO regions. On the plus side this also means we can usually map a lot
more MMIO space at once if memory ranges happen to be mapped with block
mappings.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-5-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +-
 arch/arm64/kvm/hyp/include/nvhe/mm.h          | 13 +++---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 46 ++++++++-----------
 arch/arm64/kvm/hyp/nvhe/setup.c               | 16 ++-----
 arch/arm64/kvm/hyp/reserved_mem.c             |  3 +-
 5 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 42d81ec739fac..9c227d87c36d3 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -23,7 +23,7 @@ extern struct host_kvm host_kvm;
 int __pkvm_prot_finalize(void);
 int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end);
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool);
+int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 static __always_inline void __load_host_stage2(void)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 0095f62897429..8ec3a5a7744b8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void)
 	return res;
 }
 
-static inline unsigned long host_s2_mem_pgtable_pages(void)
+static inline unsigned long host_s2_pgtable_pages(void)
 {
+	unsigned long res;
+
 	/*
 	 * Include an extra 16 pages to safely upper-bound the worst case of
 	 * concatenated pgds.
 	 */
-	return __hyp_pgtable_total_pages() + 16;
-}
+	res = __hyp_pgtable_total_pages() + 16;
 
-static inline unsigned long host_s2_dev_pgtable_pages(void)
-{
 	/* Allow 1 GiB for MMIO mappings */
-	return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+	res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT);
+
+	return res;
 }
 
 #endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c0056c041..c8ed7e86231b1 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -23,8 +23,7 @@
 extern unsigned long hyp_nr_cpus;
 struct host_kvm host_kvm;
 
-static struct hyp_pool host_s2_mem;
-static struct hyp_pool host_s2_dev;
+static struct hyp_pool host_s2_pool;
 
 /*
  * Copies of the host's CPU features registers holding sanitized values.
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1;
 
 static void *host_s2_zalloc_pages_exact(size_t size)
 {
-	return hyp_alloc_pages(&host_s2_mem, get_order(size));
+	return hyp_alloc_pages(&host_s2_pool, get_order(size));
 }
 
 static void *host_s2_zalloc_page(void *pool)
@@ -44,20 +43,14 @@ static void *host_s2_zalloc_page(void *pool)
 	return hyp_alloc_pages(pool, 0);
 }
 
-static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool)
+static int prepare_s2_pool(void *pgt_pool_base)
 {
 	unsigned long nr_pages, pfn;
 	int ret;
 
-	pfn = hyp_virt_to_pfn(mem_pgt_pool);
-	nr_pages = host_s2_mem_pgtable_pages();
-	ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0);
-	if (ret)
-		return ret;
-
-	pfn = hyp_virt_to_pfn(dev_pgt_pool);
-	nr_pages = host_s2_dev_pgtable_pages();
-	ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0);
+	pfn = hyp_virt_to_pfn(pgt_pool_base);
+	nr_pages = host_s2_pgtable_pages();
+	ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0);
 	if (ret)
 		return ret;
 
@@ -86,7 +79,7 @@ static void prepare_host_vtcr(void)
 					  id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
-int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
+int kvm_host_prepare_stage2(void *pgt_pool_base)
 {
 	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
 	int ret;
@@ -94,7 +87,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
 	prepare_host_vtcr();
 	hyp_spin_lock_init(&host_kvm.lock);
 
-	ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool);
+	ret = prepare_s2_pool(pgt_pool_base);
 	if (ret)
 		return ret;
 
@@ -199,11 +192,10 @@ static bool range_is_memory(u64 start, u64 end)
 }
 
 static inline int __host_stage2_idmap(u64 start, u64 end,
-				      enum kvm_pgtable_prot prot,
-				      struct hyp_pool *pool)
+				      enum kvm_pgtable_prot prot)
 {
 	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
-				      prot, pool);
+				      prot, &host_s2_pool);
 }
 
 static int host_stage2_idmap(u64 addr)
@@ -211,7 +203,6 @@ static int host_stage2_idmap(u64 addr)
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W;
 	struct kvm_mem_range range;
 	bool is_memory = find_mem_range(addr, &range);
-	struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev;
 	int ret;
 
 	if (is_memory)
@@ -222,22 +213,21 @@ static int host_stage2_idmap(u64 addr)
 	if (ret)
 		goto unlock;
 
-	ret = __host_stage2_idmap(range.start, range.end, prot, pool);
-	if (is_memory || ret != -ENOMEM)
+	ret = __host_stage2_idmap(range.start, range.end, prot);
+	if (ret != -ENOMEM)
 		goto unlock;
 
 	/*
-	 * host_s2_mem has been provided with enough pages to cover all of
-	 * memory with page granularity, so we should never hit the ENOMEM case.
-	 * However, it is difficult to know how much of the MMIO range we will
-	 * need to cover upfront, so we may need to 'recycle' the pages if we
-	 * run out.
+	 * The pool has been provided with enough pages to cover all of memory
+	 * with page granularity, but it is difficult to know how much of the
+	 * MMIO range we will need to cover upfront, so we may need to 'recycle'
+	 * the pages if we run out.
 	 */
 	ret = host_stage2_unmap_dev_all();
 	if (ret)
 		goto unlock;
 
-	ret = __host_stage2_idmap(range.start, range.end, prot, pool);
+	ret = __host_stage2_idmap(range.start, range.end, prot);
 
 unlock:
 	hyp_spin_unlock(&host_kvm.lock);
@@ -258,7 +248,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
 
 	hyp_spin_lock(&host_kvm.lock);
 	ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start,
-					   &host_s2_mem, pkvm_hyp_id);
+					   &host_s2_pool, pkvm_hyp_id);
 	hyp_spin_unlock(&host_kvm.lock);
 
 	return ret != -EAGAIN ? ret : 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a3d3a275344e9..1cff3259a4931 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus;
 
 static void *vmemmap_base;
 static void *hyp_pgt_base;
-static void *host_s2_mem_pgt_base;
-static void *host_s2_dev_pgt_base;
+static void *host_s2_pgt_base;
 static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
 
 static int divide_memory_pool(void *virt, unsigned long size)
@@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size)
 	if (!hyp_pgt_base)
 		return -ENOMEM;
 
-	nr_pages = host_s2_mem_pgtable_pages();
-	host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages);
-	if (!host_s2_mem_pgt_base)
-		return -ENOMEM;
-
-	nr_pages = host_s2_dev_pgtable_pages();
-	host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages);
-	if (!host_s2_dev_pgt_base)
+	nr_pages = host_s2_pgtable_pages();
+	host_s2_pgt_base = hyp_early_alloc_contig(nr_pages);
+	if (!host_s2_pgt_base)
 		return -ENOMEM;
 
 	return 0;
@@ -158,7 +152,7 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
-	ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base);
+	ret = kvm_host_prepare_stage2(host_s2_pgt_base);
 	if (ret)
 		goto out;
 
diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c
index 83ca23ac259b1..d654921dd09b5 100644
--- a/arch/arm64/kvm/hyp/reserved_mem.c
+++ b/arch/arm64/kvm/hyp/reserved_mem.c
@@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void)
 	}
 
 	hyp_mem_pages += hyp_s1_pgtable_pages();
-	hyp_mem_pages += host_s2_mem_pgtable_pages();
-	hyp_mem_pages += host_s2_dev_pgtable_pages();
+	hyp_mem_pages += host_s2_pgtable_pages();
 
 	/*
 	 * The hyp_vmemmap needs to be backed by pages, but these pages
-- 
GitLab


From d978b9cfe6fe8008467f8c5d51677f52e7815b39 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:16 +0000
Subject: [PATCH 2716/3804] KVM: arm64: Remove hyp_pool pointer from struct
 hyp_page

Each struct hyp_page currently contains a pointer to a hyp_pool struct
where the page should be freed if its refcount reaches 0. However, this
information can always be inferred from the context in the EL2 code, so
drop the pointer to save a few bytes in the vmemmap.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-6-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/gfp.h    |  4 ++--
 arch/arm64/kvm/hyp/include/nvhe/memory.h |  2 --
 arch/arm64/kvm/hyp/nvhe/mem_protect.c    | 14 ++++++++++++--
 arch/arm64/kvm/hyp/nvhe/page_alloc.c     |  7 ++-----
 arch/arm64/kvm/hyp/nvhe/setup.c          | 14 ++++++++++++--
 5 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index f2c84e4fa40fa..3ea7bfb6c380a 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -24,8 +24,8 @@ struct hyp_pool {
 
 /* Allocation */
 void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
-void hyp_get_page(void *addr);
-void hyp_put_page(void *addr);
+void hyp_get_page(struct hyp_pool *pool, void *addr);
+void hyp_put_page(struct hyp_pool *pool, void *addr);
 
 /* Used pages cannot be freed */
 int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 7691ab495eb4c..991636be2f466 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,11 +7,9 @@
 
 #include <linux/types.h>
 
-struct hyp_pool;
 struct hyp_page {
 	unsigned int refcount;
 	unsigned int order;
-	struct hyp_pool *pool;
 };
 
 extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index c8ed7e86231b1..d938ce95d3bdf 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -43,6 +43,16 @@ static void *host_s2_zalloc_page(void *pool)
 	return hyp_alloc_pages(pool, 0);
 }
 
+static void host_s2_get_page(void *addr)
+{
+	hyp_get_page(&host_s2_pool, addr);
+}
+
+static void host_s2_put_page(void *addr)
+{
+	hyp_put_page(&host_s2_pool, addr);
+}
+
 static int prepare_s2_pool(void *pgt_pool_base)
 {
 	unsigned long nr_pages, pfn;
@@ -60,8 +70,8 @@ static int prepare_s2_pool(void *pgt_pool_base)
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
 		.page_count = hyp_page_count,
-		.get_page = hyp_get_page,
-		.put_page = hyp_put_page,
+		.get_page = host_s2_get_page,
+		.put_page = host_s2_put_page,
 	};
 
 	return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 34f0eb026dd24..e3689def70335 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -174,20 +174,18 @@ static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
  * section to guarantee transient states (e.g. a page with null refcount but
  * not yet attached to a free list) can't be observed by well-behaved readers.
  */
-void hyp_put_page(void *addr)
+void hyp_put_page(struct hyp_pool *pool, void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
-	struct hyp_pool *pool = hyp_page_to_pool(p);
 
 	hyp_spin_lock(&pool->lock);
 	__hyp_put_page(pool, p);
 	hyp_spin_unlock(&pool->lock);
 }
 
-void hyp_get_page(void *addr)
+void hyp_get_page(struct hyp_pool *pool, void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
-	struct hyp_pool *pool = hyp_page_to_pool(p);
 
 	hyp_spin_lock(&pool->lock);
 	hyp_page_ref_inc(p);
@@ -236,7 +234,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 	/* Init the vmemmap portion */
 	p = hyp_phys_to_page(phys);
 	for (i = 0; i < nr_pages; i++) {
-		p[i].pool = pool;
 		p[i].order = 0;
 		hyp_set_page_refcounted(&p[i]);
 	}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 1cff3259a4931..f834833ac9210 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -137,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg)
 	return hyp_alloc_pages(&hpool, 0);
 }
 
+static void hpool_get_page(void *addr)
+{
+	hyp_get_page(&hpool, addr);
+}
+
+static void hpool_put_page(void *addr)
+{
+	hyp_put_page(&hpool, addr);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -160,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void)
 		.zalloc_page = hyp_zalloc_hyp_page,
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
-		.get_page = hyp_get_page,
-		.put_page = hyp_put_page,
+		.get_page = hpool_get_page,
+		.put_page = hpool_put_page,
 	};
 	pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
-- 
GitLab


From 87ec0606733e1aa9568f54ddb41f03aa6b5687f2 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:17 +0000
Subject: [PATCH 2717/3804] KVM: arm64: Use less bits for hyp_page order

The hyp_page order is currently encoded on 4 bytes even though it is
guaranteed to be smaller than this. Make it 2 bytes to reduce the hyp
vmemmap overhead.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-7-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/gfp.h    |  6 +++---
 arch/arm64/kvm/hyp/include/nvhe/memory.h |  2 +-
 arch/arm64/kvm/hyp/nvhe/page_alloc.c     | 12 ++++++------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
index 3ea7bfb6c380a..fb0f523d14921 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
 #include <nvhe/memory.h>
 #include <nvhe/spinlock.h>
 
-#define HYP_NO_ORDER	UINT_MAX
+#define HYP_NO_ORDER	USHRT_MAX
 
 struct hyp_pool {
 	/*
@@ -19,11 +19,11 @@ struct hyp_pool {
 	struct list_head free_area[MAX_ORDER];
 	phys_addr_t range_start;
 	phys_addr_t range_end;
-	unsigned int max_order;
+	unsigned short max_order;
 };
 
 /* Allocation */
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order);
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
 void hyp_get_page(struct hyp_pool *pool, void *addr);
 void hyp_put_page(struct hyp_pool *pool, void *addr);
 
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 991636be2f466..3fe34fa30ea47 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -9,7 +9,7 @@
 
 struct hyp_page {
 	unsigned int refcount;
-	unsigned int order;
+	unsigned short order;
 };
 
 extern u64 __hyp_vmemmap;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index e3689def70335..be07055bbc10f 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -32,7 +32,7 @@ u64 __hyp_vmemmap;
  */
 static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
 					     struct hyp_page *p,
-					     unsigned int order)
+					     unsigned short order)
 {
 	phys_addr_t addr = hyp_page_to_phys(p);
 
@@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool,
 /* Find a buddy page currently available for allocation */
 static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool,
 					   struct hyp_page *p,
-					   unsigned int order)
+					   unsigned short order)
 {
 	struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order);
 
@@ -93,7 +93,7 @@ static inline struct hyp_page *node_to_page(struct list_head *node)
 static void __hyp_attach_page(struct hyp_pool *pool,
 			      struct hyp_page *p)
 {
-	unsigned int order = p->order;
+	unsigned short order = p->order;
 	struct hyp_page *buddy;
 
 	memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
@@ -123,7 +123,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
 
 static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 					   struct hyp_page *p,
-					   unsigned int order)
+					   unsigned short order)
 {
 	struct hyp_page *buddy;
 
@@ -192,9 +192,9 @@ void hyp_get_page(struct hyp_pool *pool, void *addr)
 	hyp_spin_unlock(&pool->lock);
 }
 
-void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order)
+void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order)
 {
-	unsigned int i = order;
+	unsigned short i = order;
 	struct hyp_page *p;
 
 	hyp_spin_lock(&pool->lock);
-- 
GitLab


From 6929586d8eddad184f43526efe7bf0a8be4f18b2 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Tue, 8 Jun 2021 11:45:18 +0000
Subject: [PATCH 2718/3804] KVM: arm64: Use less bits for hyp_page refcount

The hyp_page refcount is currently encoded on 4 bytes even though we
never need to count that many objects in a page. Make it 2 bytes to save
some space in the vmemmap.

As the narrower type makes overflows more likely, make sure to catch
them with a BUG_ON() in the increment function.

Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210608114518.748712-8-qperret@google.com
---
 arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 +-
 arch/arm64/kvm/hyp/nvhe/page_alloc.c     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 3fe34fa30ea47..592b7edb3edb4 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -8,7 +8,7 @@
 #include <linux/types.h>
 
 struct hyp_page {
-	unsigned int refcount;
+	unsigned short refcount;
 	unsigned short order;
 };
 
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index be07055bbc10f..41fc25bdfb346 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -146,6 +146,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 
 static inline void hyp_page_ref_inc(struct hyp_page *p)
 {
+	BUG_ON(p->refcount == USHRT_MAX);
 	p->refcount++;
 }
 
-- 
GitLab


From 930a58b4093ebd2a036a0d448a2047477ef90d26 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 8 Jun 2021 19:02:54 +0100
Subject: [PATCH 2719/3804] arm64: cpuinfo: Split AArch32 registers out into a
 separate struct

In preparation for late initialisation of the "sanitised" AArch32 register
state, move the AArch32 registers out of 'struct cpuinfo' and into their
own struct definition.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210608180313.11502-2-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpu.h   | 46 +++++++++++----------
 arch/arm64/kernel/cpufeature.c | 74 ++++++++++++++++++----------------
 arch/arm64/kernel/cpuinfo.c    | 53 ++++++++++++------------
 3 files changed, 92 insertions(+), 81 deletions(-)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 9088e72c7cf66..0f6d16faa5402 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -12,27 +12,7 @@
 /*
  * Records attributes of an individual CPU.
  */
-struct cpuinfo_arm64 {
-	struct cpu	cpu;
-	struct kobject	kobj;
-	u64		reg_ctr;
-	u64		reg_cntfrq;
-	u64		reg_dczid;
-	u64		reg_midr;
-	u64		reg_revidr;
-	u64		reg_gmid;
-
-	u64		reg_id_aa64dfr0;
-	u64		reg_id_aa64dfr1;
-	u64		reg_id_aa64isar0;
-	u64		reg_id_aa64isar1;
-	u64		reg_id_aa64mmfr0;
-	u64		reg_id_aa64mmfr1;
-	u64		reg_id_aa64mmfr2;
-	u64		reg_id_aa64pfr0;
-	u64		reg_id_aa64pfr1;
-	u64		reg_id_aa64zfr0;
-
+struct cpuinfo_32bit {
 	u32		reg_id_dfr0;
 	u32		reg_id_dfr1;
 	u32		reg_id_isar0;
@@ -55,6 +35,30 @@ struct cpuinfo_arm64 {
 	u32		reg_mvfr0;
 	u32		reg_mvfr1;
 	u32		reg_mvfr2;
+};
+
+struct cpuinfo_arm64 {
+	struct cpu	cpu;
+	struct kobject	kobj;
+	u64		reg_ctr;
+	u64		reg_cntfrq;
+	u64		reg_dczid;
+	u64		reg_midr;
+	u64		reg_revidr;
+	u64		reg_gmid;
+
+	u64		reg_id_aa64dfr0;
+	u64		reg_id_aa64dfr1;
+	u64		reg_id_aa64isar0;
+	u64		reg_id_aa64isar1;
+	u64		reg_id_aa64mmfr0;
+	u64		reg_id_aa64mmfr1;
+	u64		reg_id_aa64mmfr2;
+	u64		reg_id_aa64pfr0;
+	u64		reg_id_aa64pfr1;
+	u64		reg_id_aa64zfr0;
+
+	struct cpuinfo_32bit	aarch32;
 
 	/* pseudo-ZCR for recording maximum ZCR_EL1 LEN value: */
 	u64		reg_zcr;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0645300cc1a8a..33e5330ab15b5 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -871,6 +871,31 @@ static void __init init_cpu_hwcaps_indirect_list(void)
 
 static void __init setup_boot_cpu_capabilities(void);
 
+static void __init init_32bit_cpu_features(struct cpuinfo_32bit *info)
+{
+	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
+	init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
+	init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
+	init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
+	init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
+	init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
+	init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
+	init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
+	init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
+	init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
+	init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
+	init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
+	init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
+	init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
+	init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
+	init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
+	init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
+	init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
+	init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
+	init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
+	init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
+}
+
 void __init init_cpu_features(struct cpuinfo_arm64 *info)
 {
 	/* Before we start using the tables, make sure it is sorted */
@@ -890,29 +915,8 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
 	init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0);
 
-	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-		init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
-		init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
-		init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0);
-		init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1);
-		init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2);
-		init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3);
-		init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4);
-		init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5);
-		init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6);
-		init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0);
-		init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1);
-		init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2);
-		init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3);
-		init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4);
-		init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5);
-		init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0);
-		init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1);
-		init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2);
-		init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0);
-		init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1);
-		init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2);
-	}
+	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+		init_32bit_cpu_features(&info->aarch32);
 
 	if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) {
 		init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr);
@@ -986,20 +990,12 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field)
 	WARN_ON(!ftrp->width);
 }
 
-static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info,
-				     struct cpuinfo_arm64 *boot)
+static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
+				     struct cpuinfo_32bit *boot)
 {
 	int taint = 0;
 	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 
-	/*
-	 * If we don't have AArch32 at all then skip the checks entirely
-	 * as the register values may be UNKNOWN and we're not going to be
-	 * using them for anything.
-	 */
-	if (!id_aa64pfr0_32bit_el0(pfr0))
-		return taint;
-
 	/*
 	 * If we don't have AArch32 at EL1, then relax the strictness of
 	 * EL1-dependent register fields to avoid spurious sanity check fails.
@@ -1151,15 +1147,23 @@ void update_cpu_features(int cpu,
 	 * value is the same on all CPUs.
 	 */
 	if (IS_ENABLED(CONFIG_ARM64_MTE) &&
-	    id_aa64pfr1_mte(info->reg_id_aa64pfr1))
+	    id_aa64pfr1_mte(info->reg_id_aa64pfr1)) {
 		taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu,
 					      info->reg_gmid, boot->reg_gmid);
+	}
 
 	/*
+	 * If we don't have AArch32 at all then skip the checks entirely
+	 * as the register values may be UNKNOWN and we're not going to be
+	 * using them for anything.
+	 *
 	 * This relies on a sanitised view of the AArch64 ID registers
 	 * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
 	 */
-	taint |= update_32bit_cpu_features(cpu, info, boot);
+	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+		taint |= update_32bit_cpu_features(cpu, &info->aarch32,
+						   &boot->aarch32);
+	}
 
 	/*
 	 * Mismatched CPU features are a recipe for disaster. Don't even
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 5321b82185912..87731fea5e418 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -344,6 +344,32 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }
 
+static void __cpuinfo_store_cpu_32bit(struct cpuinfo_32bit *info)
+{
+	info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
+	info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
+	info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
+	info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
+	info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
+	info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
+	info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
+	info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
+	info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
+	info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
+	info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
+	info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
+	info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
+	info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
+	info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
+	info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
+	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+	info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
+
+	info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
+	info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
+	info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+}
+
 static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
 	info->reg_cntfrq = arch_timer_get_cntfrq();
@@ -374,31 +400,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	if (id_aa64pfr1_mte(info->reg_id_aa64pfr1))
 		info->reg_gmid = read_cpuid(GMID_EL1);
 
-	/* Update the 32bit ID registers only if AArch32 is implemented */
-	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
-		info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-		info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1);
-		info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
-		info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
-		info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
-		info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
-		info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
-		info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
-		info->reg_id_isar6 = read_cpuid(ID_ISAR6_EL1);
-		info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
-		info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
-		info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
-		info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
-		info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1);
-		info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1);
-		info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
-		info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
-		info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1);
-
-		info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
-		info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
-		info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
-	}
+	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0))
+		__cpuinfo_store_cpu_32bit(&info->aarch32);
 
 	if (IS_ENABLED(CONFIG_ARM64_SVE) &&
 	    id_aa64pfr0_sve(info->reg_id_aa64pfr0))
-- 
GitLab


From 2122a833316f2f3f6ddc78429fa67ef6d3c86636 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 8 Jun 2021 19:02:55 +0100
Subject: [PATCH 2720/3804] arm64: Allow mismatched 32-bit EL0 support

When confronted with a mixture of CPUs, some of which support 32-bit
applications and others which don't, we quite sensibly treat the system
as 64-bit only for userspace and prevent execve() of 32-bit binaries.

Unfortunately, some crazy folks have decided to build systems like this
with the intention of running 32-bit applications, so relax our
sanitisation logic to continue to advertise 32-bit support to userspace
on these systems and track the real 32-bit capable cores in a cpumask
instead. For now, the default behaviour remains but will be tied to
a command-line option in a later patch.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210608180313.11502-3-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/cpufeature.h |   8 +-
 arch/arm64/kernel/cpufeature.c      | 114 ++++++++++++++++++++++++----
 arch/arm64/tools/cpucaps            |   3 +-
 3 files changed, 110 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 650de920e0679..9bb9d11750d72 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -637,9 +637,15 @@ static inline bool cpu_supports_mixed_endian_el0(void)
 	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
 }
 
+const struct cpumask *system_32bit_el0_cpumask(void);
+DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
 static inline bool system_supports_32bit_el0(void)
 {
-	return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
+	u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+	return static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
+	       id_aa64pfr0_32bit_el0(pfr0);
 }
 
 static inline bool system_supports_4kb_granule(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 33e5330ab15b5..52389018ff335 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -107,6 +107,24 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
 bool arm64_use_ng_mappings = false;
 EXPORT_SYMBOL(arm64_use_ng_mappings);
 
+/*
+ * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs
+ * support it?
+ */
+static bool __read_mostly allow_mismatched_32bit_el0;
+
+/*
+ * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have
+ * seen at least one CPU capable of 32-bit EL0.
+ */
+DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0);
+
+/*
+ * Mask of CPUs supporting 32-bit EL0.
+ * Only valid if arm64_mismatched_32bit_el0 is enabled.
+ */
+static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly;
+
 /*
  * Flag to indicate if we have computed the system wide
  * capabilities based on the boot time active CPUs. This
@@ -775,7 +793,7 @@ static void __init sort_ftr_regs(void)
  * Any bits that are not covered by an arm64_ftr_bits entry are considered
  * RES0 for the system-wide value, and must strictly match.
  */
-static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
+static void init_cpu_ftr_reg(u32 sys_reg, u64 new)
 {
 	u64 val = 0;
 	u64 strict_mask = ~0x0ULL;
@@ -871,7 +889,7 @@ static void __init init_cpu_hwcaps_indirect_list(void)
 
 static void __init setup_boot_cpu_capabilities(void);
 
-static void __init init_32bit_cpu_features(struct cpuinfo_32bit *info)
+static void init_32bit_cpu_features(struct cpuinfo_32bit *info)
 {
 	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
 	init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1);
@@ -990,6 +1008,22 @@ static void relax_cpu_ftr_reg(u32 sys_id, int field)
 	WARN_ON(!ftrp->width);
 }
 
+static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info,
+					 struct cpuinfo_arm64 *boot)
+{
+	static bool boot_cpu_32bit_regs_overridden = false;
+
+	if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden)
+		return;
+
+	if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0))
+		return;
+
+	boot->aarch32 = info->aarch32;
+	init_32bit_cpu_features(&boot->aarch32);
+	boot_cpu_32bit_regs_overridden = true;
+}
+
 static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info,
 				     struct cpuinfo_32bit *boot)
 {
@@ -1161,6 +1195,7 @@ void update_cpu_features(int cpu,
 	 * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last.
 	 */
 	if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) {
+		lazy_init_32bit_cpu_features(info, boot);
 		taint |= update_32bit_cpu_features(cpu, &info->aarch32,
 						   &boot->aarch32);
 	}
@@ -1273,6 +1308,28 @@ has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope)
 	return feature_matches(val, entry);
 }
 
+const struct cpumask *system_32bit_el0_cpumask(void)
+{
+	if (!system_supports_32bit_el0())
+		return cpu_none_mask;
+
+	if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+		return cpu_32bit_el0_mask;
+
+	return cpu_possible_mask;
+}
+
+static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
+{
+	if (!has_cpuid_feature(entry, scope))
+		return allow_mismatched_32bit_el0;
+
+	if (scope == SCOPE_SYSTEM)
+		pr_info("detected: 32-bit EL0 Support\n");
+
+	return true;
+}
+
 static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope)
 {
 	bool has_sre;
@@ -1891,10 +1948,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_copy_el2regs,
 	},
 	{
-		.desc = "32-bit EL0 Support",
-		.capability = ARM64_HAS_32BIT_EL0,
+		.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.matches = has_cpuid_feature,
+		.matches = has_32bit_el0,
 		.sys_reg = SYS_ID_AA64PFR0_EL1,
 		.sign = FTR_UNSIGNED,
 		.field_pos = ID_AA64PFR0_EL0_SHIFT,
@@ -2403,7 +2459,7 @@ static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = {
 	{},
 };
 
-static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
+static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 {
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
@@ -2448,7 +2504,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 	return rc;
 }
 
-static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
+static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
 {
 	/* We support emulation of accesses to CPU ID feature registers */
 	cpu_set_named_feature(CPUID);
@@ -2623,7 +2679,7 @@ static void check_early_cpu_features(void)
 }
 
 static void
-verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
+__verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
 {
 
 	for (; caps->matches; caps++)
@@ -2634,6 +2690,14 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
 		}
 }
 
+static void verify_local_elf_hwcaps(void)
+{
+	__verify_local_elf_hwcaps(arm64_elf_hwcaps);
+
+	if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1)))
+		__verify_local_elf_hwcaps(compat_elf_hwcaps);
+}
+
 static void verify_sve_features(void)
 {
 	u64 safe_zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
@@ -2698,11 +2762,7 @@ static void verify_local_cpu_capabilities(void)
 	 * on all secondary CPUs.
 	 */
 	verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU);
-
-	verify_local_elf_hwcaps(arm64_elf_hwcaps);
-
-	if (system_supports_32bit_el0())
-		verify_local_elf_hwcaps(compat_elf_hwcaps);
+	verify_local_elf_hwcaps();
 
 	if (system_supports_sve())
 		verify_sve_features();
@@ -2837,6 +2897,34 @@ void __init setup_cpu_features(void)
 			ARCH_DMA_MINALIGN);
 }
 
+static int enable_mismatched_32bit_el0(unsigned int cpu)
+{
+	struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
+	bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
+
+	if (cpu_32bit) {
+		cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
+		static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
+		setup_elf_hwcaps(compat_elf_hwcaps);
+	}
+
+	return 0;
+}
+
+static int __init init_32bit_el0_mask(void)
+{
+	if (!allow_mismatched_32bit_el0)
+		return 0;
+
+	if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+				 "arm64/mismatched_32bit_el0:online",
+				 enable_mismatched_32bit_el0, NULL);
+}
+subsys_initcall_sync(init_32bit_el0_mask);
+
 static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
 {
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 21fbdda7086e2..49305c2e6dfd3 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -3,7 +3,8 @@
 # Internal CPU capabilities constants, keep this list sorted
 
 BTI
-HAS_32BIT_EL0
+# Unreliable: use system_supports_32bit_el0() instead.
+HAS_32BIT_EL0_DO_NOT_USE
 HAS_32BIT_EL1
 HAS_ADDRESS_AUTH
 HAS_ADDRESS_AUTH_ARCH
-- 
GitLab


From 2f6a49bbc01da17867c26f6f650b1142e1d7c69d Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 8 Jun 2021 19:02:56 +0100
Subject: [PATCH 2721/3804] KVM: arm64: Kill 32-bit vCPUs on systems with
 mismatched EL0 support

If a vCPU is caught running 32-bit code on a system with mismatched
support at EL0, then we should kill it.

Acked-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210608180313.11502-4-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kvm/arm.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 1cb39c0803a44..5bdba97a76544 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -692,6 +692,15 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 	}
 }
 
+static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
+{
+	if (likely(!vcpu_mode_is_32bit(vcpu)))
+		return false;
+
+	return !system_supports_32bit_el0() ||
+		static_branch_unlikely(&arm64_mismatched_32bit_el0);
+}
+
 /**
  * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
  * @vcpu:	The VCPU pointer
@@ -875,7 +884,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 * with the asymmetric AArch32 case), return to userspace with
 		 * a fatal error.
 		 */
-		if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) {
+		if (vcpu_mode_is_bad_32bit(vcpu)) {
 			/*
 			 * As we have caught the guest red-handed, decide that
 			 * it isn't fit for purpose anymore by making the vcpu
-- 
GitLab


From 873c3e89777c8c56f936ae7aceca1a102aac6b9e Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 8 Jun 2021 19:02:57 +0100
Subject: [PATCH 2722/3804] arm64: Kill 32-bit applications scheduled on
 64-bit-only CPUs

Scheduling a 32-bit application on a 64-bit-only CPU is a bad idea.

Ensure that 32-bit applications always take the slow-path when returning
to userspace on a system with mismatched support at EL0, so that we can
avoid trying to run on a 64-bit-only CPU and force a SIGKILL instead.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210608180313.11502-5-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/process.c | 19 ++++++++++++++++++-
 arch/arm64/kernel/signal.c  | 26 ++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2ca..f4a91bf1ce0c0 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -527,6 +527,15 @@ static void erratum_1418040_thread_switch(struct task_struct *prev,
 	write_sysreg(val, cntkctl_el1);
 }
 
+static void compat_thread_switch(struct task_struct *next)
+{
+	if (!is_compat_thread(task_thread_info(next)))
+		return;
+
+	if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+		set_tsk_thread_flag(next, TIF_NOTIFY_RESUME);
+}
+
 static void update_sctlr_el1(u64 sctlr)
 {
 	/*
@@ -568,6 +577,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	ssbs_thread_switch(next);
 	erratum_1418040_thread_switch(prev, next);
 	ptrauth_thread_switch_user(next);
+	compat_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -633,8 +643,15 @@ unsigned long arch_align_stack(unsigned long sp)
  */
 void arch_setup_new_exec(void)
 {
-	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+	unsigned long mmflags = 0;
+
+	if (is_compat_task()) {
+		mmflags = MMCF_AARCH32;
+		if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
+			set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+	}
 
+	current->mm->context.flags = mmflags;
 	ptrauth_thread_init_user();
 	mte_thread_init_user();
 
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 6237486ff6bb7..f8192f4ae0b8a 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -911,6 +911,19 @@ static void do_signal(struct pt_regs *regs)
 	restore_saved_sigmask();
 }
 
+static bool cpu_affinity_invalid(struct pt_regs *regs)
+{
+	if (!compat_user_mode(regs))
+		return false;
+
+	/*
+	 * We're preemptible, but a reschedule will cause us to check the
+	 * affinity again.
+	 */
+	return !cpumask_test_cpu(raw_smp_processor_id(),
+				 system_32bit_el0_cpumask());
+}
+
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned long thread_flags)
 {
@@ -938,6 +951,19 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 			if (thread_flags & _TIF_NOTIFY_RESUME) {
 				tracehook_notify_resume(regs);
 				rseq_handle_notify_resume(NULL, regs);
+
+				/*
+				 * If we reschedule after checking the affinity
+				 * then we must ensure that TIF_NOTIFY_RESUME
+				 * is set so that we check the affinity again.
+				 * Since tracehook_notify_resume() clears the
+				 * flag, ensure that the compiler doesn't move
+				 * it after the affinity check.
+				 */
+				barrier();
+
+				if (cpu_affinity_invalid(regs))
+					force_sig(SIGKILL);
 			}
 
 			if (thread_flags & _TIF_FOREIGN_FPSTATE)
-- 
GitLab


From da30e6688dd64fabc3746e00e4a9b6f926efd5ca Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:03:35 +0800
Subject: [PATCH 2723/3804] irqchip/exynos-combiner: Remove unnecessary oom
 message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140335.14425-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/exynos-combiner.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/exynos-combiner.c b/drivers/irqchip/exynos-combiner.c
index 0b85d9a3fbff8..256ed73ddfef4 100644
--- a/drivers/irqchip/exynos-combiner.c
+++ b/drivers/irqchip/exynos-combiner.c
@@ -179,10 +179,8 @@ static void __init combiner_init(void __iomem *combiner_base,
 	nr_irq = max_nr * IRQ_IN_COMBINER;
 
 	combiner_data = kcalloc(max_nr, sizeof (*combiner_data), GFP_KERNEL);
-	if (!combiner_data) {
-		pr_warn("%s: could not allocate combiner data\n", __func__);
+	if (!combiner_data)
 		return;
-	}
 
 	combiner_irq_domain = irq_domain_add_linear(np, nr_irq,
 				&combiner_irq_domain_ops, combiner_data);
-- 
GitLab


From 98ae089e1e6e5bab6f8c89412da5fc447e3580cb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:05:34 +0800
Subject: [PATCH 2724/3804] irqchip/gic-v2m: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140534.14478-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-gic-v2m.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 4116b48e60aff..be9ea6fd6f8b2 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -323,10 +323,8 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
 	struct v2m_data *v2m;
 
 	v2m = kzalloc(sizeof(struct v2m_data), GFP_KERNEL);
-	if (!v2m) {
-		pr_err("Failed to allocate struct v2m_data.\n");
+	if (!v2m)
 		return -ENOMEM;
-	}
 
 	INIT_LIST_HEAD(&v2m->entry);
 	v2m->fwnode = fwnode;
-- 
GitLab


From 944a1a17d399b33410af6dfcf2b5a0f74b42b3d0 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:06:42 +0800
Subject: [PATCH 2725/3804] irqchip/gic-v3-its: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140643.14531-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-gic-v3-its.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 2e6923c2c8a8c..ba39668c3e085 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -4895,10 +4895,8 @@ static int its_init_vpe_domain(void)
 	entries = roundup_pow_of_two(nr_cpu_ids);
 	vpe_proxy.vpes = kcalloc(entries, sizeof(*vpe_proxy.vpes),
 				 GFP_KERNEL);
-	if (!vpe_proxy.vpes) {
-		pr_err("ITS: Can't allocate GICv4 proxy device array\n");
+	if (!vpe_proxy.vpes)
 		return -ENOMEM;
-	}
 
 	/* Use the last possible DevID */
 	devid = GENMASK(device_ids(its) - 1, 0);
@@ -5314,10 +5312,8 @@ static void __init acpi_table_parse_srat_its(void)
 
 	its_srat_maps = kmalloc_array(count, sizeof(struct its_srat_map),
 				      GFP_KERNEL);
-	if (!its_srat_maps) {
-		pr_warn("SRAT: Failed to allocate memory for its_srat_maps!\n");
+	if (!its_srat_maps)
 		return;
-	}
 
 	acpi_table_parse_entries(ACPI_SIG_SRAT,
 			sizeof(struct acpi_table_srat),
-- 
GitLab


From e3f389ed3a421f45b46e774b543648ebcab9020a Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:08:28 +0800
Subject: [PATCH 2726/3804] irqchip/imgpdc: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609140828.14584-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-imgpdc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-imgpdc.c b/drivers/irqchip/irq-imgpdc.c
index 698d07f48fed8..04efa8e79863f 100644
--- a/drivers/irqchip/irq-imgpdc.c
+++ b/drivers/irqchip/irq-imgpdc.c
@@ -316,10 +316,8 @@ static int pdc_intc_probe(struct platform_device *pdev)
 
 	/* Allocate driver data */
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		dev_err(&pdev->dev, "cannot allocate device data\n");
+	if (!priv)
 		return -ENOMEM;
-	}
 	raw_spin_lock_init(&priv->lock);
 	platform_set_drvdata(pdev, priv);
 
@@ -356,10 +354,8 @@ static int pdc_intc_probe(struct platform_device *pdev)
 	/* Get peripheral IRQ numbers */
 	priv->perip_irqs = devm_kcalloc(&pdev->dev, 4, priv->nr_perips,
 					GFP_KERNEL);
-	if (!priv->perip_irqs) {
-		dev_err(&pdev->dev, "cannot allocate perip IRQ list\n");
+	if (!priv->perip_irqs)
 		return -ENOMEM;
-	}
 	for (i = 0; i < priv->nr_perips; ++i) {
 		irq = platform_get_irq(pdev, 1 + i);
 		if (irq < 0)
-- 
GitLab


From 76fc40ec22b9947351f6f9d37a86d47e72af4e50 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:11:50 +0800
Subject: [PATCH 2727/3804] irqchip/irq-imx-gpcv2: Remove unnecessary oom
 message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609141150.14637-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-imx-gpcv2.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-imx-gpcv2.c b/drivers/irqchip/irq-imx-gpcv2.c
index 7031ef44de4f3..5b5a365dbd5e4 100644
--- a/drivers/irqchip/irq-imx-gpcv2.c
+++ b/drivers/irqchip/irq-imx-gpcv2.c
@@ -228,10 +228,8 @@ static int __init imx_gpcv2_irqchip_init(struct device_node *node,
 	}
 
 	cd = kzalloc(sizeof(struct gpcv2_irqchip_data), GFP_KERNEL);
-	if (!cd) {
-		pr_err("%pOF: kzalloc failed!\n", node);
+	if (!cd)
 		return -ENOMEM;
-	}
 
 	raw_spin_lock_init(&cd->rlock);
 
-- 
GitLab


From 75768e391f8947ea8b2e7997af68dbd68814f00c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Wed, 9 Jun 2021 22:14:28 +0800
Subject: [PATCH 2728/3804] irqchip/sun4i: Remove unnecessary oom message

Fixes scripts/checkpatch.pl warning:
WARNING: Possible unnecessary 'out of memory' message

Removing it can help us save a bit of memory.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609141428.14737-1-thunder.leizhen@huawei.com
---
 drivers/irqchip/irq-sun4i.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
index 9ea94456b178c..8a315d6a3399d 100644
--- a/drivers/irqchip/irq-sun4i.c
+++ b/drivers/irqchip/irq-sun4i.c
@@ -147,10 +147,8 @@ static int __init sun4i_ic_of_init(struct device_node *node,
 				   struct device_node *parent)
 {
 	irq_ic_data = kzalloc(sizeof(struct sun4i_irq_chip_data), GFP_KERNEL);
-	if (!irq_ic_data) {
-		pr_err("kzalloc failed!\n");
+	if (!irq_ic_data)
 		return -ENOMEM;
-	}
 
 	irq_ic_data->enable_reg_offset = SUN4I_IRQ_ENABLE_REG_OFFSET;
 	irq_ic_data->mask_reg_offset = SUN4I_IRQ_MASK_REG_OFFSET;
@@ -164,10 +162,8 @@ static int __init suniv_ic_of_init(struct device_node *node,
 				   struct device_node *parent)
 {
 	irq_ic_data = kzalloc(sizeof(struct sun4i_irq_chip_data), GFP_KERNEL);
-	if (!irq_ic_data) {
-		pr_err("kzalloc failed!\n");
+	if (!irq_ic_data)
 		return -ENOMEM;
-	}
 
 	irq_ic_data->enable_reg_offset = SUNIV_IRQ_ENABLE_REG_OFFSET;
 	irq_ic_data->mask_reg_offset = SUNIV_IRQ_MASK_REG_OFFSET;
-- 
GitLab


From 21a496179c6e3a9fc03d1296b36afd14046db88f Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Fri, 9 Apr 2021 11:04:34 +0800
Subject: [PATCH 2729/3804] irqchip: gic-pm: Remove redundant error log of
 clock bulk

There is already an error log in clk_bulk_prepare/enable(), so this one is redundant.

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/1617937474-24630-1-git-send-email-chunfeng.yun@mediatek.com
---
 drivers/irqchip/irq-gic-pm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
index 1337ceceb59b9..b60e1853593f4 100644
--- a/drivers/irqchip/irq-gic-pm.c
+++ b/drivers/irqchip/irq-gic-pm.c
@@ -30,10 +30,8 @@ static int gic_runtime_resume(struct device *dev)
 	int ret;
 
 	ret = clk_bulk_prepare_enable(data->num_clocks, chip_pm->clks);
-	if (ret) {
-		dev_err(dev, "clk_enable failed: %d\n", ret);
+	if (ret)
 		return ret;
-	}
 
 	/*
 	 * On the very first resume, the pointer to chip_pm->chip_data
-- 
GitLab


From 4e08a559a18c1b6424e56859c74adb4b29c17318 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Wed, 9 Jun 2021 16:51:08 +0100
Subject: [PATCH 2730/3804] dt-bindings: interrupt-controller: arm,gic-v3:
 Describe GICv3 optional properties

Describe the optional GICv3 properties:
- clocks
- clock-names
- power-domains
- resets

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210609155108.16590-1-prabhakar.mahadev-lad.rj@bp.renesas.com
---
 .../bindings/interrupt-controller/arm,gic-v3.yaml   | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
index 1ecd1831cf02b..c84f9fe7f2547 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.yaml
@@ -145,6 +145,19 @@ properties:
         required:
           - affinity
 
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: aclk
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
 dependencies:
   mbi-ranges: [ msi-controller ]
   msi-controller: [ mbi-ranges ]
-- 
GitLab


From c67913492fec317bc53ffdff496b6ba856d2868c Mon Sep 17 00:00:00 2001
From: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Date: Thu, 10 Jun 2021 10:15:52 -0700
Subject: [PATCH 2731/3804] ima: Fix warning: no previous prototype for
 function 'ima_add_kexec_buffer'

The function prototype for ima_add_kexec_buffer() is present
in 'linux/ima.h'.  But this header file is not included in
ima_kexec.c where the function is implemented.  This results
in the following compiler warning when "-Wmissing-prototypes" flag
is turned on:

  security/integrity/ima/ima_kexec.c:81:6: warning: no previous prototype
  for function 'ima_add_kexec_buffer' [-Wmissing-prototypes]

Include the header file 'linux/ima.h' in ima_kexec.c to fix
the compiler warning.

Fixes: dce92f6b11c3 (arm64: Enable passing IMA log to next kernel on kexec)
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/ima_kexec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c
index 667887665823f..f799cc278a9a8 100644
--- a/security/integrity/ima/ima_kexec.c
+++ b/security/integrity/ima/ima_kexec.c
@@ -11,6 +11,7 @@
 #include <linux/vmalloc.h>
 #include <linux/kexec.h>
 #include <linux/of.h>
+#include <linux/ima.h>
 #include "ima.h"
 
 #ifdef CONFIG_IMA_KEXEC
-- 
GitLab


From 934002cd660b035b926438244b4294e647507e13 Mon Sep 17 00:00:00 2001
From: Alper Gun <alpergun@google.com>
Date: Thu, 10 Jun 2021 17:46:04 +0000
Subject: [PATCH 2732/3804] KVM: SVM: Call SEV Guest Decommission if ASID
 binding fails

Send the SEV_CMD_DECOMMISSION command to the PSP firmware if ASID binding
fails. If a failure happens after a successful LAUNCH_START command,
a decommission command should be executed. Otherwise, the guest context
is never freed inside the AMD SP. Once the firmware has no memory left
to allocate more SEV guest contexts, the LAUNCH_START command will
begin to fail with the SEV_RET_RESOURCE_LIMIT error.

The existing code calls decommission inside sev_unbind_asid, but it is
not called if a failure happens before guest activation succeeds. If
sev_bind_asid fails, decommission is never called. PSP firmware has a
limit for the number of guests. If sev_bind_asid fails many times,
PSP firmware will not have resources to create another guest context.

Cc: stable@vger.kernel.org
Fixes: 59414c989220 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START command")
Reported-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Alper Gun <alpergun@google.com>
Reviewed-by: Marc Orr <marcorr@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210610174604.2554090-1-alpergun@google.com>
---
 arch/x86/kvm/svm/sev.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e0ce5da97fc2f..8d36f0c730718 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
 	sev->misc_cg = NULL;
 }
 
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
 {
 	struct sev_data_decommission decommission;
+
+	if (!handle)
+		return;
+
+	decommission.handle = handle;
+	sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
 	struct sev_data_deactivate deactivate;
 
 	if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 	sev_guest_deactivate(&deactivate, NULL);
 	up_read(&sev_deactivate_lock);
 
-	/* decommission handle */
-	decommission.handle = handle;
-	sev_guest_decommission(&decommission, NULL);
+	sev_decommission(handle);
 }
 
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
 
 	/* Bind ASID to this guest */
 	ret = sev_bind_asid(kvm, start.handle, error);
-	if (ret)
+	if (ret) {
+		sev_decommission(start.handle);
 		goto e_free_session;
+	}
 
 	/* return handle to userspace */
 	params.handle = start.handle;
-- 
GitLab


From dfdc0a714d241bfbf951886c373cd1ae463fcc25 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Thu, 10 Jun 2021 21:59:33 -0700
Subject: [PATCH 2733/3804] KVM: X86: Fix x86_emulator slab cache leak

Commit c9b8b07cded58 (KVM: x86: Dynamically allocate per-vCPU emulation context)
tries to allocate the per-vCPU emulation context dynamically. However, the
x86_emulator slab cache still exists after destroying the VM and
unloading the kvm module, as shown below.

grep x86_emulator /proc/slabinfo
x86_emulator          36     36   2672   12    8 : tunables    0    0    0 : slabdata      3      3      0

This patch fixes this slab cache leak by destroying the x86_emulator slab cache
when the kvm module is unloaded.

Fixes: c9b8b07cded58 (KVM: x86: Dynamically allocate per-vCPU emulation context)
Cc: stable@vger.kernel.org
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1623387573-5969-1-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 54d212fe9b156..6d425310054b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8261,6 +8261,7 @@ void kvm_arch_exit(void)
 	kvm_x86_ops.hardware_enable = NULL;
 	kvm_mmu_module_exit();
 	free_percpu(user_return_msrs);
+	kmem_cache_destroy(x86_emulator_cache);
 	kmem_cache_destroy(x86_fpu_cache);
 #ifdef CONFIG_KVM_XEN
 	static_key_deferred_flush(&kvm_xen_enabled);
-- 
GitLab


From 197eecb6ecae0b04bd694432f640ff75597fed9c Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Sat, 5 Jun 2021 13:29:57 +0800
Subject: [PATCH 2734/3804] perf session: Correct buffer copying when peeking
 events

When peeking an event, it has a short path and a long path.  The short
path uses the session pointer "one_mmap_addr" to directly fetch the
event; and the long path needs to read out the event header and the
following event data from file and fill into the buffer pointer passed
through the argument "buf".

The issue is that the long path copies the event header and the event
data to the same destination address, the one pointed to by "buf", which
means the event header is overwritten.  We are just lucky to run into the
short path in most cases, so we don't hit the issue in the long path.

This patch adds the offset "hdr_sz" to the pointer "buf" when copying
the event data, so that the event header is preserved and can be
used properly by its caller.

Fixes: 5a52f33adf02 ("perf session: Add perf_session__peek_event()")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20210605052957.1070720-1-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 106b3d60881a5..e59242c361ce5 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1723,6 +1723,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 	if (event->header.size < hdr_sz || event->header.size > buf_sz)
 		return -1;
 
+	buf += hdr_sz;
 	rest = event->header.size - hdr_sz;
 
 	if (readn(fd, buf, rest) != (ssize_t)rest)
-- 
GitLab


From 36524112aba3246d1240c1791c72b26fa54008a3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 8 Jun 2021 13:46:18 -0300
Subject: [PATCH 2735/3804] tools headers cpufeatures: Sync with the kernel
 sources

To pick the changes in:

  fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]")
  e7b6385b01d8e9fb ("x86/cpufeatures: Add Intel SGX hardware bits")
  1478b99a76534b6c ("x86/cpufeatures: Mark ENQCMD as disabled when configured out")

That don't cause any change in the tools, just silences this perf build
warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/disabled-features.h' differs from latest version at 'arch/x86/include/asm/disabled-features.h'
  diff -u tools/arch/x86/include/asm/disabled-features.h arch/x86/include/asm/disabled-features.h

Cc: Borislav Petkov <bp@suse.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/disabled-features.h | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index b7dd944dc8673..8f28fafa98b32 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,11 +56,8 @@
 # define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
 #endif
 
-#ifdef CONFIG_IOMMU_SUPPORT
-# define DISABLE_ENQCMD	0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
+/* Force disable because it's broken beyond repair */
+#define DISABLE_ENQCMD		(1 << (X86_FEATURE_ENQCMD & 31))
 
 #ifdef CONFIG_X86_SGX
 # define DISABLE_SGX	0
-- 
GitLab


From 654430efde27248be563df9a88631204b5fe2df2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Jun 2021 15:00:26 -0700
Subject: [PATCH 2736/3804] KVM: x86/mmu: Calculate and check "full" mmu_role
 for nested MMU

Calculate and check the full mmu_role when initializing the MMU context
for the nested MMU, where "full" means the bits and pieces of the role
that aren't handled by kvm_calc_mmu_role_common().  While the nested MMU
isn't used for shadow paging, things like the number of levels in the
guest's page tables are surprisingly important when walking the guest
page tables.  Failure to reinitialize the nested MMU context if L2's
paging mode changes can result in unexpected and/or missed page faults,
and likely other explosions.

E.g. if an L1 vCPU is running both a 32-bit PAE L2 and a 64-bit L2, the
"common" role calculation will yield the same role for both L2s.  If the
64-bit L2 is run after the 32-bit PAE L2, L0 will fail to reinitialize
the nested MMU context, ultimately resulting in a bad walk of L2's page
tables as the MMU will still have a guest root_level of PT32E_ROOT_LEVEL.

  WARNING: CPU: 4 PID: 167334 at arch/x86/kvm/vmx/vmx.c:3075 ept_save_pdptrs+0x15/0xe0 [kvm_intel]
  Modules linked in: kvm_intel]
  CPU: 4 PID: 167334 Comm: CPU 3/KVM Not tainted 5.13.0-rc1-d849817d5673-reqs #185
  Hardware name: ASUS Q87M-E/Q87M-E, BIOS 1102 03/03/2014
  RIP: 0010:ept_save_pdptrs+0x15/0xe0 [kvm_intel]
  Code: <0f> 0b c3 f6 87 d8 02 00f
  RSP: 0018:ffffbba702dbba00 EFLAGS: 00010202
  RAX: 0000000000000011 RBX: 0000000000000002 RCX: ffffffff810a2c08
  RDX: ffff91d7bc30acc0 RSI: 0000000000000011 RDI: ffff91d7bc30a600
  RBP: ffff91d7bc30a600 R08: 0000000000000010 R09: 0000000000000007
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff91d7bc30a600
  R13: ffff91d7bc30acc0 R14: ffff91d67c123460 R15: 0000000115d7e005
  FS:  00007fe8e9ffb700(0000) GS:ffff91d90fb00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 000000029f15a001 CR4: 00000000001726e0
  Call Trace:
   kvm_pdptr_read+0x3a/0x40 [kvm]
   paging64_walk_addr_generic+0x327/0x6a0 [kvm]
   paging64_gva_to_gpa_nested+0x3f/0xb0 [kvm]
   kvm_fetch_guest_virt+0x4c/0xb0 [kvm]
   __do_insn_fetch_bytes+0x11a/0x1f0 [kvm]
   x86_decode_insn+0x787/0x1490 [kvm]
   x86_decode_emulated_instruction+0x58/0x1e0 [kvm]
   x86_emulate_instruction+0x122/0x4f0 [kvm]
   vmx_handle_exit+0x120/0x660 [kvm_intel]
   kvm_arch_vcpu_ioctl_run+0xe25/0x1cb0 [kvm]
   kvm_vcpu_ioctl+0x211/0x5a0 [kvm]
   __x64_sys_ioctl+0x83/0xb0
   do_syscall_64+0x40/0xb0
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: stable@vger.kernel.org
Fixes: bf627a928837 ("x86/kvm/mmu: check if MMU reconfiguration is needed in init_kvm_nested_mmu()")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210610220026.1364486-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0144c40d09c76..8d5876dfc6b71 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 	context->inject_page_fault = kvm_inject_page_fault;
 }
 
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+	union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+	/*
+	 * Nested MMUs are used only for walking L2's gva->gpa, they never have
+	 * shadow pages of their own and so "direct" has no meaning.   Set it
+	 * to "true" to try to detect bogus usage of the nested MMU.
+	 */
+	role.base.direct = true;
+
+	if (!is_paging(vcpu))
+		role.base.level = 0;
+	else if (is_long_mode(vcpu))
+		role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+						       PT64_ROOT_4LEVEL;
+	else if (is_pae(vcpu))
+		role.base.level = PT32E_ROOT_LEVEL;
+	else
+		role.base.level = PT32_ROOT_LEVEL;
+
+	return role;
+}
+
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
-	union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+	union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
 	if (new_role.as_u64 == g_context->mmu_role.as_u64)
-- 
GitLab


From f53cbdab011b200c67c7e5f476046828014501eb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:15:10 +0200
Subject: [PATCH 2737/3804] cpuidle: teo: Cosmetic modifications of
 teo_update()

Rename a local variable in teo_update() so that its purpose is better
reflected by its name and use one more local variable in the loop
over the CPU idle states in that function to make the code somewhat
easier to read.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index ac4bb27d69b0e..e97ae84fa5a85 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -117,7 +117,7 @@ static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int i, idx_hit = 0, idx_timer = 0;
+	int i, idx_timer = 0, idx_duration = 0;
 	unsigned int hits, misses;
 	u64 measured_ns;
 
@@ -156,14 +156,15 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	 * states matching the sleep length and the measured idle duration.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
+		s64 target_residency_ns = drv->states[i].target_residency_ns;
 		unsigned int early_hits = cpu_data->states[i].early_hits;
 
 		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
 
-		if (drv->states[i].target_residency_ns <= cpu_data->sleep_length_ns) {
+		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
-			if (drv->states[i].target_residency_ns <= measured_ns)
-				idx_hit = i;
+			if (target_residency_ns <= measured_ns)
+				idx_duration = i;
 		}
 	}
 
@@ -181,11 +182,11 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	misses = cpu_data->states[idx_timer].misses;
 	misses -= misses >> DECAY_SHIFT;
 
-	if (idx_timer == idx_hit) {
+	if (idx_timer == idx_duration) {
 		hits += PULSE;
 	} else {
 		misses += PULSE;
-		cpu_data->states[idx_hit].early_hits += PULSE;
+		cpu_data->states[idx_duration].early_hits += PULSE;
 	}
 
 	cpu_data->states[idx_timer].misses = misses;
-- 
GitLab


From b18e0de1cf85eed6e9ced086d6323e867d4b57aa Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:15:52 +0200
Subject: [PATCH 2738/3804] cpuidle: teo: Cosmetic modification of teo_select()

Initialize local variables in teo_select() where they are declared.

No functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index e97ae84fa5a85..173ab30b9a06b 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -241,10 +241,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int max_early_idx, prev_max_early_idx, constraint_idx, idx0, idx, i;
-	unsigned int hits, misses, early_hits;
+	int constraint_idx = drv->state_count;
+	unsigned int hits = 0, misses = 0;
+	unsigned int early_hits = 0;
+	int prev_max_early_idx = -1;
+	int max_early_idx = -1;
+	int idx0 = -1, idx = -1;
 	ktime_t delta_tick;
 	s64 duration_ns;
+	int i;
 
 	if (dev->last_state_idx >= 0) {
 		teo_update(drv, dev);
@@ -256,15 +261,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
-	hits = 0;
-	misses = 0;
-	early_hits = 0;
-	max_early_idx = -1;
-	prev_max_early_idx = -1;
-	constraint_idx = drv->state_count;
-	idx = -1;
-	idx0 = idx;
-
 	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 
-- 
GitLab


From c410a9a142f152006c21a858d734a9f868bc90a6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:16:32 +0200
Subject: [PATCH 2739/3804] cpuidle: teo: Change the main idle state selection
 logic

Two aspects of the current main idle state selection logic in the
TEO (Timer Events Oriented) cpuidle governor are quite questionable.

First of all, the "hits" and "misses" metrics used by it are only
updated for a given idle state if the time till the next timer event
("sleep length") is between the target residency of that state and
the target residency of the next one.  Consequently, they are likely
to become stale if the sleep length tends to fall outside that
interval which increases the likelihood of suboptimal idle state
selection.

Second, the decision on whether or not to select the idle state
"matching" the sleep length is based on the metrics collected for
that state alone, whereas in principle the metrics collected for
the other idle states should be taken into consideration when that
decision is made.  For example, if the measured idle duration is less
than the target residency of the idle state "matching" the sleep
length, then it is also less than the target residency of any deeper
idle state and that should be taken into account when considering
whether or not to select any of those states, but currently it is
not.

In order to address the above shortcomings, modify the main idle
state selection logic in the TEO governor to take the metrics
collected for all of the idle states into account when deciding
whether or not to select the one "matching" the sleep length.

Moreover, drop the "misses" metric that becomes redundant after the
above change and rename the "early_hits" metric to "intercepts" so
that its role is better reflected by its name (the idea being that
if a CPU wakes up earlier than indicated by the sleep length, then
it must be a result of a non-timer interrupt that "intercepts" the
CPU).

Also rename the states[] array in struct teo_cpu to
state_bins[] to avoid confusing it with the states[] array in
struct cpuidle_driver and update the documentation to match the
new code (and make it more comprehensive while at it).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 368 +++++++++++++++++---------------
 1 file changed, 200 insertions(+), 168 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 173ab30b9a06b..5bcd45f1d610a 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -2,47 +2,90 @@
 /*
  * Timer events oriented CPU idle governor
  *
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2021 Intel Corporation
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  *
  * The idea of this governor is based on the observation that on many systems
  * timer events are two or more orders of magnitude more frequent than any
- * other interrupts, so they are likely to be the most significant source of CPU
+ * other interrupts, so they are likely to be the most significant cause of CPU
  * wakeups from idle states.  Moreover, information about what happened in the
  * (relatively recent) past can be used to estimate whether or not the deepest
- * idle state with target residency within the time to the closest timer is
- * likely to be suitable for the upcoming idle time of the CPU and, if not, then
- * which of the shallower idle states to choose.
+ * idle state with target residency within the (known) time till the closest
+ * timer event, referred to as the sleep length, is likely to be suitable for
+ * the upcoming CPU idle period and, if not, then which of the shallower idle
+ * states to choose instead of it.
  *
- * Of course, non-timer wakeup sources are more important in some use cases and
- * they can be covered by taking a few most recent idle time intervals of the
- * CPU into account.  However, even in that case it is not necessary to consider
- * idle duration values greater than the time till the closest timer, as the
- * patterns that they may belong to produce average values close enough to
- * the time till the closest timer (sleep length) anyway.
+ * Of course, non-timer wakeup sources are more important in some use cases
+ * which can be covered by taking a few most recent idle time intervals of the
+ * CPU into account.  However, even in that context it is not necessary to
+ * consider idle duration values greater than the sleep length, because the
+ * closest timer will ultimately wake up the CPU anyway unless it is woken up
+ * earlier.
  *
- * Thus this governor estimates whether or not the upcoming idle time of the CPU
- * is likely to be significantly shorter than the sleep length and selects an
- * idle state for it in accordance with that, as follows:
+ * Thus this governor estimates whether or not the prospective idle duration of
+ * a CPU is likely to be significantly shorter than the sleep length and selects
+ * an idle state for it accordingly.
  *
- * - Find an idle state on the basis of the sleep length and state statistics
- *   collected over time:
+ * The computations carried out by this governor are based on using bins whose
+ * boundaries are aligned with the target residency parameter values of the CPU
+ * idle states provided by the cpuidle driver in the ascending order.  That is,
+ * the first bin spans from 0 up to, but not including, the target residency of
+ * the second idle state (idle state 1), the second bin spans from the target
+ * residency of idle state 1 up to, but not including, the target residency of
+ * idle state 2, the third bin spans from the target residency of idle state 2
+ * up to, but not including, the target residency of idle state 3 and so on.
+ * The last bin spans from the target residency of the deepest idle state
+ * supplied by the driver to infinity.
  *
- *   o Find the deepest idle state whose target residency is less than or equal
- *     to the sleep length.
+ * Two metrics called "hits" and "intercepts" are associated with each bin.
+ * They are updated every time before selecting an idle state for the given CPU
+ * in accordance with what happened last time.
  *
- *   o Select it if it matched both the sleep length and the observed idle
- *     duration in the past more often than it matched the sleep length alone
- *     (i.e. the observed idle duration was significantly shorter than the sleep
- *     length matched by it).
+ * The "hits" metric reflects the relative frequency of situations in which the
+ * sleep length and the idle duration measured after CPU wakeup fall into the
+ * same bin (that is, the CPU appears to wake up "on time" relative to the sleep
+ * length).  In turn, the "intercepts" metric reflects the relative frequency of
+ * situations in which the measured idle duration is so much shorter than the
+ * sleep length that the bin it falls into corresponds to an idle state
+ * shallower than the one whose bin is fallen into by the sleep length.
  *
- *   o Otherwise, select the shallower state with the greatest matched "early"
- *     wakeups metric.
+ * In order to select an idle state for a CPU, the governor takes the following
+ * steps (modulo the possible latency constraint that must be taken into account
+ * too):
  *
- * - If the majority of the most recent idle duration values are below the
- *   target residency of the idle state selected so far, use those values to
- *   compute the new expected idle duration and find an idle state matching it
- *   (which has to be shallower than the one selected so far).
+ * 1. Find the deepest CPU idle state whose target residency does not exceed
+ *    the current sleep length (the candidate idle state) and compute two sums
+ *    as follows:
+ *
+ *    - The sum of the "hits" and "intercepts" metrics for the candidate state
+ *      and all of the deeper idle states (it represents the cases in which the
+ *      CPU was idle long enough to avoid being intercepted if the sleep length
+ *      had been equal to the current one).
+ *
+ *    - The sum of the "intercepts" metrics for all of the idle states shallower
+ *      than the candidate one (it represents the cases in which the CPU was not
+ *      idle long enough to avoid being intercepted if the sleep length had been
+ *      equal to the current one).
+ *
+ * 2. If the second sum is greater than the first one, look for an alternative
+ *    idle state to select.
+ *
+ *    - Traverse the idle states shallower than the candidate one in the
+ *      descending order.
+ *
+ *    - For each of them compute the sum of the "intercepts" metrics over all of
+ *      the idle  states between it and the candidate one (including the former
+ *      and excluding the latter).
+ *
+ *    - If that sum is greater than a half of the second sum computed in step 1
+ *      (which means that the target residency of the state in question had not
+ *      exceeded the idle duration in over a half of the relevant cases), select
+ *      the given idle state instead of the candidate one.
+ *
+ * 3. If the majority of the most recent idle duration values are below the
+ *    current anticipated idle duration, use those values to compute the new
+ *    expected idle duration and find an idle state matching it (which has to
+ *    be shallower than the current candidate one).
  */
 
 #include <linux/cpuidle.h>
@@ -65,44 +108,29 @@
 #define INTERVALS	8
 
 /**
- * struct teo_idle_state - Idle state data used by the TEO cpuidle governor.
- * @early_hits: "Early" CPU wakeups "matching" this state.
- * @hits: "On time" CPU wakeups "matching" this state.
- * @misses: CPU wakeups "missing" this state.
- *
- * A CPU wakeup is "matched" by a given idle state if the idle duration measured
- * after the wakeup is between the target residency of that state and the target
- * residency of the next one (or if this is the deepest available idle state, it
- * "matches" a CPU wakeup when the measured idle duration is at least equal to
- * its target residency).
- *
- * Also, from the TEO governor perspective, a CPU wakeup from idle is "early" if
- * it occurs significantly earlier than the closest expected timer event (that
- * is, early enough to match an idle state shallower than the one matching the
- * time till the closest timer event).  Otherwise, the wakeup is "on time", or
- * it is a "hit".
- *
- * A "miss" occurs when the given state doesn't match the wakeup, but it matches
- * the time till the closest timer event used for idle state selection.
+ * struct teo_bin - Metrics used by the TEO cpuidle governor.
+ * @intercepts: The "intercepts" metric.
+ * @hits: The "hits" metric.
  */
-struct teo_idle_state {
-	unsigned int early_hits;
+struct teo_bin {
+	unsigned int intercepts;
 	unsigned int hits;
-	unsigned int misses;
 };
 
 /**
  * struct teo_cpu - CPU data used by the TEO cpuidle governor.
  * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
- * @states: Idle states data corresponding to this CPU.
+ * @state_bins: Idle state data bins for this CPU.
+ * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @interval_idx: Index of the most recent saved idle interval.
  * @intervals: Saved idle duration values.
  */
 struct teo_cpu {
 	s64 time_span_ns;
 	s64 sleep_length_ns;
-	struct teo_idle_state states[CPUIDLE_STATE_MAX];
+	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
+	unsigned int total;
 	int interval_idx;
 	u64 intervals[INTERVALS];
 };
@@ -110,7 +138,7 @@ struct teo_cpu {
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 
 /**
- * teo_update - Update CPU data after wakeup.
+ * teo_update - Update CPU metrics after wakeup.
  * @drv: cpuidle driver containing state data.
  * @dev: Target CPU.
  */
@@ -118,7 +146,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int i, idx_timer = 0, idx_duration = 0;
-	unsigned int hits, misses;
 	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -151,15 +178,21 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 			measured_ns /= 2;
 	}
 
+	cpu_data->total = 0;
+
 	/*
-	 * Decay the "early hits" metric for all of the states and find the
-	 * states matching the sleep length and the measured idle duration.
+	 * Decay the "hits" and "intercepts" metrics for all of the bins and
+	 * find the bins that the sleep length and the measured idle duration
+	 * fall into.
 	 */
 	for (i = 0; i < drv->state_count; i++) {
 		s64 target_residency_ns = drv->states[i].target_residency_ns;
-		unsigned int early_hits = cpu_data->states[i].early_hits;
+		struct teo_bin *bin = &cpu_data->state_bins[i];
+
+		bin->hits -= bin->hits >> DECAY_SHIFT;
+		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
 
-		cpu_data->states[i].early_hits -= early_hits >> DECAY_SHIFT;
+		cpu_data->total += bin->hits + bin->intercepts;
 
 		if (target_residency_ns <= cpu_data->sleep_length_ns) {
 			idx_timer = i;
@@ -169,28 +202,17 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	}
 
 	/*
-	 * Update the "hits" and "misses" data for the state matching the sleep
-	 * length.  If it matches the measured idle duration too, this is a hit,
-	 * so increase the "hits" metric for it then.  Otherwise, this is a
-	 * miss, so increase the "misses" metric for it.  In the latter case
-	 * also increase the "early hits" metric for the state that actually
-	 * matches the measured idle duration.
+	 * If the measured idle duration falls into the same bin as the sleep
+	 * length, this is a "hit", so update the "hits" metric for that bin.
+	 * Otherwise, update the "intercepts" metric for the bin fallen into by
+	 * the measured idle duration.
 	 */
-	hits = cpu_data->states[idx_timer].hits;
-	hits -= hits >> DECAY_SHIFT;
-
-	misses = cpu_data->states[idx_timer].misses;
-	misses -= misses >> DECAY_SHIFT;
-
-	if (idx_timer == idx_duration) {
-		hits += PULSE;
-	} else {
-		misses += PULSE;
-		cpu_data->states[idx_duration].early_hits += PULSE;
-	}
+	if (idx_timer == idx_duration)
+		cpu_data->state_bins[idx_timer].hits += PULSE;
+	else
+		cpu_data->state_bins[idx_duration].intercepts += PULSE;
 
-	cpu_data->states[idx_timer].misses = misses;
-	cpu_data->states[idx_timer].hits = hits;
+	cpu_data->total += PULSE;
 
 	/*
 	 * Save idle duration values corresponding to non-timer wakeups for
@@ -206,6 +228,12 @@ static bool teo_time_ok(u64 interval_ns)
 	return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
 }
 
+static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
+{
+	return (drv->states[idx].target_residency_ns +
+		drv->states[idx+1].target_residency_ns) / 2;
+}
+
 /**
  * teo_find_shallower_state - Find shallower idle state matching given duration.
  * @drv: cpuidle driver containing state data.
@@ -241,12 +269,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-	int constraint_idx = drv->state_count;
-	unsigned int hits = 0, misses = 0;
-	unsigned int early_hits = 0;
-	int prev_max_early_idx = -1;
-	int max_early_idx = -1;
-	int idx0 = -1, idx = -1;
+	unsigned int idx_intercept_sum = 0;
+	unsigned int intercept_sum = 0;
+	unsigned int idx_hit_sum = 0;
+	unsigned int hit_sum = 0;
+	int constraint_idx = 0;
+	int idx0 = 0, idx = -1;
 	ktime_t delta_tick;
 	s64 duration_ns;
 	int i;
@@ -261,119 +289,122 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	duration_ns = tick_nohz_get_sleep_length(&delta_tick);
 	cpu_data->sleep_length_ns = duration_ns;
 
-	for (i = 0; i < drv->state_count; i++) {
-		struct cpuidle_state *s = &drv->states[i];
-
-		if (dev->states_usage[i].disable) {
-			/*
-			 * Ignore disabled states with target residencies beyond
-			 * the anticipated idle duration.
-			 */
-			if (s->target_residency_ns > duration_ns)
-				continue;
-
-			/*
-			 * This state is disabled, so the range of idle duration
-			 * values corresponding to it is covered by the current
-			 * candidate state, but still the "hits" and "misses"
-			 * metrics of the disabled state need to be used to
-			 * decide whether or not the state covering the range in
-			 * question is good enough.
-			 */
-			hits = cpu_data->states[i].hits;
-			misses = cpu_data->states[i].misses;
-
-			if (early_hits >= cpu_data->states[i].early_hits ||
-			    idx < 0)
-				continue;
+	/* Check if there is any choice in the first place. */
+	if (drv->state_count < 2) {
+		idx = 0;;
+		goto end;
+	}
+	if (!dev->states_usage[0].disable) {
+		idx = 0;
+		if (drv->states[1].target_residency_ns > duration_ns)
+			goto end;
+	}
 
-			/*
-			 * If the current candidate state has been the one with
-			 * the maximum "early hits" metric so far, the "early
-			 * hits" metric of the disabled state replaces the
-			 * current "early hits" count to avoid selecting a
-			 * deeper state with lower "early hits" metric.
-			 */
-			if (max_early_idx == idx) {
-				early_hits = cpu_data->states[i].early_hits;
-				continue;
-			}
+	/*
+	 * Find the deepest idle state whose target residency does not exceed
+	 * the current sleep length and the deepest idle state not deeper than
+	 * the former whose exit latency does not exceed the current latency
+	 * constraint.  Compute the sums of metrics for early wakeup pattern
+	 * detection.
+	 */
+	for (i = 1; i < drv->state_count; i++) {
+		struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
+		struct cpuidle_state *s = &drv->states[i];
 
-			/*
-			 * The current candidate state is closer to the disabled
-			 * one than the current maximum "early hits" state, so
-			 * replace the latter with it, but in case the maximum
-			 * "early hits" state index has not been set so far,
-			 * check if the current candidate state is not too
-			 * shallow for that role.
-			 */
-			if (teo_time_ok(drv->states[idx].target_residency_ns)) {
-				prev_max_early_idx = max_early_idx;
-				early_hits = cpu_data->states[i].early_hits;
-				max_early_idx = idx;
-			}
+		/*
+		 * Update the sums of idle state mertics for all of the states
+		 * shallower than the current one.
+		 */
+		intercept_sum += prev_bin->intercepts;
+		hit_sum += prev_bin->hits;
 
+		if (dev->states_usage[i].disable)
 			continue;
-		}
 
 		if (idx < 0) {
 			idx = i; /* first enabled state */
-			hits = cpu_data->states[i].hits;
-			misses = cpu_data->states[i].misses;
 			idx0 = i;
 		}
 
 		if (s->target_residency_ns > duration_ns)
 			break;
 
-		if (s->exit_latency_ns > latency_req && constraint_idx > i)
+		idx = i;
+
+		if (s->exit_latency_ns <= latency_req)
 			constraint_idx = i;
 
-		idx = i;
-		hits = cpu_data->states[i].hits;
-		misses = cpu_data->states[i].misses;
-
-		if (early_hits < cpu_data->states[i].early_hits &&
-		    teo_time_ok(drv->states[i].target_residency_ns)) {
-			prev_max_early_idx = max_early_idx;
-			early_hits = cpu_data->states[i].early_hits;
-			max_early_idx = i;
-		}
+		idx_intercept_sum = intercept_sum;
+		idx_hit_sum = hit_sum;
+	}
+
+	/* Avoid unnecessary overhead. */
+	if (idx < 0) {
+		idx = 0; /* No states enabled, must use 0. */
+		goto end;
+	} else if (idx == idx0) {
+		goto end;
 	}
 
 	/*
-	 * If the "hits" metric of the idle state matching the sleep length is
-	 * greater than its "misses" metric, that is the one to use.  Otherwise,
-	 * it is more likely that one of the shallower states will match the
-	 * idle duration observed after wakeup, so take the one with the maximum
-	 * "early hits" metric, but if that cannot be determined, just use the
-	 * state selected so far.
+	 * If the sum of the intercepts metric for all of the idle states
+	 * shallower than the current candidate one (idx) is greater than the
+	 * sum of the intercepts and hits metrics for the candidate state and
+	 * all of the deeper states, the CPU is likely to wake up early, so find
+	 * an alternative idle state to select.
 	 */
-	if (hits <= misses) {
+	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
+		s64 last_enabled_span_ns = duration_ns;
+		int last_enabled_idx = idx;
+
 		/*
-		 * The current candidate state is not suitable, so take the one
-		 * whose "early hits" metric is the maximum for the range of
-		 * shallower states.
+		 * Look for the deepest idle state whose target residency had
+		 * not exceeded the idle duration in over a half of the relevant
+		 * cases in the past.
+		 *
+		 * Take the possible latency constraint and duration limitation
+		 * present if the tick has been stopped already into account.
 		 */
-		if (idx == max_early_idx)
-			max_early_idx = prev_max_early_idx;
+		intercept_sum = 0;
+
+		for (i = idx - 1; i >= idx0; i--) {
+			s64 span_ns;
 
-		if (max_early_idx >= 0) {
-			idx = max_early_idx;
-			duration_ns = drv->states[idx].target_residency_ns;
+			intercept_sum += cpu_data->state_bins[i].intercepts;
+
+			if (dev->states_usage[i].disable)
+				continue;
+
+			span_ns = teo_middle_of_bin(i, drv);
+			if (!teo_time_ok(span_ns)) {
+				/*
+				 * The current state is too shallow, so select
+				 * the first enabled deeper state.
+				 */
+				duration_ns = last_enabled_span_ns;
+				idx = last_enabled_idx;
+				break;
+			}
+
+			if (2 * intercept_sum > idx_intercept_sum) {
+				idx = i;
+				duration_ns = span_ns;
+				break;
+			}
+
+			last_enabled_span_ns = span_ns;
+			last_enabled_idx = i;
 		}
 	}
 
 	/*
-	 * If there is a latency constraint, it may be necessary to use a
-	 * shallower idle state than the one selected so far.
+	 * If there is a latency constraint, it may be necessary to select an
+	 * idle state shallower than the current candidate one.
 	 */
-	if (constraint_idx < idx)
+	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (idx < 0) {
-		idx = 0; /* No states enabled. Must use 0. */
-	} else if (idx > idx0) {
+	if (idx > idx0) {
 		unsigned int count = 0;
 		u64 sum = 0;
 
@@ -416,6 +447,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		}
 	}
 
+end:
 	/*
 	 * Don't stop the tick if the selected state is a polling one or if the
 	 * expected idle duration is shorter than the tick period length.
-- 
GitLab


From 77577558f25d40b82fba98673cf31ca16ba41d34 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:17:18 +0200
Subject: [PATCH 2740/3804] cpuidle: teo: Rework most recent idle duration
 values treatment

The TEO (Timer Events Oriented) cpuidle governor uses several most
recent idle duration values for a given CPU to refine the idle state
selection in case the previous long-term trends have not been
followed recently and a new trend appears to be forming.  That is
done by computing the average of the most recent idle duration
values falling below the time till the next timer event ("sleep
length"), provided that they are the majority of the most recent
idle duration values taken into account, and using it as the new
expected idle duration value.

However, idle state selection based on that value may not be optimal,
because the average does not really indicate which of the idle states
with target residencies less than or equal to it is likely to be the
best fit.

Thus, instead of computing the average, make the governor carry out
computations based on the distribution of the most recent idle
duration values among the bins corresponding to different idle
states.  Namely, if the majority of the most recent idle duration
values taken into consideration are less than the current sleep
length (which means that the CPU is likely to wake up early), find
the idle state closest to the "candidate" one "matching" the sleep
length whose target residency is less than or equal to the majority
of the most recent idle duration values that have fallen below the
current sleep length (which means that it is likely to be "shallow
enough" this time).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 153 +++++++++++++++-----------------
 1 file changed, 70 insertions(+), 83 deletions(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 5bcd45f1d610a..7c2024f91fd72 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -47,15 +47,20 @@
  * length).  In turn, the "intercepts" metric reflects the relative frequency of
  * situations in which the measured idle duration is so much shorter than the
  * sleep length that the bin it falls into corresponds to an idle state
- * shallower than the one whose bin is fallen into by the sleep length.
+ * shallower than the one whose bin is fallen into by the sleep length (these
+ * situations are referred to as "intercepts" below).
+ *
+ * In addition to the metrics described above, the governor counts recent
+ * intercepts (that is, intercepts that have occurred during the last NR_RECENT
+ * invocations of it for the given CPU) for each bin.
  *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
  * too):
  *
  * 1. Find the deepest CPU idle state whose target residency does not exceed
- *    the current sleep length (the candidate idle state) and compute two sums
- *    as follows:
+ *    the current sleep length (the candidate idle state) and compute 3 sums as
+ *    follows:
  *
  *    - The sum of the "hits" and "intercepts" metrics for the candidate state
  *      and all of the deeper idle states (it represents the cases in which the
@@ -67,25 +72,29 @@
  *      idle long enough to avoid being intercepted if the sleep length had been
  *      equal to the current one).
  *
- * 2. If the second sum is greater than the first one, look for an alternative
- *    idle state to select.
+ *    - The sum of the numbers of recent intercepts for all of the idle states
+ *      shallower than the candidate one.
+ *
+ * 2. If the second sum is greater than the first one or the third sum is
+ *    greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
+ *    for an alternative idle state to select.
  *
  *    - Traverse the idle states shallower than the candidate one in the
  *      descending order.
  *
- *    - For each of them compute the sum of the "intercepts" metrics over all of
- *      the idle  states between it and the candidate one (including the former
- *      and excluding the latter).
+ *    - For each of them compute the sum of the "intercepts" metrics and the sum
+ *      of the numbers of recent intercepts over all of the idle states between
+ *      it and the candidate one (including the former and excluding the
+ *      latter).
  *
- *    - If that sum is greater than a half of the second sum computed in step 1
- *      (which means that the target residency of the state in question had not
- *      exceeded the idle duration in over a half of the relevant cases), select
- *      the given idle state instead of the candidate one.
+ *    - If each of these sums that needs to be taken into account (because the
+ *      check related to it has indicated that the CPU is likely to wake up
+ *      early) is greater than a half of the corresponding sum computed in step
+ *      1 (which means that the target residency of the state in question had
+ *      not exceeded the idle duration in over a half of the relevant cases),
+ *      select the given idle state instead of the candidate one.
  *
- * 3. If the majority of the most recent idle duration values are below the
- *    current anticipated idle duration, use those values to compute the new
- *    expected idle duration and find an idle state matching it (which has to
- *    be shallower than the current candidate one).
+ * 3. By default, select the candidate state.
  */
 
 #include <linux/cpuidle.h>
@@ -103,18 +112,20 @@
 
 /*
  * Number of the most recent idle duration values to take into consideration for
- * the detection of wakeup patterns.
+ * the detection of recent early wakeup patterns.
  */
-#define INTERVALS	8
+#define NR_RECENT	9
 
 /**
  * struct teo_bin - Metrics used by the TEO cpuidle governor.
  * @intercepts: The "intercepts" metric.
  * @hits: The "hits" metric.
+ * @recent: The number of recent "intercepts".
  */
 struct teo_bin {
 	unsigned int intercepts;
 	unsigned int hits;
+	unsigned int recent;
 };
 
 /**
@@ -123,16 +134,16 @@ struct teo_bin {
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @state_bins: Idle state data bins for this CPU.
  * @total: Grand total of the "intercepts" and "hits" mertics for all bins.
- * @interval_idx: Index of the most recent saved idle interval.
- * @intervals: Saved idle duration values.
+ * @next_recent_idx: Index of the next @recent_idx entry to update.
+ * @recent_idx: Indices of bins corresponding to recent "intercepts".
  */
 struct teo_cpu {
 	s64 time_span_ns;
 	s64 sleep_length_ns;
 	struct teo_bin state_bins[CPUIDLE_STATE_MAX];
 	unsigned int total;
-	int interval_idx;
-	u64 intervals[INTERVALS];
+	int next_recent_idx;
+	int recent_idx[NR_RECENT];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -201,26 +212,29 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		}
 	}
 
+	i = cpu_data->next_recent_idx++;
+	if (cpu_data->next_recent_idx >= NR_RECENT)
+		cpu_data->next_recent_idx = 0;
+
+	if (cpu_data->recent_idx[i] >= 0)
+		cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
+
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
 	 * Otherwise, update the "intercepts" metric for the bin fallen into by
 	 * the measured idle duration.
 	 */
-	if (idx_timer == idx_duration)
+	if (idx_timer == idx_duration) {
 		cpu_data->state_bins[idx_timer].hits += PULSE;
-	else
+		cpu_data->recent_idx[i] = -1;
+	} else {
 		cpu_data->state_bins[idx_duration].intercepts += PULSE;
+		cpu_data->state_bins[idx_duration].recent++;
+		cpu_data->recent_idx[i] = idx_duration;
+	}
 
 	cpu_data->total += PULSE;
-
-	/*
-	 * Save idle duration values corresponding to non-timer wakeups for
-	 * pattern detection.
-	 */
-	cpu_data->intervals[cpu_data->interval_idx++] = measured_ns;
-	if (cpu_data->interval_idx >= INTERVALS)
-		cpu_data->interval_idx = 0;
 }
 
 static bool teo_time_ok(u64 interval_ns)
@@ -271,10 +285,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int idx_intercept_sum = 0;
 	unsigned int intercept_sum = 0;
+	unsigned int idx_recent_sum = 0;
+	unsigned int recent_sum = 0;
 	unsigned int idx_hit_sum = 0;
 	unsigned int hit_sum = 0;
 	int constraint_idx = 0;
 	int idx0 = 0, idx = -1;
+	bool alt_intercepts, alt_recent;
 	ktime_t delta_tick;
 	s64 duration_ns;
 	int i;
@@ -317,6 +334,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 */
 		intercept_sum += prev_bin->intercepts;
 		hit_sum += prev_bin->hits;
+		recent_sum += prev_bin->recent;
 
 		if (dev->states_usage[i].disable)
 			continue;
@@ -336,6 +354,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 		idx_intercept_sum = intercept_sum;
 		idx_hit_sum = hit_sum;
+		idx_recent_sum = recent_sum;
 	}
 
 	/* Avoid unnecessary overhead. */
@@ -350,27 +369,36 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * If the sum of the intercepts metric for all of the idle states
 	 * shallower than the current candidate one (idx) is greater than the
 	 * sum of the intercepts and hits metrics for the candidate state and
-	 * all of the deeper states, the CPU is likely to wake up early, so find
-	 * an alternative idle state to select.
+	 * all of the deeper states, or the sum of the numbers of recent
+	 * intercepts over all of the states shallower than the candidate one
+	 * is greater than a half of the number of recent events taken into
+	 * account, the CPU is likely to wake up early, so find an alternative
+	 * idle state to select.
 	 */
-	if (2 * idx_intercept_sum > cpu_data->total - idx_hit_sum) {
+	alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
+	alt_recent = idx_recent_sum > NR_RECENT / 2;
+	if (alt_recent || alt_intercepts) {
 		s64 last_enabled_span_ns = duration_ns;
 		int last_enabled_idx = idx;
 
 		/*
 		 * Look for the deepest idle state whose target residency had
 		 * not exceeded the idle duration in over a half of the relevant
-		 * cases in the past.
+		 * cases (both with respect to intercepts overall and with
+		 * respect to the recent intercepts only) in the past.
 		 *
 		 * Take the possible latency constraint and duration limitation
 		 * present if the tick has been stopped already into account.
 		 */
 		intercept_sum = 0;
+		recent_sum = 0;
 
 		for (i = idx - 1; i >= idx0; i--) {
+			struct teo_bin *bin = &cpu_data->state_bins[i];
 			s64 span_ns;
 
-			intercept_sum += cpu_data->state_bins[i].intercepts;
+			intercept_sum += bin->intercepts;
+			recent_sum += bin->recent;
 
 			if (dev->states_usage[i].disable)
 				continue;
@@ -386,7 +414,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				break;
 			}
 
-			if (2 * intercept_sum > idx_intercept_sum) {
+			if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
+			    (!alt_intercepts ||
+			     2 * intercept_sum > idx_intercept_sum)) {
 				idx = i;
 				duration_ns = span_ns;
 				break;
@@ -404,49 +434,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (idx > constraint_idx)
 		idx = constraint_idx;
 
-	if (idx > idx0) {
-		unsigned int count = 0;
-		u64 sum = 0;
-
-		/*
-		 * The target residencies of at least two different enabled idle
-		 * states are less than or equal to the current expected idle
-		 * duration.  Try to refine the selection using the most recent
-		 * measured idle duration values.
-		 *
-		 * Count and sum the most recent idle duration values less than
-		 * the current expected idle duration value.
-		 */
-		for (i = 0; i < INTERVALS; i++) {
-			u64 val = cpu_data->intervals[i];
-
-			if (val >= duration_ns)
-				continue;
-
-			count++;
-			sum += val;
-		}
-
-		/*
-		 * Give up unless the majority of the most recent idle duration
-		 * values are in the interesting range.
-		 */
-		if (count > INTERVALS / 2) {
-			u64 avg_ns = div64_u64(sum, count);
-
-			/*
-			 * Avoid spending too much time in an idle state that
-			 * would be too shallow.
-			 */
-			if (teo_time_ok(avg_ns)) {
-				duration_ns = avg_ns;
-				if (drv->states[idx].target_residency_ns > avg_ns)
-					idx = teo_find_shallower_state(drv, dev,
-								       idx, avg_ns);
-			}
-		}
-	}
-
 end:
 	/*
 	 * Don't stop the tick if the selected state is a polling one or if the
@@ -507,8 +494,8 @@ static int teo_enable_device(struct cpuidle_driver *drv,
 
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
-	for (i = 0; i < INTERVALS; i++)
-		cpu_data->intervals[i] = U64_MAX;
+	for (i = 0; i < NR_RECENT; i++)
+		cpu_data->recent_idx[i] = -1;
 
 	return 0;
 }
-- 
GitLab


From 154ae8bb3c830f0a568a5194ce7e631aa6bcfe8b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 2 Jun 2021 20:18:02 +0200
Subject: [PATCH 2741/3804] cpuidle: teo: Use kerneldoc documentation in
 admin-guide

There are two descriptions of the TEO (Timer Events Oriented) cpuidle
governor in the kernel source tree, one in the C file containing its
code and one in cpuidle.rst which is part of admin-guide.

Instead of trying to keep them both in sync and in order to reduce
text duplication, include the governor description from the C file
directly into cpuidle.rst.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpuidle.rst | 77 +-----------------------
 drivers/cpuidle/governors/teo.c          | 12 ++--
 2 files changed, 10 insertions(+), 79 deletions(-)

diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index 10fde58d08697..aec2cd2aaea73 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -347,81 +347,8 @@ for tickless systems.  It follows the same basic strategy as the ``menu`` `one
 <menu-gov_>`_: it always tries to find the deepest idle state suitable for the
 given conditions.  However, it applies a different approach to that problem.
 
-First, it does not use sleep length correction factors, but instead it attempts
-to correlate the observed idle duration values with the available idle states
-and use that information to pick up the idle state that is most likely to
-"match" the upcoming CPU idle interval.   Second, it does not take the tasks
-that were running on the given CPU in the past and are waiting on some I/O
-operations to complete now at all (there is no guarantee that they will run on
-the same CPU when they become runnable again) and the pattern detection code in
-it avoids taking timer wakeups into account.  It also only uses idle duration
-values less than the current time till the closest timer (with the scheduler
-tick excluded) for that purpose.
-
-Like in the ``menu`` governor `case <menu-gov_>`_, the first step is to obtain
-the *sleep length*, which is the time until the closest timer event with the
-assumption that the scheduler tick will be stopped (that also is the upper bound
-on the time until the next CPU wakeup).  That value is then used to preselect an
-idle state on the basis of three metrics maintained for each idle state provided
-by the ``CPUIdle`` driver: ``hits``, ``misses`` and ``early_hits``.
-
-The ``hits`` and ``misses`` metrics measure the likelihood that a given idle
-state will "match" the observed (post-wakeup) idle duration if it "matches" the
-sleep length.  They both are subject to decay (after a CPU wakeup) every time
-the target residency of the idle state corresponding to them is less than or
-equal to the sleep length and the target residency of the next idle state is
-greater than the sleep length (that is, when the idle state corresponding to
-them "matches" the sleep length).  The ``hits`` metric is increased if the
-former condition is satisfied and the target residency of the given idle state
-is less than or equal to the observed idle duration and the target residency of
-the next idle state is greater than the observed idle duration at the same time
-(that is, it is increased when the given idle state "matches" both the sleep
-length and the observed idle duration).  In turn, the ``misses`` metric is
-increased when the given idle state "matches" the sleep length only and the
-observed idle duration is too short for its target residency.
-
-The ``early_hits`` metric measures the likelihood that a given idle state will
-"match" the observed (post-wakeup) idle duration if it does not "match" the
-sleep length.  It is subject to decay on every CPU wakeup and it is increased
-when the idle state corresponding to it "matches" the observed (post-wakeup)
-idle duration and the target residency of the next idle state is less than or
-equal to the sleep length (i.e. the idle state "matching" the sleep length is
-deeper than the given one).
-
-The governor walks the list of idle states provided by the ``CPUIdle`` driver
-and finds the last (deepest) one with the target residency less than or equal
-to the sleep length.  Then, the ``hits`` and ``misses`` metrics of that idle
-state are compared with each other and it is preselected if the ``hits`` one is
-greater (which means that that idle state is likely to "match" the observed idle
-duration after CPU wakeup).  If the ``misses`` one is greater, the governor
-preselects the shallower idle state with the maximum ``early_hits`` metric
-(or if there are multiple shallower idle states with equal ``early_hits``
-metric which also is the maximum, the shallowest of them will be preselected).
-[If there is a wakeup latency constraint coming from the `PM QoS framework
-<cpu-pm-qos_>`_ which is hit before reaching the deepest idle state with the
-target residency within the sleep length, the deepest idle state with the exit
-latency within the constraint is preselected without consulting the ``hits``,
-``misses`` and ``early_hits`` metrics.]
-
-Next, the governor takes several idle duration values observed most recently
-into consideration and if at least a half of them are greater than or equal to
-the target residency of the preselected idle state, that idle state becomes the
-final candidate to ask for.  Otherwise, the average of the most recent idle
-duration values below the target residency of the preselected idle state is
-computed and the governor walks the idle states shallower than the preselected
-one and finds the deepest of them with the target residency within that average.
-That idle state is then taken as the final candidate to ask for.
-
-Still, at this point the governor may need to refine the idle state selection if
-it has not decided to `stop the scheduler tick <idle-cpus-and-tick_>`_.  That
-generally happens if the target residency of the idle state selected so far is
-less than the tick period and the tick has not been stopped already (in a
-previous iteration of the idle loop).  Then, like in the ``menu`` governor
-`case <menu-gov_>`_, the sleep length used in the previous computations may not
-reflect the real time until the closest timer event and if it really is greater
-than that time, a shallower state with a suitable target residency may need to
-be selected.
-
+.. kernel-doc:: drivers/cpuidle/governors/teo.c
+   :doc: teo-description
 
 .. _idle-states-representation:
 
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 7c2024f91fd72..1e0b2f828abbe 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -4,6 +4,10 @@
  *
  * Copyright (C) 2018 - 2021 Intel Corporation
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ */
+
+/**
+ * DOC: teo-description
  *
  * The idea of this governor is based on the observation that on many systems
  * timer events are two or more orders of magnitude more frequent than any
@@ -28,7 +32,7 @@
  *
  * The computations carried out by this governor are based on using bins whose
  * boundaries are aligned with the target residency parameter values of the CPU
- * idle states provided by the cpuidle driver in the ascending order.  That is,
+ * idle states provided by the %CPUIdle driver in the ascending order.  That is,
  * the first bin spans from 0 up to, but not including, the target residency of
  * the second idle state (idle state 1), the second bin spans from the target
  * residency of idle state 1 up to, but not including, the target residency of
@@ -51,8 +55,8 @@
  * situations are referred to as "intercepts" below).
  *
  * In addition to the metrics described above, the governor counts recent
- * intercepts (that is, intercepts that have occurred during the last NR_RECENT
- * invocations of it for the given CPU) for each bin.
+ * intercepts (that is, intercepts that have occurred during the last
+ * %NR_RECENT invocations of it for the given CPU) for each bin.
  *
  * In order to select an idle state for a CPU, the governor takes the following
  * steps (modulo the possible latency constraint that must be taken into account
@@ -76,7 +80,7 @@
  *      shallower than the candidate one.
  *
  * 2. If the second sum is greater than the first one or the third sum is
- *    greater than NR_RECENT / 2, the CPU is likely to wake up early, so look
+ *    greater than %NR_RECENT / 2, the CPU is likely to wake up early, so look
  *    for an alternative idle state to select.
  *
  *    - Traverse the idle states shallower than the candidate one in the
-- 
GitLab


From 0eef091d2dc447e10607f6dafa173c311ada972b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:35 +0200
Subject: [PATCH 2742/3804] PM: domains: Split code in
 dev_pm_genpd_set_performance_state()

To prepare some of the code in dev_pm_genpd_set_performance_state() to be
re-used from subsequent changes, let's split it up into two functions.

Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 5695a641efd36..ede0f576efe6e 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -379,6 +379,24 @@ err:
 	return ret;
 }
 
+static int genpd_set_performance_state(struct device *dev, unsigned int state)
+{
+	struct generic_pm_domain *genpd = dev_to_genpd(dev);
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	unsigned int prev_state;
+	int ret;
+
+	prev_state = gpd_data->performance_state;
+	gpd_data->performance_state = state;
+	state = _genpd_reeval_performance_state(genpd, state);
+
+	ret = _genpd_set_performance_state(genpd, state, 0);
+	if (ret)
+		gpd_data->performance_state = prev_state;
+
+	return ret;
+}
+
 /**
  * dev_pm_genpd_set_performance_state- Set performance state of device's power
  * domain.
@@ -397,8 +415,6 @@ err:
 int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 {
 	struct generic_pm_domain *genpd;
-	struct generic_pm_domain_data *gpd_data;
-	unsigned int prev;
 	int ret;
 
 	genpd = dev_to_genpd_safe(dev);
@@ -410,16 +426,7 @@ int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state)
 		return -EINVAL;
 
 	genpd_lock(genpd);
-
-	gpd_data = to_gpd_data(dev->power.subsys_data->domain_data);
-	prev = gpd_data->performance_state;
-	gpd_data->performance_state = state;
-
-	state = _genpd_reeval_performance_state(genpd, state);
-	ret = _genpd_set_performance_state(genpd, state, 0);
-	if (ret)
-		gpd_data->performance_state = prev;
-
+	ret = genpd_set_performance_state(dev, state);
 	genpd_unlock(genpd);
 
 	return ret;
-- 
GitLab


From d97fe100ee0b36c5dd8013ffd70fe8fcdcabff2b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:36 +0200
Subject: [PATCH 2743/3804] PM: domains: Return early if perf state is already
 set for the device

When dev_pm_genpd_set_performance_state() gets called to set a new
performance state for the device, let's take a quicker path by doing an
early return, if it turns out that the new state is already set for the
device.

Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index ede0f576efe6e..90a9828fcb2fa 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -387,6 +387,9 @@ static int genpd_set_performance_state(struct device *dev, unsigned int state)
 	int ret;
 
 	prev_state = gpd_data->performance_state;
+	if (prev_state == state)
+		return 0;
+
 	gpd_data->performance_state = state;
 	state = _genpd_reeval_performance_state(genpd, state);
 
-- 
GitLab


From 5937c3ce21228d33d2eb3287baa7e4cf6978dba9 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 3 Jun 2021 11:34:37 +0200
Subject: [PATCH 2744/3804] PM: domains: Drop/restore performance state votes
 for devices at runtime PM

A subsystem/driver that needs to manage OPPs for its device should
typically drop its vote for the OPP when the device becomes runtime
suspended. In this way, the corresponding aggregation of the performance
state votes that is managed in genpd for the attached PM domain may find
that the aggregated vote can be decreased. Hence, it may allow genpd to set
a lower performance state for the PM domain, thus avoiding wasting
energy.

To accomplish this, typically a subsystem/driver would need to call
dev_pm_opp_set_rate|opp() for its device from its ->runtime_suspend()
callback, to drop the vote for the OPP. Accordingly, it needs another call
to dev_pm_opp_set_rate|opp() to restore the vote for the OPP from its
->runtime_resume() callback.

To avoid boilerplate code in subsystems/driver to deal with these things,
let's instead manage this internally in genpd.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 27 +++++++++++++++++++++++++--
 include/linux/pm_domain.h   |  1 +
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 90a9828fcb2fa..ab0b740cc0f1c 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -400,6 +400,23 @@ static int genpd_set_performance_state(struct device *dev, unsigned int state)
 	return ret;
 }
 
+static int genpd_drop_performance_state(struct device *dev)
+{
+	unsigned int prev_state = dev_gpd_data(dev)->performance_state;
+
+	if (!genpd_set_performance_state(dev, 0))
+		return prev_state;
+
+	return 0;
+}
+
+static void genpd_restore_performance_state(struct device *dev,
+					    unsigned int state)
+{
+	if (state)
+		genpd_set_performance_state(dev, state);
+}
+
 /**
  * dev_pm_genpd_set_performance_state- Set performance state of device's power
  * domain.
@@ -843,7 +860,8 @@ static int genpd_runtime_suspend(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
 	bool (*suspend_ok)(struct device *__dev);
-	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	struct gpd_timing_data *td = &gpd_data->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
@@ -900,6 +918,7 @@ static int genpd_runtime_suspend(struct device *dev)
 		return 0;
 
 	genpd_lock(genpd);
+	gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
 	genpd_power_off(genpd, true, 0);
 	genpd_unlock(genpd);
 
@@ -917,7 +936,8 @@ static int genpd_runtime_suspend(struct device *dev)
 static int genpd_runtime_resume(struct device *dev)
 {
 	struct generic_pm_domain *genpd;
-	struct gpd_timing_data *td = &dev_gpd_data(dev)->td;
+	struct generic_pm_domain_data *gpd_data = dev_gpd_data(dev);
+	struct gpd_timing_data *td = &gpd_data->td;
 	bool runtime_pm = pm_runtime_enabled(dev);
 	ktime_t time_start;
 	s64 elapsed_ns;
@@ -941,6 +961,8 @@ static int genpd_runtime_resume(struct device *dev)
 
 	genpd_lock(genpd);
 	ret = genpd_power_on(genpd, 0);
+	if (!ret)
+		genpd_restore_performance_state(dev, gpd_data->rpm_pstate);
 	genpd_unlock(genpd);
 
 	if (ret)
@@ -979,6 +1001,7 @@ err_stop:
 err_poweroff:
 	if (!pm_runtime_is_irq_safe(dev) || genpd_is_irq_safe(genpd)) {
 		genpd_lock(genpd);
+		gpd_data->rpm_pstate = genpd_drop_performance_state(dev);
 		genpd_power_off(genpd, true, 0);
 		genpd_unlock(genpd);
 	}
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index dfcfbcecc34b8..21a0577305ef4 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -198,6 +198,7 @@ struct generic_pm_domain_data {
 	struct notifier_block *power_nb;
 	int cpu;
 	unsigned int performance_state;
+	unsigned int rpm_pstate;
 	ktime_t	next_wakeup;
 	void *data;
 };
-- 
GitLab


From 03466883a0fdb5c38f2907b027565b9f253688a8 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 8 Jun 2021 15:44:37 +0800
Subject: [PATCH 2745/3804] PM: sleep: remove trailing spaces and tabs

Run the following command to find and remove the trailing spaces and tabs:

$ find kernel/power/ -type f | xargs sed -r -i 's/[ \t]+$//'

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/Kconfig   | 12 ++++++------
 kernel/power/process.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6bfe3ead10ad3..a12779650f152 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -98,20 +98,20 @@ config PM_STD_PARTITION
 	default ""
 	help
 	  The default resume partition is the partition that the suspend-
-	  to-disk implementation will look for a suspended disk image. 
+	  to-disk implementation will look for a suspended disk image.
 
-	  The partition specified here will be different for almost every user. 
+	  The partition specified here will be different for almost every user.
 	  It should be a valid swap partition (at least for now) that is turned
-	  on before suspending. 
+	  on before suspending.
 
 	  The partition specified can be overridden by specifying:
 
-		resume=/dev/<other device> 
+		resume=/dev/<other device>
 
-	  which will set the resume partition to the device specified. 
+	  which will set the resume partition to the device specified.
 
 	  Note there is currently not a way to specify which device to save the
-	  suspended image to. It will simply pick the first available swap 
+	  suspended image to. It will simply pick the first available swap
 	  device.
 
 config PM_SLEEP
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 50cc635344866..37401c99b7d7d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * drivers/power/process.c - Functions for starting/stopping processes on 
+ * drivers/power/process.c - Functions for starting/stopping processes on
  *                           suspend transitions.
  *
  * Originally from swsusp.
-- 
GitLab


From 480f0de68caddfe336b8cc0c74a40328779940d3 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 8 Jun 2021 16:13:14 +0800
Subject: [PATCH 2746/3804] PM: hibernate: remove leading spaces before tabs

 1) Run the following command to find and remove the leading spaces
    before tabs:
    $ find kernel/power/ -type f | xargs sed -r -i 's/^[ ]+\t/\t/'
 2) Manually check and correct if necessary

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/snapshot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index af507c8c895b6..f7a9860782135 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1146,7 +1146,7 @@ int create_basic_memory_bitmaps(void)
  Free_second_object:
 	kfree(bm2);
  Free_first_bitmap:
- 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
+	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
  Free_first_object:
 	kfree(bm1);
 	return -ENOMEM;
-- 
GitLab


From 52c208397c246f0c31d031eb8c41f9c7e9fdec0e Mon Sep 17 00:00:00 2001
From: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Date: Mon, 10 May 2021 12:09:39 -0700
Subject: [PATCH 2747/3804] IMA: support for duplicate measurement records

IMA measures contents of a given file/buffer/critical-data record,
and properly re-measures it on change.  However, IMA does not measure
the duplicate value for a given record, since TPM extend is a very
expensive operation.  For example, if the record changes from value
'v#1' to 'v#2', and then back to 'v#1', IMA will not measure and log
the last change to 'v#1', since the hash of 'v#1' for that record is
already present in the IMA htable.  This limits the ability of an
external attestation service to accurately determine the current state
of the system.  The service would incorrectly conclude that the latest
value of the given record on the system is 'v#2', and act accordingly.

Define and use a new Kconfig option IMA_DISABLE_HTABLE to permit
duplicate records in the IMA measurement list.

In addition to the duplicate measurement records described above,
other duplicate file measurement records may be included in the log,
when CONFIG_IMA_DISABLE_HTABLE is enabled.  For example,
    - i_version is not enabled,
    - i_generation changed,
    - same file present on different filesystems,
    - an inode is evicted from dcache

Signed-off-by: Tushar Sugandhi <tusharsu@linux.microsoft.com>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
[zohar@linux.ibm.com: updated list of duplicate measurement records]
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/ima/Kconfig     | 7 +++++++
 security/integrity/ima/ima_queue.c | 5 +++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 12e9250c1bec6..d0ceada99243a 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -334,3 +334,10 @@ config IMA_SECURE_AND_OR_TRUSTED_BOOT
        help
           This option is selected by architectures to enable secure and/or
           trusted boot based on IMA runtime policies.
+
+config IMA_DISABLE_HTABLE
+	bool "Disable htable to allow measurement of duplicate records"
+	depends on IMA
+	default n
+	help
+	   This option disables htable to allow measurement of duplicate records.
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index c096ef8945c7d..532da87ce5198 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -168,7 +168,7 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
 	int result = 0, tpmresult = 0;
 
 	mutex_lock(&ima_extend_list_mutex);
-	if (!violation) {
+	if (!violation && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE)) {
 		if (ima_lookup_digest_entry(digest, entry->pcr)) {
 			audit_cause = "hash_exists";
 			result = -EEXIST;
@@ -176,7 +176,8 @@ int ima_add_template_entry(struct ima_template_entry *entry, int violation,
 		}
 	}
 
-	result = ima_add_digest_entry(entry, 1);
+	result = ima_add_digest_entry(entry,
+				      !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE));
 	if (result < 0) {
 		audit_cause = "ENOMEM";
 		audit_info = 0;
-- 
GitLab


From 5a2bd1b1c64e1ac5627db3767ac465f18606315c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 8 Jun 2021 11:02:48 +0200
Subject: [PATCH 2748/3804] PM: runtime: Improve path in rpm_idle() when no
 callback

When pm_runtime_no_callbacks() has been called for a struct device to set
the dev->power.no_callbacks flag for it, it enables rpm_idle() to take a
slightly quicker path by assuming that a ->runtime_idle() callback would
have returned 0 to indicate success.

A device that does not have the dev->power.no_callbacks flag set for it,
may still be missing a corresponding ->runtime_idle() callback, in which
case the slower path in rpm_idle() is taken. Let's improve the behaviour
for this case, by aligning code to the quicker path.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b570848d23e0e..68bebbf81347f 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -446,7 +446,10 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	/* Pending requests need to be canceled. */
 	dev->power.request = RPM_REQ_NONE;
 
-	if (dev->power.no_callbacks)
+	callback = RPM_GET_CALLBACK(dev, runtime_idle);
+
+	/* If no callback assume success. */
+	if (!callback || dev->power.no_callbacks)
 		goto out;
 
 	/* Carry out an asynchronous or a synchronous idle notification. */
@@ -462,10 +465,7 @@ static int rpm_idle(struct device *dev, int rpmflags)
 
 	dev->power.idle_notification = true;
 
-	callback = RPM_GET_CALLBACK(dev, runtime_idle);
-
-	if (callback)
-		retval = __rpm_callback(callback, dev);
+	retval = __rpm_callback(callback, dev);
 
 	dev->power.idle_notification = false;
 	wake_up_all(&dev->power.wait_queue);
-- 
GitLab


From 63d00be69348fda431ae59aba6af268a5cf5058e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 8 Jun 2021 11:02:49 +0200
Subject: [PATCH 2749/3804] PM: runtime: Allow unassigned
 ->runtime_suspend|resume callbacks

We are currently allowing ->runtime_idle() callbacks to be unassigned
without returning an error code from rpm_idle(). This has been useful to
avoid boilerplate code in drivers. Let's take this approach a step further,
by allowing also unassigned ->runtime_suspend|resume() callbacks.

In this way, a consumer/supplier device link can be used to let a consumer
device be power managed through its supplier device, without requiring
assigned ->runtime_suspend|resume() callbacks for the consumer device, for
example.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 68bebbf81347f..8a66eaf731e48 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -345,7 +345,7 @@ static void rpm_suspend_suppliers(struct device *dev)
 static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 	__releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
-	int retval, idx;
+	int retval = 0, idx;
 	bool use_links = dev->power.links_count > 0;
 
 	if (dev->power.irq_safe) {
@@ -373,7 +373,8 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 		}
 	}
 
-	retval = cb(dev);
+	if (cb)
+		retval = cb(dev);
 
 	if (dev->power.irq_safe) {
 		spin_lock(&dev->power.lock);
@@ -484,9 +485,6 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev)
 {
 	int retval;
 
-	if (!cb)
-		return -ENOSYS;
-
 	if (dev->power.memalloc_noio) {
 		unsigned int noio_flag;
 
-- 
GitLab


From 4ec4f059088b48585c337328e05fa930c64d1ba8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 9 Jun 2021 12:06:10 +0200
Subject: [PATCH 2750/3804] PM: runtime: Clarify documentation when callbacks
 are unassigned

Recent changes to the PM core allow ->runtime_suspend|resume callbacks to
be unassigned.

In the earlier behaviour the PM core would return -ENOSYS, when trying to
runtime resume a device, for example. Let's update the documentation to
clarify this.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/runtime_pm.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/power/runtime_pm.rst b/Documentation/power/runtime_pm.rst
index b48cac5f90489..d6bf84f061f41 100644
--- a/Documentation/power/runtime_pm.rst
+++ b/Documentation/power/runtime_pm.rst
@@ -831,6 +831,15 @@ or driver about runtime power changes.  Instead, the driver for the device's
 parent must take responsibility for telling the device's driver when the
 parent's power state changes.
 
+Note that, in some cases it may not be desirable for subsystems/drivers to call
+pm_runtime_no_callbacks() for their devices. This could be because a subset of
+the runtime PM callbacks needs to be implemented, a platform dependent PM
+domain could get attached to the device or that the device is power managed
+through a supplier device link. For these reasons and to avoid boilerplate code
+in subsystems/drivers, the PM core allows runtime PM callbacks to be
+unassigned. More precisely, if a callback pointer is NULL, the PM core will act
+as though there was a callback and it returned 0.
+
 9. Autosuspend, or automatically-delayed suspends
 =================================================
 
-- 
GitLab


From da9ef50f545f86ffe6ff786174d26500c4db737a Mon Sep 17 00:00:00 2001
From: Praneeth Bajjuri <praneeth@ti.com>
Date: Wed, 9 Jun 2021 19:43:42 -0500
Subject: [PATCH 2751/3804] net: phy: dp83867: perform soft reset and retain
 established link

Current logic is performing hard reset and causing the programmed
registers to be wiped out.

as per datasheet: https://www.ti.com/lit/ds/symlink/dp83867cr.pdf
8.6.26 Control Register (CTRL)

Do SW_RESTART to perform a reset that does not include the registers.
If performed when a link is already present, it will drop the link and
trigger auto-negotiation again.

Signed-off-by: Praneeth Bajjuri <praneeth@ti.com>
Signed-off-by: Geet Modi <geet.modi@ti.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83867.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 9bd9a5c0b1db3..6bbc81ad295fb 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -826,16 +826,12 @@ static int dp83867_phy_reset(struct phy_device *phydev)
 {
 	int err;
 
-	err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET);
+	err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART);
 	if (err < 0)
 		return err;
 
 	usleep_range(10, 20);
 
-	/* After reset FORCE_LINK_GOOD bit is set. Although the
-	 * default value should be unset. Disable FORCE_LINK_GOOD
-	 * for the phy to work properly.
-	 */
 	return phy_modify(phydev, MII_DP83867_PHYCTRL,
 			 DP83867_PHYCR_FORCE_LINK_GOOD, 0);
 }
-- 
GitLab


From 2398ce80152aae33b9501ef54452e09e8e8d4262 Mon Sep 17 00:00:00 2001
From: Tor Vic <torvic9@mailbox.org>
Date: Thu, 10 Jun 2021 20:58:06 +0000
Subject: [PATCH 2752/3804] x86, lto: Pass -stack-alignment only on LLD <
 13.0.0

Since LLVM commit 3787ee4, the '-stack-alignment' flag has been dropped
[1], leading to the following error message when building a LTO kernel
with Clang-13 and LLD-13:

    ld.lld: error: -plugin-opt=-: ld.lld: Unknown command line argument
    '-stack-alignment=8'.  Try 'ld.lld --help'
    ld.lld: Did you mean '--stackrealign=8'?

It also appears that the '-code-model' flag is not necessary anymore
starting with LLVM-9 [2].

Drop '-code-model' and make '-stack-alignment' conditional on LLD < 13.0.0.

These flags were necessary because these flags were not encoded in the
IR properly, so the link would restart optimizations without them. Now
they are properly encoded in the IR, and these flags exposing
implementation details are no longer necessary.

[1] https://reviews.llvm.org/D103048
[2] https://reviews.llvm.org/D52322

Cc: stable@vger.kernel.org
Link: https://github.com/ClangBuiltLinux/linux/issues/1377
Signed-off-by: Tor Vic <torvic9@mailbox.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/f2c018ee-5999-741e-58d4-e482d5246067@mailbox.org
---
 arch/x86/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 307529417021a..cb5e8d39cac15 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -200,8 +200,9 @@ endif
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
 ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS	+= -plugin-opt=-code-model=kernel \
-		   -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+KBUILD_LDFLAGS	+= -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
 endif
 
 ifdef CONFIG_X86_NEED_RELOCS
-- 
GitLab


From 990e78116d38059c9306cf0560c1c4ed1cf358d3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 5 Jun 2021 17:09:50 +0300
Subject: [PATCH 2753/3804] block: loop: fix deadlock between open and remove

Commit c76f48eb5c08 ("block: take bd_mutex around delete_partitions in
del_gendisk") takes disk->part0->bd_mutex in del_gendisk(), which
causes the following AB/BA deadlock between removing a loop device and
opening it:

 1) loop_control_ioctl(LOOP_CTL_REMOVE)
     -> mutex_lock(&loop_ctl_mutex)
     -> del_gendisk
         -> mutex_lock(&disk->part0->bd_mutex)

 2) blkdev_get_by_dev
     -> mutex_lock(&disk->part0->bd_mutex)
     -> lo_open
         -> mutex_lock(&loop_ctl_mutex)

Add a new Lo_deleting state to remove the need for clearing
->private_data and thus holding loop_ctl_mutex in the ioctl
LOOP_CTL_REMOVE path.

Based on an analysis and earlier patch from
Ming Lei <ming.lei@redhat.com>.

Reported-by: Colin Ian King <colin.king@canonical.com>
Fixes: c76f48eb5c08 ("block: take bd_mutex around delete_partitions in del_gendisk")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20210605140950.5800-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 25 +++++++------------------
 drivers/block/loop.h |  1 +
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index a370cde3ddd49..f280a96d4de2d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1878,29 +1878,18 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
 
 static int lo_open(struct block_device *bdev, fmode_t mode)
 {
-	struct loop_device *lo;
+	struct loop_device *lo = bdev->bd_disk->private_data;
 	int err;
 
-	/*
-	 * take loop_ctl_mutex to protect lo pointer from race with
-	 * loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
-	 * release it prior to updating lo->lo_refcnt.
-	 */
-	err = mutex_lock_killable(&loop_ctl_mutex);
-	if (err)
-		return err;
-	lo = bdev->bd_disk->private_data;
-	if (!lo) {
-		mutex_unlock(&loop_ctl_mutex);
-		return -ENXIO;
-	}
 	err = mutex_lock_killable(&lo->lo_mutex);
-	mutex_unlock(&loop_ctl_mutex);
 	if (err)
 		return err;
-	atomic_inc(&lo->lo_refcnt);
+	if (lo->lo_state == Lo_deleting)
+		err = -ENXIO;
+	else
+		atomic_inc(&lo->lo_refcnt);
 	mutex_unlock(&lo->lo_mutex);
-	return 0;
+	return err;
 }
 
 static void lo_release(struct gendisk *disk, fmode_t mode)
@@ -2284,7 +2273,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			mutex_unlock(&lo->lo_mutex);
 			break;
 		}
-		lo->lo_disk->private_data = NULL;
+		lo->lo_state = Lo_deleting;
 		mutex_unlock(&lo->lo_mutex);
 		idr_remove(&loop_index_idr, lo->lo_number);
 		loop_remove(lo);
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index a3c04f310672e..5beb959b94d36 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -22,6 +22,7 @@ enum {
 	Lo_unbound,
 	Lo_bound,
 	Lo_rundown,
+	Lo_deleting,
 };
 
 struct loop_func_table;
-- 
GitLab


From 33e381448cf7a05d76ac0b47d4a6531ecd0e5c53 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 11 Jun 2021 08:13:39 +0200
Subject: [PATCH 2754/3804] alx: Fix an error handling path in 'alx_probe()'

If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
call, as already done in the remove function.

Fixes: ab69bde6b2e9 ("alx: add a simple AR816x/AR817x device driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index b3d74332ed330..7748b276e5fde 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1849,6 +1849,7 @@ out_free_netdev:
 	free_netdev(netdev);
 out_pci_release:
 	pci_release_mem_regions(pdev);
+	pci_disable_pcie_error_reporting(pdev);
 out_pci_disable:
 	pci_disable_device(pdev);
 	return err;
-- 
GitLab


From 42a2039753a7f758ba5c85cb199fcf10dc2111eb Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 11 Jun 2021 12:17:45 +0530
Subject: [PATCH 2755/3804] cxgb4: fix endianness when flashing boot image

Boot images are copied to memory and updated with current underlying
device ID before flashing them to adapter. Ensure the updated images
are always flashed in Big Endian to allow the firmware to read the
new images during boot properly.

Fixes: 550883558f17 ("cxgb4: add support to flash boot image")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 44 +++++++++++++---------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 9428ef1f04a81..1293505025c1a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3060,16 +3060,19 @@ int t4_read_flash(struct adapter *adapter, unsigned int addr,
  *	@addr: the start address to write
  *	@n: length of data to write in bytes
  *	@data: the data to write
+ *	@byte_oriented: whether to store data as bytes or as words
  *
  *	Writes up to a page of data (256 bytes) to the serial flash starting
  *	at the given address.  All the data must be written to the same page.
+ *	If @byte_oriented is set the write data is stored as byte stream
+ *	(i.e. matches what on disk), otherwise in big-endian.
  */
 static int t4_write_flash(struct adapter *adapter, unsigned int addr,
-			  unsigned int n, const u8 *data)
+			  unsigned int n, const u8 *data, bool byte_oriented)
 {
-	int ret;
-	u32 buf[64];
 	unsigned int i, c, left, val, offset = addr & 0xff;
+	u32 buf[64];
+	int ret;
 
 	if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE)
 		return -EINVAL;
@@ -3080,10 +3083,14 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
 	    (ret = sf1_write(adapter, 4, 1, 1, val)) != 0)
 		goto unlock;
 
-	for (left = n; left; left -= c) {
+	for (left = n; left; left -= c, data += c) {
 		c = min(left, 4U);
-		for (val = 0, i = 0; i < c; ++i)
-			val = (val << 8) + *data++;
+		for (val = 0, i = 0; i < c; ++i) {
+			if (byte_oriented)
+				val = (val << 8) + data[i];
+			else
+				val = (val << 8) + data[c - i - 1];
+		}
 
 		ret = sf1_write(adapter, c, c != left, 1, val);
 		if (ret)
@@ -3096,7 +3103,8 @@ static int t4_write_flash(struct adapter *adapter, unsigned int addr,
 	t4_write_reg(adapter, SF_OP_A, 0);    /* unlock SF */
 
 	/* Read the page to verify the write succeeded */
-	ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1);
+	ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf,
+			    byte_oriented);
 	if (ret)
 		return ret;
 
@@ -3692,7 +3700,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 	 */
 	memcpy(first_page, fw_data, SF_PAGE_SIZE);
 	((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff);
-	ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page);
+	ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true);
 	if (ret)
 		goto out;
 
@@ -3700,14 +3708,14 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size)
 	for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
 		addr += SF_PAGE_SIZE;
 		fw_data += SF_PAGE_SIZE;
-		ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data);
+		ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true);
 		if (ret)
 			goto out;
 	}
 
-	ret = t4_write_flash(adap,
-			     fw_start + offsetof(struct fw_hdr, fw_ver),
-			     sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver);
+	ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver),
+			     sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver,
+			     true);
 out:
 	if (ret)
 		dev_err(adap->pdev_dev, "firmware download failed, error %d\n",
@@ -10208,7 +10216,7 @@ int t4_load_cfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
 			n = size - i;
 		else
 			n = SF_PAGE_SIZE;
-		ret = t4_write_flash(adap, addr, n, cfg_data);
+		ret = t4_write_flash(adap, addr, n, cfg_data, true);
 		if (ret)
 			goto out;
 
@@ -10677,13 +10685,14 @@ int t4_load_boot(struct adapter *adap, u8 *boot_data,
 	for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) {
 		addr += SF_PAGE_SIZE;
 		boot_data += SF_PAGE_SIZE;
-		ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data);
+		ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data,
+				     false);
 		if (ret)
 			goto out;
 	}
 
 	ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE,
-			     (const u8 *)header);
+			     (const u8 *)header, false);
 
 out:
 	if (ret)
@@ -10758,7 +10767,7 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
 	for (i = 0; i < size; i += SF_PAGE_SIZE) {
 		n = min_t(u32, size - i, SF_PAGE_SIZE);
 
-		ret = t4_write_flash(adap, addr, n, cfg_data);
+		ret = t4_write_flash(adap, addr, n, cfg_data, false);
 		if (ret)
 			goto out;
 
@@ -10770,7 +10779,8 @@ int t4_load_bootcfg(struct adapter *adap, const u8 *cfg_data, unsigned int size)
 	for (i = 0; i < npad; i++) {
 		u8 data = 0;
 
-		ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data);
+		ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data,
+				     false);
 		if (ret)
 			goto out;
 	}
-- 
GitLab


From f046bd0ae15d8a0bbe57d4647da182420f720c3d Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 11 Jun 2021 12:17:46 +0530
Subject: [PATCH 2756/3804] cxgb4: fix sleep in atomic when flashing PHY
 firmware

Before writing new PHY firmware to on-chip memory, driver queries
firmware for current running PHY firmware version, which can result
in sleep waiting for reply. So, move spinlock closer to the actual
on-chip memory write operation, instead of taking it at the callers.

Fixes: 5fff701c838e ("cxgb4: always sync access when flashing PHY firmware")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 2 --
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 2 --
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         | 2 ++
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 61ea3ec5c3fcc..bc2de01d05395 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1337,9 +1337,7 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
 		return ret;
 	}
 
-	spin_lock_bh(&adap->win0_lock);
 	ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
-	spin_unlock_bh(&adap->win0_lock);
 	if (ret)
 		dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 1f601de02e706..762113a04dde6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4424,10 +4424,8 @@ static int adap_init0_phy(struct adapter *adap)
 
 	/* Load PHY Firmware onto adapter.
 	 */
-	spin_lock_bh(&adap->win0_lock);
 	ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version,
 			     (u8 *)phyf->data, phyf->size);
-	spin_unlock_bh(&adap->win0_lock);
 	if (ret < 0)
 		dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n",
 			-ret);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 1293505025c1a..a0555f4d76fc4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -3820,9 +3820,11 @@ int t4_load_phy_fw(struct adapter *adap, int win,
 	/* Copy the supplied PHY Firmware image to the adapter memory location
 	 * allocated by the adapter firmware.
 	 */
+	spin_lock_bh(&adap->win0_lock);
 	ret = t4_memory_rw(adap, win, mtype, maddr,
 			   phy_fw_size, (__be32 *)phy_fw_data,
 			   T4_MEMORY_WRITE);
+	spin_unlock_bh(&adap->win0_lock);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 6d297540f75d759489054e8b07932208fc4db2cb Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 11 Jun 2021 12:17:47 +0530
Subject: [PATCH 2757/3804] cxgb4: halt chip before flashing PHY firmware image

When using firmware-assisted PHY firmware image write to flash,
halt the chip before beginning the flash write operation to allow
the running firmware to store the image persistently. Otherwise,
the running firmware will only store the PHY image in local on-chip
RAM, which will be lost after next reset.

Fixes: 4ee339e1e92a ("cxgb4: add support to flash PHY image")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index bc2de01d05395..df20485b57443 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1337,11 +1337,27 @@ static int cxgb4_ethtool_flash_phy(struct net_device *netdev,
 		return ret;
 	}
 
+	/* We have to RESET the chip/firmware because we need the
+	 * chip in uninitialized state for loading new PHY image.
+	 * Otherwise, the running firmware will only store the PHY
+	 * image in local RAM which will be lost after next reset.
+	 */
+	ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F);
+	if (ret < 0) {
+		dev_err(adap->pdev_dev,
+			"Set FW to RESET for flashing PHY FW failed. ret: %d\n",
+			ret);
+		return ret;
+	}
+
 	ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size);
-	if (ret)
-		dev_err(adap->pdev_dev, "Failed to load PHY FW\n");
+	if (ret < 0) {
+		dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n",
+			ret);
+		return ret;
+	}
 
-	return ret;
+	return 0;
 }
 
 static int cxgb4_ethtool_flash_fw(struct net_device *netdev,
-- 
GitLab


From 1adb20f0d496b2c61e9aa1f4761b8d71f93d258e Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Date: Fri, 11 Jun 2021 15:16:11 +0800
Subject: [PATCH 2758/3804] net: stmmac: dwmac1000: Fix extended MAC address
 registers definition

The register starting at 0x800 is the 16th MAC address register rather
than the first one.

Fixes: cffb13f4d6fb ("stmmac: extend mac addr reg and fix perfect filering")
Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index b70d44ac09906..3c73453725f94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -76,10 +76,10 @@ enum power_event {
 #define LPI_CTRL_STATUS_TLPIEN	0x00000001	/* Transmit LPI Entry */
 
 /* GMAC HW ADDR regs */
-#define GMAC_ADDR_HIGH(reg)	(((reg > 15) ? 0x00000800 : 0x00000040) + \
-				(reg * 8))
-#define GMAC_ADDR_LOW(reg)	(((reg > 15) ? 0x00000804 : 0x00000044) + \
-				(reg * 8))
+#define GMAC_ADDR_HIGH(reg)	((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \
+				 0x00000040 + (reg * 8))
+#define GMAC_ADDR_LOW(reg)	((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \
+				 0x00000044 + (reg * 8))
 #define GMAC_MAX_PERFECT_ADDRESSES	1
 
 #define GMAC_PCS_BASE		0x000000c0	/* PCS register base */
-- 
GitLab


From 0ddd7eaffa644baa78e247bbd220ab7195b1eed6 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alex@ghiti.fr>
Date: Fri, 4 Jun 2021 14:06:39 +0200
Subject: [PATCH 2759/3804] riscv: Fix BUILTIN_DTB for sifive and microchip soc

Fix BUILTIN_DTB config which resulted in a dtb that was actually not
built into the Linux image: in the same manner as Canaan soc does,
create an object file from the dtb file that will get linked into the
Linux image.

Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/boot/dts/microchip/Makefile | 1 +
 arch/riscv/boot/dts/sifive/Makefile    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile
index 622b12771fd3f..855c1502d912b 100644
--- a/arch/riscv/boot/dts/microchip/Makefile
+++ b/arch/riscv/boot/dts/microchip/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile
index 74c47fe9fc22e..d90e4eb0ade84 100644
--- a/arch/riscv/boot/dts/sifive/Makefile
+++ b/arch/riscv/boot/dts/sifive/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \
 			    hifive-unmatched-a00.dtb
+obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
-- 
GitLab


From ea6932d70e223e02fea3ae20a4feff05d7c1ea9a Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Fri, 11 Jun 2021 22:29:59 +0800
Subject: [PATCH 2760/3804] net: make get_net_ns return error if NET_NS is
 disabled

There is a panic in socket ioctl cmd SIOCGSKNS when NET_NS is not enabled.
The reason is that nsfs tries to access ns->ops but the proc_ns_operations
is not implemented in this case.

[7.670023] Unable to handle kernel NULL pointer dereference at virtual address 00000010
[7.670268] pgd = 32b54000
[7.670544] [00000010] *pgd=00000000
[7.671861] Internal error: Oops: 5 [#1] SMP ARM
[7.672315] Modules linked in:
[7.672918] CPU: 0 PID: 1 Comm: systemd Not tainted 5.13.0-rc3-00375-g6799d4f2da49 #16
[7.673309] Hardware name: Generic DT based system
[7.673642] PC is at nsfs_evict+0x24/0x30
[7.674486] LR is at clear_inode+0x20/0x9c

The same applies to the tun SIOCGSKNS command.

To fix this problem, we make get_net_ns() return -EINVAL when NET_NS is
disabled. Meanwhile, move it to the right place, net/core/net_namespace.c.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Fixes: c62cce2caee5 ("net: add an ioctl to get a socket network namespace")
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h      |  2 --
 include/net/net_namespace.h |  7 +++++++
 net/core/net_namespace.c    | 12 ++++++++++++
 net/socket.c                | 13 -------------
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index b8fc5c53ba6fa..0d8e3dcb7f881 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
 			    int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index fa5887143f0d2..6412d7833d97a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -184,6 +184,8 @@ struct net *copy_net_ns(unsigned long flags, struct user_namespace *user_ns,
 void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
 
 void net_ns_barrier(void);
+
+struct ns_common *get_net_ns(struct ns_common *ns);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
@@ -203,6 +205,11 @@ static inline void net_ns_get_ownership(const struct net *net,
 }
 
 static inline void net_ns_barrier(void) {}
+
+static inline struct ns_common *get_net_ns(struct ns_common *ns)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_NET_NS */
 
 
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 43b6ac4c44395..cc8dafb25d612 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -641,6 +641,18 @@ void __put_net(struct net *net)
 }
 EXPORT_SYMBOL_GPL(__put_net);
 
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+struct ns_common *get_net_ns(struct ns_common *ns)
+{
+	return &get_net(container_of(ns, struct net, ns))->ns;
+}
+EXPORT_SYMBOL_GPL(get_net_ns);
+
 struct net *get_net_ns_by_fd(int fd)
 {
 	struct file *file;
diff --git a/net/socket.c b/net/socket.c
index 27e3e7d53f8e8..4f2c6d2795d0a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1072,19 +1072,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *	what to do with it - that's up to the protocol still.
  */
 
-/**
- *	get_net_ns - increment the refcount of the network namespace
- *	@ns: common namespace (net)
- *
- *	Returns the net's common namespace.
- */
-
-struct ns_common *get_net_ns(struct ns_common *ns)
-{
-	return &get_net(container_of(ns, struct net, ns))->ns;
-}
-EXPORT_SYMBOL_GPL(get_net_ns);
-
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	struct socket *sock;
-- 
GitLab


From 2e3025434a6ba090c85871a1d4080ff784109e1f Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 11 Jun 2021 09:54:42 +0800
Subject: [PATCH 2761/3804] mm: relocate 'write_protect_seq' in struct
 mm_struct

0day robot reported a 9.2% regression for will-it-scale mmap1 test
case[1], caused by commit 57efa1fe5957 ("mm/gup: prevent gup_fast from
racing with COW during fork").

Further debugging shows the regression is due to that commit changing the
offset of the hot field 'mmap_lock' inside structure 'mm_struct', which
alters its cache alignment.

From the perf data, the contention for 'mmap_lock' is very severe and
takes around 95% cpu cycles, and it is a rw_semaphore

        struct rw_semaphore {
                atomic_long_t count;	/* 8 bytes */
                atomic_long_t owner;	/* 8 bytes */
                struct optimistic_spin_queue osq; /* spinner MCS lock */
                ...

Before commit 57efa1fe5957 added 'write_protect_seq', mmap_lock happened
to have a very optimal cache alignment layout, as Linus explained:

 "and before the addition of the 'write_protect_seq' field, the
  mmap_sem was at offset 120 in 'struct mm_struct'.

  Which meant that count and owner were in two different cachelines,
  and then when you have contention and spend time in
  rwsem_down_write_slowpath(), this is probably *exactly* the kind
  of layout you want.

  Because first the rwsem_write_trylock() will do a cmpxchg on the
  first cacheline (for the optimistic fast-path), and then in the
  case of contention, rwsem_down_write_slowpath() will just access
  the second cacheline.

  Which is probably just optimal for a load that spends a lot of
  time contended - new waiters touch that first cacheline, and then
  they queue themselves up on the second cacheline."

After the commit, the rw_semaphore is at offset 128, which means the
'count' and 'owner' fields are now in the same cacheline, and causes
more cache bouncing.

Currently there are 3 "#ifdef CONFIG_XXX" before 'mmap_lock' which will
affect its offset:

  CONFIG_MMU
  CONFIG_MEMBARRIER
  CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES

The layout above is on 64 bits system with 0day's default kernel config
(similar to RHEL-8.3's config), in which all these 3 options are 'y'.
And the layout can vary with different kernel configs.

Relayouting a structure is usually a double-edged sword, as sometimes it
can help one case, but hurt other cases.  For this case, one solution
is, as the newly added 'write_protect_seq' is a 4 bytes long seqcount_t
(when CONFIG_DEBUG_LOCK_ALLOC=n), placing it into an existing 4 bytes
hole in 'mm_struct' will not change other fields' alignment, while
resolving the regression.

Link: https://lore.kernel.org/lkml/20210525031636.GB7744@xsang-OptiPlex-9020/ [1]
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5aacc1c10a45a..8f0fb62e8975c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,13 +445,6 @@ struct mm_struct {
 		 */
 		atomic_t has_pinned;
 
-		/**
-		 * @write_protect_seq: Locked when any thread is write
-		 * protecting pages mapped by this mm to enforce a later COW,
-		 * for instance during page table copying for fork().
-		 */
-		seqcount_t write_protect_seq;
-
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
@@ -460,6 +453,18 @@ struct mm_struct {
 		spinlock_t page_table_lock; /* Protects page tables and some
 					     * counters
 					     */
+		/*
+		 * With some kernel config, the current mmap_lock's offset
+		 * inside 'mm_struct' is at 0x120, which is very optimal, as
+		 * its two hot fields 'count' and 'owner' sit in 2 different
+		 * cachelines,  and when mmap_lock is highly contended, both
+		 * of the 2 fields will be accessed frequently, current layout
+		 * will help to reduce cache bouncing.
+		 *
+		 * So please be careful with adding new fields before
+		 * mmap_lock, which can easily push the 2 fields into one
+		 * cacheline.
+		 */
 		struct rw_semaphore mmap_lock;
 
 		struct list_head mmlist; /* List of maybe swapped mm's.	These
@@ -480,7 +485,15 @@ struct mm_struct {
 		unsigned long stack_vm;	   /* VM_STACK */
 		unsigned long def_flags;
 
+		/**
+		 * @write_protect_seq: Locked when any thread is write
+		 * protecting pages mapped by this mm to enforce a later COW,
+		 * for instance during page table copying for fork().
+		 */
+		seqcount_t write_protect_seq;
+
 		spinlock_t arg_lock; /* protect the below fields */
+
 		unsigned long start_code, end_code, start_data, end_data;
 		unsigned long start_brk, brk, start_stack;
 		unsigned long arg_start, arg_end, env_start, env_end;
-- 
GitLab


From 5d2388dbf84adebeb6d9742164be8d32728e4269 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Sun, 6 Jun 2021 15:09:40 -0700
Subject: [PATCH 2762/3804] riscv32: Use medany C model for modules

When CONFIG_CMODEL_MEDLOW is used it ends up generating riscv_hi20_rela
relocations in modules which are not resolved at runtime, and the
following errors would be seen

[    4.802714] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 39148b7b
[    4.854800] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 9774456d

Signed-off-by: Khem Raj <raj.khem@gmail.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4be0206954289..99ecd8bcfd77f 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -16,7 +16,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
 	CC_FLAGS_FTRACE := -fpatchable-function-entry=8
 endif
 
-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
+ifeq ($(CONFIG_CMODEL_MEDLOW),y)
 KBUILD_CFLAGS_MODULE += -mcmodel=medany
 endif
 
-- 
GitLab


From 01f5315dd7327b53a5f538b74a2338a651b1832d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 22 May 2021 14:20:36 -0700
Subject: [PATCH 2763/3804] riscv: sifive: fix Kconfig errata warning

The SOC_SIFIVE Kconfig entry unconditionally selects ERRATA_SIFIVE.
However, ERRATA_SIFIVE depends on RISCV_ERRATA_ALTERNATIVE, which is
not set, so SOC_SIFIVE should either depend on or select
RISCV_ERRATA_ALTERNATIVE. Use 'select' here to quieten the Kconfig
warning.

WARNING: unmet direct dependencies detected for ERRATA_SIFIVE
  Depends on [n]: RISCV_ERRATA_ALTERNATIVE [=n]
  Selected by [y]:
  - SOC_SIFIVE [=y]

Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: linux-riscv@lists.infradead.org
Cc: Vincent Chen <vincent.chen@sifive.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/Kconfig.socs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index ed963761fbd2f..30676ebb16ebd 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -14,6 +14,7 @@ config SOC_SIFIVE
 	select CLK_SIFIVE
 	select CLK_SIFIVE_PRCI
 	select SIFIVE_PLIC
+	select RISCV_ERRATA_ALTERNATIVE
 	select ERRATA_SIFIVE
 	help
 	  This enables support for SiFive SoC platform hardware.
-- 
GitLab


From 009c9aa5be652675a06d5211e1640e02bbb1c33d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 13 Jun 2021 14:43:10 -0700
Subject: [PATCH 2764/3804] Linux 5.13-rc6

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 144d4f8b7eb32..ed669b2d705dc 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
-- 
GitLab


From c098564d91c55d408ed31e8885b915a5e2006249 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 13 Jun 2021 21:20:59 +0100
Subject: [PATCH 2765/3804] tools: Fix "the the" in a message in
 kernel-chktaint

There is a double "the" in a message in kernel-chktaint, fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210613202059.80403-1-colin.king@canonical.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 tools/debugging/kernel-chktaint | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 719f18b1edf0d..f1af27ce9f200 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -196,7 +196,7 @@ else
 fi
 
 echo "For a more detailed explanation of the various taint flags see"
-echo " Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel sources"
+echo " Documentation/admin-guide/tainted-kernels.rst in the Linux kernel sources"
 echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html"
 echo "Raw taint value as int/string: $taint/'$out'"
 #EOF#
-- 
GitLab


From a9edc03f13dbd51095b38ef0371d24e7ec7ae693 Mon Sep 17 00:00:00 2001
From: Kir Kolyshkin <kolyshkin@gmail.com>
Date: Thu, 10 Jun 2021 20:00:44 -0700
Subject: [PATCH 2766/3804] docs: fix a cross-ref

Commit acda97acb2e98c9 changes dax.txt to dax.rst.
Fix the references accordingly.

Cc: Igor Matheus Andrade Torrente <igormtorrente@gmail.com>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
Link: https://lore.kernel.org/r/20210611030044.1982911-4-kolyshkin@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/ext4.rst | 2 +-
 Documentation/filesystems/ext2.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index d2795ca6821ec..4c559e08d11ee 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -392,7 +392,7 @@ When mounting an ext4 filesystem, the following option are accepted:
 
   dax
         Use direct access (no page cache).  See
-        Documentation/filesystems/dax.txt.  Note that this option is
+        Documentation/filesystems/dax.rst.  Note that this option is
         incompatible with data=journal.
 
   inlinecrypt
diff --git a/Documentation/filesystems/ext2.rst b/Documentation/filesystems/ext2.rst
index c2fce22cfd035..154101cf0e4f5 100644
--- a/Documentation/filesystems/ext2.rst
+++ b/Documentation/filesystems/ext2.rst
@@ -25,7 +25,7 @@ check=none, nocheck	(*)	Don't do extra checking of bitmaps on mount
 				(check=normal and check=strict options removed)
 
 dax				Use direct access (no page cache).  See
-				Documentation/filesystems/dax.txt.
+				Documentation/filesystems/dax.rst.
 
 debug				Extra debugging information is sent to the
 				kernel syslog.  Useful for developers.
-- 
GitLab


From b78f4a596692f6805e796a4c13f2d921b8a95166 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:15 -0700
Subject: [PATCH 2767/3804] KVM: selftests: Rename vm_handle_exception

Rename the vm_handle_exception function to a name that indicates more
clearly that it installs something: vm_install_exception_handler.

Reported-by: kernel test robot <oliver.sang@intel.com>
Suggested-by: Marc Zyngier <maz@kernel.org>
Suggested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-2-ricarkol@google.com
---
 tools/testing/selftests/kvm/include/x86_64/processor.h    | 2 +-
 tools/testing/selftests/kvm/lib/x86_64/processor.c        | 4 ++--
 tools/testing/selftests/kvm/x86_64/evmcs_test.c           | 4 ++--
 tools/testing/selftests/kvm/x86_64/kvm_pv_test.c          | 2 +-
 .../selftests/kvm/x86_64/userspace_msr_exit_test.c        | 8 ++++----
 tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c       | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c386..e9f5849913323 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -391,7 +391,7 @@ struct ex_regs {
 
 void vm_init_descriptor_tables(struct kvm_vm *vm);
 void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 			void (*handler)(struct ex_regs *));
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index efe2350444213..257c5c33d04e7 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1244,8 +1244,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
 	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
 }
 
-void vm_handle_exception(struct kvm_vm *vm, int vector,
-			 void (*handler)(struct ex_regs *))
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+			       void (*handler)(struct ex_regs *))
 {
 	vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
 
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 63096cea26c61..0864b2e3fd9e2 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -154,8 +154,8 @@ int main(int argc, char *argv[])
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
-	vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+	vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
 
 	pr_info("Running L1 which uses EVMCS to run L2\n");
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 732b244d69564..04ed975662c96 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -227,7 +227,7 @@ int main(void)
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
-	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 
 	enter_guest(vm);
 	kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 72c0d07975221..e3e20e8848d0c 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, VCPU_ID);
 
-	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+	vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
 
 	/* Process guest code userspace exits. */
 	run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
 	run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
 	run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
 
-	vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
 	run_guest(vm);
-	vm_handle_exception(vm, UD_VECTOR, NULL);
+	vm_install_exception_handler(vm, UD_VECTOR, NULL);
 
 	if (process_ucall(vm) != UCALL_DONE) {
-		vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+		vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
 
 		/* Process emulated rdmsr and wrmsr instructions. */
 		run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c9..ed27269a01bb2 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -462,7 +462,7 @@ int main(int argc, char *argv[])
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
-	vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
 
 	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
 
-- 
GitLab


From b7326c01122683b88e273a0cc826cd4c01234470 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:16 -0700
Subject: [PATCH 2768/3804] KVM: selftests: Complete x86_64/sync_regs_test
 ucall

The guest in sync_regs_test does raw ucalls by directly accessing the
ucall IO port. It makes these ucalls without setting %rdi to a `struct
ucall`, which is what a ucall uses to pass messages.  The issue is that
if the host did a get_ucall (the receiver side), it would try to access
the `struct ucall` at %rdi=0 which would lead to an error ("No mapping
for vm virtual address, gva: 0x0").

This issue is currently benign as there is no get_ucall in
sync_regs_test; however, that will change in the next commit as it
changes the unhandled exception reporting mechanism to use ucalls.  In
that case, every vcpu_run is followed by a get_ucall to check if the
guest is trying to report an unhandled exception.

Fix this in advance by setting %rdi to a UCALL_NONE struct ucall for the
sync_regs_test guest.

Tested with gcc-[8,9,10], and clang-[9,11].

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-3-ricarkol@google.com
---
 tools/testing/selftests/kvm/x86_64/sync_regs_test.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f87..fc03a150278d0 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -24,6 +24,10 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
+struct ucall uc_none = {
+	.cmd = UCALL_NONE,
+};
+
 /*
  * ucall is embedded here to protect against compiler reshuffling registers
  * before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +38,8 @@ void guest_code(void)
 	asm volatile("1: in %[port], %%al\n"
 		     "add $0x1, %%rbx\n"
 		     "jmp 1b"
-		     : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+		     : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+		     : "rax", "rbx");
 }
 
 static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
-- 
GitLab


From 75275d7fbef47805b77e8af81a4d51e2d92db70f Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:17 -0700
Subject: [PATCH 2769/3804] KVM: selftests: Introduce UCALL_UNHANDLED for
 unhandled vector reporting

x86, the only arch implementing exception handling, reports unhandled
vectors using port IO at a specific port number. This replicates what
ucall already does.

Introduce a new ucall type, UCALL_UNHANDLED, for guests to report
unhandled exceptions. Then replace the x86 unhandled vector exception
reporting to use it instead of port IO.  This new ucall type will be
used in the next commits by arm64 to report unhandled vectors as well.

Tested: Forcing a page fault in the ./x86_64/xapic_ipi_test
	halter_guest_code() shows this:

	$ ./x86_64/xapic_ipi_test
	...
	  Unexpected vectored event in guest (vector:0xe)

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-4-ricarkol@google.com
---
 .../testing/selftests/kvm/include/kvm_util.h  |  1 +
 .../selftests/kvm/include/x86_64/processor.h  |  2 --
 .../selftests/kvm/lib/x86_64/processor.c      | 19 ++++++++-----------
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index fcd8e3855111c..beb76d6deaa95 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -349,6 +349,7 @@ enum {
 	UCALL_SYNC,
 	UCALL_ABORT,
 	UCALL_DONE,
+	UCALL_UNHANDLED,
 };
 
 #define UCALL_MAX_ARGS 6
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e9f5849913323..92a62c6999bc4 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -53,8 +53,6 @@
 #define CPUID_PKU		(1ul << 3)
 #define CPUID_LA57		(1ul << 16)
 
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
-
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
 	u64 rax;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 257c5c33d04e7..a217515a9bc23 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1201,7 +1201,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
 
 void kvm_exit_unexpected_vector(uint32_t value)
 {
-	outl(UNEXPECTED_VECTOR_PORT, value);
+	ucall(UCALL_UNHANDLED, 1, value);
 }
 
 void route_exception(struct ex_regs *regs)
@@ -1254,16 +1254,13 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
-	if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
-		&& vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
-		&& vcpu_state(vm, vcpuid)->io.size == 4) {
-		/* Grab pointer to io data */
-		uint32_t *data = (void *)vcpu_state(vm, vcpuid)
-			+ vcpu_state(vm, vcpuid)->io.data_offset;
-
-		TEST_ASSERT(false,
-			    "Unexpected vectored event in guest (vector:0x%x)",
-			    *data);
+	struct ucall uc;
+
+	if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+		uint64_t vector = uc.args[0];
+
+		TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+			  vector);
 	}
 }
 
-- 
GitLab


From 67f709f52bf0b5c19f24d1234163123cbb6af545 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:18 -0700
Subject: [PATCH 2770/3804] KVM: selftests: Move GUEST_ASSERT_EQ to utils
 header

Move GUEST_ASSERT_EQ to a common header, kvm_util.h, for other
architectures and tests to use. Also modify __GUEST_ASSERT so it can be
reused to implement GUEST_ASSERT_EQ.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-5-ricarkol@google.com
---
 .../testing/selftests/kvm/include/kvm_util.h  | 22 ++++++++++---------
 .../selftests/kvm/x86_64/tsc_msrs_test.c      |  9 --------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index beb76d6deaa95..ce49e22843d8c 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -368,26 +368,28 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
 #define GUEST_DONE()		ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do {	\
-	if (!(_condition))					\
-		ucall(UCALL_ABORT, 2 + _nargs,			\
-			"Failed guest assert: "			\
-			#_condition, __LINE__, _args);		\
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do {    \
+	if (!(_condition))                                              \
+		ucall(UCALL_ABORT, 2 + _nargs,                          \
+			"Failed guest assert: "                         \
+			_condstr, __LINE__, _args);                     \
 } while (0)
 
 #define GUEST_ASSERT(_condition) \
-	__GUEST_ASSERT((_condition), 0, 0)
+	__GUEST_ASSERT(_condition, #_condition, 0, 0)
 
 #define GUEST_ASSERT_1(_condition, arg1) \
-	__GUEST_ASSERT((_condition), 1, (arg1))
+	__GUEST_ASSERT(_condition, #_condition, 1, (arg1))
 
 #define GUEST_ASSERT_2(_condition, arg1, arg2) \
-	__GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+	__GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
 
 #define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
-	__GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+	__GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
 
 #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
-	__GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+	__GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
 
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index e357d8e222d47..5a6a662f2e590 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -18,15 +18,6 @@
 #define rounded_rdmsr(x)       ROUND(rdmsr(x))
 #define rounded_host_rdmsr(x)  ROUND(vcpu_get_msr(vm, 0, x))
 
-#define GUEST_ASSERT_EQ(a, b) do {				\
-	__typeof(a) _a = (a);					\
-	__typeof(b) _b = (b);					\
-	if (_a != _b)						\
-                ucall(UCALL_ABORT, 4,				\
-                        "Failed guest assert: "			\
-                        #a " == " #b, __LINE__, _a, _b);	\
-  } while(0)
-
 static void guest_code(void)
 {
 	u64 val = 0;
-- 
GitLab


From e3db7579ef355a0b2bfef4448b84d9ac882c8f2c Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:19 -0700
Subject: [PATCH 2771/3804] KVM: selftests: Add exception handling support for
 aarch64

Add the infrastructure needed to enable exception handling in aarch64
selftests. The exception handling defaults to an unhandled-exception
handler which aborts the test, just like x86. These handlers can be
overridden by calling vm_install_exception_handler(vector) or
vm_install_sync_handler(vector, ec). The unhandled exception reporting
from the guest is done using the ucall type introduced in a previous
commit, UCALL_UNHANDLED.

The exception handling code is inspired by kvm-unit-tests.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-6-ricarkol@google.com
---
 tools/testing/selftests/kvm/Makefile          |   2 +-
 .../selftests/kvm/include/aarch64/processor.h |  63 +++++++++
 .../selftests/kvm/lib/aarch64/handlers.S      | 126 ++++++++++++++++++
 .../selftests/kvm/lib/aarch64/processor.c     |  97 ++++++++++++++
 4 files changed, 287 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/kvm/lib/aarch64/handlers.S

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index daaee1888b128..a77e6063f7e96 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -35,7 +35,7 @@ endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db4..b2b3e9d626cb5 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,6 +8,7 @@
 #define SELFTEST_KVM_PROCESSOR_H
 
 #include "kvm_util.h"
+#include <linux/stringify.h>
 
 
 #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
@@ -18,6 +19,7 @@
 #define MAIR_EL1	3, 0, 10, 2, 0
 #define TTBR0_EL1	3, 0,  2, 0, 0
 #define SCTLR_EL1	3, 0,  1, 0, 0
+#define VBAR_EL1	3, 0, 12, 0, 0
 
 /*
  * Default MAIR
@@ -56,4 +58,65 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
 			      struct kvm_vcpu_init *init, void *guest_code);
 
+struct ex_regs {
+	u64 regs[31];
+	u64 sp;
+	u64 pc;
+	u64 pstate;
+};
+
+#define VECTOR_NUM	16
+
+enum {
+	VECTOR_SYNC_CURRENT_SP0,
+	VECTOR_IRQ_CURRENT_SP0,
+	VECTOR_FIQ_CURRENT_SP0,
+	VECTOR_ERROR_CURRENT_SP0,
+
+	VECTOR_SYNC_CURRENT,
+	VECTOR_IRQ_CURRENT,
+	VECTOR_FIQ_CURRENT,
+	VECTOR_ERROR_CURRENT,
+
+	VECTOR_SYNC_LOWER_64,
+	VECTOR_IRQ_LOWER_64,
+	VECTOR_FIQ_LOWER_64,
+	VECTOR_ERROR_LOWER_64,
+
+	VECTOR_SYNC_LOWER_32,
+	VECTOR_IRQ_LOWER_32,
+	VECTOR_FIQ_LOWER_32,
+	VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+			   (v) == VECTOR_SYNC_CURRENT     || \
+			   (v) == VECTOR_SYNC_LOWER_64    || \
+			   (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM		64
+#define ESR_EC_SHIFT		26
+#define ESR_EC_MASK		(ESR_EC_NUM - 1)
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+		int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+		int vector, int ec, handler_fn handler);
+
+#define write_sysreg(reg, val)						  \
+({									  \
+	u64 __val = (u64)(val);						  \
+	asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val));	  \
+})
+
+#define read_sysreg(reg)						  \
+({	u64 val;							  \
+	asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
+	val;								  \
+})
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 0000000000000..0e443eadfac60
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+	add	sp, sp, #-16 * 17
+
+	stp	x0, x1, [sp, #16 * 0]
+	stp	x2, x3, [sp, #16 * 1]
+	stp	x4, x5, [sp, #16 * 2]
+	stp	x6, x7, [sp, #16 * 3]
+	stp	x8, x9, [sp, #16 * 4]
+	stp	x10, x11, [sp, #16 * 5]
+	stp	x12, x13, [sp, #16 * 6]
+	stp	x14, x15, [sp, #16 * 7]
+	stp	x16, x17, [sp, #16 * 8]
+	stp	x18, x19, [sp, #16 * 9]
+	stp	x20, x21, [sp, #16 * 10]
+	stp	x22, x23, [sp, #16 * 11]
+	stp	x24, x25, [sp, #16 * 12]
+	stp	x26, x27, [sp, #16 * 13]
+	stp	x28, x29, [sp, #16 * 14]
+
+	/*
+	 * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+	 * at it. It will _not_ be used to restore the sp on return from the
+	 * exception so handlers can not update it.
+	 */
+	add	x1, sp, #16 * 17
+	stp	x30, x1, [sp, #16 * 15] /* x30, SP */
+
+	mrs	x1, elr_el1
+	mrs	x2, spsr_el1
+	stp	x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+	ldp	x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+	msr	elr_el1, x1
+	msr	spsr_el1, x2
+
+	/* sp is not restored */
+	ldp	x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+	ldp	x28, x29, [sp, #16 * 14]
+	ldp	x26, x27, [sp, #16 * 13]
+	ldp	x24, x25, [sp, #16 * 12]
+	ldp	x22, x23, [sp, #16 * 11]
+	ldp	x20, x21, [sp, #16 * 10]
+	ldp	x18, x19, [sp, #16 * 9]
+	ldp	x16, x17, [sp, #16 * 8]
+	ldp	x14, x15, [sp, #16 * 7]
+	ldp	x12, x13, [sp, #16 * 6]
+	ldp	x10, x11, [sp, #16 * 5]
+	ldp	x8, x9, [sp, #16 * 4]
+	ldp	x6, x7, [sp, #16 * 3]
+	ldp	x4, x5, [sp, #16 * 2]
+	ldp	x2, x3, [sp, #16 * 1]
+	ldp	x0, x1, [sp, #16 * 0]
+
+	add	sp, sp, #16 * 17
+
+	eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set	vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+	save_registers
+	mov	x0, sp
+	mov	x1, #vector
+	bl	route_exception
+	restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+	b	handler_\label
+.popsection
+
+.set	vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+	mov	x0, #vector
+	mov	x1, #0 /* ec */
+	mov	x2, #0 /* valid_ec */
+	b	kvm_exit_unexpected_exception
+.popsection
+
+.set	vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+	HANDLER_INVALID                         // Synchronous EL1t
+	HANDLER_INVALID                         // IRQ EL1t
+	HANDLER_INVALID                         // FIQ EL1t
+	HANDLER_INVALID                         // Error EL1t
+
+	HANDLER	el1h_sync                       // Synchronous EL1h
+	HANDLER	el1h_irq                        // IRQ EL1h
+	HANDLER el1h_fiq                        // FIQ EL1h
+	HANDLER	el1h_error                      // Error EL1h
+
+	HANDLER	el0_sync_64                     // Synchronous 64-bit EL0
+	HANDLER	el0_irq_64                      // IRQ 64-bit EL0
+	HANDLER	el0_fiq_64                      // FIQ 64-bit EL0
+	HANDLER	el0_error_64                    // Error 64-bit EL0
+
+	HANDLER	el0_sync_32                     // Synchronous 32-bit EL0
+	HANDLER	el0_irq_32                      // IRQ 32-bit EL0
+	HANDLER	el0_fiq_32                      // FIQ 32-bit EL0
+	HANDLER	el0_error_32                    // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0c..48b55c93f8582 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -6,6 +6,7 @@
  */
 
 #include <linux/compiler.h>
+#include <assert.h>
 
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
@@ -14,6 +15,8 @@
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
 
+static vm_vaddr_t exception_handlers;
+
 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
 {
 	return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -334,6 +337,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
 	va_end(ap);
 }
 
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+	ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+	while (1)
+		;
+}
+
 void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
 {
+	struct ucall uc;
+
+	if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+		return;
+
+	if (uc.args[2]) /* valid_ec */ {
+		assert(VECTOR_IS_SYNC(uc.args[0]));
+		TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+			  uc.args[0], uc.args[1]);
+	} else {
+		assert(!VECTOR_IS_SYNC(uc.args[0]));
+		TEST_FAIL("Unexpected exception (vector:0x%lx)",
+			  uc.args[0]);
+	}
+}
+
+struct handlers {
+	handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	extern char vectors;
+
+	set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+	struct handlers *handlers = (struct handlers *)exception_handlers;
+	bool valid_ec;
+	int ec = 0;
+
+	switch (vector) {
+	case VECTOR_SYNC_CURRENT:
+	case VECTOR_SYNC_LOWER_64:
+		ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+		valid_ec = true;
+		break;
+	case VECTOR_IRQ_CURRENT:
+	case VECTOR_IRQ_LOWER_64:
+	case VECTOR_FIQ_CURRENT:
+	case VECTOR_FIQ_LOWER_64:
+	case VECTOR_ERROR_CURRENT:
+	case VECTOR_ERROR_LOWER_64:
+		ec = 0;
+		valid_ec = false;
+		break;
+	default:
+		valid_ec = false;
+		goto unexpected_exception;
+	}
+
+	if (handlers && handlers->exception_handlers[vector][ec])
+		return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+	kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+	vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+			vm->page_size, 0, 0);
+
+	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+			 void (*handler)(struct ex_regs *))
+{
+	struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+	assert(VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+	assert(ec < ESR_EC_NUM);
+	handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+			 void (*handler)(struct ex_regs *))
+{
+	struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+	assert(!VECTOR_IS_SYNC(vector));
+	assert(vector < VECTOR_NUM);
+	handlers->exception_handlers[vector][0] = handler;
 }
-- 
GitLab


From 4f05223acaeaabe0a1a188e25fab334735d85c5e Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Thu, 10 Jun 2021 18:10:20 -0700
Subject: [PATCH 2772/3804] KVM: selftests: Add aarch64/debug-exceptions test

Covers fundamental tests for debug exceptions. The guest installs and
handles its debug exceptions itself, without KVM_SET_GUEST_DEBUG.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210611011020.3420067-7-ricarkol@google.com
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/aarch64/debug-exceptions.c  | 250 ++++++++++++++++++
 .../selftests/kvm/include/aarch64/processor.h |  22 +-
 4 files changed, 268 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/aarch64/debug-exceptions.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 524c857a049c3..7e2c66155b068 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/get-reg-list-sve
 /aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a77e6063f7e96..36e4ebcc82f0e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -78,6 +78,7 @@ TEST_GEN_PROGS_x86_64 += memslot_perf_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index 0000000000000..e5e6c92b60da6
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE	(1 << 13)
+#define MDSCR_MDE	(1 << 15)
+#define MDSCR_SS	(1 << 0)
+
+#define DBGBCR_LEN8	(0xff << 5)
+#define DBGBCR_EXEC	(0x0 << 3)
+#define DBGBCR_EL1	(0x1 << 1)
+#define DBGBCR_E	(0x1 << 0)
+
+#define DBGWCR_LEN8	(0xff << 5)
+#define DBGWCR_RD	(0x1 << 3)
+#define DBGWCR_WR	(0x2 << 3)
+#define DBGWCR_EL1	(0x1 << 1)
+#define DBGWCR_E	(0x1 << 0)
+
+#define SPSR_D		(1 << 9)
+#define SPSR_SS		(1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define  PC(v)  ((uint64_t)&(v))
+
+static void reset_debug_state(void)
+{
+	asm volatile("msr daifset, #8");
+
+	write_sysreg(osdlr_el1, 0);
+	write_sysreg(oslar_el1, 0);
+	isb();
+
+	write_sysreg(mdscr_el1, 0);
+	/* This test only uses the first bp and wp slot. */
+	write_sysreg(dbgbvr0_el1, 0);
+	write_sysreg(dbgbcr0_el1, 0);
+	write_sysreg(dbgwcr0_el1, 0);
+	write_sysreg(dbgwvr0_el1, 0);
+	isb();
+}
+
+static void install_wp(uint64_t addr)
+{
+	uint32_t wcr;
+	uint32_t mdscr;
+
+	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+	write_sysreg(dbgwcr0_el1, wcr);
+	write_sysreg(dbgwvr0_el1, addr);
+	isb();
+
+	asm volatile("msr daifclr, #8");
+
+	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+	write_sysreg(mdscr_el1, mdscr);
+	isb();
+}
+
+static void install_hw_bp(uint64_t addr)
+{
+	uint32_t bcr;
+	uint32_t mdscr;
+
+	bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+	write_sysreg(dbgbcr0_el1, bcr);
+	write_sysreg(dbgbvr0_el1, addr);
+	isb();
+
+	asm volatile("msr daifclr, #8");
+
+	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+	write_sysreg(mdscr_el1, mdscr);
+	isb();
+}
+
+static void install_ss(void)
+{
+	uint32_t mdscr;
+
+	asm volatile("msr daifclr, #8");
+
+	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+	write_sysreg(mdscr_el1, mdscr);
+	isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(void)
+{
+	GUEST_SYNC(0);
+
+	/* Software-breakpoint */
+	asm volatile("sw_bp: brk #0");
+	GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+	GUEST_SYNC(1);
+
+	/* Hardware-breakpoint */
+	reset_debug_state();
+	install_hw_bp(PC(hw_bp));
+	asm volatile("hw_bp: nop");
+	GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+	GUEST_SYNC(2);
+
+	/* Hardware-breakpoint + svc */
+	reset_debug_state();
+	install_hw_bp(PC(bp_svc));
+	asm volatile("bp_svc: svc #0");
+	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+	GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+	GUEST_SYNC(3);
+
+	/* Hardware-breakpoint + software-breakpoint */
+	reset_debug_state();
+	install_hw_bp(PC(bp_brk));
+	asm volatile("bp_brk: brk #0");
+	GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+	GUEST_SYNC(4);
+
+	/* Watchpoint */
+	reset_debug_state();
+	install_wp(PC(write_data));
+	write_data = 'x';
+	GUEST_ASSERT_EQ(write_data, 'x');
+	GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+	GUEST_SYNC(5);
+
+	/* Single-step */
+	reset_debug_state();
+	install_ss();
+	ss_idx = 0;
+	asm volatile("ss_start:\n"
+		     "mrs x0, esr_el1\n"
+		     "add x0, x0, #1\n"
+		     "msr daifset, #8\n"
+		     : : : "x0");
+	GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+	GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+	GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+	GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+	sw_bp_addr = regs->pc;
+	regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+	hw_bp_addr = regs->pc;
+	regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+	wp_data_addr = read_sysreg(far_el1);
+	wp_addr = regs->pc;
+	regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+	ss_addr[ss_idx++] = regs->pc;
+	regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+	svc_addr = regs->pc;
+}
+
+static int debug_version(struct kvm_vm *vm)
+{
+	uint64_t id_aa64dfr0;
+
+	get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+	return id_aa64dfr0 & 0xf;
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct ucall uc;
+	int stage;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	ucall_init(vm, NULL);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	if (debug_version(vm) < 6) {
+		print_skip("Armv8 debug architecture not supported.");
+		kvm_vm_free(vm);
+		exit(KSFT_SKIP);
+	}
+
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_BRK_INS, guest_sw_bp_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_WP_CURRENT, guest_wp_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_SVC64, guest_svc_handler);
+
+	for (stage = 0; stage < 7; stage++) {
+		vcpu_run(vm, VCPU_ID);
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == stage,
+				"Stage %d: Unexpected sync ucall, got %lx",
+				stage, (ulong)uc.args[1]);
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+				(const char *)uc.args[0],
+				__FILE__, uc.args[1], uc.args[2], uc.args[3]);
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b2b3e9d626cb5..27dc5c2e56b93 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -14,12 +14,14 @@
 #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
-#define CPACR_EL1	3, 0,  1, 0, 2
-#define TCR_EL1		3, 0,  2, 0, 2
-#define MAIR_EL1	3, 0, 10, 2, 0
-#define TTBR0_EL1	3, 0,  2, 0, 0
-#define SCTLR_EL1	3, 0,  1, 0, 0
-#define VBAR_EL1	3, 0, 12, 0, 0
+#define CPACR_EL1               3, 0,  1, 0, 2
+#define TCR_EL1                 3, 0,  2, 0, 2
+#define MAIR_EL1                3, 0, 10, 2, 0
+#define TTBR0_EL1               3, 0,  2, 0, 0
+#define SCTLR_EL1               3, 0,  1, 0, 0
+#define VBAR_EL1                3, 0, 12, 0, 0
+
+#define ID_AA64DFR0_EL1         3, 0,  0, 5, 0
 
 /*
  * Default MAIR
@@ -98,6 +100,12 @@ enum {
 #define ESR_EC_SHIFT		26
 #define ESR_EC_MASK		(ESR_EC_NUM - 1)
 
+#define ESR_EC_SVC64		0x15
+#define ESR_EC_HW_BP_CURRENT	0x31
+#define ESR_EC_SSTEP_CURRENT	0x33
+#define ESR_EC_WP_CURRENT	0x35
+#define ESR_EC_BRK_INS		0x3c
+
 void vm_init_descriptor_tables(struct kvm_vm *vm);
 void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
 
@@ -119,4 +127,6 @@ void vm_install_sync_handler(struct kvm_vm *vm,
 	val;								  \
 })
 
+#define isb()	asm volatile("isb" : : : "memory")
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
-- 
GitLab


From f644bc449b37cc32d3ce7b36a88073873aa21bd5 Mon Sep 17 00:00:00 2001
From: Matthew Bobrowski <repnop@google.com>
Date: Fri, 11 Jun 2021 13:32:06 +1000
Subject: [PATCH 2773/3804] fanotify: fix copy_event_to_user() fid error clean
 up

Ensure that clean up is performed on the allocated file descriptor and
struct file object in the event that an error is encountered while copying
fid info objects. Currently, we return directly to the caller when an error
is experienced in the fid info copying helper, which isn't ideal given that
the listener process could be left with a dangling file descriptor in their
fdtable.

Fixes: 5e469c830fdb ("fanotify: copy event fid info to user")
Fixes: 44d705b0370b ("fanotify: report name info for FAN_DIR_MODIFY event")
Link: https://lore.kernel.org/linux-fsdevel/YMKv1U7tNPK955ho@google.com/T/#m15361cd6399dad4396aad650de25dbf6b312288e
Link: https://lore.kernel.org/r/1ef8ae9100101eb1a91763c516c2e9a3a3b112bd.1623376346.git.repnop@google.com
Signed-off-by: Matthew Bobrowski <repnop@google.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index be5b6d2c01e7a..64864fb40b401 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -471,7 +471,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 					info_type, fanotify_info_name(info),
 					info->name_len, buf, count);
 		if (ret < 0)
-			return ret;
+			goto out_close_fd;
 
 		buf += ret;
 		count -= ret;
@@ -519,7 +519,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 					fanotify_event_object_fh(event),
 					info_type, dot, dot_len, buf, count);
 		if (ret < 0)
-			return ret;
+			goto out_close_fd;
 
 		buf += ret;
 		count -= ret;
-- 
GitLab


From b113ec2d8562f5f3e0359c547cba53686ee805e9 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 3 Jun 2021 17:38:09 +0800
Subject: [PATCH 2774/3804] regulator: rt6160: Convert to use
 regulator_set_ramp_delay_regmap

Use regulator_set_ramp_delay_regmap instead of open-coding it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210603093809.1108629-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt6160-regulator.c | 35 +++++++---------------------
 1 file changed, 9 insertions(+), 26 deletions(-)

diff --git a/drivers/regulator/rt6160-regulator.c b/drivers/regulator/rt6160-regulator.c
index 4588ae0748a54..69550284083d8 100644
--- a/drivers/regulator/rt6160-regulator.c
+++ b/drivers/regulator/rt6160-regulator.c
@@ -46,6 +46,10 @@ struct rt6160_priv {
 	bool enable_state;
 };
 
+static const unsigned int rt6160_ramp_tables[] = {
+	1000, 2500, 5000, 10000
+};
+
 static int rt6160_enable(struct regulator_dev *rdev)
 {
 	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
@@ -140,31 +144,6 @@ static int rt6160_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 	return regmap_update_bits(regmap, reg, RT6160_VSEL_MASK, vsel);
 }
 
-static int rt6160_set_ramp_delay(struct regulator_dev *rdev, int target)
-{
-	struct regmap *regmap = rdev_get_regmap(rdev);
-	const int ramp_tables[] = { 1000, 2500, 5000, 10000 };
-	unsigned int i, sel;
-
-	/* Find closest larger or equal */
-	for (i = 0; i < ARRAY_SIZE(ramp_tables); i++) {
-		sel = i;
-
-		/* If ramp delay is equal to 0, directly set ramp speed to fastest */
-		if (target == 0) {
-			sel = ARRAY_SIZE(ramp_tables) - 1;
-			break;
-		}
-
-		if (target <= ramp_tables[i])
-			break;
-	}
-
-	sel <<= ffs(RT6160_RAMPRATE_MASK) - 1;
-
-	return regmap_update_bits(regmap, RT6160_REG_CNTL, RT6160_RAMPRATE_MASK, sel);
-}
-
 static int rt6160_get_error_flags(struct regulator_dev *rdev, unsigned int *flags)
 {
 	struct regmap *regmap = rdev_get_regmap(rdev);
@@ -203,7 +182,7 @@ static const struct regulator_ops rt6160_regulator_ops = {
 	.set_mode = rt6160_set_mode,
 	.get_mode = rt6160_get_mode,
 	.set_suspend_voltage = rt6160_set_suspend_voltage,
-	.set_ramp_delay = rt6160_set_ramp_delay,
+	.set_ramp_delay = regulator_set_ramp_delay_regmap,
 	.get_error_flags = rt6160_get_error_flags,
 };
 
@@ -292,6 +271,10 @@ static int rt6160_probe(struct i2c_client *i2c)
 	priv->desc.vsel_reg = RT6160_REG_VSELH;
 	priv->desc.vsel_mask = RT6160_VSEL_MASK;
 	priv->desc.n_voltages = RT6160_N_VOUTS;
+	priv->desc.ramp_reg = RT6160_REG_CNTL;
+	priv->desc.ramp_mask = RT6160_RAMPRATE_MASK;
+	priv->desc.ramp_delay_table = rt6160_ramp_tables;
+	priv->desc.n_ramp_values = ARRAY_SIZE(rt6160_ramp_tables);
 	priv->desc.of_map_mode = rt6160_of_map_mode;
 	priv->desc.ops = &rt6160_regulator_ops;
 	if (priv->vsel_active_low)
-- 
GitLab


From da3b1486d7e398617d09c022c71593462b98d67f Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 7 Apr 2021 11:15:39 +0800
Subject: [PATCH 2775/3804] mmc: s3cmci: move to use request_irq by
 IRQF_NO_AUTOEN flag

Calling disable_irq() after request_irq() still leaves a time gap in which
interrupts can arrive. Passing the IRQF_NO_AUTOEN flag to request_irq()
prevents the IRQ from being auto-enabled when it is requested.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1617765339-28946-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/s3cmci.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 0ca6f6d30b755..8d5929a32d342 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1578,17 +1578,12 @@ static int s3cmci_probe(struct platform_device *pdev)
 		goto probe_iounmap;
 	}
 
-	if (request_irq(host->irq, s3cmci_irq, 0, DRIVER_NAME, host)) {
+	if (request_irq(host->irq, s3cmci_irq, IRQF_NO_AUTOEN, DRIVER_NAME, host)) {
 		dev_err(&pdev->dev, "failed to request mci interrupt.\n");
 		ret = -ENOENT;
 		goto probe_iounmap;
 	}
 
-	/* We get spurious interrupts even when we have set the IMSK
-	 * register to ignore everything, so use disable_irq() to make
-	 * ensure we don't lock the system with un-serviceable requests. */
-
-	disable_irq(host->irq);
 	host->irq_state = false;
 
 	/* Depending on the dma state, get a DMA channel to use. */
-- 
GitLab


From 6a45d70cda6a6e3fa3cffe37d47495fb3c4a4bfa Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 28 Apr 2021 18:05:00 -0500
Subject: [PATCH 2776/3804] dt-bindings: mmc: sdhci-am654: Remove duplicate
 ti,j721e-sdhci-4bit

The commit 7c7905df68c5 ("dt-bindings: mmc: sdhci-am654: fix compatible
for j7200") switched the compatible property from a regular enum to a
more appropriate combinatorial oneOf convention, and in the process has
introduced a duplicate ti,j721e-sdhci-4bit.

This generated the following warning on J721E boards that use the
ti,j721e-sdhci-4bit for two nodes:
 "mmc@4fb0000: compatible: More than one condition true in oneOf schema"
 "mmc@4f98000: compatible: More than one condition true in oneOf schema"

Remove the duplicate to fix this.

Fixes: 7c7905df68c5 ("dt-bindings: mmc: sdhci-am654: fix compatible for j7200")
Signed-off-by: Suman Anna <s-anna@ti.com>
Link: https://lore.kernel.org/r/20210428230500.19214-1-s-anna@ti.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/sdhci-am654.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml b/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
index 3a79e39253d24..29399e88ac536 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
+++ b/Documentation/devicetree/bindings/mmc/sdhci-am654.yaml
@@ -19,7 +19,6 @@ properties:
       - const: ti,am654-sdhci-5.1
       - const: ti,j721e-sdhci-8bit
       - const: ti,j721e-sdhci-4bit
-      - const: ti,j721e-sdhci-4bit
       - const: ti,am64-sdhci-8bit
       - const: ti,am64-sdhci-4bit
       - items:
-- 
GitLab


From 873e90883069a4e32bc6ecd150b0107f9aa542b8 Mon Sep 17 00:00:00 2001
From: Liang Chen <cl@rock-chips.com>
Date: Thu, 29 Apr 2021 16:11:44 +0800
Subject: [PATCH 2777/3804] dt-bindings: mmc: rockchip-dw-mshc: add description
 for rk3568

Add "rockchip,rk3568-dw-mshc", "rockchip,rk3288-dw-mshc" compatibles for
mmc nodes on a rk3568 platform to rockchip-dw-mshc.yaml. Let's also take the
opportunity to clean up some old redundant comments around previous
compatibles.

Signed-off-by: Liang Chen <cl@rock-chips.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210429081151.17558-4-cl@rock-chips.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../devicetree/bindings/mmc/rockchip-dw-mshc.yaml        | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
index 3762f1c8de96f..eaa3b0ef24f65 100644
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
@@ -29,21 +29,14 @@ properties:
       - const: rockchip,rk3288-dw-mshc
       - items:
           - enum:
-            # for Rockchip PX30
               - rockchip,px30-dw-mshc
-            # for Rockchip RK3036
               - rockchip,rk3036-dw-mshc
-            # for Rockchip RK322x
               - rockchip,rk3228-dw-mshc
-            # for Rockchip RK3308
               - rockchip,rk3308-dw-mshc
-            # for Rockchip RK3328
               - rockchip,rk3328-dw-mshc
-            # for Rockchip RK3368
               - rockchip,rk3368-dw-mshc
-            # for Rockchip RK3399
               - rockchip,rk3399-dw-mshc
-            # for Rockchip RV1108
+              - rockchip,rk3568-dw-mshc
               - rockchip,rv1108-dw-mshc
           - const: rockchip,rk3288-dw-mshc
 
-- 
GitLab


From 94ee6782e045645abd9180ab9369b01293d862bd Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 3 May 2021 11:21:57 +0200
Subject: [PATCH 2778/3804] mmc: sdhci-acpi: Disable write protect detection on
 Toshiba Encore 2 WT8-B

On the Toshiba Encore 2 WT8-B the microSD slot always reports the card
being write-protected even though microSD cards do not have a write-protect
switch at all.

Add a new DMI_QUIRK_SD_NO_WRITE_PROTECT quirk entry to sdhci-acpi.c's
DMI quirk table for this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20210503092157.5689-1-hdegoede@redhat.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Cc: stable@vger.kernel.org
---
 drivers/mmc/host/sdhci-acpi.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index c3fbf8c825c4f..8fe65f172a611 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -822,6 +822,17 @@ static const struct dmi_system_id sdhci_acpi_quirks[] = {
 		},
 		.driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT,
 	},
+	{
+		/*
+		 * The Toshiba WT8-B's microSD slot always reports the card being
+		 * write-protected.
+		 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "TOSHIBA ENCORE 2 WT8-B"),
+		},
+		.driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT,
+	},
 	{} /* Terminating entry */
 };
 
-- 
GitLab


From e62f1e0b2384e25fe61042da3ecf08b7d8262f8d Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:12 +0200
Subject: [PATCH 2779/3804] mmc: core: Drop open coding when preparing commands
 with busy signaling

Similar code for validating the host->max_busy_timeout against the current
command's busy timeout exists in mmc_do_erase(), mmc_sleep() and
__mmc_switch(). Let's move the common code into a helper function.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-2-ulf.hansson@linaro.org
---
 drivers/mmc/core/core.c    | 20 ++----------------
 drivers/mmc/core/mmc.c     | 20 +++---------------
 drivers/mmc/core/mmc_ops.c | 42 +++++++++++++++++++++-----------------
 drivers/mmc/core/mmc_ops.h |  3 +++
 4 files changed, 31 insertions(+), 54 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index f194940c59746..b00c84ea8441c 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1582,7 +1582,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 {
 	struct mmc_command cmd = {};
 	unsigned int qty = 0, busy_timeout = 0;
-	bool use_r1b_resp = false;
+	bool use_r1b_resp;
 	int err;
 
 	mmc_retune_hold(card->host);
@@ -1650,23 +1650,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 	cmd.opcode = MMC_ERASE;
 	cmd.arg = arg;
 	busy_timeout = mmc_erase_timeout(card, arg, qty);
-	/*
-	 * If the host controller supports busy signalling and the timeout for
-	 * the erase operation does not exceed the max_busy_timeout, we should
-	 * use R1B response. Or we need to prevent the host from doing hw busy
-	 * detection, which is done by converting to a R1 response instead.
-	 * Note, some hosts requires R1B, which also means they are on their own
-	 * when it comes to deal with the busy timeout.
-	 */
-	if (!(card->host->caps & MMC_CAP_NEED_RSP_BUSY) &&
-	    card->host->max_busy_timeout &&
-	    busy_timeout > card->host->max_busy_timeout) {
-		cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
-	} else {
-		cmd.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		cmd.busy_timeout = busy_timeout;
-		use_r1b_resp = true;
-	}
+	use_r1b_resp = mmc_prepare_busy_cmd(card->host, &cmd, busy_timeout);
 
 	err = mmc_wait_for_cmd(card->host, &cmd, 0);
 	if (err) {
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 8674c3e0c02c2..63a7bd0b239cd 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1910,6 +1910,7 @@ static int mmc_sleep(struct mmc_host *host)
 	struct mmc_command cmd = {};
 	struct mmc_card *card = host->card;
 	unsigned int timeout_ms = DIV_ROUND_UP(card->ext_csd.sa_timeout, 10000);
+	bool use_r1b_resp;
 	int err;
 
 	/* Re-tuning can't be done once the card is deselected */
@@ -1922,22 +1923,7 @@ static int mmc_sleep(struct mmc_host *host)
 	cmd.opcode = MMC_SLEEP_AWAKE;
 	cmd.arg = card->rca << 16;
 	cmd.arg |= 1 << 15;
-
-	/*
-	 * If the max_busy_timeout of the host is specified, validate it against
-	 * the sleep cmd timeout. A failure means we need to prevent the host
-	 * from doing hw busy detection, which is done by converting to a R1
-	 * response instead of a R1B. Note, some hosts requires R1B, which also
-	 * means they are on their own when it comes to deal with the busy
-	 * timeout.
-	 */
-	if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
-	    (timeout_ms > host->max_busy_timeout)) {
-		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
-	} else {
-		cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
-		cmd.busy_timeout = timeout_ms;
-	}
+	use_r1b_resp = mmc_prepare_busy_cmd(host, &cmd, timeout_ms);
 
 	err = mmc_wait_for_cmd(host, &cmd, 0);
 	if (err)
@@ -1949,7 +1935,7 @@ static int mmc_sleep(struct mmc_host *host)
 	 * SEND_STATUS command to poll the status because that command (and most
 	 * others) is invalid while the card sleeps.
 	 */
-	if (!cmd.busy_timeout || !(host->caps & MMC_CAP_WAIT_WHILE_BUSY))
+	if (!use_r1b_resp || !(host->caps & MMC_CAP_WAIT_WHILE_BUSY))
 		mmc_delay(timeout_ms);
 
 out_release:
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 5756781fef376..025a4134d5c7d 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -521,6 +521,27 @@ int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 	return __mmc_poll_for_busy(card, timeout_ms, true, false, busy_cmd);
 }
 
+bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
+			  unsigned int timeout_ms)
+{
+	/*
+	 * If the max_busy_timeout of the host is specified, make sure it's
+	 * enough to fit the used timeout_ms. In case it's not, let's instruct
+	 * the host to avoid HW busy detection, by converting to a R1 response
+	 * instead of a R1B. Note, some hosts requires R1B, which also means
+	 * they are on their own when it comes to deal with the busy timeout.
+	 */
+	if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
+	    (timeout_ms > host->max_busy_timeout)) {
+		cmd->flags = MMC_CMD_AC | MMC_RSP_SPI_R1 | MMC_RSP_R1;
+		return false;
+	}
+
+	cmd->flags = MMC_CMD_AC | MMC_RSP_SPI_R1B | MMC_RSP_R1B;
+	cmd->busy_timeout = timeout_ms;
+	return true;
+}
+
 /**
  *	__mmc_switch - modify EXT_CSD register
  *	@card: the MMC card associated with the data transfer
@@ -543,7 +564,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 	struct mmc_host *host = card->host;
 	int err;
 	struct mmc_command cmd = {};
-	bool use_r1b_resp = true;
+	bool use_r1b_resp;
 	unsigned char old_timing = host->ios.timing;
 
 	mmc_retune_hold(host);
@@ -554,29 +575,12 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		timeout_ms = card->ext_csd.generic_cmd6_time;
 	}
 
-	/*
-	 * If the max_busy_timeout of the host is specified, make sure it's
-	 * enough to fit the used timeout_ms. In case it's not, let's instruct
-	 * the host to avoid HW busy detection, by converting to a R1 response
-	 * instead of a R1B. Note, some hosts requires R1B, which also means
-	 * they are on their own when it comes to deal with the busy timeout.
-	 */
-	if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout &&
-	    (timeout_ms > host->max_busy_timeout))
-		use_r1b_resp = false;
-
 	cmd.opcode = MMC_SWITCH;
 	cmd.arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
 		  (index << 16) |
 		  (value << 8) |
 		  set;
-	cmd.flags = MMC_CMD_AC;
-	if (use_r1b_resp) {
-		cmd.flags |= MMC_RSP_SPI_R1B | MMC_RSP_R1B;
-		cmd.busy_timeout = timeout_ms;
-	} else {
-		cmd.flags |= MMC_RSP_SPI_R1 | MMC_RSP_R1;
-	}
+	use_r1b_resp = mmc_prepare_busy_cmd(host, &cmd, timeout_ms);
 
 	err = mmc_wait_for_cmd(host, &cmd, retries);
 	if (err)
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 7bc1cfb0654c3..ba898c4356588 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -18,6 +18,7 @@ enum mmc_busy_cmd {
 
 struct mmc_host;
 struct mmc_card;
+struct mmc_command;
 
 int mmc_select_card(struct mmc_card *card);
 int mmc_deselect_cards(struct mmc_host *host);
@@ -35,6 +36,8 @@ int mmc_bus_test(struct mmc_card *card, u8 bus_width);
 int mmc_can_ext_csd(struct mmc_card *card);
 int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
+bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
+			  unsigned int timeout_ms);
 int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 		      enum mmc_busy_cmd busy_cmd);
 int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
-- 
GitLab


From c7bedef053cf7fd26efca90551a95c1776dd9e2f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:13 +0200
Subject: [PATCH 2780/3804] mmc: core: Take into account MMC_CAP_NEED_RSP_BUSY
 for eMMC HPI commands

In mmc_send_hpi_cmd() the host->max_busy_timeout is being validated against
the timeout for the eMMC HPI command, to decide whether an R1 or R1B
response should be used.

However, it has turned out that some hosts can't cope with that conversion,
but need R1B, which means MMC_CAP_NEED_RSP_BUSY is set for them. Let's
take this into account by using the common mmc_prepare_busy_cmd() when
doing the validation, which also avoids some open coding.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-3-ulf.hansson@linaro.org
---
 drivers/mmc/core/mmc_ops.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 025a4134d5c7d..66ae699a410f4 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -817,28 +817,17 @@ static int mmc_send_hpi_cmd(struct mmc_card *card)
 {
 	unsigned int busy_timeout_ms = card->ext_csd.out_of_int_time;
 	struct mmc_host *host = card->host;
-	bool use_r1b_resp = true;
+	bool use_r1b_resp = false;
 	struct mmc_command cmd = {};
 	int err;
 
 	cmd.opcode = card->ext_csd.hpi_cmd;
 	cmd.arg = card->rca << 16 | 1;
+	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
-	/*
-	 * Make sure the host's max_busy_timeout fit the needed timeout for HPI.
-	 * In case it doesn't, let's instruct the host to avoid HW busy
-	 * detection, by using a R1 response instead of R1B.
-	 */
-	if (host->max_busy_timeout && busy_timeout_ms > host->max_busy_timeout)
-		use_r1b_resp = false;
-
-	if (cmd.opcode == MMC_STOP_TRANSMISSION && use_r1b_resp) {
-		cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
-		cmd.busy_timeout = busy_timeout_ms;
-	} else {
-		cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
-		use_r1b_resp = false;
-	}
+	if (cmd.opcode == MMC_STOP_TRANSMISSION)
+		use_r1b_resp = mmc_prepare_busy_cmd(host, &cmd,
+						    busy_timeout_ms);
 
 	err = mmc_wait_for_cmd(host, &cmd, 0);
 	if (err) {
-- 
GitLab


From 1e0b069bdc583925d6207e091e55ad4d0f30eb4c Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:14 +0200
Subject: [PATCH 2781/3804] mmc: core: Re-structure some code in
 __mmc_poll_for_busy()

To make the code a bit more understandable, let's move the check about
whether polling is allowed or not, out to the caller instead. In this way,
we can also drop the send_status in-parameter, so let's do that.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-4-ulf.hansson@linaro.org
---
 drivers/mmc/core/mmc_ops.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 66ae699a410f4..ccaee1cb7ff53 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -465,8 +465,7 @@ static int mmc_busy_status(struct mmc_card *card, bool retry_crc_err,
 }
 
 static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
-			       bool send_status, bool retry_crc_err,
-			       enum mmc_busy_cmd busy_cmd)
+			       bool retry_crc_err, enum mmc_busy_cmd busy_cmd)
 {
 	struct mmc_host *host = card->host;
 	int err;
@@ -475,16 +474,6 @@ static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 	bool expired = false;
 	bool busy = false;
 
-	/*
-	 * In cases when not allowed to poll by using CMD13 or because we aren't
-	 * capable of polling by using ->card_busy(), then rely on waiting the
-	 * stated timeout to be sufficient.
-	 */
-	if (!send_status && !host->ops->card_busy) {
-		mmc_delay(timeout_ms);
-		return 0;
-	}
-
 	timeout = jiffies + msecs_to_jiffies(timeout_ms) + 1;
 	do {
 		/*
@@ -518,7 +507,7 @@ static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 		      enum mmc_busy_cmd busy_cmd)
 {
-	return __mmc_poll_for_busy(card, timeout_ms, true, false, busy_cmd);
+	return __mmc_poll_for_busy(card, timeout_ms, false, busy_cmd);
 }
 
 bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
@@ -591,8 +580,18 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		mmc_host_is_spi(host))
 		goto out_tim;
 
+	/*
+	 * If the host doesn't support HW polling via the ->card_busy() ops and
+	 * when it's not allowed to poll by using CMD13, then we need to rely on
+	 * waiting the stated timeout to be sufficient.
+	 */
+	if (!send_status && !host->ops->card_busy) {
+		mmc_delay(timeout_ms);
+		goto out_tim;
+	}
+
 	/* Let's try to poll to find out when the command is completed. */
-	err = __mmc_poll_for_busy(card, timeout_ms, send_status, retry_crc_err,
+	err = __mmc_poll_for_busy(card, timeout_ms, retry_crc_err,
 				  MMC_BUSY_CMD6);
 	if (err)
 		goto out;
-- 
GitLab


From 04f967ad28c836815f6894b618643dd23670c6e5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:15 +0200
Subject: [PATCH 2782/3804] mmc: core: Extend re-use of __mmc_poll_for_busy()

Via __mmc_poll_for_busy() we end up polling with the ->card_busy() host ops
or by sending the CMD13. To allow polling of different types, which is
needed to support a few new SD card features, let's rework the code around
__mmc_poll_for_busy() to make it more generic.

More precisely, let __mmc_poll_for_busy() take a pointer to a callback
function as in-parameter, which it calls to poll for busy state completion.
Additionally, let's share __mmc_poll_for_busy() to allow it to be re-used
outside of mmc_ops.c. Subsequent changes will make use of it.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-5-ulf.hansson@linaro.org
---
 drivers/mmc/core/core.c    |  2 +-
 drivers/mmc/core/mmc_ops.c | 42 ++++++++++++++++++++++++--------------
 drivers/mmc/core/mmc_ops.h |  5 ++++-
 3 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b00c84ea8441c..b039dcff17f80 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1671,7 +1671,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
 		goto out;
 
 	/* Let's poll to find out when the erase operation completes. */
-	err = mmc_poll_for_busy(card, busy_timeout, MMC_BUSY_ERASE);
+	err = mmc_poll_for_busy(card, busy_timeout, false, MMC_BUSY_ERASE);
 
 out:
 	mmc_retune_release(card->host);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index ccaee1cb7ff53..653627fe02a35 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -53,6 +53,12 @@ static const u8 tuning_blk_pattern_8bit[] = {
 	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
 };
 
+struct mmc_busy_data {
+	struct mmc_card *card;
+	bool retry_crc_err;
+	enum mmc_busy_cmd busy_cmd;
+};
+
 int __mmc_send_status(struct mmc_card *card, u32 *status, unsigned int retries)
 {
 	int err;
@@ -424,10 +430,10 @@ int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal)
 	return mmc_switch_status_error(card->host, status);
 }
 
-static int mmc_busy_status(struct mmc_card *card, bool retry_crc_err,
-			   enum mmc_busy_cmd busy_cmd, bool *busy)
+static int mmc_busy_cb(void *cb_data, bool *busy)
 {
-	struct mmc_host *host = card->host;
+	struct mmc_busy_data *data = cb_data;
+	struct mmc_host *host = data->card->host;
 	u32 status = 0;
 	int err;
 
@@ -436,17 +442,17 @@ static int mmc_busy_status(struct mmc_card *card, bool retry_crc_err,
 		return 0;
 	}
 
-	err = mmc_send_status(card, &status);
-	if (retry_crc_err && err == -EILSEQ) {
+	err = mmc_send_status(data->card, &status);
+	if (data->retry_crc_err && err == -EILSEQ) {
 		*busy = true;
 		return 0;
 	}
 	if (err)
 		return err;
 
-	switch (busy_cmd) {
+	switch (data->busy_cmd) {
 	case MMC_BUSY_CMD6:
-		err = mmc_switch_status_error(card->host, status);
+		err = mmc_switch_status_error(host, status);
 		break;
 	case MMC_BUSY_ERASE:
 		err = R1_STATUS(status) ? -EIO : 0;
@@ -464,8 +470,9 @@ static int mmc_busy_status(struct mmc_card *card, bool retry_crc_err,
 	return 0;
 }
 
-static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
-			       bool retry_crc_err, enum mmc_busy_cmd busy_cmd)
+int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
+			int (*busy_cb)(void *cb_data, bool *busy),
+			void *cb_data)
 {
 	struct mmc_host *host = card->host;
 	int err;
@@ -482,7 +489,7 @@ static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 		 */
 		expired = time_after(jiffies, timeout);
 
-		err = mmc_busy_status(card, retry_crc_err, busy_cmd, &busy);
+		err = (*busy_cb)(cb_data, &busy);
 		if (err)
 			return err;
 
@@ -505,9 +512,15 @@ static int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
 }
 
 int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
-		      enum mmc_busy_cmd busy_cmd)
+		      bool retry_crc_err, enum mmc_busy_cmd busy_cmd)
 {
-	return __mmc_poll_for_busy(card, timeout_ms, false, busy_cmd);
+	struct mmc_busy_data cb_data;
+
+	cb_data.card = card;
+	cb_data.retry_crc_err = retry_crc_err;
+	cb_data.busy_cmd = busy_cmd;
+
+	return __mmc_poll_for_busy(card, timeout_ms, &mmc_busy_cb, &cb_data);
 }
 
 bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
@@ -591,8 +604,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 	}
 
 	/* Let's try to poll to find out when the command is completed. */
-	err = __mmc_poll_for_busy(card, timeout_ms, retry_crc_err,
-				  MMC_BUSY_CMD6);
+	err = mmc_poll_for_busy(card, timeout_ms, retry_crc_err, MMC_BUSY_CMD6);
 	if (err)
 		goto out;
 
@@ -840,7 +852,7 @@ static int mmc_send_hpi_cmd(struct mmc_card *card)
 		return 0;
 
 	/* Let's poll to find out when the HPI request completes. */
-	return mmc_poll_for_busy(card, busy_timeout_ms, MMC_BUSY_HPI);
+	return mmc_poll_for_busy(card, busy_timeout_ms, false, MMC_BUSY_HPI);
 }
 
 /**
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index ba898c4356588..aca66c1288048 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -38,8 +38,11 @@ int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
 bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
 			  unsigned int timeout_ms);
+int __mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
+			int (*busy_cb)(void *cb_data, bool *busy),
+			void *cb_data);
 int mmc_poll_for_busy(struct mmc_card *card, unsigned int timeout_ms,
-		      enum mmc_busy_cmd busy_cmd);
+		      bool retry_crc_err, enum mmc_busy_cmd busy_cmd);
 int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		unsigned int timeout_ms, unsigned char timing,
 		bool send_status, bool retry_crc_err, unsigned int retries);
-- 
GitLab


From 6fa79651cc808f68db6f6f297be5a950ccd5dffb Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:16 +0200
Subject: [PATCH 2783/3804] mmc: core: Enable eMMC sleep commands to use HW
 busy polling

After the eMMC sleep command (CMD5) has been sent, the card starts
signaling busy on the DAT0 line, which can be monitored to understand when
it's allowed to proceed to power off the VCC regulator.

When MMC_CAP_WAIT_WHILE_BUSY isn't supported by the host the DAT0 line
isn't being monitored for busy completion, but instead we are waiting a
fixed period of time. The time corresponds to the sleep timeout that is
specified in the EXT_CSD register of the eMMC card. This is in many cases
suboptimal, as the timeout corresponds to the worst case scenario.

To improve the situation add support for HW busy polling through the
->card_busy() host ops, when the host supports this.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-6-ulf.hansson@linaro.org
---
 drivers/mmc/core/mmc.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 63a7bd0b239cd..13074aa1f6057 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1905,6 +1905,14 @@ static int mmc_can_sleep(struct mmc_card *card)
 	return card->ext_csd.rev >= 3;
 }
 
+static int mmc_sleep_busy_cb(void *cb_data, bool *busy)
+{
+	struct mmc_host *host = cb_data;
+
+	*busy = host->ops->card_busy(host);
+	return 0;
+}
+
 static int mmc_sleep(struct mmc_host *host)
 {
 	struct mmc_command cmd = {};
@@ -1930,13 +1938,20 @@ static int mmc_sleep(struct mmc_host *host)
 		goto out_release;
 
 	/*
-	 * If the host does not wait while the card signals busy, then we will
-	 * will have to wait the sleep/awake timeout.  Note, we cannot use the
-	 * SEND_STATUS command to poll the status because that command (and most
-	 * others) is invalid while the card sleeps.
+	 * If the host does not wait while the card signals busy, then we can
+	 * try to poll, but only if the host supports HW polling, as the
+	 * SEND_STATUS cmd is not allowed. If we can't poll, then we simply need
+	 * to wait the sleep/awake timeout.
 	 */
-	if (!use_r1b_resp || !(host->caps & MMC_CAP_WAIT_WHILE_BUSY))
+	if (host->caps & MMC_CAP_WAIT_WHILE_BUSY && use_r1b_resp)
+		goto out_release;
+
+	if (!host->ops->card_busy) {
 		mmc_delay(timeout_ms);
+		goto out_release;
+	}
+
+	err = __mmc_poll_for_busy(card, timeout_ms, &mmc_sleep_busy_cb, host);
 
 out_release:
 	mmc_retune_release(host);
-- 
GitLab


From cec18ad93e35a219d2277dbbdbfedb4f83a7a220 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:17 +0200
Subject: [PATCH 2784/3804] mmc: core: Prepare mmc_send_cxd_data() to be
 re-used for additional cmds

The function mmc_send_cxd_data() sends a data read command of ADTC type and
prepares to receive an R1 response. To make it even more re-usable, let's
extend it with another in-parameter for the command argument. While at it,
let's also rename the function to mmc_send_adtc_data() as it better
describes its purpose.

Note that, this change doesn't add any new users of the function. Instead
that is done from subsequent changes.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-7-ulf.hansson@linaro.org
---
 drivers/mmc/core/mmc_ops.c | 11 +++++------
 drivers/mmc/core/mmc_ops.h |  2 ++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 653627fe02a35..b1da8f1950eea 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -252,9 +252,8 @@ mmc_send_cxd_native(struct mmc_host *host, u32 arg, u32 *cxd, int opcode)
  * NOTE: void *buf, caller for the buf is required to use DMA-capable
  * buffer or on-stack buffer (with some overhead in callee).
  */
-static int
-mmc_send_cxd_data(struct mmc_card *card, struct mmc_host *host,
-		u32 opcode, void *buf, unsigned len)
+int mmc_send_adtc_data(struct mmc_card *card, struct mmc_host *host, u32 opcode,
+		       u32 args, void *buf, unsigned len)
 {
 	struct mmc_request mrq = {};
 	struct mmc_command cmd = {};
@@ -265,7 +264,7 @@ mmc_send_cxd_data(struct mmc_card *card, struct mmc_host *host,
 	mrq.data = &data;
 
 	cmd.opcode = opcode;
-	cmd.arg = 0;
+	cmd.arg = args;
 
 	/* NOTE HACK:  the MMC_RSP_SPI_R1 is always correct here, but we
 	 * rely on callers to never use this with "native" calls for reading
@@ -311,7 +310,7 @@ static int mmc_spi_send_cxd(struct mmc_host *host, u32 *cxd, u32 opcode)
 	if (!cxd_tmp)
 		return -ENOMEM;
 
-	ret = mmc_send_cxd_data(NULL, host, opcode, cxd_tmp, 16);
+	ret = mmc_send_adtc_data(NULL, host, opcode, 0, cxd_tmp, 16);
 	if (ret)
 		goto err;
 
@@ -359,7 +358,7 @@ int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
 	if (!ext_csd)
 		return -ENOMEM;
 
-	err = mmc_send_cxd_data(card, card->host, MMC_SEND_EXT_CSD, ext_csd,
+	err = mmc_send_adtc_data(card, card->host, MMC_SEND_EXT_CSD, 0, ext_csd,
 				512);
 	if (err)
 		kfree(ext_csd);
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index aca66c1288048..2b1d730e56bff 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -26,6 +26,8 @@ int mmc_set_dsr(struct mmc_host *host);
 int mmc_go_idle(struct mmc_host *host);
 int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr);
 int mmc_set_relative_addr(struct mmc_card *card);
+int mmc_send_adtc_data(struct mmc_card *card, struct mmc_host *host, u32 opcode,
+		       u32 args, void *buf, unsigned len);
 int mmc_send_csd(struct mmc_card *card, u32 *csd);
 int __mmc_send_status(struct mmc_card *card, u32 *status, unsigned int retries);
 int mmc_send_status(struct mmc_card *card, u32 *status);
-- 
GitLab


From 41e84fe1647e0d6ec309882bc247667e304c351f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:18 +0200
Subject: [PATCH 2785/3804] mmc: core: Drop open coding in mmc_sd_switch()

The SD_SWITCH (CMD6) is an ADTC type of command with an R1 response, which
can be sent by using the mmc_send_adtc_data(). Let's do that and drop the
open coding in mmc_sd_switch().

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-8-ulf.hansson@linaro.org
---
 drivers/mmc/core/sd_ops.c | 38 +++++++-------------------------------
 1 file changed, 7 insertions(+), 31 deletions(-)

diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index d61ff811218ce..ef8d1dce5af1a 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -17,6 +17,7 @@
 
 #include "core.h"
 #include "sd_ops.h"
+#include "mmc_ops.h"
 
 int mmc_app_cmd(struct mmc_host *host, struct mmc_card *card)
 {
@@ -309,43 +310,18 @@ int mmc_app_send_scr(struct mmc_card *card)
 int mmc_sd_switch(struct mmc_card *card, int mode, int group,
 	u8 value, u8 *resp)
 {
-	struct mmc_request mrq = {};
-	struct mmc_command cmd = {};
-	struct mmc_data data = {};
-	struct scatterlist sg;
+	u32 cmd_args;
 
 	/* NOTE: caller guarantees resp is heap-allocated */
 
 	mode = !!mode;
 	value &= 0xF;
+	cmd_args = mode << 31 | 0x00FFFFFF;
+	cmd_args &= ~(0xF << (group * 4));
+	cmd_args |= value << (group * 4);
 
-	mrq.cmd = &cmd;
-	mrq.data = &data;
-
-	cmd.opcode = SD_SWITCH;
-	cmd.arg = mode << 31 | 0x00FFFFFF;
-	cmd.arg &= ~(0xF << (group * 4));
-	cmd.arg |= value << (group * 4);
-	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-
-	data.blksz = 64;
-	data.blocks = 1;
-	data.flags = MMC_DATA_READ;
-	data.sg = &sg;
-	data.sg_len = 1;
-
-	sg_init_one(&sg, resp, 64);
-
-	mmc_set_data_timeout(&data, card);
-
-	mmc_wait_for_req(card->host, &mrq);
-
-	if (cmd.error)
-		return cmd.error;
-	if (data.error)
-		return data.error;
-
-	return 0;
+	return mmc_send_adtc_data(card, card->host, SD_SWITCH, cmd_args, resp,
+				  64);
 }
 
 int mmc_app_sd_status(struct mmc_card *card, void *ssr)
-- 
GitLab


From dbea8ae9febdea11cb74d094e6b730987079679e Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:19 +0200
Subject: [PATCH 2786/3804] mmc: core: Parse the SD SCR register for support of
 CMD48/49 and CMD58/59

In SD spec v4.x the support for CMD48/49 and CMD58/59 was introduced as
optional features. To let the card announce whether it supports the
commands, the SCR register has been extended with corresponding support
bits. Let's parse and store this information for later use.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-9-ulf.hansson@linaro.org
---
 drivers/mmc/core/sd.c    | 4 +++-
 include/linux/mmc/card.h | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 2c48d65041013..de7b5f8df5503 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -222,7 +222,9 @@ static int mmc_decode_scr(struct mmc_card *card)
 	else
 		card->erased_byte = 0x0;
 
-	if (scr->sda_spec3)
+	if (scr->sda_spec4)
+		scr->cmds = UNSTUFF_BITS(resp, 32, 4);
+	else if (scr->sda_spec3)
 		scr->cmds = UNSTUFF_BITS(resp, 32, 2);
 
 	/* SD Spec says: any SD Card shall set at least bits 0 and 2 */
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index f9ad35dd6012a..858fc4d11240a 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -139,6 +139,8 @@ struct sd_scr {
 	unsigned char		cmds;
 #define SD_SCR_CMD20_SUPPORT   (1<<0)
 #define SD_SCR_CMD23_SUPPORT   (1<<1)
+#define SD_SCR_CMD48_SUPPORT   (1<<2)
+#define SD_SCR_CMD58_SUPPORT   (1<<3)
 };
 
 struct sd_ssr {
-- 
GitLab


From c784f92769ae8eafb2eb489408757528ff7525df Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:20 +0200
Subject: [PATCH 2787/3804] mmc: core: Read the SD function extension registers
 for power management

In the SD spec v4.0 the CMD48/49 and CMD58/59 were introduced as optional
commands. In the SD spec v4.1 the SD function extension registers were
introduced, which require support for CMD48/49/58/59 to be read/written
from/to.

Moreover, a specific function extension register was added to let the card
announce support for optional features with regard to power management. The
features that were added are "Power Off Notification", "Power Down Mode"
and "Power Sustenance".

As a first step to support this, let's read and parse the register for
power management during the SD card initialization and store the
information about the supported features in the struct mmc_card. In this
way, we prepare for subsequent changes to implement the complete support
for the new features.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Acked-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210504161222.101536-10-ulf.hansson@linaro.org
---
 drivers/mmc/core/sd.c    | 178 +++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/card.h |  13 +++
 include/linux/mmc/sd.h   |   3 +
 3 files changed, 194 insertions(+)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index de7b5f8df5503..2e687f1f45428 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -996,6 +996,177 @@ static bool mmc_sd_card_using_v18(struct mmc_card *card)
 	       (SD_MODE_UHS_SDR50 | SD_MODE_UHS_SDR104 | SD_MODE_UHS_DDR50);
 }
 
+static int sd_read_ext_reg(struct mmc_card *card, u8 fno, u8 page,
+			   u16 offset, u16 len, u8 *reg_buf)
+{
+	u32 cmd_args;
+
+	/*
+	 * Command arguments of CMD48:
+	 * [31:31] MIO (0 = memory).
+	 * [30:27] FNO (function number).
+	 * [26:26] reserved (0).
+	 * [25:18] page number.
+	 * [17:9] offset address.
+	 * [8:0] length (0 = 1 byte, 1ff = 512 bytes).
+	 */
+	cmd_args = fno << 27 | page << 18 | offset << 9 | (len -1);
+
+	return mmc_send_adtc_data(card, card->host, SD_READ_EXTR_SINGLE,
+				  cmd_args, reg_buf, 512);
+}
+
+static int sd_parse_ext_reg_power(struct mmc_card *card, u8 fno, u8 page,
+				  u16 offset)
+{
+	int err;
+	u8 *reg_buf;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	/* Read the extension register for power management function. */
+	err = sd_read_ext_reg(card, fno, page, offset, 512, reg_buf);
+	if (err) {
+		pr_warn("%s: error %d reading PM func of ext reg\n",
+			mmc_hostname(card->host), err);
+		goto out;
+	}
+
+	/* PM revision consists of 4 bits. */
+	card->ext_power.rev = reg_buf[0] & 0xf;
+
+	/* Power Off Notification support at bit 4. */
+	if (reg_buf[1] & BIT(4))
+		card->ext_power.feature_support |= SD_EXT_POWER_OFF_NOTIFY;
+
+	/* Power Sustenance support at bit 5. */
+	if (reg_buf[1] & BIT(5))
+		card->ext_power.feature_support |= SD_EXT_POWER_SUSTENANCE;
+
+	/* Power Down Mode support at bit 6. */
+	if (reg_buf[1] & BIT(6))
+		card->ext_power.feature_support |= SD_EXT_POWER_DOWN_MODE;
+
+	card->ext_power.fno = fno;
+	card->ext_power.page = page;
+	card->ext_power.offset = offset;
+
+out:
+	kfree(reg_buf);
+	return err;
+}
+
+static int sd_parse_ext_reg(struct mmc_card *card, u8 *gen_info_buf,
+			    u16 *next_ext_addr)
+{
+	u8 num_regs, fno, page;
+	u16 sfc, offset, ext = *next_ext_addr;
+	u32 reg_addr;
+
+	/*
+	 * Parse only one register set per extension, as that is sufficient to
+	 * support the standard functions. This means another 48 bytes in the
+	 * buffer must be available.
+	 */
+	if (ext + 48 > 512)
+		return -EFAULT;
+
+	/* Standard Function Code */
+	memcpy(&sfc, &gen_info_buf[ext], 2);
+
+	/* Address to the next extension. */
+	memcpy(next_ext_addr, &gen_info_buf[ext + 40], 2);
+
+	/* Number of registers for this extension. */
+	num_regs = gen_info_buf[ext + 42];
+
+	/* We support only one register per extension. */
+	if (num_regs != 1)
+		return 0;
+
+	/* Extension register address. */
+	memcpy(&reg_addr, &gen_info_buf[ext + 44], 4);
+
+	/* 9 bits (0 to 8) contains the offset address. */
+	offset = reg_addr & 0x1ff;
+
+	/* 8 bits (9 to 16) contains the page number. */
+	page = reg_addr >> 9 & 0xff ;
+
+	/* 4 bits (18 to 21) contains the function number. */
+	fno = reg_addr >> 18 & 0xf;
+
+	/* Standard Function Code for power management. */
+	if (sfc == 0x1)
+		return sd_parse_ext_reg_power(card, fno, page, offset);
+
+	return 0;
+}
+
+static int sd_read_ext_regs(struct mmc_card *card)
+{
+	int err, i;
+	u8 num_ext, *gen_info_buf;
+	u16 rev, len, next_ext_addr;
+
+	if (mmc_host_is_spi(card->host))
+		return 0;
+
+	if (!(card->scr.cmds & SD_SCR_CMD48_SUPPORT))
+		return 0;
+
+	gen_info_buf = kzalloc(512, GFP_KERNEL);
+	if (!gen_info_buf)
+		return -ENOMEM;
+
+	/*
+	 * Read 512 bytes of general info, which is found at function number 0,
+	 * at page 0 and with no offset.
+	 */
+	err = sd_read_ext_reg(card, 0, 0, 0, 512, gen_info_buf);
+	if (err) {
+		pr_warn("%s: error %d reading general info of SD ext reg\n",
+			mmc_hostname(card->host), err);
+		goto out;
+	}
+
+	/* General info structure revision. */
+	memcpy(&rev, &gen_info_buf[0], 2);
+
+	/* Length of general info in bytes. */
+	memcpy(&len, &gen_info_buf[2], 2);
+
+	/* Number of extensions to be find. */
+	num_ext = gen_info_buf[4];
+
+	/* We support revision 0, but limit it to 512 bytes for simplicity. */
+	if (rev != 0 || len > 512) {
+		pr_warn("%s: non-supported SD ext reg layout\n",
+			mmc_hostname(card->host));
+		goto out;
+	}
+
+	/*
+	 * Parse the extension registers. The first extension should start
+	 * immediately after the general info header (16 bytes).
+	 */
+	next_ext_addr = 16;
+	for (i = 0; i < num_ext; i++) {
+		err = sd_parse_ext_reg(card, gen_info_buf, &next_ext_addr);
+		if (err) {
+			pr_warn("%s: error %d parsing SD ext reg\n",
+				mmc_hostname(card->host), err);
+			goto out;
+		}
+	}
+
+out:
+	kfree(gen_info_buf);
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -1144,6 +1315,13 @@ retry:
 		}
 	}
 
+	if (!oldcard) {
+		/* Read/parse the extension registers. */
+		err = sd_read_ext_regs(card);
+		if (err)
+			goto free_card;
+	}
+
 	if (host->cqe_ops && !host->cqe_enabled) {
 		err = host->cqe_ops->cqe_enable(host, card);
 		if (!err) {
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 858fc4d11240a..03a862e93594e 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -191,6 +191,18 @@ struct sd_switch_caps {
 #define SD_MAX_CURRENT_800	(1 << SD_SET_CURRENT_LIMIT_800)
 };
 
+struct sd_ext_reg {
+	u8			fno;
+	u8			page;
+	u16			offset;
+	u8			rev;
+	u8			feature_support;
+/* Power Management Function. */
+#define SD_EXT_POWER_OFF_NOTIFY	(1<<0)
+#define SD_EXT_POWER_SUSTENANCE	(1<<1)
+#define SD_EXT_POWER_DOWN_MODE	(1<<2)
+};
+
 struct sdio_cccr {
 	unsigned int		sdio_vsn;
 	unsigned int		sd_vsn;
@@ -292,6 +304,7 @@ struct mmc_card {
 	struct sd_scr		scr;		/* extra SD information */
 	struct sd_ssr		ssr;		/* yet more SD information */
 	struct sd_switch_caps	sw_caps;	/* switch (CMD6) caps */
+	struct sd_ext_reg	ext_power;	/* SD extension reg for PM */
 
 	unsigned int		sdio_funcs;	/* number of SDIO functions */
 	atomic_t		sdio_funcs_probed; /* number of probed SDIO funcs */
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 2236aa540faad..43bfc5c39ad43 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -29,6 +29,9 @@
 #define SD_APP_OP_COND           41   /* bcr  [31:0] OCR         R3  */
 #define SD_APP_SEND_SCR          51   /* adtc                    R1  */
 
+  /* class 11 */
+#define SD_READ_EXTR_SINGLE      48   /* adtc [31:0]             R1  */
+
 /* OCR bit definitions */
 #define SD_OCR_S18R		(1 << 24)    /* 1.8V switching request */
 #define SD_ROCR_S18A		SD_OCR_S18R  /* 1.8V switching accepted by card */
-- 
GitLab


From 4e6306e0b83c6251699c2202e859b55ddf7b8c5f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:21 +0200
Subject: [PATCH 2788/3804] mmc: core: Read performance enhancements registers
 for SD cards

In SD spec v6.x the SD function extension registers for performance
enhancements were introduced. These registers let the SD card announce
support for various performance-related features, like "self-maintenance",
"cache" and "command queuing".

Let's extend the parsing of SD function extension registers and store the
information in the struct mmc_card. This prepares for subsequent changes to
implement the complete support for the new performance enhancement
features.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210504161222.101536-11-ulf.hansson@linaro.org
---
 drivers/mmc/core/sd.c    | 53 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/card.h |  7 ++++++
 2 files changed, 60 insertions(+)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 2e687f1f45428..0b882aaedf78d 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -1058,6 +1058,55 @@ out:
 	return err;
 }
 
+static int sd_parse_ext_reg_perf(struct mmc_card *card, u8 fno, u8 page,
+				 u16 offset)
+{
+	int err;
+	u8 *reg_buf;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	err = sd_read_ext_reg(card, fno, page, offset, 512, reg_buf);
+	if (err) {
+		pr_warn("%s: error %d reading PERF func of ext reg\n",
+			mmc_hostname(card->host), err);
+		goto out;
+	}
+
+	/* PERF revision. */
+	card->ext_perf.rev = reg_buf[0];
+
+	/* FX_EVENT support at bit 0. */
+	if (reg_buf[1] & BIT(0))
+		card->ext_perf.feature_support |= SD_EXT_PERF_FX_EVENT;
+
+	/* Card initiated self-maintenance support at bit 0. */
+	if (reg_buf[2] & BIT(0))
+		card->ext_perf.feature_support |= SD_EXT_PERF_CARD_MAINT;
+
+	/* Host initiated self-maintenance support at bit 1. */
+	if (reg_buf[2] & BIT(1))
+		card->ext_perf.feature_support |= SD_EXT_PERF_HOST_MAINT;
+
+	/* Cache support at bit 0. */
+	if (reg_buf[4] & BIT(0))
+		card->ext_perf.feature_support |= SD_EXT_PERF_CACHE;
+
+	/* Command queue support indicated via queue depth bits (0 to 4). */
+	if (reg_buf[6] & 0x1f)
+		card->ext_perf.feature_support |= SD_EXT_PERF_CMD_QUEUE;
+
+	card->ext_perf.fno = fno;
+	card->ext_perf.page = page;
+	card->ext_perf.offset = offset;
+
+out:
+	kfree(reg_buf);
+	return err;
+}
+
 static int sd_parse_ext_reg(struct mmc_card *card, u8 *gen_info_buf,
 			    u16 *next_ext_addr)
 {
@@ -1102,6 +1151,10 @@ static int sd_parse_ext_reg(struct mmc_card *card, u8 *gen_info_buf,
 	if (sfc == 0x1)
 		return sd_parse_ext_reg_power(card, fno, page, offset);
 
+	/* Standard Function Code for performance enhancement. */
+	if (sfc == 0x2)
+		return sd_parse_ext_reg_perf(card, fno, page, offset);
+
 	return 0;
 }
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 03a862e93594e..2867af0635f8b 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -201,6 +201,12 @@ struct sd_ext_reg {
 #define SD_EXT_POWER_OFF_NOTIFY	(1<<0)
 #define SD_EXT_POWER_SUSTENANCE	(1<<1)
 #define SD_EXT_POWER_DOWN_MODE	(1<<2)
+/* Performance Enhancement Function. */
+#define SD_EXT_PERF_FX_EVENT	(1<<0)
+#define SD_EXT_PERF_CARD_MAINT	(1<<1)
+#define SD_EXT_PERF_HOST_MAINT	(1<<2)
+#define SD_EXT_PERF_CACHE	(1<<3)
+#define SD_EXT_PERF_CMD_QUEUE	(1<<4)
 };
 
 struct sdio_cccr {
@@ -305,6 +311,7 @@ struct mmc_card {
 	struct sd_ssr		ssr;		/* yet more SD information */
 	struct sd_switch_caps	sw_caps;	/* switch (CMD6) caps */
 	struct sd_ext_reg	ext_power;	/* SD extension reg for PM */
+	struct sd_ext_reg	ext_perf;	/* SD extension reg for PERF */
 
 	unsigned int		sdio_funcs;	/* number of SDIO functions */
 	atomic_t		sdio_funcs_probed; /* number of probed SDIO funcs */
-- 
GitLab


From 2c5d42769038045b92160a849aad43c4b3170e2a Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 4 May 2021 18:12:22 +0200
Subject: [PATCH 2789/3804] mmc: core: Add support for Power Off Notification
 for SD cards

Rather than only deselecting the SD card via a CMD7, before we cut power to
it at system suspend, at runtime suspend or at shutdown, let's add support
for a graceful power off sequence via enabling the SD Power Off
Notification feature.

Note that the Power Off Notification feature was added in the SD spec
v4.x, which was several years ago. However, it's still a bit unclear how
often SD card vendors decide to implement support for it. To validate
these changes a Sandisk Extreme PRO A2 64GB has been used, which seems to
work nicely.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Shawn Lin <shawn.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210504161222.101536-12-ulf.hansson@linaro.org
---
 drivers/mmc/core/sd.c  | 136 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/mmc/sd.h |   1 +
 2 files changed, 134 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 0b882aaedf78d..bd40c682d2644 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -66,6 +66,13 @@ static const unsigned int sd_au_size[] = {
 		__res & __mask;						\
 	})
 
+#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000
+
+struct sd_busy_data {
+	struct mmc_card *card;
+	u8 *reg_buf;
+};
+
 /*
  * Given the decoded CSD structure, decode the raw CID to our CID structure.
  */
@@ -996,6 +1003,66 @@ static bool mmc_sd_card_using_v18(struct mmc_card *card)
 	       (SD_MODE_UHS_SDR50 | SD_MODE_UHS_SDR104 | SD_MODE_UHS_DDR50);
 }
 
+static int sd_write_ext_reg(struct mmc_card *card, u8 fno, u8 page, u16 offset,
+			    u8 reg_data)
+{
+	struct mmc_host *host = card->host;
+	struct mmc_request mrq = {};
+	struct mmc_command cmd = {};
+	struct mmc_data data = {};
+	struct scatterlist sg;
+	u8 *reg_buf;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	mrq.cmd = &cmd;
+	mrq.data = &data;
+
+	/*
+	 * Arguments of CMD49:
+	 * [31:31] MIO (0 = memory).
+	 * [30:27] FNO (function number).
+	 * [26:26] MW - mask write mode (0 = disable).
+	 * [25:18] page number.
+	 * [17:9] offset address.
+	 * [8:0] length (0 = 1 byte).
+	 */
+	cmd.arg = fno << 27 | page << 18 | offset << 9;
+
+	/* The first byte in the buffer is the data to be written. */
+	reg_buf[0] = reg_data;
+
+	data.flags = MMC_DATA_WRITE;
+	data.blksz = 512;
+	data.blocks = 1;
+	data.sg = &sg;
+	data.sg_len = 1;
+	sg_init_one(&sg, reg_buf, 512);
+
+	cmd.opcode = SD_WRITE_EXTR_SINGLE;
+	cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
+
+	mmc_set_data_timeout(&data, card);
+	mmc_wait_for_req(host, &mrq);
+
+	kfree(reg_buf);
+
+	/*
+	 * Note that, the SD card is allowed to signal busy on DAT0 up to 1s
+	 * after the CMD49. Although, let's leave this to be managed by the
+	 * caller.
+	 */
+
+	if (cmd.error)
+		return cmd.error;
+	if (data.error)
+		return data.error;
+
+	return 0;
+}
+
 static int sd_read_ext_reg(struct mmc_card *card, u8 fno, u8 page,
 			   u16 offset, u16 len, u8 *reg_buf)
 {
@@ -1446,21 +1513,84 @@ static void mmc_sd_detect(struct mmc_host *host)
 	}
 }
 
+static int sd_can_poweroff_notify(struct mmc_card *card)
+{
+	return card->ext_power.feature_support & SD_EXT_POWER_OFF_NOTIFY;
+}
+
+static int sd_busy_poweroff_notify_cb(void *cb_data, bool *busy)
+{
+	struct sd_busy_data *data = cb_data;
+	struct mmc_card *card = data->card;
+	int err;
+
+	/*
+	 * Read the status register for the power management function. It's at
+	 * one byte offset and is one byte long. The Power Off Notification
+	 * Ready is bit 0.
+	 */
+	err = sd_read_ext_reg(card, card->ext_power.fno, card->ext_power.page,
+			      card->ext_power.offset + 1, 1, data->reg_buf);
+	if (err) {
+		pr_warn("%s: error %d reading status reg of PM func\n",
+			mmc_hostname(card->host), err);
+		return err;
+	}
+
+	*busy = !(data->reg_buf[0] & BIT(0));
+	return 0;
+}
+
+static int sd_poweroff_notify(struct mmc_card *card)
+{
+	struct sd_busy_data cb_data;
+	u8 *reg_buf;
+	int err;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	/*
+	 * Set the Power Off Notification bit in the power management settings
+	 * register at 2 bytes offset.
+	 */
+	err = sd_write_ext_reg(card, card->ext_power.fno, card->ext_power.page,
+			       card->ext_power.offset + 2, BIT(0));
+	if (err) {
+		pr_warn("%s: error %d writing Power Off Notify bit\n",
+			mmc_hostname(card->host), err);
+		goto out;
+	}
+
+	cb_data.card = card;
+	cb_data.reg_buf = reg_buf;
+	err = __mmc_poll_for_busy(card, SD_POWEROFF_NOTIFY_TIMEOUT_MS,
+				  &sd_busy_poweroff_notify_cb, &cb_data);
+
+out:
+	kfree(reg_buf);
+	return err;
+}
+
 static int _mmc_sd_suspend(struct mmc_host *host)
 {
+	struct mmc_card *card = host->card;
 	int err = 0;
 
 	mmc_claim_host(host);
 
-	if (mmc_card_suspended(host->card))
+	if (mmc_card_suspended(card))
 		goto out;
 
-	if (!mmc_host_is_spi(host))
+	if (sd_can_poweroff_notify(card))
+		err = sd_poweroff_notify(card);
+	else if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
 
 	if (!err) {
 		mmc_power_off(host);
-		mmc_card_set_suspended(host->card);
+		mmc_card_set_suspended(card);
 	}
 
 out:
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 43bfc5c39ad43..6727576a87555 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -31,6 +31,7 @@
 
   /* class 11 */
 #define SD_READ_EXTR_SINGLE      48   /* adtc [31:0]             R1  */
+#define SD_WRITE_EXTR_SINGLE     49   /* adtc [31:0]             R1  */
 
 /* OCR bit definitions */
 #define SD_OCR_S18R		(1 << 24)    /* 1.8V switching request */
-- 
GitLab


From 70b52f09080565030a530a784f1c9948a7f48ca3 Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Tue, 4 May 2021 22:32:09 +0200
Subject: [PATCH 2790/3804] mmc: block: Disable CMDQ on the ioctl path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

According to the eMMC Spec:
"When command queuing is enabled (CMDQ Mode En bit in CMDQ_MODE_EN
field is set to ‘1’) class 11 commands are the only method through
which data transfer tasks can be issued. Existing data transfer
commands, namely CMD18/CMD17 and CMD25/CMD24, are not supported when
command queuing is enabled."
This means that if CMDQ is enabled, the FFU commands will not be supported.
To fix this issue, simply disable CMDQ on the ioctl path, and
re-enable CMDQ once the ioctl request is completed.

Tested-by: Michael Brunner <Michael.Brunner@kontron.com>
Signed-off-by: Bean Huo <beanhuo@micron.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Fixes: 1e8e55b67030 (mmc: block: Add CQE support)
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20210504203209.361597-1-huobean@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 689eb9afeeed1..2518bc0856596 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1004,6 +1004,12 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
 
 	switch (mq_rq->drv_op) {
 	case MMC_DRV_OP_IOCTL:
+		if (card->ext_csd.cmdq_en) {
+			ret = mmc_cmdq_disable(card);
+			if (ret)
+				break;
+		}
+		fallthrough;
 	case MMC_DRV_OP_IOCTL_RPMB:
 		idata = mq_rq->drv_op_data;
 		for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) {
@@ -1014,6 +1020,8 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req)
 		/* Always switch back to main area after RPMB access */
 		if (rpmb_ioctl)
 			mmc_blk_part_switch(card, 0);
+		else if (card->reenable_cmdq && !card->ext_csd.cmdq_en)
+			mmc_cmdq_enable(card);
 		break;
 	case MMC_DRV_OP_BOOT_WP:
 		ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP,
-- 
GitLab


From 8ae11edeb95682f6ab1983986c1daff3a00e01fc Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 6 May 2021 16:58:28 +0200
Subject: [PATCH 2791/3804] mmc: core: Move eMMC cache flushing to a new
 bus_ops callback

To prepare to add internal cache management for SD cards, let's start by
moving the eMMC specific code into a new ->flush_cache() bus_ops callback.

In this way, it becomes straightforward to add the SD-specific parts,
as subsequent changes are about to show.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210506145829.198823-2-ulf.hansson@linaro.org
---
 drivers/mmc/core/block.c   |  2 +-
 drivers/mmc/core/core.h    |  9 +++++++++
 drivers/mmc/core/mmc.c     | 25 +++++++++++++++++++++++--
 drivers/mmc/core/mmc_ops.c | 21 ---------------------
 drivers/mmc/core/mmc_ops.h |  1 -
 5 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 2518bc0856596..f85e107895da7 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1167,7 +1167,7 @@ static void mmc_blk_issue_flush(struct mmc_queue *mq, struct request *req)
 	struct mmc_card *card = md->queue.card;
 	int ret = 0;
 
-	ret = mmc_flush_cache(card);
+	ret = mmc_flush_cache(card->host);
 	blk_mq_end_request(req, ret ? BLK_STS_IOERR : BLK_STS_OK);
 }
 
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index db3c9c68875d8..0c4de2030b3f1 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -30,6 +30,7 @@ struct mmc_bus_ops {
 	int (*hw_reset)(struct mmc_host *);
 	int (*sw_reset)(struct mmc_host *);
 	bool (*cache_enabled)(struct mmc_host *);
+	int (*flush_cache)(struct mmc_host *);
 };
 
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
@@ -172,4 +173,12 @@ static inline bool mmc_cache_enabled(struct mmc_host *host)
 	return false;
 }
 
+static inline int mmc_flush_cache(struct mmc_host *host)
+{
+	if (host->bus_ops->flush_cache)
+		return host->bus_ops->flush_cache(host);
+
+	return 0;
+}
+
 #endif
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 13074aa1f6057..838726b68ff3e 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -28,6 +28,7 @@
 
 #define DEFAULT_CMD6_TIMEOUT_MS	500
 #define MIN_CACHE_EN_TIMEOUT_MS 1600
+#define CACHE_FLUSH_TIMEOUT_MS 30000 /* 30s */
 
 static const unsigned int tran_exp[] = {
 	10000,		100000,		1000000,	10000000,
@@ -2036,6 +2037,25 @@ static bool _mmc_cache_enabled(struct mmc_host *host)
 	       host->card->ext_csd.cache_ctrl & 1;
 }
 
+/*
+ * Flush the internal cache of the eMMC to non-volatile storage.
+ */
+static int _mmc_flush_cache(struct mmc_host *host)
+{
+	int err = 0;
+
+	if (_mmc_cache_enabled(host)) {
+		err = mmc_switch(host->card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_FLUSH_CACHE, 1,
+				 CACHE_FLUSH_TIMEOUT_MS);
+		if (err)
+			pr_err("%s: cache flush error %d\n",
+			       mmc_hostname(host), err);
+	}
+
+	return err;
+}
+
 static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 {
 	int err = 0;
@@ -2047,7 +2067,7 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 	if (mmc_card_suspended(host->card))
 		goto out;
 
-	err = mmc_flush_cache(host->card);
+	err = _mmc_flush_cache(host);
 	if (err)
 		goto out;
 
@@ -2188,7 +2208,7 @@ static int _mmc_hw_reset(struct mmc_host *host)
 	 * In the case of recovery, we can't expect flushing the cache to work
 	 * always, but we have a go and ignore errors.
 	 */
-	mmc_flush_cache(host->card);
+	_mmc_flush_cache(host);
 
 	if ((host->caps & MMC_CAP_HW_RESET) && host->ops->hw_reset &&
 	     mmc_can_reset(card)) {
@@ -2216,6 +2236,7 @@ static const struct mmc_bus_ops mmc_ops = {
 	.shutdown = mmc_shutdown,
 	.hw_reset = _mmc_hw_reset,
 	.cache_enabled = _mmc_cache_enabled,
+	.flush_cache = _mmc_flush_cache,
 };
 
 /*
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index b1da8f1950eea..af423acc4c885 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -20,7 +20,6 @@
 #include "mmc_ops.h"
 
 #define MMC_BKOPS_TIMEOUT_MS		(120 * 1000) /* 120s */
-#define MMC_CACHE_FLUSH_TIMEOUT_MS	(30 * 1000) /* 30s */
 #define MMC_SANITIZE_TIMEOUT_MS		(240 * 1000) /* 240s */
 
 static const u8 tuning_blk_pattern_4bit[] = {
@@ -964,26 +963,6 @@ void mmc_run_bkops(struct mmc_card *card)
 }
 EXPORT_SYMBOL(mmc_run_bkops);
 
-/*
- * Flush the cache to the non-volatile storage.
- */
-int mmc_flush_cache(struct mmc_card *card)
-{
-	int err = 0;
-
-	if (mmc_cache_enabled(card->host)) {
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				 EXT_CSD_FLUSH_CACHE, 1,
-				 MMC_CACHE_FLUSH_TIMEOUT_MS);
-		if (err)
-			pr_err("%s: cache flush error %d\n",
-					mmc_hostname(card->host), err);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL(mmc_flush_cache);
-
 static int mmc_cmdq_switch(struct mmc_card *card, bool enable)
 {
 	u8 val = enable ? EXT_CSD_CMDQ_MODE_ENABLED : 0;
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index 2b1d730e56bff..c3c1d9c2577e6 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -51,7 +51,6 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		unsigned int timeout_ms);
 void mmc_run_bkops(struct mmc_card *card);
-int mmc_flush_cache(struct mmc_card *card);
 int mmc_cmdq_enable(struct mmc_card *card);
 int mmc_cmdq_disable(struct mmc_card *card);
 int mmc_sanitize(struct mmc_card *card, unsigned int timeout_ms);
-- 
GitLab


From 3ae613765851cc2a651a42ce9d586078a899d8c8 Mon Sep 17 00:00:00 2001
From: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Date: Fri, 7 May 2021 12:05:28 +0530
Subject: [PATCH 2792/3804] drivers: memstick: core:ms_block.c: Fix alignment
 of block comment

A * has been added to subsequent lines of block comments.
The closing */ has been shifted to a new line.
A blank line has been added after declarations.
This is done to maintain code uniformity.

Signed-off-by: Shubhankar Kuranagatti <shubhankarvk@gmail.com>
Link: https://lore.kernel.org/r/20210507063528.tvlbu2cwnlczgbga@kewl-virtual-machine
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/memstick/core/ms_block.c | 37 +++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 8004dd64d09a8..d971acd982363 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -129,7 +129,7 @@ static int msb_sg_compare_to_buffer(struct scatterlist *sg,
  * Each zone consists of 512 eraseblocks, out of which in first
  * zone 494 are used and 496 are for all following zones.
  * Therefore zone #0 hosts blocks 0-493, zone #1 blocks 494-988, etc...
-*/
+ */
 static int msb_get_zone_from_lba(int lba)
 {
 	if (lba < 494)
@@ -348,8 +348,9 @@ again:
 	switch (msb->state) {
 	case MSB_RP_SEND_BLOCK_ADDRESS:
 		/* msb_write_regs sometimes "fails" because it needs to update
-			the reg window, and thus it returns request for that.
-			Then we stay in this state and retry */
+		 * the reg window, and thus it returns request for that.
+		 * Then we stay in this state and retry
+		 */
 		if (!msb_write_regs(msb,
 			offsetof(struct ms_register, param),
 			sizeof(struct ms_param_register),
@@ -368,7 +369,8 @@ again:
 	case MSB_RP_SEND_INT_REQ:
 		msb->state = MSB_RP_RECEIVE_INT_REQ_RESULT;
 		/* If dont actually need to send the int read request (only in
-			serial mode), then just fall through */
+		 * serial mode), then just fall through
+		 */
 		if (msb_read_int_reg(msb, -1))
 			return 0;
 		fallthrough;
@@ -702,7 +704,8 @@ static int h_msb_parallel_switch(struct memstick_dev *card,
 
 	case MSB_PS_SWICH_HOST:
 		 /* Set parallel interface on our side + send a dummy request
-			to see if card responds */
+		  * to see if card responds
+		  */
 		host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4);
 		memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
 		msb->state = MSB_PS_CONFIRM;
@@ -821,6 +824,7 @@ static int msb_mark_page_bad(struct msb_data *msb, int pba, int page)
 static int msb_erase_block(struct msb_data *msb, u16 pba)
 {
 	int error, try;
+
 	if (msb->read_only)
 		return -EROFS;
 
@@ -997,6 +1001,7 @@ static int msb_write_block(struct msb_data *msb,
 			u16 pba, u32 lba, struct scatterlist *sg, int offset)
 {
 	int error, current_try = 1;
+
 	BUG_ON(sg->length < msb->page_size);
 
 	if (msb->read_only)
@@ -1045,11 +1050,12 @@ static int msb_write_block(struct msb_data *msb,
 		error = msb_run_state_machine(msb, h_msb_write_block);
 
 		/* Sector we just wrote to is assumed erased since its pba
-			was erased. If it wasn't erased, write will succeed
-			and will just clear the bits that were set in the block
-			thus test that what we have written,
-			matches what we expect.
-			We do trust the blocks that we erased */
+		 * was erased. If it wasn't erased, write will succeed
+		 * and will just clear the bits that were set in the block
+		 * thus test that what we have written,
+		 * matches what we expect.
+		 * We do trust the blocks that we erased
+		 */
 		if (!error && (verify_writes ||
 				!test_bit(pba, msb->erased_blocks_bitmap)))
 			error = msb_verify_block(msb, pba, sg, offset);
@@ -1493,6 +1499,7 @@ static int msb_ftl_scan(struct msb_data *msb)
 static void msb_cache_flush_timer(struct timer_list *t)
 {
 	struct msb_data *msb = from_timer(msb, t, cache_flush_timer);
+
 	msb->need_flush_cache = true;
 	queue_work(msb->io_queue, &msb->io_work);
 }
@@ -1673,7 +1680,8 @@ static int msb_cache_read(struct msb_data *msb, int lba,
  * This table content isn't that importaint,
  * One could put here different values, providing that they still
  * cover whole disk.
- * 64 MB entry is what windows reports for my 64M memstick */
+ * 64 MB entry is what windows reports for my 64M memstick
+ */
 
 static const struct chs_entry chs_table[] = {
 /*        size sectors cylynders  heads */
@@ -1706,8 +1714,9 @@ static int msb_init_card(struct memstick_dev *card)
 		return error;
 
 	/* Due to a bug in Jmicron driver written by Alex Dubov,
-	 its serial mode barely works,
-	 so we switch to parallel mode right away */
+	 * its serial mode barely works,
+	 * so we switch to parallel mode right away
+	 */
 	if (host->caps & MEMSTICK_CAP_PAR4)
 		msb_switch_to_parallel(msb);
 
@@ -2033,6 +2042,7 @@ static blk_status_t msb_queue_rq(struct blk_mq_hw_ctx *hctx,
 static int msb_check_card(struct memstick_dev *card)
 {
 	struct msb_data *msb = memstick_get_drvdata(card);
+
 	return (msb->card_dead == 0);
 }
 
@@ -2333,6 +2343,7 @@ static struct memstick_driver msb_driver = {
 static int __init msb_init(void)
 {
 	int rc = memstick_register_driver(&msb_driver);
+
 	if (rc)
 		pr_err("failed to register memstick driver (error %d)\n", rc);
 
-- 
GitLab


From 2f9ae69e5267f53e89e296fccee291975a85f0eb Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Sat, 8 May 2021 10:03:21 +0800
Subject: [PATCH 2793/3804] mmc: usdhi6rol0: fix error return code in
 usdhi6_probe()

Fix to return a negative error code from the error handling case instead
of 0, as done elsewhere in this function.

Fixes: 75fa9ea6e3c0 ("mmc: add a driver for the Renesas usdhi6rol0 SD/SDIO host controller")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210508020321.1677-1-thunder.leizhen@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/usdhi6rol0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
index 615f3d008af1e..b9b79b1089a00 100644
--- a/drivers/mmc/host/usdhi6rol0.c
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -1801,6 +1801,7 @@ static int usdhi6_probe(struct platform_device *pdev)
 
 	version = usdhi6_read(host, USDHI6_VERSION);
 	if ((version & 0xfff) != 0xa0d) {
+		ret = -EPERM;
 		dev_err(dev, "Version not recognized %x\n", version);
 		goto e_clk_off;
 	}
-- 
GitLab


From d03be8c1c7de2ae9ebdcc34b11f9089e2349709d Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 17:33:29 +0800
Subject: [PATCH 2794/3804] mmc: jz4740: Remove redundant error printing in
 jz4740_mmc_probe()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210511093329.4670-1-thunder.leizhen@huawei.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/jz4740_mmc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index b3c636edbb461..861ff6d9661a9 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -1013,7 +1013,6 @@ static int jz4740_mmc_probe(struct platform_device* pdev)
 	host->base = devm_ioremap_resource(&pdev->dev, host->mem_res);
 	if (IS_ERR(host->base)) {
 		ret = PTR_ERR(host->base);
-		dev_err(&pdev->dev, "Failed to ioremap base memory\n");
 		goto err_free_host;
 	}
 
-- 
GitLab


From 110a8688c6cd11e81a1805d5dc24a7a6b5d86a18 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 10 May 2021 14:13:21 +0200
Subject: [PATCH 2795/3804] dt-bindings: mmc: renesas,mmcif: Convert to
 json-schema

Convert the Renesas Multi Media Card Interface (MMCIF) Device Tree
binding documentation to json-schema.

Document missing properties.
Update the example to match reality.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/04b97315fed0f4f512356b68f9f5bb6ed7adc41f.1620648698.git.geert+renesas@glider.be
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../devicetree/bindings/mmc/renesas,mmcif.txt |  53 -------
 .../bindings/mmc/renesas,mmcif.yaml           | 135 ++++++++++++++++++
 2 files changed, 135 insertions(+), 53 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
 create mode 100644 Documentation/devicetree/bindings/mmc/renesas,mmcif.yaml

diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
deleted file mode 100644
index 291532ac0446f..0000000000000
--- a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Renesas Multi Media Card Interface (MMCIF) Controller
-
-This file documents differences between the core properties in mmc.txt
-and the properties used by the MMCIF device.
-
-
-Required properties:
-
-- compatible: should be "renesas,mmcif-<soctype>", "renesas,sh-mmcif" as a
-  fallback. Examples with <soctype> are:
-	- "renesas,mmcif-r7s72100" for the MMCIF found in r7s72100 SoCs
-	- "renesas,mmcif-r8a73a4" for the MMCIF found in r8a73a4 SoCs
-	- "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
-	- "renesas,mmcif-r8a7742" for the MMCIF found in r8a7742 SoCs
-	- "renesas,mmcif-r8a7743" for the MMCIF found in r8a7743 SoCs
-	- "renesas,mmcif-r8a7744" for the MMCIF found in r8a7744 SoCs
-	- "renesas,mmcif-r8a7745" for the MMCIF found in r8a7745 SoCs
-	- "renesas,mmcif-r8a7778" for the MMCIF found in r8a7778 SoCs
-	- "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs
-	- "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs
-	- "renesas,mmcif-r8a7793" for the MMCIF found in r8a7793 SoCs
-	- "renesas,mmcif-r8a7794" for the MMCIF found in r8a7794 SoCs
-	- "renesas,mmcif-sh73a0" for the MMCIF found in sh73a0 SoCs
-
-- interrupts: Some SoCs have only 1 shared interrupt, while others have either
-  2 or 3 individual interrupts (error, int, card detect). Below is the number
-  of interrupts for each SoC:
-    1: r8a73a4, r8a7742, r8a7743, r8a7744, r8a7745, r8a7778, r8a7790, r8a7791,
-       r8a7793, r8a7794
-    2: r8a7740, sh73a0
-    3: r7s72100
-
-- clocks: reference to the functional clock
-
-- dmas: reference to the DMA channels, one per channel name listed in the
-  dma-names property.
-- dma-names: must contain "tx" for the transmit DMA channel and "rx" for the
-  receive DMA channel.
-- max-frequency: Maximum operating clock frequency, driver uses default clock
-  frequency if it is not set.
-
-
-Example: R8A7790 (R-Car H2) MMCIF0
-
-	mmcif0: mmc@ee200000 {
-		compatible = "renesas,mmcif-r8a7790", "renesas,sh-mmcif";
-		reg = <0 0xee200000 0 0x80>;
-		interrupts = <0 169 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp3_clks R8A7790_CLK_MMCIF0>;
-		dmas = <&dmac0 0xd1>, <&dmac0 0xd2>;
-		dma-names = "tx", "rx";
-		max-frequency = <97500000>;
-	};
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.yaml b/Documentation/devicetree/bindings/mmc/renesas,mmcif.yaml
new file mode 100644
index 0000000000000..c36ba561c3875
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.yaml
@@ -0,0 +1,135 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mmc/renesas,mmcif.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Multi Media Card Interface (MMCIF) Controller
+
+maintainers:
+  - Wolfram Sang <wsa+renesas@sang-engineering.com>
+
+allOf:
+  - $ref: "mmc-controller.yaml"
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,mmcif-r7s72100 # RZ/A1H
+          - renesas,mmcif-r8a73a4  # R-Mobile APE6
+          - renesas,mmcif-r8a7740  # R-Mobile A1
+          - renesas,mmcif-r8a7742  # RZ/G1H
+          - renesas,mmcif-r8a7743  # RZ/G1M
+          - renesas,mmcif-r8a7744  # RZ/G1N
+          - renesas,mmcif-r8a7745  # RZ/G1E
+          - renesas,mmcif-r8a7778  # R-Car M1A
+          - renesas,mmcif-r8a7790  # R-Car H2
+          - renesas,mmcif-r8a7791  # R-Car M2-W
+          - renesas,mmcif-r8a7793  # R-Car M2-N
+          - renesas,mmcif-r8a7794  # R-Car E2
+          - renesas,mmcif-sh73a0   # SH-Mobile AG5
+      - const: renesas,sh-mmcif
+
+  reg:
+    maxItems: 1
+
+  interrupts: true
+
+  clocks:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  dmas:
+    minItems: 2
+    maxItems: 4
+    description:
+      Must contain a list of pairs of references to DMA specifiers, one for
+      transmission, and one for reception.
+
+  dma-names:
+    minItems: 2
+    maxItems: 4
+    items:
+      enum:
+        - tx
+        - rx
+
+  max-frequency: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - power-domains
+
+if:
+  properties:
+    compatible:
+      contains:
+        const: renesas,mmcif-r7s72100
+then:
+  properties:
+    interrupts:
+      items:
+        - description: Error interrupt
+        - description: Normal operation interrupt
+        - description: Card detection interrupt
+else:
+  if:
+    properties:
+      compatible:
+        contains:
+          enum:
+            - renesas,mmcif-r8a7740
+            - renesas,mmcif-sh73a0
+  then:
+    properties:
+      interrupts:
+        items:
+          - description: Error interrupt
+          - description: Normal operation interrupt
+  else:
+    if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - renesas,mmcif-r8a73a4
+              - renesas,mmcif-r8a7778
+    then:
+      properties:
+        interrupts:
+          maxItems: 1
+    else:
+      properties:
+        interrupts:
+          maxItems: 1
+      required:
+        - resets
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a7790-cpg-mssr.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/r8a7790-sysc.h>
+
+    mmcif0: mmc@ee200000 {
+            compatible = "renesas,mmcif-r8a7790", "renesas,sh-mmcif";
+            reg = <0xee200000 0x80>;
+            interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&cpg CPG_MOD 315>;
+            power-domains = <&sysc R8A7790_PD_ALWAYS_ON>;
+            resets = <&cpg 315>;
+            dmas = <&dmac0 0xd1>, <&dmac0 0xd2>, <&dmac1 0xd1>, <&dmac1 0xd2>;
+            dma-names = "tx", "rx", "tx", "rx";
+            max-frequency = <97500000>;
+    };
-- 
GitLab


From 34dd3ccccab0b93ebdf7ecde138814d121f72e98 Mon Sep 17 00:00:00 2001
From: Ben Chuang <benchuanggli@gmail.com>
Date: Tue, 11 May 2021 14:18:35 +0800
Subject: [PATCH 2796/3804] mmc: sdhci-pci-gli: Fine tune GL9763E L1 entry
 delay

Fine tune the value to 21us in order to improve read/write performance.

Signed-off-by: Ben Chuang <benchuanggli@gmail.com>
Link: https://lore.kernel.org/r/20210511061835.5559-1-benchuanggli@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-gli.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index 061618aa247f5..4fd99c1e82ba3 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -94,7 +94,7 @@
 
 #define PCIE_GLI_9763E_CFG2      0x8A4
 #define   GLI_9763E_CFG2_L1DLY     GENMASK(28, 19)
-#define   GLI_9763E_CFG2_L1DLY_MID 0x50
+#define   GLI_9763E_CFG2_L1DLY_MID 0x54
 
 #define PCIE_GLI_9763E_MMC_CTRL  0x960
 #define   GLI_9763E_HS400_SLOW     BIT(3)
@@ -847,7 +847,7 @@ static void gli_set_gl9763e(struct sdhci_pci_slot *slot)
 
 	pci_read_config_dword(pdev, PCIE_GLI_9763E_CFG2, &value);
 	value &= ~GLI_9763E_CFG2_L1DLY;
-	/* set ASPM L1 entry delay to 20us */
+	/* set ASPM L1 entry delay to 21us */
 	value |= FIELD_PREP(GLI_9763E_CFG2_L1DLY, GLI_9763E_CFG2_L1DLY_MID);
 	pci_write_config_dword(pdev, PCIE_GLI_9763E_CFG2, value);
 
-- 
GitLab


From 130206a615a9831a65e186484a5a332f9f6d29c8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 11 May 2021 12:13:59 +0200
Subject: [PATCH 2797/3804] mmc: core: Add support for cache ctrl for SD cards

In SD spec v6.x the SD function extension registers for performance
enhancements were introduced. As a part of this an optional internal cache
on the SD card, can be used to improve performance.

To let the SD card use the cache, the host needs to enable it and manage
flushing of the cache, so let's add support for this.

Note that for an SD card supporting the cache, it's mandatory for it to
also support the poweroff notification feature. According to the SD spec,
if the cache has been enabled and a poweroff notification is sent to the
card, that implicitly also means that the card should flush its internal
cache. Therefore, dealing with cache flushing for REQ_OP_FLUSH block
requests is sufficient.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Link: https://lore.kernel.org/r/20210511101359.83521-1-ulf.hansson@linaro.org
---
 drivers/mmc/core/mmc_ops.c |   1 +
 drivers/mmc/core/mmc_ops.h |   1 +
 drivers/mmc/core/sd.c      | 100 +++++++++++++++++++++++++++++++++++++
 include/linux/mmc/card.h   |   1 +
 4 files changed, 103 insertions(+)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index af423acc4c885..3c58f6d0f4821 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -456,6 +456,7 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
 		err = R1_STATUS(status) ? -EIO : 0;
 		break;
 	case MMC_BUSY_HPI:
+	case MMC_BUSY_EXTR_SINGLE:
 		break;
 	default:
 		err = -EINVAL;
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index c3c1d9c2577e6..41ab4f573a310 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -14,6 +14,7 @@ enum mmc_busy_cmd {
 	MMC_BUSY_CMD6,
 	MMC_BUSY_ERASE,
 	MMC_BUSY_HPI,
+	MMC_BUSY_EXTR_SINGLE,
 };
 
 struct mmc_host;
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index bd40c682d2644..781c1e24308c8 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -67,6 +67,7 @@ static const unsigned int sd_au_size[] = {
 	})
 
 #define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000
+#define SD_WRITE_EXTR_SINGLE_TIMEOUT_MS 1000
 
 struct sd_busy_data {
 	struct mmc_card *card;
@@ -1287,6 +1288,96 @@ out:
 	return err;
 }
 
+static bool sd_cache_enabled(struct mmc_host *host)
+{
+	return host->card->ext_perf.feature_enabled & SD_EXT_PERF_CACHE;
+}
+
+static int sd_flush_cache(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+	u8 *reg_buf, fno, page;
+	u16 offset;
+	int err;
+
+	if (!sd_cache_enabled(host))
+		return 0;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	/*
+	 * Set Flush Cache at bit 0 in the performance enhancement register at
+	 * 261 bytes offset.
+	 */
+	fno = card->ext_perf.fno;
+	page = card->ext_perf.page;
+	offset = card->ext_perf.offset + 261;
+
+	err = sd_write_ext_reg(card, fno, page, offset, BIT(0));
+	if (err) {
+		pr_warn("%s: error %d writing Cache Flush bit\n",
+			mmc_hostname(host), err);
+		goto out;
+	}
+
+	err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false,
+				MMC_BUSY_EXTR_SINGLE);
+	if (err)
+		goto out;
+
+	/*
+	 * Read the Flush Cache bit. The card shall reset it, to confirm that
+	 * it's has completed the flushing of the cache.
+	 */
+	err = sd_read_ext_reg(card, fno, page, offset, 1, reg_buf);
+	if (err) {
+		pr_warn("%s: error %d reading Cache Flush bit\n",
+			mmc_hostname(host), err);
+		goto out;
+	}
+
+	if (reg_buf[0] & BIT(0))
+		err = -ETIMEDOUT;
+out:
+	kfree(reg_buf);
+	return err;
+}
+
+static int sd_enable_cache(struct mmc_card *card)
+{
+	u8 *reg_buf;
+	int err;
+
+	card->ext_perf.feature_enabled &= ~SD_EXT_PERF_CACHE;
+
+	reg_buf = kzalloc(512, GFP_KERNEL);
+	if (!reg_buf)
+		return -ENOMEM;
+
+	/*
+	 * Set Cache Enable at bit 0 in the performance enhancement register at
+	 * 260 bytes offset.
+	 */
+	err = sd_write_ext_reg(card, card->ext_perf.fno, card->ext_perf.page,
+			       card->ext_perf.offset + 260, BIT(0));
+	if (err) {
+		pr_warn("%s: error %d writing Cache Enable bit\n",
+			mmc_hostname(card->host), err);
+		goto out;
+	}
+
+	err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false,
+				MMC_BUSY_EXTR_SINGLE);
+	if (!err)
+		card->ext_perf.feature_enabled |= SD_EXT_PERF_CACHE;
+
+out:
+	kfree(reg_buf);
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -1442,6 +1533,13 @@ retry:
 			goto free_card;
 	}
 
+	/* Enable internal SD cache if supported. */
+	if (card->ext_perf.feature_support & SD_EXT_PERF_CACHE) {
+		err = sd_enable_cache(card);
+		if (err)
+			goto free_card;
+	}
+
 	if (host->cqe_ops && !host->cqe_enabled) {
 		err = host->cqe_ops->cqe_enable(host, card);
 		if (!err) {
@@ -1694,6 +1792,8 @@ static const struct mmc_bus_ops mmc_sd_ops = {
 	.alive = mmc_sd_alive,
 	.shutdown = mmc_sd_suspend,
 	.hw_reset = mmc_sd_hw_reset,
+	.cache_enabled = sd_cache_enabled,
+	.flush_cache = sd_flush_cache,
 };
 
 /*
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 2867af0635f8b..74e6c0624d277 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -196,6 +196,7 @@ struct sd_ext_reg {
 	u8			page;
 	u16			offset;
 	u8			rev;
+	u8			feature_enabled;
 	u8			feature_support;
 /* Power Management Function. */
 #define SD_EXT_POWER_OFF_NOTIFY	(1<<0)
-- 
GitLab


From 4d895de3505f7eb9734f679a340c976f8949ab43 Mon Sep 17 00:00:00 2001
From: Lucas Stach <dev@lynxeye.de>
Date: Mon, 10 May 2021 21:03:58 +0200
Subject: [PATCH 2798/3804] dt-bindings: mmc: add no-mmc-hs400 flag

HS400 requires a data strobe line in addition to the usual MMC signal
lines. If a board design neglects to wire up this signal, HS400 mode is
not available, even if both the controller and the eMMC are claiming to
support this mode. Add a DT flag to allow boards to disable the HS400
support in this case.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210510190400.105162-1-l.stach@pengutronix.de
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/mmc-controller.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index e141330c11147..ac80d09df3a9c 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -220,6 +220,11 @@ properties:
     description:
       eMMC HS400 enhanced strobe mode is supported
 
+  no-mmc-hs400:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      All eMMC HS400 modes are not supported.
+
   dsr:
     description:
       Value the card Driver Stage Register (DSR) should be programmed
-- 
GitLab


From 2991ad76d2537a4ebe7132d087cdbc76377da302 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 10 May 2021 21:03:59 +0200
Subject: [PATCH 2799/3804] mmc: sdhci-esdhc-imx: advertise HS400 mode through
 MMC caps

Instead of having an indirection through the SDHCI layer and emulating
a capability bit, that isn't there in hardware, do the same thing
as with HS400_ES and advertise the support for HS400 directly through
the MMC caps.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20210510190400.105162-2-l.stach@pengutronix.de
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index b991cf0e60c5b..fba4a963d12e2 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -427,9 +427,6 @@ static u32 esdhc_readl_le(struct sdhci_host *host, int reg)
 					| FIELD_PREP(SDHCI_RETUNING_MODE_MASK,
 						     SDHCI_TUNING_MODE_3);
 
-			if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
-				val |= SDHCI_SUPPORT_HS400;
-
 			/*
 			 * Do not advertise faster UHS modes if there are no
 			 * pinctrl states for 100MHz/200MHz.
@@ -1591,7 +1588,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 		host->quirks |= SDHCI_QUIRK_BROKEN_ADMA;
 
 	if (imx_data->socdata->flags & ESDHC_FLAG_HS400)
-		host->quirks2 |= SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400;
+		host->mmc->caps2 |= MMC_CAP2_HS400;
 
 	if (imx_data->socdata->flags & ESDHC_FLAG_BROKEN_AUTO_CMD23)
 		host->quirks2 |= SDHCI_QUIRK2_ACMD23_BROKEN;
-- 
GitLab


From f722e650d965307f8c4c24cf4edc166b6abe9dc6 Mon Sep 17 00:00:00 2001
From: Lucas Stach <dev@lynxeye.de>
Date: Mon, 10 May 2021 21:04:00 +0200
Subject: [PATCH 2800/3804] mmc: core: add support for disabling HS400 mode via
 DT

On some boards the data strobe line isn't wired up, rendering HS400
support broken, even if both the controller and the eMMC claim to
support it. Allow disabling HS400 mode via DT.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Link: https://lore.kernel.org/r/20210510190400.105162-3-l.stach@pengutronix.de
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/host.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 0b0577990ddc9..eda4a1892c33c 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -388,6 +388,9 @@ int mmc_of_parse(struct mmc_host *host)
 		host->caps2 |= MMC_CAP2_NO_SD;
 	if (device_property_read_bool(dev, "no-mmc"))
 		host->caps2 |= MMC_CAP2_NO_MMC;
+	if (device_property_read_bool(dev, "no-mmc-hs400"))
+		host->caps2 &= ~(MMC_CAP2_HS400_1_8V | MMC_CAP2_HS400_1_2V |
+				 MMC_CAP2_HS400_ES);
 
 	/* Must be after "non-removable" check */
 	if (device_property_read_u32(dev, "fixed-emmc-driver-type", &drv_type) == 0) {
-- 
GitLab


From 42933c8aa14be1caa9eda41f65cde8a3a95d3e39 Mon Sep 17 00:00:00 2001
From: Tong Zhang <ztong0001@gmail.com>
Date: Tue, 11 May 2021 12:39:45 -0400
Subject: [PATCH 2801/3804] memstick: rtsx_usb_ms: fix UAF

This patch fixes the following issues:
1. memstick_free_host() will free the host, so the use of ms_dev(host) after
it will be a problem. To fix this, move memstick_free_host() to after we
are done with ms_dev(host).
2. In rtsx_usb_ms_drv_remove(), pm needs to be disabled before we remove
and free the host, otherwise memstick_check() will be called and a UAF
will happen.

[   11.351173] BUG: KASAN: use-after-free in rtsx_usb_ms_drv_remove+0x94/0x140 [rtsx_usb_ms]
[   11.357077]  rtsx_usb_ms_drv_remove+0x94/0x140 [rtsx_usb_ms]
[   11.357376]  platform_remove+0x2a/0x50
[   11.367531] Freed by task 298:
[   11.368537]  kfree+0xa4/0x2a0
[   11.368711]  device_release+0x51/0xe0
[   11.368905]  kobject_put+0xa2/0x120
[   11.369090]  rtsx_usb_ms_drv_remove+0x8c/0x140 [rtsx_usb_ms]
[   11.369386]  platform_remove+0x2a/0x50

[   12.038408] BUG: KASAN: use-after-free in __mutex_lock.isra.0+0x3ec/0x7c0
[   12.045432]  mutex_lock+0xc9/0xd0
[   12.046080]  memstick_check+0x6a/0x578 [memstick]
[   12.046509]  process_one_work+0x46d/0x750
[   12.052107] Freed by task 297:
[   12.053115]  kfree+0xa4/0x2a0
[   12.053272]  device_release+0x51/0xe0
[   12.053463]  kobject_put+0xa2/0x120
[   12.053647]  rtsx_usb_ms_drv_remove+0xc4/0x140 [rtsx_usb_ms]
[   12.053939]  platform_remove+0x2a/0x50

Signed-off-by: Tong Zhang <ztong0001@gmail.com>
Co-developed-by: Ulf Hansson <ulf.hansson@linaro.org>
Link: https://lore.kernel.org/r/20210511163944.1233295-1-ztong0001@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/memstick/host/rtsx_usb_ms.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c
index 102dbb8080da5..29271ad4728a2 100644
--- a/drivers/memstick/host/rtsx_usb_ms.c
+++ b/drivers/memstick/host/rtsx_usb_ms.c
@@ -799,9 +799,9 @@ static int rtsx_usb_ms_drv_probe(struct platform_device *pdev)
 
 	return 0;
 err_out:
-	memstick_free_host(msh);
 	pm_runtime_disable(ms_dev(host));
 	pm_runtime_put_noidle(ms_dev(host));
+	memstick_free_host(msh);
 	return err;
 }
 
@@ -828,9 +828,6 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev)
 	}
 	mutex_unlock(&host->host_mutex);
 
-	memstick_remove_host(msh);
-	memstick_free_host(msh);
-
 	/* Balance possible unbalanced usage count
 	 * e.g. unconditional module removal
 	 */
@@ -838,10 +835,11 @@ static int rtsx_usb_ms_drv_remove(struct platform_device *pdev)
 		pm_runtime_put(ms_dev(host));
 
 	pm_runtime_disable(ms_dev(host));
-	platform_set_drvdata(pdev, NULL);
-
+	memstick_remove_host(msh);
 	dev_dbg(ms_dev(host),
 		": Realtek USB Memstick controller has been removed\n");
+	memstick_free_host(msh);
+	platform_set_drvdata(pdev, NULL);
 
 	return 0;
 }
-- 
GitLab


From 8931acce6b771dfe01d23e6d36e0b09f717c90c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?= <afaerber@suse.de>
Date: Mon, 17 May 2021 01:05:48 +0200
Subject: [PATCH 2802/3804] dt-bindings: mmc: rockchip-dw-mshc: Add Rockchip
 RK1808
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a compatible string for Rockchip RK1808 SoC.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210516230551.12469-7-afaerber@suse.de
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
index eaa3b0ef24f65..54fb59820d2b0 100644
--- a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.yaml
@@ -30,6 +30,7 @@ properties:
       - items:
           - enum:
               - rockchip,px30-dw-mshc
+              - rockchip,rk1808-dw-mshc
               - rockchip,rk3036-dw-mshc
               - rockchip,rk3228-dw-mshc
               - rockchip,rk3308-dw-mshc
-- 
GitLab


From 151071351bb6f3d1861e99a22c4cebadf81911a0 Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Mon, 17 May 2021 18:09:00 +0800
Subject: [PATCH 2803/3804] mmc: mediatek: use data instead of mrq parameter
 from msdc_{un}prepare_data()

We already have 'mrq->data' before calling these two functions, no
need to find it again via 'mrq->data' internally. Also remove local
data variable accordingly.

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20210517100900.1620-1-zbestahu@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mtk-sd.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 898ed1b023df6..c4db944ad9bee 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -724,10 +724,8 @@ static inline void msdc_dma_setup(struct msdc_host *host, struct msdc_dma *dma,
 	writel(lower_32_bits(dma->gpd_addr), host->base + MSDC_DMA_SA);
 }
 
-static void msdc_prepare_data(struct msdc_host *host, struct mmc_request *mrq)
+static void msdc_prepare_data(struct msdc_host *host, struct mmc_data *data)
 {
-	struct mmc_data *data = mrq->data;
-
 	if (!(data->host_cookie & MSDC_PREPARE_FLAG)) {
 		data->host_cookie |= MSDC_PREPARE_FLAG;
 		data->sg_count = dma_map_sg(host->dev, data->sg, data->sg_len,
@@ -735,10 +733,8 @@ static void msdc_prepare_data(struct msdc_host *host, struct mmc_request *mrq)
 	}
 }
 
-static void msdc_unprepare_data(struct msdc_host *host, struct mmc_request *mrq)
+static void msdc_unprepare_data(struct msdc_host *host, struct mmc_data *data)
 {
-	struct mmc_data *data = mrq->data;
-
 	if (data->host_cookie & MSDC_ASYNC_FLAG)
 		return;
 
@@ -1140,7 +1136,7 @@ static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq)
 
 	msdc_track_cmd_data(host, mrq->cmd, mrq->data);
 	if (mrq->data)
-		msdc_unprepare_data(host, mrq);
+		msdc_unprepare_data(host, mrq->data);
 	if (host->error)
 		msdc_reset_hw(host);
 	mmc_request_done(mmc_from_priv(host), mrq);
@@ -1311,7 +1307,7 @@ static void msdc_ops_request(struct mmc_host *mmc, struct mmc_request *mrq)
 	host->mrq = mrq;
 
 	if (mrq->data)
-		msdc_prepare_data(host, mrq);
+		msdc_prepare_data(host, mrq->data);
 
 	/* if SBC is required, we have HW option and SW option.
 	 * if HW option is enabled, and SBC does not have "special" flags,
@@ -1332,7 +1328,7 @@ static void msdc_pre_req(struct mmc_host *mmc, struct mmc_request *mrq)
 	if (!data)
 		return;
 
-	msdc_prepare_data(host, mrq);
+	msdc_prepare_data(host, data);
 	data->host_cookie |= MSDC_ASYNC_FLAG;
 }
 
@@ -1340,14 +1336,14 @@ static void msdc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 		int err)
 {
 	struct msdc_host *host = mmc_priv(mmc);
-	struct mmc_data *data;
+	struct mmc_data *data = mrq->data;
 
-	data = mrq->data;
 	if (!data)
 		return;
+
 	if (data->host_cookie) {
 		data->host_cookie &= ~MSDC_ASYNC_FLAG;
-		msdc_unprepare_data(host, mrq);
+		msdc_unprepare_data(host, data);
 	}
 }
 
-- 
GitLab


From f0ed43edb4cb793f4d9909feef32510ed77ed35a Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Mon, 17 May 2021 18:54:32 +0800
Subject: [PATCH 2804/3804] mmc: mediatek: remove useless data parameter from
 msdc_data_xfer_next()

We do not use the 'data' in msdc_data_xfer_next().

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20210517105432.1682-1-zbestahu@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mtk-sd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index c4db944ad9bee..4dfc246c5f95d 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -1347,8 +1347,7 @@ static void msdc_post_req(struct mmc_host *mmc, struct mmc_request *mrq,
 	}
 }
 
-static void msdc_data_xfer_next(struct msdc_host *host,
-				struct mmc_request *mrq, struct mmc_data *data)
+static void msdc_data_xfer_next(struct msdc_host *host, struct mmc_request *mrq)
 {
 	if (mmc_op_multi(mrq->cmd->opcode) && mrq->stop && !mrq->stop->error &&
 	    !mrq->sbc)
@@ -1407,7 +1406,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
 				(int)data->error, data->bytes_xfered);
 		}
 
-		msdc_data_xfer_next(host, mrq, data);
+		msdc_data_xfer_next(host, mrq);
 		done = true;
 	}
 	return done;
-- 
GitLab


From 1e9daaf616a2f053eb80e20a84b47ebf2d5e20d3 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 19 May 2021 10:37:12 -0500
Subject: [PATCH 2805/3804] dt-bindings: mmc: Clean-up examples to match
 documented bindings

The "sdhci" compatible is not documented though used as a fallback in a
few cases. It is also not supported by a Linux driver. Just remove the
example as part of ridding examples of undocumented bindings.

The "brcm,bcm43xx-fmac" compatible is also not documented. Update the
example to use one of the correct ones, "brcm,bcm4329-fmac", instead and
use a device class based nodename.

Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: linux-mmc@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210519153712.3146025-1-robh@kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 .../bindings/mmc/mmc-controller.yaml          | 20 ++-----------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index ac80d09df3a9c..25ac8e2009708 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -362,22 +362,6 @@ dependencies:
 additionalProperties: true
 
 examples:
-  - |
-    mmc@ab000000 {
-        compatible = "sdhci";
-        reg = <0xab000000 0x200>;
-        interrupts = <23>;
-        bus-width = <4>;
-        cd-gpios = <&gpio 69 0>;
-        cd-inverted;
-        wp-gpios = <&gpio 70 0>;
-        max-frequency = <50000000>;
-        keep-power-in-suspend;
-        wakeup-source;
-        mmc-pwrseq = <&sdhci0_pwrseq>;
-        clk-phase-sd-hs = <63>, <72>;
-    };
-
   - |
     mmc3: mmc@1c12000 {
         #address-cells = <1>;
@@ -390,9 +374,9 @@ examples:
         non-removable;
         mmc-pwrseq = <&sdhci0_pwrseq>;
 
-        brcmf: bcrmf@1 {
+        brcmf: wifi@1 {
             reg = <1>;
-            compatible = "brcm,bcm43xx-fmac";
+            compatible = "brcm,bcm4329-fmac";
             interrupt-parent = <&pio>;
             interrupts = <10 8>;
             interrupt-names = "host-wake";
-- 
GitLab


From 47d23c95fe0518b6e4cf7d7f6829987f377bd0d3 Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Thu, 20 May 2021 18:13:30 +0800
Subject: [PATCH 2806/3804] mmc: cqhci: fix typo

'descritors' -> 'descriptors'

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20210520101330.2255-1-zbestahu@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cqhci-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index 93b0432bb6011..54f4f9ad249d0 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -146,7 +146,7 @@ static void cqhci_dumpregs(struct cqhci_host *cq_host)
 }
 
 /*
- * The allocated descriptor table for task, link & transfer descritors
+ * The allocated descriptor table for task, link & transfer descriptors
  * looks like:
  * |----------|
  * |task desc |  |->|----------|
-- 
GitLab


From 83c49302566ea7328e6962fb81ceada2c93486b9 Mon Sep 17 00:00:00 2001
From: Yue Hu <huyue2@yulong.com>
Date: Fri, 21 May 2021 11:44:32 +0800
Subject: [PATCH 2807/3804] mmc: cqhci: introduce get_trans_desc_offset()

The same calculation to get transfer descriptor offset is already used
at 3 different locations. Let's create a new helper to simplify code.

Signed-off-by: Yue Hu <huyue2@yulong.com>
Link: https://lore.kernel.org/r/20210521034432.2321-1-zbestahu@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/cqhci-core.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c
index 54f4f9ad249d0..38559a956330b 100644
--- a/drivers/mmc/host/cqhci-core.c
+++ b/drivers/mmc/host/cqhci-core.c
@@ -45,17 +45,23 @@ static inline u8 *get_link_desc(struct cqhci_host *cq_host, u8 tag)
 	return desc + cq_host->task_desc_len;
 }
 
+static inline size_t get_trans_desc_offset(struct cqhci_host *cq_host, u8 tag)
+{
+	return cq_host->trans_desc_len * cq_host->mmc->max_segs * tag;
+}
+
 static inline dma_addr_t get_trans_desc_dma(struct cqhci_host *cq_host, u8 tag)
 {
-	return cq_host->trans_desc_dma_base +
-		(cq_host->mmc->max_segs * tag *
-		 cq_host->trans_desc_len);
+	size_t offset = get_trans_desc_offset(cq_host, tag);
+
+	return cq_host->trans_desc_dma_base + offset;
 }
 
 static inline u8 *get_trans_desc(struct cqhci_host *cq_host, u8 tag)
 {
-	return cq_host->trans_desc_base +
-		(cq_host->trans_desc_len * cq_host->mmc->max_segs * tag);
+	size_t offset = get_trans_desc_offset(cq_host, tag);
+
+	return cq_host->trans_desc_base + offset;
 }
 
 static void setup_trans_desc(struct cqhci_host *cq_host, u8 tag)
@@ -194,8 +200,7 @@ static int cqhci_host_alloc_tdl(struct cqhci_host *cq_host)
 
 	cq_host->desc_size = cq_host->slot_sz * cq_host->num_slots;
 
-	cq_host->data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs *
-		cq_host->mmc->cqe_qdepth;
+	cq_host->data_size = get_trans_desc_offset(cq_host, cq_host->mmc->cqe_qdepth);
 
 	pr_debug("%s: cqhci: desc_size: %zu data_sz: %zu slot-sz: %d\n",
 		 mmc_hostname(cq_host->mmc), cq_host->desc_size, cq_host->data_size,
-- 
GitLab


From 05335af1e82a3ce2a7d410c7b1695a3c4ec37ea9 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Thu, 20 May 2021 20:21:44 +0800
Subject: [PATCH 2808/3804] mmc: core: Use pm_runtime_resume_and_get() to
 replace open coding

Use pm_runtime_resume_and_get() to replace the open-coded pair of
pm_runtime_get_sync() and pm_runtime_put_noidle(). This change only
simplifies the code; there are no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1621513304-27824-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/sdio.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 3eb94ac2712e7..68edf7a615be5 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -937,11 +937,9 @@ static void mmc_sdio_detect(struct mmc_host *host)
 
 	/* Make sure card is powered before detecting it */
 	if (host->caps & MMC_CAP_POWER_OFF_CARD) {
-		err = pm_runtime_get_sync(&host->card->dev);
-		if (err < 0) {
-			pm_runtime_put_noidle(&host->card->dev);
+		err = pm_runtime_resume_and_get(&host->card->dev);
+		if (err < 0)
 			goto out;
-		}
 	}
 
 	mmc_claim_host(host);
-- 
GitLab


From 07e70346e21eb8f018c8478cc4881ad9026bb12e Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Fri, 21 May 2021 08:59:35 +0800
Subject: [PATCH 2809/3804] mmc: sdhci_am654: Use pm_runtime_resume_and_get()
 to replace open coding

Use pm_runtime_resume_and_get() to replace the open-coded pair of
pm_runtime_get_sync() and pm_runtime_put_noidle(). This change only
simplifies the code; there are no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1621558775-31185-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_am654.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 1fad6e442688f..f654afbe8e83c 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -809,11 +809,9 @@ static int sdhci_am654_probe(struct platform_device *pdev)
 
 	/* Clocks are enabled using pm_runtime */
 	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
-		pm_runtime_put_noidle(dev);
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret)
 		goto pm_runtime_disable;
-	}
 
 	base = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(base)) {
-- 
GitLab


From 809ae4e1ca0a719db29ed747fc2febf099a77b9f Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Fri, 21 May 2021 09:02:45 +0800
Subject: [PATCH 2810/3804] mmc: sdhci-omap: Use pm_runtime_resume_and_get() to
 replace open coding

Use pm_runtime_resume_and_get() to replace the open-coded pair of
pm_runtime_get_sync() and pm_runtime_put_noidle(). This change only
simplifies the code; there are no functional changes.

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Link: https://lore.kernel.org/r/1621558965-34077-1-git-send-email-tiantao6@hisilicon.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-omap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 7893fd3599b61..8f4d1f003f656 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -1173,10 +1173,9 @@ static int sdhci_omap_probe(struct platform_device *pdev)
 	 * as part of pm_runtime_get_sync.
 	 */
 	pm_runtime_enable(dev);
-	ret = pm_runtime_get_sync(dev);
-	if (ret < 0) {
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret) {
 		dev_err(dev, "pm_runtime_get_sync failed\n");
-		pm_runtime_put_noidle(dev);
 		goto err_rpm_disable;
 	}
 
-- 
GitLab


From 3c0bb3107703d2c58f7a0a7a2060bb57bc120326 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Fri, 21 May 2021 15:30:26 +0200
Subject: [PATCH 2811/3804] mmc: vub300: fix control-request direction

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Fix the SET_ROM_WAIT_STATES request which erroneously used
usb_rcvctrlpipe().

Fixes: 88095e7b473a ("mmc: Add new VUB300 USB-to-SD/SDIO/MMC driver")
Cc: stable@vger.kernel.org      # 3.0
Signed-off-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20210521133026.17296-1-johan@kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/vub300.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index 739cf63ef6e2f..4950d10d3a191 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -2279,7 +2279,7 @@ static int vub300_probe(struct usb_interface *interface,
 	if (retval < 0)
 		goto error5;
 	retval =
-		usb_control_msg(vub300->udev, usb_rcvctrlpipe(vub300->udev, 0),
+		usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0),
 				SET_ROM_WAIT_STATES,
 				USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
 				firmware_rom_wait_states, 0x0000, NULL, 0, HZ);
-- 
GitLab


From 7c45b2268e515b83772ff9c5d3db5f16ae3ca3bf Mon Sep 17 00:00:00 2001
From: Aviral Gupta <shiv14112001@gmail.com>
Date: Sun, 23 May 2021 21:29:15 +0530
Subject: [PATCH 2812/3804] mmc: core: Add a missing SPDX license header

Add the missing license header to drivers/mmc/core/block.c

Signed-off-by: Aviral Gupta <shiv14112001@gmail.com>
Link: https://lore.kernel.org/r/20210523155914.5200-1-shiv14112001@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index f85e107895da7..88f4c215caa6d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Block driver for media (i.e., flash cards)
  *
-- 
GitLab


From dd646d982cf9b3b0c04c6f418bb91c275ce86336 Mon Sep 17 00:00:00 2001
From: Steven Lee <steven_lee@aspeedtech.com>
Date: Mon, 24 May 2021 15:32:56 +0800
Subject: [PATCH 2813/3804] mmc: sdhci-of-aspeed: Configure the SDHCIs as
 specified by the devicetree.

The hardware provides capability configuration registers for each SDHCI
in the global configuration space for the SD controller. Writes to the
global capability registers are mirrored to the capability registers in
the associated SDHCI. Configuration of the capabilities must be written
through the mirror registers prior to initialisation of the SDHCI.

Signed-off-by: Steven Lee <steven_lee@aspeedtech.com>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20210524073308.9328-5-steven_lee@aspeedtech.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-aspeed.c | 48 ++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c
index d001c51074a06..65b5685f6c15e 100644
--- a/drivers/mmc/host/sdhci-of-aspeed.c
+++ b/drivers/mmc/host/sdhci-of-aspeed.c
@@ -31,6 +31,11 @@
 #define   ASPEED_SDC_S0_PHASE_OUT_EN	GENMASK(1, 0)
 #define   ASPEED_SDC_PHASE_MAX		31
 
+/* SDIO{10,20} */
+#define ASPEED_SDC_CAP1_1_8V           (0 * 32 + 26)
+/* SDIO{14,24} */
+#define ASPEED_SDC_CAP2_SDR104         (1 * 32 + 1)
+
 struct aspeed_sdc {
 	struct clk *clk;
 	struct resource *res;
@@ -72,6 +77,37 @@ struct aspeed_sdhci {
 	const struct aspeed_sdhci_phase_desc *phase_desc;
 };
 
+/*
+ * The function sets the mirror register for updating
+ * capbilities of the current slot.
+ *
+ *   slot | capability  | caps_reg | mirror_reg
+ *   -----|-------------|----------|------------
+ *     0  | CAP1_1_8V   | SDIO140  |   SDIO10
+ *     0  | CAP2_SDR104 | SDIO144  |   SDIO14
+ *     1  | CAP1_1_8V   | SDIO240  |   SDIO20
+ *     1  | CAP2_SDR104 | SDIO244  |   SDIO24
+ */
+static void aspeed_sdc_set_slot_capability(struct sdhci_host *host, struct aspeed_sdc *sdc,
+					   int capability, bool enable, u8 slot)
+{
+	u32 mirror_reg_offset;
+	u32 cap_val;
+	u8 cap_reg;
+
+	if (slot > 1)
+		return;
+
+	cap_reg = capability / 32;
+	cap_val = sdhci_readl(host, 0x40 + (cap_reg * 4));
+	if (enable)
+		cap_val |= BIT(capability % 32);
+	else
+		cap_val &= ~BIT(capability % 32);
+	mirror_reg_offset = ((slot + 1) * 0x10) + (cap_reg * 4);
+	writel(cap_val, sdc->regs + mirror_reg_offset);
+}
+
 static void aspeed_sdc_configure_8bit_mode(struct aspeed_sdc *sdc,
 					   struct aspeed_sdhci *sdhci,
 					   bool bus8)
@@ -328,6 +364,7 @@ static inline int aspeed_sdhci_calculate_slot(struct aspeed_sdhci *dev,
 static int aspeed_sdhci_probe(struct platform_device *pdev)
 {
 	const struct aspeed_sdhci_pdata *aspeed_pdata;
+	struct device_node *np = pdev->dev.of_node;
 	struct sdhci_pltfm_host *pltfm_host;
 	struct aspeed_sdhci *dev;
 	struct sdhci_host *host;
@@ -372,6 +409,17 @@ static int aspeed_sdhci_probe(struct platform_device *pdev)
 
 	sdhci_get_of_property(pdev);
 
+	if (of_property_read_bool(np, "mmc-hs200-1_8v") ||
+	    of_property_read_bool(np, "sd-uhs-sdr104")) {
+		aspeed_sdc_set_slot_capability(host, dev->parent, ASPEED_SDC_CAP1_1_8V,
+					       true, slot);
+	}
+
+	if (of_property_read_bool(np, "sd-uhs-sdr104")) {
+		aspeed_sdc_set_slot_capability(host, dev->parent, ASPEED_SDC_CAP2_SDR104,
+					       true, slot);
+	}
+
 	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pltfm_host->clk))
 		return PTR_ERR(pltfm_host->clk);
-- 
GitLab


From de905475bd4bd0805a33ba64b614d837e4bd292a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 27 May 2021 21:42:26 +0200
Subject: [PATCH 2814/3804] mmc: dw_mmc-pltfm: Remove unused <linux/clk.h>

As of commit 4cdc2ec1da322776 ("mmc: dw_mmc: move rockchip related code
to a separate file"), dw_mmc-pltfm.c no longer uses the clock API.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reviewed-by: Jaehoon Chung <jh80.chung@samsung.com>
Link: https://lore.kernel.org/r/20210527194226.1705607-1-geert@linux-m68k.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc-pltfm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c
index 73731cd3ba231..9901208be7973 100644
--- a/drivers/mmc/host/dw_mmc-pltfm.c
+++ b/drivers/mmc/host/dw_mmc-pltfm.c
@@ -17,7 +17,6 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/of.h>
-#include <linux/clk.h>
 
 #include "dw_mmc.h"
 #include "dw_mmc-pltfm.h"
-- 
GitLab


From 91445d5eed6b19d6d31506fb7c3f65e9acc175c5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 28 May 2021 14:21:26 +0300
Subject: [PATCH 2815/3804] mmc: mmc_spi: Drop duplicate 'mmc_spi' in the debug
 messages

dev_dbg() in any case prints the device and driver name, no need
to repeat this in (some) messages. Drop duplicates for good.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210528112127.71738-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmc_spi.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 9776a03a10f5d..65c65bb5737fc 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -504,7 +504,7 @@ mmc_spi_command_send(struct mmc_spi_host *host,
 		/* else:  R1 (most commands) */
 	}
 
-	dev_dbg(&host->spi->dev, "  mmc_spi: CMD%d, resp %s\n",
+	dev_dbg(&host->spi->dev, "  CMD%d, resp %s\n",
 		cmd->opcode, maptype(cmd));
 
 	/* send command, leaving chipselect active */
@@ -928,8 +928,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
 		while (length) {
 			t->len = min(length, blk_size);
 
-			dev_dbg(&host->spi->dev,
-				"    mmc_spi: %s block, %d bytes\n",
+			dev_dbg(&host->spi->dev, "    %s block, %d bytes\n",
 				(direction == DMA_TO_DEVICE) ? "write" : "read",
 				t->len);
 
@@ -974,7 +973,7 @@ mmc_spi_data_do(struct mmc_spi_host *host, struct mmc_command *cmd,
 		int		tmp;
 		const unsigned	statlen = sizeof(scratch->status);
 
-		dev_dbg(&spi->dev, "    mmc_spi: STOP_TRAN\n");
+		dev_dbg(&spi->dev, "    STOP_TRAN\n");
 
 		/* Tweak the per-block message we set up earlier by morphing
 		 * it to hold single buffer with the token followed by some
@@ -1175,7 +1174,7 @@ static void mmc_spi_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 		canpower = host->pdata && host->pdata->setpower;
 
-		dev_dbg(&host->spi->dev, "mmc_spi: power %s (%d)%s\n",
+		dev_dbg(&host->spi->dev, "power %s (%d)%s\n",
 				mmc_powerstring(ios->power_mode),
 				ios->vdd,
 				canpower ? ", can switch" : "");
@@ -1248,8 +1247,7 @@ static void mmc_spi_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 
 		host->spi->max_speed_hz = ios->clock;
 		status = spi_setup(host->spi);
-		dev_dbg(&host->spi->dev,
-			"mmc_spi:  clock to %d Hz, %d\n",
+		dev_dbg(&host->spi->dev, "  clock to %d Hz, %d\n",
 			host->spi->max_speed_hz, status);
 	}
 }
-- 
GitLab


From 706998e70104d93d7e137b92ff0c216aee66c0dd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 28 May 2021 14:21:27 +0300
Subject: [PATCH 2816/3804] mmc: mmc_spi: Imply container_of() to be no-op

Since we don't use structure field layout randomization, the manual
shuffling can affect some macros, in particular container_of() against
struct of_mmc_spi, which becomes a no-op when the pdata member is the
first one in the structure.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210528112127.71738-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/of_mmc_spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/of_mmc_spi.c b/drivers/mmc/host/of_mmc_spi.c
index 9d480a05f6554..3629550528b61 100644
--- a/drivers/mmc/host/of_mmc_spi.c
+++ b/drivers/mmc/host/of_mmc_spi.c
@@ -22,8 +22,8 @@
 MODULE_LICENSE("GPL");
 
 struct of_mmc_spi {
-	int detect_irq;
 	struct mmc_spi_platform_data pdata;
+	int detect_irq;
 };
 
 static struct of_mmc_spi *to_of_mmc_spi(struct device *dev)
-- 
GitLab


From 09247e110b2efce3a104e57e887c373e0a57a412 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20L=C3=B6hle?= <CLoehle@hyperstone.com>
Date: Wed, 12 May 2021 16:03:24 +0000
Subject: [PATCH 2817/3804] mmc: core: Allow UHS-I voltage switch for SDSC
 cards if supported

While initializing a UHS-I SD card, the mmc core first tries to switch to
1.8V I/O voltage, before it continues to change the settings for the bus
speed mode.

However, the current behaviour in the mmc core is inconsistent and doesn't
conform to the SD spec. More precisely, an SD card that supports UHS-I must
set both the SD_OCR_CCS bit and the SD_OCR_S18R bit in the OCR register
response. When switching to 1.8V I/O the mmc core correctly checks both of
the bits, but only the SD_OCR_S18R bit when changing the settings for bus
speed mode.

Rather than actually fixing the code to conform to the SD spec, let's
deliberately deviate from it by requiring only the SD_OCR_S18R bit for both
parts. This enables us to support UHS-I for SDSC cards (outside spec),
which is actually being supported by some existing SDSC cards. Moreover,
this fixes the inconsistent behaviour.

Signed-off-by: Christian Loehle <cloehle@hyperstone.com>
Link: https://lore.kernel.org/r/CWXP265MB26803AE79E0AD5ED083BF2A6C4529@CWXP265MB2680.GBRP265.PROD.OUTLOOK.COM
Cc: stable@vger.kernel.org
[Ulf: Rewrote commit message and comments to clarify the changes]
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/sd.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 781c1e24308c8..4646b7a03db6b 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -857,11 +857,13 @@ try_again:
 		return err;
 
 	/*
-	 * In case CCS and S18A in the response is set, start Signal Voltage
-	 * Switch procedure. SPI mode doesn't support CMD11.
+	 * In case the S18A bit is set in the response, let's start the signal
+	 * voltage switch procedure. SPI mode doesn't support CMD11.
+	 * Note that, according to the spec, the S18A bit is not valid unless
+	 * the CCS bit is set as well. We deliberately deviate from the spec in
+	 * regards to this, which allows UHS-I to be supported for SDSC cards.
 	 */
-	if (!mmc_host_is_spi(host) && rocr &&
-	   ((*rocr & 0x41000000) == 0x41000000)) {
+	if (!mmc_host_is_spi(host) && rocr && (*rocr & 0x01000000)) {
 		err = mmc_set_uhs_voltage(host, pocr);
 		if (err == -EAGAIN) {
 			retries--;
-- 
GitLab


From 15dd8dc9ffcca7f4f77bffef44dca26678489459 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 1 Jun 2021 11:54:02 +0200
Subject: [PATCH 2818/3804] mmc: sdhci-esdhc-imx: remove unused is_imx6q_usdhc

The is_imx6q_usdhc() function is unused:

  drivers/mmc/host/sdhci-esdhc-imx.c:343:19: warning: unused function 'is_imx6q_usdhc'

Reported-by: kernel test robot <lkp@intel.com>
Fixes: f47c4bbfa283 ("mmc: sdhci-esdhc-imx: create struct esdhc_soc_data")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20210601095403.236007-1-krzysztof.kozlowski@canonical.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index fba4a963d12e2..7336ae7491377 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -324,11 +324,6 @@ static inline int is_imx53_esdhc(struct pltfm_imx_data *data)
 	return data->socdata == &esdhc_imx53_data;
 }
 
-static inline int is_imx6q_usdhc(struct pltfm_imx_data *data)
-{
-	return data->socdata == &usdhc_imx6q_data;
-}
-
 static inline int esdhc_is_usdhc(struct pltfm_imx_data *data)
 {
 	return !!(data->socdata->flags & ESDHC_FLAG_USDHC);
-- 
GitLab


From 961470820021e6f9d74db4837bd6831a1a30341b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Tue, 1 Jun 2021 11:54:03 +0200
Subject: [PATCH 2819/3804] mmc: sdhci-sprd: use sdhci_sprd_writew

The sdhci_sprd_writew() was defined but never used in sdhci_ops:

    drivers/mmc/host/sdhci-sprd.c:134:20: warning: unused function 'sdhci_sprd_writew'

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Link: https://lore.kernel.org/r/20210601095403.236007-2-krzysztof.kozlowski@canonical.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-sprd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c
index 5dc36efff47ff..11e375579cfb9 100644
--- a/drivers/mmc/host/sdhci-sprd.c
+++ b/drivers/mmc/host/sdhci-sprd.c
@@ -393,6 +393,7 @@ static void sdhci_sprd_request_done(struct sdhci_host *host,
 static struct sdhci_ops sdhci_sprd_ops = {
 	.read_l = sdhci_sprd_readl,
 	.write_l = sdhci_sprd_writel,
+	.write_w = sdhci_sprd_writew,
 	.write_b = sdhci_sprd_writeb,
 	.set_clock = sdhci_sprd_set_clock,
 	.get_max_clock = sdhci_sprd_get_max_clock,
-- 
GitLab


From 45c8ddd06c4b729c56a6083ab311bfbd9643f4a6 Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Thu, 3 Jun 2021 13:33:20 +0000
Subject: [PATCH 2820/3804] mmc: via-sdmmc: add a check against NULL pointer
 dereference

Before dereferencing 'host->data', the driver needs to check whether it is
a NULL pointer; otherwise it will cause a NULL pointer dereference.

This log reveals it:

[   29.355199] BUG: kernel NULL pointer dereference, address:
0000000000000014
[   29.357323] #PF: supervisor write access in kernel mode
[   29.357706] #PF: error_code(0x0002) - not-present page
[   29.358088] PGD 0 P4D 0
[   29.358280] Oops: 0002 [#1] PREEMPT SMP PTI
[   29.358595] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.12.4-
g70e7f0549188-dirty #102
[   29.359164] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014
[   29.359978] RIP: 0010:via_sdc_isr+0x21f/0x410
[   29.360314] Code: ff ff e8 84 aa d0 fd 66 45 89 7e 28 66 41 f7 c4 00
10 75 56 e8 72 aa d0 fd 66 41 f7 c4 00 c0 74 10 e8 65 aa d0 fd 48 8b 43
18 <c7> 40 14 ac ff ff ff e8 55 aa d0 fd 48 89 df e8 ad fb ff ff e9 77
[   29.361661] RSP: 0018:ffffc90000118e98 EFLAGS: 00010046
[   29.362042] RAX: 0000000000000000 RBX: ffff888107d77880
RCX: 0000000000000000
[   29.362564] RDX: 0000000000000000 RSI: ffffffff835d20bb
RDI: 00000000ffffffff
[   29.363085] RBP: ffffc90000118ed8 R08: 0000000000000001
R09: 0000000000000001
[   29.363604] R10: 0000000000000000 R11: 0000000000000001
R12: 0000000000008600
[   29.364128] R13: ffff888107d779c8 R14: ffffc90009c00200
R15: 0000000000008000
[   29.364651] FS:  0000000000000000(0000) GS:ffff88817bc80000(0000)
knlGS:0000000000000000
[   29.365235] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   29.365655] CR2: 0000000000000014 CR3: 0000000005a2e000
CR4: 00000000000006e0
[   29.366170] DR0: 0000000000000000 DR1: 0000000000000000
DR2: 0000000000000000
[   29.366683] DR3: 0000000000000000 DR6: 00000000fffe0ff0
DR7: 0000000000000400
[   29.367197] Call Trace:
[   29.367381]  <IRQ>
[   29.367537]  __handle_irq_event_percpu+0x53/0x3e0
[   29.367916]  handle_irq_event_percpu+0x35/0x90
[   29.368247]  handle_irq_event+0x39/0x60
[   29.368632]  handle_fasteoi_irq+0xc2/0x1d0
[   29.368950]  __common_interrupt+0x7f/0x150
[   29.369254]  common_interrupt+0xb4/0xd0
[   29.369547]  </IRQ>
[   29.369708]  asm_common_interrupt+0x1e/0x40
[   29.370016] RIP: 0010:native_safe_halt+0x17/0x20
[   29.370360] Code: 07 0f 00 2d db 80 43 00 f4 5d c3 0f 1f 84 00 00 00
00 00 8b 05 c2 37 e5 01 55 48 89 e5 85 c0 7e 07 0f 00 2d bb 80 43 00 fb
f4 <5d> c3 cc cc cc cc cc cc cc 55 48 89 e5 e8 67 53 ff ff 8b 0d f9 91
[   29.371696] RSP: 0018:ffffc9000008fe90 EFLAGS: 00000246
[   29.372079] RAX: 0000000000000000 RBX: 0000000000000002
RCX: 0000000000000000
[   29.372595] RDX: 0000000000000000 RSI: ffffffff854f67a4
RDI: ffffffff85403406
[   29.373122] RBP: ffffc9000008fe90 R08: 0000000000000001
R09: 0000000000000001
[   29.373646] R10: 0000000000000000 R11: 0000000000000001
R12: ffffffff86009188
[   29.374160] R13: 0000000000000000 R14: 0000000000000000
R15: ffff888100258000
[   29.374690]  default_idle+0x9/0x10
[   29.374944]  arch_cpu_idle+0xa/0x10
[   29.375198]  default_idle_call+0x6e/0x250
[   29.375491]  do_idle+0x1f0/0x2d0
[   29.375740]  cpu_startup_entry+0x18/0x20
[   29.376034]  start_secondary+0x11f/0x160
[   29.376328]  secondary_startup_64_no_verify+0xb0/0xbb
[   29.376705] Modules linked in:
[   29.376939] Dumping ftrace buffer:
[   29.377187]    (ftrace buffer empty)
[   29.377460] CR2: 0000000000000014
[   29.377712] ---[ end trace 51a473dffb618c47 ]---
[   29.378056] RIP: 0010:via_sdc_isr+0x21f/0x410
[   29.378380] Code: ff ff e8 84 aa d0 fd 66 45 89 7e 28 66 41 f7 c4 00
10 75 56 e8 72 aa d0 fd 66 41 f7 c4 00 c0 74 10 e8 65 aa d0 fd 48 8b 43
18 <c7> 40 14 ac ff ff ff e8 55 aa d0 fd 48 89 df e8 ad fb ff ff e9 77
[   29.379714] RSP: 0018:ffffc90000118e98 EFLAGS: 00010046
[   29.380098] RAX: 0000000000000000 RBX: ffff888107d77880
RCX: 0000000000000000
[   29.380614] RDX: 0000000000000000 RSI: ffffffff835d20bb
RDI: 00000000ffffffff
[   29.381134] RBP: ffffc90000118ed8 R08: 0000000000000001
R09: 0000000000000001
[   29.381653] R10: 0000000000000000 R11: 0000000000000001
R12: 0000000000008600
[   29.382176] R13: ffff888107d779c8 R14: ffffc90009c00200
R15: 0000000000008000
[   29.382697] FS:  0000000000000000(0000) GS:ffff88817bc80000(0000)
knlGS:0000000000000000
[   29.383277] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   29.383697] CR2: 0000000000000014 CR3: 0000000005a2e000
CR4: 00000000000006e0
[   29.384223] DR0: 0000000000000000 DR1: 0000000000000000
DR2: 0000000000000000
[   29.384736] DR3: 0000000000000000 DR6: 00000000fffe0ff0
DR7: 0000000000000400
[   29.385260] Kernel panic - not syncing: Fatal exception in interrupt
[   29.385882] Dumping ftrace buffer:
[   29.386135]    (ftrace buffer empty)
[   29.386401] Kernel Offset: disabled
[   29.386656] Rebooting in 1 seconds..

Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Link: https://lore.kernel.org/r/1622727200-15808-1-git-send-email-zheyuma97@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/via-sdmmc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/mmc/host/via-sdmmc.c b/drivers/mmc/host/via-sdmmc.c
index a1d0985600990..c32df5530b943 100644
--- a/drivers/mmc/host/via-sdmmc.c
+++ b/drivers/mmc/host/via-sdmmc.c
@@ -857,6 +857,9 @@ static void via_sdc_data_isr(struct via_crdr_mmc_host *host, u16 intmask)
 {
 	BUG_ON(intmask == 0);
 
+	if (!host->data)
+		return;
+
 	if (intmask & VIA_CRDR_SDSTS_DT)
 		host->data->error = -ETIMEDOUT;
 	else if (intmask & (VIA_CRDR_SDSTS_RC | VIA_CRDR_SDSTS_WC))
-- 
GitLab


From 039259156b3bbe62bff3492f007f0dd247013fa6 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Fri, 4 Jun 2021 12:44:59 +0200
Subject: [PATCH 2821/3804] mmc: debugfs: add description for module parameter

Make it obvious what this is for.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210604104459.7574-1-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/debugfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 9ec84c86c46af..3fdbc801e64a6 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -26,6 +26,7 @@
 static DECLARE_FAULT_ATTR(fail_default_attr);
 static char *fail_request;
 module_param(fail_request, charp, 0);
+MODULE_PARM_DESC(fail_request, "default fault injection attributes");
 
 #endif /* CONFIG_FAIL_MMC_REQUEST */
 
-- 
GitLab


From a7ab186f60785850b5af1be183867000485ad491 Mon Sep 17 00:00:00 2001
From: Andrew Jeffery <andrew@aj.id.au>
Date: Mon, 7 Jun 2021 11:00:20 +0930
Subject: [PATCH 2822/3804] mmc: sdhci-of-aspeed: Turn down a phase correction
 warning

The card timing and the bus frequency are not changed atomically with
respect to calls to the set_clock() callback in the driver. The result
is the driver sees a transient state where there's a mismatch between
the two and thus the inputs to the phase correction calculation
formula are garbage.

Switch from dev_warn() to dev_dbg() to avoid noise in the normal case,
though the change does make bad configurations less likely to be
noticed.

Reported-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Andrew Jeffery <andrew@aj.id.au>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Link: https://lore.kernel.org/r/20210607013020.85885-1-andrew@aj.id.au
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-of-aspeed.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c
index 65b5685f6c15e..6e4e132903a63 100644
--- a/drivers/mmc/host/sdhci-of-aspeed.c
+++ b/drivers/mmc/host/sdhci-of-aspeed.c
@@ -186,7 +186,7 @@ static int aspeed_sdhci_phase_to_tap(struct device *dev, unsigned long rate_hz,
 
 	tap = div_u64(phase_period_ps, prop_delay_ps);
 	if (tap > ASPEED_SDHCI_NR_TAPS) {
-		dev_warn(dev,
+		dev_dbg(dev,
 			 "Requested out of range phase tap %d for %d degrees of phase compensation at %luHz, clamping to tap %d\n",
 			 tap, phase_deg, rate_hz, ASPEED_SDHCI_NR_TAPS);
 		tap = ASPEED_SDHCI_NR_TAPS;
-- 
GitLab


From 21adc2e45f4ef32786807375107543797ff68615 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 8 Jun 2021 20:06:20 +0200
Subject: [PATCH 2823/3804] mmc: Improve function name when aborting a tuning
 cmd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'mmc_abort_tuning()' made me think tuning gets completely aborted.
However, it sends only a STOP cmd to cancel the current tuning cmd.
Tuning process may still continue after that. So, rename the function to
'mmc_send_abort_tuning()' to better reflect all this.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Link: https://lore.kernel.org/r/20210608180620.40059-1-wsa+renesas@sang-engineering.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc_ops.c           | 4 ++--
 drivers/mmc/host/renesas_sdhi_core.c | 2 +-
 drivers/mmc/host/sdhci.c             | 2 +-
 include/linux/mmc/host.h             | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 3c58f6d0f4821..973756ed4016f 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -700,7 +700,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(mmc_send_tuning);
 
-int mmc_abort_tuning(struct mmc_host *host, u32 opcode)
+int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode)
 {
 	struct mmc_command cmd = {};
 
@@ -723,7 +723,7 @@ int mmc_abort_tuning(struct mmc_host *host, u32 opcode)
 
 	return mmc_wait_for_cmd(host, &cmd, 0);
 }
-EXPORT_SYMBOL_GPL(mmc_abort_tuning);
+EXPORT_SYMBOL_GPL(mmc_send_abort_tuning);
 
 static int
 mmc_send_bus_test(struct mmc_card *card, struct mmc_host *host, u8 opcode,
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index baab4c2e1b533..e49ca0f7fe9a8 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -704,7 +704,7 @@ static int renesas_sdhi_execute_tuning(struct mmc_host *mmc, u32 opcode)
 			set_bit(i, priv->smpcmp);
 
 		if (cmd_error)
-			mmc_abort_tuning(mmc, opcode);
+			mmc_send_abort_tuning(mmc, opcode);
 	}
 
 	ret = renesas_sdhi_select_tuning(host);
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index bf238ade16021..6aaf5c3ce34c5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2680,7 +2680,7 @@ void sdhci_abort_tuning(struct sdhci_host *host, u32 opcode)
 
 	sdhci_end_tuning(host);
 
-	mmc_abort_tuning(host->mmc, opcode);
+	mmc_send_abort_tuning(host->mmc, opcode);
 }
 EXPORT_SYMBOL_GPL(sdhci_abort_tuning);
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c7e7b43600e9d..0abd47e9ef9bb 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -632,6 +632,6 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
 }
 
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
-int mmc_abort_tuning(struct mmc_host *host, u32 opcode);
+int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
 
 #endif /* LINUX_MMC_HOST_H */
-- 
GitLab


From f62f7bcc827fe7f0b02208d4811caec65aad1c8e Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Thu, 10 Jun 2021 20:01:26 +0800
Subject: [PATCH 2824/3804] mmc: sdhci-esdhc-imx: Enable support for system
 wakeup for SDIO

Enable support for system wakeup, by setting the wakeup capability for the
slot corresponding to the SDIO card. Users need to enable the wakeup
through the sysfs interface.

Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/1623326486-25275-1-git-send-email-haibo.chen@nxp.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index 7336ae7491377..72c0bf0c18875 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1620,6 +1620,14 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev)
 	if (err)
 		goto disable_ahb_clk;
 
+	/*
+	 * Setup the wakeup capability here, let user to decide
+	 * whether need to enable this wakeup through sysfs interface.
+	 */
+	if ((host->mmc->pm_caps & MMC_PM_KEEP_POWER) &&
+			(host->mmc->pm_caps & MMC_PM_WAKE_SDIO_IRQ))
+		device_set_wakeup_capable(&pdev->dev, true);
+
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, 50);
 	pm_runtime_use_autosuspend(&pdev->dev);
-- 
GitLab


From 3160e025361fad1085e527a898c5dcfedf7e796d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Thu, 10 Jun 2021 20:58:49 +0800
Subject: [PATCH 2825/3804] dt-bindings: mmc: JZ4740: Add bindings for JZ4775
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a compatible to the mmc DT bindings for the JZ4775 SoC from Ingenic.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Acked-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/1623329930-14387-2-git-send-email-zhouyanjie@wanyeetech.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
index 04ba8b7fc054a..546480f411410 100644
--- a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
@@ -19,6 +19,7 @@ properties:
           - ingenic,jz4740-mmc
           - ingenic,jz4725b-mmc
           - ingenic,jz4760-mmc
+          - ingenic,jz4775-mmc
           - ingenic,jz4780-mmc
           - ingenic,x1000-mmc
       - items:
-- 
GitLab


From d1c777ee5c5e0a08755ee39f6dc4d222bfd9c832 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Thu, 10 Jun 2021 20:58:50 +0800
Subject: [PATCH 2826/3804] mmc: JZ4740: Add support for JZ4775
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for the variant on the JZ4775 SoC from Ingenic. Let's also
clarify that the drive clock selection and sample clock selection have been
supported since JZ4775, not X1000. So, support for these two functions has
been added for JZ4775 and JZ4780.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Acked-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/1623329930-14387-3-git-send-email-zhouyanjie@wanyeetech.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/jz4740_mmc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index 861ff6d9661a9..0db17bcc9c163 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -674,7 +674,7 @@ static void jz4740_mmc_send_command(struct jz4740_mmc_host *host,
 			cmdat |= JZ_MMC_CMDAT_WRITE;
 		if (host->use_dma) {
 			/*
-			 * The 4780's MMC controller has integrated DMA ability
+			 * The JZ4780's MMC controller has integrated DMA ability
 			 * in addition to being able to use the external DMA
 			 * controller. It moves DMA control bits to a separate
 			 * register. The DMA_SEL bit chooses the external
@@ -866,7 +866,7 @@ static int jz4740_mmc_set_clock_rate(struct jz4740_mmc_host *host, int rate)
 	writew(div, host->base + JZ_REG_MMC_CLKRT);
 
 	if (real_rate > 25000000) {
-		if (host->version >= JZ_MMC_X1000) {
+		if (host->version >= JZ_MMC_JZ4780) {
 			writel(JZ_MMC_LPM_DRV_RISING_QTR_PHASE_DLY |
 				   JZ_MMC_LPM_SMP_RISING_QTR_OR_HALF_PHASE_DLY |
 				   JZ_MMC_LPM_LOW_POWER_MODE_EN,
@@ -959,6 +959,7 @@ static const struct of_device_id jz4740_mmc_of_match[] = {
 	{ .compatible = "ingenic,jz4740-mmc", .data = (void *) JZ_MMC_JZ4740 },
 	{ .compatible = "ingenic,jz4725b-mmc", .data = (void *)JZ_MMC_JZ4725B },
 	{ .compatible = "ingenic,jz4760-mmc", .data = (void *) JZ_MMC_JZ4760 },
+	{ .compatible = "ingenic,jz4775-mmc", .data = (void *) JZ_MMC_JZ4780 },
 	{ .compatible = "ingenic,jz4780-mmc", .data = (void *) JZ_MMC_JZ4780 },
 	{ .compatible = "ingenic,x1000-mmc", .data = (void *) JZ_MMC_X1000 },
 	{},
-- 
GitLab


From 103a5348c22c3fca8b96c735a9e353b8a0801842 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 9 Jun 2021 17:02:30 +0200
Subject: [PATCH 2827/3804] mmc: meson-gx: use memcpy_to/fromio for
 dram-access-quirk

It has been reported that usage of memcpy() to/from an iomem mapping is invalid,
and a recent arm64 memcpy update [1] triggers a memory abort when dram-access-quirk
is used on the G12A/G12B platforms.

This adds a local sg_copy_to_buffer which makes usage of io versions of memcpy
when dram-access-quirk is enabled.

[1] 285133040e6c ("arm64: Import latest memcpy()/memmove() implementation")

Fixes: acdc8e71d9bb ("mmc: meson-gx: add dram-access-quirk")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20210609150230.9291-1-narmstrong@baylibre.com
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/meson-gx-mmc.c | 50 +++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 5 deletions(-)

diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c
index 016a6106151a5..3f28eb4d17fe7 100644
--- a/drivers/mmc/host/meson-gx-mmc.c
+++ b/drivers/mmc/host/meson-gx-mmc.c
@@ -165,6 +165,7 @@ struct meson_host {
 
 	unsigned int bounce_buf_size;
 	void *bounce_buf;
+	void __iomem *bounce_iomem_buf;
 	dma_addr_t bounce_dma_addr;
 	struct sd_emmc_desc *descs;
 	dma_addr_t descs_dma_addr;
@@ -745,6 +746,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg)
 	writel(start, host->regs + SD_EMMC_START);
 }
 
+/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */
+static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data,
+				  size_t buflen, bool to_buffer)
+{
+	unsigned int sg_flags = SG_MITER_ATOMIC;
+	struct scatterlist *sgl = data->sg;
+	unsigned int nents = data->sg_len;
+	struct sg_mapping_iter miter;
+	unsigned int offset = 0;
+
+	if (to_buffer)
+		sg_flags |= SG_MITER_FROM_SG;
+	else
+		sg_flags |= SG_MITER_TO_SG;
+
+	sg_miter_start(&miter, sgl, nents, sg_flags);
+
+	while ((offset < buflen) && sg_miter_next(&miter)) {
+		unsigned int len;
+
+		len = min(miter.length, buflen - offset);
+
+		/* When dram_access_quirk, the bounce buffer is a iomem mapping */
+		if (host->dram_access_quirk) {
+			if (to_buffer)
+				memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len);
+			else
+				memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len);
+		} else {
+			if (to_buffer)
+				memcpy(host->bounce_buf + offset, miter.addr, len);
+			else
+				memcpy(miter.addr, host->bounce_buf + offset, len);
+		}
+
+		offset += len;
+	}
+
+	sg_miter_stop(&miter);
+}
+
 static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
 {
 	struct meson_host *host = mmc_priv(mmc);
@@ -788,8 +830,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd)
 		if (data->flags & MMC_DATA_WRITE) {
 			cmd_cfg |= CMD_CFG_DATA_WR;
 			WARN_ON(xfer_bytes > host->bounce_buf_size);
-			sg_copy_to_buffer(data->sg, data->sg_len,
-					  host->bounce_buf, xfer_bytes);
+			meson_mmc_copy_buffer(host, data, xfer_bytes, true);
 			dma_wmb();
 		}
 
@@ -958,8 +999,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id)
 	if (meson_mmc_bounce_buf_read(data)) {
 		xfer_bytes = data->blksz * data->blocks;
 		WARN_ON(xfer_bytes > host->bounce_buf_size);
-		sg_copy_from_buffer(data->sg, data->sg_len,
-				    host->bounce_buf, xfer_bytes);
+		meson_mmc_copy_buffer(host, data, xfer_bytes, false);
 	}
 
 	next_cmd = meson_mmc_get_next_command(cmd);
@@ -1179,7 +1219,7 @@ static int meson_mmc_probe(struct platform_device *pdev)
 		 * instead of the DDR memory
 		 */
 		host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN;
-		host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
+		host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF;
 		host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF;
 	} else {
 		/* data bounce buffer */
-- 
GitLab


From d33b9035e14a35f6f2a5f067f0b156a93581811d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 08:33:36 +0200
Subject: [PATCH 2828/3804] objtool: Improve reloc hash size guestimate

Nathan reported that LLVM ThinLTO builds have a performance regression
with commit 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing"). Sami
was quick to note that this is due to their use of -ffunction-sections.

As a result the .text section is small and basing the number of relocs
off of that no longer works. Instead have read_sections() compute the
sum of all SHF_EXECINSTR sections and use that.

Fixes: 25cf0d8aa2a3 ("objtool: Rewrite hashtable sizing")
Reported-by: Nathan Chancellor <nathan@kernel.org>
Debugged-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lkml.kernel.org/r/YMJpGLuGNsGtA5JJ@hirez.programming.kicks-ass.net
---
 tools/objtool/elf.c                 | 11 ++++-------
 tools/objtool/include/objtool/elf.h |  1 +
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index a8a0ee21f71a3..2371ccc412ebf 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -288,6 +288,9 @@ static int read_sections(struct elf *elf)
 		}
 		sec->len = sec->sh.sh_size;
 
+		if (sec->sh.sh_flags & SHF_EXECINSTR)
+			elf->text_size += sec->len;
+
 		list_add_tail(&sec->list, &elf->sections);
 		elf_hash_add(section, &sec->hash, sec->idx);
 		elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
@@ -581,13 +584,7 @@ static int read_relocs(struct elf *elf)
 	unsigned int symndx;
 	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 
-	sec = find_section_by_name(elf, ".text");
-	if (!sec) {
-		WARN("no .text");
-		return -1;
-	}
-
-	if (!elf_alloc_hash(reloc, sec->len / 16))
+	if (!elf_alloc_hash(reloc, elf->text_size / 16))
 		return -1;
 
 	list_for_each_entry(sec, &elf->sections, list) {
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 90082751f851d..e343950475309 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -83,6 +83,7 @@ struct elf {
 	int fd;
 	bool changed;
 	char *name;
+	unsigned int text_size;
 	struct list_head sections;
 
 	int symbol_bits;
-- 
GitLab


From e41d6c3f4f9b4804e53ca87aba8ee11ada606c77 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 8 Jun 2021 23:46:05 +1000
Subject: [PATCH 2829/3804] powerpc/signal64: Copy siginfo before changing
 regs->nip

In commit 96d7a4e06fab ("powerpc/signal64: Rewrite handle_rt_signal64()
to minimise uaccess switches") the 64-bit signal code was rearranged to
use user_write_access_begin/end().

As part of that change the call to copy_siginfo_to_user() was moved
later in the function, so that it could be done after the
user_write_access_end().

In particular it was moved after we modify regs->nip to point to the
signal trampoline. That means if copy_siginfo_to_user() fails we exit
handle_rt_signal64() with an error but with regs->nip modified, whereas
previously we would not modify regs->nip until the copy succeeded.

Returning an error from signal delivery but with regs->nip updated
leaves the process in a sort of half-delivered state. We do immediately
force a SEGV in signal_setup_done(), called from do_signal(), so the
process should never run in the half-delivered state.

However that SEGV is not delivered until we've gone around to
do_notify_resume() again, so it's possible some tracing could observe
the half-delivered state.

There are other cases where we fail signal delivery with regs partly
updated, eg. the write to newsp and SA_SIGINFO, but the latter at least
is very unlikely to fail as it reads back from the frame we just wrote
to.

Looking at other arches they seem to be more careful about leaving regs
unchanged until the copy operations have succeeded, and in general that
seems like good hygiene.

So although the current behaviour is not clearly buggy, it's also not
clearly correct. So move the call to copy_siginfo_to_user() up prior to
the modification of regs->nip, which is closer to the old behaviour, and
easier to reason about.

Fixes: 96d7a4e06fab ("powerpc/signal64: Rewrite handle_rt_signal64() to minimise uaccess switches")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210608134605.2783677-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/signal_64.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index dca66481d0c21..f9e1f5428b9e3 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -902,6 +902,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 	unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
 	user_write_access_end();
 
+	/* Save the siginfo outside of the unsafe block. */
+	if (copy_siginfo_to_user(&frame->info, &ksig->info))
+		goto badframe;
+
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	tsk->thread.fp_state.fpscr = 0;
 
@@ -915,11 +919,6 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 		regs->nip = (unsigned long) &frame->tramp[0];
 	}
 
-
-	/* Save the siginfo outside of the unsafe block. */
-	if (copy_siginfo_to_user(&frame->info, &ksig->info))
-		goto badframe;
-
 	/* Allocate a dummy caller frame for the signal handler. */
 	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
 	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
-- 
GitLab


From 771fac5e26c17845de8c679e6a947a4371e86ffc Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 11 Jun 2021 08:48:02 +0530
Subject: [PATCH 2830/3804] Revert "cpufreq: CPPC: Add support for frequency
 invariance"

This reverts commit 4c38f2df71c8e33c0b64865992d693f5022eeaad.

There are a few races in the frequency invariance support for the CPPC driver,
namely the driver doesn't stop the kthread_work and irq_work on policy
exit during suspend/resume or CPU hotplug.

A proper fix won't be possible for the 5.13-rc, as it requires a lot of
changes. Let's revert the patch instead for now.

Fixes: 4c38f2df71c8 ("cpufreq: CPPC: Add support for frequency invariance")
Reported-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm    |  10 --
 drivers/cpufreq/cppc_cpufreq.c | 245 ++-------------------------------
 include/linux/arch_topology.h  |   1 -
 kernel/sched/core.c            |   1 -
 4 files changed, 12 insertions(+), 245 deletions(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a5c5f70acfc9e..e65e0a43be644 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -19,16 +19,6 @@ config ACPI_CPPC_CPUFREQ
 
 	  If in doubt, say N.
 
-config ACPI_CPPC_CPUFREQ_FIE
-	bool "Frequency Invariance support for CPPC cpufreq driver"
-	depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
-	default y
-	help
-	  This extends frequency invariance support in the CPPC cpufreq driver,
-	  by using CPPC delivered and reference performance counters.
-
-	  If in doubt, say N.
-
 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
 	tristate "Allwinner nvmem based SUN50I CPUFreq driver"
 	depends on ARCH_SUNXI
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 3848b4c222e13..2f769b1630c57 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -10,18 +10,14 @@
 
 #define pr_fmt(fmt)	"CPPC Cpufreq:"	fmt
 
-#include <linux/arch_topology.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
-#include <linux/irq_work.h>
-#include <linux/kthread.h>
 #include <linux/time.h>
 #include <linux/vmalloc.h>
-#include <uapi/linux/sched/types.h>
 
 #include <asm/unaligned.h>
 
@@ -61,204 +57,6 @@ static struct cppc_workaround_oem_info wa_info[] = {
 	}
 };
 
-#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
-
-/* Frequency invariance support */
-struct cppc_freq_invariance {
-	int cpu;
-	struct irq_work irq_work;
-	struct kthread_work work;
-	struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
-	struct cppc_cpudata *cpu_data;
-};
-
-static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
-static struct kthread_worker *kworker_fie;
-static bool fie_disabled;
-
-static struct cpufreq_driver cppc_cpufreq_driver;
-static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-				 struct cppc_perf_fb_ctrs fb_ctrs_t0,
-				 struct cppc_perf_fb_ctrs fb_ctrs_t1);
-
-/**
- * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
- * @work: The work item.
- *
- * The CPPC driver register itself with the topology core to provide its own
- * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
- * gets called by the scheduler on every tick.
- *
- * Note that the arch specific counters have higher priority than CPPC counters,
- * if available, though the CPPC driver doesn't need to have any special
- * handling for that.
- *
- * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
- * reach here from hard-irq context), which then schedules a normal work item
- * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
- * based on the counter updates since the last tick.
- */
-static void cppc_scale_freq_workfn(struct kthread_work *work)
-{
-	struct cppc_freq_invariance *cppc_fi;
-	struct cppc_perf_fb_ctrs fb_ctrs = {0};
-	struct cppc_cpudata *cpu_data;
-	unsigned long local_freq_scale;
-	u64 perf;
-
-	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
-	cpu_data = cppc_fi->cpu_data;
-
-	if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
-		pr_warn("%s: failed to read perf counters\n", __func__);
-		return;
-	}
-
-	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-	perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
-				     fb_ctrs);
-
-	perf <<= SCHED_CAPACITY_SHIFT;
-	local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
-	if (WARN_ON(local_freq_scale > 1024))
-		local_freq_scale = 1024;
-
-	per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
-}
-
-static void cppc_irq_work(struct irq_work *irq_work)
-{
-	struct cppc_freq_invariance *cppc_fi;
-
-	cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
-	kthread_queue_work(kworker_fie, &cppc_fi->work);
-}
-
-static void cppc_scale_freq_tick(void)
-{
-	struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
-
-	/*
-	 * cppc_get_perf_ctrs() can potentially sleep, call that from the right
-	 * context.
-	 */
-	irq_work_queue(&cppc_fi->irq_work);
-}
-
-static struct scale_freq_data cppc_sftd = {
-	.source = SCALE_FREQ_SOURCE_CPPC,
-	.set_freq_scale = cppc_scale_freq_tick,
-};
-
-static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-					     struct cppc_cpudata *cpu_data)
-{
-	struct cppc_perf_fb_ctrs fb_ctrs = {0};
-	struct cppc_freq_invariance *cppc_fi;
-	int i, ret;
-
-	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-		return;
-
-	if (fie_disabled)
-		return;
-
-	for_each_cpu(i, policy->cpus) {
-		cppc_fi = &per_cpu(cppc_freq_inv, i);
-		cppc_fi->cpu = i;
-		cppc_fi->cpu_data = cpu_data;
-		kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
-		init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
-
-		ret = cppc_get_perf_ctrs(i, &fb_ctrs);
-		if (ret) {
-			pr_warn("%s: failed to read perf counters: %d\n",
-				__func__, ret);
-			fie_disabled = true;
-		} else {
-			cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
-		}
-	}
-}
-
-static void __init cppc_freq_invariance_init(void)
-{
-	struct sched_attr attr = {
-		.size		= sizeof(struct sched_attr),
-		.sched_policy	= SCHED_DEADLINE,
-		.sched_nice	= 0,
-		.sched_priority	= 0,
-		/*
-		 * Fake (unused) bandwidth; workaround to "fix"
-		 * priority inheritance.
-		 */
-		.sched_runtime	= 1000000,
-		.sched_deadline = 10000000,
-		.sched_period	= 10000000,
-	};
-	int ret;
-
-	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-		return;
-
-	if (fie_disabled)
-		return;
-
-	kworker_fie = kthread_create_worker(0, "cppc_fie");
-	if (IS_ERR(kworker_fie))
-		return;
-
-	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
-	if (ret) {
-		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
-			ret);
-		kthread_destroy_worker(kworker_fie);
-		return;
-	}
-
-	/* Register for freq-invariance */
-	topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask);
-}
-
-static void cppc_freq_invariance_exit(void)
-{
-	struct cppc_freq_invariance *cppc_fi;
-	int i;
-
-	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
-		return;
-
-	if (fie_disabled)
-		return;
-
-	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask);
-
-	for_each_possible_cpu(i) {
-		cppc_fi = &per_cpu(cppc_freq_inv, i);
-		irq_work_sync(&cppc_fi->irq_work);
-	}
-
-	kthread_destroy_worker(kworker_fie);
-	kworker_fie = NULL;
-}
-
-#else
-static inline void
-cppc_freq_invariance_policy_init(struct cpufreq_policy *policy,
-				 struct cppc_cpudata *cpu_data)
-{
-}
-
-static inline void cppc_freq_invariance_init(void)
-{
-}
-
-static inline void cppc_freq_invariance_exit(void)
-{
-}
-#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
-
 /* Callback function used to retrieve the max frequency from DMI */
 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 {
@@ -547,12 +345,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
 
 	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
-	if (ret) {
+	if (ret)
 		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
 			 caps->highest_perf, cpu, ret);
-	} else {
-		cppc_freq_invariance_policy_init(policy, cpu_data);
-	}
 
 	return ret;
 }
@@ -565,12 +360,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
 	return (u32)t1 - (u32)t0;
 }
 
-static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
-				 struct cppc_perf_fb_ctrs fb_ctrs_t0,
-				 struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+				     struct cppc_perf_fb_ctrs fb_ctrs_t0,
+				     struct cppc_perf_fb_ctrs fb_ctrs_t1)
 {
 	u64 delta_reference, delta_delivered;
-	u64 reference_perf;
+	u64 reference_perf, delivered_perf;
 
 	reference_perf = fb_ctrs_t0.reference_perf;
 
@@ -579,21 +374,12 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
 	delta_delivered = get_delta(fb_ctrs_t1.delivered,
 				    fb_ctrs_t0.delivered);
 
-	/* Check to avoid divide-by zero and invalid delivered_perf */
-	if (!delta_reference || !delta_delivered)
-		return cpu_data->perf_ctrls.desired_perf;
-
-	return (reference_perf * delta_delivered) / delta_reference;
-}
-
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t0,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t1)
-{
-	u64 delivered_perf;
-
-	delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
-					       fb_ctrs_t1);
+	/* Check to avoid divide-by zero */
+	if (delta_reference || delta_delivered)
+		delivered_perf = (reference_perf * delta_delivered) /
+					delta_reference;
+	else
+		delivered_perf = cpu_data->perf_ctrls.desired_perf;
 
 	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -718,8 +504,6 @@ static void cppc_check_hisi_workaround(void)
 
 static int __init cppc_cpufreq_init(void)
 {
-	int ret;
-
 	if ((acpi_disabled) || !acpi_cpc_valid())
 		return -ENODEV;
 
@@ -727,11 +511,7 @@ static int __init cppc_cpufreq_init(void)
 
 	cppc_check_hisi_workaround();
 
-	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
-	if (!ret)
-		cppc_freq_invariance_init();
-
-	return ret;
+	return cpufreq_register_driver(&cppc_cpufreq_driver);
 }
 
 static inline void free_cpu_data(void)
@@ -748,7 +528,6 @@ static inline void free_cpu_data(void)
 
 static void __exit cppc_cpufreq_exit(void)
 {
-	cppc_freq_invariance_exit();
 	cpufreq_unregister_driver(&cppc_cpufreq_driver);
 
 	free_cpu_data();
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index f180240dc95f4..11e555cfaecb4 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -37,7 +37,6 @@ bool topology_scale_freq_invariant(void);
 enum scale_freq_source {
 	SCALE_FREQ_SOURCE_CPUFREQ = 0,
 	SCALE_FREQ_SOURCE_ARCH,
-	SCALE_FREQ_SOURCE_CPPC,
 };
 
 struct scale_freq_data {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5226cc26a095f..4ca80df205ce6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6389,7 +6389,6 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
 {
 	return __sched_setscheduler(p, attr, false, true);
 }
-EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
 
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
-- 
GitLab


From ce62df2290bb86b39f826a835d6060bc3b43dae7 Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Wed, 9 Jun 2021 15:10:03 +0200
Subject: [PATCH 2831/3804] regmap: mdio: Fix regmap_bus pointer constness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A const qualifier was omitted in the declaration of the regmap_bus
pointer, resulting in the following warnings:

drivers/base/regmap/regmap-mdio.c: In function ‘__regmap_init_mdio’:
drivers/base/regmap/regmap-mdio.c:87:7: warning: assignment discards
‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
   87 |   bus = &regmap_mdio_c22_bus;
      |       ^
drivers/base/regmap/regmap-mdio.c:89:7: warning: assignment discards
‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers]
   89 |   bus = &regmap_mdio_c45_bus;
      |       ^

Fix this by ensuring the pointer has the same qualifiers as the assigned
values.

Fixes: f083be9db060 ("regmap: mdio: Add clause-45 support")
Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://lore.kernel.org/r/f304ca638ffdc66d4803a6df1f75436894bd1d5f.1623244066.git.sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
index cfb23afb19ebf..b772b42809e28 100644
--- a/drivers/base/regmap/regmap-mdio.c
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -69,7 +69,7 @@ struct regmap *__regmap_init_mdio(struct mdio_device *mdio_dev,
 	const struct regmap_config *config, struct lock_class_key *lock_key,
 	const char *lock_name)
 {
-	struct regmap_bus *bus;
+	const struct regmap_bus *bus;
 
 	if (config->reg_bits == 5 && config->val_bits == 16)
 		bus = &regmap_mdio_c22_bus;
-- 
GitLab


From 0df0240946b1ffbe852fa302c04c0d322229c9ce Mon Sep 17 00:00:00 2001
From: Sander Vanheule <sander@svanheule.net>
Date: Wed, 9 Jun 2021 15:10:04 +0200
Subject: [PATCH 2832/3804] regmap: mdio: Reject invalid addresses

When an invalid register offset is provided, the upper bits are silently
discarded. Change this to return -ENXIO instead, to help catch potential
bugs.

Signed-off-by: Sander Vanheule <sander@svanheule.net>
Link: https://lore.kernel.org/r/047007e0e9fb596480829f11f8c7e6281d235c70.1623244066.git.sander@svanheule.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-mdio.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/base/regmap/regmap-mdio.c b/drivers/base/regmap/regmap-mdio.c
index b772b42809e28..6a20201299f56 100644
--- a/drivers/base/regmap/regmap-mdio.c
+++ b/drivers/base/regmap/regmap-mdio.c
@@ -31,14 +31,20 @@ static int regmap_mdio_c22_read(void *context, unsigned int reg, unsigned int *v
 {
 	struct mdio_device *mdio_dev = context;
 
-	return regmap_mdio_read(mdio_dev, reg & REGNUM_C22_MASK, val);
+	if (unlikely(reg & ~REGNUM_C22_MASK))
+		return -ENXIO;
+
+	return regmap_mdio_read(mdio_dev, reg, val);
 }
 
 static int regmap_mdio_c22_write(void *context, unsigned int reg, unsigned int val)
 {
 	struct mdio_device *mdio_dev = context;
 
-	return regmap_mdio_write(mdio_dev, reg & REGNUM_C22_MASK, val);
+	if (unlikely(reg & ~REGNUM_C22_MASK))
+		return -ENXIO;
+
+	return mdiobus_write(mdio_dev->bus, mdio_dev->addr, reg, val);
 }
 
 static const struct regmap_bus regmap_mdio_c22_bus = {
@@ -50,14 +56,20 @@ static int regmap_mdio_c45_read(void *context, unsigned int reg, unsigned int *v
 {
 	struct mdio_device *mdio_dev = context;
 
-	return regmap_mdio_read(mdio_dev, MII_ADDR_C45 | (reg & REGNUM_C45_MASK), val);
+	if (unlikely(reg & ~REGNUM_C45_MASK))
+		return -ENXIO;
+
+	return regmap_mdio_read(mdio_dev, MII_ADDR_C45 | reg, val);
 }
 
 static int regmap_mdio_c45_write(void *context, unsigned int reg, unsigned int val)
 {
 	struct mdio_device *mdio_dev = context;
 
-	return regmap_mdio_write(mdio_dev, MII_ADDR_C45 | (reg & REGNUM_C45_MASK), val);
+	if (unlikely(reg & ~REGNUM_C45_MASK))
+		return -ENXIO;
+
+	return regmap_mdio_write(mdio_dev, MII_ADDR_C45 | reg, val);
 }
 
 static const struct regmap_bus regmap_mdio_c45_bus = {
-- 
GitLab


From f422316c8e9d3c4aff3c56549dfb44a677d02f14 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Thu, 10 Jun 2021 17:24:33 +0800
Subject: [PATCH 2833/3804] spi: spi-nxp-fspi: move the register operation
 after the clock enable

Move the register operation after the clock enable, otherwise the
system will get stuck when this driver probes.

Fixes: 71d80563b076 ("spi: spi-nxp-fspi: fix fspi panic by unexpected interrupts")
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/1623317073-25158-1-git-send-email-haibo.chen@nxp.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-nxp-fspi.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index 6e6c2403944dd..a66fa97046ee1 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -1124,12 +1124,6 @@ static int nxp_fspi_probe(struct platform_device *pdev)
 		goto err_put_ctrl;
 	}
 
-	/* Clear potential interrupts */
-	reg = fspi_readl(f, f->iobase + FSPI_INTR);
-	if (reg)
-		fspi_writel(f, reg, f->iobase + FSPI_INTR);
-
-
 	/* find the resources - controller memory mapped space */
 	if (is_acpi_node(f->dev->fwnode))
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
@@ -1167,6 +1161,11 @@ static int nxp_fspi_probe(struct platform_device *pdev)
 		}
 	}
 
+	/* Clear potential interrupts */
+	reg = fspi_readl(f, f->iobase + FSPI_INTR);
+	if (reg)
+		fspi_writel(f, reg, f->iobase + FSPI_INTR);
+
 	/* find the irq */
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0)
-- 
GitLab


From 23f95199b6446f828bf879ca316f388893435faa Mon Sep 17 00:00:00 2001
From: ChiYuan Huang <cy_huang@richtek.com>
Date: Thu, 10 Jun 2021 22:38:30 +0800
Subject: [PATCH 2834/3804] regulator: rt6160: Remove dummy line and add module
 description

Remove dummy line and add module description.

Signed-off-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/1623335910-5385-1-git-send-email-u0084500@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt6160-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/rt6160-regulator.c b/drivers/regulator/rt6160-regulator.c
index 69550284083d8..b6b53868050f1 100644
--- a/drivers/regulator/rt6160-regulator.c
+++ b/drivers/regulator/rt6160-regulator.c
@@ -81,7 +81,6 @@ static int rt6160_disable(struct regulator_dev *rdev)
 	gpiod_set_value_cansleep(priv->enable_gpio, 0);
 
 	return 0;
-
 }
 
 static int rt6160_is_enabled(struct regulator_dev *rdev)
@@ -311,5 +310,6 @@ static struct i2c_driver rt6160_driver = {
 };
 module_i2c_driver(rt6160_driver);
 
+MODULE_DESCRIPTION("Richtek RT6160 voltage regulator driver");
 MODULE_AUTHOR("ChiYuan Huang <cy_huang@richtek.com>");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 00430f71b2b18e42ba3d733cbd2d725ec5b2ca80 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 10 Jun 2021 21:41:28 +0800
Subject: [PATCH 2835/3804] regulator: hi6421v600: Use
 regulator_map_voltage_ascend

All the voltage tables have entries in ascendant order, so use
regulator_map_voltage_ascend to speed up the mapping.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210610134128.2477821-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421v600-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index feddb0b5d4f1c..417cf5b4a1c39 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -166,7 +166,7 @@ static const struct regulator_ops hi6421_spmi_ldo_rops = {
 	.enable = hi6421_spmi_regulator_enable,
 	.disable = regulator_disable_regmap,
 	.list_voltage = regulator_list_voltage_table,
-	.map_voltage = regulator_map_voltage_iterate,
+	.map_voltage = regulator_map_voltage_ascend,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_mode = hi6421_spmi_regulator_get_mode,
-- 
GitLab


From 3acbacfcb3a78eb53d6fa7bc1599dcdaf043465e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 1 Jun 2021 20:07:21 +0800
Subject: [PATCH 2836/3804] spi: spi-mem: fix doc warning in spi-mem.c

Fix the following make W=1 warning:

  drivers/spi/spi-mem.c:819: warning: expecting prototype for spi_mem_driver_unregister_with_owner(). Prototype was for spi_mem_driver_unregister() instead

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20210601120721.3198488-1-yangyingliang@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 177b3e21febfd..37f4443ce9a09 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -896,7 +896,7 @@ int spi_mem_driver_register_with_owner(struct spi_mem_driver *memdrv,
 EXPORT_SYMBOL_GPL(spi_mem_driver_register_with_owner);
 
 /**
- * spi_mem_driver_unregister_with_owner() - Unregister a SPI memory driver
+ * spi_mem_driver_unregister() - Unregister a SPI memory driver
  * @memdrv: the SPI memory driver to unregister
  *
  * Unregisters a SPI memory driver.
-- 
GitLab


From 95730d5eb73170a6d225a9998c478be273598634 Mon Sep 17 00:00:00 2001
From: zpershuai <zpershuai@gmail.com>
Date: Sun, 13 Jun 2021 13:29:32 +0800
Subject: [PATCH 2837/3804] spi: meson-spicc: fix a wrong goto jump for
 avoiding memory leak.

In the meson_spicc_probe function, when enabling the device pclk clock
fails, it should use clk_disable_unprepare to release the core clock.

Signed-off-by: zpershuai <zpershuai@gmail.com>
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/1623562172-22056-1-git-send-email-zpershuai@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-meson-spicc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index ecba6b4a5d85d..51aef2c6e9668 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -725,7 +725,7 @@ static int meson_spicc_probe(struct platform_device *pdev)
 	ret = clk_prepare_enable(spicc->pclk);
 	if (ret) {
 		dev_err(&pdev->dev, "pclk clock enable failed\n");
-		goto out_master;
+		goto out_core_clk;
 	}
 
 	device_reset_optional(&pdev->dev);
@@ -764,9 +764,11 @@ static int meson_spicc_probe(struct platform_device *pdev)
 	return 0;
 
 out_clk:
-	clk_disable_unprepare(spicc->core);
 	clk_disable_unprepare(spicc->pclk);
 
+out_core_clk:
+	clk_disable_unprepare(spicc->core);
+
 out_master:
 	spi_master_put(master);
 
-- 
GitLab


From b2d501c13470409ee7613855b17e5e5ec4111e1c Mon Sep 17 00:00:00 2001
From: zpershuai <zpershuai@gmail.com>
Date: Sun, 13 Jun 2021 13:29:16 +0800
Subject: [PATCH 2838/3804] spi: meson-spicc: fix memory leak in
 meson_spicc_probe

When meson_spicc_clk_init fails, the probe function should jump to the
out_clk label.

Signed-off-by: zpershuai <zpershuai@gmail.com>
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lore.kernel.org/r/1623562156-21995-1-git-send-email-zpershuai@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-meson-spicc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 51aef2c6e9668..b2c4621db34d7 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -752,7 +752,7 @@ static int meson_spicc_probe(struct platform_device *pdev)
 	ret = meson_spicc_clk_init(spicc);
 	if (ret) {
 		dev_err(&pdev->dev, "clock registration failed\n");
-		goto out_master;
+		goto out_clk;
 	}
 
 	ret = devm_spi_register_master(&pdev->dev, master);
-- 
GitLab


From d7c176e9b5329b4a490b3d8ea49564fc9ff11071 Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas@google.com>
Date: Wed, 9 Jun 2021 19:50:58 +0000
Subject: [PATCH 2839/3804] docs: printk-formats: update size-casting examples

Since commit 72deb455b5ec ("block: remove CONFIG_LBDAF") sector_t and
blkcnt_t types are no longer variable in size, making them unsuitable
examples for casting to the largest possible type. This patch replaces
such examples with cycles_t and blk_status_t types, whose sizes depend
on architecture and config options respectively.

Signed-off-by: Carlos Llamas <cmllamas@google.com>
Link: https://lore.kernel.org/r/20210609195058.3518943-1-cmllamas@google.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/printk-formats.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index f063a384c7c8e..385c0cc52f1f5 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -37,14 +37,13 @@ Integer types
 		u64			%llu or %llx
 
 
-If <type> is dependent on a config option for its size (e.g., sector_t,
-blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
-format specifier of its largest possible type and explicitly cast to it.
+If <type> is architecture-dependent for its size (e.g., cycles_t, tcflag_t) or
+is dependent on a config option for its size (e.g., blk_status_t), use a format
+specifier of its largest possible type and explicitly cast to it.
 
 Example::
 
-	printk("test: sector number/total blocks: %llu/%llu\n",
-		(unsigned long long)sector, (unsigned long long)blockcount);
+	printk("test: latency: %llu cycles\n", (unsigned long long)time);
 
 Reminder: sizeof() returns type size_t.
 
-- 
GitLab


From b1f4c363666c31f289b26bfc7c38378f0db79b55 Mon Sep 17 00:00:00 2001
From: Baoquan He <bhe@redhat.com>
Date: Wed, 9 Jun 2021 16:32:18 +0800
Subject: [PATCH 2840/3804] Documentation: kdump: update kdump guide

Some parts of the guide are outdated and need to be updated.

1) The backup area of the 1st 640K on X86_64 has been removed
   by below commits, update the description accordingly.

   commit 7c321eb2b843 ("x86/kdump: Remove the backup region handling")
   commit 6f599d84231f ("x86/kdump: Always reserve the low 1M when the crashkernel option is specified")

2) Sort out the description of the "crashkernel syntax" part.

3) And some other minor cleanups.

Signed-off-by: Baoquan He <bhe@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com>
Link: https://lore.kernel.org/r/20210609083218.GB591017@MiWiFi-R3L-srv
[jc: added blank line to fix added build warning]
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/kdump/kdump.rst | 170 ++++++++++++++--------
 1 file changed, 109 insertions(+), 61 deletions(-)

diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst
index 75a9dd98e76eb..cb30ca3df27c9 100644
--- a/Documentation/admin-guide/kdump/kdump.rst
+++ b/Documentation/admin-guide/kdump/kdump.rst
@@ -2,7 +2,7 @@
 Documentation for Kdump - The kexec-based Crash Dumping Solution
 ================================================================
 
-This document includes overview, setup and installation, and analysis
+This document includes overview, setup, installation, and analysis
 information.
 
 Overview
@@ -13,9 +13,9 @@ dump of the system kernel's memory needs to be taken (for example, when
 the system panics). The system kernel's memory image is preserved across
 the reboot and is accessible to the dump-capture kernel.
 
-You can use common commands, such as cp and scp, to copy the
-memory image to a dump file on the local disk, or across the network to
-a remote system.
+You can use common commands, such as cp, scp or makedumpfile to copy
+the memory image to a dump file on the local disk, or across the network
+to a remote system.
 
 Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
 s390x, arm and arm64 architectures.
@@ -26,13 +26,15 @@ the dump-capture kernel. This ensures that ongoing Direct Memory Access
 The kexec -p command loads the dump-capture kernel into this reserved
 memory.
 
-On x86 machines, the first 640 KB of physical memory is needed to boot,
-regardless of where the kernel loads. Therefore, kexec backs up this
-region just before rebooting into the dump-capture kernel.
+On x86 machines, the first 640 KB of physical memory is needed for boot,
+regardless of where the kernel loads. For simpler handling, the whole
+low 1M is reserved to avoid any later kernel or device driver writing
+data into this area. Like this, the low 1M can be reused as system RAM
+by kdump kernel without extra handling.
 
-Similarly on PPC64 machines first 32KB of physical memory is needed for
-booting regardless of where the kernel is loaded and to support 64K page
-size kexec backs up the first 64KB memory.
+On PPC64 machines first 32KB of physical memory is needed for booting
+regardless of where the kernel is loaded and to support 64K page size
+kexec backs up the first 64KB memory.
 
 For s390x, when kdump is triggered, the crashkernel region is exchanged
 with the region [0, crashkernel region size] and then the kdump kernel
@@ -46,14 +48,14 @@ passed to the dump-capture kernel through the elfcorehdr= boot
 parameter. Optionally the size of the ELF header can also be passed
 when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
 
-
 With the dump-capture kernel, you can access the memory image through
 /proc/vmcore. This exports the dump as an ELF-format file that you can
-write out using file copy commands such as cp or scp. Further, you can
-use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
-debug the dump file. This method ensures that the dump pages are correctly
-ordered.
-
+write out using file copy commands such as cp or scp. You can also use
+makedumpfile utility to analyze and write out filtered contents with
+options, e.g with '-d 31' it will only write out kernel data. Further,
+you can use analysis tools such as the GNU Debugger (GDB) and the Crash
+tool to debug the dump file. This method ensures that the dump pages are
+correctly ordered.
 
 Setup and Installation
 ======================
@@ -125,9 +127,18 @@ dump-capture kernels for enabling kdump support.
 System kernel config options
 ----------------------------
 
-1) Enable "kexec system call" in "Processor type and features."::
+1) Enable "kexec system call" or "kexec file based system call" in
+   "Processor type and features."::
+
+	CONFIG_KEXEC=y or CONFIG_KEXEC_FILE=y
+
+   And both of them will select KEXEC_CORE::
 
-	CONFIG_KEXEC=y
+	CONFIG_KEXEC_CORE=y
+
+   Subsequently, CRASH_CORE is selected by KEXEC_CORE::
+
+	CONFIG_CRASH_CORE=y
 
 2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
    filesystems." This is usually enabled by default::
@@ -175,17 +186,19 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
 
 	CONFIG_HIGHMEM4G
 
-2) On i386 and x86_64, disable symmetric multi-processing support
-   under "Processor type and features"::
+2) With CONFIG_SMP=y, usually nr_cpus=1 need specified on the kernel
+   command line when loading the dump-capture kernel because one
+   CPU is enough for kdump kernel to dump vmcore on most of systems.
 
-	CONFIG_SMP=n
+   However, you can also specify nr_cpus=X to enable multiple processors
+   in kdump kernel. In this case, "disable_cpu_apicid=" is needed to
+   tell kdump kernel which cpu is 1st kernel's BSP. Please refer to
+   admin-guide/kernel-parameters.txt for more details.
 
-   (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
-   when loading the dump-capture kernel, see section "Load the Dump-capture
-   Kernel".)
+   With CONFIG_SMP=n, the above things are not related.
 
-3) If one wants to build and use a relocatable kernel,
-   Enable "Build a relocatable kernel" support under "Processor type and
+3) A relocatable kernel is suggested to be built by default. If not yet,
+   enable "Build a relocatable kernel" support under "Processor type and
    features"::
 
 	CONFIG_RELOCATABLE=y
@@ -232,7 +245,7 @@ Dump-capture kernel config options (Arch Dependent, ia64)
   as a dump-capture kernel if desired.
 
   The crashkernel region can be automatically placed by the system
-  kernel at run time. This is done by specifying the base address as 0,
+  kernel at runtime. This is done by specifying the base address as 0,
   or omitting it all together::
 
 	crashkernel=256M@0
@@ -241,10 +254,6 @@ Dump-capture kernel config options (Arch Dependent, ia64)
 
 	crashkernel=256M
 
-  If the start address is specified, note that the start address of the
-  kernel will be aligned to 64Mb, so if the start address is not then
-  any space below the alignment point will be wasted.
-
 Dump-capture kernel config options (Arch Dependent, arm)
 ----------------------------------------------------------
 
@@ -260,46 +269,82 @@ Dump-capture kernel config options (Arch Dependent, arm64)
   on non-VHE systems even if it is configured. This is because the CPU
   will not be reset to EL2 on panic.
 
-Extended crashkernel syntax
+crashkernel syntax
 ===========================
+1) crashkernel=size@offset
 
-While the "crashkernel=size[@offset]" syntax is sufficient for most
-configurations, sometimes it's handy to have the reserved memory dependent
-on the value of System RAM -- that's mostly for distributors that pre-setup
-the kernel command line to avoid a unbootable system after some memory has
-been removed from the machine.
+   Here 'size' specifies how much memory to reserve for the dump-capture kernel
+   and 'offset' specifies the beginning of this reserved memory. For example,
+   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
+   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
 
-The syntax is::
+   The crashkernel region can be automatically placed by the system
+   kernel at run time. This is done by specifying the base address as 0,
+   or omitting it all together::
 
-    crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
-    range=start-[end]
+         crashkernel=256M@0
 
-For example::
+   or::
 
-    crashkernel=512M-2G:64M,2G-:128M
+         crashkernel=256M
 
-This would mean:
+   If the start address is specified, note that the start address of the
+   kernel will be aligned to a value (which is Arch dependent), so if the
+   start address is not then any space below the alignment point will be
+   wasted.
 
-    1) if the RAM is smaller than 512M, then don't reserve anything
-       (this is the "rescue" case)
-    2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
-    3) if the RAM size is larger than 2G, then reserve 128M
+2) range1:size1[,range2:size2,...][@offset]
 
+   While the "crashkernel=size[@offset]" syntax is sufficient for most
+   configurations, sometimes it's handy to have the reserved memory dependent
+   on the value of System RAM -- that's mostly for distributors that pre-setup
+   the kernel command line to avoid a unbootable system after some memory has
+   been removed from the machine.
 
+   The syntax is::
 
-Boot into System Kernel
-=======================
+       crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+       range=start-[end]
+
+   For example::
+
+       crashkernel=512M-2G:64M,2G-:128M
 
+   This would mean:
+
+       1) if the RAM is smaller than 512M, then don't reserve anything
+          (this is the "rescue" case)
+       2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+       3) if the RAM size is larger than 2G, then reserve 128M
+
+3) crashkernel=size,high and crashkernel=size,low
+
+   If memory above 4G is preferred, crashkernel=size,high can be used to
+   fulfill that. With it, physical memory is allowed to be allocated from top,
+   so could be above 4G if system has more than 4G RAM installed. Otherwise,
+   memory region will be allocated below 4G if available.
+
+   When crashkernel=X,high is passed, kernel could allocate physical memory
+   region above 4G, low memory under 4G is needed in this case. There are
+   three ways to get low memory:
+
+      1) Kernel will allocate at least 256M memory below 4G automatically
+         if crashkernel=Y,low is not specified.
+      2) Let user specify low memory size instead.
+      3) Specified value 0 will disable low memory allocation::
+
+            crashkernel=0,low
+
+Boot into System Kernel
+-----------------------
 1) Update the boot loader (such as grub, yaboot, or lilo) configuration
    files as necessary.
 
-2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
-   where Y specifies how much memory to reserve for the dump-capture kernel
-   and X specifies the beginning of this reserved memory. For example,
-   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
-   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+2) Boot the system kernel with the boot parameter "crashkernel=Y@X".
 
-   On x86 and x86_64, use "crashkernel=64M@16M".
+   On x86 and x86_64, use "crashkernel=Y[@X]". Most of the time, the
+   start address 'X' is not necessary, kernel will search a suitable
+   area. Unless an explicit start address is expected.
 
    On ppc64, use "crashkernel=128M@32M".
 
@@ -331,8 +376,8 @@ of dump-capture kernel. Following is the summary.
 
 For i386 and x86_64:
 
-	- Use vmlinux if kernel is not relocatable.
 	- Use bzImage/vmlinuz if kernel is relocatable.
+	- Use vmlinux if kernel is not relocatable.
 
 For ppc64:
 
@@ -392,7 +437,7 @@ loading dump-capture kernel.
 
 For i386, x86_64 and ia64:
 
-	"1 irqpoll maxcpus=1 reset_devices"
+	"1 irqpoll nr_cpus=1 reset_devices"
 
 For ppc64:
 
@@ -400,7 +445,7 @@ For ppc64:
 
 For s390x:
 
-	"1 maxcpus=1 cgroup_disable=memory"
+	"1 nr_cpus=1 cgroup_disable=memory"
 
 For arm:
 
@@ -408,7 +453,7 @@ For arm:
 
 For arm64:
 
-	"1 maxcpus=1 reset_devices"
+	"1 nr_cpus=1 reset_devices"
 
 Notes on loading the dump-capture kernel:
 
@@ -488,6 +533,10 @@ the following command::
 
    cp /proc/vmcore <dump-file>
 
+You can also use makedumpfile utility to write out the dump file
+with specified options to filter out unwanted contents, e.g::
+
+   makedumpfile -l --message-level 1 -d 31 /proc/vmcore <dump-file>
 
 Analysis
 ========
@@ -535,8 +584,7 @@ This will cause a kdump to occur at the add_taint()->panic() call.
 Contact
 =======
 
-- Vivek Goyal (vgoyal@redhat.com)
-- Maneesh Soni (maneesh@in.ibm.com)
+- kexec@lists.infradead.org
 
 GDB macros
 ==========
-- 
GitLab


From f6bca4d91b2ea052e917cca3f9d866b5cc1d500a Mon Sep 17 00:00:00 2001
From: Timo Sigurdsson <public_timo.s@silentcreek.de>
Date: Mon, 14 Jun 2021 09:25:39 +0200
Subject: [PATCH 2841/3804] ata: ahci_sunxi: Disable DIPM

DIPM is unsupported or broken on sunxi. Trying to enable the power
management policy med_power_with_dipm on an Allwinner A20 SoC based board
leads to immediate I/O errors and the attached SATA disk disappears from
the /dev filesystem. A reset (power cycle) is required to make the SATA
controller or disk work again. The A10 and A20 SoC data sheets and manuals
don't mention DIPM at all [1], so it's fair to assume that it's simply not
supported. But even if it was, it should be considered broken and best be
disabled in the ahci_sunxi driver.

[1] https://github.com/allwinner-zh/documents/tree/master/

Fixes: c5754b5220f0 ("ARM: sunxi: Add support for Allwinner SUNXi SoCs sata to ahci_platform")
Cc: stable@vger.kernel.org
Signed-off-by: Timo Sigurdsson <public_timo.s@silentcreek.de>
Tested-by: Timo Sigurdsson <public_timo.s@silentcreek.de>
Link: https://lore.kernel.org/r/20210614072539.3307-1-public_timo.s@silentcreek.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/ahci_sunxi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index cb69b737cb499..56b695136977a 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c
@@ -200,7 +200,7 @@ static void ahci_sunxi_start_engine(struct ata_port *ap)
 }
 
 static const struct ata_port_info ahci_sunxi_port_info = {
-	.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
+	.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NCQ | ATA_FLAG_NO_DIPM,
 	.pio_mask	= ATA_PIO4,
 	.udma_mask	= ATA_UDMA6,
 	.port_ops	= &ahci_platform_ops,
-- 
GitLab


From 4f667b8e049e716a0533fc927f50310fe6e40d22 Mon Sep 17 00:00:00 2001
From: Tyson Moore <tyson@tyson.me>
Date: Sat, 12 Jun 2021 02:54:11 -0400
Subject: [PATCH 2842/3804] sch_cake: revise docs for RFC 8622 LE PHB support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit b8392808eb3fc28e ("sch_cake: add RFC 8622 LE PHB support to CAKE
diffserv handling") added the LE mark to the Bulk tin. Update the
comments to reflect the change.

Signed-off-by: Tyson Moore <tyson@tyson.me>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cake.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 5c15968b5155b..951542843cab2 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -2342,7 +2342,7 @@ static int cake_config_precedence(struct Qdisc *sch)
 
 /*	List of known Diffserv codepoints:
  *
- *	Least Effort (CS1)
+ *	Least Effort (CS1, LE)
  *	Best Effort (CS0)
  *	Max Reliability & LLT "Lo" (TOS1)
  *	Max Throughput (TOS2)
@@ -2364,7 +2364,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *	Total 25 codepoints.
  */
 
-/*	List of traffic classes in RFC 4594:
+/*	List of traffic classes in RFC 4594, updated by RFC 8622:
  *		(roughly descending order of contended priority)
  *		(roughly ascending order of uncontended throughput)
  *
@@ -2379,7 +2379,7 @@ static int cake_config_precedence(struct Qdisc *sch)
  *	Ops, Admin, Management (CS2,TOS1) - eg. ssh
  *	Standard Service (CS0 & unrecognised codepoints)
  *	High Throughput Data (AF1x,TOS2)  - eg. web traffic
- *	Low Priority Data (CS1)           - eg. BitTorrent
+ *	Low Priority Data (CS1,LE)        - eg. BitTorrent
 
  *	Total 12 traffic classes.
  */
@@ -2395,7 +2395,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
  *		Video Streaming          (AF4x, AF3x, CS3)
  *		Bog Standard             (CS0 etc.)
  *		High Throughput          (AF1x, TOS2)
- *		Background Traffic       (CS1)
+ *		Background Traffic       (CS1, LE)
  *
  *		Total 8 traffic classes.
  */
@@ -2439,7 +2439,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
  *	    Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
  *	    Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
  *	    Best Effort        (CS0, AF1x, TOS2, and those not specified)
- *	    Background Traffic (CS1)
+ *	    Background Traffic (CS1, LE)
  *
  *		Total 4 traffic classes.
  */
@@ -2477,7 +2477,7 @@ static int cake_config_diffserv4(struct Qdisc *sch)
 static int cake_config_diffserv3(struct Qdisc *sch)
 {
 /*  Simplified Diffserv structure with 3 tins.
- *		Low Priority		(CS1)
+ *		Low Priority		(CS1, LE)
  *		Best Effort
  *		Latency Sensitive	(TOS4, VA, EF, CS6, CS7)
  */
-- 
GitLab


From 994c393bb6886d6d94d628475b274a8cb3fc67a4 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 11 Jun 2021 13:26:00 -0500
Subject: [PATCH 2843/3804] net: qualcomm: rmnet: don't over-count statistics

The purpose of the loop using u64_stats_fetch_*_irq() is to ensure
statistics on a given CPU are collected atomically. If one of the
statistics values gets updated within the begin/retry window, the
loop will run again.

Currently the statistics totals are updated inside that window.
This means that if the loop ever retries, the statistics for the
CPU will be counted more than once.

Fix this by taking a snapshot of a CPU's statistics inside the
protected window, and then updating the counters with the snapshot
values after exiting the loop.

(Also add a newline at the end of this file...)

Fixes: 192c4b5d48f2a ("net: qualcomm: rmnet: Add support for 64 bit stats")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_vnd.c    | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 41fbd2ceeede4..ab1e0fcccabb6 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -126,24 +126,24 @@ static void rmnet_get_stats64(struct net_device *dev,
 			      struct rtnl_link_stats64 *s)
 {
 	struct rmnet_priv *priv = netdev_priv(dev);
-	struct rmnet_vnd_stats total_stats;
+	struct rmnet_vnd_stats total_stats = { };
 	struct rmnet_pcpu_stats *pcpu_ptr;
+	struct rmnet_vnd_stats snapshot;
 	unsigned int cpu, start;
 
-	memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats));
-
 	for_each_possible_cpu(cpu) {
 		pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
 
 		do {
 			start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp);
-			total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts;
-			total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes;
-			total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts;
-			total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes;
+			snapshot = pcpu_ptr->stats;	/* struct assignment */
 		} while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
 
-		total_stats.tx_drops += pcpu_ptr->stats.tx_drops;
+		total_stats.rx_pkts += snapshot.rx_pkts;
+		total_stats.rx_bytes += snapshot.rx_bytes;
+		total_stats.tx_pkts += snapshot.tx_pkts;
+		total_stats.tx_bytes += snapshot.tx_bytes;
+		total_stats.tx_drops += snapshot.tx_drops;
 	}
 
 	s->rx_packets = total_stats.rx_pkts;
@@ -354,4 +354,4 @@ int rmnet_vnd_update_dev_mtu(struct rmnet_port *port,
 	}
 
 	return 0;
-}
\ No newline at end of file
+}
-- 
GitLab


From e175aef902697826d344ce3a12189329848fe898 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 11 Jun 2021 18:49:48 -0700
Subject: [PATCH 2844/3804] ethtool: strset: fix message length calculation

Outer nest for ETHTOOL_A_STRSET_STRINGSETS is not accounted for.
This may result in ETHTOOL_MSG_STRSET_GET producing a warning like:

    calculated message payload length (684) not sufficient
    WARNING: CPU: 0 PID: 30967 at net/ethtool/netlink.c:369 ethnl_default_doit+0x87a/0xa20

and a splat.

As usual with such warnings, three conditions must be met for the warning
to trigger:
 - there must be no skb size rounding up (e.g. reply_size of 684);
 - string set must be per-device (so that the header gets populated);
 - the device name must be at least 12 characters long.

All in all, with current user space it looks like reading priv flags
is the only place this could potentially happen. Or with syzbot :)

Reported-by: syzbot+59aa77b92d06cd5a54f2@syzkaller.appspotmail.com
Fixes: 71921690f974 ("ethtool: provide string sets with STRSET_GET request")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/strset.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index b3029fff715dd..2d51b7ab4dc52 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -353,6 +353,8 @@ static int strset_reply_size(const struct ethnl_req_info *req_base,
 	int len = 0;
 	int ret;
 
+	len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */
+
 	for (i = 0; i < ETH_SS_COUNT; i++) {
 		const struct strset_info *set_info = &data->sets[i];
 
-- 
GitLab


From cb3376604a676e0302258b01893911bdd7aa5278 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 12 Jun 2021 14:37:46 +0200
Subject: [PATCH 2845/3804] qlcnic: Fix an error handling path in
 'qlcnic_probe()'

If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
call, as already done in the remove function.

Fixes: 451724c821c1 ("qlcnic: aer support")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 96b947fde646b..3beafc60747e6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2690,6 +2690,7 @@ err_out_free_hw_res:
 	kfree(ahw);
 
 err_out_free_res:
+	pci_disable_pcie_error_reporting(pdev);
 	pci_release_regions(pdev);
 
 err_out_disable_pdev:
-- 
GitLab


From 49a10c7b176295f8fafb338911cf028e97f65f4d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 12 Jun 2021 14:53:12 +0200
Subject: [PATCH 2846/3804] netxen_nic: Fix an error handling path in
 'netxen_nic_probe()'

If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
call, as already done in the remove function.

Fixes: e87ad5539343 ("netxen: support pci error handlers")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 7e6bac85495d3..344ea11434549 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1602,6 +1602,8 @@ err_out_free_netdev:
 	free_netdev(netdev);
 
 err_out_free_res:
+	if (NX_IS_REVISION_P3(pdev->revision))
+		pci_disable_pcie_error_reporting(pdev);
 	pci_release_regions(pdev);
 
 err_out_disable_pdev:
-- 
GitLab


From 09427c1915f754ebe7d3d8e54e79bbee48afe916 Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Sat, 12 Jun 2021 19:20:44 +0530
Subject: [PATCH 2847/3804] cxgb4: fix wrong ethtool n-tuple rule lookup

The TID returned during successful filter creation is relative to
the region in which the filter is created. Using it directly always
returns Hi Prio/Normal filter region's entry for the first couple of
entries, even though the rule is actually inserted in Hash region.
Fix by analyzing in which region the filter has been inserted and
save the absolute TID to be used for lookup later.

Fixes: db43b30cd89c ("cxgb4: add ethtool n-tuple filter deletion")
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/chelsio/cxgb4/cxgb4_ethtool.c    | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index df20485b57443..83ed10ac86606 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -1624,16 +1624,14 @@ static struct filter_entry *cxgb4_get_filter_entry(struct adapter *adap,
 						   u32 ftid)
 {
 	struct tid_info *t = &adap->tids;
-	struct filter_entry *f;
 
-	if (ftid < t->nhpftids)
-		f = &adap->tids.hpftid_tab[ftid];
-	else if (ftid < t->nftids)
-		f = &adap->tids.ftid_tab[ftid - t->nhpftids];
-	else
-		f = lookup_tid(&adap->tids, ftid);
+	if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids)
+		return &t->hpftid_tab[ftid - t->hpftid_base];
+
+	if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids)
+		return &t->ftid_tab[ftid - t->ftid_base];
 
-	return f;
+	return lookup_tid(t, ftid);
 }
 
 static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs,
@@ -1840,6 +1838,11 @@ static int cxgb4_ntuple_del_filter(struct net_device *dev,
 	filter_id = filter_info->loc_array[cmd->fs.location];
 	f = cxgb4_get_filter_entry(adapter, filter_id);
 
+	if (f->fs.prio)
+		filter_id -= adapter->tids.hpftid_base;
+	else if (!f->fs.hash)
+		filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
 	ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id);
 	if (ret)
 		goto err;
@@ -1899,6 +1902,11 @@ static int cxgb4_ntuple_set_filter(struct net_device *netdev,
 
 	filter_info = &adapter->ethtool_filters->port[pi->port_id];
 
+	if (fs.prio)
+		tid += adapter->tids.hpftid_base;
+	else if (!fs.hash)
+		tid += (adapter->tids.ftid_base - adapter->tids.nhpftids);
+
 	filter_info->loc_array[cmd->fs.location] = tid;
 	set_bit(cmd->fs.location, filter_info->bmap);
 	filter_info->in_use++;
-- 
GitLab


From 58af3d3d54e87bfc1f936e16c04ade3369d34011 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Sat, 12 Jun 2021 17:51:22 +0300
Subject: [PATCH 2848/3804] net: caif: fix memory leak in ldisc_open

Syzbot reported a memory leak in tty_init_dev().
The problem was a tty reference that is never put in ldisc_open():

static int ldisc_open(struct tty_struct *tty)
{
...
	ser->tty = tty_kref_get(tty);
...
	result = register_netdevice(dev);
	if (result) {
		rtnl_unlock();
		free_netdev(dev);
		return -ENODEV;
	}
...
}

The ser pointer is the netdev's private data, so after free_netdev()
this pointer goes away together with the tty reference, which is never
put. Fix it by adding tty_kref_put() before freeing the netdev.

Reported-and-tested-by: syzbot+f303e045423e617d2cad@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_serial.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index d17482395a4da..4ffbfd534f182 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -350,6 +350,7 @@ static int ldisc_open(struct tty_struct *tty)
 	rtnl_lock();
 	result = register_netdevice(dev);
 	if (result) {
+		tty_kref_put(tty);
 		rtnl_unlock();
 		free_netdev(dev);
 		return -ENODEV;
-- 
GitLab


From b87b04f5019e821c8c6c7761f258402e43500a1f Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Sat, 12 Jun 2021 18:24:59 -0600
Subject: [PATCH 2849/3804] ipv4: Fix device used for dst_alloc with local
 routes

Oliver reported a use case where deleting a VRF device can hang
waiting for the refcnt to drop to 0. The root cause is that the dst
is allocated against the VRF device but cached on the loopback
device.

The use case (added to the selftests) has an implicit VRF crossing
due to the ordering of the FIB rules (lookup local is before the
l3mdev rule, but the problem occurs even if the FIB rules are
re-ordered with local after l3mdev because the VRF table does not
have a default route to terminate the lookup). The end result is
that the FIB lookup returns the loopback device as the nexthop,
but the ingress device is in a VRF. The mismatch causes the dst
to be allocated against the VRF device but then cached on the loopback.

The fix is to bring the trick used for IPv6 (see ip6_rt_get_dev_rcu):
pick the dst alloc device based on the fib lookup result but with checks
that the result has a nexthop device (e.g., not an unreachable or
prohibit entry).

Fixes: f5a0aab84b74 ("net: ipv4: dst for local input routes should use l3mdev if relevant")
Reported-by: Oliver Herms <oliver.peter.herms@gmail.com>
Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c                         | 15 +++++++++++++-
 tools/testing/selftests/net/fib_tests.sh | 25 ++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6787c55f6ab9..6a36ac98476fa 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2056,6 +2056,19 @@ martian_source:
 	return err;
 }
 
+/* get device for dst_alloc with local routes */
+static struct net_device *ip_rt_get_dev(struct net *net,
+					const struct fib_result *res)
+{
+	struct fib_nh_common *nhc = res->fi ? res->nhc : NULL;
+	struct net_device *dev = NULL;
+
+	if (nhc)
+		dev = l3mdev_master_dev_rcu(nhc->nhc_dev);
+
+	return dev ? : net->loopback_dev;
+}
+
 /*
  *	NOTE. We drop all the packets that has local source
  *	addresses, because every properly looped back packet
@@ -2212,7 +2225,7 @@ local_input:
 		}
 	}
 
-	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
+	rth = rt_dst_alloc(ip_rt_get_dev(net, res),
 			   flags | RTCF_LOCAL, res->type,
 			   IN_DEV_ORCONF(in_dev, NOPOLICY), false);
 	if (!rth)
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 76d9487fb03cc..5abe92d55b696 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1384,12 +1384,37 @@ ipv4_rt_replace()
 	ipv4_rt_replace_mpath
 }
 
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+	run_cmd "ip addr add 10.0.0.1/32 dev lo"
+	run_cmd "ip netns add test-ns"
+	run_cmd "ip link add veth-outside type veth peer name veth-inside"
+	run_cmd "ip link add vrf-100 type vrf table 1100"
+	run_cmd "ip link set veth-outside master vrf-100"
+	run_cmd "ip link set veth-inside netns test-ns"
+	run_cmd "ip link set veth-outside up"
+	run_cmd "ip link set vrf-100 up"
+	run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+	run_cmd "ip netns exec test-ns ip link set veth-inside up"
+	run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+	run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
+	run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
+	run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+	run_cmd "ip link delete vrf-100"
+
+	# if we do not hang test is a success
+	log_test $? 0 "Cached route removed from VRF port device"
+}
+
 ipv4_route_test()
 {
 	route_setup
 
 	ipv4_rt_add
 	ipv4_rt_replace
+	ipv4_local_rt_cache
 
 	route_cleanup
 }
-- 
GitLab


From ad9d24c9429e2159d1e279dc3a83191ccb4daf1d Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Mon, 14 Jun 2021 15:06:50 +0300
Subject: [PATCH 2850/3804] net: qrtr: fix OOB Read in qrtr_endpoint_post

Syzbot reported slab-out-of-bounds Read in
qrtr_endpoint_post. The problem was in wrong
_size_ type:

	if (len != ALIGN(size, 4) + hdrlen)
		goto err;

If size from qrtr_hdr is 4294967293 (0xfffffffd), the result of
ALIGN(size, 4) will be 0. In case of len == hdrlen and size == 4294967293
in header this check won't fail and

	skb_put_data(skb, data + hdrlen, size);

will read out of bounds from data, which is a block of only hdrlen bytes.

Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets")
Reported-and-tested-by: syzbot+1917d778024161609247@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/qrtr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c0477bec09bda..f2efaa4225f91 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -436,7 +436,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
 	struct qrtr_sock *ipc;
 	struct sk_buff *skb;
 	struct qrtr_cb *cb;
-	unsigned int size;
+	size_t size;
 	unsigned int ver;
 	size_t hdrlen;
 
-- 
GitLab


From 995fca15b73ff8f92888cc2d5d95f17ffdac74ba Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Mon, 14 Jun 2021 10:46:44 -0700
Subject: [PATCH 2851/3804] Bluetooth: SMP: Fix crash when receiving new
 connection when debug is enabled

When receiving a new connection pchan->conn won't be initialized so the
code cannot use bt_dev_dbg as the pointer to hci_dev won't be
accessible.

Fixes: 2e1614f7d61e4 ("Bluetooth: SMP: Convert BT_ERR/BT_DBG to bt_dev_err/bt_dev_dbg")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 372e3b25aaa4c..7dd51da738454 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3229,7 +3229,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 {
 	struct l2cap_chan *chan;
 
-	bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan);
+	BT_DBG("pchan %p", pchan);
 
 	chan = l2cap_chan_create();
 	if (!chan)
@@ -3250,7 +3250,7 @@ static inline struct l2cap_chan *smp_new_conn_cb(struct l2cap_chan *pchan)
 	 */
 	atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
 
-	bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan);
+	BT_DBG("created chan %p", chan);
 
 	return chan;
 }
@@ -3354,7 +3354,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
 {
 	struct smp_dev *smp;
 
-	bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan);
+	BT_DBG("chan %p", chan);
 
 	smp = chan->data;
 	if (smp) {
-- 
GitLab


From a7b359fc6a37faaf472125867c8dc5a068c90982 Mon Sep 17 00:00:00 2001
From: Odin Ugedal <odin@uged.al>
Date: Sat, 12 Jun 2021 13:28:15 +0200
Subject: [PATCH 2852/3804] sched/fair: Correctly insert cfs_rq's to list on
 unthrottle

Fix an issue where fairness is decreased since cfs_rq's can end up not
being decayed properly. For two sibling control groups with the same
priority, this can often lead to a load ratio of 99/1 (!!).

This happens because when a cfs_rq is throttled, all the descendant
cfs_rq's will be removed from the leaf list. When the initial cfs_rq
is unthrottled, it will currently only re-add descendant cfs_rq's if
they have one or more entities enqueued. This is not a perfect
heuristic.

Instead, we insert all cfs_rq's that contain one or more enqueued
entities, or whose load is not completely decayed.

This issue can often lead to situations like this for equally weighted control
groups:

  $ ps u -C stress
  USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
  root       10009 88.8  0.0   3676   100 pts/1    R+   11:04   0:13 stress --cpu 1
  root       10023  3.0  0.0   3676   104 pts/1    R+   11:04   0:00 stress --cpu 1

Fixes: 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
[vingo: !SMP build fix]
Signed-off-by: Odin Ugedal <odin@uged.al>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20210612112815.61678-1-odin@uged.al
---
 kernel/sched/fair.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2c8a9352590d9..bfaa6e1f6067d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3298,6 +3298,24 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	if (cfs_rq->load.weight)
+		return false;
+
+	if (cfs_rq->avg.load_sum)
+		return false;
+
+	if (cfs_rq->avg.util_sum)
+		return false;
+
+	if (cfs_rq->avg.runnable_sum)
+		return false;
+
+	return true;
+}
+
 /**
  * update_tg_load_avg - update the tg's load avg
  * @cfs_rq: the cfs_rq whose avg changed
@@ -4091,6 +4109,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 
 #else /* CONFIG_SMP */
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+	return true;
+}
+
 #define UPDATE_TG	0x0
 #define SKIP_AGE_LOAD	0x0
 #define DO_ATTACH	0x0
@@ -4749,8 +4772,8 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 		cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
 					     cfs_rq->throttled_clock_task;
 
-		/* Add cfs_rq with already running entity in the list */
-		if (cfs_rq->nr_running >= 1)
+		/* Add cfs_rq with load or one or more already running entities to the list */
+		if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running)
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
@@ -7996,23 +8019,6 @@ static bool __update_blocked_others(struct rq *rq, bool *done)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
-{
-	if (cfs_rq->load.weight)
-		return false;
-
-	if (cfs_rq->avg.load_sum)
-		return false;
-
-	if (cfs_rq->avg.util_sum)
-		return false;
-
-	if (cfs_rq->avg.runnable_sum)
-		return false;
-
-	return true;
-}
-
 static bool __update_blocked_fair(struct rq *rq, bool *done)
 {
 	struct cfs_rq *cfs_rq, *pos;
-- 
GitLab


From d203b0fd863a2261e5d00b97f3d060c4c2a6db71 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 28 May 2021 13:03:30 +0000
Subject: [PATCH 2853/3804] bpf: Inherit expanded/patched seen count from old
 aux data

Instead of relying on current env->pass_cnt, use the seen count from the
old aux data in adjust_insn_aux_data(), and expand it to the new range of
patched instructions. This change is valid given we always expand 1:n
with n>=1, so what applies to the old/original instruction needs to apply
for the replacement as well.

Not relying on env->pass_cnt is a prerequisite for a later change where we
want to avoid marking an instruction seen when verified under speculative
execution path.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 94ba5163d4c54..f93c7befb5dcb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -11366,6 +11366,7 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
 {
 	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
 	struct bpf_insn *insn = new_prog->insnsi;
+	u32 old_seen = old_data[off].seen;
 	u32 prog_len;
 	int i;
 
@@ -11386,7 +11387,8 @@ static int adjust_insn_aux_data(struct bpf_verifier_env *env,
 	memcpy(new_data + off + cnt - 1, old_data + off,
 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
 	for (i = off; i < off + cnt - 1; i++) {
-		new_data[i].seen = env->pass_cnt;
+		/* Expand insni[off]'s seen count to the patched range. */
+		new_data[i].seen = old_seen;
 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
 	}
 	env->insn_aux_data = new_data;
-- 
GitLab


From fe9a5ca7e370e613a9a75a13008a3845ea759d6e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 28 May 2021 13:47:27 +0000
Subject: [PATCH 2854/3804] bpf: Do not mark insn as seen under speculative
 path verification

... in such circumstances, we do not want to mark the instruction as seen given
the goal is still to jmp-1 rewrite/sanitize dead code, if it is not reachable
from the non-speculative path verification. We do however want to verify it for
safety regardless.

With the patch as-is all the insns that have been marked as seen before the
patch will also be marked as seen after the patch (just with a potentially
different non-zero count). An upcoming patch will also verify paths that are
unreachable in the non-speculative domain, hence this extension is needed.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f93c7befb5dcb..af88d9b9c0143 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6572,6 +6572,19 @@ do_sim:
 	return !ret ? REASON_STACK : 0;
 }
 
+static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
+{
+	struct bpf_verifier_state *vstate = env->cur_state;
+
+	/* If we simulate paths under speculation, we don't update the
+	 * insn as 'seen' such that when we verify unreachable paths in
+	 * the non-speculative domain, sanitize_dead_code() can still
+	 * rewrite/sanitize them.
+	 */
+	if (!vstate->speculative)
+		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+}
+
 static int sanitize_err(struct bpf_verifier_env *env,
 			const struct bpf_insn *insn, int reason,
 			const struct bpf_reg_state *off_reg,
@@ -10630,7 +10643,7 @@ static int do_check(struct bpf_verifier_env *env)
 		}
 
 		regs = cur_regs(env);
-		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+		sanitize_mark_insn_seen(env);
 		prev_insn_idx = env->insn_idx;
 
 		if (class == BPF_ALU || class == BPF_ALU64) {
@@ -10857,7 +10870,7 @@ process_bpf_exit:
 					return err;
 
 				env->insn_idx++;
-				env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
+				sanitize_mark_insn_seen(env);
 			} else {
 				verbose(env, "invalid BPF_LD mode\n");
 				return -EINVAL;
@@ -12712,6 +12725,9 @@ static void free_states(struct bpf_verifier_env *env)
  * insn_aux_data was touched. These variables are compared to clear temporary
  * data from failed pass. For testing and experiments do_check_common() can be
  * run multiple times even when prior attempt to verify is unsuccessful.
+ *
+ * Note that special handling is needed on !env->bypass_spec_v1 if this is
+ * ever called outside of error path with subsequent program rejection.
  */
 static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
 {
-- 
GitLab


From 9183671af6dbf60a1219371d4ed73e23f43b49db Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 28 May 2021 15:47:32 +0000
Subject: [PATCH 2855/3804] bpf: Fix leakage under speculation on mispredicted
 branches

The verifier only enumerates valid control-flow paths and skips paths that
are unreachable in the non-speculative domain. And so it can miss issues
under speculative execution on mispredicted branches.

For example, a type confusion has been demonstrated with the following
crafted program:

  // r0 = pointer to a map array entry
  // r6 = pointer to readable stack slot
  // r9 = scalar controlled by attacker
  1: r0 = *(u64 *)(r0) // cache miss
  2: if r0 != 0x0 goto line 4
  3: r6 = r9
  4: if r0 != 0x1 goto line 6
  5: r9 = *(u8 *)(r6)
  6: // leak r9

Since line 3 runs iff r0 == 0 and line 5 runs iff r0 == 1, the verifier
concludes that the pointer dereference on line 5 is safe. But: if the
attacker trains both the branches to fall-through, such that the following
is speculatively executed ...

  r6 = r9
  r9 = *(u8 *)(r6)
  // leak r9

... then the program will dereference an attacker-controlled value and could
leak its content under speculative execution via side-channel. This requires
mistraining the branch predictor, which can be rather tricky, because the
branches are mutually exclusive. However such training can be done at
congruent addresses in user space using different branches that are not
mutually exclusive. That is, by training branches in user space ...

  A:  if r0 != 0x0 goto line C
  B:  ...
  C:  if r0 != 0x0 goto line D
  D:  ...

... such that addresses A and C collide to the same CPU branch prediction
entries in the PHT (pattern history table) as those of the BPF program's
lines 2 and 4, respectively. A non-privileged attacker could simply brute
force such collisions in the PHT until observing the attack succeeding.

Alternative methods to mistrain the branch predictor are also possible that
avoid brute forcing the collisions in the PHT. A reliable attack has been
demonstrated, for example, using the following crafted program:

  // r0 = pointer to a [control] map array entry
  // r7 = *(u64 *)(r0 + 0), training/attack phase
  // r8 = *(u64 *)(r0 + 8), oob address
  // [...]
  // r0 = pointer to a [data] map array entry
  1: if r7 == 0x3 goto line 3
  2: r8 = r0
  // crafted sequence of conditional jumps to separate the conditional
  // branch in line 193 from the current execution flow
  3: if r0 != 0x0 goto line 5
  4: if r0 == 0x0 goto exit
  5: if r0 != 0x0 goto line 7
  6: if r0 == 0x0 goto exit
  [...]
  187: if r0 != 0x0 goto line 189
  188: if r0 == 0x0 goto exit
  // load any slowly-loaded value (due to cache miss in phase 3) ...
  189: r3 = *(u64 *)(r0 + 0x1200)
  // ... and turn it into known zero for verifier, while preserving slowly-
  // loaded dependency when executing:
  190: r3 &= 1
  191: r3 &= 2
  // speculatively bypassed phase dependency
  192: r7 += r3
  193: if r7 == 0x3 goto exit
  194: r4 = *(u8 *)(r8 + 0)
  // leak r4

As can be seen, in training phase (phase != 0x3), the condition in line 1
turns into false and therefore r8 with the oob address is overridden with
the valid map value address, which in line 194 we can read out without
issues. However, in attack phase, line 2 is skipped, and due to the cache
miss in line 189 where the map value is (zeroed and later) added to the
phase register, the condition in line 193 takes the fall-through path due
to prior branch predictor training, where under speculation, it'll load the
byte at oob address r8 (unknown scalar type at that point) which could then
be leaked via side-channel.

One way to mitigate these is to 'branch off' an unreachable path, meaning,
the current verification path keeps following the is_branch_taken() path
and we push the other branch to the verification stack. Given this is
unreachable from the non-speculative domain, this branch's vstate is
explicitly marked as speculative. This is needed for two reasons: i) if
this path is solely seen from speculative execution, then we later on still
want the dead code elimination to kick in in order to sanitize these
instructions with jmp-1s, and ii) to ensure that paths walked in the
non-speculative domain are not pruned from earlier walks of paths walked in
the speculative domain. Additionally, for robustness, we mark the registers
which have been part of the conditional as unknown in the speculative path
given there should be no assumptions made on their content.

The fix in here mitigates type confusion attacks described earlier due to
i) all code paths in the BPF program being explored and ii) existing
verifier logic already ensuring that a given memory access instruction
references one specific data structure.

An alternative to this fix that has also been looked at in this scope was to
mark aux->alu_state at the jump instruction with a BPF_JMP_TAKEN state as
well as direction encoding (always-goto, always-fallthrough, unknown), such
that mixing of different always-* directions themselves as well as mixing of
always-* with unknown directions would cause a program rejection by the
verifier, e.g. programs with constructs like 'if ([...]) { x = 0; } else
{ x = 1; }' with subsequent 'if (x == 1) { [...] }'. For unprivileged, this
would result in only single direction always-* taken paths, and unknown taken
paths being allowed, such that the former could be patched from a conditional
jump to an unconditional jump (ja). Compared to this approach here, it would
have two downsides: i) valid programs that otherwise are not performing any
pointer arithmetic, etc, would potentially be rejected/broken, and ii) we are
required to turn off path pruning for unprivileged, where both can be avoided
in this work through pushing the invalid branch to the verification stack.

The issue was originally discovered by Adam and Ofek, and later independently
discovered and reported as a result of Benedict and Piotr's research work.

Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation")
Reported-by: Adam Morrison <mad@cs.tau.ac.il>
Reported-by: Ofek Kirzner <ofekkir@gmail.com>
Reported-by: Benedict Schlueter <benedict.schlueter@rub.de>
Reported-by: Piotr Krysiuk <piotras@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Benedict Schlueter <benedict.schlueter@rub.de>
Reviewed-by: Piotr Krysiuk <piotras@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 44 +++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index af88d9b9c0143..c6a27574242de 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6483,6 +6483,27 @@ struct bpf_sanitize_info {
 	bool mask_to_left;
 };
 
+static struct bpf_verifier_state *
+sanitize_speculative_path(struct bpf_verifier_env *env,
+			  const struct bpf_insn *insn,
+			  u32 next_idx, u32 curr_idx)
+{
+	struct bpf_verifier_state *branch;
+	struct bpf_reg_state *regs;
+
+	branch = push_stack(env, next_idx, curr_idx, true);
+	if (branch && insn) {
+		regs = branch->frame[branch->curframe]->regs;
+		if (BPF_SRC(insn->code) == BPF_K) {
+			mark_reg_unknown(env, regs, insn->dst_reg);
+		} else if (BPF_SRC(insn->code) == BPF_X) {
+			mark_reg_unknown(env, regs, insn->dst_reg);
+			mark_reg_unknown(env, regs, insn->src_reg);
+		}
+	}
+	return branch;
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 			    struct bpf_insn *insn,
 			    const struct bpf_reg_state *ptr_reg,
@@ -6566,7 +6587,8 @@ do_sim:
 		tmp = *dst_reg;
 		*dst_reg = *ptr_reg;
 	}
-	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
+	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
+					env->insn_idx);
 	if (!ptr_is_dst_reg && ret)
 		*dst_reg = tmp;
 	return !ret ? REASON_STACK : 0;
@@ -8763,14 +8785,28 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 		if (err)
 			return err;
 	}
+
 	if (pred == 1) {
-		/* only follow the goto, ignore fall-through */
+		/* Only follow the goto, ignore fall-through. If needed, push
+		 * the fall-through branch for simulation under speculative
+		 * execution.
+		 */
+		if (!env->bypass_spec_v1 &&
+		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
+					       *insn_idx))
+			return -EFAULT;
 		*insn_idx += insn->off;
 		return 0;
 	} else if (pred == 0) {
-		/* only follow fall-through branch, since
-		 * that's where the program will go
+		/* Only follow the fall-through branch, since that's where the
+		 * program will go. If needed, push the goto branch for
+		 * simulation under speculative execution.
 		 */
+		if (!env->bypass_spec_v1 &&
+		    !sanitize_speculative_path(env, insn,
+					       *insn_idx + insn->off + 1,
+					       *insn_idx))
+			return -EFAULT;
 		return 0;
 	}
 
-- 
GitLab


From 973377ffe8148180b2651825b92ae91988141b05 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 31 May 2021 12:34:24 +0000
Subject: [PATCH 2856/3804] bpf, selftests: Adjust few selftest outcomes wrt
 unreachable code

In almost all cases from test_verifier that have been changed in here, we've
had an unreachable path with a load from a register which has an invalid
address on purpose. This was basically to make sure that we never walk this
path and to have the verifier complain if it would otherwise. Change it to
match on the right error for unprivileged given we now test these paths
under speculative execution.

There's one case where we match on exact # of insns_processed. Due to the
extra path, this will of course mismatch on unprivileged. Thus, restrict the
test->insn_processed check to privileged-only.

In one other case, we result in a 'pointer comparison prohibited' error. This
is similarly due to verifying an 'invalid' branch where we end up with a value
pointer on one side of the comparison.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c   |  2 +-
 tools/testing/selftests/bpf/verifier/and.c    |  2 ++
 tools/testing/selftests/bpf/verifier/bounds.c | 14 ++++++++++++
 .../selftests/bpf/verifier/dead_code.c        |  2 ++
 tools/testing/selftests/bpf/verifier/jmp32.c  | 22 +++++++++++++++++++
 tools/testing/selftests/bpf/verifier/jset.c   | 10 +++++----
 tools/testing/selftests/bpf/verifier/unpriv.c |  2 ++
 .../selftests/bpf/verifier/value_ptr_arith.c  |  7 +++---
 8 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 1512092e1e680..3a9e332c5e360 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1147,7 +1147,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		}
 	}
 
-	if (test->insn_processed) {
+	if (!unpriv && test->insn_processed) {
 		uint32_t insn_processed;
 		char *proc;
 
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index ca8fdb1b3f015..7d7ebee5cc7a8 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -61,6 +61,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R1 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 0
 },
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 8a1caf46ffbc3..e061e8799ce23 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -508,6 +508,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT
 },
 {
@@ -528,6 +530,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT
 },
 {
@@ -569,6 +573,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
+	.result_unpriv = REJECT,
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
@@ -589,6 +595,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
+	.result_unpriv = REJECT,
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
@@ -609,6 +617,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
+	.result_unpriv = REJECT,
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
@@ -674,6 +684,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
+	.result_unpriv = REJECT,
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
@@ -695,6 +707,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
+	.result_unpriv = REJECT,
 	.fixup_map_hash_8b = { 3 },
 	.result = ACCEPT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 17fe33a750348..2c8935b3e65da 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -8,6 +8,8 @@
 	BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
 },
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bd5cae4a7f733..1c857b2fbdf0a 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -87,6 +87,8 @@
 	BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
 {
@@ -150,6 +152,8 @@
 	BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
 {
@@ -213,6 +217,8 @@
 	BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
 {
@@ -280,6 +286,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -348,6 +356,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -416,6 +426,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -484,6 +496,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -552,6 +566,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -620,6 +636,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -688,6 +706,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
@@ -756,6 +776,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
 },
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 8dcd4e0383d57..11fc68da735ea 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -82,8 +82,8 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.retval_unpriv = 1,
-	.result_unpriv = ACCEPT,
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.retval = 1,
 	.result = ACCEPT,
 },
@@ -141,7 +141,8 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.result_unpriv = ACCEPT,
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
 {
@@ -162,6 +163,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.result_unpriv = ACCEPT,
+	.errstr_unpriv = "R9 !read_ok",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index bd436df5cc326..111801aea5e35 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -420,6 +420,8 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
 	BPF_EXIT_INSN(),
 	},
+	.errstr_unpriv = "R7 invalid mem access 'inv'",
+	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 0,
 },
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 7ae2859d495c5..a3e593ddfafc9 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -120,7 +120,7 @@
 	.fixup_map_array_48b = { 1 },
 	.result = ACCEPT,
 	.result_unpriv = REJECT,
-	.errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+	.errstr_unpriv = "R2 pointer comparison prohibited",
 	.retval = 0,
 },
 {
@@ -159,7 +159,8 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	// fake-dead code; targeted from branch A to
-	// prevent dead code sanitization
+	// prevent dead code sanitization, rejected
+	// via branch B however
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
@@ -167,7 +168,7 @@
 	.fixup_map_array_48b = { 1 },
 	.result = ACCEPT,
 	.result_unpriv = REJECT,
-	.errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
+	.errstr_unpriv = "R0 invalid mem access 'inv'",
 	.retval = 0,
 },
 {
-- 
GitLab


From 2214fb53006e6cfa6371b706070cb99794c68c3b Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Mon, 14 Jun 2021 15:03:25 -0600
Subject: [PATCH 2857/3804] net: mhi_net: Update the transmit handler prototype

Update the function prototype of mhi_ndo_xmit to match
ndo_start_xmit. This otherwise leads to run time failures when
CFI is enabled in kernel.

Fixes: 3ffec6a14f24 ("net: Add mhi-net driver")
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mhi/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi/net.c
index 0d8293a47a56d..b806f2f8f859b 100644
--- a/drivers/net/mhi/net.c
+++ b/drivers/net/mhi/net.c
@@ -49,7 +49,7 @@ static int mhi_ndo_stop(struct net_device *ndev)
 	return 0;
 }
 
-static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct mhi_net_dev *mhi_netdev = netdev_priv(ndev);
 	const struct mhi_net_proto *proto = mhi_netdev->proto;
-- 
GitLab


From 91a1265cacdd96229304adddf18dcf64a4b8c040 Mon Sep 17 00:00:00 2001
From: Dwaipayan Ray <dwaipayanray1@gmail.com>
Date: Mon, 14 Jun 2021 19:41:32 +0530
Subject: [PATCH 2858/3804] docs: checkpatch: Document and segregate more
 checkpatch message types

Add and document more checkpatch message types. About 50% of all
message types are documented now.

In addition to this:

- Create a new subsection 'Indentation and Line Breaks'.
- Rename subsection 'Comment style' to simply 'Comments'.
- Refactor some of the existing types to appropriate subsections.

Reviewed-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Tested-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Dwaipayan Ray <dwaipayanray1@gmail.com>
Link: https://lore.kernel.org/r/20210614141132.6881-1-dwaipayanray1@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/checkpatch.rst | 399 ++++++++++++++++++++-----
 1 file changed, 328 insertions(+), 71 deletions(-)

diff --git a/Documentation/dev-tools/checkpatch.rst b/Documentation/dev-tools/checkpatch.rst
index 87b859f321de4..f0956e9ea2d84 100644
--- a/Documentation/dev-tools/checkpatch.rst
+++ b/Documentation/dev-tools/checkpatch.rst
@@ -298,10 +298,148 @@ API usage
 
     See: https://www.kernel.org/doc/html/latest/process/deprecated.html#simple-strtol-simple-strtoll-simple-strtoul-simple-strtoull
 
+  **CONSTANT_CONVERSION**
+    Use of __constant_<foo> form is discouraged for the following functions::
+
+      __constant_cpu_to_be[x]
+      __constant_cpu_to_le[x]
+      __constant_be[x]_to_cpu
+      __constant_le[x]_to_cpu
+      __constant_htons
+      __constant_ntohs
+
+    Using any of these outside of include/uapi/ is not preferred as using the
+    function without __constant_ is identical when the argument is a
+    constant.
+
+    In big endian systems, the macros like __constant_cpu_to_be32(x) and
+    cpu_to_be32(x) expand to the same expression::
+
+      #define __constant_cpu_to_be32(x) ((__force __be32)(__u32)(x))
+      #define __cpu_to_be32(x)          ((__force __be32)(__u32)(x))
+
+    In little endian systems, the macros __constant_cpu_to_be32(x) and
+    cpu_to_be32(x) expand to __constant_swab32 and __swab32.  __swab32
+    has a __builtin_constant_p check::
+
+      #define __swab32(x)				\
+        (__builtin_constant_p((__u32)(x)) ?	\
+        ___constant_swab32(x) :			\
+        __fswab32(x))
+
+    So ultimately they have a special case for constants.
+    Similar is the case with all of the macros in the list.  Thus
+    using the __constant_... forms is unnecessarily verbose and
+    not preferred outside of include/uapi.
+
+    See: https://lore.kernel.org/lkml/1400106425.12666.6.camel@joe-AO725/
+
+  **DEPRECATED_API**
+    Usage of a deprecated RCU API is detected.  It is recommended to replace
+    old flavourful RCU APIs by their new vanilla-RCU counterparts.
+
+    The full list of available RCU APIs can be viewed from the kernel docs.
+
+    See: https://www.kernel.org/doc/html/latest/RCU/whatisRCU.html#full-list-of-rcu-apis
+
+  **DEPRECATED_VARIABLE**
+    EXTRA_{A,C,CPP,LD}FLAGS are deprecated and should be replaced by the new
+    flags added via commit f77bf01425b1 ("kbuild: introduce ccflags-y,
+    asflags-y and ldflags-y").
+
+    The following conversion scheme may be used::
+
+      EXTRA_AFLAGS    ->  asflags-y
+      EXTRA_CFLAGS    ->  ccflags-y
+      EXTRA_CPPFLAGS  ->  cppflags-y
+      EXTRA_LDFLAGS   ->  ldflags-y
+
+    See:
+
+      1. https://lore.kernel.org/lkml/20070930191054.GA15876@uranus.ravnborg.org/
+      2. https://lore.kernel.org/lkml/1313384834-24433-12-git-send-email-lacombar@gmail.com/
+      3. https://www.kernel.org/doc/html/latest/kbuild/makefiles.html#compilation-flags
+
+  **DEVICE_ATTR_FUNCTIONS**
+    The function names used in DEVICE_ATTR are unusual.
+    Typically, the store and show functions are used with <attr>_store and
+    <attr>_show, where <attr> is a named attribute variable of the device.
+
+    Consider the following examples::
+
+      static DEVICE_ATTR(type, 0444, type_show, NULL);
+      static DEVICE_ATTR(power, 0644, power_show, power_store);
+
+    The function names should preferably follow the above pattern.
+
+    See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+  **DEVICE_ATTR_RO**
+    The DEVICE_ATTR_RO(name) helper macro can be used instead of
+    DEVICE_ATTR(name, 0444, name_show, NULL);
+
+    Note that the macro automatically appends _show to the named
+    attribute variable of the device for the show method.
+
+    See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+  **DEVICE_ATTR_RW**
+    The DEVICE_ATTR_RW(name) helper macro can be used instead of
+    DEVICE_ATTR(name, 0644, name_show, name_store);
+
+    Note that the macro automatically appends _show and _store to the
+    named attribute variable of the device for the show and store methods.
+
+    See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+  **DEVICE_ATTR_WO**
+    The DEVICE_ATTR_WO(name) helper macro can be used instead of
+    DEVICE_ATTR(name, 0200, NULL, name_store);
+
+    Note that the macro automatically appends _store to the
+    named attribute variable of the device for the store method.
+
+    See: https://www.kernel.org/doc/html/latest/driver-api/driver-model/device.html#attributes
+
+  **DUPLICATED_SYSCTL_CONST**
+    Commit d91bff3011cf ("proc/sysctl: add shared variables for range
+    check") added some shared const variables to be used instead of a local
+    copy in each source file.
+
+    Consider replacing the sysctl range checking value with the shared
+    one in include/linux/sysctl.h.  The following conversion scheme may
+    be used::
+
+      &zero     ->  SYSCTL_ZERO
+      &one      ->  SYSCTL_ONE
+      &int_max  ->  SYSCTL_INT_MAX
+
+    See:
+
+      1. https://lore.kernel.org/lkml/20190430180111.10688-1-mcroce@redhat.com/
+      2. https://lore.kernel.org/lkml/20190531131422.14970-1-mcroce@redhat.com/
+
+  **ENOSYS**
+    ENOSYS means that a nonexistent system call was called.
+    Earlier, it was wrongly used for things like invalid operations on
+    otherwise valid syscalls.  This should be avoided in new code.
+
+    See: https://lore.kernel.org/lkml/5eb299021dec23c1a48fa7d9f2c8b794e967766d.1408730669.git.luto@amacapital.net/
+
+  **ENOTSUPP**
+    ENOTSUPP is not a standard error code and should be avoided in new patches.
+    EOPNOTSUPP should be used instead.
+
+    See: https://lore.kernel.org/netdev/20200510182252.GA411829@lunn.ch/
+
+  **EXPORT_SYMBOL**
+    EXPORT_SYMBOL should immediately follow the symbol to be exported.
+
   **IN_ATOMIC**
     in_atomic() is not for driver use so any such use is reported as an ERROR.
-    Also in_atomic() is often used to determine if we may sleep, but it is not
-    reliable in this use model therefore its use is strongly discouraged.
+    Also in_atomic() is often used to determine if sleeping is permitted,
+    but it is not reliable in this use model.  Therefore its use is
+    strongly discouraged.
 
     However, in_atomic() is ok for core kernel use.
 
@@ -335,8 +473,8 @@ API usage
     See: https://www.kernel.org/doc/html/latest/timers/timers-howto.html#delays-information-on-the-various-kernel-delay-sleep-mechanisms
 
 
-Comment style
--------------
+Comments
+--------
 
   **BLOCK_COMMENT_STYLE**
     The comment style is incorrect.  The preferred style for multi-
@@ -362,6 +500,21 @@ Comment style
 
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#commenting
 
+  **DATA_RACE**
+    Applications of data_race() should have a comment so as to document the
+    reasoning behind why it was deemed safe.
+
+    See: https://lore.kernel.org/lkml/20200401101714.44781-1-elver@google.com/
+
+  **FSF_MAILING_ADDRESS**
+    Kernel maintainers reject new instances of the GPL boilerplate paragraph
+    directing people to write to the FSF for a copy of the GPL, since the
+    FSF has moved in the past and may do so again.
+    So do not write paragraphs about writing to the Free Software Foundation's
+    mailing address.
+
+    See: https://lore.kernel.org/lkml/20131006222342.GT19510@leaf/
+
 
 Commit message
 --------------
@@ -394,6 +547,13 @@ Commit message
 
     See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
 
+  **EMAIL_SUBJECT**
+    Naming the tool that found the issue is not very useful in the
+    subject line.  A good subject line summarizes the change that
+    the patch brings.
+
+    See: https://www.kernel.org/doc/html/latest/process/submitting-patches.html#describe-your-changes
+
   **FROM_SIGN_OFF_MISMATCH**
     The author's email does not match with that in the Signed-off-by:
     line(s). This can be sometimes caused due to an improperly configured
@@ -482,6 +642,87 @@ Comparison style
     side of the test should be avoided.
 
 
+Indentation and Line Breaks
+---------------------------
+
+  **CODE_INDENT**
+    Code indent should use tabs instead of spaces.
+    Outside of comments, documentation and Kconfig,
+    spaces are never used for indentation.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
+
+  **DEEP_INDENTATION**
+    Indentation with 6 or more tabs usually indicates overly indented
+    code.
+
+    It is suggested to refactor excessive indentation of
+    if/else/for/do/while/switch statements.
+
+    See: https://lore.kernel.org/lkml/1328311239.21255.24.camel@joe2Laptop/
+
+  **SWITCH_CASE_INDENT_LEVEL**
+    switch should be at the same indent as case.
+    Example::
+
+      switch (suffix) {
+      case 'G':
+      case 'g':
+              mem <<= 30;
+              break;
+      case 'M':
+      case 'm':
+              mem <<= 20;
+              break;
+      case 'K':
+      case 'k':
+              mem <<= 10;
+              fallthrough;
+      default:
+              break;
+      }
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
+
+  **LONG_LINE**
+    The line has exceeded the specified maximum length.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    Earlier, the default line length was 80 columns.  Commit bdc48fa11e46
+    ("checkpatch/coding-style: deprecate 80-column warning") increased the
+    limit to 100 columns.  This is not a hard limit either and it's
+    preferable to stay within 80 columns whenever possible.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **LONG_LINE_STRING**
+    A string starts before but extends beyond the maximum line length.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **LONG_LINE_COMMENT**
+    A comment starts before but extends beyond the maximum line length.
+    To use a different maximum line length, the --max-line-length=n option
+    may be added while invoking checkpatch.
+
+    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+
+  **TRAILING_STATEMENTS**
+    Trailing statements (for example after any conditional) should be
+    on the next line.
+    Statements, such as::
+
+      if (x == y) break;
+
+    should be::
+
+      if (x == y)
+              break;
+
+
 Macros, Attributes and Symbols
 ------------------------------
 
@@ -546,6 +787,9 @@ Macros, Attributes and Symbols
 
     See: https://lore.kernel.org/lkml/CA+55aFycQ9XJvEOsiM3txHL5bjUc8CeKWJNR_H+MiicaddB42Q@mail.gmail.com/
 
+  **DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON**
+    do {} while(0) macros should not have a trailing semicolon.
+
   **INIT_ATTRIBUTE**
     Const init definitions should use __initconst instead of
     __initdata.
@@ -614,6 +858,48 @@ Functions and Variables
 
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#naming
 
+  **CONST_CONST**
+    Using `const <type> const *` is generally meant to be
+    written `const <type> * const`.
+
+  **CONST_STRUCT**
+    Using const is generally a good idea.  Checkpatch reads
+    a list of frequently used structs that are always or
+    almost always constant.
+
+    The existing structs list can be viewed from
+    `scripts/const_structs.checkpatch`.
+
+    See: https://lore.kernel.org/lkml/alpine.DEB.2.10.1608281509480.3321@hadrien/
+
+  **EMBEDDED_FUNCTION_NAME**
+    Embedded function names are less appropriate to use as
+    refactoring can cause function renaming.  Prefer the use of
+    "%s", __func__ to embedded function names.
+
+    Note that this does not work with -f (--file) checkpatch option
+    as it depends on patch context providing the function name.
+
+  **FUNCTION_ARGUMENTS**
+    This warning is emitted due to any of the following reasons:
+
+      1. Arguments for the function declaration do not follow
+         the identifier name.  Example::
+
+           void foo
+           (int bar, int baz)
+
+         This should be corrected to::
+
+           void foo(int bar, int baz)
+
+      2. Some arguments for the function definition do not
+         have an identifier name.  Example::
+
+           void foo(int)
+
+         All arguments should have identifier names.
+
   **FUNCTION_WITHOUT_ARGS**
     Function declarations without arguments like::
 
@@ -647,6 +933,13 @@ Functions and Variables
 Permissions
 -----------
 
+  **DEVICE_ATTR_PERMS**
+    The permissions used in DEVICE_ATTR are unusual.
+    Typically only three permissions are used - 0644 (RW), 0444 (RO)
+    and 0200 (WO).
+
+    See: https://www.kernel.org/doc/html/latest/filesystems/sysfs.html#attributes
+
   **EXECUTE_PERMISSIONS**
     There is no reason for source files to be executable.  The executable
     bit can be removed safely.
@@ -708,13 +1001,6 @@ Spacing and Brackets
 
         = { [0...10] = 5 }
 
-  **CODE_INDENT**
-    Code indent should use tabs instead of spaces.
-    Outside of comments, documentation and Kconfig,
-    spaces are never used for indentation.
-
-    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
-
   **CONCATENATED_STRING**
     Concatenated elements should have a space in between.
     Example::
@@ -760,29 +1046,6 @@ Spacing and Brackets
 
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
 
-  **SWITCH_CASE_INDENT_LEVEL**
-    switch should be at the same indent as case.
-    Example::
-
-      switch (suffix) {
-      case 'G':
-      case 'g':
-              mem <<= 30;
-              break;
-      case 'M':
-      case 'm':
-              mem <<= 20;
-              break;
-      case 'K':
-      case 'k':
-              mem <<= 10;
-              /* fall through */
-      default:
-              break;
-      }
-
-    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#indentation
-
   **TRAILING_WHITESPACE**
     Trailing whitespace should always be removed.
     Some editors highlight the trailing whitespace and cause visual
@@ -791,7 +1054,7 @@ Spacing and Brackets
     See: https://www.kernel.org/doc/html/latest/process/coding-style.html#spaces
 
   **UNNECESSARY_PARENTHESES**
-    Parentheses are not required in the following cases::
+    Parentheses are not required in the following cases:
 
       1. Function pointer uses::
 
@@ -842,40 +1105,46 @@ Others
     The patch seems to be corrupted or lines are wrapped.
     Please regenerate the patch file before sending it to the maintainer.
 
+  **CVS_KEYWORD**
+    Since Linux moved to git, the CVS markers are no longer used.
+    So, CVS style keywords ($Id$, $Revision$, $Log$) should not be
+    added.
+
+  **DEFAULT_NO_BREAK**
+    switch default case is sometimes written as "default:;".  This can
+    cause new cases added below default to be defective.
+
+    A "break;" should be added after empty default statement to avoid
+    unwanted fallthrough.
+
   **DOS_LINE_ENDINGS**
     For DOS-formatted patches, there are extra ^M symbols at the end of
     the line.  These should be removed.
 
-  **FSF_MAILING_ADDRESS**
-    Kernel maintainers reject new instances of the GPL boilerplate paragraph
-    directing people to write to the FSF for a copy of the GPL, since the
-    FSF has moved in the past and may do so again.
-    So do not write paragraphs about writing to the Free Software Foundation's
-    mailing address.
+  **DT_SCHEMA_BINDING_PATCH**
+    DT bindings moved to a json-schema based format instead of
+    freeform text.
 
-    See: https://lore.kernel.org/lkml/20131006222342.GT19510@leaf/
+    See: https://www.kernel.org/doc/html/latest/devicetree/bindings/writing-schema.html
 
-  **LONG_LINE**
-    The line has exceeded the specified maximum length. Consider refactoring
-    it.
-    To use a different maximum line length, the --max-line-length=n option
-    may be added while invoking checkpatch.
+  **DT_SPLIT_BINDING_PATCH**
+    Devicetree bindings should be their own patch.  This is because
+    bindings are logically independent from a driver implementation,
+    they have a different maintainer (even though they often
+    are applied via the same tree), and it makes for a cleaner history in the
+    DT only tree created with git-filter-branch.
 
-    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+    See: https://www.kernel.org/doc/html/latest/devicetree/bindings/submitting-patches.html#i-for-patch-submitters
 
-  **LONG_LINE_STRING**
-    A string starts before but extends beyond the maximum line length.
-    To use a different maximum line length, the --max-line-length=n option
-    may be added while invoking checkpatch.
+  **EMBEDDED_FILENAME**
+    Embedding the complete filename path inside the file isn't particularly
+    useful as often the path is moved around and becomes incorrect.
 
-    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+  **FILE_PATH_CHANGES**
+    Whenever files are added, moved, or deleted, the MAINTAINERS file
+    patterns can be out of sync or outdated.
 
-  **LONG_LINE_COMMENT**
-    A comment starts before but extends beyond the maximum line length.
-    To use a different maximum line length, the --max-line-length=n option
-    may be added while invoking checkpatch.
-
-    See: https://www.kernel.org/doc/html/latest/process/coding-style.html#breaking-long-lines-and-strings
+    So MAINTAINERS might need updating in these cases.
 
   **MEMSET**
     The memset use appears to be incorrect.  This may be caused due to
@@ -895,17 +1164,5 @@ Others
 
     See: https://www.kernel.org/doc/html/latest/process/license-rules.html
 
-  **TRAILING_STATEMENTS**
-    Trailing statements (for example after any conditional) should be
-    on the next line.
-    Like::
-
-      if (x == y) break;
-
-    should be::
-
-      if (x == y)
-              break;
-
   **TYPO_SPELLING**
     Some words may have been misspelled.  Consider reviewing them.
-- 
GitLab


From 0236526d76b87c1dc2cbe3eb31ae29be5b0ca151 Mon Sep 17 00:00:00 2001
From: Tor Vic <torvic9@mailbox.org>
Date: Sun, 13 Jun 2021 13:07:49 +0000
Subject: [PATCH 2859/3804] Makefile: lto: Pass -warn-stack-size only on LLD <
 13.0.0

Since LLVM commit fc018eb, the '-warn-stack-size' flag has been dropped
[1], leading to the following error message when building with Clang-13
and LLD-13:

    ld.lld: error: -plugin-opt=-: ld.lld: Unknown command line argument
    '-warn-stack-size=2048'.  Try: 'ld.lld --help'
    ld.lld: Did you mean '--asan-stack=2048'?

In the same way as with commit 2398ce80152a ("x86, lto: Pass
-stack-alignment only on LLD < 13.0.0") , make '-warn-stack-size'
conditional on LLD < 13.0.0.

[1] https://reviews.llvm.org/D103928

Fixes: 24845dcb170e ("Makefile: LTO: have linker check -Wframe-larger-than")
Cc: stable@vger.kernel.org
Link: https://github.com/ClangBuiltLinux/linux/issues/1377
Signed-off-by: Tor Vic <torvic9@mailbox.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/7631bab7-a8ab-f884-ab54-f4198976125c@mailbox.org
---
 Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 144d4f8b7eb32..51a1b6b2c2abf 100644
--- a/Makefile
+++ b/Makefile
@@ -929,11 +929,14 @@ CC_FLAGS_LTO	+= -fvisibility=hidden
 # Limit inlining across translation units to reduce binary size
 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
 
-# Check for frame size exceeding threshold during prolog/epilog insertion.
+# Check for frame size exceeding threshold during prolog/epilog insertion
+# when using lld < 13.0.0.
 ifneq ($(CONFIG_FRAME_WARN),0)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
 KBUILD_LDFLAGS	+= -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
 endif
 endif
+endif
 
 ifdef CONFIG_LTO
 KBUILD_CFLAGS	+= -fno-lto $(CC_FLAGS_LTO)
-- 
GitLab


From 005747526d4f3c2ec995891e95cb7625161022f9 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 3 Jun 2021 14:58:41 +0200
Subject: [PATCH 2860/3804] docs: fault-injection: fix non-working usage of
 negative values

Fault injection uses debugfs in a way that the provided values via sysfs
are interpreted as u64. Providing negative numbers results in an error:

/sys/kernel/debug/fail_function# echo -1 > times
sh: write error: Invalid argument

Update the docs and examples to use "printf %#x <val>" in these cases.
For "retval", reword the paragraph a little and fix a typo.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210603125841.27436-1-wsa+renesas@sang-engineering.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../fault-injection/fault-injection.rst       | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index 31ecfe44e5b45..f47d05ed0d948 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -78,8 +78,10 @@ configuration of fault-injection capabilities.
 
 - /sys/kernel/debug/fail*/times:
 
-	specifies how many times failures may happen at most.
-	A value of -1 means "no limit".
+	specifies how many times failures may happen at most. A value of -1
+	means "no limit". Note, though, that this file only accepts unsigned
+	values. So, if you want to specify -1, you better use 'printf' instead
+	of 'echo', e.g.: $ printf %#x -1 > times
 
 - /sys/kernel/debug/fail*/space:
 
@@ -167,11 +169,13 @@ configuration of fault-injection capabilities.
 	- ERRNO: retval must be -1 to -MAX_ERRNO (-4096).
 	- ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096).
 
-- /sys/kernel/debug/fail_function/<functiuon-name>/retval:
+- /sys/kernel/debug/fail_function/<function-name>/retval:
 
-	specifies the "error" return value to inject to the given
-	function for given function. This will be created when
-	user specifies new injection entry.
+	specifies the "error" return value to inject to the given function.
+	This will be created when the user specifies a new injection entry.
+	Note that this file only accepts unsigned values. So, if you want to
+	use a negative errno, you better use 'printf' instead of 'echo', e.g.:
+	$ printf %#x -12 > retval
 
 Boot option
 ^^^^^^^^^^^
@@ -255,7 +259,7 @@ Application Examples
     echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
     echo 10 > /sys/kernel/debug/$FAILTYPE/probability
     echo 100 > /sys/kernel/debug/$FAILTYPE/interval
-    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
     echo 0 > /sys/kernel/debug/$FAILTYPE/space
     echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
     echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
@@ -309,7 +313,7 @@ Application Examples
     echo N > /sys/kernel/debug/$FAILTYPE/task-filter
     echo 10 > /sys/kernel/debug/$FAILTYPE/probability
     echo 100 > /sys/kernel/debug/$FAILTYPE/interval
-    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
     echo 0 > /sys/kernel/debug/$FAILTYPE/space
     echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
     echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
@@ -336,11 +340,11 @@ Application Examples
     FAILTYPE=fail_function
     FAILFUNC=open_ctree
     echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject
-    echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
+    printf %#x -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval
     echo N > /sys/kernel/debug/$FAILTYPE/task-filter
     echo 100 > /sys/kernel/debug/$FAILTYPE/probability
     echo 0 > /sys/kernel/debug/$FAILTYPE/interval
-    echo -1 > /sys/kernel/debug/$FAILTYPE/times
+    printf %#x -1 > /sys/kernel/debug/$FAILTYPE/times
     echo 0 > /sys/kernel/debug/$FAILTYPE/space
     echo 1 > /sys/kernel/debug/$FAILTYPE/verbose
 
-- 
GitLab


From 05a463ec1bd4fd564312d6dbc0ea1e3a4701e4a4 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Fri, 11 Jun 2021 17:22:49 +1200
Subject: [PATCH 2861/3804] docs: cputopology: move the sysfs ABI description
 to right place

Documentation/admin-guide/cputopology.rst is the wrong place to describe
sysfs ABI. So move the cputopology ABI things to
Documentation/ABI/stable/sysfs-devices-system-cpu and add a reference to
ABI doc in Documentation/admin-guide/cputopology.rst.

Link: https://lkml.kernel.org/r/20210319041618.14316-1-song.bao.hua@hisilicon.com
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Link: https://lore.kernel.org/r/20210611052249.25776-1-song.bao.hua@hisilicon.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../ABI/stable/sysfs-devices-system-cpu       | 83 ++++++++++++++++++
 Documentation/admin-guide/cputopology.rst     | 85 +------------------
 2 files changed, 87 insertions(+), 81 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 33c133e2a631a..516dafea03eb9 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -23,3 +23,86 @@ Description:	Default value for the Data Stream Control Register (DSCR) on
 		here).
 		If set by a process it will be inherited by child processes.
 Values:		64 bit unsigned integer (bit field)
+
+What:           /sys/devices/system/cpu/cpuX/topology/physical_package_id
+Description:    physical package id of cpuX. Typically corresponds to a physical
+                socket number, but the actual value is architecture and platform
+                dependent.
+Values:         integer
+
+What:           /sys/devices/system/cpu/cpuX/topology/die_id
+Description:    the CPU die ID of cpuX. Typically it is the hardware platform's
+                identifier (rather than the kernel's). The actual value is
+                architecture and platform dependent.
+Values:         integer
+
+What:           /sys/devices/system/cpu/cpuX/topology/core_id
+Description:    the CPU core ID of cpuX. Typically it is the hardware platform's
+                identifier (rather than the kernel's). The actual value is
+                architecture and platform dependent.
+Values:         integer
+
+What:           /sys/devices/system/cpu/cpuX/topology/book_id
+Description:    the book ID of cpuX. Typically it is the hardware platform's
+                identifier (rather than the kernel's). The actual value is
+                architecture and platform dependent. it's only used on s390.
+Values:         integer
+
+What:           /sys/devices/system/cpu/cpuX/topology/drawer_id
+Description:    the drawer ID of cpuX. Typically it is the hardware platform's
+                identifier (rather than the kernel's). The actual value is
+                architecture and platform dependent. it's only used on s390.
+Values:         integer
+
+What:           /sys/devices/system/cpu/cpuX/topology/core_cpus
+Description:    internal kernel map of CPUs within the same core.
+                (deprecated name: "thread_siblings")
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/core_cpus_list
+Description:    human-readable list of CPUs within the same core.
+                The format is like 0-3, 8-11, 14,17.
+                (deprecated name: "thread_siblings_list").
+Values:         decimal list.
+
+What:           /sys/devices/system/cpu/cpuX/topology/package_cpus
+Description:    internal kernel map of the CPUs sharing the same physical_package_id.
+                (deprecated name: "core_siblings").
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/package_cpus_list
+Description:    human-readable list of CPUs sharing the same physical_package_id.
+                The format is like 0-3, 8-11, 14,17.
+                (deprecated name: "core_siblings_list")
+Values:         decimal list.
+
+What:           /sys/devices/system/cpu/cpuX/topology/die_cpus
+Description:    internal kernel map of CPUs within the same die.
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/die_cpus_list
+Description:    human-readable list of CPUs within the same die.
+                The format is like 0-3, 8-11, 14,17.
+Values:         decimal list.
+
+What:           /sys/devices/system/cpu/cpuX/topology/book_siblings
+Description:    internal kernel map of cpuX's hardware threads within the same
+                book_id. it's only used on s390.
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/book_siblings_list
+Description:    human-readable list of cpuX's hardware threads within the same
+                book_id.
+                The format is like 0-3, 8-11, 14,17. it's only used on s390.
+Values:         decimal list.
+
+What:           /sys/devices/system/cpu/cpuX/topology/drawer_siblings
+Description:    internal kernel map of cpuX's hardware threads within the same
+                drawer_id. it's only used on s390.
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list
+Description:    human-readable list of cpuX's hardware threads within the same
+                drawer_id.
+                The format is like 0-3, 8-11, 14,17. it's only used on s390.
+Values:         decimal list.
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
index b90dafcc8237c..8632a1db36e46 100644
--- a/Documentation/admin-guide/cputopology.rst
+++ b/Documentation/admin-guide/cputopology.rst
@@ -2,87 +2,10 @@
 How CPU topology info is exported via sysfs
 ===========================================
 
-Export CPU topology info via sysfs. Items (attributes) are similar
-to /proc/cpuinfo output of some architectures.  They reside in
-/sys/devices/system/cpu/cpuX/topology/:
-
-physical_package_id:
-
-	physical package id of cpuX. Typically corresponds to a physical
-	socket number, but the actual value is architecture and platform
-	dependent.
-
-die_id:
-
-	the CPU die ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).  The actual value is
-	architecture and platform dependent.
-
-core_id:
-
-	the CPU core ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).  The actual value is
-	architecture and platform dependent.
-
-book_id:
-
-	the book ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).	The actual value is
-	architecture and platform dependent.
-
-drawer_id:
-
-	the drawer ID of cpuX. Typically it is the hardware platform's
-	identifier (rather than the kernel's).	The actual value is
-	architecture and platform dependent.
-
-core_cpus:
-
-	internal kernel map of CPUs within the same core.
-	(deprecated name: "thread_siblings")
-
-core_cpus_list:
-
-	human-readable list of CPUs within the same core.
-	(deprecated name: "thread_siblings_list");
-
-package_cpus:
-
-	internal kernel map of the CPUs sharing the same physical_package_id.
-	(deprecated name: "core_siblings")
-
-package_cpus_list:
-
-	human-readable list of CPUs sharing the same physical_package_id.
-	(deprecated name: "core_siblings_list")
-
-die_cpus:
-
-	internal kernel map of CPUs within the same die.
-
-die_cpus_list:
-
-	human-readable list of CPUs within the same die.
-
-book_siblings:
-
-	internal kernel map of cpuX's hardware threads within the same
-	book_id.
-
-book_siblings_list:
-
-	human-readable list of cpuX's hardware threads within the same
-	book_id.
-
-drawer_siblings:
-
-	internal kernel map of cpuX's hardware threads within the same
-	drawer_id.
-
-drawer_siblings_list:
-
-	human-readable list of cpuX's hardware threads within the same
-	drawer_id.
+CPU topology info is exported via sysfs. Items (attributes) are similar
+to /proc/cpuinfo output of some architectures. They reside in
+/sys/devices/system/cpu/cpuX/topology/. Please refer to the ABI file:
+Documentation/ABI/stable/sysfs-devices-system-cpu.
 
 Architecture-neutral, drivers/base/topology.c, exports these attributes.
 However, the book and drawer related sysfs files will only be created if
-- 
GitLab


From 8b1462b67f23da548f27b779a36b8ea75f5ef249 Mon Sep 17 00:00:00 2001
From: Marcin Juszkiewicz <marcin@juszkiewicz.com.pl>
Date: Mon, 14 Jun 2021 17:37:12 +0200
Subject: [PATCH 2862/3804] quota: finish disable quotactl_path syscall

In commit 5b9fedb31e47 ("quota: Disable quotactl_path syscall") Jan Kara
disabled the quotactl_path syscall on several architectures.

This commit disables it on all architectures using the unified list of
system calls:

- arm64
- arc
- csky
- h8300
- hexagon
- nds32
- nios2
- openrisc
- riscv (32/64)

CC: Jan Kara <jack@suse.cz>
CC: Christian Brauner <christian.brauner@ubuntu.com>
CC: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein
Link: https://lore.kernel.org/r/20210614153712.313707-1-marcin@juszkiewicz.com.pl
Fixes: 5b9fedb31e47 ("quota: Disable quotactl_path syscall")
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Marcin Juszkiewicz <marcin@juszkiewicz.com.pl>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/uapi/asm-generic/unistd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b8..d2a942086fcb6 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
-- 
GitLab


From 4aca2d99fd27698cf82d55aed4859fde859082ac Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:47 +0200
Subject: [PATCH 2863/3804] x86/sev: Fix error message in runtime #VC handler

The runtime #VC handler is not "early" anymore. Fix the copy&paste error
and remove that word from the error message.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210614135327.9921-2-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 651b81cd648e5..4fd997bbf059a 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1369,7 +1369,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		vc_finish_insn(&ctxt);
 		break;
 	case ES_UNSUPPORTED:
-		pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
 				   error_code, regs->ip);
 		goto fail;
 	case ES_VMM_ERROR:
-- 
GitLab


From f2df15639e44d23bf82a86a03092472c7278cd39 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 19 May 2021 15:52:49 +0200
Subject: [PATCH 2864/3804] x86/insn-eval: Make 0 a valid RIP for
 insn_get_effective_ip()

In theory, 0 is a valid value for the instruction pointer so don't use
it as the error return value from insn_get_effective_ip().

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210614135327.9921-5-joro@8bytes.org
---
 arch/x86/lib/insn-eval.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index a67afd74232c6..4eecb9c7c6a05 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
-static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;
 
@@ -1430,10 +1430,12 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs)
 	if (!user_64bit_mode(regs)) {
 		seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
 		if (seg_base == -1L)
-			return 0;
+			return -EINVAL;
 	}
 
-	return seg_base + regs->ip;
+	*ip = seg_base + regs->ip;
+
+	return 0;
 }
 
 /**
@@ -1455,8 +1457,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 	unsigned long ip;
 	int not_copied;
 
-	ip = insn_get_effective_ip(regs);
-	if (!ip)
+	if (insn_get_effective_ip(regs, &ip))
 		return 0;
 
 	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
@@ -1484,8 +1485,7 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN
 	unsigned long ip;
 	int not_copied;
 
-	ip = insn_get_effective_ip(regs);
-	if (!ip)
+	if (insn_get_effective_ip(regs, &ip))
 		return 0;
 
 	not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
-- 
GitLab


From 4aaa7eacd7cc7c10f269c7f2a01d044b375bed8e Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 14 Jun 2021 15:53:26 +0200
Subject: [PATCH 2865/3804] x86/insn: Extend error reporting from
 insn_fetch_from_user[_inatomic]()

The error reporting from the insn_fetch_from_user*() functions is not
very verbose. Extend it to include information on whether the linear
RIP could not be calculated or whether the memory access faulted.

This will be used in the SEV-ES code to propagate the correct
exception depending on what went wrong during instruction fetch.

 [ bp: Massage comments. ]

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210614135327.9921-6-joro@8bytes.org
---
 arch/x86/kernel/sev.c    |  8 ++++----
 arch/x86/kernel/umip.c   | 10 ++++------
 arch/x86/lib/insn-eval.c | 16 ++++++++--------
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fd997bbf059a..a1eeaa79bf2bf 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -258,17 +258,17 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
 {
 	char buffer[MAX_INSN_SIZE];
-	int res;
+	int insn_bytes;
 
-	res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-	if (!res) {
+	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+	if (insn_bytes <= 0) {
 		ctxt->fi.vector     = X86_TRAP_PF;
 		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
 		ctxt->fi.cr2        = ctxt->regs->ip;
 		return ES_EXCEPTION;
 	}
 
-	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
 		return ES_DECODE_FAILED;
 
 	if (ctxt->insn.immediate.got)
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 8daa70b0d2da2..576b47e7523db 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -346,14 +346,12 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (!regs)
 		return false;
 
-	nr_copied = insn_fetch_from_user(regs, buf);
-
 	/*
-	 * The insn_fetch_from_user above could have failed if user code
-	 * is protected by a memory protection key. Give up on emulation
-	 * in such a case.  Should we issue a page fault?
+	 * Give up on emulation if fetching the instruction failed. Should a
+	 * page fault or a #GP be issued?
 	 */
-	if (!nr_copied)
+	nr_copied = insn_fetch_from_user(regs, buf);
+	if (nr_copied <= 0)
 		return false;
 
 	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 4eecb9c7c6a05..a1d24fdc07cf0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1448,9 +1448,9 @@ static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
  *
  * Returns:
  *
- * Number of instruction bytes copied.
- *
- * 0 if nothing was copied.
+ * - number of instruction bytes copied.
+ * - 0 if nothing was copied.
+ * - -EINVAL if the linear address of the instruction could not be calculated
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
@@ -1458,7 +1458,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 	int not_copied;
 
 	if (insn_get_effective_ip(regs, &ip))
-		return 0;
+		return -EINVAL;
 
 	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
 
@@ -1476,9 +1476,9 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
  *
  * Returns:
  *
- * Number of instruction bytes copied.
- *
- * 0 if nothing was copied.
+ *  - number of instruction bytes copied.
+ *  - 0 if nothing was copied.
+ *  - -EINVAL if the linear address of the instruction could not be calculated.
  */
 int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
@@ -1486,7 +1486,7 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN
 	int not_copied;
 
 	if (insn_get_effective_ip(regs, &ip))
-		return 0;
+		return -EINVAL;
 
 	not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
 
-- 
GitLab


From 07570cef5e5c3fcec40f82a9075abb4c1da63319 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 14 Jun 2021 15:53:27 +0200
Subject: [PATCH 2866/3804] x86/sev: Propagate #GP if getting linear
 instruction address failed

When an instruction is fetched from user-space, segmentation needs to
be taken into account. This means that getting the linear address of an
instruction can fail. Hardware would raise a #GP exception in that case,
but the #VC exception handler would emulate it as a page-fault.

The insn_fetch_from_user*() functions now provide the relevant
information in case of a failure. Use that and propagate a #GP when the
linear address of an instruction to fetch could not be calculated.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210614135327.9921-7-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index a1eeaa79bf2bf..8178db07a06a6 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -261,11 +261,18 @@ static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
 	int insn_bytes;
 
 	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-	if (insn_bytes <= 0) {
+	if (insn_bytes == 0) {
+		/* Nothing could be copied */
 		ctxt->fi.vector     = X86_TRAP_PF;
 		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
 		ctxt->fi.cr2        = ctxt->regs->ip;
 		return ES_EXCEPTION;
+	} else if (insn_bytes == -EINVAL) {
+		/* Effective RIP could not be calculated */
+		ctxt->fi.vector     = X86_TRAP_GP;
+		ctxt->fi.error_code = 0;
+		ctxt->fi.cr2        = 0;
+		return ES_EXCEPTION;
 	}
 
 	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
-- 
GitLab


From 4bf584a03eec674975ee9fe36c8583d9d470dab1 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@kernel.org>
Date: Tue, 8 Jun 2021 18:56:56 +0800
Subject: [PATCH 2867/3804] usb: dwc3: core: fix kernel panic when do reboot

On system reboot, dwc3_shutdown is called and the whole debugfs for
dwc3 is removed first. When the gadget then tries to deinit and
remove the debugfs entries for its endpoints, it hits a NULL pointer
dereference when calling debugfs_lookup. Fix it by removing the whole
dwc3 debugfs after dwc3_drd_exit.

[ 2924.958838] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000002
....
[ 2925.030994] pstate: 60000005 (nZCv daif -PAN -UAO -TCO BTYPE=--)
[ 2925.037005] pc : inode_permission+0x2c/0x198
[ 2925.041281] lr : lookup_one_len_common+0xb0/0xf8
[ 2925.045903] sp : ffff80001276ba70
[ 2925.049218] x29: ffff80001276ba70 x28: ffff0000c01f0000 x27: 0000000000000000
[ 2925.056364] x26: ffff800011791e70 x25: 0000000000000008 x24: dead000000000100
[ 2925.063510] x23: dead000000000122 x22: 0000000000000000 x21: 0000000000000001
[ 2925.070652] x20: ffff8000122c6188 x19: 0000000000000000 x18: 0000000000000000
[ 2925.077797] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000004
[ 2925.084943] x14: ffffffffffffffff x13: 0000000000000000 x12: 0000000000000030
[ 2925.092087] x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f x9 : ffff8000102b2420
[ 2925.099232] x8 : 7f7f7f7f7f7f7f7f x7 : feff73746e2f6f64 x6 : 0000000000008080
[ 2925.106378] x5 : 61c8864680b583eb x4 : 209e6ec2d263dbb7 x3 : 000074756f307065
[ 2925.113523] x2 : 0000000000000001 x1 : 0000000000000000 x0 : ffff8000122c6188
[ 2925.120671] Call trace:
[ 2925.123119]  inode_permission+0x2c/0x198
[ 2925.127042]  lookup_one_len_common+0xb0/0xf8
[ 2925.131315]  lookup_one_len_unlocked+0x34/0xb0
[ 2925.135764]  lookup_positive_unlocked+0x14/0x50
[ 2925.140296]  debugfs_lookup+0x68/0xa0
[ 2925.143964]  dwc3_gadget_free_endpoints+0x84/0xb0
[ 2925.148675]  dwc3_gadget_exit+0x28/0x78
[ 2925.152518]  dwc3_drd_exit+0x100/0x1f8
[ 2925.156267]  dwc3_remove+0x11c/0x120
[ 2925.159851]  dwc3_shutdown+0x14/0x20
[ 2925.163432]  platform_shutdown+0x28/0x38
[ 2925.167360]  device_shutdown+0x15c/0x378
[ 2925.171291]  kernel_restart_prepare+0x3c/0x48
[ 2925.175650]  kernel_restart+0x1c/0x68
[ 2925.179316]  __do_sys_reboot+0x218/0x240
[ 2925.183247]  __arm64_sys_reboot+0x28/0x30
[ 2925.187262]  invoke_syscall+0x48/0x100
[ 2925.191017]  el0_svc_common.constprop.0+0x48/0xc8
[ 2925.195726]  do_el0_svc+0x28/0x88
[ 2925.199045]  el0_svc+0x20/0x30
[ 2925.202104]  el0_sync_handler+0xa8/0xb0
[ 2925.205942]  el0_sync+0x148/0x180
[ 2925.209270] Code: a9025bf5 2a0203f5 121f0056 370802b5 (79400660)
[ 2925.215372] ---[ end trace 124254d8e485a58b ]---
[ 2925.220012] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 2925.227676] Kernel Offset: disabled
[ 2925.231164] CPU features: 0x00001001,20000846
[ 2925.235521] Memory Limit: none
[ 2925.238580] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---

Fixes: 8d396bb0a5b6 ("usb: dwc3: debugfs: Add and remove endpoint dirs dynamically")
Cc: Jack Pham <jackp@codeaurora.org>
Tested-by: Jack Pham <jackp@codeaurora.org>
Signed-off-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/20210608105656.10795-1-peter.chen@kernel.org
(cherry picked from commit 2a042767814bd0edf2619f06fecd374e266ea068)
Link: https://lore.kernel.org/r/20210615080847.GA10432@jackp-linux.qualcomm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 21129d357f295..4ac397e43e19b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1671,8 +1671,8 @@ static int dwc3_remove(struct platform_device *pdev)
 
 	pm_runtime_get_sync(&pdev->dev);
 
-	dwc3_debugfs_exit(dwc);
 	dwc3_core_exit_mode(dwc);
+	dwc3_debugfs_exit(dwc);
 
 	dwc3_core_exit(dwc);
 	dwc3_ulpi_exit(dwc);
-- 
GitLab


From b27a9f4119afa460289cd327f403e2ec9c8e0511 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss@arm.com>
Date: Sun, 13 Jun 2021 11:26:31 +0200
Subject: [PATCH 2868/3804] arm64: Add ARM64_PTR_AUTH_KERNEL config option

This patch adds the ARM64_PTR_AUTH_KERNEL config and deals with the
build aspect of it.

Userspace support has no dependency on the toolchain, therefore all
toolchain checks and build flags are controlled by the new config
option.
The default config behavior will not be changed.

Signed-off-by: Daniel Kiss <daniel.kiss@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210613092632.93591-2-daniel.kiss@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig              | 33 +++++++++++++++++++--------------
 arch/arm64/Makefile             |  2 +-
 arch/arm64/kernel/asm-offsets.c |  2 ++
 drivers/misc/lkdtm/bugs.c       |  6 +++---
 4 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f1d8566bbf95..489e3e42320f1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1481,12 +1481,6 @@ menu "ARMv8.3 architectural features"
 config ARM64_PTR_AUTH
 	bool "Enable support for pointer authentication"
 	default y
-	depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
-	# Modern compilers insert a .note.gnu.property section note for PAC
-	# which is only understood by binutils starting with version 2.33.1.
-	depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
-	depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
-	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
 	help
 	  Pointer authentication (part of the ARMv8.3 Extensions) provides
 	  instructions for signing and authenticating pointers against secret
@@ -1498,13 +1492,6 @@ config ARM64_PTR_AUTH
 	  for each process at exec() time, with these keys being
 	  context-switched along with the process.
 
-	  If the compiler supports the -mbranch-protection or
-	  -msign-return-address flag (e.g. GCC 7 or later), then this option
-	  will also cause the kernel itself to be compiled with return address
-	  protection. In this case, and if the target hardware is known to
-	  support pointer authentication, then CONFIG_STACKPROTECTOR can be
-	  disabled with minimal loss of protection.
-
 	  The feature is detected at runtime. If the feature is not present in
 	  hardware it will not be advertised to userspace/KVM guest nor will it
 	  be enabled.
@@ -1515,6 +1502,24 @@ config ARM64_PTR_AUTH
 	  but with the feature disabled. On such a system, this option should
 	  not be selected.
 
+config ARM64_PTR_AUTH_KERNEL
+	bool
+	default y
+	depends on ARM64_PTR_AUTH
+	depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
+	# Modern compilers insert a .note.gnu.property section note for PAC
+	# which is only understood by binutils starting with version 2.33.1.
+	depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
+	depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
+	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
+	help
+	  If the compiler supports the -mbranch-protection or
+	  -msign-return-address flag (e.g. GCC 7 or later), then this option
+	  will cause the kernel itself to be compiled with return address
+	  protection. In this case, and if the target hardware is known to
+	  support pointer authentication, then CONFIG_STACKPROTECTOR can be
+	  disabled with minimal loss of protection.
+
 	  This feature works with FUNCTION_GRAPH_TRACER option only if
 	  DYNAMIC_FTRACE_WITH_REGS is enabled.
 
@@ -1606,7 +1611,7 @@ config ARM64_BTI_KERNEL
 	bool "Use Branch Target Identification for kernel"
 	default y
 	depends on ARM64_BTI
-	depends on ARM64_PTR_AUTH
+	depends on ARM64_PTR_AUTH_KERNEL
 	depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
 	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
 	depends on !CC_IS_GCC || GCC_VERSION >= 100100
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b52481f0605d8..3b5b1c480449c 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -70,7 +70,7 @@ endif
 # off, this will be overridden if we are using branch protection.
 branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
 
-ifeq ($(CONFIG_ARM64_PTR_AUTH),y)
+ifeq ($(CONFIG_ARM64_PTR_AUTH_KERNEL),y)
 branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all
 # We enable additional protection for leaf functions as there is some
 # narrow potential for ROP protection benefits and no substantial
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e733..03420b89c6029 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -46,6 +46,8 @@ int main(void)
   DEFINE(THREAD_SCTLR_USER,	offsetof(struct task_struct, thread.sctlr_user));
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(THREAD_KEYS_USER,	offsetof(struct task_struct, thread.keys_user));
+#endif
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
 #endif
 #ifdef CONFIG_ARM64_MTE
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 0e8254d0cf0ba..a164896dc6d4c 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -463,7 +463,7 @@ void lkdtm_DOUBLE_FAULT(void)
 #ifdef CONFIG_ARM64
 static noinline void change_pac_parameters(void)
 {
-	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH)) {
+	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) {
 		/* Reset the keys of current task */
 		ptrauth_thread_init_kernel(current);
 		ptrauth_thread_switch_kernel(current);
@@ -477,8 +477,8 @@ noinline void lkdtm_CORRUPT_PAC(void)
 #define CORRUPT_PAC_ITERATE	10
 	int i;
 
-	if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH))
-		pr_err("FAIL: kernel not built with CONFIG_ARM64_PTR_AUTH\n");
+	if (!IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
+		pr_err("FAIL: kernel not built with CONFIG_ARM64_PTR_AUTH_KERNEL\n");
 
 	if (!system_supports_address_auth()) {
 		pr_err("FAIL: CPU lacks pointer authentication feature\n");
-- 
GitLab


From d053e71ac8442d4fd24fb85591489813cdb56365 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss@arm.com>
Date: Sun, 13 Jun 2021 11:26:32 +0200
Subject: [PATCH 2869/3804] arm64: Conditionally configure PTR_AUTH key of the
 kernel.

If the kernel is not compiled with CONFIG_ARM64_PTR_AUTH_KERNEL=y,
then no PACI/AUTI instructions are expected while the kernel is running
so the kernel's key will not be used. Writing a system register
is expensive, therefore avoid it if not required.

Signed-off-by: Daniel Kiss <daniel.kiss@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210613092632.93591-3-daniel.kiss@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                        |  2 +-
 arch/arm64/include/asm/asm_pointer_auth.h | 49 +++++++++++--------
 arch/arm64/include/asm/pointer_auth.h     | 59 +++++++++++++----------
 arch/arm64/include/asm/processor.h        |  2 +
 arch/arm64/kernel/asm-offsets.c           |  2 +
 5 files changed, 67 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 489e3e42320f1..dabe9b81012f8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1503,7 +1503,7 @@ config ARM64_PTR_AUTH
 	  not be selected.
 
 config ARM64_PTR_AUTH_KERNEL
-	bool
+	bool "Use pointer authentication for kernel"
 	default y
 	depends on ARM64_PTR_AUTH
 	depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h
index 8ca2dc0661ee2..f1bba5fc61c49 100644
--- a/arch/arm64/include/asm/asm_pointer_auth.h
+++ b/arch/arm64/include/asm/asm_pointer_auth.h
@@ -7,19 +7,7 @@
 #include <asm/cpufeature.h>
 #include <asm/sysreg.h>
 
-#ifdef CONFIG_ARM64_PTR_AUTH
-/*
- * thread.keys_user.ap* as offset exceeds the #imm offset range
- * so use the base value of ldp as thread.keys_user and offset as
- * thread.keys_user.ap*.
- */
-	.macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
-	mov	\tmp1, #THREAD_KEYS_USER
-	add	\tmp1, \tsk, \tmp1
-	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
-	msr_s	SYS_APIAKEYLO_EL1, \tmp2
-	msr_s	SYS_APIAKEYHI_EL1, \tmp3
-	.endm
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
 
 	.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
 	mov	\tmp1, #THREAD_KEYS_KERNEL
@@ -42,6 +30,33 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 	.endm
 
+#else /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+	.macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+	.endm
+
+	.macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
+	.endm
+
+	.macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
+	.endm
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * thread.keys_user.ap* as offset exceeds the #imm offset range
+ * so use the base value of ldp as thread.keys_user and offset as
+ * thread.keys_user.ap*.
+ */
+	.macro __ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
+	mov	\tmp1, #THREAD_KEYS_USER
+	add	\tmp1, \tsk, \tmp1
+	ldp	\tmp2, \tmp3, [\tmp1, #PTRAUTH_USER_KEY_APIA]
+	msr_s	SYS_APIAKEYLO_EL1, \tmp2
+	msr_s	SYS_APIAKEYHI_EL1, \tmp3
+	.endm
+
 	.macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3
 	mrs	\tmp1, id_aa64isar1_el1
 	ubfx	\tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8
@@ -64,17 +79,11 @@ alternative_else_nop_endif
 .Lno_addr_auth\@:
 	.endm
 
-#else /* CONFIG_ARM64_PTR_AUTH */
+#else /* !CONFIG_ARM64_PTR_AUTH */
 
 	.macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3
 	.endm
 
-	.macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3
-	.endm
-
-	.macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3
-	.endm
-
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
 #endif /* __ASM_ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index d50416be99be0..28a78b67d9b40 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -31,10 +31,6 @@ struct ptrauth_keys_user {
 	struct ptrauth_key apga;
 };
 
-struct ptrauth_keys_kernel {
-	struct ptrauth_key apia;
-};
-
 #define __ptrauth_key_install_nosync(k, v)			\
 do {								\
 	struct ptrauth_key __pki_v = (v);			\
@@ -42,6 +38,29 @@ do {								\
 	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
 } while (0)
 
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+
+struct ptrauth_keys_kernel {
+	struct ptrauth_key apia;
+};
+
+static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
+{
+	if (system_supports_address_auth())
+		get_random_bytes(&keys->apia, sizeof(keys->apia));
+}
+
+static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
+{
+	if (!system_supports_address_auth())
+		return;
+
+	__ptrauth_key_install_nosync(APIA, keys->apia);
+	isb();
+}
+
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
 static inline void ptrauth_keys_install_user(struct ptrauth_keys_user *keys)
 {
 	if (system_supports_address_auth()) {
@@ -69,21 +88,6 @@ static inline void ptrauth_keys_init_user(struct ptrauth_keys_user *keys)
 	ptrauth_keys_install_user(keys);
 }
 
-static __always_inline void ptrauth_keys_init_kernel(struct ptrauth_keys_kernel *keys)
-{
-	if (system_supports_address_auth())
-		get_random_bytes(&keys->apia, sizeof(keys->apia));
-}
-
-static __always_inline void ptrauth_keys_switch_kernel(struct ptrauth_keys_kernel *keys)
-{
-	if (!system_supports_address_auth())
-		return;
-
-	__ptrauth_key_install_nosync(APIA, keys->apia);
-	isb();
-}
-
 extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
 
 extern int ptrauth_set_enabled_keys(struct task_struct *tsk, unsigned long keys,
@@ -121,11 +125,6 @@ static __always_inline void ptrauth_enable(void)
 #define ptrauth_thread_switch_user(tsk)                                        \
 	ptrauth_keys_install_user(&(tsk)->thread.keys_user)
 
-#define ptrauth_thread_init_kernel(tsk)					\
-	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
-#define ptrauth_thread_switch_kernel(tsk)				\
-	ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
-
 #else /* CONFIG_ARM64_PTR_AUTH */
 #define ptrauth_enable()
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
@@ -134,11 +133,19 @@ static __always_inline void ptrauth_enable(void)
 #define ptrauth_strip_insn_pac(lr)	(lr)
 #define ptrauth_suspend_exit()
 #define ptrauth_thread_init_user()
-#define ptrauth_thread_init_kernel(tsk)
 #define ptrauth_thread_switch_user(tsk)
-#define ptrauth_thread_switch_kernel(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
+#define ptrauth_thread_init_kernel(tsk)					\
+	ptrauth_keys_init_kernel(&(tsk)->thread.keys_kernel)
+#define ptrauth_thread_switch_kernel(tsk)				\
+	ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
+#else
+#define ptrauth_thread_init_kernel(tsk)
+#define ptrauth_thread_switch_kernel(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH_KERNEL */
+
 #define PR_PAC_ENABLED_KEYS_MASK                                               \
 	(PR_PAC_APIAKEY | PR_PAC_APIBKEY | PR_PAC_APDAKEY | PR_PAC_APDBKEY)
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee8909..e7d50c6f700d3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -148,8 +148,10 @@ struct thread_struct {
 	struct debug_info	debug;		/* debugging */
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
+#endif
 #ifdef CONFIG_ARM64_MTE
 	u64			gcr_user_excl;
 #endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 03420b89c6029..c9e72d92606fb 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -155,7 +155,9 @@ int main(void)
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
   DEFINE(PTRAUTH_USER_KEY_APIA,		offsetof(struct ptrauth_keys_user, apia));
+#ifdef CONFIG_ARM64_PTR_AUTH_KERNEL
   DEFINE(PTRAUTH_KERNEL_KEY_APIA,	offsetof(struct ptrauth_keys_kernel, apia));
+#endif
   BLANK();
 #endif
   return 0;
-- 
GitLab


From 3ce6c9e2617ebc09b2d55cc88134b90c19ff6d31 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Tue, 25 May 2021 11:10:03 +0200
Subject: [PATCH 2870/3804] spi: add of_device_uevent_modalias support

Add OF support as already done for ACPI to take driver
MODULE_DEVICE_TABLE(of, ..) into account.

For example with this change a spi nor device MODALIAS changes from:

MODALIAS=spi:spi-nor

to

MODALIAS=of:Nspi-flashT(null)Cjedec,spi-nor

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Link: https://lore.kernel.org/r/20210525091003.18228-1-m.felsch@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a0a232669dc1a..8553e7d48f660 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -363,6 +363,10 @@ static int spi_uevent(struct device *dev, struct kobj_uevent_env *env)
 	const struct spi_device		*spi = to_spi_device(dev);
 	int rc;
 
+	rc = of_device_uevent_modalias(dev, env);
+	if (rc != -ENODEV)
+		return rc;
+
 	rc = acpi_device_uevent_modalias(dev, env);
 	if (rc != -ENODEV)
 		return rc;
-- 
GitLab


From 0f473ac746a992b3afd994ccd1ac73052ea256f2 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 14 Jun 2021 15:40:11 +0530
Subject: [PATCH 2871/3804] arm64/mm: Drop SWAPPER_INIT_MAP_SIZE

The commit cdef5f6e9e0e ("arm64: mm: allocate pagetables anywhere") had
dropped the last reference to SWAPPER_INIT_MAP_SIZE. Hence just clean up.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/1623665411-20055-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kernel-pgtable.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index d44df9d62fc9c..e2f103cce7c1b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -100,9 +100,6 @@
 #define SWAPPER_TABLE_SHIFT	PMD_SHIFT
 #endif
 
-/* The size of the initial kernel direct mapping */
-#define SWAPPER_INIT_MAP_SIZE	(_AC(1, UL) << SWAPPER_TABLE_SHIFT)
-
 /*
  * Initial memory map attributes.
  */
-- 
GitLab


From ca6ece6a76a8b5d8b428429c2803df48a69ee88b Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 14 Jun 2021 15:12:35 +0530
Subject: [PATCH 2872/3804] arm64/mm: Use CONT_PMD_SHIFT for
 ARM64_MEMSTART_SHIFT

ARM64_MEMSTART_SIZE needs to be aligned with CONT_PMD_SIZE on 16K page size
config. Hence just directly use CONT_PMD_SHIFT.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/1623663755-8949-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kernel-pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index e2f103cce7c1b..c5f18f2408b5b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -122,7 +122,7 @@
 #if defined(CONFIG_ARM64_4K_PAGES)
 #define ARM64_MEMSTART_SHIFT		PUD_SHIFT
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT		(PMD_SHIFT + 5)
+#define ARM64_MEMSTART_SHIFT		CONT_PMD_SHIFT
 #else
 #define ARM64_MEMSTART_SHIFT		PMD_SHIFT
 #endif
-- 
GitLab


From 4aaa87ab3d2de485d8aae7a88cc9cb02dcd2c450 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 14 Jun 2021 13:48:26 +0530
Subject: [PATCH 2873/3804] arm64/mm: Drop SECTION_[SHIFT|SIZE|MASK]

SECTION_[SHIFT|SIZE|MASK] are essentially PMD_[SHIFT|SIZE|MASK]. But these
create confusion being similar to generic sparsemem memory sections, which
are derived from SECTION_SIZE_BITS. Section references have always implied
PMD level block mapping. Instead just use all PMD level macros which would
make it explicit and also remove confusion with sparsemem memory sections.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/1623658706-7182-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kernel-pgtable.h | 4 ++--
 arch/arm64/include/asm/pgtable-hwdef.h  | 7 -------
 arch/arm64/mm/mmu.c                     | 2 +-
 3 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index c5f18f2408b5b..1260187adb31f 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -91,8 +91,8 @@
 
 /* Initial memory map size */
 #if ARM64_SWAPPER_USES_SECTION_MAPS
-#define SWAPPER_BLOCK_SHIFT	SECTION_SHIFT
-#define SWAPPER_BLOCK_SIZE	SECTION_SIZE
+#define SWAPPER_BLOCK_SHIFT	PMD_SHIFT
+#define SWAPPER_BLOCK_SIZE	PMD_SIZE
 #define SWAPPER_TABLE_SHIFT	PUD_SHIFT
 #else
 #define SWAPPER_BLOCK_SHIFT	PAGE_SHIFT
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index b82575a33f8b6..40085e53f573d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -71,13 +71,6 @@
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
 
-/*
- * Section address mask and size definitions.
- */
-#define SECTION_SHIFT		PMD_SHIFT
-#define SECTION_SIZE		(_AC(1, UL) << SECTION_SHIFT)
-#define SECTION_MASK		(~(SECTION_SIZE-1))
-
 /*
  * Contiguous page definitions.
  */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3d34cd127f6b6..5b75f7eefb726 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -228,7 +228,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
 		next = pmd_addr_end(addr, end);
 
 		/* try section mapping first */
-		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+		if (((addr | next | phys) & ~PMD_MASK) == 0 &&
 		    (flags & NO_BLOCK_MAPPINGS) == 0) {
 			pmd_set_huge(pmdp, phys, prot);
 
-- 
GitLab


From 84c5e23edecd7013ceaed8460deed5c33842cb8d Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Mon, 14 Jun 2021 20:27:01 +0800
Subject: [PATCH 2874/3804] arm64: mm: Pass original fault address to
 handle_mm_fault()

Currently, the lower bits of the fault address are cleared before it's
passed to handle_mm_fault(). This is unnecessary since generic code
does the same thing since commit 1a29d85eb0f19 ("mm: use vmf->address
instead of of vmf->virtual_address").

This passes the original fault address to handle_mm_fault() in case
the generic code needs to know the exact fault address.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20210614122701.100515-1-gshan@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/mm/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 6786cf152666c..bd9a0bb5fb566 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -509,7 +509,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (!(vma->vm_flags & vm_flags))
 		return VM_FAULT_BADACCESS;
-	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
+	return handle_mm_fault(vma, addr, mm_flags, regs);
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)
-- 
GitLab


From f3f4f37d53be578c65dd32a1ffad432b33aef236 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 15 Jun 2021 18:39:47 +0800
Subject: [PATCH 2875/3804] regulator: rt6160: Remove vsel_active_low from
 struct rt6160_priv

Using a local variable instead is enough; this simplifies the code.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210615103947.3387994-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt6160-regulator.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/rt6160-regulator.c b/drivers/regulator/rt6160-regulator.c
index b6b53868050f1..ccd023da43188 100644
--- a/drivers/regulator/rt6160-regulator.c
+++ b/drivers/regulator/rt6160-regulator.c
@@ -42,7 +42,6 @@ struct rt6160_priv {
 	struct regulator_desc desc;
 	struct gpio_desc *enable_gpio;
 	struct regmap *regmap;
-	bool vsel_active_low;
 	bool enable_state;
 };
 
@@ -128,19 +127,15 @@ static unsigned int rt6160_get_mode(struct regulator_dev *rdev)
 
 static int rt6160_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 {
-	struct rt6160_priv *priv = rdev_get_drvdata(rdev);
 	struct regmap *regmap = rdev_get_regmap(rdev);
-	unsigned int reg = RT6160_REG_VSELH;
 	int vsel;
 
 	vsel = regulator_map_voltage_linear(rdev, uV, uV);
 	if (vsel < 0)
 		return vsel;
 
-	if (priv->vsel_active_low)
-		reg = RT6160_REG_VSELL;
-
-	return regmap_update_bits(regmap, reg, RT6160_VSEL_MASK, vsel);
+	return regmap_update_bits(regmap, rdev->desc->vsel_reg,
+				  RT6160_VSEL_MASK, vsel);
 }
 
 static int rt6160_get_error_flags(struct regulator_dev *rdev, unsigned int *flags)
@@ -228,6 +223,7 @@ static int rt6160_probe(struct i2c_client *i2c)
 	struct rt6160_priv *priv;
 	struct regulator_config regulator_cfg = {};
 	struct regulator_dev *rdev;
+	bool vsel_active_low;
 	unsigned int devid;
 	int ret;
 
@@ -235,7 +231,8 @@ static int rt6160_probe(struct i2c_client *i2c)
 	if (!priv)
 		return -ENOMEM;
 
-	priv->vsel_active_low = device_property_present(&i2c->dev, "richtek,vsel-active-low");
+	vsel_active_low =
+		device_property_present(&i2c->dev, "richtek,vsel-active-low");
 
 	priv->enable_gpio = devm_gpiod_get_optional(&i2c->dev, "enable", GPIOD_OUT_HIGH);
 	if (IS_ERR(priv->enable_gpio)) {
@@ -267,7 +264,10 @@ static int rt6160_probe(struct i2c_client *i2c)
 	priv->desc.owner = THIS_MODULE;
 	priv->desc.min_uV = RT6160_VOUT_MINUV;
 	priv->desc.uV_step = RT6160_VOUT_STPUV;
-	priv->desc.vsel_reg = RT6160_REG_VSELH;
+	if (vsel_active_low)
+		priv->desc.vsel_reg = RT6160_REG_VSELL;
+	else
+		priv->desc.vsel_reg = RT6160_REG_VSELH;
 	priv->desc.vsel_mask = RT6160_VSEL_MASK;
 	priv->desc.n_voltages = RT6160_N_VOUTS;
 	priv->desc.ramp_reg = RT6160_REG_CNTL;
@@ -276,8 +276,6 @@ static int rt6160_probe(struct i2c_client *i2c)
 	priv->desc.n_ramp_values = ARRAY_SIZE(rt6160_ramp_tables);
 	priv->desc.of_map_mode = rt6160_of_map_mode;
 	priv->desc.ops = &rt6160_regulator_ops;
-	if (priv->vsel_active_low)
-		priv->desc.vsel_reg = RT6160_REG_VSELL;
 
 	regulator_cfg.dev = &i2c->dev;
 	regulator_cfg.of_node = i2c->dev.of_node;
-- 
GitLab


From 6a5976f23dc38749afcb62cc3acf5e3e2b53d5ba Mon Sep 17 00:00:00 2001
From: Patrice Chotard <patrice.chotard@foss.st.com>
Date: Tue, 15 Jun 2021 11:01:15 +0200
Subject: [PATCH 2876/3804] spi: stm32-qspi: Remove unused qspi field of struct
 stm32_qspi_flash

Remove struct stm32_qspi_flash's field qspi which is not used.

Fixes: c530cd1d9d5e ("spi: spi-mem: add stm32 qspi controller")
Signed-off-by: Patrice Chotard <patrice.chotard@foss.st.com>
Link: https://lore.kernel.org/r/20210615090115.30702-1-patrice.chotard@foss.st.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-stm32-qspi.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index 69fd220a28944..15fbd69a4c465 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -93,7 +93,6 @@
 #define STM32_AUTOSUSPEND_DELAY -1
 
 struct stm32_qspi_flash {
-	struct stm32_qspi *qspi;
 	u32 cs;
 	u32 presc;
 };
@@ -590,7 +589,6 @@ static int stm32_qspi_setup(struct spi_device *spi)
 	presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1;
 
 	flash = &qspi->flash[spi->chip_select];
-	flash->qspi = qspi;
 	flash->cs = spi->chip_select;
 	flash->presc = presc;
 
-- 
GitLab


From 75ac5cc2ee6b499bc0225ad67302271772929f19 Mon Sep 17 00:00:00 2001
From: Evan Benn <evanbenn@chromium.org>
Date: Wed, 12 May 2021 12:25:44 +1000
Subject: [PATCH 2877/3804] clocksource/drivers/mediatek: Ack and disable
 interrupts on suspend

Interrupts are disabled during suspend before this driver disables its
timers. ARM trusted firmware will abort suspend if the timer irq is
pending, so ack and disable the timer interrupt during suspend.

Signed-off-by: Evan Benn <evanbenn@chromium.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210512122528.v4.1.I1d9917047de06715da16e1620759f703fcfdcbcb@changeid
---
 drivers/clocksource/timer-mediatek.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
index 9318edcd89635..ab63b95e414f5 100644
--- a/drivers/clocksource/timer-mediatek.c
+++ b/drivers/clocksource/timer-mediatek.c
@@ -241,6 +241,28 @@ static void mtk_gpt_enable_irq(struct timer_of *to, u8 timer)
 	       timer_of_base(to) + GPT_IRQ_EN_REG);
 }
 
+static void mtk_gpt_resume(struct clock_event_device *clk)
+{
+	struct timer_of *to = to_timer_of(clk);
+
+	mtk_gpt_enable_irq(to, TIMER_CLK_EVT);
+}
+
+static void mtk_gpt_suspend(struct clock_event_device *clk)
+{
+	struct timer_of *to = to_timer_of(clk);
+
+	/* Disable all interrupts */
+	writel(0x0, timer_of_base(to) + GPT_IRQ_EN_REG);
+
+	/*
+	 * This is called with interrupts disabled,
+	 * so we need to ack any interrupt that is pending
+	 * or for example ATF will prevent a suspend from completing.
+	 */
+	writel(0x3f, timer_of_base(to) + GPT_IRQ_ACK_REG);
+}
+
 static struct timer_of to = {
 	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
 
@@ -286,6 +308,8 @@ static int __init mtk_gpt_init(struct device_node *node)
 	to.clkevt.set_state_oneshot = mtk_gpt_clkevt_shutdown;
 	to.clkevt.tick_resume = mtk_gpt_clkevt_shutdown;
 	to.clkevt.set_next_event = mtk_gpt_clkevt_next_event;
+	to.clkevt.suspend = mtk_gpt_suspend;
+	to.clkevt.resume = mtk_gpt_resume;
 	to.of_irq.handler = mtk_gpt_interrupt;
 
 	ret = timer_of_init(node, &to);
-- 
GitLab


From 9517c577f9f722270584cfb1a7b4e1354e408658 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 15 Apr 2021 11:55:06 +0300
Subject: [PATCH 2878/3804] clocksource/drivers/timer-ti-dm: Save and restore
 timer TIOCP_CFG

As we are using cpu_pm to save and restore context, we must also save and
restore the timer sysconfig register TIOCP_CFG. This is needed because
we are not calling PM runtime functions at all with cpu_pm.

Fixes: b34677b0999a ("clocksource/drivers/timer-ti-dm: Implement cpu_pm notifier for context save and restore")
Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Adam Ford <aford173@gmail.com>
Cc: Andreas Kemnade <andreas@kemnade.info>
Cc: Lokesh Vutla <lokeshvutla@ti.com>
Cc: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210415085506.56828-1-tony@atomide.com
---
 drivers/clocksource/timer-ti-dm.c | 6 ++++++
 include/clocksource/timer-ti-dm.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index 33eeabf9c3d12..e5c631f1b5cbe 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -78,6 +78,9 @@ static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
 
 static void omap_timer_restore_context(struct omap_dm_timer *timer)
 {
+	__omap_dm_timer_write(timer, OMAP_TIMER_OCP_CFG_OFFSET,
+			      timer->context.ocp_cfg, 0);
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_WAKEUP_EN_REG,
 				timer->context.twer);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_COUNTER_REG,
@@ -95,6 +98,9 @@ static void omap_timer_restore_context(struct omap_dm_timer *timer)
 
 static void omap_timer_save_context(struct omap_dm_timer *timer)
 {
+	timer->context.ocp_cfg =
+		__omap_dm_timer_read(timer, OMAP_TIMER_OCP_CFG_OFFSET, 0);
+
 	timer->context.tclr =
 			omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
 	timer->context.twer =
diff --git a/include/clocksource/timer-ti-dm.h b/include/clocksource/timer-ti-dm.h
index 4c61dade8835f..f6da8a1326398 100644
--- a/include/clocksource/timer-ti-dm.h
+++ b/include/clocksource/timer-ti-dm.h
@@ -74,6 +74,7 @@
 #define OMAP_TIMER_ERRATA_I103_I767			0x80000000
 
 struct timer_regs {
+	u32 ocp_cfg;
 	u32 tidr;
 	u32 tier;
 	u32 twer;
-- 
GitLab


From 870a6e1539829356baf70b57c933d0b309cfac21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=91=A8=E7=90=B0=E6=9D=B0=20=28Zhou=20Yanjie=29?=
 <zhouyanjie@wanyeetech.com>
Date: Sat, 5 Jun 2021 00:31:45 +0800
Subject: [PATCH 2879/3804] clocksource/drivers/ingenic: Rename unreasonable
 array names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1.Rename the "ingenic_ost_clk_info[]" to "x1000_ost_clk_info[]" to
  facilitate the addition of OST support for X2000 SoC in a later
  commit

2.When the OST support for X2000 SoC is added, there will be two
  compatible strings, so renaming "ingenic_ost_of_match[]" to
  "ingenic_ost_of_matches[]" is more reasonable

3.Remove the unnecessary comma in "ingenic_ost_of_matches[]" to reduce
  code size as much as possible.

Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1622824306-30987-2-git-send-email-zhouyanjie@wanyeetech.com
---
 drivers/clocksource/ingenic-sysost.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/clocksource/ingenic-sysost.c b/drivers/clocksource/ingenic-sysost.c
index e77d58449005c..a129840f14f96 100644
--- a/drivers/clocksource/ingenic-sysost.c
+++ b/drivers/clocksource/ingenic-sysost.c
@@ -186,7 +186,7 @@ static const struct clk_ops ingenic_ost_global_timer_ops = {
 
 static const char * const ingenic_ost_clk_parents[] = { "ext" };
 
-static const struct ingenic_ost_clk_info ingenic_ost_clk_info[] = {
+static const struct ingenic_ost_clk_info x1000_ost_clk_info[] = {
 	[OST_CLK_PERCPU_TIMER] = {
 		.init_data = {
 			.name = "percpu timer",
@@ -414,14 +414,14 @@ static const struct ingenic_soc_info x1000_soc_info = {
 	.num_channels = 2,
 };
 
-static const struct of_device_id __maybe_unused ingenic_ost_of_match[] __initconst = {
-	{ .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info, },
+static const struct of_device_id __maybe_unused ingenic_ost_of_matches[] __initconst = {
+	{ .compatible = "ingenic,x1000-ost", .data = &x1000_soc_info },
 	{ /* sentinel */ }
 };
 
 static int __init ingenic_ost_probe(struct device_node *np)
 {
-	const struct of_device_id *id = of_match_node(ingenic_ost_of_match, np);
+	const struct of_device_id *id = of_match_node(ingenic_ost_of_matches, np);
 	struct ingenic_ost *ost;
 	unsigned int i;
 	int ret;
@@ -462,7 +462,7 @@ static int __init ingenic_ost_probe(struct device_node *np)
 	ost->clocks->num = ost->soc_info->num_channels;
 
 	for (i = 0; i < ost->clocks->num; i++) {
-		ret = ingenic_ost_register_clock(ost, i, &ingenic_ost_clk_info[i], ost->clocks);
+		ret = ingenic_ost_register_clock(ost, i, &x1000_ost_clk_info[i], ost->clocks);
 		if (ret) {
 			pr_crit("%s: Cannot register clock %d\n", __func__, i);
 			goto err_unregister_ost_clocks;
-- 
GitLab


From 478036c4cd1a16e613a2f883d79c03cf187faacb Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 14 Jun 2021 23:14:40 +1000
Subject: [PATCH 2880/3804] powerpc: Fix initrd corruption with relative jump
 labels

Commit b0b3b2c78ec0 ("powerpc: Switch to relative jump labels") switched
us to using relative jump labels. That involves changing the code,
target and key members in struct jump_entry to be relative to the
address of the jump_entry, rather than absolute addresses.

We have two static inlines that create a struct jump_entry,
arch_static_branch() and arch_static_branch_jump(), as well as an asm
macro ARCH_STATIC_BRANCH, which is used by the pseries-only hypervisor
tracing code.

Unfortunately we missed updating the key to be a relative reference in
ARCH_STATIC_BRANCH.

That causes a pseries kernel to have a handful of jump_entry structs
with bad key values. Instead of being a relative reference they instead
hold the full address of the key.

However, the code doesn't expect that; it still adds the key value to the
address of the jump_entry (see jump_entry_key()), expecting to get a
pointer to a key somewhere in kernel data.

The table of jump_entry structs sits in rodata, which comes after the
kernel text. In a typical build this will be somewhere around 15MB. The
address of the key will be somewhere in data, typically around 20MB.
Adding the two values together gets us a pointer somewhere around 45MB.

We then call static_key_set_entries() with that bad pointer and modify
some members of the struct static_key we think we are pointing at.

A pseries kernel is typically ~30MB in size, so writing to ~45MB won't
corrupt the kernel itself. However if we're booting with an initrd,
depending on the size and exact location of the initrd, we can corrupt
the initrd. Depending on how exactly we corrupt the initrd it can either
cause the system to not boot, or just corrupt one of the files in the
initrd.

The fix is simply to make the key value relative to the jump_entry
struct in the ARCH_STATIC_BRANCH macro.

Fixes: b0b3b2c78ec0 ("powerpc: Switch to relative jump labels")
Reported-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
Reported-by: Roman Bolshakov <r.bolshakov@yadro.com>
Reported-by: Greg Kurz <groug@kaod.org>
Reported-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Tested-by: Daniel Axtens <dja@axtens.net>
Tested-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210614131440.312360-1-mpe@ellerman.id.au
---
 arch/powerpc/include/asm/jump_label.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index 2d5c6bec2b4f3..93ce3ec253877 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -50,7 +50,7 @@ l_yes:
 1098:	nop;					\
 	.pushsection __jump_table, "aw";	\
 	.long 1098b - ., LABEL - .;		\
-	FTR_ENTRY_LONG KEY;			\
+	FTR_ENTRY_LONG KEY - .;			\
 	.popsection
 #endif
 
-- 
GitLab


From a33d62662d275cee22888fa7760fe09d5b9cd1f9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 15 Jun 2021 08:39:52 +0100
Subject: [PATCH 2881/3804] afs: Fix an IS_ERR() vs NULL check

The proc_symlink() function returns NULL on error; it doesn't return
error pointers.

Fixes: 5b86d4ff5dce ("afs: Implement network namespacing")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/YLjMRKX40pTrJvgf@mwanda/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/afs/main.c b/fs/afs/main.c
index b2975256dadbd..179004b15566d 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -203,8 +203,8 @@ static int __init afs_init(void)
 		goto error_fs;
 
 	afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs");
-	if (IS_ERR(afs_proc_symlink)) {
-		ret = PTR_ERR(afs_proc_symlink);
+	if (!afs_proc_symlink) {
+		ret = -ENOMEM;
 		goto error_proc;
 	}
 
-- 
GitLab


From 98eaa63e96273de075f3ce4eac0f18b33d28b84c Mon Sep 17 00:00:00 2001
From: ChenXiaoSong <chenxiaosong2@huawei.com>
Date: Thu, 10 Jun 2021 15:49:00 +0800
Subject: [PATCH 2882/3804] tomoyo: fix doc warnings

Fix gcc W=1 warnings:

security/tomoyo/audit.c:331: warning: Function parameter or member 'matched_acl' not described in 'tomoyo_get_audit'
security/tomoyo/securityfs_if.c:146: warning: Function parameter or member 'inode' not described in 'tomoyo_release'
security/tomoyo/tomoyo.c:122: warning: Function parameter or member 'path' not described in 'tomoyo_inode_getattr'
security/tomoyo/tomoyo.c:497: warning: Function parameter or member 'clone_flags' not described in 'tomoyo_task_alloc'
security/tomoyo/util.c:92: warning: Function parameter or member 'time64' not described in 'tomoyo_convert_time'

Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
[ penguin-kernel: Also adjust spaces and similar warnings ]
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 security/tomoyo/audit.c         |  1 +
 security/tomoyo/securityfs_if.c |  1 +
 security/tomoyo/tomoyo.c        | 12 +++++-------
 security/tomoyo/util.c          |  4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
index b51bad121c110..d79bf07e16be0 100644
--- a/security/tomoyo/audit.c
+++ b/security/tomoyo/audit.c
@@ -320,6 +320,7 @@ static unsigned int tomoyo_log_count;
  * @ns:          Pointer to "struct tomoyo_policy_namespace".
  * @profile:     Profile number.
  * @index:       Index number of functionality.
+ * @matched_acl: Pointer to "struct tomoyo_acl_info".
  * @is_granted:  True if granted log, false otherwise.
  *
  * Returns true if this request should be audited, false otherwise.
diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c
index 065f4941c4d8c..a2705798476f9 100644
--- a/security/tomoyo/securityfs_if.c
+++ b/security/tomoyo/securityfs_if.c
@@ -139,6 +139,7 @@ static int tomoyo_open(struct inode *inode, struct file *file)
 /**
  * tomoyo_release - close() for /sys/kernel/security/tomoyo/ interface.
  *
+ * @inode: Pointer to "struct inode".
  * @file:  Pointer to "struct file".
  *
  */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 1f3cd432d8308..b6a31901f2894 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -63,7 +63,7 @@ static void tomoyo_bprm_committed_creds(struct linux_binprm *bprm)
 
 #ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER
 /**
- * tomoyo_bprm_for_exec - Target for security_bprm_creds_for_exec().
+ * tomoyo_bprm_creds_for_exec - Target for security_bprm_creds_for_exec().
  *
  * @bprm: Pointer to "struct linux_binprm".
  *
@@ -113,8 +113,7 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm)
 /**
  * tomoyo_inode_getattr - Target for security_inode_getattr().
  *
- * @mnt:    Pointer to "struct vfsmount".
- * @dentry: Pointer to "struct dentry".
+ * @path: Pointer to "struct path".
  *
  * Returns 0 on success, negative value otherwise.
  */
@@ -300,8 +299,7 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd,
 /**
  * tomoyo_file_open - Target for security_file_open().
  *
- * @f:    Pointer to "struct file".
- * @cred: Pointer to "struct cred".
+ * @f: Pointer to "struct file".
  *
  * Returns 0 on success, negative value otherwise.
  */
@@ -487,8 +485,8 @@ struct lsm_blob_sizes tomoyo_blob_sizes __lsm_ro_after_init = {
 /**
  * tomoyo_task_alloc - Target for security_task_alloc().
  *
- * @task:  Pointer to "struct task_struct".
- * @flags: clone() flags.
+ * @task:        Pointer to "struct task_struct".
+ * @clone_flags: clone() flags.
  *
  * Returns 0.
  */
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index e89cac913583c..1da2e3722b126 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -83,8 +83,8 @@ const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = {
 /**
  * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh/mm/ss.
  *
- * @time:  Seconds since 1970/01/01 00:00:00.
- * @stamp: Pointer to "struct tomoyo_time".
+ * @time64: Seconds since 1970/01/01 00:00:00.
+ * @stamp:  Pointer to "struct tomoyo_time".
  *
  * Returns nothing.
  */
-- 
GitLab


From 1348924ba8169f35cedfd0a0087872b81a632b8e Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Mon, 14 Jun 2021 14:12:22 -0700
Subject: [PATCH 2883/3804] x86/msr: Define new bits in TSX_FORCE_ABORT MSR

Intel client processors that support the IA32_TSX_FORCE_ABORT MSR
related to perf counter interaction [1] received a microcode update that
deprecates the Transactional Synchronization Extension (TSX) feature.
The bit FORCE_ABORT_RTM now defaults to 1, writes to this bit are
ignored. A new bit TSX_CPUID_CLEAR clears the TSX related CPUID bits.

The summary of changes to the IA32_TSX_FORCE_ABORT MSR is:

  Bit 0: FORCE_ABORT_RTM (legacy bit, new default=1) Status bit that
  indicates if RTM transactions are always aborted. This bit is
  essentially !SDV_ENABLE_RTM(Bit 2). Writes to this bit are ignored.

  Bit 1: TSX_CPUID_CLEAR (new bit, default=0) When set, CPUID.HLE = 0
  and CPUID.RTM = 0.

  Bit 2: SDV_ENABLE_RTM (new bit, default=0) When clear, XBEGIN will
  always abort with EAX code 0. When set, XBEGIN will not be forced to
  abort (but will always abort in SGX enclaves). This bit is intended to
  be used on developer systems. If this bit is set, transactional
  atomicity correctness is not certain. SDV = Software Development
  Vehicle (SDV), i.e. developer systems.

Performance monitoring counter 3 is usable in all cases, regardless of
the value of above bits.

Add support for a new CPUID bit - CPUID.RTM_ALWAYS_ABORT (CPUID 7.EDX[11])
 - to indicate the status of always abort behavior.

[1] [ bp: Look for document ID 604224, "Performance Monitoring Impact
      of Intel Transactional Synchronization Extension Memory". Since
      there's no way for us to have stable links to documents... ]

 [ bp: Massage and extend commit message. ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Link: https://lkml.kernel.org/r/9add61915b4a4eedad74fbd869107863a28b428e.1623704845.git-series.pawan.kumar.gupta@linux.intel.com
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 81269c73a0dc9..d0ce5cfd3ac14 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -378,6 +378,7 @@
 #define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
 #define X86_FEATURE_SRBDS_CTRL		(18*32+ 9) /* "" SRBDS mitigation MSR available */
 #define X86_FEATURE_MD_CLEAR		(18*32+10) /* VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT	(18*32+11) /* "" RTM transaction always aborts */
 #define X86_FEATURE_TSX_FORCE_ABORT	(18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE		(18*32+14) /* SERIALIZE instruction */
 #define X86_FEATURE_HYBRID_CPU		(18*32+15) /* "" This part has CPUs of more than one type */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 742d89a00721d..2bc1600d36043 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -772,6 +772,10 @@
 
 #define MSR_TFA_RTM_FORCE_ABORT_BIT	0
 #define MSR_TFA_RTM_FORCE_ABORT		BIT_ULL(MSR_TFA_RTM_FORCE_ABORT_BIT)
+#define MSR_TFA_TSX_CPUID_CLEAR_BIT	1
+#define MSR_TFA_TSX_CPUID_CLEAR		BIT_ULL(MSR_TFA_TSX_CPUID_CLEAR_BIT)
+#define MSR_TFA_SDV_ENABLE_RTM_BIT	2
+#define MSR_TFA_SDV_ENABLE_RTM		BIT_ULL(MSR_TFA_SDV_ENABLE_RTM_BIT)
 
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX		0x00000180
-- 
GitLab


From 09a8ec9a2d03efa2813d9d306424eb6802146b57 Mon Sep 17 00:00:00 2001
From: Al Cooper <alcooperx@gmail.com>
Date: Wed, 2 Jun 2021 15:27:57 -0400
Subject: [PATCH 2884/3804] dt-bindings: mmc: sdhci-iproc: Add
 brcm,bcm7211a0-sdhci

Add a new compatible string for the legacy SDHCI controller on the
BCM7211 family of SoCs.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
Link: https://lore.kernel.org/r/20210602192758.38735-1-alcooperx@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml b/Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml
index 6f569fbfa134d..2f63f2cdeb71e 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/brcm,iproc-sdhci.yaml
@@ -21,6 +21,7 @@ properties:
       - brcm,bcm2711-emmc2
       - brcm,sdhci-iproc-cygnus
       - brcm,sdhci-iproc
+      - brcm,bcm7211a0-sdhci
 
   reg:
     minItems: 1
-- 
GitLab


From 98b5ce4c08ca85727888fdbd362d574bcfa18e3c Mon Sep 17 00:00:00 2001
From: Al Cooper <alcooperx@gmail.com>
Date: Wed, 2 Jun 2021 15:27:58 -0400
Subject: [PATCH 2885/3804] mmc: sdhci-iproc: Add support for the legacy sdhci
 controller on the BCM7211

Add support for the legacy Arasan SDHCI controller on the BCM7211 and
related SoCs. This includes adding a .shutdown callback to increase
the power savings during S5.

Signed-off-by: Al Cooper <alcooperx@gmail.com>
Link: https://lore.kernel.org/r/20210602192758.38735-2-alcooperx@gmail.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/Kconfig       |  2 +-
 drivers/mmc/host/sdhci-iproc.c | 30 ++++++++++++++++++++++++++++++
 drivers/mmc/host/sdhci.h       |  2 ++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a4d4c757eea09..561184fa7eb9d 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -412,7 +412,7 @@ config MMC_SDHCI_MILBEAUT
 
 config MMC_SDHCI_IPROC
 	tristate "SDHCI support for the BCM2835 & iProc SD/MMC Controller"
-	depends on ARCH_BCM2835 || ARCH_BCM_IPROC || COMPILE_TEST
+	depends on ARCH_BCM2835 || ARCH_BCM_IPROC || ARCH_BRCMSTB || COMPILE_TEST
 	depends on MMC_SDHCI_PLTFM
 	depends on OF || ACPI
 	default ARCH_BCM_IPROC
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index ddeaf8e1f72f9..cce390fe9cf37 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -286,11 +286,35 @@ static const struct sdhci_iproc_data bcm2711_data = {
 	.mmc_caps = MMC_CAP_3_3V_DDR,
 };
 
+static const struct sdhci_pltfm_data sdhci_bcm7211a0_pltfm_data = {
+	.quirks = SDHCI_QUIRK_MISSING_CAPS |
+		SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
+		SDHCI_QUIRK_BROKEN_DMA |
+		SDHCI_QUIRK_BROKEN_ADMA,
+	.ops = &sdhci_iproc_ops,
+};
+
+#define BCM7211A0_BASE_CLK_MHZ 100
+static const struct sdhci_iproc_data bcm7211a0_data = {
+	.pdata = &sdhci_bcm7211a0_pltfm_data,
+	.caps = ((BCM7211A0_BASE_CLK_MHZ / 2) << SDHCI_TIMEOUT_CLK_SHIFT) |
+		(BCM7211A0_BASE_CLK_MHZ << SDHCI_CLOCK_BASE_SHIFT) |
+		((0x2 << SDHCI_MAX_BLOCK_SHIFT)
+			& SDHCI_MAX_BLOCK_MASK) |
+		SDHCI_CAN_VDD_330 |
+		SDHCI_CAN_VDD_180 |
+		SDHCI_CAN_DO_SUSPEND |
+		SDHCI_CAN_DO_HISPD,
+	.caps1 = SDHCI_DRIVER_TYPE_C |
+		 SDHCI_DRIVER_TYPE_D,
+};
+
 static const struct of_device_id sdhci_iproc_of_match[] = {
 	{ .compatible = "brcm,bcm2835-sdhci", .data = &bcm2835_data },
 	{ .compatible = "brcm,bcm2711-emmc2", .data = &bcm2711_data },
 	{ .compatible = "brcm,sdhci-iproc-cygnus", .data = &iproc_cygnus_data},
 	{ .compatible = "brcm,sdhci-iproc", .data = &iproc_data },
+	{ .compatible = "brcm,bcm7211a0-sdhci", .data = &bcm7211a0_data },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sdhci_iproc_of_match);
@@ -384,6 +408,11 @@ err:
 	return ret;
 }
 
+static void sdhci_iproc_shutdown(struct platform_device *pdev)
+{
+	sdhci_pltfm_suspend(&pdev->dev);
+}
+
 static struct platform_driver sdhci_iproc_driver = {
 	.driver = {
 		.name = "sdhci-iproc",
@@ -394,6 +423,7 @@ static struct platform_driver sdhci_iproc_driver = {
 	},
 	.probe = sdhci_iproc_probe,
 	.remove = sdhci_pltfm_unregister,
+	.shutdown = sdhci_iproc_shutdown,
 };
 module_platform_driver(sdhci_iproc_driver);
 
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0770c036e2ff5..c35ed4be75b7b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -201,8 +201,10 @@
 
 #define SDHCI_CAPABILITIES	0x40
 #define  SDHCI_TIMEOUT_CLK_MASK		GENMASK(5, 0)
+#define  SDHCI_TIMEOUT_CLK_SHIFT 0
 #define  SDHCI_TIMEOUT_CLK_UNIT	0x00000080
 #define  SDHCI_CLOCK_BASE_MASK		GENMASK(13, 8)
+#define  SDHCI_CLOCK_BASE_SHIFT	8
 #define  SDHCI_CLOCK_V3_BASE_MASK	GENMASK(15, 8)
 #define  SDHCI_MAX_BLOCK_MASK	0x00030000
 #define  SDHCI_MAX_BLOCK_SHIFT  16
-- 
GitLab


From ad3c2e174938d72fded674acead42e2464a3b460 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Mon, 14 Jun 2021 14:13:23 -0700
Subject: [PATCH 2886/3804] x86/events/intel: Do not deploy TSX force abort
 workaround when TSX is deprecated

The earlier workaround added by

  400816f60c54 ("perf/x86/intel: Implement support for TSX Force Abort")

for perf counter interactions [1] is not required on some client
systems which received a microcode update that deprecates TSX.

Bypass the perf workaround when such microcode is enumerated.

[1] [ bp: Look for document ID 604224, "Performance Monitoring Impact
      of Intel Transactional Synchronization Extension Memory". Since
      there's no way for us to have stable links to documents... ]

 [ bp: Massage comment. ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Link: https://lkml.kernel.org/r/e4d410f786946280ced02dd07c74e0a74f1d10cb.1623704845.git-series.pawan.kumar.gupta@linux.intel.com
---
 arch/x86/events/intel/core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2521d03de5e02..062bf8968c0e5 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6015,7 +6015,13 @@ __init int intel_pmu_init(void)
 		tsx_attr = hsw_tsx_events_attrs;
 		intel_pmu_pebs_data_source_skl(pmem);
 
-		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+		/*
+		 * Processors with CPUID.RTM_ALWAYS_ABORT have TSX deprecated by default.
+		 * TSX force abort hooks are not required on these systems. Only deploy
+		 * workaround when microcode has not enabled X86_FEATURE_RTM_ALWAYS_ABORT.
+		 */
+		if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) &&
+		   !boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT)) {
 			x86_pmu.flags |= PMU_FL_TFA;
 			x86_pmu.get_event_constraints = tfa_get_event_constraints;
 			x86_pmu.enable_all = intel_tfa_pmu_enable_all;
-- 
GitLab


From 8c485bedfb7852fa4de2a34aac2a6fd911f539f4 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair@alistair23.me>
Date: Tue, 15 Jun 2021 20:33:58 +1000
Subject: [PATCH 2887/3804] regulator: sy7636a: Initial commit

Initial support for the Silergy SY7636A-regulator Power Management chip.

Signed-off-by: Alistair Francis <alistair@alistair23.me>
Link: https://lore.kernel.org/r/20210615103400.946-3-alistair@alistair23.me
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig             |   6 ++
 drivers/regulator/Makefile            |   1 +
 drivers/regulator/sy7636a-regulator.c | 127 ++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 drivers/regulator/sy7636a-regulator.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 3134211b94983..a69b546872c1b 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -1169,6 +1169,12 @@ config REGULATOR_STW481X_VMMC
 	  This driver supports the internal VMMC regulator in the STw481x
 	  PMIC chips.
 
+config REGULATOR_SY7636A
+	tristate "Silergy SY7636A voltage regulator"
+	depends on MFD_SY7636A
+	help
+	  This driver supports Silergy SY3686A voltage regulator.
+
 config REGULATOR_SY8106A
 	tristate "Silergy SY8106A regulator"
 	depends on I2C && (OF || COMPILE_TEST)
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 59ce3359a84a5..028f2b8788db2 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -138,6 +138,7 @@ obj-$(CONFIG_REGULATOR_STM32_VREFBUF) += stm32-vrefbuf.o
 obj-$(CONFIG_REGULATOR_STM32_PWR) += stm32-pwr.o
 obj-$(CONFIG_REGULATOR_STPMIC1) += stpmic1_regulator.o
 obj-$(CONFIG_REGULATOR_STW481X_VMMC) += stw481x-vmmc.o
+obj-$(CONFIG_REGULATOR_SY7636A) += sy7636a-regulator.o
 obj-$(CONFIG_REGULATOR_SY8106A) += sy8106a-regulator.o
 obj-$(CONFIG_REGULATOR_SY8824X) += sy8824x.o
 obj-$(CONFIG_REGULATOR_SY8827N) += sy8827n.o
diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
new file mode 100644
index 0000000000000..c384c2b6ac46a
--- /dev/null
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Functions to access SY3686A power management chip voltages
+//
+// Copyright (C) 2019 reMarkable AS - http://www.remarkable.com/
+//
+// Authors: Lars Ivar Miljeteig <lars.ivar.miljeteig@remarkable.com>
+//          Alistair Francis <alistair@alistair23.me>
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/gpio/consumer.h>
+#include <linux/mfd/sy7636a.h>
+
+#define SY7636A_POLL_ENABLED_TIME 500
+
+static int sy7636a_get_vcom_voltage_op(struct regulator_dev *rdev)
+{
+	int ret;
+	unsigned int val, val_h;
+
+	ret = regmap_read(rdev->regmap, SY7636A_REG_VCOM_ADJUST_CTRL_L, &val);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(rdev->regmap, SY7636A_REG_VCOM_ADJUST_CTRL_H, &val_h);
+	if (ret)
+		return ret;
+
+	val |= (val_h << VCOM_ADJUST_CTRL_SHIFT);
+
+	return (val & VCOM_ADJUST_CTRL_MASK) * VCOM_ADJUST_CTRL_SCAL;
+}
+
+static int sy7636a_get_status(struct regulator_dev *rdev)
+{
+	struct sy7636a *sy7636a = dev_get_drvdata(rdev->dev.parent);
+	int ret = 0;
+
+	ret = gpiod_get_value_cansleep(sy7636a->pgood_gpio);
+	if (ret < 0)
+		dev_err(&rdev->dev, "Failed to read pgood gpio: %d\n", ret);
+
+	return ret;
+}
+
+static const struct regulator_ops sy7636a_vcom_volt_ops = {
+	.get_voltage = sy7636a_get_vcom_voltage_op,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = sy7636a_get_status,
+};
+
+struct regulator_desc desc = {
+	.name = "vcom",
+	.id = 0,
+	.ops = &sy7636a_vcom_volt_ops,
+	.type = REGULATOR_VOLTAGE,
+	.owner = THIS_MODULE,
+	.enable_reg = SY7636A_REG_OPERATION_MODE_CRL,
+	.enable_mask = SY7636A_OPERATION_MODE_CRL_ONOFF,
+	.poll_enabled_time	= SY7636A_POLL_ENABLED_TIME,
+	.regulators_node = of_match_ptr("regulators"),
+	.of_match = of_match_ptr("vcom"),
+};
+
+static int sy7636a_regulator_probe(struct platform_device *pdev)
+{
+	struct sy7636a *sy7636a = dev_get_drvdata(pdev->dev.parent);
+	struct regulator_config config = { };
+	struct regulator_dev *rdev;
+	struct gpio_desc *gdp;
+	int ret;
+
+	if (!sy7636a)
+		return -EPROBE_DEFER;
+
+	platform_set_drvdata(pdev, sy7636a);
+
+	gdp = devm_gpiod_get(sy7636a->dev, "epd-pwr-good", GPIOD_IN);
+	if (IS_ERR(gdp)) {
+		dev_err(sy7636a->dev, "Power good GPIO fault %ld\n", PTR_ERR(gdp));
+		return PTR_ERR(gdp);
+	}
+
+	sy7636a->pgood_gpio = gdp;
+
+	ret = regmap_write(sy7636a->regmap, SY7636A_REG_POWER_ON_DELAY_TIME, 0x0);
+	if (ret) {
+		dev_err(sy7636a->dev, "Failed to initialize regulator: %d\n", ret);
+		return ret;
+	}
+
+	config.dev = &pdev->dev;
+	config.dev->of_node = sy7636a->dev->of_node;
+	config.driver_data = sy7636a;
+	config.regmap = sy7636a->regmap;
+
+	rdev = devm_regulator_register(&pdev->dev, &desc, &config);
+	if (IS_ERR(rdev)) {
+		dev_err(sy7636a->dev, "Failed to register %s regulator\n",
+			pdev->name);
+		return PTR_ERR(rdev);
+	}
+
+	return 0;
+}
+
+static const struct platform_device_id sy7636a_regulator_id_table[] = {
+	{ "sy7636a-regulator", },
+};
+MODULE_DEVICE_TABLE(platform, sy7636a_regulator_id_table);
+
+static struct platform_driver sy7636a_regulator_driver = {
+	.driver = {
+		.name = "sy7636a-regulator",
+	},
+	.probe = sy7636a_regulator_probe,
+	.id_table = sy7636a_regulator_id_table,
+};
+module_platform_driver(sy7636a_regulator_driver);
+
+MODULE_AUTHOR("Lars Ivar Miljeteig <lars.ivar.miljeteig@remarkable.com>");
+MODULE_DESCRIPTION("SY7636A voltage regulator driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 70d654ea3de937d7754c107bb8eeb20e30262c89 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 15 Jun 2021 21:29:34 +0800
Subject: [PATCH 2888/3804] regulator: mt6315: Fix checking return value of
 devm_regmap_init_spmi_ext

devm_regmap_init_spmi_ext() returns ERR_PTR() on error.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210615132934.3453965-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6315-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/mt6315-regulator.c b/drivers/regulator/mt6315-regulator.c
index 8c5d72869c2be..ebb68d3b53906 100644
--- a/drivers/regulator/mt6315-regulator.c
+++ b/drivers/regulator/mt6315-regulator.c
@@ -223,8 +223,8 @@ static int mt6315_regulator_probe(struct spmi_device *pdev)
 	int i;
 
 	regmap = devm_regmap_init_spmi_ext(pdev, &mt6315_regmap_config);
-	if (!regmap)
-		return -ENODEV;
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
 
 	chip = devm_kzalloc(dev, sizeof(struct mt6315_chip), GFP_KERNEL);
 	if (!chip)
-- 
GitLab


From 293649307ef9abcd4f83f6dac4d4400dfd97c936 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Mon, 14 Jun 2021 14:14:25 -0700
Subject: [PATCH 2889/3804] x86/tsx: Clear CPUID bits when TSX always force
 aborts

As a result of TSX deprecation, some processors always abort TSX
transactions by default after a microcode update.

When the TSX feature cannot be used, it is better to hide it. Clear the
CPUID.RTM and CPUID.HLE bits when TSX transactions always abort.

 [ bp: Massage commit message and comments. ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Link: https://lkml.kernel.org/r/5209b3d72ffe5bd3cafdcc803f5b883f785329c3.1623704845.git-series.pawan.kumar.gupta@linux.intel.com
---
 arch/x86/kernel/cpu/cpu.h   |  2 ++
 arch/x86/kernel/cpu/intel.c |  4 +++-
 arch/x86/kernel/cpu/tsx.c   | 37 +++++++++++++++++++++++++++++++++++--
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 67944128876d7..95521302630d4 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -48,6 +48,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 enum tsx_ctrl_states {
 	TSX_CTRL_ENABLE,
 	TSX_CTRL_DISABLE,
+	TSX_CTRL_RTM_ALWAYS_ABORT,
 	TSX_CTRL_NOT_SUPPORTED,
 };
 
@@ -56,6 +57,7 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
 extern void __init tsx_init(void);
 extern void tsx_enable(void);
 extern void tsx_disable(void);
+extern void tsx_clear_cpuid(void);
 #else
 static inline void tsx_init(void) { }
 #endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8adffc17fa8b8..861e919eba9ac 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -717,8 +717,10 @@ static void init_intel(struct cpuinfo_x86 *c)
 
 	if (tsx_ctrl_state == TSX_CTRL_ENABLE)
 		tsx_enable();
-	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+	else if (tsx_ctrl_state == TSX_CTRL_DISABLE)
 		tsx_disable();
+	else if (tsx_ctrl_state == TSX_CTRL_RTM_ALWAYS_ABORT)
+		tsx_clear_cpuid();
 
 	split_lock_init();
 	bus_lock_init();
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index e2ad30e474f82..9c7a5f0492929 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -2,7 +2,7 @@
 /*
  * Intel Transactional Synchronization Extensions (TSX) control.
  *
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2019-2021 Intel Corporation
  *
  * Author:
  *	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
@@ -84,13 +84,46 @@ static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
 	return TSX_CTRL_ENABLE;
 }
 
+void tsx_clear_cpuid(void)
+{
+	u64 msr;
+
+	/*
+	 * MSR_TFA_TSX_CPUID_CLEAR bit is only present when both CPUID
+	 * bits RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are present.
+	 */
+	if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) &&
+	    boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+		rdmsrl(MSR_TSX_FORCE_ABORT, msr);
+		msr |= MSR_TFA_TSX_CPUID_CLEAR;
+		wrmsrl(MSR_TSX_FORCE_ABORT, msr);
+	}
+}
+
 void __init tsx_init(void)
 {
 	char arg[5] = {};
 	int ret;
 
-	if (!tsx_ctrl_is_supported())
+	/*
+	 * Hardware will always abort a TSX transaction if both CPUID bits
+	 * RTM_ALWAYS_ABORT and TSX_FORCE_ABORT are set. In this case, it is
+	 * better not to enumerate CPUID.RTM and CPUID.HLE bits. Clear them
+	 * here.
+	 */
+	if (boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) &&
+	    boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
+		tsx_ctrl_state = TSX_CTRL_RTM_ALWAYS_ABORT;
+		tsx_clear_cpuid();
+		setup_clear_cpu_cap(X86_FEATURE_RTM);
+		setup_clear_cpu_cap(X86_FEATURE_HLE);
 		return;
+	}
+
+	if (!tsx_ctrl_is_supported()) {
+		tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
+		return;
+	}
 
 	ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
 	if (ret >= 0) {
-- 
GitLab


From 4692bc775d2180a937335ccba0edce557103d44a Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Tue, 15 Jun 2021 22:16:39 +1200
Subject: [PATCH 2890/3804] x86/sgx: Add missing xa_destroy() when virtual EPC
 is destroyed

xa_destroy() needs to be called to destroy a virtual EPC's page array
before calling kfree() to free the virtual EPC. Currently it is not
called so add the missing xa_destroy().

Fixes: 540745ddbc70 ("x86/sgx: Introduce virtual EPC for use by KVM guests")
Signed-off-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Tested-by: Yang Zhong <yang.zhong@intel.com>
Link: https://lkml.kernel.org/r/20210615101639.291929-1-kai.huang@intel.com
---
 arch/x86/kernel/cpu/sgx/virt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/sgx/virt.c b/arch/x86/kernel/cpu/sgx/virt.c
index 6ad165a5c0cc5..64511c4a52001 100644
--- a/arch/x86/kernel/cpu/sgx/virt.c
+++ b/arch/x86/kernel/cpu/sgx/virt.c
@@ -212,6 +212,7 @@ static int sgx_vepc_release(struct inode *inode, struct file *file)
 		list_splice_tail(&secs_pages, &zombie_secs_pages);
 	mutex_unlock(&zombie_secs_pages_lock);
 
+	xa_destroy(&vepc->page_array);
 	kfree(vepc);
 
 	return 0;
-- 
GitLab


From 94f0b2d4a1d0c52035aef425da5e022bd2cb1c71 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 15 Jun 2021 09:26:19 -0700
Subject: [PATCH 2891/3804] proc: only require mm_struct for writing

In commit 591a22c14d3f ("proc: Track /proc/$pid/attr/ opener mm_struct") we
started using __mem_open() to track the mm_struct at open-time, so that
we could then check it for writes.

But that also ended up making the permission checks at open time much
stricter - and not just for writes, but for reads too.  And that in turn
caused a regression for at least Fedora 29, where NIC interfaces fail to
start when using NetworkManager.

Since only the write side wanted the mm_struct test, ignore any failures
by __mem_open() at open time, leaving reads unaffected.  The write()
time verification of the mm_struct pointer will then catch the failure
case because a NULL pointer will not match a valid 'current->mm'.

Link: https://lore.kernel.org/netdev/YMjTlp2FSJYvoyFa@unreal/
Fixes: 591a22c14d3f ("proc: Track /proc/$pid/attr/ opener mm_struct")
Reported-and-tested-by: Leon Romanovsky <leon@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 7118ebe38fa62..9cbd915025ad7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2676,7 +2676,9 @@ out:
 #ifdef CONFIG_SECURITY
 static int proc_pid_attr_open(struct inode *inode, struct file *file)
 {
-	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+	file->private_data = NULL;
+	__mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
+	return 0;
 }
 
 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
-- 
GitLab


From 475b92f932168a78da8109acd10bfb7578b8f2bb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 14 Jun 2021 15:24:05 -0700
Subject: [PATCH 2892/3804] ptp: improve max_adj check against unreasonable
 values

Scaled PPM conversion to PPB may (on 64bit systems) result
in a value larger than s32 can hold (freq/scaled_ppm is a long).
This means the kernel will not correctly reject unreasonably
high ->freq values (e.g. > 4294967295ppb, 281474976645 scaled PPM).

The conversion is equivalent to a division by ~66 (65.536),
so the value of ppb is always smaller than ppm, but not small
enough to assume narrowing the type from long -> s32 is okay.

Note that reasonable user space (e.g. ptp4l) will not use such
high values, anyway, 4289046510ppb ~= 4.3x, so the fix is
somewhat pedantic.

Fixes: d39a743511cd ("ptp: validate the requested frequency adjustment.")
Fixes: d94ba80ebbea ("ptp: Added a brand new class driver for ptp clocks.")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 6 +++---
 include/linux/ptp_clock_kernel.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 03a246e60fd98..21c4c34c52d8d 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -63,7 +63,7 @@ static void enqueue_external_timestamp(struct timestamp_event_queue *queue,
 	spin_unlock_irqrestore(&queue->lock, flags);
 }
 
-s32 scaled_ppm_to_ppb(long ppm)
+long scaled_ppm_to_ppb(long ppm)
 {
 	/*
 	 * The 'freq' field in the 'struct timex' is in parts per
@@ -80,7 +80,7 @@ s32 scaled_ppm_to_ppb(long ppm)
 	s64 ppb = 1 + ppm;
 	ppb *= 125;
 	ppb >>= 13;
-	return (s32) ppb;
+	return (long) ppb;
 }
 EXPORT_SYMBOL(scaled_ppm_to_ppb);
 
@@ -138,7 +138,7 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 		delta = ktime_to_ns(kt);
 		err = ops->adjtime(ops, delta);
 	} else if (tx->modes & ADJ_FREQUENCY) {
-		s32 ppb = scaled_ppm_to_ppb(tx->freq);
+		long ppb = scaled_ppm_to_ppb(tx->freq);
 		if (ppb > ops->max_adj || ppb < -ops->max_adj)
 			return -ERANGE;
 		if (ops->adjfine)
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d47fd33b2285..51d7f1b8b32aa 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -235,7 +235,7 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
  * @ppm:    Parts per million, but with a 16 bit binary fractional field
  */
 
-extern s32 scaled_ppm_to_ppb(long ppm);
+extern long scaled_ppm_to_ppb(long ppm);
 
 /**
  * ptp_find_pin() - obtain the pin index of a given auxiliary function
-- 
GitLab


From e34492dea68d4f09e9989e518fc76cd41909d707 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Tue, 15 Jun 2021 07:52:43 +0800
Subject: [PATCH 2893/3804] net: inline function get_net_ns_by_fd if NET_NS is
 disabled

The function get_net_ns_by_fd() could be inlined when NET_NS is not
enabled.

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 7 ++++++-
 net/core/net_namespace.c    | 8 +-------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 6412d7833d97a..bdc0459a595ee 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -186,6 +186,7 @@ void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
 void net_ns_barrier(void);
 
 struct ns_common *get_net_ns(struct ns_common *ns);
+struct net *get_net_ns_by_fd(int fd);
 #else /* CONFIG_NET_NS */
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
@@ -210,13 +211,17 @@ static inline struct ns_common *get_net_ns(struct ns_common *ns)
 {
 	return ERR_PTR(-EINVAL);
 }
+
+static inline struct net *get_net_ns_by_fd(int fd)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif /* CONFIG_NET_NS */
 
 
 extern struct list_head net_namespace_list;
 
 struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int fd);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index cc8dafb25d612..9b5a767eddd5f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -672,14 +672,8 @@ struct net *get_net_ns_by_fd(int fd)
 	fput(file);
 	return net;
 }
-
-#else
-struct net *get_net_ns_by_fd(int fd)
-{
-	return ERR_PTR(-EINVAL);
-}
-#endif
 EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
+#endif
 
 struct net *get_net_ns_by_pid(pid_t pid)
 {
-- 
GitLab


From 9163f01130304fab1f74683d7d44632da7bda637 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 15 Jun 2021 15:02:58 +0530
Subject: [PATCH 2894/3804] arm64/mm: Fix ttbr0 values stored in struct
 thread_info for software-pan

When using CONFIG_ARM64_SW_TTBR0_PAN, a task's thread_info::ttbr0 must be
the TTBR0_EL1 value used to run userspace. With 52-bit PAs, the PA must be
packed into the TTBR using phys_to_ttbr(), but we forget to do this in some
of the SW PAN code. Thus, if the value is installed into TTBR0_EL1 (as may
happen in the uaccess routines), this could result in UNPREDICTABLE
behaviour.

Since hardware with 52-bit PA support almost certainly has HW PAN, which
will be used in preference, this shouldn't be a practical issue, but let's
fix this for consistency.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Fixes: 529c4b05a3cb ("arm64: handle 52-bit addresses in TTBR")
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/1623749578-11231-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/mmu_context.h | 4 ++--
 arch/arm64/kernel/setup.c            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index d3cef91335396..eeb210997149a 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -177,9 +177,9 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
 		return;
 
 	if (mm == &init_mm)
-		ttbr = __pa_symbol(reserved_pg_dir);
+		ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 	else
-		ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
+		ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
 
 	WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
 }
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 61845c0821d9d..68b30e8c22dbf 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -381,7 +381,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
 	 * faults in case uaccess_enable() is inadvertently called by the init
 	 * thread.
 	 */
-	init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir);
+	init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
 #endif
 
 	if (boot_args[1] || boot_args[2] || boot_args[3]) {
-- 
GitLab


From c70fe14f83ae0793a1119fa5741b19ab9ba411b2 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Tue, 18 May 2021 18:14:03 +0800
Subject: [PATCH 2895/3804] arm64: mm: fix the count comments in
 compute_indices

'count - 1' is confusing and does not match what the code actually does.
'count' actually represents the extra entries required, so there is no
need to subtract 1.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210518101405.1048860-3-aisheng.dong@nxp.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/head.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 96873dfa67fd5..b70db34458ecf 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -195,7 +195,7 @@ SYM_CODE_END(preserve_boot_args)
 	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1)
 	mov	\istart, \ptrs
 	mul	\istart, \istart, \count
-	add	\iend, \iend, \istart	// iend += (count - 1) * ptrs
+	add	\iend, \iend, \istart	// iend += count * ptrs
 					// our entries span multiple tables
 
 	lsr	\istart, \vstart, \shift
-- 
GitLab


From f91671b5418bde81a7ce6bb2e9f3f4d41184b77c Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Tue, 18 May 2021 18:14:04 +0800
Subject: [PATCH 2896/3804] arm64: mm: drop unused __pa(__idmap_text_start)

x5 is not used in the following map_memory. Instead,
__pa(__idmap_text_start) is stored in x3 which is used later.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518101405.1048860-4-aisheng.dong@nxp.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/head.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b70db34458ecf..d266b4c6287d9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -354,7 +354,6 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 #endif
 1:
 	ldr_l	x4, idmap_ptrs_per_pgd
-	mov	x5, x3				// __pa(__idmap_text_start)
 	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
 
 	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
-- 
GitLab


From 7957a3db01bf533a235a9ae9333150abbe6bde32 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Tue, 18 May 2021 18:14:05 +0800
Subject: [PATCH 2897/3804] arm64: head: fix code comments in
 set_cpu_boot_mode_flag

Up to here, the CPU boot mode can either be EL1 or EL2.
Correct the code comments a bit.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210518101405.1048860-5-aisheng.dong@nxp.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/head.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index d266b4c6287d9..3b88000841d92 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -550,7 +550,7 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
 	cmp	w0, #BOOT_CPU_MODE_EL2
 	b.ne	1f
 	add	x1, x1, #4
-1:	str	w0, [x1]			// This CPU has booted in EL1
+1:	str	w0, [x1]			// Save CPU boot mode
 	dmb	sy
 	dc	ivac, x1			// Invalidate potentially stale cache line
 	ret
-- 
GitLab


From c1a3d4067309451e68c33dbd356032549cc0bd8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Tue, 15 Jun 2021 01:05:49 -0700
Subject: [PATCH 2898/3804] net: cdc_ncm: switch to eth%d interface naming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is meant to make the host side cdc_ncm interface consistently
named just like the older CDC protocols: cdc_ether & cdc_ecm
(and even rndis_host), which all use 'FLAG_ETHER | FLAG_POINTTOPOINT'.

include/linux/usb/usbnet.h:
  #define FLAG_ETHER	0x0020		/* maybe use "eth%d" names */
  #define FLAG_WLAN	0x0080		/* use "wlan%d" names */
  #define FLAG_WWAN	0x0400		/* use "wwan%d" names */
  #define FLAG_POINTTOPOINT 0x1000	/* possibly use "usb%d" names */

drivers/net/usb/usbnet.c @ line 1711:
  strcpy (net->name, "usb%d");
  ...
  // heuristic:  "usb%d" for links we know are two-host,
  // else "eth%d" when there's reasonable doubt.  userspace
  // can rename the link if it knows better.
  if ((dev->driver_info->flags & FLAG_ETHER) != 0 &&
      ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 ||
       (net->dev_addr [0] & 0x02) == 0))
          strcpy (net->name, "eth%d");
  /* WLAN devices should always be named "wlan%d" */
  if ((dev->driver_info->flags & FLAG_WLAN) != 0)
          strcpy(net->name, "wlan%d");
  /* WWAN devices should always be named "wwan%d" */
  if ((dev->driver_info->flags & FLAG_WWAN) != 0)
          strcpy(net->name, "wwan%d");

So by using ETHER | POINTTOPOINT the interface naming is
either usb%d or eth%d based on the global uniqueness of the
mac address of the device.

Without this 2.5gbps ethernet dongles which all seem to use the cdc_ncm
driver end up being called usb%d instead of eth%d even though they're
definitely not two-host.  (All 1gbps & 5gbps ethernet usb dongles I've
tested don't hit this problem due to use of different drivers, primarily
r8152 and aqc111)

Fixes tag is based purely on git blame, and is really just here to make
sure this hits LTS branches newer than v4.5.

Cc: Lorenzo Colitti <lorenzo@google.com>
Fixes: 4d06dd537f95 ("cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind")
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b04055fd1b79c..df0d1837e4ed7 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1880,7 +1880,7 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
-			| FLAG_LINK_INTR,
+			| FLAG_LINK_INTR | FLAG_ETHER,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
 	.manage_power = usbnet_manage_power,
-- 
GitLab


From 057d49334c02a79af81c30a8d240e641bd6f1741 Mon Sep 17 00:00:00 2001
From: Kristian Evensen <kristian.evensen@gmail.com>
Date: Tue, 15 Jun 2021 12:01:51 +0200
Subject: [PATCH 2899/3804] qmi_wwan: Do not call netif_rx from rx_fixup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the QMI_WWAN_FLAG_PASS_THROUGH is set, netif_rx() is called from
qmi_wwan_rx_fixup(). When the call to netif_rx() is successful (which is
most of the time), usbnet_skb_return() is called (from rx_process()).
usbnet_skb_return() will then call netif_rx() a second time for the same
skb.

Simplify the code and avoid the redundant netif_rx() call by changing
qmi_wwan_rx_fixup() to always return 1 when QMI_WWAN_FLAG_PASS_THROUGH
is set. We then leave it up to the existing infrastructure to call
netif_rx().

Suggested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/qmi_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 6700f1970b240..bc55ec739af90 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -575,7 +575,7 @@ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 
 	if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) {
 		skb->protocol = htons(ETH_P_MAP);
-		return (netif_rx(skb) == NET_RX_SUCCESS);
+		return 1;
 	}
 
 	switch (skb->data[0] & 0xf0) {
-- 
GitLab


From 7ea6cd16f1599c1eac6018751eadbc5fc736b99a Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Tue, 15 Jun 2021 22:42:57 +0200
Subject: [PATCH 2900/3804] lantiq: net: fix duplicated skb in rx descriptor
 ring

The previous commit didn't fix the bug properly. By mistake, it replaces
the pointer of the next skb in the descriptor ring instead of the current
one. As a result, the two descriptors are assigned the same SKB. The error
is seen during the iperf test when skb_put tries to insert a second packet
and exceeds the available buffer.

Fixes: c7718ee96dbc ("net: lantiq: fix memory corruption in RX ring ")
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 0e10d8aeffe18..21ef2f1280705 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -154,6 +154,7 @@ static int xrx200_close(struct net_device *net_dev)
 
 static int xrx200_alloc_skb(struct xrx200_chan *ch)
 {
+	struct sk_buff *skb = ch->skb[ch->dma.desc];
 	dma_addr_t mapping;
 	int ret = 0;
 
@@ -168,6 +169,7 @@ static int xrx200_alloc_skb(struct xrx200_chan *ch)
 				 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) {
 		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
+		ch->skb[ch->dma.desc] = skb;
 		ret = -ENOMEM;
 		goto skip;
 	}
@@ -198,7 +200,6 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
 	ch->dma.desc %= LTQ_DESC_NUM;
 
 	if (ret) {
-		ch->skb[ch->dma.desc] = skb;
 		net_dev->stats.rx_dropped++;
 		netdev_err(net_dev, "failed to allocate new rx buffer\n");
 		return ret;
-- 
GitLab


From 9c54cd10e43947caa64920aaa7a30858193f8ef5 Mon Sep 17 00:00:00 2001
From: Charles Rose <charles.rose@dell.com>
Date: Tue, 15 Jun 2021 14:08:01 -0500
Subject: [PATCH 2901/3804] ahci: Add support for Dell S140 and later
 controllers

This patch enables support for Dell S140 and later controllers
that use Intel's PCHs configured as PCI_CLASS_STORAGE_RAID.

Reviewed-by: Mika Westerberg <mika.westerberg@intel.com>
Signed-off-by: Charles Rose <charles.rose@dell.com>
Link: https://lore.kernel.org/r/20210615190801.1744466-1-charles.rose@dell.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/ahci.c      | 4 ++++
 include/linux/pci_ids.h | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 33192a8f687d6..186cbf90c8ead 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -446,6 +446,10 @@ static const struct pci_device_id ahci_pci_tbl[] = {
 	{ PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
 	  PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
 
+	/* Dell S140/S150 */
+	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_SUBVENDOR_ID_DELL, PCI_ANY_ID,
+	  PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci },
+
 	/* VIA */
 	{ PCI_VDEVICE(VIA, 0x3349), board_ahci_vt8251 }, /* VIA VT8251 */
 	{ PCI_VDEVICE(VIA, 0x6287), board_ahci_vt8251 }, /* VIA VT8251 */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4c3fa5293d763..803ec446a7292 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -631,6 +631,8 @@
 #define PCI_DEVICE_ID_DELL_RAC4		0x0012
 #define PCI_DEVICE_ID_DELL_PERC5	0x0015
 
+#define PCI_SUBVENDOR_ID_DELL		0x1028
+
 #define PCI_VENDOR_ID_MATROX		0x102B
 #define PCI_DEVICE_ID_MATROX_MGA_2	0x0518
 #define PCI_DEVICE_ID_MATROX_MIL	0x0519
-- 
GitLab


From c6d580d96f140596d69220f60ce0cfbea4ee5c0f Mon Sep 17 00:00:00 2001
From: Breno Lima <breno.lima@nxp.com>
Date: Mon, 14 Jun 2021 13:50:13 -0400
Subject: [PATCH 2902/3804] usb: chipidea: imx: Fix Battery Charger 1.2 CDP
 detection

i.MX8MM cannot detect certain CDP USB HUBs. usbmisc_imx.c driver is not
following CDP timing requirements defined by USB BC 1.2 specification
and section 3.2.4 Detection Timing CDP.

During Primary Detection the i.MX device should turn on VDP_SRC and
IDM_SINK for a minimum of 40ms (TVDPSRC_ON). After a time of TVDPSRC_ON,
the i.MX is allowed to check the status of the D- line. Current
implementation is waiting between 1ms and 2ms, and certain BC 1.2
compliant USB HUBs cannot be detected. Increase delay to 40ms allowing
enough time for primary detection.

During secondary detection the i.MX is required to disable VDP_SRC and
IDM_SNK, and enable VDM_SRC and IDP_SINK for at least 40ms (TVDMSRC_ON).

Current implementation is not disabling VDP_SRC and IDM_SNK, introduce
disable sequence in imx7d_charger_secondary_detection() function.

VDM_SRC and IDP_SINK should be enabled for at least 40ms (TVDMSRC_ON).
Increase delay allowing enough time for detection.

Cc: <stable@vger.kernel.org>
Fixes: 746f316b753a ("usb: chipidea: introduce imx7d USB charger detection")
Signed-off-by: Breno Lima <breno.lima@nxp.com>
Signed-off-by: Jun Li <jun.li@nxp.com>
Link: https://lore.kernel.org/r/20210614175013.495808-1-breno.lima@nxp.com
Signed-off-by: Peter Chen <peter.chen@kernel.org>
---
 drivers/usb/chipidea/usbmisc_imx.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index 4545b23bda3f1..bac0f5458cab9 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -686,6 +686,16 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
 	int val;
 	unsigned long flags;
 
+	/* Clear VDATSRCENB0 to disable VDP_SRC and IDM_SNK required by BC 1.2 spec */
+	spin_lock_irqsave(&usbmisc->lock, flags);
+	val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+	val &= ~MX7D_USB_OTG_PHY_CFG2_CHRG_VDATSRCENB0;
+	writel(val, usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
+	spin_unlock_irqrestore(&usbmisc->lock, flags);
+
+	/* TVDMSRC_DIS */
+	msleep(20);
+
 	/* VDM_SRC is connected to D- and IDP_SINK is connected to D+ */
 	spin_lock_irqsave(&usbmisc->lock, flags);
 	val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
@@ -695,7 +705,8 @@ static int imx7d_charger_secondary_detection(struct imx_usbmisc_data *data)
 				usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
 	spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-	usleep_range(1000, 2000);
+	/* TVDMSRC_ON */
+	msleep(40);
 
 	/*
 	 * Per BC 1.2, check voltage of D+:
@@ -798,7 +809,8 @@ static int imx7d_charger_primary_detection(struct imx_usbmisc_data *data)
 				usbmisc->base + MX7D_USB_OTG_PHY_CFG2);
 	spin_unlock_irqrestore(&usbmisc->lock, flags);
 
-	usleep_range(1000, 2000);
+	/* TVDPSRC_ON */
+	msleep(40);
 
 	/* Check if D- is less than VDAT_REF to determine an SDP per BC 1.2 */
 	val = readl(usbmisc->base + MX7D_USB_OTG_PHY_STATUS);
-- 
GitLab


From 703ac06a88f07f1fdde795d00c0296750e2b8e4c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sat, 5 Jun 2021 14:00:54 +0200
Subject: [PATCH 2903/3804] media: docs: */media/index.rst: don't use ReST
 doc:`foo`

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/admin-guide/media/index.rst   | 12 +++++++-----
 Documentation/driver-api/media/index.rst    | 10 ++++++----
 Documentation/userspace-api/media/index.rst | 12 +++++++-----
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/Documentation/admin-guide/media/index.rst b/Documentation/admin-guide/media/index.rst
index 6e0d2bae71543..c676af665111d 100644
--- a/Documentation/admin-guide/media/index.rst
+++ b/Documentation/admin-guide/media/index.rst
@@ -11,12 +11,14 @@ its supported drivers.
 
 Please see:
 
-- :doc:`/userspace-api/media/index`
-     for the userspace APIs used on media devices.
+Documentation/userspace-api/media/index.rst
 
-- :doc:`/driver-api/media/index`
-     for driver development information and Kernel APIs used by
-     media devices;
+  - for the userspace APIs used on media devices.
+
+Documentation/driver-api/media/index.rst
+
+  - for driver development information and Kernel APIs used by
+    media devices;
 
 The media subsystem
 ===================
diff --git a/Documentation/driver-api/media/index.rst b/Documentation/driver-api/media/index.rst
index 2ad71dfa8828f..813d7db59da71 100644
--- a/Documentation/driver-api/media/index.rst
+++ b/Documentation/driver-api/media/index.rst
@@ -11,11 +11,13 @@ its supported drivers.
 
 Please see:
 
-- :doc:`/admin-guide/media/index`
-    for usage information about media subsystem and supported drivers;
+Documentation/admin-guide/media/index.rst
 
-- :doc:`/userspace-api/media/index`
-     for the userspace APIs used on media devices.
+  - for usage information about media subsystem and supported drivers;
+
+Documentation/userspace-api/media/index.rst
+
+  - for the userspace APIs used on media devices.
 
 
 .. only:: html
diff --git a/Documentation/userspace-api/media/index.rst b/Documentation/userspace-api/media/index.rst
index 7f42f83b9f59c..d839904be0859 100644
--- a/Documentation/userspace-api/media/index.rst
+++ b/Documentation/userspace-api/media/index.rst
@@ -11,12 +11,14 @@ used by media devices.
 
 Please see:
 
-- :doc:`/admin-guide/media/index`
-    for usage information about media subsystem and supported drivers;
+Documentation/admin-guide/media/index.rst
 
-- :doc:`/driver-api/media/index`
-     for driver development information and Kernel APIs used by
-     media devices;
+  - for usage information about media subsystem and supported drivers;
+
+Documentation/driver-api/media/index.rst
+
+  - for driver development information and Kernel APIs used by
+    media devices;
 
 
 .. only:: html
-- 
GitLab


From d759cd46b9f15180321b6f246a6e0964d4510aef Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sat, 5 Jun 2021 14:40:12 +0200
Subject: [PATCH 2904/3804] media: userspace-api: avoid using ReST :doc:`foo`
 markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/userspace-api/media/glossary.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/userspace-api/media/glossary.rst b/Documentation/userspace-api/media/glossary.rst
index cb165d7176b78..96a360edbf3b1 100644
--- a/Documentation/userspace-api/media/glossary.rst
+++ b/Documentation/userspace-api/media/glossary.rst
@@ -116,7 +116,7 @@ Glossary
 	  - :term:`RC API`; and
 	  - :term:`V4L2 API`.
 
-	See :doc:`index`.
+	See Documentation/userspace-api/media/index.rst.
 
     MC API
 	**Media Controller API**
-- 
GitLab


From a169c44e58190bbdaf9c8d345cd445eec2c2b010 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sat, 5 Jun 2021 14:40:12 +0200
Subject: [PATCH 2905/3804] media: driver-api: drivers: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/driver-api/media/drivers/bttv-devel.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/media/drivers/bttv-devel.rst b/Documentation/driver-api/media/drivers/bttv-devel.rst
index c9aa8b95a5e5f..0885a04563a94 100644
--- a/Documentation/driver-api/media/drivers/bttv-devel.rst
+++ b/Documentation/driver-api/media/drivers/bttv-devel.rst
@@ -21,7 +21,7 @@ log, telling which card type is used.  Like this one::
 
 You should verify this is correct.  If it isn't, you have to pass the
 correct board type as insmod argument, ``insmod bttv card=2`` for
-example.  The file :doc:`/admin-guide/media/bttv-cardlist` has a list
+example.  The file Documentation/admin-guide/media/bttv-cardlist.rst has a list
 of valid arguments for card.
 
 If your card isn't listed there, you might check the source code for
-- 
GitLab


From 6ef43d273e8562366035d8086008e4000a270fd8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sat, 5 Jun 2021 14:40:12 +0200
Subject: [PATCH 2906/3804] media: admin-guide: avoid using ReST :doc:`foo`
 markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/admin-guide/media/bt8xx.rst   | 15 ++++++++-------
 Documentation/admin-guide/media/bttv.rst    | 21 +++++++++++----------
 Documentation/admin-guide/media/saa7134.rst |  3 ++-
 3 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/Documentation/admin-guide/media/bt8xx.rst b/Documentation/admin-guide/media/bt8xx.rst
index 1382ada1e38eb..3589f6ab7e466 100644
--- a/Documentation/admin-guide/media/bt8xx.rst
+++ b/Documentation/admin-guide/media/bt8xx.rst
@@ -15,11 +15,12 @@ Authors:
 General information
 -------------------
 
-This class of cards has a bt878a as the PCI interface, and require the bttv driver
-for accessing the i2c bus and the gpio pins of the bt8xx chipset.
+This class of cards has a bt878a as the PCI interface, and require the bttv
+driver for accessing the i2c bus and the gpio pins of the bt8xx chipset.
 
-Please see :doc:`bttv-cardlist` for a complete list of Cards based on the
-Conexant Bt8xx PCI bridge supported by the Linux Kernel.
+Please see Documentation/admin-guide/media/bttv-cardlist.rst for a complete
+list of Cards based on the Conexant Bt8xx PCI bridge supported by the
+Linux Kernel.
 
 In order to be able to compile the kernel, some config options should be
 enabled::
@@ -80,7 +81,7 @@ for dvb-bt8xx drivers by passing modprobe parameters may be necessary.
 Running TwinHan and Clones
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-As shown at :doc:`bttv-cardlist`, TwinHan and
+As shown at Documentation/admin-guide/media/bttv-cardlist.rst, TwinHan and
 clones use ``card=113`` modprobe parameter. So, in order to properly
 detect it for devices without EEPROM, you should use::
 
@@ -105,12 +106,12 @@ The autodetected values are determined by the cards' "response string".
 In your logs see f. ex.: dst_get_device_id: Recognize [DSTMCI].
 
 For bug reports please send in a complete log with verbose=4 activated.
-Please also see :doc:`ci`.
+Please also see Documentation/admin-guide/media/ci.rst.
 
 Running multiple cards
 ~~~~~~~~~~~~~~~~~~~~~~
 
-See :doc:`bttv-cardlist` for a complete list of
+See Documentation/admin-guide/media/bttv-cardlist.rst for a complete list of
 Card ID. Some examples:
 
 	===========================	===
diff --git a/Documentation/admin-guide/media/bttv.rst b/Documentation/admin-guide/media/bttv.rst
index 0ef1f203104d3..125f6f47123d1 100644
--- a/Documentation/admin-guide/media/bttv.rst
+++ b/Documentation/admin-guide/media/bttv.rst
@@ -24,7 +24,8 @@ If your board has digital TV, you'll also need::
 
     ./scripts/config -m DVB_BT8XX
 
-In this case, please see :doc:`bt8xx` for additional notes.
+In this case, please see Documentation/admin-guide/media/bt8xx.rst
+for additional notes.
 
 Make bttv work with your card
 -----------------------------
@@ -39,7 +40,7 @@ If it doesn't bttv likely could not autodetect your card and needs some
 insmod options.  The most important insmod option for bttv is "card=n"
 to select the correct card type.  If you get video but no sound you've
 very likely specified the wrong (or no) card type.  A list of supported
-cards is in :doc:`bttv-cardlist`.
+cards is in Documentation/admin-guide/media/bttv-cardlist.rst.
 
 If bttv takes very long to load (happens sometimes with the cheap
 cards which have no tuner), try adding this to your modules configuration
@@ -57,8 +58,8 @@ directory should be enough for it to be autoload during the driver's
 probing mode (e. g. when the Kernel boots or when the driver is
 manually loaded via ``modprobe`` command).
 
-If your card isn't listed in :doc:`bttv-cardlist` or if you have
-trouble making audio work, please read :ref:`still_doesnt_work`.
+If your card isn't listed in Documentation/admin-guide/media/bttv-cardlist.rst
+or if you have trouble making audio work, please read :ref:`still_doesnt_work`.
 
 
 Autodetecting cards
@@ -77,8 +78,8 @@ the Subsystem ID in the second line, looks like this:
 only bt878-based cards can have a subsystem ID (which does not mean
 that every card really has one).  bt848 cards can't have a Subsystem
 ID and therefore can't be autodetected.  There is a list with the ID's
-at :doc:`bttv-cardlist` (in case you are interested or want to mail
-patches with updates).
+at Documentation/admin-guide/media/bttv-cardlist.rst
+(in case you are interested or want to mail patches with updates).
 
 
 .. _still_doesnt_work:
@@ -259,15 +260,15 @@ bug.  It is very helpful if you can tell where exactly it broke
 With a hard freeze you probably doesn't find anything in the logfiles.
 The only way to capture any kernel messages is to hook up a serial
 console and let some terminal application log the messages.  /me uses
-screen.  See :doc:`/admin-guide/serial-console` for details on setting
-up a serial console.
+screen.  See Documentation/admin-guide/serial-console.rst for details on
+setting up a serial console.
 
-Read :doc:`/admin-guide/bug-hunting` to learn how to get any useful
+Read Documentation/admin-guide/bug-hunting.rst to learn how to get any useful
 information out of a register+stack dump printed by the kernel on
 protection faults (so-called "kernel oops").
 
 If you run into some kind of deadlock, you can try to dump a call trace
-for each process using sysrq-t (see :doc:`/admin-guide/sysrq`).
+for each process using sysrq-t (see Documentation/admin-guide/sysrq.rst).
 This way it is possible to figure where *exactly* some process in "D"
 state is stuck.
 
diff --git a/Documentation/admin-guide/media/saa7134.rst b/Documentation/admin-guide/media/saa7134.rst
index 7ab9c70b9abea..51eae7eb5ab7f 100644
--- a/Documentation/admin-guide/media/saa7134.rst
+++ b/Documentation/admin-guide/media/saa7134.rst
@@ -50,7 +50,8 @@ To build and install, you should run::
 Once the new Kernel is booted, saa7134 driver should be loaded automatically.
 
 Depending on the card you might have to pass ``card=<nr>`` as insmod option.
-If so, please check :doc:`saa7134-cardlist` for valid choices.
+If so, please check Documentation/admin-guide/media/saa7134-cardlist.rst
+for valid choices.
 
 Once you have your card type number, you can pass a modules configuration
 via a file (usually, it is either ``/etc/modules.conf`` or some file at
-- 
GitLab


From 6262e1b906a1ba12688ea6039453b4a088dbaf44 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punitagrawal@gmail.com>
Date: Tue, 15 Jun 2021 08:56:35 +0900
Subject: [PATCH 2907/3804] printk: Move EXPORT_SYMBOL() closer to vprintk
 definition

Commit 28e1745b9fa2 ("printk: rename vprintk_func to vprintk") while
improving readability by removing vprintk indirection, inadvertently
placed the EXPORT_SYMBOL() for the newly renamed function at the end
of the file.

For reader sanity, and as is convention, move the EXPORT_SYMBOL()
declaration just after the end of the function.

Fixes: 28e1745b9fa2 ("printk: rename vprintk_func to vprintk")
Signed-off-by: Punit Agrawal <punitagrawal@gmail.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210614235635.887365-1-punitagrawal@gmail.com
---
 kernel/printk/printk_safe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 7a1414622051a..94232186fccba 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -391,6 +391,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	/* No obstacles. */
 	return vprintk_default(fmt, args);
 }
+EXPORT_SYMBOL(vprintk);
 
 void __init printk_safe_init(void)
 {
@@ -411,4 +412,3 @@ void __init printk_safe_init(void)
 	/* Flush pending messages that did not have scheduled IRQ works. */
 	printk_safe_flush();
 }
-EXPORT_SYMBOL(vprintk);
-- 
GitLab


From 2030043e616cab40f510299f09b636285e0a3678 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 21 May 2021 13:57:20 +0200
Subject: [PATCH 2908/3804] can: j1939: fix Use-after-Free, hold skb ref while
 in use

This patch fixes a Use-after-Free found by the syzbot.

The problem is that a skb is taken from the per-session skb queue,
without incrementing the ref count. This leads to a Use-after-Free if
the skb is taken concurrently from the session queue due to a CTS.

Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol")
Link: https://lore.kernel.org/r/20210521115720.7533-1-o.rempel@pengutronix.de
Cc: Hillf Danton <hdanton@sina.com>
Cc: linux-stable <stable@vger.kernel.org>
Reported-by: syzbot+220c1a29987a9a490903@syzkaller.appspotmail.com
Reported-by: syzbot+45199c1b73b4013525cf@syzkaller.appspotmail.com
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/j1939/transport.c | 54 +++++++++++++++++++++++++++++----------
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index e09d087ba2409..c3946c3558826 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -330,6 +330,9 @@ static void j1939_session_skb_drop_old(struct j1939_session *session)
 
 	if ((do_skcb->offset + do_skb->len) < offset_start) {
 		__skb_unlink(do_skb, &session->skb_queue);
+		/* drop ref taken in j1939_session_skb_queue() */
+		skb_unref(do_skb);
+
 		kfree_skb(do_skb);
 	}
 	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
@@ -349,12 +352,13 @@ void j1939_session_skb_queue(struct j1939_session *session,
 
 	skcb->flags |= J1939_ECU_LOCAL_SRC;
 
+	skb_get(skb);
 	skb_queue_tail(&session->skb_queue, skb);
 }
 
 static struct
-sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
-					  unsigned int offset_start)
+sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session,
+					 unsigned int offset_start)
 {
 	struct j1939_priv *priv = session->priv;
 	struct j1939_sk_buff_cb *do_skcb;
@@ -371,6 +375,10 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
 			skb = do_skb;
 		}
 	}
+
+	if (skb)
+		skb_get(skb);
+
 	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
 
 	if (!skb)
@@ -381,12 +389,12 @@ sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session,
 	return skb;
 }
 
-static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+static struct sk_buff *j1939_session_skb_get(struct j1939_session *session)
 {
 	unsigned int offset_start;
 
 	offset_start = session->pkt.dpo * 7;
-	return j1939_session_skb_find_by_offset(session, offset_start);
+	return j1939_session_skb_get_by_offset(session, offset_start);
 }
 
 /* see if we are receiver
@@ -776,7 +784,7 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 	int ret = 0;
 	u8 dat[8];
 
-	se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7);
+	se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7);
 	if (!se_skb)
 		return -ENOBUFS;
 
@@ -801,7 +809,8 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 			netdev_err_once(priv->ndev,
 					"%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n",
 					__func__, session, skcb->offset, se_skb->len , session->pkt.tx);
-			return -EOVERFLOW;
+			ret = -EOVERFLOW;
+			goto out_free;
 		}
 
 		if (!len) {
@@ -835,6 +844,12 @@ static int j1939_session_tx_dat(struct j1939_session *session)
 	if (pkt_done)
 		j1939_tp_set_rxtimeout(session, 250);
 
+ out_free:
+	if (ret)
+		kfree_skb(se_skb);
+	else
+		consume_skb(se_skb);
+
 	return ret;
 }
 
@@ -1007,7 +1022,7 @@ static int j1939_xtp_txnext_receiver(struct j1939_session *session)
 static int j1939_simple_txnext(struct j1939_session *session)
 {
 	struct j1939_priv *priv = session->priv;
-	struct sk_buff *se_skb = j1939_session_skb_find(session);
+	struct sk_buff *se_skb = j1939_session_skb_get(session);
 	struct sk_buff *skb;
 	int ret;
 
@@ -1015,8 +1030,10 @@ static int j1939_simple_txnext(struct j1939_session *session)
 		return 0;
 
 	skb = skb_clone(se_skb, GFP_ATOMIC);
-	if (!skb)
-		return -ENOMEM;
+	if (!skb) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
 
 	can_skb_set_owner(skb, se_skb->sk);
 
@@ -1024,12 +1041,18 @@ static int j1939_simple_txnext(struct j1939_session *session)
 
 	ret = j1939_send_one(priv, skb);
 	if (ret)
-		return ret;
+		goto out_free;
 
 	j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
 	j1939_sk_queue_activate_next(session);
 
-	return 0;
+ out_free:
+	if (ret)
+		kfree_skb(se_skb);
+	else
+		consume_skb(se_skb);
+
+	return ret;
 }
 
 static bool j1939_session_deactivate_locked(struct j1939_session *session)
@@ -1170,9 +1193,10 @@ static void j1939_session_completed(struct j1939_session *session)
 	struct sk_buff *skb;
 
 	if (!session->transmission) {
-		skb = j1939_session_skb_find(session);
+		skb = j1939_session_skb_get(session);
 		/* distribute among j1939 receivers */
 		j1939_sk_recv(session->priv, skb);
+		consume_skb(skb);
 	}
 
 	j1939_session_deactivate_activate_next(session);
@@ -1744,7 +1768,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 {
 	struct j1939_priv *priv = session->priv;
 	struct j1939_sk_buff_cb *skcb;
-	struct sk_buff *se_skb;
+	struct sk_buff *se_skb = NULL;
 	const u8 *dat;
 	u8 *tpdat;
 	int offset;
@@ -1786,7 +1810,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 		goto out_session_cancel;
 	}
 
-	se_skb = j1939_session_skb_find_by_offset(session, packet * 7);
+	se_skb = j1939_session_skb_get_by_offset(session, packet * 7);
 	if (!se_skb) {
 		netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
 			    session);
@@ -1848,11 +1872,13 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
 		j1939_tp_set_rxtimeout(session, 250);
 	}
 	session->last_cmd = 0xff;
+	consume_skb(se_skb);
 	j1939_session_put(session);
 
 	return;
 
  out_session_cancel:
+	kfree_skb(se_skb);
 	j1939_session_timers_cancel(session);
 	j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
 	j1939_session_put(session);
-- 
GitLab


From 8d0caedb759683041d9db82069937525999ada53 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Sat, 5 Jun 2021 19:26:35 +0900
Subject: [PATCH 2909/3804] can: bcm/raw/isotp: use per module netdevice
 notifier

syzbot is reporting hung tasks at register_netdevice_notifier() [1] and
unregister_netdevice_notifier() [2], because cleanup_net() might perform
time-consuming operations while the CAN driver's raw/bcm/isotp modules
are calling {register,unregister}_netdevice_notifier() on each socket.

Change the raw/bcm/isotp modules to call register_netdevice_notifier()
from the module's __init function and unregister_netdevice_notifier()
from the module's __exit function, as the gw/j1939 modules are doing.

Link: https://syzkaller.appspot.com/bug?id=391b9498827788b3cc6830226d4ff5be87107c30 [1]
Link: https://syzkaller.appspot.com/bug?id=1724d278c83ca6e6df100a2e320c10d991cf2bce [2]
Link: https://lore.kernel.org/r/54a5f451-05ed-f977-8534-79e7aa2bcc8f@i-love.sakura.ne.jp
Cc: linux-stable <stable@vger.kernel.org>
Reported-by: syzbot <syzbot+355f8edb2ff45d5f95fa@syzkaller.appspotmail.com>
Reported-by: syzbot <syzbot+0f1827363a305f74996f@syzkaller.appspotmail.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Tested-by: syzbot <syzbot+355f8edb2ff45d5f95fa@syzkaller.appspotmail.com>
Tested-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/bcm.c   | 59 +++++++++++++++++++++++++++++++++++-----------
 net/can/isotp.c | 61 +++++++++++++++++++++++++++++++++++++-----------
 net/can/raw.c   | 62 ++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 142 insertions(+), 40 deletions(-)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index 909b9e684e043..f00176b2a6c30 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -125,7 +125,7 @@ struct bcm_sock {
 	struct sock sk;
 	int bound;
 	int ifindex;
-	struct notifier_block notifier;
+	struct list_head notifier;
 	struct list_head rx_ops;
 	struct list_head tx_ops;
 	unsigned long dropped_usr_msgs;
@@ -133,6 +133,10 @@ struct bcm_sock {
 	char procname [32]; /* inode number in decimal with \0 */
 };
 
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
 static inline struct bcm_sock *bcm_sk(const struct sock *sk)
 {
 	return (struct bcm_sock *)sk;
@@ -1378,20 +1382,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 /*
  * notification handler for netdevice status changes
  */
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
-			void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+		       struct net_device *dev)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
 	struct sock *sk = &bo->sk;
 	struct bcm_op *op;
 	int notify_enodev = 0;
 
 	if (!net_eq(dev_net(dev), sock_net(sk)))
-		return NOTIFY_DONE;
-
-	if (dev->type != ARPHRD_CAN)
-		return NOTIFY_DONE;
+		return;
 
 	switch (msg) {
 
@@ -1426,7 +1425,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
 				sk->sk_error_report(sk);
 		}
 	}
+}
 
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+			void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+	if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+		return NOTIFY_DONE;
+	if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+		return NOTIFY_DONE;
+
+	spin_lock(&bcm_notifier_lock);
+	list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+		spin_unlock(&bcm_notifier_lock);
+		bcm_notify(bcm_busy_notifier, msg, dev);
+		spin_lock(&bcm_notifier_lock);
+	}
+	bcm_busy_notifier = NULL;
+	spin_unlock(&bcm_notifier_lock);
 	return NOTIFY_DONE;
 }
 
@@ -1446,9 +1466,9 @@ static int bcm_init(struct sock *sk)
 	INIT_LIST_HEAD(&bo->rx_ops);
 
 	/* set notifier */
-	bo->notifier.notifier_call = bcm_notifier;
-
-	register_netdevice_notifier(&bo->notifier);
+	spin_lock(&bcm_notifier_lock);
+	list_add_tail(&bo->notifier, &bcm_notifier_list);
+	spin_unlock(&bcm_notifier_lock);
 
 	return 0;
 }
@@ -1471,7 +1491,14 @@ static int bcm_release(struct socket *sock)
 
 	/* remove bcm_ops, timer, rx_unregister(), etc. */
 
-	unregister_netdevice_notifier(&bo->notifier);
+	spin_lock(&bcm_notifier_lock);
+	while (bcm_busy_notifier == bo) {
+		spin_unlock(&bcm_notifier_lock);
+		schedule_timeout_uninterruptible(1);
+		spin_lock(&bcm_notifier_lock);
+	}
+	list_del(&bo->notifier);
+	spin_unlock(&bcm_notifier_lock);
 
 	lock_sock(sk);
 
@@ -1692,6 +1719,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = {
 	.exit = canbcm_pernet_exit,
 };
 
+static struct notifier_block canbcm_notifier = {
+	.notifier_call = bcm_notifier
+};
+
 static int __init bcm_module_init(void)
 {
 	int err;
@@ -1705,12 +1736,14 @@ static int __init bcm_module_init(void)
 	}
 
 	register_pernet_subsys(&canbcm_pernet_ops);
+	register_netdevice_notifier(&canbcm_notifier);
 	return 0;
 }
 
 static void __exit bcm_module_exit(void)
 {
 	can_proto_unregister(&bcm_can_proto);
+	unregister_netdevice_notifier(&canbcm_notifier);
 	unregister_pernet_subsys(&canbcm_pernet_ops);
 }
 
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 253b24417c8e5..be6183f8ca110 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -143,10 +143,14 @@ struct isotp_sock {
 	u32 force_tx_stmin;
 	u32 force_rx_stmin;
 	struct tpcon rx, tx;
-	struct notifier_block notifier;
+	struct list_head notifier;
 	wait_queue_head_t wait;
 };
 
+static LIST_HEAD(isotp_notifier_list);
+static DEFINE_SPINLOCK(isotp_notifier_lock);
+static struct isotp_sock *isotp_busy_notifier;
+
 static inline struct isotp_sock *isotp_sk(const struct sock *sk)
 {
 	return (struct isotp_sock *)sk;
@@ -1013,7 +1017,14 @@ static int isotp_release(struct socket *sock)
 	/* wait for complete transmission of current pdu */
 	wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
 
-	unregister_netdevice_notifier(&so->notifier);
+	spin_lock(&isotp_notifier_lock);
+	while (isotp_busy_notifier == so) {
+		spin_unlock(&isotp_notifier_lock);
+		schedule_timeout_uninterruptible(1);
+		spin_lock(&isotp_notifier_lock);
+	}
+	list_del(&so->notifier);
+	spin_unlock(&isotp_notifier_lock);
 
 	lock_sock(sk);
 
@@ -1317,21 +1328,16 @@ static int isotp_getsockopt(struct socket *sock, int level, int optname,
 	return 0;
 }
 
-static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
-			  void *ptr)
+static void isotp_notify(struct isotp_sock *so, unsigned long msg,
+			 struct net_device *dev)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier);
 	struct sock *sk = &so->sk;
 
 	if (!net_eq(dev_net(dev), sock_net(sk)))
-		return NOTIFY_DONE;
-
-	if (dev->type != ARPHRD_CAN)
-		return NOTIFY_DONE;
+		return;
 
 	if (so->ifindex != dev->ifindex)
-		return NOTIFY_DONE;
+		return;
 
 	switch (msg) {
 	case NETDEV_UNREGISTER:
@@ -1357,7 +1363,28 @@ static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
 			sk->sk_error_report(sk);
 		break;
 	}
+}
 
+static int isotp_notifier(struct notifier_block *nb, unsigned long msg,
+			  void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+	if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+		return NOTIFY_DONE;
+	if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */
+		return NOTIFY_DONE;
+
+	spin_lock(&isotp_notifier_lock);
+	list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) {
+		spin_unlock(&isotp_notifier_lock);
+		isotp_notify(isotp_busy_notifier, msg, dev);
+		spin_lock(&isotp_notifier_lock);
+	}
+	isotp_busy_notifier = NULL;
+	spin_unlock(&isotp_notifier_lock);
 	return NOTIFY_DONE;
 }
 
@@ -1394,8 +1421,9 @@ static int isotp_init(struct sock *sk)
 
 	init_waitqueue_head(&so->wait);
 
-	so->notifier.notifier_call = isotp_notifier;
-	register_netdevice_notifier(&so->notifier);
+	spin_lock(&isotp_notifier_lock);
+	list_add_tail(&so->notifier, &isotp_notifier_list);
+	spin_unlock(&isotp_notifier_lock);
 
 	return 0;
 }
@@ -1442,6 +1470,10 @@ static const struct can_proto isotp_can_proto = {
 	.prot = &isotp_proto,
 };
 
+static struct notifier_block canisotp_notifier = {
+	.notifier_call = isotp_notifier
+};
+
 static __init int isotp_module_init(void)
 {
 	int err;
@@ -1451,6 +1483,8 @@ static __init int isotp_module_init(void)
 	err = can_proto_register(&isotp_can_proto);
 	if (err < 0)
 		pr_err("can: registration of isotp protocol failed\n");
+	else
+		register_netdevice_notifier(&canisotp_notifier);
 
 	return err;
 }
@@ -1458,6 +1492,7 @@ static __init int isotp_module_init(void)
 static __exit void isotp_module_exit(void)
 {
 	can_proto_unregister(&isotp_can_proto);
+	unregister_netdevice_notifier(&canisotp_notifier);
 }
 
 module_init(isotp_module_init);
diff --git a/net/can/raw.c b/net/can/raw.c
index 139d9471ddcf4..ac96fc2100253 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -83,7 +83,7 @@ struct raw_sock {
 	struct sock sk;
 	int bound;
 	int ifindex;
-	struct notifier_block notifier;
+	struct list_head notifier;
 	int loopback;
 	int recv_own_msgs;
 	int fd_frames;
@@ -95,6 +95,10 @@ struct raw_sock {
 	struct uniqframe __percpu *uniq;
 };
 
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
 /* Return pointer to store the extra msg flags for raw_recvmsg().
  * We use the space of one unsigned int beyond the 'struct sockaddr_can'
  * in skb->cb.
@@ -263,21 +267,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev,
 	return err;
 }
 
-static int raw_notifier(struct notifier_block *nb,
-			unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+		       struct net_device *dev)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
 	struct sock *sk = &ro->sk;
 
 	if (!net_eq(dev_net(dev), sock_net(sk)))
-		return NOTIFY_DONE;
-
-	if (dev->type != ARPHRD_CAN)
-		return NOTIFY_DONE;
+		return;
 
 	if (ro->ifindex != dev->ifindex)
-		return NOTIFY_DONE;
+		return;
 
 	switch (msg) {
 	case NETDEV_UNREGISTER:
@@ -305,7 +304,28 @@ static int raw_notifier(struct notifier_block *nb,
 			sk->sk_error_report(sk);
 		break;
 	}
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+			void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	if (dev->type != ARPHRD_CAN)
+		return NOTIFY_DONE;
+	if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+		return NOTIFY_DONE;
+	if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+		return NOTIFY_DONE;
 
+	spin_lock(&raw_notifier_lock);
+	list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+		spin_unlock(&raw_notifier_lock);
+		raw_notify(raw_busy_notifier, msg, dev);
+		spin_lock(&raw_notifier_lock);
+	}
+	raw_busy_notifier = NULL;
+	spin_unlock(&raw_notifier_lock);
 	return NOTIFY_DONE;
 }
 
@@ -334,9 +354,9 @@ static int raw_init(struct sock *sk)
 		return -ENOMEM;
 
 	/* set notifier */
-	ro->notifier.notifier_call = raw_notifier;
-
-	register_netdevice_notifier(&ro->notifier);
+	spin_lock(&raw_notifier_lock);
+	list_add_tail(&ro->notifier, &raw_notifier_list);
+	spin_unlock(&raw_notifier_lock);
 
 	return 0;
 }
@@ -351,7 +371,14 @@ static int raw_release(struct socket *sock)
 
 	ro = raw_sk(sk);
 
-	unregister_netdevice_notifier(&ro->notifier);
+	spin_lock(&raw_notifier_lock);
+	while (raw_busy_notifier == ro) {
+		spin_unlock(&raw_notifier_lock);
+		schedule_timeout_uninterruptible(1);
+		spin_lock(&raw_notifier_lock);
+	}
+	list_del(&ro->notifier);
+	spin_unlock(&raw_notifier_lock);
 
 	lock_sock(sk);
 
@@ -889,6 +916,10 @@ static const struct can_proto raw_can_proto = {
 	.prot       = &raw_proto,
 };
 
+static struct notifier_block canraw_notifier = {
+	.notifier_call = raw_notifier
+};
+
 static __init int raw_module_init(void)
 {
 	int err;
@@ -898,6 +929,8 @@ static __init int raw_module_init(void)
 	err = can_proto_register(&raw_can_proto);
 	if (err < 0)
 		pr_err("can: registration of raw protocol failed\n");
+	else
+		register_netdevice_notifier(&canraw_notifier);
 
 	return err;
 }
@@ -905,6 +938,7 @@ static __init int raw_module_init(void)
 static __exit void raw_module_exit(void)
 {
 	can_proto_unregister(&raw_can_proto);
+	unregister_netdevice_notifier(&canraw_notifier);
 }
 
 module_init(raw_module_init);
-- 
GitLab


From 5e87ddbe3942e27e939bdc02deb8579b0cbd8ecc Mon Sep 17 00:00:00 2001
From: Norbert Slusarek <nslusarek@gmx.net>
Date: Sat, 12 Jun 2021 22:18:54 +0200
Subject: [PATCH 2910/3804] can: bcm: fix infoleak in struct bcm_msg_head

On 64-bit systems, struct bcm_msg_head has an added padding of 4 bytes between
struct members count and ival1. Even though all struct members are initialized,
the 4-byte hole will contain data from the kernel stack. This patch zeroes out
struct bcm_msg_head before usage, preventing infoleaks to userspace.

Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol")
Link: https://lore.kernel.org/r/trinity-7c1b2e82-e34f-4885-8060-2cd7a13769ce-1623532166177@3c-app-gmx-bs52
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Norbert Slusarek <nslusarek@gmx.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/bcm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/can/bcm.c b/net/can/bcm.c
index f00176b2a6c30..f3e4d9528fa38 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -406,6 +406,7 @@ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
 		if (!op->count && (op->flags & TX_COUNTEVT)) {
 
 			/* create notification to user */
+			memset(&msg_head, 0, sizeof(msg_head));
 			msg_head.opcode  = TX_EXPIRED;
 			msg_head.flags   = op->flags;
 			msg_head.count   = op->count;
@@ -443,6 +444,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
 	/* this element is not throttled anymore */
 	data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
 
+	memset(&head, 0, sizeof(head));
 	head.opcode  = RX_CHANGED;
 	head.flags   = op->flags;
 	head.count   = op->count;
@@ -564,6 +566,7 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
 	}
 
 	/* create notification to user */
+	memset(&msg_head, 0, sizeof(msg_head));
 	msg_head.opcode  = RX_TIMEOUT;
 	msg_head.flags   = op->flags;
 	msg_head.count   = op->count;
-- 
GitLab


From 91c02557174be7f72e46ed7311e3bea1939840b0 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 10 Jun 2021 00:58:33 +0300
Subject: [PATCH 2911/3804] can: mcba_usb: fix memory leak in mcba_usb

Syzbot reported memory leak in SocketCAN driver for Microchip CAN BUS
Analyzer Tool. The problem was in unfreed usb_coherent.

In mcba_usb_start(), 20 coherent buffers are allocated and there is
nothing that frees them:

1) In callback function the urb is resubmitted and that's all
2) In disconnect function urbs are simply killed, but URB_FREE_BUFFER
   is not set (see mcba_usb_start) and this flag cannot be used with
   coherent buffers.

Fail log:
| [ 1354.053291][ T8413] mcba_usb 1-1:0.0 can0: device disconnected
| [ 1367.059384][ T8420] kmemleak: 20 new suspected memory leaks (see /sys/kernel/debug/kmem)

So, all allocated buffers should be freed with usb_free_coherent()
explicitly.

NOTE:
The same pattern for allocating and freeing coherent buffers
is used in drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c

Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer")
Link: https://lore.kernel.org/r/20210609215833.30393-1-paskripkin@gmail.com
Cc: linux-stable <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+57281c762a3922e14dfe@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 029e77dfa773b..a45865bd72546 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -82,6 +82,8 @@ struct mcba_priv {
 	bool can_ka_first_pass;
 	bool can_speed_check;
 	atomic_t free_ctx_cnt;
+	void *rxbuf[MCBA_MAX_RX_URBS];
+	dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS];
 };
 
 /* CAN frame */
@@ -633,6 +635,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
 	for (i = 0; i < MCBA_MAX_RX_URBS; i++) {
 		struct urb *urb = NULL;
 		u8 *buf;
+		dma_addr_t buf_dma;
 
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -642,7 +645,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
 		}
 
 		buf = usb_alloc_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-					 GFP_KERNEL, &urb->transfer_dma);
+					 GFP_KERNEL, &buf_dma);
 		if (!buf) {
 			netdev_err(netdev, "No memory left for USB buffer\n");
 			usb_free_urb(urb);
@@ -661,11 +664,14 @@ static int mcba_usb_start(struct mcba_priv *priv)
 		if (err) {
 			usb_unanchor_urb(urb);
 			usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
-					  buf, urb->transfer_dma);
+					  buf, buf_dma);
 			usb_free_urb(urb);
 			break;
 		}
 
+		priv->rxbuf[i] = buf;
+		priv->rxbuf_dma[i] = buf_dma;
+
 		/* Drop reference, USB core will take care of freeing it */
 		usb_free_urb(urb);
 	}
@@ -708,7 +714,14 @@ static int mcba_usb_open(struct net_device *netdev)
 
 static void mcba_urb_unlink(struct mcba_priv *priv)
 {
+	int i;
+
 	usb_kill_anchored_urbs(&priv->rx_submitted);
+
+	for (i = 0; i < MCBA_MAX_RX_URBS; ++i)
+		usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE,
+				  priv->rxbuf[i], priv->rxbuf_dma[i]);
+
 	usb_kill_anchored_urbs(&priv->tx_submitted);
 }
 
-- 
GitLab


From 411efa18e4b03840553ff58ad9b4621b82a30c04 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Tue, 25 May 2021 11:10:58 +0200
Subject: [PATCH 2912/3804] drm/vc4: hdmi: Move the HSM clock enable to
 runtime_pm

In order to access the HDMI controller, we need to make sure the HSM
clock is enabled. If we were to access it with the clock disabled, the
CPU would completely hang, resulting in a hard crash.

Since we have different code paths that require it, let's move that
clock enable / disable to runtime_pm, which will take care of the
reference counting for us.

Fixes: 4f6e3d66ac52 ("drm/vc4: Add runtime PM support to the HDMI encoder driver")
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210525091059.234116-3-maxime@cerno.tech
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 40 +++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 1fda574579afc..84e2183650452 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -473,7 +473,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder,
 		   HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
 
 	clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock);
-	clk_disable_unprepare(vc4_hdmi->hsm_clock);
 	clk_disable_unprepare(vc4_hdmi->pixel_clock);
 
 	ret = pm_runtime_put(&vc4_hdmi->pdev->dev);
@@ -784,13 +783,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
 		return;
 	}
 
-	ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
-	if (ret) {
-		DRM_ERROR("Failed to turn on HSM clock: %d\n", ret);
-		clk_disable_unprepare(vc4_hdmi->pixel_clock);
-		return;
-	}
-
 	vc4_hdmi_cec_update_clk_div(vc4_hdmi);
 
 	/*
@@ -801,7 +793,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
 			       (hsm_rate > VC4_HSM_MID_CLOCK ? 150000000 : 75000000));
 	if (ret) {
 		DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret);
-		clk_disable_unprepare(vc4_hdmi->hsm_clock);
 		clk_disable_unprepare(vc4_hdmi->pixel_clock);
 		return;
 	}
@@ -809,7 +800,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder,
 	ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock);
 	if (ret) {
 		DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret);
-		clk_disable_unprepare(vc4_hdmi->hsm_clock);
 		clk_disable_unprepare(vc4_hdmi->pixel_clock);
 		return;
 	}
@@ -1929,6 +1919,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int vc4_hdmi_runtime_suspend(struct device *dev)
+{
+	struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(vc4_hdmi->hsm_clock);
+
+	return 0;
+}
+
+static int vc4_hdmi_runtime_resume(struct device *dev)
+{
+	struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+#endif
+
 static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev);
@@ -2165,11 +2178,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = {
 	{}
 };
 
+static const struct dev_pm_ops vc4_hdmi_pm_ops = {
+	SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend,
+			   vc4_hdmi_runtime_resume,
+			   NULL)
+};
+
 struct platform_driver vc4_hdmi_driver = {
 	.probe = vc4_hdmi_dev_probe,
 	.remove = vc4_hdmi_dev_remove,
 	.driver = {
 		.name = "vc4_hdmi",
 		.of_match_table = vc4_hdmi_dt_match,
+		.pm = &vc4_hdmi_pm_ops,
 	},
 };
-- 
GitLab


From 9984d6664ce9dcbbc713962539eaf7636ea246c2 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Tue, 25 May 2021 11:10:59 +0200
Subject: [PATCH 2913/3804] drm/vc4: hdmi: Make sure the controller is powered
 in detect

If the HPD GPIO is not available and drm_probe_ddc fails, we end up
reading the HDMI_HOTPLUG register, but the controller might be powered
off resulting in a CPU hang. Make sure we have the power domain and the
HSM clock powered during the detect cycle to prevent the hang from
happening.

Fixes: 4f6e3d66ac52 ("drm/vc4: Add runtime PM support to the HDMI encoder driver")
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210525091059.234116-4-maxime@cerno.tech
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 84e2183650452..8106b5634fe10 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -159,6 +159,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector);
 	bool connected = false;
 
+	WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev));
+
 	if (vc4_hdmi->hpd_gpio) {
 		if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^
 		    vc4_hdmi->hpd_active_low)
@@ -180,10 +182,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 			}
 		}
 
+		pm_runtime_put(&vc4_hdmi->pdev->dev);
 		return connector_status_connected;
 	}
 
 	cec_phys_addr_invalidate(vc4_hdmi->cec_adap);
+	pm_runtime_put(&vc4_hdmi->pdev->dev);
 	return connector_status_disconnected;
 }
 
-- 
GitLab


From 4e7dba070b1f44da9bef4a61fd633f6b73a2e853 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:52 +0100
Subject: [PATCH 2914/3804] ata: include: libata: Move fields commonly
 over-written to separate MACRO

This is a precursor to some upcoming W=1 fix-ups.

Fixes the following W=1 kernel build warning(s):

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mark Lord <mlord@pobox.com>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-2-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/libata.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5f550eb27f811..3fcd24236793e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1397,25 +1397,28 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 	ATA_SCSI_COMPAT_IOCTL					\
 	.queuecommand		= ata_scsi_queuecmd,		\
 	.dma_need_drain		= ata_scsi_dma_need_drain,	\
-	.can_queue		= ATA_DEF_QUEUE,		\
-	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
 	.this_id		= ATA_SHT_THIS_ID,		\
 	.emulated		= ATA_SHT_EMULATED,		\
 	.proc_name		= drv_name,			\
-	.slave_configure	= ata_scsi_slave_config,	\
 	.slave_destroy		= ata_scsi_slave_destroy,	\
 	.bios_param		= ata_std_bios_param,		\
 	.unlock_native_capacity	= ata_scsi_unlock_native_capacity
 
-#define ATA_BASE_SHT(drv_name)					\
+#define ATA_SUBBASE_SHT(drv_name)				\
 	__ATA_BASE_SHT(drv_name),				\
+	.can_queue		= ATA_DEF_QUEUE,		\
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,		\
+	.slave_configure	= ata_scsi_slave_config
+
+#define ATA_BASE_SHT(drv_name)					\
+	ATA_SUBBASE_SHT(drv_name),				\
 	.sdev_attrs		= ata_common_sdev_attrs
 
 #ifdef CONFIG_SATA_HOST
 extern struct device_attribute *ata_ncq_sdev_attrs[];
 
 #define ATA_NCQ_SHT(drv_name)					\
-	__ATA_BASE_SHT(drv_name),				\
+	ATA_SUBBASE_SHT(drv_name),				\
 	.sdev_attrs		= ata_ncq_sdev_attrs,		\
 	.change_queue_depth	= ata_scsi_change_queue_depth
 #endif
-- 
GitLab


From 071e86fe2872e7442e42ad26f71cd6bde55344f8 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:53 +0100
Subject: [PATCH 2915/3804] ata: ahci: Ensure initialised fields are not
 overwritten in AHCI_SHT()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

  In file included from drivers/ata/ahci_platform.c:21:
  drivers/ata/ahci.h:388:16: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_platform.c:40:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:388:16: note: (near initialization for ‘ahci_platform_sht.can_queue’)
  drivers/ata/ahci_platform.c:40:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_platform.c:40:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: note: (near initialization for ‘ahci_platform_sht.sdev_attrs’)
  drivers/ata/ahci_platform.c:40:2: note: in expansion of macro ‘AHCI_SHT’
  In file included from drivers/ata/ahci_mtk.c:18:
  drivers/ata/ahci.h:388:16: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_mtk.c:41:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:388:16: note: (near initialization for ‘ahci_platform_sht.can_queue’)
  drivers/ata/ahci_mtk.c:41:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_mtk.c:41:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: note: (near initialization for ‘ahci_platform_sht.sdev_attrs’)
  drivers/ata/ahci_mtk.c:41:2: note: in expansion of macro ‘AHCI_SHT’
  In file included from drivers/ata/ahci_dm816.c:16:
  drivers/ata/ahci.h:388:16: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_dm816.c:138:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:388:16: note: (near initialization for ‘ahci_dm816_platform_sht.can_queue’)
  drivers/ata/ahci_dm816.c:138:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: warning: initialized field overwritten [-Woverride-init]
  drivers/ata/ahci_dm816.c:138:2: note: in expansion of macro ‘AHCI_SHT’
  drivers/ata/ahci.h:392:17: note: (near initialization for ‘ahci_dm816_platform_sht.sdev_attrs’)
  drivers/ata/ahci_dm816.c:138:2: note: in expansion of macro ‘AHCI_SHT’

  NB: Snipped 150 lines of this for brevity!

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: Tejun Heo <tj@kernel.org>
Cc: ALWAYS copy <linux-ide@vger.kernel.org>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-3-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/ahci.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index d1f284f0c83d9..2e89499bd9c3d 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -384,12 +384,15 @@ extern struct device_attribute *ahci_sdev_attrs[];
  * for ATA_BASE_SHT
  */
 #define AHCI_SHT(drv_name)						\
-	ATA_NCQ_SHT(drv_name),						\
+	__ATA_BASE_SHT(drv_name),					\
 	.can_queue		= AHCI_MAX_CMDS,			\
 	.sg_tablesize		= AHCI_MAX_SG,				\
 	.dma_boundary		= AHCI_DMA_BOUNDARY,			\
 	.shost_attrs		= ahci_shost_attrs,			\
-	.sdev_attrs		= ahci_sdev_attrs
+	.sdev_attrs		= ahci_sdev_attrs,			\
+	.change_queue_depth     = ata_scsi_change_queue_depth,		\
+	.tag_alloc_policy       = BLK_TAG_ALLOC_RR,             	\
+	.slave_configure        = ata_scsi_slave_config
 
 extern struct ata_port_operations ahci_ops;
 extern struct ata_port_operations ahci_platform_ops;
-- 
GitLab


From 945a0e2875f4d5d0030617f16f48a781d4523f48 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:54 +0100
Subject: [PATCH 2916/3804] ata: sata_sil24: Do not over-write initialised
 fields in 'sil24_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 In file included from drivers/ata/sata_sil24.c:14:
 drivers/ata/sata_sil24.c:378:16: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_sil24.c:378:16: note: (near initialization for ‘sil24_sht.can_queue’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-4-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_sil24.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 560070d4f1d09..06a1e27c4f84a 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -374,11 +374,14 @@ static struct pci_driver sil24_pci_driver = {
 };
 
 static struct scsi_host_template sil24_sht = {
-	ATA_NCQ_SHT(DRV_NAME),
+	__ATA_BASE_SHT(DRV_NAME),
 	.can_queue		= SIL24_MAX_CMDS,
 	.sg_tablesize		= SIL24_MAX_SGE,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_FIFO,
+	.sdev_attrs		= ata_ncq_sdev_attrs,
+	.change_queue_depth	= ata_scsi_change_queue_depth,
+	.slave_configure	= ata_scsi_slave_config
 };
 
 static struct ata_port_operations sil24_ops = {
-- 
GitLab


From e75f41a983e75ffff8b102665580fdb3816b289e Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:55 +0100
Subject: [PATCH 2917/3804] ata: sata_mv: Do not over-write initialised fields
 in 'mv6_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/sata_mv.c:670:16: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_mv.c:670:16: note: (near initialization for ‘mv6_sht.can_queue’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mark Lord <mlord@pobox.com>
Cc: ALWAYS copy <linux-ide@vger.kernel.org>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-5-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_mv.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index c8867c12c0b86..9d86203e1e7a1 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -666,10 +666,14 @@ static struct scsi_host_template mv5_sht = {
 };
 #endif
 static struct scsi_host_template mv6_sht = {
-	ATA_NCQ_SHT(DRV_NAME),
+	__ATA_BASE_SHT(DRV_NAME),
 	.can_queue		= MV_MAX_Q_DEPTH - 1,
 	.sg_tablesize		= MV_MAX_SG_CT / 2,
 	.dma_boundary		= MV_DMA_BOUNDARY,
+	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.change_queue_depth	= ata_scsi_change_queue_depth,
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
+	.slave_configure	= ata_scsi_slave_config
 };
 
 static struct ata_port_operations mv5_ops = {
-- 
GitLab


From 7d43b8283eb23d7c042d2376c86d2d27365c3ed0 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:56 +0100
Subject: [PATCH 2918/3804] ata: sata_nv: Do not over-write initialised fields
 in 'nv_adma_sht' and 'nv_swncq_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/sata_nv.c:379:16: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_nv.c:379:16: note: (near initialization for ‘nv_adma_sht.can_queue’)
 drivers/ata/sata_nv.c:382:21: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_nv.c:382:21: note: (near initialization for ‘nv_adma_sht.slave_configure’)
 drivers/ata/sata_nv.c:387:16: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_nv.c:387:16: note: (near initialization for ‘nv_swncq_sht.can_queue’)
 drivers/ata/sata_nv.c:390:21: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/sata_nv.c:390:21: note: (near initialization for ‘nv_swncq_sht.slave_configure’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-6-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_nv.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index de45045566692..c385d18ce87b7 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -375,19 +375,25 @@ static struct scsi_host_template nv_sht = {
 };
 
 static struct scsi_host_template nv_adma_sht = {
-	ATA_NCQ_SHT(DRV_NAME),
+	__ATA_BASE_SHT(DRV_NAME),
 	.can_queue		= NV_ADMA_MAX_CPBS,
 	.sg_tablesize		= NV_ADMA_SGTBL_TOTAL_LEN,
 	.dma_boundary		= NV_ADMA_DMA_BOUNDARY,
 	.slave_configure	= nv_adma_slave_config,
+	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.change_queue_depth     = ata_scsi_change_queue_depth,
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
 
 static struct scsi_host_template nv_swncq_sht = {
-	ATA_NCQ_SHT(DRV_NAME),
+	__ATA_BASE_SHT(DRV_NAME),
 	.can_queue		= ATA_MAX_QUEUE - 1,
 	.sg_tablesize		= LIBATA_MAX_PRD,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
 	.slave_configure	= nv_swncq_slave_config,
+	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.change_queue_depth     = ata_scsi_change_queue_depth,
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
 
 /*
-- 
GitLab


From 76115de698d8d090bdd6463e27e0fa0b40fda033 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:57 +0100
Subject: [PATCH 2919/3804] ata: pata_atiixp: Avoid overwriting initialised
 field in 'atiixp_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_atiixp.c:256:19: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_atiixp.c:256:19: note: (near initialization for ‘atiixp_sht.sg_tablesize’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: ATI Inc <hyu@ati.com>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-7-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_atiixp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index d671d33ef2874..c3a65ccd4b799 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -252,8 +252,9 @@ static void atiixp_bmdma_stop(struct ata_queued_cmd *qc)
 }
 
 static struct scsi_host_template atiixp_sht = {
-	ATA_BMDMA_SHT(DRV_NAME),
+	ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize		= LIBATA_DUMB_MAX_PRD,
+	.dma_boundary		= ATA_DMA_BOUNDARY,
 };
 
 static struct ata_port_operations atiixp_port_ops = {
-- 
GitLab


From 98eb8a6ba491d8a4288d2de572721eca6354f86e Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:58 +0100
Subject: [PATCH 2920/3804] ata: pata_cs5520: Avoid overwriting initialised
 field in 'cs5520_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_cs5520.c:99:19: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_cs5520.c:99:19: note: (near initialization for ‘cs5520_sht.sg_tablesize’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-8-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_cs5520.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index d09d432d3c442..247c147026243 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c
@@ -95,8 +95,9 @@ static void cs5520_set_piomode(struct ata_port *ap, struct ata_device *adev)
 }
 
 static struct scsi_host_template cs5520_sht = {
-	ATA_BMDMA_SHT(DRV_NAME),
+	ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize		= LIBATA_DUMB_MAX_PRD,
+	.dma_boundary		= ATA_DMA_BOUNDARY,
 };
 
 static struct ata_port_operations cs5520_port_ops = {
-- 
GitLab


From 52ebd7124e0d5593b6032743bf3cb46d49b0343d Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:04:59 +0100
Subject: [PATCH 2921/3804] ata: pata_cs5530: Avoid overwriting initialised
 field in 'cs5530_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_cs5530.c:151:18: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_cs5530.c:151:18: note: (near initialization for ‘cs5530_sht.sg_tablesize’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-9-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_cs5530.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c
index a1b4aaccaa50a..d5b7ac14e78f5 100644
--- a/drivers/ata/pata_cs5530.c
+++ b/drivers/ata/pata_cs5530.c
@@ -147,8 +147,9 @@ static unsigned int cs5530_qc_issue(struct ata_queued_cmd *qc)
 }
 
 static struct scsi_host_template cs5530_sht = {
-	ATA_BMDMA_SHT(DRV_NAME),
+	ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize	= LIBATA_DUMB_MAX_PRD,
+	.dma_boundary	= ATA_DMA_BOUNDARY,
 };
 
 static struct ata_port_operations cs5530_port_ops = {
-- 
GitLab


From 160be1bc0bc3c42db845f8f0c334a05577bfe369 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:05:00 +0100
Subject: [PATCH 2922/3804] ata: pata_sc1200: Avoid overwriting initialised
 field in 'sc1200_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_sc1200.c:197:18: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_sc1200.c:197:18: note: (near initialization for ‘sc1200_sht.sg_tablesize’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Mark Lord <mlord@pobox.com>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-10-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_sc1200.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index 3b8c111140bdb..f28daf62a37df 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c
@@ -193,8 +193,9 @@ static int sc1200_qc_defer(struct ata_queued_cmd *qc)
 }
 
 static struct scsi_host_template sc1200_sht = {
-	ATA_BMDMA_SHT(DRV_NAME),
+	ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize	= LIBATA_DUMB_MAX_PRD,
+	.dma_boundary	= ATA_DMA_BOUNDARY,
 };
 
 static struct ata_port_operations sc1200_port_ops = {
-- 
GitLab


From ec3d95182b491b1e8cdd470748f133d4c7934f4c Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:05:01 +0100
Subject: [PATCH 2923/3804] ata: pata_serverworks: Avoid overwriting
 initialised field in 'serverworks_osb4_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_serverworks.c:257:18: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_serverworks.c:257:18: note: (near initialization for ‘serverworks_osb4_sht.sg_tablesize’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Andre Hedrick <andre@linux-ide.org>
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-11-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_serverworks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index 7511e11eef4d6..b602e303fb54c 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c
@@ -253,8 +253,9 @@ static void serverworks_set_dmamode(struct ata_port *ap, struct ata_device *adev
 }
 
 static struct scsi_host_template serverworks_osb4_sht = {
-	ATA_BMDMA_SHT(DRV_NAME),
+	ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize	= LIBATA_DUMB_MAX_PRD,
+	.dma_boundary	= ATA_DMA_BOUNDARY,
 };
 
 static struct scsi_host_template serverworks_csb_sht = {
-- 
GitLab


From 827b3e84fd1d2c43b7c85786d366bc53b02cd8da Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 28 May 2021 10:05:02 +0100
Subject: [PATCH 2924/3804] ata: pata_macio: Avoid overwriting initialised
 field in 'pata_macio_sht'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following W=1 kernel build warning(s):

 drivers/ata/pata_macio.c:925:21: warning: initialized field overwritten [-Woverride-init]
 drivers/ata/pata_macio.c:925:21: note: (near initialization for ‘pata_macio_sht.slave_configure’)

Cc: Jens Axboe <axboe@kernel.dk>
Cc: benh@kernel.crashing.org
Cc: linux-ide@vger.kernel.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210528090502.1799866-12-lee.jones@linaro.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_macio.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index e47a28271f5bb..be0ca8d5b3452 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -914,7 +914,7 @@ static int pata_macio_do_resume(struct pata_macio_priv *priv)
 #endif /* CONFIG_PM_SLEEP */
 
 static struct scsi_host_template pata_macio_sht = {
-	ATA_BASE_SHT(DRV_NAME),
+	__ATA_BASE_SHT(DRV_NAME),
 	.sg_tablesize		= MAX_DCMDS,
 	/* We may not need that strict one */
 	.dma_boundary		= ATA_DMA_BOUNDARY,
@@ -923,6 +923,9 @@ static struct scsi_host_template pata_macio_sht = {
 	 */
 	.max_segment_size	= MAX_DBDMA_SEG,
 	.slave_configure	= pata_macio_slave_config,
+	.sdev_attrs		= ata_common_sdev_attrs,
+	.can_queue		= ATA_DEF_QUEUE,
+	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
 
 static struct ata_port_operations pata_macio_ops = {
-- 
GitLab


From 512d895664a318d57de0ca3655d2bf1c280767a0 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 16 Jun 2021 09:18:16 +0800
Subject: [PATCH 2925/3804] regulator: rt6160: Fix setting suspend voltage

The vsel active level is for the normal voltage, the opposite level is
the suspend voltage.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: ChiYuan Huang <cy_huang@richtek.com>
Link: https://lore.kernel.org/r/20210616011816.3479406-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/rt6160-regulator.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/rt6160-regulator.c b/drivers/regulator/rt6160-regulator.c
index ccd023da43188..5d7b0e7ad69af 100644
--- a/drivers/regulator/rt6160-regulator.c
+++ b/drivers/regulator/rt6160-regulator.c
@@ -128,13 +128,19 @@ static unsigned int rt6160_get_mode(struct regulator_dev *rdev)
 static int rt6160_set_suspend_voltage(struct regulator_dev *rdev, int uV)
 {
 	struct regmap *regmap = rdev_get_regmap(rdev);
+	unsigned int suspend_vsel_reg;
 	int vsel;
 
 	vsel = regulator_map_voltage_linear(rdev, uV, uV);
 	if (vsel < 0)
 		return vsel;
 
-	return regmap_update_bits(regmap, rdev->desc->vsel_reg,
+	if (rdev->desc->vsel_reg == RT6160_REG_VSELL)
+		suspend_vsel_reg = RT6160_REG_VSELH;
+	else
+		suspend_vsel_reg = RT6160_REG_VSELL;
+
+	return regmap_update_bits(regmap, suspend_vsel_reg,
 				  RT6160_VSEL_MASK, vsel);
 }
 
-- 
GitLab


From 686f6b31bf6cea71ca941b6dbf9e1388d54222b6 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 16 Jun 2021 11:44:56 +0800
Subject: [PATCH 2926/3804] regulator: sy7636a: Add terminating entry for
 platform_device_id table

The platform_device_id table is supposed to be zero-terminated.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Alistair Francis <alistair@alistair23.me>
Link: https://lore.kernel.org/r/20210616034458.3499522-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/sy7636a-regulator.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
index c384c2b6ac46a..54ab1be1001ee 100644
--- a/drivers/regulator/sy7636a-regulator.c
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -110,6 +110,7 @@ static int sy7636a_regulator_probe(struct platform_device *pdev)
 
 static const struct platform_device_id sy7636a_regulator_id_table[] = {
 	{ "sy7636a-regulator", },
+	{ }
 };
 MODULE_DEVICE_TABLE(platform, sy7636a_regulator_id_table);
 
-- 
GitLab


From 31a89d297e196472875dc7d4a8f5dd0aaefcc0b4 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 16 Jun 2021 11:44:57 +0800
Subject: [PATCH 2927/3804] regulator: sy7636a: Make regulator_desc static
 const

It's only used in this file and never changed, make it static const.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Alistair Francis <alistair@alistair23.me>
Link: https://lore.kernel.org/r/20210616034458.3499522-2-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/sy7636a-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
index 54ab1be1001ee..c71c0a007d957 100644
--- a/drivers/regulator/sy7636a-regulator.c
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -53,7 +53,7 @@ static const struct regulator_ops sy7636a_vcom_volt_ops = {
 	.get_status = sy7636a_get_status,
 };
 
-struct regulator_desc desc = {
+static const struct regulator_desc desc = {
 	.name = "vcom",
 	.id = 0,
 	.ops = &sy7636a_vcom_volt_ops,
@@ -61,7 +61,7 @@ struct regulator_desc desc = {
 	.owner = THIS_MODULE,
 	.enable_reg = SY7636A_REG_OPERATION_MODE_CRL,
 	.enable_mask = SY7636A_OPERATION_MODE_CRL_ONOFF,
-	.poll_enabled_time	= SY7636A_POLL_ENABLED_TIME,
+	.poll_enabled_time = SY7636A_POLL_ENABLED_TIME,
 	.regulators_node = of_match_ptr("regulators"),
 	.of_match = of_match_ptr("vcom"),
 };
-- 
GitLab


From 830c364f4a2299e8215c40f0a2ba9229c0fdeede Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Wed, 16 Jun 2021 11:44:58 +0800
Subject: [PATCH 2928/3804] regulator: sy7636a: Use rdev_get_drvdata at proper
 place

In a context that already has *rdev, using rdev_get_drvdata() is more intuitive.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Alistair Francis <alistair@alistair23.me>
Link: https://lore.kernel.org/r/20210616034458.3499522-3-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/sy7636a-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/sy7636a-regulator.c b/drivers/regulator/sy7636a-regulator.c
index c71c0a007d957..e021ae08cbaa4 100644
--- a/drivers/regulator/sy7636a-regulator.c
+++ b/drivers/regulator/sy7636a-regulator.c
@@ -35,7 +35,7 @@ static int sy7636a_get_vcom_voltage_op(struct regulator_dev *rdev)
 
 static int sy7636a_get_status(struct regulator_dev *rdev)
 {
-	struct sy7636a *sy7636a = dev_get_drvdata(rdev->dev.parent);
+	struct sy7636a *sy7636a = rdev_get_drvdata(rdev);
 	int ret = 0;
 
 	ret = gpiod_get_value_cansleep(sy7636a->pgood_gpio);
-- 
GitLab


From 7740ab84c13e32002742106afd443a4ca7fe3918 Mon Sep 17 00:00:00 2001
From: Rouven Czerwinski <r.czerwinski@pengutronix.de>
Date: Wed, 16 Jun 2021 14:53:31 +0200
Subject: [PATCH 2929/3804] regulator: fixed: use dev_err_probe for gpio

Instead of returning the PTR_ERR directly, use dev_err_probe which
will also correctly set the deferred probe reason in
/sys/kernel/debug/deferred_devices, making it easier to debug missing
devices on the system.

Signed-off-by: Rouven Czerwinski <r.czerwinski@pengutronix.de>
Link: https://lore.kernel.org/r/20210616125331.153414-1-r.czerwinski@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/fixed.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 02ad83153e19a..78f3a938b39bb 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -271,7 +271,8 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 	 */
 	cfg.ena_gpiod = gpiod_get_optional(&pdev->dev, NULL, gflags);
 	if (IS_ERR(cfg.ena_gpiod))
-		return PTR_ERR(cfg.ena_gpiod);
+		return dev_err_probe(&pdev->dev, PTR_ERR(cfg.ena_gpiod),
+				     "can't get GPIO\n");
 
 	cfg.dev = &pdev->dev;
 	cfg.init_data = config->init_data;
-- 
GitLab


From ea8b16303f2b42d8ea15c7d762e546e4bc793b1f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:53 +0200
Subject: [PATCH 2930/3804] pata_cypress: add a module option to disable BM-DMA

Multiple users report that they need to disable DMA on this driver,
so provide an option to do so without affecting all of libata.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/20210616134658.1471835-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_cypress.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c
index e1486fe298ae0..5b3a7a8ebef64 100644
--- a/drivers/ata/pata_cypress.c
+++ b/drivers/ata/pata_cypress.c
@@ -41,6 +41,10 @@ enum {
 	CY82_INDEX_TIMEOUT	= 0x32
 };
 
+static bool enable_dma = true;
+module_param(enable_dma, bool, 0);
+MODULE_PARM_DESC(enable_dma, "Enable bus master DMA operations");
+
 /**
  *	cy82c693_set_piomode	-	set initial PIO mode data
  *	@ap: ATA interface
@@ -124,14 +128,16 @@ static struct ata_port_operations cy82c693_port_ops = {
 
 static int cy82c693_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
-	static const struct ata_port_info info = {
+	static struct ata_port_info info = {
 		.flags = ATA_FLAG_SLAVE_POSS,
 		.pio_mask = ATA_PIO4,
-		.mwdma_mask = ATA_MWDMA2,
 		.port_ops = &cy82c693_port_ops
 	};
 	const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info };
 
+	if (enable_dma)
+		info.mwdma_mask = ATA_MWDMA2;
+
 	/* Devfn 1 is the ATA primary. The secondary is magic and on devfn2.
 	   For the moment we don't handle the secondary. FIXME */
 
-- 
GitLab


From 492b1389005c71e0ce81e24d5be6271546aa8c34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:54 +0200
Subject: [PATCH 2931/3804] alpha: use libata instead of the legacy ide driver

Switch the alpha defconfig from the legacy ide driver to libata.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210616134658.1471835-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/alpha/configs/defconfig | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 724c4075df408..dd2dd9f0861f1 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -25,19 +25,18 @@ CONFIG_PNP=y
 CONFIG_ISAPNP=y
 CONFIG_BLK_DEV_FD=y
 CONFIG_BLK_DEV_LOOP=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_IDE_GENERIC=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_ALI15X3=y
-CONFIG_BLK_DEV_CMD64X=y
-CONFIG_BLK_DEV_CY82C693=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_AIC7XXX=m
 CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+CONFIG_PATA_ALI=y
+CONFIG_PATA_CMD64X=y
+CONFIG_PATA_CYPRESS=y
+CONFIG_ATA_GENERIC=y
 CONFIG_NETDEVICES=y
 CONFIG_DUMMY=m
 CONFIG_NET_ETHERNET=y
-- 
GitLab


From cdc429452596ea9e0c76c8b10b5e93feab522906 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:55 +0200
Subject: [PATCH 2932/3804] ARM: disable CONFIG_IDE in footbridge_defconfig

footbridge_defconfig enables CONFIG_IDE but no actual host controller
driver, so just drop it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210616134658.1471835-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/arm/configs/footbridge_defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig
index 2aa3ebeb89d7f..7a32de51f0faa 100644
--- a/arch/arm/configs/footbridge_defconfig
+++ b/arch/arm/configs/footbridge_defconfig
@@ -64,7 +64,6 @@ CONFIG_PARIDE_ON26=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
 CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_NET_VENDOR_3COM=y
-- 
GitLab


From 468c736b5eb34c712636279eb49251a6f7156f40 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:56 +0200
Subject: [PATCH 2933/3804] ARM: disable CONFIG_IDE in pxa_defconfig

pxa_defconfig already enables libata including the pata_pcmcia driver, so
drop the legacy ide driver and idecs host driver.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210616134658.1471835-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/arm/configs/pxa_defconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 875a3c28a267d..363f1b1b08e38 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -215,8 +215,6 @@ CONFIG_IIO=m
 CONFIG_AD5446=m
 CONFIG_EEPROM_AT24=m
 CONFIG_SENSORS_LIS3_SPI=m
-CONFIG_IDE=m
-CONFIG_BLK_DEV_IDECS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=m
 CONFIG_CHR_DEV_ST=m
-- 
GitLab


From b90257bfddbd01f3686d99c256ae6dd24a6a1deb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:57 +0200
Subject: [PATCH 2934/3804] m68k: use libata instead of the legacy ide driver

Switch the m68k defconfigs from the deprecated ide subsystem to use libata
instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20210616134658.1471835-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/configs/amiga_defconfig | 10 +++++-----
 arch/m68k/configs/atari_defconfig |  8 ++++----
 arch/m68k/configs/mac_defconfig   |  8 ++++----
 arch/m68k/configs/multi_defconfig | 14 +++++++-------
 arch/m68k/configs/q40_defconfig   |  8 ++++----
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 59b727b693575..4fe26d54627e4 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -323,11 +323,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
-CONFIG_IDE=y
-CONFIG_IDE_GD_ATAPI=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GAYLE=y
-CONFIG_BLK_DEV_BUDDHA=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -344,6 +339,11 @@ CONFIG_GVP11_SCSI=y
 CONFIG_SCSI_A4000T=y
 CONFIG_SCSI_ZORRO7XX=y
 CONFIG_SCSI_ZORRO_ESP=y
+CONFIG_ATA=y
+# CONFIG_ATA_VERBOSE_ERROR is not set
+# CONFIG_ATA_BMDMA is not set
+CONFIG_PATA_GAYLE=y
+CONFIG_PATA_BUDDHA=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 9cc9f1a065164..21b2990fe9af5 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -324,10 +324,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
-CONFIG_IDE=y
-CONFIG_IDE_GD_ATAPI=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_FALCON_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -339,6 +335,10 @@ CONFIG_SCSI_SAS_ATTRS=m
 CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_ATARI_SCSI=y
+CONFIG_ATA=y
+# CONFIG_ATA_VERBOSE_ERROR is not set
+# CONFIG_ATA_BMDMA is not set
+CONFIG_PATA_FALCON=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 406d3f2a16eae..b03300df13fc7 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -315,10 +315,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
-CONFIG_IDE=y
-CONFIG_IDE_GD_ATAPI=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_PLATFORM=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -331,6 +327,10 @@ CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
 CONFIG_MAC_SCSI=y
 CONFIG_SCSI_MAC_ESP=y
+CONFIG_ATA=y
+# CONFIG_ATA_VERBOSE_ERROR is not set
+# CONFIG_ATA_BMDMA is not set
+CONFIG_PATA_PLATFORM=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index f0992435e9ef7..e2c8368e22319 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -344,13 +344,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
-CONFIG_IDE=y
-CONFIG_IDE_GD_ATAPI=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_PLATFORM=y
-CONFIG_BLK_DEV_GAYLE=y
-CONFIG_BLK_DEV_BUDDHA=y
-CONFIG_BLK_DEV_FALCON_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -374,6 +367,13 @@ CONFIG_MVME147_SCSI=y
 CONFIG_MVME16x_SCSI=y
 CONFIG_BVME6000_SCSI=y
 CONFIG_SUN3X_ESP=y
+CONFIG_ATA=y
+# CONFIG_ATA_VERBOSE_ERROR is not set
+# CONFIG_ATA_BMDMA is not set
+CONFIG_PATA_FALCON=y
+CONFIG_PATA_GAYLE=y
+CONFIG_PATA_BUDDHA=y
+CONFIG_PATA_PLATFORM=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index b893163d9f068..514e2e8cddbd5 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -314,10 +314,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
 CONFIG_DUMMY_IRQ=m
-CONFIG_IDE=y
-CONFIG_IDE_GD_ATAPI=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_FALCON_IDE=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -328,6 +324,10 @@ CONFIG_SCSI_CONSTANTS=y
 CONFIG_SCSI_SAS_ATTRS=m
 CONFIG_ISCSI_TCP=m
 CONFIG_ISCSI_BOOT_SYSFS=m
+CONFIG_ATA=y
+# CONFIG_ATA_VERBOSE_ERROR is not set
+# CONFIG_ATA_BMDMA is not set
+CONFIG_PATA_FALCON=y
 CONFIG_MD=y
 CONFIG_MD_LINEAR=m
 CONFIG_BLK_DEV_DM=m
-- 
GitLab


From b7fb14d3ac63117e0e8beabe75f4ea52051fbe3a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Jun 2021 15:46:58 +0200
Subject: [PATCH 2935/3804] ide: remove the legacy ide driver

The legacy ide driver has been replaced by libata starting in 2003 and has
been scheduled for removal for a while.  Finally kill it off so that we
can start cleaning up various bits of cruft it forced on the block layer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/userspace-api/ioctl/hdio.rst |  845 +-------
 MAINTAINERS                                |   16 -
 drivers/Kconfig                            |    2 -
 drivers/Makefile                           |    1 -
 drivers/ide/Kconfig                        |  827 --------
 drivers/ide/Makefile                       |  109 -
 drivers/ide/aec62xx.c                      |  331 ----
 drivers/ide/ali14xx.c                      |  250 ---
 drivers/ide/alim15x3.c                     |  602 ------
 drivers/ide/amd74xx.c                      |  343 ----
 drivers/ide/atiixp.c                       |  212 --
 drivers/ide/buddha.c                       |  238 ---
 drivers/ide/cmd640.c                       |  848 --------
 drivers/ide/cmd64x.c                       |  452 -----
 drivers/ide/cs5520.c                       |  168 --
 drivers/ide/cs5530.c                       |  295 ---
 drivers/ide/cs5535.c                       |  216 --
 drivers/ide/cs5536.c                       |  294 ---
 drivers/ide/cy82c693.c                     |  234 ---
 drivers/ide/delkin_cb.c                    |  181 --
 drivers/ide/dtc2278.c                      |  155 --
 drivers/ide/falconide.c                    |  226 ---
 drivers/ide/gayle.c                        |  188 --
 drivers/ide/hpt366.c                       | 1545 ---------------
 drivers/ide/ht6560b.c                      |  383 ----
 drivers/ide/icside.c                       |  692 -------
 drivers/ide/ide-4drives.c                  |   65 -
 drivers/ide/ide-acpi.c                     |  622 ------
 drivers/ide/ide-atapi.c                    |  756 -------
 drivers/ide/ide-cd.c                       | 1858 -----------------
 drivers/ide/ide-cd.h                       |  123 --
 drivers/ide/ide-cd_ioctl.c                 |  468 -----
 drivers/ide/ide-cd_verbose.c               |  362 ----
 drivers/ide/ide-cs.c                       |  364 ----
 drivers/ide/ide-devsets.c                  |  192 --
 drivers/ide/ide-disk.c                     |  795 --------
 drivers/ide/ide-disk.h                     |   30 -
 drivers/ide/ide-disk_ioctl.c               |   33 -
 drivers/ide/ide-disk_proc.c                |  125 --
 drivers/ide/ide-dma-sff.c                  |  336 ----
 drivers/ide/ide-dma.c                      |  551 ------
 drivers/ide/ide-eh.c                       |  443 -----
 drivers/ide/ide-floppy.c                   |  551 ------
 drivers/ide/ide-floppy.h                   |   42 -
 drivers/ide/ide-floppy_ioctl.c             |  339 ----
 drivers/ide/ide-floppy_proc.c              |   34 -
 drivers/ide/ide-gd.c                       |  432 ----
 drivers/ide/ide-gd.h                       |   43 -
 drivers/ide/ide-generic.c                  |  139 --
 drivers/ide/ide-io-std.c                   |  262 ---
 drivers/ide/ide-io.c                       |  904 ---------
 drivers/ide/ide-ioctls.c                   |  306 ---
 drivers/ide/ide-iops.c                     |  536 -----
 drivers/ide/ide-legacy.c                   |   59 -
 drivers/ide/ide-lib.c                      |  146 --
 drivers/ide/ide-park.c                     |  155 --
 drivers/ide/ide-pci-generic.c              |  203 --
 drivers/ide/ide-pio-blacklist.c            |   96 -
 drivers/ide/ide-pm.c                       |  261 ---
 drivers/ide/ide-pnp.c                      |   92 -
 drivers/ide/ide-probe.c                    | 1623 ---------------
 drivers/ide/ide-proc.c                     |  633 ------
 drivers/ide/ide-scan-pci.c                 |  113 --
 drivers/ide/ide-sysfs.c                    |  143 --
 drivers/ide/ide-tape.c                     | 2083 --------------------
 drivers/ide/ide-taskfile.c                 |  668 -------
 drivers/ide/ide-timings.c                  |  198 --
 drivers/ide/ide-xfer-mode.c                |  267 ---
 drivers/ide/ide.c                          |  415 ----
 drivers/ide/ide_platform.c                 |  133 --
 drivers/ide/it8172.c                       |  165 --
 drivers/ide/it8213.c                       |  217 --
 drivers/ide/it821x.c                       |  715 -------
 drivers/ide/jmicron.c                      |  176 --
 drivers/ide/ns87415.c                      |  350 ----
 drivers/ide/opti621.c                      |  179 --
 drivers/ide/palm_bk3710.c                  |  387 ----
 drivers/ide/pdc202xx_new.c                 |  557 ------
 drivers/ide/pdc202xx_old.c                 |  362 ----
 drivers/ide/piix.c                         |  476 -----
 drivers/ide/pmac.c                         | 1703 ----------------
 drivers/ide/qd65xx.c                       |  446 -----
 drivers/ide/qd65xx.h                       |  145 --
 drivers/ide/rapide.c                       |  106 -
 drivers/ide/rz1000.c                       |  100 -
 drivers/ide/sc1200.c                       |  355 ----
 drivers/ide/serverworks.c                  |  456 -----
 drivers/ide/setup-pci.c                    |  682 -------
 drivers/ide/siimage.c                      |  843 --------
 drivers/ide/sis5513.c                      |  637 ------
 drivers/ide/sl82c105.c                     |  367 ----
 drivers/ide/slc90e66.c                     |  182 --
 drivers/ide/tc86c001.c                     |  270 ---
 drivers/ide/triflex.c                      |  143 --
 drivers/ide/trm290.c                       |  374 ----
 drivers/ide/tx4938ide.c                    |  209 --
 drivers/ide/tx4939ide.c                    |  628 ------
 drivers/ide/umc8672.c                      |  184 --
 drivers/ide/via82cxxx.c                    |  532 -----
 include/linux/ide.h                        | 1623 ---------------
 100 files changed, 25 insertions(+), 41196 deletions(-)
 delete mode 100644 drivers/ide/Kconfig
 delete mode 100644 drivers/ide/Makefile
 delete mode 100644 drivers/ide/aec62xx.c
 delete mode 100644 drivers/ide/ali14xx.c
 delete mode 100644 drivers/ide/alim15x3.c
 delete mode 100644 drivers/ide/amd74xx.c
 delete mode 100644 drivers/ide/atiixp.c
 delete mode 100644 drivers/ide/buddha.c
 delete mode 100644 drivers/ide/cmd640.c
 delete mode 100644 drivers/ide/cmd64x.c
 delete mode 100644 drivers/ide/cs5520.c
 delete mode 100644 drivers/ide/cs5530.c
 delete mode 100644 drivers/ide/cs5535.c
 delete mode 100644 drivers/ide/cs5536.c
 delete mode 100644 drivers/ide/cy82c693.c
 delete mode 100644 drivers/ide/delkin_cb.c
 delete mode 100644 drivers/ide/dtc2278.c
 delete mode 100644 drivers/ide/falconide.c
 delete mode 100644 drivers/ide/gayle.c
 delete mode 100644 drivers/ide/hpt366.c
 delete mode 100644 drivers/ide/ht6560b.c
 delete mode 100644 drivers/ide/icside.c
 delete mode 100644 drivers/ide/ide-4drives.c
 delete mode 100644 drivers/ide/ide-acpi.c
 delete mode 100644 drivers/ide/ide-atapi.c
 delete mode 100644 drivers/ide/ide-cd.c
 delete mode 100644 drivers/ide/ide-cd.h
 delete mode 100644 drivers/ide/ide-cd_ioctl.c
 delete mode 100644 drivers/ide/ide-cd_verbose.c
 delete mode 100644 drivers/ide/ide-cs.c
 delete mode 100644 drivers/ide/ide-devsets.c
 delete mode 100644 drivers/ide/ide-disk.c
 delete mode 100644 drivers/ide/ide-disk.h
 delete mode 100644 drivers/ide/ide-disk_ioctl.c
 delete mode 100644 drivers/ide/ide-disk_proc.c
 delete mode 100644 drivers/ide/ide-dma-sff.c
 delete mode 100644 drivers/ide/ide-dma.c
 delete mode 100644 drivers/ide/ide-eh.c
 delete mode 100644 drivers/ide/ide-floppy.c
 delete mode 100644 drivers/ide/ide-floppy.h
 delete mode 100644 drivers/ide/ide-floppy_ioctl.c
 delete mode 100644 drivers/ide/ide-floppy_proc.c
 delete mode 100644 drivers/ide/ide-gd.c
 delete mode 100644 drivers/ide/ide-gd.h
 delete mode 100644 drivers/ide/ide-generic.c
 delete mode 100644 drivers/ide/ide-io-std.c
 delete mode 100644 drivers/ide/ide-io.c
 delete mode 100644 drivers/ide/ide-ioctls.c
 delete mode 100644 drivers/ide/ide-iops.c
 delete mode 100644 drivers/ide/ide-legacy.c
 delete mode 100644 drivers/ide/ide-lib.c
 delete mode 100644 drivers/ide/ide-park.c
 delete mode 100644 drivers/ide/ide-pci-generic.c
 delete mode 100644 drivers/ide/ide-pio-blacklist.c
 delete mode 100644 drivers/ide/ide-pm.c
 delete mode 100644 drivers/ide/ide-pnp.c
 delete mode 100644 drivers/ide/ide-probe.c
 delete mode 100644 drivers/ide/ide-proc.c
 delete mode 100644 drivers/ide/ide-scan-pci.c
 delete mode 100644 drivers/ide/ide-sysfs.c
 delete mode 100644 drivers/ide/ide-tape.c
 delete mode 100644 drivers/ide/ide-taskfile.c
 delete mode 100644 drivers/ide/ide-timings.c
 delete mode 100644 drivers/ide/ide-xfer-mode.c
 delete mode 100644 drivers/ide/ide.c
 delete mode 100644 drivers/ide/ide_platform.c
 delete mode 100644 drivers/ide/it8172.c
 delete mode 100644 drivers/ide/it8213.c
 delete mode 100644 drivers/ide/it821x.c
 delete mode 100644 drivers/ide/jmicron.c
 delete mode 100644 drivers/ide/ns87415.c
 delete mode 100644 drivers/ide/opti621.c
 delete mode 100644 drivers/ide/palm_bk3710.c
 delete mode 100644 drivers/ide/pdc202xx_new.c
 delete mode 100644 drivers/ide/pdc202xx_old.c
 delete mode 100644 drivers/ide/piix.c
 delete mode 100644 drivers/ide/pmac.c
 delete mode 100644 drivers/ide/qd65xx.c
 delete mode 100644 drivers/ide/qd65xx.h
 delete mode 100644 drivers/ide/rapide.c
 delete mode 100644 drivers/ide/rz1000.c
 delete mode 100644 drivers/ide/sc1200.c
 delete mode 100644 drivers/ide/serverworks.c
 delete mode 100644 drivers/ide/setup-pci.c
 delete mode 100644 drivers/ide/siimage.c
 delete mode 100644 drivers/ide/sis5513.c
 delete mode 100644 drivers/ide/sl82c105.c
 delete mode 100644 drivers/ide/slc90e66.c
 delete mode 100644 drivers/ide/tc86c001.c
 delete mode 100644 drivers/ide/triflex.c
 delete mode 100644 drivers/ide/trm290.c
 delete mode 100644 drivers/ide/tx4938ide.c
 delete mode 100644 drivers/ide/tx4939ide.c
 delete mode 100644 drivers/ide/umc8672.c
 delete mode 100644 drivers/ide/via82cxxx.c
 delete mode 100644 include/linux/ide.h

diff --git a/Documentation/userspace-api/ioctl/hdio.rst b/Documentation/userspace-api/ioctl/hdio.rst
index 817371bf94e94..6ee8fc88699f0 100644
--- a/Documentation/userspace-api/ioctl/hdio.rst
+++ b/Documentation/userspace-api/ioctl/hdio.rst
@@ -7,8 +7,8 @@ Summary of `HDIO_` ioctl calls
 November, 2004
 
 This document attempts to describe the ioctl(2) calls supported by
-the HD/IDE layer.  These are by-and-large implemented (as of Linux 2.6)
-in drivers/ide/ide.c and drivers/block/scsi_ioctl.c
+the HD/IDE layer.  These are by-and-large implemented (as of Linux 5.11)
+in drivers/ata/libata-scsi.c.
 
 ioctl values are listed in <linux/hdreg.h>.  As of this writing, they
 are as follows:
@@ -17,50 +17,17 @@ are as follows:
 
 	=======================	=======================================
 	HDIO_GETGEO		get device geometry
-	HDIO_GET_UNMASKINTR	get current unmask setting
-	HDIO_GET_MULTCOUNT	get current IDE blockmode setting
-	HDIO_GET_QDMA		get use-qdma flag
-	HDIO_SET_XFER		set transfer rate via proc
-	HDIO_OBSOLETE_IDENTITY	OBSOLETE, DO NOT USE
-	HDIO_GET_KEEPSETTINGS	get keep-settings-on-reset flag
 	HDIO_GET_32BIT		get current io_32bit setting
-	HDIO_GET_NOWERR		get ignore-write-error flag
-	HDIO_GET_DMA		get use-dma flag
-	HDIO_GET_NICE		get nice flags
 	HDIO_GET_IDENTITY	get IDE identification info
-	HDIO_GET_WCACHE		get write cache mode on|off
-	HDIO_GET_ACOUSTIC	get acoustic value
-	HDIO_GET_ADDRESS	get sector addressing mode
-	HDIO_GET_BUSSTATE	get the bus state of the hwif
-	HDIO_TRISTATE_HWIF	execute a channel tristate
-	HDIO_DRIVE_RESET	execute a device reset
 	HDIO_DRIVE_TASKFILE	execute raw taskfile
 	HDIO_DRIVE_TASK		execute task and special drive command
 	HDIO_DRIVE_CMD		execute a special drive command
-	HDIO_DRIVE_CMD_AEB	HDIO_DRIVE_TASK
 	=======================	=======================================
 
     ioctls that pass non-pointer values:
 
 	=======================	=======================================
-	HDIO_SET_MULTCOUNT	change IDE blockmode
-	HDIO_SET_UNMASKINTR	permit other irqs during I/O
-	HDIO_SET_KEEPSETTINGS	keep ioctl settings on reset
 	HDIO_SET_32BIT		change io_32bit flags
-	HDIO_SET_NOWERR		change ignore-write-error flag
-	HDIO_SET_DMA		change use-dma flag
-	HDIO_SET_PIO_MODE	reconfig interface to new speed
-	HDIO_SCAN_HWIF		register and (re)scan interface
-	HDIO_SET_NICE		set nice flags
-	HDIO_UNREGISTER_HWIF	unregister interface
-	HDIO_SET_WCACHE		change write cache enable-disable
-	HDIO_SET_ACOUSTIC	change acoustic behavior
-	HDIO_SET_BUSSTATE	set the bus state of the hwif
-	HDIO_SET_QDMA		change use-qdma flag
-	HDIO_SET_ADDRESS	change lba addressing modes
-
-	HDIO_SET_IDE_SCSI	Set scsi emulation mode on/off
-	HDIO_SET_SCSI_IDE	not implemented yet
 	=======================	=======================================
 
 
@@ -137,512 +104,49 @@ HDIO_GETGEO
 
 
-
-HDIO_GET_UNMASKINTR
-	get current unmask setting
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_UNMASKINTR, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the drive's current unmask setting
-
-
-
-
-
-HDIO_SET_UNMASKINTR
-	permit other irqs during I/O
-
-
-	usage::
-
-	  unsigned long val;
-
-	  ioctl(fd, HDIO_SET_UNMASKINTR, val);
-
-	inputs:
-		New value for unmask flag
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY	Controller busy
-
-
-
-
-HDIO_GET_MULTCOUNT
-	get current IDE blockmode setting
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_MULTCOUNT, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current IDE block mode setting.  This
-		controls how many sectors the drive will transfer per
-		interrupt.
-
-
-
-HDIO_SET_MULTCOUNT
-	change IDE blockmode
-
-
-	usage::
-
-	  int val;
-
-	  ioctl(fd, HDIO_SET_MULTCOUNT, val);
-
-	inputs:
-		New value for IDE block mode setting.  This controls how many
-		sectors the drive will transfer per interrupt.
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range supported by disk.
-	  - EBUSY	Controller busy or blockmode already set.
-	  - EIO		Drive did not accept new block mode.
-
-	notes:
-	  Source code comments read::
-
-	    This is tightly woven into the driver->do_special cannot
-	    touch.  DON'T do it again until a total personality rewrite
-	    is committed.
-
-	  If blockmode has already been set, this ioctl will fail with
-	  -EBUSY
-
-
-
-HDIO_GET_QDMA
-	get use-qdma flag
-
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_SET_XFER
-	set transfer rate via proc
-
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_OBSOLETE_IDENTITY
-	OBSOLETE, DO NOT USE
-
-
-	Same as HDIO_GET_IDENTITY (see below), except that it only
-	returns the first 142 bytes of drive identity information.
-
-
-
-HDIO_GET_IDENTITY
-	get IDE identification info
-
-
-	usage::
-
-	  unsigned char identity[512];
-
-	  ioctl(fd, HDIO_GET_IDENTITY, identity);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		ATA drive identity information.  For full description, see
-		the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
-		the ATA specification.
-
-	error returns:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - ENOMSG	IDENTIFY DEVICE information not available
-
-	notes:
-		Returns information that was obtained when the drive was
-		probed.  Some of this information is subject to change, and
-		this ioctl does not re-probe the drive to update the
-		information.
-
-		This information is also available from /proc/ide/hdX/identify
-
-
-
-HDIO_GET_KEEPSETTINGS
-	get keep-settings-on-reset flag
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_KEEPSETTINGS, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current "keep settings" flag
-
-
-
-	notes:
-		When set, indicates that kernel should restore settings
-		after a drive reset.
-
-
-
-HDIO_SET_KEEPSETTINGS
-	keep ioctl settings on reset
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_SET_KEEPSETTINGS, val);
-
-	inputs:
-		New value for keep_settings flag
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY		Controller busy
-
-
-
-HDIO_GET_32BIT
-	get current io_32bit setting
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_32BIT, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current io_32bit setting
-
-
-
-	notes:
-		0=16-bit, 1=32-bit, 2,3 = 32bit+sync
-
-
-
-
-
-HDIO_GET_NOWERR
-	get ignore-write-error flag
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_NOWERR, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current ignore-write-error flag
-
-
-
-
-
-HDIO_GET_DMA
-	get use-dma flag
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_DMA, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current use-dma flag
-
-
-
-
-
-HDIO_GET_NICE
-	get nice flags
-
-
-	usage::
-
-	  long nice;
-
-	  ioctl(fd, HDIO_GET_NICE, &nice);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The drive's "nice" values.
-
-
-
-	notes:
-		Per-drive flags which determine when the system will give more
-		bandwidth to other devices sharing the same IDE bus.
-
-		See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
-
-
-
-
-HDIO_SET_NICE
-	set nice flags
-
-
-	usage::
-
-	  unsigned long nice;
-
-	  ...
-	  ioctl(fd, HDIO_SET_NICE, nice);
-
-	inputs:
-		bitmask of nice flags.
-
-
-
-	outputs:
-		none
-
-
-
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EPERM	Flags other than DSC_OVERLAP and NICE_1 set.
-	  - EPERM	DSC_OVERLAP specified but not supported by drive
-
-	notes:
-		This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
-		provided by the user.
-
-		Nice flags are listed in <linux/hdreg.h>, starting with
-		IDE_NICE_DSC_OVERLAP.  These values represent shifts.
-
-
-
-
-
-HDIO_GET_WCACHE
-	get write cache mode on|off
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_WCACHE, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current write cache mode
-
-
-
-
-
-HDIO_GET_ACOUSTIC
-	get acoustic value
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_ACOUSTIC, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current acoustic settings
-
-
-
-	notes:
-		See HDIO_SET_ACOUSTIC
-
-
-
-
-
-HDIO_GET_ADDRESS
-	usage::
-
-
-	  long val;
-
-	  ioctl(fd, HDIO_GET_ADDRESS, &val);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		The value of the current addressing mode:
-
-	    =  ===================
-	    0  28-bit
-	    1  48-bit
-	    2  48-bit doing 28-bit
-	    3  64-bit
-	    =  ===================
-
-
-
-HDIO_GET_BUSSTATE
-	get the bus state of the hwif
-
-
-	usage::
-
-	  long state;
-
-	  ioctl(fd, HDIO_SCAN_HWIF, &state);
-
-	inputs:
-		none
-
-
-
-	outputs:
-		Current power state of the IDE bus.  One of BUSSTATE_OFF,
-		BUSSTATE_ON, or BUSSTATE_TRISTATE
-
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-
-
-
-
-HDIO_SET_BUSSTATE
-	set the bus state of the hwif
+HDIO_GET_IDENTITY
+	get IDE identification info
 
 
 	usage::
 
-	  int state;
+	  unsigned char identity[512];
 
-	  ...
-	  ioctl(fd, HDIO_SCAN_HWIF, state);
+	  ioctl(fd, HDIO_GET_IDENTITY, identity);
 
 	inputs:
-		Desired IDE power state.  One of BUSSTATE_OFF, BUSSTATE_ON,
-		or BUSSTATE_TRISTATE
-
-	outputs:
 		none
 
 
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  - EOPNOTSUPP	Hardware interface does not support bus power control
-
-
-
+	outputs:
+		ATA drive identity information.  For full description, see
+		the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
+		the ATA specification.
 
-HDIO_TRISTATE_HWIF
-	execute a channel tristate
+	error returns:
+	  - EINVAL	Called on a partition instead of the whole disk device
+	  - ENOMSG	IDENTIFY DEVICE information not available
 
+	notes:
+		Returns information that was obtained when the drive was
+		probed.  Some of this information is subject to change, and
+		this ioctl does not re-probe the drive to update the
+		information.
 
-	Not implemented, as of 2.6.8.1.  See HDIO_SET_BUSSTATE
+		This information is also available from /proc/ide/hdX/identify
 
 
-HDIO_DRIVE_RESET
-	execute a device reset
+HDIO_GET_32BIT
+	get current io_32bit setting
 
 
 	usage::
 
-	  int args[3]
+	  long val;
 
-	  ...
-	  ioctl(fd, HDIO_DRIVE_RESET, args);
+	  ioctl(fd, HDIO_GET_32BIT, &val);
 
 	inputs:
 		none
@@ -650,22 +154,12 @@ HDIO_DRIVE_RESET
 
 
 	outputs:
-		none
-
+		The value of the current io_32bit setting
 
 
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - ENXIO	No such device:	phy dead or ctl_addr == 0
-	  - EIO		I/O error:	reset timed out or hardware error
 
 	notes:
-
-	  - Execute a reset on the device as soon as the current IO
-	    operation has completed.
-
-	  - Executes an ATAPI soft reset if applicable, otherwise
-	    executes an ATA soft reset on the controller.
+		0=16-bit, 1=32-bit, 2,3 = 32bit+sync
 
 
@@ -1026,14 +520,6 @@ HDIO_DRIVE_TASK
 
 
-HDIO_DRIVE_CMD_AEB
-	HDIO_DRIVE_TASK
-
-
-	Not implemented, as of 2.6.8.1
-
-
-
 HDIO_SET_32BIT
 	change io_32bit flags
 
@@ -1059,284 +545,3 @@ HDIO_SET_32BIT
 	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
 	  - EINVAL	value out of range [0 3]
 	  - EBUSY	Controller busy
-
-
-
-
-HDIO_SET_NOWERR
-	change ignore-write-error flag
-
-
-	usage::
-
-	  int val;
-
-	  ioctl(fd, HDIO_SET_NOWERR, val);
-
-	inputs:
-		New value for ignore-write-error flag.  Used for ignoring
-
-
-	  WRERR_STAT
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY		Controller busy
-
-
-
-HDIO_SET_DMA
-	change use-dma flag
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_SET_DMA, val);
-
-	inputs:
-		New value for use-dma flag
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY	Controller busy
-
-
-
-HDIO_SET_PIO_MODE
-	reconfig interface to new speed
-
-
-	usage::
-
-	  long val;
-
-	  ioctl(fd, HDIO_SET_PIO_MODE, val);
-
-	inputs:
-		New interface speed.
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 255]
-	  - EBUSY	Controller busy
-
-
-
-HDIO_SCAN_HWIF
-	register and (re)scan interface
-
-
-	usage::
-
-	  int args[3]
-
-	  ...
-	  ioctl(fd, HDIO_SCAN_HWIF, args);
-
-	inputs:
-
-	  =======	=========================
-	  args[0]	io address to probe
-
-
-	  args[1]	control address to probe
-	  args[2]	irq number
-	  =======	=========================
-
-	outputs:
-		none
-
-
-
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
-	  - EIO		Probe failed.
-
-	notes:
-		This ioctl initializes the addresses and irq for a disk
-		controller, probes for drives, and creates /proc/ide
-		interfaces as appropriate.
-
-
-
-HDIO_UNREGISTER_HWIF
-	unregister interface
-
-
-	usage::
-
-	  int index;
-
-	  ioctl(fd, HDIO_UNREGISTER_HWIF, index);
-
-	inputs:
-		index		index of hardware interface to unregister
-
-
-
-	outputs:
-		none
-
-
-
-	error returns:
-	  - EACCES	Access denied:  requires CAP_SYS_RAWIO
-
-	notes:
-		This ioctl removes a hardware interface from the kernel.
-
-		Currently (2.6.8) this ioctl silently fails if any drive on
-		the interface is busy.
-
-
-
-HDIO_SET_WCACHE
-	change write cache enable-disable
-
-
-	usage::
-
-	  int val;
-
-	  ioctl(fd, HDIO_SET_WCACHE, val);
-
-	inputs:
-		New value for write cache enable
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY	Controller busy
-
-
-
-HDIO_SET_ACOUSTIC
-	change acoustic behavior
-
-
-	usage::
-
-	  int val;
-
-	  ioctl(fd, HDIO_SET_ACOUSTIC, val);
-
-	inputs:
-		New value for drive acoustic settings
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 254]
-	  - EBUSY	Controller busy
-
-
-
-HDIO_SET_QDMA
-	change use-qdma flag
-
-
-	Not implemented, as of 2.6.8.1
-
-
-
-HDIO_SET_ADDRESS
-	change lba addressing modes
-
-
-	usage::
-
-	  int val;
-
-	  ioctl(fd, HDIO_SET_ADDRESS, val);
-
-	inputs:
-		New value for addressing mode
-
-	    =   ===================
-	    0   28-bit
-	    1   48-bit
-	    2   48-bit doing 28-bit
-	    =   ===================
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 2]
-	  - EBUSY		Controller busy
-	  - EIO		Drive does not support lba48 mode.
-
-
-HDIO_SET_IDE_SCSI
-	usage::
-
-
-	  long val;
-
-	  ioctl(fd, HDIO_SET_IDE_SCSI, val);
-
-	inputs:
-		New value for scsi emulation mode (?)
-
-
-
-	outputs:
-		none
-
-
-
-	error return:
-	  - EINVAL	Called on a partition instead of the whole disk device
-	  - EACCES	Access denied:  requires CAP_SYS_ADMIN
-	  - EINVAL	value out of range [0 1]
-	  - EBUSY	Controller busy
-
-
-
-HDIO_SET_SCSI_IDE
-	Not implemented, as of 2.6.8.1
diff --git a/MAINTAINERS b/MAINTAINERS
index 008fcad7ac008..4f3a7e3ce93f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8763,22 +8763,6 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-icy.c
 
-IDE SUBSYSTEM
-M:	"David S. Miller" <davem@davemloft.net>
-L:	linux-ide@vger.kernel.org
-S:	Maintained
-Q:	http://patchwork.ozlabs.org/project/linux-ide/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/davem/ide.git
-F:	Documentation/ide/
-F:	drivers/ide/
-F:	include/linux/ide.h
-
-IDE/ATAPI DRIVERS
-L:	linux-ide@vger.kernel.org
-S:	Orphan
-F:	Documentation/cdrom/ide-cd.rst
-F:	drivers/ide/ide-cd*
-
 IDEAPAD LAPTOP EXTRAS DRIVER
 M:	Ike Panhc <ike.pan@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 47980c6b1945d..8bad63417a506 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -33,8 +33,6 @@ source "drivers/nvme/Kconfig"
 
 source "drivers/misc/Kconfig"
 
-source "drivers/ide/Kconfig"
-
 source "drivers/scsi/Kconfig"
 
 source "drivers/ata/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 5a6d613e868d3..f85185f9139e7 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -78,7 +78,6 @@ obj-$(CONFIG_CXL_BUS)		+= cxl/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)		+= nubus/
 obj-y				+= macintosh/
-obj-$(CONFIG_IDE)		+= ide/
 obj-y				+= scsi/
 obj-y				+= nvme/
 obj-$(CONFIG_ATA)		+= ata/
diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig
deleted file mode 100644
index 8af1ac69e5f82..0000000000000
--- a/drivers/ide/Kconfig
+++ /dev/null
@@ -1,827 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# IDE ATA ATAPI Block device driver configuration
-#
-
-# Select HAVE_IDE if IDE is supported
-config HAVE_IDE
-	bool
-
-menuconfig IDE
-	tristate "ATA/ATAPI/MFM/RLL support (DEPRECATED)"
-	depends on HAVE_IDE
-	depends on BLOCK
-	select BLK_SCSI_REQUEST
-	help
-	  If you say Y here, your kernel will be able to manage ATA/(E)IDE and
-	  ATAPI units. The most common cases are IDE hard drives and ATAPI
-	  CD-ROM drives.
-
-	  This subsystem is currently in maintenance mode with only bug fix
-	  changes applied. Users of ATA hardware are encouraged to migrate to
-	  the newer ATA subsystem ("Serial ATA (prod) and Parallel ATA
-	  (experimental) drivers") which is more actively maintained.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-core.
-
-	  For further information, please read <file:Documentation/ide/ide.rst>.
-
-	  If unsure, say N.
-
-if IDE
-
-comment "Please see Documentation/ide/ide.rst for help/info on IDE drives"
-
-config IDE_XFER_MODE
-	bool
-
-config IDE_TIMINGS
-	bool
-	select IDE_XFER_MODE
-
-config IDE_ATAPI
-	bool
-
-config IDE_LEGACY
-	bool
-
-config BLK_DEV_IDE_SATA
-	bool "Support for SATA (deprecated; conflicts with libata SATA driver)"
-	default n
-	help
-	  There are two drivers for Serial ATA controllers.
-
-	  The main driver, "libata", uses the SCSI subsystem
-	  and supports most modern SATA controllers. In order to use it
-	  you may take a look at "Serial ATA (prod) and Parallel ATA
-	  (experimental) drivers".
-
-	  The IDE driver (which you are currently configuring) supports
-	  a few first-generation SATA controllers.
-
-	  In order to eliminate conflicts between the two subsystems,
-	  this config option enables the IDE driver's SATA support.
-	  Normally this is disabled, as it is preferred that libata
-	  supports SATA controllers, and this (IDE) driver supports
-	  PATA controllers.
-
-	  If unsure, say N.
-
-config IDE_GD
-	tristate "generic ATA/ATAPI disk support"
-	default y
-	help
-	  Support for ATA/ATAPI disks (including ATAPI floppy drives).
-
-	  To compile this driver as a module, choose M here.
-	  The module will be called ide-gd_mod.
-
-	  If unsure, say Y.
-
-config IDE_GD_ATA
-	bool "ATA disk support"
-	depends on IDE_GD
-	default y
-	help
-	  This will include support for ATA hard disks.
-
-	  If unsure, say Y.
-
-config IDE_GD_ATAPI
-	bool "ATAPI floppy support"
-	depends on IDE_GD
-	select IDE_ATAPI
-	help
-	  This will include support for ATAPI floppy drives
-	  (i.e. Iomega ZIP or MKE LS-120).
-
-	  For information about jumper settings and the question
-	  of when a ZIP drive uses a partition table, see
-	  <http://www.win.tue.nl/~aeb/linux/zip/zip-1.html>.
-
-	  If unsure, say N.
-
-config BLK_DEV_IDECS
-	tristate "PCMCIA IDE support"
-	depends on PCMCIA
-	help
-	  Support for Compact Flash cards, outboard IDE disks, tape drives,
-	  and CD-ROM drives connected through a PCMCIA card.
-
-config BLK_DEV_DELKIN
-	tristate "Cardbus IDE support (Delkin/ASKA/Workbit)"
-	depends on CARDBUS && PCI
-	help
-	  Support for Delkin, ASKA, and Workbit Cardbus CompactFlash
-	  Adapters.  This may also work for similar SD and XD adapters.
-
-config BLK_DEV_IDECD
-	tristate "Include IDE/ATAPI CDROM support"
-	depends on BLK_DEV
-	select IDE_ATAPI
-	select CDROM
-	help
-	  If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is
-	  a newer protocol used by IDE CD-ROM and TAPE drives, similar to the
-	  SCSI protocol. Most new CD-ROM drives use ATAPI, including the
-	  NEC-260, Mitsumi FX400, Sony 55E, and just about all non-SCSI
-	  double(2X) or better speed drives.
-
-	  If you say Y here, the CD-ROM drive will be identified at boot time
-	  along with other IDE devices, as "hdb" or "hdc", or something
-	  similar (check the boot messages with dmesg). If this is your only
-	  CD-ROM drive, you can say N to all other CD-ROM options, but be sure
-	  to say Y or M to "ISO 9660 CD-ROM file system support".
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-cd.
-
-config BLK_DEV_IDECD_VERBOSE_ERRORS
-	bool "Verbose error logging for IDE/ATAPI CDROM driver" if EXPERT
-	depends on BLK_DEV_IDECD
-	default y
-	help
-	  Turn this on to have the driver print out the meanings of the
-	  ATAPI error codes.  This will use up additional 8kB of kernel-space
-	  memory, though.
-
-config BLK_DEV_IDETAPE
-	tristate "Include IDE/ATAPI TAPE support"
-	select IDE_ATAPI
-	help
-	  If you have an IDE tape drive using the ATAPI protocol, say Y.
-	  ATAPI is a newer protocol used by IDE tape and CD-ROM drives,
-	  similar to the SCSI protocol.  If you have an SCSI tape drive
-	  however, you can say N here.
-
-	  You should also say Y if you have an OnStream DI-30 tape drive; this
-	  will not work with the SCSI protocol, until there is support for the
-	  SC-30 and SC-50 versions.
-
-	  If you say Y here, the tape drive will be identified at boot time
-	  along with other IDE devices, as "hdb" or "hdc", or something
-	  similar, and will be mapped to a character device such as "ht0"
-	  (check the boot messages with dmesg).  Be sure to consult the
-	  <file:drivers/ide/ide-tape.c> and <file:Documentation/ide/ide.rst>
-	  files for usage information.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called ide-tape.
-
-config BLK_DEV_IDEACPI
-	bool "IDE ACPI support"
-	depends on ACPI
-	help
-	  Implement ACPI support for generic IDE devices. On modern
-	  machines ACPI support is required to properly handle ACPI S3 states.
-
-config IDE_TASK_IOCTL
-	bool "IDE Taskfile Access"
-	help
-	  This is a direct raw access to the media.  It is a complex but
-	  elegant solution to test and validate the domain of the hardware and
-	  perform below the driver data recovery if needed.  This is the most
-	  basic form of media-forensics.
-
-	  If you are unsure, say N here.
-
-config IDE_PROC_FS
-	bool "legacy /proc/ide/ support"
-	depends on IDE && PROC_FS
-	default y
-	help
-	  This option enables support for the various files in
-	  /proc/ide.  In Linux 2.6 this has been superseded by
-	  files in sysfs but many legacy applications rely on this.
-
-	  If unsure say Y.
-
-comment "IDE chipset support/bugfixes"
-
-config IDE_GENERIC
-	tristate "generic/default IDE chipset support"
-	depends on ALPHA || X86 || IA64 || MIPS || ARCH_RPC
-	default ARM && ARCH_RPC
-	help
-	  This is the generic IDE driver.  This driver attaches to the
-	  fixed legacy ports (e.g. on PCs 0x1f0/0x170, 0x1e8/0x168 and
-	  so on).  Please note that if this driver is built into the
-	  kernel or loaded before other ATA (IDE or libata) drivers
-	  and the controller is located at legacy ports, this driver
-	  may grab those ports and thus can prevent the controller
-	  specific driver from attaching.
-
-	  Also, currently, IDE generic doesn't allow IRQ sharing
-	  meaning that the IRQs it grabs won't be available to other
-	  controllers sharing those IRQs which usually makes drivers
-	  for those controllers fail.  Generally, it's not a good idea
-	  to load IDE generic driver on modern systems.
-
-	  If unsure, say N.
-
-config BLK_DEV_PLATFORM
-	tristate "Platform driver for IDE interfaces"
-	help
-	  This is the platform IDE driver, used mostly for Memory Mapped
-	  IDE devices, like Compact Flashes running in True IDE mode.
-
-	  If unsure, say N.
-
-config BLK_DEV_CMD640
-	tristate "CMD640 chipset bugfix/support"
-	depends on X86
-	select IDE_TIMINGS
-	help
-	  The CMD-Technologies CMD640 IDE chip is used on many common 486 and
-	  Pentium motherboards, usually in combination with a "Neptune" or
-	  "SiS" chipset. Unfortunately, it has a number of rather nasty
-	  design flaws that can cause severe data corruption under many common
-	  conditions. Say Y here to include code which tries to automatically
-	  detect and correct the problems under Linux. This option also
-	  enables access to the secondary IDE ports in some CMD640 based
-	  systems.
-
-	  This driver will work automatically in PCI based systems (most new
-	  systems have PCI slots). But if your system uses VESA local bus
-	  (VLB) instead of PCI, you must also supply a kernel boot parameter
-	  to enable the CMD640 bugfix/support: "cmd640.probe_vlb". (Try "man
-	  bootparam" or see the documentation of your boot loader about how to
-	  pass options to the kernel.)
-
-	  The CMD640 chip is also used on add-in cards by Acculogic, and on
-	  the "CSA-6400E PCI to IDE controller" that some people have. For
-	  details, read <file:Documentation/ide/ide.rst>.
-
-config BLK_DEV_CMD640_ENHANCED
-	bool "CMD640 enhanced support"
-	depends on BLK_DEV_CMD640
-	help
-	  This option includes support for setting/autotuning PIO modes and
-	  prefetch on CMD640 IDE interfaces.  For details, read
-	  <file:Documentation/ide/ide.rst>. If you have a CMD640 IDE interface
-	  and your BIOS does not already do this for you, then say Y here.
-	  Otherwise say N.
-
-config BLK_DEV_IDEPNP
-	tristate "PNP EIDE support"
-	depends on PNP
-	help
-	  If you have a PnP (Plug and Play) compatible EIDE card and
-	  would like the kernel to automatically detect and activate
-	  it, say Y here.
-
-config BLK_DEV_IDEDMA_SFF
-	bool
-
-if PCI
-
-comment "PCI IDE chipsets support"
-
-config BLK_DEV_IDEPCI
-	bool
-
-config IDEPCI_PCIBUS_ORDER
-	bool "Probe IDE PCI devices in the PCI bus order (DEPRECATED)"
-	depends on IDE=y && BLK_DEV_IDEPCI
-	default y
-	help
-	  Probe IDE PCI devices in the order in which they appear on the
-	  PCI bus (i.e. 00:1f.1 PCI device before 02:01.0 PCI device)
-	  instead of the order in which IDE PCI host drivers are loaded.
-
-	  Please note that this method of assuring stable naming of
-	  IDE devices is unreliable and use other means for achieving
-	  it (i.e. udev).
-
-	  If in doubt, say N.
-
-# TODO: split it on per host driver config options (or module parameters)
-config BLK_DEV_OFFBOARD
-	bool "Boot off-board chipsets first support (DEPRECATED)"
-	depends on BLK_DEV_IDEPCI && (BLK_DEV_AEC62XX || BLK_DEV_GENERIC || BLK_DEV_HPT366 || BLK_DEV_PDC202XX_NEW || BLK_DEV_PDC202XX_OLD || BLK_DEV_TC86C001)
-	help
-	  Normally, IDE controllers built into the motherboard (on-board
-	  controllers) are assigned to ide0 and ide1 while those on add-in PCI
-	  cards (off-board controllers) are relegated to ide2 and ide3.
-	  Answering Y here will allow you to reverse the situation, with
-	  off-board controllers on ide0/1 and on-board controllers on ide2/3.
-	  This can improve the usability of some boot managers such as lilo
-	  when booting from a drive on an off-board controller.
-
-	  Note that, if you do this, the order of the hd* devices will be
-	  rearranged which may require modification of fstab and other files.
-
-	  Please also note that this method of assuring stable naming of
-	  IDE devices is unreliable and use other means for achieving it
-	  (i.e. udev).
-
-	  If in doubt, say N.
-
-config BLK_DEV_GENERIC
-	tristate "Generic PCI IDE Chipset Support"
-	select BLK_DEV_IDEPCI
-	help
-	  This option provides generic support for various PCI IDE Chipsets
-	  which otherwise might not be supported.
-
-config BLK_DEV_OPTI621
-	tristate "OPTi 82C621 chipset enhanced support"
-	select BLK_DEV_IDEPCI
-	help
-	  This is a driver for the OPTi 82C621 EIDE controller.
-	  Please read the comments at the top of <file:drivers/ide/opti621.c>.
-
-config BLK_DEV_RZ1000
-	tristate "RZ1000 chipset bugfix/support"
-	depends on X86
-	select BLK_DEV_IDEPCI
-	help
-	  The PC-Technologies RZ1000 IDE chip is used on many common 486 and
-	  Pentium motherboards, usually along with the "Neptune" chipset.
-	  Unfortunately, it has a rather nasty design flaw that can cause
-	  severe data corruption under many conditions. Say Y here to include
-	  code which automatically detects and corrects the problem under
-	  Linux. This may slow disk throughput by a few percent, but at least
-	  things will operate 100% reliably.
-
-config BLK_DEV_IDEDMA_PCI
-	bool
-	select BLK_DEV_IDEPCI
-	select BLK_DEV_IDEDMA_SFF
-
-config BLK_DEV_AEC62XX
-	tristate "AEC62XX chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds explicit support for Acard AEC62xx (Artop ATP8xx)
-	  IDE controllers. This allows the kernel to change PIO, DMA and UDMA
-	  speeds and to configure the chip to optimum performance.
-
-config BLK_DEV_ALI15X3
-	tristate "ALI M15x3 chipset support"
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver ensures (U)DMA support for ALI 1533, 1543 and 1543C
-	  onboard chipsets.  It also tests for Simplex mode and enables
-	  normal dual channel support.
-
-	  Please read the comments at the top of
-	  <file:drivers/ide/alim15x3.c>.
-
-	  If unsure, say N.
-
-config BLK_DEV_AMD74XX
-	tristate "AMD and nVidia IDE support"
-	depends on !ARM
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds explicit support for AMD-7xx and AMD-8111 chips
-	  and also for the nVidia nForce chip.  This allows the kernel to
-	  change PIO, DMA and UDMA speeds and to configure the chip to
-	  optimum performance.
-
-config BLK_DEV_ATIIXP
-	tristate "ATI IXP chipset IDE support"
-	depends on X86
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds explicit support for ATI IXP chipset.
-	  This allows the kernel to change PIO, DMA and UDMA speeds
-	  and to configure the chip to optimum performance.
-
-	  Say Y here if you have an ATI IXP chipset IDE controller.
-
-config BLK_DEV_CMD64X
-	tristate "CMD64{3|6|8|9} chipset support"
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Say Y here if you have an IDE controller which uses any of these
-	  chipsets: CMD643, CMD646, or CMD648.
-
-config BLK_DEV_TRIFLEX
-	tristate "Compaq Triflex IDE support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Say Y here if you have a Compaq Triflex IDE controller, such
-	  as those commonly found on Compaq Pentium-Pro systems
-
-config BLK_DEV_CY82C693
-	tristate "CY82C693 chipset support"
-	depends on ALPHA
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds detection and support for the CY82C693 chipset
-	  used on Digital's PC-Alpha 164SX boards.
-
-config BLK_DEV_CS5520
-	tristate "Cyrix CS5510/20 MediaGX chipset support (VERY EXPERIMENTAL)"
-	depends on X86_32 || COMPILE_TEST
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Include support for PIO tuning and virtual DMA on the Cyrix MediaGX
-	  5510/5520 chipset. This will automatically be detected and
-	  configured if found.
-
-	  It is safe to say Y to this question.
-
-config BLK_DEV_CS5530
-	tristate "Cyrix/National Semiconductor CS5530 MediaGX chipset support"
-	depends on X86_32 || COMPILE_TEST
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Include support for UDMA on the Cyrix MediaGX 5530 chipset. This
-	  will automatically be detected and configured if found.
-
-	  It is safe to say Y to this question.
-
-config BLK_DEV_CS5535
-	tristate "AMD CS5535 chipset support"
-	depends on X86_32
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Include support for UDMA on the NSC/AMD CS5535 companion chipset.
-	  This will automatically be detected and configured if found.
-
-	  It is safe to say Y to this question.
-
-config BLK_DEV_CS5536
-	tristate "CS5536 chipset support"
-	depends on X86_32
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This option enables support for the AMD CS5536
-	  companion chip used with the Geode LX processor family.
-
-	  If unsure, say N.
-
-config BLK_DEV_HPT366
-	tristate "HPT36X/37X chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  HPT366 is an Ultra DMA chipset for ATA-66.
-	  HPT368 is an Ultra DMA chipset for ATA-66 RAID Based.
-	  HPT370 is an Ultra DMA chipset for ATA-100.
-	  HPT372 is an Ultra DMA chipset for ATA-100.
-	  HPT374 is an Ultra DMA chipset for ATA-100.
-
-	  This driver adds up to 4 more EIDE devices sharing a single
-	  interrupt.
-
-	  The HPT366 chipset in its current form is bootable. One solution
-	  for this problem are special LILO commands for redirecting the
-	  reference to device 0x80. The other solution is to say Y to "Boot
-	  off-board chipsets first support" (CONFIG_BLK_DEV_OFFBOARD) unless
-	  your mother board has the chipset natively mounted. Regardless one
-	  should use the fore mentioned option and call at LILO.
-
-	  This driver requires dynamic tuning of the chipset during the
-	  ide-probe at boot. It is reported to support DVD II drives, by the
-	  manufacturer.
-
-config BLK_DEV_JMICRON
-	tristate "JMicron JMB36x support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Basic support for the JMicron ATA controllers. For full support
-	  use the libata drivers.
-
-config BLK_DEV_SC1200
-	tristate "National SCx200 chipset support"
-	depends on X86_32 || COMPILE_TEST
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds support for the on-board IDE controller on the
-	  National SCx200 series of embedded x86 "Geode" systems.
-
-config BLK_DEV_PIIX
-	tristate "Intel PIIX/ICH chipsets support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds explicit support for Intel PIIX and ICH chips.
-	  This allows the kernel to change PIO, DMA and UDMA speeds and to
-	  configure the chip to optimum performance.
-
-config BLK_DEV_IT8172
-	tristate "IT8172 IDE support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds support for the IDE controller on the
-	  IT8172 System Controller.
-
-config BLK_DEV_IT8213
-	tristate "IT8213 IDE support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds support for the ITE 8213 IDE controller.
-
-config BLK_DEV_IT821X
-	tristate "IT821X IDE support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds support for the ITE 8211 IDE controller and the
-	  IT 8212 IDE RAID controller in both RAID and pass-through mode.
-
-config BLK_DEV_NS87415
-	tristate "NS87415 chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds detection and support for the NS87415 chip
-	  (used mainly on SPARC64 and PA-RISC machines).
-
-	  Please read the comments at the top of <file:drivers/ide/ns87415.c>.
-
-config BLK_DEV_PDC202XX_OLD
-	tristate "PROMISE PDC202{46|62|65|67} support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  Promise Ultra33 or PDC20246
-	  Promise Ultra66 or PDC20262
-	  Promise Ultra100 or PDC20265/PDC20267/PDC20268
-
-	  This driver adds up to 4 more EIDE devices sharing a single
-	  interrupt. This add-on card is a bootable PCI UDMA controller. Since
-	  multiple cards can be installed and there are BIOS ROM problems that
-	  happen if the BIOS revisions of all installed cards (three-max) do
-	  not match, the driver attempts to do dynamic tuning of the chipset
-	  at boot-time for max-speed.  Ultra33 BIOS 1.25 or newer is required
-	  for more than one card.
-
-	  Please read the comments at the top of
-	  <file:drivers/ide/pdc202xx_old.c>.
-
-	  If unsure, say N.
-
-config BLK_DEV_PDC202XX_NEW
-	tristate "PROMISE PDC202{68|69|70|71|75|76|77} support"
-	select BLK_DEV_IDEDMA_PCI
-
-config BLK_DEV_SVWKS
-	tristate "ServerWorks OSB4/CSB5/CSB6 chipsets support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds PIO/(U)DMA support for the ServerWorks OSB4/CSB5
-	  chipsets.
-
-config BLK_DEV_SIIMAGE
-	tristate "Silicon Image chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds PIO/(U)DMA support for the SI CMD680 and SII
-	  3112 (Serial ATA) chips.
-
-config BLK_DEV_SIS5513
-	tristate "SiS5513 chipset support"
-	depends on X86
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver ensures (U)DMA support for SIS5513 chipset family based
-	  mainboards.
-
-	  The following chipsets are supported:
-	  ATA16:  SiS5511, SiS5513
-	  ATA33:  SiS5591, SiS5597, SiS5598, SiS5600
-	  ATA66:  SiS530, SiS540, SiS620, SiS630, SiS640
-	  ATA100: SiS635, SiS645, SiS650, SiS730, SiS735, SiS740,
-	  SiS745, SiS750
-
-	  Please read the comments at the top of <file:drivers/ide/sis5513.c>.
-
-config BLK_DEV_SL82C105
-	tristate "Winbond SL82c105 support"
-	depends on (PPC || ARM)
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  If you have a Winbond SL82c105 IDE controller, say Y here to enable
-	  special configuration for this chip. This is common on various CHRP
-	  motherboards, but could be used elsewhere. If in doubt, say Y.
-
-config BLK_DEV_SLC90E66
-	tristate "SLC90E66 chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver ensures (U)DMA support for Victory66 SouthBridges for
-	  SMsC with Intel NorthBridges.  This is an Ultra66 based chipset.
-	  The nice thing about it is that you can mix Ultra/DMA/PIO devices
-	  and it will handle timing cycles.  Since this is an improved
-	  look-a-like to the PIIX4 it should be a nice addition.
-
-	  Please read the comments at the top of
-	  <file:drivers/ide/slc90e66.c>.
-
-config BLK_DEV_TRM290
-	tristate "Tekram TRM290 chipset support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds support for bus master DMA transfers
-	  using the Tekram TRM290 PCI IDE chip. Volunteers are
-	  needed for further tweaking and development.
-	  Please read the comments at the top of <file:drivers/ide/trm290.c>.
-
-config BLK_DEV_VIA82CXXX
-	tristate "VIA82CXXX chipset support"
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver adds explicit support for VIA BusMastering IDE chips.
-	  This allows the kernel to change PIO, DMA and UDMA speeds and to
-	  configure the chip to optimum performance.
-
-config BLK_DEV_TC86C001
-	tristate "Toshiba TC86C001 support"
-	select BLK_DEV_IDEDMA_PCI
-	help
-	This driver adds support for Toshiba TC86C001 GOKU-S chip.
-
-endif
-
-# TODO: BLK_DEV_IDEDMA_PCI -> BLK_DEV_IDEDMA_SFF
-config BLK_DEV_IDE_PMAC
-	tristate "PowerMac on-board IDE support"
-	depends on PPC_PMAC
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_PCI
-	help
-	  This driver provides support for the on-board IDE controller on
-	  most of the recent Apple Power Macintoshes and PowerBooks.
-	  If unsure, say Y.
-
-config BLK_DEV_IDE_PMAC_ATA100FIRST
-	bool "Probe on-board ATA/100 (Kauai) first"
-	depends on BLK_DEV_IDE_PMAC
-	help
-	  This option will cause the ATA/100 controller found in UniNorth2
-	  based machines (Windtunnel PowerMac, Aluminium PowerBooks, ...)
-	  to be probed before the ATA/66 and ATA/33 controllers. Without
-	  these, those machine used to have the hard disk on hdc and the
-	  CD-ROM on hda. This option changes this to more natural hda for
-	  hard disk and hdc for CD-ROM.
-
-config BLK_DEV_IDE_TX4938
-	tristate "TX4938 internal IDE support"
-	depends on SOC_TX4938
-	select IDE_TIMINGS
-
-config BLK_DEV_IDE_TX4939
-	tristate "TX4939 internal IDE support"
-	depends on SOC_TX4939
-	select BLK_DEV_IDEDMA_SFF
-
-config BLK_DEV_IDE_ICSIDE
-	tristate "ICS IDE interface support"
-	depends on ARM && ARCH_ACORN
-	help
-	  On Acorn systems, say Y here if you wish to use the ICS IDE
-	  interface card.  This is not required for ICS partition support.
-	  If you are unsure, say N to this.
-
-config BLK_DEV_IDEDMA_ICS
-	bool "ICS DMA support"
-	depends on BLK_DEV_IDE_ICSIDE
-	help
-	  Say Y here if you want to add DMA (Direct Memory Access) support to
-	  the ICS IDE driver.
-
-config BLK_DEV_IDE_RAPIDE
-	tristate "RapIDE interface support"
-	depends on ARM && ARCH_ACORN
-	help
-	  Say Y here if you want to support the Yellowstone RapIDE controller
-	  manufactured for use with Acorn computers.
-
-config BLK_DEV_GAYLE
-	tristate "Amiga Gayle IDE interface support"
-	depends on AMIGA
-	help
-	  This is the IDE driver for the Amiga Gayle IDE interface. It supports
-	  both the `A1200 style' and `A4000 style' of the Gayle IDE interface,
-	  This includes on-board IDE interfaces on some Amiga models (A600,
-	  A1200, A4000, and A4000T), and IDE interfaces on the Zorro expansion
-	  bus (M-Tech E-Matrix 530 expansion card).
-
-	  It also provides support for the so-called `IDE doublers' (made
-	  by various manufacturers, e.g. Eyetech) that can be connected to
-	  the on-board IDE interface of some Amiga models. Using such an IDE
-	  doubler, you can connect up to four instead of two IDE devices to
-	  the Amiga's on-board IDE interface. The feature is enabled at kernel
-	  runtime using the "gayle.doubler" kernel boot parameter.
-
-	  Say Y if you have an Amiga with a Gayle IDE interface and want to use
-	  IDE devices (hard disks, CD-ROM drives, etc.) that are connected to
-	  it.
-
-	  Note that you also have to enable Zorro bus support if you want to
-	  use Gayle IDE interfaces on the Zorro expansion bus.
-
-config BLK_DEV_BUDDHA
-	tristate "Buddha/Catweasel/X-Surf IDE interface support"
-	depends on ZORRO
-	help
-	  This is the IDE driver for the IDE interfaces on the Buddha, Catweasel
-	  and X-Surf expansion boards.  It supports up to two interfaces on the
-	  Buddha, three on the Catweasel and two on the X-Surf.
-
-	  Say Y if you have a Buddha or Catweasel expansion board and want to
-	  use IDE devices (hard disks, CD-ROM drives, etc.) that are connected
-	  to one of its IDE interfaces.
-
-config BLK_DEV_FALCON_IDE
-	tristate "Falcon and Q40/Q60 IDE interface support"
-	depends on ATARI || Q40
-	help
-	  This is the IDE driver for the on-board IDE interface on the Atari
-	  Falcon and Q40/Q60. Say Y if you have such a machine and want to use
-	  IDE devices (hard disks, CD-ROM drives, etc.) that are connected to
-	  the on-board IDE interface.
-
-config BLK_DEV_PALMCHIP_BK3710
-	tristate "Palmchip bk3710 IDE controller support"
-	depends on ARCH_DAVINCI
-	select IDE_TIMINGS
-	select BLK_DEV_IDEDMA_SFF
-	help
-	  Say Y here if you want to support the onchip IDE controller on the
-	  TI DaVinci SoC
-
-# no isa -> no vlb
-if ISA && (ALPHA || X86 || MIPS)
-
-comment "Other IDE chipsets support"
-comment "Note: most of these also require special kernel boot parameters"
-
-config BLK_DEV_4DRIVES
-	tristate "Generic 4 drives/port support"
-	help
-	  Certain older chipsets, including the Tekram 690CD, use a single set
-	  of I/O ports at 0x1f0 to control up to four drives, instead of the
-	  customary two drives per port. Support for this can be enabled at
-	  runtime using the "ide-4drives.probe" kernel boot parameter if you
-	  say Y here.
-
-config BLK_DEV_ALI14XX
-	tristate "ALI M14xx support"
-	select IDE_TIMINGS
-	select IDE_LEGACY
-	help
-	  This driver is enabled at runtime using the "ali14xx.probe" kernel
-	  boot parameter.  It enables support for the secondary IDE interface
-	  of the ALI M1439/1443/1445/1487/1489 chipsets, and permits faster
-	  I/O speeds to be set as well.
-	  See the files <file:Documentation/ide/ide.rst> and
-	  <file:drivers/ide/ali14xx.c> for more info.
-
-config BLK_DEV_DTC2278
-	tristate "DTC-2278 support"
-	select IDE_XFER_MODE
-	select IDE_LEGACY
-	help
-	  This driver is enabled at runtime using the "dtc2278.probe" kernel
-	  boot parameter. It enables support for the secondary IDE interface
-	  of the DTC-2278 card, and permits faster I/O speeds to be set as
-	  well. See the <file:Documentation/ide/ide.rst> and
-	  <file:drivers/ide/dtc2278.c> files for more info.
-
-config BLK_DEV_HT6560B
-	tristate "Holtek HT6560B support"
-	select IDE_TIMINGS
-	select IDE_LEGACY
-	help
-	  This driver is enabled at runtime using the "ht6560b.probe" kernel
-	  boot parameter. It enables support for the secondary IDE interface
-	  of the Holtek card, and permits faster I/O speeds to be set as well.
-	  See the <file:Documentation/ide/ide.rst> and
-	  <file:drivers/ide/ht6560b.c> files for more info.
-
-config BLK_DEV_QD65XX
-	tristate "QDI QD65xx support"
-	select IDE_TIMINGS
-	select IDE_LEGACY
-	help
-	  This driver is enabled at runtime using the "qd65xx.probe" kernel
-	  boot parameter.  It permits faster I/O speeds to be set.  See the
-	  <file:Documentation/ide/ide.rst> and <file:drivers/ide/qd65xx.c>
-	  for more info.
-
-config BLK_DEV_UMC8672
-	tristate "UMC-8672 support"
-	select IDE_XFER_MODE
-	select IDE_LEGACY
-	help
-	  This driver is enabled at runtime using the "umc8672.probe" kernel
-	  boot parameter. It enables support for the secondary IDE interface
-	  of the UMC-8672, and permits faster I/O speeds to be set as well.
-	  See the files <file:Documentation/ide/ide.rst> and
-	  <file:drivers/ide/umc8672.c> for more info.
-
-endif
-
-config BLK_DEV_IDEDMA
-	def_bool BLK_DEV_IDEDMA_SFF || BLK_DEV_IDEDMA_ICS
-	select IDE_XFER_MODE
-
-endif # IDE
diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile
deleted file mode 100644
index 991eb72a786bc..0000000000000
--- a/drivers/ide/Makefile
+++ /dev/null
@@ -1,109 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# link order is important here
-#
-
-ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \
-	      ide-taskfile.o ide-pm.o ide-park.o ide-sysfs.o ide-devsets.o \
-	      ide-io-std.o ide-eh.o
-
-# core IDE code
-ide-core-$(CONFIG_IDE_XFER_MODE)	+= ide-pio-blacklist.o ide-xfer-mode.o
-ide-core-$(CONFIG_IDE_TIMINGS)		+= ide-timings.o
-ide-core-$(CONFIG_IDE_ATAPI)		+= ide-atapi.o
-ide-core-$(CONFIG_BLK_DEV_IDEPCI)	+= setup-pci.o
-ide-core-$(CONFIG_BLK_DEV_IDEDMA)	+= ide-dma.o
-ide-core-$(CONFIG_BLK_DEV_IDEDMA_SFF)	+= ide-dma-sff.o
-ide-core-$(CONFIG_IDE_PROC_FS)		+= ide-proc.o
-ide-core-$(CONFIG_BLK_DEV_IDEACPI)	+= ide-acpi.o
-ide-core-$(CONFIG_IDE_LEGACY)		+= ide-legacy.o
-
-obj-$(CONFIG_IDE)			+= ide-core.o
-
-obj-$(CONFIG_BLK_DEV_ALI14XX)		+= ali14xx.o
-obj-$(CONFIG_BLK_DEV_UMC8672)		+= umc8672.o
-obj-$(CONFIG_BLK_DEV_DTC2278)		+= dtc2278.o
-obj-$(CONFIG_BLK_DEV_HT6560B)		+= ht6560b.o
-obj-$(CONFIG_BLK_DEV_QD65XX)		+= qd65xx.o
-obj-$(CONFIG_BLK_DEV_4DRIVES)		+= ide-4drives.o
-
-obj-$(CONFIG_BLK_DEV_GAYLE)		+= gayle.o
-obj-$(CONFIG_BLK_DEV_FALCON_IDE)	+= falconide.o
-obj-$(CONFIG_BLK_DEV_BUDDHA)		+= buddha.o
-
-obj-$(CONFIG_BLK_DEV_AEC62XX)		+= aec62xx.o
-obj-$(CONFIG_BLK_DEV_ALI15X3)		+= alim15x3.o
-obj-$(CONFIG_BLK_DEV_AMD74XX)		+= amd74xx.o
-obj-$(CONFIG_BLK_DEV_ATIIXP)		+= atiixp.o
-obj-$(CONFIG_BLK_DEV_CMD64X)		+= cmd64x.o
-obj-$(CONFIG_BLK_DEV_CS5520)		+= cs5520.o
-obj-$(CONFIG_BLK_DEV_CS5530)		+= cs5530.o
-obj-$(CONFIG_BLK_DEV_CS5535)		+= cs5535.o
-obj-$(CONFIG_BLK_DEV_CS5536)		+= cs5536.o
-obj-$(CONFIG_BLK_DEV_SC1200)		+= sc1200.o
-obj-$(CONFIG_BLK_DEV_CY82C693)		+= cy82c693.o
-obj-$(CONFIG_BLK_DEV_DELKIN)		+= delkin_cb.o
-obj-$(CONFIG_BLK_DEV_HPT366)		+= hpt366.o
-obj-$(CONFIG_BLK_DEV_IT8172)		+= it8172.o
-obj-$(CONFIG_BLK_DEV_IT8213)		+= it8213.o
-obj-$(CONFIG_BLK_DEV_IT821X)		+= it821x.o
-obj-$(CONFIG_BLK_DEV_JMICRON)		+= jmicron.o
-obj-$(CONFIG_BLK_DEV_NS87415)		+= ns87415.o
-obj-$(CONFIG_BLK_DEV_OPTI621)		+= opti621.o
-obj-$(CONFIG_BLK_DEV_PDC202XX_OLD)	+= pdc202xx_old.o
-obj-$(CONFIG_BLK_DEV_PDC202XX_NEW)	+= pdc202xx_new.o
-obj-$(CONFIG_BLK_DEV_PIIX)		+= piix.o
-obj-$(CONFIG_BLK_DEV_RZ1000)		+= rz1000.o
-obj-$(CONFIG_BLK_DEV_SVWKS)		+= serverworks.o
-obj-$(CONFIG_BLK_DEV_SIIMAGE)		+= siimage.o
-obj-$(CONFIG_BLK_DEV_SIS5513)		+= sis5513.o
-obj-$(CONFIG_BLK_DEV_SL82C105)		+= sl82c105.o
-obj-$(CONFIG_BLK_DEV_SLC90E66)		+= slc90e66.o
-obj-$(CONFIG_BLK_DEV_TC86C001)		+= tc86c001.o
-obj-$(CONFIG_BLK_DEV_TRIFLEX)		+= triflex.o
-obj-$(CONFIG_BLK_DEV_TRM290)		+= trm290.o
-obj-$(CONFIG_BLK_DEV_VIA82CXXX)		+= via82cxxx.o
-
-# Must appear at the end of the block
-obj-$(CONFIG_BLK_DEV_GENERIC)		+= ide-pci-generic.o
-
-obj-$(CONFIG_IDEPCI_PCIBUS_ORDER)	+= ide-scan-pci.o
-
-obj-$(CONFIG_BLK_DEV_CMD640)		+= cmd640.o
-
-obj-$(CONFIG_BLK_DEV_IDE_PMAC)		+= pmac.o
-
-obj-$(CONFIG_IDE_GENERIC)		+= ide-generic.o
-obj-$(CONFIG_BLK_DEV_IDEPNP)		+= ide-pnp.o
-
-ide-gd_mod-y += ide-gd.o
-ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o
-
-ifeq ($(CONFIG_IDE_GD_ATA), y)
-	ide-gd_mod-y += ide-disk.o ide-disk_ioctl.o
-ifeq ($(CONFIG_IDE_PROC_FS), y)
-	ide-gd_mod-y += ide-disk_proc.o
-endif
-endif
-
-ifeq ($(CONFIG_IDE_GD_ATAPI), y)
-	ide-gd_mod-y += ide-floppy.o ide-floppy_ioctl.o
-ifeq ($(CONFIG_IDE_PROC_FS), y)
-	ide-gd_mod-y += ide-floppy_proc.o
-endif
-endif
-
-obj-$(CONFIG_IDE_GD)			+= ide-gd_mod.o
-obj-$(CONFIG_BLK_DEV_IDECD)		+= ide-cd_mod.o
-obj-$(CONFIG_BLK_DEV_IDETAPE)		+= ide-tape.o
-
-obj-$(CONFIG_BLK_DEV_IDECS)		+= ide-cs.o
-
-obj-$(CONFIG_BLK_DEV_PLATFORM)		+= ide_platform.o
-
-obj-$(CONFIG_BLK_DEV_IDE_ICSIDE)	+= icside.o
-obj-$(CONFIG_BLK_DEV_IDE_RAPIDE)	+= rapide.o
-obj-$(CONFIG_BLK_DEV_PALMCHIP_BK3710)	+= palm_bk3710.o
-
-obj-$(CONFIG_BLK_DEV_IDE_TX4938)	+= tx4938ide.o
-obj-$(CONFIG_BLK_DEV_IDE_TX4939)	+= tx4939ide.o
diff --git a/drivers/ide/aec62xx.c b/drivers/ide/aec62xx.c
deleted file mode 100644
index 4c959ce41ba9a..0000000000000
--- a/drivers/ide/aec62xx.c
+++ /dev/null
@@ -1,331 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1999-2002	Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2007		MontaVista Software, Inc. <source@mvista.com>
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "aec62xx"
-
-struct chipset_bus_clock_list_entry {
-	u8 xfer_speed;
-	u8 chipset_settings;
-	u8 ultra_settings;
-};
-
-static const struct chipset_bus_clock_list_entry aec6xxx_33_base [] = {
-	{	XFER_UDMA_6,	0x31,	0x07	},
-	{	XFER_UDMA_5,	0x31,	0x06	},
-	{	XFER_UDMA_4,	0x31,	0x05	},
-	{	XFER_UDMA_3,	0x31,	0x04	},
-	{	XFER_UDMA_2,	0x31,	0x03	},
-	{	XFER_UDMA_1,	0x31,	0x02	},
-	{	XFER_UDMA_0,	0x31,	0x01	},
-
-	{	XFER_MW_DMA_2,	0x31,	0x00	},
-	{	XFER_MW_DMA_1,	0x31,	0x00	},
-	{	XFER_MW_DMA_0,	0x0a,	0x00	},
-	{	XFER_PIO_4,	0x31,	0x00	},
-	{	XFER_PIO_3,	0x33,	0x00	},
-	{	XFER_PIO_2,	0x08,	0x00	},
-	{	XFER_PIO_1,	0x0a,	0x00	},
-	{	XFER_PIO_0,	0x00,	0x00	},
-	{	0,		0x00,	0x00	}
-};
-
-static const struct chipset_bus_clock_list_entry aec6xxx_34_base [] = {
-	{	XFER_UDMA_6,	0x41,	0x06	},
-	{	XFER_UDMA_5,	0x41,	0x05	},
-	{	XFER_UDMA_4,	0x41,	0x04	},
-	{	XFER_UDMA_3,	0x41,	0x03	},
-	{	XFER_UDMA_2,	0x41,	0x02	},
-	{	XFER_UDMA_1,	0x41,	0x01	},
-	{	XFER_UDMA_0,	0x41,	0x01	},
-
-	{	XFER_MW_DMA_2,	0x41,	0x00	},
-	{	XFER_MW_DMA_1,	0x42,	0x00	},
-	{	XFER_MW_DMA_0,	0x7a,	0x00	},
-	{	XFER_PIO_4,	0x41,	0x00	},
-	{	XFER_PIO_3,	0x43,	0x00	},
-	{	XFER_PIO_2,	0x78,	0x00	},
-	{	XFER_PIO_1,	0x7a,	0x00	},
-	{	XFER_PIO_0,	0x70,	0x00	},
-	{	0,		0x00,	0x00	}
-};
-
-/*
- * TO DO: active tuning and correction of cards without a bios.
- */
-static u8 pci_bus_clock_list (u8 speed, struct chipset_bus_clock_list_entry * chipset_table)
-{
-	for ( ; chipset_table->xfer_speed ; chipset_table++)
-		if (chipset_table->xfer_speed == speed) {
-			return chipset_table->chipset_settings;
-		}
-	return chipset_table->chipset_settings;
-}
-
-static u8 pci_bus_clock_list_ultra (u8 speed, struct chipset_bus_clock_list_entry * chipset_table)
-{
-	for ( ; chipset_table->xfer_speed ; chipset_table++)
-		if (chipset_table->xfer_speed == speed) {
-			return chipset_table->ultra_settings;
-		}
-	return chipset_table->ultra_settings;
-}
-
-static void aec6210_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct ide_host *host	= pci_get_drvdata(dev);
-	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
-	u16 d_conf		= 0;
-	u8 ultra = 0, ultra_conf = 0;
-	u8 tmp0 = 0, tmp1 = 0, tmp2 = 0;
-	const u8 speed = drive->dma_mode;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	/* 0x40|(2*drive->dn): Active, 0x41|(2*drive->dn): Recovery */
-	pci_read_config_word(dev, 0x40|(2*drive->dn), &d_conf);
-	tmp0 = pci_bus_clock_list(speed, bus_clock);
-	d_conf = ((tmp0 & 0xf0) << 4) | (tmp0 & 0xf);
-	pci_write_config_word(dev, 0x40|(2*drive->dn), d_conf);
-
-	tmp1 = 0x00;
-	tmp2 = 0x00;
-	pci_read_config_byte(dev, 0x54, &ultra);
-	tmp1 = ((0x00 << (2*drive->dn)) | (ultra & ~(3 << (2*drive->dn))));
-	ultra_conf = pci_bus_clock_list_ultra(speed, bus_clock);
-	tmp2 = ((ultra_conf << (2*drive->dn)) | (tmp1 & ~(3 << (2*drive->dn))));
-	pci_write_config_byte(dev, 0x54, tmp2);
-	local_irq_restore(flags);
-}
-
-static void aec6260_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct ide_host *host	= pci_get_drvdata(dev);
-	struct chipset_bus_clock_list_entry *bus_clock = host->host_priv;
-	u8 unit			= drive->dn & 1;
-	u8 tmp1 = 0, tmp2 = 0;
-	u8 ultra = 0, drive_conf = 0, ultra_conf = 0;
-	const u8 speed = drive->dma_mode;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	/* high 4-bits: Active, low 4-bits: Recovery */
-	pci_read_config_byte(dev, 0x40|drive->dn, &drive_conf);
-	drive_conf = pci_bus_clock_list(speed, bus_clock);
-	pci_write_config_byte(dev, 0x40|drive->dn, drive_conf);
-
-	pci_read_config_byte(dev, (0x44|hwif->channel), &ultra);
-	tmp1 = ((0x00 << (4*unit)) | (ultra & ~(7 << (4*unit))));
-	ultra_conf = pci_bus_clock_list_ultra(speed, bus_clock);
-	tmp2 = ((ultra_conf << (4*unit)) | (tmp1 & ~(7 << (4*unit))));
-	pci_write_config_byte(dev, (0x44|hwif->channel), tmp2);
-	local_irq_restore(flags);
-}
-
-static void aec_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	hwif->port_ops->set_dma_mode(hwif, drive);
-}
-
-static int init_chipset_aec62xx(struct pci_dev *dev)
-{
-	/* These are necessary to get AEC6280 Macintosh cards to work */
-	if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) ||
-	    (dev->device == PCI_DEVICE_ID_ARTOP_ATP865R)) {
-		u8 reg49h = 0, reg4ah = 0;
-		/* Clear reset and test bits.  */
-		pci_read_config_byte(dev, 0x49, &reg49h);
-		pci_write_config_byte(dev, 0x49, reg49h & ~0x30);
-		/* Enable chip interrupt output.  */
-		pci_read_config_byte(dev, 0x4a, &reg4ah);
-		pci_write_config_byte(dev, 0x4a, reg4ah & ~0x01);
-		/* Enable burst mode. */
-		pci_read_config_byte(dev, 0x4a, &reg4ah);
-		pci_write_config_byte(dev, 0x4a, reg4ah | 0x80);
-	}
-
-	return 0;
-}
-
-static u8 atp86x_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 ata66 = 0, mask = hwif->channel ? 0x02 : 0x01;
-
-	pci_read_config_byte(dev, 0x49, &ata66);
-
-	return (ata66 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-static const struct ide_port_ops atp850_port_ops = {
-	.set_pio_mode		= aec_set_pio_mode,
-	.set_dma_mode		= aec6210_set_mode,
-};
-
-static const struct ide_port_ops atp86x_port_ops = {
-	.set_pio_mode		= aec_set_pio_mode,
-	.set_dma_mode		= aec6260_set_mode,
-	.cable_detect		= atp86x_cable_detect,
-};
-
-static const struct ide_port_info aec62xx_chipsets[] = {
-	{	/* 0: AEC6210 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_aec62xx,
-		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
-		.port_ops	= &atp850_port_ops,
-		.host_flags	= IDE_HFLAG_SERIALIZE |
-				  IDE_HFLAG_NO_ATAPI_DMA |
-				  IDE_HFLAG_NO_DSC |
-				  IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-	},
-	{	/* 1: AEC6260 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_aec62xx,
-		.port_ops	= &atp86x_port_ops,
-		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA | IDE_HFLAG_NO_AUTODMA |
-				  IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA4,
-	},
-	{	/* 2: AEC6260R */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_aec62xx,
-		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
-		.port_ops	= &atp86x_port_ops,
-		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
-				  IDE_HFLAG_NON_BOOTABLE,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA4,
-	},
-	{	/* 3: AEC6280 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_aec62xx,
-		.port_ops	= &atp86x_port_ops,
-		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
-				  IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	},
-	{	/* 4: AEC6280R */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_aec62xx,
-		.enablebits	= {{0x4a,0x02,0x02}, {0x4a,0x04,0x04}},
-		.port_ops	= &atp86x_port_ops,
-		.host_flags	= IDE_HFLAG_NO_ATAPI_DMA |
-				  IDE_HFLAG_OFF_BOARD,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	}
-};
-
-/**
- *	aec62xx_init_one	-	called when a AEC is found
- *	@dev: the aec62xx device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- *
- *	NOTE: since we're going to modify the 'name' field for AEC-6[26]80[R]
- *	chips, pass a local copy of 'struct ide_port_info' down the call chain.
- */
-
-static int aec62xx_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct chipset_bus_clock_list_entry *bus_clock;
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-	int err;
-
-	if (bus_speed <= 33)
-		bus_clock = aec6xxx_33_base;
-	else
-		bus_clock = aec6xxx_34_base;
-
-	err = pci_enable_device(dev);
-	if (err)
-		return err;
-
-	d = aec62xx_chipsets[idx];
-
-	if (idx == 3 || idx == 4) {
-		unsigned long dma_base = pci_resource_start(dev, 4);
-
-		if (inb(dma_base + 2) & 0x10) {
-			printk(KERN_INFO DRV_NAME " %s: AEC6880%s card detected"
-				"\n", pci_name(dev), (idx == 4) ? "R" : "");
-			d.udma_mask = ATA_UDMA6;
-		}
-	}
-
-	err = ide_pci_init_one(dev, &d, (void *)bus_clock);
-	if (err)
-		pci_disable_device(dev);
-
-	return err;
-}
-
-static void aec62xx_remove(struct pci_dev *dev)
-{
-	ide_pci_remove(dev);
-	pci_disable_device(dev);
-}
-
-static const struct pci_device_id aec62xx_pci_tbl[] = {
-	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP850UF), 0 },
-	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP860),   1 },
-	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP860R),  2 },
-	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP865),   3 },
-	{ PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP865R),  4 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, aec62xx_pci_tbl);
-
-static struct pci_driver aec62xx_pci_driver = {
-	.name		= "AEC62xx_IDE",
-	.id_table	= aec62xx_pci_tbl,
-	.probe		= aec62xx_init_one,
-	.remove		= aec62xx_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init aec62xx_ide_init(void)
-{
-	return ide_pci_register_driver(&aec62xx_pci_driver);
-}
-
-static void __exit aec62xx_ide_exit(void)
-{
-	pci_unregister_driver(&aec62xx_pci_driver);
-}
-
-module_init(aec62xx_ide_init);
-module_exit(aec62xx_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for ARTOP AEC62xx IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ali14xx.c b/drivers/ide/ali14xx.c
deleted file mode 100644
index 3268931c2c7a2..0000000000000
--- a/drivers/ide/ali14xx.c
+++ /dev/null
@@ -1,250 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1996  Linus Torvalds & author (see below)
- */
-
-/*
- * ALI M14xx chipset EIDE controller
- *
- * Works for ALI M1439/1443/1445/1487/1489 chipsets.
- *
- * Adapted from code developed by derekn@vw.ece.cmu.edu.  -ml
- * Derek's notes follow:
- *
- * I think the code should be pretty understandable,
- * but I'll be happy to (try to) answer questions.
- *
- * The critical part is in the setupDrive function.  The initRegisters
- * function doesn't seem to be necessary, but the DOS driver does it, so
- * I threw it in.
- *
- * I've only tested this on my system, which only has one disk.  I posted
- * it to comp.sys.linux.hardware, so maybe some other people will try it
- * out.
- *
- * Derek Noonburg  (derekn@ece.cmu.edu)
- * 95-sep-26
- *
- * Update 96-jul-13:
- *
- * I've since upgraded to two disks and a CD-ROM, with no trouble, and
- * I've also heard from several others who have used it successfully.
- * This driver appears to work with both the 1443/1445 and the 1487/1489
- * chipsets.  I've added support for PIO mode 4 for the 1487.  This
- * seems to work just fine on the 1443 also, although I'm not sure it's
- * advertised as supporting mode 4.  (I've been running a WDC AC21200 in
- * mode 4 for a while now with no trouble.)  -Derek
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "ali14xx"
-
-/* port addresses for auto-detection */
-#define ALI_NUM_PORTS 4
-static const int ports[ALI_NUM_PORTS] __initconst =
-	{ 0x074, 0x0f4, 0x034, 0x0e4 };
-
-/* register initialization data */
-typedef struct { u8 reg, data; } RegInitializer;
-
-static const RegInitializer initData[] __initconst = {
-	{0x01, 0x0f}, {0x02, 0x00}, {0x03, 0x00}, {0x04, 0x00},
-	{0x05, 0x00}, {0x06, 0x00}, {0x07, 0x2b}, {0x0a, 0x0f},
-	{0x25, 0x00}, {0x26, 0x00}, {0x27, 0x00}, {0x28, 0x00},
-	{0x29, 0x00}, {0x2a, 0x00}, {0x2f, 0x00}, {0x2b, 0x00},
-	{0x2c, 0x00}, {0x2d, 0x00}, {0x2e, 0x00}, {0x30, 0x00},
-	{0x31, 0x00}, {0x32, 0x00}, {0x33, 0x00}, {0x34, 0xff},
-	{0x35, 0x03}, {0x00, 0x00}
-};
-
-/* timing parameter registers for each drive */
-static struct { u8 reg1, reg2, reg3, reg4; } regTab[4] = {
-	{0x03, 0x26, 0x04, 0x27},     /* drive 0 */
-	{0x05, 0x28, 0x06, 0x29},     /* drive 1 */
-	{0x2b, 0x30, 0x2c, 0x31},     /* drive 2 */
-	{0x2d, 0x32, 0x2e, 0x33},     /* drive 3 */
-};
-
-static int basePort;	/* base port address */
-static int regPort;	/* port for register number */
-static int dataPort;	/* port for register data */
-static u8 regOn;	/* output to base port to access registers */
-static u8 regOff;	/* output to base port to close registers */
-
-/*------------------------------------------------------------------------*/
-
-/*
- * Read a controller register.
- */
-static inline u8 inReg(u8 reg)
-{
-	outb_p(reg, regPort);
-	return inb(dataPort);
-}
-
-/*
- * Write a controller register.
- */
-static void outReg(u8 data, u8 reg)
-{
-	outb_p(reg, regPort);
-	outb_p(data, dataPort);
-}
-
-static DEFINE_SPINLOCK(ali14xx_lock);
-
-/*
- * Set PIO mode for the specified drive.
- * This function computes timing parameters
- * and sets controller registers accordingly.
- */
-static void ali14xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	int driveNum;
-	int time1, time2;
-	u8 param1, param2, param3, param4;
-	unsigned long flags;
-	int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-
-	/* calculate timing, according to PIO mode */
-	time1 = ide_pio_cycle_time(drive, pio);
-	time2 = t->active;
-	param3 = param1 = (time2 * bus_speed + 999) / 1000;
-	param4 = param2 = (time1 * bus_speed + 999) / 1000 - param1;
-	if (pio < 3) {
-		param3 += 8;
-		param4 += 8;
-	}
-	printk(KERN_DEBUG "%s: PIO mode%d, t1=%dns, t2=%dns, cycles = %d+%d, %d+%d\n",
-		drive->name, pio, time1, time2, param1, param2, param3, param4);
-
-	/* stuff timing parameters into controller registers */
-	driveNum = (drive->hwif->index << 1) + (drive->dn & 1);
-	spin_lock_irqsave(&ali14xx_lock, flags);
-	outb_p(regOn, basePort);
-	outReg(param1, regTab[driveNum].reg1);
-	outReg(param2, regTab[driveNum].reg2);
-	outReg(param3, regTab[driveNum].reg3);
-	outReg(param4, regTab[driveNum].reg4);
-	outb_p(regOff, basePort);
-	spin_unlock_irqrestore(&ali14xx_lock, flags);
-}
-
-/*
- * Auto-detect the IDE controller port.
- */
-static int __init findPort(void)
-{
-	int i;
-	u8 t;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	for (i = 0; i < ALI_NUM_PORTS; ++i) {
-		basePort = ports[i];
-		regOff = inb(basePort);
-		for (regOn = 0x30; regOn <= 0x33; ++regOn) {
-			outb_p(regOn, basePort);
-			if (inb(basePort) == regOn) {
-				regPort = basePort + 4;
-				dataPort = basePort + 8;
-				t = inReg(0) & 0xf0;
-				outb_p(regOff, basePort);
-				local_irq_restore(flags);
-				if (t != 0x50)
-					return 0;
-				return 1;  /* success */
-			}
-		}
-		outb_p(regOff, basePort);
-	}
-	local_irq_restore(flags);
-	return 0;
-}
-
-/*
- * Initialize controller registers with default values.
- */
-static int __init initRegisters(void)
-{
-	const RegInitializer *p;
-	u8 t;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	outb_p(regOn, basePort);
-	for (p = initData; p->reg != 0; ++p)
-		outReg(p->data, p->reg);
-	outb_p(0x01, regPort);
-	t = inb(regPort) & 0x01;
-	outb_p(regOff, basePort);
-	local_irq_restore(flags);
-	return t;
-}
-
-static const struct ide_port_ops ali14xx_port_ops = {
-	.set_pio_mode		= ali14xx_set_pio_mode,
-};
-
-static const struct ide_port_info ali14xx_port_info = {
-	.name			= DRV_NAME,
-	.chipset		= ide_ali14xx,
-	.port_ops		= &ali14xx_port_ops,
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.pio_mask		= ATA_PIO4,
-};
-
-static int __init ali14xx_probe(void)
-{
-	printk(KERN_DEBUG "ali14xx: base=0x%03x, regOn=0x%02x.\n",
-			  basePort, regOn);
-
-	/* initialize controller registers */
-	if (!initRegisters()) {
-		printk(KERN_ERR "ali14xx: Chip initialization failed.\n");
-		return 1;
-	}
-
-	return ide_legacy_device_add(&ali14xx_port_info, 0);
-}
-
-static bool probe_ali14xx;
-
-module_param_named(probe, probe_ali14xx, bool, 0);
-MODULE_PARM_DESC(probe, "probe for ALI M14xx chipsets");
-
-static int __init ali14xx_init(void)
-{
-	if (probe_ali14xx == 0)
-		goto out;
-
-	/* auto-detect IDE controller port */
-	if (findPort()) {
-		if (ali14xx_probe())
-			return -ENODEV;
-		return 0;
-	}
-	printk(KERN_ERR "ali14xx: not found.\n");
-out:
-	return -ENODEV;
-}
-
-module_init(ali14xx_init);
-
-MODULE_AUTHOR("see local file");
-MODULE_DESCRIPTION("support of ALI 14XX IDE chipsets");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
deleted file mode 100644
index 3265970aee34f..0000000000000
--- a/drivers/ide/alim15x3.c
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- *  Copyright (C) 1998-2000 Michel Aubry, Maintainer
- *  Copyright (C) 1998-2000 Andrzej Krzysztofowicz, Maintainer
- *  Copyright (C) 1999-2000 CJ, cjtsai@ali.com.tw, Maintainer
- *
- *  Copyright (C) 1998-2000 Andre Hedrick (andre@linux-ide.org)
- *  May be copied or modified under the terms of the GNU General Public License
- *  Copyright (C) 2002 Alan Cox
- *  ALi (now ULi M5228) support by Clear Zhang <Clear.Zhang@ali.com.tw>
- *  Copyright (C) 2007 MontaVista Software, Inc. <source@mvista.com>
- *  Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
- *
- *  (U)DMA capable version of ali 1533/1543(C), 1535(D)
- *
- **********************************************************************
- *  9/7/99 --Parts from the above author are included and need to be
- *  converted into standard interface, once I finish the thought.
- *
- *  Recent changes
- *	Don't use LBA48 mode on ALi <= 0xC4
- *	Don't poke 0x79 with a non ALi northbridge
- *	Don't flip undefined bits on newer chipsets (fix Fujitsu laptop hang)
- *	Allow UDMA6 on revisions > 0xC4
- *
- *  Documentation
- *	Chipset documentation available under NDA only
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/dmi.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "alim15x3"
-
-/*
- *	ALi devices are not plug in. Otherwise these static values would
- *	need to go. They ought to go away anyway
- */
- 
-static u8 m5229_revision;
-static u8 chip_is_1543c_e;
-static struct pci_dev *isa_dev;
-
-static void ali_fifo_control(ide_hwif_t *hwif, ide_drive_t *drive, int on)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	int pio_fifo = 0x54 + hwif->channel;
-	u8 fifo;
-	int shift = 4 * (drive->dn & 1);
-
-	pci_read_config_byte(pdev, pio_fifo, &fifo);
-	fifo &= ~(0x0F << shift);
-	fifo |= (on << shift);
-	pci_write_config_byte(pdev, pio_fifo, fifo);
-}
-
-static void ali_program_timings(ide_hwif_t *hwif, ide_drive_t *drive,
-				struct ide_timing *t, u8 ultra)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	int port = hwif->channel ? 0x5c : 0x58;
-	int udmat = 0x56 + hwif->channel;
-	u8 unit = drive->dn & 1, udma;
-	int shift = 4 * unit;
-
-	/* Set up the UDMA */
-	pci_read_config_byte(dev, udmat, &udma);
-	udma &= ~(0x0F << shift);
-	udma |= ultra << shift;
-	pci_write_config_byte(dev, udmat, udma);
-
-	if (t == NULL)
-		return;
-
-	t->setup = clamp_val(t->setup, 1, 8) & 7;
-	t->act8b = clamp_val(t->act8b, 1, 8) & 7;
-	t->rec8b = clamp_val(t->rec8b, 1, 16) & 15;
-	t->active = clamp_val(t->active, 1, 8) & 7;
-	t->recover = clamp_val(t->recover, 1, 16) & 15;
-
-	pci_write_config_byte(dev, port, t->setup);
-	pci_write_config_byte(dev, port + 1, (t->act8b << 4) | t->rec8b);
-	pci_write_config_byte(dev, port + unit + 2,
-			      (t->active << 4) | t->recover);
-}
-
-/**
- *	ali_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Program the controller for the given PIO mode.
- */
-
-static void ali_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-	unsigned long T =  1000000 / bus_speed; /* PCI clock based */
-	struct ide_timing t;
-
-	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
-	if (pair) {
-		struct ide_timing p;
-
-		ide_timing_compute(pair, pair->pio_mode, &p, T, 1);
-		ide_timing_merge(&p, &t, &t,
-			IDE_TIMING_SETUP | IDE_TIMING_8BIT);
-		if (pair->dma_mode) {
-			ide_timing_compute(pair, pair->dma_mode, &p, T, 1);
-			ide_timing_merge(&p, &t, &t,
-				IDE_TIMING_SETUP | IDE_TIMING_8BIT);
-		}
-	}
-
-	/* 
-	 * PIO mode => ATA FIFO on, ATAPI FIFO off
-	 */
-	ali_fifo_control(hwif, drive, (drive->media == ide_disk) ? 0x05 : 0x00);
-
-	ali_program_timings(hwif, drive, &t, 0);
-}
-
-/**
- *	ali_udma_filter		-	compute UDMA mask
- *	@drive: IDE device
- *
- *	Return available UDMA modes.
- *
- *	The actual rules for the ALi are:
- *		No UDMA on revisions <= 0x20
- *		Disk only for revisions < 0xC2
- *		Not WDC drives on M1543C-E (?)
- */
-
-static u8 ali_udma_filter(ide_drive_t *drive)
-{
-	if (m5229_revision > 0x20 && m5229_revision < 0xC2) {
-		if (drive->media != ide_disk)
-			return 0;
-		if (chip_is_1543c_e &&
-		    strstr((char *)&drive->id[ATA_ID_PROD], "WDC "))
-			return 0;
-	}
-
-	return drive->hwif->ultra_mask;
-}
-
-/**
- *	ali_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Configure the hardware for the desired IDE transfer mode.
- */
-
-static void ali_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static u8 udma_timing[7] = { 0xC, 0xB, 0xA, 0x9, 0x8, 0xF, 0xD };
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	ide_drive_t *pair	= ide_get_pair_dev(drive);
-	int bus_speed		= ide_pci_clk ? ide_pci_clk : 33;
-	unsigned long T		=  1000000 / bus_speed; /* PCI clock based */
-	const u8 speed		= drive->dma_mode;
-	u8 tmpbyte		= 0x00;
-	struct ide_timing t;
-
-	if (speed < XFER_UDMA_0) {
-		ide_timing_compute(drive, drive->dma_mode, &t, T, 1);
-		if (pair) {
-			struct ide_timing p;
-
-			ide_timing_compute(pair, pair->pio_mode, &p, T, 1);
-			ide_timing_merge(&p, &t, &t,
-				IDE_TIMING_SETUP | IDE_TIMING_8BIT);
-			if (pair->dma_mode) {
-				ide_timing_compute(pair, pair->dma_mode,
-						&p, T, 1);
-				ide_timing_merge(&p, &t, &t,
-					IDE_TIMING_SETUP | IDE_TIMING_8BIT);
-			}
-		}
-		ali_program_timings(hwif, drive, &t, 0);
-	} else {
-		ali_program_timings(hwif, drive, NULL,
-				udma_timing[speed - XFER_UDMA_0]);
-		if (speed >= XFER_UDMA_3) {
-			pci_read_config_byte(dev, 0x4b, &tmpbyte);
-			tmpbyte |= 1;
-			pci_write_config_byte(dev, 0x4b, tmpbyte);
-		}
-	}
-}
-
-/**
- *	ali_dma_check	-	DMA check
- *	@drive:	target device
- *	@cmd: command
- *
- *	Returns 1 if the DMA cannot be performed, zero on success.
- */
-
-static int ali_dma_check(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	if (m5229_revision < 0xC2 && drive->media != ide_disk) {
-		if (cmd->tf_flags & IDE_TFLAG_WRITE)
-			return 1;	/* try PIO instead of DMA */
-	}
-	return 0;
-}
-
-/**
- *	init_chipset_ali15x3	-	Initialise an ALi IDE controller
- *	@dev: PCI device
- *
- *	This function initializes the ALI IDE controller and where 
- *	appropriate also sets up the 1533 southbridge.
- */
-
-static int init_chipset_ali15x3(struct pci_dev *dev)
-{
-	unsigned long flags;
-	u8 tmpbyte;
-	struct pci_dev *north = pci_get_slot(dev->bus, PCI_DEVFN(0,0));
-
-	m5229_revision = dev->revision;
-
-	isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL);
-
-	local_irq_save(flags);
-
-	if (m5229_revision < 0xC2) {
-		/*
-		 * revision 0x20 (1543-E, 1543-F)
-		 * revision 0xC0, 0xC1 (1543C-C, 1543C-D, 1543C-E)
-		 * clear CD-ROM DMA write bit, m5229, 0x4b, bit 7
-		 */
-		pci_read_config_byte(dev, 0x4b, &tmpbyte);
-		/*
-		 * clear bit 7
-		 */
-		pci_write_config_byte(dev, 0x4b, tmpbyte & 0x7F);
-		/*
-		 * check m1533, 0x5e, bit 1~4 == 1001 => & 00011110 = 00010010
-		 */
-		if (m5229_revision >= 0x20 && isa_dev) {
-			pci_read_config_byte(isa_dev, 0x5e, &tmpbyte);
-			chip_is_1543c_e = ((tmpbyte & 0x1e) == 0x12) ? 1: 0;
-		}
-		goto out;
-	}
-
-	/*
-	 * 1543C-B?, 1535, 1535D, 1553
-	 * Note 1: not all "motherboard" support this detection
-	 * Note 2: if no udma 66 device, the detection may "error".
-	 *         but in this case, we will not set the device to
-	 *         ultra 66, the detection result is not important
-	 */
-
-	/*
-	 * enable "Cable Detection", m5229, 0x4b, bit3
-	 */
-	pci_read_config_byte(dev, 0x4b, &tmpbyte);
-	pci_write_config_byte(dev, 0x4b, tmpbyte | 0x08);
-
-	/*
-	 * We should only tune the 1533 enable if we are using an ALi
-	 * North bridge. We might have no north found on some zany
-	 * box without a device at 0:0.0. The ALi bridge will be at
-	 * 0:0.0 so if we didn't find one we know what is cooking.
-	 */
-	if (north && north->vendor != PCI_VENDOR_ID_AL)
-		goto out;
-
-	if (m5229_revision < 0xC5 && isa_dev)
-	{	
-		/*
-		 * set south-bridge's enable bit, m1533, 0x79
-		 */
-
-		pci_read_config_byte(isa_dev, 0x79, &tmpbyte);
-		if (m5229_revision == 0xC2) {
-			/*
-			 * 1543C-B0 (m1533, 0x79, bit 2)
-			 */
-			pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x04);
-		} else if (m5229_revision >= 0xC3) {
-			/*
-			 * 1553/1535 (m1533, 0x79, bit 1)
-			 */
-			pci_write_config_byte(isa_dev, 0x79, tmpbyte | 0x02);
-		}
-	}
-
-out:
-	/*
-	 * CD_ROM DMA on (m5229, 0x53, bit0)
-	 *      Enable this bit even if we want to use PIO.
-	 * PIO FIFO off (m5229, 0x53, bit1)
-	 *      The hardware will use 0x54h and 0x55h to control PIO FIFO.
-	 *	(Not on later devices it seems)
-	 *
-	 *	0x53 changes meaning on later revs - we must no touch
-	 *	bit 1 on them.  Need to check if 0x20 is the right break.
-	 */
-	if (m5229_revision >= 0x20) {
-		pci_read_config_byte(dev, 0x53, &tmpbyte);
-
-		if (m5229_revision <= 0x20)
-			tmpbyte = (tmpbyte & (~0x02)) | 0x01;
-		else if (m5229_revision == 0xc7 || m5229_revision == 0xc8)
-			tmpbyte |= 0x03;
-		else
-			tmpbyte |= 0x01;
-
-		pci_write_config_byte(dev, 0x53, tmpbyte);
-	}
-	local_irq_restore(flags);
-	pci_dev_put(north);
-	pci_dev_put(isa_dev);
-	return 0;
-}
-
-/*
- *	Cable special cases
- */
-
-static const struct dmi_system_id cable_dmi_table[] = {
-	{
-		.ident = "HP Pavilion N5430",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
-			DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
-		},
-	},
-	{
-		.ident = "Toshiba Satellite S1800-814",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "S1800-814"),
-		},
-	},
-	{ }
-};
-
-static int ali_cable_override(struct pci_dev *pdev)
-{
-	/* Fujitsu P2000 */
-	if (pdev->subsystem_vendor == 0x10CF &&
-	    pdev->subsystem_device == 0x10AF)
-		return 1;
-
-	/* Mitac 8317 (Winbook-A) and relatives */
-	if (pdev->subsystem_vendor == 0x1071 &&
-	    pdev->subsystem_device == 0x8317)
-		return 1;
-
-	/* Systems by DMI */
-	if (dmi_check_system(cable_dmi_table))
-		return 1;
-
-	return 0;
-}
-
-/**
- *	ali_cable_detect	-	cable detection
- *	@hwif: IDE interface
- *
- *	This checks if the controller and the cable are capable
- *	of UDMA66 transfers. It doesn't check the drives.
- */
-
-static u8 ali_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 cbl = ATA_CBL_PATA40, tmpbyte;
-
-	if (m5229_revision >= 0xC2) {
-		/*
-		 * m5229 80-pin cable detection (from Host View)
-		 *
-		 * 0x4a bit0 is 0 => primary channel has 80-pin
-		 * 0x4a bit1 is 0 => secondary channel has 80-pin
-		 *
-		 * Certain laptops use short but suitable cables
-		 * and don't implement the detect logic.
-		 */
-		if (ali_cable_override(dev))
-			cbl = ATA_CBL_PATA40_SHORT;
-		else {
-			pci_read_config_byte(dev, 0x4a, &tmpbyte);
-			if ((tmpbyte & (1 << hwif->channel)) == 0)
-				cbl = ATA_CBL_PATA80;
-		}
-	}
-
-	return cbl;
-}
-
-#ifndef CONFIG_SPARC64
-/**
- *	init_hwif_ali15x3	-	Initialize the ALI IDE x86 stuff
- *	@hwif: interface to configure
- *
- *	Obtain the IRQ tables for an ALi based IDE solution on the PC
- *	class platforms. This part of the code isn't applicable to the
- *	Sparc systems.
- */
-
-static void init_hwif_ali15x3(ide_hwif_t *hwif)
-{
-	u8 ideic, inmir;
-	s8 irq_routing_table[] = { -1,  9, 3, 10, 4,  5, 7,  6,
-				      1, 11, 0, 12, 0, 14, 0, 15 };
-	int irq = -1;
-
-	if (isa_dev) {
-		/*
-		 * read IDE interface control
-		 */
-		pci_read_config_byte(isa_dev, 0x58, &ideic);
-
-		/* bit0, bit1 */
-		ideic = ideic & 0x03;
-
-		/* get IRQ for IDE Controller */
-		if ((hwif->channel && ideic == 0x03) ||
-		    (!hwif->channel && !ideic)) {
-			/*
-			 * get SIRQ1 routing table
-			 */
-			pci_read_config_byte(isa_dev, 0x44, &inmir);
-			inmir = inmir & 0x0f;
-			irq = irq_routing_table[inmir];
-		} else if (hwif->channel && !(ideic & 0x01)) {
-			/*
-			 * get SIRQ2 routing table
-			 */
-			pci_read_config_byte(isa_dev, 0x75, &inmir);
-			inmir = inmir & 0x0f;
-			irq = irq_routing_table[inmir];
-		}
-		if(irq >= 0)
-			hwif->irq = irq;
-	}
-}
-#else
-#define init_hwif_ali15x3 NULL
-#endif /* CONFIG_SPARC64 */
-
-/**
- *	init_dma_ali15x3	-	set up DMA on ALi15x3
- *	@hwif: IDE interface
- *	@d: IDE port info
- *
- *	Set up the DMA functionality on the ALi 15x3.
- */
-
-static int init_dma_ali15x3(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long base = ide_pci_dma_base(hwif, d);
-
-	if (base == 0)
-		return -1;
-
-	hwif->dma_base = base;
-
-	if (ide_pci_check_simplex(hwif, d) < 0)
-		return -1;
-
-	if (ide_pci_set_master(dev, d->name) < 0)
-		return -1;
-
-	if (!hwif->channel)
-		outb(inb(base + 2) & 0x60, base + 2);
-
-	printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
-			 hwif->name, base, base + 7);
-
-	if (ide_allocate_dma_engine(hwif))
-		return -1;
-
-	return 0;
-}
-
-static const struct ide_port_ops ali_port_ops = {
-	.set_pio_mode		= ali_set_pio_mode,
-	.set_dma_mode		= ali_set_dma_mode,
-	.udma_filter		= ali_udma_filter,
-	.cable_detect		= ali_cable_detect,
-};
-
-static const struct ide_dma_ops ali_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_check		= ali_dma_check,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info ali15x3_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_ali15x3,
-	.init_hwif	= init_hwif_ali15x3,
-	.init_dma	= init_dma_ali15x3,
-	.port_ops	= &ali_port_ops,
-	.dma_ops	= &sff_dma_ops,
-	.pio_mask	= ATA_PIO5,
-	.swdma_mask	= ATA_SWDMA2,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-/**
- *	alim15x3_init_one	-	set up an ALi15x3 IDE controller
- *	@dev: PCI device to set up
- *
- *	Perform the actual set up for an ALi15x3 that has been found by the
- *	hot plug layer.
- */
- 
-static int alim15x3_init_one(struct pci_dev *dev,
-			     const struct pci_device_id *id)
-{
-	struct ide_port_info d = ali15x3_chipset;
-	u8 rev = dev->revision, idx = id->driver_data;
-
-	/* don't use LBA48 DMA on ALi devices before rev 0xC5 */
-	if (rev <= 0xC4)
-		d.host_flags |= IDE_HFLAG_NO_LBA48_DMA;
-
-	if (rev >= 0x20) {
-		if (rev == 0x20)
-			d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-
-		if (rev < 0xC2)
-			d.udma_mask = ATA_UDMA2;
-		else if (rev == 0xC2 || rev == 0xC3)
-			d.udma_mask = ATA_UDMA4;
-		else if (rev == 0xC4)
-			d.udma_mask = ATA_UDMA5;
-		else
-			d.udma_mask = ATA_UDMA6;
-
-		d.dma_ops = &ali_dma_ops;
-	} else {
-		d.host_flags |= IDE_HFLAG_NO_DMA;
-
-		d.mwdma_mask = d.swdma_mask = 0;
-	}
-
-	if (idx == 0)
-		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-
-static const struct pci_device_id alim15x3_pci_tbl[] = {
-	{ PCI_VDEVICE(AL, PCI_DEVICE_ID_AL_M5229), 0 },
-	{ PCI_VDEVICE(AL, PCI_DEVICE_ID_AL_M5228), 1 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, alim15x3_pci_tbl);
-
-static struct pci_driver alim15x3_pci_driver = {
-	.name		= "ALI15x3_IDE",
-	.id_table	= alim15x3_pci_tbl,
-	.probe		= alim15x3_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init ali15x3_ide_init(void)
-{
-	return ide_pci_register_driver(&alim15x3_pci_driver);
-}
-
-static void __exit ali15x3_ide_exit(void)
-{
-	pci_unregister_driver(&alim15x3_pci_driver);
-}
-
-module_init(ali15x3_ide_init);
-module_exit(ali15x3_ide_exit);
-
-MODULE_AUTHOR("Michael Aubry, Andrzej Krzysztofowicz, CJ, Andre Hedrick, Alan Cox, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("PCI driver module for ALi 15x3 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/amd74xx.c b/drivers/ide/amd74xx.c
deleted file mode 100644
index 7340597a373e3..0000000000000
--- a/drivers/ide/amd74xx.c
+++ /dev/null
@@ -1,343 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AMD 755/756/766/8111 and nVidia nForce/2/2s/3/3s/CK804/MCP04
- * IDE driver for Linux.
- *
- * Copyright (c) 2000-2002 Vojtech Pavlik
- * Copyright (c) 2007-2010 Bartlomiej Zolnierkiewicz
- *
- * Based on the work of:
- *      Andre Hedrick
- */
-
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "amd74xx"
-
-enum {
-	AMD_IDE_CONFIG		= 0x41,
-	AMD_CABLE_DETECT	= 0x42,
-	AMD_DRIVE_TIMING	= 0x48,
-	AMD_8BIT_TIMING		= 0x4e,
-	AMD_ADDRESS_SETUP	= 0x4c,
-	AMD_UDMA_TIMING		= 0x50,
-};
-
-static unsigned int amd_80w;
-static unsigned int amd_clock;
-
-static char *amd_dma[] = { "16", "25", "33", "44", "66", "100", "133" };
-static unsigned char amd_cyc2udma[] = { 6, 6, 5, 4, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 7 };
-
-static inline u8 amd_offset(struct pci_dev *dev)
-{
-	return (dev->vendor == PCI_VENDOR_ID_NVIDIA) ? 0x10 : 0;
-}
-
-/*
- * amd_set_speed() writes timing values to the chipset registers
- */
-
-static void amd_set_speed(struct pci_dev *dev, u8 dn, u8 udma_mask,
-			  struct ide_timing *timing)
-{
-	u8 t = 0, offset = amd_offset(dev);
-
-	pci_read_config_byte(dev, AMD_ADDRESS_SETUP + offset, &t);
-	t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
-	pci_write_config_byte(dev, AMD_ADDRESS_SETUP + offset, t);
-
-	pci_write_config_byte(dev, AMD_8BIT_TIMING + offset + (1 - (dn >> 1)),
-		((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
-
-	pci_write_config_byte(dev, AMD_DRIVE_TIMING + offset + (3 - dn),
-		((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
-
-	switch (udma_mask) {
-	case ATA_UDMA2: t = timing->udma ? (0xc0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
-	case ATA_UDMA4: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 2, 10)]) : 0x03; break;
-	case ATA_UDMA5: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 10)]) : 0x03; break;
-	case ATA_UDMA6: t = timing->udma ? (0xc0 | amd_cyc2udma[clamp_val(timing->udma, 1, 15)]) : 0x03; break;
-	default: return;
-	}
-
-	if (timing->udma)
-		pci_write_config_byte(dev, AMD_UDMA_TIMING + offset + 3 - dn, t);
-}
-
-/*
- * amd_set_drive() computes timing values and configures the chipset
- * to a desired transfer mode.  It also can be called by upper layers.
- */
-
-static void amd_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	ide_drive_t *peer = ide_get_pair_dev(drive);
-	struct ide_timing t, p;
-	int T, UT;
-	u8 udma_mask = hwif->ultra_mask;
-	const u8 speed = drive->dma_mode;
-
-	T = 1000000000 / amd_clock;
-	UT = (udma_mask == ATA_UDMA2) ? T : (T / 2);
-
-	ide_timing_compute(drive, speed, &t, T, UT);
-
-	if (peer) {
-		ide_timing_compute(peer, peer->pio_mode, &p, T, UT);
-		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
-	}
-
-	if (speed == XFER_UDMA_5 && amd_clock <= 33333) t.udma = 1;
-	if (speed == XFER_UDMA_6 && amd_clock <= 33333) t.udma = 15;
-
-	amd_set_speed(dev, drive->dn, udma_mask, &t);
-}
-
-/*
- * amd_set_pio_mode() is a callback from upper layers for PIO-only tuning.
- */
-
-static void amd_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	amd_set_drive(hwif, drive);
-}
-
-static void amd7409_cable_detect(struct pci_dev *dev)
-{
-	/* no host side cable detection */
-	amd_80w = 0x03;
-}
-
-static void amd7411_cable_detect(struct pci_dev *dev)
-{
-	int i;
-	u32 u = 0;
-	u8 t = 0, offset = amd_offset(dev);
-
-	pci_read_config_byte(dev, AMD_CABLE_DETECT + offset, &t);
-	pci_read_config_dword(dev, AMD_UDMA_TIMING + offset, &u);
-	amd_80w = ((t & 0x3) ? 1 : 0) | ((t & 0xc) ? 2 : 0);
-	for (i = 24; i >= 0; i -= 8)
-		if (((u >> i) & 4) && !(amd_80w & (1 << (1 - (i >> 4))))) {
-			printk(KERN_WARNING DRV_NAME " %s: BIOS didn't set "
-				"cable bits correctly. Enabling workaround.\n",
-				pci_name(dev));
-			amd_80w |= (1 << (1 - (i >> 4)));
-		}
-}
-
-/*
- * The initialization callback.  Initialize drive independent registers.
- */
-
-static int init_chipset_amd74xx(struct pci_dev *dev)
-{
-	u8 t = 0, offset = amd_offset(dev);
-
-/*
- * Check 80-wire cable presence.
- */
-
-	if (dev->vendor == PCI_VENDOR_ID_AMD &&
-	    dev->device == PCI_DEVICE_ID_AMD_COBRA_7401)
-		; /* no UDMA > 2 */
-	else if (dev->vendor == PCI_VENDOR_ID_AMD &&
-		 dev->device == PCI_DEVICE_ID_AMD_VIPER_7409)
-		amd7409_cable_detect(dev);
-	else
-		amd7411_cable_detect(dev);
-
-/*
- * Take care of prefetch & postwrite.
- */
-
-	pci_read_config_byte(dev, AMD_IDE_CONFIG + offset, &t);
-	/*
-	 * Check for broken FIFO support.
-	 */
-	if (dev->vendor == PCI_VENDOR_ID_AMD &&
-	    dev->device == PCI_DEVICE_ID_AMD_VIPER_7411)
-		t &= 0x0f;
-	else
-		t |= 0xf0;
-	pci_write_config_byte(dev, AMD_IDE_CONFIG + offset, t);
-
-	return 0;
-}
-
-static u8 amd_cable_detect(ide_hwif_t *hwif)
-{
-	if ((amd_80w >> hwif->channel) & 1)
-		return ATA_CBL_PATA80;
-	else
-		return ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops amd_port_ops = {
-	.set_pio_mode		= amd_set_pio_mode,
-	.set_dma_mode		= amd_set_drive,
-	.cable_detect		= amd_cable_detect,
-};
-
-#define IDE_HFLAGS_AMD \
-	(IDE_HFLAG_PIO_NO_BLACKLIST | \
-	 IDE_HFLAG_POST_SET_MODE | \
-	 IDE_HFLAG_IO_32BIT | \
-	 IDE_HFLAG_UNMASK_IRQS)
-
-#define DECLARE_AMD_DEV(swdma, udma)				\
-	{								\
-		.name		= DRV_NAME,				\
-		.init_chipset	= init_chipset_amd74xx,			\
-		.enablebits	= {{0x40,0x02,0x02}, {0x40,0x01,0x01}},	\
-		.port_ops	= &amd_port_ops,			\
-		.host_flags	= IDE_HFLAGS_AMD,			\
-		.pio_mask	= ATA_PIO5,				\
-		.swdma_mask	= swdma,				\
-		.mwdma_mask	= ATA_MWDMA2,				\
-		.udma_mask	= udma,					\
-	}
-
-#define DECLARE_NV_DEV(udma)					\
-	{								\
-		.name		= DRV_NAME,				\
-		.init_chipset	= init_chipset_amd74xx,			\
-		.enablebits	= {{0x50,0x02,0x02}, {0x50,0x01,0x01}},	\
-		.port_ops	= &amd_port_ops,			\
-		.host_flags	= IDE_HFLAGS_AMD,			\
-		.pio_mask	= ATA_PIO5,				\
-		.swdma_mask	= ATA_SWDMA2,				\
-		.mwdma_mask	= ATA_MWDMA2,				\
-		.udma_mask	= udma,					\
-	}
-
-static const struct ide_port_info amd74xx_chipsets[] = {
-	/* 0: AMD7401 */	DECLARE_AMD_DEV(0x00, ATA_UDMA2),
-	/* 1: AMD7409 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA4),
-	/* 2: AMD7411/7441 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA5),
-	/* 3: AMD8111 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA6),
-
-	/* 4: NFORCE */		DECLARE_NV_DEV(ATA_UDMA5),
-	/* 5: >= NFORCE2 */	DECLARE_NV_DEV(ATA_UDMA6),
-
-	/* 6: AMD5536 */	DECLARE_AMD_DEV(ATA_SWDMA2, ATA_UDMA5),
-};
-
-static int amd74xx_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-
-	d = amd74xx_chipsets[idx];
-
-	/*
-	 * Check for bad SWDMA and incorrectly wired Serenade mainboards.
-	 */
-	if (idx == 1) {
-		if (dev->revision <= 7)
-			d.swdma_mask = 0;
-		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
-	} else if (idx == 3) {
-		if (dev->subsystem_vendor == PCI_VENDOR_ID_AMD &&
-		    dev->subsystem_device == PCI_DEVICE_ID_AMD_SERENADE)
-			d.udma_mask = ATA_UDMA5;
-	}
-
-	/*
-	 * It seems that on some nVidia controllers using AltStatus
-	 * register can be unreliable so default to Status register
-	 * if the device is in Compatibility Mode.
-	 */
-	if (dev->vendor == PCI_VENDOR_ID_NVIDIA &&
-	    ide_pci_is_in_compatibility_mode(dev))
-		d.host_flags |= IDE_HFLAG_BROKEN_ALTSTATUS;
-
-	printk(KERN_INFO "%s %s: UDMA%s controller\n",
-		d.name, pci_name(dev), amd_dma[fls(d.udma_mask) - 1]);
-
-	/*
-	* Determine the system bus clock.
-	*/
-	amd_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
-
-	switch (amd_clock) {
-	case 33000: amd_clock = 33333; break;
-	case 37000: amd_clock = 37500; break;
-	case 41000: amd_clock = 41666; break;
-	}
-
-	if (amd_clock < 20000 || amd_clock > 50000) {
-		printk(KERN_WARNING "%s: User given PCI clock speed impossible"
-				    " (%d), using 33 MHz instead.\n",
-				    d.name, amd_clock);
-		amd_clock = 33333;
-	}
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static const struct pci_device_id amd74xx_pci_tbl[] = {
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_COBRA_7401),		 0 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_VIPER_7409),		 1 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_VIPER_7411),		 2 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_OPUS_7441),		 2 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_8111_IDE),		 3 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_IDE),	 4 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_IDE),	 5 },
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE2S_SATA),	 5 },
-#endif
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_IDE),	 5 },
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE3S_SATA2),	 5 },
-#endif
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP65_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP73_IDE),	 5 },
-	{ PCI_VDEVICE(NVIDIA,	PCI_DEVICE_ID_NVIDIA_NFORCE_MCP77_IDE),	 5 },
-	{ PCI_VDEVICE(AMD,	PCI_DEVICE_ID_AMD_CS5536_IDE),		 6 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, amd74xx_pci_tbl);
-
-static struct pci_driver amd74xx_pci_driver = {
-	.name		= "AMD_IDE",
-	.id_table	= amd74xx_pci_tbl,
-	.probe		= amd74xx_probe,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init amd74xx_ide_init(void)
-{
-	return ide_pci_register_driver(&amd74xx_pci_driver);
-}
-
-static void __exit amd74xx_ide_exit(void)
-{
-	pci_unregister_driver(&amd74xx_pci_driver);
-}
-
-module_init(amd74xx_ide_init);
-module_exit(amd74xx_ide_exit);
-
-MODULE_AUTHOR("Vojtech Pavlik, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("AMD PCI IDE driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/atiixp.c b/drivers/ide/atiixp.c
deleted file mode 100644
index e08b0aac08b9e..0000000000000
--- a/drivers/ide/atiixp.c
+++ /dev/null
@@ -1,212 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2003 ATI Inc. <hyu@ati.com>
- *  Copyright (C) 2004,2007 Bartlomiej Zolnierkiewicz
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "atiixp"
-
-#define ATIIXP_IDE_PIO_TIMING		0x40
-#define ATIIXP_IDE_MDMA_TIMING		0x44
-#define ATIIXP_IDE_PIO_CONTROL		0x48
-#define ATIIXP_IDE_PIO_MODE		0x4a
-#define ATIIXP_IDE_UDMA_CONTROL		0x54
-#define ATIIXP_IDE_UDMA_MODE		0x56
-
-struct atiixp_ide_timing {
-	u8 command_width;
-	u8 recover_width;
-};
-
-static struct atiixp_ide_timing pio_timing[] = {
-	{ 0x05, 0x0d },
-	{ 0x04, 0x07 },
-	{ 0x03, 0x04 },
-	{ 0x02, 0x02 },
-	{ 0x02, 0x00 },
-};
-
-static struct atiixp_ide_timing mdma_timing[] = {
-	{ 0x07, 0x07 },
-	{ 0x02, 0x01 },
-	{ 0x02, 0x00 },
-};
-
-static DEFINE_SPINLOCK(atiixp_lock);
-
-/**
- *	atiixp_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Set the interface PIO mode.
- */
-
-static void atiixp_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long flags;
-	int timing_shift = (drive->dn ^ 1) * 8;
-	u32 pio_timing_data;
-	u16 pio_mode_data;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	spin_lock_irqsave(&atiixp_lock, flags);
-
-	pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
-	pio_mode_data &= ~(0x07 << (drive->dn * 4));
-	pio_mode_data |= (pio << (drive->dn * 4));
-	pci_write_config_word(dev, ATIIXP_IDE_PIO_MODE, pio_mode_data);
-
-	pci_read_config_dword(dev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
-	pio_timing_data &= ~(0xff << timing_shift);
-	pio_timing_data |= (pio_timing[pio].recover_width << timing_shift) |
-		 (pio_timing[pio].command_width << (timing_shift + 4));
-	pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
-
-	spin_unlock_irqrestore(&atiixp_lock, flags);
-}
-
-/**
- *	atiixp_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Set a ATIIXP host controller to the desired DMA mode.  This involves
- *	programming the right timing data into the PCI configuration space.
- */
-
-static void atiixp_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long flags;
-	int timing_shift = (drive->dn ^ 1) * 8;
-	u32 tmp32;
-	u16 tmp16;
-	u16 udma_ctl = 0;
-	const u8 speed = drive->dma_mode;
-
-	spin_lock_irqsave(&atiixp_lock, flags);
-
-	pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &udma_ctl);
-
-	if (speed >= XFER_UDMA_0) {
-		pci_read_config_word(dev, ATIIXP_IDE_UDMA_MODE, &tmp16);
-		tmp16 &= ~(0x07 << (drive->dn * 4));
-		tmp16 |= ((speed & 0x07) << (drive->dn * 4));
-		pci_write_config_word(dev, ATIIXP_IDE_UDMA_MODE, tmp16);
-
-		udma_ctl |= (1 << drive->dn);
-	} else if (speed >= XFER_MW_DMA_0) {
-		u8 i = speed & 0x03;
-
-		pci_read_config_dword(dev, ATIIXP_IDE_MDMA_TIMING, &tmp32);
-		tmp32 &= ~(0xff << timing_shift);
-		tmp32 |= (mdma_timing[i].recover_width << timing_shift) |
-			 (mdma_timing[i].command_width << (timing_shift + 4));
-		pci_write_config_dword(dev, ATIIXP_IDE_MDMA_TIMING, tmp32);
-
-		udma_ctl &= ~(1 << drive->dn);
-	}
-
-	pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, udma_ctl);
-
-	spin_unlock_irqrestore(&atiixp_lock, flags);
-}
-
-static u8 atiixp_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	u8 udma_mode = 0, ch = hwif->channel;
-
-	pci_read_config_byte(pdev, ATIIXP_IDE_UDMA_MODE + ch, &udma_mode);
-
-	if ((udma_mode & 0x07) >= 0x04 || (udma_mode & 0x70) >= 0x40)
-		return ATA_CBL_PATA80;
-	else
-		return ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops atiixp_port_ops = {
-	.set_pio_mode		= atiixp_set_pio_mode,
-	.set_dma_mode		= atiixp_set_dma_mode,
-	.cable_detect		= atiixp_cable_detect,
-};
-
-static const struct ide_port_info atiixp_pci_info[] = {
-	{	/* 0: IXP200/300/400/700 */
-		.name		= DRV_NAME,
-		.enablebits	= {{0x48,0x01,0x00}, {0x48,0x08,0x00}},
-		.port_ops	= &atiixp_port_ops,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	},
-	{	/* 1: IXP600 */
-		.name		= DRV_NAME,
-		.enablebits	= {{0x48,0x01,0x00}, {0x00,0x00,0x00}},
-		.port_ops	= &atiixp_port_ops,
-		.host_flags	= IDE_HFLAG_SINGLE,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
- 	},
-};
-
-/**
- *	atiixp_init_one	-	called when a ATIIXP is found
- *	@dev: the atiixp device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
-
-static int atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &atiixp_pci_info[id->driver_data], NULL);
-}
-
-static const struct pci_device_id atiixp_pci_tbl[] = {
-	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP200_IDE), 0 },
-	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP300_IDE), 0 },
-	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP400_IDE), 0 },
-	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP600_IDE), 1 },
-	{ PCI_VDEVICE(ATI, PCI_DEVICE_ID_ATI_IXP700_IDE), 0 },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_HUDSON2_IDE), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, atiixp_pci_tbl);
-
-static struct pci_driver atiixp_pci_driver = {
-	.name		= "ATIIXP_IDE",
-	.id_table	= atiixp_pci_tbl,
-	.probe		= atiixp_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init atiixp_ide_init(void)
-{
-	return ide_pci_register_driver(&atiixp_pci_driver);
-}
-
-static void __exit atiixp_ide_exit(void)
-{
-	pci_unregister_driver(&atiixp_pci_driver);
-}
-
-module_init(atiixp_ide_init);
-module_exit(atiixp_ide_exit);
-
-MODULE_AUTHOR("HUI YU");
-MODULE_DESCRIPTION("PCI driver module for ATI IXP IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/buddha.c b/drivers/ide/buddha.c
deleted file mode 100644
index 46eaf58d881b4..0000000000000
--- a/drivers/ide/buddha.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*
- *  Amiga Buddha, Catweasel and X-Surf IDE Driver
- *
- *	Copyright (C) 1997, 2001 by Geert Uytterhoeven and others
- *
- *  This driver was written based on the specifications in README.buddha and
- *  the X-Surf info from Inside_XSurf.txt available at
- *  http://www.jschoenfeld.com
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- *
- *  TODO:
- *    - test it :-)
- *    - tune the timings using the speed-register
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/zorro.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/amigahw.h>
-#include <asm/amigaints.h>
-
-
-    /*
-     *  The Buddha has 2 IDE interfaces, the Catweasel has 3, X-Surf has 2
-     */
-
-#define BUDDHA_NUM_HWIFS	2
-#define CATWEASEL_NUM_HWIFS	3
-#define XSURF_NUM_HWIFS         2
-
-#define MAX_NUM_HWIFS		3
-
-    /*
-     *  Bases of the IDE interfaces (relative to the board address)
-     */
-
-#define BUDDHA_BASE1	0x800
-#define BUDDHA_BASE2	0xa00
-#define BUDDHA_BASE3	0xc00
-
-#define XSURF_BASE1     0xb000 /* 2.5" Interface */
-#define XSURF_BASE2     0xd000 /* 3.5" Interface */
-
-static u_int buddha_bases[CATWEASEL_NUM_HWIFS] __initdata = {
-    BUDDHA_BASE1, BUDDHA_BASE2, BUDDHA_BASE3
-};
-
-static u_int xsurf_bases[XSURF_NUM_HWIFS] __initdata = {
-     XSURF_BASE1, XSURF_BASE2
-};
-
-    /*
-     *  Offsets from one of the above bases
-     */
-
-#define BUDDHA_CONTROL	0x11a
-
-    /*
-     *  Other registers
-     */
-
-#define BUDDHA_IRQ1	0xf00		/* MSB = 1, Harddisk is source of */
-#define BUDDHA_IRQ2	0xf40		/* interrupt */
-#define BUDDHA_IRQ3	0xf80
-
-#define XSURF_IRQ1      0x7e
-#define XSURF_IRQ2      0x7e
-
-static int buddha_irqports[CATWEASEL_NUM_HWIFS] __initdata = {
-    BUDDHA_IRQ1, BUDDHA_IRQ2, BUDDHA_IRQ3
-};
-
-static int xsurf_irqports[XSURF_NUM_HWIFS] __initdata = {
-    XSURF_IRQ1, XSURF_IRQ2
-};
-
-#define BUDDHA_IRQ_MR	0xfc0		/* master interrupt enable */
-
-
-    /*
-     *  Board information
-     */
-
-typedef enum BuddhaType_Enum {
-    BOARD_BUDDHA, BOARD_CATWEASEL, BOARD_XSURF
-} BuddhaType;
-
-static const char *buddha_board_name[] = { "Buddha", "Catweasel", "X-Surf" };
-
-    /*
-     *  Check and acknowledge the interrupt status
-     */
-
-static int buddha_test_irq(ide_hwif_t *hwif)
-{
-    unsigned char ch;
-
-    ch = z_readb(hwif->io_ports.irq_addr);
-    if (!(ch & 0x80))
-	    return 0;
-    return 1;
-}
-
-static void xsurf_clear_irq(ide_drive_t *drive)
-{
-    /*
-     * X-Surf needs 0 written to IRQ register to ensure ISA bit A11 stays at 0
-     */
-    z_writeb(0, drive->hwif->io_ports.irq_addr);
-}
-
-static void __init buddha_setup_ports(struct ide_hw *hw, unsigned long base,
-				      unsigned long ctl, unsigned long irq_port)
-{
-	int i;
-
-	memset(hw, 0, sizeof(*hw));
-
-	hw->io_ports.data_addr = base;
-
-	for (i = 1; i < 8; i++)
-		hw->io_ports_array[i] = base + 2 + i * 4;
-
-	hw->io_ports.ctl_addr = ctl;
-	hw->io_ports.irq_addr = irq_port;
-
-	hw->irq = IRQ_AMIGA_PORTS;
-}
-
-static const struct ide_port_ops buddha_port_ops = {
-	.test_irq		= buddha_test_irq,
-};
-
-static const struct ide_port_ops xsurf_port_ops = {
-	.clear_irq		= xsurf_clear_irq,
-	.test_irq		= buddha_test_irq,
-};
-
-static const struct ide_port_info buddha_port_info = {
-	.port_ops		= &buddha_port_ops,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
-};
-
-    /*
-     *  Probe for a Buddha or Catweasel IDE interface
-     */
-
-static int __init buddha_init(void)
-{
-	struct zorro_dev *z = NULL;
-	u_long buddha_board = 0;
-	BuddhaType type;
-	int buddha_num_hwifs, i;
-
-	while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
-		unsigned long board;
-		struct ide_hw hw[MAX_NUM_HWIFS], *hws[MAX_NUM_HWIFS];
-		struct ide_port_info d = buddha_port_info;
-
-		if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_BUDDHA) {
-			buddha_num_hwifs = BUDDHA_NUM_HWIFS;
-			type=BOARD_BUDDHA;
-		} else if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_CATWEASEL) {
-			buddha_num_hwifs = CATWEASEL_NUM_HWIFS;
-			type=BOARD_CATWEASEL;
-		} else if (z->id == ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF) {
-			buddha_num_hwifs = XSURF_NUM_HWIFS;
-			type=BOARD_XSURF;
-			d.port_ops = &xsurf_port_ops;
-		} else 
-			continue;
-		
-		board = z->resource.start;
-
-		if(type != BOARD_XSURF) {
-			if (!request_mem_region(board+BUDDHA_BASE1, 0x800, "IDE"))
-				continue;
-		} else {
-			if (!request_mem_region(board+XSURF_BASE1, 0x1000, "IDE"))
-				continue;
-			if (!request_mem_region(board+XSURF_BASE2, 0x1000, "IDE"))
-				goto fail_base2;
-			if (!request_mem_region(board+XSURF_IRQ1, 0x8, "IDE")) {
-				release_mem_region(board+XSURF_BASE2, 0x1000);
-fail_base2:
-				release_mem_region(board+XSURF_BASE1, 0x1000);
-				continue;
-			}
-		}	  
-		buddha_board = (unsigned long)ZTWO_VADDR(board);
-		
-		/* write to BUDDHA_IRQ_MR to enable the board IRQ */
-		/* X-Surf doesn't have this.  IRQs are always on */
-		if (type != BOARD_XSURF)
-			z_writeb(0, buddha_board+BUDDHA_IRQ_MR);
-
-		printk(KERN_INFO "ide: %s IDE controller\n",
-				 buddha_board_name[type]);
-
-		for (i = 0; i < buddha_num_hwifs; i++) {
-			unsigned long base, ctl, irq_port;
-
-			if (type != BOARD_XSURF) {
-				base = buddha_board + buddha_bases[i];
-				ctl = base + BUDDHA_CONTROL;
-				irq_port = buddha_board + buddha_irqports[i];
-			} else {
-				base = buddha_board + xsurf_bases[i];
-				/* X-Surf has no CS1* (Control/AltStat) */
-				ctl = 0;
-				irq_port = buddha_board + xsurf_irqports[i];
-			}
-
-			buddha_setup_ports(&hw[i], base, ctl, irq_port);
-
-			hws[i] = &hw[i];
-		}
-
-		ide_host_add(&d, hws, i, NULL);
-	}
-
-	return 0;
-}
-
-module_init(buddha_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cmd640.c b/drivers/ide/cmd640.c
deleted file mode 100644
index f48decb9fac4d..0000000000000
--- a/drivers/ide/cmd640.c
+++ /dev/null
@@ -1,848 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1995-1996  Linus Torvalds & authors (see below)
- */
-
-/*
- *  Original authors:	abramov@cecmow.enet.dec.com (Igor Abramov)
- *			mlord@pobox.com (Mark Lord)
- *
- *  See linux/MAINTAINERS for address of current maintainer.
- *
- *  This file provides support for the advanced features and bugs
- *  of IDE interfaces using the CMD Technologies 0640 IDE interface chip.
- *
- *  These chips are basically fucked by design, and getting this driver
- *  to work on every motherboard design that uses this screwed chip seems
- *  bloody well impossible.  However, we're still trying.
- *
- *  Version 0.97 worked for everybody.
- *
- *  User feedback is essential.  Many thanks to the beta test team:
- *
- *  A.Hartgers@stud.tue.nl, JZDQC@CUNYVM.CUNY.edu, abramov@cecmow.enet.dec.com,
- *  bardj@utopia.ppp.sn.no, bart@gaga.tue.nl, bbol001@cs.auckland.ac.nz,
- *  chrisc@dbass.demon.co.uk, dalecki@namu26.Num.Math.Uni-Goettingen.de,
- *  derekn@vw.ece.cmu.edu, florian@btp2x3.phy.uni-bayreuth.de,
- *  flynn@dei.unipd.it, gadio@netvision.net.il, godzilla@futuris.net,
- *  j@pobox.com, jkemp1@mises.uni-paderborn.de, jtoppe@hiwaay.net,
- *  kerouac@ssnet.com, meskes@informatik.rwth-aachen.de, hzoli@cs.elte.hu,
- *  peter@udgaard.isgtec.com, phil@tazenda.demon.co.uk, roadcapw@cfw.com,
- *  s0033las@sun10.vsz.bme.hu, schaffer@tam.cornell.edu, sjd@slip.net,
- *  steve@ei.org, ulrpeg@bigcomm.gun.de, ism@tardis.ed.ac.uk, mack@cray.com
- *  liug@mama.indstate.edu, and others.
- *
- *  Version 0.01	Initial version, hacked out of ide.c,
- *			and #include'd rather than compiled separately.
- *			This will get cleaned up in a subsequent release.
- *
- *  Version 0.02	Fixes for vlb initialization code, enable prefetch
- *			for versions 'B' and 'C' of chip by default,
- *			some code cleanup.
- *
- *  Version 0.03	Added reset of secondary interface,
- *			and black list for devices which are not compatible
- *			with prefetch mode. Separate function for setting
- *			prefetch is added, possibly it will be called some
- *			day from ioctl processing code.
- *
- *  Version 0.04	Now configs/compiles separate from ide.c
- *
- *  Version 0.05	Major rewrite of interface timing code.
- *			Added new function cmd640_set_mode to set PIO mode
- *			from ioctl call. New drives added to black list.
- *
- *  Version 0.06	More code cleanup. Prefetch is enabled only for
- *			detected hard drives, not included in prefetch
- *			black list.
- *
- *  Version 0.07	Changed to more conservative drive tuning policy.
- *			Unknown drives, which report PIO < 4 are set to
- *			(reported_PIO - 1) if it is supported, or to PIO0.
- *			List of known drives extended by info provided by
- *			CMD at their ftp site.
- *
- *  Version 0.08	Added autotune/noautotune support.
- *
- *  Version 0.09	Try to be smarter about 2nd port enabling.
- *  Version 0.10	Be nice and don't reset 2nd port.
- *  Version 0.11	Try to handle more weird situations.
- *
- *  Version 0.12	Lots of bug fixes from Laszlo Peter
- *			irq unmasking disabled for reliability.
- *			try to be even smarter about the second port.
- *			tidy up source code formatting.
- *  Version 0.13	permit irq unmasking again.
- *  Version 0.90	massive code cleanup, some bugs fixed.
- *			defaults all drives to PIO mode0, prefetch off.
- *			autotune is OFF by default, with compile time flag.
- *			prefetch can be turned OFF/ON using "hdparm -p8/-p9"
- *			 (requires hdparm-3.1 or newer)
- *  Version 0.91	first release to linux-kernel list.
- *  Version 0.92	move initial reg dump to separate callable function
- *			change "readahead" to "prefetch" to avoid confusion
- *  Version 0.95	respect original BIOS timings unless autotuning.
- *			tons of code cleanup and rearrangement.
- *			added CONFIG_BLK_DEV_CMD640_ENHANCED option
- *			prevent use of unmask when prefetch is on
- *  Version 0.96	prevent use of io_32bit when prefetch is off
- *  Version 0.97	fix VLB secondary interface for sjd@slip.net
- *			other minor tune-ups:  0.96 was very good.
- *  Version 0.98	ignore PCI version when disabled by BIOS
- *  Version 0.99	display setup/active/recovery clocks with PIO mode
- *  Version 1.00	Mmm.. cannot depend on PCMD_ENA in all systems
- *  Version 1.01	slow/fast devsel can be selected with "hdparm -p6/-p7"
- *			 ("fast" is necessary for 32bit I/O in some systems)
- *  Version 1.02	fix bug that resulted in slow "setup times"
- *			 (patch courtesy of Zoltan Hidvegi)
- */
-
-#define CMD640_PREFETCH_MASKS 1
-
-/*#define CMD640_DUMP_REGS */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "cmd640"
-
-static bool cmd640_vlb;
-
-/*
- * CMD640 specific registers definition.
- */
-
-#define VID		0x00
-#define DID		0x02
-#define PCMD		0x04
-#define   PCMD_ENA	0x01
-#define PSTTS		0x06
-#define REVID		0x08
-#define PROGIF		0x09
-#define SUBCL		0x0a
-#define BASCL		0x0b
-#define BaseA0		0x10
-#define BaseA1		0x14
-#define BaseA2		0x18
-#define BaseA3		0x1c
-#define INTLINE		0x3c
-#define INPINE		0x3d
-
-#define	CFR		0x50
-#define   CFR_DEVREV		0x03
-#define   CFR_IDE01INTR		0x04
-#define	  CFR_DEVID		0x18
-#define	  CFR_AT_VESA_078h	0x20
-#define	  CFR_DSA1		0x40
-#define	  CFR_DSA0		0x80
-
-#define CNTRL		0x51
-#define	  CNTRL_DIS_RA0		0x40
-#define   CNTRL_DIS_RA1		0x80
-#define	  CNTRL_ENA_2ND		0x08
-
-#define	CMDTIM		0x52
-#define	ARTTIM0		0x53
-#define	DRWTIM0		0x54
-#define ARTTIM1 	0x55
-#define DRWTIM1		0x56
-#define ARTTIM23	0x57
-#define   ARTTIM23_DIS_RA2	0x04
-#define   ARTTIM23_DIS_RA3	0x08
-#define   ARTTIM23_IDE23INTR	0x10
-#define DRWTIM23	0x58
-#define BRST		0x59
-
-/*
- * Registers and masks for easy access by drive index:
- */
-static u8 prefetch_regs[4]  = {CNTRL, CNTRL, ARTTIM23, ARTTIM23};
-static u8 prefetch_masks[4] = {CNTRL_DIS_RA0, CNTRL_DIS_RA1, ARTTIM23_DIS_RA2, ARTTIM23_DIS_RA3};
-
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-
-static u8 arttim_regs[4] = {ARTTIM0, ARTTIM1, ARTTIM23, ARTTIM23};
-static u8 drwtim_regs[4] = {DRWTIM0, DRWTIM1, DRWTIM23, DRWTIM23};
-
-/*
- * Current cmd640 timing values for each drive.
- * The defaults for each are the slowest possible timings.
- */
-static u8 setup_counts[4]    = {4, 4, 4, 4};     /* Address setup count (in clocks) */
-static u8 active_counts[4]   = {16, 16, 16, 16}; /* Active count   (encoded) */
-static u8 recovery_counts[4] = {16, 16, 16, 16}; /* Recovery count (encoded) */
-
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
-
-static DEFINE_SPINLOCK(cmd640_lock);
-
-/*
- * Interface to access cmd640x registers
- */
-static unsigned int cmd640_key;
-static void (*__put_cmd640_reg)(u16 reg, u8 val);
-static u8 (*__get_cmd640_reg)(u16 reg);
-
-/*
- * This is read from the CFR reg, and is used in several places.
- */
-static unsigned int cmd640_chip_version;
-
-/*
- * The CMD640x chip does not support DWORD config write cycles, but some
- * of the BIOSes use them to implement the config services.
- * Therefore, we must use direct IO instead.
- */
-
-/* PCI method 1 access */
-
-static void put_cmd640_reg_pci1(u16 reg, u8 val)
-{
-	outl_p((reg & 0xfc) | cmd640_key, 0xcf8);
-	outb_p(val, (reg & 3) | 0xcfc);
-}
-
-static u8 get_cmd640_reg_pci1(u16 reg)
-{
-	outl_p((reg & 0xfc) | cmd640_key, 0xcf8);
-	return inb_p((reg & 3) | 0xcfc);
-}
-
-/* PCI method 2 access (from CMD datasheet) */
-
-static void put_cmd640_reg_pci2(u16 reg, u8 val)
-{
-	outb_p(0x10, 0xcf8);
-	outb_p(val, cmd640_key + reg);
-	outb_p(0, 0xcf8);
-}
-
-static u8 get_cmd640_reg_pci2(u16 reg)
-{
-	u8 b;
-
-	outb_p(0x10, 0xcf8);
-	b = inb_p(cmd640_key + reg);
-	outb_p(0, 0xcf8);
-	return b;
-}
-
-/* VLB access */
-
-static void put_cmd640_reg_vlb(u16 reg, u8 val)
-{
-	outb_p(reg, cmd640_key);
-	outb_p(val, cmd640_key + 4);
-}
-
-static u8 get_cmd640_reg_vlb(u16 reg)
-{
-	outb_p(reg, cmd640_key);
-	return inb_p(cmd640_key + 4);
-}
-
-static u8 get_cmd640_reg(u16 reg)
-{
-	unsigned long flags;
-	u8 b;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-	b = __get_cmd640_reg(reg);
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-	return b;
-}
-
-static void put_cmd640_reg(u16 reg, u8 val)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-	__put_cmd640_reg(reg, val);
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-}
-
-static int __init match_pci_cmd640_device(void)
-{
-	const u8 ven_dev[4] = {0x95, 0x10, 0x40, 0x06};
-	unsigned int i;
-	for (i = 0; i < 4; i++) {
-		if (get_cmd640_reg(i) != ven_dev[i])
-			return 0;
-	}
-#ifdef STUPIDLY_TRUST_BROKEN_PCMD_ENA_BIT
-	if ((get_cmd640_reg(PCMD) & PCMD_ENA) == 0) {
-		printk("ide: cmd640 on PCI disabled by BIOS\n");
-		return 0;
-	}
-#endif /* STUPIDLY_TRUST_BROKEN_PCMD_ENA_BIT */
-	return 1; /* success */
-}
-
-/*
- * Probe for CMD640x -- pci method 1
- */
-static int __init probe_for_cmd640_pci1(void)
-{
-	__get_cmd640_reg = get_cmd640_reg_pci1;
-	__put_cmd640_reg = put_cmd640_reg_pci1;
-	for (cmd640_key = 0x80000000;
-	     cmd640_key <= 0x8000f800;
-	     cmd640_key += 0x800) {
-		if (match_pci_cmd640_device())
-			return 1; /* success */
-	}
-	return 0;
-}
-
-/*
- * Probe for CMD640x -- pci method 2
- */
-static int __init probe_for_cmd640_pci2(void)
-{
-	__get_cmd640_reg = get_cmd640_reg_pci2;
-	__put_cmd640_reg = put_cmd640_reg_pci2;
-	for (cmd640_key = 0xc000; cmd640_key <= 0xcf00; cmd640_key += 0x100) {
-		if (match_pci_cmd640_device())
-			return 1; /* success */
-	}
-	return 0;
-}
-
-/*
- * Probe for CMD640x -- vlb
- */
-static int __init probe_for_cmd640_vlb(void)
-{
-	u8 b;
-
-	__get_cmd640_reg = get_cmd640_reg_vlb;
-	__put_cmd640_reg = put_cmd640_reg_vlb;
-	cmd640_key = 0x178;
-	b = get_cmd640_reg(CFR);
-	if (b == 0xff || b == 0x00 || (b & CFR_AT_VESA_078h)) {
-		cmd640_key = 0x78;
-		b = get_cmd640_reg(CFR);
-		if (b == 0xff || b == 0x00 || !(b & CFR_AT_VESA_078h))
-			return 0;
-	}
-	return 1; /* success */
-}
-
-/*
- *  Returns 1 if an IDE interface/drive exists at 0x170,
- *  Returns 0 otherwise.
- */
-static int __init secondary_port_responding(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-
-	outb_p(0x0a, 0x176);	/* select drive0 */
-	udelay(100);
-	if ((inb_p(0x176) & 0x1f) != 0x0a) {
-		outb_p(0x1a, 0x176); /* select drive1 */
-		udelay(100);
-		if ((inb_p(0x176) & 0x1f) != 0x1a) {
-			spin_unlock_irqrestore(&cmd640_lock, flags);
-			return 0; /* nothing responded */
-		}
-	}
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-	return 1; /* success */
-}
-
-#ifdef CMD640_DUMP_REGS
-/*
- * Dump out all cmd640 registers.  May be called from ide.c
- */
-static void cmd640_dump_regs(void)
-{
-	unsigned int reg = cmd640_vlb ? 0x50 : 0x00;
-
-	/* Dump current state of chip registers */
-	printk("ide: cmd640 internal register dump:");
-	for (; reg <= 0x59; reg++) {
-		if (!(reg & 0x0f))
-			printk("\n%04x:", reg);
-		printk(" %02x", get_cmd640_reg(reg));
-	}
-	printk("\n");
-}
-#endif
-
-static void __set_prefetch_mode(ide_drive_t *drive, int mode)
-{
-	if (mode) {	/* want prefetch on? */
-#if CMD640_PREFETCH_MASKS
-		drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
-		drive->dev_flags &= ~IDE_DFLAG_UNMASK;
-#endif
-		drive->dev_flags &= ~IDE_DFLAG_NO_IO_32BIT;
-	} else {
-		drive->dev_flags &= ~IDE_DFLAG_NO_UNMASK;
-		drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT;
-		drive->io_32bit = 0;
-	}
-}
-
-#ifndef CONFIG_BLK_DEV_CMD640_ENHANCED
-/*
- * Check whether prefetch is on for a drive,
- * and initialize the unmask flags for safe operation.
- */
-static void __init check_prefetch(ide_drive_t *drive, unsigned int index)
-{
-	u8 b = get_cmd640_reg(prefetch_regs[index]);
-
-	__set_prefetch_mode(drive, (b & prefetch_masks[index]) ? 0 : 1);
-}
-#else
-
-/*
- * Sets prefetch mode for a drive.
- */
-static void set_prefetch_mode(ide_drive_t *drive, unsigned int index, int mode)
-{
-	unsigned long flags;
-	int reg = prefetch_regs[index];
-	u8 b;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-	b = __get_cmd640_reg(reg);
-	__set_prefetch_mode(drive, mode);
-	if (mode)
-		b &= ~prefetch_masks[index];	/* enable prefetch */
-	else
-		b |= prefetch_masks[index];	/* disable prefetch */
-	__put_cmd640_reg(reg, b);
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-}
-
-/*
- * Dump out current drive clocks settings
- */
-static void display_clocks(unsigned int index)
-{
-	u8 active_count, recovery_count;
-
-	active_count = active_counts[index];
-	if (active_count == 1)
-		++active_count;
-	recovery_count = recovery_counts[index];
-	if (active_count > 3 && recovery_count == 1)
-		++recovery_count;
-	if (cmd640_chip_version > 1)
-		recovery_count += 1;  /* cmd640b uses (count + 1)*/
-	printk(", clocks=%d/%d/%d\n", setup_counts[index], active_count, recovery_count);
-}
-
-/*
- * Pack active and recovery counts into single byte representation
- * used by controller
- */
-static inline u8 pack_nibbles(u8 upper, u8 lower)
-{
-	return ((upper & 0x0f) << 4) | (lower & 0x0f);
-}
-
-/*
- * This routine writes the prepared setup/active/recovery counts
- * for a drive into the cmd640 chipset registers to active them.
- */
-static void program_drive_counts(ide_drive_t *drive, unsigned int index)
-{
-	unsigned long flags;
-	u8 setup_count    = setup_counts[index];
-	u8 active_count   = active_counts[index];
-	u8 recovery_count = recovery_counts[index];
-
-	/*
-	 * Set up address setup count and drive read/write timing registers.
-	 * Primary interface has individual count/timing registers for
-	 * each drive.  Secondary interface has one common set of registers,
-	 * so we merge the timings, using the slowest value for each timing.
-	 */
-	if (index > 1) {
-		ide_drive_t *peer = ide_get_pair_dev(drive);
-		unsigned int mate = index ^ 1;
-
-		if (peer) {
-			if (setup_count < setup_counts[mate])
-				setup_count = setup_counts[mate];
-			if (active_count < active_counts[mate])
-				active_count = active_counts[mate];
-			if (recovery_count < recovery_counts[mate])
-				recovery_count = recovery_counts[mate];
-		}
-	}
-
-	/*
-	 * Convert setup_count to internal chipset representation
-	 */
-	switch (setup_count) {
-	case 4:	 setup_count = 0x00; break;
-	case 3:	 setup_count = 0x80; break;
-	case 1:
-	case 2:	 setup_count = 0x40; break;
-	default: setup_count = 0xc0; /* case 5 */
-	}
-
-	/*
-	 * Now that everything is ready, program the new timings
-	 */
-	spin_lock_irqsave(&cmd640_lock, flags);
-	/*
-	 * Program the address_setup clocks into ARTTIM reg,
-	 * and then the active/recovery counts into the DRWTIM reg
-	 * (this converts counts of 16 into counts of zero -- okay).
-	 */
-	setup_count |= __get_cmd640_reg(arttim_regs[index]) & 0x3f;
-	__put_cmd640_reg(arttim_regs[index], setup_count);
-	__put_cmd640_reg(drwtim_regs[index], pack_nibbles(active_count, recovery_count));
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-}
-
-/*
- * Set a specific pio_mode for a drive
- */
-static void cmd640_set_mode(ide_drive_t *drive, unsigned int index,
-			    u8 pio_mode, unsigned int cycle_time)
-{
-	struct ide_timing *t;
-	int setup_time, active_time, recovery_time, clock_time;
-	u8 setup_count, active_count, recovery_count, recovery_count2, cycle_count;
-	int bus_speed;
-
-	if (cmd640_vlb)
-		bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
-	else
-		bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-
-	if (pio_mode > 5)
-		pio_mode = 5;
-
-	t = ide_timing_find_mode(XFER_PIO_0 + pio_mode);
-	setup_time  = t->setup;
-	active_time = t->active;
-
-	recovery_time = cycle_time - (setup_time + active_time);
-	clock_time = 1000 / bus_speed;
-	cycle_count = DIV_ROUND_UP(cycle_time, clock_time);
-
-	setup_count = DIV_ROUND_UP(setup_time, clock_time);
-
-	active_count = DIV_ROUND_UP(active_time, clock_time);
-	if (active_count < 2)
-		active_count = 2; /* minimum allowed by cmd640 */
-
-	recovery_count = DIV_ROUND_UP(recovery_time, clock_time);
-	recovery_count2 = cycle_count - (setup_count + active_count);
-	if (recovery_count2 > recovery_count)
-		recovery_count = recovery_count2;
-	if (recovery_count < 2)
-		recovery_count = 2; /* minimum allowed by cmd640 */
-	if (recovery_count > 17) {
-		active_count += recovery_count - 17;
-		recovery_count = 17;
-	}
-	if (active_count > 16)
-		active_count = 16; /* maximum allowed by cmd640 */
-	if (cmd640_chip_version > 1)
-		recovery_count -= 1;  /* cmd640b uses (count + 1)*/
-	if (recovery_count > 16)
-		recovery_count = 16; /* maximum allowed by cmd640 */
-
-	setup_counts[index]    = setup_count;
-	active_counts[index]   = active_count;
-	recovery_counts[index] = recovery_count;
-
-	/*
-	 * In a perfect world, we might set the drive pio mode here
-	 * (using WIN_SETFEATURE) before continuing.
-	 *
-	 * But we do not, because:
-	 *	1) this is the wrong place to do it (proper is do_special() in ide.c)
-	 * 	2) in practice this is rarely, if ever, necessary
-	 */
-	program_drive_counts(drive, index);
-}
-
-static void cmd640_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned int index = 0, cycle_time;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	u8 b;
-
-	switch (pio) {
-	case 6: /* set fast-devsel off */
-	case 7: /* set fast-devsel on */
-		b = get_cmd640_reg(CNTRL) & ~0x27;
-		if (pio & 1)
-			b |= 0x27;
-		put_cmd640_reg(CNTRL, b);
-		printk("%s: %sabled cmd640 fast host timing (devsel)\n",
-			drive->name, (pio & 1) ? "en" : "dis");
-		return;
-	case 8: /* set prefetch off */
-	case 9: /* set prefetch on */
-		set_prefetch_mode(drive, index, pio & 1);
-		printk("%s: %sabled cmd640 prefetch\n",
-			drive->name, (pio & 1) ? "en" : "dis");
-		return;
-	}
-
-	cycle_time = ide_pio_cycle_time(drive, pio);
-	cmd640_set_mode(drive, index, pio, cycle_time);
-
-	printk("%s: selected cmd640 PIO mode%d (%dns)",
-		drive->name, pio, cycle_time);
-
-	display_clocks(index);
-}
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
-
-static void __init cmd640_init_dev(ide_drive_t *drive)
-{
-	unsigned int i = drive->hwif->channel * 2 + (drive->dn & 1);
-
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-	/*
-	 * Reset timing to the slowest speed and turn off prefetch.
-	 * This way, the drive identify code has a better chance.
-	 */
-	setup_counts[i]    =  4;	/* max possible */
-	active_counts[i]   = 16;	/* max possible */
-	recovery_counts[i] = 16;	/* max possible */
-	program_drive_counts(drive, i);
-	set_prefetch_mode(drive, i, 0);
-	printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch cleared\n", i);
-#else
-	/*
-	 * Set the drive unmask flags to match the prefetch setting.
-	 */
-	check_prefetch(drive, i);
-	printk(KERN_INFO DRV_NAME ": drive%d timings/prefetch(%s) preserved\n",
-		i, (drive->dev_flags & IDE_DFLAG_NO_IO_32BIT) ? "off" : "on");
-#endif /* CONFIG_BLK_DEV_CMD640_ENHANCED */
-}
-
-static int cmd640_test_irq(ide_hwif_t *hwif)
-{
-	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
-	u8  irq_mask		= hwif->channel ? ARTTIM23_IDE23INTR :
-						  CFR_IDE01INTR;
-	u8  irq_stat		= get_cmd640_reg(irq_reg);
-
-	return (irq_stat & irq_mask) ? 1 : 0;
-}
-
-static const struct ide_port_ops cmd640_port_ops = {
-	.init_dev		= cmd640_init_dev,
-#ifdef CONFIG_BLK_DEV_CMD640_ENHANCED
-	.set_pio_mode		= cmd640_set_pio_mode,
-#endif
-	.test_irq		= cmd640_test_irq,
-};
-
-static int pci_conf1(void)
-{
-	unsigned long flags;
-	u32 tmp;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-	outb(0x01, 0xCFB);
-	tmp = inl(0xCF8);
-	outl(0x80000000, 0xCF8);
-	if (inl(0xCF8) == 0x80000000) {
-		outl(tmp, 0xCF8);
-		spin_unlock_irqrestore(&cmd640_lock, flags);
-		return 1;
-	}
-	outl(tmp, 0xCF8);
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-	return 0;
-}
-
-static int pci_conf2(void)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cmd640_lock, flags);
-	outb(0x00, 0xCFB);
-	outb(0x00, 0xCF8);
-	outb(0x00, 0xCFA);
-	if (inb(0xCF8) == 0x00 && inb(0xCF8) == 0x00) {
-		spin_unlock_irqrestore(&cmd640_lock, flags);
-		return 1;
-	}
-	spin_unlock_irqrestore(&cmd640_lock, flags);
-	return 0;
-}
-
-static const struct ide_port_info cmd640_port_info __initconst = {
-	.chipset		= ide_cmd640,
-	.host_flags		= IDE_HFLAG_SERIALIZE |
-				  IDE_HFLAG_NO_DMA |
-				  IDE_HFLAG_ABUSE_PREFETCH |
-				  IDE_HFLAG_ABUSE_FAST_DEVSEL,
-	.port_ops		= &cmd640_port_ops,
-	.pio_mask		= ATA_PIO5,
-};
-
-static int __init cmd640x_init_one(unsigned long base, unsigned long ctl)
-{
-	if (!request_region(base, 8, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
-				DRV_NAME, base, base + 7);
-		return -EBUSY;
-	}
-
-	if (!request_region(ctl, 1, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n",
-				DRV_NAME, ctl);
-		release_region(base, 8);
-		return -EBUSY;
-	}
-
-	return 0;
-}
-
-/*
- * Probe for a cmd640 chipset, and initialize it if found.
- */
-static int __init cmd640x_init(void)
-{
-	int second_port_cmd640 = 0, rc;
-	const char *bus_type, *port2;
-	u8 b, cfr;
-	struct ide_hw hw[2], *hws[2];
-
-	if (cmd640_vlb && probe_for_cmd640_vlb()) {
-		bus_type = "VLB";
-	} else {
-		cmd640_vlb = 0;
-		/* Find out what kind of PCI probing is supported otherwise
-		   Justin Gibbs will sulk.. */
-		if (pci_conf1() && probe_for_cmd640_pci1())
-			bus_type = "PCI (type1)";
-		else if (pci_conf2() && probe_for_cmd640_pci2())
-			bus_type = "PCI (type2)";
-		else
-			return 0;
-	}
-	/*
-	 * Undocumented magic (there is no 0x5b reg in specs)
-	 */
-	put_cmd640_reg(0x5b, 0xbd);
-	if (get_cmd640_reg(0x5b) != 0xbd) {
-		printk(KERN_ERR "ide: cmd640 init failed: wrong value in reg 0x5b\n");
-		return 0;
-	}
-	put_cmd640_reg(0x5b, 0);
-
-#ifdef CMD640_DUMP_REGS
-	cmd640_dump_regs();
-#endif
-
-	/*
-	 * Documented magic begins here
-	 */
-	cfr = get_cmd640_reg(CFR);
-	cmd640_chip_version = cfr & CFR_DEVREV;
-	if (cmd640_chip_version == 0) {
-		printk("ide: bad cmd640 revision: %d\n", cmd640_chip_version);
-		return 0;
-	}
-
-	rc = cmd640x_init_one(0x1f0, 0x3f6);
-	if (rc)
-		return rc;
-
-	rc = cmd640x_init_one(0x170, 0x376);
-	if (rc) {
-		release_region(0x3f6, 1);
-		release_region(0x1f0, 8);
-		return rc;
-	}
-
-	memset(&hw, 0, sizeof(hw));
-
-	ide_std_init_ports(&hw[0], 0x1f0, 0x3f6);
-	hw[0].irq = 14;
-
-	ide_std_init_ports(&hw[1], 0x170, 0x376);
-	hw[1].irq = 15;
-
-	printk(KERN_INFO "cmd640: buggy cmd640%c interface on %s, config=0x%02x"
-			 "\n", 'a' + cmd640_chip_version - 1, bus_type, cfr);
-
-	/*
-	 * Initialize data for primary port
-	 */
-	hws[0] = &hw[0];
-
-	/*
-	 * Ensure compatibility by always using the slowest timings
-	 * for access to the drive's command register block,
-	 * and reset the prefetch burstsize to default (512 bytes).
-	 *
-	 * Maybe we need a way to NOT do these on *some* systems?
-	 */
-	put_cmd640_reg(CMDTIM, 0);
-	put_cmd640_reg(BRST, 0x40);
-
-	b = get_cmd640_reg(CNTRL);
-
-	/*
-	 * Try to enable the secondary interface, if not already enabled
-	 */
-	if (secondary_port_responding()) {
-		if ((b & CNTRL_ENA_2ND)) {
-			second_port_cmd640 = 1;
-			port2 = "okay";
-		} else if (cmd640_vlb) {
-			second_port_cmd640 = 1;
-			port2 = "alive";
-		} else
-			port2 = "not cmd640";
-	} else {
-		put_cmd640_reg(CNTRL, b ^ CNTRL_ENA_2ND); /* toggle the bit */
-		if (secondary_port_responding()) {
-			second_port_cmd640 = 1;
-			port2 = "enabled";
-		} else {
-			put_cmd640_reg(CNTRL, b); /* restore original setting */
-			port2 = "not responding";
-		}
-	}
-
-	/*
-	 * Initialize data for secondary cmd640 port, if enabled
-	 */
-	if (second_port_cmd640)
-		hws[1] = &hw[1];
-
-	printk(KERN_INFO "cmd640: %sserialized, secondary interface %s\n",
-			 second_port_cmd640 ? "" : "not ", port2);
-
-#ifdef CMD640_DUMP_REGS
-	cmd640_dump_regs();
-#endif
-
-	return ide_host_add(&cmd640_port_info, hws, second_port_cmd640 ? 2 : 1,
-			    NULL);
-}
-
-module_param_named(probe_vlb, cmd640_vlb, bool, 0);
-MODULE_PARM_DESC(probe_vlb, "probe for VLB version of CMD640 chipset");
-
-module_init(cmd640x_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c
deleted file mode 100644
index 943bf944bf722..0000000000000
--- a/drivers/ide/cmd64x.c
+++ /dev/null
@@ -1,452 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * cmd64x.c: Enable interrupts at initialization time on Ultra/PCI machines.
- *           Due to massive hardware bugs, UltraDMA is only supported
- *           on the 646U2 and not on the 646U.
- *
- * Copyright (C) 1998		Eddie C. Dost  (ecd@skynet.be)
- * Copyright (C) 1998		David S. Miller (davem@redhat.com)
- *
- * Copyright (C) 1999-2002	Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2007-2010	Bartlomiej Zolnierkiewicz
- * Copyright (C) 2007,2009	MontaVista Software, Inc. <source@mvista.com>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "cmd64x"
-
-/*
- * CMD64x specific registers definition.
- */
-#define CFR		0x50
-#define   CFR_INTR_CH0		0x04
-
-#define	CMDTIM		0x52
-#define	ARTTIM0		0x53
-#define	DRWTIM0		0x54
-#define ARTTIM1 	0x55
-#define DRWTIM1		0x56
-#define ARTTIM23	0x57
-#define   ARTTIM23_DIS_RA2	0x04
-#define   ARTTIM23_DIS_RA3	0x08
-#define   ARTTIM23_INTR_CH1	0x10
-#define DRWTIM2		0x58
-#define BRST		0x59
-#define DRWTIM3		0x5b
-
-#define BMIDECR0	0x70
-#define MRDMODE		0x71
-#define   MRDMODE_INTR_CH0	0x04
-#define   MRDMODE_INTR_CH1	0x08
-#define UDIDETCR0	0x73
-#define DTPR0		0x74
-#define BMIDECR1	0x78
-#define BMIDECSR	0x79
-#define UDIDETCR1	0x7B
-#define DTPR1		0x7C
-
-static void cmd64x_program_timings(ide_drive_t *drive, u8 mode)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-	const unsigned long T = 1000000 / bus_speed;
-	static const u8 recovery_values[] =
-		{15, 15, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0};
-	static const u8 setup_values[] = {0x40, 0x40, 0x40, 0x80, 0, 0xc0};
-	static const u8 arttim_regs[4] = {ARTTIM0, ARTTIM1, ARTTIM23, ARTTIM23};
-	static const u8 drwtim_regs[4] = {DRWTIM0, DRWTIM1, DRWTIM2, DRWTIM3};
-	struct ide_timing t;
-	u8 arttim = 0;
-
-	if (drive->dn >= ARRAY_SIZE(drwtim_regs))
-		return;
-
-	ide_timing_compute(drive, mode, &t, T, 0);
-
-	/*
-	 * In case we've got too long recovery phase, try to lengthen
-	 * the active phase
-	 */
-	if (t.recover > 16) {
-		t.active += t.recover - 16;
-		t.recover = 16;
-	}
-	if (t.active > 16)		/* shouldn't actually happen... */
-		t.active = 16;
-
-	/*
-	 * Convert values to internal chipset representation
-	 */
-	t.recover = recovery_values[t.recover];
-	t.active &= 0x0f;
-
-	/* Program the active/recovery counts into the DRWTIM register */
-	pci_write_config_byte(dev, drwtim_regs[drive->dn],
-			      (t.active << 4) | t.recover);
-
-	/*
-	 * The primary channel has individual address setup timing registers
-	 * for each drive and the hardware selects the slowest timing itself.
-	 * The secondary channel has one common register and we have to select
-	 * the slowest address setup timing ourselves.
-	 */
-	if (hwif->channel) {
-		ide_drive_t *pair = ide_get_pair_dev(drive);
-
-		if (pair) {
-			struct ide_timing tp;
-
-			ide_timing_compute(pair, pair->pio_mode, &tp, T, 0);
-			ide_timing_merge(&t, &tp, &t, IDE_TIMING_SETUP);
-			if (pair->dma_mode) {
-				ide_timing_compute(pair, pair->dma_mode,
-						&tp, T, 0);
-				ide_timing_merge(&tp, &t, &t, IDE_TIMING_SETUP);
-			}
-		}
-	}
-
-	if (t.setup > 5)		/* shouldn't actually happen... */
-		t.setup = 5;
-
-	/*
-	 * Program the address setup clocks into the ARTTIM registers.
-	 * Avoid clearing the secondary channel's interrupt bit.
-	 */
-	(void) pci_read_config_byte (dev, arttim_regs[drive->dn], &arttim);
-	if (hwif->channel)
-		arttim &= ~ARTTIM23_INTR_CH1;
-	arttim &= ~0xc0;
-	arttim |= setup_values[t.setup];
-	(void) pci_write_config_byte(dev, arttim_regs[drive->dn], arttim);
-}
-
-/*
- * Attempts to set drive's PIO mode.
- * Special cases are 8: prefetch off, 9: prefetch on (both never worked)
- */
-
-static void cmd64x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	/*
-	 * Filter out the prefetch control values
-	 * to prevent PIO5 from being programmed
-	 */
-	if (pio == 8 || pio == 9)
-		return;
-
-	cmd64x_program_timings(drive, XFER_PIO_0 + pio);
-}
-
-static void cmd64x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 unit			= drive->dn & 0x01;
-	u8 regU = 0, pciU	= hwif->channel ? UDIDETCR1 : UDIDETCR0;
-	const u8 speed		= drive->dma_mode;
-
-	pci_read_config_byte(dev, pciU, &regU);
-	regU &= ~(unit ? 0xCA : 0x35);
-
-	switch(speed) {
-	case XFER_UDMA_5:
-		regU |= unit ? 0x0A : 0x05;
-		break;
-	case XFER_UDMA_4:
-		regU |= unit ? 0x4A : 0x15;
-		break;
-	case XFER_UDMA_3:
-		regU |= unit ? 0x8A : 0x25;
-		break;
-	case XFER_UDMA_2:
-		regU |= unit ? 0x42 : 0x11;
-		break;
-	case XFER_UDMA_1:
-		regU |= unit ? 0x82 : 0x21;
-		break;
-	case XFER_UDMA_0:
-		regU |= unit ? 0xC2 : 0x31;
-		break;
-	case XFER_MW_DMA_2:
-	case XFER_MW_DMA_1:
-	case XFER_MW_DMA_0:
-		cmd64x_program_timings(drive, speed);
-		break;
-	}
-
-	pci_write_config_byte(dev, pciU, regU);
-}
-
-static void cmd648_clear_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long base	= pci_resource_start(dev, 4);
-	u8  irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
-						  MRDMODE_INTR_CH0;
-	u8  mrdmode		= inb(base + 1);
-
-	/* clear the interrupt bit */
-	outb((mrdmode & ~(MRDMODE_INTR_CH0 | MRDMODE_INTR_CH1)) | irq_mask,
-	     base + 1);
-}
-
-static void cmd64x_clear_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
-	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
-						  CFR_INTR_CH0;
-	u8  irq_stat		= 0;
-
-	(void) pci_read_config_byte(dev, irq_reg, &irq_stat);
-	/* clear the interrupt bit */
-	(void) pci_write_config_byte(dev, irq_reg, irq_stat | irq_mask);
-}
-
-static int cmd648_test_irq(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long base	= pci_resource_start(dev, 4);
-	u8 irq_mask		= hwif->channel ? MRDMODE_INTR_CH1 :
-						  MRDMODE_INTR_CH0;
-	u8 mrdmode		= inb(base + 1);
-
-	pr_debug("%s: mrdmode: 0x%02x irq_mask: 0x%02x\n",
-		 hwif->name, mrdmode, irq_mask);
-
-	return (mrdmode & irq_mask) ? 1 : 0;
-}
-
-static int cmd64x_test_irq(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int irq_reg		= hwif->channel ? ARTTIM23 : CFR;
-	u8  irq_mask		= hwif->channel ? ARTTIM23_INTR_CH1 :
-						  CFR_INTR_CH0;
-	u8  irq_stat		= 0;
-
-	(void) pci_read_config_byte(dev, irq_reg, &irq_stat);
-
-	pr_debug("%s: irq_stat: 0x%02x irq_mask: 0x%02x\n",
-		 hwif->name, irq_stat, irq_mask);
-
-	return (irq_stat & irq_mask) ? 1 : 0;
-}
-
-/*
- * ASUS P55T2P4D with CMD646 chipset revision 0x01 requires the old
- * event order for DMA transfers.
- */
-
-static int cmd646_1_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = 0, dma_cmd = 0;
-
-	/* get DMA status */
-	dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
-	/* read DMA command state */
-	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-	/* stop DMA */
-	outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-	/* clear the INTR & ERROR bits */
-	outb(dma_stat | 6, hwif->dma_base + ATA_DMA_STATUS);
-	/* verify good DMA status */
-	return (dma_stat & 7) != 4;
-}
-
-static int init_chipset_cmd64x(struct pci_dev *dev)
-{
-	u8 mrdmode = 0;
-
-	/* Set a good latency timer and cache line size value. */
-	(void) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 64);
-	/* FIXME: pci_set_master() to ensure a good latency timer value */
-
-	/*
-	 * Enable interrupts, select MEMORY READ LINE for reads.
-	 *
-	 * NOTE: although not mentioned in the PCI0646U specs,
-	 * bits 0-1 are write only and won't be read back as
-	 * set or not -- PCI0646U2 specs clarify this point.
-	 */
-	(void) pci_read_config_byte (dev, MRDMODE, &mrdmode);
-	mrdmode &= ~0x30;
-	(void) pci_write_config_byte(dev, MRDMODE, (mrdmode | 0x02));
-
-	return 0;
-}
-
-static u8 cmd64x_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev  *dev	= to_pci_dev(hwif->dev);
-	u8 bmidecsr = 0, mask	= hwif->channel ? 0x02 : 0x01;
-
-	switch (dev->device) {
-	case PCI_DEVICE_ID_CMD_648:
-	case PCI_DEVICE_ID_CMD_649:
- 		pci_read_config_byte(dev, BMIDECSR, &bmidecsr);
-		return (bmidecsr & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-	default:
-		return ATA_CBL_PATA40;
-	}
-}
-
-static const struct ide_port_ops cmd64x_port_ops = {
-	.set_pio_mode		= cmd64x_set_pio_mode,
-	.set_dma_mode		= cmd64x_set_dma_mode,
-	.clear_irq		= cmd64x_clear_irq,
-	.test_irq		= cmd64x_test_irq,
-	.cable_detect		= cmd64x_cable_detect,
-};
-
-static const struct ide_port_ops cmd648_port_ops = {
-	.set_pio_mode		= cmd64x_set_pio_mode,
-	.set_dma_mode		= cmd64x_set_dma_mode,
-	.clear_irq		= cmd648_clear_irq,
-	.test_irq		= cmd648_test_irq,
-	.cable_detect		= cmd64x_cable_detect,
-};
-
-static const struct ide_dma_ops cmd646_rev1_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= cmd646_1_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info cmd64x_chipsets[] = {
-	{	/* 0: CMD643 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_cmd64x,
-		.enablebits	= {{0x00,0x00,0x00}, {0x51,0x08,0x08}},
-		.port_ops	= &cmd64x_port_ops,
-		.host_flags	= IDE_HFLAG_CLEAR_SIMPLEX |
-				  IDE_HFLAG_ABUSE_PREFETCH |
-				  IDE_HFLAG_SERIALIZE,
-		.pio_mask	= ATA_PIO5,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= 0x00, /* no udma */
-	},
-	{	/* 1: CMD646 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_cmd64x,
-		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
-		.port_ops	= &cmd648_port_ops,
-		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH |
-				  IDE_HFLAG_SERIALIZE,
-		.pio_mask	= ATA_PIO5,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-	},
-	{	/* 2: CMD648 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_cmd64x,
-		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
-		.port_ops	= &cmd648_port_ops,
-		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH,
-		.pio_mask	= ATA_PIO5,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA4,
-	},
-	{	/* 3: CMD649 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_cmd64x,
-		.enablebits	= {{0x51,0x04,0x04}, {0x51,0x08,0x08}},
-		.port_ops	= &cmd648_port_ops,
-		.host_flags	= IDE_HFLAG_ABUSE_PREFETCH,
-		.pio_mask	= ATA_PIO5,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	}
-};
-
-static int cmd64x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-
-	d = cmd64x_chipsets[idx];
-
-	if (idx == 1) {
-		/*
-		 * UltraDMA only supported on PCI646U and PCI646U2, which
-		 * correspond to revisions 0x03, 0x05 and 0x07 respectively.
-		 * Actually, although the CMD tech support people won't
-		 * tell me the details, the 0x03 revision cannot support
-		 * UDMA correctly without hardware modifications, and even
-		 * then it only works with Quantum disks due to some
-		 * hold time assumptions in the 646U part which are fixed
-		 * in the 646U2.
-		 *
-		 * So we only do UltraDMA on revision 0x05 and 0x07 chipsets.
-		 */
-		if (dev->revision < 5) {
-			d.udma_mask = 0x00;
-			/*
-			 * The original PCI0646 didn't have the primary
-			 * channel enable bit, it appeared starting with
-			 * PCI0646U (i.e. revision ID 3).
-			 */
-			if (dev->revision < 3) {
-				d.enablebits[0].reg = 0;
-				d.port_ops = &cmd64x_port_ops;
-				if (dev->revision == 1)
-					d.dma_ops = &cmd646_rev1_dma_ops;
-			}
-		}
-	}
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static const struct pci_device_id cmd64x_pci_tbl[] = {
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_CMD_643), 0 },
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_CMD_646), 1 },
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_CMD_648), 2 },
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_CMD_649), 3 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, cmd64x_pci_tbl);
-
-static struct pci_driver cmd64x_pci_driver = {
-	.name		= "CMD64x_IDE",
-	.id_table	= cmd64x_pci_tbl,
-	.probe		= cmd64x_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init cmd64x_ide_init(void)
-{
-	return ide_pci_register_driver(&cmd64x_pci_driver);
-}
-
-static void __exit cmd64x_ide_exit(void)
-{
-	pci_unregister_driver(&cmd64x_pci_driver);
-}
-
-module_init(cmd64x_ide_init);
-module_exit(cmd64x_ide_exit);
-
-MODULE_AUTHOR("Eddie Dost, David Miller, Andre Hedrick, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("PCI driver module for CMD64x IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cs5520.c b/drivers/ide/cs5520.c
deleted file mode 100644
index 89a4ff100b7a5..0000000000000
--- a/drivers/ide/cs5520.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- *	IDE tuning and bus mastering support for the CS5510/CS5520
- *	chipsets
- *
- *	The CS5510/CS5520 are slightly unusual devices. Unlike the 
- *	typical IDE controllers they do bus mastering with the drive in
- *	PIO mode and smarter silicon.
- *
- *	The practical upshot of this is that we must always tune the
- *	drive for the right PIO mode. We must also ignore all the blacklists
- *	and the drive bus mastering DMA information.
- *
- *	*** This driver is strictly experimental ***
- *
- *	(c) Copyright Red Hat Inc 2002
- * 
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * For the avoidance of doubt the "preferred form" of this code is one which
- * is in an open non patent encumbered format. Where cryptographic key signing
- * forms part of the process of creating an executable the information
- * including keys needed to generate an equivalently functional executable
- * are deemed to be part of the source code.
- *
- */
- 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/dma-mapping.h>
-
-#define DRV_NAME "cs5520"
-
-struct pio_clocks
-{
-	int address;
-	int assert;
-	int recovery;
-};
-
-static struct pio_clocks cs5520_pio_clocks[]={
-	{3, 6, 11},
-	{2, 5, 6},
-	{1, 4, 3},
-	{1, 3, 2},
-	{1, 2, 1}
-};
-
-static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	int controller = drive->dn > 1 ? 1 : 0;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	/* 8bit CAT/CRT - 8bit command timing for channel */
-	pci_write_config_byte(pdev, 0x62 + controller, 
-		(cs5520_pio_clocks[pio].recovery << 4) |
-		(cs5520_pio_clocks[pio].assert));
-
-	/* 0x64 - 16bit Primary, 0x68 - 16bit Secondary */
-
-	/* FIXME: should these use address ? */
-	/* Data read timing */
-	pci_write_config_byte(pdev, 0x64 + 4*controller + (drive->dn&1),
-		(cs5520_pio_clocks[pio].recovery << 4) |
-		(cs5520_pio_clocks[pio].assert));
-	/* Write command timing */
-	pci_write_config_byte(pdev, 0x66 + 4*controller + (drive->dn&1),
-		(cs5520_pio_clocks[pio].recovery << 4) |
-		(cs5520_pio_clocks[pio].assert));
-}
-
-static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	printk(KERN_ERR "cs55x0: bad ide timing.\n");
-
-	drive->pio_mode = XFER_PIO_0 + 0;
-	cs5520_set_pio_mode(hwif, drive);
-}
-
-static const struct ide_port_ops cs5520_port_ops = {
-	.set_pio_mode		= cs5520_set_pio_mode,
-	.set_dma_mode		= cs5520_set_dma_mode,
-};
-
-static const struct ide_port_info cyrix_chipset = {
-	.name		= DRV_NAME,
-	.enablebits	= { { 0x60, 0x01, 0x01 }, { 0x60, 0x02, 0x02 } },
-	.port_ops	= &cs5520_port_ops,
-	.host_flags	= IDE_HFLAG_ISA_PORTS | IDE_HFLAG_CS5520,
-	.pio_mask	= ATA_PIO4,
-};
-
-/*
- *	The 5510/5520 are a bit weird. They don't quite set up the way
- *	the PCI helper layer expects so we must do much of the set up 
- *	work longhand.
- */
- 
-static int cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d = &cyrix_chipset;
-	struct ide_hw hw[2], *hws[] = { NULL, NULL };
-
-	ide_setup_pci_noise(dev, d);
-
-	/* We must not grab the entire device, it has 'ISA' space in its
-	 * BARS too and we will freak out other bits of the kernel
-	 */
-	if (pci_enable_device_io(dev)) {
-		printk(KERN_WARNING "%s: Unable to enable 55x0.\n", d->name);
-		return -ENODEV;
-	}
-	pci_set_master(dev);
-	if (dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) {
-		printk(KERN_WARNING "%s: No suitable DMA available.\n",
-			d->name);
-		return -ENODEV;
-	}
-
-	/*
-	 *	Now the chipset is configured we can let the core
-	 *	do all the device setup for us
-	 */
-
-	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
-	hw[0].irq = 14;
-	hw[1].irq = 15;
-
-	return ide_host_add(d, hws, 2, NULL);
-}
-
-static const struct pci_device_id cs5520_pci_tbl[] = {
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), 0 },
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), 1 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, cs5520_pci_tbl);
-
-static struct pci_driver cs5520_pci_driver = {
-	.name		= "Cyrix_IDE",
-	.id_table	= cs5520_pci_tbl,
-	.probe		= cs5520_init_one,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init cs5520_ide_init(void)
-{
-	return ide_pci_register_driver(&cs5520_pci_driver);
-}
-
-module_init(cs5520_ide_init);
-
-MODULE_AUTHOR("Alan Cox");
-MODULE_DESCRIPTION("PCI driver module for Cyrix 5510/5520 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cs5530.c b/drivers/ide/cs5530.c
deleted file mode 100644
index 65371599b9767..0000000000000
--- a/drivers/ide/cs5530.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (C) 2000			Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2000			Mark Lord <mlord@pobox.com>
- * Copyright (C) 2007			Bartlomiej Zolnierkiewicz
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- * Development of this chipset driver was funded
- * by the nice folks at National Semiconductor.
- *
- * Documentation:
- *	CS5530 documentation available from National Semiconductor.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "cs5530"
-
-/*
- * Here are the standard PIO mode 0-4 timings for each "format".
- * Format-0 uses fast data reg timings, with slower command reg timings.
- * Format-1 uses fast timings for all registers, but won't work with all drives.
- */
-static unsigned int cs5530_pio_timings[2][5] = {
-	{0x00009172, 0x00012171, 0x00020080, 0x00032010, 0x00040010},
-	{0xd1329172, 0x71212171, 0x30200080, 0x20102010, 0x00100010}
-};
-
-/*
- * After chip reset, the PIO timings are set to 0x0000e132, which is not valid.
- */
-#define CS5530_BAD_PIO(timings) (((timings)&~0x80000000)==0x0000e132)
-#define CS5530_BASEREG(hwif)	(((hwif)->dma_base & ~0xf) + ((hwif)->channel ? 0x30 : 0x20))
-
-/**
- *	cs5530_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Handles setting of PIO mode for the chipset.
- *
- *	The init_hwif_cs5530() routine guarantees that all drives
- *	will have valid default PIO timings set up before we get here.
- */
-
-static void cs5530_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long basereg = CS5530_BASEREG(hwif);
-	unsigned int format = (inl(basereg + 4) >> 31) & 1;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	outl(cs5530_pio_timings[format][pio], basereg + ((drive->dn & 1)<<3));
-}
-
-/**
- *	cs5530_udma_filter	-	UDMA filter
- *	@drive: drive
- *
- *	cs5530_udma_filter() does UDMA mask filtering for the given drive
- *	taking into the consideration capabilities of the mate device.
- *
- *	The CS5530 specifies that two drives sharing a cable cannot mix
- *	UDMA/MDMA.  It has to be one or the other, for the pair, though
- *	different timings can still be chosen for each drive.  We could
- *	set the appropriate timing bits on the fly, but that might be
- *	a bit confusing.  So, for now we statically handle this requirement
- *	by looking at our mate drive to see what it is capable of, before
- *	choosing a mode for our own drive.
- *
- *	Note: This relies on the fact we never fail from UDMA to MWDMA2
- *	but instead drop to PIO.
- */
-
-static u8 cs5530_udma_filter(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	ide_drive_t *mate = ide_get_pair_dev(drive);
-	u16 *mateid;
-	u8 mask = hwif->ultra_mask;
-
-	if (mate == NULL)
-		goto out;
-	mateid = mate->id;
-
-	if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
-		if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
-		    (mateid[ATA_ID_UDMA_MODES] & 7))
-			goto out;
-		if (mateid[ATA_ID_MWDMA_MODES] & 7)
-			mask = 0;
-	}
-out:
-	return mask;
-}
-
-static void cs5530_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long basereg;
-	unsigned int reg, timings = 0;
-
-	switch (drive->dma_mode) {
-		case XFER_UDMA_0:	timings = 0x00921250; break;
-		case XFER_UDMA_1:	timings = 0x00911140; break;
-		case XFER_UDMA_2:	timings = 0x00911030; break;
-		case XFER_MW_DMA_0:	timings = 0x00077771; break;
-		case XFER_MW_DMA_1:	timings = 0x00012121; break;
-		case XFER_MW_DMA_2:	timings = 0x00002020; break;
-	}
-	basereg = CS5530_BASEREG(hwif);
-	reg = inl(basereg + 4);			/* get drive0 config register */
-	timings |= reg & 0x80000000;		/* preserve PIO format bit */
-	if ((drive-> dn & 1) == 0) {		/* are we configuring drive0? */
-		outl(timings, basereg + 4);	/* write drive0 config register */
-	} else {
-		if (timings & 0x00100000)
-			reg |=  0x00100000;	/* enable UDMA timings for both drives */
-		else
-			reg &= ~0x00100000;	/* disable UDMA timings for both drives */
-		outl(reg, basereg + 4);		/* write drive0 config register */
-		outl(timings, basereg + 12);	/* write drive1 config register */
-	}
-}
-
-/**
- *	init_chipset_5530	-	set up 5530 bridge
- *	@dev: PCI device
- *
- *	Initialize the cs5530 bridge for reliable IDE DMA operation.
- */
-
-static int init_chipset_cs5530(struct pci_dev *dev)
-{
-	struct pci_dev *master_0 = NULL, *cs5530_0 = NULL;
-
-	if (pci_resource_start(dev, 4) == 0)
-		return -EFAULT;
-
-	dev = NULL;
-	while ((dev = pci_get_device(PCI_VENDOR_ID_CYRIX, PCI_ANY_ID, dev)) != NULL) {
-		switch (dev->device) {
-			case PCI_DEVICE_ID_CYRIX_PCI_MASTER:
-				master_0 = pci_dev_get(dev);
-				break;
-			case PCI_DEVICE_ID_CYRIX_5530_LEGACY:
-				cs5530_0 = pci_dev_get(dev);
-				break;
-		}
-	}
-	if (!master_0) {
-		printk(KERN_ERR DRV_NAME ": unable to locate PCI MASTER function\n");
-		goto out;
-	}
-	if (!cs5530_0) {
-		printk(KERN_ERR DRV_NAME ": unable to locate CS5530 LEGACY function\n");
-		goto out;
-	}
-
-	/*
-	 * Enable BusMaster and MemoryWriteAndInvalidate for the cs5530:
-	 * -->  OR 0x14 into 16-bit PCI COMMAND reg of function 0 of the cs5530
-	 */
-
-	pci_set_master(cs5530_0);
-	pci_try_set_mwi(cs5530_0);
-
-	/*
-	 * Set PCI CacheLineSize to 16-bytes:
-	 * --> Write 0x04 into 8-bit PCI CACHELINESIZE reg of function 0 of the cs5530
-	 */
-
-	pci_write_config_byte(cs5530_0, PCI_CACHE_LINE_SIZE, 0x04);
-
-	/*
-	 * Disable trapping of UDMA register accesses (Win98 hack):
-	 * --> Write 0x5006 into 16-bit reg at offset 0xd0 of function 0 of the cs5530
-	 */
-
-	pci_write_config_word(cs5530_0, 0xd0, 0x5006);
-
-	/*
-	 * Bit-1 at 0x40 enables MemoryWriteAndInvalidate on internal X-bus:
-	 * The other settings are what is necessary to get the register
-	 * into a sane state for IDE DMA operation.
-	 */
-
-	pci_write_config_byte(master_0, 0x40, 0x1e);
-
-	/* 
-	 * Set max PCI burst size (16-bytes seems to work best):
-	 *	   16bytes: set bit-1 at 0x41 (reg value of 0x16)
-	 *	all others: clear bit-1 at 0x41, and do:
-	 *	  128bytes: OR 0x00 at 0x41
-	 *	  256bytes: OR 0x04 at 0x41
-	 *	  512bytes: OR 0x08 at 0x41
-	 *	 1024bytes: OR 0x0c at 0x41
-	 */
-
-	pci_write_config_byte(master_0, 0x41, 0x14);
-
-	/*
-	 * These settings are necessary to get the chip
-	 * into a sane state for IDE DMA operation.
-	 */
-
-	pci_write_config_byte(master_0, 0x42, 0x00);
-	pci_write_config_byte(master_0, 0x43, 0xc1);
-
-out:
-	pci_dev_put(master_0);
-	pci_dev_put(cs5530_0);
-	return 0;
-}
-
-/**
- *	init_hwif_cs5530	-	initialise an IDE channel
- *	@hwif: IDE to initialize
- *
- *	This gets invoked by the IDE driver once for each channel. It
- *	performs channel-specific pre-initialization before drive probing.
- */
-
-static void init_hwif_cs5530 (ide_hwif_t *hwif)
-{
-	unsigned long basereg;
-	u32 d0_timings;
-
-	basereg = CS5530_BASEREG(hwif);
-	d0_timings = inl(basereg + 0);
-	if (CS5530_BAD_PIO(d0_timings))
-		outl(cs5530_pio_timings[(d0_timings >> 31) & 1][0], basereg + 0);
-	if (CS5530_BAD_PIO(inl(basereg + 8)))
-		outl(cs5530_pio_timings[(d0_timings >> 31) & 1][0], basereg + 8);
-}
-
-static const struct ide_port_ops cs5530_port_ops = {
-	.set_pio_mode		= cs5530_set_pio_mode,
-	.set_dma_mode		= cs5530_set_dma_mode,
-	.udma_filter		= cs5530_udma_filter,
-};
-
-static const struct ide_port_info cs5530_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_cs5530,
-	.init_hwif	= init_hwif_cs5530,
-	.port_ops	= &cs5530_port_ops,
-	.host_flags	= IDE_HFLAG_SERIALIZE |
-			  IDE_HFLAG_POST_SET_MODE,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA2,
-};
-
-static int cs5530_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &cs5530_chipset, NULL);
-}
-
-static const struct pci_device_id cs5530_pci_tbl[] = {
-	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5530_IDE), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, cs5530_pci_tbl);
-
-static struct pci_driver cs5530_pci_driver = {
-	.name		= "CS5530 IDE",
-	.id_table	= cs5530_pci_tbl,
-	.probe		= cs5530_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init cs5530_ide_init(void)
-{
-	return ide_pci_register_driver(&cs5530_pci_driver);
-}
-
-static void __exit cs5530_ide_exit(void)
-{
-	pci_unregister_driver(&cs5530_pci_driver);
-}
-
-module_init(cs5530_ide_init);
-module_exit(cs5530_ide_exit);
-
-MODULE_AUTHOR("Mark Lord");
-MODULE_DESCRIPTION("PCI driver module for Cyrix/NS 5530 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cs5535.c b/drivers/ide/cs5535.c
deleted file mode 100644
index 70fdbe3161f8f..0000000000000
--- a/drivers/ide/cs5535.c
+++ /dev/null
@@ -1,216 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004-2005 Advanced Micro Devices, Inc.
- * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
- *
- * History:
- * 09/20/2005 - Jaya Kumar <jayakumar.ide@gmail.com>
- * - Reworked tuneproc, set_drive, misc mods to prep for mainline
- * - Work was sponsored by CIS (M) Sdn Bhd.
- * Ported to Kernel 2.6.11 on June 26, 2005 by
- *   Wolfgang Zuleger <wolfgang.zuleger@gmx.de>
- *   Alexander Kiausch <alex.kiausch@t-online.de>
- * Originally developed by AMD for 2.4/2.6
- *
- * Development of this chipset driver was funded
- * by the nice folks at National Semiconductor/AMD.
- *
- * Documentation:
- *  CS5535 documentation available from AMD
- */
-
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "cs5535"
-
-#define MSR_ATAC_BASE		0x51300000
-#define ATAC_GLD_MSR_CAP	(MSR_ATAC_BASE+0)
-#define ATAC_GLD_MSR_CONFIG	(MSR_ATAC_BASE+0x01)
-#define ATAC_GLD_MSR_SMI	(MSR_ATAC_BASE+0x02)
-#define ATAC_GLD_MSR_ERROR	(MSR_ATAC_BASE+0x03)
-#define ATAC_GLD_MSR_PM		(MSR_ATAC_BASE+0x04)
-#define ATAC_GLD_MSR_DIAG	(MSR_ATAC_BASE+0x05)
-#define ATAC_IO_BAR		(MSR_ATAC_BASE+0x08)
-#define ATAC_RESET		(MSR_ATAC_BASE+0x10)
-#define ATAC_CH0D0_PIO		(MSR_ATAC_BASE+0x20)
-#define ATAC_CH0D0_DMA		(MSR_ATAC_BASE+0x21)
-#define ATAC_CH0D1_PIO		(MSR_ATAC_BASE+0x22)
-#define ATAC_CH0D1_DMA		(MSR_ATAC_BASE+0x23)
-#define ATAC_PCI_ABRTERR	(MSR_ATAC_BASE+0x24)
-#define ATAC_BM0_CMD_PRIM	0x00
-#define ATAC_BM0_STS_PRIM	0x02
-#define ATAC_BM0_PRD		0x04
-#define CS5535_CABLE_DETECT	0x48
-
-/* Format I PIO settings. We separate out cmd and data for safer timings */
-
-static unsigned int cs5535_pio_cmd_timings[5] =
-{ 0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131 };
-static unsigned int cs5535_pio_dta_timings[5] =
-{ 0xF7F4, 0xF173, 0x8141, 0x5131, 0x1131 };
-
-static unsigned int cs5535_mwdma_timings[3] =
-{ 0x7F0FFFF3, 0x7F035352, 0x7f024241 };
-
-static unsigned int cs5535_udma_timings[5] =
-{ 0x7F7436A1, 0x7F733481, 0x7F723261, 0x7F713161, 0x7F703061 };
-
-/* Macros to check if the register is the reset value -  reset value is an
-   invalid timing and indicates the register has not been set previously */
-
-#define CS5535_BAD_PIO(timings) ( (timings&~0x80000000UL) == 0x00009172 )
-#define CS5535_BAD_DMA(timings) ( (timings & 0x000FFFFF) == 0x00077771 )
-
-/****
- *	cs5535_set_speed         -     Configure the chipset to the new speed
- *	@drive: Drive to set up
- *	@speed: desired speed
- *
- *	cs5535_set_speed() configures the chipset to a new speed.
- */
-static void cs5535_set_speed(ide_drive_t *drive, const u8 speed)
-{
-	u32 reg = 0, dummy;
-	u8 unit = drive->dn & 1;
-
-	/* Set the PIO timings */
-	if (speed < XFER_SW_DMA_0) {
-		ide_drive_t *pair = ide_get_pair_dev(drive);
-		u8 cmd, pioa;
-
-		cmd = pioa = speed - XFER_PIO_0;
-
-		if (pair) {
-			u8 piob = pair->pio_mode - XFER_PIO_0;
-
-			if (piob < cmd)
-				cmd = piob;
-		}
-
-		/* Write the speed of the current drive */
-		reg = (cs5535_pio_cmd_timings[cmd] << 16) |
-			cs5535_pio_dta_timings[pioa];
-		wrmsr(unit ? ATAC_CH0D1_PIO : ATAC_CH0D0_PIO, reg, 0);
-
-		/* And if nessesary - change the speed of the other drive */
-		rdmsr(unit ?  ATAC_CH0D0_PIO : ATAC_CH0D1_PIO, reg, dummy);
-
-		if (((reg >> 16) & cs5535_pio_cmd_timings[cmd]) !=
-			cs5535_pio_cmd_timings[cmd]) {
-			reg &= 0x0000FFFF;
-			reg |= cs5535_pio_cmd_timings[cmd] << 16;
-			wrmsr(unit ? ATAC_CH0D0_PIO : ATAC_CH0D1_PIO, reg, 0);
-		}
-
-		/* Set bit 31 of the DMA register for PIO format 1 timings */
-		rdmsr(unit ?  ATAC_CH0D1_DMA : ATAC_CH0D0_DMA, reg, dummy);
-		wrmsr(unit ? ATAC_CH0D1_DMA : ATAC_CH0D0_DMA,
-					reg | 0x80000000UL, 0);
-	} else {
-		rdmsr(unit ? ATAC_CH0D1_DMA : ATAC_CH0D0_DMA, reg, dummy);
-
-		reg &= 0x80000000UL;  /* Preserve the PIO format bit */
-
-		if (speed >= XFER_UDMA_0 && speed <= XFER_UDMA_4)
-			reg |= cs5535_udma_timings[speed - XFER_UDMA_0];
-		else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
-			reg |= cs5535_mwdma_timings[speed - XFER_MW_DMA_0];
-		else
-			return;
-
-		wrmsr(unit ? ATAC_CH0D1_DMA : ATAC_CH0D0_DMA, reg, 0);
-	}
-}
-
-/**
- *	cs5535_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Programs the chipset for DMA mode.
- */
-
-static void cs5535_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	cs5535_set_speed(drive, drive->dma_mode);
-}
-
-/**
- *	cs5535_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	A callback from the upper layers for PIO-only tuning.
- */
-
-static void cs5535_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	cs5535_set_speed(drive, drive->pio_mode);
-}
-
-static u8 cs5535_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 bit;
-
-	/* if a 80 wire cable was detected */
-	pci_read_config_byte(dev, CS5535_CABLE_DETECT, &bit);
-
-	return (bit & 1) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops cs5535_port_ops = {
-	.set_pio_mode		= cs5535_set_pio_mode,
-	.set_dma_mode		= cs5535_set_dma_mode,
-	.cable_detect		= cs5535_cable_detect,
-};
-
-static const struct ide_port_info cs5535_chipset = {
-	.name		= DRV_NAME,
-	.port_ops	= &cs5535_port_ops,
-	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_POST_SET_MODE,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA4,
-};
-
-static int cs5535_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &cs5535_chipset, NULL);
-}
-
-static const struct pci_device_id cs5535_pci_tbl[] = {
-	{ PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_CS5535_IDE), 0 },
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5535_IDE), },
-	{ 0, },
-};
-
-MODULE_DEVICE_TABLE(pci, cs5535_pci_tbl);
-
-static struct pci_driver cs5535_pci_driver = {
-	.name		= "CS5535_IDE",
-	.id_table	= cs5535_pci_tbl,
-	.probe		= cs5535_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init cs5535_ide_init(void)
-{
-	return ide_pci_register_driver(&cs5535_pci_driver);
-}
-
-static void __exit cs5535_ide_exit(void)
-{
-	pci_unregister_driver(&cs5535_pci_driver);
-}
-
-module_init(cs5535_ide_init);
-module_exit(cs5535_ide_exit);
-
-MODULE_AUTHOR("AMD");
-MODULE_DESCRIPTION("PCI driver module for AMD/NS CS5535 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/cs5536.c b/drivers/ide/cs5536.c
deleted file mode 100644
index 8b5ca145191b5..0000000000000
--- a/drivers/ide/cs5536.c
+++ /dev/null
@@ -1,294 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * CS5536 PATA support
- * (C) 2007 Martin K. Petersen <mkp@mkp.net>
- * (C) 2009 Bartlomiej Zolnierkiewicz
- *
- * Documentation:
- *	Available from AMD web site.
- *
- * The IDE timing registers for the CS5536 live in the Geode Machine
- * Specific Register file and not PCI config space.  Most BIOSes
- * virtualize the PCI registers so the chip looks like a standard IDE
- * controller.  Unfortunately not all implementations get this right.
- * In particular some have problems with unaligned accesses to the
- * virtualized PCI registers.  This driver always does full dword
- * writes to work around the issue.  Also, in case of a bad BIOS this
- * driver can be loaded with the "msr=1" parameter which forces using
- * the Machine Specific Registers to configure the device.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-#include <asm/msr.h>
-
-#define DRV_NAME	"cs5536"
-
-enum {
-	MSR_IDE_CFG		= 0x51300010,
-	PCI_IDE_CFG		= 0x40,
-
-	CFG			= 0,
-	DTC			= 2,
-	CAST			= 3,
-	ETC			= 4,
-
-	IDE_CFG_CHANEN		= (1 << 1),
-	IDE_CFG_CABLE		= (1 << 17) | (1 << 16),
-
-	IDE_D0_SHIFT		= 24,
-	IDE_D1_SHIFT		= 16,
-	IDE_DRV_MASK		= 0xff,
-
-	IDE_CAST_D0_SHIFT	= 6,
-	IDE_CAST_D1_SHIFT	= 4,
-	IDE_CAST_DRV_MASK	= 0x3,
-
-	IDE_CAST_CMD_SHIFT	= 24,
-	IDE_CAST_CMD_MASK	= 0xff,
-
-	IDE_ETC_UDMA_MASK	= 0xc0,
-};
-
-static int use_msr;
-
-static int cs5536_read(struct pci_dev *pdev, int reg, u32 *val)
-{
-	if (unlikely(use_msr)) {
-		u32 dummy;
-
-		rdmsr(MSR_IDE_CFG + reg, *val, dummy);
-		return 0;
-	}
-
-	return pci_read_config_dword(pdev, PCI_IDE_CFG + reg * 4, val);
-}
-
-static int cs5536_write(struct pci_dev *pdev, int reg, int val)
-{
-	if (unlikely(use_msr)) {
-		wrmsr(MSR_IDE_CFG + reg, val, 0);
-		return 0;
-	}
-
-	return pci_write_config_dword(pdev, PCI_IDE_CFG + reg * 4, val);
-}
-
-static void cs5536_program_dtc(ide_drive_t *drive, u8 tim)
-{
-	struct pci_dev *pdev = to_pci_dev(drive->hwif->dev);
-	int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT;
-	u32 dtc;
-
-	cs5536_read(pdev, DTC, &dtc);
-	dtc &= ~(IDE_DRV_MASK << dshift);
-	dtc |= tim << dshift;
-	cs5536_write(pdev, DTC, dtc);
-}
-
-/**
- *	cs5536_cable_detect	-	detect cable type
- *	@hwif: Port to detect on
- *
- *	Perform cable detection for ATA66 capable cable.
- *
- *	Returns a cable type.
- */
-
-static u8 cs5536_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	u32 cfg;
-
-	cs5536_read(pdev, CFG, &cfg);
-
-	if (cfg & IDE_CFG_CABLE)
-		return ATA_CBL_PATA80;
-	else
-		return ATA_CBL_PATA40;
-}
-
-/**
- *	cs5536_set_pio_mode		-	PIO timing setup
- *	@hwif: ATA port
- *	@drive: ATA device
- */
-
-static void cs5536_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u8 drv_timings[5] = {
-		0x98, 0x55, 0x32, 0x21, 0x20,
-	};
-
-	static const u8 addr_timings[5] = {
-		0x2, 0x1, 0x0, 0x0, 0x0,
-	};
-
-	static const u8 cmd_timings[5] = {
-		0x99, 0x92, 0x90, 0x22, 0x20,
-	};
-
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	int cshift = (drive->dn & 1) ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT;
-	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
-	u32 cast;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	u8 cmd_pio = pio;
-
-	if (pair)
-		cmd_pio = min_t(u8, pio, pair->pio_mode - XFER_PIO_0);
-
-	timings &= (IDE_DRV_MASK << 8);
-	timings |= drv_timings[pio];
-	ide_set_drivedata(drive, (void *)timings);
-
-	cs5536_program_dtc(drive, drv_timings[pio]);
-
-	cs5536_read(pdev, CAST, &cast);
-
-	cast &= ~(IDE_CAST_DRV_MASK << cshift);
-	cast |= addr_timings[pio] << cshift;
-
-	cast &= ~(IDE_CAST_CMD_MASK << IDE_CAST_CMD_SHIFT);
-	cast |= cmd_timings[cmd_pio] << IDE_CAST_CMD_SHIFT;
-
-	cs5536_write(pdev, CAST, cast);
-}
-
-/**
- *	cs5536_set_dma_mode		-	DMA timing setup
- *	@hwif: ATA port
- *	@drive: ATA device
- */
-
-static void cs5536_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u8 udma_timings[6] = {
-		0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6,
-	};
-
-	static const u8 mwdma_timings[3] = {
-		0x67, 0x21, 0x20,
-	};
-
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	int dshift = (drive->dn & 1) ? IDE_D1_SHIFT : IDE_D0_SHIFT;
-	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
-	u32 etc;
-	const u8 mode = drive->dma_mode;
-
-	cs5536_read(pdev, ETC, &etc);
-
-	if (mode >= XFER_UDMA_0) {
-		etc &= ~(IDE_DRV_MASK << dshift);
-		etc |= udma_timings[mode - XFER_UDMA_0] << dshift;
-	} else { /* MWDMA */
-		etc &= ~(IDE_ETC_UDMA_MASK << dshift);
-		timings &= IDE_DRV_MASK;
-		timings |= mwdma_timings[mode - XFER_MW_DMA_0] << 8;
-		ide_set_drivedata(drive, (void *)timings);
-	}
-
-	cs5536_write(pdev, ETC, etc);
-}
-
-static void cs5536_dma_start(ide_drive_t *drive)
-{
-	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
-
-	if (drive->current_speed < XFER_UDMA_0 &&
-	    (timings >> 8) != (timings & IDE_DRV_MASK))
-		cs5536_program_dtc(drive, timings >> 8);
-
-	ide_dma_start(drive);
-}
-
-static int cs5536_dma_end(ide_drive_t *drive)
-{
-	int ret = ide_dma_end(drive);
-	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
-
-	if (drive->current_speed < XFER_UDMA_0 &&
-	    (timings >> 8) != (timings & IDE_DRV_MASK))
-		cs5536_program_dtc(drive, timings & IDE_DRV_MASK);
-
-	return ret;
-}
-
-static const struct ide_port_ops cs5536_port_ops = {
-	.set_pio_mode		= cs5536_set_pio_mode,
-	.set_dma_mode		= cs5536_set_dma_mode,
-	.cable_detect		= cs5536_cable_detect,
-};
-
-static const struct ide_dma_ops cs5536_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= cs5536_dma_start,
-	.dma_end		= cs5536_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info cs5536_info = {
-	.name		= DRV_NAME,
-	.port_ops	= &cs5536_port_ops,
-	.dma_ops	= &cs5536_dma_ops,
-	.host_flags	= IDE_HFLAG_SINGLE,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA5,
-};
-
-/**
- *	cs5536_init_one
- *	@dev: PCI device
- *	@id: Entry in match table
- */
-
-static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	u32 cfg;
-
-	if (use_msr)
-		printk(KERN_INFO DRV_NAME ": Using MSR regs instead of PCI\n");
-
-	cs5536_read(dev, CFG, &cfg);
-
-	if ((cfg & IDE_CFG_CHANEN) == 0) {
-		printk(KERN_ERR DRV_NAME ": disabled by BIOS\n");
-		return -ENODEV;
-	}
-
-	return ide_pci_init_one(dev, &cs5536_info, NULL);
-}
-
-static const struct pci_device_id cs5536_pci_tbl[] = {
-	{ PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), },
-	{ },
-};
-
-static struct pci_driver cs5536_pci_driver = {
-	.name		= DRV_NAME,
-	.id_table	= cs5536_pci_tbl,
-	.probe		= cs5536_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-module_pci_driver(cs5536_pci_driver);
-
-MODULE_AUTHOR("Martin K. Petersen, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller");
-MODULE_LICENSE("GPL");
-MODULE_DEVICE_TABLE(pci, cs5536_pci_tbl);
-
-module_param_named(msr, use_msr, int, 0644);
-MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)");
diff --git a/drivers/ide/cy82c693.c b/drivers/ide/cy82c693.c
deleted file mode 100644
index bc01660ee8fd3..0000000000000
--- a/drivers/ide/cy82c693.c
+++ /dev/null
@@ -1,234 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1998-2000 Andreas S. Krebs (akrebs@altavista.net), Maintainer
- *  Copyright (C) 1998-2002 Andre Hedrick <andre@linux-ide.org>, Integrator
- *  Copyright (C) 2007-2011 Bartlomiej Zolnierkiewicz
- *
- * CYPRESS CY82C693 chipset IDE controller
- *
- * The CY82C693 chipset is used on Digital's PC-Alpha 164SX boards.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "cy82c693"
-
-/*
- *	NOTE: the value for busmaster timeout is tricky and I got it by
- *	trial and error!  By using a to low value will cause DMA timeouts
- *	and drop IDE performance, and by using a to high value will cause
- *	audio playback to scatter.
- *	If you know a better value or how to calc it, please let me know.
- */
-
-/* twice the value written in cy82c693ub datasheet */
-#define BUSMASTER_TIMEOUT	0x50
-/*
- * the value above was tested on my machine and it seems to work okay
- */
-
-/* here are the offset definitions for the registers */
-#define CY82_IDE_CMDREG		0x04
-#define CY82_IDE_ADDRSETUP	0x48
-#define CY82_IDE_MASTER_IOR	0x4C
-#define CY82_IDE_MASTER_IOW	0x4D
-#define CY82_IDE_SLAVE_IOR	0x4E
-#define CY82_IDE_SLAVE_IOW	0x4F
-#define CY82_IDE_MASTER_8BIT	0x50
-#define CY82_IDE_SLAVE_8BIT	0x51
-
-#define CY82_INDEX_PORT		0x22
-#define CY82_DATA_PORT		0x23
-
-#define CY82_INDEX_CHANNEL0	0x30
-#define CY82_INDEX_CHANNEL1	0x31
-#define CY82_INDEX_TIMEOUT	0x32
-
-/*
- * set DMA mode a specific channel for CY82C693
- */
-
-static void cy82c693_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	const u8 mode = drive->dma_mode;
-	u8 single = (mode & 0x10) >> 4, index = 0, data = 0;
-
-	index = hwif->channel ? CY82_INDEX_CHANNEL1 : CY82_INDEX_CHANNEL0;
-
-	data = (mode & 3) | (single << 2);
-
-	outb(index, CY82_INDEX_PORT);
-	outb(data, CY82_DATA_PORT);
-
-	/*
-	 * note: below we set the value for Bus Master IDE TimeOut Register
-	 * I'm not absolutely sure what this does, but it solved my problem
-	 * with IDE DMA and sound, so I now can play sound and work with
-	 * my IDE driver at the same time :-)
-	 *
-	 * If you know the correct (best) value for this register please
-	 * let me know - ASK
-	 */
-
-	data = BUSMASTER_TIMEOUT;
-	outb(CY82_INDEX_TIMEOUT, CY82_INDEX_PORT);
-	outb(data, CY82_DATA_PORT);
-}
-
-static void cy82c693_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	int bus_speed = ide_pci_clk ? ide_pci_clk : 33;
-	const unsigned long T = 1000000 / bus_speed;
-	unsigned int addrCtrl;
-	struct ide_timing t;
-	u8 time_16, time_8;
-
-	/* select primary or secondary channel */
-	if (drive->dn > 1) {  /* drive is on the secondary channel */
-		dev = pci_get_slot(dev->bus, dev->devfn+1);
-		if (!dev) {
-			printk(KERN_ERR "%s: tune_drive: "
-				"Cannot find secondary interface!\n",
-				drive->name);
-			return;
-		}
-	}
-
-	ide_timing_compute(drive, drive->pio_mode, &t, T, 1);
-
-	time_16 = clamp_val(t.recover - 1, 0, 15) |
-		  (clamp_val(t.active - 1, 0, 15) << 4);
-	time_8 = clamp_val(t.act8b - 1, 0, 15) |
-		 (clamp_val(t.rec8b - 1, 0, 15) << 4);
-
-	/* now let's write  the clocks registers */
-	if ((drive->dn & 1) == 0) {
-		/*
-		 * set master drive
-		 * address setup control register
-		 * is 32 bit !!!
-		 */
-		pci_read_config_dword(dev, CY82_IDE_ADDRSETUP, &addrCtrl);
-
-		addrCtrl &= (~0xF);
-		addrCtrl |= clamp_val(t.setup - 1, 0, 15);
-		pci_write_config_dword(dev, CY82_IDE_ADDRSETUP, addrCtrl);
-
-		/* now let's set the remaining registers */
-		pci_write_config_byte(dev, CY82_IDE_MASTER_IOR, time_16);
-		pci_write_config_byte(dev, CY82_IDE_MASTER_IOW, time_16);
-		pci_write_config_byte(dev, CY82_IDE_MASTER_8BIT, time_8);
-	} else {
-		/*
-		 * set slave drive
-		 * address setup control register
-		 * is 32 bit !!!
-		 */
-		pci_read_config_dword(dev, CY82_IDE_ADDRSETUP, &addrCtrl);
-
-		addrCtrl &= (~0xF0);
-		addrCtrl |= (clamp_val(t.setup - 1, 0, 15) << 4);
-		pci_write_config_dword(dev, CY82_IDE_ADDRSETUP, addrCtrl);
-
-		/* now let's set the remaining registers */
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOR, time_16);
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_IOW, time_16);
-		pci_write_config_byte(dev, CY82_IDE_SLAVE_8BIT, time_8);
-	}
-	if (drive->dn > 1)
-		pci_dev_put(dev);
-}
-
-static void init_iops_cy82c693(ide_hwif_t *hwif)
-{
-	static ide_hwif_t *primary;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	if (PCI_FUNC(dev->devfn) == 1)
-		primary = hwif;
-	else {
-		hwif->mate = primary;
-		hwif->channel = 1;
-	}
-}
-
-static const struct ide_port_ops cy82c693_port_ops = {
-	.set_pio_mode		= cy82c693_set_pio_mode,
-	.set_dma_mode		= cy82c693_set_dma_mode,
-};
-
-static const struct ide_port_info cy82c693_chipset = {
-	.name		= DRV_NAME,
-	.init_iops	= init_iops_cy82c693,
-	.port_ops	= &cy82c693_port_ops,
-	.host_flags	= IDE_HFLAG_SINGLE,
-	.pio_mask	= ATA_PIO4,
-	.swdma_mask	= ATA_SWDMA2,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-static int cy82c693_init_one(struct pci_dev *dev,
-			     const struct pci_device_id *id)
-{
-	struct pci_dev *dev2;
-	int ret = -ENODEV;
-
-	/* CY82C693 is more than only a IDE controller.
-	   Function 1 is primary IDE channel, function 2 - secondary. */
-	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
-	    PCI_FUNC(dev->devfn) == 1) {
-		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
-		ret = ide_pci_init_two(dev, dev2, &cy82c693_chipset, NULL);
-		if (ret)
-			pci_dev_put(dev2);
-	}
-	return ret;
-}
-
-static void cy82c693_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
-
-	ide_pci_remove(dev);
-	pci_dev_put(dev2);
-}
-
-static const struct pci_device_id cy82c693_pci_tbl[] = {
-	{ PCI_VDEVICE(CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, cy82c693_pci_tbl);
-
-static struct pci_driver cy82c693_pci_driver = {
-	.name		= "Cypress_IDE",
-	.id_table	= cy82c693_pci_tbl,
-	.probe		= cy82c693_init_one,
-	.remove		= cy82c693_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init cy82c693_ide_init(void)
-{
-	return ide_pci_register_driver(&cy82c693_pci_driver);
-}
-
-static void __exit cy82c693_ide_exit(void)
-{
-	pci_unregister_driver(&cy82c693_pci_driver);
-}
-
-module_init(cy82c693_ide_init);
-module_exit(cy82c693_ide_exit);
-
-MODULE_AUTHOR("Andreas Krebs, Andre Hedrick, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("PCI driver module for the Cypress CY82C693 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/delkin_cb.c b/drivers/ide/delkin_cb.c
deleted file mode 100644
index 300daabaa5753..0000000000000
--- a/drivers/ide/delkin_cb.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/*
- *  Created 20 Oct 2004 by Mark Lord
- *
- *  Basic support for Delkin/ASKA/Workbit Cardbus CompactFlash adapter
- *
- *  Modeled after the 16-bit PCMCIA driver: ide-cs.c
- *
- *  This is slightly peculiar, in that it is a PCI driver,
- *  but is NOT an IDE PCI driver -- the IDE layer does not directly
- *  support hot insertion/removal of PCI interfaces, so this driver
- *  is unable to use the IDE PCI interfaces.  Instead, it uses the
- *  same interfaces as the ide-cs (PCMCIA) driver uses.
- *  On the plus side, the driver is also smaller/simpler this way.
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-
-#include <asm/io.h>
-
-/*
- * No chip documentation has yet been found,
- * so these configuration values were pulled from
- * a running Win98 system using "debug".
- * This gives around 3MByte/second read performance,
- * which is about 2/3 of what the chip is capable of.
- *
- * There is also a 4KByte mmio region on the card,
- * but its purpose has yet to be reverse-engineered.
- */
-static const u8 setup[] = {
-	0x00, 0x05, 0xbe, 0x01, 0x20, 0x8f, 0x00, 0x00,
-	0xa4, 0x1f, 0xb3, 0x1b, 0x00, 0x00, 0x00, 0x80,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0xa4, 0x83, 0x02, 0x13,
-};
-
-static const struct ide_port_ops delkin_cb_port_ops = {
-	.quirkproc		= ide_undecoded_slave,
-};
-
-static int delkin_cb_init_chipset(struct pci_dev *dev)
-{
-	unsigned long base = pci_resource_start(dev, 0);
-	int i;
-
-	outb(0x02, base + 0x1e);	/* set nIEN to block interrupts */
-	inb(base + 0x17);		/* read status to clear interrupts */
-
-	for (i = 0; i < sizeof(setup); ++i) {
-		if (setup[i])
-			outb(setup[i], base + i);
-	}
-
-	return 0;
-}
-
-static const struct ide_port_info delkin_cb_port_info = {
-	.port_ops		= &delkin_cb_port_ops,
-	.host_flags		= IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS |
-				  IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.init_chipset		= delkin_cb_init_chipset,
-	.chipset		= ide_pci,
-};
-
-static int delkin_cb_probe(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_host *host;
-	unsigned long base;
-	int rc;
-	struct ide_hw hw, *hws[] = { &hw };
-
-	rc = pci_enable_device(dev);
-	if (rc) {
-		printk(KERN_ERR "delkin_cb: pci_enable_device failed (%d)\n", rc);
-		return rc;
-	}
-	rc = pci_request_regions(dev, "delkin_cb");
-	if (rc) {
-		printk(KERN_ERR "delkin_cb: pci_request_regions failed (%d)\n", rc);
-		pci_disable_device(dev);
-		return rc;
-	}
-	base = pci_resource_start(dev, 0);
-
-	delkin_cb_init_chipset(dev);
-
-	memset(&hw, 0, sizeof(hw));
-	ide_std_init_ports(&hw, base + 0x10, base + 0x1e);
-	hw.irq = dev->irq;
-	hw.dev = &dev->dev;
-
-	rc = ide_host_add(&delkin_cb_port_info, hws, 1, &host);
-	if (rc)
-		goto out_disable;
-
-	pci_set_drvdata(dev, host);
-
-	return 0;
-
-out_disable:
-	pci_release_regions(dev);
-	pci_disable_device(dev);
-	return rc;
-}
-
-static void
-delkin_cb_remove (struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-
-	ide_host_remove(host);
-
-	pci_release_regions(dev);
-	pci_disable_device(dev);
-}
-
-#ifdef CONFIG_PM
-static int delkin_cb_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	pci_save_state(dev);
-	pci_disable_device(dev);
-	pci_set_power_state(dev, pci_choose_state(dev, state));
-
-	return 0;
-}
-
-static int delkin_cb_resume(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	int rc;
-
-	pci_set_power_state(dev, PCI_D0);
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		return rc;
-
-	pci_restore_state(dev);
-	pci_set_master(dev);
-
-	if (host->init_chipset)
-		host->init_chipset(dev);
-
-	return 0;
-}
-#else
-#define delkin_cb_suspend NULL
-#define delkin_cb_resume NULL
-#endif
-
-static struct pci_device_id delkin_cb_pci_tbl[] = {
-	{ 0x1145, 0xf021, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ 0x1145, 0xf024, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, delkin_cb_pci_tbl);
-
-static struct pci_driver delkin_cb_pci_driver = {
-	.name		= "Delkin-ASKA-Workbit Cardbus IDE",
-	.id_table	= delkin_cb_pci_tbl,
-	.probe		= delkin_cb_probe,
-	.remove		= delkin_cb_remove,
-	.suspend	= delkin_cb_suspend,
-	.resume		= delkin_cb_resume,
-};
-
-module_pci_driver(delkin_cb_pci_driver);
-
-MODULE_AUTHOR("Mark Lord");
-MODULE_DESCRIPTION("Basic support for Delkin/ASKA/Workbit Cardbus IDE");
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/ide/dtc2278.c b/drivers/ide/dtc2278.c
deleted file mode 100644
index 714e8cd0fa491..0000000000000
--- a/drivers/ide/dtc2278.c
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1996  Linus Torvalds & author (see below)
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "dtc2278"
-
-/*
- * Changing this #undef to #define may solve start up problems in some systems.
- */
-#undef ALWAYS_SET_DTC2278_PIO_MODE
-
-/*
- * From: andy@cercle.cts.com (Dyan Wile)
- *
- * Below is a patch for DTC-2278 - alike software-programmable controllers
- * The code enables the secondary IDE controller and the PIO4 (3?) timings on
- * the primary (EIDE). You may probably have to enable the 32-bit support to
- * get the full speed. You better get the disk interrupts disabled ( hdparm -u0
- * /dev/hd.. ) for the drives connected to the EIDE interface. (I get my
- * filesystem  corrupted with -u1, but under heavy disk load only :-)
- *
- * This card is now forced to use the "serialize" feature,
- * and irq-unmasking is disallowed.  If io_32bit is enabled,
- * it must be done for BOTH drives on each interface.
- *
- * This code was written for the DTC2278E, but might work with any of these:
- *
- * DTC2278S has only a single IDE interface.
- * DTC2278D has two IDE interfaces and is otherwise identical to the S version.
- * DTC2278E also has serial ports and a printer port
- * DTC2278EB: has onboard BIOS, and "works like a charm" -- Kent Bradford <kent@theory.caltech.edu>
- *
- * There may be a fourth controller type. The S and D versions use the
- * Winbond chip, and I think the E version does also.
- *
- */
-
-static void sub22 (char b, char c)
-{
-	int i;
-
-	for(i = 0; i < 3; ++i) {
-		inb(0x3f6);
-		outb_p(b,0xb0);
-		inb(0x3f6);
-		outb_p(c,0xb4);
-		inb(0x3f6);
-		if(inb(0xb4) == c) {
-			outb_p(7,0xb0);
-			inb(0x3f6);
-			return;	/* success */
-		}
-	}
-}
-
-static DEFINE_SPINLOCK(dtc2278_lock);
-
-static void dtc2278_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long flags;
-
-	if (drive->pio_mode >= XFER_PIO_3) {
-		spin_lock_irqsave(&dtc2278_lock, flags);
-		/*
-		 * This enables PIO mode4 (3?) on the first interface
-		 */
-		sub22(1,0xc3);
-		sub22(0,0xa0);
-		spin_unlock_irqrestore(&dtc2278_lock, flags);
-	} else {
-		/* we don't know how to set it back again.. */
-		/* Actually we do - there is a data sheet available for the
-		   Winbond but does anyone actually care */
-	}
-}
-
-static const struct ide_port_ops dtc2278_port_ops = {
-	.set_pio_mode		= dtc2278_set_pio_mode,
-};
-
-static const struct ide_port_info dtc2278_port_info __initconst = {
-	.name			= DRV_NAME,
-	.chipset		= ide_dtc2278,
-	.port_ops		= &dtc2278_port_ops,
-	.host_flags		= IDE_HFLAG_SERIALIZE |
-				  IDE_HFLAG_NO_UNMASK_IRQS |
-				  IDE_HFLAG_IO_32BIT |
-				  /* disallow ->io_32bit changes */
-				  IDE_HFLAG_NO_IO_32BIT |
-				  IDE_HFLAG_NO_DMA |
-				  IDE_HFLAG_DTC2278,
-	.pio_mask		= ATA_PIO4,
-};
-
-static int __init dtc2278_probe(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	/*
-	 * This enables the second interface
-	 */
-	outb_p(4,0xb0);
-	inb(0x3f6);
-	outb_p(0x20,0xb4);
-	inb(0x3f6);
-#ifdef ALWAYS_SET_DTC2278_PIO_MODE
-	/*
-	 * This enables PIO mode4 (3?) on the first interface
-	 * and may solve start-up problems for some people.
-	 */
-	sub22(1,0xc3);
-	sub22(0,0xa0);
-#endif
-	local_irq_restore(flags);
-
-	return ide_legacy_device_add(&dtc2278_port_info, 0);
-}
-
-static bool probe_dtc2278;
-
-module_param_named(probe, probe_dtc2278, bool, 0);
-MODULE_PARM_DESC(probe, "probe for DTC2278xx chipsets");
-
-static int __init dtc2278_init(void)
-{
-	if (probe_dtc2278 == 0)
-		return -ENODEV;
-
-	if (dtc2278_probe()) {
-		printk(KERN_ERR "dtc2278: ide interfaces already in use!\n");
-		return -EBUSY;
-	}
-	return 0;
-}
-
-module_init(dtc2278_init);
-
-MODULE_AUTHOR("See Local File");
-MODULE_DESCRIPTION("support of DTC-2278 VLB IDE chipsets");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
deleted file mode 100644
index a73a9dc17e4d8..0000000000000
--- a/drivers/ide/falconide.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- *  Atari Falcon IDE Driver
- *
- *     Created 12 Jul 1997 by Geert Uytterhoeven
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-
-#include <asm/setup.h>
-#include <asm/atarihw.h>
-#include <asm/atariints.h>
-#include <asm/atari_stdma.h>
-#include <asm/ide.h>
-
-#define DRV_NAME "falconide"
-
-#ifdef CONFIG_ATARI
-    /*
-     *  falconide_intr_lock is used to obtain access to the IDE interrupt,
-     *  which is shared between several drivers.
-     */
-
-static int falconide_intr_lock;
-
-static void falconide_release_lock(void)
-{
-	if (falconide_intr_lock == 0) {
-		printk(KERN_ERR "%s: bug\n", __func__);
-		return;
-	}
-	falconide_intr_lock = 0;
-	stdma_release();
-}
-
-static void falconide_get_lock(irq_handler_t handler, void *data)
-{
-	if (falconide_intr_lock == 0) {
-		stdma_lock(handler, data);
-		falconide_intr_lock = 1;
-	}
-}
-#endif
-
-static void falconide_input_data(ide_drive_t *drive, struct ide_cmd *cmd,
-				 void *buf, unsigned int len)
-{
-	unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-	if (drive->media == ide_disk && cmd && (cmd->tf_flags & IDE_TFLAG_FS)) {
-		__ide_mm_insw(data_addr, buf, (len + 1) / 2);
-		return;
-	}
-
-	raw_insw_swapw((u16 *)data_addr, buf, (len + 1) / 2);
-}
-
-static void falconide_output_data(ide_drive_t *drive, struct ide_cmd *cmd,
-				  void *buf, unsigned int len)
-{
-	unsigned long data_addr = drive->hwif->io_ports.data_addr;
-
-	if (drive->media == ide_disk && cmd && (cmd->tf_flags & IDE_TFLAG_FS)) {
-		__ide_mm_outsw(data_addr, buf, (len + 1) / 2);
-		return;
-	}
-
-	raw_outsw_swapw((u16 *)data_addr, buf, (len + 1) / 2);
-}
-
-/* Atari has a byte-swapped IDE interface */
-static const struct ide_tp_ops falconide_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= falconide_input_data,
-	.output_data		= falconide_output_data,
-};
-
-static const struct ide_port_info falconide_port_info = {
-#ifdef CONFIG_ATARI
-	.get_lock		= falconide_get_lock,
-	.release_lock		= falconide_release_lock,
-#endif
-	.tp_ops			= &falconide_tp_ops,
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
-				  IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
-};
-
-static void __init falconide_setup_ports(struct ide_hw *hw, unsigned long base,
-					 unsigned long ctl, int irq)
-{
-	int i;
-
-	memset(hw, 0, sizeof(*hw));
-
-	hw->io_ports.data_addr = base;
-
-	for (i = 1; i < 8; i++)
-		hw->io_ports_array[i] = base + 1 + i * 4;
-
-	hw->io_ports.ctl_addr = ctl + 1;
-
-	hw->irq = irq;
-}
-
-    /*
-     *  Probe for a Falcon IDE interface
-     */
-
-static int __init falconide_init(struct platform_device *pdev)
-{
-	struct resource *base_mem_res, *ctl_mem_res;
-	struct resource *base_res, *ctl_res, *irq_res;
-	struct ide_host *host;
-	struct ide_hw hw, *hws[] = { &hw };
-	int rc;
-	int irq;
-
-	dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 IDE controller\n");
-
-	base_res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	if (base_res && !devm_request_region(&pdev->dev, base_res->start,
-					   resource_size(base_res), DRV_NAME)) {
-		dev_err(&pdev->dev, "resources busy\n");
-		return -EBUSY;
-	}
-
-	ctl_res = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	if (ctl_res && !devm_request_region(&pdev->dev, ctl_res->start,
-					   resource_size(ctl_res), DRV_NAME)) {
-		dev_err(&pdev->dev, "resources busy\n");
-		return -EBUSY;
-	}
-
-	base_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!base_mem_res)
-		return -ENODEV;
-
-	if (!devm_request_mem_region(&pdev->dev, base_mem_res->start,
-				     resource_size(base_mem_res), DRV_NAME)) {
-		dev_err(&pdev->dev, "resources busy\n");
-		return -EBUSY;
-	}
-
-	ctl_mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!ctl_mem_res)
-		return -ENODEV;
-
-	if (MACH_IS_ATARI) {
-		irq = IRQ_MFP_IDE;
-	} else {
-		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-		if (irq_res && irq_res->start > 0)
-			irq = irq_res->start;
-		else
-			return -ENODEV;
-	}
-
-	falconide_setup_ports(&hw, base_mem_res->start, ctl_mem_res->start, irq);
-
-	host = ide_host_alloc(&falconide_port_info, hws, 1);
-	if (!host)
-		return -ENOMEM;
-
-	if (!MACH_IS_ATARI) {
-		host->get_lock = NULL;
-		host->release_lock = NULL;
-	}
-
-	if (host->get_lock)
-		host->get_lock(NULL, NULL);
-	rc = ide_host_register(host, &falconide_port_info, hws);
-	if (host->release_lock)
-		host->release_lock();
-
-	if (rc)
-		goto err_free;
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-err_free:
-	ide_host_free(host);
-	return rc;
-}
-
-static int falconide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = platform_get_drvdata(pdev);
-
-	ide_host_remove(host);
-
-	return 0;
-}
-
-static struct platform_driver ide_falcon_driver = {
-	.remove = falconide_remove,
-	.driver   = {
-		.name	= "atari-falcon-ide",
-	},
-};
-
-module_platform_driver_probe(ide_falcon_driver, falconide_init);
-
-MODULE_AUTHOR("Geert Uytterhoeven");
-MODULE_DESCRIPTION("low-level driver for Atari Falcon IDE");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:atari-falcon-ide");
diff --git a/drivers/ide/gayle.c b/drivers/ide/gayle.c
deleted file mode 100644
index 901e6ebfeb969..0000000000000
--- a/drivers/ide/gayle.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- *  Amiga Gayle IDE Driver
- *
- *     Created 9 Jul 1997 by Geert Uytterhoeven
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License.  See the file COPYING in the main directory of this archive for
- *  more details.
- */
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/zorro.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-
-#include <asm/setup.h>
-#include <asm/amigahw.h>
-#include <asm/amigaints.h>
-#include <asm/amigayle.h>
-
-
-    /*
-     *  Offsets from one of the above bases
-     */
-
-#define GAYLE_CONTROL	0x101a
-
-    /*
-     *  These are at different offsets from the base
-     */
-
-#define GAYLE_IRQ_4000	0xdd3020	/* MSB = 1, Harddisk is source of */
-#define GAYLE_IRQ_1200	0xda9000	/* interrupt */
-
-
-    /*
-     *  Offset of the secondary port for IDE doublers
-     *  Note that GAYLE_CONTROL is NOT available then!
-     */
-
-#define GAYLE_NEXT_PORT	0x1000
-
-#define GAYLE_NUM_HWIFS		2
-#define GAYLE_NUM_PROBE_HWIFS	(ide_doubler ? GAYLE_NUM_HWIFS : \
-					       GAYLE_NUM_HWIFS-1)
-#define GAYLE_HAS_CONTROL_REG	(!ide_doubler)
-
-static bool ide_doubler;
-module_param_named(doubler, ide_doubler, bool, 0);
-MODULE_PARM_DESC(doubler, "enable support for IDE doublers");
-
-    /*
-     *  Check and acknowledge the interrupt status
-     */
-
-static int gayle_test_irq(ide_hwif_t *hwif)
-{
-	unsigned char ch;
-
-	ch = z_readb(hwif->io_ports.irq_addr);
-	if (!(ch & GAYLE_IRQ_IDE))
-		return 0;
-	return 1;
-}
-
-static void gayle_a1200_clear_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	(void)z_readb(hwif->io_ports.status_addr);
-	z_writeb(0x7c, hwif->io_ports.irq_addr);
-}
-
-static void __init gayle_setup_ports(struct ide_hw *hw, unsigned long base,
-				     unsigned long ctl, unsigned long irq_port)
-{
-	int i;
-
-	memset(hw, 0, sizeof(*hw));
-
-	hw->io_ports.data_addr = base;
-
-	for (i = 1; i < 8; i++)
-		hw->io_ports_array[i] = base + 2 + i * 4;
-
-	hw->io_ports.ctl_addr = ctl;
-	hw->io_ports.irq_addr = irq_port;
-
-	hw->irq = IRQ_AMIGA_PORTS;
-}
-
-static const struct ide_port_ops gayle_a4000_port_ops = {
-	.test_irq		= gayle_test_irq,
-};
-
-static const struct ide_port_ops gayle_a1200_port_ops = {
-	.clear_irq		= gayle_a1200_clear_irq,
-	.test_irq		= gayle_test_irq,
-};
-
-static const struct ide_port_info gayle_port_info = {
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_SERIALIZE |
-				  IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_generic,
-};
-
-    /*
-     *  Probe for a Gayle IDE interface (and optionally for an IDE doubler)
-     */
-
-static int __init amiga_gayle_ide_probe(struct platform_device *pdev)
-{
-	struct resource *res;
-	struct gayle_ide_platform_data *pdata;
-	unsigned long base, ctrlport, irqport;
-	unsigned int i;
-	int error;
-	struct ide_hw hw[GAYLE_NUM_HWIFS], *hws[GAYLE_NUM_HWIFS];
-	struct ide_port_info d = gayle_port_info;
-	struct ide_host *host;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	if (!request_mem_region(res->start, resource_size(res), "IDE"))
-		return -EBUSY;
-
-	pdata = dev_get_platdata(&pdev->dev);
-	pr_info("ide: Gayle IDE controller (A%u style%s)\n",
-		pdata->explicit_ack ? 1200 : 4000,
-		ide_doubler ? ", IDE doubler" : "");
-
-	base = (unsigned long)ZTWO_VADDR(pdata->base);
-	ctrlport = 0;
-	irqport = (unsigned long)ZTWO_VADDR(pdata->irqport);
-	if (pdata->explicit_ack)
-		d.port_ops = &gayle_a1200_port_ops;
-	else
-		d.port_ops = &gayle_a4000_port_ops;
-
-	for (i = 0; i < GAYLE_NUM_PROBE_HWIFS; i++, base += GAYLE_NEXT_PORT) {
-		if (GAYLE_HAS_CONTROL_REG)
-			ctrlport = base + GAYLE_CONTROL;
-
-		gayle_setup_ports(&hw[i], base, ctrlport, irqport);
-		hws[i] = &hw[i];
-	}
-
-	error = ide_host_add(&d, hws, i, &host);
-	if (error)
-		goto out;
-
-	platform_set_drvdata(pdev, host);
-	return 0;
-
-out:
-	release_mem_region(res->start, resource_size(res));
-	return error;
-}
-
-static int __exit amiga_gayle_ide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = platform_get_drvdata(pdev);
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	ide_host_remove(host);
-	release_mem_region(res->start, resource_size(res));
-	return 0;
-}
-
-static struct platform_driver amiga_gayle_ide_driver = {
-	.remove = __exit_p(amiga_gayle_ide_remove),
-	.driver   = {
-		.name	= "amiga-gayle-ide",
-	},
-};
-
-module_platform_driver_probe(amiga_gayle_ide_driver, amiga_gayle_ide_probe);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:amiga-gayle-ide");
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
deleted file mode 100644
index 50c9a41467c88..0000000000000
--- a/drivers/ide/hpt366.c
+++ /dev/null
@@ -1,1545 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1999-2003		Andre Hedrick <andre@linux-ide.org>
- * Portions Copyright (C) 2001	        Sun Microsystems, Inc.
- * Portions Copyright (C) 2003		Red Hat Inc
- * Portions Copyright (C) 2007		Bartlomiej Zolnierkiewicz
- * Portions Copyright (C) 2005-2009	MontaVista Software, Inc.
- *
- * Thanks to HighPoint Technologies for their assistance, and hardware.
- * Special Thanks to Jon Burchmore in SanDiego for the deep pockets, his
- * donation of an ABit BP6 mainboard, processor, and memory acellerated
- * development and support.
- *
- *
- * HighPoint has its own drivers (open source except for the RAID part)
- * available from http://www.highpoint-tech.com/USA_new/service_support.htm 
- * This may be useful to anyone wanting to work on this driver, however  do not
- * trust  them too much since the code tends to become less and less meaningful
- * as the time passes... :-/
- *
- * Note that final HPT370 support was done by force extraction of GPL.
- *
- * - add function for getting/setting power status of drive
- * - the HPT370's state machine can get confused. reset it before each dma 
- *   xfer to prevent that from happening.
- * - reset state engine whenever we get an error.
- * - check for busmaster state at end of dma. 
- * - use new highpoint timings.
- * - detect bus speed using highpoint register.
- * - use pll if we don't have a clock table. added a 66MHz table that's
- *   just 2x the 33MHz table.
- * - removed turnaround. NOTE: we never want to switch between pll and
- *   pci clocks as the chip can glitch in those cases. the highpoint
- *   approved workaround slows everything down too much to be useful. in
- *   addition, we would have to serialize access to each chip.
- * 	Adrian Sun <a.sun@sun.com>
- *
- * add drive timings for 66MHz PCI bus,
- * fix ATA Cable signal detection, fix incorrect /proc info
- * add /proc display for per-drive PIO/DMA/UDMA mode and
- * per-channel ATA-33/66 Cable detect.
- * 	Duncan Laurie <void@sun.com>
- *
- * fixup /proc output for multiple controllers
- *	Tim Hockin <thockin@sun.com>
- *
- * On hpt366: 
- * Reset the hpt366 on error, reset on dma
- * Fix disabling Fast Interrupt hpt366.
- * 	Mike Waychison <crlf@sun.com>
- *
- * Added support for 372N clocking and clock switching. The 372N needs
- * different clocks on read/write. This requires overloading rw_disk and
- * other deeply crazy things. Thanks to <http://www.hoerstreich.de> for
- * keeping me sane. 
- *		Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- * - fix the clock turnaround code: it was writing to the wrong ports when
- *   called for the secondary channel, caching the current clock mode per-
- *   channel caused the cached register value to get out of sync with the
- *   actual one, the channels weren't serialized, the turnaround shouldn't
- *   be done on 66 MHz PCI bus
- * - disable UltraATA/100 for HPT370 by default as the 33 MHz clock being used
- *   does not allow for this speed anyway
- * - avoid touching disabled channels (e.g. HPT371/N are single channel chips,
- *   their primary channel is kind of virtual, it isn't tied to any pins)
- * - fix/remove bad/unused timing tables and use one set of tables for the whole
- *   HPT37x chip family; save space by introducing the separate transfer mode
- *   table in which the mode lookup is done
- * - use f_CNT value saved by  the HighPoint BIOS as reading it directly gives
- *   the wrong PCI frequency since DPLL has already been calibrated by BIOS;
- *   read it only from the function 0 of HPT374 chips
- * - fix the hotswap code:  it caused RESET- to glitch when tristating the bus,
- *   and for HPT36x the obsolete HDIO_TRISTATE_HWIF handler was called instead
- * - pass to init_chipset() handlers a copy of the IDE PCI device structure as
- *   they tamper with its fields
- * - pass  to the init_setup handlers a copy of the ide_pci_device_t structure
- *   since they may tamper with its fields
- * - prefix the driver startup messages with the real chip name
- * - claim the extra 240 bytes of I/O space for all chips
- * - optimize the UltraDMA filtering and the drive list lookup code
- * - use pci_get_slot() to get to the function 1 of HPT36x/374
- * - cache offset of the channel's misc. control registers (MCRs) being used
- *   throughout the driver
- * - only touch the relevant MCR when detecting the cable type on HPT374's
- *   function 1
- * - rename all the register related variables consistently
- * - move all the interrupt twiddling code from the speedproc handlers into
- *   init_hwif_hpt366(), also grouping all the DMA related code together there
- * - merge HPT36x/HPT37x speedproc handlers, fix PIO timing register mask and
- *   separate the UltraDMA and MWDMA masks there to avoid changing PIO timings
- *   when setting an UltraDMA mode
- * - fix hpt3xx_tune_drive() to set the PIO mode requested, not always select
- *   the best possible one
- * - clean up DMA timeout handling for HPT370
- * - switch to using the enumeration type to differ between the numerous chip
- *   variants, matching PCI device/revision ID with the chip type early, at the
- *   init_setup stage
- * - extend the hpt_info structure to hold the DPLL and PCI clock frequencies,
- *   stop duplicating it for each channel by storing the pointer in the pci_dev
- *   structure: first, at the init_setup stage, point it to a static "template"
- *   with only the chip type and its specific base DPLL frequency, the highest
- *   UltraDMA mode, and the chip settings table pointer filled,  then, at the
- *   init_chipset stage, allocate per-chip instance  and fill it with the rest
- *   of the necessary information
- * - get rid of the constant thresholds in the HPT37x PCI clock detection code,
- *   switch  to calculating  PCI clock frequency based on the chip's base DPLL
- *   frequency
- * - switch to using the  DPLL clock and enable UltraATA/133 mode by default on
- *   anything  newer than HPT370/A (except HPT374 that is not capable of this
- *   mode according to the manual)
- * - fold PCI clock detection and DPLL setup code into init_chipset_hpt366(),
- *   also fixing the interchanged 25/40 MHz PCI clock cases for HPT36x chips;
- *   unify HPT36x/37x timing setup code and the speedproc handlers by joining
- *   the register setting lists into the table indexed by the clock selected
- * - set the correct hwif->ultra_mask for each individual chip
- * - add Ultra and MW DMA mode filtering for the HPT37[24] based SATA cards
- * - stop resetting HPT370's state machine before each DMA transfer as that has
- *   caused more harm than good
- *	Sergei Shtylyov, <sshtylyov@ru.mvista.com> or <source@mvista.com>
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-#include <linux/slab.h>
-
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-#define DRV_NAME "hpt366"
-
-/* various tuning parameters */
-#undef	HPT_RESET_STATE_ENGINE
-#undef	HPT_DELAY_INTERRUPT
-
-static const char *bad_ata100_5[] = {
-	"IBM-DTLA-307075",
-	"IBM-DTLA-307060",
-	"IBM-DTLA-307045",
-	"IBM-DTLA-307030",
-	"IBM-DTLA-307020",
-	"IBM-DTLA-307015",
-	"IBM-DTLA-305040",
-	"IBM-DTLA-305030",
-	"IBM-DTLA-305020",
-	"IC35L010AVER07-0",
-	"IC35L020AVER07-0",
-	"IC35L030AVER07-0",
-	"IC35L040AVER07-0",
-	"IC35L060AVER07-0",
-	"WDC AC310200R",
-	NULL
-};
-
-static const char *bad_ata66_4[] = {
-	"IBM-DTLA-307075",
-	"IBM-DTLA-307060",
-	"IBM-DTLA-307045",
-	"IBM-DTLA-307030",
-	"IBM-DTLA-307020",
-	"IBM-DTLA-307015",
-	"IBM-DTLA-305040",
-	"IBM-DTLA-305030",
-	"IBM-DTLA-305020",
-	"IC35L010AVER07-0",
-	"IC35L020AVER07-0",
-	"IC35L030AVER07-0",
-	"IC35L040AVER07-0",
-	"IC35L060AVER07-0",
-	"WDC AC310200R",
-	"MAXTOR STM3320620A",
-	NULL
-};
-
-static const char *bad_ata66_3[] = {
-	"WDC AC310200R",
-	NULL
-};
-
-static const char *bad_ata33[] = {
-	"Maxtor 92720U8", "Maxtor 92040U6", "Maxtor 91360U4", "Maxtor 91020U3", "Maxtor 90845U3", "Maxtor 90650U2",
-	"Maxtor 91360D8", "Maxtor 91190D7", "Maxtor 91020D6", "Maxtor 90845D5", "Maxtor 90680D4", "Maxtor 90510D3", "Maxtor 90340D2",
-	"Maxtor 91152D8", "Maxtor 91008D7", "Maxtor 90845D6", "Maxtor 90840D6", "Maxtor 90720D5", "Maxtor 90648D5", "Maxtor 90576D4",
-	"Maxtor 90510D4",
-	"Maxtor 90432D3", "Maxtor 90288D2", "Maxtor 90256D2",
-	"Maxtor 91000D8", "Maxtor 90910D8", "Maxtor 90875D7", "Maxtor 90840D7", "Maxtor 90750D6", "Maxtor 90625D5", "Maxtor 90500D4",
-	"Maxtor 91728D8", "Maxtor 91512D7", "Maxtor 91303D6", "Maxtor 91080D5", "Maxtor 90845D4", "Maxtor 90680D4", "Maxtor 90648D3", "Maxtor 90432D2",
-	NULL
-};
-
-static u8 xfer_speeds[] = {
-	XFER_UDMA_6,
-	XFER_UDMA_5,
-	XFER_UDMA_4,
-	XFER_UDMA_3,
-	XFER_UDMA_2,
-	XFER_UDMA_1,
-	XFER_UDMA_0,
-
-	XFER_MW_DMA_2,
-	XFER_MW_DMA_1,
-	XFER_MW_DMA_0,
-
-	XFER_PIO_4,
-	XFER_PIO_3,
-	XFER_PIO_2,
-	XFER_PIO_1,
-	XFER_PIO_0
-};
-
-/* Key for bus clock timings
- * 36x   37x
- * bits  bits
- * 0:3	 0:3	data_high_time. Inactive time of DIOW_/DIOR_ for PIO and MW DMA.
- *		cycles = value + 1
- * 4:7	 4:8	data_low_time. Active time of DIOW_/DIOR_ for PIO and MW DMA.
- *		cycles = value + 1
- * 8:11  9:12	cmd_high_time. Inactive time of DIOW_/DIOR_ during task file
- *		register access.
- * 12:15 13:17	cmd_low_time. Active time of DIOW_/DIOR_ during task file
- *		register access.
- * 16:18 18:20	udma_cycle_time. Clock cycles for UDMA xfer.
- * -	 21	CLK frequency: 0=ATA clock, 1=dual ATA clock.
- * 19:21 22:24	pre_high_time. Time to initialize the 1st cycle for PIO and
- *		MW DMA xfer.
- * 22:24 25:27	cmd_pre_high_time. Time to initialize the 1st PIO cycle for
- *		task file register access.
- * 28	 28	UDMA enable.
- * 29	 29	DMA  enable.
- * 30	 30	PIO MST enable. If set, the chip is in bus master mode during
- *		PIO xfer.
- * 31	 31	FIFO enable.
- */
-
-static u32 forty_base_hpt36x[] = {
-	/* XFER_UDMA_6 */	0x900fd943,
-	/* XFER_UDMA_5 */	0x900fd943,
-	/* XFER_UDMA_4 */	0x900fd943,
-	/* XFER_UDMA_3 */	0x900ad943,
-	/* XFER_UDMA_2 */	0x900bd943,
-	/* XFER_UDMA_1 */	0x9008d943,
-	/* XFER_UDMA_0 */	0x9008d943,
-
-	/* XFER_MW_DMA_2 */	0xa008d943,
-	/* XFER_MW_DMA_1 */	0xa010d955,
-	/* XFER_MW_DMA_0 */	0xa010d9fc,
-
-	/* XFER_PIO_4 */	0xc008d963,
-	/* XFER_PIO_3 */	0xc010d974,
-	/* XFER_PIO_2 */	0xc010d997,
-	/* XFER_PIO_1 */	0xc010d9c7,
-	/* XFER_PIO_0 */	0xc018d9d9
-};
-
-static u32 thirty_three_base_hpt36x[] = {
-	/* XFER_UDMA_6 */	0x90c9a731,
-	/* XFER_UDMA_5 */	0x90c9a731,
-	/* XFER_UDMA_4 */	0x90c9a731,
-	/* XFER_UDMA_3 */	0x90cfa731,
-	/* XFER_UDMA_2 */	0x90caa731,
-	/* XFER_UDMA_1 */	0x90cba731,
-	/* XFER_UDMA_0 */	0x90c8a731,
-
-	/* XFER_MW_DMA_2 */	0xa0c8a731,
-	/* XFER_MW_DMA_1 */	0xa0c8a732,	/* 0xa0c8a733 */
-	/* XFER_MW_DMA_0 */	0xa0c8a797,
-
-	/* XFER_PIO_4 */	0xc0c8a731,
-	/* XFER_PIO_3 */	0xc0c8a742,
-	/* XFER_PIO_2 */	0xc0d0a753,
-	/* XFER_PIO_1 */	0xc0d0a7a3,	/* 0xc0d0a793 */
-	/* XFER_PIO_0 */	0xc0d0a7aa	/* 0xc0d0a7a7 */
-};
-
-static u32 twenty_five_base_hpt36x[] = {
-	/* XFER_UDMA_6 */	0x90c98521,
-	/* XFER_UDMA_5 */	0x90c98521,
-	/* XFER_UDMA_4 */	0x90c98521,
-	/* XFER_UDMA_3 */	0x90cf8521,
-	/* XFER_UDMA_2 */	0x90cf8521,
-	/* XFER_UDMA_1 */	0x90cb8521,
-	/* XFER_UDMA_0 */	0x90cb8521,
-
-	/* XFER_MW_DMA_2 */	0xa0ca8521,
-	/* XFER_MW_DMA_1 */	0xa0ca8532,
-	/* XFER_MW_DMA_0 */	0xa0ca8575,
-
-	/* XFER_PIO_4 */	0xc0ca8521,
-	/* XFER_PIO_3 */	0xc0ca8532,
-	/* XFER_PIO_2 */	0xc0ca8542,
-	/* XFER_PIO_1 */	0xc0d08572,
-	/* XFER_PIO_0 */	0xc0d08585
-};
-
-/*
- * The following are the new timing tables with PIO mode data/taskfile transfer
- * overclocking fixed...
- */
-
-/* This table is taken from the HPT370 data manual rev. 1.02 */
-static u32 thirty_three_base_hpt37x[] = {
-	/* XFER_UDMA_6 */	0x16455031,	/* 0x16655031 ?? */
-	/* XFER_UDMA_5 */	0x16455031,
-	/* XFER_UDMA_4 */	0x16455031,
-	/* XFER_UDMA_3 */	0x166d5031,
-	/* XFER_UDMA_2 */	0x16495031,
-	/* XFER_UDMA_1 */	0x164d5033,
-	/* XFER_UDMA_0 */	0x16515097,
-
-	/* XFER_MW_DMA_2 */	0x26515031,
-	/* XFER_MW_DMA_1 */	0x26515033,
-	/* XFER_MW_DMA_0 */	0x26515097,
-
-	/* XFER_PIO_4 */	0x06515021,
-	/* XFER_PIO_3 */	0x06515022,
-	/* XFER_PIO_2 */	0x06515033,
-	/* XFER_PIO_1 */	0x06915065,
-	/* XFER_PIO_0 */	0x06d1508a
-};
-
-static u32 fifty_base_hpt37x[] = {
-	/* XFER_UDMA_6 */	0x1a861842,
-	/* XFER_UDMA_5 */	0x1a861842,
-	/* XFER_UDMA_4 */	0x1aae1842,
-	/* XFER_UDMA_3 */	0x1a8e1842,
-	/* XFER_UDMA_2 */	0x1a0e1842,
-	/* XFER_UDMA_1 */	0x1a161854,
-	/* XFER_UDMA_0 */	0x1a1a18ea,
-
-	/* XFER_MW_DMA_2 */	0x2a821842,
-	/* XFER_MW_DMA_1 */	0x2a821854,
-	/* XFER_MW_DMA_0 */	0x2a8218ea,
-
-	/* XFER_PIO_4 */	0x0a821842,
-	/* XFER_PIO_3 */	0x0a821843,
-	/* XFER_PIO_2 */	0x0a821855,
-	/* XFER_PIO_1 */	0x0ac218a8,
-	/* XFER_PIO_0 */	0x0b02190c
-};
-
-static u32 sixty_six_base_hpt37x[] = {
-	/* XFER_UDMA_6 */	0x1c86fe62,
-	/* XFER_UDMA_5 */	0x1caefe62,	/* 0x1c8afe62 */
-	/* XFER_UDMA_4 */	0x1c8afe62,
-	/* XFER_UDMA_3 */	0x1c8efe62,
-	/* XFER_UDMA_2 */	0x1c92fe62,
-	/* XFER_UDMA_1 */	0x1c9afe62,
-	/* XFER_UDMA_0 */	0x1c82fe62,
-
-	/* XFER_MW_DMA_2 */	0x2c82fe62,
-	/* XFER_MW_DMA_1 */	0x2c82fe66,
-	/* XFER_MW_DMA_0 */	0x2c82ff2e,
-
-	/* XFER_PIO_4 */	0x0c82fe62,
-	/* XFER_PIO_3 */	0x0c82fe84,
-	/* XFER_PIO_2 */	0x0c82fea6,
-	/* XFER_PIO_1 */	0x0d02ff26,
-	/* XFER_PIO_0 */	0x0d42ff7f
-};
-
-#define HPT371_ALLOW_ATA133_6		1
-#define HPT302_ALLOW_ATA133_6		1
-#define HPT372_ALLOW_ATA133_6		1
-#define HPT370_ALLOW_ATA100_5		0
-#define HPT366_ALLOW_ATA66_4		1
-#define HPT366_ALLOW_ATA66_3		1
-
-/* Supported ATA clock frequencies */
-enum ata_clock {
-	ATA_CLOCK_25MHZ,
-	ATA_CLOCK_33MHZ,
-	ATA_CLOCK_40MHZ,
-	ATA_CLOCK_50MHZ,
-	ATA_CLOCK_66MHZ,
-	NUM_ATA_CLOCKS
-};
-
-struct hpt_timings {
-	u32 pio_mask;
-	u32 dma_mask;
-	u32 ultra_mask;
-	u32 *clock_table[NUM_ATA_CLOCKS];
-};
-
-/*
- *	Hold all the HighPoint chip information in one place.
- */
-
-struct hpt_info {
-	char *chip_name;	/* Chip name */
-	u8 chip_type;		/* Chip type */
-	u8 udma_mask;		/* Allowed UltraDMA modes mask. */
-	u8 dpll_clk;		/* DPLL clock in MHz */
-	u8 pci_clk;		/* PCI  clock in MHz */
-	struct hpt_timings *timings; /* Chipset timing data */
-	u8 clock;		/* ATA clock selected */
-};
-
-/* Supported HighPoint chips */
-enum {
-	HPT36x,
-	HPT370,
-	HPT370A,
-	HPT374,
-	HPT372,
-	HPT372A,
-	HPT302,
-	HPT371,
-	HPT372N,
-	HPT302N,
-	HPT371N
-};
-
-static struct hpt_timings hpt36x_timings = {
-	.pio_mask	= 0xc1f8ffff,
-	.dma_mask	= 0x303800ff,
-	.ultra_mask	= 0x30070000,
-	.clock_table	= {
-		[ATA_CLOCK_25MHZ] = twenty_five_base_hpt36x,
-		[ATA_CLOCK_33MHZ] = thirty_three_base_hpt36x,
-		[ATA_CLOCK_40MHZ] = forty_base_hpt36x,
-		[ATA_CLOCK_50MHZ] = NULL,
-		[ATA_CLOCK_66MHZ] = NULL
-	}
-};
-
-static struct hpt_timings hpt37x_timings = {
-	.pio_mask	= 0xcfc3ffff,
-	.dma_mask	= 0x31c001ff,
-	.ultra_mask	= 0x303c0000,
-	.clock_table	= {
-		[ATA_CLOCK_25MHZ] = NULL,
-		[ATA_CLOCK_33MHZ] = thirty_three_base_hpt37x,
-		[ATA_CLOCK_40MHZ] = NULL,
-		[ATA_CLOCK_50MHZ] = fifty_base_hpt37x,
-		[ATA_CLOCK_66MHZ] = sixty_six_base_hpt37x
-	}
-};
-
-static const struct hpt_info hpt36x = {
-	.chip_name	= "HPT36x",
-	.chip_type	= HPT36x,
-	.udma_mask	= HPT366_ALLOW_ATA66_3 ? (HPT366_ALLOW_ATA66_4 ? ATA_UDMA4 : ATA_UDMA3) : ATA_UDMA2,
-	.dpll_clk	= 0,	/* no DPLL */
-	.timings	= &hpt36x_timings
-};
-
-static const struct hpt_info hpt370 = {
-	.chip_name	= "HPT370",
-	.chip_type	= HPT370,
-	.udma_mask	= HPT370_ALLOW_ATA100_5 ? ATA_UDMA5 : ATA_UDMA4,
-	.dpll_clk	= 48,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt370a = {
-	.chip_name	= "HPT370A",
-	.chip_type	= HPT370A,
-	.udma_mask	= HPT370_ALLOW_ATA100_5 ? ATA_UDMA5 : ATA_UDMA4,
-	.dpll_clk	= 48,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt374 = {
-	.chip_name	= "HPT374",
-	.chip_type	= HPT374,
-	.udma_mask	= ATA_UDMA5,
-	.dpll_clk	= 48,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt372 = {
-	.chip_name	= "HPT372",
-	.chip_type	= HPT372,
-	.udma_mask	= HPT372_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 55,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt372a = {
-	.chip_name	= "HPT372A",
-	.chip_type	= HPT372A,
-	.udma_mask	= HPT372_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 66,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt302 = {
-	.chip_name	= "HPT302",
-	.chip_type	= HPT302,
-	.udma_mask	= HPT302_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 66,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt371 = {
-	.chip_name	= "HPT371",
-	.chip_type	= HPT371,
-	.udma_mask	= HPT371_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 66,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt372n = {
-	.chip_name	= "HPT372N",
-	.chip_type	= HPT372N,
-	.udma_mask	= HPT372_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 77,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt302n = {
-	.chip_name	= "HPT302N",
-	.chip_type	= HPT302N,
-	.udma_mask	= HPT302_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 77,
-	.timings	= &hpt37x_timings
-};
-
-static const struct hpt_info hpt371n = {
-	.chip_name	= "HPT371N",
-	.chip_type	= HPT371N,
-	.udma_mask	= HPT371_ALLOW_ATA133_6 ? ATA_UDMA6 : ATA_UDMA5,
-	.dpll_clk	= 77,
-	.timings	= &hpt37x_timings
-};
-
-static bool check_in_drive_list(ide_drive_t *drive, const char **list)
-{
-	return match_string(list, -1, (char *)&drive->id[ATA_ID_PROD]) >= 0;
-}
-
-static struct hpt_info *hpt3xx_get_info(struct device *dev)
-{
-	struct ide_host *host	= dev_get_drvdata(dev);
-	struct hpt_info *info	= (struct hpt_info *)host->host_priv;
-
-	return dev == host->dev[1] ? info + 1 : info;
-}
-
-/*
- * The Marvell bridge chips used on the HighPoint SATA cards do not seem
- * to support the UltraDMA modes 1, 2, and 3 as well as any MWDMA modes...
- */
-
-static u8 hpt3xx_udma_filter(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	u8 mask 		= hwif->ultra_mask;
-
-	switch (info->chip_type) {
-	case HPT36x:
-		if (!HPT366_ALLOW_ATA66_4 ||
-		    check_in_drive_list(drive, bad_ata66_4))
-			mask = ATA_UDMA3;
-
-		if (!HPT366_ALLOW_ATA66_3 ||
-		    check_in_drive_list(drive, bad_ata66_3))
-			mask = ATA_UDMA2;
-		break;
-	case HPT370:
-		if (!HPT370_ALLOW_ATA100_5 ||
-		    check_in_drive_list(drive, bad_ata100_5))
-			mask = ATA_UDMA4;
-		break;
-	case HPT370A:
-		if (!HPT370_ALLOW_ATA100_5 ||
-		    check_in_drive_list(drive, bad_ata100_5))
-			return ATA_UDMA4;
-		fallthrough;
-	case HPT372 :
-	case HPT372A:
-	case HPT372N:
-	case HPT374 :
-		if (ata_id_is_sata(drive->id))
-			mask &= ~0x0e;
-		fallthrough;
-	default:
-		return mask;
-	}
-
-	return check_in_drive_list(drive, bad_ata33) ? 0x00 : mask;
-}
-
-static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-
-	switch (info->chip_type) {
-	case HPT372 :
-	case HPT372A:
-	case HPT372N:
-	case HPT374 :
-		if (ata_id_is_sata(drive->id))
-			return 0x00;
-		fallthrough;
-	default:
-		return 0x07;
-	}
-}
-
-static u32 get_speed_setting(u8 speed, struct hpt_info *info)
-{
-	int i;
-
-	/*
-	 * Lookup the transfer mode table to get the index into
-	 * the timing table.
-	 *
-	 * NOTE: For XFER_PIO_SLOW, PIO mode 0 timings will be used.
-	 */
-	for (i = 0; i < ARRAY_SIZE(xfer_speeds) - 1; i++)
-		if (xfer_speeds[i] == speed)
-			break;
-
-	return info->timings->clock_table[info->clock][i];
-}
-
-static void hpt3xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	struct hpt_timings *t	= info->timings;
-	u8  itr_addr		= 0x40 + (drive->dn * 4);
-	u32 old_itr		= 0;
-	const u8 speed		= drive->dma_mode;
-	u32 new_itr		= get_speed_setting(speed, info);
-	u32 itr_mask		= speed < XFER_MW_DMA_0 ? t->pio_mask :
-				 (speed < XFER_UDMA_0   ? t->dma_mask :
-							  t->ultra_mask);
-
-	pci_read_config_dword(dev, itr_addr, &old_itr);
-	new_itr = (old_itr & ~itr_mask) | (new_itr & itr_mask);
-	/*
-	 * Disable on-chip PIO FIFO/buffer (and PIO MST mode as well)
-	 * to avoid problems handling I/O errors later
-	 */
-	new_itr &= ~0xc0000000;
-
-	pci_write_config_dword(dev, itr_addr, new_itr);
-}
-
-static void hpt3xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	hpt3xx_set_mode(hwif, drive);
-}
-
-static void hpt3xx_maskproc(ide_drive_t *drive, int mask)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-
-	if ((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
-		return;
-
-	if (info->chip_type >= HPT370) {
-		u8 scr1 = 0;
-
-		pci_read_config_byte(dev, 0x5a, &scr1);
-		if (((scr1 & 0x10) >> 4) != mask) {
-			if (mask)
-				scr1 |=  0x10;
-			else
-				scr1 &= ~0x10;
-			pci_write_config_byte(dev, 0x5a, scr1);
-		}
-	} else if (mask)
-		disable_irq(hwif->irq);
-	else
-		enable_irq(hwif->irq);
-}
-
-/*
- * This is specific to the HPT366 UDMA chipset
- * by HighPoint|Triones Technologies, Inc.
- */
-static void hpt366_dma_lost_irq(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u8 mcr1 = 0, mcr3 = 0, scr1 = 0;
-
-	pci_read_config_byte(dev, 0x50, &mcr1);
-	pci_read_config_byte(dev, 0x52, &mcr3);
-	pci_read_config_byte(dev, 0x5a, &scr1);
-	printk("%s: (%s)  mcr1=0x%02x, mcr3=0x%02x, scr1=0x%02x\n",
-		drive->name, __func__, mcr1, mcr3, scr1);
-	if (scr1 & 0x10)
-		pci_write_config_byte(dev, 0x5a, scr1 & ~0x10);
-	ide_dma_lost_irq(drive);
-}
-
-static void hpt370_clear_engine(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	pci_write_config_byte(dev, hwif->select_data, 0x37);
-	udelay(10);
-}
-
-static void hpt370_irq_timeout(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u16 bfifo		= 0;
-	u8  dma_cmd;
-
-	pci_read_config_word(dev, hwif->select_data + 2, &bfifo);
-	printk(KERN_DEBUG "%s: %d bytes in FIFO\n", drive->name, bfifo & 0x1ff);
-
-	/* get DMA command mode */
-	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-	/* stop DMA */
-	outb(dma_cmd & ~ATA_DMA_START, hwif->dma_base + ATA_DMA_CMD);
-	hpt370_clear_engine(drive);
-}
-
-static void hpt370_dma_start(ide_drive_t *drive)
-{
-#ifdef HPT_RESET_STATE_ENGINE
-	hpt370_clear_engine(drive);
-#endif
-	ide_dma_start(drive);
-}
-
-static int hpt370_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	u8  dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
-
-	if (dma_stat & ATA_DMA_ACTIVE) {
-		/* wait a little */
-		udelay(20);
-		dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
-		if (dma_stat & ATA_DMA_ACTIVE)
-			hpt370_irq_timeout(drive);
-	}
-	return ide_dma_end(drive);
-}
-
-/* returns 1 if DMA IRQ issued, 0 otherwise */
-static int hpt374_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u16 bfifo		= 0;
-	u8  dma_stat;
-
-	pci_read_config_word(dev, hwif->select_data + 2, &bfifo);
-	if (bfifo & 0x1FF) {
-//		printk("%s: %d bytes in FIFO\n", drive->name, bfifo);
-		return 0;
-	}
-
-	dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
-	/* return 1 if INTR asserted */
-	if (dma_stat & ATA_DMA_INTR)
-		return 1;
-
-	return 0;
-}
-
-static int hpt374_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 mcr	= 0, mcr_addr	= hwif->select_data;
-	u8 bwsr = 0, mask	= hwif->channel ? 0x02 : 0x01;
-
-	pci_read_config_byte(dev, 0x6a, &bwsr);
-	pci_read_config_byte(dev, mcr_addr, &mcr);
-	if (bwsr & mask)
-		pci_write_config_byte(dev, mcr_addr, mcr | 0x30);
-	return ide_dma_end(drive);
-}
-
-/**
- *	hpt3xxn_set_clock	-	perform clock switching dance
- *	@hwif: hwif to switch
- *	@mode: clocking mode (0x21 for write, 0x23 otherwise)
- *
- *	Switch the DPLL clock on the HPT3xxN devices. This is a	right mess.
- */
-
-static void hpt3xxn_set_clock(ide_hwif_t *hwif, u8 mode)
-{
-	unsigned long base = hwif->extra_base;
-	u8 scr2 = inb(base + 0x6b);
-
-	if ((scr2 & 0x7f) == mode)
-		return;
-
-	/* Tristate the bus */
-	outb(0x80, base + 0x63);
-	outb(0x80, base + 0x67);
-
-	/* Switch clock and reset channels */
-	outb(mode, base + 0x6b);
-	outb(0xc0, base + 0x69);
-
-	/*
-	 * Reset the state machines.
-	 * NOTE: avoid accidentally enabling the disabled channels.
-	 */
-	outb(inb(base + 0x60) | 0x32, base + 0x60);
-	outb(inb(base + 0x64) | 0x32, base + 0x64);
-
-	/* Complete reset */
-	outb(0x00, base + 0x69);
-
-	/* Reconnect channels to bus */
-	outb(0x00, base + 0x63);
-	outb(0x00, base + 0x67);
-}
-
-/**
- *	hpt3xxn_rw_disk		-	prepare for I/O
- *	@drive: drive for command
- *	@rq: block request structure
- *
- *	This is called when a disk I/O is issued to HPT3xxN.
- *	We need it because of the clock switching.
- */
-
-static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq)
-{
-	hpt3xxn_set_clock(drive->hwif, rq_data_dir(rq) ? 0x21 : 0x23);
-}
-
-/**
- *	hpt37x_calibrate_dpll	-	calibrate the DPLL
- *	@dev: PCI device
- *
- *	Perform a calibration cycle on the DPLL.
- *	Returns 1 if this succeeds
- */
-static int hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high)
-{
-	u32 dpll = (f_high << 16) | f_low | 0x100;
-	u8  scr2;
-	int i;
-
-	pci_write_config_dword(dev, 0x5c, dpll);
-
-	/* Wait for oscillator ready */
-	for(i = 0; i < 0x5000; ++i) {
-		udelay(50);
-		pci_read_config_byte(dev, 0x5b, &scr2);
-		if (scr2 & 0x80)
-			break;
-	}
-	/* See if it stays ready (we'll just bail out if it's not yet) */
-	for(i = 0; i < 0x1000; ++i) {
-		pci_read_config_byte(dev, 0x5b, &scr2);
-		/* DPLL destabilized? */
-		if(!(scr2 & 0x80))
-			return 0;
-	}
-	/* Turn off tuning, we have the DPLL set */
-	pci_read_config_dword (dev, 0x5c, &dpll);
-	pci_write_config_dword(dev, 0x5c, (dpll & ~0x100));
-	return 1;
-}
-
-static void hpt3xx_disable_fast_irq(struct pci_dev *dev, u8 mcr_addr)
-{
-	struct ide_host *host	= pci_get_drvdata(dev);
-	struct hpt_info *info	= host->host_priv + (&dev->dev == host->dev[1]);
-	u8  chip_type		= info->chip_type;
-	u8  new_mcr, old_mcr	= 0;
-
-	/*
-	 * Disable the "fast interrupt" prediction.  Don't hold off
-	 * on interrupts. (== 0x01 despite what the docs say)
-	 */
-	pci_read_config_byte(dev, mcr_addr + 1, &old_mcr);
-
-	if (chip_type >= HPT374)
-		new_mcr = old_mcr & ~0x07;
-	else if (chip_type >= HPT370) {
-		new_mcr = old_mcr;
-		new_mcr &= ~0x02;
-#ifdef HPT_DELAY_INTERRUPT
-		new_mcr &= ~0x01;
-#else
-		new_mcr |=  0x01;
-#endif
-	} else					/* HPT366 and HPT368  */
-		new_mcr = old_mcr & ~0x80;
-
-	if (new_mcr != old_mcr)
-		pci_write_config_byte(dev, mcr_addr + 1, new_mcr);
-}
-
-static int init_chipset_hpt366(struct pci_dev *dev)
-{
-	unsigned long io_base	= pci_resource_start(dev, 4);
-	struct hpt_info *info	= hpt3xx_get_info(&dev->dev);
-	const char *name	= DRV_NAME;
-	u8 pci_clk,  dpll_clk	= 0;	/* PCI and DPLL clock in MHz */
-	u8 chip_type;
-	enum ata_clock	clock;
-
-	chip_type = info->chip_type;
-
-	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, (L1_CACHE_BYTES / 4));
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x78);
-	pci_write_config_byte(dev, PCI_MIN_GNT, 0x08);
-	pci_write_config_byte(dev, PCI_MAX_LAT, 0x08);
-
-	/*
-	 * First, try to estimate the PCI clock frequency...
-	 */
-	if (chip_type >= HPT370) {
-		u8  scr1  = 0;
-		u16 f_cnt = 0;
-		u32 temp  = 0;
-
-		/* Interrupt force enable. */
-		pci_read_config_byte(dev, 0x5a, &scr1);
-		if (scr1 & 0x10)
-			pci_write_config_byte(dev, 0x5a, scr1 & ~0x10);
-
-		/*
-		 * HighPoint does this for HPT372A.
-		 * NOTE: This register is only writeable via I/O space.
-		 */
-		if (chip_type == HPT372A)
-			outb(0x0e, io_base + 0x9c);
-
-		/*
-		 * Default to PCI clock. Make sure MA15/16 are set to output
-		 * to prevent drives having problems with 40-pin cables.
-		 */
-		pci_write_config_byte(dev, 0x5b, 0x23);
-
-		/*
-		 * We'll have to read f_CNT value in order to determine
-		 * the PCI clock frequency according to the following ratio:
-		 *
-		 * f_CNT = Fpci * 192 / Fdpll
-		 *
-		 * First try reading the register in which the HighPoint BIOS
-		 * saves f_CNT value before  reprogramming the DPLL from its
-		 * default setting (which differs for the various chips).
-		 *
-		 * NOTE: This register is only accessible via I/O space;
-		 * HPT374 BIOS only saves it for the function 0, so we have to
-		 * always read it from there -- no need to check the result of
-		 * pci_get_slot() for the function 0 as the whole device has
-		 * been already "pinned" (via function 1) in init_setup_hpt374()
-		 */
-		if (chip_type == HPT374 && (PCI_FUNC(dev->devfn) & 1)) {
-			struct pci_dev	*dev1 = pci_get_slot(dev->bus,
-							     dev->devfn - 1);
-			unsigned long io_base = pci_resource_start(dev1, 4);
-
-			temp =	inl(io_base + 0x90);
-			pci_dev_put(dev1);
-		} else
-			temp =	inl(io_base + 0x90);
-
-		/*
-		 * In case the signature check fails, we'll have to
-		 * resort to reading the f_CNT register itself in hopes
-		 * that nobody has touched the DPLL yet...
-		 */
-		if ((temp & 0xFFFFF000) != 0xABCDE000) {
-			int i;
-
-			printk(KERN_WARNING "%s %s: no clock data saved by "
-				"BIOS\n", name, pci_name(dev));
-
-			/* Calculate the average value of f_CNT. */
-			for (temp = i = 0; i < 128; i++) {
-				pci_read_config_word(dev, 0x78, &f_cnt);
-				temp += f_cnt & 0x1ff;
-				mdelay(1);
-			}
-			f_cnt = temp / 128;
-		} else
-			f_cnt = temp & 0x1ff;
-
-		dpll_clk = info->dpll_clk;
-		pci_clk  = (f_cnt * dpll_clk) / 192;
-
-		/* Clamp PCI clock to bands. */
-		if (pci_clk < 40)
-			pci_clk = 33;
-		else if(pci_clk < 45)
-			pci_clk = 40;
-		else if(pci_clk < 55)
-			pci_clk = 50;
-		else
-			pci_clk = 66;
-
-		printk(KERN_INFO "%s %s: DPLL base: %d MHz, f_CNT: %d, "
-			"assuming %d MHz PCI\n", name, pci_name(dev),
-			dpll_clk, f_cnt, pci_clk);
-	} else {
-		u32 itr1 = 0;
-
-		pci_read_config_dword(dev, 0x40, &itr1);
-
-		/* Detect PCI clock by looking at cmd_high_time. */
-		switch ((itr1 >> 8) & 0x0f) {
-			case 0x09:
-				pci_clk = 40;
-				break;
-			case 0x05:
-				pci_clk = 25;
-				break;
-			case 0x07:
-			default:
-				pci_clk = 33;
-				break;
-		}
-	}
-
-	/* Let's assume we'll use PCI clock for the ATA clock... */
-	switch (pci_clk) {
-		case 25:
-			clock = ATA_CLOCK_25MHZ;
-			break;
-		case 33:
-		default:
-			clock = ATA_CLOCK_33MHZ;
-			break;
-		case 40:
-			clock = ATA_CLOCK_40MHZ;
-			break;
-		case 50:
-			clock = ATA_CLOCK_50MHZ;
-			break;
-		case 66:
-			clock = ATA_CLOCK_66MHZ;
-			break;
-	}
-
-	/*
-	 * Only try the DPLL if we don't have a table for the PCI clock that
-	 * we are running at for HPT370/A, always use it  for anything newer...
-	 *
-	 * NOTE: Using the internal DPLL results in slow reads on 33 MHz PCI.
-	 * We also  don't like using  the DPLL because this causes glitches
-	 * on PRST-/SRST- when the state engine gets reset...
-	 */
-	if (chip_type >= HPT374 || info->timings->clock_table[clock] == NULL) {
-		u16 f_low, delta = pci_clk < 50 ? 2 : 4;
-		int adjust;
-
-		 /*
-		  * Select 66 MHz DPLL clock only if UltraATA/133 mode is
-		  * supported/enabled, use 50 MHz DPLL clock otherwise...
-		  */
-		if (info->udma_mask == ATA_UDMA6) {
-			dpll_clk = 66;
-			clock = ATA_CLOCK_66MHZ;
-		} else if (dpll_clk) {	/* HPT36x chips don't have DPLL */
-			dpll_clk = 50;
-			clock = ATA_CLOCK_50MHZ;
-		}
-
-		if (info->timings->clock_table[clock] == NULL) {
-			printk(KERN_ERR "%s %s: unknown bus timing!\n",
-				name, pci_name(dev));
-			return -EIO;
-		}
-
-		/* Select the DPLL clock. */
-		pci_write_config_byte(dev, 0x5b, 0x21);
-
-		/*
-		 * Adjust the DPLL based upon PCI clock, enable it,
-		 * and wait for stabilization...
-		 */
-		f_low = (pci_clk * 48) / dpll_clk;
-
-		for (adjust = 0; adjust < 8; adjust++) {
-			if(hpt37x_calibrate_dpll(dev, f_low, f_low + delta))
-				break;
-
-			/*
-			 * See if it'll settle at a fractionally different clock
-			 */
-			if (adjust & 1)
-				f_low -= adjust >> 1;
-			else
-				f_low += adjust >> 1;
-		}
-		if (adjust == 8) {
-			printk(KERN_ERR "%s %s: DPLL did not stabilize!\n",
-				name, pci_name(dev));
-			return -EIO;
-		}
-
-		printk(KERN_INFO "%s %s: using %d MHz DPLL clock\n",
-			name, pci_name(dev), dpll_clk);
-	} else {
-		/* Mark the fact that we're not using the DPLL. */
-		dpll_clk = 0;
-
-		printk(KERN_INFO "%s %s: using %d MHz PCI clock\n",
-			name, pci_name(dev), pci_clk);
-	}
-
-	/* Store the clock frequencies. */
-	info->dpll_clk	= dpll_clk;
-	info->pci_clk	= pci_clk;
-	info->clock	= clock;
-
-	if (chip_type >= HPT370) {
-		u8  mcr1, mcr4;
-
-		/*
-		 * Reset the state engines.
-		 * NOTE: Avoid accidentally enabling the disabled channels.
-		 */
-		pci_read_config_byte (dev, 0x50, &mcr1);
-		pci_read_config_byte (dev, 0x54, &mcr4);
-		pci_write_config_byte(dev, 0x50, (mcr1 | 0x32));
-		pci_write_config_byte(dev, 0x54, (mcr4 | 0x32));
-		udelay(100);
-	}
-
-	/*
-	 * On  HPT371N, if ATA clock is 66 MHz we must set bit 2 in
-	 * the MISC. register to stretch the UltraDMA Tss timing.
-	 * NOTE: This register is only writeable via I/O space.
-	 */
-	if (chip_type == HPT371N && clock == ATA_CLOCK_66MHZ)
-		outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c);
-
-	hpt3xx_disable_fast_irq(dev, 0x50);
-	hpt3xx_disable_fast_irq(dev, 0x54);
-
-	return 0;
-}
-
-static u8 hpt3xx_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev	*dev	= to_pci_dev(hwif->dev);
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	u8 chip_type		= info->chip_type;
-	u8 scr1 = 0, ata66	= hwif->channel ? 0x01 : 0x02;
-
-	/*
-	 * The HPT37x uses the CBLID pins as outputs for MA15/MA16
-	 * address lines to access an external EEPROM.  To read valid
-	 * cable detect state the pins must be enabled as inputs.
-	 */
-	if (chip_type == HPT374 && (PCI_FUNC(dev->devfn) & 1)) {
-		/*
-		 * HPT374 PCI function 1
-		 * - set bit 15 of reg 0x52 to enable TCBLID as input
-		 * - set bit 15 of reg 0x56 to enable FCBLID as input
-		 */
-		u8  mcr_addr = hwif->select_data + 2;
-		u16 mcr;
-
-		pci_read_config_word(dev, mcr_addr, &mcr);
-		pci_write_config_word(dev, mcr_addr, mcr | 0x8000);
-		/* Debounce, then read cable ID register */
-		udelay(10);
-		pci_read_config_byte(dev, 0x5a, &scr1);
-		pci_write_config_word(dev, mcr_addr, mcr);
-	} else if (chip_type >= HPT370) {
-		/*
-		 * HPT370/372 and 374 pcifn 0
-		 * - clear bit 0 of reg 0x5b to enable P/SCBLID as inputs
-		 */
-		u8 scr2 = 0;
-
-		pci_read_config_byte(dev, 0x5b, &scr2);
-		pci_write_config_byte(dev, 0x5b, scr2 & ~1);
-		/* Debounce, then read cable ID register */
-		udelay(10);
-		pci_read_config_byte(dev, 0x5a, &scr1);
-		pci_write_config_byte(dev, 0x5b, scr2);
-	} else
-		pci_read_config_byte(dev, 0x5a, &scr1);
-
-	return (scr1 & ata66) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-static void init_hwif_hpt366(ide_hwif_t *hwif)
-{
-	struct hpt_info *info	= hpt3xx_get_info(hwif->dev);
-	u8  chip_type		= info->chip_type;
-
-	/* Cache the channel's MISC. control registers' offset */
-	hwif->select_data	= hwif->channel ? 0x54 : 0x50;
-
-	/*
-	 * HPT3xxN chips have some complications:
-	 *
-	 * - on 33 MHz PCI we must clock switch
-	 * - on 66 MHz PCI we must NOT use the PCI clock
-	 */
-	if (chip_type >= HPT372N && info->dpll_clk && info->pci_clk < 66) {
-		/*
-		 * Clock is shared between the channels,
-		 * so we'll have to serialize them... :-(
-		 */
-		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
-		hwif->rw_disk = &hpt3xxn_rw_disk;
-	}
-}
-
-static int init_dma_hpt366(ide_hwif_t *hwif,
-				     const struct ide_port_info *d)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long flags, base = ide_pci_dma_base(hwif, d);
-	u8 dma_old, dma_new, masterdma = 0, slavedma = 0;
-
-	if (base == 0)
-		return -1;
-
-	hwif->dma_base = base;
-
-	if (ide_pci_check_simplex(hwif, d) < 0)
-		return -1;
-
-	if (ide_pci_set_master(dev, d->name) < 0)
-		return -1;
-
-	dma_old = inb(base + 2);
-
-	local_irq_save(flags);
-
-	dma_new = dma_old;
-	pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma);
-	pci_read_config_byte(dev, hwif->channel ? 0x4f : 0x47,  &slavedma);
-
-	if (masterdma & 0x30)	dma_new |= 0x20;
-	if ( slavedma & 0x30)	dma_new |= 0x40;
-	if (dma_new != dma_old)
-		outb(dma_new, base + 2);
-
-	local_irq_restore(flags);
-
-	printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
-			 hwif->name, base, base + 7);
-
-	hwif->extra_base = base + (hwif->channel ? 8 : 16);
-
-	if (ide_allocate_dma_engine(hwif))
-		return -1;
-
-	return 0;
-}
-
-static void hpt374_init(struct pci_dev *dev, struct pci_dev *dev2)
-{
-	if (dev2->irq != dev->irq) {
-		/* FIXME: we need a core pci_set_interrupt() */
-		dev2->irq = dev->irq;
-		printk(KERN_INFO DRV_NAME " %s: PCI config space interrupt "
-			"fixed\n", pci_name(dev2));
-	}
-}
-
-static void hpt371_init(struct pci_dev *dev)
-{
-	u8 mcr1 = 0;
-
-	/*
-	 * HPT371 chips physically have only one channel, the secondary one,
-	 * but the primary channel registers do exist!  Go figure...
-	 * So,  we manually disable the non-existing channel here
-	 * (if the BIOS hasn't done this already).
-	 */
-	pci_read_config_byte(dev, 0x50, &mcr1);
-	if (mcr1 & 0x04)
-		pci_write_config_byte(dev, 0x50, mcr1 & ~0x04);
-}
-
-static int hpt36x_init(struct pci_dev *dev, struct pci_dev *dev2)
-{
-	u8 mcr1 = 0, pin1 = 0, pin2 = 0;
-
-	/*
-	 * Now we'll have to force both channels enabled if
-	 * at least one of them has been enabled by BIOS...
-	 */
-	pci_read_config_byte(dev, 0x50, &mcr1);
-	if (mcr1 & 0x30)
-		pci_write_config_byte(dev, 0x50, mcr1 | 0x30);
-
-	pci_read_config_byte(dev,  PCI_INTERRUPT_PIN, &pin1);
-	pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin2);
-
-	if (pin1 != pin2 && dev->irq == dev2->irq) {
-		printk(KERN_INFO DRV_NAME " %s: onboard version of chipset, "
-			"pin1=%d pin2=%d\n", pci_name(dev), pin1, pin2);
-		return 1;
-	}
-
-	return 0;
-}
-
-#define IDE_HFLAGS_HPT3XX \
-	(IDE_HFLAG_NO_ATAPI_DMA | \
-	 IDE_HFLAG_OFF_BOARD)
-
-static const struct ide_port_ops hpt3xx_port_ops = {
-	.set_pio_mode		= hpt3xx_set_pio_mode,
-	.set_dma_mode		= hpt3xx_set_mode,
-	.maskproc		= hpt3xx_maskproc,
-	.mdma_filter		= hpt3xx_mdma_filter,
-	.udma_filter		= hpt3xx_udma_filter,
-	.cable_detect		= hpt3xx_cable_detect,
-};
-
-static const struct ide_dma_ops hpt37x_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= hpt374_dma_end,
-	.dma_test_irq		= hpt374_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_dma_ops hpt370_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= hpt370_dma_start,
-	.dma_end		= hpt370_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= hpt370_irq_timeout,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_dma_ops hpt36x_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= hpt366_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info hpt366_chipsets[] = {
-	{	/* 0: HPT36x */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		/*
-		 * HPT36x chips have one channel per function and have
-		 * both channel enable bits located differently and visible
-		 * to both functions -- really stupid design decision... :-(
-		 * Bit 4 is for the primary channel, bit 5 for the secondary.
-		 */
-		.enablebits	= {{0x50,0x10,0x10}, {0x54,0x04,0x04}},
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt36x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX | IDE_HFLAG_SINGLE,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	},
-	{	/* 1: HPT3xx */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_hpt366,
-		.init_hwif	= init_hwif_hpt366,
-		.init_dma	= init_dma_hpt366,
-		.enablebits	= {{0x50,0x04,0x04}, {0x54,0x04,0x04}},
-		.port_ops	= &hpt3xx_port_ops,
-		.dma_ops	= &hpt37x_dma_ops,
-		.host_flags	= IDE_HFLAGS_HPT3XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-	}
-};
-
-/**
- *	hpt366_init_one	-	called when an HPT366 is found
- *	@dev: the hpt366 device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
-static int hpt366_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct hpt_info *info = NULL;
-	struct hpt_info *dyn_info;
-	struct pci_dev *dev2 = NULL;
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-	u8 rev = dev->revision;
-	int ret;
-
-	if ((idx == 0 || idx == 4) && (PCI_FUNC(dev->devfn) & 1))
-		return -ENODEV;
-
-	switch (idx) {
-	case 0:
-		if (rev < 3)
-			info = &hpt36x;
-		else {
-			switch (min_t(u8, rev, 6)) {
-			case 3: info = &hpt370;  break;
-			case 4: info = &hpt370a; break;
-			case 5: info = &hpt372;  break;
-			case 6: info = &hpt372n; break;
-			}
-			idx++;
-		}
-		break;
-	case 1:
-		info = (rev > 1) ? &hpt372n : &hpt372a;
-		break;
-	case 2:
-		info = (rev > 1) ? &hpt302n : &hpt302;
-		break;
-	case 3:
-		hpt371_init(dev);
-		info = (rev > 1) ? &hpt371n : &hpt371;
-		break;
-	case 4:
-		info = &hpt374;
-		break;
-	case 5:
-		info = &hpt372n;
-		break;
-	}
-
-	printk(KERN_INFO DRV_NAME ": %s chipset detected\n", info->chip_name);
-
-	d = hpt366_chipsets[min_t(u8, idx, 1)];
-
-	d.udma_mask = info->udma_mask;
-
-	/* fixup ->dma_ops for HPT370/HPT370A */
-	if (info == &hpt370 || info == &hpt370a)
-		d.dma_ops = &hpt370_dma_ops;
-
-	if (info == &hpt36x || info == &hpt374)
-		dev2 = pci_get_slot(dev->bus, dev->devfn + 1);
-
-	dyn_info = kcalloc(dev2 ? 2 : 1, sizeof(*dyn_info), GFP_KERNEL);
-	if (dyn_info == NULL) {
-		printk(KERN_ERR "%s %s: out of memory!\n",
-			d.name, pci_name(dev));
-		pci_dev_put(dev2);
-		return -ENOMEM;
-	}
-
-	/*
-	 * Copy everything from a static "template" structure
-	 * to just allocated per-chip hpt_info structure.
-	 */
-	memcpy(dyn_info, info, sizeof(*dyn_info));
-
-	if (dev2) {
-		memcpy(dyn_info + 1, info, sizeof(*dyn_info));
-
-		if (info == &hpt374)
-			hpt374_init(dev, dev2);
-		else {
-			if (hpt36x_init(dev, dev2))
-				d.host_flags &= ~IDE_HFLAG_NON_BOOTABLE;
-		}
-
-		ret = ide_pci_init_two(dev, dev2, &d, dyn_info);
-		if (ret < 0) {
-			pci_dev_put(dev2);
-			kfree(dyn_info);
-		}
-		return ret;
-	}
-
-	ret = ide_pci_init_one(dev, &d, dyn_info);
-	if (ret < 0)
-		kfree(dyn_info);
-
-	return ret;
-}
-
-static void hpt366_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct ide_info *info = host->host_priv;
-	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
-
-	ide_pci_remove(dev);
-	pci_dev_put(dev2);
-	kfree(info);
-}
-
-static const struct pci_device_id hpt366_pci_tbl[] = {
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT366),  0 },
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT372),  1 },
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT302),  2 },
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT371),  3 },
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT374),  4 },
-	{ PCI_VDEVICE(TTI, PCI_DEVICE_ID_TTI_HPT372N), 5 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, hpt366_pci_tbl);
-
-static struct pci_driver hpt366_pci_driver = {
-	.name		= "HPT366_IDE",
-	.id_table	= hpt366_pci_tbl,
-	.probe		= hpt366_init_one,
-	.remove		= hpt366_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init hpt366_ide_init(void)
-{
-	return ide_pci_register_driver(&hpt366_pci_driver);
-}
-
-static void __exit hpt366_ide_exit(void)
-{
-	pci_unregister_driver(&hpt366_pci_driver);
-}
-
-module_init(hpt366_ide_init);
-module_exit(hpt366_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for Highpoint HPT366 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ht6560b.c b/drivers/ide/ht6560b.c
deleted file mode 100644
index 743bc3693ac8a..0000000000000
--- a/drivers/ide/ht6560b.c
+++ /dev/null
@@ -1,383 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1995-2000  Linus Torvalds & author (see below)
- */
-
-/*
- *  HT-6560B EIDE-controller support
- *  To activate controller support use kernel parameter "ide0=ht6560b".
- *  Use hdparm utility to enable PIO mode support.
- *
- *  Author:    Mikko Ala-Fossi            <maf@iki.fi>
- *             Jan Evert van Grootheest   <j.e.van.grootheest@caiway.nl>
- *
- */
-
-#define DRV_NAME	"ht6560b"
-#define HT6560B_VERSION "v0.08"
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-/* #define DEBUG */  /* remove comments for DEBUG messages */
-
-/*
- * The special i/o-port that HT-6560B uses to configuration:
- *    bit0 (0x01): "1" selects secondary interface
- *    bit2 (0x04): "1" enables FIFO function
- *    bit5 (0x20): "1" enables prefetched data read function  (???)
- *
- * The special i/o-port that HT-6560A uses to configuration:
- *    bit0 (0x01): "1" selects secondary interface
- *    bit1 (0x02): "1" enables prefetched data read function
- *    bit2 (0x04): "0" enables multi-master system	      (?)
- *    bit3 (0x08): "1" 3 cycle time, "0" 2 cycle time	      (?)
- */
-#define HT_CONFIG_PORT	  0x3e6
-
-static inline u8 HT_CONFIG(ide_drive_t *drive)
-{
-	return ((unsigned long)ide_get_drivedata(drive) & 0xff00) >> 8;
-}
-
-/*
- * FIFO + PREFETCH (both a/b-model)
- */
-#define HT_CONFIG_DEFAULT 0x1c /* no prefetch */
-/* #define HT_CONFIG_DEFAULT 0x3c */ /* with prefetch */
-#define HT_SECONDARY_IF	  0x01
-#define HT_PREFETCH_MODE  0x20
-
-/*
- * ht6560b Timing values:
- *
- * I reviewed some assembler source listings of htide drivers and found
- * out how they setup those cycle time interfacing values, as they at Holtek
- * call them. IDESETUP.COM that is supplied with the drivers figures out
- * optimal values and fetches those values to drivers. I found out that
- * they use Select register to fetch timings to the ide board right after
- * interface switching. After that it was quite easy to add code to
- * ht6560b.c.
- *
- * IDESETUP.COM gave me values 0x24, 0x45, 0xaa, 0xff that worked fine
- * for hda and hdc. But hdb needed higher values to work, so I guess
- * that sometimes it is necessary to give higher value than IDESETUP
- * gives.   [see cmd640.c for an extreme example of this. -ml]
- *
- * Perhaps I should explain something about these timing values:
- * The higher nibble of value is the Recovery Time  (rt) and the lower nibble
- * of the value is the Active Time  (at). Minimum value 2 is the fastest and
- * the maximum value 15 is the slowest. Default values should be 15 for both.
- * So 0x24 means 2 for rt and 4 for at. Each of the drives should have
- * both values, and IDESETUP gives automatically rt=15 st=15 for CDROMs or
- * similar. If value is too small there will be all sorts of failures.
- *
- * Timing byte consists of
- *	High nibble:  Recovery Cycle Time  (rt)
- *	     The valid values range from 2 to 15. The default is 15.
- *
- *	Low nibble:   Active Cycle Time	   (at)
- *	     The valid values range from 2 to 15. The default is 15.
- *
- * You can obtain optimized timing values by running Holtek IDESETUP.COM
- * for DOS. DOS drivers get their timing values from command line, where
- * the first value is the Recovery Time and the second value is the
- * Active Time for each drive. Smaller value gives higher speed.
- * In case of failures you should probably fall back to a higher value.
- */
-static inline u8 HT_TIMING(ide_drive_t *drive)
-{
-	return (unsigned long)ide_get_drivedata(drive) & 0x00ff;
-}
-
-#define HT_TIMING_DEFAULT 0xff
-
-/*
- * This routine handles interface switching for the peculiar hardware design
- * on the F.G.I./Holtek HT-6560B VLB IDE interface.
- * The HT-6560B can only enable one IDE port at a time, and requires a
- * silly sequence (below) whenever we switch between primary and secondary.
- */
-
-/*
- * This routine is invoked from ide.c to prepare for access to a given drive.
- */
-static void ht6560b_dev_select(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
-	static u8 current_select = 0;
-	static u8 current_timing = 0;
-	u8 select, timing;
-	
-	local_irq_save(flags);
-
-	select = HT_CONFIG(drive);
-	timing = HT_TIMING(drive);
-
-	/*
-	 * Need to enforce prefetch sometimes because otherwise
-	 * it'll hang (hard).
-	 */
-	if (drive->media != ide_disk ||
-	    (drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		select |= HT_PREFETCH_MODE;
-
-	if (select != current_select || timing != current_timing) {
-		current_select = select;
-		current_timing = timing;
-		(void)inb(HT_CONFIG_PORT);
-		(void)inb(HT_CONFIG_PORT);
-		(void)inb(HT_CONFIG_PORT);
-		(void)inb(HT_CONFIG_PORT);
-		outb(select, HT_CONFIG_PORT);
-		/*
-		 * Set timing for this drive:
-		 */
-		outb(timing, hwif->io_ports.device_addr);
-		(void)inb(hwif->io_ports.status_addr);
-#ifdef DEBUG
-		printk("ht6560b: %s: select=%#x timing=%#x\n",
-			drive->name, select, timing);
-#endif
-	}
-	local_irq_restore(flags);
-
-	outb(drive->select | ATA_DEVICE_OBS, hwif->io_ports.device_addr);
-}
-
-/*
- * Autodetection and initialization of ht6560b
- */
-static int __init try_to_init_ht6560b(void)
-{
-	u8 orig_value;
-	int i;
-	
-	/* Autodetect ht6560b */
-	if ((orig_value = inb(HT_CONFIG_PORT)) == 0xff)
-		return 0;
-	
-	for (i=3;i>0;i--) {
-		outb(0x00, HT_CONFIG_PORT);
-		if (!( (~inb(HT_CONFIG_PORT)) & 0x3f )) {
-			outb(orig_value, HT_CONFIG_PORT);
-			return 0;
-		}
-	}
-	outb(0x00, HT_CONFIG_PORT);
-	if ((~inb(HT_CONFIG_PORT))& 0x3f) {
-		outb(orig_value, HT_CONFIG_PORT);
-		return 0;
-	}
-	/*
-	 * Ht6560b autodetected
-	 */
-	outb(HT_CONFIG_DEFAULT, HT_CONFIG_PORT);
-	outb(HT_TIMING_DEFAULT, 0x1f6);	/* Select register */
-	(void)inb(0x1f7);		/* Status register */
-
-	printk("ht6560b " HT6560B_VERSION
-	       ": chipset detected and initialized"
-#ifdef DEBUG
-	       " with debug enabled"
-#endif
-	       "\n"
-		);
-	return 1;
-}
-
-static u8 ht_pio2timings(ide_drive_t *drive, const u8 pio)
-{
-	int active_time, recovery_time;
-	int active_cycles, recovery_cycles;
-	int bus_speed = ide_vlb_clk ? ide_vlb_clk : 50;
-
-        if (pio) {
-		unsigned int cycle_time;
-		struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-
-		cycle_time = ide_pio_cycle_time(drive, pio);
-
-		/*
-		 *  Just like opti621.c we try to calculate the
-		 *  actual cycle time for recovery and activity
-		 *  according system bus speed.
-		 */
-		active_time = t->active;
-		recovery_time = cycle_time - active_time - t->setup;
-		/*
-		 *  Cycle times should be Vesa bus cycles
-		 */
-		active_cycles   = (active_time   * bus_speed + 999) / 1000;
-		recovery_cycles = (recovery_time * bus_speed + 999) / 1000;
-		/*
-		 *  Upper and lower limits
-		 */
-		if (active_cycles   < 2)  active_cycles   = 2;
-		if (recovery_cycles < 2)  recovery_cycles = 2;
-		if (active_cycles   > 15) active_cycles   = 15;
-		if (recovery_cycles > 15) recovery_cycles = 0;  /* 0==16 */
-		
-#ifdef DEBUG
-		printk("ht6560b: drive %s setting pio=%d recovery=%d (%dns) active=%d (%dns)\n", drive->name, pio, recovery_cycles, recovery_time, active_cycles, active_time);
-#endif
-		
-		return (u8)((recovery_cycles << 4) | active_cycles);
-	} else {
-		
-#ifdef DEBUG
-		printk("ht6560b: drive %s setting pio=0\n", drive->name);
-#endif
-		
-		return HT_TIMING_DEFAULT;    /* default setting */
-	}
-}
-
-static DEFINE_SPINLOCK(ht6560b_lock);
-
-/*
- *  Enable/Disable so called prefetch mode
- */
-static void ht_set_prefetch(ide_drive_t *drive, u8 state)
-{
-	unsigned long flags, config;
-	int t = HT_PREFETCH_MODE << 8;
-
-	spin_lock_irqsave(&ht6560b_lock, flags);
-
-	config = (unsigned long)ide_get_drivedata(drive);
-
-	/*
-	 *  Prefetch mode and unmask irq seems to conflict
-	 */
-	if (state) {
-		config |= t;   /* enable prefetch mode */
-		drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
-		drive->dev_flags &= ~IDE_DFLAG_UNMASK;
-	} else {
-		config &= ~t;  /* disable prefetch mode */
-		drive->dev_flags &= ~IDE_DFLAG_NO_UNMASK;
-	}
-
-	ide_set_drivedata(drive, (void *)config);
-
-	spin_unlock_irqrestore(&ht6560b_lock, flags);
-
-#ifdef DEBUG
-	printk("ht6560b: drive %s prefetch mode %sabled\n", drive->name, (state ? "en" : "dis"));
-#endif
-}
-
-static void ht6560b_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long flags, config;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	u8 timing;
-	
-	switch (pio) {
-	case 8:         /* set prefetch off */
-	case 9:         /* set prefetch on */
-		ht_set_prefetch(drive, pio & 1);
-		return;
-	}
-
-	timing = ht_pio2timings(drive, pio);
-
-	spin_lock_irqsave(&ht6560b_lock, flags);
-	config = (unsigned long)ide_get_drivedata(drive);
-	config &= 0xff00;
-	config |= timing;
-	ide_set_drivedata(drive, (void *)config);
-	spin_unlock_irqrestore(&ht6560b_lock, flags);
-
-#ifdef DEBUG
-	printk("ht6560b: drive %s tuned to pio mode %#x timing=%#x\n", drive->name, pio, timing);
-#endif
-}
-
-static void __init ht6560b_init_dev(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	/* Setting default configurations for drives. */
-	unsigned long t = (HT_CONFIG_DEFAULT << 8) | HT_TIMING_DEFAULT;
-
-	if (hwif->channel)
-		t |= (HT_SECONDARY_IF << 8);
-
-	ide_set_drivedata(drive, (void *)t);
-}
-
-static bool probe_ht6560b;
-
-module_param_named(probe, probe_ht6560b, bool, 0);
-MODULE_PARM_DESC(probe, "probe for HT6560B chipset");
-
-static const struct ide_tp_ops ht6560b_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ht6560b_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_port_ops ht6560b_port_ops = {
-	.init_dev		= ht6560b_init_dev,
-	.set_pio_mode		= ht6560b_set_pio_mode,
-};
-
-static const struct ide_port_info ht6560b_port_info __initconst = {
-	.name			= DRV_NAME,
-	.chipset		= ide_ht6560b,
-	.tp_ops 		= &ht6560b_tp_ops,
-	.port_ops		= &ht6560b_port_ops,
-	.host_flags		= IDE_HFLAG_SERIALIZE | /* is this needed? */
-				  IDE_HFLAG_NO_DMA |
-				  IDE_HFLAG_ABUSE_PREFETCH,
-	.pio_mask		= ATA_PIO4,
-};
-
-static int __init ht6560b_init(void)
-{
-	if (probe_ht6560b == 0)
-		return -ENODEV;
-
-	if (!request_region(HT_CONFIG_PORT, 1, DRV_NAME)) {
-		printk(KERN_NOTICE "%s: HT_CONFIG_PORT not found\n",
-			__func__);
-		return -ENODEV;
-	}
-
-	if (!try_to_init_ht6560b()) {
-		printk(KERN_NOTICE "%s: HBA not found\n", __func__);
-		goto release_region;
-	}
-
-	return ide_legacy_device_add(&ht6560b_port_info, 0);
-
-release_region:
-	release_region(HT_CONFIG_PORT, 1);
-	return -ENODEV;
-}
-
-module_init(ht6560b_init);
-
-MODULE_AUTHOR("See Local File");
-MODULE_DESCRIPTION("HT-6560B EIDE-controller support");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/icside.c b/drivers/ide/icside.c
deleted file mode 100644
index 329c7e4bc9d09..0000000000000
--- a/drivers/ide/icside.c
+++ /dev/null
@@ -1,692 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 1996-2004 Russell King.
- *
- * Please note that this platform does not support 32-bit IDE IO.
- */
-
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/slab.h>
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/ide.h>
-#include <linux/dma-mapping.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/scatterlist.h>
-#include <linux/io.h>
-
-#include <asm/dma.h>
-#include <asm/ecard.h>
-
-#define DRV_NAME "icside"
-
-#define ICS_IDENT_OFFSET		0x2280
-
-#define ICS_ARCIN_V5_INTRSTAT		0x0000
-#define ICS_ARCIN_V5_INTROFFSET		0x0004
-#define ICS_ARCIN_V5_IDEOFFSET		0x2800
-#define ICS_ARCIN_V5_IDEALTOFFSET	0x2b80
-#define ICS_ARCIN_V5_IDESTEPPING	6
-
-#define ICS_ARCIN_V6_IDEOFFSET_1	0x2000
-#define ICS_ARCIN_V6_INTROFFSET_1	0x2200
-#define ICS_ARCIN_V6_INTRSTAT_1		0x2290
-#define ICS_ARCIN_V6_IDEALTOFFSET_1	0x2380
-#define ICS_ARCIN_V6_IDEOFFSET_2	0x3000
-#define ICS_ARCIN_V6_INTROFFSET_2	0x3200
-#define ICS_ARCIN_V6_INTRSTAT_2		0x3290
-#define ICS_ARCIN_V6_IDEALTOFFSET_2	0x3380
-#define ICS_ARCIN_V6_IDESTEPPING	6
-
-struct cardinfo {
-	unsigned int dataoffset;
-	unsigned int ctrloffset;
-	unsigned int stepping;
-};
-
-static struct cardinfo icside_cardinfo_v5 = {
-	.dataoffset	= ICS_ARCIN_V5_IDEOFFSET,
-	.ctrloffset	= ICS_ARCIN_V5_IDEALTOFFSET,
-	.stepping	= ICS_ARCIN_V5_IDESTEPPING,
-};
-
-static struct cardinfo icside_cardinfo_v6_1 = {
-	.dataoffset	= ICS_ARCIN_V6_IDEOFFSET_1,
-	.ctrloffset	= ICS_ARCIN_V6_IDEALTOFFSET_1,
-	.stepping	= ICS_ARCIN_V6_IDESTEPPING,
-};
-
-static struct cardinfo icside_cardinfo_v6_2 = {
-	.dataoffset	= ICS_ARCIN_V6_IDEOFFSET_2,
-	.ctrloffset	= ICS_ARCIN_V6_IDEALTOFFSET_2,
-	.stepping	= ICS_ARCIN_V6_IDESTEPPING,
-};
-
-struct icside_state {
-	unsigned int channel;
-	unsigned int enabled;
-	void __iomem *irq_port;
-	void __iomem *ioc_base;
-	unsigned int sel;
-	unsigned int type;
-	struct ide_host *host;
-};
-
-#define ICS_TYPE_A3IN	0
-#define ICS_TYPE_A3USER	1
-#define ICS_TYPE_V6	3
-#define ICS_TYPE_V5	15
-#define ICS_TYPE_NOTYPE	((unsigned int)-1)
-
-/* ---------------- Version 5 PCB Support Functions --------------------- */
-/* Prototype: icside_irqenable_arcin_v5 (struct expansion_card *ec, int irqnr)
- * Purpose  : enable interrupts from card
- */
-static void icside_irqenable_arcin_v5 (struct expansion_card *ec, int irqnr)
-{
-	struct icside_state *state = ec->irq_data;
-
-	writeb(0, state->irq_port + ICS_ARCIN_V5_INTROFFSET);
-}
-
-/* Prototype: icside_irqdisable_arcin_v5 (struct expansion_card *ec, int irqnr)
- * Purpose  : disable interrupts from card
- */
-static void icside_irqdisable_arcin_v5 (struct expansion_card *ec, int irqnr)
-{
-	struct icside_state *state = ec->irq_data;
-
-	readb(state->irq_port + ICS_ARCIN_V5_INTROFFSET);
-}
-
-static const expansioncard_ops_t icside_ops_arcin_v5 = {
-	.irqenable	= icside_irqenable_arcin_v5,
-	.irqdisable	= icside_irqdisable_arcin_v5,
-};
-
-
-/* ---------------- Version 6 PCB Support Functions --------------------- */
-/* Prototype: icside_irqenable_arcin_v6 (struct expansion_card *ec, int irqnr)
- * Purpose  : enable interrupts from card
- */
-static void icside_irqenable_arcin_v6 (struct expansion_card *ec, int irqnr)
-{
-	struct icside_state *state = ec->irq_data;
-	void __iomem *base = state->irq_port;
-
-	state->enabled = 1;
-
-	switch (state->channel) {
-	case 0:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_1);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_2);
-		break;
-	case 1:
-		writeb(0, base + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(base + ICS_ARCIN_V6_INTROFFSET_1);
-		break;
-	}
-}
-
-/* Prototype: icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
- * Purpose  : disable interrupts from card
- */
-static void icside_irqdisable_arcin_v6 (struct expansion_card *ec, int irqnr)
-{
-	struct icside_state *state = ec->irq_data;
-
-	state->enabled = 0;
-
-	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-	readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-}
-
-/* Prototype: icside_irqprobe(struct expansion_card *ec)
- * Purpose  : detect an active interrupt from card
- */
-static int icside_irqpending_arcin_v6(struct expansion_card *ec)
-{
-	struct icside_state *state = ec->irq_data;
-
-	return readb(state->irq_port + ICS_ARCIN_V6_INTRSTAT_1) & 1 ||
-	       readb(state->irq_port + ICS_ARCIN_V6_INTRSTAT_2) & 1;
-}
-
-static const expansioncard_ops_t icside_ops_arcin_v6 = {
-	.irqenable	= icside_irqenable_arcin_v6,
-	.irqdisable	= icside_irqdisable_arcin_v6,
-	.irqpending	= icside_irqpending_arcin_v6,
-};
-
-/*
- * Handle routing of interrupts.  This is called before
- * we write the command to the drive.
- */
-static void icside_maskproc(ide_drive_t *drive, int mask)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-	struct icside_state *state = ecard_get_drvdata(ec);
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	state->channel = hwif->channel;
-
-	if (state->enabled && !mask) {
-		switch (hwif->channel) {
-		case 0:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			break;
-		case 1:
-			writeb(0, state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-			readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-			break;
-		}
-	} else {
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_2);
-		readb(state->irq_port + ICS_ARCIN_V6_INTROFFSET_1);
-	}
-
-	local_irq_restore(flags);
-}
-
-static const struct ide_port_ops icside_v6_no_dma_port_ops = {
-	.maskproc		= icside_maskproc,
-};
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_ICS
-/*
- * SG-DMA support.
- *
- * Similar to the BM-DMA, but we use the RiscPCs IOMD DMA controllers.
- * There is only one DMA controller per card, which means that only
- * one drive can be accessed at one time.  NOTE! We do not enforce that
- * here, but we rely on the main IDE driver spotting that both
- * interfaces use the same IRQ, which should guarantee this.
- */
-
-/*
- * Configure the IOMD to give the appropriate timings for the transfer
- * mode being requested.  We take the advice of the ATA standards, and
- * calculate the cycle time based on the transfer mode, and the EIDE
- * MW DMA specs that the drive provides in the IDENTIFY command.
- *
- * We have the following IOMD DMA modes to choose from:
- *
- *	Type	Active		Recovery	Cycle
- *	A	250 (250)	312 (550)	562 (800)
- *	B	187		250		437
- *	C	125 (125)	125 (375)	250 (500)
- *	D	62		125		187
- *
- * (figures in brackets are actual measured timings)
- *
- * However, we also need to take care of the read/write active and
- * recovery timings:
- *
- *			Read	Write
- *  	Mode	Active	-- Recovery --	Cycle	IOMD type
- *	MW0	215	50	215	480	A
- *	MW1	80	50	50	150	C
- *	MW2	70	25	25	120	C
- */
-static void icside_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long cycle_time = 0;
-	int use_dma_info = 0;
-	const u8 xfer_mode = drive->dma_mode;
-
-	switch (xfer_mode) {
-	case XFER_MW_DMA_2:
-		cycle_time = 250;
-		use_dma_info = 1;
-		break;
-
-	case XFER_MW_DMA_1:
-		cycle_time = 250;
-		use_dma_info = 1;
-		break;
-
-	case XFER_MW_DMA_0:
-		cycle_time = 480;
-		break;
-
-	case XFER_SW_DMA_2:
-	case XFER_SW_DMA_1:
-	case XFER_SW_DMA_0:
-		cycle_time = 480;
-		break;
-	}
-
-	/*
-	 * If we're going to be doing MW_DMA_1 or MW_DMA_2, we should
-	 * take care to note the values in the ID...
-	 */
-	if (use_dma_info && drive->id[ATA_ID_EIDE_DMA_TIME] > cycle_time)
-		cycle_time = drive->id[ATA_ID_EIDE_DMA_TIME];
-
-	ide_set_drivedata(drive, (void *)cycle_time);
-
-	printk(KERN_INFO "%s: %s selected (peak %luMB/s)\n",
-	       drive->name, ide_xfer_verbose(xfer_mode),
-	       2000 / (cycle_time ? cycle_time : (unsigned long) -1));
-}
-
-static const struct ide_port_ops icside_v6_port_ops = {
-	.set_dma_mode		= icside_set_dma_mode,
-	.maskproc		= icside_maskproc,
-};
-
-static void icside_dma_host_set(ide_drive_t *drive, int on)
-{
-}
-
-static int icside_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-
-	disable_dma(ec->dma);
-
-	return get_dma_residue(ec->dma) != 0;
-}
-
-static void icside_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-
-	/* We can not enable DMA on both channels simultaneously. */
-	BUG_ON(dma_channel_active(ec->dma));
-	enable_dma(ec->dma);
-}
-
-static int icside_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-	struct icside_state *state = ecard_get_drvdata(ec);
-	unsigned int dma_mode;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		dma_mode = DMA_MODE_WRITE;
-	else
-		dma_mode = DMA_MODE_READ;
-
-	/*
-	 * We can not enable DMA on both channels.
-	 */
-	BUG_ON(dma_channel_active(ec->dma));
-
-	/*
-	 * Ensure that we have the right interrupt routed.
-	 */
-	icside_maskproc(drive, 0);
-
-	/*
-	 * Route the DMA signals to the correct interface.
-	 */
-	writeb(state->sel | hwif->channel, state->ioc_base);
-
-	/*
-	 * Select the correct timing for this drive.
-	 */
-	set_dma_speed(ec->dma, (unsigned long)ide_get_drivedata(drive));
-
-	/*
-	 * Tell the DMA engine about the SG table and
-	 * data direction.
-	 */
-	set_dma_sg(ec->dma, hwif->sg_table, cmd->sg_nents);
-	set_dma_mode(ec->dma, dma_mode);
-
-	return 0;
-}
-
-static int icside_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct expansion_card *ec = ECARD_DEV(hwif->dev);
-	struct icside_state *state = ecard_get_drvdata(ec);
-
-	return readb(state->irq_port +
-		     (hwif->channel ?
-			ICS_ARCIN_V6_INTRSTAT_2 :
-			ICS_ARCIN_V6_INTRSTAT_1)) & 1;
-}
-
-static int icside_dma_init(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	hwif->dmatable_cpu	= NULL;
-	hwif->dmatable_dma	= 0;
-
-	return 0;
-}
-
-static const struct ide_dma_ops icside_v6_dma_ops = {
-	.dma_host_set		= icside_dma_host_set,
-	.dma_setup		= icside_dma_setup,
-	.dma_start		= icside_dma_start,
-	.dma_end		= icside_dma_end,
-	.dma_test_irq		= icside_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-};
-#endif
-
-static int icside_dma_off_init(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	return -EOPNOTSUPP;
-}
-
-static void icside_setup_ports(struct ide_hw *hw, void __iomem *base,
-			       struct cardinfo *info, struct expansion_card *ec)
-{
-	unsigned long port = (unsigned long)base + info->dataoffset;
-
-	hw->io_ports.data_addr	 = port;
-	hw->io_ports.error_addr	 = port + (1 << info->stepping);
-	hw->io_ports.nsect_addr	 = port + (2 << info->stepping);
-	hw->io_ports.lbal_addr	 = port + (3 << info->stepping);
-	hw->io_ports.lbam_addr	 = port + (4 << info->stepping);
-	hw->io_ports.lbah_addr	 = port + (5 << info->stepping);
-	hw->io_ports.device_addr = port + (6 << info->stepping);
-	hw->io_ports.status_addr = port + (7 << info->stepping);
-	hw->io_ports.ctl_addr	 = (unsigned long)base + info->ctrloffset;
-
-	hw->irq = ec->irq;
-	hw->dev = &ec->dev;
-}
-
-static const struct ide_port_info icside_v5_port_info = {
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_acorn,
-};
-
-static int icside_register_v5(struct icside_state *state,
-			      struct expansion_card *ec)
-{
-	void __iomem *base;
-	struct ide_host *host;
-	struct ide_hw hw, *hws[] = { &hw };
-	int ret;
-
-	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
-	if (!base)
-		return -ENOMEM;
-
-	state->irq_port = base;
-
-	ec->irqaddr  = base + ICS_ARCIN_V5_INTRSTAT;
-	ec->irqmask  = 1;
-
-	ecard_setirq(ec, &icside_ops_arcin_v5, state);
-
-	/*
-	 * Be on the safe side - disable interrupts
-	 */
-	icside_irqdisable_arcin_v5(ec, 0);
-
-	icside_setup_ports(&hw, base, &icside_cardinfo_v5, ec);
-
-	host = ide_host_alloc(&icside_v5_port_info, hws, 1);
-	if (host == NULL)
-		return -ENODEV;
-
-	state->host = host;
-
-	ecard_set_drvdata(ec, state);
-
-	ret = ide_host_register(host, &icside_v5_port_info, hws);
-	if (ret)
-		goto err_free;
-
-	return 0;
-err_free:
-	ide_host_free(host);
-	ecard_set_drvdata(ec, NULL);
-	return ret;
-}
-
-static const struct ide_port_info icside_v6_port_info = {
-	.init_dma		= icside_dma_off_init,
-	.port_ops		= &icside_v6_no_dma_port_ops,
-	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_MMIO,
-	.mwdma_mask		= ATA_MWDMA2,
-	.swdma_mask		= ATA_SWDMA2,
-	.chipset		= ide_acorn,
-};
-
-static int icside_register_v6(struct icside_state *state,
-			      struct expansion_card *ec)
-{
-	void __iomem *ioc_base, *easi_base;
-	struct ide_host *host;
-	unsigned int sel = 0;
-	int ret;
-	struct ide_hw hw[2], *hws[] = { &hw[0], &hw[1] };
-	struct ide_port_info d = icside_v6_port_info;
-
-	ioc_base = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
-	if (!ioc_base) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	easi_base = ioc_base;
-
-	if (ecard_resource_flags(ec, ECARD_RES_EASI)) {
-		easi_base = ecardm_iomap(ec, ECARD_RES_EASI, 0, 0);
-		if (!easi_base) {
-			ret = -ENOMEM;
-			goto out;
-		}
-
-		/*
-		 * Enable access to the EASI region.
-		 */
-		sel = 1 << 5;
-	}
-
-	writeb(sel, ioc_base);
-
-	ecard_setirq(ec, &icside_ops_arcin_v6, state);
-
-	state->irq_port   = easi_base;
-	state->ioc_base   = ioc_base;
-	state->sel	  = sel;
-
-	/*
-	 * Be on the safe side - disable interrupts
-	 */
-	icside_irqdisable_arcin_v6(ec, 0);
-
-	icside_setup_ports(&hw[0], easi_base, &icside_cardinfo_v6_1, ec);
-	icside_setup_ports(&hw[1], easi_base, &icside_cardinfo_v6_2, ec);
-
-	host = ide_host_alloc(&d, hws, 2);
-	if (host == NULL)
-		return -ENODEV;
-
-	state->host = host;
-
-	ecard_set_drvdata(ec, state);
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_ICS
-	if (ec->dma != NO_DMA && !request_dma(ec->dma, DRV_NAME)) {
-		d.init_dma = icside_dma_init;
-		d.port_ops = &icside_v6_port_ops;
-		d.dma_ops  = &icside_v6_dma_ops;
-	}
-#endif
-
-	ret = ide_host_register(host, &d, hws);
-	if (ret)
-		goto err_free;
-
-	return 0;
-err_free:
-	ide_host_free(host);
-	if (d.dma_ops)
-		free_dma(ec->dma);
-	ecard_set_drvdata(ec, NULL);
-out:
-	return ret;
-}
-
-static int icside_probe(struct expansion_card *ec, const struct ecard_id *id)
-{
-	struct icside_state *state;
-	void __iomem *idmem;
-	int ret;
-
-	ret = ecard_request_resources(ec);
-	if (ret)
-		goto out;
-
-	state = kzalloc(sizeof(struct icside_state), GFP_KERNEL);
-	if (!state) {
-		ret = -ENOMEM;
-		goto release;
-	}
-
-	state->type	= ICS_TYPE_NOTYPE;
-
-	idmem = ecardm_iomap(ec, ECARD_RES_IOCFAST, 0, 0);
-	if (idmem) {
-		unsigned int type;
-
-		type = readb(idmem + ICS_IDENT_OFFSET) & 1;
-		type |= (readb(idmem + ICS_IDENT_OFFSET + 4) & 1) << 1;
-		type |= (readb(idmem + ICS_IDENT_OFFSET + 8) & 1) << 2;
-		type |= (readb(idmem + ICS_IDENT_OFFSET + 12) & 1) << 3;
-		ecardm_iounmap(ec, idmem);
-
-		state->type = type;
-	}
-
-	switch (state->type) {
-	case ICS_TYPE_A3IN:
-		dev_warn(&ec->dev, "A3IN unsupported\n");
-		ret = -ENODEV;
-		break;
-
-	case ICS_TYPE_A3USER:
-		dev_warn(&ec->dev, "A3USER unsupported\n");
-		ret = -ENODEV;
-		break;
-
-	case ICS_TYPE_V5:
-		ret = icside_register_v5(state, ec);
-		break;
-
-	case ICS_TYPE_V6:
-		ret = icside_register_v6(state, ec);
-		break;
-
-	default:
-		dev_warn(&ec->dev, "unknown interface type\n");
-		ret = -ENODEV;
-		break;
-	}
-
-	if (ret == 0)
-		goto out;
-
-	kfree(state);
- release:
-	ecard_release_resources(ec);
- out:
-	return ret;
-}
-
-static void icside_remove(struct expansion_card *ec)
-{
-	struct icside_state *state = ecard_get_drvdata(ec);
-
-	switch (state->type) {
-	case ICS_TYPE_V5:
-		/* FIXME: tell IDE to stop using the interface */
-
-		/* Disable interrupts */
-		icside_irqdisable_arcin_v5(ec, 0);
-		break;
-
-	case ICS_TYPE_V6:
-		/* FIXME: tell IDE to stop using the interface */
-		if (ec->dma != NO_DMA)
-			free_dma(ec->dma);
-
-		/* Disable interrupts */
-		icside_irqdisable_arcin_v6(ec, 0);
-
-		/* Reset the ROM pointer/EASI selection */
-		writeb(0, state->ioc_base);
-		break;
-	}
-
-	ecard_set_drvdata(ec, NULL);
-
-	kfree(state);
-	ecard_release_resources(ec);
-}
-
-static void icside_shutdown(struct expansion_card *ec)
-{
-	struct icside_state *state = ecard_get_drvdata(ec);
-	unsigned long flags;
-
-	/*
-	 * Disable interrupts from this card.  We need to do
-	 * this before disabling EASI since we may be accessing
-	 * this register via that region.
-	 */
-	local_irq_save(flags);
-	ec->ops->irqdisable(ec, 0);
-	local_irq_restore(flags);
-
-	/*
-	 * Reset the ROM pointer so that we can read the ROM
-	 * after a soft reboot.  This also disables access to
-	 * the IDE taskfile via the EASI region.
-	 */
-	if (state->ioc_base)
-		writeb(0, state->ioc_base);
-}
-
-static const struct ecard_id icside_ids[] = {
-	{ MANU_ICS,  PROD_ICS_IDE  },
-	{ MANU_ICS2, PROD_ICS2_IDE },
-	{ 0xffff, 0xffff }
-};
-
-static struct ecard_driver icside_driver = {
-	.probe		= icside_probe,
-	.remove		= icside_remove,
-	.shutdown	= icside_shutdown,
-	.id_table	= icside_ids,
-	.drv = {
-		.name	= "icside",
-	},
-};
-
-static int __init icside_init(void)
-{
-	return ecard_register_driver(&icside_driver);
-}
-
-static void __exit icside_exit(void)
-{
-	ecard_remove_driver(&icside_driver);
-}
-
-MODULE_AUTHOR("Russell King <rmk@arm.linux.org.uk>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("ICS IDE driver");
-
-module_init(icside_init);
-module_exit(icside_exit);
diff --git a/drivers/ide/ide-4drives.c b/drivers/ide/ide-4drives.c
deleted file mode 100644
index 06c6215e0cbe3..0000000000000
--- a/drivers/ide/ide-4drives.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "ide-4drives"
-
-static bool probe_4drives;
-
-module_param_named(probe, probe_4drives, bool, 0);
-MODULE_PARM_DESC(probe, "probe for generic IDE chipset with 4 drives/port");
-
-static void ide_4drives_init_dev(ide_drive_t *drive)
-{
-	if (drive->hwif->channel)
-		drive->select ^= 0x20;
-}
-
-static const struct ide_port_ops ide_4drives_port_ops = {
-	.init_dev		= ide_4drives_init_dev,
-};
-
-static const struct ide_port_info ide_4drives_port_info = {
-	.port_ops		= &ide_4drives_port_ops,
-	.host_flags		= IDE_HFLAG_SERIALIZE | IDE_HFLAG_NO_DMA |
-				  IDE_HFLAG_4DRIVES,
-	.chipset		= ide_4drives,
-};
-
-static int __init ide_4drives_init(void)
-{
-	unsigned long base = 0x1f0, ctl = 0x3f6;
-	struct ide_hw hw, *hws[] = { &hw, &hw };
-
-	if (probe_4drives == 0)
-		return -ENODEV;
-
-	if (!request_region(base, 8, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
-				DRV_NAME, base, base + 7);
-		return -EBUSY;
-	}
-
-	if (!request_region(ctl, 1, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n",
-				DRV_NAME, ctl);
-		release_region(base, 8);
-		return -EBUSY;
-	}
-
-	memset(&hw, 0, sizeof(hw));
-
-	ide_std_init_ports(&hw, base, ctl);
-	hw.irq = 14;
-
-	return ide_host_add(&ide_4drives_port_info, hws, 2, NULL);
-}
-
-module_init(ide_4drives_init);
-
-MODULE_AUTHOR("Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("generic IDE chipset with 4 drives/port support");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c
deleted file mode 100644
index 05e18d6581416..0000000000000
--- a/drivers/ide/ide-acpi.c
+++ /dev/null
@@ -1,622 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Provides ACPI support for IDE drives.
- *
- * Copyright (C) 2005 Intel Corp.
- * Copyright (C) 2005 Randy Dunlap
- * Copyright (C) 2006 SUSE Linux Products GmbH
- * Copyright (C) 2006 Hannes Reinecke
- */
-
-#include <linux/acpi.h>
-#include <linux/ata.h>
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/ide.h>
-#include <linux/pci.h>
-#include <linux/dmi.h>
-#include <linux/module.h>
-
-#define REGS_PER_GTF		7
-
-struct GTM_buffer {
-	u32	PIO_speed0;
-	u32	DMA_speed0;
-	u32	PIO_speed1;
-	u32	DMA_speed1;
-	u32	GTM_flags;
-};
-
-struct ide_acpi_drive_link {
-	acpi_handle	 obj_handle;
-	u8		 idbuff[512];
-};
-
-struct ide_acpi_hwif_link {
-	ide_hwif_t			*hwif;
-	acpi_handle			 obj_handle;
-	struct GTM_buffer		 gtm;
-	struct ide_acpi_drive_link	 master;
-	struct ide_acpi_drive_link	 slave;
-};
-
-#undef DEBUGGING
-/* note: adds function name and KERN_DEBUG */
-#ifdef DEBUGGING
-#define DEBPRINT(fmt, args...)	\
-		printk(KERN_DEBUG "%s: " fmt, __func__, ## args)
-#else
-#define DEBPRINT(fmt, args...)	do {} while (0)
-#endif	/* DEBUGGING */
-
-static bool ide_noacpi;
-module_param_named(noacpi, ide_noacpi, bool, 0);
-MODULE_PARM_DESC(noacpi, "disable IDE ACPI support");
-
-static bool ide_acpigtf;
-module_param_named(acpigtf, ide_acpigtf, bool, 0);
-MODULE_PARM_DESC(acpigtf, "enable IDE ACPI _GTF support");
-
-static bool ide_acpionboot;
-module_param_named(acpionboot, ide_acpionboot, bool, 0);
-MODULE_PARM_DESC(acpionboot, "call IDE ACPI methods on boot");
-
-static bool ide_noacpi_psx;
-static int no_acpi_psx(const struct dmi_system_id *id)
-{
-	ide_noacpi_psx = true;
-	printk(KERN_NOTICE"%s detected - disable ACPI _PSx.\n", id->ident);
-	return 0;
-}
-
-static const struct dmi_system_id ide_acpi_dmi_table[] = {
-	/* Bug 9673. */
-	/* We should check if this is because ACPI NVS isn't save/restored. */
-	{
-		.callback = no_acpi_psx,
-		.ident    = "HP nx9005",
-		.matches  = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies Ltd."),
-			DMI_MATCH(DMI_BIOS_VERSION, "KAM1.60")
-		},
-	},
-
-	{ }	/* terminate list */
-};
-
-int ide_acpi_init(void)
-{
-	dmi_check_system(ide_acpi_dmi_table);
-	return 0;
-}
-
-bool ide_port_acpi(ide_hwif_t *hwif)
-{
-	return ide_noacpi == 0 && hwif->acpidata;
-}
-
-static acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
-{
-	struct acpi_device *adev;
-
-	if (!handle || acpi_bus_get_device(handle, &adev))
-		return NULL;
-
-	adev = acpi_find_child_device(adev, addr, false);
-	return adev ? adev->handle : NULL;
-}
-
-/**
- * ide_get_dev_handle - finds acpi_handle and PCI device.function
- * @dev: device to locate
- * @handle: returned acpi_handle for @dev
- * @pcidevfn: return PCI device.func for @dev
- *
- * Returns the ACPI object handle to the corresponding PCI device.
- *
- * Returns 0 on success, <0 on error.
- */
-static int ide_get_dev_handle(struct device *dev, acpi_handle *handle,
-			       u64 *pcidevfn)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	unsigned int bus, devnum, func;
-	u64 addr;
-	acpi_handle dev_handle;
-	acpi_status status;
-	struct acpi_device_info	*dinfo = NULL;
-	int ret = -ENODEV;
-
-	bus = pdev->bus->number;
-	devnum = PCI_SLOT(pdev->devfn);
-	func = PCI_FUNC(pdev->devfn);
-	/* ACPI _ADR encoding for PCI bus: */
-	addr = (u64)(devnum << 16 | func);
-
-	DEBPRINT("ENTER: pci %02x:%02x.%01x\n", bus, devnum, func);
-
-	dev_handle = ACPI_HANDLE(dev);
-	if (!dev_handle) {
-		DEBPRINT("no acpi handle for device\n");
-		goto err;
-	}
-
-	status = acpi_get_object_info(dev_handle, &dinfo);
-	if (ACPI_FAILURE(status)) {
-		DEBPRINT("get_object_info for device failed\n");
-		goto err;
-	}
-	if (dinfo && (dinfo->valid & ACPI_VALID_ADR) &&
-	    dinfo->address == addr) {
-		*pcidevfn = addr;
-		*handle = dev_handle;
-	} else {
-		DEBPRINT("get_object_info for device has wrong "
-			" address: %llu, should be %u\n",
-			dinfo ? (unsigned long long)dinfo->address : -1ULL,
-			(unsigned int)addr);
-		goto err;
-	}
-
-	DEBPRINT("for dev=0x%x.%x, addr=0x%llx, *handle=0x%p\n",
-		 devnum, func, (unsigned long long)addr, *handle);
-	ret = 0;
-err:
-	kfree(dinfo);
-	return ret;
-}
-
-/**
- * ide_acpi_hwif_get_handle - Get ACPI object handle for a given hwif
- * @hwif: device to locate
- *
- * Retrieves the object handle for a given hwif.
- *
- * Returns handle on success, 0 on error.
- */
-static acpi_handle ide_acpi_hwif_get_handle(ide_hwif_t *hwif)
-{
-	struct device		*dev = hwif->gendev.parent;
-	acpi_handle		dev_handle;
-	u64			pcidevfn;
-	acpi_handle		chan_handle;
-	int			err;
-
-	DEBPRINT("ENTER: device %s\n", hwif->name);
-
-	if (!dev) {
-		DEBPRINT("no PCI device for %s\n", hwif->name);
-		return NULL;
-	}
-
-	err = ide_get_dev_handle(dev, &dev_handle, &pcidevfn);
-	if (err < 0) {
-		DEBPRINT("ide_get_dev_handle failed (%d)\n", err);
-		return NULL;
-	}
-
-	/* get child objects of dev_handle == channel objects,
-	 * + _their_ children == drive objects */
-	/* channel is hwif->channel */
-	chan_handle = acpi_get_child(dev_handle, hwif->channel);
-	DEBPRINT("chan adr=%d: handle=0x%p\n",
-		 hwif->channel, chan_handle);
-
-	return chan_handle;
-}
-
-/**
- * do_drive_get_GTF - get the drive bootup default taskfile settings
- * @drive: the drive for which the taskfile settings should be retrieved
- * @gtf_length: number of bytes of _GTF data returned at @gtf_address
- * @gtf_address: buffer containing _GTF taskfile arrays
- *
- * The _GTF method has no input parameters.
- * It returns a variable number of register set values (registers
- * hex 1F1..1F7, taskfiles).
- * The <variable number> is not known in advance, so have ACPI-CA
- * allocate the buffer as needed and return it, then free it later.
- *
- * The returned @gtf_length and @gtf_address are only valid if the
- * function return value is 0.
- */
-static int do_drive_get_GTF(ide_drive_t *drive,
-		     unsigned int *gtf_length, unsigned long *gtf_address,
-		     unsigned long *obj_loc)
-{
-	acpi_status			status;
-	struct acpi_buffer		output;
-	union acpi_object 		*out_obj;
-	int				err = -ENODEV;
-
-	*gtf_length = 0;
-	*gtf_address = 0UL;
-	*obj_loc = 0UL;
-
-	if (!drive->acpidata->obj_handle) {
-		DEBPRINT("No ACPI object found for %s\n", drive->name);
-		goto out;
-	}
-
-	/* Setting up output buffer */
-	output.length = ACPI_ALLOCATE_BUFFER;
-	output.pointer = NULL;	/* ACPI-CA sets this; save/free it later */
-
-	/* _GTF has no input parameters */
-	err = -EIO;
-	status = acpi_evaluate_object(drive->acpidata->obj_handle, "_GTF",
-				      NULL, &output);
-	if (ACPI_FAILURE(status)) {
-		printk(KERN_DEBUG
-		       "%s: Run _GTF error: status = 0x%x\n",
-		       __func__, status);
-		goto out;
-	}
-
-	if (!output.length || !output.pointer) {
-		DEBPRINT("Run _GTF: "
-		       "length or ptr is NULL (0x%llx, 0x%p)\n",
-		       (unsigned long long)output.length,
-		       output.pointer);
-		goto out;
-	}
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		DEBPRINT("Run _GTF: error: "
-		       "expected object type of ACPI_TYPE_BUFFER, "
-		       "got 0x%x\n", out_obj->type);
-		err = -ENOENT;
-		kfree(output.pointer);
-		goto out;
-	}
-
-	if (!out_obj->buffer.length || !out_obj->buffer.pointer ||
-	    out_obj->buffer.length % REGS_PER_GTF) {
-		printk(KERN_ERR
-		       "%s: unexpected GTF length (%d) or addr (0x%p)\n",
-		       __func__, out_obj->buffer.length,
-		       out_obj->buffer.pointer);
-		err = -ENOENT;
-		kfree(output.pointer);
-		goto out;
-	}
-
-	*gtf_length = out_obj->buffer.length;
-	*gtf_address = (unsigned long)out_obj->buffer.pointer;
-	*obj_loc = (unsigned long)out_obj;
-	DEBPRINT("returning gtf_length=%d, gtf_address=0x%lx, obj_loc=0x%lx\n",
-		 *gtf_length, *gtf_address, *obj_loc);
-	err = 0;
-out:
-	return err;
-}
-
-/**
- * do_drive_set_taskfiles - write the drive taskfile settings from _GTF
- * @drive: the drive to which the taskfile command should be sent
- * @gtf_length: total number of bytes of _GTF taskfiles
- * @gtf_address: location of _GTF taskfile arrays
- *
- * Write {gtf_address, length gtf_length} in groups of
- * REGS_PER_GTF bytes.
- */
-static int do_drive_set_taskfiles(ide_drive_t *drive,
-				  unsigned int gtf_length,
-				  unsigned long gtf_address)
-{
-	int			rc = 0, err;
-	int			gtf_count = gtf_length / REGS_PER_GTF;
-	int			ix;
-
-	DEBPRINT("total GTF bytes=%u (0x%x), gtf_count=%d, addr=0x%lx\n",
-		 gtf_length, gtf_length, gtf_count, gtf_address);
-
-	/* send all taskfile registers (0x1f1-0x1f7) *in*that*order* */
-	for (ix = 0; ix < gtf_count; ix++) {
-		u8 *gtf = (u8 *)(gtf_address + ix * REGS_PER_GTF);
-		struct ide_cmd cmd;
-
-		DEBPRINT("(0x1f1-1f7): "
-			 "hex: %02x %02x %02x %02x %02x %02x %02x\n",
-			 gtf[0], gtf[1], gtf[2],
-			 gtf[3], gtf[4], gtf[5], gtf[6]);
-
-		if (!ide_acpigtf) {
-			DEBPRINT("_GTF execution disabled\n");
-			continue;
-		}
-
-		/* convert GTF to taskfile */
-		memset(&cmd, 0, sizeof(cmd));
-		memcpy(&cmd.tf.feature, gtf, REGS_PER_GTF);
-		cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-		cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-		err = ide_no_data_taskfile(drive, &cmd);
-		if (err) {
-			printk(KERN_ERR "%s: ide_no_data_taskfile failed: %u\n",
-					__func__, err);
-			rc = err;
-		}
-	}
-
-	return rc;
-}
-
-/**
- * ide_acpi_exec_tfs - get then write drive taskfile settings
- * @drive: the drive for which the taskfile settings should be
- *         written.
- *
- * According to the ACPI spec this should be called after _STM
- * has been evaluated for the interface. Some ACPI vendors interpret
- * that as a hard requirement and modify the taskfile according
- * to the Identify Drive information passed down with _STM.
- * So one should really make sure to call this only after _STM has
- * been executed.
- */
-int ide_acpi_exec_tfs(ide_drive_t *drive)
-{
-	int		ret;
-	unsigned int	gtf_length;
-	unsigned long	gtf_address;
-	unsigned long	obj_loc;
-
-	DEBPRINT("call get_GTF, drive=%s port=%d\n", drive->name, drive->dn);
-
-	ret = do_drive_get_GTF(drive, &gtf_length, &gtf_address, &obj_loc);
-	if (ret < 0) {
-		DEBPRINT("get_GTF error (%d)\n", ret);
-		return ret;
-	}
-
-	DEBPRINT("call set_taskfiles, drive=%s\n", drive->name);
-
-	ret = do_drive_set_taskfiles(drive, gtf_length, gtf_address);
-	kfree((void *)obj_loc);
-	if (ret < 0) {
-		DEBPRINT("set_taskfiles error (%d)\n", ret);
-	}
-
-	DEBPRINT("ret=%d\n", ret);
-
-	return ret;
-}
-
-/**
- * ide_acpi_get_timing - get the channel (controller) timings
- * @hwif: target IDE interface (channel)
- *
- * This function executes the _GTM ACPI method for the target channel.
- *
- */
-void ide_acpi_get_timing(ide_hwif_t *hwif)
-{
-	acpi_status		status;
-	struct acpi_buffer	output;
-	union acpi_object 	*out_obj;
-
-	/* Setting up output buffer for _GTM */
-	output.length = ACPI_ALLOCATE_BUFFER;
-	output.pointer = NULL;	/* ACPI-CA sets this; save/free it later */
-
-	/* _GTM has no input parameters */
-	status = acpi_evaluate_object(hwif->acpidata->obj_handle, "_GTM",
-				      NULL, &output);
-
-	DEBPRINT("_GTM status: %d, outptr: 0x%p, outlen: 0x%llx\n",
-		 status, output.pointer,
-		 (unsigned long long)output.length);
-
-	if (ACPI_FAILURE(status)) {
-		DEBPRINT("Run _GTM error: status = 0x%x\n", status);
-		return;
-	}
-
-	if (!output.length || !output.pointer) {
-		DEBPRINT("Run _GTM: length or ptr is NULL (0x%llx, 0x%p)\n",
-		       (unsigned long long)output.length,
-		       output.pointer);
-		kfree(output.pointer);
-		return;
-	}
-
-	out_obj = output.pointer;
-	if (out_obj->type != ACPI_TYPE_BUFFER) {
-		DEBPRINT("Run _GTM: error: "
-		       "expected object type of ACPI_TYPE_BUFFER, "
-		       "got 0x%x\n", out_obj->type);
-		kfree(output.pointer);
-		return;
-	}
-
-	if (!out_obj->buffer.length || !out_obj->buffer.pointer ||
-	    out_obj->buffer.length != sizeof(struct GTM_buffer)) {
-		printk(KERN_ERR
-			"%s: unexpected _GTM length (0x%x)[should be 0x%zx] or "
-			"addr (0x%p)\n",
-			__func__, out_obj->buffer.length,
-			sizeof(struct GTM_buffer), out_obj->buffer.pointer);
-		kfree(output.pointer);
-		return;
-	}
-
-	memcpy(&hwif->acpidata->gtm, out_obj->buffer.pointer,
-	       sizeof(struct GTM_buffer));
-
-	DEBPRINT("_GTM info: ptr: 0x%p, len: 0x%x, exp.len: 0x%zx\n",
-		 out_obj->buffer.pointer, out_obj->buffer.length,
-		 sizeof(struct GTM_buffer));
-
-	DEBPRINT("_GTM fields: 0x%x, 0x%x, 0x%x, 0x%x, 0x%x\n",
-		 hwif->acpidata->gtm.PIO_speed0,
-		 hwif->acpidata->gtm.DMA_speed0,
-		 hwif->acpidata->gtm.PIO_speed1,
-		 hwif->acpidata->gtm.DMA_speed1,
-		 hwif->acpidata->gtm.GTM_flags);
-
-	kfree(output.pointer);
-}
-
-/**
- * ide_acpi_push_timing - set the channel (controller) timings
- * @hwif: target IDE interface (channel)
- *
- * This function executes the _STM ACPI method for the target channel.
- *
- * _STM requires Identify Drive data, which has to passed as an argument.
- * Unfortunately drive->id is a mangled version which we can't readily
- * use; hence we'll get the information afresh.
- */
-void ide_acpi_push_timing(ide_hwif_t *hwif)
-{
-	acpi_status		status;
-	struct acpi_object_list	input;
-	union acpi_object 	in_params[3];
-	struct ide_acpi_drive_link	*master = &hwif->acpidata->master;
-	struct ide_acpi_drive_link	*slave = &hwif->acpidata->slave;
-
-	/* Give the GTM buffer + drive Identify data to the channel via the
-	 * _STM method: */
-	/* setup input parameters buffer for _STM */
-	input.count = 3;
-	input.pointer = in_params;
-	in_params[0].type = ACPI_TYPE_BUFFER;
-	in_params[0].buffer.length = sizeof(struct GTM_buffer);
-	in_params[0].buffer.pointer = (u8 *)&hwif->acpidata->gtm;
-	in_params[1].type = ACPI_TYPE_BUFFER;
-	in_params[1].buffer.length = ATA_ID_WORDS * 2;
-	in_params[1].buffer.pointer = (u8 *)&master->idbuff;
-	in_params[2].type = ACPI_TYPE_BUFFER;
-	in_params[2].buffer.length = ATA_ID_WORDS * 2;
-	in_params[2].buffer.pointer = (u8 *)&slave->idbuff;
-	/* Output buffer: _STM has no output */
-
-	status = acpi_evaluate_object(hwif->acpidata->obj_handle, "_STM",
-				      &input, NULL);
-
-	if (ACPI_FAILURE(status)) {
-		DEBPRINT("Run _STM error: status = 0x%x\n", status);
-	}
-	DEBPRINT("_STM status: %d\n", status);
-}
-
-/**
- * ide_acpi_set_state - set the channel power state
- * @hwif: target IDE interface
- * @on: state, on/off
- *
- * This function executes the _PS0/_PS3 ACPI method to set the power state.
- * ACPI spec requires _PS0 when IDE power on and _PS3 when power off
- */
-void ide_acpi_set_state(ide_hwif_t *hwif, int on)
-{
-	ide_drive_t *drive;
-	int i;
-
-	if (ide_noacpi_psx)
-		return;
-
-	DEBPRINT("ENTER:\n");
-
-	/* channel first and then drives for power on and verse versa for power off */
-	if (on)
-		acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0);
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		if (drive->acpidata->obj_handle)
-			acpi_bus_set_power(drive->acpidata->obj_handle,
-				on ? ACPI_STATE_D0 : ACPI_STATE_D3_COLD);
-	}
-
-	if (!on)
-		acpi_bus_set_power(hwif->acpidata->obj_handle,
-				   ACPI_STATE_D3_COLD);
-}
-
-/**
- * ide_acpi_init_port - initialize the ACPI link for an IDE interface
- * @hwif: target IDE interface (channel)
- *
- * The ACPI spec is not quite clear when the drive identify buffer
- * should be obtained. Calling IDENTIFY DEVICE during shutdown
- * is not the best of ideas as the drive might already being put to
- * sleep. And obviously we can't call it during resume.
- * So we get the information during startup; but this means that
- * any changes during run-time will be lost after resume.
- */
-void ide_acpi_init_port(ide_hwif_t *hwif)
-{
-	hwif->acpidata = kzalloc(sizeof(struct ide_acpi_hwif_link), GFP_KERNEL);
-	if (!hwif->acpidata)
-		return;
-
-	hwif->acpidata->obj_handle = ide_acpi_hwif_get_handle(hwif);
-	if (!hwif->acpidata->obj_handle) {
-		DEBPRINT("no ACPI object for %s found\n", hwif->name);
-		kfree(hwif->acpidata);
-		hwif->acpidata = NULL;
-	}
-}
-
-void ide_acpi_port_init_devices(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i, err;
-
-	if (hwif->acpidata == NULL)
-		return;
-
-	/*
-	 * The ACPI spec mandates that we send information
-	 * for both drives, regardless whether they are connected
-	 * or not.
-	 */
-	hwif->devices[0]->acpidata = &hwif->acpidata->master;
-	hwif->devices[1]->acpidata = &hwif->acpidata->slave;
-
-	/* get _ADR info for each device */
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		acpi_handle dev_handle;
-
-		DEBPRINT("ENTER: %s at channel#: %d port#: %d\n",
-			 drive->name, hwif->channel, drive->dn & 1);
-
-		/* TBD: could also check ACPI object VALID bits */
-		dev_handle = acpi_get_child(hwif->acpidata->obj_handle,
-					    drive->dn & 1);
-
-		DEBPRINT("drive %s handle 0x%p\n", drive->name, dev_handle);
-
-		drive->acpidata->obj_handle = dev_handle;
-	}
-
-	/* send IDENTIFY for each device */
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		err = taskfile_lib_get_identify(drive, drive->acpidata->idbuff);
-		if (err)
-			DEBPRINT("identify device %s failed (%d)\n",
-				 drive->name, err);
-	}
-
-	if (ide_noacpi || ide_acpionboot == 0) {
-		DEBPRINT("ACPI methods disabled on boot\n");
-		return;
-	}
-
-	/* ACPI _PS0 before _STM */
-	ide_acpi_set_state(hwif, 1);
-	/*
-	 * ACPI requires us to call _STM on startup
-	 */
-	ide_acpi_get_timing(hwif);
-	ide_acpi_push_timing(hwif);
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		ide_acpi_exec_tfs(drive);
-	}
-}
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
deleted file mode 100644
index a1ce9f5ac3aa1..0000000000000
--- a/drivers/ide/ide-atapi.c
+++ /dev/null
@@ -1,756 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ATAPI support.
- */
-
-#include <linux/kernel.h>
-#include <linux/cdrom.h>
-#include <linux/delay.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-#include <linux/scatterlist.h>
-#include <linux/gfp.h>
-
-#include <scsi/scsi.h>
-
-#define DRV_NAME "ide-atapi"
-#define PFX DRV_NAME ": "
-
-#ifdef DEBUG
-#define debug_log(fmt, args...) \
-	printk(KERN_INFO "ide: " fmt, ## args)
-#else
-#define debug_log(fmt, args...) do {} while (0)
-#endif
-
-#define ATAPI_MIN_CDB_BYTES	12
-
-static inline int dev_is_idecd(ide_drive_t *drive)
-{
-	return drive->media == ide_cdrom || drive->media == ide_optical;
-}
-
-/*
- * Check whether we can support a device,
- * based on the ATAPI IDENTIFY command results.
- */
-int ide_check_atapi_device(ide_drive_t *drive, const char *s)
-{
-	u16 *id = drive->id;
-	u8 gcw[2], protocol, device_type, removable, drq_type, packet_size;
-
-	*((u16 *)&gcw) = id[ATA_ID_CONFIG];
-
-	protocol    = (gcw[1] & 0xC0) >> 6;
-	device_type =  gcw[1] & 0x1F;
-	removable   = (gcw[0] & 0x80) >> 7;
-	drq_type    = (gcw[0] & 0x60) >> 5;
-	packet_size =  gcw[0] & 0x03;
-
-#ifdef CONFIG_PPC
-	/* kludge for Apple PowerBook internal zip */
-	if (drive->media == ide_floppy && device_type == 5 &&
-	    !strstr((char *)&id[ATA_ID_PROD], "CD-ROM") &&
-	    strstr((char *)&id[ATA_ID_PROD], "ZIP"))
-		device_type = 0;
-#endif
-
-	if (protocol != 2)
-		printk(KERN_ERR "%s: %s: protocol (0x%02x) is not ATAPI\n",
-			s, drive->name, protocol);
-	else if ((drive->media == ide_floppy && device_type != 0) ||
-		 (drive->media == ide_tape && device_type != 1))
-		printk(KERN_ERR "%s: %s: invalid device type (0x%02x)\n",
-			s, drive->name, device_type);
-	else if (removable == 0)
-		printk(KERN_ERR "%s: %s: the removable flag is not set\n",
-			s, drive->name);
-	else if (drive->media == ide_floppy && drq_type == 3)
-		printk(KERN_ERR "%s: %s: sorry, DRQ type (0x%02x) not "
-			"supported\n", s, drive->name, drq_type);
-	else if (packet_size != 0)
-		printk(KERN_ERR "%s: %s: packet size (0x%02x) is not 12 "
-			"bytes\n", s, drive->name, packet_size);
-	else
-		return 1;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_check_atapi_device);
-
-void ide_init_pc(struct ide_atapi_pc *pc)
-{
-	memset(pc, 0, sizeof(*pc));
-}
-EXPORT_SYMBOL_GPL(ide_init_pc);
-
-/*
- * Add a special packet command request to the tail of the request queue,
- * and wait for it to be serviced.
- */
-int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
-		      struct ide_atapi_pc *pc, void *buf, unsigned int bufflen)
-{
-	struct request *rq;
-	int error;
-
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	ide_req(rq)->special = pc;
-
-	if (buf && bufflen) {
-		error = blk_rq_map_kern(drive->queue, rq, buf, bufflen,
-					GFP_NOIO);
-		if (error)
-			goto put_req;
-	}
-
-	memcpy(scsi_req(rq)->cmd, pc->c, 12);
-	if (drive->media == ide_tape)
-		scsi_req(rq)->cmd[13] = REQ_IDETAPE_PC1;
-	blk_execute_rq(disk, rq, 0);
-	error = scsi_req(rq)->result ? -EIO : 0;
-put_req:
-	blk_put_request(rq);
-	return error;
-}
-EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
-
-int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk)
-{
-	struct ide_atapi_pc pc;
-
-	ide_init_pc(&pc);
-	pc.c[0] = TEST_UNIT_READY;
-
-	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-}
-EXPORT_SYMBOL_GPL(ide_do_test_unit_ready);
-
-int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start)
-{
-	struct ide_atapi_pc pc;
-
-	ide_init_pc(&pc);
-	pc.c[0] = START_STOP;
-	pc.c[4] = start;
-
-	if (drive->media == ide_tape)
-		pc.flags |= PC_FLAG_WAIT_FOR_DSC;
-
-	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-}
-EXPORT_SYMBOL_GPL(ide_do_start_stop);
-
-int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on)
-{
-	struct ide_atapi_pc pc;
-
-	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
-		return 0;
-
-	ide_init_pc(&pc);
-	pc.c[0] = ALLOW_MEDIUM_REMOVAL;
-	pc.c[4] = on;
-
-	return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-}
-EXPORT_SYMBOL_GPL(ide_set_media_lock);
-
-void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	ide_init_pc(pc);
-	pc->c[0] = REQUEST_SENSE;
-	if (drive->media == ide_floppy) {
-		pc->c[4] = 255;
-		pc->req_xfer = 18;
-	} else {
-		pc->c[4] = 20;
-		pc->req_xfer = 20;
-	}
-}
-EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
-
-void ide_prep_sense(ide_drive_t *drive, struct request *rq)
-{
-	struct request_sense *sense = &drive->sense_data;
-	struct request *sense_rq;
-	struct scsi_request *req;
-	unsigned int cmd_len, sense_len;
-	int err;
-
-	switch (drive->media) {
-	case ide_floppy:
-		cmd_len = 255;
-		sense_len = 18;
-		break;
-	case ide_tape:
-		cmd_len = 20;
-		sense_len = 20;
-		break;
-	default:
-		cmd_len = 18;
-		sense_len = 18;
-	}
-
-	BUG_ON(sense_len > sizeof(*sense));
-
-	if (ata_sense_request(rq) || drive->sense_rq_armed)
-		return;
-
-	sense_rq = drive->sense_rq;
-	if (!sense_rq) {
-		sense_rq = blk_mq_alloc_request(drive->queue, REQ_OP_DRV_IN,
-					BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
-		drive->sense_rq = sense_rq;
-	}
-	req = scsi_req(sense_rq);
-
-	memset(sense, 0, sizeof(*sense));
-
-	scsi_req_init(req);
-
-	err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
-			      GFP_NOIO);
-	if (unlikely(err)) {
-		if (printk_ratelimit())
-			printk(KERN_WARNING PFX "%s: failed to map sense "
-					    "buffer\n", drive->name);
-		blk_mq_free_request(sense_rq);
-		drive->sense_rq = NULL;
-		return;
-	}
-
-	sense_rq->rq_disk = rq->rq_disk;
-	sense_rq->cmd_flags = REQ_OP_DRV_IN;
-	ide_req(sense_rq)->type = ATA_PRIV_SENSE;
-
-	req->cmd[0] = GPCMD_REQUEST_SENSE;
-	req->cmd[4] = cmd_len;
-	if (drive->media == ide_tape)
-		req->cmd[13] = REQ_IDETAPE_PC1;
-
-	drive->sense_rq_armed = true;
-}
-EXPORT_SYMBOL_GPL(ide_prep_sense);
-
-int ide_queue_sense_rq(ide_drive_t *drive, void *special)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *sense_rq;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hwif->lock, flags);
-
-	/* deferred failure from ide_prep_sense() */
-	if (!drive->sense_rq_armed) {
-		printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
-		       drive->name);
-		spin_unlock_irqrestore(&hwif->lock, flags);
-		return -ENOMEM;
-	}
-
-	sense_rq = drive->sense_rq;
-	ide_req(sense_rq)->special = special;
-	drive->sense_rq_armed = false;
-
-	drive->hwif->rq = NULL;
-
-	ide_insert_request_head(drive, sense_rq);
-	spin_unlock_irqrestore(&hwif->lock, flags);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
-
-/*
- * Called when an error was detected during the last packet command.
- * We queue a request sense packet command at the head of the request
- * queue.
- */
-void ide_retry_pc(ide_drive_t *drive)
-{
-	struct request *failed_rq = drive->hwif->rq;
-	struct request *sense_rq = drive->sense_rq;
-	struct ide_atapi_pc *pc = &drive->request_sense_pc;
-
-	(void)ide_read_error(drive);
-
-	/* init pc from sense_rq */
-	ide_init_pc(pc);
-	memcpy(pc->c, scsi_req(sense_rq)->cmd, 12);
-
-	if (drive->media == ide_tape)
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
-
-	/*
-	 * Push back the failed request and put request sense on top
-	 * of it.  The failed command will be retried after sense data
-	 * is acquired.
-	 */
-	drive->hwif->rq = NULL;
-	ide_requeue_and_plug(drive, failed_rq);
-	if (ide_queue_sense_rq(drive, pc))
-		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(failed_rq));
-}
-EXPORT_SYMBOL_GPL(ide_retry_pc);
-
-int ide_cd_expiry(ide_drive_t *drive)
-{
-	struct request *rq = drive->hwif->rq;
-	unsigned long wait = 0;
-
-	debug_log("%s: scsi_req(rq)->cmd[0]: 0x%x\n", __func__, scsi_req(rq)->cmd[0]);
-
-	/*
-	 * Some commands are *slow* and normally take a long time to complete.
-	 * Usually we can use the ATAPI "disconnect" to bypass this, but not all
-	 * commands/drives support that. Let ide_timer_expiry keep polling us
-	 * for these.
-	 */
-	switch (scsi_req(rq)->cmd[0]) {
-	case GPCMD_BLANK:
-	case GPCMD_FORMAT_UNIT:
-	case GPCMD_RESERVE_RZONE_TRACK:
-	case GPCMD_CLOSE_TRACK:
-	case GPCMD_FLUSH_CACHE:
-		wait = ATAPI_WAIT_PC;
-		break;
-	default:
-		if (!(rq->rq_flags & RQF_QUIET))
-			printk(KERN_INFO PFX "cmd 0x%x timed out\n",
-					 scsi_req(rq)->cmd[0]);
-		wait = 0;
-		break;
-	}
-	return wait;
-}
-EXPORT_SYMBOL_GPL(ide_cd_expiry);
-
-int ide_cd_get_xferlen(struct request *rq)
-{
-	switch (req_op(rq)) {
-	default:
-		return 32768;
-	case REQ_OP_SCSI_IN:
-	case REQ_OP_SCSI_OUT:
-		return blk_rq_bytes(rq);
-	case REQ_OP_DRV_IN:
-	case REQ_OP_DRV_OUT:
-		switch (ide_req(rq)->type) {
-		case ATA_PRIV_PC:
-		case ATA_PRIV_SENSE:
-			return blk_rq_bytes(rq);
-		default:
-			return 0;
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
-
-void ide_read_bcount_and_ireason(ide_drive_t *drive, u16 *bcount, u8 *ireason)
-{
-	struct ide_taskfile tf;
-
-	drive->hwif->tp_ops->tf_read(drive, &tf, IDE_VALID_NSECT |
-				     IDE_VALID_LBAM | IDE_VALID_LBAH);
-
-	*bcount = (tf.lbah << 8) | tf.lbam;
-	*ireason = tf.nsect & 3;
-}
-EXPORT_SYMBOL_GPL(ide_read_bcount_and_ireason);
-
-/*
- * Check the contents of the interrupt reason register and attempt to recover if
- * there are problems.
- *
- * Returns:
- * - 0 if everything's ok
- * - 1 if the request has to be terminated.
- */
-int ide_check_ireason(ide_drive_t *drive, struct request *rq, int len,
-		      int ireason, int rw)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	debug_log("ireason: 0x%x, rw: 0x%x\n", ireason, rw);
-
-	if (ireason == (!rw << 1))
-		return 0;
-	else if (ireason == (rw << 1)) {
-		printk(KERN_ERR PFX "%s: %s: wrong transfer direction!\n",
-				drive->name, __func__);
-
-		if (dev_is_idecd(drive))
-			ide_pad_transfer(drive, rw, len);
-	} else if (!rw && ireason == ATAPI_COD) {
-		if (dev_is_idecd(drive)) {
-			/*
-			 * Some drives (ASUS) seem to tell us that status info
-			 * is available.  Just get it and ignore.
-			 */
-			(void)hwif->tp_ops->read_status(hwif);
-			return 0;
-		}
-	} else {
-		if (ireason & ATAPI_COD)
-			printk(KERN_ERR PFX "%s: CoD != 0 in %s\n", drive->name,
-					__func__);
-
-		/* drive wants a command packet, or invalid ireason... */
-		printk(KERN_ERR PFX "%s: %s: bad interrupt reason 0x%02x\n",
-				drive->name, __func__, ireason);
-	}
-
-	if (dev_is_idecd(drive) && ata_pc_request(rq))
-		rq->rq_flags |= RQF_FAILED;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(ide_check_ireason);
-
-/*
- * This is the usual interrupt handler which will be called during a packet
- * command.  We will transfer some of the data (as requested by the drive)
- * and will re-point interrupt handler to us.
- */
-static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
-{
-	struct ide_atapi_pc *pc = drive->pc;
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &hwif->cmd;
-	struct request *rq = hwif->rq;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	unsigned int timeout, done;
-	u16 bcount;
-	u8 stat, ireason, dsc = 0;
-	u8 write = !!(pc->flags & PC_FLAG_WRITING);
-
-	debug_log("Enter %s - interrupt handler\n", __func__);
-
-	timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-					       : WAIT_TAPE_CMD;
-
-	/* Clear the interrupt */
-	stat = tp_ops->read_status(hwif);
-
-	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-		int rc;
-
-		drive->waiting_for_dma = 0;
-		rc = hwif->dma_ops->dma_end(drive);
-		ide_dma_unmap_sg(drive, cmd);
-
-		if (rc || (drive->media == ide_tape && (stat & ATA_ERR))) {
-			if (drive->media == ide_floppy)
-				printk(KERN_ERR PFX "%s: DMA %s error\n",
-					drive->name, rq_data_dir(pc->rq)
-						     ? "write" : "read");
-			pc->flags |= PC_FLAG_DMA_ERROR;
-		} else
-			scsi_req(rq)->resid_len = 0;
-		debug_log("%s: DMA finished\n", drive->name);
-	}
-
-	/* No more interrupts */
-	if ((stat & ATA_DRQ) == 0) {
-		int uptodate;
-		blk_status_t error;
-
-		debug_log("Packet command completed, %d bytes transferred\n",
-			  blk_rq_bytes(rq));
-
-		pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-
-		local_irq_enable_in_hardirq();
-
-		if (drive->media == ide_tape &&
-		    (stat & ATA_ERR) && scsi_req(rq)->cmd[0] == REQUEST_SENSE)
-			stat &= ~ATA_ERR;
-
-		if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) {
-			/* Error detected */
-			debug_log("%s: I/O error\n", drive->name);
-
-			if (drive->media != ide_tape)
-				scsi_req(pc->rq)->result++;
-
-			if (scsi_req(rq)->cmd[0] == REQUEST_SENSE) {
-				printk(KERN_ERR PFX "%s: I/O error in request "
-						"sense command\n", drive->name);
-				return ide_do_reset(drive);
-			}
-
-			debug_log("[cmd %x]: check condition\n", scsi_req(rq)->cmd[0]);
-
-			/* Retry operation */
-			ide_retry_pc(drive);
-
-			/* queued, but not started */
-			return ide_stopped;
-		}
-		pc->error = 0;
-
-		if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
-			dsc = 1;
-
-		/*
-		 * ->pc_callback() might change rq->data_len for
-		 * residual count, cache total length.
-		 */
-		done = blk_rq_bytes(rq);
-
-		/* Command finished - Call the callback function */
-		uptodate = drive->pc_callback(drive, dsc);
-
-		if (uptodate == 0)
-			drive->failed_pc = NULL;
-
-		if (ata_misc_request(rq)) {
-			scsi_req(rq)->result = 0;
-			error = BLK_STS_OK;
-		} else {
-
-			if (blk_rq_is_passthrough(rq) && uptodate <= 0) {
-				if (scsi_req(rq)->result == 0)
-					scsi_req(rq)->result = -EIO;
-			}
-
-			error = uptodate ? BLK_STS_OK : BLK_STS_IOERR;
-		}
-
-		ide_complete_rq(drive, error, blk_rq_bytes(rq));
-		return ide_stopped;
-	}
-
-	if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) {
-		pc->flags &= ~PC_FLAG_DMA_IN_PROGRESS;
-		printk(KERN_ERR PFX "%s: The device wants to issue more "
-				"interrupts in DMA mode\n", drive->name);
-		ide_dma_off(drive);
-		return ide_do_reset(drive);
-	}
-
-	/* Get the number of bytes to transfer on this interrupt. */
-	ide_read_bcount_and_ireason(drive, &bcount, &ireason);
-
-	if (ide_check_ireason(drive, rq, bcount, ireason, write))
-		return ide_do_reset(drive);
-
-	done = min_t(unsigned int, bcount, cmd->nleft);
-	ide_pio_bytes(drive, cmd, write, done);
-
-	/* Update transferred byte count */
-	scsi_req(rq)->resid_len -= done;
-
-	bcount -= done;
-
-	if (bcount)
-		ide_pad_transfer(drive, write, bcount);
-
-	debug_log("[cmd %x] transferred %d bytes, padded %d bytes, resid: %u\n",
-		  scsi_req(rq)->cmd[0], done, bcount, scsi_req(rq)->resid_len);
-
-	/* And set the interrupt handler again */
-	ide_set_handler(drive, ide_pc_intr, timeout);
-	return ide_started;
-}
-
-static void ide_init_packet_cmd(struct ide_cmd *cmd, u8 valid_tf,
-				u16 bcount, u8 dma)
-{
-	cmd->protocol = dma ? ATAPI_PROT_DMA : ATAPI_PROT_PIO;
-	cmd->valid.out.tf = IDE_VALID_LBAH | IDE_VALID_LBAM |
-			    IDE_VALID_FEATURE | valid_tf;
-	cmd->tf.command = ATA_CMD_PACKET;
-	cmd->tf.feature = dma;		/* Use PIO/DMA */
-	cmd->tf.lbam    = bcount & 0xff;
-	cmd->tf.lbah    = (bcount >> 8) & 0xff;
-}
-
-static u8 ide_read_ireason(ide_drive_t *drive)
-{
-	struct ide_taskfile tf;
-
-	drive->hwif->tp_ops->tf_read(drive, &tf, IDE_VALID_NSECT);
-
-	return tf.nsect & 3;
-}
-
-static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason)
-{
-	int retries = 100;
-
-	while (retries-- && ((ireason & ATAPI_COD) == 0 ||
-		(ireason & ATAPI_IO))) {
-		printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing "
-				"a packet command, retrying\n", drive->name);
-		udelay(100);
-		ireason = ide_read_ireason(drive);
-		if (retries == 0) {
-			printk(KERN_ERR PFX "%s: (IO,CoD != (0,1) while issuing"
-					" a packet command, ignoring\n",
-					drive->name);
-			ireason |= ATAPI_COD;
-			ireason &= ~ATAPI_IO;
-		}
-	}
-
-	return ireason;
-}
-
-static int ide_delayed_transfer_pc(ide_drive_t *drive)
-{
-	/* Send the actual packet */
-	drive->hwif->tp_ops->output_data(drive, NULL, drive->pc->c, 12);
-
-	/* Timeout for the packet command */
-	return WAIT_FLOPPY_CMD;
-}
-
-static ide_startstop_t ide_transfer_pc(ide_drive_t *drive)
-{
-	struct ide_atapi_pc *pc;
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->rq;
-	ide_expiry_t *expiry;
-	unsigned int timeout;
-	int cmd_len;
-	ide_startstop_t startstop;
-	u8 ireason;
-
-	if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) {
-		printk(KERN_ERR PFX "%s: Strange, packet command initiated yet "
-				"DRQ isn't asserted\n", drive->name);
-		return startstop;
-	}
-
-	if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) {
-		if (drive->dma)
-			drive->waiting_for_dma = 1;
-	}
-
-	if (dev_is_idecd(drive)) {
-		/* ATAPI commands get padded out to 12 bytes minimum */
-		cmd_len = COMMAND_SIZE(scsi_req(rq)->cmd[0]);
-		if (cmd_len < ATAPI_MIN_CDB_BYTES)
-			cmd_len = ATAPI_MIN_CDB_BYTES;
-
-		timeout = rq->timeout;
-		expiry  = ide_cd_expiry;
-	} else {
-		pc = drive->pc;
-
-		cmd_len = ATAPI_MIN_CDB_BYTES;
-
-		/*
-		 * If necessary schedule the packet transfer to occur 'timeout'
-		 * milliseconds later in ide_delayed_transfer_pc() after the
-		 * device says it's ready for a packet.
-		 */
-		if (drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) {
-			timeout = drive->pc_delay;
-			expiry = &ide_delayed_transfer_pc;
-		} else {
-			timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-							       : WAIT_TAPE_CMD;
-			expiry = NULL;
-		}
-
-		ireason = ide_read_ireason(drive);
-		if (drive->media == ide_tape)
-			ireason = ide_wait_ireason(drive, ireason);
-
-		if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) {
-			printk(KERN_ERR PFX "%s: (IO,CoD) != (0,1) while "
-				"issuing a packet command\n", drive->name);
-
-			return ide_do_reset(drive);
-		}
-	}
-
-	hwif->expiry = expiry;
-
-	/* Set the interrupt routine */
-	ide_set_handler(drive,
-			(dev_is_idecd(drive) ? drive->irq_handler
-					     : ide_pc_intr),
-			timeout);
-
-	/* Send the actual packet */
-	if ((drive->atapi_flags & IDE_AFLAG_ZIP_DRIVE) == 0)
-		hwif->tp_ops->output_data(drive, NULL, scsi_req(rq)->cmd, cmd_len);
-
-	/* Begin DMA, if necessary */
-	if (dev_is_idecd(drive)) {
-		if (drive->dma)
-			hwif->dma_ops->dma_start(drive);
-	} else {
-		if (pc->flags & PC_FLAG_DMA_OK) {
-			pc->flags |= PC_FLAG_DMA_IN_PROGRESS;
-			hwif->dma_ops->dma_start(drive);
-		}
-	}
-
-	return ide_started;
-}
-
-ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	struct ide_atapi_pc *pc;
-	ide_hwif_t *hwif = drive->hwif;
-	ide_expiry_t *expiry = NULL;
-	struct request *rq = hwif->rq;
-	unsigned int timeout, bytes;
-	u16 bcount;
-	u8 valid_tf;
-	u8 drq_int = !!(drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT);
-
-	if (dev_is_idecd(drive)) {
-		valid_tf = IDE_VALID_NSECT | IDE_VALID_LBAL;
-		bcount = ide_cd_get_xferlen(rq);
-		expiry = ide_cd_expiry;
-		timeout = ATAPI_WAIT_PC;
-
-		if (drive->dma)
-			drive->dma = !ide_dma_prepare(drive, cmd);
-	} else {
-		pc = drive->pc;
-
-		valid_tf = IDE_VALID_DEVICE;
-		bytes = blk_rq_bytes(rq);
-		bcount = ((drive->media == ide_tape) ? bytes
-						     : min_t(unsigned int,
-							     bytes, 63 * 1024));
-
-		/* We haven't transferred any data yet */
-		scsi_req(rq)->resid_len = bcount;
-
-		if (pc->flags & PC_FLAG_DMA_ERROR) {
-			pc->flags &= ~PC_FLAG_DMA_ERROR;
-			ide_dma_off(drive);
-		}
-
-		if (pc->flags & PC_FLAG_DMA_OK)
-			drive->dma = !ide_dma_prepare(drive, cmd);
-
-		if (!drive->dma)
-			pc->flags &= ~PC_FLAG_DMA_OK;
-
-		timeout = (drive->media == ide_floppy) ? WAIT_FLOPPY_CMD
-						       : WAIT_TAPE_CMD;
-	}
-
-	ide_init_packet_cmd(cmd, valid_tf, bcount, drive->dma);
-
-	(void)do_rw_taskfile(drive, cmd);
-
-	if (drq_int) {
-		if (drive->dma)
-			drive->waiting_for_dma = 0;
-		hwif->expiry = expiry;
-	}
-
-	ide_execute_command(drive, cmd, ide_transfer_pc, timeout);
-
-	return drq_int ? ide_started : ide_transfer_pc(drive);
-}
-EXPORT_SYMBOL_GPL(ide_issue_pc);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
deleted file mode 100644
index cffbcc27a34cc..0000000000000
--- a/drivers/ide/ide-cd.c
+++ /dev/null
@@ -1,1858 +0,0 @@
-/*
- * ATAPI CD-ROM driver.
- *
- * Copyright (C) 1994-1996   Scott Snyder <snyder@fnald0.fnal.gov>
- * Copyright (C) 1996-1998   Erik Andersen <andersee@debian.org>
- * Copyright (C) 1998-2000   Jens Axboe <axboe@suse.de>
- * Copyright (C) 2005, 2007-2009  Bartlomiej Zolnierkiewicz
- *
- * May be copied or modified under the terms of the GNU General Public
- * License.  See linux/COPYING for more information.
- *
- * See Documentation/cdrom/ide-cd.rst for usage information.
- *
- * Suggestions are welcome. Patches that work are more welcome though. ;-)
- *
- * Documentation:
- *	Mt. Fuji (SFF8090 version 4) and ATAPI (SFF-8020i rev 2.6) standards.
- *
- * For historical changelog please see:
- *	Documentation/ide/ChangeLog.ide-cd.1994-2004
- */
-
-#define DRV_NAME "ide-cd"
-#define PFX DRV_NAME ": "
-
-#define IDECD_VERSION "5.00"
-
-#include <linux/compat.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/sched/task_stack.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/cdrom.h>
-#include <linux/ide.h>
-#include <linux/completion.h>
-#include <linux/mutex.h>
-#include <linux/bcd.h>
-
-/* For SCSI -> ATAPI command conversion */
-#include <scsi/scsi.h>
-
-#include <linux/io.h>
-#include <asm/byteorder.h>
-#include <linux/uaccess.h>
-#include <asm/unaligned.h>
-
-#include "ide-cd.h"
-
-static DEFINE_MUTEX(ide_cd_mutex);
-static DEFINE_MUTEX(idecd_ref_mutex);
-
-static void ide_cd_release(struct device *);
-
-static struct cdrom_info *ide_cd_get(struct gendisk *disk)
-{
-	struct cdrom_info *cd = NULL;
-
-	mutex_lock(&idecd_ref_mutex);
-	cd = ide_drv_g(disk, cdrom_info);
-	if (cd) {
-		if (ide_device_get(cd->drive))
-			cd = NULL;
-		else
-			get_device(&cd->dev);
-
-	}
-	mutex_unlock(&idecd_ref_mutex);
-	return cd;
-}
-
-static void ide_cd_put(struct cdrom_info *cd)
-{
-	ide_drive_t *drive = cd->drive;
-
-	mutex_lock(&idecd_ref_mutex);
-	put_device(&cd->dev);
-	ide_device_put(drive);
-	mutex_unlock(&idecd_ref_mutex);
-}
-
-/*
- * Generic packet command support and error handling routines.
- */
-
-/* Mark that we've seen a media change and invalidate our internal buffers. */
-static void cdrom_saw_media_change(ide_drive_t *drive)
-{
-	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-	drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
-}
-
-static int cdrom_log_sense(ide_drive_t *drive, struct request *rq)
-{
-	struct request_sense *sense = &drive->sense_data;
-	int log = 0;
-
-	if (!sense || !rq || (rq->rq_flags & RQF_QUIET))
-		return 0;
-
-	ide_debug_log(IDE_DBG_SENSE, "sense_key: 0x%x", sense->sense_key);
-
-	switch (sense->sense_key) {
-	case NO_SENSE:
-	case RECOVERED_ERROR:
-		break;
-	case NOT_READY:
-		/*
-		 * don't care about tray state messages for e.g. capacity
-		 * commands or in-progress or becoming ready
-		 */
-		if (sense->asc == 0x3a || sense->asc == 0x04)
-			break;
-		log = 1;
-		break;
-	case ILLEGAL_REQUEST:
-		/*
-		 * don't log START_STOP unit with LoEj set, since we cannot
-		 * reliably check if drive can auto-close
-		 */
-		if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT && sense->asc == 0x24)
-			break;
-		log = 1;
-		break;
-	case UNIT_ATTENTION:
-		/*
-		 * Make good and sure we've seen this potential media change.
-		 * Some drives (i.e. Creative) fail to present the correct sense
-		 * key in the error register.
-		 */
-		cdrom_saw_media_change(drive);
-		break;
-	default:
-		log = 1;
-		break;
-	}
-	return log;
-}
-
-static void cdrom_analyze_sense_data(ide_drive_t *drive,
-				     struct request *failed_command)
-{
-	struct request_sense *sense = &drive->sense_data;
-	struct cdrom_info *info = drive->driver_data;
-	unsigned long sector;
-	unsigned long bio_sectors;
-
-	ide_debug_log(IDE_DBG_SENSE, "error_code: 0x%x, sense_key: 0x%x",
-				     sense->error_code, sense->sense_key);
-
-	if (failed_command)
-		ide_debug_log(IDE_DBG_SENSE, "failed cmd: 0x%x",
-					     failed_command->cmd[0]);
-
-	if (!cdrom_log_sense(drive, failed_command))
-		return;
-
-	/*
-	 * If a read toc is executed for a CD-R or CD-RW medium where the first
-	 * toc has not been recorded yet, it will fail with 05/24/00 (which is a
-	 * confusing error)
-	 */
-	if (failed_command && scsi_req(failed_command)->cmd[0] == GPCMD_READ_TOC_PMA_ATIP)
-		if (sense->sense_key == 0x05 && sense->asc == 0x24)
-			return;
-
-	/* current error */
-	if (sense->error_code == 0x70) {
-		switch (sense->sense_key) {
-		case MEDIUM_ERROR:
-		case VOLUME_OVERFLOW:
-		case ILLEGAL_REQUEST:
-			if (!sense->valid)
-				break;
-			if (failed_command == NULL ||
-			    blk_rq_is_passthrough(failed_command))
-				break;
-			sector = (sense->information[0] << 24) |
-				 (sense->information[1] << 16) |
-				 (sense->information[2] <<  8) |
-				 (sense->information[3]);
-
-			if (queue_logical_block_size(drive->queue) == 2048)
-				/* device sector size is 2K */
-				sector <<= 2;
-
-			bio_sectors = max(bio_sectors(failed_command->bio), 4U);
-			sector &= ~(bio_sectors - 1);
-
-			/*
-			 * The SCSI specification allows for the value
-			 * returned by READ CAPACITY to be up to 75 2K
-			 * sectors past the last readable block.
-			 * Therefore, if we hit a medium error within the
-			 * last 75 2K sectors, we decrease the saved size
-			 * value.
-			 */
-			if (sector < get_capacity(info->disk) &&
-			    drive->probed_capacity - sector < 4 * 75)
-				set_capacity(info->disk, sector);
-		}
-	}
-
-	ide_cd_log_error(drive->name, failed_command, sense);
-}
-
-static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
-{
-	/*
-	 * For ATA_PRIV_SENSE, "ide_req(rq)->special" points to the original
-	 * failed request.  Also, the sense data should be read
-	 * directly from rq which might be different from the original
-	 * sense buffer if it got copied during mapping.
-	 */
-	struct request *failed = ide_req(rq)->special;
-	void *sense = bio_data(rq->bio);
-
-	if (failed) {
-		/*
-		 * Sense is always read into drive->sense_data, copy back to the
-		 * original request.
-		 */
-		memcpy(scsi_req(failed)->sense, sense, 18);
-		scsi_req(failed)->sense_len = scsi_req(rq)->sense_len;
-		cdrom_analyze_sense_data(drive, failed);
-
-		if (ide_end_rq(drive, failed, BLK_STS_IOERR, blk_rq_bytes(failed)))
-			BUG();
-	} else
-		cdrom_analyze_sense_data(drive, NULL);
-}
-
-
-/*
- * Allow the drive 5 seconds to recover; some devices will return NOT_READY
- * while flushing data from cache.
- *
- * returns: 0 failed (write timeout expired)
- *	    1 success
- */
-static int ide_cd_breathe(ide_drive_t *drive, struct request *rq)
-{
-
-	struct cdrom_info *info = drive->driver_data;
-
-	if (!scsi_req(rq)->result)
-		info->write_timeout = jiffies +	ATAPI_WAIT_WRITE_BUSY;
-
-	scsi_req(rq)->result = 1;
-
-	if (time_after(jiffies, info->write_timeout))
-		return 0;
-	else {
-		/*
-		 * take a breather
-		 */
-		blk_mq_requeue_request(rq, false);
-		blk_mq_delay_kick_requeue_list(drive->queue, 1);
-		return 1;
-	}
-}
-
-static void ide_cd_free_sense(ide_drive_t *drive)
-{
-	if (!drive->sense_rq)
-		return;
-
-	blk_mq_free_request(drive->sense_rq);
-	drive->sense_rq = NULL;
-	drive->sense_rq_armed = false;
-}
-
-/**
- * Returns:
- * 0: if the request should be continued.
- * 1: if the request will be going through error recovery.
- * 2: if the request should be ended.
- */
-static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->rq;
-	int err, sense_key, do_end_request = 0;
-
-	/* get the IDE error register */
-	err = ide_read_error(drive);
-	sense_key = err >> 4;
-
-	ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, rq->cmd_type: 0x%x, err: 0x%x, "
-				  "stat 0x%x",
-				  rq->cmd[0], rq->cmd_type, err, stat);
-
-	if (ata_sense_request(rq)) {
-		/*
-		 * We got an error trying to get sense info from the drive
-		 * (probably while trying to recover from a former error).
-		 * Just give up.
-		 */
-		rq->rq_flags |= RQF_FAILED;
-		return 2;
-	}
-
-	/* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-	if (blk_rq_is_scsi(rq) && !scsi_req(rq)->result)
-		scsi_req(rq)->result = SAM_STAT_CHECK_CONDITION;
-
-	if (blk_noretry_request(rq))
-		do_end_request = 1;
-
-	switch (sense_key) {
-	case NOT_READY:
-		if (req_op(rq) == REQ_OP_WRITE) {
-			if (ide_cd_breathe(drive, rq))
-				return 1;
-		} else {
-			cdrom_saw_media_change(drive);
-
-			if (!blk_rq_is_passthrough(rq) &&
-			    !(rq->rq_flags & RQF_QUIET))
-				printk(KERN_ERR PFX "%s: tray open\n",
-					drive->name);
-		}
-		do_end_request = 1;
-		break;
-	case UNIT_ATTENTION:
-		cdrom_saw_media_change(drive);
-
-		if (blk_rq_is_passthrough(rq))
-			return 0;
-
-		/*
-		 * Arrange to retry the request but be sure to give up if we've
-		 * retried too many times.
-		 */
-		if (++scsi_req(rq)->result > ERROR_MAX)
-			do_end_request = 1;
-		break;
-	case ILLEGAL_REQUEST:
-		/*
-		 * Don't print error message for this condition -- SFF8090i
-		 * indicates that 5/24/00 is the correct response to a request
-		 * to close the tray if the drive doesn't have that capability.
-		 *
-		 * cdrom_log_sense() knows this!
-		 */
-		if (scsi_req(rq)->cmd[0] == GPCMD_START_STOP_UNIT)
-			break;
-		fallthrough;
-	case DATA_PROTECT:
-		/*
-		 * No point in retrying after an illegal request or data
-		 * protect error.
-		 */
-		if (!(rq->rq_flags & RQF_QUIET))
-			ide_dump_status(drive, "command error", stat);
-		do_end_request = 1;
-		break;
-	case MEDIUM_ERROR:
-		/*
-		 * No point in re-trying a zillion times on a bad sector.
-		 * If we got here the error is not correctable.
-		 */
-		if (!(rq->rq_flags & RQF_QUIET))
-			ide_dump_status(drive, "media error "
-					"(bad sector)", stat);
-		do_end_request = 1;
-		break;
-	case BLANK_CHECK:
-		/* disk appears blank? */
-		if (!(rq->rq_flags & RQF_QUIET))
-			ide_dump_status(drive, "media error (blank)",
-					stat);
-		do_end_request = 1;
-		break;
-	default:
-		if (blk_rq_is_passthrough(rq))
-			break;
-		if (err & ~ATA_ABORTED) {
-			/* go to the default handler for other errors */
-			ide_error(drive, "cdrom_decode_status", stat);
-			return 1;
-		} else if (++scsi_req(rq)->result > ERROR_MAX)
-			/* we've racked up too many retries, abort */
-			do_end_request = 1;
-	}
-
-	if (blk_rq_is_passthrough(rq)) {
-		rq->rq_flags |= RQF_FAILED;
-		do_end_request = 1;
-	}
-
-	/*
-	 * End a request through request sense analysis when we have sense data.
-	 * We need this in order to perform end of media processing.
-	 */
-	if (do_end_request)
-		goto end_request;
-
-	/* if we got a CHECK_CONDITION status, queue a request sense command */
-	if (stat & ATA_ERR)
-		return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
-	return 1;
-
-end_request:
-	if (stat & ATA_ERR) {
-		hwif->rq = NULL;
-		return ide_queue_sense_rq(drive, rq) ? 2 : 1;
-	} else
-		return 2;
-}
-
-static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	struct request *rq = cmd->rq;
-
-	ide_debug_log(IDE_DBG_FUNC, "rq->cmd[0]: 0x%x", rq->cmd[0]);
-
-	/*
-	 * Some of the trailing request sense fields are optional,
-	 * and some drives don't send them.  Sigh.
-	 */
-	if (scsi_req(rq)->cmd[0] == GPCMD_REQUEST_SENSE &&
-	    cmd->nleft > 0 && cmd->nleft <= 5)
-		cmd->nleft = 0;
-}
-
-int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
-		    int write, void *buffer, unsigned *bufflen,
-		    struct scsi_sense_hdr *sshdr, int timeout,
-		    req_flags_t rq_flags)
-{
-	struct cdrom_info *info = drive->driver_data;
-	struct scsi_sense_hdr local_sshdr;
-	int retries = 10;
-	bool failed;
-
-	ide_debug_log(IDE_DBG_PC, "cmd[0]: 0x%x, write: 0x%x, timeout: %d, "
-				  "rq_flags: 0x%x",
-				  cmd[0], write, timeout, rq_flags);
-
-	if (!sshdr)
-		sshdr = &local_sshdr;
-
-	/* start of retry loop */
-	do {
-		struct request *rq;
-		int error;
-		bool delay = false;
-
-		rq = blk_get_request(drive->queue,
-			write ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-		memcpy(scsi_req(rq)->cmd, cmd, BLK_MAX_CDB);
-		ide_req(rq)->type = ATA_PRIV_PC;
-		rq->rq_flags |= rq_flags;
-		rq->timeout = timeout;
-		if (buffer) {
-			error = blk_rq_map_kern(drive->queue, rq, buffer,
-						*bufflen, GFP_NOIO);
-			if (error) {
-				blk_put_request(rq);
-				return error;
-			}
-		}
-
-		blk_execute_rq(info->disk, rq, 0);
-		error = scsi_req(rq)->result ? -EIO : 0;
-
-		if (buffer)
-			*bufflen = scsi_req(rq)->resid_len;
-		scsi_normalize_sense(scsi_req(rq)->sense,
-				     scsi_req(rq)->sense_len, sshdr);
-
-		/*
-		 * FIXME: we should probably abort/retry or something in case of
-		 * failure.
-		 */
-		failed = (rq->rq_flags & RQF_FAILED) != 0;
-		if (failed) {
-			/*
-			 * The request failed.  Retry if it was due to a unit
-			 * attention status (usually means media was changed).
-			 */
-			if (sshdr->sense_key == UNIT_ATTENTION)
-				cdrom_saw_media_change(drive);
-			else if (sshdr->sense_key == NOT_READY &&
-				 sshdr->asc == 4 && sshdr->ascq != 4) {
-				/*
-				 * The drive is in the process of loading
-				 * a disk.  Retry, but wait a little to give
-				 * the drive time to complete the load.
-				 */
-				delay = true;
-			} else {
-				/* otherwise, don't retry */
-				retries = 0;
-			}
-			--retries;
-		}
-		blk_put_request(rq);
-		if (delay)
-			ssleep(2);
-	} while (failed && retries >= 0);
-
-	/* return an error if the command failed */
-	return failed ? -EIO : 0;
-}
-
-/*
- * returns true if rq has been completed
- */
-static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	unsigned int nr_bytes = cmd->nbytes - cmd->nleft;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		nr_bytes -= cmd->last_xfer_len;
-
-	if (nr_bytes > 0) {
-		ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
-		return true;
-	}
-
-	return false;
-}
-
-/* standard prep_rq that builds 10 byte cmds */
-static bool ide_cdrom_prep_fs(struct request_queue *q, struct request *rq)
-{
-	int hard_sect = queue_logical_block_size(q);
-	long block = (long)blk_rq_pos(rq) / (hard_sect >> 9);
-	unsigned long blocks = blk_rq_sectors(rq) / (hard_sect >> 9);
-	struct scsi_request *req = scsi_req(rq);
-
-	if (rq_data_dir(rq) == READ)
-		req->cmd[0] = GPCMD_READ_10;
-	else
-		req->cmd[0] = GPCMD_WRITE_10;
-
-	/*
-	 * fill in lba
-	 */
-	req->cmd[2] = (block >> 24) & 0xff;
-	req->cmd[3] = (block >> 16) & 0xff;
-	req->cmd[4] = (block >>  8) & 0xff;
-	req->cmd[5] = block & 0xff;
-
-	/*
-	 * and transfer length
-	 */
-	req->cmd[7] = (blocks >> 8) & 0xff;
-	req->cmd[8] = blocks & 0xff;
-	req->cmd_len = 10;
-	return true;
-}
-
-/*
- * Most of the SCSI commands are supported directly by ATAPI devices.
- * This transform handles the few exceptions.
- */
-static bool ide_cdrom_prep_pc(struct request *rq)
-{
-	u8 *c = scsi_req(rq)->cmd;
-
-	/* transform 6-byte read/write commands to the 10-byte version */
-	if (c[0] == READ_6 || c[0] == WRITE_6) {
-		c[8] = c[4];
-		c[5] = c[3];
-		c[4] = c[2];
-		c[3] = c[1] & 0x1f;
-		c[2] = 0;
-		c[1] &= 0xe0;
-		c[0] += (READ_10 - READ_6);
-		scsi_req(rq)->cmd_len = 10;
-		return true;
-	}
-
-	/*
-	 * it's silly to pretend we understand 6-byte sense commands, just
-	 * reject with ILLEGAL_REQUEST and the caller should take the
-	 * appropriate action
-	 */
-	if (c[0] == MODE_SENSE || c[0] == MODE_SELECT) {
-		scsi_req(rq)->result = ILLEGAL_REQUEST;
-		return false;
-	}
-
-	return true;
-}
-
-static bool ide_cdrom_prep_rq(ide_drive_t *drive, struct request *rq)
-{
-	if (!blk_rq_is_passthrough(rq)) {
-		scsi_req_init(scsi_req(rq));
-
-		return ide_cdrom_prep_fs(drive->queue, rq);
-	} else if (blk_rq_is_scsi(rq))
-		return ide_cdrom_prep_pc(rq);
-
-	return true;
-}
-
-static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &hwif->cmd;
-	struct request *rq = hwif->rq;
-	ide_expiry_t *expiry = NULL;
-	int dma_error = 0, dma, thislen, uptodate = 0;
-	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
-	int sense = ata_sense_request(rq);
-	unsigned int timeout;
-	u16 len;
-	u8 ireason, stat;
-
-	ide_debug_log(IDE_DBG_PC, "cmd: 0x%x, write: 0x%x", rq->cmd[0], write);
-
-	/* check for errors */
-	dma = drive->dma;
-	if (dma) {
-		drive->dma = 0;
-		drive->waiting_for_dma = 0;
-		dma_error = hwif->dma_ops->dma_end(drive);
-		ide_dma_unmap_sg(drive, cmd);
-		if (dma_error) {
-			printk(KERN_ERR PFX "%s: DMA %s error\n", drive->name,
-					write ? "write" : "read");
-			ide_dma_off(drive);
-		}
-	}
-
-	/* check status */
-	stat = hwif->tp_ops->read_status(hwif);
-
-	if (!OK_STAT(stat, 0, BAD_R_STAT)) {
-		rc = cdrom_decode_status(drive, stat);
-		if (rc) {
-			if (rc == 2)
-				goto out_end;
-			return ide_stopped;
-		}
-	}
-
-	/* using dma, transfer is complete now */
-	if (dma) {
-		if (dma_error)
-			return ide_error(drive, "dma error", stat);
-		uptodate = 1;
-		goto out_end;
-	}
-
-	ide_read_bcount_and_ireason(drive, &len, &ireason);
-
-	thislen = !blk_rq_is_passthrough(rq) ? len : cmd->nleft;
-	if (thislen > len)
-		thislen = len;
-
-	ide_debug_log(IDE_DBG_PC, "DRQ: stat: 0x%x, thislen: %d",
-				  stat, thislen);
-
-	/* If DRQ is clear, the command has completed. */
-	if ((stat & ATA_DRQ) == 0) {
-		switch (req_op(rq)) {
-		default:
-			/*
-			 * If we're not done reading/writing, complain.
-			 * Otherwise, complete the command normally.
-			 */
-			uptodate = 1;
-			if (cmd->nleft > 0) {
-				printk(KERN_ERR PFX "%s: %s: data underrun "
-					"(%u bytes)\n", drive->name, __func__,
-					cmd->nleft);
-				if (!write)
-					rq->rq_flags |= RQF_FAILED;
-				uptodate = 0;
-			}
-			goto out_end;
-		case REQ_OP_DRV_IN:
-		case REQ_OP_DRV_OUT:
-			ide_cd_request_sense_fixup(drive, cmd);
-
-			uptodate = cmd->nleft ? 0 : 1;
-
-			/*
-			 * suck out the remaining bytes from the drive in an
-			 * attempt to complete the data xfer. (see BZ#13399)
-			 */
-			if (!(stat & ATA_ERR) && !uptodate && thislen) {
-				ide_pio_bytes(drive, cmd, write, thislen);
-				uptodate = cmd->nleft ? 0 : 1;
-			}
-
-			if (!uptodate)
-				rq->rq_flags |= RQF_FAILED;
-			goto out_end;
-		case REQ_OP_SCSI_IN:
-		case REQ_OP_SCSI_OUT:
-			goto out_end;
-		}
-	}
-
-	rc = ide_check_ireason(drive, rq, len, ireason, write);
-	if (rc)
-		goto out_end;
-
-	cmd->last_xfer_len = 0;
-
-	ide_debug_log(IDE_DBG_PC, "data transfer, rq->cmd_type: 0x%x, "
-				  "ireason: 0x%x",
-				  rq->cmd_type, ireason);
-
-	/* transfer data */
-	while (thislen > 0) {
-		int blen = min_t(int, thislen, cmd->nleft);
-
-		if (cmd->nleft == 0)
-			break;
-
-		ide_pio_bytes(drive, cmd, write, blen);
-		cmd->last_xfer_len += blen;
-
-		thislen -= blen;
-		len -= blen;
-
-		if (sense && write == 0)
-			scsi_req(rq)->sense_len += blen;
-	}
-
-	/* pad, if necessary */
-	if (len > 0) {
-		if (blk_rq_is_passthrough(rq) || write == 0)
-			ide_pad_transfer(drive, write, len);
-		else {
-			printk(KERN_ERR PFX "%s: confused, missing data\n",
-				drive->name);
-			blk_dump_rq_flags(rq, "cdrom_newpc_intr");
-		}
-	}
-
-	switch (req_op(rq)) {
-	case REQ_OP_SCSI_IN:
-	case REQ_OP_SCSI_OUT:
-		timeout = rq->timeout;
-		break;
-	case REQ_OP_DRV_IN:
-	case REQ_OP_DRV_OUT:
-		expiry = ide_cd_expiry;
-		fallthrough;
-	default:
-		timeout = ATAPI_WAIT_PC;
-		break;
-	}
-
-	hwif->expiry = expiry;
-	ide_set_handler(drive, cdrom_newpc_intr, timeout);
-	return ide_started;
-
-out_end:
-	if (blk_rq_is_scsi(rq) && rc == 0) {
-		scsi_req(rq)->resid_len = 0;
-		blk_mq_end_request(rq, BLK_STS_OK);
-		hwif->rq = NULL;
-	} else {
-		if (sense && uptodate)
-			ide_cd_complete_failed_rq(drive, rq);
-
-		if (!blk_rq_is_passthrough(rq)) {
-			if (cmd->nleft == 0)
-				uptodate = 1;
-		} else {
-			if (uptodate <= 0 && scsi_req(rq)->result == 0)
-				scsi_req(rq)->result = -EIO;
-		}
-
-		if (uptodate == 0 && rq->bio)
-			if (ide_cd_error_cmd(drive, cmd))
-				return ide_stopped;
-
-		/* make sure it's fully ended */
-		if (blk_rq_is_passthrough(rq)) {
-			scsi_req(rq)->resid_len -= cmd->nbytes - cmd->nleft;
-			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
-				scsi_req(rq)->resid_len += cmd->last_xfer_len;
-		}
-
-		ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, blk_rq_bytes(rq));
-
-		if (sense && rc == 2)
-			ide_error(drive, "request sense failure", stat);
-	}
-
-	ide_cd_free_sense(drive);
-	return ide_stopped;
-}
-
-static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	struct request_queue *q = drive->queue;
-	int write = rq_data_dir(rq) == WRITE;
-	unsigned short sectors_per_frame =
-		queue_logical_block_size(q) >> SECTOR_SHIFT;
-
-	ide_debug_log(IDE_DBG_RQ, "rq->cmd[0]: 0x%x, rq->cmd_flags: 0x%x, "
-				  "secs_per_frame: %u",
-				  rq->cmd[0], rq->cmd_flags, sectors_per_frame);
-
-	if (write) {
-		/* disk has become write protected */
-		if (get_disk_ro(cd->disk))
-			return ide_stopped;
-	} else {
-		/*
-		 * We may be retrying this request after an error.  Fix up any
-		 * weirdness which might be present in the request packet.
-		 */
-		ide_cdrom_prep_rq(drive, rq);
-	}
-
-	/* fs requests *must* be hardware frame aligned */
-	if ((blk_rq_sectors(rq) & (sectors_per_frame - 1)) ||
-	    (blk_rq_pos(rq) & (sectors_per_frame - 1)))
-		return ide_stopped;
-
-	/* use DMA, if possible */
-	drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
-
-	if (write)
-		cd->devinfo.media_written = 1;
-
-	rq->timeout = ATAPI_WAIT_PC;
-
-	return ide_started;
-}
-
-static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
-{
-
-	ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x",
-				  rq->cmd[0], rq->cmd_type);
-
-	if (blk_rq_is_scsi(rq))
-		rq->rq_flags |= RQF_QUIET;
-	else
-		rq->rq_flags &= ~RQF_FAILED;
-
-	drive->dma = 0;
-
-	/* sg request */
-	if (rq->bio) {
-		struct request_queue *q = drive->queue;
-		char *buf = bio_data(rq->bio);
-		unsigned int alignment;
-
-		drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
-
-		/*
-		 * check if dma is safe
-		 *
-		 * NOTE! The "len" and "addr" checks should possibly have
-		 * separate masks.
-		 */
-		alignment = queue_dma_alignment(q) | q->dma_pad_mask;
-		if ((unsigned long)buf & alignment
-		    || blk_rq_bytes(rq) & q->dma_pad_mask
-		    || object_is_on_stack(buf))
-			drive->dma = 0;
-	}
-}
-
-static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
-					sector_t block)
-{
-	struct ide_cmd cmd;
-	int uptodate = 0;
-	unsigned int nsectors;
-
-	ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, block: %llu",
-				  rq->cmd[0], (unsigned long long)block);
-
-	if (drive->debug_mask & IDE_DBG_RQ)
-		blk_dump_rq_flags(rq, "ide_cd_do_request");
-
-	switch (req_op(rq)) {
-	default:
-		if (cdrom_start_rw(drive, rq) == ide_stopped)
-			goto out_end;
-		break;
-	case REQ_OP_SCSI_IN:
-	case REQ_OP_SCSI_OUT:
-	handle_pc:
-		if (!rq->timeout)
-			rq->timeout = ATAPI_WAIT_PC;
-		cdrom_do_block_pc(drive, rq);
-		break;
-	case REQ_OP_DRV_IN:
-	case REQ_OP_DRV_OUT:
-		switch (ide_req(rq)->type) {
-		case ATA_PRIV_MISC:
-			/* right now this can only be a reset... */
-			uptodate = 1;
-			goto out_end;
-		case ATA_PRIV_SENSE:
-		case ATA_PRIV_PC:
-			goto handle_pc;
-		default:
-			BUG();
-		}
-	}
-
-	/* prepare sense request for this command */
-	ide_prep_sense(drive, rq);
-
-	memset(&cmd, 0, sizeof(cmd));
-
-	if (rq_data_dir(rq))
-		cmd.tf_flags |= IDE_TFLAG_WRITE;
-
-	cmd.rq = rq;
-
-	if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) {
-		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
-		ide_map_sg(drive, &cmd);
-	}
-
-	return ide_issue_pc(drive, &cmd);
-out_end:
-	nsectors = blk_rq_sectors(rq);
-
-	if (nsectors == 0)
-		nsectors = 1;
-
-	ide_complete_rq(drive, uptodate ? BLK_STS_OK : BLK_STS_IOERR, nsectors << 9);
-
-	return ide_stopped;
-}
-
-/*
- * Ioctl handling.
- *
- * Routines which queue packet commands take as a final argument a pointer to a
- * request_sense struct. If execution of the command results in an error with a
- * CHECK CONDITION status, this structure will be filled with the results of the
- * subsequent request sense command. The pointer can also be NULL, in which case
- * no sense information is returned.
- */
-static void msf_from_bcd(struct atapi_msf *msf)
-{
-	msf->minute = bcd2bin(msf->minute);
-	msf->second = bcd2bin(msf->second);
-	msf->frame  = bcd2bin(msf->frame);
-}
-
-int cdrom_check_status(ide_drive_t *drive, struct scsi_sense_hdr *sshdr)
-{
-	struct cdrom_info *info = drive->driver_data;
-	struct cdrom_device_info *cdi;
-	unsigned char cmd[BLK_MAX_CDB];
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (!info)
-		return -EIO;
-
-	cdi = &info->devinfo;
-
-	memset(cmd, 0, BLK_MAX_CDB);
-	cmd[0] = GPCMD_TEST_UNIT_READY;
-
-	/*
-	 * Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to switch CDs
-	 * instead of supporting the LOAD_UNLOAD opcode.
-	 */
-	cmd[7] = cdi->sanyo_slot % 3;
-
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, sshdr, 0, RQF_QUIET);
-}
-
-static int cdrom_read_capacity(ide_drive_t *drive, unsigned long *capacity,
-			       unsigned long *sectors_per_frame)
-{
-	struct {
-		__be32 lba;
-		__be32 blocklen;
-	} capbuf;
-
-	int stat;
-	unsigned char cmd[BLK_MAX_CDB];
-	unsigned len = sizeof(capbuf);
-	u32 blocklen;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	memset(cmd, 0, BLK_MAX_CDB);
-	cmd[0] = GPCMD_READ_CDVD_CAPACITY;
-
-	stat = ide_cd_queue_pc(drive, cmd, 0, &capbuf, &len, NULL, 0,
-			       RQF_QUIET);
-	if (stat)
-		return stat;
-
-	/*
-	 * Sanity check the given block size, in so far as making
-	 * sure the sectors_per_frame we give to the caller won't
-	 * end up being bogus.
-	 */
-	blocklen = be32_to_cpu(capbuf.blocklen);
-	blocklen = (blocklen >> SECTOR_SHIFT) << SECTOR_SHIFT;
-	switch (blocklen) {
-	case 512:
-	case 1024:
-	case 2048:
-	case 4096:
-		break;
-	default:
-		printk_once(KERN_ERR PFX "%s: weird block size %u; "
-				"setting default block size to 2048\n",
-				drive->name, blocklen);
-		blocklen = 2048;
-		break;
-	}
-
-	*capacity = 1 + be32_to_cpu(capbuf.lba);
-	*sectors_per_frame = blocklen >> SECTOR_SHIFT;
-
-	ide_debug_log(IDE_DBG_PROBE, "cap: %lu, sectors_per_frame: %lu",
-				     *capacity, *sectors_per_frame);
-
-	return 0;
-}
-
-static int ide_cdrom_read_tocentry(ide_drive_t *drive, int trackno,
-		int msf_flag, int format, char *buf, int buflen)
-{
-	unsigned char cmd[BLK_MAX_CDB];
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	memset(cmd, 0, BLK_MAX_CDB);
-
-	cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
-	cmd[6] = trackno;
-	cmd[7] = (buflen >> 8);
-	cmd[8] = (buflen & 0xff);
-	cmd[9] = (format << 6);
-
-	if (msf_flag)
-		cmd[1] = 2;
-
-	return ide_cd_queue_pc(drive, cmd, 0, buf, &buflen, NULL, 0, RQF_QUIET);
-}
-
-/* Try to read the entire TOC for the disk into our internal buffer. */
-int ide_cd_read_toc(ide_drive_t *drive)
-{
-	int stat, ntracks, i;
-	struct cdrom_info *info = drive->driver_data;
-	struct cdrom_device_info *cdi = &info->devinfo;
-	struct atapi_toc *toc = info->toc;
-	struct {
-		struct atapi_toc_header hdr;
-		struct atapi_toc_entry  ent;
-	} ms_tmp;
-	long last_written;
-	unsigned long sectors_per_frame = SECTORS_PER_FRAME;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (toc == NULL) {
-		/* try to allocate space */
-		toc = kmalloc(sizeof(struct atapi_toc), GFP_KERNEL);
-		if (toc == NULL) {
-			printk(KERN_ERR PFX "%s: No cdrom TOC buffer!\n",
-					drive->name);
-			return -ENOMEM;
-		}
-		info->toc = toc;
-	}
-
-	/*
-	 * Check to see if the existing data is still valid. If it is,
-	 * just return.
-	 */
-	(void) cdrom_check_status(drive, NULL);
-
-	if (drive->atapi_flags & IDE_AFLAG_TOC_VALID)
-		return 0;
-
-	/* try to get the total cdrom capacity and sector size */
-	stat = cdrom_read_capacity(drive, &toc->capacity, &sectors_per_frame);
-	if (stat)
-		toc->capacity = 0x1fffff;
-
-	set_capacity(info->disk, toc->capacity * sectors_per_frame);
-	/* save a private copy of the TOC capacity for error handling */
-	drive->probed_capacity = toc->capacity * sectors_per_frame;
-
-	blk_queue_logical_block_size(drive->queue,
-				     sectors_per_frame << SECTOR_SHIFT);
-
-	/* first read just the header, so we know how long the TOC is */
-	stat = ide_cdrom_read_tocentry(drive, 0, 1, 0, (char *) &toc->hdr,
-				    sizeof(struct atapi_toc_header));
-	if (stat)
-		return stat;
-
-	if (drive->atapi_flags & IDE_AFLAG_TOCTRACKS_AS_BCD) {
-		toc->hdr.first_track = bcd2bin(toc->hdr.first_track);
-		toc->hdr.last_track  = bcd2bin(toc->hdr.last_track);
-	}
-
-	ntracks = toc->hdr.last_track - toc->hdr.first_track + 1;
-	if (ntracks <= 0)
-		return -EIO;
-	if (ntracks > MAX_TRACKS)
-		ntracks = MAX_TRACKS;
-
-	/* now read the whole schmeer */
-	stat = ide_cdrom_read_tocentry(drive, toc->hdr.first_track, 1, 0,
-				  (char *)&toc->hdr,
-				   sizeof(struct atapi_toc_header) +
-				   (ntracks + 1) *
-				   sizeof(struct atapi_toc_entry));
-
-	if (stat && toc->hdr.first_track > 1) {
-		/*
-		 * Cds with CDI tracks only don't have any TOC entries, despite
-		 * of this the returned values are
-		 * first_track == last_track = number of CDI tracks + 1,
-		 * so that this case is indistinguishable from the same layout
-		 * plus an additional audio track. If we get an error for the
-		 * regular case, we assume a CDI without additional audio
-		 * tracks. In this case the readable TOC is empty (CDI tracks
-		 * are not included) and only holds the Leadout entry.
-		 *
-		 * Heiko Eißfeldt.
-		 */
-		ntracks = 0;
-		stat = ide_cdrom_read_tocentry(drive, CDROM_LEADOUT, 1, 0,
-					   (char *)&toc->hdr,
-					   sizeof(struct atapi_toc_header) +
-					   (ntracks + 1) *
-					   sizeof(struct atapi_toc_entry));
-		if (stat)
-			return stat;
-
-		if (drive->atapi_flags & IDE_AFLAG_TOCTRACKS_AS_BCD) {
-			toc->hdr.first_track = (u8)bin2bcd(CDROM_LEADOUT);
-			toc->hdr.last_track = (u8)bin2bcd(CDROM_LEADOUT);
-		} else {
-			toc->hdr.first_track = CDROM_LEADOUT;
-			toc->hdr.last_track = CDROM_LEADOUT;
-		}
-	}
-
-	if (stat)
-		return stat;
-
-	toc->hdr.toc_length = be16_to_cpu(toc->hdr.toc_length);
-
-	if (drive->atapi_flags & IDE_AFLAG_TOCTRACKS_AS_BCD) {
-		toc->hdr.first_track = bcd2bin(toc->hdr.first_track);
-		toc->hdr.last_track  = bcd2bin(toc->hdr.last_track);
-	}
-
-	for (i = 0; i <= ntracks; i++) {
-		if (drive->atapi_flags & IDE_AFLAG_TOCADDR_AS_BCD) {
-			if (drive->atapi_flags & IDE_AFLAG_TOCTRACKS_AS_BCD)
-				toc->ent[i].track = bcd2bin(toc->ent[i].track);
-			msf_from_bcd(&toc->ent[i].addr.msf);
-		}
-		toc->ent[i].addr.lba = msf_to_lba(toc->ent[i].addr.msf.minute,
-						  toc->ent[i].addr.msf.second,
-						  toc->ent[i].addr.msf.frame);
-	}
-
-	if (toc->hdr.first_track != CDROM_LEADOUT) {
-		/* read the multisession information */
-		stat = ide_cdrom_read_tocentry(drive, 0, 0, 1, (char *)&ms_tmp,
-					   sizeof(ms_tmp));
-		if (stat)
-			return stat;
-
-		toc->last_session_lba = be32_to_cpu(ms_tmp.ent.addr.lba);
-	} else {
-		ms_tmp.hdr.last_track = CDROM_LEADOUT;
-		ms_tmp.hdr.first_track = ms_tmp.hdr.last_track;
-		toc->last_session_lba = msf_to_lba(0, 2, 0); /* 0m 2s 0f */
-	}
-
-	if (drive->atapi_flags & IDE_AFLAG_TOCADDR_AS_BCD) {
-		/* re-read multisession information using MSF format */
-		stat = ide_cdrom_read_tocentry(drive, 0, 1, 1, (char *)&ms_tmp,
-					   sizeof(ms_tmp));
-		if (stat)
-			return stat;
-
-		msf_from_bcd(&ms_tmp.ent.addr.msf);
-		toc->last_session_lba = msf_to_lba(ms_tmp.ent.addr.msf.minute,
-						   ms_tmp.ent.addr.msf.second,
-						   ms_tmp.ent.addr.msf.frame);
-	}
-
-	toc->xa_flag = (ms_tmp.hdr.first_track != ms_tmp.hdr.last_track);
-
-	/* now try to get the total cdrom capacity */
-	stat = cdrom_get_last_written(cdi, &last_written);
-	if (!stat && (last_written > toc->capacity)) {
-		toc->capacity = last_written;
-		set_capacity(info->disk, toc->capacity * sectors_per_frame);
-		drive->probed_capacity = toc->capacity * sectors_per_frame;
-	}
-
-	/* Remember that we've read this stuff. */
-	drive->atapi_flags |= IDE_AFLAG_TOC_VALID;
-
-	return 0;
-}
-
-int ide_cdrom_get_capabilities(ide_drive_t *drive, u8 *buf)
-{
-	struct cdrom_info *info = drive->driver_data;
-	struct cdrom_device_info *cdi = &info->devinfo;
-	struct packet_command cgc;
-	int stat, attempts = 3, size = ATAPI_CAPABILITIES_PAGE_SIZE;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if ((drive->atapi_flags & IDE_AFLAG_FULL_CAPS_PAGE) == 0)
-		size -= ATAPI_CAPABILITIES_PAGE_PAD_SIZE;
-
-	init_cdrom_command(&cgc, buf, size, CGC_DATA_UNKNOWN);
-	do {
-		/* we seem to get stat=0x01,err=0x00 the first time (??) */
-		stat = cdrom_mode_sense(cdi, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
-		if (!stat)
-			break;
-	} while (--attempts);
-	return stat;
-}
-
-void ide_cdrom_update_speed(ide_drive_t *drive, u8 *buf)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	u16 curspeed, maxspeed;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (drive->atapi_flags & IDE_AFLAG_LE_SPEED_FIELDS) {
-		curspeed = le16_to_cpup((__le16 *)&buf[8 + 14]);
-		maxspeed = le16_to_cpup((__le16 *)&buf[8 + 8]);
-	} else {
-		curspeed = be16_to_cpup((__be16 *)&buf[8 + 14]);
-		maxspeed = be16_to_cpup((__be16 *)&buf[8 + 8]);
-	}
-
-	ide_debug_log(IDE_DBG_PROBE, "curspeed: %u, maxspeed: %u",
-				     curspeed, maxspeed);
-
-	cd->current_speed = DIV_ROUND_CLOSEST(curspeed, 176);
-	cd->max_speed = DIV_ROUND_CLOSEST(maxspeed, 176);
-}
-
-#define IDE_CD_CAPABILITIES \
-	(CDC_CLOSE_TRAY | CDC_OPEN_TRAY | CDC_LOCK | CDC_SELECT_SPEED | \
-	 CDC_SELECT_DISC | CDC_MULTI_SESSION | CDC_MCN | CDC_MEDIA_CHANGED | \
-	 CDC_PLAY_AUDIO | CDC_RESET | CDC_DRIVE_STATUS | CDC_CD_R | \
-	 CDC_CD_RW | CDC_DVD | CDC_DVD_R | CDC_DVD_RAM | CDC_GENERIC_PACKET | \
-	 CDC_MO_DRIVE | CDC_MRW | CDC_MRW_W | CDC_RAM)
-
-static const struct cdrom_device_ops ide_cdrom_dops = {
-	.open			= ide_cdrom_open_real,
-	.release		= ide_cdrom_release_real,
-	.drive_status		= ide_cdrom_drive_status,
-	.check_events		= ide_cdrom_check_events_real,
-	.tray_move		= ide_cdrom_tray_move,
-	.lock_door		= ide_cdrom_lock_door,
-	.select_speed		= ide_cdrom_select_speed,
-	.get_last_session	= ide_cdrom_get_last_session,
-	.get_mcn		= ide_cdrom_get_mcn,
-	.reset			= ide_cdrom_reset,
-	.audio_ioctl		= ide_cdrom_audio_ioctl,
-	.capability		= IDE_CD_CAPABILITIES,
-	.generic_packet		= ide_cdrom_packet,
-};
-
-static int ide_cdrom_register(ide_drive_t *drive, int nslots)
-{
-	struct cdrom_info *info = drive->driver_data;
-	struct cdrom_device_info *devinfo = &info->devinfo;
-
-	ide_debug_log(IDE_DBG_PROBE, "nslots: %d", nslots);
-
-	devinfo->ops = &ide_cdrom_dops;
-	devinfo->speed = info->current_speed;
-	devinfo->capacity = nslots;
-	devinfo->handle = drive;
-	strcpy(devinfo->name, drive->name);
-
-	if (drive->atapi_flags & IDE_AFLAG_NO_SPEED_SELECT)
-		devinfo->mask |= CDC_SELECT_SPEED;
-
-	return register_cdrom(info->disk, devinfo);
-}
-
-static int ide_cdrom_probe_capabilities(ide_drive_t *drive)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	struct cdrom_device_info *cdi = &cd->devinfo;
-	u8 buf[ATAPI_CAPABILITIES_PAGE_SIZE];
-	mechtype_t mechtype;
-	int nslots = 1;
-
-	ide_debug_log(IDE_DBG_PROBE, "media: 0x%x, atapi_flags: 0x%lx",
-				     drive->media, drive->atapi_flags);
-
-	cdi->mask = (CDC_CD_R | CDC_CD_RW | CDC_DVD | CDC_DVD_R |
-		     CDC_DVD_RAM | CDC_SELECT_DISC | CDC_PLAY_AUDIO |
-		     CDC_MO_DRIVE | CDC_RAM);
-
-	if (drive->media == ide_optical) {
-		cdi->mask &= ~(CDC_MO_DRIVE | CDC_RAM);
-		printk(KERN_ERR PFX "%s: ATAPI magneto-optical drive\n",
-				drive->name);
-		return nslots;
-	}
-
-	if (drive->atapi_flags & IDE_AFLAG_PRE_ATAPI12) {
-		drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
-		cdi->mask &= ~CDC_PLAY_AUDIO;
-		return nslots;
-	}
-
-	/*
-	 * We have to cheat a little here. the packet will eventually be queued
-	 * with ide_cdrom_packet(), which extracts the drive from cdi->handle.
-	 * Since this device hasn't been registered with the Uniform layer yet,
-	 * it can't do this. Same goes for cdi->ops.
-	 */
-	cdi->handle = drive;
-	cdi->ops = &ide_cdrom_dops;
-
-	if (ide_cdrom_get_capabilities(drive, buf))
-		return 0;
-
-	if ((buf[8 + 6] & 0x01) == 0)
-		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	if (buf[8 + 6] & 0x08)
-		drive->atapi_flags &= ~IDE_AFLAG_NO_EJECT;
-	if (buf[8 + 3] & 0x01)
-		cdi->mask &= ~CDC_CD_R;
-	if (buf[8 + 3] & 0x02)
-		cdi->mask &= ~(CDC_CD_RW | CDC_RAM);
-	if (buf[8 + 2] & 0x38)
-		cdi->mask &= ~CDC_DVD;
-	if (buf[8 + 3] & 0x20)
-		cdi->mask &= ~(CDC_DVD_RAM | CDC_RAM);
-	if (buf[8 + 3] & 0x10)
-		cdi->mask &= ~CDC_DVD_R;
-	if ((buf[8 + 4] & 0x01) || (drive->atapi_flags & IDE_AFLAG_PLAY_AUDIO_OK))
-		cdi->mask &= ~CDC_PLAY_AUDIO;
-
-	mechtype = buf[8 + 6] >> 5;
-	if (mechtype == mechtype_caddy ||
-	    mechtype == mechtype_popup ||
-	    (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE))
-		cdi->mask |= CDC_CLOSE_TRAY;
-
-	if (cdi->sanyo_slot > 0) {
-		cdi->mask &= ~CDC_SELECT_DISC;
-		nslots = 3;
-	} else if (mechtype == mechtype_individual_changer ||
-		   mechtype == mechtype_cartridge_changer) {
-		nslots = cdrom_number_of_slots(cdi);
-		if (nslots > 1)
-			cdi->mask &= ~CDC_SELECT_DISC;
-	}
-
-	ide_cdrom_update_speed(drive, buf);
-
-	printk(KERN_INFO PFX "%s: ATAPI", drive->name);
-
-	/* don't print speed if the drive reported 0 */
-	if (cd->max_speed)
-		printk(KERN_CONT " %dX", cd->max_speed);
-
-	printk(KERN_CONT " %s", (cdi->mask & CDC_DVD) ? "CD-ROM" : "DVD-ROM");
-
-	if ((cdi->mask & CDC_DVD_R) == 0 || (cdi->mask & CDC_DVD_RAM) == 0)
-		printk(KERN_CONT " DVD%s%s",
-				 (cdi->mask & CDC_DVD_R) ? "" : "-R",
-				 (cdi->mask & CDC_DVD_RAM) ? "" : "/RAM");
-
-	if ((cdi->mask & CDC_CD_R) == 0 || (cdi->mask & CDC_CD_RW) == 0)
-		printk(KERN_CONT " CD%s%s",
-				 (cdi->mask & CDC_CD_R) ? "" : "-R",
-				 (cdi->mask & CDC_CD_RW) ? "" : "/RW");
-
-	if ((cdi->mask & CDC_SELECT_DISC) == 0)
-		printk(KERN_CONT " changer w/%d slots", nslots);
-	else
-		printk(KERN_CONT " drive");
-
-	printk(KERN_CONT ", %dkB Cache\n",
-			 be16_to_cpup((__be16 *)&buf[8 + 12]));
-
-	return nslots;
-}
-
-struct cd_list_entry {
-	const char	*id_model;
-	const char	*id_firmware;
-	unsigned int	cd_flags;
-};
-
-#ifdef CONFIG_IDE_PROC_FS
-static sector_t ide_cdrom_capacity(ide_drive_t *drive)
-{
-	unsigned long capacity, sectors_per_frame;
-
-	if (cdrom_read_capacity(drive, &capacity, &sectors_per_frame))
-		return 0;
-
-	return capacity * sectors_per_frame;
-}
-
-static int idecd_capacity_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t *drive = m->private;
-
-	seq_printf(m, "%llu\n", (long long)ide_cdrom_capacity(drive));
-	return 0;
-}
-
-static ide_proc_entry_t idecd_proc[] = {
-	{ "capacity", S_IFREG|S_IRUGO, idecd_capacity_proc_show },
-	{}
-};
-
-static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive)
-{
-	return idecd_proc;
-}
-
-static const struct ide_proc_devset *ide_cd_proc_devsets(ide_drive_t *drive)
-{
-	return NULL;
-}
-#endif
-
-static const struct cd_list_entry ide_cd_quirks_list[] = {
-	/* SCR-3231 doesn't support the SET_CD_SPEED command. */
-	{ "SAMSUNG CD-ROM SCR-3231", NULL,   IDE_AFLAG_NO_SPEED_SELECT	     },
-	/* Old NEC260 (not R) was released before ATAPI 1.2 spec. */
-	{ "NEC CD-ROM DRIVE:260",    "1.01", IDE_AFLAG_TOCADDR_AS_BCD |
-					     IDE_AFLAG_PRE_ATAPI12,	     },
-	/* Vertos 300, some versions of this drive like to talk BCD. */
-	{ "V003S0DS",		     NULL,   IDE_AFLAG_VERTOS_300_SSD,	     },
-	/* Vertos 600 ESD. */
-	{ "V006E0DS",		     NULL,   IDE_AFLAG_VERTOS_600_ESD,	     },
-	/*
-	 * Sanyo 3 CD changer uses a non-standard command for CD changing
-	 * (by default standard ATAPI support for CD changers is used).
-	 */
-	{ "CD-ROM CDR-C3 G",	     NULL,   IDE_AFLAG_SANYO_3CD	     },
-	{ "CD-ROM CDR-C3G",	     NULL,   IDE_AFLAG_SANYO_3CD	     },
-	{ "CD-ROM CDR_C36",	     NULL,   IDE_AFLAG_SANYO_3CD	     },
-	/* Stingray 8X CD-ROM. */
-	{ "STINGRAY 8422 IDE 8X CD-ROM 7-27-95", NULL, IDE_AFLAG_PRE_ATAPI12 },
-	/*
-	 * ACER 50X CD-ROM and WPI 32X CD-ROM require the full spec length
-	 * mode sense page capabilities size, but older drives break.
-	 */
-	{ "ATAPI CD ROM DRIVE 50X MAX",	NULL,	IDE_AFLAG_FULL_CAPS_PAGE     },
-	{ "WPI CDS-32X",		NULL,	IDE_AFLAG_FULL_CAPS_PAGE     },
-	/* ACER/AOpen 24X CD-ROM has the speed fields byte-swapped. */
-	{ "",			     "241N", IDE_AFLAG_LE_SPEED_FIELDS       },
-	/*
-	 * Some drives used by Apple don't advertise audio play
-	 * but they do support reading TOC & audio datas.
-	 */
-	{ "MATSHITADVD-ROM SR-8187", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "MATSHITADVD-ROM SR-8186", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "MATSHITADVD-ROM SR-8176", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "MATSHITADVD-ROM SR-8174", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "Optiarc DVD RW AD-5200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "Optiarc DVD RW AD-7200A", NULL,   IDE_AFLAG_PLAY_AUDIO_OK	     },
-	{ "Optiarc DVD RW AD-7543A", NULL,   IDE_AFLAG_NO_AUTOCLOSE	     },
-	{ "TEAC CD-ROM CD-224E",     NULL,   IDE_AFLAG_NO_AUTOCLOSE	     },
-	{ NULL, NULL, 0 }
-};
-
-static unsigned int ide_cd_flags(u16 *id)
-{
-	const struct cd_list_entry *cle = ide_cd_quirks_list;
-
-	while (cle->id_model) {
-		if (strcmp(cle->id_model, (char *)&id[ATA_ID_PROD]) == 0 &&
-		    (cle->id_firmware == NULL ||
-		     strstr((char *)&id[ATA_ID_FW_REV], cle->id_firmware)))
-			return cle->cd_flags;
-		cle++;
-	}
-
-	return 0;
-}
-
-static int ide_cdrom_setup(ide_drive_t *drive)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	struct cdrom_device_info *cdi = &cd->devinfo;
-	struct request_queue *q = drive->queue;
-	u16 *id = drive->id;
-	char *fw_rev = (char *)&id[ATA_ID_FW_REV];
-	int nslots;
-
-	ide_debug_log(IDE_DBG_PROBE, "enter");
-
-	drive->prep_rq = ide_cdrom_prep_rq;
-	blk_queue_dma_alignment(q, 31);
-	blk_queue_update_dma_pad(q, 15);
-
-	drive->dev_flags |= IDE_DFLAG_MEDIA_CHANGED;
-	drive->atapi_flags = IDE_AFLAG_NO_EJECT | ide_cd_flags(id);
-
-	if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) &&
-	    fw_rev[4] == '1' && fw_rev[6] <= '2')
-		drive->atapi_flags |= (IDE_AFLAG_TOCTRACKS_AS_BCD |
-				     IDE_AFLAG_TOCADDR_AS_BCD);
-	else if ((drive->atapi_flags & IDE_AFLAG_VERTOS_600_ESD) &&
-		 fw_rev[4] == '1' && fw_rev[6] <= '2')
-		drive->atapi_flags |= IDE_AFLAG_TOCTRACKS_AS_BCD;
-	else if (drive->atapi_flags & IDE_AFLAG_SANYO_3CD)
-		/* 3 => use CD in slot 0 */
-		cdi->sanyo_slot = 3;
-
-	nslots = ide_cdrom_probe_capabilities(drive);
-
-	blk_queue_logical_block_size(q, CD_FRAMESIZE);
-
-	if (ide_cdrom_register(drive, nslots)) {
-		printk(KERN_ERR PFX "%s: %s failed to register device with the"
-				" cdrom driver.\n", drive->name, __func__);
-		cd->devinfo.handle = NULL;
-		return 1;
-	}
-
-	ide_proc_register_driver(drive, cd->driver);
-	return 0;
-}
-
-static void ide_cd_remove(ide_drive_t *drive)
-{
-	struct cdrom_info *info = drive->driver_data;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	ide_proc_unregister_driver(drive, info->driver);
-	device_del(&info->dev);
-	del_gendisk(info->disk);
-
-	mutex_lock(&idecd_ref_mutex);
-	put_device(&info->dev);
-	mutex_unlock(&idecd_ref_mutex);
-}
-
-static void ide_cd_release(struct device *dev)
-{
-	struct cdrom_info *info = to_ide_drv(dev, cdrom_info);
-	struct cdrom_device_info *devinfo = &info->devinfo;
-	ide_drive_t *drive = info->drive;
-	struct gendisk *g = info->disk;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	kfree(info->toc);
-	if (devinfo->handle == drive)
-		unregister_cdrom(devinfo);
-	drive->driver_data = NULL;
-	drive->prep_rq = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(info);
-}
-
-static int ide_cd_probe(ide_drive_t *);
-
-static struct ide_driver ide_cdrom_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-cdrom",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_cd_probe,
-	.remove			= ide_cd_remove,
-	.version		= IDECD_VERSION,
-	.do_request		= ide_cd_do_request,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_cd_proc_entries,
-	.proc_devsets		= ide_cd_proc_devsets,
-#endif
-};
-
-static int idecd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct cdrom_info *info;
-	int rc = -ENXIO;
-
-	if (bdev_check_media_change(bdev)) {
-		info = ide_drv_g(bdev->bd_disk, cdrom_info);
-
-		ide_cd_read_toc(info->drive);
-	}
-
-	mutex_lock(&ide_cd_mutex);
-	info = ide_cd_get(bdev->bd_disk);
-	if (!info)
-		goto out;
-
-	rc = cdrom_open(&info->devinfo, bdev, mode);
-	if (rc < 0)
-		ide_cd_put(info);
-out:
-	mutex_unlock(&ide_cd_mutex);
-	return rc;
-}
-
-static void idecd_release(struct gendisk *disk, fmode_t mode)
-{
-	struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
-
-	mutex_lock(&ide_cd_mutex);
-	cdrom_release(&info->devinfo, mode);
-
-	ide_cd_put(info);
-	mutex_unlock(&ide_cd_mutex);
-}
-
-static int idecd_set_spindown(struct cdrom_device_info *cdi, unsigned long arg)
-{
-	struct packet_command cgc;
-	char buffer[16];
-	int stat;
-	char spindown;
-
-	if (copy_from_user(&spindown, (void __user *)arg, sizeof(char)))
-		return -EFAULT;
-
-	init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_UNKNOWN);
-
-	stat = cdrom_mode_sense(cdi, &cgc, GPMODE_CDROM_PAGE, 0);
-	if (stat)
-		return stat;
-
-	buffer[11] = (buffer[11] & 0xf0) | (spindown & 0x0f);
-	return cdrom_mode_select(cdi, &cgc);
-}
-
-static int idecd_get_spindown(struct cdrom_device_info *cdi, unsigned long arg)
-{
-	struct packet_command cgc;
-	char buffer[16];
-	int stat;
-	char spindown;
-
-	init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_UNKNOWN);
-
-	stat = cdrom_mode_sense(cdi, &cgc, GPMODE_CDROM_PAGE, 0);
-	if (stat)
-		return stat;
-
-	spindown = buffer[11] & 0x0f;
-	if (copy_to_user((void __user *)arg, &spindown, sizeof(char)))
-		return -EFAULT;
-	return 0;
-}
-
-static int idecd_locked_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned int cmd, unsigned long arg)
-{
-	struct cdrom_info *info = ide_drv_g(bdev->bd_disk, cdrom_info);
-	int err;
-
-	switch (cmd) {
-	case CDROMSETSPINDOWN:
-		return idecd_set_spindown(&info->devinfo, arg);
-	case CDROMGETSPINDOWN:
-		return idecd_get_spindown(&info->devinfo, arg);
-	default:
-		break;
-	}
-
-	err = generic_ide_ioctl(info->drive, bdev, cmd, arg);
-	if (err == -EINVAL)
-		err = cdrom_ioctl(&info->devinfo, bdev, mode, cmd, arg);
-
-	return err;
-}
-
-static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	mutex_lock(&ide_cd_mutex);
-	ret = idecd_locked_ioctl(bdev, mode, cmd, arg);
-	mutex_unlock(&ide_cd_mutex);
-
-	return ret;
-}
-
-static int idecd_locked_compat_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned int cmd, unsigned long arg)
-{
-	struct cdrom_info *info = ide_drv_g(bdev->bd_disk, cdrom_info);
-	void __user *argp = compat_ptr(arg);
-	int err;
-
-	switch (cmd) {
-	case CDROMSETSPINDOWN:
-		return idecd_set_spindown(&info->devinfo, (unsigned long)argp);
-	case CDROMGETSPINDOWN:
-		return idecd_get_spindown(&info->devinfo, (unsigned long)argp);
-	default:
-		break;
-	}
-
-	err = generic_ide_ioctl(info->drive, bdev, cmd, arg);
-	if (err == -EINVAL)
-		err = cdrom_ioctl(&info->devinfo, bdev, mode, cmd,
-				  (unsigned long)argp);
-
-	return err;
-}
-
-static int idecd_compat_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	mutex_lock(&ide_cd_mutex);
-	ret = idecd_locked_compat_ioctl(bdev, mode, cmd, arg);
-	mutex_unlock(&ide_cd_mutex);
-
-	return ret;
-}
-
-static unsigned int idecd_check_events(struct gendisk *disk,
-				       unsigned int clearing)
-{
-	struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
-	return cdrom_check_events(&info->devinfo, clearing);
-}
-
-static const struct block_device_operations idecd_ops = {
-	.owner			= THIS_MODULE,
-	.open			= idecd_open,
-	.release		= idecd_release,
-	.ioctl			= idecd_ioctl,
-	.compat_ioctl		= IS_ENABLED(CONFIG_COMPAT) ?
-				  idecd_compat_ioctl : NULL,
-	.check_events		= idecd_check_events,
-};
-
-/* module options */
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-MODULE_DESCRIPTION("ATAPI CD-ROM Driver");
-
-static int ide_cd_probe(ide_drive_t *drive)
-{
-	struct cdrom_info *info;
-	struct gendisk *g;
-
-	ide_debug_log(IDE_DBG_PROBE, "driver_req: %s, media: 0x%x",
-				     drive->driver_req, drive->media);
-
-	if (!strstr("ide-cdrom", drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_cdrom && drive->media != ide_optical)
-		goto failed;
-
-	drive->debug_mask = debug_mask;
-	drive->irq_handler = cdrom_newpc_intr;
-
-	info = kzalloc(sizeof(struct cdrom_info), GFP_KERNEL);
-	if (info == NULL) {
-		printk(KERN_ERR PFX "%s: Can't allocate a cdrom structure\n",
-				drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk(1 << PARTN_BITS);
-	if (!g)
-		goto out_free_cd;
-
-	ide_init_disk(g, drive);
-
-	info->dev.parent = &drive->gendev;
-	info->dev.release = ide_cd_release;
-	dev_set_name(&info->dev, "%s", dev_name(&drive->gendev));
-
-	if (device_register(&info->dev))
-		goto out_free_disk;
-
-	info->drive = drive;
-	info->driver = &ide_cdrom_driver;
-	info->disk = g;
-
-	g->private_data = &info->driver;
-
-	drive->driver_data = info;
-
-	g->minors = 1;
-	g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE;
-	if (ide_cdrom_setup(drive)) {
-		put_device(&info->dev);
-		goto failed;
-	}
-
-	ide_cd_read_toc(drive);
-	g->fops = &idecd_ops;
-	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
-	g->events = DISK_EVENT_MEDIA_CHANGE;
-	device_add_disk(&drive->gendev, g, NULL);
-	return 0;
-
-out_free_disk:
-	put_disk(g);
-out_free_cd:
-	kfree(info);
-failed:
-	return -ENODEV;
-}
-
-static void __exit ide_cdrom_exit(void)
-{
-	driver_unregister(&ide_cdrom_driver.gen_driver);
-}
-
-static int __init ide_cdrom_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDECD_VERSION "\n");
-	return driver_register(&ide_cdrom_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-cdrom*");
-MODULE_ALIAS("ide-cd");
-module_init(ide_cdrom_init);
-module_exit(ide_cdrom_exit);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h
deleted file mode 100644
index a69dc7f61c4d5..0000000000000
--- a/drivers/ide/ide-cd.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  Copyright (C) 1996-98  Erik Andersen
- *  Copyright (C) 1998-2000 Jens Axboe
- */
-#ifndef _IDE_CD_H
-#define _IDE_CD_H
-
-#include <linux/cdrom.h>
-#include <asm/byteorder.h>
-
-#define IDECD_DEBUG_LOG		0
-
-#if IDECD_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, ## args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
-#define ATAPI_WAIT_WRITE_BUSY	(10 * HZ)
-
-/************************************************************************/
-
-#define SECTORS_PER_FRAME	(CD_FRAMESIZE >> SECTOR_SHIFT)
-#define SECTOR_BUFFER_SIZE	(CD_FRAMESIZE * 32)
-
-/* Capabilities Page size including 8 bytes of Mode Page Header */
-#define ATAPI_CAPABILITIES_PAGE_SIZE		(8 + 20)
-#define ATAPI_CAPABILITIES_PAGE_PAD_SIZE	4
-
-/* Structure of a MSF cdrom address. */
-struct atapi_msf {
-	u8 reserved;
-	u8 minute;
-	u8 second;
-	u8 frame;
-};
-
-/* Space to hold the disk TOC. */
-#define MAX_TRACKS 99
-struct atapi_toc_header {
-	unsigned short toc_length;
-	u8 first_track;
-	u8 last_track;
-};
-
-struct atapi_toc_entry {
-	u8 reserved1;
-#if defined(__BIG_ENDIAN_BITFIELD)
-	u8 adr     : 4;
-	u8 control : 4;
-#elif defined(__LITTLE_ENDIAN_BITFIELD)
-	u8 control : 4;
-	u8 adr     : 4;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	u8 track;
-	u8 reserved2;
-	union {
-		unsigned lba;
-		struct atapi_msf msf;
-	} addr;
-};
-
-struct atapi_toc {
-	int    last_session_lba;
-	int    xa_flag;
-	unsigned long capacity;
-	struct atapi_toc_header hdr;
-	struct atapi_toc_entry  ent[MAX_TRACKS+1];
-	  /* One extra for the leadout. */
-};
-
-/* Extra per-device info for cdrom drives. */
-struct cdrom_info {
-	ide_drive_t		*drive;
-	struct ide_driver	*driver;
-	struct gendisk		*disk;
-	struct device		dev;
-
-	/* Buffer for table of contents.  NULL if we haven't allocated
-	   a TOC buffer for this device yet. */
-
-	struct atapi_toc *toc;
-
-	u8 max_speed;		/* Max speed of the drive. */
-	u8 current_speed;	/* Current speed of the drive. */
-
-        /* Per-device info needed by cdrom.c generic driver. */
-        struct cdrom_device_info devinfo;
-
-	unsigned long write_timeout;
-};
-
-/* ide-cd_verbose.c */
-void ide_cd_log_error(const char *, struct request *, struct request_sense *);
-
-/* ide-cd.c functions used by ide-cd_ioctl.c */
-int ide_cd_queue_pc(ide_drive_t *, const unsigned char *, int, void *,
-		    unsigned *, struct scsi_sense_hdr *, int, req_flags_t);
-int ide_cd_read_toc(ide_drive_t *);
-int ide_cdrom_get_capabilities(ide_drive_t *, u8 *);
-void ide_cdrom_update_speed(ide_drive_t *, u8 *);
-int cdrom_check_status(ide_drive_t *, struct scsi_sense_hdr *);
-
-/* ide-cd_ioctl.c */
-int ide_cdrom_open_real(struct cdrom_device_info *, int);
-void ide_cdrom_release_real(struct cdrom_device_info *);
-int ide_cdrom_drive_status(struct cdrom_device_info *, int);
-unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *,
-					 unsigned int clearing, int slot_nr);
-int ide_cdrom_tray_move(struct cdrom_device_info *, int);
-int ide_cdrom_lock_door(struct cdrom_device_info *, int);
-int ide_cdrom_select_speed(struct cdrom_device_info *, int);
-int ide_cdrom_get_last_session(struct cdrom_device_info *,
-			       struct cdrom_multisession *);
-int ide_cdrom_get_mcn(struct cdrom_device_info *, struct cdrom_mcn *);
-int ide_cdrom_reset(struct cdrom_device_info *cdi);
-int ide_cdrom_audio_ioctl(struct cdrom_device_info *, unsigned int, void *);
-int ide_cdrom_packet(struct cdrom_device_info *, struct packet_command *);
-
-#endif /* _IDE_CD_H */
diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
deleted file mode 100644
index 011eab9c69b7e..0000000000000
--- a/drivers/ide/ide-cd_ioctl.c
+++ /dev/null
@@ -1,468 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * cdrom.c IOCTLs handling for ide-cd driver.
- *
- * Copyright (C) 1994-1996  Scott Snyder <snyder@fnald0.fnal.gov>
- * Copyright (C) 1996-1998  Erik Andersen <andersee@debian.org>
- * Copyright (C) 1998-2000  Jens Axboe <axboe@suse.de>
- */
-
-#include <linux/kernel.h>
-#include <linux/cdrom.h>
-#include <linux/gfp.h>
-#include <linux/ide.h>
-#include <scsi/scsi.h>
-
-#include "ide-cd.h"
-
-/****************************************************************************
- * Other driver requests (open, close, check media change).
- */
-int ide_cdrom_open_real(struct cdrom_device_info *cdi, int purpose)
-{
-	return 0;
-}
-
-/*
- * Close down the device.  Invalidate all cached blocks.
- */
-void ide_cdrom_release_real(struct cdrom_device_info *cdi)
-{
-	ide_drive_t *drive = cdi->handle;
-
-	if (!cdi->use_count)
-		drive->atapi_flags &= ~IDE_AFLAG_TOC_VALID;
-}
-
-/*
- * add logic to try GET_EVENT command first to check for media and tray
- * status. this should be supported by newer cd-r/w and all DVD etc
- * drives
- */
-int ide_cdrom_drive_status(struct cdrom_device_info *cdi, int slot_nr)
-{
-	ide_drive_t *drive = cdi->handle;
-	struct media_event_desc med;
-	struct scsi_sense_hdr sshdr;
-	int stat;
-
-	if (slot_nr != CDSL_CURRENT)
-		return -EINVAL;
-
-	stat = cdrom_check_status(drive, &sshdr);
-	if (!stat || sshdr.sense_key == UNIT_ATTENTION)
-		return CDS_DISC_OK;
-
-	if (!cdrom_get_media_event(cdi, &med)) {
-		if (med.media_present)
-			return CDS_DISC_OK;
-		else if (med.door_open)
-			return CDS_TRAY_OPEN;
-		else
-			return CDS_NO_DISC;
-	}
-
-	if (sshdr.sense_key == NOT_READY && sshdr.asc == 0x04
-			&& sshdr.ascq == 0x04)
-		return CDS_DISC_OK;
-
-	/*
-	 * If not using Mt Fuji extended media tray reports,
-	 * just return TRAY_OPEN since ATAPI doesn't provide
-	 * any other way to detect this...
-	 */
-	if (sshdr.sense_key == NOT_READY) {
-		if (sshdr.asc == 0x3a && sshdr.ascq == 1)
-			return CDS_NO_DISC;
-		else
-			return CDS_TRAY_OPEN;
-	}
-	return CDS_DRIVE_NOT_READY;
-}
-
-/*
- * ide-cd always generates media changed event if media is missing, which
- * makes it impossible to use for proper event reporting, so
- * DISK_EVENT_FLAG_UEVENT is cleared in disk->event_flags
- * and the following function is used only to trigger
- * revalidation and never propagated to userland.
- */
-unsigned int ide_cdrom_check_events_real(struct cdrom_device_info *cdi,
-					 unsigned int clearing, int slot_nr)
-{
-	ide_drive_t *drive = cdi->handle;
-	int retval;
-
-	if (slot_nr == CDSL_CURRENT) {
-		(void) cdrom_check_status(drive, NULL);
-		retval = (drive->dev_flags & IDE_DFLAG_MEDIA_CHANGED) ? 1 : 0;
-		drive->dev_flags &= ~IDE_DFLAG_MEDIA_CHANGED;
-		return retval ? DISK_EVENT_MEDIA_CHANGE : 0;
-	} else {
-		return 0;
-	}
-}
-
-/* Eject the disk if EJECTFLAG is 0.
-   If EJECTFLAG is 1, try to reload the disk. */
-static
-int cdrom_eject(ide_drive_t *drive, int ejectflag)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	struct cdrom_device_info *cdi = &cd->devinfo;
-	char loej = 0x02;
-	unsigned char cmd[BLK_MAX_CDB];
-
-	if ((drive->atapi_flags & IDE_AFLAG_NO_EJECT) && !ejectflag)
-		return -EDRIVE_CANT_DO_THIS;
-
-	/* reload fails on some drives, if the tray is locked */
-	if ((drive->atapi_flags & IDE_AFLAG_DOOR_LOCKED) && ejectflag)
-		return 0;
-
-	/* only tell drive to close tray if open, if it can do that */
-	if (ejectflag && (cdi->mask & CDC_CLOSE_TRAY))
-		loej = 0;
-
-	memset(cmd, 0, BLK_MAX_CDB);
-
-	cmd[0] = GPCMD_START_STOP_UNIT;
-	cmd[4] = loej | (ejectflag != 0);
-
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, NULL, 0, 0);
-}
-
-/* Lock the door if LOCKFLAG is nonzero; unlock it otherwise. */
-static
-int ide_cd_lockdoor(ide_drive_t *drive, int lockflag)
-{
-	struct scsi_sense_hdr sshdr;
-	int stat;
-
-	/* If the drive cannot lock the door, just pretend. */
-	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0) {
-		stat = 0;
-	} else {
-		unsigned char cmd[BLK_MAX_CDB];
-
-		memset(cmd, 0, BLK_MAX_CDB);
-
-		cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
-		cmd[4] = lockflag ? 1 : 0;
-
-		stat = ide_cd_queue_pc(drive, cmd, 0, NULL, NULL,
-				       &sshdr, 0, 0);
-	}
-
-	/* If we got an illegal field error, the drive
-	   probably cannot lock the door. */
-	if (stat != 0 &&
-	    sshdr.sense_key == ILLEGAL_REQUEST &&
-	    (sshdr.asc == 0x24 || sshdr.asc == 0x20)) {
-		printk(KERN_ERR "%s: door locking not supported\n",
-			drive->name);
-		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-		stat = 0;
-	}
-
-	/* no medium, that's alright. */
-	if (stat != 0 && sshdr.sense_key == NOT_READY && sshdr.asc == 0x3a)
-		stat = 0;
-
-	if (stat == 0) {
-		if (lockflag)
-			drive->atapi_flags |= IDE_AFLAG_DOOR_LOCKED;
-		else
-			drive->atapi_flags &= ~IDE_AFLAG_DOOR_LOCKED;
-	}
-
-	return stat;
-}
-
-int ide_cdrom_tray_move(struct cdrom_device_info *cdi, int position)
-{
-	ide_drive_t *drive = cdi->handle;
-
-	if (position) {
-		int stat = ide_cd_lockdoor(drive, 0);
-
-		if (stat)
-			return stat;
-	}
-
-	return cdrom_eject(drive, !position);
-}
-
-int ide_cdrom_lock_door(struct cdrom_device_info *cdi, int lock)
-{
-	ide_drive_t *drive = cdi->handle;
-
-	return ide_cd_lockdoor(drive, lock);
-}
-
-/*
- * ATAPI devices are free to select the speed you request or any slower
- * rate. :-(  Requesting too fast a speed will _not_ produce an error.
- */
-int ide_cdrom_select_speed(struct cdrom_device_info *cdi, int speed)
-{
-	ide_drive_t *drive = cdi->handle;
-	struct cdrom_info *cd = drive->driver_data;
-	u8 buf[ATAPI_CAPABILITIES_PAGE_SIZE];
-	int stat;
-	unsigned char cmd[BLK_MAX_CDB];
-
-	if (speed == 0)
-		speed = 0xffff; /* set to max */
-	else
-		speed *= 177;   /* Nx to kbytes/s */
-
-	memset(cmd, 0, BLK_MAX_CDB);
-
-	cmd[0] = GPCMD_SET_SPEED;
-	/* Read Drive speed in kbytes/second MSB/LSB */
-	cmd[2] = (speed >> 8) & 0xff;
-	cmd[3] = speed & 0xff;
-	if ((cdi->mask & (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) !=
-	    (CDC_CD_R | CDC_CD_RW | CDC_DVD_R)) {
-		/* Write Drive speed in kbytes/second MSB/LSB */
-		cmd[4] = (speed >> 8) & 0xff;
-		cmd[5] = speed & 0xff;
-	}
-
-	stat = ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, NULL, 0, 0);
-
-	if (!ide_cdrom_get_capabilities(drive, buf)) {
-		ide_cdrom_update_speed(drive, buf);
-		cdi->speed = cd->current_speed;
-	}
-
-	return 0;
-}
-
-int ide_cdrom_get_last_session(struct cdrom_device_info *cdi,
-			       struct cdrom_multisession *ms_info)
-{
-	struct atapi_toc *toc;
-	ide_drive_t *drive = cdi->handle;
-	struct cdrom_info *info = drive->driver_data;
-	int ret;
-
-	if ((drive->atapi_flags & IDE_AFLAG_TOC_VALID) == 0 || !info->toc) {
-		ret = ide_cd_read_toc(drive);
-		if (ret)
-			return ret;
-	}
-
-	toc = info->toc;
-	ms_info->addr.lba = toc->last_session_lba;
-	ms_info->xa_flag = toc->xa_flag;
-
-	return 0;
-}
-
-int ide_cdrom_get_mcn(struct cdrom_device_info *cdi,
-		      struct cdrom_mcn *mcn_info)
-{
-	ide_drive_t *drive = cdi->handle;
-	int stat, mcnlen;
-	char buf[24];
-	unsigned char cmd[BLK_MAX_CDB];
-	unsigned len = sizeof(buf);
-
-	memset(cmd, 0, BLK_MAX_CDB);
-
-	cmd[0] = GPCMD_READ_SUBCHANNEL;
-	cmd[1] = 2;		/* MSF addressing */
-	cmd[2] = 0x40;	/* request subQ data */
-	cmd[3] = 2;		/* format */
-	cmd[8] = len;
-
-	stat = ide_cd_queue_pc(drive, cmd, 0, buf, &len, NULL, 0, 0);
-	if (stat)
-		return stat;
-
-	mcnlen = sizeof(mcn_info->medium_catalog_number) - 1;
-	memcpy(mcn_info->medium_catalog_number, buf + 9, mcnlen);
-	mcn_info->medium_catalog_number[mcnlen] = '\0';
-
-	return 0;
-}
-
-int ide_cdrom_reset(struct cdrom_device_info *cdi)
-{
-	ide_drive_t *drive = cdi->handle;
-	struct cdrom_info *cd = drive->driver_data;
-	struct request *rq;
-	int ret;
-
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	rq->rq_flags = RQF_QUIET;
-	blk_execute_rq(cd->disk, rq, 0);
-	ret = scsi_req(rq)->result ? -EIO : 0;
-	blk_put_request(rq);
-	/*
-	 * A reset will unlock the door. If it was previously locked,
-	 * lock it again.
-	 */
-	if (drive->atapi_flags & IDE_AFLAG_DOOR_LOCKED)
-		(void)ide_cd_lockdoor(drive, 1);
-
-	return ret;
-}
-
-static int ide_cd_get_toc_entry(ide_drive_t *drive, int track,
-				struct atapi_toc_entry **ent)
-{
-	struct cdrom_info *info = drive->driver_data;
-	struct atapi_toc *toc = info->toc;
-	int ntracks;
-
-	/*
-	 * don't serve cached data, if the toc isn't valid
-	 */
-	if ((drive->atapi_flags & IDE_AFLAG_TOC_VALID) == 0)
-		return -EINVAL;
-
-	/* Check validity of requested track number. */
-	ntracks = toc->hdr.last_track - toc->hdr.first_track + 1;
-
-	if (toc->hdr.first_track == CDROM_LEADOUT)
-		ntracks = 0;
-
-	if (track == CDROM_LEADOUT)
-		*ent = &toc->ent[ntracks];
-	else if (track < toc->hdr.first_track || track > toc->hdr.last_track)
-		return -EINVAL;
-	else
-		*ent = &toc->ent[track - toc->hdr.first_track];
-
-	return 0;
-}
-
-static int ide_cd_fake_play_trkind(ide_drive_t *drive, void *arg)
-{
-	struct cdrom_ti *ti = arg;
-	struct atapi_toc_entry *first_toc, *last_toc;
-	unsigned long lba_start, lba_end;
-	int stat;
-	unsigned char cmd[BLK_MAX_CDB];
-
-	stat = ide_cd_get_toc_entry(drive, ti->cdti_trk0, &first_toc);
-	if (stat)
-		return stat;
-
-	stat = ide_cd_get_toc_entry(drive, ti->cdti_trk1, &last_toc);
-	if (stat)
-		return stat;
-
-	if (ti->cdti_trk1 != CDROM_LEADOUT)
-		++last_toc;
-	lba_start = first_toc->addr.lba;
-	lba_end   = last_toc->addr.lba;
-
-	if (lba_end <= lba_start)
-		return -EINVAL;
-
-	memset(cmd, 0, BLK_MAX_CDB);
-
-	cmd[0] = GPCMD_PLAY_AUDIO_MSF;
-	lba_to_msf(lba_start,   &cmd[3], &cmd[4], &cmd[5]);
-	lba_to_msf(lba_end - 1, &cmd[6], &cmd[7], &cmd[8]);
-
-	return ide_cd_queue_pc(drive, cmd, 0, NULL, NULL, NULL, 0, 0);
-}
-
-static int ide_cd_read_tochdr(ide_drive_t *drive, void *arg)
-{
-	struct cdrom_info *cd = drive->driver_data;
-	struct cdrom_tochdr *tochdr = arg;
-	struct atapi_toc *toc;
-	int stat;
-
-	/* Make sure our saved TOC is valid. */
-	stat = ide_cd_read_toc(drive);
-	if (stat)
-		return stat;
-
-	toc = cd->toc;
-	tochdr->cdth_trk0 = toc->hdr.first_track;
-	tochdr->cdth_trk1 = toc->hdr.last_track;
-
-	return 0;
-}
-
-static int ide_cd_read_tocentry(ide_drive_t *drive, void *arg)
-{
-	struct cdrom_tocentry *tocentry = arg;
-	struct atapi_toc_entry *toce;
-	int stat;
-
-	stat = ide_cd_get_toc_entry(drive, tocentry->cdte_track, &toce);
-	if (stat)
-		return stat;
-
-	tocentry->cdte_ctrl = toce->control;
-	tocentry->cdte_adr  = toce->adr;
-	if (tocentry->cdte_format == CDROM_MSF) {
-		lba_to_msf(toce->addr.lba,
-			   &tocentry->cdte_addr.msf.minute,
-			   &tocentry->cdte_addr.msf.second,
-			   &tocentry->cdte_addr.msf.frame);
-	} else
-		tocentry->cdte_addr.lba = toce->addr.lba;
-
-	return 0;
-}
-
-int ide_cdrom_audio_ioctl(struct cdrom_device_info *cdi,
-			  unsigned int cmd, void *arg)
-{
-	ide_drive_t *drive = cdi->handle;
-
-	switch (cmd) {
-	/*
-	 * emulate PLAY_AUDIO_TI command with PLAY_AUDIO_10, since
-	 * atapi doesn't support it
-	 */
-	case CDROMPLAYTRKIND:
-		return ide_cd_fake_play_trkind(drive, arg);
-	case CDROMREADTOCHDR:
-		return ide_cd_read_tochdr(drive, arg);
-	case CDROMREADTOCENTRY:
-		return ide_cd_read_tocentry(drive, arg);
-	default:
-		return -EINVAL;
-	}
-}
-
-/* the generic packet interface to cdrom.c */
-int ide_cdrom_packet(struct cdrom_device_info *cdi,
-			    struct packet_command *cgc)
-{
-	ide_drive_t *drive = cdi->handle;
-	req_flags_t flags = 0;
-	unsigned len = cgc->buflen;
-
-	if (cgc->timeout <= 0)
-		cgc->timeout = ATAPI_WAIT_PC;
-
-	/* here we queue the commands from the uniform CD-ROM
-	   layer. the packet must be complete, as we do not
-	   touch it at all. */
-
-	if (cgc->sshdr)
-		memset(cgc->sshdr, 0, sizeof(*cgc->sshdr));
-
-	if (cgc->quiet)
-		flags |= RQF_QUIET;
-
-	cgc->stat = ide_cd_queue_pc(drive, cgc->cmd,
-				    cgc->data_direction == CGC_DATA_WRITE,
-				    cgc->buffer, &len,
-				    cgc->sshdr, cgc->timeout, flags);
-	if (!cgc->stat)
-		cgc->buflen -= len;
-	return cgc->stat;
-}
diff --git a/drivers/ide/ide-cd_verbose.c b/drivers/ide/ide-cd_verbose.c
deleted file mode 100644
index 5ecd5b2f03a33..0000000000000
--- a/drivers/ide/ide-cd_verbose.c
+++ /dev/null
@@ -1,362 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Verbose error logging for ATAPI CD/DVD devices.
- *
- * Copyright (C) 1994-1996  Scott Snyder <snyder@fnald0.fnal.gov>
- * Copyright (C) 1996-1998  Erik Andersen <andersee@debian.org>
- * Copyright (C) 1998-2000  Jens Axboe <axboe@suse.de>
- */
-
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/cdrom.h>
-#include <linux/ide.h>
-#include <scsi/scsi.h>
-#include "ide-cd.h"
-
-#ifndef CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS
-void ide_cd_log_error(const char *name, struct request *failed_command,
-		      struct request_sense *sense)
-{
-	/* Suppress printing unit attention and `in progress of becoming ready'
-	   errors when we're not being verbose. */
-	if (sense->sense_key == UNIT_ATTENTION ||
-	    (sense->sense_key == NOT_READY && (sense->asc == 4 ||
-						sense->asc == 0x3a)))
-		return;
-
-	printk(KERN_ERR "%s: error code: 0x%02x  sense_key: 0x%02x  "
-			"asc: 0x%02x  ascq: 0x%02x\n",
-			name, sense->error_code, sense->sense_key,
-			sense->asc, sense->ascq);
-}
-#else
-/* The generic packet command opcodes for CD/DVD Logical Units,
- * From Table 57 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */
-static const struct {
-	unsigned short packet_command;
-	const char * const text;
-} packet_command_texts[] = {
-	{ GPCMD_TEST_UNIT_READY, "Test Unit Ready" },
-	{ GPCMD_REQUEST_SENSE, "Request Sense" },
-	{ GPCMD_FORMAT_UNIT, "Format Unit" },
-	{ GPCMD_INQUIRY, "Inquiry" },
-	{ GPCMD_START_STOP_UNIT, "Start/Stop Unit" },
-	{ GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, "Prevent/Allow Medium Removal" },
-	{ GPCMD_READ_FORMAT_CAPACITIES, "Read Format Capacities" },
-	{ GPCMD_READ_CDVD_CAPACITY, "Read Cd/Dvd Capacity" },
-	{ GPCMD_READ_10, "Read 10" },
-	{ GPCMD_WRITE_10, "Write 10" },
-	{ GPCMD_SEEK, "Seek" },
-	{ GPCMD_WRITE_AND_VERIFY_10, "Write and Verify 10" },
-	{ GPCMD_VERIFY_10, "Verify 10" },
-	{ GPCMD_FLUSH_CACHE, "Flush Cache" },
-	{ GPCMD_READ_SUBCHANNEL, "Read Subchannel" },
-	{ GPCMD_READ_TOC_PMA_ATIP, "Read Table of Contents" },
-	{ GPCMD_READ_HEADER, "Read Header" },
-	{ GPCMD_PLAY_AUDIO_10, "Play Audio 10" },
-	{ GPCMD_GET_CONFIGURATION, "Get Configuration" },
-	{ GPCMD_PLAY_AUDIO_MSF, "Play Audio MSF" },
-	{ GPCMD_PLAYAUDIO_TI, "Play Audio TrackIndex" },
-	{ GPCMD_GET_EVENT_STATUS_NOTIFICATION,
-		"Get Event Status Notification" },
-	{ GPCMD_PAUSE_RESUME, "Pause/Resume" },
-	{ GPCMD_STOP_PLAY_SCAN, "Stop Play/Scan" },
-	{ GPCMD_READ_DISC_INFO, "Read Disc Info" },
-	{ GPCMD_READ_TRACK_RZONE_INFO, "Read Track Rzone Info" },
-	{ GPCMD_RESERVE_RZONE_TRACK, "Reserve Rzone Track" },
-	{ GPCMD_SEND_OPC, "Send OPC" },
-	{ GPCMD_MODE_SELECT_10, "Mode Select 10" },
-	{ GPCMD_REPAIR_RZONE_TRACK, "Repair Rzone Track" },
-	{ GPCMD_MODE_SENSE_10, "Mode Sense 10" },
-	{ GPCMD_CLOSE_TRACK, "Close Track" },
-	{ GPCMD_BLANK, "Blank" },
-	{ GPCMD_SEND_EVENT, "Send Event" },
-	{ GPCMD_SEND_KEY, "Send Key" },
-	{ GPCMD_REPORT_KEY, "Report Key" },
-	{ GPCMD_LOAD_UNLOAD, "Load/Unload" },
-	{ GPCMD_SET_READ_AHEAD, "Set Read-ahead" },
-	{ GPCMD_READ_12, "Read 12" },
-	{ GPCMD_GET_PERFORMANCE, "Get Performance" },
-	{ GPCMD_SEND_DVD_STRUCTURE, "Send DVD Structure" },
-	{ GPCMD_READ_DVD_STRUCTURE, "Read DVD Structure" },
-	{ GPCMD_SET_STREAMING, "Set Streaming" },
-	{ GPCMD_READ_CD_MSF, "Read CD MSF" },
-	{ GPCMD_SCAN, "Scan" },
-	{ GPCMD_SET_SPEED, "Set Speed" },
-	{ GPCMD_PLAY_CD, "Play CD" },
-	{ GPCMD_MECHANISM_STATUS, "Mechanism Status" },
-	{ GPCMD_READ_CD, "Read CD" },
-};
-
-/* From Table 303 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */
-static const char * const sense_key_texts[16] = {
-	"No sense data",
-	"Recovered error",
-	"Not ready",
-	"Medium error",
-	"Hardware error",
-	"Illegal request",
-	"Unit attention",
-	"Data protect",
-	"Blank check",
-	"(reserved)",
-	"(reserved)",
-	"Aborted command",
-	"(reserved)",
-	"(reserved)",
-	"Miscompare",
-	"(reserved)",
-};
-
-/* From Table 304 of the SFF8090 Ver. 3 (Mt. Fuji) draft standard. */
-static const struct {
-	unsigned long asc_ascq;
-	const char * const text;
-} sense_data_texts[] = {
-	{ 0x000000, "No additional sense information" },
-	{ 0x000011, "Play operation in progress" },
-	{ 0x000012, "Play operation paused" },
-	{ 0x000013, "Play operation successfully completed" },
-	{ 0x000014, "Play operation stopped due to error" },
-	{ 0x000015, "No current audio status to return" },
-	{ 0x010c0a, "Write error - padding blocks added" },
-	{ 0x011700, "Recovered data with no error correction applied" },
-	{ 0x011701, "Recovered data with retries" },
-	{ 0x011702, "Recovered data with positive head offset" },
-	{ 0x011703, "Recovered data with negative head offset" },
-	{ 0x011704, "Recovered data with retries and/or CIRC applied" },
-	{ 0x011705, "Recovered data using previous sector ID" },
-	{ 0x011800, "Recovered data with error correction applied" },
-	{ 0x011801, "Recovered data with error correction and retries applied"},
-	{ 0x011802, "Recovered data - the data was auto-reallocated" },
-	{ 0x011803, "Recovered data with CIRC" },
-	{ 0x011804, "Recovered data with L-EC" },
-	{ 0x015d00, "Failure prediction threshold exceeded"
-		    " - Predicted logical unit failure" },
-	{ 0x015d01, "Failure prediction threshold exceeded"
-		    " - Predicted media failure" },
-	{ 0x015dff, "Failure prediction threshold exceeded - False" },
-	{ 0x017301, "Power calibration area almost full" },
-	{ 0x020400, "Logical unit not ready - cause not reportable" },
-	/* Following is misspelled in ATAPI 2.6, _and_ in Mt. Fuji */
-	{ 0x020401, "Logical unit not ready"
-		    " - in progress [sic] of becoming ready" },
-	{ 0x020402, "Logical unit not ready - initializing command required" },
-	{ 0x020403, "Logical unit not ready - manual intervention required" },
-	{ 0x020404, "Logical unit not ready - format in progress" },
-	{ 0x020407, "Logical unit not ready - operation in progress" },
-	{ 0x020408, "Logical unit not ready - long write in progress" },
-	{ 0x020600, "No reference position found (media may be upside down)" },
-	{ 0x023000, "Incompatible medium installed" },
-	{ 0x023a00, "Medium not present" },
-	{ 0x025300, "Media load or eject failed" },
-	{ 0x025700, "Unable to recover table of contents" },
-	{ 0x030300, "Peripheral device write fault" },
-	{ 0x030301, "No write current" },
-	{ 0x030302, "Excessive write errors" },
-	{ 0x030c00, "Write error" },
-	{ 0x030c01, "Write error - Recovered with auto reallocation" },
-	{ 0x030c02, "Write error - auto reallocation failed" },
-	{ 0x030c03, "Write error - recommend reassignment" },
-	{ 0x030c04, "Compression check miscompare error" },
-	{ 0x030c05, "Data expansion occurred during compress" },
-	{ 0x030c06, "Block not compressible" },
-	{ 0x030c07, "Write error - recovery needed" },
-	{ 0x030c08, "Write error - recovery failed" },
-	{ 0x030c09, "Write error - loss of streaming" },
-	{ 0x031100, "Unrecovered read error" },
-	{ 0x031106, "CIRC unrecovered error" },
-	{ 0x033101, "Format command failed" },
-	{ 0x033200, "No defect spare location available" },
-	{ 0x033201, "Defect list update failure" },
-	{ 0x035100, "Erase failure" },
-	{ 0x037200, "Session fixation error" },
-	{ 0x037201, "Session fixation error writin lead-in" },
-	{ 0x037202, "Session fixation error writin lead-out" },
-	{ 0x037300, "CD control error" },
-	{ 0x037302, "Power calibration area is full" },
-	{ 0x037303, "Power calibration area error" },
-	{ 0x037304, "Program memory area / RMA update failure" },
-	{ 0x037305, "Program memory area / RMA is full" },
-	{ 0x037306, "Program memory area / RMA is (almost) full" },
-	{ 0x040200, "No seek complete" },
-	{ 0x040300, "Write fault" },
-	{ 0x040900, "Track following error" },
-	{ 0x040901, "Tracking servo failure" },
-	{ 0x040902, "Focus servo failure" },
-	{ 0x040903, "Spindle servo failure" },
-	{ 0x041500, "Random positioning error" },
-	{ 0x041501, "Mechanical positioning or changer error" },
-	{ 0x041502, "Positioning error detected by read of medium" },
-	{ 0x043c00, "Mechanical positioning or changer error" },
-	{ 0x044000, "Diagnostic failure on component (ASCQ)" },
-	{ 0x044400, "Internal CD/DVD logical unit failure" },
-	{ 0x04b600, "Media load mechanism failed" },
-	{ 0x051a00, "Parameter list length error" },
-	{ 0x052000, "Invalid command operation code" },
-	{ 0x052100, "Logical block address out of range" },
-	{ 0x052102, "Invalid address for write" },
-	{ 0x052400, "Invalid field in command packet" },
-	{ 0x052600, "Invalid field in parameter list" },
-	{ 0x052601, "Parameter not supported" },
-	{ 0x052602, "Parameter value invalid" },
-	{ 0x052700, "Write protected media" },
-	{ 0x052c00, "Command sequence error" },
-	{ 0x052c03, "Current program area is not empty" },
-	{ 0x052c04, "Current program area is empty" },
-	{ 0x053001, "Cannot read medium - unknown format" },
-	{ 0x053002, "Cannot read medium - incompatible format" },
-	{ 0x053900, "Saving parameters not supported" },
-	{ 0x054e00, "Overlapped commands attempted" },
-	{ 0x055302, "Medium removal prevented" },
-	{ 0x055500, "System resource failure" },
-	{ 0x056300, "End of user area encountered on this track" },
-	{ 0x056400, "Illegal mode for this track or incompatible medium" },
-	{ 0x056f00, "Copy protection key exchange failure"
-		    " - Authentication failure" },
-	{ 0x056f01, "Copy protection key exchange failure - Key not present" },
-	{ 0x056f02, "Copy protection key exchange failure"
-		     " - Key not established" },
-	{ 0x056f03, "Read of scrambled sector without authentication" },
-	{ 0x056f04, "Media region code is mismatched to logical unit" },
-	{ 0x056f05, "Drive region must be permanent"
-		    " / region reset count error" },
-	{ 0x057203, "Session fixation error - incomplete track in session" },
-	{ 0x057204, "Empty or partially written reserved track" },
-	{ 0x057205, "No more RZONE reservations are allowed" },
-	{ 0x05bf00, "Loss of streaming" },
-	{ 0x062800, "Not ready to ready transition, medium may have changed" },
-	{ 0x062900, "Power on, reset or hardware reset occurred" },
-	{ 0x062a00, "Parameters changed" },
-	{ 0x062a01, "Mode parameters changed" },
-	{ 0x062e00, "Insufficient time for operation" },
-	{ 0x063f00, "Logical unit operating conditions have changed" },
-	{ 0x063f01, "Microcode has been changed" },
-	{ 0x065a00, "Operator request or state change input (unspecified)" },
-	{ 0x065a01, "Operator medium removal request" },
-	{ 0x0bb900, "Play operation aborted" },
-	/* Here we use 0xff for the key (not a valid key) to signify
-	 * that these can have _any_ key value associated with them... */
-	{ 0xff0401, "Logical unit is in process of becoming ready" },
-	{ 0xff0400, "Logical unit not ready, cause not reportable" },
-	{ 0xff0402, "Logical unit not ready, initializing command required" },
-	{ 0xff0403, "Logical unit not ready, manual intervention required" },
-	{ 0xff0500, "Logical unit does not respond to selection" },
-	{ 0xff0800, "Logical unit communication failure" },
-	{ 0xff0802, "Logical unit communication parity error" },
-	{ 0xff0801, "Logical unit communication time-out" },
-	{ 0xff2500, "Logical unit not supported" },
-	{ 0xff4c00, "Logical unit failed self-configuration" },
-	{ 0xff3e00, "Logical unit has not self-configured yet" },
-};
-
-void ide_cd_log_error(const char *name, struct request *failed_command,
-		      struct request_sense *sense)
-{
-	int i;
-	const char *s = "bad sense key!";
-	char buf[80];
-
-	printk(KERN_ERR "ATAPI device %s:\n", name);
-	if (sense->error_code == 0x70)
-		printk(KERN_CONT "  Error: ");
-	else if (sense->error_code == 0x71)
-		printk("  Deferred Error: ");
-	else if (sense->error_code == 0x7f)
-		printk(KERN_CONT "  Vendor-specific Error: ");
-	else
-		printk(KERN_CONT "  Unknown Error Type: ");
-
-	if (sense->sense_key < ARRAY_SIZE(sense_key_texts))
-		s = sense_key_texts[sense->sense_key];
-
-	printk(KERN_CONT "%s -- (Sense key=0x%02x)\n", s, sense->sense_key);
-
-	if (sense->asc == 0x40) {
-		sprintf(buf, "Diagnostic failure on component 0x%02x",
-			sense->ascq);
-		s = buf;
-	} else {
-		int lo = 0, mid, hi = ARRAY_SIZE(sense_data_texts);
-		unsigned long key = (sense->sense_key << 16);
-
-		key |= (sense->asc << 8);
-		if (!(sense->ascq >= 0x80 && sense->ascq <= 0xdd))
-			key |= sense->ascq;
-		s = NULL;
-
-		while (hi > lo) {
-			mid = (lo + hi) / 2;
-			if (sense_data_texts[mid].asc_ascq == key ||
-			    sense_data_texts[mid].asc_ascq == (0xff0000|key)) {
-				s = sense_data_texts[mid].text;
-				break;
-			} else if (sense_data_texts[mid].asc_ascq > key)
-				hi = mid;
-			else
-				lo = mid + 1;
-		}
-	}
-
-	if (s == NULL) {
-		if (sense->asc > 0x80)
-			s = "(vendor-specific error)";
-		else
-			s = "(reserved error code)";
-	}
-
-	printk(KERN_ERR "  %s -- (asc=0x%02x, ascq=0x%02x)\n",
-			s, sense->asc, sense->ascq);
-
-	if (failed_command != NULL) {
-		int lo = 0, mid, hi = ARRAY_SIZE(packet_command_texts);
-		s = NULL;
-
-		while (hi > lo) {
-			mid = (lo + hi) / 2;
-			if (packet_command_texts[mid].packet_command ==
-			    scsi_req(failed_command)->cmd[0]) {
-				s = packet_command_texts[mid].text;
-				break;
-			}
-			if (packet_command_texts[mid].packet_command >
-			    scsi_req(failed_command)->cmd[0])
-				hi = mid;
-			else
-				lo = mid + 1;
-		}
-
-		printk(KERN_ERR "  The failed \"%s\" packet command "
-				"was: \n  \"", s);
-		for (i = 0; i < BLK_MAX_CDB; i++)
-			printk(KERN_CONT "%02x ", scsi_req(failed_command)->cmd[i]);
-		printk(KERN_CONT "\"\n");
-	}
-
-	/* The SKSV bit specifies validity of the sense_key_specific
-	 * in the next two commands. It is bit 7 of the first byte.
-	 * In the case of NOT_READY, if SKSV is set the drive can
-	 * give us nice ETA readings.
-	 */
-	if (sense->sense_key == NOT_READY && (sense->sks[0] & 0x80)) {
-		int progress = (sense->sks[1] << 8 | sense->sks[2]) * 100;
-
-		printk(KERN_ERR "  Command is %02d%% complete\n",
-				progress / 0xffff);
-	}
-
-	if (sense->sense_key == ILLEGAL_REQUEST &&
-	    (sense->sks[0] & 0x80) != 0) {
-		printk(KERN_ERR "  Error in %s byte %d",
-				(sense->sks[0] & 0x40) != 0 ?
-				"command packet" : "command data",
-				(sense->sks[1] << 8) + sense->sks[2]);
-
-		if ((sense->sks[0] & 0x40) != 0)
-			printk(KERN_CONT " bit %d", sense->sks[0] & 0x07);
-
-		printk(KERN_CONT "\n");
-	}
-}
-#endif
diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c
deleted file mode 100644
index f1e922e2479af..0000000000000
--- a/drivers/ide/ide-cs.c
+++ /dev/null
@@ -1,364 +0,0 @@
-/*======================================================================
-
-    A driver for PCMCIA IDE/ATA disk cards
-
-    The contents of this file are subject to the Mozilla Public
-    License Version 1.1 (the "License"); you may not use this file
-    except in compliance with the License. You may obtain a copy of
-    the License at http://www.mozilla.org/MPL/
-
-    Software distributed under the License is distributed on an "AS
-    IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
-    implied. See the License for the specific language governing
-    rights and limitations under the License.
-
-    The initial developer of the original code is David A. Hinds
-    <dahinds@users.sourceforge.net>.  Portions created by David A. Hinds
-    are Copyright (C) 1999 David A. Hinds.  All Rights Reserved.
-
-    Alternatively, the contents of this file may be used under the
-    terms of the GNU General Public License version 2 (the "GPL"), in
-    which case the provisions of the GPL are applicable instead of the
-    above.  If you wish to allow the use of your version of this file
-    only under the terms of the GPL and not to allow others to use
-    your version of this file under the MPL, indicate your decision
-    by deleting the provisions above and replace them with the notice
-    and other provisions required by the GPL.  If you do not delete
-    the provisions above, a recipient may use your version of this
-    file under either the MPL or the GPL.
-
-======================================================================*/
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/timer.h>
-#include <linux/ioport.h>
-#include <linux/ide.h>
-#include <linux/major.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-
-#include <pcmcia/cistpl.h>
-#include <pcmcia/ds.h>
-#include <pcmcia/cisreg.h>
-#include <pcmcia/ciscode.h>
-
-#define DRV_NAME "ide-cs"
-
-/*====================================================================*/
-
-/* Module parameters */
-
-MODULE_AUTHOR("David Hinds <dahinds@users.sourceforge.net>");
-MODULE_DESCRIPTION("PCMCIA ATA/IDE card driver");
-MODULE_LICENSE("Dual MPL/GPL");
-
-/*====================================================================*/
-
-typedef struct ide_info_t {
-	struct pcmcia_device	*p_dev;
-	struct ide_host		*host;
-	int			ndev;
-} ide_info_t;
-
-static void ide_release(struct pcmcia_device *);
-static int ide_config(struct pcmcia_device *);
-
-static void ide_detach(struct pcmcia_device *p_dev);
-
-static int ide_probe(struct pcmcia_device *link)
-{
-    ide_info_t *info;
-
-    dev_dbg(&link->dev, "ide_attach()\n");
-
-    /* Create new ide device */
-    info = kzalloc(sizeof(*info), GFP_KERNEL);
-    if (!info)
-	return -ENOMEM;
-
-    info->p_dev = link;
-    link->priv = info;
-
-    link->config_flags |= CONF_ENABLE_IRQ | CONF_AUTO_SET_IO |
-	    CONF_AUTO_SET_VPP | CONF_AUTO_CHECK_VCC;
-
-    return ide_config(link);
-} /* ide_attach */
-
-static void ide_detach(struct pcmcia_device *link)
-{
-    ide_info_t *info = link->priv;
-
-    dev_dbg(&link->dev, "ide_detach(0x%p)\n", link);
-
-    ide_release(link);
-
-    kfree(info);
-} /* ide_detach */
-
-static const struct ide_port_ops idecs_port_ops = {
-	.quirkproc		= ide_undecoded_slave,
-};
-
-static const struct ide_port_info idecs_port_info = {
-	.port_ops		= &idecs_port_ops,
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.irq_flags		= IRQF_SHARED,
-	.chipset		= ide_pci,
-};
-
-static struct ide_host *idecs_register(unsigned long io, unsigned long ctl,
-				unsigned long irq, struct pcmcia_device *handle)
-{
-    struct ide_host *host;
-    ide_hwif_t *hwif;
-    int i, rc;
-    struct ide_hw hw, *hws[] = { &hw };
-
-    if (!request_region(io, 8, DRV_NAME)) {
-	printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
-			DRV_NAME, io, io + 7);
-	return NULL;
-    }
-
-    if (!request_region(ctl, 1, DRV_NAME)) {
-	printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n",
-			DRV_NAME, ctl);
-	release_region(io, 8);
-	return NULL;
-    }
-
-    memset(&hw, 0, sizeof(hw));
-    ide_std_init_ports(&hw, io, ctl);
-    hw.irq = irq;
-    hw.dev = &handle->dev;
-
-    rc = ide_host_add(&idecs_port_info, hws, 1, &host);
-    if (rc)
-	goto out_release;
-
-    hwif = host->ports[0];
-
-    if (hwif->present)
-	return host;
-
-    /* retry registration in case device is still spinning up */
-    for (i = 0; i < 10; i++) {
-	msleep(100);
-	ide_port_scan(hwif);
-	if (hwif->present)
-	    return host;
-    }
-
-    return host;
-
-out_release:
-    release_region(ctl, 1);
-    release_region(io, 8);
-    return NULL;
-}
-
-static int pcmcia_check_one_config(struct pcmcia_device *pdev, void *priv_data)
-{
-	int *is_kme = priv_data;
-
-	if ((pdev->resource[0]->flags & IO_DATA_PATH_WIDTH)
-	    != IO_DATA_PATH_WIDTH_8) {
-		pdev->resource[0]->flags &= ~IO_DATA_PATH_WIDTH;
-		pdev->resource[0]->flags |= IO_DATA_PATH_WIDTH_AUTO;
-	}
-	pdev->resource[1]->flags &= ~IO_DATA_PATH_WIDTH;
-	pdev->resource[1]->flags |= IO_DATA_PATH_WIDTH_8;
-
-	if (pdev->resource[1]->end) {
-		pdev->resource[0]->end = 8;
-		pdev->resource[1]->end = (*is_kme) ? 2 : 1;
-	} else {
-		if (pdev->resource[0]->end < 16)
-			return -ENODEV;
-	}
-
-	return pcmcia_request_io(pdev);
-}
-
-static int ide_config(struct pcmcia_device *link)
-{
-    ide_info_t *info = link->priv;
-    int ret = 0, is_kme = 0;
-    unsigned long io_base, ctl_base;
-    struct ide_host *host;
-
-    dev_dbg(&link->dev, "ide_config(0x%p)\n", link);
-
-    is_kme = ((link->manf_id == MANFID_KME) &&
-	      ((link->card_id == PRODID_KME_KXLC005_A) ||
-	       (link->card_id == PRODID_KME_KXLC005_B)));
-
-    if (pcmcia_loop_config(link, pcmcia_check_one_config, &is_kme)) {
-	    link->config_flags &= ~CONF_AUTO_CHECK_VCC;
-	    if (pcmcia_loop_config(link, pcmcia_check_one_config, &is_kme))
-		    goto failed; /* No suitable config found */
-    }
-    io_base = link->resource[0]->start;
-    if (link->resource[1]->end)
-	    ctl_base = link->resource[1]->start;
-    else
-	    ctl_base = link->resource[0]->start + 0x0e;
-
-    if (!link->irq)
-	    goto failed;
-
-    ret = pcmcia_enable_device(link);
-    if (ret)
-	    goto failed;
-
-    /* disable drive interrupts during IDE probe */
-    outb(0x02, ctl_base);
-
-    /* special setup for KXLC005 card */
-    if (is_kme)
-	outb(0x81, ctl_base+1);
-
-     host = idecs_register(io_base, ctl_base, link->irq, link);
-     if (host == NULL && resource_size(link->resource[0]) == 0x20) {
-	    outb(0x02, ctl_base + 0x10);
-	    host = idecs_register(io_base + 0x10, ctl_base + 0x10,
-				  link->irq, link);
-    }
-
-    if (host == NULL)
-	goto failed;
-
-    info->ndev = 1;
-    info->host = host;
-    dev_info(&link->dev, "ide-cs: hd%c: Vpp = %d.%d\n",
-	    'a' + host->ports[0]->index * 2,
-	    link->vpp / 10, link->vpp % 10);
-
-    return 0;
-
-failed:
-    ide_release(link);
-    return -ENODEV;
-} /* ide_config */
-
-static void ide_release(struct pcmcia_device *link)
-{
-    ide_info_t *info = link->priv;
-    struct ide_host *host = info->host;
-
-    dev_dbg(&link->dev, "ide_release(0x%p)\n", link);
-
-    if (info->ndev) {
-	ide_hwif_t *hwif = host->ports[0];
-	unsigned long data_addr, ctl_addr;
-
-	data_addr = hwif->io_ports.data_addr;
-	ctl_addr = hwif->io_ports.ctl_addr;
-
-	ide_host_remove(host);
-	info->ndev = 0;
-
-	release_region(ctl_addr, 1);
-	release_region(data_addr, 8);
-    }
-
-    pcmcia_disable_device(link);
-} /* ide_release */
-
-
-static const struct pcmcia_device_id ide_ids[] = {
-	PCMCIA_DEVICE_FUNC_ID(4),
-	PCMCIA_DEVICE_MANF_CARD(0x0000, 0x0000),	/* Corsair */
-	PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000),	/* Hitachi */
-	PCMCIA_DEVICE_MANF_CARD(0x000a, 0x0000),	/* I-O Data CFA */
-	PCMCIA_DEVICE_MANF_CARD(0x001c, 0x0001),	/* Mitsubishi CFA */
-	PCMCIA_DEVICE_MANF_CARD(0x0032, 0x0704),
-	PCMCIA_DEVICE_MANF_CARD(0x0032, 0x2904),
-	PCMCIA_DEVICE_MANF_CARD(0x0045, 0x0401),	/* SanDisk CFA */
-	PCMCIA_DEVICE_MANF_CARD(0x004f, 0x0000),	/* Kingston */
-	PCMCIA_DEVICE_MANF_CARD(0x0097, 0x1620), 	/* TI emulated */
-	PCMCIA_DEVICE_MANF_CARD(0x0098, 0x0000),	/* Toshiba */
-	PCMCIA_DEVICE_MANF_CARD(0x00a4, 0x002d),
-	PCMCIA_DEVICE_MANF_CARD(0x00ce, 0x0000),	/* Samsung */
-	PCMCIA_DEVICE_MANF_CARD(0x0319, 0x0000),	/* Hitachi */
-	PCMCIA_DEVICE_MANF_CARD(0x2080, 0x0001),
-	PCMCIA_DEVICE_MANF_CARD(0x4e01, 0x0100),	/* Viking CFA */
-	PCMCIA_DEVICE_MANF_CARD(0x4e01, 0x0200),	/* Lexar, Viking CFA */
-	PCMCIA_DEVICE_PROD_ID123("Caravelle", "PSC-IDE ", "PSC000", 0x8c36137c, 0xd0693ab8, 0x2768a9f0),
-	PCMCIA_DEVICE_PROD_ID123("CDROM", "IDE", "MCD-601p", 0x1b9179ca, 0xede88951, 0x0d902f74),
-	PCMCIA_DEVICE_PROD_ID123("PCMCIA", "IDE CARD", "F1", 0x281f1c5d, 0x1907960c, 0xf7fde8b9),
-	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "CD-ROM", 0x78f308dc, 0x66536591),
-	PCMCIA_DEVICE_PROD_ID12("ARGOSY", "PnPIDE", 0x78f308dc, 0x0c694728),
-	PCMCIA_DEVICE_PROD_ID12("CNF   ", "CD-ROM", 0x46d7db81, 0x66536591),
-	PCMCIA_DEVICE_PROD_ID12("CNF CD-M", "CD-ROM", 0x7d93b852, 0x66536591),
-	PCMCIA_DEVICE_PROD_ID12("Creative Technology Ltd.", "PCMCIA CD-ROM Interface Card", 0xff8c8a45, 0xfe8020c4),
-	PCMCIA_DEVICE_PROD_ID12("Digital Equipment Corporation.", "Digital Mobile Media CD-ROM", 0x17692a66, 0xef1dcbde),
-	PCMCIA_DEVICE_PROD_ID12("EXP", "CD+GAME", 0x6f58c983, 0x63c13aaf),
-	PCMCIA_DEVICE_PROD_ID12("EXP   ", "CD-ROM", 0x0a5c52fd, 0x66536591),
-	PCMCIA_DEVICE_PROD_ID12("EXP   ", "PnPIDE", 0x0a5c52fd, 0x0c694728),
-	PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e),
-	PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae),
-	PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178),
-	PCMCIA_DEVICE_PROD_ID12("Hyperstone", "Model1", 0x3d5b9ef5, 0xca6ab420),
-	PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178),
-	PCMCIA_DEVICE_PROD_ID12("IBM", "IBM17JSSFP20", 0xb569a6e5, 0xf2508753),
-	PCMCIA_DEVICE_PROD_ID12("KINGSTON", "CF CARD 1GB", 0x2e6d1829, 0x55d5bffb),
-	PCMCIA_DEVICE_PROD_ID12("KINGSTON", "CF CARD 4GB", 0x2e6d1829, 0x531e7d10),
-	PCMCIA_DEVICE_PROD_ID12("KINGSTON", "CF8GB", 0x2e6d1829, 0xacbe682e),
-	PCMCIA_DEVICE_PROD_ID12("IO DATA", "CBIDE2      ", 0x547e66dc, 0x8671043b),
-	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDE", 0x547e66dc, 0x5c5ab149),
-	PCMCIA_DEVICE_PROD_ID12("IO DATA", "PCIDEII", 0x547e66dc, 0xb3662674),
-	PCMCIA_DEVICE_PROD_ID12("LOOKMEET", "CBIDE2      ", 0xe37be2b5, 0x8671043b),
-	PCMCIA_DEVICE_PROD_ID12("M-Systems", "CF300", 0x7ed2ad87, 0x7e9e78ee),
-	PCMCIA_DEVICE_PROD_ID12("M-Systems", "CF500", 0x7ed2ad87, 0x7a13045c),
-	PCMCIA_DEVICE_PROD_ID2("NinjaATA-", 0xebe0bd79),
-	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "CD-ROM", 0x281f1c5d, 0x66536591),
-	PCMCIA_DEVICE_PROD_ID12("PCMCIA", "PnPIDE", 0x281f1c5d, 0x0c694728),
-	PCMCIA_DEVICE_PROD_ID12("SHUTTLE TECHNOLOGY LTD.", "PCCARD-IDE/ATAPI Adapter", 0x4a3f0ba0, 0x322560e1),
-	PCMCIA_DEVICE_PROD_ID12("SEAGATE", "ST1", 0x87c1b330, 0xe1f30883),
-	PCMCIA_DEVICE_PROD_ID12("SAMSUNG", "04/05/06", 0x43d74cb4, 0x6a22777d),
-	PCMCIA_DEVICE_PROD_ID12("SMI VENDOR", "SMI PRODUCT", 0x30896c92, 0x703cc5f6),
-	PCMCIA_DEVICE_PROD_ID12("TOSHIBA", "MK2001MPL", 0xb4585a1a, 0x3489e003),
-	PCMCIA_DEVICE_PROD_ID1("TRANSCEND    512M   ", 0xd0909443),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS1GCF45", 0x709b1bf1, 0xf68b6f32),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS1GCF80", 0x709b1bf1, 0x2a54d4b1),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS2GCF120", 0x709b1bf1, 0x969aa4f2),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS4GCF120", 0x709b1bf1, 0xf54a91c8),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS4GCF133", 0x709b1bf1, 0x7558f133),
-	PCMCIA_DEVICE_PROD_ID12("TRANSCEND", "TS8GCF133", 0x709b1bf1, 0xb2f89b47),
-	PCMCIA_DEVICE_PROD_ID12("WIT", "IDE16", 0x244e5994, 0x3e232852),
-	PCMCIA_DEVICE_PROD_ID12("WEIDA", "TWTTI", 0xcc7cf69c, 0x212bb918),
-	PCMCIA_DEVICE_PROD_ID1("STI Flash", 0xe4a13209),
-	PCMCIA_DEVICE_PROD_ID12("STI", "Flash 5.0", 0xbf2df18d, 0x8cb57a0e),
-	PCMCIA_MFC_DEVICE_PROD_ID12(1, "SanDisk", "ConnectPlus", 0x7a954bd9, 0x74be00c6),
-	PCMCIA_DEVICE_PROD_ID2("Flash Card", 0x5a362506),
-	PCMCIA_DEVICE_NULL,
-};
-MODULE_DEVICE_TABLE(pcmcia, ide_ids);
-
-static struct pcmcia_driver ide_cs_driver = {
-	.owner		= THIS_MODULE,
-	.name		= "ide-cs",
-	.probe		= ide_probe,
-	.remove		= ide_detach,
-	.id_table       = ide_ids,
-};
-
-static int __init init_ide_cs(void)
-{
-	return pcmcia_register_driver(&ide_cs_driver);
-}
-
-static void __exit exit_ide_cs(void)
-{
-	pcmcia_unregister_driver(&ide_cs_driver);
-}
-
-late_initcall(init_ide_cs);
-module_exit(exit_ide_cs);
diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c
deleted file mode 100644
index ca1d4b3d38786..0000000000000
--- a/drivers/ide/ide-devsets.c
+++ /dev/null
@@ -1,192 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/ide.h>
-
-DEFINE_MUTEX(ide_setting_mtx);
-
-ide_devset_get(io_32bit, io_32bit);
-
-static int set_io_32bit(ide_drive_t *drive, int arg)
-{
-	if (drive->dev_flags & IDE_DFLAG_NO_IO_32BIT)
-		return -EPERM;
-
-	if (arg < 0 || arg > 1 + (SUPPORT_VLB_SYNC << 1))
-		return -EINVAL;
-
-	drive->io_32bit = arg;
-
-	return 0;
-}
-
-ide_devset_get_flag(ksettings, IDE_DFLAG_KEEP_SETTINGS);
-
-static int set_ksettings(ide_drive_t *drive, int arg)
-{
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	if (arg)
-		drive->dev_flags |= IDE_DFLAG_KEEP_SETTINGS;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_KEEP_SETTINGS;
-
-	return 0;
-}
-
-ide_devset_get_flag(using_dma, IDE_DFLAG_USING_DMA);
-
-static int set_using_dma(ide_drive_t *drive, int arg)
-{
-#ifdef CONFIG_BLK_DEV_IDEDMA
-	int err = -EPERM;
-
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	if (ata_id_has_dma(drive->id) == 0)
-		goto out;
-
-	if (drive->hwif->dma_ops == NULL)
-		goto out;
-
-	err = 0;
-
-	if (arg) {
-		if (ide_set_dma(drive))
-			err = -EIO;
-	} else
-		ide_dma_off(drive);
-
-out:
-	return err;
-#else
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	return -EPERM;
-#endif
-}
-
-/*
- * handle HDIO_SET_PIO_MODE ioctl abusers here, eventually it will go away
- */
-static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio)
-{
-	switch (req_pio) {
-	case 202:
-	case 201:
-	case 200:
-	case 102:
-	case 101:
-	case 100:
-		return (hwif->host_flags & IDE_HFLAG_ABUSE_DMA_MODES) ? 1 : 0;
-	case 9:
-	case 8:
-		return (hwif->host_flags & IDE_HFLAG_ABUSE_PREFETCH) ? 1 : 0;
-	case 7:
-	case 6:
-		return (hwif->host_flags & IDE_HFLAG_ABUSE_FAST_DEVSEL) ? 1 : 0;
-	default:
-		return 0;
-	}
-}
-
-static int set_pio_mode(ide_drive_t *drive, int arg)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-
-	if (arg < 0 || arg > 255)
-		return -EINVAL;
-
-	if (port_ops == NULL || port_ops->set_pio_mode == NULL ||
-	    (hwif->host_flags & IDE_HFLAG_NO_SET_MODE))
-		return -ENOSYS;
-
-	if (set_pio_mode_abuse(drive->hwif, arg)) {
-		drive->pio_mode = arg + XFER_PIO_0;
-
-		if (arg == 8 || arg == 9) {
-			unsigned long flags;
-
-			/* take lock for IDE_DFLAG_[NO_]UNMASK/[NO_]IO_32BIT */
-			spin_lock_irqsave(&hwif->lock, flags);
-			port_ops->set_pio_mode(hwif, drive);
-			spin_unlock_irqrestore(&hwif->lock, flags);
-		} else
-			port_ops->set_pio_mode(hwif, drive);
-	} else {
-		int keep_dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
-
-		ide_set_pio(drive, arg);
-
-		if (hwif->host_flags & IDE_HFLAG_SET_PIO_MODE_KEEP_DMA) {
-			if (keep_dma)
-				ide_dma_on(drive);
-		}
-	}
-
-	return 0;
-}
-
-ide_devset_get_flag(unmaskirq, IDE_DFLAG_UNMASK);
-
-static int set_unmaskirq(ide_drive_t *drive, int arg)
-{
-	if (drive->dev_flags & IDE_DFLAG_NO_UNMASK)
-		return -EPERM;
-
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	if (arg)
-		drive->dev_flags |= IDE_DFLAG_UNMASK;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_UNMASK;
-
-	return 0;
-}
-
-ide_ext_devset_rw_sync(io_32bit, io_32bit);
-ide_ext_devset_rw_sync(keepsettings, ksettings);
-ide_ext_devset_rw_sync(unmaskirq, unmaskirq);
-ide_ext_devset_rw_sync(using_dma, using_dma);
-__IDE_DEVSET(pio_mode, DS_SYNC, NULL, set_pio_mode);
-
-int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting,
-		       int arg)
-{
-	struct request_queue *q = drive->queue;
-	struct request *rq;
-	int ret = 0;
-
-	if (!(setting->flags & DS_SYNC))
-		return setting->set(drive, arg);
-
-	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	scsi_req(rq)->cmd_len = 5;
-	scsi_req(rq)->cmd[0] = REQ_DEVSET_EXEC;
-	*(int *)&scsi_req(rq)->cmd[1] = arg;
-	ide_req(rq)->special = setting->set;
-
-	blk_execute_rq(NULL, rq, 0);
-	ret = scsi_req(rq)->result;
-	blk_put_request(rq);
-
-	return ret;
-}
-
-ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq)
-{
-	int err, (*setfunc)(ide_drive_t *, int) = ide_req(rq)->special;
-
-	err = setfunc(drive, *(int *)&scsi_req(rq)->cmd[1]);
-	if (err)
-		scsi_req(rq)->result = err;
-	ide_complete_rq(drive, 0, blk_rq_bytes(rq));
-	return ide_stopped;
-}
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
deleted file mode 100644
index 8413731c62598..0000000000000
--- a/drivers/ide/ide-disk.c
+++ /dev/null
@@ -1,795 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  Copyright (C) 1994-1998	   Linus Torvalds & authors (see below)
- *  Copyright (C) 1998-2002	   Linux ATA Development
- *				      Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2003		   Red Hat
- *  Copyright (C) 2003-2005, 2007  Bartlomiej Zolnierkiewicz
- */
-
-/*
- *  Mostly written by Mark Lord <mlord@pobox.com>
- *                and Gadi Oxman <gadio@netvision.net.il>
- *                and Andre Hedrick <andre@linux-ide.org>
- *
- * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/mutex.h>
-#include <linux/leds.h>
-#include <linux/ide.h>
-
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
-#include <asm/div64.h>
-
-#include "ide-disk.h"
-
-static const u8 ide_rw_cmds[] = {
-	ATA_CMD_READ_MULTI,
-	ATA_CMD_WRITE_MULTI,
-	ATA_CMD_READ_MULTI_EXT,
-	ATA_CMD_WRITE_MULTI_EXT,
-	ATA_CMD_PIO_READ,
-	ATA_CMD_PIO_WRITE,
-	ATA_CMD_PIO_READ_EXT,
-	ATA_CMD_PIO_WRITE_EXT,
-	ATA_CMD_READ,
-	ATA_CMD_WRITE,
-	ATA_CMD_READ_EXT,
-	ATA_CMD_WRITE_EXT,
-};
-
-static void ide_tf_set_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 dma)
-{
-	u8 index, lba48, write;
-
-	lba48 = (cmd->tf_flags & IDE_TFLAG_LBA48) ? 2 : 0;
-	write = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 0;
-
-	if (dma) {
-		cmd->protocol = ATA_PROT_DMA;
-		index = 8;
-	} else {
-		cmd->protocol = ATA_PROT_PIO;
-		if (drive->mult_count) {
-			cmd->tf_flags |= IDE_TFLAG_MULTI_PIO;
-			index = 0;
-		} else
-			index = 4;
-	}
-
-	cmd->tf.command = ide_rw_cmds[index + lba48 + write];
-}
-
-/*
- * __ide_do_rw_disk() issues READ and WRITE commands to a disk,
- * using LBA if supported, or CHS otherwise, to address sectors.
- */
-static ide_startstop_t __ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-					sector_t block)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	u16 nsectors		= (u16)blk_rq_sectors(rq);
-	u8 lba48		= !!(drive->dev_flags & IDE_DFLAG_LBA48);
-	u8 dma			= !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
-	struct ide_cmd		cmd;
-	struct ide_taskfile	*tf = &cmd.tf;
-	ide_startstop_t		rc;
-
-	if ((hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) && lba48 && dma) {
-		if (block + blk_rq_sectors(rq) > 1ULL << 28)
-			dma = 0;
-		else
-			lba48 = 0;
-	}
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	if (drive->dev_flags & IDE_DFLAG_LBA) {
-		if (lba48) {
-			pr_debug("%s: LBA=0x%012llx\n", drive->name,
-					(unsigned long long)block);
-
-			tf->nsect  = nsectors & 0xff;
-			tf->lbal   = (u8) block;
-			tf->lbam   = (u8)(block >>  8);
-			tf->lbah   = (u8)(block >> 16);
-			tf->device = ATA_LBA;
-
-			tf = &cmd.hob;
-			tf->nsect = (nsectors >> 8) & 0xff;
-			tf->lbal  = (u8)(block >> 24);
-			if (sizeof(block) != 4) {
-				tf->lbam = (u8)((u64)block >> 32);
-				tf->lbah = (u8)((u64)block >> 40);
-			}
-
-			cmd.valid.out.hob = IDE_VALID_OUT_HOB;
-			cmd.valid.in.hob  = IDE_VALID_IN_HOB;
-			cmd.tf_flags |= IDE_TFLAG_LBA48;
-		} else {
-			tf->nsect  = nsectors & 0xff;
-			tf->lbal   = block;
-			tf->lbam   = block >>= 8;
-			tf->lbah   = block >>= 8;
-			tf->device = ((block >> 8) & 0xf) | ATA_LBA;
-		}
-	} else {
-		unsigned int sect, head, cyl, track;
-
-		track = (int)block / drive->sect;
-		sect  = (int)block % drive->sect + 1;
-		head  = track % drive->head;
-		cyl   = track / drive->head;
-
-		pr_debug("%s: CHS=%u/%u/%u\n", drive->name, cyl, head, sect);
-
-		tf->nsect  = nsectors & 0xff;
-		tf->lbal   = sect;
-		tf->lbam   = cyl;
-		tf->lbah   = cyl >> 8;
-		tf->device = head;
-	}
-
-	cmd.tf_flags |= IDE_TFLAG_FS;
-
-	if (rq_data_dir(rq))
-		cmd.tf_flags |= IDE_TFLAG_WRITE;
-
-	ide_tf_set_cmd(drive, &cmd, dma);
-	cmd.rq = rq;
-
-	if (dma == 0) {
-		ide_init_sg_cmd(&cmd, nsectors << 9);
-		ide_map_sg(drive, &cmd);
-	}
-
-	rc = do_rw_taskfile(drive, &cmd);
-
-	if (rc == ide_stopped && dma) {
-		/* fallback to PIO */
-		cmd.tf_flags |= IDE_TFLAG_DMA_PIO_FALLBACK;
-		ide_tf_set_cmd(drive, &cmd, 0);
-		ide_init_sg_cmd(&cmd, nsectors << 9);
-		rc = do_rw_taskfile(drive, &cmd);
-	}
-
-	return rc;
-}
-
-/*
- * 268435455  == 137439 MB or 28bit limit
- * 320173056  == 163929 MB or 48bit addressing
- * 1073741822 == 549756 MB or 48bit addressing fake drive
- */
-
-static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
-				      sector_t block)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
-	BUG_ON(blk_rq_is_passthrough(rq));
-
-	ledtrig_disk_activity(rq_data_dir(rq) == WRITE);
-
-	pr_debug("%s: %sing: block=%llu, sectors=%u\n",
-		 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
-		 (unsigned long long)block, blk_rq_sectors(rq));
-
-	if (hwif->rw_disk)
-		hwif->rw_disk(drive, rq);
-
-	return __ide_do_rw_disk(drive, rq, block);
-}
-
-/*
- * Queries for true maximum capacity of the drive.
- * Returns maximum LBA address (> 0) of the drive, 0 if failed.
- */
-static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-	u64 addr = 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	if (lba48)
-		tf->command = ATA_CMD_READ_NATIVE_MAX_EXT;
-	else
-		tf->command = ATA_CMD_READ_NATIVE_MAX;
-	tf->device  = ATA_LBA;
-
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	if (lba48) {
-		cmd.valid.out.hob = IDE_VALID_OUT_HOB;
-		cmd.valid.in.hob  = IDE_VALID_IN_HOB;
-		cmd.tf_flags = IDE_TFLAG_LBA48;
-	}
-
-	ide_no_data_taskfile(drive, &cmd);
-
-	/* if OK, compute maximum address value */
-	if (!(tf->status & ATA_ERR))
-		addr = ide_get_lba_addr(&cmd, lba48) + 1;
-
-	return addr;
-}
-
-/*
- * Sets maximum virtual LBA address of the drive.
- * Returns new maximum virtual LBA address (> 0) or 0 on failure.
- */
-static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-	u64 addr_set = 0;
-
-	addr_req--;
-
-	memset(&cmd, 0, sizeof(cmd));
-	tf->lbal     = (addr_req >>  0) & 0xff;
-	tf->lbam     = (addr_req >>= 8) & 0xff;
-	tf->lbah     = (addr_req >>= 8) & 0xff;
-	if (lba48) {
-		cmd.hob.lbal = (addr_req >>= 8) & 0xff;
-		cmd.hob.lbam = (addr_req >>= 8) & 0xff;
-		cmd.hob.lbah = (addr_req >>= 8) & 0xff;
-		tf->command  = ATA_CMD_SET_MAX_EXT;
-	} else {
-		tf->device   = (addr_req >>= 8) & 0x0f;
-		tf->command  = ATA_CMD_SET_MAX;
-	}
-	tf->device |= ATA_LBA;
-
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	if (lba48) {
-		cmd.valid.out.hob = IDE_VALID_OUT_HOB;
-		cmd.valid.in.hob  = IDE_VALID_IN_HOB;
-		cmd.tf_flags = IDE_TFLAG_LBA48;
-	}
-
-	ide_no_data_taskfile(drive, &cmd);
-
-	/* if OK, compute maximum address value */
-	if (!(tf->status & ATA_ERR))
-		addr_set = ide_get_lba_addr(&cmd, lba48) + 1;
-
-	return addr_set;
-}
-
-static unsigned long long sectors_to_MB(unsigned long long n)
-{
-	n <<= 9;		/* make it bytes */
-	do_div(n, 1000000);	/* make it MB */
-	return n;
-}
-
-/*
- * Some disks report total number of sectors instead of
- * maximum sector address.  We list them here.
- */
-static const struct drive_list_entry hpa_list[] = {
-	{ "ST340823A",	NULL },
-	{ "ST320413A",	NULL },
-	{ "ST310211A",	NULL },
-	{ NULL,		NULL }
-};
-
-static u64 ide_disk_hpa_get_native_capacity(ide_drive_t *drive, int lba48)
-{
-	u64 capacity, set_max;
-
-	capacity = drive->capacity64;
-	set_max  = idedisk_read_native_max_address(drive, lba48);
-
-	if (ide_in_drive_list(drive->id, hpa_list)) {
-		/*
-		 * Since we are inclusive wrt to firmware revisions do this
-		 * extra check and apply the workaround only when needed.
-		 */
-		if (set_max == capacity + 1)
-			set_max--;
-	}
-
-	return set_max;
-}
-
-static u64 ide_disk_hpa_set_capacity(ide_drive_t *drive, u64 set_max, int lba48)
-{
-	set_max = idedisk_set_max_address(drive, set_max, lba48);
-	if (set_max)
-		drive->capacity64 = set_max;
-
-	return set_max;
-}
-
-static void idedisk_check_hpa(ide_drive_t *drive)
-{
-	u64 capacity, set_max;
-	int lba48 = ata_id_lba48_enabled(drive->id);
-
-	capacity = drive->capacity64;
-	set_max  = ide_disk_hpa_get_native_capacity(drive, lba48);
-
-	if (set_max <= capacity)
-		return;
-
-	drive->probed_capacity = set_max;
-
-	printk(KERN_INFO "%s: Host Protected Area detected.\n"
-			 "\tcurrent capacity is %llu sectors (%llu MB)\n"
-			 "\tnative  capacity is %llu sectors (%llu MB)\n",
-			 drive->name,
-			 capacity, sectors_to_MB(capacity),
-			 set_max, sectors_to_MB(set_max));
-
-	if ((drive->dev_flags & IDE_DFLAG_NOHPA) == 0)
-		return;
-
-	set_max = ide_disk_hpa_set_capacity(drive, set_max, lba48);
-	if (set_max)
-		printk(KERN_INFO "%s: Host Protected Area disabled.\n",
-				 drive->name);
-}
-
-static int ide_disk_get_capacity(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	int lba;
-
-	if (ata_id_lba48_enabled(id)) {
-		/* drive speaks 48-bit LBA */
-		lba = 1;
-		drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2);
-	} else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) {
-		/* drive speaks 28-bit LBA */
-		lba = 1;
-		drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY);
-	} else {
-		/* drive speaks boring old 28-bit CHS */
-		lba = 0;
-		drive->capacity64 = drive->cyl * drive->head * drive->sect;
-	}
-
-	drive->probed_capacity = drive->capacity64;
-
-	if (lba) {
-		drive->dev_flags |= IDE_DFLAG_LBA;
-
-		/*
-		* If this device supports the Host Protected Area feature set,
-		* then we may need to change our opinion about its capacity.
-		*/
-		if (ata_id_hpa_enabled(id))
-			idedisk_check_hpa(drive);
-	}
-
-	/* limit drive capacity to 137GB if LBA48 cannot be used */
-	if ((drive->dev_flags & IDE_DFLAG_LBA48) == 0 &&
-	    drive->capacity64 > 1ULL << 28) {
-		printk(KERN_WARNING "%s: cannot use LBA48 - full capacity "
-		       "%llu sectors (%llu MB)\n",
-		       drive->name, (unsigned long long)drive->capacity64,
-		       sectors_to_MB(drive->capacity64));
-		drive->probed_capacity = drive->capacity64 = 1ULL << 28;
-	}
-
-	if ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA) &&
-	    (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		if (drive->capacity64 > 1ULL << 28) {
-			printk(KERN_INFO "%s: cannot use LBA48 DMA - PIO mode"
-					 " will be used for accessing sectors "
-					 "> %u\n", drive->name, 1 << 28);
-		} else
-			drive->dev_flags &= ~IDE_DFLAG_LBA48;
-	}
-
-	return 0;
-}
-
-static void ide_disk_unlock_native_capacity(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	int lba48 = ata_id_lba48_enabled(id);
-
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 ||
-	    ata_id_hpa_enabled(id) == 0)
-		return;
-
-	/*
-	 * according to the spec the SET MAX ADDRESS command shall be
-	 * immediately preceded by a READ NATIVE MAX ADDRESS command
-	 */
-	if (!ide_disk_hpa_get_native_capacity(drive, lba48))
-		return;
-
-	if (ide_disk_hpa_set_capacity(drive, drive->probed_capacity, lba48))
-		drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */
-}
-
-static bool idedisk_prep_rq(ide_drive_t *drive, struct request *rq)
-{
-	struct ide_cmd *cmd;
-
-	if (req_op(rq) != REQ_OP_FLUSH)
-		return true;
-
-	if (ide_req(rq)->special) {
-		cmd = ide_req(rq)->special;
-		memset(cmd, 0, sizeof(*cmd));
-	} else {
-		cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
-	}
-
-	/* FIXME: map struct ide_taskfile on rq->cmd[] */
-	BUG_ON(cmd == NULL);
-
-	if (ata_id_flush_ext_enabled(drive->id) &&
-	    (drive->capacity64 >= (1UL << 28)))
-		cmd->tf.command = ATA_CMD_FLUSH_EXT;
-	else
-		cmd->tf.command = ATA_CMD_FLUSH;
-	cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd->tf_flags = IDE_TFLAG_DYN;
-	cmd->protocol = ATA_PROT_NODATA;
-	rq->cmd_flags &= ~REQ_OP_MASK;
-	rq->cmd_flags |= REQ_OP_DRV_OUT;
-	ide_req(rq)->type = ATA_PRIV_TASKFILE;
-	ide_req(rq)->special = cmd;
-	cmd->rq = rq;
-
-	return true;
-}
-
-ide_devset_get(multcount, mult_count);
-
-/*
- * This is tightly woven into the driver->do_special can not touch.
- * DON'T do it again until a total personality rewrite is committed.
- */
-static int set_multcount(ide_drive_t *drive, int arg)
-{
-	struct request *rq;
-
-	if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff))
-		return -EINVAL;
-
-	if (drive->special_flags & IDE_SFLAG_SET_MULTMODE)
-		return -EBUSY;
-
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_TASKFILE;
-
-	drive->mult_req = arg;
-	drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-	blk_execute_rq(NULL, rq, 0);
-	blk_put_request(rq);
-
-	return (drive->mult_count == arg) ? 0 : -EIO;
-}
-
-ide_devset_get_flag(nowerr, IDE_DFLAG_NOWERR);
-
-static int set_nowerr(ide_drive_t *drive, int arg)
-{
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	if (arg)
-		drive->dev_flags |= IDE_DFLAG_NOWERR;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_NOWERR;
-
-	drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT;
-
-	return 0;
-}
-
-static int ide_do_setfeature(ide_drive_t *drive, u8 feature, u8 nsect)
-{
-	struct ide_cmd cmd;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.tf.feature = feature;
-	cmd.tf.nsect   = nsect;
-	cmd.tf.command = ATA_CMD_SET_FEATURES;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	return ide_no_data_taskfile(drive, &cmd);
-}
-
-static void update_flush(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	bool wc = false;
-
-	if (drive->dev_flags & IDE_DFLAG_WCACHE) {
-		unsigned long long capacity;
-		int barrier;
-		/*
-		 * We must avoid issuing commands a drive does not
-		 * understand or we may crash it. We check flush cache
-		 * is supported. We also check we have the LBA48 flush
-		 * cache if the drive capacity is too large. By this
-		 * time we have trimmed the drive capacity if LBA48 is
-		 * not available so we don't need to recheck that.
-		 */
-		capacity = ide_gd_capacity(drive);
-		barrier = ata_id_flush_enabled(id) &&
-			(drive->dev_flags & IDE_DFLAG_NOFLUSH) == 0 &&
-			((drive->dev_flags & IDE_DFLAG_LBA48) == 0 ||
-			 capacity <= (1ULL << 28) ||
-			 ata_id_flush_ext_enabled(id));
-
-		printk(KERN_INFO "%s: cache flushes %ssupported\n",
-		       drive->name, barrier ? "" : "not ");
-
-		if (barrier) {
-			wc = true;
-			drive->prep_rq = idedisk_prep_rq;
-		}
-	}
-
-	blk_queue_write_cache(drive->queue, wc, false);
-}
-
-ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);
-
-static int set_wcache(ide_drive_t *drive, int arg)
-{
-	int err = 1;
-
-	if (arg < 0 || arg > 1)
-		return -EINVAL;
-
-	if (ata_id_flush_enabled(drive->id)) {
-		err = ide_do_setfeature(drive,
-			arg ? SETFEATURES_WC_ON : SETFEATURES_WC_OFF, 0);
-		if (err == 0) {
-			if (arg)
-				drive->dev_flags |= IDE_DFLAG_WCACHE;
-			else
-				drive->dev_flags &= ~IDE_DFLAG_WCACHE;
-		}
-	}
-
-	update_flush(drive);
-
-	return err;
-}
-
-static int do_idedisk_flushcache(ide_drive_t *drive)
-{
-	struct ide_cmd cmd;
-
-	memset(&cmd, 0, sizeof(cmd));
-	if (ata_id_flush_ext_enabled(drive->id))
-		cmd.tf.command = ATA_CMD_FLUSH_EXT;
-	else
-		cmd.tf.command = ATA_CMD_FLUSH;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	return ide_no_data_taskfile(drive, &cmd);
-}
-
-ide_devset_get(acoustic, acoustic);
-
-static int set_acoustic(ide_drive_t *drive, int arg)
-{
-	if (arg < 0 || arg > 254)
-		return -EINVAL;
-
-	ide_do_setfeature(drive,
-		arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF, arg);
-
-	drive->acoustic = arg;
-
-	return 0;
-}
-
-ide_devset_get_flag(addressing, IDE_DFLAG_LBA48);
-
-/*
- * drive->addressing:
- *	0: 28-bit
- *	1: 48-bit
- *	2: 48-bit capable doing 28-bit
- */
-static int set_addressing(ide_drive_t *drive, int arg)
-{
-	if (arg < 0 || arg > 2)
-		return -EINVAL;
-
-	if (arg && ((drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
-	    ata_id_lba48_enabled(drive->id) == 0))
-		return -EIO;
-
-	if (arg == 2)
-		arg = 0;
-
-	if (arg)
-		drive->dev_flags |= IDE_DFLAG_LBA48;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_LBA48;
-
-	return 0;
-}
-
-ide_ext_devset_rw(acoustic, acoustic);
-ide_ext_devset_rw(address, addressing);
-ide_ext_devset_rw(multcount, multcount);
-ide_ext_devset_rw(wcache, wcache);
-
-ide_ext_devset_rw_sync(nowerr, nowerr);
-
-static int ide_disk_check(ide_drive_t *drive, const char *s)
-{
-	return 1;
-}
-
-static void ide_disk_setup(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp = drive->driver_data;
-	struct request_queue *q = drive->queue;
-	ide_hwif_t *hwif = drive->hwif;
-	u16 *id = drive->id;
-	char *m = (char *)&id[ATA_ID_PROD];
-	unsigned long long capacity;
-
-	ide_proc_register_driver(drive, idkp->driver);
-
-	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0)
-		return;
-
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE) {
-		/*
-		 * Removable disks (eg. SYQUEST); ignore 'WD' drives
-		 */
-		if (m[0] != 'W' || m[1] != 'D')
-			drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
-	}
-
-	(void)set_addressing(drive, 1);
-
-	if (drive->dev_flags & IDE_DFLAG_LBA48) {
-		int max_s = 2048;
-
-		if (max_s > hwif->rqsize)
-			max_s = hwif->rqsize;
-
-		blk_queue_max_hw_sectors(q, max_s);
-	}
-
-	printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name,
-	       queue_max_sectors(q) / 2);
-
-	if (ata_id_is_ssd(id)) {
-		blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
-		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
-	}
-
-	/* calculate drive capacity, and select LBA if possible */
-	ide_disk_get_capacity(drive);
-
-	/*
-	 * if possible, give fdisk access to more of the drive,
-	 * by correcting bios_cyls:
-	 */
-	capacity = ide_gd_capacity(drive);
-
-	if ((drive->dev_flags & IDE_DFLAG_FORCED_GEOM) == 0) {
-		if (ata_id_lba48_enabled(drive->id)) {
-			/* compatibility */
-			drive->bios_sect = 63;
-			drive->bios_head = 255;
-		}
-
-		if (drive->bios_sect && drive->bios_head) {
-			unsigned int cap0 = capacity; /* truncate to 32 bits */
-			unsigned int cylsz, cyl;
-
-			if (cap0 != capacity)
-				drive->bios_cyl = 65535;
-			else {
-				cylsz = drive->bios_sect * drive->bios_head;
-				cyl = cap0 / cylsz;
-				if (cyl > 65535)
-					cyl = 65535;
-				if (cyl > drive->bios_cyl)
-					drive->bios_cyl = cyl;
-			}
-		}
-	}
-	printk(KERN_INFO "%s: %llu sectors (%llu MB)",
-			 drive->name, capacity, sectors_to_MB(capacity));
-
-	/* Only print cache size when it was specified */
-	if (id[ATA_ID_BUF_SIZE])
-		printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2);
-
-	printk(KERN_CONT ", CHS=%d/%d/%d\n",
-			 drive->bios_cyl, drive->bios_head, drive->bios_sect);
-
-	/* write cache enabled? */
-	if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id))
-		drive->dev_flags |= IDE_DFLAG_WCACHE;
-
-	set_wcache(drive, 1);
-
-	if ((drive->dev_flags & IDE_DFLAG_LBA) == 0 &&
-	    (drive->head == 0 || drive->head > 16))
-		printk(KERN_ERR "%s: invalid geometry: %d physical heads?\n",
-			drive->name, drive->head);
-}
-
-static void ide_disk_flush(ide_drive_t *drive)
-{
-	if (ata_id_flush_enabled(drive->id) == 0 ||
-	    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0)
-		return;
-
-	if (do_idedisk_flushcache(drive))
-		printk(KERN_INFO "%s: wcache flush failed!\n", drive->name);
-}
-
-static int ide_disk_init_media(ide_drive_t *drive, struct gendisk *disk)
-{
-	return 0;
-}
-
-static int ide_disk_set_doorlock(ide_drive_t *drive, struct gendisk *disk,
-				 int on)
-{
-	struct ide_cmd cmd;
-	int ret;
-
-	if ((drive->dev_flags & IDE_DFLAG_DOORLOCKING) == 0)
-		return 0;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	ret = ide_no_data_taskfile(drive, &cmd);
-
-	if (ret)
-		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-
-	return ret;
-}
-
-const struct ide_disk_ops ide_ata_disk_ops = {
-	.check			= ide_disk_check,
-	.unlock_native_capacity	= ide_disk_unlock_native_capacity,
-	.get_capacity		= ide_disk_get_capacity,
-	.setup			= ide_disk_setup,
-	.flush			= ide_disk_flush,
-	.init_media		= ide_disk_init_media,
-	.set_doorlock		= ide_disk_set_doorlock,
-	.do_request		= ide_do_rw_disk,
-	.ioctl			= ide_disk_ioctl,
-	.compat_ioctl		= ide_disk_ioctl,
-};
diff --git a/drivers/ide/ide-disk.h b/drivers/ide/ide-disk.h
deleted file mode 100644
index 0e8cc18bfda62..0000000000000
--- a/drivers/ide/ide-disk.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IDE_DISK_H
-#define __IDE_DISK_H
-
-#include "ide-gd.h"
-
-#ifdef CONFIG_IDE_GD_ATA
-/* ide-disk.c */
-extern const struct ide_disk_ops ide_ata_disk_ops;
-ide_decl_devset(address);
-ide_decl_devset(multcount);
-ide_decl_devset(nowerr);
-ide_decl_devset(wcache);
-ide_decl_devset(acoustic);
-
-/* ide-disk_ioctl.c */
-int ide_disk_ioctl(ide_drive_t *, struct block_device *, fmode_t, unsigned int,
-		   unsigned long);
-
-#ifdef CONFIG_IDE_PROC_FS
-/* ide-disk_proc.c */
-extern ide_proc_entry_t ide_disk_proc[];
-extern const struct ide_proc_devset ide_disk_settings[];
-#endif
-#else
-#define ide_disk_proc		NULL
-#define ide_disk_settings	NULL
-#endif
-
-#endif /* __IDE_DISK_H */
diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
deleted file mode 100644
index 2c45616cff4f1..0000000000000
--- a/drivers/ide/ide-disk_ioctl.c
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-#include <linux/mutex.h>
-
-#include "ide-disk.h"
-
-static DEFINE_MUTEX(ide_disk_ioctl_mutex);
-static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = {
-{ HDIO_GET_ADDRESS,	HDIO_SET_ADDRESS,   &ide_devset_address   },
-{ HDIO_GET_MULTCOUNT,	HDIO_SET_MULTCOUNT, &ide_devset_multcount },
-{ HDIO_GET_NOWERR,	HDIO_SET_NOWERR,    &ide_devset_nowerr	  },
-{ HDIO_GET_WCACHE,	HDIO_SET_WCACHE,    &ide_devset_wcache	  },
-{ HDIO_GET_ACOUSTIC,	HDIO_SET_ACOUSTIC,  &ide_devset_acoustic  },
-{ 0 }
-};
-
-int ide_disk_ioctl(ide_drive_t *drive, struct block_device *bdev, fmode_t mode,
-		   unsigned int cmd, unsigned long arg)
-{
-	int err;
-
-	mutex_lock(&ide_disk_ioctl_mutex);
-	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
-	if (err != -EOPNOTSUPP)
-		goto out;
-
-	err = generic_ide_ioctl(drive, bdev, cmd, arg);
-out:
-	mutex_unlock(&ide_disk_ioctl_mutex);
-	return err;
-}
diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c
deleted file mode 100644
index 95d239b2f646d..0000000000000
--- a/drivers/ide/ide-disk_proc.c
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/ide.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/seq_file.h>
-
-#include "ide-disk.h"
-
-static int smart_enable(ide_drive_t *drive)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-
-	memset(&cmd, 0, sizeof(cmd));
-	tf->feature = ATA_SMART_ENABLE;
-	tf->lbam    = ATA_SMART_LBAM_PASS;
-	tf->lbah    = ATA_SMART_LBAH_PASS;
-	tf->command = ATA_CMD_SMART;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	return ide_no_data_taskfile(drive, &cmd);
-}
-
-static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-
-	memset(&cmd, 0, sizeof(cmd));
-	tf->feature = sub_cmd;
-	tf->nsect   = 0x01;
-	tf->lbam    = ATA_SMART_LBAM_PASS;
-	tf->lbah    = ATA_SMART_LBAH_PASS;
-	tf->command = ATA_CMD_SMART;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd.protocol = ATA_PROT_PIO;
-
-	return ide_raw_taskfile(drive, &cmd, buf, 1);
-}
-
-static int idedisk_cache_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t	*drive = (ide_drive_t *) m->private;
-
-	if (drive->dev_flags & IDE_DFLAG_ID_READ)
-		seq_printf(m, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2);
-	else
-		seq_printf(m, "(none)\n");
-	return 0;
-}
-
-static int idedisk_capacity_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t*drive = (ide_drive_t *)m->private;
-
-	seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive));
-	return 0;
-}
-
-static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd)
-{
-	u8 *buf;
-
-	buf = kmalloc(SECTOR_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	(void)smart_enable(drive);
-
-	if (get_smart_data(drive, buf, sub_cmd) == 0) {
-		__le16 *val = (__le16 *)buf;
-		int i;
-
-		for (i = 0; i < SECTOR_SIZE / 2; i++) {
-			seq_printf(m, "%04x%c", le16_to_cpu(val[i]),
-					(i % 8) == 7 ? '\n' : ' ');
-		}
-	}
-	kfree(buf);
-	return 0;
-}
-
-static int idedisk_sv_proc_show(struct seq_file *m, void *v)
-{
-	return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES);
-}
-
-static int idedisk_st_proc_show(struct seq_file *m, void *v)
-{
-	return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS);
-}
-
-ide_proc_entry_t ide_disk_proc[] = {
-	{ "cache",	  S_IFREG|S_IRUGO, idedisk_cache_proc_show	},
-	{ "capacity",	  S_IFREG|S_IRUGO, idedisk_capacity_proc_show	},
-	{ "geometry",	  S_IFREG|S_IRUGO, ide_geometry_proc_show	},
-	{ "smart_values", S_IFREG|S_IRUSR, idedisk_sv_proc_show		},
-	{ "smart_thresholds", S_IFREG|S_IRUSR, idedisk_st_proc_show	},
-	{}
-};
-
-ide_devset_rw_field(bios_cyl, bios_cyl);
-ide_devset_rw_field(bios_head, bios_head);
-ide_devset_rw_field(bios_sect, bios_sect);
-ide_devset_rw_field(failures, failures);
-ide_devset_rw_field(lun, lun);
-ide_devset_rw_field(max_failures, max_failures);
-
-const struct ide_proc_devset ide_disk_settings[] = {
-	IDE_PROC_DEVSET(acoustic,	0,   254),
-	IDE_PROC_DEVSET(address,	0,     2),
-	IDE_PROC_DEVSET(bios_cyl,	0, 65535),
-	IDE_PROC_DEVSET(bios_head,	0,   255),
-	IDE_PROC_DEVSET(bios_sect,	0,    63),
-	IDE_PROC_DEVSET(failures,	0, 65535),
-	IDE_PROC_DEVSET(lun,		0,     7),
-	IDE_PROC_DEVSET(max_failures,	0, 65535),
-	IDE_PROC_DEVSET(multcount,	0,    16),
-	IDE_PROC_DEVSET(nowerr,		0,     1),
-	IDE_PROC_DEVSET(wcache,		0,     1),
-	{ NULL },
-};
diff --git a/drivers/ide/ide-dma-sff.c b/drivers/ide/ide-dma-sff.c
deleted file mode 100644
index b7c2c0bd18b53..0000000000000
--- a/drivers/ide/ide-dma-sff.c
+++ /dev/null
@@ -1,336 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/io.h>
-
-/**
- *	config_drive_for_dma	-	attempt to activate IDE DMA
- *	@drive: the drive to place in DMA mode
- *
- *	If the drive supports at least mode 2 DMA or UDMA of any kind
- *	then attempt to place it into DMA mode. Drives that are known to
- *	support DMA but predate the DMA properties or that are known
- *	to have DMA handling bugs are also set up appropriately based
- *	on the good/bad drive lists.
- */
-
-int config_drive_for_dma(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u16 *id = drive->id;
-
-	if (drive->media != ide_disk) {
-		if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA)
-			return 0;
-	}
-
-	/*
-	 * Enable DMA on any drive that has
-	 * UltraDMA (mode 0/1/2/3/4/5/6) enabled
-	 */
-	if ((id[ATA_ID_FIELD_VALID] & 4) &&
-	    ((id[ATA_ID_UDMA_MODES] >> 8) & 0x7f))
-		return 1;
-
-	/*
-	 * Enable DMA on any drive that has mode2 DMA
-	 * (multi or single) enabled
-	 */
-	if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 ||
-	    (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404)
-		return 1;
-
-	/* Consult the list of known "good" drives */
-	if (ide_dma_good_drive(drive))
-		return 1;
-
-	return 0;
-}
-
-u8 ide_dma_sff_read_status(ide_hwif_t *hwif)
-{
-	unsigned long addr = hwif->dma_base + ATA_DMA_STATUS;
-
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return readb((void __iomem *)addr);
-	else
-		return inb(addr);
-}
-EXPORT_SYMBOL_GPL(ide_dma_sff_read_status);
-
-static void ide_dma_sff_write_status(ide_hwif_t *hwif, u8 val)
-{
-	unsigned long addr = hwif->dma_base + ATA_DMA_STATUS;
-
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		writeb(val, (void __iomem *)addr);
-	else
-		outb(val, addr);
-}
-
-/**
- *	ide_dma_host_set	-	Enable/disable DMA on a host
- *	@drive: drive to control
- *
- *	Enable/disable DMA on an IDE controller following generic
- *	bus-mastering IDE controller behaviour.
- */
-
-void ide_dma_host_set(ide_drive_t *drive, int on)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 unit = drive->dn & 1;
-	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-
-	if (on)
-		dma_stat |= (1 << (5 + unit));
-	else
-		dma_stat &= ~(1 << (5 + unit));
-
-	ide_dma_sff_write_status(hwif, dma_stat);
-}
-EXPORT_SYMBOL_GPL(ide_dma_host_set);
-
-/**
- *	ide_build_dmatable	-	build IDE DMA table
- *
- *	ide_build_dmatable() prepares a dma request. We map the command
- *	to get the pci bus addresses of the buffers and then build up
- *	the PRD table that the IDE layer wants to be fed.
- *
- *	Most chipsets correctly interpret a length of 0x0000 as 64KB,
- *	but at least one (e.g. CS5530) misinterprets it as zero (!).
- *	So we break the 64KB entry into two 32KB entries instead.
- *
- *	Returns the number of built PRD entries if all went okay,
- *	returns 0 otherwise.
- *
- *	May also be invoked from trm290.c
- */
-
-int ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	__le32 *table = (__le32 *)hwif->dmatable_cpu;
-	unsigned int count = 0;
-	int i;
-	struct scatterlist *sg;
-	u8 is_trm290 = !!(hwif->host_flags & IDE_HFLAG_TRM290);
-
-	for_each_sg(hwif->sg_table, sg, cmd->sg_nents, i) {
-		u32 cur_addr, cur_len, xcount, bcount;
-
-		cur_addr = sg_dma_address(sg);
-		cur_len = sg_dma_len(sg);
-
-		/*
-		 * Fill in the dma table, without crossing any 64kB boundaries.
-		 * Most hardware requires 16-bit alignment of all blocks,
-		 * but the trm290 requires 32-bit alignment.
-		 */
-
-		while (cur_len) {
-			if (count++ >= PRD_ENTRIES)
-				goto use_pio_instead;
-
-			bcount = 0x10000 - (cur_addr & 0xffff);
-			if (bcount > cur_len)
-				bcount = cur_len;
-			*table++ = cpu_to_le32(cur_addr);
-			xcount = bcount & 0xffff;
-			if (is_trm290)
-				xcount = ((xcount >> 2) - 1) << 16;
-			else if (xcount == 0x0000) {
-				if (count++ >= PRD_ENTRIES)
-					goto use_pio_instead;
-				*table++ = cpu_to_le32(0x8000);
-				*table++ = cpu_to_le32(cur_addr + 0x8000);
-				xcount = 0x8000;
-			}
-			*table++ = cpu_to_le32(xcount);
-			cur_addr += bcount;
-			cur_len -= bcount;
-		}
-	}
-
-	if (count) {
-		if (!is_trm290)
-			*--table |= cpu_to_le32(0x80000000);
-		return count;
-	}
-
-use_pio_instead:
-	printk(KERN_ERR "%s: %s\n", drive->name,
-		count ? "DMA table too small" : "empty DMA table?");
-
-	return 0; /* revert to PIO for this request */
-}
-EXPORT_SYMBOL_GPL(ide_build_dmatable);
-
-/**
- *	ide_dma_setup	-	begin a DMA phase
- *	@drive: target device
- *	@cmd: command
- *
- *	Build an IDE DMA PRD (IDE speak for scatter gather table)
- *	and then set up the DMA transfer registers for a device
- *	that follows generic IDE PCI DMA behaviour. Controllers can
- *	override this function if they need to
- *
- *	Returns 0 on success. If a PIO fallback is required then 1
- *	is returned.
- */
-
-int ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-	u8 rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 0 : ATA_DMA_WR;
-	u8 dma_stat;
-
-	/* fall back to pio! */
-	if (ide_build_dmatable(drive, cmd) == 0) {
-		ide_map_sg(drive, cmd);
-		return 1;
-	}
-
-	/* PRD table */
-	if (mmio)
-		writel(hwif->dmatable_dma,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_TABLE_OFS));
-	else
-		outl(hwif->dmatable_dma, hwif->dma_base + ATA_DMA_TABLE_OFS);
-
-	/* specify r/w */
-	if (mmio)
-		writeb(rw, (void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
-	else
-		outb(rw, hwif->dma_base + ATA_DMA_CMD);
-
-	/* read DMA status for INTR & ERROR flags */
-	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-
-	/* clear INTR & ERROR flags */
-	ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_dma_setup);
-
-/**
- *	ide_dma_sff_timer_expiry	-	handle a DMA timeout
- *	@drive: Drive that timed out
- *
- *	An IDE DMA transfer timed out. In the event of an error we ask
- *	the driver to resolve the problem, if a DMA transfer is still
- *	in progress we continue to wait (arguably we need to add a
- *	secondary 'I don't care what the drive thinks' timeout here)
- *	Finally if we have an interrupt we let it complete the I/O.
- *	But only one time - we clear expiry and if it's still not
- *	completed after WAIT_CMD, we error and retry in PIO.
- *	This can occur if an interrupt is lost or due to hang or bugs.
- */
-
-int ide_dma_sff_timer_expiry(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-
-	printk(KERN_WARNING "%s: %s: DMA status (0x%02x)\n",
-		drive->name, __func__, dma_stat);
-
-	if ((dma_stat & 0x18) == 0x18)	/* BUSY Stupid Early Timer !! */
-		return WAIT_CMD;
-
-	hwif->expiry = NULL;	/* one free ride for now */
-
-	if (dma_stat & ATA_DMA_ERR)	/* ERROR */
-		return -1;
-
-	if (dma_stat & ATA_DMA_ACTIVE)	/* DMAing */
-		return WAIT_CMD;
-
-	if (dma_stat & ATA_DMA_INTR)	/* Got an Interrupt */
-		return WAIT_CMD;
-
-	return 0;	/* Status is unknown -- reset the bus */
-}
-EXPORT_SYMBOL_GPL(ide_dma_sff_timer_expiry);
-
-void ide_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_cmd;
-
-	/* Note that this is done *after* the cmd has
-	 * been issued to the drive, as per the BM-IDE spec.
-	 * The Promise Ultra33 doesn't work correctly when
-	 * we do this part before issuing the drive cmd.
-	 */
-	if (hwif->host_flags & IDE_HFLAG_MMIO) {
-		dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
-		writeb(dma_cmd | ATA_DMA_START,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
-	} else {
-		dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-		outb(dma_cmd | ATA_DMA_START, hwif->dma_base + ATA_DMA_CMD);
-	}
-}
-EXPORT_SYMBOL_GPL(ide_dma_start);
-
-/* returns 1 on error, 0 otherwise */
-int ide_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = 0, dma_cmd = 0;
-
-	/* stop DMA */
-	if (hwif->host_flags & IDE_HFLAG_MMIO) {
-		dma_cmd = readb((void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
-		writeb(dma_cmd & ~ATA_DMA_START,
-		       (void __iomem *)(hwif->dma_base + ATA_DMA_CMD));
-	} else {
-		dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-		outb(dma_cmd & ~ATA_DMA_START, hwif->dma_base + ATA_DMA_CMD);
-	}
-
-	/* get DMA status */
-	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-
-	/* clear INTR & ERROR bits */
-	ide_dma_sff_write_status(hwif, dma_stat | ATA_DMA_ERR | ATA_DMA_INTR);
-
-#define CHECK_DMA_MASK (ATA_DMA_ACTIVE | ATA_DMA_ERR | ATA_DMA_INTR)
-
-	/* verify good DMA status */
-	if ((dma_stat & CHECK_DMA_MASK) != ATA_DMA_INTR)
-		return 0x10 | dma_stat;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_dma_end);
-
-/* returns 1 if dma irq issued, 0 otherwise */
-int ide_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-
-	return (dma_stat & ATA_DMA_INTR) ? 1 : 0;
-}
-EXPORT_SYMBOL_GPL(ide_dma_test_irq);
-
-const struct ide_dma_ops sff_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-EXPORT_SYMBOL_GPL(sff_dma_ops);
diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
deleted file mode 100644
index 6f344654ef229..0000000000000
--- a/drivers/ide/ide-dma.c
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- *  IDE DMA support (including IDE PCI BM-DMA).
- *
- *  Copyright (C) 1995-1998   Mark Lord
- *  Copyright (C) 1999-2000   Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2004, 2007  Bartlomiej Zolnierkiewicz
- *
- *  May be copied or modified under the terms of the GNU General Public License
- *
- *  DMA is supported for all IDE devices (disk drives, cdroms, tapes, floppies).
- */
-
-/*
- *  Special Thanks to Mark for his Six years of work.
- */
-
-/*
- * Thanks to "Christopher J. Reimer" <reimer@doe.carleton.ca> for
- * fixing the problem with the BIOS on some Acer motherboards.
- *
- * Thanks to "Benoit Poulot-Cazajous" <poulot@chorus.fr> for testing
- * "TX" chipset compatibility and for providing patches for the "TX" chipset.
- *
- * Thanks to Christian Brunner <chb@muc.de> for taking a good first crack
- * at generic DMA -- his patches were referred to when preparing this code.
- *
- * Most importantly, thanks to Robert Bringman <rob@mars.trion.com>
- * for supplying a Promise UDMA board & WD UDMA drive for this work!
- */
-
-#include <linux/types.h>
-#include <linux/gfp.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-
-static const struct drive_list_entry drive_whitelist[] = {
-	{ "Micropolis 2112A"	,       NULL		},
-	{ "CONNER CTMA 4000"	,       NULL		},
-	{ "CONNER CTT8000-A"	,       NULL		},
-	{ "ST34342A"		,	NULL		},
-	{ NULL			,	NULL		}
-};
-
-static const struct drive_list_entry drive_blacklist[] = {
-	{ "WDC AC11000H"	,	NULL 		},
-	{ "WDC AC22100H"	,	NULL 		},
-	{ "WDC AC32500H"	,	NULL 		},
-	{ "WDC AC33100H"	,	NULL 		},
-	{ "WDC AC31600H"	,	NULL 		},
-	{ "WDC AC32100H"	,	"24.09P07"	},
-	{ "WDC AC23200L"	,	"21.10N21"	},
-	{ "Compaq CRD-8241B"	,	NULL 		},
-	{ "CRD-8400B"		,	NULL 		},
-	{ "CRD-8480B",			NULL 		},
-	{ "CRD-8482B",			NULL 		},
-	{ "CRD-84"		,	NULL 		},
-	{ "SanDisk SDP3B"	,	NULL 		},
-	{ "SanDisk SDP3B-64"	,	NULL 		},
-	{ "SANYO CD-ROM CRD"	,	NULL 		},
-	{ "HITACHI CDR-8"	,	NULL 		},
-	{ "HITACHI CDR-8335"	,	NULL 		},
-	{ "HITACHI CDR-8435"	,	NULL 		},
-	{ "Toshiba CD-ROM XM-6202B"	,	NULL 		},
-	{ "TOSHIBA CD-ROM XM-1702BC",	NULL 		},
-	{ "CD-532E-A"		,	NULL 		},
-	{ "E-IDE CD-ROM CR-840",	NULL 		},
-	{ "CD-ROM Drive/F5A",	NULL 		},
-	{ "WPI CDD-820",		NULL 		},
-	{ "SAMSUNG CD-ROM SC-148C",	NULL 		},
-	{ "SAMSUNG CD-ROM SC",	NULL 		},
-	{ "ATAPI CD-ROM DRIVE 40X MAXIMUM",	NULL 		},
-	{ "_NEC DV5800A",               NULL            },
-	{ "SAMSUNG CD-ROM SN-124",	"N001" },
-	{ "Seagate STT20000A",		NULL  },
-	{ "CD-ROM CDR_U200",		"1.09" },
-	{ NULL			,	NULL		}
-
-};
-
-/**
- *	ide_dma_intr	-	IDE DMA interrupt handler
- *	@drive: the drive the interrupt is for
- *
- *	Handle an interrupt completing a read/write DMA transfer on an
- *	IDE device
- */
-
-ide_startstop_t ide_dma_intr(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &hwif->cmd;
-	u8 stat = 0, dma_stat = 0;
-
-	drive->waiting_for_dma = 0;
-	dma_stat = hwif->dma_ops->dma_end(drive);
-	ide_dma_unmap_sg(drive, cmd);
-	stat = hwif->tp_ops->read_status(hwif);
-
-	if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) {
-		if (!dma_stat) {
-			if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
-				ide_finish_cmd(drive, cmd, stat);
-			else
-				ide_complete_rq(drive, BLK_STS_OK,
-						blk_rq_sectors(cmd->rq) << 9);
-			return ide_stopped;
-		}
-		printk(KERN_ERR "%s: %s: bad DMA status (0x%02x)\n",
-			drive->name, __func__, dma_stat);
-	}
-	return ide_error(drive, "dma_intr", stat);
-}
-
-int ide_dma_good_drive(ide_drive_t *drive)
-{
-	return ide_in_drive_list(drive->id, drive_whitelist);
-}
-
-/**
- *	ide_dma_map_sg	-	map IDE scatter gather for DMA I/O
- *	@drive: the drive to map the DMA table for
- *	@cmd: command
- *
- *	Perform the DMA mapping magic necessary to access the source or
- *	target buffers of a request via DMA.  The lower layers of the
- *	kernel provide the necessary cache management so that we can
- *	operate in a portable fashion.
- */
-
-static int ide_dma_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct scatterlist *sg = hwif->sg_table;
-	int i;
-
-	if (cmd->tf_flags & IDE_TFLAG_WRITE)
-		cmd->sg_dma_direction = DMA_TO_DEVICE;
-	else
-		cmd->sg_dma_direction = DMA_FROM_DEVICE;
-
-	i = dma_map_sg(hwif->dev, sg, cmd->sg_nents, cmd->sg_dma_direction);
-	if (i) {
-		cmd->orig_sg_nents = cmd->sg_nents;
-		cmd->sg_nents = i;
-	}
-
-	return i;
-}
-
-/**
- *	ide_dma_unmap_sg	-	clean up DMA mapping
- *	@drive: The drive to unmap
- *
- *	Teardown mappings after DMA has completed. This must be called
- *	after the completion of each use of ide_build_dmatable and before
- *	the next use of ide_build_dmatable. Failure to do so will cause
- *	an oops as only one mapping can be live for each target at a given
- *	time.
- */
-
-void ide_dma_unmap_sg(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	dma_unmap_sg(hwif->dev, hwif->sg_table, cmd->orig_sg_nents,
-		     cmd->sg_dma_direction);
-}
-EXPORT_SYMBOL_GPL(ide_dma_unmap_sg);
-
-/**
- *	ide_dma_off_quietly	-	Generic DMA kill
- *	@drive: drive to control
- *
- *	Turn off the current DMA on this IDE controller.
- */
-
-void ide_dma_off_quietly(ide_drive_t *drive)
-{
-	drive->dev_flags &= ~IDE_DFLAG_USING_DMA;
-
-	drive->hwif->dma_ops->dma_host_set(drive, 0);
-}
-EXPORT_SYMBOL(ide_dma_off_quietly);
-
-/**
- *	ide_dma_off	-	disable DMA on a device
- *	@drive: drive to disable DMA on
- *
- *	Disable IDE DMA for a device on this IDE controller.
- *	Inform the user that DMA has been disabled.
- */
-
-void ide_dma_off(ide_drive_t *drive)
-{
-	printk(KERN_INFO "%s: DMA disabled\n", drive->name);
-	ide_dma_off_quietly(drive);
-}
-EXPORT_SYMBOL(ide_dma_off);
-
-/**
- *	ide_dma_on		-	Enable DMA on a device
- *	@drive: drive to enable DMA on
- *
- *	Enable IDE DMA for a device on this IDE controller.
- */
-
-void ide_dma_on(ide_drive_t *drive)
-{
-	drive->dev_flags |= IDE_DFLAG_USING_DMA;
-
-	drive->hwif->dma_ops->dma_host_set(drive, 1);
-}
-
-int __ide_dma_bad_drive(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-
-	int blacklist = ide_in_drive_list(id, drive_blacklist);
-	if (blacklist) {
-		printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n",
-				    drive->name, (char *)&id[ATA_ID_PROD]);
-		return blacklist;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(__ide_dma_bad_drive);
-
-static const u8 xfer_mode_bases[] = {
-	XFER_UDMA_0,
-	XFER_MW_DMA_0,
-	XFER_SW_DMA_0,
-};
-
-static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode)
-{
-	u16 *id = drive->id;
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-	unsigned int mask = 0;
-
-	switch (base) {
-	case XFER_UDMA_0:
-		if ((id[ATA_ID_FIELD_VALID] & 4) == 0)
-			break;
-		mask = id[ATA_ID_UDMA_MODES];
-		if (port_ops && port_ops->udma_filter)
-			mask &= port_ops->udma_filter(drive);
-		else
-			mask &= hwif->ultra_mask;
-
-		/*
-		 * avoid false cable warning from eighty_ninty_three()
-		 */
-		if (req_mode > XFER_UDMA_2) {
-			if ((mask & 0x78) && (eighty_ninty_three(drive) == 0))
-				mask &= 0x07;
-		}
-		break;
-	case XFER_MW_DMA_0:
-		mask = id[ATA_ID_MWDMA_MODES];
-
-		/* Also look for the CF specific MWDMA modes... */
-		if (ata_id_is_cfa(id) && (id[ATA_ID_CFA_MODES] & 0x38)) {
-			u8 mode = ((id[ATA_ID_CFA_MODES] & 0x38) >> 3) - 1;
-
-			mask |= ((2 << mode) - 1) << 3;
-		}
-
-		if (port_ops && port_ops->mdma_filter)
-			mask &= port_ops->mdma_filter(drive);
-		else
-			mask &= hwif->mwdma_mask;
-		break;
-	case XFER_SW_DMA_0:
-		mask = id[ATA_ID_SWDMA_MODES];
-		if (!(mask & ATA_SWDMA2) && (id[ATA_ID_OLD_DMA_MODES] >> 8)) {
-			u8 mode = id[ATA_ID_OLD_DMA_MODES] >> 8;
-
-			/*
-			 * if the mode is valid convert it to the mask
-			 * (the maximum allowed mode is XFER_SW_DMA_2)
-			 */
-			if (mode <= 2)
-				mask = (2 << mode) - 1;
-		}
-		mask &= hwif->swdma_mask;
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	return mask;
-}
-
-/**
- *	ide_find_dma_mode	-	compute DMA speed
- *	@drive: IDE device
- *	@req_mode: requested mode
- *
- *	Checks the drive/host capabilities and finds the speed to use for
- *	the DMA transfer.  The speed is then limited by the requested mode.
- *
- *	Returns 0 if the drive/host combination is incapable of DMA transfers
- *	or if the requested mode is not a DMA mode.
- */
-
-u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned int mask;
-	int x, i;
-	u8 mode = 0;
-
-	if (drive->media != ide_disk) {
-		if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA)
-			return 0;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(xfer_mode_bases); i++) {
-		if (req_mode < xfer_mode_bases[i])
-			continue;
-		mask = ide_get_mode_mask(drive, xfer_mode_bases[i], req_mode);
-		x = fls(mask) - 1;
-		if (x >= 0) {
-			mode = xfer_mode_bases[i] + x;
-			break;
-		}
-	}
-
-	if (hwif->chipset == ide_acorn && mode == 0) {
-		/*
-		 * is this correct?
-		 */
-		if (ide_dma_good_drive(drive) &&
-		    drive->id[ATA_ID_EIDE_DMA_TIME] < 150)
-			mode = XFER_MW_DMA_1;
-	}
-
-	mode = min(mode, req_mode);
-
-	printk(KERN_INFO "%s: %s mode selected\n", drive->name,
-			  mode ? ide_xfer_verbose(mode) : "no DMA");
-
-	return mode;
-}
-
-static int ide_tune_dma(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 speed;
-
-	if (ata_id_has_dma(drive->id) == 0 ||
-	    (drive->dev_flags & IDE_DFLAG_NODMA))
-		return 0;
-
-	/* consult the list of known "bad" drives */
-	if (__ide_dma_bad_drive(drive))
-		return 0;
-
-	if (hwif->host_flags & IDE_HFLAG_TRUST_BIOS_FOR_DMA)
-		return config_drive_for_dma(drive);
-
-	speed = ide_max_dma_mode(drive);
-
-	if (!speed)
-		return 0;
-
-	if (ide_set_dma_mode(drive, speed))
-		return 0;
-
-	return 1;
-}
-
-static int ide_dma_check(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	if (ide_tune_dma(drive))
-		return 0;
-
-	/* TODO: always do PIO fallback */
-	if (hwif->host_flags & IDE_HFLAG_TRUST_BIOS_FOR_DMA)
-		return -1;
-
-	ide_set_max_pio(drive);
-
-	return -1;
-}
-
-int ide_set_dma(ide_drive_t *drive)
-{
-	int rc;
-
-	/*
-	 * Force DMAing for the beginning of the check.
-	 * Some chipsets appear to do interesting
-	 * things, if not checked and cleared.
-	 *   PARANOIA!!!
-	 */
-	ide_dma_off_quietly(drive);
-
-	rc = ide_dma_check(drive);
-	if (rc)
-		return rc;
-
-	ide_dma_on(drive);
-
-	return 0;
-}
-
-void ide_check_dma_crc(ide_drive_t *drive)
-{
-	u8 mode;
-
-	ide_dma_off_quietly(drive);
-	drive->crc_count = 0;
-	mode = drive->current_speed;
-	/*
-	 * Don't try non Ultra-DMA modes without iCRC's.  Force the
-	 * device to PIO and make the user enable SWDMA/MWDMA modes.
-	 */
-	if (mode > XFER_UDMA_0 && mode <= XFER_UDMA_7)
-		mode--;
-	else
-		mode = XFER_PIO_4;
-	ide_set_xfer_rate(drive, mode);
-	if (drive->current_speed >= XFER_SW_DMA_0)
-		ide_dma_on(drive);
-}
-
-void ide_dma_lost_irq(ide_drive_t *drive)
-{
-	printk(KERN_ERR "%s: DMA interrupt recovery\n", drive->name);
-}
-EXPORT_SYMBOL_GPL(ide_dma_lost_irq);
-
-/*
- * un-busy the port etc, and clear any pending DMA status. we want to
- * retry the current request in pio mode instead of risking tossing it
- * all away
- */
-ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_dma_ops *dma_ops = hwif->dma_ops;
-	struct ide_cmd *cmd = &hwif->cmd;
-	ide_startstop_t ret = ide_stopped;
-
-	/*
-	 * end current dma transaction
-	 */
-
-	if (error < 0) {
-		printk(KERN_WARNING "%s: DMA timeout error\n", drive->name);
-		drive->waiting_for_dma = 0;
-		(void)dma_ops->dma_end(drive);
-		ide_dma_unmap_sg(drive, cmd);
-		ret = ide_error(drive, "dma timeout error",
-				hwif->tp_ops->read_status(hwif));
-	} else {
-		printk(KERN_WARNING "%s: DMA timeout retry\n", drive->name);
-		if (dma_ops->dma_clear)
-			dma_ops->dma_clear(drive);
-		printk(KERN_ERR "%s: timeout waiting for DMA\n", drive->name);
-		if (dma_ops->dma_test_irq(drive) == 0) {
-			ide_dump_status(drive, "DMA timeout",
-					hwif->tp_ops->read_status(hwif));
-			drive->waiting_for_dma = 0;
-			(void)dma_ops->dma_end(drive);
-			ide_dma_unmap_sg(drive, cmd);
-		}
-	}
-
-	/*
-	 * disable dma for now, but remember that we did so because of
-	 * a timeout -- we'll reenable after we finish this next request
-	 * (or rather the first chunk of it) in pio.
-	 */
-	drive->dev_flags |= IDE_DFLAG_DMA_PIO_RETRY;
-	drive->retry_pio++;
-	ide_dma_off_quietly(drive);
-
-	/*
-	 * make sure request is sane
-	 */
-	if (hwif->rq)
-		scsi_req(hwif->rq)->result = 0;
-	return ret;
-}
-
-void ide_release_dma_engine(ide_hwif_t *hwif)
-{
-	if (hwif->dmatable_cpu) {
-		int prd_size = hwif->prd_max_nents * hwif->prd_ent_size;
-
-		dma_free_coherent(hwif->dev, prd_size,
-				  hwif->dmatable_cpu, hwif->dmatable_dma);
-		hwif->dmatable_cpu = NULL;
-	}
-}
-EXPORT_SYMBOL_GPL(ide_release_dma_engine);
-
-int ide_allocate_dma_engine(ide_hwif_t *hwif)
-{
-	int prd_size;
-
-	if (hwif->prd_max_nents == 0)
-		hwif->prd_max_nents = PRD_ENTRIES;
-	if (hwif->prd_ent_size == 0)
-		hwif->prd_ent_size = PRD_BYTES;
-
-	prd_size = hwif->prd_max_nents * hwif->prd_ent_size;
-
-	hwif->dmatable_cpu = dma_alloc_coherent(hwif->dev, prd_size,
-						&hwif->dmatable_dma,
-						GFP_ATOMIC);
-	if (hwif->dmatable_cpu == NULL) {
-		printk(KERN_ERR "%s: unable to allocate PRD table\n",
-			hwif->name);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_allocate_dma_engine);
-
-int ide_dma_prepare(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	const struct ide_dma_ops *dma_ops = drive->hwif->dma_ops;
-
-	if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0 ||
-	    (dma_ops->dma_check && dma_ops->dma_check(drive, cmd)))
-		goto out;
-	ide_map_sg(drive, cmd);
-	if (ide_dma_map_sg(drive, cmd) == 0)
-		goto out_map;
-	if (dma_ops->dma_setup(drive, cmd))
-		goto out_dma_unmap;
-	drive->waiting_for_dma = 1;
-	return 0;
-out_dma_unmap:
-	ide_dma_unmap_sg(drive, cmd);
-out_map:
-	ide_map_sg(drive, cmd);
-out:
-	return 1;
-}
diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
deleted file mode 100644
index 2f378213e9b5f..0000000000000
--- a/drivers/ide/ide-eh.c
+++ /dev/null
@@ -1,443 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-#include <linux/delay.h>
-
-static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq,
-				     u8 stat, u8 err)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	if ((stat & ATA_BUSY) ||
-	    ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
-		/* other bits are useless when BUSY */
-		scsi_req(rq)->result |= ERROR_RESET;
-	} else if (stat & ATA_ERR) {
-		/* err has different meaning on cdrom and tape */
-		if (err == ATA_ABORTED) {
-			if ((drive->dev_flags & IDE_DFLAG_LBA) &&
-			    /* some newer drives don't support ATA_CMD_INIT_DEV_PARAMS */
-			    hwif->tp_ops->read_status(hwif) == ATA_CMD_INIT_DEV_PARAMS)
-				return ide_stopped;
-		} else if ((err & BAD_CRC) == BAD_CRC) {
-			/* UDMA crc error, just retry the operation */
-			drive->crc_count++;
-		} else if (err & (ATA_BBK | ATA_UNC)) {
-			/* retries won't help these */
-			scsi_req(rq)->result = ERROR_MAX;
-		} else if (err & ATA_TRK0NF) {
-			/* help it find track zero */
-			scsi_req(rq)->result |= ERROR_RECAL;
-		}
-	}
-
-	if ((stat & ATA_DRQ) && rq_data_dir(rq) == READ &&
-	    (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) {
-		int nsect = drive->mult_count ? drive->mult_count : 1;
-
-		ide_pad_transfer(drive, READ, nsect * SECTOR_SIZE);
-	}
-
-	if (scsi_req(rq)->result >= ERROR_MAX || blk_noretry_request(rq)) {
-		ide_kill_rq(drive, rq);
-		return ide_stopped;
-	}
-
-	if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-		scsi_req(rq)->result |= ERROR_RESET;
-
-	if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
-		++scsi_req(rq)->result;
-		return ide_do_reset(drive);
-	}
-
-	if ((scsi_req(rq)->result & ERROR_RECAL) == ERROR_RECAL)
-		drive->special_flags |= IDE_SFLAG_RECALIBRATE;
-
-	++scsi_req(rq)->result;
-
-	return ide_stopped;
-}
-
-static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq,
-				       u8 stat, u8 err)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	if ((stat & ATA_BUSY) ||
-	    ((stat & ATA_DF) && (drive->dev_flags & IDE_DFLAG_NOWERR) == 0)) {
-		/* other bits are useless when BUSY */
-		scsi_req(rq)->result |= ERROR_RESET;
-	} else {
-		/* add decoding error stuff */
-	}
-
-	if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ))
-		/* force an abort */
-		hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE);
-
-	if (scsi_req(rq)->result >= ERROR_MAX) {
-		ide_kill_rq(drive, rq);
-	} else {
-		if ((scsi_req(rq)->result & ERROR_RESET) == ERROR_RESET) {
-			++scsi_req(rq)->result;
-			return ide_do_reset(drive);
-		}
-		++scsi_req(rq)->result;
-	}
-
-	return ide_stopped;
-}
-
-static ide_startstop_t __ide_error(ide_drive_t *drive, struct request *rq,
-				   u8 stat, u8 err)
-{
-	if (drive->media == ide_disk)
-		return ide_ata_error(drive, rq, stat, err);
-	return ide_atapi_error(drive, rq, stat, err);
-}
-
-/**
- *	ide_error	-	handle an error on the IDE
- *	@drive: drive the error occurred on
- *	@msg: message to report
- *	@stat: status bits
- *
- *	ide_error() takes action based on the error returned by the drive.
- *	For normal I/O that may well include retries. We deal with
- *	both new-style (taskfile) and old style command handling here.
- *	In the case of taskfile command handling there is work left to
- *	do
- */
-
-ide_startstop_t ide_error(ide_drive_t *drive, const char *msg, u8 stat)
-{
-	struct request *rq;
-	u8 err;
-
-	err = ide_dump_status(drive, msg, stat);
-
-	rq = drive->hwif->rq;
-	if (rq == NULL)
-		return ide_stopped;
-
-	/* retry only "normal" I/O: */
-	if (blk_rq_is_passthrough(rq)) {
-		if (ata_taskfile_request(rq)) {
-			struct ide_cmd *cmd = ide_req(rq)->special;
-
-			if (cmd)
-				ide_complete_cmd(drive, cmd, stat, err);
-		} else if (ata_pm_request(rq)) {
-			scsi_req(rq)->result = 1;
-			ide_complete_pm_rq(drive, rq);
-			return ide_stopped;
-		}
-		scsi_req(rq)->result = err;
-		ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
-		return ide_stopped;
-	}
-
-	return __ide_error(drive, rq, stat, err);
-}
-EXPORT_SYMBOL_GPL(ide_error);
-
-static inline void ide_complete_drive_reset(ide_drive_t *drive, blk_status_t err)
-{
-	struct request *rq = drive->hwif->rq;
-
-	if (rq && ata_misc_request(rq) &&
-	    scsi_req(rq)->cmd[0] == REQ_DRIVE_RESET) {
-		if (err <= 0 && scsi_req(rq)->result == 0)
-			scsi_req(rq)->result = -EIO;
-		ide_complete_rq(drive, err, blk_rq_bytes(rq));
-	}
-}
-
-/* needed below */
-static ide_startstop_t do_reset1(ide_drive_t *, int);
-
-/*
- * atapi_reset_pollfunc() gets invoked to poll the interface for completion
- * every 50ms during an atapi drive reset operation.  If the drive has not yet
- * responded, and we have not yet hit our maximum waiting time, then the timer
- * is restarted for another 50ms.
- */
-static ide_startstop_t atapi_reset_pollfunc(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	u8 stat;
-
-	tp_ops->dev_select(drive);
-	udelay(10);
-	stat = tp_ops->read_status(hwif);
-
-	if (OK_STAT(stat, 0, ATA_BUSY))
-		printk(KERN_INFO "%s: ATAPI reset complete\n", drive->name);
-	else {
-		if (time_before(jiffies, hwif->poll_timeout)) {
-			ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20);
-			/* continue polling */
-			return ide_started;
-		}
-		/* end of polling */
-		hwif->polling = 0;
-		printk(KERN_ERR "%s: ATAPI reset timed-out, status=0x%02x\n",
-			drive->name, stat);
-		/* do it the old fashioned way */
-		return do_reset1(drive, 1);
-	}
-	/* done polling */
-	hwif->polling = 0;
-	ide_complete_drive_reset(drive, BLK_STS_OK);
-	return ide_stopped;
-}
-
-static void ide_reset_report_error(ide_hwif_t *hwif, u8 err)
-{
-	static const char *err_master_vals[] =
-		{ NULL, "passed", "formatter device error",
-		  "sector buffer error", "ECC circuitry error",
-		  "controlling MPU error" };
-
-	u8 err_master = err & 0x7f;
-
-	printk(KERN_ERR "%s: reset: master: ", hwif->name);
-	if (err_master && err_master < 6)
-		printk(KERN_CONT "%s", err_master_vals[err_master]);
-	else
-		printk(KERN_CONT "error (0x%02x?)", err);
-	if (err & 0x80)
-		printk(KERN_CONT "; slave: failed");
-	printk(KERN_CONT "\n");
-}
-
-/*
- * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
- * during an ide reset operation. If the drives have not yet responded,
- * and we have not yet hit our maximum waiting time, then the timer is restarted
- * for another 50ms.
- */
-static ide_startstop_t reset_pollfunc(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-	u8 tmp;
-	blk_status_t err = BLK_STS_OK;
-
-	if (port_ops && port_ops->reset_poll) {
-		err = port_ops->reset_poll(drive);
-		if (err) {
-			printk(KERN_ERR "%s: host reset_poll failure for %s.\n",
-				hwif->name, drive->name);
-			goto out;
-		}
-	}
-
-	tmp = hwif->tp_ops->read_status(hwif);
-
-	if (!OK_STAT(tmp, 0, ATA_BUSY)) {
-		if (time_before(jiffies, hwif->poll_timeout)) {
-			ide_set_handler(drive, &reset_pollfunc, HZ/20);
-			/* continue polling */
-			return ide_started;
-		}
-		printk(KERN_ERR "%s: reset timed-out, status=0x%02x\n",
-			hwif->name, tmp);
-		drive->failures++;
-		err = BLK_STS_IOERR;
-	} else  {
-		tmp = ide_read_error(drive);
-
-		if (tmp == 1) {
-			printk(KERN_INFO "%s: reset: success\n", hwif->name);
-			drive->failures = 0;
-		} else {
-			ide_reset_report_error(hwif, tmp);
-			drive->failures++;
-			err = BLK_STS_IOERR;
-		}
-	}
-out:
-	hwif->polling = 0;	/* done polling */
-	ide_complete_drive_reset(drive, err);
-	return ide_stopped;
-}
-
-static void ide_disk_pre_reset(ide_drive_t *drive)
-{
-	int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1;
-
-	drive->special_flags =
-		legacy ? (IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE) : 0;
-
-	drive->mult_count = 0;
-	drive->dev_flags &= ~IDE_DFLAG_PARKED;
-
-	if ((drive->dev_flags & IDE_DFLAG_KEEP_SETTINGS) == 0 &&
-	    (drive->dev_flags & IDE_DFLAG_USING_DMA) == 0)
-		drive->mult_req = 0;
-
-	if (drive->mult_req != drive->mult_count)
-		drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-}
-
-static void pre_reset(ide_drive_t *drive)
-{
-	const struct ide_port_ops *port_ops = drive->hwif->port_ops;
-
-	if (drive->media == ide_disk)
-		ide_disk_pre_reset(drive);
-	else
-		drive->dev_flags |= IDE_DFLAG_POST_RESET;
-
-	if (drive->dev_flags & IDE_DFLAG_USING_DMA) {
-		if (drive->crc_count)
-			ide_check_dma_crc(drive);
-		else
-			ide_dma_off(drive);
-	}
-
-	if ((drive->dev_flags & IDE_DFLAG_KEEP_SETTINGS) == 0) {
-		if ((drive->dev_flags & IDE_DFLAG_USING_DMA) == 0) {
-			drive->dev_flags &= ~IDE_DFLAG_UNMASK;
-			drive->io_32bit = 0;
-		}
-		return;
-	}
-
-	if (port_ops && port_ops->pre_reset)
-		port_ops->pre_reset(drive);
-
-	if (drive->current_speed != 0xff)
-		drive->desired_speed = drive->current_speed;
-	drive->current_speed = 0xff;
-}
-
-/*
- * do_reset1() attempts to recover a confused drive by resetting it.
- * Unfortunately, resetting a disk drive actually resets all devices on
- * the same interface, so it can really be thought of as resetting the
- * interface rather than resetting the drive.
- *
- * ATAPI devices have their own reset mechanism which allows them to be
- * individually reset without clobbering other devices on the same interface.
- *
- * Unfortunately, the IDE interface does not generate an interrupt to let
- * us know when the reset operation has finished, so we must poll for this.
- * Equally poor, though, is the fact that this may a very long time to complete,
- * (up to 30 seconds worstcase).  So, instead of busy-waiting here for it,
- * we set a timer to poll at 50ms intervals.
- */
-static ide_startstop_t do_reset1(ide_drive_t *drive, int do_not_try_atapi)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	const struct ide_port_ops *port_ops;
-	ide_drive_t *tdrive;
-	unsigned long flags, timeout;
-	int i;
-	DEFINE_WAIT(wait);
-
-	spin_lock_irqsave(&hwif->lock, flags);
-
-	/* We must not reset with running handlers */
-	BUG_ON(hwif->handler != NULL);
-
-	/* For an ATAPI device, first try an ATAPI SRST. */
-	if (drive->media != ide_disk && !do_not_try_atapi) {
-		pre_reset(drive);
-		tp_ops->dev_select(drive);
-		udelay(20);
-		tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
-		ndelay(400);
-		hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
-		hwif->polling = 1;
-		__ide_set_handler(drive, &atapi_reset_pollfunc, HZ/20);
-		spin_unlock_irqrestore(&hwif->lock, flags);
-		return ide_started;
-	}
-
-	/* We must not disturb devices in the IDE_DFLAG_PARKED state. */
-	do {
-		unsigned long now;
-
-		prepare_to_wait(&ide_park_wq, &wait, TASK_UNINTERRUPTIBLE);
-		timeout = jiffies;
-		ide_port_for_each_present_dev(i, tdrive, hwif) {
-			if ((tdrive->dev_flags & IDE_DFLAG_PARKED) &&
-			    time_after(tdrive->sleep, timeout))
-				timeout = tdrive->sleep;
-		}
-
-		now = jiffies;
-		if (time_before_eq(timeout, now))
-			break;
-
-		spin_unlock_irqrestore(&hwif->lock, flags);
-		timeout = schedule_timeout_uninterruptible(timeout - now);
-		spin_lock_irqsave(&hwif->lock, flags);
-	} while (timeout);
-	finish_wait(&ide_park_wq, &wait);
-
-	/*
-	 * First, reset any device state data we were maintaining
-	 * for any of the drives on this interface.
-	 */
-	ide_port_for_each_dev(i, tdrive, hwif)
-		pre_reset(tdrive);
-
-	if (io_ports->ctl_addr == 0) {
-		spin_unlock_irqrestore(&hwif->lock, flags);
-		ide_complete_drive_reset(drive, BLK_STS_IOERR);
-		return ide_stopped;
-	}
-
-	/*
-	 * Note that we also set nIEN while resetting the device,
-	 * to mask unwanted interrupts from the interface during the reset.
-	 * However, due to the design of PC hardware, this will cause an
-	 * immediate interrupt due to the edge transition it produces.
-	 * This single interrupt gives us a "fast poll" for drives that
-	 * recover from reset very quickly, saving us the first 50ms wait time.
-	 */
-	/* set SRST and nIEN */
-	tp_ops->write_devctl(hwif, ATA_SRST | ATA_NIEN | ATA_DEVCTL_OBS);
-	/* more than enough time */
-	udelay(10);
-	/* clear SRST, leave nIEN (unless device is on the quirk list) */
-	tp_ops->write_devctl(hwif,
-		((drive->dev_flags & IDE_DFLAG_NIEN_QUIRK) ? 0 : ATA_NIEN) |
-		 ATA_DEVCTL_OBS);
-	/* more than enough time */
-	udelay(10);
-	hwif->poll_timeout = jiffies + WAIT_WORSTCASE;
-	hwif->polling = 1;
-	__ide_set_handler(drive, &reset_pollfunc, HZ/20);
-
-	/*
-	 * Some weird controller like resetting themselves to a strange
-	 * state when the disks are reset this way. At least, the Winbond
-	 * 553 documentation says that
-	 */
-	port_ops = hwif->port_ops;
-	if (port_ops && port_ops->resetproc)
-		port_ops->resetproc(drive);
-
-	spin_unlock_irqrestore(&hwif->lock, flags);
-	return ide_started;
-}
-
-/*
- * ide_do_reset() is the entry point to the drive/interface reset code.
- */
-
-ide_startstop_t ide_do_reset(ide_drive_t *drive)
-{
-	return do_reset1(drive, 0);
-}
-EXPORT_SYMBOL(ide_do_reset);
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
deleted file mode 100644
index f5a2870aaf54b..0000000000000
--- a/drivers/ide/ide-floppy.c
+++ /dev/null
@@ -1,551 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * IDE ATAPI floppy driver.
- *
- * Copyright (C) 1996-1999  Gadi Oxman <gadio@netvision.net.il>
- * Copyright (C) 2000-2002  Paul Bristow <paul@paulbristow.net>
- * Copyright (C) 2005       Bartlomiej Zolnierkiewicz
- *
- * This driver supports the following IDE floppy drives:
- *
- * LS-120/240 SuperDisk
- * Iomega Zip 100/250
- * Iomega PC Card Clik!/PocketZip
- *
- * For a historical changelog see
- * Documentation/ide/ChangeLog.ide-floppy.1996-2002
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/compat.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/cdrom.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-#include <linux/bitops.h>
-#include <linux/mutex.h>
-#include <linux/scatterlist.h>
-
-#include <scsi/scsi_ioctl.h>
-
-#include <asm/byteorder.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-#include <asm/unaligned.h>
-
-#include "ide-floppy.h"
-
-/*
- * After each failed packet command we issue a request sense command and retry
- * the packet command IDEFLOPPY_MAX_PC_RETRIES times.
- */
-#define IDEFLOPPY_MAX_PC_RETRIES	3
-
-/* format capacities descriptor codes */
-#define CAPACITY_INVALID	0x00
-#define CAPACITY_UNFORMATTED	0x01
-#define CAPACITY_CURRENT	0x02
-#define CAPACITY_NO_CARTRIDGE	0x03
-
-/*
- * The following delay solves a problem with ATAPI Zip 100 drive where BSY bit
- * was apparently being deasserted before the unit was ready to receive data.
- */
-#define IDEFLOPPY_PC_DELAY	(HZ/20)	/* default delay for ZIP 100 (50ms) */
-
-static int ide_floppy_callback(ide_drive_t *drive, int dsc)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_atapi_pc *pc = drive->pc;
-	struct request *rq = pc->rq;
-	int uptodate = pc->error ? 0 : 1;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (drive->failed_pc == pc)
-		drive->failed_pc = NULL;
-
-	if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
-	    blk_rq_is_scsi(rq))
-		uptodate = 1; /* FIXME */
-	else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
-
-		u8 *buf = bio_data(rq->bio);
-
-		if (!pc->error) {
-			floppy->sense_key = buf[2] & 0x0F;
-			floppy->asc = buf[12];
-			floppy->ascq = buf[13];
-			floppy->progress_indication = buf[15] & 0x80 ?
-				(u16)get_unaligned((u16 *)&buf[16]) : 0x10000;
-
-			if (drive->failed_pc)
-				ide_debug_log(IDE_DBG_PC, "pc = %x",
-					      drive->failed_pc->c[0]);
-
-			ide_debug_log(IDE_DBG_SENSE, "sense key = %x, asc = %x,"
-				      "ascq = %x", floppy->sense_key,
-				      floppy->asc, floppy->ascq);
-		} else
-			printk(KERN_ERR PFX "Error in REQUEST SENSE itself - "
-			       "Aborting request!\n");
-	}
-
-	if (ata_misc_request(rq))
-		scsi_req(rq)->result = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
-
-	return uptodate;
-}
-
-static void ide_floppy_report_error(struct ide_disk_obj *floppy,
-				    struct ide_atapi_pc *pc)
-{
-	/* suppress error messages resulting from Medium not present */
-	if (floppy->sense_key == 0x02 &&
-	    floppy->asc       == 0x3a &&
-	    floppy->ascq      == 0x00)
-		return;
-
-	printk(KERN_ERR PFX "%s: I/O error, pc = %2x, key = %2x, "
-			"asc = %2x, ascq = %2x\n",
-			floppy->drive->name, pc->c[0], floppy->sense_key,
-			floppy->asc, floppy->ascq);
-
-}
-
-static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
-					   struct ide_cmd *cmd,
-					   struct ide_atapi_pc *pc)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-
-	if (drive->failed_pc == NULL &&
-	    pc->c[0] != GPCMD_REQUEST_SENSE)
-		drive->failed_pc = pc;
-
-	/* Set the current packet command */
-	drive->pc = pc;
-
-	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
-		unsigned int done = blk_rq_bytes(drive->hwif->rq);
-
-		if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
-			ide_floppy_report_error(floppy, pc);
-
-		/* Giving up */
-		pc->error = IDE_DRV_ERROR_GENERAL;
-
-		drive->failed_pc = NULL;
-		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, BLK_STS_IOERR, done);
-		return ide_stopped;
-	}
-
-	ide_debug_log(IDE_DBG_FUNC, "retry #%d", pc->retries);
-
-	pc->retries++;
-
-	return ide_issue_pc(drive, cmd);
-}
-
-void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc)
-{
-	ide_init_pc(pc);
-	pc->c[0] = GPCMD_READ_FORMAT_CAPACITIES;
-	pc->c[7] = 255;
-	pc->c[8] = 255;
-	pc->req_xfer = 255;
-}
-
-/* A mode sense command is used to "sense" floppy parameters. */
-void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
-{
-	u16 length = 8; /* sizeof(Mode Parameter Header) = 8 Bytes */
-
-	ide_init_pc(pc);
-	pc->c[0] = GPCMD_MODE_SENSE_10;
-	pc->c[1] = 0;
-	pc->c[2] = page_code;
-
-	switch (page_code) {
-	case IDEFLOPPY_CAPABILITIES_PAGE:
-		length += 12;
-		break;
-	case IDEFLOPPY_FLEXIBLE_DISK_PAGE:
-		length += 32;
-		break;
-	default:
-		printk(KERN_ERR PFX "unsupported page code in %s\n", __func__);
-	}
-	put_unaligned(cpu_to_be16(length), (u16 *) &pc->c[7]);
-	pc->req_xfer = length;
-}
-
-static void idefloppy_create_rw_cmd(ide_drive_t *drive,
-				    struct ide_atapi_pc *pc, struct request *rq,
-				    unsigned long sector)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	int block = sector / floppy->bs_factor;
-	int blocks = blk_rq_sectors(rq) / floppy->bs_factor;
-	int cmd = rq_data_dir(rq);
-
-	ide_debug_log(IDE_DBG_FUNC, "block: %d, blocks: %d", block, blocks);
-
-	ide_init_pc(pc);
-	pc->c[0] = cmd == READ ? GPCMD_READ_10 : GPCMD_WRITE_10;
-	put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]);
-	put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]);
-
-	memcpy(scsi_req(rq)->cmd, pc->c, 12);
-
-	pc->rq = rq;
-	if (cmd == WRITE)
-		pc->flags |= PC_FLAG_WRITING;
-
-	pc->flags |= PC_FLAG_DMA_OK;
-}
-
-static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
-		struct ide_atapi_pc *pc, struct request *rq)
-{
-	ide_init_pc(pc);
-	memcpy(pc->c, scsi_req(rq)->cmd, sizeof(pc->c));
-	pc->rq = rq;
-	if (blk_rq_bytes(rq)) {
-		pc->flags |= PC_FLAG_DMA_OK;
-		if (rq_data_dir(rq) == WRITE)
-			pc->flags |= PC_FLAG_WRITING;
-	}
-}
-
-static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
-					     struct request *rq, sector_t block)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	struct ide_cmd cmd;
-	struct ide_atapi_pc *pc;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter, cmd: 0x%x\n", rq->cmd[0]);
-
-	if (drive->debug_mask & IDE_DBG_RQ)
-		blk_dump_rq_flags(rq, (rq->rq_disk
-					? rq->rq_disk->disk_name
-					: "dev?"));
-
-	if (scsi_req(rq)->result >= ERROR_MAX) {
-		if (drive->failed_pc) {
-			ide_floppy_report_error(floppy, drive->failed_pc);
-			drive->failed_pc = NULL;
-		} else
-			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
-
-		if (ata_misc_request(rq)) {
-			scsi_req(rq)->result = 0;
-			ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
-			return ide_stopped;
-		} else
-			goto out_end;
-	}
-
-	switch (req_op(rq)) {
-	default:
-		if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
-		    (blk_rq_sectors(rq) % floppy->bs_factor)) {
-			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
-				drive->name);
-			goto out_end;
-		}
-		pc = &floppy->queued_pc;
-		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-		break;
-	case REQ_OP_SCSI_IN:
-	case REQ_OP_SCSI_OUT:
-		pc = &floppy->queued_pc;
-		idefloppy_blockpc_cmd(floppy, pc, rq);
-		break;
-	case REQ_OP_DRV_IN:
-	case REQ_OP_DRV_OUT:
-		switch (ide_req(rq)->type) {
-		case ATA_PRIV_MISC:
-		case ATA_PRIV_SENSE:
-			pc = (struct ide_atapi_pc *)ide_req(rq)->special;
-			break;
-		default:
-			BUG();
-		}
-	}
-
-	ide_prep_sense(drive, rq);
-
-	memset(&cmd, 0, sizeof(cmd));
-
-	if (rq_data_dir(rq))
-		cmd.tf_flags |= IDE_TFLAG_WRITE;
-
-	cmd.rq = rq;
-
-	if (!blk_rq_is_passthrough(rq) || blk_rq_bytes(rq)) {
-		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
-		ide_map_sg(drive, &cmd);
-	}
-
-	pc->rq = rq;
-
-	return ide_floppy_issue_pc(drive, &cmd, pc);
-out_end:
-	drive->failed_pc = NULL;
-	if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
-		scsi_req(rq)->result = -EIO;
-	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
-	return ide_stopped;
-}
-
-/*
- * Look at the flexible disk page parameters. We ignore the CHS capacity
- * parameters and use the LBA parameters instead.
- */
-static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive,
-					     struct ide_atapi_pc *pc)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	struct gendisk *disk = floppy->disk;
-	u8 *page, buf[40];
-	int capacity, lba_capacity;
-	u16 transfer_rate, sector_size, cyls, rpm;
-	u8 heads, sectors;
-
-	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE);
-
-	if (ide_queue_pc_tail(drive, disk, pc, buf, pc->req_xfer)) {
-		printk(KERN_ERR PFX "Can't get flexible disk page params\n");
-		return 1;
-	}
-
-	if (buf[3] & 0x80)
-		drive->dev_flags |= IDE_DFLAG_WP;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_WP;
-
-	set_disk_ro(disk, !!(drive->dev_flags & IDE_DFLAG_WP));
-
-	page = &buf[8];
-
-	transfer_rate = be16_to_cpup((__be16 *)&buf[8 + 2]);
-	sector_size   = be16_to_cpup((__be16 *)&buf[8 + 6]);
-	cyls          = be16_to_cpup((__be16 *)&buf[8 + 8]);
-	rpm           = be16_to_cpup((__be16 *)&buf[8 + 28]);
-	heads         = buf[8 + 4];
-	sectors       = buf[8 + 5];
-
-	capacity = cyls * heads * sectors * sector_size;
-
-	if (memcmp(page, &floppy->flexible_disk_page, 32))
-		printk(KERN_INFO PFX "%s: %dkB, %d/%d/%d CHS, %d kBps, "
-				"%d sector size, %d rpm\n",
-				drive->name, capacity / 1024, cyls, heads,
-				sectors, transfer_rate / 8, sector_size, rpm);
-
-	memcpy(&floppy->flexible_disk_page, page, 32);
-	drive->bios_cyl = cyls;
-	drive->bios_head = heads;
-	drive->bios_sect = sectors;
-	lba_capacity = floppy->blocks * floppy->block_size;
-
-	if (capacity < lba_capacity) {
-		printk(KERN_NOTICE PFX "%s: The disk reports a capacity of %d "
-			"bytes, but the drive only handles %d\n",
-			drive->name, lba_capacity, capacity);
-		floppy->blocks = floppy->block_size ?
-			capacity / floppy->block_size : 0;
-		drive->capacity64 = floppy->blocks * floppy->bs_factor;
-	}
-
-	return 0;
-}
-
-/*
- * Determine if a media is present in the floppy drive, and if so, its LBA
- * capacity.
- */
-static int ide_floppy_get_capacity(ide_drive_t *drive)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	struct gendisk *disk = floppy->disk;
-	struct ide_atapi_pc pc;
-	u8 *cap_desc;
-	u8 pc_buf[256], header_len, desc_cnt;
-	int i, rc = 1, blocks, length;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	drive->bios_cyl = 0;
-	drive->bios_head = drive->bios_sect = 0;
-	floppy->blocks = 0;
-	floppy->bs_factor = 1;
-	drive->capacity64 = 0;
-
-	ide_floppy_create_read_capacity_cmd(&pc);
-	if (ide_queue_pc_tail(drive, disk, &pc, pc_buf, pc.req_xfer)) {
-		printk(KERN_ERR PFX "Can't get floppy parameters\n");
-		return 1;
-	}
-	header_len = pc_buf[3];
-	cap_desc = &pc_buf[4];
-	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
-
-	for (i = 0; i < desc_cnt; i++) {
-		unsigned int desc_start = 4 + i*8;
-
-		blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]);
-
-		ide_debug_log(IDE_DBG_PROBE, "Descriptor %d: %dkB, %d blocks, "
-					     "%d sector size",
-					     i, blocks * length / 1024,
-					     blocks, length);
-
-		if (i)
-			continue;
-		/*
-		 * the code below is valid only for the 1st descriptor, ie i=0
-		 */
-
-		switch (pc_buf[desc_start + 4] & 0x03) {
-		/* Clik! drive returns this instead of CAPACITY_CURRENT */
-		case CAPACITY_UNFORMATTED:
-			if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
-				/*
-				 * If it is not a clik drive, break out
-				 * (maintains previous driver behaviour)
-				 */
-				break;
-			fallthrough;
-		case CAPACITY_CURRENT:
-			/* Normal Zip/LS-120 disks */
-			if (memcmp(cap_desc, &floppy->cap_desc, 8))
-				printk(KERN_INFO PFX "%s: %dkB, %d blocks, %d "
-				       "sector size\n",
-				       drive->name, blocks * length / 1024,
-				       blocks, length);
-			memcpy(&floppy->cap_desc, cap_desc, 8);
-
-			if (!length || length % 512) {
-				printk(KERN_NOTICE PFX "%s: %d bytes block size"
-				       " not supported\n", drive->name, length);
-			} else {
-				floppy->blocks = blocks;
-				floppy->block_size = length;
-				floppy->bs_factor = length / 512;
-				if (floppy->bs_factor != 1)
-					printk(KERN_NOTICE PFX "%s: Warning: "
-					       "non 512 bytes block size not "
-					       "fully supported\n",
-					       drive->name);
-				drive->capacity64 =
-					floppy->blocks * floppy->bs_factor;
-				rc = 0;
-			}
-			break;
-		case CAPACITY_NO_CARTRIDGE:
-			/*
-			 * This is a KERN_ERR so it appears on screen
-			 * for the user to see
-			 */
-			printk(KERN_ERR PFX "%s: No disk in drive\n",
-			       drive->name);
-			break;
-		case CAPACITY_INVALID:
-			printk(KERN_ERR PFX "%s: Invalid capacity for disk "
-				"in drive\n", drive->name);
-			break;
-		}
-		ide_debug_log(IDE_DBG_PROBE, "Descriptor 0 Code: %d",
-					     pc_buf[desc_start + 4] & 0x03);
-	}
-
-	/* Clik! disk does not support get_flexible_disk_page */
-	if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE))
-		(void) ide_floppy_get_flexible_disk_page(drive, &pc);
-
-	return rc;
-}
-
-static void ide_floppy_setup(ide_drive_t *drive)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	u16 *id = drive->id;
-
-	drive->pc_callback	 = ide_floppy_callback;
-
-	/*
-	 * We used to check revisions here. At this point however I'm giving up.
-	 * Just assume they are all broken, its easier.
-	 *
-	 * The actual reason for the workarounds was likely a driver bug after
-	 * all rather than a firmware bug, and the workaround below used to hide
-	 * it. It should be fixed as of version 1.9, but to be on the safe side
-	 * we'll leave the limitation below for the 2.2.x tree.
-	 */
-	if (strstarts((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI")) {
-		drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE;
-		/* This value will be visible in the /proc/ide/hdx/settings */
-		drive->pc_delay = IDEFLOPPY_PC_DELAY;
-		blk_queue_max_hw_sectors(drive->queue, 64);
-	}
-
-	/*
-	 * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes
-	 * nasty clicking noises without it, so please don't remove this.
-	 */
-	if (strstarts((char *)&id[ATA_ID_PROD], "IOMEGA Clik!")) {
-		blk_queue_max_hw_sectors(drive->queue, 64);
-		drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE;
-		/* IOMEGA Clik! drives do not support lock/unlock commands */
-		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-	}
-
-	(void) ide_floppy_get_capacity(drive);
-
-	ide_proc_register_driver(drive, floppy->driver);
-}
-
-static void ide_floppy_flush(ide_drive_t *drive)
-{
-}
-
-static int ide_floppy_init_media(ide_drive_t *drive, struct gendisk *disk)
-{
-	int ret = 0;
-
-	if (ide_do_test_unit_ready(drive, disk))
-		ide_do_start_stop(drive, disk, 1);
-
-	ret = ide_floppy_get_capacity(drive);
-
-	set_capacity(disk, ide_gd_capacity(drive));
-
-	return ret;
-}
-
-const struct ide_disk_ops ide_atapi_disk_ops = {
-	.check		= ide_check_atapi_device,
-	.get_capacity	= ide_floppy_get_capacity,
-	.setup		= ide_floppy_setup,
-	.flush		= ide_floppy_flush,
-	.init_media	= ide_floppy_init_media,
-	.set_doorlock	= ide_set_media_lock,
-	.do_request	= ide_floppy_do_request,
-	.ioctl		= ide_floppy_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= ide_floppy_compat_ioctl,
-#endif
-};
diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h
deleted file mode 100644
index 8505a5f58f4e8..0000000000000
--- a/drivers/ide/ide-floppy.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IDE_FLOPPY_H
-#define __IDE_FLOPPY_H
-
-#include "ide-gd.h"
-
-#ifdef CONFIG_IDE_GD_ATAPI
-/*
- * Pages of the SELECT SENSE / MODE SENSE packet commands.
- * See SFF-8070i spec.
- */
-#define	IDEFLOPPY_CAPABILITIES_PAGE	0x1b
-#define IDEFLOPPY_FLEXIBLE_DISK_PAGE	0x05
-
-/* IOCTLs used in low-level formatting. */
-#define	IDEFLOPPY_IOCTL_FORMAT_SUPPORTED	0x4600
-#define	IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY	0x4601
-#define	IDEFLOPPY_IOCTL_FORMAT_START		0x4602
-#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS	0x4603
-
-/* ide-floppy.c */
-extern const struct ide_disk_ops ide_atapi_disk_ops;
-void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8);
-void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *);
-
-/* ide-floppy_ioctl.c */
-int ide_floppy_ioctl(ide_drive_t *, struct block_device *, fmode_t,
-		     unsigned int, unsigned long);
-int ide_floppy_compat_ioctl(ide_drive_t *, struct block_device *, fmode_t,
-			    unsigned int, unsigned long);
-
-#ifdef CONFIG_IDE_PROC_FS
-/* ide-floppy_proc.c */
-extern ide_proc_entry_t ide_floppy_proc[];
-extern const struct ide_proc_devset ide_floppy_settings[];
-#endif
-#else
-#define ide_floppy_proc		NULL
-#define ide_floppy_settings	NULL
-#endif
-
-#endif /*__IDE_FLOPPY_H */
diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
deleted file mode 100644
index 39a790ac6cc30..0000000000000
--- a/drivers/ide/ide-floppy_ioctl.c
+++ /dev/null
@@ -1,339 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ide-floppy IOCTLs handling.
- */
-
-#include <linux/kernel.h>
-#include <linux/ide.h>
-#include <linux/compat.h>
-#include <linux/cdrom.h>
-#include <linux/mutex.h>
-
-#include <asm/unaligned.h>
-
-#include <scsi/scsi_ioctl.h>
-
-#include "ide-floppy.h"
-
-/*
- * Obtain the list of formattable capacities.
- * Very similar to ide_floppy_get_capacity, except that we push the capacity
- * descriptors to userland, instead of our own structures.
- *
- * Userland gives us the following structure:
- *
- * struct idefloppy_format_capacities {
- *	int nformats;
- *	struct {
- *		int nblocks;
- *		int blocksize;
- *	} formats[];
- * };
- *
- * userland initializes nformats to the number of allocated formats[] records.
- * On exit we set nformats to the number of records we've actually initialized.
- */
-
-static DEFINE_MUTEX(ide_floppy_ioctl_mutex);
-static int ide_floppy_get_format_capacities(ide_drive_t *drive,
-					    struct ide_atapi_pc *pc,
-					    int __user *arg)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	int i, blocks, length, u_array_size, u_index;
-	int __user *argp;
-	u8 pc_buf[256], header_len, desc_cnt;
-
-	if (get_user(u_array_size, arg))
-		return -EFAULT;
-
-	if (u_array_size <= 0)
-		return -EINVAL;
-
-	ide_floppy_create_read_capacity_cmd(pc);
-
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, pc_buf, pc->req_xfer)) {
-		printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n");
-		return -EIO;
-	}
-
-	header_len = pc_buf[3];
-	desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */
-
-	u_index = 0;
-	argp = arg + 1;
-
-	/*
-	 * We always skip the first capacity descriptor.  That's the current
-	 * capacity.  We are interested in the remaining descriptors, the
-	 * formattable capacities.
-	 */
-	for (i = 1; i < desc_cnt; i++) {
-		unsigned int desc_start = 4 + i*8;
-
-		if (u_index >= u_array_size)
-			break;	/* User-supplied buffer too small */
-
-		blocks = be32_to_cpup((__be32 *)&pc_buf[desc_start]);
-		length = be16_to_cpup((__be16 *)&pc_buf[desc_start + 6]);
-
-		if (put_user(blocks, argp))
-			return -EFAULT;
-
-		++argp;
-
-		if (put_user(length, argp))
-			return -EFAULT;
-
-		++argp;
-
-		++u_index;
-	}
-
-	if (put_user(u_index, arg))
-		return -EFAULT;
-
-	return 0;
-}
-
-static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc,
-					      u8 *buf, int b, int l,
-					      int flags)
-{
-	ide_init_pc(pc);
-	pc->c[0] = GPCMD_FORMAT_UNIT;
-	pc->c[1] = 0x17;
-
-	memset(buf, 0, 12);
-	buf[1] = 0xA2;
-	/* Default format list header, u8 1: FOV/DCRT/IMM bits set */
-
-	if (flags & 1)				/* Verify bit on... */
-		buf[1] ^= 0x20;			/* ... turn off DCRT bit */
-	buf[3] = 8;
-
-	put_unaligned(cpu_to_be32(b), (unsigned int *)(&buf[4]));
-	put_unaligned(cpu_to_be32(l), (unsigned int *)(&buf[8]));
-	pc->req_xfer = 12;
-	pc->flags |= PC_FLAG_WRITING;
-}
-
-static int ide_floppy_get_sfrp_bit(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	u8 buf[20];
-
-	drive->atapi_flags &= ~IDE_AFLAG_SRFP;
-
-	ide_floppy_create_mode_sense_cmd(pc, IDEFLOPPY_CAPABILITIES_PAGE);
-	pc->flags |= PC_FLAG_SUPPRESS_ERROR;
-
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, buf, pc->req_xfer))
-		return 1;
-
-	if (buf[8 + 2] & 0x40)
-		drive->atapi_flags |= IDE_AFLAG_SRFP;
-
-	return 0;
-}
-
-static int ide_floppy_format_unit(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				  int __user *arg)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	u8 buf[12];
-	int blocks, length, flags, err = 0;
-
-	if (floppy->openers > 1) {
-		/* Don't format if someone is using the disk */
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		return -EBUSY;
-	}
-
-	drive->dev_flags |= IDE_DFLAG_FORMAT_IN_PROGRESS;
-
-	/*
-	 * Send ATAPI_FORMAT_UNIT to the drive.
-	 *
-	 * Userland gives us the following structure:
-	 *
-	 * struct idefloppy_format_command {
-	 *        int nblocks;
-	 *        int blocksize;
-	 *        int flags;
-	 *        } ;
-	 *
-	 * flags is a bitmask, currently, the only defined flag is:
-	 *
-	 *        0x01 - verify media after format.
-	 */
-	if (get_user(blocks, arg) ||
-			get_user(length, arg+1) ||
-			get_user(flags, arg+2)) {
-		err = -EFAULT;
-		goto out;
-	}
-
-	ide_floppy_get_sfrp_bit(drive, pc);
-	ide_floppy_create_format_unit_cmd(pc, buf, blocks, length, flags);
-
-	if (ide_queue_pc_tail(drive, floppy->disk, pc, buf, pc->req_xfer))
-		err = -EIO;
-
-out:
-	if (err)
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	return err;
-}
-
-/*
- * Get ATAPI_FORMAT_UNIT progress indication.
- *
- * Userland gives a pointer to an int.  The int is set to a progress
- * indicator 0-65536, with 65536=100%.
- *
- * If the drive does not support format progress indication, we just check
- * the dsc bit, and return either 0 or 65536.
- */
-
-static int ide_floppy_get_format_progress(ide_drive_t *drive,
-					  struct ide_atapi_pc *pc,
-					  int __user *arg)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	u8 sense_buf[18];
-	int progress_indication = 0x10000;
-
-	if (drive->atapi_flags & IDE_AFLAG_SRFP) {
-		ide_create_request_sense_cmd(drive, pc);
-		if (ide_queue_pc_tail(drive, floppy->disk, pc, sense_buf,
-				      pc->req_xfer))
-			return -EIO;
-
-		if (floppy->sense_key == 2 &&
-		    floppy->asc == 4 &&
-		    floppy->ascq == 4)
-			progress_indication = floppy->progress_indication;
-
-		/* Else assume format_unit has finished, and we're at 0x10000 */
-	} else {
-		ide_hwif_t *hwif = drive->hwif;
-		unsigned long flags;
-		u8 stat;
-
-		local_irq_save(flags);
-		stat = hwif->tp_ops->read_status(hwif);
-		local_irq_restore(flags);
-
-		progress_indication = ((stat & ATA_DSC) == 0) ? 0 : 0x10000;
-	}
-
-	if (put_user(progress_indication, arg))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc,
-			       unsigned long arg, unsigned int cmd)
-{
-	struct ide_disk_obj *floppy = drive->driver_data;
-	struct gendisk *disk = floppy->disk;
-	int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0;
-
-	if (floppy->openers > 1)
-		return -EBUSY;
-
-	ide_set_media_lock(drive, disk, prevent);
-
-	if (cmd == CDROMEJECT)
-		ide_do_start_stop(drive, disk, 2);
-
-	return 0;
-}
-
-static int ide_floppy_format_ioctl(ide_drive_t *drive, struct ide_atapi_pc *pc,
-				   fmode_t mode, unsigned int cmd,
-				   void __user *argp)
-{
-	switch (cmd) {
-	case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED:
-		return 0;
-	case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY:
-		return ide_floppy_get_format_capacities(drive, pc, argp);
-	case IDEFLOPPY_IOCTL_FORMAT_START:
-		if (!(mode & FMODE_WRITE))
-			return -EPERM;
-		return ide_floppy_format_unit(drive, pc, (int __user *)argp);
-	case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS:
-		return ide_floppy_get_format_progress(drive, pc, argp);
-	default:
-		return -ENOTTY;
-	}
-}
-
-int ide_floppy_ioctl(ide_drive_t *drive, struct block_device *bdev,
-		     fmode_t mode, unsigned int cmd, unsigned long arg)
-{
-	struct ide_atapi_pc pc;
-	void __user *argp = (void __user *)arg;
-	int err;
-
-	mutex_lock(&ide_floppy_ioctl_mutex);
-	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) {
-		err = ide_floppy_lockdoor(drive, &pc, arg, cmd);
-		goto out;
-	}
-
-	err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
-	if (err != -ENOTTY)
-		goto out;
-
-	/*
-	 * skip SCSI_IOCTL_SEND_COMMAND (deprecated)
-	 * and CDROM_SEND_PACKET (legacy) ioctls
-	 */
-	if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
-		err = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
-
-	if (err == -ENOTTY)
-		err = generic_ide_ioctl(drive, bdev, cmd, arg);
-
-out:
-	mutex_unlock(&ide_floppy_ioctl_mutex);
-	return err;
-}
-
-#ifdef CONFIG_COMPAT
-int ide_floppy_compat_ioctl(ide_drive_t *drive, struct block_device *bdev,
-			    fmode_t mode, unsigned int cmd, unsigned long arg)
-{
-	struct ide_atapi_pc pc;
-	void __user *argp = compat_ptr(arg);
-	int err;
-
-	mutex_lock(&ide_floppy_ioctl_mutex);
-	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) {
-		err = ide_floppy_lockdoor(drive, &pc, arg, cmd);
-		goto out;
-	}
-
-	err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
-	if (err != -ENOTTY)
-		goto out;
-
-	/*
-	 * skip SCSI_IOCTL_SEND_COMMAND (deprecated)
-	 * and CDROM_SEND_PACKET (legacy) ioctls
-	 */
-	if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND)
-		err = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp);
-
-	if (err == -ENOTTY)
-		err = generic_ide_ioctl(drive, bdev, cmd, arg);
-
-out:
-	mutex_unlock(&ide_floppy_ioctl_mutex);
-	return err;
-}
-#endif
diff --git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c
deleted file mode 100644
index 7f697ddb5fe56..0000000000000
--- a/drivers/ide/ide-floppy_proc.c
+++ /dev/null
@@ -1,34 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-#include <linux/seq_file.h>
-
-#include "ide-floppy.h"
-
-static int idefloppy_capacity_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t*drive = (ide_drive_t *)m->private;
-
-	seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive));
-	return 0;
-}
-
-ide_proc_entry_t ide_floppy_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO, idefloppy_capacity_proc_show	},
-	{ "geometry",	S_IFREG|S_IRUGO, ide_geometry_proc_show		},
-	{}
-};
-
-ide_devset_rw_field(bios_cyl, bios_cyl);
-ide_devset_rw_field(bios_head, bios_head);
-ide_devset_rw_field(bios_sect, bios_sect);
-ide_devset_rw_field(ticks, pc_delay);
-
-const struct ide_proc_devset ide_floppy_settings[] = {
-	IDE_PROC_DEVSET(bios_cyl,  0, 1023),
-	IDE_PROC_DEVSET(bios_head, 0,  255),
-	IDE_PROC_DEVSET(bios_sect, 0,   63),
-	IDE_PROC_DEVSET(ticks,	   0,  255),
-	{ NULL },
-};
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
deleted file mode 100644
index e2b6c82586ce8..0000000000000
--- a/drivers/ide/ide-gd.c
+++ /dev/null
@@ -1,432 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/mutex.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-#include <linux/dmi.h>
-#include <linux/slab.h>
-
-#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
-#define IDE_DISK_MINORS		(1 << PARTN_BITS)
-#else
-#define IDE_DISK_MINORS		0
-#endif
-
-#include "ide-disk.h"
-#include "ide-floppy.h"
-
-#define IDE_GD_VERSION	"1.18"
-
-/* module parameters */
-static DEFINE_MUTEX(ide_gd_mutex);
-static unsigned long debug_mask;
-module_param(debug_mask, ulong, 0644);
-
-static DEFINE_MUTEX(ide_disk_ref_mutex);
-
-static void ide_disk_release(struct device *);
-
-static struct ide_disk_obj *ide_disk_get(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = NULL;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	idkp = ide_drv_g(disk, ide_disk_obj);
-	if (idkp) {
-		if (ide_device_get(idkp->drive))
-			idkp = NULL;
-		else
-			get_device(&idkp->dev);
-	}
-	mutex_unlock(&ide_disk_ref_mutex);
-	return idkp;
-}
-
-static void ide_disk_put(struct ide_disk_obj *idkp)
-{
-	ide_drive_t *drive = idkp->drive;
-
-	mutex_lock(&ide_disk_ref_mutex);
-	put_device(&idkp->dev);
-	ide_device_put(drive);
-	mutex_unlock(&ide_disk_ref_mutex);
-}
-
-sector_t ide_gd_capacity(ide_drive_t *drive)
-{
-	return drive->capacity64;
-}
-
-static int ide_gd_probe(ide_drive_t *);
-
-static void ide_gd_remove(ide_drive_t *drive)
-{
-	struct ide_disk_obj *idkp = drive->driver_data;
-	struct gendisk *g = idkp->disk;
-
-	ide_proc_unregister_driver(drive, idkp->driver);
-	device_del(&idkp->dev);
-	del_gendisk(g);
-	drive->disk_ops->flush(drive);
-
-	mutex_lock(&ide_disk_ref_mutex);
-	put_device(&idkp->dev);
-	mutex_unlock(&ide_disk_ref_mutex);
-}
-
-static void ide_disk_release(struct device *dev)
-{
-	struct ide_disk_obj *idkp = to_ide_drv(dev, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-	struct gendisk *g = idkp->disk;
-
-	drive->disk_ops = NULL;
-	drive->driver_data = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(idkp);
-}
-
-/*
- * On HPA drives the capacity needs to be
- * reinitialized on resume otherwise the disk
- * can not be used and a hard reset is required
- */
-static void ide_gd_resume(ide_drive_t *drive)
-{
-	if (ata_id_hpa_enabled(drive->id))
-		(void)drive->disk_ops->get_capacity(drive);
-}
-
-static const struct dmi_system_id ide_coldreboot_table[] = {
-	{
-		/* Acer TravelMate 66x cuts power during reboot */
-		.ident   = "Acer TravelMate 660",
-		.matches = {
-			DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
-			DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 660"),
-		},
-	},
-
-	{ }	/* terminate list */
-};
-
-static void ide_gd_shutdown(ide_drive_t *drive)
-{
-#ifdef	CONFIG_ALPHA
-	/* On Alpha, halt(8) doesn't actually turn the machine off,
-	   it puts you into the sort of firmware monitor. Typically,
-	   it's used to boot another kernel image, so it's not much
-	   different from reboot(8). Therefore, we don't need to
-	   spin down the disk in this case, especially since Alpha
-	   firmware doesn't handle disks in standby mode properly.
-	   On the other hand, it's reasonably safe to turn the power
-	   off when the shutdown process reaches the firmware prompt,
-	   as the firmware initialization takes rather long time -
-	   at least 10 seconds, which should be sufficient for
-	   the disk to expire its write cache. */
-	if (system_state != SYSTEM_POWER_OFF) {
-#else
-	if (system_state == SYSTEM_RESTART &&
-		!dmi_check_system(ide_coldreboot_table)) {
-#endif
-		drive->disk_ops->flush(drive);
-		return;
-	}
-
-	printk(KERN_INFO "Shutdown: %s\n", drive->name);
-
-	drive->gendev.bus->suspend(&drive->gendev, PMSG_SUSPEND);
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static ide_proc_entry_t *ide_disk_proc_entries(ide_drive_t *drive)
-{
-	return (drive->media == ide_disk) ? ide_disk_proc : ide_floppy_proc;
-}
-
-static const struct ide_proc_devset *ide_disk_proc_devsets(ide_drive_t *drive)
-{
-	return (drive->media == ide_disk) ? ide_disk_settings
-					  : ide_floppy_settings;
-}
-#endif
-
-static ide_startstop_t ide_gd_do_request(ide_drive_t *drive,
-					 struct request *rq, sector_t sector)
-{
-	return drive->disk_ops->do_request(drive, rq, sector);
-}
-
-static struct ide_driver ide_gd_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-gd",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_gd_probe,
-	.remove			= ide_gd_remove,
-	.resume			= ide_gd_resume,
-	.shutdown		= ide_gd_shutdown,
-	.version		= IDE_GD_VERSION,
-	.do_request		= ide_gd_do_request,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_disk_proc_entries,
-	.proc_devsets		= ide_disk_proc_devsets,
-#endif
-};
-
-static int ide_gd_open(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct ide_disk_obj *idkp;
-	ide_drive_t *drive;
-	int ret = 0;
-
-	idkp = ide_disk_get(disk);
-	if (idkp == NULL)
-		return -ENXIO;
-
-	drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	idkp->openers++;
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-		/* Just in case */
-
-		ret = drive->disk_ops->init_media(drive, disk);
-
-		/*
-		 * Allow O_NDELAY to open a drive without a disk, or with an
-		 * unreadable disk, so that we can get the format capacity
-		 * of the drive or begin the format - Sam
-		 */
-		if (ret && (mode & FMODE_NDELAY) == 0) {
-			ret = -EIO;
-			goto out_put_idkp;
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_WP) && (mode & FMODE_WRITE)) {
-			ret = -EROFS;
-			goto out_put_idkp;
-		}
-
-		/*
-		 * Ignore the return code from door_lock,
-		 * since the open() has already succeeded,
-		 * and the door_lock is irrelevant at this point.
-		 */
-		drive->disk_ops->set_doorlock(drive, disk, 1);
-		if (__invalidate_device(bdev, true))
-			pr_warn("VFS: busy inodes on changed media %s\n",
-				bdev->bd_disk->disk_name);
-		drive->disk_ops->get_capacity(drive);
-		set_capacity(disk, ide_gd_capacity(drive));
-		set_bit(GD_NEED_PART_SCAN, &disk->state);
-	} else if (drive->dev_flags & IDE_DFLAG_FORMAT_IN_PROGRESS) {
-		ret = -EBUSY;
-		goto out_put_idkp;
-	}
-	return 0;
-
-out_put_idkp:
-	idkp->openers--;
-	ide_disk_put(idkp);
-	return ret;
-}
-
-static int ide_gd_unlocked_open(struct block_device *bdev, fmode_t mode)
-{
-	int ret;
-
-	mutex_lock(&ide_gd_mutex);
-	ret = ide_gd_open(bdev, mode);
-	mutex_unlock(&ide_gd_mutex);
-
-	return ret;
-}
-
-
-static void ide_gd_release(struct gendisk *disk, fmode_t mode)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	mutex_lock(&ide_gd_mutex);
-	if (idkp->openers == 1)
-		drive->disk_ops->flush(drive);
-
-	if ((drive->dev_flags & IDE_DFLAG_REMOVABLE) && idkp->openers == 1) {
-		drive->disk_ops->set_doorlock(drive, disk, 0);
-		drive->dev_flags &= ~IDE_DFLAG_FORMAT_IN_PROGRESS;
-	}
-
-	idkp->openers--;
-
-	ide_disk_put(idkp);
-	mutex_unlock(&ide_gd_mutex);
-}
-
-static int ide_gd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	geo->heads = drive->bios_head;
-	geo->sectors = drive->bios_sect;
-	geo->cylinders = (u16)drive->bios_cyl; /* truncate */
-	return 0;
-}
-
-static void ide_gd_unlock_native_capacity(struct gendisk *disk)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-	const struct ide_disk_ops *disk_ops = drive->disk_ops;
-
-	if (disk_ops->unlock_native_capacity)
-		disk_ops->unlock_native_capacity(drive);
-}
-
-static int ide_gd_ioctl(struct block_device *bdev, fmode_t mode,
-			     unsigned int cmd, unsigned long arg)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	return drive->disk_ops->ioctl(drive, bdev, mode, cmd, arg);
-}
-
-#ifdef CONFIG_COMPAT
-static int ide_gd_compat_ioctl(struct block_device *bdev, fmode_t mode,
-			       unsigned int cmd, unsigned long arg)
-{
-	struct ide_disk_obj *idkp = ide_drv_g(bdev->bd_disk, ide_disk_obj);
-	ide_drive_t *drive = idkp->drive;
-
-	if (!drive->disk_ops->compat_ioctl)
-		return -ENOIOCTLCMD;
-
-	return drive->disk_ops->compat_ioctl(drive, bdev, mode, cmd, arg);
-}
-#endif
-
-static const struct block_device_operations ide_gd_ops = {
-	.owner			= THIS_MODULE,
-	.open			= ide_gd_unlocked_open,
-	.release		= ide_gd_release,
-	.ioctl			= ide_gd_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl		= ide_gd_compat_ioctl,
-#endif
-	.getgeo			= ide_gd_getgeo,
-	.unlock_native_capacity	= ide_gd_unlock_native_capacity,
-};
-
-static int ide_gd_probe(ide_drive_t *drive)
-{
-	const struct ide_disk_ops *disk_ops = NULL;
-	struct ide_disk_obj *idkp;
-	struct gendisk *g;
-
-	/* strstr("foo", "") is non-NULL */
-	if (!strstr("ide-gd", drive->driver_req))
-		goto failed;
-
-#ifdef CONFIG_IDE_GD_ATA
-	if (drive->media == ide_disk)
-		disk_ops = &ide_ata_disk_ops;
-#endif
-#ifdef CONFIG_IDE_GD_ATAPI
-	if (drive->media == ide_floppy)
-		disk_ops = &ide_atapi_disk_ops;
-#endif
-	if (disk_ops == NULL)
-		goto failed;
-
-	if (disk_ops->check(drive, DRV_NAME) == 0) {
-		printk(KERN_ERR PFX "%s: not supported by this driver\n",
-			drive->name);
-		goto failed;
-	}
-
-	idkp = kzalloc(sizeof(*idkp), GFP_KERNEL);
-	if (!idkp) {
-		printk(KERN_ERR PFX "%s: can't allocate a disk structure\n",
-			drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
-	if (!g)
-		goto out_free_idkp;
-
-	ide_init_disk(g, drive);
-
-	idkp->dev.parent = &drive->gendev;
-	idkp->dev.release = ide_disk_release;
-	dev_set_name(&idkp->dev, "%s", dev_name(&drive->gendev));
-
-	if (device_register(&idkp->dev))
-		goto out_free_disk;
-
-	idkp->drive = drive;
-	idkp->driver = &ide_gd_driver;
-	idkp->disk = g;
-
-	g->private_data = &idkp->driver;
-
-	drive->driver_data = idkp;
-	drive->debug_mask = debug_mask;
-	drive->disk_ops = disk_ops;
-
-	disk_ops->setup(drive);
-
-	set_capacity(g, ide_gd_capacity(drive));
-
-	g->minors = IDE_DISK_MINORS;
-	g->flags |= GENHD_FL_EXT_DEVT;
-	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
-		g->flags = GENHD_FL_REMOVABLE;
-	g->fops = &ide_gd_ops;
-	g->events = DISK_EVENT_MEDIA_CHANGE;
-	device_add_disk(&drive->gendev, g, NULL);
-	return 0;
-
-out_free_disk:
-	put_disk(g);
-out_free_idkp:
-	kfree(idkp);
-failed:
-	return -ENODEV;
-}
-
-static int __init ide_gd_init(void)
-{
-	printk(KERN_INFO DRV_NAME " driver " IDE_GD_VERSION "\n");
-	return driver_register(&ide_gd_driver.gen_driver);
-}
-
-static void __exit ide_gd_exit(void)
-{
-	driver_unregister(&ide_gd_driver.gen_driver);
-}
-
-MODULE_ALIAS("ide:*m-disk*");
-MODULE_ALIAS("ide-disk");
-MODULE_ALIAS("ide:*m-floppy*");
-MODULE_ALIAS("ide-floppy");
-module_init(ide_gd_init);
-module_exit(ide_gd_exit);
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("generic ATA/ATAPI disk driver");
diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h
deleted file mode 100644
index af3fe1880e9e2..0000000000000
--- a/drivers/ide/ide-gd.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IDE_GD_H
-#define __IDE_GD_H
-
-#define DRV_NAME "ide-gd"
-#define PFX DRV_NAME ": "
-
-/* define to see debug info */
-#define IDE_GD_DEBUG_LOG	0
-
-#if IDE_GD_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, ## args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
-struct ide_disk_obj {
-	ide_drive_t		*drive;
-	struct ide_driver	*driver;
-	struct gendisk		*disk;
-	struct device		dev;
-	unsigned int		openers;	/* protected by BKL for now */
-
-	/* used for blk_{fs,pc}_request() requests */
-	struct ide_atapi_pc queued_pc;
-
-	/* Last error information */
-	u8 sense_key, asc, ascq;
-
-	int progress_indication;
-
-	/* Device information */
-	/* Current format */
-	int blocks, block_size, bs_factor;
-	/* Last format capacity descriptor */
-	u8 cap_desc[8];
-	/* Copy of the flexible disk page */
-	u8 flexible_disk_page[32];
-};
-
-sector_t ide_gd_capacity(ide_drive_t *);
-
-#endif /* __IDE_GD_H */
diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c
deleted file mode 100644
index 80c0d69b83ace..0000000000000
--- a/drivers/ide/ide-generic.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * generic/default IDE host driver
- *
- * Copyright (C) 2004, 2008-2009 Bartlomiej Zolnierkiewicz
- * This code was split off from ide.c.  See it for original copyrights.
- *
- * May be copied or modified under the terms of the GNU General Public License.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ide.h>
-#include <linux/pci_ids.h>
-
-/* FIXME: convert arm to use ide_platform host driver */
-#ifdef CONFIG_ARM
-#include <asm/irq.h>
-#endif
-
-#define DRV_NAME	"ide_generic"
-
-static int probe_mask;
-module_param(probe_mask, int, 0);
-MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports");
-
-static const struct ide_port_info ide_generic_port_info = {
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
-};
-
-#ifdef CONFIG_ARM
-static const u16 legacy_bases[] = { 0x1f0 };
-static const int legacy_irqs[]  = { IRQ_HARDDISK };
-#elif defined(CONFIG_ALPHA)
-static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168 };
-static const int legacy_irqs[]  = { 14, 15, 11, 10 };
-#else
-static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 };
-static const int legacy_irqs[]  = { 14, 15, 11, 10, 8, 12 };
-#endif
-
-static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary)
-{
-#ifdef CONFIG_PCI
-	struct pci_dev *p = NULL;
-	u16 val;
-
-	for_each_pci_dev(p) {
-		if (pci_resource_start(p, 0) == 0x1f0)
-			*primary = 1;
-		if (pci_resource_start(p, 2) == 0x170)
-			*secondary = 1;
-
-		/* Cyrix CS55{1,2}0 pre SFF MWDMA ATA on the bridge */
-		if (p->vendor == PCI_VENDOR_ID_CYRIX &&
-		    (p->device == PCI_DEVICE_ID_CYRIX_5510 ||
-		     p->device == PCI_DEVICE_ID_CYRIX_5520))
-			*primary = *secondary = 1;
-
-		/* Intel MPIIX - PIO ATA on non PCI side of bridge */
-		if (p->vendor == PCI_VENDOR_ID_INTEL &&
-		    p->device == PCI_DEVICE_ID_INTEL_82371MX) {
-			pci_read_config_word(p, 0x6C, &val);
-			if (val & 0x8000) {
-				/* ATA port enabled */
-				if (val & 0x4000)
-					*secondary = 1;
-				else
-					*primary = 1;
-			}
-		}
-	}
-#endif
-}
-
-static int __init ide_generic_init(void)
-{
-	struct ide_hw hw, *hws[] = { &hw };
-	unsigned long io_addr;
-	int i, rc = 0, primary = 0, secondary = 0;
-
-	ide_generic_check_pci_legacy_iobases(&primary, &secondary);
-
-	if (!probe_mask) {
-		printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" "
-		     "module parameter for probing all legacy ISA IDE ports\n");
-
-		if (primary == 0)
-			probe_mask |= 0x1;
-
-		if (secondary == 0)
-			probe_mask |= 0x2;
-	} else
-		printk(KERN_INFO DRV_NAME ": enforcing probing of I/O ports "
-			"upon user request\n");
-
-	for (i = 0; i < ARRAY_SIZE(legacy_bases); i++) {
-		io_addr = legacy_bases[i];
-
-		if ((probe_mask & (1 << i)) && io_addr) {
-			if (!request_region(io_addr, 8, DRV_NAME)) {
-				printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX "
-						"not free.\n",
-						DRV_NAME, io_addr, io_addr + 7);
-				rc = -EBUSY;
-				continue;
-			}
-
-			if (!request_region(io_addr + 0x206, 1, DRV_NAME)) {
-				printk(KERN_ERR "%s: I/O resource 0x%lX "
-						"not free.\n",
-						DRV_NAME, io_addr + 0x206);
-				release_region(io_addr, 8);
-				rc = -EBUSY;
-				continue;
-			}
-
-			memset(&hw, 0, sizeof(hw));
-			ide_std_init_ports(&hw, io_addr, io_addr + 0x206);
-#ifdef CONFIG_IA64
-			hw.irq = isa_irq_to_vector(legacy_irqs[i]);
-#else
-			hw.irq = legacy_irqs[i];
-#endif
-			rc = ide_host_add(&ide_generic_port_info, hws, 1, NULL);
-			if (rc) {
-				release_region(io_addr + 0x206, 1);
-				release_region(io_addr, 8);
-			}
-		}
-	}
-
-	return rc;
-}
-
-module_init(ide_generic_init);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c
deleted file mode 100644
index 94bdcf1ea186c..0000000000000
--- a/drivers/ide/ide-io-std.c
+++ /dev/null
@@ -1,262 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-
-#if defined(CONFIG_ARM) || defined(CONFIG_M68K) || defined(CONFIG_MIPS) || \
-    defined(CONFIG_PARISC) || defined(CONFIG_PPC) || defined(CONFIG_SPARC)
-#include <asm/ide.h>
-#else
-#include <asm-generic/ide_iops.h>
-#endif
-
-/*
- *	Conventional PIO operations for ATA devices
- */
-
-static u8 ide_inb(unsigned long port)
-{
-	return (u8) inb(port);
-}
-
-static void ide_outb(u8 val, unsigned long port)
-{
-	outb(val, port);
-}
-
-/*
- *	MMIO operations, typically used for SATA controllers
- */
-
-static u8 ide_mm_inb(unsigned long port)
-{
-	return (u8) readb((void __iomem *) port);
-}
-
-static void ide_mm_outb(u8 value, unsigned long port)
-{
-	writeb(value, (void __iomem *) port);
-}
-
-void ide_exec_command(ide_hwif_t *hwif, u8 cmd)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		writeb(cmd, (void __iomem *)hwif->io_ports.command_addr);
-	else
-		outb(cmd, hwif->io_ports.command_addr);
-}
-EXPORT_SYMBOL_GPL(ide_exec_command);
-
-u8 ide_read_status(ide_hwif_t *hwif)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return readb((void __iomem *)hwif->io_ports.status_addr);
-	else
-		return inb(hwif->io_ports.status_addr);
-}
-EXPORT_SYMBOL_GPL(ide_read_status);
-
-u8 ide_read_altstatus(ide_hwif_t *hwif)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return readb((void __iomem *)hwif->io_ports.ctl_addr);
-	else
-		return inb(hwif->io_ports.ctl_addr);
-}
-EXPORT_SYMBOL_GPL(ide_read_altstatus);
-
-void ide_write_devctl(ide_hwif_t *hwif, u8 ctl)
-{
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr);
-	else
-		outb(ctl, hwif->io_ports.ctl_addr);
-}
-EXPORT_SYMBOL_GPL(ide_write_devctl);
-
-void ide_dev_select(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 select = drive->select | ATA_DEVICE_OBS;
-
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		writeb(select, (void __iomem *)hwif->io_ports.device_addr);
-	else
-		outb(select, hwif->io_ports.device_addr);
-}
-EXPORT_SYMBOL_GPL(ide_dev_select);
-
-void ide_tf_load(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	void (*tf_outb)(u8 addr, unsigned long port);
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-
-	if (mmio)
-		tf_outb = ide_mm_outb;
-	else
-		tf_outb = ide_outb;
-
-	if (valid & IDE_VALID_FEATURE)
-		tf_outb(tf->feature, io_ports->feature_addr);
-	if (valid & IDE_VALID_NSECT)
-		tf_outb(tf->nsect, io_ports->nsect_addr);
-	if (valid & IDE_VALID_LBAL)
-		tf_outb(tf->lbal, io_ports->lbal_addr);
-	if (valid & IDE_VALID_LBAM)
-		tf_outb(tf->lbam, io_ports->lbam_addr);
-	if (valid & IDE_VALID_LBAH)
-		tf_outb(tf->lbah, io_ports->lbah_addr);
-	if (valid & IDE_VALID_DEVICE)
-		tf_outb(tf->device, io_ports->device_addr);
-}
-EXPORT_SYMBOL_GPL(ide_tf_load);
-
-void ide_tf_read(ide_drive_t *drive, struct ide_taskfile *tf, u8 valid)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	u8 (*tf_inb)(unsigned long port);
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-
-	if (mmio)
-		tf_inb  = ide_mm_inb;
-	else
-		tf_inb  = ide_inb;
-
-	if (valid & IDE_VALID_ERROR)
-		tf->error  = tf_inb(io_ports->feature_addr);
-	if (valid & IDE_VALID_NSECT)
-		tf->nsect  = tf_inb(io_ports->nsect_addr);
-	if (valid & IDE_VALID_LBAL)
-		tf->lbal   = tf_inb(io_ports->lbal_addr);
-	if (valid & IDE_VALID_LBAM)
-		tf->lbam   = tf_inb(io_ports->lbam_addr);
-	if (valid & IDE_VALID_LBAH)
-		tf->lbah   = tf_inb(io_ports->lbah_addr);
-	if (valid & IDE_VALID_DEVICE)
-		tf->device = tf_inb(io_ports->device_addr);
-}
-EXPORT_SYMBOL_GPL(ide_tf_read);
-
-/*
- * Some localbus EIDE interfaces require a special access sequence
- * when using 32-bit I/O instructions to transfer data.  We call this
- * the "vlb_sync" sequence, which consists of three successive reads
- * of the sector count register location, with interrupts disabled
- * to ensure that the reads all happen together.
- */
-static void ata_vlb_sync(unsigned long port)
-{
-	(void)inb(port);
-	(void)inb(port);
-	(void)inb(port);
-}
-
-/*
- * This is used for most PIO data transfers *from* the IDE interface
- *
- * These routines will round up any request for an odd number of bytes,
- * so if an odd len is specified, be sure that there's at least one
- * extra byte allocated for the buffer.
- */
-void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
-		    unsigned int len)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	unsigned long data_addr = io_ports->data_addr;
-	unsigned int words = (len + 1) >> 1;
-	u8 io_32bit = drive->io_32bit;
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-
-	if (io_32bit) {
-		unsigned long flags;
-
-		if ((io_32bit & 2) && !mmio) {
-			local_irq_save(flags);
-			ata_vlb_sync(io_ports->nsect_addr);
-		}
-
-		words >>= 1;
-		if (mmio)
-			__ide_mm_insl((void __iomem *)data_addr, buf, words);
-		else
-			insl(data_addr, buf, words);
-
-		if ((io_32bit & 2) && !mmio)
-			local_irq_restore(flags);
-
-		if (((len + 1) & 3) < 2)
-			return;
-
-		buf += len & ~3;
-		words = 1;
-	}
-
-	if (mmio)
-		__ide_mm_insw((void __iomem *)data_addr, buf, words);
-	else
-		insw(data_addr, buf, words);
-}
-EXPORT_SYMBOL_GPL(ide_input_data);
-
-/*
- * This is used for most PIO data transfers *to* the IDE interface
- */
-void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf,
-		     unsigned int len)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	unsigned long data_addr = io_ports->data_addr;
-	unsigned int words = (len + 1) >> 1;
-	u8 io_32bit = drive->io_32bit;
-	u8 mmio = (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-
-	if (io_32bit) {
-		unsigned long flags;
-
-		if ((io_32bit & 2) && !mmio) {
-			local_irq_save(flags);
-			ata_vlb_sync(io_ports->nsect_addr);
-		}
-
-		words >>= 1;
-		if (mmio)
-			__ide_mm_outsl((void __iomem *)data_addr, buf, words);
-		else
-			outsl(data_addr, buf, words);
-
-		if ((io_32bit & 2) && !mmio)
-			local_irq_restore(flags);
-
-		if (((len + 1) & 3) < 2)
-			return;
-
-		buf += len & ~3;
-		words = 1;
-	}
-
-	if (mmio)
-		__ide_mm_outsw((void __iomem *)data_addr, buf, words);
-	else
-		outsw(data_addr, buf, words);
-}
-EXPORT_SYMBOL_GPL(ide_output_data);
-
-const struct ide_tp_ops default_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
deleted file mode 100644
index 4867b67b60d69..0000000000000
--- a/drivers/ide/ide-io.c
+++ /dev/null
@@ -1,904 +0,0 @@
-/*
- *	IDE I/O functions
- *
- *	Basic PIO and command management functionality.
- *
- * This code was split off from ide.c. See ide.c for history and original
- * copyrights.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * For the avoidance of doubt the "preferred form" of this code is one which
- * is in an open non patent encumbered format. Where cryptographic key signing
- * forms part of the process of creating an executable the information
- * including keys needed to generate an equivalently functional executable
- * are deemed to be part of the source code.
- */
- 
- 
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/blkpg.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/completion.h>
-#include <linux/reboot.h>
-#include <linux/cdrom.h>
-#include <linux/seq_file.h>
-#include <linux/device.h>
-#include <linux/kmod.h>
-#include <linux/scatterlist.h>
-#include <linux/bitops.h>
-
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
-	       unsigned int nr_bytes)
-{
-	/*
-	 * decide whether to reenable DMA -- 3 is a random magic for now,
-	 * if we DMA timeout more than 3 times, just stay in PIO
-	 */
-	if ((drive->dev_flags & IDE_DFLAG_DMA_PIO_RETRY) &&
-	    drive->retry_pio <= 3) {
-		drive->dev_flags &= ~IDE_DFLAG_DMA_PIO_RETRY;
-		ide_dma_on(drive);
-	}
-
-	if (!blk_update_request(rq, error, nr_bytes)) {
-		if (rq == drive->sense_rq) {
-			drive->sense_rq = NULL;
-			drive->sense_rq_active = false;
-		}
-
-		__blk_mq_end_request(rq, error);
-		return 0;
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(ide_end_rq);
-
-void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err)
-{
-	const struct ide_tp_ops *tp_ops = drive->hwif->tp_ops;
-	struct ide_taskfile *tf = &cmd->tf;
-	struct request *rq = cmd->rq;
-	u8 tf_cmd = tf->command;
-
-	tf->error = err;
-	tf->status = stat;
-
-	if (cmd->ftf_flags & IDE_FTFLAG_IN_DATA) {
-		u8 data[2];
-
-		tp_ops->input_data(drive, cmd, data, 2);
-
-		cmd->tf.data  = data[0];
-		cmd->hob.data = data[1];
-	}
-
-	ide_tf_readback(drive, cmd);
-
-	if ((cmd->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) &&
-	    tf_cmd == ATA_CMD_IDLEIMMEDIATE) {
-		if (tf->lbal != 0xc4) {
-			printk(KERN_ERR "%s: head unload failed!\n",
-			       drive->name);
-			ide_tf_dump(drive->name, cmd);
-		} else
-			drive->dev_flags |= IDE_DFLAG_PARKED;
-	}
-
-	if (rq && ata_taskfile_request(rq)) {
-		struct ide_cmd *orig_cmd = ide_req(rq)->special;
-
-		if (cmd->tf_flags & IDE_TFLAG_DYN)
-			kfree(orig_cmd);
-		else if (cmd != orig_cmd)
-			memcpy(orig_cmd, cmd, sizeof(*cmd));
-	}
-}
-
-int ide_complete_rq(ide_drive_t *drive, blk_status_t error, unsigned int nr_bytes)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq = hwif->rq;
-	int rc;
-
-	/*
-	 * if failfast is set on a request, override number of sectors
-	 * and complete the whole request right now
-	 */
-	if (blk_noretry_request(rq) && error)
-		nr_bytes = blk_rq_sectors(rq) << 9;
-
-	rc = ide_end_rq(drive, rq, error, nr_bytes);
-	if (rc == 0)
-		hwif->rq = NULL;
-
-	return rc;
-}
-EXPORT_SYMBOL(ide_complete_rq);
-
-void ide_kill_rq(ide_drive_t *drive, struct request *rq)
-{
-	u8 drv_req = ata_misc_request(rq) && rq->rq_disk;
-	u8 media = drive->media;
-
-	drive->failed_pc = NULL;
-
-	if ((media == ide_floppy || media == ide_tape) && drv_req) {
-		scsi_req(rq)->result = 0;
-	} else {
-		if (media == ide_tape)
-			scsi_req(rq)->result = IDE_DRV_ERROR_GENERAL;
-		else if (blk_rq_is_passthrough(rq) && scsi_req(rq)->result == 0)
-			scsi_req(rq)->result = -EIO;
-	}
-
-	ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
-}
-
-static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
-{
-	tf->nsect   = drive->sect;
-	tf->lbal    = drive->sect;
-	tf->lbam    = drive->cyl;
-	tf->lbah    = drive->cyl >> 8;
-	tf->device  = (drive->head - 1) | drive->select;
-	tf->command = ATA_CMD_INIT_DEV_PARAMS;
-}
-
-static void ide_tf_set_restore_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
-{
-	tf->nsect   = drive->sect;
-	tf->command = ATA_CMD_RESTORE;
-}
-
-static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf)
-{
-	tf->nsect   = drive->mult_req;
-	tf->command = ATA_CMD_SET_MULTI;
-}
-
-/**
- *	do_special		-	issue some special commands
- *	@drive: drive the command is for
- *
- *	do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS,
- *	ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive.
- */
-
-static ide_startstop_t do_special(ide_drive_t *drive)
-{
-	struct ide_cmd cmd;
-
-#ifdef DEBUG
-	printk(KERN_DEBUG "%s: %s: 0x%02x\n", drive->name, __func__,
-		drive->special_flags);
-#endif
-	if (drive->media != ide_disk) {
-		drive->special_flags = 0;
-		drive->mult_req = 0;
-		return ide_stopped;
-	}
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.protocol = ATA_PROT_NODATA;
-
-	if (drive->special_flags & IDE_SFLAG_SET_GEOMETRY) {
-		drive->special_flags &= ~IDE_SFLAG_SET_GEOMETRY;
-		ide_tf_set_specify_cmd(drive, &cmd.tf);
-	} else if (drive->special_flags & IDE_SFLAG_RECALIBRATE) {
-		drive->special_flags &= ~IDE_SFLAG_RECALIBRATE;
-		ide_tf_set_restore_cmd(drive, &cmd.tf);
-	} else if (drive->special_flags & IDE_SFLAG_SET_MULTMODE) {
-		drive->special_flags &= ~IDE_SFLAG_SET_MULTMODE;
-		ide_tf_set_setmult_cmd(drive, &cmd.tf);
-	} else
-		BUG();
-
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd.tf_flags = IDE_TFLAG_CUSTOM_HANDLER;
-
-	do_rw_taskfile(drive, &cmd);
-
-	return ide_started;
-}
-
-void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct scatterlist *sg = hwif->sg_table, *last_sg = NULL;
-	struct request *rq = cmd->rq;
-
-	cmd->sg_nents = __blk_rq_map_sg(drive->queue, rq, sg, &last_sg);
-	if (blk_rq_bytes(rq) && (blk_rq_bytes(rq) & rq->q->dma_pad_mask))
-		last_sg->length +=
-			(rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
-}
-EXPORT_SYMBOL_GPL(ide_map_sg);
-
-void ide_init_sg_cmd(struct ide_cmd *cmd, unsigned int nr_bytes)
-{
-	cmd->nbytes = cmd->nleft = nr_bytes;
-	cmd->cursg_ofs = 0;
-	cmd->cursg = NULL;
-}
-EXPORT_SYMBOL_GPL(ide_init_sg_cmd);
-
-/**
- *	execute_drive_command	-	issue special drive command
- *	@drive: the drive to issue the command on
- *	@rq: the request structure holding the command
- *
- *	execute_drive_cmd() issues a special drive command,  usually 
- *	initiated by ioctl() from the external hdparm program. The
- *	command can be a drive command, drive task or taskfile 
- *	operation. Weirdly you can call it with NULL to wait for
- *	all commands to finish. Don't do this as that is due to change
- */
-
-static ide_startstop_t execute_drive_cmd (ide_drive_t *drive,
-		struct request *rq)
-{
-	struct ide_cmd *cmd = ide_req(rq)->special;
-
-	if (cmd) {
-		if (cmd->protocol == ATA_PROT_PIO) {
-			ide_init_sg_cmd(cmd, blk_rq_sectors(rq) << 9);
-			ide_map_sg(drive, cmd);
-		}
-
-		return do_rw_taskfile(drive, cmd);
-	}
-
- 	/*
- 	 * NULL is actually a valid way of waiting for
- 	 * all current requests to be flushed from the queue.
- 	 */
-#ifdef DEBUG
- 	printk("%s: DRIVE_CMD (null)\n", drive->name);
-#endif
-	scsi_req(rq)->result = 0;
-	ide_complete_rq(drive, BLK_STS_OK, blk_rq_bytes(rq));
-
- 	return ide_stopped;
-}
-
-static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq)
-{
-	u8 cmd = scsi_req(rq)->cmd[0];
-
-	switch (cmd) {
-	case REQ_PARK_HEADS:
-	case REQ_UNPARK_HEADS:
-		return ide_do_park_unpark(drive, rq);
-	case REQ_DEVSET_EXEC:
-		return ide_do_devset(drive, rq);
-	case REQ_DRIVE_RESET:
-		return ide_do_reset(drive);
-	default:
-		BUG();
-	}
-}
-
-/**
- *	start_request	-	start of I/O and command issuing for IDE
- *
- *	start_request() initiates handling of a new I/O request. It
- *	accepts commands and I/O (read/write) requests.
- *
- *	FIXME: this function needs a rename
- */
- 
-static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
-{
-	ide_startstop_t startstop;
-
-#ifdef DEBUG
-	printk("%s: start_request: current=0x%08lx\n",
-		drive->hwif->name, (unsigned long) rq);
-#endif
-
-	/* bail early if we've exceeded max_failures */
-	if (drive->max_failures && (drive->failures > drive->max_failures)) {
-		rq->rq_flags |= RQF_FAILED;
-		goto kill_rq;
-	}
-
-	if (drive->prep_rq && !drive->prep_rq(drive, rq))
-		return ide_stopped;
-
-	if (ata_pm_request(rq))
-		ide_check_pm_state(drive, rq);
-
-	drive->hwif->tp_ops->dev_select(drive);
-	if (ide_wait_stat(&startstop, drive, drive->ready_stat,
-			  ATA_BUSY | ATA_DRQ, WAIT_READY)) {
-		printk(KERN_ERR "%s: drive not ready for command\n", drive->name);
-		return startstop;
-	}
-
-	if (drive->special_flags == 0) {
-		struct ide_driver *drv;
-
-		/*
-		 * We reset the drive so we need to issue a SETFEATURES.
-		 * Do it _after_ do_special() restored device parameters.
-		 */
-		if (drive->current_speed == 0xff)
-			ide_config_drive_speed(drive, drive->desired_speed);
-
-		if (ata_taskfile_request(rq))
-			return execute_drive_cmd(drive, rq);
-		else if (ata_pm_request(rq)) {
-			struct ide_pm_state *pm = ide_req(rq)->special;
-#ifdef DEBUG_PM
-			printk("%s: start_power_step(step: %d)\n",
-				drive->name, pm->pm_step);
-#endif
-			startstop = ide_start_power_step(drive, rq);
-			if (startstop == ide_stopped &&
-			    pm->pm_step == IDE_PM_COMPLETED)
-				ide_complete_pm_rq(drive, rq);
-			return startstop;
-		} else if (!rq->rq_disk && ata_misc_request(rq))
-			/*
-			 * TODO: Once all ULDs have been modified to
-			 * check for specific op codes rather than
-			 * blindly accepting any special request, the
-			 * check for ->rq_disk above may be replaced
-			 * by a more suitable mechanism or even
-			 * dropped entirely.
-			 */
-			return ide_special_rq(drive, rq);
-
-		drv = *(struct ide_driver **)rq->rq_disk->private_data;
-
-		return drv->do_request(drive, rq, blk_rq_pos(rq));
-	}
-	return do_special(drive);
-kill_rq:
-	ide_kill_rq(drive, rq);
-	return ide_stopped;
-}
-
-/**
- *	ide_stall_queue		-	pause an IDE device
- *	@drive: drive to stall
- *	@timeout: time to stall for (jiffies)
- *
- *	ide_stall_queue() can be used by a drive to give excess bandwidth back
- *	to the port by sleeping for timeout jiffies.
- */
- 
-void ide_stall_queue (ide_drive_t *drive, unsigned long timeout)
-{
-	if (timeout > WAIT_WORSTCASE)
-		timeout = WAIT_WORSTCASE;
-	drive->sleep = timeout + jiffies;
-	drive->dev_flags |= IDE_DFLAG_SLEEPING;
-}
-EXPORT_SYMBOL(ide_stall_queue);
-
-static inline int ide_lock_port(ide_hwif_t *hwif)
-{
-	if (hwif->busy)
-		return 1;
-
-	hwif->busy = 1;
-
-	return 0;
-}
-
-static inline void ide_unlock_port(ide_hwif_t *hwif)
-{
-	hwif->busy = 0;
-}
-
-static inline int ide_lock_host(struct ide_host *host, ide_hwif_t *hwif)
-{
-	int rc = 0;
-
-	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
-		rc = test_and_set_bit_lock(IDE_HOST_BUSY, &host->host_busy);
-		if (rc == 0) {
-			if (host->get_lock)
-				host->get_lock(ide_intr, hwif);
-		}
-	}
-	return rc;
-}
-
-static inline void ide_unlock_host(struct ide_host *host)
-{
-	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
-		if (host->release_lock)
-			host->release_lock();
-		clear_bit_unlock(IDE_HOST_BUSY, &host->host_busy);
-	}
-}
-
-void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
-{
-	struct request_queue *q = drive->queue;
-
-	/* Use 3ms as that was the old plug delay */
-	if (rq) {
-		blk_mq_requeue_request(rq, false);
-		blk_mq_delay_kick_requeue_list(q, 3);
-	} else
-		blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3);
-}
-
-blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq,
-			  bool local_requeue)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_host *host = hwif->host;
-	ide_startstop_t	startstop;
-
-	if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) {
-		rq->rq_flags |= RQF_DONTPREP;
-		ide_req(rq)->special = NULL;
-	}
-
-	/* HLD do_request() callback might sleep, make sure it's okay */
-	might_sleep();
-
-	if (ide_lock_host(host, hwif))
-		return BLK_STS_DEV_RESOURCE;
-
-	spin_lock_irq(&hwif->lock);
-
-	if (!ide_lock_port(hwif)) {
-		ide_hwif_t *prev_port;
-
-		WARN_ON_ONCE(hwif->rq);
-repeat:
-		prev_port = hwif->host->cur_port;
-		if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
-		    time_after(drive->sleep, jiffies)) {
-			ide_unlock_port(hwif);
-			goto plug_device;
-		}
-
-		if ((hwif->host->host_flags & IDE_HFLAG_SERIALIZE) &&
-		    hwif != prev_port) {
-			ide_drive_t *cur_dev =
-				prev_port ? prev_port->cur_dev : NULL;
-
-			/*
-			 * set nIEN for previous port, drives in the
-			 * quirk list may not like intr setups/cleanups
-			 */
-			if (cur_dev &&
-			    (cur_dev->dev_flags & IDE_DFLAG_NIEN_QUIRK) == 0)
-				prev_port->tp_ops->write_devctl(prev_port,
-								ATA_NIEN |
-								ATA_DEVCTL_OBS);
-
-			hwif->host->cur_port = hwif;
-		}
-		hwif->cur_dev = drive;
-		drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
-
-		/*
-		 * Sanity: don't accept a request that isn't a PM request
-		 * if we are currently power managed. This is very important as
-		 * blk_stop_queue() doesn't prevent the blk_fetch_request()
-		 * above to return us whatever is in the queue. Since we call
-		 * ide_do_request() ourselves, we end up taking requests while
-		 * the queue is blocked...
-		 */
-		if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
-		    ata_pm_request(rq) == 0 &&
-		    (rq->rq_flags & RQF_PM) == 0) {
-			/* there should be no pending command at this point */
-			ide_unlock_port(hwif);
-			goto plug_device;
-		}
-
-		scsi_req(rq)->resid_len = blk_rq_bytes(rq);
-		hwif->rq = rq;
-
-		spin_unlock_irq(&hwif->lock);
-		startstop = start_request(drive, rq);
-		spin_lock_irq(&hwif->lock);
-
-		if (startstop == ide_stopped) {
-			rq = hwif->rq;
-			hwif->rq = NULL;
-			if (rq)
-				goto repeat;
-			ide_unlock_port(hwif);
-			goto out;
-		}
-	} else {
-plug_device:
-		if (local_requeue)
-			list_add(&rq->queuelist, &drive->rq_list);
-		spin_unlock_irq(&hwif->lock);
-		ide_unlock_host(host);
-		if (!local_requeue)
-			ide_requeue_and_plug(drive, rq);
-		return BLK_STS_OK;
-	}
-
-out:
-	spin_unlock_irq(&hwif->lock);
-	if (rq == NULL)
-		ide_unlock_host(host);
-	return BLK_STS_OK;
-}
-
-/*
- * Issue a new request to a device.
- */
-blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
-			  const struct blk_mq_queue_data *bd)
-{
-	ide_drive_t *drive = hctx->queue->queuedata;
-	ide_hwif_t *hwif = drive->hwif;
-
-	spin_lock_irq(&hwif->lock);
-	if (drive->sense_rq_active) {
-		spin_unlock_irq(&hwif->lock);
-		return BLK_STS_DEV_RESOURCE;
-	}
-	spin_unlock_irq(&hwif->lock);
-
-	blk_mq_start_request(bd->rq);
-	return ide_issue_rq(drive, bd->rq, false);
-}
-
-static int drive_is_ready(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 stat = 0;
-
-	if (drive->waiting_for_dma)
-		return hwif->dma_ops->dma_test_irq(drive);
-
-	if (hwif->io_ports.ctl_addr &&
-	    (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0)
-		stat = hwif->tp_ops->read_altstatus(hwif);
-	else
-		/* Note: this may clear a pending IRQ!! */
-		stat = hwif->tp_ops->read_status(hwif);
-
-	if (stat & ATA_BUSY)
-		/* drive busy: definitely not interrupting */
-		return 0;
-
-	/* drive ready: *might* be interrupting */
-	return 1;
-}
-
-/**
- *	ide_timer_expiry	-	handle lack of an IDE interrupt
- *	@data: timer callback magic (hwif)
- *
- *	An IDE command has timed out before the expected drive return
- *	occurred. At this point we attempt to clean up the current
- *	mess. If the current handler includes an expiry handler then
- *	we invoke the expiry handler, and providing it is happy the
- *	work is done. If that fails we apply generic recovery rules
- *	invoking the handler and checking the drive DMA status. We
- *	have an excessively incestuous relationship with the DMA
- *	logic that wants cleaning up.
- */
- 
-void ide_timer_expiry (struct timer_list *t)
-{
-	ide_hwif_t	*hwif = from_timer(hwif, t, timer);
-	ide_drive_t	*drive;
-	ide_handler_t	*handler;
-	unsigned long	flags;
-	int		wait = -1;
-	int		plug_device = 0;
-	struct request	*rq_in_flight;
-
-	spin_lock_irqsave(&hwif->lock, flags);
-
-	handler = hwif->handler;
-
-	if (handler == NULL || hwif->req_gen != hwif->req_gen_timer) {
-		/*
-		 * Either a marginal timeout occurred
-		 * (got the interrupt just as timer expired),
-		 * or we were "sleeping" to give other devices a chance.
-		 * Either way, we don't really want to complain about anything.
-		 */
-	} else {
-		ide_expiry_t *expiry = hwif->expiry;
-		ide_startstop_t startstop = ide_stopped;
-
-		drive = hwif->cur_dev;
-
-		if (expiry) {
-			wait = expiry(drive);
-			if (wait > 0) { /* continue */
-				/* reset timer */
-				hwif->timer.expires = jiffies + wait;
-				hwif->req_gen_timer = hwif->req_gen;
-				add_timer(&hwif->timer);
-				spin_unlock_irqrestore(&hwif->lock, flags);
-				return;
-			}
-		}
-		hwif->handler = NULL;
-		hwif->expiry = NULL;
-		/*
-		 * We need to simulate a real interrupt when invoking
-		 * the handler() function, which means we need to
-		 * globally mask the specific IRQ:
-		 */
-		spin_unlock(&hwif->lock);
-		/* disable_irq_nosync ?? */
-		disable_irq(hwif->irq);
-
-		if (hwif->polling) {
-			startstop = handler(drive);
-		} else if (drive_is_ready(drive)) {
-			if (drive->waiting_for_dma)
-				hwif->dma_ops->dma_lost_irq(drive);
-			if (hwif->port_ops && hwif->port_ops->clear_irq)
-				hwif->port_ops->clear_irq(drive);
-
-			printk(KERN_WARNING "%s: lost interrupt\n",
-				drive->name);
-			startstop = handler(drive);
-		} else {
-			if (drive->waiting_for_dma)
-				startstop = ide_dma_timeout_retry(drive, wait);
-			else
-				startstop = ide_error(drive, "irq timeout",
-					hwif->tp_ops->read_status(hwif));
-		}
-		/* Disable interrupts again, `handler' might have enabled it */
-		spin_lock_irq(&hwif->lock);
-		enable_irq(hwif->irq);
-		if (startstop == ide_stopped && hwif->polling == 0) {
-			rq_in_flight = hwif->rq;
-			hwif->rq = NULL;
-			ide_unlock_port(hwif);
-			plug_device = 1;
-		}
-	}
-	spin_unlock_irqrestore(&hwif->lock, flags);
-
-	if (plug_device) {
-		ide_unlock_host(hwif->host);
-		ide_requeue_and_plug(drive, rq_in_flight);
-	}
-}
-
-/**
- *	unexpected_intr		-	handle an unexpected IDE interrupt
- *	@irq: interrupt line
- *	@hwif: port being processed
- *
- *	There's nothing really useful we can do with an unexpected interrupt,
- *	other than reading the status register (to clear it), and logging it.
- *	There should be no way that an irq can happen before we're ready for it,
- *	so we needn't worry much about losing an "important" interrupt here.
- *
- *	On laptops (and "green" PCs), an unexpected interrupt occurs whenever
- *	the drive enters "idle", "standby", or "sleep" mode, so if the status
- *	looks "good", we just ignore the interrupt completely.
- *
- *	This routine assumes __cli() is in effect when called.
- *
- *	If an unexpected interrupt happens on irq15 while we are handling irq14
- *	and if the two interfaces are "serialized" (CMD640), then it looks like
- *	we could screw up by interfering with a new request being set up for 
- *	irq15.
- *
- *	In reality, this is a non-issue.  The new command is not sent unless 
- *	the drive is ready to accept one, in which case we know the drive is
- *	not trying to interrupt us.  And ide_set_handler() is always invoked
- *	before completing the issuance of any new drive command, so we will not
- *	be accidentally invoked as a result of any valid command completion
- *	interrupt.
- */
-
-static void unexpected_intr(int irq, ide_hwif_t *hwif)
-{
-	u8 stat = hwif->tp_ops->read_status(hwif);
-
-	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
-		/* Try to not flood the console with msgs */
-		static unsigned long last_msgtime, count;
-		++count;
-
-		if (time_after(jiffies, last_msgtime + HZ)) {
-			last_msgtime = jiffies;
-			printk(KERN_ERR "%s: unexpected interrupt, "
-				"status=0x%02x, count=%ld\n",
-				hwif->name, stat, count);
-		}
-	}
-}
-
-/**
- *	ide_intr	-	default IDE interrupt handler
- *	@irq: interrupt number
- *	@dev_id: hwif
- *	@regs: unused weirdness from the kernel irq layer
- *
- *	This is the default IRQ handler for the IDE layer. You should
- *	not need to override it. If you do be aware it is subtle in
- *	places
- *
- *	hwif is the interface in the group currently performing
- *	a command. hwif->cur_dev is the drive and hwif->handler is
- *	the IRQ handler to call. As we issue a command the handlers
- *	step through multiple states, reassigning the handler to the
- *	next step in the process. Unlike a smart SCSI controller IDE
- *	expects the main processor to sequence the various transfer
- *	stages. We also manage a poll timer to catch up with most
- *	timeout situations. There are still a few where the handlers
- *	don't ever decide to give up.
- *
- *	The handler eventually returns ide_stopped to indicate the
- *	request completed. At this point we issue the next request
- *	on the port and the process begins again.
- */
-
-irqreturn_t ide_intr (int irq, void *dev_id)
-{
-	ide_hwif_t *hwif = (ide_hwif_t *)dev_id;
-	struct ide_host *host = hwif->host;
-	ide_drive_t *drive;
-	ide_handler_t *handler;
-	unsigned long flags;
-	ide_startstop_t startstop;
-	irqreturn_t irq_ret = IRQ_NONE;
-	int plug_device = 0;
-	struct request *rq_in_flight;
-
-	if (host->host_flags & IDE_HFLAG_SERIALIZE) {
-		if (hwif != host->cur_port)
-			goto out_early;
-	}
-
-	spin_lock_irqsave(&hwif->lock, flags);
-
-	if (hwif->port_ops && hwif->port_ops->test_irq &&
-	    hwif->port_ops->test_irq(hwif) == 0)
-		goto out;
-
-	handler = hwif->handler;
-
-	if (handler == NULL || hwif->polling) {
-		/*
-		 * Not expecting an interrupt from this drive.
-		 * That means this could be:
-		 *	(1) an interrupt from another PCI device
-		 *	sharing the same PCI INT# as us.
-		 * or	(2) a drive just entered sleep or standby mode,
-		 *	and is interrupting to let us know.
-		 * or	(3) a spurious interrupt of unknown origin.
-		 *
-		 * For PCI, we cannot tell the difference,
-		 * so in that case we just ignore it and hope it goes away.
-		 */
-		if ((host->irq_flags & IRQF_SHARED) == 0) {
-			/*
-			 * Probably not a shared PCI interrupt,
-			 * so we can safely try to do something about it:
-			 */
-			unexpected_intr(irq, hwif);
-		} else {
-			/*
-			 * Whack the status register, just in case
-			 * we have a leftover pending IRQ.
-			 */
-			(void)hwif->tp_ops->read_status(hwif);
-		}
-		goto out;
-	}
-
-	drive = hwif->cur_dev;
-
-	if (!drive_is_ready(drive))
-		/*
-		 * This happens regularly when we share a PCI IRQ with
-		 * another device.  Unfortunately, it can also happen
-		 * with some buggy drives that trigger the IRQ before
-		 * their status register is up to date.  Hopefully we have
-		 * enough advance overhead that the latter isn't a problem.
-		 */
-		goto out;
-
-	hwif->handler = NULL;
-	hwif->expiry = NULL;
-	hwif->req_gen++;
-	del_timer(&hwif->timer);
-	spin_unlock(&hwif->lock);
-
-	if (hwif->port_ops && hwif->port_ops->clear_irq)
-		hwif->port_ops->clear_irq(drive);
-
-	if (drive->dev_flags & IDE_DFLAG_UNMASK)
-		local_irq_enable_in_hardirq();
-
-	/* service this interrupt, may set handler for next interrupt */
-	startstop = handler(drive);
-
-	spin_lock_irq(&hwif->lock);
-	/*
-	 * Note that handler() may have set things up for another
-	 * interrupt to occur soon, but it cannot happen until
-	 * we exit from this routine, because it will be the
-	 * same irq as is currently being serviced here, and Linux
-	 * won't allow another of the same (on any CPU) until we return.
-	 */
-	if (startstop == ide_stopped && hwif->polling == 0) {
-		BUG_ON(hwif->handler);
-		rq_in_flight = hwif->rq;
-		hwif->rq = NULL;
-		ide_unlock_port(hwif);
-		plug_device = 1;
-	}
-	irq_ret = IRQ_HANDLED;
-out:
-	spin_unlock_irqrestore(&hwif->lock, flags);
-out_early:
-	if (plug_device) {
-		ide_unlock_host(hwif->host);
-		ide_requeue_and_plug(drive, rq_in_flight);
-	}
-
-	return irq_ret;
-}
-EXPORT_SYMBOL_GPL(ide_intr);
-
-void ide_pad_transfer(ide_drive_t *drive, int write, int len)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 buf[4] = { 0 };
-
-	while (len > 0) {
-		if (write)
-			hwif->tp_ops->output_data(drive, NULL, buf, min(4, len));
-		else
-			hwif->tp_ops->input_data(drive, NULL, buf, min(4, len));
-		len -= 4;
-	}
-}
-EXPORT_SYMBOL_GPL(ide_pad_transfer);
-
-void ide_insert_request_head(ide_drive_t *drive, struct request *rq)
-{
-	drive->sense_rq_active = true;
-	list_add_tail(&rq->queuelist, &drive->rq_list);
-	kblockd_schedule_work(&drive->rq_work);
-}
-EXPORT_SYMBOL_GPL(ide_insert_request_head);
diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c
deleted file mode 100644
index 43fbc37d85c34..0000000000000
--- a/drivers/ide/ide-ioctls.c
+++ /dev/null
@@ -1,306 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * IDE ioctls handling.
- */
-
-#include <linux/compat.h>
-#include <linux/export.h>
-#include <linux/hdreg.h>
-#include <linux/ide.h>
-#include <linux/slab.h>
-
-static int put_user_long(long val, unsigned long arg)
-{
-	if (in_compat_syscall())
-		return put_user(val, (compat_long_t __user *)compat_ptr(arg));
-
-	return put_user(val, (long __user *)arg);
-}
-
-static const struct ide_ioctl_devset ide_ioctl_settings[] = {
-{ HDIO_GET_32BIT,	 HDIO_SET_32BIT,	&ide_devset_io_32bit  },
-{ HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS,	&ide_devset_keepsettings },
-{ HDIO_GET_UNMASKINTR,	 HDIO_SET_UNMASKINTR,	&ide_devset_unmaskirq },
-{ HDIO_GET_DMA,		 HDIO_SET_DMA,		&ide_devset_using_dma },
-{ -1,			 HDIO_SET_PIO_MODE,	&ide_devset_pio_mode  },
-{ 0 }
-};
-
-int ide_setting_ioctl(ide_drive_t *drive, struct block_device *bdev,
-		      unsigned int cmd, unsigned long arg,
-		      const struct ide_ioctl_devset *s)
-{
-	const struct ide_devset *ds;
-	int err = -EOPNOTSUPP;
-
-	for (; (ds = s->setting); s++) {
-		if (ds->get && s->get_ioctl == cmd)
-			goto read_val;
-		else if (ds->set && s->set_ioctl == cmd)
-			goto set_val;
-	}
-
-	return err;
-
-read_val:
-	mutex_lock(&ide_setting_mtx);
-	err = ds->get(drive);
-	mutex_unlock(&ide_setting_mtx);
-	return err >= 0 ? put_user_long(err, arg) : err;
-
-set_val:
-	if (bdev_is_partition(bdev))
-		err = -EINVAL;
-	else {
-		if (!capable(CAP_SYS_ADMIN))
-			err = -EACCES;
-		else {
-			mutex_lock(&ide_setting_mtx);
-			err = ide_devset_execute(drive, ds, arg);
-			mutex_unlock(&ide_setting_mtx);
-		}
-	}
-	return err;
-}
-EXPORT_SYMBOL_GPL(ide_setting_ioctl);
-
-static int ide_get_identity_ioctl(ide_drive_t *drive, unsigned int cmd,
-				  void __user *argp)
-{
-	u16 *id = NULL;
-	int size = (cmd == HDIO_GET_IDENTITY) ? (ATA_ID_WORDS * 2) : 142;
-	int rc = 0;
-
-	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
-		rc = -ENOMSG;
-		goto out;
-	}
-
-	/* ata_id_to_hd_driveid() relies on 'id' to be fully allocated. */
-	id = kmalloc(ATA_ID_WORDS * 2, GFP_KERNEL);
-	if (id == NULL) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	memcpy(id, drive->id, size);
-	ata_id_to_hd_driveid(id);
-
-	if (copy_to_user(argp, id, size))
-		rc = -EFAULT;
-
-	kfree(id);
-out:
-	return rc;
-}
-
-static int ide_get_nice_ioctl(ide_drive_t *drive, unsigned long arg)
-{
-	return put_user_long((!!(drive->dev_flags & IDE_DFLAG_DSC_OVERLAP)
-			 << IDE_NICE_DSC_OVERLAP) |
-			(!!(drive->dev_flags & IDE_DFLAG_NICE1)
-			 << IDE_NICE_1), arg);
-}
-
-static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg)
-{
-	if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1))))
-		return -EPERM;
-
-	if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) &&
-	    (drive->media != ide_tape))
-		return -EPERM;
-
-	if ((arg >> IDE_NICE_DSC_OVERLAP) & 1)
-		drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-
-	if ((arg >> IDE_NICE_1) & 1)
-		drive->dev_flags |= IDE_DFLAG_NICE1;
-	else
-		drive->dev_flags &= ~IDE_DFLAG_NICE1;
-
-	return 0;
-}
-
-static int ide_cmd_ioctl(ide_drive_t *drive, void __user *argp)
-{
-	u8 *buf = NULL;
-	int bufsize = 0, err = 0;
-	u8 args[4], xfer_rate = 0;
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-
-	if (NULL == argp) {
-		struct request *rq;
-
-		rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-		ide_req(rq)->type = ATA_PRIV_TASKFILE;
-		blk_execute_rq(NULL, rq, 0);
-		err = scsi_req(rq)->result ? -EIO : 0;
-		blk_put_request(rq);
-
-		return err;
-	}
-
-	if (copy_from_user(args, argp, 4))
-		return -EFAULT;
-
-	memset(&cmd, 0, sizeof(cmd));
-	tf->feature = args[2];
-	if (args[0] == ATA_CMD_SMART) {
-		tf->nsect = args[3];
-		tf->lbal  = args[1];
-		tf->lbam  = ATA_SMART_LBAM_PASS;
-		tf->lbah  = ATA_SMART_LBAH_PASS;
-		cmd.valid.out.tf = IDE_VALID_OUT_TF;
-		cmd.valid.in.tf  = IDE_VALID_NSECT;
-	} else {
-		tf->nsect = args[1];
-		cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT;
-		cmd.valid.in.tf  = IDE_VALID_NSECT;
-	}
-	tf->command = args[0];
-	cmd.protocol = args[3] ? ATA_PROT_PIO : ATA_PROT_NODATA;
-
-	if (args[3]) {
-		cmd.tf_flags |= IDE_TFLAG_IO_16BIT;
-		bufsize = SECTOR_SIZE * args[3];
-		buf = kzalloc(bufsize, GFP_KERNEL);
-		if (buf == NULL)
-			return -ENOMEM;
-	}
-
-	if (tf->command == ATA_CMD_SET_FEATURES &&
-	    tf->feature == SETFEATURES_XFER &&
-	    tf->nsect >= XFER_SW_DMA_0) {
-		xfer_rate = ide_find_dma_mode(drive, tf->nsect);
-		if (xfer_rate != tf->nsect) {
-			err = -EINVAL;
-			goto abort;
-		}
-
-		cmd.tf_flags |= IDE_TFLAG_SET_XFER;
-	}
-
-	err = ide_raw_taskfile(drive, &cmd, buf, args[3]);
-
-	args[0] = tf->status;
-	args[1] = tf->error;
-	args[2] = tf->nsect;
-abort:
-	if (copy_to_user(argp, &args, 4))
-		err = -EFAULT;
-	if (buf) {
-		if (copy_to_user((argp + 4), buf, bufsize))
-			err = -EFAULT;
-		kfree(buf);
-	}
-	return err;
-}
-
-static int ide_task_ioctl(ide_drive_t *drive, void __user *p)
-{
-	int err = 0;
-	u8 args[7];
-	struct ide_cmd cmd;
-
-	if (copy_from_user(args, p, 7))
-		return -EFAULT;
-
-	memset(&cmd, 0, sizeof(cmd));
-	memcpy(&cmd.tf.feature, &args[1], 6);
-	cmd.tf.command = args[0];
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-
-	err = ide_no_data_taskfile(drive, &cmd);
-
-	args[0] = cmd.tf.command;
-	memcpy(&args[1], &cmd.tf.feature, 6);
-
-	if (copy_to_user(p, args, 7))
-		err = -EFAULT;
-
-	return err;
-}
-
-static int generic_drive_reset(ide_drive_t *drive)
-{
-	struct request *rq;
-	int ret = 0;
-
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	scsi_req(rq)->cmd_len = 1;
-	scsi_req(rq)->cmd[0] = REQ_DRIVE_RESET;
-	blk_execute_rq(NULL, rq, 1);
-	ret = scsi_req(rq)->result;
-	blk_put_request(rq);
-	return ret;
-}
-
-int generic_ide_ioctl(ide_drive_t *drive, struct block_device *bdev,
-		      unsigned int cmd, unsigned long arg)
-{
-	int err;
-	void __user *argp = (void __user *)arg;
-
-	if (in_compat_syscall())
-		argp = compat_ptr(arg);
-
-	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_ioctl_settings);
-	if (err != -EOPNOTSUPP)
-		return err;
-
-	switch (cmd) {
-	case HDIO_OBSOLETE_IDENTITY:
-	case HDIO_GET_IDENTITY:
-		if (bdev_is_partition(bdev))
-			return -EINVAL;
-		return ide_get_identity_ioctl(drive, cmd, argp);
-	case HDIO_GET_NICE:
-		return ide_get_nice_ioctl(drive, arg);
-	case HDIO_SET_NICE:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		return ide_set_nice_ioctl(drive, arg);
-#ifdef CONFIG_IDE_TASK_IOCTL
-	case HDIO_DRIVE_TASKFILE:
-		if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
-			return -EACCES;
-		/* missing compat handler for HDIO_DRIVE_TASKFILE */
-		if (in_compat_syscall())
-			return -ENOTTY;
-		if (drive->media == ide_disk)
-			return ide_taskfile_ioctl(drive, arg);
-		return -ENOMSG;
-#endif
-	case HDIO_DRIVE_CMD:
-		if (!capable(CAP_SYS_RAWIO))
-			return -EACCES;
-		return ide_cmd_ioctl(drive, argp);
-	case HDIO_DRIVE_TASK:
-		if (!capable(CAP_SYS_RAWIO))
-			return -EACCES;
-		return ide_task_ioctl(drive, argp);
-	case HDIO_DRIVE_RESET:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		return generic_drive_reset(drive);
-	case HDIO_GET_BUSSTATE:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		if (put_user_long(BUSSTATE_ON, arg))
-			return -EFAULT;
-		return 0;
-	case HDIO_SET_BUSSTATE:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EACCES;
-		return -EOPNOTSUPP;
-	default:
-		return -EINVAL;
-	}
-}
-EXPORT_SYMBOL(generic_ide_ioctl);
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
deleted file mode 100644
index f2be127ee96e6..0000000000000
--- a/drivers/ide/ide-iops.c
+++ /dev/null
@@ -1,536 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2000-2002	Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2003		Red Hat
- *
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/blkpg.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/bitops.h>
-#include <linux/nmi.h>
-
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-void SELECT_MASK(ide_drive_t *drive, int mask)
-{
-	const struct ide_port_ops *port_ops = drive->hwif->port_ops;
-
-	if (port_ops && port_ops->maskproc)
-		port_ops->maskproc(drive, mask);
-}
-
-u8 ide_read_error(ide_drive_t *drive)
-{
-	struct ide_taskfile tf;
-
-	drive->hwif->tp_ops->tf_read(drive, &tf, IDE_VALID_ERROR);
-
-	return tf.error;
-}
-EXPORT_SYMBOL_GPL(ide_read_error);
-
-void ide_fix_driveid(u16 *id)
-{
-#ifndef __LITTLE_ENDIAN
-# ifdef __BIG_ENDIAN
-	int i;
-
-	for (i = 0; i < 256; i++)
-		id[i] = __le16_to_cpu(id[i]);
-# else
-#  error "Please fix <asm/byteorder.h>"
-# endif
-#endif
-}
-
-/*
- * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
- * removing leading/trailing blanks and compressing internal blanks.
- * It is primarily used to tidy up the model name/number fields as
- * returned by the ATA_CMD_ID_ATA[PI] commands.
- */
-
-void ide_fixstring(u8 *s, const int bytecount, const int byteswap)
-{
-	u8 *p, *end = &s[bytecount & ~1]; /* bytecount must be even */
-
-	if (byteswap) {
-		/* convert from big-endian to host byte order */
-		for (p = s ; p != end ; p += 2)
-			be16_to_cpus((u16 *) p);
-	}
-
-	/* strip leading blanks */
-	p = s;
-	while (s != end && *s == ' ')
-		++s;
-	/* compress internal blanks and strip trailing blanks */
-	while (s != end && *s) {
-		if (*s++ != ' ' || (s != end && *s && *s != ' '))
-			*p++ = *(s-1);
-	}
-	/* wipe out trailing garbage */
-	while (p != end)
-		*p++ = '\0';
-}
-EXPORT_SYMBOL(ide_fixstring);
-
-/*
- * This routine busy-waits for the drive status to be not "busy".
- * It then checks the status for all of the "good" bits and none
- * of the "bad" bits, and if all is okay it returns 0.  All other
- * cases return error -- caller may then invoke ide_error().
- *
- * This routine should get fixed to not hog the cpu during extra long waits..
- * That could be done by busy-waiting for the first jiffy or two, and then
- * setting a timer to wake up at half second intervals thereafter,
- * until timeout is achieved, before timing out.
- */
-int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad,
-		    unsigned long timeout, u8 *rstat)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	unsigned long flags;
-	bool irqs_threaded = force_irqthreads;
-	int i;
-	u8 stat;
-
-	udelay(1);	/* spec allows drive 400ns to assert "BUSY" */
-	stat = tp_ops->read_status(hwif);
-
-	if (stat & ATA_BUSY) {
-		if (!irqs_threaded) {
-			local_save_flags(flags);
-			local_irq_enable_in_hardirq();
-		}
-		timeout += jiffies;
-		while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) {
-			if (time_after(jiffies, timeout)) {
-				/*
-				 * One last read after the timeout in case
-				 * heavy interrupt load made us not make any
-				 * progress during the timeout..
-				 */
-				stat = tp_ops->read_status(hwif);
-				if ((stat & ATA_BUSY) == 0)
-					break;
-
-				if (!irqs_threaded)
-					local_irq_restore(flags);
-				*rstat = stat;
-				return -EBUSY;
-			}
-		}
-		if (!irqs_threaded)
-			local_irq_restore(flags);
-	}
-	/*
-	 * Allow status to settle, then read it again.
-	 * A few rare drives vastly violate the 400ns spec here,
-	 * so we'll wait up to 10usec for a "good" status
-	 * rather than expensively fail things immediately.
-	 * This fix courtesy of Matthew Faupel & Niccolo Rigacci.
-	 */
-	for (i = 0; i < 10; i++) {
-		udelay(1);
-		stat = tp_ops->read_status(hwif);
-
-		if (OK_STAT(stat, good, bad)) {
-			*rstat = stat;
-			return 0;
-		}
-	}
-	*rstat = stat;
-	return -EFAULT;
-}
-
-/*
- * In case of error returns error value after doing "*startstop = ide_error()".
- * The caller should return the updated value of "startstop" in this case,
- * "startstop" is unchanged when the function returns 0.
- */
-int ide_wait_stat(ide_startstop_t *startstop, ide_drive_t *drive, u8 good,
-		  u8 bad, unsigned long timeout)
-{
-	int err;
-	u8 stat;
-
-	/* bail early if we've exceeded max_failures */
-	if (drive->max_failures && (drive->failures > drive->max_failures)) {
-		*startstop = ide_stopped;
-		return 1;
-	}
-
-	err = __ide_wait_stat(drive, good, bad, timeout, &stat);
-
-	if (err) {
-		char *s = (err == -EBUSY) ? "status timeout" : "status error";
-		*startstop = ide_error(drive, s, stat);
-	}
-
-	return err;
-}
-EXPORT_SYMBOL(ide_wait_stat);
-
-/**
- *	ide_in_drive_list	-	look for drive in black/white list
- *	@id: drive identifier
- *	@table: list to inspect
- *
- *	Look for a drive in the blacklist and the whitelist tables
- *	Returns 1 if the drive is found in the table.
- */
-
-int ide_in_drive_list(u16 *id, const struct drive_list_entry *table)
-{
-	for ( ; table->id_model; table++)
-		if ((!strcmp(table->id_model, (char *)&id[ATA_ID_PROD])) &&
-		    (!table->id_firmware ||
-		     strstr((char *)&id[ATA_ID_FW_REV], table->id_firmware)))
-			return 1;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_in_drive_list);
-
-/*
- * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid.
- * Some optical devices with the buggy firmwares have the same problem.
- */
-static const struct drive_list_entry ivb_list[] = {
-	{ "QUANTUM FIREBALLlct10 05"	, "A03.0900"	},
-	{ "QUANTUM FIREBALLlct20 30"	, "APL.0900"	},
-	{ "TSSTcorp CDDVDW SH-S202J"	, "SB00"	},
-	{ "TSSTcorp CDDVDW SH-S202J"	, "SB01"	},
-	{ "TSSTcorp CDDVDW SH-S202N"	, "SB00"	},
-	{ "TSSTcorp CDDVDW SH-S202N"	, "SB01"	},
-	{ "TSSTcorp CDDVDW SH-S202H"	, "SB00"	},
-	{ "TSSTcorp CDDVDW SH-S202H"	, "SB01"	},
-	{ "SAMSUNG SP0822N"		, "WA100-10"	},
-	{ NULL				, NULL		}
-};
-
-/*
- *  All hosts that use the 80c ribbon must use!
- *  The name is derived from upper byte of word 93 and the 80c ribbon.
- */
-u8 eighty_ninty_three(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u16 *id = drive->id;
-	int ivb = ide_in_drive_list(id, ivb_list);
-
-	if (hwif->cbl == ATA_CBL_SATA || hwif->cbl == ATA_CBL_PATA40_SHORT)
-		return 1;
-
-	if (ivb)
-		printk(KERN_DEBUG "%s: skipping word 93 validity check\n",
-				  drive->name);
-
-	if (ata_id_is_sata(id) && !ivb)
-		return 1;
-
-	if (hwif->cbl != ATA_CBL_PATA80 && !ivb)
-		goto no_80w;
-
-	/*
-	 * FIXME:
-	 * - change master/slave IDENTIFY order
-	 * - force bit13 (80c cable present) check also for !ivb devices
-	 *   (unless the slave device is pre-ATA3)
-	 */
-	if (id[ATA_ID_HW_CONFIG] & 0x4000)
-		return 1;
-
-	if (ivb) {
-		const char *model = (char *)&id[ATA_ID_PROD];
-
-		if (strstr(model, "TSSTcorp CDDVDW SH-S202")) {
-			/*
-			 * These ATAPI devices always report 80c cable
-			 * so we have to depend on the host in this case.
-			 */
-			if (hwif->cbl == ATA_CBL_PATA80)
-				return 1;
-		} else {
-			/* Depend on the device side cable detection. */
-			if (id[ATA_ID_HW_CONFIG] & 0x2000)
-				return 1;
-		}
-	}
-no_80w:
-	if (drive->dev_flags & IDE_DFLAG_UDMA33_WARNED)
-		return 0;
-
-	printk(KERN_WARNING "%s: %s side 80-wire cable detection failed, "
-			    "limiting max speed to UDMA33\n",
-			    drive->name,
-			    hwif->cbl == ATA_CBL_PATA80 ? "drive" : "host");
-
-	drive->dev_flags |= IDE_DFLAG_UDMA33_WARNED;
-
-	return 0;
-}
-
-static const char *nien_quirk_list[] = {
-	"QUANTUM FIREBALLlct08 08",
-	"QUANTUM FIREBALLP KA6.4",
-	"QUANTUM FIREBALLP KA9.1",
-	"QUANTUM FIREBALLP KX13.6",
-	"QUANTUM FIREBALLP KX20.5",
-	"QUANTUM FIREBALLP KX27.3",
-	"QUANTUM FIREBALLP LM20.4",
-	"QUANTUM FIREBALLP LM20.5",
-	"FUJITSU MHZ2160BH G2",
-	NULL
-};
-
-void ide_check_nien_quirk_list(ide_drive_t *drive)
-{
-	const char **list, *m = (char *)&drive->id[ATA_ID_PROD];
-
-	for (list = nien_quirk_list; *list != NULL; list++)
-		if (strstr(m, *list) != NULL) {
-			drive->dev_flags |= IDE_DFLAG_NIEN_QUIRK;
-			return;
-		}
-}
-
-int ide_driveid_update(ide_drive_t *drive)
-{
-	u16 *id;
-	int rc;
-
-	id = kmalloc(SECTOR_SIZE, GFP_ATOMIC);
-	if (id == NULL)
-		return 0;
-
-	SELECT_MASK(drive, 1);
-	rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id, 1);
-	SELECT_MASK(drive, 0);
-
-	if (rc)
-		goto out_err;
-
-	drive->id[ATA_ID_UDMA_MODES]  = id[ATA_ID_UDMA_MODES];
-	drive->id[ATA_ID_MWDMA_MODES] = id[ATA_ID_MWDMA_MODES];
-	drive->id[ATA_ID_SWDMA_MODES] = id[ATA_ID_SWDMA_MODES];
-	drive->id[ATA_ID_CFA_MODES]   = id[ATA_ID_CFA_MODES];
-	/* anything more ? */
-
-	kfree(id);
-
-	return 1;
-out_err:
-	if (rc == 2)
-		printk(KERN_ERR "%s: %s: bad status\n", drive->name, __func__);
-	kfree(id);
-	return 0;
-}
-
-int ide_config_drive_speed(ide_drive_t *drive, u8 speed)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	struct ide_taskfile tf;
-	u16 *id = drive->id, i;
-	int error = 0;
-	u8 stat;
-
-#ifdef CONFIG_BLK_DEV_IDEDMA
-	if (hwif->dma_ops)	/* check if host supports DMA */
-		hwif->dma_ops->dma_host_set(drive, 0);
-#endif
-
-	/* Skip setting PIO flow-control modes on pre-EIDE drives */
-	if ((speed & 0xf8) == XFER_PIO_0 && ata_id_has_iordy(drive->id) == 0)
-		goto skip;
-
-	/*
-	 * Don't use ide_wait_cmd here - it will
-	 * attempt to set_geometry and recalibrate,
-	 * but for some reason these don't work at
-	 * this point (lost interrupt).
-	 */
-
-	udelay(1);
-	tp_ops->dev_select(drive);
-	SELECT_MASK(drive, 1);
-	udelay(1);
-	tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS);
-
-	memset(&tf, 0, sizeof(tf));
-	tf.feature = SETFEATURES_XFER;
-	tf.nsect   = speed;
-
-	tp_ops->tf_load(drive, &tf, IDE_VALID_FEATURE | IDE_VALID_NSECT);
-
-	tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES);
-
-	if (drive->dev_flags & IDE_DFLAG_NIEN_QUIRK)
-		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-
-	error = __ide_wait_stat(drive, drive->ready_stat,
-				ATA_BUSY | ATA_DRQ | ATA_ERR,
-				WAIT_CMD, &stat);
-
-	SELECT_MASK(drive, 0);
-
-	if (error) {
-		(void) ide_dump_status(drive, "set_drive_speed_status", stat);
-		return error;
-	}
-
-	if (speed >= XFER_SW_DMA_0) {
-		id[ATA_ID_UDMA_MODES]  &= ~0xFF00;
-		id[ATA_ID_MWDMA_MODES] &= ~0x0700;
-		id[ATA_ID_SWDMA_MODES] &= ~0x0700;
-		if (ata_id_is_cfa(id))
-			id[ATA_ID_CFA_MODES] &= ~0x0E00;
-	} else	if (ata_id_is_cfa(id))
-		id[ATA_ID_CFA_MODES] &= ~0x01C0;
-
- skip:
-#ifdef CONFIG_BLK_DEV_IDEDMA
-	if (speed >= XFER_SW_DMA_0 && (drive->dev_flags & IDE_DFLAG_USING_DMA))
-		hwif->dma_ops->dma_host_set(drive, 1);
-	else if (hwif->dma_ops)	/* check if host supports DMA */
-		ide_dma_off_quietly(drive);
-#endif
-
-	if (speed >= XFER_UDMA_0) {
-		i = 1 << (speed - XFER_UDMA_0);
-		id[ATA_ID_UDMA_MODES] |= (i << 8 | i);
-	} else if (ata_id_is_cfa(id) && speed >= XFER_MW_DMA_3) {
-		i = speed - XFER_MW_DMA_2;
-		id[ATA_ID_CFA_MODES] |= i << 9;
-	} else if (speed >= XFER_MW_DMA_0) {
-		i = 1 << (speed - XFER_MW_DMA_0);
-		id[ATA_ID_MWDMA_MODES] |= (i << 8 | i);
-	} else if (speed >= XFER_SW_DMA_0) {
-		i = 1 << (speed - XFER_SW_DMA_0);
-		id[ATA_ID_SWDMA_MODES] |= (i << 8 | i);
-	} else if (ata_id_is_cfa(id) && speed >= XFER_PIO_5) {
-		i = speed - XFER_PIO_4;
-		id[ATA_ID_CFA_MODES] |= i << 6;
-	}
-
-	if (!drive->init_speed)
-		drive->init_speed = speed;
-	drive->current_speed = speed;
-	return error;
-}
-
-/*
- * This should get invoked any time we exit the driver to
- * wait for an interrupt response from a drive.  handler() points
- * at the appropriate code to handle the next interrupt, and a
- * timer is started to prevent us from waiting forever in case
- * something goes wrong (see the ide_timer_expiry() handler later on).
- *
- * See also ide_execute_command
- */
-void __ide_set_handler(ide_drive_t *drive, ide_handler_t *handler,
-		       unsigned int timeout)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	BUG_ON(hwif->handler);
-	hwif->handler		= handler;
-	hwif->timer.expires	= jiffies + timeout;
-	hwif->req_gen_timer	= hwif->req_gen;
-	add_timer(&hwif->timer);
-}
-
-void ide_set_handler(ide_drive_t *drive, ide_handler_t *handler,
-		     unsigned int timeout)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hwif->lock, flags);
-	__ide_set_handler(drive, handler, timeout);
-	spin_unlock_irqrestore(&hwif->lock, flags);
-}
-EXPORT_SYMBOL(ide_set_handler);
-
-/**
- *	ide_execute_command	-	execute an IDE command
- *	@drive: IDE drive to issue the command against
- *	@cmd: command
- *	@handler: handler for next phase
- *	@timeout: timeout for command
- *
- *	Helper function to issue an IDE command. This handles the
- *	atomicity requirements, command timing and ensures that the
- *	handler and IRQ setup do not race. All IDE command kick off
- *	should go via this function or do equivalent locking.
- */
-
-void ide_execute_command(ide_drive_t *drive, struct ide_cmd *cmd,
-			 ide_handler_t *handler, unsigned timeout)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hwif->lock, flags);
-	if ((cmd->protocol != ATAPI_PROT_DMA &&
-	     cmd->protocol != ATAPI_PROT_PIO) ||
-	    (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT))
-		__ide_set_handler(drive, handler, timeout);
-	hwif->tp_ops->exec_command(hwif, cmd->tf.command);
-	/*
-	 * Drive takes 400nS to respond, we must avoid the IRQ being
-	 * serviced before that.
-	 *
-	 * FIXME: we could skip this delay with care on non shared devices
-	 */
-	ndelay(400);
-	spin_unlock_irqrestore(&hwif->lock, flags);
-}
-
-/*
- * ide_wait_not_busy() waits for the currently selected device on the hwif
- * to report a non-busy status, see comments in ide_probe_port().
- */
-int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout)
-{
-	u8 stat = 0;
-
-	while (timeout--) {
-		/*
-		 * Turn this into a schedule() sleep once I'm sure
-		 * about locking issues (2.5 work ?).
-		 */
-		mdelay(1);
-		stat = hwif->tp_ops->read_status(hwif);
-		if ((stat & ATA_BUSY) == 0)
-			return 0;
-		/*
-		 * Assume a value of 0xff means nothing is connected to
-		 * the interface and it doesn't implement the pull-down
-		 * resistor on D7.
-		 */
-		if (stat == 0xff)
-			return -ENODEV;
-		touch_nmi_watchdog();
-	}
-	return -EBUSY;
-}
diff --git a/drivers/ide/ide-legacy.c b/drivers/ide/ide-legacy.c
deleted file mode 100644
index be65b411ab53b..0000000000000
--- a/drivers/ide/ide-legacy.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/ide.h>
-
-static void ide_legacy_init_one(struct ide_hw **hws, struct ide_hw *hw,
-				u8 port_no, const struct ide_port_info *d,
-				unsigned long config)
-{
-	unsigned long base, ctl;
-	int irq;
-
-	if (port_no == 0) {
-		base = 0x1f0;
-		ctl  = 0x3f6;
-		irq  = 14;
-	} else {
-		base = 0x170;
-		ctl  = 0x376;
-		irq  = 15;
-	}
-
-	if (!request_region(base, 8, d->name)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
-				d->name, base, base + 7);
-		return;
-	}
-
-	if (!request_region(ctl, 1, d->name)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n",
-				d->name, ctl);
-		release_region(base, 8);
-		return;
-	}
-
-	ide_std_init_ports(hw, base, ctl);
-	hw->irq = irq;
-	hw->config = config;
-
-	hws[port_no] = hw;
-}
-
-int ide_legacy_device_add(const struct ide_port_info *d, unsigned long config)
-{
-	struct ide_hw hw[2], *hws[] = { NULL, NULL };
-
-	memset(&hw, 0, sizeof(hw));
-
-	if ((d->host_flags & IDE_HFLAG_QD_2ND_PORT) == 0)
-		ide_legacy_init_one(hws, &hw[0], 0, d, config);
-	ide_legacy_init_one(hws, &hw[1], 1, d, config);
-
-	if (hws[0] == NULL && hws[1] == NULL &&
-	    (d->host_flags & IDE_HFLAG_SINGLE))
-		return -ENOENT;
-
-	return ide_host_add(d, hws, 2, NULL);
-}
-EXPORT_SYMBOL_GPL(ide_legacy_device_add);
diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c
deleted file mode 100644
index 7b9f655adbc2d..0000000000000
--- a/drivers/ide/ide-lib.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/ide.h>
-#include <linux/bitops.h>
-
-u64 ide_get_lba_addr(struct ide_cmd *cmd, int lba48)
-{
-	struct ide_taskfile *tf = &cmd->tf;
-	u32 high, low;
-
-	low  = (tf->lbah << 16) | (tf->lbam << 8) | tf->lbal;
-	if (lba48) {
-		tf = &cmd->hob;
-		high = (tf->lbah << 16) | (tf->lbam << 8) | tf->lbal;
-	} else
-		high = tf->device & 0xf;
-
-	return ((u64)high << 24) | low;
-}
-EXPORT_SYMBOL_GPL(ide_get_lba_addr);
-
-static void ide_dump_sector(ide_drive_t *drive)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-	u8 lba48 = !!(drive->dev_flags & IDE_DFLAG_LBA48);
-
-	memset(&cmd, 0, sizeof(cmd));
-	if (lba48) {
-		cmd.valid.in.tf  = IDE_VALID_LBA;
-		cmd.valid.in.hob = IDE_VALID_LBA;
-		cmd.tf_flags = IDE_TFLAG_LBA48;
-	} else
-		cmd.valid.in.tf  = IDE_VALID_LBA | IDE_VALID_DEVICE;
-
-	ide_tf_readback(drive, &cmd);
-
-	if (lba48 || (tf->device & ATA_LBA))
-		printk(KERN_CONT ", LBAsect=%llu",
-			(unsigned long long)ide_get_lba_addr(&cmd, lba48));
-	else
-		printk(KERN_CONT ", CHS=%d/%d/%d", (tf->lbah << 8) + tf->lbam,
-			tf->device & 0xf, tf->lbal);
-}
-
-static void ide_dump_ata_error(ide_drive_t *drive, u8 err)
-{
-	printk(KERN_CONT "{ ");
-	if (err & ATA_ABORTED)
-		printk(KERN_CONT "DriveStatusError ");
-	if (err & ATA_ICRC)
-		printk(KERN_CONT "%s",
-			(err & ATA_ABORTED) ? "BadCRC " : "BadSector ");
-	if (err & ATA_UNC)
-		printk(KERN_CONT "UncorrectableError ");
-	if (err & ATA_IDNF)
-		printk(KERN_CONT "SectorIdNotFound ");
-	if (err & ATA_TRK0NF)
-		printk(KERN_CONT "TrackZeroNotFound ");
-	if (err & ATA_AMNF)
-		printk(KERN_CONT "AddrMarkNotFound ");
-	printk(KERN_CONT "}");
-	if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK ||
-	    (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) {
-		struct request *rq = drive->hwif->rq;
-
-		ide_dump_sector(drive);
-
-		if (rq)
-			printk(KERN_CONT ", sector=%llu",
-			       (unsigned long long)blk_rq_pos(rq));
-	}
-	printk(KERN_CONT "\n");
-}
-
-static void ide_dump_atapi_error(ide_drive_t *drive, u8 err)
-{
-	printk(KERN_CONT "{ ");
-	if (err & ATAPI_ILI)
-		printk(KERN_CONT "IllegalLengthIndication ");
-	if (err & ATAPI_EOM)
-		printk(KERN_CONT "EndOfMedia ");
-	if (err & ATA_ABORTED)
-		printk(KERN_CONT "AbortedCommand ");
-	if (err & ATA_MCR)
-		printk(KERN_CONT "MediaChangeRequested ");
-	if (err & ATAPI_LFS)
-		printk(KERN_CONT "LastFailedSense=0x%02x ",
-			(err & ATAPI_LFS) >> 4);
-	printk(KERN_CONT "}\n");
-}
-
-/**
- *	ide_dump_status		-	translate ATA/ATAPI error
- *	@drive: drive that status applies to
- *	@msg: text message to print
- *	@stat: status byte to decode
- *
- *	Error reporting, in human readable form (luxurious, but a memory hog).
- *	Combines the drive name, message and status byte to provide a
- *	user understandable explanation of the device error.
- */
-
-u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat)
-{
-	u8 err = 0;
-
-	printk(KERN_ERR "%s: %s: status=0x%02x { ", drive->name, msg, stat);
-	if (stat & ATA_BUSY)
-		printk(KERN_CONT "Busy ");
-	else {
-		if (stat & ATA_DRDY)
-			printk(KERN_CONT "DriveReady ");
-		if (stat & ATA_DF)
-			printk(KERN_CONT "DeviceFault ");
-		if (stat & ATA_DSC)
-			printk(KERN_CONT "SeekComplete ");
-		if (stat & ATA_DRQ)
-			printk(KERN_CONT "DataRequest ");
-		if (stat & ATA_CORR)
-			printk(KERN_CONT "CorrectedError ");
-		if (stat & ATA_SENSE)
-			printk(KERN_CONT "Sense ");
-		if (stat & ATA_ERR)
-			printk(KERN_CONT "Error ");
-	}
-	printk(KERN_CONT "}\n");
-	if ((stat & (ATA_BUSY | ATA_ERR)) == ATA_ERR) {
-		err = ide_read_error(drive);
-		printk(KERN_ERR "%s: %s: error=0x%02x ", drive->name, msg, err);
-		if (drive->media == ide_disk)
-			ide_dump_ata_error(drive, err);
-		else
-			ide_dump_atapi_error(drive, err);
-	}
-
-	printk(KERN_ERR "%s: possibly failed opcode: 0x%02x\n",
-		drive->name, drive->hwif->cmd.tf.command);
-
-	return err;
-}
-EXPORT_SYMBOL(ide_dump_status);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
deleted file mode 100644
index a80a0f28f7b9e..0000000000000
--- a/drivers/ide/ide-park.c
+++ /dev/null
@@ -1,155 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/ide.h>
-#include <linux/jiffies.h>
-#include <linux/blkdev.h>
-
-DECLARE_WAIT_QUEUE_HEAD(ide_park_wq);
-
-static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct request_queue *q = drive->queue;
-	struct request *rq;
-	int rc;
-
-	timeout += jiffies;
-	spin_lock_irq(&hwif->lock);
-	if (drive->dev_flags & IDE_DFLAG_PARKED) {
-		int reset_timer = time_before(timeout, drive->sleep);
-		int start_queue = 0;
-
-		drive->sleep = timeout;
-		wake_up_all(&ide_park_wq);
-		if (reset_timer && del_timer(&hwif->timer))
-			start_queue = 1;
-		spin_unlock_irq(&hwif->lock);
-
-		if (start_queue)
-			blk_mq_run_hw_queues(q, true);
-		return;
-	}
-	spin_unlock_irq(&hwif->lock);
-
-	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
-	scsi_req(rq)->cmd[0] = REQ_PARK_HEADS;
-	scsi_req(rq)->cmd_len = 1;
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	ide_req(rq)->special = &timeout;
-	blk_execute_rq(NULL, rq, 1);
-	rc = scsi_req(rq)->result ? -EIO : 0;
-	blk_put_request(rq);
-	if (rc)
-		goto out;
-
-	/*
-	 * Make sure that *some* command is sent to the drive after the
-	 * timeout has expired, so power management will be reenabled.
-	 */
-	rq = blk_get_request(q, REQ_OP_DRV_IN, BLK_MQ_REQ_NOWAIT);
-	if (IS_ERR(rq))
-		goto out;
-
-	scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
-	scsi_req(rq)->cmd_len = 1;
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	spin_lock_irq(&hwif->lock);
-	ide_insert_request_head(drive, rq);
-	spin_unlock_irq(&hwif->lock);
-
-out:
-	return;
-}
-
-ide_startstop_t ide_do_park_unpark(ide_drive_t *drive, struct request *rq)
-{
-	struct ide_cmd cmd;
-	struct ide_taskfile *tf = &cmd.tf;
-
-	memset(&cmd, 0, sizeof(cmd));
-	if (scsi_req(rq)->cmd[0] == REQ_PARK_HEADS) {
-		drive->sleep = *(unsigned long *)ide_req(rq)->special;
-		drive->dev_flags |= IDE_DFLAG_SLEEPING;
-		tf->command = ATA_CMD_IDLEIMMEDIATE;
-		tf->feature = 0x44;
-		tf->lbal = 0x4c;
-		tf->lbam = 0x4e;
-		tf->lbah = 0x55;
-		cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-		cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	} else		/* cmd == REQ_UNPARK_HEADS */
-		tf->command = ATA_CMD_CHK_POWER;
-
-	cmd.tf_flags |= IDE_TFLAG_CUSTOM_HANDLER;
-	cmd.protocol = ATA_PROT_NODATA;
-
-	cmd.rq = rq;
-
-	return do_rw_taskfile(drive, &cmd);
-}
-
-ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
-		      char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long now;
-	unsigned int msecs;
-
-	if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
-		return -EOPNOTSUPP;
-
-	spin_lock_irq(&hwif->lock);
-	now = jiffies;
-	if (drive->dev_flags & IDE_DFLAG_PARKED &&
-	    time_after(drive->sleep, now))
-		msecs = jiffies_to_msecs(drive->sleep - now);
-	else
-		msecs = 0;
-	spin_unlock_irq(&hwif->lock);
-
-	return snprintf(buf, 20, "%u\n", msecs);
-}
-
-ssize_t ide_park_store(struct device *dev, struct device_attribute *attr,
-		       const char *buf, size_t len)
-{
-#define MAX_PARK_TIMEOUT 30000
-	ide_drive_t *drive = to_ide_device(dev);
-	long int input;
-	int rc;
-
-	rc = kstrtol(buf, 10, &input);
-	if (rc)
-		return rc;
-	if (input < -2)
-		return -EINVAL;
-	if (input > MAX_PARK_TIMEOUT) {
-		input = MAX_PARK_TIMEOUT;
-		rc = -EOVERFLOW;
-	}
-
-	mutex_lock(&ide_setting_mtx);
-	if (input >= 0) {
-		if (drive->dev_flags & IDE_DFLAG_NO_UNLOAD)
-			rc = -EOPNOTSUPP;
-		else if (input || drive->dev_flags & IDE_DFLAG_PARKED)
-			issue_park_cmd(drive, msecs_to_jiffies(input));
-	} else {
-		if (drive->media == ide_disk)
-			switch (input) {
-			case -1:
-				drive->dev_flags &= ~IDE_DFLAG_NO_UNLOAD;
-				break;
-			case -2:
-				drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-				break;
-			}
-		else
-			rc = -EOPNOTSUPP;
-	}
-	mutex_unlock(&ide_setting_mtx);
-
-	return rc ? rc : len;
-}
diff --git a/drivers/ide/ide-pci-generic.c b/drivers/ide/ide-pci-generic.c
deleted file mode 100644
index 673420db953f2..0000000000000
--- a/drivers/ide/ide-pci-generic.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- *  Copyright (C) 2001-2002	Andre Hedrick <andre@linux-ide.org>
- *  Portions (C) Copyright 2002  Red Hat Inc
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * For the avoidance of doubt the "preferred form" of this code is one which
- * is in an open non patent encumbered format. Where cryptographic key signing
- * forms part of the process of creating an executable the information
- * including keys needed to generate an equivalently functional executable
- * are deemed to be part of the source code.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "ide_pci_generic"
-
-static bool ide_generic_all;		/* Set to claim all devices */
-
-module_param_named(all_generic_ide, ide_generic_all, bool, 0444);
-MODULE_PARM_DESC(all_generic_ide, "IDE generic will claim all unknown PCI IDE storage controllers.");
-
-static void netcell_quirkproc(ide_drive_t *drive)
-{
-	/* mark words 85-87 as valid */
-	drive->id[ATA_ID_CSF_DEFAULT] |= 0x4000;
-}
-
-static const struct ide_port_ops netcell_port_ops = {
-	.quirkproc		= netcell_quirkproc,
-};
-
-#define DECLARE_GENERIC_PCI_DEV(extra_flags) \
-	{ \
-		.name		= DRV_NAME, \
-		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA | \
-				  extra_flags, \
-		.swdma_mask	= ATA_SWDMA2, \
-		.mwdma_mask	= ATA_MWDMA2, \
-		.udma_mask	= ATA_UDMA6, \
-	}
-
-static const struct ide_port_info generic_chipsets[] = {
-	/*  0: Unknown */
-	DECLARE_GENERIC_PCI_DEV(0),
-
-	{	/* 1: NS87410 */
-		.name		= DRV_NAME,
-		.enablebits	= { {0x43, 0x08, 0x08}, {0x47, 0x08, 0x08} },
-		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA6,
-	},
-
-	/*  2: SAMURAI / HT6565 / HINT_IDE */
-	DECLARE_GENERIC_PCI_DEV(0),
-	/*  3: UM8673F / UM8886A / UM8886BF */
-	DECLARE_GENERIC_PCI_DEV(IDE_HFLAG_NO_DMA),
-	/*  4: VIA_IDE / OPTI621V / Piccolo010{2,3,5} */
-	DECLARE_GENERIC_PCI_DEV(IDE_HFLAG_NO_AUTODMA),
-
-	{	/* 5: VIA8237SATA */
-		.name		= DRV_NAME,
-		.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
-				  IDE_HFLAG_OFF_BOARD,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA6,
-	},
-
-	{	/* 6: Revolution */
-		.name		= DRV_NAME,
-		.port_ops	= &netcell_port_ops,
-		.host_flags	= IDE_HFLAG_CLEAR_SIMPLEX |
-				  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
-				  IDE_HFLAG_OFF_BOARD,
-		.swdma_mask	= ATA_SWDMA2,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA6,
-	}
-};
-
-/**
- *	generic_init_one	-	called when a PIIX is found
- *	@dev: the generic device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
-
-static int generic_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d = &generic_chipsets[id->driver_data];
-	int ret = -ENODEV;
-
-	/* Don't use the generic entry unless instructed to do so */
-	if (id->driver_data == 0 && ide_generic_all == 0)
-			goto out;
-
-	switch (dev->vendor) {
-	case PCI_VENDOR_ID_UMC:
-		if (dev->device == PCI_DEVICE_ID_UMC_UM8886A &&
-				!(PCI_FUNC(dev->devfn) & 1))
-			goto out; /* UM8886A/BF pair */
-		break;
-	case PCI_VENDOR_ID_OPTI:
-		if (dev->device == PCI_DEVICE_ID_OPTI_82C558 &&
-				!(PCI_FUNC(dev->devfn) & 1))
-			goto out;
-		break;
-	case PCI_VENDOR_ID_JMICRON:
-		if (dev->device != PCI_DEVICE_ID_JMICRON_JMB368 &&
-				PCI_FUNC(dev->devfn) != 1)
-			goto out;
-		break;
-	case PCI_VENDOR_ID_NS:
-		if (dev->device == PCI_DEVICE_ID_NS_87410 &&
-				(dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
-			goto out;
-		break;
-	}
-
-	if (dev->vendor != PCI_VENDOR_ID_JMICRON) {
-		u16 command;
-		pci_read_config_word(dev, PCI_COMMAND, &command);
-		if (!(command & PCI_COMMAND_IO)) {
-			printk(KERN_INFO "%s %s: skipping disabled "
-				"controller\n", d->name, pci_name(dev));
-			goto out;
-		}
-	}
-	ret = ide_pci_init_one(dev, d, NULL);
-out:
-	return ret;
-}
-
-static const struct pci_device_id generic_pci_tbl[] = {
-	{ PCI_VDEVICE(NS,	PCI_DEVICE_ID_NS_87410),		 1 },
-	{ PCI_VDEVICE(PCTECH,	PCI_DEVICE_ID_PCTECH_SAMURAI_IDE),	 2 },
-	{ PCI_VDEVICE(HOLTEK,	PCI_DEVICE_ID_HOLTEK_6565),		 2 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8673F),		 3 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886A),		 3 },
-	{ PCI_VDEVICE(UMC,	PCI_DEVICE_ID_UMC_UM8886BF),		 3 },
-	{ PCI_VDEVICE(HINT,	PCI_DEVICE_ID_HINT_VXPROII_IDE),	 2 },
-	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_82C561),		 4 },
-	{ PCI_VDEVICE(OPTI,	PCI_DEVICE_ID_OPTI_82C558),		 4 },
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(VIA,	PCI_DEVICE_ID_VIA_8237_SATA),		 5 },
-#endif
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_1),	 4 },
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_2),	 4 },
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_3),	 4 },
-	{ PCI_VDEVICE(TOSHIBA,	PCI_DEVICE_ID_TOSHIBA_PICCOLO_5),	 4 },
-	{ PCI_VDEVICE(NETCELL,	PCI_DEVICE_ID_REVOLUTION),		 6 },
-	/*
-	 * Must come last.  If you add entries adjust
-	 * this table and generic_chipsets[] appropriately.
-	 */
-	{ PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, generic_pci_tbl);
-
-static struct pci_driver generic_pci_driver = {
-	.name		= "PCI_IDE",
-	.id_table	= generic_pci_tbl,
-	.probe		= generic_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init generic_ide_init(void)
-{
-	return ide_pci_register_driver(&generic_pci_driver);
-}
-
-static void __exit generic_ide_exit(void)
-{
-	pci_unregister_driver(&generic_pci_driver);
-}
-
-module_init(generic_ide_init);
-module_exit(generic_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for generic PCI IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-pio-blacklist.c b/drivers/ide/ide-pio-blacklist.c
deleted file mode 100644
index 1fd24798e5c90..0000000000000
--- a/drivers/ide/ide-pio-blacklist.c
+++ /dev/null
@@ -1,96 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * PIO blacklist.  Some drives incorrectly report their maximal PIO mode,
- * at least in respect to CMD640.  Here we keep info on some known drives.
- *
- * Changes to the ide_pio_blacklist[] should be made with EXTREME CAUTION
- * to avoid breaking the fragile cmd640.c support.
- */
-
-#include <linux/string.h>
-#include <linux/ide.h>
-
-static struct ide_pio_info {
-	const char	*name;
-	int		pio;
-} ide_pio_blacklist [] = {
-	{ "Conner Peripherals 540MB - CFS540A", 3 },
-
-	{ "WDC AC2700",  3 },
-	{ "WDC AC2540",  3 },
-	{ "WDC AC2420",  3 },
-	{ "WDC AC2340",  3 },
-	{ "WDC AC2250",  0 },
-	{ "WDC AC2200",  0 },
-	{ "WDC AC21200", 4 },
-	{ "WDC AC2120",  0 },
-	{ "WDC AC2850",  3 },
-	{ "WDC AC1270",  3 },
-	{ "WDC AC1170",  1 },
-	{ "WDC AC1210",  1 },
-	{ "WDC AC280",   0 },
-	{ "WDC AC31000", 3 },
-	{ "WDC AC31200", 3 },
-
-	{ "Maxtor 7131 AT", 1 },
-	{ "Maxtor 7171 AT", 1 },
-	{ "Maxtor 7213 AT", 1 },
-	{ "Maxtor 7245 AT", 1 },
-	{ "Maxtor 7345 AT", 1 },
-	{ "Maxtor 7546 AT", 3 },
-	{ "Maxtor 7540 AV", 3 },
-
-	{ "SAMSUNG SHD-3121A", 1 },
-	{ "SAMSUNG SHD-3122A", 1 },
-	{ "SAMSUNG SHD-3172A", 1 },
-
-	{ "ST5660A",  3 },
-	{ "ST3660A",  3 },
-	{ "ST3630A",  3 },
-	{ "ST3655A",  3 },
-	{ "ST3391A",  3 },
-	{ "ST3390A",  1 },
-	{ "ST3600A",  1 },
-	{ "ST3290A",  0 },
-	{ "ST3144A",  0 },
-	{ "ST3491A",  1 }, /* reports 3, should be 1 or 2 (depending on drive)
-			      according to Seagate's FIND-ATA program */
-
-	{ "QUANTUM ELS127A", 0 },
-	{ "QUANTUM ELS170A", 0 },
-	{ "QUANTUM LPS240A", 0 },
-	{ "QUANTUM LPS210A", 3 },
-	{ "QUANTUM LPS270A", 3 },
-	{ "QUANTUM LPS365A", 3 },
-	{ "QUANTUM LPS540A", 3 },
-	{ "QUANTUM LIGHTNING 540A", 3 },
-	{ "QUANTUM LIGHTNING 730A", 3 },
-
-	{ "QUANTUM FIREBALL_540", 3 }, /* Older Quantum Fireballs don't work */
-	{ "QUANTUM FIREBALL_640", 3 },
-	{ "QUANTUM FIREBALL_1080", 3 },
-	{ "QUANTUM FIREBALL_1280", 3 },
-	{ NULL, 0 }
-};
-
-/**
- *	ide_scan_pio_blacklist 	-	check for a blacklisted drive
- *	@model: Drive model string
- *
- *	This routine searches the ide_pio_blacklist for an entry
- *	matching the start/whole of the supplied model name.
- *
- *	Returns -1 if no match found.
- *	Otherwise returns the recommended PIO mode from ide_pio_blacklist[].
- */
-
-int ide_scan_pio_blacklist(char *model)
-{
-	struct ide_pio_info *p;
-
-	for (p = ide_pio_blacklist; p->name != NULL; p++) {
-		if (strncmp(p->name, model, strlen(p->name)) == 0)
-			return p->pio;
-	}
-	return -1;
-}
diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
deleted file mode 100644
index d680b3e3295fa..0000000000000
--- a/drivers/ide/ide-pm.c
+++ /dev/null
@@ -1,261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/ide.h>
-
-int generic_ide_suspend(struct device *dev, pm_message_t mesg)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq;
-	struct ide_pm_state rqpm;
-	int ret;
-
-	if (ide_port_acpi(hwif)) {
-		/* call ACPI _GTM only once */
-		if ((drive->dn & 1) == 0 || pair == NULL)
-			ide_acpi_get_timing(hwif);
-	}
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_PM_SUSPEND;
-	ide_req(rq)->special = &rqpm;
-	rqpm.pm_step = IDE_PM_START_SUSPEND;
-	if (mesg.event == PM_EVENT_PRETHAW)
-		mesg.event = PM_EVENT_FREEZE;
-	rqpm.pm_state = mesg.event;
-
-	blk_execute_rq(NULL, rq, 0);
-	ret = scsi_req(rq)->result ? -EIO : 0;
-	blk_put_request(rq);
-
-	if (ret == 0 && ide_port_acpi(hwif)) {
-		/* call ACPI _PS3 only after both devices are suspended */
-		if ((drive->dn & 1) || pair == NULL)
-			ide_acpi_set_state(hwif, 0);
-	}
-
-	return ret;
-}
-
-static int ide_pm_execute_rq(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-
-	if (unlikely(blk_queue_dying(q))) {
-		rq->rq_flags |= RQF_QUIET;
-		scsi_req(rq)->result = -ENXIO;
-		blk_mq_end_request(rq, BLK_STS_OK);
-		return -ENXIO;
-	}
-	blk_execute_rq(NULL, rq, true);
-
-	return scsi_req(rq)->result ? -EIO : 0;
-}
-
-int generic_ide_resume(struct device *dev)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq;
-	struct ide_pm_state rqpm;
-	int err;
-
-	blk_mq_start_stopped_hw_queues(drive->queue, true);
-
-	if (ide_port_acpi(hwif)) {
-		/* call ACPI _PS0 / _STM only once */
-		if ((drive->dn & 1) == 0 || pair == NULL) {
-			ide_acpi_set_state(hwif, 1);
-			ide_acpi_push_timing(hwif);
-		}
-
-		ide_acpi_exec_tfs(drive);
-	}
-
-	memset(&rqpm, 0, sizeof(rqpm));
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
-	ide_req(rq)->type = ATA_PRIV_PM_RESUME;
-	ide_req(rq)->special = &rqpm;
-	rqpm.pm_step = IDE_PM_START_RESUME;
-	rqpm.pm_state = PM_EVENT_ON;
-
-	err = ide_pm_execute_rq(rq);
-	blk_put_request(rq);
-
-	if (err == 0 && dev->driver) {
-		struct ide_driver *drv = to_ide_driver(dev->driver);
-
-		if (drv->resume)
-			drv->resume(drive);
-	}
-
-	return err;
-}
-
-void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct ide_pm_state *pm = ide_req(rq)->special;
-
-#ifdef DEBUG_PM
-	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
-		drive->name, pm->pm_step);
-#endif
-	if (drive->media != ide_disk)
-		return;
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (pm->pm_state == PM_EVENT_FREEZE)
-			pm->pm_step = IDE_PM_COMPLETED;
-		else
-			pm->pm_step = IDE_PM_STANDBY;
-		break;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		pm->pm_step = IDE_PM_COMPLETED;
-		break;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		pm->pm_step = IDE_PM_IDLE;
-		break;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
-		pm->pm_step = IDE_PM_RESTORE_DMA;
-		break;
-	}
-}
-
-ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
-{
-	struct ide_pm_state *pm = ide_req(rq)->special;
-	struct ide_cmd cmd = { };
-
-	switch (pm->pm_step) {
-	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
-		if (drive->media != ide_disk)
-			break;
-		/* Not supported? Switch to next step now. */
-		if (ata_id_flush_enabled(drive->id) == 0 ||
-		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
-			ide_complete_power_step(drive, rq);
-			return ide_stopped;
-		}
-		if (ata_id_flush_ext_enabled(drive->id))
-			cmd.tf.command = ATA_CMD_FLUSH_EXT;
-		else
-			cmd.tf.command = ATA_CMD_FLUSH;
-		goto out_do_tf;
-	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
-		cmd.tf.command = ATA_CMD_STANDBYNOW1;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
-		ide_set_max_pio(drive);
-		/*
-		 * skip IDE_PM_IDLE for ATAPI devices
-		 */
-		if (drive->media != ide_disk)
-			pm->pm_step = IDE_PM_RESTORE_DMA;
-		else
-			ide_complete_power_step(drive, rq);
-		return ide_stopped;
-	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
-		cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
-		goto out_do_tf;
-	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
-		/*
-		 * Right now, all we do is call ide_set_dma(drive),
-		 * we could be smarter and check for current xfer_speed
-		 * in struct drive etc...
-		 */
-		if (drive->hwif->dma_ops == NULL)
-			break;
-		/*
-		 * TODO: respect IDE_DFLAG_USING_DMA
-		 */
-		ide_set_dma(drive);
-		break;
-	}
-
-	pm->pm_step = IDE_PM_COMPLETED;
-
-	return ide_stopped;
-
-out_do_tf:
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd.protocol = ATA_PROT_NODATA;
-
-	return do_rw_taskfile(drive, &cmd);
-}
-
-/**
- *	ide_complete_pm_rq - end the current Power Management request
- *	@drive: target drive
- *	@rq: request
- *
- *	This function cleans up the current PM request and stops the queue
- *	if necessary.
- */
-void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
-{
-	struct request_queue *q = drive->queue;
-	struct ide_pm_state *pm = ide_req(rq)->special;
-
-	ide_complete_power_step(drive, rq);
-	if (pm->pm_step != IDE_PM_COMPLETED)
-		return;
-
-#ifdef DEBUG_PM
-	printk("%s: completing PM request, %s\n", drive->name,
-	       (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND) ? "suspend" : "resume");
-#endif
-	if (ide_req(rq)->type == ATA_PRIV_PM_SUSPEND)
-		blk_mq_stop_hw_queues(q);
-	else
-		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
-
-	drive->hwif->rq = NULL;
-
-	blk_mq_end_request(rq, BLK_STS_OK);
-}
-
-void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
-{
-	struct ide_pm_state *pm = ide_req(rq)->special;
-
-	if (blk_rq_is_private(rq) &&
-	    ide_req(rq)->type == ATA_PRIV_PM_SUSPEND &&
-	    pm->pm_step == IDE_PM_START_SUSPEND)
-		/* Mark drive blocked when starting the suspend sequence. */
-		drive->dev_flags |= IDE_DFLAG_BLOCKED;
-	else if (blk_rq_is_private(rq) &&
-	         ide_req(rq)->type == ATA_PRIV_PM_RESUME &&
-		 pm->pm_step == IDE_PM_START_RESUME) {
-		/*
-		 * The first thing we do on wakeup is to wait for BSY bit to
-		 * go away (with a looong timeout) as a drive on this hwif may
-		 * just be POSTing itself.
-		 * We do that before even selecting as the "other" device on
-		 * the bus may be broken enough to walk on our toes at this
-		 * point.
-		 */
-		ide_hwif_t *hwif = drive->hwif;
-		const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-		struct request_queue *q = drive->queue;
-		int rc;
-#ifdef DEBUG_PM
-		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
-#endif
-		rc = ide_wait_not_busy(hwif, 35000);
-		if (rc)
-			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
-		tp_ops->dev_select(drive);
-		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-		rc = ide_wait_not_busy(hwif, 100000);
-		if (rc)
-			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);
-
-		blk_mq_start_hw_queues(q);
-	}
-}
diff --git a/drivers/ide/ide-pnp.c b/drivers/ide/ide-pnp.c
deleted file mode 100644
index fc541f1cf8de6..0000000000000
--- a/drivers/ide/ide-pnp.c
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * This file provides autodetection for ISA PnP IDE interfaces.
- * It was tested with "ESS ES1868 Plug and Play AudioDrive" IDE interface.
- *
- * Copyright (C) 2000 Andrey Panin <pazke@donpac.ru>
- */
-
-#include <linux/init.h>
-#include <linux/pnp.h>
-#include <linux/ide.h>
-#include <linux/module.h>
-
-#define DRV_NAME "ide-pnp"
-
-/* Add your devices here :)) */
-static const struct pnp_device_id idepnp_devices[] = {
-	/* Generic ESDI/IDE/ATA compatible hard disk controller */
-	{.id = "PNP0600", .driver_data = 0},
-	{.id = ""}
-};
-
-static const struct ide_port_info ide_pnp_port_info = {
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
-};
-
-static int idepnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id)
-{
-	struct ide_host *host;
-	unsigned long base, ctl;
-	int rc;
-	struct ide_hw hw, *hws[] = { &hw };
-
-	printk(KERN_INFO DRV_NAME ": generic PnP IDE interface\n");
-
-	if (!(pnp_port_valid(dev, 0) && pnp_port_valid(dev, 1) && pnp_irq_valid(dev, 0)))
-		return -1;
-
-	base = pnp_port_start(dev, 0);
-	ctl = pnp_port_start(dev, 1);
-
-	if (!request_region(base, 8, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX-0x%lX not free.\n",
-				DRV_NAME, base, base + 7);
-		return -EBUSY;
-	}
-
-	if (!request_region(ctl, 1, DRV_NAME)) {
-		printk(KERN_ERR "%s: I/O resource 0x%lX not free.\n",
-				DRV_NAME, ctl);
-		release_region(base, 8);
-		return -EBUSY;
-	}
-
-	memset(&hw, 0, sizeof(hw));
-	ide_std_init_ports(&hw, base, ctl);
-	hw.irq = pnp_irq(dev, 0);
-
-	rc = ide_host_add(&ide_pnp_port_info, hws, 1, &host);
-	if (rc)
-		goto out;
-
-	pnp_set_drvdata(dev, host);
-
-	return 0;
-out:
-	release_region(ctl, 1);
-	release_region(base, 8);
-
-	return rc;
-}
-
-static void idepnp_remove(struct pnp_dev *dev)
-{
-	struct ide_host *host = pnp_get_drvdata(dev);
-
-	ide_host_remove(host);
-
-	release_region(pnp_port_start(dev, 1), 1);
-	release_region(pnp_port_start(dev, 0), 8);
-}
-
-static struct pnp_driver idepnp_driver = {
-	.name		= "ide",
-	.id_table	= idepnp_devices,
-	.probe		= idepnp_probe,
-	.remove		= idepnp_remove,
-};
-
-module_pnp_driver(idepnp_driver);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
deleted file mode 100644
index aefd74c0d8628..0000000000000
--- a/drivers/ide/ide-probe.c
+++ /dev/null
@@ -1,1623 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1994-1998   Linus Torvalds & authors (see below)
- *  Copyright (C) 2005, 2007  Bartlomiej Zolnierkiewicz
- */
-
-/*
- *  Mostly written by Mark Lord <mlord@pobox.com>
- *                and Gadi Oxman <gadio@netvision.net.il>
- *                and Andre Hedrick <andre@linux-ide.org>
- *
- *  See linux/MAINTAINERS for address of current maintainer.
- *
- * This is the IDE probe module, as evolved from hd.c and ide.c.
- *
- * -- increase WAIT_PIDENTIFY to avoid CD-ROM locking at boot
- *	 by Andrea Arcangeli
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/spinlock.h>
-#include <linux/kmod.h>
-#include <linux/pci.h>
-#include <linux/scatterlist.h>
-
-#include <asm/byteorder.h>
-#include <asm/irq.h>
-#include <linux/uaccess.h>
-#include <asm/io.h>
-
-/**
- *	generic_id		-	add a generic drive id
- *	@drive:	drive to make an ID block for
- *	
- *	Add a fake id field to the drive we are passed. This allows
- *	use to skip a ton of NULL checks (which people always miss) 
- *	and make drive properties unconditional outside of this file
- */
- 
-static void generic_id(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-
-	id[ATA_ID_CUR_CYLS]	= id[ATA_ID_CYLS]	= drive->cyl;
-	id[ATA_ID_CUR_HEADS]	= id[ATA_ID_HEADS]	= drive->head;
-	id[ATA_ID_CUR_SECTORS]	= id[ATA_ID_SECTORS]	= drive->sect;
-}
-
-static void ide_disk_init_chs(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-
-	/* Extract geometry if we did not already have one for the drive */
-	if (!drive->cyl || !drive->head || !drive->sect) {
-		drive->cyl  = drive->bios_cyl  = id[ATA_ID_CYLS];
-		drive->head = drive->bios_head = id[ATA_ID_HEADS];
-		drive->sect = drive->bios_sect = id[ATA_ID_SECTORS];
-	}
-
-	/* Handle logical geometry translation by the drive */
-	if (ata_id_current_chs_valid(id)) {
-		drive->cyl  = id[ATA_ID_CUR_CYLS];
-		drive->head = id[ATA_ID_CUR_HEADS];
-		drive->sect = id[ATA_ID_CUR_SECTORS];
-	}
-
-	/* Use physical geometry if what we have still makes no sense */
-	if (drive->head > 16 && id[ATA_ID_HEADS] && id[ATA_ID_HEADS] <= 16) {
-		drive->cyl  = id[ATA_ID_CYLS];
-		drive->head = id[ATA_ID_HEADS];
-		drive->sect = id[ATA_ID_SECTORS];
-	}
-}
-
-static void ide_disk_init_mult_count(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	u8 max_multsect = id[ATA_ID_MAX_MULTSECT] & 0xff;
-
-	if (max_multsect) {
-		if ((max_multsect / 2) > 1)
-			id[ATA_ID_MULTSECT] = max_multsect | 0x100;
-		else
-			id[ATA_ID_MULTSECT] &= ~0x1ff;
-
-		drive->mult_req = id[ATA_ID_MULTSECT] & 0xff;
-
-		if (drive->mult_req)
-			drive->special_flags |= IDE_SFLAG_SET_MULTMODE;
-	}
-}
-
-static void ide_classify_ata_dev(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	char *m = (char *)&id[ATA_ID_PROD];
-	int is_cfa = ata_id_is_cfa(id);
-
-	/* CF devices are *not* removable in Linux definition of the term */
-	if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7)))
-		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-
-	drive->media = ide_disk;
-
-	if (!ata_id_has_unload(drive->id))
-		drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-
-	printk(KERN_INFO "%s: %s, %s DISK drive\n", drive->name, m,
-		is_cfa ? "CFA" : "ATA");
-}
-
-static void ide_classify_atapi_dev(ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	char *m = (char *)&id[ATA_ID_PROD];
-	u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f;
-
-	printk(KERN_INFO "%s: %s, ATAPI ", drive->name, m);
-	switch (type) {
-	case ide_floppy:
-		if (!strstr(m, "CD-ROM")) {
-			if (!strstr(m, "oppy") &&
-			    !strstr(m, "poyp") &&
-			    !strstr(m, "ZIP"))
-				printk(KERN_CONT "cdrom or floppy?, assuming ");
-			if (drive->media != ide_cdrom) {
-				printk(KERN_CONT "FLOPPY");
-				drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-				break;
-			}
-		}
-		/* Early cdrom models used zero */
-		type = ide_cdrom;
-		fallthrough;
-	case ide_cdrom:
-		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-#ifdef CONFIG_PPC
-		/* kludge for Apple PowerBook internal zip */
-		if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) {
-			printk(KERN_CONT "FLOPPY");
-			type = ide_floppy;
-			break;
-		}
-#endif
-		printk(KERN_CONT "CD/DVD-ROM");
-		break;
-	case ide_tape:
-		printk(KERN_CONT "TAPE");
-		break;
-	case ide_optical:
-		printk(KERN_CONT "OPTICAL");
-		drive->dev_flags |= IDE_DFLAG_REMOVABLE;
-		break;
-	default:
-		printk(KERN_CONT "UNKNOWN (type %d)", type);
-		break;
-	}
-
-	printk(KERN_CONT " drive\n");
-	drive->media = type;
-	/* an ATAPI device ignores DRDY */
-	drive->ready_stat = 0;
-	if (ata_id_cdb_intr(id))
-		drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT;
-	drive->dev_flags |= IDE_DFLAG_DOORLOCKING;
-	/* we don't do head unloading on ATAPI devices */
-	drive->dev_flags |= IDE_DFLAG_NO_UNLOAD;
-}
-
-/**
- *	do_identify	-	identify a drive
- *	@drive: drive to identify 
- *	@cmd: command used
- *	@id: buffer for IDENTIFY data
- *
- *	Called when we have issued a drive identify command to
- *	read and parse the results. This function is run with
- *	interrupts disabled. 
- */
-
-static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	char *m = (char *)&id[ATA_ID_PROD];
-	unsigned long flags;
-	int bswap = 1;
-
-	/* local CPU only; some systems need this */
-	local_irq_save(flags);
-	/* read 512 bytes of id info */
-	hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE);
-	local_irq_restore(flags);
-
-	drive->dev_flags |= IDE_DFLAG_ID_READ;
-#ifdef DEBUG
-	printk(KERN_INFO "%s: dumping identify data\n", drive->name);
-	ide_dump_identify((u8 *)id);
-#endif
-	ide_fix_driveid(id);
-
-	/*
-	 *  ATA_CMD_ID_ATA returns little-endian info,
-	 *  ATA_CMD_ID_ATAPI *usually* returns little-endian info.
-	 */
-	if (cmd == ATA_CMD_ID_ATAPI) {
-		if ((m[0] == 'N' && m[1] == 'E') ||  /* NEC */
-		    (m[0] == 'F' && m[1] == 'X') ||  /* Mitsumi */
-		    (m[0] == 'P' && m[1] == 'i'))    /* Pioneer */
-			/* Vertos drives may still be weird */
-			bswap ^= 1;
-	}
-
-	ide_fixstring(m, ATA_ID_PROD_LEN, bswap);
-	ide_fixstring((char *)&id[ATA_ID_FW_REV], ATA_ID_FW_REV_LEN, bswap);
-	ide_fixstring((char *)&id[ATA_ID_SERNO], ATA_ID_SERNO_LEN, bswap);
-
-	/* we depend on this a lot! */
-	m[ATA_ID_PROD_LEN - 1] = '\0';
-
-	if (strstr(m, "E X A B Y T E N E S T"))
-		drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-	else
-		drive->dev_flags |= IDE_DFLAG_PRESENT;
-}
-
-/**
- *	ide_dev_read_id	-	send ATA/ATAPI IDENTIFY command
- *	@drive: drive to identify
- *	@cmd: command to use
- *	@id: buffer for IDENTIFY data
- *	@irq_ctx: flag set when called from the IRQ context
- *
- *	Sends an ATA(PI) IDENTIFY request to a drive and waits for a response.
- *
- *	Returns:	0  device was identified
- *			1  device timed-out (no response to identify request)
- *			2  device aborted the command (refused to identify itself)
- */
-
-int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id, int irq_ctx)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	int use_altstatus = 0, rc;
-	unsigned long timeout;
-	u8 s = 0, a = 0;
-
-	/*
-	 * Disable device IRQ.  Otherwise we'll get spurious interrupts
-	 * during the identify phase that the IRQ handler isn't expecting.
-	 */
-	if (io_ports->ctl_addr)
-		tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS);
-
-	/* take a deep breath */
-	if (irq_ctx)
-		mdelay(50);
-	else
-		msleep(50);
-
-	if (io_ports->ctl_addr &&
-	    (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) {
-		a = tp_ops->read_altstatus(hwif);
-		s = tp_ops->read_status(hwif);
-		if ((a ^ s) & ~ATA_SENSE)
-			/* ancient Seagate drives, broken interfaces */
-			printk(KERN_INFO "%s: probing with STATUS(0x%02x) "
-					 "instead of ALTSTATUS(0x%02x)\n",
-					 drive->name, s, a);
-		else
-			/* use non-intrusive polling */
-			use_altstatus = 1;
-	}
-
-	/* set features register for atapi
-	 * identify command to be sure of reply
-	 */
-	if (cmd == ATA_CMD_ID_ATAPI) {
-		struct ide_taskfile tf;
-
-		memset(&tf, 0, sizeof(tf));
-		/* disable DMA & overlap */
-		tp_ops->tf_load(drive, &tf, IDE_VALID_FEATURE);
-	}
-
-	/* ask drive for ID */
-	tp_ops->exec_command(hwif, cmd);
-
-	timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
-
-	/* wait for IRQ and ATA_DRQ */
-	if (irq_ctx) {
-		rc = __ide_wait_stat(drive, ATA_DRQ, BAD_R_STAT, timeout, &s);
-		if (rc)
-			return 1;
-	} else {
-		rc = ide_busy_sleep(drive, timeout, use_altstatus);
-		if (rc)
-			return 1;
-
-		msleep(50);
-		s = tp_ops->read_status(hwif);
-	}
-
-	if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) {
-		/* drive returned ID */
-		do_identify(drive, cmd, id);
-		/* drive responded with ID */
-		rc = 0;
-		/* clear drive IRQ */
-		(void)tp_ops->read_status(hwif);
-	} else {
-		/* drive refused ID */
-		rc = 2;
-	}
-	return rc;
-}
-
-int ide_busy_sleep(ide_drive_t *drive, unsigned long timeout, int altstatus)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 stat;
-
-	timeout += jiffies;
-
-	do {
-		msleep(50);	/* give drive a breather */
-		stat = altstatus ? hwif->tp_ops->read_altstatus(hwif)
-				 : hwif->tp_ops->read_status(hwif);
-		if ((stat & ATA_BUSY) == 0)
-			return 0;
-	} while (time_before(jiffies, timeout));
-
-	printk(KERN_ERR "%s: timeout in %s\n", drive->name, __func__);
-
-	return 1;	/* drive timed-out */
-}
-
-static u8 ide_read_device(ide_drive_t *drive)
-{
-	struct ide_taskfile tf;
-
-	drive->hwif->tp_ops->tf_read(drive, &tf, IDE_VALID_DEVICE);
-
-	return tf.device;
-}
-
-/**
- *	do_probe		-	probe an IDE device
- *	@drive: drive to probe
- *	@cmd: command to use
- *
- *	do_probe() has the difficult job of finding a drive if it exists,
- *	without getting hung up if it doesn't exist, without trampling on
- *	ethernet cards, and without leaving any IRQs dangling to haunt us later.
- *
- *	If a drive is "known" to exist (from CMOS or kernel parameters),
- *	but does not respond right away, the probe will "hang in there"
- *	for the maximum wait time (about 30 seconds), otherwise it will
- *	exit much more quickly.
- *
- * Returns:	0  device was identified
- *		1  device timed-out (no response to identify request)
- *		2  device aborted the command (refused to identify itself)
- *		3  bad status from device (possible for ATAPI drives)
- *		4  probe was not attempted because failure was obvious
- */
-
-static int do_probe (ide_drive_t *drive, u8 cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	u16 *id = drive->id;
-	int rc;
-	u8 present = !!(drive->dev_flags & IDE_DFLAG_PRESENT), stat;
-
-	/* avoid waiting for inappropriate probes */
-	if (present && drive->media != ide_disk && cmd == ATA_CMD_ID_ATA)
-		return 4;
-
-#ifdef DEBUG
-	printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n",
-		drive->name, present, drive->media,
-		(cmd == ATA_CMD_ID_ATA) ? "ATA" : "ATAPI");
-#endif
-
-	/* needed for some systems
-	 * (e.g. crw9624 as drive0 with disk as slave)
-	 */
-	msleep(50);
-	tp_ops->dev_select(drive);
-	msleep(50);
-
-	if (ide_read_device(drive) != drive->select && present == 0) {
-		if (drive->dn & 1) {
-			/* exit with drive0 selected */
-			tp_ops->dev_select(hwif->devices[0]);
-			/* allow ATA_BUSY to assert & clear */
-			msleep(50);
-		}
-		/* no i/f present: mmm.. this should be a 4 -ml */
-		return 3;
-	}
-
-	stat = tp_ops->read_status(hwif);
-
-	if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) ||
-	    present || cmd == ATA_CMD_ID_ATAPI) {
-		rc = ide_dev_read_id(drive, cmd, id, 0);
-		if (rc)
-			/* failed: try again */
-			rc = ide_dev_read_id(drive, cmd, id, 0);
-
-		stat = tp_ops->read_status(hwif);
-
-		if (stat == (ATA_BUSY | ATA_DRDY))
-			return 4;
-
-		if (rc == 1 && cmd == ATA_CMD_ID_ATAPI) {
-			printk(KERN_ERR "%s: no response (status = 0x%02x), "
-					"resetting drive\n", drive->name, stat);
-			msleep(50);
-			tp_ops->dev_select(drive);
-			msleep(50);
-			tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET);
-			(void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0);
-			rc = ide_dev_read_id(drive, cmd, id, 0);
-		}
-
-		/* ensure drive IRQ is clear */
-		stat = tp_ops->read_status(hwif);
-
-		if (rc == 1)
-			printk(KERN_ERR "%s: no response (status = 0x%02x)\n",
-					drive->name, stat);
-	} else {
-		/* not present or maybe ATAPI */
-		rc = 3;
-	}
-	if (drive->dn & 1) {
-		/* exit with drive0 selected */
-		tp_ops->dev_select(hwif->devices[0]);
-		msleep(50);
-		/* ensure drive irq is clear */
-		(void)tp_ops->read_status(hwif);
-	}
-	return rc;
-}
-
-/**
- *	probe_for_drives	-	upper level drive probe
- *	@drive: drive to probe for
- *
- *	probe_for_drive() tests for existence of a given drive using do_probe()
- *	and presents things to the user as needed.
- *
- *	Returns:	0  no device was found
- *			1  device was found
- *			   (note: IDE_DFLAG_PRESENT might still be not set)
- */
-
-static u8 probe_for_drive(ide_drive_t *drive)
-{
-	char *m;
-	int rc;
-	u8 cmd;
-
-	drive->dev_flags &= ~IDE_DFLAG_ID_READ;
-
-	m = (char *)&drive->id[ATA_ID_PROD];
-	strcpy(m, "UNKNOWN");
-
-	/* skip probing? */
-	if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0) {
-		/* if !(success||timed-out) */
-		cmd = ATA_CMD_ID_ATA;
-		rc = do_probe(drive, cmd);
-		if (rc >= 2) {
-			/* look for ATAPI device */
-			cmd = ATA_CMD_ID_ATAPI;
-			rc = do_probe(drive, cmd);
-		}
-
-		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-			return 0;
-
-		/* identification failed? */
-		if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
-			if (drive->media == ide_disk) {
-				printk(KERN_INFO "%s: non-IDE drive, CHS=%d/%d/%d\n",
-					drive->name, drive->cyl,
-					drive->head, drive->sect);
-			} else if (drive->media == ide_cdrom) {
-				printk(KERN_INFO "%s: ATAPI cdrom (?)\n", drive->name);
-			} else {
-				/* nuke it */
-				printk(KERN_WARNING "%s: Unknown device on bus refused identification. Ignoring.\n", drive->name);
-				drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-			}
-		} else {
-			if (cmd == ATA_CMD_ID_ATAPI)
-				ide_classify_atapi_dev(drive);
-			else
-				ide_classify_ata_dev(drive);
-		}
-	}
-
-	if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		return 0;
-
-	/* The drive wasn't being helpful. Add generic info only */
-	if ((drive->dev_flags & IDE_DFLAG_ID_READ) == 0) {
-		generic_id(drive);
-		return 1;
-	}
-
-	if (drive->media == ide_disk) {
-		ide_disk_init_chs(drive);
-		ide_disk_init_mult_count(drive);
-	}
-
-	return 1;
-}
-
-static void hwif_release_dev(struct device *dev)
-{
-	ide_hwif_t *hwif = container_of(dev, ide_hwif_t, gendev);
-
-	complete(&hwif->gendev_rel_comp);
-}
-
-static int ide_register_port(ide_hwif_t *hwif)
-{
-	int ret;
-
-	/* register with global device tree */
-	dev_set_name(&hwif->gendev, "%s", hwif->name);
-	dev_set_drvdata(&hwif->gendev, hwif);
-	if (hwif->gendev.parent == NULL)
-		hwif->gendev.parent = hwif->dev;
-	hwif->gendev.release = hwif_release_dev;
-
-	ret = device_register(&hwif->gendev);
-	if (ret < 0) {
-		printk(KERN_WARNING "IDE: %s: device_register error: %d\n",
-			__func__, ret);
-		goto out;
-	}
-
-	hwif->portdev = device_create(ide_port_class, &hwif->gendev,
-				      MKDEV(0, 0), hwif, "%s", hwif->name);
-	if (IS_ERR(hwif->portdev)) {
-		ret = PTR_ERR(hwif->portdev);
-		device_unregister(&hwif->gendev);
-	}
-out:
-	return ret;
-}
-
-/**
- *	ide_port_wait_ready	-	wait for port to become ready
- *	@hwif: IDE port
- *
- *	This is needed on some PPCs and a bunch of BIOS-less embedded
- *	platforms.  Typical cases are:
- *
- *	- The firmware hard reset the disk before booting the kernel,
- *	  the drive is still doing it's poweron-reset sequence, that
- *	  can take up to 30 seconds.
- *
- *	- The firmware does nothing (or no firmware), the device is
- *	  still in POST state (same as above actually).
- *
- *	- Some CD/DVD/Writer combo drives tend to drive the bus during
- *	  their reset sequence even when they are non-selected slave
- *	  devices, thus preventing discovery of the main HD.
- *
- *	Doing this wait-for-non-busy should not harm any existing
- *	configuration and fix some issues like the above.
- *
- *	BenH.
- *
- *	Returns 0 on success, error code (< 0) otherwise.
- */
-
-static int ide_port_wait_ready(ide_hwif_t *hwif)
-{
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	ide_drive_t *drive;
-	int i, rc;
-
-	printk(KERN_DEBUG "Probing IDE interface %s...\n", hwif->name);
-
-	/* Let HW settle down a bit from whatever init state we
-	 * come from */
-	mdelay(2);
-
-	/* Wait for BSY bit to go away, spec timeout is 30 seconds,
-	 * I know of at least one disk who takes 31 seconds, I use 35
-	 * here to be safe
-	 */
-	rc = ide_wait_not_busy(hwif, 35000);
-	if (rc)
-		return rc;
-
-	/* Now make sure both master & slave are ready */
-	ide_port_for_each_dev(i, drive, hwif) {
-		/* Ignore disks that we will not probe for later. */
-		if ((drive->dev_flags & IDE_DFLAG_NOPROBE) == 0 ||
-		    (drive->dev_flags & IDE_DFLAG_PRESENT)) {
-			tp_ops->dev_select(drive);
-			tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-			mdelay(2);
-			rc = ide_wait_not_busy(hwif, 35000);
-			if (rc)
-				goto out;
-		} else
-			printk(KERN_DEBUG "%s: ide_wait_not_busy() skipped\n",
-					  drive->name);
-	}
-out:
-	/* Exit function with master reselected (let's be sane) */
-	if (i)
-		tp_ops->dev_select(hwif->devices[0]);
-
-	return rc;
-}
-
-/**
- *	ide_undecoded_slave	-	look for bad CF adapters
- *	@dev1: slave device
- *
- *	Analyse the drives on the interface and attempt to decide if we
- *	have the same drive viewed twice. This occurs with crap CF adapters
- *	and PCMCIA sometimes.
- */
-
-void ide_undecoded_slave(ide_drive_t *dev1)
-{
-	ide_drive_t *dev0 = dev1->hwif->devices[0];
-
-	if ((dev1->dn & 1) == 0 || (dev0->dev_flags & IDE_DFLAG_PRESENT) == 0)
-		return;
-
-	/* If the models don't match they are not the same product */
-	if (strcmp((char *)&dev0->id[ATA_ID_PROD],
-		   (char *)&dev1->id[ATA_ID_PROD]))
-		return;
-
-	/* Serial numbers do not match */
-	if (strncmp((char *)&dev0->id[ATA_ID_SERNO],
-		    (char *)&dev1->id[ATA_ID_SERNO], ATA_ID_SERNO_LEN))
-		return;
-
-	/* No serial number, thankfully very rare for CF */
-	if (*(char *)&dev0->id[ATA_ID_SERNO] == 0)
-		return;
-
-	/* Appears to be an IDE flash adapter with decode bugs */
-	printk(KERN_WARNING "ide-probe: ignoring undecoded slave\n");
-
-	dev1->dev_flags &= ~IDE_DFLAG_PRESENT;
-}
-
-EXPORT_SYMBOL_GPL(ide_undecoded_slave);
-
-static int ide_probe_port(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	unsigned int irqd;
-	int i, rc = -ENODEV;
-
-	BUG_ON(hwif->present);
-
-	if ((hwif->devices[0]->dev_flags & IDE_DFLAG_NOPROBE) &&
-	    (hwif->devices[1]->dev_flags & IDE_DFLAG_NOPROBE))
-		return -EACCES;
-
-	/*
-	 * We must always disable IRQ, as probe_for_drive will assert IRQ, but
-	 * we'll install our IRQ driver much later...
-	 */
-	irqd = hwif->irq;
-	if (irqd)
-		disable_irq(hwif->irq);
-
-	if (ide_port_wait_ready(hwif) == -EBUSY)
-		printk(KERN_DEBUG "%s: Wait for ready failed before probe !\n", hwif->name);
-
-	/*
-	 * Second drive should only exist if first drive was found,
-	 * but a lot of cdrom drives are configured as single slaves.
-	 */
-	ide_port_for_each_dev(i, drive, hwif) {
-		(void) probe_for_drive(drive);
-		if (drive->dev_flags & IDE_DFLAG_PRESENT)
-			rc = 0;
-	}
-
-	/*
-	 * Use cached IRQ number. It might be (and is...) changed by probe
-	 * code above
-	 */
-	if (irqd)
-		enable_irq(irqd);
-
-	return rc;
-}
-
-static void ide_port_tune_devices(ide_hwif_t *hwif)
-{
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-	ide_drive_t *drive;
-	int i;
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		ide_check_nien_quirk_list(drive);
-
-		if (port_ops && port_ops->quirkproc)
-			port_ops->quirkproc(drive);
-	}
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		ide_set_max_pio(drive);
-
-		drive->dev_flags |= IDE_DFLAG_NICE1;
-
-		if (hwif->dma_ops)
-			ide_set_dma(drive);
-	}
-}
-
-static void ide_initialize_rq(struct request *rq)
-{
-	struct ide_request *req = blk_mq_rq_to_pdu(rq);
-
-	req->special = NULL;
-	scsi_req_init(&req->sreq);
-	req->sreq.sense = req->sense;
-}
-
-static const struct blk_mq_ops ide_mq_ops = {
-	.queue_rq		= ide_queue_rq,
-	.initialize_rq_fn	= ide_initialize_rq,
-};
-
-/*
- * init request queue
- */
-static int ide_init_queue(ide_drive_t *drive)
-{
-	struct request_queue *q;
-	ide_hwif_t *hwif = drive->hwif;
-	int max_sectors = 256;
-	int max_sg_entries = PRD_ENTRIES;
-	struct blk_mq_tag_set *set;
-
-	/*
-	 *	Our default set up assumes the normal IDE case,
-	 *	that is 64K segmenting, standard PRD setup
-	 *	and LBA28. Some drivers then impose their own
-	 *	limits and LBA48 we could raise it but as yet
-	 *	do not.
-	 */
-
-	set = &drive->tag_set;
-	set->ops = &ide_mq_ops;
-	set->nr_hw_queues = 1;
-	set->queue_depth = 32;
-	set->reserved_tags = 1;
-	set->cmd_size = sizeof(struct ide_request);
-	set->numa_node = hwif_to_node(hwif);
-	set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
-	if (blk_mq_alloc_tag_set(set))
-		return 1;
-
-	q = blk_mq_init_queue(set);
-	if (IS_ERR(q)) {
-		blk_mq_free_tag_set(set);
-		return 1;
-	}
-
-	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
-
-	q->queuedata = drive;
-	blk_queue_segment_boundary(q, 0xffff);
-
-	if (hwif->rqsize < max_sectors)
-		max_sectors = hwif->rqsize;
-	blk_queue_max_hw_sectors(q, max_sectors);
-
-#ifdef CONFIG_PCI
-	/* When we have an IOMMU, we may have a problem where pci_map_sg()
-	 * creates segments that don't completely match our boundary
-	 * requirements and thus need to be broken up again. Because it
-	 * doesn't align properly either, we may actually have to break up
-	 * to more segments than what was we got in the first place, a max
-	 * worst case is twice as many.
-	 * This will be fixed once we teach pci_map_sg() about our boundary
-	 * requirements, hopefully soon. *FIXME*
-	 */
-	max_sg_entries >>= 1;
-#endif /* CONFIG_PCI */
-
-	blk_queue_max_segments(q, max_sg_entries);
-
-	/* assign drive queue */
-	drive->queue = q;
-
-	return 0;
-}
-
-static DEFINE_MUTEX(ide_cfg_mtx);
-
-/*
- * For any present drive:
- * - allocate the block device queue
- */
-static int ide_port_setup_devices(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i, j = 0;
-
-	mutex_lock(&ide_cfg_mtx);
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		if (ide_init_queue(drive)) {
-			printk(KERN_ERR "ide: failed to init %s\n",
-					drive->name);
-			drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-			continue;
-		}
-
-		j++;
-	}
-	mutex_unlock(&ide_cfg_mtx);
-
-	return j;
-}
-
-static void ide_host_enable_irqs(struct ide_host *host)
-{
-	ide_hwif_t *hwif;
-	int i;
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif == NULL)
-			continue;
-
-		/* clear any pending IRQs */
-		hwif->tp_ops->read_status(hwif);
-
-		/* unmask IRQs */
-		if (hwif->io_ports.ctl_addr)
-			hwif->tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-	}
-}
-
-/*
- * This routine sets up the IRQ for an IDE interface.
- */
-static int init_irq (ide_hwif_t *hwif)
-{
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	struct ide_host *host = hwif->host;
-	irq_handler_t irq_handler = host->irq_handler;
-	int sa = host->irq_flags;
-
-	if (irq_handler == NULL)
-		irq_handler = ide_intr;
-
-	if (!host->get_lock)
-		if (request_irq(hwif->irq, irq_handler, sa, hwif->name, hwif))
-			goto out_up;
-
-#if !defined(__mc68000__)
-	printk(KERN_INFO "%s at 0x%03lx-0x%03lx,0x%03lx on irq %d", hwif->name,
-		io_ports->data_addr, io_ports->status_addr,
-		io_ports->ctl_addr, hwif->irq);
-#else
-	printk(KERN_INFO "%s at 0x%08lx on irq %d", hwif->name,
-		io_ports->data_addr, hwif->irq);
-#endif /* __mc68000__ */
-	if (hwif->host->host_flags & IDE_HFLAG_SERIALIZE)
-		printk(KERN_CONT " (serialized)");
-	printk(KERN_CONT "\n");
-
-	return 0;
-out_up:
-	return 1;
-}
-
-static void ata_probe(dev_t dev)
-{
-	request_module("ide-disk");
-	request_module("ide-cd");
-	request_module("ide-tape");
-	request_module("ide-floppy");
-}
-
-void ide_init_disk(struct gendisk *disk, ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned int unit = drive->dn & 1;
-
-	disk->major = hwif->major;
-	disk->first_minor = unit << PARTN_BITS;
-	sprintf(disk->disk_name, "hd%c", 'a' + hwif->index * MAX_DRIVES + unit);
-	disk->queue = drive->queue;
-}
-
-EXPORT_SYMBOL_GPL(ide_init_disk);
-
-static void drive_release_dev (struct device *dev)
-{
-	ide_drive_t *drive = container_of(dev, ide_drive_t, gendev);
-
-	ide_proc_unregister_device(drive);
-
-	if (drive->sense_rq)
-		blk_mq_free_request(drive->sense_rq);
-
-	blk_cleanup_queue(drive->queue);
-	drive->queue = NULL;
-	blk_mq_free_tag_set(&drive->tag_set);
-
-	drive->dev_flags &= ~IDE_DFLAG_PRESENT;
-
-	complete(&drive->gendev_rel_comp);
-}
-
-static int hwif_init(ide_hwif_t *hwif)
-{
-	if (!hwif->irq) {
-		printk(KERN_ERR "%s: disabled, no IRQ\n", hwif->name);
-		return 0;
-	}
-
-	if (__register_blkdev(hwif->major, hwif->name, ata_probe))
-		return 0;
-
-	if (!hwif->sg_max_nents)
-		hwif->sg_max_nents = PRD_ENTRIES;
-
-	hwif->sg_table = kmalloc_array(hwif->sg_max_nents,
-				       sizeof(struct scatterlist),
-				       GFP_KERNEL);
-	if (!hwif->sg_table) {
-		printk(KERN_ERR "%s: unable to allocate SG table.\n", hwif->name);
-		goto out;
-	}
-
-	sg_init_table(hwif->sg_table, hwif->sg_max_nents);
-	
-	if (init_irq(hwif)) {
-		printk(KERN_ERR "%s: disabled, unable to get IRQ %d\n",
-			hwif->name, hwif->irq);
-		goto out;
-	}
-
-	return 1;
-
-out:
-	unregister_blkdev(hwif->major, hwif->name);
-	return 0;
-}
-
-static void hwif_register_devices(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	unsigned int i;
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		struct device *dev = &drive->gendev;
-		int ret;
-
-		dev_set_name(dev, "%u.%u", hwif->index, i);
-		dev_set_drvdata(dev, drive);
-		dev->parent = &hwif->gendev;
-		dev->bus = &ide_bus_type;
-		dev->release = drive_release_dev;
-
-		ret = device_register(dev);
-		if (ret < 0)
-			printk(KERN_WARNING "IDE: %s: device_register error: "
-					    "%d\n", __func__, ret);
-	}
-}
-
-static void ide_port_init_devices(ide_hwif_t *hwif)
-{
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-	ide_drive_t *drive;
-	int i;
-
-	ide_port_for_each_dev(i, drive, hwif) {
-		drive->dn = i + hwif->channel * 2;
-
-		if (hwif->host_flags & IDE_HFLAG_IO_32BIT)
-			drive->io_32bit = 1;
-		if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT)
-			drive->dev_flags |= IDE_DFLAG_NO_IO_32BIT;
-		if (hwif->host_flags & IDE_HFLAG_UNMASK_IRQS)
-			drive->dev_flags |= IDE_DFLAG_UNMASK;
-		if (hwif->host_flags & IDE_HFLAG_NO_UNMASK_IRQS)
-			drive->dev_flags |= IDE_DFLAG_NO_UNMASK;
-
-		drive->pio_mode = XFER_PIO_0;
-
-		if (port_ops && port_ops->init_dev)
-			port_ops->init_dev(drive);
-	}
-}
-
-static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
-			  const struct ide_port_info *d)
-{
-	hwif->channel = port;
-
-	hwif->chipset = d->chipset ? d->chipset : ide_pci;
-
-	if (d->init_iops)
-		d->init_iops(hwif);
-
-	/* ->host_flags may be set by ->init_iops (or even earlier...) */
-	hwif->host_flags |= d->host_flags;
-	hwif->pio_mask = d->pio_mask;
-
-	if (d->tp_ops)
-		hwif->tp_ops = d->tp_ops;
-
-	/* ->set_pio_mode for DTC2278 is currently limited to port 0 */
-	if ((hwif->host_flags & IDE_HFLAG_DTC2278) == 0 || hwif->channel == 0)
-		hwif->port_ops = d->port_ops;
-
-	hwif->swdma_mask = d->swdma_mask;
-	hwif->mwdma_mask = d->mwdma_mask;
-	hwif->ultra_mask = d->udma_mask;
-
-	if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) {
-		int rc;
-
-		hwif->dma_ops = d->dma_ops;
-
-		if (d->init_dma)
-			rc = d->init_dma(hwif, d);
-		else
-			rc = ide_hwif_setup_dma(hwif, d);
-
-		if (rc < 0) {
-			printk(KERN_INFO "%s: DMA disabled\n", hwif->name);
-
-			hwif->dma_ops = NULL;
-			hwif->dma_base = 0;
-			hwif->swdma_mask = 0;
-			hwif->mwdma_mask = 0;
-			hwif->ultra_mask = 0;
-		}
-	}
-
-	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
-	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base))
-		hwif->host->host_flags |= IDE_HFLAG_SERIALIZE;
-
-	if (d->max_sectors)
-		hwif->rqsize = d->max_sectors;
-	else {
-		if ((hwif->host_flags & IDE_HFLAG_NO_LBA48) ||
-		    (hwif->host_flags & IDE_HFLAG_NO_LBA48_DMA))
-			hwif->rqsize = 256;
-		else
-			hwif->rqsize = 65536;
-	}
-
-	/* call chipset specific routine for each enabled port */
-	if (d->init_hwif)
-		d->init_hwif(hwif);
-}
-
-static void ide_port_cable_detect(ide_hwif_t *hwif)
-{
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-
-	if (port_ops && port_ops->cable_detect && (hwif->ultra_mask & 0x78)) {
-		if (hwif->cbl != ATA_CBL_PATA40_SHORT)
-			hwif->cbl = port_ops->cable_detect(hwif);
-	}
-}
-
-/*
- * Deferred request list insertion handler
- */
-static void drive_rq_insert_work(struct work_struct *work)
-{
-	ide_drive_t *drive = container_of(work, ide_drive_t, rq_work);
-	ide_hwif_t *hwif = drive->hwif;
-	struct request *rq;
-	blk_status_t ret;
-	LIST_HEAD(list);
-
-	blk_mq_quiesce_queue(drive->queue);
-
-	ret = BLK_STS_OK;
-	spin_lock_irq(&hwif->lock);
-	while (!list_empty(&drive->rq_list)) {
-		rq = list_first_entry(&drive->rq_list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-
-		spin_unlock_irq(&hwif->lock);
-		ret = ide_issue_rq(drive, rq, true);
-		spin_lock_irq(&hwif->lock);
-	}
-	spin_unlock_irq(&hwif->lock);
-
-	blk_mq_unquiesce_queue(drive->queue);
-
-	if (ret != BLK_STS_OK)
-		kblockd_schedule_work(&drive->rq_work);
-}
-
-static const u8 ide_hwif_to_major[] =
-	{ IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR, IDE4_MAJOR,
-	  IDE5_MAJOR, IDE6_MAJOR, IDE7_MAJOR, IDE8_MAJOR, IDE9_MAJOR };
-
-static void ide_port_init_devices_data(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i;
-
-	ide_port_for_each_dev(i, drive, hwif) {
-		u8 j = (hwif->index * MAX_DRIVES) + i;
-		u16 *saved_id = drive->id;
-
-		memset(drive, 0, sizeof(*drive));
-		memset(saved_id, 0, SECTOR_SIZE);
-		drive->id = saved_id;
-
-		drive->media			= ide_disk;
-		drive->select			= (i << 4) | ATA_DEVICE_OBS;
-		drive->hwif			= hwif;
-		drive->ready_stat		= ATA_DRDY;
-		drive->bad_wstat		= BAD_W_STAT;
-		drive->special_flags		= IDE_SFLAG_RECALIBRATE |
-						  IDE_SFLAG_SET_GEOMETRY;
-		drive->name[0]			= 'h';
-		drive->name[1]			= 'd';
-		drive->name[2]			= 'a' + j;
-		drive->max_failures		= IDE_DEFAULT_MAX_FAILURES;
-
-		INIT_LIST_HEAD(&drive->list);
-		init_completion(&drive->gendev_rel_comp);
-
-		INIT_WORK(&drive->rq_work, drive_rq_insert_work);
-		INIT_LIST_HEAD(&drive->rq_list);
-	}
-}
-
-static void ide_init_port_data(ide_hwif_t *hwif, unsigned int index)
-{
-	/* fill in any non-zero initial values */
-	hwif->index	= index;
-	hwif->major	= ide_hwif_to_major[index];
-
-	hwif->name[0]	= 'i';
-	hwif->name[1]	= 'd';
-	hwif->name[2]	= 'e';
-	hwif->name[3]	= '0' + index;
-
-	spin_lock_init(&hwif->lock);
-
-	timer_setup(&hwif->timer, ide_timer_expiry, 0);
-
-	init_completion(&hwif->gendev_rel_comp);
-
-	hwif->tp_ops = &default_tp_ops;
-
-	ide_port_init_devices_data(hwif);
-}
-
-static void ide_init_port_hw(ide_hwif_t *hwif, struct ide_hw *hw)
-{
-	memcpy(&hwif->io_ports, &hw->io_ports, sizeof(hwif->io_ports));
-	hwif->irq = hw->irq;
-	hwif->dev = hw->dev;
-	hwif->gendev.parent = hw->parent ? hw->parent : hw->dev;
-	hwif->config_data = hw->config;
-}
-
-static unsigned int ide_indexes;
-
-/**
- *	ide_find_port_slot	-	find free port slot
- *	@d: IDE port info
- *
- *	Return the new port slot index or -ENOENT if we are out of free slots.
- */
-
-static int ide_find_port_slot(const struct ide_port_info *d)
-{
-	int idx = -ENOENT;
-	u8 bootable = (d && (d->host_flags & IDE_HFLAG_NON_BOOTABLE)) ? 0 : 1;
-	u8 i = (d && (d->host_flags & IDE_HFLAG_QD_2ND_PORT)) ? 1 : 0;
-
-	/*
-	 * Claim an unassigned slot.
-	 *
-	 * Give preference to claiming other slots before claiming ide0/ide1,
-	 * just in case there's another interface yet-to-be-scanned
-	 * which uses ports 0x1f0/0x170 (the ide0/ide1 defaults).
-	 *
-	 * Unless there is a bootable card that does not use the standard
-	 * ports 0x1f0/0x170 (the ide0/ide1 defaults).
-	 */
-	mutex_lock(&ide_cfg_mtx);
-	if (bootable) {
-		if ((ide_indexes | i) != (1 << MAX_HWIFS) - 1)
-			idx = ffz(ide_indexes | i);
-	} else {
-		if ((ide_indexes | 3) != (1 << MAX_HWIFS) - 1)
-			idx = ffz(ide_indexes | 3);
-		else if ((ide_indexes & 3) != 3)
-			idx = ffz(ide_indexes);
-	}
-	if (idx >= 0)
-		ide_indexes |= (1 << idx);
-	mutex_unlock(&ide_cfg_mtx);
-
-	return idx;
-}
-
-static void ide_free_port_slot(int idx)
-{
-	mutex_lock(&ide_cfg_mtx);
-	ide_indexes &= ~(1 << idx);
-	mutex_unlock(&ide_cfg_mtx);
-}
-
-static void ide_port_free_devices(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i;
-
-	ide_port_for_each_dev(i, drive, hwif) {
-		kfree(drive->id);
-		kfree(drive);
-	}
-}
-
-static int ide_port_alloc_devices(ide_hwif_t *hwif, int node)
-{
-	ide_drive_t *drive;
-	int i;
-
-	for (i = 0; i < MAX_DRIVES; i++) {
-		drive = kzalloc_node(sizeof(*drive), GFP_KERNEL, node);
-		if (drive == NULL)
-			goto out_nomem;
-
-		/*
-		 * In order to keep things simple we have an id
-		 * block for all drives at all times. If the device
-		 * is pre ATA or refuses ATA/ATAPI identify we
-		 * will add faked data to this.
-		 *
-		 * Also note that 0 everywhere means "can't do X"
-		 */
-		drive->id = kzalloc_node(SECTOR_SIZE, GFP_KERNEL, node);
-		if (drive->id == NULL)
-			goto out_free_drive;
-
-		hwif->devices[i] = drive;
-	}
-	return 0;
-
-out_free_drive:
-	kfree(drive);
-out_nomem:
-	ide_port_free_devices(hwif);
-	return -ENOMEM;
-}
-
-struct ide_host *ide_host_alloc(const struct ide_port_info *d,
-				struct ide_hw **hws, unsigned int n_ports)
-{
-	struct ide_host *host;
-	struct device *dev = hws[0] ? hws[0]->dev : NULL;
-	int node = dev ? dev_to_node(dev) : -1;
-	int i;
-
-	host = kzalloc_node(sizeof(*host), GFP_KERNEL, node);
-	if (host == NULL)
-		return NULL;
-
-	for (i = 0; i < n_ports; i++) {
-		ide_hwif_t *hwif;
-		int idx;
-
-		if (hws[i] == NULL)
-			continue;
-
-		hwif = kzalloc_node(sizeof(*hwif), GFP_KERNEL, node);
-		if (hwif == NULL)
-			continue;
-
-		if (ide_port_alloc_devices(hwif, node) < 0) {
-			kfree(hwif);
-			continue;
-		}
-
-		idx = ide_find_port_slot(d);
-		if (idx < 0) {
-			printk(KERN_ERR "%s: no free slot for interface\n",
-					d ? d->name : "ide");
-			ide_port_free_devices(hwif);
-			kfree(hwif);
-			continue;
-		}
-
-		ide_init_port_data(hwif, idx);
-
-		hwif->host = host;
-
-		host->ports[i] = hwif;
-		host->n_ports++;
-	}
-
-	if (host->n_ports == 0) {
-		kfree(host);
-		return NULL;
-	}
-
-	host->dev[0] = dev;
-
-	if (d) {
-		host->init_chipset = d->init_chipset;
-		host->get_lock     = d->get_lock;
-		host->release_lock = d->release_lock;
-		host->host_flags = d->host_flags;
-		host->irq_flags = d->irq_flags;
-	}
-
-	return host;
-}
-EXPORT_SYMBOL_GPL(ide_host_alloc);
-
-static void ide_port_free(ide_hwif_t *hwif)
-{
-	ide_port_free_devices(hwif);
-	ide_free_port_slot(hwif->index);
-	kfree(hwif);
-}
-
-static void ide_disable_port(ide_hwif_t *hwif)
-{
-	struct ide_host *host = hwif->host;
-	int i;
-
-	printk(KERN_INFO "%s: disabling port\n", hwif->name);
-
-	for (i = 0; i < MAX_HOST_PORTS; i++) {
-		if (host->ports[i] == hwif) {
-			host->ports[i] = NULL;
-			host->n_ports--;
-		}
-	}
-
-	ide_port_free(hwif);
-}
-
-int ide_host_register(struct ide_host *host, const struct ide_port_info *d,
-		      struct ide_hw **hws)
-{
-	ide_hwif_t *hwif, *mate = NULL;
-	int i, j = 0;
-
-	pr_warn("legacy IDE will be removed in 2021, please switch to libata\n"
-		"Report any missing HW support to linux-ide@vger.kernel.org\n");
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif == NULL) {
-			mate = NULL;
-			continue;
-		}
-
-		ide_init_port_hw(hwif, hws[i]);
-		ide_port_apply_params(hwif);
-
-		if ((i & 1) && mate) {
-			hwif->mate = mate;
-			mate->mate = hwif;
-		}
-
-		mate = (i & 1) ? NULL : hwif;
-
-		ide_init_port(hwif, i & 1, d);
-		ide_port_cable_detect(hwif);
-
-		hwif->port_flags |= IDE_PFLAG_PROBING;
-
-		ide_port_init_devices(hwif);
-	}
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif == NULL)
-			continue;
-
-		if (ide_probe_port(hwif) == 0)
-			hwif->present = 1;
-
-		hwif->port_flags &= ~IDE_PFLAG_PROBING;
-
-		if ((hwif->host_flags & IDE_HFLAG_4DRIVES) == 0 ||
-		    hwif->mate == NULL || hwif->mate->present == 0) {
-			if (ide_register_port(hwif)) {
-				ide_disable_port(hwif);
-				continue;
-			}
-		}
-
-		if (hwif->present)
-			ide_port_tune_devices(hwif);
-	}
-
-	ide_host_enable_irqs(host);
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif == NULL)
-			continue;
-
-		if (hwif_init(hwif) == 0) {
-			printk(KERN_INFO "%s: failed to initialize IDE "
-					 "interface\n", hwif->name);
-			device_unregister(hwif->portdev);
-			device_unregister(&hwif->gendev);
-			ide_disable_port(hwif);
-			continue;
-		}
-
-		if (hwif->present)
-			if (ide_port_setup_devices(hwif) == 0) {
-				hwif->present = 0;
-				continue;
-			}
-
-		j++;
-
-		ide_acpi_init_port(hwif);
-
-		if (hwif->present)
-			ide_acpi_port_init_devices(hwif);
-	}
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif == NULL)
-			continue;
-
-		ide_sysfs_register_port(hwif);
-		ide_proc_register_port(hwif);
-
-		if (hwif->present) {
-			ide_proc_port_register_devices(hwif);
-			hwif_register_devices(hwif);
-		}
-	}
-
-	return j ? 0 : -1;
-}
-EXPORT_SYMBOL_GPL(ide_host_register);
-
-int ide_host_add(const struct ide_port_info *d, struct ide_hw **hws,
-		 unsigned int n_ports, struct ide_host **hostp)
-{
-	struct ide_host *host;
-	int rc;
-
-	host = ide_host_alloc(d, hws, n_ports);
-	if (host == NULL)
-		return -ENOMEM;
-
-	rc = ide_host_register(host, d, hws);
-	if (rc) {
-		ide_host_free(host);
-		return rc;
-	}
-
-	if (hostp)
-		*hostp = host;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_host_add);
-
-static void __ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i;
-
-	ide_port_for_each_present_dev(i, drive, hwif) {
-		device_unregister(&drive->gendev);
-		wait_for_completion(&drive->gendev_rel_comp);
-	}
-}
-
-void ide_port_unregister_devices(ide_hwif_t *hwif)
-{
-	mutex_lock(&ide_cfg_mtx);
-	__ide_port_unregister_devices(hwif);
-	hwif->present = 0;
-	ide_port_init_devices_data(hwif);
-	mutex_unlock(&ide_cfg_mtx);
-}
-EXPORT_SYMBOL_GPL(ide_port_unregister_devices);
-
-/**
- *	ide_unregister		-	free an IDE interface
- *	@hwif: IDE interface
- *
- *	Perform the final unregister of an IDE interface.
- *
- *	Locking:
- *	The caller must not hold the IDE locks.
- *
- *	It is up to the caller to be sure there is no pending I/O here,
- *	and that the interface will not be reopened (present/vanishing
- *	locking isn't yet done BTW).
- */
-
-static void ide_unregister(ide_hwif_t *hwif)
-{
-	mutex_lock(&ide_cfg_mtx);
-
-	if (hwif->present) {
-		__ide_port_unregister_devices(hwif);
-		hwif->present = 0;
-	}
-
-	ide_proc_unregister_port(hwif);
-
-	if (!hwif->host->get_lock)
-		free_irq(hwif->irq, hwif);
-
-	device_unregister(hwif->portdev);
-	device_unregister(&hwif->gendev);
-	wait_for_completion(&hwif->gendev_rel_comp);
-
-	/*
-	 * Remove us from the kernel's knowledge
-	 */
-	kfree(hwif->sg_table);
-	unregister_blkdev(hwif->major, hwif->name);
-
-	ide_release_dma_engine(hwif);
-
-	mutex_unlock(&ide_cfg_mtx);
-}
-
-void ide_host_free(struct ide_host *host)
-{
-	ide_hwif_t *hwif;
-	int i;
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif)
-			ide_port_free(hwif);
-	}
-
-	kfree(host);
-}
-EXPORT_SYMBOL_GPL(ide_host_free);
-
-void ide_host_remove(struct ide_host *host)
-{
-	ide_hwif_t *hwif;
-	int i;
-
-	ide_host_for_each_port(i, hwif, host) {
-		if (hwif)
-			ide_unregister(hwif);
-	}
-
-	ide_host_free(host);
-}
-EXPORT_SYMBOL_GPL(ide_host_remove);
-
-void ide_port_scan(ide_hwif_t *hwif)
-{
-	int rc;
-
-	ide_port_apply_params(hwif);
-	ide_port_cable_detect(hwif);
-
-	hwif->port_flags |= IDE_PFLAG_PROBING;
-
-	ide_port_init_devices(hwif);
-
-	rc = ide_probe_port(hwif);
-
-	hwif->port_flags &= ~IDE_PFLAG_PROBING;
-
-	if (rc < 0)
-		return;
-
-	hwif->present = 1;
-
-	ide_port_tune_devices(hwif);
-	ide_port_setup_devices(hwif);
-	ide_acpi_port_init_devices(hwif);
-	hwif_register_devices(hwif);
-	ide_proc_port_register_devices(hwif);
-}
-EXPORT_SYMBOL_GPL(ide_port_scan);
diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
deleted file mode 100644
index 15c17f3781ee9..0000000000000
--- a/drivers/ide/ide-proc.c
+++ /dev/null
@@ -1,633 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1997-1998	Mark Lord
- *  Copyright (C) 2003		Red Hat
- *
- *  Some code was moved here from ide.c, see it for original copyrights.
- */
-
-/*
- * This is the /proc/ide/ filesystem implementation.
- *
- * Drive/Driver settings can be retrieved by reading the drive's
- * "settings" files.  e.g.    "cat /proc/ide0/hda/settings"
- * To write a new value "val" into a specific setting "name", use:
- *   echo "name:val" >/proc/ide/ide0/hda/settings
- */
-
-#include <linux/module.h>
-
-#include <linux/uaccess.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
-#include <linux/ctype.h>
-#include <linux/ide.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-
-static struct proc_dir_entry *proc_ide_root;
-
-static int ide_imodel_proc_show(struct seq_file *m, void *v)
-{
-	ide_hwif_t	*hwif = (ide_hwif_t *) m->private;
-	const char	*name;
-
-	switch (hwif->chipset) {
-	case ide_generic:	name = "generic";	break;
-	case ide_pci:		name = "pci";		break;
-	case ide_cmd640:	name = "cmd640";	break;
-	case ide_dtc2278:	name = "dtc2278";	break;
-	case ide_ali14xx:	name = "ali14xx";	break;
-	case ide_qd65xx:	name = "qd65xx";	break;
-	case ide_umc8672:	name = "umc8672";	break;
-	case ide_ht6560b:	name = "ht6560b";	break;
-	case ide_4drives:	name = "4drives";	break;
-	case ide_pmac:		name = "mac-io";	break;
-	case ide_au1xxx:	name = "au1xxx";	break;
-	case ide_palm3710:      name = "palm3710";      break;
-	case ide_acorn:		name = "acorn";		break;
-	default:		name = "(unknown)";	break;
-	}
-	seq_printf(m, "%s\n", name);
-	return 0;
-}
-
-static int ide_mate_proc_show(struct seq_file *m, void *v)
-{
-	ide_hwif_t	*hwif = (ide_hwif_t *) m->private;
-
-	if (hwif && hwif->mate)
-		seq_printf(m, "%s\n", hwif->mate->name);
-	else
-		seq_printf(m, "(none)\n");
-	return 0;
-}
-
-static int ide_channel_proc_show(struct seq_file *m, void *v)
-{
-	ide_hwif_t	*hwif = (ide_hwif_t *) m->private;
-
-	seq_printf(m, "%c\n", hwif->channel ? '1' : '0');
-	return 0;
-}
-
-static int ide_identify_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t *drive = (ide_drive_t *)m->private;
-	u8 *buf;
-
-	if (!drive) {
-		seq_putc(m, '\n');
-		return 0;
-	}
-
-	buf = kmalloc(SECTOR_SIZE, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-	if (taskfile_lib_get_identify(drive, buf) == 0) {
-		__le16 *val = (__le16 *)buf;
-		int i;
-
-		for (i = 0; i < SECTOR_SIZE / 2; i++) {
-			seq_printf(m, "%04x%c", le16_to_cpu(val[i]),
-					(i % 8) == 7 ? '\n' : ' ');
-		}
-	} else
-		seq_putc(m, buf[0]);
-	kfree(buf);
-	return 0;
-}
-
-/**
- *	ide_find_setting	-	find a specific setting
- *	@st: setting table pointer
- *	@name: setting name
- *
- *	Scan's the setting table for a matching entry and returns
- *	this or NULL if no entry is found. The caller must hold the
- *	setting semaphore
- */
-
-static
-const struct ide_proc_devset *ide_find_setting(const struct ide_proc_devset *st,
-					       char *name)
-{
-	while (st->name) {
-		if (strcmp(st->name, name) == 0)
-			break;
-		st++;
-	}
-	return st->name ? st : NULL;
-}
-
-/**
- *	ide_read_setting	-	read an IDE setting
- *	@drive: drive to read from
- *	@setting: drive setting
- *
- *	Read a drive setting and return the value. The caller
- *	must hold the ide_setting_mtx when making this call.
- *
- *	BUGS: the data return and error are the same return value
- *	so an error -EINVAL and true return of the same value cannot
- *	be told apart
- */
-
-static int ide_read_setting(ide_drive_t *drive,
-			    const struct ide_proc_devset *setting)
-{
-	const struct ide_devset *ds = setting->setting;
-	int val = -EINVAL;
-
-	if (ds->get)
-		val = ds->get(drive);
-
-	return val;
-}
-
-/**
- *	ide_write_setting	-	read an IDE setting
- *	@drive: drive to read from
- *	@setting: drive setting
- *	@val: value
- *
- *	Write a drive setting if it is possible. The caller
- *	must hold the ide_setting_mtx when making this call.
- *
- *	BUGS: the data return and error are the same return value
- *	so an error -EINVAL and true return of the same value cannot
- *	be told apart
- *
- *	FIXME:  This should be changed to enqueue a special request
- *	to the driver to change settings, and then wait on a sema for completion.
- *	The current scheme of polling is kludgy, though safe enough.
- */
-
-static int ide_write_setting(ide_drive_t *drive,
-			     const struct ide_proc_devset *setting, int val)
-{
-	const struct ide_devset *ds = setting->setting;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-	if (!ds->set)
-		return -EPERM;
-	if ((ds->flags & DS_SYNC)
-	    && (val < setting->min || val > setting->max))
-		return -EINVAL;
-	return ide_devset_execute(drive, ds, val);
-}
-
-ide_devset_get(xfer_rate, current_speed);
-
-static int set_xfer_rate (ide_drive_t *drive, int arg)
-{
-	struct ide_cmd cmd;
-
-	if (arg < XFER_PIO_0 || arg > XFER_UDMA_6)
-		return -EINVAL;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.tf.command = ATA_CMD_SET_FEATURES;
-	cmd.tf.feature = SETFEATURES_XFER;
-	cmd.tf.nsect   = (u8)arg;
-	cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT;
-	cmd.valid.in.tf  = IDE_VALID_NSECT;
-	cmd.tf_flags   = IDE_TFLAG_SET_XFER;
-
-	return ide_no_data_taskfile(drive, &cmd);
-}
-
-ide_devset_rw(current_speed, xfer_rate);
-ide_devset_rw_field(init_speed, init_speed);
-ide_devset_rw_flag(nice1, IDE_DFLAG_NICE1);
-ide_devset_ro_field(number, dn);
-
-static const struct ide_proc_devset ide_generic_settings[] = {
-	IDE_PROC_DEVSET(current_speed, 0, 70),
-	IDE_PROC_DEVSET(init_speed, 0, 70),
-	IDE_PROC_DEVSET(io_32bit,  0, 1 + (SUPPORT_VLB_SYNC << 1)),
-	IDE_PROC_DEVSET(keepsettings, 0, 1),
-	IDE_PROC_DEVSET(nice1, 0, 1),
-	IDE_PROC_DEVSET(number, 0, 3),
-	IDE_PROC_DEVSET(pio_mode, 0, 255),
-	IDE_PROC_DEVSET(unmaskirq, 0, 1),
-	IDE_PROC_DEVSET(using_dma, 0, 1),
-	{ NULL },
-};
-
-static void proc_ide_settings_warn(void)
-{
-	printk_once(KERN_WARNING "Warning: /proc/ide/hd?/settings interface is "
-			    "obsolete, and will be removed soon!\n");
-}
-
-static int ide_settings_proc_show(struct seq_file *m, void *v)
-{
-	const struct ide_proc_devset *setting, *g, *d;
-	const struct ide_devset *ds;
-	ide_drive_t	*drive = (ide_drive_t *) m->private;
-	int		rc, mul_factor, div_factor;
-
-	proc_ide_settings_warn();
-
-	mutex_lock(&ide_setting_mtx);
-	g = ide_generic_settings;
-	d = drive->settings;
-	seq_printf(m, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n");
-	seq_printf(m, "----\t\t\t-----\t\t---\t\t---\t\t----\n");
-	while (g->name || (d && d->name)) {
-		/* read settings in the alphabetical order */
-		if (g->name && d && d->name) {
-			if (strcmp(d->name, g->name) < 0)
-				setting = d++;
-			else
-				setting = g++;
-		} else if (d && d->name) {
-			setting = d++;
-		} else
-			setting = g++;
-		mul_factor = setting->mulf ? setting->mulf(drive) : 1;
-		div_factor = setting->divf ? setting->divf(drive) : 1;
-		seq_printf(m, "%-24s", setting->name);
-		rc = ide_read_setting(drive, setting);
-		if (rc >= 0)
-			seq_printf(m, "%-16d", rc * mul_factor / div_factor);
-		else
-			seq_printf(m, "%-16s", "write-only");
-		seq_printf(m, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor);
-		ds = setting->setting;
-		if (ds->get)
-			seq_printf(m, "r");
-		if (ds->set)
-			seq_printf(m, "w");
-		seq_printf(m, "\n");
-	}
-	mutex_unlock(&ide_setting_mtx);
-	return 0;
-}
-
-static int ide_settings_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_settings_proc_show, PDE_DATA(inode));
-}
-
-#define MAX_LEN	30
-
-static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer,
-				       size_t count, loff_t *pos)
-{
-	ide_drive_t	*drive = PDE_DATA(file_inode(file));
-	char		name[MAX_LEN + 1];
-	int		for_real = 0, mul_factor, div_factor;
-	unsigned long	n;
-
-	const struct ide_proc_devset *setting;
-	char *buf, *s;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	proc_ide_settings_warn();
-
-	if (count >= PAGE_SIZE)
-		return -EINVAL;
-
-	s = buf = (char *)__get_free_page(GFP_USER);
-	if (!buf)
-		return -ENOMEM;
-
-	if (copy_from_user(buf, buffer, count)) {
-		free_page((unsigned long)buf);
-		return -EFAULT;
-	}
-
-	buf[count] = '\0';
-
-	/*
-	 * Skip over leading whitespace
-	 */
-	while (count && isspace(*s)) {
-		--count;
-		++s;
-	}
-	/*
-	 * Do one full pass to verify all parameters,
-	 * then do another to actually write the new settings.
-	 */
-	do {
-		char *p = s;
-		n = count;
-		while (n > 0) {
-			unsigned val;
-			char *q = p;
-
-			while (n > 0 && *p != ':') {
-				--n;
-				p++;
-			}
-			if (*p != ':')
-				goto parse_error;
-			if (p - q > MAX_LEN)
-				goto parse_error;
-			memcpy(name, q, p - q);
-			name[p - q] = 0;
-
-			if (n > 0) {
-				--n;
-				p++;
-			} else
-				goto parse_error;
-
-			val = simple_strtoul(p, &q, 10);
-			n -= q - p;
-			p = q;
-			if (n > 0 && !isspace(*p))
-				goto parse_error;
-			while (n > 0 && isspace(*p)) {
-				--n;
-				++p;
-			}
-
-			mutex_lock(&ide_setting_mtx);
-			/* generic settings first, then driver specific ones */
-			setting = ide_find_setting(ide_generic_settings, name);
-			if (!setting) {
-				if (drive->settings)
-					setting = ide_find_setting(drive->settings, name);
-				if (!setting) {
-					mutex_unlock(&ide_setting_mtx);
-					goto parse_error;
-				}
-			}
-			if (for_real) {
-				mul_factor = setting->mulf ? setting->mulf(drive) : 1;
-				div_factor = setting->divf ? setting->divf(drive) : 1;
-				ide_write_setting(drive, setting, val * div_factor / mul_factor);
-			}
-			mutex_unlock(&ide_setting_mtx);
-		}
-	} while (!for_real++);
-	free_page((unsigned long)buf);
-	return count;
-parse_error:
-	free_page((unsigned long)buf);
-	printk("%s(): parse error\n", __func__);
-	return -EINVAL;
-}
-
-static const struct proc_ops ide_settings_proc_ops = {
-	.proc_open	= ide_settings_proc_open,
-	.proc_read	= seq_read,
-	.proc_lseek	= seq_lseek,
-	.proc_release	= single_release,
-	.proc_write	= ide_settings_proc_write,
-};
-
-int ide_capacity_proc_show(struct seq_file *m, void *v)
-{
-	seq_printf(m, "%llu\n", (long long)0x7fffffff);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_capacity_proc_show);
-
-int ide_geometry_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t	*drive = (ide_drive_t *) m->private;
-
-	seq_printf(m, "physical     %d/%d/%d\n",
-			drive->cyl, drive->head, drive->sect);
-	seq_printf(m, "logical      %d/%d/%d\n",
-			drive->bios_cyl, drive->bios_head, drive->bios_sect);
-	return 0;
-}
-EXPORT_SYMBOL(ide_geometry_proc_show);
-
-static int ide_dmodel_proc_show(struct seq_file *seq, void *v)
-{
-	ide_drive_t	*drive = (ide_drive_t *) seq->private;
-	char		*m = (char *)&drive->id[ATA_ID_PROD];
-
-	seq_printf(seq, "%.40s\n", m[0] ? m : "(none)");
-	return 0;
-}
-
-static int ide_driver_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t		*drive = (ide_drive_t *)m->private;
-	struct device		*dev = &drive->gendev;
-	struct ide_driver	*ide_drv;
-
-	if (dev->driver) {
-		ide_drv = to_ide_driver(dev->driver);
-		seq_printf(m, "%s version %s\n",
-				dev->driver->name, ide_drv->version);
-	} else
-		seq_printf(m, "ide-default version 0.9.newide\n");
-	return 0;
-}
-
-static int ide_media_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t	*drive = (ide_drive_t *) m->private;
-	const char	*media;
-
-	switch (drive->media) {
-	case ide_disk:		media = "disk\n";	break;
-	case ide_cdrom:		media = "cdrom\n";	break;
-	case ide_tape:		media = "tape\n";	break;
-	case ide_floppy:	media = "floppy\n";	break;
-	case ide_optical:	media = "optical\n";	break;
-	default:		media = "UNKNOWN\n";	break;
-	}
-	seq_puts(m, media);
-	return 0;
-}
-
-static int ide_media_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, ide_media_proc_show, PDE_DATA(inode));
-}
-
-static const struct file_operations ide_media_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= ide_media_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static ide_proc_entry_t generic_drive_entries[] = {
-	{ "driver",	S_IFREG|S_IRUGO,	 ide_driver_proc_show	},
-	{ "identify",	S_IFREG|S_IRUSR,	 ide_identify_proc_show	},
-	{ "media",	S_IFREG|S_IRUGO,	 ide_media_proc_show	},
-	{ "model",	S_IFREG|S_IRUGO,	 ide_dmodel_proc_show	},
-	{}
-};
-
-static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data)
-{
-	struct proc_dir_entry *ent;
-
-	if (!dir || !p)
-		return;
-	while (p->name != NULL) {
-		ent = proc_create_single_data(p->name, p->mode, dir, p->show, data);
-		if (!ent) return;
-		p++;
-	}
-}
-
-static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p)
-{
-	if (!dir || !p)
-		return;
-	while (p->name != NULL) {
-		remove_proc_entry(p->name, dir);
-		p++;
-	}
-}
-
-void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver)
-{
-	mutex_lock(&ide_setting_mtx);
-	drive->settings = driver->proc_devsets(drive);
-	mutex_unlock(&ide_setting_mtx);
-
-	ide_add_proc_entries(drive->proc, driver->proc_entries(drive), drive);
-}
-
-EXPORT_SYMBOL(ide_proc_register_driver);
-
-/**
- *	ide_proc_unregister_driver	-	remove driver specific data
- *	@drive: drive
- *	@driver: driver
- *
- *	Clean up the driver specific /proc files and IDE settings
- *	for a given drive.
- *
- *	Takes ide_setting_mtx.
- */
-
-void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver)
-{
-	ide_remove_proc_entries(drive->proc, driver->proc_entries(drive));
-
-	mutex_lock(&ide_setting_mtx);
-	/*
-	 * ide_setting_mtx protects both the settings list and the use
-	 * of settings (we cannot take a setting out that is being used).
-	 */
-	drive->settings = NULL;
-	mutex_unlock(&ide_setting_mtx);
-}
-EXPORT_SYMBOL(ide_proc_unregister_driver);
-
-void ide_proc_port_register_devices(ide_hwif_t *hwif)
-{
-	struct proc_dir_entry *ent;
-	struct proc_dir_entry *parent = hwif->proc;
-	ide_drive_t *drive;
-	char name[64];
-	int i;
-
-	ide_port_for_each_dev(i, drive, hwif) {
-		if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0)
-			continue;
-
-		drive->proc = proc_mkdir(drive->name, parent);
-		if (drive->proc) {
-			ide_add_proc_entries(drive->proc, generic_drive_entries, drive);
-			proc_create_data("settings", S_IFREG|S_IRUSR|S_IWUSR,
-					drive->proc, &ide_settings_proc_ops,
-					drive);
-		}
-		sprintf(name, "ide%d/%s", (drive->name[2]-'a')/2, drive->name);
-		ent = proc_symlink(drive->name, proc_ide_root, name);
-		if (!ent) return;
-	}
-}
-
-void ide_proc_unregister_device(ide_drive_t *drive)
-{
-	if (drive->proc) {
-		remove_proc_entry("settings", drive->proc);
-		ide_remove_proc_entries(drive->proc, generic_drive_entries);
-		remove_proc_entry(drive->name, proc_ide_root);
-		remove_proc_entry(drive->name, drive->hwif->proc);
-		drive->proc = NULL;
-	}
-}
-
-static ide_proc_entry_t hwif_entries[] = {
-	{ "channel",	S_IFREG|S_IRUGO,	ide_channel_proc_show	},
-	{ "mate",	S_IFREG|S_IRUGO,	ide_mate_proc_show	},
-	{ "model",	S_IFREG|S_IRUGO,	ide_imodel_proc_show	},
-	{}
-};
-
-void ide_proc_register_port(ide_hwif_t *hwif)
-{
-	if (!hwif->proc) {
-		hwif->proc = proc_mkdir(hwif->name, proc_ide_root);
-
-		if (!hwif->proc)
-			return;
-
-		ide_add_proc_entries(hwif->proc, hwif_entries, hwif);
-	}
-}
-
-void ide_proc_unregister_port(ide_hwif_t *hwif)
-{
-	if (hwif->proc) {
-		ide_remove_proc_entries(hwif->proc, hwif_entries);
-		remove_proc_entry(hwif->name, proc_ide_root);
-		hwif->proc = NULL;
-	}
-}
-
-static int proc_print_driver(struct device_driver *drv, void *data)
-{
-	struct ide_driver *ide_drv = to_ide_driver(drv);
-	struct seq_file *s = data;
-
-	seq_printf(s, "%s version %s\n", drv->name, ide_drv->version);
-
-	return 0;
-}
-
-static int ide_drivers_show(struct seq_file *s, void *p)
-{
-	int err;
-
-	err = bus_for_each_drv(&ide_bus_type, NULL, s, proc_print_driver);
-	if (err < 0)
-		printk(KERN_WARNING "IDE: %s: bus_for_each_drv error: %d\n",
-			__func__, err);
-	return 0;
-}
-
-DEFINE_PROC_SHOW_ATTRIBUTE(ide_drivers);
-
-void proc_ide_create(void)
-{
-	proc_ide_root = proc_mkdir("ide", NULL);
-
-	if (!proc_ide_root)
-		return;
-
-	proc_create("drivers", 0, proc_ide_root, &ide_drivers_proc_ops);
-}
-
-void proc_ide_destroy(void)
-{
-	remove_proc_entry("drivers", proc_ide_root);
-	remove_proc_entry("ide", NULL);
-}
diff --git a/drivers/ide/ide-scan-pci.c b/drivers/ide/ide-scan-pci.c
deleted file mode 100644
index b0411a1827a36..0000000000000
--- a/drivers/ide/ide-scan-pci.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * support for probing IDE PCI devices in the PCI bus order
- *
- * Copyright (c) 1998-2000  Andre Hedrick <andre@linux-ide.org>
- * Copyright (c) 1995-1998  Mark Lord
- *
- * May be copied or modified under the terms of the GNU General Public License
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ide.h>
-
-/*
- *	Module interfaces
- */
-
-static int pre_init = 1;		/* Before first ordered IDE scan */
-static LIST_HEAD(ide_pci_drivers);
-
-/*
- *	__ide_pci_register_driver	-	attach IDE driver
- *	@driver: pci driver
- *	@module: owner module of the driver
- *
- *	Registers a driver with the IDE layer. The IDE layer arranges that
- *	boot time setup is done in the expected device order and then
- *	hands the controllers off to the core PCI code to do the rest of
- *	the work.
- *
- *	Returns are the same as for pci_register_driver
- */
-
-int __ide_pci_register_driver(struct pci_driver *driver, struct module *module,
-			      const char *mod_name)
-{
-	if (!pre_init)
-		return __pci_register_driver(driver, module, mod_name);
-	driver->driver.owner = module;
-	list_add_tail(&driver->node, &ide_pci_drivers);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__ide_pci_register_driver);
-
-/**
- *	ide_scan_pcidev		-	find an IDE driver for a device
- *	@dev: PCI device to check
- *
- *	Look for an IDE driver to handle the device we are considering.
- *	This is only used during boot up to get the ordering correct. After
- *	boot up the pci layer takes over the job.
- */
-
-static int __init ide_scan_pcidev(struct pci_dev *dev)
-{
-	struct list_head *l;
-	struct pci_driver *d;
-	int ret;
-
-	list_for_each(l, &ide_pci_drivers) {
-		d = list_entry(l, struct pci_driver, node);
-		if (d->id_table) {
-			const struct pci_device_id *id =
-				pci_match_id(d->id_table, dev);
-
-			if (id != NULL) {
-				pci_assign_irq(dev);
-				ret = d->probe(dev, id);
-				if (ret >= 0) {
-					dev->driver = d;
-					pci_dev_get(dev);
-					return 1;
-				}
-			}
-		}
-	}
-	return 0;
-}
-
-/**
- *	ide_scan_pcibus		-	perform the initial IDE driver scan
- *
- *	Perform the initial bus rather than driver ordered scan of the
- *	PCI drivers. After this all IDE pci handling becomes standard
- *	module ordering not traditionally ordered.
- */
-
-static int __init ide_scan_pcibus(void)
-{
-	struct pci_dev *dev = NULL;
-	struct pci_driver *d, *tmp;
-
-	pre_init = 0;
-	for_each_pci_dev(dev)
-		ide_scan_pcidev(dev);
-
-	/*
-	 *	Hand the drivers over to the PCI layer now we
-	 *	are post init.
-	 */
-
-	list_for_each_entry_safe(d, tmp, &ide_pci_drivers, node) {
-		list_del(&d->node);
-		if (__pci_register_driver(d, d->driver.owner,
-					  d->driver.mod_name))
-			printk(KERN_ERR "%s: failed to register %s driver\n",
-					__func__, d->driver.mod_name);
-	}
-
-	return 0;
-}
-device_initcall(ide_scan_pcibus);
diff --git a/drivers/ide/ide-sysfs.c b/drivers/ide/ide-sysfs.c
deleted file mode 100644
index c08a8a0916e22..0000000000000
--- a/drivers/ide/ide-sysfs.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/kernel.h>
-#include <linux/ide.h>
-
-char *ide_media_string(ide_drive_t *drive)
-{
-	switch (drive->media) {
-	case ide_disk:
-		return "disk";
-	case ide_cdrom:
-		return "cdrom";
-	case ide_tape:
-		return "tape";
-	case ide_floppy:
-		return "floppy";
-	case ide_optical:
-		return "optical";
-	default:
-		return "UNKNOWN";
-	}
-}
-
-static ssize_t media_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", ide_media_string(drive));
-}
-static DEVICE_ATTR_RO(media);
-
-static ssize_t drivename_show(struct device *dev, struct device_attribute *attr,
-			      char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", drive->name);
-}
-static DEVICE_ATTR_RO(drivename);
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "ide:m-%s\n", ide_media_string(drive));
-}
-static DEVICE_ATTR_RO(modalias);
-
-static ssize_t model_show(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]);
-}
-static DEVICE_ATTR_RO(model);
-
-static ssize_t firmware_show(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]);
-}
-static DEVICE_ATTR_RO(firmware);
-
-static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]);
-}
-static DEVICE_ATTR(serial, 0400, serial_show, NULL);
-
-static DEVICE_ATTR(unload_heads, 0644, ide_park_show, ide_park_store);
-
-static struct attribute *ide_attrs[] = {
-	&dev_attr_media.attr,
-	&dev_attr_drivename.attr,
-	&dev_attr_modalias.attr,
-	&dev_attr_model.attr,
-	&dev_attr_firmware.attr,
-	&dev_attr_serial.attr,
-	&dev_attr_unload_heads.attr,
-	NULL,
-};
-
-static const struct attribute_group ide_attr_group = {
-	.attrs = ide_attrs,
-};
-
-const struct attribute_group *ide_dev_groups[] = {
-	&ide_attr_group,
-	NULL,
-};
-
-static ssize_t store_delete_devices(struct device *portdev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(delete_devices, S_IWUSR, NULL, store_delete_devices);
-
-static ssize_t store_scan(struct device *portdev,
-			  struct device_attribute *attr,
-			  const char *buf, size_t n)
-{
-	ide_hwif_t *hwif = dev_get_drvdata(portdev);
-
-	if (strncmp(buf, "1", n))
-		return -EINVAL;
-
-	ide_port_unregister_devices(hwif);
-	ide_port_scan(hwif);
-
-	return n;
-};
-
-static DEVICE_ATTR(scan, S_IWUSR, NULL, store_scan);
-
-static struct device_attribute *ide_port_attrs[] = {
-	&dev_attr_delete_devices,
-	&dev_attr_scan,
-	NULL
-};
-
-int ide_sysfs_register_port(ide_hwif_t *hwif)
-{
-	int i, rc;
-
-	for (i = 0; ide_port_attrs[i]; i++) {
-		rc = device_create_file(hwif->portdev, ide_port_attrs[i]);
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
deleted file mode 100644
index fa05e7e7d6090..0000000000000
--- a/drivers/ide/ide-tape.c
+++ /dev/null
@@ -1,2083 +0,0 @@
-/*
- * IDE ATAPI streaming tape driver.
- *
- * Copyright (C) 1995-1999  Gadi Oxman <gadio@netvision.net.il>
- * Copyright (C) 2003-2005  Bartlomiej Zolnierkiewicz
- *
- * This driver was constructed as a student project in the software laboratory
- * of the faculty of electrical engineering in the Technion - Israel's
- * Institute Of Technology, with the guide of Avner Lottem and Dr. Ilana David.
- *
- * It is hereby placed under the terms of the GNU general public license.
- * (See linux/COPYING).
- *
- * For a historical changelog see
- * Documentation/ide/ChangeLog.ide-tape.1995-2002
- */
-
-#define DRV_NAME "ide-tape"
-
-#define IDETAPE_VERSION "1.20"
-
-#include <linux/compat.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/jiffies.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/completion.h>
-#include <linux/bitops.h>
-#include <linux/mutex.h>
-#include <scsi/scsi.h>
-
-#include <asm/byteorder.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-#include <asm/unaligned.h>
-#include <linux/mtio.h>
-
-/* define to see debug info */
-#undef IDETAPE_DEBUG_LOG
-
-#ifdef IDETAPE_DEBUG_LOG
-#define ide_debug_log(lvl, fmt, args...) __ide_debug_log(lvl, fmt, ## args)
-#else
-#define ide_debug_log(lvl, fmt, args...) do {} while (0)
-#endif
-
-/**************************** Tunable parameters *****************************/
-/*
- * After each failed packet command we issue a request sense command and retry
- * the packet command IDETAPE_MAX_PC_RETRIES times.
- *
- * Setting IDETAPE_MAX_PC_RETRIES to 0 will disable retries.
- */
-#define IDETAPE_MAX_PC_RETRIES		3
-
-/*
- * The following parameter is used to select the point in the internal tape fifo
- * in which we will start to refill the buffer. Decreasing the following
- * parameter will improve the system's latency and interactive response, while
- * using a high value might improve system throughput.
- */
-#define IDETAPE_FIFO_THRESHOLD		2
-
-/*
- * DSC polling parameters.
- *
- * Polling for DSC (a single bit in the status register) is a very important
- * function in ide-tape. There are two cases in which we poll for DSC:
- *
- * 1. Before a read/write packet command, to ensure that we can transfer data
- * from/to the tape's data buffers, without causing an actual media access.
- * In case the tape is not ready yet, we take out our request from the device
- * request queue, so that ide.c could service requests from the other device
- * on the same interface in the meantime.
- *
- * 2. After the successful initialization of a "media access packet command",
- * which is a command that can take a long time to complete (the interval can
- * range from several seconds to even an hour). Again, we postpone our request
- * in the middle to free the bus for the other device. The polling frequency
- * here should be lower than the read/write frequency since those media access
- * commands are slow. We start from a "fast" frequency - IDETAPE_DSC_MA_FAST
- * (1 second), and if we don't receive DSC after IDETAPE_DSC_MA_THRESHOLD
- * (5 min), we switch it to a lower frequency - IDETAPE_DSC_MA_SLOW (1 min).
- *
- * We also set a timeout for the timer, in case something goes wrong. The
- * timeout should be longer then the maximum execution time of a tape operation.
- */
-
-/* DSC timings. */
-#define IDETAPE_DSC_RW_MIN		5*HZ/100	/* 50 msec */
-#define IDETAPE_DSC_RW_MAX		40*HZ/100	/* 400 msec */
-#define IDETAPE_DSC_RW_TIMEOUT		2*60*HZ		/* 2 minutes */
-#define IDETAPE_DSC_MA_FAST		2*HZ		/* 2 seconds */
-#define IDETAPE_DSC_MA_THRESHOLD	5*60*HZ		/* 5 minutes */
-#define IDETAPE_DSC_MA_SLOW		30*HZ		/* 30 seconds */
-#define IDETAPE_DSC_MA_TIMEOUT		2*60*60*HZ	/* 2 hours */
-
-/*************************** End of tunable parameters ***********************/
-
-/* tape directions */
-enum {
-	IDETAPE_DIR_NONE  = (1 << 0),
-	IDETAPE_DIR_READ  = (1 << 1),
-	IDETAPE_DIR_WRITE = (1 << 2),
-};
-
-/* Tape door status */
-#define DOOR_UNLOCKED			0
-#define DOOR_LOCKED			1
-#define DOOR_EXPLICITLY_LOCKED		2
-
-/* Some defines for the SPACE command */
-#define IDETAPE_SPACE_OVER_FILEMARK	1
-#define IDETAPE_SPACE_TO_EOD		3
-
-/* Some defines for the LOAD UNLOAD command */
-#define IDETAPE_LU_LOAD_MASK		1
-#define IDETAPE_LU_RETENSION_MASK	2
-#define IDETAPE_LU_EOT_MASK		4
-
-/* Structures related to the SELECT SENSE / MODE SENSE packet commands. */
-#define IDETAPE_BLOCK_DESCRIPTOR	0
-#define IDETAPE_CAPABILITIES_PAGE	0x2a
-
-/*
- * Most of our global data which we need to save even as we leave the driver due
- * to an interrupt or a timer event is stored in the struct defined below.
- */
-typedef struct ide_tape_obj {
-	ide_drive_t		*drive;
-	struct ide_driver	*driver;
-	struct gendisk		*disk;
-	struct device		dev;
-
-	/* used by REQ_IDETAPE_{READ,WRITE} requests */
-	struct ide_atapi_pc queued_pc;
-
-	/*
-	 * DSC polling variables.
-	 *
-	 * While polling for DSC we use postponed_rq to postpone the current
-	 * request so that ide.c will be able to service pending requests on the
-	 * other device. Note that at most we will have only one DSC (usually
-	 * data transfer) request in the device request queue.
-	 */
-	bool postponed_rq;
-
-	/* The time in which we started polling for DSC */
-	unsigned long dsc_polling_start;
-	/* Timer used to poll for dsc */
-	struct timer_list dsc_timer;
-	/* Read/Write dsc polling frequency */
-	unsigned long best_dsc_rw_freq;
-	unsigned long dsc_poll_freq;
-	unsigned long dsc_timeout;
-
-	/* Read position information */
-	u8 partition;
-	/* Current block */
-	unsigned int first_frame;
-
-	/* Last error information */
-	u8 sense_key, asc, ascq;
-
-	/* Character device operation */
-	unsigned int minor;
-	/* device name */
-	char name[4];
-	/* Current character device data transfer direction */
-	u8 chrdev_dir;
-
-	/* tape block size, usually 512 or 1024 bytes */
-	unsigned short blk_size;
-	int user_bs_factor;
-
-	/* Copy of the tape's Capabilities and Mechanical Page */
-	u8 caps[20];
-
-	/*
-	 * Active data transfer request parameters.
-	 *
-	 * At most, there is only one ide-tape originated data transfer request
-	 * in the device request queue. This allows ide.c to easily service
-	 * requests from the other device when we postpone our active request.
-	 */
-
-	/* Data buffer size chosen based on the tape's recommendation */
-	int buffer_size;
-	/* Staging buffer of buffer_size bytes */
-	void *buf;
-	/* The read/write cursor */
-	void *cur;
-	/* The number of valid bytes in buf */
-	size_t valid;
-
-	/* Measures average tape speed */
-	unsigned long avg_time;
-	int avg_size;
-	int avg_speed;
-
-	/* the door is currently locked */
-	int door_locked;
-	/* the tape hardware is write protected */
-	char drv_write_prot;
-	/* the tape is write protected (hardware or opened as read-only) */
-	char write_prot;
-} idetape_tape_t;
-
-static DEFINE_MUTEX(ide_tape_mutex);
-static DEFINE_MUTEX(idetape_ref_mutex);
-
-static DEFINE_MUTEX(idetape_chrdev_mutex);
-
-static struct class *idetape_sysfs_class;
-
-static void ide_tape_release(struct device *);
-
-static struct ide_tape_obj *idetape_devs[MAX_HWIFS * MAX_DRIVES];
-
-static struct ide_tape_obj *ide_tape_get(struct gendisk *disk, bool cdev,
-					 unsigned int i)
-{
-	struct ide_tape_obj *tape = NULL;
-
-	mutex_lock(&idetape_ref_mutex);
-
-	if (cdev)
-		tape = idetape_devs[i];
-	else
-		tape = ide_drv_g(disk, ide_tape_obj);
-
-	if (tape) {
-		if (ide_device_get(tape->drive))
-			tape = NULL;
-		else
-			get_device(&tape->dev);
-	}
-
-	mutex_unlock(&idetape_ref_mutex);
-	return tape;
-}
-
-static void ide_tape_put(struct ide_tape_obj *tape)
-{
-	ide_drive_t *drive = tape->drive;
-
-	mutex_lock(&idetape_ref_mutex);
-	put_device(&tape->dev);
-	ide_device_put(drive);
-	mutex_unlock(&idetape_ref_mutex);
-}
-
-/*
- * called on each failed packet command retry to analyze the request sense. We
- * currently do not utilize this information.
- */
-static void idetape_analyze_error(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc *pc = drive->failed_pc;
-	struct request *rq = drive->hwif->rq;
-	u8 *sense = bio_data(rq->bio);
-
-	tape->sense_key = sense[2] & 0xF;
-	tape->asc       = sense[12];
-	tape->ascq      = sense[13];
-
-	ide_debug_log(IDE_DBG_FUNC,
-		      "cmd: 0x%x, sense key = %x, asc = %x, ascq = %x",
-		      rq->cmd[0], tape->sense_key, tape->asc, tape->ascq);
-
-	/* correct remaining bytes to transfer */
-	if (pc->flags & PC_FLAG_DMA_ERROR)
-		scsi_req(rq)->resid_len = tape->blk_size * get_unaligned_be32(&sense[3]);
-
-	/*
-	 * If error was the result of a zero-length read or write command,
-	 * with sense key=5, asc=0x22, ascq=0, let it slide.  Some drives
-	 * (i.e. Seagate STT3401A Travan) don't support 0-length read/writes.
-	 */
-	if ((pc->c[0] == READ_6 || pc->c[0] == WRITE_6)
-	    /* length == 0 */
-	    && pc->c[4] == 0 && pc->c[3] == 0 && pc->c[2] == 0) {
-		if (tape->sense_key == 5) {
-			/* don't report an error, everything's ok */
-			pc->error = 0;
-			/* don't retry read/write */
-			pc->flags |= PC_FLAG_ABORT;
-		}
-	}
-	if (pc->c[0] == READ_6 && (sense[2] & 0x80)) {
-		pc->error = IDE_DRV_ERROR_FILEMARK;
-		pc->flags |= PC_FLAG_ABORT;
-	}
-	if (pc->c[0] == WRITE_6) {
-		if ((sense[2] & 0x40) || (tape->sense_key == 0xd
-		     && tape->asc == 0x0 && tape->ascq == 0x2)) {
-			pc->error = IDE_DRV_ERROR_EOD;
-			pc->flags |= PC_FLAG_ABORT;
-		}
-	}
-	if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
-		if (tape->sense_key == 8) {
-			pc->error = IDE_DRV_ERROR_EOD;
-			pc->flags |= PC_FLAG_ABORT;
-		}
-		if (!(pc->flags & PC_FLAG_ABORT) &&
-		    (blk_rq_bytes(rq) - scsi_req(rq)->resid_len))
-			pc->retries = IDETAPE_MAX_PC_RETRIES + 1;
-	}
-}
-
-static void ide_tape_handle_dsc(ide_drive_t *);
-
-static int ide_tape_callback(ide_drive_t *drive, int dsc)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc *pc = drive->pc;
-	struct request *rq = drive->hwif->rq;
-	int uptodate = pc->error ? 0 : 1;
-	int err = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
-
-	ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%x, dsc: %d, err: %d", rq->cmd[0],
-		      dsc, err);
-
-	if (dsc)
-		ide_tape_handle_dsc(drive);
-
-	if (drive->failed_pc == pc)
-		drive->failed_pc = NULL;
-
-	if (pc->c[0] == REQUEST_SENSE) {
-		if (uptodate)
-			idetape_analyze_error(drive);
-		else
-			printk(KERN_ERR "ide-tape: Error in REQUEST SENSE "
-					"itself - Aborting request!\n");
-	} else if (pc->c[0] == READ_6 || pc->c[0] == WRITE_6) {
-		unsigned int blocks =
-			(blk_rq_bytes(rq) - scsi_req(rq)->resid_len) / tape->blk_size;
-
-		tape->avg_size += blocks * tape->blk_size;
-
-		if (time_after_eq(jiffies, tape->avg_time + HZ)) {
-			tape->avg_speed = tape->avg_size * HZ /
-				(jiffies - tape->avg_time) / 1024;
-			tape->avg_size = 0;
-			tape->avg_time = jiffies;
-		}
-
-		tape->first_frame += blocks;
-
-		if (pc->error) {
-			uptodate = 0;
-			err = pc->error;
-		}
-	}
-	scsi_req(rq)->result = err;
-
-	return uptodate;
-}
-
-/*
- * Postpone the current request so that ide.c will be able to service requests
- * from another device on the same port while we are polling for DSC.
- */
-static void ide_tape_stall_queue(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%x, dsc_poll_freq: %lu",
-		      drive->hwif->rq->cmd[0], tape->dsc_poll_freq);
-
-	tape->postponed_rq = true;
-
-	ide_stall_queue(drive, tape->dsc_poll_freq);
-}
-
-static void ide_tape_handle_dsc(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	/* Media access command */
-	tape->dsc_polling_start = jiffies;
-	tape->dsc_poll_freq = IDETAPE_DSC_MA_FAST;
-	tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
-	/* Allow ide.c to handle other requests */
-	ide_tape_stall_queue(drive);
-}
-
-/*
- * Packet Command Interface
- *
- * The current Packet Command is available in drive->pc, and will not change
- * until we finish handling it. Each packet command is associated with a
- * callback function that will be called when the command is finished.
- *
- * The handling will be done in three stages:
- *
- * 1. ide_tape_issue_pc will send the packet command to the drive, and will set
- * the interrupt handler to ide_pc_intr.
- *
- * 2. On each interrupt, ide_pc_intr will be called. This step will be
- * repeated until the device signals us that no more interrupts will be issued.
- *
- * 3. ATAPI Tape media access commands have immediate status with a delayed
- * process. In case of a successful initiation of a media access packet command,
- * the DSC bit will be set when the actual execution of the command is finished.
- * Since the tape drive will not issue an interrupt, we have to poll for this
- * event. In this case, we define the request as "low priority request" by
- * setting rq_status to IDETAPE_RQ_POSTPONED, set a timer to poll for DSC and
- * exit the driver.
- *
- * ide.c will then give higher priority to requests which originate from the
- * other device, until will change rq_status to RQ_ACTIVE.
- *
- * 4. When the packet command is finished, it will be checked for errors.
- *
- * 5. In case an error was found, we queue a request sense packet command in
- * front of the request queue and retry the operation up to
- * IDETAPE_MAX_PC_RETRIES times.
- *
- * 6. In case no error was found, or we decided to give up and not to retry
- * again, the callback function will be called and then we will handle the next
- * request.
- */
-
-static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
-					 struct ide_cmd *cmd,
-					 struct ide_atapi_pc *pc)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct request *rq = drive->hwif->rq;
-
-	if (drive->failed_pc == NULL && pc->c[0] != REQUEST_SENSE)
-		drive->failed_pc = pc;
-
-	/* Set the current packet command */
-	drive->pc = pc;
-
-	if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
-		(pc->flags & PC_FLAG_ABORT)) {
-
-		/*
-		 * We will "abort" retrying a packet command in case legitimate
-		 * error code was received (crossing a filemark, or end of the
-		 * media, for example).
-		 */
-		if (!(pc->flags & PC_FLAG_ABORT)) {
-			if (!(pc->c[0] == TEST_UNIT_READY &&
-			      tape->sense_key == 2 && tape->asc == 4 &&
-			     (tape->ascq == 1 || tape->ascq == 8))) {
-				printk(KERN_ERR "ide-tape: %s: I/O error, "
-						"pc = %2x, key = %2x, "
-						"asc = %2x, ascq = %2x\n",
-						tape->name, pc->c[0],
-						tape->sense_key, tape->asc,
-						tape->ascq);
-			}
-			/* Giving up */
-			pc->error = IDE_DRV_ERROR_GENERAL;
-		}
-
-		drive->failed_pc = NULL;
-		drive->pc_callback(drive, 0);
-		ide_complete_rq(drive, BLK_STS_IOERR, blk_rq_bytes(rq));
-		return ide_stopped;
-	}
-	ide_debug_log(IDE_DBG_SENSE, "retry #%d, cmd: 0x%02x", pc->retries,
-		      pc->c[0]);
-
-	pc->retries++;
-
-	return ide_issue_pc(drive, cmd);
-}
-
-/* A mode sense command is used to "sense" tape parameters. */
-static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code)
-{
-	ide_init_pc(pc);
-	pc->c[0] = MODE_SENSE;
-	if (page_code != IDETAPE_BLOCK_DESCRIPTOR)
-		/* DBD = 1 - Don't return block descriptors */
-		pc->c[1] = 8;
-	pc->c[2] = page_code;
-	/*
-	 * Changed pc->c[3] to 0 (255 will at best return unused info).
-	 *
-	 * For SCSI this byte is defined as subpage instead of high byte
-	 * of length and some IDE drives seem to interpret it this way
-	 * and return an error when 255 is used.
-	 */
-	pc->c[3] = 0;
-	/* We will just discard data in that case */
-	pc->c[4] = 255;
-	if (page_code == IDETAPE_BLOCK_DESCRIPTOR)
-		pc->req_xfer = 12;
-	else if (page_code == IDETAPE_CAPABILITIES_PAGE)
-		pc->req_xfer = 24;
-	else
-		pc->req_xfer = 50;
-}
-
-static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc *pc = drive->pc;
-	u8 stat;
-
-	stat = hwif->tp_ops->read_status(hwif);
-
-	if (stat & ATA_DSC) {
-		if (stat & ATA_ERR) {
-			/* Error detected */
-			if (pc->c[0] != TEST_UNIT_READY)
-				printk(KERN_ERR "ide-tape: %s: I/O error, ",
-						tape->name);
-			/* Retry operation */
-			ide_retry_pc(drive);
-			return ide_stopped;
-		}
-		pc->error = 0;
-	} else {
-		pc->error = IDE_DRV_ERROR_GENERAL;
-		drive->failed_pc = NULL;
-	}
-	drive->pc_callback(drive, 0);
-	return ide_stopped;
-}
-
-static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
-				   struct ide_atapi_pc *pc, struct request *rq,
-				   u8 opcode)
-{
-	unsigned int length = blk_rq_sectors(rq) / (tape->blk_size >> 9);
-
-	ide_init_pc(pc);
-	put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
-	pc->c[1] = 1;
-
-	if (blk_rq_bytes(rq) == tape->buffer_size)
-		pc->flags |= PC_FLAG_DMA_OK;
-
-	if (opcode == READ_6)
-		pc->c[0] = READ_6;
-	else if (opcode == WRITE_6) {
-		pc->c[0] = WRITE_6;
-		pc->flags |= PC_FLAG_WRITING;
-	}
-
-	memcpy(scsi_req(rq)->cmd, pc->c, 12);
-}
-
-static ide_startstop_t idetape_do_request(ide_drive_t *drive,
-					  struct request *rq, sector_t block)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc *pc = NULL;
-	struct ide_cmd cmd;
-	struct scsi_request *req = scsi_req(rq);
-	u8 stat;
-
-	ide_debug_log(IDE_DBG_RQ, "cmd: 0x%x, sector: %llu, nr_sectors: %u",
-		      req->cmd[0], (unsigned long long)blk_rq_pos(rq),
-		      blk_rq_sectors(rq));
-
-	BUG_ON(!blk_rq_is_private(rq));
-	BUG_ON(ide_req(rq)->type != ATA_PRIV_MISC &&
-	       ide_req(rq)->type != ATA_PRIV_SENSE);
-
-	/* Retry a failed packet command */
-	if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
-		pc = drive->failed_pc;
-		goto out;
-	}
-
-	/*
-	 * If the tape is still busy, postpone our request and service
-	 * the other device meanwhile.
-	 */
-	stat = hwif->tp_ops->read_status(hwif);
-
-	if ((drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) == 0 &&
-	    (req->cmd[13] & REQ_IDETAPE_PC2) == 0)
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
-
-	if (drive->dev_flags & IDE_DFLAG_POST_RESET) {
-		drive->atapi_flags |= IDE_AFLAG_IGNORE_DSC;
-		drive->dev_flags &= ~IDE_DFLAG_POST_RESET;
-	}
-
-	if (!(drive->atapi_flags & IDE_AFLAG_IGNORE_DSC) &&
-	    !(stat & ATA_DSC)) {
-		if (!tape->postponed_rq) {
-			tape->dsc_polling_start = jiffies;
-			tape->dsc_poll_freq = tape->best_dsc_rw_freq;
-			tape->dsc_timeout = jiffies + IDETAPE_DSC_RW_TIMEOUT;
-		} else if (time_after(jiffies, tape->dsc_timeout)) {
-			printk(KERN_ERR "ide-tape: %s: DSC timeout\n",
-				tape->name);
-			if (req->cmd[13] & REQ_IDETAPE_PC2) {
-				idetape_media_access_finished(drive);
-				return ide_stopped;
-			} else {
-				return ide_do_reset(drive);
-			}
-		} else if (time_after(jiffies,
-					tape->dsc_polling_start +
-					IDETAPE_DSC_MA_THRESHOLD))
-			tape->dsc_poll_freq = IDETAPE_DSC_MA_SLOW;
-		ide_tape_stall_queue(drive);
-		return ide_stopped;
-	} else {
-		drive->atapi_flags &= ~IDE_AFLAG_IGNORE_DSC;
-		tape->postponed_rq = false;
-	}
-
-	if (req->cmd[13] & REQ_IDETAPE_READ) {
-		pc = &tape->queued_pc;
-		ide_tape_create_rw_cmd(tape, pc, rq, READ_6);
-		goto out;
-	}
-	if (req->cmd[13] & REQ_IDETAPE_WRITE) {
-		pc = &tape->queued_pc;
-		ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6);
-		goto out;
-	}
-	if (req->cmd[13] & REQ_IDETAPE_PC1) {
-		pc = (struct ide_atapi_pc *)ide_req(rq)->special;
-		req->cmd[13] &= ~(REQ_IDETAPE_PC1);
-		req->cmd[13] |= REQ_IDETAPE_PC2;
-		goto out;
-	}
-	if (req->cmd[13] & REQ_IDETAPE_PC2) {
-		idetape_media_access_finished(drive);
-		return ide_stopped;
-	}
-	BUG();
-
-out:
-	/* prepare sense request for this command */
-	ide_prep_sense(drive, rq);
-
-	memset(&cmd, 0, sizeof(cmd));
-
-	if (rq_data_dir(rq))
-		cmd.tf_flags |= IDE_TFLAG_WRITE;
-
-	cmd.rq = rq;
-
-	ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
-	ide_map_sg(drive, &cmd);
-
-	return ide_tape_issue_pc(drive, &cmd, pc);
-}
-
-/*
- * Write a filemark if write_filemark=1. Flush the device buffers without
- * writing a filemark otherwise.
- */
-static void idetape_create_write_filemark_cmd(ide_drive_t *drive,
-		struct ide_atapi_pc *pc, int write_filemark)
-{
-	ide_init_pc(pc);
-	pc->c[0] = WRITE_FILEMARKS;
-	pc->c[4] = write_filemark;
-	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-}
-
-static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct gendisk *disk = tape->disk;
-	int load_attempted = 0;
-
-	/* Wait for the tape to become ready */
-	set_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT), &drive->atapi_flags);
-	timeout += jiffies;
-	while (time_before(jiffies, timeout)) {
-		if (ide_do_test_unit_ready(drive, disk) == 0)
-			return 0;
-		if ((tape->sense_key == 2 && tape->asc == 4 && tape->ascq == 2)
-		    || (tape->asc == 0x3A)) {
-			/* no media */
-			if (load_attempted)
-				return -ENOMEDIUM;
-			ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK);
-			load_attempted = 1;
-		/* not about to be ready */
-		} else if (!(tape->sense_key == 2 && tape->asc == 4 &&
-			     (tape->ascq == 1 || tape->ascq == 8)))
-			return -EIO;
-		msleep(100);
-	}
-	return -EIO;
-}
-
-static int idetape_flush_tape_buffers(ide_drive_t *drive)
-{
-	struct ide_tape_obj *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-	int rc;
-
-	idetape_create_write_filemark_cmd(drive, &pc, 0);
-	rc = ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0);
-	if (rc)
-		return rc;
-	idetape_wait_ready(drive, 60 * 5 * HZ);
-	return 0;
-}
-
-static int ide_tape_read_position(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-	u8 buf[20];
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	/* prep cmd */
-	ide_init_pc(&pc);
-	pc.c[0] = READ_POSITION;
-	pc.req_xfer = 20;
-
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, buf, pc.req_xfer))
-		return -1;
-
-	if (!pc.error) {
-		ide_debug_log(IDE_DBG_FUNC, "BOP - %s",
-				(buf[0] & 0x80) ? "Yes" : "No");
-		ide_debug_log(IDE_DBG_FUNC, "EOP - %s",
-				(buf[0] & 0x40) ? "Yes" : "No");
-
-		if (buf[0] & 0x4) {
-			printk(KERN_INFO "ide-tape: Block location is unknown"
-					 "to the tape\n");
-			clear_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
-				  &drive->atapi_flags);
-			return -1;
-		} else {
-			ide_debug_log(IDE_DBG_FUNC, "Block Location: %u",
-				      be32_to_cpup((__be32 *)&buf[4]));
-
-			tape->partition = buf[1];
-			tape->first_frame = be32_to_cpup((__be32 *)&buf[4]);
-			set_bit(ilog2(IDE_AFLAG_ADDRESS_VALID),
-				&drive->atapi_flags);
-		}
-	}
-
-	return tape->first_frame;
-}
-
-static void idetape_create_locate_cmd(ide_drive_t *drive,
-		struct ide_atapi_pc *pc,
-		unsigned int block, u8 partition, int skip)
-{
-	ide_init_pc(pc);
-	pc->c[0] = POSITION_TO_ELEMENT;
-	pc->c[1] = 2;
-	put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]);
-	pc->c[8] = partition;
-	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-}
-
-static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	if (tape->chrdev_dir != IDETAPE_DIR_READ)
-		return;
-
-	clear_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags);
-	tape->valid = 0;
-	if (tape->buf != NULL) {
-		kfree(tape->buf);
-		tape->buf = NULL;
-	}
-
-	tape->chrdev_dir = IDETAPE_DIR_NONE;
-}
-
-/*
- * Position the tape to the requested block using the LOCATE packet command.
- * A READ POSITION command is then issued to check where we are positioned. Like
- * all higher level operations, we queue the commands at the tail of the request
- * queue and wait for their completion.
- */
-static int idetape_position_tape(ide_drive_t *drive, unsigned int block,
-		u8 partition, int skip)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct gendisk *disk = tape->disk;
-	int ret;
-	struct ide_atapi_pc pc;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_READ)
-		__ide_tape_discard_merge_buffer(drive);
-	idetape_wait_ready(drive, 60 * 5 * HZ);
-	idetape_create_locate_cmd(drive, &pc, block, partition, skip);
-	ret = ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-	if (ret)
-		return ret;
-
-	ret = ide_tape_read_position(drive);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
-					  int restore_position)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	int seek, position;
-
-	__ide_tape_discard_merge_buffer(drive);
-	if (restore_position) {
-		position = ide_tape_read_position(drive);
-		seek = position > 0 ? position : 0;
-		if (idetape_position_tape(drive, seek, 0, 0)) {
-			printk(KERN_INFO "ide-tape: %s: position_tape failed in"
-					 " %s\n", tape->name, __func__);
-			return;
-		}
-	}
-}
-
-/*
- * Generate a read/write request for the block device interface and wait for it
- * to be serviced.
- */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct request *rq;
-	int ret;
-
-	ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%x, size: %d", cmd, size);
-
-	BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
-	BUG_ON(size < 0 || size % tape->blk_size);
-
-	rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_MISC;
-	scsi_req(rq)->cmd[13] = cmd;
-	rq->rq_disk = tape->disk;
-	rq->__sector = tape->first_frame;
-
-	if (size) {
-		ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
-				      GFP_NOIO);
-		if (ret)
-			goto out_put;
-	}
-
-	blk_execute_rq(tape->disk, rq, 0);
-
-	/* calculate the number of transferred bytes and update buffer state */
-	size -= scsi_req(rq)->resid_len;
-	tape->cur = tape->buf;
-	if (cmd == REQ_IDETAPE_READ)
-		tape->valid = size;
-	else
-		tape->valid = 0;
-
-	ret = size;
-	if (scsi_req(rq)->result == IDE_DRV_ERROR_GENERAL)
-		ret = -EIO;
-out_put:
-	blk_put_request(rq);
-	return ret;
-}
-
-static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc)
-{
-	ide_init_pc(pc);
-	pc->c[0] = INQUIRY;
-	pc->c[4] = 254;
-	pc->req_xfer = 254;
-}
-
-static void idetape_create_rewind_cmd(ide_drive_t *drive,
-		struct ide_atapi_pc *pc)
-{
-	ide_init_pc(pc);
-	pc->c[0] = REZERO_UNIT;
-	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-}
-
-static void idetape_create_erase_cmd(struct ide_atapi_pc *pc)
-{
-	ide_init_pc(pc);
-	pc->c[0] = ERASE;
-	pc->c[1] = 1;
-	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-}
-
-static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
-{
-	ide_init_pc(pc);
-	pc->c[0] = SPACE;
-	put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]);
-	pc->c[1] = cmd;
-	pc->flags |= PC_FLAG_WAIT_FOR_DSC;
-}
-
-static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-		printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
-				" but we are not writing.\n");
-		return;
-	}
-	if (tape->buf) {
-		size_t aligned = roundup(tape->valid, tape->blk_size);
-
-		memset(tape->cur, 0, aligned - tape->valid);
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
-		kfree(tape->buf);
-		tape->buf = NULL;
-	}
-	tape->chrdev_dir = IDETAPE_DIR_NONE;
-}
-
-static int idetape_init_rw(ide_drive_t *drive, int dir)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	int rc;
-
-	BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
-
-	if (tape->chrdev_dir == dir)
-		return 0;
-
-	if (tape->chrdev_dir == IDETAPE_DIR_READ)
-		ide_tape_discard_merge_buffer(drive, 1);
-	else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-		ide_tape_flush_merge_buffer(drive);
-		idetape_flush_tape_buffers(drive);
-	}
-
-	if (tape->buf || tape->valid) {
-		printk(KERN_ERR "ide-tape: valid should be 0 now\n");
-		tape->valid = 0;
-	}
-
-	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-	if (!tape->buf)
-		return -ENOMEM;
-	tape->chrdev_dir = dir;
-	tape->cur = tape->buf;
-
-	/*
-	 * Issue a 0 rw command to ensure that DSC handshake is
-	 * switched from completion mode to buffer available mode.  No
-	 * point in issuing this if DSC overlap isn't supported, some
-	 * drives (Seagate STT3401A) will return an error.
-	 */
-	if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-		int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
-						  : REQ_IDETAPE_WRITE;
-
-		rc = idetape_queue_rw_tail(drive, cmd, 0);
-		if (rc < 0) {
-			kfree(tape->buf);
-			tape->buf = NULL;
-			tape->chrdev_dir = IDETAPE_DIR_NONE;
-			return rc;
-		}
-	}
-
-	return 0;
-}
-
-static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	memset(tape->buf, 0, tape->buffer_size);
-
-	while (bcount) {
-		unsigned int count = min(tape->buffer_size, bcount);
-
-		idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
-		bcount -= count;
-	}
-}
-
-/*
- * Rewinds the tape to the Beginning Of the current Partition (BOP). We
- * currently support only one partition.
- */
-static int idetape_rewind_tape(ide_drive_t *drive)
-{
-	struct ide_tape_obj *tape = drive->driver_data;
-	struct gendisk *disk = tape->disk;
-	struct ide_atapi_pc pc;
-	int ret;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	idetape_create_rewind_cmd(drive, &pc);
-	ret = ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-	if (ret)
-		return ret;
-
-	ret = ide_tape_read_position(drive);
-	if (ret < 0)
-		return ret;
-	return 0;
-}
-
-/* mtio.h compatible commands should be issued to the chrdev interface. */
-static int idetape_blkdev_ioctl(ide_drive_t *drive, unsigned int cmd,
-				unsigned long arg)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	void __user *argp = (void __user *)arg;
-
-	struct idetape_config {
-		int dsc_rw_frequency;
-		int dsc_media_access_frequency;
-		int nr_stages;
-	} config;
-
-	ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%04x", cmd);
-
-	switch (cmd) {
-	case 0x0340:
-		if (copy_from_user(&config, argp, sizeof(config)))
-			return -EFAULT;
-		tape->best_dsc_rw_freq = config.dsc_rw_frequency;
-		break;
-	case 0x0350:
-		memset(&config, 0, sizeof(config));
-		config.dsc_rw_frequency = (int) tape->best_dsc_rw_freq;
-		config.nr_stages = 1;
-		if (copy_to_user(argp, &config, sizeof(config)))
-			return -EFAULT;
-		break;
-	default:
-		return -EIO;
-	}
-	return 0;
-}
-
-static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
-					int mt_count)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct gendisk *disk = tape->disk;
-	struct ide_atapi_pc pc;
-	int retval, count = 0;
-	int sprev = !!(tape->caps[4] & 0x20);
-
-
-	ide_debug_log(IDE_DBG_FUNC, "mt_op: %d, mt_count: %d", mt_op, mt_count);
-
-	if (mt_count == 0)
-		return 0;
-	if (MTBSF == mt_op || MTBSFM == mt_op) {
-		if (!sprev)
-			return -EIO;
-		mt_count = -mt_count;
-	}
-
-	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		tape->valid = 0;
-		if (test_and_clear_bit(ilog2(IDE_AFLAG_FILEMARK),
-				       &drive->atapi_flags))
-			++count;
-		ide_tape_discard_merge_buffer(drive, 0);
-	}
-
-	switch (mt_op) {
-	case MTFSF:
-	case MTBSF:
-		idetape_create_space_cmd(&pc, mt_count - count,
-					 IDETAPE_SPACE_OVER_FILEMARK);
-		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-	case MTFSFM:
-	case MTBSFM:
-		if (!sprev)
-			return -EIO;
-		retval = idetape_space_over_filemarks(drive, MTFSF,
-						      mt_count - count);
-		if (retval)
-			return retval;
-		count = (MTBSFM == mt_op ? 1 : -1);
-		return idetape_space_over_filemarks(drive, MTFSF, count);
-	default:
-		printk(KERN_ERR "ide-tape: MTIO operation %d not supported\n",
-				mt_op);
-		return -EIO;
-	}
-}
-
-/*
- * Our character device read / write functions.
- *
- * The tape is optimized to maximize throughput when it is transferring an
- * integral number of the "continuous transfer limit", which is a parameter of
- * the specific tape (26kB on my particular tape, 32kB for Onstream).
- *
- * As of version 1.3 of the driver, the character device provides an abstract
- * continuous view of the media - any mix of block sizes (even 1 byte) on the
- * same backup/restore procedure is supported. The driver will internally
- * convert the requests to the recommended transfer unit, so that an unmatch
- * between the user's block size to the recommended size will only result in a
- * (slightly) increased driver overhead, but will no longer hit performance.
- * This is not applicable to Onstream.
- */
-static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
-				   size_t count, loff_t *ppos)
-{
-	struct ide_tape_obj *tape = file->private_data;
-	ide_drive_t *drive = tape->drive;
-	size_t done = 0;
-	ssize_t ret = 0;
-	int rc;
-
-	ide_debug_log(IDE_DBG_FUNC, "count %zd", count);
-
-	if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-		if (test_bit(ilog2(IDE_AFLAG_DETECT_BS), &drive->atapi_flags))
-			if (count > tape->blk_size &&
-			    (count % tape->blk_size) == 0)
-				tape->user_bs_factor = count / tape->blk_size;
-	}
-
-	rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
-	if (rc < 0)
-		return rc;
-
-	while (done < count) {
-		size_t todo;
-
-		/* refill if staging buffer is empty */
-		if (!tape->valid) {
-			/* If we are at a filemark, nothing more to read */
-			if (test_bit(ilog2(IDE_AFLAG_FILEMARK),
-				     &drive->atapi_flags))
-				break;
-			/* read */
-			if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
-						  tape->buffer_size) <= 0)
-				break;
-		}
-
-		/* copy out */
-		todo = min_t(size_t, count - done, tape->valid);
-		if (copy_to_user(buf + done, tape->cur, todo))
-			ret = -EFAULT;
-
-		tape->cur += todo;
-		tape->valid -= todo;
-		done += todo;
-	}
-
-	if (!done && test_bit(ilog2(IDE_AFLAG_FILEMARK), &drive->atapi_flags)) {
-		idetape_space_over_filemarks(drive, MTFSF, 1);
-		return 0;
-	}
-
-	return ret ? ret : done;
-}
-
-static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
-				     size_t count, loff_t *ppos)
-{
-	struct ide_tape_obj *tape = file->private_data;
-	ide_drive_t *drive = tape->drive;
-	size_t done = 0;
-	ssize_t ret = 0;
-	int rc;
-
-	/* The drive is write protected. */
-	if (tape->write_prot)
-		return -EACCES;
-
-	ide_debug_log(IDE_DBG_FUNC, "count %zd", count);
-
-	/* Initialize write operation */
-	rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
-	if (rc < 0)
-		return rc;
-
-	while (done < count) {
-		size_t todo;
-
-		/* flush if staging buffer is full */
-		if (tape->valid == tape->buffer_size &&
-		    idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-					  tape->buffer_size) <= 0)
-			return rc;
-
-		/* copy in */
-		todo = min_t(size_t, count - done,
-			     tape->buffer_size - tape->valid);
-		if (copy_from_user(tape->cur, buf + done, todo))
-			ret = -EFAULT;
-
-		tape->cur += todo;
-		tape->valid += todo;
-		done += todo;
-	}
-
-	return ret ? ret : done;
-}
-
-static int idetape_write_filemark(ide_drive_t *drive)
-{
-	struct ide_tape_obj *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-
-	/* Write a filemark */
-	idetape_create_write_filemark_cmd(drive, &pc, 1);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, NULL, 0)) {
-		printk(KERN_ERR "ide-tape: Couldn't write a filemark\n");
-		return -EIO;
-	}
-	return 0;
-}
-
-/*
- * Called from idetape_chrdev_ioctl when the general mtio MTIOCTOP ioctl is
- * requested.
- *
- * Note: MTBSF and MTBSFM are not supported when the tape doesn't support
- * spacing over filemarks in the reverse direction. In this case, MTFSFM is also
- * usually not supported.
- *
- * The following commands are currently not supported:
- *
- * MTFSS, MTBSS, MTWSM, MTSETDENSITY, MTSETDRVBUFFER, MT_ST_BOOLEANS,
- * MT_ST_WRITE_THRESHOLD.
- */
-static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct gendisk *disk = tape->disk;
-	struct ide_atapi_pc pc;
-	int i, retval;
-
-	ide_debug_log(IDE_DBG_FUNC, "MTIOCTOP ioctl: mt_op: %d, mt_count: %d",
-		      mt_op, mt_count);
-
-	switch (mt_op) {
-	case MTFSF:
-	case MTFSFM:
-	case MTBSF:
-	case MTBSFM:
-		if (!mt_count)
-			return 0;
-		return idetape_space_over_filemarks(drive, mt_op, mt_count);
-	default:
-		break;
-	}
-
-	switch (mt_op) {
-	case MTWEOF:
-		if (tape->write_prot)
-			return -EACCES;
-		ide_tape_discard_merge_buffer(drive, 1);
-		for (i = 0; i < mt_count; i++) {
-			retval = idetape_write_filemark(drive);
-			if (retval)
-				return retval;
-		}
-		return 0;
-	case MTREW:
-		ide_tape_discard_merge_buffer(drive, 0);
-		if (idetape_rewind_tape(drive))
-			return -EIO;
-		return 0;
-	case MTLOAD:
-		ide_tape_discard_merge_buffer(drive, 0);
-		return ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK);
-	case MTUNLOAD:
-	case MTOFFL:
-		/*
-		 * If door is locked, attempt to unlock before
-		 * attempting to eject.
-		 */
-		if (tape->door_locked) {
-			if (!ide_set_media_lock(drive, disk, 0))
-				tape->door_locked = DOOR_UNLOCKED;
-		}
-		ide_tape_discard_merge_buffer(drive, 0);
-		retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK);
-		if (!retval)
-			clear_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
-				  &drive->atapi_flags);
-		return retval;
-	case MTNOP:
-		ide_tape_discard_merge_buffer(drive, 0);
-		return idetape_flush_tape_buffers(drive);
-	case MTRETEN:
-		ide_tape_discard_merge_buffer(drive, 0);
-		return ide_do_start_stop(drive, disk,
-			IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK);
-	case MTEOM:
-		idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD);
-		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-	case MTERASE:
-		(void)idetape_rewind_tape(drive);
-		idetape_create_erase_cmd(&pc);
-		return ide_queue_pc_tail(drive, disk, &pc, NULL, 0);
-	case MTSETBLK:
-		if (mt_count) {
-			if (mt_count < tape->blk_size ||
-			    mt_count % tape->blk_size)
-				return -EIO;
-			tape->user_bs_factor = mt_count / tape->blk_size;
-			clear_bit(ilog2(IDE_AFLAG_DETECT_BS),
-				  &drive->atapi_flags);
-		} else
-			set_bit(ilog2(IDE_AFLAG_DETECT_BS),
-				&drive->atapi_flags);
-		return 0;
-	case MTSEEK:
-		ide_tape_discard_merge_buffer(drive, 0);
-		return idetape_position_tape(drive,
-			mt_count * tape->user_bs_factor, tape->partition, 0);
-	case MTSETPART:
-		ide_tape_discard_merge_buffer(drive, 0);
-		return idetape_position_tape(drive, 0, mt_count, 0);
-	case MTFSR:
-	case MTBSR:
-	case MTLOCK:
-		retval = ide_set_media_lock(drive, disk, 1);
-		if (retval)
-			return retval;
-		tape->door_locked = DOOR_EXPLICITLY_LOCKED;
-		return 0;
-	case MTUNLOCK:
-		retval = ide_set_media_lock(drive, disk, 0);
-		if (retval)
-			return retval;
-		tape->door_locked = DOOR_UNLOCKED;
-		return 0;
-	default:
-		printk(KERN_ERR "ide-tape: MTIO operation %d not supported\n",
-				mt_op);
-		return -EIO;
-	}
-}
-
-/*
- * Our character device ioctls. General mtio.h magnetic io commands are
- * supported here, and not in the corresponding block interface. Our own
- * ide-tape ioctls are supported on both interfaces.
- */
-static long do_idetape_chrdev_ioctl(struct file *file,
-				unsigned int cmd, unsigned long arg)
-{
-	struct ide_tape_obj *tape = file->private_data;
-	ide_drive_t *drive = tape->drive;
-	struct mtop mtop;
-	struct mtget mtget;
-	struct mtpos mtpos;
-	int block_offset = 0, position = tape->first_frame;
-	void __user *argp = (void __user *)arg;
-
-	ide_debug_log(IDE_DBG_FUNC, "cmd: 0x%x", cmd);
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-		ide_tape_flush_merge_buffer(drive);
-		idetape_flush_tape_buffers(drive);
-	}
-	if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-		block_offset = tape->valid /
-			(tape->blk_size * tape->user_bs_factor);
-		position = ide_tape_read_position(drive);
-		if (position < 0)
-			return -EIO;
-	}
-	switch (cmd) {
-	case MTIOCTOP:
-		if (copy_from_user(&mtop, argp, sizeof(struct mtop)))
-			return -EFAULT;
-		return idetape_mtioctop(drive, mtop.mt_op, mtop.mt_count);
-	case MTIOCGET:
-		memset(&mtget, 0, sizeof(struct mtget));
-		mtget.mt_type = MT_ISSCSI2;
-		mtget.mt_blkno = position / tape->user_bs_factor - block_offset;
-		mtget.mt_dsreg =
-			((tape->blk_size * tape->user_bs_factor)
-			 << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK;
-
-		if (tape->drv_write_prot)
-			mtget.mt_gstat |= GMT_WR_PROT(0xffffffff);
-
-		return put_user_mtget(argp, &mtget);
-	case MTIOCPOS:
-		mtpos.mt_blkno = position / tape->user_bs_factor - block_offset;
-		return put_user_mtpos(argp, &mtpos);
-	default:
-		if (tape->chrdev_dir == IDETAPE_DIR_READ)
-			ide_tape_discard_merge_buffer(drive, 1);
-		return idetape_blkdev_ioctl(drive, cmd, arg);
-	}
-}
-
-static long idetape_chrdev_ioctl(struct file *file,
-				unsigned int cmd, unsigned long arg)
-{
-	long ret;
-	mutex_lock(&ide_tape_mutex);
-	ret = do_idetape_chrdev_ioctl(file, cmd, arg);
-	mutex_unlock(&ide_tape_mutex);
-	return ret;
-}
-
-static long idetape_chrdev_compat_ioctl(struct file *file,
-				unsigned int cmd, unsigned long arg)
-{
-	long ret;
-
-	if (cmd == MTIOCPOS32)
-		cmd = MTIOCPOS;
-	else if (cmd == MTIOCGET32)
-		cmd = MTIOCGET;
-
-	mutex_lock(&ide_tape_mutex);
-	ret = do_idetape_chrdev_ioctl(file, cmd, arg);
-	mutex_unlock(&ide_tape_mutex);
-	return ret;
-}
-
-/*
- * Do a mode sense page 0 with block descriptor and if it succeeds set the tape
- * block size with the reported value.
- */
-static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-	u8 buf[12];
-
-	idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, buf, pc.req_xfer)) {
-		printk(KERN_ERR "ide-tape: Can't get block descriptor\n");
-		if (tape->blk_size == 0) {
-			printk(KERN_WARNING "ide-tape: Cannot deal with zero "
-					    "block size, assuming 32k\n");
-			tape->blk_size = 32768;
-		}
-		return;
-	}
-	tape->blk_size = (buf[4 + 5] << 16) +
-				(buf[4 + 6] << 8)  +
-				 buf[4 + 7];
-	tape->drv_write_prot = (buf[2] & 0x80) >> 7;
-
-	ide_debug_log(IDE_DBG_FUNC, "blk_size: %d, write_prot: %d",
-		      tape->blk_size, tape->drv_write_prot);
-}
-
-static int idetape_chrdev_open(struct inode *inode, struct file *filp)
-{
-	unsigned int minor = iminor(inode), i = minor & ~0xc0;
-	ide_drive_t *drive;
-	idetape_tape_t *tape;
-	int retval;
-
-	if (i >= MAX_HWIFS * MAX_DRIVES)
-		return -ENXIO;
-
-	mutex_lock(&idetape_chrdev_mutex);
-
-	tape = ide_tape_get(NULL, true, i);
-	if (!tape) {
-		mutex_unlock(&idetape_chrdev_mutex);
-		return -ENXIO;
-	}
-
-	drive = tape->drive;
-	filp->private_data = tape;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	/*
-	 * We really want to do nonseekable_open(inode, filp); here, but some
-	 * versions of tar incorrectly call lseek on tapes and bail out if that
-	 * fails.  So we disallow pread() and pwrite(), but permit lseeks.
-	 */
-	filp->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
-
-
-	if (test_and_set_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags)) {
-		retval = -EBUSY;
-		goto out_put_tape;
-	}
-
-	retval = idetape_wait_ready(drive, 60 * HZ);
-	if (retval) {
-		clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
-		printk(KERN_ERR "ide-tape: %s: drive not ready\n", tape->name);
-		goto out_put_tape;
-	}
-
-	ide_tape_read_position(drive);
-	if (!test_bit(ilog2(IDE_AFLAG_ADDRESS_VALID), &drive->atapi_flags))
-		(void)idetape_rewind_tape(drive);
-
-	/* Read block size and write protect status from drive. */
-	ide_tape_get_bsize_from_bdesc(drive);
-
-	/* Set write protect flag if device is opened as read-only. */
-	if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
-		tape->write_prot = 1;
-	else
-		tape->write_prot = tape->drv_write_prot;
-
-	/* Make sure drive isn't write protected if user wants to write. */
-	if (tape->write_prot) {
-		if ((filp->f_flags & O_ACCMODE) == O_WRONLY ||
-		    (filp->f_flags & O_ACCMODE) == O_RDWR) {
-			clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
-			retval = -EROFS;
-			goto out_put_tape;
-		}
-	}
-
-	/* Lock the tape drive door so user can't eject. */
-	if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
-		if (!ide_set_media_lock(drive, tape->disk, 1)) {
-			if (tape->door_locked != DOOR_EXPLICITLY_LOCKED)
-				tape->door_locked = DOOR_LOCKED;
-		}
-	}
-	mutex_unlock(&idetape_chrdev_mutex);
-
-	return 0;
-
-out_put_tape:
-	ide_tape_put(tape);
-
-	mutex_unlock(&idetape_chrdev_mutex);
-
-	return retval;
-}
-
-static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	ide_tape_flush_merge_buffer(drive);
-	tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
-	if (tape->buf != NULL) {
-		idetape_pad_zeros(drive, tape->blk_size *
-				(tape->user_bs_factor - 1));
-		kfree(tape->buf);
-		tape->buf = NULL;
-	}
-	idetape_write_filemark(drive);
-	idetape_flush_tape_buffers(drive);
-	idetape_flush_tape_buffers(drive);
-}
-
-static int idetape_chrdev_release(struct inode *inode, struct file *filp)
-{
-	struct ide_tape_obj *tape = filp->private_data;
-	ide_drive_t *drive = tape->drive;
-	unsigned int minor = iminor(inode);
-
-	mutex_lock(&idetape_chrdev_mutex);
-
-	tape = drive->driver_data;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-		idetape_write_release(drive, minor);
-	if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-		if (minor < 128)
-			ide_tape_discard_merge_buffer(drive, 1);
-	}
-
-	if (minor < 128 && test_bit(ilog2(IDE_AFLAG_MEDIUM_PRESENT),
-				    &drive->atapi_flags))
-		(void) idetape_rewind_tape(drive);
-
-	if (tape->chrdev_dir == IDETAPE_DIR_NONE) {
-		if (tape->door_locked == DOOR_LOCKED) {
-			if (!ide_set_media_lock(drive, tape->disk, 0))
-				tape->door_locked = DOOR_UNLOCKED;
-		}
-	}
-	clear_bit(ilog2(IDE_AFLAG_BUSY), &drive->atapi_flags);
-	ide_tape_put(tape);
-
-	mutex_unlock(&idetape_chrdev_mutex);
-
-	return 0;
-}
-
-static void idetape_get_inquiry_results(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-	u8 pc_buf[256];
-	char fw_rev[4], vendor_id[8], product_id[16];
-
-	idetape_create_inquiry_cmd(&pc);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, pc_buf, pc.req_xfer)) {
-		printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n",
-				tape->name);
-		return;
-	}
-	memcpy(vendor_id, &pc_buf[8], 8);
-	memcpy(product_id, &pc_buf[16], 16);
-	memcpy(fw_rev, &pc_buf[32], 4);
-
-	ide_fixstring(vendor_id, 8, 0);
-	ide_fixstring(product_id, 16, 0);
-	ide_fixstring(fw_rev, 4, 0);
-
-	printk(KERN_INFO "ide-tape: %s <-> %s: %.8s %.16s rev %.4s\n",
-			drive->name, tape->name, vendor_id, product_id, fw_rev);
-}
-
-/*
- * Ask the tape about its various parameters. In particular, we will adjust our
- * data transfer buffer	size to the recommended value as returned by the tape.
- */
-static void idetape_get_mode_sense_results(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-	struct ide_atapi_pc pc;
-	u8 buf[24], *caps;
-	u8 speed, max_speed;
-
-	idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE);
-	if (ide_queue_pc_tail(drive, tape->disk, &pc, buf, pc.req_xfer)) {
-		printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming"
-				" some default values\n");
-		tape->blk_size = 512;
-		put_unaligned(52,   (u16 *)&tape->caps[12]);
-		put_unaligned(540,  (u16 *)&tape->caps[14]);
-		put_unaligned(6*52, (u16 *)&tape->caps[16]);
-		return;
-	}
-	caps = buf + 4 + buf[3];
-
-	/* convert to host order and save for later use */
-	speed = be16_to_cpup((__be16 *)&caps[14]);
-	max_speed = be16_to_cpup((__be16 *)&caps[8]);
-
-	*(u16 *)&caps[8] = max_speed;
-	*(u16 *)&caps[12] = be16_to_cpup((__be16 *)&caps[12]);
-	*(u16 *)&caps[14] = speed;
-	*(u16 *)&caps[16] = be16_to_cpup((__be16 *)&caps[16]);
-
-	if (!speed) {
-		printk(KERN_INFO "ide-tape: %s: invalid tape speed "
-				"(assuming 650KB/sec)\n", drive->name);
-		*(u16 *)&caps[14] = 650;
-	}
-	if (!max_speed) {
-		printk(KERN_INFO "ide-tape: %s: invalid max_speed "
-				"(assuming 650KB/sec)\n", drive->name);
-		*(u16 *)&caps[8] = 650;
-	}
-
-	memcpy(&tape->caps, caps, 20);
-
-	/* device lacks locking support according to capabilities page */
-	if ((caps[6] & 1) == 0)
-		drive->dev_flags &= ~IDE_DFLAG_DOORLOCKING;
-
-	if (caps[7] & 0x02)
-		tape->blk_size = 512;
-	else if (caps[7] & 0x04)
-		tape->blk_size = 1024;
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-#define ide_tape_devset_get(name, field) \
-static int get_##name(ide_drive_t *drive) \
-{ \
-	idetape_tape_t *tape = drive->driver_data; \
-	return tape->field; \
-}
-
-#define ide_tape_devset_set(name, field) \
-static int set_##name(ide_drive_t *drive, int arg) \
-{ \
-	idetape_tape_t *tape = drive->driver_data; \
-	tape->field = arg; \
-	return 0; \
-}
-
-#define ide_tape_devset_rw_field(_name, _field) \
-ide_tape_devset_get(_name, _field) \
-ide_tape_devset_set(_name, _field) \
-IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name)
-
-#define ide_tape_devset_r_field(_name, _field) \
-ide_tape_devset_get(_name, _field) \
-IDE_DEVSET(_name, 0, get_##_name, NULL)
-
-static int mulf_tdsc(ide_drive_t *drive)	{ return 1000; }
-static int divf_tdsc(ide_drive_t *drive)	{ return   HZ; }
-static int divf_buffer(ide_drive_t *drive)	{ return    2; }
-static int divf_buffer_size(ide_drive_t *drive)	{ return 1024; }
-
-ide_devset_rw_flag(dsc_overlap, IDE_DFLAG_DSC_OVERLAP);
-
-ide_tape_devset_rw_field(tdsc, best_dsc_rw_freq);
-
-ide_tape_devset_r_field(avg_speed, avg_speed);
-ide_tape_devset_r_field(speed, caps[14]);
-ide_tape_devset_r_field(buffer, caps[16]);
-ide_tape_devset_r_field(buffer_size, buffer_size);
-
-static const struct ide_proc_devset idetape_settings[] = {
-	__IDE_PROC_DEVSET(avg_speed,	0, 0xffff, NULL, NULL),
-	__IDE_PROC_DEVSET(buffer,	0, 0xffff, NULL, divf_buffer),
-	__IDE_PROC_DEVSET(buffer_size,	0, 0xffff, NULL, divf_buffer_size),
-	__IDE_PROC_DEVSET(dsc_overlap,	0,      1, NULL, NULL),
-	__IDE_PROC_DEVSET(speed,	0, 0xffff, NULL, NULL),
-	__IDE_PROC_DEVSET(tdsc,		IDETAPE_DSC_RW_MIN, IDETAPE_DSC_RW_MAX,
-					mulf_tdsc, divf_tdsc),
-	{ NULL },
-};
-#endif
-
-/*
- * The function below is called to:
- *
- * 1. Initialize our various state variables.
- * 2. Ask the tape for its capabilities.
- * 3. Allocate a buffer which will be used for data transfer. The buffer size
- * is chosen based on the recommendation which we received in step 2.
- *
- * Note that at this point ide.c already assigned us an irq, so that we can
- * queue requests here and wait for their completion.
- */
-static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
-{
-	unsigned long t;
-	int speed;
-	u16 *ctl = (u16 *)&tape->caps[12];
-
-	ide_debug_log(IDE_DBG_FUNC, "minor: %d", minor);
-
-	drive->pc_callback = ide_tape_callback;
-
-	drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
-
-	if (drive->hwif->host_flags & IDE_HFLAG_NO_DSC) {
-		printk(KERN_INFO "ide-tape: %s: disabling DSC overlap\n",
-				 tape->name);
-		drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-	}
-
-	/* Seagate Travan drives do not support DSC overlap. */
-	if (strstr((char *)&drive->id[ATA_ID_PROD], "Seagate STT3401"))
-		drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-
-	tape->minor = minor;
-	tape->name[0] = 'h';
-	tape->name[1] = 't';
-	tape->name[2] = '0' + minor;
-	tape->chrdev_dir = IDETAPE_DIR_NONE;
-
-	idetape_get_inquiry_results(drive);
-	idetape_get_mode_sense_results(drive);
-	ide_tape_get_bsize_from_bdesc(drive);
-	tape->user_bs_factor = 1;
-	tape->buffer_size = *ctl * tape->blk_size;
-	while (tape->buffer_size > 0xffff) {
-		printk(KERN_NOTICE "ide-tape: decreasing stage size\n");
-		*ctl /= 2;
-		tape->buffer_size = *ctl * tape->blk_size;
-	}
-
-	/* select the "best" DSC read/write polling freq */
-	speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
-
-	t = (IDETAPE_FIFO_THRESHOLD * tape->buffer_size * HZ) / (speed * 1000);
-
-	/*
-	 * Ensure that the number we got makes sense; limit it within
-	 * IDETAPE_DSC_RW_MIN and IDETAPE_DSC_RW_MAX.
-	 */
-	tape->best_dsc_rw_freq = clamp_t(unsigned long, t, IDETAPE_DSC_RW_MIN,
-					 IDETAPE_DSC_RW_MAX);
-	printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, "
-		"%ums tDSC%s\n",
-		drive->name, tape->name, *(u16 *)&tape->caps[14],
-		(*(u16 *)&tape->caps[16] * 512) / tape->buffer_size,
-		tape->buffer_size / 1024,
-		jiffies_to_msecs(tape->best_dsc_rw_freq),
-		(drive->dev_flags & IDE_DFLAG_USING_DMA) ? ", DMA" : "");
-
-	ide_proc_register_driver(drive, tape->driver);
-}
-
-static void ide_tape_remove(ide_drive_t *drive)
-{
-	idetape_tape_t *tape = drive->driver_data;
-
-	ide_proc_unregister_driver(drive, tape->driver);
-	device_del(&tape->dev);
-
-	mutex_lock(&idetape_ref_mutex);
-	put_device(&tape->dev);
-	mutex_unlock(&idetape_ref_mutex);
-}
-
-static void ide_tape_release(struct device *dev)
-{
-	struct ide_tape_obj *tape = to_ide_drv(dev, ide_tape_obj);
-	ide_drive_t *drive = tape->drive;
-	struct gendisk *g = tape->disk;
-
-	BUG_ON(tape->valid);
-
-	drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
-	drive->driver_data = NULL;
-	device_destroy(idetape_sysfs_class, MKDEV(IDETAPE_MAJOR, tape->minor));
-	device_destroy(idetape_sysfs_class,
-			MKDEV(IDETAPE_MAJOR, tape->minor + 128));
-	idetape_devs[tape->minor] = NULL;
-	g->private_data = NULL;
-	put_disk(g);
-	kfree(tape);
-}
-
-#ifdef CONFIG_IDE_PROC_FS
-static int idetape_name_proc_show(struct seq_file *m, void *v)
-{
-	ide_drive_t	*drive = (ide_drive_t *) m->private;
-	idetape_tape_t	*tape = drive->driver_data;
-
-	seq_printf(m, "%s\n", tape->name);
-	return 0;
-}
-
-static ide_proc_entry_t idetape_proc[] = {
-	{ "capacity",	S_IFREG|S_IRUGO,	ide_capacity_proc_show	},
-	{ "name",	S_IFREG|S_IRUGO,	idetape_name_proc_show	},
-	{}
-};
-
-static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive)
-{
-	return idetape_proc;
-}
-
-static const struct ide_proc_devset *ide_tape_proc_devsets(ide_drive_t *drive)
-{
-	return idetape_settings;
-}
-#endif
-
-static int ide_tape_probe(ide_drive_t *);
-
-static struct ide_driver idetape_driver = {
-	.gen_driver = {
-		.owner		= THIS_MODULE,
-		.name		= "ide-tape",
-		.bus		= &ide_bus_type,
-	},
-	.probe			= ide_tape_probe,
-	.remove			= ide_tape_remove,
-	.version		= IDETAPE_VERSION,
-	.do_request		= idetape_do_request,
-#ifdef CONFIG_IDE_PROC_FS
-	.proc_entries		= ide_tape_proc_entries,
-	.proc_devsets		= ide_tape_proc_devsets,
-#endif
-};
-
-/* Our character device supporting functions, passed to register_chrdev. */
-static const struct file_operations idetape_fops = {
-	.owner		= THIS_MODULE,
-	.read		= idetape_chrdev_read,
-	.write		= idetape_chrdev_write,
-	.unlocked_ioctl	= idetape_chrdev_ioctl,
-	.compat_ioctl	= IS_ENABLED(CONFIG_COMPAT) ?
-			  idetape_chrdev_compat_ioctl : NULL,
-	.open		= idetape_chrdev_open,
-	.release	= idetape_chrdev_release,
-	.llseek		= noop_llseek,
-};
-
-static int idetape_open(struct block_device *bdev, fmode_t mode)
-{
-	struct ide_tape_obj *tape;
-
-	mutex_lock(&ide_tape_mutex);
-	tape = ide_tape_get(bdev->bd_disk, false, 0);
-	mutex_unlock(&ide_tape_mutex);
-
-	if (!tape)
-		return -ENXIO;
-
-	return 0;
-}
-
-static void idetape_release(struct gendisk *disk, fmode_t mode)
-{
-	struct ide_tape_obj *tape = ide_drv_g(disk, ide_tape_obj);
-
-	mutex_lock(&ide_tape_mutex);
-	ide_tape_put(tape);
-	mutex_unlock(&ide_tape_mutex);
-}
-
-static int idetape_ioctl(struct block_device *bdev, fmode_t mode,
-			unsigned int cmd, unsigned long arg)
-{
-	struct ide_tape_obj *tape = ide_drv_g(bdev->bd_disk, ide_tape_obj);
-	ide_drive_t *drive = tape->drive;
-	int err;
-
-	mutex_lock(&ide_tape_mutex);
-	err = generic_ide_ioctl(drive, bdev, cmd, arg);
-	if (err == -EINVAL)
-		err = idetape_blkdev_ioctl(drive, cmd, arg);
-	mutex_unlock(&ide_tape_mutex);
-
-	return err;
-}
-
-static int idetape_compat_ioctl(struct block_device *bdev, fmode_t mode,
-				unsigned int cmd, unsigned long arg)
-{
-        if (cmd == 0x0340 || cmd == 0x350)
-		arg = (unsigned long)compat_ptr(arg);
-
-	return idetape_ioctl(bdev, mode, cmd, arg);
-}
-
-static const struct block_device_operations idetape_block_ops = {
-	.owner		= THIS_MODULE,
-	.open		= idetape_open,
-	.release	= idetape_release,
-	.ioctl		= idetape_ioctl,
-	.compat_ioctl	= IS_ENABLED(CONFIG_COMPAT) ?
-				idetape_compat_ioctl : NULL,
-};
-
-static int ide_tape_probe(ide_drive_t *drive)
-{
-	idetape_tape_t *tape;
-	struct gendisk *g;
-	int minor;
-
-	ide_debug_log(IDE_DBG_FUNC, "enter");
-
-	if (!strstr(DRV_NAME, drive->driver_req))
-		goto failed;
-
-	if (drive->media != ide_tape)
-		goto failed;
-
-	if ((drive->dev_flags & IDE_DFLAG_ID_READ) &&
-	    ide_check_atapi_device(drive, DRV_NAME) == 0) {
-		printk(KERN_ERR "ide-tape: %s: not supported by this version of"
-				" the driver\n", drive->name);
-		goto failed;
-	}
-	tape = kzalloc(sizeof(idetape_tape_t), GFP_KERNEL);
-	if (tape == NULL) {
-		printk(KERN_ERR "ide-tape: %s: Can't allocate a tape struct\n",
-				drive->name);
-		goto failed;
-	}
-
-	g = alloc_disk(1 << PARTN_BITS);
-	if (!g)
-		goto out_free_tape;
-
-	ide_init_disk(g, drive);
-
-	tape->dev.parent = &drive->gendev;
-	tape->dev.release = ide_tape_release;
-	dev_set_name(&tape->dev, "%s", dev_name(&drive->gendev));
-
-	if (device_register(&tape->dev))
-		goto out_free_disk;
-
-	tape->drive = drive;
-	tape->driver = &idetape_driver;
-	tape->disk = g;
-
-	g->private_data = &tape->driver;
-
-	drive->driver_data = tape;
-
-	mutex_lock(&idetape_ref_mutex);
-	for (minor = 0; idetape_devs[minor]; minor++)
-		;
-	idetape_devs[minor] = tape;
-	mutex_unlock(&idetape_ref_mutex);
-
-	idetape_setup(drive, tape, minor);
-
-	device_create(idetape_sysfs_class, &drive->gendev,
-		      MKDEV(IDETAPE_MAJOR, minor), NULL, "%s", tape->name);
-	device_create(idetape_sysfs_class, &drive->gendev,
-		      MKDEV(IDETAPE_MAJOR, minor + 128), NULL,
-		      "n%s", tape->name);
-
-	g->fops = &idetape_block_ops;
-
-	return 0;
-
-out_free_disk:
-	put_disk(g);
-out_free_tape:
-	kfree(tape);
-failed:
-	return -ENODEV;
-}
-
-static void __exit idetape_exit(void)
-{
-	driver_unregister(&idetape_driver.gen_driver);
-	class_destroy(idetape_sysfs_class);
-	unregister_chrdev(IDETAPE_MAJOR, "ht");
-}
-
-static int __init idetape_init(void)
-{
-	int error = 1;
-	idetape_sysfs_class = class_create(THIS_MODULE, "ide_tape");
-	if (IS_ERR(idetape_sysfs_class)) {
-		idetape_sysfs_class = NULL;
-		printk(KERN_ERR "Unable to create sysfs class for ide tapes\n");
-		error = -EBUSY;
-		goto out;
-	}
-
-	if (register_chrdev(IDETAPE_MAJOR, "ht", &idetape_fops)) {
-		printk(KERN_ERR "ide-tape: Failed to register chrdev"
-				" interface\n");
-		error = -EBUSY;
-		goto out_free_class;
-	}
-
-	error = driver_register(&idetape_driver.gen_driver);
-	if (error)
-		goto out_free_chrdev;
-
-	return 0;
-
-out_free_chrdev:
-	unregister_chrdev(IDETAPE_MAJOR, "ht");
-out_free_class:
-	class_destroy(idetape_sysfs_class);
-out:
-	return error;
-}
-
-MODULE_ALIAS("ide:*m-tape*");
-module_init(idetape_init);
-module_exit(idetape_exit);
-MODULE_ALIAS_CHARDEV_MAJOR(IDETAPE_MAJOR);
-MODULE_DESCRIPTION("ATAPI Streaming TAPE Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
deleted file mode 100644
index 6665fc4724b99..0000000000000
--- a/drivers/ide/ide-taskfile.c
+++ /dev/null
@@ -1,668 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2000-2002	   Michael Cornwell <cornwell@acm.org>
- *  Copyright (C) 2000-2002	   Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2001-2002	   Klaus Smolin
- *					IBM Storage Technology Division
- *  Copyright (C) 2003-2004, 2007  Bartlomiej Zolnierkiewicz
- *
- *  The big the bad and the ugly.
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/hdreg.h>
-#include <linux/ide.h>
-#include <linux/nmi.h>
-#include <linux/scatterlist.h>
-#include <linux/uaccess.h>
-
-#include <asm/io.h>
-
-void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-
-	/* Be sure we're looking at the low order bytes */
-	tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-
-	tp_ops->tf_read(drive, &cmd->tf, cmd->valid.in.tf);
-
-	if (cmd->tf_flags & IDE_TFLAG_LBA48) {
-		tp_ops->write_devctl(hwif, ATA_HOB | ATA_DEVCTL_OBS);
-
-		tp_ops->tf_read(drive, &cmd->hob, cmd->valid.in.hob);
-	}
-}
-
-void ide_tf_dump(const char *s, struct ide_cmd *cmd)
-{
-#ifdef DEBUG
-	printk("%s: tf: feat 0x%02x nsect 0x%02x lbal 0x%02x "
-		"lbam 0x%02x lbah 0x%02x dev 0x%02x cmd 0x%02x\n",
-	       s, cmd->tf.feature, cmd->tf.nsect,
-	       cmd->tf.lbal, cmd->tf.lbam, cmd->tf.lbah,
-	       cmd->tf.device, cmd->tf.command);
-	printk("%s: hob: nsect 0x%02x lbal 0x%02x lbam 0x%02x lbah 0x%02x\n",
-	       s, cmd->hob.nsect, cmd->hob.lbal, cmd->hob.lbam, cmd->hob.lbah);
-#endif
-}
-
-int taskfile_lib_get_identify(ide_drive_t *drive, u8 *buf)
-{
-	struct ide_cmd cmd;
-
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.tf.nsect = 0x01;
-	if (drive->media == ide_disk)
-		cmd.tf.command = ATA_CMD_ID_ATA;
-	else
-		cmd.tf.command = ATA_CMD_ID_ATAPI;
-	cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-	cmd.protocol = ATA_PROT_PIO;
-
-	return ide_raw_taskfile(drive, &cmd, buf, 1);
-}
-
-static ide_startstop_t task_no_data_intr(ide_drive_t *);
-static ide_startstop_t pre_task_out_intr(ide_drive_t *, struct ide_cmd *);
-static ide_startstop_t task_pio_intr(ide_drive_t *);
-
-ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &hwif->cmd;
-	struct ide_taskfile *tf = &cmd->tf;
-	ide_handler_t *handler = NULL;
-	const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-	const struct ide_dma_ops *dma_ops = hwif->dma_ops;
-
-	if (orig_cmd->protocol == ATA_PROT_PIO &&
-	    (orig_cmd->tf_flags & IDE_TFLAG_MULTI_PIO) &&
-	    drive->mult_count == 0) {
-		pr_err("%s: multimode not set!\n", drive->name);
-		return ide_stopped;
-	}
-
-	if (orig_cmd->ftf_flags & IDE_FTFLAG_FLAGGED)
-		orig_cmd->ftf_flags |= IDE_FTFLAG_SET_IN_FLAGS;
-
-	memcpy(cmd, orig_cmd, sizeof(*cmd));
-
-	if ((cmd->tf_flags & IDE_TFLAG_DMA_PIO_FALLBACK) == 0) {
-		ide_tf_dump(drive->name, cmd);
-		tp_ops->write_devctl(hwif, ATA_DEVCTL_OBS);
-
-		if (cmd->ftf_flags & IDE_FTFLAG_OUT_DATA) {
-			u8 data[2] = { cmd->tf.data, cmd->hob.data };
-
-			tp_ops->output_data(drive, cmd, data, 2);
-		}
-
-		if (cmd->valid.out.tf & IDE_VALID_DEVICE) {
-			u8 HIHI = (cmd->tf_flags & IDE_TFLAG_LBA48) ?
-				  0xE0 : 0xEF;
-
-			if (!(cmd->ftf_flags & IDE_FTFLAG_FLAGGED))
-				cmd->tf.device &= HIHI;
-			cmd->tf.device |= drive->select;
-		}
-
-		tp_ops->tf_load(drive, &cmd->hob, cmd->valid.out.hob);
-		tp_ops->tf_load(drive, &cmd->tf,  cmd->valid.out.tf);
-	}
-
-	switch (cmd->protocol) {
-	case ATA_PROT_PIO:
-		if (cmd->tf_flags & IDE_TFLAG_WRITE) {
-			tp_ops->exec_command(hwif, tf->command);
-			ndelay(400);	/* FIXME */
-			return pre_task_out_intr(drive, cmd);
-		}
-		handler = task_pio_intr;
-		fallthrough;
-	case ATA_PROT_NODATA:
-		if (handler == NULL)
-			handler = task_no_data_intr;
-		ide_execute_command(drive, cmd, handler, WAIT_WORSTCASE);
-		return ide_started;
-	case ATA_PROT_DMA:
-		if (ide_dma_prepare(drive, cmd))
-			return ide_stopped;
-		hwif->expiry = dma_ops->dma_timer_expiry;
-		ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD);
-		dma_ops->dma_start(drive);
-		fallthrough;
-	default:
-		return ide_started;
-	}
-}
-EXPORT_SYMBOL_GPL(do_rw_taskfile);
-
-static ide_startstop_t task_no_data_intr(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &hwif->cmd;
-	struct ide_taskfile *tf = &cmd->tf;
-	int custom = (cmd->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) ? 1 : 0;
-	int retries = (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) ? 5 : 1;
-	u8 stat;
-
-	local_irq_enable_in_hardirq();
-
-	while (1) {
-		stat = hwif->tp_ops->read_status(hwif);
-		if ((stat & ATA_BUSY) == 0 || retries-- == 0)
-			break;
-		udelay(10);
-	};
-
-	if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) {
-		if (custom && tf->command == ATA_CMD_SET_MULTI) {
-			drive->mult_req = drive->mult_count = 0;
-			drive->special_flags |= IDE_SFLAG_RECALIBRATE;
-			(void)ide_dump_status(drive, __func__, stat);
-			return ide_stopped;
-		} else if (custom && tf->command == ATA_CMD_INIT_DEV_PARAMS) {
-			if ((stat & (ATA_ERR | ATA_DRQ)) == 0) {
-				ide_set_handler(drive, &task_no_data_intr,
-						WAIT_WORSTCASE);
-				return ide_started;
-			}
-		}
-		return ide_error(drive, "task_no_data_intr", stat);
-	}
-
-	if (custom && tf->command == ATA_CMD_SET_MULTI)
-		drive->mult_count = drive->mult_req;
-
-	if (custom == 0 || tf->command == ATA_CMD_IDLEIMMEDIATE ||
-	    tf->command == ATA_CMD_CHK_POWER) {
-		struct request *rq = hwif->rq;
-
-		if (ata_pm_request(rq))
-			ide_complete_pm_rq(drive, rq);
-		else
-			ide_finish_cmd(drive, cmd, stat);
-	}
-
-	return ide_stopped;
-}
-
-static u8 wait_drive_not_busy(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	int retries;
-	u8 stat;
-
-	/*
-	 * Last sector was transferred, wait until device is ready.  This can
-	 * take up to 6 ms on some ATAPI devices, so we will wait max 10 ms.
-	 */
-	for (retries = 0; retries < 1000; retries++) {
-		stat = hwif->tp_ops->read_status(hwif);
-
-		if (stat & ATA_BUSY)
-			udelay(10);
-		else
-			break;
-	}
-
-	if (stat & ATA_BUSY)
-		pr_err("%s: drive still BUSY!\n", drive->name);
-
-	return stat;
-}
-
-void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd,
-		   unsigned int write, unsigned int len)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct scatterlist *sg = hwif->sg_table;
-	struct scatterlist *cursg = cmd->cursg;
-	struct page *page;
-	unsigned int offset;
-	u8 *buf;
-
-	if (cursg == NULL)
-		cursg = cmd->cursg = sg;
-
-	while (len) {
-		unsigned nr_bytes = min(len, cursg->length - cmd->cursg_ofs);
-
-		page = sg_page(cursg);
-		offset = cursg->offset + cmd->cursg_ofs;
-
-		/* get the current page and offset */
-		page = nth_page(page, (offset >> PAGE_SHIFT));
-		offset %= PAGE_SIZE;
-
-		nr_bytes = min_t(unsigned, nr_bytes, (PAGE_SIZE - offset));
-
-		buf = kmap_atomic(page) + offset;
-
-		cmd->nleft -= nr_bytes;
-		cmd->cursg_ofs += nr_bytes;
-
-		if (cmd->cursg_ofs == cursg->length) {
-			cursg = cmd->cursg = sg_next(cmd->cursg);
-			cmd->cursg_ofs = 0;
-		}
-
-		/* do the actual data transfer */
-		if (write)
-			hwif->tp_ops->output_data(drive, cmd, buf, nr_bytes);
-		else
-			hwif->tp_ops->input_data(drive, cmd, buf, nr_bytes);
-
-		kunmap_atomic(buf);
-
-		len -= nr_bytes;
-	}
-}
-EXPORT_SYMBOL_GPL(ide_pio_bytes);
-
-static void ide_pio_datablock(ide_drive_t *drive, struct ide_cmd *cmd,
-			      unsigned int write)
-{
-	unsigned int nr_bytes;
-
-	u8 saved_io_32bit = drive->io_32bit;
-
-	if (cmd->tf_flags & IDE_TFLAG_FS)
-		scsi_req(cmd->rq)->result = 0;
-
-	if (cmd->tf_flags & IDE_TFLAG_IO_16BIT)
-		drive->io_32bit = 0;
-
-	touch_softlockup_watchdog();
-
-	if (cmd->tf_flags & IDE_TFLAG_MULTI_PIO)
-		nr_bytes = min_t(unsigned, cmd->nleft, drive->mult_count << 9);
-	else
-		nr_bytes = SECTOR_SIZE;
-
-	ide_pio_bytes(drive, cmd, write, nr_bytes);
-
-	drive->io_32bit = saved_io_32bit;
-}
-
-static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	if (cmd->tf_flags & IDE_TFLAG_FS) {
-		int nr_bytes = cmd->nbytes - cmd->nleft;
-
-		if (cmd->protocol == ATA_PROT_PIO &&
-		    ((cmd->tf_flags & IDE_TFLAG_WRITE) || cmd->nleft == 0)) {
-			if (cmd->tf_flags & IDE_TFLAG_MULTI_PIO)
-				nr_bytes -= drive->mult_count << 9;
-			else
-				nr_bytes -= SECTOR_SIZE;
-		}
-
-		if (nr_bytes > 0)
-			ide_complete_rq(drive, BLK_STS_OK, nr_bytes);
-	}
-}
-
-void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat)
-{
-	struct request *rq = drive->hwif->rq;
-	u8 err = ide_read_error(drive), nsect = cmd->tf.nsect;
-	u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER);
-
-	ide_complete_cmd(drive, cmd, stat, err);
-	scsi_req(rq)->result = err;
-
-	if (err == 0 && set_xfer) {
-		ide_set_xfer_rate(drive, nsect);
-		ide_driveid_update(drive);
-	}
-
-	ide_complete_rq(drive, err ? BLK_STS_IOERR : BLK_STS_OK, blk_rq_bytes(rq));
-}
-
-/*
- * Handler for command with PIO data phase.
- */
-static ide_startstop_t task_pio_intr(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct ide_cmd *cmd = &drive->hwif->cmd;
-	u8 stat = hwif->tp_ops->read_status(hwif);
-	u8 write = !!(cmd->tf_flags & IDE_TFLAG_WRITE);
-
-	if (write == 0) {
-		/* Error? */
-		if (stat & ATA_ERR)
-			goto out_err;
-
-		/* Didn't want any data? Odd. */
-		if ((stat & ATA_DRQ) == 0) {
-			/* Command all done? */
-			if (OK_STAT(stat, ATA_DRDY, ATA_BUSY))
-				goto out_end;
-
-			/* Assume it was a spurious irq */
-			goto out_wait;
-		}
-	} else {
-		if (!OK_STAT(stat, DRIVE_READY, drive->bad_wstat))
-			goto out_err;
-
-		/* Deal with unexpected ATA data phase. */
-		if (((stat & ATA_DRQ) == 0) ^ (cmd->nleft == 0))
-			goto out_err;
-	}
-
-	if (write && cmd->nleft == 0)
-		goto out_end;
-
-	/* Still data left to transfer. */
-	ide_pio_datablock(drive, cmd, write);
-
-	/* Are we done? Check status and finish transfer. */
-	if (write == 0 && cmd->nleft == 0) {
-		stat = wait_drive_not_busy(drive);
-		if (!OK_STAT(stat, 0, BAD_STAT))
-			goto out_err;
-
-		goto out_end;
-	}
-out_wait:
-	/* Still data left to transfer. */
-	ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
-	return ide_started;
-out_end:
-	if ((cmd->tf_flags & IDE_TFLAG_FS) == 0)
-		ide_finish_cmd(drive, cmd, stat);
-	else
-		ide_complete_rq(drive, BLK_STS_OK, blk_rq_sectors(cmd->rq) << 9);
-	return ide_stopped;
-out_err:
-	ide_error_cmd(drive, cmd);
-	return ide_error(drive, __func__, stat);
-}
-
-static ide_startstop_t pre_task_out_intr(ide_drive_t *drive,
-					 struct ide_cmd *cmd)
-{
-	ide_startstop_t startstop;
-
-	if (ide_wait_stat(&startstop, drive, ATA_DRQ,
-			  drive->bad_wstat, WAIT_DRQ)) {
-		pr_err("%s: no DRQ after issuing %sWRITE%s\n", drive->name,
-			(cmd->tf_flags & IDE_TFLAG_MULTI_PIO) ? "MULT" : "",
-			(drive->dev_flags & IDE_DFLAG_LBA48) ? "_EXT" : "");
-		return startstop;
-	}
-
-	if (!force_irqthreads && (drive->dev_flags & IDE_DFLAG_UNMASK) == 0)
-		local_irq_disable();
-
-	ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE);
-
-	ide_pio_datablock(drive, cmd, 1);
-
-	return ide_started;
-}
-
-int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
-		     u16 nsect)
-{
-	struct request *rq;
-	int error;
-
-	rq = blk_get_request(drive->queue,
-		(cmd->tf_flags & IDE_TFLAG_WRITE) ?
-			REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
-	ide_req(rq)->type = ATA_PRIV_TASKFILE;
-
-	/*
-	 * (ks) We transfer currently only whole sectors.
-	 * This is suffient for now.  But, it would be great,
-	 * if we would find a solution to transfer any size.
-	 * To support special commands like READ LONG.
-	 */
-	if (nsect) {
-		error = blk_rq_map_kern(drive->queue, rq, buf,
-					nsect * SECTOR_SIZE, GFP_NOIO);
-		if (error)
-			goto put_req;
-	}
-
-	ide_req(rq)->special = cmd;
-	cmd->rq = rq;
-
-	blk_execute_rq(NULL, rq, 0);
-	error = scsi_req(rq)->result ? -EIO : 0;
-put_req:
-	blk_put_request(rq);
-	return error;
-}
-EXPORT_SYMBOL(ide_raw_taskfile);
-
-int ide_no_data_taskfile(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	cmd->protocol = ATA_PROT_NODATA;
-
-	return ide_raw_taskfile(drive, cmd, NULL, 0);
-}
-EXPORT_SYMBOL_GPL(ide_no_data_taskfile);
-
-#ifdef CONFIG_IDE_TASK_IOCTL
-int ide_taskfile_ioctl(ide_drive_t *drive, unsigned long arg)
-{
-	ide_task_request_t	*req_task;
-	struct ide_cmd		cmd;
-	u8 *outbuf		= NULL;
-	u8 *inbuf		= NULL;
-	u8 *data_buf		= NULL;
-	int err			= 0;
-	int tasksize		= sizeof(struct ide_task_request_s);
-	unsigned int taskin	= 0;
-	unsigned int taskout	= 0;
-	u16 nsect		= 0;
-	char __user *buf = (char __user *)arg;
-
-	req_task = memdup_user(buf, tasksize);
-	if (IS_ERR(req_task))
-		return PTR_ERR(req_task);
-
-	taskout = req_task->out_size;
-	taskin  = req_task->in_size;
-
-	if (taskin > 65536 || taskout > 65536) {
-		err = -EINVAL;
-		goto abort;
-	}
-
-	if (taskout) {
-		int outtotal = tasksize;
-		outbuf = kzalloc(taskout, GFP_KERNEL);
-		if (outbuf == NULL) {
-			err = -ENOMEM;
-			goto abort;
-		}
-		if (copy_from_user(outbuf, buf + outtotal, taskout)) {
-			err = -EFAULT;
-			goto abort;
-		}
-	}
-
-	if (taskin) {
-		int intotal = tasksize + taskout;
-		inbuf = kzalloc(taskin, GFP_KERNEL);
-		if (inbuf == NULL) {
-			err = -ENOMEM;
-			goto abort;
-		}
-		if (copy_from_user(inbuf, buf + intotal, taskin)) {
-			err = -EFAULT;
-			goto abort;
-		}
-	}
-
-	memset(&cmd, 0, sizeof(cmd));
-
-	memcpy(&cmd.hob, req_task->hob_ports, HDIO_DRIVE_HOB_HDR_SIZE - 2);
-	memcpy(&cmd.tf,  req_task->io_ports,  HDIO_DRIVE_TASK_HDR_SIZE);
-
-	cmd.valid.out.tf = IDE_VALID_DEVICE;
-	cmd.valid.in.tf  = IDE_VALID_DEVICE | IDE_VALID_IN_TF;
-	cmd.tf_flags = IDE_TFLAG_IO_16BIT;
-
-	if (drive->dev_flags & IDE_DFLAG_LBA48) {
-		cmd.tf_flags |= IDE_TFLAG_LBA48;
-		cmd.valid.in.hob = IDE_VALID_IN_HOB;
-	}
-
-	if (req_task->out_flags.all) {
-		cmd.ftf_flags |= IDE_FTFLAG_FLAGGED;
-
-		if (req_task->out_flags.b.data)
-			cmd.ftf_flags |= IDE_FTFLAG_OUT_DATA;
-
-		if (req_task->out_flags.b.nsector_hob)
-			cmd.valid.out.hob |= IDE_VALID_NSECT;
-		if (req_task->out_flags.b.sector_hob)
-			cmd.valid.out.hob |= IDE_VALID_LBAL;
-		if (req_task->out_flags.b.lcyl_hob)
-			cmd.valid.out.hob |= IDE_VALID_LBAM;
-		if (req_task->out_flags.b.hcyl_hob)
-			cmd.valid.out.hob |= IDE_VALID_LBAH;
-
-		if (req_task->out_flags.b.error_feature)
-			cmd.valid.out.tf  |= IDE_VALID_FEATURE;
-		if (req_task->out_flags.b.nsector)
-			cmd.valid.out.tf  |= IDE_VALID_NSECT;
-		if (req_task->out_flags.b.sector)
-			cmd.valid.out.tf  |= IDE_VALID_LBAL;
-		if (req_task->out_flags.b.lcyl)
-			cmd.valid.out.tf  |= IDE_VALID_LBAM;
-		if (req_task->out_flags.b.hcyl)
-			cmd.valid.out.tf  |= IDE_VALID_LBAH;
-	} else {
-		cmd.valid.out.tf |= IDE_VALID_OUT_TF;
-		if (cmd.tf_flags & IDE_TFLAG_LBA48)
-			cmd.valid.out.hob |= IDE_VALID_OUT_HOB;
-	}
-
-	if (req_task->in_flags.b.data)
-		cmd.ftf_flags |= IDE_FTFLAG_IN_DATA;
-
-	if (req_task->req_cmd == IDE_DRIVE_TASK_RAW_WRITE) {
-		/* fixup data phase if needed */
-		if (req_task->data_phase == TASKFILE_IN_DMAQ ||
-		    req_task->data_phase == TASKFILE_IN_DMA)
-			cmd.tf_flags |= IDE_TFLAG_WRITE;
-	}
-
-	cmd.protocol = ATA_PROT_DMA;
-
-	switch (req_task->data_phase) {
-	case TASKFILE_MULTI_OUT:
-		if (!drive->mult_count) {
-			/* (hs): give up if multcount is not set */
-			pr_err("%s: %s Multimode Write multcount is not set\n",
-				drive->name, __func__);
-			err = -EPERM;
-			goto abort;
-		}
-		cmd.tf_flags |= IDE_TFLAG_MULTI_PIO;
-		fallthrough;
-	case TASKFILE_OUT:
-		cmd.protocol = ATA_PROT_PIO;
-		fallthrough;
-	case TASKFILE_OUT_DMAQ:
-	case TASKFILE_OUT_DMA:
-		cmd.tf_flags |= IDE_TFLAG_WRITE;
-		nsect = taskout / SECTOR_SIZE;
-		data_buf = outbuf;
-		break;
-	case TASKFILE_MULTI_IN:
-		if (!drive->mult_count) {
-			/* (hs): give up if multcount is not set */
-			pr_err("%s: %s Multimode Read multcount is not set\n",
-				drive->name, __func__);
-			err = -EPERM;
-			goto abort;
-		}
-		cmd.tf_flags |= IDE_TFLAG_MULTI_PIO;
-		fallthrough;
-	case TASKFILE_IN:
-		cmd.protocol = ATA_PROT_PIO;
-		fallthrough;
-	case TASKFILE_IN_DMAQ:
-	case TASKFILE_IN_DMA:
-		nsect = taskin / SECTOR_SIZE;
-		data_buf = inbuf;
-		break;
-	case TASKFILE_NO_DATA:
-		cmd.protocol = ATA_PROT_NODATA;
-		break;
-	default:
-		err = -EFAULT;
-		goto abort;
-	}
-
-	if (req_task->req_cmd == IDE_DRIVE_TASK_NO_DATA)
-		nsect = 0;
-	else if (!nsect) {
-		nsect = (cmd.hob.nsect << 8) | cmd.tf.nsect;
-
-		if (!nsect) {
-			pr_err("%s: in/out command without data\n",
-					drive->name);
-			err = -EFAULT;
-			goto abort;
-		}
-	}
-
-	err = ide_raw_taskfile(drive, &cmd, data_buf, nsect);
-
-	memcpy(req_task->hob_ports, &cmd.hob, HDIO_DRIVE_HOB_HDR_SIZE - 2);
-	memcpy(req_task->io_ports,  &cmd.tf,  HDIO_DRIVE_TASK_HDR_SIZE);
-
-	if ((cmd.ftf_flags & IDE_FTFLAG_SET_IN_FLAGS) &&
-	    req_task->in_flags.all == 0) {
-		req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
-		if (drive->dev_flags & IDE_DFLAG_LBA48)
-			req_task->in_flags.all |= (IDE_HOB_STD_IN_FLAGS << 8);
-	}
-
-	if (copy_to_user(buf, req_task, tasksize)) {
-		err = -EFAULT;
-		goto abort;
-	}
-	if (taskout) {
-		int outtotal = tasksize;
-		if (copy_to_user(buf + outtotal, outbuf, taskout)) {
-			err = -EFAULT;
-			goto abort;
-		}
-	}
-	if (taskin) {
-		int intotal = tasksize + taskout;
-		if (copy_to_user(buf + intotal, inbuf, taskin)) {
-			err = -EFAULT;
-			goto abort;
-		}
-	}
-abort:
-	kfree(req_task);
-	kfree(outbuf);
-	kfree(inbuf);
-
-	return err;
-}
-#endif
diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c
deleted file mode 100644
index cfe78df74b7de..0000000000000
--- a/drivers/ide/ide-timings.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Copyright (c) 1999-2001 Vojtech Pavlik
- *  Copyright (c) 2007-2008 Bartlomiej Zolnierkiewicz
- *
- * Should you need to contact me, the author, you can do so either by
- * e-mail - mail your message to <vojtech@ucw.cz>, or by paper mail:
- * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic
- */
-
-#include <linux/kernel.h>
-#include <linux/ide.h>
-#include <linux/module.h>
-
-/*
- * PIO 0-5, MWDMA 0-2 and UDMA 0-6 timings (in nanoseconds).
- * These were taken from ATA/ATAPI-6 standard, rev 0a, except
- * for PIO 5, which is a nonstandard extension and UDMA6, which
- * is currently supported only by Maxtor drives.
- */
-
-static struct ide_timing ide_timing[] = {
-
-	{ XFER_UDMA_6,     0,   0,   0,   0,   0,   0,   0,  15 },
-	{ XFER_UDMA_5,     0,   0,   0,   0,   0,   0,   0,  20 },
-	{ XFER_UDMA_4,     0,   0,   0,   0,   0,   0,   0,  30 },
-	{ XFER_UDMA_3,     0,   0,   0,   0,   0,   0,   0,  45 },
-
-	{ XFER_UDMA_2,     0,   0,   0,   0,   0,   0,   0,  60 },
-	{ XFER_UDMA_1,     0,   0,   0,   0,   0,   0,   0,  80 },
-	{ XFER_UDMA_0,     0,   0,   0,   0,   0,   0,   0, 120 },
-
-	{ XFER_MW_DMA_4,  25,   0,   0,   0,  55,  20,  80,   0 },
-	{ XFER_MW_DMA_3,  25,   0,   0,   0,  65,  25, 100,   0 },
-	{ XFER_MW_DMA_2,  25,   0,   0,   0,  70,  25, 120,   0 },
-	{ XFER_MW_DMA_1,  45,   0,   0,   0,  80,  50, 150,   0 },
-	{ XFER_MW_DMA_0,  60,   0,   0,   0, 215, 215, 480,   0 },
-
-	{ XFER_SW_DMA_2,  60,   0,   0,   0, 120, 120, 240,   0 },
-	{ XFER_SW_DMA_1,  90,   0,   0,   0, 240, 240, 480,   0 },
-	{ XFER_SW_DMA_0, 120,   0,   0,   0, 480, 480, 960,   0 },
-
-	{ XFER_PIO_6,     10,  55,  20,  80,  55,  20,  80,   0 },
-	{ XFER_PIO_5,     15,  65,  25, 100,  65,  25, 100,   0 },
-	{ XFER_PIO_4,     25,  70,  25, 120,  70,  25, 120,   0 },
-	{ XFER_PIO_3,     30,  80,  70, 180,  80,  70, 180,   0 },
-
-	{ XFER_PIO_2,     30, 290,  40, 330, 100,  90, 240,   0 },
-	{ XFER_PIO_1,     50, 290,  93, 383, 125, 100, 383,   0 },
-	{ XFER_PIO_0,     70, 290, 240, 600, 165, 150, 600,   0 },
-
-	{ XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960,   0 },
-
-	{ 0xff }
-};
-
-struct ide_timing *ide_timing_find_mode(u8 speed)
-{
-	struct ide_timing *t;
-
-	for (t = ide_timing; t->mode != speed; t++)
-		if (t->mode == 0xff)
-			return NULL;
-	return t;
-}
-EXPORT_SYMBOL_GPL(ide_timing_find_mode);
-
-u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio)
-{
-	u16 *id = drive->id;
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	u16 cycle = 0;
-
-	if (id[ATA_ID_FIELD_VALID] & 2) {
-		if (ata_id_has_iordy(drive->id))
-			cycle = id[ATA_ID_EIDE_PIO_IORDY];
-		else
-			cycle = id[ATA_ID_EIDE_PIO];
-
-		/* conservative "downgrade" for all pre-ATA2 drives */
-		if (pio < 3 && cycle < t->cycle)
-			cycle = 0; /* use standard timing */
-
-		/* Use the standard timing for the CF specific modes too */
-		if (pio > 4 && ata_id_is_cfa(id))
-			cycle = 0;
-	}
-
-	return cycle ? cycle : t->cycle;
-}
-EXPORT_SYMBOL_GPL(ide_pio_cycle_time);
-
-#define ENOUGH(v, unit)		(((v) - 1) / (unit) + 1)
-#define EZ(v, unit)		((v) ? ENOUGH((v) * 1000, unit) : 0)
-
-static void ide_timing_quantize(struct ide_timing *t, struct ide_timing *q,
-				int T, int UT)
-{
-	q->setup   = EZ(t->setup,   T);
-	q->act8b   = EZ(t->act8b,   T);
-	q->rec8b   = EZ(t->rec8b,   T);
-	q->cyc8b   = EZ(t->cyc8b,   T);
-	q->active  = EZ(t->active,  T);
-	q->recover = EZ(t->recover, T);
-	q->cycle   = EZ(t->cycle,   T);
-	q->udma    = EZ(t->udma,    UT);
-}
-
-void ide_timing_merge(struct ide_timing *a, struct ide_timing *b,
-		      struct ide_timing *m, unsigned int what)
-{
-	if (what & IDE_TIMING_SETUP)
-		m->setup   = max(a->setup,   b->setup);
-	if (what & IDE_TIMING_ACT8B)
-		m->act8b   = max(a->act8b,   b->act8b);
-	if (what & IDE_TIMING_REC8B)
-		m->rec8b   = max(a->rec8b,   b->rec8b);
-	if (what & IDE_TIMING_CYC8B)
-		m->cyc8b   = max(a->cyc8b,   b->cyc8b);
-	if (what & IDE_TIMING_ACTIVE)
-		m->active  = max(a->active,  b->active);
-	if (what & IDE_TIMING_RECOVER)
-		m->recover = max(a->recover, b->recover);
-	if (what & IDE_TIMING_CYCLE)
-		m->cycle   = max(a->cycle,   b->cycle);
-	if (what & IDE_TIMING_UDMA)
-		m->udma    = max(a->udma,    b->udma);
-}
-EXPORT_SYMBOL_GPL(ide_timing_merge);
-
-int ide_timing_compute(ide_drive_t *drive, u8 speed,
-		       struct ide_timing *t, int T, int UT)
-{
-	u16 *id = drive->id;
-	struct ide_timing *s, p;
-
-	/*
-	 * Find the mode.
-	 */
-	s = ide_timing_find_mode(speed);
-	if (s == NULL)
-		return -EINVAL;
-
-	/*
-	 * Copy the timing from the table.
-	 */
-	*t = *s;
-
-	/*
-	 * If the drive is an EIDE drive, it can tell us it needs extended
-	 * PIO/MWDMA cycle timing.
-	 */
-	if (id[ATA_ID_FIELD_VALID] & 2) {	/* EIDE drive */
-		memset(&p, 0, sizeof(p));
-
-		if (speed >= XFER_PIO_0 && speed < XFER_SW_DMA_0) {
-			if (speed <= XFER_PIO_2)
-				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO];
-			else if ((speed <= XFER_PIO_4) ||
-				 (speed == XFER_PIO_5 && !ata_id_is_cfa(id)))
-				p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY];
-		} else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
-			p.cycle = id[ATA_ID_EIDE_DMA_MIN];
-
-		ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B);
-	}
-
-	/*
-	 * Convert the timing to bus clock counts.
-	 */
-	ide_timing_quantize(t, t, T, UT);
-
-	/*
-	 * Even in DMA/UDMA modes we still use PIO access for IDENTIFY,
-	 * S.M.A.R.T and some other commands. We have to ensure that the
-	 * DMA cycle timing is slower/equal than the current PIO timing.
-	 */
-	if (speed >= XFER_SW_DMA_0) {
-		ide_timing_compute(drive, drive->pio_mode, &p, T, UT);
-		ide_timing_merge(&p, t, t, IDE_TIMING_ALL);
-	}
-
-	/*
-	 * Lengthen active & recovery time so that cycle time is correct.
-	 */
-	if (t->act8b + t->rec8b < t->cyc8b) {
-		t->act8b += (t->cyc8b - (t->act8b + t->rec8b)) / 2;
-		t->rec8b = t->cyc8b - t->act8b;
-	}
-
-	if (t->active + t->recover < t->cycle) {
-		t->active += (t->cycle - (t->active + t->recover)) / 2;
-		t->recover = t->cycle - t->active;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_timing_compute);
diff --git a/drivers/ide/ide-xfer-mode.c b/drivers/ide/ide-xfer-mode.c
deleted file mode 100644
index 0b9709b489b78..0000000000000
--- a/drivers/ide/ide-xfer-mode.c
+++ /dev/null
@@ -1,267 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/interrupt.h>
-#include <linux/ide.h>
-#include <linux/bitops.h>
-
-static const char *udma_str[] =
-	 { "UDMA/16", "UDMA/25",  "UDMA/33",  "UDMA/44",
-	   "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" };
-static const char *mwdma_str[] =
-	{ "MWDMA0", "MWDMA1", "MWDMA2", "MWDMA3", "MWDMA4" };
-static const char *swdma_str[] =
-	{ "SWDMA0", "SWDMA1", "SWDMA2" };
-static const char *pio_str[] =
-	{ "PIO0", "PIO1", "PIO2", "PIO3", "PIO4", "PIO5", "PIO6" };
-
-/**
- *	ide_xfer_verbose	-	return IDE mode names
- *	@mode: transfer mode
- *
- *	Returns a constant string giving the name of the mode
- *	requested.
- */
-
-const char *ide_xfer_verbose(u8 mode)
-{
-	const char *s;
-	u8 i = mode & 0xf;
-
-	if (mode >= XFER_UDMA_0 && mode <= XFER_UDMA_7)
-		s = udma_str[i];
-	else if (mode >= XFER_MW_DMA_0 && mode <= XFER_MW_DMA_4)
-		s = mwdma_str[i];
-	else if (mode >= XFER_SW_DMA_0 && mode <= XFER_SW_DMA_2)
-		s = swdma_str[i];
-	else if (mode >= XFER_PIO_0 && mode <= XFER_PIO_6)
-		s = pio_str[i & 0x7];
-	else if (mode == XFER_PIO_SLOW)
-		s = "PIO SLOW";
-	else
-		s = "XFER ERROR";
-
-	return s;
-}
-EXPORT_SYMBOL(ide_xfer_verbose);
-
-/**
- *	ide_get_best_pio_mode	-	get PIO mode from drive
- *	@drive: drive to consider
- *	@mode_wanted: preferred mode
- *	@max_mode: highest allowed mode
- *
- *	This routine returns the recommended PIO settings for a given drive,
- *	based on the drive->id information and the ide_pio_blacklist[].
- *
- *	Drive PIO mode is auto-selected if 255 is passed as mode_wanted.
- *	This is used by most chipset support modules when "auto-tuning".
- */
-
-static u8 ide_get_best_pio_mode(ide_drive_t *drive, u8 mode_wanted, u8 max_mode)
-{
-	u16 *id = drive->id;
-	int pio_mode = -1, overridden = 0;
-
-	if (mode_wanted != 255)
-		return min_t(u8, mode_wanted, max_mode);
-
-	if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0)
-		pio_mode = ide_scan_pio_blacklist((char *)&id[ATA_ID_PROD]);
-
-	if (pio_mode != -1) {
-		printk(KERN_INFO "%s: is on PIO blacklist\n", drive->name);
-	} else {
-		pio_mode = id[ATA_ID_OLD_PIO_MODES] >> 8;
-		if (pio_mode > 2) {	/* 2 is maximum allowed tPIO value */
-			pio_mode = 2;
-			overridden = 1;
-		}
-
-		if (id[ATA_ID_FIELD_VALID] & 2) {	      /* ATA2? */
-			if (ata_id_is_cfa(id) && (id[ATA_ID_CFA_MODES] & 7))
-				pio_mode = 4 + min_t(int, 2,
-						     id[ATA_ID_CFA_MODES] & 7);
-			else if (ata_id_has_iordy(id)) {
-				if (id[ATA_ID_PIO_MODES] & 7) {
-					overridden = 0;
-					if (id[ATA_ID_PIO_MODES] & 4)
-						pio_mode = 5;
-					else if (id[ATA_ID_PIO_MODES] & 2)
-						pio_mode = 4;
-					else
-						pio_mode = 3;
-				}
-			}
-		}
-
-		if (overridden)
-			printk(KERN_INFO "%s: tPIO > 2, assuming tPIO = 2\n",
-					 drive->name);
-	}
-
-	if (pio_mode > max_mode)
-		pio_mode = max_mode;
-
-	return pio_mode;
-}
-
-int ide_pio_need_iordy(ide_drive_t *drive, const u8 pio)
-{
-	/*
-	 * IORDY may lead to controller lock up on certain controllers
-	 * if the port is not occupied.
-	 */
-	if (pio == 0 && (drive->hwif->port_flags & IDE_PFLAG_PROBING))
-		return 0;
-	return ata_id_pio_need_iordy(drive->id, pio);
-}
-EXPORT_SYMBOL_GPL(ide_pio_need_iordy);
-
-int ide_set_pio_mode(ide_drive_t *drive, const u8 mode)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-
-	if (hwif->host_flags & IDE_HFLAG_NO_SET_MODE)
-		return 0;
-
-	if (port_ops == NULL || port_ops->set_pio_mode == NULL)
-		return -1;
-
-	/*
-	 * TODO: temporary hack for some legacy host drivers that didn't
-	 * set transfer mode on the device in ->set_pio_mode method...
-	 */
-	if (port_ops->set_dma_mode == NULL) {
-		drive->pio_mode = mode;
-		port_ops->set_pio_mode(hwif, drive);
-		return 0;
-	}
-
-	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
-		if (ide_config_drive_speed(drive, mode))
-			return -1;
-		drive->pio_mode = mode;
-		port_ops->set_pio_mode(hwif, drive);
-		return 0;
-	} else {
-		drive->pio_mode = mode;
-		port_ops->set_pio_mode(hwif, drive);
-		return ide_config_drive_speed(drive, mode);
-	}
-}
-
-int ide_set_dma_mode(ide_drive_t *drive, const u8 mode)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-
-	if (hwif->host_flags & IDE_HFLAG_NO_SET_MODE)
-		return 0;
-
-	if (port_ops == NULL || port_ops->set_dma_mode == NULL)
-		return -1;
-
-	if (hwif->host_flags & IDE_HFLAG_POST_SET_MODE) {
-		if (ide_config_drive_speed(drive, mode))
-			return -1;
-		drive->dma_mode = mode;
-		port_ops->set_dma_mode(hwif, drive);
-		return 0;
-	} else {
-		drive->dma_mode = mode;
-		port_ops->set_dma_mode(hwif, drive);
-		return ide_config_drive_speed(drive, mode);
-	}
-}
-EXPORT_SYMBOL_GPL(ide_set_dma_mode);
-
-/* req_pio == "255" for auto-tune */
-void ide_set_pio(ide_drive_t *drive, u8 req_pio)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-	u8 host_pio, pio;
-
-	if (port_ops == NULL || port_ops->set_pio_mode == NULL ||
-	    (hwif->host_flags & IDE_HFLAG_NO_SET_MODE))
-		return;
-
-	BUG_ON(hwif->pio_mask == 0x00);
-
-	host_pio = fls(hwif->pio_mask) - 1;
-
-	pio = ide_get_best_pio_mode(drive, req_pio, host_pio);
-
-	/*
-	 * TODO:
-	 * - report device max PIO mode
-	 * - check req_pio != 255 against device max PIO mode
-	 */
-	printk(KERN_DEBUG "%s: host max PIO%d wanted PIO%d%s selected PIO%d\n",
-			  drive->name, host_pio, req_pio,
-			  req_pio == 255 ? "(auto-tune)" : "", pio);
-
-	(void)ide_set_pio_mode(drive, XFER_PIO_0 + pio);
-}
-EXPORT_SYMBOL_GPL(ide_set_pio);
-
-/**
- *	ide_rate_filter		-	filter transfer mode
- *	@drive: IDE device
- *	@speed: desired speed
- *
- *	Given the available transfer modes this function returns
- *	the best available speed at or below the speed requested.
- *
- *	TODO: check device PIO capabilities
- */
-
-static u8 ide_rate_filter(ide_drive_t *drive, u8 speed)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 mode = ide_find_dma_mode(drive, speed);
-
-	if (mode == 0) {
-		if (hwif->pio_mask)
-			mode = fls(hwif->pio_mask) - 1 + XFER_PIO_0;
-		else
-			mode = XFER_PIO_4;
-	}
-
-/*	printk("%s: mode 0x%02x, speed 0x%02x\n", __func__, mode, speed); */
-
-	return min(speed, mode);
-}
-
-/**
- *	ide_set_xfer_rate	-	set transfer rate
- *	@drive: drive to set
- *	@rate: speed to attempt to set
- *
- *	General helper for setting the speed of an IDE device. This
- *	function knows about user enforced limits from the configuration
- *	which ->set_pio_mode/->set_dma_mode does not.
- */
-
-int ide_set_xfer_rate(ide_drive_t *drive, u8 rate)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	const struct ide_port_ops *port_ops = hwif->port_ops;
-
-	if (port_ops == NULL || port_ops->set_dma_mode == NULL ||
-	    (hwif->host_flags & IDE_HFLAG_NO_SET_MODE))
-		return -1;
-
-	rate = ide_rate_filter(drive, rate);
-
-	BUG_ON(rate < XFER_PIO_0);
-
-	if (rate >= XFER_PIO_0 && rate <= XFER_PIO_6)
-		return ide_set_pio_mode(drive, rate);
-
-	return ide_set_dma_mode(drive, rate);
-}
diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c
deleted file mode 100644
index 9a9c64fd10327..0000000000000
--- a/drivers/ide/ide.c
+++ /dev/null
@@ -1,415 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1994-1998	    Linus Torvalds & authors (see below)
- *  Copyright (C) 2003-2005, 2007   Bartlomiej Zolnierkiewicz
- */
-
-/*
- *  Mostly written by Mark Lord  <mlord@pobox.com>
- *                and Gadi Oxman <gadio@netvision.net.il>
- *                and Andre Hedrick <andre@linux-ide.org>
- *
- *  See linux/MAINTAINERS for address of current maintainer.
- *
- * This is the multiple IDE interface driver, as evolved from hd.c.
- * It supports up to MAX_HWIFS IDE interfaces, on one or more IRQs
- *   (usually 14 & 15).
- * There can be up to two drives per interface, as per the ATA-2 spec.
- *
- * ...
- *
- *  From hd.c:
- *  |
- *  | It traverses the request-list, using interrupts to jump between functions.
- *  | As nearly all functions can be called within interrupts, we may not sleep.
- *  | Special care is recommended.  Have Fun!
- *  |
- *  | modified by Drew Eckhardt to check nr of hd's from the CMOS.
- *  |
- *  | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
- *  | in the early extended-partition checks and added DM partitions.
- *  |
- *  | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
- *  |
- *  | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
- *  | and general streamlining by Mark Lord (mlord@pobox.com).
- *
- *  October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
- *
- *	Mark Lord	(mlord@pobox.com)		(IDE Perf.Pkg)
- *	Delman Lee	(delman@ieee.org)		("Mr. atdisk2")
- *	Scott Snyder	(snyder@fnald0.fnal.gov)	(ATAPI IDE cd-rom)
- *
- *  This was a rewrite of just about everything from hd.c, though some original
- *  code is still sprinkled about.  Think of it as a major evolution, with
- *  inspiration from lots of linux users, esp.  hamish@zot.apana.org.au
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/major.h>
-#include <linux/errno.h>
-#include <linux/genhd.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/hdreg.h>
-#include <linux/completion.h>
-#include <linux/device.h>
-
-struct class *ide_port_class;
-
-/**
- * ide_device_get	-	get an additional reference to a ide_drive_t
- * @drive:	device to get a reference to
- *
- * Gets a reference to the ide_drive_t and increments the use count of the
- * underlying LLDD module.
- */
-int ide_device_get(ide_drive_t *drive)
-{
-	struct device *host_dev;
-	struct module *module;
-
-	if (!get_device(&drive->gendev))
-		return -ENXIO;
-
-	host_dev = drive->hwif->host->dev[0];
-	module = host_dev ? host_dev->driver->owner : NULL;
-
-	if (module && !try_module_get(module)) {
-		put_device(&drive->gendev);
-		return -ENXIO;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_device_get);
-
-/**
- * ide_device_put	-	release a reference to a ide_drive_t
- * @drive:	device to release a reference on
- *
- * Release a reference to the ide_drive_t and decrements the use count of
- * the underlying LLDD module.
- */
-void ide_device_put(ide_drive_t *drive)
-{
-#ifdef CONFIG_MODULE_UNLOAD
-	struct device *host_dev = drive->hwif->host->dev[0];
-	struct module *module = host_dev ? host_dev->driver->owner : NULL;
-
-	module_put(module);
-#endif
-	put_device(&drive->gendev);
-}
-EXPORT_SYMBOL_GPL(ide_device_put);
-
-static int ide_bus_match(struct device *dev, struct device_driver *drv)
-{
-	return 1;
-}
-
-static int ide_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-
-	add_uevent_var(env, "MEDIA=%s", ide_media_string(drive));
-	add_uevent_var(env, "DRIVENAME=%s", drive->name);
-	add_uevent_var(env, "MODALIAS=ide:m-%s", ide_media_string(drive));
-	return 0;
-}
-
-static int generic_ide_probe(struct device *dev)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	struct ide_driver *drv = to_ide_driver(dev->driver);
-
-	return drv->probe ? drv->probe(drive) : -ENODEV;
-}
-
-static int generic_ide_remove(struct device *dev)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	struct ide_driver *drv = to_ide_driver(dev->driver);
-
-	if (drv->remove)
-		drv->remove(drive);
-
-	return 0;
-}
-
-static void generic_ide_shutdown(struct device *dev)
-{
-	ide_drive_t *drive = to_ide_device(dev);
-	struct ide_driver *drv = to_ide_driver(dev->driver);
-
-	if (dev->driver && drv->shutdown)
-		drv->shutdown(drive);
-}
-
-struct bus_type ide_bus_type = {
-	.name		= "ide",
-	.match		= ide_bus_match,
-	.uevent		= ide_uevent,
-	.probe		= generic_ide_probe,
-	.remove		= generic_ide_remove,
-	.shutdown	= generic_ide_shutdown,
-	.dev_groups	= ide_dev_groups,
-	.suspend	= generic_ide_suspend,
-	.resume		= generic_ide_resume,
-};
-
-EXPORT_SYMBOL_GPL(ide_bus_type);
-
-int ide_vlb_clk;
-EXPORT_SYMBOL_GPL(ide_vlb_clk);
-
-module_param_named(vlb_clock, ide_vlb_clk, int, 0);
-MODULE_PARM_DESC(vlb_clock, "VLB clock frequency (in MHz)");
-
-int ide_pci_clk;
-EXPORT_SYMBOL_GPL(ide_pci_clk);
-
-module_param_named(pci_clock, ide_pci_clk, int, 0);
-MODULE_PARM_DESC(pci_clock, "PCI bus clock frequency (in MHz)");
-
-static int ide_set_dev_param_mask(const char *s, const struct kernel_param *kp)
-{
-	unsigned int a, b, i, j = 1;
-	unsigned int *dev_param_mask = (unsigned int *)kp->arg;
-
-	/* controller . device (0 or 1) [ : 1 (set) | 0 (clear) ] */
-	if (sscanf(s, "%u.%u:%u", &a, &b, &j) != 3 &&
-	    sscanf(s, "%u.%u", &a, &b) != 2)
-		return -EINVAL;
-
-	i = a * MAX_DRIVES + b;
-
-	if (i >= MAX_HWIFS * MAX_DRIVES || j > 1)
-		return -EINVAL;
-
-	if (j)
-		*dev_param_mask |= (1 << i);
-	else
-		*dev_param_mask &= ~(1 << i);
-
-	return 0;
-}
-
-static const struct kernel_param_ops param_ops_ide_dev_mask = {
-	.set = ide_set_dev_param_mask
-};
-
-#define param_check_ide_dev_mask(name, p) param_check_uint(name, p)
-
-static unsigned int ide_nodma;
-
-module_param_named(nodma, ide_nodma, ide_dev_mask, 0);
-MODULE_PARM_DESC(nodma, "disallow DMA for a device");
-
-static unsigned int ide_noflush;
-
-module_param_named(noflush, ide_noflush, ide_dev_mask, 0);
-MODULE_PARM_DESC(noflush, "disable flush requests for a device");
-
-static unsigned int ide_nohpa;
-
-module_param_named(nohpa, ide_nohpa, ide_dev_mask, 0);
-MODULE_PARM_DESC(nohpa, "disable Host Protected Area for a device");
-
-static unsigned int ide_noprobe;
-
-module_param_named(noprobe, ide_noprobe, ide_dev_mask, 0);
-MODULE_PARM_DESC(noprobe, "skip probing for a device");
-
-static unsigned int ide_nowerr;
-
-module_param_named(nowerr, ide_nowerr, ide_dev_mask, 0);
-MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device");
-
-static unsigned int ide_cdroms;
-
-module_param_named(cdrom, ide_cdroms, ide_dev_mask, 0);
-MODULE_PARM_DESC(cdrom, "force device as a CD-ROM");
-
-struct chs_geom {
-	unsigned int	cyl;
-	u8		head;
-	u8		sect;
-};
-
-static unsigned int ide_disks;
-static struct chs_geom ide_disks_chs[MAX_HWIFS * MAX_DRIVES];
-
-static int ide_set_disk_chs(const char *str, const struct kernel_param *kp)
-{
-	unsigned int a, b, c = 0, h = 0, s = 0, i, j = 1;
-
-	/* controller . device (0 or 1) : Cylinders , Heads , Sectors */
-	/* controller . device (0 or 1) : 1 (use CHS) | 0 (ignore CHS) */
-	if (sscanf(str, "%u.%u:%u,%u,%u", &a, &b, &c, &h, &s) != 5 &&
-	    sscanf(str, "%u.%u:%u", &a, &b, &j) != 3)
-		return -EINVAL;
-
-	i = a * MAX_DRIVES + b;
-
-	if (i >= MAX_HWIFS * MAX_DRIVES || j > 1)
-		return -EINVAL;
-
-	if (c > INT_MAX || h > 255 || s > 255)
-		return -EINVAL;
-
-	if (j)
-		ide_disks |= (1 << i);
-	else
-		ide_disks &= ~(1 << i);
-
-	ide_disks_chs[i].cyl  = c;
-	ide_disks_chs[i].head = h;
-	ide_disks_chs[i].sect = s;
-
-	return 0;
-}
-
-module_param_call(chs, ide_set_disk_chs, NULL, NULL, 0);
-MODULE_PARM_DESC(chs, "force device as a disk (using CHS)");
-
-static void ide_dev_apply_params(ide_drive_t *drive, u8 unit)
-{
-	int i = drive->hwif->index * MAX_DRIVES + unit;
-
-	if (ide_nodma & (1 << i)) {
-		printk(KERN_INFO "ide: disallowing DMA for %s\n", drive->name);
-		drive->dev_flags |= IDE_DFLAG_NODMA;
-	}
-	if (ide_noflush & (1 << i)) {
-		printk(KERN_INFO "ide: disabling flush requests for %s\n",
-				 drive->name);
-		drive->dev_flags |= IDE_DFLAG_NOFLUSH;
-	}
-	if (ide_nohpa & (1 << i)) {
-		printk(KERN_INFO "ide: disabling Host Protected Area for %s\n",
-				 drive->name);
-		drive->dev_flags |= IDE_DFLAG_NOHPA;
-	}
-	if (ide_noprobe & (1 << i)) {
-		printk(KERN_INFO "ide: skipping probe for %s\n", drive->name);
-		drive->dev_flags |= IDE_DFLAG_NOPROBE;
-	}
-	if (ide_nowerr & (1 << i)) {
-		printk(KERN_INFO "ide: ignoring the ATA_DF bit for %s\n",
-				 drive->name);
-		drive->bad_wstat = BAD_R_STAT;
-	}
-	if (ide_cdroms & (1 << i)) {
-		printk(KERN_INFO "ide: forcing %s as a CD-ROM\n", drive->name);
-		drive->dev_flags |= IDE_DFLAG_PRESENT;
-		drive->media = ide_cdrom;
-		/* an ATAPI device ignores DRDY */
-		drive->ready_stat = 0;
-	}
-	if (ide_disks & (1 << i)) {
-		drive->cyl  = drive->bios_cyl  = ide_disks_chs[i].cyl;
-		drive->head = drive->bios_head = ide_disks_chs[i].head;
-		drive->sect = drive->bios_sect = ide_disks_chs[i].sect;
-
-		printk(KERN_INFO "ide: forcing %s as a disk (%d/%d/%d)\n",
-				 drive->name,
-				 drive->cyl, drive->head, drive->sect);
-
-		drive->dev_flags |= IDE_DFLAG_FORCED_GEOM | IDE_DFLAG_PRESENT;
-		drive->media = ide_disk;
-		drive->ready_stat = ATA_DRDY;
-	}
-}
-
-static unsigned int ide_ignore_cable;
-
-static int ide_set_ignore_cable(const char *s, const struct kernel_param *kp)
-{
-	int i, j = 1;
-
-	/* controller (ignore) */
-	/* controller : 1 (ignore) | 0 (use) */
-	if (sscanf(s, "%d:%d", &i, &j) != 2 && sscanf(s, "%d", &i) != 1)
-		return -EINVAL;
-
-	if (i >= MAX_HWIFS || j < 0 || j > 1)
-		return -EINVAL;
-
-	if (j)
-		ide_ignore_cable |= (1 << i);
-	else
-		ide_ignore_cable &= ~(1 << i);
-
-	return 0;
-}
-
-module_param_call(ignore_cable, ide_set_ignore_cable, NULL, NULL, 0);
-MODULE_PARM_DESC(ignore_cable, "ignore cable detection");
-
-void ide_port_apply_params(ide_hwif_t *hwif)
-{
-	ide_drive_t *drive;
-	int i;
-
-	if (ide_ignore_cable & (1 << hwif->index)) {
-		printk(KERN_INFO "ide: ignoring cable detection for %s\n",
-				 hwif->name);
-		hwif->cbl = ATA_CBL_PATA40_SHORT;
-	}
-
-	ide_port_for_each_dev(i, drive, hwif)
-		ide_dev_apply_params(drive, i);
-}
-
-/*
- * This is gets invoked once during initialization, to set *everything* up
- */
-static int __init ide_init(void)
-{
-	int ret;
-
-	printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n");
-
-	ret = bus_register(&ide_bus_type);
-	if (ret < 0) {
-		printk(KERN_WARNING "IDE: bus_register error: %d\n", ret);
-		return ret;
-	}
-
-	ide_port_class = class_create(THIS_MODULE, "ide_port");
-	if (IS_ERR(ide_port_class)) {
-		ret = PTR_ERR(ide_port_class);
-		goto out_port_class;
-	}
-
-	ide_acpi_init();
-
-	proc_ide_create();
-
-	return 0;
-
-out_port_class:
-	bus_unregister(&ide_bus_type);
-
-	return ret;
-}
-
-static void __exit ide_exit(void)
-{
-	proc_ide_destroy();
-
-	class_destroy(ide_port_class);
-
-	bus_unregister(&ide_bus_type);
-}
-
-module_init(ide_init);
-module_exit(ide_exit);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ide_platform.c b/drivers/ide/ide_platform.c
deleted file mode 100644
index 91639fd6c276b..0000000000000
--- a/drivers/ide/ide_platform.c
+++ /dev/null
@@ -1,133 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Platform IDE driver
- *
- * Copyright (C) 2007 MontaVista Software
- *
- * Maintainer: Kumar Gala <galak@kernel.crashing.org>
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/ide.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/ata_platform.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-
-static void plat_ide_setup_ports(struct ide_hw *hw, void __iomem *base,
-				 void __iomem *ctrl,
-				 struct pata_platform_info *pdata, int irq)
-{
-	unsigned long port = (unsigned long)base;
-	int i;
-
-	hw->io_ports.data_addr = port;
-
-	port += (1 << pdata->ioport_shift);
-	for (i = 1; i <= 7;
-	     i++, port += (1 << pdata->ioport_shift))
-		hw->io_ports_array[i] = port;
-
-	hw->io_ports.ctl_addr = (unsigned long)ctrl;
-
-	hw->irq = irq;
-}
-
-static const struct ide_port_info platform_ide_port_info = {
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
-};
-
-static int plat_ide_probe(struct platform_device *pdev)
-{
-	struct resource *res_base, *res_alt, *res_irq;
-	void __iomem *base, *alt_base;
-	struct pata_platform_info *pdata;
-	struct ide_host *host;
-	int ret = 0, mmio = 0;
-	struct ide_hw hw, *hws[] = { &hw };
-	struct ide_port_info d = platform_ide_port_info;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	/* get a pointer to the register memory */
-	res_base = platform_get_resource(pdev, IORESOURCE_IO, 0);
-	res_alt = platform_get_resource(pdev, IORESOURCE_IO, 1);
-
-	if (!res_base || !res_alt) {
-		res_base = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		res_alt = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		if (!res_base || !res_alt) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		mmio = 1;
-	}
-
-	res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!res_irq) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (mmio) {
-		base = devm_ioremap(&pdev->dev,
-			res_base->start, resource_size(res_base));
-		alt_base = devm_ioremap(&pdev->dev,
-			res_alt->start, resource_size(res_alt));
-	} else {
-		base = devm_ioport_map(&pdev->dev,
-			res_base->start, resource_size(res_base));
-		alt_base = devm_ioport_map(&pdev->dev,
-			res_alt->start, resource_size(res_alt));
-	}
-
-	memset(&hw, 0, sizeof(hw));
-	plat_ide_setup_ports(&hw, base, alt_base, pdata, res_irq->start);
-	hw.dev = &pdev->dev;
-
-	d.irq_flags = res_irq->flags & IRQF_TRIGGER_MASK;
-	if (res_irq->flags & IORESOURCE_IRQ_SHAREABLE)
-		d.irq_flags |= IRQF_SHARED;
-
-	if (mmio)
-		d.host_flags |= IDE_HFLAG_MMIO;
-
-	ret = ide_host_add(&d, hws, 1, &host);
-	if (ret)
-		goto out;
-
-	platform_set_drvdata(pdev, host);
-
-	return 0;
-
-out:
-	return ret;
-}
-
-static int plat_ide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = dev_get_drvdata(&pdev->dev);
-
-	ide_host_remove(host);
-
-	return 0;
-}
-
-static struct platform_driver platform_ide_driver = {
-	.driver = {
-		.name = "pata_platform",
-	},
-	.probe = plat_ide_probe,
-	.remove = plat_ide_remove,
-};
-
-module_platform_driver(platform_ide_driver);
-
-MODULE_DESCRIPTION("Platform IDE driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:pata_platform");
diff --git a/drivers/ide/it8172.c b/drivers/ide/it8172.c
deleted file mode 100644
index b6f674ab4fb7c..0000000000000
--- a/drivers/ide/it8172.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- *
- * BRIEF MODULE DESCRIPTION
- *      IT8172 IDE controller support
- *
- * Copyright (C) 2000 MontaVista Software Inc.
- * Copyright (C) 2008 Shane McDonald
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *  You should have received a copy of the  GNU General Public License along
- *  with this program; if not, write  to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "IT8172"
-
-static void it8172_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u16 drive_enables;
-	u32 drive_timing;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	/*
-	 * The highest value of DIOR/DIOW pulse width and recovery time
-	 * that can be set in the IT8172 is 8 PCI clock cycles.  As a result,
-	 * it cannot be configured for PIO mode 0.  This table sets these
-	 * parameters to the maximum supported by the IT8172.
-	 */
-	static const u8 timings[] = { 0x3f, 0x3c, 0x1b, 0x12, 0x0a };
-
-	pci_read_config_word(dev, 0x40, &drive_enables);
-	pci_read_config_dword(dev, 0x44, &drive_timing);
-
-	/*
-	 * Enable port 0x44. The IT8172 spec is confused; it calls
-	 * this register the "Slave IDE Timing Register", but in fact,
-	 * it controls timing for both master and slave drives.
-	 */
-	drive_enables |= 0x4000;
-
-	drive_enables &= drive->dn ? 0xc006 : 0xc060;
-	if (drive->media == ide_disk)
-		/* enable prefetch */
-		drive_enables |= 0x0004 << (drive->dn * 4);
-	if (ide_pio_need_iordy(drive, pio))
-		/* enable IORDY sample-point */
-		drive_enables |= 0x0002 << (drive->dn * 4);
-
-	drive_timing &= drive->dn ? 0x00003f00 : 0x000fc000;
-	drive_timing |= timings[pio] << (drive->dn * 6 + 8);
-
-	pci_write_config_word(dev, 0x40, drive_enables);
-	pci_write_config_dword(dev, 0x44, drive_timing);
-}
-
-static void it8172_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int a_speed		= 3 << (drive->dn * 4);
-	int u_flag		= 1 << drive->dn;
-	int u_speed		= 0;
-	u8 reg48, reg4a;
-	const u8 speed		= drive->dma_mode;
-
-	pci_read_config_byte(dev, 0x48, &reg48);
-	pci_read_config_byte(dev, 0x4a, &reg4a);
-
-	if (speed >= XFER_UDMA_0) {
-		u8 udma = speed - XFER_UDMA_0;
-		u_speed = udma << (drive->dn * 4);
-
-		pci_write_config_byte(dev, 0x48, reg48 | u_flag);
-		reg4a &= ~a_speed;
-		pci_write_config_byte(dev, 0x4a, reg4a | u_speed);
-	} else {
-		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-
-		pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
-		pci_write_config_byte(dev, 0x4a, reg4a & ~a_speed);
-
-		drive->pio_mode =
-			mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
-
-		it8172_set_pio_mode(hwif, drive);
-	}
-}
-
-
-static const struct ide_port_ops it8172_port_ops = {
-	.set_pio_mode	= it8172_set_pio_mode,
-	.set_dma_mode	= it8172_set_dma_mode,
-};
-
-static const struct ide_port_info it8172_port_info = {
-	.name		= DRV_NAME,
-	.port_ops	= &it8172_port_ops,
-	.enablebits	= { {0x41, 0x80, 0x80}, {0x00, 0x00, 0x00} },
-	.host_flags	= IDE_HFLAG_SINGLE,
-	.pio_mask	= ATA_PIO4 & ~ATA_PIO0,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA2,
-};
-
-static int it8172_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE)
-		return -ENODEV; /* IT8172 is more than an IDE controller */
-	return ide_pci_init_one(dev, &it8172_port_info, NULL);
-}
-
-static struct pci_device_id it8172_pci_tbl[] = {
-	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8172), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, it8172_pci_tbl);
-
-static struct pci_driver it8172_pci_driver = {
-	.name		= "IT8172_IDE",
-	.id_table	= it8172_pci_tbl,
-	.probe		= it8172_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init it8172_ide_init(void)
-{
-	return ide_pci_register_driver(&it8172_pci_driver);
-}
-
-static void __exit it8172_ide_exit(void)
-{
-	pci_unregister_driver(&it8172_pci_driver);
-}
-
-module_init(it8172_ide_init);
-module_exit(it8172_ide_exit);
-
-MODULE_AUTHOR("Steve Longerbeam");
-MODULE_DESCRIPTION("PCI driver module for ITE 8172 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/it8213.c b/drivers/ide/it8213.c
deleted file mode 100644
index d0bf4430c4374..0000000000000
--- a/drivers/ide/it8213.c
+++ /dev/null
@@ -1,217 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ITE 8213 IDE driver
- *
- * Copyright (C) 2006 Jack Lee
- * Copyright (C) 2006 Alan Cox
- * Copyright (C) 2007 Bartlomiej Zolnierkiewicz
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "it8213"
-
-/**
- *	it8213_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Set the interface PIO mode.
- */
-
-static void it8213_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int is_slave		= drive->dn & 1;
-	int master_port		= 0x40;
-	int slave_port		= 0x44;
-	unsigned long flags;
-	u16 master_data;
-	u8 slave_data;
-	static DEFINE_SPINLOCK(tune_lock);
-	int control = 0;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	static const u8 timings[][2] = {
-					{ 0, 0 },
-					{ 0, 0 },
-					{ 1, 0 },
-					{ 2, 1 },
-					{ 2, 3 }, };
-
-	spin_lock_irqsave(&tune_lock, flags);
-	pci_read_config_word(dev, master_port, &master_data);
-
-	if (pio > 1)
-		control |= 1;	/* Programmable timing on */
-	if (drive->media != ide_disk)
-		control |= 4;	/* ATAPI */
-	if (ide_pio_need_iordy(drive, pio))
-		control |= 2;	/* IORDY */
-	if (is_slave) {
-		master_data |=  0x4000;
-		master_data &= ~0x0070;
-		if (pio > 1)
-			master_data = master_data | (control << 4);
-		pci_read_config_byte(dev, slave_port, &slave_data);
-		slave_data = slave_data & 0xf0;
-		slave_data = slave_data | (timings[pio][0] << 2) | timings[pio][1];
-	} else {
-		master_data &= ~0x3307;
-		if (pio > 1)
-			master_data = master_data | control;
-		master_data = master_data | (timings[pio][0] << 12) | (timings[pio][1] << 8);
-	}
-	pci_write_config_word(dev, master_port, master_data);
-	if (is_slave)
-		pci_write_config_byte(dev, slave_port, slave_data);
-	spin_unlock_irqrestore(&tune_lock, flags);
-}
-
-/**
- *	it8213_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Tune the ITE chipset for the DMA mode.
- */
-
-static void it8213_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 maslave		= 0x40;
-	int a_speed		= 3 << (drive->dn * 4);
-	int u_flag		= 1 << drive->dn;
-	int v_flag		= 0x01 << drive->dn;
-	int w_flag		= 0x10 << drive->dn;
-	int u_speed		= 0;
-	u16			reg4042, reg4a;
-	u8			reg48, reg54, reg55;
-	const u8 speed		= drive->dma_mode;
-
-	pci_read_config_word(dev, maslave, &reg4042);
-	pci_read_config_byte(dev, 0x48, &reg48);
-	pci_read_config_word(dev, 0x4a, &reg4a);
-	pci_read_config_byte(dev, 0x54, &reg54);
-	pci_read_config_byte(dev, 0x55, &reg55);
-
-	if (speed >= XFER_UDMA_0) {
-		u8 udma = speed - XFER_UDMA_0;
-
-		u_speed = min_t(u8, 2 - (udma & 1), udma) << (drive->dn * 4);
-
-		if (!(reg48 & u_flag))
-			pci_write_config_byte(dev, 0x48, reg48 | u_flag);
-		if (speed >= XFER_UDMA_5)
-			pci_write_config_byte(dev, 0x55, (u8) reg55|w_flag);
-		else
-			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
-
-		if ((reg4a & a_speed) != u_speed)
-			pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed);
-		if (speed > XFER_UDMA_2) {
-			if (!(reg54 & v_flag))
-				pci_write_config_byte(dev, 0x54, reg54 | v_flag);
-		} else
-			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
-	} else {
-		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-
-		if (reg48 & u_flag)
-			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
-		if (reg4a & a_speed)
-			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
-		if (reg54 & v_flag)
-			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
-		if (reg55 & w_flag)
-			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
-
-		if (speed >= XFER_MW_DMA_0)
-			drive->pio_mode =
-				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
-		else
-			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
-
-		it8213_set_pio_mode(hwif, drive);
-	}
-}
-
-static u8 it8213_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 reg42h = 0;
-
-	pci_read_config_byte(dev, 0x42, &reg42h);
-
-	return (reg42h & 0x02) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-static const struct ide_port_ops it8213_port_ops = {
-	.set_pio_mode		= it8213_set_pio_mode,
-	.set_dma_mode		= it8213_set_dma_mode,
-	.cable_detect		= it8213_cable_detect,
-};
-
-static const struct ide_port_info it8213_chipset = {
-	.name		= DRV_NAME,
-	.enablebits	= { {0x41, 0x80, 0x80} },
-	.port_ops	= &it8213_port_ops,
-	.host_flags	= IDE_HFLAG_SINGLE,
-	.pio_mask	= ATA_PIO4,
-	.swdma_mask	= ATA_SWDMA2_ONLY,
-	.mwdma_mask	= ATA_MWDMA12_ONLY,
-	.udma_mask	= ATA_UDMA6,
-};
-
-/**
- *	it8213_init_one	-	pci layer discovery entry
- *	@dev: PCI device
- *	@id: ident table entry
- *
- *	Called by the PCI code when it finds an ITE8213 controller. As
- *	this device follows the standard interfaces we can use the
- *	standard helper functions to do almost all the work for us.
- */
-
-static int it8213_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &it8213_chipset, NULL);
-}
-
-static const struct pci_device_id it8213_pci_tbl[] = {
-	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8213), 0 },
-	{ 0, },
-};
-
-MODULE_DEVICE_TABLE(pci, it8213_pci_tbl);
-
-static struct pci_driver it8213_pci_driver = {
-	.name		= "ITE8213_IDE",
-	.id_table	= it8213_pci_tbl,
-	.probe		= it8213_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init it8213_ide_init(void)
-{
-	return ide_pci_register_driver(&it8213_pci_driver);
-}
-
-static void __exit it8213_ide_exit(void)
-{
-	pci_unregister_driver(&it8213_pci_driver);
-}
-
-module_init(it8213_ide_init);
-module_exit(it8213_ide_exit);
-
-MODULE_AUTHOR("Jack Lee, Alan Cox");
-MODULE_DESCRIPTION("PCI driver module for the ITE 8213");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c
deleted file mode 100644
index 36a64c8ea575d..0000000000000
--- a/drivers/ide/it821x.c
+++ /dev/null
@@ -1,715 +0,0 @@
-/*
- * Copyright (C) 2004		Red Hat
- * Copyright (C) 2007		Bartlomiej Zolnierkiewicz
- *
- *  May be copied or modified under the terms of the GNU General Public License
- *  Based in part on the ITE vendor provided SCSI driver.
- *
- *  Documentation:
- *	Datasheet is freely available, some other documents under NDA.
- *
- *  The ITE8212 isn't exactly a standard IDE controller. It has two
- *  modes. In pass through mode then it is an IDE controller. In its smart
- *  mode its actually quite a capable hardware raid controller disguised
- *  as an IDE controller. Smart mode only understands DMA read/write and
- *  identify, none of the fancier commands apply. The IT8211 is identical
- *  in other respects but lacks the raid mode.
- *
- *  Errata:
- *  o	Rev 0x10 also requires master/slave hold the same DMA timings and
- *	cannot do ATAPI MWDMA.
- *  o	The identify data for raid volumes lacks CHS info (technically ok)
- *	but also fails to set the LBA28 and other bits. We fix these in
- *	the IDE probe quirk code.
- *  o	If you write LBA48 sized I/O's (ie > 256 sector) in smart mode
- *	raid then the controller firmware dies
- *  o	Smart mode without RAID doesn't clear all the necessary identify
- *	bits to reduce the command set to the one used
- *
- *  This has a few impacts on the driver
- *  - In pass through mode we do all the work you would expect
- *  - In smart mode the clocking set up is done by the controller generally
- *    but we must watch the other limits and filter.
- *  - There are a few extra vendor commands that actually talk to the
- *    controller but only work PIO with no IRQ.
- *
- *  Vendor areas of the identify block in smart mode are used for the
- *  timing and policy set up. Each HDD in raid mode also has a serial
- *  block on the disk. The hardware extra commands are get/set chip status,
- *  rebuild, get rebuild status.
- *
- *  In Linux the driver supports pass through mode as if the device was
- *  just another IDE controller. If the smart mode is running then
- *  volumes are managed by the controller firmware and each IDE "disk"
- *  is a raid volume. Even more cute - the controller can do automated
- *  hotplug and rebuild.
- *
- *  The pass through controller itself is a little demented. It has a
- *  flaw that it has a single set of PIO/MWDMA timings per channel so
- *  non UDMA devices restrict each others performance. It also has a
- *  single clock source per channel so mixed UDMA100/133 performance
- *  isn't perfect and we have to pick a clock. Thankfully none of this
- *  matters in smart mode. ATAPI DMA is not currently supported.
- *
- *  It seems the smart mode is a win for RAID1/RAID10 but otherwise not.
- *
- *  TODO
- *	-	ATAPI UDMA is ok but not MWDMA it seems
- *	-	RAID configuration ioctls
- *	-	Move to libata once it grows up
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "it821x"
-
-#define QUIRK_VORTEX86 1
-
-struct it821x_dev
-{
-	unsigned int smart:1,		/* Are we in smart raid mode */
-		timing10:1;		/* Rev 0x10 */
-	u8	clock_mode;		/* 0, ATA_50 or ATA_66 */
-	u8	want[2][2];		/* Mode/Pri log for master slave */
-	/* We need these for switching the clock when DMA goes on/off
-	   The high byte is the 66Mhz timing */
-	u16	pio[2];			/* Cached PIO values */
-	u16	mwdma[2];		/* Cached MWDMA values */
-	u16	udma[2];		/* Cached UDMA values (per drive) */
-	u16	quirks;
-};
-
-#define ATA_66		0
-#define ATA_50		1
-#define ATA_ANY		2
-
-#define UDMA_OFF	0
-#define MWDMA_OFF	0
-
-/*
- *	We allow users to force the card into non raid mode without
- *	flashing the alternative BIOS. This is also necessary right now
- *	for embedded platforms that cannot run a PC BIOS but are using this
- *	device.
- */
-
-static int it8212_noraid;
-
-/**
- *	it821x_program	-	program the PIO/MWDMA registers
- *	@drive: drive to tune
- *	@timing: timing info
- *
- *	Program the PIO/MWDMA timing for this channel according to the
- *	current clock.
- */
-
-static void it821x_program(ide_drive_t *drive, u16 timing)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	int channel = hwif->channel;
-	u8 conf;
-
-	/* Program PIO/MWDMA timing bits */
-	if(itdev->clock_mode == ATA_66)
-		conf = timing >> 8;
-	else
-		conf = timing & 0xFF;
-
-	pci_write_config_byte(dev, 0x54 + 4 * channel, conf);
-}
-
-/**
- *	it821x_program_udma	-	program the UDMA registers
- *	@drive: drive to tune
- *	@timing: timing info
- *
- *	Program the UDMA timing for this drive according to the
- *	current clock.
- */
-
-static void it821x_program_udma(ide_drive_t *drive, u16 timing)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	int channel = hwif->channel;
-	u8 unit = drive->dn & 1, conf;
-
-	/* Program UDMA timing bits */
-	if(itdev->clock_mode == ATA_66)
-		conf = timing >> 8;
-	else
-		conf = timing & 0xFF;
-
-	if (itdev->timing10 == 0)
-		pci_write_config_byte(dev, 0x56 + 4 * channel + unit, conf);
-	else {
-		pci_write_config_byte(dev, 0x56 + 4 * channel, conf);
-		pci_write_config_byte(dev, 0x56 + 4 * channel + 1, conf);
-	}
-}
-
-/**
- *	it821x_clock_strategy
- *	@drive: drive to set up
- *
- *	Select between the 50 and 66Mhz base clocks to get the best
- *	results for this interface.
- */
-
-static void it821x_clock_strategy(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	int clock, altclock, sel = 0;
-	u8 unit = drive->dn & 1, v;
-
-	if(itdev->want[0][0] > itdev->want[1][0]) {
-		clock = itdev->want[0][1];
-		altclock = itdev->want[1][1];
-	} else {
-		clock = itdev->want[1][1];
-		altclock = itdev->want[0][1];
-	}
-
-	/*
-	 * if both clocks can be used for the mode with the higher priority
-	 * use the clock needed by the mode with the lower priority
-	 */
-	if (clock == ATA_ANY)
-		clock = altclock;
-
-	/* Nobody cares - keep the same clock */
-	if(clock == ATA_ANY)
-		return;
-	/* No change */
-	if(clock == itdev->clock_mode)
-		return;
-
-	/* Load this into the controller ? */
-	if(clock == ATA_66)
-		itdev->clock_mode = ATA_66;
-	else {
-		itdev->clock_mode = ATA_50;
-		sel = 1;
-	}
-
-	pci_read_config_byte(dev, 0x50, &v);
-	v &= ~(1 << (1 + hwif->channel));
-	v |= sel << (1 + hwif->channel);
-	pci_write_config_byte(dev, 0x50, v);
-
-	/*
-	 *	Reprogram the UDMA/PIO of the pair drive for the switch
-	 *	MWDMA will be dealt with by the dma switcher
-	 */
-	if(pair && itdev->udma[1-unit] != UDMA_OFF) {
-		it821x_program_udma(pair, itdev->udma[1-unit]);
-		it821x_program(pair, itdev->pio[1-unit]);
-	}
-	/*
-	 *	Reprogram the UDMA/PIO of our drive for the switch.
-	 *	MWDMA will be dealt with by the dma switcher
-	 */
-	if(itdev->udma[unit] != UDMA_OFF) {
-		it821x_program_udma(drive, itdev->udma[unit]);
-		it821x_program(drive, itdev->pio[unit]);
-	}
-}
-
-/**
- *	it821x_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Tune the host to the desired PIO mode taking into the consideration
- *	the maximum PIO mode supported by the other device on the cable.
- */
-
-static void it821x_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	u8 unit = drive->dn & 1, set_pio = pio;
-
-	/* Spec says 89 ref driver uses 88 */
-	static u16 pio_timings[]= { 0xAA88, 0xA382, 0xA181, 0x3332, 0x3121 };
-	static u8 pio_want[]    = { ATA_66, ATA_66, ATA_66, ATA_66, ATA_ANY };
-
-	/*
-	 * Compute the best PIO mode we can for a given device. We must
-	 * pick a speed that does not cause problems with the other device
-	 * on the cable.
-	 */
-	if (pair) {
-		u8 pair_pio = pair->pio_mode - XFER_PIO_0;
-		/* trim PIO to the slowest of the master/slave */
-		if (pair_pio < set_pio)
-			set_pio = pair_pio;
-	}
-
-	/* We prefer 66Mhz clock for PIO 0-3, don't care for PIO4 */
-	itdev->want[unit][1] = pio_want[set_pio];
-	itdev->want[unit][0] = 1;	/* PIO is lowest priority */
-	itdev->pio[unit] = pio_timings[set_pio];
-	it821x_clock_strategy(drive);
-	it821x_program(drive, itdev->pio[unit]);
-}
-
-/**
- *	it821x_tune_mwdma	-	tune a channel for MWDMA
- *	@drive: drive to set up
- *	@mode_wanted: the target operating mode
- *
- *	Load the timing settings for this device mode into the
- *	controller when doing MWDMA in pass through mode. The caller
- *	must manage the whole lack of per device MWDMA/PIO timings and
- *	the shared MWDMA/PIO timing register.
- */
-
-static void it821x_tune_mwdma(ide_drive_t *drive, u8 mode_wanted)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev *itdev = (void *)ide_get_hwifdata(hwif);
-	u8 unit = drive->dn & 1, channel = hwif->channel, conf;
-
-	static u16 dma[]	= { 0x8866, 0x3222, 0x3121 };
-	static u8 mwdma_want[]	= { ATA_ANY, ATA_66, ATA_ANY };
-
-	itdev->want[unit][1] = mwdma_want[mode_wanted];
-	itdev->want[unit][0] = 2;	/* MWDMA is low priority */
-	itdev->mwdma[unit] = dma[mode_wanted];
-	itdev->udma[unit] = UDMA_OFF;
-
-	/* UDMA bits off - Revision 0x10 do them in pairs */
-	pci_read_config_byte(dev, 0x50, &conf);
-	if (itdev->timing10)
-		conf |= channel ? 0x60: 0x18;
-	else
-		conf |= 1 << (3 + 2 * channel + unit);
-	pci_write_config_byte(dev, 0x50, conf);
-
-	it821x_clock_strategy(drive);
-	/* FIXME: do we need to program this ? */
-	/* it821x_program(drive, itdev->mwdma[unit]); */
-}
-
-/**
- *	it821x_tune_udma	-	tune a channel for UDMA
- *	@drive: drive to set up
- *	@mode_wanted: the target operating mode
- *
- *	Load the timing settings for this device mode into the
- *	controller when doing UDMA modes in pass through.
- */
-
-static void it821x_tune_udma(ide_drive_t *drive, u8 mode_wanted)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	u8 unit = drive->dn & 1, channel = hwif->channel, conf;
-
-	static u16 udma[]	= { 0x4433, 0x4231, 0x3121, 0x2121, 0x1111, 0x2211, 0x1111 };
-	static u8 udma_want[]	= { ATA_ANY, ATA_50, ATA_ANY, ATA_66, ATA_66, ATA_50, ATA_66 };
-
-	itdev->want[unit][1] = udma_want[mode_wanted];
-	itdev->want[unit][0] = 3;	/* UDMA is high priority */
-	itdev->mwdma[unit] = MWDMA_OFF;
-	itdev->udma[unit] = udma[mode_wanted];
-	if(mode_wanted >= 5)
-		itdev->udma[unit] |= 0x8080;	/* UDMA 5/6 select on */
-
-	/* UDMA on. Again revision 0x10 must do the pair */
-	pci_read_config_byte(dev, 0x50, &conf);
-	if (itdev->timing10)
-		conf &= channel ? 0x9F: 0xE7;
-	else
-		conf &= ~ (1 << (3 + 2 * channel + unit));
-	pci_write_config_byte(dev, 0x50, conf);
-
-	it821x_clock_strategy(drive);
-	it821x_program_udma(drive, itdev->udma[unit]);
-
-}
-
-/**
- *	it821x_dma_read	-	DMA hook
- *	@drive: drive for DMA
- *
- *	The IT821x has a single timing register for MWDMA and for PIO
- *	operations. As we flip back and forth we have to reload the
- *	clock. In addition the rev 0x10 device only works if the same
- *	timing value is loaded into the master and slave UDMA clock
- * 	so we must also reload that.
- *
- *	FIXME: we could figure out in advance if we need to do reloads
- */
-
-static void it821x_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	u8 unit = drive->dn & 1;
-
-	if(itdev->mwdma[unit] != MWDMA_OFF)
-		it821x_program(drive, itdev->mwdma[unit]);
-	else if(itdev->udma[unit] != UDMA_OFF && itdev->timing10)
-		it821x_program_udma(drive, itdev->udma[unit]);
-	ide_dma_start(drive);
-}
-
-/**
- *	it821x_dma_write	-	DMA hook
- *	@drive: drive for DMA stop
- *
- *	The IT821x has a single timing register for MWDMA and for PIO
- *	operations. As we flip back and forth we have to reload the
- *	clock.
- */
-
-static int it821x_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct it821x_dev *itdev = ide_get_hwifdata(hwif);
-	int ret = ide_dma_end(drive);
-	u8 unit = drive->dn & 1;
-
-	if(itdev->mwdma[unit] != MWDMA_OFF)
-		it821x_program(drive, itdev->pio[unit]);
-	return ret;
-}
-
-/**
- *	it821x_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Tune the ITE chipset for the desired DMA mode.
- */
-
-static void it821x_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	const u8 speed = drive->dma_mode;
-
-	/*
-	 * MWDMA tuning is really hard because our MWDMA and PIO
-	 * timings are kept in the same place.  We can switch in the
-	 * host dma on/off callbacks.
-	 */
-	if (speed >= XFER_UDMA_0 && speed <= XFER_UDMA_6)
-		it821x_tune_udma(drive, speed - XFER_UDMA_0);
-	else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2)
-		it821x_tune_mwdma(drive, speed - XFER_MW_DMA_0);
-}
-
-/**
- *	it821x_cable_detect	-	cable detection
- *	@hwif: interface to check
- *
- *	Check for the presence of an ATA66 capable cable on the
- *	interface. Problematic as it seems some cards don't have
- *	the needed logic onboard.
- */
-
-static u8 it821x_cable_detect(ide_hwif_t *hwif)
-{
-	/* The reference driver also only does disk side */
-	return ATA_CBL_PATA80;
-}
-
-/**
- *	it821x_quirkproc	-	post init callback
- *	@drive: drive
- *
- *	This callback is run after the drive has been probed but
- *	before anything gets attached. It allows drivers to do any
- *	final tuning that is needed, or fixups to work around bugs.
- */
-
-static void it821x_quirkproc(ide_drive_t *drive)
-{
-	struct it821x_dev *itdev = ide_get_hwifdata(drive->hwif);
-	u16 *id = drive->id;
-
-	if (!itdev->smart) {
-		/*
-		 *	If we are in pass through mode then not much
-		 *	needs to be done, but we do bother to clear the
-		 *	IRQ mask as we may well be in PIO (eg rev 0x10)
-		 *	for now and we know unmasking is safe on this chipset.
-		 */
-		drive->dev_flags |= IDE_DFLAG_UNMASK;
-	} else {
-	/*
-	 *	Perform fixups on smart mode. We need to "lose" some
-	 *	capabilities the firmware lacks but does not filter, and
-	 *	also patch up some capability bits that it forgets to set
-	 *	in RAID mode.
-	 */
-
-		/* Check for RAID v native */
-		if (strstr((char *)&id[ATA_ID_PROD],
-			   "Integrated Technology Express")) {
-			/* In raid mode the ident block is slightly buggy
-			   We need to set the bits so that the IDE layer knows
-			   LBA28. LBA48 and DMA ar valid */
-			id[ATA_ID_CAPABILITY]    |= (3 << 8); /* LBA28, DMA */
-			id[ATA_ID_COMMAND_SET_2] |= 0x0400;   /* LBA48 valid */
-			id[ATA_ID_CFS_ENABLE_2]  |= 0x0400;   /* LBA48 on */
-			/* Reporting logic */
-			printk(KERN_INFO "%s: IT8212 %sRAID %d volume",
-				drive->name, id[147] ? "Bootable " : "",
-				id[ATA_ID_CSFO]);
-			if (id[ATA_ID_CSFO] != 1)
-				printk(KERN_CONT "(%dK stripe)", id[146]);
-			printk(KERN_CONT ".\n");
-		} else {
-			/* Non RAID volume. Fixups to stop the core code
-			   doing unsupported things */
-			id[ATA_ID_FIELD_VALID]	 &= 3;
-			id[ATA_ID_QUEUE_DEPTH]	  = 0;
-			id[ATA_ID_COMMAND_SET_1]  = 0;
-			id[ATA_ID_COMMAND_SET_2] &= 0xC400;
-			id[ATA_ID_CFSSE]	 &= 0xC000;
-			id[ATA_ID_CFS_ENABLE_1]	  = 0;
-			id[ATA_ID_CFS_ENABLE_2]	 &= 0xC400;
-			id[ATA_ID_CSF_DEFAULT]	 &= 0xC000;
-			id[127]			  = 0;
-			id[ATA_ID_DLF]		  = 0;
-			id[ATA_ID_CSFO]		  = 0;
-			id[ATA_ID_CFA_POWER]	  = 0;
-			printk(KERN_INFO "%s: Performing identify fixups.\n",
-				drive->name);
-		}
-
-		/*
-		 * Set MWDMA0 mode as enabled/support - just to tell
-		 * IDE core that DMA is supported (it821x hardware
-		 * takes care of DMA mode programming).
-		 */
-		if (ata_id_has_dma(id)) {
-			id[ATA_ID_MWDMA_MODES] |= 0x0101;
-			drive->current_speed = XFER_MW_DMA_0;
-		}
-	}
-
-}
-
-static const struct ide_dma_ops it821x_pass_through_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= it821x_dma_start,
-	.dma_end		= it821x_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-/**
- *	init_hwif_it821x	-	set up hwif structs
- *	@hwif: interface to set up
- *
- *	We do the basic set up of the interface structure. The IT8212
- *	requires several custom handlers so we override the default
- *	ide DMA handlers appropriately
- */
-
-static void init_hwif_it821x(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct it821x_dev *itdevs = host->host_priv;
-	struct it821x_dev *idev = itdevs + hwif->channel;
-	u8 conf;
-
-	ide_set_hwifdata(hwif, idev);
-
-	pci_read_config_byte(dev, 0x50, &conf);
-	if (conf & 1) {
-		idev->smart = 1;
-		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-		/* Long I/O's although allowed in LBA48 space cause the
-		   onboard firmware to enter the twighlight zone */
-		hwif->rqsize = 256;
-	}
-
-	/* Pull the current clocks from 0x50 also */
-	if (conf & (1 << (1 + hwif->channel)))
-		idev->clock_mode = ATA_50;
-	else
-		idev->clock_mode = ATA_66;
-
-	idev->want[0][1] = ATA_ANY;
-	idev->want[1][1] = ATA_ANY;
-
-	/*
-	 *	Not in the docs but according to the reference driver
-	 *	this is necessary.
-	 */
-
-	if (dev->revision == 0x10) {
-		idev->timing10 = 1;
-		hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-		if (idev->smart == 0)
-			printk(KERN_WARNING DRV_NAME " %s: revision 0x10, "
-				"workarounds activated\n", pci_name(dev));
-	}
-
-	if (idev->smart == 0) {
-		/* MWDMA/PIO clock switching for pass through mode */
-		hwif->dma_ops = &it821x_pass_through_dma_ops;
-	} else
-		hwif->host_flags |= IDE_HFLAG_NO_SET_MODE;
-
-	if (hwif->dma_base == 0)
-		return;
-
-	hwif->ultra_mask = ATA_UDMA6;
-	hwif->mwdma_mask = ATA_MWDMA2;
-
-	/* Vortex86SX quirk: prevent Ultra-DMA mode to fix BadCRC issue */
-	if (idev->quirks & QUIRK_VORTEX86) {
-		if (dev->revision == 0x11)
-			hwif->ultra_mask = 0;
-	}
-}
-
-static void it8212_disable_raid(struct pci_dev *dev)
-{
-	/* Reset local CPU, and set BIOS not ready */
-	pci_write_config_byte(dev, 0x5E, 0x01);
-
-	/* Set to bypass mode, and reset PCI bus */
-	pci_write_config_byte(dev, 0x50, 0x00);
-	pci_write_config_word(dev, PCI_COMMAND,
-			      PCI_COMMAND_PARITY | PCI_COMMAND_IO |
-			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
-	pci_write_config_word(dev, 0x40, 0xA0F3);
-
-	pci_write_config_dword(dev,0x4C, 0x02040204);
-	pci_write_config_byte(dev, 0x42, 0x36);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20);
-}
-
-static int init_chipset_it821x(struct pci_dev *dev)
-{
-	u8 conf;
-	static char *mode[2] = { "pass through", "smart" };
-
-	/* Force the card into bypass mode if so requested */
-	if (it8212_noraid) {
-		printk(KERN_INFO DRV_NAME " %s: forcing bypass mode\n",
-			pci_name(dev));
-		it8212_disable_raid(dev);
-	}
-	pci_read_config_byte(dev, 0x50, &conf);
-	printk(KERN_INFO DRV_NAME " %s: controller in %s mode\n",
-		pci_name(dev), mode[conf & 1]);
-	return 0;
-}
-
-static const struct ide_port_ops it821x_port_ops = {
-	/* it821x_set_{pio,dma}_mode() are only used in pass-through mode */
-	.set_pio_mode		= it821x_set_pio_mode,
-	.set_dma_mode		= it821x_set_dma_mode,
-	.quirkproc		= it821x_quirkproc,
-	.cable_detect		= it821x_cable_detect,
-};
-
-static const struct ide_port_info it821x_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_it821x,
-	.init_hwif	= init_hwif_it821x,
-	.port_ops	= &it821x_port_ops,
-	.pio_mask	= ATA_PIO4,
-};
-
-/**
- *	it821x_init_one	-	pci layer discovery entry
- *	@dev: PCI device
- *	@id: ident table entry
- *
- *	Called by the PCI code when it finds an ITE821x controller.
- *	We then use the IDE PCI generic helper to do most of the work.
- */
-
-static int it821x_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct it821x_dev *itdevs;
-	int rc;
-
-	itdevs = kcalloc(2, sizeof(*itdevs), GFP_KERNEL);
-	if (itdevs == NULL) {
-		printk(KERN_ERR DRV_NAME " %s: out of memory\n", pci_name(dev));
-		return -ENOMEM;
-	}
-
-	itdevs->quirks = id->driver_data;
-
-	rc = ide_pci_init_one(dev, &it821x_chipset, itdevs);
-	if (rc)
-		kfree(itdevs);
-
-	return rc;
-}
-
-static void it821x_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct it821x_dev *itdevs = host->host_priv;
-
-	ide_pci_remove(dev);
-	kfree(itdevs);
-}
-
-static const struct pci_device_id it821x_pci_tbl[] = {
-	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), 0 },
-	{ PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), 0 },
-	{ PCI_VDEVICE(RDC, PCI_DEVICE_ID_RDC_D1010), QUIRK_VORTEX86 },
-	{ 0, },
-};
-
-MODULE_DEVICE_TABLE(pci, it821x_pci_tbl);
-
-static struct pci_driver it821x_pci_driver = {
-	.name		= "ITE821x IDE",
-	.id_table	= it821x_pci_tbl,
-	.probe		= it821x_init_one,
-	.remove		= it821x_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init it821x_ide_init(void)
-{
-	return ide_pci_register_driver(&it821x_pci_driver);
-}
-
-static void __exit it821x_ide_exit(void)
-{
-	pci_unregister_driver(&it821x_pci_driver);
-}
-
-module_init(it821x_ide_init);
-module_exit(it821x_ide_exit);
-
-module_param_named(noraid, it8212_noraid, int, S_IRUGO);
-MODULE_PARM_DESC(noraid, "Force card into bypass mode");
-
-MODULE_AUTHOR("Alan Cox");
-MODULE_DESCRIPTION("PCI driver module for the ITE 821x");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/jmicron.c b/drivers/ide/jmicron.c
deleted file mode 100644
index ae6480dcbadf2..0000000000000
--- a/drivers/ide/jmicron.c
+++ /dev/null
@@ -1,176 +0,0 @@
-
-/*
- * Copyright (C) 2006		Red Hat
- *
- *  May be copied or modified under the terms of the GNU General Public License
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "jmicron"
-
-typedef enum {
-	PORT_PATA0 = 0,
-	PORT_PATA1 = 1,
-	PORT_SATA = 2,
-} port_type;
-
-/**
- *	jmicron_cable_detect	-	cable detection
- *	@hwif: IDE port
- *
- *	Returns the cable type.
- */
-
-static u8 jmicron_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-
-	u32 control;
-	u32 control5;
-
-	int port = hwif->channel;
-	port_type port_map[2];
-
-	pci_read_config_dword(pdev, 0x40, &control);
-
-	/* There are two basic mappings. One has the two SATA ports merged
-	   as master/slave and the secondary as PATA, the other has only the
-	   SATA port mapped */
-	if (control & (1 << 23)) {
-		port_map[0] = PORT_SATA;
-		port_map[1] = PORT_PATA0;
-	} else {
-		port_map[0] = PORT_SATA;
-		port_map[1] = PORT_SATA;
-	}
-
-	/* The 365/366 may have this bit set to map the second PATA port
-	   as the internal primary channel */
-	pci_read_config_dword(pdev, 0x80, &control5);
-	if (control5 & (1<<24))
-		port_map[0] = PORT_PATA1;
-
-	/* The two ports may then be logically swapped by the firmware */
-	if (control & (1 << 22))
-		port = port ^ 1;
-
-	/*
-	 *	Now we know which physical port we are talking about we can
-	 *	actually do our cable checking etc. Thankfully we don't need
-	 *	to do the plumbing for other cases.
-	 */
-	switch (port_map[port]) {
-	case PORT_PATA0:
-		if (control & (1 << 3))	/* 40/80 pin primary */
-			return ATA_CBL_PATA40;
-		return ATA_CBL_PATA80;
-	case PORT_PATA1:
-		if (control5 & (1 << 19))	/* 40/80 pin secondary */
-			return ATA_CBL_PATA40;
-		return ATA_CBL_PATA80;
-	case PORT_SATA:
-		break;
-	}
-	/* Avoid bogus "control reaches end of non-void function" */
-	return ATA_CBL_PATA80;
-}
-
-static void jmicron_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-}
-
-/**
- *	jmicron_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	As the JMicron snoops for timings we don't need to do anything here.
- */
-
-static void jmicron_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-}
-
-static const struct ide_port_ops jmicron_port_ops = {
-	.set_pio_mode		= jmicron_set_pio_mode,
-	.set_dma_mode		= jmicron_set_dma_mode,
-	.cable_detect		= jmicron_cable_detect,
-};
-
-static const struct ide_port_info jmicron_chipset = {
-	.name		= DRV_NAME,
-	.enablebits	= { { 0x40, 0x01, 0x01 }, { 0x40, 0x10, 0x10 } },
-	.port_ops	= &jmicron_port_ops,
-	.pio_mask	= ATA_PIO5,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA6,
-};
-
-/**
- *	jmicron_init_one	-	pci layer discovery entry
- *	@dev: PCI device
- *	@id: ident table entry
- *
- *	Called by the PCI code when it finds a Jmicron controller.
- *	We then use the IDE PCI generic helper to do most of the work.
- */
-
-static int jmicron_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &jmicron_chipset, NULL);
-}
-
-/* All JMB PATA controllers have and will continue to have the same
- * interface.  Matching vendor and device class is enough for all
- * current and future controllers if the controller is programmed
- * properly.
- *
- * If libata is configured, jmicron PCI quirk programs the controller
- * into the correct mode.  If libata isn't configured, match known
- * device IDs too to maintain backward compatibility.
- */
-static struct pci_device_id jmicron_pci_tbl[] = {
-#if !defined(CONFIG_ATA) && !defined(CONFIG_ATA_MODULE)
-	{ PCI_VDEVICE(JMICRON, PCI_DEVICE_ID_JMICRON_JMB361) },
-	{ PCI_VDEVICE(JMICRON, PCI_DEVICE_ID_JMICRON_JMB363) },
-	{ PCI_VDEVICE(JMICRON, PCI_DEVICE_ID_JMICRON_JMB365) },
-	{ PCI_VDEVICE(JMICRON, PCI_DEVICE_ID_JMICRON_JMB366) },
-	{ PCI_VDEVICE(JMICRON, PCI_DEVICE_ID_JMICRON_JMB368) },
-#endif
-	{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
-	  PCI_CLASS_STORAGE_IDE << 8, 0xffff00, 0 },
-	{ 0, },
-};
-
-MODULE_DEVICE_TABLE(pci, jmicron_pci_tbl);
-
-static struct pci_driver jmicron_pci_driver = {
-	.name		= "JMicron IDE",
-	.id_table	= jmicron_pci_tbl,
-	.probe		= jmicron_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init jmicron_ide_init(void)
-{
-	return ide_pci_register_driver(&jmicron_pci_driver);
-}
-
-static void __exit jmicron_ide_exit(void)
-{
-	pci_unregister_driver(&jmicron_pci_driver);
-}
-
-module_init(jmicron_ide_init);
-module_exit(jmicron_ide_exit);
-
-MODULE_AUTHOR("Alan Cox");
-MODULE_DESCRIPTION("PCI driver module for the JMicron in legacy modes");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/ns87415.c b/drivers/ide/ns87415.c
deleted file mode 100644
index 11a672aba6ee8..0000000000000
--- a/drivers/ide/ns87415.c
+++ /dev/null
@@ -1,350 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1997-1998	Mark Lord <mlord@pobox.com>
- * Copyright (C) 1998		Eddie C. Dost <ecd@skynet.be>
- * Copyright (C) 1999-2000	Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2004		Grant Grundler <grundler at parisc-linux.org>
- *
- * Inspired by an earlier effort from David S. Miller <davem@redhat.com>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "ns87415"
-
-#ifdef CONFIG_SUPERIO
-/* SUPERIO 87560 is a PoS chip that NatSem denies exists.
- * Unfortunately, it's built-in on all Astro-based PA-RISC workstations
- * which use the integrated NS87514 cell for CD-ROM support.
- * i.e we have to support for CD-ROM installs.
- * See drivers/parisc/superio.c for more gory details.
- */
-#include <asm/superio.h>
-
-#define SUPERIO_IDE_MAX_RETRIES 25
-
-/* Because of a defect in Super I/O, all reads of the PCI DMA status 
- * registers, IDE status register and the IDE select register need to be 
- * retried
- */
-static u8 superio_ide_inb (unsigned long port)
-{
-	u8 tmp;
-	int retries = SUPERIO_IDE_MAX_RETRIES;
-
-	/* printk(" [ reading port 0x%x with retry ] ", port); */
-
-	do {
-		tmp = inb(port);
-		if (tmp == 0)
-			udelay(50);
-	} while (tmp == 0 && retries-- > 0);
-
-	return tmp;
-}
-
-static u8 superio_read_status(ide_hwif_t *hwif)
-{
-	return superio_ide_inb(hwif->io_ports.status_addr);
-}
-
-static u8 superio_dma_sff_read_status(ide_hwif_t *hwif)
-{
-	return superio_ide_inb(hwif->dma_base + ATA_DMA_STATUS);
-}
-
-static void superio_tf_read(ide_drive_t *drive, struct ide_taskfile *tf,
-			    u8 valid)
-{
-	struct ide_io_ports *io_ports = &drive->hwif->io_ports;
-
-	if (valid & IDE_VALID_ERROR)
-		tf->error  = inb(io_ports->feature_addr);
-	if (valid & IDE_VALID_NSECT)
-		tf->nsect  = inb(io_ports->nsect_addr);
-	if (valid & IDE_VALID_LBAL)
-		tf->lbal   = inb(io_ports->lbal_addr);
-	if (valid & IDE_VALID_LBAM)
-		tf->lbam   = inb(io_ports->lbam_addr);
-	if (valid & IDE_VALID_LBAH)
-		tf->lbah   = inb(io_ports->lbah_addr);
-	if (valid & IDE_VALID_DEVICE)
-		tf->device = superio_ide_inb(io_ports->device_addr);
-}
-
-static void ns87415_dev_select(ide_drive_t *drive);
-
-static const struct ide_tp_ops superio_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= superio_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ns87415_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= superio_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static void superio_init_iops(struct hwif_s *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	u32 dma_stat;
-	u8 port = hwif->channel, tmp;
-
-	dma_stat = (pci_resource_start(pdev, 4) & ~3) + (!port ? 2 : 0xa);
-
-	/* Clear error/interrupt, enable dma */
-	tmp = superio_ide_inb(dma_stat);
-	outb(tmp | 0x66, dma_stat);
-}
-#else
-#define superio_dma_sff_read_status ide_dma_sff_read_status
-#endif
-
-static unsigned int ns87415_count = 0, ns87415_control[MAX_HWIFS] = { 0 };
-
-/*
- * This routine either enables/disables (according to IDE_DFLAG_PRESENT)
- * the IRQ associated with the port,
- * and selects either PIO or DMA handshaking for the next I/O operation.
- */
-static void ns87415_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned int bit, other, new, *old = (unsigned int *) hwif->select_data;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	new = *old;
-
-	/* Adjust IRQ enable bit */
-	bit = 1 << (8 + hwif->channel);
-
-	if (drive->dev_flags & IDE_DFLAG_PRESENT)
-		new &= ~bit;
-	else
-		new |= bit;
-
-	/* Select PIO or DMA, DMA may only be selected for one drive/channel. */
-	bit   = 1 << (20 + (drive->dn & 1) + (hwif->channel << 1));
-	other = 1 << (20 + (1 - (drive->dn & 1)) + (hwif->channel << 1));
-	new = use_dma ? ((new & ~other) | bit) : (new & ~bit);
-
-	if (new != *old) {
-		unsigned char stat;
-
-		/*
-		 * Don't change DMA engine settings while Write Buffers
-		 * are busy.
-		 */
-		(void) pci_read_config_byte(dev, 0x43, &stat);
-		while (stat & 0x03) {
-			udelay(1);
-			(void) pci_read_config_byte(dev, 0x43, &stat);
-		}
-
-		*old = new;
-		(void) pci_write_config_dword(dev, 0x40, new);
-
-		/*
-		 * And let things settle...
-		 */
-		udelay(10);
-	}
-
-	local_irq_restore(flags);
-}
-
-static void ns87415_dev_select(ide_drive_t *drive)
-{
-	ns87415_prepare_drive(drive,
-			      !!(drive->dev_flags & IDE_DFLAG_USING_DMA));
-
-	outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr);
-}
-
-static void ns87415_dma_start(ide_drive_t *drive)
-{
-	ns87415_prepare_drive(drive, 1);
-	ide_dma_start(drive);
-}
-
-static int ns87415_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat = 0, dma_cmd = 0;
-
-	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-	/* get DMA command mode */
-	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-	/* stop DMA */
-	outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-	/* from ERRATA: clear the INTR & ERROR bits */
-	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-	outb(dma_cmd | 6, hwif->dma_base + ATA_DMA_CMD);
-
-	ns87415_prepare_drive(drive, 0);
-
-	/* verify good DMA status */
-	return (dma_stat & 7) != 4;
-}
-
-static void init_hwif_ns87415 (ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned int ctrl, using_inta;
-	u8 progif;
-#ifdef __sparc_v9__
-	int timeout;
-	u8 stat;
-#endif
-
-	/*
-	 * We cannot probe for IRQ: both ports share common IRQ on INTA.
-	 * Also, leave IRQ masked during drive probing, to prevent infinite
-	 * interrupts from a potentially floating INTA..
-	 *
-	 * IRQs get unmasked in dev_select() when drive is first used.
-	 */
-	(void) pci_read_config_dword(dev, 0x40, &ctrl);
-	(void) pci_read_config_byte(dev, 0x09, &progif);
-	/* is irq in "native" mode? */
-	using_inta = progif & (1 << (hwif->channel << 1));
-	if (!using_inta)
-		using_inta = ctrl & (1 << (4 + hwif->channel));
-	if (hwif->mate) {
-		hwif->select_data = hwif->mate->select_data;
-	} else {
-		hwif->select_data = (unsigned long)
-					&ns87415_control[ns87415_count++];
-		ctrl |= (1 << 8) | (1 << 9);	/* mask both IRQs */
-		if (using_inta)
-			ctrl &= ~(1 << 6);	/* unmask INTA */
-		*((unsigned int *)hwif->select_data) = ctrl;
-		(void) pci_write_config_dword(dev, 0x40, ctrl);
-
-		/*
-		 * Set prefetch size to 512 bytes for both ports,
-		 * but don't turn on/off prefetching here.
-		 */
-		pci_write_config_byte(dev, 0x55, 0xee);
-
-#ifdef __sparc_v9__
-		/*
-		 * XXX: Reset the device, if we don't it will not respond to
-		 *      dev_select() properly during first ide_probe_port().
-		 */
-		timeout = 10000;
-		outb(12, hwif->io_ports.ctl_addr);
-		udelay(10);
-		outb(8, hwif->io_ports.ctl_addr);
-		do {
-			udelay(50);
-			stat = hwif->tp_ops->read_status(hwif);
-			if (stat == 0xff)
-				break;
-		} while ((stat & ATA_BUSY) && --timeout);
-#endif
-	}
-
-	if (!using_inta)
-		hwif->irq = pci_get_legacy_ide_irq(dev, hwif->channel);
-
-	if (!hwif->dma_base)
-		return;
-
-	outb(0x60, hwif->dma_base + ATA_DMA_STATUS);
-}
-
-static const struct ide_tp_ops ns87415_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ns87415_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_dma_ops ns87415_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ns87415_dma_start,
-	.dma_end		= ns87415_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= superio_dma_sff_read_status,
-};
-
-static const struct ide_port_info ns87415_chipset = {
-	.name		= DRV_NAME,
-	.init_hwif	= init_hwif_ns87415,
-	.tp_ops 	= &ns87415_tp_ops,
-	.dma_ops	= &ns87415_dma_ops,
-	.host_flags	= IDE_HFLAG_TRUST_BIOS_FOR_DMA |
-			  IDE_HFLAG_NO_ATAPI_DMA,
-};
-
-static int ns87415_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d = ns87415_chipset;
-
-#ifdef CONFIG_SUPERIO
-	if (PCI_SLOT(dev->devfn) == 0xE) {
-		/* Built-in - assume it's under superio. */
-		d.init_iops = superio_init_iops;
-		d.tp_ops = &superio_tp_ops;
-	}
-#endif
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static const struct pci_device_id ns87415_pci_tbl[] = {
-	{ PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_87415), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, ns87415_pci_tbl);
-
-static struct pci_driver ns87415_pci_driver = {
-	.name		= "NS87415_IDE",
-	.id_table	= ns87415_pci_tbl,
-	.probe		= ns87415_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init ns87415_ide_init(void)
-{
-	return ide_pci_register_driver(&ns87415_pci_driver);
-}
-
-static void __exit ns87415_ide_exit(void)
-{
-	pci_unregister_driver(&ns87415_pci_driver);
-}
-
-module_init(ns87415_ide_init);
-module_exit(ns87415_ide_exit);
-
-MODULE_AUTHOR("Mark Lord, Eddie Dost, Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for NS87415 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/opti621.c b/drivers/ide/opti621.c
deleted file mode 100644
index c374f82333c6d..0000000000000
--- a/drivers/ide/opti621.c
+++ /dev/null
@@ -1,179 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1996-1998  Linus Torvalds & authors (see below)
- */
-
-/*
- * Authors:
- * Jaromir Koutek <miri@punknet.cz>,
- * Jan Harkes <jaharkes@cwi.nl>,
- * Mark Lord <mlord@pobox.com>
- * Some parts of code are from ali14xx.c and from rz1000.c.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "opti621"
-
-#define READ_REG 0	/* index of Read cycle timing register */
-#define WRITE_REG 1	/* index of Write cycle timing register */
-#define CNTRL_REG 3	/* index of Control register */
-#define STRAP_REG 5	/* index of Strap register */
-#define MISC_REG 6	/* index of Miscellaneous register */
-
-static int reg_base;
-
-static DEFINE_SPINLOCK(opti621_lock);
-
-/* Write value to register reg, base of register
- * is at reg_base (0x1f0 primary, 0x170 secondary,
- * if not changed by PCI configuration).
- * This is from setupvic.exe program.
- */
-static void write_reg(u8 value, int reg)
-{
-	inw(reg_base + 1);
-	inw(reg_base + 1);
-	outb(3, reg_base + 2);
-	outb(value, reg_base + reg);
-	outb(0x83, reg_base + 2);
-}
-
-/* Read value from register reg, base of register
- * is at reg_base (0x1f0 primary, 0x170 secondary,
- * if not changed by PCI configuration).
- * This is from setupvic.exe program.
- */
-static u8 read_reg(int reg)
-{
-	u8 ret = 0;
-
-	inw(reg_base + 1);
-	inw(reg_base + 1);
-	outb(3, reg_base + 2);
-	ret = inb(reg_base + reg);
-	outb(0x83, reg_base + 2);
-
-	return ret;
-}
-
-static void opti621_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	ide_drive_t *pair = ide_get_pair_dev(drive);
-	unsigned long flags;
-	unsigned long mode = drive->pio_mode, pair_mode;
-	const u8 pio = mode - XFER_PIO_0;
-	u8 tim, misc, addr_pio = pio, clk;
-
-	/* DRDY is default 2 (by OPTi Databook) */
-	static const u8 addr_timings[2][5] = {
-		{ 0x20, 0x10, 0x00, 0x00, 0x00 },	/* 33 MHz */
-		{ 0x10, 0x10, 0x00, 0x00, 0x00 },	/* 25 MHz */
-	};
-	static const u8 data_rec_timings[2][5] = {
-		{ 0x5b, 0x45, 0x32, 0x21, 0x20 },	/* 33 MHz */
-		{ 0x48, 0x34, 0x21, 0x10, 0x10 }	/* 25 MHz */
-	};
-
-	ide_set_drivedata(drive, (void *)mode);
-
-	if (pair) {
-		pair_mode = (unsigned long)ide_get_drivedata(pair);
-		if (pair_mode && pair_mode < mode)
-			addr_pio = pair_mode - XFER_PIO_0;
-	}
-
-	spin_lock_irqsave(&opti621_lock, flags);
-
-	reg_base = hwif->io_ports.data_addr;
-
-	/* allow Register-B */
-	outb(0xc0, reg_base + CNTRL_REG);
-	/* hmm, setupvic.exe does this ;-) */
-	outb(0xff, reg_base + 5);
-	/* if reads 0xff, adapter not exist? */
-	(void)inb(reg_base + CNTRL_REG);
-	/* if reads 0xc0, no interface exist? */
-	read_reg(CNTRL_REG);
-
-	/* check CLK speed */
-	clk = read_reg(STRAP_REG) & 1;
-
-	printk(KERN_INFO "%s: CLK = %d MHz\n", hwif->name, clk ? 25 : 33);
-
-	tim  = data_rec_timings[clk][pio];
-	misc = addr_timings[clk][addr_pio];
-
-	/* select Index-0/1 for Register-A/B */
-	write_reg(drive->dn & 1, MISC_REG);
-	/* set read cycle timings */
-	write_reg(tim, READ_REG);
-	/* set write cycle timings */
-	write_reg(tim, WRITE_REG);
-
-	/* use Register-A for drive 0 */
-	/* use Register-B for drive 1 */
-	write_reg(0x85, CNTRL_REG);
-
-	/* set address setup, DRDY timings,   */
-	/*  and read prefetch for both drives */
-	write_reg(misc, MISC_REG);
-
-	spin_unlock_irqrestore(&opti621_lock, flags);
-}
-
-static const struct ide_port_ops opti621_port_ops = {
-	.set_pio_mode		= opti621_set_pio_mode,
-};
-
-static const struct ide_port_info opti621_chipset = {
-	.name		= DRV_NAME,
-	.enablebits	= { {0x45, 0x80, 0x00}, {0x40, 0x08, 0x00} },
-	.port_ops	= &opti621_port_ops,
-	.host_flags	= IDE_HFLAG_NO_DMA,
-	.pio_mask	= ATA_PIO4,
-};
-
-static int opti621_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &opti621_chipset, NULL);
-}
-
-static const struct pci_device_id opti621_pci_tbl[] = {
-	{ PCI_VDEVICE(OPTI, PCI_DEVICE_ID_OPTI_82C621), 0 },
-	{ PCI_VDEVICE(OPTI, PCI_DEVICE_ID_OPTI_82C825), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, opti621_pci_tbl);
-
-static struct pci_driver opti621_pci_driver = {
-	.name		= "Opti621_IDE",
-	.id_table	= opti621_pci_tbl,
-	.probe		= opti621_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init opti621_ide_init(void)
-{
-	return ide_pci_register_driver(&opti621_pci_driver);
-}
-
-static void __exit opti621_ide_exit(void)
-{
-	pci_unregister_driver(&opti621_pci_driver);
-}
-
-module_init(opti621_ide_init);
-module_exit(opti621_ide_exit);
-
-MODULE_AUTHOR("Jaromir Koutek, Jan Harkes, Mark Lord");
-MODULE_DESCRIPTION("PCI driver module for Opti621 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/palm_bk3710.c b/drivers/ide/palm_bk3710.c
deleted file mode 100644
index d1fe4c13e35c2..0000000000000
--- a/drivers/ide/palm_bk3710.c
+++ /dev/null
@@ -1,387 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Palmchip bk3710 IDE controller
- *
- * Copyright (C) 2006 Texas Instruments.
- * Copyright (C) 2007 MontaVista Software, Inc., <source@mvista.com>
- *
- * ----------------------------------------------------------------------------
- *
- * ----------------------------------------------------------------------------
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/ide.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/clk.h>
-#include <linux/platform_device.h>
-
-/* Offset of the primary interface registers */
-#define IDE_PALM_ATA_PRI_REG_OFFSET 0x1F0
-
-/* Primary Control Offset */
-#define IDE_PALM_ATA_PRI_CTL_OFFSET 0x3F6
-
-#define BK3710_BMICP		0x00
-#define BK3710_BMISP		0x02
-#define BK3710_BMIDTP		0x04
-#define BK3710_IDETIMP		0x40
-#define BK3710_IDESTATUS	0x47
-#define BK3710_UDMACTL		0x48
-#define BK3710_MISCCTL		0x50
-#define BK3710_REGSTB		0x54
-#define BK3710_REGRCVR		0x58
-#define BK3710_DATSTB		0x5C
-#define BK3710_DATRCVR		0x60
-#define BK3710_DMASTB		0x64
-#define BK3710_DMARCVR		0x68
-#define BK3710_UDMASTB		0x6C
-#define BK3710_UDMATRP		0x70
-#define BK3710_UDMAENV		0x74
-#define BK3710_IORDYTMP		0x78
-
-static unsigned ideclk_period; /* in nanoseconds */
-
-struct palm_bk3710_udmatiming {
-	unsigned int rptime;	/* tRP -- Ready to pause time (nsec) */
-	unsigned int cycletime;	/* tCYCTYP2/2 -- avg Cycle Time (nsec) */
-				/* tENV is always a minimum of 20 nsec */
-};
-
-static const struct palm_bk3710_udmatiming palm_bk3710_udmatimings[6] = {
-	{ 160, 240 / 2 },	/* UDMA Mode 0 */
-	{ 125, 160 / 2 },	/* UDMA Mode 1 */
-	{ 100, 120 / 2 },	/* UDMA Mode 2 */
-	{ 100,  90 / 2 },	/* UDMA Mode 3 */
-	{ 100,  60 / 2 },	/* UDMA Mode 4 */
-	{  85,  40 / 2 },	/* UDMA Mode 5 */
-};
-
-static void palm_bk3710_setudmamode(void __iomem *base, unsigned int dev,
-				    unsigned int mode)
-{
-	u8 tenv, trp, t0;
-	u32 val32;
-	u16 val16;
-
-	/* DMA Data Setup */
-	t0 = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].cycletime,
-			  ideclk_period) - 1;
-	tenv = DIV_ROUND_UP(20, ideclk_period) - 1;
-	trp = DIV_ROUND_UP(palm_bk3710_udmatimings[mode].rptime,
-			   ideclk_period) - 1;
-
-	/* udmastb Ultra DMA Access Strobe Width */
-	val32 = readl(base + BK3710_UDMASTB) & (0xFF << (dev ? 0 : 8));
-	val32 |= (t0 << (dev ? 8 : 0));
-	writel(val32, base + BK3710_UDMASTB);
-
-	/* udmatrp Ultra DMA Ready to Pause Time */
-	val32 = readl(base + BK3710_UDMATRP) & (0xFF << (dev ? 0 : 8));
-	val32 |= (trp << (dev ? 8 : 0));
-	writel(val32, base + BK3710_UDMATRP);
-
-	/* udmaenv Ultra DMA envelop Time */
-	val32 = readl(base + BK3710_UDMAENV) & (0xFF << (dev ? 0 : 8));
-	val32 |= (tenv << (dev ? 8 : 0));
-	writel(val32, base + BK3710_UDMAENV);
-
-	/* Enable UDMA for Device */
-	val16 = readw(base + BK3710_UDMACTL) | (1 << dev);
-	writew(val16, base + BK3710_UDMACTL);
-}
-
-static void palm_bk3710_setdmamode(void __iomem *base, unsigned int dev,
-				   unsigned short min_cycle,
-				   unsigned int mode)
-{
-	u8 td, tkw, t0;
-	u32 val32;
-	u16 val16;
-	struct ide_timing *t;
-	int cycletime;
-
-	t = ide_timing_find_mode(mode);
-	cycletime = max_t(int, t->cycle, min_cycle);
-
-	/* DMA Data Setup */
-	t0 = DIV_ROUND_UP(cycletime, ideclk_period);
-	td = DIV_ROUND_UP(t->active, ideclk_period);
-	tkw = t0 - td - 1;
-	td -= 1;
-
-	val32 = readl(base + BK3710_DMASTB) & (0xFF << (dev ? 0 : 8));
-	val32 |= (td << (dev ? 8 : 0));
-	writel(val32, base + BK3710_DMASTB);
-
-	val32 = readl(base + BK3710_DMARCVR) & (0xFF << (dev ? 0 : 8));
-	val32 |= (tkw << (dev ? 8 : 0));
-	writel(val32, base + BK3710_DMARCVR);
-
-	/* Disable UDMA for Device */
-	val16 = readw(base + BK3710_UDMACTL) & ~(1 << dev);
-	writew(val16, base + BK3710_UDMACTL);
-}
-
-static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate,
-				   unsigned int dev, unsigned int cycletime,
-				   unsigned int mode)
-{
-	u8 t2, t2i, t0;
-	u32 val32;
-	struct ide_timing *t;
-
-	t = ide_timing_find_mode(XFER_PIO_0 + mode);
-
-	/* PIO Data Setup */
-	t0 = DIV_ROUND_UP(cycletime, ideclk_period);
-	t2 = DIV_ROUND_UP(t->active, ideclk_period);
-
-	t2i = t0 - t2 - 1;
-	t2 -= 1;
-
-	val32 = readl(base + BK3710_DATSTB) & (0xFF << (dev ? 0 : 8));
-	val32 |= (t2 << (dev ? 8 : 0));
-	writel(val32, base + BK3710_DATSTB);
-
-	val32 = readl(base + BK3710_DATRCVR) & (0xFF << (dev ? 0 : 8));
-	val32 |= (t2i << (dev ? 8 : 0));
-	writel(val32, base + BK3710_DATRCVR);
-
-	if (mate) {
-		u8 mode2 = mate->pio_mode - XFER_PIO_0;
-
-		if (mode2 < mode)
-			mode = mode2;
-	}
-
-	/* TASKFILE Setup */
-	t0 = DIV_ROUND_UP(t->cyc8b, ideclk_period);
-	t2 = DIV_ROUND_UP(t->act8b, ideclk_period);
-
-	t2i = t0 - t2 - 1;
-	t2 -= 1;
-
-	val32 = readl(base + BK3710_REGSTB) & (0xFF << (dev ? 0 : 8));
-	val32 |= (t2 << (dev ? 8 : 0));
-	writel(val32, base + BK3710_REGSTB);
-
-	val32 = readl(base + BK3710_REGRCVR) & (0xFF << (dev ? 0 : 8));
-	val32 |= (t2i << (dev ? 8 : 0));
-	writel(val32, base + BK3710_REGRCVR);
-}
-
-static void palm_bk3710_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	int is_slave = drive->dn & 1;
-	void __iomem *base = (void __iomem *)hwif->dma_base;
-	const u8 xferspeed = drive->dma_mode;
-
-	if (xferspeed >= XFER_UDMA_0) {
-		palm_bk3710_setudmamode(base, is_slave,
-					xferspeed - XFER_UDMA_0);
-	} else {
-		palm_bk3710_setdmamode(base, is_slave,
-				       drive->id[ATA_ID_EIDE_DMA_MIN],
-				       xferspeed);
-	}
-}
-
-static void palm_bk3710_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned int cycle_time;
-	int is_slave = drive->dn & 1;
-	ide_drive_t *mate;
-	void __iomem *base = (void __iomem *)hwif->dma_base;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	/*
-	 * Obtain the drive PIO data for tuning the Palm Chip registers
-	 */
-	cycle_time = ide_pio_cycle_time(drive, pio);
-	mate = ide_get_pair_dev(drive);
-	palm_bk3710_setpiomode(base, mate, is_slave, cycle_time, pio);
-}
-
-static void palm_bk3710_chipinit(void __iomem *base)
-{
-	/*
-	 * REVISIT:  the ATA reset signal needs to be managed through a
-	 * GPIO, which means it should come from platform_data.  Until
-	 * we get and use such information, we have to trust that things
-	 * have been reset before we get here.
-	 */
-
-	/*
-	 * Program the IDETIMP Register Value based on the following assumptions
-	 *
-	 * (ATA_IDETIMP_IDEEN		, ENABLE ) |
-	 * (ATA_IDETIMP_PREPOST1	, DISABLE) |
-	 * (ATA_IDETIMP_PREPOST0	, DISABLE) |
-	 *
-	 * DM6446 silicon rev 2.1 and earlier have no observed net benefit
-	 * from enabling prefetch/postwrite.
-	 */
-	writew(BIT(15), base + BK3710_IDETIMP);
-
-	/*
-	 * UDMACTL Ultra-ATA DMA Control
-	 * (ATA_UDMACTL_UDMAP1	, 0 ) |
-	 * (ATA_UDMACTL_UDMAP0	, 0 )
-	 *
-	 */
-	writew(0, base + BK3710_UDMACTL);
-
-	/*
-	 * MISCCTL Miscellaneous Conrol Register
-	 * (ATA_MISCCTL_HWNHLD1P	, 1 cycle)
-	 * (ATA_MISCCTL_HWNHLD0P	, 1 cycle)
-	 * (ATA_MISCCTL_TIMORIDE	, 1)
-	 */
-	writel(0x001, base + BK3710_MISCCTL);
-
-	/*
-	 * IORDYTMP IORDY Timer for Primary Register
-	 * (ATA_IORDYTMP_IORDYTMP     , 0xffff  )
-	 */
-	writel(0xFFFF, base + BK3710_IORDYTMP);
-
-	/*
-	 * Configure BMISP Register
-	 * (ATA_BMISP_DMAEN1	, DISABLE )	|
-	 * (ATA_BMISP_DMAEN0	, DISABLE )	|
-	 * (ATA_BMISP_IORDYINT	, CLEAR)	|
-	 * (ATA_BMISP_INTRSTAT	, CLEAR)	|
-	 * (ATA_BMISP_DMAERROR	, CLEAR)
-	 */
-	writew(0, base + BK3710_BMISP);
-
-	palm_bk3710_setpiomode(base, NULL, 0, 600, 0);
-	palm_bk3710_setpiomode(base, NULL, 1, 600, 0);
-}
-
-static u8 palm_bk3710_cable_detect(ide_hwif_t *hwif)
-{
-	return ATA_CBL_PATA80;
-}
-
-static int palm_bk3710_init_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
-
-	if (ide_allocate_dma_engine(hwif))
-		return -1;
-
-	hwif->dma_base = hwif->io_ports.data_addr - IDE_PALM_ATA_PRI_REG_OFFSET;
-
-	return 0;
-}
-
-static const struct ide_port_ops palm_bk3710_ports_ops = {
-	.set_pio_mode		= palm_bk3710_set_pio_mode,
-	.set_dma_mode		= palm_bk3710_set_dma_mode,
-	.cable_detect		= palm_bk3710_cable_detect,
-};
-
-static struct ide_port_info palm_bk3710_port_info __initdata = {
-	.init_dma		= palm_bk3710_init_dma,
-	.port_ops		= &palm_bk3710_ports_ops,
-	.dma_ops		= &sff_dma_ops,
-	.host_flags		= IDE_HFLAG_MMIO,
-	.pio_mask		= ATA_PIO4,
-	.mwdma_mask		= ATA_MWDMA2,
-	.chipset		= ide_palm3710,
-};
-
-static int __init palm_bk3710_probe(struct platform_device *pdev)
-{
-	struct clk *clk;
-	struct resource *mem, *irq;
-	void __iomem *base;
-	unsigned long rate, mem_size;
-	int i, rc;
-	struct ide_hw hw, *hws[] = { &hw };
-
-	clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(clk))
-		return -ENODEV;
-
-	clk_enable(clk);
-	rate = clk_get_rate(clk);
-	if (!rate)
-		return -EINVAL;
-
-	/* NOTE:  round *down* to meet minimum timings; we count in clocks */
-	ideclk_period = 1000000000UL / rate;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (mem == NULL) {
-		printk(KERN_ERR "failed to get memory region resource\n");
-		return -ENODEV;
-	}
-
-	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (irq == NULL) {
-		printk(KERN_ERR "failed to get IRQ resource\n");
-		return -ENODEV;
-	}
-
-	mem_size = resource_size(mem);
-	if (request_mem_region(mem->start, mem_size, "palm_bk3710") == NULL) {
-		printk(KERN_ERR "failed to request memory region\n");
-		return -EBUSY;
-	}
-
-	base = ioremap(mem->start, mem_size);
-	if (!base) {
-		printk(KERN_ERR "failed to map IO memory\n");
-		release_mem_region(mem->start, mem_size);
-		return -ENOMEM;
-	}
-
-	/* Configure the Palm Chip controller */
-	palm_bk3710_chipinit(base);
-
-	memset(&hw, 0, sizeof(hw));
-	for (i = 0; i < IDE_NR_PORTS - 2; i++)
-		hw.io_ports_array[i] = (unsigned long)
-				(base + IDE_PALM_ATA_PRI_REG_OFFSET + i);
-	hw.io_ports.ctl_addr = (unsigned long)
-			(base + IDE_PALM_ATA_PRI_CTL_OFFSET);
-	hw.irq = irq->start;
-	hw.dev = &pdev->dev;
-
-	palm_bk3710_port_info.udma_mask = rate < 100000000 ? ATA_UDMA4 :
-							     ATA_UDMA5;
-
-	/* Register the IDE interface with Linux */
-	rc = ide_host_add(&palm_bk3710_port_info, hws, 1, NULL);
-	if (rc)
-		goto out;
-
-	return 0;
-out:
-	printk(KERN_WARNING "Palm Chip BK3710 IDE Register Fail\n");
-	return rc;
-}
-
-/* work with hotplug and coldplug */
-MODULE_ALIAS("platform:palm_bk3710");
-
-static struct platform_driver platform_bk_driver = {
-	.driver = {
-		.name = "palm_bk3710",
-	},
-};
-
-static int __init palm_bk3710_init(void)
-{
-	return platform_driver_probe(&platform_bk_driver, palm_bk3710_probe);
-}
-
-module_init(palm_bk3710_init);
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/pdc202xx_new.c b/drivers/ide/pdc202xx_new.c
deleted file mode 100644
index 4fcafb9121e00..0000000000000
--- a/drivers/ide/pdc202xx_new.c
+++ /dev/null
@@ -1,557 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  Promise TX2/TX4/TX2000/133 IDE driver
- *
- *  Split from:
- *  linux/drivers/ide/pdc202xx.c	Version 0.35	Mar. 30, 2002
- *  Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2005-2007		MontaVista Software, Inc.
- *  Portions Copyright (C) 1999 Promise Technology, Inc.
- *  Author: Frank Tiernan (frankt@promise.com)
- *  Released under terms of General Public License
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-#include <linux/ktime.h>
-
-#include <asm/io.h>
-
-#ifdef CONFIG_PPC_PMAC
-#include <asm/prom.h>
-#endif
-
-#define DRV_NAME "pdc202xx_new"
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt, args...) printk("%s: " fmt, __func__, ## args)
-#else
-#define DBG(fmt, args...)
-#endif
-
-static u8 max_dma_rate(struct pci_dev *pdev)
-{
-	u8 mode;
-
-	switch(pdev->device) {
-		case PCI_DEVICE_ID_PROMISE_20277:
-		case PCI_DEVICE_ID_PROMISE_20276:
-		case PCI_DEVICE_ID_PROMISE_20275:
-		case PCI_DEVICE_ID_PROMISE_20271:
-		case PCI_DEVICE_ID_PROMISE_20269:
-			mode = 4;
-			break;
-		case PCI_DEVICE_ID_PROMISE_20270:
-		case PCI_DEVICE_ID_PROMISE_20268:
-			mode = 3;
-			break;
-		default:
-			return 0;
-	}
-
-	return mode;
-}
-
-/**
- * get_indexed_reg - Get indexed register
- * @hwif: for the port address
- * @index: index of the indexed register
- */
-static u8 get_indexed_reg(ide_hwif_t *hwif, u8 index)
-{
-	u8 value;
-
-	outb(index, hwif->dma_base + 1);
-	value = inb(hwif->dma_base + 3);
-
-	DBG("index[%02X] value[%02X]\n", index, value);
-	return value;
-}
-
-/**
- * set_indexed_reg - Set indexed register
- * @hwif: for the port address
- * @index: index of the indexed register
- */
-static void set_indexed_reg(ide_hwif_t *hwif, u8 index, u8 value)
-{
-	outb(index, hwif->dma_base + 1);
-	outb(value, hwif->dma_base + 3);
-	DBG("index[%02X] value[%02X]\n", index, value);
-}
-
-/*
- * ATA Timing Tables based on 133 MHz PLL output clock.
- *
- * If the PLL outputs 100 MHz clock, the ASIC hardware will set
- * the timing registers automatically when "set features" command is
- * issued to the device. However, if the PLL output clock is 133 MHz,
- * the following tables must be used.
- */
-static struct pio_timing {
-	u8 reg0c, reg0d, reg13;
-} pio_timings [] = {
-	{ 0xfb, 0x2b, 0xac },	/* PIO mode 0, IORDY off, Prefetch off */
-	{ 0x46, 0x29, 0xa4 },	/* PIO mode 1, IORDY off, Prefetch off */
-	{ 0x23, 0x26, 0x64 },	/* PIO mode 2, IORDY off, Prefetch off */
-	{ 0x27, 0x0d, 0x35 },	/* PIO mode 3, IORDY on,  Prefetch off */
-	{ 0x23, 0x09, 0x25 },	/* PIO mode 4, IORDY on,  Prefetch off */
-};
-
-static struct mwdma_timing {
-	u8 reg0e, reg0f;
-} mwdma_timings [] = {
-	{ 0xdf, 0x5f }, 	/* MWDMA mode 0 */
-	{ 0x6b, 0x27 }, 	/* MWDMA mode 1 */
-	{ 0x69, 0x25 }, 	/* MWDMA mode 2 */
-};
-
-static struct udma_timing {
-	u8 reg10, reg11, reg12;
-} udma_timings [] = {
-	{ 0x4a, 0x0f, 0xd5 },	/* UDMA mode 0 */
-	{ 0x3a, 0x0a, 0xd0 },	/* UDMA mode 1 */
-	{ 0x2a, 0x07, 0xcd },	/* UDMA mode 2 */
-	{ 0x1a, 0x05, 0xcd },	/* UDMA mode 3 */
-	{ 0x1a, 0x03, 0xcd },	/* UDMA mode 4 */
-	{ 0x1a, 0x02, 0xcb },	/* UDMA mode 5 */
-	{ 0x1a, 0x01, 0xcb },	/* UDMA mode 6 */
-};
-
-static void pdcnew_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 adj			= (drive->dn & 1) ? 0x08 : 0x00;
-	const u8 speed		= drive->dma_mode;
-
-	/*
-	 * IDE core issues SETFEATURES_XFER to the drive first (thanks to
-	 * IDE_HFLAG_POST_SET_MODE in ->host_flags).  PDC202xx hardware will
-	 * automatically set the timing registers based on 100 MHz PLL output.
-	 *
-	 * As we set up the PLL to output 133 MHz for UltraDMA/133 capable
-	 * chips, we must override the default register settings...
-	 */
-	if (max_dma_rate(dev) == 4) {
-		u8 mode = speed & 0x07;
-
-		if (speed >= XFER_UDMA_0) {
-			set_indexed_reg(hwif, 0x10 + adj,
-					udma_timings[mode].reg10);
-			set_indexed_reg(hwif, 0x11 + adj,
-					udma_timings[mode].reg11);
-			set_indexed_reg(hwif, 0x12 + adj,
-					udma_timings[mode].reg12);
-		} else {
-			set_indexed_reg(hwif, 0x0e + adj,
-					mwdma_timings[mode].reg0e);
-			set_indexed_reg(hwif, 0x0f + adj,
-					mwdma_timings[mode].reg0f);
-		}
-	} else if (speed == XFER_UDMA_2) {
-		/* Set tHOLD bit to 0 if using UDMA mode 2 */
-		u8 tmp = get_indexed_reg(hwif, 0x10 + adj);
-
-		set_indexed_reg(hwif, 0x10 + adj, tmp & 0x7f);
- 	}
-}
-
-static void pdcnew_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 adj = (drive->dn & 1) ? 0x08 : 0x00;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	if (max_dma_rate(dev) == 4) {
-		set_indexed_reg(hwif, 0x0c + adj, pio_timings[pio].reg0c);
-		set_indexed_reg(hwif, 0x0d + adj, pio_timings[pio].reg0d);
-		set_indexed_reg(hwif, 0x13 + adj, pio_timings[pio].reg13);
-	}
-}
-
-static u8 pdcnew_cable_detect(ide_hwif_t *hwif)
-{
-	if (get_indexed_reg(hwif, 0x0b) & 0x04)
-		return ATA_CBL_PATA40;
-	else
-		return ATA_CBL_PATA80;
-}
-
-static void pdcnew_reset(ide_drive_t *drive)
-{
-	/*
-	 * Deleted this because it is redundant from the caller.
-	 */
-	printk(KERN_WARNING "pdc202xx_new: %s channel reset.\n",
-		drive->hwif->channel ? "Secondary" : "Primary");
-}
-
-/**
- * read_counter - Read the byte count registers
- * @dma_base: for the port address
- */
-static long read_counter(u32 dma_base)
-{
-	u32  pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08;
-	u8   cnt0, cnt1, cnt2, cnt3;
-	long count = 0, last;
-	int  retry = 3;
-
-	do {
-		last = count;
-
-		/* Read the current count */
-		outb(0x20, pri_dma_base + 0x01);
-		cnt0 = inb(pri_dma_base + 0x03);
-		outb(0x21, pri_dma_base + 0x01);
-		cnt1 = inb(pri_dma_base + 0x03);
-		outb(0x20, sec_dma_base + 0x01);
-		cnt2 = inb(sec_dma_base + 0x03);
-		outb(0x21, sec_dma_base + 0x01);
-		cnt3 = inb(sec_dma_base + 0x03);
-
-		count = (cnt3 << 23) | (cnt2 << 15) | (cnt1 << 8) | cnt0;
-
-		/*
-		 * The 30-bit decrementing counter is read in 4 pieces.
-		 * Incorrect value may be read when the most significant bytes
-		 * are changing...
-		 */
-	} while (retry-- && (((last ^ count) & 0x3fff8000) || last < count));
-
-	DBG("cnt0[%02X] cnt1[%02X] cnt2[%02X] cnt3[%02X]\n",
-		  cnt0, cnt1, cnt2, cnt3);
-
-	return count;
-}
-
-/**
- * detect_pll_input_clock - Detect the PLL input clock in Hz.
- * @dma_base: for the port address
- * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock.
- */
-static long detect_pll_input_clock(unsigned long dma_base)
-{
-	ktime_t start_time, end_time;
-	long start_count, end_count;
-	long pll_input, usec_elapsed;
-	u8 scr1;
-
-	start_count = read_counter(dma_base);
-	start_time = ktime_get();
-
-	/* Start the test mode */
-	outb(0x01, dma_base + 0x01);
-	scr1 = inb(dma_base + 0x03);
-	DBG("scr1[%02X]\n", scr1);
-	outb(scr1 | 0x40, dma_base + 0x03);
-
-	/* Let the counter run for 10 ms. */
-	mdelay(10);
-
-	end_count = read_counter(dma_base);
-	end_time = ktime_get();
-
-	/* Stop the test mode */
-	outb(0x01, dma_base + 0x01);
-	scr1 = inb(dma_base + 0x03);
-	DBG("scr1[%02X]\n", scr1);
-	outb(scr1 & ~0x40, dma_base + 0x03);
-
-	/*
-	 * Calculate the input clock in Hz
-	 * (the clock counter is 30 bit wide and counts down)
-	 */
-	usec_elapsed = ktime_us_delta(end_time, start_time);
-	pll_input = ((start_count - end_count) & 0x3fffffff) / 10 *
-		(10000000 / usec_elapsed);
-
-	DBG("start[%ld] end[%ld]\n", start_count, end_count);
-
-	return pll_input;
-}
-
-#ifdef CONFIG_PPC_PMAC
-static void apple_kiwi_init(struct pci_dev *pdev)
-{
-	struct device_node *np = pci_device_to_OF_node(pdev);
-	u8 conf;
-
-	if (np == NULL || !of_device_is_compatible(np, "kiwi-root"))
-		return;
-
-	if (pdev->revision >= 0x03) {
-		/* Setup chip magic config stuff (from darwin) */
-		pci_read_config_byte (pdev, 0x40, &conf);
-		pci_write_config_byte(pdev, 0x40, (conf | 0x01));
-	}
-}
-#endif /* CONFIG_PPC_PMAC */
-
-static int init_chipset_pdcnew(struct pci_dev *dev)
-{
-	const char *name = DRV_NAME;
-	unsigned long dma_base = pci_resource_start(dev, 4);
-	unsigned long sec_dma_base = dma_base + 0x08;
-	long pll_input, pll_output, ratio;
-	int f, r;
-	u8 pll_ctl0, pll_ctl1;
-
-	if (dma_base == 0)
-		return -EFAULT;
-
-#ifdef CONFIG_PPC_PMAC
-	apple_kiwi_init(dev);
-#endif
-
-	/* Calculate the required PLL output frequency */
-	switch(max_dma_rate(dev)) {
-		case 4: /* it's 133 MHz for Ultra133 chips */
-			pll_output = 133333333;
-			break;
-		case 3: /* and  100 MHz for Ultra100 chips */
-		default:
-			pll_output = 100000000;
-			break;
-	}
-
-	/*
-	 * Detect PLL input clock.
-	 * On some systems, where PCI bus is running at non-standard clock rate
-	 * (e.g. 25 or 40 MHz), we have to adjust the cycle time.
-	 * PDC20268 and newer chips employ PLL circuit to help correct timing
-	 * registers setting.
-	 */
-	pll_input = detect_pll_input_clock(dma_base);
-	printk(KERN_INFO "%s %s: PLL input clock is %ld kHz\n",
-		name, pci_name(dev), pll_input / 1000);
-
-	/* Sanity check */
-	if (unlikely(pll_input < 5000000L || pll_input > 70000000L)) {
-		printk(KERN_ERR "%s %s: Bad PLL input clock %ld Hz, giving up!"
-			"\n", name, pci_name(dev), pll_input);
-		goto out;
-	}
-
-#ifdef DEBUG
-	DBG("pll_output is %ld Hz\n", pll_output);
-
-	/* Show the current clock value of PLL control register
-	 * (maybe already configured by the BIOS)
-	 */
-	outb(0x02, sec_dma_base + 0x01);
-	pll_ctl0 = inb(sec_dma_base + 0x03);
-	outb(0x03, sec_dma_base + 0x01);
-	pll_ctl1 = inb(sec_dma_base + 0x03);
-
-	DBG("pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1);
-#endif
-
-	/*
-	 * Calculate the ratio of F, R and NO
-	 * POUT = (F + 2) / (( R + 2) * NO)
-	 */
-	ratio = pll_output / (pll_input / 1000);
-	if (ratio < 8600L) { /* 8.6x */
-		/* Using NO = 0x01, R = 0x0d */
-		r = 0x0d;
-	} else if (ratio < 12900L) { /* 12.9x */
-		/* Using NO = 0x01, R = 0x08 */
-		r = 0x08;
-	} else if (ratio < 16100L) { /* 16.1x */
-		/* Using NO = 0x01, R = 0x06 */
-		r = 0x06;
-	} else if (ratio < 64000L) { /* 64x */
-		r = 0x00;
-	} else {
-		/* Invalid ratio */
-		printk(KERN_ERR "%s %s: Bad ratio %ld, giving up!\n",
-			name, pci_name(dev), ratio);
-		goto out;
-	}
-
-	f = (ratio * (r + 2)) / 1000 - 2;
-
-	DBG("F[%d] R[%d] ratio*1000[%ld]\n", f, r, ratio);
-
-	if (unlikely(f < 0 || f > 127)) {
-		/* Invalid F */
-		printk(KERN_ERR "%s %s: F[%d] invalid!\n",
-			name, pci_name(dev), f);
-		goto out;
-	}
-
-	pll_ctl0 = (u8) f;
-	pll_ctl1 = (u8) r;
-
-	DBG("Writing pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1);
-
-	outb(0x02,     sec_dma_base + 0x01);
-	outb(pll_ctl0, sec_dma_base + 0x03);
-	outb(0x03,     sec_dma_base + 0x01);
-	outb(pll_ctl1, sec_dma_base + 0x03);
-
-	/* Wait the PLL circuit to be stable */
-	mdelay(30);
-
-#ifdef DEBUG
-	/*
-	 *  Show the current clock value of PLL control register
-	 */
-	outb(0x02, sec_dma_base + 0x01);
-	pll_ctl0 = inb(sec_dma_base + 0x03);
-	outb(0x03, sec_dma_base + 0x01);
-	pll_ctl1 = inb(sec_dma_base + 0x03);
-
-	DBG("pll_ctl[%02X][%02X]\n", pll_ctl0, pll_ctl1);
-#endif
-
- out:
-	return 0;
-}
-
-static struct pci_dev *pdc20270_get_dev2(struct pci_dev *dev)
-{
-	struct pci_dev *dev2;
-
-	dev2 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn) + 1,
-						PCI_FUNC(dev->devfn)));
-
-	if (dev2 &&
-	    dev2->vendor == dev->vendor &&
-	    dev2->device == dev->device) {
-
-		if (dev2->irq != dev->irq) {
-			dev2->irq = dev->irq;
-			printk(KERN_INFO DRV_NAME " %s: PCI config space "
-				"interrupt fixed\n", pci_name(dev));
-		}
-
-		return dev2;
-	}
-
-	return NULL;
-}
-
-static const struct ide_port_ops pdcnew_port_ops = {
-	.set_pio_mode		= pdcnew_set_pio_mode,
-	.set_dma_mode		= pdcnew_set_dma_mode,
-	.resetproc		= pdcnew_reset,
-	.cable_detect		= pdcnew_cable_detect,
-};
-
-#define DECLARE_PDCNEW_DEV(udma) \
-	{ \
-		.name		= DRV_NAME, \
-		.init_chipset	= init_chipset_pdcnew, \
-		.port_ops	= &pdcnew_port_ops, \
-		.host_flags	= IDE_HFLAG_POST_SET_MODE | \
-				  IDE_HFLAG_ERROR_STOPS_FIFO | \
-				  IDE_HFLAG_OFF_BOARD, \
-		.pio_mask	= ATA_PIO4, \
-		.mwdma_mask	= ATA_MWDMA2, \
-		.udma_mask	= udma, \
-	}
-
-static const struct ide_port_info pdcnew_chipsets[] = {
-	/* 0: PDC202{68,70} */		DECLARE_PDCNEW_DEV(ATA_UDMA5),
-	/* 1: PDC202{69,71,75,76,77} */	DECLARE_PDCNEW_DEV(ATA_UDMA6),
-};
-
-/**
- *	pdc202new_init_one	-	called when a pdc202xx is found
- *	@dev: the pdc202new device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
- 
-static int pdc202new_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	const struct ide_port_info *d = &pdcnew_chipsets[id->driver_data];
-	struct pci_dev *bridge = dev->bus->self;
-
-	if (dev->device == PCI_DEVICE_ID_PROMISE_20270 && bridge &&
-	    bridge->vendor == PCI_VENDOR_ID_DEC &&
-	    bridge->device == PCI_DEVICE_ID_DEC_21150) {
-		struct pci_dev *dev2;
-
-		if (PCI_SLOT(dev->devfn) & 2)
-			return -ENODEV;
-
-		dev2 = pdc20270_get_dev2(dev);
-
-		if (dev2) {
-			int ret = ide_pci_init_two(dev, dev2, d, NULL);
-			if (ret < 0)
-				pci_dev_put(dev2);
-			return ret;
-		}
-	}
-
-	if (dev->device == PCI_DEVICE_ID_PROMISE_20276 && bridge &&
-	    bridge->vendor == PCI_VENDOR_ID_INTEL &&
-	    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
-	     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-		printk(KERN_INFO DRV_NAME " %s: attached to I2O RAID controller,"
-			" skipping\n", pci_name(dev));
-		return -ENODEV;
-	}
-
-	return ide_pci_init_one(dev, d, NULL);
-}
-
-static void pdc202new_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
-
-	ide_pci_remove(dev);
-	pci_dev_put(dev2);
-}
-
-static const struct pci_device_id pdc202new_pci_tbl[] = {
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20268), 0 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20269), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20270), 0 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20271), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20275), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20276), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20277), 1 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, pdc202new_pci_tbl);
-
-static struct pci_driver pdc202new_pci_driver = {
-	.name		= "Promise_IDE",
-	.id_table	= pdc202new_pci_tbl,
-	.probe		= pdc202new_init_one,
-	.remove		= pdc202new_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init pdc202new_ide_init(void)
-{
-	return ide_pci_register_driver(&pdc202new_pci_driver);
-}
-
-static void __exit pdc202new_ide_exit(void)
-{
-	pci_unregister_driver(&pdc202new_pci_driver);
-}
-
-module_init(pdc202new_ide_init);
-module_exit(pdc202new_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick, Frank Tiernan");
-MODULE_DESCRIPTION("PCI driver module for Promise PDC20268 and higher");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/pdc202xx_old.c b/drivers/ide/pdc202xx_old.c
deleted file mode 100644
index 5248ac064e6e0..0000000000000
--- a/drivers/ide/pdc202xx_old.c
+++ /dev/null
@@ -1,362 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1998-2002		Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2006-2007, 2009	MontaVista Software, Inc.
- *  Copyright (C) 2007-2010		Bartlomiej Zolnierkiewicz
- *
- *  Portions Copyright (C) 1999 Promise Technology, Inc.
- *  Author: Frank Tiernan (frankt@promise.com)
- *  Released under terms of General Public License
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/blkdev.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "pdc202xx_old"
-
-static void pdc202xx_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 drive_pci		= 0x60 + (drive->dn << 2);
-	const u8 speed		= drive->dma_mode;
-
-	u8			AP = 0, BP = 0, CP = 0;
-	u8			TA = 0, TB = 0, TC = 0;
-
-	pci_read_config_byte(dev, drive_pci,     &AP);
-	pci_read_config_byte(dev, drive_pci + 1, &BP);
-	pci_read_config_byte(dev, drive_pci + 2, &CP);
-
-	switch(speed) {
-		case XFER_UDMA_5:
-		case XFER_UDMA_4:	TB = 0x20; TC = 0x01; break;
-		case XFER_UDMA_2:	TB = 0x20; TC = 0x01; break;
-		case XFER_UDMA_3:
-		case XFER_UDMA_1:	TB = 0x40; TC = 0x02; break;
-		case XFER_UDMA_0:
-		case XFER_MW_DMA_2:	TB = 0x60; TC = 0x03; break;
-		case XFER_MW_DMA_1:	TB = 0x60; TC = 0x04; break;
-		case XFER_MW_DMA_0:	TB = 0xE0; TC = 0x0F; break;
-		case XFER_PIO_4:	TA = 0x01; TB = 0x04; break;
-		case XFER_PIO_3:	TA = 0x02; TB = 0x06; break;
-		case XFER_PIO_2:	TA = 0x03; TB = 0x08; break;
-		case XFER_PIO_1:	TA = 0x05; TB = 0x0C; break;
-		case XFER_PIO_0:
-		default:		TA = 0x09; TB = 0x13; break;
-	}
-
-	if (speed < XFER_SW_DMA_0) {
-		/*
-		 * preserve SYNC_INT / ERDDY_EN bits while clearing
-		 * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A
-		 */
-		AP &= ~0x3f;
-		if (ide_pio_need_iordy(drive, speed - XFER_PIO_0))
-			AP |= 0x20;	/* set IORDY_EN bit */
-		if (drive->media == ide_disk)
-			AP |= 0x10;	/* set Prefetch_EN bit */
-		/* clear PB[4:0] bits of register B */
-		BP &= ~0x1f;
-		pci_write_config_byte(dev, drive_pci,     AP | TA);
-		pci_write_config_byte(dev, drive_pci + 1, BP | TB);
-	} else {
-		/* clear MB[2:0] bits of register B */
-		BP &= ~0xe0;
-		/* clear MC[3:0] bits of register C */
-		CP &= ~0x0f;
-		pci_write_config_byte(dev, drive_pci + 1, BP | TB);
-		pci_write_config_byte(dev, drive_pci + 2, CP | TC);
-	}
-}
-
-static void pdc202xx_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	pdc202xx_set_mode(hwif, drive);
-}
-
-static int pdc202xx_test_irq(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long high_16	= pci_resource_start(dev, 4);
-	u8 sc1d			= inb(high_16 + 0x1d);
-
-	if (hwif->channel) {
-		/*
-		 * bit 7: error, bit 6: interrupting,
-		 * bit 5: FIFO full, bit 4: FIFO empty
-		 */
-		return (sc1d & 0x40) ? 1 : 0;
-	} else	{
-		/*
-		 * bit 3: error, bit 2: interrupting,
-		 * bit 1: FIFO full, bit 0: FIFO empty
-		 */
-		return (sc1d & 0x04) ? 1 : 0;
-	}
-}
-
-static u8 pdc2026x_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u16 CIS, mask = hwif->channel ? (1 << 11) : (1 << 10);
-
-	pci_read_config_word(dev, 0x50, &CIS);
-
-	return (CIS & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-/*
- * Set the control register to use the 66MHz system
- * clock for UDMA 3/4/5 mode operation when necessary.
- *
- * FIXME: this register is shared by both channels, some locking is needed
- *
- * It may also be possible to leave the 66MHz clock on
- * and readjust the timing parameters.
- */
-static void pdc_old_enable_66MHz_clock(ide_hwif_t *hwif)
-{
-	unsigned long clock_reg = hwif->extra_base + 0x01;
-	u8 clock = inb(clock_reg);
-
-	outb(clock | (hwif->channel ? 0x08 : 0x02), clock_reg);
-}
-
-static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif)
-{
-	unsigned long clock_reg = hwif->extra_base + 0x01;
-	u8 clock = inb(clock_reg);
-
-	outb(clock & ~(hwif->channel ? 0x08 : 0x02), clock_reg);
-}
-
-static void pdc2026x_init_hwif(ide_hwif_t *hwif)
-{
-	pdc_old_disable_66MHz_clock(hwif);
-}
-
-static void pdc202xx_dma_start(ide_drive_t *drive)
-{
-	if (drive->current_speed > XFER_UDMA_2)
-		pdc_old_enable_66MHz_clock(drive->hwif);
-	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		ide_hwif_t *hwif	= drive->hwif;
-		struct request *rq	= hwif->rq;
-		unsigned long high_16	= hwif->extra_base - 16;
-		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
-		u32 word_count	= 0;
-		u8 clock = inb(high_16 + 0x11);
-
-		outb(clock | (hwif->channel ? 0x08 : 0x02), high_16 + 0x11);
-		word_count = (blk_rq_sectors(rq) << 8);
-		word_count = (rq_data_dir(rq) == READ) ?
-					word_count | 0x05000000 :
-					word_count | 0x06000000;
-		outl(word_count, atapi_reg);
-	}
-	ide_dma_start(drive);
-}
-
-static int pdc202xx_dma_end(ide_drive_t *drive)
-{
-	if (drive->media != ide_disk || (drive->dev_flags & IDE_DFLAG_LBA48)) {
-		ide_hwif_t *hwif	= drive->hwif;
-		unsigned long high_16	= hwif->extra_base - 16;
-		unsigned long atapi_reg	= high_16 + (hwif->channel ? 0x24 : 0x20);
-		u8 clock		= 0;
-
-		outl(0, atapi_reg); /* zero out extra */
-		clock = inb(high_16 + 0x11);
-		outb(clock & ~(hwif->channel ? 0x08:0x02), high_16 + 0x11);
-	}
-	if (drive->current_speed > XFER_UDMA_2)
-		pdc_old_disable_66MHz_clock(drive->hwif);
-	return ide_dma_end(drive);
-}
-
-static int init_chipset_pdc202xx(struct pci_dev *dev)
-{
-	unsigned long dmabase = pci_resource_start(dev, 4);
-	u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0;
-
-	if (dmabase == 0)
-		goto out;
-
-	udma_speed_flag	= inb(dmabase | 0x1f);
-	primary_mode	= inb(dmabase | 0x1a);
-	secondary_mode	= inb(dmabase | 0x1b);
-	printk(KERN_INFO "%s: (U)DMA Burst Bit %sABLED " \
-		"Primary %s Mode " \
-		"Secondary %s Mode.\n", pci_name(dev),
-		(udma_speed_flag & 1) ? "EN" : "DIS",
-		(primary_mode & 1) ? "MASTER" : "PCI",
-		(secondary_mode & 1) ? "MASTER" : "PCI" );
-
-	if (!(udma_speed_flag & 1)) {
-		printk(KERN_INFO "%s: FORCING BURST BIT 0x%02x->0x%02x ",
-			pci_name(dev), udma_speed_flag,
-			(udma_speed_flag|1));
-		outb(udma_speed_flag | 1, dmabase | 0x1f);
-		printk("%sACTIVE\n", (inb(dmabase | 0x1f) & 1) ? "" : "IN");
-	}
-out:
-	return 0;
-}
-
-static void pdc202ata4_fixup_irq(struct pci_dev *dev, const char *name)
-{
-	if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) {
-		u8 irq = 0, irq2 = 0;
-		pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
-		/* 0xbc */
-		pci_read_config_byte(dev, (PCI_INTERRUPT_LINE)|0x80, &irq2);
-		if (irq != irq2) {
-			pci_write_config_byte(dev,
-				(PCI_INTERRUPT_LINE)|0x80, irq);     /* 0xbc */
-			printk(KERN_INFO "%s %s: PCI config space interrupt "
-				"mirror fixed\n", name, pci_name(dev));
-		}
-	}
-}
-
-#define IDE_HFLAGS_PDC202XX \
-	(IDE_HFLAG_ERROR_STOPS_FIFO | \
-	 IDE_HFLAG_OFF_BOARD)
-
-static const struct ide_port_ops pdc20246_port_ops = {
-	.set_pio_mode		= pdc202xx_set_pio_mode,
-	.set_dma_mode		= pdc202xx_set_mode,
-	.test_irq		= pdc202xx_test_irq,
-};
-
-static const struct ide_port_ops pdc2026x_port_ops = {
-	.set_pio_mode		= pdc202xx_set_pio_mode,
-	.set_dma_mode		= pdc202xx_set_mode,
-	.test_irq		= pdc202xx_test_irq,
-	.cable_detect		= pdc2026x_cable_detect,
-};
-
-static const struct ide_dma_ops pdc2026x_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= pdc202xx_dma_start,
-	.dma_end		= pdc202xx_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-#define DECLARE_PDC2026X_DEV(udma, sectors) \
-	{ \
-		.name		= DRV_NAME, \
-		.init_chipset	= init_chipset_pdc202xx, \
-		.init_hwif	= pdc2026x_init_hwif, \
-		.port_ops	= &pdc2026x_port_ops, \
-		.dma_ops	= &pdc2026x_dma_ops, \
-		.host_flags	= IDE_HFLAGS_PDC202XX, \
-		.pio_mask	= ATA_PIO4, \
-		.mwdma_mask	= ATA_MWDMA2, \
-		.udma_mask	= udma, \
-		.max_sectors	= sectors, \
-	}
-
-static const struct ide_port_info pdc202xx_chipsets[] = {
-	{	/* 0: PDC20246 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_pdc202xx,
-		.port_ops	= &pdc20246_port_ops,
-		.dma_ops	= &sff_dma_ops,
-		.host_flags	= IDE_HFLAGS_PDC202XX,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA2,
-	},
-
-	/* 1: PDC2026{2,3} */
-	DECLARE_PDC2026X_DEV(ATA_UDMA4, 0),
-	/* 2: PDC2026{5,7}: UDMA5, limit LBA48 requests to 256 sectors */
-	DECLARE_PDC2026X_DEV(ATA_UDMA5, 256),
-};
-
-/**
- *	pdc202xx_init_one	-	called when a PDC202xx is found
- *	@dev: the pdc202xx device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
- 
-static int pdc202xx_init_one(struct pci_dev *dev,
-			     const struct pci_device_id *id)
-{
-	const struct ide_port_info *d;
-	u8 idx = id->driver_data;
-
-	d = &pdc202xx_chipsets[idx];
-
-	if (idx < 2)
-		pdc202ata4_fixup_irq(dev, d->name);
-
-	if (dev->vendor == PCI_DEVICE_ID_PROMISE_20265) {
-		struct pci_dev *bridge = dev->bus->self;
-
-		if (bridge &&
-		    bridge->vendor == PCI_VENDOR_ID_INTEL &&
-		    (bridge->device == PCI_DEVICE_ID_INTEL_I960 ||
-		     bridge->device == PCI_DEVICE_ID_INTEL_I960RM)) {
-			printk(KERN_INFO DRV_NAME " %s: skipping Promise "
-				"PDC20265 attached to I2O RAID controller\n",
-				pci_name(dev));
-			return -ENODEV;
-		}
-	}
-
-	return ide_pci_init_one(dev, d, NULL);
-}
-
-static const struct pci_device_id pdc202xx_pci_tbl[] = {
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20246), 0 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20262), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20263), 1 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20265), 2 },
-	{ PCI_VDEVICE(PROMISE, PCI_DEVICE_ID_PROMISE_20267), 2 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, pdc202xx_pci_tbl);
-
-static struct pci_driver pdc202xx_pci_driver = {
-	.name		= "Promise_Old_IDE",
-	.id_table	= pdc202xx_pci_tbl,
-	.probe		= pdc202xx_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init pdc202xx_ide_init(void)
-{
-	return ide_pci_register_driver(&pdc202xx_pci_driver);
-}
-
-static void __exit pdc202xx_ide_exit(void)
-{
-	pci_unregister_driver(&pdc202xx_pci_driver);
-}
-
-module_init(pdc202xx_ide_init);
-module_exit(pdc202xx_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick, Frank Tiernan, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("PCI driver module for older Promise IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/piix.c b/drivers/ide/piix.c
deleted file mode 100644
index a671cead6ae72..0000000000000
--- a/drivers/ide/piix.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- *  Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer
- *  Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2003 Red Hat
- *  Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
- *
- *  May be copied or modified under the terms of the GNU General Public License
- *
- * Documentation:
- *
- *	Publicly available from Intel web site. Errata documentation
- * is also publicly available. As an aide to anyone hacking on this
- * driver the list of errata that are relevant is below.going back to
- * PIIX4. Older device documentation is now a bit tricky to find.
- *
- * Errata of note:
- *
- * Unfixable
- *	PIIX4    errata #9	- Only on ultra obscure hw
- *	ICH3	 errata #13     - Not observed to affect real hw
- *				  by Intel
- *
- * Things we must deal with
- *	PIIX4	errata #10	- BM IDE hang with non UDMA
- *				  (must stop/start dma to recover)
- *	440MX   errata #15	- As PIIX4 errata #10
- *	PIIX4	errata #15	- Must not read control registers
- * 				  during a PIO transfer
- *	440MX   errata #13	- As PIIX4 errata #15
- *	ICH2	errata #21	- DMA mode 0 doesn't work right
- *	ICH0/1  errata #55	- As ICH2 errata #21
- *	ICH2	spec c #9	- Extra operations needed to handle
- *				  drive hotswap [NOT YET SUPPORTED]
- *	ICH2    spec c #20	- IDE PRD must not cross a 64K boundary
- *				  and must be dword aligned
- *	ICH2    spec c #24	- UDMA mode 4,5 t85/86 should be 6ns not 3.3
- *
- * Should have been BIOS fixed:
- *	450NX:	errata #19	- DMA hangs on old 450NX
- *	450NX:  errata #20	- DMA hangs on old 450NX
- *	450NX:  errata #25	- Corruption with DMA on old 450NX
- *	ICH3    errata #15      - IDE deadlock under high load
- *				  (BIOS must set dev 31 fn 0 bit 23)
- *	ICH3	errata #18	- Don't use native mode
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "piix"
-
-static int no_piix_dma;
-
-/**
- *	piix_set_pio_mode	-	set host controller for PIO mode
- *	@port: port
- *	@drive: drive
- *
- *	Set the interface PIO mode based upon the settings done by AMI BIOS.
- */
-
-static void piix_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int is_slave		= drive->dn & 1;
-	int master_port		= hwif->channel ? 0x42 : 0x40;
-	int slave_port		= 0x44;
-	unsigned long flags;
-	u16 master_data;
-	u8 slave_data;
-	static DEFINE_SPINLOCK(tune_lock);
-	int control = 0;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-				     /* ISP  RTC */
-	static const u8 timings[][2]= {
-					{ 0, 0 },
-					{ 0, 0 },
-					{ 1, 0 },
-					{ 2, 1 },
-					{ 2, 3 }, };
-
-	/*
-	 * Master vs slave is synchronized above us but the slave register is
-	 * shared by the two hwifs so the corner case of two slave timeouts in
-	 * parallel must be locked.
-	 */
-	spin_lock_irqsave(&tune_lock, flags);
-	pci_read_config_word(dev, master_port, &master_data);
-
-	if (pio > 1)
-		control |= 1;	/* Programmable timing on */
-	if (drive->media == ide_disk)
-		control |= 4;	/* Prefetch, post write */
-	if (ide_pio_need_iordy(drive, pio))
-		control |= 2;	/* IORDY */
-	if (is_slave) {
-		master_data |=  0x4000;
-		master_data &= ~0x0070;
-		if (pio > 1) {
-			/* Set PPE, IE and TIME */
-			master_data |= control << 4;
-		}
-		pci_read_config_byte(dev, slave_port, &slave_data);
-		slave_data &= hwif->channel ? 0x0f : 0xf0;
-		slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) <<
-			       (hwif->channel ? 4 : 0);
-	} else {
-		master_data &= ~0x3307;
-		if (pio > 1) {
-			/* enable PPE, IE and TIME */
-			master_data |= control;
-		}
-		master_data |= (timings[pio][0] << 12) | (timings[pio][1] << 8);
-	}
-	pci_write_config_word(dev, master_port, master_data);
-	if (is_slave)
-		pci_write_config_byte(dev, slave_port, slave_data);
-	spin_unlock_irqrestore(&tune_lock, flags);
-}
-
-/**
- *	piix_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Set a PIIX host controller to the desired DMA mode.  This involves
- *	programming the right timing data into the PCI configuration space.
- */
-
-static void piix_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 maslave		= hwif->channel ? 0x42 : 0x40;
-	int a_speed		= 3 << (drive->dn * 4);
-	int u_flag		= 1 << drive->dn;
-	int v_flag		= 0x01 << drive->dn;
-	int w_flag		= 0x10 << drive->dn;
-	int u_speed		= 0;
-	int			sitre;
-	u16			reg4042, reg4a;
-	u8			reg48, reg54, reg55;
-	const u8 speed		= drive->dma_mode;
-
-	pci_read_config_word(dev, maslave, &reg4042);
-	sitre = (reg4042 & 0x4000) ? 1 : 0;
-	pci_read_config_byte(dev, 0x48, &reg48);
-	pci_read_config_word(dev, 0x4a, &reg4a);
-	pci_read_config_byte(dev, 0x54, &reg54);
-	pci_read_config_byte(dev, 0x55, &reg55);
-
-	if (speed >= XFER_UDMA_0) {
-		u8 udma = speed - XFER_UDMA_0;
-
-		u_speed = min_t(u8, 2 - (udma & 1), udma) << (drive->dn * 4);
-
-		if (!(reg48 & u_flag))
-			pci_write_config_byte(dev, 0x48, reg48 | u_flag);
-		if (speed == XFER_UDMA_5) {
-			pci_write_config_byte(dev, 0x55, (u8) reg55|w_flag);
-		} else {
-			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
-		}
-		if ((reg4a & a_speed) != u_speed)
-			pci_write_config_word(dev, 0x4a, (reg4a & ~a_speed) | u_speed);
-		if (speed > XFER_UDMA_2) {
-			if (!(reg54 & v_flag))
-				pci_write_config_byte(dev, 0x54, reg54 | v_flag);
-		} else
-			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
-	} else {
-		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-
-		if (reg48 & u_flag)
-			pci_write_config_byte(dev, 0x48, reg48 & ~u_flag);
-		if (reg4a & a_speed)
-			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
-		if (reg54 & v_flag)
-			pci_write_config_byte(dev, 0x54, reg54 & ~v_flag);
-		if (reg55 & w_flag)
-			pci_write_config_byte(dev, 0x55, (u8) reg55 & ~w_flag);
-
-		if (speed >= XFER_MW_DMA_0)
-			drive->pio_mode =
-				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
-		else
-			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
-
-		piix_set_pio_mode(hwif, drive);
-	}
-}
-
-/**
- *	init_chipset_ich	-	set up the ICH chipset
- *	@dev: PCI device to set up
- *
- *	Initialize the PCI device as required.  For the ICH this turns
- *	out to be nice and simple.
- */
-
-static int init_chipset_ich(struct pci_dev *dev)
-{
-	u32 extra = 0;
-
-	pci_read_config_dword(dev, 0x54, &extra);
-	pci_write_config_dword(dev, 0x54, extra | 0x400);
-
-	return 0;
-}
-
-/**
- *	ich_clear_irq	-	clear BMDMA status
- *	@drive: IDE drive
- *
- *	ICHx contollers set DMA INTR no matter DMA or PIO.
- *	BMDMA status might need to be cleared even for
- *	PIO interrupts to prevent spurious/lost IRQ.
- */
-static void ich_clear_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat;
-
-	/*
-	 * ide_dma_end() needs BMDMA status for error checking.
-	 * So, skip clearing BMDMA status here and leave it
-	 * to ide_dma_end() if this is DMA interrupt.
-	 */
-	if (drive->waiting_for_dma || hwif->dma_base == 0)
-		return;
-
-	/* clear the INTR & ERROR bits */
-	dma_stat = inb(hwif->dma_base + ATA_DMA_STATUS);
-	/* Should we force the bit as well ? */
-	outb(dma_stat, hwif->dma_base + ATA_DMA_STATUS);
-}
-
-struct ich_laptop {
-	u16 device;
-	u16 subvendor;
-	u16 subdevice;
-};
-
-/*
- *	List of laptops that use short cables rather than 80 wire
- */
-
-static const struct ich_laptop ich_laptop[] = {
-	/* devid, subvendor, subdev */
-	{ 0x27DF, 0x1025, 0x0102 },	/* ICH7 on Acer 5602aWLMi */
-	{ 0x27DF, 0x0005, 0x0280 },	/* ICH7 on Acer 5602WLMi */
-	{ 0x27DF, 0x1025, 0x0110 },	/* ICH7 on Acer 3682WLMi */
-	{ 0x27DF, 0x1043, 0x1267 },	/* ICH7 on Asus W5F */
-	{ 0x27DF, 0x103C, 0x30A1 },	/* ICH7 on HP Compaq nc2400 */
-	{ 0x27DF, 0x1071, 0xD221 },	/* ICH7 on Hercules EC-900 */
-	{ 0x24CA, 0x1025, 0x0061 },	/* ICH4 on Acer Aspire 2023WLMi */
-	{ 0x24CA, 0x1025, 0x003d },	/* ICH4 on ACER TM290 */
-	{ 0x266F, 0x1025, 0x0066 },	/* ICH6 on ACER Aspire 1694WLMi */
-	{ 0x2653, 0x1043, 0x82D8 },	/* ICH6M on Asus Eee 701 */
-	{ 0x27df, 0x104d, 0x900e },	/* ICH7 on Sony TZ-90 */
-	/* end marker */
-	{ 0, }
-};
-
-static u8 piix_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	const struct ich_laptop *lap = &ich_laptop[0];
-	u8 reg54h = 0, mask = hwif->channel ? 0xc0 : 0x30;
-
-	/* check for specials */
-	while (lap->device) {
-		if (lap->device == pdev->device &&
-		    lap->subvendor == pdev->subsystem_vendor &&
-		    lap->subdevice == pdev->subsystem_device) {
-			return ATA_CBL_PATA40_SHORT;
-		}
-		lap++;
-	}
-
-	pci_read_config_byte(pdev, 0x54, &reg54h);
-
-	return (reg54h & mask) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-}
-
-/**
- *	init_hwif_piix		-	fill in the hwif for the PIIX
- *	@hwif: IDE interface
- *
- *	Set up the ide_hwif_t for the PIIX interface according to the
- *	capabilities of the hardware.
- */
-
-static void init_hwif_piix(ide_hwif_t *hwif)
-{
-	if (!hwif->dma_base)
-		return;
-
-	if (no_piix_dma)
-		hwif->ultra_mask = hwif->mwdma_mask = hwif->swdma_mask = 0;
-}
-
-static const struct ide_port_ops piix_port_ops = {
-	.set_pio_mode		= piix_set_pio_mode,
-	.set_dma_mode		= piix_set_dma_mode,
-	.cable_detect		= piix_cable_detect,
-};
-
-static const struct ide_port_ops ich_port_ops = {
-	.set_pio_mode		= piix_set_pio_mode,
-	.set_dma_mode		= piix_set_dma_mode,
-	.clear_irq		= ich_clear_irq,
-	.cable_detect		= piix_cable_detect,
-};
-
-#define DECLARE_PIIX_DEV(udma) \
-	{						\
-		.name		= DRV_NAME,		\
-		.init_hwif	= init_hwif_piix,	\
-		.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, \
-		.port_ops	= &piix_port_ops,	\
-		.pio_mask	= ATA_PIO4,		\
-		.swdma_mask	= ATA_SWDMA2_ONLY,	\
-		.mwdma_mask	= ATA_MWDMA12_ONLY,	\
-		.udma_mask	= udma,			\
-	}
-
-#define DECLARE_ICH_DEV(mwdma, udma) \
-	{ \
-		.name		= DRV_NAME, \
-		.init_chipset	= init_chipset_ich, \
-		.init_hwif	= init_hwif_piix, \
-		.enablebits	= {{0x41,0x80,0x80}, {0x43,0x80,0x80}}, \
-		.port_ops	= &ich_port_ops, \
-		.pio_mask	= ATA_PIO4, \
-		.swdma_mask	= ATA_SWDMA2_ONLY, \
-		.mwdma_mask	= mwdma, \
-		.udma_mask	= udma, \
-	}
-
-static const struct ide_port_info piix_pci_info[] = {
-	/* 0: MPIIX */
-	{	/*
-		 * MPIIX actually has only a single IDE channel mapped to
-		 * the primary or secondary ports depending on the value
-		 * of the bit 14 of the IDETIM register at offset 0x6c
-		 */
-		.name		= DRV_NAME,
-		.enablebits	= {{0x6d,0xc0,0x80}, {0x6d,0xc0,0xc0}},
-		.host_flags	= IDE_HFLAG_ISA_PORTS | IDE_HFLAG_NO_DMA,
-		.pio_mask	= ATA_PIO4,
-		/* This is a painful system best to let it self tune for now */
-	},
-	/* 1: PIIXa/PIIXb/PIIX3 */
-	DECLARE_PIIX_DEV(0x00), /* no udma */
-	/* 2: PIIX4 */
-	DECLARE_PIIX_DEV(ATA_UDMA2),
-	/* 3: ICH0 */
-	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA2),
-	/* 4: ICH */
-	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA4),
-	/* 5: PIIX4 */
-	DECLARE_PIIX_DEV(ATA_UDMA4),
-	/* 6: ICH[2-6]/ICH[2-3]M/C-ICH/ICH5-SATA/ESB2/ICH8M */
-	DECLARE_ICH_DEV(ATA_MWDMA12_ONLY, ATA_UDMA5),
-	/* 7: ICH7/7-R, no MWDMA1 */
-	DECLARE_ICH_DEV(ATA_MWDMA2_ONLY, ATA_UDMA5),
-};
-
-/**
- *	piix_init_one	-	called when a PIIX is found
- *	@dev: the piix device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
- 
-static int piix_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &piix_pci_info[id->driver_data], NULL);
-}
-
-/**
- *	piix_check_450nx	-	Check for problem 450NX setup
- *	
- *	Check for the present of 450NX errata #19 and errata #25. If
- *	they are found, disable use of DMA IDE
- */
-
-static void piix_check_450nx(void)
-{
-	struct pci_dev *pdev = NULL;
-	u16 cfg;
-	while((pdev=pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454NX, pdev))!=NULL)
-	{
-		/* Look for 450NX PXB. Check for problem configurations
-		   A PCI quirk checks bit 6 already */
-		pci_read_config_word(pdev, 0x41, &cfg);
-		/* Only on the original revision: IDE DMA can hang */
-		if (pdev->revision == 0x00)
-			no_piix_dma = 1;
-		/* On all revisions below 5 PXB bus lock must be disabled for IDE */
-		else if (cfg & (1<<14) && pdev->revision < 5)
-			no_piix_dma = 2;
-	}
-	if(no_piix_dma)
-		printk(KERN_WARNING DRV_NAME ": 450NX errata present, disabling IDE DMA.\n");
-	if(no_piix_dma == 2)
-		printk(KERN_WARNING DRV_NAME ": A BIOS update may resolve this.\n");
-}		
-
-static const struct pci_device_id piix_pci_tbl[] = {
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_0),  1 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371FB_1),  1 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371MX),    0 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371SB_1),  1 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82371AB),    2 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AB_1),  3 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82443MX_1),  2 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801AA_1),  4 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82372FB_1),  5 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82451NX),    2 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_9),  6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801BA_8),  6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_10), 6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801CA_11), 6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_11), 6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_11), 6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801E_11),  6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_10), 6 },
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801EB_1),  6 },
-#endif
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB_2),      6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH6_19),    6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH7_21),    7 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_82801DB_1),  6 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ESB2_18),    7 },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICH8_6),     6 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, piix_pci_tbl);
-
-static struct pci_driver piix_pci_driver = {
-	.name		= "PIIX_IDE",
-	.id_table	= piix_pci_tbl,
-	.probe		= piix_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init piix_ide_init(void)
-{
-	piix_check_450nx();
-	return ide_pci_register_driver(&piix_pci_driver);
-}
-
-static void __exit piix_ide_exit(void)
-{
-	pci_unregister_driver(&piix_pci_driver);
-}
-
-module_init(piix_ide_init);
-module_exit(piix_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick, Andrzej Krzysztofowicz");
-MODULE_DESCRIPTION("PCI driver module for Intel PIIX IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
deleted file mode 100644
index ea0b064b5f56b..0000000000000
--- a/drivers/ide/pmac.c
+++ /dev/null
@@ -1,1703 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Support for IDE interfaces on PowerMacs.
- *
- * These IDE interfaces are memory-mapped and have a DBDMA channel
- * for doing DMA.
- *
- *  Copyright (C) 1998-2003 Paul Mackerras & Ben. Herrenschmidt
- *  Copyright (C) 2007-2008 Bartlomiej Zolnierkiewicz
- *
- * Some code taken from drivers/ide/ide-dma.c:
- *
- *  Copyright (c) 1995-1998  Mark Lord
- *
- * TODO: - Use pre-calculated (kauai) timing tables all the time and
- * get rid of the "rounded" tables used previously, so we have the
- * same table format for all controllers and can then just have one
- * big table
- */
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/ide.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/adb.h>
-#include <linux/pmu.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/dbdma.h>
-#include <asm/ide.h>
-#include <asm/machdep.h>
-#include <asm/pmac_feature.h>
-#include <asm/sections.h>
-#include <asm/irq.h>
-#include <asm/mediabay.h>
-
-#define DRV_NAME "ide-pmac"
-
-#undef IDE_PMAC_DEBUG
-
-#define DMA_WAIT_TIMEOUT	50
-
-typedef struct pmac_ide_hwif {
-	unsigned long			regbase;
-	int				irq;
-	int				kind;
-	int				aapl_bus_id;
-	unsigned			broken_dma : 1;
-	unsigned			broken_dma_warn : 1;
-	struct device_node*		node;
-	struct macio_dev		*mdev;
-	u32				timings[4];
-	volatile u32 __iomem *		*kauai_fcr;
-	ide_hwif_t			*hwif;
-
-	/* Those fields are duplicating what is in hwif. We currently
-	 * can't use the hwif ones because of some assumptions that are
-	 * beeing done by the generic code about the kind of dma controller
-	 * and format of the dma table. This will have to be fixed though.
-	 */
-	volatile struct dbdma_regs __iomem *	dma_regs;
-	struct dbdma_cmd*		dma_table_cpu;
-} pmac_ide_hwif_t;
-
-enum {
-	controller_ohare,	/* OHare based */
-	controller_heathrow,	/* Heathrow/Paddington */
-	controller_kl_ata3,	/* KeyLargo ATA-3 */
-	controller_kl_ata4,	/* KeyLargo ATA-4 */
-	controller_un_ata6,	/* UniNorth2 ATA-6 */
-	controller_k2_ata6,	/* K2 ATA-6 */
-	controller_sh_ata6,	/* Shasta ATA-6 */
-};
-
-static const char* model_name[] = {
-	"OHare ATA",		/* OHare based */
-	"Heathrow ATA",		/* Heathrow/Paddington */
-	"KeyLargo ATA-3",	/* KeyLargo ATA-3 (MDMA only) */
-	"KeyLargo ATA-4",	/* KeyLargo ATA-4 (UDMA/66) */
-	"UniNorth ATA-6",	/* UniNorth2 ATA-6 (UDMA/100) */
-	"K2 ATA-6",		/* K2 ATA-6 (UDMA/100) */
-	"Shasta ATA-6",		/* Shasta ATA-6 (UDMA/133) */
-};
-
-/*
- * Extra registers, both 32-bit little-endian
- */
-#define IDE_TIMING_CONFIG	0x200
-#define IDE_INTERRUPT		0x300
-
-/* Kauai (U2) ATA has different register setup */
-#define IDE_KAUAI_PIO_CONFIG	0x200
-#define IDE_KAUAI_ULTRA_CONFIG	0x210
-#define IDE_KAUAI_POLL_CONFIG	0x220
-
-/*
- * Timing configuration register definitions
- */
-
-/* Number of IDE_SYSCLK_NS ticks, argument is in nanoseconds */
-#define SYSCLK_TICKS(t)		(((t) + IDE_SYSCLK_NS - 1) / IDE_SYSCLK_NS)
-#define SYSCLK_TICKS_66(t)	(((t) + IDE_SYSCLK_66_NS - 1) / IDE_SYSCLK_66_NS)
-#define IDE_SYSCLK_NS		30	/* 33Mhz cell */
-#define IDE_SYSCLK_66_NS	15	/* 66Mhz cell */
-
-/* 133Mhz cell, found in shasta.
- * See comments about 100 Mhz Uninorth 2...
- * Note that PIO_MASK and MDMA_MASK seem to overlap
- */
-#define TR_133_PIOREG_PIO_MASK		0xff000fff
-#define TR_133_PIOREG_MDMA_MASK		0x00fff800
-#define TR_133_UDMAREG_UDMA_MASK	0x0003ffff
-#define TR_133_UDMAREG_UDMA_EN		0x00000001
-
-/* 100Mhz cell, found in Uninorth 2. I don't have much infos about
- * this one yet, it appears as a pci device (106b/0033) on uninorth
- * internal PCI bus and it's clock is controlled like gem or fw. It
- * appears to be an evolution of keylargo ATA4 with a timing register
- * extended to 2 32bits registers and a similar DBDMA channel. Other
- * registers seem to exist but I can't tell much about them.
- * 
- * So far, I'm using pre-calculated tables for this extracted from
- * the values used by the MacOS X driver.
- * 
- * The "PIO" register controls PIO and MDMA timings, the "ULTRA"
- * register controls the UDMA timings. At least, it seems bit 0
- * of this one enables UDMA vs. MDMA, and bits 4..7 are the
- * cycle time in units of 10ns. Bits 8..15 are used by I don't
- * know their meaning yet
- */
-#define TR_100_PIOREG_PIO_MASK		0xff000fff
-#define TR_100_PIOREG_MDMA_MASK		0x00fff000
-#define TR_100_UDMAREG_UDMA_MASK	0x0000ffff
-#define TR_100_UDMAREG_UDMA_EN		0x00000001
-
-
-/* 66Mhz cell, found in KeyLargo. Can do ultra mode 0 to 2 on
- * 40 connector cable and to 4 on 80 connector one.
- * Clock unit is 15ns (66Mhz)
- * 
- * 3 Values can be programmed:
- *  - Write data setup, which appears to match the cycle time. They
- *    also call it DIOW setup.
- *  - Ready to pause time (from spec)
- *  - Address setup. That one is weird. I don't see where exactly
- *    it fits in UDMA cycles, I got it's name from an obscure piece
- *    of commented out code in Darwin. They leave it to 0, we do as
- *    well, despite a comment that would lead to think it has a
- *    min value of 45ns.
- * Apple also add 60ns to the write data setup (or cycle time ?) on
- * reads.
- */
-#define TR_66_UDMA_MASK			0xfff00000
-#define TR_66_UDMA_EN			0x00100000 /* Enable Ultra mode for DMA */
-#define TR_66_UDMA_ADDRSETUP_MASK	0xe0000000 /* Address setup */
-#define TR_66_UDMA_ADDRSETUP_SHIFT	29
-#define TR_66_UDMA_RDY2PAUS_MASK	0x1e000000 /* Ready 2 pause time */
-#define TR_66_UDMA_RDY2PAUS_SHIFT	25
-#define TR_66_UDMA_WRDATASETUP_MASK	0x01e00000 /* Write data setup time */
-#define TR_66_UDMA_WRDATASETUP_SHIFT	21
-#define TR_66_MDMA_MASK			0x000ffc00
-#define TR_66_MDMA_RECOVERY_MASK	0x000f8000
-#define TR_66_MDMA_RECOVERY_SHIFT	15
-#define TR_66_MDMA_ACCESS_MASK		0x00007c00
-#define TR_66_MDMA_ACCESS_SHIFT		10
-#define TR_66_PIO_MASK			0x000003ff
-#define TR_66_PIO_RECOVERY_MASK		0x000003e0
-#define TR_66_PIO_RECOVERY_SHIFT	5
-#define TR_66_PIO_ACCESS_MASK		0x0000001f
-#define TR_66_PIO_ACCESS_SHIFT		0
-
-/* 33Mhz cell, found in OHare, Heathrow (& Paddington) and KeyLargo
- * Can do pio & mdma modes, clock unit is 30ns (33Mhz)
- * 
- * The access time and recovery time can be programmed. Some older
- * Darwin code base limit OHare to 150ns cycle time. I decided to do
- * the same here fore safety against broken old hardware ;)
- * The HalfTick bit, when set, adds half a clock (15ns) to the access
- * time and removes one from recovery. It's not supported on KeyLargo
- * implementation afaik. The E bit appears to be set for PIO mode 0 and
- * is used to reach long timings used in this mode.
- */
-#define TR_33_MDMA_MASK			0x003ff800
-#define TR_33_MDMA_RECOVERY_MASK	0x001f0000
-#define TR_33_MDMA_RECOVERY_SHIFT	16
-#define TR_33_MDMA_ACCESS_MASK		0x0000f800
-#define TR_33_MDMA_ACCESS_SHIFT		11
-#define TR_33_MDMA_HALFTICK		0x00200000
-#define TR_33_PIO_MASK			0x000007ff
-#define TR_33_PIO_E			0x00000400
-#define TR_33_PIO_RECOVERY_MASK		0x000003e0
-#define TR_33_PIO_RECOVERY_SHIFT	5
-#define TR_33_PIO_ACCESS_MASK		0x0000001f
-#define TR_33_PIO_ACCESS_SHIFT		0
-
-/*
- * Interrupt register definitions
- */
-#define IDE_INTR_DMA			0x80000000
-#define IDE_INTR_DEVICE			0x40000000
-
-/*
- * FCR Register on Kauai. Not sure what bit 0x4 is  ...
- */
-#define KAUAI_FCR_UATA_MAGIC		0x00000004
-#define KAUAI_FCR_UATA_RESET_N		0x00000002
-#define KAUAI_FCR_UATA_ENABLE		0x00000001
-
-/* Rounded Multiword DMA timings
- * 
- * I gave up finding a generic formula for all controller
- * types and instead, built tables based on timing values
- * used by Apple in Darwin's implementation.
- */
-struct mdma_timings_t {
-	int	accessTime;
-	int	recoveryTime;
-	int	cycleTime;
-};
-
-struct mdma_timings_t mdma_timings_33[] =
-{
-    { 240, 240, 480 },
-    { 180, 180, 360 },
-    { 135, 135, 270 },
-    { 120, 120, 240 },
-    { 105, 105, 210 },
-    {  90,  90, 180 },
-    {  75,  75, 150 },
-    {  75,  45, 120 },
-    {   0,   0,   0 }
-};
-
-struct mdma_timings_t mdma_timings_33k[] =
-{
-    { 240, 240, 480 },
-    { 180, 180, 360 },
-    { 150, 150, 300 },
-    { 120, 120, 240 },
-    {  90, 120, 210 },
-    {  90,  90, 180 },
-    {  90,  60, 150 },
-    {  90,  30, 120 },
-    {   0,   0,   0 }
-};
-
-struct mdma_timings_t mdma_timings_66[] =
-{
-    { 240, 240, 480 },
-    { 180, 180, 360 },
-    { 135, 135, 270 },
-    { 120, 120, 240 },
-    { 105, 105, 210 },
-    {  90,  90, 180 },
-    {  90,  75, 165 },
-    {  75,  45, 120 },
-    {   0,   0,   0 }
-};
-
-/* KeyLargo ATA-4 Ultra DMA timings (rounded) */
-struct {
-	int	addrSetup; /* ??? */
-	int	rdy2pause;
-	int	wrDataSetup;
-} kl66_udma_timings[] =
-{
-    {   0, 180,  120 },	/* Mode 0 */
-    {   0, 150,  90 },	/*      1 */
-    {   0, 120,  60 },	/*      2 */
-    {   0, 90,   45 },	/*      3 */
-    {   0, 90,   30 }	/*      4 */
-};
-
-/* UniNorth 2 ATA/100 timings */
-struct kauai_timing {
-	int	cycle_time;
-	u32	timing_reg;
-};
-
-static struct kauai_timing	kauai_pio_timings[] =
-{
-	{ 930	, 0x08000fff },
-	{ 600	, 0x08000a92 },
-	{ 383	, 0x0800060f },
-	{ 360	, 0x08000492 },
-	{ 330	, 0x0800048f },
-	{ 300	, 0x080003cf },
-	{ 270	, 0x080003cc },
-	{ 240	, 0x0800038b },
-	{ 239	, 0x0800030c },
-	{ 180	, 0x05000249 },
-	{ 120	, 0x04000148 },
-	{ 0	, 0 },
-};
-
-static struct kauai_timing	kauai_mdma_timings[] =
-{
-	{ 1260	, 0x00fff000 },
-	{ 480	, 0x00618000 },
-	{ 360	, 0x00492000 },
-	{ 270	, 0x0038e000 },
-	{ 240	, 0x0030c000 },
-	{ 210	, 0x002cb000 },
-	{ 180	, 0x00249000 },
-	{ 150	, 0x00209000 },
-	{ 120	, 0x00148000 },
-	{ 0	, 0 },
-};
-
-static struct kauai_timing	kauai_udma_timings[] =
-{
-	{ 120	, 0x000070c0 },
-	{ 90	, 0x00005d80 },
-	{ 60	, 0x00004a60 },
-	{ 45	, 0x00003a50 },
-	{ 30	, 0x00002a30 },
-	{ 20	, 0x00002921 },
-	{ 0	, 0 },
-};
-
-static struct kauai_timing	shasta_pio_timings[] =
-{
-	{ 930	, 0x08000fff },
-	{ 600	, 0x0A000c97 },
-	{ 383	, 0x07000712 },
-	{ 360	, 0x040003cd },
-	{ 330	, 0x040003cd },
-	{ 300	, 0x040003cd },
-	{ 270	, 0x040003cd },
-	{ 240	, 0x040003cd },
-	{ 239	, 0x040003cd },
-	{ 180	, 0x0400028b },
-	{ 120	, 0x0400010a },
-	{ 0	, 0 },
-};
-
-static struct kauai_timing	shasta_mdma_timings[] =
-{
-	{ 1260	, 0x00fff000 },
-	{ 480	, 0x00820800 },
-	{ 360	, 0x00820800 },
-	{ 270	, 0x00820800 },
-	{ 240	, 0x00820800 },
-	{ 210	, 0x00820800 },
-	{ 180	, 0x00820800 },
-	{ 150	, 0x0028b000 },
-	{ 120	, 0x001ca000 },
-	{ 0	, 0 },
-};
-
-static struct kauai_timing	shasta_udma133_timings[] =
-{
-	{ 120   , 0x00035901, },
-	{ 90    , 0x000348b1, },
-	{ 60    , 0x00033881, },
-	{ 45    , 0x00033861, },
-	{ 30    , 0x00033841, },
-	{ 20    , 0x00033031, },
-	{ 15    , 0x00033021, },
-	{ 0	, 0 },
-};
-
-
-static inline u32
-kauai_lookup_timing(struct kauai_timing* table, int cycle_time)
-{
-	int i;
-	
-	for (i=0; table[i].cycle_time; i++)
-		if (cycle_time > table[i+1].cycle_time)
-			return table[i].timing_reg;
-	BUG();
-	return 0;
-}
-
-/* allow up to 256 DBDMA commands per xfer */
-#define MAX_DCMDS		256
-
-/* 
- * Wait 1s for disk to answer on IDE bus after a hard reset
- * of the device (via GPIO/FCR).
- * 
- * Some devices seem to "pollute" the bus even after dropping
- * the BSY bit (typically some combo drives slave on the UDMA
- * bus) after a hard reset. Since we hard reset all drives on
- * KeyLargo ATA66, we have to keep that delay around. I may end
- * up not hard resetting anymore on these and keep the delay only
- * for older interfaces instead (we have to reset when coming
- * from MacOS...) --BenH. 
- */
-#define IDE_WAKEUP_DELAY	(1*HZ)
-
-static int pmac_ide_init_dma(ide_hwif_t *, const struct ide_port_info *);
-
-#define PMAC_IDE_REG(x) \
-	((void __iomem *)((drive)->hwif->io_ports.data_addr + (x)))
-
-/*
- * Apply the timings of the proper unit (master/slave) to the shared
- * timing register when selecting that unit. This version is for
- * ASICs with a single timing register
- */
-static void pmac_ide_apply_timings(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-
-	if (drive->dn & 1)
-		writel(pmif->timings[1], PMAC_IDE_REG(IDE_TIMING_CONFIG));
-	else
-		writel(pmif->timings[0], PMAC_IDE_REG(IDE_TIMING_CONFIG));
-	(void)readl(PMAC_IDE_REG(IDE_TIMING_CONFIG));
-}
-
-/*
- * Apply the timings of the proper unit (master/slave) to the shared
- * timing register when selecting that unit. This version is for
- * ASICs with a dual timing register (Kauai)
- */
-static void pmac_ide_kauai_apply_timings(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-
-	if (drive->dn & 1) {
-		writel(pmif->timings[1], PMAC_IDE_REG(IDE_KAUAI_PIO_CONFIG));
-		writel(pmif->timings[3], PMAC_IDE_REG(IDE_KAUAI_ULTRA_CONFIG));
-	} else {
-		writel(pmif->timings[0], PMAC_IDE_REG(IDE_KAUAI_PIO_CONFIG));
-		writel(pmif->timings[2], PMAC_IDE_REG(IDE_KAUAI_ULTRA_CONFIG));
-	}
-	(void)readl(PMAC_IDE_REG(IDE_KAUAI_PIO_CONFIG));
-}
-
-/*
- * Force an update of controller timing values for a given drive
- */
-static void
-pmac_ide_do_update_timings(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-
-	if (pmif->kind == controller_sh_ata6 ||
-	    pmif->kind == controller_un_ata6 ||
-	    pmif->kind == controller_k2_ata6)
-		pmac_ide_kauai_apply_timings(drive);
-	else
-		pmac_ide_apply_timings(drive);
-}
-
-static void pmac_dev_select(ide_drive_t *drive)
-{
-	pmac_ide_apply_timings(drive);
-
-	writeb(drive->select | ATA_DEVICE_OBS,
-	       (void __iomem *)drive->hwif->io_ports.device_addr);
-}
-
-static void pmac_kauai_dev_select(ide_drive_t *drive)
-{
-	pmac_ide_kauai_apply_timings(drive);
-
-	writeb(drive->select | ATA_DEVICE_OBS,
-	       (void __iomem *)drive->hwif->io_ports.device_addr);
-}
-
-static void pmac_exec_command(ide_hwif_t *hwif, u8 cmd)
-{
-	writeb(cmd, (void __iomem *)hwif->io_ports.command_addr);
-	(void)readl((void __iomem *)(hwif->io_ports.data_addr
-				     + IDE_TIMING_CONFIG));
-}
-
-static void pmac_write_devctl(ide_hwif_t *hwif, u8 ctl)
-{
-	writeb(ctl, (void __iomem *)hwif->io_ports.ctl_addr);
-	(void)readl((void __iomem *)(hwif->io_ports.data_addr
-				     + IDE_TIMING_CONFIG));
-}
-
-/*
- * Old tuning functions (called on hdparm -p), sets up drive PIO timings
- */
-static void pmac_ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	struct ide_timing *tim = ide_timing_find_mode(XFER_PIO_0 + pio);
-	u32 *timings, t;
-	unsigned accessTicks, recTicks;
-	unsigned accessTime, recTime;
-	unsigned int cycle_time;
-
-	/* which drive is it ? */
-	timings = &pmif->timings[drive->dn & 1];
-	t = *timings;
-
-	cycle_time = ide_pio_cycle_time(drive, pio);
-
-	switch (pmif->kind) {
-	case controller_sh_ata6: {
-		/* 133Mhz cell */
-		u32 tr = kauai_lookup_timing(shasta_pio_timings, cycle_time);
-		t = (t & ~TR_133_PIOREG_PIO_MASK) | tr;
-		break;
-		}
-	case controller_un_ata6:
-	case controller_k2_ata6: {
-		/* 100Mhz cell */
-		u32 tr = kauai_lookup_timing(kauai_pio_timings, cycle_time);
-		t = (t & ~TR_100_PIOREG_PIO_MASK) | tr;
-		break;
-		}
-	case controller_kl_ata4:
-		/* 66Mhz cell */
-		recTime = cycle_time - tim->active - tim->setup;
-		recTime = max(recTime, 150U);
-		accessTime = tim->active;
-		accessTime = max(accessTime, 150U);
-		accessTicks = SYSCLK_TICKS_66(accessTime);
-		accessTicks = min(accessTicks, 0x1fU);
-		recTicks = SYSCLK_TICKS_66(recTime);
-		recTicks = min(recTicks, 0x1fU);
-		t = (t & ~TR_66_PIO_MASK) |
-			(accessTicks << TR_66_PIO_ACCESS_SHIFT) |
-			(recTicks << TR_66_PIO_RECOVERY_SHIFT);
-		break;
-	default: {
-		/* 33Mhz cell */
-		int ebit = 0;
-		recTime = cycle_time - tim->active - tim->setup;
-		recTime = max(recTime, 150U);
-		accessTime = tim->active;
-		accessTime = max(accessTime, 150U);
-		accessTicks = SYSCLK_TICKS(accessTime);
-		accessTicks = min(accessTicks, 0x1fU);
-		accessTicks = max(accessTicks, 4U);
-		recTicks = SYSCLK_TICKS(recTime);
-		recTicks = min(recTicks, 0x1fU);
-		recTicks = max(recTicks, 5U) - 4;
-		if (recTicks > 9) {
-			recTicks--; /* guess, but it's only for PIO0, so... */
-			ebit = 1;
-		}
-		t = (t & ~TR_33_PIO_MASK) |
-				(accessTicks << TR_33_PIO_ACCESS_SHIFT) |
-				(recTicks << TR_33_PIO_RECOVERY_SHIFT);
-		if (ebit)
-			t |= TR_33_PIO_E;
-		break;
-		}
-	}
-
-#ifdef IDE_PMAC_DEBUG
-	printk(KERN_ERR "%s: Set PIO timing for mode %d, reg: 0x%08x\n",
-		drive->name, pio,  *timings);
-#endif	
-
-	*timings = t;
-	pmac_ide_do_update_timings(drive);
-}
-
-/*
- * Calculate KeyLargo ATA/66 UDMA timings
- */
-static int
-set_timings_udma_ata4(u32 *timings, u8 speed)
-{
-	unsigned rdyToPauseTicks, wrDataSetupTicks, addrTicks;
-
-	if (speed > XFER_UDMA_4)
-		return 1;
-
-	rdyToPauseTicks = SYSCLK_TICKS_66(kl66_udma_timings[speed & 0xf].rdy2pause);
-	wrDataSetupTicks = SYSCLK_TICKS_66(kl66_udma_timings[speed & 0xf].wrDataSetup);
-	addrTicks = SYSCLK_TICKS_66(kl66_udma_timings[speed & 0xf].addrSetup);
-
-	*timings = ((*timings) & ~(TR_66_UDMA_MASK | TR_66_MDMA_MASK)) |
-			(wrDataSetupTicks << TR_66_UDMA_WRDATASETUP_SHIFT) | 
-			(rdyToPauseTicks << TR_66_UDMA_RDY2PAUS_SHIFT) |
-			(addrTicks <<TR_66_UDMA_ADDRSETUP_SHIFT) |
-			TR_66_UDMA_EN;
-#ifdef IDE_PMAC_DEBUG
-	printk(KERN_ERR "ide_pmac: Set UDMA timing for mode %d, reg: 0x%08x\n",
-		speed & 0xf,  *timings);
-#endif	
-
-	return 0;
-}
-
-/*
- * Calculate Kauai ATA/100 UDMA timings
- */
-static int
-set_timings_udma_ata6(u32 *pio_timings, u32 *ultra_timings, u8 speed)
-{
-	struct ide_timing *t = ide_timing_find_mode(speed);
-	u32 tr;
-
-	if (speed > XFER_UDMA_5 || t == NULL)
-		return 1;
-	tr = kauai_lookup_timing(kauai_udma_timings, (int)t->udma);
-	*ultra_timings = ((*ultra_timings) & ~TR_100_UDMAREG_UDMA_MASK) | tr;
-	*ultra_timings = (*ultra_timings) | TR_100_UDMAREG_UDMA_EN;
-
-	return 0;
-}
-
-/*
- * Calculate Shasta ATA/133 UDMA timings
- */
-static int
-set_timings_udma_shasta(u32 *pio_timings, u32 *ultra_timings, u8 speed)
-{
-	struct ide_timing *t = ide_timing_find_mode(speed);
-	u32 tr;
-
-	if (speed > XFER_UDMA_6 || t == NULL)
-		return 1;
-	tr = kauai_lookup_timing(shasta_udma133_timings, (int)t->udma);
-	*ultra_timings = ((*ultra_timings) & ~TR_133_UDMAREG_UDMA_MASK) | tr;
-	*ultra_timings = (*ultra_timings) | TR_133_UDMAREG_UDMA_EN;
-
-	return 0;
-}
-
-/*
- * Calculate MDMA timings for all cells
- */
-static void
-set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2,
-		 	u8 speed)
-{
-	u16 *id = drive->id;
-	int cycleTime, accessTime = 0, recTime = 0;
-	unsigned accessTicks, recTicks;
-	struct mdma_timings_t* tm = NULL;
-	int i;
-
-	/* Get default cycle time for mode */
-	switch(speed & 0xf) {
-		case 0: cycleTime = 480; break;
-		case 1: cycleTime = 150; break;
-		case 2: cycleTime = 120; break;
-		default:
-			BUG();
-			break;
-	}
-
-	/* Check if drive provides explicit DMA cycle time */
-	if ((id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_DMA_TIME])
-		cycleTime = max_t(int, id[ATA_ID_EIDE_DMA_TIME], cycleTime);
-
-	/* OHare limits according to some old Apple sources */	
-	if ((intf_type == controller_ohare) && (cycleTime < 150))
-		cycleTime = 150;
-	/* Get the proper timing array for this controller */
-	switch(intf_type) {
-	        case controller_sh_ata6:
-		case controller_un_ata6:
-		case controller_k2_ata6:
-			break;
-		case controller_kl_ata4:
-			tm = mdma_timings_66;
-			break;
-		case controller_kl_ata3:
-			tm = mdma_timings_33k;
-			break;
-		default:
-			tm = mdma_timings_33;
-			break;
-	}
-	if (tm != NULL) {
-		/* Lookup matching access & recovery times */
-		i = -1;
-		for (;;) {
-			if (tm[i+1].cycleTime < cycleTime)
-				break;
-			i++;
-		}
-		cycleTime = tm[i].cycleTime;
-		accessTime = tm[i].accessTime;
-		recTime = tm[i].recoveryTime;
-
-#ifdef IDE_PMAC_DEBUG
-		printk(KERN_ERR "%s: MDMA, cycleTime: %d, accessTime: %d, recTime: %d\n",
-			drive->name, cycleTime, accessTime, recTime);
-#endif
-	}
-	switch(intf_type) {
-	case controller_sh_ata6: {
-		/* 133Mhz cell */
-		u32 tr = kauai_lookup_timing(shasta_mdma_timings, cycleTime);
-		*timings = ((*timings) & ~TR_133_PIOREG_MDMA_MASK) | tr;
-		*timings2 = (*timings2) & ~TR_133_UDMAREG_UDMA_EN;
-		}
-		break;
-	case controller_un_ata6:
-	case controller_k2_ata6: {
-		/* 100Mhz cell */
-		u32 tr = kauai_lookup_timing(kauai_mdma_timings, cycleTime);
-		*timings = ((*timings) & ~TR_100_PIOREG_MDMA_MASK) | tr;
-		*timings2 = (*timings2) & ~TR_100_UDMAREG_UDMA_EN;
-		}
-		break;
-	case controller_kl_ata4:
-		/* 66Mhz cell */
-		accessTicks = SYSCLK_TICKS_66(accessTime);
-		accessTicks = min(accessTicks, 0x1fU);
-		accessTicks = max(accessTicks, 0x1U);
-		recTicks = SYSCLK_TICKS_66(recTime);
-		recTicks = min(recTicks, 0x1fU);
-		recTicks = max(recTicks, 0x3U);
-		/* Clear out mdma bits and disable udma */
-		*timings = ((*timings) & ~(TR_66_MDMA_MASK | TR_66_UDMA_MASK)) |
-			(accessTicks << TR_66_MDMA_ACCESS_SHIFT) |
-			(recTicks << TR_66_MDMA_RECOVERY_SHIFT);
-		break;
-	case controller_kl_ata3:
-		/* 33Mhz cell on KeyLargo */
-		accessTicks = SYSCLK_TICKS(accessTime);
-		accessTicks = max(accessTicks, 1U);
-		accessTicks = min(accessTicks, 0x1fU);
-		accessTime = accessTicks * IDE_SYSCLK_NS;
-		recTicks = SYSCLK_TICKS(recTime);
-		recTicks = max(recTicks, 1U);
-		recTicks = min(recTicks, 0x1fU);
-		*timings = ((*timings) & ~TR_33_MDMA_MASK) |
-				(accessTicks << TR_33_MDMA_ACCESS_SHIFT) |
-				(recTicks << TR_33_MDMA_RECOVERY_SHIFT);
-		break;
-	default: {
-		/* 33Mhz cell on others */
-		int halfTick = 0;
-		int origAccessTime = accessTime;
-		int origRecTime = recTime;
-		
-		accessTicks = SYSCLK_TICKS(accessTime);
-		accessTicks = max(accessTicks, 1U);
-		accessTicks = min(accessTicks, 0x1fU);
-		accessTime = accessTicks * IDE_SYSCLK_NS;
-		recTicks = SYSCLK_TICKS(recTime);
-		recTicks = max(recTicks, 2U) - 1;
-		recTicks = min(recTicks, 0x1fU);
-		recTime = (recTicks + 1) * IDE_SYSCLK_NS;
-		if ((accessTicks > 1) &&
-		    ((accessTime - IDE_SYSCLK_NS/2) >= origAccessTime) &&
-		    ((recTime - IDE_SYSCLK_NS/2) >= origRecTime)) {
-            		halfTick = 1;
-			accessTicks--;
-		}
-		*timings = ((*timings) & ~TR_33_MDMA_MASK) |
-				(accessTicks << TR_33_MDMA_ACCESS_SHIFT) |
-				(recTicks << TR_33_MDMA_RECOVERY_SHIFT);
-		if (halfTick)
-			*timings |= TR_33_MDMA_HALFTICK;
-		}
-	}
-#ifdef IDE_PMAC_DEBUG
-	printk(KERN_ERR "%s: Set MDMA timing for mode %d, reg: 0x%08x\n",
-		drive->name, speed & 0xf,  *timings);
-#endif	
-}
-
-static void pmac_ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	int ret = 0;
-	u32 *timings, *timings2, tl[2];
-	u8 unit = drive->dn & 1;
-	const u8 speed = drive->dma_mode;
-
-	timings = &pmif->timings[unit];
-	timings2 = &pmif->timings[unit+2];
-
-	/* Copy timings to local image */
-	tl[0] = *timings;
-	tl[1] = *timings2;
-
-	if (speed >= XFER_UDMA_0) {
-		if (pmif->kind == controller_kl_ata4)
-			ret = set_timings_udma_ata4(&tl[0], speed);
-		else if (pmif->kind == controller_un_ata6
-			 || pmif->kind == controller_k2_ata6)
-			ret = set_timings_udma_ata6(&tl[0], &tl[1], speed);
-		else if (pmif->kind == controller_sh_ata6)
-			ret = set_timings_udma_shasta(&tl[0], &tl[1], speed);
-		else
-			ret = -1;
-	} else
-		set_timings_mdma(drive, pmif->kind, &tl[0], &tl[1], speed);
-
-	if (ret)
-		return;
-
-	/* Apply timings to controller */
-	*timings = tl[0];
-	*timings2 = tl[1];
-
-	pmac_ide_do_update_timings(drive);	
-}
-
-/*
- * Blast some well known "safe" values to the timing registers at init or
- * wakeup from sleep time, before we do real calculation
- */
-static void
-sanitize_timings(pmac_ide_hwif_t *pmif)
-{
-	unsigned int value, value2 = 0;
-	
-	switch(pmif->kind) {
-		case controller_sh_ata6:
-			value = 0x0a820c97;
-			value2 = 0x00033031;
-			break;
-		case controller_un_ata6:
-		case controller_k2_ata6:
-			value = 0x08618a92;
-			value2 = 0x00002921;
-			break;
-		case controller_kl_ata4:
-			value = 0x0008438c;
-			break;
-		case controller_kl_ata3:
-			value = 0x00084526;
-			break;
-		case controller_heathrow:
-		case controller_ohare:
-		default:
-			value = 0x00074526;
-			break;
-	}
-	pmif->timings[0] = pmif->timings[1] = value;
-	pmif->timings[2] = pmif->timings[3] = value2;
-}
-
-static int on_media_bay(pmac_ide_hwif_t *pmif)
-{
-	return pmif->mdev && pmif->mdev->media_bay != NULL;
-}
-
-/* Suspend call back, should be called after the child devices
- * have actually been suspended
- */
-static int pmac_ide_do_suspend(pmac_ide_hwif_t *pmif)
-{
-	/* We clear the timings */
-	pmif->timings[0] = 0;
-	pmif->timings[1] = 0;
-	
-	disable_irq(pmif->irq);
-
-	/* The media bay will handle itself just fine */
-	if (on_media_bay(pmif))
-		return 0;
-	
-	/* Kauai has bus control FCRs directly here */
-	if (pmif->kauai_fcr) {
-		u32 fcr = readl(pmif->kauai_fcr);
-		fcr &= ~(KAUAI_FCR_UATA_RESET_N | KAUAI_FCR_UATA_ENABLE);
-		writel(fcr, pmif->kauai_fcr);
-	}
-
-	/* Disable the bus on older machines and the cell on kauai */
-	ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, pmif->node, pmif->aapl_bus_id,
-			    0);
-
-	return 0;
-}
-
-/* Resume call back, should be called before the child devices
- * are resumed
- */
-static int pmac_ide_do_resume(pmac_ide_hwif_t *pmif)
-{
-	/* Hard reset & re-enable controller (do we really need to reset ? -BenH) */
-	if (!on_media_bay(pmif)) {
-		ppc_md.feature_call(PMAC_FTR_IDE_RESET, pmif->node, pmif->aapl_bus_id, 1);
-		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, pmif->node, pmif->aapl_bus_id, 1);
-		msleep(10);
-		ppc_md.feature_call(PMAC_FTR_IDE_RESET, pmif->node, pmif->aapl_bus_id, 0);
-
-		/* Kauai has it different */
-		if (pmif->kauai_fcr) {
-			u32 fcr = readl(pmif->kauai_fcr);
-			fcr |= KAUAI_FCR_UATA_RESET_N | KAUAI_FCR_UATA_ENABLE;
-			writel(fcr, pmif->kauai_fcr);
-		}
-
-		msleep(jiffies_to_msecs(IDE_WAKEUP_DELAY));
-	}
-
-	/* Sanitize drive timings */
-	sanitize_timings(pmif);
-
-	enable_irq(pmif->irq);
-
-	return 0;
-}
-
-static u8 pmac_ide_cable_detect(ide_hwif_t *hwif)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	struct device_node *np = pmif->node;
-	const char *cable = of_get_property(np, "cable-type", NULL);
-	struct device_node *root = of_find_node_by_path("/");
-	const char *model = of_get_property(root, "model", NULL);
-
-	of_node_put(root);
-	/* Get cable type from device-tree. */
-	if (cable && !strncmp(cable, "80-", 3)) {
-		/* Some drives fail to detect 80c cable in PowerBook */
-		/* These machine use proprietary short IDE cable anyway */
-		if (!strncmp(model, "PowerBook", 9))
-			return ATA_CBL_PATA40_SHORT;
-		else
-			return ATA_CBL_PATA80;
-	}
-
-	/*
-	 * G5's seem to have incorrect cable type in device-tree.
-	 * Let's assume they have a 80 conductor cable, this seem
-	 * to be always the case unless the user mucked around.
-	 */
-	if (of_device_is_compatible(np, "K2-UATA") ||
-	    of_device_is_compatible(np, "shasta-ata"))
-		return ATA_CBL_PATA80;
-
-	return ATA_CBL_PATA40;
-}
-
-static void pmac_ide_init_dev(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-
-	if (on_media_bay(pmif)) {
-		if (check_media_bay(pmif->mdev->media_bay) == MB_CD) {
-			drive->dev_flags &= ~IDE_DFLAG_NOPROBE;
-			return;
-		}
-		drive->dev_flags |= IDE_DFLAG_NOPROBE;
-	}
-}
-
-static const struct ide_tp_ops pmac_tp_ops = {
-	.exec_command		= pmac_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= pmac_write_devctl,
-
-	.dev_select		= pmac_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_tp_ops pmac_ata6_tp_ops = {
-	.exec_command		= pmac_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= pmac_write_devctl,
-
-	.dev_select		= pmac_kauai_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_port_ops pmac_ide_ata4_port_ops = {
-	.init_dev		= pmac_ide_init_dev,
-	.set_pio_mode		= pmac_ide_set_pio_mode,
-	.set_dma_mode		= pmac_ide_set_dma_mode,
-	.cable_detect		= pmac_ide_cable_detect,
-};
-
-static const struct ide_port_ops pmac_ide_port_ops = {
-	.init_dev		= pmac_ide_init_dev,
-	.set_pio_mode		= pmac_ide_set_pio_mode,
-	.set_dma_mode		= pmac_ide_set_dma_mode,
-};
-
-static const struct ide_dma_ops pmac_dma_ops;
-
-static const struct ide_port_info pmac_port_info = {
-	.name			= DRV_NAME,
-	.init_dma		= pmac_ide_init_dma,
-	.chipset		= ide_pmac,
-	.tp_ops			= &pmac_tp_ops,
-	.port_ops		= &pmac_ide_port_ops,
-	.dma_ops		= &pmac_dma_ops,
-	.host_flags		= IDE_HFLAG_SET_PIO_MODE_KEEP_DMA |
-				  IDE_HFLAG_POST_SET_MODE |
-				  IDE_HFLAG_MMIO |
-				  IDE_HFLAG_UNMASK_IRQS,
-	.pio_mask		= ATA_PIO4,
-	.mwdma_mask		= ATA_MWDMA2,
-};
-
-/*
- * Setup, register & probe an IDE channel driven by this driver, this is
- * called by one of the 2 probe functions (macio or PCI).
- */
-static int pmac_ide_setup_device(pmac_ide_hwif_t *pmif, struct ide_hw *hw)
-{
-	struct device_node *np = pmif->node;
-	const int *bidp;
-	struct ide_host *host;
-	struct ide_hw *hws[] = { hw };
-	struct ide_port_info d = pmac_port_info;
-	int rc;
-
-	pmif->broken_dma = pmif->broken_dma_warn = 0;
-	if (of_device_is_compatible(np, "shasta-ata")) {
-		pmif->kind = controller_sh_ata6;
-		d.tp_ops = &pmac_ata6_tp_ops;
-		d.port_ops = &pmac_ide_ata4_port_ops;
-		d.udma_mask = ATA_UDMA6;
-	} else if (of_device_is_compatible(np, "kauai-ata")) {
-		pmif->kind = controller_un_ata6;
-		d.tp_ops = &pmac_ata6_tp_ops;
-		d.port_ops = &pmac_ide_ata4_port_ops;
-		d.udma_mask = ATA_UDMA5;
-	} else if (of_device_is_compatible(np, "K2-UATA")) {
-		pmif->kind = controller_k2_ata6;
-		d.tp_ops = &pmac_ata6_tp_ops;
-		d.port_ops = &pmac_ide_ata4_port_ops;
-		d.udma_mask = ATA_UDMA5;
-	} else if (of_device_is_compatible(np, "keylargo-ata")) {
-		if (of_node_name_eq(np, "ata-4")) {
-			pmif->kind = controller_kl_ata4;
-			d.port_ops = &pmac_ide_ata4_port_ops;
-			d.udma_mask = ATA_UDMA4;
-		} else
-			pmif->kind = controller_kl_ata3;
-	} else if (of_device_is_compatible(np, "heathrow-ata")) {
-		pmif->kind = controller_heathrow;
-	} else {
-		pmif->kind = controller_ohare;
-		pmif->broken_dma = 1;
-	}
-
-	bidp = of_get_property(np, "AAPL,bus-id", NULL);
-	pmif->aapl_bus_id =  bidp ? *bidp : 0;
-
-	/* On Kauai-type controllers, we make sure the FCR is correct */
-	if (pmif->kauai_fcr)
-		writel(KAUAI_FCR_UATA_MAGIC |
-		       KAUAI_FCR_UATA_RESET_N |
-		       KAUAI_FCR_UATA_ENABLE, pmif->kauai_fcr);
-	
-	/* Make sure we have sane timings */
-	sanitize_timings(pmif);
-
-	/* If we are on a media bay, wait for it to settle and lock it */
-	if (pmif->mdev)
-		lock_media_bay(pmif->mdev->media_bay);
-
-	host = ide_host_alloc(&d, hws, 1);
-	if (host == NULL) {
-		rc = -ENOMEM;
-		goto bail;
-	}
-	pmif->hwif = host->ports[0];
-
-	if (on_media_bay(pmif)) {
-		/* Fixup bus ID for media bay */
-		if (!bidp)
-			pmif->aapl_bus_id = 1;
-	} else if (pmif->kind == controller_ohare) {
-		/* The code below is having trouble on some ohare machines
-		 * (timing related ?). Until I can put my hand on one of these
-		 * units, I keep the old way
-		 */
-		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, np, 0, 1);
-	} else {
- 		/* This is necessary to enable IDE when net-booting */
-		ppc_md.feature_call(PMAC_FTR_IDE_RESET, np, pmif->aapl_bus_id, 1);
-		ppc_md.feature_call(PMAC_FTR_IDE_ENABLE, np, pmif->aapl_bus_id, 1);
-		msleep(10);
-		ppc_md.feature_call(PMAC_FTR_IDE_RESET, np, pmif->aapl_bus_id, 0);
-		msleep(jiffies_to_msecs(IDE_WAKEUP_DELAY));
-	}
-
-	printk(KERN_INFO DRV_NAME ": Found Apple %s controller (%s), "
-	       "bus ID %d%s, irq %d\n", model_name[pmif->kind],
-	       pmif->mdev ? "macio" : "PCI", pmif->aapl_bus_id,
-	       on_media_bay(pmif) ? " (mediabay)" : "", hw->irq);
-
-	rc = ide_host_register(host, &d, hws);
-	if (rc)
-		pmif->hwif = NULL;
-
-	if (pmif->mdev)
-		unlock_media_bay(pmif->mdev->media_bay);
-
- bail:
-	if (rc && host)
-		ide_host_free(host);
-	return rc;
-}
-
-static void pmac_ide_init_ports(struct ide_hw *hw, unsigned long base)
-{
-	int i;
-
-	for (i = 0; i < 8; ++i)
-		hw->io_ports_array[i] = base + i * 0x10;
-
-	hw->io_ports.ctl_addr = base + 0x160;
-}
-
-/*
- * Attach to a macio probed interface
- */
-static int pmac_ide_macio_attach(struct macio_dev *mdev,
-				 const struct of_device_id *match)
-{
-	void __iomem *base;
-	unsigned long regbase;
-	pmac_ide_hwif_t *pmif;
-	int irq, rc;
-	struct ide_hw hw;
-
-	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
-	if (pmif == NULL)
-		return -ENOMEM;
-
-	if (macio_resource_count(mdev) == 0) {
-		printk(KERN_WARNING "ide-pmac: no address for %pOF\n",
-				    mdev->ofdev.dev.of_node);
-		rc = -ENXIO;
-		goto out_free_pmif;
-	}
-
-	/* Request memory resource for IO ports */
-	if (macio_request_resource(mdev, 0, "ide-pmac (ports)")) {
-		printk(KERN_ERR "ide-pmac: can't request MMIO resource for "
-				"%pOF!\n", mdev->ofdev.dev.of_node);
-		rc = -EBUSY;
-		goto out_free_pmif;
-	}
-			
-	/* XXX This is bogus. Should be fixed in the registry by checking
-	 * the kind of host interrupt controller, a bit like gatwick
-	 * fixes in irq.c. That works well enough for the single case
-	 * where that happens though...
-	 */
-	if (macio_irq_count(mdev) == 0) {
-		printk(KERN_WARNING "ide-pmac: no intrs for device %pOF, using "
-				    "13\n", mdev->ofdev.dev.of_node);
-		irq = irq_create_mapping(NULL, 13);
-	} else
-		irq = macio_irq(mdev, 0);
-
-	base = ioremap(macio_resource_start(mdev, 0), 0x400);
-	regbase = (unsigned long) base;
-
-	pmif->mdev = mdev;
-	pmif->node = mdev->ofdev.dev.of_node;
-	pmif->regbase = regbase;
-	pmif->irq = irq;
-	pmif->kauai_fcr = NULL;
-
-	if (macio_resource_count(mdev) >= 2) {
-		if (macio_request_resource(mdev, 1, "ide-pmac (dma)"))
-			printk(KERN_WARNING "ide-pmac: can't request DMA "
-					    "resource for %pOF!\n",
-					    mdev->ofdev.dev.of_node);
-		else
-			pmif->dma_regs = ioremap(macio_resource_start(mdev, 1), 0x1000);
-	} else
-		pmif->dma_regs = NULL;
-
-	dev_set_drvdata(&mdev->ofdev.dev, pmif);
-
-	memset(&hw, 0, sizeof(hw));
-	pmac_ide_init_ports(&hw, pmif->regbase);
-	hw.irq = irq;
-	hw.dev = &mdev->bus->pdev->dev;
-	hw.parent = &mdev->ofdev.dev;
-
-	rc = pmac_ide_setup_device(pmif, &hw);
-	if (rc != 0) {
-		/* The inteface is released to the common IDE layer */
-		dev_set_drvdata(&mdev->ofdev.dev, NULL);
-		iounmap(base);
-		if (pmif->dma_regs) {
-			iounmap(pmif->dma_regs);
-			macio_release_resource(mdev, 1);
-		}
-		macio_release_resource(mdev, 0);
-		kfree(pmif);
-	}
-
-	return rc;
-
-out_free_pmif:
-	kfree(pmif);
-	return rc;
-}
-
-static int
-pmac_ide_macio_suspend(struct macio_dev *mdev, pm_message_t mesg)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(&mdev->ofdev.dev);
-	int rc = 0;
-
-	if (mesg.event != mdev->ofdev.dev.power.power_state.event
-			&& (mesg.event & PM_EVENT_SLEEP)) {
-		rc = pmac_ide_do_suspend(pmif);
-		if (rc == 0)
-			mdev->ofdev.dev.power.power_state = mesg;
-	}
-
-	return rc;
-}
-
-static int
-pmac_ide_macio_resume(struct macio_dev *mdev)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(&mdev->ofdev.dev);
-	int rc = 0;
-
-	if (mdev->ofdev.dev.power.power_state.event != PM_EVENT_ON) {
-		rc = pmac_ide_do_resume(pmif);
-		if (rc == 0)
-			mdev->ofdev.dev.power.power_state = PMSG_ON;
-	}
-
-	return rc;
-}
-
-/*
- * Attach to a PCI probed interface
- */
-static int pmac_ide_pci_attach(struct pci_dev *pdev,
-			       const struct pci_device_id *id)
-{
-	struct device_node *np;
-	pmac_ide_hwif_t *pmif;
-	void __iomem *base;
-	unsigned long rbase, rlen;
-	int rc;
-	struct ide_hw hw;
-
-	np = pci_device_to_OF_node(pdev);
-	if (np == NULL) {
-		printk(KERN_ERR "ide-pmac: cannot find MacIO node for Kauai ATA interface\n");
-		return -ENODEV;
-	}
-
-	pmif = kzalloc(sizeof(*pmif), GFP_KERNEL);
-	if (pmif == NULL)
-		return -ENOMEM;
-
-	if (pci_enable_device(pdev)) {
-		printk(KERN_WARNING "ide-pmac: Can't enable PCI device for "
-				    "%pOF\n", np);
-		rc = -ENXIO;
-		goto out_free_pmif;
-	}
-	pci_set_master(pdev);
-			
-	if (pci_request_regions(pdev, "Kauai ATA")) {
-		printk(KERN_ERR "ide-pmac: Cannot obtain PCI resources for "
-				"%pOF\n", np);
-		rc = -ENXIO;
-		goto out_free_pmif;
-	}
-
-	pmif->mdev = NULL;
-	pmif->node = np;
-
-	rbase = pci_resource_start(pdev, 0);
-	rlen = pci_resource_len(pdev, 0);
-
-	base = ioremap(rbase, rlen);
-	pmif->regbase = (unsigned long) base + 0x2000;
-	pmif->dma_regs = base + 0x1000;
-	pmif->kauai_fcr = base;
-	pmif->irq = pdev->irq;
-
-	pci_set_drvdata(pdev, pmif);
-
-	memset(&hw, 0, sizeof(hw));
-	pmac_ide_init_ports(&hw, pmif->regbase);
-	hw.irq = pdev->irq;
-	hw.dev = &pdev->dev;
-
-	rc = pmac_ide_setup_device(pmif, &hw);
-	if (rc != 0) {
-		/* The inteface is released to the common IDE layer */
-		iounmap(base);
-		pci_release_regions(pdev);
-		kfree(pmif);
-	}
-
-	return rc;
-
-out_free_pmif:
-	kfree(pmif);
-	return rc;
-}
-
-static int
-pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
-{
-	pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
-	int rc = 0;
-
-	if (mesg.event != pdev->dev.power.power_state.event
-			&& (mesg.event & PM_EVENT_SLEEP)) {
-		rc = pmac_ide_do_suspend(pmif);
-		if (rc == 0)
-			pdev->dev.power.power_state = mesg;
-	}
-
-	return rc;
-}
-
-static int
-pmac_ide_pci_resume(struct pci_dev *pdev)
-{
-	pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
-	int rc = 0;
-
-	if (pdev->dev.power.power_state.event != PM_EVENT_ON) {
-		rc = pmac_ide_do_resume(pmif);
-		if (rc == 0)
-			pdev->dev.power.power_state = PMSG_ON;
-	}
-
-	return rc;
-}
-
-#ifdef CONFIG_PMAC_MEDIABAY
-static void pmac_ide_macio_mb_event(struct macio_dev* mdev, int mb_state)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(&mdev->ofdev.dev);
-
-	switch(mb_state) {
-	case MB_CD:
-		if (!pmif->hwif->present)
-			ide_port_scan(pmif->hwif);
-		break;
-	default:
-		if (pmif->hwif->present)
-			ide_port_unregister_devices(pmif->hwif);
-	}
-}
-#endif /* CONFIG_PMAC_MEDIABAY */
-
-
-static struct of_device_id pmac_ide_macio_match[] = 
-{
-	{
-	.name 		= "IDE",
-	},
-	{
-	.name 		= "ATA",
-	},
-	{
-	.type		= "ide",
-	},
-	{
-	.type		= "ata",
-	},
-	{},
-};
-
-static struct macio_driver pmac_ide_macio_driver = 
-{
-	.driver = {
-		.name 		= "ide-pmac",
-		.owner		= THIS_MODULE,
-		.of_match_table	= pmac_ide_macio_match,
-	},
-	.probe		= pmac_ide_macio_attach,
-	.suspend	= pmac_ide_macio_suspend,
-	.resume		= pmac_ide_macio_resume,
-#ifdef CONFIG_PMAC_MEDIABAY
-	.mediabay_event	= pmac_ide_macio_mb_event,
-#endif
-};
-
-static const struct pci_device_id pmac_ide_pci_match[] = {
-	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA),	0 },
-	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100),	0 },
-	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100),	0 },
-	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_SH_ATA),	0 },
-	{ PCI_VDEVICE(APPLE, PCI_DEVICE_ID_APPLE_IPID2_ATA),	0 },
-	{},
-};
-
-static struct pci_driver pmac_ide_pci_driver = {
-	.name		= "ide-pmac",
-	.id_table	= pmac_ide_pci_match,
-	.probe		= pmac_ide_pci_attach,
-	.suspend	= pmac_ide_pci_suspend,
-	.resume		= pmac_ide_pci_resume,
-};
-MODULE_DEVICE_TABLE(pci, pmac_ide_pci_match);
-
-int __init pmac_ide_probe(void)
-{
-	int error;
-
-	if (!machine_is(powermac))
-		return -ENODEV;
-
-#ifdef CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST
-	error = pci_register_driver(&pmac_ide_pci_driver);
-	if (error)
-		goto out;
-	error = macio_register_driver(&pmac_ide_macio_driver);
-	if (error) {
-		pci_unregister_driver(&pmac_ide_pci_driver);
-		goto out;
-	}
-#else
-	error = macio_register_driver(&pmac_ide_macio_driver);
-	if (error)
-		goto out;
-	error = pci_register_driver(&pmac_ide_pci_driver);
-	if (error) {
-		macio_unregister_driver(&pmac_ide_macio_driver);
-		goto out;
-	}
-#endif
-out:
-	return error;
-}
-
-/*
- * pmac_ide_build_dmatable builds the DBDMA command list
- * for a transfer and sets the DBDMA channel to point to it.
- */
-static int pmac_ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	struct dbdma_cmd *table;
-	volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
-	struct scatterlist *sg;
-	int wr = !!(cmd->tf_flags & IDE_TFLAG_WRITE);
-	int i = cmd->sg_nents, count = 0;
-
-	/* DMA table is already aligned */
-	table = (struct dbdma_cmd *) pmif->dma_table_cpu;
-
-	/* Make sure DMA controller is stopped (necessary ?) */
-	writel((RUN|PAUSE|FLUSH|WAKE|DEAD) << 16, &dma->control);
-	while (readl(&dma->status) & RUN)
-		udelay(1);
-
-	/* Build DBDMA commands list */
-	sg = hwif->sg_table;
-	while (i && sg_dma_len(sg)) {
-		u32 cur_addr;
-		u32 cur_len;
-
-		cur_addr = sg_dma_address(sg);
-		cur_len = sg_dma_len(sg);
-
-		if (pmif->broken_dma && cur_addr & (L1_CACHE_BYTES - 1)) {
-			if (pmif->broken_dma_warn == 0) {
-				printk(KERN_WARNING "%s: DMA on non aligned address, "
-				       "switching to PIO on Ohare chipset\n", drive->name);
-				pmif->broken_dma_warn = 1;
-			}
-			return 0;
-		}
-		while (cur_len) {
-			unsigned int tc = (cur_len < 0xfe00)? cur_len: 0xfe00;
-
-			if (count++ >= MAX_DCMDS) {
-				printk(KERN_WARNING "%s: DMA table too small\n",
-				       drive->name);
-				return 0;
-			}
-			table->command = cpu_to_le16(wr? OUTPUT_MORE: INPUT_MORE);
-			table->req_count = cpu_to_le16(tc);
-			table->phy_addr = cpu_to_le32(cur_addr);
-			table->cmd_dep = 0;
-			table->xfer_status = 0;
-			table->res_count = 0;
-			cur_addr += tc;
-			cur_len -= tc;
-			++table;
-		}
-		sg = sg_next(sg);
-		i--;
-	}
-
-	/* convert the last command to an input/output last command */
-	if (count) {
-		table[-1].command = cpu_to_le16(wr? OUTPUT_LAST: INPUT_LAST);
-		/* add the stop command to the end of the list */
-		memset(table, 0, sizeof(struct dbdma_cmd));
-		table->command = cpu_to_le16(DBDMA_STOP);
-		mb();
-		writel(hwif->dmatable_dma, &dma->cmdptr);
-		return 1;
-	}
-
-	printk(KERN_DEBUG "%s: empty DMA table?\n", drive->name);
-
-	return 0; /* revert to PIO for this request */
-}
-
-/*
- * Prepare a DMA transfer. We build the DMA table, adjust the timings for
- * a read on KeyLargo ATA/66 and mark us as waiting for DMA completion
- */
-static int pmac_ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	u8 unit = drive->dn & 1, ata4 = (pmif->kind == controller_kl_ata4);
-	u8 write = !!(cmd->tf_flags & IDE_TFLAG_WRITE);
-
-	if (pmac_ide_build_dmatable(drive, cmd) == 0)
-		return 1;
-
-	/* Apple adds 60ns to wrDataSetup on reads */
-	if (ata4 && (pmif->timings[unit] & TR_66_UDMA_EN)) {
-		writel(pmif->timings[unit] + (write ? 0 : 0x00800000UL),
-			PMAC_IDE_REG(IDE_TIMING_CONFIG));
-		(void)readl(PMAC_IDE_REG(IDE_TIMING_CONFIG));
-	}
-
-	return 0;
-}
-
-/*
- * Kick the DMA controller into life after the DMA command has been issued
- * to the drive.
- */
-static void
-pmac_ide_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	volatile struct dbdma_regs __iomem *dma;
-
-	dma = pmif->dma_regs;
-
-	writel((RUN << 16) | RUN, &dma->control);
-	/* Make sure it gets to the controller right now */
-	(void)readl(&dma->control);
-}
-
-/*
- * After a DMA transfer, make sure the controller is stopped
- */
-static int
-pmac_ide_dma_end (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
-	u32 dstat;
-
-	dstat = readl(&dma->status);
-	writel(((RUN|WAKE|DEAD) << 16), &dma->control);
-
-	/* verify good dma status. we don't check for ACTIVE beeing 0. We should...
-	 * in theory, but with ATAPI decices doing buffer underruns, that would
-	 * cause us to disable DMA, which isn't what we want
-	 */
-	return (dstat & (RUN|DEAD)) != RUN;
-}
-
-/*
- * Check out that the interrupt we got was for us. We can't always know this
- * for sure with those Apple interfaces (well, we could on the recent ones but
- * that's not implemented yet), on the other hand, we don't have shared interrupts
- * so it's not really a problem
- */
-static int
-pmac_ide_dma_test_irq (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
-	unsigned long status, timeout;
-
-	/* We have to things to deal with here:
-	 * 
-	 * - The dbdma won't stop if the command was started
-	 * but completed with an error without transferring all
-	 * datas. This happens when bad blocks are met during
-	 * a multi-block transfer.
-	 * 
-	 * - The dbdma fifo hasn't yet finished flushing to
-	 * to system memory when the disk interrupt occurs.
-	 * 
-	 */
-
-	/* If ACTIVE is cleared, the STOP command have passed and
-	 * transfer is complete.
-	 */
-	status = readl(&dma->status);
-	if (!(status & ACTIVE))
-		return 1;
-
-	/* If dbdma didn't execute the STOP command yet, the
-	 * active bit is still set. We consider that we aren't
-	 * sharing interrupts (which is hopefully the case with
-	 * those controllers) and so we just try to flush the
-	 * channel for pending data in the fifo
-	 */
-	udelay(1);
-	writel((FLUSH << 16) | FLUSH, &dma->control);
-	timeout = 0;
-	for (;;) {
-		udelay(1);
-		status = readl(&dma->status);
-		if ((status & FLUSH) == 0)
-			break;
-		if (++timeout > 100) {
-			printk(KERN_WARNING "ide%d, ide_dma_test_irq timeout flushing channel\n",
-			       hwif->index);
-			break;
-		}
-	}	
-	return 1;
-}
-
-static void pmac_ide_dma_host_set(ide_drive_t *drive, int on)
-{
-}
-
-static void
-pmac_ide_dma_lost_irq (ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	volatile struct dbdma_regs __iomem *dma = pmif->dma_regs;
-	unsigned long status = readl(&dma->status);
-
-	printk(KERN_ERR "ide-pmac lost interrupt, dma status: %lx\n", status);
-}
-
-static const struct ide_dma_ops pmac_dma_ops = {
-	.dma_host_set		= pmac_ide_dma_host_set,
-	.dma_setup		= pmac_ide_dma_setup,
-	.dma_start		= pmac_ide_dma_start,
-	.dma_end		= pmac_ide_dma_end,
-	.dma_test_irq		= pmac_ide_dma_test_irq,
-	.dma_lost_irq		= pmac_ide_dma_lost_irq,
-};
-
-/*
- * Allocate the data structures needed for using DMA with an interface
- * and fill the proper list of functions pointers
- */
-static int pmac_ide_init_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	pmac_ide_hwif_t *pmif = dev_get_drvdata(hwif->gendev.parent);
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	/* We won't need pci_dev if we switch to generic consistent
-	 * DMA routines ...
-	 */
-	if (dev == NULL || pmif->dma_regs == 0)
-		return -ENODEV;
-	/*
-	 * Allocate space for the DBDMA commands.
-	 * The +2 is +1 for the stop command and +1 to allow for
-	 * aligning the start address to a multiple of 16 bytes.
-	 */
-	pmif->dma_table_cpu = dma_alloc_coherent(&dev->dev,
-		(MAX_DCMDS + 2) * sizeof(struct dbdma_cmd),
-		&hwif->dmatable_dma, GFP_KERNEL);
-	if (pmif->dma_table_cpu == NULL) {
-		printk(KERN_ERR "%s: unable to allocate DMA command list\n",
-		       hwif->name);
-		return -ENOMEM;
-	}
-
-	hwif->sg_max_nents = MAX_DCMDS;
-
-	return 0;
-}
-
-module_init(pmac_ide_probe);
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/qd65xx.c b/drivers/ide/qd65xx.c
deleted file mode 100644
index ab79b62894645..0000000000000
--- a/drivers/ide/qd65xx.c
+++ /dev/null
@@ -1,446 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1996-2001  Linus Torvalds & author (see below)
- */
-
-/*
- *  Version 0.03	Cleaned auto-tune, added probe
- *  Version 0.04	Added second channel tuning
- *  Version 0.05	Enhanced tuning ; added qd6500 support
- *  Version 0.06	Added dos driver's list
- *  Version 0.07	Second channel bug fix 
- *
- * QDI QD6500/QD6580 EIDE controller fast support
- *
- * To activate controller support, use "ide0=qd65xx"
- */
-
-/*
- * Rewritten from the work of Colten Edwards <pje120@cs.usask.ca> by
- * Samuel Thibault <samuel.thibault@ens-lyon.org>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <asm/io.h>
-
-#define DRV_NAME "qd65xx"
-
-#include "qd65xx.h"
-
-/*
- * I/O ports are 0x30-0x31 (and 0x32-0x33 for qd6580)
- *            or 0xb0-0xb1 (and 0xb2-0xb3 for qd6580)
- *	-- qd6500 is a single IDE interface
- *	-- qd6580 is a dual IDE interface
- *
- * More research on qd6580 being done by willmore@cig.mot.com (David)
- * More Information given by Petr Soucek (petr@ryston.cz)
- * http://www.ryston.cz/petr/vlb
- */
-
-/*
- * base: Timer1
- *
- *
- * base+0x01: Config (R/O)
- *
- * bit 0: ide baseport: 1 = 0x1f0 ; 0 = 0x170 (only useful for qd6500)
- * bit 1: qd65xx baseport: 1 = 0xb0 ; 0 = 0x30
- * bit 2: ID3: bus speed: 1 = <=33MHz ; 0 = >33MHz
- * bit 3: qd6500: 1 = disabled, 0 = enabled
- *        qd6580: 1
- * upper nibble:
- *        qd6500: 1100
- *        qd6580: either 1010 or 0101
- *
- *
- * base+0x02: Timer2 (qd6580 only)
- *
- *
- * base+0x03: Control (qd6580 only)
- *
- * bits 0-3 must always be set 1
- * bit 4 must be set 1, but is set 0 by dos driver while measuring vlb clock
- * bit 0 : 1 = Only primary port enabled : channel 0 for hda, channel 1 for hdb
- *         0 = Primary and Secondary ports enabled : channel 0 for hda & hdb
- *                                                   channel 1 for hdc & hdd
- * bit 1 : 1 = only disks on primary port
- *         0 = disks & ATAPI devices on primary port
- * bit 2-4 : always 0
- * bit 5 : status, but of what ?
- * bit 6 : always set 1 by dos driver
- * bit 7 : set 1 for non-ATAPI devices on primary port
- *	(maybe read-ahead and post-write buffer ?)
- */
-
-static int timings[4]={-1,-1,-1,-1}; /* stores current timing for each timer */
-
-/*
- * qd65xx_select:
- *
- * This routine is invoked to prepare for access to a given drive.
- */
-
-static void qd65xx_dev_select(ide_drive_t *drive)
-{
-	u8 index = ((	(QD_TIMREG(drive)) & 0x80 ) >> 7) |
-			(QD_TIMREG(drive) & 0x02);
-
-	if (timings[index] != QD_TIMING(drive))
-		outb(timings[index] = QD_TIMING(drive), QD_TIMREG(drive));
-
-	outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr);
-}
-
-/*
- * qd6500_compute_timing
- *
- * computes the timing value where
- *	lower nibble represents active time,   in count of VLB clocks
- *	upper nibble represents recovery time, in count of VLB clocks
- */
-
-static u8 qd6500_compute_timing (ide_hwif_t *hwif, int active_time, int recovery_time)
-{
-	int clk = ide_vlb_clk ? ide_vlb_clk : 50;
-	u8 act_cyc, rec_cyc;
-
-	if (clk <= 33) {
-		act_cyc =  9 - IDE_IN(active_time   * clk / 1000 + 1, 2,  9);
-		rec_cyc = 15 - IDE_IN(recovery_time * clk / 1000 + 1, 0, 15);
-	} else {
-		act_cyc =  8 - IDE_IN(active_time   * clk / 1000 + 1, 1,  8);
-		rec_cyc = 18 - IDE_IN(recovery_time * clk / 1000 + 1, 3, 18);
-	}
-
-	return (rec_cyc << 4) | 0x08 | act_cyc;
-}
-
-/*
- * qd6580_compute_timing
- *
- * idem for qd6580
- */
-
-static u8 qd6580_compute_timing (int active_time, int recovery_time)
-{
-	int clk = ide_vlb_clk ? ide_vlb_clk : 50;
-	u8 act_cyc, rec_cyc;
-
-	act_cyc = 17 - IDE_IN(active_time   * clk / 1000 + 1, 2, 17);
-	rec_cyc = 15 - IDE_IN(recovery_time * clk / 1000 + 1, 2, 15);
-
-	return (rec_cyc << 4) | act_cyc;
-}
-
-/*
- * qd_find_disk_type
- *
- * tries to find timing from dos driver's table
- */
-
-static int qd_find_disk_type (ide_drive_t *drive,
-		int *active_time, int *recovery_time)
-{
-	struct qd65xx_timing_s *p;
-	char *m = (char *)&drive->id[ATA_ID_PROD];
-	char model[ATA_ID_PROD_LEN];
-
-	if (*m == 0)
-		return 0;
-
-	strncpy(model, m, ATA_ID_PROD_LEN);
-	ide_fixstring(model, ATA_ID_PROD_LEN, 1); /* byte-swap */
-
-	for (p = qd65xx_timing ; p->offset != -1 ; p++) {
-		if (!strncmp(p->model, model+p->offset, 4)) {
-			printk(KERN_DEBUG "%s: listed !\n", drive->name);
-			*active_time = p->active;
-			*recovery_time = p->recovery;
-			return 1;
-		}
-	}
-	return 0;
-}
-
-/*
- * qd_set_timing:
- *
- * records the timing
- */
-
-static void qd_set_timing (ide_drive_t *drive, u8 timing)
-{
-	unsigned long data = (unsigned long)ide_get_drivedata(drive);
-
-	data &= 0xff00;
-	data |= timing;
-	ide_set_drivedata(drive, (void *)data);
-
-	printk(KERN_DEBUG "%s: %#x\n", drive->name, timing);
-}
-
-static void qd6500_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	u16 *id = drive->id;
-	int active_time   = 175;
-	int recovery_time = 415; /* worst case values from the dos driver */
-
-	/* FIXME: use drive->pio_mode value */
-	if (!qd_find_disk_type(drive, &active_time, &recovery_time) &&
-	    (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) &&
-	    id[ATA_ID_EIDE_PIO] >= 240) {
-		printk(KERN_INFO "%s: PIO mode%d\n", drive->name,
-			id[ATA_ID_OLD_PIO_MODES] & 0xff);
-		active_time = 110;
-		recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120;
-	}
-
-	qd_set_timing(drive, qd6500_compute_timing(drive->hwif,
-				active_time, recovery_time));
-}
-
-static void qd6580_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	unsigned int cycle_time;
-	int active_time   = 175;
-	int recovery_time = 415; /* worst case values from the dos driver */
-	u8 base = (hwif->config_data & 0xff00) >> 8;
-
-	if (drive->id && !qd_find_disk_type(drive, &active_time, &recovery_time)) {
-		cycle_time = ide_pio_cycle_time(drive, pio);
-
-		switch (pio) {
-			case 0: break;
-			case 3:
-				if (cycle_time >= 110) {
-					active_time = 86;
-					recovery_time = cycle_time - 102;
-				} else
-					printk(KERN_WARNING "%s: Strange recovery time !\n",drive->name);
-				break;
-			case 4:
-				if (cycle_time >= 69) {
-					active_time = 70;
-					recovery_time = cycle_time - 61;
-				} else
-					printk(KERN_WARNING "%s: Strange recovery time !\n",drive->name);
-				break;
-			default:
-				if (cycle_time >= 180) {
-					active_time = 110;
-					recovery_time = cycle_time - 120;
-				} else {
-					active_time = t->active;
-					recovery_time = cycle_time - active_time;
-				}
-		}
-		printk(KERN_INFO "%s: PIO mode%d\n", drive->name,pio);
-	}
-
-	if (!hwif->channel && drive->media != ide_disk) {
-		outb(0x5f, QD_CONTROL_PORT);
-		printk(KERN_WARNING "%s: ATAPI: disabled read-ahead FIFO "
-			"and post-write buffer on %s.\n",
-			drive->name, hwif->name);
-	}
-
-	qd_set_timing(drive, qd6580_compute_timing(active_time, recovery_time));
-}
-
-/*
- * qd_testreg
- *
- * tests if the given port is a register
- */
-
-static int __init qd_testreg(int port)
-{
-	unsigned long flags;
-	u8 savereg, readreg;
-
-	local_irq_save(flags);
-	savereg = inb_p(port);
-	outb_p(QD_TESTVAL, port);	/* safe value */
-	readreg = inb_p(port);
-	outb(savereg, port);
-	local_irq_restore(flags);
-
-	if (savereg == QD_TESTVAL) {
-		printk(KERN_ERR "Outch ! the probe for qd65xx isn't reliable !\n");
-		printk(KERN_ERR "Please contact maintainers to tell about your hardware\n");
-		printk(KERN_ERR "Assuming qd65xx is not present.\n");
-		return 1;
-	}
-
-	return (readreg != QD_TESTVAL);
-}
-
-static void __init qd6500_init_dev(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 base = (hwif->config_data & 0xff00) >> 8;
-	u8 config = QD_CONFIG(hwif);
-
-	ide_set_drivedata(drive, (void *)QD6500_DEF_DATA);
-}
-
-static void __init qd6580_init_dev(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long t1, t2;
-	u8 base = (hwif->config_data & 0xff00) >> 8;
-	u8 config = QD_CONFIG(hwif);
-
-	if (hwif->host_flags & IDE_HFLAG_SINGLE) {
-		t1 = QD6580_DEF_DATA;
-		t2 = QD6580_DEF_DATA2;
-	} else
-		t2 = t1 = hwif->channel ? QD6580_DEF_DATA2 : QD6580_DEF_DATA;
-
-	ide_set_drivedata(drive, (void *)((drive->dn & 1) ? t2 : t1));
-}
-
-static const struct ide_tp_ops qd65xx_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= qd65xx_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_port_ops qd6500_port_ops = {
-	.init_dev		= qd6500_init_dev,
-	.set_pio_mode		= qd6500_set_pio_mode,
-};
-
-static const struct ide_port_ops qd6580_port_ops = {
-	.init_dev		= qd6580_init_dev,
-	.set_pio_mode		= qd6580_set_pio_mode,
-};
-
-static const struct ide_port_info qd65xx_port_info __initconst = {
-	.name			= DRV_NAME,
-	.tp_ops 		= &qd65xx_tp_ops,
-	.chipset		= ide_qd65xx,
-	.host_flags		= IDE_HFLAG_IO_32BIT |
-				  IDE_HFLAG_NO_DMA,
-	.pio_mask		= ATA_PIO4,
-};
-
-/*
- * qd_probe:
- *
- * looks at the specified baseport, and if qd found, registers & initialises it
- * return 1 if another qd may be probed
- */
-
-static int __init qd_probe(int base)
-{
-	int rc;
-	u8 config, unit, control;
-	struct ide_port_info d = qd65xx_port_info;
-
-	config = inb(QD_CONFIG_PORT);
-
-	if (! ((config & QD_CONFIG_BASEPORT) >> 1 == (base == 0xb0)) )
-		return -ENODEV;
-
-	unit = ! (config & QD_CONFIG_IDE_BASEPORT);
-
-	if (unit)
-		d.host_flags |= IDE_HFLAG_QD_2ND_PORT;
-
-	switch (config & 0xf0) {
-	case QD_CONFIG_QD6500:
-		if (qd_testreg(base))
-			 return -ENODEV;	/* bad register */
-
-		if (config & QD_CONFIG_DISABLED) {
-			printk(KERN_WARNING "qd6500 is disabled !\n");
-			return -ENODEV;
-		}
-
-		printk(KERN_NOTICE "qd6500 at %#x\n", base);
-		printk(KERN_DEBUG "qd6500: config=%#x, ID3=%u\n",
-			config, QD_ID3);
-
-		d.port_ops = &qd6500_port_ops;
-		d.host_flags |= IDE_HFLAG_SINGLE;
-		break;
-	case QD_CONFIG_QD6580_A:
-	case QD_CONFIG_QD6580_B:
-		if (qd_testreg(base) || qd_testreg(base + 0x02))
-			return -ENODEV;	/* bad registers */
-
-		control = inb(QD_CONTROL_PORT);
-
-		printk(KERN_NOTICE "qd6580 at %#x\n", base);
-		printk(KERN_DEBUG "qd6580: config=%#x, control=%#x, ID3=%u\n",
-			config, control, QD_ID3);
-
-		outb(QD_DEF_CONTR, QD_CONTROL_PORT);
-
-		d.port_ops = &qd6580_port_ops;
-		if (control & QD_CONTR_SEC_DISABLED)
-			d.host_flags |= IDE_HFLAG_SINGLE;
-
-		printk(KERN_INFO "qd6580: %s IDE board\n",
-			(control & QD_CONTR_SEC_DISABLED) ? "single" : "dual");
-		break;
-	default:
-		return -ENODEV;
-	}
-
-	rc = ide_legacy_device_add(&d, (base << 8) | config);
-
-	if (d.host_flags & IDE_HFLAG_SINGLE)
-		return (rc == 0) ? 1 : rc;
-
-	return rc;
-}
-
-static bool probe_qd65xx;
-
-module_param_named(probe, probe_qd65xx, bool, 0);
-MODULE_PARM_DESC(probe, "probe for QD65xx chipsets");
-
-static int __init qd65xx_init(void)
-{
-	int rc1, rc2 = -ENODEV;
-
-	if (probe_qd65xx == 0)
-		return -ENODEV;
-
-	rc1 = qd_probe(0x30);
-	if (rc1)
-		rc2 = qd_probe(0xb0);
-
-	if (rc1 < 0 && rc2 < 0)
-		return -ENODEV;
-
-	return 0;
-}
-
-module_init(qd65xx_init);
-
-MODULE_AUTHOR("Samuel Thibault");
-MODULE_DESCRIPTION("support of qd65xx vlb ide chipset");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/qd65xx.h b/drivers/ide/qd65xx.h
deleted file mode 100644
index 01a43ab45e0ec..0000000000000
--- a/drivers/ide/qd65xx.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2000	Linus Torvalds & authors
- */
-
-/*
- * Authors:	Petr Soucek <petr@ryston.cz>
- * 		Samuel Thibault <samuel.thibault@ens-lyon.org>
- */
-
-/* truncates a in [b,c] */
-#define IDE_IN(a,b,c)   ( ((a)<(b)) ? (b) : ( (a)>(c) ? (c) : (a)) )
-
-#define IDE_IMPLY(a,b)	((!(a)) || (b))
-
-#define QD_TIM1_PORT		(base)
-#define QD_CONFIG_PORT		(base+0x01)
-#define QD_TIM2_PORT		(base+0x02)
-#define QD_CONTROL_PORT		(base+0x03)
-
-#define QD_CONFIG_IDE_BASEPORT	0x01
-#define QD_CONFIG_BASEPORT	0x02
-#define QD_CONFIG_ID3		0x04
-#define QD_CONFIG_DISABLED	0x08
-#define QD_CONFIG_QD6500	0xc0
-#define QD_CONFIG_QD6580_A	0xa0
-#define QD_CONFIG_QD6580_B	0x50
-
-#define QD_CONTR_SEC_DISABLED	0x01
-
-#define QD_ID3			((config & QD_CONFIG_ID3)!=0)
-
-#define QD_CONFIG(hwif)		((hwif)->config_data & 0x00ff)
-
-static inline u8 QD_TIMING(ide_drive_t *drive)
-{
-	return (unsigned long)ide_get_drivedata(drive) & 0x00ff;
-}
-
-static inline u8 QD_TIMREG(ide_drive_t *drive)
-{
-	return ((unsigned long)ide_get_drivedata(drive) & 0xff00) >> 8;
-}
-
-#define QD6500_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0c : 0x08))
-#define QD6580_DEF_DATA		((QD_TIM1_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
-#define QD6580_DEF_DATA2	((QD_TIM2_PORT<<8) | (QD_ID3 ? 0x0a : 0x00))
-#define QD_DEF_CONTR		(0x40 | ((control & 0x02) ? 0x9f : 0x1f))
-
-#define QD_TESTVAL		0x19	/* safe value */
-
-/* Drive specific timing taken from DOS driver v3.7 */
-
-static struct qd65xx_timing_s {
-	s8	offset;   /* ofset from the beginning of Model Number" */
-	char	model[4];    /* 4 chars from Model number, no conversion */
-	s16	active;   /* active time */
-	s16	recovery; /* recovery time */
-} qd65xx_timing [] = {
-	{ 30, "2040", 110, 225 },  /* Conner CP30204			*/
-	{ 30, "2045", 135, 225 },  /* Conner CP30254			*/
-	{ 30, "1040", 155, 325 },  /* Conner CP30104			*/
-	{ 30, "1047", 135, 265 },  /* Conner CP30174			*/
-	{ 30, "5344", 135, 225 },  /* Conner CP3544			*/
-	{ 30, "01 4", 175, 405 },  /* Conner CP-3104			*/
-	{ 27, "C030", 175, 375 },  /* Conner CP3000			*/
-	{  8, "PL42", 110, 295 },  /* Quantum LP240			*/
-	{  8, "PL21", 110, 315 },  /* Quantum LP120			*/
-	{  8, "PL25", 175, 385 },  /* Quantum LP52			*/
-	{  4, "PA24", 110, 285 },  /* WD Piranha SP4200			*/
-	{  6, "2200", 110, 260 },  /* WD Caviar AC2200			*/
-	{  6, "3204", 110, 235 },  /* WD Caviar AC2340			*/
-	{  6, "1202", 110, 265 },  /* WD Caviar AC2120			*/
-	{  0, "DS3-", 135, 315 },  /* Teac SD340			*/
-	{  8, "KM32", 175, 355 },  /* Toshiba MK234			*/
-	{  2, "53A1", 175, 355 },  /* Seagate ST351A			*/
-	{  2, "4108", 175, 295 },  /* Seagate ST1480A			*/
-	{  2, "1344", 175, 335 },  /* Seagate ST3144A			*/
-	{  6, "7 12", 110, 225 },  /* Maxtor 7213A			*/
-	{ 30, "02F4", 145, 295 },  /* Conner 3204F			*/
-	{  2, "1302", 175, 335 },  /* Seagate ST3120A			*/
-	{  2, "2334", 145, 265 },  /* Seagate ST3243A			*/
-	{  2, "2338", 145, 275 },  /* Seagate ST3283A			*/
-	{  2, "3309", 145, 275 },  /* Seagate ST3390A			*/
-	{  2, "5305", 145, 275 },  /* Seagate ST3550A			*/
-	{  2, "4100", 175, 295 },  /* Seagate ST1400A			*/
-	{  2, "4110", 175, 295 },  /* Seagate ST1401A			*/
-	{  2, "6300", 135, 265 },  /* Seagate ST3600A			*/
-	{  2, "5300", 135, 265 },  /* Seagate ST3500A			*/
-	{  6, "7 31", 135, 225 },  /* Maxtor 7131 AT			*/
-	{  6, "7 43", 115, 265 },  /* Maxtor 7345 AT			*/
-	{  6, "7 42", 110, 255 },  /* Maxtor 7245 AT			*/
-	{  6, "3 04", 135, 265 },  /* Maxtor 340 AT			*/
-	{  6, "61 0", 135, 285 },  /* WD AC160				*/
-	{  6, "1107", 135, 235 },  /* WD AC1170				*/
-	{  6, "2101", 110, 220 },  /* WD AC1210				*/
-	{  6, "4202", 135, 245 },  /* WD AC2420				*/
-	{  6, "41 0", 175, 355 },  /* WD Caviar 140			*/
-	{  6, "82 0", 175, 355 },  /* WD Caviar 280			*/
-	{  8, "PL01", 175, 375 },  /* Quantum LP105			*/
-	{  8, "PL25", 110, 295 },  /* Quantum LP525			*/
-	{ 10, "4S 2", 175, 385 },  /* Quantum ELS42			*/
-	{ 10, "8S 5", 175, 385 },  /* Quantum ELS85			*/
-	{ 10, "1S72", 175, 385 },  /* Quantum ELS127			*/
-	{ 10, "1S07", 175, 385 },  /* Quantum ELS170			*/
-	{  8, "ZE42", 135, 295 },  /* Quantum EZ240			*/
-	{  8, "ZE21", 175, 385 },  /* Quantum EZ127			*/
-	{  8, "ZE58", 175, 385 },  /* Quantum EZ85			*/
-	{  8, "ZE24", 175, 385 },  /* Quantum EZ42			*/
-	{ 27, "C036", 155, 325 },  /* Conner CP30064			*/
-	{ 27, "C038", 155, 325 },  /* Conner CP30084			*/
-	{  6, "2205", 110, 255 },  /* WDC AC2250			*/
-	{  2, " CHA", 140, 415 },  /* WDC AH series; WDC AH260, WDC	*/
-	{  2, " CLA", 140, 415 },  /* WDC AL series: WDC AL2120, 2170,	*/
-	{  4, "UC41", 140, 415 },  /* WDC CU140				*/
-	{  6, "1207", 130, 275 },  /* WDC AC2170			*/
-	{  6, "2107", 130, 275 },  /* WDC AC1270			*/
-	{  6, "5204", 130, 275 },  /* WDC AC2540			*/
-	{ 30, "3004", 110, 235 },  /* Conner CP30340			*/
-	{ 30, "0345", 135, 255 },  /* Conner CP30544			*/
-	{ 12, "12A3", 175, 320 },  /* MAXTOR LXT-213A			*/
-	{ 12, "43A0", 145, 240 },  /* MAXTOR LXT-340A			*/
-	{  6, "7 21", 180, 290 },  /* Maxtor 7120 AT			*/
-	{  6, "7 71", 135, 240 },  /* Maxtor 7170 AT			*/
-	{ 12, "45\0000", 110, 205 },   /* MAXTOR MXT-540		*/
-	{  8, "PL11", 180, 290 },  /* QUANTUM LP110A			*/
-	{  8, "OG21", 150, 275 },  /* QUANTUM GO120			*/
-	{ 12, "42A5", 175, 320 },  /* MAXTOR LXT-245A			*/
-	{  2, "2309", 175, 295 },  /* ST3290A				*/
-	{  2, "3358", 180, 310 },  /* ST3385A				*/
-	{  2, "6355", 180, 310 },  /* ST3655A				*/
-	{  2, "1900", 175, 270 },  /* ST9100A				*/
-	{  2, "1954", 175, 270 },  /* ST9145A				*/
-	{  2, "1909", 175, 270 },  /* ST9190AG				*/
-	{  2, "2953", 175, 270 },  /* ST9235A				*/
-	{  2, "1359", 175, 270 },  /* ST3195A				*/
-	{ 24, "3R11", 175, 290 },  /* ALPS ELECTRIC Co.,LTD, DR311C	*/
-	{  0, "2M26", 175, 215 },  /* M262XT-0Ah			*/
-	{  4, "2253", 175, 300 },  /* HP C2235A				*/
-	{  4, "-32A", 145, 245 },  /* H3133-A2				*/
-	{ 30, "0326", 150, 270 },  /* Samsung Electronics 120MB		*/
-	{ 30, "3044", 110, 195 },  /* Conner CFA340A			*/
-	{ 30, "43A0", 110, 195 },  /* Conner CFA340A			*/
-	{ -1, "    ", 175, 415 }   /* unknown disk name			*/
-};
diff --git a/drivers/ide/rapide.c b/drivers/ide/rapide.c
deleted file mode 100644
index 0ab8b86b7ed70..0000000000000
--- a/drivers/ide/rapide.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 1996-2002 Russell King.
- */
-
-#include <linux/module.h>
-#include <linux/blkdev.h>
-#include <linux/errno.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/ecard.h>
-
-static const struct ide_port_info rapide_port_info = {
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.chipset		= ide_generic,
-};
-
-static void rapide_setup_ports(struct ide_hw *hw, void __iomem *base,
-			       void __iomem *ctrl, unsigned int sz, int irq)
-{
-	unsigned long port = (unsigned long)base;
-	int i;
-
-	for (i = 0; i <= 7; i++) {
-		hw->io_ports_array[i] = port;
-		port += sz;
-	}
-	hw->io_ports.ctl_addr = (unsigned long)ctrl;
-	hw->irq = irq;
-}
-
-static int rapide_probe(struct expansion_card *ec, const struct ecard_id *id)
-{
-	void __iomem *base;
-	struct ide_host *host;
-	int ret;
-	struct ide_hw hw, *hws[] = { &hw };
-
-	ret = ecard_request_resources(ec);
-	if (ret)
-		goto out;
-
-	base = ecardm_iomap(ec, ECARD_RES_MEMC, 0, 0);
-	if (!base) {
-		ret = -ENOMEM;
-		goto release;
-	}
-
-	memset(&hw, 0, sizeof(hw));
-	rapide_setup_ports(&hw, base, base + 0x818, 1 << 6, ec->irq);
-	hw.dev = &ec->dev;
-
-	ret = ide_host_add(&rapide_port_info, hws, 1, &host);
-	if (ret)
-		goto release;
-
-	ecard_set_drvdata(ec, host);
-	goto out;
-
- release:
-	ecard_release_resources(ec);
- out:
-	return ret;
-}
-
-static void rapide_remove(struct expansion_card *ec)
-{
-	struct ide_host *host = ecard_get_drvdata(ec);
-
-	ecard_set_drvdata(ec, NULL);
-
-	ide_host_remove(host);
-
-	ecard_release_resources(ec);
-}
-
-static struct ecard_id rapide_ids[] = {
-	{ MANU_YELLOWSTONE, PROD_YELLOWSTONE_RAPIDE32 },
-	{ 0xffff, 0xffff }
-};
-
-static struct ecard_driver rapide_driver = {
-	.probe		= rapide_probe,
-	.remove		= rapide_remove,
-	.id_table	= rapide_ids,
-	.drv = {
-		.name	= "rapide",
-	},
-};
-
-static int __init rapide_init(void)
-{
-	return ecard_register_driver(&rapide_driver);
-}
-
-static void __exit rapide_exit(void)
-{
-	ecard_remove_driver(&rapide_driver);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Yellowstone RAPIDE driver");
-
-module_init(rapide_init);
-module_exit(rapide_exit);
diff --git a/drivers/ide/rz1000.c b/drivers/ide/rz1000.c
deleted file mode 100644
index fce2b7de5a19a..0000000000000
--- a/drivers/ide/rz1000.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1995-1998  Linus Torvalds & author (see below)
- */
-
-/*
- *  Principal Author:  mlord@pobox.com (Mark Lord)
- *
- *  See linux/MAINTAINERS for address of current maintainer.
- *
- *  This file provides support for disabling the buggy read-ahead
- *  mode of the RZ1000 IDE chipset, commonly used on Intel motherboards.
- *
- *  Dunno if this fixes both ports, or only the primary port (?).
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "rz1000"
-
-static int rz1000_disable_readahead(struct pci_dev *dev)
-{
-	u16 reg;
-
-	if (!pci_read_config_word (dev, 0x40, &reg) &&
-	    !pci_write_config_word(dev, 0x40, reg & 0xdfff)) {
-		printk(KERN_INFO "%s: disabled chipset read-ahead "
-			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
-		return 0;
-	} else {
-		printk(KERN_INFO "%s: serialized, disabled unmasking "
-			"(buggy RZ1000/RZ1001)\n", pci_name(dev));
-		return 1;
-	}
-}
-
-static const struct ide_port_info rz1000_chipset = {
-	.name		= DRV_NAME,
-	.host_flags	= IDE_HFLAG_NO_DMA,
-};
-
-static int rz1000_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d = rz1000_chipset;
-	int rc;
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		return rc;
-
-	if (rz1000_disable_readahead(dev)) {
-		d.host_flags |= IDE_HFLAG_SERIALIZE;
-		d.host_flags |= IDE_HFLAG_NO_UNMASK_IRQS;
-	}
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static void rz1000_remove(struct pci_dev *dev)
-{
-	ide_pci_remove(dev);
-	pci_disable_device(dev);
-}
-
-static const struct pci_device_id rz1000_pci_tbl[] = {
-	{ PCI_VDEVICE(PCTECH, PCI_DEVICE_ID_PCTECH_RZ1000), 0 },
-	{ PCI_VDEVICE(PCTECH, PCI_DEVICE_ID_PCTECH_RZ1001), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, rz1000_pci_tbl);
-
-static struct pci_driver rz1000_pci_driver = {
-	.name		= "RZ1000_IDE",
-	.id_table	= rz1000_pci_tbl,
-	.probe		= rz1000_init_one,
-	.remove		= rz1000_remove,
-};
-
-static int __init rz1000_ide_init(void)
-{
-	return ide_pci_register_driver(&rz1000_pci_driver);
-}
-
-static void __exit rz1000_ide_exit(void)
-{
-	pci_unregister_driver(&rz1000_pci_driver);
-}
-
-module_init(rz1000_ide_init);
-module_exit(rz1000_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for RZ1000 IDE");
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/ide/sc1200.c b/drivers/ide/sc1200.c
deleted file mode 100644
index a5b701818405b..0000000000000
--- a/drivers/ide/sc1200.c
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright (C) 2000-2002		Mark Lord <mlord@pobox.com>
- * Copyright (C)      2007		Bartlomiej Zolnierkiewicz
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- * Development of this chipset driver was funded
- * by the nice folks at National Semiconductor.
- *
- * Documentation:
- *	Available from National Semiconductor
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-#include <linux/pm.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "sc1200"
-
-#define SC1200_REV_A	0x00
-#define SC1200_REV_B1	0x01
-#define SC1200_REV_B3	0x02
-#define SC1200_REV_C1	0x03
-#define SC1200_REV_D1	0x04
-
-#define PCI_CLK_33	0x00
-#define PCI_CLK_48	0x01
-#define PCI_CLK_66	0x02
-#define PCI_CLK_33A	0x03
-
-static unsigned short sc1200_get_pci_clock (void)
-{
-	unsigned char chip_id, silicon_revision;
-	unsigned int pci_clock;
-	/*
-	 * Check the silicon revision, as not all versions of the chip
-	 * have the register with the fast PCI bus timings.
-	 */
-	chip_id = inb (0x903c);
-	silicon_revision = inb (0x903d);
-
-	// Read the fast pci clock frequency
-	if (chip_id == 0x04 && silicon_revision < SC1200_REV_B1) {
-		pci_clock = PCI_CLK_33;
-	} else {
-		// check clock generator configuration (cfcc)
-		// the clock is in bits 8 and 9 of this word
-
-		pci_clock = inw (0x901e);
-		pci_clock >>= 8;
-		pci_clock &= 0x03;
-		if (pci_clock == PCI_CLK_33A)
-			pci_clock = PCI_CLK_33;
-	}
-	return pci_clock;
-}
-
-/*
- * Here are the standard PIO mode 0-4 timings for each "format".
- * Format-0 uses fast data reg timings, with slower command reg timings.
- * Format-1 uses fast timings for all registers, but won't work with all drives.
- */
-static const unsigned int sc1200_pio_timings[4][5] =
-	{{0x00009172, 0x00012171, 0x00020080, 0x00032010, 0x00040010},	// format0  33Mhz
-	 {0xd1329172, 0x71212171, 0x30200080, 0x20102010, 0x00100010},	// format1, 33Mhz
-	 {0xfaa3f4f3, 0xc23232b2, 0x513101c1, 0x31213121, 0x10211021},	// format1, 48Mhz
-	 {0xfff4fff4, 0xf35353d3, 0x814102f1, 0x42314231, 0x11311131}};	// format1, 66Mhz
-
-/*
- * After chip reset, the PIO timings are set to 0x00009172, which is not valid.
- */
-//#define SC1200_BAD_PIO(timings) (((timings)&~0x80000000)==0x00009172)
-
-static void sc1200_tunepio(ide_drive_t *drive, u8 pio)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	unsigned int basereg = hwif->channel ? 0x50 : 0x40, format = 0;
-
-	pci_read_config_dword(pdev, basereg + 4, &format);
-	format = (format >> 31) & 1;
-	if (format)
-		format += sc1200_get_pci_clock();
-	pci_write_config_dword(pdev, basereg + ((drive->dn & 1) << 3),
-			       sc1200_pio_timings[format][pio]);
-}
-
-/*
- *	The SC1200 specifies that two drives sharing a cable cannot mix
- *	UDMA/MDMA.  It has to be one or the other, for the pair, though
- *	different timings can still be chosen for each drive.  We could
- *	set the appropriate timing bits on the fly, but that might be
- *	a bit confusing.  So, for now we statically handle this requirement
- *	by looking at our mate drive to see what it is capable of, before
- *	choosing a mode for our own drive.
- */
-static u8 sc1200_udma_filter(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	ide_drive_t *mate = ide_get_pair_dev(drive);
-	u16 *mateid;
-	u8 mask = hwif->ultra_mask;
-
-	if (mate == NULL)
-		goto out;
-	mateid = mate->id;
-
-	if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) {
-		if ((mateid[ATA_ID_FIELD_VALID] & 4) &&
-		    (mateid[ATA_ID_UDMA_MODES] & 7))
-			goto out;
-		if (mateid[ATA_ID_MWDMA_MODES] & 7)
-			mask = 0;
-	}
-out:
-	return mask;
-}
-
-static void sc1200_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev		*dev = to_pci_dev(hwif->dev);
-	unsigned int		reg, timings;
-	unsigned short		pci_clock;
-	unsigned int		basereg = hwif->channel ? 0x50 : 0x40;
-	const u8		mode = drive->dma_mode;
-
-	static const u32 udma_timing[3][3] = {
-		{ 0x00921250, 0x00911140, 0x00911030 },
-		{ 0x00932470, 0x00922260, 0x00922140 },
-		{ 0x009436a1, 0x00933481, 0x00923261 },
-	};
-
-	static const u32 mwdma_timing[3][3] = {
-		{ 0x00077771, 0x00012121, 0x00002020 },
-		{ 0x000bbbb2, 0x00024241, 0x00013131 },
-		{ 0x000ffff3, 0x00035352, 0x00015151 },
-	};
-
-	pci_clock = sc1200_get_pci_clock();
-
-	/*
-	 * Note that each DMA mode has several timings associated with it.
-	 * The correct timing depends on the fast PCI clock freq.
-	 */
-
-	if (mode >= XFER_UDMA_0)
-		timings =  udma_timing[pci_clock][mode - XFER_UDMA_0];
-	else
-		timings = mwdma_timing[pci_clock][mode - XFER_MW_DMA_0];
-
-	if ((drive->dn & 1) == 0) {
-		pci_read_config_dword(dev, basereg + 4, &reg);
-		timings |= reg & 0x80000000;	/* preserve PIO format bit */
-		pci_write_config_dword(dev, basereg + 4, timings);
-	} else
-		pci_write_config_dword(dev, basereg + 12, timings);
-}
-
-/*  Replacement for the standard ide_dma_end action in
- *  dma_proc.
- *
- *  returns 1 on error, 0 otherwise
- */
-static int sc1200_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned long dma_base = hwif->dma_base;
-	u8 dma_stat;
-
-	dma_stat = inb(dma_base+2);		/* get DMA status */
-
-	if (!(dma_stat & 4))
-		printk(" ide_dma_end dma_stat=%0x err=%x newerr=%x\n",
-		  dma_stat, ((dma_stat&7)!=4), ((dma_stat&2)==2));
-
-	outb(dma_stat|0x1b, dma_base+2);	/* clear the INTR & ERROR bits */
-	outb(inb(dma_base)&~1, dma_base);	/* !! DO THIS HERE !! stop DMA */
-
-	return (dma_stat & 7) != 4;		/* verify good DMA status */
-}
-
-/*
- * sc1200_set_pio_mode() handles setting of PIO modes
- * for both the chipset and drive.
- *
- * All existing BIOSs for this chipset guarantee that all drives
- * will have valid default PIO timings set up before we get here.
- */
-
-static void sc1200_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	int		mode = -1;
-	const u8	pio = drive->pio_mode - XFER_PIO_0;
-
-	/*
-	 * bad abuse of ->set_pio_mode interface
-	 */
-	switch (pio) {
-		case 200: mode = XFER_UDMA_0;	break;
-		case 201: mode = XFER_UDMA_1;	break;
-		case 202: mode = XFER_UDMA_2;	break;
-		case 100: mode = XFER_MW_DMA_0;	break;
-		case 101: mode = XFER_MW_DMA_1;	break;
-		case 102: mode = XFER_MW_DMA_2;	break;
-	}
-	if (mode != -1) {
-		printk("SC1200: %s: changing (U)DMA mode\n", drive->name);
-		ide_dma_off_quietly(drive);
-		if (ide_set_dma_mode(drive, mode) == 0 &&
-		    (drive->dev_flags & IDE_DFLAG_USING_DMA))
-			hwif->dma_ops->dma_host_set(drive, 1);
-		return;
-	}
-
-	sc1200_tunepio(drive, pio);
-}
-
-#ifdef CONFIG_PM
-struct sc1200_saved_state {
-	u32 regs[8];
-};
-
-static int sc1200_suspend (struct pci_dev *dev, pm_message_t state)
-{
-	printk("SC1200: suspend(%u)\n", state.event);
-
-	/*
-	 * we only save state when going from full power to less
-	 */
-	if (state.event == PM_EVENT_ON) {
-		struct ide_host *host = pci_get_drvdata(dev);
-		struct sc1200_saved_state *ss = host->host_priv;
-		unsigned int r;
-
-		/*
-		 * save timing registers
-		 * (this may be unnecessary if BIOS also does it)
-		 */
-		for (r = 0; r < 8; r++)
-			pci_read_config_dword(dev, 0x40 + r * 4, &ss->regs[r]);
-	}
-
-	pci_disable_device(dev);
-	pci_set_power_state(dev, pci_choose_state(dev, state));
-	return 0;
-}
-
-static int sc1200_resume (struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct sc1200_saved_state *ss = host->host_priv;
-	unsigned int r;
-	int i;
-
-	i = pci_enable_device(dev);
-	if (i)
-		return i;
-
-	/*
-	 * restore timing registers
-	 * (this may be unnecessary if BIOS also does it)
-	 */
-	for (r = 0; r < 8; r++)
-		pci_write_config_dword(dev, 0x40 + r * 4, ss->regs[r]);
-
-	return 0;
-}
-#endif
-
-static const struct ide_port_ops sc1200_port_ops = {
-	.set_pio_mode		= sc1200_set_pio_mode,
-	.set_dma_mode		= sc1200_set_dma_mode,
-	.udma_filter		= sc1200_udma_filter,
-};
-
-static const struct ide_dma_ops sc1200_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= sc1200_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info sc1200_chipset = {
-	.name		= DRV_NAME,
-	.port_ops	= &sc1200_port_ops,
-	.dma_ops	= &sc1200_dma_ops,
-	.host_flags	= IDE_HFLAG_SERIALIZE |
-			  IDE_HFLAG_POST_SET_MODE |
-			  IDE_HFLAG_ABUSE_DMA_MODES,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA2,
-};
-
-static int sc1200_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct sc1200_saved_state *ss = NULL;
-	int rc;
-
-#ifdef CONFIG_PM
-	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-	if (ss == NULL)
-		return -ENOMEM;
-#endif
-	rc = ide_pci_init_one(dev, &sc1200_chipset, ss);
-	if (rc)
-		kfree(ss);
-
-	return rc;
-}
-
-static const struct pci_device_id sc1200_pci_tbl[] = {
-	{ PCI_VDEVICE(NS, PCI_DEVICE_ID_NS_SCx200_IDE), 0},
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, sc1200_pci_tbl);
-
-static struct pci_driver sc1200_pci_driver = {
-	.name		= "SC1200_IDE",
-	.id_table	= sc1200_pci_tbl,
-	.probe		= sc1200_init_one,
-	.remove		= ide_pci_remove,
-#ifdef CONFIG_PM
-	.suspend	= sc1200_suspend,
-	.resume		= sc1200_resume,
-#endif
-};
-
-static int __init sc1200_ide_init(void)
-{
-	return ide_pci_register_driver(&sc1200_pci_driver);
-}
-
-static void __exit sc1200_ide_exit(void)
-{
-	pci_unregister_driver(&sc1200_pci_driver);
-}
-
-module_init(sc1200_ide_init);
-module_exit(sc1200_ide_exit);
-
-MODULE_AUTHOR("Mark Lord");
-MODULE_DESCRIPTION("PCI driver module for NS SC1200 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c
deleted file mode 100644
index 458e72e034b09..0000000000000
--- a/drivers/ide/serverworks.c
+++ /dev/null
@@ -1,456 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1998-2000 Michel Aubry
- * Copyright (C) 1998-2000 Andrzej Krzysztofowicz
- * Copyright (C) 1998-2000 Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2007-2010 Bartlomiej Zolnierkiewicz
- * Portions copyright (c) 2001 Sun Microsystems
- *
- *
- * RCC/ServerWorks IDE driver for Linux
- *
- *   OSB4: `Open South Bridge' IDE Interface (fn 1)
- *         supports UDMA mode 2 (33 MB/s)
- *
- *   CSB5: `Champion South Bridge' IDE Interface (fn 1)
- *         all revisions support UDMA mode 4 (66 MB/s)
- *         revision A2.0 and up support UDMA mode 5 (100 MB/s)
- *
- *         *** The CSB5 does not provide ANY register ***
- *         *** to detect 80-conductor cable presence. ***
- *
- *   CSB6: `Champion South Bridge' IDE Interface (optional: third channel)
- *
- *   HT1000: AKA BCM5785 - Hypertransport Southbridge for Opteron systems. IDE
- *   controller same as the CSB6. Single channel ATA100 only.
- *
- * Documentation:
- *	Available under NDA only. Errata info very hard to get.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "serverworks"
-
-#define SVWKS_CSB5_REVISION_NEW	0x92 /* min PCI_REVISION_ID for UDMA5 (A2.0) */
-#define SVWKS_CSB6_REVISION	0xa0 /* min PCI_REVISION_ID for UDMA4 (A1.0) */
-
-/* Seagate Barracuda ATA IV Family drives in UDMA mode 5
- * can overrun their FIFOs when used with the CSB5 */
-static const char *svwks_bad_ata100[] = {
-	"ST320011A",
-	"ST340016A",
-	"ST360021A",
-	"ST380021A",
-	NULL
-};
-
-static int check_in_drive_lists (ide_drive_t *drive, const char **list)
-{
-	char *m = (char *)&drive->id[ATA_ID_PROD];
-
-	while (*list)
-		if (!strcmp(*list++, m))
-			return 1;
-	return 0;
-}
-
-static u8 svwks_udma_filter(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-
-	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) {
-		return 0x1f;
-	} else if (dev->revision < SVWKS_CSB5_REVISION_NEW) {
-		return 0x07;
-	} else {
-		u8 btr = 0, mode, mask;
-
-		pci_read_config_byte(dev, 0x5A, &btr);
-		mode = btr & 0x3;
-
-		/* If someone decides to do UDMA133 on CSB5 the same
-		   issue will bite so be inclusive */
-		if (mode > 2 && check_in_drive_lists(drive, svwks_bad_ata100))
-			mode = 2;
-
-		switch(mode) {
-		case 3:	 mask = 0x3f; break;
-		case 2:	 mask = 0x1f; break;
-		case 1:	 mask = 0x07; break;
-		default: mask = 0x00; break;
-		}
-
-		return mask;
-	}
-}
-
-static u8 svwks_csb_check (struct pci_dev *dev)
-{
-	switch (dev->device) {
-		case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE:
-		case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE:
-		case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2:
-		case PCI_DEVICE_ID_SERVERWORKS_HT1000IDE:
-			return 1;
-		default:
-			break;
-	}
-	return 0;
-}
-
-static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u8 pio_modes[] = { 0x5d, 0x47, 0x34, 0x22, 0x20 };
-	static const u8 drive_pci[] = { 0x41, 0x40, 0x43, 0x42 };
-
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	if (drive->dn >= ARRAY_SIZE(drive_pci))
-		return;
-
-	pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]);
-
-	if (svwks_csb_check(dev)) {
-		u16 csb_pio = 0;
-
-		pci_read_config_word(dev, 0x4a, &csb_pio);
-
-		csb_pio &= ~(0x0f << (4 * drive->dn));
-		csb_pio |= (pio << (4 * drive->dn));
-
-		pci_write_config_word(dev, 0x4a, csb_pio);
-	}
-}
-
-static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u8 udma_modes[]		= { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05 };
-	static const u8 dma_modes[]		= { 0x77, 0x21, 0x20 };
-	static const u8 drive_pci2[]		= { 0x45, 0x44, 0x47, 0x46 };
-
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	const u8 speed		= drive->dma_mode;
-	u8 unit			= drive->dn & 1;
-
-	u8 ultra_enable	 = 0, ultra_timing = 0, dma_timing = 0;
-
-	if (drive->dn >= ARRAY_SIZE(drive_pci2))
-		return;
-
-	pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing);
-	pci_read_config_byte(dev, 0x54, &ultra_enable);
-
-	ultra_timing	&= ~(0x0F << (4*unit));
-	ultra_enable	&= ~(0x01 << drive->dn);
-
-	if (speed >= XFER_UDMA_0) {
-		dma_timing   |= dma_modes[2];
-		ultra_timing |= (udma_modes[speed - XFER_UDMA_0] << (4 * unit));
-		ultra_enable |= (0x01 << drive->dn);
-	} else if (speed >= XFER_MW_DMA_0)
-		dma_timing   |= dma_modes[speed - XFER_MW_DMA_0];
-
-	pci_write_config_byte(dev, drive_pci2[drive->dn], dma_timing);
-	pci_write_config_byte(dev, (0x56|hwif->channel), ultra_timing);
-	pci_write_config_byte(dev, 0x54, ultra_enable);
-}
-
-static int init_chipset_svwks(struct pci_dev *dev)
-{
-	unsigned int reg;
-	u8 btr;
-
-	/* force Master Latency Timer value to 64 PCICLKs */
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x40);
-
-	/* OSB4 : South Bridge and IDE */
-	if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) {
-		struct pci_dev *isa_dev =
-			pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
-					PCI_DEVICE_ID_SERVERWORKS_OSB4, NULL);
-		if (isa_dev) {
-			pci_read_config_dword(isa_dev, 0x64, &reg);
-			reg &= ~0x00002000; /* disable 600ns interrupt mask */
-			if(!(reg & 0x00004000))
-				printk(KERN_DEBUG DRV_NAME " %s: UDMA not BIOS "
-					"enabled.\n", pci_name(dev));
-			reg |=  0x00004000; /* enable UDMA/33 support */
-			pci_write_config_dword(isa_dev, 0x64, reg);
-			pci_dev_put(isa_dev);
-		}
-	}
-
-	/* setup CSB5/CSB6 : South Bridge and IDE option RAID */
-	else if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE) ||
-		 (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
-		 (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2)) {
-
-		/* Third Channel Test */
-		if (!(PCI_FUNC(dev->devfn) & 1)) {
-			struct pci_dev * findev = NULL;
-			u32 reg4c = 0;
-			findev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
-				PCI_DEVICE_ID_SERVERWORKS_CSB5, NULL);
-			if (findev) {
-				pci_read_config_dword(findev, 0x4C, &reg4c);
-				reg4c &= ~0x000007FF;
-				reg4c |=  0x00000040;
-				reg4c |=  0x00000020;
-				pci_write_config_dword(findev, 0x4C, reg4c);
-				pci_dev_put(findev);
-			}
-			outb_p(0x06, 0x0c00);
-			dev->irq = inb_p(0x0c01);
-		} else {
-			struct pci_dev * findev = NULL;
-			u8 reg41 = 0;
-
-			findev = pci_get_device(PCI_VENDOR_ID_SERVERWORKS,
-					PCI_DEVICE_ID_SERVERWORKS_CSB6, NULL);
-			if (findev) {
-				pci_read_config_byte(findev, 0x41, &reg41);
-				reg41 &= ~0x40;
-				pci_write_config_byte(findev, 0x41, reg41);
-				pci_dev_put(findev);
-			}
-			/*
-			 * This is a device pin issue on CSB6.
-			 * Since there will be a future raid mode,
-			 * early versions of the chipset require the
-			 * interrupt pin to be set, and it is a compatibility
-			 * mode issue.
-			 */
-			if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE)
-				dev->irq = 0;
-		}
-//		pci_read_config_dword(dev, 0x40, &pioreg)
-//		pci_write_config_dword(dev, 0x40, 0x99999999);
-//		pci_read_config_dword(dev, 0x44, &dmareg);
-//		pci_write_config_dword(dev, 0x44, 0xFFFFFFFF);
-		/* setup the UDMA Control register
-		 *
-		 * 1. clear bit 6 to enable DMA
-		 * 2. enable DMA modes with bits 0-1
-		 * 	00 : legacy
-		 * 	01 : udma2
-		 * 	10 : udma2/udma4
-		 * 	11 : udma2/udma4/udma5
-		 */
-		pci_read_config_byte(dev, 0x5A, &btr);
-		btr &= ~0x40;
-		if (!(PCI_FUNC(dev->devfn) & 1))
-			btr |= 0x2;
-		else
-			btr |= (dev->revision >= SVWKS_CSB5_REVISION_NEW) ? 0x3 : 0x2;
-		pci_write_config_byte(dev, 0x5A, btr);
-	}
-	/* Setup HT1000 SouthBridge Controller - Single Channel Only */
-	else if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) {
-		pci_read_config_byte(dev, 0x5A, &btr);
-		btr &= ~0x40;
-		btr |= 0x3;
-		pci_write_config_byte(dev, 0x5A, btr);
-	}
-
-	return 0;
-}
-
-static u8 ata66_svwks_svwks(ide_hwif_t *hwif)
-{
-	return ATA_CBL_PATA80;
-}
-
-/* On Dell PowerEdge servers with a CSB5/CSB6, the top two bits
- * of the subsystem device ID indicate presence of an 80-pin cable.
- * Bit 15 clear = secondary IDE channel does not have 80-pin cable.
- * Bit 15 set   = secondary IDE channel has 80-pin cable.
- * Bit 14 clear = primary IDE channel does not have 80-pin cable.
- * Bit 14 set   = primary IDE channel has 80-pin cable.
- */
-static u8 ata66_svwks_dell(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL &&
-	    dev->vendor	== PCI_VENDOR_ID_SERVERWORKS &&
-	    (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE ||
-	     dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE))
-		return ((1 << (hwif->channel + 14)) &
-			dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-	return ATA_CBL_PATA40;
-}
-
-/* Sun Cobalt Alpine hardware avoids the 80-pin cable
- * detect issue by attaching the drives directly to the board.
- * This check follows the Dell precedent (how scary is that?!)
- *
- * WARNING: this only works on Alpine hardware!
- */
-static u8 ata66_svwks_cobalt(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN &&
-	    dev->vendor	== PCI_VENDOR_ID_SERVERWORKS &&
-	    dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB5IDE)
-		return ((1 << (hwif->channel + 14)) &
-			dev->subsystem_device) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-	return ATA_CBL_PATA40;
-}
-
-static u8 svwks_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	/* Server Works */
-	if (dev->subsystem_vendor == PCI_VENDOR_ID_SERVERWORKS)
-		return ata66_svwks_svwks (hwif);
-	
-	/* Dell PowerEdge */
-	if (dev->subsystem_vendor == PCI_VENDOR_ID_DELL)
-		return ata66_svwks_dell (hwif);
-
-	/* Cobalt Alpine */
-	if (dev->subsystem_vendor == PCI_VENDOR_ID_SUN)
-		return ata66_svwks_cobalt (hwif);
-
-	/* Per Specified Design by OEM, and ASIC Architect */
-	if ((dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE) ||
-	    (dev->device == PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2))
-		return ATA_CBL_PATA80;
-
-	return ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops osb4_port_ops = {
-	.set_pio_mode		= svwks_set_pio_mode,
-	.set_dma_mode		= svwks_set_dma_mode,
-};
-
-static const struct ide_port_ops svwks_port_ops = {
-	.set_pio_mode		= svwks_set_pio_mode,
-	.set_dma_mode		= svwks_set_dma_mode,
-	.udma_filter		= svwks_udma_filter,
-	.cable_detect		= svwks_cable_detect,
-};
-
-static const struct ide_port_info serverworks_chipsets[] = {
-	{	/* 0: OSB4 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_svwks,
-		.port_ops	= &osb4_port_ops,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= 0x00, /* UDMA is problematic on OSB4 */
-	},
-	{	/* 1: CSB5 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_svwks,
-		.port_ops	= &svwks_port_ops,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	},
-	{	/* 2: CSB6 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_svwks,
-		.port_ops	= &svwks_port_ops,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	},
-	{	/* 3: CSB6-2 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_svwks,
-		.port_ops	= &svwks_port_ops,
-		.host_flags	= IDE_HFLAG_SINGLE,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	},
-	{	/* 4: HT1000 */
-		.name		= DRV_NAME,
-		.init_chipset	= init_chipset_svwks,
-		.port_ops	= &svwks_port_ops,
-		.host_flags	= IDE_HFLAG_SINGLE,
-		.pio_mask	= ATA_PIO4,
-		.mwdma_mask	= ATA_MWDMA2,
-		.udma_mask	= ATA_UDMA5,
-	}
-};
-
-/**
- *	svwks_init_one	-	called when a OSB/CSB is found
- *	@dev: the svwks device
- *	@id: the matching pci id
- *
- *	Called when the PCI registration layer (or the IDE initialization)
- *	finds a device matching our IDE device tables.
- */
- 
-static int svwks_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-
-	d = serverworks_chipsets[idx];
-
-	if (idx == 1)
-		d.host_flags |= IDE_HFLAG_CLEAR_SIMPLEX;
-	else if (idx == 2 || idx == 3) {
-		if ((PCI_FUNC(dev->devfn) & 1) == 0) {
-			if (pci_resource_start(dev, 0) != 0x01f1)
-				d.host_flags |= IDE_HFLAG_NON_BOOTABLE;
-			d.host_flags |= IDE_HFLAG_SINGLE;
-		} else
-			d.host_flags &= ~IDE_HFLAG_SINGLE;
-	}
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static const struct pci_device_id svwks_pci_tbl[] = {
-	{ PCI_VDEVICE(SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4IDE),   0 },
-	{ PCI_VDEVICE(SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE),   1 },
-	{ PCI_VDEVICE(SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE),   2 },
-	{ PCI_VDEVICE(SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2),  3 },
-	{ PCI_VDEVICE(SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000IDE), 4 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, svwks_pci_tbl);
-
-static struct pci_driver svwks_pci_driver = {
-	.name		= "Serverworks_IDE",
-	.id_table	= svwks_pci_tbl,
-	.probe		= svwks_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init svwks_ide_init(void)
-{
-	return ide_pci_register_driver(&svwks_pci_driver);
-}
-
-static void __exit svwks_ide_exit(void)
-{
-	pci_unregister_driver(&svwks_pci_driver);
-}
-
-module_init(svwks_ide_init);
-module_exit(svwks_ide_exit);
-
-MODULE_AUTHOR("Michael Aubry. Andrzej Krzysztofowicz, Andre Hedrick, Bartlomiej Zolnierkiewicz");
-MODULE_DESCRIPTION("PCI driver module for Serverworks OSB4/CSB5/CSB6 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c
deleted file mode 100644
index fdc8e813170c3..0000000000000
--- a/drivers/ide/setup-pci.c
+++ /dev/null
@@ -1,682 +0,0 @@
-/*
- *  Copyright (C) 1998-2000  Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 1995-1998  Mark Lord
- *  Copyright (C) 2007-2009  Bartlomiej Zolnierkiewicz
- *
- *  May be copied or modified under the terms of the GNU General Public License
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/ide.h>
-#include <linux/dma-mapping.h>
-
-#include <asm/io.h>
-
-/**
- *	ide_setup_pci_baseregs	-	place a PCI IDE controller native
- *	@dev: PCI device of interface to switch native
- *	@name: Name of interface
- *
- *	We attempt to place the PCI interface into PCI native mode. If
- *	we succeed the BARs are ok and the controller is in PCI mode.
- *	Returns 0 on success or an errno code.
- *
- *	FIXME: if we program the interface and then fail to set the BARS
- *	we don't switch it back to legacy mode. Do we actually care ??
- */
-
-static int ide_setup_pci_baseregs(struct pci_dev *dev, const char *name)
-{
-	u8 progif = 0;
-
-	/*
-	 * Place both IDE interfaces into PCI "native" mode:
-	 */
-	if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
-			 (progif & 5) != 5) {
-		if ((progif & 0xa) != 0xa) {
-			printk(KERN_INFO "%s %s: device not capable of full "
-				"native PCI mode\n", name, pci_name(dev));
-			return -EOPNOTSUPP;
-		}
-		printk(KERN_INFO "%s %s: placing both ports into native PCI "
-			"mode\n", name, pci_name(dev));
-		(void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5);
-		if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) ||
-		    (progif & 5) != 5) {
-			printk(KERN_ERR "%s %s: rewrite of PROGIF failed, "
-				"wanted 0x%04x, got 0x%04x\n",
-				name, pci_name(dev), progif | 5, progif);
-			return -EOPNOTSUPP;
-		}
-	}
-	return 0;
-}
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
-static int ide_pci_clear_simplex(unsigned long dma_base, const char *name)
-{
-	u8 dma_stat = inb(dma_base + 2);
-
-	outb(dma_stat & 0x60, dma_base + 2);
-	dma_stat = inb(dma_base + 2);
-
-	return (dma_stat & 0x80) ? 1 : 0;
-}
-
-/**
- *	ide_pci_dma_base	-	setup BMIBA
- *	@hwif: IDE interface
- *	@d: IDE port info
- *
- *	Fetch the DMA Bus-Master-I/O-Base-Address (BMIBA) from PCI space.
- */
-
-unsigned long ide_pci_dma_base(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long dma_base = 0;
-
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		return hwif->dma_base;
-
-	if (hwif->mate && hwif->mate->dma_base) {
-		dma_base = hwif->mate->dma_base - (hwif->channel ? 0 : 8);
-	} else {
-		u8 baridx = (d->host_flags & IDE_HFLAG_CS5520) ? 2 : 4;
-
-		dma_base = pci_resource_start(dev, baridx);
-
-		if (dma_base == 0) {
-			printk(KERN_ERR "%s %s: DMA base is invalid\n",
-				d->name, pci_name(dev));
-			return 0;
-		}
-	}
-
-	if (hwif->channel)
-		dma_base += 8;
-
-	return dma_base;
-}
-EXPORT_SYMBOL_GPL(ide_pci_dma_base);
-
-int ide_pci_check_simplex(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 dma_stat;
-
-	if (d->host_flags & (IDE_HFLAG_MMIO | IDE_HFLAG_CS5520))
-		goto out;
-
-	if (d->host_flags & IDE_HFLAG_CLEAR_SIMPLEX) {
-		if (ide_pci_clear_simplex(hwif->dma_base, d->name))
-			printk(KERN_INFO "%s %s: simplex device: DMA forced\n",
-				d->name, pci_name(dev));
-		goto out;
-	}
-
-	/*
-	 * If the device claims "simplex" DMA, this means that only one of
-	 * the two interfaces can be trusted with DMA at any point in time
-	 * (so we should enable DMA only on one of the two interfaces).
-	 *
-	 * FIXME: At this point we haven't probed the drives so we can't make
-	 * the appropriate decision.  Really we should defer this problem until
-	 * we tune the drive then try to grab DMA ownership if we want to be
-	 * the DMA end.  This has to be become dynamic to handle hot-plug.
-	 */
-	dma_stat = hwif->dma_ops->dma_sff_read_status(hwif);
-	if ((dma_stat & 0x80) && hwif->mate && hwif->mate->dma_base) {
-		printk(KERN_INFO "%s %s: simplex device: DMA disabled\n",
-			d->name, pci_name(dev));
-		return -1;
-	}
-out:
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_pci_check_simplex);
-
-/*
- * Set up BM-DMA capability (PnP BIOS should have done this)
- */
-int ide_pci_set_master(struct pci_dev *dev, const char *name)
-{
-	u16 pcicmd;
-
-	pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-
-	if ((pcicmd & PCI_COMMAND_MASTER) == 0) {
-		pci_set_master(dev);
-
-		if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd) ||
-		    (pcicmd & PCI_COMMAND_MASTER) == 0) {
-			printk(KERN_ERR "%s %s: error updating PCICMD\n",
-				name, pci_name(dev));
-			return -EIO;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_pci_set_master);
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
-
-void ide_setup_pci_noise(struct pci_dev *dev, const struct ide_port_info *d)
-{
-	printk(KERN_INFO "%s %s: IDE controller (0x%04x:0x%04x rev 0x%02x)\n",
-		d->name, pci_name(dev),
-		dev->vendor, dev->device, dev->revision);
-}
-EXPORT_SYMBOL_GPL(ide_setup_pci_noise);
-
-
-/**
- *	ide_pci_enable	-	do PCI enables
- *	@dev: PCI device
- *	@bars: PCI BARs mask
- *	@d: IDE port info
- *
- *	Enable the IDE PCI device. We attempt to enable the device in full
- *	but if that fails then we only need IO space. The PCI code should
- *	have setup the proper resources for us already for controllers in
- *	legacy mode.
- *
- *	Returns zero on success or an error code
- */
-
-static int ide_pci_enable(struct pci_dev *dev, int bars,
-			  const struct ide_port_info *d)
-{
-	int ret;
-
-	if (pci_enable_device(dev)) {
-		ret = pci_enable_device_io(dev);
-		if (ret < 0) {
-			printk(KERN_WARNING "%s %s: couldn't enable device\n",
-				d->name, pci_name(dev));
-			goto out;
-		}
-		printk(KERN_WARNING "%s %s: BIOS configuration fixed\n",
-			d->name, pci_name(dev));
-	}
-
-	/*
-	 * assume all devices can do 32-bit DMA for now, we can add
-	 * a DMA mask field to the struct ide_port_info if we need it
-	 * (or let lower level driver set the DMA mask)
-	 */
-	ret = dma_set_mask(&dev->dev, DMA_BIT_MASK(32));
-	if (ret < 0) {
-		printk(KERN_ERR "%s %s: can't set DMA mask\n",
-			d->name, pci_name(dev));
-		goto out;
-	}
-
-	ret = pci_request_selected_regions(dev, bars, d->name);
-	if (ret < 0)
-		printk(KERN_ERR "%s %s: can't reserve resources\n",
-			d->name, pci_name(dev));
-out:
-	return ret;
-}
-
-/**
- *	ide_pci_configure	-	configure an unconfigured device
- *	@dev: PCI device
- *	@d: IDE port info
- *
- *	Enable and configure the PCI device we have been passed.
- *	Returns zero on success or an error code.
- */
-
-static int ide_pci_configure(struct pci_dev *dev, const struct ide_port_info *d)
-{
-	u16 pcicmd = 0;
-	/*
-	 * PnP BIOS was *supposed* to have setup this device, but we
-	 * can do it ourselves, so long as the BIOS has assigned an IRQ
-	 * (or possibly the device is using a "legacy header" for IRQs).
-	 * Maybe the user deliberately *disabled* the device,
-	 * but we'll eventually ignore it again if no drives respond.
-	 */
-	if (ide_setup_pci_baseregs(dev, d->name) ||
-	    pci_write_config_word(dev, PCI_COMMAND, pcicmd | PCI_COMMAND_IO)) {
-		printk(KERN_INFO "%s %s: device disabled (BIOS)\n",
-			d->name, pci_name(dev));
-		return -ENODEV;
-	}
-	if (pci_read_config_word(dev, PCI_COMMAND, &pcicmd)) {
-		printk(KERN_ERR "%s %s: error accessing PCI regs\n",
-			d->name, pci_name(dev));
-		return -EIO;
-	}
-	if (!(pcicmd & PCI_COMMAND_IO)) {
-		printk(KERN_ERR "%s %s: unable to enable IDE controller\n",
-			d->name, pci_name(dev));
-		return -ENXIO;
-	}
-	return 0;
-}
-
-/**
- *	ide_pci_check_iomem	-	check a register is I/O
- *	@dev: PCI device
- *	@d: IDE port info
- *	@bar: BAR number
- *
- *	Checks if a BAR is configured and points to MMIO space. If so,
- *	return an error code. Otherwise return 0
- */
-
-static int ide_pci_check_iomem(struct pci_dev *dev, const struct ide_port_info *d,
-			       int bar)
-{
-	ulong flags = pci_resource_flags(dev, bar);
-
-	/* Unconfigured ? */
-	if (!flags || pci_resource_len(dev, bar) == 0)
-		return 0;
-
-	/* I/O space */
-	if (flags & IORESOURCE_IO)
-		return 0;
-
-	/* Bad */
-	return -EINVAL;
-}
-
-/**
- *	ide_hw_configure	-	configure a struct ide_hw instance
- *	@dev: PCI device holding interface
- *	@d: IDE port info
- *	@port: port number
- *	@hw: struct ide_hw instance corresponding to this port
- *
- *	Perform the initial set up for the hardware interface structure. This
- *	is done per interface port rather than per PCI device. There may be
- *	more than one port per device.
- *
- *	Returns zero on success or an error code.
- */
-
-static int ide_hw_configure(struct pci_dev *dev, const struct ide_port_info *d,
-			    unsigned int port, struct ide_hw *hw)
-{
-	unsigned long ctl = 0, base = 0;
-
-	if ((d->host_flags & IDE_HFLAG_ISA_PORTS) == 0) {
-		if (ide_pci_check_iomem(dev, d, 2 * port) ||
-		    ide_pci_check_iomem(dev, d, 2 * port + 1)) {
-			printk(KERN_ERR "%s %s: I/O baseregs (BIOS) are "
-				"reported as MEM for port %d!\n",
-				d->name, pci_name(dev), port);
-			return -EINVAL;
-		}
-
-		ctl  = pci_resource_start(dev, 2*port+1);
-		base = pci_resource_start(dev, 2*port);
-	} else {
-		/* Use default values */
-		ctl = port ? 0x374 : 0x3f4;
-		base = port ? 0x170 : 0x1f0;
-	}
-
-	if (!base || !ctl) {
-		printk(KERN_ERR "%s %s: bad PCI BARs for port %d, skipping\n",
-			d->name, pci_name(dev), port);
-		return -EINVAL;
-	}
-
-	memset(hw, 0, sizeof(*hw));
-	hw->dev = &dev->dev;
-	ide_std_init_ports(hw, base, ctl | 2);
-
-	return 0;
-}
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
-/**
- *	ide_hwif_setup_dma	-	configure DMA interface
- *	@hwif: IDE interface
- *	@d: IDE port info
- *
- *	Set up the DMA base for the interface. Enable the master bits as
- *	necessary and attempt to bring the device DMA into a ready to use
- *	state
- */
-
-int ide_hwif_setup_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-
-	if ((d->host_flags & IDE_HFLAG_NO_AUTODMA) == 0 ||
-	    ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE &&
-	     (dev->class & 0x80))) {
-		unsigned long base = ide_pci_dma_base(hwif, d);
-
-		if (base == 0)
-			return -1;
-
-		hwif->dma_base = base;
-
-		if (hwif->dma_ops == NULL)
-			hwif->dma_ops = &sff_dma_ops;
-
-		if (ide_pci_check_simplex(hwif, d) < 0)
-			return -1;
-
-		if (ide_pci_set_master(dev, d->name) < 0)
-			return -1;
-
-		if (hwif->host_flags & IDE_HFLAG_MMIO)
-			printk(KERN_INFO "    %s: MMIO-DMA\n", hwif->name);
-		else
-			printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
-					 hwif->name, base, base + 7);
-
-		hwif->extra_base = base + (hwif->channel ? 8 : 16);
-
-		if (ide_allocate_dma_engine(hwif))
-			return -1;
-	}
-
-	return 0;
-}
-#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */
-
-/**
- *	ide_setup_pci_controller	-	set up IDE PCI
- *	@dev: PCI device
- *	@bars: PCI BARs mask
- *	@d: IDE port info
- *	@noisy: verbose flag
- *
- *	Set up the PCI and controller side of the IDE interface. This brings
- *	up the PCI side of the device, checks that the device is enabled
- *	and enables it if need be
- */
-
-static int ide_setup_pci_controller(struct pci_dev *dev, int bars,
-				    const struct ide_port_info *d, int noisy)
-{
-	int ret;
-	u16 pcicmd;
-
-	if (noisy)
-		ide_setup_pci_noise(dev, d);
-
-	ret = ide_pci_enable(dev, bars, d);
-	if (ret < 0)
-		goto out;
-
-	ret = pci_read_config_word(dev, PCI_COMMAND, &pcicmd);
-	if (ret < 0) {
-		printk(KERN_ERR "%s %s: error accessing PCI regs\n",
-			d->name, pci_name(dev));
-		goto out_free_bars;
-	}
-	if (!(pcicmd & PCI_COMMAND_IO)) {	/* is device disabled? */
-		ret = ide_pci_configure(dev, d);
-		if (ret < 0)
-			goto out_free_bars;
-		printk(KERN_INFO "%s %s: device enabled (Linux)\n",
-			d->name, pci_name(dev));
-	}
-
-	goto out;
-
-out_free_bars:
-	pci_release_selected_regions(dev, bars);
-out:
-	return ret;
-}
-
-/**
- *	ide_pci_setup_ports	-	configure ports/devices on PCI IDE
- *	@dev: PCI device
- *	@d: IDE port info
- *	@hw: struct ide_hw instances corresponding to this PCI IDE device
- *	@hws: struct ide_hw pointers table to update
- *
- *	Scan the interfaces attached to this device and do any
- *	necessary per port setup. Attach the devices and ask the
- *	generic DMA layer to do its work for us.
- *
- *	Normally called automaticall from do_ide_pci_setup_device,
- *	but is also used directly as a helper function by some controllers
- *	where the chipset setup is not the default PCI IDE one.
- */
-
-void ide_pci_setup_ports(struct pci_dev *dev, const struct ide_port_info *d,
-			 struct ide_hw *hw, struct ide_hw **hws)
-{
-	int channels = (d->host_flags & IDE_HFLAG_SINGLE) ? 1 : 2, port;
-	u8 tmp;
-
-	/*
-	 * Set up the IDE ports
-	 */
-
-	for (port = 0; port < channels; ++port) {
-		const struct ide_pci_enablebit *e = &d->enablebits[port];
-
-		if (e->reg && (pci_read_config_byte(dev, e->reg, &tmp) ||
-		    (tmp & e->mask) != e->val)) {
-			printk(KERN_INFO "%s %s: IDE port disabled\n",
-				d->name, pci_name(dev));
-			continue;	/* port not enabled */
-		}
-
-		if (ide_hw_configure(dev, d, port, hw + port))
-			continue;
-
-		*(hws + port) = hw + port;
-	}
-}
-EXPORT_SYMBOL_GPL(ide_pci_setup_ports);
-
-/*
- * ide_setup_pci_device() looks at the primary/secondary interfaces
- * on a PCI IDE device and, if they are enabled, prepares the IDE driver
- * for use with them.  This generic code works for most PCI chipsets.
- *
- * One thing that is not standardized is the location of the
- * primary/secondary interface "enable/disable" bits.  For chipsets that
- * we "know" about, this information is in the struct ide_port_info;
- * for all other chipsets, we just assume both interfaces are enabled.
- */
-static int do_ide_setup_pci_device(struct pci_dev *dev,
-				   const struct ide_port_info *d,
-				   u8 noisy)
-{
-	int pciirq, ret;
-
-	/*
-	 * Can we trust the reported IRQ?
-	 */
-	pciirq = dev->irq;
-
-	/*
-	 * This allows offboard ide-pci cards the enable a BIOS,
-	 * verify interrupt settings of split-mirror pci-config
-	 * space, place chipset into init-mode, and/or preserve
-	 * an interrupt if the card is not native ide support.
-	 */
-	ret = d->init_chipset ? d->init_chipset(dev) : 0;
-	if (ret < 0)
-		goto out;
-
-	if (ide_pci_is_in_compatibility_mode(dev)) {
-		if (noisy)
-			printk(KERN_INFO "%s %s: not 100%% native mode: will "
-				"probe irqs later\n", d->name, pci_name(dev));
-		pciirq = 0;
-	} else if (!pciirq && noisy) {
-		printk(KERN_WARNING "%s %s: bad irq (%d): will probe later\n",
-			d->name, pci_name(dev), pciirq);
-	} else if (noisy) {
-		printk(KERN_INFO "%s %s: 100%% native mode on irq %d\n",
-			d->name, pci_name(dev), pciirq);
-	}
-
-	ret = pciirq;
-out:
-	return ret;
-}
-
-int ide_pci_init_two(struct pci_dev *dev1, struct pci_dev *dev2,
-		     const struct ide_port_info *d, void *priv)
-{
-	struct pci_dev *pdev[] = { dev1, dev2 };
-	struct ide_host *host;
-	int ret, i, n_ports = dev2 ? 4 : 2, bars;
-	struct ide_hw hw[4], *hws[] = { NULL, NULL, NULL, NULL };
-
-	if (d->host_flags & IDE_HFLAG_SINGLE)
-		bars = (1 << 2) - 1;
-	else
-		bars = (1 << 4) - 1;
-
-	if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0) {
-		if (d->host_flags & IDE_HFLAG_CS5520)
-			bars |= (1 << 2);
-		else
-			bars |= (1 << 4);
-	}
-
-	for (i = 0; i < n_ports / 2; i++) {
-		ret = ide_setup_pci_controller(pdev[i], bars, d, !i);
-		if (ret < 0) {
-			if (i == 1)
-				pci_release_selected_regions(pdev[0], bars);
-			goto out;
-		}
-
-		ide_pci_setup_ports(pdev[i], d, &hw[i*2], &hws[i*2]);
-	}
-
-	host = ide_host_alloc(d, hws, n_ports);
-	if (host == NULL) {
-		ret = -ENOMEM;
-		goto out_free_bars;
-	}
-
-	host->dev[0] = &dev1->dev;
-	if (dev2)
-		host->dev[1] = &dev2->dev;
-
-	host->host_priv = priv;
-	host->irq_flags = IRQF_SHARED;
-
-	pci_set_drvdata(pdev[0], host);
-	if (dev2)
-		pci_set_drvdata(pdev[1], host);
-
-	for (i = 0; i < n_ports / 2; i++) {
-		ret = do_ide_setup_pci_device(pdev[i], d, !i);
-
-		/*
-		 * FIXME: Mom, mom, they stole me the helper function to undo
-		 * do_ide_setup_pci_device() on the first device!
-		 */
-		if (ret < 0)
-			goto out_free_bars;
-
-		/* fixup IRQ */
-		if (ide_pci_is_in_compatibility_mode(pdev[i])) {
-			hw[i*2].irq = pci_get_legacy_ide_irq(pdev[i], 0);
-			hw[i*2 + 1].irq = pci_get_legacy_ide_irq(pdev[i], 1);
-		} else
-			hw[i*2 + 1].irq = hw[i*2].irq = ret;
-	}
-
-	ret = ide_host_register(host, d, hws);
-	if (ret)
-		ide_host_free(host);
-	else
-		goto out;
-
-out_free_bars:
-	i = n_ports / 2;
-	while (i--)
-		pci_release_selected_regions(pdev[i], bars);
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ide_pci_init_two);
-
-int ide_pci_init_one(struct pci_dev *dev, const struct ide_port_info *d,
-		     void *priv)
-{
-	return ide_pci_init_two(dev, NULL, d, priv);
-}
-EXPORT_SYMBOL_GPL(ide_pci_init_one);
-
-void ide_pci_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct pci_dev *dev2 = host->dev[1] ? to_pci_dev(host->dev[1]) : NULL;
-	int bars;
-
-	if (host->host_flags & IDE_HFLAG_SINGLE)
-		bars = (1 << 2) - 1;
-	else
-		bars = (1 << 4) - 1;
-
-	if ((host->host_flags & IDE_HFLAG_NO_DMA) == 0) {
-		if (host->host_flags & IDE_HFLAG_CS5520)
-			bars |= (1 << 2);
-		else
-			bars |= (1 << 4);
-	}
-
-	ide_host_remove(host);
-
-	if (dev2)
-		pci_release_selected_regions(dev2, bars);
-	pci_release_selected_regions(dev, bars);
-
-	if (dev2)
-		pci_disable_device(dev2);
-	pci_disable_device(dev);
-}
-EXPORT_SYMBOL_GPL(ide_pci_remove);
-
-#ifdef CONFIG_PM
-int ide_pci_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	pci_save_state(dev);
-	pci_disable_device(dev);
-	pci_set_power_state(dev, pci_choose_state(dev, state));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_pci_suspend);
-
-int ide_pci_resume(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	int rc;
-
-	pci_set_power_state(dev, PCI_D0);
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		return rc;
-
-	pci_restore_state(dev);
-	pci_set_master(dev);
-
-	if (host->init_chipset)
-		host->init_chipset(dev);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ide_pci_resume);
-#endif
diff --git a/drivers/ide/siimage.c b/drivers/ide/siimage.c
deleted file mode 100644
index c4b20f350b84b..0000000000000
--- a/drivers/ide/siimage.c
+++ /dev/null
@@ -1,843 +0,0 @@
-/*
- * Copyright (C) 2001-2002	Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2003		Red Hat
- * Copyright (C) 2007-2008	MontaVista Software, Inc.
- * Copyright (C) 2007-2008	Bartlomiej Zolnierkiewicz
- *
- *  May be copied or modified under the terms of the GNU General Public License
- *
- *  Documentation for CMD680:
- *  http://gkernel.sourceforge.net/specs/sii/sii-0680a-v1.31.pdf.bz2
- *
- *  Documentation for SiI 3112:
- *  http://gkernel.sourceforge.net/specs/sii/3112A_SiI-DS-0095-B2.pdf.bz2
- *
- *  Errata and other documentation only available under NDA.
- *
- *
- *  FAQ Items:
- *	If you are using Marvell SATA-IDE adapters with Maxtor drives
- *	ensure the system is set up for ATA100/UDMA5, not UDMA6.
- *
- *	If you are using WD drives with SATA bridges you must set the
- *	drive to "Single". "Master" will hang.
- *
- *	If you have strange problems with nVidia chipset systems please
- *	see the SI support documentation and update your system BIOS
- *	if necessary
- *
- *  The Dell DRAC4 has some interesting features including effectively hot
- *  unplugging/replugging the virtual CD interface when the DRAC is reset.
- *  This often causes drivers/ide/siimage to panic but is ok with the rather
- *  smarter code in libata.
- *
- * TODO:
- * - VDMA support
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#define DRV_NAME "siimage"
-
-/**
- *	pdev_is_sata		-	check if device is SATA
- *	@pdev:	PCI device to check
- *
- *	Returns true if this is a SATA controller
- */
-
-static int pdev_is_sata(struct pci_dev *pdev)
-{
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_SII_3112:
-	case PCI_DEVICE_ID_SII_1210SA:
-		return 1;
-	case PCI_DEVICE_ID_SII_680:
-		return 0;
-	}
-	BUG();
-#endif
-	return 0;
-}
-
-/**
- *	is_sata			-	check if hwif is SATA
- *	@hwif:	interface to check
- *
- *	Returns true if this is a SATA controller
- */
-
-static inline int is_sata(ide_hwif_t *hwif)
-{
-	return pdev_is_sata(to_pci_dev(hwif->dev));
-}
-
-/**
- *	siimage_selreg		-	return register base
- *	@hwif: interface
- *	@r: config offset
- *
- *	Turn a config register offset into the right address in either
- *	PCI space or MMIO space to access the control register in question
- *	Thankfully this is a configuration operation, so isn't performance
- *	critical.
- */
-
-static unsigned long siimage_selreg(ide_hwif_t *hwif, int r)
-{
-	unsigned long base = (unsigned long)hwif->hwif_data;
-
-	base += 0xA0 + r;
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		base += hwif->channel << 6;
-	else
-		base += hwif->channel << 4;
-	return base;
-}
-
-/**
- *	siimage_seldev		-	return register base
- *	@hwif: interface
- *	@r: config offset
- *
- *	Turn a config register offset into the right address in either
- *	PCI space or MMIO space to access the control register in question
- *	including accounting for the unit shift.
- */
-
-static inline unsigned long siimage_seldev(ide_drive_t *drive, int r)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	unsigned long base	= (unsigned long)hwif->hwif_data;
-	u8 unit			= drive->dn & 1;
-
-	base += 0xA0 + r;
-	if (hwif->host_flags & IDE_HFLAG_MMIO)
-		base += hwif->channel << 6;
-	else
-		base += hwif->channel << 4;
-	base |= unit << unit;
-	return base;
-}
-
-static u8 sil_ioread8(struct pci_dev *dev, unsigned long addr)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	u8 tmp = 0;
-
-	if (host->host_priv)
-		tmp = readb((void __iomem *)addr);
-	else
-		pci_read_config_byte(dev, addr, &tmp);
-
-	return tmp;
-}
-
-static u16 sil_ioread16(struct pci_dev *dev, unsigned long addr)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	u16 tmp = 0;
-
-	if (host->host_priv)
-		tmp = readw((void __iomem *)addr);
-	else
-		pci_read_config_word(dev, addr, &tmp);
-
-	return tmp;
-}
-
-static void sil_iowrite8(struct pci_dev *dev, u8 val, unsigned long addr)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-
-	if (host->host_priv)
-		writeb(val, (void __iomem *)addr);
-	else
-		pci_write_config_byte(dev, addr, val);
-}
-
-static void sil_iowrite16(struct pci_dev *dev, u16 val, unsigned long addr)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-
-	if (host->host_priv)
-		writew(val, (void __iomem *)addr);
-	else
-		pci_write_config_word(dev, addr, val);
-}
-
-static void sil_iowrite32(struct pci_dev *dev, u32 val, unsigned long addr)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-
-	if (host->host_priv)
-		writel(val, (void __iomem *)addr);
-	else
-		pci_write_config_dword(dev, addr, val);
-}
-
-/**
- *	sil_udma_filter		-	compute UDMA mask
- *	@drive: IDE device
- *
- *	Compute the available UDMA speeds for the device on the interface.
- *
- *	For the CMD680 this depends on the clocking mode (scsc), for the
- *	SI3112 SATA controller life is a bit simpler.
- */
-
-static u8 sil_pata_udma_filter(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long base	= (unsigned long)hwif->hwif_data;
-	u8 scsc, mask		= 0;
-
-	base += (hwif->host_flags & IDE_HFLAG_MMIO) ? 0x4A : 0x8A;
-
-	scsc = sil_ioread8(dev, base);
-
-	switch (scsc & 0x30) {
-	case 0x10:	/* 133 */
-		mask = ATA_UDMA6;
-		break;
-	case 0x20:	/* 2xPCI */
-		mask = ATA_UDMA6;
-		break;
-	case 0x00:	/* 100 */
-		mask = ATA_UDMA5;
-		break;
-	default: 	/* Disabled ? */
-		BUG();
-	}
-
-	return mask;
-}
-
-static u8 sil_sata_udma_filter(ide_drive_t *drive)
-{
-	char *m = (char *)&drive->id[ATA_ID_PROD];
-
-	return strstr(m, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6;
-}
-
-/**
- *	sil_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Load the timing settings for this device mode into the
- *	controller.
- */
-
-static void sil_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u16 tf_speed[]   = { 0x328a, 0x2283, 0x1281, 0x10c3, 0x10c1 };
-	static const u16 data_speed[] = { 0x328a, 0x2283, 0x1104, 0x10c3, 0x10c1 };
-
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	ide_drive_t *pair	= ide_get_pair_dev(drive);
-	u32 speedt		= 0;
-	u16 speedp		= 0;
-	unsigned long addr	= siimage_seldev(drive, 0x04);
-	unsigned long tfaddr	= siimage_selreg(hwif,	0x02);
-	unsigned long base	= (unsigned long)hwif->hwif_data;
-	const u8 pio		= drive->pio_mode - XFER_PIO_0;
-	u8 tf_pio		= pio;
-	u8 mmio			= (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-	u8 addr_mask		= hwif->channel ? (mmio ? 0xF4 : 0x84)
-						: (mmio ? 0xB4 : 0x80);
-	u8 mode			= 0;
-	u8 unit			= drive->dn & 1;
-
-	/* trim *taskfile* PIO to the slowest of the master/slave */
-	if (pair) {
-		u8 pair_pio = pair->pio_mode - XFER_PIO_0;
-
-		if (pair_pio < tf_pio)
-			tf_pio = pair_pio;
-	}
-
-	/* cheat for now and use the docs */
-	speedp = data_speed[pio];
-	speedt = tf_speed[tf_pio];
-
-	sil_iowrite16(dev, speedp, addr);
-	sil_iowrite16(dev, speedt, tfaddr);
-
-	/* now set up IORDY */
-	speedp = sil_ioread16(dev, tfaddr - 2);
-	speedp &= ~0x200;
-
-	mode = sil_ioread8(dev, base + addr_mask);
-	mode &= ~(unit ? 0x30 : 0x03);
-
-	if (ide_pio_need_iordy(drive, pio)) {
-		speedp |= 0x200;
-		mode |= unit ? 0x10 : 0x01;
-	}
-
-	sil_iowrite16(dev, speedp, tfaddr - 2);
-	sil_iowrite8(dev, mode, base + addr_mask);
-}
-
-/**
- *	sil_set_dma_mode	-	set host controller for DMA mode
- *	@hwif: port
- *	@drive: drive
- *
- *	Tune the SiI chipset for the desired DMA mode.
- */
-
-static void sil_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static const u8 ultra6[] = { 0x0F, 0x0B, 0x07, 0x05, 0x03, 0x02, 0x01 };
-	static const u8 ultra5[] = { 0x0C, 0x07, 0x05, 0x04, 0x02, 0x01 };
-	static const u16 dma[]	 = { 0x2208, 0x10C2, 0x10C1 };
-
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long base	= (unsigned long)hwif->hwif_data;
-	u16 ultra = 0, multi	= 0;
-	u8 mode = 0, unit	= drive->dn & 1;
-	u8 mmio			= (hwif->host_flags & IDE_HFLAG_MMIO) ? 1 : 0;
-	u8 scsc = 0, addr_mask	= hwif->channel ? (mmio ? 0xF4 : 0x84)
-						: (mmio ? 0xB4 : 0x80);
-	unsigned long ma	= siimage_seldev(drive, 0x08);
-	unsigned long ua	= siimage_seldev(drive, 0x0C);
-	const u8 speed		= drive->dma_mode;
-
-	scsc  = sil_ioread8 (dev, base + (mmio ? 0x4A : 0x8A));
-	mode  = sil_ioread8 (dev, base + addr_mask);
-	multi = sil_ioread16(dev, ma);
-	ultra = sil_ioread16(dev, ua);
-
-	mode  &= ~(unit ? 0x30 : 0x03);
-	ultra &= ~0x3F;
-	scsc = ((scsc & 0x30) == 0x00) ? 0 : 1;
-
-	scsc = is_sata(hwif) ? 1 : scsc;
-
-	if (speed >= XFER_UDMA_0) {
-		multi  = dma[2];
-		ultra |= scsc ? ultra6[speed - XFER_UDMA_0] :
-				ultra5[speed - XFER_UDMA_0];
-		mode  |= unit ? 0x30 : 0x03;
-	} else {
-		multi = dma[speed - XFER_MW_DMA_0];
-		mode |= unit ? 0x20 : 0x02;
-	}
-
-	sil_iowrite8 (dev, mode, base + addr_mask);
-	sil_iowrite16(dev, multi, ma);
-	sil_iowrite16(dev, ultra, ua);
-}
-
-static int sil_test_irq(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long addr	= siimage_selreg(hwif, 1);
-	u8 val			= sil_ioread8(dev, addr);
-
-	/* Return 1 if INTRQ asserted */
-	return (val & 8) ? 1 : 0;
-}
-
-/**
- *	siimage_mmio_dma_test_irq	-	check we caused an IRQ
- *	@drive: drive we are testing
- *
- *	Check if we caused an IDE DMA interrupt. We may also have caused
- *	SATA status interrupts, if so we clean them up and continue.
- */
-
-static int siimage_mmio_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	void __iomem *sata_error_addr
-		= (void __iomem *)hwif->sata_scr[SATA_ERROR_OFFSET];
-
-	if (sata_error_addr) {
-		unsigned long base	= (unsigned long)hwif->hwif_data;
-		u32 ext_stat		= readl((void __iomem *)(base + 0x10));
-		u8 watchdog		= 0;
-
-		if (ext_stat & ((hwif->channel) ? 0x40 : 0x10)) {
-			u32 sata_error = readl(sata_error_addr);
-
-			writel(sata_error, sata_error_addr);
-			watchdog = (sata_error & 0x00680000) ? 1 : 0;
-			printk(KERN_WARNING "%s: sata_error = 0x%08x, "
-				"watchdog = %d, %s\n",
-				drive->name, sata_error, watchdog, __func__);
-		} else
-			watchdog = (ext_stat & 0x8000) ? 1 : 0;
-
-		ext_stat >>= 16;
-		if (!(ext_stat & 0x0404) && !watchdog)
-			return 0;
-	}
-
-	/* return 1 if INTR asserted */
-	if (readb((void __iomem *)(hwif->dma_base + ATA_DMA_STATUS)) & 4)
-		return 1;
-
-	return 0;
-}
-
-static int siimage_dma_test_irq(ide_drive_t *drive)
-{
-	if (drive->hwif->host_flags & IDE_HFLAG_MMIO)
-		return siimage_mmio_dma_test_irq(drive);
-	else
-		return ide_dma_test_irq(drive);
-}
-
-/**
- *	sil_sata_reset_poll	-	wait for SATA reset
- *	@drive: drive we are resetting
- *
- *	Poll the SATA phy and see whether it has come back from the dead
- *	yet.
- */
-
-static blk_status_t sil_sata_reset_poll(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	void __iomem *sata_status_addr
-		= (void __iomem *)hwif->sata_scr[SATA_STATUS_OFFSET];
-
-	if (sata_status_addr) {
-		/* SATA Status is available only when in MMIO mode */
-		u32 sata_stat = readl(sata_status_addr);
-
-		if ((sata_stat & 0x03) != 0x03) {
-			printk(KERN_WARNING "%s: reset phy dead, status=0x%08x\n",
-					    hwif->name, sata_stat);
-			return BLK_STS_IOERR;
-		}
-	}
-
-	return BLK_STS_OK;
-}
-
-/**
- *	sil_sata_pre_reset	-	reset hook
- *	@drive: IDE device being reset
- *
- *	For the SATA devices we need to handle recalibration/geometry
- *	differently
- */
-
-static void sil_sata_pre_reset(ide_drive_t *drive)
-{
-	if (drive->media == ide_disk) {
-		drive->special_flags &=
-			~(IDE_SFLAG_SET_GEOMETRY | IDE_SFLAG_RECALIBRATE);
-	}
-}
-
-/**
- *	init_chipset_siimage	-	set up an SI device
- *	@dev: PCI device
- *
- *	Perform the initial PCI set up for this device. Attempt to switch
- *	to 133 MHz clocking if the system isn't already set up to do it.
- */
-
-static int init_chipset_siimage(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	void __iomem *ioaddr = host->host_priv;
-	unsigned long base, scsc_addr;
-	u8 rev = dev->revision, tmp;
-
-	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, rev ? 1 : 255);
-
-	if (ioaddr)
-		pci_set_master(dev);
-
-	base = (unsigned long)ioaddr;
-
-	if (ioaddr && pdev_is_sata(dev)) {
-		u32 tmp32, irq_mask;
-
-		/* make sure IDE0/1 interrupts are not masked */
-		irq_mask = (1 << 22) | (1 << 23);
-		tmp32 = readl(ioaddr + 0x48);
-		if (tmp32 & irq_mask) {
-			tmp32 &= ~irq_mask;
-			writel(tmp32, ioaddr + 0x48);
-			readl(ioaddr + 0x48); /* flush */
-		}
-		writel(0, ioaddr + 0x148);
-		writel(0, ioaddr + 0x1C8);
-	}
-
-	sil_iowrite8(dev, 0, base ? (base + 0xB4) : 0x80);
-	sil_iowrite8(dev, 0, base ? (base + 0xF4) : 0x84);
-
-	scsc_addr = base ? (base + 0x4A) : 0x8A;
-	tmp = sil_ioread8(dev, scsc_addr);
-
-	switch (tmp & 0x30) {
-	case 0x00:
-		/* On 100 MHz clocking, try and switch to 133 MHz */
-		sil_iowrite8(dev, tmp | 0x10, scsc_addr);
-		break;
-	case 0x30:
-		/* Clocking is disabled, attempt to force 133MHz clocking. */
-		sil_iowrite8(dev, tmp & ~0x20, scsc_addr);
-	case 0x10:
-		/* On 133Mhz clocking. */
-		break;
-	case 0x20:
-		/* On PCIx2 clocking. */
-		break;
-	}
-
-	tmp = sil_ioread8(dev, scsc_addr);
-
-	sil_iowrite8 (dev,       0x72, base + 0xA1);
-	sil_iowrite16(dev,     0x328A, base + 0xA2);
-	sil_iowrite32(dev, 0x62DD62DD, base + 0xA4);
-	sil_iowrite32(dev, 0x43924392, base + 0xA8);
-	sil_iowrite32(dev, 0x40094009, base + 0xAC);
-	sil_iowrite8 (dev,       0x72, base ? (base + 0xE1) : 0xB1);
-	sil_iowrite16(dev,     0x328A, base ? (base + 0xE2) : 0xB2);
-	sil_iowrite32(dev, 0x62DD62DD, base ? (base + 0xE4) : 0xB4);
-	sil_iowrite32(dev, 0x43924392, base ? (base + 0xE8) : 0xB8);
-	sil_iowrite32(dev, 0x40094009, base ? (base + 0xEC) : 0xBC);
-
-	if (base && pdev_is_sata(dev)) {
-		writel(0xFFFF0000, ioaddr + 0x108);
-		writel(0xFFFF0000, ioaddr + 0x188);
-		writel(0x00680000, ioaddr + 0x148);
-		writel(0x00680000, ioaddr + 0x1C8);
-	}
-
-	/* report the clocking mode of the controller */
-	if (!pdev_is_sata(dev)) {
-		static const char *clk_str[] =
-			{ "== 100", "== 133", "== 2X PCI", "DISABLED!" };
-
-		tmp >>= 4;
-		printk(KERN_INFO DRV_NAME " %s: BASE CLOCK %s\n",
-			pci_name(dev), clk_str[tmp & 3]);
-	}
-
-	return 0;
-}
-
-/**
- *	init_mmio_iops_siimage	-	set up the iops for MMIO
- *	@hwif: interface to set up
- *
- *	The basic setup here is fairly simple, we can use standard MMIO
- *	operations. However we do have to set the taskfile register offsets
- *	by hand as there isn't a standard defined layout for them this time.
- *
- *	The hardware supports buffered taskfiles and also some rather nice
- *	extended PRD tables. For better SI3112 support use the libata driver
- */
-
-static void init_mmio_iops_siimage(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	struct ide_host *host	= pci_get_drvdata(dev);
-	void *addr		= host->host_priv;
-	u8 ch			= hwif->channel;
-	struct ide_io_ports *io_ports = &hwif->io_ports;
-	unsigned long base;
-
-	/*
-	 *	Fill in the basic hwif bits
-	 */
-	hwif->host_flags |= IDE_HFLAG_MMIO;
-
-	hwif->hwif_data	= addr;
-
-	/*
-	 *	Now set up the hw. We have to do this ourselves as the
-	 *	MMIO layout isn't the same as the standard port based I/O.
-	 */
-	memset(io_ports, 0, sizeof(*io_ports));
-
-	base = (unsigned long)addr;
-	if (ch)
-		base += 0xC0;
-	else
-		base += 0x80;
-
-	/*
-	 *	The buffered task file doesn't have status/control, so we
-	 *	can't currently use it sanely since we want to use LBA48 mode.
-	 */
-	io_ports->data_addr	= base;
-	io_ports->error_addr	= base + 1;
-	io_ports->nsect_addr	= base + 2;
-	io_ports->lbal_addr	= base + 3;
-	io_ports->lbam_addr	= base + 4;
-	io_ports->lbah_addr	= base + 5;
-	io_ports->device_addr	= base + 6;
-	io_ports->status_addr	= base + 7;
-	io_ports->ctl_addr	= base + 10;
-
-	if (pdev_is_sata(dev)) {
-		base = (unsigned long)addr;
-		if (ch)
-			base += 0x80;
-		hwif->sata_scr[SATA_STATUS_OFFSET]	= base + 0x104;
-		hwif->sata_scr[SATA_ERROR_OFFSET]	= base + 0x108;
-		hwif->sata_scr[SATA_CONTROL_OFFSET]	= base + 0x100;
-	}
-
-	hwif->irq = dev->irq;
-
-	hwif->dma_base = (unsigned long)addr + (ch ? 0x08 : 0x00);
-}
-
-static int is_dev_seagate_sata(ide_drive_t *drive)
-{
-	const char *s	= (const char *)&drive->id[ATA_ID_PROD];
-	unsigned len	= strnlen(s, ATA_ID_PROD_LEN);
-
-	if ((len > 4) && (!memcmp(s, "ST", 2)))
-		if ((!memcmp(s + len - 2, "AS", 2)) ||
-		    (!memcmp(s + len - 3, "ASL", 3))) {
-			printk(KERN_INFO "%s: applying pessimistic Seagate "
-					 "errata fix\n", drive->name);
-			return 1;
-		}
-
-	return 0;
-}
-
-/**
- *	sil_quirkproc		-	post probe fixups
- *	@drive: drive
- *
- *	Called after drive probe we use this to decide whether the
- *	Seagate fixup must be applied. This used to be in init_iops but
- *	that can occur before we know what drives are present.
- */
-
-static void sil_quirkproc(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-
-	/* Try and rise the rqsize */
-	if (!is_sata(hwif) || !is_dev_seagate_sata(drive))
-		hwif->rqsize = 128;
-}
-
-/**
- *	init_iops_siimage	-	set up iops
- *	@hwif: interface to set up
- *
- *	Do the basic setup for the SIIMAGE hardware interface
- *	and then do the MMIO setup if we can. This is the first
- *	look in we get for setting up the hwif so that we
- *	can get the iops right before using them.
- */
-
-static void init_iops_siimage(ide_hwif_t *hwif)
-{
-	struct ide_host *host = dev_get_drvdata(hwif->dev);
-
-	hwif->hwif_data = NULL;
-
-	/* Pessimal until we finish probing */
-	hwif->rqsize = 15;
-
-	if (host->host_priv)
-		init_mmio_iops_siimage(hwif);
-}
-
-/**
- *	sil_cable_detect	-	cable detection
- *	@hwif: interface to check
- *
- *	Check for the presence of an ATA66 capable cable on the interface.
- */
-
-static u8 sil_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long addr	= siimage_selreg(hwif, 0);
-	u8 ata66		= sil_ioread8(dev, addr);
-
-	return (ata66 & 0x01) ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops sil_pata_port_ops = {
-	.set_pio_mode		= sil_set_pio_mode,
-	.set_dma_mode		= sil_set_dma_mode,
-	.quirkproc		= sil_quirkproc,
-	.test_irq		= sil_test_irq,
-	.udma_filter		= sil_pata_udma_filter,
-	.cable_detect		= sil_cable_detect,
-};
-
-static const struct ide_port_ops sil_sata_port_ops = {
-	.set_pio_mode		= sil_set_pio_mode,
-	.set_dma_mode		= sil_set_dma_mode,
-	.reset_poll		= sil_sata_reset_poll,
-	.pre_reset		= sil_sata_pre_reset,
-	.quirkproc		= sil_quirkproc,
-	.test_irq		= sil_test_irq,
-	.udma_filter		= sil_sata_udma_filter,
-	.cable_detect		= sil_cable_detect,
-};
-
-static const struct ide_dma_ops sil_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= siimage_dma_test_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-#define DECLARE_SII_DEV(p_ops)				\
-	{						\
-		.name		= DRV_NAME,		\
-		.init_chipset	= init_chipset_siimage,	\
-		.init_iops	= init_iops_siimage,	\
-		.port_ops	= p_ops,		\
-		.dma_ops	= &sil_dma_ops,		\
-		.pio_mask	= ATA_PIO4,		\
-		.mwdma_mask	= ATA_MWDMA2,		\
-		.udma_mask	= ATA_UDMA6,		\
-	}
-
-static const struct ide_port_info siimage_chipsets[] = {
-	/* 0: SiI680 */  DECLARE_SII_DEV(&sil_pata_port_ops),
-	/* 1: SiI3112 */ DECLARE_SII_DEV(&sil_sata_port_ops)
-};
-
-/**
- *	siimage_init_one	-	PCI layer discovery entry
- *	@dev: PCI device
- *	@id: ident table entry
- *
- *	Called by the PCI code when it finds an SiI680 or SiI3112 controller.
- *	We then use the IDE PCI generic helper to do most of the work.
- */
-
-static int siimage_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	void __iomem *ioaddr = NULL;
-	resource_size_t bar5 = pci_resource_start(dev, 5);
-	unsigned long barsize = pci_resource_len(dev, 5);
-	int rc;
-	struct ide_port_info d;
-	u8 idx = id->driver_data;
-	u8 BA5_EN;
-
-	d = siimage_chipsets[idx];
-
-	if (idx) {
-		static int first = 1;
-
-		if (first) {
-			printk(KERN_INFO DRV_NAME ": For full SATA support you "
-				"should use the libata sata_sil module.\n");
-			first = 0;
-		}
-
-		d.host_flags |= IDE_HFLAG_NO_ATAPI_DMA;
-	}
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		return rc;
-
-	pci_read_config_byte(dev, 0x8A, &BA5_EN);
-	if ((BA5_EN & 0x01) || bar5) {
-		/*
-		* Drop back to PIO if we can't map the MMIO. Some systems
-		* seem to get terminally confused in the PCI spaces.
-		*/
-		if (!request_mem_region(bar5, barsize, d.name)) {
-			printk(KERN_WARNING DRV_NAME " %s: MMIO ports not "
-				"available\n", pci_name(dev));
-		} else {
-			ioaddr = pci_ioremap_bar(dev, 5);
-			if (ioaddr == NULL)
-				release_mem_region(bar5, barsize);
-		}
-	}
-
-	rc = ide_pci_init_one(dev, &d, ioaddr);
-	if (rc) {
-		if (ioaddr) {
-			iounmap(ioaddr);
-			release_mem_region(bar5, barsize);
-		}
-		pci_disable_device(dev);
-	}
-
-	return rc;
-}
-
-static void siimage_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	void __iomem *ioaddr = host->host_priv;
-
-	ide_pci_remove(dev);
-
-	if (ioaddr) {
-		resource_size_t bar5 = pci_resource_start(dev, 5);
-		unsigned long barsize = pci_resource_len(dev, 5);
-
-		iounmap(ioaddr);
-		release_mem_region(bar5, barsize);
-	}
-
-	pci_disable_device(dev);
-}
-
-static const struct pci_device_id siimage_pci_tbl[] = {
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_680),    0 },
-#ifdef CONFIG_BLK_DEV_IDE_SATA
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_3112),   1 },
-	{ PCI_VDEVICE(CMD, PCI_DEVICE_ID_SII_1210SA), 1 },
-#endif
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, siimage_pci_tbl);
-
-static struct pci_driver siimage_pci_driver = {
-	.name		= "SiI_IDE",
-	.id_table	= siimage_pci_tbl,
-	.probe		= siimage_init_one,
-	.remove		= siimage_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init siimage_ide_init(void)
-{
-	return ide_pci_register_driver(&siimage_pci_driver);
-}
-
-static void __exit siimage_ide_exit(void)
-{
-	pci_unregister_driver(&siimage_pci_driver);
-}
-
-module_init(siimage_ide_init);
-module_exit(siimage_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick, Alan Cox");
-MODULE_DESCRIPTION("PCI driver module for SiI IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
deleted file mode 100644
index 1a700bef6c565..0000000000000
--- a/drivers/ide/sis5513.c
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Copyright (C) 1999-2000	Andre Hedrick <andre@linux-ide.org>
- * Copyright (C) 2002		Lionel Bouton <Lionel.Bouton@inet6.fr>, Maintainer
- * Copyright (C) 2003		Vojtech Pavlik <vojtech@suse.cz>
- * Copyright (C) 2007-2009	Bartlomiej Zolnierkiewicz
- *
- * May be copied or modified under the terms of the GNU General Public License
- *
- *
- * Thanks :
- *
- * SiS Taiwan		: for direct support and hardware.
- * Daniela Engert	: for initial ATA100 advices and numerous others.
- * John Fremlin, Manfred Spraul, Dave Morgan, Peter Kjellerstedt	:
- *			  for checking code correctness, providing patches.
- *
- *
- * Original tests and design on the SiS620 chipset.
- * ATA100 tests and design on the SiS735 chipset.
- * ATA16/33 support from specs
- * ATA133 support for SiS961/962 by L.C. Chang <lcchang@sis.com.tw>
- * ATA133 961/962/963 fixes by Vojtech Pavlik <vojtech@suse.cz>
- *
- * Documentation:
- *	SiS chipset documentation available under NDA to companies only
- *      (not to individuals).
- */
-
-/*
- * The original SiS5513 comes from a SiS5511/55112/5513 chipset. The original
- * SiS5513 was also used in the SiS5596/5513 chipset. Thus if we see a SiS5511
- * or SiS5596, we can assume we see the first MWDMA-16 capable SiS5513 chip.
- *
- * Later SiS chipsets integrated the 5513 functionality into the NorthBridge,
- * starting with SiS5571 and up to SiS745. The PCI ID didn't change, though. We
- * can figure out that we have a more modern and more capable 5513 by looking
- * for the respective NorthBridge IDs.
- *
- * Even later (96x family) SiS chipsets use the MuTIOL link and place the 5513
- * into the SouthBrige. Here we cannot rely on looking up the NorthBridge PCI
- * ID, while the now ATA-133 capable 5513 still has the same PCI ID.
- * Fortunately the 5513 can be 'unmasked' by fiddling with some config space
- * bits, changing its device id to the true one - 5517 for 961 and 5518 for
- * 962/963.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-
-#define DRV_NAME "sis5513"
-
-/* registers layout and init values are chipset family dependent */
-#undef ATA_16
-#define ATA_16		0x01
-#define ATA_33		0x02
-#define ATA_66		0x03
-#define ATA_100a	0x04 /* SiS730/SiS550 is ATA100 with ATA66 layout */
-#define ATA_100		0x05
-#define ATA_133a	0x06 /* SiS961b with 133 support */
-#define ATA_133		0x07 /* SiS962/963 */
-
-static u8 chipset_family;
-
-/*
- * Devices supported
- */
-static const struct {
-	const char *name;
-	u16 host_id;
-	u8 chipset_family;
-	u8 flags;
-} SiSHostChipInfo[] = {
-	{ "SiS968",	PCI_DEVICE_ID_SI_968,	ATA_133  },
-	{ "SiS966",	PCI_DEVICE_ID_SI_966,	ATA_133  },
-	{ "SiS965",	PCI_DEVICE_ID_SI_965,	ATA_133  },
-	{ "SiS745",	PCI_DEVICE_ID_SI_745,	ATA_100  },
-	{ "SiS735",	PCI_DEVICE_ID_SI_735,	ATA_100  },
-	{ "SiS733",	PCI_DEVICE_ID_SI_733,	ATA_100  },
-	{ "SiS635",	PCI_DEVICE_ID_SI_635,	ATA_100  },
-	{ "SiS633",	PCI_DEVICE_ID_SI_633,	ATA_100  },
-
-	{ "SiS730",	PCI_DEVICE_ID_SI_730,	ATA_100a },
-	{ "SiS550",	PCI_DEVICE_ID_SI_550,	ATA_100a },
-
-	{ "SiS640",	PCI_DEVICE_ID_SI_640,	ATA_66   },
-	{ "SiS630",	PCI_DEVICE_ID_SI_630,	ATA_66   },
-	{ "SiS620",	PCI_DEVICE_ID_SI_620,	ATA_66   },
-	{ "SiS540",	PCI_DEVICE_ID_SI_540,	ATA_66   },
-	{ "SiS530",	PCI_DEVICE_ID_SI_530,	ATA_66   },
-
-	{ "SiS5600",	PCI_DEVICE_ID_SI_5600,	ATA_33   },
-	{ "SiS5598",	PCI_DEVICE_ID_SI_5598,	ATA_33   },
-	{ "SiS5597",	PCI_DEVICE_ID_SI_5597,	ATA_33   },
-	{ "SiS5591/2",	PCI_DEVICE_ID_SI_5591,	ATA_33   },
-	{ "SiS5582",	PCI_DEVICE_ID_SI_5582,	ATA_33   },
-	{ "SiS5581",	PCI_DEVICE_ID_SI_5581,	ATA_33   },
-
-	{ "SiS5596",	PCI_DEVICE_ID_SI_5596,	ATA_16   },
-	{ "SiS5571",	PCI_DEVICE_ID_SI_5571,	ATA_16   },
-	{ "SiS5517",	PCI_DEVICE_ID_SI_5517,	ATA_16   },
-	{ "SiS551x",	PCI_DEVICE_ID_SI_5511,	ATA_16   },
-};
-
-/* Cycle time bits and values vary across chip dma capabilities
-   These three arrays hold the register layout and the values to set.
-   Indexed by chipset_family and (dma_mode - XFER_UDMA_0) */
-
-/* {0, ATA_16, ATA_33, ATA_66, ATA_100a, ATA_100, ATA_133} */
-static u8 cycle_time_offset[] = { 0, 0, 5, 4, 4, 0, 0 };
-static u8 cycle_time_range[]  = { 0, 0, 2, 3, 3, 4, 4 };
-static u8 cycle_time_value[][XFER_UDMA_6 - XFER_UDMA_0 + 1] = {
-	{  0,  0, 0, 0, 0, 0, 0 }, /* no UDMA */
-	{  0,  0, 0, 0, 0, 0, 0 }, /* no UDMA */
-	{  3,  2, 1, 0, 0, 0, 0 }, /* ATA_33 */
-	{  7,  5, 3, 2, 1, 0, 0 }, /* ATA_66 */
-	{  7,  5, 3, 2, 1, 0, 0 }, /* ATA_100a (730 specific),
-				      different cycle_time range and offset */
-	{ 11,  7, 5, 4, 2, 1, 0 }, /* ATA_100 */
-	{ 15, 10, 7, 5, 3, 2, 1 }, /* ATA_133a (earliest 691 southbridges) */
-	{ 15, 10, 7, 5, 3, 2, 1 }, /* ATA_133 */
-};
-/* CRC Valid Setup Time vary across IDE clock setting 33/66/100/133
-   See SiS962 data sheet for more detail */
-static u8 cvs_time_value[][XFER_UDMA_6 - XFER_UDMA_0 + 1] = {
-	{ 0, 0, 0, 0, 0, 0, 0 }, /* no UDMA */
-	{ 0, 0, 0, 0, 0, 0, 0 }, /* no UDMA */
-	{ 2, 1, 1, 0, 0, 0, 0 },
-	{ 4, 3, 2, 1, 0, 0, 0 },
-	{ 4, 3, 2, 1, 0, 0, 0 },
-	{ 6, 4, 3, 1, 1, 1, 0 },
-	{ 9, 6, 4, 2, 2, 2, 2 },
-	{ 9, 6, 4, 2, 2, 2, 2 },
-};
-/* Initialize time, Active time, Recovery time vary across
-   IDE clock settings. These 3 arrays hold the register value
-   for PIO0/1/2/3/4 and DMA0/1/2 mode in order */
-static u8 ini_time_value[][8] = {
-	{ 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ 0, 0, 0, 0, 0, 0, 0, 0 },
-	{ 2, 1, 0, 0, 0, 1, 0, 0 },
-	{ 4, 3, 1, 1, 1, 3, 1, 1 },
-	{ 4, 3, 1, 1, 1, 3, 1, 1 },
-	{ 6, 4, 2, 2, 2, 4, 2, 2 },
-	{ 9, 6, 3, 3, 3, 6, 3, 3 },
-	{ 9, 6, 3, 3, 3, 6, 3, 3 },
-};
-static u8 act_time_value[][8] = {
-	{  0,  0,  0,  0, 0,  0,  0, 0 },
-	{  0,  0,  0,  0, 0,  0,  0, 0 },
-	{  9,  9,  9,  2, 2,  7,  2, 2 },
-	{ 19, 19, 19,  5, 4, 14,  5, 4 },
-	{ 19, 19, 19,  5, 4, 14,  5, 4 },
-	{ 28, 28, 28,  7, 6, 21,  7, 6 },
-	{ 38, 38, 38, 10, 9, 28, 10, 9 },
-	{ 38, 38, 38, 10, 9, 28, 10, 9 },
-};
-static u8 rco_time_value[][8] = {
-	{  0,  0, 0,  0, 0,  0,  0, 0 },
-	{  0,  0, 0,  0, 0,  0,  0, 0 },
-	{  9,  2, 0,  2, 0,  7,  1, 1 },
-	{ 19,  5, 1,  5, 2, 16,  3, 2 },
-	{ 19,  5, 1,  5, 2, 16,  3, 2 },
-	{ 30,  9, 3,  9, 4, 25,  6, 4 },
-	{ 40, 12, 4, 12, 5, 34, 12, 5 },
-	{ 40, 12, 4, 12, 5, 34, 12, 5 },
-};
-
-/*
- * Printing configuration
- */
-/* Used for chipset type printing at boot time */
-static char *chipset_capability[] = {
-	"ATA", "ATA 16",
-	"ATA 33", "ATA 66",
-	"ATA 100 (1st gen)", "ATA 100 (2nd gen)",
-	"ATA 133 (1st gen)", "ATA 133 (2nd gen)"
-};
-
-/*
- * Configuration functions
- */
-
-static u8 sis_ata133_get_base(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 reg54 = 0;
-
-	pci_read_config_dword(dev, 0x54, &reg54);
-
-	return ((reg54 & 0x40000000) ? 0x70 : 0x40) + drive->dn * 4;
-}
-
-static void sis_ata16_program_timings(ide_drive_t *drive, const u8 mode)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u16 t1 = 0;
-	u8 drive_pci = 0x40 + drive->dn * 2;
-
-	const u16 pio_timings[]   = { 0x000, 0x607, 0x404, 0x303, 0x301 };
-	const u16 mwdma_timings[] = { 0x008, 0x302, 0x301 };
-
-	pci_read_config_word(dev, drive_pci, &t1);
-
-	/* clear active/recovery timings */
-	t1 &= ~0x070f;
-	if (mode >= XFER_MW_DMA_0) {
-		if (chipset_family > ATA_16)
-			t1 &= ~0x8000;	/* disable UDMA */
-		t1 |= mwdma_timings[mode - XFER_MW_DMA_0];
-	} else
-		t1 |= pio_timings[mode - XFER_PIO_0];
-
-	pci_write_config_word(dev, drive_pci, t1);
-}
-
-static void sis_ata100_program_timings(ide_drive_t *drive, const u8 mode)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u8 t1, drive_pci = 0x40 + drive->dn * 2;
-
-	/* timing bits: 7:4 active 3:0 recovery */
-	const u8 pio_timings[]   = { 0x00, 0x67, 0x44, 0x33, 0x31 };
-	const u8 mwdma_timings[] = { 0x08, 0x32, 0x31 };
-
-	if (mode >= XFER_MW_DMA_0) {
-		u8 t2 = 0;
-
-		pci_read_config_byte(dev, drive_pci, &t2);
-		t2 &= ~0x80;	/* disable UDMA */
-		pci_write_config_byte(dev, drive_pci, t2);
-
-		t1 = mwdma_timings[mode - XFER_MW_DMA_0];
-	} else
-		t1 = pio_timings[mode - XFER_PIO_0];
-
-	pci_write_config_byte(dev, drive_pci + 1, t1);
-}
-
-static void sis_ata133_program_timings(ide_drive_t *drive, const u8 mode)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 t1 = 0;
-	u8 drive_pci = sis_ata133_get_base(drive), clk, idx;
-
-	pci_read_config_dword(dev, drive_pci, &t1);
-
-	t1 &= 0xc0c00fff;
-	clk = (t1 & 0x08) ? ATA_133 : ATA_100;
-	if (mode >= XFER_MW_DMA_0) {
-		t1 &= ~0x04;	/* disable UDMA */
-		idx = mode - XFER_MW_DMA_0 + 5;
-	} else
-		idx = mode - XFER_PIO_0;
-	t1 |= ini_time_value[clk][idx] << 12;
-	t1 |= act_time_value[clk][idx] << 16;
-	t1 |= rco_time_value[clk][idx] << 24;
-
-	pci_write_config_dword(dev, drive_pci, t1);
-}
-
-static void sis_program_timings(ide_drive_t *drive, const u8 mode)
-{
-	if (chipset_family < ATA_100)		/* ATA_16/33/66/100a */
-		sis_ata16_program_timings(drive, mode);
-	else if (chipset_family < ATA_133)	/* ATA_100/133a */
-		sis_ata100_program_timings(drive, mode);
-	else					/* ATA_133 */
-		sis_ata133_program_timings(drive, mode);
-}
-
-static void config_drive_art_rwp(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 reg4bh		= 0;
-	u8 rw_prefetch		= 0;
-
-	pci_read_config_byte(dev, 0x4b, &reg4bh);
-
-	rw_prefetch = reg4bh & ~(0x11 << drive->dn);
-
-	if (drive->media == ide_disk)
-		rw_prefetch |= 0x11 << drive->dn;
-
-	if (reg4bh != rw_prefetch)
-		pci_write_config_byte(dev, 0x4b, rw_prefetch);
-}
-
-static void sis_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	config_drive_art_rwp(drive);
-	sis_program_timings(drive, drive->pio_mode);
-}
-
-static void sis_ata133_program_udma_timings(ide_drive_t *drive, const u8 mode)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 regdw = 0;
-	u8 drive_pci = sis_ata133_get_base(drive), clk, idx;
-
-	pci_read_config_dword(dev, drive_pci, &regdw);
-
-	regdw |= 0x04;
-	regdw &= 0xfffff00f;
-	/* check if ATA133 enable */
-	clk = (regdw & 0x08) ? ATA_133 : ATA_100;
-	idx = mode - XFER_UDMA_0;
-	regdw |= cycle_time_value[clk][idx] << 4;
-	regdw |= cvs_time_value[clk][idx] << 8;
-
-	pci_write_config_dword(dev, drive_pci, regdw);
-}
-
-static void sis_ata33_program_udma_timings(ide_drive_t *drive, const u8 mode)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u8 drive_pci = 0x40 + drive->dn * 2, reg = 0, i = chipset_family;
-
-	pci_read_config_byte(dev, drive_pci + 1, &reg);
-
-	/* force the UDMA bit on if we want to use UDMA */
-	reg |= 0x80;
-	/* clean reg cycle time bits */
-	reg &= ~((0xff >> (8 - cycle_time_range[i])) << cycle_time_offset[i]);
-	/* set reg cycle time bits */
-	reg |= cycle_time_value[i][mode - XFER_UDMA_0] << cycle_time_offset[i];
-
-	pci_write_config_byte(dev, drive_pci + 1, reg);
-}
-
-static void sis_program_udma_timings(ide_drive_t *drive, const u8 mode)
-{
-	if (chipset_family >= ATA_133)	/* ATA_133 */
-		sis_ata133_program_udma_timings(drive, mode);
-	else				/* ATA_33/66/100a/100/133a */
-		sis_ata33_program_udma_timings(drive, mode);
-}
-
-static void sis_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	const u8 speed = drive->dma_mode;
-
-	if (speed >= XFER_UDMA_0)
-		sis_program_udma_timings(drive, speed);
-	else
-		sis_program_timings(drive, speed);
-}
-
-static u8 sis_ata133_udma_filter(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 regdw = 0;
-	u8 drive_pci = sis_ata133_get_base(drive);
-
-	pci_read_config_dword(dev, drive_pci, &regdw);
-
-	/* if ATA133 disable, we should not set speed above UDMA5 */
-	return (regdw & 0x08) ? ATA_UDMA6 : ATA_UDMA5;
-}
-
-static int sis_find_family(struct pci_dev *dev)
-{
-	struct pci_dev *host;
-	int i = 0;
-
-	chipset_family = 0;
-
-	for (i = 0; i < ARRAY_SIZE(SiSHostChipInfo) && !chipset_family; i++) {
-
-		host = pci_get_device(PCI_VENDOR_ID_SI, SiSHostChipInfo[i].host_id, NULL);
-
-		if (!host)
-			continue;
-
-		chipset_family = SiSHostChipInfo[i].chipset_family;
-
-		/* Special case for SiS630 : 630S/ET is ATA_100a */
-		if (SiSHostChipInfo[i].host_id == PCI_DEVICE_ID_SI_630) {
-			if (host->revision >= 0x30)
-				chipset_family = ATA_100a;
-		}
-		pci_dev_put(host);
-
-		printk(KERN_INFO DRV_NAME " %s: %s %s controller\n",
-			pci_name(dev), SiSHostChipInfo[i].name,
-			chipset_capability[chipset_family]);
-	}
-
-	if (!chipset_family) { /* Belongs to pci-quirks */
-
-			u32 idemisc;
-			u16 trueid;
-
-			/* Disable ID masking and register remapping */
-			pci_read_config_dword(dev, 0x54, &idemisc);
-			pci_write_config_dword(dev, 0x54, (idemisc & 0x7fffffff));
-			pci_read_config_word(dev, PCI_DEVICE_ID, &trueid);
-			pci_write_config_dword(dev, 0x54, idemisc);
-
-			if (trueid == 0x5518) {
-				printk(KERN_INFO DRV_NAME " %s: SiS 962/963 MuTIOL IDE UDMA133 controller\n",
-					pci_name(dev));
-				chipset_family = ATA_133;
-
-				/* Check for 5513 compatibility mapping
-				 * We must use this, else the port enabled code will fail,
-				 * as it expects the enablebits at 0x4a.
-				 */
-				if ((idemisc & 0x40000000) == 0) {
-					pci_write_config_dword(dev, 0x54, idemisc | 0x40000000);
-					printk(KERN_INFO DRV_NAME " %s: Switching to 5513 register mapping\n",
-						pci_name(dev));
-				}
-			}
-	}
-
-	if (!chipset_family) { /* Belongs to pci-quirks */
-
-			struct pci_dev *lpc_bridge;
-			u16 trueid;
-			u8 prefctl;
-			u8 idecfg;
-
-			pci_read_config_byte(dev, 0x4a, &idecfg);
-			pci_write_config_byte(dev, 0x4a, idecfg | 0x10);
-			pci_read_config_word(dev, PCI_DEVICE_ID, &trueid);
-			pci_write_config_byte(dev, 0x4a, idecfg);
-
-			if (trueid == 0x5517) { /* SiS 961/961B */
-
-				lpc_bridge = pci_get_slot(dev->bus, 0x10); /* Bus 0, Dev 2, Fn 0 */
-				pci_read_config_byte(dev, 0x49, &prefctl);
-				pci_dev_put(lpc_bridge);
-
-				if (lpc_bridge->revision == 0x10 && (prefctl & 0x80)) {
-					printk(KERN_INFO DRV_NAME " %s: SiS 961B MuTIOL IDE UDMA133 controller\n",
-						pci_name(dev));
-					chipset_family = ATA_133a;
-				} else {
-					printk(KERN_INFO DRV_NAME " %s: SiS 961 MuTIOL IDE UDMA100 controller\n",
-						pci_name(dev));
-					chipset_family = ATA_100;
-				}
-			}
-	}
-
-	return chipset_family;
-}
-
-static int init_chipset_sis5513(struct pci_dev *dev)
-{
-	/* Make general config ops here
-	   1/ tell IDE channels to operate in Compatibility mode only
-	   2/ tell old chips to allow per drive IDE timings */
-
-	u8 reg;
-	u16 regw;
-
-	switch (chipset_family) {
-	case ATA_133:
-		/* SiS962 operation mode */
-		pci_read_config_word(dev, 0x50, &regw);
-		if (regw & 0x08)
-			pci_write_config_word(dev, 0x50, regw&0xfff7);
-		pci_read_config_word(dev, 0x52, &regw);
-		if (regw & 0x08)
-			pci_write_config_word(dev, 0x52, regw&0xfff7);
-		break;
-	case ATA_133a:
-	case ATA_100:
-		/* Fixup latency */
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x80);
-		/* Set compatibility bit */
-		pci_read_config_byte(dev, 0x49, &reg);
-		if (!(reg & 0x01))
-			pci_write_config_byte(dev, 0x49, reg|0x01);
-		break;
-	case ATA_100a:
-	case ATA_66:
-		/* Fixup latency */
-		pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x10);
-
-		/* On ATA_66 chips the bit was elsewhere */
-		pci_read_config_byte(dev, 0x52, &reg);
-		if (!(reg & 0x04))
-			pci_write_config_byte(dev, 0x52, reg|0x04);
-		break;
-	case ATA_33:
-		/* On ATA_33 we didn't have a single bit to set */
-		pci_read_config_byte(dev, 0x09, &reg);
-		if ((reg & 0x0f) != 0x00)
-			pci_write_config_byte(dev, 0x09, reg&0xf0);
-		fallthrough;
-	case ATA_16:
-		/* force per drive recovery and active timings
-		   needed on ATA_33 and below chips */
-		pci_read_config_byte(dev, 0x52, &reg);
-		if (!(reg & 0x08))
-			pci_write_config_byte(dev, 0x52, reg|0x08);
-		break;
-	}
-
-	return 0;
-}
-
-struct sis_laptop {
-	u16 device;
-	u16 subvendor;
-	u16 subdevice;
-};
-
-static const struct sis_laptop sis_laptop[] = {
-	/* devid, subvendor, subdev */
-	{ 0x5513, 0x1043, 0x1107 },	/* ASUS A6K */
-	{ 0x5513, 0x1734, 0x105f },	/* FSC Amilo A1630 */
-	{ 0x5513, 0x1071, 0x8640 },     /* EasyNote K5305 */
-	/* end marker */
-	{ 0, }
-};
-
-static u8 sis_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	const struct sis_laptop *lap = &sis_laptop[0];
-	u8 ata66 = 0;
-
-	while (lap->device) {
-		if (lap->device == pdev->device &&
-		    lap->subvendor == pdev->subsystem_vendor &&
-		    lap->subdevice == pdev->subsystem_device)
-			return ATA_CBL_PATA40_SHORT;
-		lap++;
-	}
-
-	if (chipset_family >= ATA_133) {
-		u16 regw = 0;
-		u16 reg_addr = hwif->channel ? 0x52: 0x50;
-		pci_read_config_word(pdev, reg_addr, &regw);
-		ata66 = (regw & 0x8000) ? 0 : 1;
-	} else if (chipset_family >= ATA_66) {
-		u8 reg48h = 0;
-		u8 mask = hwif->channel ? 0x20 : 0x10;
-		pci_read_config_byte(pdev, 0x48, &reg48h);
-		ata66 = (reg48h & mask) ? 0 : 1;
-	}
-
-	return ata66 ? ATA_CBL_PATA80 : ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops sis_port_ops = {
-	.set_pio_mode		= sis_set_pio_mode,
-	.set_dma_mode		= sis_set_dma_mode,
-	.cable_detect		= sis_cable_detect,
-};
-
-static const struct ide_port_ops sis_ata133_port_ops = {
-	.set_pio_mode		= sis_set_pio_mode,
-	.set_dma_mode		= sis_set_dma_mode,
-	.udma_filter		= sis_ata133_udma_filter,
-	.cable_detect		= sis_cable_detect,
-};
-
-static const struct ide_port_info sis5513_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_sis5513,
-	.enablebits	= { {0x4a, 0x02, 0x02}, {0x4a, 0x04, 0x04} },
-	.host_flags	= IDE_HFLAG_NO_AUTODMA,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-static int sis5513_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d = sis5513_chipset;
-	u8 udma_rates[] = { 0x00, 0x00, 0x07, 0x1f, 0x3f, 0x3f, 0x7f, 0x7f };
-	int rc;
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		return rc;
-
-	if (sis_find_family(dev) == 0)
-		return -ENOTSUPP;
-
-	if (chipset_family >= ATA_133)
-		d.port_ops = &sis_ata133_port_ops;
-	else
-		d.port_ops = &sis_port_ops;
-
-	d.udma_mask = udma_rates[chipset_family];
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static void sis5513_remove(struct pci_dev *dev)
-{
-	ide_pci_remove(dev);
-	pci_disable_device(dev);
-}
-
-static const struct pci_device_id sis5513_pci_tbl[] = {
-	{ PCI_VDEVICE(SI, PCI_DEVICE_ID_SI_5513), 0 },
-	{ PCI_VDEVICE(SI, PCI_DEVICE_ID_SI_5518), 0 },
-	{ PCI_VDEVICE(SI, PCI_DEVICE_ID_SI_1180), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, sis5513_pci_tbl);
-
-static struct pci_driver sis5513_pci_driver = {
-	.name		= "SIS_IDE",
-	.id_table	= sis5513_pci_tbl,
-	.probe		= sis5513_init_one,
-	.remove		= sis5513_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init sis5513_ide_init(void)
-{
-	return ide_pci_register_driver(&sis5513_pci_driver);
-}
-
-static void __exit sis5513_ide_exit(void)
-{
-	pci_unregister_driver(&sis5513_pci_driver);
-}
-
-module_init(sis5513_ide_init);
-module_exit(sis5513_ide_exit);
-
-MODULE_AUTHOR("Lionel Bouton, L C Chang, Andre Hedrick, Vojtech Pavlik");
-MODULE_DESCRIPTION("PCI driver module for SIS IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c
deleted file mode 100644
index 5c24c420c4387..0000000000000
--- a/drivers/ide/sl82c105.c
+++ /dev/null
@@ -1,367 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * SL82C105/Winbond 553 IDE driver
- *
- * Maintainer unknown.
- *
- * Drive tuning added from Rebel.com's kernel sources
- *  -- Russell King (15/11/98) linux@arm.linux.org.uk
- * 
- * Merge in Russell's HW workarounds, fix various problems
- * with the timing registers setup.
- *  -- Benjamin Herrenschmidt (01/11/03) benh@kernel.crashing.org
- *
- * Copyright (C) 2006-2007,2009 MontaVista Software, Inc. <source@mvista.com>
- * Copyright (C)      2007 Bartlomiej Zolnierkiewicz
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "sl82c105"
-
-/*
- * SL82C105 PCI config register 0x40 bits.
- */
-#define CTRL_IDE_IRQB   (1 << 30)
-#define CTRL_IDE_IRQA   (1 << 28)
-#define CTRL_LEGIRQ     (1 << 11)
-#define CTRL_P1F16      (1 << 5)
-#define CTRL_P1EN       (1 << 4)
-#define CTRL_P0F16      (1 << 1)
-#define CTRL_P0EN       (1 << 0)
-
-/*
- * Convert a PIO mode and cycle time to the required on/off times
- * for the interface.  This has protection against runaway timings.
- */
-static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio)
-{
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	unsigned int cmd_on, cmd_off;
-	u8 iordy = 0;
-
-	cmd_on  = (t->active + 29) / 30;
-	cmd_off = (ide_pio_cycle_time(drive, pio) - 30 * cmd_on + 29) / 30;
-
-	if (cmd_on == 0)
-		cmd_on = 1;
-
-	if (cmd_off == 0)
-		cmd_off = 1;
-
-	if (ide_pio_need_iordy(drive, pio))
-		iordy = 0x40;
-
-	return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy;
-}
-
-/*
- * Configure the chipset for PIO mode.
- */
-static void sl82c105_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long timings	= (unsigned long)ide_get_drivedata(drive);
-	int reg			= 0x44 + drive->dn * 4;
-	u16 drv_ctrl;
-	const u8 pio		= drive->pio_mode - XFER_PIO_0;
-
-	drv_ctrl = get_pio_timings(drive, pio);
-
-	/*
-	 * Store the PIO timings so that we can restore them
-	 * in case DMA will be turned off...
-	 */
-	timings &= 0xffff0000;
-	timings |= drv_ctrl;
-	ide_set_drivedata(drive, (void *)timings);
-
-	pci_write_config_word(dev, reg,  drv_ctrl);
-	pci_read_config_word (dev, reg, &drv_ctrl);
-
-	printk(KERN_DEBUG "%s: selected %s (%dns) (%04X)\n", drive->name,
-			  ide_xfer_verbose(pio + XFER_PIO_0),
-			  ide_pio_cycle_time(drive, pio), drv_ctrl);
-}
-
-/*
- * Configure the chipset for DMA mode.
- */
-static void sl82c105_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	static u16 mwdma_timings[] = {0x0707, 0x0201, 0x0200};
-	unsigned long timings = (unsigned long)ide_get_drivedata(drive);
-	u16 drv_ctrl;
-	const u8 speed = drive->dma_mode;
-
-	drv_ctrl = mwdma_timings[speed - XFER_MW_DMA_0];
-
-	/*
-	 * Store the DMA timings so that we can actually program
-	 * them when DMA will be turned on...
-	 */
-	timings &= 0x0000ffff;
-	timings |= (unsigned long)drv_ctrl << 16;
-	ide_set_drivedata(drive, (void *)timings);
-}
-
-static int sl82c105_test_irq(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
-
-	pci_read_config_dword(dev, 0x40, &val);
-
-	return (val & mask) ? 1 : 0;
-}
-
-/*
- * The SL82C105 holds off all IDE interrupts while in DMA mode until
- * all DMA activity is completed.  Sometimes this causes problems (eg,
- * when the drive wants to report an error condition).
- *
- * 0x7e is a "chip testing" register.  Bit 2 resets the DMA controller
- * state machine.  We need to kick this to work around various bugs.
- */
-static inline void sl82c105_reset_host(struct pci_dev *dev)
-{
-	u16 val;
-
-	pci_read_config_word(dev, 0x7e, &val);
-	pci_write_config_word(dev, 0x7e, val | (1 << 2));
-	pci_write_config_word(dev, 0x7e, val & ~(1 << 2));
-}
-
-/*
- * If we get an IRQ timeout, it might be that the DMA state machine
- * got confused.  Fix from Todd Inglett.  Details from Winbond.
- *
- * This function is called when the IDE timer expires, the drive
- * indicates that it is READY, and we were waiting for DMA to complete.
- */
-static void sl82c105_dma_lost_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u32 val, mask		= hwif->channel ? CTRL_IDE_IRQB : CTRL_IDE_IRQA;
-	u8 dma_cmd;
-
-	printk(KERN_WARNING "sl82c105: lost IRQ, resetting host\n");
-
-	/*
-	 * Check the raw interrupt from the drive.
-	 */
-	pci_read_config_dword(dev, 0x40, &val);
-	if (val & mask)
-		printk(KERN_INFO "sl82c105: drive was requesting IRQ, "
-		       "but host lost it\n");
-
-	/*
-	 * Was DMA enabled?  If so, disable it - we're resetting the
-	 * host.  The IDE layer will be handling the drive for us.
-	 */
-	dma_cmd = inb(hwif->dma_base + ATA_DMA_CMD);
-	if (dma_cmd & 1) {
-		outb(dma_cmd & ~1, hwif->dma_base + ATA_DMA_CMD);
-		printk(KERN_INFO "sl82c105: DMA was enabled\n");
-	}
-
-	sl82c105_reset_host(dev);
-}
-
-/*
- * ATAPI devices can cause the SL82C105 DMA state machine to go gaga.
- * Winbond recommend that the DMA state machine is reset prior to
- * setting the bus master DMA enable bit.
- *
- * The generic IDE core will have disabled the BMEN bit before this
- * function is called.
- */
-static void sl82c105_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int reg 		= 0x44 + drive->dn * 4;
-
-	pci_write_config_word(dev, reg,
-			      (unsigned long)ide_get_drivedata(drive) >> 16);
-
-	sl82c105_reset_host(dev);
-	ide_dma_start(drive);
-}
-
-static void sl82c105_dma_clear(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-
-	sl82c105_reset_host(dev);
-}
-
-static int sl82c105_dma_end(ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(drive->hwif->dev);
-	int reg 		= 0x44 + drive->dn * 4;
-	int ret			= ide_dma_end(drive);
-
-	pci_write_config_word(dev, reg,
-			      (unsigned long)ide_get_drivedata(drive));
-
-	return ret;
-}
-
-/*
- * ATA reset will clear the 16 bits mode in the control
- * register, we need to reprogram it
- */
-static void sl82c105_resetproc(ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(drive->hwif->dev);
-	u32 val;
-
-	pci_read_config_dword(dev, 0x40, &val);
-	val |= (CTRL_P1F16 | CTRL_P0F16);
-	pci_write_config_dword(dev, 0x40, val);
-}
-
-/*
- * Return the revision of the Winbond bridge
- * which this function is part of.
- */
-static u8 sl82c105_bridge_revision(struct pci_dev *dev)
-{
-	struct pci_dev *bridge;
-
-	/*
-	 * The bridge should be part of the same device, but function 0.
-	 */
-	bridge = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
-					dev->bus->number,
-					PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
-	if (!bridge)
-		return -1;
-
-	/*
-	 * Make sure it is a Winbond 553 and is an ISA bridge.
-	 */
-	if (bridge->vendor != PCI_VENDOR_ID_WINBOND ||
-	    bridge->device != PCI_DEVICE_ID_WINBOND_83C553 ||
-	    bridge->class >> 8 != PCI_CLASS_BRIDGE_ISA) {
-	    	pci_dev_put(bridge);
-		return -1;
-	}
-	/*
-	 * We need to find function 0's revision, not function 1
-	 */
-	pci_dev_put(bridge);
-
-	return bridge->revision;
-}
-
-/*
- * Enable the PCI device
- * 
- * --BenH: It's arch fixup code that should enable channels that
- * have not been enabled by firmware. I decided we can still enable
- * channel 0 here at least, but channel 1 has to be enabled by
- * firmware or arch code. We still set both to 16 bits mode.
- */
-static int init_chipset_sl82c105(struct pci_dev *dev)
-{
-	u32 val;
-
-	pci_read_config_dword(dev, 0x40, &val);
-	val |= CTRL_P0EN | CTRL_P0F16 | CTRL_P1F16;
-	pci_write_config_dword(dev, 0x40, val);
-
-	return 0;
-}
-
-static const struct ide_port_ops sl82c105_port_ops = {
-	.set_pio_mode		= sl82c105_set_pio_mode,
-	.set_dma_mode		= sl82c105_set_dma_mode,
-	.resetproc		= sl82c105_resetproc,
-	.test_irq		= sl82c105_test_irq,
-};
-
-static const struct ide_dma_ops sl82c105_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= sl82c105_dma_start,
-	.dma_end		= sl82c105_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= sl82c105_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_clear		= sl82c105_dma_clear,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info sl82c105_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_sl82c105,
-	.enablebits	= {{0x40,0x01,0x01}, {0x40,0x10,0x10}},
-	.port_ops	= &sl82c105_port_ops,
-	.dma_ops	= &sl82c105_dma_ops,
-	.host_flags	= IDE_HFLAG_IO_32BIT |
-			  IDE_HFLAG_UNMASK_IRQS |
-			  IDE_HFLAG_SERIALIZE_DMA |
-			  IDE_HFLAG_NO_AUTODMA,
-	.pio_mask	= ATA_PIO5,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-static int sl82c105_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct ide_port_info d = sl82c105_chipset;
-	u8 rev = sl82c105_bridge_revision(dev);
-
-	if (rev <= 5) {
-		/*
-		 * Never ever EVER under any circumstances enable
-		 * DMA when the bridge is this old.
-		 */
-		printk(KERN_INFO DRV_NAME ": Winbond W83C553 bridge "
-				 "revision %d, BM-DMA disabled\n", rev);
-		d.dma_ops = NULL;
-		d.mwdma_mask = 0;
-		d.host_flags &= ~IDE_HFLAG_SERIALIZE_DMA;
-	}
-
-	return ide_pci_init_one(dev, &d, NULL);
-}
-
-static const struct pci_device_id sl82c105_pci_tbl[] = {
-	{ PCI_VDEVICE(WINBOND, PCI_DEVICE_ID_WINBOND_82C105), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, sl82c105_pci_tbl);
-
-static struct pci_driver sl82c105_pci_driver = {
-	.name		= "W82C105_IDE",
-	.id_table	= sl82c105_pci_tbl,
-	.probe		= sl82c105_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init sl82c105_ide_init(void)
-{
-	return ide_pci_register_driver(&sl82c105_pci_driver);
-}
-
-static void __exit sl82c105_ide_exit(void)
-{
-	pci_unregister_driver(&sl82c105_pci_driver);
-}
-
-module_init(sl82c105_ide_init);
-module_exit(sl82c105_ide_exit);
-
-MODULE_DESCRIPTION("PCI driver module for W82C105 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/slc90e66.c b/drivers/ide/slc90e66.c
deleted file mode 100644
index f521d5ebf9167..0000000000000
--- a/drivers/ide/slc90e66.c
+++ /dev/null
@@ -1,182 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 2000-2002 Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2006-2007 MontaVista Software, Inc. <source@mvista.com>
- *
- * This is a look-alike variation of the ICH0 PIIX4 Ultra-66,
- * but this keeps the ISA-Bridge and slots alive.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "slc90e66"
-
-static DEFINE_SPINLOCK(slc90e66_lock);
-
-static void slc90e66_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	int is_slave		= drive->dn & 1;
-	int master_port		= hwif->channel ? 0x42 : 0x40;
-	int slave_port		= 0x44;
-	unsigned long flags;
-	u16 master_data;
-	u8 slave_data;
-	int control = 0;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-				     /* ISP  RTC */
-	static const u8 timings[][2] = {
-					{ 0, 0 },
-					{ 0, 0 },
-					{ 1, 0 },
-					{ 2, 1 },
-					{ 2, 3 }, };
-
-	spin_lock_irqsave(&slc90e66_lock, flags);
-	pci_read_config_word(dev, master_port, &master_data);
-
-	if (pio > 1)
-		control |= 1;	/* Programmable timing on */
-	if (drive->media == ide_disk)
-		control |= 4;	/* Prefetch, post write */
-	if (ide_pio_need_iordy(drive, pio))
-		control |= 2;	/* IORDY */
-	if (is_slave) {
-		master_data |=  0x4000;
-		master_data &= ~0x0070;
-		if (pio > 1) {
-			/* Set PPE, IE and TIME */
-			master_data |= control << 4;
-		}
-		pci_read_config_byte(dev, slave_port, &slave_data);
-		slave_data &= hwif->channel ? 0x0f : 0xf0;
-		slave_data |= ((timings[pio][0] << 2) | timings[pio][1]) <<
-			       (hwif->channel ? 4 : 0);
-	} else {
-		master_data &= ~0x3307;
-		if (pio > 1) {
-			/* enable PPE, IE and TIME */
-			master_data |= control;
-		}
-		master_data |= (timings[pio][0] << 12) | (timings[pio][1] << 8);
-	}
-	pci_write_config_word(dev, master_port, master_data);
-	if (is_slave)
-		pci_write_config_byte(dev, slave_port, slave_data);
-	spin_unlock_irqrestore(&slc90e66_lock, flags);
-}
-
-static void slc90e66_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	u8 maslave		= hwif->channel ? 0x42 : 0x40;
-	int sitre = 0, a_speed	= 7 << (drive->dn * 4);
-	int u_speed = 0, u_flag = 1 << drive->dn;
-	u16			reg4042, reg44, reg48, reg4a;
-	const u8 speed		= drive->dma_mode;
-
-	pci_read_config_word(dev, maslave, &reg4042);
-	sitre = (reg4042 & 0x4000) ? 1 : 0;
-	pci_read_config_word(dev, 0x44, &reg44);
-	pci_read_config_word(dev, 0x48, &reg48);
-	pci_read_config_word(dev, 0x4a, &reg4a);
-
-	if (speed >= XFER_UDMA_0) {
-		u_speed = (speed - XFER_UDMA_0) << (drive->dn * 4);
-
-		if (!(reg48 & u_flag))
-			pci_write_config_word(dev, 0x48, reg48|u_flag);
-		if ((reg4a & a_speed) != u_speed) {
-			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
-			pci_read_config_word(dev, 0x4a, &reg4a);
-			pci_write_config_word(dev, 0x4a, reg4a|u_speed);
-		}
-	} else {
-		const u8 mwdma_to_pio[] = { 0, 3, 4 };
-
-		if (reg48 & u_flag)
-			pci_write_config_word(dev, 0x48, reg48 & ~u_flag);
-		if (reg4a & a_speed)
-			pci_write_config_word(dev, 0x4a, reg4a & ~a_speed);
-
-		if (speed >= XFER_MW_DMA_0)
-			drive->pio_mode =
-				mwdma_to_pio[speed - XFER_MW_DMA_0] + XFER_PIO_0;
-		else
-			drive->pio_mode = XFER_PIO_2; /* for SWDMA2 */
-
-		slc90e66_set_pio_mode(hwif, drive);
-	}
-}
-
-static u8 slc90e66_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u8 reg47 = 0, mask = hwif->channel ? 0x01 : 0x02;
-
-	pci_read_config_byte(dev, 0x47, &reg47);
-
-	/* bit[0(1)]: 0:80, 1:40 */
-	return (reg47 & mask) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-static const struct ide_port_ops slc90e66_port_ops = {
-	.set_pio_mode		= slc90e66_set_pio_mode,
-	.set_dma_mode		= slc90e66_set_dma_mode,
-	.cable_detect		= slc90e66_cable_detect,
-};
-
-static const struct ide_port_info slc90e66_chipset = {
-	.name		= DRV_NAME,
-	.enablebits	= { {0x41, 0x80, 0x80}, {0x43, 0x80, 0x80} },
-	.port_ops	= &slc90e66_port_ops,
-	.pio_mask	= ATA_PIO4,
-	.swdma_mask	= ATA_SWDMA2_ONLY,
-	.mwdma_mask	= ATA_MWDMA12_ONLY,
-	.udma_mask	= ATA_UDMA4,
-};
-
-static int slc90e66_init_one(struct pci_dev *dev,
-			     const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &slc90e66_chipset, NULL);
-}
-
-static const struct pci_device_id slc90e66_pci_tbl[] = {
-	{ PCI_VDEVICE(EFAR, PCI_DEVICE_ID_EFAR_SLC90E66_1), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, slc90e66_pci_tbl);
-
-static struct pci_driver slc90e66_pci_driver = {
-	.name		= "SLC90e66_IDE",
-	.id_table	= slc90e66_pci_tbl,
-	.probe		= slc90e66_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init slc90e66_ide_init(void)
-{
-	return ide_pci_register_driver(&slc90e66_pci_driver);
-}
-
-static void __exit slc90e66_ide_exit(void)
-{
-	pci_unregister_driver(&slc90e66_pci_driver);
-}
-
-module_init(slc90e66_ide_init);
-module_exit(slc90e66_ide_exit);
-
-MODULE_AUTHOR("Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for SLC90E66 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/tc86c001.c b/drivers/ide/tc86c001.c
deleted file mode 100644
index 17e6132b99bf0..0000000000000
--- a/drivers/ide/tc86c001.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * Copyright (C) 2002 Toshiba Corporation
- * Copyright (C) 2005-2006 MontaVista Software, Inc. <source@mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#include <linux/types.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/module.h>
-
-#define DRV_NAME "tc86c001"
-
-static void tc86c001_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	unsigned long scr_port	= hwif->config_data + (drive->dn ? 0x02 : 0x00);
-	u16 mode, scr		= inw(scr_port);
-	const u8 speed		= drive->dma_mode;
-
-	switch (speed) {
-	case XFER_UDMA_4:	mode = 0x00c0; break;
-	case XFER_UDMA_3:	mode = 0x00b0; break;
-	case XFER_UDMA_2:	mode = 0x00a0; break;
-	case XFER_UDMA_1:	mode = 0x0090; break;
-	case XFER_UDMA_0:	mode = 0x0080; break;
-	case XFER_MW_DMA_2:	mode = 0x0070; break;
-	case XFER_MW_DMA_1:	mode = 0x0060; break;
-	case XFER_MW_DMA_0:	mode = 0x0050; break;
-	case XFER_PIO_4:	mode = 0x0400; break;
-	case XFER_PIO_3:	mode = 0x0300; break;
-	case XFER_PIO_2:	mode = 0x0200; break;
-	case XFER_PIO_1:	mode = 0x0100; break;
-	case XFER_PIO_0:
-	default:		mode = 0x0000; break;
-	}
-
-	scr &= (speed < XFER_MW_DMA_0) ? 0xf8ff : 0xff0f;
-	scr |= mode;
-	outw(scr, scr_port);
-}
-
-static void tc86c001_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	tc86c001_set_mode(hwif, drive);
-}
-
-/*
- * HACKITY HACK
- *
- * This is a workaround for the limitation 5 of the TC86C001 IDE controller:
- * if a DMA transfer terminates prematurely, the controller leaves the device's
- * interrupt request (INTRQ) pending and does not generate a PCI interrupt (or
- * set the interrupt bit in the DMA status register), thus no PCI interrupt
- * will occur until a DMA transfer has been successfully completed.
- *
- * We work around this by initiating dummy, zero-length DMA transfer on
- * a DMA timeout expiration. I found no better way to do this with the current
- * IDE core than to temporarily replace a higher level driver's timer expiry
- * handler with our own backing up to that handler in case our recovery fails.
- */
-static int tc86c001_timer_expiry(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	ide_expiry_t *expiry	= ide_get_hwifdata(hwif);
-	u8 dma_stat		= inb(hwif->dma_base + ATA_DMA_STATUS);
-
-	/* Restore a higher level driver's expiry handler first. */
-	hwif->expiry = expiry;
-
-	if ((dma_stat & 5) == 1) {	/* DMA active and no interrupt */
-		unsigned long sc_base	= hwif->config_data;
-		unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-		u8 dma_cmd		= inb(hwif->dma_base + ATA_DMA_CMD);
-
-		printk(KERN_WARNING "%s: DMA interrupt possibly stuck, "
-		       "attempting recovery...\n", drive->name);
-
-		/* Stop DMA */
-		outb(dma_cmd & ~0x01, hwif->dma_base + ATA_DMA_CMD);
-
-		/* Setup the dummy DMA transfer */
-		outw(0, sc_base + 0x0a);	/* Sector Count */
-		outw(0, twcr_port);	/* Transfer Word Count 1 or 2 */
-
-		/* Start the dummy DMA transfer */
-
-		/* clear R_OR_WCTR for write */
-		outb(0x00, hwif->dma_base + ATA_DMA_CMD);
-		/* set START_STOPBM */
-		outb(0x01, hwif->dma_base + ATA_DMA_CMD);
-
-		/*
-		 * If an interrupt was pending, it should come thru shortly.
-		 * If not, a higher level driver's expiry handler should
-		 * eventually cause some kind of recovery from the DMA stall.
-		 */
-		return WAIT_MIN_SLEEP;
-	}
-
-	/* Chain to the restored expiry handler if DMA wasn't active. */
-	if (likely(expiry != NULL))
-		return expiry(drive);
-
-	/* If there was no handler, "emulate" that for ide_timer_expiry()... */
-	return -1;
-}
-
-static void tc86c001_dma_start(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif	= drive->hwif;
-	unsigned long sc_base	= hwif->config_data;
-	unsigned long twcr_port	= sc_base + (drive->dn ? 0x06 : 0x04);
-	unsigned long nsectors	= blk_rq_sectors(hwif->rq);
-
-	/*
-	 * We have to manually load the sector count and size into
-	 * the appropriate system control registers for DMA to work
-	 * with LBA48 and ATAPI devices...
-	 */
-	outw(nsectors, sc_base + 0x0a);	/* Sector Count */
-	outw(SECTOR_SIZE / 2, twcr_port); /* Transfer Word Count 1/2 */
-
-	/* Install our timeout expiry hook, saving the current handler... */
-	ide_set_hwifdata(hwif, hwif->expiry);
-	hwif->expiry = &tc86c001_timer_expiry;
-
-	ide_dma_start(drive);
-}
-
-static u8 tc86c001_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	unsigned long sc_base = pci_resource_start(dev, 5);
-	u16 scr1 = inw(sc_base + 0x00);
-
-	/*
-	 * System Control  1 Register bit 13 (PDIAGN):
-	 * 0=80-pin cable, 1=40-pin cable
-	 */
-	return (scr1 & 0x2000) ? ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-static void init_hwif_tc86c001(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned long sc_base	= pci_resource_start(dev, 5);
-	u16 scr1		= inw(sc_base + 0x00);
-
-	/* System Control 1 Register bit 15 (Soft Reset) set */
-	outw(scr1 |  0x8000, sc_base + 0x00);
-
-	/* System Control 1 Register bit 14 (FIFO Reset) set */
-	outw(scr1 |  0x4000, sc_base + 0x00);
-
-	/* System Control 1 Register: reset clear */
-	outw(scr1 & ~0xc000, sc_base + 0x00);
-
-	/* Store the system control register base for convenience... */
-	hwif->config_data = sc_base;
-
-	if (!hwif->dma_base)
-		return;
-
-	/*
-	 * Sector Count Control Register bits 0 and 1 set:
-	 * software sets Sector Count Register for master and slave device
-	 */
-	outw(0x0003, sc_base + 0x0c);
-
-	/* Sector Count Register limit */
-	hwif->rqsize	 = 0xffff;
-}
-
-static const struct ide_port_ops tc86c001_port_ops = {
-	.set_pio_mode		= tc86c001_set_pio_mode,
-	.set_dma_mode		= tc86c001_set_mode,
-	.cable_detect		= tc86c001_cable_detect,
-};
-
-static const struct ide_dma_ops tc86c001_dma_ops = {
-	.dma_host_set		= ide_dma_host_set,
-	.dma_setup		= ide_dma_setup,
-	.dma_start		= tc86c001_dma_start,
-	.dma_end		= ide_dma_end,
-	.dma_test_irq		= ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info tc86c001_chipset = {
-	.name		= DRV_NAME,
-	.init_hwif	= init_hwif_tc86c001,
-	.port_ops	= &tc86c001_port_ops,
-	.dma_ops	= &tc86c001_dma_ops,
-	.host_flags	= IDE_HFLAG_SINGLE | IDE_HFLAG_OFF_BOARD,
-	.pio_mask	= ATA_PIO4,
-	.mwdma_mask	= ATA_MWDMA2,
-	.udma_mask	= ATA_UDMA4,
-};
-
-static int tc86c001_init_one(struct pci_dev *dev,
-			     const struct pci_device_id *id)
-{
-	int rc;
-
-	rc = pci_enable_device(dev);
-	if (rc)
-		goto out;
-
-	rc = pci_request_region(dev, 5, DRV_NAME);
-	if (rc) {
-		printk(KERN_ERR DRV_NAME ": system control regs already in use");
-		goto out_disable;
-	}
-
-	rc = ide_pci_init_one(dev, &tc86c001_chipset, NULL);
-	if (rc)
-		goto out_release;
-
-	goto out;
-
-out_release:
-	pci_release_region(dev, 5);
-out_disable:
-	pci_disable_device(dev);
-out:
-	return rc;
-}
-
-static void tc86c001_remove(struct pci_dev *dev)
-{
-	ide_pci_remove(dev);
-	pci_release_region(dev, 5);
-	pci_disable_device(dev);
-}
-
-static const struct pci_device_id tc86c001_pci_tbl[] = {
-	{ PCI_VDEVICE(TOSHIBA_2, PCI_DEVICE_ID_TOSHIBA_TC86C001_IDE), 0 },
-	{ 0, }
-};
-MODULE_DEVICE_TABLE(pci, tc86c001_pci_tbl);
-
-static struct pci_driver tc86c001_pci_driver = {
-	.name		= "TC86C001",
-	.id_table	= tc86c001_pci_tbl,
-	.probe		= tc86c001_init_one,
-	.remove		= tc86c001_remove,
-};
-
-static int __init tc86c001_ide_init(void)
-{
-	return ide_pci_register_driver(&tc86c001_pci_driver);
-}
-
-static void __exit tc86c001_ide_exit(void)
-{
-	pci_unregister_driver(&tc86c001_pci_driver);
-}
-
-module_init(tc86c001_ide_init);
-module_exit(tc86c001_ide_exit);
-
-MODULE_AUTHOR("MontaVista Software, Inc. <source@mvista.com>");
-MODULE_DESCRIPTION("PCI driver module for TC86C001 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/triflex.c b/drivers/ide/triflex.c
deleted file mode 100644
index 16ddd09568325..0000000000000
--- a/drivers/ide/triflex.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * IDE Chipset driver for the Compaq TriFlex IDE controller.
- * 
- * Known to work with the Compaq Workstation 5x00 series.
- *
- * Copyright (C) 2002 Hewlett-Packard Development Group, L.P.
- * Author: Torben Mathiasen <torben.mathiasen@hp.com>
- * 
- * Loosely based on the piix & svwks drivers.
- *
- * Documentation:
- *	Not publicly available.
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#define DRV_NAME "triflex"
-
-static void triflex_set_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	u32 triflex_timings = 0;
-	u16 timing = 0;
-	u8 channel_offset = hwif->channel ? 0x74 : 0x70, unit = drive->dn & 1;
-
-	pci_read_config_dword(dev, channel_offset, &triflex_timings);
-
-	switch (drive->dma_mode) {
-		case XFER_MW_DMA_2:
-			timing = 0x0103; 
-			break;
-		case XFER_MW_DMA_1:
-			timing = 0x0203;
-			break;
-		case XFER_MW_DMA_0:
-			timing = 0x0808;
-			break;
-		case XFER_SW_DMA_2:
-		case XFER_SW_DMA_1:
-		case XFER_SW_DMA_0:
-			timing = 0x0f0f;
-			break;
-		case XFER_PIO_4:
-			timing = 0x0202;
-			break;
-		case XFER_PIO_3:
-			timing = 0x0204;
-			break;
-		case XFER_PIO_2:
-			timing = 0x0404;
-			break;
-		case XFER_PIO_1:
-			timing = 0x0508;
-			break;
-		case XFER_PIO_0:
-			timing = 0x0808;
-			break;
-	}
-
-	triflex_timings &= ~(0xFFFF << (16 * unit));
-	triflex_timings |= (timing << (16 * unit));
-	
-	pci_write_config_dword(dev, channel_offset, triflex_timings);
-}
-
-static void triflex_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	triflex_set_mode(hwif, drive);
-}
-
-static const struct ide_port_ops triflex_port_ops = {
-	.set_pio_mode		= triflex_set_pio_mode,
-	.set_dma_mode		= triflex_set_mode,
-};
-
-static const struct ide_port_info triflex_device = {
-	.name		= DRV_NAME,
-	.enablebits	= {{0x80, 0x01, 0x01}, {0x80, 0x02, 0x02}},
-	.port_ops	= &triflex_port_ops,
-	.pio_mask	= ATA_PIO4,
-	.swdma_mask	= ATA_SWDMA2,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-static int triflex_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &triflex_device, NULL);
-}
-
-static const struct pci_device_id triflex_pci_tbl[] = {
-	{ PCI_VDEVICE(COMPAQ, PCI_DEVICE_ID_COMPAQ_TRIFLEX_IDE), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, triflex_pci_tbl);
-
-#ifdef CONFIG_PM
-static int triflex_ide_pci_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	/*
-	 * We must not disable or powerdown the device.
-	 * APM bios refuses to suspend if IDE is not accessible.
-	 */
-	pci_save_state(dev);
-	return 0;
-}
-#else
-#define triflex_ide_pci_suspend NULL
-#endif
-
-static struct pci_driver triflex_pci_driver = {
-	.name		= "TRIFLEX_IDE",
-	.id_table	= triflex_pci_tbl,
-	.probe		= triflex_init_one,
-	.remove		= ide_pci_remove,
-	.suspend	= triflex_ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init triflex_ide_init(void)
-{
-	return ide_pci_register_driver(&triflex_pci_driver);
-}
-
-static void __exit triflex_ide_exit(void)
-{
-	pci_unregister_driver(&triflex_pci_driver);
-}
-
-module_init(triflex_ide_init);
-module_exit(triflex_ide_exit);
-
-MODULE_AUTHOR("Torben Mathiasen");
-MODULE_DESCRIPTION("PCI driver module for Compaq Triflex IDE");
-MODULE_LICENSE("GPL");
-
-
diff --git a/drivers/ide/trm290.c b/drivers/ide/trm290.c
deleted file mode 100644
index d550b379b0f11..0000000000000
--- a/drivers/ide/trm290.c
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- *  Copyright (c) 1997-1998  Mark Lord
- *  Copyright (c) 2007       MontaVista Software, Inc. <source@mvista.com>
- *
- *  May be copied or modified under the terms of the GNU General Public License
- *
- *  June 22, 2004 - get rid of check_region
- *                   - Jesper Juhl
- *
- */
-
-/*
- * This module provides support for the bus-master IDE DMA function
- * of the Tekram TRM290 chip, used on a variety of PCI IDE add-on boards,
- * including a "Precision Instruments" board.  The TRM290 pre-dates
- * the sff-8038 standard (ide-dma.c) by a few months, and differs
- * significantly enough to warrant separate routines for some functions,
- * while re-using others from ide-dma.c.
- *
- * EXPERIMENTAL!  It works for me (a sample of one).
- *
- * Works reliably for me in DMA mode (READs only),
- * DMA WRITEs are disabled by default (see #define below);
- *
- * DMA is not enabled automatically for this chipset,
- * but can be turned on manually (with "hdparm -d1") at run time.
- *
- * I need volunteers with "spare" drives for further testing
- * and development, and maybe to help figure out the peculiarities.
- * Even knowing the registers (below), some things behave strangely.
- */
-
-#define TRM290_NO_DMA_WRITES	/* DMA writes seem unreliable sometimes */
-
-/*
- * TRM-290 PCI-IDE2 Bus Master Chip
- * ================================
- * The configuration registers are addressed in normal I/O port space
- * and are used as follows:
- *
- * trm290_base depends on jumper settings, and is probed for by ide-dma.c
- *
- * trm290_base+2 when WRITTEN: chiptest register (byte, write-only)
- *	bit7 must always be written as "1"
- *	bits6-2 undefined
- *	bit1 1=legacy_compatible_mode, 0=native_pci_mode
- *	bit0 1=test_mode, 0=normal(default)
- *
- * trm290_base+2 when READ: status register (byte, read-only)
- *	bits7-2 undefined
- *	bit1 channel0 busmaster interrupt status 0=none, 1=asserted
- *	bit0 channel0 interrupt status 0=none, 1=asserted
- *
- * trm290_base+3 Interrupt mask register
- *	bits7-5 undefined
- *	bit4 legacy_header: 1=present, 0=absent
- *	bit3 channel1 busmaster interrupt status 0=none, 1=asserted (read only)
- *	bit2 channel1 interrupt status 0=none, 1=asserted (read only)
- *	bit1 channel1 interrupt mask: 1=masked, 0=unmasked(default)
- *	bit0 channel0 interrupt mask: 1=masked, 0=unmasked(default)
- *
- * trm290_base+1 "CPR" Config Pointer Register (byte)
- *	bit7 1=autoincrement CPR bits 2-0 after each access of CDR
- *	bit6 1=min. 1 wait-state posted write cycle (default), 0=0 wait-state
- *	bit5 0=enabled master burst access (default), 1=disable  (write only)
- *	bit4 PCI DEVSEL# timing select: 1=medium(default), 0=fast
- *	bit3 0=primary IDE channel, 1=secondary IDE channel
- *	bits2-0 register index for accesses through CDR port
- *
- * trm290_base+0 "CDR" Config Data Register (word)
- *	two sets of seven config registers,
- *	selected by CPR bit 3 (channel) and CPR bits 2-0 (index 0 to 6),
- *	each index defined below:
- *
- * Index-0 Base address register for command block (word)
- *	defaults: 0x1f0 for primary, 0x170 for secondary
- *
- * Index-1 general config register (byte)
- *	bit7 1=DMA enable, 0=DMA disable
- *	bit6 1=activate IDE_RESET, 0=no action (default)
- *	bit5 1=enable IORDY, 0=disable IORDY (default)
- *	bit4 0=16-bit data port(default), 1=8-bit (XT) data port
- *	bit3 interrupt polarity: 1=active_low, 0=active_high(default)
- *	bit2 power-saving-mode(?): 1=enable, 0=disable(default) (write only)
- *	bit1 bus_master_mode(?): 1=enable, 0=disable(default)
- *	bit0 enable_io_ports: 1=enable(default), 0=disable
- *
- * Index-2 read-ahead counter preload bits 0-7 (byte, write only)
- *	bits7-0 bits7-0 of readahead count
- *
- * Index-3 read-ahead config register (byte, write only)
- *	bit7 1=enable_readahead, 0=disable_readahead(default)
- *	bit6 1=clear_FIFO, 0=no_action
- *	bit5 undefined
- *	bit4 mode4 timing control: 1=enable, 0=disable(default)
- *	bit3 undefined
- *	bit2 undefined
- *	bits1-0 bits9-8 of read-ahead count
- *
- * Index-4 base address register for control block (word)
- *	defaults: 0x3f6 for primary, 0x376 for secondary
- *
- * Index-5 data port timings (shared by both drives) (byte)
- *	standard PCI "clk" (clock) counts, default value = 0xf5
- *
- *	bits7-6 setup time:  00=1clk, 01=2clk, 10=3clk, 11=4clk
- *	bits5-3 hold time:	000=1clk, 001=2clk, 010=3clk,
- *				011=4clk, 100=5clk, 101=6clk,
- *				110=8clk, 111=12clk
- *	bits2-0 active time:	000=2clk, 001=3clk, 010=4clk,
- *				011=5clk, 100=6clk, 101=8clk,
- *				110=12clk, 111=16clk
- *
- * Index-6 command/control port timings (shared by both drives) (byte)
- *	same layout as Index-5, default value = 0xde
- *
- * Suggested CDR programming for PIO mode0 (600ns):
- *	0x01f0,0x21,0xff,0x80,0x03f6,0xf5,0xde	; primary
- *	0x0170,0x21,0xff,0x80,0x0376,0xf5,0xde	; secondary
- *
- * Suggested CDR programming for PIO mode3 (180ns):
- *	0x01f0,0x21,0xff,0x80,0x03f6,0x09,0xde	; primary
- *	0x0170,0x21,0xff,0x80,0x0376,0x09,0xde	; secondary
- *
- * Suggested CDR programming for PIO mode4 (120ns):
- *	0x01f0,0x21,0xff,0x80,0x03f6,0x00,0xde	; primary
- *	0x0170,0x21,0xff,0x80,0x0376,0x00,0xde	; secondary
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/blkdev.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/ide.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "trm290"
-
-static void trm290_prepare_drive (ide_drive_t *drive, unsigned int use_dma)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u16 reg = 0;
-	unsigned long flags;
-
-	/* select PIO or DMA */
-	reg = use_dma ? (0x21 | 0x82) : (0x21 & ~0x82);
-
-	local_irq_save(flags);
-
-	if (reg != hwif->select_data) {
-		hwif->select_data = reg;
-		/* set PIO/DMA */
-		outb(0x51 | (hwif->channel << 3), hwif->config_data + 1);
-		outw(reg & 0xff, hwif->config_data);
-	}
-
-	/* enable IRQ if not probing */
-	if (drive->dev_flags & IDE_DFLAG_PRESENT) {
-		reg = inw(hwif->config_data + 3);
-		reg &= 0x13;
-		reg &= ~(1 << hwif->channel);
-		outw(reg, hwif->config_data + 3);
-	}
-
-	local_irq_restore(flags);
-}
-
-static void trm290_dev_select(ide_drive_t *drive)
-{
-	trm290_prepare_drive(drive, !!(drive->dev_flags & IDE_DFLAG_USING_DMA));
-
-	outb(drive->select | ATA_DEVICE_OBS, drive->hwif->io_ports.device_addr);
-}
-
-static int trm290_dma_check(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	if (cmd->tf_flags & IDE_TFLAG_WRITE) {
-#ifdef TRM290_NO_DMA_WRITES
-		/* always use PIO for writes */
-		return 1;
-#endif
-	}
-	return 0;
-}
-
-static int trm290_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	unsigned int count, rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 1 : 2;
-
-	count = ide_build_dmatable(drive, cmd);
-	if (count == 0)
-		/* try PIO instead of DMA */
-		return 1;
-
-	outl(hwif->dmatable_dma | rw, hwif->dma_base);
-	/* start DMA */
-	outw(count * 2 - 1, hwif->dma_base + 2);
-
-	return 0;
-}
-
-static void trm290_dma_start(ide_drive_t *drive)
-{
-	trm290_prepare_drive(drive, 1);
-}
-
-static int trm290_dma_end(ide_drive_t *drive)
-{
-	u16 status = inw(drive->hwif->dma_base + 2);
-
-	trm290_prepare_drive(drive, 0);
-
-	return status != 0x00ff;
-}
-
-static int trm290_dma_test_irq(ide_drive_t *drive)
-{
-	u16 status = inw(drive->hwif->dma_base + 2);
-
-	return status == 0x00ff;
-}
-
-static void trm290_dma_host_set(ide_drive_t *drive, int on)
-{
-}
-
-static void init_hwif_trm290(ide_hwif_t *hwif)
-{
-	struct pci_dev *dev	= to_pci_dev(hwif->dev);
-	unsigned int  cfg_base	= pci_resource_start(dev, 4);
-	unsigned long flags;
-	u8 reg = 0;
-
-	if ((dev->class & 5) && cfg_base)
-		printk(KERN_INFO DRV_NAME " %s: chip", pci_name(dev));
-	else {
-		cfg_base = 0x3df0;
-		printk(KERN_INFO DRV_NAME " %s: using default", pci_name(dev));
-	}
-	printk(KERN_CONT " config base at 0x%04x\n", cfg_base);
-	hwif->config_data = cfg_base;
-	hwif->dma_base = (cfg_base + 4) ^ (hwif->channel ? 0x80 : 0);
-
-	printk(KERN_INFO "    %s: BM-DMA at 0x%04lx-0x%04lx\n",
-	       hwif->name, hwif->dma_base, hwif->dma_base + 3);
-
-	if (ide_allocate_dma_engine(hwif))
-		return;
-
-	local_irq_save(flags);
-	/* put config reg into first byte of hwif->select_data */
-	outb(0x51 | (hwif->channel << 3), hwif->config_data + 1);
-	/* select PIO as default */
-	hwif->select_data = 0x21;
-	outb(hwif->select_data, hwif->config_data);
-	/* get IRQ info */
-	reg = inb(hwif->config_data + 3);
-	/* mask IRQs for both ports */
-	reg = (reg & 0x10) | 0x03;
-	outb(reg, hwif->config_data + 3);
-	local_irq_restore(flags);
-
-	if (reg & 0x10)
-		/* legacy mode */
-		hwif->irq = hwif->channel ? 15 : 14;
-
-#if 1
-	{
-	/*
-	 * My trm290-based card doesn't seem to work with all possible values
-	 * for the control basereg, so this kludge ensures that we use only
-	 * values that are known to work.  Ugh.		-ml
-	 */
-		u16 new, old, compat = hwif->channel ? 0x374 : 0x3f4;
-		static u16 next_offset = 0;
-		u8 old_mask;
-
-		outb(0x54 | (hwif->channel << 3), hwif->config_data + 1);
-		old = inw(hwif->config_data);
-		old &= ~1;
-		old_mask = inb(old + 2);
-		if (old != compat && old_mask == 0xff) {
-			/* leave lower 10 bits untouched */
-			compat += (next_offset += 0x400);
-			hwif->io_ports.ctl_addr = compat + 2;
-			outw(compat | 1, hwif->config_data);
-			new = inw(hwif->config_data);
-			printk(KERN_INFO "%s: control basereg workaround: "
-				"old=0x%04x, new=0x%04x\n",
-				hwif->name, old, new & ~1);
-		}
-	}
-#endif
-}
-
-static const struct ide_tp_ops trm290_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= trm290_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-static const struct ide_dma_ops trm290_dma_ops = {
-	.dma_host_set		= trm290_dma_host_set,
-	.dma_setup 		= trm290_dma_setup,
-	.dma_start 		= trm290_dma_start,
-	.dma_end		= trm290_dma_end,
-	.dma_test_irq		= trm290_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_check		= trm290_dma_check,
-};
-
-static const struct ide_port_info trm290_chipset = {
-	.name		= DRV_NAME,
-	.init_hwif	= init_hwif_trm290,
-	.tp_ops 	= &trm290_tp_ops,
-	.dma_ops	= &trm290_dma_ops,
-	.host_flags	= IDE_HFLAG_TRM290 |
-			  IDE_HFLAG_NO_ATAPI_DMA |
-#if 0 /* play it safe for now */
-			  IDE_HFLAG_TRUST_BIOS_FOR_DMA |
-#endif
-			  IDE_HFLAG_NO_AUTODMA |
-			  IDE_HFLAG_NO_LBA48,
-};
-
-static int trm290_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	return ide_pci_init_one(dev, &trm290_chipset, NULL);
-}
-
-static const struct pci_device_id trm290_pci_tbl[] = {
-	{ PCI_VDEVICE(TEKRAM, PCI_DEVICE_ID_TEKRAM_DC290), 0 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, trm290_pci_tbl);
-
-static struct pci_driver trm290_pci_driver = {
-	.name		= "TRM290_IDE",
-	.id_table	= trm290_pci_tbl,
-	.probe		= trm290_init_one,
-	.remove		= ide_pci_remove,
-};
-
-static int __init trm290_ide_init(void)
-{
-	return ide_pci_register_driver(&trm290_pci_driver);
-}
-
-static void __exit trm290_ide_exit(void)
-{
-	pci_unregister_driver(&trm290_pci_driver);
-}
-
-module_init(trm290_ide_init);
-module_exit(trm290_ide_exit);
-
-MODULE_AUTHOR("Mark Lord");
-MODULE_DESCRIPTION("PCI driver module for Tekram TRM290 IDE");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c
deleted file mode 100644
index 962eb92501b5b..0000000000000
--- a/drivers/ide/tx4938ide.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * TX4938 internal IDE driver
- * Based on tx4939ide.c.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * (C) Copyright TOSHIBA CORPORATION 2005-2007
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-
-#include <asm/ide.h>
-#include <asm/txx9/tx4938.h>
-
-static void tx4938ide_tune_ebusc(unsigned int ebus_ch,
-				 unsigned int gbus_clock,
-				 u8 pio)
-{
-	struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio);
-	u64 cr = __raw_readq(&tx4938_ebuscptr->cr[ebus_ch]);
-	unsigned int sp = (cr >> 4) & 3;
-	unsigned int clock = gbus_clock / (4 - sp);
-	unsigned int cycle = 1000000000 / clock;
-	unsigned int shwt;
-	int wt;
-
-	/* Minimum DIOx- active time */
-	wt = DIV_ROUND_UP(t->act8b, cycle) - 2;
-	/* IORDY setup time: 35ns */
-	wt = max_t(int, wt, DIV_ROUND_UP(35, cycle));
-	/* actual wait-cycle is max(wt & ~1, 1) */
-	if (wt > 2 && (wt & 1))
-		wt++;
-	wt &= ~1;
-	/* Address-valid to DIOR/DIOW setup */
-	shwt = DIV_ROUND_UP(t->setup, cycle);
-
-	/* -DIOx recovery time (SHWT * 4) and cycle time requirement */
-	while ((shwt * 4 + wt + (wt ? 2 : 3)) * cycle < t->cycle)
-		shwt++;
-	if (shwt > 7) {
-		pr_warn("tx4938ide: SHWT violation (%d)\n", shwt);
-		shwt = 7;
-	}
-	pr_debug("tx4938ide: ebus %d, bus cycle %dns, WT %d, SHWT %d\n",
-		 ebus_ch, cycle, wt, shwt);
-
-	__raw_writeq((cr & ~0x3f007ull) | (wt << 12) | shwt,
-		     &tx4938_ebuscptr->cr[ebus_ch]);
-}
-
-static void tx4938ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	struct tx4938ide_platform_info *pdata = dev_get_platdata(hwif->dev);
-	u8 safe = drive->pio_mode - XFER_PIO_0;
-	ide_drive_t *pair;
-
-	pair = ide_get_pair_dev(drive);
-	if (pair)
-		safe = min_t(u8, safe, pair->pio_mode - XFER_PIO_0);
-	tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, safe);
-}
-
-#ifdef __BIG_ENDIAN
-
-/* custom iops (independent from SWAP_IO_SPACE) */
-static void tx4938ide_input_data_swap(ide_drive_t *drive, struct ide_cmd *cmd,
-				void *buf, unsigned int len)
-{
-	unsigned long port = drive->hwif->io_ports.data_addr;
-	unsigned short *ptr = buf;
-	unsigned int count = (len + 1) / 2;
-
-	while (count--)
-		*ptr++ = cpu_to_le16(__raw_readw((void __iomem *)port));
-	__ide_flush_dcache_range((unsigned long)buf, roundup(len, 2));
-}
-
-static void tx4938ide_output_data_swap(ide_drive_t *drive, struct ide_cmd *cmd,
-				void *buf, unsigned int len)
-{
-	unsigned long port = drive->hwif->io_ports.data_addr;
-	unsigned short *ptr = buf;
-	unsigned int count = (len + 1) / 2;
-
-	while (count--) {
-		__raw_writew(le16_to_cpu(*ptr), (void __iomem *)port);
-		ptr++;
-	}
-	__ide_flush_dcache_range((unsigned long)buf, roundup(len, 2));
-}
-
-static const struct ide_tp_ops tx4938ide_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= tx4938ide_input_data_swap,
-	.output_data		= tx4938ide_output_data_swap,
-};
-
-#endif	/* __BIG_ENDIAN */
-
-static const struct ide_port_ops tx4938ide_port_ops = {
-	.set_pio_mode		= tx4938ide_set_pio_mode,
-};
-
-static const struct ide_port_info tx4938ide_port_info __initconst = {
-	.port_ops		= &tx4938ide_port_ops,
-#ifdef __BIG_ENDIAN
-	.tp_ops			= &tx4938ide_tp_ops,
-#endif
-	.host_flags		= IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA,
-	.pio_mask		= ATA_PIO5,
-	.chipset		= ide_generic,
-};
-
-static int __init tx4938ide_probe(struct platform_device *pdev)
-{
-	struct ide_hw hw, *hws[] = { &hw };
-	struct ide_host *host;
-	struct resource *res;
-	struct tx4938ide_platform_info *pdata = dev_get_platdata(&pdev->dev);
-	int irq, ret, i;
-	unsigned long mapbase, mapctl;
-	struct ide_port_info d = tx4938ide_port_info;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return -ENODEV;
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), "tx4938ide"))
-		return -EBUSY;
-	mapbase = (unsigned long)devm_ioremap(&pdev->dev, res->start,
-					      8 << pdata->ioport_shift);
-	mapctl = (unsigned long)devm_ioremap(&pdev->dev,
-					     res->start + 0x10000 +
-					     (6 << pdata->ioport_shift),
-					     1 << pdata->ioport_shift);
-	if (!mapbase || !mapctl)
-		return -EBUSY;
-
-	memset(&hw, 0, sizeof(hw));
-	if (pdata->ioport_shift) {
-		unsigned long port = mapbase;
-		unsigned long ctl = mapctl;
-
-		hw.io_ports_array[0] = port;
-#ifdef __BIG_ENDIAN
-		port++;
-		ctl++;
-#endif
-		for (i = 1; i <= 7; i++)
-			hw.io_ports_array[i] =
-				port + (i << pdata->ioport_shift);
-		hw.io_ports.ctl_addr = ctl;
-	} else
-		ide_std_init_ports(&hw, mapbase, mapctl);
-	hw.irq = irq;
-	hw.dev = &pdev->dev;
-
-	pr_info("TX4938 IDE interface (base %#lx, ctl %#lx, irq %d)\n",
-		mapbase, mapctl, hw.irq);
-	if (pdata->gbus_clock)
-		tx4938ide_tune_ebusc(pdata->ebus_ch, pdata->gbus_clock, 0);
-	else
-		d.port_ops = NULL;
-	ret = ide_host_add(&d, hws, 1, &host);
-	if (!ret)
-		platform_set_drvdata(pdev, host);
-	return ret;
-}
-
-static int __exit tx4938ide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = platform_get_drvdata(pdev);
-
-	ide_host_remove(host);
-	return 0;
-}
-
-static struct platform_driver tx4938ide_driver = {
-	.driver		= {
-		.name	= "tx4938ide",
-	},
-	.remove = __exit_p(tx4938ide_remove),
-};
-
-module_platform_driver_probe(tx4938ide_driver, tx4938ide_probe);
-
-MODULE_DESCRIPTION("TX4938 internal IDE driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:tx4938ide");
diff --git a/drivers/ide/tx4939ide.c b/drivers/ide/tx4939ide.c
deleted file mode 100644
index b1bbf807bb3d1..0000000000000
--- a/drivers/ide/tx4939ide.c
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * TX4939 internal IDE driver
- * Based on RBTX49xx patch from CELF patch archive.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * (C) Copyright TOSHIBA CORPORATION 2005-2007
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/scatterlist.h>
-
-#include <asm/ide.h>
-
-#define MODNAME	"tx4939ide"
-
-/* ATA Shadow Registers (8-bit except for Data which is 16-bit) */
-#define TX4939IDE_Data			0x000
-#define TX4939IDE_Error_Feature		0x001
-#define TX4939IDE_Sec			0x002
-#define TX4939IDE_LBA0			0x003
-#define TX4939IDE_LBA1			0x004
-#define TX4939IDE_LBA2			0x005
-#define TX4939IDE_DevHead		0x006
-#define TX4939IDE_Stat_Cmd		0x007
-#define TX4939IDE_AltStat_DevCtl	0x402
-/* H/W DMA Registers  */
-#define TX4939IDE_DMA_Cmd	0x800	/* 8-bit */
-#define TX4939IDE_DMA_Stat	0x802	/* 8-bit */
-#define TX4939IDE_PRD_Ptr	0x804	/* 32-bit */
-/* ATA100 CORE Registers (16-bit) */
-#define TX4939IDE_Sys_Ctl	0xc00
-#define TX4939IDE_Xfer_Cnt_1	0xc08
-#define TX4939IDE_Xfer_Cnt_2	0xc0a
-#define TX4939IDE_Sec_Cnt	0xc10
-#define TX4939IDE_Start_Lo_Addr	0xc18
-#define TX4939IDE_Start_Up_Addr	0xc20
-#define TX4939IDE_Add_Ctl	0xc28
-#define TX4939IDE_Lo_Burst_Cnt	0xc30
-#define TX4939IDE_Up_Burst_Cnt	0xc38
-#define TX4939IDE_PIO_Addr	0xc88
-#define TX4939IDE_H_Rst_Tim	0xc90
-#define TX4939IDE_Int_Ctl	0xc98
-#define TX4939IDE_Pkt_Cmd	0xcb8
-#define TX4939IDE_Bxfer_Cnt_Hi	0xcc0
-#define TX4939IDE_Bxfer_Cnt_Lo	0xcc8
-#define TX4939IDE_Dev_TErr	0xcd0
-#define TX4939IDE_Pkt_Xfer_Ctl	0xcd8
-#define TX4939IDE_Start_TAddr	0xce0
-
-/* bits for Int_Ctl */
-#define TX4939IDE_INT_ADDRERR	0x80
-#define TX4939IDE_INT_REACHMUL	0x40
-#define TX4939IDE_INT_DEVTIMING	0x20
-#define TX4939IDE_INT_UDMATERM	0x10
-#define TX4939IDE_INT_TIMER	0x08
-#define TX4939IDE_INT_BUSERR	0x04
-#define TX4939IDE_INT_XFEREND	0x02
-#define TX4939IDE_INT_HOST	0x01
-
-#define TX4939IDE_IGNORE_INTS	\
-	(TX4939IDE_INT_ADDRERR | TX4939IDE_INT_REACHMUL | \
-	 TX4939IDE_INT_DEVTIMING | TX4939IDE_INT_UDMATERM | \
-	 TX4939IDE_INT_TIMER | TX4939IDE_INT_XFEREND)
-
-#ifdef __BIG_ENDIAN
-#define tx4939ide_swizzlel(a)	((a) ^ 4)
-#define tx4939ide_swizzlew(a)	((a) ^ 6)
-#define tx4939ide_swizzleb(a)	((a) ^ 7)
-#else
-#define tx4939ide_swizzlel(a)	(a)
-#define tx4939ide_swizzlew(a)	(a)
-#define tx4939ide_swizzleb(a)	(a)
-#endif
-
-static u16 tx4939ide_readw(void __iomem *base, u32 reg)
-{
-	return __raw_readw(base + tx4939ide_swizzlew(reg));
-}
-static u8 tx4939ide_readb(void __iomem *base, u32 reg)
-{
-	return __raw_readb(base + tx4939ide_swizzleb(reg));
-}
-static void tx4939ide_writel(u32 val, void __iomem *base, u32 reg)
-{
-	__raw_writel(val, base + tx4939ide_swizzlel(reg));
-}
-static void tx4939ide_writew(u16 val, void __iomem *base, u32 reg)
-{
-	__raw_writew(val, base + tx4939ide_swizzlew(reg));
-}
-static void tx4939ide_writeb(u8 val, void __iomem *base, u32 reg)
-{
-	__raw_writeb(val, base + tx4939ide_swizzleb(reg));
-}
-
-#define TX4939IDE_BASE(hwif)	((void __iomem *)(hwif)->extra_base)
-
-static void tx4939ide_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	int is_slave = drive->dn;
-	u32 mask, val;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-	u8 safe = pio;
-	ide_drive_t *pair;
-
-	pair = ide_get_pair_dev(drive);
-	if (pair)
-		safe = min_t(u8, safe, pair->pio_mode - XFER_PIO_0);
-	/*
-	 * Update Command Transfer Mode for master/slave and Data
-	 * Transfer Mode for this drive.
-	 */
-	mask = is_slave ? 0x07f00000 : 0x000007f0;
-	val = ((safe << 8) | (pio << 4)) << (is_slave ? 16 : 0);
-	hwif->select_data = (hwif->select_data & ~mask) | val;
-	/* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */
-}
-
-static void tx4939ide_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	u32 mask, val;
-	const u8 mode = drive->dma_mode;
-
-	/* Update Data Transfer Mode for this drive. */
-	if (mode >= XFER_UDMA_0)
-		val = mode - XFER_UDMA_0 + 8;
-	else
-		val = mode - XFER_MW_DMA_0 + 5;
-	if (drive->dn) {
-		mask = 0x00f00000;
-		val <<= 20;
-	} else {
-		mask = 0x000000f0;
-		val <<= 4;
-	}
-	hwif->select_data = (hwif->select_data & ~mask) | val;
-	/* tx4939ide_tf_load_fixup() will set the Sys_Ctl register */
-}
-
-static u16 tx4939ide_check_error_ints(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u16 ctl = tx4939ide_readw(base, TX4939IDE_Int_Ctl);
-
-	if (ctl & TX4939IDE_INT_BUSERR) {
-		/* reset FIFO */
-		u16 sysctl = tx4939ide_readw(base, TX4939IDE_Sys_Ctl);
-
-		tx4939ide_writew(sysctl | 0x4000, base, TX4939IDE_Sys_Ctl);
-		/* wait 12GBUSCLK (typ. 60ns @ GBUS200MHz, max 270ns) */
-		ndelay(270);
-		tx4939ide_writew(sysctl, base, TX4939IDE_Sys_Ctl);
-	}
-	if (ctl & (TX4939IDE_INT_ADDRERR |
-		   TX4939IDE_INT_DEVTIMING | TX4939IDE_INT_BUSERR))
-		pr_err("%s: Error interrupt %#x (%s%s%s )\n",
-		       hwif->name, ctl,
-		       ctl & TX4939IDE_INT_ADDRERR ? " Address-Error" : "",
-		       ctl & TX4939IDE_INT_DEVTIMING ? " DEV-Timing" : "",
-		       ctl & TX4939IDE_INT_BUSERR ? " Bus-Error" : "");
-	return ctl;
-}
-
-static void tx4939ide_clear_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif;
-	void __iomem *base;
-	u16 ctl;
-
-	/*
-	 * tx4939ide_dma_test_irq() and tx4939ide_dma_end() do all job
-	 * for DMA case.
-	 */
-	if (drive->waiting_for_dma)
-		return;
-	hwif = drive->hwif;
-	base = TX4939IDE_BASE(hwif);
-	ctl = tx4939ide_check_error_ints(hwif);
-	tx4939ide_writew(ctl, base, TX4939IDE_Int_Ctl);
-}
-
-static u8 tx4939ide_cable_detect(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-
-	return tx4939ide_readw(base, TX4939IDE_Sys_Ctl) & 0x2000 ?
-		ATA_CBL_PATA40 : ATA_CBL_PATA80;
-}
-
-#ifdef __BIG_ENDIAN
-static void tx4939ide_dma_host_set(ide_drive_t *drive, int on)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 unit = drive->dn;
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u8 dma_stat = tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-
-	if (on)
-		dma_stat |= (1 << (5 + unit));
-	else
-		dma_stat &= ~(1 << (5 + unit));
-
-	tx4939ide_writeb(dma_stat, base, TX4939IDE_DMA_Stat);
-}
-#else
-#define tx4939ide_dma_host_set	ide_dma_host_set
-#endif
-
-static u8 tx4939ide_clear_dma_status(void __iomem *base)
-{
-	u8 dma_stat;
-
-	/* read DMA status for INTR & ERROR flags */
-	dma_stat = tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-	/* clear INTR & ERROR flags */
-	tx4939ide_writeb(dma_stat | ATA_DMA_INTR | ATA_DMA_ERR, base,
-			 TX4939IDE_DMA_Stat);
-	/* recover intmask cleared by writing to bit2 of DMA_Stat */
-	tx4939ide_writew(TX4939IDE_IGNORE_INTS << 8, base, TX4939IDE_Int_Ctl);
-	return dma_stat;
-}
-
-#ifdef __BIG_ENDIAN
-/* custom ide_build_dmatable to handle swapped layout */
-static int tx4939ide_build_dmatable(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u32 *table = (u32 *)hwif->dmatable_cpu;
-	unsigned int count = 0;
-	int i;
-	struct scatterlist *sg;
-
-	for_each_sg(hwif->sg_table, sg, cmd->sg_nents, i) {
-		u32 cur_addr, cur_len, bcount;
-
-		cur_addr = sg_dma_address(sg);
-		cur_len = sg_dma_len(sg);
-
-		/*
-		 * Fill in the DMA table, without crossing any 64kB boundaries.
-		 */
-
-		while (cur_len) {
-			if (count++ >= PRD_ENTRIES)
-				goto use_pio_instead;
-
-			bcount = 0x10000 - (cur_addr & 0xffff);
-			if (bcount > cur_len)
-				bcount = cur_len;
-			/*
-			 * This workaround for zero count seems required.
-			 * (standard ide_build_dmatable does it too)
-			 */
-			if (bcount == 0x10000)
-				bcount = 0x8000;
-			*table++ = bcount & 0xffff;
-			*table++ = cur_addr;
-			cur_addr += bcount;
-			cur_len -= bcount;
-		}
-	}
-
-	if (count) {
-		*(table - 2) |= 0x80000000;
-		return count;
-	}
-
-use_pio_instead:
-	printk(KERN_ERR "%s: %s\n", drive->name,
-		count ? "DMA table too small" : "empty DMA table?");
-
-	return 0; /* revert to PIO for this request */
-}
-#else
-#define tx4939ide_build_dmatable	ide_build_dmatable
-#endif
-
-static int tx4939ide_dma_setup(ide_drive_t *drive, struct ide_cmd *cmd)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u8 rw = (cmd->tf_flags & IDE_TFLAG_WRITE) ? 0 : ATA_DMA_WR;
-
-	/* fall back to PIO! */
-	if (tx4939ide_build_dmatable(drive, cmd) == 0)
-		return 1;
-
-	/* PRD table */
-	tx4939ide_writel(hwif->dmatable_dma, base, TX4939IDE_PRD_Ptr);
-
-	/* specify r/w */
-	tx4939ide_writeb(rw, base, TX4939IDE_DMA_Cmd);
-
-	/* clear INTR & ERROR flags */
-	tx4939ide_clear_dma_status(base);
-
-	tx4939ide_writew(SECTOR_SIZE / 2, base, drive->dn ?
-			 TX4939IDE_Xfer_Cnt_2 : TX4939IDE_Xfer_Cnt_1);
-
-	tx4939ide_writew(blk_rq_sectors(cmd->rq), base, TX4939IDE_Sec_Cnt);
-
-	return 0;
-}
-
-static int tx4939ide_dma_end(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	u8 dma_stat, dma_cmd;
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u16 ctl = tx4939ide_readw(base, TX4939IDE_Int_Ctl);
-
-	/* get DMA command mode */
-	dma_cmd = tx4939ide_readb(base, TX4939IDE_DMA_Cmd);
-	/* stop DMA */
-	tx4939ide_writeb(dma_cmd & ~ATA_DMA_START, base, TX4939IDE_DMA_Cmd);
-
-	/* read and clear the INTR & ERROR bits */
-	dma_stat = tx4939ide_clear_dma_status(base);
-
-#define CHECK_DMA_MASK (ATA_DMA_ACTIVE | ATA_DMA_ERR | ATA_DMA_INTR)
-
-	/* verify good DMA status */
-	if ((dma_stat & CHECK_DMA_MASK) == 0 &&
-	    (ctl & (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST)) ==
-	    (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST))
-		/* INT_IDE lost... bug? */
-		return 0;
-	return ((dma_stat & CHECK_DMA_MASK) !=
-		ATA_DMA_INTR) ? 0x10 | dma_stat : 0;
-}
-
-/* returns 1 if DMA IRQ issued, 0 otherwise */
-static int tx4939ide_dma_test_irq(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u16 ctl, ide_int;
-	u8 dma_stat, stat;
-	int found = 0;
-
-	ctl = tx4939ide_check_error_ints(hwif);
-	ide_int = ctl & (TX4939IDE_INT_XFEREND | TX4939IDE_INT_HOST);
-	switch (ide_int) {
-	case TX4939IDE_INT_HOST:
-		/* On error, XFEREND might not be asserted. */
-		stat = tx4939ide_readb(base, TX4939IDE_AltStat_DevCtl);
-		if ((stat & (ATA_BUSY | ATA_DRQ | ATA_ERR)) == ATA_ERR)
-			found = 1;
-		else
-			/* Wait for XFEREND (Mask HOST and unmask XFEREND) */
-			ctl &= ~TX4939IDE_INT_XFEREND << 8;
-		ctl |= ide_int << 8;
-		break;
-	case TX4939IDE_INT_HOST | TX4939IDE_INT_XFEREND:
-		dma_stat = tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-		if (!(dma_stat & ATA_DMA_INTR))
-			pr_warn("%s: weird interrupt status. "
-				"DMA_Stat %#02x int_ctl %#04x\n",
-				hwif->name, dma_stat, ctl);
-		found = 1;
-		break;
-	}
-	/*
-	 * Do not clear XFEREND, HOST now.  They will be cleared by
-	 * clearing bit2 of DMA_Stat.
-	 */
-	ctl &= ~ide_int;
-	tx4939ide_writew(ctl, base, TX4939IDE_Int_Ctl);
-	return found;
-}
-
-#ifdef __BIG_ENDIAN
-static u8 tx4939ide_dma_sff_read_status(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-
-	return tx4939ide_readb(base, TX4939IDE_DMA_Stat);
-}
-#else
-#define tx4939ide_dma_sff_read_status ide_dma_sff_read_status
-#endif
-
-static void tx4939ide_init_hwif(ide_hwif_t *hwif)
-{
-	void __iomem *base = TX4939IDE_BASE(hwif);
-
-	/* Soft Reset */
-	tx4939ide_writew(0x8000, base, TX4939IDE_Sys_Ctl);
-	/* at least 20 GBUSCLK (typ. 100ns @ GBUS200MHz, max 450ns) */
-	ndelay(450);
-	tx4939ide_writew(0x0000, base, TX4939IDE_Sys_Ctl);
-	/* mask some interrupts and clear all interrupts */
-	tx4939ide_writew((TX4939IDE_IGNORE_INTS << 8) | 0xff, base,
-			 TX4939IDE_Int_Ctl);
-
-	tx4939ide_writew(0x0008, base, TX4939IDE_Lo_Burst_Cnt);
-	tx4939ide_writew(0, base, TX4939IDE_Up_Burst_Cnt);
-}
-
-static int tx4939ide_init_dma(ide_hwif_t *hwif, const struct ide_port_info *d)
-{
-	hwif->dma_base =
-		hwif->extra_base + tx4939ide_swizzleb(TX4939IDE_DMA_Cmd);
-	/*
-	 * Note that we cannot use ATA_DMA_TABLE_OFS, ATA_DMA_STATUS
-	 * for big endian.
-	 */
-	return ide_allocate_dma_engine(hwif);
-}
-
-static void tx4939ide_tf_load_fixup(ide_drive_t *drive)
-{
-	ide_hwif_t *hwif = drive->hwif;
-	void __iomem *base = TX4939IDE_BASE(hwif);
-	u16 sysctl = hwif->select_data >> (drive->dn ? 16 : 0);
-
-	/*
-	 * Fix ATA100 CORE System Control Register. (The write to the
-	 * Device/Head register may write wrong data to the System
-	 * Control Register)
-	 * While Sys_Ctl is written here, dev_select() is not needed.
-	 */
-	tx4939ide_writew(sysctl, base, TX4939IDE_Sys_Ctl);
-}
-
-static void tx4939ide_tf_load(ide_drive_t *drive, struct ide_taskfile *tf,
-			      u8 valid)
-{
-	ide_tf_load(drive, tf, valid);
-
-	if (valid & IDE_VALID_DEVICE)
-		tx4939ide_tf_load_fixup(drive);
-}
-
-#ifdef __BIG_ENDIAN
-
-/* custom iops (independent from SWAP_IO_SPACE) */
-static void tx4939ide_input_data_swap(ide_drive_t *drive, struct ide_cmd *cmd,
-				void *buf, unsigned int len)
-{
-	unsigned long port = drive->hwif->io_ports.data_addr;
-	unsigned short *ptr = buf;
-	unsigned int count = (len + 1) / 2;
-
-	while (count--)
-		*ptr++ = cpu_to_le16(__raw_readw((void __iomem *)port));
-	__ide_flush_dcache_range((unsigned long)buf, roundup(len, 2));
-}
-
-static void tx4939ide_output_data_swap(ide_drive_t *drive, struct ide_cmd *cmd,
-				void *buf, unsigned int len)
-{
-	unsigned long port = drive->hwif->io_ports.data_addr;
-	unsigned short *ptr = buf;
-	unsigned int count = (len + 1) / 2;
-
-	while (count--) {
-		__raw_writew(le16_to_cpu(*ptr), (void __iomem *)port);
-		ptr++;
-	}
-	__ide_flush_dcache_range((unsigned long)buf, roundup(len, 2));
-}
-
-static const struct ide_tp_ops tx4939ide_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= tx4939ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= tx4939ide_input_data_swap,
-	.output_data		= tx4939ide_output_data_swap,
-};
-
-#else	/* __LITTLE_ENDIAN */
-
-static const struct ide_tp_ops tx4939ide_tp_ops = {
-	.exec_command		= ide_exec_command,
-	.read_status		= ide_read_status,
-	.read_altstatus		= ide_read_altstatus,
-	.write_devctl		= ide_write_devctl,
-
-	.dev_select		= ide_dev_select,
-	.tf_load		= tx4939ide_tf_load,
-	.tf_read		= ide_tf_read,
-
-	.input_data		= ide_input_data,
-	.output_data		= ide_output_data,
-};
-
-#endif	/* __LITTLE_ENDIAN */
-
-static const struct ide_port_ops tx4939ide_port_ops = {
-	.set_pio_mode		= tx4939ide_set_pio_mode,
-	.set_dma_mode		= tx4939ide_set_dma_mode,
-	.clear_irq		= tx4939ide_clear_irq,
-	.cable_detect		= tx4939ide_cable_detect,
-};
-
-static const struct ide_dma_ops tx4939ide_dma_ops = {
-	.dma_host_set		= tx4939ide_dma_host_set,
-	.dma_setup		= tx4939ide_dma_setup,
-	.dma_start		= ide_dma_start,
-	.dma_end		= tx4939ide_dma_end,
-	.dma_test_irq		= tx4939ide_dma_test_irq,
-	.dma_lost_irq		= ide_dma_lost_irq,
-	.dma_timer_expiry	= ide_dma_sff_timer_expiry,
-	.dma_sff_read_status	= tx4939ide_dma_sff_read_status,
-};
-
-static const struct ide_port_info tx4939ide_port_info __initconst = {
-	.init_hwif		= tx4939ide_init_hwif,
-	.init_dma		= tx4939ide_init_dma,
-	.port_ops		= &tx4939ide_port_ops,
-	.dma_ops		= &tx4939ide_dma_ops,
-	.tp_ops			= &tx4939ide_tp_ops,
-	.host_flags		= IDE_HFLAG_MMIO,
-	.pio_mask		= ATA_PIO4,
-	.mwdma_mask		= ATA_MWDMA2,
-	.udma_mask		= ATA_UDMA5,
-	.chipset		= ide_generic,
-};
-
-static int __init tx4939ide_probe(struct platform_device *pdev)
-{
-	struct ide_hw hw, *hws[] = { &hw };
-	struct ide_host *host;
-	struct resource *res;
-	int irq, ret;
-	unsigned long mapbase;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return -ENODEV;
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				     resource_size(res), MODNAME))
-		return -EBUSY;
-	mapbase = (unsigned long)devm_ioremap(&pdev->dev, res->start,
-					      resource_size(res));
-	if (!mapbase)
-		return -EBUSY;
-	memset(&hw, 0, sizeof(hw));
-	hw.io_ports.data_addr =
-		mapbase + tx4939ide_swizzlew(TX4939IDE_Data);
-	hw.io_ports.error_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_Error_Feature);
-	hw.io_ports.nsect_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_Sec);
-	hw.io_ports.lbal_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_LBA0);
-	hw.io_ports.lbam_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_LBA1);
-	hw.io_ports.lbah_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_LBA2);
-	hw.io_ports.device_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_DevHead);
-	hw.io_ports.command_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_Stat_Cmd);
-	hw.io_ports.ctl_addr =
-		mapbase + tx4939ide_swizzleb(TX4939IDE_AltStat_DevCtl);
-	hw.irq = irq;
-	hw.dev = &pdev->dev;
-
-	pr_info("TX4939 IDE interface (base %#lx, irq %d)\n", mapbase, irq);
-	host = ide_host_alloc(&tx4939ide_port_info, hws, 1);
-	if (!host)
-		return -ENOMEM;
-	/* use extra_base for base address of the all registers */
-	host->ports[0]->extra_base = mapbase;
-	ret = ide_host_register(host, &tx4939ide_port_info, hws);
-	if (ret) {
-		ide_host_free(host);
-		return ret;
-	}
-	platform_set_drvdata(pdev, host);
-	return 0;
-}
-
-static int __exit tx4939ide_remove(struct platform_device *pdev)
-{
-	struct ide_host *host = platform_get_drvdata(pdev);
-
-	ide_host_remove(host);
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int tx4939ide_resume(struct platform_device *dev)
-{
-	struct ide_host *host = platform_get_drvdata(dev);
-	ide_hwif_t *hwif = host->ports[0];
-
-	tx4939ide_init_hwif(hwif);
-	return 0;
-}
-#else
-#define tx4939ide_resume	NULL
-#endif
-
-static struct platform_driver tx4939ide_driver = {
-	.driver = {
-		.name = MODNAME,
-	},
-	.remove = __exit_p(tx4939ide_remove),
-	.resume = tx4939ide_resume,
-};
-
-module_platform_driver_probe(tx4939ide_driver, tx4939ide_probe);
-
-MODULE_DESCRIPTION("TX4939 internal IDE driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:tx4939ide");
diff --git a/drivers/ide/umc8672.c b/drivers/ide/umc8672.c
deleted file mode 100644
index cf996f7882927..0000000000000
--- a/drivers/ide/umc8672.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  Copyright (C) 1995-1996  Linus Torvalds & author (see below)
- */
-
-/*
- *  Principal Author/Maintainer:  PODIEN@hml2.atlas.de (Wolfram Podien)
- *
- *  This file provides support for the advanced features
- *  of the UMC 8672 IDE interface.
- *
- *  Version 0.01	Initial version, hacked out of ide.c,
- *			and #include'd rather than compiled separately.
- *			This will get cleaned up in a subsequent release.
- *
- *  Version 0.02	now configs/compiles separate from ide.c  -ml
- *  Version 0.03	enhanced auto-tune, fix display bug
- *  Version 0.05	replace sti() with restore_flags()  -ml
- *			add detection of possible race condition  -ml
- */
-
-/*
- * VLB Controller Support from
- * Wolfram Podien
- * Rohoefe 3
- * D28832 Achim
- * Germany
- *
- * To enable UMC8672 support there must a lilo line like
- * append="ide0=umc8672"...
- * To set the speed according to the abilities of the hardware there must be a
- * line like
- * #define UMC_DRIVE0 11
- * in the beginning of the driver, which sets the speed of drive 0 to 11 (there
- * are some lines present). 0 - 11 are allowed speed values. These values are
- * the results from the DOS speed test program supplied from UMC. 11 is the
- * highest speed (about PIO mode 3)
- */
-#define REALLY_SLOW_IO		/* some systems can safely undef this */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/mm.h>
-#include <linux/ioport.h>
-#include <linux/blkdev.h>
-#include <linux/ide.h>
-#include <linux/init.h>
-
-#include <asm/io.h>
-
-#define DRV_NAME "umc8672"
-
-/*
- * Default speeds.  These can be changed with "auto-tune" and/or hdparm.
- */
-#define UMC_DRIVE0      1              /* DOS measured drive speeds */
-#define UMC_DRIVE1      1              /* 0 to 11 allowed */
-#define UMC_DRIVE2      1              /* 11 = Fastest Speed */
-#define UMC_DRIVE3      1              /* In case of crash reduce speed */
-
-static u8 current_speeds[4] = {UMC_DRIVE0, UMC_DRIVE1, UMC_DRIVE2, UMC_DRIVE3};
-static const u8 pio_to_umc [5] = {0, 3, 7, 10, 11};	/* rough guesses */
-
-/*       0    1    2    3    4    5    6    7    8    9    10   11      */
-static const u8 speedtab [3][12] = {
-	{0x0f, 0x0b, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x1},
-	{0x03, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x01, 0x01, 0x01, 0x1},
-	{0xff, 0xcb, 0xc0, 0x58, 0x36, 0x33, 0x23, 0x22, 0x21, 0x11, 0x10, 0x0}
-};
-
-static void out_umc(char port, char wert)
-{
-	outb_p(port, 0x108);
-	outb_p(wert, 0x109);
-}
-
-static inline u8 in_umc(char port)
-{
-	outb_p(port, 0x108);
-	return inb_p(0x109);
-}
-
-static void umc_set_speeds(u8 speeds[])
-{
-	int i, tmp;
-
-	outb_p(0x5A, 0x108); /* enable umc */
-
-	out_umc(0xd7, (speedtab[0][speeds[2]] | (speedtab[0][speeds[3]]<<4)));
-	out_umc(0xd6, (speedtab[0][speeds[0]] | (speedtab[0][speeds[1]]<<4)));
-	tmp = 0;
-	for (i = 3; i >= 0; i--)
-		tmp = (tmp << 2) | speedtab[1][speeds[i]];
-	out_umc(0xdc, tmp);
-	for (i = 0; i < 4; i++) {
-		out_umc(0xd0 + i, speedtab[2][speeds[i]]);
-		out_umc(0xd8 + i, speedtab[2][speeds[i]]);
-	}
-	outb_p(0xa5, 0x108); /* disable umc */
-
-	printk("umc8672: drive speeds [0 to 11]: %d %d %d %d\n",
-		speeds[0], speeds[1], speeds[2], speeds[3]);
-}
-
-static void umc_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	ide_hwif_t *mate = hwif->mate;
-	unsigned long flags;
-	const u8 pio = drive->pio_mode - XFER_PIO_0;
-
-	printk("%s: setting umc8672 to PIO mode%d (speed %d)\n",
-		drive->name, pio, pio_to_umc[pio]);
-	if (mate)
-		spin_lock_irqsave(&mate->lock, flags);
-	if (mate && mate->handler) {
-		printk(KERN_ERR "umc8672: other interface is busy: exiting tune_umc()\n");
-	} else {
-		current_speeds[drive->name[2] - 'a'] = pio_to_umc[pio];
-		umc_set_speeds(current_speeds);
-	}
-	if (mate)
-		spin_unlock_irqrestore(&mate->lock, flags);
-}
-
-static const struct ide_port_ops umc8672_port_ops = {
-	.set_pio_mode		= umc_set_pio_mode,
-};
-
-static const struct ide_port_info umc8672_port_info __initconst = {
-	.name			= DRV_NAME,
-	.chipset		= ide_umc8672,
-	.port_ops		= &umc8672_port_ops,
-	.host_flags		= IDE_HFLAG_NO_DMA,
-	.pio_mask		= ATA_PIO4,
-};
-
-static int __init umc8672_probe(void)
-{
-	unsigned long flags;
-
-	if (!request_region(0x108, 2, "umc8672")) {
-		printk(KERN_ERR "umc8672: ports 0x108-0x109 already in use.\n");
-		return 1;
-	}
-	local_irq_save(flags);
-	outb_p(0x5A, 0x108); /* enable umc */
-	if (in_umc (0xd5) != 0xa0) {
-		local_irq_restore(flags);
-		printk(KERN_ERR "umc8672: not found\n");
-		release_region(0x108, 2);
-		return 1;
-	}
-	outb_p(0xa5, 0x108); /* disable umc */
-
-	umc_set_speeds(current_speeds);
-	local_irq_restore(flags);
-
-	return ide_legacy_device_add(&umc8672_port_info, 0);
-}
-
-static bool probe_umc8672;
-
-module_param_named(probe, probe_umc8672, bool, 0);
-MODULE_PARM_DESC(probe, "probe for UMC8672 chipset");
-
-static int __init umc8672_init(void)
-{
-	if (probe_umc8672 == 0)
-		goto out;
-
-	if (umc8672_probe() == 0)
-		return 0;
-out:
-	return -ENODEV;
-}
-
-module_init(umc8672_init);
-
-MODULE_AUTHOR("Wolfram Podien");
-MODULE_DESCRIPTION("Support for UMC 8672 IDE chipset");
-MODULE_LICENSE("GPL");
diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c
deleted file mode 100644
index 63a3aca506fca..0000000000000
--- a/drivers/ide/via82cxxx.c
+++ /dev/null
@@ -1,532 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * VIA IDE driver for Linux. Supported southbridges:
- *
- *   vt82c576, vt82c586, vt82c586a, vt82c586b, vt82c596a, vt82c596b,
- *   vt82c686, vt82c686a, vt82c686b, vt8231, vt8233, vt8233c, vt8233a,
- *   vt8235, vt8237, vt8237a
- *
- * Copyright (c) 2000-2002 Vojtech Pavlik
- * Copyright (c) 2007-2010 Bartlomiej Zolnierkiewicz
- *
- * Based on the work of:
- *	Michel Aubry
- *	Jeff Garzik
- *	Andre Hedrick
- *
- * Documentation:
- *	Obsolete device documentation publicly available from via.com.tw
- *	Current device documentation available under NDA only
- */
-
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-#include <linux/ide.h>
-#include <linux/dmi.h>
-
-#ifdef CONFIG_PPC_CHRP
-#include <asm/processor.h>
-#endif
-
-#define DRV_NAME "via82cxxx"
-
-#define VIA_IDE_ENABLE		0x40
-#define VIA_IDE_CONFIG		0x41
-#define VIA_FIFO_CONFIG		0x43
-#define VIA_MISC_1		0x44
-#define VIA_MISC_2		0x45
-#define VIA_MISC_3		0x46
-#define VIA_DRIVE_TIMING	0x48
-#define VIA_8BIT_TIMING		0x4e
-#define VIA_ADDRESS_SETUP	0x4c
-#define VIA_UDMA_TIMING		0x50
-
-#define VIA_BAD_PREQ		0x01 /* Crashes if PREQ# till DDACK# set */
-#define VIA_BAD_CLK66		0x02 /* 66 MHz clock doesn't work correctly */
-#define VIA_SET_FIFO		0x04 /* Needs to have FIFO split set */
-#define VIA_NO_UNMASK		0x08 /* Doesn't work with IRQ unmasking on */
-#define VIA_BAD_ID		0x10 /* Has wrong vendor ID (0x1107) */
-#define VIA_BAD_AST		0x20 /* Don't touch Address Setup Timing */
-#define VIA_SATA_PATA		0x80 /* SATA/PATA combined configuration */
-
-enum {
-	VIA_IDFLAG_SINGLE = (1 << 1), /* single channel controller */
-};
-
-/*
- * VIA SouthBridge chips.
- */
-
-static struct via_isa_bridge {
-	char *name;
-	u16 id;
-	u8 rev_min;
-	u8 rev_max;
-	u8 udma_mask;
-	u8 flags;
-} via_isa_bridges[] = {
-	{ "vx855",	PCI_DEVICE_ID_VIA_VX855,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
-	{ "vx800",	PCI_DEVICE_ID_VIA_VX800,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
-	{ "cx700",	PCI_DEVICE_ID_VIA_CX700,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST | VIA_SATA_PATA },
-	{ "vt8261",	PCI_DEVICE_ID_VIA_8261,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8237s",	PCI_DEVICE_ID_VIA_8237S,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt6410",	PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt6415",	PCI_DEVICE_ID_VIA_6415,     0x00, 0xff, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8251",	PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8237",	PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8237a",	PCI_DEVICE_ID_VIA_8237A,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8235",	PCI_DEVICE_ID_VIA_8235,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8233a",	PCI_DEVICE_ID_VIA_8233A,    0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ "vt8233c",	PCI_DEVICE_ID_VIA_8233C_0,  0x00, 0x2f, ATA_UDMA5, },
-	{ "vt8233",	PCI_DEVICE_ID_VIA_8233_0,   0x00, 0x2f, ATA_UDMA5, },
-	{ "vt8231",	PCI_DEVICE_ID_VIA_8231,     0x00, 0x2f, ATA_UDMA5, },
-	{ "vt82c686b",	PCI_DEVICE_ID_VIA_82C686,   0x40, 0x4f, ATA_UDMA5, },
-	{ "vt82c686a",	PCI_DEVICE_ID_VIA_82C686,   0x10, 0x2f, ATA_UDMA4, },
-	{ "vt82c686",	PCI_DEVICE_ID_VIA_82C686,   0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 },
-	{ "vt82c596b",	PCI_DEVICE_ID_VIA_82C596,   0x10, 0x2f, ATA_UDMA4, },
-	{ "vt82c596a",	PCI_DEVICE_ID_VIA_82C596,   0x00, 0x0f, ATA_UDMA2, VIA_BAD_CLK66 },
-	{ "vt82c586b",	PCI_DEVICE_ID_VIA_82C586_0, 0x47, 0x4f, ATA_UDMA2, VIA_SET_FIFO },
-	{ "vt82c586b",	PCI_DEVICE_ID_VIA_82C586_0, 0x40, 0x46, ATA_UDMA2, VIA_SET_FIFO | VIA_BAD_PREQ },
-	{ "vt82c586b",	PCI_DEVICE_ID_VIA_82C586_0, 0x30, 0x3f, ATA_UDMA2, VIA_SET_FIFO },
-	{ "vt82c586a",	PCI_DEVICE_ID_VIA_82C586_0, 0x20, 0x2f, ATA_UDMA2, VIA_SET_FIFO },
-	{ "vt82c586",	PCI_DEVICE_ID_VIA_82C586_0, 0x00, 0x0f,      0x00, VIA_SET_FIFO },
-	{ "vt82c576",	PCI_DEVICE_ID_VIA_82C576,   0x00, 0x2f,      0x00, VIA_SET_FIFO | VIA_NO_UNMASK },
-	{ "vt82c576",	PCI_DEVICE_ID_VIA_82C576,   0x00, 0x2f,      0x00, VIA_SET_FIFO | VIA_NO_UNMASK | VIA_BAD_ID },
-	{ "vtxxxx",	PCI_DEVICE_ID_VIA_ANON,     0x00, 0x2f, ATA_UDMA6, VIA_BAD_AST },
-	{ NULL }
-};
-
-static unsigned int via_clock;
-static char *via_dma[] = { "16", "25", "33", "44", "66", "100", "133" };
-
-struct via82cxxx_dev
-{
-	struct via_isa_bridge *via_config;
-	unsigned int via_80w;
-};
-
-/**
- *	via_set_speed			-	write timing registers
- *	@dev: PCI device
- *	@dn: device
- *	@timing: IDE timing data to use
- *
- *	via_set_speed writes timing values to the chipset registers
- */
-
-static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
-{
-	struct pci_dev *dev = to_pci_dev(hwif->dev);
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct via82cxxx_dev *vdev = host->host_priv;
-	u8 t;
-
-	if (~vdev->via_config->flags & VIA_BAD_AST) {
-		pci_read_config_byte(dev, VIA_ADDRESS_SETUP, &t);
-		t = (t & ~(3 << ((3 - dn) << 1))) | ((clamp_val(timing->setup, 1, 4) - 1) << ((3 - dn) << 1));
-		pci_write_config_byte(dev, VIA_ADDRESS_SETUP, t);
-	}
-
-	pci_write_config_byte(dev, VIA_8BIT_TIMING + (1 - (dn >> 1)),
-		((clamp_val(timing->act8b, 1, 16) - 1) << 4) | (clamp_val(timing->rec8b, 1, 16) - 1));
-
-	pci_write_config_byte(dev, VIA_DRIVE_TIMING + (3 - dn),
-		((clamp_val(timing->active, 1, 16) - 1) << 4) | (clamp_val(timing->recover, 1, 16) - 1));
-
-	switch (vdev->via_config->udma_mask) {
-	case ATA_UDMA2: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 5) - 2)) : 0x03; break;
-	case ATA_UDMA4: t = timing->udma ? (0xe8 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x0f; break;
-	case ATA_UDMA5: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
-	case ATA_UDMA6: t = timing->udma ? (0xe0 | (clamp_val(timing->udma, 2, 9) - 2)) : 0x07; break;
-	}
-
-	/* Set UDMA unless device is not UDMA capable */
-	if (vdev->via_config->udma_mask) {
-		u8 udma_etc;
-
-		pci_read_config_byte(dev, VIA_UDMA_TIMING + 3 - dn, &udma_etc);
-
-		/* clear transfer mode bit */
-		udma_etc &= ~0x20;
-
-		if (timing->udma) {
-			/* preserve 80-wire cable detection bit */
-			udma_etc &= 0x10;
-			udma_etc |= t;
-		}
-
-		pci_write_config_byte(dev, VIA_UDMA_TIMING + 3 - dn, udma_etc);
-	}
-}
-
-/**
- *	via_set_drive		-	configure transfer mode
- *	@hwif: port
- *	@drive: Drive to set up
- *
- *	via_set_drive() computes timing values configures the chipset to
- *	a desired transfer mode.  It also can be called by upper layers.
- */
-
-static void via_set_drive(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	ide_drive_t *peer = ide_get_pair_dev(drive);
-	struct ide_host *host = dev_get_drvdata(hwif->dev);
-	struct via82cxxx_dev *vdev = host->host_priv;
-	struct ide_timing t, p;
-	unsigned int T, UT;
-	const u8 speed = drive->dma_mode;
-
-	T = 1000000000 / via_clock;
-
-	switch (vdev->via_config->udma_mask) {
-	case ATA_UDMA2: UT = T;   break;
-	case ATA_UDMA4: UT = T/2; break;
-	case ATA_UDMA5: UT = T/3; break;
-	case ATA_UDMA6: UT = T/4; break;
-	default:	UT = T;
-	}
-
-	ide_timing_compute(drive, speed, &t, T, UT);
-
-	if (peer) {
-		ide_timing_compute(peer, peer->pio_mode, &p, T, UT);
-		ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT);
-	}
-
-	via_set_speed(hwif, drive->dn, &t);
-}
-
-/**
- *	via_set_pio_mode	-	set host controller for PIO mode
- *	@hwif: port
- *	@drive: drive
- *
- *	A callback from the upper layers for PIO-only tuning.
- */
-
-static void via_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
-{
-	drive->dma_mode = drive->pio_mode;
-	via_set_drive(hwif, drive);
-}
-
-static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
-{
-	struct via_isa_bridge *via_config;
-
-	for (via_config = via_isa_bridges;
-	     via_config->id != PCI_DEVICE_ID_VIA_ANON; via_config++)
-		if ((*isa = pci_get_device(PCI_VENDOR_ID_VIA +
-			!!(via_config->flags & VIA_BAD_ID),
-			via_config->id, NULL))) {
-
-			if ((*isa)->revision >= via_config->rev_min &&
-			    (*isa)->revision <= via_config->rev_max)
-				break;
-			pci_dev_put(*isa);
-		}
-
-	return via_config;
-}
-
-/*
- * Check and handle 80-wire cable presence
- */
-static void via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
-{
-	int i;
-
-	switch (vdev->via_config->udma_mask) {
-		case ATA_UDMA4:
-			for (i = 24; i >= 0; i -= 8)
-				if (((u >> (i & 16)) & 8) &&
-				    ((u >> i) & 0x20) &&
-				     (((u >> i) & 7) < 2)) {
-					/*
-					 * 2x PCI clock and
-					 * UDMA w/ < 3T/cycle
-					 */
-					vdev->via_80w |= (1 << (1 - (i >> 4)));
-				}
-			break;
-
-		case ATA_UDMA5:
-			for (i = 24; i >= 0; i -= 8)
-				if (((u >> i) & 0x10) ||
-				    (((u >> i) & 0x20) &&
-				     (((u >> i) & 7) < 4))) {
-					/* BIOS 80-wire bit or
-					 * UDMA w/ < 60ns/cycle
-					 */
-					vdev->via_80w |= (1 << (1 - (i >> 4)));
-				}
-			break;
-
-		case ATA_UDMA6:
-			for (i = 24; i >= 0; i -= 8)
-				if (((u >> i) & 0x10) ||
-				    (((u >> i) & 0x20) &&
-				     (((u >> i) & 7) < 6))) {
-					/* BIOS 80-wire bit or
-					 * UDMA w/ < 60ns/cycle
-					 */
-					vdev->via_80w |= (1 << (1 - (i >> 4)));
-				}
-			break;
-	}
-}
-
-/**
- *	init_chipset_via82cxxx	-	initialization handler
- *	@dev: PCI device
- *
- *	The initialization callback. Here we determine the IDE chip type
- *	and initialize its drive independent registers.
- */
-
-static int init_chipset_via82cxxx(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct via82cxxx_dev *vdev = host->host_priv;
-	struct via_isa_bridge *via_config = vdev->via_config;
-	u8 t, v;
-	u32 u;
-
-	/*
-	 * Detect cable and configure Clk66
-	 */
-	pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
-
-	via_cable_detect(vdev, u);
-
-	if (via_config->udma_mask == ATA_UDMA4) {
-		/* Enable Clk66 */
-		pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008);
-	} else if (via_config->flags & VIA_BAD_CLK66) {
-		/* Would cause trouble on 596a and 686 */
-		pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008);
-	}
-
-	/*
-	 * Check whether interfaces are enabled.
-	 */
-
-	pci_read_config_byte(dev, VIA_IDE_ENABLE, &v);
-
-	/*
-	 * Set up FIFO sizes and thresholds.
-	 */
-
-	pci_read_config_byte(dev, VIA_FIFO_CONFIG, &t);
-
-	/* Disable PREQ# till DDACK# */
-	if (via_config->flags & VIA_BAD_PREQ) {
-		/* Would crash on 586b rev 41 */
-		t &= 0x7f;
-	}
-
-	/* Fix FIFO split between channels */
-	if (via_config->flags & VIA_SET_FIFO) {
-		t &= (t & 0x9f);
-		switch (v & 3) {
-			case 2: t |= 0x00; break;	/* 16 on primary */
-			case 1: t |= 0x60; break;	/* 16 on secondary */
-			case 3: t |= 0x20; break;	/* 8 pri 8 sec */
-		}
-	}
-
-	pci_write_config_byte(dev, VIA_FIFO_CONFIG, t);
-
-	return 0;
-}
-
-/*
- *	Cable special cases
- */
-
-static const struct dmi_system_id cable_dmi_table[] = {
-	{
-		.ident = "Acer Ferrari 3400",
-		.matches = {
-			DMI_MATCH(DMI_BOARD_VENDOR, "Acer,Inc."),
-			DMI_MATCH(DMI_BOARD_NAME, "Ferrari 3400"),
-		},
-	},
-	{ }
-};
-
-static int via_cable_override(struct pci_dev *pdev)
-{
-	/* Systems by DMI */
-	if (dmi_check_system(cable_dmi_table))
-		return 1;
-
-	/* Arima W730-K8/Targa Visionary 811/... */
-	if (pdev->subsystem_vendor == 0x161F &&
-	    pdev->subsystem_device == 0x2032)
-		return 1;
-
-	return 0;
-}
-
-static u8 via82cxxx_cable_detect(ide_hwif_t *hwif)
-{
-	struct pci_dev *pdev = to_pci_dev(hwif->dev);
-	struct ide_host *host = pci_get_drvdata(pdev);
-	struct via82cxxx_dev *vdev = host->host_priv;
-
-	if (via_cable_override(pdev))
-		return ATA_CBL_PATA40_SHORT;
-
-	if ((vdev->via_config->flags & VIA_SATA_PATA) && hwif->channel == 0)
-		return ATA_CBL_SATA;
-
-	if ((vdev->via_80w >> hwif->channel) & 1)
-		return ATA_CBL_PATA80;
-	else
-		return ATA_CBL_PATA40;
-}
-
-static const struct ide_port_ops via_port_ops = {
-	.set_pio_mode		= via_set_pio_mode,
-	.set_dma_mode		= via_set_drive,
-	.cable_detect		= via82cxxx_cable_detect,
-};
-
-static const struct ide_port_info via82cxxx_chipset = {
-	.name		= DRV_NAME,
-	.init_chipset	= init_chipset_via82cxxx,
-	.enablebits	= { { 0x40, 0x02, 0x02 }, { 0x40, 0x01, 0x01 } },
-	.port_ops	= &via_port_ops,
-	.host_flags	= IDE_HFLAG_PIO_NO_BLACKLIST |
-			  IDE_HFLAG_POST_SET_MODE |
-			  IDE_HFLAG_IO_32BIT,
-	.pio_mask	= ATA_PIO5,
-	.swdma_mask	= ATA_SWDMA2,
-	.mwdma_mask	= ATA_MWDMA2,
-};
-
-static int via_init_one(struct pci_dev *dev, const struct pci_device_id *id)
-{
-	struct pci_dev *isa = NULL;
-	struct via_isa_bridge *via_config;
-	struct via82cxxx_dev *vdev;
-	int rc;
-	u8 idx = id->driver_data;
-	struct ide_port_info d;
-
-	d = via82cxxx_chipset;
-
-	/*
-	 * Find the ISA bridge and check we know what it is.
-	 */
-	via_config = via_config_find(&isa);
-
-	/*
-	 * Print the boot message.
-	 */
-	printk(KERN_INFO DRV_NAME " %s: VIA %s (rev %02x) IDE %sDMA%s\n",
-		pci_name(dev), via_config->name, isa->revision,
-		via_config->udma_mask ? "U" : "MW",
-		via_dma[via_config->udma_mask ?
-			(fls(via_config->udma_mask) - 1) : 0]);
-
-	pci_dev_put(isa);
-
-	/*
-	 * Determine system bus clock.
-	 */
-	via_clock = (ide_pci_clk ? ide_pci_clk : 33) * 1000;
-
-	switch (via_clock) {
-	case 33000: via_clock = 33333; break;
-	case 37000: via_clock = 37500; break;
-	case 41000: via_clock = 41666; break;
-	}
-
-	if (via_clock < 20000 || via_clock > 50000) {
-		printk(KERN_WARNING DRV_NAME ": User given PCI clock speed "
-			"impossible (%d), using 33 MHz instead.\n", via_clock);
-		via_clock = 33333;
-	}
-
-	if (idx == 1)
-		d.enablebits[1].reg = d.enablebits[0].reg = 0;
-	else
-		d.host_flags |= IDE_HFLAG_NO_AUTODMA;
-
-	if (idx == VIA_IDFLAG_SINGLE)
-		d.host_flags |= IDE_HFLAG_SINGLE;
-
-	if ((via_config->flags & VIA_NO_UNMASK) == 0)
-		d.host_flags |= IDE_HFLAG_UNMASK_IRQS;
-
-	d.udma_mask = via_config->udma_mask;
-
-	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev) {
-		printk(KERN_ERR DRV_NAME " %s: out of memory :(\n",
-			pci_name(dev));
-		return -ENOMEM;
-	}
-
-	vdev->via_config = via_config;
-
-	rc = ide_pci_init_one(dev, &d, vdev);
-	if (rc)
-		kfree(vdev);
-
-	return rc;
-}
-
-static void via_remove(struct pci_dev *dev)
-{
-	struct ide_host *host = pci_get_drvdata(dev);
-	struct via82cxxx_dev *vdev = host->host_priv;
-
-	ide_pci_remove(dev);
-	kfree(vdev);
-}
-
-static const struct pci_device_id via_pci_tbl[] = {
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C576_1),  0 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_82C586_1),  0 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_CX700_IDE), 0 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_VX855_IDE), VIA_IDFLAG_SINGLE },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6410),      1 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_6415),      1 },
-	{ PCI_VDEVICE(VIA, PCI_DEVICE_ID_VIA_SATA_EIDE), 1 },
-	{ 0, },
-};
-MODULE_DEVICE_TABLE(pci, via_pci_tbl);
-
-static struct pci_driver via_pci_driver = {
-	.name 		= "VIA_IDE",
-	.id_table 	= via_pci_tbl,
-	.probe 		= via_init_one,
-	.remove		= via_remove,
-	.suspend	= ide_pci_suspend,
-	.resume		= ide_pci_resume,
-};
-
-static int __init via_ide_init(void)
-{
-	return ide_pci_register_driver(&via_pci_driver);
-}
-
-static void __exit via_ide_exit(void)
-{
-	pci_unregister_driver(&via_pci_driver);
-}
-
-module_init(via_ide_init);
-module_exit(via_ide_exit);
-
-MODULE_AUTHOR("Vojtech Pavlik, Bartlomiej Zolnierkiewicz, Michel Aubry, Jeff Garzik, Andre Hedrick");
-MODULE_DESCRIPTION("PCI driver module for VIA IDE");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/ide.h b/include/linux/ide.h
deleted file mode 100644
index 2c300689a51a5..0000000000000
--- a/include/linux/ide.h
+++ /dev/null
@@ -1,1623 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _IDE_H
-#define _IDE_H
-/*
- *  linux/include/linux/ide.h
- *
- *  Copyright (C) 1994-2002  Linus Torvalds & authors
- */
-
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/ata.h>
-#include <linux/blk-mq.h>
-#include <linux/proc_fs.h>
-#include <linux/interrupt.h>
-#include <linux/bitops.h>
-#include <linux/bio.h>
-#include <linux/pci.h>
-#include <linux/completion.h>
-#include <linux/pm.h>
-#include <linux/mutex.h>
-/* for request_sense */
-#include <linux/cdrom.h>
-#include <scsi/scsi_cmnd.h>
-#include <asm/byteorder.h>
-#include <asm/io.h>
-
-/*
- * Probably not wise to fiddle with these
- */
-#define SUPPORT_VLB_SYNC 1
-#define IDE_DEFAULT_MAX_FAILURES	1
-#define ERROR_MAX	8	/* Max read/write errors per sector */
-#define ERROR_RESET	3	/* Reset controller every 4th retry */
-#define ERROR_RECAL	1	/* Recalibrate every 2nd retry */
-
-struct device;
-
-/* values for ide_request.type */
-enum ata_priv_type {
-	ATA_PRIV_MISC,
-	ATA_PRIV_TASKFILE,
-	ATA_PRIV_PC,
-	ATA_PRIV_SENSE,		/* sense request */
-	ATA_PRIV_PM_SUSPEND,	/* suspend request */
-	ATA_PRIV_PM_RESUME,	/* resume request */
-};
-
-struct ide_request {
-	struct scsi_request sreq;
-	u8 sense[SCSI_SENSE_BUFFERSIZE];
-	u8 type;
-	void *special;
-};
-
-static inline struct ide_request *ide_req(struct request *rq)
-{
-	return blk_mq_rq_to_pdu(rq);
-}
-
-static inline bool ata_misc_request(struct request *rq)
-{
-	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_MISC;
-}
-
-static inline bool ata_taskfile_request(struct request *rq)
-{
-	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_TASKFILE;
-}
-
-static inline bool ata_pc_request(struct request *rq)
-{
-	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_PC;
-}
-
-static inline bool ata_sense_request(struct request *rq)
-{
-	return blk_rq_is_private(rq) && ide_req(rq)->type == ATA_PRIV_SENSE;
-}
-
-static inline bool ata_pm_request(struct request *rq)
-{
-	return blk_rq_is_private(rq) &&
-		(ide_req(rq)->type == ATA_PRIV_PM_SUSPEND ||
-		 ide_req(rq)->type == ATA_PRIV_PM_RESUME);
-}
-
-/* Error codes returned in result to the higher part of the driver. */
-enum {
-	IDE_DRV_ERROR_GENERAL	= 101,
-	IDE_DRV_ERROR_FILEMARK	= 102,
-	IDE_DRV_ERROR_EOD	= 103,
-};
-
-/*
- * Definitions for accessing IDE controller registers
- */
-#define IDE_NR_PORTS		(10)
-
-struct ide_io_ports {
-	unsigned long	data_addr;
-
-	union {
-		unsigned long error_addr;	/*   read:  error */
-		unsigned long feature_addr;	/*  write: feature */
-	};
-
-	unsigned long	nsect_addr;
-	unsigned long	lbal_addr;
-	unsigned long	lbam_addr;
-	unsigned long	lbah_addr;
-
-	unsigned long	device_addr;
-
-	union {
-		unsigned long status_addr;	/*  read: status  */
-		unsigned long command_addr;	/* write: command */
-	};
-
-	unsigned long	ctl_addr;
-
-	unsigned long	irq_addr;
-};
-
-#define OK_STAT(stat,good,bad)	(((stat)&((good)|(bad)))==(good))
-
-#define BAD_R_STAT	(ATA_BUSY | ATA_ERR)
-#define BAD_W_STAT	(BAD_R_STAT | ATA_DF)
-#define BAD_STAT	(BAD_R_STAT | ATA_DRQ)
-#define DRIVE_READY	(ATA_DRDY | ATA_DSC)
-
-#define BAD_CRC		(ATA_ABORTED | ATA_ICRC)
-
-#define SATA_NR_PORTS		(3)	/* 16 possible ?? */
-
-#define SATA_STATUS_OFFSET	(0)
-#define SATA_ERROR_OFFSET	(1)
-#define SATA_CONTROL_OFFSET	(2)
-
-/*
- * Our Physical Region Descriptor (PRD) table should be large enough
- * to handle the biggest I/O request we are likely to see.  Since requests
- * can have no more than 256 sectors, and since the typical blocksize is
- * two or more sectors, we could get by with a limit of 128 entries here for
- * the usual worst case.  Most requests seem to include some contiguous blocks,
- * further reducing the number of table entries required.
- *
- * The driver reverts to PIO mode for individual requests that exceed
- * this limit (possible with 512 byte blocksizes, eg. MSDOS f/s), so handling
- * 100% of all crazy scenarios here is not necessary.
- *
- * As it turns out though, we must allocate a full 4KB page for this,
- * so the two PRD tables (ide0 & ide1) will each get half of that,
- * allowing each to have about 256 entries (8 bytes each) from this.
- */
-#define PRD_BYTES       8
-#define PRD_ENTRIES	256
-
-/*
- * Some more useful definitions
- */
-#define PARTN_BITS	6	/* number of minor dev bits for partitions */
-#define MAX_DRIVES	2	/* per interface; 2 assumed by lots of code */
-
-/*
- * Timeouts for various operations:
- */
-enum {
-	/* spec allows up to 20ms, but CF cards and SSD drives need more */
-	WAIT_DRQ	= 1 * HZ,	/* 1s */
-	/* some laptops are very slow */
-	WAIT_READY	= 5 * HZ,	/* 5s */
-	/* should be less than 3ms (?), if all ATAPI CD is closed at boot */
-	WAIT_PIDENTIFY	= 10 * HZ,	/* 10s */
-	/* worst case when spinning up */
-	WAIT_WORSTCASE	= 30 * HZ,	/* 30s */
-	/* maximum wait for an IRQ to happen */
-	WAIT_CMD	= 10 * HZ,	/* 10s */
-	/* Some drives require a longer IRQ timeout. */
-	WAIT_FLOPPY_CMD	= 50 * HZ,	/* 50s */
-	/*
-	 * Some drives (for example, Seagate STT3401A Travan) require a very
-	 * long timeout, because they don't return an interrupt or clear their
-	 * BSY bit until after the command completes (even retension commands).
-	 */
-	WAIT_TAPE_CMD	= 900 * HZ,	/* 900s */
-	/* minimum sleep time */
-	WAIT_MIN_SLEEP	= HZ / 50,	/* 20ms */
-};
-
-/*
- * Op codes for special requests to be handled by ide_special_rq().
- * Values should be in the range of 0x20 to 0x3f.
- */
-#define REQ_DRIVE_RESET		0x20
-#define REQ_DEVSET_EXEC		0x21
-#define REQ_PARK_HEADS		0x22
-#define REQ_UNPARK_HEADS	0x23
-
-/*
- * hwif_chipset_t is used to keep track of the specific hardware
- * chipset used by each IDE interface, if known.
- */
-enum {		ide_unknown,	ide_generic,	ide_pci,
-		ide_cmd640,	ide_dtc2278,	ide_ali14xx,
-		ide_qd65xx,	ide_umc8672,	ide_ht6560b,
-		ide_4drives,	ide_pmac,	ide_acorn,
-		ide_au1xxx,	ide_palm3710
-};
-
-typedef u8 hwif_chipset_t;
-
-/*
- * Structure to hold all information about the location of this port
- */
-struct ide_hw {
-	union {
-		struct ide_io_ports	io_ports;
-		unsigned long		io_ports_array[IDE_NR_PORTS];
-	};
-
-	int		irq;			/* our irq number */
-	struct device	*dev, *parent;
-	unsigned long	config;
-};
-
-static inline void ide_std_init_ports(struct ide_hw *hw,
-				      unsigned long io_addr,
-				      unsigned long ctl_addr)
-{
-	unsigned int i;
-
-	for (i = 0; i <= 7; i++)
-		hw->io_ports_array[i] = io_addr++;
-
-	hw->io_ports.ctl_addr = ctl_addr;
-}
-
-#define MAX_HWIFS	10
-
-/*
- * Now for the data we need to maintain per-drive:  ide_drive_t
- */
-
-#define ide_scsi	0x21
-#define ide_disk	0x20
-#define ide_optical	0x7
-#define ide_cdrom	0x5
-#define ide_tape	0x1
-#define ide_floppy	0x0
-
-/*
- * Special Driver Flags
- */
-enum {
-	IDE_SFLAG_SET_GEOMETRY		= BIT(0),
-	IDE_SFLAG_RECALIBRATE		= BIT(1),
-	IDE_SFLAG_SET_MULTMODE		= BIT(2),
-};
-
-/*
- * Status returned from various ide_ functions
- */
-typedef enum {
-	ide_stopped,	/* no drive operation was started */
-	ide_started,	/* a drive operation was started, handler was set */
-} ide_startstop_t;
-
-enum {
-	IDE_VALID_ERROR 		= BIT(1),
-	IDE_VALID_FEATURE		= IDE_VALID_ERROR,
-	IDE_VALID_NSECT 		= BIT(2),
-	IDE_VALID_LBAL			= BIT(3),
-	IDE_VALID_LBAM			= BIT(4),
-	IDE_VALID_LBAH			= BIT(5),
-	IDE_VALID_DEVICE		= BIT(6),
-	IDE_VALID_LBA			= IDE_VALID_LBAL |
-					  IDE_VALID_LBAM |
-					  IDE_VALID_LBAH,
-	IDE_VALID_OUT_TF		= IDE_VALID_FEATURE |
-					  IDE_VALID_NSECT |
-					  IDE_VALID_LBA,
-	IDE_VALID_IN_TF 		= IDE_VALID_NSECT |
-					  IDE_VALID_LBA,
-	IDE_VALID_OUT_HOB		= IDE_VALID_OUT_TF,
-	IDE_VALID_IN_HOB		= IDE_VALID_ERROR |
-					  IDE_VALID_NSECT |
-					  IDE_VALID_LBA,
-};
-
-enum {
-	IDE_TFLAG_LBA48			= BIT(0),
-	IDE_TFLAG_WRITE			= BIT(1),
-	IDE_TFLAG_CUSTOM_HANDLER	= BIT(2),
-	IDE_TFLAG_DMA_PIO_FALLBACK	= BIT(3),
-	/* force 16-bit I/O operations */
-	IDE_TFLAG_IO_16BIT		= BIT(4),
-	/* struct ide_cmd was allocated using kmalloc() */
-	IDE_TFLAG_DYN			= BIT(5),
-	IDE_TFLAG_FS			= BIT(6),
-	IDE_TFLAG_MULTI_PIO		= BIT(7),
-	IDE_TFLAG_SET_XFER		= BIT(8),
-};
-
-enum {
-	IDE_FTFLAG_FLAGGED		= BIT(0),
-	IDE_FTFLAG_SET_IN_FLAGS		= BIT(1),
-	IDE_FTFLAG_OUT_DATA		= BIT(2),
-	IDE_FTFLAG_IN_DATA		= BIT(3),
-};
-
-struct ide_taskfile {
-	u8	data;		/* 0: data byte (for TASKFILE ioctl) */
-	union {			/* 1: */
-		u8 error;	/*  read: error */
-		u8 feature;	/* write: feature */
-	};
-	u8	nsect;		/* 2: number of sectors */
-	u8	lbal;		/* 3: LBA low */
-	u8	lbam;		/* 4: LBA mid */
-	u8	lbah;		/* 5: LBA high */
-	u8	device;		/* 6: device select */
-	union {			/* 7: */
-		u8 status;	/*  read: status */
-		u8 command;	/* write: command */
-	};
-};
-
-struct ide_cmd {
-	struct ide_taskfile	tf;
-	struct ide_taskfile	hob;
-	struct {
-		struct {
-			u8		tf;
-			u8		hob;
-		} out, in;
-	} valid;
-
-	u16			tf_flags;
-	u8			ftf_flags;	/* for TASKFILE ioctl */
-	int			protocol;
-
-	int			sg_nents;	  /* number of sg entries */
-	int			orig_sg_nents;
-	int			sg_dma_direction; /* DMA transfer direction */
-
-	unsigned int		nbytes;
-	unsigned int		nleft;
-	unsigned int		last_xfer_len;
-
-	struct scatterlist	*cursg;
-	unsigned int		cursg_ofs;
-
-	struct request		*rq;		/* copy of request */
-};
-
-/* ATAPI packet command flags */
-enum {
-	/* set when an error is considered normal - no retry (ide-tape) */
-	PC_FLAG_ABORT			= BIT(0),
-	PC_FLAG_SUPPRESS_ERROR		= BIT(1),
-	PC_FLAG_WAIT_FOR_DSC		= BIT(2),
-	PC_FLAG_DMA_OK			= BIT(3),
-	PC_FLAG_DMA_IN_PROGRESS		= BIT(4),
-	PC_FLAG_DMA_ERROR		= BIT(5),
-	PC_FLAG_WRITING			= BIT(6),
-};
-
-#define ATAPI_WAIT_PC		(60 * HZ)
-
-struct ide_atapi_pc {
-	/* actual packet bytes */
-	u8 c[12];
-	/* incremented on each retry */
-	int retries;
-	int error;
-
-	/* bytes to transfer */
-	int req_xfer;
-
-	/* the corresponding request */
-	struct request *rq;
-
-	unsigned long flags;
-
-	/*
-	 * those are more or less driver-specific and some of them are subject
-	 * to change/removal later.
-	 */
-	unsigned long timeout;
-};
-
-struct ide_devset;
-struct ide_driver;
-
-#ifdef CONFIG_BLK_DEV_IDEACPI
-struct ide_acpi_drive_link;
-struct ide_acpi_hwif_link;
-#endif
-
-struct ide_drive_s;
-
-struct ide_disk_ops {
-	int		(*check)(struct ide_drive_s *, const char *);
-	int		(*get_capacity)(struct ide_drive_s *);
-	void		(*unlock_native_capacity)(struct ide_drive_s *);
-	void		(*setup)(struct ide_drive_s *);
-	void		(*flush)(struct ide_drive_s *);
-	int		(*init_media)(struct ide_drive_s *, struct gendisk *);
-	int		(*set_doorlock)(struct ide_drive_s *, struct gendisk *,
-					int);
-	ide_startstop_t	(*do_request)(struct ide_drive_s *, struct request *,
-				      sector_t);
-	int		(*ioctl)(struct ide_drive_s *, struct block_device *,
-				 fmode_t, unsigned int, unsigned long);
-	int		(*compat_ioctl)(struct ide_drive_s *, struct block_device *,
-					fmode_t, unsigned int, unsigned long);
-};
-
-/* ATAPI device flags */
-enum {
-	IDE_AFLAG_DRQ_INTERRUPT		= BIT(0),
-
-	/* ide-cd */
-	/* Drive cannot eject the disc. */
-	IDE_AFLAG_NO_EJECT		= BIT(1),
-	/* Drive is a pre ATAPI 1.2 drive. */
-	IDE_AFLAG_PRE_ATAPI12		= BIT(2),
-	/* TOC addresses are in BCD. */
-	IDE_AFLAG_TOCADDR_AS_BCD	= BIT(3),
-	/* TOC track numbers are in BCD. */
-	IDE_AFLAG_TOCTRACKS_AS_BCD	= BIT(4),
-	/* Saved TOC information is current. */
-	IDE_AFLAG_TOC_VALID		= BIT(6),
-	/* We think that the drive door is locked. */
-	IDE_AFLAG_DOOR_LOCKED		= BIT(7),
-	/* SET_CD_SPEED command is unsupported. */
-	IDE_AFLAG_NO_SPEED_SELECT	= BIT(8),
-	IDE_AFLAG_VERTOS_300_SSD	= BIT(9),
-	IDE_AFLAG_VERTOS_600_ESD	= BIT(10),
-	IDE_AFLAG_SANYO_3CD		= BIT(11),
-	IDE_AFLAG_FULL_CAPS_PAGE	= BIT(12),
-	IDE_AFLAG_PLAY_AUDIO_OK		= BIT(13),
-	IDE_AFLAG_LE_SPEED_FIELDS	= BIT(14),
-
-	/* ide-floppy */
-	/* Avoid commands not supported in Clik drive */
-	IDE_AFLAG_CLIK_DRIVE		= BIT(15),
-	/* Requires BH algorithm for packets */
-	IDE_AFLAG_ZIP_DRIVE		= BIT(16),
-	/* Supports format progress report */
-	IDE_AFLAG_SRFP			= BIT(17),
-
-	/* ide-tape */
-	IDE_AFLAG_IGNORE_DSC		= BIT(18),
-	/* 0 When the tape position is unknown */
-	IDE_AFLAG_ADDRESS_VALID		= BIT(19),
-	/* Device already opened */
-	IDE_AFLAG_BUSY			= BIT(20),
-	/* Attempt to auto-detect the current user block size */
-	IDE_AFLAG_DETECT_BS		= BIT(21),
-	/* Currently on a filemark */
-	IDE_AFLAG_FILEMARK		= BIT(22),
-	/* 0 = no tape is loaded, so we don't rewind after ejecting */
-	IDE_AFLAG_MEDIUM_PRESENT	= BIT(23),
-
-	IDE_AFLAG_NO_AUTOCLOSE		= BIT(24),
-};
-
-/* device flags */
-enum {
-	/* restore settings after device reset */
-	IDE_DFLAG_KEEP_SETTINGS		= BIT(0),
-	/* device is using DMA for read/write */
-	IDE_DFLAG_USING_DMA		= BIT(1),
-	/* okay to unmask other IRQs */
-	IDE_DFLAG_UNMASK		= BIT(2),
-	/* don't attempt flushes */
-	IDE_DFLAG_NOFLUSH		= BIT(3),
-	/* DSC overlap */
-	IDE_DFLAG_DSC_OVERLAP		= BIT(4),
-	/* give potential excess bandwidth */
-	IDE_DFLAG_NICE1			= BIT(5),
-	/* device is physically present */
-	IDE_DFLAG_PRESENT		= BIT(6),
-	/* disable Host Protected Area */
-	IDE_DFLAG_NOHPA			= BIT(7),
-	/* id read from device (synthetic if not set) */
-	IDE_DFLAG_ID_READ		= BIT(8),
-	IDE_DFLAG_NOPROBE		= BIT(9),
-	/* need to do check_media_change() */
-	IDE_DFLAG_REMOVABLE		= BIT(10),
-	IDE_DFLAG_FORCED_GEOM		= BIT(12),
-	/* disallow setting unmask bit */
-	IDE_DFLAG_NO_UNMASK		= BIT(13),
-	/* disallow enabling 32-bit I/O */
-	IDE_DFLAG_NO_IO_32BIT		= BIT(14),
-	/* for removable only: door lock/unlock works */
-	IDE_DFLAG_DOORLOCKING		= BIT(15),
-	/* disallow DMA */
-	IDE_DFLAG_NODMA			= BIT(16),
-	/* powermanagement told us not to do anything, so sleep nicely */
-	IDE_DFLAG_BLOCKED		= BIT(17),
-	/* sleeping & sleep field valid */
-	IDE_DFLAG_SLEEPING		= BIT(18),
-	IDE_DFLAG_POST_RESET		= BIT(19),
-	IDE_DFLAG_UDMA33_WARNED		= BIT(20),
-	IDE_DFLAG_LBA48			= BIT(21),
-	/* status of write cache */
-	IDE_DFLAG_WCACHE		= BIT(22),
-	/* used for ignoring ATA_DF */
-	IDE_DFLAG_NOWERR		= BIT(23),
-	/* retrying in PIO */
-	IDE_DFLAG_DMA_PIO_RETRY		= BIT(24),
-	IDE_DFLAG_LBA			= BIT(25),
-	/* don't unload heads */
-	IDE_DFLAG_NO_UNLOAD		= BIT(26),
-	/* heads unloaded, please don't reset port */
-	IDE_DFLAG_PARKED		= BIT(27),
-	IDE_DFLAG_MEDIA_CHANGED		= BIT(28),
-	/* write protect */
-	IDE_DFLAG_WP			= BIT(29),
-	IDE_DFLAG_FORMAT_IN_PROGRESS	= BIT(30),
-	IDE_DFLAG_NIEN_QUIRK		= BIT(31),
-};
-
-struct ide_drive_s {
-	char		name[4];	/* drive name, such as "hda" */
-        char            driver_req[10];	/* requests specific driver */
-
-	struct request_queue	*queue;	/* request queue */
-
-	bool (*prep_rq)(struct ide_drive_s *, struct request *);
-
-	struct blk_mq_tag_set	tag_set;
-
-	struct request		*rq;	/* current request */
-	void		*driver_data;	/* extra driver data */
-	u16			*id;	/* identification info */
-#ifdef CONFIG_IDE_PROC_FS
-	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
-	const struct ide_proc_devset *settings; /* /proc/ide/ drive settings */
-#endif
-	struct hwif_s		*hwif;	/* actually (ide_hwif_t *) */
-
-	const struct ide_disk_ops *disk_ops;
-
-	unsigned long dev_flags;
-
-	unsigned long sleep;		/* sleep until this time */
-	unsigned long timeout;		/* max time to wait for irq */
-
-	u8	special_flags;		/* special action flags */
-
-	u8	select;			/* basic drive/head select reg value */
-	u8	retry_pio;		/* retrying dma capable host in pio */
-	u8	waiting_for_dma;	/* dma currently in progress */
-	u8	dma;			/* atapi dma flag */
-
-        u8	init_speed;	/* transfer rate set at boot */
-        u8	current_speed;	/* current transfer rate set */
-	u8	desired_speed;	/* desired transfer rate set */
-	u8	pio_mode;	/* for ->set_pio_mode _only_ */
-	u8	dma_mode;	/* for ->set_dma_mode _only_ */
-	u8	dn;		/* now wide spread use */
-	u8	acoustic;	/* acoustic management */
-	u8	media;		/* disk, cdrom, tape, floppy, ... */
-	u8	ready_stat;	/* min status value for drive ready */
-	u8	mult_count;	/* current multiple sector setting */
-	u8	mult_req;	/* requested multiple sector setting */
-	u8	io_32bit;	/* 0=16-bit, 1=32-bit, 2/3=32bit+sync */
-	u8	bad_wstat;	/* used for ignoring ATA_DF */
-	u8	head;		/* "real" number of heads */
-	u8	sect;		/* "real" sectors per track */
-	u8	bios_head;	/* BIOS/fdisk/LILO number of heads */
-	u8	bios_sect;	/* BIOS/fdisk/LILO sectors per track */
-
-	/* delay this long before sending packet command */
-	u8 pc_delay;
-
-	unsigned int	bios_cyl;	/* BIOS/fdisk/LILO number of cyls */
-	unsigned int	cyl;		/* "real" number of cyls */
-	void		*drive_data;	/* used by set_pio_mode/dev_select() */
-	unsigned int	failures;	/* current failure count */
-	unsigned int	max_failures;	/* maximum allowed failure count */
-	u64		probed_capacity;/* initial/native media capacity */
-	u64		capacity64;	/* total number of sectors */
-
-	int		lun;		/* logical unit */
-	int		crc_count;	/* crc counter to reduce drive speed */
-
-	unsigned long	debug_mask;	/* debugging levels switch */
-
-#ifdef CONFIG_BLK_DEV_IDEACPI
-	struct ide_acpi_drive_link *acpidata;
-#endif
-	struct list_head list;
-	struct device	gendev;
-	struct completion gendev_rel_comp;	/* to deal with device release() */
-
-	/* current packet command */
-	struct ide_atapi_pc *pc;
-
-	/* last failed packet command */
-	struct ide_atapi_pc *failed_pc;
-
-	/* callback for packet commands */
-	int  (*pc_callback)(struct ide_drive_s *, int);
-
-	ide_startstop_t (*irq_handler)(struct ide_drive_s *);
-
-	unsigned long atapi_flags;
-
-	struct ide_atapi_pc request_sense_pc;
-
-	/* current sense rq and buffer */
-	bool sense_rq_armed;
-	bool sense_rq_active;
-	struct request *sense_rq;
-	struct request_sense sense_data;
-
-	/* async sense insertion */
-	struct work_struct rq_work;
-	struct list_head rq_list;
-};
-
-typedef struct ide_drive_s ide_drive_t;
-
-#define to_ide_device(dev)		container_of(dev, ide_drive_t, gendev)
-
-#define to_ide_drv(obj, cont_type)	\
-	container_of(obj, struct cont_type, dev)
-
-#define ide_drv_g(disk, cont_type)	\
-	container_of((disk)->private_data, struct cont_type, driver)
-
-struct ide_port_info;
-
-struct ide_tp_ops {
-	void	(*exec_command)(struct hwif_s *, u8);
-	u8	(*read_status)(struct hwif_s *);
-	u8	(*read_altstatus)(struct hwif_s *);
-	void	(*write_devctl)(struct hwif_s *, u8);
-
-	void	(*dev_select)(ide_drive_t *);
-	void	(*tf_load)(ide_drive_t *, struct ide_taskfile *, u8);
-	void	(*tf_read)(ide_drive_t *, struct ide_taskfile *, u8);
-
-	void	(*input_data)(ide_drive_t *, struct ide_cmd *,
-			      void *, unsigned int);
-	void	(*output_data)(ide_drive_t *, struct ide_cmd *,
-			       void *, unsigned int);
-};
-
-extern const struct ide_tp_ops default_tp_ops;
-
-/**
- * struct ide_port_ops - IDE port operations
- *
- * @init_dev:		host specific initialization of a device
- * @set_pio_mode:	routine to program host for PIO mode
- * @set_dma_mode:	routine to program host for DMA mode
- * @reset_poll:		chipset polling based on hba specifics
- * @pre_reset:		chipset specific changes to default for device-hba resets
- * @resetproc:		routine to reset controller after a disk reset
- * @maskproc:		special host masking for drive selection
- * @quirkproc:		check host's drive quirk list
- * @clear_irq:		clear IRQ
- *
- * @mdma_filter:	filter MDMA modes
- * @udma_filter:	filter UDMA modes
- *
- * @cable_detect:	detect cable type
- */
-struct ide_port_ops {
-	void	(*init_dev)(ide_drive_t *);
-	void	(*set_pio_mode)(struct hwif_s *, ide_drive_t *);
-	void	(*set_dma_mode)(struct hwif_s *, ide_drive_t *);
-	blk_status_t (*reset_poll)(ide_drive_t *);
-	void	(*pre_reset)(ide_drive_t *);
-	void	(*resetproc)(ide_drive_t *);
-	void	(*maskproc)(ide_drive_t *, int);
-	void	(*quirkproc)(ide_drive_t *);
-	void	(*clear_irq)(ide_drive_t *);
-	int	(*test_irq)(struct hwif_s *);
-
-	u8	(*mdma_filter)(ide_drive_t *);
-	u8	(*udma_filter)(ide_drive_t *);
-
-	u8	(*cable_detect)(struct hwif_s *);
-};
-
-struct ide_dma_ops {
-	void	(*dma_host_set)(struct ide_drive_s *, int);
-	int	(*dma_setup)(struct ide_drive_s *, struct ide_cmd *);
-	void	(*dma_start)(struct ide_drive_s *);
-	int	(*dma_end)(struct ide_drive_s *);
-	int	(*dma_test_irq)(struct ide_drive_s *);
-	void	(*dma_lost_irq)(struct ide_drive_s *);
-	/* below ones are optional */
-	int	(*dma_check)(struct ide_drive_s *, struct ide_cmd *);
-	int	(*dma_timer_expiry)(struct ide_drive_s *);
-	void	(*dma_clear)(struct ide_drive_s *);
-	/*
-	 * The following method is optional and only required to be
-	 * implemented for the SFF-8038i compatible controllers.
-	 */
-	u8	(*dma_sff_read_status)(struct hwif_s *);
-};
-
-enum {
-	IDE_PFLAG_PROBING		= BIT(0),
-};
-
-struct ide_host;
-
-typedef struct hwif_s {
-	struct hwif_s *mate;		/* other hwif from same PCI chip */
-	struct proc_dir_entry *proc;	/* /proc/ide/ directory entry */
-
-	struct ide_host *host;
-
-	char name[6];			/* name of interface, eg. "ide0" */
-
-	struct ide_io_ports	io_ports;
-
-	unsigned long	sata_scr[SATA_NR_PORTS];
-
-	ide_drive_t	*devices[MAX_DRIVES + 1];
-
-	unsigned long	port_flags;
-
-	u8 major;	/* our major number */
-	u8 index;	/* 0 for ide0; 1 for ide1; ... */
-	u8 channel;	/* for dual-port chips: 0=primary, 1=secondary */
-
-	u32 host_flags;
-
-	u8 pio_mask;
-
-	u8 ultra_mask;
-	u8 mwdma_mask;
-	u8 swdma_mask;
-
-	u8 cbl;		/* cable type */
-
-	hwif_chipset_t chipset;	/* sub-module for tuning.. */
-
-	struct device *dev;
-
-	void (*rw_disk)(ide_drive_t *, struct request *);
-
-	const struct ide_tp_ops		*tp_ops;
-	const struct ide_port_ops	*port_ops;
-	const struct ide_dma_ops	*dma_ops;
-
-	/* dma physical region descriptor table (cpu view) */
-	unsigned int	*dmatable_cpu;
-	/* dma physical region descriptor table (dma view) */
-	dma_addr_t	dmatable_dma;
-
-	/* maximum number of PRD table entries */
-	int prd_max_nents;
-	/* PRD entry size in bytes */
-	int prd_ent_size;
-
-	/* Scatter-gather list used to build the above */
-	struct scatterlist *sg_table;
-	int sg_max_nents;		/* Maximum number of entries in it */
-
-	struct ide_cmd cmd;		/* current command */
-
-	int		rqsize;		/* max sectors per request */
-	int		irq;		/* our irq number */
-
-	unsigned long	dma_base;	/* base addr for dma ports */
-
-	unsigned long	config_data;	/* for use by chipset-specific code */
-	unsigned long	select_data;	/* for use by chipset-specific code */
-
-	unsigned long	extra_base;	/* extra addr for dma ports */
-	unsigned	extra_ports;	/* number of extra dma ports */
-
-	unsigned	present    : 1;	/* this interface exists */
-	unsigned	busy	   : 1; /* serializes devices on a port */
-
-	struct device		gendev;
-	struct device		*portdev;
-
-	struct completion gendev_rel_comp; /* To deal with device release() */
-
-	void		*hwif_data;	/* extra hwif data */
-
-#ifdef CONFIG_BLK_DEV_IDEACPI
-	struct ide_acpi_hwif_link *acpidata;
-#endif
-
-	/* IRQ handler, if active */
-	ide_startstop_t	(*handler)(ide_drive_t *);
-
-	/* BOOL: polling active & poll_timeout field valid */
-	unsigned int polling : 1;
-
-	/* current drive */
-	ide_drive_t *cur_dev;
-
-	/* current request */
-	struct request *rq;
-
-	/* failsafe timer */
-	struct timer_list timer;
-	/* timeout value during long polls */
-	unsigned long poll_timeout;
-	/* queried upon timeouts */
-	int (*expiry)(ide_drive_t *);
-
-	int req_gen;
-	int req_gen_timer;
-
-	spinlock_t lock;
-} ____cacheline_internodealigned_in_smp ide_hwif_t;
-
-#define MAX_HOST_PORTS 4
-
-struct ide_host {
-	ide_hwif_t	*ports[MAX_HOST_PORTS + 1];
-	unsigned int	n_ports;
-	struct device	*dev[2];
-
-	int		(*init_chipset)(struct pci_dev *);
-
-	void		(*get_lock)(irq_handler_t, void *);
-	void		(*release_lock)(void);
-
-	irq_handler_t	irq_handler;
-
-	unsigned long	host_flags;
-
-	int		irq_flags;
-
-	void		*host_priv;
-	ide_hwif_t	*cur_port;	/* for hosts requiring serialization */
-
-	/* used for hosts requiring serialization */
-	volatile unsigned long	host_busy;
-};
-
-#define IDE_HOST_BUSY 0
-
-/*
- *  internal ide interrupt handler type
- */
-typedef ide_startstop_t (ide_handler_t)(ide_drive_t *);
-typedef int (ide_expiry_t)(ide_drive_t *);
-
-/* used by ide-cd, ide-floppy, etc. */
-typedef void (xfer_func_t)(ide_drive_t *, struct ide_cmd *, void *, unsigned);
-
-extern struct mutex ide_setting_mtx;
-
-/*
- * configurable drive settings
- */
-
-#define DS_SYNC	BIT(0)
-
-struct ide_devset {
-	int		(*get)(ide_drive_t *);
-	int		(*set)(ide_drive_t *, int);
-	unsigned int	flags;
-};
-
-#define __DEVSET(_flags, _get, _set) { \
-	.flags	= _flags, \
-	.get	= _get,	\
-	.set	= _set,	\
-}
-
-#define ide_devset_get(name, field) \
-static int get_##name(ide_drive_t *drive) \
-{ \
-	return drive->field; \
-}
-
-#define ide_devset_set(name, field) \
-static int set_##name(ide_drive_t *drive, int arg) \
-{ \
-	drive->field = arg; \
-	return 0; \
-}
-
-#define ide_devset_get_flag(name, flag) \
-static int get_##name(ide_drive_t *drive) \
-{ \
-	return !!(drive->dev_flags & flag); \
-}
-
-#define ide_devset_set_flag(name, flag) \
-static int set_##name(ide_drive_t *drive, int arg) \
-{ \
-	if (arg) \
-		drive->dev_flags |= flag; \
-	else \
-		drive->dev_flags &= ~flag; \
-	return 0; \
-}
-
-#define __IDE_DEVSET(_name, _flags, _get, _set) \
-const struct ide_devset ide_devset_##_name = \
-	__DEVSET(_flags, _get, _set)
-
-#define IDE_DEVSET(_name, _flags, _get, _set) \
-static __IDE_DEVSET(_name, _flags, _get, _set)
-
-#define ide_devset_rw(_name, _func) \
-IDE_DEVSET(_name, 0, get_##_func, set_##_func)
-
-#define ide_devset_w(_name, _func) \
-IDE_DEVSET(_name, 0, NULL, set_##_func)
-
-#define ide_ext_devset_rw(_name, _func) \
-__IDE_DEVSET(_name, 0, get_##_func, set_##_func)
-
-#define ide_ext_devset_rw_sync(_name, _func) \
-__IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func)
-
-#define ide_decl_devset(_name) \
-extern const struct ide_devset ide_devset_##_name
-
-ide_decl_devset(io_32bit);
-ide_decl_devset(keepsettings);
-ide_decl_devset(pio_mode);
-ide_decl_devset(unmaskirq);
-ide_decl_devset(using_dma);
-
-#ifdef CONFIG_IDE_PROC_FS
-/*
- * /proc/ide interface
- */
-
-#define ide_devset_rw_field(_name, _field) \
-ide_devset_get(_name, _field); \
-ide_devset_set(_name, _field); \
-IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name)
-
-#define ide_devset_ro_field(_name, _field) \
-ide_devset_get(_name, _field); \
-IDE_DEVSET(_name, 0, get_##_name, NULL)
-
-#define ide_devset_rw_flag(_name, _field) \
-ide_devset_get_flag(_name, _field); \
-ide_devset_set_flag(_name, _field); \
-IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name)
-
-struct ide_proc_devset {
-	const char		*name;
-	const struct ide_devset	*setting;
-	int			min, max;
-	int			(*mulf)(ide_drive_t *);
-	int			(*divf)(ide_drive_t *);
-};
-
-#define __IDE_PROC_DEVSET(_name, _min, _max, _mulf, _divf) { \
-	.name = __stringify(_name), \
-	.setting = &ide_devset_##_name, \
-	.min = _min, \
-	.max = _max, \
-	.mulf = _mulf, \
-	.divf = _divf, \
-}
-
-#define IDE_PROC_DEVSET(_name, _min, _max) \
-__IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL)
-
-typedef struct {
-	const char	*name;
-	umode_t		mode;
-	int (*show)(struct seq_file *, void *);
-} ide_proc_entry_t;
-
-void proc_ide_create(void);
-void proc_ide_destroy(void);
-void ide_proc_register_port(ide_hwif_t *);
-void ide_proc_port_register_devices(ide_hwif_t *);
-void ide_proc_unregister_device(ide_drive_t *);
-void ide_proc_unregister_port(ide_hwif_t *);
-void ide_proc_register_driver(ide_drive_t *, struct ide_driver *);
-void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *);
-
-int ide_capacity_proc_show(struct seq_file *m, void *v);
-int ide_geometry_proc_show(struct seq_file *m, void *v);
-#else
-static inline void proc_ide_create(void) { ; }
-static inline void proc_ide_destroy(void) { ; }
-static inline void ide_proc_register_port(ide_hwif_t *hwif) { ; }
-static inline void ide_proc_port_register_devices(ide_hwif_t *hwif) { ; }
-static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; }
-static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; }
-static inline void ide_proc_register_driver(ide_drive_t *drive,
-					    struct ide_driver *driver) { ; }
-static inline void ide_proc_unregister_driver(ide_drive_t *drive,
-					      struct ide_driver *driver) { ; }
-#endif
-
-enum {
-	/* enter/exit functions */
-	IDE_DBG_FUNC =			BIT(0),
-	/* sense key/asc handling */
-	IDE_DBG_SENSE =			BIT(1),
-	/* packet commands handling */
-	IDE_DBG_PC =			BIT(2),
-	/* request handling */
-	IDE_DBG_RQ =			BIT(3),
-	/* driver probing/setup */
-	IDE_DBG_PROBE =			BIT(4),
-};
-
-/* DRV_NAME has to be defined in the driver before using the macro below */
-#define __ide_debug_log(lvl, fmt, args...)				\
-{									\
-	if (unlikely(drive->debug_mask & lvl))				\
-		printk(KERN_INFO DRV_NAME ": %s: " fmt "\n",		\
-					  __func__, ## args);		\
-}
-
-/*
- * Power Management state machine (rq->pm->pm_step).
- *
- * For each step, the core calls ide_start_power_step() first.
- * This can return:
- *	- ide_stopped :	In this case, the core calls us back again unless
- *			step have been set to ide_power_state_completed.
- *	- ide_started :	In this case, the channel is left busy until an
- *			async event (interrupt) occurs.
- * Typically, ide_start_power_step() will issue a taskfile request with
- * do_rw_taskfile().
- *
- * Upon reception of the interrupt, the core will call ide_complete_power_step()
- * with the error code if any. This routine should update the step value
- * and return. It should not start a new request. The core will call
- * ide_start_power_step() for the new step value, unless step have been
- * set to IDE_PM_COMPLETED.
- */
-enum {
-	IDE_PM_START_SUSPEND,
-	IDE_PM_FLUSH_CACHE	= IDE_PM_START_SUSPEND,
-	IDE_PM_STANDBY,
-
-	IDE_PM_START_RESUME,
-	IDE_PM_RESTORE_PIO	= IDE_PM_START_RESUME,
-	IDE_PM_IDLE,
-	IDE_PM_RESTORE_DMA,
-
-	IDE_PM_COMPLETED,
-};
-
-int generic_ide_suspend(struct device *, pm_message_t);
-int generic_ide_resume(struct device *);
-
-void ide_complete_power_step(ide_drive_t *, struct request *);
-ide_startstop_t ide_start_power_step(ide_drive_t *, struct request *);
-void ide_complete_pm_rq(ide_drive_t *, struct request *);
-void ide_check_pm_state(ide_drive_t *, struct request *);
-
-/*
- * Subdrivers support.
- *
- * The gendriver.owner field should be set to the module owner of this driver.
- * The gendriver.name field should be set to the name of this driver
- */
-struct ide_driver {
-	const char			*version;
-	ide_startstop_t	(*do_request)(ide_drive_t *, struct request *, sector_t);
-	struct device_driver	gen_driver;
-	int		(*probe)(ide_drive_t *);
-	void		(*remove)(ide_drive_t *);
-	void		(*resume)(ide_drive_t *);
-	void		(*shutdown)(ide_drive_t *);
-#ifdef CONFIG_IDE_PROC_FS
-	ide_proc_entry_t *		(*proc_entries)(ide_drive_t *);
-	const struct ide_proc_devset *	(*proc_devsets)(ide_drive_t *);
-#endif
-};
-
-#define to_ide_driver(drv) container_of(drv, struct ide_driver, gen_driver)
-
-int ide_device_get(ide_drive_t *);
-void ide_device_put(ide_drive_t *);
-
-struct ide_ioctl_devset {
-	unsigned int	get_ioctl;
-	unsigned int	set_ioctl;
-	const struct ide_devset *setting;
-};
-
-int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int,
-		      unsigned long, const struct ide_ioctl_devset *);
-
-int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned long);
-
-extern int ide_vlb_clk;
-extern int ide_pci_clk;
-
-int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int);
-void ide_kill_rq(ide_drive_t *, struct request *);
-void ide_insert_request_head(ide_drive_t *, struct request *);
-
-void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
-void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int);
-
-void ide_execute_command(ide_drive_t *, struct ide_cmd *, ide_handler_t *,
-			 unsigned int);
-
-void ide_pad_transfer(ide_drive_t *, int, int);
-
-ide_startstop_t ide_error(ide_drive_t *, const char *, u8);
-
-void ide_fix_driveid(u16 *);
-
-extern void ide_fixstring(u8 *, const int, const int);
-
-int ide_busy_sleep(ide_drive_t *, unsigned long, int);
-
-int __ide_wait_stat(ide_drive_t *, u8, u8, unsigned long, u8 *);
-int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long);
-
-ide_startstop_t ide_do_park_unpark(ide_drive_t *, struct request *);
-ide_startstop_t ide_do_devset(ide_drive_t *, struct request *);
-
-extern ide_startstop_t ide_do_reset (ide_drive_t *);
-
-extern int ide_devset_execute(ide_drive_t *drive,
-			      const struct ide_devset *setting, int arg);
-
-void ide_complete_cmd(ide_drive_t *, struct ide_cmd *, u8, u8);
-int ide_complete_rq(ide_drive_t *, blk_status_t, unsigned int);
-
-void ide_tf_readback(ide_drive_t *drive, struct ide_cmd *cmd);
-void ide_tf_dump(const char *, struct ide_cmd *);
-
-void ide_exec_command(ide_hwif_t *, u8);
-u8 ide_read_status(ide_hwif_t *);
-u8 ide_read_altstatus(ide_hwif_t *);
-void ide_write_devctl(ide_hwif_t *, u8);
-
-void ide_dev_select(ide_drive_t *);
-void ide_tf_load(ide_drive_t *, struct ide_taskfile *, u8);
-void ide_tf_read(ide_drive_t *, struct ide_taskfile *, u8);
-
-void ide_input_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int);
-void ide_output_data(ide_drive_t *, struct ide_cmd *, void *, unsigned int);
-
-void SELECT_MASK(ide_drive_t *, int);
-
-u8 ide_read_error(ide_drive_t *);
-void ide_read_bcount_and_ireason(ide_drive_t *, u16 *, u8 *);
-
-int ide_check_ireason(ide_drive_t *, struct request *, int, int, int);
-
-int ide_check_atapi_device(ide_drive_t *, const char *);
-
-void ide_init_pc(struct ide_atapi_pc *);
-
-/* Disk head parking */
-extern wait_queue_head_t ide_park_wq;
-ssize_t ide_park_show(struct device *dev, struct device_attribute *attr,
-		      char *buf);
-ssize_t ide_park_store(struct device *dev, struct device_attribute *attr,
-		       const char *buf, size_t len);
-
-/*
- * Special requests for ide-tape block device strategy routine.
- *
- * In order to service a character device command, we add special requests to
- * the tail of our block device request queue and wait for their completion.
- */
-enum {
-	REQ_IDETAPE_PC1		= BIT(0), /* packet command (first stage) */
-	REQ_IDETAPE_PC2		= BIT(1), /* packet command (second stage) */
-	REQ_IDETAPE_READ	= BIT(2),
-	REQ_IDETAPE_WRITE	= BIT(3),
-};
-
-int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *,
-		      void *, unsigned int);
-
-int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
-int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
-int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
-void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *drive);
-
-void ide_prep_sense(ide_drive_t *drive, struct request *rq);
-int ide_queue_sense_rq(ide_drive_t *drive, void *special);
-
-int ide_cd_expiry(ide_drive_t *);
-
-int ide_cd_get_xferlen(struct request *);
-
-ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_cmd *);
-
-ide_startstop_t do_rw_taskfile(ide_drive_t *, struct ide_cmd *);
-
-void ide_pio_bytes(ide_drive_t *, struct ide_cmd *, unsigned int, unsigned int);
-
-void ide_finish_cmd(ide_drive_t *, struct ide_cmd *, u8);
-
-int ide_raw_taskfile(ide_drive_t *, struct ide_cmd *, u8 *, u16);
-int ide_no_data_taskfile(ide_drive_t *, struct ide_cmd *);
-
-int ide_taskfile_ioctl(ide_drive_t *, unsigned long);
-
-int ide_dev_read_id(ide_drive_t *, u8, u16 *, int);
-
-extern int ide_driveid_update(ide_drive_t *);
-extern int ide_config_drive_speed(ide_drive_t *, u8);
-extern u8 eighty_ninty_three (ide_drive_t *);
-extern int taskfile_lib_get_identify(ide_drive_t *drive, u8 *);
-
-extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout);
-
-extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
-
-extern void ide_timer_expiry(struct timer_list *t);
-extern irqreturn_t ide_intr(int irq, void *dev_id);
-extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
-extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool);
-extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
-
-void ide_init_disk(struct gendisk *, ide_drive_t *);
-
-#ifdef CONFIG_IDEPCI_PCIBUS_ORDER
-extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *owner, const char *mod_name);
-#define ide_pci_register_driver(d) __ide_pci_register_driver(d, THIS_MODULE, KBUILD_MODNAME)
-#else
-#define ide_pci_register_driver(d) pci_register_driver(d)
-#endif
-
-static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev)
-{
-	if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5)
-		return 1;
-	return 0;
-}
-
-void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *,
-			 struct ide_hw *, struct ide_hw **);
-void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *);
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_PCI
-int ide_pci_set_master(struct pci_dev *, const char *);
-unsigned long ide_pci_dma_base(ide_hwif_t *, const struct ide_port_info *);
-int ide_pci_check_simplex(ide_hwif_t *, const struct ide_port_info *);
-int ide_hwif_setup_dma(ide_hwif_t *, const struct ide_port_info *);
-#else
-static inline int ide_hwif_setup_dma(ide_hwif_t *hwif,
-				     const struct ide_port_info *d)
-{
-	return -EINVAL;
-}
-#endif
-
-struct ide_pci_enablebit {
-	u8	reg;	/* byte pci reg holding the enable-bit */
-	u8	mask;	/* mask to isolate the enable-bit */
-	u8	val;	/* value of masked reg when "enabled" */
-};
-
-enum {
-	/* Uses ISA control ports not PCI ones. */
-	IDE_HFLAG_ISA_PORTS		= BIT(0),
-	/* single port device */
-	IDE_HFLAG_SINGLE		= BIT(1),
-	/* don't use legacy PIO blacklist */
-	IDE_HFLAG_PIO_NO_BLACKLIST	= BIT(2),
-	/* set for the second port of QD65xx */
-	IDE_HFLAG_QD_2ND_PORT		= BIT(3),
-	/* use PIO8/9 for prefetch off/on */
-	IDE_HFLAG_ABUSE_PREFETCH	= BIT(4),
-	/* use PIO6/7 for fast-devsel off/on */
-	IDE_HFLAG_ABUSE_FAST_DEVSEL	= BIT(5),
-	/* use 100-102 and 200-202 PIO values to set DMA modes */
-	IDE_HFLAG_ABUSE_DMA_MODES	= BIT(6),
-	/*
-	 * keep DMA setting when programming PIO mode, may be used only
-	 * for hosts which have separate PIO and DMA timings (ie. PMAC)
-	 */
-	IDE_HFLAG_SET_PIO_MODE_KEEP_DMA	= BIT(7),
-	/* program host for the transfer mode after programming device */
-	IDE_HFLAG_POST_SET_MODE		= BIT(8),
-	/* don't program host/device for the transfer mode ("smart" hosts) */
-	IDE_HFLAG_NO_SET_MODE		= BIT(9),
-	/* trust BIOS for programming chipset/device for DMA */
-	IDE_HFLAG_TRUST_BIOS_FOR_DMA	= BIT(10),
-	/* host is CS5510/CS5520 */
-	IDE_HFLAG_CS5520		= BIT(11),
-	/* ATAPI DMA is unsupported */
-	IDE_HFLAG_NO_ATAPI_DMA		= BIT(12),
-	/* set if host is a "non-bootable" controller */
-	IDE_HFLAG_NON_BOOTABLE		= BIT(13),
-	/* host doesn't support DMA */
-	IDE_HFLAG_NO_DMA		= BIT(14),
-	/* check if host is PCI IDE device before allowing DMA */
-	IDE_HFLAG_NO_AUTODMA		= BIT(15),
-	/* host uses MMIO */
-	IDE_HFLAG_MMIO			= BIT(16),
-	/* no LBA48 */
-	IDE_HFLAG_NO_LBA48		= BIT(17),
-	/* no LBA48 DMA */
-	IDE_HFLAG_NO_LBA48_DMA		= BIT(18),
-	/* data FIFO is cleared by an error */
-	IDE_HFLAG_ERROR_STOPS_FIFO	= BIT(19),
-	/* serialize ports */
-	IDE_HFLAG_SERIALIZE		= BIT(20),
-	/* host is DTC2278 */
-	IDE_HFLAG_DTC2278		= BIT(21),
-	/* 4 devices on a single set of I/O ports */
-	IDE_HFLAG_4DRIVES		= BIT(22),
-	/* host is TRM290 */
-	IDE_HFLAG_TRM290		= BIT(23),
-	/* use 32-bit I/O ops */
-	IDE_HFLAG_IO_32BIT		= BIT(24),
-	/* unmask IRQs */
-	IDE_HFLAG_UNMASK_IRQS		= BIT(25),
-	IDE_HFLAG_BROKEN_ALTSTATUS	= BIT(26),
-	/* serialize ports if DMA is possible (for sl82c105) */
-	IDE_HFLAG_SERIALIZE_DMA		= BIT(27),
-	/* force host out of "simplex" mode */
-	IDE_HFLAG_CLEAR_SIMPLEX		= BIT(28),
-	/* DSC overlap is unsupported */
-	IDE_HFLAG_NO_DSC		= BIT(29),
-	/* never use 32-bit I/O ops */
-	IDE_HFLAG_NO_IO_32BIT		= BIT(30),
-	/* never unmask IRQs */
-	IDE_HFLAG_NO_UNMASK_IRQS	= BIT(31),
-};
-
-#ifdef CONFIG_BLK_DEV_OFFBOARD
-# define IDE_HFLAG_OFF_BOARD	0
-#else
-# define IDE_HFLAG_OFF_BOARD	IDE_HFLAG_NON_BOOTABLE
-#endif
-
-struct ide_port_info {
-	char			*name;
-
-	int			(*init_chipset)(struct pci_dev *);
-
-	void			(*get_lock)(irq_handler_t, void *);
-	void			(*release_lock)(void);
-
-	void			(*init_iops)(ide_hwif_t *);
-	void                    (*init_hwif)(ide_hwif_t *);
-	int			(*init_dma)(ide_hwif_t *,
-					    const struct ide_port_info *);
-
-	const struct ide_tp_ops		*tp_ops;
-	const struct ide_port_ops	*port_ops;
-	const struct ide_dma_ops	*dma_ops;
-
-	struct ide_pci_enablebit	enablebits[2];
-
-	hwif_chipset_t		chipset;
-
-	u16			max_sectors;	/* if < than the default one */
-
-	u32			host_flags;
-
-	int			irq_flags;
-
-	u8			pio_mask;
-	u8			swdma_mask;
-	u8			mwdma_mask;
-	u8			udma_mask;
-};
-
-/*
- * State information carried for REQ_TYPE_ATA_PM_SUSPEND and REQ_TYPE_ATA_PM_RESUME
- * requests.
- */
-struct ide_pm_state {
-	/* PM state machine step value, currently driver specific */
-	int	pm_step;
-	/* requested PM state value (S1, S2, S3, S4, ...) */
-	u32	pm_state;
-	void*	data;		/* for driver use */
-};
-
-
-int ide_pci_init_one(struct pci_dev *, const struct ide_port_info *, void *);
-int ide_pci_init_two(struct pci_dev *, struct pci_dev *,
-		     const struct ide_port_info *, void *);
-void ide_pci_remove(struct pci_dev *);
-
-#ifdef CONFIG_PM
-int ide_pci_suspend(struct pci_dev *, pm_message_t);
-int ide_pci_resume(struct pci_dev *);
-#else
-#define ide_pci_suspend NULL
-#define ide_pci_resume NULL
-#endif
-
-void ide_map_sg(ide_drive_t *, struct ide_cmd *);
-void ide_init_sg_cmd(struct ide_cmd *, unsigned int);
-
-#define BAD_DMA_DRIVE		0
-#define GOOD_DMA_DRIVE		1
-
-struct drive_list_entry {
-	const char *id_model;
-	const char *id_firmware;
-};
-
-int ide_in_drive_list(u16 *, const struct drive_list_entry *);
-
-#ifdef CONFIG_BLK_DEV_IDEDMA
-int ide_dma_good_drive(ide_drive_t *);
-int __ide_dma_bad_drive(ide_drive_t *);
-
-u8 ide_find_dma_mode(ide_drive_t *, u8);
-
-static inline u8 ide_max_dma_mode(ide_drive_t *drive)
-{
-	return ide_find_dma_mode(drive, XFER_UDMA_6);
-}
-
-void ide_dma_off_quietly(ide_drive_t *);
-void ide_dma_off(ide_drive_t *);
-void ide_dma_on(ide_drive_t *);
-int ide_set_dma(ide_drive_t *);
-void ide_check_dma_crc(ide_drive_t *);
-ide_startstop_t ide_dma_intr(ide_drive_t *);
-
-int ide_allocate_dma_engine(ide_hwif_t *);
-void ide_release_dma_engine(ide_hwif_t *);
-
-int ide_dma_prepare(ide_drive_t *, struct ide_cmd *);
-void ide_dma_unmap_sg(ide_drive_t *, struct ide_cmd *);
-
-#ifdef CONFIG_BLK_DEV_IDEDMA_SFF
-int config_drive_for_dma(ide_drive_t *);
-int ide_build_dmatable(ide_drive_t *, struct ide_cmd *);
-void ide_dma_host_set(ide_drive_t *, int);
-int ide_dma_setup(ide_drive_t *, struct ide_cmd *);
-extern void ide_dma_start(ide_drive_t *);
-int ide_dma_end(ide_drive_t *);
-int ide_dma_test_irq(ide_drive_t *);
-int ide_dma_sff_timer_expiry(ide_drive_t *);
-u8 ide_dma_sff_read_status(ide_hwif_t *);
-extern const struct ide_dma_ops sff_dma_ops;
-#else
-static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; }
-#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */
-
-void ide_dma_lost_irq(ide_drive_t *);
-ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int);
-
-#else
-static inline u8 ide_find_dma_mode(ide_drive_t *drive, u8 speed) { return 0; }
-static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; }
-static inline void ide_dma_off_quietly(ide_drive_t *drive) { ; }
-static inline void ide_dma_off(ide_drive_t *drive) { ; }
-static inline void ide_dma_on(ide_drive_t *drive) { ; }
-static inline void ide_dma_verbose(ide_drive_t *drive) { ; }
-static inline int ide_set_dma(ide_drive_t *drive) { return 1; }
-static inline void ide_check_dma_crc(ide_drive_t *drive) { ; }
-static inline ide_startstop_t ide_dma_intr(ide_drive_t *drive) { return ide_stopped; }
-static inline ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) { return ide_stopped; }
-static inline void ide_release_dma_engine(ide_hwif_t *hwif) { ; }
-static inline int ide_dma_prepare(ide_drive_t *drive,
-				  struct ide_cmd *cmd) { return 1; }
-static inline void ide_dma_unmap_sg(ide_drive_t *drive,
-				    struct ide_cmd *cmd) { ; }
-#endif /* CONFIG_BLK_DEV_IDEDMA */
-
-#ifdef CONFIG_BLK_DEV_IDEACPI
-int ide_acpi_init(void);
-bool ide_port_acpi(ide_hwif_t *hwif);
-extern int ide_acpi_exec_tfs(ide_drive_t *drive);
-extern void ide_acpi_get_timing(ide_hwif_t *hwif);
-extern void ide_acpi_push_timing(ide_hwif_t *hwif);
-void ide_acpi_init_port(ide_hwif_t *);
-void ide_acpi_port_init_devices(ide_hwif_t *);
-extern void ide_acpi_set_state(ide_hwif_t *hwif, int on);
-#else
-static inline int ide_acpi_init(void) { return 0; }
-static inline bool ide_port_acpi(ide_hwif_t *hwif) { return 0; }
-static inline int ide_acpi_exec_tfs(ide_drive_t *drive) { return 0; }
-static inline void ide_acpi_get_timing(ide_hwif_t *hwif) { ; }
-static inline void ide_acpi_push_timing(ide_hwif_t *hwif) { ; }
-static inline void ide_acpi_init_port(ide_hwif_t *hwif) { ; }
-static inline void ide_acpi_port_init_devices(ide_hwif_t *hwif) { ; }
-static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {}
-#endif
-
-void ide_check_nien_quirk_list(ide_drive_t *);
-void ide_undecoded_slave(ide_drive_t *);
-
-void ide_port_apply_params(ide_hwif_t *);
-int ide_sysfs_register_port(ide_hwif_t *);
-
-struct ide_host *ide_host_alloc(const struct ide_port_info *, struct ide_hw **,
-				unsigned int);
-void ide_host_free(struct ide_host *);
-int ide_host_register(struct ide_host *, const struct ide_port_info *,
-		      struct ide_hw **);
-int ide_host_add(const struct ide_port_info *, struct ide_hw **, unsigned int,
-		 struct ide_host **);
-void ide_host_remove(struct ide_host *);
-int ide_legacy_device_add(const struct ide_port_info *, unsigned long);
-void ide_port_unregister_devices(ide_hwif_t *);
-void ide_port_scan(ide_hwif_t *);
-
-static inline void *ide_get_hwifdata (ide_hwif_t * hwif)
-{
-	return hwif->hwif_data;
-}
-
-static inline void ide_set_hwifdata (ide_hwif_t * hwif, void *data)
-{
-	hwif->hwif_data = data;
-}
-
-u64 ide_get_lba_addr(struct ide_cmd *, int);
-u8 ide_dump_status(ide_drive_t *, const char *, u8);
-
-struct ide_timing {
-	u8  mode;
-	u8  setup;	/* t1 */
-	u16 act8b;	/* t2 for 8-bit io */
-	u16 rec8b;	/* t2i for 8-bit io */
-	u16 cyc8b;	/* t0 for 8-bit io */
-	u16 active;	/* t2 or tD */
-	u16 recover;	/* t2i or tK */
-	u16 cycle;	/* t0 */
-	u16 udma;	/* t2CYCTYP/2 */
-};
-
-enum {
-	IDE_TIMING_SETUP	= BIT(0),
-	IDE_TIMING_ACT8B	= BIT(1),
-	IDE_TIMING_REC8B	= BIT(2),
-	IDE_TIMING_CYC8B	= BIT(3),
-	IDE_TIMING_8BIT		= IDE_TIMING_ACT8B | IDE_TIMING_REC8B |
-				  IDE_TIMING_CYC8B,
-	IDE_TIMING_ACTIVE	= BIT(4),
-	IDE_TIMING_RECOVER	= BIT(5),
-	IDE_TIMING_CYCLE	= BIT(6),
-	IDE_TIMING_UDMA		= BIT(7),
-	IDE_TIMING_ALL		= IDE_TIMING_SETUP | IDE_TIMING_8BIT |
-				  IDE_TIMING_ACTIVE | IDE_TIMING_RECOVER |
-				  IDE_TIMING_CYCLE | IDE_TIMING_UDMA,
-};
-
-struct ide_timing *ide_timing_find_mode(u8);
-u16 ide_pio_cycle_time(ide_drive_t *, u8);
-void ide_timing_merge(struct ide_timing *, struct ide_timing *,
-		      struct ide_timing *, unsigned int);
-int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int);
-
-#ifdef CONFIG_IDE_XFER_MODE
-int ide_scan_pio_blacklist(char *);
-const char *ide_xfer_verbose(u8);
-int ide_pio_need_iordy(ide_drive_t *, const u8);
-int ide_set_pio_mode(ide_drive_t *, u8);
-int ide_set_dma_mode(ide_drive_t *, u8);
-void ide_set_pio(ide_drive_t *, u8);
-int ide_set_xfer_rate(ide_drive_t *, u8);
-#else
-static inline void ide_set_pio(ide_drive_t *drive, u8 pio) { ; }
-static inline int ide_set_xfer_rate(ide_drive_t *drive, u8 rate) { return -1; }
-#endif
-
-static inline void ide_set_max_pio(ide_drive_t *drive)
-{
-	ide_set_pio(drive, 255);
-}
-
-char *ide_media_string(ide_drive_t *);
-
-extern const struct attribute_group *ide_dev_groups[];
-extern struct bus_type ide_bus_type;
-extern struct class *ide_port_class;
-
-static inline void ide_dump_identify(u8 *id)
-{
-	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0);
-}
-
-static inline int hwif_to_node(ide_hwif_t *hwif)
-{
-	return hwif->dev ? dev_to_node(hwif->dev) : -1;
-}
-
-static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive)
-{
-	ide_drive_t *peer = drive->hwif->devices[(drive->dn ^ 1) & 1];
-
-	return (peer->dev_flags & IDE_DFLAG_PRESENT) ? peer : NULL;
-}
-
-static inline void *ide_get_drivedata(ide_drive_t *drive)
-{
-	return drive->drive_data;
-}
-
-static inline void ide_set_drivedata(ide_drive_t *drive, void *data)
-{
-	drive->drive_data = data;
-}
-
-#define ide_port_for_each_dev(i, dev, port) \
-	for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++)
-
-#define ide_port_for_each_present_dev(i, dev, port) \
-	for ((i) = 0; ((dev) = (port)->devices[i]) || (i) < MAX_DRIVES; (i)++) \
-		if ((dev)->dev_flags & IDE_DFLAG_PRESENT)
-
-#define ide_host_for_each_port(i, port, host) \
-	for ((i) = 0; ((port) = (host)->ports[i]) || (i) < MAX_HOST_PORTS; (i)++)
-
-
-#endif /* _IDE_H */
-- 
GitLab


From 6eed261f48d5a53f369c88d4296621f2d8647493 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 08:40:04 -0700
Subject: [PATCH 2936/3804] pstore/blk: Improve failure reporting

There was no feedback on bad registration attempts. Add details on the
failure cause.

Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/blk.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 4bb8a344957af..eca83820fb5dc 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -114,8 +114,22 @@ static int __register_pstore_device(struct pstore_device_info *dev)
 
 	lockdep_assert_held(&pstore_blk_lock);
 
-	if (!dev || !dev->total_size || !dev->read || !dev->write)
+	if (!dev) {
+		pr_err("NULL device info\n");
 		return -EINVAL;
+	}
+	if (!dev->total_size) {
+		pr_err("zero sized device\n");
+		return -EINVAL;
+	}
+	if (!dev->read) {
+		pr_err("no read handler for device\n");
+		return -EINVAL;
+	}
+	if (!dev->write) {
+		pr_err("no write handler for device\n");
+		return -EINVAL;
+	}
 
 	/* someone already registered before */
 	if (pstore_zone_info)
-- 
GitLab


From 2a03ddbde1e1268f15de6f15b09f305a33bff4ba Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 07:51:28 -0700
Subject: [PATCH 2937/3804] pstore/blk: Move verify_size() macro out of
 function

There's no good reason for the verify_size macro to live inside the
function. Move it up with the check_size() macro and fix indenting.

Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/pstore/blk.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index eca83820fb5dc..7d8e5a1ddd5be 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -108,6 +108,17 @@ struct bdev_info {
 	_##name_;						\
 })
 
+#define verify_size(name, alignsize, enabled) {			\
+	long _##name_;						\
+	if (enabled)						\
+		_##name_ = check_size(name, alignsize);		\
+	else							\
+		_##name_ = 0;					\
+	/* Synchronize module parameters with resuls. */	\
+	name = _##name_ / 1024;					\
+	pstore_zone_info->name = _##name_;			\
+}
+
 static int __register_pstore_device(struct pstore_device_info *dev)
 {
 	int ret;
@@ -143,21 +154,10 @@ static int __register_pstore_device(struct pstore_device_info *dev)
 	if (!dev->flags)
 		dev->flags = UINT_MAX;
 
-#define verify_size(name, alignsize, enabled) {				\
-		long _##name_;						\
-		if (enabled)						\
-			_##name_ = check_size(name, alignsize);		\
-		else							\
-			_##name_ = 0;					\
-		name = _##name_ / 1024;					\
-		pstore_zone_info->name = _##name_;			\
-	}
-
 	verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
 	verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG);
 	verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE);
 	verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE);
-#undef verify_size
 
 	pstore_zone_info->total_size = dev->total_size;
 	pstore_zone_info->max_reason = max_reason;
-- 
GitLab


From 171b45a4a70eef2fd36bb794ce4f5a48c440361e Mon Sep 17 00:00:00 2001
From: Andrea Merello <andrea.merello@gmail.com>
Date: Tue, 6 Apr 2021 15:00:44 +0200
Subject: [PATCH 2938/3804] clocksource/drivers/arm_global_timer: Implement
 rate compensation whenever source clock changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds rate change notification support for the parent clock;
should that clock change, then we try to adjust our prescaler in order
to compensate (i.e. we adjust to still get the same timer frequency).

This is loosely based on what is done in timer-cadence-ttc. timer-sun5i,
mips-gic-timer and smp_twd.c also seem to look at their parent clock rate
and to perform some kind of adjustment whenever needed.

In this particular case we have only one single counter and prescaler for
all clocksource, clockevent and timer_delay, and we just update it for all
(i.e. we don't let it go and call clockevents_update_freq() to notify to
the kernel that our rate has changed).

Note that, there is apparently no other way to fixup things, because once
we call register_current_timer_delay(), specifying the timer rate, it seems
that that rate is not supposed to change ever.

In order for this mechanism to work, we have to make assumptions about how
much the parent clock is supposed to eventually decrease from its initial
rate, and set our initial prescaler to a value that we can eventually
decrease enough to compensate. We provide an option in Kconfig for this.

In case we end up in a situation in which we are not able to compensate the
parent clock change, we fail returning NOTIFY_BAD.

This fixes a real-world problem with Zynq arch not being able to use this
driver and CPU_FREQ at the same time (because ARM global timer is fed by
the CPU clock, which may keep changing when CPU_FREQ is enabled).

Signed-off-by: Andrea Merello <andrea.merello@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Sören Brinkmann <soren.brinkmann@xilinx.com>
Reviewed-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210406130045.15491-2-andrea.merello@gmail.com
---
 drivers/clocksource/Kconfig            |  13 +++
 drivers/clocksource/arm_global_timer.c | 122 +++++++++++++++++++++++--
 2 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 39aa21d01e054..19fc5f8883e03 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -358,6 +358,19 @@ config ARM_GLOBAL_TIMER
 	help
 	  This option enables support for the ARM global timer unit.
 
+config ARM_GT_INITIAL_PRESCALER_VAL
+	int "ARM global timer initial prescaler value"
+	default 1
+	depends on ARM_GLOBAL_TIMER
+	help
+	  When the ARM global timer initializes, its current rate is declared
+	  to the kernel and maintained forever. Should it's parent clock
+	  change, the driver tries to fix the timer's internal prescaler.
+	  On some machs (i.e. Zynq) the initial prescaler value thus poses
+	  bounds about how much the parent clock is allowed to decrease or
+	  increase wrt the initial clock value.
+	  This affects CPU_FREQ max delta from the initial frequency.
+
 config ARM_TIMER_SP804
 	bool "Support for Dual Timer SP804 module" if COMPILE_TEST
 	depends on GENERIC_SCHED_CLOCK && CLKDEV_LOOKUP
diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 88b2d38a7a61a..60a8047fd32e5 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -31,6 +31,10 @@
 #define GT_CONTROL_COMP_ENABLE		BIT(1)	/* banked */
 #define GT_CONTROL_IRQ_ENABLE		BIT(2)	/* banked */
 #define GT_CONTROL_AUTO_INC		BIT(3)	/* banked */
+#define GT_CONTROL_PRESCALER_SHIFT      8
+#define GT_CONTROL_PRESCALER_MAX        0xF
+#define GT_CONTROL_PRESCALER_MASK       (GT_CONTROL_PRESCALER_MAX << \
+					 GT_CONTROL_PRESCALER_SHIFT)
 
 #define GT_INT_STATUS	0x0c
 #define GT_INT_STATUS_EVENT_FLAG	BIT(0)
@@ -39,6 +43,7 @@
 #define GT_COMP1	0x14
 #define GT_AUTO_INC	0x18
 
+#define MAX_F_ERR 50
 /*
  * We are expecting to be clocked by the ARM peripheral clock.
  *
@@ -46,7 +51,8 @@
  * the units for all operations.
  */
 static void __iomem *gt_base;
-static unsigned long gt_clk_rate;
+struct notifier_block gt_clk_rate_change_nb;
+static u32 gt_psv_new, gt_psv_bck, gt_target_rate;
 static int gt_ppi;
 static struct clock_event_device __percpu *gt_evt;
 
@@ -96,7 +102,10 @@ static void gt_compare_set(unsigned long delta, int periodic)
 	unsigned long ctrl;
 
 	counter += delta;
-	ctrl = GT_CONTROL_TIMER_ENABLE;
+	ctrl = readl(gt_base + GT_CONTROL);
+	ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE |
+		  GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC);
+	ctrl |= GT_CONTROL_TIMER_ENABLE;
 	writel_relaxed(ctrl, gt_base + GT_CONTROL);
 	writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);
 	writel_relaxed(upper_32_bits(counter), gt_base + GT_COMP1);
@@ -123,7 +132,7 @@ static int gt_clockevent_shutdown(struct clock_event_device *evt)
 
 static int gt_clockevent_set_periodic(struct clock_event_device *evt)
 {
-	gt_compare_set(DIV_ROUND_CLOSEST(gt_clk_rate, HZ), 1);
+	gt_compare_set(DIV_ROUND_CLOSEST(gt_target_rate, HZ), 1);
 	return 0;
 }
 
@@ -177,7 +186,7 @@ static int gt_starting_cpu(unsigned int cpu)
 	clk->cpumask = cpumask_of(cpu);
 	clk->rating = 300;
 	clk->irq = gt_ppi;
-	clockevents_config_and_register(clk, gt_clk_rate,
+	clockevents_config_and_register(clk, gt_target_rate,
 					1, 0xffffffff);
 	enable_percpu_irq(clk->irq, IRQ_TYPE_NONE);
 	return 0;
@@ -232,9 +241,28 @@ static struct delay_timer gt_delay_timer = {
 	.read_current_timer = gt_read_long,
 };
 
+static void gt_write_presc(u32 psv)
+{
+	u32 reg;
+
+	reg = readl(gt_base + GT_CONTROL);
+	reg &= ~GT_CONTROL_PRESCALER_MASK;
+	reg |= psv << GT_CONTROL_PRESCALER_SHIFT;
+	writel(reg, gt_base + GT_CONTROL);
+}
+
+static u32 gt_read_presc(void)
+{
+	u32 reg;
+
+	reg = readl(gt_base + GT_CONTROL);
+	reg &= GT_CONTROL_PRESCALER_MASK;
+	return reg >> GT_CONTROL_PRESCALER_SHIFT;
+}
+
 static void __init gt_delay_timer_init(void)
 {
-	gt_delay_timer.freq = gt_clk_rate;
+	gt_delay_timer.freq = gt_target_rate;
 	register_current_timer_delay(&gt_delay_timer);
 }
 
@@ -243,18 +271,81 @@ static int __init gt_clocksource_init(void)
 	writel(0, gt_base + GT_CONTROL);
 	writel(0, gt_base + GT_COUNTER0);
 	writel(0, gt_base + GT_COUNTER1);
-	/* enables timer on all the cores */
-	writel(GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
+	/* set prescaler and enable timer on all the cores */
+	writel(((CONFIG_ARM_GT_INITIAL_PRESCALER_VAL - 1) <<
+		GT_CONTROL_PRESCALER_SHIFT)
+	       | GT_CONTROL_TIMER_ENABLE, gt_base + GT_CONTROL);
 
 #ifdef CONFIG_CLKSRC_ARM_GLOBAL_TIMER_SCHED_CLOCK
-	sched_clock_register(gt_sched_clock_read, 64, gt_clk_rate);
+	sched_clock_register(gt_sched_clock_read, 64, gt_target_rate);
 #endif
-	return clocksource_register_hz(&gt_clocksource, gt_clk_rate);
+	return clocksource_register_hz(&gt_clocksource, gt_target_rate);
+}
+
+static int gt_clk_rate_change_cb(struct notifier_block *nb,
+				 unsigned long event, void *data)
+{
+	struct clk_notifier_data *ndata = data;
+
+	switch (event) {
+	case PRE_RATE_CHANGE:
+	{
+		int psv;
+
+		psv = DIV_ROUND_CLOSEST(ndata->new_rate,
+					gt_target_rate);
+
+		if (abs(gt_target_rate - (ndata->new_rate / psv)) > MAX_F_ERR)
+			return NOTIFY_BAD;
+
+		psv--;
+
+		/* prescaler within legal range? */
+		if (psv < 0 || psv > GT_CONTROL_PRESCALER_MAX)
+			return NOTIFY_BAD;
+
+		/*
+		 * store timer clock ctrl register so we can restore it in case
+		 * of an abort.
+		 */
+		gt_psv_bck = gt_read_presc();
+		gt_psv_new = psv;
+		/* scale down: adjust divider in post-change notification */
+		if (ndata->new_rate < ndata->old_rate)
+			return NOTIFY_DONE;
+
+		/* scale up: adjust divider now - before frequency change */
+		gt_write_presc(psv);
+		break;
+	}
+	case POST_RATE_CHANGE:
+		/* scale up: pre-change notification did the adjustment */
+		if (ndata->new_rate > ndata->old_rate)
+			return NOTIFY_OK;
+
+		/* scale down: adjust divider now - after frequency change */
+		gt_write_presc(gt_psv_new);
+		break;
+
+	case ABORT_RATE_CHANGE:
+		/* we have to undo the adjustment in case we scale up */
+		if (ndata->new_rate < ndata->old_rate)
+			return NOTIFY_OK;
+
+		/* restore original register value */
+		gt_write_presc(gt_psv_bck);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	return NOTIFY_DONE;
 }
 
 static int __init global_timer_of_register(struct device_node *np)
 {
 	struct clk *gt_clk;
+	static unsigned long gt_clk_rate;
 	int err = 0;
 
 	/*
@@ -292,11 +383,20 @@ static int __init global_timer_of_register(struct device_node *np)
 	}
 
 	gt_clk_rate = clk_get_rate(gt_clk);
+	gt_target_rate = gt_clk_rate / CONFIG_ARM_GT_INITIAL_PRESCALER_VAL;
+	gt_clk_rate_change_nb.notifier_call =
+		gt_clk_rate_change_cb;
+	err = clk_notifier_register(gt_clk, &gt_clk_rate_change_nb);
+	if (err) {
+		pr_warn("Unable to register clock notifier\n");
+		goto out_clk;
+	}
+
 	gt_evt = alloc_percpu(struct clock_event_device);
 	if (!gt_evt) {
 		pr_warn("global-timer: can't allocate memory\n");
 		err = -ENOMEM;
-		goto out_clk;
+		goto out_clk_nb;
 	}
 
 	err = request_percpu_irq(gt_ppi, gt_clockevent_interrupt,
@@ -326,6 +426,8 @@ out_irq:
 	free_percpu_irq(gt_ppi, gt_evt);
 out_free:
 	free_percpu(gt_evt);
+out_clk_nb:
+	clk_notifier_unregister(gt_clk, &gt_clk_rate_change_nb);
 out_clk:
 	clk_disable_unprepare(gt_clk);
 out_unmap:
-- 
GitLab


From 68e2215e9d5f5ec8e5ba0158683742932519cad9 Mon Sep 17 00:00:00 2001
From: Andrea Merello <andrea.merello@gmail.com>
Date: Tue, 6 Apr 2021 15:00:45 +0200
Subject: [PATCH 2939/3804] arm: zynq: don't disable CONFIG_ARM_GLOBAL_TIMER
 due to CONFIG_CPU_FREQ anymore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now the ARM global timer driver can work even if its source clock rate
changes, so we don't need to disable that driver when CPU frequency scaling
is in use.

This causes the Zynq arch to get support for timer delay and get_cycles().

Signed-off-by: Andrea Merello <andrea.merello@gmail.com>
Cc: Patrice Chotard <patrice.chotard@st.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Sören Brinkmann <soren.brinkmann@xilinx.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210406130045.15491-3-andrea.merello@gmail.com
---
 arch/arm/mach-zynq/Kconfig  | 2 +-
 drivers/clocksource/Kconfig | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig
index 43fb941dcd073..a56748d671c43 100644
--- a/arch/arm/mach-zynq/Kconfig
+++ b/arch/arm/mach-zynq/Kconfig
@@ -6,7 +6,7 @@ config ARCH_ZYNQ
 	select ARCH_SUPPORTS_BIG_ENDIAN
 	select ARM_AMBA
 	select ARM_GIC
-	select ARM_GLOBAL_TIMER if !CPU_FREQ
+	select ARM_GLOBAL_TIMER
 	select CADENCE_TTC_TIMER
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 19fc5f8883e03..9fa28237715a3 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -360,6 +360,7 @@ config ARM_GLOBAL_TIMER
 
 config ARM_GT_INITIAL_PRESCALER_VAL
 	int "ARM global timer initial prescaler value"
+	default 2 if ARCH_ZYNQ
 	default 1
 	depends on ARM_GLOBAL_TIMER
 	help
-- 
GitLab


From be534f8ee137b95046d7c53c8200ffdcf05781a7 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Sat, 12 Jun 2021 17:27:26 +0800
Subject: [PATCH 2940/3804] clocksource/drivers/arm_global_timer: Make symbol
 'gt_clk_rate_change_nb' static

The sparse tool complains as follows:

drivers/clocksource/arm_global_timer.c:54:23: warning:
 symbol 'gt_clk_rate_change_nb' was not declared. Should it be static?

This symbol is not used outside of arm_global_timer.c, so mark it static.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1623490046-37972-1-git-send-email-zou_wei@huawei.com
---
 drivers/clocksource/arm_global_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 60a8047fd32e5..68b1d144a4128 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -51,7 +51,7 @@
  * the units for all operations.
  */
 static void __iomem *gt_base;
-struct notifier_block gt_clk_rate_change_nb;
+static struct notifier_block gt_clk_rate_change_nb;
 static u32 gt_psv_new, gt_psv_bck, gt_target_rate;
 static int gt_ppi;
 static struct clock_event_device __percpu *gt_evt;
-- 
GitLab


From f94bc2667fb204d7c131ac39d9ea342bd16116dc Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 15 Jun 2021 19:54:40 +0800
Subject: [PATCH 2941/3804] clocksource/drivers/arm_global_timer: Remove
 duplicated argument in arm_global_timer

Fix the following coccicheck warning:

    drivers/clocksource/arm_global_timer.c:107:4-23:
    duplicated argument to & or |

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Reviewed-by: Patrice Chotard <patrice.chotard@foss.st.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210615115440.8881-1-wanjiabing@vivo.com
---
 drivers/clocksource/arm_global_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_global_timer.c b/drivers/clocksource/arm_global_timer.c
index 68b1d144a4128..44a61dc6f9320 100644
--- a/drivers/clocksource/arm_global_timer.c
+++ b/drivers/clocksource/arm_global_timer.c
@@ -104,7 +104,7 @@ static void gt_compare_set(unsigned long delta, int periodic)
 	counter += delta;
 	ctrl = readl(gt_base + GT_CONTROL);
 	ctrl &= ~(GT_CONTROL_COMP_ENABLE | GT_CONTROL_IRQ_ENABLE |
-		  GT_CONTROL_AUTO_INC | GT_CONTROL_AUTO_INC);
+		  GT_CONTROL_AUTO_INC);
 	ctrl |= GT_CONTROL_TIMER_ENABLE;
 	writel_relaxed(ctrl, gt_base + GT_CONTROL);
 	writel_relaxed(lower_32_bits(counter), gt_base + GT_COMP0);
-- 
GitLab


From 8b33dfe0ba1c84c1aab2456590b38195837f1e6e Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Fri, 14 May 2021 21:14:39 -0500
Subject: [PATCH 2942/3804] clocksource/arm_arch_timer: Improve Allwinner A64
 timer workaround

Bad counter reads are experienced sometimes when bit 10 or greater rolls
over. Originally, testing showed that at least 10 lower bits would be
set to the same value during these bad reads. However, some users still
reported time skips.

Wider testing revealed that on some chips, occasionally only the lowest
9 bits would read as the anomalous value. During these reads (which
still happen only when bit 10 or greater rolls over), bit 9 would read
as the correct value.

Reduce the mask by one bit to cover these cases as well.

Cc: stable@vger.kernel.org
Fixes: c950ca8c35ee ("clocksource/drivers/arch_timer: Workaround for Allwinner A64 timer instability")
Reported-by: Roman Stratiienko <r.stratiienko@gmail.com>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210515021439.55316-1-samuel@sholland.org
---
 drivers/clocksource/arm_arch_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 89a9e0524555f..be6d741d404c0 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -364,7 +364,7 @@ static u64 notrace arm64_858921_read_cntvct_el0(void)
 	do {								\
 		_val = read_sysreg(reg);				\
 		_retries--;						\
-	} while (((_val + 1) & GENMASK(9, 0)) <= 1 && _retries);	\
+	} while (((_val + 1) & GENMASK(8, 0)) <= 1 && _retries);	\
 									\
 	WARN_ON_ONCE(!_retries);					\
 	_val;								\
-- 
GitLab


From 3d41fff3ae3980c055f3c7861264c46c924f3e4c Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 18 May 2021 10:53:06 +0300
Subject: [PATCH 2943/3804] clocksource/drivers/timer-ti-dm: Drop unnecessary
 restore

The device is not losing context on CPU_CLUSTER_PM_ERROR. As we are only
saving and restoring context with cpu_pm, there is no need to restore the
context in case of an error.

Note that the unnecessary restoring of context does not cause issues, it's
just not needed.

Cc: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20210518075306.35532-1-tony@atomide.com
---
 drivers/clocksource/timer-ti-dm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c
index e5c631f1b5cbe..3e52c5226c4d9 100644
--- a/drivers/clocksource/timer-ti-dm.c
+++ b/drivers/clocksource/timer-ti-dm.c
@@ -128,7 +128,8 @@ static int omap_timer_context_notifier(struct notifier_block *nb,
 			break;
 		omap_timer_save_context(timer);
 		break;
-	case CPU_CLUSTER_PM_ENTER_FAILED:
+	case CPU_CLUSTER_PM_ENTER_FAILED:	/* No need to restore context */
+		break;
 	case CPU_CLUSTER_PM_EXIT:
 		if ((timer->capability & OMAP_TIMER_ALWON) ||
 		    !atomic_read(&timer->enabled))
-- 
GitLab


From 25182f05ffed0b45602438693e4eed5d7f3ebadd Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Tue, 15 Jun 2021 18:23:13 -0700
Subject: [PATCH 2944/3804] mm,hwpoison: fix race with hugetlb page allocation

When hugetlb page fault (under overcommitting situation) and
memory_failure() race, VM_BUG_ON_PAGE() is triggered by the following
race:

    CPU0:                           CPU1:

                                    gather_surplus_pages()
                                      page = alloc_surplus_huge_page()
    memory_failure_hugetlb()
      get_hwpoison_page(page)
        __get_hwpoison_page(page)
          get_page_unless_zero(page)
                                      zero = put_page_testzero(page)
                                      VM_BUG_ON_PAGE(!zero, page)
                                      enqueue_huge_page(h, page)
      put_page(page)

__get_hwpoison_page() only checks the page refcount before taking an
additional one for memory error handling, which is not enough because
there's a time window where compound pages have non-zero refcount during
hugetlb page initialization.

So make __get_hwpoison_page() check page status a bit more for hugetlb
pages with get_hwpoison_huge_page().  Checking hugetlb-specific flags
under hugetlb_lock makes sure that the hugetlb page is not in a transient
state.  It's notable that another new function, HWPoisonHandlable(), is
helpful to prevent a race against other transient page states (like a
generic compound page just before PageHuge becomes true).

Link: https://lkml.kernel.org/r/20210603233632.2964832-2-nao.horiguchi@gmail.com
Fixes: ead07f6a867b ("mm/memory-failure: introduce get_hwpoison_page() for consistent refcount handling")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>	[5.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  6 ++++++
 mm/hugetlb.c            | 15 +++++++++++++++
 mm/memory-failure.c     | 29 +++++++++++++++++++++++++++--
 3 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b92f25ccef588..790ae618548df 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -149,6 +149,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
 bool isolate_huge_page(struct page *page, struct list_head *list);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -339,6 +340,11 @@ static inline bool isolate_huge_page(struct page *page, struct list_head *list)
 	return false;
 }
 
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+	return 0;
+}
+
 static inline void putback_active_hugepage(struct page *page)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5560b50876fb7..85f42ec1a927c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5857,6 +5857,21 @@ unlock:
 	return ret;
 }
 
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+{
+	int ret = 0;
+
+	*hugetlb = false;
+	spin_lock_irq(&hugetlb_lock);
+	if (PageHeadHuge(page)) {
+		*hugetlb = true;
+		if (HPageFreed(page) || HPageMigratable(page))
+			ret = get_page_unless_zero(page);
+	}
+	spin_unlock_irq(&hugetlb_lock);
+	return ret;
+}
+
 void putback_active_hugepage(struct page *page)
 {
 	spin_lock_irq(&hugetlb_lock);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 85ad98c00fd9d..29ab7b70d3260 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -949,6 +949,17 @@ static int page_action(struct page_state *ps, struct page *p,
 	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
+/*
+ * Return true if a page type of a given page is supported by hwpoison
+ * mechanism (while handling could fail), otherwise false.  This function
+ * does not return true for hugetlb or device memory pages, so it's assumed
+ * to be called only in the context where we never have such pages.
+ */
+static inline bool HWPoisonHandlable(struct page *page)
+{
+	return PageLRU(page) || __PageMovable(page);
+}
+
 /**
  * __get_hwpoison_page() - Get refcount for memory error handling:
  * @page:	raw error page (hit by memory error)
@@ -959,8 +970,22 @@ static int page_action(struct page_state *ps, struct page *p,
 static int __get_hwpoison_page(struct page *page)
 {
 	struct page *head = compound_head(page);
+	int ret = 0;
+	bool hugetlb = false;
+
+	ret = get_hwpoison_huge_page(head, &hugetlb);
+	if (hugetlb)
+		return ret;
+
+	/*
+	 * This check prevents from calling get_hwpoison_unless_zero()
+	 * for any unsupported type of page in order to reduce the risk of
+	 * unexpected races caused by taking a page refcount.
+	 */
+	if (!HWPoisonHandlable(head))
+		return 0;
 
-	if (!PageHuge(head) && PageTransHuge(head)) {
+	if (PageTransHuge(head)) {
 		/*
 		 * Non anonymous thp exists only in allocation/free time. We
 		 * can't handle such a case correctly, so let's give it up.
@@ -1017,7 +1042,7 @@ try_again:
 			ret = -EIO;
 		}
 	} else {
-		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+		if (PageHuge(p) || HWPoisonHandlable(p)) {
 			ret = 1;
 		} else {
 			/*
-- 
GitLab


From 099dd6878b9b12d6bbfa6bf29ce0c8ddd38f6901 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 15 Jun 2021 18:23:16 -0700
Subject: [PATCH 2945/3804] mm/swap: fix pte_same_as_swp() not removing uffd-wp
 bit when compare

I found it by pure code review, that pte_same_as_swp() of unuse_vma()
didn't take uffd-wp bit into account when comparing ptes.
pte_same_as_swp() returning a false negative could cause failure to
swapoff swap ptes that were wr-protected by userfaultfd.

Link: https://lkml.kernel.org/r/20210603180546.9083-1-peterx@redhat.com
Fixes: f45ec5ff16a7 ("userfaultfd: wp: support swap and page migration")
Signed-off-by: Peter Xu <peterx@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>	[5.7+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h | 15 +++++++++++----
 mm/swapfile.c           |  2 +-
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d9b7c9132c2f6..6430a94c69818 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -23,6 +23,16 @@
 #define SWP_TYPE_SHIFT	(BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
 #define SWP_OFFSET_MASK	((1UL << SWP_TYPE_SHIFT) - 1)
 
+/* Clear all flags but only keep swp_entry_t related information */
+static inline pte_t pte_swp_clear_flags(pte_t pte)
+{
+	if (pte_swp_soft_dirty(pte))
+		pte = pte_swp_clear_soft_dirty(pte);
+	if (pte_swp_uffd_wp(pte))
+		pte = pte_swp_clear_uffd_wp(pte);
+	return pte;
+}
+
 /*
  * Store a type+offset into a swp_entry_t in an arch-independent format
  */
@@ -66,10 +76,7 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 {
 	swp_entry_t arch_entry;
 
-	if (pte_swp_soft_dirty(pte))
-		pte = pte_swp_clear_soft_dirty(pte);
-	if (pte_swp_uffd_wp(pte))
-		pte = pte_swp_clear_uffd_wp(pte);
+	pte = pte_swp_clear_flags(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 149e77454e3c5..996afa8131c86 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1900,7 +1900,7 @@ unsigned int count_swap_pages(int type, int free)
 
 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte)
 {
-	return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte);
+	return pte_same(pte_swp_clear_flags(pte), swp_pte);
 }
 
 /*
-- 
GitLab


From 8669dbab2ae56085c128894b181c2aa50f97e368 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 18:23:19 -0700
Subject: [PATCH 2946/3804] mm/slub: clarify verification reporting

Patch series "Actually fix freelist pointer vs redzoning", v4.

This fixes redzoning vs the freelist pointer (both for middle-position
and very small caches).  Both are "theoretical" fixes, in that I see no
evidence of such small-sized caches actually being used in the kernel, but
that's no reason to let the bugs continue to exist, especially since
people doing local development keep tripping over it.  :)

This patch (of 3):

Instead of repeating "Redzone" and "Poison", clarify which sides of
those zones got tripped.  Additionally fix column alignment in the
trailer.

Before:

  BUG test (Tainted: G    B            ): Redzone overwritten
  ...
  Redzone (____ptrval____): bb bb bb bb bb bb bb bb      ........
  Object (____ptrval____): f6 f4 a5 40 1d e8            ...@..
  Redzone (____ptrval____): 1a aa                        ..
  Padding (____ptrval____): 00 00 00 00 00 00 00 00      ........

After:

  BUG test (Tainted: G    B            ): Right Redzone overwritten
  ...
  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb      ........
  Object   (____ptrval____): f6 f4 a5 40 1d e8            ...@..
  Redzone  (____ptrval____): 1a aa                        ..
  Padding  (____ptrval____): 00 00 00 00 00 00 00 00      ........

The earlier commits that slowly resulted in the "Before" reporting were:

  d86bd1bece6f ("mm/slub: support left redzone")
  ffc79d288000 ("slub: use print_hex_dump")
  2492268472e7 ("SLUB: change error reporting format to follow lockdep loosely")

Link: https://lkml.kernel.org/r/20210608183955.280836-1-keescook@chromium.org
Link: https://lkml.kernel.org/r/20210608183955.280836-2-keescook@chromium.org
Link: https://lore.kernel.org/lkml/cfdb11d7-fb8e-e578-c939-f7f5fb69a6bd@suse.cz/
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Marco Elver <elver@google.com>
Cc: "Lin, Zhenpeng" <zplin@psu.edu>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/slub.rst | 10 +++++-----
 mm/slub.c                 | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/vm/slub.rst b/Documentation/vm/slub.rst
index 03f294a638bd8..d3028554b1e9c 100644
--- a/Documentation/vm/slub.rst
+++ b/Documentation/vm/slub.rst
@@ -181,7 +181,7 @@ SLUB Debug output
 Here is a sample of slub debug output::
 
  ====================================================================
- BUG kmalloc-8: Redzone overwritten
+ BUG kmalloc-8: Right Redzone overwritten
  --------------------------------------------------------------------
 
  INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
@@ -189,10 +189,10 @@ Here is a sample of slub debug output::
  INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
  INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
 
- Bytes b4 0xc90f6d10:  00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
-   Object 0xc90f6d20:  31 30 31 39 2e 30 30 35                         1019.005
-  Redzone 0xc90f6d28:  00 cc cc cc                                     .
-  Padding 0xc90f6d50:  5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
+ Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object   (0xc90f6d20): 31 30 31 39 2e 30 30 35                         1019.005
+ Redzone  (0xc90f6d28): 00 cc cc cc                                     .
+ Padding  (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a                         ZZZZZZZZ
 
    [<c010523d>] dump_trace+0x63/0x1eb
    [<c01053df>] show_trace_log_lvl+0x1a/0x2f
diff --git a/mm/slub.c b/mm/slub.c
index 3f96e099817a1..f91d9fe7d0d8f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -712,15 +712,15 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 	       p, p - addr, get_freepointer(s, p));
 
 	if (s->flags & SLAB_RED_ZONE)
-		print_section(KERN_ERR, "Redzone ", p - s->red_left_pad,
+		print_section(KERN_ERR, "Redzone  ", p - s->red_left_pad,
 			      s->red_left_pad);
 	else if (p > addr + 16)
 		print_section(KERN_ERR, "Bytes b4 ", p - 16, 16);
 
-	print_section(KERN_ERR, "Object ", p,
+	print_section(KERN_ERR,         "Object   ", p,
 		      min_t(unsigned int, s->object_size, PAGE_SIZE));
 	if (s->flags & SLAB_RED_ZONE)
-		print_section(KERN_ERR, "Redzone ", p + s->object_size,
+		print_section(KERN_ERR, "Redzone  ", p + s->object_size,
 			s->inuse - s->object_size);
 
 	off = get_info_end(s);
@@ -732,7 +732,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 
 	if (off != size_from_object(s))
 		/* Beginning of the filler is the free pointer */
-		print_section(KERN_ERR, "Padding ", p + off,
+		print_section(KERN_ERR, "Padding  ", p + off,
 			      size_from_object(s) - off);
 
 	dump_stack();
@@ -909,11 +909,11 @@ static int check_object(struct kmem_cache *s, struct page *page,
 	u8 *endobject = object + s->object_size;
 
 	if (s->flags & SLAB_RED_ZONE) {
-		if (!check_bytes_and_report(s, page, object, "Redzone",
+		if (!check_bytes_and_report(s, page, object, "Left Redzone",
 			object - s->red_left_pad, val, s->red_left_pad))
 			return 0;
 
-		if (!check_bytes_and_report(s, page, object, "Redzone",
+		if (!check_bytes_and_report(s, page, object, "Right Redzone",
 			endobject, val, s->inuse - s->object_size))
 			return 0;
 	} else {
@@ -928,7 +928,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
 		if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) &&
 			(!check_bytes_and_report(s, page, p, "Poison", p,
 					POISON_FREE, s->object_size - 1) ||
-			 !check_bytes_and_report(s, page, p, "Poison",
+			 !check_bytes_and_report(s, page, p, "End Poison",
 				p + s->object_size - 1, POISON_END, 1)))
 			return 0;
 		/*
-- 
GitLab


From 74c1d3e081533825f2611e46edea1fcdc0701985 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 18:23:22 -0700
Subject: [PATCH 2947/3804] mm/slub: fix redzoning for small allocations

The redzone area for SLUB exists between s->object_size and s->inuse
(which is at least the word-aligned object_size).  If a cache were
created with an object_size smaller than sizeof(void *), the in-object
stored freelist pointer would overwrite the redzone (e.g.  with boot
param "slub_debug=ZF"):

  BUG test (Tainted: G    B            ): Right Redzone overwritten
  -----------------------------------------------------------------------------

  INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
  INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
  INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620

  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb    ........
  Object   (____ptrval____): f6 f4 a5 40 1d e8          ...@..
  Redzone  (____ptrval____): 1a aa                      ..
  Padding  (____ptrval____): 00 00 00 00 00 00 00 00    ........

Store the freelist pointer out of line when object_size is smaller than
sizeof(void *) and redzoning is enabled.

Additionally remove the "smaller than sizeof(void *)" check under
CONFIG_DEBUG_VM in kmem_cache_sanity_check() as it is now redundant:
SLAB and SLOB both handle small sizes.

(Note that no caches within this size range are known to exist in the
kernel currently.)

Link: https://lkml.kernel.org/r/20210608183955.280836-3-keescook@chromium.org
Fixes: 81819f0fc828 ("SLUB core")
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: "Lin, Zhenpeng" <zplin@psu.edu>
Cc: Marco Elver <elver@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab_common.c | 3 +--
 mm/slub.c        | 8 +++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index a4a571428c511..7cab77655f11a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -97,8 +97,7 @@ EXPORT_SYMBOL(kmem_cache_size);
 #ifdef CONFIG_DEBUG_VM
 static int kmem_cache_sanity_check(const char *name, unsigned int size)
 {
-	if (!name || in_interrupt() || size < sizeof(void *) ||
-		size > KMALLOC_MAX_SIZE) {
+	if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) {
 		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
 		return -EINVAL;
 	}
diff --git a/mm/slub.c b/mm/slub.c
index f91d9fe7d0d8f..f58cfd4565489 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3734,15 +3734,17 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 */
 	s->inuse = size;
 
-	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
-		s->ctor)) {
+	if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) ||
+	    ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) ||
+	    s->ctor) {
 		/*
 		 * Relocate free pointer after the object if it is not
 		 * permitted to overwrite the first word of the object on
 		 * kmem_cache_free.
 		 *
 		 * This is the case if we do RCU, have a constructor or
-		 * destructor or are poisoning the objects.
+		 * destructor, are poisoning the objects, or are
+		 * redzoning an object smaller than sizeof(void *).
 		 *
 		 * The assumption that s->offset >= s->inuse means free
 		 * pointer is outside of the object is used in the
-- 
GitLab


From e41a49fadbc80b60b48d3c095d9e2ee7ef7c9a8e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 18:23:26 -0700
Subject: [PATCH 2948/3804] mm/slub: actually fix freelist pointer vs redzoning

It turns out that SLUB redzoning ("slub_debug=Z") checks from
s->object_size rather than from s->inuse (which is normally bumped to
make room for the freelist pointer), so a cache created with an object
size less than 24 would have the freelist pointer written beyond
s->object_size, causing the redzone to be corrupted by the freelist
pointer.  This was very visible with "slub_debug=ZF":

  BUG test (Tainted: G    B            ): Right Redzone overwritten
  -----------------------------------------------------------------------------

  INFO: 0xffff957ead1c05de-0xffff957ead1c05df @offset=1502. First byte 0x1a instead of 0xbb
  INFO: Slab 0xffffef3950b47000 objects=170 used=170 fp=0x0000000000000000 flags=0x8000000000000200
  INFO: Object 0xffff957ead1c05d8 @offset=1496 fp=0xffff957ead1c0620

  Redzone  (____ptrval____): bb bb bb bb bb bb bb bb               ........
  Object   (____ptrval____): 00 00 00 00 00 f6 f4 a5               ........
  Redzone  (____ptrval____): 40 1d e8 1a aa                        @....
  Padding  (____ptrval____): 00 00 00 00 00 00 00 00               ........

Adjust the offset to stay within s->object_size.

(Note that no caches in this size range are known to exist in the
kernel currently.)

Link: https://lkml.kernel.org/r/20210608183955.280836-4-keescook@chromium.org
Link: https://lore.kernel.org/linux-mm/20200807160627.GA1420741@elver.google.com/
Link: https://lore.kernel.org/lkml/0f7dd7b2-7496-5e2d-9488-2ec9f8e90441@suse.cz/
Fixes: 89b83f282d8b ("slub: avoid redzone when choosing freepointer location")
Link: https://lore.kernel.org/lkml/CANpmjNOwZ5VpKQn+SYWovTkFB4VsT-RPwyENBmaK0dLcpqStkA@mail.gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Marco Elver <elver@google.com>
Reported-by: "Lin, Zhenpeng" <zplin@psu.edu>
Tested-by: Marco Elver <elver@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index f58cfd4565489..fe30df460fad3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3689,7 +3689,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 {
 	slab_flags_t flags = s->flags;
 	unsigned int size = s->object_size;
-	unsigned int freepointer_area;
 	unsigned int order;
 
 	/*
@@ -3698,13 +3697,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	 * the possible location of the free pointer.
 	 */
 	size = ALIGN(size, sizeof(void *));
-	/*
-	 * This is the area of the object where a freepointer can be
-	 * safely written. If redzoning adds more to the inuse size, we
-	 * can't use that portion for writing the freepointer, so
-	 * s->offset must be limited within this for the general case.
-	 */
-	freepointer_area = size;
 
 #ifdef CONFIG_SLUB_DEBUG
 	/*
@@ -3730,7 +3722,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
 	/*
 	 * With that we have determined the number of bytes in actual use
-	 * by the object. This is the potential offset to the free pointer.
+	 * by the object and redzoning.
 	 */
 	s->inuse = size;
 
@@ -3753,13 +3745,13 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 		 */
 		s->offset = size;
 		size += sizeof(void *);
-	} else if (freepointer_area > sizeof(void *)) {
+	} else {
 		/*
 		 * Store freelist pointer near middle of object to keep
 		 * it away from the edges of the object to avoid small
 		 * sized over/underflows from neighboring allocations.
 		 */
-		s->offset = ALIGN(freepointer_area / 2, sizeof(void *));
+		s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
 	}
 
 #ifdef CONFIG_SLUB_DEBUG
-- 
GitLab


From 846be08578edb81f02bc8534577e6c367ef34f41 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Tue, 15 Jun 2021 18:23:29 -0700
Subject: [PATCH 2949/3804] mm/hugetlb: expand restore_reserve_on_error
 functionality

The routine restore_reserve_on_error is called to restore reservation
information when an error occurs after page allocation.  The routine
alloc_huge_page modifies the mapping reserve map and potentially the
reserve count during allocation.  If code calling alloc_huge_page
encounters an error after allocation and needs to free the page, the
reservation information needs to be adjusted.

Currently, restore_reserve_on_error only takes action on pages for which
the reserve count was adjusted(HPageRestoreReserve flag).  There is
nothing wrong with these adjustments.  However, alloc_huge_page ALWAYS
modifies the reserve map during allocation even if the reserve count is
not adjusted.  This can cause issues as observed during development of
this patch [1].

One specific series of operations causing an issue is:

 - Create a shared hugetlb mapping
   Reservations for all pages created by default

 - Fault in a page in the mapping
   Reservation exists so reservation count is decremented

 - Punch a hole in the file/mapping at index previously faulted
   Reservation and any associated pages will be removed

 - Allocate a page to fill the hole
   No reservation entry, so reserve count unmodified
   Reservation entry added to map by alloc_huge_page

 - Error after allocation and before instantiating the page
   Reservation entry remains in map

 - Allocate a page to fill the hole
   Reservation entry exists, so decrement reservation count

This will cause a reservation count underflow as the reservation count
was decremented twice for the same index.

A user would observe a very large number for HugePages_Rsvd in
/proc/meminfo.  This would also likely cause subsequent allocations of
hugetlb pages to fail as it would 'appear' that all pages are reserved.

This sequence of operations is unlikely to happen, however they were
easily reproduced and observed using hacked up code as described in [1].

Address the issue by having the routine restore_reserve_on_error take
action on pages where HPageRestoreReserve is not set.  In this case, we
need to remove any reserve map entry created by alloc_huge_page.  A new
helper routine vma_del_reservation assists with this operation.

There are three callers of alloc_huge_page which do not currently call
restore_reserve_on error before freeing a page on error paths.  Add
those missing calls.

[1] https://lore.kernel.org/linux-mm/20210528005029.88088-1-almasrymina@google.com/

Link: https://lkml.kernel.org/r/20210607204510.22617-1-mike.kravetz@oracle.com
Fixes: 96b96a96ddee ("mm/hugetlb: fix huge page reservation leak in private mapping error paths")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |   1 +
 include/linux/hugetlb.h |   2 +
 mm/hugetlb.c            | 120 ++++++++++++++++++++++++++++++++--------
 3 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 55efd3dd04f62..30dee68458c7e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -735,6 +735,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		__SetPageUptodate(page);
 		error = huge_add_to_page_cache(page, mapping, index);
 		if (unlikely(error)) {
+			restore_reserve_on_error(h, &pseudo_vma, addr, page);
 			put_page(page);
 			mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 			goto out;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 790ae618548df..6504346a19473 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -610,6 +610,8 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+				unsigned long address, struct page *page);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 85f42ec1a927c..e0a5f9cbbece9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2121,12 +2121,18 @@ out:
  * be restored when a newly allocated huge page must be freed.  It is
  * to be called after calling vma_needs_reservation to determine if a
  * reservation exists.
+ *
+ * vma_del_reservation is used in error paths where an entry in the reserve
+ * map was created during huge page allocation and must be removed.  It is to
+ * be called after calling vma_needs_reservation to determine if a reservation
+ * exists.
  */
 enum vma_resv_mode {
 	VMA_NEEDS_RESV,
 	VMA_COMMIT_RESV,
 	VMA_END_RESV,
 	VMA_ADD_RESV,
+	VMA_DEL_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
 				struct vm_area_struct *vma, unsigned long addr,
@@ -2170,11 +2176,21 @@ static long __vma_reservation_common(struct hstate *h,
 			ret = region_del(resv, idx, idx + 1);
 		}
 		break;
+	case VMA_DEL_RESV:
+		if (vma->vm_flags & VM_MAYSHARE) {
+			region_abort(resv, idx, idx + 1, 1);
+			ret = region_del(resv, idx, idx + 1);
+		} else {
+			ret = region_add(resv, idx, idx + 1, 1, NULL, NULL);
+			/* region_add calls of range 1 should never fail. */
+			VM_BUG_ON(ret < 0);
+		}
+		break;
 	default:
 		BUG();
 	}
 
-	if (vma->vm_flags & VM_MAYSHARE)
+	if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV)
 		return ret;
 	/*
 	 * We know private mapping must have HPAGE_RESV_OWNER set.
@@ -2222,25 +2238,39 @@ static long vma_add_reservation(struct hstate *h,
 	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
 }
 
+static long vma_del_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV);
+}
+
 /*
- * This routine is called to restore a reservation on error paths.  In the
- * specific error paths, a huge page was allocated (via alloc_huge_page)
- * and is about to be freed.  If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set
- * HPageRestoreReserve in the newly allocated page.  When the page is freed
- * via free_huge_page, the global reservation count will be incremented if
- * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
- * reserve map.  Adjust the reserve map here to be consistent with global
- * reserve count adjustments to be made by free_huge_page.
+ * This routine is called to restore reservation information on error paths.
+ * It should ONLY be called for pages allocated via alloc_huge_page(), and
+ * the hugetlb mutex should remain held when calling this routine.
+ *
+ * It handles two specific cases:
+ * 1) A reservation was in place and the page consumed the reservation.
+ *    HPageRestoreReserve is set in the page.
+ * 2) No reservation was in place for the page, so HPageRestoreReserve is
+ *    not set.  However, alloc_huge_page always updates the reserve map.
+ *
+ * In case 1, free_huge_page later in the error path will increment the
+ * global reserve count.  But, free_huge_page does not have enough context
+ * to adjust the reservation map.  This case deals primarily with private
+ * mappings.  Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page.  Make sure the
+ * reserve map indicates there is a reservation present.
+ *
+ * In case 2, simply undo reserve map modifications done by alloc_huge_page.
  */
-static void restore_reserve_on_error(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address,
-			struct page *page)
+void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
+			unsigned long address, struct page *page)
 {
-	if (unlikely(HPageRestoreReserve(page))) {
-		long rc = vma_needs_reservation(h, vma, address);
+	long rc = vma_needs_reservation(h, vma, address);
 
-		if (unlikely(rc < 0)) {
+	if (HPageRestoreReserve(page)) {
+		if (unlikely(rc < 0))
 			/*
 			 * Rare out of memory condition in reserve map
 			 * manipulation.  Clear HPageRestoreReserve so that
@@ -2253,16 +2283,57 @@ static void restore_reserve_on_error(struct hstate *h,
 			 * accounting of reserve counts.
 			 */
 			ClearHPageRestoreReserve(page);
-		} else if (rc) {
-			rc = vma_add_reservation(h, vma, address);
-			if (unlikely(rc < 0))
+		else if (rc)
+			(void)vma_add_reservation(h, vma, address);
+		else
+			vma_end_reservation(h, vma, address);
+	} else {
+		if (!rc) {
+			/*
+			 * This indicates there is an entry in the reserve map
+			 * added by alloc_huge_page.  We know it was added
+			 * before the alloc_huge_page call, otherwise
+			 * HPageRestoreReserve would be set on the page.
+			 * Remove the entry so that a subsequent allocation
+			 * does not consume a reservation.
+			 */
+			rc = vma_del_reservation(h, vma, address);
+			if (rc < 0)
 				/*
-				 * See above comment about rare out of
-				 * memory condition.
+				 * VERY rare out of memory condition.  Since
+				 * we can not delete the entry, set
+				 * HPageRestoreReserve so that the reserve
+				 * count will be incremented when the page
+				 * is freed.  This reserve will be consumed
+				 * on a subsequent allocation.
 				 */
-				ClearHPageRestoreReserve(page);
+				SetHPageRestoreReserve(page);
+		} else if (rc < 0) {
+			/*
+			 * Rare out of memory condition from
+			 * vma_needs_reservation call.  Memory allocation is
+			 * only attempted if a new entry is needed.  Therefore,
+			 * this implies there is not an entry in the
+			 * reserve map.
+			 *
+			 * For shared mappings, no entry in the map indicates
+			 * no reservation.  We are done.
+			 */
+			if (!(vma->vm_flags & VM_MAYSHARE))
+				/*
+				 * For private mappings, no entry indicates
+				 * a reservation is present.  Since we can
+				 * not add an entry, set SetHPageRestoreReserve
+				 * on the page so reserve count will be
+				 * incremented when freed.  This reserve will
+				 * be consumed on a subsequent allocation.
+				 */
+				SetHPageRestoreReserve(page);
 		} else
-			vma_end_reservation(h, vma, address);
+			/*
+			 * No reservation present, do nothing
+			 */
+			 vma_end_reservation(h, vma, address);
 	}
 }
 
@@ -4037,6 +4108,8 @@ again:
 				spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 				entry = huge_ptep_get(src_pte);
 				if (!pte_same(src_pte_old, entry)) {
+					restore_reserve_on_error(h, vma, addr,
+								new);
 					put_page(new);
 					/* dst_entry won't change as in child */
 					goto again;
@@ -5006,6 +5079,7 @@ out_release_unlock:
 	if (vm_shared || is_continue)
 		unlock_page(page);
 out_release_nounlock:
+	restore_reserve_on_error(h, dst_vma, dst_addr, page);
 	put_page(page);
 	goto out;
 }
-- 
GitLab


From e8675d291ac007e1c636870db880f837a9ea112a Mon Sep 17 00:00:00 2001
From: yangerkun <yangerkun@huawei.com>
Date: Tue, 15 Jun 2021 18:23:32 -0700
Subject: [PATCH 2950/3804] mm/memory-failure: make sure wait for page
 writeback in memory_failure

Our syzkaller triggered the "BUG_ON(!list_empty(&inode->i_wb_list))" in
clear_inode:

  kernel BUG at fs/inode.c:519!
  Internal error: Oops - BUG: 0 [#1] SMP
  Modules linked in:
  Process syz-executor.0 (pid: 249, stack limit = 0x00000000a12409d7)
  CPU: 1 PID: 249 Comm: syz-executor.0 Not tainted 4.19.95
  Hardware name: linux,dummy-virt (DT)
  pstate: 80000005 (Nzcv daif -PAN -UAO)
  pc : clear_inode+0x280/0x2a8
  lr : clear_inode+0x280/0x2a8
  Call trace:
    clear_inode+0x280/0x2a8
    ext4_clear_inode+0x38/0xe8
    ext4_free_inode+0x130/0xc68
    ext4_evict_inode+0xb20/0xcb8
    evict+0x1a8/0x3c0
    iput+0x344/0x460
    do_unlinkat+0x260/0x410
    __arm64_sys_unlinkat+0x6c/0xc0
    el0_svc_common+0xdc/0x3b0
    el0_svc_handler+0xf8/0x160
    el0_svc+0x10/0x218
  Kernel panic - not syncing: Fatal exception

A crash dump of this problem shows that someone called __munlock_pagevec
to clear page LRU without lock_page: do_mmap -> mmap_region -> do_munmap
-> munlock_vma_pages_range -> __munlock_pagevec.

As a result memory_failure will call identify_page_state without
wait_on_page_writeback.  Then, after truncate_error_page clears the mapping
of this page, end_page_writeback won't call sb_clear_inode_writeback to
clear inode->i_wb_list.  That will trigger the BUG_ON in clear_inode!

Fix it by also checking PageWriteback when deciding whether to skip
wait_on_page_writeback.

Link: https://lkml.kernel.org/r/20210604084705.3729204-1-yangerkun@huawei.com
Fixes: 0bc1f8b0682c ("hwpoison: fix the handling path of the victimized page frame that belong to non-LRU")
Signed-off-by: yangerkun <yangerkun@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 29ab7b70d3260..0143d32bc6663 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1552,7 +1552,12 @@ try_again:
 		return 0;
 	}
 
-	if (!PageTransTail(p) && !PageLRU(p))
+	/*
+	 * __munlock_pagevec may clear a writeback page's LRU flag without
+	 * page_lock. We need wait writeback completion for this page or it
+	 * may trigger vfs BUG while evict inode.
+	 */
+	if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
 		goto identify_page_state;
 
 	/*
-- 
GitLab


From 4f5aecdff25f59fb5ea456d5152a913906ecf287 Mon Sep 17 00:00:00 2001
From: Pingfan Liu <kernelfans@gmail.com>
Date: Tue, 15 Jun 2021 18:23:36 -0700
Subject: [PATCH 2951/3804] crash_core, vmcoreinfo: append 'SECTION_SIZE_BITS'
 to vmcoreinfo

As mentioned in kernel commit 1d50e5d0c505 ("crash_core, vmcoreinfo:
Append 'MAX_PHYSMEM_BITS' to vmcoreinfo"), SECTION_SIZE_BITS is used in
the formula:

    #define SECTIONS_SHIFT    (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)

Besides SECTIONS_SHIFT, SECTION_SIZE_BITS is also used to calculate
PAGES_PER_SECTION in makedumpfile just like kernel.

Unfortunately, this arch-dependent macro SECTION_SIZE_BITS changes, e.g.
recently in kernel commit f0b13ee23241 ("arm64/sparsemem: reduce
SECTION_SIZE_BITS").  But user space wants a stable interface to get
this info.  Such info is impossible to be deduced from a crashdump
vmcore.  Hence append SECTION_SIZE_BITS to vmcoreinfo.

Link: https://lkml.kernel.org/r/20210608103359.84907-1-kernelfans@gmail.com
Link: http://lists.infradead.org/pipermail/kexec/2021-June/022676.html
Signed-off-by: Pingfan Liu <kernelfans@gmail.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Cc: Kazuhito Hagio <k-hagio@ab.jp.nec.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Boris Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: James Morse <james.morse@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Anderson <anderson@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/crash_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 825284baaf466..684a6061a13a4 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -464,6 +464,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
 	VMCOREINFO_STRUCT_SIZE(mem_section);
 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
+	VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
 	VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
 #endif
 	VMCOREINFO_STRUCT_SIZE(page);
-- 
GitLab


From 1b3865d016815cbd69a1879ca1c8a8901fda1072 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 15 Jun 2021 18:23:39 -0700
Subject: [PATCH 2952/3804] mm/slub.c: include swab.h

Fixes build with CONFIG_SLAB_FREELIST_HARDENED=y.

Hopefully.  But it's the right thing to do anyway.

Fixes: 1ad53d9fa3f61 ("slub: improve bit diffusion for freelist ptr obfuscation")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213417
Reported-by: <vannguye@cisco.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/slub.c b/mm/slub.c
index fe30df460fad3..61bd40e3eb9a4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
+#include <linux/swab.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include "slab.h"
-- 
GitLab


From ffc90cbb2970ab88b66ea51dd580469eede57b67 Mon Sep 17 00:00:00 2001
From: Xu Yu <xuyu@linux.alibaba.com>
Date: Tue, 15 Jun 2021 18:23:42 -0700
Subject: [PATCH 2953/3804] mm, thp: use head page in __migration_entry_wait()

We notice that hung task happens in a corner but practical scenario when
CONFIG_PREEMPT_NONE is enabled, as follows.

Process 0                       Process 1                     Process 2..Inf
split_huge_page_to_list
    unmap_page
        split_huge_pmd_address
                                __migration_entry_wait(head)
                                                              __migration_entry_wait(tail)
    remap_page (roll back)
        remove_migration_ptes
            rmap_walk_anon
                cond_resched

Where __migration_entry_wait(tail) occurs in kernel space, e.g.,
copy_to_user in fstat, which will immediately fault again without
rescheduling, and thus occupies the cpu fully.

When there are too many processes performing __migration_entry_wait on
tail page, remap_page will never be done after cond_resched.

This makes __migration_entry_wait operate on the compound head page,
thus waits for remap_page to complete, whether the THP is split
successfully or roll back.

Note that put_and_wait_on_page_locked helps to drop the page reference
acquired with get_page_unless_zero, as soon as the page is on the wait
queue, before actually waiting.  So splitting the THP is only prevented
for a brief interval.

Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.1623144009.git.xuyu@linux.alibaba.com
Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split")
Suggested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/migrate.c b/mm/migrate.c
index b234c3f3acb79..41ff2c9896c4f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -295,6 +295,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 		goto out;
 
 	page = migration_entry_to_page(entry);
+	page = compound_head(page);
 
 	/*
 	 * Once page cache replacement of page migration started, page_count
-- 
GitLab


From 99fa8a48203d62b3743d866fc48ef6abaee682be Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:45 -0700
Subject: [PATCH 2954/3804] mm/thp: fix __split_huge_pmd_locked() on shmem
 migration entry

Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10.

Here is v2 batch of long-standing THP bug fixes that I had not got
around to sending before, but prompted now by Wang Yugui's report
https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/

Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and
they have done no harm, but have *not* fixed that issue: something more
is needed and I have no idea of what.

This patch (of 7):

Stressing huge tmpfs page migration racing hole punch often crashed on
the VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y
kernel; or shortly afterwards, on a bad dereference in
__split_huge_pmd_locked() when DEBUG_VM=n.  They forgot to allow for pmd
migration entries in the non-anonymous case.

Full disclosure: those particular experiments were on a kernel with more
relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the
vanilla kernel: it is conceivable that stricter locking happens to avoid
those cases, or makes them less likely; but __split_huge_pmd_locked()
already allowed for pmd migration entries when handling anonymous THPs,
so this commit brings the shmem and file THP handling into line.

And while there: use old_pmd rather than _pmd, as in the following
blocks; and make it clearer to the eye that the !vma_is_anonymous()
block is self-contained, making an early return after accounting for
unmapping.

Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com
Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com
Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Jue Wang <juew@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c     | 27 ++++++++++++++++++---------
 mm/pgtable-generic.c |  5 ++---
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 63ed6b25deaab..42cfefc6e66e3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2044,7 +2044,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	count_vm_event(THP_SPLIT_PMD);
 
 	if (!vma_is_anonymous(vma)) {
-		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+		old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
 		/*
 		 * We are going to unmap this huge page. So
 		 * just go ahead and zap it
@@ -2053,16 +2053,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			zap_deposited_table(mm, pmd);
 		if (vma_is_special_huge(vma))
 			return;
-		page = pmd_page(_pmd);
-		if (!PageDirty(page) && pmd_dirty(_pmd))
-			set_page_dirty(page);
-		if (!PageReferenced(page) && pmd_young(_pmd))
-			SetPageReferenced(page);
-		page_remove_rmap(page, true);
-		put_page(page);
+		if (unlikely(is_pmd_migration_entry(old_pmd))) {
+			swp_entry_t entry;
+
+			entry = pmd_to_swp_entry(old_pmd);
+			page = migration_entry_to_page(entry);
+		} else {
+			page = pmd_page(old_pmd);
+			if (!PageDirty(page) && pmd_dirty(old_pmd))
+				set_page_dirty(page);
+			if (!PageReferenced(page) && pmd_young(old_pmd))
+				SetPageReferenced(page);
+			page_remove_rmap(page, true);
+			put_page(page);
+		}
 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
 		return;
-	} else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+	}
+
+	if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
 		/*
 		 * FIXME: Do we want to invalidate secondary mmu by calling
 		 * mmu_notifier_invalidate_range() see comments below inside
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index c2210e1cdb515..4e640baf97948 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -135,9 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
 	pmd_t pmd;
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(!pmd_present(*pmdp));
-	/* Below assumes pmd_present() is true */
-	VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
+	VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+			   !pmd_devmap(*pmdp));
 	pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
 	flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
 	return pmd;
-- 
GitLab


From 3b77e8c8cde581dadab9a0f1543a347e24315f11 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:49 -0700
Subject: [PATCH 2955/3804] mm/thp: make is_huge_zero_pmd() safe and quicker

Most callers of is_huge_zero_pmd() supply a pmd already verified
present; but a few (notably zap_huge_pmd()) do not - it might be a pmd
migration entry, in which the pfn is encoded differently from a present
pmd: which might pass the is_huge_zero_pmd() test (though not on x86,
since L1TF forced us to protect against that); or perhaps even crash in
pmd_page() applied to a swap-like entry.

Make it safe by adding pmd_present() check into is_huge_zero_pmd()
itself; and make it quicker by saving huge_zero_pfn, so that
is_huge_zero_pmd() will not need to do that pmd_page() lookup each time.

__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked,
but is unnecessary now that is_huge_zero_pmd() checks present.

Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com
Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jue Wang <juew@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h | 8 +++++++-
 mm/huge_memory.c        | 5 ++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 9626fda5efcea..2a8ebe6c222ef 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -286,6 +286,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
 extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
 
 static inline bool is_huge_zero_page(struct page *page)
 {
@@ -294,7 +295,7 @@ static inline bool is_huge_zero_page(struct page *page)
 
 static inline bool is_huge_zero_pmd(pmd_t pmd)
 {
-	return is_huge_zero_page(pmd_page(pmd));
+	return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
 }
 
 static inline bool is_huge_zero_pud(pud_t pud)
@@ -440,6 +441,11 @@ static inline bool is_huge_zero_page(struct page *page)
 	return false;
 }
 
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+	return false;
+}
+
 static inline bool is_huge_zero_pud(pud_t pud)
 {
 	return false;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 42cfefc6e66e3..5885c5f5836f4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
 
 static atomic_t huge_zero_refcount;
 struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
 
 bool transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
@@ -98,6 +99,7 @@ retry:
 		__free_pages(zero_page, compound_order(zero_page));
 		goto retry;
 	}
+	WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
 
 	/* We take additional reference here. It will be put back by shrinker */
 	atomic_set(&huge_zero_refcount, 2);
@@ -147,6 +149,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
 	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
 		struct page *zero_page = xchg(&huge_zero_page, NULL);
 		BUG_ON(zero_page == NULL);
+		WRITE_ONCE(huge_zero_pfn, ~0UL);
 		__free_pages(zero_page, compound_order(zero_page));
 		return HPAGE_PMD_NR;
 	}
@@ -2071,7 +2074,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		return;
 	}
 
-	if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+	if (is_huge_zero_pmd(*pmd)) {
 		/*
 		 * FIXME: Do we want to invalidate secondary mmu by calling
 		 * mmu_notifier_invalidate_range() see comments below inside
-- 
GitLab


From 732ed55823fc3ad998d43b86bf771887bcc5ec67 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:53 -0700
Subject: [PATCH 2956/3804] mm/thp: try_to_unmap() use TTU_SYNC for safe
 splitting

Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE
(!unmap_success): with dump_page() showing mapcount:1, but then its raw
struct page output showing _mapcount ffffffff i.e.  mapcount 0.

And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed,
it is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)),
and further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG():
all indicative of some mapcount difficulty in development here perhaps.
But the !CONFIG_DEBUG_VM path handles the failures correctly and
silently.

I believe the problem is that once a racing unmap has cleared pte or
pmd, try_to_unmap_one() may skip taking the page table lock, and emerge
from try_to_unmap() before the racing task has reached decrementing
mapcount.

Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that
follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding
TTU_SYNC to the options, and passing that from unmap_page().

When CONFIG_DEBUG_VM, or for non-debug too? Consensus is to do the same
for both: the slight overhead added should rarely matter, except perhaps
if splitting sparsely-populated multiply-mapped shmem.  Once confident
that bugs are fixed, TTU_SYNC here can be removed, and the race
tolerated.

Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com
Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jue Wang <juew@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  1 +
 mm/huge_memory.c     |  2 +-
 mm/page_vma_mapped.c | 11 +++++++++++
 mm/rmap.c            | 17 ++++++++++++++++-
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index def5c62c93b3b..8d04e7deedc66 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -91,6 +91,7 @@ enum ttu_flags {
 
 	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
 	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
+	TTU_SYNC		= 0x10,	/* avoid racy checks with PVMW_SYNC */
 	TTU_IGNORE_HWPOISON	= 0x20,	/* corrupted page is recoverable */
 	TTU_BATCH_FLUSH		= 0x40,	/* Batch TLB flushes where possible
 					 * and caller guarantees they will
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5885c5f5836f4..84ab735139dcd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2350,7 +2350,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
 static void unmap_page(struct page *page)
 {
-	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
+	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
 		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
 	bool unmap_success;
 
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 2cf01d933f136..5b559967410ef 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -212,6 +212,17 @@ restart:
 			pvmw->ptl = NULL;
 		}
 	} else if (!pmd_present(pmde)) {
+		/*
+		 * If PVMW_SYNC, take and drop THP pmd lock so that we
+		 * cannot return prematurely, while zap_huge_pmd() has
+		 * cleared *pmd but not decremented compound_mapcount().
+		 */
+		if ((pvmw->flags & PVMW_SYNC) &&
+		    PageTransCompound(pvmw->page)) {
+			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+			spin_unlock(ptl);
+		}
 		return false;
 	}
 	if (!map_pte(pvmw))
diff --git a/mm/rmap.c b/mm/rmap.c
index 693a610e181d1..07811b4ae7936 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1405,6 +1405,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	struct mmu_notifier_range range;
 	enum ttu_flags flags = (enum ttu_flags)(long)arg;
 
+	/*
+	 * When racing against e.g. zap_pte_range() on another cpu,
+	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+	 * try_to_unmap() may return false when it is about to become true,
+	 * if page table locking is skipped: use TTU_SYNC to wait for that.
+	 */
+	if (flags & TTU_SYNC)
+		pvmw.flags = PVMW_SYNC;
+
 	/* munlock has nothing to gain from examining un-locked vmas */
 	if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
 		return true;
@@ -1777,7 +1786,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
 	else
 		rmap_walk(page, &rwc);
 
-	return !page_mapcount(page) ? true : false;
+	/*
+	 * When racing against e.g. zap_pte_range() on another cpu,
+	 * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+	 * try_to_unmap() may return false when it is about to become true,
+	 * if page table locking is skipped: use TTU_SYNC to wait for that.
+	 */
+	return !page_mapcount(page);
 }
 
 /**
-- 
GitLab


From 494334e43c16d63b878536a26505397fce6ff3a2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:23:56 -0700
Subject: [PATCH 2957/3804] mm/thp: fix vma_address() if virtual address below
 file offset

Running certain tests with a DEBUG_VM kernel would crash within hours,
on the total_mapcount BUG() in split_huge_page_to_list(), while trying
to free up some memory by punching a hole in a shmem huge page: split's
try_to_unmap() was unable to find all the mappings of the page (which,
on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).

When that BUG() was changed to a WARN(), it would later crash on the
VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma) in
mm/internal.h:vma_address(), used by rmap_walk_file() for
try_to_unmap().

vma_address() is usually correct, but there's a wraparound case when the
vm_start address is unusually low, but vm_pgoff not so low:
vma_address() chooses max(start, vma->vm_start), but that decides on the
wrong address, because start has become almost ULONG_MAX.

Rewrite vma_address() to be more careful about vm_pgoff; move the
VM_BUG_ON_VMA() out of it, returning -EFAULT for errors, so that it can
be safely used from page_mapped_in_vma() and page_address_in_vma() too.

Add vma_address_end() to apply similar care to end address calculation,
in page_vma_mapped_walk() and page_mkclean_one() and try_to_unmap_one();
though it raises a question of whether callers would do better to supply
pvmw->end to page_vma_mapped_walk() - I chose not, for a smaller patch.

An irritation is that their apparent generality breaks down on KSM
pages, which cannot be located by the page->index that page_to_pgoff()
uses: as commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte()
for ksm pages") once discovered.  I dithered over the best thing to do
about that, and have ended up with a VM_BUG_ON_PAGE(PageKsm) in both
vma_address() and vma_address_end(); though the only place in danger of
using it on them was try_to_unmap_one().

Sidenote: vma_address() and vma_address_end() now use compound_nr() on a
head page, instead of thp_size(): to make the right calculation on a
hugetlbfs page, whether or not THPs are configured.  try_to_unmap() is
used on hugetlbfs pages, but perhaps the wrong calculation never
mattered.

Link: https://lkml.kernel.org/r/caf1c1a3-7cfb-7f8f-1beb-ba816e932825@google.com
Fixes: a8fa41ad2f6f ("mm, rmap: check all VMAs that PTE-mapped THP can be part of")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jue Wang <juew@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/internal.h        | 53 ++++++++++++++++++++++++++++++++------------
 mm/page_vma_mapped.c | 16 +++++--------
 mm/rmap.c            | 16 ++++++-------
 3 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 2f1182948aa6e..e8fdb531f887d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -384,27 +384,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 
 /*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
  */
 static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
 {
-	pgoff_t pgoff = page_to_pgoff(page);
-	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	pgoff_t pgoff;
+	unsigned long address;
+
+	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
+	pgoff = page_to_pgoff(page);
+	if (pgoff >= vma->vm_pgoff) {
+		address = vma->vm_start +
+			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		/* Check for address beyond vma (or wrapped through 0?) */
+		if (address < vma->vm_start || address >= vma->vm_end)
+			address = -EFAULT;
+	} else if (PageHead(page) &&
+		   pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) {
+		/* Test above avoids possibility of wrap to 0 on 32-bit */
+		address = vma->vm_start;
+	} else {
+		address = -EFAULT;
+	}
+	return address;
 }
 
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
 static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
 {
-	unsigned long start, end;
-
-	start = __vma_address(page, vma);
-	end = start + thp_size(page) - PAGE_SIZE;
-
-	/* page should be within @vma mapping range */
-	VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
-	return max(start, vma->vm_start);
+	pgoff_t pgoff;
+	unsigned long address;
+
+	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
+	pgoff = page_to_pgoff(page) + compound_nr(page);
+	address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+	/* Check for address beyond vma (or wrapped through 0?) */
+	if (address < vma->vm_start || address > vma->vm_end)
+		address = vma->vm_end;
+	return address;
 }
 
 static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 5b559967410ef..e37bd43904af7 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -228,18 +228,18 @@ restart:
 	if (!map_pte(pvmw))
 		goto next_pte;
 	while (1) {
+		unsigned long end;
+
 		if (check_pte(pvmw))
 			return true;
 next_pte:
 		/* Seek to next pte only makes sense for THP */
 		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
 			return not_found(pvmw);
+		end = vma_address_end(pvmw->page, pvmw->vma);
 		do {
 			pvmw->address += PAGE_SIZE;
-			if (pvmw->address >= pvmw->vma->vm_end ||
-			    pvmw->address >=
-					__vma_address(pvmw->page, pvmw->vma) +
-					thp_size(pvmw->page))
+			if (pvmw->address >= end)
 				return not_found(pvmw);
 			/* Did we cross page table boundary? */
 			if (pvmw->address % PMD_SIZE == 0) {
@@ -277,14 +277,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
 		.vma = vma,
 		.flags = PVMW_SYNC,
 	};
-	unsigned long start, end;
-
-	start = __vma_address(page, vma);
-	end = start + thp_size(page) - PAGE_SIZE;
 
-	if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+	pvmw.address = vma_address(page, vma);
+	if (pvmw.address == -EFAULT)
 		return 0;
-	pvmw.address = max(start, vma->vm_start);
 	if (!page_vma_mapped_walk(&pvmw))
 		return 0;
 	page_vma_mapped_walk_done(&pvmw);
diff --git a/mm/rmap.c b/mm/rmap.c
index 07811b4ae7936..144de54efc1c3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -707,7 +707,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
-	unsigned long address;
 	if (PageAnon(page)) {
 		struct anon_vma *page__anon_vma = page_anon_vma(page);
 		/*
@@ -722,10 +721,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 			return -EFAULT;
 	} else
 		return -EFAULT;
-	address = __vma_address(page, vma);
-	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
-		return -EFAULT;
-	return address;
+
+	return vma_address(page, vma);
 }
 
 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -919,7 +916,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 	 */
 	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
 				0, vma, vma->vm_mm, address,
-				min(vma->vm_end, address + page_size(page)));
+				vma_address_end(page, vma));
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
@@ -1435,9 +1432,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 * Note that the page can not be free in this function as call of
 	 * try_to_unmap() must hold a reference on the page.
 	 */
+	range.end = PageKsm(page) ?
+			address + PAGE_SIZE : vma_address_end(page, vma);
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
-				address,
-				min(vma->vm_end, address + page_size(page)));
+				address, range.end);
 	if (PageHuge(page)) {
 		/*
 		 * If sharing is possible, start and end will be adjusted
@@ -1889,6 +1887,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
 		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 
+		VM_BUG_ON_VMA(address == -EFAULT, vma);
 		cond_resched();
 
 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1943,6 +1942,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
 			pgoff_start, pgoff_end) {
 		unsigned long address = vma_address(page, vma);
 
+		VM_BUG_ON_VMA(address == -EFAULT, vma);
 		cond_resched();
 
 		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
-- 
GitLab


From 31657170deaf1d8d2f6a1955fbc6fa9d228be036 Mon Sep 17 00:00:00 2001
From: Jue Wang <juew@google.com>
Date: Tue, 15 Jun 2021 18:24:00 -0700
Subject: [PATCH 2958/3804] mm/thp: fix page_address_in_vma() on file THP tails

Anon THP tails were already supported, but memory-failure may need to
use page_address_in_vma() on file THP tails, which its page->mapping
check did not permit: fix it.

hughd adds: no current usage is known to hit the issue, but this does
fix a subtle trap in a general helper: best fixed in stable sooner than
later.

Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com
Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
Signed-off-by: Jue Wang <juew@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 144de54efc1c3..e05c300048e63 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -716,11 +716,11 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 		if (!vma->anon_vma || !page__anon_vma ||
 		    vma->anon_vma->root != page__anon_vma->root)
 			return -EFAULT;
-	} else if (page->mapping) {
-		if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
-			return -EFAULT;
-	} else
+	} else if (!vma->vm_file) {
+		return -EFAULT;
+	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
 		return -EFAULT;
+	}
 
 	return vma_address(page, vma);
 }
-- 
GitLab


From 22061a1ffabdb9c3385de159c5db7aac3a4df1cc Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 15 Jun 2021 18:24:03 -0700
Subject: [PATCH 2959/3804] mm/thp: unmap_mapping_page() to fix THP
 truncate_cleanup_page()

There is a race between THP unmapping and truncation, when truncate sees
pmd_none() and skips the entry, after munmap's zap_huge_pmd() cleared
it, but before its page_remove_rmap() gets to decrement
compound_mapcount: generating false "BUG: Bad page cache" reports that
the page is still mapped when deleted.  This commit fixes that, but not
in the way I hoped.

The first attempt used try_to_unmap(page, TTU_SYNC|TTU_IGNORE_MLOCK)
instead of unmap_mapping_range() in truncate_cleanup_page(): it has
often been an annoyance that we usually call unmap_mapping_range() with
no pages locked, but there apply it to a single locked page.
try_to_unmap() looks more suitable for a single locked page.

However, try_to_unmap_one() contains a VM_BUG_ON_PAGE(!pvmw.pte,page):
it is used to insert THP migration entries, but not used to unmap THPs.
Copy zap_huge_pmd() and add THP handling now? Perhaps, but their TLB
needs are different, I'm too ignorant of the DAX cases, and couldn't
decide how far to go for anon+swap.  Set that aside.

The second attempt took a different tack: make no change in truncate.c,
but modify zap_huge_pmd() to insert an invalidated huge pmd instead of
clearing it initially, then pmd_clear() between page_remove_rmap() and
unlocking at the end.  Nice.  But powerpc blows that approach out of the
water, with its serialize_against_pte_lookup(), and interesting pgtable
usage.  It would need serious help to get working on powerpc (with a
minor optimization issue on s390 too).  Set that aside.

Just add an "if (page_mapped(page)) synchronize_rcu();" or other such
delay, after unmapping in truncate_cleanup_page()? Perhaps, but though
that's likely to reduce or eliminate the number of incidents, it would
give less assurance of whether we had identified the problem correctly.

This successful iteration introduces "unmap_mapping_page(page)" instead
of try_to_unmap(), and goes the usual unmap_mapping_range_tree() route,
with an addition to details.  Then zap_pmd_range() watches for this
case, and does spin_unlock(pmd_lock) if so - just like
page_vma_mapped_walk() now does in the PVMW_SYNC case.  Not pretty, but
safe.

Note that unmap_mapping_page() is doing a VM_BUG_ON(!PageLocked) to
assert its interface; but currently that's only used to make sure that
page->mapping is stable, and zap_pmd_range() doesn't care if the page is
locked or not.  Along these lines, in invalidate_inode_pages2_range()
move the initial unmap_mapping_range() out from under page lock, before
then calling unmap_mapping_page() under page lock if still mapped.

Link: https://lkml.kernel.org/r/a2a4a148-cdd8-942c-4ef8-51b77f643dbe@google.com
Fixes: fc127da085c2 ("truncate: handle file thp")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jue Wang <juew@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  3 +++
 mm/memory.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 mm/truncate.c      | 43 +++++++++++++++++++------------------------
 3 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf97..8ae31622deeff 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1719,6 +1719,7 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
+	struct page *single_page;		/* Locked page to be unmapped */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1766,6 +1767,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
 extern int fixup_user_fault(struct mm_struct *mm,
 			    unsigned long address, unsigned int fault_flags,
 			    bool *unlocked);
+void unmap_mapping_page(struct page *page);
 void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows);
 void unmap_mapping_range(struct address_space *mapping,
@@ -1786,6 +1788,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
 	BUG();
 	return -EFAULT;
 }
+static inline void unmap_mapping_page(struct page *page) { }
 static inline void unmap_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t nr, bool even_cows) { }
 static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/mm/memory.c b/mm/memory.c
index f3ffab9b9e391..486f4a2874e72 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1361,7 +1361,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
 			else if (zap_huge_pmd(tlb, vma, pmd, addr))
 				goto next;
 			/* fall through */
+		} else if (details && details->single_page &&
+			   PageTransCompound(details->single_page) &&
+			   next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+			spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+			/*
+			 * Take and drop THP pmd lock so that we cannot return
+			 * prematurely, while zap_huge_pmd() has cleared *pmd,
+			 * but not yet decremented compound_mapcount().
+			 */
+			spin_unlock(ptl);
 		}
+
 		/*
 		 * Here there can be other concurrent MADV_DONTNEED or
 		 * trans huge page faults running, and if the pmd is
@@ -3236,6 +3247,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
 	}
 }
 
+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range().  But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct zap_details details = { };
+
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageTail(page));
+
+	details.check_mapping = mapping;
+	details.first_index = page->index;
+	details.last_index = page->index + thp_nr_pages(page) - 1;
+	details.single_page = page;
+
+	i_mmap_lock_write(mapping);
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+	i_mmap_unlock_write(mapping);
+}
+
 /**
  * unmap_mapping_pages() - Unmap pages from processes.
  * @mapping: The address space containing pages to be unmapped.
diff --git a/mm/truncate.c b/mm/truncate.c
index 95af244b112a0..234ddd879caa1 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
  */
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
 {
-	if (page_mapped(page)) {
-		unsigned int nr = thp_nr_pages(page);
-		unmap_mapping_pages(mapping, page->index, nr, false);
-	}
+	if (page_mapped(page))
+		unmap_mapping_page(page);
 
 	if (page_has_private(page))
 		do_invalidatepage(page, 0, thp_size(page));
@@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
 	if (page->mapping != mapping)
 		return -EIO;
 
-	truncate_cleanup_page(mapping, page);
+	truncate_cleanup_page(page);
 	delete_from_page_cache(page);
 	return 0;
 }
@@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		index = indices[pagevec_count(&pvec) - 1] + 1;
 		truncate_exceptional_pvec_entries(mapping, &pvec, indices);
 		for (i = 0; i < pagevec_count(&pvec); i++)
-			truncate_cleanup_page(mapping, pvec.pages[i]);
+			truncate_cleanup_page(pvec.pages[i]);
 		delete_from_page_cache_batch(mapping, &pvec);
 		for (i = 0; i < pagevec_count(&pvec); i++)
 			unlock_page(pvec.pages[i]);
@@ -639,6 +636,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				continue;
 			}
 
+			if (!did_range_unmap && page_mapped(page)) {
+				/*
+				 * If page is mapped, before taking its lock,
+				 * zap the rest of the file in one hit.
+				 */
+				unmap_mapping_pages(mapping, index,
+						(1 + end - index), false);
+				did_range_unmap = 1;
+			}
+
 			lock_page(page);
 			WARN_ON(page_to_index(page) != index);
 			if (page->mapping != mapping) {
@@ -646,23 +653,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				continue;
 			}
 			wait_on_page_writeback(page);
-			if (page_mapped(page)) {
-				if (!did_range_unmap) {
-					/*
-					 * Zap the rest of the file in one hit.
-					 */
-					unmap_mapping_pages(mapping, index,
-						(1 + end - index), false);
-					did_range_unmap = 1;
-				} else {
-					/*
-					 * Just zap this page
-					 */
-					unmap_mapping_pages(mapping, index,
-								1, false);
-				}
-			}
+
+			if (page_mapped(page))
+				unmap_mapping_page(page);
 			BUG_ON(page_mapped(page));
+
 			ret2 = do_launder_page(mapping, page);
 			if (ret2 == 0) {
 				if (!invalidate_complete_page2(mapping, page))
-- 
GitLab


From 504e070dc08f757bccaed6d05c0f53ecbfac8a23 Mon Sep 17 00:00:00 2001
From: Yang Shi <shy828301@gmail.com>
Date: Tue, 15 Jun 2021 18:24:07 -0700
Subject: [PATCH 2960/3804] mm: thp: replace DEBUG_VM BUG with VM_WARN when
 unmap fails for split

When debugging the bug reported by Wang Yugui [1], try_to_unmap() may
fail, but the first VM_BUG_ON_PAGE() just checks page_mapcount(), so it
may miss the failure when the head page is unmapped but another subpage
is still mapped.  Then the second DEBUG_VM BUG(), which checks the total
mapcount, would catch it.  This may cause some confusion.

As this is not a fatal issue, consolidate the two DEBUG_VM checks
into one VM_WARN_ON_ONCE_PAGE().

[1] https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/

Link: https://lkml.kernel.org/r/d0f0db68-98b8-ebfb-16dc-f29df24cf012@google.com
Signed-off-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jue Wang <juew@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 84ab735139dcd..6d2a0119fc58e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2352,15 +2352,15 @@ static void unmap_page(struct page *page)
 {
 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC |
 		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
-	bool unmap_success;
 
 	VM_BUG_ON_PAGE(!PageHead(page), page);
 
 	if (PageAnon(page))
 		ttu_flags |= TTU_SPLIT_FREEZE;
 
-	unmap_success = try_to_unmap(page, ttu_flags);
-	VM_BUG_ON_PAGE(!unmap_success, page);
+	try_to_unmap(page, ttu_flags);
+
+	VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
 }
 
 static void remap_page(struct page *page, unsigned int nr)
@@ -2671,7 +2671,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	struct deferred_split *ds_queue = get_deferred_split_queue(head);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
-	int count, mapcount, extra_pins, ret;
+	int extra_pins, ret;
 	pgoff_t end;
 
 	VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
@@ -2730,7 +2730,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	unmap_page(head);
-	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
 	/* block interrupt reentry in xa_lock and spinlock */
 	local_irq_disable();
@@ -2748,9 +2747,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	spin_lock(&ds_queue->split_queue_lock);
-	count = page_count(head);
-	mapcount = total_mapcount(head);
-	if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+	if (page_ref_freeze(head, 1 + extra_pins)) {
 		if (!list_empty(page_deferred_list(head))) {
 			ds_queue->split_queue_len--;
 			list_del(page_deferred_list(head));
@@ -2770,16 +2767,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		__split_huge_page(page, list, end);
 		ret = 0;
 	} else {
-		if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-			pr_alert("total_mapcount: %u, page_count(): %u\n",
-					mapcount, count);
-			if (PageTail(page))
-				dump_page(head, NULL);
-			dump_page(page, "total_mapcount(head) > 0");
-			BUG();
-		}
 		spin_unlock(&ds_queue->split_queue_lock);
-fail:		if (mapping)
+fail:
+		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		local_irq_enable();
 		remap_page(head, thp_nr_pages(head));
-- 
GitLab


From ccbd6283a9b640c8d5c2b44db318fd72a63338ff Mon Sep 17 00:00:00 2001
From: Miles Chen <miles.chen@mediatek.com>
Date: Tue, 15 Jun 2021 18:24:10 -0700
Subject: [PATCH 2961/3804] mm/sparse: fix check_usemap_section_nr warnings

I see a "virt_to_phys used for non-linear address" warning from
check_usemap_section_nr() on arm64 platforms.

In current implementation of NODE_DATA, if CONFIG_NEED_MULTIPLE_NODES=y,
pglist_data is dynamically allocated and assigned to node_data[].

For example, in arch/arm64/include/asm/mmzone.h:

  extern struct pglist_data *node_data[];
  #define NODE_DATA(nid)          (node_data[(nid)])

If CONFIG_NEED_MULTIPLE_NODES=n, pglist_data is defined as a global
variable named "contig_page_data".

For example, in include/linux/mmzone.h:

  extern struct pglist_data contig_page_data;
  #define NODE_DATA(nid)          (&contig_page_data)

If CONFIG_DEBUG_VIRTUAL is not enabled, __pa() can handle both
dynamically allocated linear addresses and symbol addresses.  However,
if (CONFIG_DEBUG_VIRTUAL=y && CONFIG_NEED_MULTIPLE_NODES=n) we can see
the "virt_to_phys used for non-linear address" warning because
&contig_page_data is not a linear address on arm64.

Warning message:

  virt_to_phys used for non-linear address: (contig_page_data+0x0/0x1c00)
  WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:15 __virt_to_phys+0x58/0x68
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Tainted: G        W         5.13.0-rc1-00074-g1140ab592e2e #3
  Hardware name: linux,dummy-virt (DT)
  pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--)
  Call trace:
     __virt_to_phys+0x58/0x68
     check_usemap_section_nr+0x50/0xfc
     sparse_init_nid+0x1ac/0x28c
     sparse_init+0x1c4/0x1e0
     bootmem_init+0x60/0x90
     setup_arch+0x184/0x1f0
     start_kernel+0x78/0x488

To fix it, create a small function to handle both translations.

Link: https://lkml.kernel.org/r/1623058729-27264-1-git-send-email-miles.chen@mediatek.com
Signed-off-by: Miles Chen <miles.chen@mediatek.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Baoquan He <bhe@redhat.com>
Cc: Kazu <k-hagio-ab@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/sparse.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mm/sparse.c b/mm/sparse.c
index b2ada9dc00cb4..55c18aff3e423 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -344,6 +344,15 @@ size_t mem_section_usage_size(void)
 	return sizeof(struct mem_section_usage) + usemap_size();
 }
 
+static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat)
+{
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+	return __pa_symbol(pgdat);
+#else
+	return __pa(pgdat);
+#endif
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static struct mem_section_usage * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
@@ -362,7 +371,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+	goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
 	limit = goal + (1UL << PA_SECTION_SHIFT);
 	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
@@ -390,7 +399,7 @@ static void __init check_usemap_section_nr(int nid,
 	}
 
 	usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
-	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+	pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT);
 	if (usemap_snr == pgdat_snr)
 		return;
 
-- 
GitLab


From 7bb9557b48fcabaa12750a8775352740def381a8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 14 Jun 2021 12:39:51 -0700
Subject: [PATCH 2962/3804] pstore/blk: Use the normal block device I/O path

Stop poking into block layer internals and just open the block device
file and use kernel_read and kernel_write on it. Note that this means
the transformation from name_to_dev_t can't be used anymore when
pstore_blk is loaded as a module: a full filesystem device path name
must be used instead. Additionally removes the ":internal:" kernel-doc
link, since no such documentation remains.

Co-developed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/pstore-blk.rst |   3 -
 fs/pstore/blk.c                          | 264 +++++++----------------
 2 files changed, 83 insertions(+), 184 deletions(-)

diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
index 49d8149f8d326..79f6d23e8cda1 100644
--- a/Documentation/admin-guide/pstore-blk.rst
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -227,8 +227,5 @@ For developer reference, here are all the important structures and APIs:
 .. kernel-doc:: include/linux/pstore_zone.h
    :internal:
 
-.. kernel-doc:: fs/pstore/blk.c
-   :internal:
-
 .. kernel-doc:: include/linux/pstore_blk.h
    :internal:
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index 7d8e5a1ddd5be..dc5ff763d4146 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -8,15 +8,16 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include "../../block/blk.h"
 #include <linux/blkdev.h>
 #include <linux/string.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/pstore_blk.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/init_syscalls.h>
 #include <linux/mount.h>
-#include <linux/uio.h>
 
 static long kmsg_size = CONFIG_PSTORE_BLK_KMSG_SIZE;
 module_param(kmsg_size, long, 0400);
@@ -60,23 +61,25 @@ MODULE_PARM_DESC(best_effort, "use best effort to write (i.e. do not require sto
  *
  * Usually, this will be a partition of a block device.
  *
- * blkdev accepts the following variants:
- * 1) <hex_major><hex_minor> device number in hexadecimal representation,
- *    with no leading 0x, for example b302.
- * 2) /dev/<disk_name> represents the device number of disk
- * 3) /dev/<disk_name><decimal> represents the device number
+ * blkdev accepts the following variants, when built as a module:
+ * 1) /dev/<disk_name> represents the device number of disk
+ * 2) /dev/<disk_name><decimal> represents the device number
  *    of partition - device number of disk plus the partition number
- * 4) /dev/<disk_name>p<decimal> - same as the above, that form is
+ * 3) /dev/<disk_name>p<decimal> - same as the above, that form is
  *    used when disk name of partitioned disk ends on a digit.
- * 5) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
+ *
+ * blkdev accepts the following variants when built into the kernel:
+ * 1) <hex_major><hex_minor> device number in hexadecimal representation,
+ *    with no leading 0x, for example b302.
+ * 2) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
  *    unique id of a partition if the partition table provides it.
  *    The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
  *    partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
  *    filled hex representation of the 32-bit "NT disk signature", and PP
  *    is a zero-filled hex representation of the 1-based partition number.
- * 6) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
+ * 3) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
  *    a partition with a known unique id.
- * 7) <major>:<minor> major and minor number of the device separated by
+ * 4) <major>:<minor> major and minor number of the device separated by
  *    a colon.
  */
 static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV;
@@ -88,15 +91,9 @@ MODULE_PARM_DESC(blkdev, "block device for pstore storage");
  * during the register/unregister functions.
  */
 static DEFINE_MUTEX(pstore_blk_lock);
-static struct block_device *psblk_bdev;
+static struct file *psblk_file;
 static struct pstore_zone_info *pstore_zone_info;
 
-struct bdev_info {
-	dev_t devt;
-	sector_t nr_sects;
-	sector_t start_sect;
-};
-
 #define check_size(name, alignsize) ({				\
 	long _##name_ = (name);					\
 	_##name_ = _##name_ <= 0 ? 0 : (_##name_ * 1024);	\
@@ -219,203 +216,73 @@ void unregister_pstore_device(struct pstore_device_info *dev)
 }
 EXPORT_SYMBOL_GPL(unregister_pstore_device);
 
-/**
- * psblk_get_bdev() - open block device
- *
- * @holder:	Exclusive holder identifier
- * @info:	Information about bdev to fill in
- *
- * Return: pointer to block device on success and others on error.
- *
- * On success, the returned block_device has reference count of one.
- */
-static struct block_device *psblk_get_bdev(void *holder,
-					   struct bdev_info *info)
-{
-	struct block_device *bdev = ERR_PTR(-ENODEV);
-	fmode_t mode = FMODE_READ | FMODE_WRITE;
-	sector_t nr_sects;
-
-	lockdep_assert_held(&pstore_blk_lock);
-
-	if (pstore_zone_info)
-		return ERR_PTR(-EBUSY);
-
-	if (!blkdev[0])
-		return ERR_PTR(-ENODEV);
-
-	if (holder)
-		mode |= FMODE_EXCL;
-	bdev = blkdev_get_by_path(blkdev, mode, holder);
-	if (IS_ERR(bdev)) {
-		dev_t devt;
-
-		devt = name_to_dev_t(blkdev);
-		if (devt == 0)
-			return ERR_PTR(-ENODEV);
-		bdev = blkdev_get_by_dev(devt, mode, holder);
-		if (IS_ERR(bdev))
-			return bdev;
-	}
-
-	nr_sects = bdev_nr_sectors(bdev);
-	if (!nr_sects) {
-		pr_err("not enough space for '%s'\n", blkdev);
-		blkdev_put(bdev, mode);
-		return ERR_PTR(-ENOSPC);
-	}
-
-	if (info) {
-		info->devt = bdev->bd_dev;
-		info->nr_sects = nr_sects;
-		info->start_sect = get_start_sect(bdev);
-	}
-
-	return bdev;
-}
-
-static void psblk_put_bdev(struct block_device *bdev, void *holder)
-{
-	fmode_t mode = FMODE_READ | FMODE_WRITE;
-
-	lockdep_assert_held(&pstore_blk_lock);
-
-	if (!bdev)
-		return;
-
-	if (holder)
-		mode |= FMODE_EXCL;
-	blkdev_put(bdev, mode);
-}
-
 static ssize_t psblk_generic_blk_read(char *buf, size_t bytes, loff_t pos)
 {
-	struct block_device *bdev = psblk_bdev;
-	struct file file;
-	struct kiocb kiocb;
-	struct iov_iter iter;
-	struct kvec iov = {.iov_base = buf, .iov_len = bytes};
-
-	if (!bdev)
-		return -ENODEV;
-
-	memset(&file, 0, sizeof(struct file));
-	file.f_mapping = bdev->bd_inode->i_mapping;
-	file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
-	file.f_inode = bdev->bd_inode;
-	file_ra_state_init(&file.f_ra, file.f_mapping);
-
-	init_sync_kiocb(&kiocb, &file);
-	kiocb.ki_pos = pos;
-	iov_iter_kvec(&iter, READ, &iov, 1, bytes);
-
-	return generic_file_read_iter(&kiocb, &iter);
+	return kernel_read(psblk_file, buf, bytes, &pos);
 }
 
 static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes,
 		loff_t pos)
 {
-	struct block_device *bdev = psblk_bdev;
-	struct iov_iter iter;
-	struct kiocb kiocb;
-	struct file file;
-	ssize_t ret;
-	struct kvec iov = {.iov_base = (void *)buf, .iov_len = bytes};
-
-	if (!bdev)
-		return -ENODEV;
-
 	/* Console/Ftrace backend may handle buffer until flush dirty zones */
 	if (in_interrupt() || irqs_disabled())
 		return -EBUSY;
-
-	memset(&file, 0, sizeof(struct file));
-	file.f_mapping = bdev->bd_inode->i_mapping;
-	file.f_flags = O_DSYNC | __O_SYNC | O_NOATIME;
-	file.f_inode = bdev->bd_inode;
-
-	init_sync_kiocb(&kiocb, &file);
-	kiocb.ki_pos = pos;
-	iov_iter_kvec(&iter, WRITE, &iov, 1, bytes);
-
-	inode_lock(bdev->bd_inode);
-	ret = generic_write_checks(&kiocb, &iter);
-	if (ret > 0)
-		ret = generic_perform_write(&file, &iter, pos);
-	inode_unlock(bdev->bd_inode);
-
-	if (likely(ret > 0)) {
-		const struct file_operations f_op = {.fsync = blkdev_fsync};
-
-		file.f_op = &f_op;
-		kiocb.ki_pos += ret;
-		ret = generic_write_sync(&kiocb, ret);
-	}
-	return ret;
+	return kernel_write(psblk_file, buf, bytes, &pos);
 }
 
 /*
  * This takes its configuration only from the module parameters now.
- * See psblk_get_bdev() and blkdev.
  */
-static int __register_pstore_blk(void)
+static int __register_pstore_blk(const char *devpath)
 {
-	char bdev_name[BDEVNAME_SIZE];
-	struct block_device *bdev;
-	struct pstore_device_info dev;
-	struct bdev_info binfo;
-	void *holder = blkdev;
+	struct pstore_device_info dev = {
+		.read = psblk_generic_blk_read,
+		.write = psblk_generic_blk_write,
+	};
+	struct inode *inode;
 	int ret = -ENODEV;
 
 	lockdep_assert_held(&pstore_blk_lock);
 
-	/* hold bdev exclusively */
-	memset(&binfo, 0, sizeof(binfo));
-	bdev = psblk_get_bdev(holder, &binfo);
-	if (IS_ERR(bdev)) {
-		pr_err("failed to open '%s'!\n", blkdev);
-		return PTR_ERR(bdev);
+	psblk_file = filp_open(devpath, O_RDWR | O_DSYNC | O_NOATIME | O_EXCL, 0);
+	if (IS_ERR(psblk_file)) {
+		ret = PTR_ERR(psblk_file);
+		pr_err("failed to open '%s': %d!\n", devpath, ret);
+		goto err;
 	}
 
-	/* only allow driver matching the @blkdev */
-	if (!binfo.devt) {
-		pr_debug("no major\n");
-		ret = -ENODEV;
-		goto err_put_bdev;
+	inode = file_inode(psblk_file);
+	if (!S_ISBLK(inode->i_mode)) {
+		pr_err("'%s' is not block device!\n", devpath);
+		goto err_fput;
 	}
 
-	/* psblk_bdev must be assigned before register to pstore/blk */
-	psblk_bdev = bdev;
-
-	memset(&dev, 0, sizeof(dev));
-	dev.total_size = binfo.nr_sects << SECTOR_SHIFT;
-	dev.read = psblk_generic_blk_read;
-	dev.write = psblk_generic_blk_write;
+	inode = I_BDEV(psblk_file->f_mapping->host)->bd_inode;
+	dev.total_size = i_size_read(inode);
 
 	ret = __register_pstore_device(&dev);
 	if (ret)
-		goto err_put_bdev;
+		goto err_fput;
 
-	bdevname(bdev, bdev_name);
-	pr_info("attached %s (no dedicated panic_write!)\n", bdev_name);
 	return 0;
 
-err_put_bdev:
-	psblk_bdev = NULL;
-	psblk_put_bdev(bdev, holder);
+err_fput:
+	fput(psblk_file);
+err:
+	psblk_file = NULL;
+
 	return ret;
 }
 
-static void __unregister_pstore_blk(unsigned int major)
+static void __unregister_pstore_blk(struct file *device)
 {
 	struct pstore_device_info dev = { .read = psblk_generic_blk_read };
-	void *holder = blkdev;
 
 	lockdep_assert_held(&pstore_blk_lock);
-	if (psblk_bdev && MAJOR(psblk_bdev->bd_dev) == major) {
+	if (psblk_file && psblk_file == device) {
 		__unregister_pstore_device(&dev);
-		psblk_put_bdev(psblk_bdev, holder);
-		psblk_bdev = NULL;
+		fput(psblk_file);
+		psblk_file = NULL;
 	}
 }
 
@@ -433,13 +300,48 @@ int pstore_blk_get_config(struct pstore_blk_config *info)
 }
 EXPORT_SYMBOL_GPL(pstore_blk_get_config);
 
+
+#ifndef MODULE
+static const char devname[] = "/dev/pstore-blk";
+static __init const char *early_boot_devpath(const char *initial_devname)
+{
+	/*
+	 * During early boot the real root file system hasn't been
+	 * mounted yet, and no device nodes are present yet. Use the
+	 * same scheme to find the device that we use for mounting
+	 * the root file system.
+	 */
+	dev_t dev = name_to_dev_t(initial_devname);
+
+	if (!dev) {
+		pr_err("failed to resolve '%s'!\n", initial_devname);
+		return initial_devname;
+	}
+
+	init_unlink(devname);
+	init_mknod(devname, S_IFBLK | 0600, new_encode_dev(dev));
+
+	return devname;
+}
+#else
+static inline const char *early_boot_devpath(const char *initial_devname)
+{
+	return initial_devname;
+}
+#endif
+
 static int __init pstore_blk_init(void)
 {
 	int ret = 0;
 
 	mutex_lock(&pstore_blk_lock);
-	if (!pstore_zone_info && best_effort && blkdev[0])
-		ret = __register_pstore_blk();
+	if (!pstore_zone_info && best_effort && blkdev[0]) {
+		ret = __register_pstore_blk(early_boot_devpath(blkdev));
+		if (ret == 0 && pstore_zone_info)
+			pr_info("attached %s:%s (%zu) (no dedicated panic_write!)\n",
+				pstore_zone_info->name, blkdev,
+				pstore_zone_info->total_size);
+	}
 	mutex_unlock(&pstore_blk_lock);
 
 	return ret;
@@ -449,8 +351,8 @@ late_initcall(pstore_blk_init);
 static void __exit pstore_blk_exit(void)
 {
 	mutex_lock(&pstore_blk_lock);
-	if (psblk_bdev)
-		__unregister_pstore_blk(MAJOR(psblk_bdev->bd_dev));
+	if (psblk_file)
+		__unregister_pstore_blk(psblk_file);
 	else {
 		struct pstore_device_info dev = { };
 
-- 
GitLab


From c811659bb9a09b319842bf61602ce858b1d1920a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 10:33:00 -0700
Subject: [PATCH 2963/3804] pstore/blk: Fix kerndoc and redundancy on blkdev
 param

Remove redundant details of blkdev and fix up resulting kerndoc.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/pstore-blk.rst | 11 +++++++----
 fs/pstore/blk.c                          | 24 +-----------------------
 2 files changed, 8 insertions(+), 27 deletions(-)

diff --git a/Documentation/admin-guide/pstore-blk.rst b/Documentation/admin-guide/pstore-blk.rst
index 79f6d23e8cda1..2d22ead9520e3 100644
--- a/Documentation/admin-guide/pstore-blk.rst
+++ b/Documentation/admin-guide/pstore-blk.rst
@@ -45,15 +45,18 @@ blkdev
 The block device to use. Most of the time, it is a partition of block device.
 It's required for pstore/blk. It is also used for MTD device.
 
-It accepts the following variants for block device:
+When pstore/blk is built as a module, "blkdev" accepts the following variants:
 
-1. <hex_major><hex_minor> device number in hexadecimal represents itself; no
-   leading 0x, for example b302.
-#. /dev/<disk_name> represents the device number of disk
+1. /dev/<disk_name> represents the device number of disk
 #. /dev/<disk_name><decimal> represents the device number of partition - device
    number of disk plus the partition number
 #. /dev/<disk_name>p<decimal> - same as the above; this form is used when disk
    name of partitioned disk ends with a digit.
+
+When pstore/blk is built into the kernel, "blkdev" accepts the following variants:
+
+#. <hex_major><hex_minor> device number in hexadecimal representation,
+   with no leading 0x, for example b302.
 #. PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF represents the unique id of
    a partition if the partition table provides it. The UUID may be either an
    EFI/GPT UUID, or refer to an MSDOS partition using the format SSSSSSSS-PP,
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index dc5ff763d4146..c373e0d73e6c7 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -58,29 +58,7 @@ MODULE_PARM_DESC(best_effort, "use best effort to write (i.e. do not require sto
 
 /*
  * blkdev - the block device to use for pstore storage
- *
- * Usually, this will be a partition of a block device.
- *
- * blkdev accepts the following variants, when built as a module:
- * 1) /dev/<disk_name> represents the device number of disk
- * 2) /dev/<disk_name><decimal> represents the device number
- *    of partition - device number of disk plus the partition number
- * 3) /dev/<disk_name>p<decimal> - same as the above, that form is
- *    used when disk name of partitioned disk ends on a digit.
- *
- * blkdev accepts the following variants when built into the kernel:
- * 1) <hex_major><hex_minor> device number in hexadecimal representation,
- *    with no leading 0x, for example b302.
- * 2) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
- *    unique id of a partition if the partition table provides it.
- *    The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
- *    partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
- *    filled hex representation of the 32-bit "NT disk signature", and PP
- *    is a zero-filled hex representation of the 1-based partition number.
- * 3) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
- *    a partition with a known unique id.
- * 4) <major>:<minor> major and minor number of the device separated by
- *    a colon.
+ * See Documentation/admin-guide/pstore-blk.rst for details.
  */
 static char blkdev[80] = CONFIG_PSTORE_BLK_BLKDEV;
 module_param_string(blkdev, blkdev, 80, 0400);
-- 
GitLab


From 8f269102baf788aecfcbbc6313b6bceb54c9b990 Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Wed, 16 Jun 2021 17:10:24 +0800
Subject: [PATCH 2964/3804] net: stmmac: disable clocks in
 stmmac_remove_config_dt()

Platform drivers may call stmmac_probe_config_dt() to parse the DT and may
then call stmmac_remove_config_dt() in error handling after the DT has been
parsed, so the clocks need to be disabled in stmmac_remove_config_dt().

Having gone through all platform drivers which use stmmac_probe_config_dt(),
none of them disables the clocks manually, so it is safe to disable them in
stmmac_remove_config_dt().

Fixes: commit d2ed0a7755fe ("net: ethernet: stmmac: fix of-node and fixed-link-phydev leaks")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 1e17a23d91185..a696ada013eb5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -622,6 +622,8 @@ error_pclk_get:
 void stmmac_remove_config_dt(struct platform_device *pdev,
 			     struct plat_stmmacenet_data *plat)
 {
+	clk_disable_unprepare(plat->stmmac_clk);
+	clk_disable_unprepare(plat->pclk);
 	of_node_put(plat->phy_node);
 	of_node_put(plat->mdio_node);
 }
-- 
GitLab


From 56b786d86694e079d8aad9b314e015cd4ac02a3d Mon Sep 17 00:00:00 2001
From: Dongliang Mu <mudongliangabcd@gmail.com>
Date: Wed, 16 Jun 2021 10:48:33 +0800
Subject: [PATCH 2965/3804] net: usb: fix possible use-after-free in
 smsc75xx_bind

The commit 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
fails to clean up the work scheduled in smsc75xx_reset->
smsc75xx_set_multicast, which leads to use-after-free if the work is
scheduled to start after the deallocation. In addition, this patch
also removes a dangling pointer - dev->data[0].

This patch calls cancel_work_sync to cancel the scheduled work and sets
the dangling pointer to NULL.

Fixes: 46a8b29c6306 ("net: usb: fix memory leak in smsc75xx_bind")
Signed-off-by: Dongliang Mu <mudongliangabcd@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index b286993da67c9..13141dbfa3a83 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -1483,7 +1483,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_wait_ready(dev, 0);
 	if (ret < 0) {
 		netdev_warn(dev->net, "device not ready in smsc75xx_bind\n");
-		goto err;
+		goto free_pdata;
 	}
 
 	smsc75xx_init_mac_address(dev);
@@ -1492,7 +1492,7 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	ret = smsc75xx_reset(dev);
 	if (ret < 0) {
 		netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret);
-		goto err;
+		goto cancel_work;
 	}
 
 	dev->net->netdev_ops = &smsc75xx_netdev_ops;
@@ -1503,8 +1503,11 @@ static int smsc75xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE;
 	return 0;
 
-err:
+cancel_work:
+	cancel_work_sync(&pdata->set_multicast);
+free_pdata:
 	kfree(pdata);
+	dev->data[0] = 0;
 	return ret;
 }
 
@@ -1515,7 +1518,6 @@ static void smsc75xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 		cancel_work_sync(&pdata->set_multicast);
 		netif_dbg(dev, ifdown, dev->net, "free pdata\n");
 		kfree(pdata);
-		pdata = NULL;
 		dev->data[0] = 0;
 	}
 }
-- 
GitLab


From cb3cefe3f3f8af27c6076ef7d1f00350f502055d Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Wed, 16 Jun 2021 17:14:25 +0800
Subject: [PATCH 2966/3804] net: fec_ptp: add clock rate zero check

Add clock rate zero check to fix coverity issue of "divide by 0".

Fixes: commit 85bd1798b24a ("net: fec: fix spin_lock dead lock")
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_ptp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 1753807cbf97e..7326a06128238 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -604,6 +604,10 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx)
 	fep->ptp_caps.enable = fec_ptp_enable;
 
 	fep->cycle_speed = clk_get_rate(fep->clk_ptp);
+	if (!fep->cycle_speed) {
+		fep->cycle_speed = NSEC_PER_SEC;
+		dev_err(&fep->pdev->dev, "clk_ptp clock rate is zero\n");
+	}
 	fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed;
 
 	spin_lock_init(&fep->tmreg_lock);
-- 
GitLab


From d23765646e71b43ed2b809930411ba5c0aadee7b Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Wed, 16 Jun 2021 17:14:26 +0800
Subject: [PATCH 2967/3804] net: fec_ptp: fix issue caused by refactor the
 fec_devtype

Commit da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
refactored fec_devtype, so the ptp driver needs to be adjusted accordingly.

Fixes: da722186f654 ("net: fec: set GPR bit on suspend by DT configuration.")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_ptp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 7326a06128238..d71eac7e19249 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -215,15 +215,13 @@ static u64 fec_ptp_read(const struct cyclecounter *cc)
 {
 	struct fec_enet_private *fep =
 		container_of(cc, struct fec_enet_private, cc);
-	const struct platform_device_id *id_entry =
-		platform_get_device_id(fep->pdev);
 	u32 tempval;
 
 	tempval = readl(fep->hwp + FEC_ATIME_CTRL);
 	tempval |= FEC_T_CTRL_CAPTURE;
 	writel(tempval, fep->hwp + FEC_ATIME_CTRL);
 
-	if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE)
+	if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
 		udelay(1);
 
 	return readl(fep->hwp + FEC_ATIME);
-- 
GitLab


From d8e2973029b8b2ce477b564824431f3385c77083 Mon Sep 17 00:00:00 2001
From: Chengyang Fan <cy.fan@huawei.com>
Date: Wed, 16 Jun 2021 17:59:25 +0800
Subject: [PATCH 2968/3804] net: ipv4: fix memory leak in ip_mc_add1_src

BUG: memory leak
unreferenced object 0xffff888101bc4c00 (size 32):
  comm "syz-executor527", pid 360, jiffies 4294807421 (age 19.329s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
    01 00 00 00 00 00 00 00 ac 14 14 bb 00 00 02 00 ................
  backtrace:
    [<00000000f17c5244>] kmalloc include/linux/slab.h:558 [inline]
    [<00000000f17c5244>] kzalloc include/linux/slab.h:688 [inline]
    [<00000000f17c5244>] ip_mc_add1_src net/ipv4/igmp.c:1971 [inline]
    [<00000000f17c5244>] ip_mc_add_src+0x95f/0xdb0 net/ipv4/igmp.c:2095
    [<000000001cb99709>] ip_mc_source+0x84c/0xea0 net/ipv4/igmp.c:2416
    [<0000000052cf19ed>] do_ip_setsockopt net/ipv4/ip_sockglue.c:1294 [inline]
    [<0000000052cf19ed>] ip_setsockopt+0x114b/0x30c0 net/ipv4/ip_sockglue.c:1423
    [<00000000477edfbc>] raw_setsockopt+0x13d/0x170 net/ipv4/raw.c:857
    [<00000000e75ca9bb>] __sys_setsockopt+0x158/0x270 net/socket.c:2117
    [<00000000bdb993a8>] __do_sys_setsockopt net/socket.c:2128 [inline]
    [<00000000bdb993a8>] __se_sys_setsockopt net/socket.c:2125 [inline]
    [<00000000bdb993a8>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2125
    [<000000006a1ffdbd>] do_syscall_64+0x40/0x80 arch/x86/entry/common.c:47
    [<00000000b11467c4>] entry_SYSCALL_64_after_hwframe+0x44/0xae

In commit 24803f38a5c0 ("igmp: do not remove igmp souce list info when set
link down"), the ip_mc_clear_src() in ip_mc_destroy_dev() was removed,
because it was also called in igmpv3_clear_delrec().

Rough callgraph:

inetdev_destroy
-> ip_mc_destroy_dev
     -> igmpv3_clear_delrec
        -> ip_mc_clear_src
-> RCU_INIT_POINTER(dev->ip_ptr, NULL)

However, ip_mc_clear_src() called in igmpv3_clear_delrec() doesn't
release in_dev->mc_list->sources. And RCU_INIT_POINTER() assigns the
NULL to dev->ip_ptr. As a result, in_dev cannot be obtained through
inetdev_by_index() and then in_dev->mc_list->sources cannot be released
by ip_mc_del1_src() in the sock_close. Rough call sequence goes like:

sock_close
-> __sock_release
   -> inet_release
      -> ip_mc_drop_socket
         -> inetdev_by_index
         -> ip_mc_leave_src
            -> ip_mc_del_src
               -> ip_mc_del1_src

So we still need to call ip_mc_clear_src() in ip_mc_destroy_dev() to free
in_dev->mc_list->sources.

Fixes: 24803f38a5c0 ("igmp: do not remove igmp souce list info ...")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Chengyang Fan <cy.fan@huawei.com>
Acked-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 7b272bbed2b43..6b3c558a4f232 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1801,6 +1801,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 	while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
 		in_dev->mc_list = i->next_rcu;
 		in_dev->mc_count--;
+		ip_mc_clear_src(i);
 		ip_ma_put(i);
 	}
 }
-- 
GitLab


From c7d2ef5dd4b03ed0ee1d13bc0c55f9cf62d49bd6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Jun 2021 06:42:01 -0700
Subject: [PATCH 2969/3804] net/packet: annotate accesses to po->bind

tpacket_snd(), packet_snd(), packet_getname() and packet_seq_show()
can read po->num without holding a lock. This means other threads
can change po->num at the same time.

KCSAN complained about this known fact [1]
Add READ_ONCE()/WRITE_ONCE() to address the issue.

[1] BUG: KCSAN: data-race in packet_do_bind / packet_sendmsg

write to 0xffff888131a0dcc0 of 2 bytes by task 24714 on cpu 0:
 packet_do_bind+0x3ab/0x7e0 net/packet/af_packet.c:3181
 packet_bind+0xc3/0xd0 net/packet/af_packet.c:3255
 __sys_bind+0x200/0x290 net/socket.c:1637
 __do_sys_bind net/socket.c:1648 [inline]
 __se_sys_bind net/socket.c:1646 [inline]
 __x64_sys_bind+0x3d/0x50 net/socket.c:1646
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888131a0dcc0 of 2 bytes by task 24719 on cpu 1:
 packet_snd net/packet/af_packet.c:2899 [inline]
 packet_sendmsg+0x317/0x3570 net/packet/af_packet.c:3040
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmsg+0x1ed/0x270 net/socket.c:2433
 __do_sys_sendmsg net/socket.c:2442 [inline]
 __se_sys_sendmsg net/socket.c:2440 [inline]
 __x64_sys_sendmsg+0x42/0x50 net/socket.c:2440
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x0000 -> 0x1200

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 24719 Comm: syz-executor.5 Not tainted 5.13.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 74e6e45a8e843..e91a36bdd1aba 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2683,7 +2683,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	}
 	if (likely(saddr == NULL)) {
 		dev	= packet_cached_dev_get(po);
-		proto	= po->num;
+		proto	= READ_ONCE(po->num);
 	} else {
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2896,7 +2896,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	if (likely(saddr == NULL)) {
 		dev	= packet_cached_dev_get(po);
-		proto	= po->num;
+		proto	= READ_ONCE(po->num);
 	} else {
 		err = -EINVAL;
 		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -3171,7 +3171,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 			/* prevents packet_notifier() from calling
 			 * register_prot_hook()
 			 */
-			po->num = 0;
+			WRITE_ONCE(po->num, 0);
 			__unregister_prot_hook(sk, true);
 			rcu_read_lock();
 			dev_curr = po->prot_hook.dev;
@@ -3181,7 +3181,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 		}
 
 		BUG_ON(po->running);
-		po->num = proto;
+		WRITE_ONCE(po->num, proto);
 		po->prot_hook.type = proto;
 
 		if (unlikely(unlisted)) {
@@ -3526,7 +3526,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 
 	sll->sll_family = AF_PACKET;
 	sll->sll_ifindex = po->ifindex;
-	sll->sll_protocol = po->num;
+	sll->sll_protocol = READ_ONCE(po->num);
 	sll->sll_pkttype = 0;
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
@@ -4414,7 +4414,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	was_running = po->running;
 	num = po->num;
 	if (was_running) {
-		po->num = 0;
+		WRITE_ONCE(po->num, 0);
 		__unregister_prot_hook(sk, false);
 	}
 	spin_unlock(&po->bind_lock);
@@ -4449,7 +4449,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 	spin_lock(&po->bind_lock);
 	if (was_running) {
-		po->num = num;
+		WRITE_ONCE(po->num, num);
 		register_prot_hook(sk);
 	}
 	spin_unlock(&po->bind_lock);
@@ -4619,7 +4619,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 			   s,
 			   refcount_read(&s->sk_refcnt),
 			   s->sk_type,
-			   ntohs(po->num),
+			   ntohs(READ_ONCE(po->num)),
 			   po->ifindex,
 			   po->running,
 			   atomic_read(&s->sk_rmem_alloc),
-- 
GitLab


From e032f7c9c7cefffcfb79b9fc16c53011d2d9d11f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Jun 2021 06:42:02 -0700
Subject: [PATCH 2970/3804] net/packet: annotate accesses to po->ifindex

Like prior patch, we need to annotate lockless accesses to po->ifindex
For instance, packet_getname() is reading po->ifindex (twice) while
another thread is able to change po->ifindex.

KCSAN reported:

BUG: KCSAN: data-race in packet_do_bind / packet_getname

write to 0xffff888143ce3cbc of 4 bytes by task 25573 on cpu 1:
 packet_do_bind+0x420/0x7e0 net/packet/af_packet.c:3191
 packet_bind+0xc3/0xd0 net/packet/af_packet.c:3255
 __sys_bind+0x200/0x290 net/socket.c:1637
 __do_sys_bind net/socket.c:1648 [inline]
 __se_sys_bind net/socket.c:1646 [inline]
 __x64_sys_bind+0x3d/0x50 net/socket.c:1646
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888143ce3cbc of 4 bytes by task 25578 on cpu 0:
 packet_getname+0x5b/0x1a0 net/packet/af_packet.c:3525
 __sys_getsockname+0x10e/0x1a0 net/socket.c:1887
 __do_sys_getsockname net/socket.c:1902 [inline]
 __se_sys_getsockname net/socket.c:1899 [inline]
 __x64_sys_getsockname+0x3e/0x50 net/socket.c:1899
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000000 -> 0x00000001

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 25578 Comm: syz-executor.5 Not tainted 5.13.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e91a36bdd1aba..330ba68828e7d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3187,11 +3187,11 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
 		if (unlikely(unlisted)) {
 			dev_put(dev);
 			po->prot_hook.dev = NULL;
-			po->ifindex = -1;
+			WRITE_ONCE(po->ifindex, -1);
 			packet_cached_dev_reset(po);
 		} else {
 			po->prot_hook.dev = dev;
-			po->ifindex = dev ? dev->ifindex : 0;
+			WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
 			packet_cached_dev_assign(po, dev);
 		}
 	}
@@ -3505,7 +3505,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 	uaddr->sa_family = AF_PACKET;
 	memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
 	rcu_read_lock();
-	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+	dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
 	if (dev)
 		strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
 	rcu_read_unlock();
@@ -3520,16 +3520,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
 	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+	int ifindex;
 
 	if (peer)
 		return -EOPNOTSUPP;
 
+	ifindex = READ_ONCE(po->ifindex);
 	sll->sll_family = AF_PACKET;
-	sll->sll_ifindex = po->ifindex;
+	sll->sll_ifindex = ifindex;
 	sll->sll_protocol = READ_ONCE(po->num);
 	sll->sll_pkttype = 0;
 	rcu_read_lock();
-	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+	dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
 	if (dev) {
 		sll->sll_hatype = dev->type;
 		sll->sll_halen = dev->addr_len;
@@ -4108,7 +4110,7 @@ static int packet_notifier(struct notifier_block *this,
 				}
 				if (msg == NETDEV_UNREGISTER) {
 					packet_cached_dev_reset(po);
-					po->ifindex = -1;
+					WRITE_ONCE(po->ifindex, -1);
 					if (po->prot_hook.dev)
 						dev_put(po->prot_hook.dev);
 					po->prot_hook.dev = NULL;
@@ -4620,7 +4622,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 			   refcount_read(&s->sk_refcnt),
 			   s->sk_type,
 			   ntohs(READ_ONCE(po->num)),
-			   po->ifindex,
+			   READ_ONCE(po->ifindex),
 			   po->running,
 			   atomic_read(&s->sk_rmem_alloc),
 			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
-- 
GitLab


From 0fd158b89b50b3a31c97a639ff496e1c59686e97 Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Wed, 16 Jun 2021 16:03:21 +0200
Subject: [PATCH 2971/3804] selftests: net: veth: make test compatible with
 dash

veth.sh is a shell script that uses /bin/sh; some distros (Ubuntu, for
example) use dash as /bin/sh, and in this case the test reports the
following error:

 # ./veth.sh: 21: local: -r: bad variable name
 # ./veth.sh: 21: local: -r: bad variable name

This happens because dash doesn't support the option "-r" with local.

Moreover, if the bpf object is missing, the script exits with -1, which
is an illegal number for dash:

 exit: Illegal number: -1

Change the script to be compatible both with bash and dash and prevent
the errors above.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/veth.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 2fedc0781ce8c..11d7cdb898c03 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -18,7 +18,8 @@ ret=0
 
 cleanup() {
 	local ns
-	local -r jobs="$(jobs -p)"
+	local jobs
+	readonly jobs="$(jobs -p)"
 	[ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
 	rm -f $STATS
 
@@ -108,7 +109,7 @@ chk_gro() {
 
 if [ ! -f ../bpf/xdp_dummy.o ]; then
 	echo "Missing xdp_dummy helper. Build bpf selftest first"
-	exit -1
+	exit 1
 fi
 
 create_ns
-- 
GitLab


From a494bd642d9120648b06bb7d28ce6d05f55a7819 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 16 Jun 2021 07:47:15 -0700
Subject: [PATCH 2972/3804] net/af_unix: fix a data-race in unix_dgram_sendmsg
 / unix_release_sock

While unix_may_send(sk, osk) is called while osk is locked, it appears
unix_release_sock() can overwrite unix_peer() after this lock has been
released, making KCSAN unhappy.

Changing unix_release_sock() to access/change unix_peer()
before lock is released should fix this issue.

BUG: KCSAN: data-race in unix_dgram_sendmsg / unix_release_sock

write to 0xffff88810465a338 of 8 bytes by task 20852 on cpu 1:
 unix_release_sock+0x4ed/0x6e0 net/unix/af_unix.c:558
 unix_release+0x2f/0x50 net/unix/af_unix.c:859
 __sock_release net/socket.c:599 [inline]
 sock_close+0x6c/0x150 net/socket.c:1258
 __fput+0x25b/0x4e0 fs/file_table.c:280
 ____fput+0x11/0x20 fs/file_table.c:313
 task_work_run+0xae/0x130 kernel/task_work.c:164
 tracehook_notify_resume include/linux/tracehook.h:189 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:175 [inline]
 exit_to_user_mode_prepare+0x156/0x190 kernel/entry/common.c:209
 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
 syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:302
 do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff88810465a338 of 8 bytes by task 20888 on cpu 0:
 unix_may_send net/unix/af_unix.c:189 [inline]
 unix_dgram_sendmsg+0x923/0x1610 net/unix/af_unix.c:1712
 sock_sendmsg_nosec net/socket.c:654 [inline]
 sock_sendmsg net/socket.c:674 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350
 ___sys_sendmsg net/socket.c:2404 [inline]
 __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490
 __do_sys_sendmmsg net/socket.c:2519 [inline]
 __se_sys_sendmmsg net/socket.c:2516 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516
 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0xffff888167905400 -> 0x0000000000000000

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 20888 Comm: syz-executor.0 Not tainted 5.13.0-rc5-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5a31307ceb76d..5d1192ceb1397 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -535,12 +535,14 @@ static void unix_release_sock(struct sock *sk, int embrion)
 	u->path.mnt = NULL;
 	state = sk->sk_state;
 	sk->sk_state = TCP_CLOSE;
+
+	skpair = unix_peer(sk);
+	unix_peer(sk) = NULL;
+
 	unix_state_unlock(sk);
 
 	wake_up_interruptible_all(&u->peer_wait);
 
-	skpair = unix_peer(sk);
-
 	if (skpair != NULL) {
 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 			unix_state_lock(skpair);
@@ -555,7 +557,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
 
 		unix_dgram_peer_wake_disconnect(sk, skpair);
 		sock_put(skpair); /* It may now die */
-		unix_peer(sk) = NULL;
 	}
 
 	/* Try to flush out this socket. Throw out buffers at least */
-- 
GitLab


From 1b29df0e2e802cb15a5196c936f494161ec97502 Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Wed, 16 Jun 2021 16:57:27 +0200
Subject: [PATCH 2973/3804] selftests: net: use bash to run udpgro_fwd test
 case

udpgro_fwd.sh contains many bash-specific operators ("[[", "local -r"),
but it's using /bin/sh; in some distros /bin/sh is mapped to /bin/dash,
which doesn't support such operators.

Force the test to use /bin/bash explicitly and prevent false positive
test failures.

Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/udpgro_fwd.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index a8fa641362828..7f26591f236b9 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 readonly BASE="ns-$(mktemp -u XXXXXX)"
-- 
GitLab


From 99718abdc00e86e4f286dd836408e2834886c16e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 12:53:03 -0700
Subject: [PATCH 2974/3804] r8152: Avoid memcpy() over-reading of ETH_SS_STATS

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally reading across neighboring array fields.

The memcpy() is copying the entire structure, not just the first array.
Adjust the source argument so the compiler can do appropriate bounds
checking.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f6abb2fbf9728..e25bfb7021ed4 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -8678,7 +8678,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	switch (stringset) {
 	case ETH_SS_STATS:
-		memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings));
+		memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings));
 		break;
 	}
 }
-- 
GitLab


From 224004fbb033600715dbd626bceec10bfd9c58bc Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 12:53:33 -0700
Subject: [PATCH 2975/3804] sh_eth: Avoid memcpy() over-reading of ETH_SS_STATS

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally reading across neighboring array fields.

The memcpy() is copying the entire structure, not just the first array.
Adjust the source argument so the compiler can do appropriate bounds
checking.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index c5b154868c1fc..713d3629b4c1c 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
 	switch (stringset) {
 	case ETH_SS_STATS:
-		memcpy(data, *sh_eth_gstrings_stats,
+		memcpy(data, sh_eth_gstrings_stats,
 		       sizeof(sh_eth_gstrings_stats));
 		break;
 	}
-- 
GitLab


From da5ac772cfe2a03058b0accfac03fad60c46c24d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 12:53:59 -0700
Subject: [PATCH 2976/3804] r8169: Avoid memcpy() over-reading of ETH_SS_STATS

In preparation for FORTIFY_SOURCE performing compile-time and run-time
field bounds checking for memcpy(), memmove(), and memset(), avoid
intentionally reading across neighboring array fields.

The memcpy() is copying the entire structure, not just the first array.
Adjust the source argument so the compiler can do appropriate bounds
checking.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2c89cde7da1e6..2ee72dc431cd5 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1671,7 +1671,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
 	switch(stringset) {
 	case ETH_SS_STATS:
-		memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings));
+		memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings));
 		break;
 	}
 }
-- 
GitLab


From 4cbbe34807938e6e494e535a68d5ff64edac3f20 Mon Sep 17 00:00:00 2001
From: Yifan Zhang <yifan1.zhang@amd.com>
Date: Thu, 10 Jun 2021 09:55:01 +0800
Subject: [PATCH 2977/3804] drm/amdgpu/gfx9: fix the doorbell missing when in
 CGPG issue.

If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC.
Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 516467e962b72..c09225d065c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3673,8 +3673,12 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 					(adev->doorbell_index.kiq * 2) << 2);
+		/* If GC has entered CGPG, ringing doorbell > first page doesn't
+		 * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
+		 * this issue.
+		 */
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-					(adev->doorbell_index.userqueue_end * 2) << 2);
+					(adev->doorbell.size - 4));
 	}
 
 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-- 
GitLab


From 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3 Mon Sep 17 00:00:00 2001
From: Yifan Zhang <yifan1.zhang@amd.com>
Date: Thu, 10 Jun 2021 10:10:07 +0800
Subject: [PATCH 2978/3804] drm/amdgpu/gfx10: enlarge
 CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell.

If GC has entered CGPG, ringing doorbell > first page doesn't wakeup GC.
Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround this issue.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 0597aeb5f0e89..327b1f8213a8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6871,8 +6871,12 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 			(adev->doorbell_index.kiq * 2) << 2);
+		/* If GC has entered CGPG, ringing doorbell > first page doesn't
+		 * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
+		 * this issue.
+		 */
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-			(adev->doorbell_index.userqueue_end * 2) << 2);
+			(adev->doorbell.size - 4));
 	}
 
 	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-- 
GitLab


From e73a99f3287a740a07d6618e9470f4d6cb217da8 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.ibm.com>
Date: Tue, 1 Jun 2021 08:27:29 +0200
Subject: [PATCH 2979/3804] s390/ap: Fix hanging ioctl caused by wrong msg
 counter

When an AP queue is switched to soft offline, all pending
requests are purged out of the pending requests list and
'received' by the upper layer like zcrypt device drivers.
This is also done for requests which are already enqueued
into the firmware queue. A request in a firmware queue
may eventually produce a response message, but there is
no waiting process any more. However, the response was
counted with the queue_count and as this counter was
reset to 0 with the offline switch, the pending response
caused the queue_count to become negative. The next request
increased this counter to 0 (instead of 1), which caused
the ap code to assume there is nothing to receive, and so
the response for this valid request was never fetched
from the firmware queue.

All of this caused a queue to not work properly after an
offline/online switch and, in the end, processes to hang
forever when trying to send a crypto request after a
queue offline/online switch cycle.

Fixed by a) making sure the counter does not drop below 0
and b) making sure that, after a successful enqueue of a
message, the counter has a value of at least 1.

Additionally a warning is emitted, when a reply can't get
assigned to a waiting process. This may be normal operation
(process had timeout or has been killed) but may give a
hint that something unexpected happened (like this odd
behavior described above).

Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 drivers/s390/crypto/ap_queue.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index ecefc25eff0c0..337353c9655ed 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -135,12 +135,13 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
 {
 	struct ap_queue_status status;
 	struct ap_message *ap_msg;
+	bool found = false;
 
 	status = ap_dqap(aq->qid, &aq->reply->psmid,
 			 aq->reply->msg, aq->reply->len);
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
-		aq->queue_count--;
+		aq->queue_count = max_t(int, 0, aq->queue_count - 1);
 		if (aq->queue_count > 0)
 			mod_timer(&aq->timeout,
 				  jiffies + aq->request_timeout);
@@ -150,8 +151,14 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq)
 			list_del_init(&ap_msg->list);
 			aq->pendingq_count--;
 			ap_msg->receive(aq, ap_msg, aq->reply);
+			found = true;
 			break;
 		}
+		if (!found) {
+			AP_DBF_WARN("%s unassociated reply psmid=0x%016llx on 0x%02x.%04x\n",
+				    __func__, aq->reply->psmid,
+				    AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
+		}
 		fallthrough;
 	case AP_RESPONSE_NO_PENDING_REPLY:
 		if (!status.queue_empty || aq->queue_count <= 0)
@@ -232,7 +239,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
 			   ap_msg->flags & AP_MSG_FLAG_SPECIAL);
 	switch (status.response_code) {
 	case AP_RESPONSE_NORMAL:
-		aq->queue_count++;
+		aq->queue_count = max_t(int, 1, aq->queue_count + 1);
 		if (aq->queue_count == 1)
 			mod_timer(&aq->timeout, jiffies + aq->request_timeout);
 		list_move_tail(&ap_msg->list, &aq->pendingq);
-- 
GitLab


From 94a4b8414d3e91104873007b659252f855ee344a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Mon, 8 Mar 2021 15:41:55 +0200
Subject: [PATCH 2980/3804] net/mlx5: Fix error path for set HCA defaults

When mlx5_core_set_hca_defaults() failed, we used the wrong goto
label for the error unwind flow.

Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized")
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a1d67bd7fb43b..0d0f63a27aba8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1161,7 +1161,7 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 	err = mlx5_core_set_hca_defaults(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed to set hca defaults\n");
-		goto err_sriov;
+		goto err_set_hca;
 	}
 
 	mlx5_vhca_event_start(dev);
@@ -1194,6 +1194,7 @@ err_ec:
 	mlx5_sf_hw_table_destroy(dev);
 err_vhca:
 	mlx5_vhca_event_stop(dev);
+err_set_hca:
 	mlx5_cleanup_fs(dev);
 err_fs:
 	mlx5_accel_tls_cleanup(dev);
-- 
GitLab


From 2058cc9c8041fde9c0bdd8e868c72b137cff8563 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Sun, 21 Mar 2021 19:57:14 +0200
Subject: [PATCH 2981/3804] net/mlx5: Check that driver was probed prior
 attaching the device

The device can be requested to be attached despite not being probed.
This situation is possible if devlink reload races with module removal,
and the following kernel panic is an outcome of such a race.

 mlx5_core 0000:00:09.0: firmware version: 4.7.9999
 mlx5_core 0000:00:09.0: 0.000 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x255 link)
 BUG: unable to handle page fault for address: fffffffffffffff0
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 3218067 P4D 3218067 PUD 321a067 PMD 0
 Oops: 0000 [#1] SMP KASAN NOPTI
 CPU: 7 PID: 250 Comm: devlink Not tainted 5.12.0-rc2+ #2836
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
 RIP: 0010:mlx5_attach_device+0x80/0x280 [mlx5_core]
 Code: f8 48 c1 e8 03 42 80 3c 38 00 0f 85 80 01 00 00 48 8b 45 68 48 8d 78 f0 48 89 fe 48 c1 ee 03 42 80 3c 3e 00 0f 85 70 01 00 00 <48> 8b 40 f0 48 85 c0 74 0d 48 89 ef ff d0 85 c0 0f 85 84 05 0e 00
 RSP: 0018:ffff8880129675f0 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffffffff827407f1
 RDX: 1ffff110011336cf RSI: 1ffffffffffffffe RDI: fffffffffffffff0
 RBP: ffff888008e0c000 R08: 0000000000000008 R09: ffffffffa0662ee7
 R10: fffffbfff40cc5dc R11: 0000000000000000 R12: ffff88800ea002e0
 R13: ffffed1001d459f7 R14: ffffffffa05ef4f8 R15: dffffc0000000000
 FS:  00007f51dfeaf740(0000) GS:ffff88806d5c0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: fffffffffffffff0 CR3: 000000000bc82006 CR4: 0000000000370ea0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  mlx5_load_one+0x117/0x1d0 [mlx5_core]
  devlink_reload+0x2d5/0x520
  ? devlink_remote_reload_actions_performed+0x30/0x30
  ? mutex_trylock+0x24b/0x2d0
  ? devlink_nl_cmd_reload+0x62b/0x1070
  devlink_nl_cmd_reload+0x66d/0x1070
  ? devlink_reload+0x520/0x520
  ? devlink_nl_pre_doit+0x64/0x4d0
  genl_family_rcv_msg_doit+0x1e9/0x2f0
  ? mutex_lock_io_nested+0x1130/0x1130
  ? genl_family_rcv_msg_attrs_parse.constprop.0+0x240/0x240
  ? security_capable+0x51/0x90
  genl_rcv_msg+0x27f/0x4a0
  ? genl_get_cmd+0x3c0/0x3c0
  ? lock_acquire+0x1a9/0x6d0
  ? devlink_reload+0x520/0x520
  ? lock_release+0x6c0/0x6c0
  netlink_rcv_skb+0x11d/0x340
  ? genl_get_cmd+0x3c0/0x3c0
  ? netlink_ack+0x9f0/0x9f0
  ? lock_release+0x1f9/0x6c0
  genl_rcv+0x24/0x40
  netlink_unicast+0x433/0x700
  ? netlink_attachskb+0x730/0x730
  ? _copy_from_iter_full+0x178/0x650
  ? __alloc_skb+0x113/0x2b0
  netlink_sendmsg+0x6f1/0xbd0
  ? netlink_unicast+0x700/0x700
  ? netlink_unicast+0x700/0x700
  sock_sendmsg+0xb0/0xe0
  __sys_sendto+0x193/0x240
  ? __x64_sys_getpeername+0xb0/0xb0
  ? copy_page_range+0x2300/0x2300
  ? __up_read+0x1a1/0x7b0
  ? do_user_addr_fault+0x219/0xdc0
  __x64_sys_sendto+0xdd/0x1b0
  ? syscall_enter_from_user_mode+0x1d/0x50
  do_syscall_64+0x2d/0x40
  entry_SYSCALL_64_after_hwframe+0x44/0xae
 RIP: 0033:0x7f51dffb514a
 Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 c3 0f 1f 44 00 00 55 48 83 ec 30 44 89 4c
 RSP: 002b:00007ffcaef22e78 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f51dffb514a
 RDX: 0000000000000030 RSI: 000055750daf2440 RDI: 0000000000000003
 RBP: 000055750daf2410 R08: 00007f51e0081200 R09: 000000000000000c
 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
 Modules linked in: mlx5_core(-) ptp pps_core ib_ipoib rdma_ucm rdma_cm iw_cm ib_cm ib_umad ib_uverbs ib_core [last unloaded: mlx5_ib]
 CR2: fffffffffffffff0
 ---[ end trace 7789831bfe74fa42 ]---

Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index a9166cd850131..8de118adfb544 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -320,6 +320,16 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
 			}
 		} else {
 			adev = &priv->adev[i]->adev;
+
+			/* Pay attention that this is not PCI driver that
+			 * mlx5_core_dev is connected, but auxiliary driver.
+			 *
+			 * Here we can race of module unload with devlink
+			 * reload, but we don't need to take extra lock because
+			 * we are holding global mlx5_intf_mutex.
+			 */
+			if (!adev->dev.driver)
+				continue;
 			adrv = to_auxiliary_drv(adev->dev.driver);
 
 			if (adrv->resume)
@@ -350,6 +360,10 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
 			continue;
 
 		adev = &priv->adev[i]->adev;
+		/* Auxiliary driver was unbind manually through sysfs */
+		if (!adev->dev.driver)
+			goto skip_suspend;
+
 		adrv = to_auxiliary_drv(adev->dev.driver);
 
 		if (adrv->suspend) {
@@ -357,6 +371,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev)
 			continue;
 		}
 
+skip_suspend:
 		del_adev(&priv->adev[i]->adev);
 		priv->adev[i] = NULL;
 	}
-- 
GitLab


From bbc8222dc49db8d49add0f27bcac33f4b92193dc Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 8 Jun 2021 19:14:08 +0300
Subject: [PATCH 2982/3804] net/mlx5: E-Switch, Read PF mac address

The external controller PF's MAC address is not read from the device
during vport setup. Failing to read it results in showing all zeros to
the user even though the factory-programmed MAC is a valid value.

$ devlink port show eth1 -jp
{
    "port": {
        "pci/0000:03:00.0/196608": {
            "type": "eth",
            "netdev": "eth1",
            "flavour": "pcipf",
            "controller": 1,
            "pfnum": 0,
            "splittable": false,
            "function": {
                "hw_addr": "00:00:00:00:00:00"
            }
        }
    }
}

Hence, read it when enabling a vport.

After the fix,

$ devlink port show eth1 -jp
{
    "port": {
        "pci/0000:03:00.0/196608": {
            "type": "eth",
            "netdev": "eth1",
            "flavour": "pcipf",
            "controller": 1,
            "pfnum": 0,
            "splittable": false,
            "function": {
                "hw_addr": "98:03:9b:a0:60:11"
            }
        }
    }
}

Fixes: f099fde16db3 ("net/mlx5: E-switch, Support querying port function mac address")
Signed-off-by: Bodong Wang <bodong@nvidia.com>
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b88705a3a1a8e..97e6cb6f13c14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1054,6 +1054,12 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, u16 vport_num,
 			goto err_vhca_mapping;
 	}
 
+	/* External controller host PF has factory programmed MAC.
+	 * Read it from the device.
+	 */
+	if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF)
+		mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac);
+
 	esw_vport_change_handle_locked(vport);
 
 	esw->enabled_vports++;
-- 
GitLab


From ca36fc4d77b35b8d142cf1ed0eae5ec2e071dc3c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 8 Jun 2021 19:03:24 +0300
Subject: [PATCH 2983/3804] net/mlx5: E-Switch, Allow setting GUID for host PF
 vport

E-switch should be able to set the GUID of the host PF vport.
Currently it returns an error. This results in the error below
when a user attempts to configure the MAC address of the PF of an
external controller.

$ devlink port function set pci/0000:03:00.0/196608 \
   hw_addr 00:00:00:11:22:33

mlx5_core 0000:03:00.0: mlx5_esw_set_vport_mac_locked:1876:(pid 6715):\
"Failed to set vport 0 node guid, err = -22.
RDMA_CM will not function properly for this VF."

The check for a zero vport is no longer needed.

Fixes: 330077d14de1 ("net/mlx5: E-switch, Supporting setting devlink port function mac address")
Signed-off-by: Yuval Avnery <yuvalav@nvidia.com>
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Bodong Wang <bodong@nvidia.com>
Reviewed-by: Alaa Hleihel <alaa@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 457ad42eaa2a2..4c1440a95ad75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -465,8 +465,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 	void *in;
 	int err;
 
-	if (!vport)
-		return -EINVAL;
 	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
 		return -EACCES;
 
-- 
GitLab


From c7d6c19b3bde66d7aebbe93e0f9e6d9ff57fc3fa Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Thu, 10 Jun 2021 18:39:53 +0300
Subject: [PATCH 2984/3804] net/mlx5: SF_DEV, remove SF device on invalid state

When auxiliary bus autoprobe is disabled and SF is in ACTIVE state,
on SF port deletion it transitions from ACTIVE->ALLOCATED->INVALID.

By the time the VHCA event handler queries the state, it has already
transitioned to the INVALID state.

In this scenario, the event handler failed to delete the SF device.

Fix it by deleting the SF when SF state is INVALID.

Fixes: 90d010b8634b ("net/mlx5: SF, Add auxiliary device support")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Vu Pham <vuhuong@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
index 6a0c6f965ad16..fa0288afc0dd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -163,6 +163,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
 	sf_index = event->function_id - base_id;
 	sf_dev = xa_load(&table->devices, sf_index);
 	switch (event->new_vhca_state) {
+	case MLX5_VHCA_STATE_INVALID:
 	case MLX5_VHCA_STATE_ALLOCATED:
 		if (sf_dev)
 			mlx5_sf_dev_del(table->dev, sf_dev, sf_index);
-- 
GitLab


From 65fb7d109abe3a1a9f1c2d3ba7e1249bc978d5f0 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@nvidia.com>
Date: Tue, 1 Jun 2021 18:10:06 +0300
Subject: [PATCH 2985/3804] net/mlx5: DR, Fix STEv1 incorrect L3 decapsulation
 padding

Decapsulation L3 on small inner packets which are less than
64 Bytes was done incorrectly. In small packets there is an
extra padding added in L2 which should not be included in L3
length. The issue was that after decapL3 the extra L2 padding
caused an update on the L3 length.

To avoid this issue, the new header is pushed to the beginning
of the packet (offset 0), which should not cause a HW reparse
or an update of the L3 length.

Fixes: c349b4137cfd ("net/mlx5: DR, Add STEv1 modify header logic")
Reviewed-by: Erez Shitrit <erezsh@nvidia.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@nvidia.com>
Signed-off-by: Alex Vesker <valex@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/steering/dr_ste_v1.c   | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index 054c2e2b65548..7466f016375cd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -694,7 +694,11 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
 	if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM)
 		return -EINVAL;
 
-	memcpy(padded_data, data, data_sz);
+	inline_data_sz =
+		MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+
+	/* Add an alignment padding  */
+	memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
 
 	/* Remove L2L3 outer headers */
 	MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id,
@@ -706,32 +710,34 @@ static int dr_ste_v1_set_action_decap_l3_list(void *data,
 	hw_action += DR_STE_ACTION_DOUBLE_SZ;
 	used_actions++; /* Remove and NOP are a single double action */
 
-	inline_data_sz =
-		MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data);
+	/* Point to the last dword of the header */
+	data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
 
-	/* Add the new header inline + 2 extra bytes */
+	/* Add the new header using inline action 4Byte at a time, the header
+	 * is added in reversed order to the beginning of the packet to avoid
+	 * incorrect parsing by the HW. Since header is 14B or 18B an extra
+	 * two bytes are padded and later removed.
+	 */
 	for (i = 0; i < data_sz / inline_data_sz + 1; i++) {
 		void *addr_inline;
 
 		MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id,
 			 DR_STE_V1_ACTION_ID_INSERT_INLINE);
 		/* The hardware expects here offset to words (2 bytes) */
-		MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset,
-			 i * 2);
+		MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
 
 		/* Copy bytes one by one to avoid endianness problem */
 		addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1,
 					   hw_action, inline_data);
-		memcpy(addr_inline, data_ptr, inline_data_sz);
+		memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz);
 		hw_action += DR_STE_ACTION_DOUBLE_SZ;
-		data_ptr += inline_data_sz;
 		used_actions++;
 	}
 
-	/* Remove 2 extra bytes */
+	/* Remove first 2 extra bytes */
 	MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id,
 		 DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE);
-	MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2);
+	MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0);
 	/* The hardware expects here size in words (2 bytes) */
 	MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1);
 	used_actions++;
-- 
GitLab


From a5ae8fc9058e37437c8c1f82b3d412b4abd1b9e6 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dlinkin@nvidia.com>
Date: Fri, 14 May 2021 11:14:19 +0300
Subject: [PATCH 2986/3804] net/mlx5e: Don't create devices during unload flow

Running the devlink reload command for a port in switchdev mode causes
resource corruption: the driver can't release the allocated EQ and
reclaim memory pages, because the "rdma" auxiliary device has added CQs,
which block the EQ from deletion.
The erroneous sequence happens during the reload-down phase, and is as
follows:

1. detach device - suspends auxiliary devices which support it, destroys
   others. During this step "eth-rep" and "rdma-rep" are destroyed,
   "eth" - suspended.
2. disable SRIOV - moves device to legacy mode; as part of disablement -
   rescans drivers. This step adds "rdma" auxiliary device.
3. destroy EQ table - <failure>.

The driver shouldn't create any device during unload flows. To handle
that, implement the MLX5_PRIV_FLAGS_DETACH flag: set it on device detach
and unset it on device attach. If the flag is set, the drivers rescan is
a no-op.

Fixes: a925b5e309c9 ("net/mlx5: Register mlx5 devices to auxiliary virtual bus")
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 4 ++++
 include/linux/mlx5/driver.h                   | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 8de118adfb544..ceebfc20f65e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -303,6 +303,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev)
 	int ret = 0, i;
 
 	mutex_lock(&mlx5_intf_mutex);
+	priv->flags &= ~MLX5_PRIV_FLAGS_DETACH;
 	for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) {
 		if (!priv->adev[i]) {
 			bool is_supported = false;
@@ -375,6 +376,7 @@ skip_suspend:
 		del_adev(&priv->adev[i]->adev);
 		priv->adev[i] = NULL;
 	}
+	priv->flags |= MLX5_PRIV_FLAGS_DETACH;
 	mutex_unlock(&mlx5_intf_mutex);
 }
 
@@ -463,6 +465,8 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
 	struct mlx5_priv *priv = &dev->priv;
 
 	lockdep_assert_held(&mlx5_intf_mutex);
+	if (priv->flags & MLX5_PRIV_FLAGS_DETACH)
+		return 0;
 
 	delete_drivers(dev);
 	if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 020a8f7fdbdd4..f8902bcd91e26 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -542,6 +542,10 @@ struct mlx5_core_roce {
 enum {
 	MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0,
 	MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1,
+	/* Set during device detach to block any further devices
+	 * creation/deletion on drivers rescan. Unset during device attach.
+	 */
+	MLX5_PRIV_FLAGS_DETACH = 1 << 2,
 };
 
 struct mlx5_adev {
-- 
GitLab


From 0232fc2ddcf4ffe01069fd1aa07922652120f44a Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Thu, 10 Jun 2021 14:20:28 +0300
Subject: [PATCH 2987/3804] net/mlx5: Reset mkey index on creation

Reset only the index part of the mkey and keep the variant part. On
devlink reload, driver recreates mkeys, so the mkey index may change.
Trying to preserve the variant part of the mkey, driver mistakenly
merged the mkey index with current value. In case of a devlink reload,
current value of index part is dirty, so the index may be corrupted.

Fixes: 54c62e13ad76 ("{IB,net}/mlx5: Setup mkey variant before mr create command invocation")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Signed-off-by: Amir Tzin <amirtz@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 50af84e76fb6a..174f71ed52800 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -54,7 +54,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
 	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
 	mkey->size = MLX5_GET64(mkc, mkc, len);
-	mkey->key |= mlx5_idx_to_mkey(mkey_index);
+	mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	init_waitqueue_head(&mkey->wait);
 
-- 
GitLab


From 1d1f6cc5818c750ac69473e4951e7165913fbf16 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 15 Jun 2021 10:19:13 -0700
Subject: [PATCH 2988/3804] pstore/blk: Include zone in pstore_device_info

Information was redundant between struct pstore_zone_info and struct
pstore_device_info. Use struct pstore_zone_info, with member name "zone".

Additionally untangle the logic for the "best effort" block device
instance.

Signed-off-by: Kees Cook <keescook@chromium.org>
Fixed-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/lkml/20210617005424.182305-1-pulehui@huawei.com
---
 drivers/mtd/mtdpstore.c    |  10 +--
 fs/pstore/blk.c            | 143 ++++++++++++++++++++-----------------
 include/linux/pstore_blk.h |  27 +------
 3 files changed, 87 insertions(+), 93 deletions(-)

diff --git a/drivers/mtd/mtdpstore.c b/drivers/mtd/mtdpstore.c
index a3ae8778f6a9b..e13d42c0acb0f 100644
--- a/drivers/mtd/mtdpstore.c
+++ b/drivers/mtd/mtdpstore.c
@@ -423,13 +423,13 @@ static void mtdpstore_notify_add(struct mtd_info *mtd)
 	longcnt = BITS_TO_LONGS(div_u64(mtd->size, mtd->erasesize));
 	cxt->badmap = kcalloc(longcnt, sizeof(long), GFP_KERNEL);
 
-	cxt->dev.total_size = mtd->size;
 	/* just support dmesg right now */
 	cxt->dev.flags = PSTORE_FLAGS_DMESG;
-	cxt->dev.read = mtdpstore_read;
-	cxt->dev.write = mtdpstore_write;
-	cxt->dev.erase = mtdpstore_erase;
-	cxt->dev.panic_write = mtdpstore_panic_write;
+	cxt->dev.zone.read = mtdpstore_read;
+	cxt->dev.zone.write = mtdpstore_write;
+	cxt->dev.zone.erase = mtdpstore_erase;
+	cxt->dev.zone.panic_write = mtdpstore_panic_write;
+	cxt->dev.zone.total_size = mtd->size;
 
 	ret = register_pstore_device(&cxt->dev);
 	if (ret) {
diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
index c373e0d73e6c7..04ce58c939a0b 100644
--- a/fs/pstore/blk.c
+++ b/fs/pstore/blk.c
@@ -70,7 +70,7 @@ MODULE_PARM_DESC(blkdev, "block device for pstore storage");
  */
 static DEFINE_MUTEX(pstore_blk_lock);
 static struct file *psblk_file;
-static struct pstore_zone_info *pstore_zone_info;
+static struct pstore_device_info *pstore_device_info;
 
 #define check_size(name, alignsize) ({				\
 	long _##name_ = (name);					\
@@ -91,7 +91,7 @@ static struct pstore_zone_info *pstore_zone_info;
 		_##name_ = 0;					\
 	/* Synchronize module parameters with resuls. */	\
 	name = _##name_ / 1024;					\
-	pstore_zone_info->name = _##name_;			\
+	dev->zone.name = _##name_;				\
 }
 
 static int __register_pstore_device(struct pstore_device_info *dev)
@@ -104,50 +104,42 @@ static int __register_pstore_device(struct pstore_device_info *dev)
 		pr_err("NULL device info\n");
 		return -EINVAL;
 	}
-	if (!dev->total_size) {
+	if (!dev->zone.total_size) {
 		pr_err("zero sized device\n");
 		return -EINVAL;
 	}
-	if (!dev->read) {
+	if (!dev->zone.read) {
 		pr_err("no read handler for device\n");
 		return -EINVAL;
 	}
-	if (!dev->write) {
+	if (!dev->zone.write) {
 		pr_err("no write handler for device\n");
 		return -EINVAL;
 	}
 
 	/* someone already registered before */
-	if (pstore_zone_info)
+	if (pstore_device_info)
 		return -EBUSY;
 
-	pstore_zone_info = kzalloc(sizeof(struct pstore_zone_info), GFP_KERNEL);
-	if (!pstore_zone_info)
-		return -ENOMEM;
-
 	/* zero means not limit on which backends to attempt to store. */
 	if (!dev->flags)
 		dev->flags = UINT_MAX;
 
+	/* Copy in module parameters. */
 	verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
 	verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG);
 	verify_size(console_size, 4096, dev->flags & PSTORE_FLAGS_CONSOLE);
 	verify_size(ftrace_size, 4096, dev->flags & PSTORE_FLAGS_FTRACE);
+	dev->zone.max_reason = max_reason;
+
+	/* Initialize required zone ownership details. */
+	dev->zone.name = KBUILD_MODNAME;
+	dev->zone.owner = THIS_MODULE;
+
+	ret = register_pstore_zone(&dev->zone);
+	if (ret == 0)
+		pstore_device_info = dev;
 
-	pstore_zone_info->total_size = dev->total_size;
-	pstore_zone_info->max_reason = max_reason;
-	pstore_zone_info->read = dev->read;
-	pstore_zone_info->write = dev->write;
-	pstore_zone_info->erase = dev->erase;
-	pstore_zone_info->panic_write = dev->panic_write;
-	pstore_zone_info->name = KBUILD_MODNAME;
-	pstore_zone_info->owner = THIS_MODULE;
-
-	ret = register_pstore_zone(pstore_zone_info);
-	if (ret) {
-		kfree(pstore_zone_info);
-		pstore_zone_info = NULL;
-	}
 	return ret;
 }
 /**
@@ -174,10 +166,9 @@ EXPORT_SYMBOL_GPL(register_pstore_device);
 static void __unregister_pstore_device(struct pstore_device_info *dev)
 {
 	lockdep_assert_held(&pstore_blk_lock);
-	if (pstore_zone_info && pstore_zone_info->read == dev->read) {
-		unregister_pstore_zone(pstore_zone_info);
-		kfree(pstore_zone_info);
-		pstore_zone_info = NULL;
+	if (pstore_device_info && pstore_device_info == dev) {
+		unregister_pstore_zone(&dev->zone);
+		pstore_device_info = NULL;
 	}
 }
 
@@ -211,12 +202,9 @@ static ssize_t psblk_generic_blk_write(const char *buf, size_t bytes,
 /*
  * This takes its configuration only from the module parameters now.
  */
-static int __register_pstore_blk(const char *devpath)
+static int __register_pstore_blk(struct pstore_device_info *dev,
+				 const char *devpath)
 {
-	struct pstore_device_info dev = {
-		.read = psblk_generic_blk_read,
-		.write = psblk_generic_blk_write,
-	};
 	struct inode *inode;
 	int ret = -ENODEV;
 
@@ -236,9 +224,9 @@ static int __register_pstore_blk(const char *devpath)
 	}
 
 	inode = I_BDEV(psblk_file->f_mapping->host)->bd_inode;
-	dev.total_size = i_size_read(inode);
+	dev->zone.total_size = i_size_read(inode);
 
-	ret = __register_pstore_device(&dev);
+	ret = __register_pstore_device(dev);
 	if (ret)
 		goto err_fput;
 
@@ -252,18 +240,6 @@ err:
 	return ret;
 }
 
-static void __unregister_pstore_blk(struct file *device)
-{
-	struct pstore_device_info dev = { .read = psblk_generic_blk_read };
-
-	lockdep_assert_held(&pstore_blk_lock);
-	if (psblk_file && psblk_file == device) {
-		__unregister_pstore_device(&dev);
-		fput(psblk_file);
-		psblk_file = NULL;
-	}
-}
-
 /* get information of pstore/blk */
 int pstore_blk_get_config(struct pstore_blk_config *info)
 {
@@ -308,18 +284,63 @@ static inline const char *early_boot_devpath(const char *initial_devname)
 }
 #endif
 
+static int __init __best_effort_init(void)
+{
+	struct pstore_device_info *best_effort_dev;
+	int ret;
+
+	/* No best-effort mode requested. */
+	if (!best_effort)
+		return 0;
+
+	/* Reject an empty blkdev. */
+	if (!blkdev[0]) {
+		pr_err("blkdev empty with best_effort=Y\n");
+		return -EINVAL;
+	}
+
+	best_effort_dev = kzalloc(sizeof(*best_effort_dev), GFP_KERNEL);
+	if (!best_effort_dev)
+		return -ENOMEM;
+
+	best_effort_dev->zone.read = psblk_generic_blk_read;
+	best_effort_dev->zone.write = psblk_generic_blk_write;
+
+	ret = __register_pstore_blk(best_effort_dev,
+				    early_boot_devpath(blkdev));
+	if (ret)
+		kfree(best_effort_dev);
+	else
+		pr_info("attached %s (%zu) (no dedicated panic_write!)\n",
+			blkdev, best_effort_dev->zone.total_size);
+
+	return ret;
+}
+
+static void __exit __best_effort_exit(void)
+{
+	/*
+	 * Currently, the only user of psblk_file is best_effort, so
+	 * we can assume that pstore_device_info is associated with it.
+	 * Once there are "real" blk devices, there will need to be a
+	 * dedicated pstore_blk_info, etc.
+	 */
+	if (psblk_file) {
+		struct pstore_device_info *dev = pstore_device_info;
+
+		__unregister_pstore_device(dev);
+		kfree(dev);
+		fput(psblk_file);
+		psblk_file = NULL;
+	}
+}
+
 static int __init pstore_blk_init(void)
 {
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&pstore_blk_lock);
-	if (!pstore_zone_info && best_effort && blkdev[0]) {
-		ret = __register_pstore_blk(early_boot_devpath(blkdev));
-		if (ret == 0 && pstore_zone_info)
-			pr_info("attached %s:%s (%zu) (no dedicated panic_write!)\n",
-				pstore_zone_info->name, blkdev,
-				pstore_zone_info->total_size);
-	}
+	ret = __best_effort_init();
 	mutex_unlock(&pstore_blk_lock);
 
 	return ret;
@@ -329,15 +350,9 @@ late_initcall(pstore_blk_init);
 static void __exit pstore_blk_exit(void)
 {
 	mutex_lock(&pstore_blk_lock);
-	if (psblk_file)
-		__unregister_pstore_blk(psblk_file);
-	else {
-		struct pstore_device_info dev = { };
-
-		if (pstore_zone_info)
-			dev.read = pstore_zone_info->read;
-		__unregister_pstore_device(&dev);
-	}
+	__best_effort_exit();
+	/* If we've been asked to unload, unregister any remaining device. */
+	__unregister_pstore_device(pstore_device_info);
 	mutex_unlock(&pstore_blk_lock);
 }
 module_exit(pstore_blk_exit);
diff --git a/include/linux/pstore_blk.h b/include/linux/pstore_blk.h
index 99564f93d7748..924ca07aafbd9 100644
--- a/include/linux/pstore_blk.h
+++ b/include/linux/pstore_blk.h
@@ -10,36 +10,15 @@
 /**
  * struct pstore_device_info - back-end pstore/blk driver structure.
  *
- * @total_size: The total size in bytes pstore/blk can use. It must be greater
- *		than 4096 and be multiple of 4096.
  * @flags:	Refer to macro starting with PSTORE_FLAGS defined in
  *		linux/pstore.h. It means what front-ends this device support.
  *		Zero means all backends for compatible.
- * @read:	The general read operation. Both of the function parameters
- *		@size and @offset are relative value to bock device (not the
- *		whole disk).
- *		On success, the number of bytes should be returned, others
- *		means error.
- * @write:	The same as @read, but the following error number:
- *		-EBUSY means try to write again later.
- *		-ENOMSG means to try next zone.
- * @erase:	The general erase operation for device with special removing
- *		job. Both of the function parameters @size and @offset are
- *		relative value to storage.
- *		Return 0 on success and others on failure.
- * @panic_write:The write operation only used for panic case. It's optional
- *		if you do not care panic log. The parameters are relative
- *		value to storage.
- *		On success, the number of bytes should be returned, others
- *		excluding -ENOMSG mean error. -ENOMSG means to try next zone.
+ * @zone:	The struct pstore_zone_info details.
+ *
  */
 struct pstore_device_info {
-	unsigned long total_size;
 	unsigned int flags;
-	pstore_zone_read_op read;
-	pstore_zone_write_op write;
-	pstore_zone_erase_op erase;
-	pstore_zone_write_op panic_write;
+	struct pstore_zone_info zone;
 };
 
 int  register_pstore_device(struct pstore_device_info *dev);
-- 
GitLab


From 3f52c9aef27b0427ff4091f3d08095219e1046af Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Tue, 8 Jun 2021 14:23:43 -0700
Subject: [PATCH 2989/3804] crypto: marvell/cesa - change FPGA indirect article
 to an

Change use of 'a fpga' to 'an fpga'

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/cesa/cesa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h
index c1007f2ba79c8..d215a6bed6bc7 100644
--- a/drivers/crypto/marvell/cesa/cesa.h
+++ b/drivers/crypto/marvell/cesa/cesa.h
@@ -66,7 +66,7 @@
 #define CESA_SA_ST_ACT_1			BIT(1)
 
 /*
- * CESA_SA_FPGA_INT_STATUS looks like a FPGA leftover and is documented only
+ * CESA_SA_FPGA_INT_STATUS looks like an FPGA leftover and is documented only
  * in Errata 4.12. It looks like that it was part of an IRQ-controller in FPGA
  * and someone forgot to remove  it while switching to the core and moving to
  * CESA_SA_INT_STATUS.
-- 
GitLab


From 87c8ba5cd7f99b1c05589c455703f54e92f43ed0 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:15:26 +0800
Subject: [PATCH 2990/3804] crypto: ccp - Use list_move_tail instead of
 list_del/list_add_tail in ccp-dmaengine.c

Using list_move_tail() instead of list_del() + list_add_tail() in ccp-dmaengine.c.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-dmaengine.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 0770a83bf1a57..d718db224be42 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -307,8 +307,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc)
 	spin_lock_irqsave(&chan->lock, flags);
 
 	cookie = dma_cookie_assign(tx_desc);
-	list_del(&desc->entry);
-	list_add_tail(&desc->entry, &chan->pending);
+	list_move_tail(&desc->entry, &chan->pending);
 
 	spin_unlock_irqrestore(&chan->lock, flags);
 
-- 
GitLab


From 22ca9f4aaf431a9413dcc115dd590123307f274f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 10 Jun 2021 08:21:50 +0200
Subject: [PATCH 2991/3804] crypto: shash - avoid comparing pointers to
 exported functions under CFI

crypto_shash_alg_has_setkey() is implemented by testing whether the
.setkey() member of a struct shash_alg points to the default version,
called shash_no_setkey(). As crypto_shash_alg_has_setkey() is a static
inline, this requires shash_no_setkey() to be exported to modules.

Unfortunately, when building with CFI, function pointers are routed
via CFI stubs which are private to each module (or to the kernel proper)
and so this function pointer comparison may fail spuriously.

Let's fix this by turning crypto_shash_alg_has_setkey() into an out of
line function.

Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/shash.c                 | 18 +++++++++++++++---
 include/crypto/internal/hash.h |  8 +-------
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/crypto/shash.c b/crypto/shash.c
index 2e3433ad97629..0a0a50cb694f0 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -20,12 +20,24 @@
 
 static const struct crypto_type crypto_shash_type;
 
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-		    unsigned int keylen)
+static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+			   unsigned int keylen)
 {
 	return -ENOSYS;
 }
-EXPORT_SYMBOL_GPL(shash_no_setkey);
+
+/*
+ * Check whether an shash algorithm has a setkey function.
+ *
+ * For CFI compatibility, this must not be an inline function.  This is because
+ * when CFI is enabled, modules won't get the same address for shash_no_setkey
+ * (if it were exported, which inlining would require) as the core kernel will.
+ */
+bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+	return alg->setkey != shash_no_setkey;
+}
+EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey);
 
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 				  unsigned int keylen)
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 0a288dddcf5be..25806141db591 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -75,13 +75,7 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count);
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst);
 
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-		    unsigned int keylen);
-
-static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
-{
-	return alg->setkey != shash_no_setkey;
-}
+bool crypto_shash_alg_has_setkey(struct shash_alg *alg);
 
 static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg)
 {
-- 
GitLab


From 10ff9976d06fc6a11f512755d500ab2860cbe650 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 11 Jun 2021 10:01:00 +0800
Subject: [PATCH 2992/3804] crypto: api - remove CRYPTOA_U32 and related
 functions

According to the advice of Eric and Herbert, type CRYPTOA_U32
has been unused for over a decade, so remove the code related to
CRYPTOA_U32.

After removing CRYPTOA_U32, the type of the variable attrs can be
changed from union to struct.

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 18 ------------------
 crypto/algboss.c        | 31 ++++++-------------------------
 include/crypto/algapi.h |  1 -
 include/linux/crypto.h  |  5 -----
 4 files changed, 6 insertions(+), 49 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index fdabf2675b63f..43f999dba4dc0 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -868,24 +868,6 @@ const char *crypto_attr_alg_name(struct rtattr *rta)
 }
 EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
 
-int crypto_attr_u32(struct rtattr *rta, u32 *num)
-{
-	struct crypto_attr_u32 *nu32;
-
-	if (!rta)
-		return -ENOENT;
-	if (RTA_PAYLOAD(rta) < sizeof(*nu32))
-		return -EINVAL;
-	if (rta->rta_type != CRYPTOA_U32)
-		return -EINVAL;
-
-	nu32 = RTA_DATA(rta);
-	*num = nu32->num;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_attr_u32);
-
 int crypto_inst_setname(struct crypto_instance *inst, const char *name,
 			struct crypto_alg *alg)
 {
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 5ebccbd6b74ed..1814d2c5188a3 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -28,16 +28,9 @@ struct cryptomgr_param {
 		struct crypto_attr_type data;
 	} type;
 
-	union {
+	struct {
 		struct rtattr attr;
-		struct {
-			struct rtattr attr;
-			struct crypto_attr_alg data;
-		} alg;
-		struct {
-			struct rtattr attr;
-			struct crypto_attr_u32 data;
-		} nu32;
+		struct crypto_attr_alg data;
 	} attrs[CRYPTO_MAX_ATTRS];
 
 	char template[CRYPTO_MAX_ALG_NAME];
@@ -104,12 +97,10 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 
 	i = 0;
 	for (;;) {
-		int notnum = 0;
-
 		name = ++p;
 
 		for (; isalnum(*p) || *p == '-' || *p == '_'; p++)
-			notnum |= !isdigit(*p);
+			;
 
 		if (*p == '(') {
 			int recursion = 0;
@@ -123,7 +114,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 					break;
 			}
 
-			notnum = 1;
 			p++;
 		}
 
@@ -131,18 +121,9 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
 		if (!len)
 			goto err_free_param;
 
-		if (notnum) {
-			param->attrs[i].alg.attr.rta_len =
-				sizeof(param->attrs[i].alg);
-			param->attrs[i].alg.attr.rta_type = CRYPTOA_ALG;
-			memcpy(param->attrs[i].alg.data.name, name, len);
-		} else {
-			param->attrs[i].nu32.attr.rta_len =
-				sizeof(param->attrs[i].nu32);
-			param->attrs[i].nu32.attr.rta_type = CRYPTOA_U32;
-			param->attrs[i].nu32.data.num =
-				simple_strtol(name, NULL, 0);
-		}
+		param->attrs[i].attr.rta_len = sizeof(param->attrs[i]);
+		param->attrs[i].attr.rta_type = CRYPTOA_ALG;
+		memcpy(param->attrs[i].data.name, name, len);
 
 		param->tb[i + 1] = &param->attrs[i].attr;
 		i++;
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 86f0748009af3..41d42e649da4f 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -118,7 +118,6 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn);
 struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb);
 int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret);
 const char *crypto_attr_alg_name(struct rtattr *rta);
-int crypto_attr_u32(struct rtattr *rta, u32 *num);
 int crypto_inst_setname(struct crypto_instance *inst, const char *name,
 			struct crypto_alg *alg);
 
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index da5e0d74bb2f4..3b9263d6122fd 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -647,7 +647,6 @@ enum {
 	CRYPTOA_UNSPEC,
 	CRYPTOA_ALG,
 	CRYPTOA_TYPE,
-	CRYPTOA_U32,
 	__CRYPTOA_MAX,
 };
 
@@ -665,10 +664,6 @@ struct crypto_attr_type {
 	u32 mask;
 };
 
-struct crypto_attr_u32 {
-	u32 num;
-};
-
 /* 
  * Transform user interface.
  */
-- 
GitLab


From 72b010dc33b9598883bc84d40b0a9d07c16f5e39 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:43 +0800
Subject: [PATCH 2993/3804] crypto: hisilicon/qm - supports writing QoS in the
 host

Based on the token bucket algorithm, the HAC driver supports configuring
each function's QoS in the host. The driver supports writing the QoS
through the debugfs node named "alg_qos". The QoS value ranges from 1 to
1000.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 298 +++++++++++++++++++++++++++++++++-
 drivers/crypto/hisilicon/qm.h |  14 ++
 2 files changed, 310 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index efa14c9ee9763..2dd450fdc01a4 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -240,6 +240,32 @@
 #define QM_DRIVER_REMOVING		0
 #define QM_RST_SCHED			1
 #define QM_RESETTING			2
+#define QM_QOS_PARAM_NUM		2
+#define QM_QOS_VAL_NUM			1
+#define QM_QOS_BDF_PARAM_NUM		4
+#define QM_QOS_MAX_VAL			1000
+#define QM_QOS_RATE			100
+#define QM_QOS_EXPAND_RATE		1000
+#define QM_SHAPER_CIR_B_MASK		GENMASK(7, 0)
+#define QM_SHAPER_CIR_U_MASK		GENMASK(10, 8)
+#define QM_SHAPER_CIR_S_MASK		GENMASK(14, 11)
+#define QM_SHAPER_FACTOR_CIR_U_SHIFT	8
+#define QM_SHAPER_FACTOR_CIR_S_SHIFT	11
+#define QM_SHAPER_FACTOR_CBS_B_SHIFT	15
+#define QM_SHAPER_FACTOR_CBS_S_SHIFT	19
+#define QM_SHAPER_CBS_B			1
+#define QM_SHAPER_CBS_S			16
+#define QM_SHAPER_VFT_OFFSET		6
+#define QM_QOS_MIN_ERROR_RATE		5
+#define QM_QOS_TYPICAL_NUM		8
+#define QM_SHAPER_MIN_CBS_S		8
+#define QM_QOS_TICK			0x300U
+#define QM_QOS_DIVISOR_CLK		0x1f40U
+#define QM_QOS_MAX_CIR_B		200
+#define QM_QOS_MIN_CIR_B		100
+#define QM_QOS_MAX_CIR_U		6
+#define QM_QOS_MAX_CIR_S		11
+#define QM_QOS_VAL_MAX_LEN		32
 
 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
 	(((hop_num) << QM_CQ_HOP_NUM_SHIFT)	| \
@@ -280,6 +306,7 @@
 enum vft_type {
 	SQC_VFT = 0,
 	CQC_VFT,
+	SHAPER_VFT,
 };
 
 enum acc_err_result {
@@ -288,6 +315,11 @@ enum acc_err_result {
 	ACC_ERR_RECOVERED,
 };
 
+enum qm_alg_type {
+	ALG_TYPE_0,
+	ALG_TYPE_1,
+};
+
 enum qm_mb_cmd {
 	QM_PF_FLR_PREPARE = 0x01,
 	QM_PF_SRST_PREPARE,
@@ -460,6 +492,11 @@ static const char * const qp_s[] = {
 	"none", "init", "start", "stop", "close",
 };
 
+static const u32 typical_qos_val[QM_QOS_TYPICAL_NUM] = {100, 250, 500, 1000,
+						10000, 25000, 50000, 100000};
+static const u32 typical_qos_cbs_s[QM_QOS_TYPICAL_NUM] = {9, 10, 11, 12, 16,
+							 17, 18, 19};
+
 static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new)
 {
 	enum qm_state curr = atomic_read(&qm->status.flags);
@@ -899,8 +936,69 @@ static void qm_init_prefetch(struct hisi_qm *qm)
 	writel(page_type, qm->io_base + QM_PAGE_SIZE);
 }
 
+/*
+ * the formula:
+ * IR = X Mbps if ir = 1 means IR = 100 Mbps, if ir = 10000 means = 10Gbps
+ *
+ *		        IR_b * (2 ^ IR_u) * 8
+ * IR(Mbps) * 10 ^ -3 = -------------------------
+ *		        Tick * (2 ^ IR_s)
+ */
+static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s)
+{
+	return ((cir_b * QM_QOS_DIVISOR_CLK) * (1 << cir_u)) /
+					(QM_QOS_TICK * (1 << cir_s));
+}
+
+static u32 acc_shaper_calc_cbs_s(u32 ir)
+{
+	int i;
+
+	if (ir < typical_qos_val[0])
+		return QM_SHAPER_MIN_CBS_S;
+
+	for (i = 1; i < QM_QOS_TYPICAL_NUM; i++) {
+		if (ir >= typical_qos_val[i - 1] && ir < typical_qos_val[i])
+			return typical_qos_cbs_s[i - 1];
+	}
+
+	return typical_qos_cbs_s[QM_QOS_TYPICAL_NUM - 1];
+}
+
+static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor)
+{
+	u32 cir_b, cir_u, cir_s, ir_calc;
+	u32 error_rate;
+
+	factor->cbs_s = acc_shaper_calc_cbs_s(ir);
+
+	for (cir_b = QM_QOS_MIN_CIR_B; cir_b <= QM_QOS_MAX_CIR_B; cir_b++) {
+		for (cir_u = 0; cir_u <= QM_QOS_MAX_CIR_U; cir_u++) {
+			for (cir_s = 0; cir_s <= QM_QOS_MAX_CIR_S; cir_s++) {
+				/** the formula is changed to:
+				 *	   IR_b * (2 ^ IR_u) * DIVISOR_CLK
+				 * IR(Mbps) = -------------------------
+				 *	       768 * (2 ^ IR_s)
+				 */
+				ir_calc = acc_shaper_para_calc(cir_b, cir_u,
+							       cir_s);
+				error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
+				if (error_rate <= QM_QOS_MIN_ERROR_RATE) {
+					factor->cir_b = cir_b;
+					factor->cir_u = cir_u;
+					factor->cir_s = cir_s;
+
+					return 0;
+				}
+			}
+		}
+	}
+
+	return -EINVAL;
+}
+
 static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
-			    u32 number)
+			    u32 number, struct qm_shaper_factor *factor)
 {
 	u64 tmp = 0;
 
@@ -929,6 +1027,15 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
 				tmp = QM_CQC_VFT_VALID;
 			}
 			break;
+		case SHAPER_VFT:
+			if (qm->ver >= QM_HW_V3) {
+				tmp = factor->cir_b |
+				(factor->cir_u << QM_SHAPER_FACTOR_CIR_U_SHIFT) |
+				(factor->cir_s << QM_SHAPER_FACTOR_CIR_S_SHIFT) |
+				(QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) |
+				(factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT);
+			}
+			break;
 		}
 	}
 
@@ -939,6 +1046,7 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
 static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
 			     u32 fun_num, u32 base, u32 number)
 {
+	struct qm_shaper_factor *factor = &qm->factor[fun_num];
 	unsigned int val;
 	int ret;
 
@@ -950,9 +1058,12 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
 
 	writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR);
 	writel(type, qm->io_base + QM_VFT_CFG_TYPE);
+	if (type == SHAPER_VFT)
+		fun_num |= base << QM_SHAPER_VFT_OFFSET;
+
 	writel(fun_num, qm->io_base + QM_VFT_CFG);
 
-	qm_vft_data_cfg(qm, type, base, number);
+	qm_vft_data_cfg(qm, type, base, number, factor);
 
 	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
 	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
@@ -962,6 +1073,27 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
 					  POLL_TIMEOUT);
 }
 
+static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num)
+{
+	int ret, i;
+
+	qm->factor[fun_num].func_qos = QM_QOS_MAX_VAL;
+	ret = qm_get_shaper_para(QM_QOS_MAX_VAL * QM_QOS_RATE, &qm->factor[fun_num]);
+	if (ret) {
+		dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n");
+		return ret;
+	}
+	writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG);
+	for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) {
+		/* The base number of queue reuse for different alg type */
+		ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* The config should be conducted after qm_dev_mem_reset() */
 static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
 			      u32 number)
@@ -974,7 +1106,21 @@ static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
 			return ret;
 	}
 
+	/* init default shaper qos val */
+	if (qm->ver >= QM_HW_V3) {
+		ret = qm_shaper_init_vft(qm, fun_num);
+		if (ret)
+			goto back_sqc_cqc;
+	}
+
 	return 0;
+back_sqc_cqc:
+	/*
+	 * Best-effort rollback; keep the shaper error in ret for the caller.
+	 */
+	for (i = SQC_VFT; i <= CQC_VFT; i++)
+		qm_set_vft_common(qm, i, fun_num, 0, 0);
+	return ret;
 }
 
 static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
@@ -3113,6 +3259,7 @@ void hisi_qm_uninit(struct hisi_qm *qm)
 	struct device *dev = &pdev->dev;
 
 	qm_cmd_uninit(qm);
+	kfree(qm->factor);
 	down_write(&qm->qps_lock);
 
 	if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -3842,6 +3989,149 @@ static int qm_clear_vft_config(struct hisi_qm *qm)
 	return 0;
 }
 
+static int qm_func_shaper_enable(struct hisi_qm *qm, u32 fun_index, u32 qos)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 ir = qos * QM_QOS_RATE;
+	int ret, total_vfs, i;
+
+	total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+	if (fun_index > total_vfs)
+		return -EINVAL;
+
+	qm->factor[fun_index].func_qos = qos;
+
+	ret = qm_get_shaper_para(ir, &qm->factor[fun_index]);
+	if (ret) {
+		dev_err(dev, "failed to calculate shaper parameter!\n");
+		return -EINVAL;
+	}
+
+	for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) {
+		/* The base number of queue reuse for different alg type */
+		ret = qm_set_vft_common(qm, SHAPER_VFT, fun_index, i, 1);
+		if (ret) {
+			dev_err(dev, "type: %d, failed to set shaper vft!\n", i);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
+{
+	int buflen = strlen(buf);
+	int ret, i;
+
+	for (i = 0; i < buflen; i++) {
+		if (!isdigit(buf[i]))
+			return -EINVAL;
+	}
+
+	ret = sscanf(buf, "%ld", val);
+	if (ret != QM_QOS_VAL_NUM)
+		return -EINVAL;
+
+	return 0;
+}
+
+static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char tbuf[QM_DBG_READ_LEN];
+	int tmp1, bus, device, function, len, ret;
+	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
+	/* Same size as tbuf: the "%s" scan below can copy any tbuf token here */
+	char val_buf[QM_DBG_READ_LEN] = {0};
+	unsigned int fun_index;
+	unsigned long val = 0;
+
+	if (qm->fun_type == QM_HW_VF)
+		return -EINVAL;
+
+	/* Mailbox and reset cannot be operated at the same time */
+	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		pci_err(qm->pdev, "dev resetting, write alg qos failed!\n");
+		return -EAGAIN;
+	}
+
+	if (*pos != 0) {
+		ret = 0;
+		goto err_get_status;
+	}
+
+	if (count >= QM_DBG_READ_LEN) {
+		ret = -ENOSPC;
+		goto err_get_status;
+	}
+
+	len = simple_write_to_buffer(tbuf, QM_DBG_READ_LEN - 1, pos, buf, count);
+	if (len < 0) {
+		ret = len;
+		goto err_get_status;
+	}
+
+	tbuf[len] = '\0';
+	ret = sscanf(tbuf, "%s %s", tbuf_bdf, val_buf);
+	if (ret != QM_QOS_PARAM_NUM) {
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	ret = qm_qos_value_init(val_buf, &val);
+	if (val == 0 || val > QM_QOS_MAX_VAL || ret) {
+		pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	ret = sscanf(tbuf_bdf, "%d:%x:%d.%d", &tmp1, &bus, &device, &function);
+	if (ret != QM_QOS_BDF_PARAM_NUM) {
+		pci_err(qm->pdev, "input pci bdf value is error!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	/* 8 functions per PCI device; qm->factor[] is indexed by this value */
+	fun_index = device * 8 + function;
+	ret = qm_func_shaper_enable(qm, fun_index, val);
+	if (ret) {
+		pci_err(qm->pdev, "failed to enable function shaper!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	ret = count;
+
+err_get_status:
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	return ret;
+}
+
+static const struct file_operations qm_algqos_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.write = qm_algqos_write,
+};
+
+/**
+ * hisi_qm_set_algqos_init() - Initialize function qos debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Create function qos debugfs files.
+ */
+static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
+{
+	if (qm->fun_type == QM_HW_PF)
+		debugfs_create_file("alg_qos", 0644, qm->debug.debug_root,
+				    qm, &qm_algqos_fops);
+	else
+		debugfs_create_file("alg_qos", 0444, qm->debug.debug_root,
+				    qm, &qm_algqos_fops);
+}
+
 /**
  * hisi_qm_sriov_enable() - enable virtual functions
  * @pdev: the PCIe device
@@ -3896,6 +4186,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
 int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
 {
 	struct hisi_qm *qm = pci_get_drvdata(pdev);
+	int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
 
 	if (pci_vfs_assigned(pdev)) {
 		pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
@@ -3909,6 +4200,9 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
 	}
 
 	pci_disable_sriov(pdev);
+	/* clear vf function shaper configure array */
+	memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
+
 	return qm_clear_vft_config(qm);
 }
 EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index 8a36bade103d8..035eaf8c442dd 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -76,6 +76,9 @@
 #define QM_Q_DEPTH			1024
 #define QM_MIN_QNUM                     2
 #define HISI_ACC_SGL_SGE_NR_MAX		255
+#define QM_SHAPER_CFG			0x100164
+#define QM_SHAPER_ENABLE		BIT(30)
+#define QM_SHAPER_TYPE1_OFFSET		10
 
 /* page number for queue file region */
 #define QM_DOORBELL_PAGE_NR		1
@@ -148,6 +151,14 @@ struct qm_debug {
 	struct debugfs_file files[DEBUG_FILE_NUM];
 };
 
+struct qm_shaper_factor {
+	u32 func_qos;
+	u64 cir_b;
+	u64 cir_u;
+	u64 cir_s;
+	u64 cbs_s;
+};
+
 struct qm_dma {
 	void *va;
 	dma_addr_t dma;
@@ -262,6 +273,9 @@ struct hisi_qm {
 	resource_size_t db_phys_base;
 	struct uacce_device *uacce;
 	int mode;
+	struct qm_shaper_factor *factor;
+	u32 mb_qos;
+	u32 type_rate;
 };
 
 struct hisi_qp_status {
-- 
GitLab


From cc0c40c613d2c7a00f3bce4770a925dc56672f01 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:44 +0800
Subject: [PATCH 2994/3804] crypto: hisilicon/qm - add the "alg_qos" file node

1. Just move the code as needed.
2. Add the "alg_qos" file node to the device's debugfs directory.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 284 +++++++++++++++++++---------------
 1 file changed, 155 insertions(+), 129 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 2dd450fdc01a4..4350b67968217 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -3125,62 +3125,6 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
 	return 0;
 }
 
-static int hisi_qm_memory_init(struct hisi_qm *qm)
-{
-	struct device *dev = &qm->pdev->dev;
-	size_t qp_dma_size, off = 0;
-	int i, ret = 0;
-
-#define QM_INIT_BUF(qm, type, num) do { \
-	(qm)->type = ((qm)->qdma.va + (off)); \
-	(qm)->type##_dma = (qm)->qdma.dma + (off); \
-	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
-} while (0)
-
-	idr_init(&qm->qp_idr);
-	qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) +
-			QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
-			QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
-			QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
-	qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
-					 GFP_ATOMIC);
-	dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
-	if (!qm->qdma.va)
-		return -ENOMEM;
-
-	QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH);
-	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
-	QM_INIT_BUF(qm, sqc, qm->qp_num);
-	QM_INIT_BUF(qm, cqc, qm->qp_num);
-
-	qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
-	if (!qm->qp_array) {
-		ret = -ENOMEM;
-		goto err_alloc_qp_array;
-	}
-
-	/* one more page for device or qp statuses */
-	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
-		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
-	qp_dma_size = PAGE_ALIGN(qp_dma_size);
-	for (i = 0; i < qm->qp_num; i++) {
-		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
-		if (ret)
-			goto err_init_qp_mem;
-
-		dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size);
-	}
-
-	return ret;
-
-err_init_qp_mem:
-	hisi_qp_memory_uninit(qm, i);
-err_alloc_qp_array:
-	dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
-
-	return ret;
-}
-
 static void hisi_qm_pre_init(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -3661,79 +3605,6 @@ static int qm_debugfs_atomic64_get(void *data, u64 *val)
 DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
 			 qm_debugfs_atomic64_set, "%llu\n");
 
-/**
- * hisi_qm_debug_init() - Initialize qm related debugfs files.
- * @qm: The qm for which we want to add debugfs files.
- *
- * Create qm related debugfs files.
- */
-void hisi_qm_debug_init(struct hisi_qm *qm)
-{
-	struct qm_dfx *dfx = &qm->debug.dfx;
-	struct dentry *qm_d;
-	void *data;
-	int i;
-
-	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
-	qm->debug.qm_d = qm_d;
-
-	/* only show this in PF */
-	if (qm->fun_type == QM_HW_PF) {
-		qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
-		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
-			qm_create_debugfs_file(qm, qm_d, i);
-	}
-
-	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
-
-	debugfs_create_file("cmd", 0444, qm->debug.qm_d, qm, &qm_cmd_fops);
-
-	debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
-			&qm_status_fops);
-	for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
-		data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
-		debugfs_create_file(qm_dfx_files[i].name,
-			0644,
-			qm_d,
-			data,
-			&qm_atomic64_ops);
-	}
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
-
-/**
- * hisi_qm_debug_regs_clear() - clear qm debug related registers.
- * @qm: The qm for which we want to clear its debug registers.
- */
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
-{
-	struct qm_dfx_registers *regs;
-	int i;
-
-	/* clear current_qm */
-	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
-	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
-
-	/* clear current_q */
-	writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-	writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
-	/*
-	 * these registers are reading and clearing, so clear them after
-	 * reading them.
-	 */
-	writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
-
-	regs = qm_dfx_regs;
-	for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
-		readl(qm->io_base + regs->reg_offset);
-		regs++;
-	}
-
-	writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
-
 static void qm_hw_error_init(struct hisi_qm *qm)
 {
 	struct hisi_qm_err_info *err_info = &qm->err_info;
@@ -4132,6 +4003,83 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
 				    qm, &qm_algqos_fops);
 }
 
+/**
+ * hisi_qm_debug_init() - Initialize qm related debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Create qm related debugfs files.
+ */
+void hisi_qm_debug_init(struct hisi_qm *qm)
+{
+	struct qm_dfx *dfx = &qm->debug.dfx;
+	struct dentry *qm_d;
+	void *data;
+	int i;
+
+	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
+	qm->debug.qm_d = qm_d;
+
+	/* only show this in PF */
+	if (qm->fun_type == QM_HW_PF) {
+		qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
+		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
+			qm_create_debugfs_file(qm, qm->debug.qm_d, i);
+	}
+
+	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
+
+	debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
+
+	debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
+			&qm_status_fops);
+	for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
+		data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
+		debugfs_create_file(qm_dfx_files[i].name,
+			0644,
+			qm_d,
+			data,
+			&qm_atomic64_ops);
+	}
+
+	if (qm->ver >= QM_HW_V3)
+		hisi_qm_set_algqos_init(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
+
+/**
+ * hisi_qm_debug_regs_clear() - clear qm debug related registers.
+ * @qm: The qm for which we want to clear its debug registers.
+ */
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
+{
+	struct qm_dfx_registers *regs;
+	int i;
+
+	/* clear current_qm */
+	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	/* clear current_q */
+	writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+	writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	/*
+	 * these registers are reading and clearing, so clear them after
+	 * reading them.
+	 */
+	writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+	regs = qm_dfx_regs;
+	for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
+		readl(qm->io_base + regs->reg_offset);
+		regs++;
+	}
+
+	/* clear clear_enable */
+	writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
+
 /**
  * hisi_qm_sriov_enable() - enable virtual functions
  * @pdev: the PCIe device
@@ -5369,6 +5317,84 @@ err_disable_pcidev:
 	return ret;
 }
 
+static int hisi_qp_alloc_memory(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	size_t qp_dma_size;
+	int i, ret;
+
+	qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
+	if (!qm->qp_array)
+		return -ENOMEM;
+
+	/* one more page for device or qp statuses */
+	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
+		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
+	qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
+	for (i = 0; i < qm->qp_num; i++) {
+		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
+		if (ret)
+			goto err_init_qp_mem;
+
+		dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size);
+	}
+
+	return 0;
+err_init_qp_mem:
+	hisi_qp_memory_uninit(qm, i);
+
+	return ret;
+}
+
+static int hisi_qm_memory_init(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	int ret, total_vfs;
+	size_t off = 0;
+
+	total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+	qm->factor = kcalloc(total_vfs + 1, sizeof(struct qm_shaper_factor), GFP_KERNEL);
+	if (!qm->factor)
+		return -ENOMEM;
+
+#define QM_INIT_BUF(qm, type, num) do { \
+	(qm)->type = ((qm)->qdma.va + (off)); \
+	(qm)->type##_dma = (qm)->qdma.dma + (off); \
+	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
+} while (0)
+
+	idr_init(&qm->qp_idr);
+	qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
+			QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
+	qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
+					 GFP_ATOMIC);
+	dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
+	if (!qm->qdma.va) {
+		ret =  -ENOMEM;
+		goto err_alloc_qdma;
+	}
+
+	QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH);
+	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, sqc, qm->qp_num);
+	QM_INIT_BUF(qm, cqc, qm->qp_num);
+
+	ret = hisi_qp_alloc_memory(qm);
+	if (ret)
+		goto err_alloc_qp_array;
+
+	return 0;
+
+err_alloc_qp_array:
+	dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
+err_alloc_qdma:
+	kfree(qm->factor);
+
+	return ret;
+}
+
 /**
  * hisi_qm_init() - Initialize configures about qm.
  * @qm: The qm needing init.
-- 
GitLab


From 362c50bad3a792969f8142372a0813aadee89a61 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:45 +0800
Subject: [PATCH 2995/3804] crypto: hisilicon/qm - merges the work
 initialization process into a single function

Move the work initialization out of qm initialization and merge it
into a single function.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 4350b67968217..754ddb5dec219 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -5317,6 +5317,16 @@ err_disable_pcidev:
 	return ret;
 }
 
+static void hisi_qm_init_work(struct hisi_qm *qm)
+{
+	INIT_WORK(&qm->work, qm_work_process);
+	if (qm->fun_type == QM_HW_PF)
+		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
+
+	if (qm->ver > QM_HW_V2)
+		INIT_WORK(&qm->cmd_process, qm_cmd_process);
+}
+
 static int hisi_qp_alloc_memory(struct hisi_qm *qm)
 {
 	struct device *dev = &qm->pdev->dev;
@@ -5432,13 +5442,7 @@ int hisi_qm_init(struct hisi_qm *qm)
 	if (ret)
 		goto err_alloc_uacce;
 
-	INIT_WORK(&qm->work, qm_work_process);
-	if (qm->fun_type == QM_HW_PF)
-		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
-
-	if (qm->ver >= QM_HW_V3)
-		INIT_WORK(&qm->cmd_process, qm_cmd_process);
-
+	hisi_qm_init_work(qm);
 	qm_cmd_init(qm);
 	atomic_set(&qm->status.flags, QM_INIT);
 
-- 
GitLab


From 2966d9d3078c623f48054ef1bfe9a975e5d1fe0c Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:46 +0800
Subject: [PATCH 2996/3804] crypto: hisilicon/qm - add pf ping single vf
 function

Add a function that lets the PF ping a single VF over the
inter-function communication channel. It is used when a VF reads its QoS.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 36 +++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 754ddb5dec219..735c8b07b1e9e 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -2142,6 +2142,42 @@ static void qm_trigger_pf_interrupt(struct hisi_qm *qm)
 	writel(val, qm->io_base + QM_IFC_INT_SET_V);
 }
 
+static int qm_ping_single_vf(struct hisi_qm *qm, u64 cmd, u32 fun_num)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct qm_mailbox mailbox;
+	int cnt = 0;
+	u64 val;
+	int ret;
+
+	qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, fun_num, 0);
+	mutex_lock(&qm->mailbox_lock);
+	ret = qm_mb_nolock(qm, &mailbox);
+	if (ret) {
+		dev_err(dev, "failed to send command to vf(%u)!\n", fun_num);
+		goto err_unlock;
+	}
+
+	qm_trigger_vf_interrupt(qm, fun_num);
+	while (true) {
+		msleep(QM_WAIT_DST_ACK);
+		val = readq(qm->io_base + QM_IFC_READY_STATUS);
+		/* if VF respond, PF notifies VF successfully. */
+		if (!(val & BIT(fun_num)))
+			goto err_unlock;
+
+		if (++cnt > QM_MAX_PF_WAIT_COUNT) {
+			dev_err(dev, "failed to get response from VF(%u)!\n", fun_num);
+			ret = -ETIMEDOUT;
+			break;
+		}
+	}
+
+err_unlock:
+	mutex_unlock(&qm->mailbox_lock);
+	return ret;
+}
+
 static int qm_ping_all_vfs(struct hisi_qm *qm, u64 cmd)
 {
 	struct device *dev = &qm->pdev->dev;
-- 
GitLab


From 3bbf0783636be8fd672907df25904288f14566f2 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:47 +0800
Subject: [PATCH 2997/3804] crypto: hisilicon/qm - supports to inquiry each
 function's QoS

1. The ACC driver supports querying each function's QoS in the host
and in a VM. The QoS is read through the device's debugfs attribute
file "alg_qos", e.g. "cat alg_qos".
2. Modify the communication process between PF and VF as needed.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 181 ++++++++++++++++++++++++++++++++--
 1 file changed, 174 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 735c8b07b1e9e..580709408cfc7 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -51,6 +51,7 @@
 #define QM_MB_CMD_DATA_ADDR_L		0x304
 #define QM_MB_CMD_DATA_ADDR_H		0x308
 #define QM_MB_PING_ALL_VFS		0xffff
+#define QM_MB_CMD_DATA_SHIFT		32
 #define QM_MB_CMD_DATA_MASK		GENMASK(31, 0)
 
 /* sqc shift */
@@ -185,6 +186,7 @@
 
 /* interfunction communication */
 #define QM_IFC_READY_STATUS		0x100128
+#define QM_IFC_C_STS_M			0x10012C
 #define QM_IFC_INT_SET_P		0x100130
 #define QM_IFC_INT_CFG			0x100134
 #define QM_IFC_INT_SOURCE_P		0x100138
@@ -256,6 +258,7 @@
 #define QM_SHAPER_CBS_B			1
 #define QM_SHAPER_CBS_S			16
 #define QM_SHAPER_VFT_OFFSET		6
+#define WAIT_FOR_QOS_VF			100
 #define QM_QOS_MIN_ERROR_RATE		5
 #define QM_QOS_TYPICAL_NUM		8
 #define QM_SHAPER_MIN_CBS_S		8
@@ -328,6 +331,8 @@ enum qm_mb_cmd {
 	QM_VF_PREPARE_FAIL,
 	QM_VF_START_DONE,
 	QM_VF_START_FAIL,
+	QM_PF_SET_QOS,
+	QM_VF_GET_QOS,
 };
 
 struct qm_cqe {
@@ -2124,7 +2129,7 @@ static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num)
 	u32 val;
 
 	val = readl(qm->io_base + QM_IFC_INT_CFG);
-	val |= ~QM_IFC_SEND_ALL_VFS;
+	val &= ~QM_IFC_SEND_ALL_VFS;
 	val |= fun_num;
 	writel(val, qm->io_base + QM_IFC_INT_CFG);
 
@@ -3926,6 +3931,139 @@ static int qm_func_shaper_enable(struct hisi_qm *qm, u32 fun_index, u32 qos)
 	return 0;
 }
 
+static u32 qm_get_shaper_vft_qos(struct hisi_qm *qm, u32 fun_index)
+{
+	u64 cir_u = 0, cir_b = 0, cir_s = 0;
+	u64 shaper_vft, ir_calc, ir;
+	unsigned int val;
+	u32 error_rate;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), POLL_PERIOD,
+					 POLL_TIMEOUT);
+	if (ret)
+		return 0;
+
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+	writel(SHAPER_VFT, qm->io_base + QM_VFT_CFG_TYPE);
+	writel(fun_index, qm->io_base + QM_VFT_CFG);
+
+	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), POLL_PERIOD,
+					 POLL_TIMEOUT);
+	if (ret)
+		return 0;
+
+	shaper_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+		  ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) << 32);
+
+	cir_b = shaper_vft & QM_SHAPER_CIR_B_MASK;
+	cir_u = shaper_vft & QM_SHAPER_CIR_U_MASK;
+	cir_u = cir_u >> QM_SHAPER_FACTOR_CIR_U_SHIFT;
+
+	cir_s = shaper_vft & QM_SHAPER_CIR_S_MASK;
+	cir_s = cir_s >> QM_SHAPER_FACTOR_CIR_S_SHIFT;
+
+	ir_calc = acc_shaper_para_calc(cir_b, cir_u, cir_s);
+
+	ir = qm->factor[fun_index].func_qos * QM_QOS_RATE;
+
+	error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
+	if (error_rate > QM_QOS_MIN_ERROR_RATE) {
+		pci_err(qm->pdev, "error_rate: %u, get function qos is error!\n", error_rate);
+		return 0;
+	}
+
+	return ir;
+}
+
+static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
+{
+	struct device *dev = &qm->pdev->dev;
+	u64 mb_cmd;
+	u32 qos;
+	int ret;
+
+	qos = qm_get_shaper_vft_qos(qm, fun_num);
+	if (!qos) {
+		dev_err(dev, "function(%u) failed to get qos by PF!\n", fun_num);
+		return;
+	}
+
+	mb_cmd = QM_PF_SET_QOS | (u64)qos << QM_MB_CMD_DATA_SHIFT;
+	ret = qm_ping_single_vf(qm, mb_cmd, fun_num);
+	if (ret)
+		dev_err(dev, "failed to send cmd to VF(%u)!\n", fun_num);
+}
+
+/* VF asks the PF for its QoS and waits until the reply lands in qm->mb_qos. */
+static int qm_vf_read_qos(struct hisi_qm *qm)
+{
+	int ret, cnt = 0;
+
+	/* reset mailbox qos val */
+	qm->mb_qos = 0;
+
+	/* vf ping pf to get function qos */
+	if (qm->ops->ping_pf) {
+		ret = qm->ops->ping_pf(qm, QM_VF_GET_QOS);
+		if (ret) {
+			pci_err(qm->pdev, "failed to send cmd to PF to get qos!\n");
+			return ret;
+		}
+	}
+
+	while (true) {
+		msleep(QM_WAIT_DST_ACK);
+		if (qm->mb_qos)
+			break;
+
+		if (++cnt > QM_MAX_VF_WAIT_COUNT) {
+			pci_err(qm->pdev, "PF ping VF timeout!\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char tbuf[QM_DBG_READ_LEN];
+	u32 qos_val, ir;
+	int ret;
+
+	/* Mailbox and reset cannot be operated at the same time */
+	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
+		return  -EAGAIN;
+	}
+
+	if (qm->fun_type == QM_HW_PF) {
+		ir = qm_get_shaper_vft_qos(qm, 0);
+	} else {
+		ret = qm_vf_read_qos(qm);
+		if (ret)
+			goto err_get_status;
+		ir = qm->mb_qos;
+	}
+
+	qos_val = ir / QM_QOS_RATE;
+	ret = scnprintf(tbuf, QM_DBG_READ_LEN, "%u\n", qos_val);
+
+	ret =  simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_get_status:
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	return ret;
+}
+
 static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
 {
 	int buflen = strlen(buf);
@@ -4020,6 +4158,7 @@ err_get_status:
 static const struct file_operations qm_algqos_fops = {
 	.owner = THIS_MODULE,
 	.open = simple_open,
+	.read = qm_algqos_read,
 	.write = qm_algqos_write,
 };
 
@@ -5129,10 +5268,8 @@ err_get_status:
 	qm_reset_bit_clear(qm);
 }
 
-static void qm_cmd_process(struct work_struct *cmd_process)
+static void qm_handle_cmd_msg(struct hisi_qm *qm, u32 fun_num)
 {
-	struct hisi_qm *qm = container_of(cmd_process,
-					struct hisi_qm, cmd_process);
 	struct device *dev = &qm->pdev->dev;
 	u64 msg;
 	u32 cmd;
@@ -5142,8 +5279,8 @@ static void qm_cmd_process(struct work_struct *cmd_process)
 	 * Get the msg from source by sending mailbox. Whether message is got
 	 * successfully, destination needs to ack source by clearing the interrupt.
 	 */
-	ret = qm_get_mb_cmd(qm, &msg, 0);
-	qm_clear_cmd_interrupt(qm, 0);
+	ret = qm_get_mb_cmd(qm, &msg, fun_num);
+	qm_clear_cmd_interrupt(qm, BIT(fun_num));
 	if (ret) {
 		dev_err(dev, "failed to get msg from source!\n");
 		return;
@@ -5157,12 +5294,42 @@ static void qm_cmd_process(struct work_struct *cmd_process)
 	case QM_PF_SRST_PREPARE:
 		qm_pf_reset_vf_process(qm, QM_SOFT_RESET);
 		break;
+	case QM_VF_GET_QOS:
+		qm_vf_get_qos(qm, fun_num);
+		break;
+	case QM_PF_SET_QOS:
+		qm->mb_qos = msg >> QM_MB_CMD_DATA_SHIFT;
+		break;
 	default:
-		dev_err(dev, "unsupported cmd %u sent by PF!\n", cmd);
+		dev_err(dev, "unsupported cmd %u sent by function(%u)!\n", cmd, fun_num);
 		break;
 	}
 }
 
+static void qm_cmd_process(struct work_struct *cmd_process)
+{
+	struct hisi_qm *qm = container_of(cmd_process,
+					struct hisi_qm, cmd_process);
+	u32 vfs_num = qm->vfs_num;
+	u64 val;
+	u32 i;
+
+	if (qm->fun_type == QM_HW_PF) {
+		val = readq(qm->io_base + QM_IFC_INT_SOURCE_P);
+		if (!val)
+			return;
+
+		for (i = 1; i <= vfs_num; i++) {
+			if (val & BIT(i))
+				qm_handle_cmd_msg(qm, i);
+		}
+
+		return;
+	}
+
+	qm_handle_cmd_msg(qm, 0);
+}
+
 /**
  * hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list.
  * @qm: The qm needs add.
-- 
GitLab


From 3d2a429271bb622da48983631625c20de3b5f1e5 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:48 +0800
Subject: [PATCH 2998/3804] crypto: hisilicon/sec - adds the max shaper type
 rate

The SEC driver supports configuring each function's QoS in the host
on Kunpeng930. The SEC driver needs to configure the maximum shaper
type rate.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 8ab4e67b8a417..d120ce3e34eda 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -98,6 +98,7 @@
 
 #define SEC_SQE_MASK_OFFSET		64
 #define SEC_SQE_MASK_LEN		48
+#define SEC_SHAPER_TYPE_RATE		128
 
 struct sec_hw_error {
 	u32 int_msk;
@@ -874,6 +875,7 @@ static void sec_qm_uninit(struct hisi_qm *qm)
 
 static int sec_probe_init(struct sec_dev *sec)
 {
+	u32 type_rate = SEC_SHAPER_TYPE_RATE;
 	struct hisi_qm *qm = &sec->qm;
 	int ret;
 
@@ -881,6 +883,11 @@ static int sec_probe_init(struct sec_dev *sec)
 		ret = sec_pf_probe_init(sec);
 		if (ret)
 			return ret;
+		/* enable shaper type 0 */
+		if (qm->ver >= QM_HW_V3) {
+			type_rate |= QM_SHAPER_ENABLE;
+			qm->type_rate = type_rate;
+		}
 	}
 
 	return 0;
-- 
GitLab


From c02f5302e46a2505cb0a6170470759a7db929979 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:49 +0800
Subject: [PATCH 2999/3804] crypto: hisilicon/hpre - adds the max shaper type
 rate

The HPRE driver supports configuring each function's QoS in the host
on Kunpeng930. The HPRE driver needs to configure the maximum shaper
type rate.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 37c5296008474..8b0640fb04be6 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -92,6 +92,7 @@
 #define HPRE_QM_PM_FLR			BIT(11)
 #define HPRE_QM_SRIOV_FLR		BIT(12)
 
+#define HPRE_SHAPER_TYPE_RATE		128
 #define HPRE_VIA_MSI_DSM		1
 #define HPRE_SQE_MASK_OFFSET		8
 #define HPRE_SQE_MASK_LEN		24
@@ -947,6 +948,7 @@ static int hpre_pf_probe_init(struct hpre *hpre)
 
 static int hpre_probe_init(struct hpre *hpre)
 {
+	u32 type_rate = HPRE_SHAPER_TYPE_RATE;
 	struct hisi_qm *qm = &hpre->qm;
 	int ret;
 
@@ -954,6 +956,11 @@ static int hpre_probe_init(struct hpre *hpre)
 		ret = hpre_pf_probe_init(hpre);
 		if (ret)
 			return ret;
+		/* Enable shaper type 0 */
+		if (qm->ver >= QM_HW_V3) {
+			type_rate |= QM_SHAPER_ENABLE;
+			qm->type_rate = type_rate;
+		}
 	}
 
 	return 0;
-- 
GitLab


From 38a9eb8182a24c7ef2dbe82ab46cafe8f8e9b271 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Fri, 11 Jun 2021 17:06:50 +0800
Subject: [PATCH 3000/3804] crypto: hisilicon/zip - adds the max shaper type
 rate

The ZIP driver supports configuring each function's QoS in the host
on Kunpeng930. The ZIP driver needs to configure the maximum shaper
type rate.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/zip/zip_main.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 9e4c49cd6f3ab..f8482ceebf2ab 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -102,6 +102,8 @@
 #define HZIP_PREFETCH_ENABLE		(~(BIT(26) | BIT(17) | BIT(0)))
 #define HZIP_SVA_PREFETCH_DISABLE	BIT(26)
 #define HZIP_SVA_DISABLE_READY		(BIT(26) | BIT(30))
+#define HZIP_SHAPER_RATE_COMPRESS	252
+#define HZIP_SHAPER_RATE_DECOMPRESS	229
 #define HZIP_DELAY_1_US		1
 #define HZIP_POLL_TIMEOUT_US	1000
 
@@ -823,6 +825,7 @@ static void hisi_zip_qm_uninit(struct hisi_qm *qm)
 
 static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
 {
+	u32 type_rate = HZIP_SHAPER_RATE_COMPRESS;
 	struct hisi_qm *qm = &hisi_zip->qm;
 	int ret;
 
@@ -830,6 +833,14 @@ static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
 		ret = hisi_zip_pf_probe_init(hisi_zip);
 		if (ret)
 			return ret;
+		/* enable shaper type 0 */
+		if (qm->ver >= QM_HW_V3) {
+			type_rate |= QM_SHAPER_ENABLE;
+
+			/* ZIP need to enable shaper type 1 */
+			type_rate |= HZIP_SHAPER_RATE_DECOMPRESS << QM_SHAPER_TYPE1_OFFSET;
+			qm->type_rate = type_rate;
+		}
 	}
 
 	return 0;
-- 
GitLab


From d382c5be4cc24597d5d12800558e537bbc12a71a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 13:02:07 +0200
Subject: [PATCH 3001/3804] media: dvb_ca_en50221: avoid speculation from CA
 slot

As warned by smatch:
	drivers/media/dvb-core/dvb_ca_en50221.c:1392 dvb_ca_en50221_io_do_ioctl() warn: potential spectre issue 'ca->slot_info' [r] (local cap)

There's a potential of using a CAM ioctl for speculation.

The risk here is minimal, as only a small subset of DVB
boards have CI, with a CAM module installed. Also, exploiting
it would require a user capable of starting a DVB application.

There are probably a lot of easier ways to try to exploit.

Yet, it doesn't hurt to address it.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-core/dvb_ca_en50221.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c
index b7e4a33711761..15a08d8c69ef8 100644
--- a/drivers/media/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb-core/dvb_ca_en50221.c
@@ -1386,6 +1386,7 @@ static int dvb_ca_en50221_io_do_ioctl(struct file *file,
 			err = -EINVAL;
 			goto out_unlock;
 		}
+		slot = array_index_nospec(slot, ca->slot_count);
 
 		info->type = CA_CI_LINK;
 		info->flags = 0;
-- 
GitLab


From abc0226df64dc137b48b911c1fe4319aec5891bb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 13:13:54 +0200
Subject: [PATCH 3002/3804] media: dvb_net: avoid speculation from net slot

The risk of speculation is actually almost non-existent here,
as there are very few users of TCP/IP using the DVB stack,
as this is mainly used with DVB-S/S2 cards, and only by people
that receive TCP/IP from satellite connections, which greatly
limits the number of users of such a feature(*).

(*) In theory, DVB-C cards could also benefit from it, but I
have yet to see hardware that supports it.

Yet, fixing it is trivial.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-core/dvb_net.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 89620da983bab..dddebea644bb8 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/nospec.h>
 #include <linux/etherdevice.h>
 #include <linux/dvb/net.h>
 #include <linux/uio.h>
@@ -1462,14 +1463,20 @@ static int dvb_net_do_ioctl(struct file *file,
 		struct net_device *netdev;
 		struct dvb_net_priv *priv_data;
 		struct dvb_net_if *dvbnetif = parg;
+		int if_num = dvbnetif->if_num;
 
-		if (dvbnetif->if_num >= DVB_NET_DEVICES_MAX ||
-		    !dvbnet->state[dvbnetif->if_num]) {
+		if (if_num >= DVB_NET_DEVICES_MAX) {
 			ret = -EINVAL;
 			goto ioctl_error;
 		}
+		if_num = array_index_nospec(if_num, DVB_NET_DEVICES_MAX);
 
-		netdev = dvbnet->device[dvbnetif->if_num];
+		if (!dvbnet->state[if_num]) {
+			ret = -EINVAL;
+			goto ioctl_error;
+		}
+
+		netdev = dvbnet->device[if_num];
 
 		priv_data = netdev_priv(netdev);
 		dvbnetif->pid=priv_data->pid;
@@ -1522,14 +1529,20 @@ static int dvb_net_do_ioctl(struct file *file,
 		struct net_device *netdev;
 		struct dvb_net_priv *priv_data;
 		struct __dvb_net_if_old *dvbnetif = parg;
+		int if_num = dvbnetif->if_num;
+
+		if (if_num >= DVB_NET_DEVICES_MAX) {
+			ret = -EINVAL;
+			goto ioctl_error;
+		}
+		if_num = array_index_nospec(if_num, DVB_NET_DEVICES_MAX);
 
-		if (dvbnetif->if_num >= DVB_NET_DEVICES_MAX ||
-		    !dvbnet->state[dvbnetif->if_num]) {
+		if (!dvbnet->state[if_num]) {
 			ret = -EINVAL;
 			goto ioctl_error;
 		}
 
-		netdev = dvbnet->device[dvbnetif->if_num];
+		netdev = dvbnet->device[if_num];
 
 		priv_data = netdev_priv(netdev);
 		dvbnetif->pid=priv_data->pid;
-- 
GitLab


From 1fec2ecc252301110e4149e6183fa70460d29674 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 9 Jun 2021 14:32:29 +0200
Subject: [PATCH 3003/3804] media: dvbdev: fix error logic at
 dvb_register_device()

As reported by smatch:

	drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:510 dvb_register_device() warn: '&dvbdev->list_head' not removed from list
	drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:530 dvb_register_device() warn: '&dvbdev->list_head' not removed from list
	drivers/media/dvb-core/dvbdev.c: drivers/media/dvb-core/dvbdev.c:545 dvb_register_device() warn: '&dvbdev->list_head' not removed from list

The error logic inside dvb_register_device() doesn't remove
devices from the dvb_adapter_list in case of errors.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-core/dvbdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 3862ddc86ec48..795d9bfaba5cf 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -506,6 +506,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
 			break;
 
 	if (minor == MAX_DVB_MINORS) {
+		list_del (&dvbdev->list_head);
 		kfree(dvbdevfops);
 		kfree(dvbdev);
 		up_write(&minor_rwsem);
@@ -526,6 +527,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
 		      __func__);
 
 		dvb_media_device_free(dvbdev);
+		list_del (&dvbdev->list_head);
 		kfree(dvbdevfops);
 		kfree(dvbdev);
 		mutex_unlock(&dvbdev_register_lock);
@@ -541,6 +543,7 @@ int dvb_register_device(struct dvb_adapter *adap, struct dvb_device **pdvbdev,
 		pr_err("%s: failed to create device dvb%d.%s%d (%ld)\n",
 		       __func__, adap->num, dnames[type], id, PTR_ERR(clsdev));
 		dvb_media_device_free(dvbdev);
+		list_del (&dvbdev->list_head);
 		kfree(dvbdevfops);
 		kfree(dvbdev);
 		return PTR_ERR(clsdev);
-- 
GitLab


From ba9139116bc053897e6fb16a51c463604c4da371 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 08:20:16 +0200
Subject: [PATCH 3004/3804] media: sun6i-csi: add a missing return code

As pointed out by smatch, there's a missing return code:

	drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c:485 sun6i_video_open() warn: missing error code 'ret'

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c b/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
index 3181d0781b613..07b2161392d21 100644
--- a/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
+++ b/drivers/media/platform/sunxi/sun6i-csi/sun6i_video.c
@@ -481,8 +481,10 @@ static int sun6i_video_open(struct file *file)
 		goto fh_release;
 
 	/* check if already powered */
-	if (!v4l2_fh_is_singular_file(file))
+	if (!v4l2_fh_is_singular_file(file)) {
+		ret = -EBUSY;
 		goto unlock;
+	}
 
 	ret = sun6i_csi_set_power(video->csi, true);
 	if (ret < 0)
-- 
GitLab


From 7f9197f11888c45d1aab470b7fd2c1f1fc1a2a35 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 08:33:12 +0200
Subject: [PATCH 3005/3804] media: saa7134: use more meaninful goto labels

Instead of just numbering fail0 to fail4, use more meaningful
goto labels.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/saa7134/saa7134-core.c | 34 ++++++++++++------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
index ec8dd41f9ebb9..97b1767f1fff1 100644
--- a/drivers/media/pci/saa7134/saa7134-core.c
+++ b/drivers/media/pci/saa7134/saa7134-core.c
@@ -1031,7 +1031,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	dev->media_dev = kzalloc(sizeof(*dev->media_dev), GFP_KERNEL);
 	if (!dev->media_dev) {
 		err = -ENOMEM;
-		goto fail0;
+		goto err_free_dev;
 	}
 	media_device_pci_init(dev->media_dev, pci_dev, dev->name);
 	dev->v4l2_dev.mdev = dev->media_dev;
@@ -1039,13 +1039,13 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 
 	err = v4l2_device_register(&pci_dev->dev, &dev->v4l2_dev);
 	if (err)
-		goto fail0;
+		goto err_free_dev;
 
 	/* pci init */
 	dev->pci = pci_dev;
 	if (pci_enable_device(pci_dev)) {
 		err = -EIO;
-		goto fail1;
+		goto err_v4l2_unregister;
 	}
 
 	/* pci quirks */
@@ -1095,7 +1095,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
 	if (err) {
 		pr_warn("%s: Oops: no 32bit PCI DMA ???\n", dev->name);
-		goto fail1;
+		goto err_v4l2_unregister;
 	}
 
 	/* board config */
@@ -1129,7 +1129,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 		err = -EBUSY;
 		pr_err("%s: can't get MMIO memory @ 0x%llx\n",
 		       dev->name,(unsigned long long)pci_resource_start(pci_dev,0));
-		goto fail1;
+		goto err_v4l2_unregister;
 	}
 	dev->lmmio = ioremap(pci_resource_start(pci_dev, 0),
 			     pci_resource_len(pci_dev, 0));
@@ -1138,7 +1138,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 		err = -EIO;
 		pr_err("%s: can't ioremap() MMIO memory\n",
 		       dev->name);
-		goto fail2;
+		goto err_release_mem_reg;
 	}
 
 	/* initialize hardware #1 */
@@ -1151,7 +1151,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	if (err < 0) {
 		pr_err("%s: can't get IRQ %d\n",
 		       dev->name,pci_dev->irq);
-		goto fail3;
+		goto err_iounmap;
 	}
 
 	/* wait a bit, register i2c bus */
@@ -1217,7 +1217,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	if (err < 0) {
 		pr_info("%s: can't register video device\n",
 		       dev->name);
-		goto fail4;
+		goto err_unregister_video;
 	}
 	pr_info("%s: registered device %s [v4l2]\n",
 	       dev->name, video_device_node_name(dev->video_dev));
@@ -1234,7 +1234,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	err = video_register_device(dev->vbi_dev,VFL_TYPE_VBI,
 				    vbi_nr[dev->nr]);
 	if (err < 0)
-		goto fail4;
+		goto err_unregister_video;
 	pr_info("%s: registered device %s\n",
 	       dev->name, video_device_node_name(dev->vbi_dev));
 
@@ -1248,7 +1248,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 		err = video_register_device(dev->radio_dev,VFL_TYPE_RADIO,
 					    radio_nr[dev->nr]);
 		if (err < 0)
-			goto fail4;
+			goto err_unregister_video;
 		pr_info("%s: registered device %s\n",
 		       dev->name, video_device_node_name(dev->radio_dev));
 	}
@@ -1259,7 +1259,7 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	err = v4l2_mc_create_media_graph(dev->media_dev);
 	if (err) {
 		pr_err("failed to create media graph\n");
-		goto fail4;
+		goto err_unregister_video;
 	}
 #endif
 	/* everything worked */
@@ -1278,24 +1278,24 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 #ifdef CONFIG_MEDIA_CONTROLLER
 	err = media_device_register(dev->media_dev);
 	if (err)
-		goto fail4;
+		goto err_unregister_video;
 #endif
 
 	return 0;
 
- fail4:
+err_unregister_video:
 	saa7134_unregister_video(dev);
 	saa7134_i2c_unregister(dev);
 	free_irq(pci_dev->irq, dev);
- fail3:
+err_iounmap:
 	saa7134_hwfini(dev);
 	iounmap(dev->lmmio);
- fail2:
+err_release_mem_reg:
 	release_mem_region(pci_resource_start(pci_dev,0),
 			   pci_resource_len(pci_dev,0));
- fail1:
+err_v4l2_unregister:
 	v4l2_device_unregister(&dev->v4l2_dev);
- fail0:
+err_free_dev:
 #ifdef CONFIG_MEDIA_CONTROLLER
 	kfree(dev->media_dev);
 #endif
-- 
GitLab


From 235406dca37ecf6f00e0378e965a3dd37590c389 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 08:40:58 +0200
Subject: [PATCH 3006/3804] media: saa7134: fix saa7134_initdev error handling
 logic

Smatch reported an issue there:
	drivers/media/pci/saa7134/saa7134-core.c:1302 saa7134_initdev() warn: '&dev->devlist' not removed from list

But besides freeing the list, the media controller graph also
needs to be cleaned up on errors. Address those issues.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/saa7134/saa7134-core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/media/pci/saa7134/saa7134-core.c b/drivers/media/pci/saa7134/saa7134-core.c
index 97b1767f1fff1..47158ab3956bf 100644
--- a/drivers/media/pci/saa7134/saa7134-core.c
+++ b/drivers/media/pci/saa7134/saa7134-core.c
@@ -1277,14 +1277,17 @@ static int saa7134_initdev(struct pci_dev *pci_dev,
 	 */
 #ifdef CONFIG_MEDIA_CONTROLLER
 	err = media_device_register(dev->media_dev);
-	if (err)
+	if (err) {
+		media_device_cleanup(dev->media_dev);
 		goto err_unregister_video;
+	}
 #endif
 
 	return 0;
 
 err_unregister_video:
 	saa7134_unregister_video(dev);
+	list_del(&dev->devlist);
 	saa7134_i2c_unregister(dev);
 	free_irq(pci_dev->irq, dev);
 err_iounmap:
-- 
GitLab


From 5368b1ee2939961a16e74972b69088433fc52195 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 08:57:02 +0200
Subject: [PATCH 3007/3804] media: siano: fix device register error path

As reported by smatch:
	drivers/media/common/siano/smsdvb-main.c:1231 smsdvb_hotplug() warn: '&client->entry' not removed from list

If an error occurs at the end of the registration logic, it won't
drop the device from the list.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/common/siano/smsdvb-main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c
index b8a163a47d09d..f80caaa333daf 100644
--- a/drivers/media/common/siano/smsdvb-main.c
+++ b/drivers/media/common/siano/smsdvb-main.c
@@ -1212,6 +1212,10 @@ static int smsdvb_hotplug(struct smscore_device_t *coredev,
 	return 0;
 
 media_graph_error:
+	mutex_lock(&g_smsdvb_clientslock);
+	list_del(&client->entry);
+	mutex_unlock(&g_smsdvb_clientslock);
+
 	smsdvb_debugfs_release(client);
 
 client_error:
-- 
GitLab


From dba328bab4c6fa4ec1ed3be616f7196865f2ce41 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 16:00:53 +0200
Subject: [PATCH 3008/3804] media: ttusb-dec: cleanup an error handling logic

Simplify the logic at ttusb_dec_send_command().

Besides avoiding some code duplication, as a side effect,
this could remove this false positive reported by smatch:

	drivers/media/usb/ttusb-dec/ttusb_dec.c:380 ttusb_dec_send_command() warn: inconsistent returns '&dec->usb_mutex'.
	  Locked on  : 330
	  Unlocked on: 354,365,380

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/ttusb-dec/ttusb_dec.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index a852ee5f7ac94..bfda46a36dc50 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -324,10 +324,10 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
 	if (!b)
 		return -ENOMEM;
 
-	if ((result = mutex_lock_interruptible(&dec->usb_mutex))) {
-		kfree(b);
+	result = mutex_lock_interruptible(&dec->usb_mutex);
+	if (result) {
 		printk("%s: Failed to lock usb mutex.\n", __func__);
-		return result;
+		goto err;
 	}
 
 	b[0] = 0xaa;
@@ -349,9 +349,7 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
 	if (result) {
 		printk("%s: command bulk message failed: error %d\n",
 		       __func__, result);
-		mutex_unlock(&dec->usb_mutex);
-		kfree(b);
-		return result;
+		goto err;
 	}
 
 	result = usb_bulk_msg(dec->udev, dec->result_pipe, b,
@@ -360,9 +358,7 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
 	if (result) {
 		printk("%s: result bulk message failed: error %d\n",
 		       __func__, result);
-		mutex_unlock(&dec->usb_mutex);
-		kfree(b);
-		return result;
+		goto err;
 	} else {
 		if (debug) {
 			printk(KERN_DEBUG "%s: result: %*ph\n",
@@ -373,12 +369,13 @@ static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command,
 			*result_length = b[3];
 		if (cmd_result && b[3] > 0)
 			memcpy(cmd_result, &b[4], b[3]);
+	}
 
-		mutex_unlock(&dec->usb_mutex);
+err:
+	mutex_unlock(&dec->usb_mutex);
 
-		kfree(b);
-		return 0;
-	}
+	kfree(b);
+	return result;
 }
 
 static int ttusb_dec_get_stb_state (struct ttusb_dec *dec, unsigned int *mode,
-- 
GitLab


From 60f0618d157b8c8bf1d09d4a6e10070a0b580160 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 14 Jun 2021 10:43:24 +0200
Subject: [PATCH 3009/3804] media: dvb-core: frontend: make GET/SET safer

The implementation for FE_SET_PROPERTY/FE_GET_PROPERTY has
debug code that might be exploited via Spectre.
Improve the logic in order to mitigate such risk.

It should be noted that, before this patch, the logic
which implements FE_GET_PROPERTY doesn't check the length passed
by the user, which might lead to exposing some information. This
is probably not exploitable, though, as the frontend drivers
won't rely on the buffer length value set by userspace, but
it helps to return a valid value back to userspace.

The code was changed to try to access an array based on
userspace values only when DVB debug is turned on, helping to
reduce the attack surface, as a speculation attack would work
only if DVB dev_dbg() macros are enabled, which is usually
enabled only on test Kernels or by the root user.

As a side effect, a const array size can now be reduced by
~570 bytes, as it now needs to contain just the name of each
DTV command.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/dvb-core/dvb_frontend.c | 222 ++++++++++++++------------
 1 file changed, 117 insertions(+), 105 deletions(-)

diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index a6915582d1a63..258637d762d64 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -23,6 +23,7 @@
 #include <linux/poll.h>
 #include <linux/semaphore.h>
 #include <linux/module.h>
+#include <linux/nospec.h>
 #include <linux/list.h>
 #include <linux/freezer.h>
 #include <linux/jiffies.h>
@@ -1063,107 +1064,97 @@ static int dvb_frontend_clear_cache(struct dvb_frontend *fe)
 	return 0;
 }
 
-#define _DTV_CMD(n, s, b) \
-[n] = { \
-	.name = #n, \
-	.cmd  = n, \
-	.set  = s,\
-	.buffer = b \
-}
-
-struct dtv_cmds_h {
-	char	*name;		/* A display name for debugging purposes */
+#define _DTV_CMD(n) \
+	[n] =  #n
 
-	__u32	cmd;		/* A unique ID */
-
-	/* Flags */
-	__u32	set:1;		/* Either a set or get property */
-	__u32	buffer:1;	/* Does this property use the buffer? */
-	__u32	reserved:30;	/* Align */
-};
-
-static struct dtv_cmds_h dtv_cmds[DTV_MAX_COMMAND + 1] = {
-	_DTV_CMD(DTV_TUNE, 1, 0),
-	_DTV_CMD(DTV_CLEAR, 1, 0),
+static char *dtv_cmds[DTV_MAX_COMMAND + 1] = {
+	_DTV_CMD(DTV_TUNE),
+	_DTV_CMD(DTV_CLEAR),
 
 	/* Set */
-	_DTV_CMD(DTV_FREQUENCY, 1, 0),
-	_DTV_CMD(DTV_BANDWIDTH_HZ, 1, 0),
-	_DTV_CMD(DTV_MODULATION, 1, 0),
-	_DTV_CMD(DTV_INVERSION, 1, 0),
-	_DTV_CMD(DTV_DISEQC_MASTER, 1, 1),
-	_DTV_CMD(DTV_SYMBOL_RATE, 1, 0),
-	_DTV_CMD(DTV_INNER_FEC, 1, 0),
-	_DTV_CMD(DTV_VOLTAGE, 1, 0),
-	_DTV_CMD(DTV_TONE, 1, 0),
-	_DTV_CMD(DTV_PILOT, 1, 0),
-	_DTV_CMD(DTV_ROLLOFF, 1, 0),
-	_DTV_CMD(DTV_DELIVERY_SYSTEM, 1, 0),
-	_DTV_CMD(DTV_HIERARCHY, 1, 0),
-	_DTV_CMD(DTV_CODE_RATE_HP, 1, 0),
-	_DTV_CMD(DTV_CODE_RATE_LP, 1, 0),
-	_DTV_CMD(DTV_GUARD_INTERVAL, 1, 0),
-	_DTV_CMD(DTV_TRANSMISSION_MODE, 1, 0),
-	_DTV_CMD(DTV_INTERLEAVING, 1, 0),
-
-	_DTV_CMD(DTV_ISDBT_PARTIAL_RECEPTION, 1, 0),
-	_DTV_CMD(DTV_ISDBT_SOUND_BROADCASTING, 1, 0),
-	_DTV_CMD(DTV_ISDBT_SB_SUBCHANNEL_ID, 1, 0),
-	_DTV_CMD(DTV_ISDBT_SB_SEGMENT_IDX, 1, 0),
-	_DTV_CMD(DTV_ISDBT_SB_SEGMENT_COUNT, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYER_ENABLED, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERA_FEC, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERA_MODULATION, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERA_SEGMENT_COUNT, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERA_TIME_INTERLEAVING, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERB_FEC, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERB_MODULATION, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERB_SEGMENT_COUNT, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERB_TIME_INTERLEAVING, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERC_FEC, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERC_MODULATION, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERC_SEGMENT_COUNT, 1, 0),
-	_DTV_CMD(DTV_ISDBT_LAYERC_TIME_INTERLEAVING, 1, 0),
-
-	_DTV_CMD(DTV_STREAM_ID, 1, 0),
-	_DTV_CMD(DTV_DVBT2_PLP_ID_LEGACY, 1, 0),
-	_DTV_CMD(DTV_SCRAMBLING_SEQUENCE_INDEX, 1, 0),
-	_DTV_CMD(DTV_LNA, 1, 0),
+	_DTV_CMD(DTV_FREQUENCY),
+	_DTV_CMD(DTV_BANDWIDTH_HZ),
+	_DTV_CMD(DTV_MODULATION),
+	_DTV_CMD(DTV_INVERSION),
+	_DTV_CMD(DTV_DISEQC_MASTER),
+	_DTV_CMD(DTV_SYMBOL_RATE),
+	_DTV_CMD(DTV_INNER_FEC),
+	_DTV_CMD(DTV_VOLTAGE),
+	_DTV_CMD(DTV_TONE),
+	_DTV_CMD(DTV_PILOT),
+	_DTV_CMD(DTV_ROLLOFF),
+	_DTV_CMD(DTV_DELIVERY_SYSTEM),
+	_DTV_CMD(DTV_HIERARCHY),
+	_DTV_CMD(DTV_CODE_RATE_HP),
+	_DTV_CMD(DTV_CODE_RATE_LP),
+	_DTV_CMD(DTV_GUARD_INTERVAL),
+	_DTV_CMD(DTV_TRANSMISSION_MODE),
+	_DTV_CMD(DTV_INTERLEAVING),
+
+	_DTV_CMD(DTV_ISDBT_PARTIAL_RECEPTION),
+	_DTV_CMD(DTV_ISDBT_SOUND_BROADCASTING),
+	_DTV_CMD(DTV_ISDBT_SB_SUBCHANNEL_ID),
+	_DTV_CMD(DTV_ISDBT_SB_SEGMENT_IDX),
+	_DTV_CMD(DTV_ISDBT_SB_SEGMENT_COUNT),
+	_DTV_CMD(DTV_ISDBT_LAYER_ENABLED),
+	_DTV_CMD(DTV_ISDBT_LAYERA_FEC),
+	_DTV_CMD(DTV_ISDBT_LAYERA_MODULATION),
+	_DTV_CMD(DTV_ISDBT_LAYERA_SEGMENT_COUNT),
+	_DTV_CMD(DTV_ISDBT_LAYERA_TIME_INTERLEAVING),
+	_DTV_CMD(DTV_ISDBT_LAYERB_FEC),
+	_DTV_CMD(DTV_ISDBT_LAYERB_MODULATION),
+	_DTV_CMD(DTV_ISDBT_LAYERB_SEGMENT_COUNT),
+	_DTV_CMD(DTV_ISDBT_LAYERB_TIME_INTERLEAVING),
+	_DTV_CMD(DTV_ISDBT_LAYERC_FEC),
+	_DTV_CMD(DTV_ISDBT_LAYERC_MODULATION),
+	_DTV_CMD(DTV_ISDBT_LAYERC_SEGMENT_COUNT),
+	_DTV_CMD(DTV_ISDBT_LAYERC_TIME_INTERLEAVING),
+
+	_DTV_CMD(DTV_STREAM_ID),
+	_DTV_CMD(DTV_DVBT2_PLP_ID_LEGACY),
+	_DTV_CMD(DTV_SCRAMBLING_SEQUENCE_INDEX),
+	_DTV_CMD(DTV_LNA),
 
 	/* Get */
-	_DTV_CMD(DTV_DISEQC_SLAVE_REPLY, 0, 1),
-	_DTV_CMD(DTV_API_VERSION, 0, 0),
-
-	_DTV_CMD(DTV_ENUM_DELSYS, 0, 0),
-
-	_DTV_CMD(DTV_ATSCMH_PARADE_ID, 1, 0),
-	_DTV_CMD(DTV_ATSCMH_RS_FRAME_ENSEMBLE, 1, 0),
-
-	_DTV_CMD(DTV_ATSCMH_FIC_VER, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_NOG, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_TNOG, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SGN, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_PRC, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_RS_FRAME_MODE, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_PRI, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_SEC, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SCCC_BLOCK_MODE, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_A, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_B, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_C, 0, 0),
-	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_D, 0, 0),
+	_DTV_CMD(DTV_DISEQC_SLAVE_REPLY),
+	_DTV_CMD(DTV_API_VERSION),
+
+	_DTV_CMD(DTV_ENUM_DELSYS),
+
+	_DTV_CMD(DTV_ATSCMH_PARADE_ID),
+	_DTV_CMD(DTV_ATSCMH_RS_FRAME_ENSEMBLE),
+
+	_DTV_CMD(DTV_ATSCMH_FIC_VER),
+	_DTV_CMD(DTV_ATSCMH_NOG),
+	_DTV_CMD(DTV_ATSCMH_TNOG),
+	_DTV_CMD(DTV_ATSCMH_SGN),
+	_DTV_CMD(DTV_ATSCMH_PRC),
+	_DTV_CMD(DTV_ATSCMH_RS_FRAME_MODE),
+	_DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_PRI),
+	_DTV_CMD(DTV_ATSCMH_RS_CODE_MODE_SEC),
+	_DTV_CMD(DTV_ATSCMH_SCCC_BLOCK_MODE),
+	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_A),
+	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_B),
+	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_C),
+	_DTV_CMD(DTV_ATSCMH_SCCC_CODE_MODE_D),
 
 	/* Statistics API */
-	_DTV_CMD(DTV_STAT_SIGNAL_STRENGTH, 0, 0),
-	_DTV_CMD(DTV_STAT_CNR, 0, 0),
-	_DTV_CMD(DTV_STAT_PRE_ERROR_BIT_COUNT, 0, 0),
-	_DTV_CMD(DTV_STAT_PRE_TOTAL_BIT_COUNT, 0, 0),
-	_DTV_CMD(DTV_STAT_POST_ERROR_BIT_COUNT, 0, 0),
-	_DTV_CMD(DTV_STAT_POST_TOTAL_BIT_COUNT, 0, 0),
-	_DTV_CMD(DTV_STAT_ERROR_BLOCK_COUNT, 0, 0),
-	_DTV_CMD(DTV_STAT_TOTAL_BLOCK_COUNT, 0, 0),
+	_DTV_CMD(DTV_STAT_SIGNAL_STRENGTH),
+	_DTV_CMD(DTV_STAT_CNR),
+	_DTV_CMD(DTV_STAT_PRE_ERROR_BIT_COUNT),
+	_DTV_CMD(DTV_STAT_PRE_TOTAL_BIT_COUNT),
+	_DTV_CMD(DTV_STAT_POST_ERROR_BIT_COUNT),
+	_DTV_CMD(DTV_STAT_POST_TOTAL_BIT_COUNT),
+	_DTV_CMD(DTV_STAT_ERROR_BLOCK_COUNT),
+	_DTV_CMD(DTV_STAT_TOTAL_BLOCK_COUNT),
 };
 
+static char *dtv_cmd_name(u32 cmd)
+{
+	cmd = array_index_nospec(cmd, DTV_MAX_COMMAND);
+	return dtv_cmds[cmd];
+}
+
 /* Synchronise the legacy tuning parameters into the cache, so that demodulator
  * drivers can use a single set_frontend tuning function, regardless of whether
  * it's being used for the legacy or new API, reducing code and complexity.
@@ -1346,6 +1337,7 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 				    struct file *file)
 {
 	int ncaps;
+	unsigned int len = 1;
 
 	switch (tvp->cmd) {
 	case DTV_ENUM_DELSYS:
@@ -1355,6 +1347,7 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 			ncaps++;
 		}
 		tvp->u.buffer.len = ncaps;
+		len = ncaps;
 		break;
 	case DTV_FREQUENCY:
 		tvp->u.data = c->frequency;
@@ -1532,27 +1525,51 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 	/* Fill quality measures */
 	case DTV_STAT_SIGNAL_STRENGTH:
 		tvp->u.st = c->strength;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_CNR:
 		tvp->u.st = c->cnr;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_PRE_ERROR_BIT_COUNT:
 		tvp->u.st = c->pre_bit_error;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_PRE_TOTAL_BIT_COUNT:
 		tvp->u.st = c->pre_bit_count;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_POST_ERROR_BIT_COUNT:
 		tvp->u.st = c->post_bit_error;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_POST_TOTAL_BIT_COUNT:
 		tvp->u.st = c->post_bit_count;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_ERROR_BLOCK_COUNT:
 		tvp->u.st = c->block_error;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	case DTV_STAT_TOTAL_BLOCK_COUNT:
 		tvp->u.st = c->block_count;
+		if (tvp->u.buffer.len > MAX_DTV_STATS * sizeof(u32))
+			tvp->u.buffer.len = MAX_DTV_STATS * sizeof(u32);
+		len = tvp->u.buffer.len;
 		break;
 	default:
 		dev_dbg(fe->dvb->device,
@@ -1561,18 +1578,13 @@ static int dtv_property_process_get(struct dvb_frontend *fe,
 		return -EINVAL;
 	}
 
-	if (!dtv_cmds[tvp->cmd].buffer)
-		dev_dbg(fe->dvb->device,
-			"%s: GET cmd 0x%08x (%s) = 0x%08x\n",
-			__func__, tvp->cmd, dtv_cmds[tvp->cmd].name,
-			tvp->u.data);
-	else
-		dev_dbg(fe->dvb->device,
-			"%s: GET cmd 0x%08x (%s) len %d: %*ph\n",
-			__func__,
-			tvp->cmd, dtv_cmds[tvp->cmd].name,
-			tvp->u.buffer.len,
-			tvp->u.buffer.len, tvp->u.buffer.data);
+	if (len < 1)
+		len = 1;
+
+	dev_dbg(fe->dvb->device,
+		"%s: GET cmd 0x%08x (%s) len %d: %*ph\n",
+		__func__, tvp->cmd, dtv_cmd_name(tvp->cmd),
+		tvp->u.buffer.len, tvp->u.buffer.len, tvp->u.buffer.data);
 
 	return 0;
 }
@@ -1870,7 +1882,7 @@ static int dtv_property_process_set(struct dvb_frontend *fe,
 	else
 		dev_dbg(fe->dvb->device,
 			"%s: SET cmd 0x%08x (%s) to 0x%08x\n",
-			__func__, cmd, dtv_cmds[cmd].name, data);
+			__func__, cmd, dtv_cmd_name(cmd), data);
 	switch (cmd) {
 	case DTV_CLEAR:
 		/*
-- 
GitLab


From 128916984208d8f7ccaed6eda840c603fa112910 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 10 Jun 2021 11:53:45 +0200
Subject: [PATCH 3010/3804] media: xilinx: simplify get fourcc logic

Right now, there are two calls for xvip_get_format_by_fourcc().
If the first one fails, it is called again in order to pick
the first available format: V4L2_PIX_FMT_YUYV.

This ends up producing smatch warnings:
	drivers/media/platform/xilinx/xilinx-dma.c:555 __xvip_dma_try_format() error: 'info' dereferencing possible ERR_PTR()
	drivers/media/platform/xilinx/xilinx-dma.c: drivers/media/platform/xilinx/xilinx-dma.c:664 xvip_dma_init() error: 'dma->fmtinfo' dereferencing possible ERR_PTR()

as it is hard for a static analyzer to ensure that calling
xvip_get_format_by_fourcc(XVIP_DMA_DEF_FORMAT) won't return an
error.

So, better to optimize the logic, ensuring that the function
will never return an error.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/xilinx/xilinx-dma.c | 5 +----
 drivers/media/platform/xilinx/xilinx-vip.c | 6 +++---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/xilinx/xilinx-dma.c b/drivers/media/platform/xilinx/xilinx-dma.c
index 2a56201cb8534..338c3661d8099 100644
--- a/drivers/media/platform/xilinx/xilinx-dma.c
+++ b/drivers/media/platform/xilinx/xilinx-dma.c
@@ -26,7 +26,6 @@
 #include "xilinx-vip.h"
 #include "xilinx-vipp.h"
 
-#define XVIP_DMA_DEF_FORMAT		V4L2_PIX_FMT_YUYV
 #define XVIP_DMA_DEF_WIDTH		1920
 #define XVIP_DMA_DEF_HEIGHT		1080
 
@@ -549,8 +548,6 @@ __xvip_dma_try_format(struct xvip_dma *dma, struct v4l2_pix_format *pix,
 	 * requested format isn't supported.
 	 */
 	info = xvip_get_format_by_fourcc(pix->pixelformat);
-	if (IS_ERR(info))
-		info = xvip_get_format_by_fourcc(XVIP_DMA_DEF_FORMAT);
 
 	pix->pixelformat = info->fourcc;
 	pix->field = V4L2_FIELD_NONE;
@@ -660,7 +657,7 @@ int xvip_dma_init(struct xvip_composite_device *xdev, struct xvip_dma *dma,
 	INIT_LIST_HEAD(&dma->queued_bufs);
 	spin_lock_init(&dma->queued_lock);
 
-	dma->fmtinfo = xvip_get_format_by_fourcc(XVIP_DMA_DEF_FORMAT);
+	dma->fmtinfo = xvip_get_format_by_fourcc(V4L2_PIX_FMT_YUYV);
 	dma->format.pixelformat = dma->fmtinfo->fourcc;
 	dma->format.colorspace = V4L2_COLORSPACE_SRGB;
 	dma->format.field = V4L2_FIELD_NONE;
diff --git a/drivers/media/platform/xilinx/xilinx-vip.c b/drivers/media/platform/xilinx/xilinx-vip.c
index 6ad61b08a31ab..a4eb576834110 100644
--- a/drivers/media/platform/xilinx/xilinx-vip.c
+++ b/drivers/media/platform/xilinx/xilinx-vip.c
@@ -70,8 +70,8 @@ EXPORT_SYMBOL_GPL(xvip_get_format_by_code);
  * @fourcc: the format 4CC
  *
  * Return: a pointer to the format information structure corresponding to the
- * given V4L2 format @fourcc, or ERR_PTR if no corresponding format can be
- * found.
+ * given V4L2 format @fourcc. If not found, return a pointer to the first
+ * available format (V4L2_PIX_FMT_YUYV).
  */
 const struct xvip_video_format *xvip_get_format_by_fourcc(u32 fourcc)
 {
@@ -84,7 +84,7 @@ const struct xvip_video_format *xvip_get_format_by_fourcc(u32 fourcc)
 			return format;
 	}
 
-	return ERR_PTR(-EINVAL);
+	return &xvip_video_formats[0];
 }
 EXPORT_SYMBOL_GPL(xvip_get_format_by_fourcc);
 
-- 
GitLab


From c73c23f347168e315d65fd3b7cffca8439724b26 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 3 Jun 2021 02:17:08 +0200
Subject: [PATCH 3011/3804] media: venus: hfi_cmds: Fix packet size calculation

Now that a one-element array was replaced with a flexible-array member
in struct hfi_sys_set_property_pkt, use the struct_size() helper to
correctly calculate the packet size.

Fixes: 701e10b3fd9f ("media: venus: hfi_cmds.h: Replace one-element array with flexible-array member")
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/hfi_cmds.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c
index 4b9dea7f6940e..f510247869916 100644
--- a/drivers/media/platform/qcom/venus/hfi_cmds.c
+++ b/drivers/media/platform/qcom/venus/hfi_cmds.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
  * Copyright (C) 2017 Linaro Ltd.
  */
+#include <linux/overflow.h>
 #include <linux/errno.h>
 #include <linux/hash.h>
 
@@ -27,7 +28,7 @@ void pkt_sys_idle_indicator(struct hfi_sys_set_property_pkt *pkt, u32 enable)
 {
 	struct hfi_enable *hfi = (struct hfi_enable *)&pkt->data[1];
 
-	pkt->hdr.size = sizeof(*pkt) + sizeof(*hfi) + sizeof(u32);
+	pkt->hdr.size = struct_size(pkt, data, 1) + sizeof(*hfi);
 	pkt->hdr.pkt_type = HFI_CMD_SYS_SET_PROPERTY;
 	pkt->num_properties = 1;
 	pkt->data[0] = HFI_PROPERTY_SYS_IDLE_INDICATOR;
@@ -39,7 +40,7 @@ void pkt_sys_debug_config(struct hfi_sys_set_property_pkt *pkt, u32 mode,
 {
 	struct hfi_debug_config *hfi;
 
-	pkt->hdr.size = sizeof(*pkt) + sizeof(*hfi) + sizeof(u32);
+	pkt->hdr.size = struct_size(pkt, data, 1) + sizeof(*hfi);
 	pkt->hdr.pkt_type = HFI_CMD_SYS_SET_PROPERTY;
 	pkt->num_properties = 1;
 	pkt->data[0] = HFI_PROPERTY_SYS_DEBUG_CONFIG;
@@ -50,7 +51,7 @@ void pkt_sys_debug_config(struct hfi_sys_set_property_pkt *pkt, u32 mode,
 
 void pkt_sys_coverage_config(struct hfi_sys_set_property_pkt *pkt, u32 mode)
 {
-	pkt->hdr.size = sizeof(*pkt) + sizeof(u32);
+	pkt->hdr.size = struct_size(pkt, data, 2);
 	pkt->hdr.pkt_type = HFI_CMD_SYS_SET_PROPERTY;
 	pkt->num_properties = 1;
 	pkt->data[0] = HFI_PROPERTY_SYS_CONFIG_COVERAGE;
@@ -116,7 +117,7 @@ void pkt_sys_power_control(struct hfi_sys_set_property_pkt *pkt, u32 enable)
 {
 	struct hfi_enable *hfi = (struct hfi_enable *)&pkt->data[1];
 
-	pkt->hdr.size = sizeof(*pkt) + sizeof(*hfi) + sizeof(u32);
+	pkt->hdr.size = struct_size(pkt, data, 1) + sizeof(*hfi);
 	pkt->hdr.pkt_type = HFI_CMD_SYS_SET_PROPERTY;
 	pkt->num_properties = 1;
 	pkt->data[0] = HFI_PROPERTY_SYS_CODEC_POWER_PLANE_CTRL;
-- 
GitLab


From 6f2f49ae4c287fbaaed89b2b262a9b99d27302fb Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 4 Jun 2021 02:43:38 +0200
Subject: [PATCH 3012/3804] media: venus: hfi_msgs.h: Replace one-element
 arrays with flexible-array members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a regular need in the kernel to provide a way to declare having
a dynamically sized set of trailing elements in a structure. Kernel code
should always use “flexible array members”[1] for these cases. The older
style of one-element or zero-length arrays should no longer be used[2].

Use flexible-array members in struct hfi_msg_sys_property_info_pkt and
hfi_msg_session_property_info_pkt instead of one-element arrays, and
refactor the code accordingly.

Also, this helps with the ongoing efforts to enable -Warray-bounds by
fixing the following warnings:

  CC [M]  drivers/media/platform/qcom/venus/hfi_msgs.o
drivers/media/platform/qcom/venus/hfi_msgs.c: In function ‘hfi_sys_property_info’:
drivers/media/platform/qcom/venus/hfi_msgs.c:246:35: warning: array subscript 1 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
  246 |  if (req_bytes < 128 || !pkt->data[1] || pkt->num_properties > 1)
      |                          ~~~~~~~~~^~~
drivers/media/platform/qcom/venus/hfi_msgs.c: In function ‘hfi_session_prop_info’:
drivers/media/platform/qcom/venus/hfi_msgs.c:342:62: warning: array subscript 1 is above array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Warray-bounds]
  342 |  if (!req_bytes || req_bytes % sizeof(*buf_req) || !pkt->data[1])
      |                                                     ~~~~~~~~~^~~

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.9/process/deprecated.html#zero-length-and-one-element-arrays

Link: https://github.com/KSPP/linux/issues/79
Link: https://github.com/KSPP/linux/issues/109

Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/qcom/venus/hfi_msgs.c | 16 ++++++++--------
 drivers/media/platform/qcom/venus/hfi_msgs.h |  6 ++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/media/platform/qcom/venus/hfi_msgs.c b/drivers/media/platform/qcom/venus/hfi_msgs.c
index a2d436d407b22..d9fde66f6fa8c 100644
--- a/drivers/media/platform/qcom/venus/hfi_msgs.c
+++ b/drivers/media/platform/qcom/venus/hfi_msgs.c
@@ -251,11 +251,11 @@ sys_get_prop_image_version(struct device *dev,
 
 	req_bytes = pkt->hdr.size - sizeof(*pkt);
 
-	if (req_bytes < VER_STR_SZ || !pkt->data[1] || pkt->num_properties > 1)
+	if (req_bytes < VER_STR_SZ || !pkt->data[0] || pkt->num_properties > 1)
 		/* bad packet */
 		return;
 
-	img_ver = (u8 *)&pkt->data[1];
+	img_ver = pkt->data;
 
 	dev_dbg(dev, VDBGL "F/W version: %s\n", img_ver);
 
@@ -277,7 +277,7 @@ static void hfi_sys_property_info(struct venus_core *core,
 		return;
 	}
 
-	switch (pkt->data[0]) {
+	switch (pkt->property) {
 	case HFI_PROPERTY_SYS_IMAGE_VERSION:
 		sys_get_prop_image_version(dev, pkt);
 		break;
@@ -338,7 +338,7 @@ session_get_prop_profile_level(struct hfi_msg_session_property_info_pkt *pkt,
 		/* bad packet */
 		return HFI_ERR_SESSION_INVALID_PARAMETER;
 
-	hfi = (struct hfi_profile_level *)&pkt->data[1];
+	hfi = (struct hfi_profile_level *)&pkt->data[0];
 	profile_level->profile = hfi->profile;
 	profile_level->level = hfi->level;
 
@@ -355,11 +355,11 @@ session_get_prop_buf_req(struct hfi_msg_session_property_info_pkt *pkt,
 
 	req_bytes = pkt->shdr.hdr.size - sizeof(*pkt);
 
-	if (!req_bytes || req_bytes % sizeof(*buf_req) || !pkt->data[1])
+	if (!req_bytes || req_bytes % sizeof(*buf_req) || !pkt->data[0])
 		/* bad packet */
 		return HFI_ERR_SESSION_INVALID_PARAMETER;
 
-	buf_req = (struct hfi_buffer_requirements *)&pkt->data[1];
+	buf_req = (struct hfi_buffer_requirements *)&pkt->data[0];
 	if (!buf_req)
 		return HFI_ERR_SESSION_INVALID_PARAMETER;
 
@@ -391,7 +391,7 @@ static void hfi_session_prop_info(struct venus_core *core,
 		goto done;
 	}
 
-	switch (pkt->data[0]) {
+	switch (pkt->property) {
 	case HFI_PROPERTY_CONFIG_BUFFER_REQUIREMENTS:
 		memset(hprop->bufreq, 0, sizeof(hprop->bufreq));
 		error = session_get_prop_buf_req(pkt, hprop->bufreq);
@@ -404,7 +404,7 @@ static void hfi_session_prop_info(struct venus_core *core,
 	case HFI_PROPERTY_CONFIG_VDEC_ENTROPY:
 		break;
 	default:
-		dev_dbg(dev, VDBGM "unknown property id:%x\n", pkt->data[0]);
+		dev_dbg(dev, VDBGM "unknown property id:%x\n", pkt->property);
 		return;
 	}
 
diff --git a/drivers/media/platform/qcom/venus/hfi_msgs.h b/drivers/media/platform/qcom/venus/hfi_msgs.h
index 526d9f5b487bb..510513697335b 100644
--- a/drivers/media/platform/qcom/venus/hfi_msgs.h
+++ b/drivers/media/platform/qcom/venus/hfi_msgs.h
@@ -113,7 +113,8 @@ struct hfi_msg_sys_ping_ack_pkt {
 struct hfi_msg_sys_property_info_pkt {
 	struct hfi_pkt_hdr hdr;
 	u32 num_properties;
-	u32 data[1];
+	u32 property;
+	u8 data[];
 };
 
 struct hfi_msg_session_load_resources_done_pkt {
@@ -233,7 +234,8 @@ struct hfi_msg_session_parse_sequence_header_done_pkt {
 struct hfi_msg_session_property_info_pkt {
 	struct hfi_session_hdr_pkt shdr;
 	u32 num_properties;
-	u32 data[1];
+	u32 property;
+	u8 data[];
 };
 
 struct hfi_msg_session_release_resources_done_pkt {
-- 
GitLab


From 0d346d2a6f54f06f36b224fd27cd6eafe8c83be9 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Date: Thu, 10 Jun 2021 17:55:58 +0300
Subject: [PATCH 3013/3804] media: v4l2-subdev: add subdev-wide state struct

We have 'struct v4l2_subdev_pad_config' which contains configuration for
a single pad used for the TRY functionality, and an array of those
structs is passed to various v4l2_subdev_pad_ops.

I was working on subdev internal routing between pads, and realized that
there's no way to add TRY functionality for routes, because routing is not
pad-specific configuration. Adding a separate struct for try-route config
wouldn't work either, as e.g. set-fmt needs to know the try-route
configuration to propagate the settings.

This patch adds a new struct, 'struct v4l2_subdev_state' (which at the
moment only contains the v4l2_subdev_pad_config array) and the new
struct is used in most of the places where v4l2_subdev_pad_config was
used. All v4l2_subdev_pad_ops functions taking v4l2_subdev_pad_config
are changed to instead take v4l2_subdev_state.

The changes to drivers/media/v4l2-core/v4l2-subdev.c and
include/media/v4l2-subdev.h were written by hand, and all the driver
changes were done with the semantic patch below. The spatch needs to be
applied to a select list of directories. I used the following shell
commands to apply the spatch:

dirs="drivers/media/i2c drivers/media/platform drivers/media/usb drivers/media/test-drivers/vimc drivers/media/pci drivers/staging/media"
for dir in $dirs; do spatch -j8 --dir --include-headers --no-show-diff --in-place --sp-file v4l2-subdev-state.cocci $dir; done

Note that Coccinelle chokes on a few drivers (gcc extensions?). With
minor temporary changes to those drivers Coccinelle runs fine, and the
changes can be reverted after running spatch. The diff for these
temporary changes is:

For drivers/media/i2c/s5k5baf.c:

	@@ -1481,7 +1481,7 @@ static int s5k5baf_set_selection(struct v4l2_subdev *sd,
	 				&s5k5baf_cis_rect,
	 				v4l2_subdev_get_try_crop(sd, cfg, PAD_CIS),
	 				v4l2_subdev_get_try_compose(sd, cfg, PAD_CIS),
	-				v4l2_subdev_get_try_crop(sd, cfg, PAD_OUT)
	+				v4l2_subdev_get_try_crop(sd, cfg, PAD_OUT),
	 			};
	 		s5k5baf_set_rect_and_adjust(rects, rtype, &sel->r);
	 		return 0;

For drivers/media/platform/s3c-camif/camif-capture.c:

	@@ -1230,7 +1230,7 @@ static int s3c_camif_subdev_get_fmt(struct v4l2_subdev *sd,
	 		*mf = camif->mbus_fmt;
	 		break;

	-	case CAMIF_SD_PAD_SOURCE_C...CAMIF_SD_PAD_SOURCE_P:
	+	case CAMIF_SD_PAD_SOURCE_C:
	 		/* crop rectangle at camera interface input */
	 		mf->width = camif->camif_crop.width;
	 		mf->height = camif->camif_crop.height;
	@@ -1332,7 +1332,7 @@ static int s3c_camif_subdev_set_fmt(struct v4l2_subdev *sd,
	 		}
	 		break;

	-	case CAMIF_SD_PAD_SOURCE_C...CAMIF_SD_PAD_SOURCE_P:
	+	case CAMIF_SD_PAD_SOURCE_C:
	 		/* Pixel format can be only changed on the sink pad. */
	 		mf->code = camif->mbus_fmt.code;
	 		mf->width = crop->width;

The semantic patch is:

// <smpl>

// Change function parameter

@@
identifier func;
identifier cfg;
@@

 func(...,
-   struct v4l2_subdev_pad_config *cfg
+   struct v4l2_subdev_state *sd_state
    , ...)
 {
 <...
- cfg
+ sd_state
 ...>
 }

// Change function declaration parameter

@@
identifier func;
identifier cfg;
type T;
@@
T func(...,
-   struct v4l2_subdev_pad_config *cfg
+   struct v4l2_subdev_state *sd_state
    , ...);

// Change function return value

@@
identifier func;
@@
- struct v4l2_subdev_pad_config
+ struct v4l2_subdev_state
 *func(...)
 {
    ...
 }

// Change function declaration return value

@@
identifier func;
@@
- struct v4l2_subdev_pad_config
+ struct v4l2_subdev_state
 *func(...);

// Some drivers pass a local pad_cfg for a single pad to a called function. Wrap it
// inside a pad_state.

@@
identifier func;
identifier pad_cfg;
@@
func(...)
{
    ...
    struct v4l2_subdev_pad_config pad_cfg;
+   struct v4l2_subdev_state pad_state = { .pads = &pad_cfg };

    <+...

(
    v4l2_subdev_call
|
    sensor_call
|
    isi_try_fse
|
    isc_try_fse
|
    saa_call_all
)
    (...,
-   &pad_cfg
+   &pad_state
    ,...)

    ...+>
}

// If the function uses fields from pad_config, access via state->pads

@@
identifier func;
identifier state;
@@
 func(...,
    struct v4l2_subdev_state *state
    , ...)
 {
    <...
(
-   state->try_fmt
+   state->pads->try_fmt
|
-   state->try_crop
+   state->pads->try_crop
|
-   state->try_compose
+   state->pads->try_compose
)
    ...>
}

// If the function accesses the filehandle, use fh->state instead

@@
struct v4l2_subdev_fh *fh;
@@
-    fh->pad
+    fh->state

@@
struct v4l2_subdev_fh fh;
@@
-    fh.pad
+    fh.state

// Start of vsp1 specific

@@
@@
struct vsp1_entity {
    ...
-    struct v4l2_subdev_pad_config *config;
+    struct v4l2_subdev_state *config;
    ...
};

@@
symbol entity;
@@
vsp1_entity_init(...)
{
    ...
    entity->config =
-    v4l2_subdev_alloc_pad_config
+    v4l2_subdev_alloc_state
    (&entity->subdev);
    ...
}

@@
symbol entity;
@@
vsp1_entity_destroy(...)
{
    ...
-   v4l2_subdev_free_pad_config
+   v4l2_subdev_free_state
    (entity->config);
    ...
}

@exists@
identifier func =~ "(^vsp1.*)|(hsit_set_format)|(sru_enum_frame_size)|(sru_set_format)|(uif_get_selection)|(uif_set_selection)|(uds_enum_frame_size)|(uds_set_format)|(brx_set_format)|(brx_get_selection)|(histo_get_selection)|(histo_set_selection)|(brx_set_selection)";
symbol config;
@@
func(...) {
    ...
-    struct v4l2_subdev_pad_config *config;
+    struct v4l2_subdev_state *config;
    ...
}

// End of vsp1 specific

// Start of rcar specific

@@
identifier sd;
identifier pad_cfg;
@@
 rvin_try_format(...)
 {
    ...
-   struct v4l2_subdev_pad_config *pad_cfg;
+   struct v4l2_subdev_state *sd_state;
    ...
-   pad_cfg = v4l2_subdev_alloc_pad_config(sd);
+   sd_state = v4l2_subdev_alloc_state(sd);
    <...
-   pad_cfg
+   sd_state
    ...>
-   v4l2_subdev_free_pad_config(pad_cfg);
+   v4l2_subdev_free_state(sd_state);
    ...
 }

// End of rcar specific

// Start of rockchip specific

@@
identifier func =~ "(rkisp1_rsz_get_pad_fmt)|(rkisp1_rsz_get_pad_crop)|(rkisp1_rsz_register)";
symbol rsz;
symbol pad_cfg;
@@

 func(...)
 {
+   struct v4l2_subdev_state state = { .pads = rsz->pad_cfg };
    ...
-   rsz->pad_cfg
+   &state
    ...
 }

@@
identifier func =~ "(rkisp1_isp_get_pad_fmt)|(rkisp1_isp_get_pad_crop)";
symbol isp;
symbol pad_cfg;
@@

 func(...)
 {
+   struct v4l2_subdev_state state = { .pads = isp->pad_cfg };
    ...
-   isp->pad_cfg
+   &state
    ...
 }

@@
symbol rkisp1;
symbol isp;
symbol pad_cfg;
@@

 rkisp1_isp_register(...)
 {
+   struct v4l2_subdev_state state = { .pads = rkisp1->isp.pad_cfg };
    ...
-   rkisp1->isp.pad_cfg
+   &state
    ...
 }

// End of rockchip specific

// Start of tegra-video specific

@@
identifier sd;
identifier pad_cfg;
@@
 __tegra_channel_try_format(...)
 {
    ...
-   struct v4l2_subdev_pad_config *pad_cfg;
+   struct v4l2_subdev_state *sd_state;
    ...
-   pad_cfg = v4l2_subdev_alloc_pad_config(sd);
+   sd_state = v4l2_subdev_alloc_state(sd);
    <...
-   pad_cfg
+   sd_state
    ...>
-   v4l2_subdev_free_pad_config(pad_cfg);
+   v4l2_subdev_free_state(sd_state);
    ...
 }

@@
identifier sd_state;
@@
 __tegra_channel_try_format(...)
 {
    ...
    struct v4l2_subdev_state *sd_state;
    <...
-   sd_state->try_crop
+   sd_state->pads->try_crop
    ...>
 }

// End of tegra-video specific

// </smpl>

Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/adv7170.c                   |   6 +-
 drivers/media/i2c/adv7175.c                   |   6 +-
 drivers/media/i2c/adv7180.c                   |  18 +--
 drivers/media/i2c/adv7183.c                   |   8 +-
 drivers/media/i2c/adv748x/adv748x-afe.c       |  13 +-
 drivers/media/i2c/adv748x/adv748x-csi2.c      |  14 +-
 drivers/media/i2c/adv748x/adv748x-hdmi.c      |  13 +-
 drivers/media/i2c/adv7511-v4l2.c              |  10 +-
 drivers/media/i2c/adv7604.c                   |  12 +-
 drivers/media/i2c/adv7842.c                   |  12 +-
 drivers/media/i2c/ak881x.c                    |   6 +-
 drivers/media/i2c/ccs/ccs-core.c              |  84 ++++++-----
 drivers/media/i2c/cx25840/cx25840-core.c      |   2 +-
 drivers/media/i2c/et8ek8/et8ek8_driver.c      |  23 +--
 drivers/media/i2c/hi556.c                     |  15 +-
 drivers/media/i2c/imx208.c                    |  19 +--
 drivers/media/i2c/imx214.c                    |  37 ++---
 drivers/media/i2c/imx219.c                    |  30 ++--
 drivers/media/i2c/imx258.c                    |  19 +--
 drivers/media/i2c/imx274.c                    |  38 ++---
 drivers/media/i2c/imx290.c                    |  20 +--
 drivers/media/i2c/imx319.c                    |  18 +--
 drivers/media/i2c/imx334.c                    |  28 ++--
 drivers/media/i2c/imx355.c                    |  18 +--
 drivers/media/i2c/m5mols/m5mols_core.c        |  21 ++-
 drivers/media/i2c/max9286.c                   |  17 ++-
 drivers/media/i2c/ml86v7667.c                 |   4 +-
 drivers/media/i2c/mt9m001.c                   |  18 +--
 drivers/media/i2c/mt9m032.c                   |  38 ++---
 drivers/media/i2c/mt9m111.c                   |  18 +--
 drivers/media/i2c/mt9p031.c                   |  45 +++---
 drivers/media/i2c/mt9t001.c                   |  44 +++---
 drivers/media/i2c/mt9t112.c                   |  14 +-
 drivers/media/i2c/mt9v011.c                   |   6 +-
 drivers/media/i2c/mt9v032.c                   |  44 +++---
 drivers/media/i2c/mt9v111.c                   |  25 +--
 drivers/media/i2c/noon010pc30.c               |  19 ++-
 drivers/media/i2c/ov02a10.c                   |  17 ++-
 drivers/media/i2c/ov13858.c                   |  18 +--
 drivers/media/i2c/ov2640.c                    |  16 +-
 drivers/media/i2c/ov2659.c                    |  14 +-
 drivers/media/i2c/ov2680.c                    |  23 +--
 drivers/media/i2c/ov2685.c                    |  10 +-
 drivers/media/i2c/ov2740.c                    |  15 +-
 drivers/media/i2c/ov5640.c                    |  14 +-
 drivers/media/i2c/ov5645.c                    |  38 ++---
 drivers/media/i2c/ov5647.c                    |  26 ++--
 drivers/media/i2c/ov5648.c                    |  14 +-
 drivers/media/i2c/ov5670.c                    |  19 +--
 drivers/media/i2c/ov5675.c                    |  15 +-
 drivers/media/i2c/ov5695.c                    |  15 +-
 drivers/media/i2c/ov6650.c                    |  28 ++--
 drivers/media/i2c/ov7251.c                    |  39 ++---
 drivers/media/i2c/ov7670.c                    |  17 ++-
 drivers/media/i2c/ov772x.c                    |  12 +-
 drivers/media/i2c/ov7740.c                    |  17 ++-
 drivers/media/i2c/ov8856.c                    |  15 +-
 drivers/media/i2c/ov8865.c                    |  14 +-
 drivers/media/i2c/ov9640.c                    |   8 +-
 drivers/media/i2c/ov9650.c                    |  17 ++-
 drivers/media/i2c/ov9734.c                    |  15 +-
 drivers/media/i2c/rdacm20.c                   |   4 +-
 drivers/media/i2c/rdacm21.c                   |   4 +-
 drivers/media/i2c/rj54n1cb0c.c                |  12 +-
 drivers/media/i2c/s5c73m3/s5c73m3-core.c      |  55 +++----
 drivers/media/i2c/s5k4ecgx.c                  |  22 +--
 drivers/media/i2c/s5k5baf.c                   |  49 +++---
 drivers/media/i2c/s5k6a3.c                    |  19 ++-
 drivers/media/i2c/s5k6aa.c                    |  39 ++---
 drivers/media/i2c/saa6752hs.c                 |   6 +-
 drivers/media/i2c/saa7115.c                   |   2 +-
 drivers/media/i2c/saa717x.c                   |   2 +-
 drivers/media/i2c/sr030pc30.c                 |   8 +-
 drivers/media/i2c/st-mipid02.c                |  21 +--
 drivers/media/i2c/tc358743.c                  |   8 +-
 drivers/media/i2c/tda1997x.c                  |  14 +-
 drivers/media/i2c/tvp514x.c                   |  12 +-
 drivers/media/i2c/tvp5150.c                   |  20 +--
 drivers/media/i2c/tvp7002.c                   |  11 +-
 drivers/media/i2c/tw9910.c                    |  10 +-
 drivers/media/i2c/vs6624.c                    |   8 +-
 drivers/media/pci/cx18/cx18-av-core.c         |   2 +-
 drivers/media/pci/intel/ipu3/ipu3-cio2-main.c |  17 ++-
 drivers/media/pci/saa7134/saa7134-empress.c   |   5 +-
 drivers/media/platform/atmel/atmel-isc-base.c |  19 ++-
 drivers/media/platform/atmel/atmel-isi.c      |  19 ++-
 drivers/media/platform/cadence/cdns-csi2tx.c  |  14 +-
 .../media/platform/exynos4-is/fimc-capture.c  |  22 +--
 drivers/media/platform/exynos4-is/fimc-isp.c  |  37 +++--
 drivers/media/platform/exynos4-is/fimc-lite.c |  39 ++---
 drivers/media/platform/exynos4-is/mipi-csis.c |  17 ++-
 .../media/platform/marvell-ccic/mcam-core.c   |   5 +-
 drivers/media/platform/omap3isp/ispccdc.c     |  85 ++++++-----
 drivers/media/platform/omap3isp/ispccp2.c     |  49 +++---
 drivers/media/platform/omap3isp/ispcsi2.c     |  41 ++---
 drivers/media/platform/omap3isp/isppreview.c  |  69 +++++----
 drivers/media/platform/omap3isp/ispresizer.c  |  70 +++++----
 drivers/media/platform/pxa_camera.c           |   5 +-
 .../media/platform/qcom/camss/camss-csid.c    |  35 ++---
 .../media/platform/qcom/camss/camss-csiphy.c  |  40 ++---
 .../media/platform/qcom/camss/camss-ispif.c   |  36 ++---
 drivers/media/platform/qcom/camss/camss-vfe.c |  84 ++++++-----
 drivers/media/platform/rcar-vin/rcar-csi2.c   |   8 +-
 drivers/media/platform/rcar-vin/rcar-v4l2.c   |  10 +-
 drivers/media/platform/renesas-ceu.c          |   7 +-
 .../platform/rockchip/rkisp1/rkisp1-isp.c     | 112 ++++++++------
 .../platform/rockchip/rkisp1/rkisp1-resizer.c |  95 +++++++-----
 .../media/platform/s3c-camif/camif-capture.c  |  18 +--
 drivers/media/platform/stm32/stm32-dcmi.c     |  14 +-
 .../platform/sunxi/sun4i-csi/sun4i_v4l2.c     |  16 +-
 drivers/media/platform/ti-vpe/cal-camerarx.c  |  35 +++--
 drivers/media/platform/via-camera.c           |   5 +-
 drivers/media/platform/video-mux.c            |  22 +--
 drivers/media/platform/vsp1/vsp1_brx.c        |  34 +++--
 drivers/media/platform/vsp1/vsp1_clu.c        |  13 +-
 drivers/media/platform/vsp1/vsp1_entity.c     |  59 ++++----
 drivers/media/platform/vsp1/vsp1_entity.h     |  20 +--
 drivers/media/platform/vsp1/vsp1_histo.c      |  51 ++++---
 drivers/media/platform/vsp1/vsp1_hsit.c       |  14 +-
 drivers/media/platform/vsp1/vsp1_lif.c        |  13 +-
 drivers/media/platform/vsp1/vsp1_lut.c        |  13 +-
 drivers/media/platform/vsp1/vsp1_rwpf.c       |  32 ++--
 drivers/media/platform/vsp1/vsp1_rwpf.h       |   2 +-
 drivers/media/platform/vsp1/vsp1_sru.c        |  22 +--
 drivers/media/platform/vsp1/vsp1_uds.c        |  22 +--
 drivers/media/platform/vsp1/vsp1_uif.c        |  27 ++--
 .../media/platform/xilinx/xilinx-csi2rxss.c   |  26 ++--
 drivers/media/platform/xilinx/xilinx-tpg.c    |  25 +--
 drivers/media/platform/xilinx/xilinx-vip.c    |  12 +-
 drivers/media/platform/xilinx/xilinx-vip.h    |   4 +-
 .../media/test-drivers/vimc/vimc-debayer.c    |  20 +--
 drivers/media/test-drivers/vimc/vimc-scaler.c |  36 ++---
 drivers/media/test-drivers/vimc/vimc-sensor.c |  16 +-
 drivers/media/usb/go7007/s2250-board.c        |   2 +-
 drivers/media/v4l2-core/v4l2-subdev.c         | 142 ++++++++++--------
 .../media/atomisp/i2c/atomisp-gc0310.c        |  10 +-
 .../media/atomisp/i2c/atomisp-gc2235.c        |  10 +-
 .../media/atomisp/i2c/atomisp-mt9m114.c       |  12 +-
 .../media/atomisp/i2c/atomisp-ov2680.c        |  10 +-
 .../media/atomisp/i2c/atomisp-ov2722.c        |  10 +-
 .../media/atomisp/i2c/ov5693/atomisp-ov5693.c |  10 +-
 .../staging/media/atomisp/pci/atomisp_cmd.c   |  33 ++--
 .../staging/media/atomisp/pci/atomisp_csi2.c  |  28 ++--
 .../staging/media/atomisp/pci/atomisp_csi2.h  |   2 +-
 .../staging/media/atomisp/pci/atomisp_file.c  |  14 +-
 .../staging/media/atomisp/pci/atomisp_fops.c  |   6 +-
 .../media/atomisp/pci/atomisp_subdev.c        |  64 ++++----
 .../media/atomisp/pci/atomisp_subdev.h        |   9 +-
 .../staging/media/atomisp/pci/atomisp_tpg.c   |  12 +-
 drivers/staging/media/imx/imx-ic-prp.c        |  19 +--
 drivers/staging/media/imx/imx-ic-prpencvf.c   |  31 ++--
 drivers/staging/media/imx/imx-media-csi.c     |  82 +++++-----
 drivers/staging/media/imx/imx-media-utils.c   |   4 +-
 drivers/staging/media/imx/imx-media-vdic.c    |  24 +--
 drivers/staging/media/imx/imx-media.h         |   2 +-
 drivers/staging/media/imx/imx6-mipi-csi2.c    |  12 +-
 drivers/staging/media/imx/imx7-media-csi.c    |  33 ++--
 drivers/staging/media/imx/imx7-mipi-csis.c    |  34 +++--
 drivers/staging/media/ipu3/ipu3-v4l2.c        |  26 ++--
 drivers/staging/media/omap4iss/iss_csi2.c     |  37 ++---
 drivers/staging/media/omap4iss/iss_ipipe.c    |  37 ++---
 drivers/staging/media/omap4iss/iss_ipipeif.c  |  47 +++---
 drivers/staging/media/omap4iss/iss_resizer.c  |  39 ++---
 drivers/staging/media/tegra-video/csi.c       |  10 +-
 drivers/staging/media/tegra-video/vi.c        |  24 +--
 include/media/v4l2-subdev.h                   |  74 +++++----
 166 files changed, 2163 insertions(+), 1803 deletions(-)

diff --git a/drivers/media/i2c/adv7170.c b/drivers/media/i2c/adv7170.c
index e4e8fda51ad8a..714e31f993e1c 100644
--- a/drivers/media/i2c/adv7170.c
+++ b/drivers/media/i2c/adv7170.c
@@ -250,7 +250,7 @@ static int adv7170_s_routing(struct v4l2_subdev *sd,
 }
 
 static int adv7170_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(adv7170_codes))
@@ -261,7 +261,7 @@ static int adv7170_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int adv7170_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -284,7 +284,7 @@ static int adv7170_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int adv7170_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
diff --git a/drivers/media/i2c/adv7175.c b/drivers/media/i2c/adv7175.c
index 0cdd8e0331970..1813f67f0fe1d 100644
--- a/drivers/media/i2c/adv7175.c
+++ b/drivers/media/i2c/adv7175.c
@@ -288,7 +288,7 @@ static int adv7175_s_routing(struct v4l2_subdev *sd,
 }
 
 static int adv7175_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(adv7175_codes))
@@ -299,7 +299,7 @@ static int adv7175_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int adv7175_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -322,7 +322,7 @@ static int adv7175_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int adv7175_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
diff --git a/drivers/media/i2c/adv7180.c b/drivers/media/i2c/adv7180.c
index 44bb6fe856440..fa5bc55bc9445 100644
--- a/drivers/media/i2c/adv7180.c
+++ b/drivers/media/i2c/adv7180.c
@@ -633,7 +633,7 @@ static void adv7180_exit_controls(struct adv7180_state *state)
 }
 
 static int adv7180_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index != 0)
@@ -699,13 +699,13 @@ static int adv7180_set_field_mode(struct adv7180_state *state)
 }
 
 static int adv7180_get_pad_format(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *format)
 {
 	struct adv7180_state *state = to_state(sd);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		format->format = *v4l2_subdev_get_try_format(sd, cfg, 0);
+		format->format = *v4l2_subdev_get_try_format(sd, sd_state, 0);
 	} else {
 		adv7180_mbus_fmt(sd, &format->format);
 		format->format.field = state->field;
@@ -715,7 +715,7 @@ static int adv7180_get_pad_format(struct v4l2_subdev *sd,
 }
 
 static int adv7180_set_pad_format(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *format)
 {
 	struct adv7180_state *state = to_state(sd);
@@ -742,7 +742,7 @@ static int adv7180_set_pad_format(struct v4l2_subdev *sd,
 			adv7180_set_power(state, true);
 		}
 	} else {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		*framefmt = format->format;
 	}
 
@@ -750,14 +750,14 @@ static int adv7180_set_pad_format(struct v4l2_subdev *sd,
 }
 
 static int adv7180_init_cfg(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg)
+			    struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = {
-		.which = cfg ? V4L2_SUBDEV_FORMAT_TRY
-			: V4L2_SUBDEV_FORMAT_ACTIVE,
+		.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+		: V4L2_SUBDEV_FORMAT_ACTIVE,
 	};
 
-	return adv7180_set_pad_format(sd, cfg, &fmt);
+	return adv7180_set_pad_format(sd, sd_state, &fmt);
 }
 
 static int adv7180_get_mbus_config(struct v4l2_subdev *sd,
diff --git a/drivers/media/i2c/adv7183.c b/drivers/media/i2c/adv7183.c
index 8bcd632c081aa..92cafdea3f1f1 100644
--- a/drivers/media/i2c/adv7183.c
+++ b/drivers/media/i2c/adv7183.c
@@ -409,7 +409,7 @@ static int adv7183_g_input_status(struct v4l2_subdev *sd, u32 *status)
 }
 
 static int adv7183_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -420,7 +420,7 @@ static int adv7183_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int adv7183_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct adv7183 *decoder = to_adv7183(sd);
@@ -443,12 +443,12 @@ static int adv7183_set_fmt(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		decoder->fmt = *fmt;
 	else
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 	return 0;
 }
 
 static int adv7183_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct adv7183 *decoder = to_adv7183(sd);
diff --git a/drivers/media/i2c/adv748x/adv748x-afe.c b/drivers/media/i2c/adv748x/adv748x-afe.c
index 4052cf67bf16c..02eabe10ab970 100644
--- a/drivers/media/i2c/adv748x/adv748x-afe.c
+++ b/drivers/media/i2c/adv748x/adv748x-afe.c
@@ -331,7 +331,7 @@ static int adv748x_afe_propagate_pixelrate(struct adv748x_afe *afe)
 }
 
 static int adv748x_afe_enum_mbus_code(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index != 0)
@@ -343,7 +343,7 @@ static int adv748x_afe_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int adv748x_afe_get_format(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_format *sdformat)
 {
 	struct adv748x_afe *afe = adv748x_sd_to_afe(sd);
@@ -354,7 +354,8 @@ static int adv748x_afe_get_format(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sdformat->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mbusformat = v4l2_subdev_get_try_format(sd, cfg, sdformat->pad);
+		mbusformat = v4l2_subdev_get_try_format(sd, sd_state,
+							sdformat->pad);
 		sdformat->format = *mbusformat;
 	} else {
 		adv748x_afe_fill_format(afe, &sdformat->format);
@@ -365,7 +366,7 @@ static int adv748x_afe_get_format(struct v4l2_subdev *sd,
 }
 
 static int adv748x_afe_set_format(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_format *sdformat)
 {
 	struct v4l2_mbus_framefmt *mbusformat;
@@ -375,9 +376,9 @@ static int adv748x_afe_set_format(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sdformat->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-		return adv748x_afe_get_format(sd, cfg, sdformat);
+		return adv748x_afe_get_format(sd, sd_state, sdformat);
 
-	mbusformat = v4l2_subdev_get_try_format(sd, cfg, sdformat->pad);
+	mbusformat = v4l2_subdev_get_try_format(sd, sd_state, sdformat->pad);
 	*mbusformat = sdformat->format;
 
 	return 0;
diff --git a/drivers/media/i2c/adv748x/adv748x-csi2.c b/drivers/media/i2c/adv748x/adv748x-csi2.c
index fa9278a08fdee..589e9644fcdcd 100644
--- a/drivers/media/i2c/adv748x/adv748x-csi2.c
+++ b/drivers/media/i2c/adv748x/adv748x-csi2.c
@@ -141,26 +141,26 @@ static const struct v4l2_subdev_video_ops adv748x_csi2_video_ops = {
 
 static struct v4l2_mbus_framefmt *
 adv748x_csi2_get_pad_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    unsigned int pad, u32 which)
 {
 	struct adv748x_csi2 *tx = adv748x_sd_to_csi2(sd);
 
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(sd, cfg, pad);
+		return v4l2_subdev_get_try_format(sd, sd_state, pad);
 
 	return &tx->format;
 }
 
 static int adv748x_csi2_get_format(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *sdformat)
 {
 	struct adv748x_csi2 *tx = adv748x_sd_to_csi2(sd);
 	struct adv748x_state *state = tx->state;
 	struct v4l2_mbus_framefmt *mbusformat;
 
-	mbusformat = adv748x_csi2_get_pad_format(sd, cfg, sdformat->pad,
+	mbusformat = adv748x_csi2_get_pad_format(sd, sd_state, sdformat->pad,
 						 sdformat->which);
 	if (!mbusformat)
 		return -EINVAL;
@@ -175,7 +175,7 @@ static int adv748x_csi2_get_format(struct v4l2_subdev *sd,
 }
 
 static int adv748x_csi2_set_format(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *sdformat)
 {
 	struct adv748x_csi2 *tx = adv748x_sd_to_csi2(sd);
@@ -183,7 +183,7 @@ static int adv748x_csi2_set_format(struct v4l2_subdev *sd,
 	struct v4l2_mbus_framefmt *mbusformat;
 	int ret = 0;
 
-	mbusformat = adv748x_csi2_get_pad_format(sd, cfg, sdformat->pad,
+	mbusformat = adv748x_csi2_get_pad_format(sd, sd_state, sdformat->pad,
 						 sdformat->which);
 	if (!mbusformat)
 		return -EINVAL;
@@ -193,7 +193,7 @@ static int adv748x_csi2_set_format(struct v4l2_subdev *sd,
 	if (sdformat->pad == ADV748X_CSI2_SOURCE) {
 		const struct v4l2_mbus_framefmt *sink_fmt;
 
-		sink_fmt = adv748x_csi2_get_pad_format(sd, cfg,
+		sink_fmt = adv748x_csi2_get_pad_format(sd, sd_state,
 						       ADV748X_CSI2_SINK,
 						       sdformat->which);
 
diff --git a/drivers/media/i2c/adv748x/adv748x-hdmi.c b/drivers/media/i2c/adv748x/adv748x-hdmi.c
index c557f8fdf11a8..52fa7bd756605 100644
--- a/drivers/media/i2c/adv748x/adv748x-hdmi.c
+++ b/drivers/media/i2c/adv748x/adv748x-hdmi.c
@@ -409,7 +409,7 @@ static int adv748x_hdmi_propagate_pixelrate(struct adv748x_hdmi *hdmi)
 }
 
 static int adv748x_hdmi_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index != 0)
@@ -421,7 +421,7 @@ static int adv748x_hdmi_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int adv748x_hdmi_get_format(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *sdformat)
 {
 	struct adv748x_hdmi *hdmi = adv748x_sd_to_hdmi(sd);
@@ -431,7 +431,8 @@ static int adv748x_hdmi_get_format(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sdformat->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mbusformat = v4l2_subdev_get_try_format(sd, cfg, sdformat->pad);
+		mbusformat = v4l2_subdev_get_try_format(sd, sd_state,
+							sdformat->pad);
 		sdformat->format = *mbusformat;
 	} else {
 		adv748x_hdmi_fill_format(hdmi, &sdformat->format);
@@ -442,7 +443,7 @@ static int adv748x_hdmi_get_format(struct v4l2_subdev *sd,
 }
 
 static int adv748x_hdmi_set_format(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *sdformat)
 {
 	struct v4l2_mbus_framefmt *mbusformat;
@@ -451,9 +452,9 @@ static int adv748x_hdmi_set_format(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sdformat->which == V4L2_SUBDEV_FORMAT_ACTIVE)
-		return adv748x_hdmi_get_format(sd, cfg, sdformat);
+		return adv748x_hdmi_get_format(sd, sd_state, sdformat);
 
-	mbusformat = v4l2_subdev_get_try_format(sd, cfg, sdformat->pad);
+	mbusformat = v4l2_subdev_get_try_format(sd, sd_state, sdformat->pad);
 	*mbusformat = sdformat->format;
 
 	return 0;
diff --git a/drivers/media/i2c/adv7511-v4l2.c b/drivers/media/i2c/adv7511-v4l2.c
index 5fc6c06edda1a..41f4e749a859c 100644
--- a/drivers/media/i2c/adv7511-v4l2.c
+++ b/drivers/media/i2c/adv7511-v4l2.c
@@ -1216,7 +1216,7 @@ static int adv7511_get_edid(struct v4l2_subdev *sd, struct v4l2_edid *edid)
 }
 
 static int adv7511_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad != 0)
@@ -1247,7 +1247,7 @@ static void adv7511_fill_format(struct adv7511_state *state,
 }
 
 static int adv7511_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct adv7511_state *state = get_adv7511_state(sd);
@@ -1261,7 +1261,7 @@ static int adv7511_get_fmt(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		format->format.code = fmt->code;
 		format->format.colorspace = fmt->colorspace;
 		format->format.ycbcr_enc = fmt->ycbcr_enc;
@@ -1279,7 +1279,7 @@ static int adv7511_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int adv7511_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct adv7511_state *state = get_adv7511_state(sd);
@@ -1316,7 +1316,7 @@ static int adv7511_set_fmt(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		fmt->code = format->format.code;
 		fmt->colorspace = format->format.colorspace;
 		fmt->ycbcr_enc = format->format.ycbcr_enc;
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index 3049aa2fd0f01..122e1fdccd962 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -1833,7 +1833,7 @@ static int adv76xx_s_routing(struct v4l2_subdev *sd,
 }
 
 static int adv76xx_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct adv76xx_state *state = to_state(sd);
@@ -1913,7 +1913,7 @@ static void adv76xx_setup_format(struct adv76xx_state *state)
 }
 
 static int adv76xx_get_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct adv76xx_state *state = to_state(sd);
@@ -1926,7 +1926,7 @@ static int adv76xx_get_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		format->format.code = fmt->code;
 	} else {
 		format->format.code = state->format->code;
@@ -1936,7 +1936,7 @@ static int adv76xx_get_format(struct v4l2_subdev *sd,
 }
 
 static int adv76xx_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct adv76xx_state *state = to_state(sd);
@@ -1956,7 +1956,7 @@ static int adv76xx_get_selection(struct v4l2_subdev *sd,
 }
 
 static int adv76xx_set_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct adv76xx_state *state = to_state(sd);
@@ -1975,7 +1975,7 @@ static int adv76xx_set_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		fmt->code = format->format.code;
 	} else {
 		state->format = info;
diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c
index 78e61fe6f2f0c..263713963a00f 100644
--- a/drivers/media/i2c/adv7842.c
+++ b/drivers/media/i2c/adv7842.c
@@ -1993,7 +1993,7 @@ static int adv7842_s_routing(struct v4l2_subdev *sd,
 }
 
 static int adv7842_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(adv7842_formats))
@@ -2069,7 +2069,7 @@ static void adv7842_setup_format(struct adv7842_state *state)
 }
 
 static int adv7842_get_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct adv7842_state *state = to_state(sd);
@@ -2097,7 +2097,7 @@ static int adv7842_get_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		format->format.code = fmt->code;
 	} else {
 		format->format.code = state->format->code;
@@ -2107,7 +2107,7 @@ static int adv7842_get_format(struct v4l2_subdev *sd,
 }
 
 static int adv7842_set_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct adv7842_state *state = to_state(sd);
@@ -2117,7 +2117,7 @@ static int adv7842_set_format(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (state->mode == ADV7842_MODE_SDP)
-		return adv7842_get_format(sd, cfg, format);
+		return adv7842_get_format(sd, sd_state, format);
 
 	info = adv7842_format_info(state, format->format.code);
 	if (info == NULL)
@@ -2129,7 +2129,7 @@ static int adv7842_set_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		fmt->code = format->format.code;
 	} else {
 		state->format = info;
diff --git a/drivers/media/i2c/ak881x.c b/drivers/media/i2c/ak881x.c
index 1adaf470c75a2..dc569d5a4d9d9 100644
--- a/drivers/media/i2c/ak881x.c
+++ b/drivers/media/i2c/ak881x.c
@@ -91,7 +91,7 @@ static int ak881x_s_register(struct v4l2_subdev *sd,
 #endif
 
 static int ak881x_fill_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -111,7 +111,7 @@ static int ak881x_fill_fmt(struct v4l2_subdev *sd,
 }
 
 static int ak881x_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index)
@@ -122,7 +122,7 @@ static int ak881x_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ak881x_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c
index a349189a38dbb..a9403a227c6b3 100644
--- a/drivers/media/i2c/ccs/ccs-core.c
+++ b/drivers/media/i2c/ccs/ccs-core.c
@@ -1944,7 +1944,7 @@ static int ccs_set_stream(struct v4l2_subdev *subdev, int enable)
 }
 
 static int ccs_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(subdev);
@@ -1997,13 +1997,13 @@ static u32 __ccs_get_mbus_code(struct v4l2_subdev *subdev, unsigned int pad)
 }
 
 static int __ccs_get_format(struct v4l2_subdev *subdev,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct ccs_subdev *ssd = to_ccs_subdev(subdev);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		fmt->format = *v4l2_subdev_get_try_format(subdev, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(subdev, sd_state,
 							  fmt->pad);
 	} else {
 		struct v4l2_rect *r;
@@ -2023,21 +2023,21 @@ static int __ccs_get_format(struct v4l2_subdev *subdev,
 }
 
 static int ccs_get_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
 	int rval;
 
 	mutex_lock(&sensor->mutex);
-	rval = __ccs_get_format(subdev, cfg, fmt);
+	rval = __ccs_get_format(subdev, sd_state, fmt);
 	mutex_unlock(&sensor->mutex);
 
 	return rval;
 }
 
 static void ccs_get_crop_compose(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_rect **crops,
 				 struct v4l2_rect **comps, int which)
 {
@@ -2054,24 +2054,25 @@ static void ccs_get_crop_compose(struct v4l2_subdev *subdev,
 		if (crops) {
 			for (i = 0; i < subdev->entity.num_pads; i++)
 				crops[i] = v4l2_subdev_get_try_crop(subdev,
-								    cfg, i);
+								    sd_state,
+								    i);
 		}
 		if (comps)
-			*comps = v4l2_subdev_get_try_compose(subdev, cfg,
+			*comps = v4l2_subdev_get_try_compose(subdev, sd_state,
 							     CCS_PAD_SINK);
 	}
 }
 
 /* Changes require propagation only on sink pad. */
 static void ccs_propagate(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg, int which,
+			  struct v4l2_subdev_state *sd_state, int which,
 			  int target)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
 	struct ccs_subdev *ssd = to_ccs_subdev(subdev);
 	struct v4l2_rect *comp, *crops[CCS_PADS];
 
-	ccs_get_crop_compose(subdev, cfg, crops, &comp, which);
+	ccs_get_crop_compose(subdev, sd_state, crops, &comp, which);
 
 	switch (target) {
 	case V4L2_SEL_TGT_CROP:
@@ -2111,7 +2112,7 @@ static const struct ccs_csi_data_format
 }
 
 static int ccs_set_format_source(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
@@ -2122,7 +2123,7 @@ static int ccs_set_format_source(struct v4l2_subdev *subdev,
 	unsigned int i;
 	int rval;
 
-	rval = __ccs_get_format(subdev, cfg, fmt);
+	rval = __ccs_get_format(subdev, sd_state, fmt);
 	if (rval)
 		return rval;
 
@@ -2164,7 +2165,7 @@ static int ccs_set_format_source(struct v4l2_subdev *subdev,
 }
 
 static int ccs_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
@@ -2176,7 +2177,7 @@ static int ccs_set_format(struct v4l2_subdev *subdev,
 	if (fmt->pad == ssd->source_pad) {
 		int rval;
 
-		rval = ccs_set_format_source(subdev, cfg, fmt);
+		rval = ccs_set_format_source(subdev, sd_state, fmt);
 
 		mutex_unlock(&sensor->mutex);
 
@@ -2198,7 +2199,7 @@ static int ccs_set_format(struct v4l2_subdev *subdev,
 		      CCS_LIM(sensor, MIN_Y_OUTPUT_SIZE),
 		      CCS_LIM(sensor, MAX_Y_OUTPUT_SIZE));
 
-	ccs_get_crop_compose(subdev, cfg, crops, NULL, fmt->which);
+	ccs_get_crop_compose(subdev, sd_state, crops, NULL, fmt->which);
 
 	crops[ssd->sink_pad]->left = 0;
 	crops[ssd->sink_pad]->top = 0;
@@ -2206,7 +2207,7 @@ static int ccs_set_format(struct v4l2_subdev *subdev,
 	crops[ssd->sink_pad]->height = fmt->format.height;
 	if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		ssd->sink_fmt = *crops[ssd->sink_pad];
-	ccs_propagate(subdev, cfg, fmt->which, V4L2_SEL_TGT_CROP);
+	ccs_propagate(subdev, sd_state, fmt->which, V4L2_SEL_TGT_CROP);
 
 	mutex_unlock(&sensor->mutex);
 
@@ -2258,7 +2259,7 @@ static int scaling_goodness(struct v4l2_subdev *subdev, int w, int ask_w,
 }
 
 static void ccs_set_compose_binner(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_selection *sel,
 				   struct v4l2_rect **crops,
 				   struct v4l2_rect *comp)
@@ -2306,7 +2307,7 @@ static void ccs_set_compose_binner(struct v4l2_subdev *subdev,
  * result.
  */
 static void ccs_set_compose_scaler(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_selection *sel,
 				   struct v4l2_rect **crops,
 				   struct v4l2_rect *comp)
@@ -2421,25 +2422,25 @@ static void ccs_set_compose_scaler(struct v4l2_subdev *subdev,
 }
 /* We're only called on source pads. This function sets scaling. */
 static int ccs_set_compose(struct v4l2_subdev *subdev,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_selection *sel)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
 	struct ccs_subdev *ssd = to_ccs_subdev(subdev);
 	struct v4l2_rect *comp, *crops[CCS_PADS];
 
-	ccs_get_crop_compose(subdev, cfg, crops, &comp, sel->which);
+	ccs_get_crop_compose(subdev, sd_state, crops, &comp, sel->which);
 
 	sel->r.top = 0;
 	sel->r.left = 0;
 
 	if (ssd == sensor->binner)
-		ccs_set_compose_binner(subdev, cfg, sel, crops, comp);
+		ccs_set_compose_binner(subdev, sd_state, sel, crops, comp);
 	else
-		ccs_set_compose_scaler(subdev, cfg, sel, crops, comp);
+		ccs_set_compose_scaler(subdev, sd_state, sel, crops, comp);
 
 	*comp = sel->r;
-	ccs_propagate(subdev, cfg, sel->which, V4L2_SEL_TGT_COMPOSE);
+	ccs_propagate(subdev, sd_state, sel->which, V4L2_SEL_TGT_COMPOSE);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return ccs_pll_blanking_update(sensor);
@@ -2486,7 +2487,7 @@ static int __ccs_sel_supported(struct v4l2_subdev *subdev,
 }
 
 static int ccs_set_crop(struct v4l2_subdev *subdev,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_selection *sel)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
@@ -2494,7 +2495,7 @@ static int ccs_set_crop(struct v4l2_subdev *subdev,
 	struct v4l2_rect *src_size, *crops[CCS_PADS];
 	struct v4l2_rect _r;
 
-	ccs_get_crop_compose(subdev, cfg, crops, NULL, sel->which);
+	ccs_get_crop_compose(subdev, sd_state, crops, NULL, sel->which);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
 		if (sel->pad == ssd->sink_pad)
@@ -2505,16 +2506,18 @@ static int ccs_set_crop(struct v4l2_subdev *subdev,
 		if (sel->pad == ssd->sink_pad) {
 			_r.left = 0;
 			_r.top = 0;
-			_r.width = v4l2_subdev_get_try_format(subdev, cfg,
+			_r.width = v4l2_subdev_get_try_format(subdev,
+							      sd_state,
 							      sel->pad)
 				->width;
-			_r.height = v4l2_subdev_get_try_format(subdev, cfg,
+			_r.height = v4l2_subdev_get_try_format(subdev,
+							       sd_state,
 							       sel->pad)
 				->height;
 			src_size = &_r;
 		} else {
 			src_size = v4l2_subdev_get_try_compose(
-				subdev, cfg, ssd->sink_pad);
+				subdev, sd_state, ssd->sink_pad);
 		}
 	}
 
@@ -2532,7 +2535,7 @@ static int ccs_set_crop(struct v4l2_subdev *subdev,
 	*crops[sel->pad] = sel->r;
 
 	if (ssd != sensor->pixel_array && sel->pad == CCS_PAD_SINK)
-		ccs_propagate(subdev, cfg, sel->which, V4L2_SEL_TGT_CROP);
+		ccs_propagate(subdev, sd_state, sel->which, V4L2_SEL_TGT_CROP);
 
 	return 0;
 }
@@ -2546,7 +2549,7 @@ static void ccs_get_native_size(struct ccs_subdev *ssd, struct v4l2_rect *r)
 }
 
 static int __ccs_get_selection(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_selection *sel)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
@@ -2559,13 +2562,14 @@ static int __ccs_get_selection(struct v4l2_subdev *subdev,
 	if (ret)
 		return ret;
 
-	ccs_get_crop_compose(subdev, cfg, crops, &comp, sel->which);
+	ccs_get_crop_compose(subdev, sd_state, crops, &comp, sel->which);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
 		sink_fmt = ssd->sink_fmt;
 	} else {
 		struct v4l2_mbus_framefmt *fmt =
-			v4l2_subdev_get_try_format(subdev, cfg, ssd->sink_pad);
+			v4l2_subdev_get_try_format(subdev, sd_state,
+						   ssd->sink_pad);
 
 		sink_fmt.left = 0;
 		sink_fmt.top = 0;
@@ -2596,21 +2600,21 @@ static int __ccs_get_selection(struct v4l2_subdev *subdev,
 }
 
 static int ccs_get_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
 	int rval;
 
 	mutex_lock(&sensor->mutex);
-	rval = __ccs_get_selection(subdev, cfg, sel);
+	rval = __ccs_get_selection(subdev, sd_state, sel);
 	mutex_unlock(&sensor->mutex);
 
 	return rval;
 }
 
 static int ccs_set_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct ccs_sensor *sensor = to_ccs_sensor(subdev);
@@ -2634,10 +2638,10 @@ static int ccs_set_selection(struct v4l2_subdev *subdev,
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP:
-		ret = ccs_set_crop(subdev, cfg, sel);
+		ret = ccs_set_crop(subdev, sd_state, sel);
 		break;
 	case V4L2_SEL_TGT_COMPOSE:
-		ret = ccs_set_compose(subdev, cfg, sel);
+		ret = ccs_set_compose(subdev, sd_state, sel);
 		break;
 	default:
 		ret = -EINVAL;
@@ -3028,9 +3032,9 @@ static int ccs_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	for (i = 0; i < ssd->npads; i++) {
 		struct v4l2_mbus_framefmt *try_fmt =
-			v4l2_subdev_get_try_format(sd, fh->pad, i);
+			v4l2_subdev_get_try_format(sd, fh->state, i);
 		struct v4l2_rect *try_crop =
-			v4l2_subdev_get_try_crop(sd, fh->pad, i);
+			v4l2_subdev_get_try_crop(sd, fh->state, i);
 		struct v4l2_rect *try_comp;
 
 		ccs_get_native_size(ssd, try_crop);
@@ -3043,7 +3047,7 @@ static int ccs_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 		if (ssd != sensor->pixel_array)
 			continue;
 
-		try_comp = v4l2_subdev_get_try_compose(sd, fh->pad, i);
+		try_comp = v4l2_subdev_get_try_compose(sd, fh->state, i);
 		*try_comp = *try_crop;
 	}
 
diff --git a/drivers/media/i2c/cx25840/cx25840-core.c b/drivers/media/i2c/cx25840/cx25840-core.c
index e2e935f789869..dc31944c7d5b1 100644
--- a/drivers/media/i2c/cx25840/cx25840-core.c
+++ b/drivers/media/i2c/cx25840/cx25840-core.c
@@ -1746,7 +1746,7 @@ static int cx25840_s_ctrl(struct v4l2_ctrl *ctrl)
 /* ----------------------------------------------------------------------- */
 
 static int cx25840_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
diff --git a/drivers/media/i2c/et8ek8/et8ek8_driver.c b/drivers/media/i2c/et8ek8/et8ek8_driver.c
index bb3eac5e005e4..c7b91c0c03b5a 100644
--- a/drivers/media/i2c/et8ek8/et8ek8_driver.c
+++ b/drivers/media/i2c/et8ek8/et8ek8_driver.c
@@ -882,7 +882,7 @@ out:
  */
 #define MAX_FMTS 4
 static int et8ek8_enum_mbus_code(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct et8ek8_reglist **list =
@@ -920,7 +920,7 @@ static int et8ek8_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int et8ek8_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct et8ek8_reglist **list =
@@ -958,7 +958,7 @@ static int et8ek8_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int et8ek8_enum_frame_ival(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct et8ek8_reglist **list =
@@ -990,12 +990,13 @@ static int et8ek8_enum_frame_ival(struct v4l2_subdev *subdev,
 
 static struct v4l2_mbus_framefmt *
 __et8ek8_get_pad_format(struct et8ek8_sensor *sensor,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&sensor->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&sensor->subdev, sd_state,
+						  pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &sensor->format;
 	default:
@@ -1004,13 +1005,14 @@ __et8ek8_get_pad_format(struct et8ek8_sensor *sensor,
 }
 
 static int et8ek8_get_pad_format(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct et8ek8_sensor *sensor = to_et8ek8_sensor(subdev);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __et8ek8_get_pad_format(sensor, cfg, fmt->pad, fmt->which);
+	format = __et8ek8_get_pad_format(sensor, sd_state, fmt->pad,
+					 fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -1020,14 +1022,15 @@ static int et8ek8_get_pad_format(struct v4l2_subdev *subdev,
 }
 
 static int et8ek8_set_pad_format(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct et8ek8_sensor *sensor = to_et8ek8_sensor(subdev);
 	struct v4l2_mbus_framefmt *format;
 	struct et8ek8_reglist *reglist;
 
-	format = __et8ek8_get_pad_format(sensor, cfg, fmt->pad, fmt->which);
+	format = __et8ek8_get_pad_format(sensor, sd_state, fmt->pad,
+					 fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -1327,7 +1330,7 @@ static int et8ek8_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	struct et8ek8_reglist *reglist;
 
 	reglist = et8ek8_reglist_find_type(&meta_reglist, ET8EK8_REGLIST_MODE);
-	format = __et8ek8_get_pad_format(sensor, fh->pad, 0,
+	format = __et8ek8_get_pad_format(sensor, fh->state, 0,
 					 V4L2_SUBDEV_FORMAT_TRY);
 	et8ek8_reglist_to_mbus(reglist, format);
 
diff --git a/drivers/media/i2c/hi556.c b/drivers/media/i2c/hi556.c
index 627ccfa34835d..8db1cbedc1fd8 100644
--- a/drivers/media/i2c/hi556.c
+++ b/drivers/media/i2c/hi556.c
@@ -875,7 +875,7 @@ error:
 }
 
 static int hi556_set_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct hi556 *hi556 = to_hi556(sd);
@@ -890,7 +890,7 @@ static int hi556_set_format(struct v4l2_subdev *sd,
 	mutex_lock(&hi556->mutex);
 	hi556_assign_pad_format(mode, &fmt->format);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		hi556->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(hi556->link_freq, mode->link_freq_index);
@@ -917,14 +917,15 @@ static int hi556_set_format(struct v4l2_subdev *sd,
 }
 
 static int hi556_get_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct hi556 *hi556 = to_hi556(sd);
 
 	mutex_lock(&hi556->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&hi556->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&hi556->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		hi556_assign_pad_format(hi556->cur_mode, &fmt->format);
@@ -935,7 +936,7 @@ static int hi556_get_format(struct v4l2_subdev *sd,
 }
 
 static int hi556_enum_mbus_code(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -947,7 +948,7 @@ static int hi556_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int hi556_enum_frame_size(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -970,7 +971,7 @@ static int hi556_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&hi556->mutex);
 	hi556_assign_pad_format(&supported_modes[0],
-				v4l2_subdev_get_try_format(sd, fh->pad, 0));
+				v4l2_subdev_get_try_format(sd, fh->state, 0));
 	mutex_unlock(&hi556->mutex);
 
 	return 0;
diff --git a/drivers/media/i2c/imx208.c b/drivers/media/i2c/imx208.c
index 9ed261ea72553..6f3d9c1b5879f 100644
--- a/drivers/media/i2c/imx208.c
+++ b/drivers/media/i2c/imx208.c
@@ -395,7 +395,7 @@ static int imx208_write_regs(struct imx208 *imx208,
 static int imx208_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	/* Initialize try_fmt */
 	try_fmt->width = supported_modes[0].width;
@@ -500,7 +500,7 @@ static const struct v4l2_ctrl_config imx208_digital_gain_control = {
 };
 
 static int imx208_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct imx208 *imx208 = to_imx208(sd);
@@ -514,7 +514,7 @@ static int imx208_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx208_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct imx208 *imx208 = to_imx208(sd);
@@ -544,11 +544,12 @@ static void imx208_mode_to_pad_format(struct imx208 *imx208,
 }
 
 static int __imx208_get_pad_format(struct imx208 *imx208,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt)
 {
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&imx208->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&imx208->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		imx208_mode_to_pad_format(imx208, imx208->cur_mode, fmt);
@@ -557,21 +558,21 @@ static int __imx208_get_pad_format(struct imx208 *imx208,
 }
 
 static int imx208_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx208 *imx208 = to_imx208(sd);
 	int ret;
 
 	mutex_lock(&imx208->imx208_mx);
-	ret = __imx208_get_pad_format(imx208, cfg, fmt);
+	ret = __imx208_get_pad_format(imx208, sd_state, fmt);
 	mutex_unlock(&imx208->imx208_mx);
 
 	return ret;
 }
 
 static int imx208_set_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx208 *imx208 = to_imx208(sd);
@@ -590,7 +591,7 @@ static int imx208_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	imx208_mode_to_pad_format(imx208, mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		imx208->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(imx208->link_freq, mode->link_freq_index);
diff --git a/drivers/media/i2c/imx214.c b/drivers/media/i2c/imx214.c
index 1a770a530cf52..83c1737abeece 100644
--- a/drivers/media/i2c/imx214.c
+++ b/drivers/media/i2c/imx214.c
@@ -474,7 +474,7 @@ static int __maybe_unused imx214_power_off(struct device *dev)
 }
 
 static int imx214_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -486,7 +486,7 @@ static int imx214_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx214_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->code != IMX214_MBUS_CODE)
@@ -534,13 +534,13 @@ static const struct v4l2_subdev_core_ops imx214_core_ops = {
 
 static struct v4l2_mbus_framefmt *
 __imx214_get_pad_format(struct imx214 *imx214,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad,
 			enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&imx214->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&imx214->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &imx214->fmt;
 	default:
@@ -549,13 +549,14 @@ __imx214_get_pad_format(struct imx214 *imx214,
 }
 
 static int imx214_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct imx214 *imx214 = to_imx214(sd);
 
 	mutex_lock(&imx214->mutex);
-	format->format = *__imx214_get_pad_format(imx214, cfg, format->pad,
+	format->format = *__imx214_get_pad_format(imx214, sd_state,
+						  format->pad,
 						  format->which);
 	mutex_unlock(&imx214->mutex);
 
@@ -563,12 +564,13 @@ static int imx214_get_format(struct v4l2_subdev *sd,
 }
 
 static struct v4l2_rect *
-__imx214_get_pad_crop(struct imx214 *imx214, struct v4l2_subdev_pad_config *cfg,
+__imx214_get_pad_crop(struct imx214 *imx214,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&imx214->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&imx214->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &imx214->crop;
 	default:
@@ -577,7 +579,7 @@ __imx214_get_pad_crop(struct imx214 *imx214, struct v4l2_subdev_pad_config *cfg,
 }
 
 static int imx214_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct imx214 *imx214 = to_imx214(sd);
@@ -587,7 +589,8 @@ static int imx214_set_format(struct v4l2_subdev *sd,
 
 	mutex_lock(&imx214->mutex);
 
-	__crop = __imx214_get_pad_crop(imx214, cfg, format->pad, format->which);
+	__crop = __imx214_get_pad_crop(imx214, sd_state, format->pad,
+				       format->which);
 
 	mode = v4l2_find_nearest_size(imx214_modes,
 				      ARRAY_SIZE(imx214_modes), width, height,
@@ -597,7 +600,7 @@ static int imx214_set_format(struct v4l2_subdev *sd,
 	__crop->width = mode->width;
 	__crop->height = mode->height;
 
-	__format = __imx214_get_pad_format(imx214, cfg, format->pad,
+	__format = __imx214_get_pad_format(imx214, sd_state, format->pad,
 					   format->which);
 	__format->width = __crop->width;
 	__format->height = __crop->height;
@@ -617,7 +620,7 @@ static int imx214_set_format(struct v4l2_subdev *sd,
 }
 
 static int imx214_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct imx214 *imx214 = to_imx214(sd);
@@ -626,22 +629,22 @@ static int imx214_get_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	mutex_lock(&imx214->mutex);
-	sel->r = *__imx214_get_pad_crop(imx214, cfg, sel->pad,
+	sel->r = *__imx214_get_pad_crop(imx214, sd_state, sel->pad,
 					sel->which);
 	mutex_unlock(&imx214->mutex);
 	return 0;
 }
 
 static int imx214_entity_init_cfg(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = { };
 
-	fmt.which = cfg ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
 	fmt.format.width = imx214_modes[0].width;
 	fmt.format.height = imx214_modes[0].height;
 
-	imx214_set_format(subdev, cfg, &fmt);
+	imx214_set_format(subdev, sd_state, &fmt);
 
 	return 0;
 }
@@ -808,7 +811,7 @@ static int imx214_g_frame_interval(struct v4l2_subdev *subdev,
 }
 
 static int imx214_enum_frame_interval(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_interval_enum *fie)
 {
 	const struct imx214_mode *mode;
diff --git a/drivers/media/i2c/imx219.c b/drivers/media/i2c/imx219.c
index 74a0bf9b088b2..e10af3f74b38f 100644
--- a/drivers/media/i2c/imx219.c
+++ b/drivers/media/i2c/imx219.c
@@ -689,7 +689,7 @@ static int imx219_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct imx219 *imx219 = to_imx219(sd);
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 	struct v4l2_rect *try_crop;
 
 	mutex_lock(&imx219->mutex);
@@ -702,7 +702,7 @@ static int imx219_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	try_fmt->field = V4L2_FIELD_NONE;
 
 	/* Initialize try_crop rectangle. */
-	try_crop = v4l2_subdev_get_try_crop(sd, fh->pad, 0);
+	try_crop = v4l2_subdev_get_try_crop(sd, fh->state, 0);
 	try_crop->top = IMX219_PIXEL_ARRAY_TOP;
 	try_crop->left = IMX219_PIXEL_ARRAY_LEFT;
 	try_crop->width = IMX219_PIXEL_ARRAY_WIDTH;
@@ -803,7 +803,7 @@ static const struct v4l2_ctrl_ops imx219_ctrl_ops = {
 };
 
 static int imx219_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct imx219 *imx219 = to_imx219(sd);
@@ -819,7 +819,7 @@ static int imx219_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx219_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct imx219 *imx219 = to_imx219(sd);
@@ -863,12 +863,13 @@ static void imx219_update_pad_format(struct imx219 *imx219,
 }
 
 static int __imx219_get_pad_format(struct imx219 *imx219,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt)
 {
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *try_fmt =
-			v4l2_subdev_get_try_format(&imx219->sd, cfg, fmt->pad);
+			v4l2_subdev_get_try_format(&imx219->sd, sd_state,
+						   fmt->pad);
 		/* update the code which could change due to vflip or hflip: */
 		try_fmt->code = imx219_get_format_code(imx219, try_fmt->code);
 		fmt->format = *try_fmt;
@@ -882,21 +883,21 @@ static int __imx219_get_pad_format(struct imx219 *imx219,
 }
 
 static int imx219_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx219 *imx219 = to_imx219(sd);
 	int ret;
 
 	mutex_lock(&imx219->mutex);
-	ret = __imx219_get_pad_format(imx219, cfg, fmt);
+	ret = __imx219_get_pad_format(imx219, sd_state, fmt);
 	mutex_unlock(&imx219->mutex);
 
 	return ret;
 }
 
 static int imx219_set_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx219 *imx219 = to_imx219(sd);
@@ -922,7 +923,7 @@ static int imx219_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	imx219_update_pad_format(imx219, mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else if (imx219->mode != mode ||
 		   imx219->fmt.code != fmt->format.code) {
@@ -979,12 +980,13 @@ static int imx219_set_framefmt(struct imx219 *imx219)
 }
 
 static const struct v4l2_rect *
-__imx219_get_pad_crop(struct imx219 *imx219, struct v4l2_subdev_pad_config *cfg,
+__imx219_get_pad_crop(struct imx219 *imx219,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&imx219->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&imx219->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &imx219->mode->crop;
 	}
@@ -993,7 +995,7 @@ __imx219_get_pad_crop(struct imx219 *imx219, struct v4l2_subdev_pad_config *cfg,
 }
 
 static int imx219_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	switch (sel->target) {
@@ -1001,7 +1003,7 @@ static int imx219_get_selection(struct v4l2_subdev *sd,
 		struct imx219 *imx219 = to_imx219(sd);
 
 		mutex_lock(&imx219->mutex);
-		sel->r = *__imx219_get_pad_crop(imx219, cfg, sel->pad,
+		sel->r = *__imx219_get_pad_crop(imx219, sd_state, sel->pad,
 						sel->which);
 		mutex_unlock(&imx219->mutex);
 
diff --git a/drivers/media/i2c/imx258.c b/drivers/media/i2c/imx258.c
index 90529424d5b66..7ab9e5f9f2676 100644
--- a/drivers/media/i2c/imx258.c
+++ b/drivers/media/i2c/imx258.c
@@ -710,7 +710,7 @@ static int imx258_write_regs(struct imx258 *imx258,
 static int imx258_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	/* Initialize try_fmt */
 	try_fmt->width = supported_modes[0].width;
@@ -820,7 +820,7 @@ static const struct v4l2_ctrl_ops imx258_ctrl_ops = {
 };
 
 static int imx258_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	/* Only one bayer order(GRBG) is supported */
@@ -833,7 +833,7 @@ static int imx258_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx258_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -860,11 +860,12 @@ static void imx258_update_pad_format(const struct imx258_mode *mode,
 }
 
 static int __imx258_get_pad_format(struct imx258 *imx258,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt)
 {
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&imx258->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&imx258->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		imx258_update_pad_format(imx258->cur_mode, fmt);
@@ -873,21 +874,21 @@ static int __imx258_get_pad_format(struct imx258 *imx258,
 }
 
 static int imx258_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx258 *imx258 = to_imx258(sd);
 	int ret;
 
 	mutex_lock(&imx258->mutex);
-	ret = __imx258_get_pad_format(imx258, cfg, fmt);
+	ret = __imx258_get_pad_format(imx258, sd_state, fmt);
 	mutex_unlock(&imx258->mutex);
 
 	return ret;
 }
 
 static int imx258_set_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx258 *imx258 = to_imx258(sd);
@@ -909,7 +910,7 @@ static int imx258_set_pad_format(struct v4l2_subdev *sd,
 		fmt->format.width, fmt->format.height);
 	imx258_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else {
 		imx258->cur_mode = mode;
diff --git a/drivers/media/i2c/imx274.c b/drivers/media/i2c/imx274.c
index ee2127436f0bc..0dce92872176d 100644
--- a/drivers/media/i2c/imx274.c
+++ b/drivers/media/i2c/imx274.c
@@ -996,7 +996,7 @@ static int imx274_binning_goodness(struct stimx274 *imx274,
  * Must be called with imx274->lock locked.
  *
  * @imx274: The device object
- * @cfg:    The pad config we are editing for TRY requests
+ * @sd_state: The subdev state we are editing for TRY requests
  * @which:  V4L2_SUBDEV_FORMAT_ACTIVE or V4L2_SUBDEV_FORMAT_TRY from the caller
  * @width:  Input-output parameter: set to the desired width before
  *          the call, contains the chosen value after returning successfully
@@ -1005,7 +1005,7 @@ static int imx274_binning_goodness(struct stimx274 *imx274,
  *          available (when called from set_fmt)
  */
 static int __imx274_change_compose(struct stimx274 *imx274,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   u32 which,
 				   u32 *width,
 				   u32 *height,
@@ -1019,8 +1019,8 @@ static int __imx274_change_compose(struct stimx274 *imx274,
 	int best_goodness = INT_MIN;
 
 	if (which == V4L2_SUBDEV_FORMAT_TRY) {
-		cur_crop = &cfg->try_crop;
-		tgt_fmt = &cfg->try_fmt;
+		cur_crop = &sd_state->pads->try_crop;
+		tgt_fmt = &sd_state->pads->try_fmt;
 	} else {
 		cur_crop = &imx274->crop;
 		tgt_fmt = &imx274->format;
@@ -1061,7 +1061,7 @@ static int __imx274_change_compose(struct stimx274 *imx274,
 /**
  * imx274_get_fmt - Get the pad format
  * @sd: Pointer to V4L2 Sub device structure
- * @cfg: Pointer to sub device pad information structure
+ * @sd_state: Pointer to sub device state structure
  * @fmt: Pointer to pad level media bus format
  *
  * This function is used to get the pad format information.
@@ -1069,7 +1069,7 @@ static int __imx274_change_compose(struct stimx274 *imx274,
  * Return: 0 on success
  */
 static int imx274_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct stimx274 *imx274 = to_imx274(sd);
@@ -1083,7 +1083,7 @@ static int imx274_get_fmt(struct v4l2_subdev *sd,
 /**
  * imx274_set_fmt - This is used to set the pad format
  * @sd: Pointer to V4L2 Sub device structure
- * @cfg: Pointer to sub device pad information structure
+ * @sd_state: Pointer to sub device state information structure
  * @format: Pointer to pad level media bus format
  *
  * This function is used to set the pad format.
@@ -1091,7 +1091,7 @@ static int imx274_get_fmt(struct v4l2_subdev *sd,
  * Return: 0 on success
  */
 static int imx274_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1100,7 +1100,7 @@ static int imx274_set_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&imx274->lock);
 
-	err = __imx274_change_compose(imx274, cfg, format->which,
+	err = __imx274_change_compose(imx274, sd_state, format->which,
 				      &fmt->width, &fmt->height, 0);
 
 	if (err)
@@ -1113,7 +1113,7 @@ static int imx274_set_fmt(struct v4l2_subdev *sd,
 	 */
 	fmt->field = V4L2_FIELD_NONE;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 	else
 		imx274->format = *fmt;
 
@@ -1124,7 +1124,7 @@ out:
 }
 
 static int imx274_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct stimx274 *imx274 = to_imx274(sd);
@@ -1144,8 +1144,8 @@ static int imx274_get_selection(struct v4l2_subdev *sd,
 	}
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		src_crop = &cfg->try_crop;
-		src_fmt = &cfg->try_fmt;
+		src_crop = &sd_state->pads->try_crop;
+		src_fmt = &sd_state->pads->try_fmt;
 	} else {
 		src_crop = &imx274->crop;
 		src_fmt = &imx274->format;
@@ -1179,7 +1179,7 @@ static int imx274_get_selection(struct v4l2_subdev *sd,
 }
 
 static int imx274_set_selection_crop(struct stimx274 *imx274,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct v4l2_rect *tgt_crop;
@@ -1216,7 +1216,7 @@ static int imx274_set_selection_crop(struct stimx274 *imx274,
 	sel->r = new_crop;
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY)
-		tgt_crop = &cfg->try_crop;
+		tgt_crop = &sd_state->pads->try_crop;
 	else
 		tgt_crop = &imx274->crop;
 
@@ -1230,7 +1230,7 @@ static int imx274_set_selection_crop(struct stimx274 *imx274,
 
 	/* if crop size changed then reset the output image size */
 	if (size_changed)
-		__imx274_change_compose(imx274, cfg, sel->which,
+		__imx274_change_compose(imx274, sd_state, sel->which,
 					&new_crop.width, &new_crop.height,
 					sel->flags);
 
@@ -1240,7 +1240,7 @@ static int imx274_set_selection_crop(struct stimx274 *imx274,
 }
 
 static int imx274_set_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct stimx274 *imx274 = to_imx274(sd);
@@ -1249,13 +1249,13 @@ static int imx274_set_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sel->target == V4L2_SEL_TGT_CROP)
-		return imx274_set_selection_crop(imx274, cfg, sel);
+		return imx274_set_selection_crop(imx274, sd_state, sel);
 
 	if (sel->target == V4L2_SEL_TGT_COMPOSE) {
 		int err;
 
 		mutex_lock(&imx274->lock);
-		err =  __imx274_change_compose(imx274, cfg, sel->which,
+		err =  __imx274_change_compose(imx274, sd_state, sel->which,
 					       &sel->r.width, &sel->r.height,
 					       sel->flags);
 		mutex_unlock(&imx274->lock);
diff --git a/drivers/media/i2c/imx290.c b/drivers/media/i2c/imx290.c
index 06020e648a97c..bf7a6c37ca5da 100644
--- a/drivers/media/i2c/imx290.c
+++ b/drivers/media/i2c/imx290.c
@@ -516,7 +516,7 @@ static const struct v4l2_ctrl_ops imx290_ctrl_ops = {
 };
 
 static int imx290_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(imx290_formats))
@@ -528,7 +528,7 @@ static int imx290_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx290_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	const struct imx290 *imx290 = to_imx290(sd);
@@ -550,7 +550,7 @@ static int imx290_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int imx290_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct imx290 *imx290 = to_imx290(sd);
@@ -559,7 +559,7 @@ static int imx290_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&imx290->lock);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		framefmt = v4l2_subdev_get_try_format(&imx290->sd, cfg,
+		framefmt = v4l2_subdev_get_try_format(&imx290->sd, sd_state,
 						      fmt->pad);
 	else
 		framefmt = &imx290->current_format;
@@ -596,8 +596,8 @@ static u64 imx290_calc_pixel_rate(struct imx290 *imx290)
 }
 
 static int imx290_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
-		      struct v4l2_subdev_format *fmt)
+			  struct v4l2_subdev_state *sd_state,
+			  struct v4l2_subdev_format *fmt)
 {
 	struct imx290 *imx290 = to_imx290(sd);
 	const struct imx290_mode *mode;
@@ -624,7 +624,7 @@ static int imx290_set_fmt(struct v4l2_subdev *sd,
 	fmt->format.field = V4L2_FIELD_NONE;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		format = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		format = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 	} else {
 		format = &imx290->current_format;
 		imx290->current_mode = mode;
@@ -646,15 +646,15 @@ static int imx290_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int imx290_entity_init_cfg(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = { 0 };
 
-	fmt.which = cfg ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
 	fmt.format.width = 1920;
 	fmt.format.height = 1080;
 
-	imx290_set_fmt(subdev, cfg, &fmt);
+	imx290_set_fmt(subdev, sd_state, &fmt);
 
 	return 0;
 }
diff --git a/drivers/media/i2c/imx319.c b/drivers/media/i2c/imx319.c
index 4e0a8c9d271f8..dba0854ab5aad 100644
--- a/drivers/media/i2c/imx319.c
+++ b/drivers/media/i2c/imx319.c
@@ -1860,7 +1860,7 @@ static int imx319_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct imx319 *imx319 = to_imx319(sd);
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	mutex_lock(&imx319->mutex);
 
@@ -1947,7 +1947,7 @@ static const struct v4l2_ctrl_ops imx319_ctrl_ops = {
 };
 
 static int imx319_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct imx319 *imx319 = to_imx319(sd);
@@ -1963,7 +1963,7 @@ static int imx319_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx319_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct imx319 *imx319 = to_imx319(sd);
@@ -1997,14 +1997,14 @@ static void imx319_update_pad_format(struct imx319 *imx319,
 }
 
 static int imx319_do_get_pad_format(struct imx319 *imx319,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct v4l2_mbus_framefmt *framefmt;
 	struct v4l2_subdev *sd = &imx319->sd;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *framefmt;
 	} else {
 		imx319_update_pad_format(imx319, imx319->cur_mode, fmt);
@@ -2014,14 +2014,14 @@ static int imx319_do_get_pad_format(struct imx319 *imx319,
 }
 
 static int imx319_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx319 *imx319 = to_imx319(sd);
 	int ret;
 
 	mutex_lock(&imx319->mutex);
-	ret = imx319_do_get_pad_format(imx319, cfg, fmt);
+	ret = imx319_do_get_pad_format(imx319, sd_state, fmt);
 	mutex_unlock(&imx319->mutex);
 
 	return ret;
@@ -2029,7 +2029,7 @@ static int imx319_get_pad_format(struct v4l2_subdev *sd,
 
 static int
 imx319_set_pad_format(struct v4l2_subdev *sd,
-		      struct v4l2_subdev_pad_config *cfg,
+		      struct v4l2_subdev_state *sd_state,
 		      struct v4l2_subdev_format *fmt)
 {
 	struct imx319 *imx319 = to_imx319(sd);
@@ -2055,7 +2055,7 @@ imx319_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	imx319_update_pad_format(imx319, mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else {
 		imx319->cur_mode = mode;
diff --git a/drivers/media/i2c/imx334.c b/drivers/media/i2c/imx334.c
index 23f28606e570f..062125501788a 100644
--- a/drivers/media/i2c/imx334.c
+++ b/drivers/media/i2c/imx334.c
@@ -497,13 +497,13 @@ static const struct v4l2_ctrl_ops imx334_ctrl_ops = {
 /**
  * imx334_enum_mbus_code() - Enumerate V4L2 sub-device mbus codes
  * @sd: pointer to imx334 V4L2 sub-device structure
- * @cfg: V4L2 sub-device pad configuration
+ * @sd_state: V4L2 sub-device state
  * @code: V4L2 sub-device code enumeration need to be filled
  *
  * Return: 0 if successful, error code otherwise.
  */
 static int imx334_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -517,13 +517,13 @@ static int imx334_enum_mbus_code(struct v4l2_subdev *sd,
 /**
  * imx334_enum_frame_size() - Enumerate V4L2 sub-device frame sizes
  * @sd: pointer to imx334 V4L2 sub-device structure
- * @cfg: V4L2 sub-device pad configuration
+ * @sd_state: V4L2 sub-device state
  * @fsize: V4L2 sub-device size enumeration need to be filled
  *
  * Return: 0 if successful, error code otherwise.
  */
 static int imx334_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fsize)
 {
 	if (fsize->index > 0)
@@ -564,13 +564,13 @@ static void imx334_fill_pad_format(struct imx334 *imx334,
 /**
  * imx334_get_pad_format() - Get subdevice pad format
  * @sd: pointer to imx334 V4L2 sub-device structure
- * @cfg: V4L2 sub-device pad configuration
+ * @sd_state: V4L2 sub-device state
  * @fmt: V4L2 sub-device format need to be set
  *
  * Return: 0 if successful, error code otherwise.
  */
 static int imx334_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx334 *imx334 = to_imx334(sd);
@@ -580,7 +580,7 @@ static int imx334_get_pad_format(struct v4l2_subdev *sd,
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *framefmt;
 
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *framefmt;
 	} else {
 		imx334_fill_pad_format(imx334, imx334->cur_mode, fmt);
@@ -594,13 +594,13 @@ static int imx334_get_pad_format(struct v4l2_subdev *sd,
 /**
  * imx334_set_pad_format() - Set subdevice pad format
  * @sd: pointer to imx334 V4L2 sub-device structure
- * @cfg: V4L2 sub-device pad configuration
+ * @sd_state: V4L2 sub-device state
  * @fmt: V4L2 sub-device format need to be set
  *
  * Return: 0 if successful, error code otherwise.
  */
 static int imx334_set_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx334 *imx334 = to_imx334(sd);
@@ -615,7 +615,7 @@ static int imx334_set_pad_format(struct v4l2_subdev *sd,
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *framefmt;
 
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else {
 		ret = imx334_update_controls(imx334, mode);
@@ -631,20 +631,20 @@ static int imx334_set_pad_format(struct v4l2_subdev *sd,
 /**
  * imx334_init_pad_cfg() - Initialize sub-device pad configuration
  * @sd: pointer to imx334 V4L2 sub-device structure
- * @cfg: V4L2 sub-device pad configuration
+ * @sd_state: V4L2 sub-device state
  *
  * Return: 0 if successful, error code otherwise.
  */
 static int imx334_init_pad_cfg(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg)
+			       struct v4l2_subdev_state *sd_state)
 {
 	struct imx334 *imx334 = to_imx334(sd);
 	struct v4l2_subdev_format fmt = { 0 };
 
-	fmt.which = cfg ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
 	imx334_fill_pad_format(imx334, &supported_mode, &fmt);
 
-	return imx334_set_pad_format(sd, cfg, &fmt);
+	return imx334_set_pad_format(sd, sd_state, &fmt);
 }
 
 /**
diff --git a/drivers/media/i2c/imx355.c b/drivers/media/i2c/imx355.c
index 93f13a04439a8..cb51c81786bd3 100644
--- a/drivers/media/i2c/imx355.c
+++ b/drivers/media/i2c/imx355.c
@@ -1161,7 +1161,7 @@ static int imx355_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct imx355 *imx355 = to_imx355(sd);
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	mutex_lock(&imx355->mutex);
 
@@ -1248,7 +1248,7 @@ static const struct v4l2_ctrl_ops imx355_ctrl_ops = {
 };
 
 static int imx355_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct imx355 *imx355 = to_imx355(sd);
@@ -1264,7 +1264,7 @@ static int imx355_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int imx355_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct imx355 *imx355 = to_imx355(sd);
@@ -1298,14 +1298,14 @@ static void imx355_update_pad_format(struct imx355 *imx355,
 }
 
 static int imx355_do_get_pad_format(struct imx355 *imx355,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct v4l2_mbus_framefmt *framefmt;
 	struct v4l2_subdev *sd = &imx355->sd;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *framefmt;
 	} else {
 		imx355_update_pad_format(imx355, imx355->cur_mode, fmt);
@@ -1315,14 +1315,14 @@ static int imx355_do_get_pad_format(struct imx355 *imx355,
 }
 
 static int imx355_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct imx355 *imx355 = to_imx355(sd);
 	int ret;
 
 	mutex_lock(&imx355->mutex);
-	ret = imx355_do_get_pad_format(imx355, cfg, fmt);
+	ret = imx355_do_get_pad_format(imx355, sd_state, fmt);
 	mutex_unlock(&imx355->mutex);
 
 	return ret;
@@ -1330,7 +1330,7 @@ static int imx355_get_pad_format(struct v4l2_subdev *sd,
 
 static int
 imx355_set_pad_format(struct v4l2_subdev *sd,
-		      struct v4l2_subdev_pad_config *cfg,
+		      struct v4l2_subdev_state *sd_state,
 		      struct v4l2_subdev_format *fmt)
 {
 	struct imx355 *imx355 = to_imx355(sd);
@@ -1356,7 +1356,7 @@ imx355_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	imx355_update_pad_format(imx355, mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else {
 		imx355->cur_mode = mode;
diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c
index 21666d705e372..e29be0242f078 100644
--- a/drivers/media/i2c/m5mols/m5mols_core.c
+++ b/drivers/media/i2c/m5mols/m5mols_core.c
@@ -539,17 +539,19 @@ static int __find_resolution(struct v4l2_subdev *sd,
 }
 
 static struct v4l2_mbus_framefmt *__find_format(struct m5mols_info *info,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				enum v4l2_subdev_format_whence which,
 				enum m5mols_restype type)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return cfg ? v4l2_subdev_get_try_format(&info->sd, cfg, 0) : NULL;
+		return sd_state ? v4l2_subdev_get_try_format(&info->sd,
+							     sd_state, 0) : NULL;
 
 	return &info->ffmt[type];
 }
 
-static int m5mols_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int m5mols_get_fmt(struct v4l2_subdev *sd,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct m5mols_info *info = to_m5mols(sd);
@@ -558,7 +560,7 @@ static int m5mols_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 
 	mutex_lock(&info->lock);
 
-	format = __find_format(info, cfg, fmt->which, info->res_type);
+	format = __find_format(info, sd_state, fmt->which, info->res_type);
 	if (format)
 		fmt->format = *format;
 	else
@@ -568,7 +570,8 @@ static int m5mols_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	return ret;
 }
 
-static int m5mols_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int m5mols_set_fmt(struct v4l2_subdev *sd,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct m5mols_info *info = to_m5mols(sd);
@@ -582,7 +585,7 @@ static int m5mols_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	if (ret < 0)
 		return ret;
 
-	sfmt = __find_format(info, cfg, fmt->which, type);
+	sfmt = __find_format(info, sd_state, fmt->which, type);
 	if (!sfmt)
 		return 0;
 
@@ -648,7 +651,7 @@ static int m5mols_set_frame_desc(struct v4l2_subdev *sd, unsigned int pad,
 
 
 static int m5mols_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (!code || code->index >= SIZE_DEFAULT_FFMT)
@@ -909,7 +912,9 @@ static const struct v4l2_subdev_core_ops m5mols_core_ops = {
  */
 static int m5mols_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd, fh->pad, 0);
+	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd,
+								       fh->state,
+								       0);
 
 	*format = m5mols_default_ffmt[0];
 	return 0;
diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index 6fd4d59fcc723..4631bfeeacc0f 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -712,7 +712,7 @@ static int max9286_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int max9286_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -725,12 +725,12 @@ static int max9286_enum_mbus_code(struct v4l2_subdev *sd,
 
 static struct v4l2_mbus_framefmt *
 max9286_get_pad_format(struct max9286_priv *priv,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&priv->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&priv->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &priv->fmt[pad];
 	default:
@@ -739,7 +739,7 @@ max9286_get_pad_format(struct max9286_priv *priv,
 }
 
 static int max9286_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct max9286_priv *priv = sd_to_max9286(sd);
@@ -760,7 +760,8 @@ static int max9286_set_fmt(struct v4l2_subdev *sd,
 		break;
 	}
 
-	cfg_fmt = max9286_get_pad_format(priv, cfg, format->pad, format->which);
+	cfg_fmt = max9286_get_pad_format(priv, sd_state, format->pad,
+					 format->which);
 	if (!cfg_fmt)
 		return -EINVAL;
 
@@ -772,7 +773,7 @@ static int max9286_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int max9286_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct max9286_priv *priv = sd_to_max9286(sd);
@@ -788,7 +789,7 @@ static int max9286_get_fmt(struct v4l2_subdev *sd,
 	if (pad == MAX9286_SRC_PAD)
 		pad = __ffs(priv->bound_sources);
 
-	cfg_fmt = max9286_get_pad_format(priv, cfg, pad, format->which);
+	cfg_fmt = max9286_get_pad_format(priv, sd_state, pad, format->which);
 	if (!cfg_fmt)
 		return -EINVAL;
 
@@ -832,7 +833,7 @@ static int max9286_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
 	unsigned int i;
 
 	for (i = 0; i < MAX9286_N_SINKS; i++) {
-		format = v4l2_subdev_get_try_format(subdev, fh->pad, i);
+		format = v4l2_subdev_get_try_format(subdev, fh->state, i);
 		max9286_init_format(format);
 	}
 
diff --git a/drivers/media/i2c/ml86v7667.c b/drivers/media/i2c/ml86v7667.c
index ff212335326a7..4a1410ebb4c85 100644
--- a/drivers/media/i2c/ml86v7667.c
+++ b/drivers/media/i2c/ml86v7667.c
@@ -188,7 +188,7 @@ static int ml86v7667_g_input_status(struct v4l2_subdev *sd, u32 *status)
 }
 
 static int ml86v7667_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -200,7 +200,7 @@ static int ml86v7667_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ml86v7667_fill_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct ml86v7667_priv *priv = to_ml86v7667(sd);
diff --git a/drivers/media/i2c/mt9m001.c b/drivers/media/i2c/mt9m001.c
index 58c85a3bccf64..c9f0bd997ea73 100644
--- a/drivers/media/i2c/mt9m001.c
+++ b/drivers/media/i2c/mt9m001.c
@@ -254,7 +254,7 @@ unlock:
 }
 
 static int mt9m001_set_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -295,7 +295,7 @@ static int mt9m001_set_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9m001_get_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -320,7 +320,7 @@ static int mt9m001_get_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9m001_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -331,7 +331,7 @@ static int mt9m001_get_fmt(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		format->format = *mf;
 		return 0;
 	}
@@ -377,7 +377,7 @@ static int mt9m001_s_fmt(struct v4l2_subdev *sd,
 }
 
 static int mt9m001_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -411,7 +411,7 @@ static int mt9m001_set_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return mt9m001_s_fmt(sd, fmt, mf);
-	cfg->try_fmt = *mf;
+	sd_state->pads->try_fmt = *mf;
 	return 0;
 }
 
@@ -657,12 +657,12 @@ static const struct v4l2_subdev_core_ops mt9m001_subdev_core_ops = {
 };
 
 static int mt9m001_init_cfg(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg)
+			    struct v4l2_subdev_state *sd_state)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	struct mt9m001 *mt9m001 = to_mt9m001(client);
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, cfg, 0);
+		v4l2_subdev_get_try_format(sd, sd_state, 0);
 
 	try_fmt->width		= MT9M001_MAX_WIDTH;
 	try_fmt->height		= MT9M001_MAX_HEIGHT;
@@ -677,7 +677,7 @@ static int mt9m001_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int mt9m001_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/i2c/mt9m032.c b/drivers/media/i2c/mt9m032.c
index 5a4c0f9d1eee6..ba0c0ea91c954 100644
--- a/drivers/media/i2c/mt9m032.c
+++ b/drivers/media/i2c/mt9m032.c
@@ -304,7 +304,7 @@ static int mt9m032_setup_pll(struct mt9m032 *sensor)
  */
 
 static int mt9m032_enum_mbus_code(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index != 0)
@@ -315,7 +315,7 @@ static int mt9m032_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int mt9m032_enum_frame_size(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index != 0 || fse->code != MEDIA_BUS_FMT_Y8_1X8)
@@ -332,18 +332,19 @@ static int mt9m032_enum_frame_size(struct v4l2_subdev *subdev,
 /**
  * __mt9m032_get_pad_crop() - get crop rect
  * @sensor: pointer to the sensor struct
- * @cfg: v4l2_subdev_pad_config for getting the try crop rect from
+ * @sd_state: v4l2_subdev_state for getting the try crop rect from
  * @which: select try or active crop rect
  *
  * Returns a pointer the current active or fh relative try crop rect
  */
 static struct v4l2_rect *
-__mt9m032_get_pad_crop(struct mt9m032 *sensor, struct v4l2_subdev_pad_config *cfg,
+__mt9m032_get_pad_crop(struct mt9m032 *sensor,
+		       struct v4l2_subdev_state *sd_state,
 		       enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&sensor->subdev, cfg, 0);
+		return v4l2_subdev_get_try_crop(&sensor->subdev, sd_state, 0);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &sensor->crop;
 	default:
@@ -354,18 +355,20 @@ __mt9m032_get_pad_crop(struct mt9m032 *sensor, struct v4l2_subdev_pad_config *cf
 /**
  * __mt9m032_get_pad_format() - get format
  * @sensor: pointer to the sensor struct
- * @cfg: v4l2_subdev_pad_config for getting the try format from
+ * @sd_state: v4l2_subdev_state for getting the try format from
  * @which: select try or active format
  *
  * Returns a pointer the current active or fh relative try format
  */
 static struct v4l2_mbus_framefmt *
-__mt9m032_get_pad_format(struct mt9m032 *sensor, struct v4l2_subdev_pad_config *cfg,
+__mt9m032_get_pad_format(struct mt9m032 *sensor,
+			 struct v4l2_subdev_state *sd_state,
 			 enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&sensor->subdev, cfg, 0);
+		return v4l2_subdev_get_try_format(&sensor->subdev, sd_state,
+						  0);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &sensor->format;
 	default:
@@ -374,20 +377,20 @@ __mt9m032_get_pad_format(struct mt9m032 *sensor, struct v4l2_subdev_pad_config *
 }
 
 static int mt9m032_get_pad_format(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *fmt)
 {
 	struct mt9m032 *sensor = to_mt9m032(subdev);
 
 	mutex_lock(&sensor->lock);
-	fmt->format = *__mt9m032_get_pad_format(sensor, cfg, fmt->which);
+	fmt->format = *__mt9m032_get_pad_format(sensor, sd_state, fmt->which);
 	mutex_unlock(&sensor->lock);
 
 	return 0;
 }
 
 static int mt9m032_set_pad_format(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *fmt)
 {
 	struct mt9m032 *sensor = to_mt9m032(subdev);
@@ -401,7 +404,7 @@ static int mt9m032_set_pad_format(struct v4l2_subdev *subdev,
 	}
 
 	/* Scaling is not supported, the format is thus fixed. */
-	fmt->format = *__mt9m032_get_pad_format(sensor, cfg, fmt->which);
+	fmt->format = *__mt9m032_get_pad_format(sensor, sd_state, fmt->which);
 	ret = 0;
 
 done:
@@ -410,7 +413,7 @@ done:
 }
 
 static int mt9m032_get_pad_selection(struct v4l2_subdev *subdev,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct mt9m032 *sensor = to_mt9m032(subdev);
@@ -419,14 +422,14 @@ static int mt9m032_get_pad_selection(struct v4l2_subdev *subdev,
 		return -EINVAL;
 
 	mutex_lock(&sensor->lock);
-	sel->r = *__mt9m032_get_pad_crop(sensor, cfg, sel->which);
+	sel->r = *__mt9m032_get_pad_crop(sensor, sd_state, sel->which);
 	mutex_unlock(&sensor->lock);
 
 	return 0;
 }
 
 static int mt9m032_set_pad_selection(struct v4l2_subdev *subdev,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct mt9m032 *sensor = to_mt9m032(subdev);
@@ -462,13 +465,14 @@ static int mt9m032_set_pad_selection(struct v4l2_subdev *subdev,
 	rect.height = min_t(unsigned int, rect.height,
 			    MT9M032_PIXEL_ARRAY_HEIGHT - rect.top);
 
-	__crop = __mt9m032_get_pad_crop(sensor, cfg, sel->which);
+	__crop = __mt9m032_get_pad_crop(sensor, sd_state, sel->which);
 
 	if (rect.width != __crop->width || rect.height != __crop->height) {
 		/* Reset the output image size if the crop rectangle size has
 		 * been modified.
 		 */
-		format = __mt9m032_get_pad_format(sensor, cfg, sel->which);
+		format = __mt9m032_get_pad_format(sensor, sd_state,
+						  sel->which);
 		format->width = rect.width;
 		format->height = rect.height;
 	}
diff --git a/drivers/media/i2c/mt9m111.c b/drivers/media/i2c/mt9m111.c
index 0e11734f75aa3..91a44359bcd3e 100644
--- a/drivers/media/i2c/mt9m111.c
+++ b/drivers/media/i2c/mt9m111.c
@@ -449,7 +449,7 @@ static int mt9m111_reset(struct mt9m111 *mt9m111)
 }
 
 static int mt9m111_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -493,7 +493,7 @@ static int mt9m111_set_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9m111_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -518,7 +518,7 @@ static int mt9m111_get_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9m111_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -529,7 +529,7 @@ static int mt9m111_get_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mf = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		format->format = *mf;
 		return 0;
 #else
@@ -624,7 +624,7 @@ static int mt9m111_set_pixfmt(struct mt9m111 *mt9m111,
 }
 
 static int mt9m111_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -678,7 +678,7 @@ static int mt9m111_set_fmt(struct v4l2_subdev *sd,
 	mf->xfer_func	= V4L2_XFER_FUNC_DEFAULT;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *mf;
+		sd_state->pads->try_fmt = *mf;
 		return 0;
 	}
 
@@ -1100,7 +1100,7 @@ static int mt9m111_s_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int mt9m111_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(mt9m111_colour_fmts))
@@ -1119,11 +1119,11 @@ static int mt9m111_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int mt9m111_init_cfg(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg)
+			    struct v4l2_subdev_state *sd_state)
 {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
 	struct v4l2_mbus_framefmt *format =
-		v4l2_subdev_get_try_format(sd, cfg, 0);
+		v4l2_subdev_get_try_format(sd, sd_state, 0);
 
 	format->width	= MT9M111_MAX_WIDTH;
 	format->height	= MT9M111_MAX_HEIGHT;
diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c
index a633b934d93e8..6eb88ef997836 100644
--- a/drivers/media/i2c/mt9p031.c
+++ b/drivers/media/i2c/mt9p031.c
@@ -470,7 +470,7 @@ static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable)
 }
 
 static int mt9p031_enum_mbus_code(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
@@ -483,7 +483,7 @@ static int mt9p031_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int mt9p031_enum_frame_size(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
@@ -501,12 +501,14 @@ static int mt9p031_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static struct v4l2_mbus_framefmt *
-__mt9p031_get_pad_format(struct mt9p031 *mt9p031, struct v4l2_subdev_pad_config *cfg,
+__mt9p031_get_pad_format(struct mt9p031 *mt9p031,
+			 struct v4l2_subdev_state *sd_state,
 			 unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&mt9p031->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&mt9p031->subdev, sd_state,
+						  pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9p031->format;
 	default:
@@ -515,12 +517,14 @@ __mt9p031_get_pad_format(struct mt9p031 *mt9p031, struct v4l2_subdev_pad_config
 }
 
 static struct v4l2_rect *
-__mt9p031_get_pad_crop(struct mt9p031 *mt9p031, struct v4l2_subdev_pad_config *cfg,
-		     unsigned int pad, u32 which)
+__mt9p031_get_pad_crop(struct mt9p031 *mt9p031,
+		       struct v4l2_subdev_state *sd_state,
+		       unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&mt9p031->subdev, cfg, pad);
+		return v4l2_subdev_get_try_crop(&mt9p031->subdev, sd_state,
+						pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9p031->crop;
 	default:
@@ -529,18 +533,18 @@ __mt9p031_get_pad_crop(struct mt9p031 *mt9p031, struct v4l2_subdev_pad_config *c
 }
 
 static int mt9p031_get_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
 
-	fmt->format = *__mt9p031_get_pad_format(mt9p031, cfg, fmt->pad,
+	fmt->format = *__mt9p031_get_pad_format(mt9p031, sd_state, fmt->pad,
 						fmt->which);
 	return 0;
 }
 
 static int mt9p031_set_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
@@ -551,7 +555,7 @@ static int mt9p031_set_format(struct v4l2_subdev *subdev,
 	unsigned int hratio;
 	unsigned int vratio;
 
-	__crop = __mt9p031_get_pad_crop(mt9p031, cfg, format->pad,
+	__crop = __mt9p031_get_pad_crop(mt9p031, sd_state, format->pad,
 					format->which);
 
 	/* Clamp the width and height to avoid dividing by zero. */
@@ -567,7 +571,7 @@ static int mt9p031_set_format(struct v4l2_subdev *subdev,
 	hratio = DIV_ROUND_CLOSEST(__crop->width, width);
 	vratio = DIV_ROUND_CLOSEST(__crop->height, height);
 
-	__format = __mt9p031_get_pad_format(mt9p031, cfg, format->pad,
+	__format = __mt9p031_get_pad_format(mt9p031, sd_state, format->pad,
 					    format->which);
 	__format->width = __crop->width / hratio;
 	__format->height = __crop->height / vratio;
@@ -578,7 +582,7 @@ static int mt9p031_set_format(struct v4l2_subdev *subdev,
 }
 
 static int mt9p031_get_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
@@ -586,12 +590,13 @@ static int mt9p031_get_selection(struct v4l2_subdev *subdev,
 	if (sel->target != V4L2_SEL_TGT_CROP)
 		return -EINVAL;
 
-	sel->r = *__mt9p031_get_pad_crop(mt9p031, cfg, sel->pad, sel->which);
+	sel->r = *__mt9p031_get_pad_crop(mt9p031, sd_state, sel->pad,
+					 sel->which);
 	return 0;
 }
 
 static int mt9p031_set_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9p031 *mt9p031 = to_mt9p031(subdev);
@@ -621,13 +626,15 @@ static int mt9p031_set_selection(struct v4l2_subdev *subdev,
 	rect.height = min_t(unsigned int, rect.height,
 			    MT9P031_PIXEL_ARRAY_HEIGHT - rect.top);
 
-	__crop = __mt9p031_get_pad_crop(mt9p031, cfg, sel->pad, sel->which);
+	__crop = __mt9p031_get_pad_crop(mt9p031, sd_state, sel->pad,
+					sel->which);
 
 	if (rect.width != __crop->width || rect.height != __crop->height) {
 		/* Reset the output image size if the crop rectangle size has
 		 * been modified.
 		 */
-		__format = __mt9p031_get_pad_format(mt9p031, cfg, sel->pad,
+		__format = __mt9p031_get_pad_format(mt9p031, sd_state,
+						    sel->pad,
 						    sel->which);
 		__format->width = rect.width;
 		__format->height = rect.height;
@@ -942,13 +949,13 @@ static int mt9p031_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	crop = v4l2_subdev_get_try_crop(subdev, fh->pad, 0);
+	crop = v4l2_subdev_get_try_crop(subdev, fh->state, 0);
 	crop->left = MT9P031_COLUMN_START_DEF;
 	crop->top = MT9P031_ROW_START_DEF;
 	crop->width = MT9P031_WINDOW_WIDTH_DEF;
 	crop->height = MT9P031_WINDOW_HEIGHT_DEF;
 
-	format = v4l2_subdev_get_try_format(subdev, fh->pad, 0);
+	format = v4l2_subdev_get_try_format(subdev, fh->state, 0);
 
 	if (mt9p031->model == MT9P031_MODEL_MONOCHROME)
 		format->code = MEDIA_BUS_FMT_Y12_1X12;
diff --git a/drivers/media/i2c/mt9t001.c b/drivers/media/i2c/mt9t001.c
index 2e96ff5234b4a..b651ee4a26e87 100644
--- a/drivers/media/i2c/mt9t001.c
+++ b/drivers/media/i2c/mt9t001.c
@@ -252,12 +252,14 @@ e_power:
  */
 
 static struct v4l2_mbus_framefmt *
-__mt9t001_get_pad_format(struct mt9t001 *mt9t001, struct v4l2_subdev_pad_config *cfg,
+__mt9t001_get_pad_format(struct mt9t001 *mt9t001,
+			 struct v4l2_subdev_state *sd_state,
 			 unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&mt9t001->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&mt9t001->subdev, sd_state,
+						  pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9t001->format;
 	default:
@@ -266,12 +268,14 @@ __mt9t001_get_pad_format(struct mt9t001 *mt9t001, struct v4l2_subdev_pad_config
 }
 
 static struct v4l2_rect *
-__mt9t001_get_pad_crop(struct mt9t001 *mt9t001, struct v4l2_subdev_pad_config *cfg,
+__mt9t001_get_pad_crop(struct mt9t001 *mt9t001,
+		       struct v4l2_subdev_state *sd_state,
 		       unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&mt9t001->subdev, cfg, pad);
+		return v4l2_subdev_get_try_crop(&mt9t001->subdev, sd_state,
+						pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9t001->crop;
 	default:
@@ -335,7 +339,7 @@ static int mt9t001_s_stream(struct v4l2_subdev *subdev, int enable)
 }
 
 static int mt9t001_enum_mbus_code(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -346,7 +350,7 @@ static int mt9t001_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int mt9t001_enum_frame_size(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= 8 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10)
@@ -361,18 +365,19 @@ static int mt9t001_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int mt9t001_get_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9t001 *mt9t001 = to_mt9t001(subdev);
 
-	format->format = *__mt9t001_get_pad_format(mt9t001, cfg, format->pad,
+	format->format = *__mt9t001_get_pad_format(mt9t001, sd_state,
+						   format->pad,
 						   format->which);
 	return 0;
 }
 
 static int mt9t001_set_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9t001 *mt9t001 = to_mt9t001(subdev);
@@ -383,7 +388,7 @@ static int mt9t001_set_format(struct v4l2_subdev *subdev,
 	unsigned int hratio;
 	unsigned int vratio;
 
-	__crop = __mt9t001_get_pad_crop(mt9t001, cfg, format->pad,
+	__crop = __mt9t001_get_pad_crop(mt9t001, sd_state, format->pad,
 					format->which);
 
 	/* Clamp the width and height to avoid dividing by zero. */
@@ -399,7 +404,7 @@ static int mt9t001_set_format(struct v4l2_subdev *subdev,
 	hratio = DIV_ROUND_CLOSEST(__crop->width, width);
 	vratio = DIV_ROUND_CLOSEST(__crop->height, height);
 
-	__format = __mt9t001_get_pad_format(mt9t001, cfg, format->pad,
+	__format = __mt9t001_get_pad_format(mt9t001, sd_state, format->pad,
 					    format->which);
 	__format->width = __crop->width / hratio;
 	__format->height = __crop->height / vratio;
@@ -410,7 +415,7 @@ static int mt9t001_set_format(struct v4l2_subdev *subdev,
 }
 
 static int mt9t001_get_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9t001 *mt9t001 = to_mt9t001(subdev);
@@ -418,12 +423,13 @@ static int mt9t001_get_selection(struct v4l2_subdev *subdev,
 	if (sel->target != V4L2_SEL_TGT_CROP)
 		return -EINVAL;
 
-	sel->r = *__mt9t001_get_pad_crop(mt9t001, cfg, sel->pad, sel->which);
+	sel->r = *__mt9t001_get_pad_crop(mt9t001, sd_state, sel->pad,
+					 sel->which);
 	return 0;
 }
 
 static int mt9t001_set_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9t001 *mt9t001 = to_mt9t001(subdev);
@@ -455,13 +461,15 @@ static int mt9t001_set_selection(struct v4l2_subdev *subdev,
 	rect.height = min_t(unsigned int, rect.height,
 			    MT9T001_PIXEL_ARRAY_HEIGHT - rect.top);
 
-	__crop = __mt9t001_get_pad_crop(mt9t001, cfg, sel->pad, sel->which);
+	__crop = __mt9t001_get_pad_crop(mt9t001, sd_state, sel->pad,
+					sel->which);
 
 	if (rect.width != __crop->width || rect.height != __crop->height) {
 		/* Reset the output image size if the crop rectangle size has
 		 * been modified.
 		 */
-		__format = __mt9t001_get_pad_format(mt9t001, cfg, sel->pad,
+		__format = __mt9t001_get_pad_format(mt9t001, sd_state,
+						    sel->pad,
 						    sel->which);
 		__format->width = rect.width;
 		__format->height = rect.height;
@@ -798,13 +806,13 @@ static int mt9t001_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	crop = v4l2_subdev_get_try_crop(subdev, fh->pad, 0);
+	crop = v4l2_subdev_get_try_crop(subdev, fh->state, 0);
 	crop->left = MT9T001_COLUMN_START_DEF;
 	crop->top = MT9T001_ROW_START_DEF;
 	crop->width = MT9T001_WINDOW_WIDTH_DEF + 1;
 	crop->height = MT9T001_WINDOW_HEIGHT_DEF + 1;
 
-	format = v4l2_subdev_get_try_format(subdev, fh->pad, 0);
+	format = v4l2_subdev_get_try_format(subdev, fh->state, 0);
 	format->code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format->width = MT9T001_WINDOW_WIDTH_DEF + 1;
 	format->height = MT9T001_WINDOW_HEIGHT_DEF + 1;
diff --git a/drivers/media/i2c/mt9t112.c b/drivers/media/i2c/mt9t112.c
index ae3c336eadf5c..8d2e3caa9b286 100644
--- a/drivers/media/i2c/mt9t112.c
+++ b/drivers/media/i2c/mt9t112.c
@@ -872,8 +872,8 @@ static int mt9t112_set_params(struct mt9t112_priv *priv,
 }
 
 static int mt9t112_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
-		struct v4l2_subdev_selection *sel)
+				 struct v4l2_subdev_state *sd_state,
+				 struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	struct mt9t112_priv *priv = to_mt9t112(client);
@@ -897,7 +897,7 @@ static int mt9t112_get_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9t112_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -912,7 +912,7 @@ static int mt9t112_set_selection(struct v4l2_subdev *sd,
 }
 
 static int mt9t112_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -953,7 +953,7 @@ static int mt9t112_s_fmt(struct v4l2_subdev *sd,
 }
 
 static int mt9t112_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -982,13 +982,13 @@ static int mt9t112_set_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return mt9t112_s_fmt(sd, mf);
-	cfg->try_fmt = *mf;
+	sd_state->pads->try_fmt = *mf;
 
 	return 0;
 }
 
 static int mt9t112_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
diff --git a/drivers/media/i2c/mt9v011.c b/drivers/media/i2c/mt9v011.c
index 46ef74a2ca364..7699e64e11271 100644
--- a/drivers/media/i2c/mt9v011.c
+++ b/drivers/media/i2c/mt9v011.c
@@ -327,7 +327,7 @@ static int mt9v011_reset(struct v4l2_subdev *sd, u32 val)
 }
 
 static int mt9v011_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -338,7 +338,7 @@ static int mt9v011_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int mt9v011_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -358,7 +358,7 @@ static int mt9v011_set_fmt(struct v4l2_subdev *sd,
 
 		set_res(sd);
 	} else {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 	}
 
 	return 0;
diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c
index 5bd3ae82992f3..4cfdd3dfbd42a 100644
--- a/drivers/media/i2c/mt9v032.c
+++ b/drivers/media/i2c/mt9v032.c
@@ -349,12 +349,14 @@ static int __mt9v032_set_power(struct mt9v032 *mt9v032, bool on)
  */
 
 static struct v4l2_mbus_framefmt *
-__mt9v032_get_pad_format(struct mt9v032 *mt9v032, struct v4l2_subdev_pad_config *cfg,
+__mt9v032_get_pad_format(struct mt9v032 *mt9v032,
+			 struct v4l2_subdev_state *sd_state,
 			 unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&mt9v032->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&mt9v032->subdev, sd_state,
+						  pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9v032->format;
 	default:
@@ -363,12 +365,14 @@ __mt9v032_get_pad_format(struct mt9v032 *mt9v032, struct v4l2_subdev_pad_config
 }
 
 static struct v4l2_rect *
-__mt9v032_get_pad_crop(struct mt9v032 *mt9v032, struct v4l2_subdev_pad_config *cfg,
+__mt9v032_get_pad_crop(struct mt9v032 *mt9v032,
+		       struct v4l2_subdev_state *sd_state,
 		       unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&mt9v032->subdev, cfg, pad);
+		return v4l2_subdev_get_try_crop(&mt9v032->subdev, sd_state,
+						pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9v032->crop;
 	default:
@@ -425,7 +429,7 @@ static int mt9v032_s_stream(struct v4l2_subdev *subdev, int enable)
 }
 
 static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
@@ -438,7 +442,7 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
@@ -457,12 +461,13 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int mt9v032_get_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
 
-	format->format = *__mt9v032_get_pad_format(mt9v032, cfg, format->pad,
+	format->format = *__mt9v032_get_pad_format(mt9v032, sd_state,
+						   format->pad,
 						   format->which);
 	return 0;
 }
@@ -492,7 +497,7 @@ static unsigned int mt9v032_calc_ratio(unsigned int input, unsigned int output)
 }
 
 static int mt9v032_set_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
@@ -503,7 +508,7 @@ static int mt9v032_set_format(struct v4l2_subdev *subdev,
 	unsigned int hratio;
 	unsigned int vratio;
 
-	__crop = __mt9v032_get_pad_crop(mt9v032, cfg, format->pad,
+	__crop = __mt9v032_get_pad_crop(mt9v032, sd_state, format->pad,
 					format->which);
 
 	/* Clamp the width and height to avoid dividing by zero. */
@@ -519,7 +524,7 @@ static int mt9v032_set_format(struct v4l2_subdev *subdev,
 	hratio = mt9v032_calc_ratio(__crop->width, width);
 	vratio = mt9v032_calc_ratio(__crop->height, height);
 
-	__format = __mt9v032_get_pad_format(mt9v032, cfg, format->pad,
+	__format = __mt9v032_get_pad_format(mt9v032, sd_state, format->pad,
 					    format->which);
 	__format->width = __crop->width / hratio;
 	__format->height = __crop->height / vratio;
@@ -536,7 +541,7 @@ static int mt9v032_set_format(struct v4l2_subdev *subdev,
 }
 
 static int mt9v032_get_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
@@ -544,12 +549,13 @@ static int mt9v032_get_selection(struct v4l2_subdev *subdev,
 	if (sel->target != V4L2_SEL_TGT_CROP)
 		return -EINVAL;
 
-	sel->r = *__mt9v032_get_pad_crop(mt9v032, cfg, sel->pad, sel->which);
+	sel->r = *__mt9v032_get_pad_crop(mt9v032, sd_state, sel->pad,
+					 sel->which);
 	return 0;
 }
 
 static int mt9v032_set_selection(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct mt9v032 *mt9v032 = to_mt9v032(subdev);
@@ -581,13 +587,15 @@ static int mt9v032_set_selection(struct v4l2_subdev *subdev,
 	rect.height = min_t(unsigned int,
 			    rect.height, MT9V032_PIXEL_ARRAY_HEIGHT - rect.top);
 
-	__crop = __mt9v032_get_pad_crop(mt9v032, cfg, sel->pad, sel->which);
+	__crop = __mt9v032_get_pad_crop(mt9v032, sd_state, sel->pad,
+					sel->which);
 
 	if (rect.width != __crop->width || rect.height != __crop->height) {
 		/* Reset the output image size if the crop rectangle size has
 		 * been modified.
 		 */
-		__format = __mt9v032_get_pad_format(mt9v032, cfg, sel->pad,
+		__format = __mt9v032_get_pad_format(mt9v032, sd_state,
+						    sel->pad,
 						    sel->which);
 		__format->width = rect.width;
 		__format->height = rect.height;
@@ -922,13 +930,13 @@ static int mt9v032_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	crop = v4l2_subdev_get_try_crop(subdev, fh->pad, 0);
+	crop = v4l2_subdev_get_try_crop(subdev, fh->state, 0);
 	crop->left = MT9V032_COLUMN_START_DEF;
 	crop->top = MT9V032_ROW_START_DEF;
 	crop->width = MT9V032_WINDOW_WIDTH_DEF;
 	crop->height = MT9V032_WINDOW_HEIGHT_DEF;
 
-	format = v4l2_subdev_get_try_format(subdev, fh->pad, 0);
+	format = v4l2_subdev_get_try_format(subdev, fh->state, 0);
 
 	if (mt9v032->model->color)
 		format->code = MEDIA_BUS_FMT_SGRBG10_1X10;
diff --git a/drivers/media/i2c/mt9v111.c b/drivers/media/i2c/mt9v111.c
index 97c7527b74eda..2dc4a0f24ce86 100644
--- a/drivers/media/i2c/mt9v111.c
+++ b/drivers/media/i2c/mt9v111.c
@@ -791,16 +791,16 @@ static int mt9v111_g_frame_interval(struct v4l2_subdev *sd,
 
 static struct v4l2_mbus_framefmt *__mt9v111_get_pad_format(
 					struct mt9v111_dev *mt9v111,
-					struct v4l2_subdev_pad_config *cfg,
+					struct v4l2_subdev_state *sd_state,
 					unsigned int pad,
 					enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
 #if IS_ENABLED(CONFIG_VIDEO_V4L2_SUBDEV_API)
-		return v4l2_subdev_get_try_format(&mt9v111->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&mt9v111->sd, sd_state, pad);
 #else
-		return &cfg->try_fmt;
+		return &sd_state->pads->try_fmt;
 #endif
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &mt9v111->fmt;
@@ -810,7 +810,7 @@ static struct v4l2_mbus_framefmt *__mt9v111_get_pad_format(
 }
 
 static int mt9v111_enum_mbus_code(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > ARRAY_SIZE(mt9v111_formats) - 1)
@@ -822,7 +822,7 @@ static int mt9v111_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int mt9v111_enum_frame_interval(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_interval_enum *fie)
 {
 	unsigned int i;
@@ -845,7 +845,7 @@ static int mt9v111_enum_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int mt9v111_enum_frame_size(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->pad || fse->index >= ARRAY_SIZE(mt9v111_frame_sizes))
@@ -860,7 +860,7 @@ static int mt9v111_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int mt9v111_get_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9v111_dev *mt9v111 = sd_to_mt9v111(subdev);
@@ -869,7 +869,8 @@ static int mt9v111_get_format(struct v4l2_subdev *subdev,
 		return -EINVAL;
 
 	mutex_lock(&mt9v111->stream_mutex);
-	format->format = *__mt9v111_get_pad_format(mt9v111, cfg, format->pad,
+	format->format = *__mt9v111_get_pad_format(mt9v111, sd_state,
+						   format->pad,
 						   format->which);
 	mutex_unlock(&mt9v111->stream_mutex);
 
@@ -877,7 +878,7 @@ static int mt9v111_get_format(struct v4l2_subdev *subdev,
 }
 
 static int mt9v111_set_format(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct mt9v111_dev *mt9v111 = sd_to_mt9v111(subdev);
@@ -925,7 +926,7 @@ static int mt9v111_set_format(struct v4l2_subdev *subdev,
 	new_fmt.height = mt9v111_frame_sizes[idx].height;
 
 	/* Update the device (or pad) format if it has changed. */
-	__fmt = __mt9v111_get_pad_format(mt9v111, cfg, format->pad,
+	__fmt = __mt9v111_get_pad_format(mt9v111, sd_state, format->pad,
 					 format->which);
 
 	/* Format hasn't changed, stop here. */
@@ -954,9 +955,9 @@ done:
 }
 
 static int mt9v111_init_cfg(struct v4l2_subdev *subdev,
-			    struct v4l2_subdev_pad_config *cfg)
+			    struct v4l2_subdev_state *sd_state)
 {
-	cfg->try_fmt = mt9v111_def_fmt;
+	sd_state->pads->try_fmt = mt9v111_def_fmt;
 
 	return 0;
 }
diff --git a/drivers/media/i2c/noon010pc30.c b/drivers/media/i2c/noon010pc30.c
index 87d76a7f691a5..f3ac379ef34ad 100644
--- a/drivers/media/i2c/noon010pc30.c
+++ b/drivers/media/i2c/noon010pc30.c
@@ -488,7 +488,7 @@ unlock:
 }
 
 static int noon010_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(noon010_formats))
@@ -499,15 +499,15 @@ static int noon010_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int noon010_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct noon010_info *info = to_noon010(sd);
 	struct v4l2_mbus_framefmt *mf;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		if (cfg) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		if (sd_state) {
+			mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 			fmt->format = *mf;
 		}
 		return 0;
@@ -539,7 +539,8 @@ static const struct noon010_format *noon010_try_fmt(struct v4l2_subdev *sd,
 	return &noon010_formats[i];
 }
 
-static int noon010_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int noon010_set_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct noon010_info *info = to_noon010(sd);
@@ -554,8 +555,8 @@ static int noon010_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	fmt->format.field = V4L2_FIELD_NONE;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		if (cfg) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		if (sd_state) {
+			mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 			*mf = fmt->format;
 		}
 		return 0;
@@ -637,7 +638,9 @@ static int noon010_log_status(struct v4l2_subdev *sd)
 
 static int noon010_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	struct v4l2_mbus_framefmt *mf = v4l2_subdev_get_try_format(sd, fh->pad, 0);
+	struct v4l2_mbus_framefmt *mf = v4l2_subdev_get_try_format(sd,
+								   fh->state,
+								   0);
 
 	mf->width = noon010_sizes[0].width;
 	mf->height = noon010_sizes[0].height;
diff --git a/drivers/media/i2c/ov02a10.c b/drivers/media/i2c/ov02a10.c
index a1d7314b20a96..a3ce5500d3551 100644
--- a/drivers/media/i2c/ov02a10.c
+++ b/drivers/media/i2c/ov02a10.c
@@ -295,7 +295,7 @@ static void ov02a10_fill_fmt(const struct ov02a10_mode *mode,
 }
 
 static int ov02a10_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct ov02a10 *ov02a10 = to_ov02a10(sd);
@@ -315,7 +315,7 @@ static int ov02a10_set_fmt(struct v4l2_subdev *sd,
 	ov02a10_fill_fmt(ov02a10->cur_mode, mbus_fmt);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		frame_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		frame_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	else
 		frame_fmt = &ov02a10->fmt;
 
@@ -327,7 +327,7 @@ out_unlock:
 }
 
 static int ov02a10_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct ov02a10 *ov02a10 = to_ov02a10(sd);
@@ -336,7 +336,8 @@ static int ov02a10_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&ov02a10->mutex);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		fmt->format = *v4l2_subdev_get_try_format(sd, sd_state,
+							  fmt->pad);
 	} else {
 		fmt->format = ov02a10->fmt;
 		mbus_fmt->code = ov02a10->fmt.code;
@@ -349,7 +350,7 @@ static int ov02a10_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov02a10_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct ov02a10 *ov02a10 = to_ov02a10(sd);
@@ -363,7 +364,7 @@ static int ov02a10_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov02a10_enum_frame_sizes(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -511,7 +512,7 @@ static int __ov02a10_stop_stream(struct ov02a10 *ov02a10)
 }
 
 static int ov02a10_entity_init_cfg(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg)
+				   struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
@@ -521,7 +522,7 @@ static int ov02a10_entity_init_cfg(struct v4l2_subdev *sd,
 		}
 	};
 
-	ov02a10_set_fmt(sd, cfg, &fmt);
+	ov02a10_set_fmt(sd, sd_state, &fmt);
 
 	return 0;
 }
diff --git a/drivers/media/i2c/ov13858.c b/drivers/media/i2c/ov13858.c
index 9598c0b19603d..7fc70af53e45d 100644
--- a/drivers/media/i2c/ov13858.c
+++ b/drivers/media/i2c/ov13858.c
@@ -1150,7 +1150,7 @@ static int ov13858_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct ov13858 *ov13858 = to_ov13858(sd);
 	struct v4l2_mbus_framefmt *try_fmt = v4l2_subdev_get_try_format(sd,
-									fh->pad,
+									fh->state,
 									0);
 
 	mutex_lock(&ov13858->mutex);
@@ -1275,7 +1275,7 @@ static const struct v4l2_ctrl_ops ov13858_ctrl_ops = {
 };
 
 static int ov13858_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	/* Only one bayer order(GRBG) is supported */
@@ -1288,7 +1288,7 @@ static int ov13858_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov13858_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -1315,14 +1315,14 @@ static void ov13858_update_pad_format(const struct ov13858_mode *mode,
 }
 
 static int ov13858_do_get_pad_format(struct ov13858 *ov13858,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_format *fmt)
 {
 	struct v4l2_mbus_framefmt *framefmt;
 	struct v4l2_subdev *sd = &ov13858->sd;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *framefmt;
 	} else {
 		ov13858_update_pad_format(ov13858->cur_mode, fmt);
@@ -1332,14 +1332,14 @@ static int ov13858_do_get_pad_format(struct ov13858 *ov13858,
 }
 
 static int ov13858_get_pad_format(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *fmt)
 {
 	struct ov13858 *ov13858 = to_ov13858(sd);
 	int ret;
 
 	mutex_lock(&ov13858->mutex);
-	ret = ov13858_do_get_pad_format(ov13858, cfg, fmt);
+	ret = ov13858_do_get_pad_format(ov13858, sd_state, fmt);
 	mutex_unlock(&ov13858->mutex);
 
 	return ret;
@@ -1347,7 +1347,7 @@ static int ov13858_get_pad_format(struct v4l2_subdev *sd,
 
 static int
 ov13858_set_pad_format(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *fmt)
 {
 	struct ov13858 *ov13858 = to_ov13858(sd);
@@ -1371,7 +1371,7 @@ ov13858_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	ov13858_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*framefmt = fmt->format;
 	} else {
 		ov13858->cur_mode = mode;
diff --git a/drivers/media/i2c/ov2640.c b/drivers/media/i2c/ov2640.c
index 4a4bd5b665a1f..4b75da55b2608 100644
--- a/drivers/media/i2c/ov2640.c
+++ b/drivers/media/i2c/ov2640.c
@@ -913,7 +913,7 @@ err:
 }
 
 static int ov2640_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -925,7 +925,7 @@ static int ov2640_get_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		format->format = *mf;
 		return 0;
 #else
@@ -946,7 +946,7 @@ static int ov2640_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov2640_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -996,7 +996,7 @@ static int ov2640_set_fmt(struct v4l2_subdev *sd,
 		/* select format */
 		priv->cfmt_code = mf->code;
 	} else {
-		cfg->try_fmt = *mf;
+		sd_state->pads->try_fmt = *mf;
 	}
 out:
 	mutex_unlock(&priv->lock);
@@ -1005,11 +1005,11 @@ out:
 }
 
 static int ov2640_init_cfg(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg)
+			   struct v4l2_subdev_state *sd_state)
 {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
 	struct v4l2_mbus_framefmt *try_fmt =
-		v4l2_subdev_get_try_format(sd, cfg, 0);
+		v4l2_subdev_get_try_format(sd, sd_state, 0);
 	const struct ov2640_win_size *win =
 		ov2640_select_win(SVGA_WIDTH, SVGA_HEIGHT);
 
@@ -1026,7 +1026,7 @@ static int ov2640_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int ov2640_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(ov2640_codes))
@@ -1037,7 +1037,7 @@ static int ov2640_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2640_get_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE)
diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
index befef14aa86bc..13ded5b2aa663 100644
--- a/drivers/media/i2c/ov2659.c
+++ b/drivers/media/i2c/ov2659.c
@@ -980,7 +980,7 @@ static int ov2659_init(struct v4l2_subdev *sd, u32 val)
  */
 
 static int ov2659_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -996,7 +996,7 @@ static int ov2659_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2659_enum_frame_sizes(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -1022,7 +1022,7 @@ static int ov2659_enum_frame_sizes(struct v4l2_subdev *sd,
 }
 
 static int ov2659_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -1034,7 +1034,7 @@ static int ov2659_get_fmt(struct v4l2_subdev *sd,
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
 		struct v4l2_mbus_framefmt *mf;
 
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		mutex_lock(&ov2659->lock);
 		fmt->format = *mf;
 		mutex_unlock(&ov2659->lock);
@@ -1084,7 +1084,7 @@ static void __ov2659_try_frame_size(struct v4l2_mbus_framefmt *mf,
 }
 
 static int ov2659_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -1114,7 +1114,7 @@ static int ov2659_set_fmt(struct v4l2_subdev *sd,
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 #endif
 	} else {
@@ -1311,7 +1311,7 @@ static int ov2659_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	dev_dbg(&client->dev, "%s:\n", __func__);
 
diff --git a/drivers/media/i2c/ov2680.c b/drivers/media/i2c/ov2680.c
index 178dfe985a25a..906c711f6821b 100644
--- a/drivers/media/i2c/ov2680.c
+++ b/drivers/media/i2c/ov2680.c
@@ -645,7 +645,7 @@ unlock:
 }
 
 static int ov2680_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct ov2680_dev *sensor = to_ov2680_dev(sd);
@@ -659,7 +659,7 @@ static int ov2680_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2680_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov2680_dev *sensor = to_ov2680_dev(sd);
@@ -673,7 +673,8 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		fmt = v4l2_subdev_get_try_format(&sensor->sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(&sensor->sd, sd_state,
+						 format->pad);
 #else
 		ret = -EINVAL;
 #endif
@@ -690,7 +691,7 @@ static int ov2680_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov2680_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov2680_dev *sensor = to_ov2680_dev(sd);
@@ -721,7 +722,7 @@ static int ov2680_set_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		try_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		try_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		format->format = *try_fmt;
 #endif
 		goto unlock;
@@ -743,22 +744,22 @@ unlock:
 }
 
 static int ov2680_init_cfg(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg)
+			   struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = {
-		.which = cfg ? V4L2_SUBDEV_FORMAT_TRY
-				: V4L2_SUBDEV_FORMAT_ACTIVE,
+		.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+		: V4L2_SUBDEV_FORMAT_ACTIVE,
 		.format = {
 			.width = 800,
 			.height = 600,
 		}
 	};
 
-	return ov2680_set_fmt(sd, cfg, &fmt);
+	return ov2680_set_fmt(sd, sd_state, &fmt);
 }
 
 static int ov2680_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
@@ -775,7 +776,7 @@ static int ov2680_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int ov2680_enum_frame_interval(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct v4l2_fract tpf;
diff --git a/drivers/media/i2c/ov2685.c b/drivers/media/i2c/ov2685.c
index 2f3836dd8eed9..b6e010ea3249b 100644
--- a/drivers/media/i2c/ov2685.c
+++ b/drivers/media/i2c/ov2685.c
@@ -328,7 +328,7 @@ static void ov2685_fill_fmt(const struct ov2685_mode *mode,
 }
 
 static int ov2685_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ov2685 *ov2685 = to_ov2685(sd);
@@ -341,7 +341,7 @@ static int ov2685_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov2685_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ov2685 *ov2685 = to_ov2685(sd);
@@ -353,7 +353,7 @@ static int ov2685_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov2685_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(supported_modes))
@@ -365,7 +365,7 @@ static int ov2685_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2685_enum_frame_sizes(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
@@ -493,7 +493,7 @@ static int ov2685_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&ov2685->mutex);
 
-	try_fmt = v4l2_subdev_get_try_format(sd, fh->pad, 0);
+	try_fmt = v4l2_subdev_get_try_format(sd, fh->state, 0);
 	/* Initialize try_fmt */
 	ov2685_fill_fmt(&supported_modes[0], try_fmt);
 
diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c
index 54779f720f9de..599369a3d192f 100644
--- a/drivers/media/i2c/ov2740.c
+++ b/drivers/media/i2c/ov2740.c
@@ -810,7 +810,7 @@ exit:
 }
 
 static int ov2740_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov2740 *ov2740 = to_ov2740(sd);
@@ -825,7 +825,7 @@ static int ov2740_set_format(struct v4l2_subdev *sd,
 	mutex_lock(&ov2740->mutex);
 	ov2740_update_pad_format(mode, &fmt->format);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		ov2740->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov2740->link_freq, mode->link_freq_index);
@@ -850,14 +850,15 @@ static int ov2740_set_format(struct v4l2_subdev *sd,
 }
 
 static int ov2740_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov2740 *ov2740 = to_ov2740(sd);
 
 	mutex_lock(&ov2740->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&ov2740->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&ov2740->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		ov2740_update_pad_format(ov2740->cur_mode, &fmt->format);
@@ -868,7 +869,7 @@ static int ov2740_get_format(struct v4l2_subdev *sd,
 }
 
 static int ov2740_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -880,7 +881,7 @@ static int ov2740_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2740_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -903,7 +904,7 @@ static int ov2740_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&ov2740->mutex);
 	ov2740_update_pad_format(&supported_modes[0],
-				 v4l2_subdev_get_try_format(sd, fh->pad, 0));
+				 v4l2_subdev_get_try_format(sd, fh->state, 0));
 	mutex_unlock(&ov2740->mutex);
 
 	return 0;
diff --git a/drivers/media/i2c/ov5640.c b/drivers/media/i2c/ov5640.c
index 5b9cc71df473d..f6e1e51e0375f 100644
--- a/drivers/media/i2c/ov5640.c
+++ b/drivers/media/i2c/ov5640.c
@@ -2227,7 +2227,7 @@ find_mode:
 }
 
 static int ov5640_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov5640_dev *sensor = to_ov5640_dev(sd);
@@ -2239,7 +2239,7 @@ static int ov5640_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&sensor->lock);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt = v4l2_subdev_get_try_format(&sensor->sd, cfg,
+		fmt = v4l2_subdev_get_try_format(&sensor->sd, sd_state,
 						 format->pad);
 	else
 		fmt = &sensor->fmt;
@@ -2285,7 +2285,7 @@ static int ov5640_try_fmt_internal(struct v4l2_subdev *sd,
 }
 
 static int ov5640_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov5640_dev *sensor = to_ov5640_dev(sd);
@@ -2310,7 +2310,7 @@ static int ov5640_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	else
 		fmt = &sensor->fmt;
 
@@ -2818,7 +2818,7 @@ free_ctrls:
 }
 
 static int ov5640_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->pad != 0)
@@ -2838,7 +2838,7 @@ static int ov5640_enum_frame_size(struct v4l2_subdev *sd,
 
 static int ov5640_enum_frame_interval(
 	struct v4l2_subdev *sd,
-	struct v4l2_subdev_pad_config *cfg,
+	struct v4l2_subdev_state *sd_state,
 	struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct ov5640_dev *sensor = to_ov5640_dev(sd);
@@ -2924,7 +2924,7 @@ out:
 }
 
 static int ov5640_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad != 0)
diff --git a/drivers/media/i2c/ov5645.c b/drivers/media/i2c/ov5645.c
index a6c17d15d7547..368fa21e675ed 100644
--- a/drivers/media/i2c/ov5645.c
+++ b/drivers/media/i2c/ov5645.c
@@ -837,7 +837,7 @@ static const struct v4l2_ctrl_ops ov5645_ctrl_ops = {
 };
 
 static int ov5645_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -849,7 +849,7 @@ static int ov5645_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5645_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->code != MEDIA_BUS_FMT_UYVY8_2X8)
@@ -868,13 +868,13 @@ static int ov5645_enum_frame_size(struct v4l2_subdev *subdev,
 
 static struct v4l2_mbus_framefmt *
 __ov5645_get_pad_format(struct ov5645 *ov5645,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad,
 			enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&ov5645->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&ov5645->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &ov5645->fmt;
 	default:
@@ -883,23 +883,25 @@ __ov5645_get_pad_format(struct ov5645 *ov5645,
 }
 
 static int ov5645_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct ov5645 *ov5645 = to_ov5645(sd);
 
-	format->format = *__ov5645_get_pad_format(ov5645, cfg, format->pad,
+	format->format = *__ov5645_get_pad_format(ov5645, sd_state,
+						  format->pad,
 						  format->which);
 	return 0;
 }
 
 static struct v4l2_rect *
-__ov5645_get_pad_crop(struct ov5645 *ov5645, struct v4l2_subdev_pad_config *cfg,
+__ov5645_get_pad_crop(struct ov5645 *ov5645,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&ov5645->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&ov5645->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &ov5645->crop;
 	default:
@@ -908,7 +910,7 @@ __ov5645_get_pad_crop(struct ov5645 *ov5645, struct v4l2_subdev_pad_config *cfg,
 }
 
 static int ov5645_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct ov5645 *ov5645 = to_ov5645(sd);
@@ -917,8 +919,8 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
 	const struct ov5645_mode_info *new_mode;
 	int ret;
 
-	__crop = __ov5645_get_pad_crop(ov5645, cfg, format->pad,
-			format->which);
+	__crop = __ov5645_get_pad_crop(ov5645, sd_state, format->pad,
+				       format->which);
 
 	new_mode = v4l2_find_nearest_size(ov5645_mode_info_data,
 			       ARRAY_SIZE(ov5645_mode_info_data),
@@ -942,8 +944,8 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
 		ov5645->current_mode = new_mode;
 	}
 
-	__format = __ov5645_get_pad_format(ov5645, cfg, format->pad,
-			format->which);
+	__format = __ov5645_get_pad_format(ov5645, sd_state, format->pad,
+					   format->which);
 	__format->width = __crop->width;
 	__format->height = __crop->height;
 	__format->code = MEDIA_BUS_FMT_UYVY8_2X8;
@@ -956,21 +958,21 @@ static int ov5645_set_format(struct v4l2_subdev *sd,
 }
 
 static int ov5645_entity_init_cfg(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = { 0 };
 
-	fmt.which = cfg ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
 	fmt.format.width = 1920;
 	fmt.format.height = 1080;
 
-	ov5645_set_format(subdev, cfg, &fmt);
+	ov5645_set_format(subdev, sd_state, &fmt);
 
 	return 0;
 }
 
 static int ov5645_get_selection(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_selection *sel)
 {
 	struct ov5645 *ov5645 = to_ov5645(sd);
@@ -978,7 +980,7 @@ static int ov5645_get_selection(struct v4l2_subdev *sd,
 	if (sel->target != V4L2_SEL_TGT_CROP)
 		return -EINVAL;
 
-	sel->r = *__ov5645_get_pad_crop(ov5645, cfg, sel->pad,
+	sel->r = *__ov5645_get_pad_crop(ov5645, sd_state, sel->pad,
 					sel->which);
 	return 0;
 }
diff --git a/drivers/media/i2c/ov5647.c b/drivers/media/i2c/ov5647.c
index 38faa74755e3c..d346d18ce629e 100644
--- a/drivers/media/i2c/ov5647.c
+++ b/drivers/media/i2c/ov5647.c
@@ -856,12 +856,13 @@ static const struct v4l2_subdev_core_ops ov5647_subdev_core_ops = {
 };
 
 static const struct v4l2_rect *
-__ov5647_get_pad_crop(struct ov5647 *ov5647, struct v4l2_subdev_pad_config *cfg,
+__ov5647_get_pad_crop(struct ov5647 *ov5647,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&ov5647->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&ov5647->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &ov5647->mode->crop;
 	}
@@ -918,7 +919,7 @@ static const struct v4l2_subdev_video_ops ov5647_subdev_video_ops = {
 };
 
 static int ov5647_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -930,7 +931,7 @@ static int ov5647_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5647_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	const struct v4l2_mbus_framefmt *fmt;
@@ -949,7 +950,7 @@ static int ov5647_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int ov5647_get_pad_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -959,7 +960,8 @@ static int ov5647_get_pad_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&sensor->lock);
 	switch (format->which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		sensor_format = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		sensor_format = v4l2_subdev_get_try_format(sd, sd_state,
+							   format->pad);
 		break;
 	default:
 		sensor_format = &sensor->mode->format;
@@ -973,7 +975,7 @@ static int ov5647_get_pad_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov5647_set_pad_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -987,7 +989,7 @@ static int ov5647_set_pad_fmt(struct v4l2_subdev *sd,
 	/* Update the sensor mode and apply at it at streamon time. */
 	mutex_lock(&sensor->lock);
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, format->pad) = mode->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, format->pad) = mode->format;
 	} else {
 		int exposure_max, exposure_def;
 		int hblank, vblank;
@@ -1020,7 +1022,7 @@ static int ov5647_set_pad_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov5647_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	switch (sel->target) {
@@ -1028,7 +1030,7 @@ static int ov5647_get_selection(struct v4l2_subdev *sd,
 		struct ov5647 *sensor = to_sensor(sd);
 
 		mutex_lock(&sensor->lock);
-		sel->r = *__ov5647_get_pad_crop(sensor, cfg, sel->pad,
+		sel->r = *__ov5647_get_pad_crop(sensor, sd_state, sel->pad,
 						sel->which);
 		mutex_unlock(&sensor->lock);
 
@@ -1104,8 +1106,8 @@ static int ov5647_detect(struct v4l2_subdev *sd)
 static int ov5647_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *format =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
-	struct v4l2_rect *crop = v4l2_subdev_get_try_crop(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
+	struct v4l2_rect *crop = v4l2_subdev_get_try_crop(sd, fh->state, 0);
 
 	crop->left = OV5647_PIXEL_ARRAY_LEFT;
 	crop->top = OV5647_PIXEL_ARRAY_TOP;
diff --git a/drivers/media/i2c/ov5648.c b/drivers/media/i2c/ov5648.c
index 07e64ff0be3ff..947d437ed0efe 100644
--- a/drivers/media/i2c/ov5648.c
+++ b/drivers/media/i2c/ov5648.c
@@ -2188,7 +2188,7 @@ static const struct v4l2_subdev_video_ops ov5648_subdev_video_ops = {
 /* Subdev Pad Operations */
 
 static int ov5648_enum_mbus_code(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *config,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code_enum)
 {
 	if (code_enum->index >= ARRAY_SIZE(ov5648_mbus_codes))
@@ -2217,7 +2217,7 @@ static void ov5648_mbus_format_fill(struct v4l2_mbus_framefmt *mbus_format,
 }
 
 static int ov5648_get_fmt(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *config,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov5648_sensor *sensor = ov5648_subdev_sensor(subdev);
@@ -2226,7 +2226,7 @@ static int ov5648_get_fmt(struct v4l2_subdev *subdev,
 	mutex_lock(&sensor->mutex);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		*mbus_format = *v4l2_subdev_get_try_format(subdev, config,
+		*mbus_format = *v4l2_subdev_get_try_format(subdev, sd_state,
 							   format->pad);
 	else
 		ov5648_mbus_format_fill(mbus_format, sensor->state.mbus_code,
@@ -2238,7 +2238,7 @@ static int ov5648_get_fmt(struct v4l2_subdev *subdev,
 }
 
 static int ov5648_set_fmt(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *config,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov5648_sensor *sensor = ov5648_subdev_sensor(subdev);
@@ -2279,7 +2279,7 @@ static int ov5648_set_fmt(struct v4l2_subdev *subdev,
 	ov5648_mbus_format_fill(mbus_format, mbus_code, mode);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		*v4l2_subdev_get_try_format(subdev, config, format->pad) =
+		*v4l2_subdev_get_try_format(subdev, sd_state, format->pad) =
 			*mbus_format;
 	else if (sensor->state.mode != mode ||
 		 sensor->state.mbus_code != mbus_code)
@@ -2292,7 +2292,7 @@ complete:
 }
 
 static int ov5648_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *config,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *size_enum)
 {
 	const struct ov5648_mode *mode;
@@ -2309,7 +2309,7 @@ static int ov5648_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int ov5648_enum_frame_interval(struct v4l2_subdev *subdev,
-				      struct v4l2_subdev_pad_config *config,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_interval_enum *interval_enum)
 {
 	const struct ov5648_mode *mode = NULL;
diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c
index 182f271f118f2..49189926afd67 100644
--- a/drivers/media/i2c/ov5670.c
+++ b/drivers/media/i2c/ov5670.c
@@ -1937,7 +1937,7 @@ static int ov5670_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct ov5670 *ov5670 = to_ov5670(sd);
 	struct v4l2_mbus_framefmt *try_fmt =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	mutex_lock(&ov5670->mutex);
 
@@ -2153,7 +2153,7 @@ error:
 }
 
 static int ov5670_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	/* Only one bayer order GRBG is supported */
@@ -2166,7 +2166,7 @@ static int ov5670_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5670_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -2193,11 +2193,12 @@ static void ov5670_update_pad_format(const struct ov5670_mode *mode,
 }
 
 static int ov5670_do_get_pad_format(struct ov5670 *ov5670,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&ov5670->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&ov5670->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		ov5670_update_pad_format(ov5670->cur_mode, fmt);
@@ -2206,21 +2207,21 @@ static int ov5670_do_get_pad_format(struct ov5670 *ov5670,
 }
 
 static int ov5670_get_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct ov5670 *ov5670 = to_ov5670(sd);
 	int ret;
 
 	mutex_lock(&ov5670->mutex);
-	ret = ov5670_do_get_pad_format(ov5670, cfg, fmt);
+	ret = ov5670_do_get_pad_format(ov5670, sd_state, fmt);
 	mutex_unlock(&ov5670->mutex);
 
 	return ret;
 }
 
 static int ov5670_set_pad_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	struct ov5670 *ov5670 = to_ov5670(sd);
@@ -2238,7 +2239,7 @@ static int ov5670_set_pad_format(struct v4l2_subdev *sd,
 				      fmt->format.width, fmt->format.height);
 	ov5670_update_pad_format(mode, fmt);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		ov5670->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov5670->link_freq, mode->link_freq_index);
diff --git a/drivers/media/i2c/ov5675.c b/drivers/media/i2c/ov5675.c
index e7e297a239609..da5850b7ad07f 100644
--- a/drivers/media/i2c/ov5675.c
+++ b/drivers/media/i2c/ov5675.c
@@ -923,7 +923,7 @@ static int __maybe_unused ov5675_resume(struct device *dev)
 }
 
 static int ov5675_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov5675 *ov5675 = to_ov5675(sd);
@@ -938,7 +938,7 @@ static int ov5675_set_format(struct v4l2_subdev *sd,
 	mutex_lock(&ov5675->mutex);
 	ov5675_update_pad_format(mode, &fmt->format);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		ov5675->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov5675->link_freq, mode->link_freq_index);
@@ -964,14 +964,15 @@ static int ov5675_set_format(struct v4l2_subdev *sd,
 }
 
 static int ov5675_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov5675 *ov5675 = to_ov5675(sd);
 
 	mutex_lock(&ov5675->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&ov5675->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&ov5675->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		ov5675_update_pad_format(ov5675->cur_mode, &fmt->format);
@@ -982,7 +983,7 @@ static int ov5675_get_format(struct v4l2_subdev *sd,
 }
 
 static int ov5675_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -994,7 +995,7 @@ static int ov5675_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5675_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -1017,7 +1018,7 @@ static int ov5675_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&ov5675->mutex);
 	ov5675_update_pad_format(&supported_modes[0],
-				 v4l2_subdev_get_try_format(sd, fh->pad, 0));
+				 v4l2_subdev_get_try_format(sd, fh->state, 0));
 	mutex_unlock(&ov5675->mutex);
 
 	return 0;
diff --git a/drivers/media/i2c/ov5695.c b/drivers/media/i2c/ov5695.c
index 469d941813c6e..439385938a511 100644
--- a/drivers/media/i2c/ov5695.c
+++ b/drivers/media/i2c/ov5695.c
@@ -806,7 +806,7 @@ ov5695_find_best_fit(struct v4l2_subdev_format *fmt)
 }
 
 static int ov5695_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ov5695 *ov5695 = to_ov5695(sd);
@@ -822,7 +822,7 @@ static int ov5695_set_fmt(struct v4l2_subdev *sd,
 	fmt->format.field = V4L2_FIELD_NONE;
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 #endif
 	} else {
 		ov5695->cur_mode = mode;
@@ -841,7 +841,7 @@ static int ov5695_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov5695_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ov5695 *ov5695 = to_ov5695(sd);
@@ -850,7 +850,8 @@ static int ov5695_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&ov5695->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		fmt->format = *v4l2_subdev_get_try_format(sd, sd_state,
+							  fmt->pad);
 #else
 		mutex_unlock(&ov5695->mutex);
 		return -EINVAL;
@@ -867,7 +868,7 @@ static int ov5695_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov5695_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index != 0)
@@ -878,7 +879,7 @@ static int ov5695_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5695_enum_frame_sizes(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -1052,7 +1053,7 @@ static int ov5695_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct ov5695 *ov5695 = to_ov5695(sd);
 	struct v4l2_mbus_framefmt *try_fmt =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
 	const struct ov5695_mode *def_mode = &supported_modes[0];
 
 	mutex_lock(&ov5695->mutex);
diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c
index 85dd13694bd2f..f67412150b16b 100644
--- a/drivers/media/i2c/ov6650.c
+++ b/drivers/media/i2c/ov6650.c
@@ -467,7 +467,7 @@ static int ov6650_s_power(struct v4l2_subdev *sd, int on)
 }
 
 static int ov6650_get_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -492,7 +492,7 @@ static int ov6650_get_selection(struct v4l2_subdev *sd,
 }
 
 static int ov6650_set_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -535,7 +535,7 @@ static int ov6650_set_selection(struct v4l2_subdev *sd,
 }
 
 static int ov6650_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -550,9 +550,9 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd,
 
 	/* update media bus format code and frame size */
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf->width = cfg->try_fmt.width;
-		mf->height = cfg->try_fmt.height;
-		mf->code = cfg->try_fmt.code;
+		mf->width = sd_state->pads->try_fmt.width;
+		mf->height = sd_state->pads->try_fmt.height;
+		mf->code = sd_state->pads->try_fmt.code;
 
 	} else {
 		mf->width = priv->rect.width >> priv->half_scale;
@@ -668,7 +668,7 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf)
 }
 
 static int ov6650_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -701,15 +701,15 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		/* store media bus format code and frame size in pad config */
-		cfg->try_fmt.width = mf->width;
-		cfg->try_fmt.height = mf->height;
-		cfg->try_fmt.code = mf->code;
+		sd_state->pads->try_fmt.width = mf->width;
+		sd_state->pads->try_fmt.height = mf->height;
+		sd_state->pads->try_fmt.code = mf->code;
 
 		/* return default mbus frame format updated with pad config */
 		*mf = ov6650_def_fmt;
-		mf->width = cfg->try_fmt.width;
-		mf->height = cfg->try_fmt.height;
-		mf->code = cfg->try_fmt.code;
+		mf->width = sd_state->pads->try_fmt.width;
+		mf->height = sd_state->pads->try_fmt.height;
+		mf->code = sd_state->pads->try_fmt.code;
 
 	} else {
 		/* apply new media bus format code and frame size */
@@ -728,7 +728,7 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov6650_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(ov6650_codes))
diff --git a/drivers/media/i2c/ov7251.c b/drivers/media/i2c/ov7251.c
index 0c10203f822b1..ebb299f207e58 100644
--- a/drivers/media/i2c/ov7251.c
+++ b/drivers/media/i2c/ov7251.c
@@ -898,7 +898,7 @@ static const struct v4l2_ctrl_ops ov7251_ctrl_ops = {
 };
 
 static int ov7251_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -910,7 +910,7 @@ static int ov7251_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov7251_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->code != MEDIA_BUS_FMT_Y10_1X10)
@@ -928,7 +928,7 @@ static int ov7251_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int ov7251_enum_frame_ival(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_interval_enum *fie)
 {
 	unsigned int index = fie->index;
@@ -950,13 +950,13 @@ static int ov7251_enum_frame_ival(struct v4l2_subdev *subdev,
 
 static struct v4l2_mbus_framefmt *
 __ov7251_get_pad_format(struct ov7251 *ov7251,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad,
 			enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&ov7251->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&ov7251->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &ov7251->fmt;
 	default:
@@ -965,13 +965,14 @@ __ov7251_get_pad_format(struct ov7251 *ov7251,
 }
 
 static int ov7251_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct ov7251 *ov7251 = to_ov7251(sd);
 
 	mutex_lock(&ov7251->lock);
-	format->format = *__ov7251_get_pad_format(ov7251, cfg, format->pad,
+	format->format = *__ov7251_get_pad_format(ov7251, sd_state,
+						  format->pad,
 						  format->which);
 	mutex_unlock(&ov7251->lock);
 
@@ -979,12 +980,13 @@ static int ov7251_get_format(struct v4l2_subdev *sd,
 }
 
 static struct v4l2_rect *
-__ov7251_get_pad_crop(struct ov7251 *ov7251, struct v4l2_subdev_pad_config *cfg,
+__ov7251_get_pad_crop(struct ov7251 *ov7251,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_crop(&ov7251->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&ov7251->sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &ov7251->crop;
 	default:
@@ -1027,7 +1029,7 @@ ov7251_find_mode_by_ival(struct ov7251 *ov7251, struct v4l2_fract *timeperframe)
 }
 
 static int ov7251_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *format)
 {
 	struct ov7251 *ov7251 = to_ov7251(sd);
@@ -1038,7 +1040,8 @@ static int ov7251_set_format(struct v4l2_subdev *sd,
 
 	mutex_lock(&ov7251->lock);
 
-	__crop = __ov7251_get_pad_crop(ov7251, cfg, format->pad, format->which);
+	__crop = __ov7251_get_pad_crop(ov7251, sd_state, format->pad,
+				       format->which);
 
 	new_mode = v4l2_find_nearest_size(ov7251_mode_info_data,
 				ARRAY_SIZE(ov7251_mode_info_data),
@@ -1077,7 +1080,7 @@ static int ov7251_set_format(struct v4l2_subdev *sd,
 		ov7251->current_mode = new_mode;
 	}
 
-	__format = __ov7251_get_pad_format(ov7251, cfg, format->pad,
+	__format = __ov7251_get_pad_format(ov7251, sd_state, format->pad,
 					   format->which);
 	__format->width = __crop->width;
 	__format->height = __crop->height;
@@ -1098,24 +1101,24 @@ exit:
 }
 
 static int ov7251_entity_init_cfg(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format fmt = {
-		.which = cfg ? V4L2_SUBDEV_FORMAT_TRY
-			     : V4L2_SUBDEV_FORMAT_ACTIVE,
+		.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+		: V4L2_SUBDEV_FORMAT_ACTIVE,
 		.format = {
 			.width = 640,
 			.height = 480
 		}
 	};
 
-	ov7251_set_format(subdev, cfg, &fmt);
+	ov7251_set_format(subdev, sd_state, &fmt);
 
 	return 0;
 }
 
 static int ov7251_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct ov7251 *ov7251 = to_ov7251(sd);
@@ -1124,7 +1127,7 @@ static int ov7251_get_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	mutex_lock(&ov7251->lock);
-	sel->r = *__ov7251_get_pad_crop(ov7251, cfg, sel->pad,
+	sel->r = *__ov7251_get_pad_crop(ov7251, sd_state, sel->pad,
 					sel->which);
 	mutex_unlock(&ov7251->lock);
 
diff --git a/drivers/media/i2c/ov7670.c b/drivers/media/i2c/ov7670.c
index d2df811b1a400..1967464231160 100644
--- a/drivers/media/i2c/ov7670.c
+++ b/drivers/media/i2c/ov7670.c
@@ -960,7 +960,7 @@ static int ov7670_set_hw(struct v4l2_subdev *sd, int hstart, int hstop,
 
 
 static int ov7670_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= N_OV7670_FMTS)
@@ -1105,7 +1105,7 @@ static int ov7670_apply_fmt(struct v4l2_subdev *sd)
  * Set a format.
  */
 static int ov7670_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct ov7670_info *info = to_state(sd);
@@ -1122,7 +1122,8 @@ static int ov7670_set_fmt(struct v4l2_subdev *sd,
 		if (ret)
 			return ret;
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mbus_fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		mbus_fmt = v4l2_subdev_get_try_format(sd, sd_state,
+						      format->pad);
 		*mbus_fmt = format->format;
 #endif
 		return 0;
@@ -1144,7 +1145,7 @@ static int ov7670_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov7670_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov7670_info *info = to_state(sd);
@@ -1154,7 +1155,7 @@ static int ov7670_get_fmt(struct v4l2_subdev *sd,
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mbus_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mbus_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		format->format = *mbus_fmt;
 		return 0;
 #else
@@ -1202,7 +1203,7 @@ static int ov7670_s_frame_interval(struct v4l2_subdev *sd,
 static int ov7670_frame_rates[] = { 30, 15, 10, 5, 1 };
 
 static int ov7670_enum_frame_interval(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct ov7670_info *info = to_state(sd);
@@ -1241,7 +1242,7 @@ static int ov7670_enum_frame_interval(struct v4l2_subdev *sd,
  * Frame size enumeration
  */
 static int ov7670_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct ov7670_info *info = to_state(sd);
@@ -1724,7 +1725,7 @@ static void ov7670_get_default_format(struct v4l2_subdev *sd,
 static int ov7670_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *format =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	ov7670_get_default_format(sd, format);
 
diff --git a/drivers/media/i2c/ov772x.c b/drivers/media/i2c/ov772x.c
index d94cf2d39c2ac..78602a2f70b0f 100644
--- a/drivers/media/i2c/ov772x.c
+++ b/drivers/media/i2c/ov772x.c
@@ -1157,7 +1157,7 @@ ov772x_set_fmt_error:
 }
 
 static int ov772x_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct ov772x_priv *priv = to_ov772x(sd);
@@ -1179,7 +1179,7 @@ static int ov772x_get_selection(struct v4l2_subdev *sd,
 }
 
 static int ov772x_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -1198,7 +1198,7 @@ static int ov772x_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov772x_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov772x_priv *priv = to_ov772x(sd);
@@ -1222,7 +1222,7 @@ static int ov772x_set_fmt(struct v4l2_subdev *sd,
 	mf->xfer_func = V4L2_XFER_FUNC_DEFAULT;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *mf;
+		sd_state->pads->try_fmt = *mf;
 		return 0;
 	}
 
@@ -1320,7 +1320,7 @@ static const struct v4l2_subdev_core_ops ov772x_subdev_core_ops = {
 };
 
 static int ov772x_enum_frame_interval(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	if (fie->pad || fie->index >= ARRAY_SIZE(ov772x_frame_intervals))
@@ -1338,7 +1338,7 @@ static int ov772x_enum_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov772x_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(ov772x_cfmts))
diff --git a/drivers/media/i2c/ov7740.c b/drivers/media/i2c/ov7740.c
index e0ff6506a5430..2539cfee85c82 100644
--- a/drivers/media/i2c/ov7740.c
+++ b/drivers/media/i2c/ov7740.c
@@ -707,7 +707,7 @@ static const struct ov7740_pixfmt ov7740_formats[] = {
 #define N_OV7740_FMTS ARRAY_SIZE(ov7740_formats)
 
 static int ov7740_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= N_OV7740_FMTS)
@@ -719,7 +719,7 @@ static int ov7740_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov7740_enum_frame_interval(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_interval_enum *fie)
 {
 	if (fie->pad)
@@ -738,7 +738,7 @@ static int ov7740_enum_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov7740_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->pad)
@@ -801,7 +801,7 @@ static int ov7740_try_fmt_internal(struct v4l2_subdev *sd,
 }
 
 static int ov7740_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov7740 *ov7740 = container_of(sd, struct ov7740, subdev);
@@ -823,7 +823,8 @@ static int ov7740_set_fmt(struct v4l2_subdev *sd,
 		if (ret)
 			goto error;
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mbus_fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		mbus_fmt = v4l2_subdev_get_try_format(sd, sd_state,
+						      format->pad);
 		*mbus_fmt = format->format;
 #endif
 		mutex_unlock(&ov7740->mutex);
@@ -846,7 +847,7 @@ error:
 }
 
 static int ov7740_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov7740 *ov7740 = container_of(sd, struct ov7740, subdev);
@@ -858,7 +859,7 @@ static int ov7740_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&ov7740->mutex);
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 #ifdef CONFIG_VIDEO_V4L2_SUBDEV_API
-		mbus_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mbus_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		format->format = *mbus_fmt;
 		ret = 0;
 #else
@@ -903,7 +904,7 @@ static int ov7740_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct ov7740 *ov7740 = container_of(sd, struct ov7740, subdev);
 	struct v4l2_mbus_framefmt *format =
-				v4l2_subdev_get_try_format(sd, fh->pad, 0);
+				v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	mutex_lock(&ov7740->mutex);
 	ov7740_get_default_format(sd, format);
diff --git a/drivers/media/i2c/ov8856.c b/drivers/media/i2c/ov8856.c
index a6bc665a64309..88e19f30d3762 100644
--- a/drivers/media/i2c/ov8856.c
+++ b/drivers/media/i2c/ov8856.c
@@ -2083,7 +2083,7 @@ static int __maybe_unused ov8856_resume(struct device *dev)
 }
 
 static int ov8856_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov8856 *ov8856 = to_ov8856(sd);
@@ -2098,7 +2098,7 @@ static int ov8856_set_format(struct v4l2_subdev *sd,
 	mutex_lock(&ov8856->mutex);
 	ov8856_update_pad_format(mode, &fmt->format);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		ov8856->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov8856->link_freq, mode->link_freq_index);
@@ -2129,14 +2129,15 @@ static int ov8856_set_format(struct v4l2_subdev *sd,
 }
 
 static int ov8856_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov8856 *ov8856 = to_ov8856(sd);
 
 	mutex_lock(&ov8856->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&ov8856->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&ov8856->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		ov8856_update_pad_format(ov8856->cur_mode, &fmt->format);
@@ -2147,7 +2148,7 @@ static int ov8856_get_format(struct v4l2_subdev *sd,
 }
 
 static int ov8856_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	/* Only one bayer order GRBG is supported */
@@ -2160,7 +2161,7 @@ static int ov8856_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov8856_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct ov8856 *ov8856 = to_ov8856(sd);
@@ -2185,7 +2186,7 @@ static int ov8856_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&ov8856->mutex);
 	ov8856_update_pad_format(&ov8856->priv_lane->supported_modes[0],
-				 v4l2_subdev_get_try_format(sd, fh->pad, 0));
+				 v4l2_subdev_get_try_format(sd, fh->state, 0));
 	mutex_unlock(&ov8856->mutex);
 
 	return 0;
diff --git a/drivers/media/i2c/ov8865.c b/drivers/media/i2c/ov8865.c
index b16c825598005..ce50f3ea87b8e 100644
--- a/drivers/media/i2c/ov8865.c
+++ b/drivers/media/i2c/ov8865.c
@@ -2542,7 +2542,7 @@ static const struct v4l2_subdev_video_ops ov8865_subdev_video_ops = {
 /* Subdev Pad Operations */
 
 static int ov8865_enum_mbus_code(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *config,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code_enum)
 {
 	if (code_enum->index >= ARRAY_SIZE(ov8865_mbus_codes))
@@ -2571,7 +2571,7 @@ static void ov8865_mbus_format_fill(struct v4l2_mbus_framefmt *mbus_format,
 }
 
 static int ov8865_get_fmt(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *config,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov8865_sensor *sensor = ov8865_subdev_sensor(subdev);
@@ -2580,7 +2580,7 @@ static int ov8865_get_fmt(struct v4l2_subdev *subdev,
 	mutex_lock(&sensor->mutex);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		*mbus_format = *v4l2_subdev_get_try_format(subdev, config,
+		*mbus_format = *v4l2_subdev_get_try_format(subdev, sd_state,
 							   format->pad);
 	else
 		ov8865_mbus_format_fill(mbus_format, sensor->state.mbus_code,
@@ -2592,7 +2592,7 @@ static int ov8865_get_fmt(struct v4l2_subdev *subdev,
 }
 
 static int ov8865_set_fmt(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *config,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct ov8865_sensor *sensor = ov8865_subdev_sensor(subdev);
@@ -2633,7 +2633,7 @@ static int ov8865_set_fmt(struct v4l2_subdev *subdev,
 	ov8865_mbus_format_fill(mbus_format, mbus_code, mode);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		*v4l2_subdev_get_try_format(subdev, config, format->pad) =
+		*v4l2_subdev_get_try_format(subdev, sd_state, format->pad) =
 			*mbus_format;
 	else if (sensor->state.mode != mode ||
 		 sensor->state.mbus_code != mbus_code)
@@ -2646,7 +2646,7 @@ complete:
 }
 
 static int ov8865_enum_frame_size(struct v4l2_subdev *subdev,
-				  struct v4l2_subdev_pad_config *config,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *size_enum)
 {
 	const struct ov8865_mode *mode;
@@ -2663,7 +2663,7 @@ static int ov8865_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static int ov8865_enum_frame_interval(struct v4l2_subdev *subdev,
-				      struct v4l2_subdev_pad_config *config,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_interval_enum *interval_enum)
 {
 	const struct ov8865_mode *mode = NULL;
diff --git a/drivers/media/i2c/ov9640.c b/drivers/media/i2c/ov9640.c
index d36b04c49628c..0bab8c2cf1602 100644
--- a/drivers/media/i2c/ov9640.c
+++ b/drivers/media/i2c/ov9640.c
@@ -519,7 +519,7 @@ static int ov9640_s_fmt(struct v4l2_subdev *sd,
 }
 
 static int ov9640_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -547,13 +547,13 @@ static int ov9640_set_fmt(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return ov9640_s_fmt(sd, mf);
 
-	cfg->try_fmt = *mf;
+	sd_state->pads->try_fmt = *mf;
 
 	return 0;
 }
 
 static int ov9640_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(ov9640_codes))
@@ -565,7 +565,7 @@ static int ov9640_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov9640_get_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_selection *sel)
 {
 	if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE)
diff --git a/drivers/media/i2c/ov9650.c b/drivers/media/i2c/ov9650.c
index a9f13dc2f053b..c313e11a9754e 100644
--- a/drivers/media/i2c/ov9650.c
+++ b/drivers/media/i2c/ov9650.c
@@ -1070,7 +1070,7 @@ static void ov965x_get_default_format(struct v4l2_mbus_framefmt *mf)
 }
 
 static int ov965x_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(ov965x_formats))
@@ -1081,7 +1081,7 @@ static int ov965x_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov965x_enum_frame_sizes(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	int i = ARRAY_SIZE(ov965x_formats);
@@ -1167,14 +1167,14 @@ static int ov965x_s_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov965x_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct ov965x *ov965x = to_ov965x(sd);
 	struct v4l2_mbus_framefmt *mf;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1212,7 +1212,7 @@ static void __ov965x_try_frame_size(struct v4l2_mbus_framefmt *mf,
 }
 
 static int ov965x_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	unsigned int index = ARRAY_SIZE(ov965x_formats);
@@ -1234,8 +1234,9 @@ static int ov965x_set_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&ov965x->lock);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		if (cfg) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		if (sd_state) {
+			mf = v4l2_subdev_get_try_format(sd, sd_state,
+							fmt->pad);
 			*mf = fmt->format;
 		}
 	} else {
@@ -1364,7 +1365,7 @@ static int ov965x_s_stream(struct v4l2_subdev *sd, int on)
 static int ov965x_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *mf =
-		v4l2_subdev_get_try_format(sd, fh->pad, 0);
+		v4l2_subdev_get_try_format(sd, fh->state, 0);
 
 	ov965x_get_default_format(mf);
 	return 0;
diff --git a/drivers/media/i2c/ov9734.c b/drivers/media/i2c/ov9734.c
index ba156683c5338..af50c66cf5ce2 100644
--- a/drivers/media/i2c/ov9734.c
+++ b/drivers/media/i2c/ov9734.c
@@ -705,7 +705,7 @@ exit:
 }
 
 static int ov9734_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov9734 *ov9734 = to_ov9734(sd);
@@ -720,7 +720,7 @@ static int ov9734_set_format(struct v4l2_subdev *sd,
 	mutex_lock(&ov9734->mutex);
 	ov9734_update_pad_format(mode, &fmt->format);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = fmt->format;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = fmt->format;
 	} else {
 		ov9734->cur_mode = mode;
 		__v4l2_ctrl_s_ctrl(ov9734->link_freq, mode->link_freq_index);
@@ -746,14 +746,15 @@ static int ov9734_set_format(struct v4l2_subdev *sd,
 }
 
 static int ov9734_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct ov9734 *ov9734 = to_ov9734(sd);
 
 	mutex_lock(&ov9734->mutex);
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(&ov9734->sd, cfg,
+		fmt->format = *v4l2_subdev_get_try_format(&ov9734->sd,
+							  sd_state,
 							  fmt->pad);
 	else
 		ov9734_update_pad_format(ov9734->cur_mode, &fmt->format);
@@ -764,7 +765,7 @@ static int ov9734_get_format(struct v4l2_subdev *sd,
 }
 
 static int ov9734_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index > 0)
@@ -776,7 +777,7 @@ static int ov9734_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov9734_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index >= ARRAY_SIZE(supported_modes))
@@ -799,7 +800,7 @@ static int ov9734_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 
 	mutex_lock(&ov9734->mutex);
 	ov9734_update_pad_format(&supported_modes[0],
-				 v4l2_subdev_get_try_format(sd, fh->pad, 0));
+				 v4l2_subdev_get_try_format(sd, fh->state, 0));
 	mutex_unlock(&ov9734->mutex);
 
 	return 0;
diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index 90eb73f0e6e9c..a4b639cf80637 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -403,7 +403,7 @@ static int rdacm20_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int rdacm20_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -415,7 +415,7 @@ static int rdacm20_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int rdacm20_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
index 179d107f494ca..5b78d81857730 100644
--- a/drivers/media/i2c/rdacm21.c
+++ b/drivers/media/i2c/rdacm21.c
@@ -281,7 +281,7 @@ static int rdacm21_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int rdacm21_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index > 0)
@@ -293,7 +293,7 @@ static int rdacm21_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int rdacm21_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
diff --git a/drivers/media/i2c/rj54n1cb0c.c b/drivers/media/i2c/rj54n1cb0c.c
index 4cc51e0018744..2e4018c269124 100644
--- a/drivers/media/i2c/rj54n1cb0c.c
+++ b/drivers/media/i2c/rj54n1cb0c.c
@@ -488,7 +488,7 @@ static int reg_write_multiple(struct i2c_client *client,
 }
 
 static int rj54n1_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(rj54n1_colour_fmts))
@@ -541,7 +541,7 @@ static int rj54n1_sensor_scale(struct v4l2_subdev *sd, s32 *in_w, s32 *in_h,
 			       s32 *out_w, s32 *out_h);
 
 static int rj54n1_set_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -578,7 +578,7 @@ static int rj54n1_set_selection(struct v4l2_subdev *sd,
 }
 
 static int rj54n1_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -603,7 +603,7 @@ static int rj54n1_get_selection(struct v4l2_subdev *sd,
 }
 
 static int rj54n1_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -973,7 +973,7 @@ static int rj54n1_reg_init(struct i2c_client *client)
 }
 
 static int rj54n1_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -1009,7 +1009,7 @@ static int rj54n1_set_fmt(struct v4l2_subdev *sd,
 			      &mf->height, 84, RJ54N1_MAX_HEIGHT, align, 0);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *mf;
+		sd_state->pads->try_fmt = *mf;
 		return 0;
 	}
 
diff --git a/drivers/media/i2c/s5c73m3/s5c73m3-core.c b/drivers/media/i2c/s5c73m3/s5c73m3-core.c
index 71804a70bc6d7..e2b88c5e4f983 100644
--- a/drivers/media/i2c/s5c73m3/s5c73m3-core.c
+++ b/drivers/media/i2c/s5c73m3/s5c73m3-core.c
@@ -817,7 +817,7 @@ static const struct s5c73m3_frame_size *s5c73m3_find_frame_size(
 }
 
 static void s5c73m3_oif_try_format(struct s5c73m3 *state,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt,
 				   const struct s5c73m3_frame_size **fs)
 {
@@ -844,8 +844,8 @@ static void s5c73m3_oif_try_format(struct s5c73m3 *state,
 			*fs = state->oif_pix_size[RES_ISP];
 		else
 			*fs = s5c73m3_find_frame_size(
-						v4l2_subdev_get_try_format(sd, cfg,
-							OIF_ISP_PAD),
+						v4l2_subdev_get_try_format(sd, sd_state,
+									   OIF_ISP_PAD),
 						RES_ISP);
 		break;
 	}
@@ -854,7 +854,7 @@ static void s5c73m3_oif_try_format(struct s5c73m3 *state,
 }
 
 static void s5c73m3_try_format(struct s5c73m3 *state,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt,
 			      const struct s5c73m3_frame_size **fs)
 {
@@ -946,7 +946,7 @@ static int s5c73m3_oif_s_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_oif_enum_frame_interval(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct s5c73m3 *state = oif_sd_to_s5c73m3(sd);
@@ -984,7 +984,7 @@ static int s5c73m3_oif_get_pad_code(int pad, int index)
 }
 
 static int s5c73m3_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct s5c73m3 *state = sensor_sd_to_s5c73m3(sd);
@@ -992,7 +992,8 @@ static int s5c73m3_get_fmt(struct v4l2_subdev *sd,
 	u32 code;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		fmt->format = *v4l2_subdev_get_try_format(sd, sd_state,
+							  fmt->pad);
 		return 0;
 	}
 
@@ -1018,7 +1019,7 @@ static int s5c73m3_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_oif_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct s5c73m3 *state = oif_sd_to_s5c73m3(sd);
@@ -1026,7 +1027,8 @@ static int s5c73m3_oif_get_fmt(struct v4l2_subdev *sd,
 	u32 code;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		fmt->format = *v4l2_subdev_get_try_format(sd, sd_state,
+							  fmt->pad);
 		return 0;
 	}
 
@@ -1056,7 +1058,7 @@ static int s5c73m3_oif_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	const struct s5c73m3_frame_size *frame_size = NULL;
@@ -1066,10 +1068,10 @@ static int s5c73m3_set_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&state->lock);
 
-	s5c73m3_try_format(state, cfg, fmt, &frame_size);
+	s5c73m3_try_format(state, sd_state, fmt, &frame_size);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 	} else {
 		switch (fmt->pad) {
@@ -1095,7 +1097,7 @@ static int s5c73m3_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_oif_set_fmt(struct v4l2_subdev *sd,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_subdev_format *fmt)
 {
 	const struct s5c73m3_frame_size *frame_size = NULL;
@@ -1105,13 +1107,14 @@ static int s5c73m3_oif_set_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&state->lock);
 
-	s5c73m3_oif_try_format(state, cfg, fmt, &frame_size);
+	s5c73m3_oif_try_format(state, sd_state, fmt, &frame_size);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 		if (fmt->pad == OIF_ISP_PAD) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, OIF_SOURCE_PAD);
+			mf = v4l2_subdev_get_try_format(sd, sd_state,
+							OIF_SOURCE_PAD);
 			mf->width = fmt->format.width;
 			mf->height = fmt->format.height;
 		}
@@ -1183,7 +1186,7 @@ static int s5c73m3_oif_set_frame_desc(struct v4l2_subdev *sd, unsigned int pad,
 }
 
 static int s5c73m3_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	static const int codes[] = {
@@ -1199,7 +1202,7 @@ static int s5c73m3_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_oif_enum_mbus_code(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	int ret;
@@ -1214,7 +1217,7 @@ static int s5c73m3_oif_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	int idx;
@@ -1241,7 +1244,7 @@ static int s5c73m3_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int s5c73m3_oif_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct s5c73m3 *state = oif_sd_to_s5c73m3(sd);
@@ -1259,7 +1262,7 @@ static int s5c73m3_oif_enum_frame_size(struct v4l2_subdev *sd,
 			if (fse->which == V4L2_SUBDEV_FORMAT_TRY) {
 				struct v4l2_mbus_framefmt *mf;
 
-				mf = v4l2_subdev_get_try_format(sd, cfg,
+				mf = v4l2_subdev_get_try_format(sd, sd_state,
 								OIF_ISP_PAD);
 
 				w = mf->width;
@@ -1315,11 +1318,11 @@ static int s5c73m3_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, S5C73M3_ISP_PAD);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, S5C73M3_ISP_PAD);
 	s5c73m3_fill_mbus_fmt(mf, &s5c73m3_isp_resolutions[1],
 						S5C73M3_ISP_FMT);
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, S5C73M3_JPEG_PAD);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, S5C73M3_JPEG_PAD);
 	s5c73m3_fill_mbus_fmt(mf, &s5c73m3_jpeg_resolutions[1],
 					S5C73M3_JPEG_FMT);
 
@@ -1330,15 +1333,15 @@ static int s5c73m3_oif_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, OIF_ISP_PAD);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, OIF_ISP_PAD);
 	s5c73m3_fill_mbus_fmt(mf, &s5c73m3_isp_resolutions[1],
 						S5C73M3_ISP_FMT);
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, OIF_JPEG_PAD);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, OIF_JPEG_PAD);
 	s5c73m3_fill_mbus_fmt(mf, &s5c73m3_jpeg_resolutions[1],
 					S5C73M3_JPEG_FMT);
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, OIF_SOURCE_PAD);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, OIF_SOURCE_PAD);
 	s5c73m3_fill_mbus_fmt(mf, &s5c73m3_isp_resolutions[1],
 						S5C73M3_ISP_FMT);
 	return 0;
diff --git a/drivers/media/i2c/s5k4ecgx.c b/drivers/media/i2c/s5k4ecgx.c
index 4e97309a67f41..af9a305242cd0 100644
--- a/drivers/media/i2c/s5k4ecgx.c
+++ b/drivers/media/i2c/s5k4ecgx.c
@@ -525,7 +525,7 @@ static int s5k4ecgx_try_frame_size(struct v4l2_mbus_framefmt *mf,
 }
 
 static int s5k4ecgx_enum_mbus_code(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(s5k4ecgx_formats))
@@ -535,15 +535,16 @@ static int s5k4ecgx_enum_mbus_code(struct v4l2_subdev *sd,
 	return 0;
 }
 
-static int s5k4ecgx_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
-			   struct v4l2_subdev_format *fmt)
+static int s5k4ecgx_get_fmt(struct v4l2_subdev *sd,
+			    struct v4l2_subdev_state *sd_state,
+			    struct v4l2_subdev_format *fmt)
 {
 	struct s5k4ecgx *priv = to_s5k4ecgx(sd);
 	struct v4l2_mbus_framefmt *mf;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		if (cfg) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		if (sd_state) {
+			mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 			fmt->format = *mf;
 		}
 		return 0;
@@ -575,7 +576,8 @@ static const struct s5k4ecgx_pixfmt *s5k4ecgx_try_fmt(struct v4l2_subdev *sd,
 	return &s5k4ecgx_formats[i];
 }
 
-static int s5k4ecgx_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int s5k4ecgx_set_fmt(struct v4l2_subdev *sd,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct s5k4ecgx *priv = to_s5k4ecgx(sd);
@@ -590,8 +592,8 @@ static int s5k4ecgx_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_confi
 	fmt->format.field = V4L2_FIELD_NONE;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		if (cfg) {
-			mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		if (sd_state) {
+			mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 			*mf = fmt->format;
 		}
 		return 0;
@@ -686,7 +688,9 @@ static int s5k4ecgx_registered(struct v4l2_subdev *sd)
  */
 static int s5k4ecgx_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	struct v4l2_mbus_framefmt *mf = v4l2_subdev_get_try_format(sd, fh->pad, 0);
+	struct v4l2_mbus_framefmt *mf = v4l2_subdev_get_try_format(sd,
+								   fh->state,
+								   0);
 
 	mf->width = s5k4ecgx_prev_sizes[0].size.width;
 	mf->height = s5k4ecgx_prev_sizes[0].size.height;
diff --git a/drivers/media/i2c/s5k5baf.c b/drivers/media/i2c/s5k5baf.c
index bc560817e5046..6a5dceb699a88 100644
--- a/drivers/media/i2c/s5k5baf.c
+++ b/drivers/media/i2c/s5k5baf.c
@@ -1180,7 +1180,7 @@ static int s5k5baf_s_frame_interval(struct v4l2_subdev *sd,
  * V4L2 subdev pad level and video operations
  */
 static int s5k5baf_enum_frame_interval(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	if (fie->index > S5K5BAF_MAX_FR_TIME - S5K5BAF_MIN_FR_TIME ||
@@ -1199,7 +1199,7 @@ static int s5k5baf_enum_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int s5k5baf_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad == PAD_CIS) {
@@ -1217,7 +1217,7 @@ static int s5k5baf_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int s5k5baf_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int i;
@@ -1274,15 +1274,16 @@ static int s5k5baf_try_isp_format(struct v4l2_mbus_framefmt *mf)
 	return pixfmt;
 }
 
-static int s5k5baf_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
-			  struct v4l2_subdev_format *fmt)
+static int s5k5baf_get_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
+			   struct v4l2_subdev_format *fmt)
 {
 	struct s5k5baf *state = to_s5k5baf(sd);
 	const struct s5k5baf_pixfmt *pixfmt;
 	struct v4l2_mbus_framefmt *mf;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1304,8 +1305,9 @@ static int s5k5baf_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	return 0;
 }
 
-static int s5k5baf_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
-			  struct v4l2_subdev_format *fmt)
+static int s5k5baf_set_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
+			   struct v4l2_subdev_format *fmt)
 {
 	struct v4l2_mbus_framefmt *mf = &fmt->format;
 	struct s5k5baf *state = to_s5k5baf(sd);
@@ -1315,7 +1317,7 @@ static int s5k5baf_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	mf->field = V4L2_FIELD_NONE;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_format(sd, cfg, fmt->pad) = *mf;
+		*v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) = *mf;
 		return 0;
 	}
 
@@ -1367,7 +1369,7 @@ static int s5k5baf_is_bound_target(u32 target)
 }
 
 static int s5k5baf_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	enum selection_rect rtype;
@@ -1387,9 +1389,11 @@ static int s5k5baf_get_selection(struct v4l2_subdev *sd,
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
 		if (rtype == R_COMPOSE)
-			sel->r = *v4l2_subdev_get_try_compose(sd, cfg, sel->pad);
+			sel->r = *v4l2_subdev_get_try_compose(sd, sd_state,
+							      sel->pad);
 		else
-			sel->r = *v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+			sel->r = *v4l2_subdev_get_try_crop(sd, sd_state,
+							   sel->pad);
 		return 0;
 	}
 
@@ -1458,7 +1462,7 @@ static bool s5k5baf_cmp_rect(const struct v4l2_rect *r1,
 }
 
 static int s5k5baf_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	static enum selection_rect rtype;
@@ -1479,9 +1483,12 @@ static int s5k5baf_set_selection(struct v4l2_subdev *sd,
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
 		rects = (struct v4l2_rect * []) {
 				&s5k5baf_cis_rect,
-				v4l2_subdev_get_try_crop(sd, cfg, PAD_CIS),
-				v4l2_subdev_get_try_compose(sd, cfg, PAD_CIS),
-				v4l2_subdev_get_try_crop(sd, cfg, PAD_OUT)
+				v4l2_subdev_get_try_crop(sd, sd_state,
+							 PAD_CIS),
+				v4l2_subdev_get_try_compose(sd, sd_state,
+							    PAD_CIS),
+				v4l2_subdev_get_try_crop(sd, sd_state,
+							 PAD_OUT)
 			};
 		s5k5baf_set_rect_and_adjust(rects, rtype, &sel->r);
 		return 0;
@@ -1699,22 +1706,22 @@ static int s5k5baf_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, PAD_CIS);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, PAD_CIS);
 	s5k5baf_try_cis_format(mf);
 
 	if (s5k5baf_is_cis_subdev(sd))
 		return 0;
 
-	mf = v4l2_subdev_get_try_format(sd, fh->pad, PAD_OUT);
+	mf = v4l2_subdev_get_try_format(sd, fh->state, PAD_OUT);
 	mf->colorspace = s5k5baf_formats[0].colorspace;
 	mf->code = s5k5baf_formats[0].code;
 	mf->width = s5k5baf_cis_rect.width;
 	mf->height = s5k5baf_cis_rect.height;
 	mf->field = V4L2_FIELD_NONE;
 
-	*v4l2_subdev_get_try_crop(sd, fh->pad, PAD_CIS) = s5k5baf_cis_rect;
-	*v4l2_subdev_get_try_compose(sd, fh->pad, PAD_CIS) = s5k5baf_cis_rect;
-	*v4l2_subdev_get_try_crop(sd, fh->pad, PAD_OUT) = s5k5baf_cis_rect;
+	*v4l2_subdev_get_try_crop(sd, fh->state, PAD_CIS) = s5k5baf_cis_rect;
+	*v4l2_subdev_get_try_compose(sd, fh->state, PAD_CIS) = s5k5baf_cis_rect;
+	*v4l2_subdev_get_try_crop(sd, fh->state, PAD_OUT) = s5k5baf_cis_rect;
 
 	return 0;
 }
diff --git a/drivers/media/i2c/s5k6a3.c b/drivers/media/i2c/s5k6a3.c
index f26c168ef942e..b97dd6149e90f 100644
--- a/drivers/media/i2c/s5k6a3.c
+++ b/drivers/media/i2c/s5k6a3.c
@@ -99,7 +99,7 @@ static const struct v4l2_mbus_framefmt *find_sensor_format(
 }
 
 static int s5k6a3_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(s5k6a3_formats))
@@ -123,17 +123,18 @@ static void s5k6a3_try_format(struct v4l2_mbus_framefmt *mf)
 }
 
 static struct v4l2_mbus_framefmt *__s5k6a3_get_format(
-		struct s5k6a3 *sensor, struct v4l2_subdev_pad_config *cfg,
+		struct s5k6a3 *sensor, struct v4l2_subdev_state *sd_state,
 		u32 pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return cfg ? v4l2_subdev_get_try_format(&sensor->subdev, cfg, pad) : NULL;
+		return sd_state ? v4l2_subdev_get_try_format(&sensor->subdev,
+							     sd_state, pad) : NULL;
 
 	return &sensor->format;
 }
 
 static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *fmt)
 {
 	struct s5k6a3 *sensor = sd_to_s5k6a3(sd);
@@ -141,7 +142,7 @@ static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
 
 	s5k6a3_try_format(&fmt->format);
 
-	mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
+	mf = __s5k6a3_get_format(sensor, sd_state, fmt->pad, fmt->which);
 	if (mf) {
 		mutex_lock(&sensor->lock);
 		*mf = fmt->format;
@@ -151,13 +152,13 @@ static int s5k6a3_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int s5k6a3_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct s5k6a3 *sensor = sd_to_s5k6a3(sd);
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = __s5k6a3_get_format(sensor, cfg, fmt->pad, fmt->which);
+	mf = __s5k6a3_get_format(sensor, sd_state, fmt->pad, fmt->which);
 
 	mutex_lock(&sensor->lock);
 	fmt->format = *mf;
@@ -173,7 +174,9 @@ static const struct v4l2_subdev_pad_ops s5k6a3_pad_ops = {
 
 static int s5k6a3_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd, fh->pad, 0);
+	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd,
+								       fh->state,
+								       0);
 
 	*format		= s5k6a3_formats[0];
 	format->width	= S5K6A3_DEFAULT_WIDTH;
diff --git a/drivers/media/i2c/s5k6aa.c b/drivers/media/i2c/s5k6aa.c
index e9be7323a22e9..105a4b7d8354b 100644
--- a/drivers/media/i2c/s5k6aa.c
+++ b/drivers/media/i2c/s5k6aa.c
@@ -997,7 +997,7 @@ static int s5k6aa_s_frame_interval(struct v4l2_subdev *sd,
  * V4L2 subdev pad level and video operations
  */
 static int s5k6aa_enum_frame_interval(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct s5k6aa *s5k6aa = to_s5k6aa(sd);
@@ -1024,7 +1024,7 @@ static int s5k6aa_enum_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int s5k6aa_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(s5k6aa_formats))
@@ -1035,7 +1035,7 @@ static int s5k6aa_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int s5k6aa_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int i = ARRAY_SIZE(s5k6aa_formats);
@@ -1057,14 +1057,15 @@ static int s5k6aa_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static struct v4l2_rect *
-__s5k6aa_get_crop_rect(struct s5k6aa *s5k6aa, struct v4l2_subdev_pad_config *cfg,
+__s5k6aa_get_crop_rect(struct s5k6aa *s5k6aa,
+		       struct v4l2_subdev_state *sd_state,
 		       enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return &s5k6aa->ccd_rect;
 
 	WARN_ON(which != V4L2_SUBDEV_FORMAT_TRY);
-	return v4l2_subdev_get_try_crop(&s5k6aa->sd, cfg, 0);
+	return v4l2_subdev_get_try_crop(&s5k6aa->sd, sd_state, 0);
 }
 
 static void s5k6aa_try_format(struct s5k6aa *s5k6aa,
@@ -1088,7 +1089,8 @@ static void s5k6aa_try_format(struct s5k6aa *s5k6aa,
 	mf->field	= V4L2_FIELD_NONE;
 }
 
-static int s5k6aa_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int s5k6aa_get_fmt(struct v4l2_subdev *sd,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct s5k6aa *s5k6aa = to_s5k6aa(sd);
@@ -1097,7 +1099,7 @@ static int s5k6aa_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	memset(fmt->reserved, 0, sizeof(fmt->reserved));
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1109,7 +1111,8 @@ static int s5k6aa_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	return 0;
 }
 
-static int s5k6aa_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int s5k6aa_set_fmt(struct v4l2_subdev *sd,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct s5k6aa *s5k6aa = to_s5k6aa(sd);
@@ -1122,8 +1125,8 @@ static int s5k6aa_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	s5k6aa_try_format(s5k6aa, &fmt->format);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
-		crop = v4l2_subdev_get_try_crop(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
+		crop = v4l2_subdev_get_try_crop(sd, sd_state, 0);
 	} else {
 		if (s5k6aa->streaming) {
 			ret = -EBUSY;
@@ -1163,7 +1166,7 @@ static int s5k6aa_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 }
 
 static int s5k6aa_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct s5k6aa *s5k6aa = to_s5k6aa(sd);
@@ -1175,7 +1178,7 @@ static int s5k6aa_get_selection(struct v4l2_subdev *sd,
 	memset(sel->reserved, 0, sizeof(sel->reserved));
 
 	mutex_lock(&s5k6aa->lock);
-	rect = __s5k6aa_get_crop_rect(s5k6aa, cfg, sel->which);
+	rect = __s5k6aa_get_crop_rect(s5k6aa, sd_state, sel->which);
 	sel->r = *rect;
 	mutex_unlock(&s5k6aa->lock);
 
@@ -1186,7 +1189,7 @@ static int s5k6aa_get_selection(struct v4l2_subdev *sd,
 }
 
 static int s5k6aa_set_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct s5k6aa *s5k6aa = to_s5k6aa(sd);
@@ -1198,13 +1201,13 @@ static int s5k6aa_set_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	mutex_lock(&s5k6aa->lock);
-	crop_r = __s5k6aa_get_crop_rect(s5k6aa, cfg, sel->which);
+	crop_r = __s5k6aa_get_crop_rect(s5k6aa, sd_state, sel->which);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
 		mf = &s5k6aa->preset->mbus_fmt;
 		s5k6aa->apply_crop = 1;
 	} else {
-		mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	}
 	v4l_bound_align_image(&sel->r.width, mf->width,
 			      S5K6AA_WIN_WIDTH_MAX, 1,
@@ -1425,8 +1428,10 @@ static int s5k6aa_initialize_ctrls(struct s5k6aa *s5k6aa)
  */
 static int s5k6aa_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 {
-	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd, fh->pad, 0);
-	struct v4l2_rect *crop = v4l2_subdev_get_try_crop(sd, fh->pad, 0);
+	struct v4l2_mbus_framefmt *format = v4l2_subdev_get_try_format(sd,
+								       fh->state,
+								       0);
+	struct v4l2_rect *crop = v4l2_subdev_get_try_crop(sd, fh->state, 0);
 
 	format->colorspace = s5k6aa_formats[0].colorspace;
 	format->code = s5k6aa_formats[0].code;
diff --git a/drivers/media/i2c/saa6752hs.c b/drivers/media/i2c/saa6752hs.c
index 6171ced809bbc..a7f043cad149b 100644
--- a/drivers/media/i2c/saa6752hs.c
+++ b/drivers/media/i2c/saa6752hs.c
@@ -543,7 +543,7 @@ static int saa6752hs_init(struct v4l2_subdev *sd, u32 leading_null_bytes)
 }
 
 static int saa6752hs_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *f = &format->format;
@@ -563,7 +563,7 @@ static int saa6752hs_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int saa6752hs_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *f = &format->format;
@@ -595,7 +595,7 @@ static int saa6752hs_set_fmt(struct v4l2_subdev *sd,
 	f->colorspace = V4L2_COLORSPACE_SMPTE170M;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *f;
+		sd_state->pads->try_fmt = *f;
 		return 0;
 	}
 
diff --git a/drivers/media/i2c/saa7115.c b/drivers/media/i2c/saa7115.c
index 88dc6baac6391..a958bbc2c33d4 100644
--- a/drivers/media/i2c/saa7115.c
+++ b/drivers/media/i2c/saa7115.c
@@ -1167,7 +1167,7 @@ static int saa711x_s_sliced_fmt(struct v4l2_subdev *sd, struct v4l2_sliced_vbi_f
 }
 
 static int saa711x_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
diff --git a/drivers/media/i2c/saa717x.c b/drivers/media/i2c/saa717x.c
index ba103a6a1875d..adf9053601713 100644
--- a/drivers/media/i2c/saa717x.c
+++ b/drivers/media/i2c/saa717x.c
@@ -980,7 +980,7 @@ static int saa717x_s_register(struct v4l2_subdev *sd, const struct v4l2_dbg_regi
 #endif
 
 static int saa717x_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
diff --git a/drivers/media/i2c/sr030pc30.c b/drivers/media/i2c/sr030pc30.c
index 46924024faa8c..19c0252df2f1e 100644
--- a/drivers/media/i2c/sr030pc30.c
+++ b/drivers/media/i2c/sr030pc30.c
@@ -468,7 +468,7 @@ static int sr030pc30_s_ctrl(struct v4l2_ctrl *ctrl)
 }
 
 static int sr030pc30_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (!code || code->pad ||
@@ -480,7 +480,7 @@ static int sr030pc30_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int sr030pc30_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf;
@@ -525,7 +525,7 @@ static const struct sr030pc30_format *try_fmt(struct v4l2_subdev *sd,
 
 /* Return nearest media bus frame format. */
 static int sr030pc30_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct sr030pc30_info *info = sd ? to_sr030pc30(sd) : NULL;
@@ -541,7 +541,7 @@ static int sr030pc30_set_fmt(struct v4l2_subdev *sd,
 
 	fmt = try_fmt(sd, mf);
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *mf;
+		sd_state->pads->try_fmt = *mf;
 		return 0;
 	}
 
diff --git a/drivers/media/i2c/st-mipid02.c b/drivers/media/i2c/st-mipid02.c
index 7f07ef56fbbdf..f630b88cbfaa9 100644
--- a/drivers/media/i2c/st-mipid02.c
+++ b/drivers/media/i2c/st-mipid02.c
@@ -643,7 +643,7 @@ out:
 }
 
 static int mipid02_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct mipid02_dev *bridge = to_mipid02_dev(sd);
@@ -670,7 +670,7 @@ static int mipid02_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int mipid02_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mbus_fmt = &format->format;
@@ -687,7 +687,8 @@ static int mipid02_get_fmt(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt = v4l2_subdev_get_try_format(&bridge->sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(&bridge->sd, sd_state,
+						 format->pad);
 	else
 		fmt = &bridge->fmt;
 
@@ -704,7 +705,7 @@ static int mipid02_get_fmt(struct v4l2_subdev *sd,
 }
 
 static void mipid02_set_fmt_source(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *format)
 {
 	struct mipid02_dev *bridge = to_mipid02_dev(sd);
@@ -718,11 +719,11 @@ static void mipid02_set_fmt_source(struct v4l2_subdev *sd,
 	if (format->which != V4L2_SUBDEV_FORMAT_TRY)
 		return;
 
-	*v4l2_subdev_get_try_format(sd, cfg, format->pad) = format->format;
+	*v4l2_subdev_get_try_format(sd, sd_state, format->pad) = format->format;
 }
 
 static void mipid02_set_fmt_sink(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *format)
 {
 	struct mipid02_dev *bridge = to_mipid02_dev(sd);
@@ -731,7 +732,7 @@ static void mipid02_set_fmt_sink(struct v4l2_subdev *sd,
 	format->format.code = get_fmt_code(format->format.code);
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 	else
 		fmt = &bridge->fmt;
 
@@ -739,7 +740,7 @@ static void mipid02_set_fmt_sink(struct v4l2_subdev *sd,
 }
 
 static int mipid02_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct mipid02_dev *bridge = to_mipid02_dev(sd);
@@ -762,9 +763,9 @@ static int mipid02_set_fmt(struct v4l2_subdev *sd,
 	}
 
 	if (format->pad == MIPID02_SOURCE)
-		mipid02_set_fmt_source(sd, cfg, format);
+		mipid02_set_fmt_source(sd, sd_state, format);
 	else
-		mipid02_set_fmt_sink(sd, cfg, format);
+		mipid02_set_fmt_sink(sd, sd_state, format);
 
 error:
 	mutex_unlock(&bridge->lock);
diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index f21da11caf224..3205cd8298dd8 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -1649,7 +1649,7 @@ static int tc358743_s_stream(struct v4l2_subdev *sd, int enable)
 /* --------------- PAD OPS --------------- */
 
 static int tc358743_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	switch (code->index) {
@@ -1666,7 +1666,7 @@ static int tc358743_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int tc358743_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct tc358743_state *state = to_state(sd);
@@ -1702,13 +1702,13 @@ static int tc358743_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int tc358743_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct tc358743_state *state = to_state(sd);
 
 	u32 code = format->format.code; /* is overwritten by get_fmt */
-	int ret = tc358743_get_fmt(sd, cfg, format);
+	int ret = tc358743_get_fmt(sd, sd_state, format);
 
 	format->format.code = code;
 
diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c
index 89bb7e6dc7a42..91e6db847bb5a 100644
--- a/drivers/media/i2c/tda1997x.c
+++ b/drivers/media/i2c/tda1997x.c
@@ -1718,19 +1718,19 @@ static const struct v4l2_subdev_video_ops tda1997x_video_ops = {
  */
 
 static int tda1997x_init_cfg(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg)
+			     struct v4l2_subdev_state *sd_state)
 {
 	struct tda1997x_state *state = to_state(sd);
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+	mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	mf->code = state->mbus_codes[0];
 
 	return 0;
 }
 
 static int tda1997x_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct tda1997x_state *state = to_state(sd);
@@ -1762,7 +1762,7 @@ static void tda1997x_fill_format(struct tda1997x_state *state,
 }
 
 static int tda1997x_get_format(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *format)
 {
 	struct tda1997x_state *state = to_state(sd);
@@ -1775,7 +1775,7 @@ static int tda1997x_get_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		format->format.code = fmt->code;
 	} else
 		format->format.code = state->mbus_code;
@@ -1784,7 +1784,7 @@ static int tda1997x_get_format(struct v4l2_subdev *sd,
 }
 
 static int tda1997x_set_format(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *format)
 {
 	struct tda1997x_state *state = to_state(sd);
@@ -1809,7 +1809,7 @@ static int tda1997x_set_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
+		fmt = v4l2_subdev_get_try_format(sd, sd_state, format->pad);
 		*fmt = format->format;
 	} else {
 		int ret = tda1997x_setup_format(state, format->format.code);
diff --git a/drivers/media/i2c/tvp514x.c b/drivers/media/i2c/tvp514x.c
index a7fbe5b400c2d..cee60f9450367 100644
--- a/drivers/media/i2c/tvp514x.c
+++ b/drivers/media/i2c/tvp514x.c
@@ -853,13 +853,13 @@ static const struct v4l2_ctrl_ops tvp514x_ctrl_ops = {
 /**
  * tvp514x_enum_mbus_code() - V4L2 decoder interface handler for enum_mbus_code
  * @sd: pointer to standard V4L2 sub-device structure
- * @cfg: pad configuration
+ * @sd_state: subdev state
  * @code: pointer to v4l2_subdev_mbus_code_enum structure
  *
  * Enumertaes mbus codes supported
  */
 static int tvp514x_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	u32 pad = code->pad;
@@ -880,13 +880,13 @@ static int tvp514x_enum_mbus_code(struct v4l2_subdev *sd,
 /**
  * tvp514x_get_pad_format() - V4L2 decoder interface handler for get pad format
  * @sd: pointer to standard V4L2 sub-device structure
- * @cfg: pad configuration
+ * @sd_state: subdev state
  * @format: pointer to v4l2_subdev_format structure
  *
  * Retrieves pad format which is active or tried based on requirement
  */
 static int tvp514x_get_pad_format(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *format)
 {
 	struct tvp514x_decoder *decoder = to_decoder(sd);
@@ -912,13 +912,13 @@ static int tvp514x_get_pad_format(struct v4l2_subdev *sd,
 /**
  * tvp514x_set_pad_format() - V4L2 decoder interface handler for set pad format
  * @sd: pointer to standard V4L2 sub-device structure
- * @cfg: pad configuration
+ * @sd_state: subdev state
  * @fmt: pointer to v4l2_subdev_format structure
  *
  * Set pad format for the output pad
  */
 static int tvp514x_set_pad_format(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_format *fmt)
 {
 	struct tvp514x_decoder *decoder = to_decoder(sd);
diff --git a/drivers/media/i2c/tvp5150.c b/drivers/media/i2c/tvp5150.c
index 374a9da75e4db..30c63552556d0 100644
--- a/drivers/media/i2c/tvp5150.c
+++ b/drivers/media/i2c/tvp5150.c
@@ -1027,7 +1027,7 @@ static void tvp5150_set_default(v4l2_std_id std, struct v4l2_rect *crop)
 
 static struct v4l2_rect *
 tvp5150_get_pad_crop(struct tvp5150 *decoder,
-		     struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+		     struct v4l2_subdev_state *sd_state, unsigned int pad,
 		     enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
@@ -1035,7 +1035,7 @@ tvp5150_get_pad_crop(struct tvp5150 *decoder,
 		return &decoder->rect;
 	case V4L2_SUBDEV_FORMAT_TRY:
 #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API)
-		return v4l2_subdev_get_try_crop(&decoder->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&decoder->sd, sd_state, pad);
 #else
 		return ERR_PTR(-EINVAL);
 #endif
@@ -1045,7 +1045,7 @@ tvp5150_get_pad_crop(struct tvp5150 *decoder,
 }
 
 static int tvp5150_fill_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *f;
@@ -1104,7 +1104,7 @@ static void tvp5150_set_hw_selection(struct v4l2_subdev *sd,
 }
 
 static int tvp5150_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct tvp5150 *decoder = to_tvp5150(sd);
@@ -1138,7 +1138,7 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd,
 	    sel->which == V4L2_SUBDEV_FORMAT_TRY)
 		return 0;
 
-	crop = tvp5150_get_pad_crop(decoder, cfg, sel->pad, sel->which);
+	crop = tvp5150_get_pad_crop(decoder, sd_state, sel->pad, sel->which);
 	if (IS_ERR(crop))
 		return PTR_ERR(crop);
 
@@ -1156,7 +1156,7 @@ static int tvp5150_set_selection(struct v4l2_subdev *sd,
 }
 
 static int tvp5150_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct tvp5150 *decoder = container_of(sd, struct tvp5150, sd);
@@ -1180,7 +1180,7 @@ static int tvp5150_get_selection(struct v4l2_subdev *sd,
 			sel->r.height = TVP5150_V_MAX_OTHERS;
 		return 0;
 	case V4L2_SEL_TGT_CROP:
-		crop = tvp5150_get_pad_crop(decoder, cfg, sel->pad,
+		crop = tvp5150_get_pad_crop(decoder, sd_state, sel->pad,
 					    sel->which);
 		if (IS_ERR(crop))
 			return PTR_ERR(crop);
@@ -1208,7 +1208,7 @@ static int tvp5150_get_mbus_config(struct v4l2_subdev *sd,
 			V4L2 subdev pad ops
  ****************************************************************************/
 static int tvp5150_init_cfg(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg)
+			    struct v4l2_subdev_state *sd_state)
 {
 	struct tvp5150 *decoder = to_tvp5150(sd);
 	v4l2_std_id std;
@@ -1229,7 +1229,7 @@ static int tvp5150_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int tvp5150_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index)
@@ -1240,7 +1240,7 @@ static int tvp5150_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int tvp5150_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct tvp5150 *decoder = to_tvp5150(sd);
diff --git a/drivers/media/i2c/tvp7002.c b/drivers/media/i2c/tvp7002.c
index ada4ec5ef7828..2de18833b07b4 100644
--- a/drivers/media/i2c/tvp7002.c
+++ b/drivers/media/i2c/tvp7002.c
@@ -797,7 +797,8 @@ static const struct v4l2_ctrl_ops tvp7002_ctrl_ops = {
  * Enumerate supported digital video formats for pad.
  */
 static int
-tvp7002_enum_mbus_code(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+tvp7002_enum_mbus_code(struct v4l2_subdev *sd,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_mbus_code_enum *code)
 {
 	/* Check requested format index is within range */
@@ -818,7 +819,8 @@ tvp7002_enum_mbus_code(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cf
  * get video format for pad.
  */
 static int
-tvp7002_get_pad_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+tvp7002_get_pad_format(struct v4l2_subdev *sd,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *fmt)
 {
 	struct tvp7002 *tvp7002 = to_tvp7002(sd);
@@ -841,10 +843,11 @@ tvp7002_get_pad_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cf
  * set video format for pad.
  */
 static int
-tvp7002_set_pad_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+tvp7002_set_pad_format(struct v4l2_subdev *sd,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *fmt)
 {
-	return tvp7002_get_pad_format(sd, cfg, fmt);
+	return tvp7002_get_pad_format(sd, sd_state, fmt);
 }
 
 /* V4L2 core operation handlers */
diff --git a/drivers/media/i2c/tw9910.c b/drivers/media/i2c/tw9910.c
index a25a350b0ddcb..09f5b39869284 100644
--- a/drivers/media/i2c/tw9910.c
+++ b/drivers/media/i2c/tw9910.c
@@ -720,7 +720,7 @@ tw9910_set_fmt_error:
 }
 
 static int tw9910_get_selection(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -746,7 +746,7 @@ static int tw9910_get_selection(struct v4l2_subdev *sd,
 }
 
 static int tw9910_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -797,7 +797,7 @@ static int tw9910_s_fmt(struct v4l2_subdev *sd,
 }
 
 static int tw9910_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *mf = &format->format;
@@ -829,7 +829,7 @@ static int tw9910_set_fmt(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return tw9910_s_fmt(sd, mf);
 
-	cfg->try_fmt = *mf;
+	sd_state->pads->try_fmt = *mf;
 
 	return 0;
 }
@@ -886,7 +886,7 @@ static const struct v4l2_subdev_core_ops tw9910_subdev_core_ops = {
 };
 
 static int tw9910_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index)
diff --git a/drivers/media/i2c/vs6624.c b/drivers/media/i2c/vs6624.c
index c292c92e37b97..29003dec6f2da 100644
--- a/drivers/media/i2c/vs6624.c
+++ b/drivers/media/i2c/vs6624.c
@@ -546,7 +546,7 @@ static int vs6624_s_ctrl(struct v4l2_ctrl *ctrl)
 }
 
 static int vs6624_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(vs6624_formats))
@@ -557,7 +557,7 @@ static int vs6624_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int vs6624_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -587,7 +587,7 @@ static int vs6624_set_fmt(struct v4l2_subdev *sd,
 	fmt->colorspace = vs6624_formats[index].colorspace;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		return 0;
 	}
 
@@ -637,7 +637,7 @@ static int vs6624_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int vs6624_get_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct vs6624 *sensor = to_vs6624(sd);
diff --git a/drivers/media/pci/cx18/cx18-av-core.c b/drivers/media/pci/cx18/cx18-av-core.c
index 11cfe35fd730b..76e5a504df8c2 100644
--- a/drivers/media/pci/cx18/cx18-av-core.c
+++ b/drivers/media/pci/cx18/cx18-av-core.c
@@ -930,7 +930,7 @@ static int cx18_av_s_ctrl(struct v4l2_ctrl *ctrl)
 }
 
 static int cx18_av_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
index ca8040d1a725b..47db0ee0fcbfa 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2-main.c
@@ -1199,11 +1199,11 @@ static int cio2_subdev_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	};
 
 	/* Initialize try_fmt */
-	format = v4l2_subdev_get_try_format(sd, fh->pad, CIO2_PAD_SINK);
+	format = v4l2_subdev_get_try_format(sd, fh->state, CIO2_PAD_SINK);
 	*format = fmt_default;
 
 	/* same as sink */
-	format = v4l2_subdev_get_try_format(sd, fh->pad, CIO2_PAD_SOURCE);
+	format = v4l2_subdev_get_try_format(sd, fh->state, CIO2_PAD_SOURCE);
 	*format = fmt_default;
 
 	return 0;
@@ -1217,7 +1217,7 @@ static int cio2_subdev_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
  * return -EINVAL or zero on success
  */
 static int cio2_subdev_get_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev);
@@ -1225,7 +1225,8 @@ static int cio2_subdev_get_fmt(struct v4l2_subdev *sd,
 	mutex_lock(&q->subdev_lock);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		fmt->format = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		fmt->format = *v4l2_subdev_get_try_format(sd, sd_state,
+							  fmt->pad);
 	else
 		fmt->format = q->subdev_fmt;
 
@@ -1242,7 +1243,7 @@ static int cio2_subdev_get_fmt(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int cio2_subdev_set_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct cio2_queue *q = container_of(sd, struct cio2_queue, subdev);
@@ -1255,10 +1256,10 @@ static int cio2_subdev_set_fmt(struct v4l2_subdev *sd,
 	 * source always propagates from sink
 	 */
 	if (fmt->pad == CIO2_PAD_SOURCE)
-		return cio2_subdev_get_fmt(sd, cfg, fmt);
+		return cio2_subdev_get_fmt(sd, sd_state, fmt);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		mbus = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mbus = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 	else
 		mbus = &q->subdev_fmt;
 
@@ -1283,7 +1284,7 @@ static int cio2_subdev_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int cio2_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(formats))
diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c
index 76a37fbd84587..aafbb34765b06 100644
--- a/drivers/media/pci/saa7134/saa7134-empress.c
+++ b/drivers/media/pci/saa7134/saa7134-empress.c
@@ -138,12 +138,15 @@ static int empress_try_fmt_vid_cap(struct file *file, void *priv,
 {
 	struct saa7134_dev *dev = video_drvdata(file);
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
 
 	v4l2_fill_mbus_format(&format.format, &f->fmt.pix, MEDIA_BUS_FMT_FIXED);
-	saa_call_all(dev, pad, set_fmt, &pad_cfg, &format);
+	saa_call_all(dev, pad, set_fmt, &pad_state, &format);
 	v4l2_fill_pix_format(&f->fmt.pix, &format.format);
 
 	f->fmt.pix.pixelformat  = V4L2_PIX_FMT_MPEG;
diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c
index 46c6e3e20f33f..19daa49bf604d 100644
--- a/drivers/media/platform/atmel/atmel-isc-base.c
+++ b/drivers/media/platform/atmel/atmel-isc-base.c
@@ -1095,7 +1095,7 @@ static int isc_try_configure_pipeline(struct isc_device *isc)
 }
 
 static void isc_try_fse(struct isc_device *isc,
-			struct v4l2_subdev_pad_config *pad_cfg)
+			struct v4l2_subdev_state *sd_state)
 {
 	int ret;
 	struct v4l2_subdev_frame_size_enum fse = {};
@@ -1111,17 +1111,17 @@ static void isc_try_fse(struct isc_device *isc,
 	fse.which = V4L2_SUBDEV_FORMAT_TRY;
 
 	ret = v4l2_subdev_call(isc->current_subdev->sd, pad, enum_frame_size,
-			       pad_cfg, &fse);
+			       sd_state, &fse);
 	/*
 	 * Attempt to obtain format size from subdev. If not available,
 	 * just use the maximum ISC can receive.
 	 */
 	if (ret) {
-		pad_cfg->try_crop.width = isc->max_width;
-		pad_cfg->try_crop.height = isc->max_height;
+		sd_state->pads->try_crop.width = isc->max_width;
+		sd_state->pads->try_crop.height = isc->max_height;
 	} else {
-		pad_cfg->try_crop.width = fse.max_width;
-		pad_cfg->try_crop.height = fse.max_height;
+		sd_state->pads->try_crop.width = fse.max_width;
+		sd_state->pads->try_crop.height = fse.max_height;
 	}
 }
 
@@ -1132,6 +1132,9 @@ static int isc_try_fmt(struct isc_device *isc, struct v4l2_format *f,
 	struct isc_format *sd_fmt = NULL, *direct_fmt = NULL;
 	struct v4l2_pix_format *pixfmt = &f->fmt.pix;
 	struct v4l2_subdev_pad_config pad_cfg = {};
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -1229,11 +1232,11 @@ static int isc_try_fmt(struct isc_device *isc, struct v4l2_format *f,
 		goto isc_try_fmt_err;
 
 	/* Obtain frame sizes if possible to have crop requirements ready */
-	isc_try_fse(isc, &pad_cfg);
+	isc_try_fse(isc, &pad_state);
 
 	v4l2_fill_mbus_format(&format.format, pixfmt, mbus_code);
 	ret = v4l2_subdev_call(isc->current_subdev->sd, pad, set_fmt,
-			       &pad_cfg, &format);
+			       &pad_state, &format);
 	if (ret < 0)
 		goto isc_try_fmt_subdev_err;
 
diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c
index 5b1dd358f2e63..095d80c4f59e7 100644
--- a/drivers/media/platform/atmel/atmel-isi.c
+++ b/drivers/media/platform/atmel/atmel-isi.c
@@ -557,7 +557,7 @@ static const struct isi_format *find_format_by_fourcc(struct atmel_isi *isi,
 }
 
 static void isi_try_fse(struct atmel_isi *isi, const struct isi_format *isi_fmt,
-			struct v4l2_subdev_pad_config *pad_cfg)
+			struct v4l2_subdev_state *sd_state)
 {
 	int ret;
 	struct v4l2_subdev_frame_size_enum fse = {
@@ -566,17 +566,17 @@ static void isi_try_fse(struct atmel_isi *isi, const struct isi_format *isi_fmt,
 	};
 
 	ret = v4l2_subdev_call(isi->entity.subdev, pad, enum_frame_size,
-			       pad_cfg, &fse);
+			       sd_state, &fse);
 	/*
 	 * Attempt to obtain format size from subdev. If not available,
 	 * just use the maximum ISI can receive.
 	 */
 	if (ret) {
-		pad_cfg->try_crop.width = MAX_SUPPORT_WIDTH;
-		pad_cfg->try_crop.height = MAX_SUPPORT_HEIGHT;
+		sd_state->pads->try_crop.width = MAX_SUPPORT_WIDTH;
+		sd_state->pads->try_crop.height = MAX_SUPPORT_HEIGHT;
 	} else {
-		pad_cfg->try_crop.width = fse.max_width;
-		pad_cfg->try_crop.height = fse.max_height;
+		sd_state->pads->try_crop.width = fse.max_width;
+		sd_state->pads->try_crop.height = fse.max_height;
 	}
 }
 
@@ -586,6 +586,9 @@ static int isi_try_fmt(struct atmel_isi *isi, struct v4l2_format *f,
 	const struct isi_format *isi_fmt;
 	struct v4l2_pix_format *pixfmt = &f->fmt.pix;
 	struct v4l2_subdev_pad_config pad_cfg = {};
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -603,10 +606,10 @@ static int isi_try_fmt(struct atmel_isi *isi, struct v4l2_format *f,
 
 	v4l2_fill_mbus_format(&format.format, pixfmt, isi_fmt->mbus_code);
 
-	isi_try_fse(isi, isi_fmt, &pad_cfg);
+	isi_try_fse(isi, isi_fmt, &pad_state);
 
 	ret = v4l2_subdev_call(isi->entity.subdev, pad, set_fmt,
-			       &pad_cfg, &format);
+			       &pad_state, &format);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/media/platform/cadence/cdns-csi2tx.c b/drivers/media/platform/cadence/cdns-csi2tx.c
index 765ae408970a5..5a67fba73ddd8 100644
--- a/drivers/media/platform/cadence/cdns-csi2tx.c
+++ b/drivers/media/platform/cadence/cdns-csi2tx.c
@@ -156,7 +156,7 @@ static const struct csi2tx_fmt *csi2tx_get_fmt_from_mbus(u32 mbus)
 }
 
 static int csi2tx_enum_mbus_code(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad || code->index >= ARRAY_SIZE(csi2tx_formats))
@@ -169,20 +169,20 @@ static int csi2tx_enum_mbus_code(struct v4l2_subdev *subdev,
 
 static struct v4l2_mbus_framefmt *
 __csi2tx_get_pad_format(struct v4l2_subdev *subdev,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *fmt)
 {
 	struct csi2tx_priv *csi2tx = v4l2_subdev_to_csi2tx(subdev);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(subdev, cfg,
+		return v4l2_subdev_get_try_format(subdev, sd_state,
 						  fmt->pad);
 
 	return &csi2tx->pad_fmts[fmt->pad];
 }
 
 static int csi2tx_get_pad_format(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	const struct v4l2_mbus_framefmt *format;
@@ -191,7 +191,7 @@ static int csi2tx_get_pad_format(struct v4l2_subdev *subdev,
 	if (fmt->pad == CSI2TX_PAD_SOURCE)
 		return -EINVAL;
 
-	format = __csi2tx_get_pad_format(subdev, cfg, fmt);
+	format = __csi2tx_get_pad_format(subdev, sd_state, fmt);
 	if (!format)
 		return -EINVAL;
 
@@ -201,7 +201,7 @@ static int csi2tx_get_pad_format(struct v4l2_subdev *subdev,
 }
 
 static int csi2tx_set_pad_format(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
 	const struct v4l2_mbus_framefmt *src_format = &fmt->format;
@@ -214,7 +214,7 @@ static int csi2tx_set_pad_format(struct v4l2_subdev *subdev,
 	if (!csi2tx_get_fmt_from_mbus(fmt->format.code))
 		src_format = &fmt_default;
 
-	dst_format = __csi2tx_get_pad_format(subdev, cfg, fmt);
+	dst_format = __csi2tx_get_pad_format(subdev, sd_state, fmt);
 	if (!dst_format)
 		return -EINVAL;
 
diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c
index 0da36443173c1..7ff4024003f4a 100644
--- a/drivers/media/platform/exynos4-is/fimc-capture.c
+++ b/drivers/media/platform/exynos4-is/fimc-capture.c
@@ -1454,7 +1454,7 @@ void fimc_sensor_notify(struct v4l2_subdev *sd, unsigned int notification,
 }
 
 static int fimc_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct fimc_fmt *fmt;
@@ -1467,7 +1467,7 @@ static int fimc_subdev_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int fimc_subdev_get_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
@@ -1476,7 +1476,7 @@ static int fimc_subdev_get_fmt(struct v4l2_subdev *sd,
 	struct v4l2_mbus_framefmt *mf;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1508,7 +1508,7 @@ static int fimc_subdev_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int fimc_subdev_set_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
@@ -1531,7 +1531,7 @@ static int fimc_subdev_set_fmt(struct v4l2_subdev *sd,
 	mf->colorspace = V4L2_COLORSPACE_JPEG;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 		return 0;
 	}
@@ -1574,7 +1574,7 @@ static int fimc_subdev_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int fimc_subdev_get_selection(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
@@ -1601,10 +1601,10 @@ static int fimc_subdev_get_selection(struct v4l2_subdev *sd,
 		return 0;
 
 	case V4L2_SEL_TGT_CROP:
-		try_sel = v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		break;
 	case V4L2_SEL_TGT_COMPOSE:
-		try_sel = v4l2_subdev_get_try_compose(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
 		f = &ctx->d_frame;
 		break;
 	default:
@@ -1630,7 +1630,7 @@ static int fimc_subdev_get_selection(struct v4l2_subdev *sd,
 }
 
 static int fimc_subdev_set_selection(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct fimc_dev *fimc = v4l2_get_subdevdata(sd);
@@ -1648,10 +1648,10 @@ static int fimc_subdev_set_selection(struct v4l2_subdev *sd,
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP:
-		try_sel = v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		break;
 	case V4L2_SEL_TGT_COMPOSE:
-		try_sel = v4l2_subdev_get_try_compose(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
 		f = &ctx->d_frame;
 		break;
 	default:
diff --git a/drivers/media/platform/exynos4-is/fimc-isp.c b/drivers/media/platform/exynos4-is/fimc-isp.c
index 74b49d30901ed..855235bea46dd 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp.c
@@ -106,7 +106,7 @@ static const struct media_entity_operations fimc_is_subdev_media_ops = {
 };
 
 static int fimc_is_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	const struct fimc_fmt *fmt;
@@ -119,14 +119,14 @@ static int fimc_is_subdev_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int fimc_isp_subdev_get_fmt(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt)
 {
 	struct fimc_isp *isp = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *mf = &fmt->format;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*mf = *v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		*mf = *v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		return 0;
 	}
 
@@ -156,7 +156,7 @@ static int fimc_isp_subdev_get_fmt(struct v4l2_subdev *sd,
 }
 
 static void __isp_subdev_try_format(struct fimc_isp *isp,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct v4l2_mbus_framefmt *mf = &fmt->format;
@@ -172,8 +172,9 @@ static void __isp_subdev_try_format(struct fimc_isp *isp,
 		mf->code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	} else {
 		if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-			format = v4l2_subdev_get_try_format(&isp->subdev, cfg,
-						FIMC_ISP_SD_PAD_SINK);
+			format = v4l2_subdev_get_try_format(&isp->subdev,
+							    sd_state,
+							    FIMC_ISP_SD_PAD_SINK);
 		else
 			format = &isp->sink_fmt;
 
@@ -191,7 +192,7 @@ static void __isp_subdev_try_format(struct fimc_isp *isp,
 }
 
 static int fimc_isp_subdev_set_fmt(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *fmt)
 {
 	struct fimc_isp *isp = v4l2_get_subdevdata(sd);
@@ -203,10 +204,10 @@ static int fimc_isp_subdev_set_fmt(struct v4l2_subdev *sd,
 		 __func__, fmt->pad, mf->code, mf->width, mf->height);
 
 	mutex_lock(&isp->subdev_lock);
-	__isp_subdev_try_format(isp, cfg, fmt);
+	__isp_subdev_try_format(isp, sd_state, fmt);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 
 		/* Propagate format to the source pads */
@@ -217,8 +218,10 @@ static int fimc_isp_subdev_set_fmt(struct v4l2_subdev *sd,
 			for (pad = FIMC_ISP_SD_PAD_SRC_FIFO;
 					pad < FIMC_ISP_SD_PADS_NUM; pad++) {
 				format.pad = pad;
-				__isp_subdev_try_format(isp, cfg, &format);
-				mf = v4l2_subdev_get_try_format(sd, cfg, pad);
+				__isp_subdev_try_format(isp, sd_state,
+							&format);
+				mf = v4l2_subdev_get_try_format(sd, sd_state,
+								pad);
 				*mf = format.format;
 			}
 		}
@@ -230,7 +233,8 @@ static int fimc_isp_subdev_set_fmt(struct v4l2_subdev *sd,
 				isp->sink_fmt = *mf;
 
 				format.pad = FIMC_ISP_SD_PAD_SRC_DMA;
-				__isp_subdev_try_format(isp, cfg, &format);
+				__isp_subdev_try_format(isp, sd_state,
+							&format);
 
 				isp->src_fmt = format.format;
 				__is_set_frame_size(is, &isp->src_fmt);
@@ -370,15 +374,18 @@ static int fimc_isp_subdev_open(struct v4l2_subdev *sd,
 		.field = V4L2_FIELD_NONE,
 	};
 
-	format = v4l2_subdev_get_try_format(sd, fh->pad, FIMC_ISP_SD_PAD_SINK);
+	format = v4l2_subdev_get_try_format(sd, fh->state,
+					    FIMC_ISP_SD_PAD_SINK);
 	*format = fmt;
 
-	format = v4l2_subdev_get_try_format(sd, fh->pad, FIMC_ISP_SD_PAD_SRC_FIFO);
+	format = v4l2_subdev_get_try_format(sd, fh->state,
+					    FIMC_ISP_SD_PAD_SRC_FIFO);
 	fmt.width = DEFAULT_PREVIEW_STILL_WIDTH;
 	fmt.height = DEFAULT_PREVIEW_STILL_HEIGHT;
 	*format = fmt;
 
-	format = v4l2_subdev_get_try_format(sd, fh->pad, FIMC_ISP_SD_PAD_SRC_DMA);
+	format = v4l2_subdev_get_try_format(sd, fh->state,
+					    FIMC_ISP_SD_PAD_SRC_DMA);
 	*format = fmt;
 
 	return 0;
diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c
index 4d8b18078ff37..aaa3af0493cee 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite.c
+++ b/drivers/media/platform/exynos4-is/fimc-lite.c
@@ -550,7 +550,7 @@ static const struct v4l2_file_operations fimc_lite_fops = {
  */
 
 static const struct fimc_fmt *fimc_lite_subdev_try_fmt(struct fimc_lite *fimc,
-					struct v4l2_subdev_pad_config *cfg,
+					struct v4l2_subdev_state *sd_state,
 					struct v4l2_subdev_format *format)
 {
 	struct flite_drvdata *dd = fimc->dd;
@@ -574,14 +574,16 @@ static const struct fimc_fmt *fimc_lite_subdev_try_fmt(struct fimc_lite *fimc,
 		struct v4l2_rect *rect;
 
 		if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-			sink_fmt = v4l2_subdev_get_try_format(&fimc->subdev, cfg,
-						FLITE_SD_PAD_SINK);
+			sink_fmt = v4l2_subdev_get_try_format(&fimc->subdev,
+							      sd_state,
+							      FLITE_SD_PAD_SINK);
 
 			mf->code = sink_fmt->code;
 			mf->colorspace = sink_fmt->colorspace;
 
-			rect = v4l2_subdev_get_try_crop(&fimc->subdev, cfg,
-						FLITE_SD_PAD_SINK);
+			rect = v4l2_subdev_get_try_crop(&fimc->subdev,
+							sd_state,
+							FLITE_SD_PAD_SINK);
 		} else {
 			mf->code = sink->fmt->mbus_code;
 			mf->colorspace = sink->fmt->colorspace;
@@ -1002,7 +1004,7 @@ static const struct media_entity_operations fimc_lite_subdev_media_ops = {
 };
 
 static int fimc_lite_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-					   struct v4l2_subdev_pad_config *cfg,
+					   struct v4l2_subdev_state *sd_state,
 					   struct v4l2_subdev_mbus_code_enum *code)
 {
 	const struct fimc_fmt *fmt;
@@ -1016,16 +1018,16 @@ static int fimc_lite_subdev_enum_mbus_code(struct v4l2_subdev *sd,
 
 static struct v4l2_mbus_framefmt *__fimc_lite_subdev_get_try_fmt(
 		struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg, unsigned int pad)
+		struct v4l2_subdev_state *sd_state, unsigned int pad)
 {
 	if (pad != FLITE_SD_PAD_SINK)
 		pad = FLITE_SD_PAD_SOURCE_DMA;
 
-	return v4l2_subdev_get_try_format(sd, cfg, pad);
+	return v4l2_subdev_get_try_format(sd, sd_state, pad);
 }
 
 static int fimc_lite_subdev_get_fmt(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct fimc_lite *fimc = v4l2_get_subdevdata(sd);
@@ -1033,7 +1035,7 @@ static int fimc_lite_subdev_get_fmt(struct v4l2_subdev *sd,
 	struct flite_frame *f = &fimc->inp_frame;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = __fimc_lite_subdev_get_try_fmt(sd, cfg, fmt->pad);
+		mf = __fimc_lite_subdev_get_try_fmt(sd, sd_state, fmt->pad);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1056,7 +1058,7 @@ static int fimc_lite_subdev_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int fimc_lite_subdev_set_fmt(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct fimc_lite *fimc = v4l2_get_subdevdata(sd);
@@ -1078,17 +1080,18 @@ static int fimc_lite_subdev_set_fmt(struct v4l2_subdev *sd,
 		return -EBUSY;
 	}
 
-	ffmt = fimc_lite_subdev_try_fmt(fimc, cfg, fmt);
+	ffmt = fimc_lite_subdev_try_fmt(fimc, sd_state, fmt);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
 		struct v4l2_mbus_framefmt *src_fmt;
 
-		mf = __fimc_lite_subdev_get_try_fmt(sd, cfg, fmt->pad);
+		mf = __fimc_lite_subdev_get_try_fmt(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 
 		if (fmt->pad == FLITE_SD_PAD_SINK) {
 			unsigned int pad = FLITE_SD_PAD_SOURCE_DMA;
-			src_fmt = __fimc_lite_subdev_get_try_fmt(sd, cfg, pad);
+			src_fmt = __fimc_lite_subdev_get_try_fmt(sd, sd_state,
+								 pad);
 			*src_fmt = *mf;
 		}
 
@@ -1116,7 +1119,7 @@ static int fimc_lite_subdev_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int fimc_lite_subdev_get_selection(struct v4l2_subdev *sd,
-					  struct v4l2_subdev_pad_config *cfg,
+					  struct v4l2_subdev_state *sd_state,
 					  struct v4l2_subdev_selection *sel)
 {
 	struct fimc_lite *fimc = v4l2_get_subdevdata(sd);
@@ -1128,7 +1131,7 @@ static int fimc_lite_subdev_get_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		sel->r = *v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		sel->r = *v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		return 0;
 	}
 
@@ -1151,7 +1154,7 @@ static int fimc_lite_subdev_get_selection(struct v4l2_subdev *sd,
 }
 
 static int fimc_lite_subdev_set_selection(struct v4l2_subdev *sd,
-					  struct v4l2_subdev_pad_config *cfg,
+					  struct v4l2_subdev_state *sd_state,
 					  struct v4l2_subdev_selection *sel)
 {
 	struct fimc_lite *fimc = v4l2_get_subdevdata(sd);
@@ -1165,7 +1168,7 @@ static int fimc_lite_subdev_set_selection(struct v4l2_subdev *sd,
 	fimc_lite_try_crop(fimc, &sel->r);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_crop(sd, cfg, sel->pad) = sel->r;
+		*v4l2_subdev_get_try_crop(sd, sd_state, sel->pad) = sel->r;
 	} else {
 		unsigned long flags;
 		spin_lock_irqsave(&fimc->slock, flags);
diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c
index ebf39c8568943..32b23329b0331 100644
--- a/drivers/media/platform/exynos4-is/mipi-csis.c
+++ b/drivers/media/platform/exynos4-is/mipi-csis.c
@@ -537,7 +537,7 @@ unlock:
 }
 
 static int s5pcsis_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(s5pcsis_formats))
@@ -565,23 +565,25 @@ static struct csis_pix_format const *s5pcsis_try_format(
 }
 
 static struct v4l2_mbus_framefmt *__s5pcsis_get_format(
-		struct csis_state *state, struct v4l2_subdev_pad_config *cfg,
+		struct csis_state *state, struct v4l2_subdev_state *sd_state,
 		enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return cfg ? v4l2_subdev_get_try_format(&state->sd, cfg, 0) : NULL;
+		return sd_state ? v4l2_subdev_get_try_format(&state->sd,
+							     sd_state, 0) : NULL;
 
 	return &state->format;
 }
 
-static int s5pcsis_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int s5pcsis_set_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct csis_state *state = sd_to_csis_state(sd);
 	struct csis_pix_format const *csis_fmt;
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = __s5pcsis_get_format(state, cfg, fmt->which);
+	mf = __s5pcsis_get_format(state, sd_state, fmt->which);
 
 	if (fmt->pad == CSIS_PAD_SOURCE) {
 		if (mf) {
@@ -602,13 +604,14 @@ static int s5pcsis_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 	return 0;
 }
 
-static int s5pcsis_get_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int s5pcsis_get_fmt(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct csis_state *state = sd_to_csis_state(sd);
 	struct v4l2_mbus_framefmt *mf;
 
-	mf = __s5pcsis_get_format(state, cfg, fmt->which);
+	mf = __s5pcsis_get_format(state, sd_state, fmt->which);
 	if (!mf)
 		return -EINVAL;
 
diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
index ea87110d90738..070a0f3fc3376 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.c
+++ b/drivers/media/platform/marvell-ccic/mcam-core.c
@@ -1350,6 +1350,9 @@ static int mcam_vidioc_try_fmt_vid_cap(struct file *filp, void *priv,
 	struct mcam_format_struct *f;
 	struct v4l2_pix_format *pix = &fmt->fmt.pix;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -1358,7 +1361,7 @@ static int mcam_vidioc_try_fmt_vid_cap(struct file *filp, void *priv,
 	f = mcam_find_format(pix->pixelformat);
 	pix->pixelformat = f->pixelformat;
 	v4l2_fill_mbus_format(&format.format, pix, f->mbus_code);
-	ret = sensor_call(cam, pad, set_fmt, &pad_cfg, &format);
+	ret = sensor_call(cam, pad, set_fmt, &pad_state, &format);
 	v4l2_fill_pix_format(pix, &format.format);
 	pix->bytesperline = pix->width * f->bpp;
 	switch (f->pixelformat) {
diff --git a/drivers/media/platform/omap3isp/ispccdc.c b/drivers/media/platform/omap3isp/ispccdc.c
index 4e8905ef362f2..108b5e9f82cb0 100644
--- a/drivers/media/platform/omap3isp/ispccdc.c
+++ b/drivers/media/platform/omap3isp/ispccdc.c
@@ -29,7 +29,8 @@
 #define CCDC_MIN_HEIGHT		32
 
 static struct v4l2_mbus_framefmt *
-__ccdc_get_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg,
+__ccdc_get_format(struct isp_ccdc_device *ccdc,
+		  struct v4l2_subdev_state *sd_state,
 		  unsigned int pad, enum v4l2_subdev_format_whence which);
 
 static const unsigned int ccdc_fmts[] = {
@@ -1936,21 +1937,25 @@ static int ccdc_set_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static struct v4l2_mbus_framefmt *
-__ccdc_get_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg,
+__ccdc_get_format(struct isp_ccdc_device *ccdc,
+		  struct v4l2_subdev_state *sd_state,
 		  unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ccdc->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&ccdc->subdev, sd_state,
+						  pad);
 	else
 		return &ccdc->formats[pad];
 }
 
 static struct v4l2_rect *
-__ccdc_get_crop(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg,
+__ccdc_get_crop(struct isp_ccdc_device *ccdc,
+		struct v4l2_subdev_state *sd_state,
 		enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&ccdc->subdev, cfg, CCDC_PAD_SOURCE_OF);
+		return v4l2_subdev_get_try_crop(&ccdc->subdev, sd_state,
+						CCDC_PAD_SOURCE_OF);
 	else
 		return &ccdc->crop;
 }
@@ -1963,7 +1968,8 @@ __ccdc_get_crop(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg
  * @fmt: Format
  */
 static void
-ccdc_try_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg,
+ccdc_try_format(struct isp_ccdc_device *ccdc,
+		struct v4l2_subdev_state *sd_state,
 		unsigned int pad, struct v4l2_mbus_framefmt *fmt,
 		enum v4l2_subdev_format_whence which)
 {
@@ -1999,7 +2005,8 @@ ccdc_try_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg
 	case CCDC_PAD_SOURCE_OF:
 		pixelcode = fmt->code;
 		field = fmt->field;
-		*fmt = *__ccdc_get_format(ccdc, cfg, CCDC_PAD_SINK, which);
+		*fmt = *__ccdc_get_format(ccdc, sd_state, CCDC_PAD_SINK,
+					  which);
 
 		/* In SYNC mode the bridge converts YUV formats from 2X8 to
 		 * 1X16. In BT.656 no such conversion occurs. As we don't know
@@ -2024,7 +2031,7 @@ ccdc_try_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg
 		}
 
 		/* Hardcode the output size to the crop rectangle size. */
-		crop = __ccdc_get_crop(ccdc, cfg, which);
+		crop = __ccdc_get_crop(ccdc, sd_state, which);
 		fmt->width = crop->width;
 		fmt->height = crop->height;
 
@@ -2041,7 +2048,8 @@ ccdc_try_format(struct isp_ccdc_device *ccdc, struct v4l2_subdev_pad_config *cfg
 		break;
 
 	case CCDC_PAD_SOURCE_VP:
-		*fmt = *__ccdc_get_format(ccdc, cfg, CCDC_PAD_SINK, which);
+		*fmt = *__ccdc_get_format(ccdc, sd_state, CCDC_PAD_SINK,
+					  which);
 
 		/* The video port interface truncates the data to 10 bits. */
 		info = omap3isp_video_format_info(fmt->code);
@@ -2118,7 +2126,7 @@ static void ccdc_try_crop(struct isp_ccdc_device *ccdc,
  * return -EINVAL or zero on success
  */
 static int ccdc_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
@@ -2133,7 +2141,7 @@ static int ccdc_enum_mbus_code(struct v4l2_subdev *sd,
 		break;
 
 	case CCDC_PAD_SOURCE_OF:
-		format = __ccdc_get_format(ccdc, cfg, code->pad,
+		format = __ccdc_get_format(ccdc, sd_state, code->pad,
 					   code->which);
 
 		if (format->code == MEDIA_BUS_FMT_YUYV8_2X8 ||
@@ -2164,7 +2172,7 @@ static int ccdc_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index != 0)
 			return -EINVAL;
 
-		format = __ccdc_get_format(ccdc, cfg, code->pad,
+		format = __ccdc_get_format(ccdc, sd_state, code->pad,
 					   code->which);
 
 		/* A pixel code equal to 0 means that the video port doesn't
@@ -2184,7 +2192,7 @@ static int ccdc_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ccdc_enum_frame_size(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
@@ -2196,7 +2204,7 @@ static int ccdc_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	ccdc_try_format(ccdc, cfg, fse->pad, &format, fse->which);
+	ccdc_try_format(ccdc, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -2206,7 +2214,7 @@ static int ccdc_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	ccdc_try_format(ccdc, cfg, fse->pad, &format, fse->which);
+	ccdc_try_format(ccdc, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -2224,7 +2232,8 @@ static int ccdc_enum_frame_size(struct v4l2_subdev *sd,
  *
  * Return 0 on success or a negative error code otherwise.
  */
-static int ccdc_get_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int ccdc_get_selection(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_selection *sel)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
@@ -2240,12 +2249,13 @@ static int ccdc_get_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
 		sel->r.width = INT_MAX;
 		sel->r.height = INT_MAX;
 
-		format = __ccdc_get_format(ccdc, cfg, CCDC_PAD_SINK, sel->which);
+		format = __ccdc_get_format(ccdc, sd_state, CCDC_PAD_SINK,
+					   sel->which);
 		ccdc_try_crop(ccdc, format, &sel->r);
 		break;
 
 	case V4L2_SEL_TGT_CROP:
-		sel->r = *__ccdc_get_crop(ccdc, cfg, sel->which);
+		sel->r = *__ccdc_get_crop(ccdc, sd_state, sel->which);
 		break;
 
 	default:
@@ -2266,7 +2276,8 @@ static int ccdc_get_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
  *
  * Return 0 on success or a negative error code otherwise.
  */
-static int ccdc_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int ccdc_set_selection(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_selection *sel)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
@@ -2285,17 +2296,19 @@ static int ccdc_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
 	 * rectangle.
 	 */
 	if (sel->flags & V4L2_SEL_FLAG_KEEP_CONFIG) {
-		sel->r = *__ccdc_get_crop(ccdc, cfg, sel->which);
+		sel->r = *__ccdc_get_crop(ccdc, sd_state, sel->which);
 		return 0;
 	}
 
-	format = __ccdc_get_format(ccdc, cfg, CCDC_PAD_SINK, sel->which);
+	format = __ccdc_get_format(ccdc, sd_state, CCDC_PAD_SINK, sel->which);
 	ccdc_try_crop(ccdc, format, &sel->r);
-	*__ccdc_get_crop(ccdc, cfg, sel->which) = sel->r;
+	*__ccdc_get_crop(ccdc, sd_state, sel->which) = sel->r;
 
 	/* Update the source format. */
-	format = __ccdc_get_format(ccdc, cfg, CCDC_PAD_SOURCE_OF, sel->which);
-	ccdc_try_format(ccdc, cfg, CCDC_PAD_SOURCE_OF, format, sel->which);
+	format = __ccdc_get_format(ccdc, sd_state, CCDC_PAD_SOURCE_OF,
+				   sel->which);
+	ccdc_try_format(ccdc, sd_state, CCDC_PAD_SOURCE_OF, format,
+			sel->which);
 
 	return 0;
 }
@@ -2309,13 +2322,14 @@ static int ccdc_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
  * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond
  * to the format type.
  */
-static int ccdc_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int ccdc_get_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ccdc_get_format(ccdc, cfg, fmt->pad, fmt->which);
+	format = __ccdc_get_format(ccdc, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -2332,24 +2346,25 @@ static int ccdc_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
  * Return 0 on success or -EINVAL if the pad is invalid or doesn't correspond
  * to the format type.
  */
-static int ccdc_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int ccdc_set_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_ccdc_device *ccdc = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	format = __ccdc_get_format(ccdc, cfg, fmt->pad, fmt->which);
+	format = __ccdc_get_format(ccdc, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	ccdc_try_format(ccdc, cfg, fmt->pad, &fmt->format, fmt->which);
+	ccdc_try_format(ccdc, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == CCDC_PAD_SINK) {
 		/* Reset the crop rectangle. */
-		crop = __ccdc_get_crop(ccdc, cfg, fmt->which);
+		crop = __ccdc_get_crop(ccdc, sd_state, fmt->which);
 		crop->left = 0;
 		crop->top = 0;
 		crop->width = fmt->format.width;
@@ -2358,16 +2373,16 @@ static int ccdc_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
 		ccdc_try_crop(ccdc, &fmt->format, crop);
 
 		/* Update the source formats. */
-		format = __ccdc_get_format(ccdc, cfg, CCDC_PAD_SOURCE_OF,
+		format = __ccdc_get_format(ccdc, sd_state, CCDC_PAD_SOURCE_OF,
 					   fmt->which);
 		*format = fmt->format;
-		ccdc_try_format(ccdc, cfg, CCDC_PAD_SOURCE_OF, format,
+		ccdc_try_format(ccdc, sd_state, CCDC_PAD_SOURCE_OF, format,
 				fmt->which);
 
-		format = __ccdc_get_format(ccdc, cfg, CCDC_PAD_SOURCE_VP,
+		format = __ccdc_get_format(ccdc, sd_state, CCDC_PAD_SOURCE_VP,
 					   fmt->which);
 		*format = fmt->format;
-		ccdc_try_format(ccdc, cfg, CCDC_PAD_SOURCE_VP, format,
+		ccdc_try_format(ccdc, sd_state, CCDC_PAD_SOURCE_VP, format,
 				fmt->which);
 	}
 
@@ -2454,7 +2469,7 @@ static int ccdc_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	ccdc_set_format(sd, fh ? fh->pad : NULL, &format);
+	ccdc_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/ispccp2.c b/drivers/media/platform/omap3isp/ispccp2.c
index d0a49cdfd22d2..acb58b6ddba18 100644
--- a/drivers/media/platform/omap3isp/ispccp2.c
+++ b/drivers/media/platform/omap3isp/ispccp2.c
@@ -618,11 +618,13 @@ static const unsigned int ccp2_fmts[] = {
  * return format structure or NULL on error
  */
 static struct v4l2_mbus_framefmt *
-__ccp2_get_format(struct isp_ccp2_device *ccp2, struct v4l2_subdev_pad_config *cfg,
-		     unsigned int pad, enum v4l2_subdev_format_whence which)
+__ccp2_get_format(struct isp_ccp2_device *ccp2,
+		  struct v4l2_subdev_state *sd_state,
+		  unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ccp2->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&ccp2->subdev, sd_state,
+						  pad);
 	else
 		return &ccp2->formats[pad];
 }
@@ -636,7 +638,8 @@ __ccp2_get_format(struct isp_ccp2_device *ccp2, struct v4l2_subdev_pad_config *c
  * @which : wanted subdev format
  */
 static void ccp2_try_format(struct isp_ccp2_device *ccp2,
-			       struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+			       struct v4l2_subdev_state *sd_state,
+			       unsigned int pad,
 			       struct v4l2_mbus_framefmt *fmt,
 			       enum v4l2_subdev_format_whence which)
 {
@@ -670,7 +673,8 @@ static void ccp2_try_format(struct isp_ccp2_device *ccp2,
 		 * When CCP2 write to memory feature will be added this
 		 * should be changed properly.
 		 */
-		format = __ccp2_get_format(ccp2, cfg, CCP2_PAD_SINK, which);
+		format = __ccp2_get_format(ccp2, sd_state, CCP2_PAD_SINK,
+					   which);
 		memcpy(fmt, format, sizeof(*fmt));
 		fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10;
 		break;
@@ -688,7 +692,7 @@ static void ccp2_try_format(struct isp_ccp2_device *ccp2,
  * return -EINVAL or zero on success
  */
 static int ccp2_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct isp_ccp2_device *ccp2 = v4l2_get_subdevdata(sd);
@@ -703,8 +707,8 @@ static int ccp2_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index != 0)
 			return -EINVAL;
 
-		format = __ccp2_get_format(ccp2, cfg, CCP2_PAD_SINK,
-					      code->which);
+		format = __ccp2_get_format(ccp2, sd_state, CCP2_PAD_SINK,
+					   code->which);
 		code->code = format->code;
 	}
 
@@ -712,7 +716,7 @@ static int ccp2_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ccp2_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct isp_ccp2_device *ccp2 = v4l2_get_subdevdata(sd);
@@ -724,7 +728,7 @@ static int ccp2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	ccp2_try_format(ccp2, cfg, fse->pad, &format, fse->which);
+	ccp2_try_format(ccp2, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -734,7 +738,7 @@ static int ccp2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	ccp2_try_format(ccp2, cfg, fse->pad, &format, fse->which);
+	ccp2_try_format(ccp2, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -748,13 +752,14 @@ static int ccp2_enum_frame_size(struct v4l2_subdev *sd,
  * @fmt   : pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int ccp2_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
+static int ccp2_get_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
+			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_ccp2_device *ccp2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ccp2_get_format(ccp2, cfg, fmt->pad, fmt->which);
+	format = __ccp2_get_format(ccp2, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -769,25 +774,27 @@ static int ccp2_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
  * @fmt   : pointer to v4l2 subdev format structure
  * returns zero
  */
-static int ccp2_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
-			      struct v4l2_subdev_format *fmt)
+static int ccp2_set_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
+			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_ccp2_device *ccp2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ccp2_get_format(ccp2, cfg, fmt->pad, fmt->which);
+	format = __ccp2_get_format(ccp2, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	ccp2_try_format(ccp2, cfg, fmt->pad, &fmt->format, fmt->which);
+	ccp2_try_format(ccp2, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == CCP2_PAD_SINK) {
-		format = __ccp2_get_format(ccp2, cfg, CCP2_PAD_SOURCE,
+		format = __ccp2_get_format(ccp2, sd_state, CCP2_PAD_SOURCE,
 					   fmt->which);
 		*format = fmt->format;
-		ccp2_try_format(ccp2, cfg, CCP2_PAD_SOURCE, format, fmt->which);
+		ccp2_try_format(ccp2, sd_state, CCP2_PAD_SOURCE, format,
+				fmt->which);
 	}
 
 	return 0;
@@ -812,7 +819,7 @@ static int ccp2_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	ccp2_set_format(sd, fh ? fh->pad : NULL, &format);
+	ccp2_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/ispcsi2.c b/drivers/media/platform/omap3isp/ispcsi2.c
index fd493c5e4e24f..6302e0c94034f 100644
--- a/drivers/media/platform/omap3isp/ispcsi2.c
+++ b/drivers/media/platform/omap3isp/ispcsi2.c
@@ -827,17 +827,20 @@ static const struct isp_video_operations csi2_ispvideo_ops = {
  */
 
 static struct v4l2_mbus_framefmt *
-__csi2_get_format(struct isp_csi2_device *csi2, struct v4l2_subdev_pad_config *cfg,
+__csi2_get_format(struct isp_csi2_device *csi2,
+		  struct v4l2_subdev_state *sd_state,
 		  unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csi2->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&csi2->subdev, sd_state,
+						  pad);
 	else
 		return &csi2->formats[pad];
 }
 
 static void
-csi2_try_format(struct isp_csi2_device *csi2, struct v4l2_subdev_pad_config *cfg,
+csi2_try_format(struct isp_csi2_device *csi2,
+		struct v4l2_subdev_state *sd_state,
 		unsigned int pad, struct v4l2_mbus_framefmt *fmt,
 		enum v4l2_subdev_format_whence which)
 {
@@ -867,7 +870,8 @@ csi2_try_format(struct isp_csi2_device *csi2, struct v4l2_subdev_pad_config *cfg
 		 * compression.
 		 */
 		pixelcode = fmt->code;
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SINK, which);
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK,
+					   which);
 		memcpy(fmt, format, sizeof(*fmt));
 
 		/*
@@ -893,7 +897,7 @@ csi2_try_format(struct isp_csi2_device *csi2, struct v4l2_subdev_pad_config *cfg
  * return -EINVAL or zero on success
  */
 static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct isp_csi2_device *csi2 = v4l2_get_subdevdata(sd);
@@ -906,7 +910,7 @@ static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
 
 		code->code = csi2_input_fmts[code->index];
 	} else {
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SINK,
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK,
 					   code->which);
 		switch (code->index) {
 		case 0:
@@ -930,7 +934,7 @@ static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int csi2_enum_frame_size(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct isp_csi2_device *csi2 = v4l2_get_subdevdata(sd);
@@ -942,7 +946,7 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	csi2_try_format(csi2, cfg, fse->pad, &format, fse->which);
+	csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -952,7 +956,7 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	csi2_try_format(csi2, cfg, fse->pad, &format, fse->which);
+	csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -966,13 +970,14 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
  * @fmt: pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int csi2_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int csi2_get_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_csi2_device *csi2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csi2_get_format(csi2, cfg, fmt->pad, fmt->which);
+	format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -987,25 +992,27 @@ static int csi2_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config
  * @fmt: pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int csi2_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int csi2_set_format(struct v4l2_subdev *sd,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct isp_csi2_device *csi2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csi2_get_format(csi2, cfg, fmt->pad, fmt->which);
+	format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	csi2_try_format(csi2, cfg, fmt->pad, &fmt->format, fmt->which);
+	csi2_try_format(csi2, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == CSI2_PAD_SINK) {
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SOURCE,
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SOURCE,
 					   fmt->which);
 		*format = fmt->format;
-		csi2_try_format(csi2, cfg, CSI2_PAD_SOURCE, format, fmt->which);
+		csi2_try_format(csi2, sd_state, CSI2_PAD_SOURCE, format,
+				fmt->which);
 	}
 
 	return 0;
@@ -1030,7 +1037,7 @@ static int csi2_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	csi2_set_format(sd, fh ? fh->pad : NULL, &format);
+	csi2_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c
index 607b7685c982f..53aedec7990da 100644
--- a/drivers/media/platform/omap3isp/isppreview.c
+++ b/drivers/media/platform/omap3isp/isppreview.c
@@ -1679,21 +1679,25 @@ static int preview_set_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static struct v4l2_mbus_framefmt *
-__preview_get_format(struct isp_prev_device *prev, struct v4l2_subdev_pad_config *cfg,
+__preview_get_format(struct isp_prev_device *prev,
+		     struct v4l2_subdev_state *sd_state,
 		     unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&prev->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&prev->subdev, sd_state,
+						  pad);
 	else
 		return &prev->formats[pad];
 }
 
 static struct v4l2_rect *
-__preview_get_crop(struct isp_prev_device *prev, struct v4l2_subdev_pad_config *cfg,
+__preview_get_crop(struct isp_prev_device *prev,
+		   struct v4l2_subdev_state *sd_state,
 		   enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&prev->subdev, cfg, PREV_PAD_SINK);
+		return v4l2_subdev_get_try_crop(&prev->subdev, sd_state,
+						PREV_PAD_SINK);
 	else
 		return &prev->crop;
 }
@@ -1729,7 +1733,8 @@ static const unsigned int preview_output_fmts[] = {
  * engine limits and the format and crop rectangles on other pads.
  */
 static void preview_try_format(struct isp_prev_device *prev,
-			       struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+			       struct v4l2_subdev_state *sd_state,
+			       unsigned int pad,
 			       struct v4l2_mbus_framefmt *fmt,
 			       enum v4l2_subdev_format_whence which)
 {
@@ -1770,7 +1775,8 @@ static void preview_try_format(struct isp_prev_device *prev,
 
 	case PREV_PAD_SOURCE:
 		pixelcode = fmt->code;
-		*fmt = *__preview_get_format(prev, cfg, PREV_PAD_SINK, which);
+		*fmt = *__preview_get_format(prev, sd_state, PREV_PAD_SINK,
+					     which);
 
 		switch (pixelcode) {
 		case MEDIA_BUS_FMT_YUYV8_1X16:
@@ -1788,7 +1794,7 @@ static void preview_try_format(struct isp_prev_device *prev,
 		 * is not supported yet, hardcode the output size to the crop
 		 * rectangle size.
 		 */
-		crop = __preview_get_crop(prev, cfg, which);
+		crop = __preview_get_crop(prev, sd_state, which);
 		fmt->width = crop->width;
 		fmt->height = crop->height;
 
@@ -1862,7 +1868,7 @@ static void preview_try_crop(struct isp_prev_device *prev,
  * return -EINVAL or zero on success
  */
 static int preview_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	switch (code->pad) {
@@ -1886,7 +1892,7 @@ static int preview_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int preview_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct isp_prev_device *prev = v4l2_get_subdevdata(sd);
@@ -1898,7 +1904,7 @@ static int preview_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	preview_try_format(prev, cfg, fse->pad, &format, fse->which);
+	preview_try_format(prev, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -1908,7 +1914,7 @@ static int preview_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	preview_try_format(prev, cfg, fse->pad, &format, fse->which);
+	preview_try_format(prev, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -1926,7 +1932,7 @@ static int preview_enum_frame_size(struct v4l2_subdev *sd,
  * Return 0 on success or a negative error code otherwise.
  */
 static int preview_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct isp_prev_device *prev = v4l2_get_subdevdata(sd);
@@ -1942,13 +1948,13 @@ static int preview_get_selection(struct v4l2_subdev *sd,
 		sel->r.width = INT_MAX;
 		sel->r.height = INT_MAX;
 
-		format = __preview_get_format(prev, cfg, PREV_PAD_SINK,
+		format = __preview_get_format(prev, sd_state, PREV_PAD_SINK,
 					      sel->which);
 		preview_try_crop(prev, format, &sel->r);
 		break;
 
 	case V4L2_SEL_TGT_CROP:
-		sel->r = *__preview_get_crop(prev, cfg, sel->which);
+		sel->r = *__preview_get_crop(prev, sd_state, sel->which);
 		break;
 
 	default:
@@ -1969,7 +1975,7 @@ static int preview_get_selection(struct v4l2_subdev *sd,
  * Return 0 on success or a negative error code otherwise.
  */
 static int preview_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct isp_prev_device *prev = v4l2_get_subdevdata(sd);
@@ -1988,17 +1994,20 @@ static int preview_set_selection(struct v4l2_subdev *sd,
 	 * rectangle.
 	 */
 	if (sel->flags & V4L2_SEL_FLAG_KEEP_CONFIG) {
-		sel->r = *__preview_get_crop(prev, cfg, sel->which);
+		sel->r = *__preview_get_crop(prev, sd_state, sel->which);
 		return 0;
 	}
 
-	format = __preview_get_format(prev, cfg, PREV_PAD_SINK, sel->which);
+	format = __preview_get_format(prev, sd_state, PREV_PAD_SINK,
+				      sel->which);
 	preview_try_crop(prev, format, &sel->r);
-	*__preview_get_crop(prev, cfg, sel->which) = sel->r;
+	*__preview_get_crop(prev, sd_state, sel->which) = sel->r;
 
 	/* Update the source format. */
-	format = __preview_get_format(prev, cfg, PREV_PAD_SOURCE, sel->which);
-	preview_try_format(prev, cfg, PREV_PAD_SOURCE, format, sel->which);
+	format = __preview_get_format(prev, sd_state, PREV_PAD_SOURCE,
+				      sel->which);
+	preview_try_format(prev, sd_state, PREV_PAD_SOURCE, format,
+			   sel->which);
 
 	return 0;
 }
@@ -2010,13 +2019,14 @@ static int preview_set_selection(struct v4l2_subdev *sd,
  * @fmt: pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int preview_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int preview_get_format(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct isp_prev_device *prev = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __preview_get_format(prev, cfg, fmt->pad, fmt->which);
+	format = __preview_get_format(prev, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -2031,24 +2041,25 @@ static int preview_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
  * @fmt: pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int preview_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int preview_set_format(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct isp_prev_device *prev = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	format = __preview_get_format(prev, cfg, fmt->pad, fmt->which);
+	format = __preview_get_format(prev, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	preview_try_format(prev, cfg, fmt->pad, &fmt->format, fmt->which);
+	preview_try_format(prev, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == PREV_PAD_SINK) {
 		/* Reset the crop rectangle. */
-		crop = __preview_get_crop(prev, cfg, fmt->which);
+		crop = __preview_get_crop(prev, sd_state, fmt->which);
 		crop->left = 0;
 		crop->top = 0;
 		crop->width = fmt->format.width;
@@ -2057,9 +2068,9 @@ static int preview_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
 		preview_try_crop(prev, &fmt->format, crop);
 
 		/* Update the source format. */
-		format = __preview_get_format(prev, cfg, PREV_PAD_SOURCE,
+		format = __preview_get_format(prev, sd_state, PREV_PAD_SOURCE,
 					      fmt->which);
-		preview_try_format(prev, cfg, PREV_PAD_SOURCE, format,
+		preview_try_format(prev, sd_state, PREV_PAD_SOURCE, format,
 				   fmt->which);
 	}
 
@@ -2086,7 +2097,7 @@ static int preview_init_formats(struct v4l2_subdev *sd,
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	preview_set_format(sd, fh ? fh->pad : NULL, &format);
+	preview_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/media/platform/omap3isp/ispresizer.c b/drivers/media/platform/omap3isp/ispresizer.c
index 78d9dd7ea2da7..ed2fb0c7a57e5 100644
--- a/drivers/media/platform/omap3isp/ispresizer.c
+++ b/drivers/media/platform/omap3isp/ispresizer.c
@@ -114,11 +114,12 @@ static const struct isprsz_coef filter_coefs = {
  * return zero
  */
 static struct v4l2_mbus_framefmt *
-__resizer_get_format(struct isp_res_device *res, struct v4l2_subdev_pad_config *cfg,
+__resizer_get_format(struct isp_res_device *res,
+		     struct v4l2_subdev_state *sd_state,
 		     unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&res->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&res->subdev, sd_state, pad);
 	else
 		return &res->formats[pad];
 }
@@ -130,11 +131,13 @@ __resizer_get_format(struct isp_res_device *res, struct v4l2_subdev_pad_config *
  * @which : wanted subdev crop rectangle
  */
 static struct v4l2_rect *
-__resizer_get_crop(struct isp_res_device *res, struct v4l2_subdev_pad_config *cfg,
+__resizer_get_crop(struct isp_res_device *res,
+		   struct v4l2_subdev_state *sd_state,
 		   enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&res->subdev, cfg, RESZ_PAD_SINK);
+		return v4l2_subdev_get_try_crop(&res->subdev, sd_state,
+						RESZ_PAD_SINK);
 	else
 		return &res->crop.request;
 }
@@ -1220,7 +1223,7 @@ static void resizer_try_crop(const struct v4l2_mbus_framefmt *sink,
  * Return 0 on success or a negative error code otherwise.
  */
 static int resizer_get_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
@@ -1231,9 +1234,9 @@ static int resizer_get_selection(struct v4l2_subdev *sd,
 	if (sel->pad != RESZ_PAD_SINK)
 		return -EINVAL;
 
-	format_sink = __resizer_get_format(res, cfg, RESZ_PAD_SINK,
+	format_sink = __resizer_get_format(res, sd_state, RESZ_PAD_SINK,
 					   sel->which);
-	format_source = __resizer_get_format(res, cfg, RESZ_PAD_SOURCE,
+	format_source = __resizer_get_format(res, sd_state, RESZ_PAD_SOURCE,
 					     sel->which);
 
 	switch (sel->target) {
@@ -1248,7 +1251,7 @@ static int resizer_get_selection(struct v4l2_subdev *sd,
 		break;
 
 	case V4L2_SEL_TGT_CROP:
-		sel->r = *__resizer_get_crop(res, cfg, sel->which);
+		sel->r = *__resizer_get_crop(res, sd_state, sel->which);
 		resizer_calc_ratios(res, &sel->r, format_source, &ratio);
 		break;
 
@@ -1273,7 +1276,7 @@ static int resizer_get_selection(struct v4l2_subdev *sd,
  * Return 0 on success or a negative error code otherwise.
  */
 static int resizer_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_selection *sel)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
@@ -1287,9 +1290,9 @@ static int resizer_set_selection(struct v4l2_subdev *sd,
 	    sel->pad != RESZ_PAD_SINK)
 		return -EINVAL;
 
-	format_sink = __resizer_get_format(res, cfg, RESZ_PAD_SINK,
+	format_sink = __resizer_get_format(res, sd_state, RESZ_PAD_SINK,
 					   sel->which);
-	format_source = *__resizer_get_format(res, cfg, RESZ_PAD_SOURCE,
+	format_source = *__resizer_get_format(res, sd_state, RESZ_PAD_SOURCE,
 					      sel->which);
 
 	dev_dbg(isp->dev, "%s(%s): req %ux%u -> (%d,%d)/%ux%u -> %ux%u\n",
@@ -1307,7 +1310,7 @@ static int resizer_set_selection(struct v4l2_subdev *sd,
 	 * stored the mangled rectangle.
 	 */
 	resizer_try_crop(format_sink, &format_source, &sel->r);
-	*__resizer_get_crop(res, cfg, sel->which) = sel->r;
+	*__resizer_get_crop(res, sd_state, sel->which) = sel->r;
 	resizer_calc_ratios(res, &sel->r, &format_source, &ratio);
 
 	dev_dbg(isp->dev, "%s(%s): got %ux%u -> (%d,%d)/%ux%u -> %ux%u\n",
@@ -1317,7 +1320,8 @@ static int resizer_set_selection(struct v4l2_subdev *sd,
 		format_source.width, format_source.height);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*__resizer_get_format(res, cfg, RESZ_PAD_SOURCE, sel->which) =
+		*__resizer_get_format(res, sd_state, RESZ_PAD_SOURCE,
+				      sel->which) =
 			format_source;
 		return 0;
 	}
@@ -1328,7 +1332,7 @@ static int resizer_set_selection(struct v4l2_subdev *sd,
 	 */
 	spin_lock_irqsave(&res->lock, flags);
 
-	*__resizer_get_format(res, cfg, RESZ_PAD_SOURCE, sel->which) =
+	*__resizer_get_format(res, sd_state, RESZ_PAD_SOURCE, sel->which) =
 		format_source;
 
 	res->ratio = ratio;
@@ -1371,7 +1375,8 @@ static unsigned int resizer_max_in_width(struct isp_res_device *res)
  * @which : wanted subdev format
  */
 static void resizer_try_format(struct isp_res_device *res,
-			       struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+			       struct v4l2_subdev_state *sd_state,
+			       unsigned int pad,
 			       struct v4l2_mbus_framefmt *fmt,
 			       enum v4l2_subdev_format_whence which)
 {
@@ -1392,10 +1397,11 @@ static void resizer_try_format(struct isp_res_device *res,
 		break;
 
 	case RESZ_PAD_SOURCE:
-		format = __resizer_get_format(res, cfg, RESZ_PAD_SINK, which);
+		format = __resizer_get_format(res, sd_state, RESZ_PAD_SINK,
+					      which);
 		fmt->code = format->code;
 
-		crop = *__resizer_get_crop(res, cfg, which);
+		crop = *__resizer_get_crop(res, sd_state, which);
 		resizer_calc_ratios(res, &crop, fmt, &ratio);
 		break;
 	}
@@ -1412,7 +1418,7 @@ static void resizer_try_format(struct isp_res_device *res,
  * return -EINVAL or zero on success
  */
 static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
@@ -1427,7 +1433,7 @@ static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index != 0)
 			return -EINVAL;
 
-		format = __resizer_get_format(res, cfg, RESZ_PAD_SINK,
+		format = __resizer_get_format(res, sd_state, RESZ_PAD_SINK,
 					      code->which);
 		code->code = format->code;
 	}
@@ -1436,7 +1442,7 @@ static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int resizer_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
@@ -1448,7 +1454,7 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	resizer_try_format(res, cfg, fse->pad, &format, fse->which);
+	resizer_try_format(res, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -1458,7 +1464,7 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	resizer_try_format(res, cfg, fse->pad, &format, fse->which);
+	resizer_try_format(res, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -1472,13 +1478,14 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
  * @fmt   : pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int resizer_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int resizer_get_format(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __resizer_get_format(res, cfg, fmt->pad, fmt->which);
+	format = __resizer_get_format(res, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -1493,33 +1500,34 @@ static int resizer_get_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_con
  * @fmt   : pointer to v4l2 subdev format structure
  * return -EINVAL or zero on success
  */
-static int resizer_set_format(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg,
+static int resizer_set_format(struct v4l2_subdev *sd,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct isp_res_device *res = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 
-	format = __resizer_get_format(res, cfg, fmt->pad, fmt->which);
+	format = __resizer_get_format(res, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	resizer_try_format(res, cfg, fmt->pad, &fmt->format, fmt->which);
+	resizer_try_format(res, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	if (fmt->pad == RESZ_PAD_SINK) {
 		/* reset crop rectangle */
-		crop = __resizer_get_crop(res, cfg, fmt->which);
+		crop = __resizer_get_crop(res, sd_state, fmt->which);
 		crop->left = 0;
 		crop->top = 0;
 		crop->width = fmt->format.width;
 		crop->height = fmt->format.height;
 
 		/* Propagate the format from sink to source */
-		format = __resizer_get_format(res, cfg, RESZ_PAD_SOURCE,
+		format = __resizer_get_format(res, sd_state, RESZ_PAD_SOURCE,
 					      fmt->which);
 		*format = fmt->format;
-		resizer_try_format(res, cfg, RESZ_PAD_SOURCE, format,
+		resizer_try_format(res, sd_state, RESZ_PAD_SOURCE, format,
 				   fmt->which);
 	}
 
@@ -1570,7 +1578,7 @@ static int resizer_init_formats(struct v4l2_subdev *sd,
 	format.format.code = MEDIA_BUS_FMT_YUYV8_1X16;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	resizer_set_format(sd, fh ? fh->pad : NULL, &format);
+	resizer_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c
index dd510ee9b58aa..ec4c010644cae 100644
--- a/drivers/media/platform/pxa_camera.c
+++ b/drivers/media/platform/pxa_camera.c
@@ -1792,6 +1792,9 @@ static int pxac_vidioc_try_fmt_vid_cap(struct file *filp, void *priv,
 	const struct pxa_camera_format_xlate *xlate;
 	struct v4l2_pix_format *pix = &f->fmt.pix;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -1816,7 +1819,7 @@ static int pxac_vidioc_try_fmt_vid_cap(struct file *filp, void *priv,
 			      pixfmt == V4L2_PIX_FMT_YUV422P ? 4 : 0);
 
 	v4l2_fill_mbus_format(mf, pix, xlate->code);
-	ret = sensor_call(pcdev, pad, set_fmt, &pad_cfg, &format);
+	ret = sensor_call(pcdev, pad, set_fmt, &pad_state, &format);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c
index 251f4c4afe196..a1637b78568b2 100644
--- a/drivers/media/platform/qcom/camss/camss-csid.c
+++ b/drivers/media/platform/qcom/camss/camss-csid.c
@@ -245,12 +245,13 @@ static int csid_set_stream(struct v4l2_subdev *sd, int enable)
  */
 static struct v4l2_mbus_framefmt *
 __csid_get_format(struct csid_device *csid,
-		  struct v4l2_subdev_pad_config *cfg,
+		  struct v4l2_subdev_state *sd_state,
 		  unsigned int pad,
 		  enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csid->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&csid->subdev, sd_state,
+						  pad);
 
 	return &csid->fmt[pad];
 }
@@ -264,7 +265,7 @@ __csid_get_format(struct csid_device *csid,
  * @which: wanted subdev format
  */
 static void csid_try_format(struct csid_device *csid,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    unsigned int pad,
 			    struct v4l2_mbus_framefmt *fmt,
 			    enum v4l2_subdev_format_whence which)
@@ -297,7 +298,7 @@ static void csid_try_format(struct csid_device *csid,
 			/* keep pad formats in sync */
 			u32 code = fmt->code;
 
-			*fmt = *__csid_get_format(csid, cfg,
+			*fmt = *__csid_get_format(csid, sd_state,
 						      MSM_CSID_PAD_SINK, which);
 			fmt->code = csid->ops->src_pad_code(csid, fmt->code, 0, code);
 		} else {
@@ -331,7 +332,7 @@ static void csid_try_format(struct csid_device *csid,
  * return -EINVAL or zero on success
  */
 static int csid_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct csid_device *csid = v4l2_get_subdevdata(sd);
@@ -345,7 +346,7 @@ static int csid_enum_mbus_code(struct v4l2_subdev *sd,
 		if (csid->testgen_mode->cur.val == 0) {
 			struct v4l2_mbus_framefmt *sink_fmt;
 
-			sink_fmt = __csid_get_format(csid, cfg,
+			sink_fmt = __csid_get_format(csid, sd_state,
 						     MSM_CSID_PAD_SINK,
 						     code->which);
 
@@ -372,7 +373,7 @@ static int csid_enum_mbus_code(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int csid_enum_frame_size(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct csid_device *csid = v4l2_get_subdevdata(sd);
@@ -384,7 +385,7 @@ static int csid_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	csid_try_format(csid, cfg, fse->pad, &format, fse->which);
+	csid_try_format(csid, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -394,7 +395,7 @@ static int csid_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	csid_try_format(csid, cfg, fse->pad, &format, fse->which);
+	csid_try_format(csid, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -410,13 +411,13 @@ static int csid_enum_frame_size(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int csid_get_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct csid_device *csid = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csid_get_format(csid, cfg, fmt->pad, fmt->which);
+	format = __csid_get_format(csid, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -434,26 +435,26 @@ static int csid_get_format(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int csid_set_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct csid_device *csid = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csid_get_format(csid, cfg, fmt->pad, fmt->which);
+	format = __csid_get_format(csid, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	csid_try_format(csid, cfg, fmt->pad, &fmt->format, fmt->which);
+	csid_try_format(csid, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == MSM_CSID_PAD_SINK) {
-		format = __csid_get_format(csid, cfg, MSM_CSID_PAD_SRC,
+		format = __csid_get_format(csid, sd_state, MSM_CSID_PAD_SRC,
 					   fmt->which);
 
 		*format = fmt->format;
-		csid_try_format(csid, cfg, MSM_CSID_PAD_SRC, format,
+		csid_try_format(csid, sd_state, MSM_CSID_PAD_SRC, format,
 				fmt->which);
 	}
 
@@ -482,7 +483,7 @@ static int csid_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 		}
 	};
 
-	return csid_set_format(sd, fh ? fh->pad : NULL, &format);
+	return csid_set_format(sd, fh ? fh->state : NULL, &format);
 }
 
 /*
diff --git a/drivers/media/platform/qcom/camss/camss-csiphy.c b/drivers/media/platform/qcom/camss/camss-csiphy.c
index 35470cbaea863..24eec16197e76 100644
--- a/drivers/media/platform/qcom/camss/camss-csiphy.c
+++ b/drivers/media/platform/qcom/camss/camss-csiphy.c
@@ -338,12 +338,13 @@ static int csiphy_set_stream(struct v4l2_subdev *sd, int enable)
  */
 static struct v4l2_mbus_framefmt *
 __csiphy_get_format(struct csiphy_device *csiphy,
-		    struct v4l2_subdev_pad_config *cfg,
+		    struct v4l2_subdev_state *sd_state,
 		    unsigned int pad,
 		    enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csiphy->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&csiphy->subdev, sd_state,
+						  pad);
 
 	return &csiphy->fmt[pad];
 }
@@ -357,7 +358,7 @@ __csiphy_get_format(struct csiphy_device *csiphy,
  * @which: wanted subdev format
  */
 static void csiphy_try_format(struct csiphy_device *csiphy,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      unsigned int pad,
 			      struct v4l2_mbus_framefmt *fmt,
 			      enum v4l2_subdev_format_whence which)
@@ -387,7 +388,8 @@ static void csiphy_try_format(struct csiphy_device *csiphy,
 	case MSM_CSIPHY_PAD_SRC:
 		/* Set and return a format same as sink pad */
 
-		*fmt = *__csiphy_get_format(csiphy, cfg, MSM_CSID_PAD_SINK,
+		*fmt = *__csiphy_get_format(csiphy, sd_state,
+					    MSM_CSID_PAD_SINK,
 					    which);
 
 		break;
@@ -402,7 +404,7 @@ static void csiphy_try_format(struct csiphy_device *csiphy,
  * return -EINVAL or zero on success
  */
 static int csiphy_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct csiphy_device *csiphy = v4l2_get_subdevdata(sd);
@@ -417,7 +419,8 @@ static int csiphy_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index > 0)
 			return -EINVAL;
 
-		format = __csiphy_get_format(csiphy, cfg, MSM_CSIPHY_PAD_SINK,
+		format = __csiphy_get_format(csiphy, sd_state,
+					     MSM_CSIPHY_PAD_SINK,
 					     code->which);
 
 		code->code = format->code;
@@ -434,7 +437,7 @@ static int csiphy_enum_mbus_code(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int csiphy_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct csiphy_device *csiphy = v4l2_get_subdevdata(sd);
@@ -446,7 +449,7 @@ static int csiphy_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	csiphy_try_format(csiphy, cfg, fse->pad, &format, fse->which);
+	csiphy_try_format(csiphy, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -456,7 +459,7 @@ static int csiphy_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	csiphy_try_format(csiphy, cfg, fse->pad, &format, fse->which);
+	csiphy_try_format(csiphy, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -472,13 +475,13 @@ static int csiphy_enum_frame_size(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int csiphy_get_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct csiphy_device *csiphy = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csiphy_get_format(csiphy, cfg, fmt->pad, fmt->which);
+	format = __csiphy_get_format(csiphy, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -496,26 +499,29 @@ static int csiphy_get_format(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int csiphy_set_format(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *fmt)
 {
 	struct csiphy_device *csiphy = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csiphy_get_format(csiphy, cfg, fmt->pad, fmt->which);
+	format = __csiphy_get_format(csiphy, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	csiphy_try_format(csiphy, cfg, fmt->pad, &fmt->format, fmt->which);
+	csiphy_try_format(csiphy, sd_state, fmt->pad, &fmt->format,
+			  fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == MSM_CSIPHY_PAD_SINK) {
-		format = __csiphy_get_format(csiphy, cfg, MSM_CSIPHY_PAD_SRC,
+		format = __csiphy_get_format(csiphy, sd_state,
+					     MSM_CSIPHY_PAD_SRC,
 					     fmt->which);
 
 		*format = fmt->format;
-		csiphy_try_format(csiphy, cfg, MSM_CSIPHY_PAD_SRC, format,
+		csiphy_try_format(csiphy, sd_state, MSM_CSIPHY_PAD_SRC,
+				  format,
 				  fmt->which);
 	}
 
@@ -545,7 +551,7 @@ static int csiphy_init_formats(struct v4l2_subdev *sd,
 		}
 	};
 
-	return csiphy_set_format(sd, fh ? fh->pad : NULL, &format);
+	return csiphy_set_format(sd, fh ? fh->state : NULL, &format);
 }
 
 /*
diff --git a/drivers/media/platform/qcom/camss/camss-ispif.c b/drivers/media/platform/qcom/camss/camss-ispif.c
index 1b716182d35c0..ba5d65f6ef34b 100644
--- a/drivers/media/platform/qcom/camss/camss-ispif.c
+++ b/drivers/media/platform/qcom/camss/camss-ispif.c
@@ -874,12 +874,13 @@ static int ispif_set_stream(struct v4l2_subdev *sd, int enable)
  */
 static struct v4l2_mbus_framefmt *
 __ispif_get_format(struct ispif_line *line,
-		   struct v4l2_subdev_pad_config *cfg,
+		   struct v4l2_subdev_state *sd_state,
 		   unsigned int pad,
 		   enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&line->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&line->subdev, sd_state,
+						  pad);
 
 	return &line->fmt[pad];
 }
@@ -893,7 +894,7 @@ __ispif_get_format(struct ispif_line *line,
  * @which: wanted subdev format
  */
 static void ispif_try_format(struct ispif_line *line,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     unsigned int pad,
 			     struct v4l2_mbus_framefmt *fmt,
 			     enum v4l2_subdev_format_whence which)
@@ -923,7 +924,7 @@ static void ispif_try_format(struct ispif_line *line,
 	case MSM_ISPIF_PAD_SRC:
 		/* Set and return a format same as sink pad */
 
-		*fmt = *__ispif_get_format(line, cfg, MSM_ISPIF_PAD_SINK,
+		*fmt = *__ispif_get_format(line, sd_state, MSM_ISPIF_PAD_SINK,
 					   which);
 
 		break;
@@ -940,7 +941,7 @@ static void ispif_try_format(struct ispif_line *line,
  * return -EINVAL or zero on success
  */
 static int ispif_enum_mbus_code(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct ispif_line *line = v4l2_get_subdevdata(sd);
@@ -955,7 +956,8 @@ static int ispif_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index > 0)
 			return -EINVAL;
 
-		format = __ispif_get_format(line, cfg, MSM_ISPIF_PAD_SINK,
+		format = __ispif_get_format(line, sd_state,
+					    MSM_ISPIF_PAD_SINK,
 					    code->which);
 
 		code->code = format->code;
@@ -972,7 +974,7 @@ static int ispif_enum_mbus_code(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int ispif_enum_frame_size(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct ispif_line *line = v4l2_get_subdevdata(sd);
@@ -984,7 +986,7 @@ static int ispif_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	ispif_try_format(line, cfg, fse->pad, &format, fse->which);
+	ispif_try_format(line, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -994,7 +996,7 @@ static int ispif_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	ispif_try_format(line, cfg, fse->pad, &format, fse->which);
+	ispif_try_format(line, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -1010,13 +1012,13 @@ static int ispif_enum_frame_size(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int ispif_get_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct ispif_line *line = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ispif_get_format(line, cfg, fmt->pad, fmt->which);
+	format = __ispif_get_format(line, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -1034,26 +1036,26 @@ static int ispif_get_format(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int ispif_set_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct ispif_line *line = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ispif_get_format(line, cfg, fmt->pad, fmt->which);
+	format = __ispif_get_format(line, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	ispif_try_format(line, cfg, fmt->pad, &fmt->format, fmt->which);
+	ispif_try_format(line, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == MSM_ISPIF_PAD_SINK) {
-		format = __ispif_get_format(line, cfg, MSM_ISPIF_PAD_SRC,
+		format = __ispif_get_format(line, sd_state, MSM_ISPIF_PAD_SRC,
 					    fmt->which);
 
 		*format = fmt->format;
-		ispif_try_format(line, cfg, MSM_ISPIF_PAD_SRC, format,
+		ispif_try_format(line, sd_state, MSM_ISPIF_PAD_SRC, format,
 				 fmt->which);
 	}
 
@@ -1082,7 +1084,7 @@ static int ispif_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 		}
 	};
 
-	return ispif_set_format(sd, fh ? fh->pad : NULL, &format);
+	return ispif_set_format(sd, fh ? fh->state : NULL, &format);
 }
 
 /*
diff --git a/drivers/media/platform/qcom/camss/camss-vfe.c b/drivers/media/platform/qcom/camss/camss-vfe.c
index 27ab20c5b57eb..e0f3a36f3f3f9 100644
--- a/drivers/media/platform/qcom/camss/camss-vfe.c
+++ b/drivers/media/platform/qcom/camss/camss-vfe.c
@@ -763,12 +763,13 @@ static int vfe_set_stream(struct v4l2_subdev *sd, int enable)
  */
 static struct v4l2_mbus_framefmt *
 __vfe_get_format(struct vfe_line *line,
-		 struct v4l2_subdev_pad_config *cfg,
+		 struct v4l2_subdev_state *sd_state,
 		 unsigned int pad,
 		 enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&line->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&line->subdev, sd_state,
+						  pad);
 
 	return &line->fmt[pad];
 }
@@ -783,11 +784,11 @@ __vfe_get_format(struct vfe_line *line,
  */
 static struct v4l2_rect *
 __vfe_get_compose(struct vfe_line *line,
-		  struct v4l2_subdev_pad_config *cfg,
+		  struct v4l2_subdev_state *sd_state,
 		  enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_compose(&line->subdev, cfg,
+		return v4l2_subdev_get_try_compose(&line->subdev, sd_state,
 						   MSM_VFE_PAD_SINK);
 
 	return &line->compose;
@@ -803,11 +804,11 @@ __vfe_get_compose(struct vfe_line *line,
  */
 static struct v4l2_rect *
 __vfe_get_crop(struct vfe_line *line,
-	       struct v4l2_subdev_pad_config *cfg,
+	       struct v4l2_subdev_state *sd_state,
 	       enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&line->subdev, cfg,
+		return v4l2_subdev_get_try_crop(&line->subdev, sd_state,
 						MSM_VFE_PAD_SRC);
 
 	return &line->crop;
@@ -822,7 +823,7 @@ __vfe_get_crop(struct vfe_line *line,
  * @which: wanted subdev format
  */
 static void vfe_try_format(struct vfe_line *line,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad,
 			   struct v4l2_mbus_framefmt *fmt,
 			   enum v4l2_subdev_format_whence which)
@@ -854,14 +855,15 @@ static void vfe_try_format(struct vfe_line *line,
 		/* Set and return a format same as sink pad */
 		code = fmt->code;
 
-		*fmt = *__vfe_get_format(line, cfg, MSM_VFE_PAD_SINK, which);
+		*fmt = *__vfe_get_format(line, sd_state, MSM_VFE_PAD_SINK,
+					 which);
 
 		fmt->code = vfe_src_pad_code(line, fmt->code, 0, code);
 
 		if (line->id == VFE_LINE_PIX) {
 			struct v4l2_rect *rect;
 
-			rect = __vfe_get_crop(line, cfg, which);
+			rect = __vfe_get_crop(line, sd_state, which);
 
 			fmt->width = rect->width;
 			fmt->height = rect->height;
@@ -881,13 +883,13 @@ static void vfe_try_format(struct vfe_line *line,
  * @which: wanted subdev format
  */
 static void vfe_try_compose(struct vfe_line *line,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_rect *rect,
 			    enum v4l2_subdev_format_whence which)
 {
 	struct v4l2_mbus_framefmt *fmt;
 
-	fmt = __vfe_get_format(line, cfg, MSM_VFE_PAD_SINK, which);
+	fmt = __vfe_get_format(line, sd_state, MSM_VFE_PAD_SINK, which);
 
 	if (rect->width > fmt->width)
 		rect->width = fmt->width;
@@ -920,13 +922,13 @@ static void vfe_try_compose(struct vfe_line *line,
  * @which: wanted subdev format
  */
 static void vfe_try_crop(struct vfe_line *line,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_rect *rect,
 			 enum v4l2_subdev_format_whence which)
 {
 	struct v4l2_rect *compose;
 
-	compose = __vfe_get_compose(line, cfg, which);
+	compose = __vfe_get_compose(line, sd_state, which);
 
 	if (rect->width > compose->width)
 		rect->width = compose->width;
@@ -964,7 +966,7 @@ static void vfe_try_crop(struct vfe_line *line,
  * return -EINVAL or zero on success
  */
 static int vfe_enum_mbus_code(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
@@ -977,7 +979,7 @@ static int vfe_enum_mbus_code(struct v4l2_subdev *sd,
 	} else {
 		struct v4l2_mbus_framefmt *sink_fmt;
 
-		sink_fmt = __vfe_get_format(line, cfg, MSM_VFE_PAD_SINK,
+		sink_fmt = __vfe_get_format(line, sd_state, MSM_VFE_PAD_SINK,
 					    code->which);
 
 		code->code = vfe_src_pad_code(line, sink_fmt->code,
@@ -998,7 +1000,7 @@ static int vfe_enum_mbus_code(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int vfe_enum_frame_size(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
@@ -1010,7 +1012,7 @@ static int vfe_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	vfe_try_format(line, cfg, fse->pad, &format, fse->which);
+	vfe_try_format(line, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -1020,7 +1022,7 @@ static int vfe_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	vfe_try_format(line, cfg, fse->pad, &format, fse->which);
+	vfe_try_format(line, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -1036,13 +1038,13 @@ static int vfe_enum_frame_size(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int vfe_get_format(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __vfe_get_format(line, cfg, fmt->pad, fmt->which);
+	format = __vfe_get_format(line, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
@@ -1052,7 +1054,7 @@ static int vfe_get_format(struct v4l2_subdev *sd,
 }
 
 static int vfe_set_selection(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel);
 
 /*
@@ -1064,17 +1066,17 @@ static int vfe_set_selection(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int vfe_set_format(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __vfe_get_format(line, cfg, fmt->pad, fmt->which);
+	format = __vfe_get_format(line, sd_state, fmt->pad, fmt->which);
 	if (format == NULL)
 		return -EINVAL;
 
-	vfe_try_format(line, cfg, fmt->pad, &fmt->format, fmt->which);
+	vfe_try_format(line, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	if (fmt->pad == MSM_VFE_PAD_SINK) {
@@ -1082,11 +1084,11 @@ static int vfe_set_format(struct v4l2_subdev *sd,
 		int ret;
 
 		/* Propagate the format from sink to source */
-		format = __vfe_get_format(line, cfg, MSM_VFE_PAD_SRC,
+		format = __vfe_get_format(line, sd_state, MSM_VFE_PAD_SRC,
 					  fmt->which);
 
 		*format = fmt->format;
-		vfe_try_format(line, cfg, MSM_VFE_PAD_SRC, format,
+		vfe_try_format(line, sd_state, MSM_VFE_PAD_SRC, format,
 			       fmt->which);
 
 		if (line->id != VFE_LINE_PIX)
@@ -1098,7 +1100,7 @@ static int vfe_set_format(struct v4l2_subdev *sd,
 		sel.target = V4L2_SEL_TGT_COMPOSE;
 		sel.r.width = fmt->format.width;
 		sel.r.height = fmt->format.height;
-		ret = vfe_set_selection(sd, cfg, &sel);
+		ret = vfe_set_selection(sd, sd_state, &sel);
 		if (ret < 0)
 			return ret;
 	}
@@ -1115,7 +1117,7 @@ static int vfe_set_format(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int vfe_get_selection(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
@@ -1131,7 +1133,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd,
 		case V4L2_SEL_TGT_COMPOSE_BOUNDS:
 			fmt.pad = sel->pad;
 			fmt.which = sel->which;
-			ret = vfe_get_format(sd, cfg, &fmt);
+			ret = vfe_get_format(sd, sd_state, &fmt);
 			if (ret < 0)
 				return ret;
 
@@ -1141,7 +1143,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd,
 			sel->r.height = fmt.format.height;
 			break;
 		case V4L2_SEL_TGT_COMPOSE:
-			rect = __vfe_get_compose(line, cfg, sel->which);
+			rect = __vfe_get_compose(line, sd_state, sel->which);
 			if (rect == NULL)
 				return -EINVAL;
 
@@ -1153,7 +1155,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd,
 	else if (sel->pad == MSM_VFE_PAD_SRC)
 		switch (sel->target) {
 		case V4L2_SEL_TGT_CROP_BOUNDS:
-			rect = __vfe_get_compose(line, cfg, sel->which);
+			rect = __vfe_get_compose(line, sd_state, sel->which);
 			if (rect == NULL)
 				return -EINVAL;
 
@@ -1163,7 +1165,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd,
 			sel->r.height = rect->height;
 			break;
 		case V4L2_SEL_TGT_CROP:
-			rect = __vfe_get_crop(line, cfg, sel->which);
+			rect = __vfe_get_crop(line, sd_state, sel->which);
 			if (rect == NULL)
 				return -EINVAL;
 
@@ -1185,7 +1187,7 @@ static int vfe_get_selection(struct v4l2_subdev *sd,
  * Return -EINVAL or zero on success
  */
 static int vfe_set_selection(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vfe_line *line = v4l2_get_subdevdata(sd);
@@ -1199,11 +1201,11 @@ static int vfe_set_selection(struct v4l2_subdev *sd,
 		sel->pad == MSM_VFE_PAD_SINK) {
 		struct v4l2_subdev_selection crop = { 0 };
 
-		rect = __vfe_get_compose(line, cfg, sel->which);
+		rect = __vfe_get_compose(line, sd_state, sel->which);
 		if (rect == NULL)
 			return -EINVAL;
 
-		vfe_try_compose(line, cfg, &sel->r, sel->which);
+		vfe_try_compose(line, sd_state, &sel->r, sel->which);
 		*rect = sel->r;
 
 		/* Reset source crop selection */
@@ -1211,28 +1213,28 @@ static int vfe_set_selection(struct v4l2_subdev *sd,
 		crop.pad = MSM_VFE_PAD_SRC;
 		crop.target = V4L2_SEL_TGT_CROP;
 		crop.r = *rect;
-		ret = vfe_set_selection(sd, cfg, &crop);
+		ret = vfe_set_selection(sd, sd_state, &crop);
 	} else if (sel->target == V4L2_SEL_TGT_CROP &&
 		sel->pad == MSM_VFE_PAD_SRC) {
 		struct v4l2_subdev_format fmt = { 0 };
 
-		rect = __vfe_get_crop(line, cfg, sel->which);
+		rect = __vfe_get_crop(line, sd_state, sel->which);
 		if (rect == NULL)
 			return -EINVAL;
 
-		vfe_try_crop(line, cfg, &sel->r, sel->which);
+		vfe_try_crop(line, sd_state, &sel->r, sel->which);
 		*rect = sel->r;
 
 		/* Reset source pad format width and height */
 		fmt.which = sel->which;
 		fmt.pad = MSM_VFE_PAD_SRC;
-		ret = vfe_get_format(sd, cfg, &fmt);
+		ret = vfe_get_format(sd, sd_state, &fmt);
 		if (ret < 0)
 			return ret;
 
 		fmt.format.width = rect->width;
 		fmt.format.height = rect->height;
-		ret = vfe_set_format(sd, cfg, &fmt);
+		ret = vfe_set_format(sd, sd_state, &fmt);
 	} else {
 		ret = -EINVAL;
 	}
@@ -1262,7 +1264,7 @@ static int vfe_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 		}
 	};
 
-	return vfe_set_format(sd, fh ? fh->pad : NULL, &format);
+	return vfe_set_format(sd, fh ? fh->state : NULL, &format);
 }
 
 /*
diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
index b87d5453e4188..a128bf80e42cb 100644
--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
+++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
@@ -717,7 +717,7 @@ out:
 }
 
 static int rcsi2_set_pad_format(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_format *format)
 {
 	struct rcar_csi2 *priv = sd_to_csi2(sd);
@@ -729,7 +729,7 @@ static int rcsi2_set_pad_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
 		priv->mf = format->format;
 	} else {
-		framefmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		framefmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 		*framefmt = format->format;
 	}
 
@@ -737,7 +737,7 @@ static int rcsi2_set_pad_format(struct v4l2_subdev *sd,
 }
 
 static int rcsi2_get_pad_format(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_format *format)
 {
 	struct rcar_csi2 *priv = sd_to_csi2(sd);
@@ -745,7 +745,7 @@ static int rcsi2_get_pad_format(struct v4l2_subdev *sd,
 	if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		format->format = priv->mf;
 	else
-		format->format = *v4l2_subdev_get_try_format(sd, cfg, 0);
+		format->format = *v4l2_subdev_get_try_format(sd, sd_state, 0);
 
 	return 0;
 }
diff --git a/drivers/media/platform/rcar-vin/rcar-v4l2.c b/drivers/media/platform/rcar-vin/rcar-v4l2.c
index b1e9f86caa5cf..cca15a10c0b34 100644
--- a/drivers/media/platform/rcar-vin/rcar-v4l2.c
+++ b/drivers/media/platform/rcar-vin/rcar-v4l2.c
@@ -243,7 +243,7 @@ static int rvin_try_format(struct rvin_dev *vin, u32 which,
 			   struct v4l2_rect *src_rect)
 {
 	struct v4l2_subdev *sd = vin_to_source(vin);
-	struct v4l2_subdev_pad_config *pad_cfg;
+	struct v4l2_subdev_state *sd_state;
 	struct v4l2_subdev_format format = {
 		.which = which,
 		.pad = vin->parallel.source_pad,
@@ -252,8 +252,8 @@ static int rvin_try_format(struct rvin_dev *vin, u32 which,
 	u32 width, height;
 	int ret;
 
-	pad_cfg = v4l2_subdev_alloc_pad_config(sd);
-	if (pad_cfg == NULL)
+	sd_state = v4l2_subdev_alloc_state(sd);
+	if (sd_state == NULL)
 		return -ENOMEM;
 
 	if (!rvin_format_from_pixel(vin, pix->pixelformat))
@@ -266,7 +266,7 @@ static int rvin_try_format(struct rvin_dev *vin, u32 which,
 	width = pix->width;
 	height = pix->height;
 
-	ret = v4l2_subdev_call(sd, pad, set_fmt, pad_cfg, &format);
+	ret = v4l2_subdev_call(sd, pad, set_fmt, sd_state, &format);
 	if (ret < 0 && ret != -ENOIOCTLCMD)
 		goto done;
 	ret = 0;
@@ -288,7 +288,7 @@ static int rvin_try_format(struct rvin_dev *vin, u32 which,
 
 	rvin_format_align(vin, pix);
 done:
-	v4l2_subdev_free_pad_config(pad_cfg);
+	v4l2_subdev_free_state(sd_state);
 
 	return ret;
 }
diff --git a/drivers/media/platform/renesas-ceu.c b/drivers/media/platform/renesas-ceu.c
index 17f01b6e3fe0f..f432032c7084f 100644
--- a/drivers/media/platform/renesas-ceu.c
+++ b/drivers/media/platform/renesas-ceu.c
@@ -794,6 +794,9 @@ static int __ceu_try_fmt(struct ceu_device *ceudev, struct v4l2_format *v4l2_fmt
 	struct v4l2_pix_format_mplane *pix = &v4l2_fmt->fmt.pix_mp;
 	struct v4l2_subdev *v4l2_sd = ceu_sd->v4l2_sd;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	const struct ceu_fmt *ceu_fmt;
 	u32 mbus_code_old;
 	u32 mbus_code;
@@ -850,13 +853,13 @@ static int __ceu_try_fmt(struct ceu_device *ceudev, struct v4l2_format *v4l2_fmt
 	 * time.
 	 */
 	sd_format.format.code = mbus_code;
-	ret = v4l2_subdev_call(v4l2_sd, pad, set_fmt, &pad_cfg, &sd_format);
+	ret = v4l2_subdev_call(v4l2_sd, pad, set_fmt, &pad_state, &sd_format);
 	if (ret) {
 		if (ret == -EINVAL) {
 			/* fallback */
 			sd_format.format.code = mbus_code_old;
 			ret = v4l2_subdev_call(v4l2_sd, pad, set_fmt,
-					       &pad_cfg, &sd_format);
+					       &pad_state, &sd_format);
 		}
 
 		if (ret)
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
index 2e5b57e3aedc9..d596bc040005f 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-isp.c
@@ -208,24 +208,30 @@ static struct v4l2_subdev *rkisp1_get_remote_sensor(struct v4l2_subdev *sd)
 
 static struct v4l2_mbus_framefmt *
 rkisp1_isp_get_pad_fmt(struct rkisp1_isp *isp,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       unsigned int pad, u32 which)
 {
+	struct v4l2_subdev_state state = {
+		.pads = isp->pad_cfg
+		};
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&isp->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&isp->sd, sd_state, pad);
 	else
-		return v4l2_subdev_get_try_format(&isp->sd, isp->pad_cfg, pad);
+		return v4l2_subdev_get_try_format(&isp->sd, &state, pad);
 }
 
 static struct v4l2_rect *
 rkisp1_isp_get_pad_crop(struct rkisp1_isp *isp,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad, u32 which)
 {
+	struct v4l2_subdev_state state = {
+		.pads = isp->pad_cfg
+		};
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&isp->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&isp->sd, sd_state, pad);
 	else
-		return v4l2_subdev_get_try_crop(&isp->sd, isp->pad_cfg, pad);
+		return v4l2_subdev_get_try_crop(&isp->sd, &state, pad);
 }
 
 /* ----------------------------------------------------------------------------
@@ -561,7 +567,7 @@ static void rkisp1_isp_start(struct rkisp1_device *rkisp1)
  */
 
 static int rkisp1_isp_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_mbus_code_enum *code)
 {
 	unsigned int i, dir;
@@ -601,7 +607,7 @@ static int rkisp1_isp_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_isp_enum_frame_size(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_size_enum *fse)
 {
 	const struct rkisp1_isp_mbus_info *mbus_info;
@@ -634,37 +640,37 @@ static int rkisp1_isp_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_isp_init_config(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
 	struct v4l2_rect *sink_crop, *src_crop;
 
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg,
+	sink_fmt = v4l2_subdev_get_try_format(sd, sd_state,
 					      RKISP1_ISP_PAD_SINK_VIDEO);
 	sink_fmt->width = RKISP1_DEFAULT_WIDTH;
 	sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
 	sink_fmt->field = V4L2_FIELD_NONE;
 	sink_fmt->code = RKISP1_DEF_SINK_PAD_FMT;
 
-	sink_crop = v4l2_subdev_get_try_crop(sd, cfg,
+	sink_crop = v4l2_subdev_get_try_crop(sd, sd_state,
 					     RKISP1_ISP_PAD_SINK_VIDEO);
 	sink_crop->width = RKISP1_DEFAULT_WIDTH;
 	sink_crop->height = RKISP1_DEFAULT_HEIGHT;
 	sink_crop->left = 0;
 	sink_crop->top = 0;
 
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg,
+	src_fmt = v4l2_subdev_get_try_format(sd, sd_state,
 					     RKISP1_ISP_PAD_SOURCE_VIDEO);
 	*src_fmt = *sink_fmt;
 	src_fmt->code = RKISP1_DEF_SRC_PAD_FMT;
 
-	src_crop = v4l2_subdev_get_try_crop(sd, cfg,
+	src_crop = v4l2_subdev_get_try_crop(sd, sd_state,
 					    RKISP1_ISP_PAD_SOURCE_VIDEO);
 	*src_crop = *sink_crop;
 
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg,
+	sink_fmt = v4l2_subdev_get_try_format(sd, sd_state,
 					      RKISP1_ISP_PAD_SINK_PARAMS);
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg,
+	src_fmt = v4l2_subdev_get_try_format(sd, sd_state,
 					     RKISP1_ISP_PAD_SOURCE_STATS);
 	sink_fmt->width = 0;
 	sink_fmt->height = 0;
@@ -676,7 +682,7 @@ static int rkisp1_isp_init_config(struct v4l2_subdev *sd,
 }
 
 static void rkisp1_isp_set_src_fmt(struct rkisp1_isp *isp,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_mbus_framefmt *format,
 				   unsigned int which)
 {
@@ -684,9 +690,9 @@ static void rkisp1_isp_set_src_fmt(struct rkisp1_isp *isp,
 	struct v4l2_mbus_framefmt *src_fmt;
 	const struct v4l2_rect *src_crop;
 
-	src_fmt = rkisp1_isp_get_pad_fmt(isp, cfg,
+	src_fmt = rkisp1_isp_get_pad_fmt(isp, sd_state,
 					 RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
+	src_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
 					   RKISP1_ISP_PAD_SOURCE_VIDEO, which);
 
 	src_fmt->code = format->code;
@@ -717,17 +723,17 @@ static void rkisp1_isp_set_src_fmt(struct rkisp1_isp *isp,
 }
 
 static void rkisp1_isp_set_src_crop(struct rkisp1_isp *isp,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_rect *r, unsigned int which)
 {
 	struct v4l2_mbus_framefmt *src_fmt;
 	const struct v4l2_rect *sink_crop;
 	struct v4l2_rect *src_crop;
 
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
+	src_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
 					   RKISP1_ISP_PAD_SOURCE_VIDEO,
 					   which);
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg,
+	sink_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
 					    RKISP1_ISP_PAD_SINK_VIDEO,
 					    which);
 
@@ -740,21 +746,23 @@ static void rkisp1_isp_set_src_crop(struct rkisp1_isp *isp,
 	*r = *src_crop;
 
 	/* Propagate to out format */
-	src_fmt = rkisp1_isp_get_pad_fmt(isp, cfg,
+	src_fmt = rkisp1_isp_get_pad_fmt(isp, sd_state,
 					 RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	rkisp1_isp_set_src_fmt(isp, cfg, src_fmt, which);
+	rkisp1_isp_set_src_fmt(isp, sd_state, src_fmt, which);
 }
 
 static void rkisp1_isp_set_sink_crop(struct rkisp1_isp *isp,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_rect *r, unsigned int which)
 {
 	struct v4l2_rect *sink_crop, *src_crop;
 	struct v4l2_mbus_framefmt *sink_fmt;
 
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
+	sink_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
+					    RKISP1_ISP_PAD_SINK_VIDEO,
 					    which);
-	sink_fmt = rkisp1_isp_get_pad_fmt(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
+	sink_fmt = rkisp1_isp_get_pad_fmt(isp, sd_state,
+					  RKISP1_ISP_PAD_SINK_VIDEO,
 					  which);
 
 	sink_crop->left = ALIGN(r->left, 2);
@@ -766,13 +774,13 @@ static void rkisp1_isp_set_sink_crop(struct rkisp1_isp *isp,
 	*r = *sink_crop;
 
 	/* Propagate to out crop */
-	src_crop = rkisp1_isp_get_pad_crop(isp, cfg,
+	src_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
 					   RKISP1_ISP_PAD_SOURCE_VIDEO, which);
-	rkisp1_isp_set_src_crop(isp, cfg, src_crop, which);
+	rkisp1_isp_set_src_crop(isp, sd_state, src_crop, which);
 }
 
 static void rkisp1_isp_set_sink_fmt(struct rkisp1_isp *isp,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_mbus_framefmt *format,
 				    unsigned int which)
 {
@@ -780,7 +788,8 @@ static void rkisp1_isp_set_sink_fmt(struct rkisp1_isp *isp,
 	struct v4l2_mbus_framefmt *sink_fmt;
 	struct v4l2_rect *sink_crop;
 
-	sink_fmt = rkisp1_isp_get_pad_fmt(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
+	sink_fmt = rkisp1_isp_get_pad_fmt(isp, sd_state,
+					  RKISP1_ISP_PAD_SINK_VIDEO,
 					  which);
 	sink_fmt->code = format->code;
 	mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
@@ -801,36 +810,40 @@ static void rkisp1_isp_set_sink_fmt(struct rkisp1_isp *isp,
 	*format = *sink_fmt;
 
 	/* Propagate to in crop */
-	sink_crop = rkisp1_isp_get_pad_crop(isp, cfg, RKISP1_ISP_PAD_SINK_VIDEO,
+	sink_crop = rkisp1_isp_get_pad_crop(isp, sd_state,
+					    RKISP1_ISP_PAD_SINK_VIDEO,
 					    which);
-	rkisp1_isp_set_sink_crop(isp, cfg, sink_crop, which);
+	rkisp1_isp_set_sink_crop(isp, sd_state, sink_crop, which);
 }
 
 static int rkisp1_isp_get_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
 
 	mutex_lock(&isp->ops_lock);
-	fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad, fmt->which);
+	fmt->format = *rkisp1_isp_get_pad_fmt(isp, sd_state, fmt->pad,
+					      fmt->which);
 	mutex_unlock(&isp->ops_lock);
 	return 0;
 }
 
 static int rkisp1_isp_set_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
 
 	mutex_lock(&isp->ops_lock);
 	if (fmt->pad == RKISP1_ISP_PAD_SINK_VIDEO)
-		rkisp1_isp_set_sink_fmt(isp, cfg, &fmt->format, fmt->which);
+		rkisp1_isp_set_sink_fmt(isp, sd_state, &fmt->format,
+					fmt->which);
 	else if (fmt->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
-		rkisp1_isp_set_src_fmt(isp, cfg, &fmt->format, fmt->which);
+		rkisp1_isp_set_src_fmt(isp, sd_state, &fmt->format,
+				       fmt->which);
 	else
-		fmt->format = *rkisp1_isp_get_pad_fmt(isp, cfg, fmt->pad,
+		fmt->format = *rkisp1_isp_get_pad_fmt(isp, sd_state, fmt->pad,
 						      fmt->which);
 
 	mutex_unlock(&isp->ops_lock);
@@ -838,7 +851,7 @@ static int rkisp1_isp_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_isp_get_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	struct rkisp1_isp *isp = container_of(sd, struct rkisp1_isp, sd);
@@ -854,20 +867,20 @@ static int rkisp1_isp_get_selection(struct v4l2_subdev *sd,
 		if (sel->pad == RKISP1_ISP_PAD_SINK_VIDEO) {
 			struct v4l2_mbus_framefmt *fmt;
 
-			fmt = rkisp1_isp_get_pad_fmt(isp, cfg, sel->pad,
+			fmt = rkisp1_isp_get_pad_fmt(isp, sd_state, sel->pad,
 						     sel->which);
 			sel->r.height = fmt->height;
 			sel->r.width = fmt->width;
 			sel->r.left = 0;
 			sel->r.top = 0;
 		} else {
-			sel->r = *rkisp1_isp_get_pad_crop(isp, cfg,
-						RKISP1_ISP_PAD_SINK_VIDEO,
-						sel->which);
+			sel->r = *rkisp1_isp_get_pad_crop(isp, sd_state,
+							  RKISP1_ISP_PAD_SINK_VIDEO,
+							  sel->which);
 		}
 		break;
 	case V4L2_SEL_TGT_CROP:
-		sel->r = *rkisp1_isp_get_pad_crop(isp, cfg, sel->pad,
+		sel->r = *rkisp1_isp_get_pad_crop(isp, sd_state, sel->pad,
 						  sel->which);
 		break;
 	default:
@@ -878,7 +891,7 @@ static int rkisp1_isp_get_selection(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_isp_set_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	struct rkisp1_device *rkisp1 =
@@ -893,9 +906,9 @@ static int rkisp1_isp_set_selection(struct v4l2_subdev *sd,
 		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
 	mutex_lock(&isp->ops_lock);
 	if (sel->pad == RKISP1_ISP_PAD_SINK_VIDEO)
-		rkisp1_isp_set_sink_crop(isp, cfg, &sel->r, sel->which);
+		rkisp1_isp_set_sink_crop(isp, sd_state, &sel->r, sel->which);
 	else if (sel->pad == RKISP1_ISP_PAD_SOURCE_VIDEO)
-		rkisp1_isp_set_src_crop(isp, cfg, &sel->r, sel->which);
+		rkisp1_isp_set_src_crop(isp, sd_state, &sel->r, sel->which);
 	else
 		ret = -EINVAL;
 
@@ -1037,6 +1050,9 @@ static const struct v4l2_subdev_ops rkisp1_isp_ops = {
 
 int rkisp1_isp_register(struct rkisp1_device *rkisp1)
 {
+	struct v4l2_subdev_state state = {
+		.pads = rkisp1->isp.pad_cfg
+		};
 	struct rkisp1_isp *isp = &rkisp1->isp;
 	struct media_pad *pads = isp->pads;
 	struct v4l2_subdev *sd = &isp->sd;
@@ -1069,7 +1085,7 @@ int rkisp1_isp_register(struct rkisp1_device *rkisp1)
 		goto err_cleanup_media_entity;
 	}
 
-	rkisp1_isp_init_config(sd, rkisp1->isp.pad_cfg);
+	rkisp1_isp_init_config(sd, &state);
 	return 0;
 
 err_cleanup_media_entity:
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
index 79deed8adceab..2070f4b067059 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-resizer.c
@@ -180,24 +180,30 @@ static const struct rkisp1_rsz_config rkisp1_rsz_config_sp = {
 
 static struct v4l2_mbus_framefmt *
 rkisp1_rsz_get_pad_fmt(struct rkisp1_resizer *rsz,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       unsigned int pad, u32 which)
 {
+	struct v4l2_subdev_state state = {
+		.pads = rsz->pad_cfg
+		};
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&rsz->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&rsz->sd, sd_state, pad);
 	else
-		return v4l2_subdev_get_try_format(&rsz->sd, rsz->pad_cfg, pad);
+		return v4l2_subdev_get_try_format(&rsz->sd, &state, pad);
 }
 
 static struct v4l2_rect *
 rkisp1_rsz_get_pad_crop(struct rkisp1_resizer *rsz,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			unsigned int pad, u32 which)
 {
+	struct v4l2_subdev_state state = {
+		.pads = rsz->pad_cfg
+		};
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&rsz->sd, cfg, pad);
+		return v4l2_subdev_get_try_crop(&rsz->sd, sd_state, pad);
 	else
-		return v4l2_subdev_get_try_crop(&rsz->sd, rsz->pad_cfg, pad);
+		return v4l2_subdev_get_try_crop(&rsz->sd, &state, pad);
 }
 
 /* ----------------------------------------------------------------------------
@@ -451,12 +457,15 @@ static void rkisp1_rsz_config(struct rkisp1_resizer *rsz,
  */
 
 static int rkisp1_rsz_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct rkisp1_resizer *rsz =
 		container_of(sd, struct rkisp1_resizer, sd);
 	struct v4l2_subdev_pad_config dummy_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &dummy_cfg
+		};
 	u32 pad = code->pad;
 	int ret;
 
@@ -481,7 +490,7 @@ static int rkisp1_rsz_enum_mbus_code(struct v4l2_subdev *sd,
 	/* supported mbus codes on the sink pad are the same as isp src pad */
 	code->pad = RKISP1_ISP_PAD_SOURCE_VIDEO;
 	ret = v4l2_subdev_call(&rsz->rkisp1->isp.sd, pad, enum_mbus_code,
-			       &dummy_cfg, code);
+			       &pad_state, code);
 
 	/* restore pad */
 	code->pad = pad;
@@ -490,24 +499,27 @@ static int rkisp1_rsz_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_rsz_init_config(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg)
+				  struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
 	struct v4l2_rect *sink_crop;
 
-	sink_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SRC);
+	sink_fmt = v4l2_subdev_get_try_format(sd, sd_state,
+					      RKISP1_RSZ_PAD_SRC);
 	sink_fmt->width = RKISP1_DEFAULT_WIDTH;
 	sink_fmt->height = RKISP1_DEFAULT_HEIGHT;
 	sink_fmt->field = V4L2_FIELD_NONE;
 	sink_fmt->code = RKISP1_DEF_FMT;
 
-	sink_crop = v4l2_subdev_get_try_crop(sd, cfg, RKISP1_RSZ_PAD_SINK);
+	sink_crop = v4l2_subdev_get_try_crop(sd, sd_state,
+					     RKISP1_RSZ_PAD_SINK);
 	sink_crop->width = RKISP1_DEFAULT_WIDTH;
 	sink_crop->height = RKISP1_DEFAULT_HEIGHT;
 	sink_crop->left = 0;
 	sink_crop->top = 0;
 
-	src_fmt = v4l2_subdev_get_try_format(sd, cfg, RKISP1_RSZ_PAD_SINK);
+	src_fmt = v4l2_subdev_get_try_format(sd, sd_state,
+					     RKISP1_RSZ_PAD_SINK);
 	*src_fmt = *sink_fmt;
 
 	/* NOTE: there is no crop in the source pad, only in the sink */
@@ -516,15 +528,17 @@ static int rkisp1_rsz_init_config(struct v4l2_subdev *sd,
 }
 
 static void rkisp1_rsz_set_src_fmt(struct rkisp1_resizer *rsz,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_mbus_framefmt *format,
 				   unsigned int which)
 {
 	const struct rkisp1_isp_mbus_info *sink_mbus_info;
 	struct v4l2_mbus_framefmt *src_fmt, *sink_fmt;
 
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
-	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, sd_state, RKISP1_RSZ_PAD_SINK,
+					  which);
+	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, sd_state, RKISP1_RSZ_PAD_SRC,
+					 which);
 	sink_mbus_info = rkisp1_isp_mbus_info_get(sink_fmt->code);
 
 	/* for YUV formats, userspace can change the mbus code on the src pad if it is supported */
@@ -543,7 +557,7 @@ static void rkisp1_rsz_set_src_fmt(struct rkisp1_resizer *rsz,
 }
 
 static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_rect *r,
 				     unsigned int which)
 {
@@ -551,8 +565,10 @@ static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
 	struct v4l2_mbus_framefmt *sink_fmt;
 	struct v4l2_rect *sink_crop;
 
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, sd_state, RKISP1_RSZ_PAD_SINK,
+					  which);
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, sd_state,
+					    RKISP1_RSZ_PAD_SINK,
 					    which);
 
 	/* Not crop for MP bayer raw data */
@@ -579,7 +595,7 @@ static void rkisp1_rsz_set_sink_crop(struct rkisp1_resizer *rsz,
 }
 
 static void rkisp1_rsz_set_sink_fmt(struct rkisp1_resizer *rsz,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_mbus_framefmt *format,
 				    unsigned int which)
 {
@@ -587,9 +603,12 @@ static void rkisp1_rsz_set_sink_fmt(struct rkisp1_resizer *rsz,
 	struct v4l2_mbus_framefmt *sink_fmt, *src_fmt;
 	struct v4l2_rect *sink_crop;
 
-	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK, which);
-	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SRC, which);
-	sink_crop = rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+	sink_fmt = rkisp1_rsz_get_pad_fmt(rsz, sd_state, RKISP1_RSZ_PAD_SINK,
+					  which);
+	src_fmt = rkisp1_rsz_get_pad_fmt(rsz, sd_state, RKISP1_RSZ_PAD_SRC,
+					 which);
+	sink_crop = rkisp1_rsz_get_pad_crop(rsz, sd_state,
+					    RKISP1_RSZ_PAD_SINK,
 					    which);
 	if (rsz->id == RKISP1_SELFPATH)
 		sink_fmt->code = MEDIA_BUS_FMT_YUYV8_2X8;
@@ -617,24 +636,25 @@ static void rkisp1_rsz_set_sink_fmt(struct rkisp1_resizer *rsz,
 	*format = *sink_fmt;
 
 	/* Update sink crop */
-	rkisp1_rsz_set_sink_crop(rsz, cfg, sink_crop, which);
+	rkisp1_rsz_set_sink_crop(rsz, sd_state, sink_crop, which);
 }
 
 static int rkisp1_rsz_get_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct rkisp1_resizer *rsz =
 		container_of(sd, struct rkisp1_resizer, sd);
 
 	mutex_lock(&rsz->ops_lock);
-	fmt->format = *rkisp1_rsz_get_pad_fmt(rsz, cfg, fmt->pad, fmt->which);
+	fmt->format = *rkisp1_rsz_get_pad_fmt(rsz, sd_state, fmt->pad,
+					      fmt->which);
 	mutex_unlock(&rsz->ops_lock);
 	return 0;
 }
 
 static int rkisp1_rsz_set_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct rkisp1_resizer *rsz =
@@ -642,16 +662,18 @@ static int rkisp1_rsz_set_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&rsz->ops_lock);
 	if (fmt->pad == RKISP1_RSZ_PAD_SINK)
-		rkisp1_rsz_set_sink_fmt(rsz, cfg, &fmt->format, fmt->which);
+		rkisp1_rsz_set_sink_fmt(rsz, sd_state, &fmt->format,
+					fmt->which);
 	else
-		rkisp1_rsz_set_src_fmt(rsz, cfg, &fmt->format, fmt->which);
+		rkisp1_rsz_set_src_fmt(rsz, sd_state, &fmt->format,
+				       fmt->which);
 
 	mutex_unlock(&rsz->ops_lock);
 	return 0;
 }
 
 static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	struct rkisp1_resizer *rsz =
@@ -665,7 +687,8 @@ static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
 	mutex_lock(&rsz->ops_lock);
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP_BOUNDS:
-		mf_sink = rkisp1_rsz_get_pad_fmt(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+		mf_sink = rkisp1_rsz_get_pad_fmt(rsz, sd_state,
+						 RKISP1_RSZ_PAD_SINK,
 						 sel->which);
 		sel->r.height = mf_sink->height;
 		sel->r.width = mf_sink->width;
@@ -673,7 +696,8 @@ static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
 		sel->r.top = 0;
 		break;
 	case V4L2_SEL_TGT_CROP:
-		sel->r = *rkisp1_rsz_get_pad_crop(rsz, cfg, RKISP1_RSZ_PAD_SINK,
+		sel->r = *rkisp1_rsz_get_pad_crop(rsz, sd_state,
+						  RKISP1_RSZ_PAD_SINK,
 						  sel->which);
 		break;
 	default:
@@ -685,7 +709,7 @@ static int rkisp1_rsz_get_selection(struct v4l2_subdev *sd,
 }
 
 static int rkisp1_rsz_set_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	struct rkisp1_resizer *rsz =
@@ -698,7 +722,7 @@ static int rkisp1_rsz_set_selection(struct v4l2_subdev *sd,
 		sel->pad, sel->r.left, sel->r.top, sel->r.width, sel->r.height);
 
 	mutex_lock(&rsz->ops_lock);
-	rkisp1_rsz_set_sink_crop(rsz, cfg, &sel->r, sel->which);
+	rkisp1_rsz_set_sink_crop(rsz, sd_state, &sel->r, sel->which);
 	mutex_unlock(&rsz->ops_lock);
 
 	return 0;
@@ -764,6 +788,9 @@ static void rkisp1_rsz_unregister(struct rkisp1_resizer *rsz)
 
 static int rkisp1_rsz_register(struct rkisp1_resizer *rsz)
 {
+	struct v4l2_subdev_state state = {
+		.pads = rsz->pad_cfg
+		};
 	static const char * const dev_names[] = {
 		RKISP1_RSZ_MP_DEV_NAME,
 		RKISP1_RSZ_SP_DEV_NAME
@@ -802,7 +829,7 @@ static int rkisp1_rsz_register(struct rkisp1_resizer *rsz)
 		goto err_cleanup_media_entity;
 	}
 
-	rkisp1_rsz_init_config(sd, rsz->pad_cfg);
+	rkisp1_rsz_init_config(sd, &state);
 	return 0;
 
 err_cleanup_media_entity:
diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c
index 62241ec3b978d..140854ab4dd8c 100644
--- a/drivers/media/platform/s3c-camif/camif-capture.c
+++ b/drivers/media/platform/s3c-camif/camif-capture.c
@@ -1199,7 +1199,7 @@ static const u32 camif_mbus_formats[] = {
  */
 
 static int s3c_camif_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-					struct v4l2_subdev_pad_config *cfg,
+					struct v4l2_subdev_state *sd_state,
 					struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(camif_mbus_formats))
@@ -1210,14 +1210,14 @@ static int s3c_camif_subdev_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int s3c_camif_subdev_get_fmt(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct camif_dev *camif = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *mf = &fmt->format;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		fmt->format = *mf;
 		return 0;
 	}
@@ -1278,7 +1278,7 @@ static void __camif_subdev_try_format(struct camif_dev *camif,
 }
 
 static int s3c_camif_subdev_set_fmt(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct camif_dev *camif = v4l2_get_subdevdata(sd);
@@ -1306,7 +1306,7 @@ static int s3c_camif_subdev_set_fmt(struct v4l2_subdev *sd,
 	__camif_subdev_try_format(camif, mf, fmt->pad);
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 		*mf = fmt->format;
 		mutex_unlock(&camif->lock);
 		return 0;
@@ -1345,7 +1345,7 @@ static int s3c_camif_subdev_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int s3c_camif_subdev_get_selection(struct v4l2_subdev *sd,
-					  struct v4l2_subdev_pad_config *cfg,
+					  struct v4l2_subdev_state *sd_state,
 					  struct v4l2_subdev_selection *sel)
 {
 	struct camif_dev *camif = v4l2_get_subdevdata(sd);
@@ -1358,7 +1358,7 @@ static int s3c_camif_subdev_get_selection(struct v4l2_subdev *sd,
 		return -EINVAL;
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		sel->r = *v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		sel->r = *v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		return 0;
 	}
 
@@ -1432,7 +1432,7 @@ static void __camif_try_crop(struct camif_dev *camif, struct v4l2_rect *r)
 }
 
 static int s3c_camif_subdev_set_selection(struct v4l2_subdev *sd,
-					  struct v4l2_subdev_pad_config *cfg,
+					  struct v4l2_subdev_state *sd_state,
 					  struct v4l2_subdev_selection *sel)
 {
 	struct camif_dev *camif = v4l2_get_subdevdata(sd);
@@ -1446,7 +1446,7 @@ static int s3c_camif_subdev_set_selection(struct v4l2_subdev *sd,
 	__camif_try_crop(camif, &sel->r);
 
 	if (sel->which == V4L2_SUBDEV_FORMAT_TRY) {
-		*v4l2_subdev_get_try_crop(sd, cfg, sel->pad) = sel->r;
+		*v4l2_subdev_get_try_crop(sd, sd_state, sel->pad) = sel->r;
 	} else {
 		unsigned long flags;
 		unsigned int i;
diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c
index b33c6e7ae0a1b..d914ccef98317 100644
--- a/drivers/media/platform/stm32/stm32-dcmi.c
+++ b/drivers/media/platform/stm32/stm32-dcmi.c
@@ -600,7 +600,7 @@ static struct media_entity *dcmi_find_source(struct stm32_dcmi *dcmi)
 }
 
 static int dcmi_pipeline_s_fmt(struct stm32_dcmi *dcmi,
-			       struct v4l2_subdev_pad_config *pad_cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *format)
 {
 	struct media_entity *entity = &dcmi->source->entity;
@@ -642,7 +642,7 @@ static int dcmi_pipeline_s_fmt(struct stm32_dcmi *dcmi,
 			format->format.width, format->format.height);
 
 		fmt.pad = pad->index;
-		ret = v4l2_subdev_call(subdev, pad, set_fmt, pad_cfg, &fmt);
+		ret = v4l2_subdev_call(subdev, pad, set_fmt, sd_state, &fmt);
 		if (ret < 0) {
 			dev_err(dcmi->dev, "%s: Failed to set format 0x%x %ux%u on \"%s\":%d pad (%d)\n",
 				__func__, format->format.code,
@@ -978,6 +978,9 @@ static int dcmi_try_fmt(struct stm32_dcmi *dcmi, struct v4l2_format *f,
 	struct dcmi_framesize sd_fsize;
 	struct v4l2_pix_format *pix = &f->fmt.pix;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -1013,7 +1016,7 @@ static int dcmi_try_fmt(struct stm32_dcmi *dcmi, struct v4l2_format *f,
 
 	v4l2_fill_mbus_format(&format.format, pix, sd_fmt->mbus_code);
 	ret = v4l2_subdev_call(dcmi->source, pad, set_fmt,
-			       &pad_cfg, &format);
+			       &pad_state, &format);
 	if (ret < 0)
 		return ret;
 
@@ -1163,6 +1166,9 @@ static int dcmi_set_sensor_format(struct stm32_dcmi *dcmi,
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	int ret;
 
 	sd_fmt = find_format_by_fourcc(dcmi, pix->pixelformat);
@@ -1176,7 +1182,7 @@ static int dcmi_set_sensor_format(struct stm32_dcmi *dcmi,
 
 	v4l2_fill_mbus_format(&format.format, pix, sd_fmt->mbus_code);
 	ret = v4l2_subdev_call(dcmi->source, pad, set_fmt,
-			       &pad_cfg, &format);
+			       &pad_state, &format);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
index 54b909987caa5..3872027ed2faf 100644
--- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
+++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_v4l2.c
@@ -271,25 +271,26 @@ static const struct v4l2_mbus_framefmt sun4i_csi_pad_fmt_default = {
 };
 
 static int sun4i_csi_subdev_init_cfg(struct v4l2_subdev *subdev,
-				     struct v4l2_subdev_pad_config *cfg)
+				     struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_mbus_framefmt *fmt;
 
-	fmt = v4l2_subdev_get_try_format(subdev, cfg, CSI_SUBDEV_SINK);
+	fmt = v4l2_subdev_get_try_format(subdev, sd_state, CSI_SUBDEV_SINK);
 	*fmt = sun4i_csi_pad_fmt_default;
 
 	return 0;
 }
 
 static int sun4i_csi_subdev_get_fmt(struct v4l2_subdev *subdev,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct sun4i_csi *csi = container_of(subdev, struct sun4i_csi, subdev);
 	struct v4l2_mbus_framefmt *subdev_fmt;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		subdev_fmt = v4l2_subdev_get_try_format(subdev, cfg, fmt->pad);
+		subdev_fmt = v4l2_subdev_get_try_format(subdev, sd_state,
+							fmt->pad);
 	else
 		subdev_fmt = &csi->subdev_fmt;
 
@@ -299,14 +300,15 @@ static int sun4i_csi_subdev_get_fmt(struct v4l2_subdev *subdev,
 }
 
 static int sun4i_csi_subdev_set_fmt(struct v4l2_subdev *subdev,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_format *fmt)
 {
 	struct sun4i_csi *csi = container_of(subdev, struct sun4i_csi, subdev);
 	struct v4l2_mbus_framefmt *subdev_fmt;
 
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		subdev_fmt = v4l2_subdev_get_try_format(subdev, cfg, fmt->pad);
+		subdev_fmt = v4l2_subdev_get_try_format(subdev, sd_state,
+							fmt->pad);
 	else
 		subdev_fmt = &csi->subdev_fmt;
 
@@ -325,7 +327,7 @@ static int sun4i_csi_subdev_set_fmt(struct v4l2_subdev *subdev,
 
 static int
 sun4i_csi_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *mbus)
 {
 	if (mbus->index >= ARRAY_SIZE(sun4i_csi_formats))
diff --git a/drivers/media/platform/ti-vpe/cal-camerarx.c b/drivers/media/platform/ti-vpe/cal-camerarx.c
index cbe6114908de7..124a4e2bdefe0 100644
--- a/drivers/media/platform/ti-vpe/cal-camerarx.c
+++ b/drivers/media/platform/ti-vpe/cal-camerarx.c
@@ -586,12 +586,12 @@ static inline struct cal_camerarx *to_cal_camerarx(struct v4l2_subdev *sd)
 
 static struct v4l2_mbus_framefmt *
 cal_camerarx_get_pad_format(struct cal_camerarx *phy,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&phy->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&phy->subdev, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &phy->formats[pad];
 	default:
@@ -611,7 +611,7 @@ static int cal_camerarx_sd_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int cal_camerarx_sd_enum_mbus_code(struct v4l2_subdev *sd,
-					  struct v4l2_subdev_pad_config *cfg,
+					  struct v4l2_subdev_state *sd_state,
 					  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct cal_camerarx *phy = to_cal_camerarx(sd);
@@ -623,7 +623,7 @@ static int cal_camerarx_sd_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index > 0)
 			return -EINVAL;
 
-		fmt = cal_camerarx_get_pad_format(phy, cfg,
+		fmt = cal_camerarx_get_pad_format(phy, sd_state,
 						  CAL_CAMERARX_PAD_SINK,
 						  code->which);
 		code->code = fmt->code;
@@ -639,7 +639,7 @@ static int cal_camerarx_sd_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int cal_camerarx_sd_enum_frame_size(struct v4l2_subdev *sd,
-					   struct v4l2_subdev_pad_config *cfg,
+					   struct v4l2_subdev_state *sd_state,
 					   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct cal_camerarx *phy = to_cal_camerarx(sd);
@@ -652,7 +652,7 @@ static int cal_camerarx_sd_enum_frame_size(struct v4l2_subdev *sd,
 	if (fse->pad == CAL_CAMERARX_PAD_SOURCE) {
 		struct v4l2_mbus_framefmt *fmt;
 
-		fmt = cal_camerarx_get_pad_format(phy, cfg,
+		fmt = cal_camerarx_get_pad_format(phy, sd_state,
 						  CAL_CAMERARX_PAD_SINK,
 						  fse->which);
 		if (fse->code != fmt->code)
@@ -679,20 +679,21 @@ static int cal_camerarx_sd_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int cal_camerarx_sd_get_fmt(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *format)
 {
 	struct cal_camerarx *phy = to_cal_camerarx(sd);
 	struct v4l2_mbus_framefmt *fmt;
 
-	fmt = cal_camerarx_get_pad_format(phy, cfg, format->pad, format->which);
+	fmt = cal_camerarx_get_pad_format(phy, sd_state, format->pad,
+					  format->which);
 	format->format = *fmt;
 
 	return 0;
 }
 
 static int cal_camerarx_sd_set_fmt(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_format *format)
 {
 	struct cal_camerarx *phy = to_cal_camerarx(sd);
@@ -702,7 +703,7 @@ static int cal_camerarx_sd_set_fmt(struct v4l2_subdev *sd,
 
 	/* No transcoding, source and sink formats must match. */
 	if (format->pad == CAL_CAMERARX_PAD_SOURCE)
-		return cal_camerarx_sd_get_fmt(sd, cfg, format);
+		return cal_camerarx_sd_get_fmt(sd, sd_state, format);
 
 	/*
 	 * Default to the first format is the requested media bus code isn't
@@ -727,11 +728,13 @@ static int cal_camerarx_sd_set_fmt(struct v4l2_subdev *sd,
 	format->format.code = fmtinfo->code;
 
 	/* Store the format and propagate it to the source pad. */
-	fmt = cal_camerarx_get_pad_format(phy, cfg, CAL_CAMERARX_PAD_SINK,
+	fmt = cal_camerarx_get_pad_format(phy, sd_state,
+					  CAL_CAMERARX_PAD_SINK,
 					  format->which);
 	*fmt = format->format;
 
-	fmt = cal_camerarx_get_pad_format(phy, cfg, CAL_CAMERARX_PAD_SOURCE,
+	fmt = cal_camerarx_get_pad_format(phy, sd_state,
+					  CAL_CAMERARX_PAD_SOURCE,
 					  format->which);
 	*fmt = format->format;
 
@@ -742,11 +745,11 @@ static int cal_camerarx_sd_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int cal_camerarx_sd_init_cfg(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg)
+				    struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format format = {
-		.which = cfg ? V4L2_SUBDEV_FORMAT_TRY
-		       : V4L2_SUBDEV_FORMAT_ACTIVE,
+		.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
+		: V4L2_SUBDEV_FORMAT_ACTIVE,
 		.pad = CAL_CAMERARX_PAD_SINK,
 		.format = {
 			.width = 640,
@@ -760,7 +763,7 @@ static int cal_camerarx_sd_init_cfg(struct v4l2_subdev *sd,
 		},
 	};
 
-	return cal_camerarx_sd_set_fmt(sd, cfg, &format);
+	return cal_camerarx_sd_set_fmt(sd, sd_state, &format);
 }
 
 static const struct v4l2_subdev_video_ops cal_camerarx_video_ops = {
diff --git a/drivers/media/platform/via-camera.c b/drivers/media/platform/via-camera.c
index ed0ad68c5c483..3655573e8581d 100644
--- a/drivers/media/platform/via-camera.c
+++ b/drivers/media/platform/via-camera.c
@@ -844,6 +844,9 @@ static int viacam_do_try_fmt(struct via_camera *cam,
 {
 	int ret;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -852,7 +855,7 @@ static int viacam_do_try_fmt(struct via_camera *cam,
 	upix->pixelformat = f->pixelformat;
 	viacam_fmt_pre(upix, spix);
 	v4l2_fill_mbus_format(&format.format, spix, f->mbus_code);
-	ret = sensor_call(cam, pad, set_fmt, &pad_cfg, &format);
+	ret = sensor_call(cam, pad, set_fmt, &pad_state, &format);
 	v4l2_fill_pix_format(spix, &format.format);
 	viacam_fmt_post(upix, spix);
 	return ret;
diff --git a/drivers/media/platform/video-mux.c b/drivers/media/platform/video-mux.c
index 133122e385150..f7e2a5e48ccf0 100644
--- a/drivers/media/platform/video-mux.c
+++ b/drivers/media/platform/video-mux.c
@@ -140,14 +140,14 @@ static const struct v4l2_subdev_video_ops video_mux_subdev_video_ops = {
 
 static struct v4l2_mbus_framefmt *
 __video_mux_get_pad_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad, u32 which)
 {
 	struct video_mux *vmux = v4l2_subdev_to_video_mux(sd);
 
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(sd, cfg, pad);
+		return v4l2_subdev_get_try_format(sd, sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &vmux->format_mbus[pad];
 	default:
@@ -156,14 +156,15 @@ __video_mux_get_pad_format(struct v4l2_subdev *sd,
 }
 
 static int video_mux_get_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *sdformat)
 {
 	struct video_mux *vmux = v4l2_subdev_to_video_mux(sd);
 
 	mutex_lock(&vmux->lock);
 
-	sdformat->format = *__video_mux_get_pad_format(sd, cfg, sdformat->pad,
+	sdformat->format = *__video_mux_get_pad_format(sd, sd_state,
+						       sdformat->pad,
 						       sdformat->which);
 
 	mutex_unlock(&vmux->lock);
@@ -172,7 +173,7 @@ static int video_mux_get_format(struct v4l2_subdev *sd,
 }
 
 static int video_mux_set_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *sdformat)
 {
 	struct video_mux *vmux = v4l2_subdev_to_video_mux(sd);
@@ -180,12 +181,13 @@ static int video_mux_set_format(struct v4l2_subdev *sd,
 	struct media_pad *pad = &vmux->pads[sdformat->pad];
 	u16 source_pad = sd->entity.num_pads - 1;
 
-	mbusformat = __video_mux_get_pad_format(sd, cfg, sdformat->pad,
-					    sdformat->which);
+	mbusformat = __video_mux_get_pad_format(sd, sd_state, sdformat->pad,
+						sdformat->which);
 	if (!mbusformat)
 		return -EINVAL;
 
-	source_mbusformat = __video_mux_get_pad_format(sd, cfg, source_pad,
+	source_mbusformat = __video_mux_get_pad_format(sd, sd_state,
+						       source_pad,
 						       sdformat->which);
 	if (!source_mbusformat)
 		return -EINVAL;
@@ -310,7 +312,7 @@ static int video_mux_set_format(struct v4l2_subdev *sd,
 }
 
 static int video_mux_init_cfg(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg)
+			      struct v4l2_subdev_state *sd_state)
 {
 	struct video_mux *vmux = v4l2_subdev_to_video_mux(sd);
 	struct v4l2_mbus_framefmt *mbusformat;
@@ -319,7 +321,7 @@ static int video_mux_init_cfg(struct v4l2_subdev *sd,
 	mutex_lock(&vmux->lock);
 
 	for (i = 0; i < sd->entity.num_pads; i++) {
-		mbusformat = v4l2_subdev_get_try_format(sd, cfg, i);
+		mbusformat = v4l2_subdev_get_try_format(sd, sd_state, i);
 		*mbusformat = video_mux_format_mbus_default;
 	}
 
diff --git a/drivers/media/platform/vsp1/vsp1_brx.c b/drivers/media/platform/vsp1/vsp1_brx.c
index 2d86c718a5cf9..89385b4cabe57 100644
--- a/drivers/media/platform/vsp1/vsp1_brx.c
+++ b/drivers/media/platform/vsp1/vsp1_brx.c
@@ -65,7 +65,7 @@ static const struct v4l2_ctrl_ops brx_ctrl_ops = {
  */
 
 static int brx_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	static const unsigned int codes[] = {
@@ -73,12 +73,12 @@ static int brx_enum_mbus_code(struct v4l2_subdev *subdev,
 		MEDIA_BUS_FMT_AYUV8_1X32,
 	};
 
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, codes,
 					  ARRAY_SIZE(codes));
 }
 
 static int brx_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index)
@@ -97,14 +97,14 @@ static int brx_enum_frame_size(struct v4l2_subdev *subdev,
 }
 
 static struct v4l2_rect *brx_get_compose(struct vsp1_brx *brx,
-					 struct v4l2_subdev_pad_config *cfg,
+					 struct v4l2_subdev_state *sd_state,
 					 unsigned int pad)
 {
-	return v4l2_subdev_get_try_compose(&brx->entity.subdev, cfg, pad);
+	return v4l2_subdev_get_try_compose(&brx->entity.subdev, sd_state, pad);
 }
 
 static void brx_try_format(struct vsp1_brx *brx,
-			   struct v4l2_subdev_pad_config *config,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad, struct v4l2_mbus_framefmt *fmt)
 {
 	struct v4l2_mbus_framefmt *format;
@@ -119,7 +119,7 @@ static void brx_try_format(struct vsp1_brx *brx,
 
 	default:
 		/* The BRx can't perform format conversion. */
-		format = vsp1_entity_get_pad_format(&brx->entity, config,
+		format = vsp1_entity_get_pad_format(&brx->entity, sd_state,
 						    BRX_PAD_SINK(0));
 		fmt->code = format->code;
 		break;
@@ -132,17 +132,18 @@ static void brx_try_format(struct vsp1_brx *brx,
 }
 
 static int brx_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_brx *brx = to_brx(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
 	mutex_lock(&brx->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&brx->entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(&brx->entity, sd_state,
+					    fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -181,11 +182,11 @@ done:
 }
 
 static int brx_get_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_brx *brx = to_brx(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 
 	if (sel->pad == brx->entity.source_pad)
 		return -EINVAL;
@@ -199,7 +200,7 @@ static int brx_get_selection(struct v4l2_subdev *subdev,
 		return 0;
 
 	case V4L2_SEL_TGT_COMPOSE:
-		config = vsp1_entity_get_pad_config(&brx->entity, cfg,
+		config = vsp1_entity_get_pad_config(&brx->entity, sd_state,
 						    sel->which);
 		if (!config)
 			return -EINVAL;
@@ -215,11 +216,11 @@ static int brx_get_selection(struct v4l2_subdev *subdev,
 }
 
 static int brx_set_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_brx *brx = to_brx(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *compose;
 	int ret = 0;
@@ -232,7 +233,8 @@ static int brx_set_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&brx->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&brx->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&brx->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_clu.c b/drivers/media/platform/vsp1/vsp1_clu.c
index a47b23bf5abf4..c5217fee24f13 100644
--- a/drivers/media/platform/vsp1/vsp1_clu.c
+++ b/drivers/media/platform/vsp1/vsp1_clu.c
@@ -123,27 +123,28 @@ static const unsigned int clu_codes[] = {
 };
 
 static int clu_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, clu_codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, clu_codes,
 					  ARRAY_SIZE(clu_codes));
 }
 
 static int clu_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, CLU_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   CLU_MIN_SIZE,
 					   CLU_MIN_SIZE, CLU_MAX_SIZE,
 					   CLU_MAX_SIZE);
 }
 
 static int clu_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
-	return vsp1_subdev_set_pad_format(subdev, cfg, fmt, clu_codes,
+	return vsp1_subdev_set_pad_format(subdev, sd_state, fmt, clu_codes,
 					  ARRAY_SIZE(clu_codes),
 					  CLU_MIN_SIZE, CLU_MIN_SIZE,
 					  CLU_MAX_SIZE, CLU_MAX_SIZE);
diff --git a/drivers/media/platform/vsp1/vsp1_entity.c b/drivers/media/platform/vsp1/vsp1_entity.c
index aa9d2286056eb..6f51e5c755432 100644
--- a/drivers/media/platform/vsp1/vsp1_entity.c
+++ b/drivers/media/platform/vsp1/vsp1_entity.c
@@ -103,7 +103,7 @@ void vsp1_entity_configure_partition(struct vsp1_entity *entity,
 /**
  * vsp1_entity_get_pad_config - Get the pad configuration for an entity
  * @entity: the entity
- * @cfg: the TRY pad configuration
+ * @sd_state: the TRY state
  * @which: configuration selector (ACTIVE or TRY)
  *
  * When called with which set to V4L2_SUBDEV_FORMAT_ACTIVE the caller must hold
@@ -114,9 +114,9 @@ void vsp1_entity_configure_partition(struct vsp1_entity *entity,
  * and simply returned when requested. The ACTIVE configuration comes from the
  * entity structure.
  */
-struct v4l2_subdev_pad_config *
+struct v4l2_subdev_state *
 vsp1_entity_get_pad_config(struct vsp1_entity *entity,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   enum v4l2_subdev_format_whence which)
 {
 	switch (which) {
@@ -124,14 +124,14 @@ vsp1_entity_get_pad_config(struct vsp1_entity *entity,
 		return entity->config;
 	case V4L2_SUBDEV_FORMAT_TRY:
 	default:
-		return cfg;
+		return sd_state;
 	}
 }
 
 /**
  * vsp1_entity_get_pad_format - Get a pad format from storage for an entity
  * @entity: the entity
- * @cfg: the configuration storage
+ * @sd_state: the state storage
  * @pad: the pad number
  *
  * Return the format stored in the given configuration for an entity's pad. The
@@ -139,16 +139,16 @@ vsp1_entity_get_pad_config(struct vsp1_entity *entity,
  */
 struct v4l2_mbus_framefmt *
 vsp1_entity_get_pad_format(struct vsp1_entity *entity,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad)
 {
-	return v4l2_subdev_get_try_format(&entity->subdev, cfg, pad);
+	return v4l2_subdev_get_try_format(&entity->subdev, sd_state, pad);
 }
 
 /**
  * vsp1_entity_get_pad_selection - Get a pad selection from storage for entity
  * @entity: the entity
- * @cfg: the configuration storage
+ * @sd_state: the state storage
  * @pad: the pad number
  * @target: the selection target
  *
@@ -158,14 +158,16 @@ vsp1_entity_get_pad_format(struct vsp1_entity *entity,
  */
 struct v4l2_rect *
 vsp1_entity_get_pad_selection(struct vsp1_entity *entity,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      unsigned int pad, unsigned int target)
 {
 	switch (target) {
 	case V4L2_SEL_TGT_COMPOSE:
-		return v4l2_subdev_get_try_compose(&entity->subdev, cfg, pad);
+		return v4l2_subdev_get_try_compose(&entity->subdev, sd_state,
+						   pad);
 	case V4L2_SEL_TGT_CROP:
-		return v4l2_subdev_get_try_crop(&entity->subdev, cfg, pad);
+		return v4l2_subdev_get_try_crop(&entity->subdev, sd_state,
+						pad);
 	default:
 		return NULL;
 	}
@@ -180,7 +182,7 @@ vsp1_entity_get_pad_selection(struct vsp1_entity *entity,
  * function can be used as a handler for the subdev pad::init_cfg operation.
  */
 int vsp1_entity_init_cfg(struct v4l2_subdev *subdev,
-			 struct v4l2_subdev_pad_config *cfg)
+			 struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_subdev_format format;
 	unsigned int pad;
@@ -189,10 +191,10 @@ int vsp1_entity_init_cfg(struct v4l2_subdev *subdev,
 		memset(&format, 0, sizeof(format));
 
 		format.pad = pad;
-		format.which = cfg ? V4L2_SUBDEV_FORMAT_TRY
+		format.which = sd_state ? V4L2_SUBDEV_FORMAT_TRY
 			     : V4L2_SUBDEV_FORMAT_ACTIVE;
 
-		v4l2_subdev_call(subdev, pad, set_fmt, cfg, &format);
+		v4l2_subdev_call(subdev, pad, set_fmt, sd_state, &format);
 	}
 
 	return 0;
@@ -208,13 +210,13 @@ int vsp1_entity_init_cfg(struct v4l2_subdev *subdev,
  * a direct drop-in for the operation handler.
  */
 int vsp1_subdev_get_pad_format(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_entity *entity = to_vsp1_entity(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 
-	config = vsp1_entity_get_pad_config(entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(entity, sd_state, fmt->which);
 	if (!config)
 		return -EINVAL;
 
@@ -239,7 +241,7 @@ int vsp1_subdev_get_pad_format(struct v4l2_subdev *subdev,
  * the sink pad.
  */
 int vsp1_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code,
 			       const unsigned int *codes, unsigned int ncodes)
 {
@@ -251,7 +253,7 @@ int vsp1_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
 
 		code->code = codes[code->index];
 	} else {
-		struct v4l2_subdev_pad_config *config;
+		struct v4l2_subdev_state *config;
 		struct v4l2_mbus_framefmt *format;
 
 		/*
@@ -261,7 +263,8 @@ int vsp1_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
 		if (code->index)
 			return -EINVAL;
 
-		config = vsp1_entity_get_pad_config(entity, cfg, code->which);
+		config = vsp1_entity_get_pad_config(entity, sd_state,
+						    code->which);
 		if (!config)
 			return -EINVAL;
 
@@ -290,17 +293,17 @@ int vsp1_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
  * source pad size identical to the sink pad.
  */
 int vsp1_subdev_enum_frame_size(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse,
 				unsigned int min_width, unsigned int min_height,
 				unsigned int max_width, unsigned int max_height)
 {
 	struct vsp1_entity *entity = to_vsp1_entity(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
-	config = vsp1_entity_get_pad_config(entity, cfg, fse->which);
+	config = vsp1_entity_get_pad_config(entity, sd_state, fse->which);
 	if (!config)
 		return -EINVAL;
 
@@ -353,14 +356,14 @@ done:
  * source pad.
  */
 int vsp1_subdev_set_pad_format(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt,
 			       const unsigned int *codes, unsigned int ncodes,
 			       unsigned int min_width, unsigned int min_height,
 			       unsigned int max_width, unsigned int max_height)
 {
 	struct vsp1_entity *entity = to_vsp1_entity(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *selection;
 	unsigned int i;
@@ -368,7 +371,7 @@ int vsp1_subdev_set_pad_format(struct v4l2_subdev *subdev,
 
 	mutex_lock(&entity->lock);
 
-	config = vsp1_entity_get_pad_config(entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(entity, sd_state, fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -672,7 +675,7 @@ int vsp1_entity_init(struct vsp1_device *vsp1, struct vsp1_entity *entity,
 	 * Allocate the pad configuration to store formats and selection
 	 * rectangles.
 	 */
-	entity->config = v4l2_subdev_alloc_pad_config(&entity->subdev);
+	entity->config = v4l2_subdev_alloc_state(&entity->subdev);
 	if (entity->config == NULL) {
 		media_entity_cleanup(&entity->subdev.entity);
 		return -ENOMEM;
@@ -687,6 +690,6 @@ void vsp1_entity_destroy(struct vsp1_entity *entity)
 		entity->ops->destroy(entity);
 	if (entity->subdev.ctrl_handler)
 		v4l2_ctrl_handler_free(entity->subdev.ctrl_handler);
-	v4l2_subdev_free_pad_config(entity->config);
+	v4l2_subdev_free_state(entity->config);
 	media_entity_cleanup(&entity->subdev.entity);
 }
diff --git a/drivers/media/platform/vsp1/vsp1_entity.h b/drivers/media/platform/vsp1/vsp1_entity.h
index a1ceb37bb837a..f22724439cdcb 100644
--- a/drivers/media/platform/vsp1/vsp1_entity.h
+++ b/drivers/media/platform/vsp1/vsp1_entity.h
@@ -115,7 +115,7 @@ struct vsp1_entity {
 	unsigned int sink_pad;
 
 	struct v4l2_subdev subdev;
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 
 	struct mutex lock;	/* Protects the pad config */
 };
@@ -136,20 +136,20 @@ int vsp1_entity_link_setup(struct media_entity *entity,
 			   const struct media_pad *local,
 			   const struct media_pad *remote, u32 flags);
 
-struct v4l2_subdev_pad_config *
+struct v4l2_subdev_state *
 vsp1_entity_get_pad_config(struct vsp1_entity *entity,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   enum v4l2_subdev_format_whence which);
 struct v4l2_mbus_framefmt *
 vsp1_entity_get_pad_format(struct vsp1_entity *entity,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad);
 struct v4l2_rect *
 vsp1_entity_get_pad_selection(struct vsp1_entity *entity,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      unsigned int pad, unsigned int target);
 int vsp1_entity_init_cfg(struct v4l2_subdev *subdev,
-			 struct v4l2_subdev_pad_config *cfg);
+			 struct v4l2_subdev_state *sd_state);
 
 void vsp1_entity_route_setup(struct vsp1_entity *entity,
 			     struct vsp1_pipeline *pipe,
@@ -173,20 +173,20 @@ void vsp1_entity_configure_partition(struct vsp1_entity *entity,
 struct media_pad *vsp1_entity_remote_pad(struct media_pad *pad);
 
 int vsp1_subdev_get_pad_format(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt);
 int vsp1_subdev_set_pad_format(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt,
 			       const unsigned int *codes, unsigned int ncodes,
 			       unsigned int min_width, unsigned int min_height,
 			       unsigned int max_width, unsigned int max_height);
 int vsp1_subdev_enum_mbus_code(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code,
 			       const unsigned int *codes, unsigned int ncodes);
 int vsp1_subdev_enum_frame_size(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse,
 				unsigned int min_w, unsigned int min_h,
 				unsigned int max_w, unsigned int max_h);
diff --git a/drivers/media/platform/vsp1/vsp1_histo.c b/drivers/media/platform/vsp1/vsp1_histo.c
index a91e142bcb948..5e5013d2cd2ad 100644
--- a/drivers/media/platform/vsp1/vsp1_histo.c
+++ b/drivers/media/platform/vsp1/vsp1_histo.c
@@ -170,7 +170,7 @@ static const struct vb2_ops histo_video_queue_qops = {
  */
 
 static int histo_enum_mbus_code(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
@@ -180,28 +180,30 @@ static int histo_enum_mbus_code(struct v4l2_subdev *subdev,
 		return 0;
 	}
 
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, histo->formats,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code,
+					  histo->formats,
 					  histo->num_formats);
 }
 
 static int histo_enum_frame_size(struct v4l2_subdev *subdev,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->pad != HISTO_PAD_SINK)
 		return -EINVAL;
 
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, HISTO_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   HISTO_MIN_SIZE,
 					   HISTO_MIN_SIZE, HISTO_MAX_SIZE,
 					   HISTO_MAX_SIZE);
 }
 
 static int histo_get_selection(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 	int ret = 0;
@@ -211,7 +213,8 @@ static int histo_get_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&histo->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&histo->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&histo->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -256,15 +259,15 @@ done:
 }
 
 static int histo_set_crop(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *config,
-			 struct v4l2_subdev_selection *sel)
+			  struct v4l2_subdev_state *sd_state,
+			  struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *selection;
 
 	/* The crop rectangle must be inside the input frame. */
-	format = vsp1_entity_get_pad_format(&histo->entity, config,
+	format = vsp1_entity_get_pad_format(&histo->entity, sd_state,
 					    HISTO_PAD_SINK);
 	sel->r.left = clamp_t(unsigned int, sel->r.left, 0, format->width - 1);
 	sel->r.top = clamp_t(unsigned int, sel->r.top, 0, format->height - 1);
@@ -274,11 +277,11 @@ static int histo_set_crop(struct v4l2_subdev *subdev,
 				format->height - sel->r.top);
 
 	/* Set the crop rectangle and reset the compose rectangle. */
-	selection = vsp1_entity_get_pad_selection(&histo->entity, config,
+	selection = vsp1_entity_get_pad_selection(&histo->entity, sd_state,
 						  sel->pad, V4L2_SEL_TGT_CROP);
 	*selection = sel->r;
 
-	selection = vsp1_entity_get_pad_selection(&histo->entity, config,
+	selection = vsp1_entity_get_pad_selection(&histo->entity, sd_state,
 						  sel->pad,
 						  V4L2_SEL_TGT_COMPOSE);
 	*selection = sel->r;
@@ -287,7 +290,7 @@ static int histo_set_crop(struct v4l2_subdev *subdev,
 }
 
 static int histo_set_compose(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *config,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
@@ -303,7 +306,8 @@ static int histo_set_compose(struct v4l2_subdev *subdev,
 	sel->r.left = 0;
 	sel->r.top = 0;
 
-	crop = vsp1_entity_get_pad_selection(&histo->entity, config, sel->pad,
+	crop = vsp1_entity_get_pad_selection(&histo->entity, sd_state,
+					     sel->pad,
 					     V4L2_SEL_TGT_CROP);
 
 	/*
@@ -329,7 +333,7 @@ static int histo_set_compose(struct v4l2_subdev *subdev,
 	ratio = 1 << (crop->height * 2 / sel->r.height / 3);
 	sel->r.height = crop->height / ratio;
 
-	compose = vsp1_entity_get_pad_selection(&histo->entity, config,
+	compose = vsp1_entity_get_pad_selection(&histo->entity, sd_state,
 						sel->pad,
 						V4L2_SEL_TGT_COMPOSE);
 	*compose = sel->r;
@@ -338,11 +342,11 @@ static int histo_set_compose(struct v4l2_subdev *subdev,
 }
 
 static int histo_set_selection(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	int ret;
 
 	if (sel->pad != HISTO_PAD_SINK)
@@ -350,7 +354,8 @@ static int histo_set_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&histo->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&histo->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&histo->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -369,7 +374,7 @@ done:
 }
 
 static int histo_get_format(struct v4l2_subdev *subdev,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	if (fmt->pad == HISTO_PAD_SOURCE) {
@@ -381,19 +386,19 @@ static int histo_get_format(struct v4l2_subdev *subdev,
 		return 0;
 	}
 
-	return vsp1_subdev_get_pad_format(subdev, cfg, fmt);
+	return vsp1_subdev_get_pad_format(subdev, sd_state, fmt);
 }
 
 static int histo_set_format(struct v4l2_subdev *subdev,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_histogram *histo = subdev_to_histo(subdev);
 
 	if (fmt->pad != HISTO_PAD_SINK)
-		return histo_get_format(subdev, cfg, fmt);
+		return histo_get_format(subdev, sd_state, fmt);
 
-	return vsp1_subdev_set_pad_format(subdev, cfg, fmt,
+	return vsp1_subdev_set_pad_format(subdev, sd_state, fmt,
 					  histo->formats, histo->num_formats,
 					  HISTO_MIN_SIZE, HISTO_MIN_SIZE,
 					  HISTO_MAX_SIZE, HISTO_MAX_SIZE);
diff --git a/drivers/media/platform/vsp1/vsp1_hsit.c b/drivers/media/platform/vsp1/vsp1_hsit.c
index d5ebd9d08c8a4..361a870380c20 100644
--- a/drivers/media/platform/vsp1/vsp1_hsit.c
+++ b/drivers/media/platform/vsp1/vsp1_hsit.c
@@ -34,7 +34,7 @@ static inline void vsp1_hsit_write(struct vsp1_hsit *hsit,
  */
 
 static int hsit_enum_mbus_code(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct vsp1_hsit *hsit = to_hsit(subdev);
@@ -52,26 +52,28 @@ static int hsit_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int hsit_enum_frame_size(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, HSIT_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   HSIT_MIN_SIZE,
 					   HSIT_MIN_SIZE, HSIT_MAX_SIZE,
 					   HSIT_MAX_SIZE);
 }
 
 static int hsit_set_format(struct v4l2_subdev *subdev,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_hsit *hsit = to_hsit(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
 	mutex_lock(&hsit->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&hsit->entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(&hsit->entity, sd_state,
+					    fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_lif.c b/drivers/media/platform/vsp1/vsp1_lif.c
index 14ed5d7bd061e..6a6857ac93270 100644
--- a/drivers/media/platform/vsp1/vsp1_lif.c
+++ b/drivers/media/platform/vsp1/vsp1_lif.c
@@ -40,27 +40,28 @@ static const unsigned int lif_codes[] = {
 };
 
 static int lif_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, lif_codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, lif_codes,
 					  ARRAY_SIZE(lif_codes));
 }
 
 static int lif_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, LIF_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   LIF_MIN_SIZE,
 					   LIF_MIN_SIZE, LIF_MAX_SIZE,
 					   LIF_MAX_SIZE);
 }
 
 static int lif_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
-	return vsp1_subdev_set_pad_format(subdev, cfg, fmt, lif_codes,
+	return vsp1_subdev_set_pad_format(subdev, sd_state, fmt, lif_codes,
 					  ARRAY_SIZE(lif_codes),
 					  LIF_MIN_SIZE, LIF_MIN_SIZE,
 					  LIF_MAX_SIZE, LIF_MAX_SIZE);
diff --git a/drivers/media/platform/vsp1/vsp1_lut.c b/drivers/media/platform/vsp1/vsp1_lut.c
index 9f88842d70489..ac6802a325f5a 100644
--- a/drivers/media/platform/vsp1/vsp1_lut.c
+++ b/drivers/media/platform/vsp1/vsp1_lut.c
@@ -99,27 +99,28 @@ static const unsigned int lut_codes[] = {
 };
 
 static int lut_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, lut_codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, lut_codes,
 					  ARRAY_SIZE(lut_codes));
 }
 
 static int lut_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, LUT_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   LUT_MIN_SIZE,
 					   LUT_MIN_SIZE, LUT_MAX_SIZE,
 					   LUT_MAX_SIZE);
 }
 
 static int lut_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
-	return vsp1_subdev_set_pad_format(subdev, cfg, fmt, lut_codes,
+	return vsp1_subdev_set_pad_format(subdev, sd_state, fmt, lut_codes,
 					  ARRAY_SIZE(lut_codes),
 					  LUT_MIN_SIZE, LUT_MIN_SIZE,
 					  LUT_MAX_SIZE, LUT_MAX_SIZE);
diff --git a/drivers/media/platform/vsp1/vsp1_rwpf.c b/drivers/media/platform/vsp1/vsp1_rwpf.c
index 049bdd958e56b..22a82d218152f 100644
--- a/drivers/media/platform/vsp1/vsp1_rwpf.c
+++ b/drivers/media/platform/vsp1/vsp1_rwpf.c
@@ -17,9 +17,9 @@
 #define RWPF_MIN_HEIGHT				1
 
 struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
-				     struct v4l2_subdev_pad_config *config)
+				     struct v4l2_subdev_state *sd_state)
 {
-	return v4l2_subdev_get_try_crop(&rwpf->entity.subdev, config,
+	return v4l2_subdev_get_try_crop(&rwpf->entity.subdev, sd_state,
 					RWPF_PAD_SINK);
 }
 
@@ -28,7 +28,7 @@ struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
  */
 
 static int vsp1_rwpf_enum_mbus_code(struct v4l2_subdev *subdev,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_mbus_code_enum *code)
 {
 	static const unsigned int codes[] = {
@@ -46,28 +46,30 @@ static int vsp1_rwpf_enum_mbus_code(struct v4l2_subdev *subdev,
 }
 
 static int vsp1_rwpf_enum_frame_size(struct v4l2_subdev *subdev,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct vsp1_rwpf *rwpf = to_rwpf(subdev);
 
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, RWPF_MIN_WIDTH,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   RWPF_MIN_WIDTH,
 					   RWPF_MIN_HEIGHT, rwpf->max_width,
 					   rwpf->max_height);
 }
 
 static int vsp1_rwpf_set_format(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_rwpf *rwpf = to_rwpf(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
 	mutex_lock(&rwpf->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&rwpf->entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(&rwpf->entity, sd_state,
+					    fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -128,11 +130,11 @@ done:
 }
 
 static int vsp1_rwpf_get_selection(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_rwpf *rwpf = to_rwpf(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
@@ -145,7 +147,8 @@ static int vsp1_rwpf_get_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&rwpf->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&rwpf->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&rwpf->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -176,11 +179,11 @@ done:
 }
 
 static int vsp1_rwpf_set_selection(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_rwpf *rwpf = to_rwpf(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *crop;
 	int ret = 0;
@@ -197,7 +200,8 @@ static int vsp1_rwpf_set_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&rwpf->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&rwpf->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&rwpf->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_rwpf.h b/drivers/media/platform/vsp1/vsp1_rwpf.h
index 2f3582590618b..eac5c04c22393 100644
--- a/drivers/media/platform/vsp1/vsp1_rwpf.h
+++ b/drivers/media/platform/vsp1/vsp1_rwpf.h
@@ -84,6 +84,6 @@ int vsp1_rwpf_init_ctrls(struct vsp1_rwpf *rwpf, unsigned int ncontrols);
 extern const struct v4l2_subdev_pad_ops vsp1_rwpf_pad_ops;
 
 struct v4l2_rect *vsp1_rwpf_get_crop(struct vsp1_rwpf *rwpf,
-				     struct v4l2_subdev_pad_config *config);
+				     struct v4l2_subdev_state *sd_state);
 
 #endif /* __VSP1_RWPF_H__ */
diff --git a/drivers/media/platform/vsp1/vsp1_sru.c b/drivers/media/platform/vsp1/vsp1_sru.c
index 2b65457ee12fe..b614a2aea4611 100644
--- a/drivers/media/platform/vsp1/vsp1_sru.c
+++ b/drivers/media/platform/vsp1/vsp1_sru.c
@@ -106,7 +106,7 @@ static const struct v4l2_ctrl_config sru_intensity_control = {
  */
 
 static int sru_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	static const unsigned int codes[] = {
@@ -114,20 +114,21 @@ static int sru_enum_mbus_code(struct v4l2_subdev *subdev,
 		MEDIA_BUS_FMT_AYUV8_1X32,
 	};
 
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, codes,
 					  ARRAY_SIZE(codes));
 }
 
 static int sru_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct vsp1_sru *sru = to_sru(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
-	config = vsp1_entity_get_pad_config(&sru->entity, cfg, fse->which);
+	config = vsp1_entity_get_pad_config(&sru->entity, sd_state,
+					    fse->which);
 	if (!config)
 		return -EINVAL;
 
@@ -164,7 +165,7 @@ done:
 }
 
 static void sru_try_format(struct vsp1_sru *sru,
-			   struct v4l2_subdev_pad_config *config,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad, struct v4l2_mbus_framefmt *fmt)
 {
 	struct v4l2_mbus_framefmt *format;
@@ -184,7 +185,7 @@ static void sru_try_format(struct vsp1_sru *sru,
 
 	case SRU_PAD_SOURCE:
 		/* The SRU can't perform format conversion. */
-		format = vsp1_entity_get_pad_format(&sru->entity, config,
+		format = vsp1_entity_get_pad_format(&sru->entity, sd_state,
 						    SRU_PAD_SINK);
 		fmt->code = format->code;
 
@@ -216,17 +217,18 @@ static void sru_try_format(struct vsp1_sru *sru,
 }
 
 static int sru_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_sru *sru = to_sru(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
 	mutex_lock(&sru->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&sru->entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(&sru->entity, sd_state,
+					    fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_uds.c b/drivers/media/platform/vsp1/vsp1_uds.c
index 5fc04c082d1a9..1c290cda005a3 100644
--- a/drivers/media/platform/vsp1/vsp1_uds.c
+++ b/drivers/media/platform/vsp1/vsp1_uds.c
@@ -111,7 +111,7 @@ static unsigned int uds_compute_ratio(unsigned int input, unsigned int output)
  */
 
 static int uds_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	static const unsigned int codes[] = {
@@ -119,20 +119,21 @@ static int uds_enum_mbus_code(struct v4l2_subdev *subdev,
 		MEDIA_BUS_FMT_AYUV8_1X32,
 	};
 
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, codes,
 					  ARRAY_SIZE(codes));
 }
 
 static int uds_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct vsp1_uds *uds = to_uds(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
-	config = vsp1_entity_get_pad_config(&uds->entity, cfg, fse->which);
+	config = vsp1_entity_get_pad_config(&uds->entity, sd_state,
+					    fse->which);
 	if (!config)
 		return -EINVAL;
 
@@ -164,7 +165,7 @@ done:
 }
 
 static void uds_try_format(struct vsp1_uds *uds,
-			   struct v4l2_subdev_pad_config *config,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad, struct v4l2_mbus_framefmt *fmt)
 {
 	struct v4l2_mbus_framefmt *format;
@@ -184,7 +185,7 @@ static void uds_try_format(struct vsp1_uds *uds,
 
 	case UDS_PAD_SOURCE:
 		/* The UDS scales but can't perform format conversion. */
-		format = vsp1_entity_get_pad_format(&uds->entity, config,
+		format = vsp1_entity_get_pad_format(&uds->entity, sd_state,
 						    UDS_PAD_SINK);
 		fmt->code = format->code;
 
@@ -200,17 +201,18 @@ static void uds_try_format(struct vsp1_uds *uds,
 }
 
 static int uds_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct vsp1_uds *uds = to_uds(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
 	mutex_lock(&uds->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&uds->entity, cfg, fmt->which);
+	config = vsp1_entity_get_pad_config(&uds->entity, sd_state,
+					    fmt->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/vsp1/vsp1_uif.c b/drivers/media/platform/vsp1/vsp1_uif.c
index 467d1072577be..83d7f17df80e7 100644
--- a/drivers/media/platform/vsp1/vsp1_uif.c
+++ b/drivers/media/platform/vsp1/vsp1_uif.c
@@ -54,38 +54,39 @@ static const unsigned int uif_codes[] = {
 };
 
 static int uif_enum_mbus_code(struct v4l2_subdev *subdev,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
-	return vsp1_subdev_enum_mbus_code(subdev, cfg, code, uif_codes,
+	return vsp1_subdev_enum_mbus_code(subdev, sd_state, code, uif_codes,
 					  ARRAY_SIZE(uif_codes));
 }
 
 static int uif_enum_frame_size(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
-	return vsp1_subdev_enum_frame_size(subdev, cfg, fse, UIF_MIN_SIZE,
+	return vsp1_subdev_enum_frame_size(subdev, sd_state, fse,
+					   UIF_MIN_SIZE,
 					   UIF_MIN_SIZE, UIF_MAX_SIZE,
 					   UIF_MAX_SIZE);
 }
 
 static int uif_set_format(struct v4l2_subdev *subdev,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
-	return vsp1_subdev_set_pad_format(subdev, cfg, fmt, uif_codes,
+	return vsp1_subdev_set_pad_format(subdev, sd_state, fmt, uif_codes,
 					  ARRAY_SIZE(uif_codes),
 					  UIF_MIN_SIZE, UIF_MIN_SIZE,
 					  UIF_MAX_SIZE, UIF_MAX_SIZE);
 }
 
 static int uif_get_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_uif *uif = to_uif(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	int ret = 0;
 
@@ -94,7 +95,8 @@ static int uif_get_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&uif->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&uif->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&uif->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
@@ -127,11 +129,11 @@ done:
 }
 
 static int uif_set_selection(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct vsp1_uif *uif = to_uif(subdev);
-	struct v4l2_subdev_pad_config *config;
+	struct v4l2_subdev_state *config;
 	struct v4l2_mbus_framefmt *format;
 	struct v4l2_rect *selection;
 	int ret = 0;
@@ -142,7 +144,8 @@ static int uif_set_selection(struct v4l2_subdev *subdev,
 
 	mutex_lock(&uif->entity.lock);
 
-	config = vsp1_entity_get_pad_config(&uif->entity, cfg, sel->which);
+	config = vsp1_entity_get_pad_config(&uif->entity, sd_state,
+					    sel->which);
 	if (!config) {
 		ret = -EINVAL;
 		goto done;
diff --git a/drivers/media/platform/xilinx/xilinx-csi2rxss.c b/drivers/media/platform/xilinx/xilinx-csi2rxss.c
index fff7ddec6745f..b1baf9d7b6ecf 100644
--- a/drivers/media/platform/xilinx/xilinx-csi2rxss.c
+++ b/drivers/media/platform/xilinx/xilinx-csi2rxss.c
@@ -681,12 +681,13 @@ stream_done:
 
 static struct v4l2_mbus_framefmt *
 __xcsi2rxss_get_pad_format(struct xcsi2rxss_state *xcsi2rxss,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&xcsi2rxss->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&xcsi2rxss->subdev,
+						  sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &xcsi2rxss->format;
 	default:
@@ -697,7 +698,7 @@ __xcsi2rxss_get_pad_format(struct xcsi2rxss_state *xcsi2rxss,
 /**
  * xcsi2rxss_init_cfg - Initialise the pad format config to default
  * @sd: Pointer to V4L2 Sub device structure
- * @cfg: Pointer to sub device pad information structure
+ * @sd_state: Pointer to sub device state structure
  *
  * This function is used to initialize the pad format with the default
  * values.
@@ -705,7 +706,7 @@ __xcsi2rxss_get_pad_format(struct xcsi2rxss_state *xcsi2rxss,
  * Return: 0 on success
  */
 static int xcsi2rxss_init_cfg(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg)
+			      struct v4l2_subdev_state *sd_state)
 {
 	struct xcsi2rxss_state *xcsi2rxss = to_xcsi2rxssstate(sd);
 	struct v4l2_mbus_framefmt *format;
@@ -713,7 +714,7 @@ static int xcsi2rxss_init_cfg(struct v4l2_subdev *sd,
 
 	mutex_lock(&xcsi2rxss->lock);
 	for (i = 0; i < XCSI_MEDIA_PADS; i++) {
-		format = v4l2_subdev_get_try_format(sd, cfg, i);
+		format = v4l2_subdev_get_try_format(sd, sd_state, i);
 		*format = xcsi2rxss->default_format;
 	}
 	mutex_unlock(&xcsi2rxss->lock);
@@ -724,7 +725,7 @@ static int xcsi2rxss_init_cfg(struct v4l2_subdev *sd,
 /**
  * xcsi2rxss_get_format - Get the pad format
  * @sd: Pointer to V4L2 Sub device structure
- * @cfg: Pointer to sub device pad information structure
+ * @sd_state: Pointer to sub device state structure
  * @fmt: Pointer to pad level media bus format
  *
  * This function is used to get the pad format information.
@@ -732,13 +733,14 @@ static int xcsi2rxss_init_cfg(struct v4l2_subdev *sd,
  * Return: 0 on success
  */
 static int xcsi2rxss_get_format(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_format *fmt)
 {
 	struct xcsi2rxss_state *xcsi2rxss = to_xcsi2rxssstate(sd);
 
 	mutex_lock(&xcsi2rxss->lock);
-	fmt->format = *__xcsi2rxss_get_pad_format(xcsi2rxss, cfg, fmt->pad,
+	fmt->format = *__xcsi2rxss_get_pad_format(xcsi2rxss, sd_state,
+						  fmt->pad,
 						  fmt->which);
 	mutex_unlock(&xcsi2rxss->lock);
 
@@ -748,7 +750,7 @@ static int xcsi2rxss_get_format(struct v4l2_subdev *sd,
 /**
  * xcsi2rxss_set_format - This is used to set the pad format
  * @sd: Pointer to V4L2 Sub device structure
- * @cfg: Pointer to sub device pad information structure
+ * @sd_state: Pointer to sub device state structure
  * @fmt: Pointer to pad level media bus format
  *
  * This function is used to set the pad format. Since the pad format is fixed
@@ -759,7 +761,7 @@ static int xcsi2rxss_get_format(struct v4l2_subdev *sd,
  * Return: 0 on success
  */
 static int xcsi2rxss_set_format(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_format *fmt)
 {
 	struct xcsi2rxss_state *xcsi2rxss = to_xcsi2rxssstate(sd);
@@ -773,7 +775,7 @@ static int xcsi2rxss_set_format(struct v4l2_subdev *sd,
 	 * CSI format cannot be changed at runtime.
 	 * Ensure that format to set is copied to over to CSI pad format
 	 */
-	__format = __xcsi2rxss_get_pad_format(xcsi2rxss, cfg,
+	__format = __xcsi2rxss_get_pad_format(xcsi2rxss, sd_state,
 					      fmt->pad, fmt->which);
 
 	/* only sink pad format can be updated */
@@ -811,7 +813,7 @@ static int xcsi2rxss_set_format(struct v4l2_subdev *sd,
  * Return: -EINVAL or zero on success
  */
 static int xcsi2rxss_enum_mbus_code(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct xcsi2rxss_state *state = to_xcsi2rxssstate(sd);
diff --git a/drivers/media/platform/xilinx/xilinx-tpg.c b/drivers/media/platform/xilinx/xilinx-tpg.c
index ed01bedb5db69..0f2d5a0edf0c3 100644
--- a/drivers/media/platform/xilinx/xilinx-tpg.c
+++ b/drivers/media/platform/xilinx/xilinx-tpg.c
@@ -251,12 +251,13 @@ static int xtpg_s_stream(struct v4l2_subdev *subdev, int enable)
 
 static struct v4l2_mbus_framefmt *
 __xtpg_get_pad_format(struct xtpg_device *xtpg,
-		      struct v4l2_subdev_pad_config *cfg,
+		      struct v4l2_subdev_state *sd_state,
 		      unsigned int pad, u32 which)
 {
 	switch (which) {
 	case V4L2_SUBDEV_FORMAT_TRY:
-		return v4l2_subdev_get_try_format(&xtpg->xvip.subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&xtpg->xvip.subdev,
+						  sd_state, pad);
 	case V4L2_SUBDEV_FORMAT_ACTIVE:
 		return &xtpg->formats[pad];
 	default:
@@ -265,25 +266,26 @@ __xtpg_get_pad_format(struct xtpg_device *xtpg,
 }
 
 static int xtpg_get_format(struct v4l2_subdev *subdev,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct xtpg_device *xtpg = to_tpg(subdev);
 
-	fmt->format = *__xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which);
+	fmt->format = *__xtpg_get_pad_format(xtpg, sd_state, fmt->pad,
+					     fmt->which);
 
 	return 0;
 }
 
 static int xtpg_set_format(struct v4l2_subdev *subdev,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct xtpg_device *xtpg = to_tpg(subdev);
 	struct v4l2_mbus_framefmt *__format;
 	u32 bayer_phase;
 
-	__format = __xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which);
+	__format = __xtpg_get_pad_format(xtpg, sd_state, fmt->pad, fmt->which);
 
 	/* In two pads mode the source pad format is always identical to the
 	 * sink pad format.
@@ -306,7 +308,8 @@ static int xtpg_set_format(struct v4l2_subdev *subdev,
 
 	/* Propagate the format to the source pad. */
 	if (xtpg->npads == 2) {
-		__format = __xtpg_get_pad_format(xtpg, cfg, 1, fmt->which);
+		__format = __xtpg_get_pad_format(xtpg, sd_state, 1,
+						 fmt->which);
 		*__format = fmt->format;
 	}
 
@@ -318,12 +321,12 @@ static int xtpg_set_format(struct v4l2_subdev *subdev,
  */
 
 static int xtpg_enum_frame_size(struct v4l2_subdev *subdev,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct v4l2_mbus_framefmt *format;
 
-	format = v4l2_subdev_get_try_format(subdev, cfg, fse->pad);
+	format = v4l2_subdev_get_try_format(subdev, sd_state, fse->pad);
 
 	if (fse->index || fse->code != format->code)
 		return -EINVAL;
@@ -351,11 +354,11 @@ static int xtpg_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
 	struct xtpg_device *xtpg = to_tpg(subdev);
 	struct v4l2_mbus_framefmt *format;
 
-	format = v4l2_subdev_get_try_format(subdev, fh->pad, 0);
+	format = v4l2_subdev_get_try_format(subdev, fh->state, 0);
 	*format = xtpg->default_format;
 
 	if (xtpg->npads == 2) {
-		format = v4l2_subdev_get_try_format(subdev, fh->pad, 1);
+		format = v4l2_subdev_get_try_format(subdev, fh->state, 1);
 		*format = xtpg->default_format;
 	}
 
diff --git a/drivers/media/platform/xilinx/xilinx-vip.c b/drivers/media/platform/xilinx/xilinx-vip.c
index a4eb576834110..425a32dd5d198 100644
--- a/drivers/media/platform/xilinx/xilinx-vip.c
+++ b/drivers/media/platform/xilinx/xilinx-vip.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL_GPL(xvip_cleanup_resources);
 /**
  * xvip_enum_mbus_code - Enumerate the media format code
  * @subdev: V4L2 subdevice
- * @cfg: V4L2 subdev pad configuration
+ * @sd_state: V4L2 subdev state
  * @code: returning media bus code
  *
  * Enumerate the media bus code of the subdevice. Return the corresponding
@@ -246,7 +246,7 @@ EXPORT_SYMBOL_GPL(xvip_cleanup_resources);
  * is not valid.
  */
 int xvip_enum_mbus_code(struct v4l2_subdev *subdev,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct v4l2_mbus_framefmt *format;
@@ -260,7 +260,7 @@ int xvip_enum_mbus_code(struct v4l2_subdev *subdev,
 	if (code->index)
 		return -EINVAL;
 
-	format = v4l2_subdev_get_try_format(subdev, cfg, code->pad);
+	format = v4l2_subdev_get_try_format(subdev, sd_state, code->pad);
 
 	code->code = format->code;
 
@@ -271,7 +271,7 @@ EXPORT_SYMBOL_GPL(xvip_enum_mbus_code);
 /**
  * xvip_enum_frame_size - Enumerate the media bus frame size
  * @subdev: V4L2 subdevice
- * @cfg: V4L2 subdev pad configuration
+ * @sd_state: V4L2 subdev state
  * @fse: returning media bus frame size
  *
  * This function is a drop-in implementation of the subdev enum_frame_size pad
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(xvip_enum_mbus_code);
  * if the index or the code is not valid.
  */
 int xvip_enum_frame_size(struct v4l2_subdev *subdev,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct v4l2_mbus_framefmt *format;
@@ -295,7 +295,7 @@ int xvip_enum_frame_size(struct v4l2_subdev *subdev,
 	if (fse->which == V4L2_SUBDEV_FORMAT_ACTIVE)
 		return -EINVAL;
 
-	format = v4l2_subdev_get_try_format(subdev, cfg, fse->pad);
+	format = v4l2_subdev_get_try_format(subdev, sd_state, fse->pad);
 
 	if (fse->index || fse->code != format->code)
 		return -EINVAL;
diff --git a/drivers/media/platform/xilinx/xilinx-vip.h b/drivers/media/platform/xilinx/xilinx-vip.h
index a528a32ea1dc5..d0b0e06009529 100644
--- a/drivers/media/platform/xilinx/xilinx-vip.h
+++ b/drivers/media/platform/xilinx/xilinx-vip.h
@@ -125,10 +125,10 @@ const struct xvip_video_format *xvip_of_get_format(struct device_node *node);
 void xvip_set_format_size(struct v4l2_mbus_framefmt *format,
 			  const struct v4l2_subdev_format *fmt);
 int xvip_enum_mbus_code(struct v4l2_subdev *subdev,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_mbus_code_enum *code);
 int xvip_enum_frame_size(struct v4l2_subdev *subdev,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_subdev_frame_size_enum *fse);
 
 static inline u32 xvip_read(struct xvip_device *xvip, u32 addr)
diff --git a/drivers/media/test-drivers/vimc/vimc-debayer.c b/drivers/media/test-drivers/vimc/vimc-debayer.c
index c3f6fef34f686..2d06cdbacc767 100644
--- a/drivers/media/test-drivers/vimc/vimc-debayer.c
+++ b/drivers/media/test-drivers/vimc/vimc-debayer.c
@@ -150,17 +150,17 @@ static bool vimc_deb_src_code_is_valid(u32 code)
 }
 
 static int vimc_deb_init_cfg(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg)
+			     struct v4l2_subdev_state *sd_state)
 {
 	struct vimc_deb_device *vdeb = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *mf;
 	unsigned int i;
 
-	mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+	mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	*mf = sink_fmt_default;
 
 	for (i = 1; i < sd->entity.num_pads; i++) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, i);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, i);
 		*mf = sink_fmt_default;
 		mf->code = vdeb->src_code;
 	}
@@ -169,7 +169,7 @@ static int vimc_deb_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int vimc_deb_enum_mbus_code(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (VIMC_IS_SRC(code->pad)) {
@@ -188,7 +188,7 @@ static int vimc_deb_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int vimc_deb_enum_frame_size(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (fse->index)
@@ -213,14 +213,14 @@ static int vimc_deb_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int vimc_deb_get_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vimc_deb_device *vdeb = v4l2_get_subdevdata(sd);
 
 	/* Get the current sink format */
 	fmt->format = fmt->which == V4L2_SUBDEV_FORMAT_TRY ?
-		      *v4l2_subdev_get_try_format(sd, cfg, 0) :
+		      *v4l2_subdev_get_try_format(sd, sd_state, 0) :
 		      vdeb->sink_fmt;
 
 	/* Set the right code for the source pad */
@@ -251,7 +251,7 @@ static void vimc_deb_adjust_sink_fmt(struct v4l2_mbus_framefmt *fmt)
 }
 
 static int vimc_deb_set_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vimc_deb_device *vdeb = v4l2_get_subdevdata(sd);
@@ -266,8 +266,8 @@ static int vimc_deb_set_fmt(struct v4l2_subdev *sd,
 		sink_fmt = &vdeb->sink_fmt;
 		src_code = &vdeb->src_code;
 	} else {
-		sink_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
-		src_code = &v4l2_subdev_get_try_format(sd, cfg, 1)->code;
+		sink_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
+		src_code = &v4l2_subdev_get_try_format(sd, sd_state, 1)->code;
 	}
 
 	/*
diff --git a/drivers/media/test-drivers/vimc/vimc-scaler.c b/drivers/media/test-drivers/vimc/vimc-scaler.c
index 121fa7d62a2e6..06880dd0b6aca 100644
--- a/drivers/media/test-drivers/vimc/vimc-scaler.c
+++ b/drivers/media/test-drivers/vimc/vimc-scaler.c
@@ -84,20 +84,20 @@ static void vimc_sca_adjust_sink_crop(struct v4l2_rect *r,
 }
 
 static int vimc_sca_init_cfg(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg)
+			     struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_mbus_framefmt *mf;
 	struct v4l2_rect *r;
 	unsigned int i;
 
-	mf = v4l2_subdev_get_try_format(sd, cfg, 0);
+	mf = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	*mf = sink_fmt_default;
 
-	r = v4l2_subdev_get_try_crop(sd, cfg, 0);
+	r = v4l2_subdev_get_try_crop(sd, sd_state, 0);
 	*r = crop_rect_default;
 
 	for (i = 1; i < sd->entity.num_pads; i++) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, i);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, i);
 		*mf = sink_fmt_default;
 		mf->width = mf->width * sca_mult;
 		mf->height = mf->height * sca_mult;
@@ -107,7 +107,7 @@ static int vimc_sca_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int vimc_sca_enum_mbus_code(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_mbus_code_enum *code)
 {
 	u32 mbus_code = vimc_mbus_code_by_index(code->index);
@@ -128,7 +128,7 @@ static int vimc_sca_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int vimc_sca_enum_frame_size(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_frame_size_enum *fse)
 {
 	const struct vimc_pix_map *vpix;
@@ -156,7 +156,7 @@ static int vimc_sca_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int vimc_sca_get_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *format)
 {
 	struct vimc_sca_device *vsca = v4l2_get_subdevdata(sd);
@@ -164,8 +164,8 @@ static int vimc_sca_get_fmt(struct v4l2_subdev *sd,
 
 	/* Get the current sink format */
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		format->format = *v4l2_subdev_get_try_format(sd, cfg, 0);
-		crop_rect = v4l2_subdev_get_try_crop(sd, cfg, 0);
+		format->format = *v4l2_subdev_get_try_format(sd, sd_state, 0);
+		crop_rect = v4l2_subdev_get_try_crop(sd, sd_state, 0);
 	} else {
 		format->format = vsca->sink_fmt;
 		crop_rect = &vsca->crop_rect;
@@ -201,7 +201,7 @@ static void vimc_sca_adjust_sink_fmt(struct v4l2_mbus_framefmt *fmt)
 }
 
 static int vimc_sca_set_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vimc_sca_device *vsca = v4l2_get_subdevdata(sd);
@@ -216,8 +216,8 @@ static int vimc_sca_set_fmt(struct v4l2_subdev *sd,
 		sink_fmt = &vsca->sink_fmt;
 		crop_rect = &vsca->crop_rect;
 	} else {
-		sink_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
-		crop_rect = v4l2_subdev_get_try_crop(sd, cfg, 0);
+		sink_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
+		crop_rect = v4l2_subdev_get_try_crop(sd, sd_state, 0);
 	}
 
 	/*
@@ -254,7 +254,7 @@ static int vimc_sca_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int vimc_sca_get_selection(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_selection *sel)
 {
 	struct vimc_sca_device *vsca = v4l2_get_subdevdata(sd);
@@ -268,8 +268,8 @@ static int vimc_sca_get_selection(struct v4l2_subdev *sd,
 		sink_fmt = &vsca->sink_fmt;
 		crop_rect = &vsca->crop_rect;
 	} else {
-		sink_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
-		crop_rect = v4l2_subdev_get_try_crop(sd, cfg, 0);
+		sink_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
+		crop_rect = v4l2_subdev_get_try_crop(sd, sd_state, 0);
 	}
 
 	switch (sel->target) {
@@ -287,7 +287,7 @@ static int vimc_sca_get_selection(struct v4l2_subdev *sd,
 }
 
 static int vimc_sca_set_selection(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_selection *sel)
 {
 	struct vimc_sca_device *vsca = v4l2_get_subdevdata(sd);
@@ -305,8 +305,8 @@ static int vimc_sca_set_selection(struct v4l2_subdev *sd,
 		crop_rect = &vsca->crop_rect;
 		sink_fmt = &vsca->sink_fmt;
 	} else {
-		crop_rect = v4l2_subdev_get_try_crop(sd, cfg, 0);
-		sink_fmt = v4l2_subdev_get_try_format(sd, cfg, 0);
+		crop_rect = v4l2_subdev_get_try_crop(sd, sd_state, 0);
+		sink_fmt = v4l2_subdev_get_try_format(sd, sd_state, 0);
 	}
 
 	switch (sel->target) {
diff --git a/drivers/media/test-drivers/vimc/vimc-sensor.c b/drivers/media/test-drivers/vimc/vimc-sensor.c
index ba5db5a150b4b..74ab79cadb5db 100644
--- a/drivers/media/test-drivers/vimc/vimc-sensor.c
+++ b/drivers/media/test-drivers/vimc/vimc-sensor.c
@@ -42,14 +42,14 @@ static const struct v4l2_mbus_framefmt fmt_default = {
 };
 
 static int vimc_sen_init_cfg(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg)
+			     struct v4l2_subdev_state *sd_state)
 {
 	unsigned int i;
 
 	for (i = 0; i < sd->entity.num_pads; i++) {
 		struct v4l2_mbus_framefmt *mf;
 
-		mf = v4l2_subdev_get_try_format(sd, cfg, i);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, i);
 		*mf = fmt_default;
 	}
 
@@ -57,7 +57,7 @@ static int vimc_sen_init_cfg(struct v4l2_subdev *sd,
 }
 
 static int vimc_sen_enum_mbus_code(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_mbus_code_enum *code)
 {
 	u32 mbus_code = vimc_mbus_code_by_index(code->index);
@@ -71,7 +71,7 @@ static int vimc_sen_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int vimc_sen_enum_frame_size(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_frame_size_enum *fse)
 {
 	const struct vimc_pix_map *vpix;
@@ -93,14 +93,14 @@ static int vimc_sen_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int vimc_sen_get_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vimc_sen_device *vsen =
 				container_of(sd, struct vimc_sen_device, sd);
 
 	fmt->format = fmt->which == V4L2_SUBDEV_FORMAT_TRY ?
-		      *v4l2_subdev_get_try_format(sd, cfg, fmt->pad) :
+		      *v4l2_subdev_get_try_format(sd, sd_state, fmt->pad) :
 		      vsen->mbus_format;
 
 	return 0;
@@ -146,7 +146,7 @@ static void vimc_sen_adjust_fmt(struct v4l2_mbus_framefmt *fmt)
 }
 
 static int vimc_sen_set_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct vimc_sen_device *vsen = v4l2_get_subdevdata(sd);
@@ -159,7 +159,7 @@ static int vimc_sen_set_fmt(struct v4l2_subdev *sd,
 
 		mf = &vsen->mbus_format;
 	} else {
-		mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, fmt->pad);
 	}
 
 	/* Set the new format */
diff --git a/drivers/media/usb/go7007/s2250-board.c b/drivers/media/usb/go7007/s2250-board.c
index b9e45124673b6..c742cc88fac5c 100644
--- a/drivers/media/usb/go7007/s2250-board.c
+++ b/drivers/media/usb/go7007/s2250-board.c
@@ -398,7 +398,7 @@ static int s2250_s_ctrl(struct v4l2_ctrl *ctrl)
 }
 
 static int s2250_set_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 956dafab43d49..6d3e030365198 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -26,19 +26,21 @@
 #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API)
 static int subdev_fh_init(struct v4l2_subdev_fh *fh, struct v4l2_subdev *sd)
 {
-	if (sd->entity.num_pads) {
-		fh->pad = v4l2_subdev_alloc_pad_config(sd);
-		if (fh->pad == NULL)
-			return -ENOMEM;
-	}
+	struct v4l2_subdev_state *state;
+
+	state = v4l2_subdev_alloc_state(sd);
+	if (IS_ERR(state))
+		return PTR_ERR(state);
+
+	fh->state = state;
 
 	return 0;
 }
 
 static void subdev_fh_free(struct v4l2_subdev_fh *fh)
 {
-	v4l2_subdev_free_pad_config(fh->pad);
-	fh->pad = NULL;
+	v4l2_subdev_free_state(fh->state);
+	fh->state = NULL;
 }
 
 static int subdev_open(struct file *file)
@@ -146,63 +148,63 @@ static inline int check_pad(struct v4l2_subdev *sd, u32 pad)
 	return 0;
 }
 
-static int check_cfg(u32 which, struct v4l2_subdev_pad_config *cfg)
+static int check_state_pads(u32 which, struct v4l2_subdev_state *state)
 {
-	if (which == V4L2_SUBDEV_FORMAT_TRY && !cfg)
+	if (which == V4L2_SUBDEV_FORMAT_TRY && (!state || !state->pads))
 		return -EINVAL;
 
 	return 0;
 }
 
 static inline int check_format(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *state,
 			       struct v4l2_subdev_format *format)
 {
 	if (!format)
 		return -EINVAL;
 
 	return check_which(format->which) ? : check_pad(sd, format->pad) ? :
-	       check_cfg(format->which, cfg);
+	       check_state_pads(format->which, state);
 }
 
 static int call_get_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *state,
 			struct v4l2_subdev_format *format)
 {
-	return check_format(sd, cfg, format) ? :
-	       sd->ops->pad->get_fmt(sd, cfg, format);
+	return check_format(sd, state, format) ? :
+	       sd->ops->pad->get_fmt(sd, state, format);
 }
 
 static int call_set_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *state,
 			struct v4l2_subdev_format *format)
 {
-	return check_format(sd, cfg, format) ? :
-	       sd->ops->pad->set_fmt(sd, cfg, format);
+	return check_format(sd, state, format) ? :
+	       sd->ops->pad->set_fmt(sd, state, format);
 }
 
 static int call_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (!code)
 		return -EINVAL;
 
 	return check_which(code->which) ? : check_pad(sd, code->pad) ? :
-	       check_cfg(code->which, cfg) ? :
-	       sd->ops->pad->enum_mbus_code(sd, cfg, code);
+	       check_state_pads(code->which, state) ? :
+	       sd->ops->pad->enum_mbus_code(sd, state, code);
 }
 
 static int call_enum_frame_size(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	if (!fse)
 		return -EINVAL;
 
 	return check_which(fse->which) ? : check_pad(sd, fse->pad) ? :
-	       check_cfg(fse->which, cfg) ? :
-	       sd->ops->pad->enum_frame_size(sd, cfg, fse);
+	       check_state_pads(fse->which, state) ? :
+	       sd->ops->pad->enum_frame_size(sd, state, fse);
 }
 
 static inline int check_frame_interval(struct v4l2_subdev *sd,
@@ -229,42 +231,42 @@ static int call_s_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int call_enum_frame_interval(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *state,
 				    struct v4l2_subdev_frame_interval_enum *fie)
 {
 	if (!fie)
 		return -EINVAL;
 
 	return check_which(fie->which) ? : check_pad(sd, fie->pad) ? :
-	       check_cfg(fie->which, cfg) ? :
-	       sd->ops->pad->enum_frame_interval(sd, cfg, fie);
+	       check_state_pads(fie->which, state) ? :
+	       sd->ops->pad->enum_frame_interval(sd, state, fie);
 }
 
 static inline int check_selection(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *state,
 				  struct v4l2_subdev_selection *sel)
 {
 	if (!sel)
 		return -EINVAL;
 
 	return check_which(sel->which) ? : check_pad(sd, sel->pad) ? :
-	       check_cfg(sel->which, cfg);
+	       check_state_pads(sel->which, state);
 }
 
 static int call_get_selection(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *state,
 			      struct v4l2_subdev_selection *sel)
 {
-	return check_selection(sd, cfg, sel) ? :
-	       sd->ops->pad->get_selection(sd, cfg, sel);
+	return check_selection(sd, state, sel) ? :
+	       sd->ops->pad->get_selection(sd, state, sel);
 }
 
 static int call_set_selection(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *state,
 			      struct v4l2_subdev_selection *sel)
 {
-	return check_selection(sd, cfg, sel) ? :
-	       sd->ops->pad->set_selection(sd, cfg, sel);
+	return check_selection(sd, state, sel) ? :
+	       sd->ops->pad->set_selection(sd, state, sel);
 }
 
 static inline int check_edid(struct v4l2_subdev *sd,
@@ -506,7 +508,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 		memset(format->reserved, 0, sizeof(format->reserved));
 		memset(format->format.reserved, 0, sizeof(format->format.reserved));
-		return v4l2_subdev_call(sd, pad, get_fmt, subdev_fh->pad, format);
+		return v4l2_subdev_call(sd, pad, get_fmt, subdev_fh->state, format);
 	}
 
 	case VIDIOC_SUBDEV_S_FMT: {
@@ -517,7 +519,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 		memset(format->reserved, 0, sizeof(format->reserved));
 		memset(format->format.reserved, 0, sizeof(format->format.reserved));
-		return v4l2_subdev_call(sd, pad, set_fmt, subdev_fh->pad, format);
+		return v4l2_subdev_call(sd, pad, set_fmt, subdev_fh->state, format);
 	}
 
 	case VIDIOC_SUBDEV_G_CROP: {
@@ -531,7 +533,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		sel.target = V4L2_SEL_TGT_CROP;
 
 		rval = v4l2_subdev_call(
-			sd, pad, get_selection, subdev_fh->pad, &sel);
+			sd, pad, get_selection, subdev_fh->state, &sel);
 
 		crop->rect = sel.r;
 
@@ -553,7 +555,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		sel.r = crop->rect;
 
 		rval = v4l2_subdev_call(
-			sd, pad, set_selection, subdev_fh->pad, &sel);
+			sd, pad, set_selection, subdev_fh->state, &sel);
 
 		crop->rect = sel.r;
 
@@ -564,7 +566,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		struct v4l2_subdev_mbus_code_enum *code = arg;
 
 		memset(code->reserved, 0, sizeof(code->reserved));
-		return v4l2_subdev_call(sd, pad, enum_mbus_code, subdev_fh->pad,
+		return v4l2_subdev_call(sd, pad, enum_mbus_code, subdev_fh->state,
 					code);
 	}
 
@@ -572,7 +574,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		struct v4l2_subdev_frame_size_enum *fse = arg;
 
 		memset(fse->reserved, 0, sizeof(fse->reserved));
-		return v4l2_subdev_call(sd, pad, enum_frame_size, subdev_fh->pad,
+		return v4l2_subdev_call(sd, pad, enum_frame_size, subdev_fh->state,
 					fse);
 	}
 
@@ -597,7 +599,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		struct v4l2_subdev_frame_interval_enum *fie = arg;
 
 		memset(fie->reserved, 0, sizeof(fie->reserved));
-		return v4l2_subdev_call(sd, pad, enum_frame_interval, subdev_fh->pad,
+		return v4l2_subdev_call(sd, pad, enum_frame_interval, subdev_fh->state,
 					fie);
 	}
 
@@ -606,7 +608,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 		memset(sel->reserved, 0, sizeof(sel->reserved));
 		return v4l2_subdev_call(
-			sd, pad, get_selection, subdev_fh->pad, sel);
+			sd, pad, get_selection, subdev_fh->state, sel);
 	}
 
 	case VIDIOC_SUBDEV_S_SELECTION: {
@@ -617,7 +619,7 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 		memset(sel->reserved, 0, sizeof(sel->reserved));
 		return v4l2_subdev_call(
-			sd, pad, set_selection, subdev_fh->pad, sel);
+			sd, pad, set_selection, subdev_fh->state, sel);
 	}
 
 	case VIDIOC_G_EDID: {
@@ -892,35 +894,51 @@ int v4l2_subdev_link_validate(struct media_link *link)
 }
 EXPORT_SYMBOL_GPL(v4l2_subdev_link_validate);
 
-struct v4l2_subdev_pad_config *
-v4l2_subdev_alloc_pad_config(struct v4l2_subdev *sd)
+struct v4l2_subdev_state *v4l2_subdev_alloc_state(struct v4l2_subdev *sd)
 {
-	struct v4l2_subdev_pad_config *cfg;
+	struct v4l2_subdev_state *state;
 	int ret;
 
-	if (!sd->entity.num_pads)
-		return NULL;
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return ERR_PTR(-ENOMEM);
 
-	cfg = kvmalloc_array(sd->entity.num_pads, sizeof(*cfg),
-			     GFP_KERNEL | __GFP_ZERO);
-	if (!cfg)
-		return NULL;
-
-	ret = v4l2_subdev_call(sd, pad, init_cfg, cfg);
-	if (ret < 0 && ret != -ENOIOCTLCMD) {
-		kvfree(cfg);
-		return NULL;
+	if (sd->entity.num_pads) {
+		state->pads = kvmalloc_array(sd->entity.num_pads,
+					     sizeof(*state->pads),
+					     GFP_KERNEL | __GFP_ZERO);
+		if (!state->pads) {
+			ret = -ENOMEM;
+			goto err;
+		}
 	}
 
-	return cfg;
+	ret = v4l2_subdev_call(sd, pad, init_cfg, state);
+	if (ret < 0 && ret != -ENOIOCTLCMD)
+		goto err;
+
+	return state;
+
+err:
+	if (state && state->pads)
+		kvfree(state->pads);
+
+	kfree(state);
+
+	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(v4l2_subdev_alloc_pad_config);
+EXPORT_SYMBOL_GPL(v4l2_subdev_alloc_state);
 
-void v4l2_subdev_free_pad_config(struct v4l2_subdev_pad_config *cfg)
+void v4l2_subdev_free_state(struct v4l2_subdev_state *state)
 {
-	kvfree(cfg);
+	if (!state)
+		return;
+
+	kvfree(state->pads);
+	kfree(state);
 }
-EXPORT_SYMBOL_GPL(v4l2_subdev_free_pad_config);
+EXPORT_SYMBOL_GPL(v4l2_subdev_free_state);
+
 #endif /* CONFIG_MEDIA_CONTROLLER */
 
 void v4l2_subdev_init(struct v4l2_subdev *sd, const struct v4l2_subdev_ops *ops)
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
index 6c5a378a2eb5c..687888d643dfb 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc0310.c
@@ -965,7 +965,7 @@ static int startup(struct v4l2_subdev *sd)
 }
 
 static int gc0310_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -999,7 +999,7 @@ static int gc0310_set_fmt(struct v4l2_subdev *sd,
 	fmt->code = MEDIA_BUS_FMT_SGRBG8_1X8;
 
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		mutex_unlock(&dev->input_lock);
 		return 0;
 	}
@@ -1032,7 +1032,7 @@ err:
 }
 
 static int gc0310_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1205,7 +1205,7 @@ static int gc0310_g_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int gc0310_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= MAX_FMTS)
@@ -1216,7 +1216,7 @@ static int gc0310_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int gc0310_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
index 38defa0f81513..9363c1a52ae9b 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c
@@ -769,7 +769,7 @@ static int startup(struct v4l2_subdev *sd)
 }
 
 static int gc2235_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -798,7 +798,7 @@ static int gc2235_set_fmt(struct v4l2_subdev *sd,
 	}
 	fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		mutex_unlock(&dev->input_lock);
 		return 0;
 	}
@@ -827,7 +827,7 @@ err:
 }
 
 static int gc2235_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -966,7 +966,7 @@ static int gc2235_g_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int gc2235_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= MAX_FMTS)
@@ -977,7 +977,7 @@ static int gc2235_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int gc2235_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
index 0a6f8f68b215a..11196180a2066 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-mt9m114.c
@@ -803,7 +803,7 @@ static int mt9m114_get_intg_factor(struct i2c_client *client,
 }
 
 static int mt9m114_get_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -824,7 +824,7 @@ static int mt9m114_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int mt9m114_set_fmt(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -848,7 +848,7 @@ static int mt9m114_set_fmt(struct v4l2_subdev *sd,
 
 	mt9m114_try_res(&width, &height);
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		return 0;
 	}
 	res_index = mt9m114_to_res(width, height);
@@ -1168,7 +1168,7 @@ static int mt9m114_s_exposure_metering(struct v4l2_subdev *sd, s32 val)
  * This function is for touch exposure feature.
  */
 static int mt9m114_s_exposure_selection(struct v4l2_subdev *sd,
-					struct v4l2_subdev_pad_config *cfg,
+					struct v4l2_subdev_state *sd_state,
 					struct v4l2_subdev_selection *sel)
 {
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
@@ -1731,7 +1731,7 @@ static int mt9m114_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int mt9m114_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index)
@@ -1742,7 +1742,7 @@ static int mt9m114_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int mt9m114_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	unsigned int index = fse->index;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
index eb1ecd198c22f..2111e4a478c1a 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2680.c
@@ -914,7 +914,7 @@ static int get_resolution_index(int w, int h)
 }
 
 static int ov2680_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -951,7 +951,7 @@ static int ov2680_set_fmt(struct v4l2_subdev *sd,
 	}
 	fmt->code = MEDIA_BUS_FMT_SBGGR10_1X10;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		mutex_unlock(&dev->input_lock);
 		return 0;
 	}
@@ -1002,7 +1002,7 @@ err:
 }
 
 static int ov2680_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1161,7 +1161,7 @@ static int ov2680_g_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov2680_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= MAX_FMTS)
@@ -1172,7 +1172,7 @@ static int ov2680_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2680_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
index 90a985ee25fa8..90d0871a78a3d 100644
--- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
+++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c
@@ -876,7 +876,7 @@ static int startup(struct v4l2_subdev *sd)
 }
 
 static int ov2722_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -906,7 +906,7 @@ static int ov2722_set_fmt(struct v4l2_subdev *sd,
 	}
 	fmt->code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		mutex_unlock(&dev->input_lock);
 		return 0;
 	}
@@ -961,7 +961,7 @@ err:
 }
 
 static int ov2722_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1104,7 +1104,7 @@ static int ov2722_g_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov2722_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= MAX_FMTS)
@@ -1115,7 +1115,7 @@ static int ov2722_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov2722_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
diff --git a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
index e698b63d6cb7a..0828ca9ab6f22 100644
--- a/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
+++ b/drivers/staging/media/atomisp/i2c/ov5693/atomisp-ov5693.c
@@ -1577,7 +1577,7 @@ static int startup(struct v4l2_subdev *sd)
 }
 
 static int ov5693_set_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1608,7 +1608,7 @@ static int ov5693_set_fmt(struct v4l2_subdev *sd,
 
 	fmt->code = MEDIA_BUS_FMT_SBGGR10_1X10;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		mutex_unlock(&dev->input_lock);
 		return 0;
 	}
@@ -1676,7 +1676,7 @@ err:
 }
 
 static int ov5693_get_fmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -1825,7 +1825,7 @@ static int ov5693_g_frame_interval(struct v4l2_subdev *sd,
 }
 
 static int ov5693_enum_mbus_code(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= MAX_FMTS)
@@ -1836,7 +1836,7 @@ static int ov5693_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ov5693_enum_frame_size(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_frame_size_enum *fse)
 {
 	int index = fse->index;
diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
index 24d8eaccb9c6a..366161cff5602 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
@@ -4842,6 +4842,9 @@ int atomisp_try_fmt(struct video_device *vdev, struct v4l2_pix_format *f,
 	struct atomisp_device *isp = video_get_drvdata(vdev);
 	struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format format = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -4877,7 +4880,7 @@ int atomisp_try_fmt(struct video_device *vdev, struct v4l2_pix_format *f,
 		snr_mbus_fmt->width, snr_mbus_fmt->height);
 
 	ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera,
-			       pad, set_fmt, &pad_cfg, &format);
+			       pad, set_fmt, &pad_state, &format);
 	if (ret)
 		return ret;
 
@@ -5252,11 +5255,11 @@ static int atomisp_set_fmt_to_isp(struct video_device *vdev,
 		   atomisp_output_fmts[] in atomisp_v4l2.c */
 		vf_ffmt.code = V4L2_MBUS_FMT_CUSTOM_YUV420;
 
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     ATOMISP_SUBDEV_PAD_SOURCE_VF,
 					     V4L2_SEL_TGT_COMPOSE, 0, &vf_size);
-		atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
+		atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
 					V4L2_SUBDEV_FORMAT_ACTIVE,
 					ATOMISP_SUBDEV_PAD_SOURCE_VF, &vf_ffmt);
 		asd->video_out_vf.sh_fmt = IA_CSS_FRAME_FORMAT_NV12;
@@ -5493,6 +5496,9 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 	struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd;
 	const struct atomisp_format_bridge *format;
 	struct v4l2_subdev_pad_config pad_cfg;
+	struct v4l2_subdev_state pad_state = {
+		.pads = &pad_cfg
+		};
 	struct v4l2_subdev_format vformat = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -5531,7 +5537,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 	    source_pad == ATOMISP_SUBDEV_PAD_SOURCE_VIDEO) {
 		vformat.which = V4L2_SUBDEV_FORMAT_TRY;
 		ret = v4l2_subdev_call(isp->inputs[asd->input_curr].camera,
-				       pad, set_fmt, &pad_cfg, &vformat);
+				       pad, set_fmt, &pad_state, &vformat);
 		if (ret)
 			return ret;
 		if (ffmt->width < req_ffmt->width ||
@@ -5569,7 +5575,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev,
 		asd->params.video_dis_en = false;
 	}
 
-	atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
+	atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
 				V4L2_SUBDEV_FORMAT_ACTIVE,
 				ATOMISP_SUBDEV_PAD_SINK, ffmt);
 
@@ -5648,7 +5654,7 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
 			}
 
 			atomisp_subdev_set_selection(
-			    &asd->subdev, fh.pad,
+			    &asd->subdev, fh.state,
 			    V4L2_SUBDEV_FORMAT_ACTIVE, source_pad,
 			    V4L2_SEL_TGT_COMPOSE, 0, &r);
 
@@ -5778,7 +5784,7 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
 						ATOMISP_SUBDEV_PAD_SINK);
 
 	isp_source_fmt.code = format_bridge->mbus_code;
-	atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
+	atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
 				V4L2_SUBDEV_FORMAT_ACTIVE,
 				source_pad, &isp_source_fmt);
 
@@ -5897,13 +5903,13 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
 			isp_sink_crop.height = f->fmt.pix.height;
 		}
 
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     ATOMISP_SUBDEV_PAD_SINK,
 					     V4L2_SEL_TGT_CROP,
 					     V4L2_SEL_FLAG_KEEP_CONFIG,
 					     &isp_sink_crop);
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     source_pad, V4L2_SEL_TGT_COMPOSE,
 					     0, &isp_sink_crop);
@@ -5922,7 +5928,7 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
 					 f->fmt.pix.height);
 		}
 
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     source_pad,
 					     V4L2_SEL_TGT_COMPOSE, 0,
@@ -5956,14 +5962,14 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f)
 						       f->fmt.pix.width,
 						       ATOM_ISP_STEP_HEIGHT);
 			}
-			atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+			atomisp_subdev_set_selection(&asd->subdev, fh.state,
 						     V4L2_SUBDEV_FORMAT_ACTIVE,
 						     ATOMISP_SUBDEV_PAD_SINK,
 						     V4L2_SEL_TGT_CROP,
 						     V4L2_SEL_FLAG_KEEP_CONFIG,
 						     &sink_crop);
 		}
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     source_pad,
 					     V4L2_SEL_TGT_COMPOSE, 0,
@@ -6054,7 +6060,8 @@ int atomisp_set_fmt_file(struct video_device *vdev, struct v4l2_format *f)
 	ffmt.height = f->fmt.pix.height;
 	ffmt.code = format_bridge->mbus_code;
 
-	atomisp_subdev_set_ffmt(&asd->subdev, fh.pad, V4L2_SUBDEV_FORMAT_ACTIVE,
+	atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
+				V4L2_SUBDEV_FORMAT_ACTIVE,
 				ATOMISP_SUBDEV_PAD_SINK, &ffmt);
 
 	return 0;
diff --git a/drivers/staging/media/atomisp/pci/atomisp_csi2.c b/drivers/staging/media/atomisp/pci/atomisp_csi2.c
index 060b8765ae968..56456e59bf893 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_csi2.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_csi2.c
@@ -25,13 +25,13 @@
 static struct v4l2_mbus_framefmt *__csi2_get_format(struct
 	atomisp_mipi_csi2_device
 	* csi2,
-	struct
-	v4l2_subdev_pad_config *cfg,
+	struct v4l2_subdev_state *sd_state,
 	enum
 	v4l2_subdev_format_whence
 	which, unsigned int pad) {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csi2->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&csi2->subdev, sd_state,
+						  pad);
 	else
 		return &csi2->formats[pad];
 }
@@ -44,7 +44,7 @@ static struct v4l2_mbus_framefmt *__csi2_get_format(struct
  * return -EINVAL or zero on success
 */
 static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	const struct atomisp_in_fmt_conv *ic = atomisp_in_fmt_conv;
@@ -70,13 +70,13 @@ static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
 */
 static int csi2_get_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct atomisp_mipi_csi2_device *csi2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csi2_get_format(csi2, cfg, fmt->which, fmt->pad);
+	format = __csi2_get_format(csi2, sd_state, fmt->which, fmt->pad);
 
 	fmt->format = *format;
 
@@ -84,12 +84,14 @@ static int csi2_get_format(struct v4l2_subdev *sd,
 }
 
 int atomisp_csi2_set_ffmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  unsigned int which, uint16_t pad,
 			  struct v4l2_mbus_framefmt *ffmt)
 {
 	struct atomisp_mipi_csi2_device *csi2 = v4l2_get_subdevdata(sd);
-	struct v4l2_mbus_framefmt *actual_ffmt = __csi2_get_format(csi2, cfg, which, pad);
+	struct v4l2_mbus_framefmt *actual_ffmt = __csi2_get_format(csi2,
+								   sd_state,
+								   which, pad);
 
 	if (pad == CSI2_PAD_SINK) {
 		const struct atomisp_in_fmt_conv *ic;
@@ -110,12 +112,14 @@ int atomisp_csi2_set_ffmt(struct v4l2_subdev *sd,
 
 		tmp_ffmt = *ffmt = *actual_ffmt;
 
-		return atomisp_csi2_set_ffmt(sd, cfg, which, CSI2_PAD_SOURCE,
+		return atomisp_csi2_set_ffmt(sd, sd_state, which,
+					     CSI2_PAD_SOURCE,
 					     &tmp_ffmt);
 	}
 
 	/* FIXME: DPCM decompression */
-	*actual_ffmt = *ffmt = *__csi2_get_format(csi2, cfg, which, CSI2_PAD_SINK);
+	*actual_ffmt = *ffmt = *__csi2_get_format(csi2, sd_state, which,
+						  CSI2_PAD_SINK);
 
 	return 0;
 }
@@ -129,10 +133,10 @@ int atomisp_csi2_set_ffmt(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
 */
 static int csi2_set_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
-	return atomisp_csi2_set_ffmt(sd, cfg, fmt->which, fmt->pad,
+	return atomisp_csi2_set_ffmt(sd, sd_state, fmt->which, fmt->pad,
 				     &fmt->format);
 }
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp_csi2.h b/drivers/staging/media/atomisp/pci/atomisp_csi2.h
index 59261e8f1a1a5..e35711be8a37b 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_csi2.h
+++ b/drivers/staging/media/atomisp/pci/atomisp_csi2.h
@@ -44,7 +44,7 @@ struct atomisp_mipi_csi2_device {
 };
 
 int atomisp_csi2_set_ffmt(struct v4l2_subdev *sd,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  unsigned int which, uint16_t pad,
 			  struct v4l2_mbus_framefmt *ffmt);
 int atomisp_mipi_csi2_init(struct atomisp_device *isp);
diff --git a/drivers/staging/media/atomisp/pci/atomisp_file.c b/drivers/staging/media/atomisp/pci/atomisp_file.c
index e568ca99c45a5..4570a9ab100b7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_file.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_file.c
@@ -80,7 +80,7 @@ static int file_input_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int file_input_get_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -104,16 +104,16 @@ static int file_input_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int file_input_set_fmt(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
 
 	if (format->pad)
 		return -EINVAL;
-	file_input_get_fmt(sd, cfg, format);
+	file_input_get_fmt(sd, sd_state, format);
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY)
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 	return 0;
 }
 
@@ -130,7 +130,7 @@ static int file_input_s_power(struct v4l2_subdev *sd, int on)
 }
 
 static int file_input_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_mbus_code_enum *code)
 {
 	/*to fake*/
@@ -138,7 +138,7 @@ static int file_input_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int file_input_enum_frame_size(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_size_enum *fse)
 {
 	/*to fake*/
@@ -146,7 +146,7 @@ static int file_input_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int file_input_enum_frame_ival(struct v4l2_subdev *sd,
-				      struct v4l2_subdev_pad_config *cfg,
+				      struct v4l2_subdev_state *sd_state,
 				      struct v4l2_subdev_frame_interval_enum
 				      *fie)
 {
diff --git a/drivers/staging/media/atomisp/pci/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp_fops.c
index 26d05474a035c..022efd4151c03 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_fops.c
@@ -963,7 +963,7 @@ static int atomisp_release(struct file *file)
 	if (!isp->sw_contex.file_input && asd->fmt_auto->val) {
 		struct v4l2_mbus_framefmt isp_sink_fmt = { 0 };
 
-		atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
+		atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
 					V4L2_SUBDEV_FORMAT_ACTIVE,
 					ATOMISP_SUBDEV_PAD_SINK, &isp_sink_fmt);
 	}
@@ -975,7 +975,7 @@ subdev_uninit:
 	if (isp->sw_contex.file_input && asd->fmt_auto->val) {
 		struct v4l2_mbus_framefmt isp_sink_fmt = { 0 };
 
-		atomisp_subdev_set_ffmt(&asd->subdev, fh.pad,
+		atomisp_subdev_set_ffmt(&asd->subdev, fh.state,
 					V4L2_SUBDEV_FORMAT_ACTIVE,
 					ATOMISP_SUBDEV_PAD_SINK, &isp_sink_fmt);
 	}
@@ -1016,7 +1016,7 @@ subdev_uninit:
 
 done:
 	if (!acc_node) {
-		atomisp_subdev_set_selection(&asd->subdev, fh.pad,
+		atomisp_subdev_set_selection(&asd->subdev, fh.state,
 					     V4L2_SUBDEV_FORMAT_ACTIVE,
 					     atomisp_subdev_source_pad(vdev),
 					     V4L2_SEL_TGT_COMPOSE, 0,
diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
index aeabd07bf518d..12f22ad007c73 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c
@@ -213,7 +213,7 @@ static int isp_subdev_unsubscribe_event(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int isp_subdev_enum_mbus_code(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->index >= ARRAY_SIZE(atomisp_in_fmt_conv) - 1)
@@ -246,7 +246,7 @@ static int isp_subdev_validate_rect(struct v4l2_subdev *sd, uint32_t pad,
 }
 
 struct v4l2_rect *atomisp_subdev_get_rect(struct v4l2_subdev *sd,
-	struct v4l2_subdev_pad_config *cfg,
+	struct v4l2_subdev_state *sd_state,
 	u32 which, uint32_t pad,
 	uint32_t target)
 {
@@ -255,9 +255,9 @@ struct v4l2_rect *atomisp_subdev_get_rect(struct v4l2_subdev *sd,
 	if (which == V4L2_SUBDEV_FORMAT_TRY) {
 		switch (target) {
 		case V4L2_SEL_TGT_CROP:
-			return v4l2_subdev_get_try_crop(sd, cfg, pad);
+			return v4l2_subdev_get_try_crop(sd, sd_state, pad);
 		case V4L2_SEL_TGT_COMPOSE:
-			return v4l2_subdev_get_try_compose(sd, cfg, pad);
+			return v4l2_subdev_get_try_compose(sd, sd_state, pad);
 		}
 	}
 
@@ -273,19 +273,20 @@ struct v4l2_rect *atomisp_subdev_get_rect(struct v4l2_subdev *sd,
 
 struct v4l2_mbus_framefmt
 *atomisp_subdev_get_ffmt(struct v4l2_subdev *sd,
-			 struct v4l2_subdev_pad_config *cfg, uint32_t which,
+			 struct v4l2_subdev_state *sd_state, uint32_t which,
 			 uint32_t pad)
 {
 	struct atomisp_sub_device *isp_sd = v4l2_get_subdevdata(sd);
 
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(sd, cfg, pad);
+		return v4l2_subdev_get_try_format(sd, sd_state, pad);
 
 	return &isp_sd->fmt[pad].fmt;
 }
 
 static void isp_get_fmt_rect(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg, uint32_t which,
+			     struct v4l2_subdev_state *sd_state,
+			     uint32_t which,
 			     struct v4l2_mbus_framefmt **ffmt,
 			     struct v4l2_rect *crop[ATOMISP_SUBDEV_PADS_NUM],
 			     struct v4l2_rect *comp[ATOMISP_SUBDEV_PADS_NUM])
@@ -293,16 +294,16 @@ static void isp_get_fmt_rect(struct v4l2_subdev *sd,
 	unsigned int i;
 
 	for (i = 0; i < ATOMISP_SUBDEV_PADS_NUM; i++) {
-		ffmt[i] = atomisp_subdev_get_ffmt(sd, cfg, which, i);
-		crop[i] = atomisp_subdev_get_rect(sd, cfg, which, i,
+		ffmt[i] = atomisp_subdev_get_ffmt(sd, sd_state, which, i);
+		crop[i] = atomisp_subdev_get_rect(sd, sd_state, which, i,
 						  V4L2_SEL_TGT_CROP);
-		comp[i] = atomisp_subdev_get_rect(sd, cfg, which, i,
+		comp[i] = atomisp_subdev_get_rect(sd, sd_state, which, i,
 						  V4L2_SEL_TGT_COMPOSE);
 	}
 }
 
 static void isp_subdev_propagate(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 u32 which, uint32_t pad, uint32_t target,
 				 uint32_t flags)
 {
@@ -313,7 +314,7 @@ static void isp_subdev_propagate(struct v4l2_subdev *sd,
 	if (flags & V4L2_SEL_FLAG_KEEP_CONFIG)
 		return;
 
-	isp_get_fmt_rect(sd, cfg, which, ffmt, crop, comp);
+	isp_get_fmt_rect(sd, sd_state, which, ffmt, crop, comp);
 
 	switch (pad) {
 	case ATOMISP_SUBDEV_PAD_SINK: {
@@ -323,7 +324,7 @@ static void isp_subdev_propagate(struct v4l2_subdev *sd,
 		r.width = ffmt[pad]->width;
 		r.height = ffmt[pad]->height;
 
-		atomisp_subdev_set_selection(sd, cfg, which, pad,
+		atomisp_subdev_set_selection(sd, sd_state, which, pad,
 					     target, flags, &r);
 		break;
 	}
@@ -331,7 +332,7 @@ static void isp_subdev_propagate(struct v4l2_subdev *sd,
 }
 
 static int isp_subdev_get_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	struct v4l2_rect *rec;
@@ -340,7 +341,7 @@ static int isp_subdev_get_selection(struct v4l2_subdev *sd,
 	if (rval)
 		return rval;
 
-	rec = atomisp_subdev_get_rect(sd, cfg, sel->which, sel->pad,
+	rec = atomisp_subdev_get_rect(sd, sd_state, sel->which, sel->pad,
 				      sel->target);
 	if (!rec)
 		return -EINVAL;
@@ -365,7 +366,7 @@ static const char *atomisp_pad_str(unsigned int pad)
 }
 
 int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 u32 which, uint32_t pad, uint32_t target,
 				 u32 flags, struct v4l2_rect *r)
 {
@@ -382,7 +383,7 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
 
 	stream_id = atomisp_source_pad_to_stream_id(isp_sd, vdev_pad);
 
-	isp_get_fmt_rect(sd, cfg, which, ffmt, crop, comp);
+	isp_get_fmt_rect(sd, sd_state, which, ffmt, crop, comp);
 
 	dev_dbg(isp->dev,
 		"sel: pad %s tgt %s l %d t %d w %d h %d which %s f 0x%8.8x\n",
@@ -450,7 +451,8 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
 				struct v4l2_rect tmp = *crop[pad];
 
 				atomisp_subdev_set_selection(
-				    sd, cfg, which, i, V4L2_SEL_TGT_COMPOSE,
+				    sd, sd_state, which, i,
+				    V4L2_SEL_TGT_COMPOSE,
 				    flags, &tmp);
 			}
 		}
@@ -551,9 +553,9 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
 		ffmt[pad]->height = comp[pad]->height;
 	}
 
-	if (!atomisp_subdev_get_rect(sd, cfg, which, pad, target))
+	if (!atomisp_subdev_get_rect(sd, sd_state, which, pad, target))
 		return -EINVAL;
-	*r = *atomisp_subdev_get_rect(sd, cfg, which, pad, target);
+	*r = *atomisp_subdev_get_rect(sd, sd_state, which, pad, target);
 
 	dev_dbg(isp->dev, "sel actual: l %d t %d w %d h %d\n",
 		r->left, r->top, r->width, r->height);
@@ -562,7 +564,7 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
 }
 
 static int isp_subdev_set_selection(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_selection *sel)
 {
 	int rval = isp_subdev_validate_rect(sd, sel->pad, sel->target);
@@ -570,7 +572,8 @@ static int isp_subdev_set_selection(struct v4l2_subdev *sd,
 	if (rval)
 		return rval;
 
-	return atomisp_subdev_set_selection(sd, cfg, sel->which, sel->pad,
+	return atomisp_subdev_set_selection(sd, sd_state, sel->which,
+					    sel->pad,
 					    sel->target, sel->flags, &sel->r);
 }
 
@@ -609,13 +612,14 @@ static int atomisp_get_sensor_bin_factor(struct atomisp_sub_device *asd)
 }
 
 void atomisp_subdev_set_ffmt(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg, uint32_t which,
+			     struct v4l2_subdev_state *sd_state,
+			     uint32_t which,
 			     u32 pad, struct v4l2_mbus_framefmt *ffmt)
 {
 	struct atomisp_sub_device *isp_sd = v4l2_get_subdevdata(sd);
 	struct atomisp_device *isp = isp_sd->isp;
 	struct v4l2_mbus_framefmt *__ffmt =
-	    atomisp_subdev_get_ffmt(sd, cfg, which, pad);
+	    atomisp_subdev_get_ffmt(sd, sd_state, which, pad);
 	u16 vdev_pad = atomisp_subdev_source_pad(sd->devnode);
 	enum atomisp_input_stream_id stream_id;
 
@@ -640,7 +644,7 @@ void atomisp_subdev_set_ffmt(struct v4l2_subdev *sd,
 
 		*__ffmt = *ffmt;
 
-		isp_subdev_propagate(sd, cfg, which, pad,
+		isp_subdev_propagate(sd, sd_state, which, pad,
 				     V4L2_SEL_TGT_CROP, 0);
 
 		if (which == V4L2_SUBDEV_FORMAT_ACTIVE) {
@@ -679,10 +683,11 @@ void atomisp_subdev_set_ffmt(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int isp_subdev_get_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
-	fmt->format = *atomisp_subdev_get_ffmt(sd, cfg, fmt->which, fmt->pad);
+	fmt->format = *atomisp_subdev_get_ffmt(sd, sd_state, fmt->which,
+					       fmt->pad);
 
 	return 0;
 }
@@ -698,10 +703,11 @@ static int isp_subdev_get_format(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int isp_subdev_set_format(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_format *fmt)
 {
-	atomisp_subdev_set_ffmt(sd, cfg, fmt->which, fmt->pad, &fmt->format);
+	atomisp_subdev_set_ffmt(sd, sd_state, fmt->which, fmt->pad,
+				&fmt->format);
 
 	return 0;
 }
diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp_subdev.h
index 330a77eed8aa6..d6fcfab6352d7 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_subdev.h
+++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.h
@@ -437,19 +437,20 @@ uint16_t atomisp_subdev_source_pad(struct video_device *vdev);
 /* Get pointer to appropriate format */
 struct v4l2_mbus_framefmt
 *atomisp_subdev_get_ffmt(struct v4l2_subdev *sd,
-			 struct v4l2_subdev_pad_config *cfg, uint32_t which,
+			 struct v4l2_subdev_state *sd_state, uint32_t which,
 			 uint32_t pad);
 struct v4l2_rect *atomisp_subdev_get_rect(struct v4l2_subdev *sd,
-	struct v4l2_subdev_pad_config *cfg,
+	struct v4l2_subdev_state *sd_state,
 	u32 which, uint32_t pad,
 	uint32_t target);
 int atomisp_subdev_set_selection(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 u32 which, uint32_t pad, uint32_t target,
 				 u32 flags, struct v4l2_rect *r);
 /* Actually set the format */
 void atomisp_subdev_set_ffmt(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg, uint32_t which,
+			     struct v4l2_subdev_state *sd_state,
+			     uint32_t which,
 			     u32 pad, struct v4l2_mbus_framefmt *ffmt);
 
 int atomisp_update_run_mode(struct atomisp_sub_device *asd);
diff --git a/drivers/staging/media/atomisp/pci/atomisp_tpg.c b/drivers/staging/media/atomisp/pci/atomisp_tpg.c
index 1def80bab1808..e29a96da5f98e 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_tpg.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_tpg.c
@@ -29,7 +29,7 @@ static int tpg_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int tpg_get_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *format)
 {
 	/*to fake*/
@@ -37,7 +37,7 @@ static int tpg_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int tpg_set_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *format)
 {
 	struct v4l2_mbus_framefmt *fmt = &format->format;
@@ -47,7 +47,7 @@ static int tpg_set_fmt(struct v4l2_subdev *sd,
 	/* only raw8 grbg is supported by TPG */
 	fmt->code = MEDIA_BUS_FMT_SGRBG8_1X8;
 	if (format->which == V4L2_SUBDEV_FORMAT_TRY) {
-		cfg->try_fmt = *fmt;
+		sd_state->pads->try_fmt = *fmt;
 		return 0;
 	}
 	return 0;
@@ -65,7 +65,7 @@ static int tpg_s_power(struct v4l2_subdev *sd, int on)
 }
 
 static int tpg_enum_mbus_code(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	/*to fake*/
@@ -73,7 +73,7 @@ static int tpg_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int tpg_enum_frame_size(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	/*to fake*/
@@ -81,7 +81,7 @@ static int tpg_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int tpg_enum_frame_ival(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_interval_enum *fie)
 {
 	/*to fake*/
diff --git a/drivers/staging/media/imx/imx-ic-prp.c b/drivers/staging/media/imx/imx-ic-prp.c
index f21ed881295f5..ac5fb332088ea 100644
--- a/drivers/staging/media/imx/imx-ic-prp.c
+++ b/drivers/staging/media/imx/imx-ic-prp.c
@@ -79,13 +79,13 @@ static void prp_stop(struct prp_priv *priv)
 }
 
 static struct v4l2_mbus_framefmt *
-__prp_get_fmt(struct prp_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__prp_get_fmt(struct prp_priv *priv, struct v4l2_subdev_state *sd_state,
 	      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	struct imx_ic_priv *ic_priv = priv->ic_priv;
 
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ic_priv->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&ic_priv->sd, sd_state, pad);
 	else
 		return &priv->format_mbus;
 }
@@ -95,7 +95,7 @@ __prp_get_fmt(struct prp_priv *priv, struct v4l2_subdev_pad_config *cfg,
  */
 
 static int prp_enum_mbus_code(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -115,7 +115,8 @@ static int prp_enum_mbus_code(struct v4l2_subdev *sd,
 			ret = -EINVAL;
 			goto out;
 		}
-		infmt = __prp_get_fmt(priv, cfg, PRP_SINK_PAD, code->which);
+		infmt = __prp_get_fmt(priv, sd_state, PRP_SINK_PAD,
+				      code->which);
 		code->code = infmt->code;
 		break;
 	default:
@@ -127,7 +128,7 @@ out:
 }
 
 static int prp_get_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -139,7 +140,7 @@ static int prp_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	fmt = __prp_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __prp_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out;
@@ -152,7 +153,7 @@ out:
 }
 
 static int prp_set_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -171,7 +172,7 @@ static int prp_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	infmt = __prp_get_fmt(priv, cfg, PRP_SINK_PAD, sdformat->which);
+	infmt = __prp_get_fmt(priv, sd_state, PRP_SINK_PAD, sdformat->which);
 
 	switch (sdformat->pad) {
 	case PRP_SINK_PAD:
@@ -201,7 +202,7 @@ static int prp_set_fmt(struct v4l2_subdev *sd,
 
 	imx_media_try_colorimetry(&sdformat->format, true);
 
-	fmt = __prp_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __prp_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	*fmt = sdformat->format;
 out:
 	mutex_unlock(&priv->lock);
diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c b/drivers/staging/media/imx/imx-ic-prpencvf.c
index d990553de87b3..9b81cfbcd7779 100644
--- a/drivers/staging/media/imx/imx-ic-prpencvf.c
+++ b/drivers/staging/media/imx/imx-ic-prpencvf.c
@@ -787,13 +787,13 @@ static void prp_stop(struct prp_priv *priv)
 }
 
 static struct v4l2_mbus_framefmt *
-__prp_get_fmt(struct prp_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__prp_get_fmt(struct prp_priv *priv, struct v4l2_subdev_state *sd_state,
 	      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	struct imx_ic_priv *ic_priv = priv->ic_priv;
 
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ic_priv->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&ic_priv->sd, sd_state, pad);
 	else
 		return &priv->format_mbus[pad];
 }
@@ -841,7 +841,7 @@ static bool prp_bound_align_output(struct v4l2_mbus_framefmt *outfmt,
  */
 
 static int prp_enum_mbus_code(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad >= PRPENCVF_NUM_PADS)
@@ -852,7 +852,7 @@ static int prp_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int prp_get_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -864,7 +864,7 @@ static int prp_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	fmt = __prp_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __prp_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out;
@@ -877,7 +877,7 @@ out:
 }
 
 static void prp_try_fmt(struct prp_priv *priv,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat,
 			const struct imx_media_pixfmt **cc)
 {
@@ -894,7 +894,8 @@ static void prp_try_fmt(struct prp_priv *priv,
 		sdformat->format.code = (*cc)->codes[0];
 	}
 
-	infmt = __prp_get_fmt(priv, cfg, PRPENCVF_SINK_PAD, sdformat->which);
+	infmt = __prp_get_fmt(priv, sd_state, PRPENCVF_SINK_PAD,
+			      sdformat->which);
 
 	if (sdformat->pad == PRPENCVF_SRC_PAD) {
 		sdformat->format.field = infmt->field;
@@ -920,7 +921,7 @@ static void prp_try_fmt(struct prp_priv *priv,
 }
 
 static int prp_set_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -938,9 +939,9 @@ static int prp_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	prp_try_fmt(priv, cfg, sdformat, &cc);
+	prp_try_fmt(priv, sd_state, sdformat, &cc);
 
-	fmt = __prp_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __prp_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	*fmt = sdformat->format;
 
 	/* propagate a default format to source pad */
@@ -952,9 +953,9 @@ static int prp_set_fmt(struct v4l2_subdev *sd,
 		format.pad = PRPENCVF_SRC_PAD;
 		format.which = sdformat->which;
 		format.format = sdformat->format;
-		prp_try_fmt(priv, cfg, &format, &outcc);
+		prp_try_fmt(priv, sd_state, &format, &outcc);
 
-		outfmt = __prp_get_fmt(priv, cfg, PRPENCVF_SRC_PAD,
+		outfmt = __prp_get_fmt(priv, sd_state, PRPENCVF_SRC_PAD,
 				       sdformat->which);
 		*outfmt = format.format;
 		if (sdformat->which == V4L2_SUBDEV_FORMAT_ACTIVE)
@@ -970,7 +971,7 @@ out:
 }
 
 static int prp_enum_frame_size(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct prp_priv *priv = sd_to_priv(sd);
@@ -988,7 +989,7 @@ static int prp_enum_frame_size(struct v4l2_subdev *sd,
 	format.format.code = fse->code;
 	format.format.width = 1;
 	format.format.height = 1;
-	prp_try_fmt(priv, cfg, &format, &cc);
+	prp_try_fmt(priv, sd_state, &format, &cc);
 	fse->min_width = format.format.width;
 	fse->min_height = format.format.height;
 
@@ -1000,7 +1001,7 @@ static int prp_enum_frame_size(struct v4l2_subdev *sd,
 	format.format.code = fse->code;
 	format.format.width = -1;
 	format.format.height = -1;
-	prp_try_fmt(priv, cfg, &format, &cc);
+	prp_try_fmt(priv, sd_state, &format, &cc);
 	fse->max_width = format.format.width;
 	fse->max_height = format.format.height;
 out:
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index e3bfd635a89ae..d2f1d40b2d5a2 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -1139,31 +1139,32 @@ static int csi_link_validate(struct v4l2_subdev *sd,
 }
 
 static struct v4l2_mbus_framefmt *
-__csi_get_fmt(struct csi_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__csi_get_fmt(struct csi_priv *priv, struct v4l2_subdev_state *sd_state,
 	      unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&priv->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&priv->sd, sd_state, pad);
 	else
 		return &priv->format_mbus[pad];
 }
 
 static struct v4l2_rect *
-__csi_get_crop(struct csi_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__csi_get_crop(struct csi_priv *priv, struct v4l2_subdev_state *sd_state,
 	       enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_crop(&priv->sd, cfg, CSI_SINK_PAD);
+		return v4l2_subdev_get_try_crop(&priv->sd, sd_state,
+						CSI_SINK_PAD);
 	else
 		return &priv->crop;
 }
 
 static struct v4l2_rect *
-__csi_get_compose(struct csi_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__csi_get_compose(struct csi_priv *priv, struct v4l2_subdev_state *sd_state,
 		  enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_compose(&priv->sd, cfg,
+		return v4l2_subdev_get_try_compose(&priv->sd, sd_state,
 						   CSI_SINK_PAD);
 	else
 		return &priv->compose;
@@ -1171,7 +1172,7 @@ __csi_get_compose(struct csi_priv *priv, struct v4l2_subdev_pad_config *cfg,
 
 static void csi_try_crop(struct csi_priv *priv,
 			 struct v4l2_rect *crop,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_mbus_framefmt *infmt,
 			 struct v4l2_fwnode_endpoint *upstream_ep)
 {
@@ -1210,7 +1211,7 @@ static void csi_try_crop(struct csi_priv *priv,
 }
 
 static int csi_enum_mbus_code(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1221,7 +1222,7 @@ static int csi_enum_mbus_code(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	infmt = __csi_get_fmt(priv, cfg, CSI_SINK_PAD, code->which);
+	infmt = __csi_get_fmt(priv, sd_state, CSI_SINK_PAD, code->which);
 	incc = imx_media_find_mbus_format(infmt->code, PIXFMT_SEL_ANY);
 
 	switch (code->pad) {
@@ -1263,7 +1264,7 @@ out:
 }
 
 static int csi_enum_frame_size(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1282,7 +1283,7 @@ static int csi_enum_frame_size(struct v4l2_subdev *sd,
 		fse->min_height = MIN_H;
 		fse->max_height = MAX_H;
 	} else {
-		crop = __csi_get_crop(priv, cfg, fse->which);
+		crop = __csi_get_crop(priv, sd_state, fse->which);
 
 		fse->min_width = fse->index & 1 ?
 			crop->width / 2 : crop->width;
@@ -1297,7 +1298,7 @@ static int csi_enum_frame_size(struct v4l2_subdev *sd,
 }
 
 static int csi_enum_frame_interval(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1313,7 +1314,7 @@ static int csi_enum_frame_interval(struct v4l2_subdev *sd,
 	mutex_lock(&priv->lock);
 
 	input_fi = &priv->frame_interval[CSI_SINK_PAD];
-	crop = __csi_get_crop(priv, cfg, fie->which);
+	crop = __csi_get_crop(priv, sd_state, fie->which);
 
 	if ((fie->width != crop->width && fie->width != crop->width / 2) ||
 	    (fie->height != crop->height && fie->height != crop->height / 2)) {
@@ -1333,7 +1334,7 @@ out:
 }
 
 static int csi_get_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1345,7 +1346,7 @@ static int csi_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	fmt = __csi_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __csi_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out;
@@ -1358,11 +1359,11 @@ out:
 }
 
 static void csi_try_field(struct csi_priv *priv,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *sdformat)
 {
 	struct v4l2_mbus_framefmt *infmt =
-		__csi_get_fmt(priv, cfg, CSI_SINK_PAD, sdformat->which);
+		__csi_get_fmt(priv, sd_state, CSI_SINK_PAD, sdformat->which);
 
 	/*
 	 * no restrictions on sink pad field type except must
@@ -1408,7 +1409,7 @@ static void csi_try_field(struct csi_priv *priv,
 
 static void csi_try_fmt(struct csi_priv *priv,
 			struct v4l2_fwnode_endpoint *upstream_ep,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat,
 			struct v4l2_rect *crop,
 			struct v4l2_rect *compose,
@@ -1418,7 +1419,7 @@ static void csi_try_fmt(struct csi_priv *priv,
 	struct v4l2_mbus_framefmt *infmt;
 	u32 code;
 
-	infmt = __csi_get_fmt(priv, cfg, CSI_SINK_PAD, sdformat->which);
+	infmt = __csi_get_fmt(priv, sd_state, CSI_SINK_PAD, sdformat->which);
 
 	switch (sdformat->pad) {
 	case CSI_SRC_PAD_DIRECT:
@@ -1445,7 +1446,7 @@ static void csi_try_fmt(struct csi_priv *priv,
 			}
 		}
 
-		csi_try_field(priv, cfg, sdformat);
+		csi_try_field(priv, sd_state, sdformat);
 
 		/* propagate colorimetry from sink */
 		sdformat->format.colorspace = infmt->colorspace;
@@ -1469,7 +1470,7 @@ static void csi_try_fmt(struct csi_priv *priv,
 			sdformat->format.code = (*cc)->codes[0];
 		}
 
-		csi_try_field(priv, cfg, sdformat);
+		csi_try_field(priv, sd_state, sdformat);
 
 		/* Reset crop and compose rectangles */
 		crop->left = 0;
@@ -1478,7 +1479,8 @@ static void csi_try_fmt(struct csi_priv *priv,
 		crop->height = sdformat->format.height;
 		if (sdformat->format.field == V4L2_FIELD_ALTERNATE)
 			crop->height *= 2;
-		csi_try_crop(priv, crop, cfg, &sdformat->format, upstream_ep);
+		csi_try_crop(priv, crop, sd_state, &sdformat->format,
+			     upstream_ep);
 		compose->left = 0;
 		compose->top = 0;
 		compose->width = crop->width;
@@ -1492,7 +1494,7 @@ static void csi_try_fmt(struct csi_priv *priv,
 }
 
 static int csi_set_fmt(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *sd_state,
 		       struct v4l2_subdev_format *sdformat)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1518,12 +1520,13 @@ static int csi_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	crop = __csi_get_crop(priv, cfg, sdformat->which);
-	compose = __csi_get_compose(priv, cfg, sdformat->which);
+	crop = __csi_get_crop(priv, sd_state, sdformat->which);
+	compose = __csi_get_compose(priv, sd_state, sdformat->which);
 
-	csi_try_fmt(priv, &upstream_ep, cfg, sdformat, crop, compose, &cc);
+	csi_try_fmt(priv, &upstream_ep, sd_state, sdformat, crop, compose,
+		    &cc);
 
-	fmt = __csi_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __csi_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	*fmt = sdformat->format;
 
 	if (sdformat->pad == CSI_SINK_PAD) {
@@ -1538,10 +1541,11 @@ static int csi_set_fmt(struct v4l2_subdev *sd,
 			format.pad = pad;
 			format.which = sdformat->which;
 			format.format = sdformat->format;
-			csi_try_fmt(priv, &upstream_ep, cfg, &format,
+			csi_try_fmt(priv, &upstream_ep, sd_state, &format,
 				    NULL, compose, &outcc);
 
-			outfmt = __csi_get_fmt(priv, cfg, pad, sdformat->which);
+			outfmt = __csi_get_fmt(priv, sd_state, pad,
+					       sdformat->which);
 			*outfmt = format.format;
 
 			if (sdformat->which == V4L2_SUBDEV_FORMAT_ACTIVE)
@@ -1558,7 +1562,7 @@ out:
 }
 
 static int csi_get_selection(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1571,9 +1575,9 @@ static int csi_get_selection(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	infmt = __csi_get_fmt(priv, cfg, CSI_SINK_PAD, sel->which);
-	crop = __csi_get_crop(priv, cfg, sel->which);
-	compose = __csi_get_compose(priv, cfg, sel->which);
+	infmt = __csi_get_fmt(priv, sd_state, CSI_SINK_PAD, sel->which);
+	crop = __csi_get_crop(priv, sd_state, sel->which);
+	compose = __csi_get_compose(priv, sd_state, sel->which);
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP_BOUNDS:
@@ -1622,7 +1626,7 @@ static int csi_set_scale(u32 *compose, u32 crop, u32 flags)
 }
 
 static int csi_set_selection(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_selection *sel)
 {
 	struct csi_priv *priv = v4l2_get_subdevdata(sd);
@@ -1647,9 +1651,9 @@ static int csi_set_selection(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	infmt = __csi_get_fmt(priv, cfg, CSI_SINK_PAD, sel->which);
-	crop = __csi_get_crop(priv, cfg, sel->which);
-	compose = __csi_get_compose(priv, cfg, sel->which);
+	infmt = __csi_get_fmt(priv, sd_state, CSI_SINK_PAD, sel->which);
+	crop = __csi_get_crop(priv, sd_state, sel->which);
+	compose = __csi_get_compose(priv, sd_state, sel->which);
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP:
@@ -1665,7 +1669,7 @@ static int csi_set_selection(struct v4l2_subdev *sd,
 			goto out;
 		}
 
-		csi_try_crop(priv, &sel->r, cfg, infmt, &upstream_ep);
+		csi_try_crop(priv, &sel->r, sd_state, infmt, &upstream_ep);
 
 		*crop = sel->r;
 
@@ -1706,7 +1710,7 @@ static int csi_set_selection(struct v4l2_subdev *sd,
 	for (pad = CSI_SINK_PAD + 1; pad < CSI_NUM_PADS; pad++) {
 		struct v4l2_mbus_framefmt *outfmt;
 
-		outfmt = __csi_get_fmt(priv, cfg, pad, sel->which);
+		outfmt = __csi_get_fmt(priv, sd_state, pad, sel->which);
 		outfmt->width = compose->width;
 		outfmt->height = compose->height;
 	}
diff --git a/drivers/staging/media/imx/imx-media-utils.c b/drivers/staging/media/imx/imx-media-utils.c
index 5128915a5d6f4..6f90acf9c725c 100644
--- a/drivers/staging/media/imx/imx-media-utils.c
+++ b/drivers/staging/media/imx/imx-media-utils.c
@@ -429,7 +429,7 @@ EXPORT_SYMBOL_GPL(imx_media_init_mbus_fmt);
  * of a subdev. Can be used as the .init_cfg pad operation.
  */
 int imx_media_init_cfg(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg)
+		       struct v4l2_subdev_state *sd_state)
 {
 	struct v4l2_mbus_framefmt *mf_try;
 	struct v4l2_subdev_format format;
@@ -445,7 +445,7 @@ int imx_media_init_cfg(struct v4l2_subdev *sd,
 		if (ret)
 			continue;
 
-		mf_try = v4l2_subdev_get_try_format(sd, cfg, pad);
+		mf_try = v4l2_subdev_get_try_format(sd, sd_state, pad);
 		*mf_try = format.format;
 	}
 
diff --git a/drivers/staging/media/imx/imx-media-vdic.c b/drivers/staging/media/imx/imx-media-vdic.c
index abf290bda98da..3c2093c520bab 100644
--- a/drivers/staging/media/imx/imx-media-vdic.c
+++ b/drivers/staging/media/imx/imx-media-vdic.c
@@ -532,17 +532,17 @@ out:
 }
 
 static struct v4l2_mbus_framefmt *
-__vdic_get_fmt(struct vdic_priv *priv, struct v4l2_subdev_pad_config *cfg,
+__vdic_get_fmt(struct vdic_priv *priv, struct v4l2_subdev_state *sd_state,
 	       unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&priv->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&priv->sd, sd_state, pad);
 	else
 		return &priv->format_mbus[pad];
 }
 
 static int vdic_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (code->pad >= VDIC_NUM_PADS)
@@ -553,7 +553,7 @@ static int vdic_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int vdic_get_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat)
 {
 	struct vdic_priv *priv = v4l2_get_subdevdata(sd);
@@ -565,7 +565,7 @@ static int vdic_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&priv->lock);
 
-	fmt = __vdic_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __vdic_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out;
@@ -578,7 +578,7 @@ out:
 }
 
 static void vdic_try_fmt(struct vdic_priv *priv,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *sd_state,
 			 struct v4l2_subdev_format *sdformat,
 			 const struct imx_media_pixfmt **cc)
 {
@@ -594,7 +594,7 @@ static void vdic_try_fmt(struct vdic_priv *priv,
 		sdformat->format.code = (*cc)->codes[0];
 	}
 
-	infmt = __vdic_get_fmt(priv, cfg, priv->active_input_pad,
+	infmt = __vdic_get_fmt(priv, sd_state, priv->active_input_pad,
 			       sdformat->which);
 
 	switch (sdformat->pad) {
@@ -620,7 +620,7 @@ static void vdic_try_fmt(struct vdic_priv *priv,
 }
 
 static int vdic_set_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat)
 {
 	struct vdic_priv *priv = v4l2_get_subdevdata(sd);
@@ -638,9 +638,9 @@ static int vdic_set_fmt(struct v4l2_subdev *sd,
 		goto out;
 	}
 
-	vdic_try_fmt(priv, cfg, sdformat, &cc);
+	vdic_try_fmt(priv, sd_state, sdformat, &cc);
 
-	fmt = __vdic_get_fmt(priv, cfg, sdformat->pad, sdformat->which);
+	fmt = __vdic_get_fmt(priv, sd_state, sdformat->pad, sdformat->which);
 	*fmt = sdformat->format;
 
 	/* propagate format to source pad */
@@ -653,9 +653,9 @@ static int vdic_set_fmt(struct v4l2_subdev *sd,
 		format.pad = VDIC_SRC_PAD_DIRECT;
 		format.which = sdformat->which;
 		format.format = sdformat->format;
-		vdic_try_fmt(priv, cfg, &format, &outcc);
+		vdic_try_fmt(priv, sd_state, &format, &outcc);
 
-		outfmt = __vdic_get_fmt(priv, cfg, VDIC_SRC_PAD_DIRECT,
+		outfmt = __vdic_get_fmt(priv, sd_state, VDIC_SRC_PAD_DIRECT,
 					sdformat->which);
 		*outfmt = format.format;
 		if (sdformat->which == V4L2_SUBDEV_FORMAT_ACTIVE)
diff --git a/drivers/staging/media/imx/imx-media.h b/drivers/staging/media/imx/imx-media.h
index 492d9a64e7044..6740e79174589 100644
--- a/drivers/staging/media/imx/imx-media.h
+++ b/drivers/staging/media/imx/imx-media.h
@@ -193,7 +193,7 @@ int imx_media_init_mbus_fmt(struct v4l2_mbus_framefmt *mbus,
 			    u32 width, u32 height, u32 code, u32 field,
 			    const struct imx_media_pixfmt **cc);
 int imx_media_init_cfg(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg);
+		       struct v4l2_subdev_state *sd_state);
 void imx_media_try_colorimetry(struct v4l2_mbus_framefmt *tryfmt,
 			       bool ic_route);
 int imx_media_mbus_fmt_to_pix_fmt(struct v4l2_pix_format *pix,
diff --git a/drivers/staging/media/imx/imx6-mipi-csi2.c b/drivers/staging/media/imx/imx6-mipi-csi2.c
index fc2378ac04b74..9de0ebd439dc6 100644
--- a/drivers/staging/media/imx/imx6-mipi-csi2.c
+++ b/drivers/staging/media/imx/imx6-mipi-csi2.c
@@ -508,17 +508,17 @@ out:
 }
 
 static struct v4l2_mbus_framefmt *
-__csi2_get_fmt(struct csi2_dev *csi2, struct v4l2_subdev_pad_config *cfg,
+__csi2_get_fmt(struct csi2_dev *csi2, struct v4l2_subdev_state *sd_state,
 	       unsigned int pad, enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csi2->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&csi2->sd, sd_state, pad);
 	else
 		return &csi2->format_mbus;
 }
 
 static int csi2_get_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat)
 {
 	struct csi2_dev *csi2 = sd_to_dev(sd);
@@ -526,7 +526,7 @@ static int csi2_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&csi2->lock);
 
-	fmt = __csi2_get_fmt(csi2, cfg, sdformat->pad, sdformat->which);
+	fmt = __csi2_get_fmt(csi2, sd_state, sdformat->pad, sdformat->which);
 
 	sdformat->format = *fmt;
 
@@ -536,7 +536,7 @@ static int csi2_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int csi2_set_fmt(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg,
+			struct v4l2_subdev_state *sd_state,
 			struct v4l2_subdev_format *sdformat)
 {
 	struct csi2_dev *csi2 = sd_to_dev(sd);
@@ -557,7 +557,7 @@ static int csi2_set_fmt(struct v4l2_subdev *sd,
 	if (sdformat->pad != CSI2_SINK_PAD)
 		sdformat->format = csi2->format_mbus;
 
-	fmt = __csi2_get_fmt(csi2, cfg, sdformat->pad, sdformat->which);
+	fmt = __csi2_get_fmt(csi2, sd_state, sdformat->pad, sdformat->which);
 
 	*fmt = sdformat->format;
 out:
diff --git a/drivers/staging/media/imx/imx7-media-csi.c b/drivers/staging/media/imx/imx7-media-csi.c
index f85a2f5f1413b..894c4de31790e 100644
--- a/drivers/staging/media/imx/imx7-media-csi.c
+++ b/drivers/staging/media/imx/imx7-media-csi.c
@@ -724,7 +724,7 @@ out_unlock:
 }
 
 static int imx7_csi_init_cfg(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg)
+			     struct v4l2_subdev_state *sd_state)
 {
 	struct imx7_csi *csi = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *mf;
@@ -732,7 +732,7 @@ static int imx7_csi_init_cfg(struct v4l2_subdev *sd,
 	int i;
 
 	for (i = 0; i < IMX7_CSI_PADS_NUM; i++) {
-		mf = v4l2_subdev_get_try_format(sd, cfg, i);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, i);
 
 		ret = imx_media_init_mbus_fmt(mf, 800, 600, 0, V4L2_FIELD_NONE,
 					      &csi->cc[i]);
@@ -745,18 +745,18 @@ static int imx7_csi_init_cfg(struct v4l2_subdev *sd,
 
 static struct v4l2_mbus_framefmt *
 imx7_csi_get_format(struct imx7_csi *csi,
-		    struct v4l2_subdev_pad_config *cfg,
+		    struct v4l2_subdev_state *sd_state,
 		    unsigned int pad,
 		    enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csi->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&csi->sd, sd_state, pad);
 
 	return &csi->format_mbus[pad];
 }
 
 static int imx7_csi_enum_mbus_code(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct imx7_csi *csi = v4l2_get_subdevdata(sd);
@@ -765,7 +765,8 @@ static int imx7_csi_enum_mbus_code(struct v4l2_subdev *sd,
 
 	mutex_lock(&csi->lock);
 
-	in_fmt = imx7_csi_get_format(csi, cfg, IMX7_CSI_PAD_SINK, code->which);
+	in_fmt = imx7_csi_get_format(csi, sd_state, IMX7_CSI_PAD_SINK,
+				     code->which);
 
 	switch (code->pad) {
 	case IMX7_CSI_PAD_SINK:
@@ -791,7 +792,7 @@ out_unlock:
 }
 
 static int imx7_csi_get_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *sdformat)
 {
 	struct imx7_csi *csi = v4l2_get_subdevdata(sd);
@@ -800,7 +801,8 @@ static int imx7_csi_get_fmt(struct v4l2_subdev *sd,
 
 	mutex_lock(&csi->lock);
 
-	fmt = imx7_csi_get_format(csi, cfg, sdformat->pad, sdformat->which);
+	fmt = imx7_csi_get_format(csi, sd_state, sdformat->pad,
+				  sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out_unlock;
@@ -815,7 +817,7 @@ out_unlock:
 }
 
 static int imx7_csi_try_fmt(struct imx7_csi *csi,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *sdformat,
 			    const struct imx_media_pixfmt **cc)
 {
@@ -823,7 +825,7 @@ static int imx7_csi_try_fmt(struct imx7_csi *csi,
 	struct v4l2_mbus_framefmt *in_fmt;
 	u32 code;
 
-	in_fmt = imx7_csi_get_format(csi, cfg, IMX7_CSI_PAD_SINK,
+	in_fmt = imx7_csi_get_format(csi, sd_state, IMX7_CSI_PAD_SINK,
 				     sdformat->which);
 	if (!in_fmt)
 		return -EINVAL;
@@ -868,7 +870,7 @@ static int imx7_csi_try_fmt(struct imx7_csi *csi,
 }
 
 static int imx7_csi_set_fmt(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *sdformat)
 {
 	struct imx7_csi *csi = v4l2_get_subdevdata(sd);
@@ -889,11 +891,12 @@ static int imx7_csi_set_fmt(struct v4l2_subdev *sd,
 		goto out_unlock;
 	}
 
-	ret = imx7_csi_try_fmt(csi, cfg, sdformat, &cc);
+	ret = imx7_csi_try_fmt(csi, sd_state, sdformat, &cc);
 	if (ret < 0)
 		goto out_unlock;
 
-	fmt = imx7_csi_get_format(csi, cfg, sdformat->pad, sdformat->which);
+	fmt = imx7_csi_get_format(csi, sd_state, sdformat->pad,
+				  sdformat->which);
 	if (!fmt) {
 		ret = -EINVAL;
 		goto out_unlock;
@@ -906,11 +909,11 @@ static int imx7_csi_set_fmt(struct v4l2_subdev *sd,
 		format.pad = IMX7_CSI_PAD_SRC;
 		format.which = sdformat->which;
 		format.format = sdformat->format;
-		if (imx7_csi_try_fmt(csi, cfg, &format, &outcc)) {
+		if (imx7_csi_try_fmt(csi, sd_state, &format, &outcc)) {
 			ret = -EINVAL;
 			goto out_unlock;
 		}
-		outfmt = imx7_csi_get_format(csi, cfg, IMX7_CSI_PAD_SRC,
+		outfmt = imx7_csi_get_format(csi, sd_state, IMX7_CSI_PAD_SRC,
 					     sdformat->which);
 		*outfmt = format.format;
 
diff --git a/drivers/staging/media/imx/imx7-mipi-csis.c b/drivers/staging/media/imx/imx7-mipi-csis.c
index 9cd3c86fee583..ead696eb46103 100644
--- a/drivers/staging/media/imx/imx7-mipi-csis.c
+++ b/drivers/staging/media/imx/imx7-mipi-csis.c
@@ -880,26 +880,26 @@ done:
 
 static struct v4l2_mbus_framefmt *
 mipi_csis_get_format(struct csi_state *state,
-		     struct v4l2_subdev_pad_config *cfg,
+		     struct v4l2_subdev_state *sd_state,
 		     enum v4l2_subdev_format_whence which,
 		     unsigned int pad)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&state->sd, cfg, pad);
+		return v4l2_subdev_get_try_format(&state->sd, sd_state, pad);
 
 	return &state->format_mbus;
 }
 
 static int mipi_csis_init_cfg(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg)
+			      struct v4l2_subdev_state *sd_state)
 {
 	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct v4l2_mbus_framefmt *fmt_sink;
 	struct v4l2_mbus_framefmt *fmt_source;
 	enum v4l2_subdev_format_whence which;
 
-	which = cfg ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
-	fmt_sink = mipi_csis_get_format(state, cfg, which, CSIS_PAD_SINK);
+	which = sd_state ? V4L2_SUBDEV_FORMAT_TRY : V4L2_SUBDEV_FORMAT_ACTIVE;
+	fmt_sink = mipi_csis_get_format(state, sd_state, which, CSIS_PAD_SINK);
 
 	fmt_sink->code = MEDIA_BUS_FMT_UYVY8_1X16;
 	fmt_sink->width = MIPI_CSIS_DEF_PIX_WIDTH;
@@ -918,23 +918,25 @@ static int mipi_csis_init_cfg(struct v4l2_subdev *sd,
 	 * configuration, cfg is NULL, which indicates there's no source pad
 	 * configuration to set.
 	 */
-	if (!cfg)
+	if (!sd_state)
 		return 0;
 
-	fmt_source = mipi_csis_get_format(state, cfg, which, CSIS_PAD_SOURCE);
+	fmt_source = mipi_csis_get_format(state, sd_state, which,
+					  CSIS_PAD_SOURCE);
 	*fmt_source = *fmt_sink;
 
 	return 0;
 }
 
 static int mipi_csis_get_fmt(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *sdformat)
 {
 	struct csi_state *state = mipi_sd_to_csis_state(sd);
 	struct v4l2_mbus_framefmt *fmt;
 
-	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
+	fmt = mipi_csis_get_format(state, sd_state, sdformat->which,
+				   sdformat->pad);
 
 	mutex_lock(&state->lock);
 	sdformat->format = *fmt;
@@ -944,7 +946,7 @@ static int mipi_csis_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int mipi_csis_enum_mbus_code(struct v4l2_subdev *sd,
-				    struct v4l2_subdev_pad_config *cfg,
+				    struct v4l2_subdev_state *sd_state,
 				    struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct csi_state *state = mipi_sd_to_csis_state(sd);
@@ -959,7 +961,8 @@ static int mipi_csis_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index > 0)
 			return -EINVAL;
 
-		fmt = mipi_csis_get_format(state, cfg, code->which, code->pad);
+		fmt = mipi_csis_get_format(state, sd_state, code->which,
+					   code->pad);
 		code->code = fmt->code;
 		return 0;
 	}
@@ -976,7 +979,7 @@ static int mipi_csis_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int mipi_csis_set_fmt(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_format *sdformat)
 {
 	struct csi_state *state = mipi_sd_to_csis_state(sd);
@@ -989,7 +992,7 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *sd,
 	 * modified.
 	 */
 	if (sdformat->pad == CSIS_PAD_SOURCE)
-		return mipi_csis_get_fmt(sd, cfg, sdformat);
+		return mipi_csis_get_fmt(sd, sd_state, sdformat);
 
 	if (sdformat->pad != CSIS_PAD_SINK)
 		return -EINVAL;
@@ -1029,7 +1032,8 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *sd,
 			      &sdformat->format.height, 1,
 			      CSIS_MAX_PIX_HEIGHT, 0, 0);
 
-	fmt = mipi_csis_get_format(state, cfg, sdformat->which, sdformat->pad);
+	fmt = mipi_csis_get_format(state, sd_state, sdformat->which,
+				   sdformat->pad);
 
 	mutex_lock(&state->lock);
 
@@ -1040,7 +1044,7 @@ static int mipi_csis_set_fmt(struct v4l2_subdev *sd,
 	sdformat->format = *fmt;
 
 	/* Propagate the format from sink to source. */
-	fmt = mipi_csis_get_format(state, cfg, sdformat->which,
+	fmt = mipi_csis_get_format(state, sd_state, sdformat->which,
 				   CSIS_PAD_SOURCE);
 	*fmt = sdformat->format;
 
diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c
index 6d9c49b395314..38a2407645096 100644
--- a/drivers/staging/media/ipu3/ipu3-v4l2.c
+++ b/drivers/staging/media/ipu3/ipu3-v4l2.c
@@ -36,7 +36,7 @@ static int imgu_subdev_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	/* Initialize try_fmt */
 	for (i = 0; i < IMGU_NODE_NUM; i++) {
 		struct v4l2_mbus_framefmt *try_fmt =
-			v4l2_subdev_get_try_format(sd, fh->pad, i);
+			v4l2_subdev_get_try_format(sd, fh->state, i);
 
 		try_fmt->width = try_crop.width;
 		try_fmt->height = try_crop.height;
@@ -44,8 +44,8 @@ static int imgu_subdev_open(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 		try_fmt->field = V4L2_FIELD_NONE;
 	}
 
-	*v4l2_subdev_get_try_crop(sd, fh->pad, IMGU_NODE_IN) = try_crop;
-	*v4l2_subdev_get_try_compose(sd, fh->pad, IMGU_NODE_IN) = try_crop;
+	*v4l2_subdev_get_try_crop(sd, fh->state, IMGU_NODE_IN) = try_crop;
+	*v4l2_subdev_get_try_compose(sd, fh->state, IMGU_NODE_IN) = try_crop;
 
 	return 0;
 }
@@ -120,7 +120,7 @@ static int imgu_subdev_s_stream(struct v4l2_subdev *sd, int enable)
 }
 
 static int imgu_subdev_get_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct imgu_device *imgu = v4l2_get_subdevdata(sd);
@@ -136,7 +136,7 @@ static int imgu_subdev_get_fmt(struct v4l2_subdev *sd,
 	if (fmt->which == V4L2_SUBDEV_FORMAT_ACTIVE) {
 		fmt->format = imgu_pipe->nodes[pad].pad_fmt;
 	} else {
-		mf = v4l2_subdev_get_try_format(sd, cfg, pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, pad);
 		fmt->format = *mf;
 	}
 
@@ -144,7 +144,7 @@ static int imgu_subdev_get_fmt(struct v4l2_subdev *sd,
 }
 
 static int imgu_subdev_set_fmt(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_format *fmt)
 {
 	struct imgu_media_pipe *imgu_pipe;
@@ -161,7 +161,7 @@ static int imgu_subdev_set_fmt(struct v4l2_subdev *sd,
 
 	imgu_pipe = &imgu->imgu_pipe[pipe];
 	if (fmt->which == V4L2_SUBDEV_FORMAT_TRY)
-		mf = v4l2_subdev_get_try_format(sd, cfg, pad);
+		mf = v4l2_subdev_get_try_format(sd, sd_state, pad);
 	else
 		mf = &imgu_pipe->nodes[pad].pad_fmt;
 
@@ -189,7 +189,7 @@ static int imgu_subdev_set_fmt(struct v4l2_subdev *sd,
 }
 
 static int imgu_subdev_get_selection(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct v4l2_rect *try_sel, *r;
@@ -202,11 +202,11 @@ static int imgu_subdev_get_selection(struct v4l2_subdev *sd,
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP:
-		try_sel = v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		r = &imgu_sd->rect.eff;
 		break;
 	case V4L2_SEL_TGT_COMPOSE:
-		try_sel = v4l2_subdev_get_try_compose(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
 		r = &imgu_sd->rect.bds;
 		break;
 	default:
@@ -222,7 +222,7 @@ static int imgu_subdev_get_selection(struct v4l2_subdev *sd,
 }
 
 static int imgu_subdev_set_selection(struct v4l2_subdev *sd,
-				     struct v4l2_subdev_pad_config *cfg,
+				     struct v4l2_subdev_state *sd_state,
 				     struct v4l2_subdev_selection *sel)
 {
 	struct imgu_device *imgu = v4l2_get_subdevdata(sd);
@@ -241,11 +241,11 @@ static int imgu_subdev_set_selection(struct v4l2_subdev *sd,
 
 	switch (sel->target) {
 	case V4L2_SEL_TGT_CROP:
-		try_sel = v4l2_subdev_get_try_crop(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_crop(sd, sd_state, sel->pad);
 		rect = &imgu_sd->rect.eff;
 		break;
 	case V4L2_SEL_TGT_COMPOSE:
-		try_sel = v4l2_subdev_get_try_compose(sd, cfg, sel->pad);
+		try_sel = v4l2_subdev_get_try_compose(sd, sd_state, sel->pad);
 		rect = &imgu_sd->rect.bds;
 		break;
 	default:
diff --git a/drivers/staging/media/omap4iss/iss_csi2.c b/drivers/staging/media/omap4iss/iss_csi2.c
index a6dc2d2b1228b..124ab2f44fbfb 100644
--- a/drivers/staging/media/omap4iss/iss_csi2.c
+++ b/drivers/staging/media/omap4iss/iss_csi2.c
@@ -825,19 +825,20 @@ static const struct iss_video_operations csi2_issvideo_ops = {
 
 static struct v4l2_mbus_framefmt *
 __csi2_get_format(struct iss_csi2_device *csi2,
-		  struct v4l2_subdev_pad_config *cfg,
+		  struct v4l2_subdev_state *sd_state,
 		  unsigned int pad,
 		  enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&csi2->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&csi2->subdev, sd_state,
+						  pad);
 
 	return &csi2->formats[pad];
 }
 
 static void
 csi2_try_format(struct iss_csi2_device *csi2,
-		struct v4l2_subdev_pad_config *cfg,
+		struct v4l2_subdev_state *sd_state,
 		unsigned int pad,
 		struct v4l2_mbus_framefmt *fmt,
 		enum v4l2_subdev_format_whence which)
@@ -868,7 +869,8 @@ csi2_try_format(struct iss_csi2_device *csi2,
 		 * compression.
 		 */
 		pixelcode = fmt->code;
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SINK, which);
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK,
+					   which);
 		memcpy(fmt, format, sizeof(*fmt));
 
 		/*
@@ -894,7 +896,7 @@ csi2_try_format(struct iss_csi2_device *csi2,
  * return -EINVAL or zero on success
  */
 static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd);
@@ -907,7 +909,7 @@ static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
 
 		code->code = csi2_input_fmts[code->index];
 	} else {
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SINK,
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SINK,
 					   code->which);
 		switch (code->index) {
 		case 0:
@@ -931,7 +933,7 @@ static int csi2_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int csi2_enum_frame_size(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd);
@@ -943,7 +945,7 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	csi2_try_format(csi2, cfg, fse->pad, &format, fse->which);
+	csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -953,7 +955,7 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	csi2_try_format(csi2, cfg, fse->pad, &format, fse->which);
+	csi2_try_format(csi2, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -968,13 +970,13 @@ static int csi2_enum_frame_size(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int csi2_get_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csi2_get_format(csi2, cfg, fmt->pad, fmt->which);
+	format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -990,25 +992,26 @@ static int csi2_get_format(struct v4l2_subdev *sd,
  * return -EINVAL or zero on success
  */
 static int csi2_set_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *sd_state,
 			   struct v4l2_subdev_format *fmt)
 {
 	struct iss_csi2_device *csi2 = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __csi2_get_format(csi2, cfg, fmt->pad, fmt->which);
+	format = __csi2_get_format(csi2, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
-	csi2_try_format(csi2, cfg, fmt->pad, &fmt->format, fmt->which);
+	csi2_try_format(csi2, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == CSI2_PAD_SINK) {
-		format = __csi2_get_format(csi2, cfg, CSI2_PAD_SOURCE,
+		format = __csi2_get_format(csi2, sd_state, CSI2_PAD_SOURCE,
 					   fmt->which);
 		*format = fmt->format;
-		csi2_try_format(csi2, cfg, CSI2_PAD_SOURCE, format, fmt->which);
+		csi2_try_format(csi2, sd_state, CSI2_PAD_SOURCE, format,
+				fmt->which);
 	}
 
 	return 0;
@@ -1050,7 +1053,7 @@ static int csi2_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	csi2_set_format(sd, fh ? fh->pad : NULL, &format);
+	csi2_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/staging/media/omap4iss/iss_ipipe.c b/drivers/staging/media/omap4iss/iss_ipipe.c
index 26be078b69f3e..23f707cb336f5 100644
--- a/drivers/staging/media/omap4iss/iss_ipipe.c
+++ b/drivers/staging/media/omap4iss/iss_ipipe.c
@@ -21,7 +21,7 @@
 
 static struct v4l2_mbus_framefmt *
 __ipipe_get_format(struct iss_ipipe_device *ipipe,
-		   struct v4l2_subdev_pad_config *cfg,
+		   struct v4l2_subdev_state *sd_state,
 		   unsigned int pad,
 		   enum v4l2_subdev_format_whence which);
 
@@ -175,12 +175,13 @@ static int ipipe_set_stream(struct v4l2_subdev *sd, int enable)
 
 static struct v4l2_mbus_framefmt *
 __ipipe_get_format(struct iss_ipipe_device *ipipe,
-		   struct v4l2_subdev_pad_config *cfg,
+		   struct v4l2_subdev_state *sd_state,
 		   unsigned int pad,
 		   enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ipipe->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&ipipe->subdev, sd_state,
+						  pad);
 
 	return &ipipe->formats[pad];
 }
@@ -194,7 +195,7 @@ __ipipe_get_format(struct iss_ipipe_device *ipipe,
  */
 static void
 ipipe_try_format(struct iss_ipipe_device *ipipe,
-		 struct v4l2_subdev_pad_config *cfg,
+		 struct v4l2_subdev_state *sd_state,
 		 unsigned int pad,
 		 struct v4l2_mbus_framefmt *fmt,
 		 enum v4l2_subdev_format_whence which)
@@ -222,7 +223,8 @@ ipipe_try_format(struct iss_ipipe_device *ipipe,
 		break;
 
 	case IPIPE_PAD_SOURCE_VP:
-		format = __ipipe_get_format(ipipe, cfg, IPIPE_PAD_SINK, which);
+		format = __ipipe_get_format(ipipe, sd_state, IPIPE_PAD_SINK,
+					    which);
 		memcpy(fmt, format, sizeof(*fmt));
 
 		fmt->code = MEDIA_BUS_FMT_UYVY8_1X16;
@@ -243,7 +245,7 @@ ipipe_try_format(struct iss_ipipe_device *ipipe,
  * return -EINVAL or zero on success
  */
 static int ipipe_enum_mbus_code(struct v4l2_subdev *sd,
-				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_state *sd_state,
 				struct v4l2_subdev_mbus_code_enum *code)
 {
 	switch (code->pad) {
@@ -270,7 +272,7 @@ static int ipipe_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ipipe_enum_frame_size(struct v4l2_subdev *sd,
-				 struct v4l2_subdev_pad_config *cfg,
+				 struct v4l2_subdev_state *sd_state,
 				 struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd);
@@ -282,7 +284,7 @@ static int ipipe_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	ipipe_try_format(ipipe, cfg, fse->pad, &format, fse->which);
+	ipipe_try_format(ipipe, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -292,7 +294,7 @@ static int ipipe_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	ipipe_try_format(ipipe, cfg, fse->pad, &format, fse->which);
+	ipipe_try_format(ipipe, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -309,13 +311,13 @@ static int ipipe_enum_frame_size(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int ipipe_get_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ipipe_get_format(ipipe, cfg, fmt->pad, fmt->which);
+	format = __ipipe_get_format(ipipe, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -333,25 +335,26 @@ static int ipipe_get_format(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int ipipe_set_format(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *sd_state,
 			    struct v4l2_subdev_format *fmt)
 {
 	struct iss_ipipe_device *ipipe = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ipipe_get_format(ipipe, cfg, fmt->pad, fmt->which);
+	format = __ipipe_get_format(ipipe, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
-	ipipe_try_format(ipipe, cfg, fmt->pad, &fmt->format, fmt->which);
+	ipipe_try_format(ipipe, sd_state, fmt->pad, &fmt->format, fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == IPIPE_PAD_SINK) {
-		format = __ipipe_get_format(ipipe, cfg, IPIPE_PAD_SOURCE_VP,
+		format = __ipipe_get_format(ipipe, sd_state,
+					    IPIPE_PAD_SOURCE_VP,
 					    fmt->which);
 		*format = fmt->format;
-		ipipe_try_format(ipipe, cfg, IPIPE_PAD_SOURCE_VP, format,
+		ipipe_try_format(ipipe, sd_state, IPIPE_PAD_SOURCE_VP, format,
 				 fmt->which);
 	}
 
@@ -392,7 +395,7 @@ static int ipipe_init_formats(struct v4l2_subdev *sd, struct v4l2_subdev_fh *fh)
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	ipipe_set_format(sd, fh ? fh->pad : NULL, &format);
+	ipipe_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/staging/media/omap4iss/iss_ipipeif.c b/drivers/staging/media/omap4iss/iss_ipipeif.c
index c2978d02e7971..5e7f25cd53acf 100644
--- a/drivers/staging/media/omap4iss/iss_ipipeif.c
+++ b/drivers/staging/media/omap4iss/iss_ipipeif.c
@@ -357,11 +357,12 @@ static int ipipeif_set_stream(struct v4l2_subdev *sd, int enable)
 
 static struct v4l2_mbus_framefmt *
 __ipipeif_get_format(struct iss_ipipeif_device *ipipeif,
-		     struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+		     struct v4l2_subdev_state *sd_state, unsigned int pad,
 		     enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&ipipeif->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&ipipeif->subdev, sd_state,
+						  pad);
 	return &ipipeif->formats[pad];
 }
 
@@ -374,7 +375,7 @@ __ipipeif_get_format(struct iss_ipipeif_device *ipipeif,
  */
 static void
 ipipeif_try_format(struct iss_ipipeif_device *ipipeif,
-		   struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+		   struct v4l2_subdev_state *sd_state, unsigned int pad,
 		   struct v4l2_mbus_framefmt *fmt,
 		   enum v4l2_subdev_format_whence which)
 {
@@ -403,7 +404,8 @@ ipipeif_try_format(struct iss_ipipeif_device *ipipeif,
 		break;
 
 	case IPIPEIF_PAD_SOURCE_ISIF_SF:
-		format = __ipipeif_get_format(ipipeif, cfg, IPIPEIF_PAD_SINK,
+		format = __ipipeif_get_format(ipipeif, sd_state,
+					      IPIPEIF_PAD_SINK,
 					      which);
 		memcpy(fmt, format, sizeof(*fmt));
 
@@ -418,7 +420,8 @@ ipipeif_try_format(struct iss_ipipeif_device *ipipeif,
 		break;
 
 	case IPIPEIF_PAD_SOURCE_VP:
-		format = __ipipeif_get_format(ipipeif, cfg, IPIPEIF_PAD_SINK,
+		format = __ipipeif_get_format(ipipeif, sd_state,
+					      IPIPEIF_PAD_SINK,
 					      which);
 		memcpy(fmt, format, sizeof(*fmt));
 
@@ -442,7 +445,7 @@ ipipeif_try_format(struct iss_ipipeif_device *ipipeif,
  * return -EINVAL or zero on success
  */
 static int ipipeif_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd);
@@ -462,7 +465,8 @@ static int ipipeif_enum_mbus_code(struct v4l2_subdev *sd,
 		if (code->index != 0)
 			return -EINVAL;
 
-		format = __ipipeif_get_format(ipipeif, cfg, IPIPEIF_PAD_SINK,
+		format = __ipipeif_get_format(ipipeif, sd_state,
+					      IPIPEIF_PAD_SINK,
 					      code->which);
 
 		code->code = format->code;
@@ -476,7 +480,7 @@ static int ipipeif_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int ipipeif_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd);
@@ -488,7 +492,7 @@ static int ipipeif_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	ipipeif_try_format(ipipeif, cfg, fse->pad, &format, fse->which);
+	ipipeif_try_format(ipipeif, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -498,7 +502,7 @@ static int ipipeif_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	ipipeif_try_format(ipipeif, cfg, fse->pad, &format, fse->which);
+	ipipeif_try_format(ipipeif, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -515,13 +519,13 @@ static int ipipeif_enum_frame_size(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int ipipeif_get_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ipipeif_get_format(ipipeif, cfg, fmt->pad, fmt->which);
+	format = __ipipeif_get_format(ipipeif, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -539,33 +543,36 @@ static int ipipeif_get_format(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int ipipeif_set_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct iss_ipipeif_device *ipipeif = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __ipipeif_get_format(ipipeif, cfg, fmt->pad, fmt->which);
+	format = __ipipeif_get_format(ipipeif, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
-	ipipeif_try_format(ipipeif, cfg, fmt->pad, &fmt->format, fmt->which);
+	ipipeif_try_format(ipipeif, sd_state, fmt->pad, &fmt->format,
+			   fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == IPIPEIF_PAD_SINK) {
-		format = __ipipeif_get_format(ipipeif, cfg,
+		format = __ipipeif_get_format(ipipeif, sd_state,
 					      IPIPEIF_PAD_SOURCE_ISIF_SF,
 					      fmt->which);
 		*format = fmt->format;
-		ipipeif_try_format(ipipeif, cfg, IPIPEIF_PAD_SOURCE_ISIF_SF,
+		ipipeif_try_format(ipipeif, sd_state,
+				   IPIPEIF_PAD_SOURCE_ISIF_SF,
 				   format, fmt->which);
 
-		format = __ipipeif_get_format(ipipeif, cfg,
+		format = __ipipeif_get_format(ipipeif, sd_state,
 					      IPIPEIF_PAD_SOURCE_VP,
 					      fmt->which);
 		*format = fmt->format;
-		ipipeif_try_format(ipipeif, cfg, IPIPEIF_PAD_SOURCE_VP, format,
+		ipipeif_try_format(ipipeif, sd_state, IPIPEIF_PAD_SOURCE_VP,
+				   format,
 				   fmt->which);
 	}
 
@@ -608,7 +615,7 @@ static int ipipeif_init_formats(struct v4l2_subdev *sd,
 	format.format.code = MEDIA_BUS_FMT_SGRBG10_1X10;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	ipipeif_set_format(sd, fh ? fh->pad : NULL, &format);
+	ipipeif_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/staging/media/omap4iss/iss_resizer.c b/drivers/staging/media/omap4iss/iss_resizer.c
index 3b6875cbca9be..a5f8f9f1ab16d 100644
--- a/drivers/staging/media/omap4iss/iss_resizer.c
+++ b/drivers/staging/media/omap4iss/iss_resizer.c
@@ -416,11 +416,12 @@ static int resizer_set_stream(struct v4l2_subdev *sd, int enable)
 
 static struct v4l2_mbus_framefmt *
 __resizer_get_format(struct iss_resizer_device *resizer,
-		     struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+		     struct v4l2_subdev_state *sd_state, unsigned int pad,
 		     enum v4l2_subdev_format_whence which)
 {
 	if (which == V4L2_SUBDEV_FORMAT_TRY)
-		return v4l2_subdev_get_try_format(&resizer->subdev, cfg, pad);
+		return v4l2_subdev_get_try_format(&resizer->subdev, sd_state,
+						  pad);
 	return &resizer->formats[pad];
 }
 
@@ -433,7 +434,7 @@ __resizer_get_format(struct iss_resizer_device *resizer,
  */
 static void
 resizer_try_format(struct iss_resizer_device *resizer,
-		   struct v4l2_subdev_pad_config *cfg, unsigned int pad,
+		   struct v4l2_subdev_state *sd_state, unsigned int pad,
 		   struct v4l2_mbus_framefmt *fmt,
 		   enum v4l2_subdev_format_whence which)
 {
@@ -461,7 +462,8 @@ resizer_try_format(struct iss_resizer_device *resizer,
 
 	case RESIZER_PAD_SOURCE_MEM:
 		pixelcode = fmt->code;
-		format = __resizer_get_format(resizer, cfg, RESIZER_PAD_SINK,
+		format = __resizer_get_format(resizer, sd_state,
+					      RESIZER_PAD_SINK,
 					      which);
 		memcpy(fmt, format, sizeof(*fmt));
 
@@ -492,7 +494,7 @@ resizer_try_format(struct iss_resizer_device *resizer,
  * return -EINVAL or zero on success
  */
 static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
-				  struct v4l2_subdev_pad_config *cfg,
+				  struct v4l2_subdev_state *sd_state,
 				  struct v4l2_subdev_mbus_code_enum *code)
 {
 	struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd);
@@ -507,7 +509,8 @@ static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
 		break;
 
 	case RESIZER_PAD_SOURCE_MEM:
-		format = __resizer_get_format(resizer, cfg, RESIZER_PAD_SINK,
+		format = __resizer_get_format(resizer, sd_state,
+					      RESIZER_PAD_SINK,
 					      code->which);
 
 		if (code->index == 0) {
@@ -537,7 +540,7 @@ static int resizer_enum_mbus_code(struct v4l2_subdev *sd,
 }
 
 static int resizer_enum_frame_size(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_size_enum *fse)
 {
 	struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd);
@@ -549,7 +552,7 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = 1;
 	format.height = 1;
-	resizer_try_format(resizer, cfg, fse->pad, &format, fse->which);
+	resizer_try_format(resizer, sd_state, fse->pad, &format, fse->which);
 	fse->min_width = format.width;
 	fse->min_height = format.height;
 
@@ -559,7 +562,7 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
 	format.code = fse->code;
 	format.width = -1;
 	format.height = -1;
-	resizer_try_format(resizer, cfg, fse->pad, &format, fse->which);
+	resizer_try_format(resizer, sd_state, fse->pad, &format, fse->which);
 	fse->max_width = format.width;
 	fse->max_height = format.height;
 
@@ -576,13 +579,13 @@ static int resizer_enum_frame_size(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int resizer_get_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __resizer_get_format(resizer, cfg, fmt->pad, fmt->which);
+	format = __resizer_get_format(resizer, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
@@ -600,26 +603,28 @@ static int resizer_get_format(struct v4l2_subdev *sd,
  * to the format type.
  */
 static int resizer_set_format(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *sd_state,
 			      struct v4l2_subdev_format *fmt)
 {
 	struct iss_resizer_device *resizer = v4l2_get_subdevdata(sd);
 	struct v4l2_mbus_framefmt *format;
 
-	format = __resizer_get_format(resizer, cfg, fmt->pad, fmt->which);
+	format = __resizer_get_format(resizer, sd_state, fmt->pad, fmt->which);
 	if (!format)
 		return -EINVAL;
 
-	resizer_try_format(resizer, cfg, fmt->pad, &fmt->format, fmt->which);
+	resizer_try_format(resizer, sd_state, fmt->pad, &fmt->format,
+			   fmt->which);
 	*format = fmt->format;
 
 	/* Propagate the format from sink to source */
 	if (fmt->pad == RESIZER_PAD_SINK) {
-		format = __resizer_get_format(resizer, cfg,
+		format = __resizer_get_format(resizer, sd_state,
 					      RESIZER_PAD_SOURCE_MEM,
 					      fmt->which);
 		*format = fmt->format;
-		resizer_try_format(resizer, cfg, RESIZER_PAD_SOURCE_MEM, format,
+		resizer_try_format(resizer, sd_state, RESIZER_PAD_SOURCE_MEM,
+				   format,
 				   fmt->which);
 	}
 
@@ -662,7 +667,7 @@ static int resizer_init_formats(struct v4l2_subdev *sd,
 	format.format.code = MEDIA_BUS_FMT_UYVY8_1X16;
 	format.format.width = 4096;
 	format.format.height = 4096;
-	resizer_set_format(sd, fh ? fh->pad : NULL, &format);
+	resizer_set_format(sd, fh ? fh->state : NULL, &format);
 
 	return 0;
 }
diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
index e938bf4c48b63..b26e44adb2be7 100644
--- a/drivers/staging/media/tegra-video/csi.c
+++ b/drivers/staging/media/tegra-video/csi.c
@@ -64,7 +64,7 @@ static const struct v4l2_frmsize_discrete tegra_csi_tpg_sizes[] = {
  * V4L2 Subdevice Pad Operations
  */
 static int csi_enum_bus_code(struct v4l2_subdev *subdev,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *sd_state,
 			     struct v4l2_subdev_mbus_code_enum *code)
 {
 	if (!IS_ENABLED(CONFIG_VIDEO_TEGRA_TPG))
@@ -79,7 +79,7 @@ static int csi_enum_bus_code(struct v4l2_subdev *subdev,
 }
 
 static int csi_get_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct tegra_csi_channel *csi_chan = to_csi_chan(subdev);
@@ -127,7 +127,7 @@ static void csi_chan_update_blank_intervals(struct tegra_csi_channel *csi_chan,
 }
 
 static int csi_enum_framesizes(struct v4l2_subdev *subdev,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *sd_state,
 			       struct v4l2_subdev_frame_size_enum *fse)
 {
 	unsigned int i;
@@ -154,7 +154,7 @@ static int csi_enum_framesizes(struct v4l2_subdev *subdev,
 }
 
 static int csi_enum_frameintervals(struct v4l2_subdev *subdev,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *sd_state,
 				   struct v4l2_subdev_frame_interval_enum *fie)
 {
 	struct tegra_csi_channel *csi_chan = to_csi_chan(subdev);
@@ -181,7 +181,7 @@ static int csi_enum_frameintervals(struct v4l2_subdev *subdev,
 }
 
 static int csi_set_format(struct v4l2_subdev *subdev,
-			  struct v4l2_subdev_pad_config *cfg,
+			  struct v4l2_subdev_state *sd_state,
 			  struct v4l2_subdev_format *fmt)
 {
 	struct tegra_csi_channel *csi_chan = to_csi_chan(subdev);
diff --git a/drivers/staging/media/tegra-video/vi.c b/drivers/staging/media/tegra-video/vi.c
index b76e9110e7064..89709cd06d4d3 100644
--- a/drivers/staging/media/tegra-video/vi.c
+++ b/drivers/staging/media/tegra-video/vi.c
@@ -493,7 +493,7 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,
 	const struct tegra_video_format *fmtinfo;
 	struct v4l2_subdev *subdev;
 	struct v4l2_subdev_format fmt;
-	struct v4l2_subdev_pad_config *pad_cfg;
+	struct v4l2_subdev_state *sd_state;
 	struct v4l2_subdev_frame_size_enum fse = {
 		.which = V4L2_SUBDEV_FORMAT_TRY,
 	};
@@ -507,8 +507,8 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,
 	if (!subdev)
 		return -ENODEV;
 
-	pad_cfg = v4l2_subdev_alloc_pad_config(subdev);
-	if (!pad_cfg)
+	sd_state = v4l2_subdev_alloc_state(subdev);
+	if (!sd_state)
 		return -ENOMEM;
 	/*
 	 * Retrieve the format information and if requested format isn't
@@ -532,33 +532,33 @@ static int __tegra_channel_try_format(struct tegra_vi_channel *chan,
 	 * If not available, try to get crop boundary from subdev.
 	 */
 	fse.code = fmtinfo->code;
-	ret = v4l2_subdev_call(subdev, pad, enum_frame_size, pad_cfg, &fse);
+	ret = v4l2_subdev_call(subdev, pad, enum_frame_size, sd_state, &fse);
 	if (ret) {
 		if (!v4l2_subdev_has_op(subdev, pad, get_selection)) {
-			pad_cfg->try_crop.width = 0;
-			pad_cfg->try_crop.height = 0;
+			sd_state->pads->try_crop.width = 0;
+			sd_state->pads->try_crop.height = 0;
 		} else {
 			ret = v4l2_subdev_call(subdev, pad, get_selection,
 					       NULL, &sdsel);
 			if (ret)
 				return -EINVAL;
 
-			pad_cfg->try_crop.width = sdsel.r.width;
-			pad_cfg->try_crop.height = sdsel.r.height;
+			sd_state->pads->try_crop.width = sdsel.r.width;
+			sd_state->pads->try_crop.height = sdsel.r.height;
 		}
 	} else {
-		pad_cfg->try_crop.width = fse.max_width;
-		pad_cfg->try_crop.height = fse.max_height;
+		sd_state->pads->try_crop.width = fse.max_width;
+		sd_state->pads->try_crop.height = fse.max_height;
 	}
 
-	ret = v4l2_subdev_call(subdev, pad, set_fmt, pad_cfg, &fmt);
+	ret = v4l2_subdev_call(subdev, pad, set_fmt, sd_state, &fmt);
 	if (ret < 0)
 		return ret;
 
 	v4l2_fill_pix_format(pix, &fmt.format);
 	tegra_channel_fmt_align(chan, pix, fmtinfo->bpp);
 
-	v4l2_subdev_free_pad_config(pad_cfg);
+	v4l2_subdev_free_state(sd_state);
 
 	return 0;
 }
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index d0e9a5bdb08bb..89115ba4c0f2f 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -623,6 +623,19 @@ struct v4l2_subdev_pad_config {
 	struct v4l2_rect try_compose;
 };
 
+/**
+ * struct v4l2_subdev_state - Used for storing subdev state information.
+ *
+ * @pads: &struct v4l2_subdev_pad_config array
+ *
+ * This structure only needs to be passed to the pad op if the 'which' field
+ * of the main argument is set to %V4L2_SUBDEV_FORMAT_TRY. For
+ * %V4L2_SUBDEV_FORMAT_ACTIVE it is safe to pass %NULL.
+ */
+struct v4l2_subdev_state {
+	struct v4l2_subdev_pad_config *pads;
+};
+
 /**
  * struct v4l2_subdev_pad_ops - v4l2-subdev pad level operations
  *
@@ -687,27 +700,27 @@ struct v4l2_subdev_pad_config {
  */
 struct v4l2_subdev_pad_ops {
 	int (*init_cfg)(struct v4l2_subdev *sd,
-			struct v4l2_subdev_pad_config *cfg);
+			struct v4l2_subdev_state *state);
 	int (*enum_mbus_code)(struct v4l2_subdev *sd,
-			      struct v4l2_subdev_pad_config *cfg,
+			      struct v4l2_subdev_state *state,
 			      struct v4l2_subdev_mbus_code_enum *code);
 	int (*enum_frame_size)(struct v4l2_subdev *sd,
-			       struct v4l2_subdev_pad_config *cfg,
+			       struct v4l2_subdev_state *state,
 			       struct v4l2_subdev_frame_size_enum *fse);
 	int (*enum_frame_interval)(struct v4l2_subdev *sd,
-				   struct v4l2_subdev_pad_config *cfg,
+				   struct v4l2_subdev_state *state,
 				   struct v4l2_subdev_frame_interval_enum *fie);
 	int (*get_fmt)(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *state,
 		       struct v4l2_subdev_format *format);
 	int (*set_fmt)(struct v4l2_subdev *sd,
-		       struct v4l2_subdev_pad_config *cfg,
+		       struct v4l2_subdev_state *state,
 		       struct v4l2_subdev_format *format);
 	int (*get_selection)(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *state,
 			     struct v4l2_subdev_selection *sel);
 	int (*set_selection)(struct v4l2_subdev *sd,
-			     struct v4l2_subdev_pad_config *cfg,
+			     struct v4l2_subdev_state *state,
 			     struct v4l2_subdev_selection *sel);
 	int (*get_edid)(struct v4l2_subdev *sd, struct v4l2_edid *edid);
 	int (*set_edid)(struct v4l2_subdev *sd, struct v4l2_edid *edid);
@@ -918,14 +931,14 @@ struct v4l2_subdev {
  * struct v4l2_subdev_fh - Used for storing subdev information per file handle
  *
  * @vfh: pointer to &struct v4l2_fh
- * @pad: pointer to &struct v4l2_subdev_pad_config
+ * @state: pointer to &struct v4l2_subdev_state
  * @owner: module pointer to the owner of this file handle
  */
 struct v4l2_subdev_fh {
 	struct v4l2_fh vfh;
 	struct module *owner;
 #if defined(CONFIG_VIDEO_V4L2_SUBDEV_API)
-	struct v4l2_subdev_pad_config *pad;
+	struct v4l2_subdev_state *state;
 #endif
 };
 
@@ -945,17 +958,17 @@ struct v4l2_subdev_fh {
  *	&struct v4l2_subdev_pad_config->try_fmt
  *
  * @sd: pointer to &struct v4l2_subdev
- * @cfg: pointer to &struct v4l2_subdev_pad_config array.
- * @pad: index of the pad in the @cfg array.
+ * @state: pointer to &struct v4l2_subdev_state
+ * @pad: index of the pad in the &struct v4l2_subdev_state->pads array
  */
 static inline struct v4l2_mbus_framefmt *
 v4l2_subdev_get_try_format(struct v4l2_subdev *sd,
-			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_state *state,
 			   unsigned int pad)
 {
 	if (WARN_ON(pad >= sd->entity.num_pads))
 		pad = 0;
-	return &cfg[pad].try_fmt;
+	return &state->pads[pad].try_fmt;
 }
 
 /**
@@ -963,17 +976,17 @@ v4l2_subdev_get_try_format(struct v4l2_subdev *sd,
  *	&struct v4l2_subdev_pad_config->try_crop
  *
  * @sd: pointer to &struct v4l2_subdev
- * @cfg: pointer to &struct v4l2_subdev_pad_config array.
- * @pad: index of the pad in the @cfg array.
+ * @state: pointer to &struct v4l2_subdev_state.
+ * @pad: index of the pad in the &struct v4l2_subdev_state->pads array.
  */
 static inline struct v4l2_rect *
 v4l2_subdev_get_try_crop(struct v4l2_subdev *sd,
-			 struct v4l2_subdev_pad_config *cfg,
+			 struct v4l2_subdev_state *state,
 			 unsigned int pad)
 {
 	if (WARN_ON(pad >= sd->entity.num_pads))
 		pad = 0;
-	return &cfg[pad].try_crop;
+	return &state->pads[pad].try_crop;
 }
 
 /**
@@ -981,17 +994,17 @@ v4l2_subdev_get_try_crop(struct v4l2_subdev *sd,
  *	&struct v4l2_subdev_pad_config->try_compose
  *
  * @sd: pointer to &struct v4l2_subdev
- * @cfg: pointer to &struct v4l2_subdev_pad_config array.
- * @pad: index of the pad in the @cfg array.
+ * @state: pointer to &struct v4l2_subdev_state.
+ * @pad: index of the pad in the &struct v4l2_subdev_state->pads array.
  */
 static inline struct v4l2_rect *
 v4l2_subdev_get_try_compose(struct v4l2_subdev *sd,
-			    struct v4l2_subdev_pad_config *cfg,
+			    struct v4l2_subdev_state *state,
 			    unsigned int pad)
 {
 	if (WARN_ON(pad >= sd->entity.num_pads))
 		pad = 0;
-	return &cfg[pad].try_compose;
+	return &state->pads[pad].try_compose;
 }
 
 #endif
@@ -1093,20 +1106,21 @@ int v4l2_subdev_link_validate_default(struct v4l2_subdev *sd,
 int v4l2_subdev_link_validate(struct media_link *link);
 
 /**
- * v4l2_subdev_alloc_pad_config - Allocates memory for pad config
+ * v4l2_subdev_alloc_state - allocate v4l2_subdev_state
  *
- * @sd: pointer to struct v4l2_subdev
+ * @sd: pointer to &struct v4l2_subdev for which the state is being allocated.
+ *
+ * Must call v4l2_subdev_free_state() when state is no longer needed.
  */
-struct
-v4l2_subdev_pad_config *v4l2_subdev_alloc_pad_config(struct v4l2_subdev *sd);
+struct v4l2_subdev_state *v4l2_subdev_alloc_state(struct v4l2_subdev *sd);
 
 /**
- * v4l2_subdev_free_pad_config - Frees memory allocated by
- *	v4l2_subdev_alloc_pad_config().
+ * v4l2_subdev_free_state - free a v4l2_subdev_state
  *
- * @cfg: pointer to &struct v4l2_subdev_pad_config
+ * @state: v4l2_subdev_state to be freed.
  */
-void v4l2_subdev_free_pad_config(struct v4l2_subdev_pad_config *cfg);
+void v4l2_subdev_free_state(struct v4l2_subdev_state *state);
+
 #endif /* CONFIG_MEDIA_CONTROLLER */
 
 /**
-- 
GitLab


From 1ad4f329fccb5d9eb7b0a38d7fdf0f4688c6b341 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 24 May 2021 10:11:58 +0800
Subject: [PATCH 3014/3804] PM / devfreq: userspace: Use DEVICE_ATTR_RW macro

Use DEVICE_ATTR_RW helper instead of plain DEVICE_ATTR,
which makes the code a bit shorter and easier to read.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/governor_userspace.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 0fd6c48510711..ab9db7adb3ade 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -31,8 +31,8 @@ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
 	return 0;
 }
 
-static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
-			  const char *buf, size_t count)
+static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr,
+			      const char *buf, size_t count)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
 	struct userspace_data *data;
@@ -52,8 +52,8 @@ static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
 	return err;
 }
 
-static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static ssize_t set_freq_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
 {
 	struct devfreq *devfreq = to_devfreq(dev);
 	struct userspace_data *data;
@@ -70,7 +70,7 @@ static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
 	return err;
 }
 
-static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq);
+static DEVICE_ATTR_RW(set_freq);
 static struct attribute *dev_entries[] = {
 	&dev_attr_set_freq.attr,
 	NULL,
-- 
GitLab


From 271ca53cb0c8b3a45c73e1140fc3336c2da42315 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 1 Jun 2021 05:23:18 +0300
Subject: [PATCH 3015/3804] dt-bindings: devfreq: tegra30-actmon: Convert to
 schema

Convert NVIDIA Tegra ACTMON binding to schema.

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 .../arm/tegra/nvidia,tegra30-actmon.txt       |  57 ---------
 .../devfreq/nvidia,tegra30-actmon.yaml        | 121 ++++++++++++++++++
 2 files changed, 121 insertions(+), 57 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
 create mode 100644 Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml

diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
deleted file mode 100644
index 897eedfa2bc8a..0000000000000
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-NVIDIA Tegra Activity Monitor
-
-The activity monitor block collects statistics about the behaviour of other
-components in the system. This information can be used to derive the rate at
-which the external memory needs to be clocked in order to serve all requests
-from the monitored clients.
-
-Required properties:
-- compatible: should be "nvidia,tegra<chip>-actmon"
-- reg: offset and length of the register set for the device
-- interrupts: standard interrupt property
-- clocks: Must contain a phandle and clock specifier pair for each entry in
-clock-names. See ../../clock/clock-bindings.txt for details.
-- clock-names: Must include the following entries:
-  - actmon
-  - emc
-- resets: Must contain an entry for each entry in reset-names. See
-../../reset/reset.txt for details.
-- reset-names: Must include the following entries:
-  - actmon
-- operating-points-v2: See ../bindings/opp/opp.txt for details.
-- interconnects: Should contain entries for memory clients sitting on
-                 MC->EMC memory interconnect path.
-- interconnect-names: Should include name of the interconnect path for each
-                      interconnect entry. Consult TRM documentation for
-                      information about available memory clients, see MEMORY
-                      CONTROLLER section.
-
-For each opp entry in 'operating-points-v2' table:
-- opp-supported-hw: bitfield indicating SoC speedo ID mask
-- opp-peak-kBps: peak bandwidth of the memory channel
-
-Example:
-	dfs_opp_table: opp-table {
-		compatible = "operating-points-v2";
-
-		opp@12750000 {
-			opp-hz = /bits/ 64 <12750000>;
-			opp-supported-hw = <0x000F>;
-			opp-peak-kBps = <51000>;
-		};
-		...
-	};
-
-	actmon@6000c800 {
-		compatible = "nvidia,tegra124-actmon";
-		reg = <0x0 0x6000c800 0x0 0x400>;
-		interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
-			 <&tegra_car TEGRA124_CLK_EMC>;
-		clock-names = "actmon", "emc";
-		resets = <&tegra_car 119>;
-		reset-names = "actmon";
-		operating-points-v2 = <&dfs_opp_table>;
-		interconnects = <&mc TEGRA124_MC_MPCORER &emc>;
-		interconnect-names = "cpu";
-	};
diff --git a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
new file mode 100644
index 0000000000000..ba938eed28ee7
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
@@ -0,0 +1,121 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/devfreq/nvidia,tegra30-actmon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVIDIA Tegra30 Activity Monitor
+
+maintainers:
+  - Dmitry Osipenko <digetx@gmail.com>
+  - Jon Hunter <jonathanh@nvidia.com>
+  - Thierry Reding <thierry.reding@gmail.com>
+
+description: |
+  The activity monitor block collects statistics about the behaviour of other
+  components in the system. This information can be used to derive the rate at
+  which the external memory needs to be clocked in order to serve all requests
+  from the monitored clients.
+
+properties:
+  compatible:
+    enum:
+      - nvidia,tegra30-actmon
+      - nvidia,tegra114-actmon
+      - nvidia,tegra124-actmon
+      - nvidia,tegra210-actmon
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: actmon
+      - const: emc
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: actmon
+
+  interrupts:
+    maxItems: 1
+
+  interconnects:
+    minItems: 1
+    maxItems: 12
+
+  interconnect-names:
+    minItems: 1
+    maxItems: 12
+    description:
+      Should include name of the interconnect path for each interconnect
+      entry. Consult TRM documentation for information about available
+      memory clients, see MEMORY CONTROLLER and ACTIVITY MONITOR sections.
+
+  operating-points-v2:
+    description:
+      Should contain freqs and voltages and opp-supported-hw property, which
+      is a bitfield indicating SoC speedo ID mask.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - interrupts
+  - interconnects
+  - interconnect-names
+  - operating-points-v2
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/memory/tegra30-mc.h>
+
+    mc: memory-controller@7000f000 {
+        compatible = "nvidia,tegra30-mc";
+        reg = <0x7000f000 0x400>;
+        clocks = <&clk 32>;
+        clock-names = "mc";
+
+        interrupts = <0 77 4>;
+
+        #iommu-cells = <1>;
+        #reset-cells = <1>;
+        #interconnect-cells = <1>;
+    };
+
+    emc: external-memory-controller@7000f400 {
+        compatible = "nvidia,tegra30-emc";
+        reg = <0x7000f400 0x400>;
+        interrupts = <0 78 4>;
+        clocks = <&clk 57>;
+
+        nvidia,memory-controller = <&mc>;
+        operating-points-v2 = <&dvfs_opp_table>;
+        power-domains = <&domain>;
+
+        #interconnect-cells = <0>;
+    };
+
+    actmon@6000c800 {
+        compatible = "nvidia,tegra30-actmon";
+        reg = <0x6000c800 0x400>;
+        interrupts = <0 45 4>;
+        clocks = <&clk 119>, <&clk 57>;
+        clock-names = "actmon", "emc";
+        resets = <&rst 119>;
+        reset-names = "actmon";
+        operating-points-v2 = <&dvfs_opp_table>;
+        interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
+        interconnect-names = "cpu-read";
+    };
-- 
GitLab


From 6b61f55ecbe693d9d0d7ae14ebce01dabe10ecf1 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 1 Jun 2021 05:23:19 +0300
Subject: [PATCH 3016/3804] dt-bindings: devfreq: tegra30-actmon: Add
 cooling-cells

The ACTMON watches activity of memory clients. Decisions about a minimum
required frequency are made based on the info from ACTMON. We can use
ACTMON as a thermal cooling device by limiting the required frequency.
Document the new #cooling-cells property of the NVIDIA Tegra ACTMON
hardware unit.

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 .../devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml   | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
index ba938eed28ee7..e3379d1067283 100644
--- a/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
+++ b/Documentation/devicetree/bindings/devfreq/nvidia,tegra30-actmon.yaml
@@ -63,6 +63,9 @@ properties:
       Should contain freqs and voltages and opp-supported-hw property, which
       is a bitfield indicating SoC speedo ID mask.
 
+  "#cooling-cells":
+    const: 2
+
 required:
   - compatible
   - reg
@@ -74,6 +77,7 @@ required:
   - interconnects
   - interconnect-names
   - operating-points-v2
+  - "#cooling-cells"
 
 additionalProperties: false
 
@@ -118,4 +122,5 @@ examples:
         operating-points-v2 = <&dvfs_opp_table>;
         interconnects = <&mc TEGRA30_MC_MPCORER &emc>;
         interconnect-names = "cpu-read";
+        #cooling-cells = <2>;
     };
-- 
GitLab


From b6c57d313f5f8d0da150f6e02882f0607443abe7 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@chromium.org>
Date: Sat, 5 Jun 2021 04:29:13 +0200
Subject: [PATCH 3017/3804] media: mtk-vcodec: venc: remove redundant code

vidioc_try_fmt() does clamp height and width when called on the OUTPUT
queue, so clamping them prior to calling this function is redundant. Set
the queue's parameters after calling vidioc_try_fmt() so we can use the
values it computed.

Signed-off-by: Alexandre Courbot <acourbot@chromium.org>
Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../media/platform/mtk-vcodec/mtk_vcodec_enc.c   | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
index 4831052f475db..42ff138679403 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
@@ -443,7 +443,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv,
 	struct mtk_q_data *q_data;
 	int ret, i;
 	const struct mtk_video_fmt *fmt;
-	struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 
 	vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type);
 	if (!vq) {
@@ -468,20 +467,13 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv,
 		f->fmt.pix.pixelformat = fmt->fourcc;
 	}
 
-	pix_fmt_mp->height = clamp(pix_fmt_mp->height,
-				MTK_VENC_MIN_H,
-				MTK_VENC_MAX_H);
-	pix_fmt_mp->width = clamp(pix_fmt_mp->width,
-				MTK_VENC_MIN_W,
-				MTK_VENC_MAX_W);
-
-	q_data->visible_width = f->fmt.pix_mp.width;
-	q_data->visible_height = f->fmt.pix_mp.height;
-	q_data->fmt = fmt;
-	ret = vidioc_try_fmt(f, q_data->fmt);
+	ret = vidioc_try_fmt(f, fmt);
 	if (ret)
 		return ret;
 
+	q_data->fmt = fmt;
+	q_data->visible_width = f->fmt.pix_mp.width;
+	q_data->visible_height = f->fmt.pix_mp.height;
 	q_data->coded_width = f->fmt.pix_mp.width;
 	q_data->coded_height = f->fmt.pix_mp.height;
 
-- 
GitLab


From 5cd57605771216755bd6f98748d4f11d1e65b780 Mon Sep 17 00:00:00 2001
From: Irui Wang <irui.wang@mediatek.com>
Date: Sat, 5 Jun 2021 04:29:14 +0200
Subject: [PATCH 3018/3804] media: dt-bindings: media: mtk-vcodec: Add
 dma-ranges property

The mt8192 iommu supports 0~16GB iova. We separate it into four banks:
0~4G; 4G~8G; 8G~12G; 12G~16G.

The "dma-ranges" property can be used to select the bank we are located in.
If we don't set this property, the default range is always 0~4G.

This is optional and only needed on mt8192; the dma ranges should
not cross the 4G/8G/12G boundaries.

We don't have an actual bus/parent concept here, and the iova
requirement is for our HW. Thus put the property in our node.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/devicetree/bindings/media/mediatek-vcodec.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
index 06db6837cefd0..5bb9e6e191b70 100644
--- a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
+++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
@@ -22,6 +22,7 @@ Required properties:
 - iommus : should point to the respective IOMMU block with master port as
   argument, see Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
   for details.
+- dma-ranges : describes the dma address range space that the codec hw access.
 One of the two following nodes:
 - mediatek,vpu : the node of the video processor unit, if using VPU.
 - mediatek,scp : the node of the SCP unit, if using SCP.
-- 
GitLab


From c2c3bde0e1aed4250e7eafb1bc739760c61d10b8 Mon Sep 17 00:00:00 2001
From: Irui Wang <irui.wang@mediatek.com>
Date: Sat, 5 Jun 2021 04:29:15 +0200
Subject: [PATCH 3019/3804] media: mtk-vcodec: Support 34bits dma address for
 venc

Use the dma_set_mask_and_coherent helper to set the venc
DMA bit mask to support the 34-bit iova space (16GB) that
the mt8192 iommu HW supports.

The whole iova range is separated into 0~4G/4G~8G/8G~12G/12G~16G;
which iova range VENC is actually located in
depends on the dma-ranges property of the venc dtsi node.

Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
index 7d7b8cfc2cc55..26b089e81213e 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
@@ -361,6 +361,9 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
 		goto err_event_workq;
 	}
 
+	if (of_get_property(pdev->dev.of_node, "dma-ranges", NULL))
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(34));
+
 	ret = video_register_device(vfd_enc, VFL_TYPE_VIDEO, 1);
 	if (ret) {
 		mtk_v4l2_err("Failed to register video device");
-- 
GitLab


From aa950d8619694fb1a7d0e68aa556976e2f34476d Mon Sep 17 00:00:00 2001
From: Irui Wang <irui.wang@mediatek.com>
Date: Sat, 5 Jun 2021 04:29:16 +0200
Subject: [PATCH 3020/3804] media: dt-bindings: media: mtk-vcodec: Add binding
 for MT8192 VENC

Updates binding document for mt8192 encoder driver.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 Documentation/devicetree/bindings/media/mediatek-vcodec.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
index 5bb9e6e191b70..ad1321e5a22d8 100644
--- a/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
+++ b/Documentation/devicetree/bindings/media/mediatek-vcodec.txt
@@ -9,6 +9,7 @@ Required properties:
   "mediatek,mt8173-vcodec-enc" for mt8173 avc encoder.
   "mediatek,mt8183-vcodec-enc" for MT8183 encoder.
   "mediatek,mt8173-vcodec-dec" for MT8173 decoder.
+  "mediatek,mt8192-vcodec-enc" for MT8192 encoder.
 - reg : Physical base address of the video codec registers and length of
   memory mapped region.
 - interrupts : interrupt number to the cpu.
-- 
GitLab


From 37eeacba7cb6bfbed9596e7b2f8b672e1c957ac7 Mon Sep 17 00:00:00 2001
From: Irui Wang <irui.wang@mediatek.com>
Date: Sat, 5 Jun 2021 04:29:17 +0200
Subject: [PATCH 3021/3804] media: mtk-vcodec: Add MT8192 H264 venc driver

Add MT8192 venc driver's compatible and device private data.

Reviewed-by: Tzung-Bi Shih <tzungbi@google.com>
Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h |  1 +
 .../media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
index d03cca95e99bc..14893d277bb8c 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
@@ -302,6 +302,7 @@ struct mtk_vcodec_ctx {
 enum mtk_chip {
 	MTK_MT8173,
 	MTK_MT8183,
+	MTK_MT8192,
 };
 
 /**
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
index 26b089e81213e..45d1870c83dd7 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
@@ -425,12 +425,26 @@ static const struct mtk_vcodec_enc_pdata mt8183_pdata = {
 	.core_id = VENC_SYS,
 };
 
+static const struct mtk_vcodec_enc_pdata mt8192_pdata = {
+	.chip = MTK_MT8192,
+	.uses_ext = true,
+	/* MT8192 supports the same capture formats as MT8183 */
+	.capture_formats = mtk_video_formats_capture_mt8183,
+	.num_capture_formats = ARRAY_SIZE(mtk_video_formats_capture_mt8183),
+	/* MT8192 supports the same output formats as MT8173 */
+	.output_formats = mtk_video_formats_output_mt8173,
+	.num_output_formats = ARRAY_SIZE(mtk_video_formats_output_mt8173),
+	.min_bitrate = 64,
+	.max_bitrate = 100000000,
+	.core_id = VENC_SYS,
+};
 static const struct of_device_id mtk_vcodec_enc_match[] = {
 	{.compatible = "mediatek,mt8173-vcodec-enc",
 			.data = &mt8173_avc_pdata},
 	{.compatible = "mediatek,mt8173-vcodec-enc-vp8",
 			.data = &mt8173_vp8_pdata},
 	{.compatible = "mediatek,mt8183-vcodec-enc", .data = &mt8183_pdata},
+	{.compatible = "mediatek,mt8192-vcodec-enc", .data = &mt8192_pdata},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_vcodec_enc_match);
-- 
GitLab


From caf231ac25bdde69d257366e2f8d13b37af5458e Mon Sep 17 00:00:00 2001
From: Irui Wang <irui.wang@mediatek.com>
Date: Sat, 5 Jun 2021 04:29:18 +0200
Subject: [PATCH 3022/3804] media: mtk-vcodec: Support MT8192 H264 4K encoding

MT8192 H264 supports 4K (3840x2176) and Level 5.1 encoding;
add the related paths according to enc_capability.

Signed-off-by: Irui Wang <irui.wang@mediatek.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../platform/mtk-vcodec/mtk_vcodec_enc.c      | 78 +++++++++++++------
 .../platform/mtk-vcodec/venc/venc_h264_if.c   |  4 +
 2 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
index 42ff138679403..416f356af363d 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
@@ -19,23 +19,30 @@
 
 #define MTK_VENC_MIN_W	160U
 #define MTK_VENC_MIN_H	128U
-#define MTK_VENC_MAX_W	1920U
-#define MTK_VENC_MAX_H	1088U
+#define MTK_VENC_HD_MAX_W	1920U
+#define MTK_VENC_HD_MAX_H	1088U
+#define MTK_VENC_4K_MAX_W	3840U
+#define MTK_VENC_4K_MAX_H	2176U
+
 #define DFT_CFG_WIDTH	MTK_VENC_MIN_W
 #define DFT_CFG_HEIGHT	MTK_VENC_MIN_H
 #define MTK_MAX_CTRLS_HINT	20
 
 #define MTK_DEFAULT_FRAMERATE_NUM 1001
 #define MTK_DEFAULT_FRAMERATE_DENOM 30000
+#define MTK_VENC_4K_CAPABILITY_ENABLE BIT(0)
 
 static void mtk_venc_worker(struct work_struct *work);
 
-static const struct v4l2_frmsize_stepwise mtk_venc_framesizes = {
-	MTK_VENC_MIN_W, MTK_VENC_MAX_W, 16,
-	MTK_VENC_MIN_H, MTK_VENC_MAX_H, 16,
+static const struct v4l2_frmsize_stepwise mtk_venc_hd_framesizes = {
+	MTK_VENC_MIN_W, MTK_VENC_HD_MAX_W, 16,
+	MTK_VENC_MIN_H, MTK_VENC_HD_MAX_H, 16,
 };
 
-#define NUM_SUPPORTED_FRAMESIZE ARRAY_SIZE(mtk_venc_framesizes)
+static const struct v4l2_frmsize_stepwise mtk_venc_4k_framesizes = {
+	MTK_VENC_MIN_W, MTK_VENC_4K_MAX_W, 16,
+	MTK_VENC_MIN_H, MTK_VENC_4K_MAX_H, 16,
+};
 
 static int vidioc_venc_s_ctrl(struct v4l2_ctrl *ctrl)
 {
@@ -151,17 +158,22 @@ static int vidioc_enum_framesizes(struct file *file, void *fh,
 				  struct v4l2_frmsizeenum *fsize)
 {
 	const struct mtk_video_fmt *fmt;
+	struct mtk_vcodec_ctx *ctx = fh_to_ctx(fh);
 
 	if (fsize->index != 0)
 		return -EINVAL;
 
 	fmt = mtk_venc_find_format(fsize->pixel_format,
-				   fh_to_ctx(fh)->dev->venc_pdata);
+				   ctx->dev->venc_pdata);
 	if (!fmt)
 		return -EINVAL;
 
 	fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE;
-	fsize->stepwise = mtk_venc_framesizes;
+
+	if (ctx->dev->enc_capability & MTK_VENC_4K_CAPABILITY_ENABLE)
+		fsize->stepwise = mtk_venc_4k_framesizes;
+	else
+		fsize->stepwise = mtk_venc_hd_framesizes;
 
 	return 0;
 }
@@ -248,7 +260,7 @@ static struct mtk_q_data *mtk_venc_get_q_data(struct mtk_vcodec_ctx *ctx,
 /* V4L2 specification suggests the driver corrects the format struct if any of
  * the dimensions is unsupported
  */
-static int vidioc_try_fmt(struct v4l2_format *f,
+static int vidioc_try_fmt(struct mtk_vcodec_ctx *ctx, struct v4l2_format *f,
 			  const struct mtk_video_fmt *fmt)
 {
 	struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
@@ -260,13 +272,22 @@ static int vidioc_try_fmt(struct v4l2_format *f,
 		pix_fmt_mp->plane_fmt[0].bytesperline = 0;
 	} else if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
 		int tmp_w, tmp_h;
+		unsigned int max_width, max_height;
+
+		if (ctx->dev->enc_capability & MTK_VENC_4K_CAPABILITY_ENABLE) {
+			max_width = MTK_VENC_4K_MAX_W;
+			max_height = MTK_VENC_4K_MAX_H;
+		} else {
+			max_width = MTK_VENC_HD_MAX_W;
+			max_height = MTK_VENC_HD_MAX_H;
+		}
 
 		pix_fmt_mp->height = clamp(pix_fmt_mp->height,
 					MTK_VENC_MIN_H,
-					MTK_VENC_MAX_H);
+					max_height);
 		pix_fmt_mp->width = clamp(pix_fmt_mp->width,
 					MTK_VENC_MIN_W,
-					MTK_VENC_MAX_W);
+					max_width);
 
 		/* find next closer width align 16, heign align 32, size align
 		 * 64 rectangle
@@ -275,16 +296,16 @@ static int vidioc_try_fmt(struct v4l2_format *f,
 		tmp_h = pix_fmt_mp->height;
 		v4l_bound_align_image(&pix_fmt_mp->width,
 					MTK_VENC_MIN_W,
-					MTK_VENC_MAX_W, 4,
+					max_width, 4,
 					&pix_fmt_mp->height,
 					MTK_VENC_MIN_H,
-					MTK_VENC_MAX_H, 5, 6);
+					max_height, 5, 6);
 
 		if (pix_fmt_mp->width < tmp_w &&
-			(pix_fmt_mp->width + 16) <= MTK_VENC_MAX_W)
+			(pix_fmt_mp->width + 16) <= max_width)
 			pix_fmt_mp->width += 16;
 		if (pix_fmt_mp->height < tmp_h &&
-			(pix_fmt_mp->height + 32) <= MTK_VENC_MAX_H)
+			(pix_fmt_mp->height + 32) <= max_height)
 			pix_fmt_mp->height += 32;
 
 		mtk_v4l2_debug(0,
@@ -405,7 +426,7 @@ static int vidioc_venc_s_fmt_cap(struct file *file, void *priv,
 	}
 
 	q_data->fmt = fmt;
-	ret = vidioc_try_fmt(f, q_data->fmt);
+	ret = vidioc_try_fmt(ctx, f, q_data->fmt);
 	if (ret)
 		return ret;
 
@@ -467,7 +488,7 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv,
 		f->fmt.pix.pixelformat = fmt->fourcc;
 	}
 
-	ret = vidioc_try_fmt(f, fmt);
+	ret = vidioc_try_fmt(ctx, f, fmt);
 	if (ret)
 		return ret;
 
@@ -545,7 +566,7 @@ static int vidioc_try_fmt_vid_cap_mplane(struct file *file, void *priv,
 	f->fmt.pix_mp.quantization = ctx->quantization;
 	f->fmt.pix_mp.xfer_func = ctx->xfer_func;
 
-	return vidioc_try_fmt(f, fmt);
+	return vidioc_try_fmt(ctx, f, fmt);
 }
 
 static int vidioc_try_fmt_vid_out_mplane(struct file *file, void *priv,
@@ -567,7 +588,7 @@ static int vidioc_try_fmt_vid_out_mplane(struct file *file, void *priv,
 		f->fmt.pix_mp.xfer_func = V4L2_XFER_FUNC_DEFAULT;
 	}
 
-	return vidioc_try_fmt(f, fmt);
+	return vidioc_try_fmt(ctx, f, fmt);
 }
 
 static int vidioc_venc_g_selection(struct file *file, void *priv,
@@ -1171,16 +1192,16 @@ void mtk_vcodec_enc_set_default_params(struct mtk_vcodec_ctx *ctx)
 
 	v4l_bound_align_image(&q_data->coded_width,
 				MTK_VENC_MIN_W,
-				MTK_VENC_MAX_W, 4,
+				MTK_VENC_HD_MAX_W, 4,
 				&q_data->coded_height,
 				MTK_VENC_MIN_H,
-				MTK_VENC_MAX_H, 5, 6);
+				MTK_VENC_HD_MAX_H, 5, 6);
 
 	if (q_data->coded_width < DFT_CFG_WIDTH &&
-		(q_data->coded_width + 16) <= MTK_VENC_MAX_W)
+		(q_data->coded_width + 16) <= MTK_VENC_HD_MAX_W)
 		q_data->coded_width += 16;
 	if (q_data->coded_height < DFT_CFG_HEIGHT &&
-		(q_data->coded_height + 32) <= MTK_VENC_MAX_H)
+		(q_data->coded_height + 32) <= MTK_VENC_HD_MAX_H)
 		q_data->coded_height += 32;
 
 	q_data->sizeimage[0] =
@@ -1210,6 +1231,12 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx)
 {
 	const struct v4l2_ctrl_ops *ops = &mtk_vcodec_enc_ctrl_ops;
 	struct v4l2_ctrl_handler *handler = &ctx->ctrl_hdl;
+	u8 h264_max_level;
+
+	if (ctx->dev->enc_capability & MTK_VENC_4K_CAPABILITY_ENABLE)
+		h264_max_level = V4L2_MPEG_VIDEO_H264_LEVEL_5_1;
+	else
+		h264_max_level = V4L2_MPEG_VIDEO_H264_LEVEL_4_2;
 
 	v4l2_ctrl_handler_init(handler, MTK_MAX_CTRLS_HINT);
 
@@ -1240,8 +1267,9 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx)
 			V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
 			0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
 	v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL,
-			V4L2_MPEG_VIDEO_H264_LEVEL_4_2,
-			0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
+			       h264_max_level,
+			       0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
+
 	if (handler->error) {
 		mtk_v4l2_err("Init control handler fail %d",
 				handler->error);
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
index d0123dfc5f93d..b6a4f2074fa57 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
@@ -215,6 +215,10 @@ static unsigned int h264_get_level(struct venc_h264_inst *inst,
 		return 41;
 	case V4L2_MPEG_VIDEO_H264_LEVEL_4_2:
 		return 42;
+	case V4L2_MPEG_VIDEO_H264_LEVEL_5_0:
+		return 50;
+	case V4L2_MPEG_VIDEO_H264_LEVEL_5_1:
+		return 51;
 	default:
 		mtk_vcodec_debug(inst, "unsupported level %d", level);
 		return 31;
-- 
GitLab


From c344f07aa1b4ba38ca8fabe407a2afe2f436323c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:02 +0200
Subject: [PATCH 3023/3804] media: v4l2-core: ignore native time32 ioctls on
 64-bit

Syzbot found that passing ioctl command 0xc0505609 into a 64-bit
kernel from a 32-bit process causes uninitialized kernel memory to
get passed to drivers instead of the user space data:

BUG: KMSAN: uninit-value in check_array_args drivers/media/v4l2-core/v4l2-ioctl.c:3041 [inline]
BUG: KMSAN: uninit-value in video_usercopy+0x1631/0x3d30 drivers/media/v4l2-core/v4l2-ioctl.c:3315
CPU: 0 PID: 19595 Comm: syz-executor.4 Not tainted 5.11.0-rc7-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:79 [inline]
 dump_stack+0x21c/0x280 lib/dump_stack.c:120
 kmsan_report+0xfb/0x1e0 mm/kmsan/kmsan_report.c:118
 __msan_warning+0x5f/0xa0 mm/kmsan/kmsan_instr.c:197
 check_array_args drivers/media/v4l2-core/v4l2-ioctl.c:3041 [inline]
 video_usercopy+0x1631/0x3d30 drivers/media/v4l2-core/v4l2-ioctl.c:3315
 video_ioctl2+0x9f/0xb0 drivers/media/v4l2-core/v4l2-ioctl.c:3391
 v4l2_ioctl+0x255/0x290 drivers/media/v4l2-core/v4l2-dev.c:360
 v4l2_compat_ioctl32+0x2c6/0x370 drivers/media/v4l2-core/v4l2-compat-ioctl32.c:1248
 __do_compat_sys_ioctl fs/ioctl.c:842 [inline]
 __se_compat_sys_ioctl+0x53d/0x1100 fs/ioctl.c:793
 __ia32_compat_sys_ioctl+0x4a/0x70 fs/ioctl.c:793
 do_syscall_32_irqs_on arch/x86/entry/common.c:79 [inline]
 __do_fast_syscall_32+0x102/0x160 arch/x86/entry/common.c:141
 do_fast_syscall_32+0x6a/0xc0 arch/x86/entry/common.c:166
 do_SYSENTER_32+0x73/0x90 arch/x86/entry/common.c:209
 entry_SYSENTER_compat_after_hwframe+0x4d/0x5c

The time32 commands are defined but were never meant to be called on
64-bit machines, as those have always used time64 interfaces.  I missed
this in my patch that introduced the time64 handling on 32-bit platforms.

The problem in this case is the mismatch of one function checking for
the numeric value of the command and another function checking for the
type of process (native vs compat) instead, with the result being that
for this combination, nothing gets copied into the buffer at all.

Avoid this by only trying to convert the time32 commands when running
on a 32-bit kernel where these are defined in a meaningful way.

[hverkuil: fix 3 warnings: switch with no cases]

Fixes: 577c89b0ce72 ("media: v4l2-core: fix v4l2_buffer handling for time64 ABI")
Reported-by: syzbot+142888ffec98ab194028@syzkaller.appspotmail.com
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 2673f51aafa4d..07d823656ee65 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3072,8 +3072,8 @@ static int check_array_args(unsigned int cmd, void *parg, size_t *array_size,
 
 static unsigned int video_translate_cmd(unsigned int cmd)
 {
+#if !defined(CONFIG_64BIT) && defined(CONFIG_COMPAT_32BIT_TIME)
 	switch (cmd) {
-#ifdef CONFIG_COMPAT_32BIT_TIME
 	case VIDIOC_DQEVENT_TIME32:
 		return VIDIOC_DQEVENT;
 	case VIDIOC_QUERYBUF_TIME32:
@@ -3084,8 +3084,8 @@ static unsigned int video_translate_cmd(unsigned int cmd)
 		return VIDIOC_DQBUF;
 	case VIDIOC_PREPARE_BUF_TIME32:
 		return VIDIOC_PREPARE_BUF;
-#endif
 	}
+#endif
 	if (in_compat_syscall())
 		return v4l2_compat_translate_cmd(cmd);
 
@@ -3126,8 +3126,8 @@ static int video_get_user(void __user *arg, void *parg,
 	} else if (in_compat_syscall()) {
 		err = v4l2_compat_get_user(arg, parg, cmd);
 	} else {
+#if !defined(CONFIG_64BIT) && defined(CONFIG_COMPAT_32BIT_TIME)
 		switch (cmd) {
-#ifdef CONFIG_COMPAT_32BIT_TIME
 		case VIDIOC_QUERYBUF_TIME32:
 		case VIDIOC_QBUF_TIME32:
 		case VIDIOC_DQBUF_TIME32:
@@ -3155,8 +3155,8 @@ static int video_get_user(void __user *arg, void *parg,
 			};
 			break;
 		}
-#endif
 		}
+#endif
 	}
 
 	/* zero out anything we don't copy from userspace */
@@ -3181,8 +3181,8 @@ static int video_put_user(void __user *arg, void *parg,
 	if (in_compat_syscall())
 		return v4l2_compat_put_user(arg, parg, cmd);
 
+#if !defined(CONFIG_64BIT) && defined(CONFIG_COMPAT_32BIT_TIME)
 	switch (cmd) {
-#ifdef CONFIG_COMPAT_32BIT_TIME
 	case VIDIOC_DQEVENT_TIME32: {
 		struct v4l2_event *ev = parg;
 		struct v4l2_event_time32 ev32;
@@ -3230,8 +3230,8 @@ static int video_put_user(void __user *arg, void *parg,
 			return -EFAULT;
 		break;
 	}
-#endif
 	}
+#endif
 
 	return 0;
 }
-- 
GitLab


From 7b53cca764f9b291b7907fcd39d9e66ad728ee0b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:03 +0200
Subject: [PATCH 3024/3804] media: v4l2-core: explicitly clear ioctl input data

As seen from a recent syzbot bug report, mistakes in the compat ioctl
implementation can lead to uninitialized kernel stack data getting used
as input for driver ioctl handlers.

The reported bug is now fixed, but it's possible that other related
bugs are still present or get added in the future. As the drivers need
to check user input already, the possible impact is fairly low, but it
might still cause an information leak.

To be on the safe side, always clear the entire ioctl buffer before
calling the conversion handler functions that are meant to initialize
them.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 07d823656ee65..cf50c60bbb5d3 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3124,8 +3124,10 @@ static int video_get_user(void __user *arg, void *parg,
 		if (copy_from_user(parg, (void __user *)arg, n))
 			err = -EFAULT;
 	} else if (in_compat_syscall()) {
+		memset(parg, 0, n);
 		err = v4l2_compat_get_user(arg, parg, cmd);
 	} else {
+		memset(parg, 0, n);
 #if !defined(CONFIG_64BIT) && defined(CONFIG_COMPAT_32BIT_TIME)
 		switch (cmd) {
 		case VIDIOC_QUERYBUF_TIME32:
-- 
GitLab


From e84c8932897e8c59e01c33f4052a72d5b2890884 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:04 +0200
Subject: [PATCH 3025/3804] media: v4l2-core: fix whitespace damage in
 video_get_user()

The initialization was indented with an extra tab in most lines,
remove them to get the normal coding style.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index cf50c60bbb5d3..05d5db3d85e58 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -3142,18 +3142,18 @@ static int video_get_user(void __user *arg, void *parg,
 
 			*vb = (struct v4l2_buffer) {
 				.index		= vb32.index,
-					.type		= vb32.type,
-					.bytesused	= vb32.bytesused,
-					.flags		= vb32.flags,
-					.field		= vb32.field,
-					.timestamp.tv_sec	= vb32.timestamp.tv_sec,
-					.timestamp.tv_usec	= vb32.timestamp.tv_usec,
-					.timecode	= vb32.timecode,
-					.sequence	= vb32.sequence,
-					.memory		= vb32.memory,
-					.m.userptr	= vb32.m.userptr,
-					.length		= vb32.length,
-					.request_fd	= vb32.request_fd,
+				.type		= vb32.type,
+				.bytesused	= vb32.bytesused,
+				.flags		= vb32.flags,
+				.field		= vb32.field,
+				.timestamp.tv_sec	= vb32.timestamp.tv_sec,
+				.timestamp.tv_usec	= vb32.timestamp.tv_usec,
+				.timecode	= vb32.timecode,
+				.sequence	= vb32.sequence,
+				.memory		= vb32.memory,
+				.m.userptr	= vb32.m.userptr,
+				.length		= vb32.length,
+				.request_fd	= vb32.request_fd,
 			};
 			break;
 		}
-- 
GitLab


From 765ba251d2522e2a0daa2f0793fd0f0ce34816ec Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:05 +0200
Subject: [PATCH 3026/3804] media: subdev: remove VIDIOC_DQEVENT_TIME32
 handling

Converting the VIDIOC_DQEVENT_TIME32/VIDIOC_DQEVENT32/
VIDIOC_DQEVENT32_TIME32 arguments to the canonical form is done in common
code, but for some reason I ended up adding another conversion helper to
subdev_do_ioctl() as well. I must have concluded that this does not go
through the common conversion, but it has done that since the ioctl
handler was first added.

I assume this one is harmless as there should be no way to arrive here
from user space if CONFIG_COMPAT_32BIT_TIME is set, but since it is dead
code, it should just get removed.

On a 64-bit architecture, as well as a 32-bit architecture without
CONFIG_COMPAT_32BIT_TIME, handling this command is a mistake,
and the kernel should return an error.

Fixes: 1a6c0b36dd19 ("media: v4l2-core: fix VIDIOC_DQEVENT for time64 ABI")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-subdev.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 6d3e030365198..5d27a27cc2f24 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -430,30 +430,6 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 
 		return v4l2_event_dequeue(vfh, arg, file->f_flags & O_NONBLOCK);
 
-	case VIDIOC_DQEVENT_TIME32: {
-		struct v4l2_event_time32 *ev32 = arg;
-		struct v4l2_event ev = { };
-
-		if (!(sd->flags & V4L2_SUBDEV_FL_HAS_EVENTS))
-			return -ENOIOCTLCMD;
-
-		rval = v4l2_event_dequeue(vfh, &ev, file->f_flags & O_NONBLOCK);
-
-		*ev32 = (struct v4l2_event_time32) {
-			.type		= ev.type,
-			.pending	= ev.pending,
-			.sequence	= ev.sequence,
-			.timestamp.tv_sec  = ev.timestamp.tv_sec,
-			.timestamp.tv_nsec = ev.timestamp.tv_nsec,
-			.id		= ev.id,
-		};
-
-		memcpy(&ev32->u, &ev.u, sizeof(ev.u));
-		memcpy(&ev32->reserved, &ev.reserved, sizeof(ev.reserved));
-
-		return rval;
-	}
-
 	case VIDIOC_SUBSCRIBE_EVENT:
 		return v4l2_subdev_call(sd, core, subscribe_event, vfh, arg);
 
-- 
GitLab


From 8162f78d27c61e148a4342c62bddef3c26135bcb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:06 +0200
Subject: [PATCH 3027/3804] media: v4l2-core: return -ENODEV from ioctl when
 not registered

I spotted a minor difference in handling of unregistered devices
between native and compat ioctls: the native handler never tries
to call into the driver if a device is not marked as registered.

I did not check whether this can cause issues in the kernel, or
just a difference between return codes, but it clearly makes
sense that both should behave the same way.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 0ca75f6784c56..47aff3b197426 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -1244,6 +1244,9 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	if (!file->f_op->unlocked_ioctl)
 		return ret;
 
+	if (!video_is_registered(vdev))
+		return -ENODEV;
+
 	if (_IOC_TYPE(cmd) == 'V' && _IOC_NR(cmd) < BASE_VIDIOC_PRIVATE)
 		ret = file->f_op->unlocked_ioctl(file, cmd,
 					(unsigned long)compat_ptr(arg));
-- 
GitLab


From b4c650f1af68251f1970aecfc3c2fceec1552da2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:07 +0200
Subject: [PATCH 3028/3804] media: atomisp: remove compat_ioctl32 code

This is one of the last remaining users of compat_alloc_user_space()
and copy_in_user(), which are in the process of getting removed.

As of commit 57e6b6f2303e ("media: atomisp_fops.c: disable
atomisp_compat_ioctl32"), nothing in this file is actually getting used
as the only reference has been stubbed out.

Remove the entire file -- anyone willing to restore the functionality
can equally well just look up the contents in the git history if needed.

Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/atomisp/Makefile        |    1 -
 drivers/staging/media/atomisp/TODO            |    5 +
 .../atomisp/pci/atomisp_compat_ioctl32.c      | 1202 -----------------
 .../staging/media/atomisp/pci/atomisp_fops.c  |    8 +-
 4 files changed, 8 insertions(+), 1208 deletions(-)
 delete mode 100644 drivers/staging/media/atomisp/pci/atomisp_compat_ioctl32.c

diff --git a/drivers/staging/media/atomisp/Makefile b/drivers/staging/media/atomisp/Makefile
index 51498b2e85b8f..606b7754fdfd7 100644
--- a/drivers/staging/media/atomisp/Makefile
+++ b/drivers/staging/media/atomisp/Makefile
@@ -16,7 +16,6 @@ atomisp-objs += \
 	pci/atomisp_acc.o \
 	pci/atomisp_cmd.o \
 	pci/atomisp_compat_css20.o \
-	pci/atomisp_compat_ioctl32.o \
 	pci/atomisp_csi2.o \
 	pci/atomisp_drvfs.o \
 	pci/atomisp_file.o \
diff --git a/drivers/staging/media/atomisp/TODO b/drivers/staging/media/atomisp/TODO
index 6987bb2d32cf2..2d1ef9eb262a5 100644
--- a/drivers/staging/media/atomisp/TODO
+++ b/drivers/staging/media/atomisp/TODO
@@ -120,6 +120,11 @@ TODO
     for this driver until the other work is done, as there will be a lot
     of code churn until this driver becomes functional again.
 
+16. Fix private ioctls to not need a compat_ioctl handler for running
+    32-bit tasks. The compat code has been removed because of bugs,
+    and should not be needed for modern drivers. Fixing this properly
+    unfortunately means an incompatible ABI change.
+
 Limitations
 ===========
 
diff --git a/drivers/staging/media/atomisp/pci/atomisp_compat_ioctl32.c b/drivers/staging/media/atomisp/pci/atomisp_compat_ioctl32.c
deleted file mode 100644
index e5553df5bad4e..0000000000000
--- a/drivers/staging/media/atomisp/pci/atomisp_compat_ioctl32.c
+++ /dev/null
@@ -1,1202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Support for Intel Camera Imaging ISP subsystem.
- *
- * Copyright (c) 2013 Intel Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- *
- */
-#ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
-#include <linux/videodev2.h>
-
-#include "atomisp_internal.h"
-#include "atomisp_compat.h"
-#include "atomisp_ioctl.h"
-#include "atomisp_compat_ioctl32.h"
-
-/* Macros borrowed from v4l2-compat-ioctl32.c */
-
-#define get_user_cast(__x, __ptr)					\
-({									\
-	get_user(__x, (typeof(*__ptr) __user *)(__ptr));		\
-})
-
-#define put_user_force(__x, __ptr)					\
-({									\
-	put_user((typeof(*__x) __force *)(__x), __ptr);			\
-})
-
-/* Use the same argument order as copy_in_user */
-#define assign_in_user(to, from)					\
-({									\
-	typeof(*from) __assign_tmp;					\
-									\
-	get_user_cast(__assign_tmp, from) || put_user(__assign_tmp, to);\
-})
-
-static int get_atomisp_histogram32(struct atomisp_histogram __user *kp,
-				   struct atomisp_histogram32 __user *up)
-{
-	compat_uptr_t tmp;
-
-	if (!access_ok(up, sizeof(struct atomisp_histogram32)) ||
-	    assign_in_user(&kp->num_elements, &up->num_elements) ||
-	    get_user(tmp, &up->data) ||
-	    put_user(compat_ptr(tmp), &kp->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_histogram32(struct atomisp_histogram __user *kp,
-				   struct atomisp_histogram32 __user *up)
-{
-	void __user *tmp;
-
-	if (!access_ok(up, sizeof(struct atomisp_histogram32)) ||
-	    assign_in_user(&up->num_elements, &kp->num_elements) ||
-	    get_user(tmp, &kp->data) ||
-	    put_user(ptr_to_compat(tmp), &up->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_v4l2_framebuffer32(struct v4l2_framebuffer __user *kp,
-				  struct v4l2_framebuffer32 __user *up)
-{
-	compat_uptr_t tmp;
-
-	if (!access_ok(up, sizeof(struct v4l2_framebuffer32)) ||
-	    get_user(tmp, &up->base) ||
-	    put_user_force(compat_ptr(tmp), &kp->base) ||
-	    assign_in_user(&kp->capability, &up->capability) ||
-	    assign_in_user(&kp->flags, &up->flags) ||
-	    copy_in_user(&kp->fmt, &up->fmt, sizeof(kp->fmt)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_dis_statistics32(struct atomisp_dis_statistics __user *kp,
-					struct atomisp_dis_statistics32 __user *up)
-{
-	compat_uptr_t hor_prod_odd_real;
-	compat_uptr_t hor_prod_odd_imag;
-	compat_uptr_t hor_prod_even_real;
-	compat_uptr_t hor_prod_even_imag;
-	compat_uptr_t ver_prod_odd_real;
-	compat_uptr_t ver_prod_odd_imag;
-	compat_uptr_t ver_prod_even_real;
-	compat_uptr_t ver_prod_even_imag;
-
-	if (!access_ok(up, sizeof(struct atomisp_dis_statistics32)) ||
-	    copy_in_user(kp, up, sizeof(struct atomisp_dvs_grid_info)) ||
-	    get_user(hor_prod_odd_real,
-		     &up->dvs2_stat.hor_prod.odd_real) ||
-	    get_user(hor_prod_odd_imag,
-		     &up->dvs2_stat.hor_prod.odd_imag) ||
-	    get_user(hor_prod_even_real,
-		     &up->dvs2_stat.hor_prod.even_real) ||
-	    get_user(hor_prod_even_imag,
-		     &up->dvs2_stat.hor_prod.even_imag) ||
-	    get_user(ver_prod_odd_real,
-		     &up->dvs2_stat.ver_prod.odd_real) ||
-	    get_user(ver_prod_odd_imag,
-		     &up->dvs2_stat.ver_prod.odd_imag) ||
-	    get_user(ver_prod_even_real,
-		     &up->dvs2_stat.ver_prod.even_real) ||
-	    get_user(ver_prod_even_imag,
-		     &up->dvs2_stat.ver_prod.even_imag) ||
-	    assign_in_user(&kp->exp_id, &up->exp_id) ||
-	    put_user(compat_ptr(hor_prod_odd_real),
-		     &kp->dvs2_stat.hor_prod.odd_real) ||
-	    put_user(compat_ptr(hor_prod_odd_imag),
-		     &kp->dvs2_stat.hor_prod.odd_imag) ||
-	    put_user(compat_ptr(hor_prod_even_real),
-		     &kp->dvs2_stat.hor_prod.even_real) ||
-	    put_user(compat_ptr(hor_prod_even_imag),
-		     &kp->dvs2_stat.hor_prod.even_imag) ||
-	    put_user(compat_ptr(ver_prod_odd_real),
-		     &kp->dvs2_stat.ver_prod.odd_real) ||
-	    put_user(compat_ptr(ver_prod_odd_imag),
-		     &kp->dvs2_stat.ver_prod.odd_imag) ||
-	    put_user(compat_ptr(ver_prod_even_real),
-		     &kp->dvs2_stat.ver_prod.even_real) ||
-	    put_user(compat_ptr(ver_prod_even_imag),
-		     &kp->dvs2_stat.ver_prod.even_imag))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_dis_statistics32(struct atomisp_dis_statistics __user *kp,
-					struct atomisp_dis_statistics32 __user *up)
-{
-	void __user *hor_prod_odd_real;
-	void __user *hor_prod_odd_imag;
-	void __user *hor_prod_even_real;
-	void __user *hor_prod_even_imag;
-	void __user *ver_prod_odd_real;
-	void __user *ver_prod_odd_imag;
-	void __user *ver_prod_even_real;
-	void __user *ver_prod_even_imag;
-
-	if (!!access_ok(up, sizeof(struct atomisp_dis_statistics32)) ||
-	    copy_in_user(up, kp, sizeof(struct atomisp_dvs_grid_info)) ||
-	    get_user(hor_prod_odd_real,
-		     &kp->dvs2_stat.hor_prod.odd_real) ||
-	    get_user(hor_prod_odd_imag,
-		     &kp->dvs2_stat.hor_prod.odd_imag) ||
-	    get_user(hor_prod_even_real,
-		     &kp->dvs2_stat.hor_prod.even_real) ||
-	    get_user(hor_prod_even_imag,
-		     &kp->dvs2_stat.hor_prod.even_imag) ||
-	    get_user(ver_prod_odd_real,
-		     &kp->dvs2_stat.ver_prod.odd_real) ||
-	    get_user(ver_prod_odd_imag,
-		     &kp->dvs2_stat.ver_prod.odd_imag) ||
-	    get_user(ver_prod_even_real,
-		     &kp->dvs2_stat.ver_prod.even_real) ||
-	    get_user(ver_prod_even_imag,
-		     &kp->dvs2_stat.ver_prod.even_imag) ||
-	    put_user(ptr_to_compat(hor_prod_odd_real),
-		     &up->dvs2_stat.hor_prod.odd_real) ||
-	    put_user(ptr_to_compat(hor_prod_odd_imag),
-		     &up->dvs2_stat.hor_prod.odd_imag) ||
-	    put_user(ptr_to_compat(hor_prod_even_real),
-		     &up->dvs2_stat.hor_prod.even_real) ||
-	    put_user(ptr_to_compat(hor_prod_even_imag),
-		     &up->dvs2_stat.hor_prod.even_imag) ||
-	    put_user(ptr_to_compat(ver_prod_odd_real),
-		     &up->dvs2_stat.ver_prod.odd_real) ||
-	    put_user(ptr_to_compat(ver_prod_odd_imag),
-		     &up->dvs2_stat.ver_prod.odd_imag) ||
-	    put_user(ptr_to_compat(ver_prod_even_real),
-		     &up->dvs2_stat.ver_prod.even_real) ||
-	    put_user(ptr_to_compat(ver_prod_even_imag),
-		     &up->dvs2_stat.ver_prod.even_imag) ||
-	    assign_in_user(&up->exp_id, &kp->exp_id))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_dis_coefficients32(struct atomisp_dis_coefficients __user *kp,
-					  struct atomisp_dis_coefficients32 __user *up)
-{
-	compat_uptr_t hor_coefs_odd_real;
-	compat_uptr_t hor_coefs_odd_imag;
-	compat_uptr_t hor_coefs_even_real;
-	compat_uptr_t hor_coefs_even_imag;
-	compat_uptr_t ver_coefs_odd_real;
-	compat_uptr_t ver_coefs_odd_imag;
-	compat_uptr_t ver_coefs_even_real;
-	compat_uptr_t ver_coefs_even_imag;
-
-	if (!access_ok(up, sizeof(struct atomisp_dis_coefficients32)) ||
-	    copy_in_user(kp, up, sizeof(struct atomisp_dvs_grid_info)) ||
-	    get_user(hor_coefs_odd_real, &up->hor_coefs.odd_real) ||
-	    get_user(hor_coefs_odd_imag, &up->hor_coefs.odd_imag) ||
-	    get_user(hor_coefs_even_real, &up->hor_coefs.even_real) ||
-	    get_user(hor_coefs_even_imag, &up->hor_coefs.even_imag) ||
-	    get_user(ver_coefs_odd_real, &up->ver_coefs.odd_real) ||
-	    get_user(ver_coefs_odd_imag, &up->ver_coefs.odd_imag) ||
-	    get_user(ver_coefs_even_real, &up->ver_coefs.even_real) ||
-	    get_user(ver_coefs_even_imag, &up->ver_coefs.even_imag) ||
-	    put_user(compat_ptr(hor_coefs_odd_real),
-		     &kp->hor_coefs.odd_real) ||
-	    put_user(compat_ptr(hor_coefs_odd_imag),
-		     &kp->hor_coefs.odd_imag) ||
-	    put_user(compat_ptr(hor_coefs_even_real),
-		     &kp->hor_coefs.even_real) ||
-	    put_user(compat_ptr(hor_coefs_even_imag),
-		     &kp->hor_coefs.even_imag) ||
-	    put_user(compat_ptr(ver_coefs_odd_real),
-		     &kp->ver_coefs.odd_real) ||
-	    put_user(compat_ptr(ver_coefs_odd_imag),
-		     &kp->ver_coefs.odd_imag) ||
-	    put_user(compat_ptr(ver_coefs_even_real),
-		     &kp->ver_coefs.even_real) ||
-	    put_user(compat_ptr(ver_coefs_even_imag),
-		     &kp->ver_coefs.even_imag))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_dvs_6axis_config32(struct atomisp_dvs_6axis_config __user *kp,
-					  struct atomisp_dvs_6axis_config32 __user *up)
-{
-	compat_uptr_t xcoords_y;
-	compat_uptr_t ycoords_y;
-	compat_uptr_t xcoords_uv;
-	compat_uptr_t ycoords_uv;
-
-	if (!access_ok(up, sizeof(struct atomisp_dvs_6axis_config32)) ||
-	    assign_in_user(&kp->exp_id, &up->exp_id) ||
-	    assign_in_user(&kp->width_y, &up->width_y) ||
-	    assign_in_user(&kp->height_y, &up->height_y) ||
-	    assign_in_user(&kp->width_uv, &up->width_uv) ||
-	    assign_in_user(&kp->height_uv, &up->height_uv) ||
-	    get_user(xcoords_y, &up->xcoords_y) ||
-	    get_user(ycoords_y, &up->ycoords_y) ||
-	    get_user(xcoords_uv, &up->xcoords_uv) ||
-	    get_user(ycoords_uv, &up->ycoords_uv) ||
-	    put_user_force(compat_ptr(xcoords_y), &kp->xcoords_y) ||
-	    put_user_force(compat_ptr(ycoords_y), &kp->ycoords_y) ||
-	    put_user_force(compat_ptr(xcoords_uv), &kp->xcoords_uv) ||
-	    put_user_force(compat_ptr(ycoords_uv), &kp->ycoords_uv))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_3a_statistics32(struct atomisp_3a_statistics __user *kp,
-				       struct atomisp_3a_statistics32 __user *up)
-{
-	compat_uptr_t data;
-	compat_uptr_t rgby_data;
-
-	if (!access_ok(up, sizeof(struct atomisp_3a_statistics32)) ||
-	    copy_in_user(kp, up, sizeof(struct atomisp_grid_info)) ||
-	    get_user(rgby_data, &up->rgby_data) ||
-	    put_user(compat_ptr(rgby_data), &kp->rgby_data) ||
-	    get_user(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data) ||
-	    assign_in_user(&kp->exp_id, &up->exp_id) ||
-	    assign_in_user(&kp->isp_config_id, &up->isp_config_id))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_3a_statistics32(struct atomisp_3a_statistics __user *kp,
-				       struct atomisp_3a_statistics32 __user *up)
-{
-	void __user *data;
-	void __user *rgby_data;
-
-	if (!access_ok(up, sizeof(struct atomisp_3a_statistics32)) ||
-	    copy_in_user(up, kp, sizeof(struct atomisp_grid_info)) ||
-	    get_user(rgby_data, &kp->rgby_data) ||
-	    put_user(ptr_to_compat(rgby_data), &up->rgby_data) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data) ||
-	    assign_in_user(&up->exp_id, &kp->exp_id) ||
-	    assign_in_user(&up->isp_config_id, &kp->isp_config_id))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_metadata_stat32(struct atomisp_metadata __user *kp,
-				       struct atomisp_metadata32 __user *up)
-{
-	compat_uptr_t data;
-	compat_uptr_t effective_width;
-
-	if (!access_ok(up, sizeof(struct atomisp_metadata32)) ||
-	    get_user(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data) ||
-	    assign_in_user(&kp->width, &up->width) ||
-	    assign_in_user(&kp->height, &up->height) ||
-	    assign_in_user(&kp->stride, &up->stride) ||
-	    assign_in_user(&kp->exp_id, &up->exp_id) ||
-	    get_user(effective_width, &up->effective_width) ||
-	    put_user_force(compat_ptr(effective_width), &kp->effective_width))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_metadata_stat32(struct atomisp_metadata __user *kp,
-				struct atomisp_metadata32 __user *up)
-{
-	void __user *data;
-	void *effective_width;
-
-	if (!access_ok(up, sizeof(struct atomisp_metadata32)) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data) ||
-	    assign_in_user(&up->width, &kp->width) ||
-	    assign_in_user(&up->height, &kp->height) ||
-	    assign_in_user(&up->stride, &kp->stride) ||
-	    assign_in_user(&up->exp_id, &kp->exp_id) ||
-	    get_user(effective_width, &kp->effective_width) ||
-	    put_user(ptr_to_compat((void __user *)effective_width),
-				   &up->effective_width))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-put_atomisp_metadata_by_type_stat32(struct atomisp_metadata_with_type __user *kp,
-				    struct atomisp_metadata_with_type32 __user *up)
-{
-	void __user *data;
-	u32 *effective_width;
-
-	if (!access_ok(up, sizeof(struct atomisp_metadata_with_type32)) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data) ||
-	    assign_in_user(&up->width, &kp->width) ||
-	    assign_in_user(&up->height, &kp->height) ||
-	    assign_in_user(&up->stride, &kp->stride) ||
-	    assign_in_user(&up->exp_id, &kp->exp_id) ||
-	    get_user(effective_width, &kp->effective_width) ||
-	    put_user(ptr_to_compat((void __user *)effective_width),
-		     &up->effective_width) ||
-	    assign_in_user(&up->type, &kp->type))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_metadata_by_type_stat32(struct atomisp_metadata_with_type __user *kp,
-				    struct atomisp_metadata_with_type32 __user *up)
-{
-	compat_uptr_t data;
-	compat_uptr_t effective_width;
-
-	if (!access_ok(up, sizeof(struct atomisp_metadata_with_type32)) ||
-	    get_user(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data) ||
-	    assign_in_user(&kp->width, &up->width) ||
-	    assign_in_user(&kp->height, &up->height) ||
-	    assign_in_user(&kp->stride, &up->stride) ||
-	    assign_in_user(&kp->exp_id, &up->exp_id) ||
-	    get_user(effective_width, &up->effective_width) ||
-	    put_user_force(compat_ptr(effective_width), &kp->effective_width) ||
-	    assign_in_user(&kp->type, &up->type))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_morph_table32(struct atomisp_morph_table __user *kp,
-			  struct atomisp_morph_table32 __user *up)
-{
-	unsigned int n = ATOMISP_MORPH_TABLE_NUM_PLANES;
-
-	if (!access_ok(up, sizeof(struct atomisp_morph_table32)) ||
-		assign_in_user(&kp->enabled, &up->enabled) ||
-		assign_in_user(&kp->width, &up->width) ||
-		assign_in_user(&kp->height, &up->height))
-			return -EFAULT;
-
-	while (n-- > 0) {
-		compat_uptr_t coord_kp;
-
-		if (get_user(coord_kp, &up->coordinates_x[n]) ||
-		    put_user(compat_ptr(coord_kp), &kp->coordinates_x[n]) ||
-		    get_user(coord_kp, &up->coordinates_y[n]) ||
-		    put_user(compat_ptr(coord_kp), &kp->coordinates_y[n]))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static int put_atomisp_morph_table32(struct atomisp_morph_table __user *kp,
-				     struct atomisp_morph_table32 __user *up)
-{
-	unsigned int n = ATOMISP_MORPH_TABLE_NUM_PLANES;
-
-	if (!access_ok(up, sizeof(struct atomisp_morph_table32)) ||
-		assign_in_user(&up->enabled, &kp->enabled) ||
-		assign_in_user(&up->width, &kp->width) ||
-		assign_in_user(&up->height, &kp->height))
-			return -EFAULT;
-
-	while (n-- > 0) {
-		void __user *coord_kp;
-
-		if (get_user(coord_kp, &kp->coordinates_x[n]) ||
-		    put_user(ptr_to_compat(coord_kp), &up->coordinates_x[n]) ||
-		    get_user(coord_kp, &kp->coordinates_y[n]) ||
-		    put_user(ptr_to_compat(coord_kp), &up->coordinates_y[n]))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static int get_atomisp_overlay32(struct atomisp_overlay __user *kp,
-				 struct atomisp_overlay32 __user *up)
-{
-	compat_uptr_t frame;
-
-	if (!access_ok(up, sizeof(struct atomisp_overlay32)) ||
-	    get_user(frame, &up->frame) ||
-	    put_user_force(compat_ptr(frame), &kp->frame) ||
-	    assign_in_user(&kp->bg_y, &up->bg_y) ||
-	    assign_in_user(&kp->bg_u, &up->bg_u) ||
-	    assign_in_user(&kp->bg_v, &up->bg_v) ||
-	    assign_in_user(&kp->blend_input_perc_y,
-			   &up->blend_input_perc_y) ||
-	    assign_in_user(&kp->blend_input_perc_u,
-			   &up->blend_input_perc_u) ||
-	    assign_in_user(&kp->blend_input_perc_v,
-			   &up->blend_input_perc_v) ||
-	    assign_in_user(&kp->blend_overlay_perc_y,
-			   &up->blend_overlay_perc_y) ||
-	    assign_in_user(&kp->blend_overlay_perc_u,
-			   &up->blend_overlay_perc_u) ||
-	    assign_in_user(&kp->blend_overlay_perc_v,
-			   &up->blend_overlay_perc_v) ||
-	    assign_in_user(&kp->overlay_start_x, &up->overlay_start_x) ||
-	    assign_in_user(&kp->overlay_start_y, &up->overlay_start_y))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_overlay32(struct atomisp_overlay __user *kp,
-				 struct atomisp_overlay32 __user *up)
-{
-	void *frame;
-
-	if (!access_ok(up, sizeof(struct atomisp_overlay32)) ||
-	    get_user(frame, &kp->frame) ||
-	    put_user(ptr_to_compat((void __user *)frame), &up->frame) ||
-	    assign_in_user(&up->bg_y, &kp->bg_y) ||
-	    assign_in_user(&up->bg_u, &kp->bg_u) ||
-	    assign_in_user(&up->bg_v, &kp->bg_v) ||
-	    assign_in_user(&up->blend_input_perc_y,
-			   &kp->blend_input_perc_y) ||
-	    assign_in_user(&up->blend_input_perc_u,
-			   &kp->blend_input_perc_u) ||
-	    assign_in_user(&up->blend_input_perc_v,
-			   &kp->blend_input_perc_v) ||
-	    assign_in_user(&up->blend_overlay_perc_y,
-			   &kp->blend_overlay_perc_y) ||
-	    assign_in_user(&up->blend_overlay_perc_u,
-			   &kp->blend_overlay_perc_u) ||
-	    assign_in_user(&up->blend_overlay_perc_v,
-			   &kp->blend_overlay_perc_v) ||
-	    assign_in_user(&up->overlay_start_x, &kp->overlay_start_x) ||
-	    assign_in_user(&up->overlay_start_y, &kp->overlay_start_y))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_calibration_group32(struct atomisp_calibration_group __user *kp,
-				struct atomisp_calibration_group32 __user *up)
-{
-	compat_uptr_t calb_grp_values;
-
-	if (!access_ok(up, sizeof(struct atomisp_calibration_group32)) ||
-	    assign_in_user(&kp->size, &up->size) ||
-	    assign_in_user(&kp->type, &up->type) ||
-	    get_user(calb_grp_values, &up->calb_grp_values) ||
-	    put_user_force(compat_ptr(calb_grp_values), &kp->calb_grp_values))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-put_atomisp_calibration_group32(struct atomisp_calibration_group __user *kp,
-				struct atomisp_calibration_group32 __user *up)
-{
-	void *calb_grp_values;
-
-	if (!access_ok(up, sizeof(struct atomisp_calibration_group32)) ||
-	    assign_in_user(&up->size, &kp->size) ||
-	    assign_in_user(&up->type, &kp->type) ||
-	    get_user(calb_grp_values, &kp->calb_grp_values) ||
-	    put_user(ptr_to_compat((void __user *)calb_grp_values),
-		     &up->calb_grp_values))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_acc_fw_load32(struct atomisp_acc_fw_load __user *kp,
-				     struct atomisp_acc_fw_load32 __user *up)
-{
-	compat_uptr_t data;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_load32)) ||
-	    assign_in_user(&kp->size, &up->size) ||
-	    assign_in_user(&kp->fw_handle, &up->fw_handle) ||
-	    get_user_cast(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_acc_fw_load32(struct atomisp_acc_fw_load __user *kp,
-				     struct atomisp_acc_fw_load32 __user *up)
-{
-	void __user *data;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_load32)) ||
-	    assign_in_user(&up->size, &kp->size) ||
-	    assign_in_user(&up->fw_handle, &kp->fw_handle) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_acc_fw_arg32(struct atomisp_acc_fw_arg __user *kp,
-				    struct atomisp_acc_fw_arg32 __user *up)
-{
-	compat_uptr_t value;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_arg32)) ||
-	    assign_in_user(&kp->fw_handle, &up->fw_handle) ||
-	    assign_in_user(&kp->index, &up->index) ||
-	    get_user(value, &up->value) ||
-	    put_user(compat_ptr(value), &kp->value) ||
-	    assign_in_user(&kp->size, &up->size))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_acc_fw_arg32(struct atomisp_acc_fw_arg __user *kp,
-				    struct atomisp_acc_fw_arg32 __user *up)
-{
-	void __user *value;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_arg32)) ||
-	    assign_in_user(&up->fw_handle, &kp->fw_handle) ||
-	    assign_in_user(&up->index, &kp->index) ||
-	    get_user(value, &kp->value) ||
-	    put_user(ptr_to_compat(value), &up->value) ||
-	    assign_in_user(&up->size, &kp->size))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_v4l2_private_int_data32(struct v4l2_private_int_data __user *kp,
-				       struct v4l2_private_int_data32 __user *up)
-{
-	compat_uptr_t data;
-
-	if (!access_ok(up, sizeof(struct v4l2_private_int_data32)) ||
-	    assign_in_user(&kp->size, &up->size) ||
-	    get_user(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data) ||
-	    assign_in_user(&kp->reserved[0], &up->reserved[0]) ||
-	    assign_in_user(&kp->reserved[1], &up->reserved[1]))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_v4l2_private_int_data32(struct v4l2_private_int_data __user *kp,
-				       struct v4l2_private_int_data32 __user *up)
-{
-	void __user *data;
-
-	if (!access_ok(up, sizeof(struct v4l2_private_int_data32)) ||
-	    assign_in_user(&up->size, &kp->size) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data) ||
-	    assign_in_user(&up->reserved[0], &kp->reserved[0]) ||
-	    assign_in_user(&up->reserved[1], &kp->reserved[1]))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_shading_table32(struct atomisp_shading_table __user *kp,
-				       struct atomisp_shading_table32 __user *up)
-{
-	unsigned int n = ATOMISP_NUM_SC_COLORS;
-
-	if (!access_ok(up, sizeof(struct atomisp_shading_table32)) ||
-	    assign_in_user(&kp->enable, &up->enable) ||
-	    assign_in_user(&kp->sensor_width, &up->sensor_width) ||
-	    assign_in_user(&kp->sensor_height, &up->sensor_height) ||
-	    assign_in_user(&kp->width, &up->width) ||
-	    assign_in_user(&kp->height, &up->height) ||
-	    assign_in_user(&kp->fraction_bits, &up->fraction_bits))
-		return -EFAULT;
-
-	while (n-- > 0) {
-		compat_uptr_t tmp;
-
-		if (get_user(tmp, &up->data[n]) ||
-		    put_user_force(compat_ptr(tmp), &kp->data[n]))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static int get_atomisp_acc_map32(struct atomisp_acc_map __user *kp,
-				 struct atomisp_acc_map32 __user *up)
-{
-	compat_uptr_t user_ptr;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_map32)) ||
-	    assign_in_user(&kp->flags, &up->flags) ||
-	    assign_in_user(&kp->length, &up->length) ||
-	    get_user(user_ptr, &up->user_ptr) ||
-	    put_user(compat_ptr(user_ptr), &kp->user_ptr) ||
-	    assign_in_user(&kp->css_ptr, &up->css_ptr) ||
-	    assign_in_user(&kp->reserved[0], &up->reserved[0]) ||
-	    assign_in_user(&kp->reserved[1], &up->reserved[1]) ||
-	    assign_in_user(&kp->reserved[2], &up->reserved[2]) ||
-	    assign_in_user(&kp->reserved[3], &up->reserved[3]))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int put_atomisp_acc_map32(struct atomisp_acc_map __user *kp,
-				 struct atomisp_acc_map32 __user *up)
-{
-	void __user *user_ptr;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_map32)) ||
-	    assign_in_user(&up->flags, &kp->flags) ||
-	    assign_in_user(&up->length, &kp->length) ||
-	    get_user(user_ptr, &kp->user_ptr) ||
-	    put_user(ptr_to_compat(user_ptr), &up->user_ptr) ||
-	    assign_in_user(&up->css_ptr, &kp->css_ptr) ||
-	    assign_in_user(&up->reserved[0], &kp->reserved[0]) ||
-	    assign_in_user(&up->reserved[1], &kp->reserved[1]) ||
-	    assign_in_user(&up->reserved[2], &kp->reserved[2]) ||
-	    assign_in_user(&up->reserved[3], &kp->reserved[3]))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_acc_s_mapped_arg32(struct atomisp_acc_s_mapped_arg __user *kp,
-			       struct atomisp_acc_s_mapped_arg32 __user *up)
-{
-	if (!access_ok(up, sizeof(struct atomisp_acc_s_mapped_arg32)) ||
-	    assign_in_user(&kp->fw_handle, &up->fw_handle) ||
-	    assign_in_user(&kp->memory, &up->memory) ||
-	    assign_in_user(&kp->length, &up->length) ||
-	    assign_in_user(&kp->css_ptr, &up->css_ptr))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-put_atomisp_acc_s_mapped_arg32(struct atomisp_acc_s_mapped_arg __user *kp,
-			       struct atomisp_acc_s_mapped_arg32 __user *up)
-{
-	if (!access_ok(up, sizeof(struct atomisp_acc_s_mapped_arg32)) ||
-	    assign_in_user(&up->fw_handle, &kp->fw_handle) ||
-	    assign_in_user(&up->memory, &kp->memory) ||
-	    assign_in_user(&up->length, &kp->length) ||
-	    assign_in_user(&up->css_ptr, &kp->css_ptr))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int get_atomisp_parameters32(struct atomisp_parameters __user *kp,
-				    struct atomisp_parameters32 __user *up)
-{
-	int n = offsetof(struct atomisp_parameters32, output_frame) /
-		sizeof(compat_uptr_t);
-	compat_uptr_t stp, mtp, dcp, dscp;
-	struct {
-		struct atomisp_shading_table shading_table;
-		struct atomisp_morph_table morph_table;
-		struct atomisp_dis_coefficients dvs2_coefs;
-		struct atomisp_dvs_6axis_config dvs_6axis_config;
-	} __user *karg = (void __user *)(kp + 1);
-
-	if (!access_ok(up, sizeof(struct atomisp_parameters32)))
-		return -EFAULT;
-
-	while (n >= 0) {
-		compat_uptr_t __user *src = (compat_uptr_t __user *)up + n;
-		void * __user *dst = (void * __user *)kp + n;
-		compat_uptr_t tmp;
-
-		if (get_user_cast(tmp, src) || put_user_force(compat_ptr(tmp), dst))
-			return -EFAULT;
-		n--;
-	}
-
-	if (assign_in_user(&kp->isp_config_id, &up->isp_config_id) ||
-	    assign_in_user(&kp->per_frame_setting, &up->per_frame_setting) ||
-	    get_user(stp, &up->shading_table) ||
-	    get_user(mtp, &up->morph_table) ||
-	    get_user(dcp, &up->dvs2_coefs) ||
-	    get_user(dscp, &up->dvs_6axis_config))
-		return -EFAULT;
-
-	/* handle shading table */
-	if (stp && (get_atomisp_shading_table32(&karg->shading_table,
-						compat_ptr(stp)) ||
-		    put_user_force(&karg->shading_table, &kp->shading_table)))
-		return -EFAULT;
-
-	/* handle morph table */
-	if (mtp && (get_atomisp_morph_table32(&karg->morph_table,
-					      compat_ptr(mtp)) ||
-		    put_user_force(&karg->morph_table, &kp->morph_table)))
-		return -EFAULT;
-
-	/* handle dvs2 coefficients */
-	if (dcp && (get_atomisp_dis_coefficients32(&karg->dvs2_coefs,
-						   compat_ptr(dcp)) ||
-		    put_user_force(&karg->dvs2_coefs, &kp->dvs2_coefs)))
-		return -EFAULT;
-
-	/* handle dvs 6axis configuration */
-	if (dscp &&
-	    (get_atomisp_dvs_6axis_config32(&karg->dvs_6axis_config,
-					    compat_ptr(dscp)) ||
-	     put_user_force(&karg->dvs_6axis_config, &kp->dvs_6axis_config)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_acc_fw_load_to_pipe32(struct atomisp_acc_fw_load_to_pipe __user *kp,
-				  struct atomisp_acc_fw_load_to_pipe32 __user *up)
-{
-	compat_uptr_t data;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_load_to_pipe32)) ||
-	    assign_in_user(&kp->flags, &up->flags) ||
-	    assign_in_user(&kp->fw_handle, &up->fw_handle) ||
-	    assign_in_user(&kp->size, &up->size) ||
-	    assign_in_user(&kp->type, &up->type) ||
-	    assign_in_user(&kp->reserved[0], &up->reserved[0]) ||
-	    assign_in_user(&kp->reserved[1], &up->reserved[1]) ||
-	    assign_in_user(&kp->reserved[2], &up->reserved[2]) ||
-	    get_user(data, &up->data) ||
-	    put_user(compat_ptr(data), &kp->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-put_atomisp_acc_fw_load_to_pipe32(struct atomisp_acc_fw_load_to_pipe __user *kp,
-				  struct atomisp_acc_fw_load_to_pipe32 __user *up)
-{
-	void __user *data;
-
-	if (!access_ok(up, sizeof(struct atomisp_acc_fw_load_to_pipe32)) ||
-	    assign_in_user(&up->flags, &kp->flags) ||
-	    assign_in_user(&up->fw_handle, &kp->fw_handle) ||
-	    assign_in_user(&up->size, &kp->size) ||
-	    assign_in_user(&up->type, &kp->type) ||
-	    assign_in_user(&up->reserved[0], &kp->reserved[0]) ||
-	    assign_in_user(&up->reserved[1], &kp->reserved[1]) ||
-	    assign_in_user(&up->reserved[2], &kp->reserved[2]) ||
-	    get_user(data, &kp->data) ||
-	    put_user(ptr_to_compat(data), &up->data))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int
-get_atomisp_sensor_ae_bracketing_lut(struct atomisp_sensor_ae_bracketing_lut __user *kp,
-				     struct atomisp_sensor_ae_bracketing_lut32 __user *up)
-{
-	compat_uptr_t lut;
-
-	if (!access_ok(up, sizeof(struct atomisp_sensor_ae_bracketing_lut32)) ||
-	    assign_in_user(&kp->lut_size, &up->lut_size) ||
-	    get_user(lut, &up->lut) ||
-	    put_user_force(compat_ptr(lut), &kp->lut))
-		return -EFAULT;
-
-	return 0;
-}
-
-static long native_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-	long ret = -ENOIOCTLCMD;
-
-	if (file->f_op->unlocked_ioctl)
-		ret = file->f_op->unlocked_ioctl(file, cmd, arg);
-
-	return ret;
-}
-
-static long atomisp_do_compat_ioctl(struct file *file,
-				    unsigned int cmd, unsigned long arg)
-{
-	union {
-		struct atomisp_histogram his;
-		struct atomisp_dis_statistics dis_s;
-		struct atomisp_dis_coefficients dis_c;
-		struct atomisp_dvs_6axis_config dvs_c;
-		struct atomisp_3a_statistics s3a_s;
-		struct atomisp_morph_table mor_t;
-		struct v4l2_framebuffer v4l2_buf;
-		struct atomisp_overlay overlay;
-		struct atomisp_calibration_group cal_grp;
-		struct atomisp_acc_fw_load acc_fw_load;
-		struct atomisp_acc_fw_arg acc_fw_arg;
-		struct v4l2_private_int_data v4l2_pri_data;
-		struct atomisp_shading_table shd_tbl;
-		struct atomisp_acc_map acc_map;
-		struct atomisp_acc_s_mapped_arg acc_map_arg;
-		struct atomisp_parameters param;
-		struct atomisp_acc_fw_load_to_pipe acc_fw_to_pipe;
-		struct atomisp_metadata md;
-		struct atomisp_metadata_with_type md_with_type;
-		struct atomisp_sensor_ae_bracketing_lut lut;
-	} __user *karg;
-	void __user *up = compat_ptr(arg);
-	long err = -ENOIOCTLCMD;
-
-	karg = compat_alloc_user_space(
-		sizeof(*karg) + (cmd == ATOMISP_IOC_S_PARAMETERS32 ?
-				 sizeof(struct atomisp_shading_table) +
-				 sizeof(struct atomisp_morph_table) +
-				 sizeof(struct atomisp_dis_coefficients) +
-				 sizeof(struct atomisp_dvs_6axis_config) : 0));
-	if (!karg)
-		return -ENOMEM;
-
-	/* First, convert the command. */
-	switch (cmd) {
-	case ATOMISP_IOC_G_HISTOGRAM32:
-		cmd = ATOMISP_IOC_G_HISTOGRAM;
-		break;
-	case ATOMISP_IOC_S_HISTOGRAM32:
-		cmd = ATOMISP_IOC_S_HISTOGRAM;
-		break;
-	case ATOMISP_IOC_G_DIS_STAT32:
-		cmd = ATOMISP_IOC_G_DIS_STAT;
-		break;
-	case ATOMISP_IOC_S_DIS_COEFS32:
-		cmd = ATOMISP_IOC_S_DIS_COEFS;
-		break;
-	case ATOMISP_IOC_S_DIS_VECTOR32:
-		cmd = ATOMISP_IOC_S_DIS_VECTOR;
-		break;
-	case ATOMISP_IOC_G_3A_STAT32:
-		cmd = ATOMISP_IOC_G_3A_STAT;
-		break;
-	case ATOMISP_IOC_G_ISP_GDC_TAB32:
-		cmd = ATOMISP_IOC_G_ISP_GDC_TAB;
-		break;
-	case ATOMISP_IOC_S_ISP_GDC_TAB32:
-		cmd = ATOMISP_IOC_S_ISP_GDC_TAB;
-		break;
-	case ATOMISP_IOC_S_ISP_FPN_TABLE32:
-		cmd = ATOMISP_IOC_S_ISP_FPN_TABLE;
-		break;
-	case ATOMISP_IOC_G_ISP_OVERLAY32:
-		cmd = ATOMISP_IOC_G_ISP_OVERLAY;
-		break;
-	case ATOMISP_IOC_S_ISP_OVERLAY32:
-		cmd = ATOMISP_IOC_S_ISP_OVERLAY;
-		break;
-	case ATOMISP_IOC_G_SENSOR_CALIBRATION_GROUP32:
-		cmd = ATOMISP_IOC_G_SENSOR_CALIBRATION_GROUP;
-		break;
-	case ATOMISP_IOC_ACC_LOAD32:
-		cmd = ATOMISP_IOC_ACC_LOAD;
-		break;
-	case ATOMISP_IOC_ACC_S_ARG32:
-		cmd = ATOMISP_IOC_ACC_S_ARG;
-		break;
-	case ATOMISP_IOC_G_SENSOR_PRIV_INT_DATA32:
-		cmd = ATOMISP_IOC_G_SENSOR_PRIV_INT_DATA;
-		break;
-	case ATOMISP_IOC_S_ISP_SHD_TAB32:
-		cmd = ATOMISP_IOC_S_ISP_SHD_TAB;
-		break;
-	case ATOMISP_IOC_ACC_DESTAB32:
-		cmd = ATOMISP_IOC_ACC_DESTAB;
-		break;
-	case ATOMISP_IOC_G_MOTOR_PRIV_INT_DATA32:
-		cmd = ATOMISP_IOC_G_MOTOR_PRIV_INT_DATA;
-		break;
-	case ATOMISP_IOC_ACC_MAP32:
-		cmd = ATOMISP_IOC_ACC_MAP;
-		break;
-	case ATOMISP_IOC_ACC_UNMAP32:
-		cmd = ATOMISP_IOC_ACC_UNMAP;
-		break;
-	case ATOMISP_IOC_ACC_S_MAPPED_ARG32:
-		cmd = ATOMISP_IOC_ACC_S_MAPPED_ARG;
-		break;
-	case ATOMISP_IOC_S_PARAMETERS32:
-		cmd = ATOMISP_IOC_S_PARAMETERS;
-		break;
-	case ATOMISP_IOC_ACC_LOAD_TO_PIPE32:
-		cmd = ATOMISP_IOC_ACC_LOAD_TO_PIPE;
-		break;
-	case ATOMISP_IOC_G_METADATA32:
-		cmd = ATOMISP_IOC_G_METADATA;
-		break;
-	case ATOMISP_IOC_G_METADATA_BY_TYPE32:
-		cmd = ATOMISP_IOC_G_METADATA_BY_TYPE;
-		break;
-	case ATOMISP_IOC_S_SENSOR_AE_BRACKETING_LUT32:
-		cmd = ATOMISP_IOC_S_SENSOR_AE_BRACKETING_LUT;
-		break;
-	}
-
-	switch (cmd) {
-	case ATOMISP_IOC_G_HISTOGRAM:
-	case ATOMISP_IOC_S_HISTOGRAM:
-		err = get_atomisp_histogram32(&karg->his, up);
-		break;
-	case ATOMISP_IOC_G_DIS_STAT:
-		err = get_atomisp_dis_statistics32(&karg->dis_s, up);
-		break;
-	case ATOMISP_IOC_S_DIS_COEFS:
-		err = get_atomisp_dis_coefficients32(&karg->dis_c, up);
-		break;
-	case ATOMISP_IOC_S_DIS_VECTOR:
-		err = get_atomisp_dvs_6axis_config32(&karg->dvs_c, up);
-		break;
-	case ATOMISP_IOC_G_3A_STAT:
-		err = get_atomisp_3a_statistics32(&karg->s3a_s, up);
-		break;
-	case ATOMISP_IOC_G_ISP_GDC_TAB:
-	case ATOMISP_IOC_S_ISP_GDC_TAB:
-		err = get_atomisp_morph_table32(&karg->mor_t, up);
-		break;
-	case ATOMISP_IOC_S_ISP_FPN_TABLE:
-		err = get_v4l2_framebuffer32(&karg->v4l2_buf, up);
-		break;
-	case ATOMISP_IOC_G_ISP_OVERLAY:
-	case ATOMISP_IOC_S_ISP_OVERLAY:
-		err = get_atomisp_overlay32(&karg->overlay, up);
-		break;
-	case ATOMISP_IOC_G_SENSOR_CALIBRATION_GROUP:
-		err = get_atomisp_calibration_group32(&karg->cal_grp, up);
-		break;
-	case ATOMISP_IOC_ACC_LOAD:
-		err = get_atomisp_acc_fw_load32(&karg->acc_fw_load, up);
-		break;
-	case ATOMISP_IOC_ACC_S_ARG:
-	case ATOMISP_IOC_ACC_DESTAB:
-		err = get_atomisp_acc_fw_arg32(&karg->acc_fw_arg, up);
-		break;
-	case ATOMISP_IOC_G_SENSOR_PRIV_INT_DATA:
-	case ATOMISP_IOC_G_MOTOR_PRIV_INT_DATA:
-		err = get_v4l2_private_int_data32(&karg->v4l2_pri_data, up);
-		break;
-	case ATOMISP_IOC_S_ISP_SHD_TAB:
-		err = get_atomisp_shading_table32(&karg->shd_tbl, up);
-		break;
-	case ATOMISP_IOC_ACC_MAP:
-	case ATOMISP_IOC_ACC_UNMAP:
-		err = get_atomisp_acc_map32(&karg->acc_map, up);
-		break;
-	case ATOMISP_IOC_ACC_S_MAPPED_ARG:
-		err = get_atomisp_acc_s_mapped_arg32(&karg->acc_map_arg, up);
-		break;
-	case ATOMISP_IOC_S_PARAMETERS:
-		err = get_atomisp_parameters32(&karg->param, up);
-		break;
-	case ATOMISP_IOC_ACC_LOAD_TO_PIPE:
-		err = get_atomisp_acc_fw_load_to_pipe32(&karg->acc_fw_to_pipe,
-							up);
-		break;
-	case ATOMISP_IOC_G_METADATA:
-		err = get_atomisp_metadata_stat32(&karg->md, up);
-		break;
-	case ATOMISP_IOC_G_METADATA_BY_TYPE:
-		err = get_atomisp_metadata_by_type_stat32(&karg->md_with_type,
-							  up);
-		break;
-	case ATOMISP_IOC_S_SENSOR_AE_BRACKETING_LUT:
-		err = get_atomisp_sensor_ae_bracketing_lut(&karg->lut, up);
-		break;
-	}
-	if (err)
-		return err;
-
-	err = native_ioctl(file, cmd, (unsigned long)karg);
-	if (err)
-		return err;
-
-	switch (cmd) {
-	case ATOMISP_IOC_G_HISTOGRAM:
-		err = put_atomisp_histogram32(&karg->his, up);
-		break;
-	case ATOMISP_IOC_G_DIS_STAT:
-		err = put_atomisp_dis_statistics32(&karg->dis_s, up);
-		break;
-	case ATOMISP_IOC_G_3A_STAT:
-		err = put_atomisp_3a_statistics32(&karg->s3a_s, up);
-		break;
-	case ATOMISP_IOC_G_ISP_GDC_TAB:
-		err = put_atomisp_morph_table32(&karg->mor_t, up);
-		break;
-	case ATOMISP_IOC_G_ISP_OVERLAY:
-		err = put_atomisp_overlay32(&karg->overlay, up);
-		break;
-	case ATOMISP_IOC_G_SENSOR_CALIBRATION_GROUP:
-		err = put_atomisp_calibration_group32(&karg->cal_grp, up);
-		break;
-	case ATOMISP_IOC_ACC_LOAD:
-		err = put_atomisp_acc_fw_load32(&karg->acc_fw_load, up);
-		break;
-	case ATOMISP_IOC_ACC_S_ARG:
-	case ATOMISP_IOC_ACC_DESTAB:
-		err = put_atomisp_acc_fw_arg32(&karg->acc_fw_arg, up);
-		break;
-	case ATOMISP_IOC_G_SENSOR_PRIV_INT_DATA:
-	case ATOMISP_IOC_G_MOTOR_PRIV_INT_DATA:
-		err = put_v4l2_private_int_data32(&karg->v4l2_pri_data, up);
-		break;
-	case ATOMISP_IOC_ACC_MAP:
-	case ATOMISP_IOC_ACC_UNMAP:
-		err = put_atomisp_acc_map32(&karg->acc_map, up);
-		break;
-	case ATOMISP_IOC_ACC_S_MAPPED_ARG:
-		err = put_atomisp_acc_s_mapped_arg32(&karg->acc_map_arg, up);
-		break;
-	case ATOMISP_IOC_ACC_LOAD_TO_PIPE:
-		err = put_atomisp_acc_fw_load_to_pipe32(&karg->acc_fw_to_pipe,
-							up);
-		break;
-	case ATOMISP_IOC_G_METADATA:
-		err = put_atomisp_metadata_stat32(&karg->md, up);
-		break;
-	case ATOMISP_IOC_G_METADATA_BY_TYPE:
-		err = put_atomisp_metadata_by_type_stat32(&karg->md_with_type,
-							  up);
-		break;
-	}
-
-	return err;
-}
-
-long atomisp_compat_ioctl32(struct file *file,
-			    unsigned int cmd, unsigned long arg)
-{
-	struct video_device *vdev = video_devdata(file);
-	struct atomisp_device *isp = video_get_drvdata(vdev);
-	long ret = -ENOIOCTLCMD;
-
-	if (!file->f_op->unlocked_ioctl)
-		return ret;
-
-	switch (cmd) {
-	case ATOMISP_IOC_G_XNR:
-	case ATOMISP_IOC_S_XNR:
-	case ATOMISP_IOC_G_NR:
-	case ATOMISP_IOC_S_NR:
-	case ATOMISP_IOC_G_TNR:
-	case ATOMISP_IOC_S_TNR:
-	case ATOMISP_IOC_G_BLACK_LEVEL_COMP:
-	case ATOMISP_IOC_S_BLACK_LEVEL_COMP:
-	case ATOMISP_IOC_G_EE:
-	case ATOMISP_IOC_S_EE:
-	case ATOMISP_IOC_S_DIS_VECTOR:
-	case ATOMISP_IOC_G_ISP_PARM:
-	case ATOMISP_IOC_S_ISP_PARM:
-	case ATOMISP_IOC_G_ISP_GAMMA:
-	case ATOMISP_IOC_S_ISP_GAMMA:
-	case ATOMISP_IOC_ISP_MAKERNOTE:
-	case ATOMISP_IOC_G_ISP_MACC:
-	case ATOMISP_IOC_S_ISP_MACC:
-	case ATOMISP_IOC_G_ISP_BAD_PIXEL_DETECTION:
-	case ATOMISP_IOC_S_ISP_BAD_PIXEL_DETECTION:
-	case ATOMISP_IOC_G_ISP_FALSE_COLOR_CORRECTION:
-	case ATOMISP_IOC_S_ISP_FALSE_COLOR_CORRECTION:
-	case ATOMISP_IOC_G_ISP_CTC:
-	case ATOMISP_IOC_S_ISP_CTC:
-	case ATOMISP_IOC_G_ISP_WHITE_BALANCE:
-	case ATOMISP_IOC_S_ISP_WHITE_BALANCE:
-	case ATOMISP_IOC_CAMERA_BRIDGE:
-	case ATOMISP_IOC_G_SENSOR_MODE_DATA:
-	case ATOMISP_IOC_S_EXPOSURE:
-	case ATOMISP_IOC_G_3A_CONFIG:
-	case ATOMISP_IOC_S_3A_CONFIG:
-	case ATOMISP_IOC_ACC_UNLOAD:
-	case ATOMISP_IOC_ACC_START:
-	case ATOMISP_IOC_ACC_WAIT:
-	case ATOMISP_IOC_ACC_ABORT:
-	case ATOMISP_IOC_G_ISP_GAMMA_CORRECTION:
-	case ATOMISP_IOC_S_ISP_GAMMA_CORRECTION:
-	case ATOMISP_IOC_S_CONT_CAPTURE_CONFIG:
-	case ATOMISP_IOC_G_DVS2_BQ_RESOLUTIONS:
-	case ATOMISP_IOC_EXT_ISP_CTRL:
-	case ATOMISP_IOC_EXP_ID_UNLOCK:
-	case ATOMISP_IOC_EXP_ID_CAPTURE:
-	case ATOMISP_IOC_S_ENABLE_DZ_CAPT_PIPE:
-	case ATOMISP_IOC_G_FORMATS_CONFIG:
-	case ATOMISP_IOC_S_FORMATS_CONFIG:
-	case ATOMISP_IOC_S_EXPOSURE_WINDOW:
-	case ATOMISP_IOC_S_ACC_STATE:
-	case ATOMISP_IOC_G_ACC_STATE:
-	case ATOMISP_IOC_INJECT_A_FAKE_EVENT:
-	case ATOMISP_IOC_G_SENSOR_AE_BRACKETING_INFO:
-	case ATOMISP_IOC_S_SENSOR_AE_BRACKETING_MODE:
-	case ATOMISP_IOC_G_SENSOR_AE_BRACKETING_MODE:
-	case ATOMISP_IOC_G_INVALID_FRAME_NUM:
-	case ATOMISP_IOC_S_ARRAY_RESOLUTION:
-	case ATOMISP_IOC_S_SENSOR_RUNMODE:
-	case ATOMISP_IOC_G_UPDATE_EXPOSURE:
-		ret = native_ioctl(file, cmd, arg);
-		break;
-
-	case ATOMISP_IOC_G_HISTOGRAM32:
-	case ATOMISP_IOC_S_HISTOGRAM32:
-	case ATOMISP_IOC_G_DIS_STAT32:
-	case ATOMISP_IOC_S_DIS_COEFS32:
-	case ATOMISP_IOC_S_DIS_VECTOR32:
-	case ATOMISP_IOC_G_3A_STAT32:
-	case ATOMISP_IOC_G_ISP_GDC_TAB32:
-	case ATOMISP_IOC_S_ISP_GDC_TAB32:
-	case ATOMISP_IOC_S_ISP_FPN_TABLE32:
-	case ATOMISP_IOC_G_ISP_OVERLAY32:
-	case ATOMISP_IOC_S_ISP_OVERLAY32:
-	case ATOMISP_IOC_G_SENSOR_CALIBRATION_GROUP32:
-	case ATOMISP_IOC_ACC_LOAD32:
-	case ATOMISP_IOC_ACC_S_ARG32:
-	case ATOMISP_IOC_G_SENSOR_PRIV_INT_DATA32:
-	case ATOMISP_IOC_S_ISP_SHD_TAB32:
-	case ATOMISP_IOC_ACC_DESTAB32:
-	case ATOMISP_IOC_G_MOTOR_PRIV_INT_DATA32:
-	case ATOMISP_IOC_ACC_MAP32:
-	case ATOMISP_IOC_ACC_UNMAP32:
-	case ATOMISP_IOC_ACC_S_MAPPED_ARG32:
-	case ATOMISP_IOC_S_PARAMETERS32:
-	case ATOMISP_IOC_ACC_LOAD_TO_PIPE32:
-	case ATOMISP_IOC_G_METADATA32:
-	case ATOMISP_IOC_G_METADATA_BY_TYPE32:
-	case ATOMISP_IOC_S_SENSOR_AE_BRACKETING_LUT32:
-		ret = atomisp_do_compat_ioctl(file, cmd, arg);
-		break;
-
-	default:
-		dev_warn(isp->dev,
-			 "%s: unknown ioctl '%c', dir=%d, #%d (0x%08x)\n",
-			 __func__, _IOC_TYPE(cmd), _IOC_DIR(cmd), _IOC_NR(cmd),
-			 cmd);
-		break;
-	}
-	return ret;
-}
-#endif /* CONFIG_COMPAT */
diff --git a/drivers/staging/media/atomisp/pci/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp_fops.c
index 022efd4151c03..f82bf082aa796 100644
--- a/drivers/staging/media/atomisp/pci/atomisp_fops.c
+++ b/drivers/staging/media/atomisp/pci/atomisp_fops.c
@@ -1283,7 +1283,8 @@ const struct v4l2_file_operations atomisp_fops = {
 	.unlocked_ioctl = video_ioctl2,
 #ifdef CONFIG_COMPAT
 	/*
-	 * There are problems with this code. Disable this for now.
+	 * this was removed because of bugs, the interface
+	 * needs to be made safe for compat tasks instead.
 	.compat_ioctl32 = atomisp_compat_ioctl32,
 	 */
 #endif
@@ -1297,10 +1298,7 @@ const struct v4l2_file_operations atomisp_file_fops = {
 	.mmap = atomisp_file_mmap,
 	.unlocked_ioctl = video_ioctl2,
 #ifdef CONFIG_COMPAT
-	/*
-	 * There are problems with this code. Disable this for now.
-	.compat_ioctl32 = atomisp_compat_ioctl32,
-	 */
+	/* .compat_ioctl32 = atomisp_compat_ioctl32, */
 #endif
 	.poll = atomisp_poll,
 };
-- 
GitLab


From 0a7790be182d32b9b332a37cb4206e24fe94b728 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Jun 2021 12:34:09 +0200
Subject: [PATCH 3029/3804] media: subdev: disallow ioctl for saa6588/davinci

The saa6588_ioctl() function expects to get called from other kernel
functions with a 'saa6588_command' pointer, but I found nothing stops it
from getting called from user space instead, which seems rather dangerous.

The same thing happens in the davinci vpbe driver with its VENC_GET_FLD
command.

As a quick fix, add a separate .command() callback pointer for this
driver and change the two callers over to that.  This change can easily
get backported to stable kernels if necessary, but since there are only
two drivers, we may want to eventually replace this with a set of more
specialized callbacks in the long run.

Fixes: c3fda7f835b0 ("V4L/DVB (10537): saa6588: convert to v4l2_subdev.")
Cc: stable@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/saa6588.c                   | 4 ++--
 drivers/media/pci/bt8xx/bttv-driver.c         | 6 +++---
 drivers/media/pci/saa7134/saa7134-video.c     | 6 +++---
 drivers/media/platform/davinci/vpbe_display.c | 2 +-
 drivers/media/platform/davinci/vpbe_venc.c    | 6 ++----
 include/media/v4l2-subdev.h                   | 4 ++++
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/drivers/media/i2c/saa6588.c b/drivers/media/i2c/saa6588.c
index ecb491d5f2ab8..d1e0716bdfffd 100644
--- a/drivers/media/i2c/saa6588.c
+++ b/drivers/media/i2c/saa6588.c
@@ -380,7 +380,7 @@ static void saa6588_configure(struct saa6588 *s)
 
 /* ---------------------------------------------------------------------- */
 
-static long saa6588_ioctl(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
+static long saa6588_command(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	struct saa6588 *s = to_saa6588(sd);
 	struct saa6588_command *a = arg;
@@ -433,7 +433,7 @@ static int saa6588_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *vt)
 /* ----------------------------------------------------------------------- */
 
 static const struct v4l2_subdev_core_ops saa6588_core_ops = {
-	.ioctl = saa6588_ioctl,
+	.command = saa6588_command,
 };
 
 static const struct v4l2_subdev_tuner_ops saa6588_tuner_ops = {
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index 1f62a9d8ea1d3..0e9df8b35ac66 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -3179,7 +3179,7 @@ static int radio_release(struct file *file)
 
 	btv->radio_user--;
 
-	bttv_call_all(btv, core, ioctl, SAA6588_CMD_CLOSE, &cmd);
+	bttv_call_all(btv, core, command, SAA6588_CMD_CLOSE, &cmd);
 
 	if (btv->radio_user == 0)
 		btv->has_radio_tuner = 0;
@@ -3260,7 +3260,7 @@ static ssize_t radio_read(struct file *file, char __user *data,
 	cmd.result = -ENODEV;
 	radio_enable(btv);
 
-	bttv_call_all(btv, core, ioctl, SAA6588_CMD_READ, &cmd);
+	bttv_call_all(btv, core, command, SAA6588_CMD_READ, &cmd);
 
 	return cmd.result;
 }
@@ -3281,7 +3281,7 @@ static __poll_t radio_poll(struct file *file, poll_table *wait)
 	cmd.instance = file;
 	cmd.event_list = wait;
 	cmd.poll_mask = res;
-	bttv_call_all(btv, core, ioctl, SAA6588_CMD_POLL, &cmd);
+	bttv_call_all(btv, core, command, SAA6588_CMD_POLL, &cmd);
 
 	return cmd.poll_mask;
 }
diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
index 0f9d6b9edb90a..374c8e1087de1 100644
--- a/drivers/media/pci/saa7134/saa7134-video.c
+++ b/drivers/media/pci/saa7134/saa7134-video.c
@@ -1181,7 +1181,7 @@ static int video_release(struct file *file)
 
 	saa_call_all(dev, tuner, standby);
 	if (vdev->vfl_type == VFL_TYPE_RADIO)
-		saa_call_all(dev, core, ioctl, SAA6588_CMD_CLOSE, &cmd);
+		saa_call_all(dev, core, command, SAA6588_CMD_CLOSE, &cmd);
 	mutex_unlock(&dev->lock);
 
 	return 0;
@@ -1200,7 +1200,7 @@ static ssize_t radio_read(struct file *file, char __user *data,
 	cmd.result = -ENODEV;
 
 	mutex_lock(&dev->lock);
-	saa_call_all(dev, core, ioctl, SAA6588_CMD_READ, &cmd);
+	saa_call_all(dev, core, command, SAA6588_CMD_READ, &cmd);
 	mutex_unlock(&dev->lock);
 
 	return cmd.result;
@@ -1216,7 +1216,7 @@ static __poll_t radio_poll(struct file *file, poll_table *wait)
 	cmd.event_list = wait;
 	cmd.poll_mask = 0;
 	mutex_lock(&dev->lock);
-	saa_call_all(dev, core, ioctl, SAA6588_CMD_POLL, &cmd);
+	saa_call_all(dev, core, command, SAA6588_CMD_POLL, &cmd);
 	mutex_unlock(&dev->lock);
 
 	return rc | cmd.poll_mask;
diff --git a/drivers/media/platform/davinci/vpbe_display.c b/drivers/media/platform/davinci/vpbe_display.c
index d19bad997f30c..bf3c3e76b9213 100644
--- a/drivers/media/platform/davinci/vpbe_display.c
+++ b/drivers/media/platform/davinci/vpbe_display.c
@@ -47,7 +47,7 @@ static int venc_is_second_field(struct vpbe_display *disp_dev)
 
 	ret = v4l2_subdev_call(vpbe_dev->venc,
 			       core,
-			       ioctl,
+			       command,
 			       VENC_GET_FLD,
 			       &val);
 	if (ret < 0) {
diff --git a/drivers/media/platform/davinci/vpbe_venc.c b/drivers/media/platform/davinci/vpbe_venc.c
index 8caa084e57046..bde241c26d795 100644
--- a/drivers/media/platform/davinci/vpbe_venc.c
+++ b/drivers/media/platform/davinci/vpbe_venc.c
@@ -521,9 +521,7 @@ static int venc_s_routing(struct v4l2_subdev *sd, u32 input, u32 output,
 	return ret;
 }
 
-static long venc_ioctl(struct v4l2_subdev *sd,
-			unsigned int cmd,
-			void *arg)
+static long venc_command(struct v4l2_subdev *sd, unsigned int cmd, void *arg)
 {
 	u32 val;
 
@@ -542,7 +540,7 @@ static long venc_ioctl(struct v4l2_subdev *sd,
 }
 
 static const struct v4l2_subdev_core_ops venc_core_ops = {
-	.ioctl      = venc_ioctl,
+	.command      = venc_command,
 };
 
 static const struct v4l2_subdev_video_ops venc_video_ops = {
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 89115ba4c0f2f..95f8bfd632735 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -162,6 +162,9 @@ struct v4l2_subdev_io_pin_config {
  * @s_gpio: set GPIO pins. Very simple right now, might need to be extended with
  *	a direction argument if needed.
  *
+ * @command: called by in-kernel drivers in order to call functions internal
+ *	   to subdev drivers that have a separate callback.
+ *
  * @ioctl: called at the end of ioctl() syscall handler at the V4L2 core.
  *	   used to provide support for private ioctls used on the driver.
  *
@@ -193,6 +196,7 @@ struct v4l2_subdev_core_ops {
 	int (*load_fw)(struct v4l2_subdev *sd);
 	int (*reset)(struct v4l2_subdev *sd, u32 val);
 	int (*s_gpio)(struct v4l2_subdev *sd, u32 val);
+	long (*command)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 	long (*ioctl)(struct v4l2_subdev *sd, unsigned int cmd, void *arg);
 #ifdef CONFIG_COMPAT
 	long (*compat_ioctl32)(struct v4l2_subdev *sd, unsigned int cmd,
-- 
GitLab


From ca816468bc3712c8ae120a94c19983851a4c6c4a Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 4 May 2021 14:26:48 +0200
Subject: [PATCH 3030/3804] media: coda: set debugfs blobs to read only

Those blobs can only be read. So, don't confuse users with 'writable'
flags.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/coda/coda-common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c
index 2017de85713eb..0e312b0842d7f 100644
--- a/drivers/media/platform/coda/coda-common.c
+++ b/drivers/media/platform/coda/coda-common.c
@@ -1935,7 +1935,7 @@ int coda_alloc_aux_buf(struct coda_dev *dev, struct coda_aux_buf *buf,
 	if (name && parent) {
 		buf->blob.data = buf->vaddr;
 		buf->blob.size = size;
-		buf->dentry = debugfs_create_blob(name, 0644, parent,
+		buf->dentry = debugfs_create_blob(name, 0444, parent,
 						  &buf->blob);
 	}
 
@@ -3233,7 +3233,7 @@ static int coda_probe(struct platform_device *pdev)
 		memset(dev->iram.vaddr, 0, dev->iram.size);
 		dev->iram.blob.data = dev->iram.vaddr;
 		dev->iram.blob.size = dev->iram.size;
-		dev->iram.dentry = debugfs_create_blob("iram", 0644,
+		dev->iram.dentry = debugfs_create_blob("iram", 0444,
 						       dev->debugfs_root,
 						       &dev->iram.blob);
 	}
-- 
GitLab


From e198be37e52551bb863d07d2edc535d0932a3c4f Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Mon, 17 May 2021 16:29:23 +0200
Subject: [PATCH 3031/3804] media: imx-csi: Skip first few frames from a BT.656
 source

Some BT.656 sensors (e.g. ADV718x) transmit frames with unstable BT.656
sync codes after initial power on. This confuses the imx CSI, resulting
in vertical and/or horizontal sync issues. Skip the first 20 frames
to avoid the unstable sync codes.

[fabio: fixed checkpatch warning and increased the frame skipping to 20]

Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Reviewed-by: Tim Harvey <tharvey@gateworks.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/imx/imx-media-csi.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index d2f1d40b2d5a2..bb1305c9daaf5 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -750,9 +750,10 @@ static int csi_setup(struct csi_priv *priv)
 
 static int csi_start(struct csi_priv *priv)
 {
-	struct v4l2_fract *output_fi;
+	struct v4l2_fract *input_fi, *output_fi;
 	int ret;
 
+	input_fi = &priv->frame_interval[CSI_SINK_PAD];
 	output_fi = &priv->frame_interval[priv->active_output_pad];
 
 	/* start upstream */
@@ -761,6 +762,17 @@ static int csi_start(struct csi_priv *priv)
 	if (ret)
 		return ret;
 
+	/* Skip first few frames from a BT.656 source */
+	if (priv->upstream_ep.bus_type == V4L2_MBUS_BT656) {
+		u32 delay_usec, bad_frames = 20;
+
+		delay_usec = DIV_ROUND_UP_ULL((u64)USEC_PER_SEC *
+			input_fi->numerator * bad_frames,
+			input_fi->denominator);
+
+		usleep_range(delay_usec, delay_usec + 1000);
+	}
+
 	if (priv->dest == IPU_CSI_DEST_IDMAC) {
 		ret = csi_idmac_start(priv);
 		if (ret)
-- 
GitLab


From 2b889a4afcacef4888ac8203a60e68004816e1fd Mon Sep 17 00:00:00 2001
From: Evgeny Novikov <novikov@ispras.ru>
Date: Wed, 26 May 2021 16:35:06 +0200
Subject: [PATCH 3032/3804] media: marvell-ccic: set error code in probe

When i2c_new_client_device() fails, cafe_pci_probe() cleans up all
resources and returns 0. The patch sets the error code on the
corresponding path.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Evgeny Novikov <novikov@ispras.ru>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/marvell-ccic/cafe-driver.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/media/platform/marvell-ccic/cafe-driver.c b/drivers/media/platform/marvell-ccic/cafe-driver.c
index baac86f3d1530..9aa374fa8b364 100644
--- a/drivers/media/platform/marvell-ccic/cafe-driver.c
+++ b/drivers/media/platform/marvell-ccic/cafe-driver.c
@@ -486,6 +486,7 @@ static int cafe_pci_probe(struct pci_dev *pdev,
 	struct cafe_camera *cam;
 	struct mcam_camera *mcam;
 	struct v4l2_async_subdev *asd;
+	struct i2c_client *i2c_dev;
 
 	/*
 	 * Start putting together one of our big camera structures.
@@ -561,11 +562,16 @@ static int cafe_pci_probe(struct pci_dev *pdev,
 	clkdev_create(mcam->mclk, "xclk", "%d-%04x",
 		i2c_adapter_id(cam->i2c_adapter), ov7670_info.addr);
 
-	if (!IS_ERR(i2c_new_client_device(cam->i2c_adapter, &ov7670_info))) {
-		cam->registered = 1;
-		return 0;
+	i2c_dev = i2c_new_client_device(cam->i2c_adapter, &ov7670_info);
+	if (IS_ERR(i2c_dev)) {
+		ret = PTR_ERR(i2c_dev);
+		goto out_mccic_shutdown;
 	}
 
+	cam->registered = 1;
+	return 0;
+
+out_mccic_shutdown:
 	mccic_shutdown(mcam);
 out_smbus_shutdown:
 	cafe_smbus_shutdown(cam);
-- 
GitLab


From 5d11e6aad1811ea293ee2996cec9124f7fccb661 Mon Sep 17 00:00:00 2001
From: Dillon Min <dillon.minfei@gmail.com>
Date: Wed, 26 May 2021 17:18:32 +0200
Subject: [PATCH 3033/3804] media: s5p-g2d: Fix a memory leak on
 ctx->fh.m2m_ctx

The m2m_ctx resources allocated by v4l2_m2m_ctx_init() in g2d_open()
should be freed in g2d_release() when they are no longer used.

Fix it.

Fixes: 918847341af0 ("[media] v4l: add G2D driver for s5p device family")
Signed-off-by: Dillon Min <dillon.minfei@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/s5p-g2d/g2d.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c
index 15bcb7f6e113c..1cb5eaabf340b 100644
--- a/drivers/media/platform/s5p-g2d/g2d.c
+++ b/drivers/media/platform/s5p-g2d/g2d.c
@@ -276,6 +276,9 @@ static int g2d_release(struct file *file)
 	struct g2d_dev *dev = video_drvdata(file);
 	struct g2d_ctx *ctx = fh2ctx(file->private_data);
 
+	mutex_lock(&dev->mutex);
+	v4l2_m2m_ctx_release(ctx->fh.m2m_ctx);
+	mutex_unlock(&dev->mutex);
 	v4l2_ctrl_handler_free(&ctx->ctrl_handler);
 	v4l2_fh_del(&ctx->fh);
 	v4l2_fh_exit(&ctx->fh);
-- 
GitLab


From 584b2373eef9c487620153a758072e295ab28cc1 Mon Sep 17 00:00:00 2001
From: Piyush Thange <pthange19@gmail.com>
Date: Wed, 26 May 2021 17:26:19 +0200
Subject: [PATCH 3034/3804] media: usb: cpia2: Fixed Coding Style issues

Fixed all the coding style issues reported by checkpatch.pl.
The changes were made considering the --strict option.

Signed-off-by: Piyush Thange <pthange19@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/usb/cpia2/cpia2_v4l.c | 149 ++++++++++++++--------------
 1 file changed, 74 insertions(+), 75 deletions(-)

diff --git a/drivers/media/usb/cpia2/cpia2_v4l.c b/drivers/media/usb/cpia2/cpia2_v4l.c
index 69d5c628a7978..926ecfc9b64ab 100644
--- a/drivers/media/usb/cpia2/cpia2_v4l.c
+++ b/drivers/media/usb/cpia2/cpia2_v4l.c
@@ -140,10 +140,10 @@ static ssize_t cpia2_v4l_read(struct file *file, char __user *buf, size_t count,
 			      loff_t *off)
 {
 	struct camera_data *cam = video_drvdata(file);
-	int noblock = file->f_flags&O_NONBLOCK;
+	int noblock = file->f_flags & O_NONBLOCK;
 	ssize_t ret;
 
-	if(!cam)
+	if (!cam)
 		return -EINVAL;
 
 	if (mutex_lock_interruptible(&cam->v4l2_lock))
@@ -153,7 +153,6 @@ static ssize_t cpia2_v4l_read(struct file *file, char __user *buf, size_t count,
 	return ret;
 }
 
-
 /******************************************************************************
  *
  *  cpia2_v4l_poll
@@ -170,7 +169,6 @@ static __poll_t cpia2_v4l_poll(struct file *filp, struct poll_table_struct *wait
 	return res;
 }
 
-
 static int sync(struct camera_data *cam, int frame_nr)
 {
 	struct framebuf *frame = &cam->buffers[frame_nr];
@@ -247,8 +245,8 @@ static int cpia2_querycap(struct file *file, void *fh, struct v4l2_capability *v
 		break;
 	}
 
-	if (usb_make_path(cam->dev, vc->bus_info, sizeof(vc->bus_info)) <0)
-		memset(vc->bus_info,0, sizeof(vc->bus_info));
+	if (usb_make_path(cam->dev, vc->bus_info, sizeof(vc->bus_info)) < 0)
+		memset(vc->bus_info, 0, sizeof(vc->bus_info));
 	return 0;
 }
 
@@ -289,7 +287,7 @@ static int cpia2_s_input(struct file *file, void *fh, unsigned int i)
  *****************************************************************************/
 
 static int cpia2_enum_fmt_vid_cap(struct file *file, void *fh,
-					    struct v4l2_fmtdesc *f)
+				  struct v4l2_fmtdesc *f)
 {
 	if (f->index > 1)
 		return -EINVAL;
@@ -310,13 +308,13 @@ static int cpia2_enum_fmt_vid_cap(struct file *file, void *fh,
  *****************************************************************************/
 
 static int cpia2_try_fmt_vid_cap(struct file *file, void *fh,
-					  struct v4l2_format *f)
+				 struct v4l2_format *f)
 {
 	struct camera_data *cam = video_drvdata(file);
 
 	if (f->fmt.pix.pixelformat != V4L2_PIX_FMT_MJPEG &&
 	    f->fmt.pix.pixelformat != V4L2_PIX_FMT_JPEG)
-	       return -EINVAL;
+		return -EINVAL;
 
 	f->fmt.pix.field = V4L2_FIELD_NONE;
 	f->fmt.pix.bytesperline = 0;
@@ -371,19 +369,20 @@ static int cpia2_try_fmt_vid_cap(struct file *file, void *fh,
  *****************************************************************************/
 
 static int cpia2_s_fmt_vid_cap(struct file *file, void *_fh,
-					struct v4l2_format *f)
+			       struct v4l2_format *f)
 {
 	struct camera_data *cam = video_drvdata(file);
 	int err, frame;
 
 	err = cpia2_try_fmt_vid_cap(file, _fh, f);
-	if(err != 0)
+	if (err != 0)
 		return err;
 
 	cam->pixelformat = f->fmt.pix.pixelformat;
 
 	/* NOTE: This should be set to 1 for MJPEG, but some apps don't handle
-	 * the missing Huffman table properly. */
+	 * the missing Huffman table properly.
+	 */
 	cam->params.compression.inhibit_htables = 0;
 		/*f->fmt.pix.pixelformat == V4L2_PIX_FMT_MJPEG;*/
 
@@ -421,7 +420,7 @@ static int cpia2_s_fmt_vid_cap(struct file *file, void *_fh,
  *****************************************************************************/
 
 static int cpia2_g_fmt_vid_cap(struct file *file, void *fh,
-					struct v4l2_format *f)
+			       struct v4l2_format *f)
 {
 	struct camera_data *cam = video_drvdata(file);
 
@@ -547,9 +546,8 @@ static const struct {
 };
 
 static int cpia2_enum_framesizes(struct file *file, void *fh,
-					 struct v4l2_frmsizeenum *fsize)
+				 struct v4l2_frmsizeenum *fsize)
 {
-
 	if (fsize->pixel_format != V4L2_PIX_FMT_MJPEG &&
 	    fsize->pixel_format != V4L2_PIX_FMT_JPEG)
 		return -EINVAL;
@@ -563,7 +561,7 @@ static int cpia2_enum_framesizes(struct file *file, void *fh,
 }
 
 static int cpia2_enum_frameintervals(struct file *file, void *fh,
-					   struct v4l2_frmivalenum *fival)
+				     struct v4l2_frmivalenum *fival)
 {
 	struct camera_data *cam = video_drvdata(file);
 	int max = ARRAY_SIZE(framerate_controls) - 1;
@@ -665,19 +663,18 @@ static int cpia2_g_jpegcomp(struct file *file, void *fh, struct v4l2_jpegcompres
 	parms->quality = 80; // TODO: Can this be made meaningful?
 
 	parms->jpeg_markers = V4L2_JPEG_MARKER_DQT | V4L2_JPEG_MARKER_DRI;
-	if(!cam->params.compression.inhibit_htables) {
+	if (!cam->params.compression.inhibit_htables)
 		parms->jpeg_markers |= V4L2_JPEG_MARKER_DHT;
-	}
 
 	parms->APPn = cam->APPn;
 	parms->APP_len = cam->APP_len;
-	if(cam->APP_len > 0) {
+	if (cam->APP_len > 0) {
 		memcpy(parms->APP_data, cam->APP_data, cam->APP_len);
 		parms->jpeg_markers |= V4L2_JPEG_MARKER_APP;
 	}
 
 	parms->COM_len = cam->COM_len;
-	if(cam->COM_len > 0) {
+	if (cam->COM_len > 0) {
 		memcpy(parms->COM_data, cam->COM_data, cam->COM_len);
 		parms->jpeg_markers |= JPEG_MARKER_COM;
 	}
@@ -698,7 +695,7 @@ static int cpia2_g_jpegcomp(struct file *file, void *fh, struct v4l2_jpegcompres
  *****************************************************************************/
 
 static int cpia2_s_jpegcomp(struct file *file, void *fh,
-		const struct v4l2_jpegcompression *parms)
+			    const struct v4l2_jpegcompression *parms)
 {
 	struct camera_data *cam = video_drvdata(file);
 
@@ -708,9 +705,9 @@ static int cpia2_s_jpegcomp(struct file *file, void *fh,
 	cam->params.compression.inhibit_htables =
 		!(parms->jpeg_markers & V4L2_JPEG_MARKER_DHT);
 
-	if(parms->APP_len != 0) {
-		if(parms->APP_len > 0 &&
-		   parms->APP_len <= sizeof(cam->APP_data) &&
+	if (parms->APP_len != 0) {
+		if (parms->APP_len > 0 &&
+		    parms->APP_len <= sizeof(cam->APP_data) &&
 		   parms->APPn >= 0 && parms->APPn <= 15) {
 			cam->APPn = parms->APPn;
 			cam->APP_len = parms->APP_len;
@@ -724,9 +721,9 @@ static int cpia2_s_jpegcomp(struct file *file, void *fh,
 		cam->APP_len = 0;
 	}
 
-	if(parms->COM_len != 0) {
-		if(parms->COM_len > 0 &&
-		   parms->COM_len <= sizeof(cam->COM_data)) {
+	if (parms->COM_len != 0) {
+		if (parms->COM_len > 0 &&
+		    parms->COM_len <= sizeof(cam->COM_data)) {
 			cam->COM_len = parms->COM_len;
 			memcpy(cam->COM_data, parms->COM_data, parms->COM_len);
 		} else {
@@ -751,8 +748,8 @@ static int cpia2_reqbufs(struct file *file, void *fh, struct v4l2_requestbuffers
 {
 	struct camera_data *cam = video_drvdata(file);
 
-	if(req->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
-	   req->memory != V4L2_MEMORY_MMAP)
+	if (req->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    req->memory != V4L2_MEMORY_MMAP)
 		return -EINVAL;
 
 	DBG("REQBUFS requested:%d returning:%d\n", req->count, cam->num_frames);
@@ -774,8 +771,8 @@ static int cpia2_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 {
 	struct camera_data *cam = video_drvdata(file);
 
-	if(buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
-	   buf->index >= cam->num_frames)
+	if (buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    buf->index >= cam->num_frames)
 		return -EINVAL;
 
 	buf->m.offset = cam->buffers[buf->index].data - cam->frame_buffer;
@@ -783,7 +780,7 @@ static int cpia2_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 
 	buf->memory = V4L2_MEMORY_MMAP;
 
-	if(cam->mmapped)
+	if (cam->mmapped)
 		buf->flags = V4L2_BUF_FLAG_MAPPED;
 	else
 		buf->flags = 0;
@@ -806,8 +803,8 @@ static int cpia2_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	}
 
 	DBG("QUERYBUF index:%d offset:%d flags:%d seq:%d bytesused:%d\n",
-	     buf->index, buf->m.offset, buf->flags, buf->sequence,
-	     buf->bytesused);
+	    buf->index, buf->m.offset, buf->flags, buf->sequence,
+	    buf->bytesused);
 
 	return 0;
 }
@@ -824,14 +821,14 @@ static int cpia2_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 {
 	struct camera_data *cam = video_drvdata(file);
 
-	if(buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
-	   buf->memory != V4L2_MEMORY_MMAP ||
+	if (buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    buf->memory != V4L2_MEMORY_MMAP ||
 	   buf->index >= cam->num_frames)
 		return -EINVAL;
 
 	DBG("QBUF #%d\n", buf->index);
 
-	if(cam->buffers[buf->index].status == FRAME_READY)
+	if (cam->buffers[buf->index].status == FRAME_READY)
 		cam->buffers[buf->index].status = FRAME_EMPTY;
 
 	return 0;
@@ -849,9 +846,10 @@ static int find_earliest_filled_buffer(struct camera_data *cam)
 {
 	int i;
 	int found = -1;
-	for (i=0; i<cam->num_frames; i++) {
-		if(cam->buffers[i].status == FRAME_READY) {
-			if(found < 0) {
+
+	for (i = 0; i < cam->num_frames; i++) {
+		if (cam->buffers[i].status == FRAME_READY) {
+			if (found < 0) {
 				found = i;
 			} else {
 				/* find which buffer is earlier */
@@ -876,22 +874,23 @@ static int cpia2_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 	struct camera_data *cam = video_drvdata(file);
 	int frame;
 
-	if(buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
-	   buf->memory != V4L2_MEMORY_MMAP)
+	if (buf->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ||
+	    buf->memory != V4L2_MEMORY_MMAP)
 		return -EINVAL;
 
 	frame = find_earliest_filled_buffer(cam);
 
-	if(frame < 0 && file->f_flags&O_NONBLOCK)
+	if (frame < 0 && file->f_flags & O_NONBLOCK)
 		return -EAGAIN;
 
-	if(frame < 0) {
+	if (frame < 0) {
 		/* Wait for a frame to become available */
-		struct framebuf *cb=cam->curbuff;
+		struct framebuf *cb = cam->curbuff;
+
 		mutex_unlock(&cam->v4l2_lock);
 		wait_event_interruptible(cam->wq_stream,
 					 !video_is_registered(&cam->vdev) ||
-					 (cb=cam->curbuff)->status == FRAME_READY);
+					 (cb = cam->curbuff)->status == FRAME_READY);
 		mutex_lock(&cam->v4l2_lock);
 		if (signal_pending(current))
 			return -ERESTARTSYS;
@@ -900,7 +899,6 @@ static int cpia2_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf)
 		frame = cb->num;
 	}
 
-
 	buf->index = frame;
 	buf->bytesused = cam->buffers[buf->index].length;
 	buf->flags = V4L2_BUF_FLAG_MAPPED | V4L2_BUF_FLAG_DONE
@@ -931,7 +929,7 @@ static int cpia2_streamon(struct file *file, void *fh, enum v4l2_buf_type type)
 
 	if (!cam->streaming) {
 		ret = cpia2_usb_stream_start(cam,
-				cam->params.camera_state.stream_mode);
+					     cam->params.camera_state.stream_mode);
 		if (!ret)
 			v4l2_ctrl_grab(cam->usb_alt, true);
 	}
@@ -969,7 +967,7 @@ static int cpia2_mmap(struct file *file, struct vm_area_struct *area)
 		return -ERESTARTSYS;
 	retval = cpia2_remap_buffer(cam, area);
 
-	if(!retval)
+	if (!retval)
 		cam->stream_fh = file->private_data;
 	mutex_unlock(&cam->v4l2_lock);
 	return retval;
@@ -1080,39 +1078,42 @@ int cpia2_register_camera(struct camera_data *cam)
 
 	v4l2_ctrl_handler_init(hdl, 12);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_BRIGHTNESS,
-			cam->params.pnp_id.device_type == DEVICE_STV_672 ? 1 : 0,
-			255, 1, DEFAULT_BRIGHTNESS);
+			  V4L2_CID_BRIGHTNESS,
+			  cam->params.pnp_id.device_type == DEVICE_STV_672 ? 1 : 0,
+			  255, 1, DEFAULT_BRIGHTNESS);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_CONTRAST, 0, 255, 1, DEFAULT_CONTRAST);
+			  V4L2_CID_CONTRAST, 0, 255, 1, DEFAULT_CONTRAST);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_SATURATION, 0, 255, 1, DEFAULT_SATURATION);
+			  V4L2_CID_SATURATION, 0, 255, 1, DEFAULT_SATURATION);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_HFLIP, 0, 1, 1, 0);
+			  V4L2_CID_HFLIP, 0, 1, 1, 0);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_JPEG_ACTIVE_MARKER, 0,
-			V4L2_JPEG_ACTIVE_MARKER_DHT, 0,
-			V4L2_JPEG_ACTIVE_MARKER_DHT);
+			  V4L2_CID_JPEG_ACTIVE_MARKER, 0,
+			  V4L2_JPEG_ACTIVE_MARKER_DHT, 0,
+			  V4L2_JPEG_ACTIVE_MARKER_DHT);
 	v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_JPEG_COMPRESSION_QUALITY, 1,
-			100, 1, 100);
+			  V4L2_CID_JPEG_COMPRESSION_QUALITY, 1,
+			  100, 1, 100);
 	cpia2_usb_alt.def = alternate;
 	cam->usb_alt = v4l2_ctrl_new_custom(hdl, &cpia2_usb_alt, NULL);
 	/* VP5 Only */
 	if (cam->params.pnp_id.device_type != DEVICE_STV_672)
 		v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_VFLIP, 0, 1, 1, 0);
+				  V4L2_CID_VFLIP, 0, 1, 1, 0);
 	/* Flicker control only valid for 672 */
 	if (cam->params.pnp_id.device_type == DEVICE_STV_672)
 		v4l2_ctrl_new_std_menu(hdl, &cpia2_ctrl_ops,
-			V4L2_CID_POWER_LINE_FREQUENCY,
-			V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0, 0);
+				       V4L2_CID_POWER_LINE_FREQUENCY,
+				       V4L2_CID_POWER_LINE_FREQUENCY_60HZ,
+				       0, 0);
 	/* Light control only valid for the QX5 Microscope */
 	if (cam->params.pnp_id.product == 0x151) {
 		cam->top_light = v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-				V4L2_CID_ILLUMINATORS_1, 0, 1, 1, 0);
+						   V4L2_CID_ILLUMINATORS_1,
+						   0, 1, 1, 0);
 		cam->bottom_light = v4l2_ctrl_new_std(hdl, &cpia2_ctrl_ops,
-				V4L2_CID_ILLUMINATORS_2, 0, 1, 1, 0);
+						      V4L2_CID_ILLUMINATORS_2,
+						      0, 1, 1, 0);
 		v4l2_ctrl_cluster(2, &cam->top_light);
 	}
 
@@ -1159,28 +1160,28 @@ void cpia2_unregister_camera(struct camera_data *cam)
  *****************************************************************************/
 static void __init check_parameters(void)
 {
-	if(buffer_size < PAGE_SIZE) {
+	if (buffer_size < PAGE_SIZE) {
 		buffer_size = PAGE_SIZE;
 		LOG("buffer_size too small, setting to %d\n", buffer_size);
-	} else if(buffer_size > 1024*1024) {
+	} else if (buffer_size > 1024 * 1024) {
 		/* arbitrary upper limiit */
-		buffer_size = 1024*1024;
+		buffer_size = 1024 * 1024;
 		LOG("buffer_size ridiculously large, setting to %d\n",
 		    buffer_size);
 	} else {
-		buffer_size += PAGE_SIZE-1;
-		buffer_size &= ~(PAGE_SIZE-1);
+		buffer_size += PAGE_SIZE - 1;
+		buffer_size &= ~(PAGE_SIZE - 1);
 	}
 
-	if(num_buffers < 1) {
+	if (num_buffers < 1) {
 		num_buffers = 1;
 		LOG("num_buffers too small, setting to %d\n", num_buffers);
-	} else if(num_buffers > VIDEO_MAX_FRAME) {
+	} else if (num_buffers > VIDEO_MAX_FRAME) {
 		num_buffers = VIDEO_MAX_FRAME;
 		LOG("num_buffers too large, setting to %d\n", num_buffers);
 	}
 
-	if(alternate < USBIF_ISO_1 || alternate > USBIF_ISO_6) {
+	if (alternate < USBIF_ISO_1 || alternate > USBIF_ISO_6) {
 		alternate = DEFAULT_ALT;
 		LOG("alternate specified is invalid, using %d\n", alternate);
 	}
@@ -1197,7 +1198,6 @@ static void __init check_parameters(void)
 
 /************   Module Stuff ***************/
 
-
 /******************************************************************************
  *
  * cpia2_init/module_init
@@ -1211,7 +1211,6 @@ static int __init cpia2_init(void)
 	return cpia2_usb_init();
 }
 
-
 /******************************************************************************
  *
  * cpia2_exit/module_exit
-- 
GitLab


From 40d62da2a1278ea1e58ed8e304142cf09de41232 Mon Sep 17 00:00:00 2001
From: lijian <lijian@yulong.com>
Date: Thu, 27 May 2021 04:46:43 +0200
Subject: [PATCH 3035/3804] media: v4l2-event: Modified variable type
 'unsigned' to 'unsigned int'

Prefer 'unsigned int' to bare use of 'unsigned'.
Change the bare 'unsigned' declarations in v4l2-event.c to 'unsigned int'.

Signed-off-by: lijian <lijian@yulong.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/v4l2-core/v4l2-event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c
index caad58bde3268..c5ce9f11ad7bc 100644
--- a/drivers/media/v4l2-core/v4l2-event.c
+++ b/drivers/media/v4l2-core/v4l2-event.c
@@ -18,7 +18,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
-static unsigned sev_pos(const struct v4l2_subscribed_event *sev, unsigned idx)
+static unsigned int sev_pos(const struct v4l2_subscribed_event *sev, unsigned int idx)
 {
 	idx += sev->first;
 	return idx >= sev->elems ? idx - sev->elems : idx;
@@ -221,12 +221,12 @@ static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev)
 }
 
 int v4l2_event_subscribe(struct v4l2_fh *fh,
-			 const struct v4l2_event_subscription *sub, unsigned elems,
+			 const struct v4l2_event_subscription *sub, unsigned int elems,
 			 const struct v4l2_subscribed_event_ops *ops)
 {
 	struct v4l2_subscribed_event *sev, *found_ev;
 	unsigned long flags;
-	unsigned i;
+	unsigned int i;
 	int ret = 0;
 
 	if (sub->type == V4L2_EVENT_ALL)
-- 
GitLab


From e70bc1ea973ddac75119c75fe11b064dd8731051 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?=
 <niklas.soderlund+renesas@ragnatech.se>
Date: Fri, 11 Jun 2021 18:07:34 +0200
Subject: [PATCH 3036/3804] media: rcar-csi2: Add support for Y10 and Y8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support for two new media bus formats, Y10 and Y8.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/rcar-vin/rcar-csi2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c
index a128bf80e42cb..e28eff0396888 100644
--- a/drivers/media/platform/rcar-vin/rcar-csi2.c
+++ b/drivers/media/platform/rcar-vin/rcar-csi2.c
@@ -320,10 +320,12 @@ static const struct rcar_csi2_format rcar_csi2_formats[] = {
 	{ .code = MEDIA_BUS_FMT_YUYV8_1X16,	.datatype = 0x1e, .bpp = 16 },
 	{ .code = MEDIA_BUS_FMT_UYVY8_2X8,	.datatype = 0x1e, .bpp = 16 },
 	{ .code = MEDIA_BUS_FMT_YUYV10_2X10,	.datatype = 0x1e, .bpp = 20 },
+	{ .code = MEDIA_BUS_FMT_Y10_1X10,	.datatype = 0x2b, .bpp = 10 },
 	{ .code = MEDIA_BUS_FMT_SBGGR8_1X8,     .datatype = 0x2a, .bpp = 8 },
 	{ .code = MEDIA_BUS_FMT_SGBRG8_1X8,     .datatype = 0x2a, .bpp = 8 },
 	{ .code = MEDIA_BUS_FMT_SGRBG8_1X8,     .datatype = 0x2a, .bpp = 8 },
 	{ .code = MEDIA_BUS_FMT_SRGGB8_1X8,     .datatype = 0x2a, .bpp = 8 },
+	{ .code = MEDIA_BUS_FMT_Y8_1X8,		.datatype = 0x2a, .bpp = 8 },
 };
 
 static const struct rcar_csi2_format *rcsi2_code_to_fmt(unsigned int code)
-- 
GitLab


From 682e69d7a262d8959f8b8cc1a8ed68bc6ec4be61 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 12 Jun 2021 01:42:00 +0200
Subject: [PATCH 3037/3804] media: imx-jpeg: Constify static struct
 v4l2_m2m_ops

The only usage of mxc_jpeg_m2m_ops is to pass its address to
v4l2_m2m_init() which takes a pointer to const struct v4l2_m2m_ops. Make
it const to allow the compiler to put it in read-only memory.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/imx-jpeg/mxc-jpeg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
index 03b9264af068d..3a49007e12646 100644
--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c
+++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
@@ -1890,7 +1890,7 @@ static const struct v4l2_file_operations mxc_jpeg_fops = {
 	.mmap		= v4l2_m2m_fop_mmap,
 };
 
-static struct v4l2_m2m_ops mxc_jpeg_m2m_ops = {
+static const struct v4l2_m2m_ops mxc_jpeg_m2m_ops = {
 	.device_run	= mxc_jpeg_device_run,
 };
 
-- 
GitLab


From 7ec1c4a57c428a2114b81059e8683f8cf348920f Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 12 Jun 2021 01:42:01 +0200
Subject: [PATCH 3038/3804] media: imx-jpeg: Constify static struct
 mxc_jpeg_fmt

It is only read-from, so make it const. In order to be able to do this,
constify all places where mxc_jpeg_fmt is used, in function arguments,
return values and pointers. On top of that, make the name a pointer to
const char.

On aarch64, this shrinks object code size by 550 bytes with gcc 11.1.0,
and by almost 2kB with clang 12.0.0.

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/imx-jpeg/mxc-jpeg.c | 16 ++++++++--------
 drivers/media/platform/imx-jpeg/mxc-jpeg.h | 18 +++++++++---------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
index 3a49007e12646..755138063ee61 100644
--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c
+++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
@@ -62,7 +62,7 @@
 #include "mxc-jpeg-hw.h"
 #include "mxc-jpeg.h"
 
-static struct mxc_jpeg_fmt mxc_formats[] = {
+static const struct mxc_jpeg_fmt mxc_formats[] = {
 	{
 		.name		= "JPEG",
 		.fourcc		= V4L2_PIX_FMT_JPEG,
@@ -341,7 +341,7 @@ static inline struct mxc_jpeg_ctx *mxc_jpeg_fh_to_ctx(struct v4l2_fh *fh)
 	return container_of(fh, struct mxc_jpeg_ctx, fh);
 }
 
-static int enum_fmt(struct mxc_jpeg_fmt *mxc_formats, int n,
+static int enum_fmt(const struct mxc_jpeg_fmt *mxc_formats, int n,
 		    struct v4l2_fmtdesc *f, u32 type)
 {
 	int i, num = 0;
@@ -368,13 +368,13 @@ static int enum_fmt(struct mxc_jpeg_fmt *mxc_formats, int n,
 	return 0;
 }
 
-static struct mxc_jpeg_fmt *mxc_jpeg_find_format(struct mxc_jpeg_ctx *ctx,
-						 u32 pixelformat)
+static const struct mxc_jpeg_fmt *mxc_jpeg_find_format(struct mxc_jpeg_ctx *ctx,
+						       u32 pixelformat)
 {
 	unsigned int k;
 
 	for (k = 0; k < MXC_JPEG_NUM_FORMATS; k++) {
-		struct mxc_jpeg_fmt *fmt = &mxc_formats[k];
+		const struct mxc_jpeg_fmt *fmt = &mxc_formats[k];
 
 		if (fmt->fourcc == pixelformat)
 			return fmt;
@@ -1536,7 +1536,7 @@ static int mxc_jpeg_enum_fmt_vid_out(struct file *file, void *priv,
 				MXC_JPEG_FMT_TYPE_RAW);
 }
 
-static int mxc_jpeg_try_fmt(struct v4l2_format *f, struct mxc_jpeg_fmt *fmt,
+static int mxc_jpeg_try_fmt(struct v4l2_format *f, const struct mxc_jpeg_fmt *fmt,
 			    struct mxc_jpeg_ctx *ctx, int q_type)
 {
 	struct v4l2_pix_format_mplane *pix_mp = &f->fmt.pix_mp;
@@ -1612,7 +1612,7 @@ static int mxc_jpeg_try_fmt_vid_cap(struct file *file, void *priv,
 	struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv);
 	struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
 	struct device *dev = jpeg->dev;
-	struct mxc_jpeg_fmt *fmt;
+	const struct mxc_jpeg_fmt *fmt;
 	u32 fourcc = f->fmt.pix_mp.pixelformat;
 
 	int q_type = (jpeg->mode == MXC_JPEG_DECODE) ?
@@ -1643,7 +1643,7 @@ static int mxc_jpeg_try_fmt_vid_out(struct file *file, void *priv,
 	struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv);
 	struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
 	struct device *dev = jpeg->dev;
-	struct mxc_jpeg_fmt *fmt;
+	const struct mxc_jpeg_fmt *fmt;
 	u32 fourcc = f->fmt.pix_mp.pixelformat;
 
 	int q_type = (jpeg->mode == MXC_JPEG_ENCODE) ?
diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.h b/drivers/media/platform/imx-jpeg/mxc-jpeg.h
index 7697de490d2e2..4c210852e876c 100644
--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.h
+++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.h
@@ -51,7 +51,7 @@ enum mxc_jpeg_mode {
  * @flags:	flags describing format applicability
  */
 struct mxc_jpeg_fmt {
-	char					*name;
+	const char				*name;
 	u32					fourcc;
 	enum v4l2_jpeg_chroma_subsampling	subsampling;
 	int					nc;
@@ -74,14 +74,14 @@ struct mxc_jpeg_desc {
 } __packed;
 
 struct mxc_jpeg_q_data {
-	struct mxc_jpeg_fmt	*fmt;
-	u32			sizeimage[MXC_JPEG_MAX_PLANES];
-	u32			bytesperline[MXC_JPEG_MAX_PLANES];
-	int			w;
-	int			w_adjusted;
-	int			h;
-	int			h_adjusted;
-	unsigned int		sequence;
+	const struct mxc_jpeg_fmt	*fmt;
+	u32				sizeimage[MXC_JPEG_MAX_PLANES];
+	u32				bytesperline[MXC_JPEG_MAX_PLANES];
+	int				w;
+	int				w_adjusted;
+	int				h;
+	int				h_adjusted;
+	unsigned int			sequence;
 };
 
 struct mxc_jpeg_ctx {
-- 
GitLab


From 00ae4ebc2d07db50d8432ebec3158c96b36f1a6d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 13 Jun 2021 19:36:34 +0200
Subject: [PATCH 3039/3804] media: pci: cx88: switch from 'pci_' to 'dma_' API

The wrappers in include/linux/pci-dma-compat.h should go away.

The patch has been generated with the coccinelle script below and has been
hand modified to replace GFP_ with a correct flag.
It has been compile tested.

Only 2 functions allocate some memory. They are both in cx88-core.c

When memory is allocated in 'cx88_risc_buffer()', GFP_KERNEL can be
used because both of its call chains originate from '.buf_prepare'
functions in 'vb2_ops' structures.
The call chains are:

  .buf_prepare                   (cx88-vbi.c)
    --> buffer_prepare           (cx88-vbi.c)
      --> cx88_risc_buffer

  .buf_prepare                   (cx88-video.c)
    --> buffer_prepare           (cx88-video.c)
      --> cx88_risc_buffer

When memory is allocated in 'cx88_risc_databuffer()', GFP_KERNEL can be
used because its call chains originate either from 'snd_cx88_hw_params',
which already uses GFP_KERNEL, or from '.buf_prepare' functions in
'vb2_ops' structures.
The call chains are:

  snd_cx88_hw_params                   (cx88-alsa.c)
    --> cx88_risc_databuffer

  .buf_prepare                         (cx88-blackbird.c)
    --> buffer_prepare                 (cx88-blackbird.c)
      --> cx8802_buf_prepare           (cx88-mpeg.c)
        --> cx88_risc_databuffer

  .buf_prepare                         (cx88-dvb.c)
    --> buffer_prepare                 (cx88-dvb.c)
      --> cx8802_buf_prepare           (cx88-mpeg.c)
        --> cx88_risc_databuffer

@@ @@
-    PCI_DMA_BIDIRECTIONAL
+    DMA_BIDIRECTIONAL

@@ @@
-    PCI_DMA_TODEVICE
+    DMA_TO_DEVICE

@@ @@
-    PCI_DMA_FROMDEVICE
+    DMA_FROM_DEVICE

@@ @@
-    PCI_DMA_NONE
+    DMA_NONE

@@
expression e1, e2, e3;
@@
-    pci_alloc_consistent(e1, e2, e3)
+    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)

@@
expression e1, e2, e3;
@@
-    pci_zalloc_consistent(e1, e2, e3)
+    dma_alloc_coherent(&e1->dev, e2, e3, GFP_)

@@
expression e1, e2, e3, e4;
@@
-    pci_free_consistent(e1, e2, e3, e4)
+    dma_free_coherent(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_map_single(e1, e2, e3, e4)
+    dma_map_single(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_single(e1, e2, e3, e4)
+    dma_unmap_single(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4, e5;
@@
-    pci_map_page(e1, e2, e3, e4, e5)
+    dma_map_page(&e1->dev, e2, e3, e4, e5)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_page(e1, e2, e3, e4)
+    dma_unmap_page(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_map_sg(e1, e2, e3, e4)
+    dma_map_sg(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_unmap_sg(e1, e2, e3, e4)
+    dma_unmap_sg(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_single_for_cpu(e1, e2, e3, e4)
+    dma_sync_single_for_cpu(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_single_for_device(e1, e2, e3, e4)
+    dma_sync_single_for_device(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_sg_for_cpu(e1, e2, e3, e4)
+    dma_sync_sg_for_cpu(&e1->dev, e2, e3, e4)

@@
expression e1, e2, e3, e4;
@@
-    pci_dma_sync_sg_for_device(e1, e2, e3, e4)
+    dma_sync_sg_for_device(&e1->dev, e2, e3, e4)

@@
expression e1, e2;
@@
-    pci_dma_mapping_error(e1, e2)
+    dma_mapping_error(&e1->dev, e2)

@@
expression e1, e2;
@@
-    pci_set_dma_mask(e1, e2)
+    dma_set_mask(&e1->dev, e2)

@@
expression e1, e2;
@@
-    pci_set_consistent_dma_mask(e1, e2)
+    dma_set_coherent_mask(&e1->dev, e2)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/pci/cx88/cx88-alsa.c      | 6 +++---
 drivers/media/pci/cx88/cx88-blackbird.c | 3 ++-
 drivers/media/pci/cx88/cx88-core.c      | 6 ++++--
 drivers/media/pci/cx88/cx88-dvb.c       | 3 ++-
 drivers/media/pci/cx88/cx88-mpeg.c      | 6 +++---
 drivers/media/pci/cx88/cx88-vbi.c       | 3 ++-
 drivers/media/pci/cx88/cx88-video.c     | 5 +++--
 7 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/media/pci/cx88/cx88-alsa.c b/drivers/media/pci/cx88/cx88-alsa.c
index c83814c052d36..29fb1311e4434 100644
--- a/drivers/media/pci/cx88/cx88-alsa.c
+++ b/drivers/media/pci/cx88/cx88-alsa.c
@@ -357,8 +357,8 @@ static int dsp_buffer_free(struct cx88_audio_dev *chip)
 	cx88_alsa_dma_unmap(chip);
 	cx88_alsa_dma_free(chip->buf);
 	if (risc->cpu)
-		pci_free_consistent(chip->pci, risc->size,
-				    risc->cpu, risc->dma);
+		dma_free_coherent(&chip->pci->dev, risc->size, risc->cpu,
+				  risc->dma);
 	kfree(chip->buf);
 
 	chip->buf = NULL;
@@ -868,7 +868,7 @@ static int snd_cx88_create(struct snd_card *card, struct pci_dev *pci,
 		return err;
 	}
 
-	err = pci_set_dma_mask(pci, DMA_BIT_MASK(32));
+	err = dma_set_mask(&pci->dev, DMA_BIT_MASK(32));
 	if (err) {
 		dprintk(0, "%s/1: Oops: no 32bit PCI DMA ???\n", core->name);
 		cx88_core_put(core, pci);
diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c
index fa4ca002ed191..d5da3bd5695d7 100644
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c
@@ -685,7 +685,8 @@ static void buffer_finish(struct vb2_buffer *vb)
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
-		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
+		dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu,
+				  risc->dma);
 	memset(risc, 0, sizeof(*risc));
 }
 
diff --git a/drivers/media/pci/cx88/cx88-core.c b/drivers/media/pci/cx88/cx88-core.c
index 48c8a34295424..89d4d5a3ba34c 100644
--- a/drivers/media/pci/cx88/cx88-core.c
+++ b/drivers/media/pci/cx88/cx88-core.c
@@ -152,7 +152,8 @@ int cx88_risc_buffer(struct pci_dev *pci, struct cx88_riscmem *risc,
 	instructions += 4;
 	risc->size = instructions * 8;
 	risc->dma = 0;
-	risc->cpu = pci_zalloc_consistent(pci, risc->size, &risc->dma);
+	risc->cpu = dma_alloc_coherent(&pci->dev, risc->size, &risc->dma,
+				       GFP_KERNEL);
 	if (!risc->cpu)
 		return -ENOMEM;
 
@@ -190,7 +191,8 @@ int cx88_risc_databuffer(struct pci_dev *pci, struct cx88_riscmem *risc,
 	instructions += 3;
 	risc->size = instructions * 8;
 	risc->dma = 0;
-	risc->cpu = pci_zalloc_consistent(pci, risc->size, &risc->dma);
+	risc->cpu = dma_alloc_coherent(&pci->dev, risc->size, &risc->dma,
+				       GFP_KERNEL);
 	if (!risc->cpu)
 		return -ENOMEM;
 
diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c
index 202ff9e8c2571..2087f2491c42f 100644
--- a/drivers/media/pci/cx88/cx88-dvb.c
+++ b/drivers/media/pci/cx88/cx88-dvb.c
@@ -103,7 +103,8 @@ static void buffer_finish(struct vb2_buffer *vb)
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
-		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
+		dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu,
+				  risc->dma);
 	memset(risc, 0, sizeof(*risc));
 }
 
diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
index a3edb548afde4..680e1e3fe89b7 100644
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c
@@ -226,8 +226,8 @@ int cx8802_buf_prepare(struct vb2_queue *q, struct cx8802_dev *dev,
 				  dev->ts_packet_size, dev->ts_packet_count, 0);
 	if (rc) {
 		if (risc->cpu)
-			pci_free_consistent(dev->pci, risc->size,
-					    risc->cpu, risc->dma);
+			dma_free_coherent(&dev->pci->dev, risc->size,
+					  risc->cpu, risc->dma);
 		memset(risc, 0, sizeof(*risc));
 		return rc;
 	}
@@ -386,7 +386,7 @@ static int cx8802_init_common(struct cx8802_dev *dev)
 	if (pci_enable_device(dev->pci))
 		return -EIO;
 	pci_set_master(dev->pci);
-	err = pci_set_dma_mask(dev->pci, DMA_BIT_MASK(32));
+	err = dma_set_mask(&dev->pci->dev, DMA_BIT_MASK(32));
 	if (err) {
 		pr_err("Oops: no 32bit PCI DMA ???\n");
 		return -EIO;
diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c
index 58489ea0c1da1..a075788c64d45 100644
--- a/drivers/media/pci/cx88/cx88-vbi.c
+++ b/drivers/media/pci/cx88/cx88-vbi.c
@@ -159,7 +159,8 @@ static void buffer_finish(struct vb2_buffer *vb)
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
-		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
+		dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu,
+				  risc->dma);
 	memset(risc, 0, sizeof(*risc));
 }
 
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index 8cffdacf60079..c17ad9f7d822b 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -492,7 +492,8 @@ static void buffer_finish(struct vb2_buffer *vb)
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
-		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
+		dma_free_coherent(&dev->pci->dev, risc->size, risc->cpu,
+				  risc->dma);
 	memset(risc, 0, sizeof(*risc));
 }
 
@@ -1288,7 +1289,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev,
 		(unsigned long long)pci_resource_start(pci_dev, 0));
 
 	pci_set_master(pci_dev);
-	err = pci_set_dma_mask(pci_dev, DMA_BIT_MASK(32));
+	err = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32));
 	if (err) {
 		pr_err("Oops: no 32bit PCI DMA ???\n");
 		goto fail_core;
-- 
GitLab


From 7629cbd6872f6aef0b7f1e20812194f4f4249bb2 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 10 Jun 2021 08:11:40 +0200
Subject: [PATCH 3040/3804] media: adv7842: remove spurious & and fix vga_edid
 size

No need to use & to get the start address of an array.

Fix the size of vga_edid.edid to a single block (128 bytes) to fix
this smatch error:

adv7842.c:2538 adv7842_set_edid() error: memcpy() '&state->vga_edid.edid' too small (128 vs 512)

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/adv7842.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/adv7842.c b/drivers/media/i2c/adv7842.c
index 263713963a00f..7f8acbdf0db4a 100644
--- a/drivers/media/i2c/adv7842.c
+++ b/drivers/media/i2c/adv7842.c
@@ -2531,20 +2531,20 @@ static int adv7842_set_edid(struct v4l2_subdev *sd, struct v4l2_edid *e)
 
 	switch (e->pad) {
 	case ADV7842_EDID_PORT_VGA:
-		memset(&state->vga_edid.edid, 0, sizeof(state->vga_edid.edid));
+		memset(state->vga_edid.edid, 0, sizeof(state->vga_edid.edid));
 		state->vga_edid.blocks = e->blocks;
 		state->vga_edid.present = e->blocks ? 0x1 : 0x0;
 		if (e->blocks)
-			memcpy(&state->vga_edid.edid, e->edid, 128 * e->blocks);
+			memcpy(state->vga_edid.edid, e->edid, 128);
 		err = edid_write_vga_segment(sd);
 		break;
 	case ADV7842_EDID_PORT_A:
 	case ADV7842_EDID_PORT_B:
-		memset(&state->hdmi_edid.edid, 0, sizeof(state->hdmi_edid.edid));
+		memset(state->hdmi_edid.edid, 0, sizeof(state->hdmi_edid.edid));
 		state->hdmi_edid.blocks = e->blocks;
 		if (e->blocks) {
 			state->hdmi_edid.present |= 0x04 << e->pad;
-			memcpy(&state->hdmi_edid.edid, e->edid, 128 * e->blocks);
+			memcpy(state->hdmi_edid.edid, e->edid, 128 * e->blocks);
 		} else {
 			state->hdmi_edid.present &= ~(0x04 << e->pad);
 			adv7842_s_detect_tx_5v_ctrl(sd);
-- 
GitLab


From 493ae3f2ba13a1c0a9d866c6c24a43ebe0d1ba42 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 10 Jun 2021 08:23:10 +0200
Subject: [PATCH 3041/3804] media: mtk-vcodec: fix kerneldoc warnings

Fix the following kerneldoc warnings:

drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'AP_IPIMSG_ENC_INIT' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'AP_IPIMSG_ENC_SET_PARAM' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'AP_IPIMSG_ENC_ENCODE' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'AP_IPIMSG_ENC_DEINIT' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'VPU_IPIMSG_ENC_INIT_DONE' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'VPU_IPIMSG_ENC_SET_PARAM_DONE' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'VPU_IPIMSG_ENC_ENCODE_DONE' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Enum value 'VPU_IPIMSG_ENC_DEINIT_DONE' not described in enum 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Excess enum value 'VPU_IPIMSG_ENC_XXX_DONE' description in 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:31: warning: Excess enum value 'AP_IPIMSG_ENC_XXX' description in 'venc_ipi_msg_id'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:120: warning: Enum value 'VENC_IPI_MSG_STATUS_OK' not described in enum 'venc_ipi_msg_status'
drivers/media/platform/mtk-vcodec/venc_ipi_msg.h:120: warning: Enum value 'VENC_IPI_MSG_STATUS_FAIL' not described in enum 'venc_ipi_msg_status'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_SYS' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_MISC' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_LD' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_TOP' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_CM' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_AD' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_AV' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_PP' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_HWD' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_HWQ' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_HWB' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VDEC_HWG' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'NUM_MAX_VDEC_REG_BASE' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VENC_SYS' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'VENC_LT_SYS' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:50: warning: Enum value 'NUM_MAX_VCODEC_REG_BASE' not described in enum 'mtk_hw_reg_idx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:58: warning: Enum value 'MTK_INST_DECODER' not described in enum 'mtk_instance_type'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:58: warning: Enum value 'MTK_INST_ENCODER' not described in enum 'mtk_instance_type'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_NONE' not described in enum 'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_BITRATE' not described in enum 'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_FRAMERATE' not described in enum 'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_INTRA_PERIOD' not described in enum
'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_FORCE_INTRA' not described in enum
'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:87: warning: Enum value 'MTK_ENCODE_PARAM_GOP_SIZE' not described in enum 'mtk_encode_param'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:103: warning: Function parameter or member 'fourcc' not described in 'mtk_video_fmt'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:103: warning: Function parameter or member 'type' not described in 'mtk_video_fmt'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:103: warning: Function parameter or member 'num_planes' not described in 'mtk_video_fmt'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:103: warning: Function parameter or member 'flags' not described in 'mtk_video_fmt'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:112: warning: Function parameter or member 'fourcc' not described in 'mtk_codec_framesizes'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:112: warning: Function parameter or member 'stepwise' not described in 'mtk_codec_framesizes'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:120: warning: Enum value 'MTK_Q_DATA_SRC' not described in enum 'mtk_q_type'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:120: warning: Enum value 'MTK_Q_DATA_DST' not described in enum 'mtk_q_type'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'visible_width' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'visible_height' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'coded_width' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'coded_height' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'field' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'bytesperline' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'sizeimage' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:134: warning: Function parameter or member 'fmt' not described in 'mtk_q_data'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:177: warning: Function parameter or member 'clk_name' not described in 'mtk_vcodec_clk_info'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:177: warning: Function parameter or member 'vcodec_clk' not described in
'mtk_vcodec_clk_info'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:185: warning: Function parameter or member 'clk_info' not described in 'mtk_vcodec_clk'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:185: warning: Function parameter or member 'clk_num' not described in 'mtk_vcodec_clk'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'vdec_clk' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'larbvdec' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'venc_clk' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'larbvenc' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'dev' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:198: warning: Function parameter or member 'mtkdev' not described in 'mtk_vcodec_pm'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:300: warning: Function parameter or member 'decoded_frame_cnt' not described in
'mtk_vcodec_ctx'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:332: warning: Function parameter or member 'min_bitrate' not described in
'mtk_vcodec_enc_pdata'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:405: warning: Function parameter or member 'venc_pdata' not described in 'mtk_vcodec_dev'
drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h:405: warning: Function parameter or member 'decode_workqueue' not described in
'mtk_vcodec_dev'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'AP_IPIMSG_DEC_INIT' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'AP_IPIMSG_DEC_START' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'AP_IPIMSG_DEC_END' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'AP_IPIMSG_DEC_DEINIT' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'AP_IPIMSG_DEC_RESET' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'VPU_IPIMSG_DEC_INIT_ACK' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'VPU_IPIMSG_DEC_START_ACK' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'VPU_IPIMSG_DEC_END_ACK' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'VPU_IPIMSG_DEC_DEINIT_ACK' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Enum value 'VPU_IPIMSG_DEC_RESET_ACK' not described in enum 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Excess enum value 'AP_IPIMSG_XXX' description in 'vdec_ipi_msgid'
drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h:27: warning: Excess enum value 'VPU_IPIMSG_XXX_ACK' description in 'vdec_ipi_msgid'

In some cases I just changed /** to /*, in other cases the missing
field descriptions were added.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../platform/mtk-vcodec/mtk_vcodec_drv.h      | 25 +++++++++++--------
 .../media/platform/mtk-vcodec/vdec_ipi_msg.h  |  2 +-
 .../media/platform/mtk-vcodec/venc_ipi_msg.h  |  4 +--
 3 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
index 14893d277bb8c..c6c7672fecfb1 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
@@ -25,7 +25,7 @@
 #define MTK_V4L2_BENCHMARK	0
 #define WAIT_INTR_TIMEOUT_MS	1000
 
-/**
+/*
  * enum mtk_hw_reg_idx - MTK hw register base index
  */
 enum mtk_hw_reg_idx {
@@ -49,7 +49,7 @@ enum mtk_hw_reg_idx {
 	NUM_MAX_VCODEC_REG_BASE
 };
 
-/**
+/*
  * enum mtk_instance_type - The type of an MTK Vcodec instance.
  */
 enum mtk_instance_type {
@@ -74,7 +74,7 @@ enum mtk_instance_state {
 	MTK_STATE_ABORT = 4,
 };
 
-/**
+/*
  * enum mtk_encode_param - General encoding parameters type
  */
 enum mtk_encode_param {
@@ -92,7 +92,7 @@ enum mtk_fmt_type {
 	MTK_FMT_FRAME = 2,
 };
 
-/**
+/*
  * struct mtk_video_fmt - Structure used to store information about pixelformats
  */
 struct mtk_video_fmt {
@@ -102,7 +102,7 @@ struct mtk_video_fmt {
 	u32	flags;
 };
 
-/**
+/*
  * struct mtk_codec_framesizes - Structure used to store information about
  *							framesizes
  */
@@ -111,7 +111,7 @@ struct mtk_codec_framesizes {
 	struct	v4l2_frmsize_stepwise	stepwise;
 };
 
-/**
+/*
  * enum mtk_q_type - Type of queue
  */
 enum mtk_q_type {
@@ -119,7 +119,7 @@ enum mtk_q_type {
 	MTK_Q_DATA_DST = 1,
 };
 
-/**
+/*
  * struct mtk_q_data - Structure used to store information about queue
  */
 struct mtk_q_data {
@@ -168,7 +168,7 @@ struct mtk_enc_params {
 	unsigned int	force_intra;
 };
 
-/**
+/*
  * struct mtk_vcodec_clk_info - Structure used to store clock name
  */
 struct mtk_vcodec_clk_info {
@@ -176,7 +176,7 @@ struct mtk_vcodec_clk_info {
 	struct clk	*vcodec_clk;
 };
 
-/**
+/*
  * struct mtk_vcodec_clk - Structure used to store vcodec clock information
  */
 struct mtk_vcodec_clk {
@@ -184,7 +184,7 @@ struct mtk_vcodec_clk {
 	int	clk_num;
 };
 
-/**
+/*
  * struct mtk_vcodec_pm - Power management data structure
  */
 struct mtk_vcodec_pm {
@@ -255,6 +255,7 @@ struct vdec_pic_info {
  * @ycbcr_enc: enum v4l2_ycbcr_encoding, Y'CbCr encoding
  * @quantization: enum v4l2_quantization, colorspace quantization
  * @xfer_func: enum v4l2_xfer_func, colorspace transfer function
+ * @decoded_frame_cnt: number of decoded frames
  * @lock: protect variables accessed by V4L2 threads and worker thread such as
  *	  mtk_video_dec_buf.
  */
@@ -311,7 +312,7 @@ enum mtk_chip {
  * @chip: chip this encoder is compatible with
  *
  * @uses_ext: whether the encoder uses the extended firmware messaging format
- * @min_birate: minimum supported encoding bitrate
+ * @min_bitrate: minimum supported encoding bitrate
  * @max_bitrate: maximum supported encoding bitrate
  * @capture_formats: array of supported capture formats
  * @num_capture_formats: number of entries in capture_formats
@@ -348,10 +349,12 @@ struct mtk_vcodec_enc_pdata {
  * @curr_ctx: The context that is waiting for codec hardware
  *
  * @reg_base: Mapped address of MTK Vcodec registers.
+ * @venc_pdata: encoder IC-specific data
  *
  * @fw_handler: used to communicate with the firmware.
  * @id_counter: used to identify current opened instance
  *
+ * @decode_workqueue: decode work queue
  * @encode_workqueue: encode work queue
  *
  * @int_cond: used to identify interrupt condition happen
diff --git a/drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h b/drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h
index 47a1c1c0fd042..68e8d5cb16d7e 100644
--- a/drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h
+++ b/drivers/media/platform/mtk-vcodec/vdec_ipi_msg.h
@@ -7,7 +7,7 @@
 #ifndef _VDEC_IPI_MSG_H_
 #define _VDEC_IPI_MSG_H_
 
-/**
+/*
  * enum vdec_ipi_msgid - message id between AP and VPU
  * @AP_IPIMSG_XXX	: AP to VPU cmd message id
  * @VPU_IPIMSG_XXX_ACK	: VPU ack AP cmd message id
diff --git a/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h b/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h
index 5f53d4255c368..587a2cf15b765 100644
--- a/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h
+++ b/drivers/media/platform/mtk-vcodec/venc_ipi_msg.h
@@ -12,7 +12,7 @@
 #define AP_IPIMSG_VENC_BASE 0xC000
 #define VPU_IPIMSG_VENC_BASE 0xD000
 
-/**
+/*
  * enum venc_ipi_msg_id - message id between AP and VPU
  * (ipi stands for inter-processor interrupt)
  * @AP_IPIMSG_ENC_XXX:		AP to VPU cmd message id
@@ -111,7 +111,7 @@ struct venc_ap_ipi_msg_deinit {
 	uint32_t vpu_inst_addr;
 };
 
-/**
+/*
  * enum venc_ipi_msg_status - VPU ack AP cmd status
  */
 enum venc_ipi_msg_status {
-- 
GitLab


From b32178e77d257c148b8ad8c31db36bb0c2d49bab Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:10 +0200
Subject: [PATCH 3042/3804] media: dt-bindings: media: rockchip-vpu: add new
 compatibles

Add compatibles for RK3036, RK3066, RK3188 and RK3228. Also reflect the
changes to the additional clocks for RK3066/RK3188.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../bindings/media/rockchip-vpu.yaml          | 33 ++++++++++++++-----
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
index c81dbc3e89601..b88172a59de7a 100644
--- a/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
+++ b/Documentation/devicetree/bindings/media/rockchip-vpu.yaml
@@ -15,10 +15,19 @@ description:
 
 properties:
   compatible:
-    enum:
-      - rockchip,rk3288-vpu
-      - rockchip,rk3328-vpu
-      - rockchip,rk3399-vpu
+    oneOf:
+      - enum:
+          - rockchip,rk3036-vpu
+          - rockchip,rk3066-vpu
+          - rockchip,rk3288-vpu
+          - rockchip,rk3328-vpu
+          - rockchip,rk3399-vpu
+      - items:
+          - const: rockchip,rk3188-vpu
+          - const: rockchip,rk3066-vpu
+      - items:
+          - const: rockchip,rk3228-vpu
+          - const: rockchip,rk3399-vpu
 
   reg:
     maxItems: 1
@@ -35,12 +44,20 @@ properties:
           - const: vdpu
 
   clocks:
-    maxItems: 2
+    oneOf:
+      - maxItems: 2
+      - maxItems: 4
 
   clock-names:
-    items:
-      - const: aclk
-      - const: hclk
+    oneOf:
+      - items:
+          - const: aclk
+          - const: hclk
+      - items:
+          - const: aclk_vdpu
+          - const: hclk_vdpu
+          - const: aclk_vepu
+          - const: hclk_vepu
 
   power-domains:
     maxItems: 1
-- 
GitLab


From 502cf736419aba4cfa0a6737cf66d286c699f144 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:11 +0200
Subject: [PATCH 3043/3804] media: dt-bindings: media: rockchip-vdec: add
 RK3228 compatible

Document the RK3228 compatible for rockchip-vdec.
Also add the optional assigned-clocks and assigned-clock-rates
properties.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/rockchip,vdec.yaml       | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/media/rockchip,vdec.yaml b/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
index 8d35c327018b1..089f11d21b25b 100644
--- a/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
+++ b/Documentation/devicetree/bindings/media/rockchip,vdec.yaml
@@ -15,7 +15,11 @@ description: |-
 
 properties:
   compatible:
-    const: rockchip,rk3399-vdec
+    oneOf:
+      - const: rockchip,rk3399-vdec
+      - items:
+          - const: rockchip,rk3228-vdec
+          - const: rockchip,rk3399-vdec
 
   reg:
     maxItems: 1
@@ -37,6 +41,10 @@ properties:
       - const: cabac
       - const: core
 
+  assigned-clocks: true
+
+  assigned-clock-rates: true
+
   power-domains:
     maxItems: 1
 
-- 
GitLab


From 4b898fedeb26c4d09b83a2c5a3246a34ab99e216 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:12 +0200
Subject: [PATCH 3044/3804] media: hantro: reorder variants

Reorder the variants in the hantro driver alphanumerically.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c | 4 ++--
 drivers/staging/media/hantro/hantro_hw.h  | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index dbc69ee0b562b..34e778e1cea11 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -582,9 +582,9 @@ static const struct v4l2_file_operations hantro_fops = {
 
 static const struct of_device_id of_hantro_match[] = {
 #ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP
-	{ .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, },
-	{ .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, },
 	{ .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, },
+	{ .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, },
+	{ .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, },
 #endif
 #ifdef CONFIG_VIDEO_HANTRO_IMX8M
 	{ .compatible = "nxp,imx8mq-vpu", .data = &imx8mq_vpu_variant, },
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 5737a7707944a..7fa67d0c7e0fb 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -203,12 +203,12 @@ enum hantro_enc_fmt {
 	RK3288_VPU_ENC_FMT_UYVY422 = 3,
 };
 
-extern const struct hantro_variant rk3399_vpu_variant;
-extern const struct hantro_variant rk3328_vpu_variant;
-extern const struct hantro_variant rk3288_vpu_variant;
+extern const struct hantro_variant imx8mq_vpu_g2_variant;
 extern const struct hantro_variant imx8mq_vpu_variant;
+extern const struct hantro_variant rk3288_vpu_variant;
+extern const struct hantro_variant rk3328_vpu_variant;
+extern const struct hantro_variant rk3399_vpu_variant;
 extern const struct hantro_variant sama5d4_vdec_variant;
-extern const struct hantro_variant imx8mq_vpu_g2_variant;
 
 extern const struct hantro_postproc_regs hantro_g1_postproc_regs;
 
-- 
GitLab


From c9caebd57b3a0e3fc981bfc9e79de5c4086e0c1b Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:13 +0200
Subject: [PATCH 3045/3804] media: hantro: merge Rockchip platform drivers

Merge the two Rockchip platform drivers into one as it was suggested at
[1] and [2].
This will hopefully make it easier to add new variants (which are surely
to come for Rockchip).
Also rename from "rk3288" to "v(d/e)pu1" and "rk3399" to "v(d/e)pu2"
where applicable, as this is the diction the vendor uses and will
also reflect the variants that get added later in this series. Rename
from "rk3288" to "rockchip" if applicable to both hardware versions.

[1] https://patchwork.kernel.org/project/linux-rockchip/patch/20210107134101.195426-6-paul.kocialkowski@bootlin.com/
[2] https://patchwork.kernel.org/project/linux-rockchip/patch/20210525152225.154302-5-knaerzche@gmail.com/

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/Makefile         |   9 +-
 drivers/staging/media/hantro/hantro_hw.h      |  22 +-
 drivers/staging/media/hantro/rk3288_vpu_hw.c  | 208 ----------
 drivers/staging/media/hantro/rk3399_vpu_hw.c  | 222 -----------
 ...jpeg_enc.c => rockchip_vpu2_hw_jpeg_enc.c} |  30 +-
 ...eg2_dec.c => rockchip_vpu2_hw_mpeg2_dec.c} |  25 +-
 ...w_vp8_dec.c => rockchip_vpu2_hw_vp8_dec.c} |   2 +-
 ...rk3399_vpu_regs.h => rockchip_vpu2_regs.h} |   6 +-
 .../staging/media/hantro/rockchip_vpu_hw.c    | 356 ++++++++++++++++++
 9 files changed, 402 insertions(+), 478 deletions(-)
 delete mode 100644 drivers/staging/media/hantro/rk3288_vpu_hw.c
 delete mode 100644 drivers/staging/media/hantro/rk3399_vpu_hw.c
 rename drivers/staging/media/hantro/{rk3399_vpu_hw_jpeg_enc.c => rockchip_vpu2_hw_jpeg_enc.c} (87%)
 rename drivers/staging/media/hantro/{rk3399_vpu_hw_mpeg2_dec.c => rockchip_vpu2_hw_mpeg2_dec.c} (94%)
 rename drivers/staging/media/hantro/{rk3399_vpu_hw_vp8_dec.c => rockchip_vpu2_hw_vp8_dec.c} (99%)
 rename drivers/staging/media/hantro/{rk3399_vpu_regs.h => rockchip_vpu2_regs.h} (99%)
 create mode 100644 drivers/staging/media/hantro/rockchip_vpu_hw.c

diff --git a/drivers/staging/media/hantro/Makefile b/drivers/staging/media/hantro/Makefile
index 23bfc423b23c3..287370188d2ae 100644
--- a/drivers/staging/media/hantro/Makefile
+++ b/drivers/staging/media/hantro/Makefile
@@ -12,9 +12,9 @@ hantro-vpu-y += \
 		hantro_g1_mpeg2_dec.o \
 		hantro_g2_hevc_dec.o \
 		hantro_g1_vp8_dec.o \
-		rk3399_vpu_hw_jpeg_enc.o \
-		rk3399_vpu_hw_mpeg2_dec.o \
-		rk3399_vpu_hw_vp8_dec.o \
+		rockchip_vpu2_hw_jpeg_enc.o \
+		rockchip_vpu2_hw_mpeg2_dec.o \
+		rockchip_vpu2_hw_vp8_dec.o \
 		hantro_jpeg.o \
 		hantro_h264.o \
 		hantro_hevc.o \
@@ -28,5 +28,4 @@ hantro-vpu-$(CONFIG_VIDEO_HANTRO_SAMA5D4) += \
 		sama5d4_vdec_hw.o
 
 hantro-vpu-$(CONFIG_VIDEO_HANTRO_ROCKCHIP) += \
-		rk3288_vpu_hw.o \
-		rk3399_vpu_hw.o
+		rockchip_vpu_hw.o
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 7fa67d0c7e0fb..a7b75b05e8493 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -191,16 +191,16 @@ struct hantro_codec_ops {
 /**
  * enum hantro_enc_fmt - source format ID for hardware registers.
  *
- * @RK3288_VPU_ENC_FMT_YUV420P: Y/CbCr 4:2:0 planar format
- * @RK3288_VPU_ENC_FMT_YUV420SP: Y/CbCr 4:2:0 semi-planar format
- * @RK3288_VPU_ENC_FMT_YUYV422: YUV 4:2:2 packed format (YUYV)
- * @RK3288_VPU_ENC_FMT_UYVY422: YUV 4:2:2 packed format (UYVY)
+ * @ROCKCHIP_VPU_ENC_FMT_YUV420P: Y/CbCr 4:2:0 planar format
+ * @ROCKCHIP_VPU_ENC_FMT_YUV420SP: Y/CbCr 4:2:0 semi-planar format
+ * @ROCKCHIP_VPU_ENC_FMT_YUYV422: YUV 4:2:2 packed format (YUYV)
+ * @ROCKCHIP_VPU_ENC_FMT_UYVY422: YUV 4:2:2 packed format (UYVY)
  */
 enum hantro_enc_fmt {
-	RK3288_VPU_ENC_FMT_YUV420P = 0,
-	RK3288_VPU_ENC_FMT_YUV420SP = 1,
-	RK3288_VPU_ENC_FMT_YUYV422 = 2,
-	RK3288_VPU_ENC_FMT_UYVY422 = 3,
+	ROCKCHIP_VPU_ENC_FMT_YUV420P = 0,
+	ROCKCHIP_VPU_ENC_FMT_YUV420SP = 1,
+	ROCKCHIP_VPU_ENC_FMT_YUYV422 = 2,
+	ROCKCHIP_VPU_ENC_FMT_UYVY422 = 3,
 };
 
 extern const struct hantro_variant imx8mq_vpu_g2_variant;
@@ -225,7 +225,7 @@ irqreturn_t hantro_g1_irq(int irq, void *dev_id);
 void hantro_g1_reset(struct hantro_ctx *ctx);
 
 int hantro_h1_jpeg_enc_run(struct hantro_ctx *ctx);
-int rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx);
 int hantro_jpeg_enc_init(struct hantro_ctx *ctx);
 void hantro_jpeg_enc_exit(struct hantro_ctx *ctx);
 void hantro_jpeg_enc_done(struct hantro_ctx *ctx);
@@ -274,14 +274,14 @@ hantro_h264_mv_size(unsigned int width, unsigned int height)
 }
 
 int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
-int rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_copy_qtable(u8 *qtable,
 				  const struct v4l2_ctrl_mpeg2_quantisation *ctrl);
 int hantro_mpeg2_dec_init(struct hantro_ctx *ctx);
 void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx);
 
 int hantro_g1_vp8_dec_run(struct hantro_ctx *ctx);
-int rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx);
+int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx);
 int hantro_vp8_dec_init(struct hantro_ctx *ctx);
 void hantro_vp8_dec_exit(struct hantro_ctx *ctx);
 void hantro_vp8_prob_update(struct hantro_ctx *ctx,
diff --git a/drivers/staging/media/hantro/rk3288_vpu_hw.c b/drivers/staging/media/hantro/rk3288_vpu_hw.c
deleted file mode 100644
index fefd45269e52a..0000000000000
--- a/drivers/staging/media/hantro/rk3288_vpu_hw.c
+++ /dev/null
@@ -1,208 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hantro VPU codec driver
- *
- * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
- *	Jeffy Chen <jeffy.chen@rock-chips.com>
- */
-
-#include <linux/clk.h>
-
-#include "hantro.h"
-#include "hantro_jpeg.h"
-#include "hantro_h1_regs.h"
-
-#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
-
-/*
- * Supported formats.
- */
-
-static const struct hantro_fmt rk3288_vpu_enc_fmts[] = {
-	{
-		.fourcc = V4L2_PIX_FMT_YUV420M,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUV420P,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_NV12M,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUV420SP,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_YUYV,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUYV422,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_UYVY,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_UYVY422,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_JPEG,
-		.codec_mode = HANTRO_MODE_JPEG_ENC,
-		.max_depth = 2,
-		.header_size = JPEG_HEADER_SIZE,
-		.frmsize = {
-			.min_width = 96,
-			.max_width = 8192,
-			.step_width = MB_DIM,
-			.min_height = 32,
-			.max_height = 8192,
-			.step_height = MB_DIM,
-		},
-	},
-};
-
-static const struct hantro_fmt rk3288_vpu_postproc_fmts[] = {
-	{
-		.fourcc = V4L2_PIX_FMT_YUYV,
-		.codec_mode = HANTRO_MODE_NONE,
-	},
-};
-
-static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
-	{
-		.fourcc = V4L2_PIX_FMT_NV12,
-		.codec_mode = HANTRO_MODE_NONE,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_H264_SLICE,
-		.codec_mode = HANTRO_MODE_H264_DEC,
-		.max_depth = 2,
-		.frmsize = {
-			.min_width = 48,
-			.max_width = 4096,
-			.step_width = MB_DIM,
-			.min_height = 48,
-			.max_height = 2304,
-			.step_height = MB_DIM,
-		},
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
-		.codec_mode = HANTRO_MODE_MPEG2_DEC,
-		.max_depth = 2,
-		.frmsize = {
-			.min_width = 48,
-			.max_width = 1920,
-			.step_width = MB_DIM,
-			.min_height = 48,
-			.max_height = 1088,
-			.step_height = MB_DIM,
-		},
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
-		.codec_mode = HANTRO_MODE_VP8_DEC,
-		.max_depth = 2,
-		.frmsize = {
-			.min_width = 48,
-			.max_width = 3840,
-			.step_width = MB_DIM,
-			.min_height = 48,
-			.max_height = 2160,
-			.step_height = MB_DIM,
-		},
-	},
-};
-
-static irqreturn_t rk3288_vepu_irq(int irq, void *dev_id)
-{
-	struct hantro_dev *vpu = dev_id;
-	enum vb2_buffer_state state;
-	u32 status;
-
-	status = vepu_read(vpu, H1_REG_INTERRUPT);
-	state = (status & H1_REG_INTERRUPT_FRAME_RDY) ?
-		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
-
-	vepu_write(vpu, 0, H1_REG_INTERRUPT);
-	vepu_write(vpu, 0, H1_REG_AXI_CTRL);
-
-	hantro_irq_done(vpu, state);
-
-	return IRQ_HANDLED;
-}
-
-static int rk3288_vpu_hw_init(struct hantro_dev *vpu)
-{
-	/* Bump ACLK to max. possible freq. to improve performance. */
-	clk_set_rate(vpu->clocks[0].clk, RK3288_ACLK_MAX_FREQ);
-	return 0;
-}
-
-static void rk3288_vpu_enc_reset(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-
-	vepu_write(vpu, H1_REG_INTERRUPT_DIS_BIT, H1_REG_INTERRUPT);
-	vepu_write(vpu, 0, H1_REG_ENC_CTRL);
-	vepu_write(vpu, 0, H1_REG_AXI_CTRL);
-}
-
-/*
- * Supported codec ops.
- */
-
-static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
-	[HANTRO_MODE_JPEG_ENC] = {
-		.run = hantro_h1_jpeg_enc_run,
-		.reset = rk3288_vpu_enc_reset,
-		.init = hantro_jpeg_enc_init,
-		.done = hantro_jpeg_enc_done,
-		.exit = hantro_jpeg_enc_exit,
-	},
-	[HANTRO_MODE_H264_DEC] = {
-		.run = hantro_g1_h264_dec_run,
-		.reset = hantro_g1_reset,
-		.init = hantro_h264_dec_init,
-		.exit = hantro_h264_dec_exit,
-	},
-	[HANTRO_MODE_MPEG2_DEC] = {
-		.run = hantro_g1_mpeg2_dec_run,
-		.reset = hantro_g1_reset,
-		.init = hantro_mpeg2_dec_init,
-		.exit = hantro_mpeg2_dec_exit,
-	},
-	[HANTRO_MODE_VP8_DEC] = {
-		.run = hantro_g1_vp8_dec_run,
-		.reset = hantro_g1_reset,
-		.init = hantro_vp8_dec_init,
-		.exit = hantro_vp8_dec_exit,
-	},
-};
-
-/*
- * VPU variant.
- */
-
-static const struct hantro_irq rk3288_irqs[] = {
-	{ "vepu", rk3288_vepu_irq },
-	{ "vdpu", hantro_g1_irq },
-};
-
-static const char * const rk3288_clk_names[] = {
-	"aclk", "hclk"
-};
-
-const struct hantro_variant rk3288_vpu_variant = {
-	.enc_offset = 0x0,
-	.enc_fmts = rk3288_vpu_enc_fmts,
-	.num_enc_fmts = ARRAY_SIZE(rk3288_vpu_enc_fmts),
-	.dec_offset = 0x400,
-	.dec_fmts = rk3288_vpu_dec_fmts,
-	.num_dec_fmts = ARRAY_SIZE(rk3288_vpu_dec_fmts),
-	.postproc_fmts = rk3288_vpu_postproc_fmts,
-	.num_postproc_fmts = ARRAY_SIZE(rk3288_vpu_postproc_fmts),
-	.postproc_regs = &hantro_g1_postproc_regs,
-	.codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
-		 HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
-	.codec_ops = rk3288_vpu_codec_ops,
-	.irqs = rk3288_irqs,
-	.num_irqs = ARRAY_SIZE(rk3288_irqs),
-	.init = rk3288_vpu_hw_init,
-	.clk_names = rk3288_clk_names,
-	.num_clocks = ARRAY_SIZE(rk3288_clk_names)
-};
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw.c b/drivers/staging/media/hantro/rk3399_vpu_hw.c
deleted file mode 100644
index 7a7962cf771e0..0000000000000
--- a/drivers/staging/media/hantro/rk3399_vpu_hw.c
+++ /dev/null
@@ -1,222 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Hantro VPU codec driver
- *
- * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
- *	Jeffy Chen <jeffy.chen@rock-chips.com>
- */
-
-#include <linux/clk.h>
-
-#include "hantro.h"
-#include "hantro_jpeg.h"
-#include "rk3399_vpu_regs.h"
-
-#define RK3399_ACLK_MAX_FREQ (400 * 1000 * 1000)
-
-/*
- * Supported formats.
- */
-
-static const struct hantro_fmt rk3399_vpu_enc_fmts[] = {
-	{
-		.fourcc = V4L2_PIX_FMT_YUV420M,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUV420P,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_NV12M,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUV420SP,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_YUYV,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_YUYV422,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_UYVY,
-		.codec_mode = HANTRO_MODE_NONE,
-		.enc_fmt = RK3288_VPU_ENC_FMT_UYVY422,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_JPEG,
-		.codec_mode = HANTRO_MODE_JPEG_ENC,
-		.max_depth = 2,
-		.header_size = JPEG_HEADER_SIZE,
-		.frmsize = {
-			.min_width = 96,
-			.max_width = 8192,
-			.step_width = MB_DIM,
-			.min_height = 32,
-			.max_height = 8192,
-			.step_height = MB_DIM,
-		},
-	},
-};
-
-static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
-	{
-		.fourcc = V4L2_PIX_FMT_NV12,
-		.codec_mode = HANTRO_MODE_NONE,
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
-		.codec_mode = HANTRO_MODE_MPEG2_DEC,
-		.max_depth = 2,
-		.frmsize = {
-			.min_width = 48,
-			.max_width = 1920,
-			.step_width = MB_DIM,
-			.min_height = 48,
-			.max_height = 1088,
-			.step_height = MB_DIM,
-		},
-	},
-	{
-		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
-		.codec_mode = HANTRO_MODE_VP8_DEC,
-		.max_depth = 2,
-		.frmsize = {
-			.min_width = 48,
-			.max_width = 3840,
-			.step_width = MB_DIM,
-			.min_height = 48,
-			.max_height = 2160,
-			.step_height = MB_DIM,
-		},
-	},
-};
-
-static irqreturn_t rk3399_vepu_irq(int irq, void *dev_id)
-{
-	struct hantro_dev *vpu = dev_id;
-	enum vb2_buffer_state state;
-	u32 status;
-
-	status = vepu_read(vpu, VEPU_REG_INTERRUPT);
-	state = (status & VEPU_REG_INTERRUPT_FRAME_READY) ?
-		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
-
-	vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
-	vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
-
-	hantro_irq_done(vpu, state);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t rk3399_vdpu_irq(int irq, void *dev_id)
-{
-	struct hantro_dev *vpu = dev_id;
-	enum vb2_buffer_state state;
-	u32 status;
-
-	status = vdpu_read(vpu, VDPU_REG_INTERRUPT);
-	state = (status & VDPU_REG_INTERRUPT_DEC_IRQ) ?
-		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
-
-	vdpu_write(vpu, 0, VDPU_REG_INTERRUPT);
-	vdpu_write(vpu, 0, VDPU_REG_AXI_CTRL);
-
-	hantro_irq_done(vpu, state);
-
-	return IRQ_HANDLED;
-}
-
-static int rk3399_vpu_hw_init(struct hantro_dev *vpu)
-{
-	/* Bump ACLK to max. possible freq. to improve performance. */
-	clk_set_rate(vpu->clocks[0].clk, RK3399_ACLK_MAX_FREQ);
-	return 0;
-}
-
-static void rk3399_vpu_enc_reset(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-
-	vepu_write(vpu, VEPU_REG_INTERRUPT_DIS_BIT, VEPU_REG_INTERRUPT);
-	vepu_write(vpu, 0, VEPU_REG_ENCODE_START);
-	vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
-}
-
-static void rk3399_vpu_dec_reset(struct hantro_ctx *ctx)
-{
-	struct hantro_dev *vpu = ctx->dev;
-
-	vdpu_write(vpu, VDPU_REG_INTERRUPT_DEC_IRQ_DIS, VDPU_REG_INTERRUPT);
-	vdpu_write(vpu, 0, VDPU_REG_EN_FLAGS);
-	vdpu_write(vpu, 1, VDPU_REG_SOFT_RESET);
-}
-
-/*
- * Supported codec ops.
- */
-
-static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
-	[HANTRO_MODE_JPEG_ENC] = {
-		.run = rk3399_vpu_jpeg_enc_run,
-		.reset = rk3399_vpu_enc_reset,
-		.init = hantro_jpeg_enc_init,
-		.exit = hantro_jpeg_enc_exit,
-	},
-	[HANTRO_MODE_MPEG2_DEC] = {
-		.run = rk3399_vpu_mpeg2_dec_run,
-		.reset = rk3399_vpu_dec_reset,
-		.init = hantro_mpeg2_dec_init,
-		.exit = hantro_mpeg2_dec_exit,
-	},
-	[HANTRO_MODE_VP8_DEC] = {
-		.run = rk3399_vpu_vp8_dec_run,
-		.reset = rk3399_vpu_dec_reset,
-		.init = hantro_vp8_dec_init,
-		.exit = hantro_vp8_dec_exit,
-	},
-};
-
-/*
- * VPU variant.
- */
-
-static const struct hantro_irq rk3399_irqs[] = {
-	{ "vepu", rk3399_vepu_irq },
-	{ "vdpu", rk3399_vdpu_irq },
-};
-
-static const char * const rk3399_clk_names[] = {
-	"aclk", "hclk"
-};
-
-const struct hantro_variant rk3399_vpu_variant = {
-	.enc_offset = 0x0,
-	.enc_fmts = rk3399_vpu_enc_fmts,
-	.num_enc_fmts = ARRAY_SIZE(rk3399_vpu_enc_fmts),
-	.dec_offset = 0x400,
-	.dec_fmts = rk3399_vpu_dec_fmts,
-	.num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts),
-	.codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
-		 HANTRO_VP8_DECODER,
-	.codec_ops = rk3399_vpu_codec_ops,
-	.irqs = rk3399_irqs,
-	.num_irqs = ARRAY_SIZE(rk3399_irqs),
-	.init = rk3399_vpu_hw_init,
-	.clk_names = rk3399_clk_names,
-	.num_clocks = ARRAY_SIZE(rk3399_clk_names)
-};
-
-static const struct hantro_irq rk3328_irqs[] = {
-	{ "vdpu", rk3399_vdpu_irq },
-};
-
-const struct hantro_variant rk3328_vpu_variant = {
-	.dec_offset = 0x400,
-	.dec_fmts = rk3399_vpu_dec_fmts,
-	.num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts),
-	.codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER,
-	.codec_ops = rk3399_vpu_codec_ops,
-	.irqs = rk3328_irqs,
-	.num_irqs = ARRAY_SIZE(rk3328_irqs),
-	.init = rk3399_vpu_hw_init,
-	.clk_names = rk3399_clk_names,
-	.num_clocks = ARRAY_SIZE(rk3399_clk_names),
-};
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
similarity index 87%
rename from drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c
rename to drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
index 3a27ebef4f388..991213ce16108 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_jpeg_enc.c
+++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_jpeg_enc.c
@@ -28,12 +28,12 @@
 #include "hantro.h"
 #include "hantro_v4l2.h"
 #include "hantro_hw.h"
-#include "rk3399_vpu_regs.h"
+#include "rockchip_vpu2_regs.h"
 
 #define VEPU_JPEG_QUANT_TABLE_COUNT 16
 
-static void rk3399_vpu_set_src_img_ctrl(struct hantro_dev *vpu,
-					struct hantro_ctx *ctx)
+static void rockchip_vpu2_set_src_img_ctrl(struct hantro_dev *vpu,
+					   struct hantro_ctx *ctx)
 {
 	struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
 	u32 reg;
@@ -59,9 +59,9 @@ static void rk3399_vpu_set_src_img_ctrl(struct hantro_dev *vpu,
 	vepu_write_relaxed(vpu, reg, VEPU_REG_ENC_CTRL1);
 }
 
-static void rk3399_vpu_jpeg_enc_set_buffers(struct hantro_dev *vpu,
-					    struct hantro_ctx *ctx,
-					    struct vb2_buffer *src_buf)
+static void rockchip_vpu2_jpeg_enc_set_buffers(struct hantro_dev *vpu,
+					       struct hantro_ctx *ctx,
+					       struct vb2_buffer *src_buf)
 {
 	struct v4l2_pix_format_mplane *pix_fmt = &ctx->src_fmt;
 	dma_addr_t src[3];
@@ -92,9 +92,9 @@ static void rk3399_vpu_jpeg_enc_set_buffers(struct hantro_dev *vpu,
 }
 
 static void
-rk3399_vpu_jpeg_enc_set_qtable(struct hantro_dev *vpu,
-			       unsigned char *luma_qtable,
-			       unsigned char *chroma_qtable)
+rockchip_vpu2_jpeg_enc_set_qtable(struct hantro_dev *vpu,
+				  unsigned char *luma_qtable,
+				  unsigned char *chroma_qtable)
 {
 	u32 reg, i;
 	__be32 *luma_qtable_p;
@@ -118,7 +118,7 @@ rk3399_vpu_jpeg_enc_set_qtable(struct hantro_dev *vpu,
 	}
 }
 
-int rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx)
+int rockchip_vpu2_jpeg_enc_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -141,11 +141,11 @@ int rk3399_vpu_jpeg_enc_run(struct hantro_ctx *ctx)
 	vepu_write_relaxed(vpu, VEPU_REG_ENCODE_FORMAT_JPEG,
 			   VEPU_REG_ENCODE_START);
 
-	rk3399_vpu_set_src_img_ctrl(vpu, ctx);
-	rk3399_vpu_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf);
-	rk3399_vpu_jpeg_enc_set_qtable(vpu,
-				       hantro_jpeg_get_qtable(0),
-				       hantro_jpeg_get_qtable(1));
+	rockchip_vpu2_set_src_img_ctrl(vpu, ctx);
+	rockchip_vpu2_jpeg_enc_set_buffers(vpu, ctx, &src_buf->vb2_buf);
+	rockchip_vpu2_jpeg_enc_set_qtable(vpu,
+					  hantro_jpeg_get_qtable(0),
+					  hantro_jpeg_get_qtable(1));
 
 	reg = VEPU_REG_OUTPUT_SWAP32
 		| VEPU_REG_OUTPUT_SWAP16
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_mpeg2_dec.c
similarity index 94%
rename from drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
rename to drivers/staging/media/hantro/rockchip_vpu2_hw_mpeg2_dec.c
index 683982c24c2dd..b66737fab46b4 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_mpeg2_dec.c
+++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_mpeg2_dec.c
@@ -80,8 +80,8 @@
 #define VDPU_REG_MV_ACCURACY_BWD(v)	((v) ? BIT(1) : 0)
 
 static void
-rk3399_vpu_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
-				      struct hantro_ctx *ctx)
+rockchip_vpu2_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
+					 struct hantro_ctx *ctx)
 {
 	struct v4l2_ctrl_mpeg2_quantisation *q;
 
@@ -91,12 +91,12 @@ rk3399_vpu_mpeg2_dec_set_quantisation(struct hantro_dev *vpu,
 }
 
 static void
-rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
-				 struct hantro_ctx *ctx,
-				 struct vb2_buffer *src_buf,
-				 struct vb2_buffer *dst_buf,
-				 const struct v4l2_ctrl_mpeg2_sequence *seq,
-				 const struct v4l2_ctrl_mpeg2_picture *pic)
+rockchip_vpu2_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
+				    struct hantro_ctx *ctx,
+				    struct vb2_buffer *src_buf,
+				    struct vb2_buffer *dst_buf,
+				    const struct v4l2_ctrl_mpeg2_sequence *seq,
+				    const struct v4l2_ctrl_mpeg2_picture *pic)
 {
 	dma_addr_t forward_addr = 0, backward_addr = 0;
 	dma_addr_t current_addr, addr;
@@ -148,7 +148,7 @@ rk3399_vpu_mpeg2_dec_set_buffers(struct hantro_dev *vpu,
 	vdpu_write_relaxed(vpu, backward_addr, VDPU_REG_REFER3_BASE);
 }
 
-int rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
+int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
@@ -233,11 +233,10 @@ int rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx)
 	      VDPU_REG_MV_ACCURACY_BWD(1);
 	vdpu_write_relaxed(vpu, reg, VDPU_SWREG(136));
 
-	rk3399_vpu_mpeg2_dec_set_quantisation(vpu, ctx);
+	rockchip_vpu2_mpeg2_dec_set_quantisation(vpu, ctx);
 
-	rk3399_vpu_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
-					 &dst_buf->vb2_buf,
-					 seq, pic);
+	rockchip_vpu2_mpeg2_dec_set_buffers(vpu, ctx, &src_buf->vb2_buf,
+					    &dst_buf->vb2_buf, seq, pic);
 
 	/* Kick the watchdog and start decoding */
 	hantro_end_prepare_run(ctx);
diff --git a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c
similarity index 99%
rename from drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c
rename to drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c
index e5d20fe5b0070..951b55f58a612 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_hw_vp8_dec.c
+++ b/drivers/staging/media/hantro/rockchip_vpu2_hw_vp8_dec.c
@@ -503,7 +503,7 @@ static void cfg_buffers(struct hantro_ctx *ctx,
 	vdpu_write_relaxed(vpu, dst_dma, VDPU_REG_ADDR_DST);
 }
 
-int rk3399_vpu_vp8_dec_run(struct hantro_ctx *ctx)
+int rockchip_vpu2_vp8_dec_run(struct hantro_ctx *ctx)
 {
 	const struct v4l2_ctrl_vp8_frame *hdr;
 	struct hantro_dev *vpu = ctx->dev;
diff --git a/drivers/staging/media/hantro/rk3399_vpu_regs.h b/drivers/staging/media/hantro/rockchip_vpu2_regs.h
similarity index 99%
rename from drivers/staging/media/hantro/rk3399_vpu_regs.h
rename to drivers/staging/media/hantro/rockchip_vpu2_regs.h
index 88d096920f307..49e40889545b4 100644
--- a/drivers/staging/media/hantro/rk3399_vpu_regs.h
+++ b/drivers/staging/media/hantro/rockchip_vpu2_regs.h
@@ -6,8 +6,8 @@
  *	Alpha Lin <alpha.lin@rock-chips.com>
  */
 
-#ifndef RK3399_VPU_REGS_H_
-#define RK3399_VPU_REGS_H_
+#ifndef ROCKCHIP_VPU2_REGS_H_
+#define ROCKCHIP_VPU2_REGS_H_
 
 /* Encoder registers. */
 #define VEPU_REG_VP8_QUT_1ST(i)			(0x000 + ((i) * 0x24))
@@ -597,4 +597,4 @@
 #define     VDPU_REG_PRED_FLT_PRED_BC_TAP_4_3(x)	(((x) & 0x3ff) << 12)
 #define     VDPU_REG_PRED_FLT_PRED_BC_TAP_5_0(x)	(((x) & 0x3ff) << 2)
 
-#endif /* RK3399_VPU_REGS_H_ */
+#endif /* ROCKCHIP_VPU2_REGS_H_ */
diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
new file mode 100644
index 0000000000000..bf760e8e65ceb
--- /dev/null
+++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hantro VPU codec driver
+ *
+ * Copyright (C) 2018 Rockchip Electronics Co., Ltd.
+ *	Jeffy Chen <jeffy.chen@rock-chips.com>
+ */
+
+#include <linux/clk.h>
+
+#include "hantro.h"
+#include "hantro_jpeg.h"
+#include "hantro_h1_regs.h"
+#include "rockchip_vpu2_regs.h"
+
+#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
+
+/*
+ * Supported formats.
+ */
+
+static const struct hantro_fmt rockchip_vpu_enc_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_YUV420M,
+		.codec_mode = HANTRO_MODE_NONE,
+		.enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420P,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_NV12M,
+		.codec_mode = HANTRO_MODE_NONE,
+		.enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUV420SP,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_YUYV,
+		.codec_mode = HANTRO_MODE_NONE,
+		.enc_fmt = ROCKCHIP_VPU_ENC_FMT_YUYV422,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_UYVY,
+		.codec_mode = HANTRO_MODE_NONE,
+		.enc_fmt = ROCKCHIP_VPU_ENC_FMT_UYVY422,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_JPEG,
+		.codec_mode = HANTRO_MODE_JPEG_ENC,
+		.max_depth = 2,
+		.header_size = JPEG_HEADER_SIZE,
+		.frmsize = {
+			.min_width = 96,
+			.max_width = 8192,
+			.step_width = MB_DIM,
+			.min_height = 32,
+			.max_height = 8192,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
+static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_YUYV,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+};
+
+static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_H264_SLICE,
+		.codec_mode = HANTRO_MODE_H264_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 4096,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 2304,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+		.codec_mode = HANTRO_MODE_MPEG2_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1920,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 1088,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
+		.codec_mode = HANTRO_MODE_VP8_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 3840,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 2160,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
+static const struct hantro_fmt rk3399_vpu_dec_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+		.codec_mode = HANTRO_MODE_MPEG2_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1920,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 1088,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
+		.codec_mode = HANTRO_MODE_VP8_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 3840,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 2160,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
+static irqreturn_t rockchip_vpu1_vepu_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vepu_read(vpu, H1_REG_INTERRUPT);
+	state = (status & H1_REG_INTERRUPT_FRAME_RDY) ?
+		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vepu_write(vpu, 0, H1_REG_INTERRUPT);
+	vepu_write(vpu, 0, H1_REG_AXI_CTRL);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rockchip_vpu2_vdpu_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vdpu_read(vpu, VDPU_REG_INTERRUPT);
+	state = (status & VDPU_REG_INTERRUPT_DEC_IRQ) ?
+		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vdpu_write(vpu, 0, VDPU_REG_INTERRUPT);
+	vdpu_write(vpu, 0, VDPU_REG_AXI_CTRL);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id)
+{
+	struct hantro_dev *vpu = dev_id;
+	enum vb2_buffer_state state;
+	u32 status;
+
+	status = vepu_read(vpu, VEPU_REG_INTERRUPT);
+	state = (status & VEPU_REG_INTERRUPT_FRAME_READY) ?
+		VB2_BUF_STATE_DONE : VB2_BUF_STATE_ERROR;
+
+	vepu_write(vpu, 0, VEPU_REG_INTERRUPT);
+	vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
+
+	hantro_irq_done(vpu, state);
+
+	return IRQ_HANDLED;
+}
+
+static int rockchip_vpu_hw_init(struct hantro_dev *vpu)
+{
+	/* Bump ACLK to max. possible freq. to improve performance. */
+	clk_set_rate(vpu->clocks[0].clk, RK3288_ACLK_MAX_FREQ);
+	return 0;
+}
+
+static void rockchip_vpu1_enc_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	vepu_write(vpu, H1_REG_INTERRUPT_DIS_BIT, H1_REG_INTERRUPT);
+	vepu_write(vpu, 0, H1_REG_ENC_CTRL);
+	vepu_write(vpu, 0, H1_REG_AXI_CTRL);
+}
+
+static void rockchip_vpu2_dec_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	vdpu_write(vpu, VDPU_REG_INTERRUPT_DEC_IRQ_DIS, VDPU_REG_INTERRUPT);
+	vdpu_write(vpu, 0, VDPU_REG_EN_FLAGS);
+	vdpu_write(vpu, 1, VDPU_REG_SOFT_RESET);
+}
+
+static void rockchip_vpu2_enc_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	vepu_write(vpu, VEPU_REG_INTERRUPT_DIS_BIT, VEPU_REG_INTERRUPT);
+	vepu_write(vpu, 0, VEPU_REG_ENCODE_START);
+	vepu_write(vpu, 0, VEPU_REG_AXI_CTRL);
+}
+
+/*
+ * Supported codec ops.
+ */
+
+static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
+	[HANTRO_MODE_JPEG_ENC] = {
+		.run = hantro_h1_jpeg_enc_run,
+		.reset = rockchip_vpu1_enc_reset,
+		.init = hantro_jpeg_enc_init,
+		.done = hantro_jpeg_enc_done,
+		.exit = hantro_jpeg_enc_exit,
+	},
+	[HANTRO_MODE_H264_DEC] = {
+		.run = hantro_g1_h264_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_h264_dec_init,
+		.exit = hantro_h264_dec_exit,
+	},
+	[HANTRO_MODE_MPEG2_DEC] = {
+		.run = hantro_g1_mpeg2_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_mpeg2_dec_init,
+		.exit = hantro_mpeg2_dec_exit,
+	},
+	[HANTRO_MODE_VP8_DEC] = {
+		.run = hantro_g1_vp8_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_vp8_dec_init,
+		.exit = hantro_vp8_dec_exit,
+	},
+};
+
+static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
+	[HANTRO_MODE_JPEG_ENC] = {
+		.run = rockchip_vpu2_jpeg_enc_run,
+		.reset = rockchip_vpu2_enc_reset,
+		.init = hantro_jpeg_enc_init,
+		.exit = hantro_jpeg_enc_exit,
+	},
+	[HANTRO_MODE_MPEG2_DEC] = {
+		.run = rockchip_vpu2_mpeg2_dec_run,
+		.reset = rockchip_vpu2_dec_reset,
+		.init = hantro_mpeg2_dec_init,
+		.exit = hantro_mpeg2_dec_exit,
+	},
+	[HANTRO_MODE_VP8_DEC] = {
+		.run = rockchip_vpu2_vp8_dec_run,
+		.reset = rockchip_vpu2_dec_reset,
+		.init = hantro_vp8_dec_init,
+		.exit = hantro_vp8_dec_exit,
+	},
+};
+
+/*
+ * VPU variant.
+ */
+
+static const struct hantro_irq rockchip_vpu1_irqs[] = {
+	{ "vepu", rockchip_vpu1_vepu_irq },
+	{ "vdpu", hantro_g1_irq },
+};
+
+static const struct hantro_irq rockchip_vdpu2_irqs[] = {
+	{ "vdpu", rockchip_vpu2_vdpu_irq },
+};
+
+static const struct hantro_irq rockchip_vpu2_irqs[] = {
+	{ "vepu", rockchip_vpu2_vepu_irq },
+	{ "vdpu", rockchip_vpu2_vdpu_irq },
+};
+
+static const char * const rockchip_vpu_clk_names[] = {
+	"aclk", "hclk"
+};
+
+const struct hantro_variant rk3288_vpu_variant = {
+	.enc_offset = 0x0,
+	.enc_fmts = rockchip_vpu_enc_fmts,
+	.num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+	.dec_offset = 0x400,
+	.dec_fmts = rk3288_vpu_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(rk3288_vpu_dec_fmts),
+	.postproc_fmts = rockchip_vpu1_postproc_fmts,
+	.num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+	.postproc_regs = &hantro_g1_postproc_regs,
+	.codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+		 HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+	.codec_ops = rk3288_vpu_codec_ops,
+	.irqs = rockchip_vpu1_irqs,
+	.num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs),
+	.init = rockchip_vpu_hw_init,
+	.clk_names = rockchip_vpu_clk_names,
+	.num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
+const struct hantro_variant rk3328_vpu_variant = {
+	.dec_offset = 0x400,
+	.dec_fmts = rk3399_vpu_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts),
+	.codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER,
+	.codec_ops = rk3399_vpu_codec_ops,
+	.irqs = rockchip_vdpu2_irqs,
+	.num_irqs = ARRAY_SIZE(rockchip_vdpu2_irqs),
+	.init = rockchip_vpu_hw_init,
+	.clk_names = rockchip_vpu_clk_names,
+	.num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names),
+};
+
+const struct hantro_variant rk3399_vpu_variant = {
+	.enc_offset = 0x0,
+	.enc_fmts = rockchip_vpu_enc_fmts,
+	.num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+	.dec_offset = 0x400,
+	.dec_fmts = rk3399_vpu_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts),
+	.codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+		 HANTRO_VP8_DECODER,
+	.codec_ops = rk3399_vpu_codec_ops,
+	.irqs = rockchip_vpu2_irqs,
+	.num_irqs = ARRAY_SIZE(rockchip_vpu2_irqs),
+	.init = rockchip_vpu_hw_init,
+	.clk_names = rockchip_vpu_clk_names,
+	.num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
-- 
GitLab


From 78bb1ae5472cabfaf474d348437c25ccaddde75e Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:14 +0200
Subject: [PATCH 3046/3804] media: hantro: add support for Rockchip RK3066

RK3066's VPU IP block is the predecessor of the one found in RK3288.
The hardware differences are:
  - supports decoding frame sizes up to 1920x1088 only
  - doesn't have the 'G1_REG_SOFT_RESET' register
    (requires another .reset callback for hantro_codec_ops,
     since writing this register will result in a non-working
     IP block)
  - has one ACLK/HCLK per vdpu/vepu
  - ACLKs can be clocked up to 300 MHz only
  - no MMU
    (no changes required: CMA will be transparently used)

Add a new RK3066 variant which reflects these differences. This variant
can be used for RK3188 as well.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c     |   1 +
 drivers/staging/media/hantro/hantro_hw.h      |   1 +
 .../staging/media/hantro/rockchip_vpu_hw.c    | 121 ++++++++++++++++++
 3 files changed, 123 insertions(+)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index 34e778e1cea11..aaef66c4c795d 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -582,6 +582,7 @@ static const struct v4l2_file_operations hantro_fops = {
 
 static const struct of_device_id of_hantro_match[] = {
 #ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP
+	{ .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, },
 	{ .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, },
 	{ .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, },
 	{ .compatible = "rockchip,rk3399-vpu", .data = &rk3399_vpu_variant, },
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index a7b75b05e8493..77df0eba4e6f2 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -205,6 +205,7 @@ enum hantro_enc_fmt {
 
 extern const struct hantro_variant imx8mq_vpu_g2_variant;
 extern const struct hantro_variant imx8mq_vpu_variant;
+extern const struct hantro_variant rk3066_vpu_variant;
 extern const struct hantro_variant rk3288_vpu_variant;
 extern const struct hantro_variant rk3328_vpu_variant;
 extern const struct hantro_variant rk3399_vpu_variant;
diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
index bf760e8e65ceb..b370b5e802fa6 100644
--- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
+++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
@@ -10,9 +10,11 @@
 
 #include "hantro.h"
 #include "hantro_jpeg.h"
+#include "hantro_g1_regs.h"
 #include "hantro_h1_regs.h"
 #include "rockchip_vpu2_regs.h"
 
+#define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000)
 #define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
 
 /*
@@ -63,6 +65,52 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = {
 	},
 };
 
+static const struct hantro_fmt rk3066_vpu_dec_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.codec_mode = HANTRO_MODE_NONE,
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_H264_SLICE,
+		.codec_mode = HANTRO_MODE_H264_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1920,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 1088,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_MPEG2_SLICE,
+		.codec_mode = HANTRO_MODE_MPEG2_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1920,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 1088,
+			.step_height = MB_DIM,
+		},
+	},
+	{
+		.fourcc = V4L2_PIX_FMT_VP8_FRAME,
+		.codec_mode = HANTRO_MODE_VP8_DEC,
+		.max_depth = 2,
+		.frmsize = {
+			.min_width = 48,
+			.max_width = 1920,
+			.step_width = MB_DIM,
+			.min_height = 48,
+			.max_height = 1088,
+			.step_height = MB_DIM,
+		},
+	},
+};
+
 static const struct hantro_fmt rk3288_vpu_dec_fmts[] = {
 	{
 		.fourcc = V4L2_PIX_FMT_NV12,
@@ -196,6 +244,14 @@ static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int rk3066_vpu_hw_init(struct hantro_dev *vpu)
+{
+	/* Bump ACLKs to max. possible freq. to improve performance. */
+	clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ);
+	clk_set_rate(vpu->clocks[2].clk, RK3066_ACLK_MAX_FREQ);
+	return 0;
+}
+
 static int rockchip_vpu_hw_init(struct hantro_dev *vpu)
 {
 	/* Bump ACLK to max. possible freq. to improve performance. */
@@ -203,6 +259,14 @@ static int rockchip_vpu_hw_init(struct hantro_dev *vpu)
 	return 0;
 }
 
+static void rk3066_vpu_dec_reset(struct hantro_ctx *ctx)
+{
+	struct hantro_dev *vpu = ctx->dev;
+
+	vdpu_write(vpu, G1_REG_INTERRUPT_DEC_IRQ_DIS, G1_REG_INTERRUPT);
+	vdpu_write(vpu, G1_REG_CONFIG_DEC_CLK_GATE_E, G1_REG_CONFIG);
+}
+
 static void rockchip_vpu1_enc_reset(struct hantro_ctx *ctx)
 {
 	struct hantro_dev *vpu = ctx->dev;
@@ -233,6 +297,33 @@ static void rockchip_vpu2_enc_reset(struct hantro_ctx *ctx)
 /*
  * Supported codec ops.
  */
+static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = {
+	[HANTRO_MODE_JPEG_ENC] = {
+		.run = hantro_h1_jpeg_enc_run,
+		.reset = rockchip_vpu1_enc_reset,
+		.init = hantro_jpeg_enc_init,
+		.done = hantro_jpeg_enc_done,
+		.exit = hantro_jpeg_enc_exit,
+	},
+	[HANTRO_MODE_H264_DEC] = {
+		.run = hantro_g1_h264_dec_run,
+		.reset = rk3066_vpu_dec_reset,
+		.init = hantro_h264_dec_init,
+		.exit = hantro_h264_dec_exit,
+	},
+	[HANTRO_MODE_MPEG2_DEC] = {
+		.run = hantro_g1_mpeg2_dec_run,
+		.reset = rk3066_vpu_dec_reset,
+		.init = hantro_mpeg2_dec_init,
+		.exit = hantro_mpeg2_dec_exit,
+	},
+	[HANTRO_MODE_VP8_DEC] = {
+		.run = hantro_g1_vp8_dec_run,
+		.reset = rk3066_vpu_dec_reset,
+		.init = hantro_vp8_dec_init,
+		.exit = hantro_vp8_dec_exit,
+	},
+};
 
 static const struct hantro_codec_ops rk3288_vpu_codec_ops[] = {
 	[HANTRO_MODE_JPEG_ENC] = {
@@ -301,10 +392,40 @@ static const struct hantro_irq rockchip_vpu2_irqs[] = {
 	{ "vdpu", rockchip_vpu2_vdpu_irq },
 };
 
+static const char * const rk3066_vpu_clk_names[] = {
+	"aclk_vdpu", "hclk_vdpu",
+	"aclk_vepu", "hclk_vepu"
+};
+
 static const char * const rockchip_vpu_clk_names[] = {
 	"aclk", "hclk"
 };
 
+/*
+ * Despite this variant has separate clocks for decoder and encoder,
+ * it's still required to enable all four of them for either decoding
+ * or encoding and we can't split it in separate g1/h1 variants.
+ */
+const struct hantro_variant rk3066_vpu_variant = {
+	.enc_offset = 0x0,
+	.enc_fmts = rockchip_vpu_enc_fmts,
+	.num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts),
+	.dec_offset = 0x400,
+	.dec_fmts = rk3066_vpu_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts),
+	.postproc_fmts = rockchip_vpu1_postproc_fmts,
+	.num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+	.postproc_regs = &hantro_g1_postproc_regs,
+	.codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER |
+		 HANTRO_VP8_DECODER | HANTRO_H264_DECODER,
+	.codec_ops = rk3066_vpu_codec_ops,
+	.irqs = rockchip_vpu1_irqs,
+	.num_irqs = ARRAY_SIZE(rockchip_vpu1_irqs),
+	.init = rk3066_vpu_hw_init,
+	.clk_names = rk3066_vpu_clk_names,
+	.num_clocks = ARRAY_SIZE(rk3066_vpu_clk_names)
+};
+
 const struct hantro_variant rk3288_vpu_variant = {
 	.enc_offset = 0x0,
 	.enc_fmts = rockchip_vpu_enc_fmts,
-- 
GitLab


From 4f34591568e7a1e4a9d0839b4c8d3155f3047f72 Mon Sep 17 00:00:00 2001
From: Alex Bee <knaerzche@gmail.com>
Date: Mon, 14 Jun 2021 23:32:15 +0200
Subject: [PATCH 3047/3804] media: hantro: add support for Rockchip RK3036

RK3036's VPU IP block is the same as the one in RK3288, except that it
doesn't have an encoder, decoding is supported up to 1920x1088 only, and
the AXI clock can be set to 300 MHz max.

Add a new RK3036 variant which reflects these differences.

Signed-off-by: Alex Bee <knaerzche@gmail.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/staging/media/hantro/hantro_drv.c     |  1 +
 drivers/staging/media/hantro/hantro_hw.h      |  1 +
 .../staging/media/hantro/rockchip_vpu_hw.c    | 49 +++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c
index aaef66c4c795d..31d8449ca1d2d 100644
--- a/drivers/staging/media/hantro/hantro_drv.c
+++ b/drivers/staging/media/hantro/hantro_drv.c
@@ -582,6 +582,7 @@ static const struct v4l2_file_operations hantro_fops = {
 
 static const struct of_device_id of_hantro_match[] = {
 #ifdef CONFIG_VIDEO_HANTRO_ROCKCHIP
+	{ .compatible = "rockchip,rk3036-vpu", .data = &rk3036_vpu_variant, },
 	{ .compatible = "rockchip,rk3066-vpu", .data = &rk3066_vpu_variant, },
 	{ .compatible = "rockchip,rk3288-vpu", .data = &rk3288_vpu_variant, },
 	{ .compatible = "rockchip,rk3328-vpu", .data = &rk3328_vpu_variant, },
diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h
index 77df0eba4e6f2..5dcf65805396b 100644
--- a/drivers/staging/media/hantro/hantro_hw.h
+++ b/drivers/staging/media/hantro/hantro_hw.h
@@ -205,6 +205,7 @@ enum hantro_enc_fmt {
 
 extern const struct hantro_variant imx8mq_vpu_g2_variant;
 extern const struct hantro_variant imx8mq_vpu_variant;
+extern const struct hantro_variant rk3036_vpu_variant;
 extern const struct hantro_variant rk3066_vpu_variant;
 extern const struct hantro_variant rk3288_vpu_variant;
 extern const struct hantro_variant rk3328_vpu_variant;
diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c
index b370b5e802fa6..3ccc16413f42f 100644
--- a/drivers/staging/media/hantro/rockchip_vpu_hw.c
+++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c
@@ -244,6 +244,13 @@ static irqreturn_t rockchip_vpu2_vepu_irq(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int rk3036_vpu_hw_init(struct hantro_dev *vpu)
+{
+	/* Bump ACLK to max. possible freq. to improve performance. */
+	clk_set_rate(vpu->clocks[0].clk, RK3066_ACLK_MAX_FREQ);
+	return 0;
+}
+
 static int rk3066_vpu_hw_init(struct hantro_dev *vpu)
 {
 	/* Bump ACLKs to max. possible freq. to improve performance. */
@@ -297,6 +304,27 @@ static void rockchip_vpu2_enc_reset(struct hantro_ctx *ctx)
 /*
  * Supported codec ops.
  */
+static const struct hantro_codec_ops rk3036_vpu_codec_ops[] = {
+	[HANTRO_MODE_H264_DEC] = {
+		.run = hantro_g1_h264_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_h264_dec_init,
+		.exit = hantro_h264_dec_exit,
+	},
+	[HANTRO_MODE_MPEG2_DEC] = {
+		.run = hantro_g1_mpeg2_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_mpeg2_dec_init,
+		.exit = hantro_mpeg2_dec_exit,
+	},
+	[HANTRO_MODE_VP8_DEC] = {
+		.run = hantro_g1_vp8_dec_run,
+		.reset = hantro_g1_reset,
+		.init = hantro_vp8_dec_init,
+		.exit = hantro_vp8_dec_exit,
+	},
+};
+
 static const struct hantro_codec_ops rk3066_vpu_codec_ops[] = {
 	[HANTRO_MODE_JPEG_ENC] = {
 		.run = hantro_h1_jpeg_enc_run,
@@ -378,6 +406,10 @@ static const struct hantro_codec_ops rk3399_vpu_codec_ops[] = {
  * VPU variant.
  */
 
+static const struct hantro_irq rockchip_vdpu1_irqs[] = {
+	{ "vdpu", hantro_g1_irq },
+};
+
 static const struct hantro_irq rockchip_vpu1_irqs[] = {
 	{ "vepu", rockchip_vpu1_vepu_irq },
 	{ "vdpu", hantro_g1_irq },
@@ -401,6 +433,23 @@ static const char * const rockchip_vpu_clk_names[] = {
 	"aclk", "hclk"
 };
 
+const struct hantro_variant rk3036_vpu_variant = {
+	.dec_offset = 0x400,
+	.dec_fmts = rk3066_vpu_dec_fmts,
+	.num_dec_fmts = ARRAY_SIZE(rk3066_vpu_dec_fmts),
+	.postproc_fmts = rockchip_vpu1_postproc_fmts,
+	.num_postproc_fmts = ARRAY_SIZE(rockchip_vpu1_postproc_fmts),
+	.postproc_regs = &hantro_g1_postproc_regs,
+	.codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER |
+		 HANTRO_H264_DECODER,
+	.codec_ops = rk3036_vpu_codec_ops,
+	.irqs = rockchip_vdpu1_irqs,
+	.num_irqs = ARRAY_SIZE(rockchip_vdpu1_irqs),
+	.init = rk3036_vpu_hw_init,
+	.clk_names = rockchip_vpu_clk_names,
+	.num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names)
+};
+
 /*
  * Despite this variant has separate clocks for decoder and encoder,
  * it's still required to enable all four of them for either decoding
-- 
GitLab


From f9f28e5bd0baee9708c9011897196f06ae3a2733 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Thu, 17 Jun 2021 13:56:18 +0900
Subject: [PATCH 3048/3804] btrfs: zoned: fix negative
 space_info->bytes_readonly

Consider a block group that is in use on zoned btrfs.

|<- ZU ->|<- used ->|<---free--->|
                     `- Alloc offset
ZU: Zone unusable

Marking the block group read-only will migrate the zone unusable bytes
to the read-only bytes. So, we will have this.

|<- RO ->|<- used ->|<--- RO --->|

RO: Read only

When marking it back to read-write, btrfs_dec_block_group_ro()
subtracts the above "RO" bytes from the
space_info->bytes_readonly. And, it moves the zone unusable bytes back
and again subtracts those bytes from the space_info->bytes_readonly,
leading to negative bytes_readonly.

This can be observed in the output as eg.:

  Data, single: total=512.00MiB, used=165.21MiB, zone_unusable=16.00EiB
  Data, single: total=536870912, used=173256704, zone_unusable=18446744073603186688

This commit fixes the issue by reordering the operations.

Link: https://github.com/naota/linux/issues/37
Reported-by: David Sterba <dsterba@suse.com>
Fixes: 169e0da91a21 ("btrfs: zoned: track unusable bytes for zones")
CC: stable@vger.kernel.org # 5.12+
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index aa57bdc8fc89d..6d5c4e45cfef0 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2442,16 +2442,16 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
 	spin_lock(&sinfo->lock);
 	spin_lock(&cache->lock);
 	if (!--cache->ro) {
-		num_bytes = cache->length - cache->reserved -
-			    cache->pinned - cache->bytes_super -
-			    cache->zone_unusable - cache->used;
-		sinfo->bytes_readonly -= num_bytes;
 		if (btrfs_is_zoned(cache->fs_info)) {
 			/* Migrate zone_unusable bytes back */
 			cache->zone_unusable = cache->alloc_offset - cache->used;
 			sinfo->bytes_zone_unusable += cache->zone_unusable;
 			sinfo->bytes_readonly -= cache->zone_unusable;
 		}
+		num_bytes = cache->length - cache->reserved -
+			    cache->pinned - cache->bytes_super -
+			    cache->zone_unusable - cache->used;
+		sinfo->bytes_readonly -= num_bytes;
 		list_del_init(&cache->ro_list);
 	}
 	spin_unlock(&cache->lock);
-- 
GitLab


From 3de09c7ae70d544b13b4da74fa3ebd4c25eb9ab9 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:02 +0200
Subject: [PATCH 3049/3804] media: i2c: max9286: Adjust parameters indent

The parameters to max9286_i2c_mux_configure() fit on the previous
line. Adjust the indentation accordingly.

Cosmetic change only.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9286.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index 4631bfeeacc0f..e3c23916b926d 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -287,9 +287,8 @@ static int max9286_i2c_mux_select(struct i2c_mux_core *muxc, u32 chan)
 
 	priv->mux_channel = chan;
 
-	max9286_i2c_mux_configure(priv,
-				  MAX9286_FWDCCEN(chan) |
-				  MAX9286_REVCCEN(chan));
+	max9286_i2c_mux_configure(priv, MAX9286_FWDCCEN(chan) |
+					MAX9286_REVCCEN(chan));
 
 	return 0;
 }
-- 
GitLab


From f78723eb627554213048918caa02a42cae66884e Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:03 +0200
Subject: [PATCH 3050/3804] media: i2c: max9286: Rename reverse_channel_mv

Rename the reverse_channel_mv variable to init_rev_chan_mv as
the next patch will cache the reverse channel amplitude in
a new driver variable.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9286.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index e3c23916b926d..22021e90e3228 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -163,7 +163,8 @@ struct max9286_priv {
 	unsigned int mux_channel;
 	bool mux_open;
 
-	u32 reverse_channel_mv;
+	/* The initial reverse control channel amplitude. */
+	u32 init_rev_chan_mv;
 
 	struct v4l2_ctrl_handler ctrls;
 	struct v4l2_ctrl *pixelrate;
@@ -563,7 +564,7 @@ static int max9286_notify_bound(struct v4l2_async_notifier *notifier,
 	 * - Disable auto-ack as communication on the control channel are now
 	 *   stable.
 	 */
-	if (priv->reverse_channel_mv < 170)
+	if (priv->init_rev_chan_mv < 170)
 		max9286_reverse_channel_setup(priv, 170);
 	max9286_check_config_link(priv, priv->source_mask);
 
@@ -972,7 +973,7 @@ static int max9286_setup(struct max9286_priv *priv)
 	 * only. This should be disabled after the mux is initialised.
 	 */
 	max9286_configure_i2c(priv, true);
-	max9286_reverse_channel_setup(priv, priv->reverse_channel_mv);
+	max9286_reverse_channel_setup(priv, priv->init_rev_chan_mv);
 
 	/*
 	 * Enable GMSL links, mask unused ones and autodetect link
@@ -1237,9 +1238,9 @@ static int max9286_parse_dt(struct max9286_priv *priv)
 	if (of_property_read_u32(dev->of_node,
 				 "maxim,reverse-channel-microvolt",
 				 &reverse_channel_microvolt))
-		priv->reverse_channel_mv = 170;
+		priv->init_rev_chan_mv = 170;
 	else
-		priv->reverse_channel_mv = reverse_channel_microvolt / 1000U;
+		priv->init_rev_chan_mv = reverse_channel_microvolt / 1000U;
 
 	priv->route_mask = priv->source_mask;
 
-- 
GitLab


From 902edc2a1c1ae4b514efd800dc5d5bc5b6d58991 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:04 +0200
Subject: [PATCH 3051/3804] media: i2c: max9286: Cache channel amplitude

Cache the current channel amplitude in a driver variable
to skip updating it if the newly requested value is the same
as the currently configured one.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9286.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index 22021e90e3228..53368625fb8e7 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -165,6 +165,7 @@ struct max9286_priv {
 
 	/* The initial reverse control channel amplitude. */
 	u32 init_rev_chan_mv;
+	u32 rev_chan_mv;
 
 	struct v4l2_ctrl_handler ctrls;
 	struct v4l2_ctrl *pixelrate;
@@ -341,8 +342,15 @@ static void max9286_configure_i2c(struct max9286_priv *priv, bool localack)
 static void max9286_reverse_channel_setup(struct max9286_priv *priv,
 					  unsigned int chan_amplitude)
 {
+	u8 chan_config;
+
+	if (priv->rev_chan_mv == chan_amplitude)
+		return;
+
+	priv->rev_chan_mv = chan_amplitude;
+
 	/* Reverse channel transmission time: default to 1. */
-	u8 chan_config = MAX9286_REV_TRF(1);
+	chan_config = MAX9286_REV_TRF(1);
 
 	/*
 	 * Reverse channel setup.
@@ -564,8 +572,7 @@ static int max9286_notify_bound(struct v4l2_async_notifier *notifier,
 	 * - Disable auto-ack as communication on the control channel are now
 	 *   stable.
 	 */
-	if (priv->init_rev_chan_mv < 170)
-		max9286_reverse_channel_setup(priv, 170);
+	max9286_reverse_channel_setup(priv, 170);
 	max9286_check_config_link(priv, priv->source_mask);
 
 	/*
-- 
GitLab


From 731c24ffa2b1614335987645d8821bf2ceedc841 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:05 +0200
Subject: [PATCH 3052/3804] media: i2c: max9286: Define high channel amplitude

Provide a macro to define the reverse channel amplitude to
be used to compensate the remote serializer noise immunity.

While at it, update a comment.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9286.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index 53368625fb8e7..b0cd9fc8f7e2e 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -113,6 +113,7 @@
 #define MAX9286_REV_TRF(n)		((n) << 4)
 #define MAX9286_REV_AMP(n)		((((n) - 30) / 10) << 1) /* in mV */
 #define MAX9286_REV_AMP_X		BIT(0)
+#define MAX9286_REV_AMP_HIGH		170
 /* Register 0x3f */
 #define MAX9286_EN_REV_CFG		BIT(6)
 #define MAX9286_REV_FLEN(n)		((n) - 20)
@@ -567,12 +568,12 @@ static int max9286_notify_bound(struct v4l2_async_notifier *notifier,
 	 * channels:
 	 *
 	 * - Increase the reverse channel amplitude to compensate for the
-	 *   remote ends high threshold, if not done already
+	 *   remote ends high threshold
 	 * - Verify all configuration links are properly detected
 	 * - Disable auto-ack as communication on the control channel are now
 	 *   stable.
 	 */
-	max9286_reverse_channel_setup(priv, 170);
+	max9286_reverse_channel_setup(priv, MAX9286_REV_AMP_HIGH);
 	max9286_check_config_link(priv, priv->source_mask);
 
 	/*
-- 
GitLab


From 4ff5278dcef900879252556a51b74b33efb06623 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:06 +0200
Subject: [PATCH 3053/3804] media: i2c: max9286: Rework comments in .bound()

Rephrase a comment in .bound() callback to make it clear we register
a subdev notifier and remove a redundant comment about disabling i2c
auto-ack.

No functional changes intended.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9286.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c
index b0cd9fc8f7e2e..1aa2c58fd38c5 100644
--- a/drivers/media/i2c/max9286.c
+++ b/drivers/media/i2c/max9286.c
@@ -556,9 +556,9 @@ static int max9286_notify_bound(struct v4l2_async_notifier *notifier,
 		subdev->name, src_pad, index);
 
 	/*
-	 * We can only register v4l2_async_notifiers, which do not provide a
-	 * means to register a complete callback. bound_sources allows us to
-	 * identify when all remote serializers have completed their probe.
+	 * As we register a subdev notifiers we won't get a .complete() callback
+	 * here, so we have to use bound_sources to identify when all remote
+	 * serializers have probed.
 	 */
 	if (priv->bound_sources != priv->source_mask)
 		return 0;
@@ -575,11 +575,6 @@ static int max9286_notify_bound(struct v4l2_async_notifier *notifier,
 	 */
 	max9286_reverse_channel_setup(priv, MAX9286_REV_AMP_HIGH);
 	max9286_check_config_link(priv, priv->source_mask);
-
-	/*
-	 * Re-configure I2C with local acknowledge disabled after cameras have
-	 * probed.
-	 */
 	max9286_configure_i2c(priv, false);
 
 	return max9286_set_pixelrate(priv);
-- 
GitLab


From ad01032aaf437c526d7135384bb4f998828d0cfc Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:07 +0200
Subject: [PATCH 3054/3804] media: i2c: max9271: Check max9271_write() return

Check the return value of the max9271_write() function in the
max9271 library driver.

While at it, modify an existing condition to be made identical
to other checks.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9271.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/media/i2c/max9271.c b/drivers/media/i2c/max9271.c
index c495582dcff6c..2c7dc7fb98462 100644
--- a/drivers/media/i2c/max9271.c
+++ b/drivers/media/i2c/max9271.c
@@ -106,7 +106,10 @@ int max9271_set_serial_link(struct max9271_device *dev, bool enable)
 	 * Short delays here appear to show bit-errors in the writes following.
 	 * Therefore a conservative delay seems best here.
 	 */
-	max9271_write(dev, 0x04, val);
+	ret = max9271_write(dev, 0x04, val);
+	if (ret < 0)
+		return ret;
+
 	usleep_range(5000, 8000);
 
 	return 0;
@@ -118,7 +121,7 @@ int max9271_configure_i2c(struct max9271_device *dev, u8 i2c_config)
 	int ret;
 
 	ret = max9271_write(dev, 0x0d, i2c_config);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	/* The delay required after an I2C bus configuration change is not
@@ -143,7 +146,10 @@ int max9271_set_high_threshold(struct max9271_device *dev, bool enable)
 	 * Enable or disable reverse channel high threshold to increase
 	 * immunity to power supply noise.
 	 */
-	max9271_write(dev, 0x08, enable ? ret | BIT(0) : ret & ~BIT(0));
+	ret = max9271_write(dev, 0x08, enable ? ret | BIT(0) : ret & ~BIT(0));
+	if (ret < 0)
+		return ret;
+
 	usleep_range(2000, 2500);
 
 	return 0;
@@ -152,6 +158,8 @@ EXPORT_SYMBOL_GPL(max9271_set_high_threshold);
 
 int max9271_configure_gmsl_link(struct max9271_device *dev)
 {
+	int ret;
+
 	/*
 	 * Configure the GMSL link:
 	 *
@@ -162,16 +170,24 @@ int max9271_configure_gmsl_link(struct max9271_device *dev)
 	 *
 	 * TODO: Make the GMSL link configuration parametric.
 	 */
-	max9271_write(dev, 0x07, MAX9271_DBL | MAX9271_HVEN |
-		      MAX9271_EDC_1BIT_PARITY);
+	ret = max9271_write(dev, 0x07, MAX9271_DBL | MAX9271_HVEN |
+			    MAX9271_EDC_1BIT_PARITY);
+	if (ret < 0)
+		return ret;
+
 	usleep_range(5000, 8000);
 
 	/*
 	 * Adjust spread spectrum to +4% and auto-detect pixel clock
 	 * and serial link rate.
 	 */
-	max9271_write(dev, 0x02, MAX9271_SPREAD_SPECT_4 | MAX9271_R02_RES |
-		      MAX9271_PCLK_AUTODETECT | MAX9271_SERIAL_AUTODETECT);
+	ret = max9271_write(dev, 0x02,
+			    MAX9271_SPREAD_SPECT_4 | MAX9271_R02_RES |
+			    MAX9271_PCLK_AUTODETECT |
+			    MAX9271_SERIAL_AUTODETECT);
+	if (ret < 0)
+		return ret;
+
 	usleep_range(5000, 8000);
 
 	return 0;
-- 
GitLab


From 9e0bf8393d0602cc7fda749b77cf8ec7f81249cb Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:08 +0200
Subject: [PATCH 3055/3804] media: i2c: max9271: Introduce wake_up() function

The MAX9271 chip manual prescribes a delay of 5 milliseconds
after the chip exits from low power state.

Add a new function to the max9271 library driver that wakes up the chip
with a dummy i2c transaction and implements the correct delay of 5
milliseconds after the chip exits from low power state.

Use the newly introduced function in the rdacm20 and rdacm21 camera
drivers. The former was not respecting the required delay while the
latter was waiting for a too-short timeout.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/max9271.c | 12 ++++++++++++
 drivers/media/i2c/max9271.h |  9 +++++++++
 drivers/media/i2c/rdacm20.c |  4 +---
 drivers/media/i2c/rdacm21.c |  5 +----
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/media/i2c/max9271.c b/drivers/media/i2c/max9271.c
index 2c7dc7fb98462..ff86c8c4ea61d 100644
--- a/drivers/media/i2c/max9271.c
+++ b/drivers/media/i2c/max9271.c
@@ -80,6 +80,18 @@ static int max9271_pclk_detect(struct max9271_device *dev)
 	return -EIO;
 }
 
+void max9271_wake_up(struct max9271_device *dev)
+{
+	/*
+	 * Use the chip default address as this function has to be called
+	 * before any other one.
+	 */
+	dev->client->addr = MAX9271_DEFAULT_ADDR;
+	i2c_smbus_read_byte(dev->client);
+	usleep_range(5000, 8000);
+}
+EXPORT_SYMBOL_GPL(max9271_wake_up);
+
 int max9271_set_serial_link(struct max9271_device *dev, bool enable)
 {
 	int ret;
diff --git a/drivers/media/i2c/max9271.h b/drivers/media/i2c/max9271.h
index d78fb21441e99..dc5e4e70ba6f5 100644
--- a/drivers/media/i2c/max9271.h
+++ b/drivers/media/i2c/max9271.h
@@ -85,6 +85,15 @@ struct max9271_device {
 	struct i2c_client *client;
 };
 
+/**
+ * max9271_wake_up() - Wake up the serializer by issuing an i2c transaction
+ * @dev: The max9271 device
+ *
+ * This function shall be called before any other interaction with the
+ * serializer.
+ */
+void max9271_wake_up(struct max9271_device *dev);
+
 /**
  * max9271_set_serial_link() - Enable/disable serial link
  * @dev: The max9271 device
diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index a4b639cf80637..ecd8bf97aae17 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -455,9 +455,7 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	unsigned int retry = 3;
 	int ret;
 
-	/* Verify communication with the MAX9271: ping to wakeup. */
-	dev->serializer->client->addr = MAX9271_DEFAULT_ADDR;
-	i2c_smbus_read_byte(dev->serializer->client);
+	max9271_wake_up(dev->serializer);
 
 	/* Serial link disabled during config as it needs a valid pixel clock. */
 	ret = max9271_set_serial_link(dev->serializer, false);
diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
index 5b78d81857730..67ed1e5c450d7 100644
--- a/drivers/media/i2c/rdacm21.c
+++ b/drivers/media/i2c/rdacm21.c
@@ -450,10 +450,7 @@ static int rdacm21_initialize(struct rdacm21_device *dev)
 {
 	int ret;
 
-	/* Verify communication with the MAX9271: ping to wakeup. */
-	dev->serializer.client->addr = MAX9271_DEFAULT_ADDR;
-	i2c_smbus_read_byte(dev->serializer.client);
-	usleep_range(3000, 5000);
+	max9271_wake_up(&dev->serializer);
 
 	/* Enable reverse channel and disable the serial link. */
 	ret = max9271_set_serial_link(&dev->serializer, false);
-- 
GitLab


From 7028772092b7f2fc50de7f00aa0817505b3b11f7 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:09 +0200
Subject: [PATCH 3056/3804] media: i2c: rdacm21: Add delay after OV490 reset

Add a delay after the OV490 chip is put in reset state. The reset
signal shall be held low for at least 250 useconds.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm21.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
index 67ed1e5c450d7..336fd5d482b7f 100644
--- a/drivers/media/i2c/rdacm21.c
+++ b/drivers/media/i2c/rdacm21.c
@@ -469,7 +469,10 @@ static int rdacm21_initialize(struct rdacm21_device *dev)
 	if (ret)
 		return ret;
 
-	/* Enable GPIO1 and hold OV490 in reset during max9271 configuration. */
+	/*
+	 * Enable GPIO1 and hold OV490 in reset during max9271 configuration.
+	 * The reset signal has to be asserted for at least 250 useconds.
+	 */
 	ret = max9271_enable_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
@@ -477,6 +480,7 @@ static int rdacm21_initialize(struct rdacm21_device *dev)
 	ret = max9271_clear_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
+	usleep_range(250, 500);
 
 	ret = max9271_configure_gmsl_link(&dev->serializer);
 	if (ret)
-- 
GitLab


From ff75332b260cd33cc19000fdb5d256d9db4470d1 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:10 +0200
Subject: [PATCH 3057/3804] media: i2c: rdacm21: Fix OV10640 powerup

The OV10640 image sensor powerdown signal is controlled by the first
line of the OV490 GPIO pad #1, but the pad #0 identifier
OV490_GPIO_OUTPUT_VALUE0 was erroneously used. As a result the image
sensor powerdown signal was never asserted but was left floating and
kept high by an internal pull-up resistor, causing sporadic failures
during the image sensor startup phase.

Fix this by using the correct GPIO pad identifier and waiting the
mandatory 1.5 millisecond delay after the powerup line is asserted. The
reset delay is not characterized in the chip manual other than as
"255 XVCLK + initialization". Wait for at least 3 milliseconds to
guarantee the SCCB bus is available.

While at it also fix the reset sequence, as the reset line was released
before the powerdown one, and the line was not cycled.

This commit fixes a sporadic start-up error triggered by a failure to
read the OV10640 chip ID:
rdacm21 8-0054: OV10640 ID mismatch: (0x01)

Fixes: a59f853b3b4b ("media: i2c: Add driver for RDACM21 camera module")
Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm21.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
index 336fd5d482b7f..6ab8a894e94b6 100644
--- a/drivers/media/i2c/rdacm21.c
+++ b/drivers/media/i2c/rdacm21.c
@@ -333,13 +333,19 @@ static int ov10640_initialize(struct rdacm21_device *dev)
 {
 	u8 val;
 
-	/* Power-up OV10640 by setting RESETB and PWDNB pins high. */
+	/* Enable GPIO0#0 (reset) and GPIO1#0 (pwdn) as output lines. */
 	ov490_write_reg(dev, OV490_GPIO_SEL0, OV490_GPIO0);
 	ov490_write_reg(dev, OV490_GPIO_SEL1, OV490_SPWDN0);
 	ov490_write_reg(dev, OV490_GPIO_DIRECTION0, OV490_GPIO0);
 	ov490_write_reg(dev, OV490_GPIO_DIRECTION1, OV490_SPWDN0);
+
+	/* Power up OV10640 and then reset it. */
+	ov490_write_reg(dev, OV490_GPIO_OUTPUT_VALUE1, OV490_SPWDN0);
+	usleep_range(1500, 3000);
+
+	ov490_write_reg(dev, OV490_GPIO_OUTPUT_VALUE0, 0x00);
+	usleep_range(1500, 3000);
 	ov490_write_reg(dev, OV490_GPIO_OUTPUT_VALUE0, OV490_GPIO0);
-	ov490_write_reg(dev, OV490_GPIO_OUTPUT_VALUE0, OV490_SPWDN0);
 	usleep_range(3000, 5000);
 
 	/* Read OV10640 ID to test communications. */
-- 
GitLab


From 2b821698dc73c00719e3dc367db712f727bbda85 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:11 +0200
Subject: [PATCH 3058/3804] media: i2c: rdacm21: Power up OV10640 before OV490

The current RDACM21 initialization routine powers up the OV10640 image
sensor after the OV490 ISP. The ISP is programmed with a firmware loaded
from an embedded serial flash that (most probably) also tries to interact
with and program the image sensor connected to the ISP.

As described in commit "media: i2c: rdacm21: Fix OV10640 powerup" the
image sensor powerdown signal is kept high by an internal pull up
resistor and the sensor occasionally fails to start up correctly if the
powerdown line is not asserted explicitly. Failures in the OV10640
startup cause the OV490 firmware to fail to boot correctly, which in
turn causes the camera module initialization to fail.

Fix this by powering up the OV10640 image sensor before testing the
OV490 firmware boot completion, by splitting the ov10640_initialize()
function in an ov10640_power_up() one and an ov10640_check_id() one.

Also make sure the OV10640 identification procedure gives enough time to
the image sensor to resume after the programming phase performed by the
OV490 firmware by repeating the ID read procedure.

This commit fixes a sporadic start-up error triggered by a failure to
detect the OV490 firmware boot completion:
rdacm21 8-0054: Timeout waiting for firmware boot

[hverkuil: fixed two typos in commit log]

Fixes: a59f853b3b4b ("media: i2c: Add driver for RDACM21 camera module")
Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm21.c | 46 ++++++++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/media/i2c/rdacm21.c b/drivers/media/i2c/rdacm21.c
index 6ab8a894e94b6..12ec5467ed1ee 100644
--- a/drivers/media/i2c/rdacm21.c
+++ b/drivers/media/i2c/rdacm21.c
@@ -69,6 +69,7 @@
 #define OV490_ISP_VSIZE_LOW		0x80820062
 #define OV490_ISP_VSIZE_HIGH		0x80820063
 
+#define OV10640_PID_TIMEOUT		20
 #define OV10640_ID_HIGH			0xa6
 #define OV10640_CHIP_ID			0x300a
 #define OV10640_PIXEL_RATE		55000000
@@ -329,10 +330,8 @@ static const struct v4l2_subdev_ops rdacm21_subdev_ops = {
 	.pad		= &rdacm21_subdev_pad_ops,
 };
 
-static int ov10640_initialize(struct rdacm21_device *dev)
+static void ov10640_power_up(struct rdacm21_device *dev)
 {
-	u8 val;
-
 	/* Enable GPIO0#0 (reset) and GPIO1#0 (pwdn) as output lines. */
 	ov490_write_reg(dev, OV490_GPIO_SEL0, OV490_GPIO0);
 	ov490_write_reg(dev, OV490_GPIO_SEL1, OV490_SPWDN0);
@@ -347,18 +346,35 @@ static int ov10640_initialize(struct rdacm21_device *dev)
 	usleep_range(1500, 3000);
 	ov490_write_reg(dev, OV490_GPIO_OUTPUT_VALUE0, OV490_GPIO0);
 	usleep_range(3000, 5000);
+}
 
-	/* Read OV10640 ID to test communications. */
-	ov490_write_reg(dev, OV490_SCCB_SLAVE0_DIR, OV490_SCCB_SLAVE_READ);
-	ov490_write_reg(dev, OV490_SCCB_SLAVE0_ADDR_HIGH, OV10640_CHIP_ID >> 8);
-	ov490_write_reg(dev, OV490_SCCB_SLAVE0_ADDR_LOW, OV10640_CHIP_ID & 0xff);
-
-	/* Trigger SCCB slave transaction and give it some time to complete. */
-	ov490_write_reg(dev, OV490_HOST_CMD, OV490_HOST_CMD_TRIGGER);
-	usleep_range(1000, 1500);
+static int ov10640_check_id(struct rdacm21_device *dev)
+{
+	unsigned int i;
+	u8 val;
 
-	ov490_read_reg(dev, OV490_SCCB_SLAVE0_DIR, &val);
-	if (val != OV10640_ID_HIGH) {
+	/* Read OV10640 ID to test communications. */
+	for (i = 0; i < OV10640_PID_TIMEOUT; ++i) {
+		ov490_write_reg(dev, OV490_SCCB_SLAVE0_DIR,
+				OV490_SCCB_SLAVE_READ);
+		ov490_write_reg(dev, OV490_SCCB_SLAVE0_ADDR_HIGH,
+				OV10640_CHIP_ID >> 8);
+		ov490_write_reg(dev, OV490_SCCB_SLAVE0_ADDR_LOW,
+				OV10640_CHIP_ID & 0xff);
+
+		/*
+		 * Trigger SCCB slave transaction and give it some time
+		 * to complete.
+		 */
+		ov490_write_reg(dev, OV490_HOST_CMD, OV490_HOST_CMD_TRIGGER);
+		usleep_range(1000, 1500);
+
+		ov490_read_reg(dev, OV490_SCCB_SLAVE0_DIR, &val);
+		if (val == OV10640_ID_HIGH)
+			break;
+		usleep_range(1000, 1500);
+	}
+	if (i == OV10640_PID_TIMEOUT) {
 		dev_err(dev->dev, "OV10640 ID mismatch: (0x%02x)\n", val);
 		return -ENODEV;
 	}
@@ -374,6 +390,8 @@ static int ov490_initialize(struct rdacm21_device *dev)
 	unsigned int i;
 	int ret;
 
+	ov10640_power_up(dev);
+
 	/*
 	 * Read OV490 Id to test communications. Give it up to 40msec to
 	 * exit from reset.
@@ -411,7 +429,7 @@ static int ov490_initialize(struct rdacm21_device *dev)
 		return -ENODEV;
 	}
 
-	ret = ov10640_initialize(dev);
+	ret = ov10640_check_id(dev);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 1524bb765d33a5b999b7af361e1e0fc9068b79d5 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:13 +0200
Subject: [PATCH 3059/3804] media: i2c: rdacm20: Embed 'serializer' field

There's no reason to dynamically allocate the 'serializer' field in
the driver structure.

Embed the field and adjust all its users in the driver.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm20.c | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index ecd8bf97aae17..91b9b68e115c3 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -312,7 +312,7 @@ static const struct ov10635_reg {
 
 struct rdacm20_device {
 	struct device			*dev;
-	struct max9271_device		*serializer;
+	struct max9271_device		serializer;
 	struct i2c_client		*sensor;
 	struct v4l2_subdev		sd;
 	struct media_pad		pad;
@@ -399,7 +399,7 @@ static int rdacm20_s_stream(struct v4l2_subdev *sd, int enable)
 {
 	struct rdacm20_device *dev = sd_to_rdacm20(sd);
 
-	return max9271_set_serial_link(dev->serializer, enable);
+	return max9271_set_serial_link(&dev->serializer, enable);
 }
 
 static int rdacm20_enum_mbus_code(struct v4l2_subdev *sd,
@@ -455,10 +455,10 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	unsigned int retry = 3;
 	int ret;
 
-	max9271_wake_up(dev->serializer);
+	max9271_wake_up(&dev->serializer);
 
 	/* Serial link disabled during config as it needs a valid pixel clock. */
-	ret = max9271_set_serial_link(dev->serializer, false);
+	ret = max9271_set_serial_link(&dev->serializer, false);
 	if (ret)
 		return ret;
 
@@ -466,35 +466,35 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	 *  Ensure that we have a good link configuration before attempting to
 	 *  identify the device.
 	 */
-	max9271_configure_i2c(dev->serializer, MAX9271_I2CSLVSH_469NS_234NS |
-					       MAX9271_I2CSLVTO_1024US |
-					       MAX9271_I2CMSTBT_105KBPS);
+	max9271_configure_i2c(&dev->serializer, MAX9271_I2CSLVSH_469NS_234NS |
+						MAX9271_I2CSLVTO_1024US |
+						MAX9271_I2CMSTBT_105KBPS);
 
-	max9271_configure_gmsl_link(dev->serializer);
+	max9271_configure_gmsl_link(&dev->serializer);
 
-	ret = max9271_verify_id(dev->serializer);
+	ret = max9271_verify_id(&dev->serializer);
 	if (ret < 0)
 		return ret;
 
-	ret = max9271_set_address(dev->serializer, dev->addrs[0]);
+	ret = max9271_set_address(&dev->serializer, dev->addrs[0]);
 	if (ret < 0)
 		return ret;
-	dev->serializer->client->addr = dev->addrs[0];
+	dev->serializer.client->addr = dev->addrs[0];
 
 	/*
 	 * Reset the sensor by cycling the OV10635 reset signal connected to the
 	 * MAX9271 GPIO1 and verify communication with the OV10635.
 	 */
-	ret = max9271_enable_gpios(dev->serializer, MAX9271_GPIO1OUT);
+	ret = max9271_enable_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
 
-	ret = max9271_clear_gpios(dev->serializer, MAX9271_GPIO1OUT);
+	ret = max9271_clear_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
 	usleep_range(10000, 15000);
 
-	ret = max9271_set_gpios(dev->serializer, MAX9271_GPIO1OUT);
+	ret = max9271_set_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
 	usleep_range(10000, 15000);
@@ -552,13 +552,7 @@ static int rdacm20_probe(struct i2c_client *client)
 	if (!dev)
 		return -ENOMEM;
 	dev->dev = &client->dev;
-
-	dev->serializer = devm_kzalloc(&client->dev, sizeof(*dev->serializer),
-				       GFP_KERNEL);
-	if (!dev->serializer)
-		return -ENOMEM;
-
-	dev->serializer->client = client;
+	dev->serializer.client = client;
 
 	ret = of_property_read_u32_array(client->dev.of_node, "reg",
 					 dev->addrs, 2);
-- 
GitLab


From 09741de09bf8a05558c37b2bbd85ca8f516fb753 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:12 +0200
Subject: [PATCH 3060/3804] media: i2c: rdacm20: Enable noise immunity

Enable the noise immunity threshold at the end of the rdacm20
initialization routine.

The rdacm20 camera module has been so far tested with a startup
delay that allowed the embedded MCU to program the serializer. If
the initialization routine is run before the MCU programs the
serializer and the image sensor, and their addresses get changed
by the rdacm20 driver, it is required to manually enable the noise
immunity threshold to make the communication on the control channel
more reliable.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm20.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index 91b9b68e115c3..9d5de15fbdb89 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -539,7 +539,19 @@ again:
 
 	dev_info(dev->dev, "Identified MAX9271 + OV10635 device\n");
 
-	return 0;
+	/*
+	 * Set reverse channel high threshold to increase noise immunity.
+	 *
+	 * This should be compensated by increasing the reverse channel
+	 * amplitude on the remote deserializer side.
+	 *
+	 * TODO Inspect the embedded MCU programming sequence to make sure
+	 * there are no conflicts with the configuration applied here.
+	 *
+	 * TODO Clarify the embedded MCU startup delay to avoid write
+	 * collisions on the I2C bus.
+	 */
+	return max9271_set_high_threshold(&dev->serializer, true);
 }
 
 static int rdacm20_probe(struct i2c_client *client)
-- 
GitLab


From 59a81c70b0b3563fe8426b0fe4d96263b6fa8823 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:14 +0200
Subject: [PATCH 3061/3804] media: i2c: rdacm20: Report camera module name

When the device is identified the driver currently reports the
names of the chips embedded in the camera module.

Report the name of the camera module itself instead.
Cosmetic change only.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm20.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index 9d5de15fbdb89..343604b61e2ab 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -537,7 +537,7 @@ again:
 	if (ret)
 		return ret;
 
-	dev_info(dev->dev, "Identified MAX9271 + OV10635 device\n");
+	dev_info(dev->dev, "Identified RDACM20 camera module\n");
 
 	/*
 	 * Set reverse channel high threshold to increase noise immunity.
-- 
GitLab


From 47f8b8a2cfee45f2405527e225a566fe39f9d400 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:15 +0200
Subject: [PATCH 3062/3804] media: i2c: rdacm20: Check return values

The camera module initialization routine does not check the return
value of a few functions. Fix that.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm20.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index 343604b61e2ab..0fc5512cd5e71 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -466,11 +466,16 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	 *  Ensure that we have a good link configuration before attempting to
 	 *  identify the device.
 	 */
-	max9271_configure_i2c(&dev->serializer, MAX9271_I2CSLVSH_469NS_234NS |
-						MAX9271_I2CSLVTO_1024US |
-						MAX9271_I2CMSTBT_105KBPS);
+	ret = max9271_configure_i2c(&dev->serializer,
+				    MAX9271_I2CSLVSH_469NS_234NS |
+				    MAX9271_I2CSLVTO_1024US |
+				    MAX9271_I2CMSTBT_105KBPS);
+	if (ret)
+		return ret;
 
-	max9271_configure_gmsl_link(&dev->serializer);
+	ret = max9271_configure_gmsl_link(&dev->serializer);
+	if (ret)
+		return ret;
 
 	ret = max9271_verify_id(&dev->serializer);
 	if (ret < 0)
-- 
GitLab


From 198bb646e8553e8abd8d83492a27b601ab97b75d Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Wed, 16 Jun 2021 14:46:16 +0200
Subject: [PATCH 3063/3804] media: i2c: rdacm20: Re-work ov10635 reset

The OV10635 image sensor embedded in the camera module is currently
reset after the MAX9271 initialization with two long delays that were
most probably not correctly characterized.

Re-work the image sensor reset procedure by holding the chip in reset
during the MAX9271 configuration, removing the long sleep delays and
only wait after the chip exits from reset for 350-500 microseconds
interval, which is larger than the minimum (2048 * (1 / XVCLK)) timeout
characterized in the chip manual.

Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/i2c/rdacm20.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/drivers/media/i2c/rdacm20.c b/drivers/media/i2c/rdacm20.c
index 0fc5512cd5e71..025a610de8935 100644
--- a/drivers/media/i2c/rdacm20.c
+++ b/drivers/media/i2c/rdacm20.c
@@ -473,6 +473,19 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	if (ret)
 		return ret;
 
+	/*
+	 * Hold OV10635 in reset during max9271 configuration. The reset signal
+	 * has to be asserted for at least 200 microseconds.
+	 */
+	ret = max9271_enable_gpios(&dev->serializer, MAX9271_GPIO1OUT);
+	if (ret)
+		return ret;
+
+	ret = max9271_clear_gpios(&dev->serializer, MAX9271_GPIO1OUT);
+	if (ret)
+		return ret;
+	usleep_range(200, 500);
+
 	ret = max9271_configure_gmsl_link(&dev->serializer);
 	if (ret)
 		return ret;
@@ -487,22 +500,14 @@ static int rdacm20_initialize(struct rdacm20_device *dev)
 	dev->serializer.client->addr = dev->addrs[0];
 
 	/*
-	 * Reset the sensor by cycling the OV10635 reset signal connected to the
-	 * MAX9271 GPIO1 and verify communication with the OV10635.
+	 * Release ov10635 from reset and initialize it. The image sensor
+	 * requires at least 2048 XVCLK cycles (85 micro-seconds at 24MHz)
+	 * before being available. Stay safe and wait up to 500 micro-seconds.
 	 */
-	ret = max9271_enable_gpios(&dev->serializer, MAX9271_GPIO1OUT);
-	if (ret)
-		return ret;
-
-	ret = max9271_clear_gpios(&dev->serializer, MAX9271_GPIO1OUT);
-	if (ret)
-		return ret;
-	usleep_range(10000, 15000);
-
 	ret = max9271_set_gpios(&dev->serializer, MAX9271_GPIO1OUT);
 	if (ret)
 		return ret;
-	usleep_range(10000, 15000);
+	usleep_range(100, 500);
 
 again:
 	ret = ov10635_read16(dev, OV10635_PID);
-- 
GitLab


From 6e9ef8ca687e69e9d4cc89033d98e06350b0f3e0 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Wed, 16 Jun 2021 15:42:18 +1200
Subject: [PATCH 3064/3804] hwmon: (pmbus/bpa-rs600) Handle Vin readings >=
 256V

The BPA-RS600 doesn't follow the PMBus spec for linear data.
Specifically it treats the mantissa as an unsigned 11-bit value instead
of a two's complement 11-bit value. At this point it's unclear whether
this only affects Vin or if Pin/Pout1 are affected as well. Erring on
the side of caution only Vin is dealt with here.

Fixes: 15b2703e5e02 ("hwmon: (pmbus) Add driver for BluTek BPA-RS600")
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Link: https://lore.kernel.org/r/20210616034218.25821-1-chris.packham@alliedtelesis.co.nz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/bpa-rs600.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/drivers/hwmon/pmbus/bpa-rs600.c b/drivers/hwmon/pmbus/bpa-rs600.c
index f6558ee9dec36..2be69fedfa361 100644
--- a/drivers/hwmon/pmbus/bpa-rs600.c
+++ b/drivers/hwmon/pmbus/bpa-rs600.c
@@ -46,6 +46,32 @@ static int bpa_rs600_read_byte_data(struct i2c_client *client, int page, int reg
 	return ret;
 }
 
+/*
+ * The BPA-RS600 violates the PMBus spec. Specifically it treats the
+ * mantissa as unsigned. Deal with this here to allow the PMBus core
+ * to work with correctly encoded data.
+ */
+static int bpa_rs600_read_vin(struct i2c_client *client)
+{
+	int ret, exponent, mantissa;
+
+	ret = pmbus_read_word_data(client, 0, 0xff, PMBUS_READ_VIN);
+	if (ret < 0)
+		return ret;
+
+	if (ret & BIT(10)) {
+		exponent = ret >> 11;
+		mantissa = ret & 0x7ff;
+
+		exponent++;
+		mantissa >>= 1;
+
+		ret = (exponent << 11) | mantissa;
+	}
+
+	return ret;
+}
+
 static int bpa_rs600_read_word_data(struct i2c_client *client, int page, int phase, int reg)
 {
 	int ret;
@@ -85,6 +111,9 @@ static int bpa_rs600_read_word_data(struct i2c_client *client, int page, int pha
 		/* These commands return data but it is invalid/un-documented */
 		ret = -ENXIO;
 		break;
+	case PMBUS_READ_VIN:
+		ret = bpa_rs600_read_vin(client);
+		break;
 	default:
 		if (reg >= PMBUS_VIRT_BASE)
 			ret = -ENXIO;
-- 
GitLab


From ab9d85e9d5555c75992dc42bf3b9eebe0955ceb9 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Fri, 23 Apr 2021 17:33:28 +0200
Subject: [PATCH 3065/3804] hwmon: (pmbus/zl6100) Add support for ZLS1003,
 ZLS4009 and ZL8802

Add support for Renesas ZL8802 Dual Channel/Dual Phase PMBus DC/DC
Digital Controller as well as ZLS1003 and ZLS4009 custom DC/DC
controller chips.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Link: https://lore.kernel.org/r/20210423153329.33457-2-erik.rosen@metormote.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/zl6100.c | 94 ++++++++++++++++++++++++++++++------
 1 file changed, 80 insertions(+), 14 deletions(-)

diff --git a/drivers/hwmon/pmbus/zl6100.c b/drivers/hwmon/pmbus/zl6100.c
index b7d4eacdc3ef7..e9df0c56d91e9 100644
--- a/drivers/hwmon/pmbus/zl6100.c
+++ b/drivers/hwmon/pmbus/zl6100.c
@@ -18,7 +18,7 @@
 #include "pmbus.h"
 
 enum chips { zl2004, zl2005, zl2006, zl2008, zl2105, zl2106, zl6100, zl6105,
-	     zl9101, zl9117 };
+	     zl8802, zl9101, zl9117, zls1003, zls4009 };
 
 struct zl6100_data {
 	int id;
@@ -34,6 +34,13 @@ struct zl6100_data {
 
 #define ZL6100_MFR_XTEMP_ENABLE		BIT(7)
 
+#define ZL8802_MFR_USER_GLOBAL_CONFIG	0xe9
+#define ZL8802_MFR_TMON_ENABLE		BIT(12)
+#define ZL8802_MFR_USER_CONFIG		0xd1
+#define ZL8802_MFR_XTEMP_ENABLE_2	BIT(1)
+#define ZL8802_MFR_DDC_CONFIG		0xd3
+#define ZL8802_MFR_PHASES_MASK		0x0007
+
 #define MFR_VMON_OV_FAULT_LIMIT		0xf5
 #define MFR_VMON_UV_FAULT_LIMIT		0xf6
 #define MFR_READ_VMON			0xf7
@@ -132,7 +139,7 @@ static int zl6100_read_word_data(struct i2c_client *client, int page,
 	struct zl6100_data *data = to_zl6100_data(info);
 	int ret, vreg;
 
-	if (page > 0)
+	if (page >= info->pages)
 		return -ENXIO;
 
 	if (data->id == zl2005) {
@@ -191,7 +198,7 @@ static int zl6100_read_byte_data(struct i2c_client *client, int page, int reg)
 	struct zl6100_data *data = to_zl6100_data(info);
 	int ret, status;
 
-	if (page > 0)
+	if (page >= info->pages)
 		return -ENXIO;
 
 	zl6100_wait(data);
@@ -230,7 +237,7 @@ static int zl6100_write_word_data(struct i2c_client *client, int page, int reg,
 	struct zl6100_data *data = to_zl6100_data(info);
 	int ret, vreg;
 
-	if (page > 0)
+	if (page >= info->pages)
 		return -ENXIO;
 
 	switch (reg) {
@@ -271,7 +278,7 @@ static int zl6100_write_byte(struct i2c_client *client, int page, u8 value)
 	struct zl6100_data *data = to_zl6100_data(info);
 	int ret;
 
-	if (page > 0)
+	if (page >= info->pages)
 		return -ENXIO;
 
 	zl6100_wait(data);
@@ -287,6 +294,10 @@ static const struct i2c_device_id zl6100_id[] = {
 	{"bmr462", zl2008},
 	{"bmr463", zl2008},
 	{"bmr464", zl2008},
+	{"bmr465", zls4009},
+	{"bmr466", zls1003},
+	{"bmr467", zls4009},
+	{"bmr469", zl8802},
 	{"zl2004", zl2004},
 	{"zl2005", zl2005},
 	{"zl2006", zl2006},
@@ -295,15 +306,18 @@ static const struct i2c_device_id zl6100_id[] = {
 	{"zl2106", zl2106},
 	{"zl6100", zl6100},
 	{"zl6105", zl6105},
+	{"zl8802", zl8802},
 	{"zl9101", zl9101},
 	{"zl9117", zl9117},
+	{"zls1003", zls1003},
+	{"zls4009", zls4009},
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, zl6100_id);
 
 static int zl6100_probe(struct i2c_client *client)
 {
-	int ret;
+	int ret, i;
 	struct zl6100_data *data;
 	struct pmbus_driver_info *info;
 	u8 device_id[I2C_SMBUS_BLOCK_MAX + 1];
@@ -367,18 +381,70 @@ static int zl6100_probe(struct i2c_client *client)
 	  | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
 
 	/*
-	 * ZL2004, ZL9101M, and ZL9117M support monitoring an extra voltage
-	 * (VMON for ZL2004, VDRV for ZL9101M and ZL9117M). Report it as vmon.
+	 * ZL2004, ZL8802, ZL9101M, ZL9117M and ZLS4009 support monitoring
+	 * an extra voltage (VMON for ZL2004, ZL8802 and ZLS4009,
+	 * VDRV for ZL9101M and ZL9117M). Report it as vmon.
 	 */
-	if (data->id == zl2004 || data->id == zl9101 || data->id == zl9117)
+	if (data->id == zl2004 || data->id == zl8802 || data->id == zl9101 ||
+	    data->id == zl9117 || data->id == zls4009)
 		info->func[0] |= PMBUS_HAVE_VMON | PMBUS_HAVE_STATUS_VMON;
 
-	ret = i2c_smbus_read_word_data(client, ZL6100_MFR_CONFIG);
-	if (ret < 0)
-		return ret;
+	/*
+	 * ZL8802 has two outputs that can be used either independently or in
+	 * a current sharing configuration. The driver uses the DDC_CONFIG
+	 * register to check if the module is running with independent or
+	 * shared outputs. If the module is in shared output mode, only one
+	 * output voltage will be reported.
+	 */
+	if (data->id == zl8802) {
+		info->pages = 2;
+		info->func[0] |= PMBUS_HAVE_IIN;
+
+		ret = i2c_smbus_read_word_data(client, ZL8802_MFR_DDC_CONFIG);
+		if (ret < 0)
+			return ret;
+
+		data->access = ktime_get();
+		zl6100_wait(data);
+
+		if (ret & ZL8802_MFR_PHASES_MASK)
+			info->func[1] |= PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT;
+		else
+			info->func[1] = PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT
+				| PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT;
 
-	if (ret & ZL6100_MFR_XTEMP_ENABLE)
-		info->func[0] |= PMBUS_HAVE_TEMP2;
+		for (i = 0; i < 2; i++) {
+			ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, i);
+			if (ret < 0)
+				return ret;
+
+			data->access = ktime_get();
+			zl6100_wait(data);
+
+			ret = i2c_smbus_read_word_data(client, ZL8802_MFR_USER_CONFIG);
+			if (ret < 0)
+				return ret;
+
+			if (ret & ZL8802_MFR_XTEMP_ENABLE_2)
+				info->func[i] |= PMBUS_HAVE_TEMP2;
+
+			data->access = ktime_get();
+			zl6100_wait(data);
+		}
+		ret = i2c_smbus_read_word_data(client, ZL8802_MFR_USER_GLOBAL_CONFIG);
+		if (ret < 0)
+			return ret;
+
+		if (ret & ZL8802_MFR_TMON_ENABLE)
+			info->func[0] |= PMBUS_HAVE_TEMP3;
+	} else {
+		ret = i2c_smbus_read_word_data(client, ZL6100_MFR_CONFIG);
+		if (ret < 0)
+			return ret;
+
+		if (ret & ZL6100_MFR_XTEMP_ENABLE)
+			info->func[0] |= PMBUS_HAVE_TEMP2;
+	}
 
 	data->access = ktime_get();
 	zl6100_wait(data);
-- 
GitLab


From 6e954d2e649a373cdebb4d2b0de5197ca3f6b87e Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Fri, 23 Apr 2021 17:33:29 +0200
Subject: [PATCH 3066/3804] hwmon: (pmbus/zl6100) Update documentation for
 zl6100 driver

Update documentation for zl6100 driver and fix dead links to technical
specifications

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Link: https://lore.kernel.org/r/20210423153329.33457-3-erik.rosen@metormote.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/zl6100.rst | 132 ++++++++++++++++++++++-----------
 1 file changed, 89 insertions(+), 43 deletions(-)

diff --git a/Documentation/hwmon/zl6100.rst b/Documentation/hwmon/zl6100.rst
index 968aff10ce0aa..d42ed9d3ac69a 100644
--- a/Documentation/hwmon/zl6100.rst
+++ b/Documentation/hwmon/zl6100.rst
@@ -3,87 +3,103 @@ Kernel driver zl6100
 
 Supported chips:
 
-  * Intersil / Zilker Labs ZL2004
+  * Renesas / Intersil / Zilker Labs ZL2004
 
     Prefix: 'zl2004'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6847.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2004-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL2005
+  * Renesas / Intersil / Zilker Labs ZL2005
 
     Prefix: 'zl2005'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6848.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2005-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL2006
+  * Renesas / Intersil / Zilker Labs ZL2006
 
     Prefix: 'zl2006'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6850.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2006-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL2008
+  * Renesas / Intersil / Zilker Labs ZL2008
 
     Prefix: 'zl2008'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6859.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2008-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL2105
+  * Renesas / Intersil / Zilker Labs ZL2105
 
     Prefix: 'zl2105'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6851.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2105-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL2106
+  * Renesas / Intersil / Zilker Labs ZL2106
 
     Prefix: 'zl2106'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6852.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl2106-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL6100
+  * Renesas / Intersil / Zilker Labs ZL6100
 
     Prefix: 'zl6100'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6876.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl6100-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL6105
+  * Renesas / Intersil / Zilker Labs ZL6105
 
     Prefix: 'zl6105'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn6906.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl6105-datasheet.pdf
 
-  * Intersil / Zilker Labs ZL9101M
+  * Renesas / Intersil / Zilker Labs ZL8802
+
+    Prefix: 'zl8802'
+
+    Addresses scanned: -
+
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl8802-datasheet
+
+  * Renesas / Intersil / Zilker Labs ZL9101M
 
     Prefix: 'zl9101'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn7669.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl9101m-datasheet
 
-  * Intersil / Zilker Labs ZL9117M
+  * Renesas / Intersil / Zilker Labs ZL9117M
 
     Prefix: 'zl9117'
 
     Addresses scanned: -
 
-    Datasheet: http://www.intersil.com/data/fn/fn7914.pdf
+    Datasheet: https://www.renesas.com/us/en/document/dst/zl9117m-datasheet
+
+  * Renesas / Intersil / Zilker Labs ZLS1003, ZLS4009
+
+    Prefix: 'zls1003', 'zls4009'
+
+    Addresses scanned: -
+
+    Datasheet: Not published
 
-  * Ericsson BMR450, BMR451
+  * Flex BMR450, BMR451
 
     Prefix: 'bmr450', 'bmr451'
 
@@ -91,17 +107,39 @@ Supported chips:
 
     Datasheet:
 
-http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401
+https://flexpowermodules.com/resources/fpm-techspec-bmr450-digital-pol-regulators-20a
 
-  * Ericsson BMR462, BMR463, BMR464
+  * Flex BMR462, BMR463, BMR464
 
     Prefixes: 'bmr462', 'bmr463', 'bmr464'
 
     Addresses scanned: -
 
-    Datasheet:
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-bmr462
+
+  * Flex BMR465, BMR467
+
+    Prefixes: 'bmr465', 'bmr467'
+
+    Addresses scanned: -
+
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-bmr465-digital-pol
+
+  * Flex BMR466
+
+    Prefixes: 'bmr466'
+
+    Addresses scanned: -
+
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-bmr466-8x12
 
-	http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256
+  * Flex BMR469
+
+    Prefixes: 'bmr469'
+
+    Addresses scanned: -
+
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-bmr4696001
 
 Author: Guenter Roeck <linux@roeck-us.net>
 
@@ -109,8 +147,8 @@ Author: Guenter Roeck <linux@roeck-us.net>
 Description
 -----------
 
-This driver supports hardware monitoring for Intersil / Zilker Labs ZL6100 and
-compatible digital DC-DC controllers.
+This driver supports hardware monitoring for Renesas / Intersil / Zilker Labs
+ZL6100 and compatible digital DC-DC controllers.
 
 The driver is a client driver to the core PMBus driver. Please see
 Documentation/hwmon/pmbus.rst and Documentation.hwmon/pmbus-core for details
@@ -147,12 +185,12 @@ Module parameters
 delay
 -----
 
-Intersil/Zilker Labs DC-DC controllers require a minimum interval between I2C
-bus accesses. According to Intersil, the minimum interval is 2 ms, though 1 ms
-appears to be sufficient and has not caused any problems in testing. The problem
-is known to affect all currently supported chips. For manual override, the
-driver provides a writeable module parameter, 'delay', which can be used to set
-the interval to a value between 0 and 65,535 microseconds.
+Renesas/Intersil/Zilker Labs DC-DC controllers require a minimum interval
+between I2C bus accesses. According to Intersil, the minimum interval is 2 ms,
+though 1 ms appears to be sufficient and has not caused any problems in testing.
+The problem is known to affect all currently supported chips. For manual override,
+the driver provides a writeable module parameter, 'delay', which can be used
+to set the interval to a value between 0 and 65,535 microseconds.
 
 
 Sysfs entries
@@ -182,24 +220,32 @@ in2_crit		Critical maximum VMON/VDRV voltage.
 in2_lcrit_alarm		VMON/VDRV voltage critical low alarm.
 in2_crit_alarm		VMON/VDRV voltage critical high alarm.
 
-			vmon attributes are supported on ZL2004, ZL9101M,
-			and ZL9117M only.
+			vmon attributes are supported on ZL2004, ZL8802,
+			ZL9101M, ZL9117M and ZLS4009 only.
 
-inX_label		"vout1"
+inX_label		"vout[12]"
 inX_input		Measured output voltage.
 inX_lcrit		Critical minimum output Voltage.
 inX_crit		Critical maximum output voltage.
 inX_lcrit_alarm		Critical output voltage critical low alarm.
 inX_crit_alarm		Critical output voltage critical high alarm.
 
-			X is 3 for ZL2004, ZL9101M, and ZL9117M, 2 otherwise.
+			X is 3 for ZL2004, ZL9101M, and ZL9117M,
+			3, 4 for ZL8802 and 2 otherwise.
+
+curr1_label		"iin"
+curr1_input		Measured input current.
+
+			iin attributes are supported on ZL8802 only
+
+currY_label		"iout[12]"
+currY_input		Measured output current.
+currY_lcrit		Critical minimum output current.
+currY_crit		Critical maximum output current.
+currY_lcrit_alarm	Output current critical low alarm.
+currY_crit_alarm	Output current critical high alarm.
 
-curr1_label		"iout1"
-curr1_input		Measured output current.
-curr1_lcrit		Critical minimum output current.
-curr1_crit		Critical maximum output current.
-curr1_lcrit_alarm	Output current critical low alarm.
-curr1_crit_alarm	Output current critical high alarm.
+			Y is 2, 3 for ZL8802, 1 otherwise
 
 temp[12]_input		Measured temperature.
 temp[12]_min		Minimum temperature.
-- 
GitLab


From ec081f9154766be98b7be6e4c4483b580c5b12e7 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 29 Apr 2021 14:11:49 +0200
Subject: [PATCH 3067/3804] hwmon: (lm75) Add TI TMP1075 support

TI TMP1075 is an LM75 compatible sensor, so let's
add support for it.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210429121150.106804-1-robert.marko@sartura.hr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/lm75.rst |  6 ++++--
 drivers/hwmon/lm75.c         | 13 +++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/lm75.rst b/Documentation/hwmon/lm75.rst
index 81257d5fc48f1..8d0ab4ad5fb52 100644
--- a/Documentation/hwmon/lm75.rst
+++ b/Documentation/hwmon/lm75.rst
@@ -93,9 +93,9 @@ Supported chips:
 
 	       https://www.st.com/resource/en/datasheet/stlm75.pdf
 
-  * Texas Instruments TMP100, TMP101, TMP105, TMP112, TMP75, TMP75B, TMP75C, TMP175, TMP275
+  * Texas Instruments TMP100, TMP101, TMP105, TMP112, TMP75, TMP75B, TMP75C, TMP175, TMP275, TMP1075
 
-    Prefixes: 'tmp100', 'tmp101', 'tmp105', 'tmp112', 'tmp175', 'tmp75', 'tmp75b', 'tmp75c', 'tmp275'
+    Prefixes: 'tmp100', 'tmp101', 'tmp105', 'tmp112', 'tmp175', 'tmp75', 'tmp75b', 'tmp75c', 'tmp275', 'tmp1075'
 
     Addresses scanned: none
 
@@ -119,6 +119,8 @@ Supported chips:
 
 	       https://www.ti.com/product/tmp275
 
+         https://www.ti.com/product/TMP1075
+
   * NXP LM75B, PCT2075
 
     Prefix: 'lm75b', 'pct2075'
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index e447febd121a2..afdbb63237b9e 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -50,6 +50,7 @@ enum lm75_type {		/* keep sorted in alphabetical order */
 	tmp75,
 	tmp75b,
 	tmp75c,
+	tmp1075,
 };
 
 /**
@@ -293,6 +294,13 @@ static const struct lm75_params device_params[] = {
 		.clr_mask = 1 << 5,	/*not one-shot mode*/
 		.default_resolution = 12,
 		.default_sample_time = MSEC_PER_SEC / 12,
+	},
+	[tmp1075] = { /* not one-shot mode, 27.5 ms sample rate */
+		.clr_mask = 1 << 5 | 1 << 6 | 1 << 7,
+		.default_resolution = 12,
+		.default_sample_time = 28,
+		.num_sample_times = 4,
+		.sample_times = (unsigned int []){ 28, 55, 110, 220 },
 	}
 };
 
@@ -662,6 +670,7 @@ static const struct i2c_device_id lm75_ids[] = {
 	{ "tmp75", tmp75, },
 	{ "tmp75b", tmp75b, },
 	{ "tmp75c", tmp75c, },
+	{ "tmp1075", tmp1075, },
 	{ /* LIST END */ }
 };
 MODULE_DEVICE_TABLE(i2c, lm75_ids);
@@ -771,6 +780,10 @@ static const struct of_device_id __maybe_unused lm75_of_match[] = {
 		.compatible = "ti,tmp75c",
 		.data = (void *)tmp75c
 	},
+	{
+		.compatible = "ti,tmp1075",
+		.data = (void *)tmp1075
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, lm75_of_match);
-- 
GitLab


From 42c7fd53aeff8241d64cdcfaffe06bb955852112 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 29 Apr 2021 14:11:50 +0200
Subject: [PATCH 3068/3804] dt-bindings: hwmon: Add Texas Instruments TMP1075

Document the DT compatible for TI TMP1075 which
is a LM75 compatible sensor.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210429121150.106804-2-robert.marko@sartura.hr
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/hwmon/lm75.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/hwmon/lm75.yaml b/Documentation/devicetree/bindings/hwmon/lm75.yaml
index 96eed5cc7841b..72980d083c210 100644
--- a/Documentation/devicetree/bindings/hwmon/lm75.yaml
+++ b/Documentation/devicetree/bindings/hwmon/lm75.yaml
@@ -30,6 +30,7 @@ properties:
       - st,stds75
       - st,stlm75
       - microchip,tcn75
+      - ti,tmp1075
       - ti,tmp100
       - ti,tmp101
       - ti,tmp105
-- 
GitLab


From f0635523c8b57aea6b1b75e99ea9c86ccc2a8b45 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sun, 16 May 2021 12:18:18 +0200
Subject: [PATCH 3069/3804] docs: hwmon: ir36021.rst: replace some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+2010 ('‐'): HYPHEN
	  as ASCII HYPHEN is preferred over U+2010

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/ba8b5122ac9d4918fd966d0eb0a5ca9d89044b04.1621159997.git.mchehab+huawei@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/ir36021.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/hwmon/ir36021.rst b/Documentation/hwmon/ir36021.rst
index ca3436b04e202..1faa85c39f1b8 100644
--- a/Documentation/hwmon/ir36021.rst
+++ b/Documentation/hwmon/ir36021.rst
@@ -19,7 +19,7 @@ Authors:
 Description
 -----------
 
-The IR36021 is a dual‐loop digital multi‐phase buck controller designed for
+The IR36021 is a dual-loop digital multi-phase buck controller designed for
 point of load applications.
 
 Usage Notes
-- 
GitLab


From b3ea2fe7e2814d17426674eff3d440c4e9c3a107 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Mon, 10 May 2021 12:26:17 +0200
Subject: [PATCH 3070/3804] docs: hwmon: avoid using UTF-8 chars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While UTF-8 characters can be used in the Linux documentation,
it is best to use them only when ASCII doesn't offer a good replacement.
So, replace the occurrences of the following UTF-8 characters:

	- U+2010 ('‐'): HYPHEN
	- U+2013 ('–'): EN DASH
	- U+2019 ('’'): RIGHT SINGLE QUOTATION MARK

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/ccdd1bf45963a7748188a97c75f667b37bd43d2f.1620641727.git.mchehab+huawei@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/ltc2992.rst  | 2 +-
 Documentation/hwmon/pm6764tr.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/ltc2992.rst b/Documentation/hwmon/ltc2992.rst
index 46aa1aa84a1a7..a0bcd867a0f5a 100644
--- a/Documentation/hwmon/ltc2992.rst
+++ b/Documentation/hwmon/ltc2992.rst
@@ -19,7 +19,7 @@ This driver supports hardware monitoring for Linear Technology LTC2992 power mon
 LTC2992 is a rail-to-rail system monitor that measures current,
 voltage, and power of two supplies.
 
-Two ADCs simultaneously measure each supply’s current. A third ADC monitors
+Two ADCs simultaneously measure each supply's current. A third ADC monitors
 the input voltages and four auxiliary external voltages.
 
 
diff --git a/Documentation/hwmon/pm6764tr.rst b/Documentation/hwmon/pm6764tr.rst
index a1fb8fea23267..294a8ffc8bd8d 100644
--- a/Documentation/hwmon/pm6764tr.rst
+++ b/Documentation/hwmon/pm6764tr.rst
@@ -20,7 +20,7 @@ Description:
 ------------
 
 This driver supports the STMicroelectronics PM6764TR chip. The PM6764TR is a high
-performance digital controller designed to power Intel’s VR12.5 processors and memories.
+performance digital controller designed to power Intel's VR12.5 processors and memories.
 
 The device utilizes digital technology to implement all control and power management
 functions to provide maximum flexibility and performance. The NVM is embedded to store
-- 
GitLab


From ac61c8aae446b9c0fe18981fe721d4a43e283ad6 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 8 May 2021 09:44:50 -0700
Subject: [PATCH 3071/3804] hwmon: (lm70) Revert "hwmon: (lm70) Add support for
 ACPI"

This reverts commit b58bd4c6dfe709646ed9efcbba2a70643f9bc873.

None of the ACPI IDs introduced with the reverted patch is a valid ACPI
device ID. Any ACPI users of this driver are advised to use PRP0001 and
a devicetree-compatible device identification.

Fixes: b58bd4c6dfe7 ("hwmon: (lm70) Add support for ACPI")
Cc: Andrej Picej <andpicej@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm70.c | 26 +-------------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index 40eab3349904b..6b884ea009877 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -22,10 +22,10 @@
 #include <linux/hwmon.h>
 #include <linux/mutex.h>
 #include <linux/mod_devicetable.h>
+#include <linux/of.h>
 #include <linux/property.h>
 #include <linux/spi/spi.h>
 #include <linux/slab.h>
-#include <linux/acpi.h>
 
 #define DRVNAME		"lm70"
 
@@ -148,29 +148,6 @@ static const struct of_device_id lm70_of_ids[] = {
 MODULE_DEVICE_TABLE(of, lm70_of_ids);
 #endif
 
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id lm70_acpi_ids[] = {
-	{
-		.id = "LM000070",
-		.driver_data = LM70_CHIP_LM70,
-	},
-	{
-		.id = "TMP00121",
-		.driver_data = LM70_CHIP_TMP121,
-	},
-	{
-		.id = "LM000071",
-		.driver_data = LM70_CHIP_LM71,
-	},
-	{
-		.id = "LM000074",
-		.driver_data = LM70_CHIP_LM74,
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(acpi, lm70_acpi_ids);
-#endif
-
 static int lm70_probe(struct spi_device *spi)
 {
 	struct device *hwmon_dev;
@@ -217,7 +194,6 @@ static struct spi_driver lm70_driver = {
 	.driver = {
 		.name	= "lm70",
 		.of_match_table	= of_match_ptr(lm70_of_ids),
-		.acpi_match_table = ACPI_PTR(lm70_acpi_ids),
 	},
 	.id_table = lm70_ids,
 	.probe	= lm70_probe,
-- 
GitLab


From 97387c2f06bcfd79d04a848d35517b32ee6dca7c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sat, 8 May 2021 09:50:25 -0700
Subject: [PATCH 3072/3804] hwmon: (max31722) Remove non-standard ACPI device
 IDs

Valid Maxim Integrated ACPI device IDs would start with MXIM,
not with MAX1. On top of that, ACPI device IDs reflecting chip names
are almost always invalid.

Remove the invalid ACPI IDs.

Fixes: 04e1e70afec6 ("hwmon: (max31722) Add support for MAX31722/MAX31723 temperature sensors")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/max31722.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/hwmon/max31722.c b/drivers/hwmon/max31722.c
index 062eceb7be0db..613338cbcb170 100644
--- a/drivers/hwmon/max31722.c
+++ b/drivers/hwmon/max31722.c
@@ -6,7 +6,6 @@
  * Copyright (c) 2016, Intel Corporation.
  */
 
-#include <linux/acpi.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/kernel.h>
@@ -133,20 +132,12 @@ static const struct spi_device_id max31722_spi_id[] = {
 	{"max31723", 0},
 	{}
 };
-
-static const struct acpi_device_id __maybe_unused max31722_acpi_id[] = {
-	{"MAX31722", 0},
-	{"MAX31723", 0},
-	{}
-};
-
 MODULE_DEVICE_TABLE(spi, max31722_spi_id);
 
 static struct spi_driver max31722_driver = {
 	.driver = {
 		.name = "max31722",
 		.pm = &max31722_pm_ops,
-		.acpi_match_table = ACPI_PTR(max31722_acpi_id),
 	},
 	.probe =            max31722_probe,
 	.remove =           max31722_remove,
-- 
GitLab


From ba9c5fc395de5bb642ed973dbf34c1d0c82d185d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 10 May 2021 17:13:31 +0300
Subject: [PATCH 3073/3804] hwmon: (lm70) Use SPI_MODE_X_MASK

Use SPI_MODE_X_MASK instead of open coded variant.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20210510141331.56736-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm70.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/lm70.c b/drivers/hwmon/lm70.c
index 6b884ea009877..d2a60de5b8de9 100644
--- a/drivers/hwmon/lm70.c
+++ b/drivers/hwmon/lm70.c
@@ -161,7 +161,7 @@ static int lm70_probe(struct spi_device *spi)
 
 
 	/* signaling is SPI_MODE_0 */
-	if (spi->mode & (SPI_CPOL | SPI_CPHA))
+	if ((spi->mode & SPI_MODE_X_MASK) != SPI_MODE_0)
 		return -EINVAL;
 
 	/* NOTE:  we assume 8-bit words, and convert to 16 bits manually */
-- 
GitLab


From 2be5f0d7532566d41194fe99d35d022ad399460d Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 8 May 2021 15:14:54 +0200
Subject: [PATCH 3074/3804] hwmon: (sch56xx) Use devres functions for watchdog

Use devm_kzalloc()/devm_watchdog_register() for
watchdog registration since it allows us to remove
the sch56xx_watchdog_data struct from the drivers'
own data structs.
Remove sch56xx_watchdog_unregister since devres
takes care of that now.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20210508131457.12780-2-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
[groeck: Dropped unnecessary return; at end of void function]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch5627.c        | 18 +++---------------
 drivers/hwmon/sch5636.c        |  9 ++-------
 drivers/hwmon/sch56xx-common.c | 28 +++++++++-------------------
 drivers/hwmon/sch56xx-common.h |  4 ++--
 4 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/drivers/hwmon/sch5627.c b/drivers/hwmon/sch5627.c
index 4324a5dbc9684..8f1b569c69e7b 100644
--- a/drivers/hwmon/sch5627.c
+++ b/drivers/hwmon/sch5627.c
@@ -64,7 +64,6 @@ static const char * const SCH5627_IN_LABELS[SCH5627_NO_IN] = {
 
 struct sch5627_data {
 	unsigned short addr;
-	struct sch56xx_watchdog_data *watchdog;
 	u8 control;
 	u8 temp_max[SCH5627_NO_TEMPS];
 	u8 temp_crit[SCH5627_NO_TEMPS];
@@ -357,16 +356,6 @@ static const struct hwmon_chip_info sch5627_chip_info = {
 	.info = sch5627_info,
 };
 
-static int sch5627_remove(struct platform_device *pdev)
-{
-	struct sch5627_data *data = platform_get_drvdata(pdev);
-
-	if (data->watchdog)
-		sch56xx_watchdog_unregister(data->watchdog);
-
-	return 0;
-}
-
 static int sch5627_probe(struct platform_device *pdev)
 {
 	struct sch5627_data *data;
@@ -460,9 +449,9 @@ static int sch5627_probe(struct platform_device *pdev)
 		return PTR_ERR(hwmon_dev);
 
 	/* Note failing to register the watchdog is not a fatal error */
-	data->watchdog = sch56xx_watchdog_register(&pdev->dev, data->addr,
-			(build_code << 24) | (build_id << 8) | hwmon_rev,
-			&data->update_lock, 1);
+	sch56xx_watchdog_register(&pdev->dev, data->addr,
+				  (build_code << 24) | (build_id << 8) | hwmon_rev,
+				  &data->update_lock, 1);
 
 	return 0;
 }
@@ -472,7 +461,6 @@ static struct platform_driver sch5627_driver = {
 		.name	= DRVNAME,
 	},
 	.probe		= sch5627_probe,
-	.remove		= sch5627_remove,
 };
 
 module_platform_driver(sch5627_driver);
diff --git a/drivers/hwmon/sch5636.c b/drivers/hwmon/sch5636.c
index 5683a38740f63..a5cd4de36575e 100644
--- a/drivers/hwmon/sch5636.c
+++ b/drivers/hwmon/sch5636.c
@@ -54,7 +54,6 @@ static const u16 SCH5636_REG_FAN_VAL[SCH5636_NO_FANS] = {
 struct sch5636_data {
 	unsigned short addr;
 	struct device *hwmon_dev;
-	struct sch56xx_watchdog_data *watchdog;
 
 	struct mutex update_lock;
 	char valid;			/* !=0 if following fields are valid */
@@ -372,9 +371,6 @@ static int sch5636_remove(struct platform_device *pdev)
 	struct sch5636_data *data = platform_get_drvdata(pdev);
 	int i;
 
-	if (data->watchdog)
-		sch56xx_watchdog_unregister(data->watchdog);
-
 	if (data->hwmon_dev)
 		hwmon_device_unregister(data->hwmon_dev);
 
@@ -495,9 +491,8 @@ static int sch5636_probe(struct platform_device *pdev)
 	}
 
 	/* Note failing to register the watchdog is not a fatal error */
-	data->watchdog = sch56xx_watchdog_register(&pdev->dev, data->addr,
-					(revision[0] << 8) | revision[1],
-					&data->update_lock, 0);
+	sch56xx_watchdog_register(&pdev->dev, data->addr, (revision[0] << 8) | revision[1],
+				  &data->update_lock, 0);
 
 	return 0;
 
diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 6c84780e358e8..b469d16ec1759 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -378,8 +378,8 @@ static const struct watchdog_ops watchdog_ops = {
 	.set_timeout	= watchdog_set_timeout,
 };
 
-struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent,
-	u16 addr, u32 revision, struct mutex *io_lock, int check_enabled)
+void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
+			       struct mutex *io_lock, int check_enabled)
 {
 	struct sch56xx_watchdog_data *data;
 	int err, control, output_enable;
@@ -393,17 +393,17 @@ struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent,
 	mutex_unlock(io_lock);
 
 	if (control < 0)
-		return NULL;
+		return;
 	if (output_enable < 0)
-		return NULL;
+		return;
 	if (check_enabled && !(output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)) {
 		pr_warn("Watchdog not enabled by BIOS, not registering\n");
-		return NULL;
+		return;
 	}
 
-	data = kzalloc(sizeof(struct sch56xx_watchdog_data), GFP_KERNEL);
+	data = devm_kzalloc(parent, sizeof(struct sch56xx_watchdog_data), GFP_KERNEL);
 	if (!data)
-		return NULL;
+		return;
 
 	data->addr = addr;
 	data->io_lock = io_lock;
@@ -438,24 +438,14 @@ struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent,
 	data->watchdog_output_enable = output_enable;
 
 	watchdog_set_drvdata(&data->wddev, data);
-	err = watchdog_register_device(&data->wddev);
+	err = devm_watchdog_register_device(parent, &data->wddev);
 	if (err) {
 		pr_err("Registering watchdog chardev: %d\n", err);
-		kfree(data);
-		return NULL;
+		devm_kfree(parent, data);
 	}
-
-	return data;
 }
 EXPORT_SYMBOL(sch56xx_watchdog_register);
 
-void sch56xx_watchdog_unregister(struct sch56xx_watchdog_data *data)
-{
-	watchdog_unregister_device(&data->wddev);
-	kfree(data);
-}
-EXPORT_SYMBOL(sch56xx_watchdog_unregister);
-
 /*
  * platform dev find, add and remove functions
  */
diff --git a/drivers/hwmon/sch56xx-common.h b/drivers/hwmon/sch56xx-common.h
index 75eb73617cf27..e907d9da0dd56 100644
--- a/drivers/hwmon/sch56xx-common.h
+++ b/drivers/hwmon/sch56xx-common.h
@@ -14,6 +14,6 @@ int sch56xx_read_virtual_reg16(u16 addr, u16 reg);
 int sch56xx_read_virtual_reg12(u16 addr, u16 msb_reg, u16 lsn_reg,
 			       int high_nibble);
 
-struct sch56xx_watchdog_data *sch56xx_watchdog_register(struct device *parent,
-	u16 addr, u32 revision, struct mutex *io_lock, int check_enabled);
+void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
+			       struct mutex *io_lock, int check_enabled);
 void sch56xx_watchdog_unregister(struct sch56xx_watchdog_data *data);
-- 
GitLab


From 6df5cba5c9e7bf98c114f15835d20dfd6c7898cf Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 8 May 2021 15:14:55 +0200
Subject: [PATCH 3075/3804] hwmon: (sch56xx-common) Use strscpy

strlcpy is considered deprecated.
Replace it with strscpy.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20210508131457.12780-3-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index b469d16ec1759..da915ca2b3b53 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -408,8 +408,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
 	data->addr = addr;
 	data->io_lock = io_lock;
 
-	strlcpy(data->wdinfo.identity, "sch56xx watchdog",
-		sizeof(data->wdinfo.identity));
+	strscpy(data->wdinfo.identity, "sch56xx watchdog", sizeof(data->wdinfo.identity));
 	data->wdinfo.firmware_version = revision;
 	data->wdinfo.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT;
 	if (!nowayout)
-- 
GitLab


From 989c9c675bbbf3264b42b05e8924a9930b500e6c Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 8 May 2021 15:14:56 +0200
Subject: [PATCH 3076/3804] hwmon: (sch56xx-common) Use helper function

Use watchdog_set_nowayout() to process param
setting and change param type to bool.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20210508131457.12780-4-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index da915ca2b3b53..04739bbff8f93 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -20,8 +20,8 @@
 #include "sch56xx-common.h"
 
 /* Insmod parameters */
-static int nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, int, 0);
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 	__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
@@ -420,8 +420,7 @@ void sch56xx_watchdog_register(struct device *parent, u16 addr, u32 revision,
 	data->wddev.timeout = 60;
 	data->wddev.min_timeout = 1;
 	data->wddev.max_timeout = 255 * 60;
-	if (nowayout)
-		set_bit(WDOG_NO_WAY_OUT, &data->wddev.status);
+	watchdog_set_nowayout(&data->wddev, nowayout);
 	if (output_enable & SCH56XX_WDOG_OUTPUT_ENABLE)
 		set_bit(WDOG_ACTIVE, &data->wddev.status);
 
-- 
GitLab


From 5c1c78e0a0a2f37de0b05851878af8e02eeae02f Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Sat, 8 May 2021 15:14:57 +0200
Subject: [PATCH 3077/3804] hwmon: (sch56xx-common) Simplify sch56xx_device_add

Use platform_device_register_simple() instead of
manually calling platform_device_alloc()/platform_device_add().

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Link: https://lore.kernel.org/r/20210508131457.12780-5-W_Armin@gmx.de
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sch56xx-common.c | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c
index 04739bbff8f93..40cdadad35e52 100644
--- a/drivers/hwmon/sch56xx-common.c
+++ b/drivers/hwmon/sch56xx-common.c
@@ -504,37 +504,18 @@ static int __init sch56xx_device_add(int address, const char *name)
 	struct resource res = {
 		.start	= address,
 		.end	= address + REGION_LENGTH - 1,
+		.name	= name,
 		.flags	= IORESOURCE_IO,
 	};
 	int err;
 
-	sch56xx_pdev = platform_device_alloc(name, address);
-	if (!sch56xx_pdev)
-		return -ENOMEM;
-
-	res.name = sch56xx_pdev->name;
 	err = acpi_check_resource_conflict(&res);
 	if (err)
-		goto exit_device_put;
-
-	err = platform_device_add_resources(sch56xx_pdev, &res, 1);
-	if (err) {
-		pr_err("Device resource addition failed\n");
-		goto exit_device_put;
-	}
-
-	err = platform_device_add(sch56xx_pdev);
-	if (err) {
-		pr_err("Device addition failed\n");
-		goto exit_device_put;
-	}
-
-	return 0;
+		return err;
 
-exit_device_put:
-	platform_device_put(sch56xx_pdev);
+	sch56xx_pdev = platform_device_register_simple(name, -1, &res, 1);
 
-	return err;
+	return PTR_ERR_OR_ZERO(sch56xx_pdev);
 }
 
 static int __init sch56xx_init(void)
-- 
GitLab


From 86c908d90fb17273f5f6d15539ad3d7bf134d892 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Fri, 7 May 2021 21:40:21 +0200
Subject: [PATCH 3078/3804] hwmon: (pmbus) Add new flag
 PMBUS_READ_STATUS_AFTER_FAILED_CHECK

Some PMBus chips end up in an undefined state when trying to read an
unsupported register. For such chips, it is necessary to reset the
chip pmbus controller to a known state after a failed register check.
This can be done by reading a known register. By setting this flag the
driver will try to read the STATUS register after each failed
register check. This read may fail, but it will put the chip into a
known state.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Link: https://lore.kernel.org/r/20210507194023.61138-2-erik.rosen@metormote.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c |  2 ++
 include/linux/pmbus.h            | 13 +++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index bbd7451781472..1f7fa5337974c 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -523,6 +523,8 @@ static bool pmbus_check_register(struct i2c_client *client,
 	rv = func(client, page, reg);
 	if (rv >= 0 && !(data->flags & PMBUS_SKIP_STATUS_CHECK))
 		rv = pmbus_check_status_cml(client);
+	if (rv < 0 && (data->flags & PMBUS_READ_STATUS_AFTER_FAILED_CHECK))
+		data->read_status(client, -1);
 	pmbus_clear_fault_page(client, -1);
 	return rv >= 0;
 }
diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h
index 12cbbf3059696..edd7c84fef658 100644
--- a/include/linux/pmbus.h
+++ b/include/linux/pmbus.h
@@ -43,6 +43,19 @@
  */
 #define PMBUS_NO_CAPABILITY			BIT(2)
 
+/*
+ * PMBUS_READ_STATUS_AFTER_FAILED_CHECK
+ *
+ * Some PMBus chips end up in an undefined state when trying to read an
+ * unsupported register. For such chips, it is necessary to reset the
+ * chip pmbus controller to a known state after a failed register check.
+ * This can be done by reading a known register. By setting this flag the
+ * driver will try to read the STATUS register after each failed
+ * register check. This read may fail, but it will put the chip in a
+ * known state.
+ */
+#define PMBUS_READ_STATUS_AFTER_FAILED_CHECK	BIT(3)
+
 struct pmbus_platform_data {
 	u32 flags;		/* Device specific flags */
 
-- 
GitLab


From b976760dc4efd1de7965bf020195a22fce4f456c Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Fri, 7 May 2021 21:40:22 +0200
Subject: [PATCH 3079/3804] hwmon: (pmbus) Add documentation for new flags

Add documentation for the new pmbus flags PMBUS_WRITE_PROTECTED,
PMBUS_NO_CAPABILITY and PMBUS_READ_STATUS_AFTER_FAILED_CHECK

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Link: https://lore.kernel.org/r/20210507194023.61138-3-erik.rosen@metormote.com
[groeck: Added newline at end of file]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/pmbus-core.rst | 42 +++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/Documentation/hwmon/pmbus-core.rst b/Documentation/hwmon/pmbus-core.rst
index 73e23ab42cc3a..e7e0c9ef10bec 100644
--- a/Documentation/hwmon/pmbus-core.rst
+++ b/Documentation/hwmon/pmbus-core.rst
@@ -289,12 +289,22 @@ PMBus driver platform data
 ==========================
 
 PMBus platform data is defined in include/linux/pmbus.h. Platform data
-currently only provides a flag field with a single bit used::
+currently provides a flags field with four bits used::
 
-	#define PMBUS_SKIP_STATUS_CHECK (1 << 0)
+	#define PMBUS_SKIP_STATUS_CHECK			BIT(0)
+
+	#define PMBUS_WRITE_PROTECTED			BIT(1)
+
+	#define PMBUS_NO_CAPABILITY			BIT(2)
+
+	#define PMBUS_READ_STATUS_AFTER_FAILED_CHECK	BIT(3)
 
 	struct pmbus_platform_data {
 		u32 flags;              /* Device specific flags */
+
+		/* regulator support */
+		int num_regulators;
+		struct regulator_init_data *reg_init_data;
 	};
 
 
@@ -302,8 +312,9 @@ Flags
 -----
 
 PMBUS_SKIP_STATUS_CHECK
-	During register detection, skip checking the status register for
-	communication or command errors.
+
+During register detection, skip checking the status register for
+communication or command errors.
 
 Some PMBus chips respond with valid data when trying to read an unsupported
 register. For such chips, checking the status register is mandatory when
@@ -315,3 +326,26 @@ status register must be disabled.
 Some i2c controllers do not support single-byte commands (write commands with
 no data, i2c_smbus_write_byte()). With such controllers, clearing the status
 register is impossible, and the PMBUS_SKIP_STATUS_CHECK flag must be set.
+
+PMBUS_WRITE_PROTECTED
+
+Set if the chip is write protected and write protection is not determined
+by the standard WRITE_PROTECT command.
+
+PMBUS_NO_CAPABILITY
+
+Some PMBus chips don't respond with valid data when reading the CAPABILITY
+register. For such chips, this flag should be set so that the PMBus core
+driver doesn't use CAPABILITY to determine its behavior.
+
+PMBUS_READ_STATUS_AFTER_FAILED_CHECK
+
+Read the STATUS register after each failed register check.
+
+Some PMBus chips end up in an undefined state when trying to read an
+unsupported register. For such chips, it is necessary to reset the
+chip pmbus controller to a known state after a failed register check.
+This can be done by reading a known register. By setting this flag the
+driver will try to read the STATUS register after each failed
+register check. This read may fail, but it will put the chip into a
+known state.
-- 
GitLab


From ea541c185c358f870ccb0d5fce6f726c5146daae Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Fri, 7 May 2021 21:40:23 +0200
Subject: [PATCH 3080/3804] hwmon: (pmbus) Add support for additional Flex BMR
 converters to pmbus

Add support for Flex BMR310, BMR456, BMR457, BMR458, BMR480, BMR490,
BMR491 and BMR492 to the pmbus driver

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Link: https://lore.kernel.org/r/20210507194023.61138-4-erik.rosen@metormote.com
[groeck: Fixed minor whitespace error]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/pmbus.rst | 11 +++++++----
 drivers/hwmon/pmbus/Kconfig   |  7 ++++---
 drivers/hwmon/pmbus/pmbus.c   | 19 +++++++++++++++++--
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/Documentation/hwmon/pmbus.rst b/Documentation/hwmon/pmbus.rst
index c44f14115413d..7ecfec6ca2dbc 100644
--- a/Documentation/hwmon/pmbus.rst
+++ b/Documentation/hwmon/pmbus.rst
@@ -3,15 +3,18 @@ Kernel driver pmbus
 
 Supported chips:
 
-  * Ericsson BMR453, BMR454
+  * Flex BMR310, BMR453, BMR454, BMR456, BMR457, BMR458, BMR480,
+    BMR490, BMR491, BMR492
 
-    Prefixes: 'bmr453', 'bmr454'
+    Prefixes: 'bmr310', 'bmr453', 'bmr454', 'bmr456', 'bmr457', 'bmr458', 'bmr480',
+    'bmr490', 'bmr491', 'bmr492'
 
     Addresses scanned: -
 
-    Datasheet:
+    Datasheets:
+
+	https://flexpowermodules.com/products
 
- http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395
 
   * ON Semiconductor ADP4000, NCP4200, NCP4208
 
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 37a5c39784fa9..6275dcf78675e 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -19,9 +19,10 @@ config SENSORS_PMBUS
 	default y
 	help
 	  If you say yes here you get hardware monitoring support for generic
-	  PMBus devices, including but not limited to ADP4000, BMR453, BMR454,
-	  MAX20796, MDT040, NCP4200, NCP4208, PDT003, PDT006, PDT012, TPS40400,
-	  TPS544B20, TPS544B25, TPS544C20, TPS544C25, and UDT020.
+	  PMBus devices, including but not limited to ADP4000, BMR310, BMR453,
+	  BMR454, BMR456, BMR457, BMR458, BMR480, BMR490, BMR491, BMR492,
+	  MAX20796, MDT040, NCP4200, NCP4208, PDT003, PDT006, PDT012,
+	  TPS40400, TPS544B20, TPS544B25, TPS544C20, TPS544C25, and UDT020.
 
 	  This driver can also be built as a module. If so, the module will
 	  be called pmbus.
diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c
index 618c377664c41..d0d386990af5e 100644
--- a/drivers/hwmon/pmbus/pmbus.c
+++ b/drivers/hwmon/pmbus/pmbus.c
@@ -173,13 +173,13 @@ static int pmbus_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	device_info = (struct pmbus_device_info *)i2c_match_id(pmbus_id, client)->driver_data;
-	if (device_info->flags & PMBUS_SKIP_STATUS_CHECK) {
+	if (device_info->flags) {
 		pdata = devm_kzalloc(dev, sizeof(struct pmbus_platform_data),
 				     GFP_KERNEL);
 		if (!pdata)
 			return -ENOMEM;
 
-		pdata->flags = PMBUS_SKIP_STATUS_CHECK;
+		pdata->flags = device_info->flags;
 	}
 
 	info->pages = device_info->pages;
@@ -193,22 +193,37 @@ static const struct pmbus_device_info pmbus_info_one = {
 	.pages = 1,
 	.flags = 0
 };
+
 static const struct pmbus_device_info pmbus_info_zero = {
 	.pages = 0,
 	.flags = 0
 };
+
 static const struct pmbus_device_info pmbus_info_one_skip = {
 	.pages = 1,
 	.flags = PMBUS_SKIP_STATUS_CHECK
 };
 
+static const struct pmbus_device_info pmbus_info_one_status = {
+	.pages = 1,
+	.flags = PMBUS_READ_STATUS_AFTER_FAILED_CHECK
+};
+
 /*
  * Use driver_data to set the number of pages supported by the chip.
  */
 static const struct i2c_device_id pmbus_id[] = {
 	{"adp4000", (kernel_ulong_t)&pmbus_info_one},
+	{"bmr310", (kernel_ulong_t)&pmbus_info_one_status},
 	{"bmr453", (kernel_ulong_t)&pmbus_info_one},
 	{"bmr454", (kernel_ulong_t)&pmbus_info_one},
+	{"bmr456", (kernel_ulong_t)&pmbus_info_one},
+	{"bmr457", (kernel_ulong_t)&pmbus_info_one},
+	{"bmr458", (kernel_ulong_t)&pmbus_info_one_status},
+	{"bmr480", (kernel_ulong_t)&pmbus_info_one_status},
+	{"bmr490", (kernel_ulong_t)&pmbus_info_one_status},
+	{"bmr491", (kernel_ulong_t)&pmbus_info_one_status},
+	{"bmr492", (kernel_ulong_t)&pmbus_info_one},
 	{"dps460", (kernel_ulong_t)&pmbus_info_one_skip},
 	{"dps650ab", (kernel_ulong_t)&pmbus_info_one_skip},
 	{"dps800", (kernel_ulong_t)&pmbus_info_one_skip},
-- 
GitLab


From 4943c6039d4ac1ae8535786da7c2a28c376c589c Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Tue, 11 May 2021 17:18:43 +0800
Subject: [PATCH 3081/3804] hwmon: (bt1-pvt) Remove redundant error printing in
 pvt_request_regs()

When devm_ioremap_resource() fails, a clear enough error message will be
printed by its subfunction __devm_ioremap_resource(). The error
information contains the device name, failure cause, and possibly resource
information.

Therefore, remove the error printing here to simplify code and reduce the
binary size.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Link: https://lore.kernel.org/r/20210511091843.4561-1-thunder.leizhen@huawei.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/bt1-pvt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/hwmon/bt1-pvt.c b/drivers/hwmon/bt1-pvt.c
index 3e1d56585b91a..74ce5211eb752 100644
--- a/drivers/hwmon/bt1-pvt.c
+++ b/drivers/hwmon/bt1-pvt.c
@@ -924,10 +924,8 @@ static int pvt_request_regs(struct pvt_hwmon *pvt)
 	}
 
 	pvt->regs = devm_ioremap_resource(pvt->dev, res);
-	if (IS_ERR(pvt->regs)) {
-		dev_err(pvt->dev, "Couldn't map PVT registers\n");
+	if (IS_ERR(pvt->regs))
 		return PTR_ERR(pvt->regs);
-	}
 
 	return 0;
 }
-- 
GitLab


From 0c1acde1d3d0032814be89c838483471582bc32e Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Tue, 11 May 2021 08:56:17 +0300
Subject: [PATCH 3082/3804] hwmon: (pmbus) Increase maximum number of phases
 per page

Increase maximum number of phases from 8 to 10 to support multi-phase
devices allowing up to 10 phases.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20210511055619.118104-2-vadimp@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h
index 3968924f85330..e0aa8aa46d8c4 100644
--- a/drivers/hwmon/pmbus/pmbus.h
+++ b/drivers/hwmon/pmbus/pmbus.h
@@ -375,7 +375,7 @@ enum pmbus_sensor_classes {
 };
 
 #define PMBUS_PAGES	32	/* Per PMBus specification */
-#define PMBUS_PHASES	8	/* Maximum number of phases per page */
+#define PMBUS_PHASES	10	/* Maximum number of phases per page */
 
 /* Functionality bit mask */
 #define PMBUS_HAVE_VIN		BIT(0)
-- 
GitLab


From e4db7719d037b820024a213f74703ae1abf5b00c Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Tue, 11 May 2021 08:56:18 +0300
Subject: [PATCH 3083/3804] hwmon: (pmbus) Add support for MPS Multi-phase
 mp2888 controller

Add support for mp2888 device from Monolithic Power Systems, Inc. (MPS)
vendor. This is a digital, multi-phase, pulse-width modulation
controller.

This device supports:
- One power rail.
- Programmable Multi-Phase up to 10 Phases.
- PWM-VID Interface
- One page (page 0) for telemetry.
- Programmable pins for PMBus Address.
- Built-In EEPROM to Store Custom Configurations.
- Can be configured for VOUT readout in direct or VID format and allows
  setting of different formats on rails 1 and 2. For VID the following
  protocols are available: VR13 mode with 5-mV DAC; VR13 mode with
  10-mV DAC, IMVP9 mode with 5-mV DAC.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20210511055619.118104-3-vadimp@nvidia.com
[groeck: Add MODULE_IMPORT_NS]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/mp2888.rst | 113 +++++++++
 drivers/hwmon/pmbus/Kconfig    |   9 +
 drivers/hwmon/pmbus/Makefile   |   1 +
 drivers/hwmon/pmbus/mp2888.c   | 408 +++++++++++++++++++++++++++++++++
 4 files changed, 531 insertions(+)
 create mode 100644 Documentation/hwmon/mp2888.rst
 create mode 100644 drivers/hwmon/pmbus/mp2888.c

diff --git a/Documentation/hwmon/mp2888.rst b/Documentation/hwmon/mp2888.rst
new file mode 100644
index 0000000000000..5e578fd7b1476
--- /dev/null
+++ b/Documentation/hwmon/mp2888.rst
@@ -0,0 +1,113 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver mp2888
+====================
+
+Supported chips:
+
+  * MPS MP2888
+
+    Prefix: 'mp2888'
+
+Author:
+
+	Vadim Pasternak <vadimp@nvidia.com>
+
+Description
+-----------
+
+This driver implements support for Monolithic Power Systems, Inc. (MPS)
+vendor dual-loop, digital, multi-phase controller MP2888.
+
+This device supports:
+
+- One power rail.
+- Programmable Multi-Phase up to 10 Phases.
+- PWM-VID Interface
+- One page (page 0) for telemetry.
+- Programmable pins for PMBus Address.
+- Built-In EEPROM to Store Custom Configurations.
+
+Device compliant with:
+
+- PMBus rev 1.3 interface.
+
+Device supports direct format for reading output current, output voltage,
+input and output power and temperature.
+Device supports linear format for reading input voltage and input power.
+
+The driver provides the following attributes for the current:
+
+- for current out: input and maximum alarm;
+- for phase current: input and label.
+
+The driver exports the following attributes via the 'sysfs' files, where:
+
+- 'n' is number of configured phases (from 1 to 10);
+- index 1 for "iout";
+- indexes 2 ... 1 + n for phases.
+
+**curr[1-{1+n}]_input**
+
+**curr[1-{1+n}]_label**
+
+**curr1_max**
+
+**curr1_max_alarm**
+
+The driver provides the following attributes for the voltage:
+
+- for voltage in: input, low and high critical thresholds, low and high
+  critical alarms;
+- for voltage out: input and high alarm.
+
+The driver exports the following attributes via the 'sysfs' files:
+
+**in1_crit**
+
+**in1_crit_alarm**
+
+**in1_input**
+
+**in1_label**
+
+**in1_min**
+
+**in1_min_alarm**
+
+**in2_alarm**
+
+**in2_input**
+
+**in2_label**
+
+The driver provides the following attributes for the power:
+
+- for power in: alarm and input.
+- for power out: cap, cap alarm and input.
+
+The driver exports the following attributes via the 'sysfs' files, where:
+- index 1 for "pin";
+- index 2 for "pout";
+
+**power1_alarm**
+
+**power1_input**
+
+**power1_label**
+
+**power2_input**
+
+**power2_label**
+
+**power2_max**
+
+**power2_max_alarm**
+
+The driver provides the following attributes for the temperature:
+
+**temp1_input**
+
+**temp1_max**
+
+**temp1_max_alarm**
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 6275dcf78675e..52d8cd63603e8 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -249,6 +249,15 @@ config SENSORS_MAX8688
 	  This driver can also be built as a module. If so, the module will
 	  be called max8688.
 
+config SENSORS_MP2888
+	tristate "MPS MP2888"
+	help
+	  If you say yes here you get hardware monitoring support for MPS
+	  MP2888 Digital, Multi-Phase, Pulse-Width Modulation Controller.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called mp2888.
+
 config SENSORS_MP2975
 	tristate "MPS MP2975"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index f8dcc27cd56a8..35d293bb44bf9 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SENSORS_MAX20751)	+= max20751.o
 obj-$(CONFIG_SENSORS_MAX31785)	+= max31785.o
 obj-$(CONFIG_SENSORS_MAX34440)	+= max34440.o
 obj-$(CONFIG_SENSORS_MAX8688)	+= max8688.o
+obj-$(CONFIG_SENSORS_MP2888)	+= mp2888.o
 obj-$(CONFIG_SENSORS_MP2975)	+= mp2975.o
 obj-$(CONFIG_SENSORS_PM6764TR)	+= pm6764tr.o
 obj-$(CONFIG_SENSORS_PXE1610)	+= pxe1610.o
diff --git a/drivers/hwmon/pmbus/mp2888.c b/drivers/hwmon/pmbus/mp2888.c
new file mode 100644
index 0000000000000..8ecd4adfef40e
--- /dev/null
+++ b/drivers/hwmon/pmbus/mp2888.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hardware monitoring driver for MPS Multi-phase Digital VR Controllers
+ *
+ * Copyright (C) 2020 Nvidia Technologies Ltd.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include "pmbus.h"
+
+/* Vendor specific registers. */
+#define MP2888_MFR_SYS_CONFIG	0x44
+#define MP2888_MFR_READ_CS1_2	0x73
+#define MP2888_MFR_READ_CS3_4	0x74
+#define MP2888_MFR_READ_CS5_6	0x75
+#define MP2888_MFR_READ_CS7_8	0x76
+#define MP2888_MFR_READ_CS9_10	0x77
+#define MP2888_MFR_VR_CONFIG1	0xe1
+
+#define MP2888_TOTAL_CURRENT_RESOLUTION	BIT(3)
+#define MP2888_PHASE_CURRENT_RESOLUTION	BIT(4)
+#define MP2888_DRMOS_KCS		GENMASK(2, 0)
+#define MP2888_TEMP_UNIT		10
+#define MP2888_MAX_PHASE		10
+
+struct mp2888_data {
+	struct pmbus_driver_info info;
+	int total_curr_resolution;
+	int phase_curr_resolution;
+	int curr_sense_gain;
+};
+
+#define to_mp2888_data(x)  container_of(x, struct mp2888_data, info)
+
+static int mp2888_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+	switch (reg) {
+	case PMBUS_VOUT_MODE:
+		/* Enforce VOUT direct format. */
+		return PB_VOUT_MODE_DIRECT;
+	default:
+		return -ENODATA;
+	}
+}
+
+static int
+mp2888_current_sense_gain_and_resolution_get(struct i2c_client *client, struct mp2888_data *data)
+{
+	int ret;
+
+	/*
+	 * Obtain DrMOS current sense gain of power stage from the register
+	 * , bits 0-2. The value is selected as below:
+	 * 00b - 5µA/A, 01b - 8.5µA/A, 10b - 9.7µA/A, 11b - 10µA/A. Other
+	 * values are reserved.
+	 */
+	ret = i2c_smbus_read_word_data(client, MP2888_MFR_SYS_CONFIG);
+	if (ret < 0)
+		return ret;
+
+	switch (ret & MP2888_DRMOS_KCS) {
+	case 0:
+		data->curr_sense_gain = 85;
+		break;
+	case 1:
+		data->curr_sense_gain = 97;
+		break;
+	case 2:
+		data->curr_sense_gain = 100;
+		break;
+	case 3:
+		data->curr_sense_gain = 50;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Obtain resolution selector for total and phase current report and protection.
+	 * 0: original resolution; 1: half resolution (in such case phase current value should
+	 * be doubled.
+	 */
+	data->total_curr_resolution = (ret & MP2888_TOTAL_CURRENT_RESOLUTION) >> 3;
+	data->phase_curr_resolution = (ret & MP2888_PHASE_CURRENT_RESOLUTION) >> 4;
+
+	return 0;
+}
+
+static int
+mp2888_read_phase(struct i2c_client *client, struct mp2888_data *data, int page, int phase, u8 reg)
+{
+	int ret;
+
+	ret = pmbus_read_word_data(client, page, phase, reg);
+	if (ret < 0)
+		return ret;
+
+	if (!((phase + 1) % 2))
+		ret >>= 8;
+	ret &= 0xff;
+
+	/*
+	 * Output value is calculated as: (READ_CSx / 80 – 1.23) / (Kcs * Rcs)
+	 * where:
+	 * - Kcs is the DrMOS current sense gain of power stage, which is obtained from the
+	 *   register MP2888_MFR_VR_CONFIG1, bits 13-12 with the following selection of DrMOS
+	 *   (data->curr_sense_gain):
+	 *   00b - 5µA/A, 01b - 8.5µA/A, 10b - 9.7µA/A, 11b - 10µA/A.
+	 * - Rcs is the internal phase current sense resistor. This parameter depends on hardware
+	 *   assembly. By default it is set to 1kΩ. In case of different assembly, user should
+	 *   scale this parameter by dividing it by Rcs.
+	 * If phase current resolution bit is set to 1, READ_CSx value should be doubled.
+	 * Note, that current phase sensing, providing by the device is not accurate. This is
+	 * because sampling of current occurrence of bit weight has a big deviation, especially for
+	 * light load.
+	 */
+	ret = DIV_ROUND_CLOSEST(ret * 100 - 9800, data->curr_sense_gain);
+	ret = (data->phase_curr_resolution) ? ret * 2 : ret;
+	/* Scale according to total current resolution. */
+	ret = (data->total_curr_resolution) ? ret * 8 : ret * 4;
+	return ret;
+}
+
+static int
+mp2888_read_phases(struct i2c_client *client, struct mp2888_data *data, int page, int phase)
+{
+	int ret;
+
+	switch (phase) {
+	case 0 ... 1:
+		ret = mp2888_read_phase(client, data, page, phase, MP2888_MFR_READ_CS1_2);
+		break;
+	case 2 ... 3:
+		ret = mp2888_read_phase(client, data, page, phase, MP2888_MFR_READ_CS3_4);
+		break;
+	case 4 ... 5:
+		ret = mp2888_read_phase(client, data, page, phase, MP2888_MFR_READ_CS5_6);
+		break;
+	case 6 ... 7:
+		ret = mp2888_read_phase(client, data, page, phase, MP2888_MFR_READ_CS7_8);
+		break;
+	case 8 ... 9:
+		ret = mp2888_read_phase(client, data, page, phase, MP2888_MFR_READ_CS9_10);
+		break;
+	default:
+		return -ENODATA;
+	}
+	return ret;
+}
+
+static int mp2888_read_word_data(struct i2c_client *client, int page, int phase, int reg)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct mp2888_data *data = to_mp2888_data(info);
+	int ret;
+
+	switch (reg) {
+	case PMBUS_READ_VIN:
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret <= 0)
+			return ret;
+
+		/*
+		 * READ_VIN requires fixup to scale it to linear11 format. Register data format
+		 * provides 10 bits for mantissa and 6 bits for exponent. Bits 15:10 are set with
+		 * the fixed value 111011b.
+		 */
+		ret = (ret & GENMASK(9, 0)) | ((ret & GENMASK(31, 10)) << 1);
+		break;
+	case PMBUS_OT_WARN_LIMIT:
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			return ret;
+		/*
+		 * Chip reports limits in degrees C, but the actual temperature in 10th of
+		 * degrees C - scaling is needed to match both.
+		 */
+		ret *= MP2888_TEMP_UNIT;
+		break;
+	case PMBUS_READ_IOUT:
+		if (phase != 0xff)
+			return mp2888_read_phases(client, data, page, phase);
+
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			return ret;
+		/*
+		 * READ_IOUT register has unused bits 15:12 with fixed value 1110b. Clear these
+		 * bits and scale with total current resolution. Data is provided in direct format.
+		 */
+		ret &= GENMASK(11, 0);
+		ret = data->total_curr_resolution ? ret * 2 : ret;
+		break;
+	case PMBUS_IOUT_OC_WARN_LIMIT:
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			return ret;
+		ret &= GENMASK(9, 0);
+		/*
+		 * Chip reports limits with resolution 1A or 2A, if total current resolution bit is
+		 * set 1. Actual current is reported with 0.25A or respectively 0.5A resolution.
+		 * Scaling is needed to match both.
+		 */
+		ret = data->total_curr_resolution ? ret * 8 : ret * 4;
+		break;
+	case PMBUS_READ_POUT:
+	case PMBUS_READ_PIN:
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			return ret;
+		ret = data->total_curr_resolution ? ret * 2 : ret;
+		break;
+	case PMBUS_POUT_OP_WARN_LIMIT:
+		ret = pmbus_read_word_data(client, page, phase, reg);
+		if (ret < 0)
+			return ret;
+		/*
+		 * Chip reports limits with resolution 1W or 2W, if total current resolution bit is
+		 * set 1. Actual power is reported with 0.5W or 1W respectively resolution. Scaling
+		 * is needed to match both.
+		 */
+		ret = data->total_curr_resolution ? ret * 4 : ret * 2;
+		break;
+	/*
+	 * The below registers are not implemented by device or implemented not according to the
+	 * spec. Skip all of them to avoid exposing non-relevant inputs to sysfs.
+	 */
+	case PMBUS_OT_FAULT_LIMIT:
+	case PMBUS_UT_WARN_LIMIT:
+	case PMBUS_UT_FAULT_LIMIT:
+	case PMBUS_VIN_UV_FAULT_LIMIT:
+	case PMBUS_VOUT_UV_WARN_LIMIT:
+	case PMBUS_VOUT_OV_WARN_LIMIT:
+	case PMBUS_VOUT_UV_FAULT_LIMIT:
+	case PMBUS_VOUT_OV_FAULT_LIMIT:
+	case PMBUS_VIN_OV_WARN_LIMIT:
+	case PMBUS_IOUT_OC_LV_FAULT_LIMIT:
+	case PMBUS_IOUT_OC_FAULT_LIMIT:
+	case PMBUS_POUT_MAX:
+	case PMBUS_IOUT_UC_FAULT_LIMIT:
+	case PMBUS_POUT_OP_FAULT_LIMIT:
+	case PMBUS_PIN_OP_WARN_LIMIT:
+	case PMBUS_MFR_VIN_MIN:
+	case PMBUS_MFR_VOUT_MIN:
+	case PMBUS_MFR_VIN_MAX:
+	case PMBUS_MFR_VOUT_MAX:
+	case PMBUS_MFR_IIN_MAX:
+	case PMBUS_MFR_IOUT_MAX:
+	case PMBUS_MFR_PIN_MAX:
+	case PMBUS_MFR_POUT_MAX:
+	case PMBUS_MFR_MAX_TEMP_1:
+		return -ENXIO;
+	default:
+		return -ENODATA;
+	}
+
+	return ret;
+}
+
+static int mp2888_write_word_data(struct i2c_client *client, int page, int reg, u16 word)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct mp2888_data *data = to_mp2888_data(info);
+
+	switch (reg) {
+	case PMBUS_OT_WARN_LIMIT:
+		word = DIV_ROUND_CLOSEST(word, MP2888_TEMP_UNIT);
+		/* Drop unused bits 15:8. */
+		word = clamp_val(word, 0, GENMASK(7, 0));
+		break;
+	case PMBUS_IOUT_OC_WARN_LIMIT:
+		/* Fix limit according to total curent resolution. */
+		word = data->total_curr_resolution ? DIV_ROUND_CLOSEST(word, 8) :
+		       DIV_ROUND_CLOSEST(word, 4);
+		/* Drop unused bits 15:10. */
+		word = clamp_val(word, 0, GENMASK(9, 0));
+		break;
+	case PMBUS_POUT_OP_WARN_LIMIT:
+		/* Fix limit according to total curent resolution. */
+		word = data->total_curr_resolution ? DIV_ROUND_CLOSEST(word, 4) :
+		       DIV_ROUND_CLOSEST(word, 2);
+		/* Drop unused bits 15:10. */
+		word = clamp_val(word, 0, GENMASK(9, 0));
+		break;
+	default:
+		return -ENODATA;
+	}
+	return pmbus_write_word_data(client, page, reg, word);
+}
+
+static int
+mp2888_identify_multiphase(struct i2c_client *client, struct mp2888_data *data,
+			   struct pmbus_driver_info *info)
+{
+	int ret;
+
+	ret = i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
+	if (ret < 0)
+		return ret;
+
+	/* Identify multiphase number - could be from 1 to 10. */
+	ret = i2c_smbus_read_word_data(client, MP2888_MFR_VR_CONFIG1);
+	if (ret <= 0)
+		return ret;
+
+	info->phases[0] = ret & GENMASK(3, 0);
+
+	/*
+	 * The device provides a total of 10 PWM pins, and can be configured to different phase
+	 * count applications for rail.
+	 */
+	if (info->phases[0] > MP2888_MAX_PHASE)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct pmbus_driver_info mp2888_info = {
+	.pages = 1,
+	.format[PSC_VOLTAGE_IN] = linear,
+	.format[PSC_VOLTAGE_OUT] = direct,
+	.format[PSC_TEMPERATURE] = direct,
+	.format[PSC_CURRENT_IN] = linear,
+	.format[PSC_CURRENT_OUT] = direct,
+	.format[PSC_POWER] = direct,
+	.m[PSC_TEMPERATURE] = 1,
+	.R[PSC_TEMPERATURE] = 1,
+	.m[PSC_VOLTAGE_OUT] = 1,
+	.R[PSC_VOLTAGE_OUT] = 3,
+	.m[PSC_CURRENT_OUT] = 4,
+	.m[PSC_POWER] = 1,
+	.func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_IOUT |
+		   PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP |
+		   PMBUS_HAVE_POUT | PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT |
+		   PMBUS_PHASE_VIRTUAL,
+	.pfunc[0] = PMBUS_HAVE_IOUT,
+	.pfunc[1] = PMBUS_HAVE_IOUT,
+	.pfunc[2] = PMBUS_HAVE_IOUT,
+	.pfunc[3] = PMBUS_HAVE_IOUT,
+	.pfunc[4] = PMBUS_HAVE_IOUT,
+	.pfunc[5] = PMBUS_HAVE_IOUT,
+	.pfunc[6] = PMBUS_HAVE_IOUT,
+	.pfunc[7] = PMBUS_HAVE_IOUT,
+	.pfunc[8] = PMBUS_HAVE_IOUT,
+	.pfunc[9] = PMBUS_HAVE_IOUT,
+	.read_byte_data = mp2888_read_byte_data,
+	.read_word_data = mp2888_read_word_data,
+	.write_word_data = mp2888_write_word_data,
+};
+
+static int mp2888_probe(struct i2c_client *client)
+{
+	struct pmbus_driver_info *info;
+	struct mp2888_data *data;
+	int ret;
+
+	data = devm_kzalloc(&client->dev, sizeof(struct mp2888_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	memcpy(&data->info, &mp2888_info, sizeof(*info));
+	info = &data->info;
+
+	/* Identify multiphase configuration. */
+	ret = mp2888_identify_multiphase(client, data, info);
+	if (ret)
+		return ret;
+
+	/* Obtain current sense gain of power stage and current resolution. */
+	ret = mp2888_current_sense_gain_and_resolution_get(client, data);
+	if (ret)
+		return ret;
+
+	return pmbus_do_probe(client, info);
+}
+
+static const struct i2c_device_id mp2888_id[] = {
+	{"mp2888", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, mp2888_id);
+
+static const struct of_device_id __maybe_unused mp2888_of_match[] = {
+	{.compatible = "mps,mp2888"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mp2888_of_match);
+
+static struct i2c_driver mp2888_driver = {
+	.driver = {
+		.name = "mp2888",
+		.of_match_table = of_match_ptr(mp2888_of_match),
+	},
+	.probe_new = mp2888_probe,
+	.id_table = mp2888_id,
+};
+
+module_i2c_driver(mp2888_driver);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@nvidia.com>");
+MODULE_DESCRIPTION("PMBus driver for MPS MP2888 device");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
-- 
GitLab


From 9abfb52b502889f1528316cf0b7d4116d40abebe Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Tue, 11 May 2021 08:56:19 +0300
Subject: [PATCH 3084/3804] dt-bindings: Add MP2888 voltage regulator device

Monolithic Power Systems, Inc. (MPS) dual-loop, digital, multi-phase
controller.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210511055619.118104-4-vadimp@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 8341e9d23c1e6..f8824e1dd24c4 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -103,6 +103,8 @@ properties:
           - fsl,mpl3115
             # MPR121: Proximity Capacitive Touch Sensor Controller
           - fsl,mpr121
+            # Monolithic Power Systems Inc. multi-phase controller mp2888
+          - mps,mp2888
             # Monolithic Power Systems Inc. multi-phase controller mp2975
           - mps,mp2975
             # G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface
-- 
GitLab


From 9da9c2dc57b2fa2e65521894cb66df4bf615214d Mon Sep 17 00:00:00 2001
From: Chu Lin <linchuyuan@google.com>
Date: Wed, 12 May 2021 17:10:43 +0000
Subject: [PATCH 3085/3804] hwmon: (adm1275) enable adm1272 temperature
 reporting

adm1272 supports temperature reporting but it is disabled by default.

Tested:
ls temp1_*
temp1_crit           temp1_highest        temp1_max
temp1_crit_alarm     temp1_input          temp1_max_alarm

cat temp1_input
26642

Signed-off-by: Chu Lin <linchuyuan@google.com>
Link: https://lore.kernel.org/r/20210512171043.2433694-1-linchuyuan@google.com
[groeck: Updated subject to reflect correct driver]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/adm1275.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/pmbus/adm1275.c b/drivers/hwmon/pmbus/adm1275.c
index 980a3850b2f39..d311e0557401c 100644
--- a/drivers/hwmon/pmbus/adm1275.c
+++ b/drivers/hwmon/pmbus/adm1275.c
@@ -611,11 +611,13 @@ static int adm1275_probe(struct i2c_client *client)
 		tindex = 8;
 
 		info->func[0] |= PMBUS_HAVE_PIN | PMBUS_HAVE_STATUS_INPUT |
-			PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT;
+			PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT |
+			PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
 
-		/* Enable VOUT if not enabled (it is disabled by default) */
-		if (!(config & ADM1278_VOUT_EN)) {
-			config |= ADM1278_VOUT_EN;
+		/* Enable VOUT & TEMP1 if not enabled (disabled by default) */
+		if ((config & (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) !=
+		    (ADM1278_VOUT_EN | ADM1278_TEMP1_EN)) {
+			config |= ADM1278_VOUT_EN | ADM1278_TEMP1_EN;
 			ret = i2c_smbus_write_byte_data(client,
 							ADM1275_PMON_CONFIG,
 							config);
@@ -625,10 +627,6 @@ static int adm1275_probe(struct i2c_client *client)
 				return -ENODEV;
 			}
 		}
-
-		if (config & ADM1278_TEMP1_EN)
-			info->func[0] |=
-				PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP;
 		if (config & ADM1278_VIN_EN)
 			info->func[0] |= PMBUS_HAVE_VIN;
 		break;
-- 
GitLab


From f20f7363e7e1d24defc27b1cb814071791a535b0 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 21 May 2021 14:22:18 -0300
Subject: [PATCH 3086/3804] docs: hwmon: Add an entry for mp2888

The entry for mp2888 is missing and it causes the following
'make htmldocs' build warning:

Documentation/hwmon/mp2888.rst: WARNING: document isn't included in any toctree

Add the mp2888 entry.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20210521172218.37592-1-festevam@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 9ed60fa84cbec..6925a8a70511e 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -137,6 +137,7 @@ Hardware Monitoring Kernel Drivers
    mcp3021
    menf21bmc
    mlxreg-fan
+   mp2888
    mp2975
    nct6683
    nct6775
-- 
GitLab


From 505c2549373f3aa9ee16493f872e57876ffb70b1 Mon Sep 17 00:00:00 2001
From: Navin Sankar Velliangiri <navin@linumiz.com>
Date: Mon, 24 May 2021 19:50:38 +0530
Subject: [PATCH 3087/3804] hwmon: Add sht4x Temperature and Humidity Sensor
 Driver

This patch adds a hwmon driver for the SHT4x Temperature and
Humidity sensor.

Signed-off-by: Navin Sankar Velliangiri <navin@linumiz.com>
[groeck: dropped unnecessary empty line and continuation lines]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst |   1 +
 Documentation/hwmon/sht4x.rst |  45 +++++
 drivers/hwmon/Kconfig         |  11 ++
 drivers/hwmon/Makefile        |   1 +
 drivers/hwmon/sht4x.c         | 301 ++++++++++++++++++++++++++++++++++
 5 files changed, 359 insertions(+)
 create mode 100644 Documentation/hwmon/sht4x.rst
 create mode 100644 drivers/hwmon/sht4x.c

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 6925a8a70511e..61e5d45326226 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -165,6 +165,7 @@ Hardware Monitoring Kernel Drivers
    sht15
    sht21
    sht3x
+   sht4x
    shtc1
    sis5595
    sl28cpld
diff --git a/Documentation/hwmon/sht4x.rst b/Documentation/hwmon/sht4x.rst
new file mode 100644
index 0000000000000..3b37abcd4a464
--- /dev/null
+++ b/Documentation/hwmon/sht4x.rst
@@ -0,0 +1,45 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver sht4x
+===================
+
+Supported Chips:
+
+  * Sensirion SHT4X
+
+    Prefix: 'sht4x'
+
+    Addresses scanned: None
+
+    Datasheet:
+
+      English: https://www.sensirion.com/fileadmin/user_upload/customers/sensirion/Dokumente/2_Humidity_Sensors/Datasheets/Sensirion_Humidity_Sensors_SHT4x_Datasheet.pdf
+
+Author: Navin Sankar Velliangiri <navin@linumiz.com>
+
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHT4x chip, a humidity
+and temperature sensor. Temperature is measured in degree celsius, relative
+humidity is expressed as a percentage. In sysfs interface, all values are
+scaled by 1000, i.e. the value for 31.5 degrees celsius is 31500.
+
+Usage Notes
+-----------
+
+The device communicates with the I2C protocol. Sensors can have the I2C
+address 0x44. See Documentation/i2c/instantiating-devices.rst for methods
+to instantiate the device.
+
+Sysfs entries
+-------------
+
+=============== ============================================
+temp1_input     Measured temperature in millidegrees Celsius
+humidity1_input Measured humidity in %H
+update_interval The minimum interval for polling the sensor,
+                in milliseconds. Writable. Must be at least
+                2000.
+=============== ============================================
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 87624902ea809..e3675377bc5d8 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1583,6 +1583,17 @@ config SENSORS_SHT3x
 	  This driver can also be built as a module. If so, the module
 	  will be called sht3x.
 
+config SENSORS_SHT4x
+	tristate "Sensirion humidity and temperature sensors. SHT4x and compat."
+	depends on I2C
+	select CRC8
+	help
+	  If you say yes here you get support for the Sensirion SHT40, SHT41 and
+	  SHT45 humidity and temperature sensors.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called sht4x.
+
 config SENSORS_SHTC1
 	tristate "Sensiron humidity and temperature sensors. SHTC1 and compat."
 	depends on I2C
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 59e78bc212cf3..d712c61c1f5e9 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -171,6 +171,7 @@ obj-$(CONFIG_SENSORS_SL28CPLD)	+= sl28cpld-hwmon.o
 obj-$(CONFIG_SENSORS_SHT15)	+= sht15.o
 obj-$(CONFIG_SENSORS_SHT21)	+= sht21.o
 obj-$(CONFIG_SENSORS_SHT3x)	+= sht3x.o
+obj-$(CONFIG_SENSORS_SHT4x)	+= sht4x.o
 obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
 obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
diff --git a/drivers/hwmon/sht4x.c b/drivers/hwmon/sht4x.c
new file mode 100644
index 0000000000000..1dc51ee2a72ba
--- /dev/null
+++ b/drivers/hwmon/sht4x.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) Linumiz 2021
+ *
+ * sht4x.c - Linux hwmon driver for SHT4x Temperature and Humidity sensor
+ *
+ * Author: Navin Sankar Velliangiri <navin@linumiz.com>
+ */
+
+#include <linux/crc8.h>
+#include <linux/delay.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+
+/*
+ * Poll intervals (in milliseconds)
+ */
+#define SHT4X_MIN_POLL_INTERVAL	2000
+
+/*
+ * I2C command delays (in microseconds)
+ */
+#define SHT4X_MEAS_DELAY	1000
+#define SHT4X_DELAY_EXTRA	10000
+
+/*
+ * Command Bytes
+ */
+#define SHT4X_CMD_MEASURE_HPM	0b11111101
+#define SHT4X_CMD_RESET		0b10010100
+
+#define SHT4X_CMD_LEN		1
+#define SHT4X_CRC8_LEN		1
+#define SHT4X_WORD_LEN		2
+#define SHT4X_RESPONSE_LENGTH	6
+#define SHT4X_CRC8_POLYNOMIAL	0x31
+#define SHT4X_CRC8_INIT		0xff
+#define SHT4X_MIN_TEMPERATURE	-45000
+#define SHT4X_MAX_TEMPERATURE	125000
+#define SHT4X_MIN_HUMIDITY	0
+#define SHT4X_MAX_HUMIDITY	100000
+
+DECLARE_CRC8_TABLE(sht4x_crc8_table);
+
+/**
+ * struct sht4x_data - All the data required to operate an SHT4X chip
+ * @client: the i2c client associated with the SHT4X
+ * @lock: a mutex that is used to prevent parallel access to the i2c client
+ * @update_interval: the minimum poll interval
+ * @last_updated: the previous time that the SHT4X was polled
+ * @temperature: the latest temperature value received from the SHT4X
+ * @humidity: the latest humidity value received from the SHT4X
+ */
+struct sht4x_data {
+	struct i2c_client	*client;
+	struct mutex		lock;	/* atomic read data updates */
+	bool			valid;	/* validity of fields below */
+	long			update_interval;	/* in milli-seconds */
+	long			last_updated;	/* in jiffies */
+	s32			temperature;
+	s32			humidity;
+};
+
+/**
+ * sht4x_read_values() - read and parse the raw data from the SHT4X
+ * @sht4x_data: the struct sht4x_data to use for the lock
+ * Return: 0 if succesfull, 1 if not
+ */
+static int sht4x_read_values(struct sht4x_data *data)
+{
+	int ret = 0;
+	u16 t_ticks, rh_ticks;
+	unsigned long next_update;
+	struct i2c_client *client = data->client;
+	u8 crc, raw_data[SHT4X_RESPONSE_LENGTH],
+	cmd[] = {SHT4X_CMD_MEASURE_HPM};
+
+	mutex_lock(&data->lock);
+	next_update = data->last_updated +
+		      msecs_to_jiffies(data->update_interval);
+	if (!data->valid || time_after(jiffies, next_update)) {
+		ret = i2c_master_send(client, cmd, SHT4X_CMD_LEN);
+		if (ret < 0)
+			goto unlock;
+
+		usleep_range(SHT4X_MEAS_DELAY,
+			     SHT4X_MEAS_DELAY + SHT4X_DELAY_EXTRA);
+
+		ret = i2c_master_recv(client, raw_data, SHT4X_RESPONSE_LENGTH);
+		if (ret != SHT4X_RESPONSE_LENGTH) {
+			if (ret >= 0)
+				ret = -ENODATA;
+
+			goto unlock;
+		}
+
+		t_ticks = raw_data[0] << 8 | raw_data[1];
+		rh_ticks = raw_data[3] << 8 | raw_data[4];
+
+		crc = crc8(sht4x_crc8_table, &raw_data[0], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
+		if (crc != raw_data[2]) {
+			dev_err(&client->dev, "data integrity check failed\n");
+			ret = -EIO;
+			goto unlock;
+		}
+
+		crc = crc8(sht4x_crc8_table, &raw_data[3], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
+		if (crc != raw_data[5]) {
+			dev_err(&client->dev, "data integrity check failed\n");
+			ret = -EIO;
+			goto unlock;
+		}
+
+		data->temperature = ((21875 * (int32_t)t_ticks) >> 13) - 45000;
+		data->humidity = ((15625 * (int32_t)rh_ticks) >> 13) - 6000;
+		data->last_updated = jiffies;
+		data->valid = true;
+	}
+
+unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+static ssize_t sht4x_interval_write(struct sht4x_data *data, long val)
+{
+	data->update_interval = clamp_val(val, SHT4X_MIN_POLL_INTERVAL, UINT_MAX);
+
+	return 0;
+}
+
+/**
+ * sht4x_interval_read() - read the minimum poll interval
+ *			   in milliseconds
+ */
+static size_t sht4x_interval_read(struct sht4x_data *data, long *val)
+{
+	*val = data->update_interval;
+	return 0;
+}
+
+/**
+ * sht4x_temperature1_read() - read the temperature in millidegrees
+ */
+static int sht4x_temperature1_read(struct sht4x_data *data, long *val)
+{
+	int ret;
+
+	ret = sht4x_read_values(data);
+	if (ret < 0)
+		return ret;
+
+	*val = data->temperature;
+
+	return 0;
+}
+
+/**
+ * sht4x_humidity1_read() - read a relative humidity in millipercent
+ */
+static int sht4x_humidity1_read(struct sht4x_data *data, long *val)
+{
+	int ret;
+
+	ret = sht4x_read_values(data);
+	if (ret < 0)
+		return ret;
+
+	*val = data->humidity;
+
+	return 0;
+}
+
+static umode_t sht4x_hwmon_visible(const void *data,
+				   enum hwmon_sensor_types type,
+				   u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_temp:
+	case hwmon_humidity:
+		return 0444;
+	case hwmon_chip:
+		return 0644;
+	default:
+		return 0;
+	}
+}
+
+static int sht4x_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+			    u32 attr, int channel, long *val)
+{
+	struct sht4x_data *data = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_temp:
+		return sht4x_temperature1_read(data, val);
+	case hwmon_humidity:
+		return sht4x_humidity1_read(data, val);
+	case hwmon_chip:
+		return sht4x_interval_read(data, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int sht4x_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+			     u32 attr, int channel, long val)
+{
+	struct sht4x_data *data = dev_get_drvdata(dev);
+
+	switch (type) {
+	case hwmon_chip:
+		return sht4x_interval_write(data, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_channel_info *sht4x_info[] = {
+	HWMON_CHANNEL_INFO(chip, HWMON_C_UPDATE_INTERVAL),
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+	HWMON_CHANNEL_INFO(humidity, HWMON_H_INPUT),
+	NULL,
+};
+
+static const struct hwmon_ops sht4x_hwmon_ops = {
+	.is_visible = sht4x_hwmon_visible,
+	.read = sht4x_hwmon_read,
+	.write = sht4x_hwmon_write,
+};
+
+static const struct hwmon_chip_info sht4x_chip_info = {
+	.ops = &sht4x_hwmon_ops,
+	.info = sht4x_info,
+};
+
+static int sht4x_probe(struct i2c_client *client,
+		       const struct i2c_device_id *sht4x_id)
+{
+	struct device *device = &client->dev;
+	struct device *hwmon_dev;
+	struct sht4x_data *data;
+	u8 cmd[] = {SHT4X_CMD_RESET};
+	int ret;
+
+	/*
+	 * we require full i2c support since the sht4x uses multi-byte read and
+	 * writes as well as multi-byte commands which are not supported by
+	 * the smbus protocol
+	 */
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+		return -EOPNOTSUPP;
+
+	data = devm_kzalloc(device, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->update_interval = SHT4X_MIN_POLL_INTERVAL;
+	data->client = client;
+
+	mutex_init(&data->lock);
+
+	crc8_populate_msb(sht4x_crc8_table, SHT4X_CRC8_POLYNOMIAL);
+
+	ret = i2c_master_send(client, cmd, SHT4X_CMD_LEN);
+	if (ret < 0)
+		return ret;
+	if (ret != SHT4X_CMD_LEN)
+		return -EIO;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(device,
+							 client->name,
+							 data,
+							 &sht4x_chip_info,
+							 NULL);
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+static const struct i2c_device_id sht4x_id[] = {
+	{ "sht4x", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, sht4x_id);
+
+static struct i2c_driver sht4x_driver = {
+	.driver = {
+		.name = "sht4x",
+	},
+	.probe		= sht4x_probe,
+	.id_table	= sht4x_id,
+};
+
+module_i2c_driver(sht4x_driver);
+
+MODULE_AUTHOR("Navin Sankar Velliangiri <navin@linumiz.com>");
+MODULE_DESCRIPTION("Sensirion SHT4x humidity and temperature sensor driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 07c6621a37352e38b4ad9addaba473ad90fbfe5e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 29 May 2021 02:13:52 -0700
Subject: [PATCH 3088/3804] hwmon: (sht4x) Fix sht4x_read_values return value

Kernel doc for sht4x_read_values() shows 0 on success, 1 on failure but
the return value on success is actually always positive as it is set to
SHT4X_RESPONSE_LENGTH by a successful call to i2c_master_recv().

Miscellanea:

o Update the kernel doc for sht4x_read_values to 0 for success or -ERRNO
o Remove incorrectly used kernel doc /** header for other _read functions
o Typo fix succesfull->successful
o Reverse a test to unindent a block and use goto unlock
o Declare cmd[SHT4X_CMD_LEN] rather than cmd[]

At least for gcc 10.2, object size is reduced a tiny bit.

$ size drivers/hwmon/sht4x.o*
   text	   data	    bss	    dec	    hex	filename
   1752	    404	    256	   2412	    96c	drivers/hwmon/sht4x.o.new
   1825	    404	    256	   2485	    9b5	drivers/hwmon/sht4x.o.old

Signed-off-by: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/r/60eedce497137eb34448c0c77e01ec9d9c972ad7.camel@perches.com
Reviewed-by: Navin Sankar Velliangiri <navin@linumiz.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/sht4x.c | 95 ++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 50 deletions(-)

diff --git a/drivers/hwmon/sht4x.c b/drivers/hwmon/sht4x.c
index 1dc51ee2a72ba..09c2a0b064444 100644
--- a/drivers/hwmon/sht4x.c
+++ b/drivers/hwmon/sht4x.c
@@ -67,7 +67,7 @@ struct sht4x_data {
 /**
  * sht4x_read_values() - read and parse the raw data from the SHT4X
  * @sht4x_data: the struct sht4x_data to use for the lock
- * Return: 0 if succesfull, 1 if not
+ * Return: 0 if successful, -ERRNO if not
  */
 static int sht4x_read_values(struct sht4x_data *data)
 {
@@ -75,51 +75,53 @@ static int sht4x_read_values(struct sht4x_data *data)
 	u16 t_ticks, rh_ticks;
 	unsigned long next_update;
 	struct i2c_client *client = data->client;
-	u8 crc, raw_data[SHT4X_RESPONSE_LENGTH],
-	cmd[] = {SHT4X_CMD_MEASURE_HPM};
+	u8 crc;
+	u8 cmd[SHT4X_CMD_LEN] = {SHT4X_CMD_MEASURE_HPM};
+	u8 raw_data[SHT4X_RESPONSE_LENGTH];
 
 	mutex_lock(&data->lock);
 	next_update = data->last_updated +
 		      msecs_to_jiffies(data->update_interval);
-	if (!data->valid || time_after(jiffies, next_update)) {
-		ret = i2c_master_send(client, cmd, SHT4X_CMD_LEN);
-		if (ret < 0)
-			goto unlock;
-
-		usleep_range(SHT4X_MEAS_DELAY,
-			     SHT4X_MEAS_DELAY + SHT4X_DELAY_EXTRA);
-
-		ret = i2c_master_recv(client, raw_data, SHT4X_RESPONSE_LENGTH);
-		if (ret != SHT4X_RESPONSE_LENGTH) {
-			if (ret >= 0)
-				ret = -ENODATA;
-
-			goto unlock;
-		}
-
-		t_ticks = raw_data[0] << 8 | raw_data[1];
-		rh_ticks = raw_data[3] << 8 | raw_data[4];
-
-		crc = crc8(sht4x_crc8_table, &raw_data[0], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
-		if (crc != raw_data[2]) {
-			dev_err(&client->dev, "data integrity check failed\n");
-			ret = -EIO;
-			goto unlock;
-		}
-
-		crc = crc8(sht4x_crc8_table, &raw_data[3], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
-		if (crc != raw_data[5]) {
-			dev_err(&client->dev, "data integrity check failed\n");
-			ret = -EIO;
-			goto unlock;
-		}
-
-		data->temperature = ((21875 * (int32_t)t_ticks) >> 13) - 45000;
-		data->humidity = ((15625 * (int32_t)rh_ticks) >> 13) - 6000;
-		data->last_updated = jiffies;
-		data->valid = true;
+
+	if (data->valid && time_before_eq(jiffies, next_update))
+		goto unlock;
+
+	ret = i2c_master_send(client, cmd, SHT4X_CMD_LEN);
+	if (ret < 0)
+		goto unlock;
+
+	usleep_range(SHT4X_MEAS_DELAY, SHT4X_MEAS_DELAY + SHT4X_DELAY_EXTRA);
+
+	ret = i2c_master_recv(client, raw_data, SHT4X_RESPONSE_LENGTH);
+	if (ret != SHT4X_RESPONSE_LENGTH) {
+		if (ret >= 0)
+			ret = -ENODATA;
+		goto unlock;
+	}
+
+	t_ticks = raw_data[0] << 8 | raw_data[1];
+	rh_ticks = raw_data[3] << 8 | raw_data[4];
+
+	crc = crc8(sht4x_crc8_table, &raw_data[0], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
+	if (crc != raw_data[2]) {
+		dev_err(&client->dev, "data integrity check failed\n");
+		ret = -EIO;
+		goto unlock;
 	}
 
+	crc = crc8(sht4x_crc8_table, &raw_data[3], SHT4X_WORD_LEN, CRC8_INIT_VALUE);
+	if (crc != raw_data[5]) {
+		dev_err(&client->dev, "data integrity check failed\n");
+		ret = -EIO;
+		goto unlock;
+	}
+
+	data->temperature = ((21875 * (int32_t)t_ticks) >> 13) - 45000;
+	data->humidity = ((15625 * (int32_t)rh_ticks) >> 13) - 6000;
+	data->last_updated = jiffies;
+	data->valid = true;
+	ret = 0;
+
 unlock:
 	mutex_unlock(&data->lock);
 	return ret;
@@ -132,19 +134,14 @@ static ssize_t sht4x_interval_write(struct sht4x_data *data, long val)
 	return 0;
 }
 
-/**
- * sht4x_interval_read() - read the minimum poll interval
- *			   in milliseconds
- */
+/* sht4x_interval_read() - read the minimum poll interval in milliseconds */
 static size_t sht4x_interval_read(struct sht4x_data *data, long *val)
 {
 	*val = data->update_interval;
 	return 0;
 }
 
-/**
- * sht4x_temperature1_read() - read the temperature in millidegrees
- */
+/* sht4x_temperature1_read() - read the temperature in millidegrees */
 static int sht4x_temperature1_read(struct sht4x_data *data, long *val)
 {
 	int ret;
@@ -158,9 +155,7 @@ static int sht4x_temperature1_read(struct sht4x_data *data, long *val)
 	return 0;
 }
 
-/**
- * sht4x_humidity1_read() - read a relative humidity in millipercent
- */
+/* sht4x_humidity1_read() - read a relative humidity in millipercent */
 static int sht4x_humidity1_read(struct sht4x_data *data, long *val)
 {
 	int ret;
-- 
GitLab


From cbbf244f0515af3472084f22b6213121b4a63835 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 26 May 2021 08:40:16 -0700
Subject: [PATCH 3089/3804] hwmon: (max31790) Fix fan speed reporting for
 fan7..12
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fans 7..12 do not have their own set of configuration registers.
So far the code ignored that and read beyond the end of the configuration
register range to get the tachometer period. This resulted in more or less
random fan speed values for those fans.

The datasheet is quite vague when it comes to defining the tachometer
period for fans 7..12. Experiments confirm that the period is the same
for both fans associated with a given set of configuration registers.

Fixes: 54187ff9d766 ("hwmon: (max31790) Convert to use new hwmon registration API")
Fixes: 195a4b4298a7 ("hwmon: Driver for Maxim MAX31790")
Cc: Jan Kundrát <jan.kundrat@cesnet.cz>
Reviewed-by: Jan Kundrát <jan.kundrat@cesnet.cz>
Cc: Václav Kubernát <kubernat@cesnet.cz>
Reviewed-by: Jan Kundrát <jan.kundrat@cesnet.cz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20210526154022.3223012-2-linux@roeck-us.net
---
 drivers/hwmon/max31790.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index 86e6c71db685c..f6d4fc0a2f137 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -170,7 +170,7 @@ static int max31790_read_fan(struct device *dev, u32 attr, int channel,
 
 	switch (attr) {
 	case hwmon_fan_input:
-		sr = get_tach_period(data->fan_dynamics[channel]);
+		sr = get_tach_period(data->fan_dynamics[channel % NR_CHANNEL]);
 		rpm = RPM_FROM_REG(data->tach[channel], sr);
 		*val = rpm;
 		return 0;
-- 
GitLab


From 897f6339893b741a5d68ae8e2475df65946041c2 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 26 May 2021 08:40:17 -0700
Subject: [PATCH 3090/3804] hwmon: (max31790) Report correct current pwm duty
 cycles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The MAX31790 has two sets of registers for pwm duty cycles, one to request
a duty cycle and one to read the actual current duty cycle. Both do not
have to be the same.

When reporting the pwm duty cycle to the user, the actual pwm duty cycle
from pwm duty cycle registers needs to be reported. When setting it, the
pwm target duty cycle needs to be written. Since we don't know the actual
pwm duty cycle after a target pwm duty cycle has been written, set the
valid flag to false to indicate that actual pwm duty cycle should be read
from the chip instead of using cached values.

Cc: Jan Kundrát <jan.kundrat@cesnet.cz>
Cc: Václav Kubernát <kubernat@cesnet.cz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Václav Kubernát <kubernat@cesnet.cz>
Reviewed-by: Jan Kundrát <jan.kundrat@cesnet.cz>
Link: https://lore.kernel.org/r/20210526154022.3223012-3-linux@roeck-us.net
---
 Documentation/hwmon/max31790.rst | 3 ++-
 drivers/hwmon/max31790.c         | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Documentation/hwmon/max31790.rst b/Documentation/hwmon/max31790.rst
index f301385d8cef3..54ff0f49e28fc 100644
--- a/Documentation/hwmon/max31790.rst
+++ b/Documentation/hwmon/max31790.rst
@@ -39,5 +39,6 @@ fan[1-12]_input    RO  fan tachometer speed in RPM
 fan[1-12]_fault    RO  fan experienced fault
 fan[1-6]_target    RW  desired fan speed in RPM
 pwm[1-6]_enable    RW  regulator mode, 0=disabled, 1=manual mode, 2=rpm mode
-pwm[1-6]           RW  fan target duty cycle (0-255)
+pwm[1-6]           RW  read: current pwm duty cycle,
+                       write: target pwm duty cycle (0-255)
 ================== === =======================================================
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index f6d4fc0a2f137..693497e09ac03 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -104,7 +104,7 @@ static struct max31790_data *max31790_update_device(struct device *dev)
 				data->tach[NR_CHANNEL + i] = rv;
 			} else {
 				rv = i2c_smbus_read_word_swapped(client,
-						MAX31790_REG_PWMOUT(i));
+						MAX31790_REG_PWM_DUTY_CYCLE(i));
 				if (rv < 0)
 					goto abort;
 				data->pwm[i] = rv;
@@ -299,10 +299,10 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
 			err = -EINVAL;
 			break;
 		}
-		data->pwm[channel] = val << 8;
+		data->valid = false;
 		err = i2c_smbus_write_word_swapped(client,
 						   MAX31790_REG_PWMOUT(channel),
-						   data->pwm[channel]);
+						   val << 8);
 		break;
 	case hwmon_pwm_enable:
 		fan_config = data->fan_config[channel];
-- 
GitLab


From 148c847c9e5a54b99850617bf9c143af9a344f92 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 26 May 2021 08:40:18 -0700
Subject: [PATCH 3091/3804] hwmon: (max31790) Fix pwmX_enable attributes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pwmX_enable supports three possible values:

0: Fan control disabled. Duty cycle is fixed to 0%
1: Fan control enabled, pwm mode. Duty cycle is determined by
   values written into Target Duty Cycle registers.
2: Fan control enabled, rpm mode
   Duty cycle is adjusted such that fan speed matches
   the values in Target Count registers

The current code does not do this; instead, it mixes pwm control
configuration with fan speed monitoring configuration. Worse, it
reports that pwm control would be disabled (pwmX_enable==0) when
it is in fact enabled in pwm mode. Part of the problem may be that
the chip sets the "TACH input enable" bit on its own whenever the
mode bit is set to RPM mode, but that doesn't mean that "TACH input
enable" accurately reflects the pwm mode.

Fix it up and only handle pwm control with the pwmX_enable attributes.
In the documentation, clarify that disabling pwm control (pwmX_enable=0)
sets the pwm duty cycle to 0%. In the code, explain why TACH_INPUT_EN
is set together with RPM_MODE.

While at it, only update the configuration register if the configuration
has changed, and only update the cached configuration if updating the
chip configuration was successful.

Cc: Jan Kundrát <jan.kundrat@cesnet.cz>
Cc: Václav Kubernát <kubernat@cesnet.cz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Václav Kubernát <kubernat@cesnet.cz>
Reviewed-by: Jan Kundrát <jan.kundrat@cesnet.cz>
Link: https://lore.kernel.org/r/20210526154022.3223012-4-linux@roeck-us.net
---
 Documentation/hwmon/max31790.rst |  2 +-
 drivers/hwmon/max31790.c         | 41 ++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/Documentation/hwmon/max31790.rst b/Documentation/hwmon/max31790.rst
index 54ff0f49e28fc..7b097c3b9b908 100644
--- a/Documentation/hwmon/max31790.rst
+++ b/Documentation/hwmon/max31790.rst
@@ -38,7 +38,7 @@ Sysfs entries
 fan[1-12]_input    RO  fan tachometer speed in RPM
 fan[1-12]_fault    RO  fan experienced fault
 fan[1-6]_target    RW  desired fan speed in RPM
-pwm[1-6]_enable    RW  regulator mode, 0=disabled, 1=manual mode, 2=rpm mode
+pwm[1-6]_enable    RW  regulator mode, 0=disabled (duty cycle=0%), 1=manual mode, 2=rpm mode
 pwm[1-6]           RW  read: current pwm duty cycle,
                        write: target pwm duty cycle (0-255)
 ================== === =======================================================
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index 693497e09ac03..67677c4377687 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -27,6 +27,7 @@
 
 /* Fan Config register bits */
 #define MAX31790_FAN_CFG_RPM_MODE	0x80
+#define MAX31790_FAN_CFG_CTRL_MON	0x10
 #define MAX31790_FAN_CFG_TACH_INPUT_EN	0x08
 #define MAX31790_FAN_CFG_TACH_INPUT	0x01
 
@@ -271,12 +272,12 @@ static int max31790_read_pwm(struct device *dev, u32 attr, int channel,
 		*val = data->pwm[channel] >> 8;
 		return 0;
 	case hwmon_pwm_enable:
-		if (fan_config & MAX31790_FAN_CFG_RPM_MODE)
+		if (fan_config & MAX31790_FAN_CFG_CTRL_MON)
+			*val = 0;
+		else if (fan_config & MAX31790_FAN_CFG_RPM_MODE)
 			*val = 2;
-		else if (fan_config & MAX31790_FAN_CFG_TACH_INPUT_EN)
-			*val = 1;
 		else
-			*val = 0;
+			*val = 1;
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -307,23 +308,33 @@ static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
 	case hwmon_pwm_enable:
 		fan_config = data->fan_config[channel];
 		if (val == 0) {
-			fan_config &= ~(MAX31790_FAN_CFG_TACH_INPUT_EN |
-					MAX31790_FAN_CFG_RPM_MODE);
+			fan_config |= MAX31790_FAN_CFG_CTRL_MON;
+			/*
+			 * Disable RPM mode; otherwise disabling fan speed
+			 * monitoring is not possible.
+			 */
+			fan_config &= ~MAX31790_FAN_CFG_RPM_MODE;
 		} else if (val == 1) {
-			fan_config = (fan_config |
-				      MAX31790_FAN_CFG_TACH_INPUT_EN) &
-				     ~MAX31790_FAN_CFG_RPM_MODE;
+			fan_config &= ~(MAX31790_FAN_CFG_CTRL_MON | MAX31790_FAN_CFG_RPM_MODE);
 		} else if (val == 2) {
-			fan_config |= MAX31790_FAN_CFG_TACH_INPUT_EN |
-				      MAX31790_FAN_CFG_RPM_MODE;
+			fan_config &= ~MAX31790_FAN_CFG_CTRL_MON;
+			/*
+			 * The chip sets MAX31790_FAN_CFG_TACH_INPUT_EN on its
+			 * own if MAX31790_FAN_CFG_RPM_MODE is set.
+			 * Do it here as well to reflect the actual register
+			 * value in the cache.
+			 */
+			fan_config |= (MAX31790_FAN_CFG_RPM_MODE | MAX31790_FAN_CFG_TACH_INPUT_EN);
 		} else {
 			err = -EINVAL;
 			break;
 		}
-		data->fan_config[channel] = fan_config;
-		err = i2c_smbus_write_byte_data(client,
-					MAX31790_REG_FAN_CONFIG(channel),
-					fan_config);
+		if (fan_config != data->fan_config[channel]) {
+			err = i2c_smbus_write_byte_data(client, MAX31790_REG_FAN_CONFIG(channel),
+							fan_config);
+			if (!err)
+				data->fan_config[channel] = fan_config;
+		}
 		break;
 	default:
 		err = -EOPNOTSUPP;
-- 
GitLab


From 2013607b85f03ff24a5a19933705905a1b324a31 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 26 May 2021 08:40:20 -0700
Subject: [PATCH 3092/3804] hwmon: (max31790) Clear fan fault after reporting
 it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fault bits in MAX31790 are sticky and have to be cleared explicitly.
A write operation into either the 'Target Duty Cycle' register or the
'Target Count' register is necessary to clear a fault.

At the same time, we can never clear cached fault status values before
reading them because the companion fault status for any given fan is
cleared as well when clearing a fault.

Cc: Jan Kundrát <jan.kundrat@cesnet.cz>
Cc: Václav Kubernát <kubernat@cesnet.cz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Václav Kubernát <kubernat@cesnet.cz>
Link: https://lore.kernel.org/r/20210526154022.3223012-6-linux@roeck-us.net
---
 drivers/hwmon/max31790.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index 67677c4377687..7927468c52715 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -80,7 +80,7 @@ static struct max31790_data *max31790_update_device(struct device *dev)
 				MAX31790_REG_FAN_FAULT_STATUS1);
 		if (rv < 0)
 			goto abort;
-		data->fault_status = rv & 0x3F;
+		data->fault_status |= rv & 0x3F;
 
 		rv = i2c_smbus_read_byte_data(client,
 				MAX31790_REG_FAN_FAULT_STATUS2);
@@ -181,7 +181,21 @@ static int max31790_read_fan(struct device *dev, u32 attr, int channel,
 		*val = rpm;
 		return 0;
 	case hwmon_fan_fault:
+		mutex_lock(&data->update_lock);
 		*val = !!(data->fault_status & (1 << channel));
+		data->fault_status &= ~(1 << channel);
+		/*
+		 * If a fault bit is set, we need to write into one of the fan
+		 * configuration registers to clear it. Note that this also
+		 * clears the fault for the companion channel if enabled.
+		 */
+		if (*val) {
+			int reg = MAX31790_REG_TARGET_COUNT(channel % NR_CHANNEL);
+
+			i2c_smbus_write_byte_data(data->client, reg,
+						  data->target_count[channel % NR_CHANNEL] >> 8);
+		}
+		mutex_unlock(&data->update_lock);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
-- 
GitLab


From 1814c4e84de2a89d1c2e1e9bbd241240561075a4 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Wed, 26 May 2021 08:40:21 -0700
Subject: [PATCH 3093/3804] hwmon: (max31790) Detect and report zero fan speed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If a fan is not running or not connected, or if fan monitoring is disabled,
the fan count register returns a fixed value of 0xffe0. So far this is then
translated to a RPM value larger than 0. Since this is misleading and does
not really make much sense, report a fan RPM of 0 in this situation.

Cc: Jan Kundrát <jan.kundrat@cesnet.cz>
Cc: Václav Kubernát <kubernat@cesnet.cz>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Václav Kubernát <kubernat@cesnet.cz>
Link: https://lore.kernel.org/r/20210526154022.3223012-7-linux@roeck-us.net
---
 drivers/hwmon/max31790.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index 7927468c52715..7e9362f6dc29e 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -40,6 +40,8 @@
 #define FAN_RPM_MIN			120
 #define FAN_RPM_MAX			7864320
 
+#define FAN_COUNT_REG_MAX		0xffe0
+
 #define RPM_FROM_REG(reg, sr)		(((reg) >> 4) ? \
 					 ((60 * (sr) * 8192) / ((reg) >> 4)) : \
 					 FAN_RPM_MAX)
@@ -172,7 +174,10 @@ static int max31790_read_fan(struct device *dev, u32 attr, int channel,
 	switch (attr) {
 	case hwmon_fan_input:
 		sr = get_tach_period(data->fan_dynamics[channel % NR_CHANNEL]);
-		rpm = RPM_FROM_REG(data->tach[channel], sr);
+		if (data->tach[channel] == FAN_COUNT_REG_MAX)
+			rpm = 0;
+		else
+			rpm = RPM_FROM_REG(data->tach[channel], sr);
 		*val = rpm;
 		return 0;
 	case hwmon_fan_target:
-- 
GitLab


From 6b6af85410cf2db95d39ad9aa1d812a35eb1651e Mon Sep 17 00:00:00 2001
From: Ninad Malwade <nmalwade@nvidia.com>
Date: Fri, 4 Jun 2021 14:54:43 +0800
Subject: [PATCH 3094/3804] hwmon: (ina3221) use CVRF only for single-shot
 conversion

As per current logic the wait time per conversion is around 430ms
for 512 samples and around 860ms for 1024 samples for 3 channels
considering 140us as the bus voltage and shunt voltage sampling
conversion time.

This waiting time is a lot for the continuous mode and even for
the single shot mode. For continuous mode when moving average is
considered the waiting for CVRF bit is not required and the data
from the previous conversion is sufficient. As mentioned in the
datasheet the conversion ready bit is provided to help coordinate
single-shot conversions, we can restrict the use to single-shot
mode only.

Also, the conversion time is for the averaged samples, the wait
time for the polling can omit the number of samples consideration.

Signed-off-by: Ninad Malwade <nmalwade@nvidia.com>
Link: https://lore.kernel.org/r/1622789683-30931-1-git-send-email-nmalwade@nvidia.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina3221.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
index c602583d19f3e..58d3828e2ec0c 100644
--- a/drivers/hwmon/ina3221.c
+++ b/drivers/hwmon/ina3221.c
@@ -196,13 +196,11 @@ static inline u32 ina3221_reg_to_interval_us(u16 config)
 	u32 channels = hweight16(config & INA3221_CONFIG_CHs_EN_MASK);
 	u32 vbus_ct_idx = INA3221_CONFIG_VBUS_CT(config);
 	u32 vsh_ct_idx = INA3221_CONFIG_VSH_CT(config);
-	u32 samples_idx = INA3221_CONFIG_AVG(config);
-	u32 samples = ina3221_avg_samples[samples_idx];
 	u32 vbus_ct = ina3221_conv_time[vbus_ct_idx];
 	u32 vsh_ct = ina3221_conv_time[vsh_ct_idx];
 
 	/* Calculate total conversion time */
-	return channels * (vbus_ct + vsh_ct) * samples;
+	return channels * (vbus_ct + vsh_ct);
 }
 
 static inline int ina3221_wait_for_data(struct ina3221_data *ina)
@@ -288,13 +286,14 @@ static int ina3221_read_in(struct device *dev, u32 attr, int channel, long *val)
 			return -ENODATA;
 
 		/* Write CONFIG register to trigger a single-shot measurement */
-		if (ina->single_shot)
+		if (ina->single_shot) {
 			regmap_write(ina->regmap, INA3221_CONFIG,
 				     ina->reg_config);
 
-		ret = ina3221_wait_for_data(ina);
-		if (ret)
-			return ret;
+			ret = ina3221_wait_for_data(ina);
+			if (ret)
+				return ret;
+		}
 
 		ret = ina3221_read_value(ina, reg, &regval);
 		if (ret)
@@ -344,13 +343,14 @@ static int ina3221_read_curr(struct device *dev, u32 attr,
 			return -ENODATA;
 
 		/* Write CONFIG register to trigger a single-shot measurement */
-		if (ina->single_shot)
+		if (ina->single_shot) {
 			regmap_write(ina->regmap, INA3221_CONFIG,
 				     ina->reg_config);
 
-		ret = ina3221_wait_for_data(ina);
-		if (ret)
-			return ret;
+			ret = ina3221_wait_for_data(ina);
+			if (ret)
+				return ret;
+		}
 
 		fallthrough;
 	case hwmon_curr_crit:
-- 
GitLab


From 4e5418f787ec56d7fe3c6efee486b8f508c58baf Mon Sep 17 00:00:00 2001
From: Madhava Reddy Siddareddygari <msiddare@cisco.com>
Date: Sat, 5 Jun 2021 07:27:02 +0200
Subject: [PATCH 3095/3804] hwmon: (pmbus_core) Check adapter PEC support

Currently, for Packet Error Checking (PEC) only the controller
is checked for support. This causes problems on the cisco-8000
platform where SMBus transaction errors are observed. This is
because PEC has to be enabled only if both controller and
adapter support it.

Added code to check PEC capability for adapter and enable it
only if both controller and adapter support PEC.

Signed-off-by: Madhava Reddy Siddareddygari <msiddare@cisco.com>
[Upstream from SONiC https://github.com/Azure/sonic-linux-kernel/pull/215]
Signed-off-by: Paul Menzel <pmenzel@molgen.mpg.de>
Link: https://lore.kernel.org/r/20210605052700.541455-1-pmenzel@molgen.mpg.de
[groeck: Dropped unnecessary continuation line]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 1f7fa5337974c..01a1ffc74bb6d 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2216,11 +2216,14 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 		data->has_status_word = true;
 	}
 
-	/* Enable PEC if the controller supports it */
+	/* Enable PEC if the controller and bus supports it */
 	if (!(data->flags & PMBUS_NO_CAPABILITY)) {
 		ret = i2c_smbus_read_byte_data(client, PMBUS_CAPABILITY);
-		if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK))
-			client->flags |= I2C_CLIENT_PEC;
+		if (ret >= 0 && (ret & PB_CAPABILITY_ERROR_CHECK)) {
+			if (i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) {
+				client->flags |= I2C_CLIENT_PEC;
+			}
+		}
 	}
 
 	/*
-- 
GitLab


From ff53b77e1e1bc9fd21e087e37a8444e8559d8d36 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Sat, 5 Jun 2021 15:18:21 +0200
Subject: [PATCH 3096/3804] docs: hwmon: adm1177.rst: avoid using ReSt
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/32b0db7e79a3ed0e817213113c607a1b819e3867.1622898327.git.mchehab+huawei@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/adm1177.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/hwmon/adm1177.rst b/Documentation/hwmon/adm1177.rst
index 471be1e98d6f6..1c85a2af92bf7 100644
--- a/Documentation/hwmon/adm1177.rst
+++ b/Documentation/hwmon/adm1177.rst
@@ -20,7 +20,8 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see :doc:`/i2c/instantiating-devices` for details.
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst
+for details.
 
 
 Sysfs entries
-- 
GitLab


From dbc0860f7a3d43604c380822a456d26ef6f70a06 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Wed, 9 Jun 2021 11:32:05 +0200
Subject: [PATCH 3097/3804] hwmon: (pmbus) Add new pmbus flag NO_WRITE_PROTECT

Some PMBus chips respond with invalid data when reading the WRITE_PROTECT
register. For such chips, this flag should be set so that the PMBus core
driver doesn't use the WRITE_PROTECT command to determine its behavior.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 8 +++++---
 include/linux/pmbus.h            | 9 +++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index 01a1ffc74bb6d..a5367df1cee83 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2231,9 +2231,11 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 	 * faults, and we should not try it. Also, in that case, writes into
 	 * limit registers need to be disabled.
 	 */
-	ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
-	if (ret > 0 && (ret & PB_WP_ANY))
-		data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
+	if (!(data->flags & PMBUS_NO_WRITE_PROTECT)) {
+		ret = i2c_smbus_read_byte_data(client, PMBUS_WRITE_PROTECT);
+		if (ret > 0 && (ret & PB_WP_ANY))
+			data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK;
+	}
 
 	if (data->info->pages)
 		pmbus_clear_faults(client);
diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h
index edd7c84fef658..12c515a27d3a0 100644
--- a/include/linux/pmbus.h
+++ b/include/linux/pmbus.h
@@ -56,6 +56,15 @@
  */
 #define PMBUS_READ_STATUS_AFTER_FAILED_CHECK	BIT(3)
 
+/*
+ * PMBUS_NO_WRITE_PROTECT
+ *
+ * Some PMBus chips respond with invalid data when reading the WRITE_PROTECT
+ * register. For such chips, this flag should be set so that the PMBus core
+ * driver doesn't use the WRITE_PROTECT command to determine its behavior.
+ */
+#define PMBUS_NO_WRITE_PROTECT			BIT(4)
+
 struct pmbus_platform_data {
 	u32 flags;		/* Device specific flags */
 
-- 
GitLab


From e8e00c83a268d5b7d2f5bd490c2269c1ede76a07 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Wed, 9 Jun 2021 11:32:06 +0200
Subject: [PATCH 3098/3804] hwmon: (pmbus) Add support for reading direct mode
 coefficients

Add support for reading and decoding direct format coefficients to
the PMBus core driver. If the new flag PMBUS_USE_COEFFICIENTS_CMD
is set, the driver will use the COEFFICIENTS register together with
the information in the pmbus_sensor_attr structs to initialize
relevant coefficients for the direct mode format.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
[groeck: Initialize ret with -EINVAL in pmbus_init_coefficients()]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 116 +++++++++++++++++++++++++++++++
 include/linux/pmbus.h            |   8 +++
 2 files changed, 124 insertions(+)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index a5367df1cee83..c4f557c8955b2 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2141,6 +2141,111 @@ static int pmbus_find_attributes(struct i2c_client *client,
 	return ret;
 }
 
+/*
+ * The pmbus_class_attr_map structure maps one sensor class to
+ * it's corresponding sensor attributes array.
+ */
+struct pmbus_class_attr_map {
+	enum pmbus_sensor_classes class;
+	int nattr;
+	const struct pmbus_sensor_attr *attr;
+};
+
+static const struct pmbus_class_attr_map class_attr_map[] = {
+	{
+		.class = PSC_VOLTAGE_IN,
+		.attr = voltage_attributes,
+		.nattr = ARRAY_SIZE(voltage_attributes),
+	}, {
+		.class = PSC_VOLTAGE_OUT,
+		.attr = voltage_attributes,
+		.nattr = ARRAY_SIZE(voltage_attributes),
+	}, {
+		.class = PSC_CURRENT_IN,
+		.attr = current_attributes,
+		.nattr = ARRAY_SIZE(current_attributes),
+	}, {
+		.class = PSC_CURRENT_OUT,
+		.attr = current_attributes,
+		.nattr = ARRAY_SIZE(current_attributes),
+	}, {
+		.class = PSC_POWER,
+		.attr = power_attributes,
+		.nattr = ARRAY_SIZE(power_attributes),
+	}, {
+		.class = PSC_TEMPERATURE,
+		.attr = temp_attributes,
+		.nattr = ARRAY_SIZE(temp_attributes),
+	}
+};
+
+/*
+ * Read the coefficients for direct mode.
+ */
+static int pmbus_read_coefficients(struct i2c_client *client,
+				   struct pmbus_driver_info *info,
+				   const struct pmbus_sensor_attr *attr)
+{
+	int rv;
+	union i2c_smbus_data data;
+	enum pmbus_sensor_classes class = attr->class;
+	s8 R;
+	s16 m, b;
+
+	data.block[0] = 2;
+	data.block[1] = attr->reg;
+	data.block[2] = 0x01;
+
+	rv = i2c_smbus_xfer(client->adapter, client->addr, client->flags,
+			    I2C_SMBUS_WRITE, PMBUS_COEFFICIENTS,
+			    I2C_SMBUS_BLOCK_PROC_CALL, &data);
+
+	if (rv < 0)
+		return rv;
+
+	if (data.block[0] != 5)
+		return -EIO;
+
+	m = data.block[1] | (data.block[2] << 8);
+	b = data.block[3] | (data.block[4] << 8);
+	R = data.block[5];
+	info->m[class] = m;
+	info->b[class] = b;
+	info->R[class] = R;
+
+	return rv;
+}
+
+static int pmbus_init_coefficients(struct i2c_client *client,
+				   struct pmbus_driver_info *info)
+{
+	int i, n, ret = -EINVAL;
+	const struct pmbus_class_attr_map *map;
+	const struct pmbus_sensor_attr *attr;
+
+	for (i = 0; i < ARRAY_SIZE(class_attr_map); i++) {
+		map = &class_attr_map[i];
+		if (info->format[map->class] != direct)
+			continue;
+		for (n = 0; n < map->nattr; n++) {
+			attr = &map->attr[n];
+			if (map->class != attr->class)
+				continue;
+			ret = pmbus_read_coefficients(client, info, attr);
+			if (ret >= 0)
+				break;
+		}
+		if (ret < 0) {
+			dev_err(&client->dev,
+				"No coefficients found for sensor class %d\n",
+				map->class);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Identify chip parameters.
  * This function is called for all chips.
@@ -2262,6 +2367,17 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data,
 			return ret;
 		}
 	}
+
+	if (data->flags & PMBUS_USE_COEFFICIENTS_CMD) {
+		if (!i2c_check_functionality(client->adapter,
+					     I2C_FUNC_SMBUS_BLOCK_PROC_CALL))
+			return -ENODEV;
+
+		ret = pmbus_init_coefficients(client, info);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h
index 12c515a27d3a0..fa9f08164c365 100644
--- a/include/linux/pmbus.h
+++ b/include/linux/pmbus.h
@@ -65,6 +65,14 @@
  */
 #define PMBUS_NO_WRITE_PROTECT			BIT(4)
 
+/*
+ * PMBUS_USE_COEFFICIENTS_CMD
+ *
+ * When this flag is set the PMBus core driver will use the COEFFICIENTS
+ * register to initialize the coefficients for the direct mode format.
+ */
+#define PMBUS_USE_COEFFICIENTS_CMD		BIT(5)
+
 struct pmbus_platform_data {
 	u32 flags;		/* Device specific flags */
 
-- 
GitLab


From 5e86f128d9eb44b19e311e5a1e50452344fd5628 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Wed, 9 Jun 2021 11:32:07 +0200
Subject: [PATCH 3099/3804] hwmon: (pmbus) Allow phase function even if it's
 not on page

Allow the use of a phase function even if it does not exist on
the associated page.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/pmbus_core.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index c4f557c8955b2..776ee2237be20 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -1329,14 +1329,14 @@ static int pmbus_add_sensor_attrs(struct i2c_client *client,
 
 		pages = paged ? info->pages : 1;
 		for (page = 0; page < pages; page++) {
-			if (!(info->func[page] & attrs->func))
-				continue;
-			ret = pmbus_add_sensor_attrs_one(client, data, info,
-							 name, index, page,
-							 0xff, attrs, paged);
-			if (ret)
-				return ret;
-			index++;
+			if (info->func[page] & attrs->func) {
+				ret = pmbus_add_sensor_attrs_one(client, data, info,
+								 name, index, page,
+								 0xff, attrs, paged);
+				if (ret)
+					return ret;
+				index++;
+			}
 			if (info->phases[page]) {
 				int phase;
 
-- 
GitLab


From 317f9d808a7a0dad28eba10d96527f536ff28347 Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Wed, 9 Jun 2021 11:32:08 +0200
Subject: [PATCH 3100/3804] hwmon: (pmbus/pim4328) Add PMBus driver for
 PIM4006, PIM4328 and PIM4820

Add hardware monitoring support for Flex power interface modules PIM4006,
PIM4328 and PIM4820.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/Kconfig   |   9 ++
 drivers/hwmon/pmbus/Makefile  |   1 +
 drivers/hwmon/pmbus/pim4328.c | 233 ++++++++++++++++++++++++++++++++++
 3 files changed, 243 insertions(+)
 create mode 100644 drivers/hwmon/pmbus/pim4328.c

diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 52d8cd63603e8..10ef548f74a46 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -267,6 +267,15 @@ config SENSORS_MP2975
 	  This driver can also be built as a module. If so, the module will
 	  be called mp2975.
 
+config SENSORS_PIM4328
+	tristate "Flex PIM4328 and compatibles"
+	help
+	  If you say yes here you get hardware monitoring support for Flex
+	  PIM4328, PIM4820 and PIM4006 Power Interface Modules.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called pim4328.
+
 config SENSORS_PM6764TR
 	tristate "ST PM6764TR"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index 35d293bb44bf9..b3354518f66f9 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -40,3 +40,4 @@ obj-$(CONFIG_SENSORS_UCD9000)	+= ucd9000.o
 obj-$(CONFIG_SENSORS_UCD9200)	+= ucd9200.o
 obj-$(CONFIG_SENSORS_XDPE122)	+= xdpe12284.o
 obj-$(CONFIG_SENSORS_ZL6100)	+= zl6100.o
+obj-$(CONFIG_SENSORS_PIM4328)	+= pim4328.o
diff --git a/drivers/hwmon/pmbus/pim4328.c b/drivers/hwmon/pmbus/pim4328.c
new file mode 100644
index 0000000000000..273ff6e576549
--- /dev/null
+++ b/drivers/hwmon/pmbus/pim4328.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hardware monitoring driver for PIM4006, PIM4328 and PIM4820
+ *
+ * Copyright (c) 2021 Flextronics International Sweden AB
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pmbus.h>
+#include <linux/slab.h>
+#include "pmbus.h"
+
+enum chips { pim4006, pim4328, pim4820 };
+
+struct pim4328_data {
+	enum chips id;
+	struct pmbus_driver_info info;
+};
+
+#define to_pim4328_data(x)  container_of(x, struct pim4328_data, info)
+
+/* PIM4006 and PIM4328 */
+#define PIM4328_MFR_READ_VINA		0xd3
+#define PIM4328_MFR_READ_VINB		0xd4
+
+/* PIM4006 */
+#define PIM4328_MFR_READ_IINA		0xd6
+#define PIM4328_MFR_READ_IINB		0xd7
+#define PIM4328_MFR_FET_CHECKSTATUS	0xd9
+
+/* PIM4328 */
+#define PIM4328_MFR_STATUS_BITS		0xd5
+
+/* PIM4820 */
+#define PIM4328_MFR_READ_STATUS		0xd0
+
+static const struct i2c_device_id pim4328_id[] = {
+	{"bmr455", pim4328},
+	{"pim4006", pim4006},
+	{"pim4106", pim4006},
+	{"pim4206", pim4006},
+	{"pim4306", pim4006},
+	{"pim4328", pim4328},
+	{"pim4406", pim4006},
+	{"pim4820", pim4820},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, pim4328_id);
+
+static int pim4328_read_word_data(struct i2c_client *client, int page,
+				  int phase, int reg)
+{
+	int ret;
+
+	if (page > 0)
+		return -ENXIO;
+
+	if (phase == 0xff)
+		return -ENODATA;
+
+	switch (reg) {
+	case PMBUS_READ_VIN:
+		ret = pmbus_read_word_data(client, page, phase,
+					   phase == 0 ? PIM4328_MFR_READ_VINA
+						      : PIM4328_MFR_READ_VINB);
+		break;
+	case PMBUS_READ_IIN:
+		ret = pmbus_read_word_data(client, page, phase,
+					   phase == 0 ? PIM4328_MFR_READ_IINA
+						      : PIM4328_MFR_READ_IINB);
+		break;
+	default:
+		ret = -ENODATA;
+	}
+
+	return ret;
+}
+
+static int pim4328_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct pim4328_data *data = to_pim4328_data(info);
+	int ret, status;
+
+	if (page > 0)
+		return -ENXIO;
+
+	switch (reg) {
+	case PMBUS_STATUS_BYTE:
+		ret = pmbus_read_byte_data(client, page, PMBUS_STATUS_BYTE);
+		if (ret < 0)
+			return ret;
+		if (data->id == pim4006) {
+			status = pmbus_read_word_data(client, page, 0xff,
+						      PIM4328_MFR_FET_CHECKSTATUS);
+			if (status < 0)
+				return status;
+			if (status & 0x0630) /* Input UV */
+				ret |= PB_STATUS_VIN_UV;
+		} else if (data->id == pim4328) {
+			status = pmbus_read_byte_data(client, page,
+						      PIM4328_MFR_STATUS_BITS);
+			if (status < 0)
+				return status;
+			if (status & 0x04) /* Input UV */
+				ret |= PB_STATUS_VIN_UV;
+			if (status & 0x40) /* Output UV */
+				ret |= PB_STATUS_NONE_ABOVE;
+		} else if (data->id == pim4820) {
+			status = pmbus_read_byte_data(client, page,
+						      PIM4328_MFR_READ_STATUS);
+			if (status < 0)
+				return status;
+			if (status & 0x05) /* Input OV or OC */
+				ret |= PB_STATUS_NONE_ABOVE;
+			if (status & 0x1a) /* Input UV */
+				ret |= PB_STATUS_VIN_UV;
+			if (status & 0x40) /* OT */
+				ret |= PB_STATUS_TEMPERATURE;
+		}
+		break;
+	default:
+		ret = -ENODATA;
+	}
+
+	return ret;
+}
+
+static int pim4328_probe(struct i2c_client *client)
+{
+	int status;
+	u8 device_id[I2C_SMBUS_BLOCK_MAX + 1];
+	const struct i2c_device_id *mid;
+	struct pim4328_data *data;
+	struct pmbus_driver_info *info;
+	struct pmbus_platform_data *pdata;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_READ_BYTE_DATA
+				     | I2C_FUNC_SMBUS_BLOCK_DATA))
+		return -ENODEV;
+
+	data = devm_kzalloc(&client->dev, sizeof(struct pim4328_data),
+			    GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	status = i2c_smbus_read_block_data(client, PMBUS_MFR_MODEL, device_id);
+	if (status < 0) {
+		dev_err(&client->dev, "Failed to read Manufacturer Model\n");
+		return status;
+	}
+	for (mid = pim4328_id; mid->name[0]; mid++) {
+		if (!strncasecmp(mid->name, device_id, strlen(mid->name)))
+			break;
+	}
+	if (!mid->name[0]) {
+		dev_err(&client->dev, "Unsupported device\n");
+		return -ENODEV;
+	}
+
+	if (strcmp(client->name, mid->name))
+		dev_notice(&client->dev,
+			   "Device mismatch: Configured %s, detected %s\n",
+			   client->name, mid->name);
+
+	data->id = mid->driver_data;
+	info = &data->info;
+	info->pages = 1;
+	info->read_byte_data = pim4328_read_byte_data;
+	info->read_word_data = pim4328_read_word_data;
+
+	pdata = devm_kzalloc(dev, sizeof(struct pmbus_platform_data),
+			     GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+	dev->platform_data = pdata;
+	pdata->flags = PMBUS_NO_CAPABILITY | PMBUS_NO_WRITE_PROTECT;
+
+	switch (data->id) {
+	case pim4006:
+		info->phases[0] = 2;
+		info->func[0] = PMBUS_PHASE_VIRTUAL | PMBUS_HAVE_VIN
+			| PMBUS_HAVE_TEMP | PMBUS_HAVE_IOUT;
+		info->pfunc[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_IIN;
+		info->pfunc[1] = PMBUS_HAVE_VIN | PMBUS_HAVE_IIN;
+		break;
+	case pim4328:
+		info->phases[0] = 2;
+		info->func[0] = PMBUS_PHASE_VIRTUAL
+			| PMBUS_HAVE_VCAP | PMBUS_HAVE_VIN
+			| PMBUS_HAVE_TEMP | PMBUS_HAVE_IOUT;
+		info->pfunc[0] = PMBUS_HAVE_VIN;
+		info->pfunc[1] = PMBUS_HAVE_VIN;
+		info->format[PSC_VOLTAGE_IN] = direct;
+		info->format[PSC_TEMPERATURE] = direct;
+		info->format[PSC_CURRENT_OUT] = direct;
+		pdata->flags |= PMBUS_USE_COEFFICIENTS_CMD;
+		break;
+	case pim4820:
+		info->func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_TEMP
+			| PMBUS_HAVE_IIN;
+		info->format[PSC_VOLTAGE_IN] = direct;
+		info->format[PSC_TEMPERATURE] = direct;
+		info->format[PSC_CURRENT_IN] = direct;
+		pdata->flags |= PMBUS_USE_COEFFICIENTS_CMD;
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	return pmbus_do_probe(client, info);
+}
+
+static struct i2c_driver pim4328_driver = {
+	.driver = {
+		   .name = "pim4328",
+		   },
+	.probe_new = pim4328_probe,
+	.id_table = pim4328_id,
+};
+
+module_i2c_driver(pim4328_driver);
+
+MODULE_AUTHOR("Erik Rosen <erik.rosen@metormote.com>");
+MODULE_DESCRIPTION("PMBus driver for PIM4006, PIM4328, PIM4820 power interface modules");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
-- 
GitLab


From bf8e0cd8d6b2c9be365ea53d36e9368f07880a2f Mon Sep 17 00:00:00 2001
From: Erik Rosen <erik.rosen@metormote.com>
Date: Wed, 9 Jun 2021 11:32:09 +0200
Subject: [PATCH 3101/3804] hwmon: (pmbus/pim4328) Add documentation for the
 pim4328 PMBus driver

Add documentation and index link for pim4328 PMBus driver.

Signed-off-by: Erik Rosen <erik.rosen@metormote.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/index.rst   |   1 +
 Documentation/hwmon/pim4328.rst | 105 ++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 Documentation/hwmon/pim4328.rst

diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 61e5d45326226..9d4f5b2b84b02 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -151,6 +151,7 @@ Hardware Monitoring Kernel Drivers
    pc87360
    pc87427
    pcf8591
+   pim4328
    pm6764tr
    pmbus
    powr1220
diff --git a/Documentation/hwmon/pim4328.rst b/Documentation/hwmon/pim4328.rst
new file mode 100644
index 0000000000000..70c9e7a6882c5
--- /dev/null
+++ b/Documentation/hwmon/pim4328.rst
@@ -0,0 +1,105 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Kernel driver pim4328
+=====================
+
+Supported chips:
+
+  * Flex PIM4328
+
+    Prefixes: 'pim4328', 'bmr455'
+
+    Addresses scanned: -
+
+    Datasheet:
+
+    https://flexpowermodules.com/resources/fpm-techspec-pim4328
+
+  * Flex PIM4820
+
+    Prefixes: 'pim4820'
+
+    Addresses scanned: -
+
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-pim4820
+
+  * Flex PIM4006, PIM4106, PIM4206, PIM4306, PIM4406
+
+    Prefixes: 'pim4006', 'pim4106', 'pim4206', 'pim4306', 'pim4406'
+
+    Addresses scanned: -
+
+    Datasheet: https://flexpowermodules.com/resources/fpm-techspec-pim4006
+
+Author: Erik Rosen <erik.rosen@metormote.com>
+
+
+Description
+-----------
+
+This driver supports hardware monitoring for Flex PIM4328 and
+compatible digital power interface modules.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus.rst and Documentation/hwmon/pmbus-core.rst for details
+on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. All attributes are read-only.
+
+======================= ========================================================
+in1_label		"vin"
+in1_input		Measured input voltage.
+in1_alarm		Input voltage alarm.
+
+in2_label		"vin.0"
+in2_input		Measured input voltage on input A.
+
+			PIM4328 and PIM4X06
+
+in3_label		"vin.1"
+in3_input		Measured input voltage on input B.
+
+			PIM4328 and PIM4X06
+
+in4_label		"vcap"
+in4_input		Measured voltage on holdup capacitor.
+
+			PIM4328
+
+curr1_label		"iin.0"
+curr1_input		Measured input current on input A.
+
+			PIM4X06
+
+curr2_label		"iin.1"
+curr2_input		Measured input current on input B.
+
+			PIM4X06
+
+currX_label		"iout1"
+currX_input		Measured output current.
+currX_alarm		Output current alarm.
+
+			X is 1 for PIM4820, 3 otherwise.
+
+temp1_input		Measured temperature.
+temp1_alarm		High temperature alarm.
+======================= ========================================================
-- 
GitLab


From 3efbcee8d4029795fa0a1ef90dc5b9ea763ed207 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Mon, 7 Jun 2021 12:34:29 +0200
Subject: [PATCH 3102/3804] hwmon: (pmbus) Add driver for Delta DPS-920AB PSU

This adds support for the Delta DPS-920AB PSU.

The only missing feature is fan control, which the PSU supports.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210607103431.2039073-1-robert.marko@sartura.hr
[groeck: Add MODULE_IMPORT_NS(PMBUS);]
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/dps920ab.rst |  73 +++++++++++
 Documentation/hwmon/index.rst    |   1 +
 drivers/hwmon/pmbus/Kconfig      |   9 ++
 drivers/hwmon/pmbus/Makefile     |   1 +
 drivers/hwmon/pmbus/dps920ab.c   | 208 +++++++++++++++++++++++++++++++
 5 files changed, 292 insertions(+)
 create mode 100644 Documentation/hwmon/dps920ab.rst
 create mode 100644 drivers/hwmon/pmbus/dps920ab.c

diff --git a/Documentation/hwmon/dps920ab.rst b/Documentation/hwmon/dps920ab.rst
new file mode 100644
index 0000000000000..c33b4cdc0a60c
--- /dev/null
+++ b/Documentation/hwmon/dps920ab.rst
@@ -0,0 +1,73 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver dps920ab
+========================
+
+Supported chips:
+
+  * Delta DPS920AB
+
+    Prefix: 'dps920ab'
+
+    Addresses scanned: -
+
+Authors:
+    Robert Marko <robert.marko@sartura.hr>
+
+
+Description
+-----------
+
+This driver implements support for Delta DPS920AB 920W 54V DC single output
+power supply with PMBus support.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus.rst for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
+details.
+
+
+Sysfs entries
+-------------
+
+======================= ======================================================
+curr1_label		"iin"
+curr1_input		Measured input current
+curr1_alarm		Input current high alarm
+
+curr2_label		"iout1"
+curr2_input		Measured output current
+curr2_max		Maximum output current
+curr2_rated_max		Maximum rated output current
+
+in1_label		"vin"
+in1_input		Measured input voltage
+in1_alarm		Input voltage alarm
+
+in2_label		"vout1"
+in2_input		Measured output voltage
+in2_rated_min		Minimum rated output voltage
+in2_rated_max		Maximum rated output voltage
+in2_alarm		Output voltage alarm
+
+power1_label		"pin"
+power1_input		Measured input power
+power1_alarm		Input power high alarm
+
+power2_label		"pout1"
+power2_input		Measured output power
+power2_rated_max	Maximum rated output power
+
+temp[1-3]_input		Measured temperature
+temp[1-3]_alarm		Temperature alarm
+
+fan1_alarm		Fan 1 warning.
+fan1_fault		Fan 1 fault.
+fan1_input		Fan 1 speed in RPM.
+======================= ======================================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 9d4f5b2b84b02..bc01601ea81aa 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -53,6 +53,7 @@ Hardware Monitoring Kernel Drivers
    da9055
    dell-smm-hwmon
    dme1737
+   dps920ab
    drivetemp
    ds1621
    ds620
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 10ef548f74a46..ffb609cee3a4a 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -86,6 +86,15 @@ config SENSORS_IBM_CFFPS
 	  This driver can also be built as a module. If so, the module will
 	  be called ibm-cffps.
 
+config SENSORS_DPS920AB
+	tristate "Delta DPS920AB Power Supply"
+	help
+	  If you say yes here you get hardware monitoring support for Delta
+	  DPS920AB Power Supplies.
+
+	  This driver can also be built as a module. If so, the module will
+	  be called dps920ab.
+
 config SENSORS_INSPUR_IPSPS
 	tristate "INSPUR Power System Power Supply"
 	help
diff --git a/drivers/hwmon/pmbus/Makefile b/drivers/hwmon/pmbus/Makefile
index b3354518f66f9..0ed4d596a948e 100644
--- a/drivers/hwmon/pmbus/Makefile
+++ b/drivers/hwmon/pmbus/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_SENSORS_BEL_PFE)	+= bel-pfe.o
 obj-$(CONFIG_SENSORS_BPA_RS600)	+= bpa-rs600.o
 obj-$(CONFIG_SENSORS_FSP_3Y)	+= fsp-3y.o
 obj-$(CONFIG_SENSORS_IBM_CFFPS)	+= ibm-cffps.o
+obj-$(CONFIG_SENSORS_DPS920AB)	+= dps920ab.o
 obj-$(CONFIG_SENSORS_INSPUR_IPSPS) += inspur-ipsps.o
 obj-$(CONFIG_SENSORS_IR35221)	+= ir35221.o
 obj-$(CONFIG_SENSORS_IR36021)	+= ir36021.o
diff --git a/drivers/hwmon/pmbus/dps920ab.c b/drivers/hwmon/pmbus/dps920ab.c
new file mode 100644
index 0000000000000..bd2df2a3c8e30
--- /dev/null
+++ b/drivers/hwmon/pmbus/dps920ab.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Driver for Delta DPS920AB PSU
+ *
+ * Copyright (C) 2021 Delta Networks, Inc.
+ * Copyright (C) 2021 Sartura Ltd.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include "pmbus.h"
+
+struct dps920ab_data {
+	char *mfr_model;
+	char *mfr_id;
+};
+
+static int dps920ab_read_word_data(struct i2c_client *client, int page, int phase, int reg)
+{
+	/*
+	 * This masks commands which are not supported.
+	 * PSU advertises that all features are supported,
+	 * in reality that unfortunately is not true.
+	 * So enable only those that the datasheet confirms.
+	 */
+	switch (reg) {
+	case PMBUS_FAN_COMMAND_1:
+	case PMBUS_IOUT_OC_WARN_LIMIT:
+	case PMBUS_STATUS_WORD:
+	case PMBUS_READ_VIN:
+	case PMBUS_READ_IIN:
+	case PMBUS_READ_VOUT:
+	case PMBUS_READ_IOUT:
+	case PMBUS_READ_TEMPERATURE_1:
+	case PMBUS_READ_TEMPERATURE_2:
+	case PMBUS_READ_TEMPERATURE_3:
+	case PMBUS_READ_FAN_SPEED_1:
+	case PMBUS_READ_POUT:
+	case PMBUS_READ_PIN:
+	case PMBUS_MFR_VOUT_MIN:
+	case PMBUS_MFR_VOUT_MAX:
+	case PMBUS_MFR_IOUT_MAX:
+	case PMBUS_MFR_POUT_MAX:
+		return pmbus_read_word_data(client, page, phase, reg);
+	default:
+		return -ENXIO;
+	}
+}
+
+static int dps920ab_write_word_data(struct i2c_client *client, int page, int reg,
+				    u16 word)
+{
+	/*
+	 * This masks commands which are not supported.
+	 * PSU only has one R/W register and that is
+	 * for the fan.
+	 */
+	switch (reg) {
+	case PMBUS_FAN_COMMAND_1:
+		return pmbus_write_word_data(client, page, reg, word);
+	default:
+		return -EACCES;
+	}
+}
+
+static struct pmbus_driver_info dps920ab_info = {
+	.pages = 1,
+
+	.format[PSC_VOLTAGE_IN] = linear,
+	.format[PSC_VOLTAGE_OUT] = linear,
+	.format[PSC_CURRENT_IN] = linear,
+	.format[PSC_CURRENT_OUT] = linear,
+	.format[PSC_POWER] = linear,
+	.format[PSC_FAN] = linear,
+	.format[PSC_TEMPERATURE] = linear,
+
+	.func[0] =
+		PMBUS_HAVE_VIN | PMBUS_HAVE_IIN | PMBUS_HAVE_PIN |
+		PMBUS_HAVE_VOUT | PMBUS_HAVE_IOUT | PMBUS_HAVE_POUT |
+		PMBUS_HAVE_TEMP  | PMBUS_HAVE_TEMP2 | PMBUS_HAVE_TEMP3 |
+		PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12 |
+		PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_STATUS_IOUT |
+		PMBUS_HAVE_STATUS_INPUT | PMBUS_HAVE_STATUS_TEMP,
+	.read_word_data = dps920ab_read_word_data,
+	.write_word_data = dps920ab_write_word_data,
+};
+
+static int dps920ab_mfr_id_show(struct seq_file *s, void *data)
+{
+	struct dps920ab_data *priv = s->private;
+
+	seq_printf(s, "%s\n", priv->mfr_id);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(dps920ab_mfr_id);
+
+static int dps920ab_mfr_model_show(struct seq_file *s, void *data)
+{
+	struct dps920ab_data *priv = s->private;
+
+	seq_printf(s, "%s\n", priv->mfr_model);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(dps920ab_mfr_model);
+
+static void dps920ab_init_debugfs(struct dps920ab_data *data, struct i2c_client *client)
+{
+	struct dentry *debugfs_dir;
+	struct dentry *root;
+
+	root = pmbus_get_debugfs_dir(client);
+	if (!root)
+		return;
+
+	debugfs_dir = debugfs_create_dir(client->name, root);
+	if (!debugfs_dir)
+		return;
+
+	debugfs_create_file("mfr_id",
+			    0400,
+			    debugfs_dir,
+			    data,
+			    &dps920ab_mfr_id_fops);
+
+	debugfs_create_file("mfr_model",
+			    0400,
+			    debugfs_dir,
+			    data,
+			    &dps920ab_mfr_model_fops);
+}
+
+static int dps920ab_probe(struct i2c_client *client)
+{
+	u8 buf[I2C_SMBUS_BLOCK_MAX + 1];
+	struct dps920ab_data *data;
+	int ret;
+
+	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	ret = i2c_smbus_read_block_data(client, PMBUS_MFR_ID, buf);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to read Manufacturer ID\n");
+		return ret;
+	}
+
+	buf[ret] = '\0';
+	if (ret != 5 || strncmp(buf, "DELTA", 5)) {
+		buf[ret] = '\0';
+		dev_err(&client->dev, "Unsupported Manufacturer ID '%s'\n", buf);
+		return -ENODEV;
+	}
+	data->mfr_id = devm_kstrdup(&client->dev, buf, GFP_KERNEL);
+	if (!data->mfr_id)
+		return -ENOMEM;
+
+	ret = i2c_smbus_read_block_data(client, PMBUS_MFR_MODEL, buf);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to read Manufacturer Model\n");
+		return ret;
+	}
+
+	buf[ret] = '\0';
+	if (ret != 11 || strncmp(buf, "DPS-920AB", 9)) {
+		dev_err(&client->dev, "Unsupported Manufacturer Model '%s'\n", buf);
+		return -ENODEV;
+	}
+	data->mfr_model = devm_kstrdup(&client->dev, buf, GFP_KERNEL);
+	if (!data->mfr_model)
+		return -ENOMEM;
+
+	ret = pmbus_do_probe(client, &dps920ab_info);
+	if (ret)
+		return ret;
+
+	dps920ab_init_debugfs(data, client);
+
+	return 0;
+}
+
+static const struct of_device_id __maybe_unused dps920ab_of_match[] = {
+	{ .compatible = "delta,dps920ab", },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, dps920ab_of_match);
+
+static struct i2c_driver dps920ab_driver = {
+	.driver = {
+		   .name = "dps920ab",
+		   .of_match_table = of_match_ptr(dps920ab_of_match),
+	},
+	.probe_new = dps920ab_probe,
+};
+
+module_i2c_driver(dps920ab_driver);
+
+MODULE_AUTHOR("Robert Marko <robert.marko@sartura.hr>");
+MODULE_DESCRIPTION("PMBus driver for Delta DPS920AB PSU");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PMBUS);
-- 
GitLab


From 8b1d61cd47ccea482a3f68c99d7358e3daea35fa Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Mon, 7 Jun 2021 12:34:30 +0200
Subject: [PATCH 3103/3804] dt-bindings: trivial-devices: Add Delta DPS920AB

Add trivial device entry for Delta DPS920AB PSU.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Link: https://lore.kernel.org/r/20210607103431.2039073-2-robert.marko@sartura.hr
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/devicetree/bindings/trivial-devices.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index f8824e1dd24c4..37ac0a3ae3b46 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -73,6 +73,8 @@ properties:
           - dallas,ds4510
             # Digital Thermometer and Thermostat
           - dallas,ds75
+            # Delta Electronics DPS920AB 920W 54V Power Supply
+          - delta,dps920ab
             # 1/4 Brick DC/DC Regulated Power Module
           - delta,q54sj108a2
             # Devantech SRF02 ultrasonic ranger in I2C mode
-- 
GitLab


From c5679f3e702ce6b7d3d0d95b5a7e2e4b5c780006 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Wed, 9 Jun 2021 11:59:23 -0700
Subject: [PATCH 3104/3804] MAINTAINERS: Add Delta DPS920AB PSU driver

Add maintainers entry for the Delta DPS920AB PSU driver.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bc0ceef87b73f..2c743057de3a1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5189,6 +5189,13 @@ W:	https://linuxtv.org
 T:	git git://linuxtv.org/media_tree.git
 F:	drivers/media/platform/sti/delta
 
+DELTA DPS920AB PSU DRIVER
+M:	Robert Marko <robert.marko@sartura.hr>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/dps920ab.rst
+F:	drivers/hwmon/pmbus/dps920ab.c
+
 DENALI NAND DRIVER
 L:	linux-mtd@lists.infradead.org
 S:	Orphan
-- 
GitLab


From f0000797a3862eba99d06e65be846317c1ccbd8e Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Fri, 11 Jun 2021 15:22:57 +0100
Subject: [PATCH 3105/3804] hwmon: (ntc_thermistor) Drop unused headers.

The IIO usage in this driver is purely consumer, so it should only
be including linux/iio/consumer.h. Whilst here, drop pm_runtime.h
as there is no runtime power management in the driver.

Found using include-what-you-use and manual inspection of the
suggestions.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20210611142257.103094-1-jic23@kernel.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ntc_thermistor.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index 8587189c7f150..18fd6f12ca162 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -8,7 +8,6 @@
 
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/pm_runtime.h>
 #include <linux/math64.h>
 #include <linux/platform_device.h>
 #include <linux/err.h>
@@ -17,9 +16,6 @@
 
 #include <linux/platform_data/ntc_thermistor.h>
 
-#include <linux/iio/iio.h>
-#include <linux/iio/machine.h>
-#include <linux/iio/driver.h>
 #include <linux/iio/consumer.h>
 
 #include <linux/hwmon.h>
-- 
GitLab


From 9e077b52d86ac364a295b05c916c7478a16865b2 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Tue, 1 Jun 2021 17:53:28 +0200
Subject: [PATCH 3106/3804] sched/pelt: Check that *_avg are null when *_sum
 are

Check that we never break the rule that pelt's avg values are null if
pelt's sum are.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Odin Ugedal <odin@uged.al>
Link: https://lore.kernel.org/r/20210601155328.19487-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ce625bf8024d9..198514dcbe46b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8026,6 +8026,15 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 	if (cfs_rq->avg.runnable_sum)
 		return false;
 
+	/*
+	 * _avg must be null when _sum are null because _avg = _sum / divider
+	 * Make sure that rounding and/or propagation of PELT values never
+	 * break this.
+	 */
+	SCHED_WARN_ON(cfs_rq->avg.load_avg ||
+		      cfs_rq->avg.util_avg ||
+		      cfs_rq->avg.runnable_avg);
+
 	return true;
 }
 
-- 
GitLab


From 83c5e9d573e1f0757f324d01adb6ee77b49c3f0e Mon Sep 17 00:00:00 2001
From: Dietmar Eggemann <dietmar.eggemann@arm.com>
Date: Tue, 1 Jun 2021 10:36:16 +0200
Subject: [PATCH 3107/3804] sched/fair: Return early from update_tg_cfs_load()
 if delta == 0

In case the _avg delta is 0 there is no need to update se's _avg
(level n) nor cfs_rq's _avg (level n-1). These values stay the same.

Since cfs_rq's _avg isn't changed, i.e. no load is propagated down,
cfs_rq's _sum should stay the same as well.

So bail out after se's _sum has been updated.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20210601083616.804229-1-dietmar.eggemann@arm.com
---
 kernel/sched/fair.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 198514dcbe46b..06c8ba7b34008 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3502,9 +3502,12 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	load_sum = (s64)se_weight(se) * runnable_sum;
 	load_avg = div_s64(load_sum, divider);
 
+	se->avg.load_sum = runnable_sum;
+
 	delta = load_avg - se->avg.load_avg;
+	if (!delta)
+		return;
 
-	se->avg.load_sum = runnable_sum;
 	se->avg.load_avg = load_avg;
 
 	add_positive(&cfs_rq->avg.load_avg, delta);
-- 
GitLab


From 2ad8ccc17d1e4270cf65a3f2a07a7534aa23e3fb Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Mon, 14 Jun 2021 20:10:30 +0100
Subject: [PATCH 3108/3804] thermal/cpufreq_cooling: Update offline CPUs
 per-cpu thermal_pressure

The thermal pressure signal gives information to the scheduler about
reduced CPU capacity due to thermal. It is based on a value stored in
a per-cpu 'thermal_pressure' variable. The online CPUs will get the
new value there, while the offline won't. Unfortunately, when the CPU
is back online, the value read from per-cpu variable might be wrong
(stale data).  This might affect the scheduler decisions, since it
sees the CPU capacity differently than what is actually available.

Fix it by making sure that all online+offline CPUs would get the
proper value in their per-cpu variable when thermal framework sets
capping.

Fixes: f12e4f66ab6a3 ("thermal/cpu-cooling: Update thermal pressure in case of a maximum frequency capping")
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20210614191030.22241-1-lukasz.luba@arm.com
---
 drivers/thermal/cpufreq_cooling.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c
index eeb4e4b76c0be..43b1ae8a77893 100644
--- a/drivers/thermal/cpufreq_cooling.c
+++ b/drivers/thermal/cpufreq_cooling.c
@@ -478,7 +478,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
 	ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
 	if (ret >= 0) {
 		cpufreq_cdev->cpufreq_state = state;
-		cpus = cpufreq_cdev->policy->cpus;
+		cpus = cpufreq_cdev->policy->related_cpus;
 		max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
 		capacity = frequency * max_capacity;
 		capacity /= cpufreq_cdev->policy->cpuinfo.max_freq;
-- 
GitLab


From 489f16459e0008c7a5c4c5af34bd80898aa82c2d Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Mon, 14 Jun 2021 20:11:28 +0100
Subject: [PATCH 3109/3804] sched/fair: Take thermal pressure into account
 while estimating energy

Energy Aware Scheduling (EAS) needs to be able to predict the frequency
requests made by the SchedUtil governor to properly estimate energy used
in the future. It has to take into account CPUs utilization and forecast
Performance Domain (PD) frequency. There is a corner case when the max
allowed frequency might be reduced due to thermal. SchedUtil is aware of
that reduced frequency, so it should be taken into account also in EAS
estimations.

SchedUtil, as a CPUFreq governor, knows the maximum allowed frequency of
a CPU, thanks to cpufreq_driver_resolve_freq() and internal clamping
to 'policy::max'. SchedUtil is responsible to respect that upper limit
while setting the frequency through CPUFreq drivers. This effective
frequency is stored internally in 'sugov_policy::next_freq' and EAS has
to predict that value.

In the existing code the raw value of arch_scale_cpu_capacity() is used
for clamping the returned CPU utilization from effective_cpu_util().
This patch fixes issue with too big single CPU utilization, by introducing
clamping to the allowed CPU capacity. The allowed CPU capacity is a CPU
capacity reduced by thermal pressure raw value.

Thanks to knowledge about allowed CPU capacity, we don't get too big value
for a single CPU utilization, which is then added to the util sum. The
util sum is used as a source of information for estimating whole PD energy.
To avoid wrong energy estimation in EAS (due to capped frequency), make
sure that the calculation of util sum is aware of allowed CPU capacity.

This thermal pressure might be visible in scenarios where the CPUs are not
heavily loaded, but some other component (like GPU) drastically reduced
available power budget and increased the SoC temperature. Thus, we still
use EAS for task placement and CPUs are not over-utilized.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20210614191128.22735-1-lukasz.luba@arm.com
---
 kernel/sched/fair.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 06c8ba7b34008..0d6d190accb09 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6535,8 +6535,11 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 	struct cpumask *pd_mask = perf_domain_span(pd);
 	unsigned long cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
 	unsigned long max_util = 0, sum_util = 0;
+	unsigned long _cpu_cap = cpu_cap;
 	int cpu;
 
+	_cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask));
+
 	/*
 	 * The capacity state of CPUs of the current rd can be driven by CPUs
 	 * of another rd if they belong to the same pd. So, account for the
@@ -6572,8 +6575,10 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 		 * is already enough to scale the EM reported power
 		 * consumption at the (eventually clamped) cpu_capacity.
 		 */
-		sum_util += effective_cpu_util(cpu, util_running, cpu_cap,
-					       ENERGY_UTIL, NULL);
+		cpu_util = effective_cpu_util(cpu, util_running, cpu_cap,
+					      ENERGY_UTIL, NULL);
+
+		sum_util += min(cpu_util, _cpu_cap);
 
 		/*
 		 * Performance domain frequency: utilization clamping
@@ -6584,7 +6589,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 		 */
 		cpu_util = effective_cpu_util(cpu, util_freq, cpu_cap,
 					      FREQUENCY_UTIL, tsk);
-		max_util = max(max_util, cpu_util);
+		max_util = max(max_util, min(cpu_util, _cpu_cap));
 	}
 
 	return em_cpu_energy(pd->em_pd, max_util, sum_util);
-- 
GitLab


From 8f1b971b4750e83e8fbd2f91a9efd4a38ad0ae51 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <lukasz.luba@arm.com>
Date: Mon, 14 Jun 2021 20:12:38 +0100
Subject: [PATCH 3110/3804] sched/cpufreq: Consider reduced CPU capacity in
 energy calculation

Energy Aware Scheduling (EAS) needs to predict the decisions made by
SchedUtil. The map_util_freq() exists to do that.

There are corner cases where the max allowed frequency might be reduced
(due to thermal). SchedUtil as a CPUFreq governor, is aware of that
but EAS is not. This patch aims to address it.

SchedUtil stores the maximum allowed frequency in
'sugov_policy::next_freq' field. EAS has to predict that value, which is
the real used frequency. That value is made after a call to
cpufreq_driver_resolve_freq() which clamps to the CPUFreq policy limits.
In the existing code EAS is not able to predict that real frequency.
This leads to energy estimation errors.

To avoid wrong energy estimation in EAS (due to frequency misprediction)
make sure that the step which calculates Performance Domain frequency,
is also aware of the allowed CPU capacity.

Furthermore, modify map_util_freq() to not extend the frequency value.
Instead, use map_util_perf() to extend the util value in both places:
SchedUtil and EAS, but for EAS clamp it to max allowed CPU capacity.
In the end, we achieve the same desirable behavior for both subsystems
and alignment in regards to the real CPU frequency.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> (For the schedutil part)
Link: https://lore.kernel.org/r/20210614191238.23224-1-lukasz.luba@arm.com
---
 include/linux/energy_model.h     | 16 +++++++++++++---
 include/linux/sched/cpufreq.h    |  2 +-
 kernel/sched/cpufreq_schedutil.c |  1 +
 kernel/sched/fair.c              |  2 +-
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 757fc60658fa6..3f221dbf5f95d 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -91,6 +91,8 @@ void em_dev_unregister_perf_domain(struct device *dev);
  * @pd		: performance domain for which energy has to be estimated
  * @max_util	: highest utilization among CPUs of the domain
  * @sum_util	: sum of the utilization of all CPUs in the domain
+ * @allowed_cpu_cap	: maximum allowed CPU capacity for the @pd, which
+ *			  might reflect reduced frequency (due to thermal)
  *
  * This function must be used only for CPU devices. There is no validation,
  * i.e. if the EM is a CPU type and has cpumask allocated. It is called from
@@ -100,7 +102,8 @@ void em_dev_unregister_perf_domain(struct device *dev);
  * a capacity state satisfying the max utilization of the domain.
  */
 static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
-				unsigned long max_util, unsigned long sum_util)
+				unsigned long max_util, unsigned long sum_util,
+				unsigned long allowed_cpu_cap)
 {
 	unsigned long freq, scale_cpu;
 	struct em_perf_state *ps;
@@ -112,11 +115,17 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	/*
 	 * In order to predict the performance state, map the utilization of
 	 * the most utilized CPU of the performance domain to a requested
-	 * frequency, like schedutil.
+	 * frequency, like schedutil. Take also into account that the real
+	 * frequency might be set lower (due to thermal capping). Thus, clamp
+	 * max utilization to the allowed CPU capacity before calculating
+	 * effective frequency.
 	 */
 	cpu = cpumask_first(to_cpumask(pd->cpus));
 	scale_cpu = arch_scale_cpu_capacity(cpu);
 	ps = &pd->table[pd->nr_perf_states - 1];
+
+	max_util = map_util_perf(max_util);
+	max_util = min(max_util, allowed_cpu_cap);
 	freq = map_util_freq(max_util, ps->frequency, scale_cpu);
 
 	/*
@@ -209,7 +218,8 @@ static inline struct em_perf_domain *em_pd_get(struct device *dev)
 	return NULL;
 }
 static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
-			unsigned long max_util, unsigned long sum_util)
+			unsigned long max_util, unsigned long sum_util,
+			unsigned long allowed_cpu_cap)
 {
 	return 0;
 }
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index 6205578ab6ee6..bdd31ab93bc51 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -26,7 +26,7 @@ bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy);
 static inline unsigned long map_util_freq(unsigned long util,
 					unsigned long freq, unsigned long cap)
 {
-	return (freq + (freq >> 2)) * util / cap;
+	return freq * util / cap;
 }
 
 static inline unsigned long map_util_perf(unsigned long util)
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 4f09afd2f3211..57124614363df 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -151,6 +151,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 
+	util = map_util_perf(util);
 	freq = map_util_freq(util, freq, max);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0d6d190accb09..ed7df1b9cba93 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6592,7 +6592,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 		max_util = max(max_util, min(cpu_util, _cpu_cap));
 	}
 
-	return em_cpu_energy(pd->em_pd, max_util, sum_util);
+	return em_cpu_energy(pd->em_pd, max_util, sum_util, _cpu_cap);
 }
 
 /*
-- 
GitLab


From 94aafc3ee31dc199d1078ffac9edd976b7f47b3d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 15 Jun 2021 12:16:11 +0100
Subject: [PATCH 3111/3804] sched/fair: Age the average idle time

This is a partial forward-port of Peter Zijlstra's work first posted
at:

   https://lore.kernel.org/lkml/20180530142236.667774973@infradead.org/

Currently select_idle_cpu()'s proportional scheme uses the average idle
time *for when we are idle*, that is temporally challenged.  When a CPU
is not at all idle, we'll happily continue using whatever value we did
see when the CPU goes idle. To fix this, introduce a separate average
idle and age it (the existing value still makes sense for things like
new-idle balancing, which happens when we do go idle).

The overall goal is to not spend more time scanning for idle CPUs than
we're idle for. Otherwise we're inhibiting work. This means that we need to
consider the cost over all the wake-ups between consecutive idle periods.
To track this, the scan cost is subtracted from the estimated average
idle time.

The impact of this patch is related to workloads that have domains that
are fully busy or overloaded. Without the patch, the scan depth may be
too high because a CPU is not reaching idle.

Due to the nature of the patch, this is a regression magnet. It
potentially wins when domains are almost fully busy or overloaded --
at that point searches are likely to fail but idle is not being aged
as CPUs are active so search depth is too large and useless. It will
potentially show regressions when there are idle CPUs and a deep search is
beneficial. This tbench result on a 2-socket broadwell machine partially
illustrates the problem

                          5.13.0-rc2             5.13.0-rc2
                             vanilla     sched-avgidle-v1r5
Hmean     1        445.02 (   0.00%)      451.36 *   1.42%*
Hmean     2        830.69 (   0.00%)      846.03 *   1.85%*
Hmean     4       1350.80 (   0.00%)     1505.56 *  11.46%*
Hmean     8       2888.88 (   0.00%)     2586.40 * -10.47%*
Hmean     16      5248.18 (   0.00%)     5305.26 *   1.09%*
Hmean     32      8914.03 (   0.00%)     9191.35 *   3.11%*
Hmean     64     10663.10 (   0.00%)    10192.65 *  -4.41%*
Hmean     128    18043.89 (   0.00%)    18478.92 *   2.41%*
Hmean     256    16530.89 (   0.00%)    17637.16 *   6.69%*
Hmean     320    16451.13 (   0.00%)    17270.97 *   4.98%*

Note that 8 was a regression point where a deeper search would have helped
but it gains for high thread counts when searches are useless. Hackbench
is a more extreme example although not perfect as the tasks idle rapidly

hackbench-process-pipes
                          5.13.0-rc2             5.13.0-rc2
                             vanilla     sched-avgidle-v1r5
Amean     1        0.3950 (   0.00%)      0.3887 (   1.60%)
Amean     4        0.9450 (   0.00%)      0.9677 (  -2.40%)
Amean     7        1.4737 (   0.00%)      1.4890 (  -1.04%)
Amean     12       2.3507 (   0.00%)      2.3360 *   0.62%*
Amean     21       4.0807 (   0.00%)      4.0993 *  -0.46%*
Amean     30       5.6820 (   0.00%)      5.7510 *  -1.21%*
Amean     48       8.7913 (   0.00%)      8.7383 (   0.60%)
Amean     79      14.3880 (   0.00%)     13.9343 *   3.15%*
Amean     110     21.2233 (   0.00%)     19.4263 *   8.47%*
Amean     141     28.2930 (   0.00%)     25.1003 *  11.28%*
Amean     172     34.7570 (   0.00%)     30.7527 *  11.52%*
Amean     203     41.0083 (   0.00%)     36.4267 *  11.17%*
Amean     234     47.7133 (   0.00%)     42.0623 *  11.84%*
Amean     265     53.0353 (   0.00%)     47.7720 *   9.92%*
Amean     296     60.0170 (   0.00%)     53.4273 *  10.98%*
Stddev    1        0.0052 (   0.00%)      0.0025 (  51.57%)
Stddev    4        0.0357 (   0.00%)      0.0370 (  -3.75%)
Stddev    7        0.0190 (   0.00%)      0.0298 ( -56.64%)
Stddev    12       0.0064 (   0.00%)      0.0095 ( -48.38%)
Stddev    21       0.0065 (   0.00%)      0.0097 ( -49.28%)
Stddev    30       0.0185 (   0.00%)      0.0295 ( -59.54%)
Stddev    48       0.0559 (   0.00%)      0.0168 (  69.92%)
Stddev    79       0.1559 (   0.00%)      0.0278 (  82.17%)
Stddev    110      1.1728 (   0.00%)      0.0532 (  95.47%)
Stddev    141      0.7867 (   0.00%)      0.0968 (  87.69%)
Stddev    172      1.0255 (   0.00%)      0.0420 (  95.91%)
Stddev    203      0.8106 (   0.00%)      0.1384 (  82.92%)
Stddev    234      1.1949 (   0.00%)      0.1328 (  88.89%)
Stddev    265      0.9231 (   0.00%)      0.0820 (  91.11%)
Stddev    296      1.0456 (   0.00%)      0.1327 (  87.31%)

Again, higher thread counts benefit and the standard deviation
shows that results are also a lot more stable when the idle
time is aged.

The patch potentially matters when a socket has multiple LLCs as the
maximum search depth is lower. However, some of the test results were
suspiciously good (e.g. specjbb2005 gaining 50% on a Zen1 machine) and
other results were not dramatically different to other machines.

Given the nature of the patch, Peter's full series is not being forward
ported as each part should stand on its own. Preferably they would be
merged at different times to reduce the risk of false bisections.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210615111611.GH30378@techsingularity.net
---
 kernel/sched/core.c  |  5 +++++
 kernel/sched/fair.c  | 25 +++++++++++++++++++++----
 kernel/sched/sched.h |  3 +++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9e9a5be35cde9..75655cdee3bb9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3340,6 +3340,9 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
 		if (rq->avg_idle > max)
 			rq->avg_idle = max;
 
+		rq->wake_stamp = jiffies;
+		rq->wake_avg_idle = rq->avg_idle / 2;
+
 		rq->idle_stamp = 0;
 	}
 #endif
@@ -9023,6 +9026,8 @@ void __init sched_init(void)
 		rq->online = 0;
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
+		rq->wake_stamp = jiffies;
+		rq->wake_avg_idle = rq->avg_idle;
 		rq->max_idle_balance_cost = sysctl_sched_migration_cost;
 
 		INIT_LIST_HEAD(&rq->cfs_tasks);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ed7df1b9cba93..3af4afe2165b8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6139,9 +6139,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	int i, cpu, idle_cpu = -1, nr = INT_MAX;
+	struct rq *this_rq = this_rq();
 	int this = smp_processor_id();
 	struct sched_domain *this_sd;
-	u64 time;
+	u64 time = 0;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6151,12 +6152,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 
 	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		u64 avg_cost, avg_idle, span_avg;
+		unsigned long now = jiffies;
 
 		/*
-		 * Due to large variance we need a large fuzz factor;
-		 * hackbench in particularly is sensitive here.
+		 * If we're busy, the assumption that the last idle period
+		 * predicts the future is flawed; age away the remaining
+		 * predicted idle time.
 		 */
-		avg_idle = this_rq()->avg_idle / 512;
+		if (unlikely(this_rq->wake_stamp < now)) {
+			while (this_rq->wake_stamp < now && this_rq->wake_avg_idle) {
+				this_rq->wake_stamp++;
+				this_rq->wake_avg_idle >>= 1;
+			}
+		}
+
+		avg_idle = this_rq->wake_avg_idle;
 		avg_cost = this_sd->avg_scan_cost + 1;
 
 		span_avg = sd->span_weight * avg_idle;
@@ -6188,6 +6198,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 
 	if (sched_feat(SIS_PROP) && !has_idle_core) {
 		time = cpu_clock(this) - time;
+
+		/*
+		 * Account for the scan cost of wakeups against the average
+		 * idle time.
+		 */
+		this_rq->wake_avg_idle -= min(this_rq->wake_avg_idle, time);
+
 		update_avg(&this_sd->avg_scan_cost, time);
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8f0194cee0baf..01e48f682d54e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1022,6 +1022,9 @@ struct rq {
 	u64			idle_stamp;
 	u64			avg_idle;
 
+	unsigned long		wake_stamp;
+	u64			wake_avg_idle;
+
 	/* This is used to determine avg_idle's max value */
 	u64			max_idle_balance_cost;
 
-- 
GitLab


From 5471eea5d3bf850316f1064a6f57b34c444bce67 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Mon, 14 Jun 2021 10:59:42 -0700
Subject: [PATCH 3112/3804] perf/x86: Reset the dirty counter to prevent the
 leak for an RDPMC task

The counter value of a perf task may leak to another RDPMC task.
For example, a perf stat task as below is running on CPU 0.

    perf stat -e 'branches,cycles' -- taskset -c 0 ./workload

In the meantime, an RDPMC task, which is also running on CPU 0, may read
the GP counters periodically. (The RDPMC task creates a fixed event,
but read four GP counters.)

    $./rdpmc_read_all_counters
    index 0x0 value 0x8001e5970f99
    index 0x1 value 0x8005d750edb6
    index 0x2 value 0x0
    index 0x3 value 0x0

    index 0x0 value 0x8002358e48a5
    index 0x1 value 0x8006bd1e3bc9
    index 0x2 value 0x0
    index 0x3 value 0x0

It is a potential security issue. Once the attacker knows what the other
thread is counting, the PerfMon counter can be used as a side-channel to
attack cryptosystems.

The counter value of the perf stat task leaks to the RDPMC task because
perf never clears the counter when it's stopped.

Three methods were considered to address the issue.

 - Unconditionally reset the counter in x86_pmu_del(). It can bring extra
   overhead even when there is no RDPMC task running.

 - Only reset the un-assigned dirty counters when the RDPMC task is
   scheduled in via sched_task(). It fails for the below case.

	Thread A			Thread B

	clone(CLONE_THREAD) --->
	set_affine(0)
					set_affine(1)
					while (!event-enabled)
						;
	event = perf_event_open()
	mmap(event)
	ioctl(event, IOC_ENABLE); --->
					RDPMC

   Counters are still leaked to the thread B.

 - Only reset the un-assigned dirty counters before updating the CR4.PCE
   bit. The method is implemented here.

The dirty counter is a counter, on which the assigned event has been
deleted, but the counter is not reset. To track the dirty counters,
add a 'dirty' variable in the struct cpu_hw_events.

The security issue can only be found with an RDPMC task. To enable the
RDPMC, the CR4.PCE bit has to be updated. Add a
perf_clear_dirty_counters() right before updating the CR4.PCE bit to
clear the existing dirty counters. Only the current un-assigned dirty
counters are reset, because the RDPMC assigned dirty counters will be
updated soon.

After applying the patch,

        $ ./rdpmc_read_all_counters
        index 0x0 value 0x0
        index 0x1 value 0x0
        index 0x2 value 0x0
        index 0x3 value 0x0

        index 0x0 value 0x0
        index 0x1 value 0x0
        index 0x2 value 0x0
        index 0x3 value 0x0

Performance

The performance of a context switch is only impacted when there are two
or more perf users and one of the users is an RDPMC user. In other
cases, there is no performance impact.

The worst-case occurs when there are two users: the RDPMC user only
uses one counter; while the other user uses all available counters.
When the RDPMC task is scheduled in, all the counters, other than the
RDPMC assigned one, have to be reset.

Test results for the worst-case, using a modified lat_ctx as measured
on an Ice Lake platform, which has 8 GP and 3 FP counters (ignoring
SLOTS).

    lat_ctx -s 128K -N 1000 processes 2

Without the patch:
  The context switch time is 4.97 us

With the patch:
  The context switch time is 5.16 us

There is ~4% performance drop for the context switching time in the
worst-case.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1623693582-187370-1-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/core.c            | 28 +++++++++++++++++++++++++++-
 arch/x86/events/perf_event.h      |  1 +
 arch/x86/include/asm/perf_event.h |  1 +
 arch/x86/mm/tlb.c                 | 10 ++++++++--
 4 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 8e509325c2c3d..c0167d52832e2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1624,6 +1624,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto do_del;
 
+	__set_bit(event->hw.idx, cpuc->dirty);
+
 	/*
 	 * Not a TXN, therefore cleanup properly.
 	 */
@@ -2472,6 +2474,31 @@ static int x86_pmu_event_init(struct perf_event *event)
 	return err;
 }
 
+void perf_clear_dirty_counters(void)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int i;
+
+	 /* Don't need to clear the assigned counter. */
+	for (i = 0; i < cpuc->n_events; i++)
+		__clear_bit(cpuc->assign[i], cpuc->dirty);
+
+	if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
+		return;
+
+	for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
+		/* Metrics and fake events don't have corresponding HW counters. */
+		if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
+			continue;
+		else if (i >= INTEL_PMC_IDX_FIXED)
+			wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
+		else
+			wrmsrl(x86_pmu_event_addr(i), 0);
+	}
+
+	bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
+}
+
 static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
 {
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
@@ -2495,7 +2522,6 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
 
 static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
 {
-
 	if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
 		return;
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 27fa85e7d4fda..d6003e08b055f 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -229,6 +229,7 @@ struct cpu_hw_events {
 	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	unsigned long		dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	int			enabled;
 
 	int			n_events; /* the # of events in the below arrays */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 544f41a179fb6..8fc1b5003713f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -478,6 +478,7 @@ struct x86_pmu_lbr {
 
 extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 extern void perf_check_microcode(void);
+extern void perf_clear_dirty_counters(void);
 extern int x86_perf_rdpmc_index(struct perf_event *event);
 #else
 static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 78804680e9231..cfe6b1e85fa61 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,6 +14,7 @@
 #include <asm/nospec-branch.h>
 #include <asm/cache.h>
 #include <asm/apic.h>
+#include <asm/perf_event.h>
 
 #include "mm_internal.h"
 
@@ -404,9 +405,14 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm)
 {
 	if (static_branch_unlikely(&rdpmc_always_available_key) ||
 	    (!static_branch_unlikely(&rdpmc_never_available_key) &&
-	     atomic_read(&mm->context.perf_rdpmc_allowed)))
+	     atomic_read(&mm->context.perf_rdpmc_allowed))) {
+		/*
+		 * Clear the existing dirty counters to
+		 * prevent the leak for an RDPMC task.
+		 */
+		perf_clear_dirty_counters();
 		cr4_set_bits_irqsoff(X86_CR4_PCE);
-	else
+	} else
 		cr4_clear_bits_irqsoff(X86_CR4_PCE);
 }
 
-- 
GitLab


From 795e0e38de2c36561a4f14e6e97b8a82f6f2e03c Mon Sep 17 00:00:00 2001
From: Wan Jiabing <wanjiabing@vivo.com>
Date: Tue, 15 Jun 2021 19:49:20 +0800
Subject: [PATCH 3113/3804] cpuidle: teo: remove unneeded semicolon in
 teo_select()

Fix following coccicheck warning:
drivers/cpuidle/governors/teo.c:315:10-11: Unneeded semicolon

Signed-off-by: Wan Jiabing <wanjiabing@vivo.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 1e0b2f828abbe..7b91060e82f67 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -312,7 +312,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
 	/* Check if there is any choice in the first place. */
 	if (drv->state_count < 2) {
-		idx = 0;;
+		idx = 0;
 		goto end;
 	}
 	if (!dev->states_usage[0].disable) {
-- 
GitLab


From aa7968682a2b8a9cecf1d7d07e1c8ae8c08d211e Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Sat, 5 Jun 2021 09:38:11 +0900
Subject: [PATCH 3114/3804] spi: convert Cadence SPI bindings to YAML

Convert spi for Cadence SPI bindings documentation to YAML.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210605003811.858676-1-iwamatsu@nigauri.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-cadence.txt   | 30 ---------
 .../devicetree/bindings/spi/spi-cadence.yaml  | 66 +++++++++++++++++++
 2 files changed, 66 insertions(+), 30 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-cadence.txt
 create mode 100644 Documentation/devicetree/bindings/spi/spi-cadence.yaml

diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.txt b/Documentation/devicetree/bindings/spi/spi-cadence.txt
deleted file mode 100644
index 05a2ef945664b..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-cadence.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Cadence SPI controller Device Tree Bindings
--------------------------------------------
-
-Required properties:
-- compatible		: Should be "cdns,spi-r1p6" or "xlnx,zynq-spi-r1p6".
-- reg			: Physical base address and size of SPI registers map.
-- interrupts		: Property with a value describing the interrupt
-			  number.
-- clock-names		: List of input clock names - "ref_clk", "pclk"
-			  (See clock bindings for details).
-- clocks		: Clock phandles (see clock bindings for details).
-
-Optional properties:
-- num-cs		: Number of chip selects used.
-			  If a decoder is used, this will be the number of
-			  chip selects after the decoder.
-- is-decoded-cs		: Flag to indicate whether decoder is used or not.
-
-Example:
-
-	spi@e0007000 {
-		compatible = "xlnx,zynq-spi-r1p6";
-		clock-names = "ref_clk", "pclk";
-		clocks = <&clkc 26>, <&clkc 35>;
-		interrupt-parent = <&intc>;
-		interrupts = <0 49 4>;
-		num-cs = <4>;
-		is-decoded-cs = <0>;
-		reg = <0xe0007000 0x1000>;
-	} ;
diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
new file mode 100644
index 0000000000000..9787be21318e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-cadence.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Cadence SPI controller Device Tree Bindings
+
+maintainers:
+  - Michal Simek <michal.simek@xilinx.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - cdns,spi-r1p6
+      - xlnx,zynq-spi-r1p6
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: ref_clk
+      - const: pclk
+
+  clocks:
+    maxItems: 2
+
+  num-cs:
+    description: |
+      Number of chip selects used. If a decoder is used,
+      this will be the number of chip selects after the
+      decoder.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    maximum: 4
+    default: 4
+
+  is-decoded-cs:
+    description: |
+      Flag to indicate whether decoder is used or not.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [ 0, 1 ]
+    default: 0
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi@e0007000 {
+      compatible = "xlnx,zynq-spi-r1p6";
+      clock-names = "ref_clk", "pclk";
+      clocks = <&clkc 26>, <&clkc 35>;
+      interrupt-parent = <&intc>;
+      interrupts = <0 49 4>;
+      num-cs = <4>;
+      is-decoded-cs = <0>;
+      reg = <0xe0007000 0x1000>;
+    };
+...
-- 
GitLab


From 476ad3ff8952db3569a77d9ed4a067c5f0f4b733 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Sat, 5 Jun 2021 09:29:31 +0900
Subject: [PATCH 3115/3804] spi: xilinx: convert to yaml

Convert SPI for Xilinx bindings documentation to YAML schemas.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210605002931.858031-1-iwamatsu@nigauri.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-xilinx.txt    | 23 --------
 .../devicetree/bindings/spi/spi-xilinx.yaml   | 57 +++++++++++++++++++
 2 files changed, 57 insertions(+), 23 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-xilinx.txt
 create mode 100644 Documentation/devicetree/bindings/spi/spi-xilinx.yaml

diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.txt b/Documentation/devicetree/bindings/spi/spi-xilinx.txt
deleted file mode 100644
index 5f4ed3e5c9942..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-xilinx.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-Xilinx SPI controller Device Tree Bindings
--------------------------------------------------
-
-Required properties:
-- compatible		: Should be "xlnx,xps-spi-2.00.a", "xlnx,xps-spi-2.00.b" or "xlnx,axi-quad-spi-1.00.a"
-- reg			: Physical base address and size of SPI registers map.
-- interrupts		: Property with a value describing the interrupt
-			  number.
-
-Optional properties:
-- xlnx,num-ss-bits	 : Number of chip selects used.
-- xlnx,num-transfer-bits : Number of bits per transfer. This will be 8 if not specified
-
-Example:
-	axi_quad_spi@41e00000 {
-			compatible = "xlnx,xps-spi-2.00.a";
-			interrupt-parent = <&intc>;
-			interrupts = <0 31 1>;
-			reg = <0x41e00000 0x10000>;
-			xlnx,num-ss-bits = <0x1>;
-			xlnx,num-transfer-bits = <32>;
-	};
-
diff --git a/Documentation/devicetree/bindings/spi/spi-xilinx.yaml b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml
new file mode 100644
index 0000000000000..593f7693bacee
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-xilinx.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-xilinx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx SPI controller Device Tree Bindings
+
+maintainers:
+  - Michal Simek <michal.simek@xilinx.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - xlnx,xps-spi-2.00.a
+      - xlnx,xps-spi-2.00.b
+      - xlnx,axi-quad-spi-1.00.a
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  xlnx,num-ss-bits:
+    description: Number of chip selects used.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    maximum: 32
+
+  xlnx,num-transfer-bits:
+    description: Number of bits per transfer. This will be 8 if not specified.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [8, 16, 32]
+    default: 8
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi0: spi@41e00000 {
+      compatible = "xlnx,xps-spi-2.00.a";
+      interrupt-parent = <&intc>;
+      interrupts = <0 31 1>;
+      reg = <0x41e00000 0x10000>;
+      xlnx,num-ss-bits = <0x1>;
+      xlnx,num-transfer-bits = <32>;
+    };
+...
-- 
GitLab


From a7d8d1c7a7f73e780aa9ae74926ae5985b2f895f Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 14 Jun 2021 17:55:23 +0200
Subject: [PATCH 3116/3804] usb: core: hub: Disable autosuspend for Cypress
 CY7C65632

The Cypress CY7C65632 appears to have an issue with auto suspend and
detecting devices, not too dissimilar to the SMSC 5534B hub. It is
easiest to reproduce by connecting multiple mass storage devices to
the hub at the same time. On a Lenovo Yoga, around 1 in 3 attempts
result in the devices not being detected. It is however possible to
make them appear using lsusb -v.

Disabling autosuspend for this hub resolves the issue.

Fixes: 1208f9e1d758 ("USB: hub: Fix the broken detection of USB3 device in SMSC hub")
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20210614155524.2228800-1-andrew@lunn.ch
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index fc7d6cdacf16b..df8e69e60aaf7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -41,6 +41,8 @@
 #define USB_VENDOR_GENESYS_LOGIC		0x05e3
 #define USB_VENDOR_SMSC				0x0424
 #define USB_PRODUCT_USB5534B			0x5534
+#define USB_VENDOR_CYPRESS			0x04b4
+#define USB_PRODUCT_CY7C65632			0x6570
 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND	0x01
 #define HUB_QUIRK_DISABLE_AUTOSUSPEND		0x02
 
@@ -5697,6 +5699,11 @@ static const struct usb_device_id hub_id_table[] = {
       .idProduct = USB_PRODUCT_USB5534B,
       .bInterfaceClass = USB_CLASS_HUB,
       .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
+    { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
+                   | USB_DEVICE_ID_MATCH_PRODUCT,
+      .idVendor = USB_VENDOR_CYPRESS,
+      .idProduct = USB_PRODUCT_CY7C65632,
+      .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND},
     { .match_flags = USB_DEVICE_ID_MATCH_VENDOR
 			| USB_DEVICE_ID_MATCH_INT_CLASS,
       .idVendor = USB_VENDOR_GENESYS_LOGIC,
-- 
GitLab


From 8848f0665b3cd4fbb3107b384f5205380c90634d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 15 Jun 2021 12:12:24 +0100
Subject: [PATCH 3117/3804] arm64: Add cpuidle context save/restore helpers

As we need to start doing some additional work on all idle
paths, let's introduce a set of macros that will perform
the work related to the GICv3 pseudo-NMI idle entry exit.

Stubs are introduced to 32bit ARM for compatibility.
As these helpers are currently unused, there is no functional
change.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210615111227.2454465-2-maz@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm/include/asm/cpuidle.h   |  5 +++++
 arch/arm64/include/asm/cpuidle.h | 35 ++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/arch/arm/include/asm/cpuidle.h b/arch/arm/include/asm/cpuidle.h
index 0d67ed682e077..dc8f53f1a2195 100644
--- a/arch/arm/include/asm/cpuidle.h
+++ b/arch/arm/include/asm/cpuidle.h
@@ -49,4 +49,9 @@ extern int arm_cpuidle_suspend(int index);
 
 extern int arm_cpuidle_init(int cpu);
 
+struct arm_cpuidle_irq_context { };
+
+#define arm_cpuidle_save_irq_context(c)		(void)c
+#define arm_cpuidle_restore_irq_context(c)	(void)c
+
 #endif
diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h
index 3c5ddb429ea29..14a19d1141bd2 100644
--- a/arch/arm64/include/asm/cpuidle.h
+++ b/arch/arm64/include/asm/cpuidle.h
@@ -18,4 +18,39 @@ static inline int arm_cpuidle_suspend(int index)
 	return -EOPNOTSUPP;
 }
 #endif
+
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#include <asm/arch_gicv3.h>
+
+struct arm_cpuidle_irq_context {
+	unsigned long pmr;
+	unsigned long daif_bits;
+};
+
+#define arm_cpuidle_save_irq_context(__c)				\
+	do {								\
+		struct arm_cpuidle_irq_context *c = __c;		\
+		if (system_uses_irq_prio_masking()) {			\
+			c->daif_bits = read_sysreg(daif);		\
+			write_sysreg(c->daif_bits | PSR_I_BIT | PSR_F_BIT, \
+				     daif);				\
+			c->pmr = gic_read_pmr();			\
+			gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); \
+		}							\
+	} while (0)
+
+#define arm_cpuidle_restore_irq_context(__c)				\
+	do {								\
+		struct arm_cpuidle_irq_context *c = __c;		\
+		if (system_uses_irq_prio_masking()) {			\
+			gic_write_pmr(c->pmr);				\
+			write_sysreg(c->daif_bits, daif);		\
+		}							\
+	} while (0)
+#else
+struct arm_cpuidle_irq_context { };
+
+#define arm_cpuidle_save_irq_context(c)		(void)c
+#define arm_cpuidle_restore_irq_context(c)	(void)c
+#endif
 #endif
-- 
GitLab


From d4dc10277255afc303de4f00cbee0b9ce74d870f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 15 Jun 2021 12:12:25 +0100
Subject: [PATCH 3118/3804] arm64: Convert cpu_do_idle() to using cpuidle
 context helpers

Now that we have helpers that are aware of the pseudo-NMI
feature, introduce them to cpu_do_idle(). This allows for
some nice cleanup.

No functional change intended.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210615111227.2454465-3-maz@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/process.c | 41 ++++++++-----------------------------
 1 file changed, 9 insertions(+), 32 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2ca..b715c6b2558ff 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,9 +46,9 @@
 #include <linux/prctl.h>
 
 #include <asm/alternative.h>
-#include <asm/arch_gicv3.h>
 #include <asm/compat.h>
 #include <asm/cpufeature.h>
+#include <asm/cpuidle.h>
 #include <asm/cacheflush.h>
 #include <asm/exec.h>
 #include <asm/fpsimd.h>
@@ -74,33 +74,6 @@ EXPORT_SYMBOL_GPL(pm_power_off);
 
 void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 
-static void noinstr __cpu_do_idle(void)
-{
-	dsb(sy);
-	wfi();
-}
-
-static void noinstr __cpu_do_idle_irqprio(void)
-{
-	unsigned long pmr;
-	unsigned long daif_bits;
-
-	daif_bits = read_sysreg(daif);
-	write_sysreg(daif_bits | PSR_I_BIT | PSR_F_BIT, daif);
-
-	/*
-	 * Unmask PMR before going idle to make sure interrupts can
-	 * be raised.
-	 */
-	pmr = gic_read_pmr();
-	gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
-	__cpu_do_idle();
-
-	gic_write_pmr(pmr);
-	write_sysreg(daif_bits, daif);
-}
-
 /*
  *	cpu_do_idle()
  *
@@ -112,10 +85,14 @@ static void noinstr __cpu_do_idle_irqprio(void)
  */
 void noinstr cpu_do_idle(void)
 {
-	if (system_uses_irq_prio_masking())
-		__cpu_do_idle_irqprio();
-	else
-		__cpu_do_idle();
+	struct arm_cpuidle_irq_context context;
+
+	arm_cpuidle_save_irq_context(&context);
+
+	dsb(sy);
+	wfi();
+
+	arm_cpuidle_restore_irq_context(&context);
 }
 
 /*
-- 
GitLab


From c9223b616298c3d0e6ff5dd20d14d65c2131c535 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 15 Jun 2021 12:12:26 +0100
Subject: [PATCH 3119/3804] PSCI: Use cpuidle context helpers in
 psci_cpu_suspend_enter()

The PSCI CPU suspend code isn't aware of the PMR vs DAIF game,
resulting in a system that locks up if entering CPU suspend
with GICv3 pNMI enabled.

To save the day, teach the suspend code about our new cpuidle
context helpers, which will do everything that's required just
like the usual WFI cpuidle code.

This fixes my Altra system, which would otherwise lock up at
boot time when booted with irqchip.gicv3_pseudo_nmi=1.

Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20210615111227.2454465-4-maz@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/firmware/psci/psci.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 3c1c5daf6df2e..e3da38e15c5b6 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -335,10 +335,15 @@ int psci_cpu_suspend_enter(u32 state)
 {
 	int ret;
 
-	if (!psci_power_state_loses_context(state))
+	if (!psci_power_state_loses_context(state)) {
+		struct arm_cpuidle_irq_context context;
+
+		arm_cpuidle_save_irq_context(&context);
 		ret = psci_ops.cpu_suspend(state, 0);
-	else
+		arm_cpuidle_restore_irq_context(&context);
+	} else {
 		ret = cpu_suspend(state, psci_suspend_finisher);
+	}
 
 	return ret;
 }
-- 
GitLab


From 77345ef70445a8f16e0685dade0d68bdf41f19d7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 15 Jun 2021 12:12:27 +0100
Subject: [PATCH 3120/3804] arm64: suspend: Use cpuidle context helpers in
 cpu_suspend()

Use cpuidle context helpers to switch to using DAIF.IF instead
of PMR to mask interrupts, ensuring that we suspend with
interrupts being able to reach the CPU interface.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20210615111227.2454465-5-maz@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/suspend.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index e3f72df9509d7..938ce6fbee8a8 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -7,6 +7,7 @@
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
 #include <asm/cpufeature.h>
+#include <asm/cpuidle.h>
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/exec.h>
@@ -91,6 +92,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	int ret = 0;
 	unsigned long flags;
 	struct sleep_stack_data state;
+	struct arm_cpuidle_irq_context context;
 
 	/* Report any MTE async fault before going to suspend */
 	mte_suspend_enter();
@@ -103,12 +105,18 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	flags = local_daif_save();
 
 	/*
-	 * Function graph tracer state gets incosistent when the kernel
+	 * Function graph tracer state gets inconsistent when the kernel
 	 * calls functions that never return (aka suspend finishers) hence
 	 * disable graph tracing during their execution.
 	 */
 	pause_graph_tracing();
 
+	/*
+	 * Switch to using DAIF.IF instead of PMR in order to reliably
+	 * resume if we're using pseudo-NMIs.
+	 */
+	arm_cpuidle_save_irq_context(&context);
+
 	if (__cpu_suspend_enter(&state)) {
 		/* Call the suspend finisher */
 		ret = fn(arg);
@@ -126,6 +134,8 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 		RCU_NONIDLE(__cpu_suspend_exit());
 	}
 
+	arm_cpuidle_restore_irq_context(&context);
+
 	unpause_graph_tracing();
 
 	/*
-- 
GitLab


From d8ac05ea13d789d5491a5920d70a05659015441d Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Tue, 15 Jun 2021 16:04:43 +0100
Subject: [PATCH 3121/3804] KVM: selftests: Fix kvm_check_cap() assertion

KVM_CHECK_EXTENSION ioctl can return any negative value on error,
and not necessarily -1. Change the assertion to reflect that.

Signed-off-by: Fuad Tabba <tabba@google.com>
Message-Id: <20210615150443.1183365-1-tabba@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5c70596dd1b98..a2b732cf96ea4 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -82,7 +82,7 @@ int kvm_check_cap(long cap)
 
 	kvm_fd = open_kvm_dev_path_or_exit();
 	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
-	TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
 		"  rc: %i errno: %i", ret, errno);
 
 	close(kvm_fd);
-- 
GitLab


From a9d6496d667fdb86713868a402378a0e4db62b50 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Thu, 27 May 2021 15:57:51 +0800
Subject: [PATCH 3122/3804] KVM: x86/mmu: Make is_nx_huge_page_enabled an
 inline function

Function 'is_nx_huge_page_enabled' is called only by kvm/mmu, so make
it an inline function and remove the unnecessary declaration.

Cc: Ben Gardon <bgardon@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Message-Id: <1622102271-63107-1-git-send-email-zhangshaokun@hisilicon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          | 7 +------
 arch/x86/kvm/mmu/mmu_internal.h | 9 ++++++---
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8d5876dfc6b71..8ac1b9c935fee 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -55,7 +55,7 @@
 
 extern bool itlb_multihit_kvm_mitigation;
 
-static int __read_mostly nx_huge_pages = -1;
+int __read_mostly nx_huge_pages = -1;
 #ifdef CONFIG_PREEMPT_RT
 /* Recovery can cause latency spikes, disable it for PREEMPT_RT.  */
 static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
@@ -208,11 +208,6 @@ void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 	kvm_flush_remote_tlbs_with_range(kvm, &range);
 }
 
-bool is_nx_huge_page_enabled(void)
-{
-	return READ_ONCE(nx_huge_pages);
-}
-
 static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
 			   unsigned int access)
 {
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d64ccb417c60a..ff4c6256f3f9e 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -116,7 +116,12 @@ static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
 	       kvm_x86_ops.cpu_dirty_log_size;
 }
 
-bool is_nx_huge_page_enabled(void);
+extern int nx_huge_pages;
+static inline bool is_nx_huge_page_enabled(void)
+{
+	return READ_ONCE(nx_huge_pages);
+}
+
 bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 			    bool can_unsync);
 
@@ -158,8 +163,6 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
 void disallowed_hugepage_adjust(u64 spte, gfn_t gfn, int cur_level,
 				kvm_pfn_t *pfnp, int *goal_levelp);
 
-bool is_nx_huge_page_enabled(void);
-
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
 void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
-- 
GitLab


From 43e5146436099a98fcd30793598d61e582ec6830 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Wed, 26 May 2021 10:56:08 +0200
Subject: [PATCH 3123/3804] KVM: x86: Move FPU register accessors into fpu.h

Hyper-V XMM fast hypercalls use XMM registers to pass input/output
parameters. To access these, hyperv.c can reuse some FPU register
accessors defined in emulator.c. Move them to a common location so both
can access them.

While at it, reorder the parameters of these accessor methods to make
them more readable.

Cc: Alexander Graf <graf@amazon.com>
Cc: Evgeny Iakovlev <eyakovl@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <01a85a6560714d4d3637d3d86e5eba65073318fa.1622019133.git.sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 137 +++++-------------------------------
 arch/x86/kvm/fpu.h         | 140 +++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/kvm_emulate.h |   3 +-
 3 files changed, 158 insertions(+), 122 deletions(-)
 create mode 100644 arch/x86/kvm/fpu.h

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5e5de05a8fbfa..10e16a70b361c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -22,7 +22,6 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 #include <linux/stringify.h>
-#include <asm/fpu/api.h>
 #include <asm/debugreg.h>
 #include <asm/nospec-branch.h>
 
@@ -1081,116 +1080,14 @@ static void fetch_register_operand(struct operand *op)
 	}
 }
 
-static void emulator_get_fpu(void)
-{
-	fpregs_lock();
-
-	fpregs_assert_state_consistent();
-	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_return();
-}
-
-static void emulator_put_fpu(void)
-{
-	fpregs_unlock();
-}
-
-static void read_sse_reg(sse128_t *data, int reg)
-{
-	emulator_get_fpu();
-	switch (reg) {
-	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
-	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
-	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
-	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
-	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
-	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
-	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
-	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
-#ifdef CONFIG_X86_64
-	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
-	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
-	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
-	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
-	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
-	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
-	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
-	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
-#endif
-	default: BUG();
-	}
-	emulator_put_fpu();
-}
-
-static void write_sse_reg(sse128_t *data, int reg)
-{
-	emulator_get_fpu();
-	switch (reg) {
-	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
-	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
-	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
-	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
-	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
-	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
-	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
-	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
-#ifdef CONFIG_X86_64
-	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
-	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
-	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
-	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
-	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
-	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
-	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
-	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
-#endif
-	default: BUG();
-	}
-	emulator_put_fpu();
-}
-
-static void read_mmx_reg(u64 *data, int reg)
-{
-	emulator_get_fpu();
-	switch (reg) {
-	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
-	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
-	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
-	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
-	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
-	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
-	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
-	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
-	default: BUG();
-	}
-	emulator_put_fpu();
-}
-
-static void write_mmx_reg(u64 *data, int reg)
-{
-	emulator_get_fpu();
-	switch (reg) {
-	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
-	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
-	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
-	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
-	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
-	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
-	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
-	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
-	default: BUG();
-	}
-	emulator_put_fpu();
-}
-
 static int em_fninit(struct x86_emulate_ctxt *ctxt)
 {
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 	asm volatile("fninit");
-	emulator_put_fpu();
+	kvm_fpu_put();
 	return X86EMUL_CONTINUE;
 }
 
@@ -1201,9 +1098,9 @@ static int em_fnstcw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 	asm volatile("fnstcw %0": "+m"(fcw));
-	emulator_put_fpu();
+	kvm_fpu_put();
 
 	ctxt->dst.val = fcw;
 
@@ -1217,9 +1114,9 @@ static int em_fnstsw(struct x86_emulate_ctxt *ctxt)
 	if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
 		return emulate_nm(ctxt);
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 	asm volatile("fnstsw %0": "+m"(fsw));
-	emulator_put_fpu();
+	kvm_fpu_put();
 
 	ctxt->dst.val = fsw;
 
@@ -1238,7 +1135,7 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 		op->type = OP_XMM;
 		op->bytes = 16;
 		op->addr.xmm = reg;
-		read_sse_reg(&op->vec_val, reg);
+		kvm_read_sse_reg(reg, &op->vec_val);
 		return;
 	}
 	if (ctxt->d & Mmx) {
@@ -1289,7 +1186,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			op->type = OP_XMM;
 			op->bytes = 16;
 			op->addr.xmm = ctxt->modrm_rm;
-			read_sse_reg(&op->vec_val, ctxt->modrm_rm);
+			kvm_read_sse_reg(ctxt->modrm_rm, &op->vec_val);
 			return rc;
 		}
 		if (ctxt->d & Mmx) {
@@ -1866,10 +1763,10 @@ static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
 				       op->bytes * op->count);
 		break;
 	case OP_XMM:
-		write_sse_reg(&op->vec_val, op->addr.xmm);
+		kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
 		break;
 	case OP_MM:
-		write_mmx_reg(&op->mm_val, op->addr.mm);
+		kvm_write_mmx_reg(op->addr.mm, &op->mm_val);
 		break;
 	case OP_NONE:
 		/* no writeback */
@@ -4124,11 +4021,11 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 
 	rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
 
-	emulator_put_fpu();
+	kvm_fpu_put();
 
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -4172,7 +4069,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 
 	if (size < __fxstate_size(16)) {
 		rc = fxregs_fixup(&fx_state, size);
@@ -4189,7 +4086,7 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
 		rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
 
 out:
-	emulator_put_fpu();
+	kvm_fpu_put();
 
 	return rc;
 }
@@ -5437,9 +5334,9 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
 
-	emulator_get_fpu();
+	kvm_fpu_get();
 	rc = asm_safe("fwait");
-	emulator_put_fpu();
+	kvm_fpu_put();
 
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return emulate_exception(ctxt, MF_VECTOR, 0, false);
@@ -5450,7 +5347,7 @@ static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
 static void fetch_possible_mmx_operand(struct operand *op)
 {
 	if (op->type == OP_MM)
-		read_mmx_reg(&op->mm_val, op->addr.mm);
+		kvm_read_mmx_reg(op->addr.mm, &op->mm_val);
 }
 
 static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop)
diff --git a/arch/x86/kvm/fpu.h b/arch/x86/kvm/fpu.h
new file mode 100644
index 0000000000000..3ba12888bf66a
--- /dev/null
+++ b/arch/x86/kvm/fpu.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __KVM_FPU_H_
+#define __KVM_FPU_H_
+
+#include <asm/fpu/api.h>
+
+typedef u32		__attribute__((vector_size(16))) sse128_t;
+#define __sse128_u	union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x)	({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x)	({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+#define sse128_l0(x)	({ __sse128_u t; t.vec = x; t.as_u32[0]; })
+#define sse128_l1(x)	({ __sse128_u t; t.vec = x; t.as_u32[1]; })
+#define sse128_l2(x)	({ __sse128_u t; t.vec = x; t.as_u32[2]; })
+#define sse128_l3(x)	({ __sse128_u t; t.vec = x; t.as_u32[3]; })
+#define sse128(lo, hi)	({ __sse128_u t; t.as_u64[0] = lo; t.as_u64[1] = hi; t.vec; })
+
+static inline void _kvm_read_sse_reg(int reg, sse128_t *data)
+{
+	switch (reg) {
+	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
+	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
+	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
+	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
+	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
+	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
+	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
+	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
+	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
+	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
+	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
+	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
+	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
+	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
+	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
+#endif
+	default: BUG();
+	}
+}
+
+static inline void _kvm_write_sse_reg(int reg, const sse128_t *data)
+{
+	switch (reg) {
+	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
+	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
+	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
+	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
+	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
+	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
+	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
+	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
+#ifdef CONFIG_X86_64
+	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
+	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
+	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
+	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
+	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
+	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
+	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
+	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
+#endif
+	default: BUG();
+	}
+}
+
+static inline void _kvm_read_mmx_reg(int reg, u64 *data)
+{
+	switch (reg) {
+	case 0: asm("movq %%mm0, %0" : "=m"(*data)); break;
+	case 1: asm("movq %%mm1, %0" : "=m"(*data)); break;
+	case 2: asm("movq %%mm2, %0" : "=m"(*data)); break;
+	case 3: asm("movq %%mm3, %0" : "=m"(*data)); break;
+	case 4: asm("movq %%mm4, %0" : "=m"(*data)); break;
+	case 5: asm("movq %%mm5, %0" : "=m"(*data)); break;
+	case 6: asm("movq %%mm6, %0" : "=m"(*data)); break;
+	case 7: asm("movq %%mm7, %0" : "=m"(*data)); break;
+	default: BUG();
+	}
+}
+
+static inline void _kvm_write_mmx_reg(int reg, const u64 *data)
+{
+	switch (reg) {
+	case 0: asm("movq %0, %%mm0" : : "m"(*data)); break;
+	case 1: asm("movq %0, %%mm1" : : "m"(*data)); break;
+	case 2: asm("movq %0, %%mm2" : : "m"(*data)); break;
+	case 3: asm("movq %0, %%mm3" : : "m"(*data)); break;
+	case 4: asm("movq %0, %%mm4" : : "m"(*data)); break;
+	case 5: asm("movq %0, %%mm5" : : "m"(*data)); break;
+	case 6: asm("movq %0, %%mm6" : : "m"(*data)); break;
+	case 7: asm("movq %0, %%mm7" : : "m"(*data)); break;
+	default: BUG();
+	}
+}
+
+static inline void kvm_fpu_get(void)
+{
+	fpregs_lock();
+
+	fpregs_assert_state_consistent();
+	if (test_thread_flag(TIF_NEED_FPU_LOAD))
+		switch_fpu_return();
+}
+
+static inline void kvm_fpu_put(void)
+{
+	fpregs_unlock();
+}
+
+static inline void kvm_read_sse_reg(int reg, sse128_t *data)
+{
+	kvm_fpu_get();
+	_kvm_read_sse_reg(reg, data);
+	kvm_fpu_put();
+}
+
+static inline void kvm_write_sse_reg(int reg, const sse128_t *data)
+{
+	kvm_fpu_get();
+	_kvm_write_sse_reg(reg, data);
+	kvm_fpu_put();
+}
+
+static inline void kvm_read_mmx_reg(int reg, u64 *data)
+{
+	kvm_fpu_get();
+	_kvm_read_mmx_reg(reg, data);
+	kvm_fpu_put();
+}
+
+static inline void kvm_write_mmx_reg(int reg, const u64 *data)
+{
+	kvm_fpu_get();
+	_kvm_write_mmx_reg(reg, data);
+	kvm_fpu_put();
+}
+
+#endif
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 3e870bf9ca4d5..b063d376b7d92 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -13,6 +13,7 @@
 #define _ASM_X86_KVM_X86_EMULATE_H
 
 #include <asm/desc_defs.h>
+#include "fpu.h"
 
 struct x86_emulate_ctxt;
 enum x86_intercept;
@@ -236,8 +237,6 @@ struct x86_emulate_ops {
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
 };
 
-typedef u32 __attribute__((vector_size(16))) sse128_t;
-
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
 	enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
-- 
GitLab


From bd38b32053eb1c53ddb7030cf0fc6d700f7f1d82 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Wed, 26 May 2021 10:56:09 +0200
Subject: [PATCH 3124/3804] KVM: hyper-v: Collect hypercall params into struct

As of now there are 7 parameters (and flags) that are used in various
Hyper-V hypercall handlers. There are 6 more input/output parameters
passed from XMM registers which are to be added in an upcoming patch.

To make passing arguments to the handlers more readable, capture all
these parameters into a single structure.

Cc: Alexander Graf <graf@amazon.com>
Cc: Evgeny Iakovlev <eyakovl@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <273f7ed510a1f6ba177e61b73a5c7bfbee4a4a87.1622019133.git.sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 147 +++++++++++++++++++++++-------------------
 1 file changed, 79 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index f00830e5202fe..7c7a2da591da9 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1631,7 +1631,18 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask(
 	return vcpu_bitmap;
 }
 
-static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool ex)
+struct kvm_hv_hcall {
+	u64 param;
+	u64 ingpa;
+	u64 outgpa;
+	u16 code;
+	u16 rep_cnt;
+	u16 rep_idx;
+	bool fast;
+	bool rep;
+};
+
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -1646,7 +1657,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool
 	bool all_cpus;
 
 	if (!ex) {
-		if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush))))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
 		trace_kvm_hv_flush_tlb(flush.processor_mask,
@@ -1665,7 +1676,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool
 		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
 			flush.processor_mask == 0;
 	} else {
-		if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
 					    sizeof(flush_ex))))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -1687,8 +1698,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool
 
 		if (!all_cpus &&
 		    kvm_read_guest(kvm,
-				   ingpa + offsetof(struct hv_tlb_flush_ex,
-						    hv_vp_set.bank_contents),
+				   hc->ingpa + offsetof(struct hv_tlb_flush_ex,
+							hv_vp_set.bank_contents),
 				   sparse_banks,
 				   sparse_banks_len))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -1708,9 +1719,9 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool
 				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
 
 ret_success:
-	/* We always do full TLB flush, set rep_done = rep_cnt. */
+	/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
 	return (u64)HV_STATUS_SUCCESS |
-		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+		((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
 static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
@@ -1732,8 +1743,7 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
 	}
 }
 
-static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
-			   bool ex, bool fast)
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
@@ -1748,25 +1758,25 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
 	bool all_cpus;
 
 	if (!ex) {
-		if (!fast) {
-			if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
+		if (!hc->fast) {
+			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
 						    sizeof(send_ipi))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
 			sparse_banks[0] = send_ipi.cpu_mask;
 			vector = send_ipi.vector;
 		} else {
 			/* 'reserved' part of hv_send_ipi should be 0 */
-			if (unlikely(ingpa >> 32 != 0))
+			if (unlikely(hc->ingpa >> 32 != 0))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
-			sparse_banks[0] = outgpa;
-			vector = (u32)ingpa;
+			sparse_banks[0] = hc->outgpa;
+			vector = (u32)hc->ingpa;
 		}
 		all_cpus = false;
 		valid_bank_mask = BIT_ULL(0);
 
 		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
 	} else {
-		if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
+		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
 					    sizeof(send_ipi_ex))))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
@@ -1786,8 +1796,8 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, u64 ingpa, u64 outgpa,
 
 		if (!all_cpus &&
 		    kvm_read_guest(kvm,
-				   ingpa + offsetof(struct hv_send_ipi_ex,
-						    vp_set.bank_contents),
+				   hc->ingpa + offsetof(struct hv_send_ipi_ex,
+							vp_set.bank_contents),
 				   sparse_banks,
 				   sparse_banks_len))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -1847,20 +1857,21 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
 	return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
 }
 
-static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
+static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
 	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
 	struct eventfd_ctx *eventfd;
 
-	if (unlikely(!fast)) {
+	if (unlikely(!hc->fast)) {
 		int ret;
-		gpa_t gpa = param;
+		gpa_t gpa = hc->ingpa;
 
-		if ((gpa & (__alignof__(param) - 1)) ||
-		    offset_in_page(gpa) + sizeof(param) > PAGE_SIZE)
+		if ((gpa & (__alignof__(hc->ingpa) - 1)) ||
+		    offset_in_page(gpa) + sizeof(hc->ingpa) > PAGE_SIZE)
 			return HV_STATUS_INVALID_ALIGNMENT;
 
-		ret = kvm_vcpu_read_guest(vcpu, gpa, &param, sizeof(param));
+		ret = kvm_vcpu_read_guest(vcpu, gpa,
+					  &hc->ingpa, sizeof(hc->ingpa));
 		if (ret < 0)
 			return HV_STATUS_INVALID_ALIGNMENT;
 	}
@@ -1870,15 +1881,15 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
 	 * have no use for it, and in all known usecases it is zero, so just
 	 * report lookup failure if it isn't.
 	 */
-	if (param & 0xffff00000000ULL)
+	if (hc->ingpa & 0xffff00000000ULL)
 		return HV_STATUS_INVALID_PORT_ID;
 	/* remaining bits are reserved-zero */
-	if (param & ~KVM_HYPERV_CONN_ID_MASK)
+	if (hc->ingpa & ~KVM_HYPERV_CONN_ID_MASK)
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
 	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
 	rcu_read_lock();
-	eventfd = idr_find(&hv->conn_to_evt, param);
+	eventfd = idr_find(&hv->conn_to_evt, hc->ingpa);
 	rcu_read_unlock();
 	if (!eventfd)
 		return HV_STATUS_INVALID_PORT_ID;
@@ -1889,9 +1900,8 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param)
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
-	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
-	uint16_t code, rep_idx, rep_cnt;
-	bool fast, rep;
+	struct kvm_hv_hcall hc;
+	u64 ret = HV_STATUS_SUCCESS;
 
 	/*
 	 * hypercall generates UD from non zero cpl and real mode
@@ -1904,104 +1914,105 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
 #ifdef CONFIG_X86_64
 	if (is_64_bit_mode(vcpu)) {
-		param = kvm_rcx_read(vcpu);
-		ingpa = kvm_rdx_read(vcpu);
-		outgpa = kvm_r8_read(vcpu);
+		hc.param = kvm_rcx_read(vcpu);
+		hc.ingpa = kvm_rdx_read(vcpu);
+		hc.outgpa = kvm_r8_read(vcpu);
 	} else
 #endif
 	{
-		param = ((u64)kvm_rdx_read(vcpu) << 32) |
-			(kvm_rax_read(vcpu) & 0xffffffff);
-		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
-			(kvm_rcx_read(vcpu) & 0xffffffff);
-		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
-			(kvm_rsi_read(vcpu) & 0xffffffff);
+		hc.param = ((u64)kvm_rdx_read(vcpu) << 32) |
+			    (kvm_rax_read(vcpu) & 0xffffffff);
+		hc.ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
+			    (kvm_rcx_read(vcpu) & 0xffffffff);
+		hc.outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
+			     (kvm_rsi_read(vcpu) & 0xffffffff);
 	}
 
-	code = param & 0xffff;
-	fast = !!(param & HV_HYPERCALL_FAST_BIT);
-	rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
-	rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
-	rep = !!(rep_cnt || rep_idx);
+	hc.code = hc.param & 0xffff;
+	hc.fast = !!(hc.param & HV_HYPERCALL_FAST_BIT);
+	hc.rep_cnt = (hc.param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+	hc.rep = !!(hc.rep_cnt || hc.rep_idx);
 
-	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
+	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
+			       hc.ingpa, hc.outgpa);
 
-	switch (code) {
+	switch (hc.code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
-		if (unlikely(rep)) {
+		if (unlikely(hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		kvm_vcpu_on_spin(vcpu, true);
 		break;
 	case HVCALL_SIGNAL_EVENT:
-		if (unlikely(rep)) {
+		if (unlikely(hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
+		ret = kvm_hvcall_signal_event(vcpu, &hc);
 		if (ret != HV_STATUS_INVALID_PORT_ID)
 			break;
 		fallthrough;	/* maybe userspace knows this conn_id */
 	case HVCALL_POST_MESSAGE:
 		/* don't bother userspace if it has no way to handle it */
-		if (unlikely(rep || !to_hv_synic(vcpu)->active)) {
+		if (unlikely(hc.rep || !to_hv_synic(vcpu)->active)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
 		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
-		vcpu->run->hyperv.u.hcall.input = param;
-		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
-		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
+		vcpu->run->hyperv.u.hcall.input = hc.param;
+		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
+		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
 		vcpu->arch.complete_userspace_io =
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
-		if (unlikely(fast || !rep_cnt || rep_idx)) {
+		if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
-		if (unlikely(fast || rep)) {
+		if (unlikely(hc.fast || hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
-		if (unlikely(fast || !rep_cnt || rep_idx)) {
+		if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
-		if (unlikely(fast || rep)) {
+		if (unlikely(hc.fast || hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
 		break;
 	case HVCALL_SEND_IPI:
-		if (unlikely(rep)) {
+		if (unlikely(hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
+		ret = kvm_hv_send_ipi(vcpu, &hc, false);
 		break;
 	case HVCALL_SEND_IPI_EX:
-		if (unlikely(fast || rep)) {
+		if (unlikely(hc.fast || hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-		ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
+		ret = kvm_hv_send_ipi(vcpu, &hc, true);
 		break;
 	case HVCALL_POST_DEBUG_DATA:
 	case HVCALL_RETRIEVE_DEBUG_DATA:
-		if (unlikely(fast)) {
+		if (unlikely(hc.fast)) {
 			ret = HV_STATUS_INVALID_PARAMETER;
 			break;
 		}
@@ -2020,9 +2031,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		}
 		vcpu->run->exit_reason = KVM_EXIT_HYPERV;
 		vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
-		vcpu->run->hyperv.u.hcall.input = param;
-		vcpu->run->hyperv.u.hcall.params[0] = ingpa;
-		vcpu->run->hyperv.u.hcall.params[1] = outgpa;
+		vcpu->run->hyperv.u.hcall.input = hc.param;
+		vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
+		vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
 		vcpu->arch.complete_userspace_io =
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
-- 
GitLab


From 5974565bc26d6a599189db7c0b1f79eaa9af8eb9 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Wed, 26 May 2021 10:56:10 +0200
Subject: [PATCH 3125/3804] KVM: x86: kvm_hv_flush_tlb use inputs from XMM
 registers

Hyper-V supports the use of XMM registers to perform fast hypercalls.
This allows guests to take advantage of the improved performance of the
fast hypercall interface even though a hypercall may require more than
(the current maximum of) two input registers.

The XMM fast hypercall interface uses six additional XMM registers (XMM0
to XMM5) to allow the guest to pass an input parameter block of up to
112 bytes.

Add framework to read from XMM registers in kvm_hv_hypercall() and use
the additional hypercall inputs from XMM registers in kvm_hv_flush_tlb()
when possible.

Cc: Alexander Graf <graf@amazon.com>
Co-developed-by: Evgeny Iakovlev <eyakovl@amazon.de>
Signed-off-by: Evgeny Iakovlev <eyakovl@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <fc62edad33f1920fe5c74dde47d7d0b4275a9012.1622019134.git.sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h |  3 +
 arch/x86/kvm/hyperv.c              | 90 +++++++++++++++++++++++-------
 2 files changed, 74 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 606f5cc579b2b..27a9f08e83860 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -314,6 +314,9 @@ struct hv_tsc_emulation_status {
 #define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
 
+/* Number of XMM registers used in hypercall input/output */
+#define HV_HYPERCALL_MAX_XMM_REGISTERS		6
+
 struct hv_nested_enlightenments_control {
 	struct {
 		__u32 directhypercall:1;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7c7a2da591da9..449589e283d6f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,7 @@
 
 #include "trace.h"
 #include "irq.h"
+#include "fpu.h"
 
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -1640,10 +1641,13 @@ struct kvm_hv_hcall {
 	u16 rep_idx;
 	bool fast;
 	bool rep;
+	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
 };
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex)
 {
+	int i;
+	gpa_t gpa;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct hv_tlb_flush_ex flush_ex;
@@ -1657,8 +1661,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	bool all_cpus;
 
 	if (!ex) {
-		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush))))
-			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		if (hc->fast) {
+			flush.address_space = hc->ingpa;
+			flush.flags = hc->outgpa;
+			flush.processor_mask = sse128_lo(hc->xmm[0]);
+		} else {
+			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
+						    &flush, sizeof(flush))))
+				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		}
 
 		trace_kvm_hv_flush_tlb(flush.processor_mask,
 				       flush.address_space, flush.flags);
@@ -1676,9 +1687,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
 			flush.processor_mask == 0;
 	} else {
-		if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
-					    sizeof(flush_ex))))
-			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		if (hc->fast) {
+			flush_ex.address_space = hc->ingpa;
+			flush_ex.flags = hc->outgpa;
+			memcpy(&flush_ex.hv_vp_set,
+			       &hc->xmm[0], sizeof(hc->xmm[0]));
+		} else {
+			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
+						    sizeof(flush_ex))))
+				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		}
 
 		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
 					  flush_ex.hv_vp_set.format,
@@ -1689,20 +1707,28 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 		all_cpus = flush_ex.hv_vp_set.format !=
 			HV_GENERIC_SET_SPARSE_4K;
 
-		sparse_banks_len =
-			bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
-			sizeof(sparse_banks[0]);
+		sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64);
 
 		if (!sparse_banks_len && !all_cpus)
 			goto ret_success;
 
-		if (!all_cpus &&
-		    kvm_read_guest(kvm,
-				   hc->ingpa + offsetof(struct hv_tlb_flush_ex,
-							hv_vp_set.bank_contents),
-				   sparse_banks,
-				   sparse_banks_len))
-			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		if (!all_cpus) {
+			if (hc->fast) {
+				if (sparse_banks_len > HV_HYPERCALL_MAX_XMM_REGISTERS - 1)
+					return HV_STATUS_INVALID_HYPERCALL_INPUT;
+				for (i = 0; i < sparse_banks_len; i += 2) {
+					sparse_banks[i] = sse128_lo(hc->xmm[i / 2 + 1]);
+					sparse_banks[i + 1] = sse128_hi(hc->xmm[i / 2 + 1]);
+				}
+			} else {
+				gpa = hc->ingpa + offsetof(struct hv_tlb_flush_ex,
+							   hv_vp_set.bank_contents);
+				if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks,
+							    sparse_banks_len *
+							    sizeof(sparse_banks[0]))))
+					return HV_STATUS_INVALID_HYPERCALL_INPUT;
+			}
+		}
 	}
 
 	cpumask_clear(&hv_vcpu->tlb_flush);
@@ -1898,6 +1924,29 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h
 	return HV_STATUS_SUCCESS;
 }
 
+static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
+{
+	switch (hc->code) {
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+		return true;
+	}
+
+	return false;
+}
+
+static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
+{
+	int reg;
+
+	kvm_fpu_get();
+	for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
+		_kvm_read_sse_reg(reg, &hc->xmm[reg]);
+	kvm_fpu_put();
+}
+
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_hv_hcall hc;
@@ -1934,6 +1983,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
 	hc.rep = !!(hc.rep_cnt || hc.rep_idx);
 
+	if (hc.fast && is_xmm_fast_hypercall(&hc))
+		kvm_hv_hypercall_read_xmm(&hc);
+
 	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
 			       hc.ingpa, hc.outgpa);
 
@@ -1969,28 +2021,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
-		if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
+		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
-		if (unlikely(hc.fast || hc.rep)) {
+		if (unlikely(hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		ret = kvm_hv_flush_tlb(vcpu, &hc, false);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
-		if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) {
+		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
 		ret = kvm_hv_flush_tlb(vcpu, &hc, true);
 		break;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
-		if (unlikely(hc.fast || hc.rep)) {
+		if (unlikely(hc.rep)) {
 			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
 			break;
 		}
-- 
GitLab


From d8f5537a8816c8f00ea3103e74b65987963a56c6 Mon Sep 17 00:00:00 2001
From: Siddharth Chandrasekaran <sidcha@amazon.de>
Date: Wed, 26 May 2021 11:03:56 +0200
Subject: [PATCH 3126/3804] KVM: hyper-v: Advertise support for fast XMM
 hypercalls

Now that kvm_hv_flush_tlb() has been patched to support XMM hypercall
inputs, we can start advertising this feature to guests.

Cc: Alexander Graf <graf@amazon.com>
Cc: Evgeny Iakovlev <eyakovl@amazon.de>
Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de>
Message-Id: <e63fc1c61dd2efecbefef239f4f0a598bd552750.1622019134.git.sidcha@amazon.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h | 7 ++++++-
 arch/x86/kvm/hyperv.c              | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 27a9f08e83860..9fe4cc9c0f7d5 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -52,7 +52,7 @@
  * Support for passing hypercall input parameter block via XMM
  * registers is available
  */
-#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		BIT(4)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE		BIT(4)
 /* Support for a virtual guest idle state is available */
 #define HV_X64_GUEST_IDLE_STATE_AVAILABLE		BIT(5)
 /* Frequency MSRs available */
@@ -61,6 +61,11 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE		BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLE			BIT(11)
+/*
+ * Support for returning hypercall output block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE		BIT(15)
 /* stimer Direct Mode is available */
 #define HV_STIMER_DIRECT_MODE_AVAILABLE			BIT(19)
 
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 449589e283d6f..dbd3152b1379e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2243,6 +2243,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 			ent->ebx |= HV_POST_MESSAGES;
 			ent->ebx |= HV_SIGNAL_EVENTS;
 
+			ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
 			ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
 			ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 
-- 
GitLab


From 3ad93562093d764bc22d6460e84ba60d0c57f7ab Mon Sep 17 00:00:00 2001
From: Keqian Zhu <zhukeqian1@huawei.com>
Date: Thu, 29 Apr 2021 11:41:14 +0800
Subject: [PATCH 3127/3804] KVM: x86: Support write protecting only large pages

Prepare for write protecting large pages lazily during dirty log tracking,
for which we will only need to write protect gfns at large page
granularity.

No functional or performance change expected.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Message-Id: <20210429034115.35560-2-zhukeqian1@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          |  9 +++++----
 arch/x86/kvm/mmu/mmu_internal.h |  3 ++-
 arch/x86/kvm/mmu/page_track.c   |  2 +-
 arch/x86/kvm/mmu/tdp_mmu.c      | 16 ++++++++++++----
 arch/x86/kvm/mmu/tdp_mmu.h      |  3 ++-
 5 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8ac1b9c935fee..a668d2050b795 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1249,20 +1249,21 @@ int kvm_cpu_dirty_log_size(void)
 }
 
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
-				    struct kvm_memory_slot *slot, u64 gfn)
+				    struct kvm_memory_slot *slot, u64 gfn,
+				    int min_level)
 {
 	struct kvm_rmap_head *rmap_head;
 	int i;
 	bool write_protected = false;
 
-	for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
+	for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
 		rmap_head = __gfn_to_rmap(gfn, i, slot);
 		write_protected |= __rmap_write_protect(kvm, rmap_head, true);
 	}
 
 	if (is_tdp_mmu_enabled(kvm))
 		write_protected |=
-			kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn);
+			kvm_tdp_mmu_write_protect_gfn(kvm, slot, gfn, min_level);
 
 	return write_protected;
 }
@@ -1272,7 +1273,7 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 	struct kvm_memory_slot *slot;
 
 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
-	return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
+	return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn, PG_LEVEL_4K);
 }
 
 static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index ff4c6256f3f9e..18be103df9d59 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -128,7 +128,8 @@ bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
-				    struct kvm_memory_slot *slot, u64 gfn);
+				    struct kvm_memory_slot *slot, u64 gfn,
+				    int min_level);
 void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 					u64 start_gfn, u64 pages);
 
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 34bb0ec69bd8b..91a9f7e0fd914 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -100,7 +100,7 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
 
 	if (mode == KVM_PAGE_TRACK_WRITE)
-		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
+		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
 			kvm_flush_remote_tlbs(kvm);
 }
 EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 237317b1eddda..6b6dfcdcb1797 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1462,15 +1462,22 @@ bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
  * Returns true if an SPTE was set and a TLB flush is needed.
  */
 static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
-			      gfn_t gfn)
+			      gfn_t gfn, int min_level)
 {
 	struct tdp_iter iter;
 	u64 new_spte;
 	bool spte_set = false;
 
+	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+
 	rcu_read_lock();
 
-	tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) {
+	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
+				   min_level, gfn, gfn + 1) {
+		if (!is_shadow_present_pte(iter.old_spte) ||
+		    !is_last_spte(iter.old_spte, iter.level))
+			continue;
+
 		if (!is_writable_pte(iter.old_spte))
 			break;
 
@@ -1492,14 +1499,15 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
  * Returns true if an SPTE was set and a TLB flush is needed.
  */
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
-				   struct kvm_memory_slot *slot, gfn_t gfn)
+				   struct kvm_memory_slot *slot, gfn_t gfn,
+				   int min_level)
 {
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 	for_each_tdp_mmu_root(kvm, root, slot->as_id)
-		spte_set |= write_protect_gfn(kvm, root, gfn);
+		spte_set |= write_protect_gfn(kvm, root, gfn, min_level);
 
 	return spte_set;
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 5fdf630904517..a861570fcd7cd 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -74,7 +74,8 @@ bool kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				       bool flush);
 
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
-				   struct kvm_memory_slot *slot, gfn_t gfn);
+				   struct kvm_memory_slot *slot, gfn_t gfn,
+				   int min_level);
 
 int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
-- 
GitLab


From 8921291980db8184cdeb95987281c663f844b22c Mon Sep 17 00:00:00 2001
From: Keqian Zhu <zhukeqian1@huawei.com>
Date: Thu, 29 Apr 2021 11:41:15 +0800
Subject: [PATCH 3128/3804] KVM: x86: Do not write protect huge page in
 initially-all-set mode

Currently, when dirty logging is started in initially-all-set mode,
we write protect huge pages to prepare for splitting them into
4K pages, and leave normal pages untouched as the logging will
be enabled lazily as dirty bits are cleared.

However, enabling dirty logging lazily is also feasible for huge pages.
This not only reduces the time needed to start dirty logging, but also
greatly reduces the side effects on the guest when the dirty rate is high.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Message-Id: <20210429034115.35560-3-zhukeqian1@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 30 ++++++++++++++++++++++++++----
 arch/x86/kvm/x86.c     | 37 ++++++++++---------------------------
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a668d2050b795..66e4d096fe05a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1172,8 +1172,7 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
  * @gfn_offset: start of the BITS_PER_LONG pages we care about
  * @mask: indicates which pages we should protect
  *
- * Used when we do not need to care about huge page mappings: e.g. during dirty
- * logging we do not have any such mappings.
+ * Used when we do not need to care about huge page mappings.
  */
 static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 				     struct kvm_memory_slot *slot,
@@ -1230,13 +1229,36 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
  * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to
  * enable dirty logging for them.
  *
- * Used when we do not need to care about huge page mappings: e.g. during dirty
- * logging we do not have any such mappings.
+ * We need to care about huge page mappings: e.g. during dirty logging we may
+ * have such mappings.
  */
 void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 				struct kvm_memory_slot *slot,
 				gfn_t gfn_offset, unsigned long mask)
 {
+	/*
+	 * Huge pages are NOT write protected when we start dirty logging in
+	 * initially-all-set mode; must write protect them here so that they
+	 * are split to 4K on the first write.
+	 *
+	 * The gfn_offset is guaranteed to be aligned to 64, but the base_gfn
+	 * of memslot has no such restriction, so the range can cross two large
+	 * pages.
+	 */
+	if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
+		gfn_t start = slot->base_gfn + gfn_offset + __ffs(mask);
+		gfn_t end = slot->base_gfn + gfn_offset + __fls(mask);
+
+		kvm_mmu_slot_gfn_write_protect(kvm, slot, start, PG_LEVEL_2M);
+
+		/* Cross two large pages? */
+		if (ALIGN(start << PAGE_SHIFT, PMD_SIZE) !=
+		    ALIGN(end << PAGE_SHIFT, PMD_SIZE))
+			kvm_mmu_slot_gfn_write_protect(kvm, slot, end,
+						       PG_LEVEL_2M);
+	}
+
+	/* Now handle 4K PTEs.  */
 	if (kvm_x86_ops.cpu_dirty_log_size)
 		kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
 	else
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6d425310054b4..4ae708eb35f5c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11103,36 +11103,19 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 		 */
 		kvm_mmu_zap_collapsible_sptes(kvm, new);
 	} else {
-		/* By default, write-protect everything to log writes. */
-		int level = PG_LEVEL_4K;
+		/*
+		 * Initially-all-set does not require write protecting any page,
+		 * because they're all assumed to be dirty.
+		 */
+		if (kvm_dirty_log_manual_protect_and_init_set(kvm))
+			return;
 
 		if (kvm_x86_ops.cpu_dirty_log_size) {
-			/*
-			 * Clear all dirty bits, unless pages are treated as
-			 * dirty from the get-go.
-			 */
-			if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
-				kvm_mmu_slot_leaf_clear_dirty(kvm, new);
-
-			/*
-			 * Write-protect large pages on write so that dirty
-			 * logging happens at 4k granularity.  No need to
-			 * write-protect small SPTEs since write accesses are
-			 * logged by the CPU via dirty bits.
-			 */
-			level = PG_LEVEL_2M;
-		} else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
-			/*
-			 * If we're with initial-all-set, we don't need
-			 * to write protect any small page because
-			 * they're reported as dirty already.  However
-			 * we still need to write-protect huge pages
-			 * so that the page split can happen lazily on
-			 * the first write to the huge page.
-			 */
-			level = PG_LEVEL_2M;
+			kvm_mmu_slot_leaf_clear_dirty(kvm, new);
+			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_2M);
+		} else {
+			kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K);
 		}
-		kvm_mmu_slot_remove_write_access(kvm, new, level);
 	}
 }
 
-- 
GitLab


From c9b929b3fadc0504605d29016eb8274358c7d3ed Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:08 -0700
Subject: [PATCH 3129/3804] KVM: x86/mmu: Deduplicate rmap freeing

Small code deduplication. No functional change expected.

Reviewed-by: David Hildenbrand <david@redhat.com>

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-2-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4ae708eb35f5c..eaa01e6fe39b2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10917,17 +10917,23 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_hv_destroy_vm(kvm);
 }
 
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+static void memslot_rmap_free(struct kvm_memory_slot *slot)
 {
 	int i;
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.rmap[i]);
 		slot->arch.rmap[i] = NULL;
+	}
+}
 
-		if (i == 0)
-			continue;
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+	int i;
+
+	memslot_rmap_free(slot);
 
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
@@ -10993,12 +10999,9 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	return 0;
 
 out_free:
-	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-		kvfree(slot->arch.rmap[i]);
-		slot->arch.rmap[i] = NULL;
-		if (i == 0)
-			continue;
+	memslot_rmap_free(slot);
 
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		kvfree(slot->arch.lpage_info[i - 1]);
 		slot->arch.lpage_info[i - 1] = NULL;
 	}
-- 
GitLab


From 56dd1019c88510e79a820965a2da35907fbab00d Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:09 -0700
Subject: [PATCH 3130/3804] KVM: x86/mmu: Factor out allocating memslot rmap

Small refactor to facilitate allocating rmaps for all memslots at once.

No functional change expected.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-3-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index eaa01e6fe39b2..5f66a5972d829 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10941,10 +10941,31 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	kvm_page_track_free_memslot(slot);
 }
 
+static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
+			      unsigned long npages)
+{
+	const int sz = sizeof(*slot->arch.rmap[0]);
+	int i;
+
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		int level = i + 1;
+		int lpages = gfn_to_index(slot->base_gfn + npages - 1,
+					  slot->base_gfn, level) + 1;
+
+		slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
+		if (!slot->arch.rmap[i]) {
+			memslot_rmap_free(slot);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
 static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
-	int i;
+	int i, r;
 
 	/*
 	 * Clear out the previous array pointers for the KVM_MR_MOVE case.  The
@@ -10953,7 +10974,11 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+	r = memslot_rmap_alloc(slot, npages);
+	if (r)
+		return r;
+
+	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		struct kvm_lpage_info *linfo;
 		unsigned long ugfn;
 		int lpages;
@@ -10962,14 +10987,6 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
-		slot->arch.rmap[i] =
-			kvcalloc(lpages, sizeof(*slot->arch.rmap[i]),
-				 GFP_KERNEL_ACCOUNT);
-		if (!slot->arch.rmap[i])
-			goto out_free;
-		if (i == 0)
-			continue;
-
 		linfo = kvcalloc(lpages, sizeof(*linfo), GFP_KERNEL_ACCOUNT);
 		if (!linfo)
 			goto out_free;
-- 
GitLab


From ddc12f2a12917c10b0deb0928f0560bffb7729ec Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:10 -0700
Subject: [PATCH 3131/3804] KVM: mmu: Refactor memslot copy

Factor out copying kvm_memslots from allocating the memory for new ones
in preparation for adding a new lock to protect the arch-specific fields
of the memslots.

No functional change intended.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-4-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a6bc7af0e28d..d65be94614937 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1307,6 +1307,18 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	return old_memslots;
 }
 
+static size_t kvm_memslots_size(int slots)
+{
+	return sizeof(struct kvm_memslots) +
+	       (sizeof(struct kvm_memory_slot) * slots);
+}
+
+static void kvm_copy_memslots(struct kvm_memslots *to,
+			      struct kvm_memslots *from)
+{
+	memcpy(to, from, kvm_memslots_size(from->used_slots));
+}
+
 /*
  * Note, at a minimum, the current number of used slots must be allocated, even
  * when deleting a memslot, as we need a complete duplicate of the memslots for
@@ -1316,19 +1328,16 @@ static struct kvm_memslots *kvm_dup_memslots(struct kvm_memslots *old,
 					     enum kvm_mr_change change)
 {
 	struct kvm_memslots *slots;
-	size_t old_size, new_size;
-
-	old_size = sizeof(struct kvm_memslots) +
-		   (sizeof(struct kvm_memory_slot) * old->used_slots);
+	size_t new_size;
 
 	if (change == KVM_MR_CREATE)
-		new_size = old_size + sizeof(struct kvm_memory_slot);
+		new_size = kvm_memslots_size(old->used_slots + 1);
 	else
-		new_size = old_size;
+		new_size = kvm_memslots_size(old->used_slots);
 
 	slots = kvzalloc(new_size, GFP_KERNEL_ACCOUNT);
 	if (likely(slots))
-		memcpy(slots, old, old_size);
+		kvm_copy_memslots(slots, old);
 
 	return slots;
 }
-- 
GitLab


From b10a038e84d188e15819058b2978b2daa9853aeb Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:11 -0700
Subject: [PATCH 3132/3804] KVM: mmu: Add slots_arch_lock for memslot arch
 fields

Add a new lock to protect the arch-specific fields of memslots if they
need to be modified in a kvm->srcu read critical section. A future
commit will use this lock to lazily allocate memslot rmaps for x86.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-5-bgardon@google.com>
[Add Documentation/ hunk. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/locking.rst |  5 +++
 include/linux/kvm_host.h           |  9 +++++
 virt/kvm/kvm_main.c                | 54 ++++++++++++++++++++++++++----
 3 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 1fc860c007a3c..35eca377543df 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -16,6 +16,11 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.
 
+- Unlike kvm->slots_lock, kvm->slots_arch_lock is released before
+  synchronize_srcu(&kvm->srcu).  Therefore kvm->slots_arch_lock
+  can be taken inside a kvm->srcu read-side critical section,
+  while kvm->slots_lock cannot.
+
 On x86:
 
 - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8583ed3ff3447..11b9b11a5e9b0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -523,6 +523,15 @@ struct kvm {
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
 	struct mutex slots_lock;
+
+	/*
+	 * Protects the arch-specific fields of struct kvm_memory_slots in
+	 * use by the VM. To be used under the slots_lock (above) or in a
+	 * kvm->srcu critical section where acquiring the slots_lock would
+	 * lead to deadlock with the synchronize_srcu in
+	 * install_new_memslots.
+	 */
+	struct mutex slots_arch_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d65be94614937..fa7e7ebefc796 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -909,6 +909,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
+	mutex_init(&kvm->slots_arch_lock);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -1281,6 +1282,14 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
 	rcu_assign_pointer(kvm->memslots[as_id], slots);
+
+	/*
+	 * Acquired in kvm_set_memslot. Must be released before synchronize
+	 * SRCU below in order to avoid deadlock with another thread
+	 * acquiring the slots_arch_lock in an srcu critical section.
+	 */
+	mutex_unlock(&kvm->slots_arch_lock);
+
 	synchronize_srcu_expedited(&kvm->srcu);
 
 	/*
@@ -1352,9 +1361,27 @@ static int kvm_set_memslot(struct kvm *kvm,
 	struct kvm_memslots *slots;
 	int r;
 
+	/*
+	 * Released in install_new_memslots.
+	 *
+	 * Must be held from before the current memslots are copied until
+	 * after the new memslots are installed with rcu_assign_pointer,
+	 * then released before the synchronize srcu in install_new_memslots.
+	 *
+	 * When modifying memslots outside of the slots_lock, must be held
+	 * before reading the pointer to the current memslots until after all
+	 * changes to those memslots are complete.
+	 *
+	 * These rules ensure that installing new memslots does not lose
+	 * changes made to the previous memslots.
+	 */
+	mutex_lock(&kvm->slots_arch_lock);
+
 	slots = kvm_dup_memslots(__kvm_memslots(kvm, as_id), change);
-	if (!slots)
+	if (!slots) {
+		mutex_unlock(&kvm->slots_arch_lock);
 		return -ENOMEM;
+	}
 
 	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
 		/*
@@ -1365,10 +1392,9 @@ static int kvm_set_memslot(struct kvm *kvm,
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
 		/*
-		 * We can re-use the old memslots, the only difference from the
-		 * newly installed memslots is the invalid flag, which will get
-		 * dropped by update_memslots anyway.  We'll also revert to the
-		 * old memslots if preparing the new memory region fails.
+		 * We can re-use the memory from the old memslots.
+		 * It will be overwritten with a copy of the new memslots
+		 * after reacquiring the slots_arch_lock below.
 		 */
 		slots = install_new_memslots(kvm, as_id, slots);
 
@@ -1380,6 +1406,17 @@ static int kvm_set_memslot(struct kvm *kvm,
 		 *	- kvm_is_visible_gfn (mmu_check_root)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/* Released in install_new_memslots. */
+		mutex_lock(&kvm->slots_arch_lock);
+
+		/*
+		 * The arch-specific fields of the memslots could have changed
+		 * between releasing the slots_arch_lock in
+		 * install_new_memslots and here, so get a fresh copy of the
+		 * slots.
+		 */
+		kvm_copy_memslots(slots, __kvm_memslots(kvm, as_id));
 	}
 
 	r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
@@ -1395,8 +1432,13 @@ static int kvm_set_memslot(struct kvm *kvm,
 	return 0;
 
 out_slots:
-	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE)
+	if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
+		slot = id_to_memslot(slots, old->id);
+		slot->flags &= ~KVM_MEMSLOT_INVALID;
 		slots = install_new_memslots(kvm, as_id, slots);
+	} else {
+		mutex_unlock(&kvm->slots_arch_lock);
+	}
 	kvfree(slots);
 	return r;
 }
-- 
GitLab


From a255740876f006eb9041fadcc4750557d26add5f Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:12 -0700
Subject: [PATCH 3133/3804] KVM: x86/mmu: Add a field to control memslot rmap
 allocation

Add a field to control whether new memslots should have rmaps allocated
for them. As of this change, it's not safe to skip allocating rmaps, so
the field is always set to allocate rmaps. Future changes will make it
safe to operate without rmaps, using the TDP MMU. Then further changes
will allow the rmaps to be allocated lazily when needed for nested
operation.

No functional change expected.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-6-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  6 ++++++
 arch/x86/kvm/mmu/mmu.c          |  2 ++
 arch/x86/kvm/x86.c              | 13 ++++++++-----
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c7ced0e31718..11798a9ff3e9d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,12 @@ struct kvm_arch {
 	 */
 	spinlock_t tdp_mmu_pages_lock;
 #endif /* CONFIG_X86_64 */
+
+	/*
+	 * If set, rmaps have been allocated for all memslots and should be
+	 * allocated for any newly created or modified memslots.
+	 */
+	bool memslots_have_rmaps;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 66e4d096fe05a..64b3ee7ea4672 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5511,6 +5511,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 
 	kvm_mmu_init_tdp_mmu(kvm);
 
+	kvm->arch.memslots_have_rmaps = true;
+
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
 	kvm_page_track_register_notifier(kvm, node);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5f66a5972d829..11637fb103608 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10962,7 +10962,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 	return 0;
 }
 
-static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
 {
 	int i, r;
@@ -10974,9 +10975,11 @@ static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	r = memslot_rmap_alloc(slot, npages);
-	if (r)
-		return r;
+	if (kvm->arch.memslots_have_rmaps) {
+		r = memslot_rmap_alloc(slot, npages);
+		if (r)
+			return r;
+	}
 
 	for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
 		struct kvm_lpage_info *linfo;
@@ -11047,7 +11050,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				enum kvm_mr_change change)
 {
 	if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-		return kvm_alloc_memslot_metadata(memslot,
+		return kvm_alloc_memslot_metadata(kvm, memslot,
 						  mem->memory_size >> PAGE_SHIFT);
 	return 0;
 }
-- 
GitLab


From e2209710ccc5d28d8b88c822d2f3e03b269a2856 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:13 -0700
Subject: [PATCH 3134/3804] KVM: x86/mmu: Skip rmap operations if rmaps not
 allocated

If only the TDP MMU is being used to manage the memory mappings for a VM,
then many rmap operations can be skipped as they are guaranteed to be
no-ops. This saves some time which would be spent on the rmap operation.
It also avoids acquiring the MMU lock in write mode for many operations.

This makes it safe to run the VM without rmaps allocated when only the
TDP MMU is in use, and sets the stage for deferring allocation of the
rmaps until they're needed.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-7-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h     |   5 ++
 arch/x86/kvm/mmu/mmu.c | 113 ++++++++++++++++++++++++-----------------
 arch/x86/kvm/x86.c     |   2 +-
 3 files changed, 72 insertions(+), 48 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 88d0ed5225a4d..af09c47b1aa22 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -232,4 +232,9 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 int kvm_mmu_post_init_vm(struct kvm *kvm);
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 
+static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
+{
+	return kvm->arch.memslots_have_rmaps;
+}
+
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 64b3ee7ea4672..2131f71577bc6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1183,6 +1183,10 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	if (is_tdp_mmu_enabled(kvm))
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, true);
+
+	if (!kvm_memslots_have_rmaps(kvm))
+		return;
+
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PG_LEVEL_4K, slot);
@@ -1212,6 +1216,10 @@ static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 	if (is_tdp_mmu_enabled(kvm))
 		kvm_tdp_mmu_clear_dirty_pt_masked(kvm, slot,
 				slot->base_gfn + gfn_offset, mask, false);
+
+	if (!kvm_memslots_have_rmaps(kvm))
+		return;
+
 	while (mask) {
 		rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
 					  PG_LEVEL_4K, slot);
@@ -1278,9 +1286,11 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
 	int i;
 	bool write_protected = false;
 
-	for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
-		rmap_head = __gfn_to_rmap(gfn, i, slot);
-		write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+	if (kvm_memslots_have_rmaps(kvm)) {
+		for (i = min_level; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
+			rmap_head = __gfn_to_rmap(gfn, i, slot);
+			write_protected |= __rmap_write_protect(kvm, rmap_head, true);
+		}
 	}
 
 	if (is_tdp_mmu_enabled(kvm))
@@ -1451,9 +1461,10 @@ static __always_inline bool kvm_handle_gfn_range(struct kvm *kvm,
 
 bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool flush;
+	bool flush = false;
 
-	flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
+	if (kvm_memslots_have_rmaps(kvm))
+		flush = kvm_handle_gfn_range(kvm, range, kvm_unmap_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		flush |= kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush);
@@ -1463,9 +1474,10 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool flush;
+	bool flush = false;
 
-	flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmapp);
+	if (kvm_memslots_have_rmaps(kvm))
+		flush = kvm_handle_gfn_range(kvm, range, kvm_set_pte_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		flush |= kvm_tdp_mmu_set_spte_gfn(kvm, range);
@@ -1518,9 +1530,10 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool young;
+	bool young = false;
 
-	young = kvm_handle_gfn_range(kvm, range, kvm_age_rmapp);
+	if (kvm_memslots_have_rmaps(kvm))
+		young = kvm_handle_gfn_range(kvm, range, kvm_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		young |= kvm_tdp_mmu_age_gfn_range(kvm, range);
@@ -1530,9 +1543,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
-	bool young;
+	bool young = false;
 
-	young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmapp);
+	if (kvm_memslots_have_rmaps(kvm))
+		young = kvm_handle_gfn_range(kvm, range, kvm_test_age_rmapp);
 
 	if (is_tdp_mmu_enabled(kvm))
 		young |= kvm_tdp_mmu_test_age_gfn(kvm, range);
@@ -5534,29 +5548,29 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	int i;
 	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
-		slots = __kvm_memslots(kvm, i);
-		kvm_for_each_memslot(memslot, slots) {
-			gfn_t start, end;
-
-			start = max(gfn_start, memslot->base_gfn);
-			end = min(gfn_end, memslot->base_gfn + memslot->npages);
-			if (start >= end)
-				continue;
+	if (kvm_memslots_have_rmaps(kvm)) {
+		write_lock(&kvm->mmu_lock);
+		for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+			slots = __kvm_memslots(kvm, i);
+			kvm_for_each_memslot(memslot, slots) {
+				gfn_t start, end;
+
+				start = max(gfn_start, memslot->base_gfn);
+				end = min(gfn_end, memslot->base_gfn + memslot->npages);
+				if (start >= end)
+					continue;
 
-			flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp,
-							PG_LEVEL_4K,
-							KVM_MAX_HUGEPAGE_LEVEL,
-							start, end - 1, true, flush);
+				flush = slot_handle_level_range(kvm, memslot,
+						kvm_zap_rmapp, PG_LEVEL_4K,
+						KVM_MAX_HUGEPAGE_LEVEL, start,
+						end - 1, true, flush);
+			}
 		}
+		if (flush)
+			kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
+		write_unlock(&kvm->mmu_lock);
 	}
 
-	if (flush)
-		kvm_flush_remote_tlbs_with_address(kvm, gfn_start, gfn_end);
-
-	write_unlock(&kvm->mmu_lock);
-
 	if (is_tdp_mmu_enabled(kvm)) {
 		flush = false;
 
@@ -5583,12 +5597,15 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot,
 				      int start_level)
 {
-	bool flush;
+	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
-				start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm_memslots_have_rmaps(kvm)) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+					  start_level, KVM_MAX_HUGEPAGE_LEVEL,
+					  false);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
@@ -5658,16 +5675,15 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 	struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
 	bool flush;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
-
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm_memslots_have_rmaps(kvm)) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
+		if (flush)
+			kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
-		flush = false;
-
 		read_lock(&kvm->mmu_lock);
 		flush = kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot, flush);
 		if (flush)
@@ -5694,11 +5710,14 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot)
 {
-	bool flush;
+	bool flush = false;
 
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-	write_unlock(&kvm->mmu_lock);
+	if (kvm_memslots_have_rmaps(kvm)) {
+		write_lock(&kvm->mmu_lock);
+		flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty,
+					 false);
+		write_unlock(&kvm->mmu_lock);
+	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 11637fb103608..ddeff81f90a48 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10975,7 +10975,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 	 */
 	memset(&slot->arch, 0, sizeof(slot->arch));
 
-	if (kvm->arch.memslots_have_rmaps) {
+	if (kvm_memslots_have_rmaps(kvm)) {
 		r = memslot_rmap_alloc(slot, npages);
 		if (r)
 			return r;
-- 
GitLab


From d501f747ef5c0ac0c917f9a6781d04ae4ae39d63 Mon Sep 17 00:00:00 2001
From: Ben Gardon <bgardon@google.com>
Date: Tue, 18 May 2021 10:34:14 -0700
Subject: [PATCH 3135/3804] KVM: x86/mmu: Lazily allocate memslot rmaps

If the TDP MMU is in use, wait to allocate the rmaps until the shadow
MMU is actually used (i.e. a nested VM is launched). This saves memory
equal to 0.2% of guest memory in cases where the TDP MMU is used and
there are no nested guests involved.

Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210518173414.450044-8-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu.h              |  7 ++++-
 arch/x86/kvm/mmu/mmu.c          | 14 +++++++---
 arch/x86/kvm/mmu/tdp_mmu.c      |  6 +++--
 arch/x86/kvm/mmu/tdp_mmu.h      |  4 +--
 arch/x86/kvm/x86.c              | 46 +++++++++++++++++++++++++++++++++
 6 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 11798a9ff3e9d..dadb545c429f6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1869,4 +1869,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 
 int kvm_cpu_dirty_log_size(void);
 
+int alloc_all_memslots_rmaps(struct kvm *kvm);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index af09c47b1aa22..9d8550af994c4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -234,7 +234,12 @@ void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 
 static inline bool kvm_memslots_have_rmaps(struct kvm *kvm)
 {
-	return kvm->arch.memslots_have_rmaps;
+	/*
+	 * Read memslot_have_rmaps before rmap pointers.  Hence, threads reading
+	 * memslots_have_rmaps in any lock context are guaranteed to see the
+	 * pointers.  Pairs with smp_store_release in alloc_all_memslots_rmaps.
+	 */
+	return smp_load_acquire(&kvm->arch.memslots_have_rmaps);
 }
 
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2131f71577bc6..aa9e77f406d9c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3312,6 +3312,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	r = alloc_all_memslots_rmaps(vcpu->kvm);
+	if (r)
+		return r;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 	r = make_mmu_pages_available(vcpu);
 	if (r < 0)
@@ -5523,9 +5527,13 @@ void kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
-	kvm_mmu_init_tdp_mmu(kvm);
-
-	kvm->arch.memslots_have_rmaps = true;
+	if (!kvm_mmu_init_tdp_mmu(kvm))
+		/*
+		 * No smp_load/store wrappers needed here as we are in
+		 * VM init and there cannot be any memslots / other threads
+		 * accessing this struct kvm yet.
+		 */
+		kvm->arch.memslots_have_rmaps = true;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6b6dfcdcb1797..cc13e001f3de0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -14,10 +14,10 @@ static bool __read_mostly tdp_mmu_enabled = false;
 module_param_named(tdp_mmu, tdp_mmu_enabled, bool, 0644);
 
 /* Initializes the TDP MMU for the VM, if enabled. */
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 {
 	if (!tdp_enabled || !READ_ONCE(tdp_mmu_enabled))
-		return;
+		return false;
 
 	/* This should not be changed for the lifetime of the VM. */
 	kvm->arch.tdp_mmu_enabled = true;
@@ -25,6 +25,8 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
 	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
+
+	return true;
 }
 
 static __always_inline void kvm_lockdep_assert_mmu_lock_held(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index a861570fcd7cd..f7a7990da11da 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -81,12 +81,12 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 			 int *root_level);
 
 #ifdef CONFIG_X86_64
-void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
+bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 #else
-static inline void kvm_mmu_init_tdp_mmu(struct kvm *kvm) {}
+static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ddeff81f90a48..e838e999ab49d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10952,6 +10952,8 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 		int lpages = gfn_to_index(slot->base_gfn + npages - 1,
 					  slot->base_gfn, level) + 1;
 
+		WARN_ON(slot->arch.rmap[i]);
+
 		slot->arch.rmap[i] = kvcalloc(lpages, sz, GFP_KERNEL_ACCOUNT);
 		if (!slot->arch.rmap[i]) {
 			memslot_rmap_free(slot);
@@ -10962,6 +10964,50 @@ static int memslot_rmap_alloc(struct kvm_memory_slot *slot,
 	return 0;
 }
 
+int alloc_all_memslots_rmaps(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *slot;
+	int r, i;
+
+	/*
+	 * Check if memslots alreday have rmaps early before acquiring
+	 * the slots_arch_lock below.
+	 */
+	if (kvm_memslots_have_rmaps(kvm))
+		return 0;
+
+	mutex_lock(&kvm->slots_arch_lock);
+
+	/*
+	 * Read memslots_have_rmaps again, under the slots arch lock,
+	 * before allocating the rmaps
+	 */
+	if (kvm_memslots_have_rmaps(kvm)) {
+		mutex_unlock(&kvm->slots_arch_lock);
+		return 0;
+	}
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(slot, slots) {
+			r = memslot_rmap_alloc(slot, slot->npages);
+			if (r) {
+				mutex_unlock(&kvm->slots_arch_lock);
+				return r;
+			}
+		}
+	}
+
+	/*
+	 * Ensure that memslots_have_rmaps becomes true strictly after
+	 * all the rmap pointers are set.
+	 */
+	smp_store_release(&kvm->arch.memslots_have_rmaps, true);
+	mutex_unlock(&kvm->slots_arch_lock);
+	return 0;
+}
+
 static int kvm_alloc_memslot_metadata(struct kvm *kvm,
 				      struct kvm_memory_slot *slot,
 				      unsigned long npages)
-- 
GitLab


From 605a140a49099effc069f0fd509db34d91f48496 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:08 +0100
Subject: [PATCH 3136/3804] math64.h: Add mul_s64_u64_shr()

This function is needed for KVM's nested virtualization. The nested TSC
scaling implementation requires multiplying the signed TSC offset with
the unsigned TSC multiplier.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-2-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/math64.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 66deb1fdc2ef6..2928f03d6d46a 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -3,6 +3,7 @@
 #define _LINUX_MATH64_H
 
 #include <linux/types.h>
+#include <linux/math.h>
 #include <vdso/math64.h>
 #include <asm/div64.h>
 
@@ -234,6 +235,24 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift)
 
 #endif
 
+#ifndef mul_s64_u64_shr
+static inline u64 mul_s64_u64_shr(s64 a, u64 b, unsigned int shift)
+{
+	u64 ret;
+
+	/*
+	 * Extract the sign before the multiplication and put it back
+	 * afterwards if needed.
+	 */
+	ret = mul_u64_u64_shr(abs(a), b, shift);
+
+	if (a < 0)
+		ret = -((s64) ret);
+
+	return ret;
+}
+#endif /* mul_s64_u64_shr */
+
 #ifndef mul_u64_u32_div
 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 {
-- 
GitLab


From 805d705ff8f3a05e63ce350ac0c37a3290ed9bb7 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:09 +0100
Subject: [PATCH 3137/3804] KVM: X86: Store L1's TSC scaling ratio in 'struct
 kvm_vcpu_arch'

Store L1's scaling ratio in the kvm_vcpu_arch struct like we already do
for L1's TSC offset. This allows for easy save/restore when we enter and
then exit the nested guest.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-3-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 5 +++--
 arch/x86/kvm/vmx/vmx.c          | 4 ++--
 arch/x86/kvm/x86.c              | 6 ++++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dadb545c429f6..8808c8ae9370b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -707,7 +707,7 @@ struct kvm_vcpu_arch {
 	} st;
 
 	u64 l1_tsc_offset;
-	u64 tsc_offset;
+	u64 tsc_offset; /* current tsc offset */
 	u64 last_guest_tsc;
 	u64 last_host_tsc;
 	u64 tsc_offset_adjustment;
@@ -721,7 +721,8 @@ struct kvm_vcpu_arch {
 	u32 virtual_tsc_khz;
 	s64 ia32_tsc_adjust_msr;
 	u64 msr_ia32_power_ctl;
-	u64 tsc_scaling_ratio;
+	u64 l1_tsc_scaling_ratio;
+	u64 tsc_scaling_ratio; /* current scaling ratio */
 
 	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
 	unsigned nmi_pending; /* NMI queued after currently running handler */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c2a779b688e64..d3201efa6a07f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7453,10 +7453,10 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
 		delta_tsc = 0;
 
 	/* Convert to host delta tsc if tsc scaling is enabled */
-	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
+	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
 	    delta_tsc && u64_shl_div_u64(delta_tsc,
 				kvm_tsc_scaling_ratio_frac_bits,
-				vcpu->arch.tsc_scaling_ratio, &delta_tsc))
+				vcpu->arch.l1_tsc_scaling_ratio, &delta_tsc))
 		return -ERANGE;
 
 	/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e838e999ab49d..571ee7ef3e0ae 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2185,6 +2185,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 
 	/* Guest TSC same frequency as host TSC? */
 	if (!scale) {
+		vcpu->arch.l1_tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
 		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
 		return 0;
 	}
@@ -2211,7 +2212,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 		return -1;
 	}
 
-	vcpu->arch.tsc_scaling_ratio = ratio;
+	vcpu->arch.l1_tsc_scaling_ratio = vcpu->arch.tsc_scaling_ratio = ratio;
 	return 0;
 }
 
@@ -2223,6 +2224,7 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
 	/* tsc_khz can be zero if TSC calibration fails */
 	if (user_tsc_khz == 0) {
 		/* set tsc_scaling_ratio to a safe value */
+		vcpu->arch.l1_tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
 		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
 		return -1;
 	}
@@ -2459,7 +2461,7 @@ static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
 {
-	if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
+	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
 		WARN_ON(adjustment < 0);
 	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
 	adjust_tsc_offset_guest(vcpu, adjustment);
-- 
GitLab


From 9b399dfd4c60a2249f45f3938b1b9b49394dfe3a Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:10 +0100
Subject: [PATCH 3138/3804] KVM: X86: Rename kvm_compute_tsc_offset() to
 kvm_compute_l1_tsc_offset()

All existing code uses kvm_compute_tsc_offset() passing L1 TSC values to
it. Let's document this by renaming it to kvm_compute_l1_tsc_offset().

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-4-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 571ee7ef3e0ae..a1338bf871f7c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2319,7 +2319,7 @@ u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
 }
 EXPORT_SYMBOL_GPL(kvm_scale_tsc);
 
-static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
+static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 {
 	u64 tsc;
 
@@ -2363,7 +2363,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 	bool synchronizing = false;
 
 	raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
-	offset = kvm_compute_tsc_offset(vcpu, data);
+	offset = kvm_compute_l1_tsc_offset(vcpu, data);
 	ns = get_kvmclock_base_ns();
 	elapsed = ns - kvm->arch.last_tsc_nsec;
 
@@ -2402,7 +2402,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
 		} else {
 			u64 delta = nsec_to_cycles(vcpu, elapsed);
 			data += delta;
-			offset = kvm_compute_tsc_offset(vcpu, data);
+			offset = kvm_compute_l1_tsc_offset(vcpu, data);
 		}
 		matched = true;
 		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -3252,7 +3252,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		if (msr_info->host_initiated) {
 			kvm_synchronize_tsc(vcpu, data);
 		} else {
-			u64 adj = kvm_compute_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
+			u64 adj = kvm_compute_l1_tsc_offset(vcpu, data) - vcpu->arch.l1_tsc_offset;
 			adjust_tsc_offset_guest(vcpu, adj);
 			vcpu->arch.ia32_tsc_adjust_msr += adj;
 		}
@@ -4140,7 +4140,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 			mark_tsc_unstable("KVM discovered backwards TSC");
 
 		if (kvm_check_tsc_unstable()) {
-			u64 offset = kvm_compute_tsc_offset(vcpu,
+			u64 offset = kvm_compute_l1_tsc_offset(vcpu,
 						vcpu->arch.last_guest_tsc);
 			kvm_vcpu_write_tsc_offset(vcpu, offset);
 			vcpu->arch.tsc_catchup = 1;
-- 
GitLab


From fe3eb50418174567f6fbfb3d90a95cbd7a0cc17b Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:11 +0100
Subject: [PATCH 3139/3804] KVM: X86: Add a ratio parameter to kvm_scale_tsc()

Sometimes kvm_scale_tsc() needs to use the current scaling ratio and
other times (like when reading the TSC from user space) it needs to use
L1's scaling ratio. Have the caller specify this by passing the ratio as
a parameter.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-5-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/x86.c              | 27 ++++++++++++++++++---------
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8808c8ae9370b..d6bba19bc094a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1795,7 +1795,7 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
 	return kvm_find_user_return_msr(msr) >= 0;
 }
 
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a1338bf871f7c..a6d46520b5502 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2307,10 +2307,9 @@ static inline u64 __scale_tsc(u64 ratio, u64 tsc)
 	return mul_u64_u64_shr(tsc, ratio, kvm_tsc_scaling_ratio_frac_bits);
 }
 
-u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
+u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio)
 {
 	u64 _tsc = tsc;
-	u64 ratio = vcpu->arch.tsc_scaling_ratio;
 
 	if (ratio != kvm_default_tsc_scaling_ratio)
 		_tsc = __scale_tsc(ratio, tsc);
@@ -2323,14 +2322,15 @@ static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 {
 	u64 tsc;
 
-	tsc = kvm_scale_tsc(vcpu, rdtsc());
+	tsc = kvm_scale_tsc(vcpu, rdtsc(), vcpu->arch.l1_tsc_scaling_ratio);
 
 	return target_tsc - tsc;
 }
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-	return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
+	return vcpu->arch.l1_tsc_offset +
+		kvm_scale_tsc(vcpu, host_tsc, vcpu->arch.l1_tsc_scaling_ratio);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
@@ -2463,7 +2463,8 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
 {
 	if (vcpu->arch.l1_tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
 		WARN_ON(adjustment < 0);
-	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
+	adjustment = kvm_scale_tsc(vcpu, (u64) adjustment,
+				   vcpu->arch.l1_tsc_scaling_ratio);
 	adjust_tsc_offset_guest(vcpu, adjustment);
 }
 
@@ -2846,7 +2847,8 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	/* With all the info we got, fill in the values */
 
 	if (kvm_has_tsc_control)
-		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
+		tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz,
+					    v->arch.l1_tsc_scaling_ratio);
 
 	if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
 		kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
@@ -3554,10 +3556,17 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 * return L1's TSC value to ensure backwards-compatible
 		 * behavior for migration.
 		 */
-		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
-							    vcpu->arch.tsc_offset;
+		u64 offset, ratio;
 
-		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
+		if (msr_info->host_initiated) {
+			offset = vcpu->arch.l1_tsc_offset;
+			ratio = vcpu->arch.l1_tsc_scaling_ratio;
+		} else {
+			offset = vcpu->arch.tsc_offset;
+			ratio = vcpu->arch.tsc_scaling_ratio;
+		}
+
+		msr_info->data = kvm_scale_tsc(vcpu, rdtsc(), ratio) + offset;
 		break;
 	}
 	case MSR_MTRRcap:
-- 
GitLab


From 3c0f99366e34c1b45e4908e151089a8bf93fbe71 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:12 +0100
Subject: [PATCH 3140/3804] KVM: nVMX: Add a TSC multiplier field in VMCS12

This is required for supporting nested TSC scaling.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-6-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmcs12.c | 1 +
 arch/x86/kvm/vmx/vmcs12.h | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 034adb6404dca..d9f5d7c56ae3d 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -37,6 +37,7 @@ const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr),
 	FIELD64(PML_ADDRESS, pml_address),
 	FIELD64(TSC_OFFSET, tsc_offset),
+	FIELD64(TSC_MULTIPLIER, tsc_multiplier),
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(POSTED_INTR_DESC_ADDR, posted_intr_desc_addr),
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 13494956d0e97..bb81a23afe898 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -70,7 +70,8 @@ struct __packed vmcs12 {
 	u64 eptp_list_address;
 	u64 pml_address;
 	u64 encls_exiting_bitmap;
-	u64 padding64[2]; /* room for future expansion */
+	u64 tsc_multiplier;
+	u64 padding64[1]; /* room for future expansion */
 	/*
 	 * To allow migration of L1 (complete with its L2 guests) between
 	 * machines of different natural widths (32 or 64 bit), we cannot have
@@ -258,6 +259,7 @@ static inline void vmx_check_vmcs12_offsets(void)
 	CHECK_OFFSET(eptp_list_address, 304);
 	CHECK_OFFSET(pml_address, 312);
 	CHECK_OFFSET(encls_exiting_bitmap, 320);
+	CHECK_OFFSET(tsc_multiplier, 328);
 	CHECK_OFFSET(cr0_guest_host_mask, 344);
 	CHECK_OFFSET(cr4_guest_host_mask, 352);
 	CHECK_OFFSET(cr0_read_shadow, 360);
-- 
GitLab


From 307a94c721fed1aaaeee68115df6f7fb8193b23f Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:13 +0100
Subject: [PATCH 3141/3804] KVM: X86: Add functions for retrieving L2 TSC
 fields from common code

In order to implement as much of the nested TSC scaling logic as
possible in common code, we need these vendor callbacks for retrieving
the TSC offset and the TSC multiplier that L1 has set for L2.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-7-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 ++
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/svm/svm.c             | 14 ++++++++++++++
 arch/x86/kvm/vmx/vmx.c             | 23 +++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.h             |  3 +++
 5 files changed, 44 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index e7bef91cee04a..c4906f73603d9 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -87,6 +87,8 @@ KVM_X86_OP(set_identity_map_addr)
 KVM_X86_OP(get_mt_mask)
 KVM_X86_OP(load_mmu_pgd)
 KVM_X86_OP_NULL(has_wbinvd_exit)
+KVM_X86_OP(get_l2_tsc_offset)
+KVM_X86_OP(get_l2_tsc_multiplier)
 KVM_X86_OP(write_l1_tsc_offset)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d6bba19bc094a..6ec00427c6fd5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1311,6 +1311,8 @@ struct kvm_x86_ops {
 
 	bool (*has_wbinvd_exit)(void);
 
+	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
+	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
 	/* Returns actual tsc_offset set in active VMCS */
 	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8c3918a11826a..95ae2734760eb 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1080,6 +1080,18 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
+static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	return svm->nested.ctl.tsc_offset;
+}
+
+static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+{
+	return kvm_default_tsc_scaling_ratio;
+}
+
 static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4524,6 +4536,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
+	.get_l2_tsc_offset = svm_get_l2_tsc_offset,
+	.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
 	.write_l1_tsc_offset = svm_write_l1_tsc_offset,
 
 	.load_mmu_pgd = svm_load_mmu_pgd,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d3201efa6a07f..2ce2c73645bf9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1787,6 +1787,27 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 	vmx->guest_uret_msrs_loaded = false;
 }
 
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING))
+		return vmcs12->tsc_offset;
+
+	return 0;
+}
+
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING) &&
+	    nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
+		return vmcs12->tsc_multiplier;
+
+	return kvm_default_tsc_scaling_ratio;
+}
+
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -7700,6 +7721,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
+	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
+	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
 	.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
 
 	.load_mmu_pgd = vmx_load_mmu_pgd,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 16e4e457ba23c..aa97c82e3451b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -404,6 +404,9 @@ void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
 void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type);
 
+u64 vmx_get_l2_tsc_offset(struct kvm_vcpu *vcpu);
+u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu);
+
 static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
 					     int type, bool value)
 {
-- 
GitLab


From 83150f2932ec4712e2630009ac4a585d4aba7a9e Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:14 +0100
Subject: [PATCH 3142/3804] KVM: X86: Add functions that calculate the nested
 TSC fields

When L2 is entered we need to "merge" the TSC multiplier and TSC offset
values of 01 (set by L0 for L1) and 12 (set by L1 for L2) together into
the 02 values that are actually in effect while L2 runs.

The merging is done using the following equations:
  offset_02 = ((offset_01 * mult_12) >> shift_bits) + offset_12
  mult_02 = (mult_01 * mult_12) >> shift_bits

Where shift_bits is kvm_tsc_scaling_ratio_frac_bits.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-8-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ec00427c6fd5..14546c30bc635 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1799,6 +1799,8 @@ static inline bool kvm_is_supported_user_return_msr(u32 msr)
 
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc, u64 ratio);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
 
 unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6d46520b5502..61024ee9e85f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2334,6 +2334,31 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
+u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier)
+{
+	u64 nested_offset;
+
+	if (l2_multiplier == kvm_default_tsc_scaling_ratio)
+		nested_offset = l1_offset;
+	else
+		nested_offset = mul_s64_u64_shr((s64) l1_offset, l2_multiplier,
+						kvm_tsc_scaling_ratio_frac_bits);
+
+	nested_offset += l2_offset;
+	return nested_offset;
+}
+EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_offset);
+
+u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
+{
+	if (l2_multiplier != kvm_default_tsc_scaling_ratio)
+		return mul_u64_u64_shr(l1_multiplier, l2_multiplier,
+				       kvm_tsc_scaling_ratio_frac_bits);
+
+	return l1_multiplier;
+}
+EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
+
 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	vcpu->arch.l1_tsc_offset = offset;
-- 
GitLab


From edcfe54058114cb3782cd2e919c224e14420e76e Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:15 +0100
Subject: [PATCH 3143/3804] KVM: X86: Move write_l1_tsc_offset() logic to
 common code and rename it

The write_l1_tsc_offset() callback has a misleading name. It does not
set L1's TSC offset; rather, it updates the current TSC offset, which
might be different if a nested guest is executing. Additionally, both
the vmx and svm implementations use the same logic for calculating the
current TSC offset before writing it to hardware.

Rename the function and move the common logic to the caller. The vmx/svm
specific code now merely sets the given offset to the corresponding
hardware structure.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-9-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 +-
 arch/x86/include/asm/kvm_host.h    |  3 +--
 arch/x86/kvm/svm/svm.c             | 21 ++++-----------------
 arch/x86/kvm/vmx/vmx.c             | 23 +++--------------------
 arch/x86/kvm/x86.c                 | 24 +++++++++++++++++++++---
 5 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index c4906f73603d9..026ca50ef73ee 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -89,7 +89,7 @@ KVM_X86_OP(load_mmu_pgd)
 KVM_X86_OP_NULL(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
-KVM_X86_OP(write_l1_tsc_offset)
+KVM_X86_OP(write_tsc_offset)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 14546c30bc635..08773980393d4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1313,8 +1313,7 @@ struct kvm_x86_ops {
 
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
 	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
-	/* Returns actual tsc_offset set in active VMCS */
-	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	/*
 	 * Retrieve somewhat arbitrary exit information.  Intended to be used
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 95ae2734760eb..623f3c4b795a4 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1092,26 +1092,13 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
 	return kvm_default_tsc_scaling_ratio;
 }
 
-static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	u64 g_tsc_offset = 0;
-
-	if (is_guest_mode(vcpu)) {
-		/* Write L1's TSC offset.  */
-		g_tsc_offset = svm->vmcb->control.tsc_offset -
-			       svm->vmcb01.ptr->control.tsc_offset;
-		svm->vmcb01.ptr->control.tsc_offset = offset;
-	}
-
-	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-				   svm->vmcb->control.tsc_offset - g_tsc_offset,
-				   offset);
-
-	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
 
+	svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
+	svm->vmcb->control.tsc_offset = offset;
 	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-	return svm->vmcb->control.tsc_offset;
 }
 
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
@@ -4538,7 +4525,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 
 	.get_l2_tsc_offset = svm_get_l2_tsc_offset,
 	.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
-	.write_l1_tsc_offset = svm_write_l1_tsc_offset,
+	.write_tsc_offset = svm_write_tsc_offset,
 
 	.load_mmu_pgd = svm_load_mmu_pgd,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2ce2c73645bf9..54d08bebf9c62 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1808,26 +1808,9 @@ u64 vmx_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu)
 	return kvm_default_tsc_scaling_ratio;
 }
 
-static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-	u64 g_tsc_offset = 0;
-
-	/*
-	 * We're here if L1 chose not to trap WRMSR to TSC. According
-	 * to the spec, this should set L1's TSC; The offset that L1
-	 * set for L2 remains unchanged, and still needs to be added
-	 * to the newly set TSC to get L2's TSC.
-	 */
-	if (is_guest_mode(vcpu) &&
-	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
-		g_tsc_offset = vmcs12->tsc_offset;
-
-	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-				   vcpu->arch.tsc_offset - g_tsc_offset,
-				   offset);
-	vmcs_write64(TSC_OFFSET, offset + g_tsc_offset);
-	return offset + g_tsc_offset;
+	vmcs_write64(TSC_OFFSET, offset);
 }
 
 /*
@@ -7723,7 +7706,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
 	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
-	.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
+	.write_tsc_offset = vmx_write_tsc_offset,
 
 	.load_mmu_pgd = vmx_load_mmu_pgd,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 61024ee9e85f4..b42f6c8674e63 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2359,10 +2359,28 @@ u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier)
 }
 EXPORT_SYMBOL_GPL(kvm_calc_nested_tsc_multiplier);
 
-static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
 {
-	vcpu->arch.l1_tsc_offset = offset;
-	vcpu->arch.tsc_offset = static_call(kvm_x86_write_l1_tsc_offset)(vcpu, offset);
+	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
+				   vcpu->arch.l1_tsc_offset,
+				   l1_offset);
+
+	vcpu->arch.l1_tsc_offset = l1_offset;
+
+	/*
+	 * If we are here because L1 chose not to trap WRMSR to TSC then
+	 * according to the spec this should set L1's TSC (as opposed to
+	 * setting L1's offset for L2).
+	 */
+	if (is_guest_mode(vcpu))
+		vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
+			l1_offset,
+			static_call(kvm_x86_get_l2_tsc_offset)(vcpu),
+			static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
+	else
+		vcpu->arch.tsc_offset = l1_offset;
+
+	static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
-- 
GitLab


From 1ab9287add5e265352d18517551abf6d01d004fd Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Mon, 7 Jun 2021 11:54:38 +0100
Subject: [PATCH 3144/3804] KVM: X86: Add vendor callbacks for writing the TSC
 multiplier

Currently vmx_vcpu_load_vmcs() writes the TSC_MULTIPLIER field of the
VMCS every time the VMCS is loaded. Instead of doing this, set this
field from common code on initialization and whenever the scaling ratio
changes.

Additionally remove vmx->current_tsc_ratio. This field is redundant as
vcpu->arch.tsc_scaling_ratio already tracks the current TSC scaling
ratio. The vmx->current_tsc_ratio field is only used for avoiding
unnecessary writes but it is no longer needed after removing the code
from the VMCS load path.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Message-Id: <20210607105438.16541-1-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/svm/svm.c             |  6 ++++++
 arch/x86/kvm/vmx/nested.c          |  9 ++++-----
 arch/x86/kvm/vmx/vmx.c             | 11 ++++++-----
 arch/x86/kvm/vmx/vmx.h             |  8 --------
 arch/x86/kvm/x86.c                 | 30 +++++++++++++++++++++++-------
 7 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 026ca50ef73ee..aeb5f11367181 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -90,6 +90,7 @@ KVM_X86_OP_NULL(has_wbinvd_exit)
 KVM_X86_OP(get_l2_tsc_offset)
 KVM_X86_OP(get_l2_tsc_multiplier)
 KVM_X86_OP(write_tsc_offset)
+KVM_X86_OP(write_tsc_multiplier)
 KVM_X86_OP(get_exit_info)
 KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 08773980393d4..ca3b1925cffb8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1314,6 +1314,7 @@ struct kvm_x86_ops {
 	u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
 	u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
 
 	/*
 	 * Retrieve somewhat arbitrary exit information.  Intended to be used
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 623f3c4b795a4..a4d29ee9422d0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1101,6 +1101,11 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
+static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+{
+	wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+}
+
 /* Evaluate instruction intercepts that depend on guest CPUID features. */
 static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
 					      struct vcpu_svm *svm)
@@ -4526,6 +4531,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.get_l2_tsc_offset = svm_get_l2_tsc_offset,
 	.get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier,
 	.write_tsc_offset = svm_write_tsc_offset,
+	.write_tsc_multiplier = svm_write_tsc_multiplier,
 
 	.load_mmu_pgd = svm_load_mmu_pgd,
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6058a65a6ede6..239154d3e4e75 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2533,9 +2533,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	}
 
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
-
 	if (kvm_has_tsc_control)
-		decache_tsc_multiplier(vmx);
+		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
 
 	nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
 
@@ -4501,12 +4500,12 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
+	if (kvm_has_tsc_control)
+		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
+
 	if (vmx->nested.l1_tpr_threshold != -1)
 		vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
 
-	if (kvm_has_tsc_control)
-		decache_tsc_multiplier(vmx);
-
 	if (vmx->nested.change_vmcs01_virtual_apic_mode) {
 		vmx->nested.change_vmcs01_virtual_apic_mode = false;
 		vmx_set_virtual_apic_mode(vcpu);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 54d08bebf9c62..092a045de8690 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1390,11 +1390,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 
 		vmx->loaded_vmcs->cpu = cpu;
 	}
-
-	/* Setup TSC multiplier */
-	if (kvm_has_tsc_control &&
-	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
-		decache_tsc_multiplier(vmx);
 }
 
 /*
@@ -1813,6 +1808,11 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	vmcs_write64(TSC_OFFSET, offset);
 }
 
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+{
+	vmcs_write64(TSC_MULTIPLIER, multiplier);
+}
+
 /*
  * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
  * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
@@ -7707,6 +7707,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.get_l2_tsc_offset = vmx_get_l2_tsc_offset,
 	.get_l2_tsc_multiplier = vmx_get_l2_tsc_multiplier,
 	.write_tsc_offset = vmx_write_tsc_offset,
+	.write_tsc_multiplier = vmx_write_tsc_multiplier,
 
 	.load_mmu_pgd = vmx_load_mmu_pgd,
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index aa97c82e3451b..3eaa86a0ba3e3 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -322,8 +322,6 @@ struct vcpu_vmx {
 	/* apic deadline value in host tsc */
 	u64 hv_deadline_tsc;
 
-	u64 current_tsc_ratio;
-
 	unsigned long host_debugctlmsr;
 
 	/*
@@ -532,12 +530,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow)
 			      GFP_KERNEL_ACCOUNT);
 }
 
-static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx)
-{
-	vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
-	vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-}
-
 static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
 {
 	return vmx->secondary_exec_control &
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b42f6c8674e63..85b40e9191e54 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2179,14 +2179,15 @@ static u32 adjust_tsc_khz(u32 khz, s32 ppm)
 	return v;
 }
 
+static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier);
+
 static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 {
 	u64 ratio;
 
 	/* Guest TSC same frequency as host TSC? */
 	if (!scale) {
-		vcpu->arch.l1_tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
-		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
+		kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
 		return 0;
 	}
 
@@ -2212,7 +2213,7 @@ static int set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
 		return -1;
 	}
 
-	vcpu->arch.l1_tsc_scaling_ratio = vcpu->arch.tsc_scaling_ratio = ratio;
+	kvm_vcpu_write_tsc_multiplier(vcpu, ratio);
 	return 0;
 }
 
@@ -2224,8 +2225,7 @@ static int kvm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
 	/* tsc_khz can be zero if TSC calibration fails */
 	if (user_tsc_khz == 0) {
 		/* set tsc_scaling_ratio to a safe value */
-		vcpu->arch.l1_tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
-		vcpu->arch.tsc_scaling_ratio = kvm_default_tsc_scaling_ratio;
+		kvm_vcpu_write_tsc_multiplier(vcpu, kvm_default_tsc_scaling_ratio);
 		return -1;
 	}
 
@@ -2383,6 +2383,23 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 l1_offset)
 	static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
 }
 
+static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
+{
+	vcpu->arch.l1_tsc_scaling_ratio = l1_multiplier;
+
+	/* Userspace is changing the multiplier while L2 is active */
+	if (is_guest_mode(vcpu))
+		vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
+			l1_multiplier,
+			static_call(kvm_x86_get_l2_tsc_multiplier)(vcpu));
+	else
+		vcpu->arch.tsc_scaling_ratio = l1_multiplier;
+
+	if (kvm_has_tsc_control)
+		static_call(kvm_x86_write_tsc_multiplier)(
+			vcpu, vcpu->arch.tsc_scaling_ratio);
+}
+
 static inline bool kvm_check_tsc_unstable(void)
 {
 #ifdef CONFIG_X86_64
@@ -10364,8 +10381,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	else
 		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
 
-	kvm_set_tsc_khz(vcpu, max_tsc_khz);
-
 	r = kvm_mmu_create(vcpu);
 	if (r < 0)
 		return r;
@@ -10433,6 +10448,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
 	kvm_vcpu_mtrr_init(vcpu);
 	vcpu_load(vcpu);
+	kvm_set_tsc_khz(vcpu, max_tsc_khz);
 	kvm_vcpu_reset(vcpu, false);
 	kvm_init_mmu(vcpu, false);
 	vcpu_put(vcpu);
-- 
GitLab


From d041b5ea93352b3d226352a7238a89da2dd7becb Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:17 +0100
Subject: [PATCH 3145/3804] KVM: nVMX: Enable nested TSC scaling

Calculate the TSC offset and multiplier on nested transitions and expose
the TSC scaling feature to L1.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-11-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 239154d3e4e75..e8183e2247065 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2277,7 +2277,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 				  SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
 				  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
 				  SECONDARY_EXEC_APIC_REGISTER_VIRT |
-				  SECONDARY_EXEC_ENABLE_VMFUNC);
+				  SECONDARY_EXEC_ENABLE_VMFUNC |
+				  SECONDARY_EXEC_TSC_SCALING);
 		if (nested_cpu_has(vmcs12,
 				   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 			exec_control |= vmcs12->secondary_vm_exec_control;
@@ -2532,6 +2533,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
 	}
 
+	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
+			vcpu->arch.l1_tsc_offset,
+			vmx_get_l2_tsc_offset(vcpu),
+			vmx_get_l2_tsc_multiplier(vcpu));
+
+	vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
+			vcpu->arch.l1_tsc_scaling_ratio,
+			vmx_get_l2_tsc_multiplier(vcpu));
+
 	vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
 	if (kvm_has_tsc_control)
 		vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
@@ -3353,8 +3363,6 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 	}
 
 	enter_guest_mode(vcpu);
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
-		vcpu->arch.tsc_offset += vmcs12->tsc_offset;
 
 	if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
 		exit_reason.basic = EXIT_REASON_INVALID_STATE;
@@ -4462,8 +4470,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	if (nested_cpu_has_preemption_timer(vmcs12))
 		hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
 
-	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
-		vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
+	if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
+		vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
+		if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
+			vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
+	}
 
 	if (likely(!vmx->fail)) {
 		sync_vmcs02_to_vmcs12(vcpu, vmcs12);
@@ -6473,7 +6484,8 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
 		SECONDARY_EXEC_RDRAND_EXITING |
 		SECONDARY_EXEC_ENABLE_INVPCID |
 		SECONDARY_EXEC_RDSEED_EXITING |
-		SECONDARY_EXEC_XSAVES;
+		SECONDARY_EXEC_XSAVES |
+		SECONDARY_EXEC_TSC_SCALING;
 
 	/*
 	 * We can emulate "VMCS shadowing," even if the hardware
-- 
GitLab


From efe585493f914388de2382fac5ae7bd13c0555a5 Mon Sep 17 00:00:00 2001
From: Ilias Stamatis <ilstam@amazon.com>
Date: Wed, 26 May 2021 19:44:18 +0100
Subject: [PATCH 3146/3804] KVM: selftests: x86: Add
 vmx_nested_tsc_scaling_test

Test that nested TSC scaling works as expected with both L1 and L2
scaled.

Signed-off-by: Ilias Stamatis <ilstam@amazon.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210526184418.28881-12-ilstam@amazon.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../kvm/x86_64/vmx_nested_tsc_scaling_test.c  | 242 ++++++++++++++++++
 3 files changed, 244 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 524c857a049c3..db51571b8a366 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -29,6 +29,7 @@
 /x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
+/x86_64/vmx_nested_tsc_scaling_test
 /x86_64/xapic_ipi_test
 /x86_64/xen_shinfo_test
 /x86_64/xen_vmcall_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index daaee1888b128..6d241c97a890b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -60,6 +60,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
 TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
 TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
 TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 0000000000000..280c01fd24126
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+
+#define VCPU_ID 0
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec)         ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+	uint64_t tolerance, thresh_low, thresh_high;
+
+	tolerance = expected / 100;
+	thresh_low = expected - tolerance;
+	thresh_high = expected + tolerance;
+
+	TEST_ASSERT(thresh_low < actual,
+		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
+		" but it actually is %"PRIu64,
+		thresh_low, thresh_high, actual);
+	TEST_ASSERT(thresh_high > actual,
+		"TSC freq is expected to be between %"PRIu64" and %"PRIu64
+		" but it actually is %"PRIu64,
+		thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+	uint64_t tsc_start, tsc_end, tsc_freq;
+
+	/*
+	 * Reading the TSC twice with about a second's difference should give
+	 * us an approximation of the TSC frequency from the guest's
+	 * perspective. Now, this won't be completely accurate, but it should
+	 * be good enough for the purposes of this test.
+	 */
+	tsc_start = rdmsr(MSR_IA32_TSC);
+	GUEST_SLEEP(1);
+	tsc_end = rdmsr(MSR_IA32_TSC);
+
+	tsc_freq = tsc_end - tsc_start;
+
+	GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+	check_tsc_freq(UCHECK_L2);
+
+	/* exit to L1 */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint32_t control;
+
+	/* check that L1's frequency looks alright before launching L2 */
+	check_tsc_freq(UCHECK_L1);
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(load_vmcs(vmx_pages));
+
+	/* prepare the VMCS for L2 execution */
+	prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* enable TSC offsetting and TSC scaling for L2 */
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+	control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+	control |= SECONDARY_EXEC_TSC_SCALING;
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+	vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+	vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+	vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+	/* launch L2 */
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	/* check that L1's frequency still looks good */
+	check_tsc_freq(UCHECK_L1);
+
+	GUEST_DONE();
+}
+
+static void tsc_scaling_check_supported(void)
+{
+	if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
+		print_skip("TSC scaling not supported by the HW");
+		exit(KSFT_SKIP);
+	}
+}
+
+static void stable_tsc_check_supported(void)
+{
+	FILE *fp;
+	char buf[4];
+
+	fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+	if (fp == NULL)
+		goto skip_test;
+
+	if (fgets(buf, sizeof(buf), fp) == NULL)
+		goto skip_test;
+
+	if (strncmp(buf, "tsc", sizeof(buf)))
+		goto skip_test;
+
+	return;
+skip_test:
+	print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
+	exit(KSFT_SKIP);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	vm_vaddr_t vmx_pages_gva;
+
+	uint64_t tsc_start, tsc_end;
+	uint64_t tsc_khz;
+	uint64_t l1_scale_factor;
+	uint64_t l0_tsc_freq = 0;
+	uint64_t l1_tsc_freq = 0;
+	uint64_t l2_tsc_freq = 0;
+
+	nested_vmx_check_supported();
+	tsc_scaling_check_supported();
+	stable_tsc_check_supported();
+
+	/*
+	 * We set L1's scale factor to be a random number from 2 to 10.
+	 * Ideally we would do the same for L2's factor but that one is
+	 * referenced by both main() and l1_guest_code() and using a global
+	 * variable does not work.
+	 */
+	srand(time(NULL));
+	l1_scale_factor = (rand() % 9) + 2;
+	printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+	printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+	tsc_start = rdtsc();
+	sleep(1);
+	tsc_end = rdtsc();
+
+	l0_tsc_freq = tsc_end - tsc_start;
+	printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+	tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
+	TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+	/* scale down L1's TSC frequency */
+	vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
+		  (void *) (tsc_khz / l1_scale_factor));
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct ucall uc;
+
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_ABORT:
+			TEST_FAIL("%s", (const char *) uc.args[0]);
+		case UCALL_SYNC:
+			switch (uc.args[0]) {
+			case USLEEP:
+				sleep(uc.args[1]);
+				break;
+			case UCHECK_L1:
+				l1_tsc_freq = uc.args[1];
+				printf("L1's TSC frequency is around: %"PRIu64
+				       "\n", l1_tsc_freq);
+
+				compare_tsc_freq(l1_tsc_freq,
+						 l0_tsc_freq / l1_scale_factor);
+				break;
+			case UCHECK_L2:
+				l2_tsc_freq = uc.args[1];
+				printf("L2's TSC frequency is around: %"PRIu64
+				       "\n", l2_tsc_freq);
+
+				compare_tsc_freq(l2_tsc_freq,
+						 l1_tsc_freq * L2_SCALE_FACTOR);
+				break;
+			}
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
GitLab


From d82ee2819517eefd6f42465ccf3e3e621bbf4080 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:00 -0700
Subject: [PATCH 3147/3804] KVM: x86: Remove guest mode check from
 kvm_check_nested_events

A survey of the callsites reveals that they all ensure the vCPU is in
guest mode before calling kvm_check_nested_events. Remove this dead
code so that the only negative value this function returns (at the
moment) is -EBUSY.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210604172611.281819-2-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 85b40e9191e54..211d2dccb441b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8625,9 +8625,6 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 
 int kvm_check_nested_events(struct kvm_vcpu *vcpu)
 {
-	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
-		return -EIO;
-
 	if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
 		kvm_x86_ops.nested_ops->triple_fault(vcpu);
 		return 1;
-- 
GitLab


From 650293c3de6b042c4a2e87b2bc678efcff3843e8 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:02 -0700
Subject: [PATCH 3148/3804] KVM: nVMX: Add a return code to
 vmx_complete_nested_posted_interrupt

No functional change intended.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Message-Id: <20210604172611.281819-4-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e8183e2247065..73f63ad063662 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3689,7 +3689,7 @@ void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
+static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int max_irr;
@@ -3697,17 +3697,17 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	u16 status;
 
 	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
-		return;
+		return 0;
 
 	vmx->nested.pi_pending = false;
 	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
-		return;
+		return 0;
 
 	max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
 	if (max_irr != 256) {
 		vapic_page = vmx->nested.virtual_apic_map.hva;
 		if (!vapic_page)
-			return;
+			return 0;
 
 		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
 			vapic_page, &max_irr);
@@ -3720,6 +3720,7 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	}
 
 	nested_mark_vmcs12_pages_dirty(vcpu);
+	return 0;
 }
 
 static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
@@ -3894,8 +3895,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
 	}
 
 no_vmexit:
-	vmx_complete_nested_posted_interrupt(vcpu);
-	return 0;
+	return vmx_complete_nested_posted_interrupt(vcpu);
 }
 
 static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
-- 
GitLab


From a5f6909a71f9223b7d7da71974bae226f94d9d68 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:03 -0700
Subject: [PATCH 3149/3804] KVM: x86: Add a return code to inject_pending_event

No functional change intended. At present, 'r' will always be -EBUSY
on a control transfer to the 'out' label.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210604172611.281819-5-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 211d2dccb441b..1ae827f0d9542 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8640,7 +8640,7 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
 	static_call(kvm_x86_queue_exception)(vcpu);
 }
 
-static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
+static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
 {
 	int r;
 	bool can_inject = true;
@@ -8687,7 +8687,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	if (is_guest_mode(vcpu)) {
 		r = kvm_check_nested_events(vcpu);
 		if (r < 0)
-			goto busy;
+			goto out;
 	}
 
 	/* try to inject new event if pending */
@@ -8729,7 +8729,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	if (vcpu->arch.smi_pending) {
 		r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
 		if (r < 0)
-			goto busy;
+			goto out;
 		if (r) {
 			vcpu->arch.smi_pending = false;
 			++vcpu->arch.smi_count;
@@ -8742,7 +8742,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	if (vcpu->arch.nmi_pending) {
 		r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
 		if (r < 0)
-			goto busy;
+			goto out;
 		if (r) {
 			--vcpu->arch.nmi_pending;
 			vcpu->arch.nmi_injected = true;
@@ -8757,7 +8757,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 	if (kvm_cpu_has_injectable_intr(vcpu)) {
 		r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
 		if (r < 0)
-			goto busy;
+			goto out;
 		if (r) {
 			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
 			static_call(kvm_x86_set_irq)(vcpu);
@@ -8773,11 +8773,14 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
 		*req_immediate_exit = true;
 
 	WARN_ON(vcpu->arch.exception.pending);
-	return;
+	return 0;
 
-busy:
-	*req_immediate_exit = true;
-	return;
+out:
+	if (r == -EBUSY) {
+		*req_immediate_exit = true;
+		r = 0;
+	}
+	return r;
 }
 
 static void process_nmi(struct kvm_vcpu *vcpu)
@@ -9338,7 +9341,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 
-		inject_pending_event(vcpu, &req_immediate_exit);
+		r = inject_pending_event(vcpu, &req_immediate_exit);
+		if (r < 0) {
+			r = 0;
+			goto out;
+		}
 		if (req_int_win)
 			static_call(kvm_x86_enable_irq_window)(vcpu);
 
-- 
GitLab


From 4fe09bcf14a666b8fa4d79ce1b4c87afa753f827 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:04 -0700
Subject: [PATCH 3150/3804] KVM: x86: Add a return code to
 kvm_apic_accept_events

No functional change intended. At present, the only negative value
returned by kvm_check_nested_events is -EBUSY.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210604172611.281819-6-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 11 ++++++-----
 arch/x86/kvm/lapic.h |  2 +-
 arch/x86/kvm/x86.c   | 25 ++++++++++++++++++++-----
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 17fa4ab1b8344..4b80e613096be 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2872,7 +2872,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
 	return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
 }
 
-void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
+int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u8 sipi_vector;
@@ -2880,7 +2880,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	unsigned long pe;
 
 	if (!lapic_in_kernel(vcpu))
-		return;
+		return 0;
 
 	/*
 	 * Read pending events before calling the check_events
@@ -2888,12 +2888,12 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 	 */
 	pe = smp_load_acquire(&apic->pending_events);
 	if (!pe)
-		return;
+		return 0;
 
 	if (is_guest_mode(vcpu)) {
 		r = kvm_check_nested_events(vcpu);
 		if (r < 0)
-			return;
+			return r == -EBUSY ? 0 : r;
 		/*
 		 * If an event has happened and caused a vmexit,
 		 * we know INITs are latched and therefore
@@ -2914,7 +2914,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
 		if (test_bit(KVM_APIC_SIPI, &pe))
 			clear_bit(KVM_APIC_SIPI, &apic->pending_events);
-		return;
+		return 0;
 	}
 
 	if (test_bit(KVM_APIC_INIT, &pe)) {
@@ -2935,6 +2935,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		}
 	}
+	return 0;
 }
 
 void kvm_lapic_exit(void)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 997c45a5963af..d7c25d0c13549 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -76,7 +76,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
-void kvm_apic_accept_events(struct kvm_vcpu *vcpu);
+int kvm_apic_accept_events(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1ae827f0d9542..d1fdbaa6e1a9c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9335,7 +9335,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
 	    kvm_xen_has_interrupt(vcpu)) {
 		++vcpu->stat.req_event;
-		kvm_apic_accept_events(vcpu);
+		r = kvm_apic_accept_events(vcpu);
+		if (r < 0) {
+			r = 0;
+			goto out;
+		}
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			r = 1;
 			goto out;
@@ -9547,7 +9551,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
 			return 1;
 	}
 
-	kvm_apic_accept_events(vcpu);
+	if (kvm_apic_accept_events(vcpu) < 0)
+		return 0;
 	switch(vcpu->arch.mp_state) {
 	case KVM_MP_STATE_HALTED:
 	case KVM_MP_STATE_AP_RESET_HOLD:
@@ -9771,7 +9776,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 			goto out;
 		}
 		kvm_vcpu_block(vcpu);
-		kvm_apic_accept_events(vcpu);
+		if (kvm_apic_accept_events(vcpu) < 0) {
+			r = 0;
+			goto out;
+		}
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
 		r = -EAGAIN;
 		if (signal_pending(current)) {
@@ -9973,11 +9981,17 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 				    struct kvm_mp_state *mp_state)
 {
+	int r;
+
 	vcpu_load(vcpu);
 	if (kvm_mpx_supported())
 		kvm_load_guest_fpu(vcpu);
 
-	kvm_apic_accept_events(vcpu);
+	r = kvm_apic_accept_events(vcpu);
+	if (r < 0)
+		goto out;
+	r = 0;
+
 	if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
 	     vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
 	    vcpu->arch.pv.pv_unhalted)
@@ -9985,10 +9999,11 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
 	else
 		mp_state->mp_state = vcpu->arch.mp_state;
 
+out:
 	if (kvm_mpx_supported())
 		kvm_put_guest_fpu(vcpu);
 	vcpu_put(vcpu);
-	return 0;
+	return r;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-- 
GitLab


From 0fe998b295a37234392072c23e22b8bba4774d0f Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:05 -0700
Subject: [PATCH 3151/3804] KVM: nVMX: Fail on MMIO completion for nested
 posted interrupts

When the kernel has no mapping for the vmcs02 virtual APIC page,
userspace MMIO completion is necessary to process nested posted
interrupts. This is not a configuration that KVM supports. Rather than
silently ignoring the problem, try to exit to userspace with
KVM_INTERNAL_ERROR.

Note that the event that triggers this error is consumed as a
side-effect of a call to kvm_check_nested_events. On some paths
(notably through kvm_vcpu_check_block), the error is dropped. In any
case, this is an incremental improvement over always ignoring the
error.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210604172611.281819-7-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 73f63ad063662..4e545996440bb 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3707,7 +3707,7 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	if (max_irr != 256) {
 		vapic_page = vmx->nested.virtual_apic_map.hva;
 		if (!vapic_page)
-			return 0;
+			goto mmio_needed;
 
 		__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
 			vapic_page, &max_irr);
@@ -3721,6 +3721,10 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 
 	nested_mark_vmcs12_pages_dirty(vcpu);
 	return 0;
+
+mmio_needed:
+	kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
+	return -ENXIO;
 }
 
 static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
-- 
GitLab


From 966eefb8965798478c2a6de3aa35ec180323792d Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:06 -0700
Subject: [PATCH 3152/3804] KVM: nVMX: Disable vmcs02 posted interrupts if
 vmcs12 PID isn't mappable

Don't allow posted interrupts to modify a stale posted interrupt
descriptor (including the initial value of 0).

Empirical tests on real hardware reveal that a posted interrupt
descriptor referencing an unbacked address has PCI bus error semantics
(reads as all 1's; writes are ignored). However, kvm can't distinguish
unbacked addresses from device-backed (MMIO) addresses, so it should
really ask userspace for an MMIO completion. That's overly
complicated, so just punt with KVM_INTERNAL_ERROR.

Don't return the error until the posted interrupt descriptor is
actually accessed. We don't want to break the existing kvm-unit-tests
that assume they can launch an L2 VM with a posted interrupt
descriptor that references MMIO space in L1.

Fixes: 6beb7bd52e48 ("kvm: nVMX: Refactor nested_get_vmcs12_pages()")
Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210604172611.281819-8-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4e545996440bb..98b5f5f104da7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3184,6 +3184,15 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 				offset_in_page(vmcs12->posted_intr_desc_addr));
 			vmcs_write64(POSTED_INTR_DESC_ADDR,
 				     pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
+		} else {
+			/*
+			 * Defer the KVM_EXIT_INTERNAL_ERROR until KVM tries to
+			 * access the contents of the VMCS12 posted interrupt
+			 * descriptor. (Note that KVM may do this when it
+			 * should not, per the architectural specification.)
+			 */
+			vmx->nested.pi_desc = NULL;
+			pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
 		}
 	}
 	if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
@@ -3696,10 +3705,14 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
 	void *vapic_page;
 	u16 status;
 
-	if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
+	if (!vmx->nested.pi_pending)
 		return 0;
 
+	if (!vmx->nested.pi_desc)
+		goto mmio_needed;
+
 	vmx->nested.pi_pending = false;
+
 	if (!pi_test_and_clear_on(vmx->nested.pi_desc))
 		return 0;
 
-- 
GitLab


From 150a282d43b89c054f88ec248cb2a294b3ab0a4d Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:07 -0700
Subject: [PATCH 3153/3804] KVM: selftests: Move APIC definitions into a
 separate file

Processor.h is a hodgepodge of definitions. Though the local APIC is
technically built into the CPU these days, move the APIC definitions
into a new header file: apic.h.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Message-Id: <20210604172611.281819-9-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/apic.h       | 58 +++++++++++++++++++
 .../selftests/kvm/include/x86_64/processor.h  | 47 ---------------
 .../selftests/kvm/include/x86_64/vmx.h        |  1 +
 3 files changed, 59 insertions(+), 47 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/x86_64/apic.h

diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
new file mode 100644
index 0000000000000..0d0e35c8866b0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/apic.h
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#define APIC_DEFAULT_GPA		0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE		0x0000001b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_EXTD		(1<<10)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+#define		GET_APIC_BASE(x)	(((x) >> 12) << 12)
+
+#define APIC_BASE_MSR	0x800
+#define X2APIC_ENABLE	(1UL << 10)
+#define	APIC_ID		0x20
+#define	APIC_LVR	0x30
+#define		GET_APIC_ID_FIELD(x)	(((x) >> 24) & 0xFF)
+#define	APIC_TASKPRI	0x80
+#define	APIC_PROCPRI	0xA0
+#define	APIC_EOI	0xB0
+#define	APIC_SPIV	0xF0
+#define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
+#define		APIC_SPIV_APIC_ENABLED		(1 << 8)
+#define	APIC_ICR	0x300
+#define		APIC_DEST_SELF		0x40000
+#define		APIC_DEST_ALLINC	0x80000
+#define		APIC_DEST_ALLBUT	0xC0000
+#define		APIC_ICR_RR_MASK	0x30000
+#define		APIC_ICR_RR_INVALID	0x00000
+#define		APIC_ICR_RR_INPROG	0x10000
+#define		APIC_ICR_RR_VALID	0x20000
+#define		APIC_INT_LEVELTRIG	0x08000
+#define		APIC_INT_ASSERT		0x04000
+#define		APIC_ICR_BUSY		0x01000
+#define		APIC_DEST_LOGICAL	0x00800
+#define		APIC_DEST_PHYSICAL	0x00000
+#define		APIC_DM_FIXED		0x00000
+#define		APIC_DM_FIXED_MASK	0x00700
+#define		APIC_DM_LOWEST		0x00100
+#define		APIC_DM_SMI		0x00200
+#define		APIC_DM_REMRD		0x00300
+#define		APIC_DM_NMI		0x00400
+#define		APIC_DM_INIT		0x00500
+#define		APIC_DM_STARTUP		0x00600
+#define		APIC_DM_EXTINT		0x00700
+#define		APIC_VECTOR_MASK	0x000FF
+#define	APIC_ICR2	0x310
+#define		SET_APIC_DEST_FIELD(x)	((x) << 24)
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c386..a4729d9032ced 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -425,53 +425,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
 #define X86_CR0_CD          (1UL<<30) /* Cache Disable */
 #define X86_CR0_PG          (1UL<<31) /* Paging */
 
-#define APIC_DEFAULT_GPA		0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE		0x0000001b
-#define MSR_IA32_APICBASE_BSP		(1<<8)
-#define MSR_IA32_APICBASE_EXTD		(1<<10)
-#define MSR_IA32_APICBASE_ENABLE	(1<<11)
-#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
-#define		GET_APIC_BASE(x)	(((x) >> 12) << 12)
-
-#define APIC_BASE_MSR	0x800
-#define X2APIC_ENABLE	(1UL << 10)
-#define	APIC_ID		0x20
-#define	APIC_LVR	0x30
-#define		GET_APIC_ID_FIELD(x)	(((x) >> 24) & 0xFF)
-#define	APIC_TASKPRI	0x80
-#define	APIC_PROCPRI	0xA0
-#define	APIC_EOI	0xB0
-#define	APIC_SPIV	0xF0
-#define		APIC_SPIV_FOCUS_DISABLED	(1 << 9)
-#define		APIC_SPIV_APIC_ENABLED		(1 << 8)
-#define	APIC_ICR	0x300
-#define		APIC_DEST_SELF		0x40000
-#define		APIC_DEST_ALLINC	0x80000
-#define		APIC_DEST_ALLBUT	0xC0000
-#define		APIC_ICR_RR_MASK	0x30000
-#define		APIC_ICR_RR_INVALID	0x00000
-#define		APIC_ICR_RR_INPROG	0x10000
-#define		APIC_ICR_RR_VALID	0x20000
-#define		APIC_INT_LEVELTRIG	0x08000
-#define		APIC_INT_ASSERT		0x04000
-#define		APIC_ICR_BUSY		0x01000
-#define		APIC_DEST_LOGICAL	0x00800
-#define		APIC_DEST_PHYSICAL	0x00000
-#define		APIC_DM_FIXED		0x00000
-#define		APIC_DM_FIXED_MASK	0x00700
-#define		APIC_DM_LOWEST		0x00100
-#define		APIC_DM_SMI		0x00200
-#define		APIC_DM_REMRD		0x00300
-#define		APIC_DM_NMI		0x00400
-#define		APIC_DM_INIT		0x00500
-#define		APIC_DM_STARTUP		0x00600
-#define		APIC_DM_EXTINT		0x00700
-#define		APIC_VECTOR_MASK	0x000FF
-#define	APIC_ICR2	0x310
-#define		SET_APIC_DEST_FIELD(x)	((x) << 24)
-
 /* VMX_EPT_VPID_CAP bits */
 #define VMX_EPT_VPID_CAP_AD_BITS       (1ULL << 21)
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 65eb1079a161e..516c81d863537 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -10,6 +10,7 @@
 
 #include <stdint.h>
 #include "processor.h"
+#include "apic.h"
 
 /*
  * Definitions of Primary Processor-Based VM-Execution Controls.
-- 
GitLab


From 4c63c923408595eede59ce9fef6f4ab868928549 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:08 -0700
Subject: [PATCH 3154/3804] KVM: selftests: Hoist APIC functions out of
 individual tests

Move the APIC functions into the library to encourage code reuse and
to avoid unintended deviations.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Message-Id: <20210604172611.281819-10-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile          |  2 +-
 .../selftests/kvm/include/x86_64/apic.h       | 23 ++++++++
 .../selftests/kvm/include/x86_64/processor.h  |  2 +
 tools/testing/selftests/kvm/lib/x86_64/apic.c | 46 +++++++++++++++
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 11 +---
 .../selftests/kvm/x86_64/set_boot_cpu_id.c    |  6 +-
 .../selftests/kvm/x86_64/xapic_ipi_test.c     | 59 +++----------------
 7 files changed, 83 insertions(+), 66 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/x86_64/apic.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 6d241c97a890b..1c750910c27b1 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -34,7 +34,7 @@ ifeq ($(ARCH),s390)
 endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
+LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
 LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index 0d0e35c8866b0..e5a9fe040a6c8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -8,6 +8,10 @@
 #ifndef SELFTEST_KVM_APIC_H
 #define SELFTEST_KVM_APIC_H
 
+#include <stdint.h>
+
+#include "processor.h"
+
 #define APIC_DEFAULT_GPA		0xfee00000ULL
 
 /* APIC base address MSR and fields */
@@ -55,4 +59,23 @@
 #define	APIC_ICR2	0x310
 #define		SET_APIC_DEST_FIELD(x)	((x) << 24)
 
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+	return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+	return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+	((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
 #endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index a4729d9032ced..9a5b47d2d5d63 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -13,6 +13,8 @@
 
 #include <asm/msr-index.h>
 
+#include "../kvm_util.h"
+
 #define X86_EFLAGS_FIXED	 (1u << 1)
 
 #define X86_CR4_VME		(1ul << 0)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 0000000000000..31f318ac67bae
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/apic.c
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+	wrmsr(MSR_IA32_APICBASE,
+	      rdmsr(MSR_IA32_APICBASE) &
+		~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+	uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+	/* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+	if (val & MSR_IA32_APICBASE_EXTD) {
+		apic_disable();
+		wrmsr(MSR_IA32_APICBASE,
+		      rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+	} else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+		wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+	}
+
+	/*
+	 * Per SDM: reset value of spurious interrupt vector register has the
+	 * APIC software enabled bit=0. It must be enabled in addition to the
+	 * enable bit in the MSR.
+	 */
+	val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+	xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 63096cea26c61..d058d9e428c63 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -22,15 +22,6 @@
 
 static int ud_count;
 
-void enable_x2apic(void)
-{
-	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
-
-	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
-	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
-	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
-}
-
 static void guest_ud_handler(struct ex_regs *regs)
 {
 	ud_count++;
@@ -59,7 +50,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
-	enable_x2apic();
+	x2apic_enable();
 
 	GUEST_SYNC(1);
 	GUEST_SYNC(2);
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 12c558fc8074a..5f8dd74d415f8 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -14,16 +14,12 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
+#include "apic.h"
 
 #define N_VCPU 2
 #define VCPU_ID0 0
 #define VCPU_ID1 1
 
-static uint32_t get_bsp_flag(void)
-{
-	return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
 static void guest_bsp_vcpu(void *arg)
 {
 	GUEST_SYNC(1);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c9..21b22718a9dbd 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -42,8 +42,6 @@
 #define HALTER_VCPU_ID 0
 #define SENDER_VCPU_ID 1
 
-volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
-
 /*
  * Vector for IPI from sender vCPU to halting vCPU.
  * Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -86,45 +84,6 @@ struct thread_params {
 	uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
 };
 
-uint32_t read_apic_reg(uint reg)
-{
-	return apic_base[reg >> 2];
-}
-
-void write_apic_reg(uint reg, uint32_t val)
-{
-	apic_base[reg >> 2] = val;
-}
-
-void disable_apic(void)
-{
-	wrmsr(MSR_IA32_APICBASE,
-	      rdmsr(MSR_IA32_APICBASE) &
-		~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void enable_xapic(void)
-{
-	uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
-	/* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
-	if (val & MSR_IA32_APICBASE_EXTD) {
-		disable_apic();
-		wrmsr(MSR_IA32_APICBASE,
-		      rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
-	} else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
-		wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
-	}
-
-	/*
-	 * Per SDM: reset value of spurious interrupt vector register has the
-	 * APIC software enabled bit=0. It must be enabled in addition to the
-	 * enable bit in the MSR.
-	 */
-	val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
-	write_apic_reg(APIC_SPIV, val);
-}
-
 void verify_apic_base_addr(void)
 {
 	uint64_t msr = rdmsr(MSR_IA32_APICBASE);
@@ -136,10 +95,10 @@ void verify_apic_base_addr(void)
 static void halter_guest_code(struct test_data_page *data)
 {
 	verify_apic_base_addr();
-	enable_xapic();
+	xapic_enable();
 
-	data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
-	data->halter_lvr = read_apic_reg(APIC_LVR);
+	data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+	data->halter_lvr = xapic_read_reg(APIC_LVR);
 
 	/*
 	 * Loop forever HLTing and recording halts & wakes. Disable interrupts
@@ -150,8 +109,8 @@ static void halter_guest_code(struct test_data_page *data)
 	 * TPR and PPR for diagnostic purposes in case the test fails.
 	 */
 	for (;;) {
-		data->halter_tpr = read_apic_reg(APIC_TASKPRI);
-		data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+		data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+		data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
 		data->hlt_count++;
 		asm volatile("sti; hlt; cli");
 		data->wake_count++;
@@ -166,7 +125,7 @@ static void halter_guest_code(struct test_data_page *data)
 static void guest_ipi_handler(struct ex_regs *regs)
 {
 	ipis_rcvd++;
-	write_apic_reg(APIC_EOI, 77);
+	xapic_write_reg(APIC_EOI, 77);
 }
 
 static void sender_guest_code(struct test_data_page *data)
@@ -179,7 +138,7 @@ static void sender_guest_code(struct test_data_page *data)
 	uint64_t tsc_start;
 
 	verify_apic_base_addr();
-	enable_xapic();
+	xapic_enable();
 
 	/*
 	 * Init interrupt command register for sending IPIs
@@ -206,8 +165,8 @@ static void sender_guest_code(struct test_data_page *data)
 		 * First IPI can be sent unconditionally because halter vCPU
 		 * starts earlier.
 		 */
-		write_apic_reg(APIC_ICR2, icr2_val);
-		write_apic_reg(APIC_ICR, icr_val);
+		xapic_write_reg(APIC_ICR2, icr2_val);
+		xapic_write_reg(APIC_ICR, icr_val);
 		data->ipis_sent++;
 
 		/*
-- 
GitLab


From 768d134d8cb4cb595966d8c509a9329a075a5fa2 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 4 Jun 2021 10:26:09 -0700
Subject: [PATCH 3155/3804] KVM: selftests: Introduce x2APIC register
 manipulation functions

Standardize reads and writes of the x2APIC MSRs.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Message-Id: <20210604172611.281819-11-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/apic.h | 10 ++++++++++
 tools/testing/selftests/kvm/lib/x86_64/apic.c     |  5 ++---
 tools/testing/selftests/kvm/x86_64/smm_test.c     |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index e5a9fe040a6c8..0be4757f1f201 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -78,4 +78,14 @@ static inline void xapic_write_reg(unsigned int reg, uint32_t val)
 	((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
 }
 
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+	return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+	wrmsr(APIC_BASE_MSR + (reg >> 4), value);
+}
+
 #endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
index 31f318ac67bae..7168e25c194e1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -38,9 +38,8 @@ void xapic_enable(void)
 
 void x2apic_enable(void)
 {
-	uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
-
 	wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
 	      MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
-	wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+	x2apic_write_reg(APIC_SPIV,
+			 x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 613c42c5a9b8d..c1f831803ad2d 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -55,8 +55,8 @@ static inline void sync_with_host(uint64_t phase)
 
 void self_smi(void)
 {
-	wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
-	      APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+	x2apic_write_reg(APIC_ICR,
+			 APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
 }
 
 void guest_code(void *arg)
-- 
GitLab


From 2fdef3a2ae01dfd928c4b42c5a3b76546170a74c Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Sun, 6 Jun 2021 11:10:44 +0900
Subject: [PATCH 3156/3804] kvm: add PM-notifier

Add a KVM PM-notifier so that architectures can have arch-specific
VM suspend/resume routines. Such architectures need to select
HAVE_KVM_PM_NOTIFIER (CONFIG_HAVE_KVM_PM_NOTIFIER) and implement
kvm_arch_pm_notifier().

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Message-Id: <20210606021045.14159-1-senozhatsky@chromium.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  9 +++++++++
 virt/kvm/Kconfig         |  3 +++
 virt/kvm/kvm_main.c      | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 11b9b11a5e9b0..37cbb56ccd09c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -28,6 +28,7 @@
 #include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
+#include <linux/notifier.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -594,6 +595,10 @@ struct kvm {
 	pid_t userspace_pid;
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
+
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+	struct notifier_block pm_notifier;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -1007,6 +1012,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state);
+#endif
+
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
 #endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 1c37ccd5d402a..62b39149b8c82 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -63,3 +63,6 @@ config HAVE_KVM_NO_POLL
 
 config KVM_XFER_TO_GUEST_WORK
        bool
+
+config HAVE_KVM_PM_NOTIFIER
+       bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa7e7ebefc796..fc35ba0ea5d3b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@
 #include <linux/io.h>
 #include <linux/lockdep.h>
 #include <linux/kthread.h>
+#include <linux/suspend.h>
 
 #include <asm/processor.h>
 #include <asm/ioctl.h>
@@ -780,6 +781,38 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+static int kvm_pm_notifier_call(struct notifier_block *bl,
+				unsigned long state,
+				void *unused)
+{
+	struct kvm *kvm = container_of(bl, struct kvm, pm_notifier);
+
+	return kvm_arch_pm_notifier(kvm, state);
+}
+
+static void kvm_init_pm_notifier(struct kvm *kvm)
+{
+	kvm->pm_notifier.notifier_call = kvm_pm_notifier_call;
+	/* Suspend KVM before we suspend ftrace, RCU, etc. */
+	kvm->pm_notifier.priority = INT_MAX;
+	register_pm_notifier(&kvm->pm_notifier);
+}
+
+static void kvm_destroy_pm_notifier(struct kvm *kvm)
+{
+	unregister_pm_notifier(&kvm->pm_notifier);
+}
+#else /* !CONFIG_HAVE_KVM_PM_NOTIFIER */
+static void kvm_init_pm_notifier(struct kvm *kvm)
+{
+}
+
+static void kvm_destroy_pm_notifier(struct kvm *kvm)
+{
+}
+#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
+
 static struct kvm_memslots *kvm_alloc_memslots(void)
 {
 	int i;
@@ -964,6 +997,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_unlock(&kvm_lock);
 
 	preempt_notifier_inc();
+	kvm_init_pm_notifier(kvm);
 
 	return kvm;
 
@@ -1011,6 +1045,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	int i;
 	struct mm_struct *mm = kvm->mm;
 
+	kvm_destroy_pm_notifier(kvm);
 	kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
 	kvm_destroy_vm_debugfs(kvm);
 	kvm_arch_sync_events(kvm);
-- 
GitLab


From 7d62874f69d7e5c1c1063a5848075bd1adff3998 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Sun, 6 Jun 2021 11:10:45 +0900
Subject: [PATCH 3157/3804] kvm: x86: implement KVM PM-notifier

Implement PM hibernation/suspend prepare notifiers so that KVM
can reliably set PVCLOCK_GUEST_STOPPED on VCPUs and properly
suspend VMs.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Message-Id: <20210606021045.14159-2-senozhatsky@chromium.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig |  1 +
 arch/x86/kvm/x86.c   | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index f6b93a35ce145..7a78b88c0f1a9 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -46,6 +46,7 @@ config KVM
 	select KVM_GENERIC_DIRTYLOG_READ_PROTECT
 	select KVM_VFIO
 	select SRCU
+	select HAVE_KVM_PM_NOTIFIER if PM
 	help
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d1fdbaa6e1a9c..3c5a33ab10c04 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -58,6 +58,7 @@
 #include <linux/sched/isolation.h>
 #include <linux/mem_encrypt.h>
 #include <linux/entry-kvm.h>
+#include <linux/suspend.h>
 
 #include <trace/events/kvm.h>
 
@@ -5701,6 +5702,41 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
+static int kvm_arch_suspend_notifier(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	int i, ret = 0;
+
+	mutex_lock(&kvm->lock);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!vcpu->arch.pv_time_enabled)
+			continue;
+
+		ret = kvm_set_guest_paused(vcpu);
+		if (ret) {
+			kvm_err("Failed to pause guest VCPU%d: %d\n",
+				vcpu->vcpu_id, ret);
+			break;
+		}
+	}
+	mutex_unlock(&kvm->lock);
+
+	return ret ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
+{
+	switch (state) {
+	case PM_HIBERNATION_PREPARE:
+	case PM_SUSPEND_PREPARE:
+		return kvm_arch_suspend_notifier(kvm);
+	}
+
+	return NOTIFY_DONE;
+}
+#endif /* CONFIG_HAVE_KVM_PM_NOTIFIER */
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
-- 
GitLab


From fdf513e37a3bd9f498179c878cfcd59693bf507c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 9 Jun 2021 17:09:08 +0200
Subject: [PATCH 3158/3804] KVM: x86: Use common 'enable_apicv' variable for
 both APICv and AVIC

Unify VMX and SVM code by moving APICv/AVIC enablement tracking to the
common 'enable_apicv' variable. Note: unlike APICv, AVIC is disabled by
default.

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210609150911.1471882-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/avic.c         | 14 +++++---------
 arch/x86/kvm/svm/svm.c          | 23 ++++++++++++++---------
 arch/x86/kvm/svm/svm.h          |  2 --
 arch/x86/kvm/vmx/capabilities.h |  1 -
 arch/x86/kvm/vmx/vmx.c          |  1 -
 arch/x86/kvm/x86.c              |  3 +++
 7 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ca3b1925cffb8..7f53e5fba7358 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1432,6 +1432,7 @@ struct kvm_arch_async_pf {
 extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
+extern bool __read_mostly enable_apicv;
 extern struct kvm_x86_ops kvm_x86_ops;
 
 #define KVM_X86_OP(func) \
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 5e7e920113f39..a9abed054cd5c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,10 +27,6 @@
 #include "irq.h"
 #include "svm.h"
 
-/* enable / disable AVIC */
-bool avic;
-module_param(avic, bool, S_IRUGO);
-
 #define SVM_AVIC_DOORBELL	0xc001011b
 
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
@@ -124,7 +120,7 @@ void avic_vm_destroy(struct kvm *kvm)
 	unsigned long flags;
 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 
-	if (!avic)
+	if (!enable_apicv)
 		return;
 
 	if (kvm_svm->avic_logical_id_table_page)
@@ -147,7 +143,7 @@ int avic_vm_init(struct kvm *kvm)
 	struct page *l_page;
 	u32 vm_id;
 
-	if (!avic)
+	if (!enable_apicv)
 		return 0;
 
 	/* Allocating physical APIC ID table (4KB) */
@@ -569,7 +565,7 @@ int avic_init_vcpu(struct vcpu_svm *svm)
 	int ret;
 	struct kvm_vcpu *vcpu = &svm->vcpu;
 
-	if (!avic || !irqchip_in_kernel(vcpu->kvm))
+	if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
 		return 0;
 
 	ret = avic_init_backing_page(vcpu);
@@ -593,7 +589,7 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu)
 
 void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
 {
-	if (!avic || !lapic_in_kernel(vcpu))
+	if (!enable_apicv || !lapic_in_kernel(vcpu))
 		return;
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -653,7 +649,7 @@ void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
 	struct vmcb *vmcb = svm->vmcb;
 	bool activated = kvm_vcpu_apicv_active(vcpu);
 
-	if (!avic)
+	if (!enable_apicv)
 		return;
 
 	if (activated) {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a4d29ee9422d0..00ea8dc1bc9c4 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -185,6 +185,13 @@ module_param(vls, int, 0444);
 static int vgif = true;
 module_param(vgif, int, 0444);
 
+/*
+ * enable / disable AVIC.  Because the defaults differ for APICv
+ * support between VMX and SVM we cannot use module_param_named.
+ */
+static bool avic;
+module_param(avic, bool, 0444);
+
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
@@ -1009,14 +1016,12 @@ static __init int svm_hardware_setup(void)
 			nrips = false;
 	}
 
-	if (avic) {
-		if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) {
-			avic = false;
-		} else {
-			pr_info("AVIC enabled\n");
+	enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC);
 
-			amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
-		}
+	if (enable_apicv) {
+		pr_info("AVIC enabled\n");
+
+		amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
 	}
 
 	if (vls) {
@@ -4431,13 +4436,13 @@ static int svm_vm_init(struct kvm *kvm)
 	if (!pause_filter_count || !pause_filter_thresh)
 		kvm->arch.pause_in_guest = true;
 
-	if (avic) {
+	if (enable_apicv) {
 		int ret = avic_vm_init(kvm);
 		if (ret)
 			return ret;
 	}
 
-	kvm_apicv_init(kvm, avic);
+	kvm_apicv_init(kvm, enable_apicv);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 70419e417c0d9..a514b490db4a1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -479,8 +479,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops;
 
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 
-extern bool avic;
-
 static inline void avic_update_vapic_bar(struct vcpu_svm *svm, u64 data)
 {
 	svm->vmcb->control.avic_vapic_bar = data & VMCB_AVIC_APIC_BAR_MASK;
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index aa0e7872fcc9f..4705ad55abb56 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -12,7 +12,6 @@ extern bool __read_mostly enable_ept;
 extern bool __read_mostly enable_unrestricted_guest;
 extern bool __read_mostly enable_ept_ad_bits;
 extern bool __read_mostly enable_pml;
-extern bool __read_mostly enable_apicv;
 extern int __read_mostly pt_mode;
 
 #define PT_MODE_SYSTEM		0
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 092a045de8690..981361d095edf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -101,7 +101,6 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 static bool __read_mostly fasteoi = 1;
 module_param(fasteoi, bool, S_IRUGO);
 
-bool __read_mostly enable_apicv = 1;
 module_param(enable_apicv, bool, S_IRUGO);
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c5a33ab10c04..8324313f12b53 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -210,6 +210,9 @@ EXPORT_SYMBOL_GPL(host_efer);
 bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
+bool __read_mostly enable_apicv = true;
+EXPORT_SYMBOL_GPL(enable_apicv);
+
 u64 __read_mostly host_xss;
 EXPORT_SYMBOL_GPL(host_xss);
 u64 __read_mostly supported_xss;
-- 
GitLab


From 4651fc56bad01d340844c5fbf1e1f817639208ab Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 9 Jun 2021 17:09:09 +0200
Subject: [PATCH 3159/3804] KVM: x86: Drop vendor specific functions for
 APICv/AVIC enablement

Now that APICv/AVIC enablement is kept in the common 'enable_apicv'
variable, there's no need to call kvm_apicv_init() from vendor-specific
code.

No functional change intended.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210609150911.1471882-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 1 -
 arch/x86/kvm/svm/svm.c          | 1 -
 arch/x86/kvm/vmx/vmx.c          | 1 -
 arch/x86/kvm/x86.c              | 6 +++---
 4 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7f53e5fba7358..ced3e3b94b777 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1672,7 +1672,6 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
 				struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
-void kvm_apicv_init(struct kvm *kvm, bool enable);
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
 void kvm_request_apicv_update(struct kvm *kvm, bool activate,
 			      unsigned long bit);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 00ea8dc1bc9c4..1e2c635d308c9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4442,7 +4442,6 @@ static int svm_vm_init(struct kvm *kvm)
 			return ret;
 	}
 
-	kvm_apicv_init(kvm, enable_apicv);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 981361d095edf..76586ce9cf769 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7004,7 +7004,6 @@ static int vmx_vm_init(struct kvm *kvm)
 			break;
 		}
 	}
-	kvm_apicv_init(kvm, enable_apicv);
 	return 0;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8324313f12b53..ec11ce280fdc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8471,16 +8471,15 @@ bool kvm_apicv_activated(struct kvm *kvm)
 }
 EXPORT_SYMBOL_GPL(kvm_apicv_activated);
 
-void kvm_apicv_init(struct kvm *kvm, bool enable)
+static void kvm_apicv_init(struct kvm *kvm)
 {
-	if (enable)
+	if (enable_apicv)
 		clear_bit(APICV_INHIBIT_REASON_DISABLE,
 			  &kvm->arch.apicv_inhibit_reasons);
 	else
 		set_bit(APICV_INHIBIT_REASON_DISABLE,
 			&kvm->arch.apicv_inhibit_reasons);
 }
-EXPORT_SYMBOL_GPL(kvm_apicv_init);
 
 static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
 {
@@ -10885,6 +10884,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
 
+	kvm_apicv_init(kvm);
 	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
 	kvm_mmu_init_vm(kvm);
-- 
GitLab


From 25b17226cd9a77982fc8c915d4118d7238a0f079 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:12 -0700
Subject: [PATCH 3160/3804] KVM: x86: Emulate triple fault shutdown if RSM
 emulation fails

Use the recently introduced KVM_REQ_TRIPLE_FAULT to properly emulate
shutdown if RSM from SMM fails.

Note, entering shutdown after clearing the SMM flag and restoring NMI
blocking is architecturally correct with respect to AMD's APM, which KVM
also uses for SMRAM layout and RSM NMI blocking behavior.  The APM says:

  An RSM causes a processor shutdown if an invalid-state condition is
  found in the SMRAM state-save area. Only an external reset, external
  processor-initialization, or non-maskable external interrupt (NMI) can
  cause the processor to leave the shutdown state.

Of note is processor-initialization (INIT) as a valid shutdown wake
event, as INIT is blocked by SMM, implying that entering shutdown also
forces the CPU out of SMM.

For recent Intel CPUs, restoring NMI blocking is technically wrong, but
so is restoring NMI blocking in the first place, and Intel's RSM
"architecture" is such a mess that just about anything is allowed and can
be justified as micro-architectural behavior.

Per the SDM:

  On Pentium 4 and later processors, shutdown will inhibit INTR and A20M
  but will not change any of the other inhibits. On these processors,
  NMIs will be inhibited if no action is taken in the SMI handler to
  uninhibit them (see Section 34.8).

where Section 34.8 says:

  When the processor enters SMM while executing an NMI handler, the
  processor saves the SMRAM state save map but does not save the
  attribute to keep NMI interrupts disabled. Potentially, an NMI could be
  latched (while in SMM or upon exit) and serviced upon exit of SMM even
  though the previous NMI handler has still not completed.

I.e. RSM unconditionally unblocks NMI, but shutdown on RSM does not,
which is in direct contradiction of KVM's behavior.  But, as mentioned
above, KVM follows AMD architecture and restores NMI blocking on RSM, so
that micro-architectural detail is already lost.

And for Pentium era CPUs, SMI# can break shutdown, meaning that at least
some Intel CPUs fully leave SMM when entering shutdown:

  In the shutdown state, Intel processors stop executing instructions
  until a RESET#, INIT# or NMI# is asserted.  While Pentium family
  processors recognize the SMI# signal in shutdown state, P6 family and
  Intel486 processors do not.

In other words, the fact that Intel CPUs have implemented the two
extremes gives KVM carte blanche when it comes to honoring Intel's
architecture for handling shutdown during RSM.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-3-seanjc@google.com>
[Return X86EMUL_CONTINUE after triple fault. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 12 +++++++-----
 arch/x86/kvm/kvm_emulate.h |  1 +
 arch/x86/kvm/x86.c         |  6 ++++++
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 10e16a70b361c..63f9ca1c0ce06 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2580,7 +2580,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 	 * state-save area.
 	 */
 	if (ctxt->ops->pre_leave_smm(ctxt, buf))
-		return X86EMUL_UNHANDLEABLE;
+		goto emulate_shutdown;
 
 #ifdef CONFIG_X86_64
 	if (emulator_has_longmode(ctxt))
@@ -2589,14 +2589,16 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 #endif
 		ret = rsm_load_state_32(ctxt, buf);
 
-	if (ret != X86EMUL_CONTINUE) {
-		/* FIXME: should triple fault */
-		return X86EMUL_UNHANDLEABLE;
-	}
+	if (ret != X86EMUL_CONTINUE)
+		goto emulate_shutdown;
 
 	ctxt->ops->post_leave_smm(ctxt);
 
 	return X86EMUL_CONTINUE;
+
+emulate_shutdown:
+	ctxt->ops->triple_fault(ctxt);
+	return X86EMUL_CONTINUE;
 }
 
 static void
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index b063d376b7d92..357cfd1ccafd7 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -234,6 +234,7 @@ struct x86_emulate_ops {
 	int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
 			     const char *smstate);
 	void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
+	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
 };
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ec11ce280fdc5..7bd1ddfec5221 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7233,6 +7233,11 @@ static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
 	kvm_smm_changed(emul_to_vcpu(ctxt));
 }
 
+static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
+{
+	kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
+}
+
 static int emulator_set_xcr(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr)
 {
 	return __kvm_set_xcr(emul_to_vcpu(ctxt), index, xcr);
@@ -7282,6 +7287,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.set_hflags          = emulator_set_hflags,
 	.pre_leave_smm       = emulator_pre_leave_smm,
 	.post_leave_smm      = emulator_post_leave_smm,
+	.triple_fault        = emulator_triple_fault,
 	.set_xcr             = emulator_set_xcr,
 };
 
-- 
GitLab


From edce46548b70b8637694d96122447662ff35af0c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:13 -0700
Subject: [PATCH 3161/3804] KVM: x86: Replace .set_hflags() with dedicated
 .exiting_smm() helper

Replace the .set_hflags() emulator hook with a dedicated .exiting_smm(),
moving the SMM and SMM_INSIDE_NMI flag handling out of the emulator in
the process.  This is a step towards consolidating much of the logic in
kvm_smm_changed(), including the SMM hflags updates.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 3 +--
 arch/x86/kvm/kvm_emulate.h | 2 +-
 arch/x86/kvm/x86.c         | 6 +++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 63f9ca1c0ce06..4996eec7aa79f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2535,8 +2535,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
 		ctxt->ops->set_nmi_mask(ctxt, false);
 
-	ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
-		~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
+	ctxt->ops->exiting_smm(ctxt);
 
 	/*
 	 * Get back to real mode, to prepare a safe state in which to load
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 357cfd1ccafd7..298bb0da7b973 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -230,7 +230,7 @@ struct x86_emulate_ops {
 	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 
 	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
-	void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
+	void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
 	int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
 			     const char *smstate);
 	void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7bd1ddfec5221..15a9859b60464 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7214,11 +7214,11 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 	return emul_to_vcpu(ctxt)->arch.hflags;
 }
 
-static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
-	vcpu->arch.hflags = emul_flags;
+	vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
 	kvm_mmu_reset_context(vcpu);
 }
 
@@ -7284,7 +7284,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.guest_has_fxsr      = emulator_guest_has_fxsr,
 	.set_nmi_mask        = emulator_set_nmi_mask,
 	.get_hflags          = emulator_get_hflags,
-	.set_hflags          = emulator_set_hflags,
+	.exiting_smm         = emulator_exiting_smm,
 	.pre_leave_smm       = emulator_pre_leave_smm,
 	.post_leave_smm      = emulator_post_leave_smm,
 	.triple_fault        = emulator_triple_fault,
-- 
GitLab


From fa75e08bbe4f8ea609f61bbb6c04b3bb2b38c793 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:14 -0700
Subject: [PATCH 3162/3804] KVM: x86: Invoke kvm_smm_changed() immediately
 after clearing SMM flag

Move RSM emulation's call to kvm_smm_changed() from .post_leave_smm() to
.exiting_smm(), leaving behind the MMU context reset.  The primary
motivation is to allow for future cleanup, but this also fixes a bug of
sorts by queueing KVM_REQ_EVENT even if RSM causes shutdown, e.g. to let
an INIT wake the vCPU from shutdown.  Of course, KVM doesn't properly
emulate a shutdown state, e.g. KVM doesn't block SMIs after shutdown, and
immediately exits to userspace, so the event request is a moot point in
practice.

Moving kvm_smm_changed() also moves the RSM tracepoint.  This isn't
strictly necessary, but will allow consolidating the SMI and RSM
tracepoints in a future commit (by also moving the SMI tracepoint).
Invoking the tracepoint before loading SMRAM state also means the SMBASE
that is reported in the tracepoint will point at the state that will be
used for RSM, as opposed to the SMBASE _after_ RSM completes, which is
arguably a good thing if the tracepoint is being used to debug a RSM/SMM
issue.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 15a9859b60464..774f2e7bedae6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7219,7 +7219,7 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
 	vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
-	kvm_mmu_reset_context(vcpu);
+	kvm_smm_changed(vcpu);
 }
 
 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -7230,7 +7230,7 @@ static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
 
 static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
 {
-	kvm_smm_changed(emul_to_vcpu(ctxt));
+	kvm_mmu_reset_context(emul_to_vcpu(ctxt));
 }
 
 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
-- 
GitLab


From dc87275f47332be922d4eb299595523cc3a97479 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:15 -0700
Subject: [PATCH 3163/3804] KVM: x86: Move (most) SMM hflags modifications into
 kvm_smm_changed()

Move the core of SMM hflags modifications into kvm_smm_changed() and use
kvm_smm_changed() in enter_smm().  Clear HF_SMM_INSIDE_NMI_MASK for
leaving SMM but do not set it for entering SMM.  If the vCPU is executing
outside of SMM, the flag should unequivocally be cleared, e.g. this
technically fixes a benign bug where the flag could be left set after
KVM_SET_VCPU_EVENTS, but the reverse is not true as NMI blocking depends
on pre-SMM state or userspace input.

Note, this adds an extra kvm_mmu_reset_context() to enter_smm().  The
extra/early reset isn't strictly necessary, and in a way can never be
necessary since the vCPU/MMU context is in a half-baked state until the
final context reset at the end of the function.  But, enter_smm() is not
a hot path, and exploding on an invalid root_hpa is probably better than
having a stale SMM flag in the MMU role; it's at least no worse.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 774f2e7bedae6..57efc3a49753b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4532,7 +4532,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
-static void kvm_smm_changed(struct kvm_vcpu *vcpu);
+static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
 
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 					      struct kvm_vcpu_events *events)
@@ -4592,13 +4592,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
 	if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
-		if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
-			if (events->smi.smm)
-				vcpu->arch.hflags |= HF_SMM_MASK;
-			else
-				vcpu->arch.hflags &= ~HF_SMM_MASK;
-			kvm_smm_changed(vcpu);
-		}
+		if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm)
+			kvm_smm_changed(vcpu, events->smi.smm);
 
 		vcpu->arch.smi_pending = events->smi.pending;
 
@@ -7218,8 +7213,7 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 
-	vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
-	kvm_smm_changed(vcpu);
+	kvm_smm_changed(vcpu, false);
 }
 
 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -7548,9 +7542,13 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
 static int complete_emulated_pio(struct kvm_vcpu *vcpu);
 
-static void kvm_smm_changed(struct kvm_vcpu *vcpu)
+static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 {
-	if (!(vcpu->arch.hflags & HF_SMM_MASK)) {
+	if (entering_smm) {
+		vcpu->arch.hflags |= HF_SMM_MASK;
+	} else {
+		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
+
 		/* This is a good place to trace that we are exiting SMM.  */
 		trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
 
@@ -9022,7 +9020,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	 */
 	static_call(kvm_x86_pre_enter_smm)(vcpu, buf);
 
-	vcpu->arch.hflags |= HF_SMM_MASK;
+	kvm_smm_changed(vcpu, true);
 	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
 
 	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
-- 
GitLab


From 0d7ee6f4b58dc6aca54df285cec027727c976892 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:16 -0700
Subject: [PATCH 3164/3804] KVM: x86: Move "entering SMM" tracepoint into
 kvm_smm_changed()

Invoke the "entering SMM" tracepoint from kvm_smm_changed() instead of
enter_smm(), effectively moving it from before reading vCPU state to
after reading state (but still before writing it to SMRAM!).  The primary
motivation is to consolidate code, but calling the tracepoint from
kvm_smm_changed() also makes its invocation consistent with respect to
SMI and RSM, and with respect to KVM_SET_VCPU_EVENTS (which previously
only invoked the tracepoint when forcing the vCPU out of SMM).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 57efc3a49753b..389f634a40839 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7544,14 +7544,13 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu);
 
 static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 {
+	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
+
 	if (entering_smm) {
 		vcpu->arch.hflags |= HF_SMM_MASK;
 	} else {
 		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
 
-		/* This is a good place to trace that we are exiting SMM.  */
-		trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, false);
-
 		/* Process a latched INIT or SMI, if any.  */
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	}
@@ -9004,7 +9003,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	char buf[512];
 	u32 cr0;
 
-	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, true);
 	memset(buf, 0, 512);
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-- 
GitLab


From 1270e647c802b427c8114816b0f35b961600f104 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:17 -0700
Subject: [PATCH 3165/3804] KVM: x86: Rename SMM tracepoint to make it reflect
 reality

Rename the SMM tracepoint, which handles both entering and exiting SMM,
from kvm_enter_smm to kvm_smm_transition.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/trace.h | 2 +-
 arch/x86/kvm/x86.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 4f839148948bc..b484141ea15bb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -997,7 +997,7 @@ TRACE_EVENT(kvm_wait_lapic_expire,
 		  __entry->delta < 0 ? "early" : "late")
 );
 
-TRACE_EVENT(kvm_enter_smm,
+TRACE_EVENT(kvm_smm_transition,
 	TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering),
 	TP_ARGS(vcpu_id, smbase, entering),
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 389f634a40839..1017d398e72de 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7544,7 +7544,7 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu);
 
 static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 {
-	trace_kvm_enter_smm(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
+	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
 
 	if (entering_smm) {
 		vcpu->arch.hflags |= HF_SMM_MASK;
-- 
GitLab


From 0128116550acf52043a0aa5cca3caa85e3853aca Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:18 -0700
Subject: [PATCH 3166/3804] KVM: x86: Drop .post_leave_smm(), i.e. the manual
 post-RSM MMU reset

Drop the .post_leave_smm() emulator callback, which at this point is just
a wrapper to kvm_mmu_reset_context().  The manual context reset is
unnecessary, because unlike enter_smm() which calls vendor MSR/CR helpers
directly, em_rsm() bounces through the KVM helpers, e.g. kvm_set_cr4(),
which are responsible for processing side effects.  em_rsm() is already
subtly relying on this behavior as it doesn't manually do
kvm_update_cpuid_runtime(), e.g. to recognize CR4.OSXSAVE changes.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 10 ++++++++--
 arch/x86/kvm/kvm_emulate.h |  1 -
 arch/x86/kvm/x86.c         |  6 ------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4996eec7aa79f..83520a9f171ee 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2591,8 +2591,14 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 	if (ret != X86EMUL_CONTINUE)
 		goto emulate_shutdown;
 
-	ctxt->ops->post_leave_smm(ctxt);
-
+	/*
+	 * Note, the ctxt->ops callbacks are responsible for handling side
+	 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
+	 * runtime updates, etc...  If that changes, e.g. this flow is moved
+	 * out of the emulator to make it look more like enter_smm(), then
+	 * those side effects need to be explicitly handled for both success
+	 * and shutdown.
+	 */
 	return X86EMUL_CONTINUE;
 
 emulate_shutdown:
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 298bb0da7b973..3ee701b0ef103 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -233,7 +233,6 @@ struct x86_emulate_ops {
 	void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
 	int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
 			     const char *smstate);
-	void (*post_leave_smm)(struct x86_emulate_ctxt *ctxt);
 	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1017d398e72de..9a268728399eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7222,11 +7222,6 @@ static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
 	return static_call(kvm_x86_pre_leave_smm)(emul_to_vcpu(ctxt), smstate);
 }
 
-static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
-{
-	kvm_mmu_reset_context(emul_to_vcpu(ctxt));
-}
-
 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
 {
 	kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
@@ -7280,7 +7275,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.get_hflags          = emulator_get_hflags,
 	.exiting_smm         = emulator_exiting_smm,
 	.pre_leave_smm       = emulator_pre_leave_smm,
-	.post_leave_smm      = emulator_post_leave_smm,
 	.triple_fault        = emulator_triple_fault,
 	.set_xcr             = emulator_set_xcr,
 };
-- 
GitLab


From ecc513e5bb7ed5d007dcaa533729360e9eb673ba Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 11:56:19 -0700
Subject: [PATCH 3167/3804] KVM: x86: Drop "pre_" from enter/leave_smm()
 helpers

Now that .post_leave_smm() is gone, drop "pre_" from the remaining
helpers.  The helpers aren't invoked purely before SMI/RSM processing,
e.g. both helpers are invoked after state is snapshotted (from regs or
SMRAM), and the RSM helper is invoked after some amount of register state
has been stuffed.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609185619.992058-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  4 ++--
 arch/x86/include/asm/kvm_host.h    |  4 ++--
 arch/x86/kvm/emulate.c             |  6 +++---
 arch/x86/kvm/kvm_emulate.h         |  3 +--
 arch/x86/kvm/svm/svm.c             |  8 ++++----
 arch/x86/kvm/vmx/vmx.c             |  8 ++++----
 arch/x86/kvm/x86.c                 | 14 +++++++-------
 7 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index aeb5f11367181..a12a4987154ee 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -109,8 +109,8 @@ KVM_X86_OP_NULL(set_hv_timer)
 KVM_X86_OP_NULL(cancel_hv_timer)
 KVM_X86_OP(setup_mce)
 KVM_X86_OP(smi_allowed)
-KVM_X86_OP(pre_enter_smm)
-KVM_X86_OP(pre_leave_smm)
+KVM_X86_OP(enter_smm)
+KVM_X86_OP(leave_smm)
 KVM_X86_OP(enable_smi_window)
 KVM_X86_OP_NULL(mem_enc_op)
 KVM_X86_OP_NULL(mem_enc_reg_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ced3e3b94b777..921de30c23c53 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1372,8 +1372,8 @@ struct kvm_x86_ops {
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
 	int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
-	int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
-	int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+	int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
+	int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
 	void (*enable_smi_window)(struct kvm_vcpu *vcpu);
 
 	int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 83520a9f171ee..2837110e66eda 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2574,11 +2574,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/*
-	 * Give pre_leave_smm() a chance to make ISA-specific changes to the
-	 * vCPU state (e.g. enter guest mode) before loading state from the SMM
+	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
+	 * state (e.g. enter guest mode) before loading state from the SMM
 	 * state-save area.
 	 */
-	if (ctxt->ops->pre_leave_smm(ctxt, buf))
+	if (ctxt->ops->leave_smm(ctxt, buf))
 		goto emulate_shutdown;
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 3ee701b0ef103..68b420289d7ed 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -231,8 +231,7 @@ struct x86_emulate_ops {
 
 	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
 	void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
-	int (*pre_leave_smm)(struct x86_emulate_ctxt *ctxt,
-			     const char *smstate);
+	int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
 	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
 };
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1e2c635d308c9..e7bec71a3d9b0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4258,7 +4258,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return !svm_smi_blocked(vcpu);
 }
 
-static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	int ret;
@@ -4280,7 +4280,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 	return 0;
 }
 
-static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_host_map map;
@@ -4555,8 +4555,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.setup_mce = svm_setup_mce,
 
 	.smi_allowed = svm_smi_allowed,
-	.pre_enter_smm = svm_pre_enter_smm,
-	.pre_leave_smm = svm_pre_leave_smm,
+	.enter_smm = svm_enter_smm,
+	.leave_smm = svm_leave_smm,
 	.enable_smi_window = svm_enable_smi_window,
 
 	.mem_enc_op = svm_mem_enc_op,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 76586ce9cf769..51bbde75b1fd4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7544,7 +7544,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return !is_smm(vcpu);
 }
 
-static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -7558,7 +7558,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 	return 0;
 }
 
-static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int ret;
@@ -7736,8 +7736,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.setup_mce = vmx_setup_mce,
 
 	.smi_allowed = vmx_smi_allowed,
-	.pre_enter_smm = vmx_pre_enter_smm,
-	.pre_leave_smm = vmx_pre_leave_smm,
+	.enter_smm = vmx_enter_smm,
+	.leave_smm = vmx_leave_smm,
 	.enable_smi_window = vmx_enable_smi_window,
 
 	.can_emulate_instruction = vmx_can_emulate_instruction,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a268728399eb..8d88e4513294c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7216,10 +7216,10 @@ static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
 	kvm_smm_changed(vcpu, false);
 }
 
-static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
+static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
 				  const char *smstate)
 {
-	return static_call(kvm_x86_pre_leave_smm)(emul_to_vcpu(ctxt), smstate);
+	return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
 }
 
 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
@@ -7274,7 +7274,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.set_nmi_mask        = emulator_set_nmi_mask,
 	.get_hflags          = emulator_get_hflags,
 	.exiting_smm         = emulator_exiting_smm,
-	.pre_leave_smm       = emulator_pre_leave_smm,
+	.leave_smm           = emulator_leave_smm,
 	.triple_fault        = emulator_triple_fault,
 	.set_xcr             = emulator_set_xcr,
 };
@@ -9006,11 +9006,11 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 		enter_smm_save_state_32(vcpu, buf);
 
 	/*
-	 * Give pre_enter_smm() a chance to make ISA-specific changes to the
-	 * vCPU state (e.g. leave guest mode) after we've saved the state into
-	 * the SMM state-save area.
+	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
+	 * state (e.g. leave guest mode) after we've saved the state into the
+	 * SMM state-save area.
 	 */
-	static_call(kvm_x86_pre_enter_smm)(vcpu, buf);
+	static_call(kvm_x86_enter_smm)(vcpu, buf);
 
 	kvm_smm_changed(vcpu, true);
 	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
-- 
GitLab


From b93af02c6722fde384ed2e921b71b61b9addb740 Mon Sep 17 00:00:00 2001
From: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Date: Wed, 9 Jun 2021 14:03:38 -0400
Subject: [PATCH 3168/3804] KVM: nVMX: nSVM: 'nested_run' should count
 guest-entry attempts that make it to guest code

Currently, the 'nested_run' statistic counts all guest-entry attempts,
including those that fail during vmentry checks on Intel and during
consistency checks on AMD. Convert this statistic to count only those
guest-entries that make it past these state checks and make it to guest
code. This will tell us the number of guest-entries that actually executed
or tried to execute guest code.

Signed-off-by: Krish Sadhukhan <Krish.Sadhukhan@oracle.com>
Message-Id: <20210609180340.104248-2-krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c |  2 --
 arch/x86/kvm/svm/svm.c    |  6 ++++++
 arch/x86/kvm/vmx/nested.c |  2 --
 arch/x86/kvm/vmx/vmx.c    | 13 ++++++++++++-
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 5e8d8443154e8..34fc74b0d58a3 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -596,8 +596,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	struct kvm_host_map map;
 	u64 vmcb12_gpa;
 
-	++vcpu->stat.nested_run;
-
 	if (is_smm(vcpu)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e7bec71a3d9b0..d223f5dfac533 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3844,6 +3844,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 	svm->next_rip = 0;
 	if (is_guest_mode(vcpu)) {
 		nested_sync_control_from_vmcb02(svm);
+
+		/* Track VMRUNs that have made past consistency checking */
+		if (svm->nested.nested_run_pending &&
+		    svm->vmcb->control.exit_code != SVM_EXIT_ERR)
+                        ++vcpu->stat.nested_run;
+
 		svm->nested.nested_run_pending = 0;
 	}
 
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 98b5f5f104da7..e77b8ee28df87 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3470,8 +3470,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
 	enum nested_evmptrld_status evmptrld_status;
 
-	++vcpu->stat.nested_run;
-
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 51bbde75b1fd4..5aa0e54c793b2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6809,7 +6809,18 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	kvm_load_host_xsave_state(vcpu);
 
-	vmx->nested.nested_run_pending = 0;
+	if (is_guest_mode(vcpu)) {
+		/*
+		 * Track VMLAUNCH/VMRESUME that have made past guest state
+		 * checking.
+		 */
+		if (vmx->nested.nested_run_pending &&
+		    !vmx->exit_reason.failed_vmentry)
+			++vcpu->stat.nested_run;
+
+		vmx->nested.nested_run_pending = 0;
+	}
+
 	vmx->idt_vectoring_info = 0;
 
 	if (unlikely(vmx->fail)) {
-- 
GitLab


From d5a0483f9f3250fe359224327ca1b4a29d106981 Mon Sep 17 00:00:00 2001
From: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Date: Wed, 9 Jun 2021 14:03:39 -0400
Subject: [PATCH 3169/3804] KVM: nVMX: nSVM: Add a new VCPU statistic to show
 if VCPU is in guest mode

Add the following per-VCPU statistic to KVM debugfs to show if a given
VCPU is in guest mode:

	guest_mode

Also add this as a per-VM statistic to KVM debugfs to show the total number
of VCPUs that are in guest mode in a given VM.

Signed-off-by: Krish Sadhukhan <Krish.Sadhukhan@oracle.com>
Message-Id: <20210609180340.104248-3-krish.sadhukhan@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/debugfs.c          | 11 +++++++++++
 arch/x86/kvm/kvm_cache_regs.h   |  3 +++
 arch/x86/kvm/x86.c              |  1 +
 4 files changed, 16 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 921de30c23c53..bea7290ef1735 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1180,6 +1180,7 @@ struct kvm_vcpu_stat {
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
+	u64 guest_mode;
 };
 
 struct x86_instruction_info;
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 7e818d64bb4d7..95a98413dc326 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -17,6 +17,15 @@ static int vcpu_get_timer_advance_ns(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_timer_advance_ns_fops, vcpu_get_timer_advance_ns, NULL, "%llu\n");
 
+static int vcpu_get_guest_mode(void *data, u64 *val)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+	*val = vcpu->stat.guest_mode;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_guest_mode_fops, vcpu_get_guest_mode, NULL, "%lld\n");
+
 static int vcpu_get_tsc_offset(void *data, u64 *val)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
@@ -45,6 +54,8 @@ DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bi
 
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
 {
+	debugfs_create_file("guest_mode", 0444, debugfs_dentry, vcpu,
+			    &vcpu_guest_mode_fops);
 	debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu,
 			    &vcpu_tsc_offset_fops);
 
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3db5c42c9ecde..ebddbd37a0bf5 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -162,6 +162,7 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;
+	vcpu->stat.guest_mode = 1;
 }
 
 static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
@@ -172,6 +173,8 @@ static inline void leave_guest_mode(struct kvm_vcpu *vcpu)
 		vcpu->arch.load_eoi_exitmap_pending = false;
 		kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu);
 	}
+
+	vcpu->stat.guest_mode = 0;
 }
 
 static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8d88e4513294c..0e2dbc7fdb976 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -250,6 +250,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("nested_run", nested_run),
 	VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
 	VCPU_STAT("directed_yield_successful", directed_yield_successful),
+	VCPU_STAT("guest_mode", guest_mode),
 	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
 	VM_STAT("mmu_pte_write", mmu_pte_write),
 	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
-- 
GitLab


From a6c776a952175e0fad22110e8d43019f3ac6f9af Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:34 +0000
Subject: [PATCH 3170/3804] hyperv: Detect Nested virtualization support for
 SVM

Previously, to detect nested virtualization enlightenment support,
we were using HV_X64_ENLIGHTENED_VMCS_RECOMMENDED feature bit of
HYPERV_CPUID_ENLIGHTMENT_INFO.EAX CPUID as documented in TLFS:
 "Bit 14: Recommend a nested hypervisor using the enlightened VMCS
  interface. Also indicates that additional nested enlightenments
  may be available (see leaf 0x4000000A)".

Enlightened VMCS, however, is an Intel only feature so the above
detection method doesn't work for AMD. So, use the
HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS.EAX CPUID information ("The
maximum input value for hypervisor CPUID information.") and this
works for both AMD and Intel.

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Message-Id: <43b25ff21cd2d9a51582033c9bdd895afefac056.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/cpu/mshyperv.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 22f13343b5da8..c268c27300481 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -252,6 +252,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
 
 static void __init ms_hyperv_init_platform(void)
 {
+	int hv_max_functions_eax;
 	int hv_host_info_eax;
 	int hv_host_info_ebx;
 	int hv_host_info_ecx;
@@ -269,6 +270,8 @@ static void __init ms_hyperv_init_platform(void)
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
+	hv_max_functions_eax = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS);
+
 	pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
 		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
 		ms_hyperv.misc_features);
@@ -298,8 +301,7 @@ static void __init ms_hyperv_init_platform(void)
 	/*
 	 * Extract host information.
 	 */
-	if (cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS) >=
-	    HYPERV_CPUID_VERSION) {
+	if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) {
 		hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION);
 		hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION);
 		hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION);
@@ -325,9 +327,11 @@ static void __init ms_hyperv_init_platform(void)
 			ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b);
 	}
 
-	if (ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED) {
+	if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) {
 		ms_hyperv.nested_features =
 			cpuid_eax(HYPERV_CPUID_NESTED_FEATURES);
+		pr_info("Hyper-V: Nested features: 0x%x\n",
+			ms_hyperv.nested_features);
 	}
 
 	/*
-- 
GitLab


From 32431fb2538df56693a5852a50013549c827f57c Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:35 +0000
Subject: [PATCH 3171/3804] hyperv: SVM enlightened TLB flush support flag

Bit 22 of HYPERV_CPUID_FEATURES.EDX is specific to SVM and specifies
support for enlightened TLB flush. With this enlightenment enabled,
ASID invalidations flush only gva->hpa entries. To flush TLB entries
derived from NPT, hypercalls should be used
(HvFlushGuestPhysicalAddressSpace or HvFlushGuestPhysicalAddressList).

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Message-Id: <a060f872d0df1955e52e30b877b3300485edb27c.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 9fe4cc9c0f7d5..f1366ce609e37 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -138,6 +138,15 @@
 #define HV_X64_NESTED_GUEST_MAPPING_FLUSH		BIT(18)
 #define HV_X64_NESTED_MSR_BITMAP			BIT(19)
 
+/*
+ * This is specific to AMD and specifies that enlightened TLB flush is
+ * supported. If guest opts in to this feature, ASID invalidations only
+ * flushes gva -> hpa mapping entries. To flush the TLB entries derived
+ * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
+ * or HvFlushGuestPhysicalAddressList).
+ */
+#define HV_X64_NESTED_ENLIGHTENED_TLB			BIT(22)
+
 /* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
 #define HV_PARAVISOR_PRESENT				BIT(0)
 
-- 
GitLab


From 3c86c0d3dbb98865a60a0c9d5c3a229af15a8a96 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:36 +0000
Subject: [PATCH 3172/3804] KVM: x86: hyper-v: Move the remote TLB flush logic
 out of vmx

Currently the remote TLB flush logic is specific to VMX.
Move it to a common place so that SVM can use it as well.

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Message-Id: <4f4e4ca19778437dae502f44363a38e99e3ef5d1.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   9 +++
 arch/x86/kvm/Makefile           |   5 ++
 arch/x86/kvm/kvm_onhyperv.c     |  93 ++++++++++++++++++++++++++++
 arch/x86/kvm/kvm_onhyperv.h     |  32 ++++++++++
 arch/x86/kvm/vmx/vmx.c          | 105 +-------------------------------
 arch/x86/kvm/vmx/vmx.h          |   9 ---
 arch/x86/kvm/x86.c              |   9 +++
 7 files changed, 150 insertions(+), 112 deletions(-)
 create mode 100644 arch/x86/kvm/kvm_onhyperv.c
 create mode 100644 arch/x86/kvm/kvm_onhyperv.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bea7290ef1735..1fdb212127c4a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -852,6 +852,10 @@ struct kvm_vcpu_arch {
 
 	/* Protected Guests */
 	bool guest_state_protected;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	hpa_t hv_root_tdp;
+#endif
 };
 
 struct kvm_lpage_info {
@@ -1131,6 +1135,11 @@ struct kvm_arch {
 	 * allocated for any newly created or modified memslots.
 	 */
 	bool memslots_have_rmaps;
+
+#if IS_ENABLED(CONFIG_HYPERV)
+	hpa_t	hv_root_tdp;
+	spinlock_t hv_root_tdp_lock;
+#endif
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c589db5d91b35..a06745c2fef1e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -18,6 +18,11 @@ kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
 			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
 			   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
 			   mmu/spte.o
+
+ifdef CONFIG_HYPERV
+kvm-y			+= kvm_onhyperv.o
+endif
+
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
 kvm-$(CONFIG_KVM_XEN)	+= xen.o
 
diff --git a/arch/x86/kvm/kvm_onhyperv.c b/arch/x86/kvm/kvm_onhyperv.c
new file mode 100644
index 0000000000000..c7db2df50a7ab
--- /dev/null
+++ b/arch/x86/kvm/kvm_onhyperv.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/mshyperv.h>
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+
+static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
+		void *data)
+{
+	struct kvm_tlb_range *range = data;
+
+	return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
+			range->pages);
+}
+
+static inline int hv_remote_flush_root_tdp(hpa_t root_tdp,
+					   struct kvm_tlb_range *range)
+{
+	if (range)
+		return hyperv_flush_guest_mapping_range(root_tdp,
+				kvm_fill_hv_flush_list_func, (void *)range);
+	else
+		return hyperv_flush_guest_mapping(root_tdp);
+}
+
+int hv_remote_flush_tlb_with_range(struct kvm *kvm,
+		struct kvm_tlb_range *range)
+{
+	struct kvm_arch *kvm_arch = &kvm->arch;
+	struct kvm_vcpu *vcpu;
+	int ret = 0, i, nr_unique_valid_roots;
+	hpa_t root;
+
+	spin_lock(&kvm_arch->hv_root_tdp_lock);
+
+	if (!VALID_PAGE(kvm_arch->hv_root_tdp)) {
+		nr_unique_valid_roots = 0;
+
+		/*
+		 * Flush all valid roots, and see if all vCPUs have converged
+		 * on a common root, in which case future flushes can skip the
+		 * loop and flush the common root.
+		 */
+		kvm_for_each_vcpu(i, vcpu, kvm) {
+			root = vcpu->arch.hv_root_tdp;
+			if (!VALID_PAGE(root) || root == kvm_arch->hv_root_tdp)
+				continue;
+
+			/*
+			 * Set the tracked root to the first valid root.  Keep
+			 * this root for the entirety of the loop even if more
+			 * roots are encountered as a low effort optimization
+			 * to avoid flushing the same (first) root again.
+			 */
+			if (++nr_unique_valid_roots == 1)
+				kvm_arch->hv_root_tdp = root;
+
+			if (!ret)
+				ret = hv_remote_flush_root_tdp(root, range);
+
+			/*
+			 * Stop processing roots if a failure occurred and
+			 * multiple valid roots have already been detected.
+			 */
+			if (ret && nr_unique_valid_roots > 1)
+				break;
+		}
+
+		/*
+		 * The optimized flush of a single root can't be used if there
+		 * are multiple valid roots (obviously).
+		 */
+		if (nr_unique_valid_roots > 1)
+			kvm_arch->hv_root_tdp = INVALID_PAGE;
+	} else {
+		ret = hv_remote_flush_root_tdp(kvm_arch->hv_root_tdp, range);
+	}
+
+	spin_unlock(&kvm_arch->hv_root_tdp_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hv_remote_flush_tlb_with_range);
+
+int hv_remote_flush_tlb(struct kvm *kvm)
+{
+	return hv_remote_flush_tlb_with_range(kvm, NULL);
+}
+EXPORT_SYMBOL_GPL(hv_remote_flush_tlb);
diff --git a/arch/x86/kvm/kvm_onhyperv.h b/arch/x86/kvm/kvm_onhyperv.h
new file mode 100644
index 0000000000000..1c67abf2eba92
--- /dev/null
+++ b/arch/x86/kvm/kvm_onhyperv.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V.
+ */
+
+#ifndef __ARCH_X86_KVM_KVM_ONHYPERV_H__
+#define __ARCH_X86_KVM_KVM_ONHYPERV_H__
+
+#if IS_ENABLED(CONFIG_HYPERV)
+int hv_remote_flush_tlb_with_range(struct kvm *kvm,
+		struct kvm_tlb_range *range);
+int hv_remote_flush_tlb(struct kvm *kvm);
+
+static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
+{
+	struct kvm_arch *kvm_arch = &vcpu->kvm->arch;
+
+	if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb) {
+		spin_lock(&kvm_arch->hv_root_tdp_lock);
+		vcpu->arch.hv_root_tdp = root_tdp;
+		if (root_tdp != kvm_arch->hv_root_tdp)
+			kvm_arch->hv_root_tdp = INVALID_PAGE;
+		spin_unlock(&kvm_arch->hv_root_tdp_lock);
+	}
+}
+#else /* !CONFIG_HYPERV */
+static inline void hv_track_root_tdp(struct kvm_vcpu *vcpu, hpa_t root_tdp)
+{
+}
+#endif /* !CONFIG_HYPERV */
+
+#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5aa0e54c793b2..e3f744bec7630 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -52,6 +52,7 @@
 #include "cpuid.h"
 #include "evmcs.h"
 #include "hyperv.h"
+#include "kvm_onhyperv.h"
 #include "irq.h"
 #include "kvm_cache_regs.h"
 #include "lapic.h"
@@ -458,86 +459,6 @@ static unsigned long host_idt_base;
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
 
-static int kvm_fill_hv_flush_list_func(struct hv_guest_mapping_flush_list *flush,
-		void *data)
-{
-	struct kvm_tlb_range *range = data;
-
-	return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
-			range->pages);
-}
-
-static inline int hv_remote_flush_root_ept(hpa_t root_ept,
-					   struct kvm_tlb_range *range)
-{
-	if (range)
-		return hyperv_flush_guest_mapping_range(root_ept,
-				kvm_fill_hv_flush_list_func, (void *)range);
-	else
-		return hyperv_flush_guest_mapping(root_ept);
-}
-
-static int hv_remote_flush_tlb_with_range(struct kvm *kvm,
-		struct kvm_tlb_range *range)
-{
-	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
-	struct kvm_vcpu *vcpu;
-	int ret = 0, i, nr_unique_valid_roots;
-	hpa_t root;
-
-	spin_lock(&kvm_vmx->hv_root_ept_lock);
-
-	if (!VALID_PAGE(kvm_vmx->hv_root_ept)) {
-		nr_unique_valid_roots = 0;
-
-		/*
-		 * Flush all valid roots, and see if all vCPUs have converged
-		 * on a common root, in which case future flushes can skip the
-		 * loop and flush the common root.
-		 */
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			root = to_vmx(vcpu)->hv_root_ept;
-			if (!VALID_PAGE(root) || root == kvm_vmx->hv_root_ept)
-				continue;
-
-			/*
-			 * Set the tracked root to the first valid root.  Keep
-			 * this root for the entirety of the loop even if more
-			 * roots are encountered as a low effort optimization
-			 * to avoid flushing the same (first) root again.
-			 */
-			if (++nr_unique_valid_roots == 1)
-				kvm_vmx->hv_root_ept = root;
-
-			if (!ret)
-				ret = hv_remote_flush_root_ept(root, range);
-
-			/*
-			 * Stop processing roots if a failure occurred and
-			 * multiple valid roots have already been detected.
-			 */
-			if (ret && nr_unique_valid_roots > 1)
-				break;
-		}
-
-		/*
-		 * The optimized flush of a single root can't be used if there
-		 * are multiple valid roots (obviously).
-		 */
-		if (nr_unique_valid_roots > 1)
-			kvm_vmx->hv_root_ept = INVALID_PAGE;
-	} else {
-		ret = hv_remote_flush_root_ept(kvm_vmx->hv_root_ept, range);
-	}
-
-	spin_unlock(&kvm_vmx->hv_root_ept_lock);
-	return ret;
-}
-static int hv_remote_flush_tlb(struct kvm *kvm)
-{
-	return hv_remote_flush_tlb_with_range(kvm, NULL);
-}
-
 static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 {
 	struct hv_enlightened_vmcs *evmcs;
@@ -565,21 +486,6 @@ static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
-static void hv_track_root_ept(struct kvm_vcpu *vcpu, hpa_t root_ept)
-{
-#if IS_ENABLED(CONFIG_HYPERV)
-	struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
-
-	if (kvm_x86_ops.tlb_remote_flush == hv_remote_flush_tlb) {
-		spin_lock(&kvm_vmx->hv_root_ept_lock);
-		to_vmx(vcpu)->hv_root_ept = root_ept;
-		if (root_ept != kvm_vmx->hv_root_ept)
-			kvm_vmx->hv_root_ept = INVALID_PAGE;
-		spin_unlock(&kvm_vmx->hv_root_ept_lock);
-	}
-#endif
-}
-
 /*
  * Comment's format: document - errata name - stepping - processor name.
  * Refer from
@@ -3184,7 +3090,7 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 		eptp = construct_eptp(vcpu, root_hpa, root_level);
 		vmcs_write64(EPT_POINTER, eptp);
 
-		hv_track_root_ept(vcpu, root_hpa);
+		hv_track_root_tdp(vcpu, root_hpa);
 
 		if (!enable_unrestricted_guest && !is_paging(vcpu))
 			guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr;
@@ -6966,9 +6872,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
 	vmx->pi_desc.sn = 1;
 
-#if IS_ENABLED(CONFIG_HYPERV)
-	vmx->hv_root_ept = INVALID_PAGE;
-#endif
 	return 0;
 
 free_vmcs:
@@ -6985,10 +6888,6 @@ free_vpid:
 
 static int vmx_vm_init(struct kvm *kvm)
 {
-#if IS_ENABLED(CONFIG_HYPERV)
-	spin_lock_init(&to_kvm_vmx(kvm)->hv_root_ept_lock);
-#endif
-
 	if (!ple_gap)
 		kvm->arch.pause_in_guest = true;
 
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 3eaa86a0ba3e3..5740f8e2aa231 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -334,10 +334,6 @@ struct vcpu_vmx {
 	/* SGX Launch Control public key hash */
 	u64 msr_ia32_sgxlepubkeyhash[4];
 
-#if IS_ENABLED(CONFIG_HYPERV)
-	u64 hv_root_ept;
-#endif
-
 	struct pt_desc pt_desc;
 	struct lbr_desc lbr_desc;
 
@@ -355,11 +351,6 @@ struct kvm_vmx {
 	unsigned int tss_addr;
 	bool ept_identity_pagetable_done;
 	gpa_t ept_identity_map_addr;
-
-#if IS_ENABLED(CONFIG_HYPERV)
-	hpa_t hv_root_ept;
-	spinlock_t hv_root_ept_lock;
-#endif
 };
 
 bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e2dbc7fdb976..63e48738764e6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10494,6 +10494,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.pending_external_vector = -1;
 	vcpu->arch.preempted_in_kernel = false;
 
+#if IS_ENABLED(CONFIG_HYPERV)
+	vcpu->arch.hv_root_tdp = INVALID_PAGE;
+#endif
+
 	r = static_call(kvm_x86_vcpu_create)(vcpu);
 	if (r)
 		goto free_guest_fpu;
@@ -10878,6 +10882,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm->arch.guest_can_read_msr_platform_info = true;
 
+#if IS_ENABLED(CONFIG_HYPERV)
+	spin_lock_init(&kvm->arch.hv_root_tdp_lock);
+	kvm->arch.hv_root_tdp = INVALID_PAGE;
+#endif
+
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
 	INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
 
-- 
GitLab


From 59d21d67f37481cfde25551ee6a467fa943812b4 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:37 +0000
Subject: [PATCH 3173/3804] KVM: SVM: Software reserved fields

SVM added support for certain reserved fields to be used by
software or hypervisor. Add the following reserved fields:
  - VMCB offset 0x3e0 - 0x3ff
  - Clean bit 31
  - SVM intercept exit code 0xf0000000

Later patches will make use of this for supporting Hyper-V
nested virtualization enhancements.

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Message-Id: <a1f17a43a8e9e751a1a9cc0281649d71bdbf721b.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/svm.h      |  9 +++++++--
 arch/x86/include/uapi/asm/svm.h |  3 +++
 arch/x86/kvm/svm/svm.h          | 17 +++++++++++++++--
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 772e60efe243a..e322676039f4e 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -156,6 +156,12 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	u64 avic_physical_id;	/* Offset 0xf8 */
 	u8 reserved_7[8];
 	u64 vmsa_pa;		/* Used for an SEV-ES guest */
+	u8 reserved_8[720];
+	/*
+	 * Offset 0x3e0, 32 bytes reserved
+	 * for use by hypervisor/software.
+	 */
+	u8 reserved_sw[32];
 };
 
 
@@ -314,7 +320,7 @@ struct ghcb {
 
 
 #define EXPECTED_VMCB_SAVE_AREA_SIZE		1032
-#define EXPECTED_VMCB_CONTROL_AREA_SIZE		272
+#define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
 #define EXPECTED_GHCB_SIZE			PAGE_SIZE
 
 static inline void __unused_size_checks(void)
@@ -326,7 +332,6 @@ static inline void __unused_size_checks(void)
 
 struct vmcb {
 	struct vmcb_control_area control;
-	u8 reserved_control[1024 - sizeof(struct vmcb_control_area)];
 	struct vmcb_save_area save;
 } __packed;
 
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 554f75fe013cf..efa969325ede5 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -110,6 +110,9 @@
 #define SVM_VMGEXIT_GET_AP_JUMP_TABLE		1
 #define SVM_VMGEXIT_UNSUPPORTED_EVENT		0x8000ffff
 
+/* Exit code reserved for hypervisor/software use */
+#define SVM_EXIT_SW				0xf0000000
+
 #define SVM_EXIT_ERR           -1
 
 #define SVM_EXIT_REASONS \
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a514b490db4a1..af09bcd229bdb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,11 @@
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
 
+/*
+ * Clean bits in VMCB.
+ * VMCB_ALL_CLEAN_MASK might also need to
+ * be updated if this enum is modified.
+ */
 enum {
 	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
 			    pause filter count */
@@ -48,9 +53,17 @@ enum {
 			  * AVIC PHYSICAL_TABLE pointer,
 			  * AVIC LOGICAL_TABLE pointer
 			  */
-	VMCB_DIRTY_MAX,
+	VMCB_SW = 31,    /* Reserved for hypervisor/software use */
 };
 
+#define VMCB_ALL_CLEAN_MASK (					\
+	(1U << VMCB_INTERCEPTS) | (1U << VMCB_PERM_MAP) |	\
+	(1U << VMCB_ASID) | (1U << VMCB_INTR) |			\
+	(1U << VMCB_NPT) | (1U << VMCB_CR) | (1U << VMCB_DR) |	\
+	(1U << VMCB_DT) | (1U << VMCB_SEG) | (1U << VMCB_CR2) |	\
+	(1U << VMCB_LBR) | (1U << VMCB_AVIC) |			\
+	(1U << VMCB_SW))
+
 /* TPR and CR2 are always written before VMRUN */
 #define VMCB_ALWAYS_DIRTY_MASK	((1U << VMCB_INTR) | (1U << VMCB_CR2))
 
@@ -237,7 +250,7 @@ static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
 
 static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
 {
-	vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
+	vmcb->control.clean = VMCB_ALL_CLEAN_MASK
 			       & ~VMCB_ALWAYS_DIRTY_MASK;
 }
 
-- 
GitLab


From 1e0c7d40758bcd45b4af936031144e995f87a7f6 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:38 +0000
Subject: [PATCH 3174/3804] KVM: SVM: hyper-v: Remote TLB flush for SVM

Enable remote TLB flush for SVM.

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Message-Id: <1ee364e397e142aed662d2920d198cd03772f1a5.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c          |  9 +++++
 arch/x86/kvm/svm/svm_onhyperv.h | 66 +++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 arch/x86/kvm/svm/svm_onhyperv.h

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d223f5dfac533..4d7b67c78a899 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -43,6 +43,9 @@
 #include "svm.h"
 #include "svm_ops.h"
 
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 
 MODULE_AUTHOR("Qumranet");
@@ -1003,6 +1006,8 @@ static __init int svm_hardware_setup(void)
 	/* Note, SEV setup consumes npt_enabled. */
 	sev_hardware_setup();
 
+	svm_hv_hardware_setup();
+
 	svm_adjust_mmio_mask();
 
 	for_each_possible_cpu(cpu) {
@@ -1296,6 +1301,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 		}
 	}
 
+	svm_hv_init_vmcb(svm->vmcb);
+
 	vmcb_mark_all_dirty(svm->vmcb);
 
 	enable_gif(svm);
@@ -3892,6 +3899,8 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
 		svm->vmcb->control.nested_cr3 = __sme_set(root_hpa);
 		vmcb_mark_dirty(svm->vmcb, VMCB_NPT);
 
+		hv_track_root_tdp(vcpu, root_hpa);
+
 		/* Loading L2's CR3 is handled by enter_svm_guest_mode.  */
 		if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
 			return;
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
new file mode 100644
index 0000000000000..57291e2223956
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#ifndef __ARCH_X86_KVM_SVM_ONHYPERV_H__
+#define __ARCH_X86_KVM_SVM_ONHYPERV_H__
+
+#if IS_ENABLED(CONFIG_HYPERV)
+#include <asm/mshyperv.h>
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+
+static struct kvm_x86_ops svm_x86_ops;
+
+/*
+ * Hyper-V uses the software reserved 32 bytes in VMCB
+ * control area to expose SVM enlightenments to guests.
+ */
+struct hv_enlightenments {
+	struct __packed hv_enlightenments_control {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 enlightened_npt_tlb: 1;
+		u32 reserved:29;
+	} __packed hv_enlightenments_control;
+	u32 hv_vp_id;
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 reserved;
+} __packed;
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+	struct hv_enlightenments *hve =
+		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+	if (npt_enabled &&
+	    ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
+		hve->hv_enlightenments_control.enlightened_npt_tlb = 1;
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+	if (npt_enabled &&
+	    ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) {
+		pr_info("kvm: Hyper-V enlightened NPT TLB flush enabled\n");
+		svm_x86_ops.tlb_remote_flush = hv_remote_flush_tlb;
+		svm_x86_ops.tlb_remote_flush_with_range =
+				hv_remote_flush_tlb_with_range;
+	}
+}
+
+#else
+
+static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
+{
+}
+
+static inline void svm_hv_hardware_setup(void)
+{
+}
+#endif /* CONFIG_HYPERV */
+
+#endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
-- 
GitLab


From c4327f15dfc7294b2abde0ea49b3e43eec3cca38 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:39 +0000
Subject: [PATCH 3175/3804] KVM: SVM: hyper-v: Enlightened MSR-Bitmap support

Enlightened MSR-Bitmap as per TLFS:

 "The L1 hypervisor may collaborate with the L0 hypervisor to make MSR
  accesses more efficient. It can enable enlightened MSR bitmaps by setting
  the corresponding field in the enlightened VMCS to 1. When enabled, L0
  hypervisor does not monitor the MSR bitmaps for changes. Instead, the L1
  hypervisor must invalidate the corresponding clean field after making
  changes to one of the MSR bitmaps."

Enable this for SVM.

Related VMX changes:
commit ceef7d10dfb6 ("KVM: x86: VMX: hyper-v: Enlightened MSR-Bitmap support")

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Message-Id: <87df0710f95d28b91cc4ea014fc4d71056eebbee.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c          |  3 +++
 arch/x86/kvm/svm/svm.h          |  5 +++++
 arch/x86/kvm/svm/svm_onhyperv.h | 27 +++++++++++++++++++++++++++
 3 files changed, 35 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4d7b67c78a899..1b0056ef36af6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -683,6 +683,9 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
 	write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
 
 	msrpm[offset] = tmp;
+
+	svm_hv_vmcb_dirty_nested_enlightenments(vcpu);
+
 }
 
 void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index af09bcd229bdb..670f0c0ed73bc 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -254,6 +254,11 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
 			       & ~VMCB_ALWAYS_DIRTY_MASK;
 }
 
+static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit)
+{
+	return (vmcb->control.clean & (1 << bit));
+}
+
 static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
 {
 	vmcb->control.clean &= ~(1 << bit);
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 57291e2223956..0f262460b2e68 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -31,6 +31,11 @@ struct hv_enlightenments {
 	u64 reserved;
 } __packed;
 
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 	struct hv_enlightenments *hve =
@@ -52,6 +57,23 @@ static inline void svm_hv_hardware_setup(void)
 	}
 }
 
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+		struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+	struct hv_enlightenments *hve =
+		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+
+	/*
+	 * vmcb can be NULL if called during early vcpu init.
+	 * And its okay not to mark vmcb dirty during vcpu init
+	 * as we mark it dirty unconditionally towards end of vcpu
+	 * init phase.
+	 */
+	if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) &&
+	    hve->hv_enlightenments_control.msr_bitmap)
+		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+}
 #else
 
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
@@ -61,6 +83,11 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 static inline void svm_hv_hardware_setup(void)
 {
 }
+
+static inline void svm_hv_vmcb_dirty_nested_enlightenments(
+		struct kvm_vcpu *vcpu)
+{
+}
 #endif /* CONFIG_HYPERV */
 
 #endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
-- 
GitLab


From 1183646a67d01ef9c46ac87da1c57dea5f7bb153 Mon Sep 17 00:00:00 2001
From: Vineeth Pillai <viremana@linux.microsoft.com>
Date: Thu, 3 Jun 2021 15:14:40 +0000
Subject: [PATCH 3176/3804] KVM: SVM: hyper-v: Direct Virtual Flush support

From Hyper-V TLFS:
 "The hypervisor exposes hypercalls (HvFlushVirtualAddressSpace,
  HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressList, and
  HvFlushVirtualAddressListEx) that allow operating systems to more
  efficiently manage the virtual TLB. The L1 hypervisor can choose to
  allow its guest to use those hypercalls and delegate the responsibility
  to handle them to the L0 hypervisor. This requires the use of a
  partition assist page."

Add the Direct Virtual Flush support for SVM.

Related VMX changes:
commit 6f6a657c9998 ("KVM/Hyper-V/VMX: Add direct tlb flush support")

Signed-off-by: Vineeth Pillai <viremana@linux.microsoft.com>
Message-Id: <fc8d24d8eb7017266bb961e39a171b0caf298d7f.1622730232.git.viremana@linux.microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Makefile           |  4 ++++
 arch/x86/kvm/svm/svm.c          |  2 ++
 arch/x86/kvm/svm/svm_onhyperv.c | 41 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm_onhyperv.h | 37 +++++++++++++++++++++++++++++
 4 files changed, 84 insertions(+)
 create mode 100644 arch/x86/kvm/svm/svm_onhyperv.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a06745c2fef1e..83331376b779b 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -32,6 +32,10 @@ kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
 
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
 
+ifdef CONFIG_HYPERV
+kvm-amd-y		+= svm/svm_onhyperv.o
+endif
+
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
 obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 1b0056ef36af6..9bb4692728ef2 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3781,6 +3781,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 	}
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
 
+	svm_hv_update_vp_id(svm->vmcb, vcpu);
+
 	/*
 	 * Run with all-zero DR6 unless needed, so that we can get the exact cause
 	 * of a #DB.
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
new file mode 100644
index 0000000000000..98aa981c04ec5
--- /dev/null
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM L1 hypervisor optimizations on Hyper-V for SVM.
+ */
+
+#include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
+
+#include <asm/mshyperv.h>
+
+#include "svm.h"
+#include "svm_ops.h"
+
+#include "hyperv.h"
+#include "kvm_onhyperv.h"
+#include "svm_onhyperv.h"
+
+int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+	struct hv_enlightenments *hve;
+	struct hv_partition_assist_pg **p_hv_pa_pg =
+			&to_kvm_hv(vcpu->kvm)->hv_pa_pg;
+
+	if (!*p_hv_pa_pg)
+		*p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+
+	if (!*p_hv_pa_pg)
+		return -ENOMEM;
+
+	hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw;
+
+	hve->partition_assist_page = __pa(*p_hv_pa_pg);
+	hve->hv_vm_id = (unsigned long)vcpu->kvm;
+	if (!hve->hv_enlightenments_control.nested_flush_hypercall) {
+		hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+		vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+	}
+
+	return 0;
+}
+
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 0f262460b2e68..9b9a55abc29fb 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -36,6 +36,8 @@ struct hv_enlightenments {
  */
 #define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
 
+int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
+
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
 	struct hv_enlightenments *hve =
@@ -55,6 +57,23 @@ static inline void svm_hv_hardware_setup(void)
 		svm_x86_ops.tlb_remote_flush_with_range =
 				hv_remote_flush_tlb_with_range;
 	}
+
+	if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) {
+		int cpu;
+
+		pr_info("kvm: Hyper-V Direct TLB Flush enabled\n");
+		for_each_online_cpu(cpu) {
+			struct hv_vp_assist_page *vp_ap =
+				hv_get_vp_assist_page(cpu);
+
+			if (!vp_ap)
+				continue;
+
+			vp_ap->nested_control.features.directhypercall = 1;
+		}
+		svm_x86_ops.enable_direct_tlbflush =
+				svm_hv_enable_direct_tlbflush;
+	}
 }
 
 static inline void svm_hv_vmcb_dirty_nested_enlightenments(
@@ -74,6 +93,19 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 	    hve->hv_enlightenments_control.msr_bitmap)
 		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
 }
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+		struct kvm_vcpu *vcpu)
+{
+	struct hv_enlightenments *hve =
+		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+	u32 vp_index = kvm_hv_get_vpindex(vcpu);
+
+	if (hve->hv_vp_id != vp_index) {
+		hve->hv_vp_id = vp_index;
+		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+	}
+}
 #else
 
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
@@ -88,6 +120,11 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 		struct kvm_vcpu *vcpu)
 {
 }
+
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
+		struct kvm_vcpu *vcpu)
+{
+}
 #endif /* CONFIG_HYPERV */
 
 #endif /* __ARCH_X86_KVM_SVM_ONHYPERV_H__ */
-- 
GitLab


From f15cdceab543059a9afd9e6277cf15d56d7dfd82 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:35 +0200
Subject: [PATCH 3177/3804] asm-generic/hyperv: add HV_STATUS_ACCESS_DENIED
 definition

From TLFSv6.0b, this status means: "The caller did not possess sufficient
access rights to perform the requested operation."

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Acked-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/asm-generic/hyperv-tlfs.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 515c3fb06ab3f..56348a541c501 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
 #define HV_STATUS_INVALID_HYPERCALL_INPUT	3
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INVALID_PARAMETER		5
+#define HV_STATUS_ACCESS_DENIED			6
 #define HV_STATUS_OPERATION_DENIED		8
 #define HV_STATUS_INSUFFICIENT_MEMORY		11
 #define HV_STATUS_INVALID_PORT_ID		17
-- 
GitLab


From 644f706719f0297bc5f65c8891de1c32f042eae5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:36 +0200
Subject: [PATCH 3178/3804] KVM: x86: hyper-v: Introduce
 KVM_CAP_HYPERV_ENFORCE_CPUID

Modeled after KVM_CAP_ENFORCE_PV_FEATURE_CPUID, the new capability allows
for limiting Hyper-V features to those exposed to the guest in Hyper-V
CPUIDs (0x40000003, 0x40000004, ...).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  | 11 +++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/hyperv.c           | 21 +++++++++++++++++++++
 arch/x86/kvm/hyperv.h           |  1 +
 arch/x86/kvm/x86.c              |  4 ++++
 include/uapi/linux/kvm.h        |  1 +
 6 files changed, 39 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 7fcb2fd38f42e..80154d5d98a18 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6891,3 +6891,14 @@ This capability is always enabled.
 This capability indicates that the KVM virtual PTP service is
 supported in the host. A VMM can check whether the service is
 available to the guest on migration.
+
+8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
+-----------------------------
+
+Architectures: x86
+
+When enabled, KVM will disable emulated Hyper-V features provided to the
+guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
+currently implmented Hyper-V features are provided unconditionally when
+Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
+leaf.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1fdb212127c4a..556a8ec89451a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -543,6 +543,7 @@ struct kvm_vcpu_hv {
 	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 	cpumask_t tlb_flush;
+	bool enforce_cpuid;
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index dbd3152b1379e..02b0ee189f82d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1853,6 +1853,27 @@ void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
 		vcpu->arch.hyperv_enabled = false;
 }
 
+int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
+{
+	struct kvm_vcpu_hv *hv_vcpu;
+	int ret = 0;
+
+	if (!to_hv_vcpu(vcpu)) {
+		if (enforce) {
+			ret = kvm_hv_vcpu_init(vcpu);
+			if (ret)
+				return ret;
+		} else {
+			return 0;
+		}
+	}
+
+	hv_vcpu = to_hv_vcpu(vcpu);
+	hv_vcpu->enforce_cpuid = enforce;
+
+	return ret;
+}
+
 bool kvm_hv_hypercall_enabled(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.hyperv_enabled && to_kvm_hv(vcpu->kvm)->hv_guest_os_id;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 60547d5cb6d72..730da8537d058 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -138,6 +138,7 @@ void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
 void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu);
+int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce);
 int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		     struct kvm_cpuid_entry2 __user *entries);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 63e48738764e6..475376a974191 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3955,6 +3955,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_TLBFLUSH:
 	case KVM_CAP_HYPERV_SEND_IPI:
 	case KVM_CAP_HYPERV_CPUID:
+	case KVM_CAP_HYPERV_ENFORCE_CPUID:
 	case KVM_CAP_SYS_HYPERV_CPUID:
 	case KVM_CAP_PCI_SEGMENT:
 	case KVM_CAP_DEBUGREGS:
@@ -4878,6 +4879,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 
 		return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
 
+	case KVM_CAP_HYPERV_ENFORCE_CPUID:
+		return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
+
 	case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
 		vcpu->arch.pv_cpuid.enforce = cap->args[0];
 		if (vcpu->arch.pv_cpuid.enforce)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 79d9c44d1ad73..7928161440925 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SGX_ATTRIBUTE 196
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_HYPERV_ENFORCE_CPUID 199
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From 10d7bf1e46dc19d964f0f67d2a6d20df907742d1 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:37 +0200
Subject: [PATCH 3179/3804] KVM: x86: hyper-v: Cache guest CPUID leaves
 determining features availability

Limiting exposed Hyper-V features requires a fast way to check if the
particular feature is exposed in guest-visible CPUIDs or not. To avoid
looping through all CPUID entries on every hypercall/MSR access, cache
the required leaves on CPUID update.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  8 ++++++
 arch/x86/kvm/hyperv.c           | 49 ++++++++++++++++++++++++++-------
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 556a8ec89451a..95b254b5a5230 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -544,6 +544,14 @@ struct kvm_vcpu_hv {
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
 	cpumask_t tlb_flush;
 	bool enforce_cpuid;
+	struct {
+		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
+		u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
+		u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
+		u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+		u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
+		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+	} cpuid_cache;
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 02b0ee189f82d..7e7928fc77ef1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -274,15 +274,10 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
 
 static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_cpuid_entry2 *entry;
-
-	entry = kvm_find_cpuid_entry(vcpu,
-				     HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES,
-				     0);
-	if (!entry)
-		return false;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-	return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+	return hv_vcpu->cpuid_cache.syndbg_cap_eax &
+		HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
 }
 
 static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
@@ -1845,12 +1840,46 @@ ret_success:
 void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *entry;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
 	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
-	if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX)
+	if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
 		vcpu->arch.hyperv_enabled = true;
-	else
+	} else {
 		vcpu->arch.hyperv_enabled = false;
+		return;
+	}
+
+	if (!to_hv_vcpu(vcpu) && kvm_hv_vcpu_init(vcpu))
+		return;
+
+	hv_vcpu = to_hv_vcpu(vcpu);
+
+	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES, 0);
+	if (entry) {
+		hv_vcpu->cpuid_cache.features_eax = entry->eax;
+		hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
+		hv_vcpu->cpuid_cache.features_edx = entry->edx;
+	} else {
+		hv_vcpu->cpuid_cache.features_eax = 0;
+		hv_vcpu->cpuid_cache.features_ebx = 0;
+		hv_vcpu->cpuid_cache.features_edx = 0;
+	}
+
+	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO, 0);
+	if (entry) {
+		hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
+		hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
+	} else {
+		hv_vcpu->cpuid_cache.enlightenments_eax = 0;
+		hv_vcpu->cpuid_cache.enlightenments_ebx = 0;
+	}
+
+	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0);
+	if (entry)
+		hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;
+	else
+		hv_vcpu->cpuid_cache.syndbg_cap_eax = 0;
 }
 
 int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
-- 
GitLab


From b4128000e2c9b176a449d748dcb083c61d61cc6e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:38 +0200
Subject: [PATCH 3180/3804] KVM: x86: hyper-v: Prepare to check access to
 Hyper-V MSRs

Introduce hv_check_msr_access() to check if the particular MSR
should be accessible by the guest; this will be used with
KVM_CAP_HYPERV_ENFORCE_CPUID mode.

No functional change intended.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7e7928fc77ef1..ab8dc23f05bf6 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1202,12 +1202,21 @@ out_unlock:
 	mutex_unlock(&hv->hv_lock);
 }
 
+
+static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
+{
+	return true;
+}
+
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
 			     bool host)
 {
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_hv *hv = to_kvm_hv(kvm);
 
+	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
+		return 1;
+
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 		hv->hv_guest_os_id = data;
@@ -1336,6 +1345,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 {
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
+	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
+		return 1;
+
 	switch (msr) {
 	case HV_X64_MSR_VP_INDEX: {
 		struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
@@ -1450,6 +1462,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_hv *hv = to_kvm_hv(kvm);
 
+	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
+		return 1;
+
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 		data = hv->hv_guest_os_id;
@@ -1499,6 +1514,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
 	u64 data = 0;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
+	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
+		return 1;
+
 	switch (msr) {
 	case HV_X64_MSR_VP_INDEX:
 		data = hv_vcpu->vp_index;
-- 
GitLab


From 1561c2cb87ab39400d76998bf7be581c1e57f108 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:39 +0200
Subject: [PATCH 3181/3804] KVM: x86: hyper-v: Honor HV_MSR_HYPERCALL_AVAILABLE
 privilege bit

HV_X64_MSR_GUEST_OS_ID/HV_X64_MSR_HYPERCALL are only available to guest
when HV_MSR_HYPERCALL_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ab8dc23f05bf6..cb66842ccb8d4 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1205,6 +1205,18 @@ out_unlock:
 
 static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 {
+	if (!hv_vcpu->enforce_cpuid)
+		return true;
+
+	switch (msr) {
+	case HV_X64_MSR_GUEST_OS_ID:
+	case HV_X64_MSR_HYPERCALL:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_HYPERCALL_AVAILABLE;
+	default:
+		break;
+	}
+
 	return true;
 }
 
-- 
GitLab


From b80a92ff81587c556da740e9073821b5c3c23b72 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:40 +0200
Subject: [PATCH 3182/3804] KVM: x86: hyper-v: Honor
 HV_MSR_VP_RUNTIME_AVAILABLE privilege bit

HV_X64_MSR_VP_RUNTIME is only available to guest when
HV_MSR_VP_RUNTIME_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-7-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cb66842ccb8d4..6a9eb934ffe09 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1213,6 +1213,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_HYPERCALL:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_HYPERCALL_AVAILABLE;
+	case HV_X64_MSR_VP_RUNTIME:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_VP_RUNTIME_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From c2b32867f2e7bfa7e7521e417ab8bbd586ac6bcc Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:41 +0200
Subject: [PATCH 3183/3804] KVM: x86: hyper-v: Honor
 HV_MSR_TIME_REF_COUNT_AVAILABLE privilege bit

HV_X64_MSR_TIME_REF_COUNT is only available to guest when
HV_MSR_TIME_REF_COUNT_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-8-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 6a9eb934ffe09..c90679247185f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1216,6 +1216,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_VP_RUNTIME:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_VP_RUNTIME_AVAILABLE;
+	case HV_X64_MSR_TIME_REF_COUNT:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_TIME_REF_COUNT_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From d2ac25d4196da2ff404c88bec480c835995ea69c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:42 +0200
Subject: [PATCH 3184/3804] KVM: x86: hyper-v: Honor HV_MSR_VP_INDEX_AVAILABLE
 privilege bit

HV_X64_MSR_VP_INDEX is only available to guest when
HV_MSR_VP_INDEX_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-9-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index c90679247185f..fb5ed867b53c1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1219,6 +1219,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_TIME_REF_COUNT:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_TIME_REF_COUNT_AVAILABLE;
+	case HV_X64_MSR_VP_INDEX:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_VP_INDEX_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From 679008e4bbeb12f4905ee0820cd2d0b9d4a21dbb Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:43 +0200
Subject: [PATCH 3185/3804] KVM: x86: hyper-v: Honor HV_MSR_RESET_AVAILABLE
 privilege bit

HV_X64_MSR_RESET is only available to guest when HV_MSR_RESET_AVAILABLE bit
is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-10-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index fb5ed867b53c1..1348f76913107 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1222,6 +1222,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_VP_INDEX:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_VP_INDEX_AVAILABLE;
+	case HV_X64_MSR_RESET:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_RESET_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From a1ec661c3fdc8177a8789a9528d5bcfe0d9fc8a8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:44 +0200
Subject: [PATCH 3186/3804] KVM: x86: hyper-v: Honor
 HV_MSR_REFERENCE_TSC_AVAILABLE privilege bit

HV_X64_MSR_REFERENCE_TSC is only available to guest when
HV_MSR_REFERENCE_TSC_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-11-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1348f76913107..7ca7ea0b6e741 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1225,6 +1225,9 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_RESET:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_RESET_AVAILABLE;
+	case HV_X64_MSR_REFERENCE_TSC:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_REFERENCE_TSC_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From 9e2715ca20d7b540a271464b3ac862cf387935c1 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:45 +0200
Subject: [PATCH 3187/3804] KVM: x86: hyper-v: Honor HV_MSR_SYNIC_AVAILABLE
 privilege bit

SynIC MSRs (HV_X64_MSR_SCONTROL, HV_X64_MSR_SVERSION, HV_X64_MSR_SIEFP,
HV_X64_MSR_SIMP, HV_X64_MSR_EOM, HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15)
are only available to guest when HV_MSR_SYNIC_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-12-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7ca7ea0b6e741..9d3aed3bebcd9 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1228,6 +1228,14 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_REFERENCE_TSC:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_REFERENCE_TSC_AVAILABLE;
+	case HV_X64_MSR_SCONTROL:
+	case HV_X64_MSR_SVERSION:
+	case HV_X64_MSR_SIEFP:
+	case HV_X64_MSR_SIMP:
+	case HV_X64_MSR_EOM:
+	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_SYNIC_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From eba60ddae794bdefb9531cb08e30c19a0bc53c15 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:46 +0200
Subject: [PATCH 3188/3804] KVM: x86: hyper-v: Honor HV_MSR_SYNTIMER_AVAILABLE
 privilege bit

Synthetic timers MSRs (HV_X64_MSR_STIMER[0-3]_CONFIG,
HV_X64_MSR_STIMER[0-3]_COUNT) are only available to guest when
HV_MSR_SYNTIMER_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-13-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 9d3aed3bebcd9..787fd58593dda 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1236,6 +1236,16 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_SYNIC_AVAILABLE;
+	case HV_X64_MSR_STIMER0_CONFIG:
+	case HV_X64_MSR_STIMER1_CONFIG:
+	case HV_X64_MSR_STIMER2_CONFIG:
+	case HV_X64_MSR_STIMER3_CONFIG:
+	case HV_X64_MSR_STIMER0_COUNT:
+	case HV_X64_MSR_STIMER1_COUNT:
+	case HV_X64_MSR_STIMER2_COUNT:
+	case HV_X64_MSR_STIMER3_COUNT:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_SYNTIMER_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From 978b57475c7795824676122acb75a1dea264b6d1 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:47 +0200
Subject: [PATCH 3189/3804] KVM: x86: hyper-v: Honor
 HV_MSR_APIC_ACCESS_AVAILABLE privilege bit

HV_X64_MSR_EOI, HV_X64_MSR_ICR, HV_X64_MSR_TPR, and
HV_X64_MSR_VP_ASSIST_PAGE are only available to guest when
HV_MSR_APIC_ACCESS_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-14-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 787fd58593dda..a168b72334cc4 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1246,6 +1246,13 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_STIMER3_COUNT:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_SYNTIMER_AVAILABLE;
+	case HV_X64_MSR_EOI:
+	case HV_X64_MSR_ICR:
+	case HV_X64_MSR_TPR:
+	case HV_X64_MSR_VP_ASSIST_PAGE:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_MSR_APIC_ACCESS_AVAILABLE;
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 9442f3bd9012f37ba2b4ec3ab2d7c248b137391c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:48 +0200
Subject: [PATCH 3190/3804] KVM: x86: hyper-v: Honor HV_ACCESS_FREQUENCY_MSRS
 privilege bit

HV_X64_MSR_TSC_FREQUENCY/HV_X64_MSR_APIC_FREQUENCY are only available to
guest when HV_ACCESS_FREQUENCY_MSRS bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-15-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a168b72334cc4..2a0660b4e779d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1253,6 +1253,10 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_MSR_APIC_ACCESS_AVAILABLE;
 		break;
+	case HV_X64_MSR_TSC_FREQUENCY:
+	case HV_X64_MSR_APIC_FREQUENCY:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_ACCESS_FREQUENCY_MSRS;
 	default:
 		break;
 	}
-- 
GitLab


From 234d01baec5b216b60b560672957470f773ecf78 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:49 +0200
Subject: [PATCH 3191/3804] KVM: x86: hyper-v: Honor HV_ACCESS_REENLIGHTENMENT
 privilege bit

HV_X64_MSR_REENLIGHTENMENT_CONTROL/HV_X64_MSR_TSC_EMULATION_CONTROL/
HV_X64_MSR_TSC_EMULATION_STATUS are only available to guest when
HV_ACCESS_REENLIGHTENMENT bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-16-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 2a0660b4e779d..230f52606e396 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1257,6 +1257,11 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_APIC_FREQUENCY:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_ACCESS_FREQUENCY_MSRS;
+	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_CONTROL:
+	case HV_X64_MSR_TSC_EMULATION_STATUS:
+		return hv_vcpu->cpuid_cache.features_eax &
+			HV_ACCESS_REENLIGHTENMENT;
 	default:
 		break;
 	}
-- 
GitLab


From 0a19c8992db834c9c9e28c5633720d994629539d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:50 +0200
Subject: [PATCH 3192/3804] KVM: x86: hyper-v: Honor
 HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE privilege bit

HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL are only
available to guest when HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE bit is
exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-17-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 230f52606e396..7b7da057b54be 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1262,6 +1262,10 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_TSC_EMULATION_STATUS:
 		return hv_vcpu->cpuid_cache.features_eax &
 			HV_ACCESS_REENLIGHTENMENT;
+	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
+	case HV_X64_MSR_CRASH_CTL:
+		return hv_vcpu->cpuid_cache.features_edx &
+			HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From 17b6d51771a15c7d8552c3e855b5862b3dce0977 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:51 +0200
Subject: [PATCH 3193/3804] KVM: x86: hyper-v: Honor
 HV_FEATURE_DEBUG_MSRS_AVAILABLE privilege bit

Synthetic debugging MSRs (HV_X64_MSR_SYNDBG_CONTROL,
HV_X64_MSR_SYNDBG_STATUS, HV_X64_MSR_SYNDBG_SEND_BUFFER,
HV_X64_MSR_SYNDBG_RECV_BUFFER, HV_X64_MSR_SYNDBG_PENDING_BUFFER,
HV_X64_MSR_SYNDBG_OPTIONS) are only available to guest when
HV_FEATURE_DEBUG_MSRS_AVAILABLE bit is exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-18-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7b7da057b54be..3bf00a9299dde 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1266,6 +1266,10 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 	case HV_X64_MSR_CRASH_CTL:
 		return hv_vcpu->cpuid_cache.features_edx &
 			HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+	case HV_X64_MSR_SYNDBG_OPTIONS:
+	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
+		return hv_vcpu->cpuid_cache.features_edx &
+			HV_FEATURE_DEBUG_MSRS_AVAILABLE;
 	default:
 		break;
 	}
-- 
GitLab


From d66bfa36f9edc5ca8c83206ab39d09091623104e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:52 +0200
Subject: [PATCH 3194/3804] KVM: x86: hyper-v: Inverse the default in
 hv_check_msr_access()

Access to all MSRs is now properly checked. To avoid 'forgetting' to
properly check access to new MSRs in the future, change the default
to 'false', meaning 'no access'.

No functional change intended.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-19-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3bf00a9299dde..db735692fc62c 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1274,7 +1274,7 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
 		break;
 	}
 
-	return true;
+	return false;
 }
 
 static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
-- 
GitLab


From 1aa8a4184dbde5f50b70b2c706bcfb6b57da9ea7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:53 +0200
Subject: [PATCH 3195/3804] KVM: x86: hyper-v: Honor
 HV_STIMER_DIRECT_MODE_AVAILABLE privilege bit

Synthetic timers can only be configured in 'direct' mode when
HV_STIMER_DIRECT_MODE_AVAILABLE bit was exposed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-20-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index db735692fc62c..1c70303118854 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -631,11 +631,17 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
 	union hv_stimer_config new_config = {.as_uint64 = config},
 		old_config = {.as_uint64 = stimer->config.as_uint64};
 	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
 
 	if (!synic->active && !host)
 		return 1;
 
+	if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode &&
+		     !(hv_vcpu->cpuid_cache.features_edx &
+		       HV_STIMER_DIRECT_MODE_AVAILABLE)))
+		return 1;
+
 	trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
 				       stimer->index, config, host);
 
-- 
GitLab


From 4ad81a91119df7c0e868f9e4c82b9159645bc906 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:54 +0200
Subject: [PATCH 3196/3804] KVM: x86: hyper-v: Prepare to check access to
 Hyper-V hypercalls

Introduce hv_check_hypercall_access() to check if the particular hypercall
should be available to the guest; this will be used with
KVM_CAP_HYPERV_ENFORCE_CPUID mode.

No functional change intended.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-21-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1c70303118854..51fc74ea773f3 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2090,6 +2090,11 @@ static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
 	kvm_fpu_put();
 }
 
+static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
+{
+	return true;
+}
+
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_hv_hcall hc;
@@ -2132,6 +2137,11 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx,
 			       hc.ingpa, hc.outgpa);
 
+	if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) {
+		ret = HV_STATUS_ACCESS_DENIED;
+		goto hypercall_complete;
+	}
+
 	switch (hc.code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		if (unlikely(hc.rep)) {
@@ -2238,6 +2248,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		break;
 	}
 
+hypercall_complete:
 	return kvm_hv_hypercall_complete(vcpu, ret);
 }
 
-- 
GitLab


From 34ef7d7b9c0422316ee2c34c564b222255c91532 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:55 +0200
Subject: [PATCH 3197/3804] KVM: x86: hyper-v: Check access to
 HVCALL_NOTIFY_LONG_SPIN_WAIT hypercall

TLFS6.0b states that a partition issuing HVCALL_NOTIFY_LONG_SPIN_WAIT must
possess 'UseHypercallForLongSpinWait' privilege but there's no
corresponding feature bit. Instead, we have "Recommended number of attempts
to retry a spinlock failure before notifying the hypervisor about the
failures. 0xFFFFFFFF indicates never notify." Use this to check access to
the hypercall. Also, check against zero as the corresponding CPUID must
be set (and '0' attempts before re-try is weird anyway).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-22-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 51fc74ea773f3..13bfa4e0b93db 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2092,6 +2092,17 @@ static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
 
 static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 {
+	if (!hv_vcpu->enforce_cpuid)
+		return true;
+
+	switch (code) {
+	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+		return hv_vcpu->cpuid_cache.enlightenments_ebx &&
+			hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
+	default:
+		break;
+	}
+
 	return true;
 }
 
-- 
GitLab


From 4f532b7f969fcba010703fe21e0a85f662373041 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:56 +0200
Subject: [PATCH 3198/3804] KVM: x86: hyper-v: Honor HV_POST_MESSAGES privilege
 bit

Hyper-V partition must possess 'HV_POST_MESSAGES' privilege to issue
HVCALL_POST_MESSAGE hypercalls.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-23-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 13bfa4e0b93db..293619998c385 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2099,6 +2099,8 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		return hv_vcpu->cpuid_cache.enlightenments_ebx &&
 			hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
+	case HVCALL_POST_MESSAGE:
+		return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
 	default:
 		break;
 	}
-- 
GitLab


From a60b3c594ef3221275d4fa8aa94e206607ea66f3 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:57 +0200
Subject: [PATCH 3199/3804] KVM: x86: hyper-v: Honor HV_SIGNAL_EVENTS privilege
 bit

Hyper-V partition must possess 'HV_SIGNAL_EVENTS' privilege to issue
HVCALL_SIGNAL_EVENT hypercalls.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-24-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 293619998c385..ce057827da03b 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2101,6 +2101,8 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 			hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
 	case HVCALL_POST_MESSAGE:
 		return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
+	case HVCALL_SIGNAL_EVENT:
+		return hv_vcpu->cpuid_cache.features_ebx & HV_SIGNAL_EVENTS;
 	default:
 		break;
 	}
-- 
GitLab


From a921cf83cc4c927f29eef1e7a17bff176c74b886 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:58 +0200
Subject: [PATCH 3200/3804] KVM: x86: hyper-v: Honor HV_DEBUGGING privilege bit

Hyper-V partition must possess 'HV_DEBUGGING' privilege to issue
HVCALL_POST_DEBUG_DATA/HVCALL_RETRIEVE_DEBUG_DATA/
HVCALL_RESET_DEBUG_SESSION hypercalls.

Note, when SynDBG is disabled hv_check_hypercall_access() returns
'true' (like for any other unknown hypercall) so the result will
be HV_STATUS_INVALID_HYPERCALL_CODE and not HV_STATUS_ACCESS_DENIED.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-25-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ce057827da03b..3d6b448ab18f6 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2103,6 +2103,15 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 		return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
 	case HVCALL_SIGNAL_EVENT:
 		return hv_vcpu->cpuid_cache.features_ebx & HV_SIGNAL_EVENTS;
+	case HVCALL_POST_DEBUG_DATA:
+	case HVCALL_RETRIEVE_DEBUG_DATA:
+	case HVCALL_RESET_DEBUG_SESSION:
+		/*
+		 * Return 'true' when SynDBG is disabled so the resulting code
+		 * will be HV_STATUS_INVALID_HYPERCALL_CODE.
+		 */
+		return !kvm_hv_is_syndbg_enabled(hv_vcpu->vcpu) ||
+			hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
 	default:
 		break;
 	}
-- 
GitLab


From bb53ecb4d6ea453e55a971295e55dbf76adc0f8c Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:51:59 +0200
Subject: [PATCH 3201/3804] KVM: x86: hyper-v: Honor
 HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED bit

Hyper-V partition must possess 'HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED'
privilege ('recommended' is rather a misnomer) to issue
HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST/SPACE hypercalls.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-26-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3d6b448ab18f6..831279976d9f1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2112,6 +2112,12 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 		 */
 		return !kvm_hv_is_syndbg_enabled(hv_vcpu->vcpu) ||
 			hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+		return hv_vcpu->cpuid_cache.enlightenments_eax &
+			HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
 	default:
 		break;
 	}
-- 
GitLab


From d264eb3c14d0e5df49ecab3e8b51caadf78abefa Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:52:00 +0200
Subject: [PATCH 3202/3804] KVM: x86: hyper-v: Honor
 HV_X64_CLUSTER_IPI_RECOMMENDED bit

Hyper-V partition must possess 'HV_X64_CLUSTER_IPI_RECOMMENDED'
privilege ('recommended' is rather a misnomer) to issue
HVCALL_SEND_IPI hypercalls.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-27-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 831279976d9f1..eded585620a7a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2118,6 +2118,10 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
 		return hv_vcpu->cpuid_cache.enlightenments_eax &
 			HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+	case HVCALL_SEND_IPI_EX:
+	case HVCALL_SEND_IPI:
+		return hv_vcpu->cpuid_cache.enlightenments_eax &
+			HV_X64_CLUSTER_IPI_RECOMMENDED;
 	default:
 		break;
 	}
-- 
GitLab


From 445caed0213acef29b9d3822b6906f99860ca9ab Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:52:01 +0200
Subject: [PATCH 3203/3804] KVM: x86: hyper-v: Honor
 HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED bit

Hypercalls which use extended processor masks are only available when
HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED privilege bit is exposed (and
'RECOMMENDED' is rather a misnomer).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-28-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index eded585620a7a..4f911dca7dd67 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2114,11 +2114,19 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
 			hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
+		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+			return false;
+		fallthrough;
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
 	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
 		return hv_vcpu->cpuid_cache.enlightenments_eax &
 			HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
 	case HVCALL_SEND_IPI_EX:
+		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
+		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
+			return false;
+		fallthrough;
 	case HVCALL_SEND_IPI:
 		return hv_vcpu->cpuid_cache.enlightenments_eax &
 			HV_X64_CLUSTER_IPI_RECOMMENDED;
-- 
GitLab


From 75a3f4287fdbdca406b5a087cbc67fad313bce7d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:52:02 +0200
Subject: [PATCH 3204/3804] KVM: selftests: move Hyper-V MSR definitions to
 hyperv.h

These defines can be shared by multiple tests, move them to a dedicated
header.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-29-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/hyperv.h     | 19 +++++++++++++++++++
 .../selftests/kvm/x86_64/hyperv_clock.c       |  8 +-------
 2 files changed, 20 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/x86_64/hyperv.h

diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 0000000000000..443c6572512bd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#define HV_X64_MSR_GUEST_OS_ID			0x40000000
+#define HV_X64_MSR_TIME_REF_COUNT		0x40000020
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY		0x40000022
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 7f1d2765572c3..489625acc9cfd 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -7,6 +7,7 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
+#include "hyperv.h"
 
 struct ms_hyperv_tsc_page {
 	volatile u32 tsc_sequence;
@@ -15,13 +16,6 @@ struct ms_hyperv_tsc_page {
 	volatile s64 tsc_offset;
 } __packed;
 
-#define HV_X64_MSR_GUEST_OS_ID			0x40000000
-#define HV_X64_MSR_TIME_REF_COUNT		0x40000020
-#define HV_X64_MSR_REFERENCE_TSC		0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY		0x40000022
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
-
 /* Simplified mul_u64_u64_shr() */
 static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
 {
-- 
GitLab


From d504df3c913bb91dda41fffaebbb5bfd6d8c4b07 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:52:03 +0200
Subject: [PATCH 3205/3804] KVM: selftests: Move evmcs.h to x86_64/

evmcs.h is an x86_64-only header, move it to the x86_64/ subdirectory.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-30-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/{ => x86_64}/evmcs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename tools/testing/selftests/kvm/include/{ => x86_64}/evmcs.h (99%)

diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
similarity index 99%
rename from tools/testing/selftests/kvm/include/evmcs.h
rename to tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b62662..c9af97abd6221 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
  *
  * Copyright (C) 2018, Red Hat, Inc.
  *
-- 
GitLab


From e2e1cc1fbe54a9520956a4539a3676d2ebf122dd Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 21 May 2021 11:52:04 +0200
Subject: [PATCH 3206/3804] KVM: selftests: Introduce hyperv_features test

The initial implementation of the test only tests that access to Hyper-V
MSRs and hypercalls is in compliance with guest visible CPUID feature bits.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210521095204.2161214-31-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/include/x86_64/hyperv.h     | 166 +++++
 .../selftests/kvm/x86_64/hyperv_features.c    | 649 ++++++++++++++++++
 4 files changed, 817 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_features.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index db51571b8a366..e0e14150744ec 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -13,6 +13,7 @@
 /x86_64/kvm_pv_test
 /x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
+/x86_64/hyperv_features
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
 /x86_64/set_boot_cpu_id
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 1c750910c27b1..61e2accd080dc 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -44,6 +44,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index 443c6572512bd..412eaee7884ae 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -9,11 +9,177 @@
 #ifndef SELFTEST_KVM_HYPERV_H
 #define SELFTEST_KVM_HYPERV_H
 
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS	0x40000000
+#define HYPERV_CPUID_INTERFACE			0x40000001
+#define HYPERV_CPUID_VERSION			0x40000002
+#define HYPERV_CPUID_FEATURES			0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO		0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS		0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES	0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES		0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS	0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE			0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES	0x40000082
+
 #define HV_X64_MSR_GUEST_OS_ID			0x40000000
+#define HV_X64_MSR_HYPERCALL			0x40000001
+#define HV_X64_MSR_VP_INDEX			0x40000002
+#define HV_X64_MSR_RESET			0x40000003
+#define HV_X64_MSR_VP_RUNTIME			0x40000010
 #define HV_X64_MSR_TIME_REF_COUNT		0x40000020
 #define HV_X64_MSR_REFERENCE_TSC		0x40000021
 #define HV_X64_MSR_TSC_FREQUENCY		0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY		0x40000023
+#define HV_X64_MSR_EOI				0x40000070
+#define HV_X64_MSR_ICR				0x40000071
+#define HV_X64_MSR_TPR				0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_SCONTROL			0x40000080
+#define HV_X64_MSR_SVERSION			0x40000081
+#define HV_X64_MSR_SIEFP			0x40000082
+#define HV_X64_MSR_SIMP				0x40000083
+#define HV_X64_MSR_EOM				0x40000084
+#define HV_X64_MSR_SINT0			0x40000090
+#define HV_X64_MSR_SINT1			0x40000091
+#define HV_X64_MSR_SINT2			0x40000092
+#define HV_X64_MSR_SINT3			0x40000093
+#define HV_X64_MSR_SINT4			0x40000094
+#define HV_X64_MSR_SINT5			0x40000095
+#define HV_X64_MSR_SINT6			0x40000096
+#define HV_X64_MSR_SINT7			0x40000097
+#define HV_X64_MSR_SINT8			0x40000098
+#define HV_X64_MSR_SINT9			0x40000099
+#define HV_X64_MSR_SINT10			0x4000009A
+#define HV_X64_MSR_SINT11			0x4000009B
+#define HV_X64_MSR_SINT12			0x4000009C
+#define HV_X64_MSR_SINT13			0x4000009D
+#define HV_X64_MSR_SINT14			0x4000009E
+#define HV_X64_MSR_SINT15			0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG		0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT		0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG		0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT		0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG		0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT		0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG		0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT		0x400000B7
+#define HV_X64_MSR_GUEST_IDLE			0x400000F0
+#define HV_X64_MSR_CRASH_P0			0x40000100
+#define HV_X64_MSR_CRASH_P1			0x40000101
+#define HV_X64_MSR_CRASH_P2			0x40000102
+#define HV_X64_MSR_CRASH_P3			0x40000103
+#define HV_X64_MSR_CRASH_P4			0x40000104
+#define HV_X64_MSR_CRASH_CTL			0x40000105
 #define HV_X64_MSR_REENLIGHTENMENT_CONTROL	0x40000106
 #define HV_X64_MSR_TSC_EMULATION_CONTROL	0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS		0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL	0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL		0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS		0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER		0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER		0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER	0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS		0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE		BIT(0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE		BIT(1)
+#define HV_MSR_SYNIC_AVAILABLE			BIT(2)
+#define HV_MSR_SYNTIMER_AVAILABLE		BIT(3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE		BIT(4)
+#define HV_MSR_HYPERCALL_AVAILABLE		BIT(5)
+#define HV_MSR_VP_INDEX_AVAILABLE		BIT(6)
+#define HV_MSR_RESET_AVAILABLE			BIT(7)
+#define HV_MSR_STAT_PAGES_AVAILABLE		BIT(8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE		BIT(9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE		BIT(10)
+#define HV_ACCESS_FREQUENCY_MSRS		BIT(11)
+#define HV_ACCESS_REENLIGHTENMENT		BIT(13)
+#define HV_ACCESS_TSC_INVARIANT			BIT(15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS			BIT(0)
+#define HV_ACCESS_PARTITION_ID			BIT(1)
+#define HV_ACCESS_MEMORY_POOL			BIT(2)
+#define HV_ADJUST_MESSAGE_BUFFERS		BIT(3)
+#define HV_POST_MESSAGES			BIT(4)
+#define HV_SIGNAL_EVENTS			BIT(5)
+#define HV_CREATE_PORT				BIT(6)
+#define HV_CONNECT_PORT				BIT(7)
+#define HV_ACCESS_STATS				BIT(8)
+#define HV_DEBUGGING				BIT(11)
+#define HV_CPU_MANAGEMENT			BIT(12)
+#define HV_ISOLATION				BIT(22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE				BIT(0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE		BIT(1)
+#define HV_X64_PERF_MONITOR_AVAILABLE			BIT(2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE	BIT(3)
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE		BIT(4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE		BIT(5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE		BIT(8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE		BIT(10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE			BIT(11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE			BIT(19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED			BIT(0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED		BIT(1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED		BIT(2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED			BIT(3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED			BIT(4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED		BIT(5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED			BIT(9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED			BIT(10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED		BIT(11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED		BIT(14)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING	BIT(1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE	0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST	0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT		0x0008
+#define HVCALL_SEND_IPI				0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX	0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX	0x0014
+#define HVCALL_SEND_IPI_EX			0x0015
+#define HVCALL_GET_PARTITION_ID			0x0046
+#define HVCALL_DEPOSIT_MEMORY			0x0048
+#define HVCALL_CREATE_VP			0x004e
+#define HVCALL_GET_VP_REGISTERS			0x0050
+#define HVCALL_SET_VP_REGISTERS			0x0051
+#define HVCALL_POST_MESSAGE			0x005c
+#define HVCALL_SIGNAL_EVENT			0x005d
+#define HVCALL_POST_DEBUG_DATA			0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA		0x006a
+#define HVCALL_RESET_DEBUG_SESSION		0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR		0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT		0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT		0x007d
+#define HVCALL_RETARGET_INTERRUPT		0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+#define HV_FLUSH_ALL_PROCESSORS			BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS			0
+#define HV_STATUS_INVALID_HYPERCALL_CODE	2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT	3
+#define HV_STATUS_INVALID_ALIGNMENT		4
+#define HV_STATUS_INVALID_PARAMETER		5
+#define HV_STATUS_ACCESS_DENIED			6
+#define HV_STATUS_OPERATION_DENIED		8
+#define HV_STATUS_INSUFFICIENT_MEMORY		11
+#define HV_STATUS_INVALID_PORT_ID		17
+#define HV_STATUS_INVALID_CONNECTION_ID		18
+#define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
 #endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 0000000000000..9947ef63dfa1a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define VCPU_ID 0
+#define LINUX_OS_ID ((u64)0x8100 << 48)
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+	u32 lo, hi;
+
+	asm volatile("rdmsr_start: rdmsr;"
+		     "rdmsr_end:"
+		     : "=a"(lo), "=d"(hi)	/* result is in EDX:EAX */
+		     : "c"(idx));
+
+	return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+	u32 lo, hi;
+
+	lo = val;
+	hi = val >> 32;
+
+	asm volatile("wrmsr_start: wrmsr;"
+		     "wrmsr_end:"
+		     : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
+			    vm_vaddr_t output_address)
+{
+	u64 hv_status;
+
+	asm volatile("mov %3, %%r8\n"
+		     "vmcall"
+		     : "=a" (hv_status),
+		       "+c" (control), "+d" (input_address)
+		     :  "r" (output_address)
+		     : "cc", "memory", "r8", "r9", "r10", "r11");
+
+	return hv_status;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+	unsigned char *rip = (unsigned char *)regs->rip;
+	bool r, w;
+
+	r = rip == &rdmsr_start;
+	w = rip == &wrmsr_start;
+	GUEST_ASSERT(r || w);
+
+	nr_gp++;
+
+	if (r)
+		regs->rip = (uint64_t)&rdmsr_end;
+	else
+		regs->rip = (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+	uint32_t idx;
+	bool available;
+	bool write;
+	u64 write_val;
+};
+
+struct hcall_data {
+	uint64_t control;
+	uint64_t expect;
+};
+
+static void guest_msr(struct msr_data *msr)
+{
+	int i = 0;
+
+	while (msr->idx) {
+		WRITE_ONCE(nr_gp, 0);
+		if (!msr->write)
+			do_rdmsr(msr->idx);
+		else
+			do_wrmsr(msr->idx, msr->write_val);
+
+		if (msr->available)
+			GUEST_ASSERT(READ_ONCE(nr_gp) == 0);
+		else
+			GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+		GUEST_SYNC(i++);
+	}
+
+	GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+	int i = 0;
+
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+	while (hcall->control) {
+		GUEST_ASSERT(hypercall(hcall->control, pgs_gpa,
+				       pgs_gpa + 4096) == hcall->expect);
+		GUEST_SYNC(i++);
+	}
+
+	GUEST_DONE();
+}
+
+static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
+			 struct kvm_cpuid_entry2 *feat,
+			 struct kvm_cpuid_entry2 *recomm,
+			 struct kvm_cpuid_entry2 *dbg)
+{
+	TEST_ASSERT(set_cpuid(cpuid, feat),
+		    "failed to set HYPERV_CPUID_FEATURES leaf");
+	TEST_ASSERT(set_cpuid(cpuid, recomm),
+		    "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf");
+	TEST_ASSERT(set_cpuid(cpuid, dbg),
+		    "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf");
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+}
+
+static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
+				   struct kvm_cpuid2 *best)
+{
+	struct kvm_run *run;
+	struct ucall uc;
+	int stage = 0, r;
+	struct kvm_cpuid_entry2 feat = {
+		.function = HYPERV_CPUID_FEATURES
+	};
+	struct kvm_cpuid_entry2 recomm = {
+		.function = HYPERV_CPUID_ENLIGHTMENT_INFO
+	};
+	struct kvm_cpuid_entry2 dbg = {
+		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+	};
+	struct kvm_enable_cap cap = {0};
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (true) {
+		switch (stage) {
+		case 0:
+			/*
+			 * Only available when Hyper-V identification is set
+			 */
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 1:
+			msr->idx = HV_X64_MSR_HYPERCALL;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 2:
+			feat.eax |= HV_MSR_HYPERCALL_AVAILABLE;
+			/*
+			 * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+			 * HV_X64_MSR_HYPERCALL available.
+			 */
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 1;
+			msr->write_val = LINUX_OS_ID;
+			msr->available = 1;
+			break;
+		case 3:
+			msr->idx = HV_X64_MSR_GUEST_OS_ID;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 4:
+			msr->idx = HV_X64_MSR_HYPERCALL;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+
+		case 5:
+			msr->idx = HV_X64_MSR_VP_RUNTIME;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 6:
+			feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 7:
+			/* Read only */
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 8:
+			msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 9:
+			feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 10:
+			/* Read only */
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 11:
+			msr->idx = HV_X64_MSR_VP_INDEX;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 12:
+			feat.eax |= HV_MSR_VP_INDEX_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 13:
+			/* Read only */
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 14:
+			msr->idx = HV_X64_MSR_RESET;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 15:
+			feat.eax |= HV_MSR_RESET_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 16:
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 17:
+			msr->idx = HV_X64_MSR_REFERENCE_TSC;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 18:
+			feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 19:
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 20:
+			msr->idx = HV_X64_MSR_EOM;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 21:
+			/*
+			 * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+			 * capability enabled and guest visible CPUID bit unset.
+			 */
+			cap.cap = KVM_CAP_HYPERV_SYNIC2;
+			vcpu_enable_cap(vm, VCPU_ID, &cap);
+			break;
+		case 22:
+			feat.eax |= HV_MSR_SYNIC_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 23:
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 24:
+			msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 25:
+			feat.eax |= HV_MSR_SYNTIMER_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 26:
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+		case 27:
+			/* Direct mode test */
+			msr->write = 1;
+			msr->write_val = 1 << 12;
+			msr->available = 0;
+			break;
+		case 28:
+			feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+			msr->available = 1;
+			break;
+
+		case 29:
+			msr->idx = HV_X64_MSR_EOI;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 30:
+			feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+
+		case 31:
+			msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 32:
+			feat.eax |= HV_ACCESS_FREQUENCY_MSRS;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 33:
+			/* Read only */
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 34:
+			msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 35:
+			feat.eax |= HV_ACCESS_REENLIGHTENMENT;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 36:
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+		case 37:
+			/* Can only write '0' */
+			msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 0;
+			break;
+
+		case 38:
+			msr->idx = HV_X64_MSR_CRASH_P0;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 39:
+			feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 40:
+			msr->write = 1;
+			msr->write_val = 1;
+			msr->available = 1;
+			break;
+
+		case 41:
+			msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+			msr->write = 0;
+			msr->available = 0;
+			break;
+		case 42:
+			feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+			dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+			msr->write = 0;
+			msr->available = 1;
+			break;
+		case 43:
+			msr->write = 1;
+			msr->write_val = 0;
+			msr->available = 1;
+			break;
+
+		case 44:
+			/* END */
+			msr->idx = 0;
+			break;
+		}
+
+		hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+		if (msr->idx)
+			pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+				 msr->idx, msr->write ? "write" : "read");
+		else
+			pr_debug("Stage %d: finish\n", stage);
+
+		r = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == stage,
+				    "Unexpected stage: %ld (%d expected)\n",
+				    uc.args[1], stage);
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			return;
+		case UCALL_DONE:
+			return;
+		}
+
+		stage++;
+	}
+}
+
+static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
+				     void *input, void *output, struct kvm_cpuid2 *best)
+{
+	struct kvm_run *run;
+	struct ucall uc;
+	int stage = 0, r;
+	struct kvm_cpuid_entry2 feat = {
+		.function = HYPERV_CPUID_FEATURES,
+		.eax = HV_MSR_HYPERCALL_AVAILABLE
+	};
+	struct kvm_cpuid_entry2 recomm = {
+		.function = HYPERV_CPUID_ENLIGHTMENT_INFO
+	};
+	struct kvm_cpuid_entry2 dbg = {
+		.function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+	};
+
+	run = vcpu_state(vm, VCPU_ID);
+	/* Each stage sets hcall->control and the status expected for it. */
+	while (true) {
+		switch (stage) {
+		case 0:
+			hcall->control = 0xdeadbeef;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+			break;
+		/* HVCALL_POST_MESSAGE: denied until HV_POST_MESSAGES is exposed */
+		case 1:
+			hcall->control = HVCALL_POST_MESSAGE;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 2:
+			feat.ebx |= HV_POST_MESSAGES;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		/* HVCALL_SIGNAL_EVENT: denied until HV_SIGNAL_EVENTS is exposed */
+		case 3:
+			hcall->control = HVCALL_SIGNAL_EVENT;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 4:
+			feat.ebx |= HV_SIGNAL_EVENTS;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		/* Debug session reset: status changes as syndbg/HV_DEBUGGING appear */
+		case 5:
+			hcall->control = HVCALL_RESET_DEBUG_SESSION;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+			break;
+		case 6:
+			dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 7:
+			feat.ebx |= HV_DEBUGGING;
+			hcall->expect = HV_STATUS_OPERATION_DENIED;
+			break;
+		/* TLB flush hypercalls: gated by bits in the 'recomm' leaf (EAX) */
+		case 8:
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 9:
+			recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		case 10:
+			hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 11:
+			recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		/* IPI hypercalls: denied until HV_X64_CLUSTER_IPI_RECOMMENDED is set */
+		case 12:
+			hcall->control = HVCALL_SEND_IPI;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 13:
+			recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+			hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+			break;
+		case 14:
+			/* Nothing in 'sparse banks' -> success */
+			hcall->control = HVCALL_SEND_IPI_EX;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+		/* Long spin wait: denied until recomm.ebx is populated */
+		case 15:
+			hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+			hcall->expect = HV_STATUS_ACCESS_DENIED;
+			break;
+		case 16:
+			recomm.ebx = 0xfff;
+			hcall->expect = HV_STATUS_SUCCESS;
+			break;
+
+		case 17:
+			/* END */
+			hcall->control = 0;
+			break;
+		}
+		/* Apply the feature/recommendation/debug leaves accumulated so far. */
+		hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+		if (hcall->control)
+			pr_debug("Stage %d: testing hcall: 0x%lx\n", stage,
+				 hcall->control);
+		else
+			pr_debug("Stage %d: finish\n", stage);
+		/* Run the vCPU; only a KVM_EXIT_IO exit is acceptable here. */
+		r = _vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+		/* SYNC must report the current stage; ABORT and DONE end the test. */
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == stage,
+				    "Unexpected stage: %ld (%d expected)\n",
+				    uc.args[1], stage);
+			break;
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+				  __FILE__, uc.args[1]);
+			return;
+		case UCALL_DONE:
+			return;
+		}
+
+		stage++;
+	}
+}
+
+int main(void)
+{
+	struct kvm_cpuid2 *best;
+	struct kvm_vm *vm;
+	vm_vaddr_t msr_gva, hcall_page, hcall_params;
+	struct kvm_enable_cap cap = {
+		.cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+		.args = {1}
+	};
+
+	/* Test MSRs: one zeroed guest page carries the per-stage MSR request */
+	vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+	msr_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+	vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+	vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+	best = kvm_get_supported_hv_cpuid();
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+	pr_info("Testing access to Hyper-V specific MSRs\n");
+	guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
+			       best);
+	kvm_vm_free(vm);
+
+	/* Test hypercalls */
+	vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+	/* Hypercall input/output: two consecutive zeroed pages */
+	hcall_page = vm_vaddr_alloc(vm, 2 * getpagesize(), 0x10000, 0, 0);
+	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+	/* Per-stage hypercall request (struct hcall_data), zeroed before use */
+	hcall_params = vm_vaddr_alloc(vm, getpagesize(), 0x20000, 0, 0);
+	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+	vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+	best = kvm_get_supported_hv_cpuid();
+
+	pr_info("Testing access to Hyper-V hypercalls\n");
+	guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
+				 addr_gva2hva(vm, hcall_page),
+				 addr_gva2hva(vm, hcall_page) + getpagesize(),
+				 best);
+
+	kvm_vm_free(vm);
+}
-- 
GitLab


From bcb72d0627e8a3e531021c9bd2a14fae8da63ef3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 7 Jun 2021 12:01:56 +0300
Subject: [PATCH 3207/3804] KVM: nVMX: Drop obsolete (and pointless)
 pdptrs_changed() check

Remove the pdptrs_changed() check when loading L2's CR3.  The set of
available registers is always reset when switching VMCSes (see commit
e5d03de5937e, "KVM: nVMX: Reset register cache (available and dirty
masks) on VMCS switch"), thus the "are PDPTRs available" check will
always fail.  And even if it didn't fail, reading guest memory to check
the PDPTRs is just as expensive as reading guest memory to load 'em.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210607090203.133058-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e77b8ee28df87..47f4aa6097788 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1118,11 +1118,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 	 * must not be dereferenced.
 	 */
 	if (!nested_ept && is_pae_paging(vcpu) &&
-	    (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
-		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
-			*entry_failure_code = ENTRY_FAIL_PDPTE;
-			return -EINVAL;
-		}
+	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
+		*entry_failure_code = ENTRY_FAIL_PDPTE;
+		return -EINVAL;
 	}
 
 	/*
-- 
GitLab


From a36dbec67e26febc1fc551f4819e3c058b25e79c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 7 Jun 2021 12:01:57 +0300
Subject: [PATCH 3208/3804] KVM: nSVM: Drop pointless pdptrs_changed() check on
 nested transition

Remove the "PDPTRs unchanged" check to skip PDPTR loading during nested
SVM transitions as it's not at all an optimization.  Reading guest memory
to get the PDPTRs isn't magically cheaper by doing it in pdptrs_changed(),
and if the PDPTRs did change, KVM will end up doing the read twice.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210607090203.133058-3-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 34fc74b0d58a3..f0a7f8432527e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -391,10 +391,8 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		return -EINVAL;
 
 	if (!nested_npt && is_pae_paging(vcpu) &&
-	    (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
-		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
-			return -EINVAL;
-	}
+	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
+		return -EINVAL;
 
 	/*
 	 * TODO: optimize unconditional TLB flush/MMU sync here and in
-- 
GitLab


From c7313155bf11906ad75ae0edc4a97bf93d69c275 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 7 Jun 2021 12:01:58 +0300
Subject: [PATCH 3209/3804] KVM: x86: Always load PDPTRs on CR3 load for SVM
 w/o NPT and a PAE guest

Kill off pdptrs_changed() and instead go through the full kvm_set_cr3()
for PAE guest, even if the new CR3 is the same as the current CR3.  For
VMX, and SVM with NPT enabled, the PDPTRs are unconditionally marked as
unavailable after VM-Exit, i.e. the optimization is dead code except for
SVM without NPT.

In the unlikely scenario that anyone cares about SVM without NPT _and_ a
PAE guest, they've got bigger problems if their guest is loading the same
CR3 so frequently that the performance of kvm_set_cr3() is notable,
especially since KVM's fast PGD switching means reloading the same CR3
does not require a full rebuild.  Given that PAE and PCID are mutually
exclusive, i.e. a sync and flush are guaranteed in any case, the actual
benefits of the pdptrs_changed() optimization are marginal at best.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210607090203.133058-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/x86.c              | 34 ++-------------------------------
 2 files changed, 2 insertions(+), 33 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 95b254b5a5230..601e00876b388 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1506,7 +1506,6 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
-bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
 int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 			  const void *val, int bytes);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 475376a974191..188c180d9f6ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -783,13 +783,6 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 }
 EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu);
 
-static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
-			       void *data, int offset, int len, u32 access)
-{
-	return kvm_read_guest_page_mmu(vcpu, vcpu->arch.walk_mmu, gfn,
-				       data, offset, len, access);
-}
-
 static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
@@ -831,30 +824,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(load_pdptrs);
 
-bool pdptrs_changed(struct kvm_vcpu *vcpu)
-{
-	u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
-	int offset;
-	gfn_t gfn;
-	int r;
-
-	if (!is_pae_paging(vcpu))
-		return false;
-
-	if (!kvm_register_is_available(vcpu, VCPU_EXREG_PDPTR))
-		return true;
-
-	gfn = (kvm_read_cr3(vcpu) & 0xffffffe0ul) >> PAGE_SHIFT;
-	offset = (kvm_read_cr3(vcpu) & 0xffffffe0ul) & (PAGE_SIZE - 1);
-	r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte),
-				       PFERR_USER_MASK | PFERR_WRITE_MASK);
-	if (r < 0)
-		return true;
-
-	return memcmp(pdpte, vcpu->arch.walk_mmu->pdptrs, sizeof(pdpte)) != 0;
-}
-EXPORT_SYMBOL_GPL(pdptrs_changed);
-
 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
 {
 	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
@@ -1101,7 +1070,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	}
 #endif
 
-	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
+	/* PDPTRs are always reloaded for PAE paging. */
+	if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) {
 		if (!skip_tlb_flush) {
 			kvm_mmu_sync_roots(vcpu);
 			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-- 
GitLab


From b222b0b88162bdef4eceb12a79d5edbbdb23dbfd Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 7 Jun 2021 12:01:59 +0300
Subject: [PATCH 3210/3804] KVM: nSVM: refactor the CR3 reload on migration

Document the actual reason why we need to do it
on migration and move the call to svm_set_nested_state
to be closer to VMX code.

To avoid loading the PDPTRs from possibly not up to date memory map,
in nested_svm_load_cr3 after the move, move this code to
.get_nested_state_pages.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210607090203.133058-5-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f0a7f8432527e..e917eba659b3c 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -385,12 +385,12 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
  * if we are emulating VM-Entry into a guest with NPT enabled.
  */
 static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
-			       bool nested_npt)
+			       bool nested_npt, bool reload_pdptrs)
 {
 	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
 		return -EINVAL;
 
-	if (!nested_npt && is_pae_paging(vcpu) &&
+	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
 	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
 		return -EINVAL;
 
@@ -574,7 +574,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	nested_vmcb02_prepare_save(svm, vmcb12);
 
 	ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
-				  nested_npt_enabled(svm));
+				  nested_npt_enabled(svm), true);
 	if (ret)
 		return ret;
 
@@ -801,7 +801,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	nested_svm_uninit_mmu_context(vcpu);
 
-	rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
+	rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
 	if (rc)
 		return 1;
 
@@ -1297,6 +1297,19 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 	    !nested_vmcb_valid_sregs(vcpu, save))
 		goto out_free;
 
+	/*
+	 * While the nested guest CR3 is already checked and set by
+	 * KVM_SET_SREGS, it was set when nested state was not yet loaded,
+	 * thus the MMU might not be initialized correctly.
+	 * Set it again to fix this.
+	 */
+
+	ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+				  nested_npt_enabled(svm), false);
+	if (WARN_ON_ONCE(ret))
+		goto out_free;
+
+
 	/*
 	 * All checks done, we can enter guest mode. Userspace provides
 	 * vmcb12.control, which will be combined with L1 and stored into
@@ -1354,9 +1367,14 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 	if (WARN_ON(!is_guest_mode(vcpu)))
 		return true;
 
-	if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
-				nested_npt_enabled(svm)))
-		return false;
+	if (!nested_npt_enabled(svm) && is_pae_paging(vcpu))
+		/*
+		 * Reload the guest's PDPTRs since after a migration
+		 * the guest CR3 might be restored prior to setting the nested
+		 * state which can lead to a load of wrong PDPTRs.
+		 */
+		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+			return false;
 
 	if (!nested_svm_vmrun_msrpm(svm)) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-- 
GitLab


From 0f85722341b0e3a67d0f2d2ae943b9193cb3e1b0 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 7 Jun 2021 12:02:00 +0300
Subject: [PATCH 3211/3804] KVM: nVMX: delay loading of PDPTRs to
 KVM_REQ_GET_NESTED_STATE_PAGES

Similar to the rest of guest page accesses after a migration,
this access should be delayed to KVM_REQ_GET_NESTED_STATE_PAGES.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210607090203.133058-6-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 47f4aa6097788..ac306678afccd 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1105,7 +1105,8 @@ static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
  * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
  * @entry_failure_code.
  */
-static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
+static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
+			       bool nested_ept, bool reload_pdptrs,
 			       enum vm_entry_failure_code *entry_failure_code)
 {
 	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
@@ -1117,7 +1118,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
 	 * If PAE paging and EPT are both on, CR3 is not used by the CPU and
 	 * must not be dereferenced.
 	 */
-	if (!nested_ept && is_pae_paging(vcpu) &&
+	if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
 	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
 		*entry_failure_code = ENTRY_FAIL_PDPTE;
 		return -EINVAL;
@@ -2487,6 +2488,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
  * is assigned to entry_failure_code on failure.
  */
 static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+			  bool from_vmentry,
 			  enum vm_entry_failure_code *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2579,7 +2581,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
 	/* Shadow page tables on either EPT or shadow page tables. */
 	if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
-				entry_failure_code))
+				from_vmentry, entry_failure_code))
 		return -EINVAL;
 
 	/*
@@ -3120,6 +3122,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	struct page *page;
 	u64 hpa;
 
+	if (!nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
+		/*
+		 * Reload the guest's PDPTRs since after a migration
+		 * the guest CR3 might be restored prior to setting the nested
+		 * state which can lead to a load of wrong PDPTRs.
+		 */
+		if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
+			return false;
+	}
+
+
 	if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
 		/*
 		 * Translate L1 physical address to host physical
@@ -3371,7 +3384,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
 
 	enter_guest_mode(vcpu);
 
-	if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) {
+	if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
 		exit_reason.basic = EXIT_REASON_INVALID_STATE;
 		vmcs12->exit_qualification = entry_failure_code;
 		goto vmentry_fail_vmexit_guest_mode;
@@ -4226,7 +4239,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
 	 * couldn't have changed.
 	 */
-	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
+	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
 		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
 
 	nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
-- 
GitLab


From 329675dde93c6f30009dc413197bdf2b971f1e5e Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 7 Jun 2021 12:02:01 +0300
Subject: [PATCH 3212/3804] KVM: x86: introduce kvm_register_clear_available

Small refactoring that will be used in the next patch.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210607090203.133058-7-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/kvm_cache_regs.h | 7 +++++++
 arch/x86/kvm/svm/svm.c        | 6 ++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index ebddbd37a0bf5..296d67f689ef3 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -55,6 +55,13 @@ static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
 	__set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
 }
 
+static inline void kvm_register_clear_available(struct kvm_vcpu *vcpu,
+					       enum kvm_reg reg)
+{
+	__clear_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+	__clear_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
 static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
 					   enum kvm_reg reg)
 {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9bb4692728ef2..b6afa6b63c8fa 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3873,10 +3873,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->arch.apf.host_apf_flags =
 			kvm_read_and_reset_apf_flags();
 
-	if (npt_enabled) {
-		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
-		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
-	}
+	if (npt_enabled)
+		kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR);
 
 	/*
 	 * We need to handle MC intercepts here before the vcpu has a chance to
-- 
GitLab


From 6dba940352038b56db9b591b172fb2ec76a5fd5e Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 7 Jun 2021 12:02:02 +0300
Subject: [PATCH 3213/3804] KVM: x86: Introduce KVM_GET_SREGS2 / KVM_SET_SREGS2

This is a new version of KVM_GET_SREGS / KVM_SET_SREGS.

It has the following changes:
   * Has flags for future extensions
   * Has vcpu's PDPTRs, allowing to save/restore them on migration.
   * Lacks obsolete interrupt bitmap (done now via KVM_SET_VCPU_EVENTS)

New capability, KVM_CAP_SREGS2 is added to signal
the userspace of this ioctl.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210607090203.133058-8-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  |  48 +++++++++++
 arch/x86/include/uapi/asm/kvm.h |  13 +++
 arch/x86/kvm/kvm_cache_regs.h   |   5 ++
 arch/x86/kvm/x86.c              | 142 ++++++++++++++++++++++++++------
 include/uapi/linux/kvm.h        |   4 +
 5 files changed, 185 insertions(+), 27 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 80154d5d98a18..cded99561adf4 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5034,6 +5034,54 @@ see KVM_XEN_VCPU_SET_ATTR above.
 The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
 with the KVM_XEN_VCPU_GET_ATTR ioctl.
 
+
+4.131 KVM_GET_SREGS2
+--------------------
+
+:Capability: KVM_CAP_SREGS2
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_sregs2 (out)
+:Returns: 0 on success, -1 on error
+
+Reads special registers from the vcpu.
+This ioctl (when supported) replaces the KVM_GET_SREGS.
+
+::
+
+  struct kvm_sregs2 {
+	/* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 flags;
+	__u64 pdptrs[4];
+  };
+
+flags values for ``kvm_sregs2``:
+
+``KVM_SREGS2_FLAGS_PDPTRS_VALID``
+
+  Indicates that the struct contains valid PDPTR values.
+
+
+4.132 KVM_SET_SREGS2
+--------------------
+
+:Capability: KVM_CAP_SREGS2
+:Architectures: x86
+:Type: vcpu ioctl
+:Parameters: struct kvm_sregs2 (in)
+:Returns: 0 on success, -1 on error
+
+Writes special registers into the vcpu.
+See KVM_GET_SREGS2 for the data structures.
+This ioctl (when supported) replaces the KVM_SET_SREGS.
+
+
 5. The kvm_run structure
 ========================
 
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0662f644aad9d..a6c327f8ad9e5 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -159,6 +159,19 @@ struct kvm_sregs {
 	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
 };
 
+struct kvm_sregs2 {
+	/* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 flags;
+	__u64 pdptrs[4];
+};
+#define KVM_SREGS2_FLAGS_PDPTRS_VALID 1
+
 /* for KVM_GET_FPU and KVM_SET_FPU */
 struct kvm_fpu {
 	__u8  fpr[8][16];
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 296d67f689ef3..90e1ffdc05b75 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -125,6 +125,11 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
 	return vcpu->arch.walk_mmu->pdptrs[index];
 }
 
+static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value)
+{
+	vcpu->arch.walk_mmu->pdptrs[index] = value;
+}
+
 static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
 {
 	ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 188c180d9f6ee..8085ab830c804 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -114,6 +114,9 @@ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
 static int sync_regs(struct kvm_vcpu *vcpu);
 
+static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
+static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
+
 struct kvm_x86_ops kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
 
@@ -817,7 +820,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 
 	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
 	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
-
 out:
 
 	return ret;
@@ -3956,6 +3958,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SGX_ATTRIBUTE:
 #endif
 	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+	case KVM_CAP_SREGS2:
 		r = 1;
 		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
@@ -4870,6 +4873,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	void __user *argp = (void __user *)arg;
 	int r;
 	union {
+		struct kvm_sregs2 *sregs2;
 		struct kvm_lapic_state *lapic;
 		struct kvm_xsave *xsave;
 		struct kvm_xcrs *xcrs;
@@ -5242,6 +5246,28 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 #endif
+	case KVM_GET_SREGS2: {
+		u.sregs2 = kzalloc(sizeof(struct kvm_sregs2), GFP_KERNEL);
+		r = -ENOMEM;
+		if (!u.sregs2)
+			goto out;
+		__get_sregs2(vcpu, u.sregs2);
+		r = -EFAULT;
+		if (copy_to_user(argp, u.sregs2, sizeof(struct kvm_sregs2)))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_SREGS2: {
+		u.sregs2 = memdup_user(argp, sizeof(struct kvm_sregs2));
+		if (IS_ERR(u.sregs2)) {
+			r = PTR_ERR(u.sregs2);
+			u.sregs2 = NULL;
+			goto out;
+		}
+		r = __set_sregs2(vcpu, u.sregs2);
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -9937,7 +9963,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
 
-static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static void __get_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
 	struct desc_ptr dt;
 
@@ -9970,14 +9996,36 @@ skip_protected_regs:
 	sregs->cr8 = kvm_get_cr8(vcpu);
 	sregs->efer = vcpu->arch.efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
+}
 
-	memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
+static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	__get_sregs_common(vcpu, sregs);
+
+	if (vcpu->arch.guest_state_protected)
+		return;
 
 	if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
 		set_bit(vcpu->arch.interrupt.nr,
 			(unsigned long *)sregs->interrupt_bitmap);
 }
 
+static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
+{
+	int i;
+
+	__get_sregs_common(vcpu, (struct kvm_sregs *)sregs2);
+
+	if (vcpu->arch.guest_state_protected)
+		return;
+
+	if (is_pae_paging(vcpu)) {
+		for (i = 0 ; i < 4 ; i++)
+			sregs2->pdptrs[i] = kvm_pdptr_read(vcpu, i);
+		sregs2->flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID;
+	}
+}
+
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
@@ -10096,24 +10144,23 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	return kvm_is_valid_cr4(vcpu, sregs->cr4);
 }
 
-static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
+		int *mmu_reset_needed, bool update_pdptrs)
 {
 	struct msr_data apic_base_msr;
-	int mmu_reset_needed = 0;
-	int pending_vec, max_bits, idx;
+	int idx;
 	struct desc_ptr dt;
-	int ret = -EINVAL;
 
 	if (!kvm_is_valid_sregs(vcpu, sregs))
-		goto out;
+		return -EINVAL;
 
 	apic_base_msr.data = sregs->apic_base;
 	apic_base_msr.host_initiated = true;
 	if (kvm_set_apic_base(vcpu, &apic_base_msr))
-		goto out;
+		return -EINVAL;
 
 	if (vcpu->arch.guest_state_protected)
-		goto skip_protected_regs;
+		return 0;
 
 	dt.size = sregs->idt.limit;
 	dt.address = sregs->idt.base;
@@ -10123,31 +10170,30 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	static_call(kvm_x86_set_gdt)(vcpu, &dt);
 
 	vcpu->arch.cr2 = sregs->cr2;
-	mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
+	*mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
 	vcpu->arch.cr3 = sregs->cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
 	kvm_set_cr8(vcpu, sregs->cr8);
 
-	mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
+	*mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
 	static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
 
-	mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
+	*mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
 	static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
 	vcpu->arch.cr0 = sregs->cr0;
 
-	mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+	*mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
 	static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
 
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	if (is_pae_paging(vcpu)) {
-		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
-		mmu_reset_needed = 1;
+	if (update_pdptrs) {
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+		if (is_pae_paging(vcpu)) {
+			load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
+			*mmu_reset_needed = 1;
+		}
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	}
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
-	if (mmu_reset_needed)
-		kvm_mmu_reset_context(vcpu);
 
 	kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
 	kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
@@ -10167,20 +10213,62 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	    !is_protmode(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 
-skip_protected_regs:
+	return 0;
+}
+
+static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+	int pending_vec, max_bits;
+	int mmu_reset_needed = 0;
+	int ret = __set_sregs_common(vcpu, sregs, &mmu_reset_needed, true);
+
+	if (ret)
+		return ret;
+
+	if (mmu_reset_needed)
+		kvm_mmu_reset_context(vcpu);
+
 	max_bits = KVM_NR_INTERRUPTS;
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
+
 	if (pending_vec < max_bits) {
 		kvm_queue_interrupt(vcpu, pending_vec, false);
 		pr_debug("Set back pending irq %d\n", pending_vec);
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 	}
+	return 0;
+}
 
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
+static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
+{
+	int mmu_reset_needed = 0;
+	bool valid_pdptrs = sregs2->flags & KVM_SREGS2_FLAGS_PDPTRS_VALID;
+	bool pae = (sregs2->cr0 & X86_CR0_PG) && (sregs2->cr4 & X86_CR4_PAE) &&
+		!(sregs2->efer & EFER_LMA);
+	int i, ret;
 
-	ret = 0;
-out:
-	return ret;
+	if (sregs2->flags & ~KVM_SREGS2_FLAGS_PDPTRS_VALID)
+		return -EINVAL;
+
+	if (valid_pdptrs && (!pae || vcpu->arch.guest_state_protected))
+		return -EINVAL;
+
+	ret = __set_sregs_common(vcpu, (struct kvm_sregs *)sregs2,
+				 &mmu_reset_needed, !valid_pdptrs);
+	if (ret)
+		return ret;
+
+	if (valid_pdptrs) {
+		for (i = 0; i < 4 ; i++)
+			kvm_pdptr_write(vcpu, i, sregs2->pdptrs[i]);
+
+		kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+		mmu_reset_needed = 1;
+	}
+	if (mmu_reset_needed)
+		kvm_mmu_reset_context(vcpu);
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 7928161440925..90d44138dbfbf 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1084,6 +1084,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199
+#define KVM_CAP_SREGS2 200
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1622,6 +1623,9 @@ struct kvm_xen_hvm_attr {
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
 #define KVM_XEN_VCPU_SET_ATTR	_IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
 
+#define KVM_GET_SREGS2             _IOR(KVMIO,  0xcc, struct kvm_sregs2)
+#define KVM_SET_SREGS2             _IOW(KVMIO,  0xcd, struct kvm_sregs2)
+
 struct kvm_xen_vcpu_attr {
 	__u16 type;
 	__u16 pad[3];
-- 
GitLab


From 158a48ecf776d0ebc916befcb0dc0862f136a31f Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 7 Jun 2021 12:02:03 +0300
Subject: [PATCH 3214/3804] KVM: x86: avoid loading PDPTRs after migration when
 possible

If the new KVM_*_SREGS2 ioctls are used, the PDPTRs are
part of the migration state and are correctly
restored by those ioctls.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210607090203.133058-9-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 6 ++++++
 arch/x86/kvm/svm/nested.c       | 3 ++-
 arch/x86/kvm/vmx/nested.c       | 3 ++-
 arch/x86/kvm/x86.c              | 3 +++
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 601e00876b388..383106901fe2a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -862,6 +862,12 @@ struct kvm_vcpu_arch {
 	/* Protected Guests */
 	bool guest_state_protected;
 
+	/*
+	 * Set when PDPTS were loaded directly by the userspace without
+	 * reading the guest memory
+	 */
+	bool pdptrs_from_userspace;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 	hpa_t hv_root_tdp;
 #endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index e917eba659b3c..c902ace2bd179 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1367,7 +1367,8 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 	if (WARN_ON(!is_guest_mode(vcpu)))
 		return true;
 
-	if (!nested_npt_enabled(svm) && is_pae_paging(vcpu))
+	if (!vcpu->arch.pdptrs_from_userspace &&
+	    !nested_npt_enabled(svm) && is_pae_paging(vcpu))
 		/*
 		 * Reload the guest's PDPTRs since after a migration
 		 * the guest CR3 might be restored prior to setting the nested
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ac306678afccd..1a2f000a5daee 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3122,7 +3122,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	struct page *page;
 	u64 hpa;
 
-	if (!nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
+	if (!vcpu->arch.pdptrs_from_userspace &&
+	    !nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
 		/*
 		 * Reload the guest's PDPTRs since after a migration
 		 * the guest CR3 might be restored prior to setting the nested
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8085ab830c804..1727178b89610 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -820,6 +820,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
 
 	memcpy(mmu->pdptrs, pdpte, sizeof(mmu->pdptrs));
 	kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
+	vcpu->arch.pdptrs_from_userspace = false;
+
 out:
 
 	return ret;
@@ -10265,6 +10267,7 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2)
 
 		kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
 		mmu_reset_needed = 1;
+		vcpu->arch.pdptrs_from_userspace = true;
 	}
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
-- 
GitLab


From 1e9dfbd748f37dfa51fcdc82a7afddde1cf8d0ed Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:16 +0200
Subject: [PATCH 3215/3804] KVM: nVMX: Use '-1' in 'hv_evmcs_vmptr' to indicate
 that eVMCS is not in use

Instead of checking 'vmx->nested.hv_evmcs', use '-1' in
'vmx->nested.hv_evmcs_vmptr' to indicate the 'eVMCS is not in use' state.
This matches how we check 'vmx->nested.current_vmptr'. Introduce
EVMPTR_INVALID and evmptr_is_valid() and use them instead of a raw '-1'
check in preparation for adding other 'special' values.

No functional change intended.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/evmcs.c  |  3 +++
 arch/x86/kvm/vmx/evmcs.h  |  7 +++++
 arch/x86/kvm/vmx/nested.c | 55 ++++++++++++++++++++-------------------
 arch/x86/kvm/vmx/nested.h |  2 +-
 arch/x86/kvm/vmx/vmx.c    |  1 +
 5 files changed, 40 insertions(+), 28 deletions(-)

diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c
index 41f24661af04d..896b2a50b4aae 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/evmcs.c
@@ -319,6 +319,9 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
 	if (unlikely(!assist_page.enlighten_vmentry))
 		return false;
 
+	if (unlikely(!evmptr_is_valid(assist_page.current_nested_vmcs)))
+		return false;
+
 	*evmcs_gpa = assist_page.current_nested_vmcs;
 
 	return true;
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index bd41d9462355f..47f802f71f6a9 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -197,6 +197,13 @@ static inline void evmcs_load(u64 phys_addr) {}
 static inline void evmcs_touch_msr_bitmap(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
+#define EVMPTR_INVALID (-1ULL)
+
+static inline bool evmptr_is_valid(u64 evmptr)
+{
+	return evmptr != EVMPTR_INVALID;
+}
+
 enum nested_evmptrld_status {
 	EVMPTRLD_DISABLED,
 	EVMPTRLD_SUCCEEDED,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1a2f000a5daee..84d9a8d569bb7 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -187,7 +187,8 @@ static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
 	 * failValid writes the error number to the current VMCS, which
 	 * can't be done if there isn't a current VMCS.
 	 */
-	if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs)
+	if (vmx->nested.current_vmptr == -1ull &&
+	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		return nested_vmx_failInvalid(vcpu);
 
 	return nested_vmx_failValid(vcpu, vm_instruction_error);
@@ -221,12 +222,12 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (!vmx->nested.hv_evmcs)
-		return;
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+		kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
+		vmx->nested.hv_evmcs = NULL;
+	}
 
-	kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
-	vmx->nested.hv_evmcs_vmptr = 0;
-	vmx->nested.hv_evmcs = NULL;
+	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
 }
 
 static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
@@ -1981,10 +1982,8 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
 		return EVMPTRLD_DISABLED;
 
-	if (unlikely(!vmx->nested.hv_evmcs ||
-		     evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
-		if (!vmx->nested.hv_evmcs)
-			vmx->nested.current_vmptr = -1ull;
+	if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
+		vmx->nested.current_vmptr = -1ull;
 
 		nested_release_evmcs(vcpu);
 
@@ -2055,7 +2054,7 @@ void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (vmx->nested.hv_evmcs) {
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
 		copy_vmcs12_to_enlightened(vmx);
 		/* All fields are clean */
 		vmx->nested.hv_evmcs->hv_clean_fields |=
@@ -2207,7 +2206,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 	u32 exec_control;
 	u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
 
-	if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
+	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		prepare_vmcs02_early_rare(vmx, vmcs12);
 
 	/*
@@ -2492,15 +2491,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 			  enum vm_entry_failure_code *entry_failure_code)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
 	bool load_guest_pdptrs_vmcs12 = false;
 
-	if (vmx->nested.dirty_vmcs12 || hv_evmcs) {
+	if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
 		prepare_vmcs02_rare(vmx, vmcs12);
 		vmx->nested.dirty_vmcs12 = false;
 
-		load_guest_pdptrs_vmcs12 = !hv_evmcs ||
-			!(hv_evmcs->hv_clean_fields &
+		load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
+			!(vmx->nested.hv_evmcs->hv_clean_fields &
 			  HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
 	}
 
@@ -3102,7 +3100,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 	 * L2 was running), map it here to make sure vmcs12 changes are
 	 * properly reflected.
 	 */
-	if (vmx->nested.enlightened_vmcs_enabled && !vmx->nested.hv_evmcs) {
+	if (vmx->nested.enlightened_vmcs_enabled &&
+	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
 		enum nested_evmptrld_status evmptrld_status =
 			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
@@ -3465,7 +3464,7 @@ vmentry_fail_vmexit:
 
 	load_vmcs12_host_state(vcpu, vmcs12);
 	vmcs12->vm_exit_reason = exit_reason.full;
-	if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
+	if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		vmx->nested.need_vmcs12_to_shadow_sync = true;
 	return NVMX_VMENTRY_VMEXIT;
 }
@@ -3493,7 +3492,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return nested_vmx_failInvalid(vcpu);
 	}
 
-	if (CC(!vmx->nested.hv_evmcs && vmx->nested.current_vmptr == -1ull))
+	if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
+	       vmx->nested.current_vmptr == -1ull))
 		return nested_vmx_failInvalid(vcpu);
 
 	vmcs12 = get_vmcs12(vcpu);
@@ -3507,7 +3507,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (CC(vmcs12->hdr.shadow_vmcs))
 		return nested_vmx_failInvalid(vcpu);
 
-	if (vmx->nested.hv_evmcs) {
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
 		copy_enlightened_to_vmcs12(vmx);
 		/* Enlightened VMCS doesn't have launch state */
 		vmcs12->launch_state = !launch;
@@ -4066,10 +4066,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (vmx->nested.hv_evmcs)
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
 
-	vmx->nested.need_sync_vmcs02_to_vmcs12_rare = !vmx->nested.hv_evmcs;
+	vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
+		!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
 
 	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
 	vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
@@ -4569,7 +4570,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 	}
 
 	if ((vm_exit_reason != -1) &&
-	    (enable_shadow_vmcs || vmx->nested.hv_evmcs))
+	    (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
 		vmx->nested.need_vmcs12_to_shadow_sync = true;
 
 	/* in case we halted in L2 */
@@ -5265,7 +5266,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
 		return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
 
 	/* Forbid normal VMPTRLD if Enlightened version was used */
-	if (vmx->nested.hv_evmcs)
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		return 1;
 
 	if (vmx->nested.current_vmptr != vmptr) {
@@ -5321,7 +5322,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
 	if (!nested_vmx_check_permission(vcpu))
 		return 1;
 
-	if (unlikely(to_vmx(vcpu)->nested.hv_evmcs))
+	if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
 		return 1;
 
 	if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
@@ -6093,7 +6094,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 		if (vmx_has_valid_vmcs12(vcpu)) {
 			kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
 
-			if (vmx->nested.hv_evmcs)
+			if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 				kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
 
 			if (is_guest_mode(vcpu) &&
@@ -6149,7 +6150,7 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 	} else  {
 		copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
 		if (!vmx->nested.need_vmcs12_to_shadow_sync) {
-			if (vmx->nested.hv_evmcs)
+			if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 				copy_enlightened_to_vmcs12(vmx);
 			else if (enable_shadow_vmcs)
 				copy_shadow_to_vmcs12(vmx);
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 184418baeb3cb..c4397e83614d4 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -63,7 +63,7 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
 	 * have vmcs12 if it is true.
 	 */
 	return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
-		vmx->nested.hv_evmcs;
+		evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
 }
 
 static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e3f744bec7630..68a72c80bd3ff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6861,6 +6861,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
+	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
 
 	vcpu->arch.microcode_version = 0x100000000ULL;
 	vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
-- 
GitLab


From 6a789ca5d5038a60f51c374067fd9abab13df596 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:17 +0200
Subject: [PATCH 3216/3804] KVM: nVMX: Don't set 'dirty_vmcs12' flag on
 enlightened VMPTRLD

'dirty_vmcs12' is only checked in prepare_vmcs02_early()/prepare_vmcs02()
and both checks look like:

 'vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)'

so for eVMCS case the flag changes nothing. Drop the assignment to avoid
the confusion.

No functional change intended.

Reported-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 84d9a8d569bb7..dbee5479103f4 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2021,7 +2021,6 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 			return EVMPTRLD_VMFAIL;
 		}
 
-		vmx->nested.dirty_vmcs12 = true;
 		vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
 
 		evmcs_gpa_changed = true;
-- 
GitLab


From 02761716801dbc99d977bb281de7c1052405c9f5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:18 +0200
Subject: [PATCH 3217/3804] KVM: nVMX: Release eVMCS when enlightened VMENTRY
 was disabled

In theory, L1 can try to disable enlightened VMENTRY in the VP assist page
and then try to issue VMLAUNCH/VMRESUME. While
nested_vmx_handle_enlightened_vmptrld() properly handles this as
'EVMPTRLD_DISABLED', the previously mapped eVMCS remains mapped, so all
evmptr_is_valid() checks will still pass and nested_vmx_run() will
proceed when it shouldn't.

Release eVMCS immediately when we detect that enlightened vmentry was
disabled by L1.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index dbee5479103f4..8d814bf8448ee 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1979,8 +1979,10 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 	if (likely(!vmx->nested.enlightened_vmcs_enabled))
 		return EVMPTRLD_DISABLED;
 
-	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa))
+	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
+		nested_release_evmcs(vcpu);
 		return EVMPTRLD_DISABLED;
+	}
 
 	if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
 		vmx->nested.current_vmptr = -1ull;
-- 
GitLab


From 25641cafabc6dcc0a2d32dbbfd8fc448513b339d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:19 +0200
Subject: [PATCH 3218/3804] KVM: nVMX: Make
 copy_vmcs12_to_enlightened()/copy_enlightened_to_vmcs12() return 'void'

copy_vmcs12_to_enlightened()/copy_enlightened_to_vmcs12() don't return any
meaningful result (they always return 0), so make them return 'void'.

No functional change intended.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8d814bf8448ee..e72a637658f14 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1586,7 +1586,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	vmcs_load(vmx->loaded_vmcs->vmcs);
 }
 
-static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
+static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 {
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
@@ -1799,10 +1799,10 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 	 * vmcs12->exit_io_instruction_eip = evmcs->exit_io_instruction_eip;
 	 */
 
-	return 0;
+	return;
 }
 
-static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
+static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
 {
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
@@ -1962,7 +1962,7 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
 
 	evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
 
-	return 0;
+	return;
 }
 
 /*
-- 
GitLab


From 278499686b18e9012ddefbe0ecabc83e6c0264fe Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:20 +0200
Subject: [PATCH 3219/3804] KVM: nVMX: Introduce 'EVMPTR_MAP_PENDING'
 post-migration state

Unlike regular set_current_vmptr(), nested_vmx_handle_enlightened_vmptrld()
cannot be called directly from vmx_set_nested_state() as KVM may not have
all the information yet (e.g. the HV_X64_MSR_VP_ASSIST_PAGE MSR may not be
restored yet). The enlightened VMCS is mapped later, while getting nested
state pages. In the meantime, vmx->nested.hv_evmcs_vmptr remains
'EVMPTR_INVALID', which is indistinguishable from the 'eVMCS is not in
use' case. This leads to certain issues; in particular, if
KVM_GET_NESTED_STATE is called right after KVM_SET_NESTED_STATE, the
KVM_STATE_NESTED_EVMCS flag in the resulting state will be unset (and
such a state will later fail to load).

Introduce 'EVMPTR_MAP_PENDING' state to detect not-yet-mapped eVMCS after
restore. With this, the 'is_guest_mode(vcpu)' hack in vmx_has_valid_vmcs12()
is no longer needed.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/evmcs.h  |  3 ++-
 arch/x86/kvm/vmx/nested.c |  6 ++++--
 arch/x86/kvm/vmx/nested.h | 11 +++--------
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h
index 47f802f71f6a9..2ec9b46f0d0cf 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/evmcs.h
@@ -198,10 +198,11 @@ static inline void evmcs_touch_msr_bitmap(void) {}
 #endif /* IS_ENABLED(CONFIG_HYPERV) */
 
 #define EVMPTR_INVALID (-1ULL)
+#define EVMPTR_MAP_PENDING (-2ULL)
 
 static inline bool evmptr_is_valid(u64 evmptr)
 {
-	return evmptr != EVMPTR_INVALID;
+	return evmptr != EVMPTR_INVALID && evmptr != EVMPTR_MAP_PENDING;
 }
 
 enum nested_evmptrld_status {
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e72a637658f14..46de0147ca019 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3102,7 +3102,7 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 	 * properly reflected.
 	 */
 	if (vmx->nested.enlightened_vmcs_enabled &&
-	    !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+	    vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
 		enum nested_evmptrld_status evmptrld_status =
 			nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
@@ -6095,7 +6095,8 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 		if (vmx_has_valid_vmcs12(vcpu)) {
 			kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
 
-			if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+			/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
+			if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
 				kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
 
 			if (is_guest_mode(vcpu) &&
@@ -6294,6 +6295,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 		 * restored yet. EVMCS will be mapped from
 		 * nested_get_vmcs12_pages().
 		 */
+		vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
 		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
 	} else {
 		return -EINVAL;
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index c4397e83614d4..b69a80f43b37e 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -56,14 +56,9 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	/*
-	 * In case we do two consecutive get/set_nested_state()s while L2 was
-	 * running hv_evmcs may end up not being mapped (we map it from
-	 * nested_vmx_run()/vmx_vcpu_run()). Check is_guest_mode() as we always
-	 * have vmcs12 if it is true.
-	 */
-	return is_guest_mode(vcpu) || vmx->nested.current_vmptr != -1ull ||
-		evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
+	/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
+	return vmx->nested.current_vmptr != -1ull ||
+		vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID;
 }
 
 static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu)
-- 
GitLab


From 3b19b81acf300a3d452aa07b21d8db528254cb56 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:21 +0200
Subject: [PATCH 3220/3804] KVM: nVMX: Release enlightened VMCS on VMCLEAR

Unlike VMREAD/VMWRITE/VMPTRLD, VMCLEAR is a valid instruction when an
enlightened VMCS is in use. TLFS has the following brief description:
"The L1 hypervisor can execute a VMCLEAR instruction to transition an
enlightened VMCS from the active to the non-active state". Normally,
this change can be ignored, as unmapping the active eVMCS can be
postponed until the next VMLAUNCH instruction. However, if nested state
is migrated with KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE, keeping the
eVMCS mapped may result in its synchronization with VMCS12, which is
incorrect: the L1 hypervisor is free to reuse inactive eVMCS memory for
something else.

Inactive eVMCS after VMCLEAR can just be unmapped.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-7-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 46de0147ca019..6a3fdb90870af 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5026,6 +5026,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 				     vmptr + offsetof(struct vmcs12,
 						      launch_state),
 				     &zero, sizeof(zero));
+	} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
+		nested_release_evmcs(vcpu);
 	}
 
 	return nested_vmx_succeed(vcpu);
-- 
GitLab


From d6bf71a18c74de61548ddad44ff95306fe85f829 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:22 +0200
Subject: [PATCH 3221/3804] KVM: nVMX: Ignore 'hv_clean_fields' data when eVMCS
 data is copied in vmx_get_nested_state()

'Clean fields' data from the enlightened VMCS is only valid upon vmentry:
the L1 hypervisor is not obliged to keep it up-to-date while it is
mangling L2's state. A KVM_GET_NESTED_STATE request may come at a wrong
moment, when actual eVMCS changes are unsynchronized with
'hv_clean_fields'. As VMCS12 is used as the source of ultimate truth upon
migration, we must make sure we pick up all the changes to the eVMCS, and
thus 'clean fields' data must be ignored.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-8-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 43 +++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6a3fdb90870af..3787be104ff0a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1586,7 +1586,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	vmcs_load(vmx->loaded_vmcs->vmcs);
 }
 
-static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
+static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
 {
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
@@ -1595,7 +1595,7 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 	vmcs12->tpr_threshold = evmcs->tpr_threshold;
 	vmcs12->guest_rip = evmcs->guest_rip;
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
 		vmcs12->guest_rsp = evmcs->guest_rsp;
 		vmcs12->guest_rflags = evmcs->guest_rflags;
@@ -1603,23 +1603,23 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 			evmcs->guest_interruptibility_info;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
 		vmcs12->cpu_based_vm_exec_control =
 			evmcs->cpu_based_vm_exec_control;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN))) {
 		vmcs12->exception_bitmap = evmcs->exception_bitmap;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
 		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
 		vmcs12->vm_entry_intr_info_field =
 			evmcs->vm_entry_intr_info_field;
@@ -1629,7 +1629,7 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 			evmcs->vm_entry_instruction_len;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
 		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
 		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
@@ -1649,7 +1649,7 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 		vmcs12->host_tr_selector = evmcs->host_tr_selector;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1))) {
 		vmcs12->pin_based_vm_exec_control =
 			evmcs->pin_based_vm_exec_control;
@@ -1658,18 +1658,18 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 			evmcs->secondary_vm_exec_control;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP))) {
 		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
 		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP))) {
 		vmcs12->msr_bitmap = evmcs->msr_bitmap;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2))) {
 		vmcs12->guest_es_base = evmcs->guest_es_base;
 		vmcs12->guest_cs_base = evmcs->guest_cs_base;
@@ -1709,14 +1709,14 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2))) {
 		vmcs12->tsc_offset = evmcs->tsc_offset;
 		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
 		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR))) {
 		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
 		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
@@ -1728,7 +1728,7 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 		vmcs12->guest_dr7 = evmcs->guest_dr7;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER))) {
 		vmcs12->host_fs_base = evmcs->host_fs_base;
 		vmcs12->host_gs_base = evmcs->host_gs_base;
@@ -1738,13 +1738,13 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 		vmcs12->host_rsp = evmcs->host_rsp;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT))) {
 		vmcs12->ept_pointer = evmcs->ept_pointer;
 		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
 	}
 
-	if (unlikely(!(evmcs->hv_clean_fields &
+	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1))) {
 		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
 		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
@@ -3509,7 +3509,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return nested_vmx_failInvalid(vcpu);
 
 	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
-		copy_enlightened_to_vmcs12(vmx);
+		copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
 		/* Enlightened VMCS doesn't have launch state */
 		vmcs12->launch_state = !launch;
 	} else if (enable_shadow_vmcs) {
@@ -6155,7 +6155,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
 		copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
 		if (!vmx->nested.need_vmcs12_to_shadow_sync) {
 			if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
-				copy_enlightened_to_vmcs12(vmx);
+				/*
+				 * L1 hypervisor is not obliged to keep eVMCS
+				 * clean fields data always up-to-date while
+				 * not in guest mode, 'hv_clean_fields' is only
+				 * supposed to be actual upon vmentry so we need
+				 * to ignore it here and do full copy.
+				 */
+				copy_enlightened_to_vmcs12(vmx, 0);
 			else if (enable_shadow_vmcs)
 				copy_shadow_to_vmcs12(vmx);
 		}
-- 
GitLab


From b7685cfd5e96456be653b61c405ea65f8de95bd6 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:23 +0200
Subject: [PATCH 3222/3804] KVM: nVMX: Force enlightened VMCS sync from
 nested_vmx_failValid()

'need_vmcs12_to_shadow_sync' is used for both shadow and enlightened
VMCS sync when we exit to L1. The comment in nested_vmx_failValid()
validly states why shadow vmcs sync can be omitted but this doesn't
apply to enlightened VMCS as it 'shadows' all VMCS12 fields.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-9-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 3787be104ff0a..c73668b97f5e1 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -173,9 +173,13 @@ static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
 			| X86_EFLAGS_ZF);
 	get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
 	/*
-	 * We don't need to force a shadow sync because
-	 * VM_INSTRUCTION_ERROR is not shadowed
+	 * We don't need to force sync to shadow VMCS because
+	 * VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
+	 * fields and thus must be synced.
 	 */
+	if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
+		to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
+
 	return kvm_skip_emulated_instruction(vcpu);
 }
 
-- 
GitLab


From dc313385529f1a1fa20b06bb61239a31aca9d40f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:24 +0200
Subject: [PATCH 3223/3804] KVM: nVMX: Reset eVMCS clean fields data from
 prepare_vmcs02()

When nested state migration happens during L1's execution, it
is incorrect to modify eVMCS as it is L1 who 'owns' it at the moment.
At least genuine Hyper-V seems to not be very happy when 'clean fields'
data changes underneath it.

'Clean fields' data is used in KVM twice: by copy_enlightened_to_vmcs12()
and prepare_vmcs02_rare() so we can reset it from prepare_vmcs02() instead.

While at it, update a comment stating why exactly we need to reset
'hv_clean_fields' data from L0.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-10-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c73668b97f5e1..2bdc9a24440f9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2059,14 +2059,10 @@ void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
 		copy_vmcs12_to_enlightened(vmx);
-		/* All fields are clean */
-		vmx->nested.hv_evmcs->hv_clean_fields |=
-			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
-	} else {
+	else
 		copy_vmcs12_to_shadow(vmx);
-	}
 
 	vmx->nested.need_vmcs12_to_shadow_sync = false;
 }
@@ -2616,6 +2612,17 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 
 	kvm_rsp_write(vcpu, vmcs12->guest_rsp);
 	kvm_rip_write(vcpu, vmcs12->guest_rip);
+
+	/*
+	 * It was observed that genuine Hyper-V running in L1 doesn't reset
+	 * 'hv_clean_fields' by itself, it only sets the corresponding dirty
+	 * bits when it changes a field in eVMCS. Mark all fields as clean
+	 * here.
+	 */
+	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
+		vmx->nested.hv_evmcs->hv_clean_fields |=
+			HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+
 	return 0;
 }
 
-- 
GitLab


From 8629b625e0151c0d6b78a938744ffd74da422682 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:25 +0200
Subject: [PATCH 3224/3804] KVM: nVMX: Request to sync eVMCS from VMCS12 after
 migration

VMCS12 is used to keep the authoritative state during nested state
migration. In case 'need_vmcs12_to_shadow_sync' flag is set, we're
in between L2->L1 vmexit and L1 guest run when actual sync to
enlightened (or shadow) VMCS happens. Nested state, however, has
no flag for 'need_vmcs12_to_shadow_sync' so vmx_set_nested_state()->
set_current_vmptr() always sets it. Enlightened vmptrld path, however,
doesn't have the quirk so some VMCS12 changes may not get properly
reflected to eVMCS and L1 will see an incorrect state.

Note, during L2 execution or when need_vmcs12_to_shadow_sync is not
set the change is effectively a nop: in the former case all changes
will get reflected during the first L2->L1 vmexit and in the latter
case VMCS12 and eVMCS are already in sync (thanks to
copy_enlightened_to_vmcs12() in vmx_get_nested_state()).

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-11-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2bdc9a24440f9..ee89b48730b6d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3120,6 +3120,12 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
 		if (evmptrld_status == EVMPTRLD_VMFAIL ||
 		    evmptrld_status == EVMPTRLD_ERROR)
 			return false;
+
+		/*
+		 * Post migration VMCS12 always provides the most actual
+		 * information, copy it to eVMCS upon entry.
+		 */
+		vmx->nested.need_vmcs12_to_shadow_sync = true;
 	}
 
 	return true;
-- 
GitLab


From 8f7663cea285ef41306fb3ea5b5a48e8e38a681d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 26 May 2021 15:20:26 +0200
Subject: [PATCH 3225/3804] KVM: selftests: evmcs_test: Test that
 KVM_STATE_NESTED_EVMCS is never lost

Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly restored VM
(before the first KVM_RUN) to check that KVM_STATE_NESTED_EVMCS is not
lost.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210526132026.270394-12-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 64 +++++++++++--------
 1 file changed, 38 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index d058d9e428c63..d0a7a998e8404 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -112,14 +112,38 @@ void inject_nmi(struct kvm_vm *vm)
 	vcpu_events_set(vm, VCPU_ID, &events);
 }
 
+static void save_restore_vm(struct kvm_vm *vm)
+{
+	struct kvm_regs regs1, regs2;
+	struct kvm_x86_state *state;
+
+	state = vcpu_save_state(vm, VCPU_ID);
+	memset(&regs1, 0, sizeof(regs1));
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	kvm_vm_release(vm);
+
+	/* Restore state in a new VM.  */
+	kvm_vm_restart(vm, O_RDWR);
+	vm_vcpu_add(vm, VCPU_ID);
+	vcpu_set_hv_cpuid(vm, VCPU_ID);
+	vcpu_enable_evmcs(vm, VCPU_ID);
+	vcpu_load_state(vm, VCPU_ID, state);
+	free(state);
+
+	memset(&regs2, 0, sizeof(regs2));
+	vcpu_regs_get(vm, VCPU_ID, &regs2);
+	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+		    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+		    (ulong) regs2.rdi, (ulong) regs2.rsi);
+}
+
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva = 0;
 
-	struct kvm_regs regs1, regs2;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
-	struct kvm_x86_state *state;
 	struct ucall uc;
 	int stage;
 
@@ -136,10 +160,6 @@ int main(int argc, char *argv[])
 	vcpu_set_hv_cpuid(vm, VCPU_ID);
 	vcpu_enable_evmcs(vm, VCPU_ID);
 
-	run = vcpu_state(vm, VCPU_ID);
-
-	vcpu_regs_get(vm, VCPU_ID, &regs1);
-
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
@@ -151,6 +171,7 @@ int main(int argc, char *argv[])
 	pr_info("Running L1 which uses EVMCS to run L2\n");
 
 	for (stage = 1;; stage++) {
+		run = vcpu_state(vm, VCPU_ID);
 		_vcpu_run(vm, VCPU_ID);
 		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
 			    "Stage %d: unexpected exit reason: %u (%s),\n",
@@ -175,32 +196,23 @@ int main(int argc, char *argv[])
 			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
 			    stage, (ulong)uc.args[1]);
 
-		state = vcpu_save_state(vm, VCPU_ID);
-		memset(&regs1, 0, sizeof(regs1));
-		vcpu_regs_get(vm, VCPU_ID, &regs1);
-
-		kvm_vm_release(vm);
-
-		/* Restore state in a new VM.  */
-		kvm_vm_restart(vm, O_RDWR);
-		vm_vcpu_add(vm, VCPU_ID);
-		vcpu_set_hv_cpuid(vm, VCPU_ID);
-		vcpu_enable_evmcs(vm, VCPU_ID);
-		vcpu_load_state(vm, VCPU_ID, state);
-		run = vcpu_state(vm, VCPU_ID);
-		free(state);
-
-		memset(&regs2, 0, sizeof(regs2));
-		vcpu_regs_get(vm, VCPU_ID, &regs2);
-		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
-			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
-			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+		save_restore_vm(vm);
 
 		/* Force immediate L2->L1 exit before resuming */
 		if (stage == 8) {
 			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
 			inject_nmi(vm);
 		}
+
+		/*
+		 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+		 * restored VM (before the first KVM_RUN) to check that
+		 * KVM_STATE_NESTED_EVMCS is not lost.
+		 */
+		if (stage == 9) {
+			pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+			save_restore_vm(vm);
+		}
 	}
 
 done:
-- 
GitLab


From 07ffaf343e34b555c9e7ea39a9c81c439a706f13 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:21 -0700
Subject: [PATCH 3226/3804] KVM: nVMX: Sync all PGDs on nested transition with
 shadow paging

Trigger a full TLB flush on behalf of the guest on nested VM-Enter and
VM-Exit when VPID is disabled for L2.  kvm_mmu_new_pgd() syncs only the
current PGD, which can theoretically leave stale, unsync'd entries in a
previous guest PGD, which could be consumed if L2 is allowed to load CR3
with PCID_NOFLUSH=1.

Rename KVM_REQ_HV_TLB_FLUSH to KVM_REQ_TLB_FLUSH_GUEST so that it can
be utilized for its obvious purpose of emulating a guest TLB flush.

Note, there is no change to the actual TLB flush executed by KVM, even
though the fast PGD switch uses KVM_REQ_TLB_FLUSH_CURRENT.  When VPID is
disabled for L2, vpid02 is guaranteed to be '0', and thus
nested_get_vpid02() will return the VPID that is shared by L1 and L2.

Generate the request outside of kvm_mmu_new_pgd(), as getting the common
helper to correctly identify which request is needed is quite painful.
E.g. using KVM_REQ_TLB_FLUSH_GUEST when nested EPT is in play is wrong as
a TLB flush from the L1 kernel's perspective does not invalidate EPT
mappings.  And, by using KVM_REQ_TLB_FLUSH_GUEST, nVMX can do future
simplification by moving the logic into nested_vmx_transition_tlb_flush().

Fixes: 41fab65e7c44 ("KVM: nVMX: Skip MMU sync on nested VMX transition when possible")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/hyperv.c           |  2 +-
 arch/x86/kvm/vmx/nested.c       | 17 ++++++++++++-----
 arch/x86/kvm/x86.c              |  2 +-
 4 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 383106901fe2a..f44a9795b91f4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -85,7 +85,7 @@
 #define KVM_REQ_APICV_UPDATE \
 	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
-#define KVM_REQ_HV_TLB_FLUSH \
+#define KVM_REQ_TLB_FLUSH_GUEST \
 	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f911dca7dd67..b07592ca92f07 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1829,7 +1829,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
-	kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
+	kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
 				    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
 
 ret_success:
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index ee89b48730b6d..a9906c8344b8e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1131,12 +1131,19 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 
 	/*
 	 * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
-	 * flushes are handled by nested_vmx_transition_tlb_flush().  See
-	 * nested_vmx_transition_mmu_sync for details on skipping the MMU sync.
+	 * flushes are handled by nested_vmx_transition_tlb_flush().
 	 */
-	if (!nested_ept)
-		kvm_mmu_new_pgd(vcpu, cr3, true,
-				!nested_vmx_transition_mmu_sync(vcpu));
+	if (!nested_ept) {
+		kvm_mmu_new_pgd(vcpu, cr3, true, true);
+
+		/*
+		 * A TLB flush on VM-Enter/VM-Exit flushes all linear mappings
+		 * across all PCIDs, i.e. all PGDs need to be synchronized.
+		 * See nested_vmx_transition_mmu_sync() for more details.
+		 */
+		if (nested_vmx_transition_mmu_sync(vcpu))
+			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+	}
 
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1727178b89610..efcdd1f46d643 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9279,7 +9279,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
 			kvm_vcpu_flush_tlb_current(vcpu);
-		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+		if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
 			kvm_vcpu_flush_tlb_guest(vcpu);
 
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
-- 
GitLab


From 0e75225dfa4c5d5d51291f54a3d2d5895bad38da Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:22 -0700
Subject: [PATCH 3227/3804] KVM: nVMX: Ensure 64-bit shift when checking VMFUNC
 bitmap

Use BIT_ULL() instead of an open-coded shift to check whether or not a
function is enabled in L1's VMFUNC bitmap.  This is a benign bug as KVM
supports only bit 0, and will fail VM-Enter if any other bits are set,
i.e. bits 63:32 are guaranteed to be zero.

Note, "function" is bounded by hardware as VMFUNC will #UD before taking
a VM-Exit if the function is greater than 63.

Before:
  if ((vmcs12->vm_function_control & (1 << function)) == 0)
   0x000000000001a916 <+118>:	mov    $0x1,%eax
   0x000000000001a91b <+123>:	shl    %cl,%eax
   0x000000000001a91d <+125>:	cltq
   0x000000000001a91f <+127>:	and    0x128(%rbx),%rax

After:
  if (!(vmcs12->vm_function_control & BIT_ULL(function & 63)))
   0x000000000001a955 <+117>:	mov    0x128(%rbx),%rdx
   0x000000000001a95c <+124>:	bt     %rax,%rdx

Fixes: 27c42a1bb867 ("KVM: nVMX: Enable VMFUNC for the L1 hypervisor")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index a9906c8344b8e..775df9e2ff88e 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5598,7 +5598,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	}
 
 	vmcs12 = get_vmcs12(vcpu);
-	if ((vmcs12->vm_function_control & (1 << function)) == 0)
+	if (!(vmcs12->vm_function_control & BIT_ULL(function)))
 		goto fail;
 
 	switch (function) {
-- 
GitLab


From 272b0a998d084e7667284bdd2d0c675c6a2d11de Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:23 -0700
Subject: [PATCH 3228/3804] KVM: nVMX: Don't clobber nested MMU's A/D status on
 EPTP switch

Drop bogus logic that incorrectly clobbers the accessed/dirty enabling
status of the nested MMU on an EPTP switch.  When nested EPT is enabled,
walk_mmu points at L2's _legacy_ page tables, not L1's EPT for L2.

This is likely a benign bug, as mmu->ept_ad is never consumed (since the
MMU is not a nested EPT MMU), and stuffing mmu_role.base.ad_disabled will
never propagate into future shadow pages since the nested MMU isn't used
to map anything, just to walk L2's page tables.

Note, KVM also does a full MMU reload, i.e. the guest_mmu will be
recreated using the new EPTP, and thus any change in A/D enabling will be
properly recognized in the relevant MMU.

Fixes: 41ab93727467 ("KVM: nVMX: Emulate EPTP switching for the L1 hypervisor")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 775df9e2ff88e..7210e7ca0af40 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5546,8 +5546,6 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 {
 	u32 index = kvm_rcx_read(vcpu);
 	u64 new_eptp;
-	bool accessed_dirty;
-	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
 
 	if (!nested_cpu_has_eptp_switching(vmcs12) ||
 	    !nested_cpu_has_ept(vmcs12))
@@ -5556,13 +5554,10 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 	if (index >= VMFUNC_EPTP_ENTRIES)
 		return 1;
 
-
 	if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
 				     &new_eptp, index * 8, 8))
 		return 1;
 
-	accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
-
 	/*
 	 * If the (L2) guest does a vmfunc to the currently
 	 * active ept pointer, we don't have to do anything else
@@ -5571,8 +5566,6 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 		if (!nested_vmx_check_eptp(vcpu, new_eptp))
 			return 1;
 
-		mmu->ept_ad = accessed_dirty;
-		mmu->mmu_role.base.ad_disabled = !accessed_dirty;
 		vmcs12->ept_pointer = new_eptp;
 
 		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
-- 
GitLab


From 21823fbda552252271c948850f80f15edfdf25b6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:24 -0700
Subject: [PATCH 3229/3804] KVM: x86: Invalidate all PGDs for the current PCID
 on MOV CR3 w/ flush

Flush and sync all PGDs for the current/target PCID on MOV CR3 with a
TLB flush, i.e. without PCID_NOFLUSH set.  Paraphrasing Intel's SDM
regarding the behavior of MOV to CR3:

  - If CR4.PCIDE = 0, invalidates all TLB entries associated with PCID
    000H and all entries in all paging-structure caches associated with
    PCID 000H.

  - If CR4.PCIDE = 1 and NOFLUSH=0, invalidates all TLB entries
    associated with the PCID specified in bits 11:0, and all entries in
    all paging-structure caches associated with that PCID. It is not
    required to invalidate entries in the TLBs and paging-structure
    caches that are associated with other PCIDs.

  - If CR4.PCIDE=1 and NOFLUSH=1, is not required to invalidate any TLB
    entries or entries in paging-structure caches.

Extract and reuse the logic for INVPCID(single) which is effectively the
same flow and works even if CR4.PCIDE=0, as the current PCID will be '0'
in that case, thus honoring the requirement of flushing PCID=0.

Continue passing skip_tlb_flush to kvm_mmu_new_pgd() even though it
_should_ be redundant; the clean up will be done in a future patch.  The
overhead of an unnecessary nop sync is minimal (especially compared to
the actual sync), and the TLB flush is handled via request.  Avoiding the
negligible overhead is not worth the risk of breaking kernels that
backport the fix.

Fixes: 956bf3531fba ("kvm: x86: Skip shadow page resync on CR3 switch when indicated by guest")
Cc: Junaid Shahid <junaids@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 69 ++++++++++++++++++++++++++++------------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index efcdd1f46d643..8ed5f3252e9d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1062,26 +1062,46 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
+static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
+{
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	unsigned long roots_to_free = 0;
+	int i;
+
+	/*
+	 * If neither the current CR3 nor any of the prev_roots use the given
+	 * PCID, then nothing needs to be done here because a resync will
+	 * happen anyway before switching to any other CR3.
+	 */
+	if (kvm_get_active_pcid(vcpu) == pcid) {
+		kvm_mmu_sync_roots(vcpu);
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+	}
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
+		if (kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd) == pcid)
+			roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+
+	kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	bool skip_tlb_flush = false;
+	unsigned long pcid = 0;
 #ifdef CONFIG_X86_64
 	bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE);
 
 	if (pcid_enabled) {
 		skip_tlb_flush = cr3 & X86_CR3_PCID_NOFLUSH;
 		cr3 &= ~X86_CR3_PCID_NOFLUSH;
+		pcid = cr3 & X86_CR3_PCID_MASK;
 	}
 #endif
 
 	/* PDPTRs are always reloaded for PAE paging. */
-	if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu)) {
-		if (!skip_tlb_flush) {
-			kvm_mmu_sync_roots(vcpu);
-			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-		}
-		return 0;
-	}
+	if (cr3 == kvm_read_cr3(vcpu) && !is_pae_paging(vcpu))
+		goto handle_tlb_flush;
 
 	/*
 	 * Do not condition the GPA check on long mode, this helper is used to
@@ -1094,10 +1114,23 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
-	kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+	if (cr3 != kvm_read_cr3(vcpu))
+		kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+handle_tlb_flush:
+	/*
+	 * A load of CR3 that flushes the TLB flushes only the current PCID,
+	 * even if PCID is disabled, in which case PCID=0 is flushed.  It's a
+	 * moot point in the end because _disabling_ PCID will flush all PCIDs,
+	 * and it's impossible to use a non-zero PCID when PCID is disabled,
+	 * i.e. only PCID=0 can be relevant.
+	 */
+	if (!skip_tlb_flush)
+		kvm_invalidate_pcid(vcpu, pcid);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
@@ -11952,8 +11985,6 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 {
 	bool pcid_enabled;
 	struct x86_exception e;
-	unsigned i;
-	unsigned long roots_to_free = 0;
 	struct {
 		u64 pcid;
 		u64 gla;
@@ -11987,23 +12018,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 			return 1;
 		}
 
-		if (kvm_get_active_pcid(vcpu) == operand.pcid) {
-			kvm_mmu_sync_roots(vcpu);
-			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-		}
-
-		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd)
-			    == operand.pcid)
-				roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
-
-		kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, roots_to_free);
-		/*
-		 * If neither the current cr3 nor any of the prev_roots use the
-		 * given PCID, then nothing needs to be done here because a
-		 * resync will happen anyway before switching to any other CR3.
-		 */
-
+		kvm_invalidate_pcid(vcpu, operand.pcid);
 		return kvm_skip_emulated_instruction(vcpu);
 
 	case INVPCID_TYPE_ALL_NON_GLOBAL:
-- 
GitLab


From 415b1a0105cd05a428f8b28ac1bf406ca2b4bbd7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:25 -0700
Subject: [PATCH 3230/3804] KVM: x86: Unconditionally skip MMU sync/TLB flush in
 MOV CR3's PGD switch

Stop leveraging the MMU sync and TLB flush requested by the fast PGD
switch helper now that kvm_set_cr3() manually handles the necessary sync,
frees, and TLB flush.  This will allow dropping the params from the fast
PGD helpers since nested SVM is now the odd blob out.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ed5f3252e9d7..7d2c7a3306b70 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1115,7 +1115,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 1;
 
 	if (cr3 != kvm_read_cr3(vcpu))
-		kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush);
+		kvm_mmu_new_pgd(vcpu, cr3, true, true);
 
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
-- 
GitLab


From d2e5601907bd294411920a84c0231473557d16b9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:26 -0700
Subject: [PATCH 3231/3804] KVM: nSVM: Move TLB flushing logic (or lack
 thereof) to dedicated helper

Introduce nested_svm_transition_tlb_flush() and use it to force an MMU sync
and TLB flush on nSVM VM-Enter and VM-Exit instead of sneaking the logic
into the __kvm_mmu_new_pgd() call sites.  Add a partial todo list to
document issues that need to be addressed before the unconditional sync
and flush can be modified to look more like nVMX's logic.

In addition to making nSVM's forced flushing more overt (guess who keeps
losing track of it), the new helper brings further convergence between
nSVM and nVMX, and also sets the stage for dropping the "skip" params
from __kvm_mmu_new_pgd().

Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c    |  2 +-
 arch/x86/kvm/svm/nested.c | 38 +++++++++++++++++++++++++++++---------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index aa9e77f406d9c..64d734239efa5 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4684,7 +4684,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
 	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
 	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
 
-	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, false, false);
+	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, true, true);
 
 	if (new_role.as_u64 != context->mmu_role.as_u64) {
 		shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c902ace2bd179..20e672236a75e 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -380,6 +380,25 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm)
 	return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
 }
 
+static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
+	 * things to fix before this can be conditional:
+	 *
+	 *  - Flush TLBs for both L1 and L2 remote TLB flush
+	 *  - Honor L1's request to flush an ASID on nested VMRUN
+	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
+	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
+	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
+	 *
+	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
+	 *     NPT guest-physical mappings on VMRUN.
+	 */
+	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+}
+
 /*
  * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
  * if we are emulating VM-Entry into a guest with NPT enabled.
@@ -394,12 +413,8 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	    CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
 		return -EINVAL;
 
-	/*
-	 * TODO: optimize unconditional TLB flush/MMU sync here and in
-	 * kvm_init_shadow_npt_mmu().
-	 */
 	if (!nested_npt)
-		kvm_mmu_new_pgd(vcpu, cr3, false, false);
+		kvm_mmu_new_pgd(vcpu, cr3, true, true);
 
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
@@ -479,6 +494,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 {
 	const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	/*
 	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
@@ -503,10 +519,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 
 	/* nested_cr3.  */
 	if (nested_npt_enabled(svm))
-		nested_svm_init_mmu_context(&svm->vcpu);
+		nested_svm_init_mmu_context(vcpu);
 
-	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
-		svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
+	svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset =
+		vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
 
 	svm->vmcb->control.int_ctl             =
 		(svm->nested.ctl.int_ctl & ~mask) |
@@ -521,8 +537,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	svm->vmcb->control.pause_filter_count  = svm->nested.ctl.pause_filter_count;
 	svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh;
 
+	nested_svm_transition_tlb_flush(vcpu);
+
 	/* Enter Guest-Mode */
-	enter_guest_mode(&svm->vcpu);
+	enter_guest_mode(vcpu);
 
 	/*
 	 * Merge guest and host intercepts - must be called with vcpu in
@@ -799,6 +817,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	kvm_vcpu_unmap(vcpu, &map, true);
 
+	nested_svm_transition_tlb_flush(vcpu);
+
 	nested_svm_uninit_mmu_context(vcpu);
 
 	rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true);
-- 
GitLab


From b5129100398ac3b6364cfa6dbd55abfd36cf7202 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:27 -0700
Subject: [PATCH 3232/3804] KVM: x86: Drop skip MMU sync and TLB flush params
 from "new PGD" helpers

Drop skip_mmu_sync and skip_tlb_flush from __kvm_mmu_new_pgd() now that
all call sites unconditionally skip both the sync and flush.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 +--
 arch/x86/kvm/mmu/mmu.c          | 17 +++++++----------
 arch/x86/kvm/svm/nested.c       |  2 +-
 arch/x86/kvm/vmx/nested.c       |  6 +-----
 arch/x86/kvm/x86.c              |  2 +-
 5 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f44a9795b91f4..d866bfec13376 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1708,8 +1708,7 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
 void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			    gva_t gva, hpa_t root_hpa);
 void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
-void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
-		     bool skip_mmu_sync);
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
 
 void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
 		       int tdp_huge_page_level);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 64d734239efa5..894b9a4a5961c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3949,8 +3949,7 @@ static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd,
 }
 
 static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
-			      union kvm_mmu_page_role new_role,
-			      bool skip_tlb_flush, bool skip_mmu_sync)
+			      union kvm_mmu_page_role new_role)
 {
 	if (!fast_pgd_switch(vcpu, new_pgd, new_role)) {
 		kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT);
@@ -3965,10 +3964,10 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
 	 */
 	kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
 
-	if (!skip_mmu_sync || force_flush_and_sync_on_reuse)
+	if (force_flush_and_sync_on_reuse) {
 		kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-	if (!skip_tlb_flush || force_flush_and_sync_on_reuse)
 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+	}
 
 	/*
 	 * The last MMIO access's GVA and GPA are cached in the VCPU. When
@@ -3987,11 +3986,9 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
 				to_shadow_page(vcpu->arch.mmu->root_hpa));
 }
 
-void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush,
-		     bool skip_mmu_sync)
+void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 {
-	__kvm_mmu_new_pgd(vcpu, new_pgd, kvm_mmu_calc_root_page_role(vcpu),
-			  skip_tlb_flush, skip_mmu_sync);
+	__kvm_mmu_new_pgd(vcpu, new_pgd, kvm_mmu_calc_root_page_role(vcpu));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
 
@@ -4684,7 +4681,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
 	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
 	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
 
-	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base, true, true);
+	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
 	if (new_role.as_u64 != context->mmu_role.as_u64) {
 		shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
@@ -4736,7 +4733,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 		kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
 						   execonly, level);
 
-	__kvm_mmu_new_pgd(vcpu, new_eptp, new_role.base, true, true);
+	__kvm_mmu_new_pgd(vcpu, new_eptp, new_role.base);
 
 	if (new_role.as_u64 == context->mmu_role.as_u64)
 		return;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 20e672236a75e..5f45991edcda8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -414,7 +414,7 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		return -EINVAL;
 
 	if (!nested_npt)
-		kvm_mmu_new_pgd(vcpu, cr3, true, true);
+		kvm_mmu_new_pgd(vcpu, cr3);
 
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7210e7ca0af40..d07b83b1bd3cc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1129,12 +1129,8 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		return -EINVAL;
 	}
 
-	/*
-	 * Unconditionally skip the TLB flush on fast CR3 switch, all TLB
-	 * flushes are handled by nested_vmx_transition_tlb_flush().
-	 */
 	if (!nested_ept) {
-		kvm_mmu_new_pgd(vcpu, cr3, true, true);
+		kvm_mmu_new_pgd(vcpu, cr3);
 
 		/*
 		 * A TLB flush on VM-Enter/VM-Exit flushes all linear mappings
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d2c7a3306b70..1a0fb0f1c1cb7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1115,7 +1115,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		return 1;
 
 	if (cr3 != kvm_read_cr3(vcpu))
-		kvm_mmu_new_pgd(vcpu, cr3, true, true);
+		kvm_mmu_new_pgd(vcpu, cr3);
 
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
-- 
GitLab


From 50a417962a80525da54fa74105bcf17b479cd4bc Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:28 -0700
Subject: [PATCH 3233/3804] KVM: nVMX: Consolidate VM-Enter/VM-Exit TLB flush
 and MMU sync logic

Drop the dedicated nested_vmx_transition_mmu_sync() now that the MMU sync
is handled via KVM_REQ_TLB_FLUSH_GUEST, and fold that flush into the
all-encompassing nested_vmx_transition_tlb_flush().

Opportunistically add a comment explaining why nested EPT never needs to
sync the MMU on VM-Enter.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 87 +++++++++++----------------------------
 1 file changed, 23 insertions(+), 64 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index d07b83b1bd3cc..8e2487f21a6f5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1062,48 +1062,6 @@ static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
 	}
 }
 
-/*
- * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit.
- * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't
- * enable VPID for L2 (implying it expects a TLB flush on VMX transitions).
- * Here's why.
- *
- * If EPT is enabled by L0 a sync is never needed:
- * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there
- *   cannot be unsync'd SPTEs for either L1 or L2.
- *
- * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter
- *   VM-Enter as VM-Enter isn't required to invalidate guest-physical mappings
- *   (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush
- *   stale guest-physical mappings for L2 from the TLB.  And as above, L0 isn't
- *   shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit.
- *
- * If EPT is disabled by L0:
- * - if VPID is enabled by L1 (for L2), the situation is similar to when L1
- *   enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't
- *   required to invalidate linear mappings (EPT is disabled so there are
- *   no combined or guest-physical mappings), i.e. L1 can't rely on the
- *   (virtual) CPU to flush stale linear mappings for either L2 or itself (L1).
- *
- * - however if VPID is disabled by L1, then a sync is needed as L1 expects all
- *   linear mappings (EPT is disabled so there are no combined or guest-physical
- *   mappings) to be invalidated on both VM-Enter and VM-Exit.
- *
- * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which
- * additionally checks that L2 has been assigned a VPID (when EPT is disabled).
- * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect
- * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2
- * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has
- * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1
- * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't
- * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush
- * stale TLB entries, at which point L0 will sync L2's MMU.
- */
-static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
-{
-	return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu));
-}
-
 /*
  * Load guest's/host's cr3 at nested entry/exit.  @nested_ept is true if we are
  * emulating VM-Entry into a guest with EPT enabled.  On failure, the expected
@@ -1129,18 +1087,9 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 		return -EINVAL;
 	}
 
-	if (!nested_ept) {
+	if (!nested_ept)
 		kvm_mmu_new_pgd(vcpu, cr3);
 
-		/*
-		 * A TLB flush on VM-Enter/VM-Exit flushes all linear mappings
-		 * across all PCIDs, i.e. all PGDs need to be synchronized.
-		 * See nested_vmx_transition_mmu_sync() for more details.
-		 */
-		if (nested_vmx_transition_mmu_sync(vcpu))
-			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
-	}
-
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
@@ -1177,17 +1126,28 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	/*
-	 * If VPID is disabled, linear and combined mappings are flushed on
-	 * VM-Enter/VM-Exit, and guest-physical mappings are valid only for
-	 * their associated EPTP.
+	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
+	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
+	 * full TLB flush from the guest's perspective.  This is required even
+	 * if VPID is disabled in the host as KVM may need to synchronize the
+	 * MMU in response to the guest TLB flush.
+	 *
+	 * Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
+	 * EPT is a special snowflake, as guest-physical mappings aren't
+	 * flushed on VPID invalidations, including VM-Enter or VM-Exit with
+	 * VPID disabled.  As a result, KVM _never_ needs to sync nEPT
+	 * entries on VM-Enter because L1 can't rely on VM-Enter to flush
+	 * those mappings.
 	 */
-	if (!enable_vpid)
+	if (!nested_cpu_has_vpid(vmcs12)) {
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 		return;
+	}
+
+	/* L2 should never have a VPID if VPID is disabled. */
+	WARN_ON(!enable_vpid);
 
 	/*
-	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
-	 * for *all* contexts to be flushed on VM-Enter/VM-Exit.
-	 *
 	 * If VPID is enabled and used by vmc12, but L2 does not have a unique
 	 * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate
 	 * a VPID for L2, flush the current context as the effective ASID is
@@ -1199,13 +1159,12 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 	 *
 	 * If a TLB flush isn't required due to any of the above, and vpid12 is
 	 * changing then the new "virtual" VPID (vpid12) will reuse the same
-	 * "real" VPID (vpid02), and so needs to be sync'd.  There is no direct
+	 * "real" VPID (vpid02), and so needs to be flushed.  There's no direct
 	 * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for
-	 * all nested vCPUs.
+	 * all nested vCPUs.  Remember, a flush on VM-Enter does not invalidate
+	 * guest-physical mappings, so there is no need to sync the nEPT MMU.
 	 */
-	if (!nested_cpu_has_vpid(vmcs12)) {
-		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
-	} else if (!nested_has_guest_tlb_tag(vcpu)) {
+	if (!nested_has_guest_tlb_tag(vcpu)) {
 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 	} else if (is_vmenter &&
 		   vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
-- 
GitLab


From 25b62c6274ed466fe2e9f3a681e46d99e6703fd4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:29 -0700
Subject: [PATCH 3234/3804] KVM: nVMX: Free only guest_mode (L2) roots on
 INVVPID w/o EPT

When emulating INVVPID for L1, free only L2+ roots, using the guest_mode
tag in the MMU role to identify L2+ roots.  From L1's perspective, its
own TLB entries use VPID=0, and INVVPID is not required to invalidate such
entries.  Per Intel's SDM, INVVPID _may_ invalidate entries with VPID=0,
but it is not required to do so.

Cc: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c          | 27 +++++++++++++++++++++++++++
 arch/x86/kvm/vmx/nested.c       |  7 +++----
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d866bfec13376..a92d565906135 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1684,6 +1684,7 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free);
+void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 894b9a4a5961c..f4fea68a88f69 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3212,6 +3212,33 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
 
+void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned long roots_to_free = 0;
+	hpa_t root_hpa;
+	int i;
+
+	/*
+	 * This should not be called while L2 is active, L2 can't invalidate
+	 * _only_ its own roots, e.g. INVVPID unconditionally exits.
+	 */
+	WARN_ON_ONCE(mmu->mmu_role.base.guest_mode);
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		root_hpa = mmu->prev_roots[i].hpa;
+		if (!VALID_PAGE(root_hpa))
+			continue;
+
+		if (!to_shadow_page(root_hpa) ||
+			to_shadow_page(root_hpa)->role.guest_mode)
+			roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+	}
+
+	kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
+
+
 static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
 {
 	int ret = 0;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8e2487f21a6f5..13a4accca348f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5481,8 +5481,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 
 	/*
 	 * Sync the shadow page tables if EPT is disabled, L1 is invalidating
-	 * linear mappings for L2 (tagged with L2's VPID).  Free all roots as
-	 * VPIDs are not tracked in the MMU role.
+	 * linear mappings for L2 (tagged with L2's VPID).  Free all guest
+	 * roots as VPIDs are not tracked in the MMU role.
 	 *
 	 * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
 	 * an MMU when EPT is disabled.
@@ -5490,8 +5490,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 	 * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
 	 */
 	if (!enable_ept)
-		kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu,
-				   KVM_MMU_ROOTS_ALL);
+		kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
 
 	return nested_vmx_succeed(vcpu);
 }
-- 
GitLab


From 28f28d453ffcca4a45c1fd93666d9e77a48cb45b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:30 -0700
Subject: [PATCH 3235/3804] KVM: x86: Use KVM_REQ_TLB_FLUSH_GUEST to handle
 INVPCID(ALL) emulation

Use KVM_REQ_TLB_FLUSH_GUEST instead of KVM_REQ_MMU_RELOAD when emulating
INVPCID of all contexts.  In the current code, this is a glorified nop as
TLB_FLUSH_GUEST becomes kvm_mmu_unload(), same as MMU_RELOAD, when TDP
is disabled, which is the only time INVPCID is intercepted+emulated.
In the future, reusing TLB_FLUSH_GUEST will simplify optimizing paths
that emulate a guest TLB flush, e.g. by synchronizing as needed instead
of completely unloading all MMUs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1a0fb0f1c1cb7..41b936187b2c8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12031,7 +12031,7 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
 
 		fallthrough;
 	case INVPCID_TYPE_ALL_INCL_GLOBAL:
-		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 		return kvm_skip_emulated_instruction(vcpu);
 
 	default:
-- 
GitLab


From 39353ab5790be2802b0de29caeba43015fb90dcf Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:31 -0700
Subject: [PATCH 3236/3804] KVM: nVMX: Use fast PGD switch when emulating
 VMFUNC[EPTP_SWITCH]

Use __kvm_mmu_new_pgd() via kvm_init_shadow_ept_mmu() to emulate
VMFUNC[EPTP_SWITCH] instead of nuking all MMUs.  EPTP_SWITCH is the EPT
equivalent of MOV to CR3, i.e. is a perfect fit for the common PGD flow,
the only hiccup being that A/D enabling is buried in the EPTP.  But, that
is easily handled by bouncing through kvm_init_shadow_ept_mmu().

Explicitly request a guest TLB flush if VPID is disabled.  Per Intel's
SDM, if VPID is disabled, "an EPTP-switching VMFUNC invalidates combined
mappings associated with VPID 0000H (for all PCIDs and for all EP4TA
values, where EP4TA is the value of bits 51:12 of EPTP)".

Note, this technically is a very bizarre bug fix of sorts if L2 is using
PAE paging, as avoiding the full MMU reload also avoids incorrectly
reloading the PDPTEs, which the SDM explicitly states are not touched:

  If PAE paging is in use, an EPTP-switching VMFUNC does not load the
  four page-directory-pointer-table entries (PDPTEs) from the
  guest-physical address in CR3. The logical processor continues to use
  the four guest-physical addresses already present in the PDPTEs. The
  guest-physical address in CR3 is not translated through the new EPT
  paging structures (until some operation that would load the PDPTEs).

In addition to optimizing L2's MMU shenanigans, avoiding the full reload
also optimizes L1's MMU as KVM_REQ_MMU_RELOAD wipes out all roots in both
root_mmu and guest_mmu.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-12-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 13a4accca348f..23f974fee5d49 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -351,16 +351,21 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 	vmcs12->guest_physical_address = fault->address;
 }
 
+static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
+{
+	kvm_init_shadow_ept_mmu(vcpu,
+				to_vmx(vcpu)->nested.msrs.ept_caps &
+				VMX_EPT_EXECUTE_ONLY_BIT,
+				nested_ept_ad_enabled(vcpu),
+				nested_ept_get_eptp(vcpu));
+}
+
 static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(mmu_is_nested(vcpu));
 
 	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
-	kvm_init_shadow_ept_mmu(vcpu,
-			to_vmx(vcpu)->nested.msrs.ept_caps &
-			VMX_EPT_EXECUTE_ONLY_BIT,
-			nested_ept_ad_enabled(vcpu),
-			nested_ept_get_eptp(vcpu));
+	nested_ept_new_eptp(vcpu);
 	vcpu->arch.mmu->get_guest_pgd     = nested_ept_get_eptp;
 	vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
 	vcpu->arch.mmu->get_pdptr         = kvm_pdptr_read;
@@ -5521,8 +5526,10 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 			return 1;
 
 		vmcs12->ept_pointer = new_eptp;
+		nested_ept_new_eptp(vcpu);
 
-		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
+		if (!nested_cpu_has_vpid(vmcs12))
+			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 	}
 
 	return 0;
-- 
GitLab


From e62f1aa8b9304f4608a6a1517e9041cec555c09d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:32 -0700
Subject: [PATCH 3237/3804] KVM: x86: Defer MMU sync on PCID invalidation

Defer the MMU sync on PCID invalidation so that multiple sync requests in
a single VM-Exit are batched.  This is a very minor optimization as
checking for unsync'd children is quite cheap.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 41b936187b2c8..9ca30a3879d4e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1074,7 +1074,7 @@ static void kvm_invalidate_pcid(struct kvm_vcpu *vcpu, unsigned long pcid)
 	 * happen anyway before switching to any other CR3.
 	 */
 	if (kvm_get_active_pcid(vcpu) == pcid) {
-		kvm_mmu_sync_roots(vcpu);
+		kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 	}
 
-- 
GitLab


From c906066288d0da7b8c2b5ac4d0d8e85f10f5d5b8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:33 -0700
Subject: [PATCH 3238/3804] KVM: x86: Drop pointless @reset_roots from
 kvm_init_mmu()

Remove the @reset_roots param from kvm_init_mmu(), the one user,
kvm_mmu_reset_context() has already unloaded the MMU and thus freed and
invalidated all roots.  This also happens to be why the reset_roots=true
path doesn't leak roots; they're already invalid.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h        |  2 +-
 arch/x86/kvm/mmu/mmu.c    | 13 ++-----------
 arch/x86/kvm/svm/nested.c |  2 +-
 arch/x86/kvm/vmx/nested.c |  2 +-
 arch/x86/kvm/x86.c        |  2 +-
 5 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9d8550af994c4..bc11402df83bb 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -65,7 +65,7 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 
-void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots);
+void kvm_init_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
 			     gpa_t nested_cr3);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f4fea68a88f69..720ceb0a1f5c1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4877,17 +4877,8 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	update_last_nonleaf_level(vcpu, g_context);
 }
 
-void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots)
+void kvm_init_mmu(struct kvm_vcpu *vcpu)
 {
-	if (reset_roots) {
-		uint i;
-
-		vcpu->arch.mmu->root_hpa = INVALID_PAGE;
-
-		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			vcpu->arch.mmu->prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
-	}
-
 	if (mmu_is_nested(vcpu))
 		init_kvm_nested_mmu(vcpu);
 	else if (tdp_enabled)
@@ -4913,7 +4904,7 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
-	kvm_init_mmu(vcpu, true);
+	kvm_init_mmu(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_reset_context);
 
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 5f45991edcda8..dca20f949b637 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -419,7 +419,7 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
-	kvm_init_mmu(vcpu, false);
+	kvm_init_mmu(vcpu);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 23f974fee5d49..aba11422500cf 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1098,7 +1098,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
-	kvm_init_mmu(vcpu, false);
+	kvm_init_mmu(vcpu);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ca30a3879d4e..e050ae2fc19b9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10606,7 +10606,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu_load(vcpu);
 	kvm_set_tsc_khz(vcpu, max_tsc_khz);
 	kvm_vcpu_reset(vcpu, false);
-	kvm_init_mmu(vcpu, false);
+	kvm_init_mmu(vcpu);
 	vcpu_put(vcpu);
 	return 0;
 
-- 
GitLab


From 546e8398bc0c7f75f696a24a997d2befeb632154 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:34 -0700
Subject: [PATCH 3239/3804] KVM: nVMX: WARN if subtly-impossible VMFUNC
 conditions occur

WARN and inject #UD when emulating VMFUNC for L2 if the function is
out-of-bounds or if VMFUNC is not enabled in vmcs12.  Neither condition
should occur in practice, as the CPU is supposed to prioritize the #UD
over VM-Exit for out-of-bounds input and KVM is supposed to enable
VMFUNC in vmcs02 if and only if it's enabled in vmcs12, but neither of
those dependencies is obvious.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index aba11422500cf..6342bb4c46b38 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5552,6 +5552,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
 	}
 
 	vmcs12 = get_vmcs12(vcpu);
+
+	/*
+	 * #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
+	 * is enabled in vmcs02 if and only if it's enabled in vmcs12.
+	 */
+	if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
+		kvm_queue_exception(vcpu, UD_VECTOR);
+		return 1;
+	}
+
 	if (!(vmcs12->vm_function_control & BIT_ULL(function)))
 		goto fail;
 
-- 
GitLab


From c5ffd408cdc951ba153aea267d96d7cc62c6a97c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 9 Jun 2021 16:42:35 -0700
Subject: [PATCH 3240/3804] KVM: nVMX: Drop redundant checks on vmcs12 in EPTP
 switching emulation

Drop the explicit check on EPTP switching being enabled.  The EPTP
switching check is handled in the generic VMFUNC function check, while
the underlying VMFUNC enablement check is done by hardware and redone
by generic VMFUNC emulation.

The vmcs12 EPT check is handled by KVM at VM-Enter in the form of a
consistency check, keep it but add a WARN.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210609234235.1244004-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6342bb4c46b38..b531e08a095bb 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5506,10 +5506,8 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
 	u32 index = kvm_rcx_read(vcpu);
 	u64 new_eptp;
 
-	if (!nested_cpu_has_eptp_switching(vmcs12) ||
-	    !nested_cpu_has_ept(vmcs12))
+	if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
 		return 1;
-
 	if (index >= VMFUNC_EPTP_ENTRIES)
 		return 1;
 
-- 
GitLab


From bca66dbcd28a41c669921ff7ca066f71e6f3e72e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Wed, 9 Jun 2021 17:09:10 +0200
Subject: [PATCH 3241/3804] KVM: x86: Check for pending interrupts when APICv
 is getting disabled

When APICv is active, interrupt injection doesn't raise KVM_REQ_EVENT
request (see __apic_accept_irq()) as the required work is done by hardware.
In case KVM_REQ_APICV_UPDATE collides with such injection, the interrupt
may never get delivered.

Currently, the described situation is hardly possible: all
kvm_request_apicv_update() calls normally happen upon VM creation when
no interrupts are pending. We are, however, going to move unconditional
kvm_request_apicv_update() call from kvm_hv_activate_synic() to
synic_update_vector() and without this fix 'hyperv_connections' test from
kvm-unit-tests gets stuck on IPI delivery attempt right after configuring
a SynIC route which triggers APICv disablement.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210609150911.1471882-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e050ae2fc19b9..ceb60f64085c7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9137,6 +9137,15 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
 	vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
 	kvm_apic_update_apicv(vcpu);
 	static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
+
+	/*
+	 * When APICv gets disabled, we may still have injected interrupts
+	 * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was
+	 * still active when the interrupt got accepted. Make sure
+	 * inject_pending_event() is called to check for that.
+	 */
+	if (!vcpu->arch.apicv_active)
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
 
-- 
GitLab


From ade74e1433f32e3fb422e3700d5bab34c57f4f47 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 15 Jun 2021 09:29:05 -0700
Subject: [PATCH 3242/3804] KVM: x86/mmu: Grab nx_lpage_splits as an unsigned
 long before division

Snapshot kvm->stats.nx_lpage_splits into a local unsigned long to avoid
64-bit division on 32-bit kernels.  Casting to an unsigned long is safe
because the maximum number of shadow pages, n_max_mmu_pages, is also an
unsigned long, i.e. KVM will start recycling shadow pages before the
number of splits can exceed a 32-bit value.

  ERROR: modpost: "__udivdi3" [arch/x86/kvm/kvm.ko] undefined!

Fixes: 7ee093d4f3f5 ("KVM: switch per-VM stats to u64")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210615162905.2132937-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 720ceb0a1f5c1..7d3e57678d34c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6043,6 +6043,7 @@ static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel
 
 static void kvm_recover_nx_lpages(struct kvm *kvm)
 {
+	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
 	int rcu_idx;
 	struct kvm_mmu_page *sp;
 	unsigned int ratio;
@@ -6054,7 +6055,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	write_lock(&kvm->mmu_lock);
 
 	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-	to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
+	to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
 	for ( ; to_zap; --to_zap) {
 		if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
 			break;
-- 
GitLab


From c19c8c0e666f9259e2fc4d2fa4b9ff8e3b40ee5d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 16 Jun 2021 20:43:37 +0200
Subject: [PATCH 3243/3804] be2net: Fix an error handling path in 'be_probe()'

If an error occurs after a 'pci_enable_pcie_error_reporting()' call, it
must be undone by a corresponding 'pci_disable_pcie_error_reporting()'
call, as already done in the remove function.

Fixes: d6b6d9877878 ("be2net: use PCIe AER capability")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index b6eba29d8e99e..7968568bbe214 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5897,6 +5897,7 @@ drv_cleanup:
 unmap_bars:
 	be_unmap_pci_bars(adapter);
 free_netdev:
+	pci_disable_pcie_error_reporting(pdev);
 	free_netdev(netdev);
 rel_reg:
 	pci_release_regions(pdev);
-- 
GitLab


From 7edcc682301492380fbdd604b4516af5ae667a13 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Wed, 16 Jun 2021 22:09:06 +0300
Subject: [PATCH 3244/3804] net: hamradio: fix memory leak in mkiss_close

My local syzbot instance hit a memory leak in
mkiss_open()[1]. The problem was a missing
free_netdev() in mkiss_close().

In mkiss_open() the netdevice is allocated and then
registered, but in mkiss_close() the netdevice was
only unregistered, not freed.

Fail log:

BUG: memory leak
unreferenced object 0xffff8880281ba000 (size 4096):
  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
  hex dump (first 32 bytes):
    61 78 30 00 00 00 00 00 00 00 00 00 00 00 00 00  ax0.............
    00 27 fa 2a 80 88 ff ff 00 00 00 00 00 00 00 00  .'.*............
  backtrace:
    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
    [<ffffffff8706e7e8>] alloc_netdev_mqs+0x98/0xe80
    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

BUG: memory leak
unreferenced object 0xffff8880141a9a00 (size 96):
  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
  hex dump (first 32 bytes):
    e8 a2 1b 28 80 88 ff ff e8 a2 1b 28 80 88 ff ff  ...(.......(....
    98 92 9c aa b0 40 02 00 00 00 00 00 00 00 00 00  .....@..........
  backtrace:
    [<ffffffff8709f68b>] __hw_addr_create_ex+0x5b/0x310
    [<ffffffff8709fb38>] __hw_addr_add_ex+0x1f8/0x2b0
    [<ffffffff870a0c7b>] dev_addr_init+0x10b/0x1f0
    [<ffffffff8706e88b>] alloc_netdev_mqs+0x13b/0xe80
    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

BUG: memory leak
unreferenced object 0xffff8880219bfc00 (size 512):
  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
  hex dump (first 32 bytes):
    00 a0 1b 28 80 88 ff ff 80 8f b1 8d ff ff ff ff  ...(............
    80 8f b1 8d ff ff ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
    [<ffffffff8706eec7>] alloc_netdev_mqs+0x777/0xe80
    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

BUG: memory leak
unreferenced object 0xffff888029b2b200 (size 256):
  comm "syz-executor.1", pid 11443, jiffies 4295046091 (age 17.660s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff81a27201>] kvmalloc_node+0x61/0xf0
    [<ffffffff8706f062>] alloc_netdev_mqs+0x912/0xe80
    [<ffffffff84e64192>] mkiss_open+0xb2/0x6f0 [1]
    [<ffffffff842355db>] tty_ldisc_open+0x9b/0x110
    [<ffffffff84236488>] tty_set_ldisc+0x2e8/0x670
    [<ffffffff8421f7f3>] tty_ioctl+0xda3/0x1440
    [<ffffffff81c9f273>] __x64_sys_ioctl+0x193/0x200
    [<ffffffff8911263a>] do_syscall_64+0x3a/0xb0
    [<ffffffff89200068>] entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: 815f62bf7427 ("[PATCH] SMP rewrite of mkiss")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hamradio/mkiss.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 65154224d5b84..7685a1721597f 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -799,6 +799,7 @@ static void mkiss_close(struct tty_struct *tty)
 	ax->tty = NULL;
 
 	unregister_netdev(ax->dev);
+	free_netdev(ax->dev);
 }
 
 /* Perform I/O control on an active ax25 channel. */
-- 
GitLab


From e3cb6fa0e2bf4ffc6225a55851f0cf2b93b50f91 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 10 Jun 2021 12:30:32 -0400
Subject: [PATCH 3245/3804] KVM: switch per-VM stats to u64

Make them the same type as vCPU stats.  There is no reason
to limit the counters to unsigned long.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/include/asm/kvm_host.h   |  2 +-
 arch/mips/include/asm/kvm_host.h    |  2 +-
 arch/powerpc/include/asm/kvm_host.h |  6 +++---
 arch/x86/include/asm/kvm_host.h     | 22 +++++++++++-----------
 virt/kvm/kvm_main.c                 |  4 ++--
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc2..d56f365b38a83 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -556,7 +556,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
 }
 
 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
+	u64 remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index fca4547d580f0..4245c082095f6 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -109,7 +109,7 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 }
 
 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
+	u64 remote_tlb_flush;
 };
 
 struct kvm_vcpu_stat {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7f2e90db2050b..ae3d4af61b66f 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -81,9 +81,9 @@ struct kvmppc_book3s_shadow_vcpu;
 struct kvm_nested_guest;
 
 struct kvm_vm_stat {
-	ulong remote_tlb_flush;
-	ulong num_2M_pages;
-	ulong num_1G_pages;
+	u64 remote_tlb_flush;
+	u64 num_2M_pages;
+	u64 num_1G_pages;
 };
 
 struct kvm_vcpu_stat {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a92d565906135..a0c29e29dd482 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1158,17 +1158,17 @@ struct kvm_arch {
 };
 
 struct kvm_vm_stat {
-	ulong mmu_shadow_zapped;
-	ulong mmu_pte_write;
-	ulong mmu_pde_zapped;
-	ulong mmu_flooded;
-	ulong mmu_recycled;
-	ulong mmu_cache_miss;
-	ulong mmu_unsync;
-	ulong remote_tlb_flush;
-	ulong lpages;
-	ulong nx_lpage_splits;
-	ulong max_mmu_page_hash_collisions;
+	u64 mmu_shadow_zapped;
+	u64 mmu_pte_write;
+	u64 mmu_pde_zapped;
+	u64 mmu_flooded;
+	u64 mmu_recycled;
+	u64 mmu_cache_miss;
+	u64 mmu_unsync;
+	u64 remote_tlb_flush;
+	u64 lpages;
+	u64 nx_lpage_splits;
+	u64 max_mmu_page_hash_collisions;
 };
 
 struct kvm_vcpu_stat {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fc35ba0ea5d3b..ed4d1581d5029 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4833,14 +4833,14 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file)
 
 static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val)
 {
-	*val = *(ulong *)((void *)kvm + offset);
+	*val = *(u64 *)((void *)kvm + offset);
 
 	return 0;
 }
 
 static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
 {
-	*(ulong *)((void *)kvm + offset) = 0;
+	*(u64 *)((void *)kvm + offset) = 0;
 
 	return 0;
 }
-- 
GitLab


From 0dbb11230437895f7cd6fc55da61cef011e997d8 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra@amd.com>
Date: Tue, 8 Jun 2021 18:05:43 +0000
Subject: [PATCH 3246/3804] KVM: X86: Introduce KVM_HC_MAP_GPA_RANGE hypercall

This hypercall is used by the SEV guest to notify a change in the page
encryption status to the hypervisor. The hypercall should be invoked
only when the encryption attribute is changed from encrypted -> decrypted
and vice versa. By default all guest pages are considered encrypted.

The hypercall exits to userspace to manage the guest shared regions and
integrate with the userspace VMM's migration code.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: x86@kernel.org
Cc: kvm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <90778988e1ee01926ff9cac447aacb745f954c8c.1623174621.git.ashish.kalra@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst        | 19 +++++++++++
 Documentation/virt/kvm/cpuid.rst      |  7 ++++
 Documentation/virt/kvm/hypercalls.rst | 21 ++++++++++++
 Documentation/virt/kvm/msr.rst        | 13 ++++++++
 arch/x86/include/asm/kvm_host.h       |  2 ++
 arch/x86/include/uapi/asm/kvm_para.h  | 13 ++++++++
 arch/x86/kvm/x86.c                    | 46 +++++++++++++++++++++++++++
 include/uapi/linux/kvm.h              |  1 +
 include/uapi/linux/kvm_para.h         |  1 +
 9 files changed, 123 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index cded99561adf4..e328caa35d6cc 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6950,3 +6950,22 @@ guest according to the bits Hyper-V CPUID feature leaves. Otherwise, all
 currently implmented Hyper-V features are provided unconditionally when
 Hyper-V identification is set in the HYPERV_CPUID_INTERFACE (0x40000001)
 leaf.
+
+8.34 KVM_CAP_EXIT_HYPERCALL
+---------------------------
+
+:Capability: KVM_CAP_EXIT_HYPERCALL
+:Architectures: x86
+:Type: vm
+
+This capability, if enabled, will cause KVM to exit to userspace
+with KVM_EXIT_HYPERCALL exit reason to process some hypercalls.
+
+Calling KVM_CHECK_EXTENSION for this capability will return a bitmask
+of hypercalls that can be configured to exit to userspace.
+Right now, the only such hypercall is KVM_HC_MAP_GPA_RANGE.
+
+The argument to KVM_ENABLE_CAP is also a bitmask, and must be a subset
+of the result of KVM_CHECK_EXTENSION.  KVM will forward to userspace
+the hypercalls whose corresponding bit is in the argument, and return
+ENOSYS for the others.
diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
index cf62162d4be20..bda3e3e737d71 100644
--- a/Documentation/virt/kvm/cpuid.rst
+++ b/Documentation/virt/kvm/cpuid.rst
@@ -96,6 +96,13 @@ KVM_FEATURE_MSI_EXT_DEST_ID        15          guest checks this feature bit
                                                before using extended destination
                                                ID bits in MSI address bits 11-5.
 
+KVM_FEATURE_HC_MAP_GPA_RANGE       16          guest checks this feature bit before
+                                               using the map gpa range hypercall
+                                               to notify the page state change
+
+KVM_FEATURE_MIGRATION_CONTROL      17          guest checks this feature bit before
+                                               using MSR_KVM_MIGRATION_CONTROL
+
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24          host will warn if no guest-side
                                                per-cpu warps are expected in
                                                kvmclock
diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst
index ed4fddd364ea5..e56fa8b9cfcae 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -169,3 +169,24 @@ a0: destination APIC ID
 
 :Usage example: When sending a call-function IPI-many to vCPUs, yield if
 	        any of the IPI target vCPUs was preempted.
+
+8. KVM_HC_MAP_GPA_RANGE
+-------------------------
+:Architecture: x86
+:Status: active
+:Purpose: Request KVM to map a GPA range with the specified attributes.
+
+a0: the guest physical address of the start page
+a1: the number of (4kb) pages (must be contiguous in GPA space)
+a2: attributes
+
+    Where 'attributes' :
+        * bits  3:0 - preferred page size encoding 0 = 4kb, 1 = 2mb, 2 = 1gb, etc...
+        * bit     4 - plaintext = 0, encrypted = 1
+        * bits 63:5 - reserved (must be zero)
+
+**Implementation note**: this hypercall is implemented in userspace via
+the KVM_CAP_EXIT_HYPERCALL capability. Userspace must enable that capability
+before advertising KVM_FEATURE_HC_MAP_GPA_RANGE in the guest CPUID.  In
+addition, if the guest supports KVM_FEATURE_MIGRATION_CONTROL, userspace
+must also set up an MSR filter to process writes to MSR_KVM_MIGRATION_CONTROL.
diff --git a/Documentation/virt/kvm/msr.rst b/Documentation/virt/kvm/msr.rst
index e37a14c323d20..9315fc385fb0b 100644
--- a/Documentation/virt/kvm/msr.rst
+++ b/Documentation/virt/kvm/msr.rst
@@ -376,3 +376,16 @@ data:
 	write '1' to bit 0 of the MSR, this causes the host to re-scan its queue
 	and check if there are more notifications pending. The MSR is available
 	if KVM_FEATURE_ASYNC_PF_INT is present in CPUID.
+
+MSR_KVM_MIGRATION_CONTROL:
+        0x4b564d08
+
+data:
+        This MSR is available if KVM_FEATURE_MIGRATION_CONTROL is present in
+        CPUID.  Bit 0 represents whether live migration of the guest is allowed.
+
+        When a guest is started, bit 0 will be 0 if the guest has encrypted
+        memory and 1 if the guest does not have encrypted memory.  If the
+        guest is communicating page encryption status to the host using the
+        ``KVM_HC_MAP_GPA_RANGE`` hypercall, it can set bit 0 in this MSR to
+        allow live migration of the guest.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a0c29e29dd482..e11d64aa0bcd1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1087,6 +1087,8 @@ struct kvm_arch {
 	u32 user_space_msr_mask;
 	struct kvm_x86_msr_filter __rcu *msr_filter;
 
+	u32 hypercall_exit_enabled;
+
 	/* Guest can access the SGX PROVISIONKEY. */
 	bool sgx_provisioning_allowed;
 
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 950afebfba888..5146bbab84d4c 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -33,6 +33,8 @@
 #define KVM_FEATURE_PV_SCHED_YIELD	13
 #define KVM_FEATURE_ASYNC_PF_INT	14
 #define KVM_FEATURE_MSI_EXT_DEST_ID	15
+#define KVM_FEATURE_HC_MAP_GPA_RANGE	16
+#define KVM_FEATURE_MIGRATION_CONTROL	17
 
 #define KVM_HINTS_REALTIME      0
 
@@ -54,6 +56,7 @@
 #define MSR_KVM_POLL_CONTROL	0x4b564d05
 #define MSR_KVM_ASYNC_PF_INT	0x4b564d06
 #define MSR_KVM_ASYNC_PF_ACK	0x4b564d07
+#define MSR_KVM_MIGRATION_CONTROL	0x4b564d08
 
 struct kvm_steal_time {
 	__u64 steal;
@@ -90,6 +93,16 @@ struct kvm_clock_pairing {
 /* MSR_KVM_ASYNC_PF_INT */
 #define KVM_ASYNC_PF_VEC_MASK			GENMASK(7, 0)
 
+/* MSR_KVM_MIGRATION_CONTROL */
+#define KVM_MIGRATION_READY		(1 << 0)
+
+/* KVM_HC_MAP_GPA_RANGE */
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_4K	0
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_2M	(1 << 0)
+#define KVM_MAP_GPA_RANGE_PAGE_SZ_1G	(1 << 1)
+#define KVM_MAP_GPA_RANGE_ENC_STAT(n)	(n << 4)
+#define KVM_MAP_GPA_RANGE_ENCRYPTED	KVM_MAP_GPA_RANGE_ENC_STAT(1)
+#define KVM_MAP_GPA_RANGE_DECRYPTED	KVM_MAP_GPA_RANGE_ENC_STAT(0)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ceb60f64085c7..8b898ec8d349b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -103,6 +103,8 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 
 static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
+#define KVM_EXIT_HYPERCALL_VALID_MASK (1 << KVM_HC_MAP_GPA_RANGE)
+
 #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
                                     KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
 
@@ -3996,6 +3998,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_SREGS2:
 		r = 1;
 		break;
+	case KVM_CAP_EXIT_HYPERCALL:
+		r = KVM_EXIT_HYPERCALL_VALID_MASK;
+		break;
 	case KVM_CAP_SET_GUEST_DEBUG2:
 		return KVM_GUESTDBG_VALID_MASK;
 #ifdef CONFIG_KVM_XEN
@@ -5622,6 +5627,14 @@ split_irqchip_unlock:
 		if (kvm_x86_ops.vm_copy_enc_context_from)
 			r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
 		return r;
+	case KVM_CAP_EXIT_HYPERCALL:
+		if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
+			r = -EINVAL;
+			break;
+		}
+		kvm->arch.hypercall_exit_enabled = cap->args[0];
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -8548,6 +8561,17 @@ no_yield:
 	return;
 }
 
+static int complete_hypercall_exit(struct kvm_vcpu *vcpu)
+{
+	u64 ret = vcpu->run->hypercall.ret;
+
+	if (!is_64_bit_mode(vcpu))
+		ret = (u32)ret;
+	kvm_rax_write(vcpu, ret);
+	++vcpu->stat.hypercalls;
+	return kvm_skip_emulated_instruction(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -8613,6 +8637,28 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 		kvm_sched_yield(vcpu, a0);
 		ret = 0;
 		break;
+	case KVM_HC_MAP_GPA_RANGE: {
+		u64 gpa = a0, npages = a1, attrs = a2;
+
+		ret = -KVM_ENOSYS;
+		if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE)))
+			break;
+
+		if (!PAGE_ALIGNED(gpa) || !npages ||
+		    gpa_to_gfn(gpa) + npages <= gpa_to_gfn(gpa)) {
+			ret = -KVM_EINVAL;
+			break;
+		}
+
+		vcpu->run->exit_reason        = KVM_EXIT_HYPERCALL;
+		vcpu->run->hypercall.nr       = KVM_HC_MAP_GPA_RANGE;
+		vcpu->run->hypercall.args[0]  = gpa;
+		vcpu->run->hypercall.args[1]  = npages;
+		vcpu->run->hypercall.args[2]  = attrs;
+		vcpu->run->hypercall.longmode = op_64_bit;
+		vcpu->arch.complete_userspace_io = complete_hypercall_exit;
+		return 0;
+	}
 	default:
 		ret = -KVM_ENOSYS;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 90d44138dbfbf..9febe1412f7a6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1085,6 +1085,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_PTP_KVM 198
 #define KVM_CAP_HYPERV_ENFORCE_CPUID 199
 #define KVM_CAP_SREGS2 200
+#define KVM_CAP_EXIT_HYPERCALL 201
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 8b86609849b9f..960c7e93d1a98 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -29,6 +29,7 @@
 #define KVM_HC_CLOCK_PAIRING		9
 #define KVM_HC_SEND_IPI		10
 #define KVM_HC_SCHED_YIELD		11
+#define KVM_HC_MAP_GPA_RANGE		12
 
 /*
  * hypercalls use architecture specific
-- 
GitLab


From 2735886c9ef115fc7b40d27bfe73605c38e9d56b Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Wed, 9 Jun 2021 00:16:40 -0700
Subject: [PATCH 3247/3804] KVM: LAPIC: Keep stored TMCCT register value 0
 after KVM_SET_LAPIC

KVM_GET_LAPIC stores the current value of TMCCT and KVM_SET_LAPIC's memcpy
stores it in vcpu->arch.apic->regs.  KVM_SET_LAPIC can store zero in
vcpu->arch.apic->regs once it has used that value, so that the stored value
is always zero.  In addition, the TMCCT is always computed on-demand and
never directly readable.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Message-Id: <1623223000-18116-1-git-send-email-wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4b80e613096be..ba5a27879f1dc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2631,6 +2631,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
 	update_divide_count(apic);
 	__start_apic_timer(apic, APIC_TMCCT);
+	kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
 	kvm_apic_update_apicv(vcpu);
 	apic->highest_isr_cache = -1;
 	if (vcpu->arch.apicv_active) {
-- 
GitLab


From 57a3e96d6d17ae5ac9861ef34af024a627f1c3bb Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Tue, 15 Jun 2021 12:57:09 +1200
Subject: [PATCH 3248/3804] KVM: x86/mmu: Fix return value in
 tdp_mmu_map_handle_target_level()

Currently tdp_mmu_map_handle_target_level() returns 0, which is
RET_PF_RETRY, when page fault is actually fixed.  This makes
kvm_tdp_mmu_map() also return RET_PF_RETRY in this case, instead of
RET_PF_FIXED.  Fix by initializing ret to RET_PF_FIXED.

Note that kvm_mmu_page_fault() resumes the guest on both RET_PF_RETRY and
RET_PF_FIXED, which means in practice returning either one makes no
difference, so this fix alone is not necessary for the stable tree.

Fixes: bb18842e2111 ("kvm: x86/mmu: Add TDP MMU PF handler")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <f9e8956223a586cd28c090879a8ff40f5eb6d609.1623717884.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index cc13e001f3de0..6c9c6917925ad 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -914,7 +914,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
 					  kvm_pfn_t pfn, bool prefault)
 {
 	u64 new_spte;
-	int ret = 0;
+	int ret = RET_PF_FIXED;
 	int make_spte_ret = 0;
 
 	if (unlikely(is_noslot_pfn(pfn)))
-- 
GitLab


From 857f84743e4b78500afae010d866675642e18e90 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Tue, 15 Jun 2021 12:57:10 +1200
Subject: [PATCH 3249/3804] KVM: x86/mmu: Fix pf_fixed count in
 tdp_mmu_map_handle_target_level()

Currently pf_fixed is not increased when prefault is true.  This is not
correct, since prefault here really means "async page fault completed".
In that case, the original page fault from the guest was morphed into an
async page fault and pf_fixed was not increased.  So when prefault
indicates async page fault is completed, pf_fixed should be increased.

Additionally, currently pf_fixed is also increased even when page fault
is spurious, while legacy MMU increases pf_fixed when page fault returns
RET_PF_EMULATE or RET_PF_FIXED.

To fix above two issues, change to increase pf_fixed when return value
is not RET_PF_SPURIOUS (RET_PF_RETRY has already been ruled out by
reaching here).

More information:
https://lore.kernel.org/kvm/cover.1620200410.git.kai.huang@intel.com/T/#mbb5f8083e58a2cd262231512b9211cbe70fc3bd5

Fixes: bb18842e2111 ("kvm: x86/mmu: Add TDP MMU PF handler")
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <2ea8b7f5d4f03c99b32bc56fc982e1e4e3d3fc6b.1623717884.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 6c9c6917925ad..efb7503ed4d5d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -951,7 +951,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
 				       rcu_dereference(iter->sptep));
 	}
 
-	if (!prefault)
+	/*
+	 * Increase pf_fixed in both RET_PF_EMULATE and RET_PF_FIXED to be
+	 * consistent with legacy MMU behavior.
+	 */
+	if (ret != RET_PF_SPURIOUS)
 		vcpu->stat.pf_fixed++;
 
 	return ret;
-- 
GitLab


From f1b8325508327a302f1d5cd8a4bf51e2c9c72fa9 Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@intel.com>
Date: Tue, 15 Jun 2021 12:57:11 +1200
Subject: [PATCH 3250/3804] KVM: x86/mmu: Fix TDP MMU page table level

TDP MMU iterator's level is identical to page table's actual level.  For
instance, for the last level page table (whose entry points to one 4K
page), iter->level is 1 (PG_LEVEL_4K), and in case of 5 level paging,
the iter->level is mmu->shadow_root_level, which is 5.  However, struct
kvm_mmu_page's level currently is not set correctly when it is allocated
in kvm_tdp_mmu_map().  When iterator hits non-present SPTE and needs to
allocate a new child page table, currently iter->level, which is the
level of the page table where the non-present SPTE belongs to, is used.
This results in struct kvm_mmu_page's level always having its parent's
level (except root table's level, which is initialized explicitly using
mmu->shadow_root_level).

This is kinda wrong, and not consistent with existing non TDP MMU code.
Fortuantely sp->role.level is only used in handle_removed_tdp_mmu_page()
and kvm_tdp_mmu_zap_sp(), and they are already aware of this and behave
correctly.  However to make it consistent with legacy MMU code (and fix
the issue that both root page table and its child page table have
shadow_root_level), use iter->level - 1 in kvm_tdp_mmu_map(), and change
handle_removed_tdp_mmu_page() and kvm_tdp_mmu_zap_sp() accordingly.

Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Kai Huang <kai.huang@intel.com>
Message-Id: <bcb6569b6e96cb78aaa7b50640e6e6b53291a74e.1623717884.git.kai.huang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++----
 arch/x86/kvm/mmu/tdp_mmu.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index efb7503ed4d5d..4d658882a4d8c 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -337,7 +337,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 
 	for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
 		sptep = rcu_dereference(pt) + i;
-		gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
+		gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level);
 
 		if (shared) {
 			/*
@@ -379,12 +379,12 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 			WRITE_ONCE(*sptep, REMOVED_SPTE);
 		}
 		handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
-				    old_child_spte, REMOVED_SPTE, level - 1,
+				    old_child_spte, REMOVED_SPTE, level,
 				    shared);
 	}
 
 	kvm_flush_remote_tlbs_with_address(kvm, gfn,
-					   KVM_PAGES_PER_HPAGE(level));
+					   KVM_PAGES_PER_HPAGE(level + 1));
 
 	call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
 }
@@ -1030,7 +1030,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 			if (is_removed_spte(iter.old_spte))
 				break;
 
-			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
+			sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level - 1);
 			child_pt = sp->spt;
 
 			new_spte = make_nonleaf_spte(child_pt,
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index f7a7990da11da..408aa49731d51 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -31,7 +31,7 @@ static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id,
 }
 static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+	gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level + 1);
 
 	/*
 	 * Don't allow yielding, as the caller may have a flush pending.  Note,
-- 
GitLab


From c3b26fdf1b32f91c7a3bc743384b4a298ab53ad7 Mon Sep 17 00:00:00 2001
From: Linyu Yuan <linyyuan@codeaurora.org>
Date: Thu, 17 Jun 2021 07:32:32 +0800
Subject: [PATCH 3251/3804] net: cdc_eem: fix tx fixup skb leak

When usbnet transmits an skb, eem fixes it up in eem_tx_fixup().
If skb_copy_expand() fails, eem_tx_fixup() returns NULL and
usbnet_start_xmit() has no chance to free the original skb.

Fix it by freeing the original skb in eem_tx_fixup() first, then
checking the result of the copy; if it failed, return NULL to usbnet.

Fixes: 9f722c0978b0 ("usbnet: CDC EEM support (v5)")
Signed-off-by: Linyu Yuan <linyyuan@codeaurora.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_eem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c
index 2e60bc1b9a6b0..359ea0d10e597 100644
--- a/drivers/net/usb/cdc_eem.c
+++ b/drivers/net/usb/cdc_eem.c
@@ -123,10 +123,10 @@ static struct sk_buff *eem_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 	}
 
 	skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags);
+	dev_kfree_skb_any(skb);
 	if (!skb2)
 		return NULL;
 
-	dev_kfree_skb_any(skb);
 	skb = skb2;
 
 done:
-- 
GitLab


From 4e16f283edc289820e9b2d6f617ed8e514ee8396 Mon Sep 17 00:00:00 2001
From: Tuan Phan <tuanphan@os.amperecomputing.com>
Date: Thu, 17 Jun 2021 09:08:49 -0700
Subject: [PATCH 3252/3804] perf/arm-cmn: Fix invalid pointer when access dtc
 object sharing the same IRQ number

When multiple dtcs share the same IRQ number, the irq_friend, which is
used to refer to the dtc object, is calculated incorrectly, which leads
to an invalid pointer.

Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver")

Signed-off-by: Tuan Phan <tuanphan@os.amperecomputing.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/1623946129-3290-1-git-send-email-tuanphan@os.amperecomputing.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/arm-cmn.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c
index 4f46f654279d3..bc3cba5f8c5dc 100644
--- a/drivers/perf/arm-cmn.c
+++ b/drivers/perf/arm-cmn.c
@@ -1212,7 +1212,7 @@ static int arm_cmn_init_irqs(struct arm_cmn *cmn)
 		irq = cmn->dtc[i].irq;
 		for (j = i; j--; ) {
 			if (cmn->dtc[j].irq == irq) {
-				cmn->dtc[j].irq_friend = j - i;
+				cmn->dtc[j].irq_friend = i - j;
 				goto next;
 			}
 		}
-- 
GitLab


From d96b1b8c9f79b6bb234a31c80972a6f422079376 Mon Sep 17 00:00:00 2001
From: Jing Xiangfeng <jingxiangfeng@huawei.com>
Date: Thu, 17 Jun 2021 20:26:14 +0800
Subject: [PATCH 3253/3804] drivers/perf: fix the missed ida_simple_remove() in
 ddr_perf_probe()

ddr_perf_probe() fails to call ida_simple_remove() in an error path.
Jump to cpuhp_state_err to fix it.

Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Link: https://lore.kernel.org/r/20210617122614.166823-1-jingxiangfeng@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/perf/fsl_imx8_ddr_perf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 2a1d78794a4e5..94ebc1ecace7c 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -702,8 +702,10 @@ static int ddr_perf_probe(struct platform_device *pdev)
 
 	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME "%d",
 			      num);
-	if (!name)
-		return -ENOMEM;
+	if (!name) {
+		ret = -ENOMEM;
+		goto cpuhp_state_err;
+	}
 
 	pmu->devtype_data = of_device_get_match_data(&pdev->dev);
 
-- 
GitLab


From 1c200f832e14420fa770193f9871f4ce2df00d07 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 10:09:53 -0700
Subject: [PATCH 3254/3804] net: qed: Fix memcpy() overflow of
 qed_dcbx_params()

The source (&dcbx_info->operational.params) and dest
(&p_hwfn->p_dcbx_info->set.config.params) are both struct qed_dcbx_params
(560 bytes), not struct qed_dcbx_admin_params (564 bytes), which is used
as the memcpy() size.

However it seems that struct qed_dcbx_operational_params
(dcbx_info->operational)'s layout matches struct qed_dcbx_admin_params
(p_hwfn->p_dcbx_info->set.config)'s 4 byte difference (3 padding, 1 byte
for "valid").

On the assumption that the size is wrong (rather than the source structure
type), adjust the memcpy() size argument to be 4 bytes smaller and add
a BUILD_BUG_ON() to validate any changes to the structure sizes.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 17d5b649eb36b..e81dd34a3cac2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
 		p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
 
 	p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled;
+	BUILD_BUG_ON(sizeof(dcbx_info->operational.params) !=
+		     sizeof(p_hwfn->p_dcbx_info->set.config.params));
 	memcpy(&p_hwfn->p_dcbx_info->set.config.params,
 	       &dcbx_info->operational.params,
-	       sizeof(struct qed_dcbx_admin_params));
+	       sizeof(p_hwfn->p_dcbx_info->set.config.params));
 	p_hwfn->p_dcbx_info->set.config.valid = true;
 
 	memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
-- 
GitLab


From ae1b2aaee7e215f985bf10aad8978f524d8dca60 Mon Sep 17 00:00:00 2001
From: Hao Chen <chenhaoa@uniontech.com>
Date: Thu, 17 Jun 2021 10:33:00 +0800
Subject: [PATCH 3255/3804] Documentation: ACPI: fix error script name

The correct script name should be 'divergence.sh' instead of
'divergences.sh'.
I didn't find divergences.sh in the path of acpica/generate/linux/.

Signed-off-by: Hao Chen <chenhaoa@uniontech.com>
Link: https://lore.kernel.org/r/20210617023300.30114-1-chenhaoa@uniontech.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/acpi/linuxized-acpica.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/acpi/linuxized-acpica.rst b/Documentation/driver-api/acpi/linuxized-acpica.rst
index 6bee033832255..cc234353d2c40 100644
--- a/Documentation/driver-api/acpi/linuxized-acpica.rst
+++ b/Documentation/driver-api/acpi/linuxized-acpica.rst
@@ -276,4 +276,4 @@ before they become available from the ACPICA release process.
    # git clone https://github.com/acpica/acpica
    # git clone https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
    # cd acpica
-   # generate/linux/divergences.sh -s ../linux
+   # generate/linux/divergence.sh -s ../linux
-- 
GitLab


From 349660e944b5bcb82df1dbb2156ced9fc9c05351 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:07 +0200
Subject: [PATCH 3256/3804] docs: admin-guide: reporting-issues.rst: replace
 some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/551a2af0e654226067e5c376d3e2d959cc738f39.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/reporting-issues.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/reporting-issues.rst b/Documentation/admin-guide/reporting-issues.rst
index 18d8e25ba9dfa..d7ac13f789cce 100644
--- a/Documentation/admin-guide/reporting-issues.rst
+++ b/Documentation/admin-guide/reporting-issues.rst
@@ -1248,7 +1248,7 @@ paragraph makes the severeness obvious.
 
 In case you performed a successful bisection, use the title of the change that
 introduced the regression as the second part of your subject. Make the report
-also mention the commit id of the culprit. In case of an unsuccessful bisection,
+also mention the commit id of the culprit. In case of an unsuccessful bisection,
 make your report mention the latest tested version that's working fine (say 5.7)
 and the oldest where the issue occurs (say 5.8-rc1).
 
-- 
GitLab


From 90f40f514f907f0b12873a7337ea638731848ff2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:08 +0200
Subject: [PATCH 3257/3804] docs: trace: coresight:
 coresight-etm4x-reference.rst: replace some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/b6a04e881bc80a3c1d3d23ccbc8208ca3c9053fd.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/coresight/coresight-etm4x-reference.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/trace/coresight/coresight-etm4x-reference.rst b/Documentation/trace/coresight/coresight-etm4x-reference.rst
index b64d9a9c79dfb..d25dfe86af9bf 100644
--- a/Documentation/trace/coresight/coresight-etm4x-reference.rst
+++ b/Documentation/trace/coresight/coresight-etm4x-reference.rst
@@ -427,7 +427,7 @@ the ‘TRC’ prefix.
 :Syntax:
     ``echo idx > vmid_idx``
 
-    Where idx <  numvmidc
+    Where idx <  numvmidc
 
 ----
 
-- 
GitLab


From f40c2a25b9c33b08ad2098f64b7d1cbaa3daab9f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:09 +0200
Subject: [PATCH 3258/3804] docs: driver-api: ioctl.rst: replace some
 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/b2186e313f990488ded56d9b8d35a2d1fe479aa1.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/ioctl.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/driver-api/ioctl.rst b/Documentation/driver-api/ioctl.rst
index c455db0e16272..5b76e765827d0 100644
--- a/Documentation/driver-api/ioctl.rst
+++ b/Documentation/driver-api/ioctl.rst
@@ -25,9 +25,9 @@ ioctl commands that follow modern conventions: ``_IO``, ``_IOR``,
 with the correct parameters:
 
 _IO/_IOR/_IOW/_IOWR
-   The macro name specifies how the argument will be used.  It may be a
+   The macro name specifies how the argument will be used.  It may be a
    pointer to data to be passed into the kernel (_IOW), out of the kernel
-   (_IOR), or both (_IOWR).  _IO can indicate either commands with no
+   (_IOR), or both (_IOWR).  _IO can indicate either commands with no
    argument or those passing an integer value instead of a pointer.
    It is recommended to only use _IO for commands without arguments,
    and use pointers for passing data.
@@ -200,10 +200,10 @@ cause an information leak, which can be used to defeat kernel address
 space layout randomization (KASLR), helping in an attack.
 
 For this reason (and for compat support) it is best to avoid any
-implicit padding in data structures.  Where there is implicit padding
+implicit padding in data structures.  Where there is implicit padding
 in an existing structure, kernel drivers must be careful to fully
 initialize an instance of the structure before copying it to user
-space.  This is usually done by calling memset() before assigning to
+space.  This is usually done by calling memset() before assigning to
 individual members.
 
 Subsystem abstractions
-- 
GitLab


From 570eb861243c07f2c3923af428ed20cd3f9d0a29 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:10 +0200
Subject: [PATCH 3259/3804] docs: usb: replace some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+feff ('﻿'): BOM
	  as it is not needed on UTF-8

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/0a4b0c38a9cd1133402a04a7ff60fefd9682d42e.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/usb/ehci.rst           | 2 +-
 Documentation/usb/gadget_printer.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/usb/ehci.rst b/Documentation/usb/ehci.rst
index 31f650e7c1b4c..76190501907a6 100644
--- a/Documentation/usb/ehci.rst
+++ b/Documentation/usb/ehci.rst
@@ -1,4 +1,4 @@
-﻿===========
+===========
 EHCI driver
 ===========
 
diff --git a/Documentation/usb/gadget_printer.rst b/Documentation/usb/gadget_printer.rst
index 5e5516c69075f..e611a6d910932 100644
--- a/Documentation/usb/gadget_printer.rst
+++ b/Documentation/usb/gadget_printer.rst
@@ -1,4 +1,4 @@
-﻿===============================
+===============================
 Linux USB Printer Gadget Driver
 ===============================
 
-- 
GitLab


From 1a967a312270356c249466b10bb39890a96e301e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:11 +0200
Subject: [PATCH 3260/3804] docs: vm: zswap.rst: replace some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/9a93b72f99f8f3328269076ceff50248ac9c5af5.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/vm/zswap.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/vm/zswap.rst b/Documentation/vm/zswap.rst
index d8d9fa4a1f0df..8edb8d578caf7 100644
--- a/Documentation/vm/zswap.rst
+++ b/Documentation/vm/zswap.rst
@@ -10,7 +10,7 @@ Overview
 Zswap is a lightweight compressed cache for swap pages. It takes pages that are
 in the process of being swapped out and attempts to compress them into a
 dynamically allocated RAM-based memory pool.  zswap basically trades CPU cycles
-for potentially reduced swap I/O.  This trade-off can also result in a
+for potentially reduced swap I/O.  This trade-off can also result in a
 significant performance improvement if reads from the compressed cache are
 faster than reads from a swap device.
 
@@ -26,7 +26,7 @@ faster than reads from a swap device.
   performance impact of swapping.
 * Overcommitted guests that share a common I/O resource can
   dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
-  throttling by the hypervisor. This allows more work to get done with less
+  throttling by the hypervisor. This allows more work to get done with less
   impact to the guest workload and guests sharing the I/O subsystem
 * Users with SSDs as swap devices can extend the life of the device by
   drastically reducing life-shortening writes.
-- 
GitLab


From d9d2c82738b7cacefde30b701d2ddc4879f6c39a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:12 +0200
Subject: [PATCH 3261/3804] docs: filesystems: ext4: blockgroup.rst: replace
 some characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+2217 ('∗'): ASTERISK OPERATOR
	  use ASCII asterisk instead of the ASTERISK OPERATOR

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Theodore Ts'o <tytso@mit.edu>
Link: https://lore.kernel.org/r/c5c3c384c48779ca7c9dcd90183cefe20ac82928.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/ext4/blockgroup.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/filesystems/ext4/blockgroup.rst b/Documentation/filesystems/ext4/blockgroup.rst
index 3da1566333393..d5d652addce5e 100644
--- a/Documentation/filesystems/ext4/blockgroup.rst
+++ b/Documentation/filesystems/ext4/blockgroup.rst
@@ -84,7 +84,7 @@ Without the option META\_BG, for safety concerns, all block group
 descriptors copies are kept in the first block group. Given the default
 128MiB(2^27 bytes) block group size and 64-byte group descriptors, ext4
 can have at most 2^27/64 = 2^21 block groups. This limits the entire
-filesystem size to 2^21 ∗ 2^27 = 2^48bytes or 256TiB.
+filesystem size to 2^21 * 2^27 = 2^48bytes or 256TiB.
 
 The solution to this problem is to use the metablock group feature
 (META\_BG), which is already in ext3 for all 2.6 releases. With the
-- 
GitLab


From 729979ebef22b7527ea377bb2814df97ad7d4078 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:13 +0200
Subject: [PATCH 3262/3804] docs: networking: device_drivers: replace some
 characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/9bd9f5c067c4b068a974730a14fe8d68e1be0c9a.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../networking/device_drivers/ethernet/intel/i40e.rst       | 6 +++---
 .../networking/device_drivers/ethernet/intel/iavf.rst       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 2d3f6bd969a2b..ac35bd472bdc9 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -466,7 +466,7 @@ network. PTP support varies among Intel devices that support this driver. Use
 "ethtool -T <netdev name>" to get a definitive list of PTP capabilities
 supported by the device.
 
-IEEE 802.1ad (QinQ) Support
+IEEE 802.1ad (QinQ) Support
 ---------------------------
 The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
 IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
@@ -523,8 +523,8 @@ of a port's bandwidth (should it be available). The sum of all the values for
 Maximum Bandwidth is not restricted, because no more than 100% of a port's
 bandwidth can ever be used.
 
-NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions
-per Port (MFP) and SR-IOV are enabled. An error from i40e is logged that says
+NOTE: X710/XXV710 devices fail to enable Max VFs (64) when Multiple Functions
+per Port (MFP) and SR-IOV are enabled. An error from i40e is logged that says
 "add vsi failed for VF N, aq_err 16". To workaround the issue, enable less than
 64 virtual functions (VFs).
 
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
index 25330b7b5168d..151af0a8da9c3 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -113,7 +113,7 @@ which the AVF is associated. The following are base mode features:
 - AVF device ID
 - HW mailbox is used for VF to PF communications (including on Windows)
 
-IEEE 802.1ad (QinQ) Support
+IEEE 802.1ad (QinQ) Support
 ---------------------------
 The IEEE 802.1ad standard, informally known as QinQ, allows for multiple VLAN
 IDs within a single Ethernet frame. VLAN IDs are sometimes referred to as
-- 
GitLab


From a557f67cd70344bf28442baac4c9b6c94aecb60b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:55:14 +0200
Subject: [PATCH 3263/3804] docs: PCI: Replace non-breaking spaces to avoid PDF
 issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The conversion tools used during DocBook/LaTeX/html/Markdown->ReST
conversion and some cut-and-pasted text contain some characters that
aren't easily reachable on standard keyboards and/or could cause
troubles when parsed by the documentation build system.

Replace the occurrences of the following characters:

	- U+00a0 (' '): NO-BREAK SPACE
	  as it can cause lines being truncated on PDF output

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/8036126a59adb720dbc9233341ad5a08531cf73f.1623826294.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/PCI/acpi-info.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/PCI/acpi-info.rst b/Documentation/PCI/acpi-info.rst
index 060217081c796..34c64a5a66ec7 100644
--- a/Documentation/PCI/acpi-info.rst
+++ b/Documentation/PCI/acpi-info.rst
@@ -22,9 +22,9 @@ or if the device has INTx interrupts connected by platform interrupt
 controllers and a _PRT is needed to describe those connections.
 
 ACPI resource description is done via _CRS objects of devices in the ACPI
-namespace [2].   The _CRS is like a generalized PCI BAR: the OS can read
+namespace [2].   The _CRS is like a generalized PCI BAR: the OS can read
 _CRS and figure out what resource is being consumed even if it doesn't have
-a driver for the device [3].  That's important because it means an old OS
+a driver for the device [3].  That's important because it means an old OS
 can work correctly even on a system with new devices unknown to the OS.
 The new devices might not do anything, but the OS can at least make sure no
 resources conflict with them.
@@ -41,15 +41,15 @@ ACPI, that device will have a specific _HID/_CID that tells the OS what
 driver to bind to it, and the _CRS tells the OS and the driver where the
 device's registers are.
 
-PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should
-describe all the address space they consume.  This includes all the windows
+PCI host bridges are PNP0A03 or PNP0A08 devices.  Their _CRS should
+describe all the address space they consume.  This includes all the windows
 they forward down to the PCI bus, as well as registers of the host bridge
-itself that are not forwarded to PCI.  The host bridge registers include
+itself that are not forwarded to PCI.  The host bridge registers include
 things like secondary/subordinate bus registers that determine the bus
 range below the bridge, window registers that describe the apertures, etc.
 These are all device-specific, non-architected things, so the only way a
 PNP0A03/PNP0A08 driver can manage them is via _PRS/_CRS/_SRS, which contain
-the device-specific details.  The host bridge registers also include ECAM
+the device-specific details.  The host bridge registers also include ECAM
 space, since it is consumed by the host bridge.
 
 ACPI defines a Consumer/Producer bit to distinguish the bridge registers
@@ -66,7 +66,7 @@ the PNP0A03/PNP0A08 device itself.  The workaround was to describe the
 bridge registers (including ECAM space) in PNP0C02 catch-all devices [6].
 With the exception of ECAM, the bridge register space is device-specific
 anyway, so the generic PNP0A03/PNP0A08 driver (pci_root.c) has no need to
-know about it.  
+know about it.  
 
 New architectures should be able to use "Consumer" Extended Address Space
 descriptors in the PNP0A03 device for bridge registers, including ECAM,
@@ -75,9 +75,9 @@ ia64 kernels assume all address space descriptors, including "Consumer"
 Extended Address Space ones, are windows, so it would not be safe to
 describe bridge registers this way on those architectures.
 
-PNP0C02 "motherboard" devices are basically a catch-all.  There's no
+PNP0C02 "motherboard" devices are basically a catch-all.  There's no
 programming model for them other than "don't use these resources for
-anything else."  So a PNP0C02 _CRS should claim any address space that is
+anything else."  So a PNP0C02 _CRS should claim any address space that is
 (1) not claimed by _CRS under any other device object in the ACPI namespace
 and (2) should not be assigned by the OS to something else.
 
-- 
GitLab


From 559a66b868d987dca55894218d11d59e5bafafe0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:16 +0200
Subject: [PATCH 3264/3804] docs: devices.rst: better reference documentation
 docs

There's no need to use either :file: or :doc: tags for documentation,
as automarkup.py automatically converts Documentation/*.rst into
a cross-reference.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/5d9c9949a104d10b537a2d780bccad69a2dc58f9.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/pm/devices.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/driver-api/pm/devices.rst b/Documentation/driver-api/pm/devices.rst
index 6b3bfd29fd844..d448cb57df86c 100644
--- a/Documentation/driver-api/pm/devices.rst
+++ b/Documentation/driver-api/pm/devices.rst
@@ -217,7 +217,7 @@ system-wide transition to a sleep state even though its :c:member:`runtime_auto`
 flag is clear.
 
 For more information about the runtime power management framework, refer to
-:file:`Documentation/power/runtime_pm.rst`.
+Documentation/power/runtime_pm.rst.
 
 
 Calling Drivers to Enter and Leave System Sleep States
@@ -655,7 +655,7 @@ been thawed.  Generally speaking, the PM notifiers are suitable for performing
 actions that either require user space to be available, or at least won't
 interfere with user space.
 
-For details refer to :doc:`notifiers`.
+For details refer to Documentation/driver-api/pm/notifiers.rst.
 
 
 Device Low-Power (suspend) States
@@ -726,7 +726,7 @@ it into account in any way.
 
 Devices may be defined as IRQ-safe which indicates to the PM core that their
 runtime PM callbacks may be invoked with disabled interrupts (see
-:file:`Documentation/power/runtime_pm.rst` for more information).  If an
+Documentation/power/runtime_pm.rst for more information).  If an
 IRQ-safe device belongs to a PM domain, the runtime PM of the domain will be
 disallowed, unless the domain itself is defined as IRQ-safe. However, it
 makes sense to define a PM domain as IRQ-safe only if all the devices in it
@@ -805,7 +805,7 @@ The ``DPM_FLAG_MAY_SKIP_RESUME`` Driver Flag
 --------------------------------------------
 
 During system-wide resume from a sleep state it's easiest to put devices into
-the full-power state, as explained in :file:`Documentation/power/runtime_pm.rst`.
+the full-power state, as explained in Documentation/power/runtime_pm.rst.
 [Refer to that document for more information regarding this particular issue as
 well as for information on the device runtime power management framework in
 general.]  However, it often is desirable to leave devices in suspend after
-- 
GitLab


From 9129faf9040d9005e70c604a163faa9f183b00ee Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:17 +0200
Subject: [PATCH 3265/3804] docs: dev-tools: kunit: don't use a table for docs
 name

We'll be replacing :doc:`foo` references to
Documentation/foo.rst. Yet, here it happens inside a table.
Doing a search-and-replace would break it.

Yet, as there's no good reason to use a table there,
let's just convert it into a list.

Reviewed-by: David Gow <davidgow@google.com>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/07d3a8ccafbb6345d6e78fb090290859e84361a1.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/kunit/api/index.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/dev-tools/kunit/api/index.rst b/Documentation/dev-tools/kunit/api/index.rst
index 9b9bffe5d41a0..b33ad72bcf0bc 100644
--- a/Documentation/dev-tools/kunit/api/index.rst
+++ b/Documentation/dev-tools/kunit/api/index.rst
@@ -10,7 +10,7 @@ API Reference
 This section documents the KUnit kernel testing API. It is divided into the
 following sections:
 
-================================= ==============================================
-:doc:`test`                       documents all of the standard testing API
-                                  excluding mocking or mocking related features.
-================================= ==============================================
+Documentation/dev-tools/kunit/api/test.rst
+
+ - documents all of the standard testing API excluding mocking
+   or mocking related features.
-- 
GitLab


From 17420f3138b957e571144f337b866f8c7a7c1682 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:18 +0200
Subject: [PATCH 3266/3804] docs: admin-guide: pm: avoid using ReST :doc:`foo`
 markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/04616d9fc0b4a0d33486fa0018631a2db2eba860.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/pm/intel_idle.rst   | 16 ++++++++++------
 Documentation/admin-guide/pm/intel_pstate.rst |  9 +++++----
 2 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/pm/intel_idle.rst b/Documentation/admin-guide/pm/intel_idle.rst
index 89309e1b0e484..b799a43da62e9 100644
--- a/Documentation/admin-guide/pm/intel_idle.rst
+++ b/Documentation/admin-guide/pm/intel_idle.rst
@@ -20,8 +20,8 @@ Nehalem and later generations of Intel processors, but the level of support for
 a particular processor model in it depends on whether or not it recognizes that
 processor model and may also depend on information coming from the platform
 firmware.  [To understand ``intel_idle`` it is necessary to know how ``CPUIdle``
-works in general, so this is the time to get familiar with :doc:`cpuidle` if you
-have not done that yet.]
+works in general, so this is the time to get familiar with
+Documentation/admin-guide/pm/cpuidle.rst if you have not done that yet.]
 
 ``intel_idle`` uses the ``MWAIT`` instruction to inform the processor that the
 logical CPU executing it is idle and so it may be possible to put some of the
@@ -53,7 +53,8 @@ processor) corresponding to them depends on the processor model and it may also
 depend on the configuration of the platform.
 
 In order to create a list of available idle states required by the ``CPUIdle``
-subsystem (see :ref:`idle-states-representation` in :doc:`cpuidle`),
+subsystem (see :ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst),
 ``intel_idle`` can use two sources of information: static tables of idle states
 for different processor models included in the driver itself and the ACPI tables
 of the system.  The former are always used if the processor model at hand is
@@ -98,7 +99,8 @@ states may not be enabled by default if there are no matching entries in the
 preliminary list of idle states coming from the ACPI tables.  In that case user
 space still can enable them later (on a per-CPU basis) with the help of
 the ``disable`` idle state attribute in ``sysfs`` (see
-:ref:`idle-states-representation` in :doc:`cpuidle`).  This basically means that
+:ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst).  This basically means that
 the idle states "known" to the driver may not be enabled by default if they have
 not been exposed by the platform firmware (through the ACPI tables).
 
@@ -186,7 +188,8 @@ be desirable.  In practice, it is only really necessary to do that if the idle
 states in question cannot be enabled during system startup, because in the
 working state of the system the CPU power management quality of service (PM
 QoS) feature can be used to prevent ``CPUIdle`` from touching those idle states
-even if they have been enumerated (see :ref:`cpu-pm-qos` in :doc:`cpuidle`).
+even if they have been enumerated (see :ref:`cpu-pm-qos` in
+Documentation/admin-guide/pm/cpuidle.rst).
 Setting ``max_cstate`` to 0 causes the ``intel_idle`` initialization to fail.
 
 The ``no_acpi`` and ``use_acpi`` module parameters (recognized by ``intel_idle``
@@ -202,7 +205,8 @@ Namely, the positions of the bits that are set in the ``states_off`` value are
 the indices of idle states to be disabled by default (as reflected by the names
 of the corresponding idle state directories in ``sysfs``, :file:`state0`,
 :file:`state1` ... :file:`state<i>` ..., where ``<i>`` is the index of the given
-idle state; see :ref:`idle-states-representation` in :doc:`cpuidle`).
+idle state; see :ref:`idle-states-representation` in
+Documentation/admin-guide/pm/cpuidle.rst).
 
 For example, if ``states_off`` is equal to 3, the driver will disable idle
 states 0 and 1 by default, and if it is equal to 8, idle state 3 will be
diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index df29b4f1f2195..7a7d4b041eac6 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -18,8 +18,8 @@ General Information
 (``CPUFreq``).  It is a scaling driver for the Sandy Bridge and later
 generations of Intel processors.  Note, however, that some of those processors
 may not be supported.  [To understand ``intel_pstate`` it is necessary to know
-how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
-you have not done that yet.]
+how ``CPUFreq`` works in general, so this is the time to read
+Documentation/admin-guide/pm/cpufreq.rst if you have not done that yet.]
 
 For the processors supported by ``intel_pstate``, the P-state concept is broader
 than just an operating frequency or an operating performance point (see the
@@ -445,8 +445,9 @@ Interpretation of Policy Attributes
 -----------------------------------
 
 The interpretation of some ``CPUFreq`` policy attributes described in
-:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
-and it generally depends on the driver's `operation mode <Operation Modes_>`_.
+Documentation/admin-guide/pm/cpufreq.rst is special with ``intel_pstate``
+as the current scaling driver and it generally depends on the driver's
+`operation mode <Operation Modes_>`_.
 
 First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
 ``scaling_cur_freq`` attributes are produced by applying a processor-specific
-- 
GitLab


From e499f4c297e9136a579b4eaee75a3c6ba7172eac Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:19 +0200
Subject: [PATCH 3267/3804] docs: admin-guide: hw-vuln: avoid using ReST
 :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/4e378517761f3df07165d5ecdac5a0a81577e68f.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../hw-vuln/special-register-buffer-data-sampling.rst          | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
index 3b1ce68d24567..966c9b3296eaa 100644
--- a/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
+++ b/Documentation/admin-guide/hw-vuln/special-register-buffer-data-sampling.rst
@@ -3,7 +3,8 @@
 SRBDS - Special Register Buffer Data Sampling
 =============================================
 
-SRBDS is a hardware vulnerability that allows MDS :doc:`mds` techniques to
+SRBDS is a hardware vulnerability that allows MDS
+Documentation/admin-guide/hw-vuln/mds.rst techniques to
 infer values returned from special register accesses.  Special register
 accesses are accesses to off core registers.  According to Intel's evaluation,
 the special register reads that have a security expectation of privacy are
-- 
GitLab


From 2793e19d63275304da0359409a1f28b689df1ed8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:20 +0200
Subject: [PATCH 3268/3804] docs: admin-guide: sysctl: avoid using ReST
 :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/12abd2290c7ebc05c89178d2556bea740bd70fac.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/admin-guide/sysctl/abi.rst    |  2 +-
 Documentation/admin-guide/sysctl/kernel.rst | 37 +++++++++++----------
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/abi.rst b/Documentation/admin-guide/sysctl/abi.rst
index 77b1d1b2ad42a..4e6db0a2a4c0f 100644
--- a/Documentation/admin-guide/sysctl/abi.rst
+++ b/Documentation/admin-guide/sysctl/abi.rst
@@ -11,7 +11,7 @@ Documentation for /proc/sys/abi/
 
 Copyright (c) 2020, Stephen Kitt
 
-For general info, see :doc:`index`.
+For general info, see Documentation/admin-guide/sysctl/index.rst.
 
 ------------------------------------------------------------------------------
 
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index c24f57f2c7827..10df7fc6495f8 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -9,7 +9,8 @@ Copyright (c) 1998, 1999,  Rik van Riel <riel@nl.linux.org>
 
 Copyright (c) 2009,        Shen Feng<shen@cn.fujitsu.com>
 
-For general info and legal blurb, please look in :doc:`index`.
+For general info and legal blurb, please look in
+Documentation/admin-guide/sysctl/index.rst.
 
 ------------------------------------------------------------------------------
 
@@ -54,7 +55,7 @@ free space valid for 30 seconds.
 acpi_video_flags
 ================
 
-See :doc:`/power/video`. This allows the video resume mode to be set,
+See Documentation/power/video.rst. This allows the video resume mode to be set,
 in a similar fashion to the ``acpi_sleep`` kernel parameter, by
 combining the following values:
 
@@ -89,7 +90,7 @@ is 0x15 and the full version number is 0x234, this file will contain
 the value 340 = 0x154.
 
 See the ``type_of_loader`` and ``ext_loader_type`` fields in
-:doc:`/x86/boot` for additional information.
+Documentation/x86/boot.rst for additional information.
 
 
 bootloader_version (x86 only)
@@ -99,7 +100,7 @@ The complete bootloader version number.  In the example above, this
 file will contain the value 564 = 0x234.
 
 See the ``type_of_loader`` and ``ext_loader_ver`` fields in
-:doc:`/x86/boot` for additional information.
+Documentation/x86/boot.rst for additional information.
 
 
 bpf_stats_enabled
@@ -269,7 +270,7 @@ see the ``hostname(1)`` man page.
 firmware_config
 ===============
 
-See :doc:`/driver-api/firmware/fallback-mechanisms`.
+See Documentation/driver-api/firmware/fallback-mechanisms.rst.
 
 The entries in this directory allow the firmware loader helper
 fallback to be controlled:
@@ -297,7 +298,7 @@ crashes and outputting them to a serial console.
 ftrace_enabled, stack_tracer_enabled
 ====================================
 
-See :doc:`/trace/ftrace`.
+See Documentation/trace/ftrace.rst.
 
 
 hardlockup_all_cpu_backtrace
@@ -325,7 +326,7 @@ when a hard lockup is detected.
 1 Panic on hard lockup.
 = ===========================
 
-See :doc:`/admin-guide/lockup-watchdogs` for more information.
+See Documentation/admin-guide/lockup-watchdogs.rst for more information.
 This can also be set using the nmi_watchdog kernel parameter.
 
 
@@ -586,7 +587,8 @@ in a KVM virtual machine. This default can be overridden by adding::
 
    nmi_watchdog=1
 
-to the guest kernel command line (see :doc:`/admin-guide/kernel-parameters`).
+to the guest kernel command line (see
+Documentation/admin-guide/kernel-parameters.rst).
 
 
 numa_balancing
@@ -1071,7 +1073,7 @@ that support this feature.
 real-root-dev
 =============
 
-See :doc:`/admin-guide/initrd`.
+See Documentation/admin-guide/initrd.rst.
 
 
 reboot-cmd (SPARC only)
@@ -1158,7 +1160,7 @@ will take effect.
 seccomp
 =======
 
-See :doc:`/userspace-api/seccomp_filter`.
+See Documentation/userspace-api/seccomp_filter.rst.
 
 
 sg-big-buff
@@ -1329,7 +1331,7 @@ the boot PROM.
 sysrq
 =====
 
-See :doc:`/admin-guide/sysrq`.
+See Documentation/admin-guide/sysrq.rst.
 
 
 tainted
@@ -1359,15 +1361,16 @@ ORed together. The letters are seen in "Tainted" line of Oops reports.
 131072  `(T)`  The kernel was built with the struct randomization plugin
 ======  =====  ==============================================================
 
-See :doc:`/admin-guide/tainted-kernels` for more information.
+See Documentation/admin-guide/tainted-kernels.rst for more information.
 
 Note:
   writes to this sysctl interface will fail with ``EINVAL`` if the kernel is
   booted with the command line option ``panic_on_taint=<bitmask>,nousertaint``
   and any of the ORed together values being written to ``tainted`` match with
   the bitmask declared on panic_on_taint.
-  See :doc:`/admin-guide/kernel-parameters` for more details on that particular
-  kernel command line option and its optional ``nousertaint`` switch.
+  See Documentation/admin-guide/kernel-parameters.rst for more details on
+  that particular kernel command line option and its optional
+  ``nousertaint`` switch.
 
 threads-max
 ===========
@@ -1391,7 +1394,7 @@ If a value outside of this range is written to ``threads-max`` an
 traceoff_on_warning
 ===================
 
-When set, disables tracing (see :doc:`/trace/ftrace`) when a
+When set, disables tracing (see Documentation/trace/ftrace.rst) when a
 ``WARN()`` is hit.
 
 
@@ -1411,8 +1414,8 @@ will send them to printk() again.
 
 This only works if the kernel was booted with ``tp_printk`` enabled.
 
-See :doc:`/admin-guide/kernel-parameters` and
-:doc:`/trace/boottime-trace`.
+See Documentation/admin-guide/kernel-parameters.rst and
+Documentation/trace/boottime-trace.rst.
 
 
 .. _unaligned-dump-stack:
-- 
GitLab


From 4cd4bdf85c79a87a3510b2e729b074d97546ee52 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:21 +0200
Subject: [PATCH 3269/3804] docs: block: biodoc.rst: avoid using ReST
 :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/1d26256b305e02da82a6a990910a5b5fb9a0355e.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/block/biodoc.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/block/biodoc.rst b/Documentation/block/biodoc.rst
index 1d4d71e391afe..2098477851a4b 100644
--- a/Documentation/block/biodoc.rst
+++ b/Documentation/block/biodoc.rst
@@ -196,7 +196,7 @@ a virtual address mapping (unlike the earlier scheme of virtual address
 do not have a corresponding kernel virtual address space mapping) and
 low-memory pages.
 
-Note: Please refer to :doc:`/core-api/dma-api-howto` for a discussion
+Note: Please refer to Documentation/core-api/dma-api-howto.rst for a discussion
 on PCI high mem DMA aspects and mapping of scatter gather lists, and support
 for 64 bit PCI.
 
-- 
GitLab


From 6aadf740aab962702ef97cdba29877867cbc0e31 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:22 +0200
Subject: [PATCH 3270/3804] docs: bpf: bpf_lsm.rst: avoid using ReST :doc:`foo`
 markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/fcee73b9bb55a8d0efd07cf04076c66278a42db4.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/bpf/bpf_lsm.rst | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/bpf/bpf_lsm.rst b/Documentation/bpf/bpf_lsm.rst
index 1c0a75a51d79b..0dc3fb0d95446 100644
--- a/Documentation/bpf/bpf_lsm.rst
+++ b/Documentation/bpf/bpf_lsm.rst
@@ -20,10 +20,10 @@ LSM hook:
 Other LSM hooks which can be instrumented can be found in
 ``include/linux/lsm_hooks.h``.
 
-eBPF programs that use :doc:`/bpf/btf` do not need to include kernel headers
-for accessing information from the attached eBPF program's context. They can
-simply declare the structures in the eBPF program and only specify the fields
-that need to be accessed.
+eBPF programs that use Documentation/bpf/btf.rst do not need to include kernel
+headers for accessing information from the attached eBPF program's context.
+They can simply declare the structures in the eBPF program and only specify
+the fields that need to be accessed.
 
 .. code-block:: c
 
@@ -88,8 +88,9 @@ example:
 
 The ``__attribute__((preserve_access_index))`` is a clang feature that allows
 the BPF verifier to update the offsets for the access at runtime using the
-:doc:`/bpf/btf` information. Since the BPF verifier is aware of the types, it
-also validates all the accesses made to the various types in the eBPF program.
+Documentation/bpf/btf.rst information. Since the BPF verifier is aware of the
+types, it also validates all the accesses made to the various types in the
+eBPF program.
 
 Loading
 -------
-- 
GitLab


From a822b2ee266587c3665c471f0de86a3ccbc280b1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:23 +0200
Subject: [PATCH 3271/3804] docs: core-api: avoid using ReST :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/d967d490b6655735b7df292f88859b5a1b07d0d7.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/core-api/bus-virt-phys-mapping.rst | 2 +-
 Documentation/core-api/dma-api.rst               | 5 +++--
 Documentation/core-api/dma-isa-lpc.rst           | 2 +-
 Documentation/core-api/index.rst                 | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/core-api/bus-virt-phys-mapping.rst b/Documentation/core-api/bus-virt-phys-mapping.rst
index c7bc99cd2e21d..c72b24a7d52c2 100644
--- a/Documentation/core-api/bus-virt-phys-mapping.rst
+++ b/Documentation/core-api/bus-virt-phys-mapping.rst
@@ -8,7 +8,7 @@ How to access I/O mapped memory from within device drivers
 
 	The virt_to_bus() and bus_to_virt() functions have been
 	superseded by the functionality provided by the PCI DMA interface
-	(see :doc:`/core-api/dma-api-howto`).  They continue
+	(see Documentation/core-api/dma-api-howto.rst).  They continue
 	to be documented below for historical purposes, but new code
 	must not use them. --davidm 00/12/12
 
diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst
index 00a1d4fa3f9e4..6d6d0edd2d278 100644
--- a/Documentation/core-api/dma-api.rst
+++ b/Documentation/core-api/dma-api.rst
@@ -5,7 +5,7 @@ Dynamic DMA mapping using the generic device
 :Author: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
 
 This document describes the DMA API.  For a more gentle introduction
-of the API (and actual examples), see :doc:`/core-api/dma-api-howto`.
+of the API (and actual examples), see Documentation/core-api/dma-api-howto.rst.
 
 This API is split into two pieces.  Part I describes the basic API.
 Part II describes extensions for supporting non-consistent memory
@@ -479,7 +479,8 @@ without the _attrs suffixes, except that they pass an optional
 dma_attrs.
 
 The interpretation of DMA attributes is architecture-specific, and
-each attribute should be documented in :doc:`/core-api/dma-attributes`.
+each attribute should be documented in
+Documentation/core-api/dma-attributes.rst.
 
 If dma_attrs are 0, the semantics of each of these functions
 is identical to those of the corresponding function
diff --git a/Documentation/core-api/dma-isa-lpc.rst b/Documentation/core-api/dma-isa-lpc.rst
index e59a3d35a93dc..17b193603f0ae 100644
--- a/Documentation/core-api/dma-isa-lpc.rst
+++ b/Documentation/core-api/dma-isa-lpc.rst
@@ -17,7 +17,7 @@ To do ISA style DMA you need to include two headers::
 	#include <asm/dma.h>
 
 The first is the generic DMA API used to convert virtual addresses to
-bus addresses (see :doc:`/core-api/dma-api` for details).
+bus addresses (see Documentation/core-api/dma-api.rst for details).
 
 The second contains the routines specific to ISA DMA transfers. Since
 this is not present on all platforms make sure you construct your
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index f1c9d20bd42dd..5de2c7a4b1b3c 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -48,7 +48,7 @@ Concurrency primitives
 ======================
 
 How Linux keeps everything from happening at the same time.  See
-:doc:`/locking/index` for more related documentation.
+Documentation/locking/index.rst for more related documentation.
 
 .. toctree::
    :maxdepth: 1
@@ -77,7 +77,7 @@ Memory management
 =================
 
 How to allocate and use memory in the kernel.  Note that there is a lot
-more memory-management documentation in :doc:`/vm/index`.
+more memory-management documentation in Documentation/vm/index.rst.
 
 .. toctree::
    :maxdepth: 1
-- 
GitLab


From 3a8b57d27a19a341e8d6222630a2c532ef594c42 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:24 +0200
Subject: [PATCH 3272/3804] docs: dev-tools: testing-overview.rst: avoid using
 ReST :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6bbecd4170ee08f36f8060b0719a46c64a21aefc.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/testing-overview.rst | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/dev-tools/testing-overview.rst b/Documentation/dev-tools/testing-overview.rst
index b5b46709969ce..65feb81edb14a 100644
--- a/Documentation/dev-tools/testing-overview.rst
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -71,15 +71,15 @@ can be used to verify that a test is executing particular functions or lines
 of code. This is useful for determining how much of the kernel is being tested,
 and for finding corner-cases which are not covered by the appropriate test.
 
-:doc:`gcov` is GCC's coverage testing tool, which can be used with the kernel
-to get global or per-module coverage. Unlike KCOV, it does not record per-task
-coverage. Coverage data can be read from debugfs, and interpreted using the
-usual gcov tooling.
-
-:doc:`kcov` is a feature which can be built in to the kernel to allow
-capturing coverage on a per-task level. It's therefore useful for fuzzing and
-other situations where information about code executed during, for example, a
-single syscall is useful.
+Documentation/dev-tools/gcov.rst is GCC's coverage testing tool, which can be
+used with the kernel to get global or per-module coverage. Unlike KCOV, it
+does not record per-task coverage. Coverage data can be read from debugfs,
+and interpreted using the usual gcov tooling.
+
+Documentation/dev-tools/kcov.rst is a feature which can be built in to the
+kernel to allow capturing coverage on a per-task level. It's therefore useful
+for fuzzing and other situations where information about code executed during,
+for example, a single syscall is useful.
 
 
 Dynamic Analysis Tools
-- 
GitLab


From 654a5bd0eadbef5f7196215b755dcecd965f11c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:25 +0200
Subject: [PATCH 3273/3804] docs: dev-tools: kunit: avoid using ReST :doc:`foo`
 markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Reviewed-by: David Gow <davidgow@google.com>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6fde409079959a95b62b9b2692503608d7ff0dbd.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/dev-tools/kunit/faq.rst   |  2 +-
 Documentation/dev-tools/kunit/index.rst | 14 +++++++-------
 Documentation/dev-tools/kunit/start.rst |  4 ++--
 Documentation/dev-tools/kunit/tips.rst  |  5 +++--
 Documentation/dev-tools/kunit/usage.rst |  8 +++++---
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/Documentation/dev-tools/kunit/faq.rst b/Documentation/dev-tools/kunit/faq.rst
index 8d5029ad210a5..5c6555d020f31 100644
--- a/Documentation/dev-tools/kunit/faq.rst
+++ b/Documentation/dev-tools/kunit/faq.rst
@@ -97,7 +97,7 @@ things to try.
    modules will automatically execute associated tests when loaded. Test results
    can be collected from ``/sys/kernel/debug/kunit/<test suite>/results``, and
    can be parsed with ``kunit.py parse``. For more details, see "KUnit on
-   non-UML architectures" in :doc:`usage`.
+   non-UML architectures" in Documentation/dev-tools/kunit/usage.rst.
 
 If none of the above tricks help, you are always welcome to email any issues to
 kunit-dev@googlegroups.com.
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
index 8484788383475..25d92a9a05ea5 100644
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -36,7 +36,7 @@ To make running these tests (and reading the results) easier, KUnit offers
 results. This provides a quick way of running KUnit tests during development,
 without requiring a virtual machine or separate hardware.
 
-Get started now: :doc:`start`
+Get started now: Documentation/dev-tools/kunit/start.rst
 
 Why KUnit?
 ==========
@@ -88,9 +88,9 @@ it takes to read their test log?
 How do I use it?
 ================
 
-*   :doc:`start` - for new users of KUnit
-*   :doc:`tips` - for short examples of best practices
-*   :doc:`usage` - for a more detailed explanation of KUnit features
-*   :doc:`api/index` - for the list of KUnit APIs used for testing
-*   :doc:`kunit-tool` - for more information on the kunit_tool helper script
-*   :doc:`faq` - for answers to some common questions about KUnit
+*   Documentation/dev-tools/kunit/start.rst - for new users of KUnit
+*   Documentation/dev-tools/kunit/tips.rst - for short examples of best practices
+*   Documentation/dev-tools/kunit/usage.rst - for a more detailed explanation of KUnit features
+*   Documentation/dev-tools/kunit/api/index.rst - for the list of KUnit APIs used for testing
+*   Documentation/dev-tools/kunit/kunit-tool.rst - for more information on the kunit_tool helper script
+*   Documentation/dev-tools/kunit/faq.rst - for answers to some common questions about KUnit
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
index 0e65cabe08eb9..63ef7b625c138 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -21,7 +21,7 @@ The wrapper can be run with:
 	./tools/testing/kunit/kunit.py run
 
 For more information on this wrapper (also called kunit_tool) check out the
-:doc:`kunit-tool` page.
+Documentation/dev-tools/kunit/kunit-tool.rst page.
 
 Creating a .kunitconfig
 -----------------------
@@ -234,7 +234,7 @@ Congrats! You just wrote your first KUnit test!
 
 Next Steps
 ==========
-*   Check out the :doc:`tips` page for tips on
+*   Check out the Documentation/dev-tools/kunit/tips.rst page for tips on
     writing idiomatic KUnit tests.
 *   Optional: see the :doc:`usage` page for a more
     in-depth explanation of KUnit.
diff --git a/Documentation/dev-tools/kunit/tips.rst b/Documentation/dev-tools/kunit/tips.rst
index 8d8c238f7f79f..492d2ded2f5a7 100644
--- a/Documentation/dev-tools/kunit/tips.rst
+++ b/Documentation/dev-tools/kunit/tips.rst
@@ -125,7 +125,8 @@ Here's a slightly in-depth example of how one could implement "mocking":
 
 
 Note: here we're able to get away with using ``test->priv``, but if you wanted
-something more flexible you could use a named ``kunit_resource``, see :doc:`api/test`.
+something more flexible you could use a named ``kunit_resource``, see
+Documentation/dev-tools/kunit/api/test.rst.
 
 Failing the current test
 ------------------------
@@ -185,5 +186,5 @@ Alternatively, one can take full control over the error message by using ``KUNIT
 
 Next Steps
 ==========
-*   Optional: see the :doc:`usage` page for a more
+*   Optional: see the Documentation/dev-tools/kunit/usage.rst page for a more
     in-depth explanation of KUnit.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index 650f99590df57..3ee7ab91f7125 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -10,7 +10,7 @@ understand it. This guide assumes a working knowledge of the Linux kernel and
 some basic knowledge of testing.
 
 For a high level introduction to KUnit, including setting up KUnit for your
-project, see :doc:`start`.
+project, see Documentation/dev-tools/kunit/start.rst.
 
 Organization of this document
 =============================
@@ -99,7 +99,8 @@ violated; however, the test will continue running, potentially trying other
 expectations until the test case ends or is otherwise terminated. This is as
 opposed to *assertions* which are discussed later.
 
-To learn about more expectations supported by KUnit, see :doc:`api/test`.
+To learn about more expectations supported by KUnit, see
+Documentation/dev-tools/kunit/api/test.rst.
 
 .. note::
    A single test case should be pretty short, pretty easy to understand,
@@ -216,7 +217,8 @@ test suite in a special linker section so that it can be run by KUnit either
 after late_init, or when the test module is loaded (depending on whether the
 test was built in or not).
 
-For more information on these types of things see the :doc:`api/test`.
+For more information on these types of things see the
+Documentation/dev-tools/kunit/api/test.rst.
 
 Common Patterns
 ===============
-- 
GitLab


From 6dce82b28a93492af7a817b2b3166aaf775e4aba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:26 +0200
Subject: [PATCH 3274/3804] docs: devicetree: bindings: submitting-patches.rst:
 avoid using ReST :doc:`foo` markup

automarkup.py automatically converts plain Documentation/foo.rst
file names into :doc:`foo` cross-references. So, use the file name
in the sources instead of the :doc:`foo` markup.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/0048c23d47b582dd1a1959628fd2b895209ac826.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../devicetree/bindings/submitting-patches.rst        | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/submitting-patches.rst b/Documentation/devicetree/bindings/submitting-patches.rst
index 104fa8fb2c177..8087780f16859 100644
--- a/Documentation/devicetree/bindings/submitting-patches.rst
+++ b/Documentation/devicetree/bindings/submitting-patches.rst
@@ -7,8 +7,8 @@ Submitting Devicetree (DT) binding patches
 I. For patch submitters
 =======================
 
-  0) Normal patch submission rules from Documentation/process/submitting-patches.rst
-     applies.
+  0) Normal patch submission rules from
+     Documentation/process/submitting-patches.rst apply.
 
   1) The Documentation/ and include/dt-bindings/ portion of the patch should
      be a separate patch. The preferred subject prefix for binding patches is::
@@ -25,8 +25,8 @@ I. For patch submitters
 
        make dt_binding_check
 
-     See Documentation/devicetree/bindings/writing-schema.rst for more details about
-     schema and tools setup.
+     See Documentation/devicetree/bindings/writing-schema.rst for more details
+     about schema and tools setup.
 
   3) DT binding files should be dual licensed. The preferred license tag is
      (GPL-2.0-only OR BSD-2-Clause).
@@ -84,7 +84,8 @@ II. For kernel maintainers
 III. Notes
 ==========
 
-  0) Please see :doc:`ABI` for details regarding devicetree ABI.
+  0) Please see Documentation/devicetree/bindings/ABI.rst for details
+     regarding devicetree ABI.
 
   1) This document is intended as a general familiarization with the process as
      decided at the 2013 Kernel Summit.  When in doubt, the current word of the
-- 
GitLab


From fd88d2e598dcd13807ecabfc6e1170d2c0ab830a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:27 +0200
Subject: [PATCH 3275/3804] docs: doc-guide: avoid using ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/d6cbe5183406e3378ed4bd0f84f4bcf85a15009c.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/doc-guide/contributing.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/doc-guide/contributing.rst b/Documentation/doc-guide/contributing.rst
index 67ee3691f91f6..207fd93d7c807 100644
--- a/Documentation/doc-guide/contributing.rst
+++ b/Documentation/doc-guide/contributing.rst
@@ -237,10 +237,10 @@ We have been trying to improve the situation through the creation of
 a set of "books" that group documentation for specific readers.  These
 include:
 
- - :doc:`../admin-guide/index`
- - :doc:`../core-api/index`
- - :doc:`../driver-api/index`
- - :doc:`../userspace-api/index`
+ - Documentation/admin-guide/index.rst
+ - Documentation/core-api/index.rst
+ - Documentation/driver-api/index.rst
+ - Documentation/userspace-api/index.rst
 
 As well as this book on documentation itself.
 
-- 
GitLab


From 29602b7c1ecc4a4692e903ac85b09d6b79e0e57d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:28 +0200
Subject: [PATCH 3276/3804] docs: driver-api: avoid using ReST :doc:`foo`
 markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/095b04bff6d49b4097382398bb91102eaa3f0fd3.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/ioctl.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/ioctl.rst b/Documentation/driver-api/ioctl.rst
index 5b76e765827d0..35795f6a151ae 100644
--- a/Documentation/driver-api/ioctl.rst
+++ b/Documentation/driver-api/ioctl.rst
@@ -34,7 +34,7 @@ _IO/_IOR/_IOW/_IOWR
 
 type
    An 8-bit number, often a character literal, specific to a subsystem
-   or driver, and listed in :doc:`../userspace-api/ioctl/ioctl-number`
+   or driver, and listed in Documentation/userspace-api/ioctl/ioctl-number.rst
 
 nr
   An 8-bit number identifying the specific command, unique for a give
-- 
GitLab


From 85aa9afd7bf1b239480dd73d5535978b99300fe7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:29 +0200
Subject: [PATCH 3277/3804] docs: driver-api: gpio: using-gpio.rst: avoid using
 ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/506a41353937c455c2e79b5960b0976edc8aa9e9.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/gpio/using-gpio.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/driver-api/gpio/using-gpio.rst b/Documentation/driver-api/gpio/using-gpio.rst
index dda069444032d..64c8d3f76c3a3 100644
--- a/Documentation/driver-api/gpio/using-gpio.rst
+++ b/Documentation/driver-api/gpio/using-gpio.rst
@@ -9,13 +9,13 @@ with them.
 
 For examples of already existing generic drivers that will also be good
 examples for any other kernel drivers you want to author, refer to
-:doc:`drivers-on-gpio`
+Documentation/driver-api/gpio/drivers-on-gpio.rst
 
 For any kind of mass produced system you want to support, such as servers,
 laptops, phones, tablets, routers, and any consumer or office or business goods
 using appropriate kernel drivers is paramount. Submit your code for inclusion
 in the upstream Linux kernel when you feel it is mature enough and you will get
-help to refine it, see :doc:`../../process/submitting-patches`.
+help to refine it, see Documentation/process/submitting-patches.rst.
 
 In Linux GPIO lines also have a userspace ABI.
 
-- 
GitLab


From bbbaf2264db0f0a29d69e3690df67348d95f1cb3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:30 +0200
Subject: [PATCH 3278/3804] docs: driver-api: surface_aggregator: avoid using
 ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Acked-by: Maximilian Luz <luzmaximilian@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/6097027b4de4c9015485cb73b297b98660c4296d.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../surface_aggregator/clients/index.rst          |  3 ++-
 .../driver-api/surface_aggregator/internal.rst    | 15 ++++++++-------
 .../driver-api/surface_aggregator/overview.rst    |  6 ++++--
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/Documentation/driver-api/surface_aggregator/clients/index.rst b/Documentation/driver-api/surface_aggregator/clients/index.rst
index 98ea9946b8a2e..30160513afa5b 100644
--- a/Documentation/driver-api/surface_aggregator/clients/index.rst
+++ b/Documentation/driver-api/surface_aggregator/clients/index.rst
@@ -5,7 +5,8 @@ Client Driver Documentation
 ===========================
 
 This is the documentation for client drivers themselves. Refer to
-:doc:`../client` for documentation on how to write client drivers.
+Documentation/driver-api/surface_aggregator/client.rst for documentation
+on how to write client drivers.
 
 .. toctree::
    :maxdepth: 1
diff --git a/Documentation/driver-api/surface_aggregator/internal.rst b/Documentation/driver-api/surface_aggregator/internal.rst
index 72704734982ab..8c7c80c9f4185 100644
--- a/Documentation/driver-api/surface_aggregator/internal.rst
+++ b/Documentation/driver-api/surface_aggregator/internal.rst
@@ -87,10 +87,11 @@ native SSAM devices, i.e. devices that are not defined in ACPI and not
 implemented as platform devices, via |ssam_device| and |ssam_device_driver|
 simplify management of client devices and client drivers.
 
-Refer to :doc:`client` for documentation regarding the client device/driver
-API and interface options for other kernel drivers. It is recommended to
-familiarize oneself with that chapter and the :doc:`ssh` before continuing
-with the architectural overview below.
+Refer to Documentation/driver-api/surface_aggregator/client.rst for
+documentation regarding the client device/driver API and interface options
+for other kernel drivers. It is recommended to familiarize oneself with
+that chapter and the Documentation/driver-api/surface_aggregator/ssh.rst
+before continuing with the architectural overview below.
 
 
 Packet Transport Layer
@@ -190,9 +191,9 @@ with success on the transmitter thread.
 
 Transmission of sequenced packets is limited by the number of concurrently
 pending packets, i.e. a limit on how many packets may be waiting for an ACK
-from the EC in parallel. This limit is currently set to one (see :doc:`ssh`
-for the reasoning behind this). Control packets (i.e. ACK and NAK) can
-always be transmitted.
+from the EC in parallel. This limit is currently set to one (see
+Documentation/driver-api/surface_aggregator/ssh.rst for the reasoning behind
+this). Control packets (i.e. ACK and NAK) can always be transmitted.
 
 Receiver Thread
 ---------------
diff --git a/Documentation/driver-api/surface_aggregator/overview.rst b/Documentation/driver-api/surface_aggregator/overview.rst
index 1e9d57e500637..26415e1ab7da6 100644
--- a/Documentation/driver-api/surface_aggregator/overview.rst
+++ b/Documentation/driver-api/surface_aggregator/overview.rst
@@ -73,5 +73,7 @@ being a direct response to a previous request. We may also refer to requests
 without response as commands. In general, events need to be enabled via one
 of multiple dedicated requests before they are sent by the EC.
 
-See :doc:`ssh` for a more technical protocol documentation and
-:doc:`internal` for an overview of the internal driver architecture.
+See Documentation/driver-api/surface_aggregator/ssh.rst for a
+more technical protocol documentation and
+Documentation/driver-api/surface_aggregator/internal.rst for an
+overview of the internal driver architecture.
-- 
GitLab


From ab8e8da694d4921252c2dd3fecbd2ab64eaf0eb2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:31 +0200
Subject: [PATCH 3279/3804] docs: driver-api: usb: avoid using ReST :doc:`foo`
 markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/2cd2dc3e6bacde587aeb09a3951594cfb0102014.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/usb/dma.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/driver-api/usb/dma.rst b/Documentation/driver-api/usb/dma.rst
index 2b3dbd3265b4e..d32c27e11b907 100644
--- a/Documentation/driver-api/usb/dma.rst
+++ b/Documentation/driver-api/usb/dma.rst
@@ -10,7 +10,7 @@ API overview
 
 The big picture is that USB drivers can continue to ignore most DMA issues,
 though they still must provide DMA-ready buffers (see
-:doc:`/core-api/dma-api-howto`).  That's how they've worked through
+Documentation/core-api/dma-api-howto.rst).  That's how they've worked through
 the 2.4 (and earlier) kernels, or they can now be DMA-aware.
 
 DMA-aware usb drivers:
@@ -60,7 +60,7 @@ and effects like cache-trashing can impose subtle penalties.
   force a consistent memory access ordering by using memory barriers.  It's
   not using a streaming DMA mapping, so it's good for small transfers on
   systems where the I/O would otherwise thrash an IOMMU mapping.  (See
-  :doc:`/core-api/dma-api-howto` for definitions of "coherent" and
+  Documentation/core-api/dma-api-howto.rst for definitions of "coherent" and
   "streaming" DMA mappings.)
 
   Asking for 1/Nth of a page (as well as asking for N pages) is reasonably
@@ -91,7 +91,7 @@ Working with existing buffers
 Existing buffers aren't usable for DMA without first being mapped into the
 DMA address space of the device.  However, most buffers passed to your
 driver can safely be used with such DMA mapping.  (See the first section
-of :doc:`/core-api/dma-api-howto`, titled "What memory is DMA-able?")
+of Documentation/core-api/dma-api-howto.rst, titled "What memory is DMA-able?")
 
 - When you're using scatterlists, you can map everything at once.  On some
   systems, this kicks in an IOMMU and turns the scatterlists into single
-- 
GitLab


From 4d361d6cc74512308beac8997e4b66d5231e8bfe Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:32 +0200
Subject: [PATCH 3280/3804] docs: firmware-guide: acpi: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/7162043c18f1ea96c446b332400e44e8087ba142.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../firmware-guide/acpi/dsd/data-node-references.rst       | 3 ++-
 Documentation/firmware-guide/acpi/dsd/graph.rst            | 2 +-
 Documentation/firmware-guide/acpi/enumeration.rst          | 7 ++++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
index 9b17dc77d18c5..b7ad47df49de0 100644
--- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
+++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst
@@ -79,7 +79,8 @@ the ANOD object which is also the final target node of the reference.
 	    })
 	}
 
-Please also see a graph example in :doc:`graph`.
+Please also see a graph example in
+Documentation/firmware-guide/acpi/dsd/graph.rst.
 
 References
 ==========
diff --git a/Documentation/firmware-guide/acpi/dsd/graph.rst b/Documentation/firmware-guide/acpi/dsd/graph.rst
index 7072db801aeb8..4341299aa9376 100644
--- a/Documentation/firmware-guide/acpi/dsd/graph.rst
+++ b/Documentation/firmware-guide/acpi/dsd/graph.rst
@@ -174,4 +174,4 @@ References
     referenced 2016-10-04.
 
 [7] _DSD Device Properties Usage Rules.
-    :doc:`../DSD-properties-rules`
+    Documentation/firmware-guide/acpi/DSD-properties-rules.rst
diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index 9f0d5c854fa43..18074eb71860f 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -339,8 +339,8 @@ a code like this::
 There are also devm_* versions of these functions which release the
 descriptors once the device is released.
 
-See Documentation/firmware-guide/acpi/gpio-properties.rst for more information about the
-_DSD binding related to GPIOs.
+See Documentation/firmware-guide/acpi/gpio-properties.rst for more information
+about the _DSD binding related to GPIOs.
 
 MFD devices
 ===========
@@ -460,7 +460,8 @@ the _DSD of the device object itself or the _DSD of its ancestor in the
 Otherwise, the _DSD itself is regarded as invalid and therefore the "compatible"
 property returned by it is meaningless.
 
-Refer to :doc:`DSD-properties-rules` for more information.
+Refer to Documentation/firmware-guide/acpi/DSD-properties-rules.rst for more
+information.
 
 PCI hierarchy representation
 ============================
-- 
GitLab


From 25edd3a1625f76ac2265f3357550a782bd2ac7ff Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:33 +0200
Subject: [PATCH 3281/3804] docs: i2c: avoid using ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Acked-by: Wolfram Sang <wsa@kernel.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/569722e3f7d73d746c145ea78d2b4fbe5defee90.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/i2c/instantiating-devices.rst | 2 +-
 Documentation/i2c/old-module-parameters.rst | 3 ++-
 Documentation/i2c/smbus-protocol.rst        | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst
index e558e0a77e0c5..890c9360ce194 100644
--- a/Documentation/i2c/instantiating-devices.rst
+++ b/Documentation/i2c/instantiating-devices.rst
@@ -59,7 +59,7 @@ Declare the I2C devices via ACPI
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ACPI can also describe I2C devices. There is special documentation for this
-which is currently located at :doc:`../firmware-guide/acpi/enumeration`.
+which is currently located at Documentation/firmware-guide/acpi/enumeration.rst.
 
 
 Declare the I2C devices in board files
diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst
index 38e55829dee82..b08b6daabce9b 100644
--- a/Documentation/i2c/old-module-parameters.rst
+++ b/Documentation/i2c/old-module-parameters.rst
@@ -17,7 +17,8 @@ address), ``force`` (to forcibly attach the driver to a given device) and
 With the conversion of the I2C subsystem to the standard device driver
 binding model, it became clear that these per-module parameters were no
 longer needed, and that a centralized implementation was possible. The new,
-sysfs-based interface is described in :doc:`instantiating-devices`, section
+sysfs-based interface is described in
+Documentation/i2c/instantiating-devices.rst, section
 "Method 4: Instantiate from user-space".
 
 Below is a mapping from the old module parameters to the new interface.
diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst
index 64689d19dd518..9e07e6bbe6a39 100644
--- a/Documentation/i2c/smbus-protocol.rst
+++ b/Documentation/i2c/smbus-protocol.rst
@@ -27,8 +27,8 @@ a different protocol operation entirely.
 Each transaction type corresponds to a functionality flag. Before calling a
 transaction function, a device driver should always check (just once) for
 the corresponding functionality flag to ensure that the underlying I2C
-adapter supports the transaction in question. See :doc:`functionality` for
-the details.
+adapter supports the transaction in question. See
+Documentation/i2c/functionality.rst for the details.
 
 
 Key to symbols
-- 
GitLab


From 7f3f7bfbbe02cdfeacf9375c73fd33787554bf8f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:34 +0200
Subject: [PATCH 3282/3804] docs: kernel-hacking: hacking.rst: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/9537b74d897fab13552535d79337060a3b241b8c.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/kernel-hacking/hacking.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/kernel-hacking/hacking.rst b/Documentation/kernel-hacking/hacking.rst
index 451523424942b..df65c19aa7df3 100644
--- a/Documentation/kernel-hacking/hacking.rst
+++ b/Documentation/kernel-hacking/hacking.rst
@@ -601,7 +601,7 @@ Defined in ``include/linux/export.h``
 
 This is the variant of `EXPORT_SYMBOL()` that allows specifying a symbol
 namespace. Symbol Namespaces are documented in
-:doc:`../core-api/symbol-namespaces`
+Documentation/core-api/symbol-namespaces.rst
 
 :c:func:`EXPORT_SYMBOL_NS_GPL()`
 --------------------------------
@@ -610,7 +610,7 @@ Defined in ``include/linux/export.h``
 
 This is the variant of `EXPORT_SYMBOL_GPL()` that allows specifying a symbol
 namespace. Symbol Namespaces are documented in
-:doc:`../core-api/symbol-namespaces`
+Documentation/core-api/symbol-namespaces.rst
 
 Routines and Conventions
 ========================
-- 
GitLab


From 8d4a0adc9cab0d2a5643bacfd42cd64d1f09ae09 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:35 +0200
Subject: [PATCH 3283/3804] docs: networking: devlink: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/4553858bc9a5442eba6d71caff8047e84ece4d9b.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/networking/devlink/devlink-region.rst | 2 +-
 Documentation/networking/devlink/devlink-trap.rst   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 3654c3e9658fd..58fe95e9a49da 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -22,7 +22,7 @@ The major benefit to creating a region is to provide access to internal
 address regions that are otherwise inaccessible to the user.
 
 Regions may also be used to provide an additional way to debug complex error
-states, but see also :doc:`devlink-health`
+states, but see also Documentation/networking/devlink/devlink-health.rst
 
 Regions may optionally support capturing a snapshot on demand via the
 ``DEVLINK_CMD_REGION_NEW`` netlink message. A driver wishing to allow
diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 935b6397e8cf6..efa5f7f42c888 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -495,8 +495,8 @@ help debug packet drops caused by these exceptions. The following list includes
 links to the description of driver-specific traps registered by various device
 drivers:
 
-  * :doc:`netdevsim`
-  * :doc:`mlxsw`
+  * Documentation/networking/devlink/netdevsim.rst
+  * Documentation/networking/devlink/mlxsw.rst
 
 .. _Generic-Packet-Trap-Groups:
 
-- 
GitLab


From e5424f0aec76abd6567e844fbd9a0eb7d138374b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:36 +0200
Subject: [PATCH 3284/3804] docs: PCI: endpoint: pci-endpoint-cfs.rst: avoid
 using ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/4b18febe4a4f030dd9d43e5e6a2a0aa28bd5b734.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/PCI/endpoint/pci-endpoint-cfs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
index 696f8eeb47388..db609b97ad589 100644
--- a/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
+++ b/Documentation/PCI/endpoint/pci-endpoint-cfs.rst
@@ -125,4 +125,4 @@ all the EPF devices are created and linked with the EPC device.
 						| interrupt_pin
 						| function
 
-[1] :doc:`pci-endpoint`
+[1] Documentation/PCI/endpoint/pci-endpoint.rst
-- 
GitLab


From bffbae6d19edc72a408cdbe915d482be0c91e047 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:37 +0200
Subject: [PATCH 3285/3804] docs: PCI: pci.rst: avoid using ReST :doc:`foo`
 markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/8697cf945390f6b45fefb4c5fe22ed1c8070e32e.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/PCI/pci.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/PCI/pci.rst b/Documentation/PCI/pci.rst
index 814b40f8360b2..fa651e25d98c6 100644
--- a/Documentation/PCI/pci.rst
+++ b/Documentation/PCI/pci.rst
@@ -265,7 +265,7 @@ Set the DMA mask size
 ---------------------
 .. note::
    If anything below doesn't make sense, please refer to
-   :doc:`/core-api/dma-api`. This section is just a reminder that
+   Documentation/core-api/dma-api.rst. This section is just a reminder that
    drivers need to indicate DMA capabilities of the device and is not
    an authoritative source for DMA interfaces.
 
@@ -291,7 +291,7 @@ Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
 Setup shared control data
 -------------------------
 Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
-memory.  See :doc:`/core-api/dma-api` for a full description of
+memory.  See Documentation/core-api/dma-api.rst for a full description of
 the DMA APIs. This section is just a reminder that it needs to be done
 before enabling DMA on the device.
 
@@ -421,7 +421,7 @@ owners if there is one.
 
 Then clean up "consistent" buffers which contain the control data.
 
-See :doc:`/core-api/dma-api` for details on unmapping interfaces.
+See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
 
 
 Unregister from other subsystems
-- 
GitLab


From 9912d0bb9deeaa4b0680a94fbdaa3ae31e891c1b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:38 +0200
Subject: [PATCH 3286/3804] docs: process: submitting-patches.rst: avoid using
 ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/d172ab629c3e32c8d27ed4b9d2a209933e2a7178.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/submitting-patches.rst | 32 +++++++++-----------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index c66a19201deb5..0852bcf73630c 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -10,10 +10,11 @@ can greatly increase the chances of your change being accepted.
 
 This document contains a large number of suggestions in a relatively terse
 format.  For detailed information on how the kernel development process
-works, see :doc:`development-process`. Also, read :doc:`submit-checklist`
+works, see Documentation/process/development-process.rst. Also, read
+Documentation/process/submit-checklist.rst
 for a list of items to check before submitting code.  If you are submitting
-a driver, also read :doc:`submitting-drivers`; for device tree binding patches,
-read :doc:`submitting-patches`.
+a driver, also read Documentation/process/submitting-drivers.rst; for device
+tree binding patches, read Documentation/devicetree/bindings/submitting-patches.rst.
 
 This documentation assumes that you're using ``git`` to prepare your patches.
 If you're unfamiliar with ``git``, you would be well-advised to learn how to
@@ -178,8 +179,7 @@ Style-check your changes
 ------------------------
 
 Check your patch for basic style violations, details of which can be
-found in
-:ref:`Documentation/process/coding-style.rst <codingstyle>`.
+found in Documentation/process/coding-style.rst.
 Failure to do so simply wastes
 the reviewers time and will get your patch rejected, probably
 without even being read.
@@ -238,7 +238,7 @@ If you have a patch that fixes an exploitable security bug, send that patch
 to security@kernel.org.  For severe bugs, a short embargo may be considered
 to allow distributors to get the patch out to users; in such cases,
 obviously, the patch should not be sent to any public lists. See also
-:doc:`/admin-guide/security-bugs`.
+Documentation/admin-guide/security-bugs.rst.
 
 Patches that fix a severe bug in a released kernel should be directed
 toward the stable maintainers by putting a line like this::
@@ -246,9 +246,8 @@ toward the stable maintainers by putting a line like this::
   Cc: stable@vger.kernel.org
 
 into the sign-off area of your patch (note, NOT an email recipient).  You
-should also read
-:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
-in addition to this file.
+should also read Documentation/process/stable-kernel-rules.rst
+in addition to this document.
 
 If changes affect userland-kernel interfaces, please send the MAN-PAGES
 maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at
@@ -305,8 +304,8 @@ decreasing the likelihood of your MIME-attached change being accepted.
 Exception:  If your mailer is mangling patches then someone may ask
 you to re-send them using MIME.
 
-See :doc:`/process/email-clients` for hints about configuring your e-mail
-client so that it sends your patches untouched.
+See Documentation/process/email-clients.rst for hints about configuring
+your e-mail client so that it sends your patches untouched.
 
 Respond to review comments
 --------------------------
@@ -324,7 +323,7 @@ for their time.  Code review is a tiring and time-consuming process, and
 reviewers sometimes get grumpy.  Even in that case, though, respond
 politely and address the problems they have pointed out.
 
-See :doc:`email-clients` for recommendations on email
+See Documentation/process/email-clients.rst for recommendations on email
 clients and mailing list etiquette.
 
 
@@ -562,10 +561,10 @@ method for indicating a bug fixed by the patch. See :ref:`describe_changes`
 for more details.
 
 Note: Attaching a Fixes: tag does not subvert the stable kernel rules
-process nor the requirement to Cc: stable@vger.kernel.org on all stable 
+process nor the requirement to Cc: stable@vger.kernel.org on all stable
 patch candidates. For more information, please read
-:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
-     
+Documentation/process/stable-kernel-rules.rst.
+
 .. _the_canonical_patch_format:
 
 The canonical patch format
@@ -824,8 +823,7 @@ Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lore.kernel.org/r/20050711.125305.08322243.davem@davemloft.net>
 
-Kernel Documentation/process/coding-style.rst:
-  :ref:`Documentation/process/coding-style.rst <codingstyle>`
+Kernel Documentation/process/coding-style.rst
 
 Linus Torvalds's mail on the canonical patch format:
   <https://lore.kernel.org/r/Pine.LNX.4.58.0504071023190.28951@ppc970.osdl.org>
-- 
GitLab


From d3122273bd852f532c0d4632b7ade1b11953873d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:39 +0200
Subject: [PATCH 3287/3804] docs: security: landlock.rst: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/9174021ef2c87f395a4cc0895a4b2f7fd97db626.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/security/landlock.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/security/landlock.rst b/Documentation/security/landlock.rst
index 2e84925ae9719..3df68cb1d10fa 100644
--- a/Documentation/security/landlock.rst
+++ b/Documentation/security/landlock.rst
@@ -25,7 +25,8 @@ Any user can enforce Landlock rulesets on their processes.  They are merged and
 evaluated according to the inherited ones in a way that ensures that only more
 constraints can be added.
 
-User space documentation can be found here: :doc:`/userspace-api/landlock`.
+User space documentation can be found here:
+Documentation/userspace-api/landlock.rst.
 
 Guiding principles for safe access controls
 ===========================================
-- 
GitLab


From e480336c25d3dbdfdc5d18225b6f26804369ddba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:40 +0200
Subject: [PATCH 3288/3804] docs: trace: coresight: coresight.rst: avoid using
 ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: https://lore.kernel.org/r/c79be625f7c90468e13d5380f0e4e1c1ccfa2fc8.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/coresight/coresight.rst | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/coresight/coresight.rst b/Documentation/trace/coresight/coresight.rst
index 169749efd8d1a..1ec8dc35b1d8f 100644
--- a/Documentation/trace/coresight/coresight.rst
+++ b/Documentation/trace/coresight/coresight.rst
@@ -315,7 +315,8 @@ intermediate links as required.
 
 Note: ``cti_sys0`` appears in two of the connections lists above.
 CTIs can connect to multiple devices and are arranged in a star topology
-via the CTM. See (:doc:`coresight-ect`) [#fourth]_ for further details.
+via the CTM. See (Documentation/trace/coresight/coresight-ect.rst)
+[#fourth]_ for further details.
 Looking at this device we see 4 connections::
 
   linaro-developer:~# ls -l /sys/bus/coresight/devices/cti_sys0/connections
@@ -606,7 +607,8 @@ interface provided for that purpose by the generic STM API::
     crw-------    1 root     root       10,  61 Jan  3 18:11 /dev/stm0
     root@genericarmv8:~#
 
-Details on how to use the generic STM API can be found here:- :doc:`../stm` [#second]_.
+Details on how to use the generic STM API can be found here:
+- Documentation/trace/stm.rst [#second]_.
 
 The CTI & CTM Modules
 ---------------------
@@ -616,7 +618,7 @@ individual CTIs and components, and can propagate these between all CTIs via
 channels on the CTM (Cross Trigger Matrix).
 
 A separate documentation file is provided to explain the use of these devices.
-(:doc:`coresight-ect`) [#fourth]_.
+(Documentation/trace/coresight/coresight-ect.rst) [#fourth]_.
 
 
 .. [#first] Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
-- 
GitLab


From 81a2d57873d94b030de789ebe9b8009241abc775 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:41 +0200
Subject: [PATCH 3289/3804] docs: trace: ftrace.rst: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/cf9b03ff4b7917d9846503f198372bc6b821445b.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/trace/ftrace.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst
index b88c6b79db3ee..cfc81e98e0b8a 100644
--- a/Documentation/trace/ftrace.rst
+++ b/Documentation/trace/ftrace.rst
@@ -40,7 +40,7 @@ See events.rst for more information.
 Implementation Details
 ----------------------
 
-See :doc:`ftrace-design` for details for arch porters and such.
+See Documentation/trace/ftrace-design.rst for details for arch porters and such.
 
 
 The File System
-- 
GitLab


From 69fe5540153ff7d7ed4ee36ad4037603eb9c45c9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:42 +0200
Subject: [PATCH 3290/3804] docs: userspace-api: landlock.rst: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/24888a9c5da3c505b2bc274fcd83be348dbaf972.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/userspace-api/landlock.rst | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst
index 62c9361a3c7ff..f35552ff19ba8 100644
--- a/Documentation/userspace-api/landlock.rst
+++ b/Documentation/userspace-api/landlock.rst
@@ -145,7 +145,8 @@ Bind mounts and OverlayFS
 
 Landlock enables to restrict access to file hierarchies, which means that these
 access rights can be propagated with bind mounts (cf.
-:doc:`/filesystems/sharedsubtree`) but not with :doc:`/filesystems/overlayfs`.
+Documentation/filesystems/sharedsubtree.rst) but not with
+Documentation/filesystems/overlayfs.rst.
 
 A bind mount mirrors a source file hierarchy to a destination.  The destination
 hierarchy is then composed of the exact same files, on which Landlock rules can
@@ -170,8 +171,8 @@ Inheritance
 
 Every new thread resulting from a :manpage:`clone(2)` inherits Landlock domain
 restrictions from its parent.  This is similar to the seccomp inheritance (cf.
-:doc:`/userspace-api/seccomp_filter`) or any other LSM dealing with task's
-:manpage:`credentials(7)`.  For instance, one process's thread may apply
+Documentation/userspace-api/seccomp_filter.rst) or any other LSM dealing with
+task's :manpage:`credentials(7)`.  For instance, one process's thread may apply
 Landlock rules to itself, but they will not be automatically applied to other
 sibling threads (unlike POSIX thread credential changes, cf.
 :manpage:`nptl(7)`).
@@ -278,7 +279,7 @@ Memory usage
 ------------
 
 Kernel memory allocated to create rulesets is accounted and can be restricted
-by the :doc:`/admin-guide/cgroup-v1/memory`.
+by the Documentation/admin-guide/cgroup-v1/memory.rst.
 
 Questions and answers
 =====================
@@ -303,7 +304,7 @@ issues, especially when untrusted processes can manipulate them (cf.
 Additional documentation
 ========================
 
-* :doc:`/security/landlock`
+* Documentation/security/landlock.rst
 * https://landlock.io
 
 .. Links
-- 
GitLab


From c6c032bf2c5483c668461d5f33d83034c791fd91 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:43 +0200
Subject: [PATCH 3291/3804] docs: virt: kvm: s390-pv-boot.rst: avoid using ReST
 :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/8c0fc6578ff6384580fd0d622f363bbbd4fe91da.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/virt/kvm/s390-pv-boot.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/s390-pv-boot.rst b/Documentation/virt/kvm/s390-pv-boot.rst
index ad1f7866c001a..73a6083cb5e71 100644
--- a/Documentation/virt/kvm/s390-pv-boot.rst
+++ b/Documentation/virt/kvm/s390-pv-boot.rst
@@ -10,7 +10,7 @@ The memory of Protected Virtual Machines (PVMs) is not accessible to
 I/O or the hypervisor. In those cases where the hypervisor needs to
 access the memory of a PVM, that memory must be made accessible.
 Memory made accessible to the hypervisor will be encrypted. See
-:doc:`s390-pv` for details."
+Documentation/virt/kvm/s390-pv.rst for details."
 
 On IPL (boot) a small plaintext bootloader is started, which provides
 information about the encrypted components and necessary metadata to
-- 
GitLab


From 0ffd643875d3f7dac3cd9fbc637a3645c48ba21f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Wed, 16 Jun 2021 08:27:44 +0200
Subject: [PATCH 3292/3804] docs: x86: avoid using ReST :doc:`foo` markup

The :doc:`foo` tag is auto-generated via automarkup.py.
So, use the filename at the sources, instead of :doc:`foo`.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lore.kernel.org/r/17c68b5f1d72488431c77c1de9f13683fe9f536c.1623824363.git.mchehab+huawei@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/x86/boot.rst | 4 ++--
 Documentation/x86/mtrr.rst | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/x86/boot.rst b/Documentation/x86/boot.rst
index fc844913decef..894a198970055 100644
--- a/Documentation/x86/boot.rst
+++ b/Documentation/x86/boot.rst
@@ -1343,7 +1343,7 @@ follow::
 In addition to read/modify/write the setup header of the struct
 boot_params as that of 16-bit boot protocol, the boot loader should
 also fill the additional fields of the struct boot_params as
-described in chapter :doc:`zero-page`.
+described in chapter Documentation/x86/zero-page.rst.
 
 After setting up the struct boot_params, the boot loader can load the
 32/64-bit kernel in the same way as that of 16-bit boot protocol.
@@ -1379,7 +1379,7 @@ can be calculated as follows::
 In addition to read/modify/write the setup header of the struct
 boot_params as that of 16-bit boot protocol, the boot loader should
 also fill the additional fields of the struct boot_params as described
-in chapter :doc:`zero-page`.
+in chapter Documentation/x86/zero-page.rst.
 
 After setting up the struct boot_params, the boot loader can load
 64-bit kernel in the same way as that of 16-bit boot protocol, but
diff --git a/Documentation/x86/mtrr.rst b/Documentation/x86/mtrr.rst
index c5b695d753499..9f0b1851771a2 100644
--- a/Documentation/x86/mtrr.rst
+++ b/Documentation/x86/mtrr.rst
@@ -28,7 +28,7 @@ are aligned with platform MTRR setup. If MTRRs are only set up by the platform
 firmware code though and the OS does not make any specific MTRR mapping
 requests mtrr_type_lookup() should always return MTRR_TYPE_INVALID.
 
-For details refer to :doc:`pat`.
+For details refer to Documentation/x86/pat.rst.
 
 .. tip::
   On Intel P6 family processors (Pentium Pro, Pentium II and later)
-- 
GitLab


From 102caec1075fe993fb1ef95368ec1c3b2e5d0d77 Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Mon, 14 Jun 2021 16:07:24 -0600
Subject: [PATCH 3293/3804] docs: Take a little noise out of the build process

Sphinx 3.0 works at this point (albeit slowly) so stop scaring people
with a loud warning.  We also don't need to babble about CJK support in the
LaTeX build.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/conf.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/Documentation/conf.py b/Documentation/conf.py
index a05225056e086..7d92ec3e5b6e8 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -41,15 +41,7 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
               'maintainers_include', 'sphinx.ext.autosectionlabel',
               'kernel_abi', 'kernel_feat']
 
-#
-# cdomain is badly broken in Sphinx 3+.  Leaving it out generates *most*
-# of the docs correctly, but not all.  Scream bloody murder but allow
-# the process to proceed; hopefully somebody will fix this properly soon.
-#
 if major >= 3:
-    sys.stderr.write('''WARNING: The kernel documentation build process
-        support for Sphinx v3.0 and above is brand new. Be prepared for
-        possible issues in the generated output.\n''')
     if (major > 3) or (minor > 0 or patch >= 2):
         # Sphinx c function parser is more pedantic with regards to type
         # checking. Due to that, having macros at c:function cause problems.
@@ -368,7 +360,6 @@ latex_elements = {
 
 cjk_cmd = check_output(['fc-list', '--format="%{family[0]}\n"']).decode('utf-8', 'ignore')
 if cjk_cmd.find("Noto Sans CJK SC") >= 0:
-    print ("enabling CJK for LaTeX builder")
     latex_elements['preamble']  += '''
 	% This is needed for translations
         \\usepackage{xeCJK}
-- 
GitLab


From cf814bcfa1e661d6d2fe74ed6da3d2aa558c894a Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 17 Jun 2021 08:30:59 +0100
Subject: [PATCH 3294/3804] arm64: smp: Bump debugging information print down
 to KERN_DEBUG

This sort of information is only generally useful when debugging.

No need to have these sprinkled through the kernel log otherwise.

Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20210617073059.315542-1-lee.jones@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dcd7041b2b077..4d13b1d98e1cb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -352,7 +352,7 @@ void __cpu_die(unsigned int cpu)
 		pr_crit("CPU%u: cpu didn't die\n", cpu);
 		return;
 	}
-	pr_notice("CPU%u: shutdown\n", cpu);
+	pr_debug("CPU%u: shutdown\n", cpu);
 
 	/*
 	 * Now that the dying CPU is beyond the point of no return w.r.t.
-- 
GitLab


From 4fa82a87ba55f5eca7d194055572110652daa264 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Wed, 16 Jun 2021 13:33:35 +0800
Subject: [PATCH 3295/3804] opp: Allow required-opps to be used for non genpd
 use cases

Don't limit required_opp_table to genpd only. One possible use case is a
cpufreq based devfreq governor, which can use the required-opps property
to derive devfreq from cpufreq.

Note that the OPP core still doesn't support non-genpd required-opps in
_set_required_opps().

Suggested-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
[ Viresh: Update _set_required_opps() to check for genpd ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 10 ++++++++++
 drivers/opp/of.c   | 24 ++----------------------
 2 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index e366218d67367..b335c077f215b 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -893,6 +893,16 @@ static int _set_required_opps(struct device *dev,
 	if (!required_opp_tables)
 		return 0;
 
+	/*
+	 * We only support genpd's OPPs in the "required-opps" for now, as we
+	 * don't know much about other use cases. Error out if the required OPP
+	 * doesn't belong to a genpd.
+	 */
+	if (unlikely(!required_opp_tables[0]->is_genpd)) {
+		dev_err(dev, "required-opps don't belong to a genpd\n");
+		return -ENOENT;
+	}
+
 	/* required-opps not fully initialized yet */
 	if (lazy_linking_pending(opp_table))
 		return -EBUSY;
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index aa75a1caf08a3..d298e38aaf7ef 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -197,21 +197,8 @@ static void _opp_table_alloc_required_tables(struct opp_table *opp_table,
 		required_opp_tables[i] = _find_table_of_opp_np(required_np);
 		of_node_put(required_np);
 
-		if (IS_ERR(required_opp_tables[i])) {
+		if (IS_ERR(required_opp_tables[i]))
 			lazy = true;
-			continue;
-		}
-
-		/*
-		 * We only support genpd's OPPs in the "required-opps" for now,
-		 * as we don't know how much about other cases. Error out if the
-		 * required OPP doesn't belong to a genpd.
-		 */
-		if (!required_opp_tables[i]->is_genpd) {
-			dev_err(dev, "required-opp doesn't belong to genpd: %pOF\n",
-				required_np);
-			goto free_required_tables;
-		}
 	}
 
 	/* Let's do the linking later on */
@@ -379,13 +366,6 @@ static void lazy_link_required_opp_table(struct opp_table *new_table)
 	struct dev_pm_opp *opp;
 	int i, ret;
 
-	/*
-	 * We only support genpd's OPPs in the "required-opps" for now,
-	 * as we don't know much about other cases.
-	 */
-	if (!new_table->is_genpd)
-		return;
-
 	mutex_lock(&opp_table_lock);
 
 	list_for_each_entry_safe(opp_table, temp, &lazy_opp_tables, lazy) {
@@ -873,7 +853,7 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 		return ERR_PTR(-ENOMEM);
 
 	ret = _read_opp_key(new_opp, opp_table, np, &rate_not_available);
-	if (ret < 0 && !opp_table->is_genpd) {
+	if (ret < 0) {
 		dev_err(dev, "%s: opp key field not found\n", __func__);
 		goto free_opp;
 	}
-- 
GitLab


From 60b7ed54a41b550d50caf7f2418db4a7e75b5bdc Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Thu, 17 Jun 2021 13:55:06 -0400
Subject: [PATCH 3296/3804] powerpc/perf: Fix crash in
 perf_instruction_pointer() when ppmu is not set

On systems without any specific PMU driver support registered, running
perf record causes an Oops.

The relevant portion from call trace:

  BUG: Kernel NULL pointer dereference on read at 0x00000040
  Faulting instruction address: 0xc0021f0c
  Oops: Kernel access of bad area, sig: 11 [#1]
  BE PAGE_SIZE=4K PREEMPT CMPCPRO
  SAF3000 DIE NOTIFICATION
  CPU: 0 PID: 442 Comm: null_syscall Not tainted 5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164
  NIP:  c0021f0c LR: c00e8ad8 CTR: c00d8a5c
  NIP perf_instruction_pointer+0x10/0x60
  LR  perf_prepare_sample+0x344/0x674
  Call Trace:
    perf_prepare_sample+0x7c/0x674 (unreliable)
    perf_event_output_forward+0x3c/0x94
    __perf_event_overflow+0x74/0x14c
    perf_swevent_hrtimer+0xf8/0x170
    __hrtimer_run_queues.constprop.0+0x160/0x318
    hrtimer_interrupt+0x148/0x3b0
    timer_interrupt+0xc4/0x22c
    Decrementer_virt+0xb8/0xbc

During perf record session, perf_instruction_pointer() is called to
capture the sample IP. This function in core-book3s accesses
ppmu->flags. If a platform specific PMU driver is not registered, ppmu
is set to NULL and accessing its members results in a crash. Fix this
crash by checking if ppmu is set.

Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero")
Cc: stable@vger.kernel.org # v5.11+
Reported-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1623952506-1431-1-git-send-email-atrajeev@linux.vnet.ibm.com
---
 arch/powerpc/perf/core-book3s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b6a1ffb..51622411a7ccd 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs *regs)
 	bool use_siar = regs_use_siar(regs);
 	unsigned long siar = mfspr(SPRN_SIAR);
 
-	if (ppmu->flags & PPMU_P10_DD1) {
+	if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
 		if (siar)
 			return siar;
 		else
-- 
GitLab


From 76b7f8fae30a9249f820e019f1e62eca992751a2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andy.shevchenko@gmail.com>
Date: Sun, 6 Jun 2021 22:19:40 +0300
Subject: [PATCH 3297/3804] pinctrl: microchip-sgpio: Put fwnode in error case
 during ->probe()

device_for_each_child_node() bumps the reference count of the fwnode it
returns. We have to drop that reference whenever we return to the caller
from inside the loop.

Fixes: 7e5ea974e61c ("pinctrl: pinctrl-microchip-sgpio: Add pinctrl driver for Microsemi Serial GPIO")
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Signed-off-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20210606191940.29312-1-andy.shevchenko@gmail.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-microchip-sgpio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c
index c12fa57ebd12c..165cb7a597155 100644
--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
+++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
@@ -845,8 +845,10 @@ static int microchip_sgpio_probe(struct platform_device *pdev)
 	i = 0;
 	device_for_each_child_node(dev, fwnode) {
 		ret = microchip_sgpio_register_bank(dev, priv, fwnode, i++);
-		if (ret)
+		if (ret) {
+			fwnode_handle_put(fwnode);
 			return ret;
+		}
 	}
 
 	if (priv->in.gpio.ngpio != priv->out.gpio.ngpio) {
-- 
GitLab


From 1236af327af476731aa548dfcbbefb1a3ec6726a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 17 Jun 2021 12:38:54 +0200
Subject: [PATCH 3298/3804] mac80211: minstrel_ht: fix sample time check

We need to skip sampling if the next sample time is after jiffies, not before.
This patch fixes an issue where in some cases only very little sampling (or none
at all) is performed, leading to really bad data rates.

Fixes: 80d55154b2f8 ("mac80211: minstrel_ht: significantly redesign the rate probing strategy")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20210617103854.61875-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 6487b05da6fa6..a6f3fb4a91972 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1514,7 +1514,7 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
 	    (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO))
 		return;
 
-	if (time_is_before_jiffies(mi->sample_time))
+	if (time_is_after_jiffies(mi->sample_time))
 		return;
 
 	mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;
-- 
GitLab


From 37aadc687ab441bbcb693ddae613acf9afcea1ab Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:11 +0200
Subject: [PATCH 3299/3804] sched: Unbreak wakeups

Remove broken task->state references and let wake_up_process() DTRT.

The anti-pattern in these patches breaks the ordering of ->state vs
COND as described in the comment near set_current_state() and can lead
to missed wakeups:

	(OoO load, observes RUNNING)<-.
	for (;;) {                    |
	  t->state = UNINTERRUPTIBLE; |
	  smp_mb();          ,----->  | (observes !COND)
                             |        /
	  if (COND) ---------'       |	COND = 1;
		break;		     `- if (t->state != RUNNING)
					  wake_up_process(t); // not done
	  schedule(); // forever waiting
	}
	t->state = TASK_RUNNING;

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210611082838.160855222@infradead.org
---
 drivers/net/ethernet/qualcomm/qca_spi.c |  6 ++----
 drivers/usb/gadget/udc/max3420_udc.c    | 15 +++++----------
 drivers/usb/host/max3421-hcd.c          |  3 +--
 kernel/softirq.c                        |  2 +-
 4 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index ab9b02574a152..0a6b8112b5351 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -653,8 +653,7 @@ qcaspi_intr_handler(int irq, void *data)
 	struct qcaspi *qca = data;
 
 	qca->intr_req++;
-	if (qca->spi_thread &&
-	    qca->spi_thread->state != TASK_RUNNING)
+	if (qca->spi_thread)
 		wake_up_process(qca->spi_thread);
 
 	return IRQ_HANDLED;
@@ -777,8 +776,7 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	netif_trans_update(dev);
 
-	if (qca->spi_thread &&
-	    qca->spi_thread->state != TASK_RUNNING)
+	if (qca->spi_thread)
 		wake_up_process(qca->spi_thread);
 
 	return NETDEV_TX_OK;
diff --git a/drivers/usb/gadget/udc/max3420_udc.c b/drivers/usb/gadget/udc/max3420_udc.c
index 35179543c3272..34f4db554977b 100644
--- a/drivers/usb/gadget/udc/max3420_udc.c
+++ b/drivers/usb/gadget/udc/max3420_udc.c
@@ -509,8 +509,7 @@ static irqreturn_t max3420_vbus_handler(int irq, void *dev_id)
 			     ? USB_STATE_POWERED : USB_STATE_NOTATTACHED);
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	if (udc->thread_task &&
-	    udc->thread_task->state != TASK_RUNNING)
+	if (udc->thread_task)
 		wake_up_process(udc->thread_task);
 
 	return IRQ_HANDLED;
@@ -529,8 +528,7 @@ static irqreturn_t max3420_irq_handler(int irq, void *dev_id)
 	}
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	if (udc->thread_task &&
-	    udc->thread_task->state != TASK_RUNNING)
+	if (udc->thread_task)
 		wake_up_process(udc->thread_task);
 
 	return IRQ_HANDLED;
@@ -1093,8 +1091,7 @@ static int max3420_wakeup(struct usb_gadget *gadget)
 
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	if (udc->thread_task &&
-	    udc->thread_task->state != TASK_RUNNING)
+	if (udc->thread_task)
 		wake_up_process(udc->thread_task);
 	return ret;
 }
@@ -1117,8 +1114,7 @@ static int max3420_udc_start(struct usb_gadget *gadget,
 	udc->todo |= UDC_START;
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	if (udc->thread_task &&
-	    udc->thread_task->state != TASK_RUNNING)
+	if (udc->thread_task)
 		wake_up_process(udc->thread_task);
 
 	return 0;
@@ -1137,8 +1133,7 @@ static int max3420_udc_stop(struct usb_gadget *gadget)
 	udc->todo |= UDC_START;
 	spin_unlock_irqrestore(&udc->lock, flags);
 
-	if (udc->thread_task &&
-	    udc->thread_task->state != TASK_RUNNING)
+	if (udc->thread_task)
 		wake_up_process(udc->thread_task);
 
 	return 0;
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
index afd9174d83b14..e7a8e06098535 100644
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1169,8 +1169,7 @@ max3421_irq_handler(int irq, void *dev_id)
 	struct spi_device *spi = to_spi_device(hcd->self.controller);
 	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
 
-	if (max3421_hcd->spi_thread &&
-	    max3421_hcd->spi_thread->state != TASK_RUNNING)
+	if (max3421_hcd->spi_thread)
 		wake_up_process(max3421_hcd->spi_thread);
 	if (!test_and_set_bit(ENABLE_IRQ, &max3421_hcd->todo))
 		disable_irq_nosync(spi->irq);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 4992853ef53d2..5ddc3b15a4db8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -76,7 +76,7 @@ static void wakeup_softirqd(void)
 	/* Interrupts are disabled: no need to stop preemption */
 	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
 
-	if (tsk && tsk->state != TASK_RUNNING)
+	if (tsk)
 		wake_up_process(tsk);
 }
 
-- 
GitLab


From b03fbd4ff24c5f075e58eb19261d5f8b3e40d7c6 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:12 +0200
Subject: [PATCH 3300/3804] sched: Introduce task_is_running()

Replace a bunch of 'p->state == TASK_RUNNING' with a new helper:
task_is_running(p).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210611082838.222401495@infradead.org
---
 arch/alpha/kernel/process.c    | 2 +-
 arch/arc/kernel/stacktrace.c   | 2 +-
 arch/arm/kernel/process.c      | 2 +-
 arch/arm64/kernel/process.c    | 2 +-
 arch/csky/kernel/stacktrace.c  | 2 +-
 arch/h8300/kernel/process.c    | 2 +-
 arch/hexagon/kernel/process.c  | 2 +-
 arch/ia64/kernel/process.c     | 4 ++--
 arch/m68k/kernel/process.c     | 2 +-
 arch/mips/kernel/process.c     | 2 +-
 arch/nds32/kernel/process.c    | 2 +-
 arch/nios2/kernel/process.c    | 2 +-
 arch/parisc/kernel/process.c   | 4 ++--
 arch/powerpc/kernel/process.c  | 4 ++--
 arch/riscv/kernel/stacktrace.c | 2 +-
 arch/s390/kernel/process.c     | 2 +-
 arch/s390/mm/fault.c           | 2 +-
 arch/sh/kernel/process_32.c    | 2 +-
 arch/sparc/kernel/process_32.c | 3 +--
 arch/sparc/kernel/process_64.c | 3 +--
 arch/um/kernel/process.c       | 2 +-
 arch/x86/kernel/process.c      | 4 ++--
 arch/xtensa/kernel/process.c   | 2 +-
 block/blk-mq.c                 | 2 +-
 include/linux/sched.h          | 2 ++
 kernel/kcsan/report.c          | 2 +-
 kernel/locking/lockdep.c       | 2 +-
 kernel/rcu/tree_plugin.h       | 2 +-
 kernel/sched/core.c            | 6 +++---
 kernel/sched/stats.h           | 2 +-
 kernel/signal.c                | 2 +-
 kernel/softirq.c               | 3 +--
 mm/compaction.c                | 2 +-
 33 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 5112ab9963947..ef0c08ed04811 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -380,7 +380,7 @@ get_wchan(struct task_struct *p)
 {
 	unsigned long schedule_frame;
 	unsigned long pc;
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 	/*
 	 * This one depends on the frame size of schedule().  Do a
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index f73da203b1702..1b9576d21e244 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -83,7 +83,7 @@ seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs,
 		 *    is safe-kept and BLINK at a well known location in there
 		 */
 
-		if (tsk->state == TASK_RUNNING)
+		if (task_is_running(tsk))
 			return -1;
 
 		frame_info->task = tsk;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 6324f4db9b029..fc9e8b37eaa84 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -288,7 +288,7 @@ unsigned long get_wchan(struct task_struct *p)
 	struct stackframe frame;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	frame.fp = thread_saved_fp(p);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2ca..14f3c19c6ad22 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -598,7 +598,7 @@ unsigned long get_wchan(struct task_struct *p)
 	struct stackframe frame;
 	unsigned long stack_page, ret = 0;
 	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long)try_get_task_stack(p);
diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c
index 16ae20a0af342..1b280ef080045 100644
--- a/arch/csky/kernel/stacktrace.c
+++ b/arch/csky/kernel/stacktrace.c
@@ -115,7 +115,7 @@ unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc = 0;
 
-	if (likely(task && task != current && task->state != TASK_RUNNING))
+	if (likely(task && task != current && !task_is_running(task)))
 		walk_stackframe(task, NULL, save_wchan, &pc);
 	return pc;
 }
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 46b1342ce515b..2ac27e4248a46 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -134,7 +134,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long stack_page;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long)p;
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index c61165c99ae0b..6a6835fb42425 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -135,7 +135,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long)task_stack_page(p);
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 7e1a1525e2026..e56d63f4abf9d 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -529,7 +529,7 @@ get_wchan (struct task_struct *p)
 	unsigned long ip;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	/*
@@ -542,7 +542,7 @@ get_wchan (struct task_struct *p)
 	 */
 	unw_init_from_blocked_task(&info, p);
 	do {
-		if (p->state == TASK_RUNNING)
+		if (task_is_running(p))
 			return 0;
 		if (unw_unwind(&info) < 0)
 			return 0;
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index da83cc83e7912..db49f90917112 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -268,7 +268,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long)task_stack_page(p);
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index bff080db0294e..73c8e7990a973 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -662,7 +662,7 @@ unsigned long get_wchan(struct task_struct *task)
 	unsigned long ra = 0;
 #endif
 
-	if (!task || task == current || task->state == TASK_RUNNING)
+	if (!task || task == current || task_is_running(task))
 		goto out;
 	if (!task_stack_page(task))
 		goto out;
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index c1327e552ec6c..391895b54d13c 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -239,7 +239,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long stack_start, stack_end;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	if (IS_ENABLED(CONFIG_FRAME_POINTER)) {
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index c5f916ca6845f..9ff37ba2bb603 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -223,7 +223,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long stack_page;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long)p;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index b144fbe29bc16..184ec3c1eae44 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -249,7 +249,7 @@ get_wchan(struct task_struct *p)
 	unsigned long ip;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	/*
@@ -260,7 +260,7 @@ get_wchan(struct task_struct *p)
 	do {
 		if (unwind_once(&info) < 0)
 			return 0;
-		if (p->state == TASK_RUNNING)
+		if (task_is_running(p))
                         return 0;
 		ip = info.ip;
 		if (!in_sched_functions(ip))
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 89e34aa273e21..8935c5696bcef 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2084,7 +2084,7 @@ static unsigned long __get_wchan(struct task_struct *p)
 	unsigned long ip, sp;
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	sp = p->thread.ksp;
@@ -2094,7 +2094,7 @@ static unsigned long __get_wchan(struct task_struct *p)
 	do {
 		sp = *(unsigned long *)sp;
 		if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
-		    p->state == TASK_RUNNING)
+		    task_is_running(p))
 			return 0;
 		if (count > 0) {
 			ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index bde85fc53357f..ff467b98c3e33 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -132,7 +132,7 @@ unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc = 0;
 
-	if (likely(task && task != current && task->state != TASK_RUNNING))
+	if (likely(task && task != current && !task_is_running(task)))
 		walk_stackframe(task, NULL, save_wchan, &pc);
 	return pc;
 }
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index e20bed1ed34a2..7ae5dde9c54da 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -180,7 +180,7 @@ unsigned long get_wchan(struct task_struct *p)
 	struct unwind_state state;
 	unsigned long ip = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
+	if (!p || p == current || task_is_running(p) || !task_stack_page(p))
 		return 0;
 
 	if (!try_get_task_stack(p))
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 826d017773616..8ae3dc5783fde 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -702,7 +702,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 			 * interrupt since it must be a leftover of a PFAULT
 			 * CANCEL operation which didn't remove all pending
 			 * completion interrupts. */
-			if (tsk->state == TASK_RUNNING)
+			if (task_is_running(tsk))
 				tsk->thread.pfault_wait = -1;
 		}
 	} else {
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 1aa508eb0823a..717de05c81f49 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -186,7 +186,7 @@ unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long pc;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	/*
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 3b9794978e5bc..93983d6d431de 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -376,8 +376,7 @@ unsigned long get_wchan(struct task_struct *task)
 	struct reg_window32 *rw;
 	int count = 0;
 
-	if (!task || task == current ||
-            task->state == TASK_RUNNING)
+	if (!task || task == current || task_is_running(task))
 		goto out;
 
 	fp = task_thread_info(task)->ksp + bias;
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 7afd0a859a78c..d33c58a58d4ff 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -674,8 +674,7 @@ unsigned long get_wchan(struct task_struct *task)
         unsigned long ret = 0;
 	int count = 0; 
 
-	if (!task || task == current ||
-            task->state == TASK_RUNNING)
+	if (!task || task == current || task_is_running(task))
 		goto out;
 
 	tp = task_thread_info(task);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5011064b5dd4..457a38db368b7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -369,7 +369,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long stack_page, sp, ip;
 	bool seen_sched = 0;
 
-	if ((p == NULL) || (p == current) || (p->state == TASK_RUNNING))
+	if ((p == NULL) || (p == current) || task_is_running(p))
 		return 0;
 
 	stack_page = (unsigned long) task_stack_page(p);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e1f38179f495..e52b208b4641b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -931,7 +931,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long start, bottom, top, sp, fp, ip, ret = 0;
 	int count = 0;
 
-	if (p == current || p->state == TASK_RUNNING)
+	if (p == current || task_is_running(p))
 		return 0;
 
 	if (!try_get_task_stack(p))
@@ -975,7 +975,7 @@ unsigned long get_wchan(struct task_struct *p)
 			goto out;
 		}
 		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
-	} while (count++ < 16 && p->state != TASK_RUNNING);
+	} while (count++ < 16 && !task_is_running(p));
 
 out:
 	put_task_stack(p);
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 9534ef515d748..0601653406123 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -304,7 +304,7 @@ unsigned long get_wchan(struct task_struct *p)
 	unsigned long stack_page = (unsigned long) task_stack_page(p);
 	int count = 0;
 
-	if (!p || p == current || p->state == TASK_RUNNING)
+	if (!p || p == current || task_is_running(p))
 		return 0;
 
 	sp = p->thread.sp;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c86c01bfecdbe..655db5fb46d01 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3926,7 +3926,7 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 		if (signal_pending_state(state, current))
 			__set_current_state(TASK_RUNNING);
 
-		if (current->state == TASK_RUNNING)
+		if (task_is_running(current))
 			return 1;
 		if (ret < 0 || !spin)
 			break;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac5a7d29fd4f9..2cd56352dae14 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -113,6 +113,8 @@ struct task_group;
 					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
 					 TASK_PARKED)
 
+#define task_is_running(task)		(READ_ONCE((task)->state) == TASK_RUNNING)
+
 #define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
 
 #define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index 13dce3c664d63..56016e8e74611 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -460,7 +460,7 @@ static void set_other_info_task_blocking(unsigned long *flags,
 	 * We may be instrumenting a code-path where current->state is already
 	 * something other than TASK_RUNNING.
 	 */
-	const bool is_running = current->state == TASK_RUNNING;
+	const bool is_running = task_is_running(current);
 	/*
 	 * To avoid deadlock in case we are in an interrupt here and this is a
 	 * race with a task on the same CPU (KCSAN_INTERRUPT_WATCHER), provide a
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7641bd4072390..4931a93c51621 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -760,7 +760,7 @@ static void lockdep_print_held_locks(struct task_struct *p)
 	 * It's not reliable to print a task's held locks if it's not sleeping
 	 * and it's not the current task.
 	 */
-	if (p->state == TASK_RUNNING && p != current)
+	if (p != current && task_is_running(p))
 		return;
 	for (i = 0; i < depth; i++) {
 		printk(" #%d: ", i);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index ad0156b869371..4d6962048c304 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2768,7 +2768,7 @@ EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb);
 #ifdef CONFIG_SMP
 static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
 {
-	return tsp && tsp->state == TASK_RUNNING && !tsp->on_cpu ? "!" : "";
+	return tsp && task_is_running(tsp) && !tsp->on_cpu ? "!" : "";
 }
 #else // #ifdef CONFIG_SMP
 static char *show_rcu_should_be_on_cpu(struct task_struct *tsp)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75655cdee3bb9..618c2b5a5758f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5974,7 +5974,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 {
 	unsigned int task_flags;
 
-	if (!tsk->state)
+	if (task_is_running(tsk))
 		return;
 
 	task_flags = tsk->flags;
@@ -7949,7 +7949,7 @@ again:
 	if (curr->sched_class != p->sched_class)
 		goto out_unlock;
 
-	if (task_running(p_rq, p) || p->state)
+	if (task_running(p_rq, p) || !task_is_running(p))
 		goto out_unlock;
 
 	yielded = curr->sched_class->yield_to_task(rq, p);
@@ -8152,7 +8152,7 @@ void sched_show_task(struct task_struct *p)
 
 	pr_info("task:%-15.15s state:%c", p->comm, task_state_to_char(p));
 
-	if (p->state == TASK_RUNNING)
+	if (task_is_running(p))
 		pr_cont("  running task    ");
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 111072ee96638..d8f8eb0c655ba 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -217,7 +217,7 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
 
 	rq_sched_info_depart(rq, delta);
 
-	if (t->state == TASK_RUNNING)
+	if (task_is_running(t))
 		sched_info_enqueue(rq, t);
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index f7c6ffcbd0440..5fc8fcf70c240 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4719,7 +4719,7 @@ void kdb_send_sig(struct task_struct *t, int sig)
 	}
 	new_t = kdb_prev_t != t;
 	kdb_prev_t = t;
-	if (t->state != TASK_RUNNING && new_t) {
+	if (!task_is_running(t) && new_t) {
 		spin_unlock(&t->sighand->siglock);
 		kdb_printf("Process is not RUNNING, sending a signal from "
 			   "kdb risks deadlock\n"
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5ddc3b15a4db8..f3a012179f472 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -92,8 +92,7 @@ static bool ksoftirqd_running(unsigned long pending)
 
 	if (pending & SOFTIRQ_NOW_MASK)
 		return false;
-	return tsk && (tsk->state == TASK_RUNNING) &&
-		!__kthread_should_park(tsk);
+	return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
 }
 
 #ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/mm/compaction.c b/mm/compaction.c
index 84fde270ae74f..725f564a56640 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1955,7 +1955,7 @@ static inline bool is_via_compact_memory(int order)
 
 static bool kswapd_is_running(pg_data_t *pgdat)
 {
-	return pgdat->kswapd && (pgdat->kswapd->state == TASK_RUNNING);
+	return pgdat->kswapd && task_is_running(pgdat->kswapd);
 }
 
 /*
-- 
GitLab


From 3ba9f93b12361e005dd65fcc8072b42e3189f4f4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:13 +0200
Subject: [PATCH 3301/3804] sched,perf,kvm: Fix preemption condition

When run from the sched-out path (preempt_notifier or perf_event),
p->state is irrelevant to determine preemption. You can get preempted
with !task_is_running() just fine.

The right indicator for preemption is if the task is still on the
runqueue in the sched-out path.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20210611082838.285099381@infradead.org
---
 kernel/events/core.c | 7 +++----
 virt/kvm/kvm_main.c  | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index fe88d6eea3c2c..fd89000c9bf59 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8682,13 +8682,12 @@ static void perf_event_switch(struct task_struct *task,
 		},
 	};
 
-	if (!sched_in && task->state == TASK_RUNNING)
+	if (!sched_in && task->on_rq) {
 		switch_event.event_id.header.misc |=
 				PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
+	}
 
-	perf_iterate_sb(perf_event_switch_output,
-		       &switch_event,
-		       NULL);
+	perf_iterate_sb(perf_event_switch_output, &switch_event, NULL);
 }
 
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a6bc7af0e28d..5f166eb8ee2e5 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5025,7 +5025,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
-	if (current->state == TASK_RUNNING) {
+	if (current->on_rq) {
 		WRITE_ONCE(vcpu->preempted, true);
 		WRITE_ONCE(vcpu->ready, true);
 	}
-- 
GitLab


From d6c23bb3a2ad2f8f7dd46292b8bc54d27f2fb3f1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:14 +0200
Subject: [PATCH 3302/3804] sched: Add get_current_state()

Remove a few more p->state accesses.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210611082838.347475156@infradead.org
---
 block/blk-mq.c        | 2 +-
 include/linux/sched.h | 2 ++
 kernel/freezer.c      | 2 +-
 kernel/sched/core.c   | 6 +++---
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 655db5fb46d01..56270bb06365e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3910,7 +3910,7 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 
 	hctx->poll_considered++;
 
-	state = current->state;
+	state = get_current_state();
 	do {
 		int ret;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2cd56352dae14..395c8906f502d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -213,6 +213,8 @@ struct task_group;
 
 #endif
 
+#define get_current_state()	READ_ONCE(current->state)
+
 /* Task command name length: */
 #define TASK_COMM_LEN			16
 
diff --git a/kernel/freezer.c b/kernel/freezer.c
index dc520f01f99dd..45ab36ffd0e79 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -58,7 +58,7 @@ bool __refrigerator(bool check_kthr_stop)
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
 	bool was_frozen = false;
-	long save = current->state;
+	unsigned int save = get_current_state();
 
 	pr_debug("%s entered refrigerator\n", current->comm);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 618c2b5a5758f..45ebb3cfe86ca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9098,15 +9098,15 @@ static inline int preempt_count_equals(int preempt_offset)
 
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
+	unsigned int state = get_current_state();
 	/*
 	 * Blocking primitives will set (and therefore destroy) current->state,
 	 * since we will exit with TASK_RUNNING make sure we enter with it,
 	 * otherwise we will destroy state.
 	 */
-	WARN_ONCE(current->state != TASK_RUNNING && current->task_state_change,
+	WARN_ONCE(state != TASK_RUNNING && current->task_state_change,
 			"do not call blocking ops when !TASK_RUNNING; "
-			"state=%lx set at [<%p>] %pS\n",
-			current->state,
+			"state=%x set at [<%p>] %pS\n", state,
 			(void *)current->task_state_change,
 			(void *)current->task_state_change);
 
-- 
GitLab


From 600642ae9050a872055119ba09d0decc43f6c843 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:15 +0200
Subject: [PATCH 3303/3804] sched,timer: Use __set_current_state()

There's an existing helper for setting TASK_RUNNING; must've gotten
lost last time we did this cleanup.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210611082838.409696194@infradead.org
---
 kernel/time/timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index d111adf4a0cb4..467087d7bdb66 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1879,7 +1879,7 @@ signed long __sched schedule_timeout(signed long timeout)
 			printk(KERN_ERR "schedule_timeout: wrong timeout "
 				"value %lx\n", timeout);
 			dump_stack();
-			current->state = TASK_RUNNING;
+			__set_current_state(TASK_RUNNING);
 			goto out;
 		}
 	}
-- 
GitLab


From 7c3edd6d9cb4d8ea8db5b167dc2eee94d7e4667b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:16 +0200
Subject: [PATCH 3304/3804] sched,arch: Remove unused TASK_STATE offsets

All 6 architectures define TASK_STATE in asm-offsets, but then never
actually use it. Remove the definitions to make sure they never will.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210611082838.472811363@infradead.org
---
 arch/csky/kernel/asm-offsets.c       | 1 -
 arch/h8300/kernel/asm-offsets.c      | 1 -
 arch/microblaze/kernel/asm-offsets.c | 1 -
 arch/mips/kernel/asm-offsets.c       | 1 -
 arch/openrisc/kernel/asm-offsets.c   | 1 -
 arch/parisc/kernel/asm-offsets.c     | 1 -
 6 files changed, 6 deletions(-)

diff --git a/arch/csky/kernel/asm-offsets.c b/arch/csky/kernel/asm-offsets.c
index 17479860d43dc..1cbcba4b0dd1f 100644
--- a/arch/csky/kernel/asm-offsets.c
+++ b/arch/csky/kernel/asm-offsets.c
@@ -9,7 +9,6 @@
 int main(void)
 {
 	/* offsets into the task struct */
-	DEFINE(TASK_STATE,        offsetof(struct task_struct, state));
 	DEFINE(TASK_THREAD_INFO,  offsetof(struct task_struct, stack));
 	DEFINE(TASK_FLAGS,        offsetof(struct task_struct, flags));
 	DEFINE(TASK_PTRACE,       offsetof(struct task_struct, ptrace));
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c
index d4b53af657c84..65571ee15132f 100644
--- a/arch/h8300/kernel/asm-offsets.c
+++ b/arch/h8300/kernel/asm-offsets.c
@@ -21,7 +21,6 @@
 int main(void)
 {
 	/* offsets into the task struct */
-	OFFSET(TASK_STATE, task_struct, state);
 	OFFSET(TASK_FLAGS, task_struct, flags);
 	OFFSET(TASK_PTRACE, task_struct, ptrace);
 	OFFSET(TASK_BLOCKED, task_struct, blocked);
diff --git a/arch/microblaze/kernel/asm-offsets.c b/arch/microblaze/kernel/asm-offsets.c
index 6c69ce7be2e84..b77dd188dec4c 100644
--- a/arch/microblaze/kernel/asm-offsets.c
+++ b/arch/microblaze/kernel/asm-offsets.c
@@ -70,7 +70,6 @@ int main(int argc, char *argv[])
 
 	/* struct task_struct */
 	DEFINE(TS_THREAD_INFO, offsetof(struct task_struct, stack));
-	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
 	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
 	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
 	DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 5735b2cd6f2af..04ca75278f023 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -78,7 +78,6 @@ void output_ptreg_defines(void)
 void output_task_defines(void)
 {
 	COMMENT("MIPS task_struct offsets.");
-	OFFSET(TASK_STATE, task_struct, state);
 	OFFSET(TASK_THREAD_INFO, task_struct, stack);
 	OFFSET(TASK_FLAGS, task_struct, flags);
 	OFFSET(TASK_MM, task_struct, mm);
diff --git a/arch/openrisc/kernel/asm-offsets.c b/arch/openrisc/kernel/asm-offsets.c
index 18c703d1d7617..710651d5aaae1 100644
--- a/arch/openrisc/kernel/asm-offsets.c
+++ b/arch/openrisc/kernel/asm-offsets.c
@@ -37,7 +37,6 @@
 int main(void)
 {
 	/* offsets into the task_struct */
-	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
 	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
 	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
 	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index cd2cc1b1648c0..33113ba240544 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -42,7 +42,6 @@
 int main(void)
 {
 	DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
-	DEFINE(TASK_STATE, offsetof(struct task_struct, state));
 	DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
 	DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending));
 	DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
-- 
GitLab


From 2f064a59a11ff9bc22e52e9678bc601404c7cb34 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 11 Jun 2021 10:28:17 +0200
Subject: [PATCH 3305/3804] sched: Change task_struct::state

Change the type and name of task_struct::state. Drop the volatile and
shrink it to an 'unsigned int'. Rename it in order to find all uses
such that we can use READ_ONCE/WRITE_ONCE as appropriate.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Link: https://lore.kernel.org/r/20210611082838.550736351@infradead.org
---
 arch/ia64/kernel/mca.c         |  2 +-
 arch/ia64/kernel/ptrace.c      |  8 ++---
 arch/powerpc/xmon/xmon.c       | 13 +++++----
 block/blk-mq.c                 |  2 +-
 drivers/md/dm.c                |  6 ++--
 fs/binfmt_elf.c                |  8 +++--
 fs/binfmt_elf_fdpic.c          |  4 ++-
 fs/userfaultfd.c               |  4 +--
 include/linux/sched.h          | 31 ++++++++++----------
 include/linux/sched/debug.h    |  2 +-
 include/linux/sched/signal.h   |  2 +-
 init/init_task.c               |  2 +-
 kernel/cgroup/cgroup-v1.c      |  2 +-
 kernel/debug/kdb/kdb_support.c | 18 +++++++-----
 kernel/fork.c                  |  4 +--
 kernel/hung_task.c             |  2 +-
 kernel/kthread.c               |  4 +--
 kernel/locking/mutex.c         |  6 ++--
 kernel/locking/rtmutex.c       |  4 +--
 kernel/locking/rwsem.c         |  2 +-
 kernel/ptrace.c                | 12 ++++----
 kernel/rcu/rcutorture.c        |  4 +--
 kernel/rcu/tree_stall.h        | 12 ++++----
 kernel/sched/core.c            | 53 ++++++++++++++++++----------------
 kernel/sched/deadline.c        | 10 +++----
 kernel/sched/fair.c            | 11 ++++---
 lib/syscall.c                  |  4 +--
 net/core/dev.c                 |  2 +-
 28 files changed, 123 insertions(+), 111 deletions(-)

diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index cdbac4b52f309..e628a88607bbd 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1788,7 +1788,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 	ti->task = p;
 	ti->cpu = cpu;
 	p->stack = ti;
-	p->state = TASK_UNINTERRUPTIBLE;
+	p->__state = TASK_UNINTERRUPTIBLE;
 	cpumask_set_cpu(cpu, &p->cpus_mask);
 	INIT_LIST_HEAD(&p->tasks);
 	p->parent = p->real_parent = p->group_leader = p;
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index e14f5653393ac..df28c7dd164f5 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -641,11 +641,11 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
 	read_lock(&tasklist_lock);
 	if (child->sighand) {
 		spin_lock_irq(&child->sighand->siglock);
-		if (child->state == TASK_STOPPED &&
+		if (READ_ONCE(child->__state) == TASK_STOPPED &&
 		    !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) {
 			set_notify_resume(child);
 
-			child->state = TASK_TRACED;
+			WRITE_ONCE(child->__state, TASK_TRACED);
 			stopped = 1;
 		}
 		spin_unlock_irq(&child->sighand->siglock);
@@ -665,9 +665,9 @@ ptrace_attach_sync_user_rbs (struct task_struct *child)
 	read_lock(&tasklist_lock);
 	if (child->sighand) {
 		spin_lock_irq(&child->sighand->siglock);
-		if (child->state == TASK_TRACED &&
+		if (READ_ONCE(child->__state) == TASK_TRACED &&
 		    (child->signal->flags & SIGNAL_STOP_STOPPED)) {
-			child->state = TASK_STOPPED;
+			WRITE_ONCE(child->__state, TASK_STOPPED);
 		}
 		spin_unlock_irq(&child->sighand->siglock);
 	}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index c8173e92f19d7..84de2d7c2f40c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -3162,6 +3162,7 @@ memzcan(void)
 
 static void show_task(struct task_struct *tsk)
 {
+	unsigned int p_state = READ_ONCE(tsk->__state);
 	char state;
 
 	/*
@@ -3169,14 +3170,14 @@ static void show_task(struct task_struct *tsk)
 	 * appropriate for calling from xmon. This could be moved
 	 * to a common, generic, routine used by both.
 	 */
-	state = (tsk->state == 0) ? 'R' :
-		(tsk->state < 0) ? 'U' :
-		(tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' :
-		(tsk->state & TASK_STOPPED) ? 'T' :
-		(tsk->state & TASK_TRACED) ? 'C' :
+	state = (p_state == 0) ? 'R' :
+		(p_state < 0) ? 'U' :
+		(p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+		(p_state & TASK_STOPPED) ? 'T' :
+		(p_state & TASK_TRACED) ? 'C' :
 		(tsk->exit_state & EXIT_ZOMBIE) ? 'Z' :
 		(tsk->exit_state & EXIT_DEAD) ? 'E' :
-		(tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+		(p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
 
 	printf("%16px %16lx %16px %6d %6d %c %2d %s\n", tsk,
 		tsk->thread.ksp, tsk->thread.regs,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 56270bb06365e..e41edae974879 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3886,7 +3886,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
 int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 {
 	struct blk_mq_hw_ctx *hctx;
-	long state;
+	unsigned int state;
 
 	if (!blk_qc_t_valid(cookie) ||
 	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ca2aedd8ee7d1..190e714cb5653 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2328,7 +2328,7 @@ static bool md_in_flight_bios(struct mapped_device *md)
 	return sum != 0;
 }
 
-static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state)
 {
 	int r = 0;
 	DEFINE_WAIT(wait);
@@ -2351,7 +2351,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, long task_state
 	return r;
 }
 
-static int dm_wait_for_completion(struct mapped_device *md, long task_state)
+static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state)
 {
 	int r = 0;
 
@@ -2478,7 +2478,7 @@ static void unlock_fs(struct mapped_device *md)
  * are being added to md->deferred list.
  */
 static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
-			unsigned suspend_flags, long task_state,
+			unsigned suspend_flags, unsigned int task_state,
 			int dmf_suspended_flag)
 {
 	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 187b3f2b9202a..3d73cbb439fae 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1537,7 +1537,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 {
 	const struct cred *cred;
 	unsigned int i, len;
-	
+	unsigned int state;
+
 	/* first copy the parameters from user space */
 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
 
@@ -1559,7 +1560,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
-	i = p->state ? ffz(~p->state) + 1 : 0;
+	state = READ_ONCE(p->__state);
+	i = state ? ffz(~state) + 1 : 0;
 	psinfo->pr_state = i;
 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
@@ -1571,7 +1573,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
 	rcu_read_unlock();
 	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
-	
+
 	return 0;
 }
 
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2c99b102c8600..ab9c31ddffda2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1331,6 +1331,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 {
 	const struct cred *cred;
 	unsigned int i, len;
+	unsigned int state;
 
 	/* first copy the parameters from user space */
 	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
@@ -1353,7 +1354,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
 	psinfo->pr_pgrp = task_pgrp_vnr(p);
 	psinfo->pr_sid = task_session_vnr(p);
 
-	i = p->state ? ffz(~p->state) + 1 : 0;
+	state = READ_ONCE(p->__state);
+	i = state ? ffz(~state) + 1 : 0;
 	psinfo->pr_state = i;
 	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
 	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 14f92285d04f8..dd7a6c62b56f0 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -337,7 +337,7 @@ out:
 	return ret;
 }
 
-static inline long userfaultfd_get_blocking_state(unsigned int flags)
+static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
 {
 	if (flags & FAULT_FLAG_INTERRUPTIBLE)
 		return TASK_INTERRUPTIBLE;
@@ -370,7 +370,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	struct userfaultfd_wait_queue uwq;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
 	bool must_wait;
-	long blocking_state;
+	unsigned int blocking_state;
 
 	/*
 	 * We don't do userfault handling for the final child pid update.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 395c8906f502d..50db9496c99d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -113,13 +113,13 @@ struct task_group;
 					 __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \
 					 TASK_PARKED)
 
-#define task_is_running(task)		(READ_ONCE((task)->state) == TASK_RUNNING)
+#define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
 
-#define task_is_traced(task)		((task->state & __TASK_TRACED) != 0)
+#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
 
-#define task_is_stopped(task)		((task->state & __TASK_STOPPED) != 0)
+#define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
 
-#define task_is_stopped_or_traced(task)	((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
@@ -134,14 +134,14 @@ struct task_group;
 	do {							\
 		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
-		current->state = (state_value);			\
+		WRITE_ONCE(current->__state, (state_value));	\
 	} while (0)
 
 #define set_current_state(state_value)				\
 	do {							\
 		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
-		smp_store_mb(current->state, (state_value));	\
+		smp_store_mb(current->__state, (state_value));	\
 	} while (0)
 
 #define set_special_state(state_value)					\
@@ -150,7 +150,7 @@ struct task_group;
 		WARN_ON_ONCE(!is_special_task_state(state_value));	\
 		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
 		current->task_state_change = _THIS_IP_;			\
-		current->state = (state_value);				\
+		WRITE_ONCE(current->__state, (state_value));		\
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
 	} while (0)
 #else
@@ -192,10 +192,10 @@ struct task_group;
  * Also see the comments of try_to_wake_up().
  */
 #define __set_current_state(state_value)				\
-	current->state = (state_value)
+	WRITE_ONCE(current->__state, (state_value))
 
 #define set_current_state(state_value)					\
-	smp_store_mb(current->state, (state_value))
+	smp_store_mb(current->__state, (state_value))
 
 /*
  * set_special_state() should be used for those states when the blocking task
@@ -207,13 +207,13 @@ struct task_group;
 	do {								\
 		unsigned long flags; /* may shadow */			\
 		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
-		current->state = (state_value);				\
+		WRITE_ONCE(current->__state, (state_value));		\
 		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
 	} while (0)
 
 #endif
 
-#define get_current_state()	READ_ONCE(current->state)
+#define get_current_state()	READ_ONCE(current->__state)
 
 /* Task command name length: */
 #define TASK_COMM_LEN			16
@@ -666,8 +666,7 @@ struct task_struct {
 	 */
 	struct thread_info		thread_info;
 #endif
-	/* -1 unrunnable, 0 runnable, >0 stopped: */
-	volatile long			state;
+	unsigned int			__state;
 
 	/*
 	 * This begins the randomizable portion of task_struct. Only
@@ -1532,7 +1531,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
 
 static inline unsigned int task_state_index(struct task_struct *tsk)
 {
-	unsigned int tsk_state = READ_ONCE(tsk->state);
+	unsigned int tsk_state = READ_ONCE(tsk->__state);
 	unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT;
 
 	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
@@ -1840,10 +1839,10 @@ static __always_inline void scheduler_ipi(void)
 	 */
 	preempt_fold_need_resched();
 }
-extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
+extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);
 #else
 static inline void scheduler_ipi(void) { }
-static inline unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+static inline unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
 	return 1;
 }
diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h
index ae51f4529fc9e..b5035afa23966 100644
--- a/include/linux/sched/debug.h
+++ b/include/linux/sched/debug.h
@@ -14,7 +14,7 @@ extern void dump_cpu_task(int cpu);
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
-extern void show_state_filter(unsigned long state_filter);
+extern void show_state_filter(unsigned int state_filter);
 
 static inline void show_state(void)
 {
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 7f4278fa21fef..c9cf678c347dc 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -382,7 +382,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
 	return task_sigpending(p) && __fatal_signal_pending(p);
 }
 
-static inline int signal_pending_state(long state, struct task_struct *p)
+static inline int signal_pending_state(unsigned int state, struct task_struct *p)
 {
 	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
 		return 0;
diff --git a/init/init_task.c b/init/init_task.c
index 8b08c2e19cbb5..562f2ef8d1570 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -71,7 +71,7 @@ struct task_struct init_task
 	.thread_info	= INIT_THREAD_INFO(init_task),
 	.stack_refcount	= REFCOUNT_INIT(1),
 #endif
-	.state		= 0,
+	.__state	= 0,
 	.stack		= init_stack,
 	.usage		= REFCOUNT_INIT(2),
 	.flags		= PF_KTHREAD,
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 1f274d7fc934e..ee93b6e895874 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -713,7 +713,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 
 	css_task_iter_start(&cgrp->self, 0, &it);
 	while ((tsk = css_task_iter_next(&it))) {
-		switch (tsk->state) {
+		switch (READ_ONCE(tsk->__state)) {
 		case TASK_RUNNING:
 			stats->nr_running++;
 			break;
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 91bb666d7c039..9f50d22d68e63 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -609,23 +609,25 @@ unsigned long kdb_task_state_string(const char *s)
  */
 char kdb_task_state_char (const struct task_struct *p)
 {
-	int cpu;
-	char state;
+	unsigned int p_state;
 	unsigned long tmp;
+	char state;
+	int cpu;
 
 	if (!p ||
 	    copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
 		return 'E';
 
 	cpu = kdb_process_cpu(p);
-	state = (p->state == 0) ? 'R' :
-		(p->state < 0) ? 'U' :
-		(p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
-		(p->state & TASK_STOPPED) ? 'T' :
-		(p->state & TASK_TRACED) ? 'C' :
+	p_state = READ_ONCE(p->__state);
+	state = (p_state == 0) ? 'R' :
+		(p_state < 0) ? 'U' :
+		(p_state & TASK_UNINTERRUPTIBLE) ? 'D' :
+		(p_state & TASK_STOPPED) ? 'T' :
+		(p_state & TASK_TRACED) ? 'C' :
 		(p->exit_state & EXIT_ZOMBIE) ? 'Z' :
 		(p->exit_state & EXIT_DEAD) ? 'E' :
-		(p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+		(p_state & TASK_INTERRUPTIBLE) ? 'S' : '?';
 	if (is_idle_task(p)) {
 		/* Idle task.  Is it really idle, apart from the kdb
 		 * interrupt? */
diff --git a/kernel/fork.c b/kernel/fork.c
index e595e77913eb7..1a9af73b47c18 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -425,7 +425,7 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk)
 
 static void release_task_stack(struct task_struct *tsk)
 {
-	if (WARN_ON(tsk->state != TASK_DEAD))
+	if (WARN_ON(READ_ONCE(tsk->__state) != TASK_DEAD))
 		return;  /* Better to leak the stack than to free prematurely */
 
 	account_kernel_stack(tsk, -1);
@@ -2392,7 +2392,7 @@ bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
 	exit_creds(p);
 bad_fork_free:
-	p->state = TASK_DEAD;
+	WRITE_ONCE(p->__state, TASK_DEAD);
 	put_task_stack(p);
 	delayed_free_task(p);
 fork_out:
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 396ebaebea3fe..b0ce8b3f3822c 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -196,7 +196,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
 			last_break = jiffies;
 		}
 		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
-		if (t->state == TASK_UNINTERRUPTIBLE)
+		if (READ_ONCE(t->__state) == TASK_UNINTERRUPTIBLE)
 			check_hung_task(t, timeout);
 	}
  unlock:
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3d326833092be..7bbfeeb0e956e 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -457,7 +457,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
+static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, unsigned int state)
 {
 	unsigned long flags;
 
@@ -473,7 +473,7 @@ static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mas
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, unsigned int state)
 {
 	__kthread_bind_mask(p, cpumask_of(cpu), state);
 }
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 013e1b08a1bfb..d2df5e68b5039 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -923,7 +923,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,
  * Lock a mutex (possibly interruptible), slowpath:
  */
 static __always_inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclass,
 		    struct lockdep_map *nest_lock, unsigned long ip,
 		    struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
 {
@@ -1098,14 +1098,14 @@ err_early_kill:
 }
 
 static int __sched
-__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 	     struct lockdep_map *nest_lock, unsigned long ip)
 {
 	return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
 }
 
 static int __sched
-__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+__ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 		struct lockdep_map *nest_lock, unsigned long ip,
 		struct ww_acquire_ctx *ww_ctx)
 {
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 406818196a9f7..b5d9bb5202c6b 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1135,7 +1135,7 @@ void __sched rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
  *
  * Must be called with lock->wait_lock held and interrupts disabled
  */
-static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state,
+static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
 				       struct hrtimer_sleeper *timeout,
 				       struct rt_mutex_waiter *waiter)
 {
@@ -1190,7 +1190,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
 /*
  * Slow path lock function:
  */
-static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state,
+static int __sched rt_mutex_slowlock(struct rt_mutex *lock, unsigned int state,
 				     struct hrtimer_sleeper *timeout,
 				     enum rtmutex_chainwalk chwalk)
 {
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 809b0016d3445..16bfbb10c74d7 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -889,7 +889,7 @@ rwsem_spin_on_owner(struct rw_semaphore *sem)
  * Wait for the read lock to be granted
  */
 static struct rw_semaphore __sched *
-rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, int state)
+rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
 {
 	long adjustment = -RWSEM_READER_BIAS;
 	long rcnt = (count >> RWSEM_READER_SHIFT);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 2997ca600d186..f8589bf8d7dce 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -197,7 +197,7 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
-		task->state = __TASK_TRACED;
+		WRITE_ONCE(task->__state, __TASK_TRACED);
 		ret = true;
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -207,7 +207,7 @@ static bool ptrace_freeze_traced(struct task_struct *task)
 
 static void ptrace_unfreeze_traced(struct task_struct *task)
 {
-	if (task->state != __TASK_TRACED)
+	if (READ_ONCE(task->__state) != __TASK_TRACED)
 		return;
 
 	WARN_ON(!task->ptrace || task->parent != current);
@@ -217,11 +217,11 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
 	 * Recheck state under the lock to close this race.
 	 */
 	spin_lock_irq(&task->sighand->siglock);
-	if (task->state == __TASK_TRACED) {
+	if (READ_ONCE(task->__state) == __TASK_TRACED) {
 		if (__fatal_signal_pending(task))
 			wake_up_state(task, __TASK_TRACED);
 		else
-			task->state = TASK_TRACED;
+			WRITE_ONCE(task->__state, TASK_TRACED);
 	}
 	spin_unlock_irq(&task->sighand->siglock);
 }
@@ -256,7 +256,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 	 */
 	read_lock(&tasklist_lock);
 	if (child->ptrace && child->parent == current) {
-		WARN_ON(child->state == __TASK_TRACED);
+		WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
 		/*
 		 * child->sighand can't be NULL, release_task()
 		 * does ptrace_unlink() before __exit_signal().
@@ -273,7 +273,7 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 			 * ptrace_stop() changes ->state back to TASK_RUNNING,
 			 * so we should not worry about leaking __TASK_TRACED.
 			 */
-			WARN_ON(child->state == __TASK_TRACED);
+			WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED);
 			ret = -ESRCH;
 		}
 	}
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 29d2f4c647d3a..194b9c145c402 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1831,10 +1831,10 @@ rcu_torture_stats_print(void)
 		srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
 					&flags, &gp_seq);
 		wtp = READ_ONCE(writer_task);
-		pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#lx cpu %d\n",
+		pr_alert("??? Writer stall state %s(%d) g%lu f%#x ->state %#x cpu %d\n",
 			 rcu_torture_writer_state_getname(),
 			 rcu_torture_writer_state, gp_seq, flags,
-			 wtp == NULL ? ~0UL : wtp->state,
+			 wtp == NULL ? ~0U : wtp->__state,
 			 wtp == NULL ? -1 : (int)task_cpu(wtp));
 		if (!splatted && wtp) {
 			sched_show_task(wtp);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 59b95cc5cbdf1..acb2288063b53 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -460,12 +460,12 @@ static void rcu_check_gp_kthread_starvation(void)
 
 	if (rcu_is_gp_kthread_starving(&j)) {
 		cpu = gpk ? task_cpu(gpk) : -1;
-		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
+		pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x ->cpu=%d\n",
 		       rcu_state.name, j,
 		       (long)rcu_seq_current(&rcu_state.gp_seq),
 		       data_race(rcu_state.gp_flags),
 		       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
-		       gpk ? gpk->state : ~0, cpu);
+		       gpk ? gpk->__state : ~0, cpu);
 		if (gpk) {
 			pr_err("\tUnless %s kthread gets sufficient CPU time, OOM is now expected behavior.\n", rcu_state.name);
 			pr_err("RCU grace-period kthread stack dump:\n");
@@ -503,12 +503,12 @@ static void rcu_check_gp_kthread_expired_fqs_timer(void)
 	    time_after(jiffies, jiffies_fqs + RCU_STALL_MIGHT_MIN) &&
 	    gpk && !READ_ONCE(gpk->on_rq)) {
 		cpu = task_cpu(gpk);
-		pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx\n",
+		pr_err("%s kthread timer wakeup didn't happen for %ld jiffies! g%ld f%#x %s(%d) ->state=%#x\n",
 		       rcu_state.name, (jiffies - jiffies_fqs),
 		       (long)rcu_seq_current(&rcu_state.gp_seq),
 		       data_race(rcu_state.gp_flags),
 		       gp_state_getname(RCU_GP_WAIT_FQS), RCU_GP_WAIT_FQS,
-		       gpk->state);
+		       gpk->__state);
 		pr_err("\tPossible timer handling issue on cpu=%d timer-softirq=%u\n",
 		       cpu, kstat_softirqs_cpu(TIMER_SOFTIRQ, cpu));
 	}
@@ -735,9 +735,9 @@ void show_rcu_gp_kthreads(void)
 	ja = j - data_race(rcu_state.gp_activity);
 	jr = j - data_race(rcu_state.gp_req_activity);
 	jw = j - data_race(rcu_state.gp_wake_time);
-	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
+	pr_info("%s: wait state: %s(%d) ->state: %#x delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
 		rcu_state.name, gp_state_getname(rcu_state.gp_state),
-		rcu_state.gp_state, t ? t->state : 0x1ffffL,
+		rcu_state.gp_state, t ? t->__state : 0x1ffff,
 		ja, jr, jw, (long)data_race(rcu_state.gp_wake_seq),
 		(long)data_race(rcu_state.gp_seq),
 		(long)data_race(rcu_get_root()->gp_seq_needed),
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 45ebb3cfe86ca..309745a7ec514 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2638,7 +2638,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
 		return -EINVAL;
 	}
 
-	if (task_running(rq, p) || p->state == TASK_WAKING) {
+	if (task_running(rq, p) || READ_ONCE(p->__state) == TASK_WAKING) {
 		/*
 		 * MIGRATE_ENABLE gets here because 'p == current', but for
 		 * anything else we cannot do is_migration_disabled(), punt
@@ -2781,19 +2781,20 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 #ifdef CONFIG_SCHED_DEBUG
+	unsigned int state = READ_ONCE(p->__state);
+
 	/*
 	 * We should never call set_task_cpu() on a blocked task,
 	 * ttwu() will sort out the placement.
 	 */
-	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
-			!p->on_rq);
+	WARN_ON_ONCE(state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq);
 
 	/*
 	 * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
 	 * because schedstat_wait_{start,end} rebase migrating task's wait_start
 	 * time relying on p->on_rq.
 	 */
-	WARN_ON_ONCE(p->state == TASK_RUNNING &&
+	WARN_ON_ONCE(state == TASK_RUNNING &&
 		     p->sched_class == &fair_sched_class &&
 		     (p->on_rq && !task_on_rq_migrating(p)));
 
@@ -2965,7 +2966,7 @@ out:
  * smp_call_function() if an IPI is sent by the same process we are
  * waiting to become inactive.
  */
-unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
 {
 	int running, queued;
 	struct rq_flags rf;
@@ -2993,7 +2994,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		 * is actually now running somewhere else!
 		 */
 		while (task_running(rq, p)) {
-			if (match_state && unlikely(p->state != match_state))
+			if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
 				return 0;
 			cpu_relax();
 		}
@@ -3008,7 +3009,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		running = task_running(rq, p);
 		queued = task_on_rq_queued(p);
 		ncsw = 0;
-		if (!match_state || p->state == match_state)
+		if (!match_state || READ_ONCE(p->__state) == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 		task_rq_unlock(rq, p, &rf);
 
@@ -3317,7 +3318,7 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
 			   struct rq_flags *rf)
 {
 	check_preempt_curr(rq, p, wake_flags);
-	p->state = TASK_RUNNING;
+	WRITE_ONCE(p->__state, TASK_RUNNING);
 	trace_sched_wakeup(p);
 
 #ifdef CONFIG_SMP
@@ -3709,12 +3710,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		 *  - we're serialized against set_special_state() by virtue of
 		 *    it disabling IRQs (this allows not taking ->pi_lock).
 		 */
-		if (!(p->state & state))
+		if (!(READ_ONCE(p->__state) & state))
 			goto out;
 
 		success = 1;
 		trace_sched_waking(p);
-		p->state = TASK_RUNNING;
+		WRITE_ONCE(p->__state, TASK_RUNNING);
 		trace_sched_wakeup(p);
 		goto out;
 	}
@@ -3727,7 +3728,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	smp_mb__after_spinlock();
-	if (!(p->state & state))
+	if (!(READ_ONCE(p->__state) & state))
 		goto unlock;
 
 	trace_sched_waking(p);
@@ -3793,7 +3794,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 * TASK_WAKING such that we can unlock p->pi_lock before doing the
 	 * enqueue, such as ttwu_queue_wakelist().
 	 */
-	p->state = TASK_WAKING;
+	WRITE_ONCE(p->__state, TASK_WAKING);
 
 	/*
 	 * If the owning (remote) CPU is still in the middle of schedule() with
@@ -3886,7 +3887,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t
 			ret = func(p, arg);
 		rq_unlock(rq, &rf);
 	} else {
-		switch (p->state) {
+		switch (READ_ONCE(p->__state)) {
 		case TASK_RUNNING:
 		case TASK_WAKING:
 			break;
@@ -4086,7 +4087,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	 * nobody will actually run it, and a signal or other external
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
-	p->state = TASK_NEW;
+	p->__state = TASK_NEW;
 
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
@@ -4192,7 +4193,7 @@ void wake_up_new_task(struct task_struct *p)
 	struct rq *rq;
 
 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
-	p->state = TASK_RUNNING;
+	WRITE_ONCE(p->__state, TASK_RUNNING);
 #ifdef CONFIG_SMP
 	/*
 	 * Fork balancing, do it here and not earlier because:
@@ -4554,7 +4555,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 * running on another CPU and we could rave with its RUNNING -> DEAD
 	 * transition, resulting in a double drop.
 	 */
-	prev_state = prev->state;
+	prev_state = READ_ONCE(prev->__state);
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
 	finish_task(prev);
@@ -5248,7 +5249,7 @@ static inline void schedule_debug(struct task_struct *prev, bool preempt)
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-	if (!preempt && prev->state && prev->non_block_count) {
+	if (!preempt && READ_ONCE(prev->__state) && prev->non_block_count) {
 		printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
 			prev->comm, prev->pid, prev->non_block_count);
 		dump_stack();
@@ -5874,10 +5875,10 @@ static void __sched notrace __schedule(bool preempt)
 	 *  - we form a control dependency vs deactivate_task() below.
 	 *  - ptrace_{,un}freeze_traced() can change ->state underneath us.
 	 */
-	prev_state = prev->state;
+	prev_state = READ_ONCE(prev->__state);
 	if (!preempt && prev_state) {
 		if (signal_pending_state(prev_state, prev)) {
-			prev->state = TASK_RUNNING;
+			WRITE_ONCE(prev->__state, TASK_RUNNING);
 		} else {
 			prev->sched_contributes_to_load =
 				(prev_state & TASK_UNINTERRUPTIBLE) &&
@@ -6049,7 +6050,7 @@ void __sched schedule_idle(void)
 	 * current task can be in any other state. Note, idle is always in the
 	 * TASK_RUNNING state.
 	 */
-	WARN_ON_ONCE(current->state);
+	WARN_ON_ONCE(current->__state);
 	do {
 		__schedule(false);
 	} while (need_resched());
@@ -8176,26 +8177,28 @@ EXPORT_SYMBOL_GPL(sched_show_task);
 static inline bool
 state_filter_match(unsigned long state_filter, struct task_struct *p)
 {
+	unsigned int state = READ_ONCE(p->__state);
+
 	/* no filter, everything matches */
 	if (!state_filter)
 		return true;
 
 	/* filter, but doesn't match */
-	if (!(p->state & state_filter))
+	if (!(state & state_filter))
 		return false;
 
 	/*
 	 * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows
 	 * TASK_KILLABLE).
 	 */
-	if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE)
+	if (state_filter == TASK_UNINTERRUPTIBLE && state == TASK_IDLE)
 		return false;
 
 	return true;
 }
 
 
-void show_state_filter(unsigned long state_filter)
+void show_state_filter(unsigned int state_filter)
 {
 	struct task_struct *g, *p;
 
@@ -8252,7 +8255,7 @@ void __init init_idle(struct task_struct *idle, int cpu)
 	raw_spin_lock_irqsave(&idle->pi_lock, flags);
 	raw_spin_rq_lock(rq);
 
-	idle->state = TASK_RUNNING;
+	idle->__state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 	/*
 	 * PF_KTHREAD should already be set at this point; regardless, make it
@@ -9567,7 +9570,7 @@ static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 		 * has happened. This would lead to problems with PELT, due to
 		 * move wanting to detach+attach while we're not attached yet.
 		 */
-		if (task->state == TASK_NEW)
+		if (READ_ONCE(task->__state) == TASK_NEW)
 			ret = -EINVAL;
 		raw_spin_unlock_irq(&task->pi_lock);
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 3829c5a1b9366..22878cd5bd706 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -348,10 +348,10 @@ static void task_non_contending(struct task_struct *p)
 	if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
 		if (dl_task(p))
 			sub_running_bw(dl_se, dl_rq);
-		if (!dl_task(p) || p->state == TASK_DEAD) {
+		if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
 			struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
-			if (p->state == TASK_DEAD)
+			if (READ_ONCE(p->__state) == TASK_DEAD)
 				sub_rq_bw(&p->dl, &rq->dl);
 			raw_spin_lock(&dl_b->lock);
 			__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
@@ -1355,10 +1355,10 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
 	sched_clock_tick();
 	update_rq_clock(rq);
 
-	if (!dl_task(p) || p->state == TASK_DEAD) {
+	if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
 		struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
 
-		if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
+		if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
 			sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
 			sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
 			dl_se->dl_non_contending = 0;
@@ -1722,7 +1722,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
 {
 	struct rq *rq;
 
-	if (p->state != TASK_WAKING)
+	if (READ_ONCE(p->__state) != TASK_WAKING)
 		return;
 
 	rq = task_rq(p);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5d1a6aace1383..7b8990fd48962 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -993,11 +993,14 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
 		struct task_struct *tsk = task_of(se);
+		unsigned int state;
 
-		if (tsk->state & TASK_INTERRUPTIBLE)
+		/* XXX racy against TTWU */
+		state = READ_ONCE(tsk->__state);
+		if (state & TASK_INTERRUPTIBLE)
 			__schedstat_set(se->statistics.sleep_start,
 				      rq_clock(rq_of(cfs_rq)));
-		if (tsk->state & TASK_UNINTERRUPTIBLE)
+		if (state & TASK_UNINTERRUPTIBLE)
 			__schedstat_set(se->statistics.block_start,
 				      rq_clock(rq_of(cfs_rq)));
 	}
@@ -6888,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 	 * min_vruntime -- the latter is done by enqueue_entity() when placing
 	 * the task on the new runqueue.
 	 */
-	if (p->state == TASK_WAKING) {
+	if (READ_ONCE(p->__state) == TASK_WAKING) {
 		struct sched_entity *se = &p->se;
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 		u64 min_vruntime;
@@ -11053,7 +11056,7 @@ static inline bool vruntime_normalized(struct task_struct *p)
 	 *   waiting for actually being woken up by sched_ttwu_pending().
 	 */
 	if (!se->sum_exec_runtime ||
-	    (p->state == TASK_WAKING && p->sched_remote_wakeup))
+	    (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup))
 		return true;
 
 	return false;
diff --git a/lib/syscall.c b/lib/syscall.c
index ba13e924c430f..006e256d22644 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -68,13 +68,13 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
  */
 int task_current_syscall(struct task_struct *target, struct syscall_info *info)
 {
-	long state;
 	unsigned long ncsw;
+	unsigned int state;
 
 	if (target == current)
 		return collect_syscall(target, info);
 
-	state = target->state;
+	state = READ_ONCE(target->__state);
 	if (unlikely(!state))
 		return -EAGAIN;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ef8cf7619bafa..2512f672bf8a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4363,7 +4363,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 			 * makes sure to proceed with napi polling
 			 * if the thread is explicitly woken from here.
 			 */
-			if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
+			if (READ_ONCE(thread->__state) != TASK_INTERRUPTIBLE)
 				set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
 			wake_up_process(thread);
 			return;
-- 
GitLab


From 23f079c2494e9b25048db970b1f4dadf19c3c990 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 15 Jun 2021 09:45:32 -0700
Subject: [PATCH 3306/3804] KVM: VMX: Refuse to load kvm_intel if EPT and NX
 are disabled

Refuse to load KVM if NX support is not available and EPT is not enabled.
Shadow paging has assumed NX support since commit 9167ab799362 ("KVM:
vmx, svm: always run with EFER.NXE=1 when shadow paging is active"), so
for all intents and purposes this has been a de facto requirement for
over a year.

Do not require NX support if EPT is enabled purely because Intel CPUs let
firmware disable NX support via MSR_IA32_MISC_ENABLES.  If not for that,
VMX (and KVM as a whole) could require NX support with minimal risk of
breaking userspace.

Fixes: 9167ab799362 ("KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210615164535.2146172-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 68a72c80bd3ff..889e83f712352 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7723,6 +7723,12 @@ static __init int hardware_setup(void)
 	    !cpu_has_vmx_invept_global())
 		enable_ept = 0;
 
+	/* NX support is required for shadow paging. */
+	if (!enable_ept && !boot_cpu_has(X86_FEATURE_NX)) {
+		pr_err_ratelimited("kvm: NX (Execute Disable) not supported\n");
+		return -EOPNOTSUPP;
+	}
+
 	if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
 		enable_ept_ad_bits = 0;
 
-- 
GitLab


From b26a71a1a5b93531bd93305c9c0c7eae2d5cace1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 15 Jun 2021 09:45:33 -0700
Subject: [PATCH 3307/3804] KVM: SVM: Refuse to load kvm_amd if NX support is
 not available

Refuse to load KVM if NX support is not available.  Shadow paging has
assumed NX support since commit 9167ab799362 ("KVM: vmx, svm: always run
with EFER.NXE=1 when shadow paging is active"), and NPT has assumed NX
support since commit b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation").
While the NX huge pages mitigation should not be enabled by default for
AMD CPUs, it can be turned on by userspace at will.

Unlike Intel CPUs, AMD does not provide a way for firmware to disable NX
support, and Linux always sets EFER.NX=1 if it is supported.  Given that
it's extremely unlikely that a CPU supports NPT but not NX, making NX a
formal requirement is far simpler than adding requirements to the
mitigation flow.

Fixes: 9167ab799362 ("KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active")
Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210615164535.2146172-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b6afa6b63c8fa..12c06ea28f5ca 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -952,6 +952,16 @@ static __init int svm_hardware_setup(void)
 	int r;
 	unsigned int order = get_order(IOPM_SIZE);
 
+	/*
+	 * NX is required for shadow paging and for NPT if the NX huge pages
+	 * mitigation is enabled.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_NX)) {
+		pr_err_ratelimited("NX (Execute Disable) not supported\n");
+		return -EOPNOTSUPP;
+	}
+	kvm_enable_efer_bits(EFER_NX);
+
 	iopm_pages = alloc_pages(GFP_KERNEL, order);
 
 	if (!iopm_pages)
@@ -965,9 +975,6 @@ static __init int svm_hardware_setup(void)
 
 	supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
 
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
 	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
 		kvm_enable_efer_bits(EFER_FFXSR);
 
-- 
GitLab


From 8bbed95d2cb6e5de8a342d761a89b0a04faed7be Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 15 Jun 2021 09:45:34 -0700
Subject: [PATCH 3308/3804] KVM: x86: WARN and reject loading KVM if NX is
 supported but not enabled

WARN if NX is reported as supported but not enabled in EFER.  All flavors
of the kernel, including non-PAE 32-bit kernels, set EFER.NX=1 if NX is
supported, even if NX usage is disabled via kernel command line.  KVM relies
on NX being enabled if it's supported, e.g. KVM will generate illegal NPT
entries if nx_huge_pages is enabled and NX is supported but not enabled.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210615164535.2146172-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b898ec8d349b..76dae88cf5248 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10920,6 +10920,9 @@ int kvm_arch_hardware_setup(void *opaque)
 	int r;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
+	if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
+			 !(host_efer & EFER_NX)))
+		return -EIO;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
-- 
GitLab


From c62efff28bb5eb60d60415a0dd0c864c64be0671 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 18 Jun 2021 06:42:10 -0400
Subject: [PATCH 3309/3804] KVM: x86: Stub out is_tdp_mmu_root on 32-bit hosts

If is_tdp_mmu_root is not inlined, the elimination of TDP MMU calls as dead
code might not work out.  To avoid this, explicitly declare the stubbed
is_tdp_mmu_root on 32-bit hosts.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 408aa49731d51..78d8a296f0b60 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -85,12 +85,6 @@ bool kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
-#else
-static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
-static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
-static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
-static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
-#endif
 
 static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
 {
@@ -107,5 +101,12 @@ static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
 
 	return is_tdp_mmu_page(sp) && sp->root_count;
 }
+#else
+static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
+static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
+static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
+static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
+static inline bool is_tdp_mmu_root(hpa_t hpa) { return false; }
+#endif
 
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
-- 
GitLab


From aa23c0ad14228ccfcd0b6f799dd34b348a5f2b1e Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 17 Jun 2021 23:19:45 +0000
Subject: [PATCH 3310/3804] KVM: x86/mmu: Remove redundant is_tdp_mmu_root
 check

The check for is_tdp_mmu_root in kvm_tdp_mmu_map is redundant because
kvm_tdp_mmu_map's only caller (direct_page_fault) already checks
is_tdp_mmu_root.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210617231948.2591431-2-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4d658882a4d8c..d4c254dc4d5ff 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -985,8 +985,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 
 	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
 		return RET_PF_RETRY;
-	if (WARN_ON(!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)))
-		return RET_PF_RETRY;
 
 	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
 					huge_page_disallowed, &req_level);
-- 
GitLab


From 0b873fd7fb53ed7343ee7ee166e1373aec02a9cb Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 17 Jun 2021 23:19:46 +0000
Subject: [PATCH 3311/3804] KVM: x86/mmu: Remove redundant is_tdp_mmu_enabled
 check

This check is redundant because the root shadow page will only be a TDP
MMU page if is_tdp_mmu_enabled() returns true, and is_tdp_mmu_enabled()
never changes for the lifetime of a VM.

It's possible that this check was added for performance reasons but it
is unlikely that it is useful in practice since to_shadow_page() is
cheap. That being said, this patch also caches the return value of
is_tdp_mmu_root() in direct_page_fault() since there's no reason to
duplicate the call so many times, so performance is not a concern.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210617231948.2591431-3-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 11 ++++++-----
 arch/x86/kvm/mmu/tdp_mmu.h |  4 +---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7d3e57678d34c..10c1c2029d351 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3608,7 +3608,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		return reserved;
 	}
 
-	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+	if (is_tdp_mmu_root(vcpu->arch.mmu->root_hpa))
 		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
 		leaf = get_walk(vcpu, addr, sptes, &root);
@@ -3780,6 +3780,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 			     bool prefault, int max_level, bool is_tdp)
 {
+	bool is_tdp_mmu_fault = is_tdp_mmu_root(vcpu->arch.mmu->root_hpa);
 	bool write = error_code & PFERR_WRITE_MASK;
 	bool map_writable;
 
@@ -3792,7 +3793,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	if (page_fault_handle_page_track(vcpu, error_code, gfn))
 		return RET_PF_EMULATE;
 
-	if (!is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa)) {
+	if (!is_tdp_mmu_fault) {
 		r = fast_page_fault(vcpu, gpa, error_code);
 		if (r != RET_PF_INVALID)
 			return r;
@@ -3814,7 +3815,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 
 	r = RET_PF_RETRY;
 
-	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+	if (is_tdp_mmu_fault)
 		read_lock(&vcpu->kvm->mmu_lock);
 	else
 		write_lock(&vcpu->kvm->mmu_lock);
@@ -3825,7 +3826,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	if (r)
 		goto out_unlock;
 
-	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+	if (is_tdp_mmu_fault)
 		r = kvm_tdp_mmu_map(vcpu, gpa, error_code, map_writable, max_level,
 				    pfn, prefault);
 	else
@@ -3833,7 +3834,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 				 prefault, is_tdp);
 
 out_unlock:
-	if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
+	if (is_tdp_mmu_fault)
 		read_unlock(&vcpu->kvm->mmu_lock);
 	else
 		write_unlock(&vcpu->kvm->mmu_lock);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 78d8a296f0b60..f6e0667cf4b6f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -86,12 +86,10 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 
-static inline bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
+static inline bool is_tdp_mmu_root(hpa_t hpa)
 {
 	struct kvm_mmu_page *sp;
 
-	if (!is_tdp_mmu_enabled(kvm))
-		return false;
 	if (WARN_ON(!VALID_PAGE(hpa)))
 		return false;
 
-- 
GitLab


From 63c0cac938edfa5d72bfbe8f1eeb9d47b397829c Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 17 Jun 2021 23:19:47 +0000
Subject: [PATCH 3312/3804] KVM: x86/mmu: Refactor is_tdp_mmu_root into
 is_tdp_mmu

This change simplifies the call sites slightly and also abstracts away
the implementation detail of looking at root_hpa as the mechanism for
determining if the mmu is the TDP MMU.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210617231948.2591431-4-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 4 ++--
 arch/x86/kvm/mmu/tdp_mmu.h | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 10c1c2029d351..f1dd8308f0800 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3608,7 +3608,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		return reserved;
 	}
 
-	if (is_tdp_mmu_root(vcpu->arch.mmu->root_hpa))
+	if (is_tdp_mmu(vcpu->arch.mmu))
 		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
 		leaf = get_walk(vcpu, addr, sptes, &root);
@@ -3780,7 +3780,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 			     bool prefault, int max_level, bool is_tdp)
 {
-	bool is_tdp_mmu_fault = is_tdp_mmu_root(vcpu->arch.mmu->root_hpa);
+	bool is_tdp_mmu_fault = is_tdp_mmu(vcpu->arch.mmu);
 	bool write = error_code & PFERR_WRITE_MASK;
 	bool map_writable;
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index f6e0667cf4b6f..b981a044ab55d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -86,9 +86,10 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return kvm->arch.tdp_mmu_enabled; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
 
-static inline bool is_tdp_mmu_root(hpa_t hpa)
+static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
 {
 	struct kvm_mmu_page *sp;
+	hpa_t hpa = mmu->root_hpa;
 
 	if (WARN_ON(!VALID_PAGE(hpa)))
 		return false;
@@ -104,7 +105,7 @@ static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
 static inline void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) {}
 static inline bool is_tdp_mmu_enabled(struct kvm *kvm) { return false; }
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return false; }
-static inline bool is_tdp_mmu_root(hpa_t hpa) { return false; }
+static inline bool is_tdp_mmu(struct kvm_mmu *mmu) { return false; }
 #endif
 
 #endif /* __KVM_X86_MMU_TDP_MMU_H */
-- 
GitLab


From 0485cf8dbe964b6cc485178da6ee8ae7b2d0d15c Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 17 Jun 2021 23:19:48 +0000
Subject: [PATCH 3313/3804] KVM: x86/mmu: Remove redundant root_hpa checks

The root_hpa checks below the top-level check in kvm_mmu_page_fault are
theoretically redundant since there is no longer a way for the root_hpa
to be reset during a page fault. The details of why are described in
commit ddce6208217c ("KVM: x86/mmu: Move root_hpa validity checks to top
of page fault handler").

__direct_map, kvm_tdp_mmu_map, and get_mmio_spte are all only reachable
through kvm_mmu_page_fault, therefore their root_hpa checks are
redundant.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20210617231948.2591431-5-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c     | 8 --------
 arch/x86/kvm/mmu/tdp_mmu.c | 3 ---
 2 files changed, 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f1dd8308f0800..84d48a33e38b3 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2859,9 +2859,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	gfn_t gfn = gpa >> PAGE_SHIFT;
 	gfn_t base_gfn = gfn;
 
-	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
-		return RET_PF_RETRY;
-
 	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
 					huge_page_disallowed, &req_level);
 
@@ -3603,11 +3600,6 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	int root, leaf, level;
 	bool reserved = false;
 
-	if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
-		*sptep = 0ull;
-		return reserved;
-	}
-
 	if (is_tdp_mmu(vcpu->arch.mmu))
 		leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
 	else
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index d4c254dc4d5ff..caac4ddb46dfc 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -983,9 +983,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 	int level;
 	int req_level;
 
-	if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
-		return RET_PF_RETRY;
-
 	level = kvm_mmu_hugepage_adjust(vcpu, gfn, max_level, &pfn,
 					huge_page_disallowed, &req_level);
 
-- 
GitLab


From b5642479b0f7168fe16d156913533fe65ab4f8d5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:29 +0300
Subject: [PATCH 3314/3804] cfg80211: make certificate generation more robust

If all net/wireless/certs/*.hex files are deleted, the build
will hang at this point since the 'cat' command will have no
arguments. Do "echo | cat - ..." so that even if the "..."
part is empty, the whole thing won't hang.

Cc: stable@vger.kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.c989056c3664.Ic3b77531d00b30b26dcd69c64e55ae2f60c3f31e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 2eee93985ab0d..af590ae606b69 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -28,7 +28,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
 	@$(kecho) "  GEN     $@"
 	@(echo '#include "reg.h"'; \
 	  echo 'const u8 shipped_regdb_certs[] = {'; \
-	  cat $^ ; \
+	  echo | cat - $^ ; \
 	  echo '};'; \
 	  echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
 	 ) > $@
-- 
GitLab


From 0288e5e16a2e18f0b7e61a2b70d9037fc6e4abeb Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 18 Jun 2021 13:41:31 +0300
Subject: [PATCH 3315/3804] cfg80211: avoid double free of PMSR request

cfg80211_pmsr_process_abort() moves all the PMSR requests that
need to be freed into a local list before aborting and freeing them.
As a result, it is possible that cfg80211_pmsr_complete() will run in
parallel and free the same PMSR request.

Fix it by freeing the request in cfg80211_pmsr_complete() only if it
is still in the original pmsr list.

Cc: stable@vger.kernel.org
Fixes: 9bb7e0f24e7e ("cfg80211: add peer measurement with FTM initiator API")
Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.1fbef57e269a.I00294bebdb0680b892f8d1d5c871fd9dbe785a5e@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/pmsr.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 6bdd964080227..d245968b74cb7 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -334,6 +334,7 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
 			    gfp_t gfp)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL;
 	struct sk_buff *msg;
 	void *hdr;
 
@@ -364,9 +365,20 @@ free_msg:
 	nlmsg_free(msg);
 free_request:
 	spin_lock_bh(&wdev->pmsr_lock);
-	list_del(&req->list);
+	/*
+	 * cfg80211_pmsr_process_abort() may have already moved this request
+	 * to the free list, and will free it later. In this case, don't free
+	 * it here.
+	 */
+	list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) {
+		if (tmp == req) {
+			list_del(&req->list);
+			to_free = req;
+			break;
+		}
+	}
 	spin_unlock_bh(&wdev->pmsr_lock);
-	kfree(req);
+	kfree(to_free);
 }
 EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete);
 
-- 
GitLab


From bbc6f03ff26e7b71d6135a7b78ce40e7dee3d86a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:49 +0300
Subject: [PATCH 3316/3804] mac80211: reset profile_periodicity/ema_ap

Apparently we never clear these values, so they'll remain set
since the setting of them is conditional. Clear the values in
the relevant other cases.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.316e32d136a9.I2a12e51814258e1e1b526103894f4b9f19a91c8d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2480bd0577bb8..3f2aad2e74366 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -4062,10 +4062,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		if (elems.mbssid_config_ie)
 			bss_conf->profile_periodicity =
 				elems.mbssid_config_ie->profile_periodicity;
+		else
+			bss_conf->profile_periodicity = 0;
 
 		if (elems.ext_capab_len >= 11 &&
 		    (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
 			bss_conf->ema_ap = true;
+		else
+			bss_conf->ema_ap = false;
 
 		/* continue assoc process */
 		ifmgd->assoc_data->timeout = jiffies;
@@ -5802,12 +5806,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 					      beacon_ies->data, beacon_ies->len);
 		if (elem && elem->datalen >= 3)
 			sdata->vif.bss_conf.profile_periodicity = elem->data[2];
+		else
+			sdata->vif.bss_conf.profile_periodicity = 0;
 
 		elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
 					  beacon_ies->data, beacon_ies->len);
 		if (elem && elem->datalen >= 11 &&
 		    (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
 			sdata->vif.bss_conf.ema_ap = true;
+		else
+			sdata->vif.bss_conf.ema_ap = false;
 	} else {
 		assoc_data->timeout = jiffies;
 		assoc_data->timeout_started = true;
-- 
GitLab


From 652e8363bbc7d149fa194a5cbf30b1001c0274b0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 18 Jun 2021 13:41:45 +0300
Subject: [PATCH 3317/3804] mac80211: handle various extensible elements
 correctly

Various elements are parsed with a requirement to have an
exact size, when really we should only check that they have
the minimum size that we need. Check only that and therefore
ignore any additional data that they might carry.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20210618133832.cd101f8040a4.Iadf0e9b37b100c6c6e79c7b298cc657c2be9151a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 93d96a4f9c3e6..060059ef96686 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -947,7 +947,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 
 	switch (elem->data[0]) {
 	case WLAN_EID_EXT_HE_MU_EDCA:
-		if (len == sizeof(*elems->mu_edca_param_set)) {
+		if (len >= sizeof(*elems->mu_edca_param_set)) {
 			elems->mu_edca_param_set = data;
 			if (crc)
 				*crc = crc32_be(*crc, (void *)elem,
@@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 		}
 		break;
 	case WLAN_EID_EXT_UORA:
-		if (len == 1)
+		if (len >= 1)
 			elems->uora_element = data;
 		break;
 	case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME:
@@ -976,7 +976,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 			elems->max_channel_switch_time = data;
 		break;
 	case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION:
-		if (len == sizeof(*elems->mbssid_config_ie))
+		if (len >= sizeof(*elems->mbssid_config_ie))
 			elems->mbssid_config_ie = data;
 		break;
 	case WLAN_EID_EXT_HE_SPR:
@@ -985,7 +985,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
 			elems->he_spr = data;
 		break;
 	case WLAN_EID_EXT_HE_6GHZ_CAPA:
-		if (len == sizeof(*elems->he_6ghz_capa))
+		if (len >= sizeof(*elems->he_6ghz_capa))
 			elems->he_6ghz_capa = data;
 		break;
 	}
@@ -1074,14 +1074,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 
 		switch (id) {
 		case WLAN_EID_LINK_ID:
-			if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) {
+			if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) {
 				elem_parse_failed = true;
 				break;
 			}
 			elems->lnk_id = (void *)(pos - 2);
 			break;
 		case WLAN_EID_CHAN_SWITCH_TIMING:
-			if (elen != sizeof(struct ieee80211_ch_switch_timing)) {
+			if (elen < sizeof(struct ieee80211_ch_switch_timing)) {
 				elem_parse_failed = true;
 				break;
 			}
@@ -1244,7 +1244,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			elems->sec_chan_offs = (void *)pos;
 			break;
 		case WLAN_EID_CHAN_SWITCH_PARAM:
-			if (elen !=
+			if (elen <
 			    sizeof(*elems->mesh_chansw_params_ie)) {
 				elem_parse_failed = true;
 				break;
@@ -1253,7 +1253,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			break;
 		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH:
 			if (!action ||
-			    elen != sizeof(*elems->wide_bw_chansw_ie)) {
+			    elen < sizeof(*elems->wide_bw_chansw_ie)) {
 				elem_parse_failed = true;
 				break;
 			}
@@ -1272,7 +1272,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH,
 					      pos, elen);
 			if (ie) {
-				if (ie[1] == sizeof(*elems->wide_bw_chansw_ie))
+				if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie))
 					elems->wide_bw_chansw_ie =
 						(void *)(ie + 2);
 				else
@@ -1316,7 +1316,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			elems->cisco_dtpc_elem = pos;
 			break;
 		case WLAN_EID_ADDBA_EXT:
-			if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+			if (elen < sizeof(struct ieee80211_addba_ext_ie)) {
 				elem_parse_failed = true;
 				break;
 			}
@@ -1342,7 +1342,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 							  elem, elems);
 			break;
 		case WLAN_EID_S1G_CAPABILITIES:
-			if (elen == sizeof(*elems->s1g_capab))
+			if (elen >= sizeof(*elems->s1g_capab))
 				elems->s1g_capab = (void *)pos;
 			else
 				elem_parse_failed = true;
-- 
GitLab


From 6204004de3160900435bdb4b9a2fb8749a9277d2 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Thu, 17 Jun 2021 18:58:21 +0800
Subject: [PATCH 3318/3804] KVM: arm64: Introduce two cache maintenance
 callbacks

To prepare for performing CMOs for guest stage-2 in the fault handlers
in pgtable.c, here introduce two cache maintenance callbacks in struct
kvm_pgtable_mm_ops. We also adjust the comment alignment for the
existing part but make no real content change at all.

Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
[maz: fixed up comments and renamed callbacks]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210617105824.31752-2-wangyanan55@huawei.com
---
 arch/arm64/include/asm/kvm_pgtable.h | 42 +++++++++++++++++-----------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index c3674c47d48c6..f004c0115d89d 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -27,23 +27,29 @@ typedef u64 kvm_pte_t;
 
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
- * @zalloc_page:	Allocate a single zeroed memory page. The @arg parameter
- *			can be used by the walker to pass a memcache. The
- *			initial refcount of the page is 1.
- * @zalloc_pages_exact:	Allocate an exact number of zeroed memory pages. The
- *			@size parameter is in bytes, and is rounded-up to the
- *			next page boundary. The resulting allocation is
- *			physically contiguous.
- * @free_pages_exact:	Free an exact number of memory pages previously
- *			allocated by zalloc_pages_exact.
- * @get_page:		Increment the refcount on a page.
- * @put_page:		Decrement the refcount on a page. When the refcount
- *			reaches 0 the page is automatically freed.
- * @page_count:		Return the refcount of a page.
- * @phys_to_virt:	Convert a physical address into a virtual address mapped
- *			in the current context.
- * @virt_to_phys:	Convert a virtual address mapped in the current context
- *			into a physical address.
+ * @zalloc_page:		Allocate a single zeroed memory page.
+ *				The @arg parameter can be used by the walker
+ *				to pass a memcache. The initial refcount of
+ *				the page is 1.
+ * @zalloc_pages_exact:		Allocate an exact number of zeroed memory pages.
+ *				The @size parameter is in bytes, and is rounded
+ *				up to the next page boundary. The resulting
+ *				allocation is physically contiguous.
+ * @free_pages_exact:		Free an exact number of memory pages previously
+ *				allocated by zalloc_pages_exact.
+ * @get_page:			Increment the refcount on a page.
+ * @put_page:			Decrement the refcount on a page. When the
+ *				refcount reaches 0 the page is automatically
+ *				freed.
+ * @page_count:			Return the refcount of a page.
+ * @phys_to_virt:		Convert a physical address into a virtual
+ *				address	mapped in the current context.
+ * @virt_to_phys:		Convert a virtual address mapped in the current
+ *				context into a physical address.
+ * @dcache_clean_inval_poc:	Clean and invalidate the data cache to the PoC
+ *				for the	specified memory address range.
+ * @icache_inval_pou:		Invalidate the instruction cache to the PoU
+ *				for the specified memory address range.
  */
 struct kvm_pgtable_mm_ops {
 	void*		(*zalloc_page)(void *arg);
@@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops {
 	int		(*page_count)(void *addr);
 	void*		(*phys_to_virt)(phys_addr_t phys);
 	phys_addr_t	(*virt_to_phys)(void *addr);
+	void		(*dcache_clean_inval_poc)(void *addr, size_t size);
+	void		(*icache_inval_pou)(void *addr, size_t size);
 };
 
 /**
-- 
GitLab


From a4d5ca5c7cd8fe85056b8cb838fbcb7e5a05f356 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Thu, 17 Jun 2021 18:58:22 +0800
Subject: [PATCH 3319/3804] KVM: arm64: Introduce mm_ops member for structure
 stage2_attr_data

Also add a mm_ops member for structure stage2_attr_data, since we
will move I-cache maintenance for guest stage-2 to the permission
path and as a result will need mm_ops for some callbacks.

Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210617105824.31752-3-wangyanan55@huawei.com
---
 arch/arm64/kvm/hyp/pgtable.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index c37c1dc4feafa..d99789432b05c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -861,10 +861,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 }
 
 struct stage2_attr_data {
-	kvm_pte_t	attr_set;
-	kvm_pte_t	attr_clr;
-	kvm_pte_t	pte;
-	u32		level;
+	kvm_pte_t			attr_set;
+	kvm_pte_t			attr_clr;
+	kvm_pte_t			pte;
+	u32				level;
+	struct kvm_pgtable_mm_ops	*mm_ops;
 };
 
 static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -903,6 +904,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 	struct stage2_attr_data data = {
 		.attr_set	= attr_set & attr_mask,
 		.attr_clr	= attr_clr & attr_mask,
+		.mm_ops		= pgt->mm_ops,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_attr_walker,
-- 
GitLab


From 378e6a9c78a02b4b609846aa0afccf34d3038977 Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Thu, 17 Jun 2021 18:58:23 +0800
Subject: [PATCH 3320/3804] KVM: arm64: Tweak parameters of guest cache
 maintenance functions

Adjust the parameter "kvm_pfn_t pfn" of __clean_dcache_guest_page
and __invalidate_icache_guest_page to "void *va", which paves the
way for converting these two guest CMO functions into callbacks in
structure kvm_pgtable_mm_ops. No functional change.

Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210617105824.31752-4-wangyanan55@huawei.com
---
 arch/arm64/include/asm/kvm_mmu.h |  9 ++-------
 arch/arm64/kvm/mmu.c             | 28 +++++++++++++++-------------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 25ed956f9af15..6844a75503923 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -187,10 +187,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
 	return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
 }
 
-static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
+static inline void __clean_dcache_guest_page(void *va, size_t size)
 {
-	void *va = page_address(pfn_to_page(pfn));
-
 	/*
 	 * With FWB, we ensure that the guest always accesses memory using
 	 * cacheable attributes, and we don't have to clean to PoC when
@@ -203,16 +201,13 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 	kvm_flush_dcache_to_poc(va, size);
 }
 
-static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
-						  unsigned long size)
+static inline void __invalidate_icache_guest_page(void *va, size_t size)
 {
 	if (icache_is_aliasing()) {
 		/* any kind of VIPT cache */
 		__flush_icache_all();
 	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
 		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
-		void *va = page_address(pfn_to_page(pfn));
-
 		invalidate_icache_range((unsigned long)va,
 					(unsigned long)va + size);
 	}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f36..0a5a5b098a4a4 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys)
 	return __va(phys);
 }
 
+static void clean_dcache_guest_page(void *va, size_t size)
+{
+	__clean_dcache_guest_page(va, size);
+}
+
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+	__invalidate_icache_guest_page(va, size);
+}
+
 /*
  * Unmapping vs dcache management:
  *
@@ -693,16 +703,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
 	kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
-static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__clean_dcache_guest_page(pfn, size);
-}
-
-static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size)
-{
-	__invalidate_icache_guest_page(pfn, size);
-}
-
 static void kvm_send_hwpoison_signal(unsigned long address, short lsb)
 {
 	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current);
@@ -972,11 +972,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		prot |= KVM_PGTABLE_PROT_W;
 
 	if (fault_status != FSC_PERM && !device)
-		clean_dcache_guest_page(pfn, vma_pagesize);
+		clean_dcache_guest_page(page_address(pfn_to_page(pfn)),
+					vma_pagesize);
 
 	if (exec_fault) {
 		prot |= KVM_PGTABLE_PROT_X;
-		invalidate_icache_guest_page(pfn, vma_pagesize);
+		invalidate_icache_guest_page(page_address(pfn_to_page(pfn)),
+					     vma_pagesize);
 	}
 
 	if (device)
@@ -1178,7 +1180,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	 * We've moved a page around, probably through CoW, so let's treat it
 	 * just like a translation fault and clean the cache to the PoC.
 	 */
-	clean_dcache_guest_page(pfn, PAGE_SIZE);
+	clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE);
 
 	/*
 	 * The MMU notifiers will have unmapped a huge PMD before calling
-- 
GitLab


From 25aa28691bb960a76f0cffd8862144a29487f6ff Mon Sep 17 00:00:00 2001
From: Yanan Wang <wangyanan55@huawei.com>
Date: Thu, 17 Jun 2021 18:58:24 +0800
Subject: [PATCH 3321/3804] KVM: arm64: Move guest CMOs to the fault handlers

We currently uniformly perform CMOs of D-cache and I-cache in function
user_mem_abort before calling the fault handlers. If we get concurrent
guest faults (e.g. translation faults, permission faults) or some really
unnecessary guest faults caused by BBM, CMOs for the first vcpu are
necessary while those for the later vcpus are not.

By moving CMOs to the fault handlers, we can easily identify conditions
where they are really needed and avoid the unnecessary ones. As it's a
time-consuming process to perform CMOs, especially when flushing a block
range, this solution greatly reduces the load on KVM and improves the
efficiency of the stage-2 page table code.

We can imagine two specific scenarios which will gain much benefit:
1) In a normal VM startup, this solution will improve the efficiency of
handling guest page faults incurred by vCPUs, when initially populating
stage-2 page tables.
2) After live migration, the heavy workload will be resumed on the
destination VM, however all the stage-2 page tables need to be rebuilt
at the moment. So this solution will ease the performance drop during
resuming stage.

Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Yanan Wang <wangyanan55@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210617105824.31752-5-wangyanan55@huawei.com
---
 arch/arm64/kvm/hyp/pgtable.c | 38 +++++++++++++++++++++++++++++-------
 arch/arm64/kvm/mmu.c         | 21 +++++++-------------
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index d99789432b05c..72f1d8f500947 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
 	mm_ops->put_page(ptep);
 }
 
+static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
+{
+	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
+	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+}
+
+static bool stage2_pte_executable(kvm_pte_t pte)
+{
+	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+}
+
 static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 				      kvm_pte_t *ptep,
 				      struct stage2_map_data *data)
 {
 	kvm_pte_t new, old = *ptep;
 	u64 granule = kvm_granule_size(level), phys = data->phys;
+	struct kvm_pgtable *pgt = data->mmu->pgt;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_block_mapping_supported(addr, end, phys, level))
@@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
 	}
 
+	/* Perform CMOs before installation of the guest stage-2 PTE */
+	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
+		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops),
+						granule);
+
+	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
+		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
+
 	smp_store_release(ptep, new);
 	if (stage2_pte_is_counted(new))
 		mm_ops->get_page(ptep);
@@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return ret;
 }
 
-static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
-{
-	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
-	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
-}
-
 static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 			       enum kvm_pgtable_walk_flags flag,
 			       void * const arg)
@@ -874,6 +888,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 {
 	kvm_pte_t pte = *ptep;
 	struct stage2_attr_data *data = arg;
+	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_pte_valid(pte))
 		return 0;
@@ -888,8 +903,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	 * but worst-case the access flag update gets lost and will be
 	 * set on the next access instead.
 	 */
-	if (data->pte != pte)
+	if (data->pte != pte) {
+		/*
+		 * Invalidate instruction cache before updating the guest
+		 * stage-2 PTE if we are going to add executable permission.
+		 */
+		if (mm_ops->icache_inval_pou &&
+		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
+						  kvm_granule_size(level));
 		WRITE_ONCE(*ptep, pte);
+	}
 
 	return 0;
 }
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 0a5a5b098a4a4..0b3ba57849f6f 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -442,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
 	.page_count		= kvm_host_page_count,
 	.phys_to_virt		= kvm_host_va,
 	.virt_to_phys		= kvm_host_pa,
+	.dcache_clean_inval_poc	= clean_dcache_guest_page,
+	.icache_inval_pou	= invalidate_icache_guest_page,
 };
 
 /**
@@ -971,15 +973,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (writable)
 		prot |= KVM_PGTABLE_PROT_W;
 
-	if (fault_status != FSC_PERM && !device)
-		clean_dcache_guest_page(page_address(pfn_to_page(pfn)),
-					vma_pagesize);
-
-	if (exec_fault) {
+	if (exec_fault)
 		prot |= KVM_PGTABLE_PROT_X;
-		invalidate_icache_guest_page(page_address(pfn_to_page(pfn)),
-					     vma_pagesize);
-	}
 
 	if (device)
 		prot |= KVM_PGTABLE_PROT_DEVICE;
@@ -1177,12 +1172,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	WARN_ON(range->end - range->start != 1);
 
 	/*
-	 * We've moved a page around, probably through CoW, so let's treat it
-	 * just like a translation fault and clean the cache to the PoC.
-	 */
-	clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE);
-
-	/*
+	 * We've moved a page around, probably through CoW, so let's treat
+	 * it just like a translation fault and the map handler will clean
+	 * the cache to the PoC.
+	 *
 	 * The MMU notifiers will have unmapped a huge PMD before calling
 	 * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
 	 * therefore we never need to clear out a huge PMD through this
-- 
GitLab


From 2a71fabf6a1bc9162a84e18d6ab991230ca4d588 Mon Sep 17 00:00:00 2001
From: Alexandru Elisei <alexandru.elisei@arm.com>
Date: Fri, 18 Jun 2021 11:51:39 +0100
Subject: [PATCH 3322/3804] KVM: arm64: Don't zero the cycle count register
 when PMCR_EL0.P is set

According to ARM DDI 0487G.a, page D13-3895, setting the PMCR_EL0.P bit to
1 has the following effect:

"Reset all event counters accessible in the current Exception level, not
including PMCCNTR_EL0, to zero."

Similar behaviour is described for AArch32 on page G8-7022. Make it so.

Fixes: c01d6a18023b ("KVM: arm64: pmu: Only handle supported event counters")
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210618105139.83795-1-alexandru.elisei@arm.com
---
 arch/arm64/kvm/pmu-emul.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index fd167d4f42157..ecc0d19c8cc14 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 		kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
 
 	if (val & ARMV8_PMU_PMCR_P) {
+		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 		for_each_set_bit(i, &mask, 32)
 			kvm_pmu_set_counter_value(vcpu, i, 0);
 	}
-- 
GitLab


From 67e2996f72c71ebe4ac2fcbcf77e54479bb7aa11 Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@foss.st.com>
Date: Thu, 17 Jun 2021 16:46:29 +0200
Subject: [PATCH 3323/3804] pinctrl: stm32: fix the reported number of GPIO
 lines per bank

Each GPIO bank supports a variable number of lines which is usually 16, but
is less in some cases: this is specified by the last argument of the
"gpio-ranges" bank node property.
Report the actual number of lines to the framework, so the libgpiod
gpioinfo command lists the GPIO lines that actually exist.

Fixes: 1dc9d289154b ("pinctrl: stm32: add possibility to use gpio-ranges to declare bank range")
Signed-off-by: Fabien Dessenne <fabien.dessenne@foss.st.com>
Link: https://lore.kernel.org/r/20210617144629.2557693-1-fabien.dessenne@foss.st.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/stm32/pinctrl-stm32.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c
index ad9eb5ed8e815..c14d12d54cc57 100644
--- a/drivers/pinctrl/stm32/pinctrl-stm32.c
+++ b/drivers/pinctrl/stm32/pinctrl-stm32.c
@@ -1224,7 +1224,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
 	struct device *dev = pctl->dev;
 	struct resource res;
 	int npins = STM32_GPIO_PINS_PER_BANK;
-	int bank_nr, err;
+	int bank_nr, err, i = 0;
 
 	if (!IS_ERR(bank->rstc))
 		reset_control_deassert(bank->rstc);
@@ -1246,9 +1246,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
 
 	of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label);
 
-	if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) {
+	if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) {
 		bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK;
 		bank->gpio_chip.base = args.args[1];
+
+		npins = args.args[2];
+		while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3,
+							 ++i, &args))
+			npins += args.args[2];
 	} else {
 		bank_nr = pctl->nbanks;
 		bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
-- 
GitLab


From fb780761e7bd9f2e94f5b9a296ead6b35b944206 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Jun 2021 23:41:26 +0800
Subject: [PATCH 3324/3804] recordmcount: Correct st_shndx handling

One should only use st_shndx when >SHN_UNDEF and <SHN_LORESERVE. When
SHN_XINDEX, then use .symtab_shndx. Otherwise use 0.

This handles the case: st_shndx >= SHN_LORESERVE && st_shndx != SHN_XINDEX.

Link: https://lore.kernel.org/lkml/20210607023839.26387-1-mark-pk.tsai@mediatek.com/
Link: https://lkml.kernel.org/r/20210616154126.2794-1-mark-pk.tsai@mediatek.com

Reported-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
[handle endianness of sym->st_shndx]
Signed-off-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 scripts/recordmcount.h | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index f9b19524da112..1e9baa5c4fc6e 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab,
 				 Elf32_Word const *symtab_shndx)
 {
 	unsigned long offset;
+	unsigned short shndx = w2(sym->st_shndx);
 	int index;
 
-	if (sym->st_shndx != SHN_XINDEX)
-		return w2(sym->st_shndx);
+	if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE)
+		return shndx;
 
-	offset = (unsigned long)sym - (unsigned long)symtab;
-	index = offset / sizeof(*sym);
+	if (shndx == SHN_XINDEX) {
+		offset = (unsigned long)sym - (unsigned long)symtab;
+		index = offset / sizeof(*sym);
 
-	return w(symtab_shndx[index]);
+		return w(symtab_shndx[index]);
+	}
+
+	return 0;
 }
 
 static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
-- 
GitLab


From 85550c83da421fb12dc1816c45012e1e638d2b38 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 17 Jun 2021 13:47:25 -0400
Subject: [PATCH 3325/3804] tracing: Do not stop recording cmdlines when
 tracing is off

The saved_cmdlines is used to map pids to the task name, such that the
output of the tracing does not just show pids, but also gives a human
readable name for the task.

If the name is not mapped, the output looks like this:

    <...>-1316          [005] ...2   132.044039: ...

Instead of this:

    gnome-shell-1316    [005] ...2   132.044039: ...

The names are updated when tracing is running, but are skipped if tracing
is stopped. Unfortunately, this stops the recording of the names if the
top level tracer is stopped, and not if there's other tracers active.

The recording of a name only happens when a new event is written into a
ring buffer, so there is no need to test if tracing is on or not. If
tracing is off, then no event is written and no need to test if tracing is
off or not.

Remove the check, as it hides the names of tasks for events in the
instance buffers.

Cc: stable@vger.kernel.org
Fixes: 7ffbd48d5cab2 ("tracing: Cache comms only after an event occurred")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9299057feb56f..e220b37e29c68 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2486,8 +2486,6 @@ static bool tracing_record_taskinfo_skip(int flags)
 {
 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
 		return true;
-	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
-		return true;
 	if (!__this_cpu_read(trace_taskinfo_save))
 		return true;
 	return false;
-- 
GitLab


From 4fdd595e4f9a1ff6d93ec702eaecae451cfc6591 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 17 Jun 2021 14:32:34 -0400
Subject: [PATCH 3326/3804] tracing: Do not stop recording comms if the trace
 file is being read

A while ago, when the "trace" file was opened, tracing was stopped, and
code was added to stop recording the comms to saved_cmdlines, for mapping
of the pids to the task name.

Code has been added that only records the comm if a trace event occurred,
and there's no reason to not trace it if the trace file is opened.

Cc: stable@vger.kernel.org
Fixes: 7ffbd48d5cab2 ("tracing: Cache comms only after an event occurred")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e220b37e29c68..d23a09d3eb37b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2198,9 +2198,6 @@ struct saved_cmdlines_buffer {
 };
 static struct saved_cmdlines_buffer *savedcmd;
 
-/* temporary disable recording */
-static atomic_t trace_record_taskinfo_disabled __read_mostly;
-
 static inline char *get_saved_cmdlines(int idx)
 {
 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -3996,9 +3993,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		return ERR_PTR(-EBUSY);
 #endif
 
-	if (!iter->snapshot)
-		atomic_inc(&trace_record_taskinfo_disabled);
-
 	if (*pos != iter->pos) {
 		iter->ent = NULL;
 		iter->cpu = 0;
@@ -4041,9 +4035,6 @@ static void s_stop(struct seq_file *m, void *p)
 		return;
 #endif
 
-	if (!iter->snapshot)
-		atomic_dec(&trace_record_taskinfo_disabled);
-
 	trace_access_unlock(iter->cpu_file);
 	trace_event_read_unlock();
 }
-- 
GitLab


From 89529d8b8f8daf92d9979382b8d2eb39966846ea Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 17 Jun 2021 17:12:35 -0400
Subject: [PATCH 3327/3804] tracing: Do no increment trace_clock_global() by
 one

The trace_clock_global() tries to make sure the events between CPUs are
somewhat in order. A global value is used and updated by the latest read
of a clock. If one CPU is ahead by a little, and is read by another CPU, a
lock is taken, and if the timestamp of the other CPU is behind, it will
simply use the other CPUs timestamp.

The lock is also only taken with a "trylock" due to tracing, and strange
recursions can happen. The lock is not taken at all in NMI context.

In the case where the lock is not able to be taken, the non synced
timestamp is returned. But it will not be less than the saved global
timestamp.

The problem arises because when the time goes "backwards" the time
returned is the saved timestamp plus 1. If the lock is not taken, and the
plus one to the timestamp is returned, there's a small race that can cause
the time to go backwards!

	CPU0				CPU1
	----				----
				trace_clock_global() {
				    ts = clock() [ 1000 ]
				    trylock(clock_lock) [ success ]
				    global_ts = ts; [ 1000 ]

				    <interrupted by NMI>
 trace_clock_global() {
    ts = clock() [ 999 ]
    if (ts < global_ts)
	ts = global_ts + 1 [ 1001 ]

    trylock(clock_lock) [ fail ]

    return ts [ 1001]
 }
				    unlock(clock_lock);
				    return ts; [ 1000 ]
				}

 trace_clock_global() {
    ts = clock() [ 1000 ]
    if (ts < global_ts) [ false 1000 == 1000 ]

    trylock(clock_lock) [ success ]
    global_ts = ts; [ 1000 ]
    unlock(clock_lock)

    return ts; [ 1000 ]
 }

The above case shows two reads of trace_clock_global() on the same CPU, but
the second read returns one less than the first read. That is, time went
backwards, and this is not what is allowed by trace_clock_global().

This was triggered by heavy tracing and the ring buffer checker that tests
for the clock going backwards:

 Ring buffer clock went backwards: 20613921464 -> 20613921463
 ------------[ cut here ]------------
 WARNING: CPU: 2 PID: 0 at kernel/trace/ring_buffer.c:3412 check_buffer+0x1b9/0x1c0
 Modules linked in:
 [..]
 [CPU: 2]TIME DOES NOT MATCH expected:20620711698 actual:20620711697 delta:6790234 before:20613921463 after:20613921463
   [20613915818] PAGE TIME STAMP
   [20613915818] delta:0
   [20613915819] delta:1
   [20613916035] delta:216
   [20613916465] delta:430
   [20613916575] delta:110
   [20613916749] delta:174
   [20613917248] delta:499
   [20613917333] delta:85
   [20613917775] delta:442
   [20613917921] delta:146
   [20613918321] delta:400
   [20613918568] delta:247
   [20613918768] delta:200
   [20613919306] delta:538
   [20613919353] delta:47
   [20613919980] delta:627
   [20613920296] delta:316
   [20613920571] delta:275
   [20613920862] delta:291
   [20613921152] delta:290
   [20613921464] delta:312
   [20613921464] delta:0 TIME EXTEND
   [20613921464] delta:0

This happened more than once, and always for an off by one result. It also
started happening after commit aafe104aa9096 was added.

Cc: stable@vger.kernel.org
Fixes: aafe104aa9096 ("tracing: Restructure trace_clock_global() to never block")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_clock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index c1637f90c8a38..4702efb00ff21 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -115,9 +115,9 @@ u64 notrace trace_clock_global(void)
 	prev_time = READ_ONCE(trace_clock_struct.prev_time);
 	now = sched_clock_cpu(this_cpu);
 
-	/* Make sure that now is always greater than prev_time */
+	/* Make sure that now is always greater than or equal to prev_time */
 	if ((s64)(now - prev_time) < 0)
-		now = prev_time + 1;
+		now = prev_time;
 
 	/*
 	 * If in an NMI context then dont risk lockups and simply return
@@ -131,7 +131,7 @@ u64 notrace trace_clock_global(void)
 		/* Reread prev_time in case it was already updated */
 		prev_time = READ_ONCE(trace_clock_struct.prev_time);
 		if ((s64)(now - prev_time) < 0)
-			now = prev_time + 1;
+			now = prev_time;
 
 		trace_clock_struct.prev_time = now;
 
-- 
GitLab


From d0c94c49792cf780cbfefe29f81bb8c3b73bc76b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 3 Jun 2021 16:50:02 +0100
Subject: [PATCH 3328/3804] KVM: arm64: Restore PMU configuration on first run

Restoring a guest with an active virtual PMU results in no perf
counters being instantiated on the host side. Not quite what
you'd expect from a restore.

In order to fix this, force a writeback of PMCR_EL0 on the first
run of a vcpu (using a new request so that it happens once the
vcpu has been loaded). This will in turn create all the host-side
counters that were missing.

Reported-by: Jinank Jain <jinankj@amazon.de>
Tested-by: Jinank Jain <jinankj@amazon.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/87wnrbylxv.wl-maz@kernel.org
Link: https://lore.kernel.org/r/b53dfcf9bbc4db7f96154b1cd5188d72b9766358.camel@amazon.de
---
 arch/arm64/include/asm/kvm_host.h | 1 +
 arch/arm64/kvm/arm.c              | 4 ++++
 arch/arm64/kvm/pmu-emul.c         | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc2..6336b4309114b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -46,6 +46,7 @@
 #define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
 #define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
 #define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
 				     KVM_DIRTY_LOG_INITIALLY_SET)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a06..facf4d41d32a2 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -689,6 +689,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
 			vgic_v4_load(vcpu);
 			preempt_enable();
 		}
+
+		if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
+			kvm_pmu_handle_pmcr(vcpu,
+					    __vcpu_sys_reg(vcpu, PMCR_EL0));
 	}
 }
 
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index ecc0d19c8cc14..f33825c995cbb 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -851,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 		   return -EINVAL;
 	}
 
+	/* One-off reload of the PMU on first run */
+	kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
+
 	return 0;
 }
 
-- 
GitLab


From 3bd6b8271ee660803c5694cc25420c499c5c0592 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punitagrawal@gmail.com>
Date: Tue, 15 Jun 2021 08:04:57 +0900
Subject: [PATCH 3329/3804] PCI: of: Clear 64-bit flag for non-prefetchable
 memory below 4GB

Alexandru and Qu reported this resource allocation failure on ROCKPro64 v2
and ROCK Pi 4B, both based on the RK3399:

  pci_bus 0000:00: root bus resource [mem 0xfa000000-0xfbdfffff 64bit]
  pci 0000:00:00.0: PCI bridge to [bus 01]
  pci 0000:00:00.0: BAR 14: no space for [mem size 0x00100000]
  pci 0000:01:00.0: reg 0x10: [mem 0x00000000-0x00003fff 64bit]

"BAR 14" is the PCI bridge's 32-bit non-prefetchable window, and our PCI
allocation code isn't smart enough to allocate it in a host bridge window
marked as 64-bit, even though this should work fine.

A DT host bridge description includes the windows from the CPU address
space to the PCI bus space.  On a few architectures (microblaze, powerpc,
sparc), the DT may also describe PCI devices themselves, including their
BARs.

Before 9d57e61bf723 ("of/pci: Add IORESOURCE_MEM_64 to resource flags for
64-bit memory addresses"), of_bus_pci_get_flags() ignored the fact that
some DT addresses described 64-bit windows and BARs.  That was a problem
because the virtio virtual NIC has a 32-bit BAR and a 64-bit BAR, and the
driver couldn't distinguish them.

9d57e61bf723 set IORESOURCE_MEM_64 for those 64-bit DT ranges, which fixed
the virtio driver.  But it also set IORESOURCE_MEM_64 for host bridge
windows, which exposed the fact that the PCI allocator isn't smart enough
to put 32-bit resources in those 64-bit windows.

Clear IORESOURCE_MEM_64 from host bridge windows since we don't need that
information.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Fixes: 9d57e61bf723 ("of/pci: Add IORESOURCE_MEM_64 to resource flags for 64-bit memory addresses")
Link: https://lore.kernel.org/r/20210614230457.752811-1-punitagrawal@gmail.com
Reported-at: https://lore.kernel.org/lkml/7a1e2ebc-f7d8-8431-d844-41a9c36a8911@arm.com/
Reported-at: https://lore.kernel.org/lkml/YMyTUv7Jsd89PGci@m4/T/#u
Reported-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reported-by: Qu Wenruo <wqu@suse.com>
Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>
Tested-by: Domenico Andreoli <domenico.andreoli@linux.com>
Signed-off-by: Punit Agrawal <punitagrawal@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/pci/of.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index da5b414d585ab..4866612dfdb2d 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -346,6 +346,8 @@ static int devm_of_pci_get_host_bridge_resources(struct device *dev,
 				dev_warn(dev, "More than one I/O resource converted for %pOF. CPU base address for old range lost!\n",
 					 dev_node);
 			*io_base = range.cpu_addr;
+		} else if (resource_type(res) == IORESOURCE_MEM) {
+			res->flags &= ~IORESOURCE_MEM_64;
 		}
 
 		pci_add_resource_offset(resources, res,	res->start - range.pci_addr);
-- 
GitLab


From a512360f45c930e14a262056e5f742797bc5d3f2 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Thu, 10 Jun 2021 07:41:34 +0100
Subject: [PATCH 3330/3804] PCI: tegra194: Fix MCFG quirk build regressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

7f100744749e ("PCI: tegra: Add Tegra194 MCFG quirks for ECAM errata")
caused a few build regressions:

  - 7f100744749e removed the Makefile rule for CONFIG_PCIE_TEGRA194, so
    pcie-tegra194.c can no longer be built as a module.  Restore that rule.

  - 7f100744749e added "#ifdef CONFIG_PCIE_TEGRA194" around the native
    driver, but that's only set when the driver is built-in (for a module,
    CONFIG_PCIE_TEGRA194_MODULE is defined).

    The ACPI quirk is completely independent of the rest of the native
    driver, so move the quirk to its own file and remove the #ifdef in the
    native driver.

  - 7f100744749e added symbols that are always defined but used only when
    CONFIG_PCIEASPM is set, which causes warnings when it is not set:

      drivers/pci/controller/dwc/pcie-tegra194.c:259:18: warning: ‘event_cntr_data_offset’ defined but not used [-Wunused-const-variable=]
      drivers/pci/controller/dwc/pcie-tegra194.c:250:18: warning: ‘event_cntr_ctrl_offset’ defined but not used [-Wunused-const-variable=]
      drivers/pci/controller/dwc/pcie-tegra194.c:243:27: warning: ‘pcie_gen_freq’ defined but not used [-Wunused-const-variable=]

Fixes: 7f100744749e ("PCI: tegra: Add Tegra194 MCFG quirks for ECAM errata")
Link: https://lore.kernel.org/r/20210610064134.336781-1-jonathanh@nvidia.com
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
---
 drivers/pci/controller/dwc/Makefile           |   3 +-
 .../pci/controller/dwc/pcie-tegra194-acpi.c   | 108 ++++++++++++++
 drivers/pci/controller/dwc/pcie-tegra194.c    | 138 +++---------------
 3 files changed, 128 insertions(+), 121 deletions(-)
 create mode 100644 drivers/pci/controller/dwc/pcie-tegra194-acpi.c

diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index eca805c1a0235..9e6ce0dc2f535 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
 obj-$(CONFIG_PCI_MESON) += pci-meson.o
+obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
 
@@ -38,6 +39,6 @@ ifdef CONFIG_ACPI
 ifdef CONFIG_PCI_QUIRKS
 obj-$(CONFIG_ARM64) += pcie-al.o
 obj-$(CONFIG_ARM64) += pcie-hisi.o
-obj-$(CONFIG_ARM64) += pcie-tegra194.o
+obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o
 endif
 endif
diff --git a/drivers/pci/controller/dwc/pcie-tegra194-acpi.c b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
new file mode 100644
index 0000000000000..c2de6ed4d86f3
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-tegra194-acpi.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * ACPI quirks for Tegra194 PCIe host controller
+ *
+ * Copyright (C) 2021 NVIDIA Corporation.
+ *
+ * Author: Vidya Sagar <vidyas@nvidia.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
+
+#include "pcie-designware.h"
+
+struct tegra194_pcie_ecam  {
+	void __iomem *config_base;
+	void __iomem *iatu_base;
+	void __iomem *dbi_base;
+};
+
+static int tegra194_acpi_init(struct pci_config_window *cfg)
+{
+	struct device *dev = cfg->parent;
+	struct tegra194_pcie_ecam *pcie_ecam;
+
+	pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
+	if (!pcie_ecam)
+		return -ENOMEM;
+
+	pcie_ecam->config_base = cfg->win;
+	pcie_ecam->iatu_base = cfg->win + SZ_256K;
+	pcie_ecam->dbi_base = cfg->win + SZ_512K;
+	cfg->priv = pcie_ecam;
+
+	return 0;
+}
+
+static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
+			  u32 val, u32 reg)
+{
+	u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
+
+	writel(val, pcie_ecam->iatu_base + offset + reg);
+}
+
+static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
+				 int index, int type, u64 cpu_addr,
+				 u64 pci_addr, u64 size)
+{
+	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
+		      PCIE_ATU_LOWER_BASE);
+	atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
+		      PCIE_ATU_UPPER_BASE);
+	atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
+		      PCIE_ATU_LOWER_TARGET);
+	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
+		      PCIE_ATU_LIMIT);
+	atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
+		      PCIE_ATU_UPPER_TARGET);
+	atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
+	atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
+}
+
+static void __iomem *tegra194_map_bus(struct pci_bus *bus,
+				      unsigned int devfn, int where)
+{
+	struct pci_config_window *cfg = bus->sysdata;
+	struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
+	u32 busdev;
+	int type;
+
+	if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
+		return NULL;
+
+	if (bus->number == cfg->busr.start) {
+		if (PCI_SLOT(devfn) == 0)
+			return pcie_ecam->dbi_base + where;
+		else
+			return NULL;
+	}
+
+	busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+		 PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+	if (bus->parent->number == cfg->busr.start) {
+		if (PCI_SLOT(devfn) == 0)
+			type = PCIE_ATU_TYPE_CFG0;
+		else
+			return NULL;
+	} else {
+		type = PCIE_ATU_TYPE_CFG1;
+	}
+
+	program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
+			     SZ_256K);
+
+	return pcie_ecam->config_base + where;
+}
+
+const struct pci_ecam_ops tegra194_pcie_ops = {
+	.init		= tegra194_acpi_init,
+	.pci_ops	= {
+		.map_bus	= tegra194_map_bus,
+		.read		= pci_generic_config_read,
+		.write		= pci_generic_config_write,
+	}
+};
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index bafd2c6ab3c23..504669e3afe07 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -22,8 +22,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
-#include <linux/pci-acpi.h>
-#include <linux/pci-ecam.h>
 #include <linux/phy/phy.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
@@ -247,24 +245,6 @@ static const unsigned int pcie_gen_freq[] = {
 	GEN4_CORE_CLK_FREQ
 };
 
-static const u32 event_cntr_ctrl_offset[] = {
-	0x1d8,
-	0x1a8,
-	0x1a8,
-	0x1a8,
-	0x1c4,
-	0x1d8
-};
-
-static const u32 event_cntr_data_offset[] = {
-	0x1dc,
-	0x1ac,
-	0x1ac,
-	0x1ac,
-	0x1c8,
-	0x1dc
-};
-
 struct tegra_pcie_dw {
 	struct device *dev;
 	struct resource *appl_res;
@@ -313,104 +293,6 @@ struct tegra_pcie_dw_of_data {
 	enum dw_pcie_device_mode mode;
 };
 
-#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
-struct tegra194_pcie_ecam  {
-	void __iomem *config_base;
-	void __iomem *iatu_base;
-	void __iomem *dbi_base;
-};
-
-static int tegra194_acpi_init(struct pci_config_window *cfg)
-{
-	struct device *dev = cfg->parent;
-	struct tegra194_pcie_ecam *pcie_ecam;
-
-	pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
-	if (!pcie_ecam)
-		return -ENOMEM;
-
-	pcie_ecam->config_base = cfg->win;
-	pcie_ecam->iatu_base = cfg->win + SZ_256K;
-	pcie_ecam->dbi_base = cfg->win + SZ_512K;
-	cfg->priv = pcie_ecam;
-
-	return 0;
-}
-
-static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
-			  u32 val, u32 reg)
-{
-	u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
-
-	writel(val, pcie_ecam->iatu_base + offset + reg);
-}
-
-static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
-				 int index, int type, u64 cpu_addr,
-				 u64 pci_addr, u64 size)
-{
-	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
-		      PCIE_ATU_LOWER_BASE);
-	atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
-		      PCIE_ATU_UPPER_BASE);
-	atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
-		      PCIE_ATU_LOWER_TARGET);
-	atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
-		      PCIE_ATU_LIMIT);
-	atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
-		      PCIE_ATU_UPPER_TARGET);
-	atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
-	atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
-}
-
-static void __iomem *tegra194_map_bus(struct pci_bus *bus,
-				      unsigned int devfn, int where)
-{
-	struct pci_config_window *cfg = bus->sysdata;
-	struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
-	u32 busdev;
-	int type;
-
-	if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
-		return NULL;
-
-	if (bus->number == cfg->busr.start) {
-		if (PCI_SLOT(devfn) == 0)
-			return pcie_ecam->dbi_base + where;
-		else
-			return NULL;
-	}
-
-	busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
-		 PCIE_ATU_FUNC(PCI_FUNC(devfn));
-
-	if (bus->parent->number == cfg->busr.start) {
-		if (PCI_SLOT(devfn) == 0)
-			type = PCIE_ATU_TYPE_CFG0;
-		else
-			return NULL;
-	} else {
-		type = PCIE_ATU_TYPE_CFG1;
-	}
-
-	program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
-			     SZ_256K);
-
-	return pcie_ecam->config_base + where;
-}
-
-const struct pci_ecam_ops tegra194_pcie_ops = {
-	.init		= tegra194_acpi_init,
-	.pci_ops	= {
-		.map_bus	= tegra194_map_bus,
-		.read		= pci_generic_config_read,
-		.write		= pci_generic_config_write,
-	}
-};
-#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
-
-#ifdef CONFIG_PCIE_TEGRA194
-
 static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
 {
 	return container_of(pci, struct tegra_pcie_dw, pci);
@@ -694,6 +576,24 @@ static struct pci_ops tegra_pci_ops = {
 };
 
 #if defined(CONFIG_PCIEASPM)
+static const u32 event_cntr_ctrl_offset[] = {
+	0x1d8,
+	0x1a8,
+	0x1a8,
+	0x1a8,
+	0x1c4,
+	0x1d8
+};
+
+static const u32 event_cntr_data_offset[] = {
+	0x1dc,
+	0x1ac,
+	0x1ac,
+	0x1ac,
+	0x1c8,
+	0x1dc
+};
+
 static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
 {
 	u32 val;
@@ -2411,5 +2311,3 @@ MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
 MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
 MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
 MODULE_LICENSE("GPL v2");
-
-#endif /* CONFIG_PCIE_TEGRA194 */
-- 
GitLab


From b5cf198e74a91073d12839a3e2db99994a39995d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antti=20J=C3=A4rvinen?= <antti.jarvinen@gmail.com>
Date: Mon, 15 Mar 2021 10:26:06 +0000
Subject: [PATCH 3331/3804] PCI: Mark TI C667X to avoid bus reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
automatically disables LTSSM when Secondary Bus Reset is received and
device stops working.  Prevent bus reset for these devices.  With this
change, the device can be assigned to VMs with VFIO, but it will leak state
between VMs.

Reference: https://e2e.ti.com/support/processors/f/791/t/954382
Link: https://lore.kernel.org/r/20210315102606.17153-1-antti.jarvinen@gmail.com
Signed-off-by: Antti Järvinen <antti.jarvinen@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Kishon Vijay Abraham I <kishon@ti.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/quirks.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index dcb229de1acb7..de0c110029f37 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3566,6 +3566,16 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset);
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
 
+/*
+ * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
+ * automatically disables LTSSM when Secondary Bus Reset is received and
+ * the device stops working.  Prevent bus reset for these devices.  With
+ * this change, the device can be assigned to VMs with VFIO, but it will
+ * leak state between VMs.  Reference
+ * https://e2e.ti.com/support/processors/f/791/t/954382
+ */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset);
+
 static void quirk_no_pm_reset(struct pci_dev *dev)
 {
 	/*
-- 
GitLab


From 4c207e7121fa92b66bf1896bf8ccb9edfb0f9731 Mon Sep 17 00:00:00 2001
From: Shanker Donthineni <sdonthineni@nvidia.com>
Date: Tue, 8 Jun 2021 11:18:56 +0530
Subject: [PATCH 3332/3804] PCI: Mark some NVIDIA GPUs to avoid bus reset

Some NVIDIA GPU devices do not work with SBR.  Triggering SBR leaves the
device inoperable for the current system boot. It requires a system
hard-reboot to get the GPU device back to normal operating condition
post-SBR. For the affected devices, enable NO_BUS_RESET quirk to avoid the
issue.

This issue will be fixed in the next generation of hardware.

Link: https://lore.kernel.org/r/20210608054857.18963-8-ameynarkhede03@gmail.com
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Sinan Kaya <okaya@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/pci/quirks.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index de0c110029f37..08a87161f3c8c 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3546,6 +3546,18 @@ static void quirk_no_bus_reset(struct pci_dev *dev)
 	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
 }
 
+/*
+ * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be
+ * prevented for those affected devices.
+ */
+static void quirk_nvidia_no_bus_reset(struct pci_dev *dev)
+{
+	if ((dev->device & 0xffc0) == 0x2340)
+		quirk_no_bus_reset(dev);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID,
+			 quirk_nvidia_no_bus_reset);
+
 /*
  * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset.
  * The device will throw a Link Down error on AER-capable systems and
-- 
GitLab


From ce00322c2365e1f7b0312f2f493539c833465d97 Mon Sep 17 00:00:00 2001
From: Chiqijun <chiqijun@huawei.com>
Date: Mon, 24 May 2021 17:44:07 -0500
Subject: [PATCH 3333/3804] PCI: Work around Huawei Intelligent NIC VF FLR
 erratum

pcie_flr() starts a Function Level Reset (FLR), waits 100ms (the maximum
time allowed for FLR completion by PCIe r5.0, sec 6.6.2), and waits for the
FLR to complete.  It assumes the FLR is complete when a config read returns
valid data.

When we do an FLR on several Huawei Intelligent NIC VFs at the same time,
firmware on the NIC processes them serially.  The VF may respond to config
reads before the firmware has completed its reset processing.  If we bind a
driver to the VF (e.g., by assigning the VF to a virtual machine) in the
interval between the successful config read and completion of the firmware
reset processing, the NIC VF driver may fail to load.

Prevent this driver failure by waiting for the NIC firmware to complete its
reset processing.  Not all NIC firmware supports this feature.

[bhelgaas: commit log]
Link: https://support.huawei.com/enterprise/en/doc/EDOC1100063073/87950645/vm-oss-occasionally-fail-to-load-the-in200-driver-when-the-vf-performs-flr
Link: https://lore.kernel.org/r/20210414132301.1793-1-chiqijun@huawei.com
Signed-off-by: Chiqijun <chiqijun@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/quirks.c | 65 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 08a87161f3c8c..dda0b10181624 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3923,6 +3923,69 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
 	return 0;
 }
 
+#define PCI_DEVICE_ID_HINIC_VF      0x375E
+#define HINIC_VF_FLR_TYPE           0x1000
+#define HINIC_VF_FLR_CAP_BIT        (1UL << 30)
+#define HINIC_VF_OP                 0xE80
+#define HINIC_VF_FLR_PROC_BIT       (1UL << 18)
+#define HINIC_OPERATION_TIMEOUT     15000	/* 15 seconds */
+
+/* Device-specific reset method for Huawei Intelligent NIC virtual functions */
+static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe)
+{
+	unsigned long timeout;
+	void __iomem *bar;
+	u32 val;
+
+	if (probe)
+		return 0;
+
+	bar = pci_iomap(pdev, 0, 0);
+	if (!bar)
+		return -ENOTTY;
+
+	/* Get and check firmware capabilities */
+	val = ioread32be(bar + HINIC_VF_FLR_TYPE);
+	if (!(val & HINIC_VF_FLR_CAP_BIT)) {
+		pci_iounmap(pdev, bar);
+		return -ENOTTY;
+	}
+
+	/* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */
+	val = ioread32be(bar + HINIC_VF_OP);
+	val = val | HINIC_VF_FLR_PROC_BIT;
+	iowrite32be(val, bar + HINIC_VF_OP);
+
+	pcie_flr(pdev);
+
+	/*
+	 * The device must recapture its Bus and Device Numbers after FLR
+	 * in order generate Completions.  Issue a config write to let the
+	 * device capture this information.
+	 */
+	pci_write_config_word(pdev, PCI_VENDOR_ID, 0);
+
+	/* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */
+	timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT);
+	do {
+		val = ioread32be(bar + HINIC_VF_OP);
+		if (!(val & HINIC_VF_FLR_PROC_BIT))
+			goto reset_complete;
+		msleep(20);
+	} while (time_before(jiffies, timeout));
+
+	val = ioread32be(bar + HINIC_VF_OP);
+	if (!(val & HINIC_VF_FLR_PROC_BIT))
+		goto reset_complete;
+
+	pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val);
+
+reset_complete:
+	pci_iounmap(pdev, bar);
+
+	return 0;
+}
+
 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
 		 reset_intel_82599_sfp_virtfn },
@@ -3935,6 +3998,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
 	{ PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr },
 	{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
 		reset_chelsio_generic_dev },
+	{ PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF,
+		reset_hinic_vf_dev },
 	{ 0 }
 };
 
-- 
GitLab


From e8946a53e2a698c148b3b3ed732f43c7747fbeb6 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 2 Jun 2021 10:12:55 +0800
Subject: [PATCH 3334/3804] PCI: Mark AMD Navi14 GPU ATS as broken
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Observed unexpected GPU hang during runpm stress test on 0x7341 rev 0x00.
Further debugging shows broken ATS is related.

Disable ATS on this part.  Similar issues on other devices:

  a2da5d8cc0b0 ("PCI: Mark AMD Raven iGPU ATS as broken in some platforms")
  45beb31d3afb ("PCI: Mark AMD Navi10 GPU rev 0x00 ATS as broken")
  5e89cd303e3a ("PCI: Mark AMD Navi14 GPU rev 0xc5 ATS as broken")

Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://lore.kernel.org/r/20210602021255.939090-1-evan.quan@amd.com
Signed-off-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Krzysztof Wilczyński <kw@linux.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/quirks.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index dda0b10181624..877ce61619ca0 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -5241,7 +5241,8 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags);
 static void quirk_amd_harvest_no_ats(struct pci_dev *pdev)
 {
 	if ((pdev->device == 0x7312 && pdev->revision != 0x00) ||
-	    (pdev->device == 0x7340 && pdev->revision != 0xc5))
+	    (pdev->device == 0x7340 && pdev->revision != 0xc5) ||
+	    (pdev->device == 0x7341 && pdev->revision != 0x00))
 		return;
 
 	if (pdev->device == 0x15d8) {
@@ -5268,6 +5269,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats);
 /* AMD Navi14 dGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats);
 /* AMD Raven platform iGPU */
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats);
 #endif /* CONFIG_PCI_ATS */
-- 
GitLab


From db2f77e2bd99dbd2fb23ddde58f0fae392fe3338 Mon Sep 17 00:00:00 2001
From: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Date: Fri, 21 May 2021 21:13:17 -0400
Subject: [PATCH 3335/3804] PCI: Add ACS quirk for Broadcom BCM57414 NIC

The Broadcom BCM57414 NIC may be a multi-function device.  While it does
not advertise an ACS capability, peer-to-peer transactions are not possible
between the individual functions, so it is safe to treat them as fully
isolated.

Add an ACS quirk for this device so the functions can be in independent
IOMMU groups and attached individually to userspace applications using
VFIO.

[bhelgaas: commit log]
Link: https://lore.kernel.org/r/1621645997-16251-1-git-send-email-michael.chan@broadcom.com
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/quirks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 877ce61619ca0..22b2bb1109c9e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -4840,6 +4840,8 @@ static const struct pci_dev_acs_enabled {
 	{ PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs },
 	{ PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs },
 	{ PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs },
+	/* Broadcom multi-function device */
+	{ PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs },
 	{ PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs },
 	/* Amazon Annapurna Labs */
 	{ PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs },
-- 
GitLab


From cacf994a91d3a55c0c2f853d6429cd7b86113915 Mon Sep 17 00:00:00 2001
From: Mikel Rychliski <mikel@mikelr.com>
Date: Fri, 11 Jun 2021 17:48:23 -0400
Subject: [PATCH 3336/3804] PCI: Add AMD RS690 quirk to enable 64-bit DMA

Although the AMD RS690 chipset has 64-bit DMA support, BIOS implementations
sometimes fail to configure the memory limit registers correctly.

The Acer F690GVM mainboard uses this chipset and a Marvell 88E8056 NIC. The
sky2 driver programs the NIC to use 64-bit DMA, which will not work:

  sky2 0000:02:00.0: error interrupt status=0x8
  sky2 0000:02:00.0 eth0: tx timeout
  sky2 0000:02:00.0 eth0: transmit ring 0 .. 22 report=0 done=0

Other drivers required by this mainboard either don't support 64-bit DMA,
or have it disabled using driver specific quirks. For example, the ahci
driver has quirks to enable or disable 64-bit DMA depending on the BIOS
version (see ahci_sb600_enable_64bit() in ahci.c). This ahci quirk matches
against the SB600 SATA controller, but the real issue is almost certainly
with the RS690 PCI host that it was commonly attached to.

To avoid this issue in all drivers with 64-bit DMA support, fix the
configuration of the PCI host. If the kernel is aware of physical memory
above 4GB, but the BIOS never configured the PCI host with this
information, update the registers with our values.

[bhelgaas: drop PCI_DEVICE_ID_ATI_RS690 definition]
Link: https://lore.kernel.org/r/20210611214823.4898-1-mikel@mikelr.com
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/x86/pci/fixup.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 02dc64625e64d..2edd86649468f 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
 
+#define RS690_LOWER_TOP_OF_DRAM2	0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID	0x1
+#define RS690_UPPER_TOP_OF_DRAM2	0x31
+#define RS690_HTIU_NB_INDEX		0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE	0x100
+#define RS690_HTIU_NB_DATA		0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+	u32 val = 0;
+	phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+	if (top_of_dram <= (1ULL << 32))
+		return;
+
+	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+				RS690_LOWER_TOP_OF_DRAM2);
+	pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+	if (val)
+		return;
+
+	pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+		RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+	pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+		RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+	pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+		top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
 #endif
-- 
GitLab


From f18139966d072dab8e4398c95ce955a9742e04f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 8 Jun 2021 22:36:55 +0200
Subject: [PATCH 3337/3804] PCI: aardvark: Fix kernel panic during PIO transfer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trying to start a new PIO transfer by writing value 0 in PIO_START register
when previous transfer has not yet completed (which is indicated by value 1
in PIO_START) causes an External Abort on CPU, which results in kernel
panic:

    SError Interrupt on CPU0, code 0xbf000002 -- SError
    Kernel panic - not syncing: Asynchronous SError Interrupt

To prevent kernel panic, it is required to reject a new PIO transfer when
previous one has not finished yet.

If previous PIO transfer is not finished yet, the kernel may issue a new
PIO request only if the previous PIO transfer timed out.

In the past the root cause of this issue was incorrectly identified (as it
often happens during link retraining or after a link down event) and a
special hack was implemented in Trusted Firmware to catch all SError events
in EL3, ignore errors with code 0xbf000002, and, instead of forwarding any
other errors to the kernel, throw a panic from the EL3 Trusted Firmware
handler.

Links to discussion and patches about this issue:
https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
https://lore.kernel.org/linux-pci/20190316161243.29517-1-repk@triplefau.lt/
https://lore.kernel.org/linux-pci/971be151d24312cc533989a64bd454b4@www.loen.fr/
https://review.trustedfirmware.org/c/TF-A/trusted-firmware-a/+/1541

But the real cause was the fact that during link retraining or after link
down event the PIO transfer may take longer time, up to the 1.44s until it
times out. This increased probability that a new PIO transfer would be
issued by kernel while previous one has not finished yet.

After applying this change into the kernel, it is possible to revert the
mentioned TF-A hack and SError events do not have to be caught in TF-A EL3.

Link: https://lore.kernel.org/r/20210608203655.31228-1-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Marek Behún <kabel@kernel.org>
Cc: stable@vger.kernel.org # 7fbcb5da811b ("PCI: aardvark: Don't rely on jiffies while holding spinlock")
---
 drivers/pci/controller/pci-aardvark.c | 49 ++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index 051b48bd7985d..e3f5e7ab76063 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -514,7 +514,7 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
 		udelay(PIO_RETRY_DELAY);
 	}
 
-	dev_err(dev, "config read/write timed out\n");
+	dev_err(dev, "PIO read/write transfer time out\n");
 	return -ETIMEDOUT;
 }
 
@@ -657,6 +657,35 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
 	return true;
 }
 
+static bool advk_pcie_pio_is_running(struct advk_pcie *pcie)
+{
+	struct device *dev = &pcie->pdev->dev;
+
+	/*
+	 * Trying to start a new PIO transfer when previous has not completed
+	 * cause External Abort on CPU which results in kernel panic:
+	 *
+	 *     SError Interrupt on CPU0, code 0xbf000002 -- SError
+	 *     Kernel panic - not syncing: Asynchronous SError Interrupt
+	 *
+	 * Functions advk_pcie_rd_conf() and advk_pcie_wr_conf() are protected
+	 * by raw_spin_lock_irqsave() at pci_lock_config() level to prevent
+	 * concurrent calls at the same time. But because PIO transfer may take
+	 * about 1.5s when link is down or card is disconnected, it means that
+	 * advk_pcie_wait_pio() does not always have to wait for completion.
+	 *
+	 * Some versions of ARM Trusted Firmware handles this External Abort at
+	 * EL3 level and mask it to prevent kernel panic. Relevant TF-A commit:
+	 * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
+	 */
+	if (advk_readl(pcie, PIO_START)) {
+		dev_err(dev, "Previous PIO read/write transfer is still running\n");
+		return true;
+	}
+
+	return false;
+}
+
 static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 			     int where, int size, u32 *val)
 {
@@ -673,9 +702,10 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 		return pci_bridge_emul_conf_read(&pcie->bridge, where,
 						 size, val);
 
-	/* Start PIO */
-	advk_writel(pcie, 0, PIO_START);
-	advk_writel(pcie, 1, PIO_ISR);
+	if (advk_pcie_pio_is_running(pcie)) {
+		*val = 0xffffffff;
+		return PCIBIOS_SET_FAILED;
+	}
 
 	/* Program the control register */
 	reg = advk_readl(pcie, PIO_CTRL);
@@ -694,7 +724,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
 	/* Program the data strobe */
 	advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);
 
-	/* Start the transfer */
+	/* Clear PIO DONE ISR and start the transfer */
+	advk_writel(pcie, 1, PIO_ISR);
 	advk_writel(pcie, 1, PIO_START);
 
 	ret = advk_pcie_wait_pio(pcie);
@@ -734,9 +765,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	if (where % size)
 		return PCIBIOS_SET_FAILED;
 
-	/* Start PIO */
-	advk_writel(pcie, 0, PIO_START);
-	advk_writel(pcie, 1, PIO_ISR);
+	if (advk_pcie_pio_is_running(pcie))
+		return PCIBIOS_SET_FAILED;
 
 	/* Program the control register */
 	reg = advk_readl(pcie, PIO_CTRL);
@@ -763,7 +793,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
 	/* Program the data strobe */
 	advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);
 
-	/* Start the transfer */
+	/* Clear PIO DONE ISR and start the transfer */
+	advk_writel(pcie, 1, PIO_ISR);
 	advk_writel(pcie, 1, PIO_START);
 
 	ret = advk_pcie_wait_pio(pcie);
-- 
GitLab


From 222a28edce38b62074a950fb243df621c602b4d3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 15:58:08 -0700
Subject: [PATCH 3338/3804] docs: Makefile: Use CONFIG_SHELL not SHELL

Fix think-o about which variable holds the Kbuild-configured shell.
This has accidentally worked due to most shells setting $SHELL by
default.

Fixes: 51e46c7a4007 ("docs, parallelism: Rearrange how jobserver reservations are made")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210617225808.3907377-1-keescook@chromium.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 9c42dde97671f..c3feb657b6548 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -76,7 +76,7 @@ quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
 	PYTHONDONTWRITEBYTECODE=1 \
 	BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
 	$(PYTHON3) $(srctree)/scripts/jobserver-exec \
-	$(SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
+	$(CONFIG_SHELL) $(srctree)/Documentation/sphinx/parallel-wrapper.sh \
 	$(SPHINXBUILD) \
 	-b $2 \
 	-c $(abspath $(srctree)/$(src)) \
-- 
GitLab


From 993b892610d159dc16f6556dd0bf111ddc3ce0b9 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:06 +0800
Subject: [PATCH 3339/3804] docs: path-lookup: update follow_managed() part

follow_managed() no longer exists; handle_mounts() and
traverse_mounts() now do the job.
See commit 9deed3ebca24 ("new helper: traverse_mounts()").

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-2-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index c482e1619e775..751082d469e80 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -448,10 +448,11 @@ described.  If it finds a ``LAST_NORM`` component it first calls
 filesystem to revalidate the result if it is that sort of filesystem.
 If that doesn't get a good result, it calls "``lookup_slow()``" which
 takes ``i_rwsem``, rechecks the cache, and then asks the filesystem
-to find a definitive answer.  Each of these will call
-``follow_managed()`` (as described below) to handle any mount points.
+to find a definitive answer.
 
-In the absence of symbolic links, ``walk_component()`` creates a new
+As the last step of ``walk_component()``, ``step_into()`` will be called either
+directly from walk_component() or from handle_dots().  It calls
+``handle_mounts()``, to check and handle mount points, in which a new
 ``struct path`` containing a counted reference to the new dentry and a
 reference to the new ``vfsmount`` which is only counted if it is
 different from the previous ``vfsmount``.  It then calls
@@ -535,8 +536,7 @@ covered in greater detail in autofs.txt in the Linux documentation
 tree, but a few notes specifically related to path lookup are in order
 here.
 
-The Linux VFS has a concept of "managed" dentries which is reflected
-in function names such as "``follow_managed()``".  There are three
+The Linux VFS has a concept of "managed" dentries.  There are three
 potentially interesting things about these dentries corresponding
 to three different flags that might be set in ``dentry->d_flags``:
 
-- 
GitLab


From 084c86837a3583c7cf56d74f91fb8e6191f99a8a Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:07 +0800
Subject: [PATCH 3340/3804] docs: path-lookup: update path_to_nameidata() part

path_to_nameidata() no longer exists; step_into() is called instead.
Related commit: c99687a03a78 ("fold path_to_nameidata()
into its only remaining caller")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-3-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 751082d469e80..6ea0880fb982d 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -453,11 +453,12 @@ to find a definitive answer.
 As the last step of ``walk_component()``, ``step_into()`` will be called either
 directly from walk_component() or from handle_dots().  It calls
 ``handle_mounts()``, to check and handle mount points, in which a new
-``struct path`` containing a counted reference to the new dentry and a
-reference to the new ``vfsmount`` which is only counted if it is
-different from the previous ``vfsmount``.  It then calls
-``path_to_nameidata()`` to install the new ``struct path`` in the
-``struct nameidata`` and drop the unneeded references.
+``struct path`` is created containing a counted reference to the new dentry and
+a reference to the new ``vfsmount`` which is only counted if it is
+different from the previous ``vfsmount``. Then if there is
+a symbolic link, ``step_into()`` calls ``pick_link()`` to deal with it,
+otherwise it installs the new ``struct path`` in the ``struct nameidata``, and
+drops the unneeded references.
 
 This "hand-over-hand" sequencing of getting a reference to the new
 dentry before dropping the reference to the previous dentry may
-- 
GitLab


From 8593d2cc8c2f09164d674b2318661ede00dd4d0e Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:08 +0800
Subject: [PATCH 3341/3804] docs: path-lookup: update path_mountpoint() part

path_mountpoint() no longer exists.  It has been folded into
path_lookupat(), which takes over its role when the LOOKUP_MOUNTPOINT
flag is set.  See commit 161aff1d93abf0e ("LOOKUP_MOUNTPOINT: fold
path_mountpointat() into path_lookupat()")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-4-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 6ea0880fb982d..652d3284f1784 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -472,7 +472,7 @@ Handling the final component
 ``nd->last_type`` to refer to the final component of the path.  It does
 not call ``walk_component()`` that last time.  Handling that final
 component remains for the caller to sort out. Those callers are
-``path_lookupat()``, ``path_parentat()``, ``path_mountpoint()`` and
+``path_lookupat()``, ``path_parentat()`` and
 ``path_openat()`` each of which handles the differing requirements of
 different system calls.
 
@@ -488,12 +488,10 @@ perform their operation.
 object is wanted such as by ``stat()`` or ``chmod()``.  It essentially just
 calls ``walk_component()`` on the final component through a call to
 ``lookup_last()``.  ``path_lookupat()`` returns just the final dentry.
-
-``path_mountpoint()`` handles the special case of unmounting which must
-not try to revalidate the mounted filesystem.  It effectively
-contains, through a call to ``mountpoint_last()``, an alternate
-implementation of ``lookup_slow()`` which skips that step.  This is
-important when unmounting a filesystem that is inaccessible, such as
+It is worth noting that when flag ``LOOKUP_MOUNTPOINT`` is set,
+``path_lookupat()`` will unset LOOKUP_JUMPED in nameidata so that in the
+subsequent path traversal ``d_weak_revalidate()`` won't be called.
+This is important when unmounting a filesystem that is inaccessible, such as
 one provided by a dead NFS server.
 
 Finally ``path_openat()`` is used for the ``open()`` system call; it
-- 
GitLab


From 71e0a67dc6c26018e27fe0c670e2db023aa72d22 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:09 +0800
Subject: [PATCH 3342/3804] docs: path-lookup: update do_last() part

trailing_symlink() was merged into lookup_last() and do_last().

do_last() has later been split into open_last_lookups()
and do_open().

see related commit: commit c5971b8c6354 ("take post-lookup
part of do_last() out of loop")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-5-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 47 +++++++++++------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 652d3284f1784..2b0b33168067e 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -495,11 +495,11 @@ This is important when unmounting a filesystem that is inaccessible, such as
 one provided by a dead NFS server.
 
 Finally ``path_openat()`` is used for the ``open()`` system call; it
-contains, in support functions starting with "``do_last()``", all the
+contains, in support functions starting with "``open_last_lookups()``", all the
 complexity needed to handle the different subtleties of O_CREAT (with
 or without O_EXCL), final "``/``" characters, and trailing symbolic
 links.  We will revisit this in the final part of this series, which
-focuses on those symbolic links.  "``do_last()``" will sometimes, but
+focuses on those symbolic links.  "``open_last_lookups()``" will sometimes, but
 not always, take ``i_rwsem``, depending on what it finds.
 
 Each of these, or the functions which call them, need to be alert to
@@ -1196,29 +1196,26 @@ potentially need to call ``link_path_walk()`` again and again on
 successive symlinks until one is found that doesn't point to another
 symlink.
 
-This case is handled by the relevant caller of ``link_path_walk()``, such as
-``path_lookupat()`` using a loop that calls ``link_path_walk()``, and then
-handles the final component.  If the final component is a symlink
-that needs to be followed, then ``trailing_symlink()`` is called to set
-things up properly and the loop repeats, calling ``link_path_walk()``
-again.  This could loop as many as 40 times if the last component of
-each symlink is another symlink.
-
-The various functions that examine the final component and possibly
-report that it is a symlink are ``lookup_last()``, ``mountpoint_last()``
-and ``do_last()``, each of which use the same convention as
-``walk_component()`` of returning ``1`` if a symlink was found that needs
-to be followed.
-
-Of these, ``do_last()`` is the most interesting as it is used for
-opening a file.  Part of ``do_last()`` runs with ``i_rwsem`` held and this
-part is in a separate function: ``lookup_open()``.
-
-Explaining ``do_last()`` completely is beyond the scope of this article,
-but a few highlights should help those interested in exploring the
-code.
-
-1. Rather than just finding the target file, ``do_last()`` needs to open
+This case is handled by relevant callers of ``link_path_walk()``, such as
+``path_lookupat()``, ``path_openat()`` using a loop that calls ``link_path_walk()``,
+and then handles the final component by calling ``open_last_lookups()`` or
+``lookup_last()``. If it is a symlink that needs to be followed,
+``open_last_lookups()`` or ``lookup_last()`` will set things up properly and
+return the path so that the loop repeats, calling
+``link_path_walk()`` again.  This could loop as many as 40 times if the last
+component of each symlink is another symlink.
+
+Of the various functions that examine the final component, 
+``open_last_lookups()`` is the most interesting as it works in tandem
+with ``do_open()`` for opening a file.  Part of ``open_last_lookups()`` runs
+with ``i_rwsem`` held and this part is in a separate function: ``lookup_open()``.
+
+Explaining ``open_last_lookups()`` and ``do_open()`` completely is beyond the scope
+of this article, but a few highlights should help those interested in exploring
+the code.
+
+1. Rather than just finding the target file, ``do_open()`` is used after
+   ``open_last_lookup()`` to open
    it.  If the file was found in the dcache, then ``vfs_open()`` is used for
    this.  If not, then ``lookup_open()`` will either call ``atomic_open()`` (if
    the filesystem provides it) to combine the final lookup with the open, or
-- 
GitLab


From 34ef75ef25c6fdea899acdb0a466f8ed0c365644 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:10 +0800
Subject: [PATCH 3343/3804] docs: path-lookup: remove filename_mountpoint

No filename_mountpoint any more
see commit: commit 161aff1d93ab ("LOOKUP_MOUNTPOINT:
fold path_mountpointat() into path_lookupat()")

Without filename_mountpoint and path_mountpoint(), the
numbers should be four & three:

"These four correspond roughly to the three path_*() functions"

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-6-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 2b0b33168067e..3cbaf30b0f835 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -652,9 +652,9 @@ restarts from the top with REF-walk.
 
 This pattern of "try RCU-walk, if that fails try REF-walk" can be
 clearly seen in functions like ``filename_lookup()``,
-``filename_parentat()``, ``filename_mountpoint()``,
-``do_filp_open()``, and ``do_file_open_root()``.  These five
-correspond roughly to the four ``path_*()`` functions we met earlier,
+``filename_parentat()``,
+``do_filp_open()``, and ``do_file_open_root()``.  These four
+correspond roughly to the three ``path_*()`` functions we met earlier,
 each of which calls ``link_path_walk()``.  The ``path_*()`` functions are
 called using different mode flags until a mode is found which works.
 They are first called with ``LOOKUP_RCU`` set to request "RCU-walk".  If
-- 
GitLab


From d2d3dd5ecce11ba560ff024e63ddb1640b7b27b0 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:11 +0800
Subject: [PATCH 3344/3804] docs: path-lookup: Add macro name to symlink limit
 description

Add macro name MAXSYMLINKS to the symlink limit description, so
that it is consistent with path name length description above.

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-7-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 3cbaf30b0f835..40b9afec4d604 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -992,8 +992,8 @@ is 4096.  There are a number of reasons for this limit; not letting the
 kernel spend too much time on just one path is one of them.  With
 symbolic links you can effectively generate much longer paths so some
 sort of limit is needed for the same reason.  Linux imposes a limit of
-at most 40 symlinks in any one path lookup.  It previously imposed a
-further limit of eight on the maximum depth of recursion, but that was
+at most 40 (MAXSYMLINKS) symlinks in any one path lookup.  It previously imposed
+a further limit of eight on the maximum depth of recursion, but that was
 raised to 40 when a separate stack was implemented, so there is now
 just the one limit.
 
-- 
GitLab


From 4a00e4bd59bbd5eac26f1792eb8d7d60f6cafe9a Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:12 +0800
Subject: [PATCH 3345/3804] docs: path-lookup: i_op->follow_link replaced with
 i_op->get_link

follow_link has been replaced by get_link() which can be
called in RCU mode.

see commit: commit 6b2553918d8b ("replace ->follow_link() with
new method that could stay in RCU mode")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-8-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 40b9afec4d604..4650c64279638 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1060,13 +1060,11 @@ filesystem cannot successfully get a reference in RCU-walk mode, it
 must return ``-ECHILD`` and ``unlazy_walk()`` will be called to return to
 REF-walk mode in which the filesystem is allowed to sleep.
 
-The place for all this to happen is the ``i_op->follow_link()`` inode
-method.  In the present mainline code this is never actually called in
-RCU-walk mode as the rewrite is not quite complete.  It is likely that
-in a future release this method will be passed an ``inode`` pointer when
-called in RCU-walk mode so it both (1) knows to be careful, and (2) has the
-validated pointer.  Much like the ``i_op->permission()`` method we
-looked at previously, ``->follow_link()`` would need to be careful that
+The place for all this to happen is the ``i_op->get_link()`` inode
+method. This is called both in RCU-walk and REF-walk. In RCU-walk the
+``dentry*`` argument is NULL, ``->get_link()`` can return -ECHILD to drop out of
+RCU-walk.  Much like the ``i_op->permission()`` method we
+looked at previously, ``->get_link()`` would need to be careful that
 all the data structures it references are safe to be accessed while
 holding no counted reference, only the RCU lock.  Though getting a
 reference with ``->follow_link()`` is not yet done in RCU-walk mode, the
-- 
GitLab


From 671f73356f6a2aa2fb1bb71f8fdeeba858b6fec6 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:13 +0800
Subject: [PATCH 3346/3804] docs: path-lookup: update i_op->put_link and cookie
 description

No inode->put_link operation anymore. We use delayed_call to
deal with link destruction. Cookie has been replaced with
struct delayed_call.

Related commit: commit fceef393a538 ("switch ->get_link() to
delayed_call, kill ->put_link()")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-9-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 30 ++++++-----------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 4650c64279638..3855809784cf5 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1066,34 +1066,20 @@ method. This is called both in RCU-walk and REF-walk. In RCU-walk the
 RCU-walk.  Much like the ``i_op->permission()`` method we
 looked at previously, ``->get_link()`` would need to be careful that
 all the data structures it references are safe to be accessed while
-holding no counted reference, only the RCU lock.  Though getting a
-reference with ``->follow_link()`` is not yet done in RCU-walk mode, the
-code is ready to release the reference when that does happen.
-
-This need to drop the reference to a symlink adds significant
-complexity.  It requires a reference to the inode so that the
-``i_op->put_link()`` inode operation can be called.  In REF-walk, that
-reference is kept implicitly through a reference to the dentry, so
-keeping the ``struct path`` of the symlink is easiest.  For RCU-walk,
-the pointer to the inode is kept separately.  To allow switching from
-RCU-walk back to REF-walk in the middle of processing nested symlinks
-we also need the seq number for the dentry so we can confirm that
-switching back was safe.
-
-Finally, when providing a reference to a symlink, the filesystem also
-provides an opaque "cookie" that must be passed to ``->put_link()`` so that it
-knows what to free.  This might be the allocated memory area, or a
-pointer to the ``struct page`` in the page cache, or something else
-completely.  Only the filesystem knows what it is.
+holding no counted reference, only the RCU lock. A callback
+``struct delayed_called`` will be passed to ``->get_link()``:
+file systems can set their own put_link function and argument through
+``set_delayed_call()``. Later on, when VFS wants to put link, it will call
+``do_delayed_call()`` to invoke that callback function with the argument.
 
 In order for the reference to each symlink to be dropped when the walk completes,
 whether in RCU-walk or REF-walk, the symlink stack needs to contain,
 along with the path remnants:
 
-- the ``struct path`` to provide a reference to the inode in REF-walk
-- the ``struct inode *`` to provide a reference to the inode in RCU-walk
+- the ``struct path`` to provide a reference to the previous path
+- the ``const char *`` to provide a reference to the previous name
 - the ``seq`` to allow the path to be safely switched from RCU-walk to REF-walk
-- the ``cookie`` that tells ``->put_path()`` what to put.
+- the ``struct delayed_call`` for later invocation.
 
 This means that each entry in the symlink stack needs to hold five
 pointers and an integer instead of just one pointer (the path
-- 
GitLab


From 18edb95a88a947b10536be4dc86b4a190715f816 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:14 +0800
Subject: [PATCH 3347/3804] docs: path-lookup: no get_link()

There is no get_link() anymore; we have step_into() and pick_link().

walk_component() calls step_into(), which in turn calls pick_link()
and returns the symlink name.

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-10-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 3855809784cf5..0a125673a8fed 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1103,12 +1103,10 @@ doesn't need to notice.  Getting this ``name`` variable on and off the
 stack is very straightforward; pushing and popping the references is
 a little more complex.
 
-When a symlink is found, ``walk_component()`` returns the value ``1``
-(``0`` is returned for any other sort of success, and a negative number
-is, as usual, an error indicator).  This causes ``get_link()`` to be
-called; it then gets the link from the filesystem.  Providing that
-operation is successful, the old path ``name`` is placed on the stack,
-and the new value is used as the ``name`` for a while.  When the end of
+When a symlink is found, ``walk_component()`` calls ``pick_link()`` via ``step_into()``
+which returns the link from the filesystem.
+Providing that operation is successful, the old path ``name`` is placed on the
+stack, and the new value is used as the ``name`` for a while.  When the end of
 the path is found (i.e. ``*name`` is ``'\0'``) the old ``name`` is restored
 off the stack and path walking continues.
 
-- 
GitLab


From de9414adafe4da174212909e054222948aa620fc Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:15 +0800
Subject: [PATCH 3348/3804] docs: path-lookup: update WALK_GET, WALK_PUT desc

WALK_GET is changed to WALK_TRAILING with a different meaning.
Here it should be WALK_NOFOLLOW. WALK_PUT doesn't exist, we have
WALK_MORE.

WALK_PUT == !WALK_MORE

And there is no should_follow_link() anymore.

Related commits:
commit 8c4efe22e7c4 ("namei: invert the meaning of WALK_FOLLOW")
commit 1c4ff1a87e46 ("namei: invert WALK_PUT logics")

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
[jc: applied language tweaks suggested by Neil]
Link: https://lore.kernel.org/r/20210527091618.287093-11-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 0a125673a8fed..1102252cbc7a8 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1123,13 +1123,13 @@ stack in ``walk_component()`` immediately when the symlink is found;
 old symlink as it walks that last component.  So it is quite
 convenient for ``walk_component()`` to release the old symlink and pop
 the references just before pushing the reference information for the
-new symlink.  It is guided in this by two flags; ``WALK_GET``, which
-gives it permission to follow a symlink if it finds one, and
-``WALK_PUT``, which tells it to release the current symlink after it has been
-followed.  ``WALK_PUT`` is tested first, leading to a call to
-``put_link()``.  ``WALK_GET`` is tested subsequently (by
-``should_follow_link()``) leading to a call to ``pick_link()`` which sets
-up the stack frame.
+new symlink.  It is guided in this by three flags: ``WALK_NOFOLLOW`` which
+forbids it from following a symlink if it finds one, ``WALK_MORE``
+which indicates that it is yet too early to release the
+current symlink, and ``WALK_TRAILING`` which indicates that it is on the final
+component of the lookup, so we will check userspace flag ``LOOKUP_FOLLOW`` to
+decide whether to follow it when it is a symlink and call ``may_follow_link()``
+to check if we have privilege to follow it.
 
 Symlinks with no final component
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
GitLab


From 3c1be84b8d82959a6b7fedb598b8781fa1d09421 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:16 +0800
Subject: [PATCH 3349/3804] docs: path-lookup: update get_link() ->follow_link
 description

get_link() is merged into pick_link(). i_op->follow_link is
replaced with i_op->get_link(). get_link() can return ERR_PTR(0)
which equals NULL.

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-12-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index 1102252cbc7a8..c150f076abbf4 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1136,10 +1136,10 @@ Symlinks with no final component
 
 A pair of special-case symlinks deserve a little further explanation.
 Both result in a new ``struct path`` (with mount and dentry) being set
-up in the ``nameidata``, and result in ``get_link()`` returning ``NULL``.
+up in the ``nameidata``, and result in ``pick_link()`` returning ``NULL``.
 
 The more obvious case is a symlink to "``/``".  All symlinks starting
-with "``/``" are detected in ``get_link()`` which resets the ``nameidata``
+with "``/``" are detected in ``pick_link()`` which resets the ``nameidata``
 to point to the effective filesystem root.  If the symlink only
 contains "``/``" then there is nothing more to do, no components at all,
 so ``NULL`` is returned to indicate that the symlink can be released and
@@ -1156,12 +1156,11 @@ something that looks like a symlink.  It is really a reference to the
 target file, not just the name of it.  When you ``readlink`` these
 objects you get a name that might refer to the same file - unless it
 has been unlinked or mounted over.  When ``walk_component()`` follows
-one of these, the ``->follow_link()`` method in "procfs" doesn't return
+one of these, the ``->get_link()`` method in "procfs" doesn't return
 a string name, but instead calls ``nd_jump_link()`` which updates the
-``nameidata`` in place to point to that target.  ``->follow_link()`` then
-returns ``NULL``.  Again there is no final component and ``get_link()``
-reports this by leaving the ``last_type`` field of ``nameidata`` as
-``LAST_BIND``.
+``nameidata`` in place to point to that target.  ``->get_link()`` then
+returns ``NULL``.  Again there is no final component and ``pick_link()``
+returns ``NULL``.
 
 Following the symlink in the final component
 --------------------------------------------
-- 
GitLab


From ef4aa53f36a932e656a3b91cdc8a9a9dcb9cef81 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:17 +0800
Subject: [PATCH 3350/3804] docs: path-lookup: update symlink description

instead of lookup_real()/vfs_create(), i_op->lookup() and
i_op->create() will be called directly.

update vfs_open() logic

should_follow_link() is merged into lookup_last() or open_last_lookups()
which returns the symlink name instead of an integer.

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-13-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index c150f076abbf4..b746e974393a4 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -1200,16 +1200,15 @@ the code.
    it.  If the file was found in the dcache, then ``vfs_open()`` is used for
    this.  If not, then ``lookup_open()`` will either call ``atomic_open()`` (if
    the filesystem provides it) to combine the final lookup with the open, or
-   will perform the separate ``lookup_real()`` and ``vfs_create()`` steps
+   will perform the separate ``i_op->lookup()`` and ``i_op->create()`` steps
    directly.  In the later case the actual "open" of this newly found or
    created file will be performed by ``vfs_open()``, just as if the name
    were found in the dcache.
 
 2. ``vfs_open()`` can fail with ``-EOPENSTALE`` if the cached information
-   wasn't quite current enough.  Rather than restarting the lookup from
-   the top with ``LOOKUP_REVAL`` set, ``lookup_open()`` is called instead,
-   giving the filesystem a chance to resolve small inconsistencies.
-   If that doesn't work, only then is the lookup restarted from the top.
+   wasn't quite current enough.  If it's in RCU-walk ``-ECHILD`` will be returned
+   otherwise ``-ESTALE`` is returned.  When ``-ESTALE`` is returned, the caller may
+   retry with ``LOOKUP_REVAL`` flag set.
 
 3. An open with O_CREAT **does** follow a symlink in the final component,
    unlike other creation system calls (like ``mkdir``).  So the sequence::
@@ -1219,8 +1218,8 @@ the code.
 
    will create a file called ``/tmp/bar``.  This is not permitted if
    ``O_EXCL`` is set but otherwise is handled for an O_CREAT open much
-   like for a non-creating open: ``should_follow_link()`` returns ``1``, and
-   so does ``do_last()`` so that ``trailing_symlink()`` gets called and the
+   like for a non-creating open: ``lookup_last()`` or ``open_last_lookup()``
+   returns a non ``NULL`` value, and ``link_path_walk()`` gets called and the
    open process continues on the symlink that was found.
 
 Updating the access time
-- 
GitLab


From 8943474a416c0d2eac2366c22be1458ad0ceb812 Mon Sep 17 00:00:00 2001
From: Fox Chen <foxhlchen@gmail.com>
Date: Thu, 27 May 2021 17:16:18 +0800
Subject: [PATCH 3351/3804] docs: path-lookup: use bare function() rather than
 literals

As suggested by Matthew Wilcox and Jonathan Corbet, drop ``...``
literals around function names of this patchset.

Signed-off-by: Fox Chen <foxhlchen@gmail.com>
Reviewed-by: NeilBrown <neilb@suse.de>
Link: https://lore.kernel.org/r/20210527091618.287093-14-foxhlchen@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/path-lookup.rst | 70 +++++++++++------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/Documentation/filesystems/path-lookup.rst b/Documentation/filesystems/path-lookup.rst
index b746e974393a4..a6fa7619b69ec 100644
--- a/Documentation/filesystems/path-lookup.rst
+++ b/Documentation/filesystems/path-lookup.rst
@@ -450,13 +450,13 @@ If that doesn't get a good result, it calls "``lookup_slow()``" which
 takes ``i_rwsem``, rechecks the cache, and then asks the filesystem
 to find a definitive answer.
 
-As the last step of ``walk_component()``, ``step_into()`` will be called either
+As the last step of walk_component(), step_into() will be called either
 directly from walk_component() or from handle_dots().  It calls
-``handle_mounts()``, to check and handle mount points, in which a new
+handle_mounts(), to check and handle mount points, in which a new
 ``struct path`` is created containing a counted reference to the new dentry and
 a reference to the new ``vfsmount`` which is only counted if it is
 different from the previous ``vfsmount``. Then if there is
-a symbolic link, ``step_into()`` calls ``pick_link()`` to deal with it,
+a symbolic link, step_into() calls pick_link() to deal with it,
 otherwise it installs the new ``struct path`` in the ``struct nameidata``, and
 drops the unneeded references.
 
@@ -472,8 +472,8 @@ Handling the final component
 ``nd->last_type`` to refer to the final component of the path.  It does
 not call ``walk_component()`` that last time.  Handling that final
 component remains for the caller to sort out. Those callers are
-``path_lookupat()``, ``path_parentat()`` and
-``path_openat()`` each of which handles the differing requirements of
+path_lookupat(), path_parentat() and
+path_openat() each of which handles the differing requirements of
 different system calls.
 
 ``path_parentat()`` is clearly the simplest - it just wraps a little bit
@@ -489,17 +489,17 @@ object is wanted such as by ``stat()`` or ``chmod()``.  It essentially just
 calls ``walk_component()`` on the final component through a call to
 ``lookup_last()``.  ``path_lookupat()`` returns just the final dentry.
 It is worth noting that when flag ``LOOKUP_MOUNTPOINT`` is set,
-``path_lookupat()`` will unset LOOKUP_JUMPED in nameidata so that in the
-subsequent path traversal ``d_weak_revalidate()`` won't be called.
+path_lookupat() will unset LOOKUP_JUMPED in nameidata so that in the
+subsequent path traversal d_weak_revalidate() won't be called.
 This is important when unmounting a filesystem that is inaccessible, such as
 one provided by a dead NFS server.
 
 Finally ``path_openat()`` is used for the ``open()`` system call; it
-contains, in support functions starting with "``open_last_lookups()``", all the
+contains, in support functions starting with "open_last_lookups()", all the
 complexity needed to handle the different subtleties of O_CREAT (with
 or without O_EXCL), final "``/``" characters, and trailing symbolic
 links.  We will revisit this in the final part of this series, which
-focuses on those symbolic links.  "``open_last_lookups()``" will sometimes, but
+focuses on those symbolic links.  "open_last_lookups()" will sometimes, but
 not always, take ``i_rwsem``, depending on what it finds.
 
 Each of these, or the functions which call them, need to be alert to
@@ -651,9 +651,9 @@ RCU-walk finds it cannot stop gracefully, it simply gives up and
 restarts from the top with REF-walk.
 
 This pattern of "try RCU-walk, if that fails try REF-walk" can be
-clearly seen in functions like ``filename_lookup()``,
-``filename_parentat()``,
-``do_filp_open()``, and ``do_file_open_root()``.  These four
+clearly seen in functions like filename_lookup(),
+filename_parentat(),
+do_filp_open(), and do_file_open_root().  These four
 correspond roughly to the three ``path_*()`` functions we met earlier,
 each of which calls ``link_path_walk()``.  The ``path_*()`` functions are
 called using different mode flags until a mode is found which works.
@@ -1069,8 +1069,8 @@ all the data structures it references are safe to be accessed while
 holding no counted reference, only the RCU lock. A callback
 ``struct delayed_called`` will be passed to ``->get_link()``:
 file systems can set their own put_link function and argument through
-``set_delayed_call()``. Later on, when VFS wants to put link, it will call
-``do_delayed_call()`` to invoke that callback function with the argument.
+set_delayed_call(). Later on, when VFS wants to put link, it will call
+do_delayed_call() to invoke that callback function with the argument.
 
 In order for the reference to each symlink to be dropped when the walk completes,
 whether in RCU-walk or REF-walk, the symlink stack needs to contain,
@@ -1103,7 +1103,7 @@ doesn't need to notice.  Getting this ``name`` variable on and off the
 stack is very straightforward; pushing and popping the references is
 a little more complex.
 
-When a symlink is found, ``walk_component()`` calls ``pick_link()`` via ``step_into()``
+When a symlink is found, walk_component() calls pick_link() via step_into()
 which returns the link from the filesystem.
 Providing that operation is successful, the old path ``name`` is placed on the
 stack, and the new value is used as the ``name`` for a while.  When the end of
@@ -1136,10 +1136,10 @@ Symlinks with no final component
 
 A pair of special-case symlinks deserve a little further explanation.
 Both result in a new ``struct path`` (with mount and dentry) being set
-up in the ``nameidata``, and result in ``pick_link()`` returning ``NULL``.
+up in the ``nameidata``, and result in pick_link() returning ``NULL``.
 
 The more obvious case is a symlink to "``/``".  All symlinks starting
-with "``/``" are detected in ``pick_link()`` which resets the ``nameidata``
+with "``/``" are detected in pick_link() which resets the ``nameidata``
 to point to the effective filesystem root.  If the symlink only
 contains "``/``" then there is nothing more to do, no components at all,
 so ``NULL`` is returned to indicate that the symlink can be released and
@@ -1157,9 +1157,9 @@ target file, not just the name of it.  When you ``readlink`` these
 objects you get a name that might refer to the same file - unless it
 has been unlinked or mounted over.  When ``walk_component()`` follows
 one of these, the ``->get_link()`` method in "procfs" doesn't return
-a string name, but instead calls ``nd_jump_link()`` which updates the
+a string name, but instead calls nd_jump_link() which updates the
 ``nameidata`` in place to point to that target.  ``->get_link()`` then
-returns ``NULL``.  Again there is no final component and ``pick_link()``
+returns ``NULL``.  Again there is no final component and pick_link()
 returns ``NULL``.
 
 Following the symlink in the final component
@@ -1177,35 +1177,35 @@ potentially need to call ``link_path_walk()`` again and again on
 successive symlinks until one is found that doesn't point to another
 symlink.
 
-This case is handled by relevant callers of ``link_path_walk()``, such as
-``path_lookupat()``, ``path_openat()`` using a loop that calls ``link_path_walk()``,
-and then handles the final component by calling ``open_last_lookups()`` or
-``lookup_last()``. If it is a symlink that needs to be followed,
-``open_last_lookups()`` or ``lookup_last()`` will set things up properly and
+This case is handled by relevant callers of link_path_walk(), such as
+path_lookupat(), path_openat() using a loop that calls link_path_walk(),
+and then handles the final component by calling open_last_lookups() or
+lookup_last(). If it is a symlink that needs to be followed,
+open_last_lookups() or lookup_last() will set things up properly and
 return the path so that the loop repeats, calling
-``link_path_walk()`` again.  This could loop as many as 40 times if the last
+link_path_walk() again.  This could loop as many as 40 times if the last
 component of each symlink is another symlink.
 
 Of the various functions that examine the final component, 
-``open_last_lookups()`` is the most interesting as it works in tandem
-with ``do_open()`` for opening a file.  Part of ``open_last_lookups()`` runs
-with ``i_rwsem`` held and this part is in a separate function: ``lookup_open()``.
+open_last_lookups() is the most interesting as it works in tandem
+with do_open() for opening a file.  Part of open_last_lookups() runs
+with ``i_rwsem`` held and this part is in a separate function: lookup_open().
 
-Explaining ``open_last_lookups()`` and ``do_open()`` completely is beyond the scope
+Explaining open_last_lookups() and do_open() completely is beyond the scope
 of this article, but a few highlights should help those interested in exploring
 the code.
 
-1. Rather than just finding the target file, ``do_open()`` is used after
-   ``open_last_lookup()`` to open
+1. Rather than just finding the target file, do_open() is used after
+   open_last_lookup() to open
    it.  If the file was found in the dcache, then ``vfs_open()`` is used for
    this.  If not, then ``lookup_open()`` will either call ``atomic_open()`` (if
    the filesystem provides it) to combine the final lookup with the open, or
    will perform the separate ``i_op->lookup()`` and ``i_op->create()`` steps
    directly.  In the later case the actual "open" of this newly found or
-   created file will be performed by ``vfs_open()``, just as if the name
+   created file will be performed by vfs_open(), just as if the name
    were found in the dcache.
 
-2. ``vfs_open()`` can fail with ``-EOPENSTALE`` if the cached information
+2. vfs_open() can fail with ``-EOPENSTALE`` if the cached information
    wasn't quite current enough.  If it's in RCU-walk ``-ECHILD`` will be returned
    otherwise ``-ESTALE`` is returned.  When ``-ESTALE`` is returned, the caller may
    retry with ``LOOKUP_REVAL`` flag set.
@@ -1218,8 +1218,8 @@ the code.
 
    will create a file called ``/tmp/bar``.  This is not permitted if
    ``O_EXCL`` is set but otherwise is handled for an O_CREAT open much
-   like for a non-creating open: ``lookup_last()`` or ``open_last_lookup()``
-   returns a non ``NULL`` value, and ``link_path_walk()`` gets called and the
+   like for a non-creating open: lookup_last() or open_last_lookup()
+   returns a non ``NULL`` value, and link_path_walk() gets called and the
    open process continues on the symlink that was found.
 
 Updating the access time
-- 
GitLab


From 28e5e44aa3f4e0e0370864ed008fb5e2d85f4dc8 Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@intel.com>
Date: Thu, 17 Jun 2021 12:46:57 -0700
Subject: [PATCH 3352/3804] x86/mm: Avoid truncating memblocks for SGX memory

tl;dr:

Several SGX users reported seeing the following message on NUMA systems:

  sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0.

This turned out to be the memblock code mistakenly throwing away SGX
memory.

=== Full Changelog ===

The 'max_pfn' variable represents the highest known RAM address.  It can
be used, for instance, to quickly determine for which physical addresses
there is mem_map[] space allocated.  The numa_meminfo code makes an
effort to throw out ("trim") all memory blocks which are above 'max_pfn'.

SGX memory is not considered RAM (it is marked as "Reserved" in the
e820) and is not taken into account by max_pfn. Despite this, SGX memory
areas have NUMA affinity and are enumerated in the ACPI SRAT table. The
existing SGX code uses the numa_meminfo mechanism to look up the NUMA
affinity for its memory areas.

In cases where SGX memory was above max_pfn (usually just the one EPC
section in the last highest NUMA node), the numa_memblock is truncated
at 'max_pfn', which is below the SGX memory.  When the SGX code tries to
look up the affinity of this memory, it fails and produces an error message:

  sgx: [Firmware Bug]: Unable to map EPC section to online node. Fallback to the NUMA node 0.

and assigns the memory to NUMA node 0.

Instead of silently truncating the memory block at 'max_pfn' and
dropping the SGX memory, add the truncated portion to
'numa_reserved_meminfo'.  This allows the SGX code to later determine
the NUMA affinity of its 'Reserved' area.

Before, numa_meminfo looked like this (from 'crash'):

  blk = { start =          0x0, end = 0x2080000000, nid = 0x0 }
        { start = 0x2080000000, end = 0x4000000000, nid = 0x1 }

numa_reserved_meminfo is empty.

With this, numa_meminfo looks like this:

  blk = { start =          0x0, end = 0x2080000000, nid = 0x0 }
        { start = 0x2080000000, end = 0x4000000000, nid = 0x1 }

and numa_reserved_meminfo has an entry for node 1's SGX memory:

  blk =  { start = 0x4000000000, end = 0x4080000000, nid = 0x1 }

 [ daveh: completely rewrote/reworked changelog ]

Fixes: 5d30f92e7631 ("x86/NUMA: Provide a range-to-target_node lookup facility")
Reported-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Fan Du <fan.du@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20210617194657.0A99CB22@viggo.jf.intel.com
---
 arch/x86/mm/numa.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 5eb4dc2b97dac..e94da744386f3 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -254,7 +254,13 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
 
 		/* make sure all non-reserved blocks are inside the limits */
 		bi->start = max(bi->start, low);
-		bi->end = min(bi->end, high);
+
+		/* preserve info for non-RAM areas above 'max_pfn': */
+		if (bi->end > high) {
+			numa_add_memblk_to(bi->nid, high, bi->end,
+					   &numa_reserved_meminfo);
+			bi->end = high;
+		}
 
 		/* and there's no empty block */
 		if (bi->start >= bi->end)
-- 
GitLab


From 39eb028183bc7378bb6187067e20bf6d8c836407 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@denx.de>
Date: Fri, 18 Jun 2021 11:29:48 +0200
Subject: [PATCH 3353/3804] cxgb4: fix wrong shift.

While fixing a Coverity warning, commit dd2c79677375 introduced a typo in
the shift value (25 instead of 24). Fix that.

Signed-off-by: Pavel Machek (CIP) <pavel@denx.de>
Fixes: dd2c79677375 ("cxgb4: Fix unintentional sign extension issues")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 22c9ac922ebae..6260b3bebd2bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -198,7 +198,7 @@ static void set_nat_params(struct adapter *adap, struct filter_entry *f,
 				      WORD_MASK, f->fs.nat_lip[3] |
 				      f->fs.nat_lip[2] << 8 |
 				      f->fs.nat_lip[1] << 16 |
-				      (u64)f->fs.nat_lip[0] << 25, 1);
+				      (u64)f->fs.nat_lip[0] << 24, 1);
 		}
 	}
 
-- 
GitLab


From 0afd6a4e8028cc487c240b6cfe04094e45a306e4 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Fri, 18 Jun 2021 02:07:25 -0400
Subject: [PATCH 3354/3804] bnxt_en: Rediscover PHY capabilities after firmware
 reset

There is a missing bnxt_probe_phy() call in bnxt_fw_init_one() to
rediscover the PHY capabilities after a firmware reset.  This can cause
some PHY related functionalities to fail after a firmware reset.  For
example, in multi-host, the ability for any host to configure the PHY
settings may be lost after a firmware reset.

Fixes: ec5d31e3c15d ("bnxt_en: Handle firmware reset status during IF_UP.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index fcc729d52b174..3685db6dc93d4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11750,6 +11750,8 @@ static void bnxt_fw_init_one_p3(struct bnxt *bp)
 	bnxt_hwrm_coal_params_qcaps(bp);
 }
 
+static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt);
+
 static int bnxt_fw_init_one(struct bnxt *bp)
 {
 	int rc;
@@ -11764,6 +11766,9 @@ static int bnxt_fw_init_one(struct bnxt *bp)
 		netdev_err(bp->dev, "Firmware init phase 2 failed\n");
 		return rc;
 	}
+	rc = bnxt_probe_phy(bp, false);
+	if (rc)
+		return rc;
 	rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
 	if (rc)
 		return rc;
-- 
GitLab


From c12e1643d2738bcd4e26252ce531878841dd3f38 Mon Sep 17 00:00:00 2001
From: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
Date: Fri, 18 Jun 2021 02:07:26 -0400
Subject: [PATCH 3355/3804] bnxt_en: Fix TQM fastpath ring backing store
 computation

TQM fastpath ring needs to be sized to store both the requester
and responder side of RoCE QPs in TQM for supporting bi-directional
tests.  Fix bnxt_alloc_ctx_mem() to multiply the RoCE QPs by a factor of
2 when computing the number of entries for TQM fastpath ring.  This
fixes an RX pipeline stall issue when running bi-directional max
RoCE QP tests.

Fixes: c7dd7ab4b204 ("bnxt_en: Improve TQM ring context memory sizing formulas.")
Signed-off-by: Rukhsana Ansari <rukhsana.ansari@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 3685db6dc93d4..c913cb1f2a720 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7308,7 +7308,7 @@ skip_rdma:
 	entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
 		     2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
 	entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
-	entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
+	entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries);
 	entries = roundup(entries, ctx->tqm_entries_multiple);
 	entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
 	for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
-- 
GitLab


From 03400aaa69f916a376e11526cf591901a96a3a5c Mon Sep 17 00:00:00 2001
From: Somnath Kotur <somnath.kotur@broadcom.com>
Date: Fri, 18 Jun 2021 02:07:27 -0400
Subject: [PATCH 3356/3804] bnxt_en: Call bnxt_ethtool_free() in
 bnxt_init_one() error path

bnxt_ethtool_init() may have allocated some memory and we need to
call bnxt_ethtool_free() to properly unwind if bnxt_init_one()
fails.

Fixes: 7c3809181468 ("bnxt_en: Refactor bnxt_init_one() and turn on TPA support on 57500 chips.")
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c913cb1f2a720..aef3fccc27a97 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -13160,6 +13160,7 @@ init_err_pci_clean:
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	bnxt_free_hwrm_short_cmd_req(bp);
 	bnxt_free_hwrm_resources(bp);
+	bnxt_ethtool_free(bp);
 	kfree(bp->fw_health);
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
-- 
GitLab


From 35036d69b9bd6f06201f8e2f6b9cadb21ad1e093 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 18 Jun 2021 09:00:30 +0200
Subject: [PATCH 3357/3804] MAINTAINERS: add Guvenc as SMC maintainer

Add Guvenc as maintainer for Shared Memory Communications (SMC)
Sockets.

Cc: Julian Wiedmann <jwi@linux.ibm.com>
Acked-by: Guvenc Gulce <guvenc@linux.ibm.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index bfb3d0931cbaa..1634bb68972f2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16554,6 +16554,7 @@ F:	drivers/misc/sgi-xp/
 
 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
 M:	Karsten Graul <kgraul@linux.ibm.com>
+M:	Guvenc Gulce <guvenc@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
 S:	Supported
 W:	http://www.ibm.com/developerworks/linux/linux390/
-- 
GitLab


From 6aa32217a9a446275440ee8724b1ecaf1838df47 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Fri, 18 Jun 2021 12:52:23 +0200
Subject: [PATCH 3358/3804] net: ll_temac: Make sure to free skb when it is
 completely used

With the skb pointer piggy-backed on the TX BD, we have a simple and
efficient way to free the skb buffer when the frame has been transmitted.
But in order to avoid freeing the skb while there are still fragments from
the skb in use, we need to piggy-back on the last TX BD of the skb, not the
first.

Without this, we are doing use-after-free on the DMA side, when the first
BD of a multi TX BD packet is seen as completed in xmit_done, and the
remaining BDs are still being processed.

Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index a1f5f07f4ca97..e82f162cd80c6 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -876,7 +876,6 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		return NETDEV_TX_OK;
 	}
 	cur_p->phys = cpu_to_be32(skb_dma_addr);
-	ptr_to_txbd((void *)skb, cur_p);
 
 	for (ii = 0; ii < num_frag; ii++) {
 		if (++lp->tx_bd_tail >= lp->tx_bd_num)
@@ -915,6 +914,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	}
 	cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP);
 
+	/* Mark last fragment with skb address, so it can be consumed
+	 * in temac_start_xmit_done()
+	 */
+	ptr_to_txbd((void *)skb, cur_p);
+
 	tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail;
 	lp->tx_bd_tail++;
 	if (lp->tx_bd_tail >= lp->tx_bd_num)
-- 
GitLab


From 28d9fab458b16bcd83f9dd07ede3d585c3e1a69e Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Fri, 18 Jun 2021 12:52:28 +0200
Subject: [PATCH 3359/3804] net: ll_temac: Add memory-barriers for TX BD access

Add a couple of memory-barriers to ensure correct ordering of read/write
access to TX BDs.

In xmit_done, we should ensure that the additional BD fields are only read
after the STS_CTRL_APP0_CMPLT bit is set.

When xmit_done marks the BD as free by setting APP0=0, we need to ensure
that the other BD fields are reset first, so we avoid racing with the xmit
path, which writes to the same fields.

Finally, making sure to read APP0 of next BD after the current BD, ensures
that we see all available buffers.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index e82f162cd80c6..9797aa3221d10 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev)
 	stat = be32_to_cpu(cur_p->app0);
 
 	while (stat & STS_CTRL_APP0_CMPLT) {
+		/* Make sure that the other fields are read after bd is
+		 * released by dma
+		 */
+		rmb();
 		dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys),
 				 be32_to_cpu(cur_p->len), DMA_TO_DEVICE);
 		skb = (struct sk_buff *)ptr_from_txbd(cur_p);
 		if (skb)
 			dev_consume_skb_irq(skb);
-		cur_p->app0 = 0;
 		cur_p->app1 = 0;
 		cur_p->app2 = 0;
 		cur_p->app3 = 0;
@@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev)
 		ndev->stats.tx_packets++;
 		ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
 
+		/* app0 must be visible last, as it is used to flag
+		 * availability of the bd
+		 */
+		smp_mb();
+		cur_p->app0 = 0;
+
 		lp->tx_bd_ci++;
 		if (lp->tx_bd_ci >= lp->tx_bd_num)
 			lp->tx_bd_ci = 0;
@@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag)
 		if (cur_p->app0)
 			return NETDEV_TX_BUSY;
 
+		/* Make sure to read next bd app0 after this one */
+		rmb();
+
 		tail++;
 		if (tail >= lp->tx_bd_num)
 			tail = 0;
-- 
GitLab


From c364df2489b8ef2f5e3159b1dff1ff1fdb16040d Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Fri, 18 Jun 2021 12:52:33 +0200
Subject: [PATCH 3360/3804] net: ll_temac: Fix TX BD buffer overwrite

Just as in the initial check, we need to ensure num_frag+1 buffers are
available, as that is the number of buffers we are going to use.

This fixes a buffer overflow, which might be seen during heavy network
load. Complete lockup of TEMAC was reproducible within about 10 minutes of
a particular load.

Fixes: 84823ff80f74 ("net: ll_temac: Fix race condition causing TX hang")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 9797aa3221d10..cc482ee36501e 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -861,7 +861,7 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 		smp_mb();
 
 		/* Space might have just been freed - check again */
-		if (temac_check_tx_bd_space(lp, num_frag))
+		if (temac_check_tx_bd_space(lp, num_frag + 1))
 			return NETDEV_TX_BUSY;
 
 		netif_wake_queue(ndev);
-- 
GitLab


From f6396341194234e9b01cd7538bc2c6ac4501ab14 Mon Sep 17 00:00:00 2001
From: Esben Haabendal <esben@geanix.com>
Date: Fri, 18 Jun 2021 12:52:38 +0200
Subject: [PATCH 3361/3804] net: ll_temac: Avoid ndo_start_xmit returning
 NETDEV_TX_BUSY

As documented in Documentation/networking/driver.rst, the ndo_start_xmit
method must not return NETDEV_TX_BUSY under any normal circumstances, and
as recommended, we simply stop the tx queue in advance, when there is a
risk that the next xmit would cause a NETDEV_TX_BUSY return.

Signed-off-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index cc482ee36501e..9a13953ea70fa 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -942,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 	wmb();
 	lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
 
+	if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) {
+		netdev_info(ndev, "%s -> netif_stop_queue\n", __func__);
+		netif_stop_queue(ndev);
+	}
+
 	return NETDEV_TX_OK;
 }
 
-- 
GitLab


From 321827477360934dc040e9d3c626bf1de6c3ab3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Fri, 18 Jun 2021 13:04:35 +0200
Subject: [PATCH 3362/3804] icmp: don't send out ICMP messages with a source
 address of 0.0.0.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When constructing ICMP response messages, the kernel will try to pick a
suitable source address for the outgoing packet. However, if no IPv4
addresses are configured on the system at all, this will fail and we end up
producing an ICMP message with a source address of 0.0.0.0. This can happen
on a box routing IPv4 traffic via v6 nexthops, for instance.

Since 0.0.0.0 is not generally routable on the internet, there's a good
chance that such ICMP messages will never make it back to the sender of the
original packet that the ICMP message was sent in response to. This, in
turn, can create connectivity and PMTUd problems for senders. Fortunately,
RFC7600 reserves a dummy address to be used as a source for ICMP
messages (192.0.0.8/32), so let's teach the kernel to substitute that
address as a last resort if the regular source address selection procedure
fails.

Below is a quick example reproducing this issue with network namespaces:

ip netns add ns0
ip l add type veth peer netns ns0
ip l set dev veth0 up
ip a add 10.0.0.1/24 dev veth0
ip a add fc00:dead:cafe:42::1/64 dev veth0
ip r add 10.1.0.0/24 via inet6 fc00:dead:cafe:42::2
ip -n ns0 l set dev veth0 up
ip -n ns0 a add fc00:dead:cafe:42::2/64 dev veth0
ip -n ns0 r add 10.0.0.0/24 via inet6 fc00:dead:cafe:42::1
ip netns exec ns0 sysctl -w net.ipv4.icmp_ratelimit=0
ip netns exec ns0 sysctl -w net.ipv4.ip_forward=1
tcpdump -tpni veth0 -c 2 icmp &
ping -w 1 10.1.0.1 > /dev/null
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on veth0, link-type EN10MB (Ethernet), snapshot length 262144 bytes
IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 29, seq 1, length 64
IP 0.0.0.0 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92
2 packets captured
2 packets received by filter
0 packets dropped by kernel

With this patch the above capture changes to:
IP 10.0.0.1 > 10.1.0.1: ICMP echo request, id 31127, seq 1, length 64
IP 192.0.0.8 > 10.0.0.1: ICMP net 10.1.0.1 unreachable, length 92

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Juliusz Chroboczek <jch@irif.fr>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h | 3 +++
 net/ipv4/icmp.c         | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 7d6687618d808..d1b327036ae43 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define	INADDR_NONE		((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define	INADDR_DUMMY		((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define	IN_LOOPBACKNET		127
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7b6931a4d7755..752e392083e64 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -759,6 +759,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
 		icmp_param.data_len = room;
 	icmp_param.head_len = sizeof(struct icmphdr);
 
+	/* if we don't have a source address at this point, fall back to the
+	 * dummy address instead of sending out a packet with a source address
+	 * of 0.0.0.0
+	 */
+	if (!fl4.saddr)
+		fl4.saddr = htonl(INADDR_DUMMY);
+
 	icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
 ende:
 	ip_rt_put(rt);
-- 
GitLab


From 7e9838b7915e29ae0dfe4a3e5f007c9dc6ab9b45 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Fri, 18 Jun 2021 13:04:36 +0200
Subject: [PATCH 3363/3804] selftests/net: Add icmp.sh for testing ICMP dummy
 address responses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a new icmp.sh selftest for testing that the kernel will respond
correctly with an ICMP unreachable message with the dummy (192.0.0.8)
source address when there are no IPv4 addresses configured to use as source
addresses.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/icmp.sh | 74 +++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)
 create mode 100755 tools/testing/selftests/net/icmp.sh

diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755
index 0000000000000..e4b04cd1644ad
--- /dev/null
+++ b/tools/testing/selftests/net/icmp.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+                          +--------------------+
+# | ns1                  |    v4-via-v6 routes:     | ns2                |
+# |                      |                  '       |                    |
+# |             +--------+   -> 172.16.1.0/24 ->    +--------+           |
+# |             | veth0  +--------------------------+  veth0 |           |
+# |             +--------+   <- 172.16.0.0/24 <-    +--------+           |
+# |           172.16.0.1 |                          | 2001:db8:1::2/64   |
+# |     2001:db8:1::2/64 |                          |                    |
+# +----------------------+                          +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+NS1=ns1
+NS2=ns2
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+    rm -f "$TMPFILE"
+    ip netns del $NS1
+    ip netns del $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS1
+ip netns add $NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+    echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
-- 
GitLab


From 9e25f01b5f529d397be2e3f595b0b54ae9e80c58 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 18 Jun 2021 16:46:01 +0300
Subject: [PATCH 3364/3804] hwmon: (pmbus/dps920ab) Delete some dead code

The debugfs_create_dir() function returns error pointers, it doesn't
return NULL.  But debugfs functions don't need to be checked in normal
situations and we can just delete this code.

Fixes: 1f442e213ce5 ("hwmon: (pmbus) Add driver for Delta DPS-920AB PSU")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/YMyjmR54ErLtc1sH@mwanda
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/dps920ab.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/hwmon/pmbus/dps920ab.c b/drivers/hwmon/pmbus/dps920ab.c
index bd2df2a3c8e30..d3941f6eb29a9 100644
--- a/drivers/hwmon/pmbus/dps920ab.c
+++ b/drivers/hwmon/pmbus/dps920ab.c
@@ -119,8 +119,6 @@ static void dps920ab_init_debugfs(struct dps920ab_data *data, struct i2c_client
 		return;
 
 	debugfs_dir = debugfs_create_dir(client->name, root);
-	if (!debugfs_dir)
-		return;
 
 	debugfs_create_file("mfr_id",
 			    0400,
-- 
GitLab


From 9cca0c2d70149160407bda9a9446ce0c29b6e6c6 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Fri, 18 Jun 2021 16:49:02 +0300
Subject: [PATCH 3365/3804] net: ethernet: fix potential use-after-free in
 ec_bhf_remove

static void ec_bhf_remove(struct pci_dev *dev)
{
...
	struct ec_bhf_priv *priv = netdev_priv(net_dev);

	unregister_netdev(net_dev);
	free_netdev(net_dev);

	pci_iounmap(dev, priv->dma_io);
	pci_iounmap(dev, priv->io);
...
}

priv is netdev private data, but it is used
after free_netdev(). It can cause use-after-free when accessing priv
pointer. So, fix it by moving free_netdev() after pci_iounmap()
calls.

Fixes: 6af55ff52b02 ("Driver for Beckhoff CX5020 EtherCAT master module.")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ec_bhf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index 46b0dbab8aadc..7c992172933bc 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -576,10 +576,12 @@ static void ec_bhf_remove(struct pci_dev *dev)
 	struct ec_bhf_priv *priv = netdev_priv(net_dev);
 
 	unregister_netdev(net_dev);
-	free_netdev(net_dev);
 
 	pci_iounmap(dev, priv->dma_io);
 	pci_iounmap(dev, priv->io);
+
+	free_netdev(net_dev);
+
 	pci_release_regions(dev);
 	pci_clear_master(dev);
 	pci_disable_device(dev);
-- 
GitLab


From 9620ad86d0e3e8fda4a23efc22e0b2ae4ded1105 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 16 Jun 2021 22:22:28 +0100
Subject: [PATCH 3366/3804] afs: Re-enable freezing once a page fault is
 interrupted

If a task is killed during a page fault, it does not currently call
sb_end_pagefault(), which means that the filesystem cannot be frozen
at any time thereafter.  This may be reported by lockdep like this:

====================================
WARNING: fsstress/10757 still has locks held!
5.13.0-rc4-build4+ #91 Not tainted
------------------------------------
1 lock held by fsstress/10757:
 #0: ffff888104eac530
 (
sb_pagefaults

as filesystem freezing is modelled as a lock.

Fix this by removing all the direct returns from within the function,
and using 'ret' to indicate whether we were interrupted or successful.

Fixes: 1cf7a1518aef ("afs: Implement shared-writeable mmap")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/20210616154900.1958373-1-willy@infradead.org/
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/write.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index a523bb86915d0..e9ccaa3baf2e6 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -837,6 +837,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	unsigned long priv;
+	vm_fault_t ret = VM_FAULT_RETRY;
 
 	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
 
@@ -848,14 +849,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 #ifdef CONFIG_AFS_FSCACHE
 	if (PageFsCache(page) &&
 	    wait_on_page_fscache_killable(page) < 0)
-		return VM_FAULT_RETRY;
+		goto out;
 #endif
 
 	if (wait_on_page_writeback_killable(page))
-		return VM_FAULT_RETRY;
+		goto out;
 
 	if (lock_page_killable(page) < 0)
-		return VM_FAULT_RETRY;
+		goto out;
 
 	/* We mustn't change page->private until writeback is complete as that
 	 * details the portion of the page we need to write back and we might
@@ -863,7 +864,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	 */
 	if (wait_on_page_writeback_killable(page) < 0) {
 		unlock_page(page);
-		return VM_FAULT_RETRY;
+		goto out;
 	}
 
 	priv = afs_page_dirty(page, 0, thp_size(page));
@@ -877,8 +878,10 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	}
 	file_update_time(file);
 
+	ret = VM_FAULT_LOCKED;
+out:
 	sb_end_pagefault(inode->i_sb);
-	return VM_FAULT_LOCKED;
+	return ret;
 }
 
 /*
-- 
GitLab


From 7dd753ca59d6c8cc09aa1ed24f7657524803c7f3 Mon Sep 17 00:00:00 2001
From: ManYi Li <limanyi@uniontech.com>
Date: Fri, 11 Jun 2021 17:44:02 +0800
Subject: [PATCH 3367/3804] scsi: sr: Return appropriate error code when disk
 is ejected

Handle a reported media event code of 3. This indicates that the media has
been removed from the drive and user intervention is required to proceed.
Return DISK_EVENT_EJECT_REQUEST in that case.

Link: https://lore.kernel.org/r/20210611094402.23884-1-limanyi@uniontech.com
Signed-off-by: ManYi Li <limanyi@uniontech.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index e4633b84c556a..7815ed642d434 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -220,6 +220,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
 		return DISK_EVENT_EJECT_REQUEST;
 	else if (med->media_event_code == 2)
 		return DISK_EVENT_MEDIA_CHANGE;
+	else if (med->media_event_code == 3)
+		return DISK_EVENT_EJECT_REQUEST;
 	return 0;
 }
 
-- 
GitLab


From 314b781706e337b8cbde98cfefd3975863e032f2 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Fri, 18 Jun 2021 22:01:36 +0800
Subject: [PATCH 3368/3804] riscv: kasan: Fix MODULES_VADDR evaluation due to
 local variables' name

commit 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear
mapping") makes use of MODULES_VADDR to populate kernel, BPF, modules
mapping. Currently, MODULES_VADDR is defined as below for RV64:

| #define MODULES_VADDR   (PFN_ALIGN((unsigned long)&_end) - SZ_2G)

But kasan_init() has two local variables which are also named as _start,
_end, so MODULES_VADDR is evaluated with the local variable _end
rather than the global "_end" as we expected. Fix this issue by
renaming the two local variables.

Fixes: 2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping")
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/mm/kasan_init.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 9daacae93e335..a0d9e4ace3319 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -169,7 +169,7 @@ static void __init kasan_shallow_populate(void *start, void *end)
 
 void __init kasan_init(void)
 {
-	phys_addr_t _start, _end;
+	phys_addr_t p_start, p_end;
 	u64 i;
 
 	/*
@@ -189,9 +189,9 @@ void __init kasan_init(void)
 			(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
 	/* Populate the linear mapping */
-	for_each_mem_range(i, &_start, &_end) {
-		void *start = (void *)__va(_start);
-		void *end = (void *)__va(_end);
+	for_each_mem_range(i, &p_start, &p_end) {
+		void *start = (void *)__va(p_start);
+		void *end = (void *)__va(p_end);
 
 		if (start >= end)
 			break;
-- 
GitLab


From 3a02764c372c50ff7917fde5c6961f6cdb81d9d5 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Fri, 18 Jun 2021 22:09:13 +0800
Subject: [PATCH 3369/3804] riscv: Ensure BPF_JIT_REGION_START aligned with PMD
 size

Andreas reported that commit fc8504765ec5 ("riscv: bpf: Avoid breaking
W^X") breaks booting with one kind of defconfig. I reproduced a kernel
panic with that defconfig:

[    0.138553] Unable to handle kernel paging request at virtual address ffffffff81201220
[    0.139159] Oops [#1]
[    0.139303] Modules linked in:
[    0.139601] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-rc5-default+ #1
[    0.139934] Hardware name: riscv-virtio,qemu (DT)
[    0.140193] epc : __memset+0xc4/0xfc
[    0.140416]  ra : skb_flow_dissector_init+0x1e/0x82
[    0.140609] epc : ffffffff8029806c ra : ffffffff8033be78 sp : ffffffe001647da0
[    0.140878]  gp : ffffffff81134b08 tp : ffffffe001654380 t0 : ffffffff81201158
[    0.141156]  t1 : 0000000000000002 t2 : 0000000000000154 s0 : ffffffe001647dd0
[    0.141424]  s1 : ffffffff80a43250 a0 : ffffffff81201220 a1 : 0000000000000000
[    0.141654]  a2 : 000000000000003c a3 : ffffffff81201258 a4 : 0000000000000064
[    0.141893]  a5 : ffffffff8029806c a6 : 0000000000000040 a7 : ffffffffffffffff
[    0.142126]  s2 : ffffffff81201220 s3 : 0000000000000009 s4 : ffffffff81135088
[    0.142353]  s5 : ffffffff81135038 s6 : ffffffff8080ce80 s7 : ffffffff80800438
[    0.142584]  s8 : ffffffff80bc6578 s9 : 0000000000000008 s10: ffffffff806000ac
[    0.142810]  s11: 0000000000000000 t3 : fffffffffffffffc t4 : 0000000000000000
[    0.143042]  t5 : 0000000000000155 t6 : 00000000000003ff
[    0.143220] status: 0000000000000120 badaddr: ffffffff81201220 cause: 000000000000000f
[    0.143560] [<ffffffff8029806c>] __memset+0xc4/0xfc
[    0.143859] [<ffffffff8061e984>] init_default_flow_dissectors+0x22/0x60
[    0.144092] [<ffffffff800010fc>] do_one_initcall+0x3e/0x168
[    0.144278] [<ffffffff80600df0>] kernel_init_freeable+0x1c8/0x224
[    0.144479] [<ffffffff804868a8>] kernel_init+0x12/0x110
[    0.144658] [<ffffffff800022de>] ret_from_exception+0x0/0xc
[    0.145124] ---[ end trace f1e9643daa46d591 ]---

After some investigation, I think I found the root cause: commit
2bfc6cd81bd1 ("riscv: Move kernel mapping outside of linear mapping")
moves the BPF JIT region after the kernel:

| #define BPF_JIT_REGION_START	PFN_ALIGN((unsigned long)&_end)

&_end is unlikely to be aligned to PMD size, so the start of the BPF JIT
region shares one PMD-size mapping with part of the kernel .data section.
Because the kernel is mapped with PMD-size mappings, when
bpf_jit_binary_lock_ro() is called to make the first BPF JIT program ROX,
part of the kernel .data section becomes RO too. A later write to that
part of .data (for example, by memset) then triggers a store page fault.

To fix the issue, we need to ensure the BPF JIT region is PMD size
aligned. This patch achieves this goal by restoring the BPF JIT region
to its original position, i.e. the 128MB before the kernel .text section.
The modification to kasan_init.c is inspired by Alexandre.

Fixes: fc8504765ec5 ("riscv: bpf: Avoid breaking W^X")
Reported-by: Andreas Schwab <schwab@linux-m68k.org>
Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 Documentation/riscv/vm-layout.rst | 4 ++--
 arch/riscv/include/asm/pgtable.h  | 5 ++---
 arch/riscv/mm/kasan_init.c        | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
index 329d32098af45..b7f98930d38d3 100644
--- a/Documentation/riscv/vm-layout.rst
+++ b/Documentation/riscv/vm-layout.rst
@@ -58,6 +58,6 @@ RISC-V Linux Kernel SV39
                                                               |
   ____________________________________________________________|____________________________________________________________
                     |            |                  |         |
-   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules
-   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel, BPF
+   ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | modules, BPF
+   ffffffff80000000 |   -2    GB | ffffffffffffffff |    2 GB | kernel
   __________________|____________|__________________|_________|____________________________________________________________
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 9469f464e71af..380cd3a7e5483 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -30,9 +30,8 @@
 
 #define BPF_JIT_REGION_SIZE	(SZ_128M)
 #ifdef CONFIG_64BIT
-/* KASLR should leave at least 128MB for BPF after the kernel */
-#define BPF_JIT_REGION_START	PFN_ALIGN((unsigned long)&_end)
-#define BPF_JIT_REGION_END	(BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_START	(BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END	(MODULES_END)
 #else
 #define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
 #define BPF_JIT_REGION_END	(VMALLOC_END)
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index a0d9e4ace3319..d7189c8714a95 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -201,7 +201,7 @@ void __init kasan_init(void)
 
 	/* Populate kernel, BPF, modules mapping */
 	kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
-		       kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END));
+		       kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));
 
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		set_pte(&kasan_early_shadow_pte[i],
-- 
GitLab


From 7ede12b01b59dc67bef2e2035297dd2da5bfe427 Mon Sep 17 00:00:00 2001
From: David Abdurachmanov <david.abdurachmanov@sifive.com>
Date: Sat, 12 Jun 2021 17:43:57 -0700
Subject: [PATCH 3370/3804] riscv: dts: fu740: fix cache-controller interrupts

The order of interrupt numbers is incorrect.

The order for FU740 is: DirError, DataError, DataFail, DirFail

From SiFive FU740-C000 Manual:
19 - L2 Cache DirError
20 - L2 Cache DirFail
21 - L2 Cache DataError
22 - L2 Cache DataFail

Signed-off-by: David Abdurachmanov <david.abdurachmanov@sifive.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/boot/dts/sifive/fu740-c000.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
index 8eef82e4199f5..abbb960f90a00 100644
--- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -273,7 +273,7 @@
 			cache-size = <2097152>;
 			cache-unified;
 			interrupt-parent = <&plic0>;
-			interrupts = <19 20 21 22>;
+			interrupts = <19 21 22 20>;
 			reg = <0x0 0x2010000 0x0 0x1000>;
 		};
 		gpio: gpio@10060000 {
-- 
GitLab


From d97fb837b8cce400892e7f0ccf4755edb225ad36 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 19 Jun 2021 00:54:52 +0300
Subject: [PATCH 3371/3804] hwmon: (lm90) Don't override interrupt trigger type

The lm90 driver sets interrupt trigger type to level-low. This type is
not suitable for sensors like NCT1008 that don't deassert interrupt line
until temperature is back to normal, resulting in interrupt storm. The
appropriate trigger type should come from OF device description and
currently it's overridden by the driver's trigger type. Don't specify
the trigger type in the driver code, letting the interrupt core use the
device-specific trigger type.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index ebbfd5f352c06..2e057fad05b4a 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -1908,8 +1908,7 @@ static int lm90_probe(struct i2c_client *client)
 		dev_dbg(dev, "IRQ: %d\n", client->irq);
 		err = devm_request_threaded_irq(dev, client->irq,
 						NULL, lm90_irq_thread,
-						IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-						"lm90", client);
+						IRQF_ONESHOT, "lm90", client);
 		if (err < 0) {
 			dev_err(dev, "cannot request IRQ %d\n", client->irq);
 			return err;
-- 
GitLab


From 94dbd23ed88ce70d7baacfa20d21bc0070d1a8da Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 19 Jun 2021 00:54:53 +0300
Subject: [PATCH 3372/3804] hwmon: (lm90) Use hwmon_notify_event()

Use hwmon_notify_event() to notify userspace and thermal core about
temperature changes.

Suggested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 2e057fad05b4a..e7b678a40b392 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -465,6 +465,7 @@ enum lm90_temp11_reg_index {
 
 struct lm90_data {
 	struct i2c_client *client;
+	struct device *hwmon_dev;
 	u32 channel_config[4];
 	struct hwmon_channel_info temp_info;
 	const struct hwmon_channel_info *info[3];
@@ -1731,22 +1732,41 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status)
 
 	if ((st & (LM90_STATUS_LLOW | LM90_STATUS_LHIGH | LM90_STATUS_LTHRM)) ||
 	    (st2 & MAX6696_STATUS2_LOT2))
-		dev_warn(&client->dev,
-			 "temp%d out of range, please check!\n", 1);
+		dev_dbg(&client->dev,
+			"temp%d out of range, please check!\n", 1);
 	if ((st & (LM90_STATUS_RLOW | LM90_STATUS_RHIGH | LM90_STATUS_RTHRM)) ||
 	    (st2 & MAX6696_STATUS2_ROT2))
-		dev_warn(&client->dev,
-			 "temp%d out of range, please check!\n", 2);
+		dev_dbg(&client->dev,
+			"temp%d out of range, please check!\n", 2);
 	if (st & LM90_STATUS_ROPEN)
-		dev_warn(&client->dev,
-			 "temp%d diode open, please check!\n", 2);
+		dev_dbg(&client->dev,
+			"temp%d diode open, please check!\n", 2);
 	if (st2 & (MAX6696_STATUS2_R2LOW | MAX6696_STATUS2_R2HIGH |
 		   MAX6696_STATUS2_R2THRM | MAX6696_STATUS2_R2OT2))
-		dev_warn(&client->dev,
-			 "temp%d out of range, please check!\n", 3);
+		dev_dbg(&client->dev,
+			"temp%d out of range, please check!\n", 3);
 	if (st2 & MAX6696_STATUS2_R2OPEN)
-		dev_warn(&client->dev,
-			 "temp%d diode open, please check!\n", 3);
+		dev_dbg(&client->dev,
+			"temp%d diode open, please check!\n", 3);
+
+	if (st & LM90_STATUS_LLOW)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_min, 0);
+	if (st & LM90_STATUS_RLOW)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_min, 1);
+	if (st2 & MAX6696_STATUS2_R2LOW)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_min, 2);
+	if (st & LM90_STATUS_LHIGH)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_max, 0);
+	if (st & LM90_STATUS_RHIGH)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_max, 1);
+	if (st2 & MAX6696_STATUS2_R2HIGH)
+		hwmon_notify_event(data->hwmon_dev, hwmon_temp,
+				   hwmon_temp_max, 2);
 
 	return true;
 }
@@ -1904,6 +1924,8 @@ static int lm90_probe(struct i2c_client *client)
 	if (IS_ERR(hwmon_dev))
 		return PTR_ERR(hwmon_dev);
 
+	data->hwmon_dev = hwmon_dev;
+
 	if (client->irq) {
 		dev_dbg(dev, "IRQ: %d\n", client->irq);
 		err = devm_request_threaded_irq(dev, client->irq,
@@ -1940,7 +1962,7 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type,
 			lm90_update_confreg(data, data->config | 0x80);
 		}
 	} else {
-		dev_info(&client->dev, "Everything OK\n");
+		dev_dbg(&client->dev, "Everything OK\n");
 	}
 }
 
-- 
GitLab


From 2abdc357c55d9e728f6710cf22618889f16a00f6 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 19 Jun 2021 00:54:54 +0300
Subject: [PATCH 3373/3804] hwmon: (lm90) Unmask hardware interrupt

The ALERT interrupt is enabled by default after power-on, but it could
be masked by bootloader. For example this is the case on Acer A500 tablet
device. Unmask the hardware interrupt if interrupt is provided.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index e7b678a40b392..658b486d2f5ea 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -1704,6 +1704,13 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data)
 	if (data->kind == max6696)
 		config &= ~0x08;
 
+	/*
+	 * Interrupt is enabled by default on reset, but it may be disabled
+	 * by bootloader, unmask it.
+	 */
+	if (client->irq)
+		config &= ~0x80;
+
 	config &= 0xBF;	/* run */
 	lm90_update_confreg(data, config);
 
-- 
GitLab


From 4c7f85a321a1ac265159c22a6998ef4f2a60c21d Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sat, 19 Jun 2021 00:54:55 +0300
Subject: [PATCH 3374/3804] hwmon: (lm90) Disable interrupt on suspend

I2C accesses are prohibited and will error out after suspending of the
I2C controller, hence we need to ensure that interrupt won't fire on
suspend when it's too late. Disable interrupt across suspend/resume.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 658b486d2f5ea..b53f17511b054 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -1973,11 +1973,36 @@ static void lm90_alert(struct i2c_client *client, enum i2c_alert_protocol type,
 	}
 }
 
+static int __maybe_unused lm90_suspend(struct device *dev)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+
+	if (client->irq)
+		disable_irq(client->irq);
+
+	return 0;
+}
+
+static int __maybe_unused lm90_resume(struct device *dev)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+
+	if (client->irq)
+		enable_irq(client->irq);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(lm90_pm_ops, lm90_suspend, lm90_resume);
+
 static struct i2c_driver lm90_driver = {
 	.class		= I2C_CLASS_HWMON,
 	.driver = {
 		.name	= "lm90",
 		.of_match_table = of_match_ptr(lm90_of_match),
+		.pm	= &lm90_pm_ops,
 	},
 	.probe_new	= lm90_probe,
 	.alert		= lm90_alert,
-- 
GitLab


From fc96ec4d5d4155c61cbafd49fb2dd403c899a9f4 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 10 Jun 2021 22:32:59 +0800
Subject: [PATCH 3375/3804] perf metricgroup: Fix find_evsel_group() event
 selector

The following command segfaults on my x86 broadwell:

  $ ./perf stat  -M frontend_bound,retiring,backend_bound,bad_speculation sleep 1
  WARNING: grouped events cpus do not match, disabling group:
    anon group { raw 0x10e }
    anon group { raw 0x10e }
  perf: util/evsel.c:1596: get_group_fd: Assertion `!(!leader->core.fd)' failed.
  Aborted (core dumped)

The issue shows itself as a use-after-free in evlist__check_cpu_maps(),
whereby the leader of an event selector (evsel) has been deleted (yet we
still attempt to verify for an evsel).

Fundamentally the problem comes from metricgroup__setup_events() ->
find_evsel_group(), and has developed from the previous fix attempt in
commit 9c880c24cb0d ("perf metricgroup: Fix for metrics containing
duration_time").

The problem now is that the logic in checking if an evsel is in the same
group is subtly broken for the "cycles" event. For the "cycles" event,
the pmu_name is NULL; however the logic in find_evsel_group() may set an
event matched against "cycles" as used, when it should not be.

This leads to a condition where an evsel is set, yet its leader is not.

Fix the check for evsel pmu_name by not matching evsels when either has a
NULL pmu_name.

There is still a pre-existing metric issue whereby the ordering of the
metrics may break the 'stat' function, as discussed at:
https://lore.kernel.org/lkml/49c6fccb-b716-1bf0-18a6-cace1cdb66b9@huawei.com/

Fixes: 9c880c24cb0d ("perf metricgroup: Fix for metrics containing duration_time")
Signed-off-by: John Garry <john.garry@huawei.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> # On a Thinkpad T450S
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/1623335580-187317-2-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 8336dd8e80986..c456fdeae06a1 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -162,10 +162,10 @@ static bool contains_event(struct evsel **metric_events, int num_events,
 	return false;
 }
 
-static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2)
+static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2)
 {
 	if (!ev1->pmu_name || !ev2->pmu_name)
-		return false;
+		return true;
 
 	return !strcmp(ev1->pmu_name, ev2->pmu_name);
 }
@@ -288,7 +288,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
 			 */
 			if (!has_constraint &&
 			    ev->leader != metric_events[i]->leader &&
-			    evsel_same_pmu(ev->leader, metric_events[i]->leader))
+			    evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader))
 				break;
 			if (!strcmp(metric_events[i]->name, ev->name)) {
 				set_bit(ev->idx, evlist_used);
-- 
GitLab


From fe7a98b9d9b36e5c8a22d76b67d29721f153f66e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 10 Jun 2021 22:33:00 +0800
Subject: [PATCH 3376/3804] perf metricgroup: Return error code from
 metricgroup__add_metric_sys_event_iter()

The error code is not set at all in the sys event iter function.

This may lead to an uninitialized value of "ret" in
metricgroup__add_metric() when no CPU metric is added.

Fix by properly setting the error code.

It is not necessary to init "ret" to 0 in metricgroup__add_metric(), as
if we have no CPU or sys event metric matching, then "has_match" should
be 0 and "ret" is set to -EINVAL.

However gcc cannot detect that it may not have been set after the
map_for_each_metric() loop for CPU metrics, which is strange.

Fixes: be335ec28efa8 ("perf metricgroup: Support adding metrics for system PMUs")
Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/1623335580-187317-3-git-send-email-john.garry@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/metricgroup.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index c456fdeae06a1..d3cf2dee36c8f 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1073,16 +1073,18 @@ static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe,
 
 	ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = resolve_metric(d->metric_no_group,
 				     d->metric_list, NULL, d->ids);
 	if (ret)
-		return ret;
+		goto out;
 
 	*(d->has_match) = true;
 
-	return *d->ret;
+out:
+	*(d->ret) = ret;
+	return ret;
 }
 
 static int metricgroup__add_metric(const char *metric, bool metric_no_group,
-- 
GitLab


From c087e9480cf33672ef2c6cce4348d754988b8437 Mon Sep 17 00:00:00 2001
From: Riccardo Mancini <rickyman7@gmail.com>
Date: Sat, 12 Jun 2021 19:37:48 +0200
Subject: [PATCH 3377/3804] perf machine: Fix refcount usage when processing
 PERF_RECORD_KSYMBOL

ASan reported a memory leak of BPF-related ksymbols map and dso. The
leak is caused by refcount never reaching 0, due to missing __put calls
in the function machine__process_ksymbol_register.

Once the dso is inserted in the map, dso__put() should be called
(map__new2() increases the refcount to 2).

The same thing applies for the map when it's inserted into maps
(maps__insert() increases the refcount to 2).

  $ sudo ./perf record -- sleep 5
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.025 MB perf.data (8 samples) ]

  =================================================================
  ==297735==ERROR: LeakSanitizer: detected memory leaks

  Direct leak of 6992 byte(s) in 19 object(s) allocated from:
      #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7)
      #1 0x8e4e53 in map__new2 /home/user/linux/tools/perf/util/map.c:216:20
      #2 0x8cf68c in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:778:10
      [...]

  Indirect leak of 8702 byte(s) in 19 object(s) allocated from:
      #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7)
      #1 0x8728d7 in dso__new_id /home/user/linux/tools/perf/util/dso.c:1256:20
      #2 0x872015 in dso__new /home/user/linux/tools/perf/util/dso.c:1295:9
      #3 0x8cf623 in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:774:21
      [...]

  Indirect leak of 1520 byte(s) in 19 object(s) allocated from:
      #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7)
      #1 0x87b3da in symbol__new /home/user/linux/tools/perf/util/symbol.c:269:23
      #2 0x888954 in map__process_kallsym_symbol /home/user/linux/tools/perf/util/symbol.c:710:8
      [...]

  Indirect leak of 1406 byte(s) in 19 object(s) allocated from:
      #0 0x4f43c7 in calloc (/home/user/linux/tools/perf/perf+0x4f43c7)
      #1 0x87b3da in symbol__new /home/user/linux/tools/perf/util/symbol.c:269:23
      #2 0x8cfbd8 in machine__process_ksymbol_register /home/user/linux/tools/perf/util/machine.c:803:8
      [...]

Signed-off-by: Riccardo Mancini <rickyman7@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tommi Rantala <tommi.t.rantala@nokia.com>
Link: http://lore.kernel.org/lkml/20210612173751.188582-1-rickyman7@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3ff4936a15a42..da19be7da284c 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -776,10 +776,10 @@ static int machine__process_ksymbol_register(struct machine *machine,
 		if (dso) {
 			dso->kernel = DSO_SPACE__KERNEL;
 			map = map__new2(0, dso);
+			dso__put(dso);
 		}
 
 		if (!dso || !map) {
-			dso__put(dso);
 			return -ENOMEM;
 		}
 
@@ -792,6 +792,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
 		map->start = event->ksymbol.addr;
 		map->end = map->start + event->ksymbol.len;
 		maps__insert(&machine->kmaps, map);
+		map__put(map);
 		dso__set_loaded(dso);
 
 		if (is_bpf_image(event->ksymbol.name)) {
-- 
GitLab


From 482698c2f848f9dee1a5bd949793c2fe6a71adc5 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Jun 2021 11:42:13 -0700
Subject: [PATCH 3378/3804] perf test: Fix non-bash issue with stat bpf
 counters

$(( .. )) is a bash feature but the test's interpreter is #!/bin/sh;
switch the code to use expr.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20210617184216.2075588-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/stat_bpf_counters.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 22eb31e48ca7f..2f9948b3d9439 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -11,9 +11,9 @@ compare_number()
        second_num=$2
 
        # upper bound is first_num * 110%
-       upper=$(( $first_num + $first_num / 10 ))
+       upper=$(expr $first_num + $first_num / 10 )
        # lower bound is first_num * 90%
-       lower=$(( $first_num - $first_num / 10 ))
+       lower=$(expr $first_num - $first_num / 10 )
 
        if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
                echo "The difference between $first_num and $second_num are greater than 10%."
-- 
GitLab


From ef83f9efe8461b8fd71eb60b53dbb6a5dd7b39e9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 19 Jun 2021 10:09:08 -0300
Subject: [PATCH 3379/3804] perf beauty: Update copy of linux/socket.h with the
 kernel sources

To pick the changes in:

  ea6932d70e223e02 ("net: make get_net_ns return error if NET_NS is disabled")

That don't result in any changes in the tables generated from that
header.

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h'
  diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h

Cc: Changbin Du <changbin.du@intel.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/include/linux/socket.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index b8fc5c53ba6fa..0d8e3dcb7f881 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -438,6 +438,4 @@ extern int __sys_socketpair(int family, int type, int protocol,
 			    int __user *usockvec);
 extern int __sys_shutdown_sock(struct socket *sock, int how);
 extern int __sys_shutdown(int fd, int how);
-
-extern struct ns_common *get_net_ns(struct ns_common *ns);
 #endif /* _LINUX_SOCKET_H */
-- 
GitLab


From 17d27fc314cba0205eec8966735a7a241cc8a5e0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 19 Jun 2021 10:11:46 -0300
Subject: [PATCH 3380/3804] tools headers UAPI: Sync asm-generic/unistd.h with
 the kernel original

To pick the changes in:

  8b1462b67f23da54 ("quota: finish disable quotactl_path syscall")

Those headers are used in some arches to generate the syscall table used
in 'perf trace' to translate syscall numbers into strings.

This addresses this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h

Cc: Jan Kara <jack@suse.cz>
Cc: Marcin Juszkiewicz <marcin@juszkiewicz.com.pl>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/unistd.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 6de5a7fc066b8..d2a942086fcb6 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,8 +863,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
-#define __NR_quotactl_path 443
-__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+/* 443 is reserved for quotactl_path */
 
 #define __NR_landlock_create_ruleset 444
 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
-- 
GitLab


From 1792a59eab9593de2eae36c40c5a22d70f52c026 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 19 Jun 2021 10:15:22 -0300
Subject: [PATCH 3381/3804] tools headers UAPI: Sync linux/in.h copy with the
 kernel sources
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To pick the changes in:

  321827477360934d ("icmp: don't send out ICMP messages with a source address of 0.0.0.0")

That don't result in any change in tooling, as INADDR_ are not used to
generate id->string tables used by 'perf trace'.

This addresses this build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h'
  diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h

Cc: David S. Miller <davem@davemloft.net>
Cc: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/in.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 7d6687618d808..d1b327036ae43 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -289,6 +289,9 @@ struct sockaddr_in {
 /* Address indicating an error return. */
 #define	INADDR_NONE		((unsigned long int) 0xffffffff)
 
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define	INADDR_DUMMY		((unsigned long int) 0xc0000008)
+
 /* Network number for local host loopback. */
 #define	IN_LOOPBACKNET		127
 
-- 
GitLab


From e484028bf39c0c87c499dc782dc9cd3bb72c0ab5 Mon Sep 17 00:00:00 2001
From: Dan Sneddon <dan.sneddon@microchip.com>
Date: Wed, 2 Jun 2021 09:08:45 -0700
Subject: [PATCH 3382/3804] drm: atmel_hlcdc: Enable the crtc vblank prior to
 crtc usage.

'commit eec44d44a3d2 ("drm/atmel: Use drm_atomic_helper_commit")'
removed the home-grown handling of atomic commits and exposed an issue
in the crtc atomic commit handling where vblank is expected to be
enabled but hasn't yet, causing kernel warnings during boot.  This patch
cleans up the crtc vblank handling thus removing the warning on boot.

Fixes: eec44d44a3d2 ("drm/atmel: Use drm_atomic_helper_commit")

Signed-off-by: Dan Sneddon <dan.sneddon@microchip.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210602160846.5013-1-dan.sneddon@microchip.com
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 05ad75d155e84..cfe4fc69277e6 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -232,7 +232,6 @@ static void atmel_hlcdc_crtc_atomic_enable(struct drm_crtc *c,
 
 	pm_runtime_put_sync(dev->dev);
 
-	drm_crtc_vblank_on(c);
 }
 
 #define ATMEL_HLCDC_RGB444_OUTPUT	BIT(0)
@@ -343,8 +342,17 @@ static int atmel_hlcdc_crtc_atomic_check(struct drm_crtc *c,
 
 static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c,
 					  struct drm_atomic_state *state)
+{
+	drm_crtc_vblank_on(c);
+}
+
+static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *c,
+					  struct drm_atomic_state *state)
 {
 	struct atmel_hlcdc_crtc *crtc = drm_crtc_to_atmel_hlcdc_crtc(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->dev->event_lock, flags);
 
 	if (c->state->event) {
 		c->state->event->pipe = drm_crtc_index(c);
@@ -354,12 +362,7 @@ static void atmel_hlcdc_crtc_atomic_begin(struct drm_crtc *c,
 		crtc->event = c->state->event;
 		c->state->event = NULL;
 	}
-}
-
-static void atmel_hlcdc_crtc_atomic_flush(struct drm_crtc *crtc,
-					  struct drm_atomic_state *state)
-{
-	/* TODO: write common plane control register if available */
+	spin_unlock_irqrestore(&c->dev->event_lock, flags);
 }
 
 static const struct drm_crtc_helper_funcs lcdc_crtc_helper_funcs = {
-- 
GitLab


From af42167f53ec18b0856387fc119b28c8c1ba98a1 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Date: Wed, 26 May 2021 08:30:02 -0400
Subject: [PATCH 3383/3804] drm/panel: ld9040: reference spi_device_id table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference the spi_device_id table to silence W=1 warning:

  drivers/gpu/drm/panel/panel-samsung-ld9040.c:377:35:
    warning: ‘ld9040_ids’ defined but not used [-Wunused-const-variable=]

This also would be needed for matching the driver if booted without
CONFIG_OF (although it's not necessarily real case).

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210526123002.12913-1-krzysztof.kozlowski@canonical.com
---
 drivers/gpu/drm/panel/panel-samsung-ld9040.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panel/panel-samsung-ld9040.c b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
index f484147fc3a66..c4b388850a13e 100644
--- a/drivers/gpu/drm/panel/panel-samsung-ld9040.c
+++ b/drivers/gpu/drm/panel/panel-samsung-ld9040.c
@@ -383,6 +383,7 @@ MODULE_DEVICE_TABLE(spi, ld9040_ids);
 static struct spi_driver ld9040_driver = {
 	.probe = ld9040_probe,
 	.remove = ld9040_remove,
+	.id_table = ld9040_ids,
 	.driver = {
 		.name = "panel-samsung-ld9040",
 		.of_match_table = ld9040_of_match,
-- 
GitLab


From e541845ae0858616c52dd97df4bf91568c7a7a1b Mon Sep 17 00:00:00 2001
From: Dan Sneddon <dan.sneddon@microchip.com>
Date: Tue, 30 Mar 2021 08:17:20 -0700
Subject: [PATCH 3384/3804] drm/atmel-hlcdc: Allow async page flips

The driver is capable of doing async page flips so we need to tell the
core to allow them.

Signed-off-by: Dan Sneddon <dan.sneddon@microchip.com>
Tested-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20210330151721.6616-1-dan.sneddon@microchip.com
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 65af56e471294..f09b6dd8754c6 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -593,6 +593,7 @@ static int atmel_hlcdc_dc_modeset_init(struct drm_device *dev)
 	dev->mode_config.max_width = dc->desc->max_width;
 	dev->mode_config.max_height = dc->desc->max_height;
 	dev->mode_config.funcs = &mode_config_funcs;
+	dev->mode_config.async_page_flip = true;
 
 	return 0;
 }
-- 
GitLab


From 87ac3d002d567fac3527d6612865e81cfd783727 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Thu, 13 May 2021 18:36:04 -0400
Subject: [PATCH 3385/3804] evm: output EVM digest calculation info

Output the data used in calculating the EVM digest and the resulting
digest as ascii hexadecimal strings.

Suggested-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com> (CONFIG_DYNAMIC_DEBUG)
Reviewed-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
Reported-by: kernel test robot <lkp@intel.com> (Use %zu for size_t)
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_crypto.c | 43 +++++++++++++++++++++++++++++
 security/integrity/evm/evm_main.c   |  4 +++
 2 files changed, 47 insertions(+)

diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index 1628e2ca98623..bebe160c57b9e 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -10,6 +10,8 @@
  *	 Using root's kernel master key (kmk), calculate the HMAC
  */
 
+#define pr_fmt(fmt) "EVM: "fmt
+
 #include <linux/export.h>
 #include <linux/crypto.h>
 #include <linux/xattr.h>
@@ -175,6 +177,30 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
 	    type != EVM_XATTR_PORTABLE_DIGSIG)
 		crypto_shash_update(desc, (u8 *)&inode->i_sb->s_uuid, UUID_SIZE);
 	crypto_shash_final(desc, digest);
+
+	pr_debug("hmac_misc: (%zu) [%*phN]\n", sizeof(struct h_misc),
+		 (int)sizeof(struct h_misc), &hmac_misc);
+}
+
+/*
+ * Dump large security xattr values as a continuous ascii hexadecimal string.
+ * (pr_debug is limited to 64 bytes.)
+ */
+static void dump_security_xattr(const char *prefix, const void *src,
+				size_t count)
+{
+#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+	char *asciihex, *p;
+
+	p = asciihex = kmalloc(count * 2 + 1, GFP_KERNEL);
+	if (!asciihex)
+		return;
+
+	p = bin2hex(p, src, count);
+	*p = 0;
+	pr_debug("%s: (%zu) %.*s\n", prefix, count, (int)count * 2, asciihex);
+	kfree(asciihex);
+#endif
 }
 
 /*
@@ -230,6 +256,16 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
 					     req_xattr_value_len);
 			if (is_ima)
 				ima_present = true;
+
+			if (req_xattr_value_len < 64)
+				pr_debug("%s: (%zu) [%*phN]\n", req_xattr_name,
+					 req_xattr_value_len,
+					 (int)req_xattr_value_len,
+					 req_xattr_value);
+			else
+				dump_security_xattr(req_xattr_name,
+						    req_xattr_value,
+						    req_xattr_value_len);
 			continue;
 		}
 		size = vfs_getxattr_alloc(&init_user_ns, dentry, xattr->name,
@@ -246,6 +282,13 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
 		crypto_shash_update(desc, (const u8 *)xattr_value, xattr_size);
 		if (is_ima)
 			ima_present = true;
+
+		if (xattr_size < 64)
+			pr_debug("%s: (%zu) [%*phN]", xattr->name, xattr_size,
+				 (int)xattr_size, xattr_value);
+		else
+			dump_security_xattr(xattr->name, xattr_value,
+					    xattr_size);
 	}
 	hmac_add_misc(desc, inode, type, data->digest);
 
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 977208aecd066..1c8435dfabeea 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -11,6 +11,8 @@
  *	evm_inode_removexattr, and evm_verifyxattr
  */
 
+#define pr_fmt(fmt) "EVM: "fmt
+
 #include <linux/init.h>
 #include <linux/crypto.h>
 #include <linux/audit.h>
@@ -272,6 +274,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
 		else
 			evm_status = INTEGRITY_FAIL;
 	}
+	pr_debug("digest: (%d) [%*phN]\n", digest.hdr.length, digest.hdr.length,
+		  digest.digest);
 out:
 	if (iint)
 		iint->evm_status = evm_status;
-- 
GitLab


From 065b6211a87746e196b56759a70c7851418dd741 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 6 Jun 2021 15:55:55 +0200
Subject: [PATCH 3386/3804] i2c: i801: Ensure that SMBHSTSTS_INUSE_STS is
 cleared when leaving i801_access

As explained in [0] currently we may leave SMBHSTSTS_INUSE_STS set,
thus potentially breaking ACPI/BIOS usage of the SMBUS device.

Seems patch [0] needs a little bit more of review effort, therefore
I'd suggest to apply a part of it as quick win. Just clearing
SMBHSTSTS_INUSE_STS when leaving i801_access() should fix the
referenced issue and leaves more time for discussing a more
sophisticated locking handling.

[0] https://www.spinics.net/lists/linux-i2c/msg51558.html

Fixes: 01590f361e94 ("i2c: i801: Instantiate SPD EEPROMs automatically")
Suggested-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Hector Martin <marcan@marcan.st>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-i801.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index f9e1c2ceaac05..04a1e38f2a6f0 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -978,6 +978,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 	}
 
 out:
+	/* Unlock the SMBus device for use by BIOS/ACPI */
+	outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv));
+
 	pm_runtime_mark_last_busy(&priv->pci_dev->dev);
 	pm_runtime_put_autosuspend(&priv->pci_dev->dev);
 	mutex_unlock(&priv->acpi_lock);
-- 
GitLab


From 2269583753d2b8fdd3c861a516ff0cdbfcf4ef0b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 12 May 2021 13:06:41 +0300
Subject: [PATCH 3387/3804] i2c: cp2615: check for allocation failure in
 cp2615_i2c_recv()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to add a check for kzalloc() failure.

Fixes: 4a7695429ead ("i2c: cp2615: add i2c driver for Silicon Labs' CP2615 Digital Audio Bridge")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Bence Csókás <bence98@sch.bme.hu>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-cp2615.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-cp2615.c b/drivers/i2c/busses/i2c-cp2615.c
index 78cfecd1ea768..3ded28632e4c1 100644
--- a/drivers/i2c/busses/i2c-cp2615.c
+++ b/drivers/i2c/busses/i2c-cp2615.c
@@ -138,17 +138,23 @@ cp2615_i2c_send(struct usb_interface *usbif, struct cp2615_i2c_transfer *i2c_w)
 static int
 cp2615_i2c_recv(struct usb_interface *usbif, unsigned char tag, void *buf)
 {
-	struct cp2615_iop_msg *msg = kzalloc(sizeof(*msg), GFP_KERNEL);
-	struct cp2615_i2c_transfer_result *i2c_r = (struct cp2615_i2c_transfer_result *)&msg->data;
 	struct usb_device *usbdev = interface_to_usbdev(usbif);
-	int res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN),
-			       msg, sizeof(struct cp2615_iop_msg), NULL, 0);
+	struct cp2615_iop_msg *msg;
+	struct cp2615_i2c_transfer_result *i2c_r;
+	int res;
+
+	msg = kzalloc(sizeof(*msg), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
 
+	res = usb_bulk_msg(usbdev, usb_rcvbulkpipe(usbdev, IOP_EP_IN), msg,
+			   sizeof(struct cp2615_iop_msg), NULL, 0);
 	if (res < 0) {
 		kfree(msg);
 		return res;
 	}
 
+	i2c_r = (struct cp2615_i2c_transfer_result *)&msg->data;
 	if (msg->msg != htons(iop_I2cTransferResult) || i2c_r->tag != tag) {
 		kfree(msg);
 		return -EIO;
-- 
GitLab


From 13311e74253fe64329390df80bed3f07314ddd61 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 20 Jun 2021 15:03:15 -0700
Subject: [PATCH 3388/3804] Linux 5.13-rc7

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 2d7a8df84e2bd..3e8dbe68eac8d 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
-NAME = Frozen Wasteland
+EXTRAVERSION = -rc7
+NAME = Opossums on Parade
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
-- 
GitLab


From 7d815f4afa87f2032b650ae1bba7534b550a6b8b Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Tue, 25 May 2021 16:17:33 -0700
Subject: [PATCH 3389/3804] PCI: hv: Add check for hyperv_initialized in
 init_hv_pci_drv()

Add check for hv_is_hyperv_initialized() at the top of
init_hv_pci_drv(), so if the pci-hyperv driver is force-loaded on non
Hyper-V platforms, the init_hv_pci_drv() will exit immediately, without
any side effects, like assignments to hvpci_block_ops, etc.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reported-and-tested-by: Mohammad Alqayeem <mohammad.alqyeem@nutanix.com>
Reviewed-by: Wei Liu <wei.liu@kernel.org>
Link: https://lore.kernel.org/r/1621984653-1210-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/pci/controller/pci-hyperv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 6511648271b23..bebe3eeebc4e1 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -3476,6 +3476,9 @@ static void __exit exit_hv_pci_drv(void)
 
 static int __init init_hv_pci_drv(void)
 {
+	if (!hv_is_hyperv_initialized())
+		return -ENODEV;
+
 	/* Set the invalid domain number's bit, so it will not be used */
 	set_bit(HVPCI_DOM_INVALID, hvpci_dom_map);
 
-- 
GitLab


From 77bbbc0cf84834ed130838f7ac1988567f4d0288 Mon Sep 17 00:00:00 2001
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Date: Wed, 2 Jun 2021 14:04:41 +1000
Subject: [PATCH 3390/3804] KVM: PPC: Book3S HV: Fix TLB management on SMT8
 POWER9 and POWER10 processors

The POWER9 vCPU TLB management code assumes all threads in a core share
a TLB, and that TLBIEL executed by one thread will invalidate TLBs for
all threads. This is not the case for SMT8 capable POWER9 and POWER10
(big core) processors, where the TLB is split between groups of threads.
This results in TLB multi-hits, random data corruption, etc.

Fix this by introducing cpu_first_tlb_thread_sibling etc., to determine
which siblings share TLBs, and use that in the guest TLB flushing code.

[npiggin@gmail.com: add changelog and comment]

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210602040441.3984352-1-npiggin@gmail.com
---
 arch/powerpc/include/asm/cputhreads.h | 30 +++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c          | 13 ++++++------
 arch/powerpc/kvm/book3s_hv_builtin.c  |  2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   |  2 +-
 4 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 98c8bd155bf9d..b167186aaee4a 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
 	return cpu | (threads_per_core - 1);
 }
 
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return cpu & ~0x6;	/* Big Core */
+	else
+		return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return cpu | 0x6;	/* Big Core */
+	else
+		return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+		return 2;		/* Big Core */
+	else
+		return 1;
+}
+
 static inline u32 get_tensr(void)
 {
 #ifdef	CONFIG_BOOKE
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f4dc4f0c34b5e..c0f1299736b95 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2820,7 +2820,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 	cpumask_t *cpu_in_guest;
 	int i;
 
-	cpu = cpu_first_thread_sibling(cpu);
+	cpu = cpu_first_tlb_thread_sibling(cpu);
 	if (nested) {
 		cpumask_set_cpu(cpu, &nested->need_tlb_flush);
 		cpu_in_guest = &nested->cpu_in_guest;
@@ -2834,9 +2834,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 	 * the other side is the first smp_mb() in kvmppc_run_core().
 	 */
 	smp_mb();
-	for (i = 0; i < threads_per_core; ++i)
-		if (cpumask_test_cpu(cpu + i, cpu_in_guest))
-			smp_call_function_single(cpu + i, do_nothing, NULL, 1);
+	for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+					i += cpu_tlb_thread_sibling_step())
+		if (cpumask_test_cpu(i, cpu_in_guest))
+			smp_call_function_single(i, do_nothing, NULL, 1);
 }
 
 static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
@@ -2867,8 +2868,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 	 */
 	if (prev_cpu != pcpu) {
 		if (prev_cpu >= 0 &&
-		    cpu_first_thread_sibling(prev_cpu) !=
-		    cpu_first_thread_sibling(pcpu))
+		    cpu_first_tlb_thread_sibling(prev_cpu) !=
+		    cpu_first_tlb_thread_sibling(pcpu))
 			radix_flush_cpu(kvm, prev_cpu, vcpu);
 		if (nested)
 			nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 259492bb41531..be8ef1c5b1bfb 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -721,7 +721,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
 	 * Thus we make all 4 threads use the same bit.
 	 */
 	if (cpu_has_feature(CPU_FTR_ARCH_300))
-		pcpu = cpu_first_thread_sibling(pcpu);
+		pcpu = cpu_first_tlb_thread_sibling(pcpu);
 
 	if (nested)
 		need_tlb_flush = &nested->need_tlb_flush;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index f487ebb3a70aa..8b70de4595f01 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -71,7 +71,7 @@ static int global_invalidates(struct kvm *kvm)
 		 * so use the bit for the first thread to represent the core.
 		 */
 		if (cpu_has_feature(CPU_FTR_ARCH_300))
-			cpu = cpu_first_thread_sibling(cpu);
+			cpu = cpu_first_tlb_thread_sibling(cpu);
 		cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
 	}
 
-- 
GitLab


From b22afcdf04c96ca58327784e280e10288cfd3303 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 27 Mar 2021 22:01:36 +0100
Subject: [PATCH 3391/3804] cpu/hotplug: Cure the cpusets trainwreck

Alexey and Joshua tried to solve a cpusets related hotplug problem which is
user space visible and results in unexpected behaviour for some time after
a CPU has been plugged in and the corresponding uevent was delivered.

cpusets delegate the hotplug work (rebuilding cpumasks etc.) to a
workqueue. This is done because the cpusets code has already a lock
nesting of cgroups_mutex -> cpu_hotplug_lock. A synchronous callback or
waiting for the work to finish with cpu_hotplug_lock held can and will
deadlock because that results in the reverse lock order.

As a consequence the uevent can be delivered before cpusets have consistent
state which means that a user space invocation of sched_setaffinity() to
move a task to the plugged CPU fails up to the point where the scheduled
work has been processed.

The same is true for CPU unplug, but that does not create user observable
failure (yet).

It's still inconsistent to claim that an operation is finished before it
actually is and that's the real issue at hand. uevents just make it
reliably observable.

Obviously the problem should be fixed in cpusets/cgroups, but untangling
that is pretty much impossible because according to the changelog of the
commit which introduced this 8 years ago:

 3a5a6d0c2b03("cpuset: don't nest cgroup_mutex inside get_online_cpus()")

the lock order cgroups_mutex -> cpu_hotplug_lock is a design decision and
the whole code is built around that.

So bite the bullet and invoke the relevant cpuset function, which waits for
the work to finish, in _cpu_up/down() after dropping cpu_hotplug_lock and
only when tasks are not frozen by suspend/hibernate because that would
obviously wait forever.

Waiting there with cpu_add_remove_lock, which is protecting the present
and possible CPU maps, held is not a problem at all because neither work
queues nor cpusets/cgroups have any lockchains related to that lock.

Waiting in the hotplug machinery is not problematic either because there
are already state callbacks which wait for hardware queues to drain. It
makes the operations slightly slower, but hotplug is slow anyway.

This ensures that state is consistent before returning from a hotplug
up/down operation. It's still inconsistent during the operation, but that's
a different story.

Add a large comment which explains why this is done and why this is not a
dumping ground for the hack of the day to work around half thought out locking
schemes. Document also the implications vs. hotplug operations and
serialization or the lack of it.

Thanks to Alexey and Joshua for analyzing why this temporary
sched_setaffinity() failure happened.

Fixes: 3a5a6d0c2b03("cpuset: don't nest cgroup_mutex inside get_online_cpus()")
Reported-by: Alexey Klimov <aklimov@redhat.com>
Reported-by: Joshua Baker <jobaker@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Alexey Klimov <aklimov@redhat.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/87tuowcnv3.ffs@nanos.tec.linutronix.de
---
 kernel/cpu.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index e538518556f47..d2e1692d7bdf8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -32,6 +32,7 @@
 #include <linux/relay.h>
 #include <linux/slab.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/cpuset.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -873,6 +874,52 @@ void __init cpuhp_threads_init(void)
 	kthread_unpark(this_cpu_read(cpuhp_state.thread));
 }
 
+/*
+ *
+ * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
+ * protected region.
+ *
+ * The operation is still serialized against concurrent CPU hotplug via
+ * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
+ * serialized against other hotplug related activity like adding or
+ * removing of state callbacks and state instances, which invoke either the
+ * startup or the teardown callback of the affected state.
+ *
+ * This is required for subsystems which are unfixable vs. CPU hotplug and
+ * evade lock inversion problems by scheduling work which has to be
+ * completed _before_ cpu_up()/_cpu_down() returns.
+ *
+ * Don't even think about adding anything to this for any new code or even
+ * drivers. Its only purpose is to keep existing lock order trainwrecks
+ * working.
+ *
+ * For cpu_down() there might be valid reasons to finish cleanups which are
+ * not required to be done under cpu_hotplug_lock, but that's a different
+ * story and would not be invoked via this.
+ */
+static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
+{
+	/*
+	 * cpusets delegate hotplug operations to a worker to "solve" the
+	 * lock order problems. Wait for the worker, but only if tasks are
+	 * _not_ frozen (suspend, hibernate) as that would wait forever.
+	 *
+	 * The wait is required because otherwise the hotplug operation
+	 * returns with inconsistent state, which could even be observed in
+	 * user space when a new CPU is brought up. The CPU plug uevent
+	 * would be delivered and user space reacting on it would fail to
+	 * move tasks to the newly plugged CPU up to the point where the
+	 * work has finished because up to that point the newly plugged CPU
+	 * is not assignable in cpusets/cgroups. On unplug that's not
+	 * necessarily a visible issue, but it is still inconsistent state,
+	 * which is the real problem which needs to be "fixed". This can't
+	 * prevent the transient state between scheduling the work and
+	 * returning from waiting for it.
+	 */
+	if (!tasks_frozen)
+		cpuset_wait_for_hotplug();
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 #ifndef arch_clear_mm_cpumask_cpu
 #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
@@ -1108,6 +1155,7 @@ out:
 	 */
 	lockup_detector_cleanup();
 	arch_smt_update();
+	cpu_up_down_serialize_trainwrecks(tasks_frozen);
 	return ret;
 }
 
@@ -1302,6 +1350,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 out:
 	cpus_write_unlock();
 	arch_smt_update();
+	cpu_up_down_serialize_trainwrecks(tasks_frozen);
 	return ret;
 }
 
-- 
GitLab


From fc66127dc3396338f287c3b494dfbf102547e770 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 11 Jun 2021 10:27:51 +0200
Subject: [PATCH 3392/3804] s390: fix system call restart with multiple signals

glibc complained with "The futex facility returned an unexpected error
code.". It turned out that the futex syscall returned -ERESTARTSYS because
a signal is pending. arch_do_signal_or_restart() restored the syscall
parameters (namely regs->gprs[2]) and set PIF_SYSCALL_RESTART. When
another signal is made pending later in the exit loop
arch_do_signal_or_restart() is called again. This function clears
PIF_SYSCALL_RESTART and checks the return code which is set in
regs->gprs[2]. However, regs->gprs[2] was restored in the previous run
and no longer contains -ERESTARTSYS, so PIF_SYSCALL_RESTART isn't set
again and the syscall is skipped.

Fix this by not clearing PIF_SYSCALL_RESTART - it is already cleared in
__do_syscall() when the syscall is restarted.

Reported-by: Bjoern Walk <bwalk@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Fixes: 56e62a737028 ("s390: convert to generic entry")
Cc: <stable@vger.kernel.org> # 5.12
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/signal.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 90163e6184f5c..080e7aed181f4 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -512,7 +512,6 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal)
 
 	/* No handlers present - check for system call restart */
 	clear_pt_regs_flag(regs, PIF_SYSCALL);
-	clear_pt_regs_flag(regs, PIF_SYSCALL_RESTART);
 	if (current->thread.system_call) {
 		regs->int_code = current->thread.system_call;
 		switch (regs->gprs[2]) {
-- 
GitLab


From ca1f4d702d534387aa1f16379edb3b03cdb6ceda Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Fri, 11 Jun 2021 16:08:18 +0200
Subject: [PATCH 3393/3804] s390: clear pt_regs::flags on irq entry

The current irq entry code doesn't initialize pt_regs::flags. On exit to
user mode arch_do_signal_or_restart() tests whether PIF_SYSCALL is set,
which might yield wrong results.

Fix this by clearing pt_regs::flags in the entry.S irq handler
code.

Reported-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Fixes: 56e62a737028 ("s390: convert to generic entry")
Cc: <stable@vger.kernel.org> # 5.12
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/entry.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 9cc71ca9a88f9..e84f495e7eb29 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -418,6 +418,7 @@ ENTRY(\name)
 	xgr	%r6,%r6
 	xgr	%r7,%r7
 	xgr	%r10,%r10
+	xc	__PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
 	mvc	__PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
 	stmg	%r8,%r9,__PT_PSW(%r11)
 	tm	%r8,0x0001		# coming from user space?
-- 
GitLab


From 8c0795d2a0f50e2b131f5b2a8c2795939a94058e Mon Sep 17 00:00:00 2001
From: Tony Krowiak <akrowiak@linux.ibm.com>
Date: Wed, 9 Jun 2021 18:46:32 -0400
Subject: [PATCH 3394/3804] s390/vfio-ap: clean up mdev resources when remove
 callback invoked

The mdev remove callback for the vfio_ap device driver bails out with
-EBUSY if the mdev is in use by a KVM guest (i.e., the KVM pointer in the
struct ap_matrix_mdev is not NULL). The intended purpose was
to prevent the mdev from being removed while in use. There are two
problems with this scenario:

1. Returning a non-zero return code from the remove callback does not
   prevent the removal of the mdev.

2. The KVM pointer in the struct ap_matrix_mdev will always be NULL because
   the remove callback will not get invoked until the mdev fd is closed.
   When the mdev fd is closed, the mdev release callback is invoked and
   clears the KVM pointer from the struct ap_matrix_mdev.

Let's go ahead and remove the check for KVM in the remove callback and
allow the cleanup of mdev resources to proceed.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20210609224634.575156-2-akrowiak@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index b2c7e10dfdcdc..122c85c224695 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -366,16 +366,6 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
 	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
 	mutex_lock(&matrix_dev->lock);
-
-	/*
-	 * If the KVM pointer is in flux or the guest is running, disallow
-	 * un-assignment of control domain.
-	 */
-	if (matrix_mdev->kvm_busy || matrix_mdev->kvm) {
-		mutex_unlock(&matrix_dev->lock);
-		return -EBUSY;
-	}
-
 	vfio_ap_mdev_reset_queues(mdev);
 	list_del(&matrix_mdev->node);
 	kfree(matrix_mdev);
-- 
GitLab


From 9e3d62d55bf455d4f9fdf2ede5c8756410c64102 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Tue, 15 Jun 2021 15:05:22 +0200
Subject: [PATCH 3395/3804] s390/topology: clear thread/group maps for offline
 cpus

The current code doesn't clear the thread/group maps for offline
CPUs. This may cause kernel crashes like the one below in common
code that assumes if a CPU has siblings it is online.

Unable to handle kernel pointer dereference in virtual kernel address space

Call Trace:
 [<000000013a4b8c3c>] blk_mq_map_swqueue+0x10c/0x388
([<000000013a4b8bcc>] blk_mq_map_swqueue+0x9c/0x388)
 [<000000013a4b9300>] blk_mq_init_allocated_queue+0x448/0x478
 [<000000013a4b9416>] blk_mq_init_queue+0x4e/0x90
 [<000003ff8019d3e6>] loop_add+0x106/0x278 [loop]
 [<000003ff801b8148>] loop_init+0x148/0x1000 [loop]
 [<0000000139de4924>] do_one_initcall+0x3c/0x1e0
 [<0000000139ef449a>] do_init_module+0x6a/0x2a0
 [<0000000139ef61bc>] __do_sys_finit_module+0xa4/0xc0
 [<0000000139de9e6e>] do_syscall+0x7e/0xd0
 [<000000013a8e0aec>] __do_syscall+0xbc/0x110
 [<000000013a8ee2e8>] system_call+0x78/0xa0

Fixes: 52aeda7accb6 ("s390/topology: remove offline CPUs from CPU topology masks")
Cc: <stable@kernel.org> # 5.7+
Reported-by: Marius Hillenbrand <mhillen@linux.ibm.com>
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/topology.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bfcc327acc6b2..26aa2614ee352 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -66,7 +66,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
 {
 	static cpumask_t mask;
 
-	cpumask_copy(&mask, cpumask_of(cpu));
+	cpumask_clear(&mask);
+	if (!cpu_online(cpu))
+		goto out;
+	cpumask_set_cpu(cpu, &mask);
 	switch (topology_mode) {
 	case TOPOLOGY_MODE_HW:
 		while (info) {
@@ -83,10 +86,10 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
 	default:
 		fallthrough;
 	case TOPOLOGY_MODE_SINGLE:
-		cpumask_copy(&mask, cpumask_of(cpu));
 		break;
 	}
 	cpumask_and(&mask, &mask, cpu_online_mask);
+out:
 	cpumask_copy(dst, &mask);
 }
 
@@ -95,7 +98,10 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 	static cpumask_t mask;
 	int i;
 
-	cpumask_copy(&mask, cpumask_of(cpu));
+	cpumask_clear(&mask);
+	if (!cpu_online(cpu))
+		goto out;
+	cpumask_set_cpu(cpu, &mask);
 	if (topology_mode != TOPOLOGY_MODE_HW)
 		goto out;
 	cpu -= cpu % (smp_cpu_mtid + 1);
-- 
GitLab


From 67147e96a332b56c7206238162771d82467f86c0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Fri, 18 Jun 2021 16:58:47 +0200
Subject: [PATCH 3396/3804] s390/stack: fix possible register corruption with
 stack switch helper

The CALL_ON_STACK macro is used to call a C function from inline
assembly, and therefore must consider the C ABI, which says that only
registers 6-13, and 15 are non-volatile (restored by the called
function).

The inline assembly incorrectly marks all registers used to pass
parameters to the called function as read-only input operands, instead
of operands that are read and written to. This might result in
register corruption depending on usage, compiler, and compile options.

Fix this by marking all operands used to pass parameters as read/write
operands. To keep the code simple even register 6, if used, is marked
as read-write operand.

Fixes: ff340d2472ec ("s390: add stack switch helper")
Cc: <stable@kernel.org> # 4.20
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/stacktrace.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 2b543163d90a0..76c6034428be8 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -91,12 +91,16 @@ struct stack_frame {
 	CALL_ARGS_4(arg1, arg2, arg3, arg4);				\
 	register unsigned long r4 asm("6") = (unsigned long)(arg5)
 
-#define CALL_FMT_0 "=&d" (r2) :
-#define CALL_FMT_1 "+&d" (r2) :
-#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
-#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
-#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
-#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
+/*
+ * To keep this simple mark register 2-6 as being changed (volatile)
+ * by the called function, even though register 6 is saved/nonvolatile.
+ */
+#define CALL_FMT_0 "=&d" (r2)
+#define CALL_FMT_1 "+&d" (r2)
+#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3)
+#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4)
+#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5)
+#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
 
 #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
 #define CALL_CLOBBER_4 CALL_CLOBBER_5
@@ -118,7 +122,7 @@ struct stack_frame {
 		"	brasl	14,%[_fn]\n"				\
 		"	la	15,0(%[_prev])\n"			\
 		: [_prev] "=&a" (prev), CALL_FMT_##nr			\
-		  [_stack] "R" (stack),					\
+		: [_stack] "R" (stack),					\
 		  [_bc] "i" (offsetof(struct stack_frame, back_chain)),	\
 		  [_frame] "d" (frame),					\
 		  [_fn] "X" (fn) : CALL_CLOBBER_##nr);			\
-- 
GitLab


From 4249cb7d920060dfa925d3b9f6a37f0a7c025a16 Mon Sep 17 00:00:00 2001
From: Huilong Deng <denghuilong@cdjrlc.com>
Date: Sun, 20 Jun 2021 22:29:15 +0800
Subject: [PATCH 3397/3804] printk: Remove trailing semicolon in macros

Macros should not use a trailing semicolon.

Signed-off-by: Huilong Deng <denghuilong@cdjrlc.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/dev_printk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dev_printk.h b/include/linux/dev_printk.h
index 6f009559ee540..82d3d46005a14 100644
--- a/include/linux/dev_printk.h
+++ b/include/linux/dev_printk.h
@@ -236,7 +236,7 @@ do {									\
  * using WARN/WARN_ONCE to include file/line information and a backtrace.
  */
 #define dev_WARN(dev, format, arg...) \
-	WARN(1, "%s %s: " format, dev_driver_string(dev), dev_name(dev), ## arg);
+	WARN(1, "%s %s: " format, dev_driver_string(dev), dev_name(dev), ## arg)
 
 #define dev_WARN_ONCE(dev, condition, format, arg...) \
 	WARN_ONCE(condition, "%s %s: " format, \
-- 
GitLab


From 69bb0585ebb0c48c93fc55fc27afbfc06adef2fd Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 18 Jun 2021 16:11:22 +0100
Subject: [PATCH 3398/3804] arm64: insn: avoid circular include dependency

Nathan reports that when building with CONFIG_LTO_CLANG_THIN=y, the
build fails due to BUILD_BUG_ON() not being defined before its use in
<asm/insn.h>.

The problem is that with LTO, we patch READ_ONCE(), and <asm/rwonce.h>
includes <asm/insn.h>, creating a circular include chain:

        <linux/build_bug.h>
        <linux/compiler.h>
        <asm/rwonce.h>
        <asm/alternative-macros.h>
        <asm/insn.h>
        <linux/build_bug.h>

... and so when <asm/insn.h> includes <linux/build_bug.h>, none of the
BUILD_BUG* definitions have happened yet.

To avoid this, let's move AARCH64_INSN_SIZE into a header without any
dependencies, such that it can always be safely included. At the same
time, avoid including <asm/alternative.h> in <asm/insn.h>, which should
no longer be necessary (and doesn't make sense when insn.h is consumed
by userspace).

Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20210621080830.GA37068@C02TD0UTHF1T.local
Fixes: 3e00e39d9dad ("arm64: insn: move AARCH64_INSN_SIZE into <asm/insn.h>")
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/alternative-macros.h | 2 +-
 arch/arm64/include/asm/insn-def.h           | 9 +++++++++
 arch/arm64/include/asm/insn.h               | 5 +----
 3 files changed, 11 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/include/asm/insn-def.h

diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h
index 703fbf310b792..eba3173a2a2cd 100644
--- a/arch/arm64/include/asm/alternative-macros.h
+++ b/arch/arm64/include/asm/alternative-macros.h
@@ -3,7 +3,7 @@
 #define __ASM_ALTERNATIVE_MACROS_H
 
 #include <asm/cpucaps.h>
-#include <asm/insn.h>
+#include <asm/insn-def.h>
 
 #define ARM64_CB_PATCH ARM64_NCAPS
 
diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h
new file mode 100644
index 0000000000000..2c075f615c6ac
--- /dev/null
+++ b/arch/arm64/include/asm/insn-def.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_INSN_DEF_H
+#define __ASM_INSN_DEF_H
+
+/* A64 instructions are always 32 bits. */
+#define	AARCH64_INSN_SIZE		4
+
+#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 1430b4973039a..6b776c8667b20 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -10,10 +10,7 @@
 #include <linux/build_bug.h>
 #include <linux/types.h>
 
-#include <asm/alternative.h>
-
-/* A64 instructions are always 32 bits. */
-#define	AARCH64_INSN_SIZE		4
+#include <asm/insn-def.h>
 
 #ifndef __ASSEMBLY__
 /*
-- 
GitLab


From 61eb1b24f9e4f4e0725aa5f8164a932c933f3339 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sun, 20 Jun 2021 21:27:15 +0800
Subject: [PATCH 3399/3804] regulator: hi655x: Fix pass wrong pointer to
 config.driver_data

Current code sets config.driver_data to a zero initialized regulator
which is obviously wrong. Fix it.

Fixes: 4618119b9be5 ("regulator: hi655x: enable regulator for hi655x PMIC")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210620132715.60215-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi655x-regulator.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/hi655x-regulator.c b/drivers/regulator/hi655x-regulator.c
index 68cdb173196d6..556bb73f33292 100644
--- a/drivers/regulator/hi655x-regulator.c
+++ b/drivers/regulator/hi655x-regulator.c
@@ -72,7 +72,7 @@ enum hi655x_regulator_id {
 static int hi655x_is_enabled(struct regulator_dev *rdev)
 {
 	unsigned int value = 0;
-	struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
+	const struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
 
 	regmap_read(rdev->regmap, regulator->status_reg, &value);
 	return (value & rdev->desc->enable_mask);
@@ -80,7 +80,7 @@ static int hi655x_is_enabled(struct regulator_dev *rdev)
 
 static int hi655x_disable(struct regulator_dev *rdev)
 {
-	struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
+	const struct hi655x_regulator *regulator = rdev_get_drvdata(rdev);
 
 	return regmap_write(rdev->regmap, regulator->disable_reg,
 			    rdev->desc->enable_mask);
@@ -169,7 +169,6 @@ static const struct hi655x_regulator regulators[] = {
 static int hi655x_regulator_probe(struct platform_device *pdev)
 {
 	unsigned int i;
-	struct hi655x_regulator *regulator;
 	struct hi655x_pmic *pmic;
 	struct regulator_config config = { };
 	struct regulator_dev *rdev;
@@ -180,22 +179,17 @@ static int hi655x_regulator_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	regulator = devm_kzalloc(&pdev->dev, sizeof(*regulator), GFP_KERNEL);
-	if (!regulator)
-		return -ENOMEM;
-
-	platform_set_drvdata(pdev, regulator);
-
 	config.dev = pdev->dev.parent;
 	config.regmap = pmic->regmap;
-	config.driver_data = regulator;
 	for (i = 0; i < ARRAY_SIZE(regulators); i++) {
+		config.driver_data = (void *) &regulators[i];
+
 		rdev = devm_regulator_register(&pdev->dev,
 					       &regulators[i].rdesc,
 					       &config);
 		if (IS_ERR(rdev)) {
 			dev_err(&pdev->dev, "failed to register regulator %s\n",
-				regulator->rdesc.name);
+				regulators[i].rdesc.name);
 			return PTR_ERR(rdev);
 		}
 	}
-- 
GitLab


From ba5dabf40e9143ff6c48943b76a532d5ab34d0e8 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Thu, 17 Jun 2021 10:47:10 +0530
Subject: [PATCH 3400/3804] regulator: qcom-rpmh: Cleanup terminator line
 commas

Cleanup the qcom-rpmh regulator driver to remove comma(s)
at the end of the terminator line(s).

Cc: Mark Brown <broonie@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Link: https://lore.kernel.org/r/20210617051712.345372-4-bhupesh.sharma@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index 22fec370fa610..6a8b7ac67bbe0 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -811,12 +811,12 @@ static const struct rpmh_vreg_init_data pm8998_vreg_data[] = {
 	RPMH_VREG("ldo28",  "ldo%s28", &pmic4_pldo,      "vdd-l16-l28"),
 	RPMH_VREG("lvs1",   "vs%s1",   &pmic4_lvs,       "vin-lvs-1-2"),
 	RPMH_VREG("lvs2",   "vs%s2",   &pmic4_lvs,       "vin-lvs-1-2"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pmi8998_vreg_data[] = {
 	RPMH_VREG("bob",    "bob%s1",  &pmic4_bob,       "vdd-bob"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8005_vreg_data[] = {
@@ -824,7 +824,7 @@ static const struct rpmh_vreg_init_data pm8005_vreg_data[] = {
 	RPMH_VREG("smps2",  "smp%s2",  &pmic4_ftsmps426, "vdd-s2"),
 	RPMH_VREG("smps3",  "smp%s3",  &pmic4_ftsmps426, "vdd-s3"),
 	RPMH_VREG("smps4",  "smp%s4",  &pmic4_ftsmps426, "vdd-s4"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8150_vreg_data[] = {
@@ -856,7 +856,7 @@ static const struct rpmh_vreg_init_data pm8150_vreg_data[] = {
 	RPMH_VREG("ldo16",  "ldo%s16", &pmic5_pldo,      "vdd-l13-l16-l17"),
 	RPMH_VREG("ldo17",  "ldo%s17", &pmic5_pldo,      "vdd-l13-l16-l17"),
 	RPMH_VREG("ldo18",  "ldo%s18", &pmic5_nldo,      "vdd-l3-l4-l5-l18"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8150l_vreg_data[] = {
@@ -880,7 +880,7 @@ static const struct rpmh_vreg_init_data pm8150l_vreg_data[] = {
 	RPMH_VREG("ldo10",  "ldo%s10", &pmic5_pldo,      "vdd-l9-l10"),
 	RPMH_VREG("ldo11",  "ldo%s11", &pmic5_pldo,      "vdd-l7-l11"),
 	RPMH_VREG("bob",    "bob%s1",  &pmic5_bob,       "vdd-bob"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8350_vreg_data[] = {
@@ -906,7 +906,7 @@ static const struct rpmh_vreg_init_data pm8350_vreg_data[] = {
 	RPMH_VREG("ldo8",   "ldo%s8",  &pmic5_nldo,      "vdd-l8"),
 	RPMH_VREG("ldo9",   "ldo%s9",  &pmic5_nldo,      "vdd-l6-l9-l10"),
 	RPMH_VREG("ldo10",  "ldo%s10", &pmic5_nldo,      "vdd-l6-l9-l10"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8350c_vreg_data[] = {
@@ -934,7 +934,7 @@ static const struct rpmh_vreg_init_data pm8350c_vreg_data[] = {
 	RPMH_VREG("ldo12",  "ldo%s12", &pmic5_pldo_lv,   "vdd-l1-l12"),
 	RPMH_VREG("ldo13",  "ldo%s13", &pmic5_pldo,      "vdd-l3-l4-l5-l7-l13"),
 	RPMH_VREG("bob",    "bob%s1",  &pmic5_bob,       "vdd-bob"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8009_vreg_data[] = {
@@ -947,7 +947,7 @@ static const struct rpmh_vreg_init_data pm8009_vreg_data[] = {
 	RPMH_VREG("ldo5",   "ldo%s5",  &pmic5_pldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo6",   "ldo%s6",  &pmic5_pldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo7",   "ldo%s7",  &pmic5_pldo_lv,   "vdd-l7"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm8009_1_vreg_data[] = {
@@ -960,7 +960,7 @@ static const struct rpmh_vreg_init_data pm8009_1_vreg_data[] = {
 	RPMH_VREG("ldo5",   "ldo%s5",  &pmic5_pldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo6",   "ldo%s6",  &pmic5_pldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo7",   "ldo%s6",  &pmic5_pldo_lv,   "vdd-l7"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm6150_vreg_data[] = {
@@ -988,7 +988,7 @@ static const struct rpmh_vreg_init_data pm6150_vreg_data[] = {
 	RPMH_VREG("ldo17",  "ldo%s17", &pmic5_pldo,   "vdd-l5-l16-l17-l18-l19"),
 	RPMH_VREG("ldo18",  "ldo%s18", &pmic5_pldo,   "vdd-l5-l16-l17-l18-l19"),
 	RPMH_VREG("ldo19",  "ldo%s19", &pmic5_pldo,   "vdd-l5-l16-l17-l18-l19"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm6150l_vreg_data[] = {
@@ -1012,7 +1012,7 @@ static const struct rpmh_vreg_init_data pm6150l_vreg_data[] = {
 	RPMH_VREG("ldo10",  "ldo%s10", &pmic5_pldo,      "vdd-l9-l10"),
 	RPMH_VREG("ldo11",  "ldo%s11", &pmic5_pldo,      "vdd-l7-l11"),
 	RPMH_VREG("bob",    "bob%s1",  &pmic5_bob,       "vdd-bob"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pmx55_vreg_data[] = {
@@ -1039,7 +1039,7 @@ static const struct rpmh_vreg_init_data pmx55_vreg_data[] = {
 	RPMH_VREG("ldo14",   "ldo%s14",   &pmic5_nldo,      "vdd-l14"),
 	RPMH_VREG("ldo15",   "ldo%s15",   &pmic5_nldo,      "vdd-l15"),
 	RPMH_VREG("ldo16",   "ldo%s16",   &pmic5_pldo,      "vdd-l16"),
-	{},
+	{}
 };
 
 static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
-- 
GitLab


From f26cdadad729743888eb4ac2c17eac3cf845b493 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Thu, 17 Jun 2021 10:47:11 +0530
Subject: [PATCH 3401/3804] regulator: qcom-rpmh: Add terminator at the end of
 pm7325x_vreg_data[] array

Add missing terminator(s) at the end of pm7325x_vreg_data[]
array instances.

Fixes: c4e5aa3dbee5 ("regulator: qcom-rpmh: Add PM7325/PMR735A regulator support")
Cc: Mark Brown <broonie@kernel.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Link: https://lore.kernel.org/r/20210617051712.345372-5-bhupesh.sharma@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index 6a8b7ac67bbe0..af41a517da991 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1070,6 +1070,7 @@ static const struct rpmh_vreg_init_data pm7325_vreg_data[] = {
 	RPMH_VREG("ldo17",  "ldo%s17", &pmic5_pldo_lv,   "vdd-l11-l17-l18-l19"),
 	RPMH_VREG("ldo18",  "ldo%s18", &pmic5_pldo_lv,   "vdd-l11-l17-l18-l19"),
 	RPMH_VREG("ldo19",  "ldo%s19", &pmic5_pldo_lv,   "vdd-l11-l17-l18-l19"),
+	{}
 };
 
 static const struct rpmh_vreg_init_data pmr735a_vreg_data[] = {
@@ -1083,6 +1084,7 @@ static const struct rpmh_vreg_init_data pmr735a_vreg_data[] = {
 	RPMH_VREG("ldo5",   "ldo%s5",  &pmic5_nldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo6",   "ldo%s6",  &pmic5_nldo,      "vdd-l5-l6"),
 	RPMH_VREG("ldo7",   "ldo%s7",  &pmic5_pldo,      "vdd-l7-bob"),
+	{}
 };
 
 static int rpmh_regulator_probe(struct platform_device *pdev)
-- 
GitLab


From 9a336ed97d00bb69547272fc7d0439802bece375 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Thu, 17 Jun 2021 10:47:12 +0530
Subject: [PATCH 3402/3804] regulator: qcom-rpmh: Add new regulator found on
 SA8155p adp board

SA8155p-adp board supports a new regulator - pmm8155au.

The output power management circuits in this regulator include:
- FTS510 smps,
- HFS510 smps, and
- LDO510 linear regulators

Add support for the same.

Cc: Mark Brown <broonie@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210617051712.345372-6-bhupesh.sharma@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/qcom-rpmh-regulator.c | 36 +++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index af41a517da991..6cca910a76ded 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -883,6 +883,38 @@ static const struct rpmh_vreg_init_data pm8150l_vreg_data[] = {
 	{}
 };
 
+static const struct rpmh_vreg_init_data pmm8155au_vreg_data[] = {
+	RPMH_VREG("smps1",  "smp%s1",  &pmic5_ftsmps510, "vdd-s1"),
+	RPMH_VREG("smps2",  "smp%s2",  &pmic5_ftsmps510, "vdd-s2"),
+	RPMH_VREG("smps3",  "smp%s3",  &pmic5_ftsmps510, "vdd-s3"),
+	RPMH_VREG("smps4",  "smp%s4",  &pmic5_hfsmps510, "vdd-s4"),
+	RPMH_VREG("smps5",  "smp%s5",  &pmic5_hfsmps510, "vdd-s5"),
+	RPMH_VREG("smps6",  "smp%s6",  &pmic5_ftsmps510, "vdd-s6"),
+	RPMH_VREG("smps7",  "smp%s7",  &pmic5_ftsmps510, "vdd-s7"),
+	RPMH_VREG("smps8",  "smp%s8",  &pmic5_ftsmps510, "vdd-s8"),
+	RPMH_VREG("smps9",  "smp%s9",  &pmic5_ftsmps510, "vdd-s9"),
+	RPMH_VREG("smps10", "smp%s10", &pmic5_ftsmps510, "vdd-s10"),
+	RPMH_VREG("ldo1",   "ldo%s1",  &pmic5_nldo,      "vdd-l1-l8-l11"),
+	RPMH_VREG("ldo2",   "ldo%s2",  &pmic5_pldo,      "vdd-l2-l10"),
+	RPMH_VREG("ldo3",   "ldo%s3",  &pmic5_nldo,      "vdd-l3-l4-l5-l18"),
+	RPMH_VREG("ldo4",   "ldo%s4",  &pmic5_nldo,      "vdd-l3-l4-l5-l18"),
+	RPMH_VREG("ldo5",   "ldo%s5",  &pmic5_nldo,      "vdd-l3-l4-l5-l18"),
+	RPMH_VREG("ldo6",   "ldo%s6",  &pmic5_nldo,      "vdd-l6-l9"),
+	RPMH_VREG("ldo7",   "ldo%s7",  &pmic5_pldo_lv,   "vdd-l7-l12-l14-l15"),
+	RPMH_VREG("ldo8",   "ldo%s8",  &pmic5_nldo,      "vdd-l1-l8-l11"),
+	RPMH_VREG("ldo9",   "ldo%s9",  &pmic5_nldo,      "vdd-l6-l9"),
+	RPMH_VREG("ldo10",  "ldo%s10", &pmic5_pldo,      "vdd-l2-l10"),
+	RPMH_VREG("ldo11",  "ldo%s11", &pmic5_nldo,      "vdd-l1-l8-l11"),
+	RPMH_VREG("ldo12",  "ldo%s12", &pmic5_pldo_lv,   "vdd-l7-l12-l14-l15"),
+	RPMH_VREG("ldo13",  "ldo%s13", &pmic5_pldo,      "vdd-l13-l16-l17"),
+	RPMH_VREG("ldo14",  "ldo%s14", &pmic5_pldo_lv,   "vdd-l7-l12-l14-l15"),
+	RPMH_VREG("ldo15",  "ldo%s15", &pmic5_pldo_lv,   "vdd-l7-l12-l14-l15"),
+	RPMH_VREG("ldo16",  "ldo%s16", &pmic5_pldo,      "vdd-l13-l16-l17"),
+	RPMH_VREG("ldo17",  "ldo%s17", &pmic5_pldo,      "vdd-l13-l16-l17"),
+	RPMH_VREG("ldo18",  "ldo%s18", &pmic5_nldo,      "vdd-l3-l4-l5-l18"),
+	{}
+};
+
 static const struct rpmh_vreg_init_data pm8350_vreg_data[] = {
 	RPMH_VREG("smps1",  "smp%s1",  &pmic5_ftsmps510, "vdd-s1"),
 	RPMH_VREG("smps2",  "smp%s2",  &pmic5_ftsmps510, "vdd-s2"),
@@ -1177,6 +1209,10 @@ static const struct of_device_id __maybe_unused rpmh_regulator_match_table[] = {
 		.compatible = "qcom,pmc8180c-rpmh-regulators",
 		.data = pm8150l_vreg_data,
 	},
+	{
+		.compatible = "qcom,pmm8155au-rpmh-regulators",
+		.data = pmm8155au_vreg_data,
+	},
 	{
 		.compatible = "qcom,pmx55-rpmh-regulators",
 		.data = pmx55_vreg_data,
-- 
GitLab


From 85adaac269c36d8e2e0a5de87a1dc4ec06e984f1 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Thu, 17 Jun 2021 10:47:08 +0530
Subject: [PATCH 3403/3804] regulator: qcom,rpmh-regulator: Arrange compatibles
 alphabetically

Arrange the compatibles inside qcom-rpmh regulator device tree
bindings alphabetically.

Cc: Mark Brown <broonie@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210617051712.345372-2-bhupesh.sharma@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/qcom,rpmh-regulator.yaml  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index e561a5b941e46..3546c6a966a33 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -33,6 +33,9 @@ description: |
 
     The names used for regulator nodes must match those supported by a given
     PMIC. Supported regulator node names are
+      For PM6150, smps1 - smps5, ldo1 - ldo19
+      For PM6150L, smps1 - smps8, ldo1 - ldo11, bob
+      For PM7325, smps1 - smps8, ldo1 - ldo19
       For PM8005, smps1 - smps4
       For PM8009, smps1 - smps2, ldo1 - ldo7
       For PM8150, smps1 - smps10, ldo1 - ldo18
@@ -41,15 +44,15 @@ description: |
       For PM8350C, smps1 - smps10, ldo1 - ldo13, bob
       For PM8998, smps1 - smps13, ldo1 - ldo28, lvs1 - lvs2
       For PMI8998, bob
-      For PM6150, smps1 - smps5, ldo1 - ldo19
-      For PM6150L, smps1 - smps8, ldo1 - ldo11, bob
-      For PMX55, smps1 - smps7, ldo1 - ldo16
-      For PM7325, smps1 - smps8, ldo1 - ldo19
       For PMR735A, smps1 - smps3, ldo1 - ldo7
+      For PMX55, smps1 - smps7, ldo1 - ldo16
 
 properties:
   compatible:
     enum:
+      - qcom,pm6150-rpmh-regulators
+      - qcom,pm6150l-rpmh-regulators
+      - qcom,pm7325-rpmh-regulators
       - qcom,pm8005-rpmh-regulators
       - qcom,pm8009-rpmh-regulators
       - qcom,pm8009-1-rpmh-regulators
@@ -59,11 +62,8 @@ properties:
       - qcom,pm8350c-rpmh-regulators
       - qcom,pm8998-rpmh-regulators
       - qcom,pmi8998-rpmh-regulators
-      - qcom,pm6150-rpmh-regulators
-      - qcom,pm6150l-rpmh-regulators
-      - qcom,pmx55-rpmh-regulators
-      - qcom,pm7325-rpmh-regulators
       - qcom,pmr735a-rpmh-regulators
+      - qcom,pmx55-rpmh-regulators
 
   qcom,pmic-id:
     description: |
-- 
GitLab


From 66376e152303bb60d6a75328b7bc998de86f8c08 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Thu, 17 Jun 2021 10:47:09 +0530
Subject: [PATCH 3404/3804] regulator: qcom,rpmh-regulator: Add compatible for
 SA8155p-adp board pmic

Add compatible string for pmm8155au pmic found on
the SA8155p-adp board.

Cc: Mark Brown <broonie@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210617051712.345372-3-bhupesh.sharma@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/regulator/qcom,rpmh-regulator.yaml       | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
index 3546c6a966a33..34de38377aa66 100644
--- a/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/qcom,rpmh-regulator.yaml
@@ -62,6 +62,7 @@ properties:
       - qcom,pm8350c-rpmh-regulators
       - qcom,pm8998-rpmh-regulators
       - qcom,pmi8998-rpmh-regulators
+      - qcom,pmm8155au-rpmh-regulators
       - qcom,pmr735a-rpmh-regulators
       - qcom,pmx55-rpmh-regulators
 
-- 
GitLab


From ae60e6a9d24e89a74e2512204ad04de94921bdd2 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 19 Jun 2021 20:41:33 +0800
Subject: [PATCH 3405/3804] regulator: hi6421: Use correct variable type for
 regmap api val argument

Use unsigned int instead of u32 for regmap_read/regmap_update_bits val
argument.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210619124133.4096683-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421-regulator.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c
index dc631c1a46b4c..bff8c515dcde7 100644
--- a/drivers/regulator/hi6421-regulator.c
+++ b/drivers/regulator/hi6421-regulator.c
@@ -386,7 +386,7 @@ static int hi6421_regulator_enable(struct regulator_dev *rdev)
 static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev)
 {
 	struct hi6421_regulator_info *info = rdev_get_drvdata(rdev);
-	u32 reg_val;
+	unsigned int reg_val;
 
 	regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val);
 	if (reg_val & info->mode_mask)
@@ -398,7 +398,7 @@ static unsigned int hi6421_regulator_ldo_get_mode(struct regulator_dev *rdev)
 static unsigned int hi6421_regulator_buck_get_mode(struct regulator_dev *rdev)
 {
 	struct hi6421_regulator_info *info = rdev_get_drvdata(rdev);
-	u32 reg_val;
+	unsigned int reg_val;
 
 	regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val);
 	if (reg_val & info->mode_mask)
@@ -411,7 +411,7 @@ static int hi6421_regulator_ldo_set_mode(struct regulator_dev *rdev,
 						unsigned int mode)
 {
 	struct hi6421_regulator_info *info = rdev_get_drvdata(rdev);
-	u32 new_mode;
+	unsigned int new_mode;
 
 	switch (mode) {
 	case REGULATOR_MODE_NORMAL:
@@ -435,7 +435,7 @@ static int hi6421_regulator_buck_set_mode(struct regulator_dev *rdev,
 						unsigned int mode)
 {
 	struct hi6421_regulator_info *info = rdev_get_drvdata(rdev);
-	u32 new_mode;
+	unsigned int new_mode;
 
 	switch (mode) {
 	case REGULATOR_MODE_NORMAL:
-- 
GitLab


From d83f778c627ad4e80bd82dbc88ffa1b1b18876bb Mon Sep 17 00:00:00 2001
From: Sergey Larin <cerg2010cerg2010@mail.ru>
Date: Fri, 18 Jun 2021 17:16:06 +0300
Subject: [PATCH 3406/3804] regulator: max8893: add regulator driver

MAX8893 is a simple regulator which can be found on some of Samsung
phones.

Signed-off-by: Sergey Larin <cerg2010cerg2010@mail.ru>
Link: https://lore.kernel.org/r/20210618141607.884-1-cerg2010cerg2010@mail.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig   |   7 ++
 drivers/regulator/Makefile  |   1 +
 drivers/regulator/max8893.c | 183 ++++++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+)
 create mode 100644 drivers/regulator/max8893.c

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index a69b546872c1b..7c39570b99b09 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -578,6 +578,13 @@ config REGULATOR_MAX8660
 	  This driver controls a Maxim 8660/8661 voltage output
 	  regulator via I2C bus.
 
+config REGULATOR_MAX8893
+	tristate "Maxim 8893 voltage regulator"
+	depends on I2C
+	help
+	  This driver controls a Maxim 8893 voltage output
+	  regulator via I2C bus.
+
 config REGULATOR_MAX8907
 	tristate "Maxim 8907 voltage regulator"
 	depends on MFD_MAX8907 || COMPILE_TEST
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 028f2b8788db2..2f072544285ad 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_REGULATOR_MAX77620) += max77620-regulator.o
 obj-$(CONFIG_REGULATOR_MAX77650) += max77650-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8649)	+= max8649.o
 obj-$(CONFIG_REGULATOR_MAX8660) += max8660.o
+obj-$(CONFIG_REGULATOR_MAX8893) += max8893.o
 obj-$(CONFIG_REGULATOR_MAX8907) += max8907-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8925) += max8925-regulator.o
 obj-$(CONFIG_REGULATOR_MAX8952) += max8952.o
diff --git a/drivers/regulator/max8893.c b/drivers/regulator/max8893.c
new file mode 100644
index 0000000000000..1519bf760da7f
--- /dev/null
+++ b/drivers/regulator/max8893.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+
+static const struct regulator_ops max8893_ops = {
+	.is_enabled		= regulator_is_enabled_regmap,
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
+	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
+	.list_voltage		= regulator_list_voltage_linear,
+	.map_voltage		= regulator_map_voltage_linear,
+};
+
+static const struct regulator_desc max8893_regulators[] = {
+	{
+		.name = "BUCK",
+		.supply_name = "in-buck",
+		.of_match = of_match_ptr("buck"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x11,
+		.id = 6,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 800000,
+		.uV_step = 100000,
+		.vsel_reg = 0x4,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(7),
+	},
+	{
+		.name = "LDO1",
+		.supply_name = "in-ldo1",
+		.of_match = of_match_ptr("ldo1"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x12,
+		.id = 1,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 1600000,
+		.uV_step = 100000,
+		.vsel_reg = 0x5,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(5),
+	},
+	{
+		.name = "LDO2",
+		.supply_name = "in-ldo2",
+		.of_match = of_match_ptr("ldo2"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x16,
+		.id = 2,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 1200000,
+		.uV_step = 100000,
+		.vsel_reg = 0x6,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(4),
+	},
+	{
+		.name = "LDO3",
+		.supply_name = "in-ldo3",
+		.of_match = of_match_ptr("ldo3"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x12,
+		.id = 3,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 1600000,
+		.uV_step = 100000,
+		.vsel_reg = 0x7,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(3),
+	},
+	{
+		.name = "LDO4",
+		.supply_name = "in-ldo4",
+		.of_match = of_match_ptr("ldo4"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x1a,
+		.id = 4,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 800000,
+		.uV_step = 100000,
+		.vsel_reg = 0x8,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(2),
+	},
+	{
+		.name = "LDO5",
+		.supply_name = "in-ldo5",
+		.of_match = of_match_ptr("ldo5"),
+		.regulators_node = of_match_ptr("regulators"),
+		.n_voltages = 0x1a,
+		.id = 5,
+		.ops = &max8893_ops,
+		.type = REGULATOR_VOLTAGE,
+		.owner = THIS_MODULE,
+		.min_uV = 800000,
+		.uV_step = 100000,
+		.vsel_reg = 0x9,
+		.vsel_mask = 0x1f,
+		.enable_reg = 0x0,
+		.enable_mask = BIT(1),
+	}
+};
+
+static const struct regmap_config max8893_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
+
+static int max8893_probe_new(struct i2c_client *i2c)
+{
+	int id, ret;
+	struct regulator_config config = {.dev = &i2c->dev};
+	struct regmap *regmap = devm_regmap_init_i2c(i2c, &max8893_regmap);
+
+	if (IS_ERR(regmap)) {
+		ret = PTR_ERR(regmap);
+		dev_err(&i2c->dev, "regmap init failed: %d\n", ret);
+		return ret;
+	}
+
+	for (id = 0; id < ARRAY_SIZE(max8893_regulators); id++) {
+		struct regulator_dev *rdev;
+		rdev = devm_regulator_register(&i2c->dev,
+					       &max8893_regulators[id],
+					       &config);
+		if (IS_ERR(rdev)) {
+			ret = PTR_ERR(rdev);
+			dev_err(&i2c->dev, "failed to register %s: %d\n",
+				max8893_regulators[id].name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id max8893_dt_match[] = {
+	{ .compatible = "maxim,max8893" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, max8893_dt_match);
+#endif
+
+static const struct i2c_device_id max8893_ids[] = {
+	{ "max8893", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, max8893_ids);
+
+static struct i2c_driver max8893_driver = {
+	.probe_new	= max8893_probe_new,
+	.driver		= {
+		.name	= "max8893",
+		.of_match_table = of_match_ptr(max8893_dt_match),
+	},
+	.id_table	= max8893_ids,
+};
+
+module_i2c_driver(max8893_driver);
+
+MODULE_DESCRIPTION("Maxim MAX8893 PMIC driver");
+MODULE_AUTHOR("Sergey Larin <cerg2010cerg2010@mail.ru>");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 01c5741b82969d096ac0870d997b7d2f5a5fe970 Mon Sep 17 00:00:00 2001
From: Sergey Larin <cerg2010cerg2010@mail.ru>
Date: Fri, 18 Jun 2021 17:16:07 +0300
Subject: [PATCH 3407/3804] regulator: Add MAX8893 bindings

Add Maxim MAX8893 PMIC device tree bindings. The example is also
provided.

Signed-off-by: Sergey Larin <cerg2010cerg2010@mail.ru>
Link: https://lore.kernel.org/r/20210618141607.884-2-cerg2010cerg2010@mail.ru
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/max8893.yaml           | 88 +++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/regulator/max8893.yaml

diff --git a/Documentation/devicetree/bindings/regulator/max8893.yaml b/Documentation/devicetree/bindings/regulator/max8893.yaml
new file mode 100644
index 0000000000000..2b5e977bf4093
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8893.yaml
@@ -0,0 +1,88 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/regulator/max8893.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Regulator driver for MAX8893 PMIC from Maxim Integrated.
+
+maintainers:
+  - Sergey Larin <cerg2010cerg2010@mail.ru>
+
+description: |
+  The device has 5 LDO regulators and a single BUCK regulator.
+  Programming is done through I2C bus.
+
+properties:
+  compatible:
+    const: maxim,max8893
+
+  reg:
+    maxItems: 1
+
+  regulators:
+    type: object
+
+    patternProperties:
+      "^(ldo[1-5]|buck)$":
+        $ref: "regulator.yaml#"
+
+    additionalProperties: false
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - regulators
+
+examples:
+  - |
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            pmic@3e {
+                    compatible = "maxim,max8893";
+                    reg = <0x3e>;
+
+                    regulators {
+                            /* Front camera - s5k6aafx, back - m5mo */
+                            /* Numbers used to indicate the sequence */
+                            front_1_back_1: buck {
+                                    regulator-name = "cam_isp_core_1v2";
+                                    regulator-min-microvolt = <1200000>;
+                                    regulator-max-microvolt = <1200000>;
+                            };
+
+                            front_4_back_5: ldo1 {
+                                    regulator-name = "vt_io_1v8,cam_isp_1v8";
+                                    regulator-min-microvolt = <1800000>;
+                                    regulator-max-microvolt = <1800000>;
+                            };
+
+                            front_3_back_4: ldo2 {
+                                    regulator-name = "vt_core_1v5";
+                                    regulator-min-microvolt = <1500000>;
+                                    regulator-max-microvolt = <1500000>;
+                            };
+
+                            front_5_back_6: ldo3 {
+                                    regulator-name = "vt_cam_1v8,vt_sensor_io_1v8";
+                                    regulator-min-microvolt = <1800000>;
+                                    regulator-max-microvolt = <1800000>;
+                            };
+
+                            ldo4 {
+                                    /* not used */
+                            };
+
+                            back_7: ldo5 {
+                                    regulator-name = "cam_sensor_io_1v8";
+                                    regulator-min-microvolt = <1800000>;
+                                    regulator-max-microvolt = <1800000>;
+                            };
+                    };
+            };
+    };
+...
-- 
GitLab


From 57c045bc727001c43b6a65adb0418aa7b3e6dbd0 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 19 Jun 2021 20:34:23 +0800
Subject: [PATCH 3408/3804] regulator: hi6421v600: Fix setting idle mode

commit db27f8294cd7 changed eco_mode << (ffs(sreg->eco_mode_mask) - 1)
to sreg->eco_mode_mask << (ffs(sreg->eco_mode_mask) - 1) which is wrong.
Fix it by simply setting val = sreg->eco_mode_mask.

In addition, sreg->eco_mode_mask can be 0 (LDO3, LDO33, LDO34).
Return -EINVAL if idle mode is not supported when sreg->eco_mode_mask is 0.

While at it, also use unsigned int for reg_val/val which is the expected
type for regmap_read and regmap_update_bits.

Fixes: db27f8294cd7 ("staging: regulator: hi6421v600-regulator: use shorter names for OF properties")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210619123423.4091429-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421v600-regulator.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index 417cf5b4a1c39..cf14109d486f5 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -117,7 +117,7 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev)
 static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev)
 {
 	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	u32 reg_val;
+	unsigned int reg_val;
 
 	regmap_read(rdev->regmap, rdev->desc->enable_reg, &reg_val);
 
@@ -131,14 +131,17 @@ static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev,
 					  unsigned int mode)
 {
 	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
-	u32 val;
+	unsigned int val;
 
 	switch (mode) {
 	case REGULATOR_MODE_NORMAL:
 		val = 0;
 		break;
 	case REGULATOR_MODE_IDLE:
-		val = sreg->eco_mode_mask << (ffs(sreg->eco_mode_mask) - 1);
+		if (!sreg->eco_mode_mask)
+			return -EINVAL;
+
+		val = sreg->eco_mode_mask;
 		break;
 	default:
 		return -EINVAL;
-- 
GitLab


From 673e851b7da81256e73fb738c550ec39bac1c9ff Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:40:28 +0300
Subject: [PATCH 3409/3804] regulator: Add protection limit properties

Support specifying protection/error/warning limits for regulator
over current, over temperature and over/under voltage.

Most of the PMICs support only "protection" feature but few
setups do also support error/warning level indications.

On many ICs most of the protection limits can't actually be set.
But for example the ampere limit for over-current protection on ROHM
BD9576 can be configured - or feature can be completely disabled.

Provide limit setting for all protections/errors for the sake of
the completeness and do that using own properties for all so that
not all users would need to set all levels when only one or few are
supported.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/ae2c6056d5ed1334912d27e736d23c9151065433.1622628333.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/regulator.yaml         | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/regulator.yaml b/Documentation/devicetree/bindings/regulator/regulator.yaml
index 6d0bc9cd40403..a6ae9ecae5cc3 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/regulator.yaml
@@ -117,6 +117,88 @@ properties:
     description: Enable over current protection.
     type: boolean
 
+  regulator-oc-protection-microamp:
+    description: Set over current protection limit. This is a limit where
+      hardware performs emergency shutdown. Zero can be passed to disable
+      protection and value '1' indicates that protection should be enabled but
+      limit setting can be omitted.
+
+  regulator-oc-error-microamp:
+    description: Set over current error limit. This is a limit where part of
+      the hardware is probably malfunctioning and damage prevention is requested.
+      Zero can be passed to disable error detection and value '1' indicates
+      that detection should be enabled but limit setting can be omitted.
+
+  regulator-oc-warn-microamp:
+    description: Set over current warning limit. This is a limit where hardware
+      is assumed still to be functional but approaching limit where it gets
+      damaged. Recovery actions should be initiated. Zero can be passed to
+      disable detection and value '1' indicates that detection should
+      be enabled but limit setting can be omitted.
+
+  regulator-ov-protection-microvolt:
+    description: Set over voltage protection limit. This is a limit where
+      hardware performs emergency shutdown. Zero can be passed to disable
+      protection and value '1' indicates that protection should be enabled but
+      limit setting can be omitted. Limit is given as microvolt offset from
+      voltage set to regulator.
+
+  regulator-ov-error-microvolt:
+    description: Set over voltage error limit. This is a limit where part of
+      the hardware is probably malfunctioning and damage prevention is requested.
+      Zero can be passed to disable error detection and value '1' indicates
+      that detection should be enabled but limit setting can be omitted. Limit
+      is given as microvolt offset from voltage set to regulator.
+
+  regulator-ov-warn-microvolt:
+    description: Set over voltage warning limit. This is a limit where hardware
+      is assumed still to be functional but approaching limit where it gets
+      damaged. Recovery actions should be initiated. Zero can be passed to
+      disable detection and value '1' indicates that detection should
+      be enabled but limit setting can be omitted. Limit is given as microvolt
+      offset from voltage set to regulator.
+
+  regulator-uv-protection-microvolt:
+    description: Set under voltage protection limit. This is a limit where
+      hardware performs emergency shutdown. Zero can be passed to disable
+      protection and value '1' indicates that protection should be enabled but
+      limit setting can be omitted. Limit is given as microvolt offset from
+      voltage set to regulator.
+
+  regulator-uv-error-microvolt:
+    description: Set under voltage error limit. This is a limit where part of
+      the hardware is probably malfunctioning and damage prevention is requested.
+      Zero can be passed to disable error detection and value '1' indicates
+      that detection should be enabled but limit setting can be omitted. Limit
+      is given as microvolt offset from voltage set to regulator.
+
+  regulator-uv-warn-microvolt:
+    description: Set under voltage warning limit. This is a limit where
+      hardware is assumed still to be functional but approaching limit where
+      it gets damaged. Recovery actions should be initiated. Zero can be passed
+      to disable detection and value '1' indicates that detection should
+      be enabled but limit setting can be omitted. Limit is given as microvolt
+      offset from voltage set to regulator.
+
+  regulator-temp-protection-kelvin:
+    description: Set over temperature protection limit. This is a limit where
+      hardware performs emergency shutdown. Zero can be passed to disable
+      protection and value '1' indicates that protection should be enabled but
+      limit setting can be omitted.
+
+  regulator-temp-error-kelvin:
+    description: Set over temperature error limit. This is a limit where part of
+      the hardware is probably malfunctioning and damage prevention is requested.
+      Zero can be passed to disable error detection and value '1' indicates
+      that detection should be enabled but limit setting can be omitted.
+
+  regulator-temp-warn-kelvin:
+    description: Set over temperature warning limit. This is a limit where
+      hardware is assumed still to be functional but approaching limit where it
+      gets damaged. Recovery actions should be initiated. Zero can be passed to
+      disable detection and value '1' indicates that detection should
+      be enabled but limit setting can be omitted.
+
   regulator-active-discharge:
     description: |
       tristate, enable/disable active discharge of regulators. The values are:
-- 
GitLab


From dfa19b11385d4cf8f0242fd93e2073e25183c331 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:40:45 +0300
Subject: [PATCH 3410/3804] reboot: Add hardware protection power-off

There can be few cases when we need to shut-down the system in order to
protect the hardware. Currently this is done at least by the thermal core
when temperature raises over certain limit.

Some PMICs can also generate interrupts for example for over-current or
over-voltage, voltage drops, short-circuit, ... etc. On some systems
these are a sign of hardware failure and only thing to do is try to
protect the rest of the hardware by shutting down the system.

Add shut-down logic which can be used by all subsystems instead of
implementing the shutdown in each subsystem. The logic is stolen from
thermal_core with difference of using atomic_t instead of a mutex in
order to allow calls directly from IRQ context and changing the WARN()
to pr_emerg() as discussed here:
https://lore.kernel.org/lkml/YJuPwAZroVZ%2Fw633@alley/
and here:
https://lore.kernel.org/linux-iommu/20210331093104.383705-4-geert+renesas@glider.be/

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/e83ec1ca9408f90c857ea9dcdc57b14d9037b03f.1622628333.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/reboot.h |  1 +
 kernel/reboot.c        | 79 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/include/linux/reboot.h b/include/linux/reboot.h
index 3734cd8f38a89..af907a3d68d1d 100644
--- a/include/linux/reboot.h
+++ b/include/linux/reboot.h
@@ -79,6 +79,7 @@ extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
 
 extern void orderly_poweroff(bool force);
 extern void orderly_reboot(void);
+void hw_protection_shutdown(const char *reason, int ms_until_forced);
 
 /*
  * Emergency restart, callable from an interrupt handler.
diff --git a/kernel/reboot.c b/kernel/reboot.c
index a6ad5eb2fa733..f7440c0c7e434 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -7,6 +7,7 @@
 
 #define pr_fmt(fmt)	"reboot: " fmt
 
+#include <linux/atomic.h>
 #include <linux/ctype.h>
 #include <linux/export.h>
 #include <linux/kexec.h>
@@ -518,6 +519,84 @@ void orderly_reboot(void)
 }
 EXPORT_SYMBOL_GPL(orderly_reboot);
 
+/**
+ * hw_failure_emergency_poweroff_func - emergency poweroff work after a known delay
+ * @work: work_struct associated with the emergency poweroff function
+ *
+ * This function is called in very critical situations to force
+ * a kernel poweroff after a configurable timeout value.
+ */
+static void hw_failure_emergency_poweroff_func(struct work_struct *work)
+{
+	/*
+	 * We have reached here after the emergency shutdown waiting period has
+	 * expired. This means orderly_poweroff has not been able to shut off
+	 * the system for some reason.
+	 *
+	 * Try to shut down the system immediately using kernel_power_off
+	 * if populated
+	 */
+	pr_emerg("Hardware protection timed-out. Trying forced poweroff\n");
+	kernel_power_off();
+
+	/*
+	 * Worst of the worst case trigger emergency restart
+	 */
+	pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");
+	emergency_restart();
+}
+
+static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work,
+			    hw_failure_emergency_poweroff_func);
+
+/**
+ * hw_failure_emergency_poweroff - Trigger an emergency system poweroff
+ *
+ * This may be called from any critical situation to trigger a system shutdown
+ * after a given delay. If the delay is zero or negative nothing is scheduled.
+ */
+static void hw_failure_emergency_poweroff(int poweroff_delay_ms)
+{
+	if (poweroff_delay_ms <= 0)
+		return;
+	schedule_delayed_work(&hw_failure_emergency_poweroff_work,
+			      msecs_to_jiffies(poweroff_delay_ms));
+}
+
+/**
+ * hw_protection_shutdown - Trigger an emergency system poweroff
+ *
+ * @reason:		Reason of emergency shutdown to be printed.
+ * @ms_until_forced:	Time to wait for orderly shutdown before triggering a
+ *			forced shutdown. A zero or negative value disables the
+ *			forced shutdown.
+ *
+ * Initiate an emergency system shutdown in order to protect hardware from
+ * further damage. Usage examples include a thermal protection or a voltage or
+ * current regulator failures.
+ * NOTE: The request is ignored if protection shutdown is already pending even
+ * if the previous request has given a large timeout for forced shutdown.
+ * Can be called from any context.
+ */
+void hw_protection_shutdown(const char *reason, int ms_until_forced)
+{
+	static atomic_t allow_proceed = ATOMIC_INIT(1);
+
+	pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);
+
+	/* Shutdown should be initiated only once. */
+	if (!atomic_dec_and_test(&allow_proceed))
+		return;
+
+	/*
+	 * Queue a backup emergency shutdown in the event of
+	 * orderly_poweroff failure
+	 */
+	hw_failure_emergency_poweroff(ms_until_forced);
+	orderly_poweroff(true);
+}
+EXPORT_SYMBOL_GPL(hw_protection_shutdown);
+
 static int __init reboot_setup(char *str)
 {
 	for (;;) {
-- 
GitLab


From db0aeb4f074f7023da26fb65078197c39590346b Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:41:03 +0300
Subject: [PATCH 3411/3804] thermal: Use generic HW-protection shutdown API

The hardware shutdown function was exported from kernel/reboot for
other subsystems to use. Logic is copied from the thermal_core. The
protection mutex is replaced by an atomic_t to allow calls also from
an IRQ context. Also the WARN() was replaced by pr_emerg() based on
discussions here:
https://lore.kernel.org/lkml/YJuPwAZroVZ%2Fw633@alley/
and here:
https://lore.kernel.org/linux-iommu/20210331093104.383705-4-geert+renesas@glider.be/

Use the exported API instead of implementing own just for the
thermal_core.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/5531e89d9e710f5d10e7cdce3ee58957335b9e03.1622628333.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../driver-api/thermal/sysfs-api.rst          | 24 +++----
 drivers/thermal/thermal_core.c                | 63 ++-----------------
 2 files changed, 13 insertions(+), 74 deletions(-)

diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst
index 4b638c14bc16c..c93fa5e961a0d 100644
--- a/Documentation/driver-api/thermal/sysfs-api.rst
+++ b/Documentation/driver-api/thermal/sysfs-api.rst
@@ -740,21 +740,15 @@ possible.
 5. thermal_emergency_poweroff
 =============================
 
-On an event of critical trip temperature crossing. Thermal framework
-allows the system to shutdown gracefully by calling orderly_poweroff().
-In the event of a failure of orderly_poweroff() to shut down the system
-we are in danger of keeping the system alive at undesirably high
-temperatures. To mitigate this high risk scenario we program a work
-queue to fire after a pre-determined number of seconds to start
-an emergency shutdown of the device using the kernel_power_off()
-function. In case kernel_power_off() fails then finally
-emergency_restart() is called in the worst case.
+On an event of critical trip temperature crossing the thermal framework
+shuts down the system by calling hw_protection_shutdown(). The
+hw_protection_shutdown() first attempts to perform an orderly shutdown
+but accepts a delay after which it proceeds doing a forced power-off
+or as last resort an emergency_restart.
 
 The delay should be carefully profiled so as to give adequate time for
-orderly_poweroff(). In case of failure of an orderly_poweroff() the
-emergency poweroff kicks in after the delay has elapsed and shuts down
-the system.
+orderly poweroff.
 
-If set to 0 emergency poweroff will not be supported. So a carefully
-profiled non-zero positive value is a must for emergency poweroff to be
-triggered.
+If the delay is set to 0 emergency poweroff will not be supported. So a
+carefully profiled non-zero positive value is a must for emergency
+poweroff to be triggered.
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d20b25f40d19e..10a2d8e1cacf2 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -36,10 +36,8 @@ static LIST_HEAD(thermal_governor_list);
 
 static DEFINE_MUTEX(thermal_list_lock);
 static DEFINE_MUTEX(thermal_governor_lock);
-static DEFINE_MUTEX(poweroff_lock);
 
 static atomic_t in_suspend;
-static bool power_off_triggered;
 
 static struct thermal_governor *def_governor;
 
@@ -327,70 +325,18 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip)
 		       def_governor->throttle(tz, trip);
 }
 
-/**
- * thermal_emergency_poweroff_func - emergency poweroff work after a known delay
- * @work: work_struct associated with the emergency poweroff function
- *
- * This function is called in very critical situations to force
- * a kernel poweroff after a configurable timeout value.
- */
-static void thermal_emergency_poweroff_func(struct work_struct *work)
-{
-	/*
-	 * We have reached here after the emergency thermal shutdown
-	 * Waiting period has expired. This means orderly_poweroff has
-	 * not been able to shut off the system for some reason.
-	 * Try to shut down the system immediately using kernel_power_off
-	 * if populated
-	 */
-	WARN(1, "Attempting kernel_power_off: Temperature too high\n");
-	kernel_power_off();
-
-	/*
-	 * Worst of the worst case trigger emergency restart
-	 */
-	WARN(1, "Attempting emergency_restart: Temperature too high\n");
-	emergency_restart();
-}
-
-static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
-			    thermal_emergency_poweroff_func);
-
-/**
- * thermal_emergency_poweroff - Trigger an emergency system poweroff
- *
- * This may be called from any critical situation to trigger a system shutdown
- * after a known period of time. By default this is not scheduled.
- */
-static void thermal_emergency_poweroff(void)
+void thermal_zone_device_critical(struct thermal_zone_device *tz)
 {
-	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
 	/*
 	 * poweroff_delay_ms must be a carefully profiled positive value.
-	 * Its a must for thermal_emergency_poweroff_work to be scheduled
+	 * It's a must for hw_failure_emergency_poweroff_work to be scheduled.
 	 */
-	if (poweroff_delay_ms <= 0)
-		return;
-	schedule_delayed_work(&thermal_emergency_poweroff_work,
-			      msecs_to_jiffies(poweroff_delay_ms));
-}
+	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
 
-void thermal_zone_device_critical(struct thermal_zone_device *tz)
-{
 	dev_emerg(&tz->device, "%s: critical temperature reached, "
 		  "shutting down\n", tz->type);
 
-	mutex_lock(&poweroff_lock);
-	if (!power_off_triggered) {
-		/*
-		 * Queue a backup emergency shutdown in the event of
-		 * orderly_poweroff failure
-		 */
-		thermal_emergency_poweroff();
-		orderly_poweroff(true);
-		power_off_triggered = true;
-	}
-	mutex_unlock(&poweroff_lock);
+	hw_protection_shutdown("Temperature too high", poweroff_delay_ms);
 }
 EXPORT_SYMBOL(thermal_zone_device_critical);
 
@@ -1538,7 +1484,6 @@ error:
 	ida_destroy(&thermal_cdev_ida);
 	mutex_destroy(&thermal_list_lock);
 	mutex_destroy(&thermal_governor_lock);
-	mutex_destroy(&poweroff_lock);
 	return result;
 }
 postcore_initcall(thermal_init);
-- 
GitLab


From e6c3092d43faf0aa095160cc552f8c05490d0962 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:41:21 +0300
Subject: [PATCH 3412/3804] regulator: add warning flags

Add 'warning' level events and error flags to regulator core.
Current regulator core notifications are used to inform consumers
about errors where HW is misbehaving in such way it is assumed to
be broken/unrecoverable.

There are PMICs which are designed for system(s) that may have use
for regulator indications sent before HW is damaged so that some
board/consumer specific recovery-event can be performed while
continuing most of the normal operations.

Add new WARNING level events and notifications to be used for
that purpose.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/9b54aa5589ae4b5945d53d114bac3fae55fa4818.1622628333.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 20e84a84fb779..f72ca73631bee 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -119,6 +119,16 @@ struct regulator_dev;
 #define REGULATOR_EVENT_PRE_DISABLE		0x400
 #define REGULATOR_EVENT_ABORT_DISABLE		0x800
 #define REGULATOR_EVENT_ENABLE			0x1000
+/*
+ * Following notifications should be emitted only if detected condition
+ * is such that the HW is likely to still be working but consumers should
+ * take a recovery action to prevent problems escalating into errors.
+ */
+#define REGULATOR_EVENT_UNDER_VOLTAGE_WARN	0x2000
+#define REGULATOR_EVENT_OVER_CURRENT_WARN	0x4000
+#define REGULATOR_EVENT_OVER_VOLTAGE_WARN	0x8000
+#define REGULATOR_EVENT_OVER_TEMP_WARN		0x10000
+#define REGULATOR_EVENT_WARN_MASK		0x1E000
 
 /*
  * Regulator errors that can be queried using regulator_get_error_flags
@@ -138,6 +148,10 @@ struct regulator_dev;
 #define REGULATOR_ERROR_FAIL			BIT(4)
 #define REGULATOR_ERROR_OVER_TEMP		BIT(5)
 
+#define REGULATOR_ERROR_UNDER_VOLTAGE_WARN	BIT(6)
+#define REGULATOR_ERROR_OVER_CURRENT_WARN	BIT(7)
+#define REGULATOR_ERROR_OVER_VOLTAGE_WARN	BIT(8)
+#define REGULATOR_ERROR_OVER_TEMP_WARN		BIT(9)
 
 /**
  * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event
-- 
GitLab


From 157d2230193ae683fcffcc1cd0a2c3aa4479955f Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:41:37 +0300
Subject: [PATCH 3413/3804] regulator: move rdev_print helpers to internal.h

The rdev print helpers are a nice way to print messages related to a
specific regulator device. Move them from core.c to internal.h

As the rdev print helpers use rdev_get_name() export it from core.c. Also
move the declaration from coupler.h to driver.h because the rdev name is
not just a coupled regulator property. I guess the main audience for
rdev_get_name() will be the regulator core and drivers.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/dc7fd70dc31de4d0e820b7646bb78eeb04f80735.1622628333.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c          | 12 +-----------
 drivers/regulator/internal.h      | 11 +++++++++++
 include/linux/regulator/coupler.h |  5 -----
 include/linux/regulator/driver.h  | 10 ++++++++++
 4 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f192bf19492ed..a8188f7e5072d 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -33,17 +33,6 @@
 #include "dummy.h"
 #include "internal.h"
 
-#define rdev_crit(rdev, fmt, ...)					\
-	pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
-#define rdev_err(rdev, fmt, ...)					\
-	pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
-#define rdev_warn(rdev, fmt, ...)					\
-	pr_warn("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
-#define rdev_info(rdev, fmt, ...)					\
-	pr_info("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
-#define rdev_dbg(rdev, fmt, ...)					\
-	pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
-
 static DEFINE_WW_CLASS(regulator_ww_class);
 static DEFINE_MUTEX(regulator_nesting_mutex);
 static DEFINE_MUTEX(regulator_list_mutex);
@@ -117,6 +106,7 @@ const char *rdev_get_name(struct regulator_dev *rdev)
 	else
 		return "";
 }
+EXPORT_SYMBOL_GPL(rdev_get_name);
 
 static bool have_full_constraints(void)
 {
diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h
index 2391b565ef11f..1e9c716421436 100644
--- a/drivers/regulator/internal.h
+++ b/drivers/regulator/internal.h
@@ -15,6 +15,17 @@
 
 #define REGULATOR_STATES_NUM	(PM_SUSPEND_MAX + 1)
 
+#define rdev_crit(rdev, fmt, ...)					\
+	pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
+#define rdev_err(rdev, fmt, ...)					\
+	pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
+#define rdev_warn(rdev, fmt, ...)					\
+	pr_warn("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
+#define rdev_info(rdev, fmt, ...)					\
+	pr_info("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
+#define rdev_dbg(rdev, fmt, ...)					\
+	pr_debug("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
+
 struct regulator_voltage {
 	int min_uV;
 	int max_uV;
diff --git a/include/linux/regulator/coupler.h b/include/linux/regulator/coupler.h
index 5f86824bd1175..73291f280a23e 100644
--- a/include/linux/regulator/coupler.h
+++ b/include/linux/regulator/coupler.h
@@ -52,7 +52,6 @@ struct regulator_coupler {
 
 #ifdef CONFIG_REGULATOR
 int regulator_coupler_register(struct regulator_coupler *coupler);
-const char *rdev_get_name(struct regulator_dev *rdev);
 int regulator_check_consumers(struct regulator_dev *rdev,
 			      int *min_uV, int *max_uV,
 			      suspend_state_t state);
@@ -69,10 +68,6 @@ static inline int regulator_coupler_register(struct regulator_coupler *coupler)
 {
 	return 0;
 }
-static inline const char *rdev_get_name(struct regulator_dev *rdev)
-{
-	return NULL;
-}
 static inline int regulator_check_consumers(struct regulator_dev *rdev,
 					    int *min_uV, int *max_uV,
 					    suspend_state_t state)
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 4ea520c248e9e..7ec0fa79d1a83 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -550,4 +550,14 @@ int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc,
 
 int regulator_desc_list_voltage_linear(const struct regulator_desc *desc,
 				       unsigned int selector);
+
+#ifdef CONFIG_REGULATOR
+const char *rdev_get_name(struct regulator_dev *rdev);
+#else
+static inline const char *rdev_get_name(struct regulator_dev *rdev)
+{
+	return NULL;
+}
+#endif
+
 #endif
-- 
GitLab


From 7111c6d1b31b42c8c758f6681e895a5116e3bad6 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:41:55 +0300
Subject: [PATCH 3414/3804] regulator: IRQ based event/error notification
 helpers

Provide helper function for IC's implementing regulator notifications
when an IRQ fires. The helper also works for IRQs which can not be acked.
Helper can be set to disable the IRQ at handler and then re-enabling it
on delayed work later. The helper also adds regulator_get_error_flags()
errors in cache for the duration of IRQ disabling.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/ebdf86d8c22b924667ec2385330e30fcbfac0119.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Makefile       |   2 +-
 drivers/regulator/core.c         |  29 ++-
 drivers/regulator/devres.c       |  52 ++++
 drivers/regulator/irq_helpers.c  | 397 +++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h | 135 +++++++++++
 5 files changed, 607 insertions(+), 8 deletions(-)
 create mode 100644 drivers/regulator/irq_helpers.c

diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 580b015296ea2..534fc0163bc42 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -4,7 +4,7 @@
 #
 
 
-obj-$(CONFIG_REGULATOR) += core.o dummy.o fixed-helper.o helpers.o devres.o
+obj-$(CONFIG_REGULATOR) += core.o dummy.o fixed-helper.o helpers.o devres.o irq_helpers.o
 obj-$(CONFIG_OF) += of_regulator.o
 obj-$(CONFIG_REGULATOR_FIXED_VOLTAGE) += fixed.o
 obj-$(CONFIG_REGULATOR_VIRTUAL_CONSUMER) += virtual.o
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index a8188f7e5072d..85b6d3960369c 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -4370,22 +4370,36 @@ unsigned int regulator_get_mode(struct regulator *regulator)
 }
 EXPORT_SYMBOL_GPL(regulator_get_mode);
 
+static int rdev_get_cached_err_flags(struct regulator_dev *rdev)
+{
+	int ret = 0;
+
+	if (rdev->use_cached_err) {
+		spin_lock(&rdev->err_lock);
+		ret = rdev->cached_err;
+		spin_unlock(&rdev->err_lock);
+	}
+	return ret;
+}
+
 static int _regulator_get_error_flags(struct regulator_dev *rdev,
 					unsigned int *flags)
 {
-	int ret;
+	int cached_flags, ret = 0;
 
 	regulator_lock(rdev);
 
-	/* sanity check */
-	if (!rdev->desc->ops->get_error_flags) {
+	cached_flags = rdev_get_cached_err_flags(rdev);
+	*flags = 0;	/* op may be absent; don't OR into stale caller data */
+	if (rdev->desc->ops->get_error_flags)
+		ret = rdev->desc->ops->get_error_flags(rdev, flags);
+	else if (!rdev->use_cached_err)
 		ret = -EINVAL;
-		goto out;
-	}
 
-	ret = rdev->desc->ops->get_error_flags(rdev, flags);
-out:
+	*flags |= cached_flags;
+
 	regulator_unlock(rdev);
+
 	return ret;
 }
 
@@ -5218,6 +5232,7 @@ regulator_register(const struct regulator_desc *regulator_desc,
 		goto rinse;
 	}
 	device_initialize(&rdev->dev);
+	spin_lock_init(&rdev->err_lock);
 
 	/*
 	 * Duplicate the config so the driver could override it after
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index 3091210889e31..a8de0aa88bad6 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -481,3 +481,55 @@ void devm_regulator_unregister_notifier(struct regulator *regulator,
 		WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_regulator_unregister_notifier);
+
+static void regulator_irq_helper_drop(void *res)
+{
+	regulator_irq_helper_cancel(&res);
+}
+
+/**
+ * devm_regulator_irq_helper - resource managed registration of IRQ based
+ * regulator event/error notifier
+ *
+ * @dev:		device to which lifetime the helper's lifetime is
+ *			bound.
+ * @d:			IRQ helper descriptor.
+ * @irq:		IRQ used to inform events/errors to be notified.
+ * @irq_flags:		Extra IRQ flags to be OR'ed with the default
+ *			IRQF_ONESHOT when requesting the (threaded) irq.
+ * @common_errs:	Errors which can be flagged by this IRQ for all rdevs.
+ *			When IRQ is re-enabled these errors will be cleared
+ *			from all associated regulators
+ * @per_rdev_errs:	Optional error flag array describing errors specific
+ *			for only some of the regulators. These errors will be
+ *			or'ed with common errors. If this is given the array
+ *			should contain rdev_amount flags. Can be set to NULL
+ *			if there is no regulator specific error flags for this
+ *			IRQ.
+ * @rdev:		Array of pointers to regulators associated with this
+ *			IRQ.
+ * @rdev_amount:	Amount of regulators associated with this IRQ.
+ *
+ * Return: handle to irq_helper or an ERR_PTR() encoded error code.
+ */
+void *devm_regulator_irq_helper(struct device *dev,
+				const struct regulator_irq_desc *d, int irq,
+				int irq_flags, int common_errs,
+				int *per_rdev_errs,
+				struct regulator_dev **rdev, int rdev_amount)
+{
+	void *ptr;
+	int ret;
+
+	ptr = regulator_irq_helper(dev, d, irq, irq_flags, common_errs,
+				    per_rdev_errs, rdev, rdev_amount);
+	if (IS_ERR(ptr))
+		return ptr;
+
+	ret = devm_add_action_or_reset(dev, regulator_irq_helper_drop, ptr);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return ptr;
+}
+EXPORT_SYMBOL_GPL(devm_regulator_irq_helper);
diff --git a/drivers/regulator/irq_helpers.c b/drivers/regulator/irq_helpers.c
new file mode 100644
index 0000000000000..fabe2e53093ee
--- /dev/null
+++ b/drivers/regulator/irq_helpers.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2021 ROHM Semiconductors
+// regulator IRQ based event notification helpers
+//
+// Logic has been partially adapted from qcom-labibb driver.
+//
+// Author: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/regulator/driver.h>
+
+#include "internal.h"
+
+#define REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS 10000
+
+struct regulator_irq {
+	struct regulator_irq_data rdata;
+	struct regulator_irq_desc desc;
+	int irq;
+	int retry_cnt;
+	struct delayed_work isr_work;
+};
+
+/*
+ * Should only be called from threaded handler to prevent potential deadlock
+ */
+static void rdev_flag_err(struct regulator_dev *rdev, int err)
+{
+	spin_lock(&rdev->err_lock);
+	rdev->cached_err |= err;
+	spin_unlock(&rdev->err_lock);
+}
+
+static void rdev_clear_err(struct regulator_dev *rdev, int err)
+{
+	spin_lock(&rdev->err_lock);
+	rdev->cached_err &= ~err;
+	spin_unlock(&rdev->err_lock);
+}
+
+static void regulator_notifier_isr_work(struct work_struct *work)
+{
+	struct regulator_irq *h;
+	struct regulator_irq_desc *d;
+	struct regulator_irq_data *rid;
+	int ret = 0;
+	int tmo, i;
+	int num_rdevs;
+
+	h = container_of(work, struct regulator_irq,
+			    isr_work.work);
+	d = &h->desc;
+	rid = &h->rdata;
+	num_rdevs = rid->num_states;
+
+reread:
+	if (d->fatal_cnt && h->retry_cnt > d->fatal_cnt) {
+		if (!d->die)
+			return hw_protection_shutdown("Regulator HW failure? - no IC recovery",
+						      REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS);
+		ret = d->die(rid);
+		/*
+		 * If the 'last resort' IC recovery failed we will have
+		 * nothing else left to do...
+		 */
+		if (ret)
+			return hw_protection_shutdown("Regulator HW failure. IC recovery failed",
+						      REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS);
+
+		/*
+		 * If d->die() was implemented we assume recovery has been
+		 * attempted (probably regulator was shut down) and we
+		 * just enable IRQ and bail-out.
+		 */
+		goto enable_out;
+	}
+	if (d->renable) {
+		ret = d->renable(rid);
+
+		if (ret == REGULATOR_FAILED_RETRY) {
+			/* Driver could not get current status */
+			h->retry_cnt++;
+			if (!d->reread_ms)
+				goto reread;
+
+			tmo = d->reread_ms;
+			goto reschedule;
+		}
+
+		if (ret) {
+			/*
+			 * IC status reading succeeded. update error info
+			 * just in case the renable changed it.
+			 */
+			for (i = 0; i < num_rdevs; i++) {
+				struct regulator_err_state *stat;
+				struct regulator_dev *rdev;
+
+				stat = &rid->states[i];
+				rdev = stat->rdev;
+				rdev_clear_err(rdev, (~stat->errors) &
+						      stat->possible_errs);
+			}
+			h->retry_cnt++;
+			/*
+			 * The IC indicated problem is still ON - no point in
+			 * re-enabling the IRQ. Retry later.
+			 */
+			tmo = d->irq_off_ms;
+			goto reschedule;
+		}
+	}
+
+	/*
+	 * Either IC reported problem cleared or no status checker was provided.
+	 * If problems are gone - good. If not - then the IRQ will fire again
+	 * and we'll have a new nice loop. In any case we should clear error
+	 * flags here and re-enable IRQs.
+	 */
+	for (i = 0; i < num_rdevs; i++) {
+		struct regulator_err_state *stat;
+		struct regulator_dev *rdev;
+
+		stat = &rid->states[i];
+		rdev = stat->rdev;
+		rdev_clear_err(rdev, stat->possible_errs);
+	}
+
+	/*
+	 * Things have been seemingly successful => zero retry-counter.
+	 */
+	h->retry_cnt = 0;
+
+enable_out:
+	enable_irq(h->irq);
+
+	return;
+
+reschedule:
+	if (!d->high_prio)
+		mod_delayed_work(system_wq, &h->isr_work,
+				 msecs_to_jiffies(tmo));
+	else
+		mod_delayed_work(system_highpri_wq, &h->isr_work,
+				 msecs_to_jiffies(tmo));
+}
+
+static irqreturn_t regulator_notifier_isr(int irq, void *data)
+{
+	struct regulator_irq *h = data;
+	struct regulator_irq_desc *d;
+	struct regulator_irq_data *rid;
+	unsigned long rdev_map = 0;
+	int num_rdevs;
+	int ret, i;
+
+	d = &h->desc;
+	rid = &h->rdata;
+	num_rdevs = rid->num_states;
+
+	if (d->fatal_cnt)
+		h->retry_cnt++;
+
+	/*
+	 * we spare a few cycles by not clearing statuses prior to this call.
+	 * The IC driver must initialize the status buffers for rdevs
+	 * which it indicates having active events via rdev_map.
+	 *
+	 * Maybe we should just to be on a safer side(?)
+	 */
+	ret = d->map_event(irq, rid, &rdev_map);
+
+	/*
+	 * If status reading fails (which is unlikely) we don't ack/disable
+	 * IRQ but just increase fail count and retry when IRQ fires again.
+	 * If retry_count exceeds the given safety limit we call IC specific die
+	 * handler which can try disabling regulator(s).
+	 *
+	 * If no die handler is given we shut the system down as a last resort.
+	 *
+	 * We could try disabling all associated rdevs - but we might shoot
+	 * ourselves in the head and leave the problematic regulator enabled. So
+	 * if IC has no die-handler populated we just assume the regulator
+	 * can't be disabled.
+	 */
+	if (unlikely(ret == REGULATOR_FAILED_RETRY))
+		goto fail_out;
+
+	h->retry_cnt = 0;
+	/*
+	 * Let's not disable IRQ if there were no status bits for us. We'd
+	 * better leave spurious IRQ handling to genirq
+	 */
+	if (ret || !rdev_map)
+		return IRQ_NONE;
+
+	/*
+	 * Some events are bogus if the regulator is disabled. Skip such events
+	 * if all relevant regulators are disabled
+	 */
+	if (d->skip_off) {
+		for_each_set_bit(i, &rdev_map, num_rdevs) {
+			struct regulator_dev *rdev;
+			const struct regulator_ops *ops;
+
+			rdev = rid->states[i].rdev;
+			ops = rdev->desc->ops;
+
+			/*
+			 * If any of the flagged regulators is enabled we do
+			 * handle this
+			 */
+			if (ops->is_enabled(rdev))
+				break;
+		}
+		if (i == num_rdevs)
+			return IRQ_NONE;
+	}
+
+	/* Disable IRQ if HW keeps line asserted */
+	if (d->irq_off_ms)
+		disable_irq_nosync(irq);
+
+	/*
+	 * IRQ seems to be for us. Let's fire correct notifiers / store error
+	 * flags
+	 */
+	for_each_set_bit(i, &rdev_map, num_rdevs) {
+		struct regulator_err_state *stat;
+		struct regulator_dev *rdev;
+
+		stat = &rid->states[i];
+		rdev = stat->rdev;
+
+		rdev_dbg(rdev, "Sending regulator notification EVT 0x%lx\n",
+			 stat->notifs);
+
+		regulator_notifier_call_chain(rdev, stat->notifs, NULL);
+		rdev_flag_err(rdev, stat->errors);
+	}
+
+	if (d->irq_off_ms) {
+		if (!d->high_prio)
+			schedule_delayed_work(&h->isr_work,
+					      msecs_to_jiffies(d->irq_off_ms));
+		else
+			mod_delayed_work(system_highpri_wq,
+					 &h->isr_work,
+					 msecs_to_jiffies(d->irq_off_ms));
+	}
+
+	return IRQ_HANDLED;
+
+fail_out:
+	if (d->fatal_cnt && h->retry_cnt > d->fatal_cnt) {
+		/* If we have no recovery, just try shut down straight away */
+		if (!d->die) {
+			hw_protection_shutdown("Regulator failure. Retry count exceeded",
+					       REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS);
+		} else {
+			ret = d->die(rid);
+			/* If die() failed shut down as a last attempt to save the HW */
+			if (ret)
+				hw_protection_shutdown("Regulator failure. Recovery failed",
+						       REGULATOR_FORCED_SAFETY_SHUTDOWN_WAIT_MS);
+		}
+	}
+
+	return IRQ_NONE;
+}
+
+static int init_rdev_state(struct device *dev, struct regulator_irq *h,
+			   struct regulator_dev **rdev, int common_err,
+			   int *rdev_err, int rdev_amount)
+{
+	int i;
+
+	h->rdata.states = devm_kcalloc(dev, rdev_amount,
+				       sizeof(*h->rdata.states), GFP_KERNEL);
+	if (!h->rdata.states)
+		return -ENOMEM;
+
+	h->rdata.num_states = rdev_amount;
+	h->rdata.data = h->desc.data;
+	/* Each rdev may flag the common errors plus its own optional extras. */
+	for (i = 0; i < rdev_amount; i++) {
+		h->rdata.states[i].possible_errs = common_err;
+		if (rdev_err)
+			h->rdata.states[i].possible_errs |= *rdev_err++;
+		h->rdata.states[i].rdev = *rdev++;
+	}
+
+	return 0;
+}
+
+static void init_rdev_errors(struct regulator_irq *h)
+{
+	int i;
+
+	for (i = 0; i < h->rdata.num_states; i++)
+		if (h->rdata.states[i].possible_errs)
+			h->rdata.states[i].rdev->use_cached_err = true;
+}
+
+/**
+ * regulator_irq_helper - register IRQ based regulator event/error notifier
+ *
+ * @dev:		device providing the IRQs
+ * @d:			IRQ helper descriptor.
+ * @irq:		IRQ used to inform events/errors to be notified.
+ * @irq_flags:		Extra IRQ flags to be OR'ed with the default
+ *			IRQF_ONESHOT when requesting the (threaded) irq.
+ * @common_errs:	Errors which can be flagged by this IRQ for all rdevs.
+ *			When IRQ is re-enabled these errors will be cleared
+ *			from all associated regulators
+ * @per_rdev_errs:	Optional error flag array describing errors specific
+ *			for only some of the regulators. These errors will be
+ *			or'ed with common errors. If this is given the array
+ *			should contain rdev_amount flags. Can be set to NULL
+ *			if there is no regulator specific error flags for this
+ *			IRQ.
+ * @rdev:		Array of pointers to regulators associated with this
+ *			IRQ.
+ * @rdev_amount:	Amount of regulators associated with this IRQ.
+ *
+ * Return: handle to irq_helper or an ERR_PTR() encoded error code.
+ */
+void *regulator_irq_helper(struct device *dev,
+			   const struct regulator_irq_desc *d, int irq,
+			   int irq_flags, int common_errs, int *per_rdev_errs,
+			   struct regulator_dev **rdev, int rdev_amount)
+{
+	struct regulator_irq *h;
+	int ret;
+
+	if (!rdev_amount || !d || !d->map_event || !d->name)
+		return ERR_PTR(-EINVAL);
+
+	h = devm_kzalloc(dev, sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return ERR_PTR(-ENOMEM);
+
+	h->irq = irq;
+	h->desc = *d;
+
+	ret = init_rdev_state(dev, h, rdev, common_errs, per_rdev_errs,
+			      rdev_amount);
+	if (ret)
+		return ERR_PTR(ret);
+
+	init_rdev_errors(h);
+
+	if (h->desc.irq_off_ms)
+		INIT_DELAYED_WORK(&h->isr_work, regulator_notifier_isr_work);
+
+	ret = request_threaded_irq(h->irq, NULL, regulator_notifier_isr,
+				   IRQF_ONESHOT | irq_flags, h->desc.name, h);
+	if (ret) {
+		dev_err(dev, "Failed to request IRQ %d\n", irq);
+
+		return ERR_PTR(ret);
+	}
+
+	return h;
+}
+EXPORT_SYMBOL_GPL(regulator_irq_helper);
+
+/**
+ * regulator_irq_helper_cancel - drop IRQ based regulator event/error notifier
+ *
+ * @handle:		Pointer to handle returned by a successful call to
+ *			regulator_irq_helper(). Will be NULLed upon return.
+ *
+ * The associated IRQ is released and work is cancelled when the function
+ * returns.
+ */
+void regulator_irq_helper_cancel(void **handle)
+{
+	if (handle && *handle) {
+		struct regulator_irq *h = *handle;
+
+		free_irq(h->irq, h);
+		if (h->desc.irq_off_ms)
+			cancel_delayed_work_sync(&h->isr_work);
+		/* Clear the caller's handle, not merely the local copy. */
+		*handle = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(regulator_irq_helper_cancel);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 7ec0fa79d1a83..1d1a8951e7407 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -413,6 +413,128 @@ struct regulator_config {
 	struct gpio_desc *ena_gpiod;
 };
 
+/**
+ * struct regulator_err_state - regulator error/notification status
+ *
+ * @rdev:		Regulator which status the struct indicates.
+ * @notifs:		Events which have occurred on the regulator.
+ * @errors:		Errors which are active on the regulator.
+ * @possible_errs:	Errors which can be signaled (by given IRQ).
+ */
+struct regulator_err_state {
+	struct regulator_dev *rdev;
+	unsigned long notifs;
+	unsigned long errors;
+	int possible_errs;
+};
+
+/**
+ * struct regulator_irq_data - regulator error/notification status data
+ *
+ * @states:	Status structs for each of the associated regulators.
+ * @num_states:	Amount of associated regulators.
+ * @data:	Driver data pointer given at regulator_irq_desc.
+ * @opaque:	Value storage for IC driver. Core does not update this. ICs
+ *		may want to store status register value here at map_event and
+ *		compare contents at 'renable' callback to see if new problems
+ *		have been added to status. If that is the case it may be
+ *		desirable to return REGULATOR_ERROR_CLEARED and not
+ *		REGULATOR_ERROR_ON to allow IRQ fire again and to generate
+ *		notifications also for the new issues.
+ *
+ * This structure is passed to 'map_event' and 'renable' callbacks for
+ * reporting regulator status to core.
+ */
+struct regulator_irq_data {
+	struct regulator_err_state *states;
+	int num_states;
+	void *data;
+	long opaque;
+};
+
+/**
+ * struct regulator_irq_desc - notification sender for IRQ based events.
+ *
+ * @name:	The visible name for the IRQ
+ * @fatal_cnt:	If this IRQ is used to signal HW damaging condition it may be
+ *		best to shut-down regulator(s) or reboot the SOC if error
+ *		handling is repeatedly failing. If fatal_cnt is given the IRQ
+ *		handling is aborted if it fails for fatal_cnt times and die()
+ *		callback (if populated) is called, or else a system shutdown
+ *		is attempted, to try to prevent further damage.
+ * @reread_ms:	The time which is waited before attempting to re-read status
+ *		at the worker if IC reading fails. Immediate re-read is done
+ *		if time is not specified.
+ * @irq_off_ms:	The time which IRQ is kept disabled before re-evaluating the
+ *		status for devices which keep IRQ asserted for duration of the
+ *		error. If this is not given the IRQ is left enabled and renable
+ *		is not called.
+ * @skip_off:	If set to true the IRQ handler will attempt to check if any of
+ *		the associated regulators are enabled prior to taking other
+ *		actions. If no regulators are enabled and this is set to true
+ *		a spurious IRQ is assumed and IRQ_NONE is returned.
+ * @high_prio:	Boolean to indicate that high priority WQ should be used.
+ * @data:	Driver private data pointer which will be passed as such to
+ *		the renable, map_event and die callbacks in regulator_irq_data.
+ * @die:	Protection callback. If IC status reading or recovery actions
+ *		fail fatal_cnt times this callback is called, or a system
+ *		shutdown is attempted if it is not populated. This callback
+ *		should implement a final protection attempt like disabling the
+ *		regulator. If protection succeeded this may return 0. Otherwise
+ *		the core assumes it failed and shuts the system down.
+ * @map_event:	Driver callback to map IRQ status into regulator devices with
+ *		events / errors. NOTE: callback MUST initialize both the
+ *		errors and notifs for all rdevs which it signals having
+ *		active events as core does not clean the map data.
+ *		REGULATOR_FAILED_RETRY can be returned to indicate that the
+ *		status reading from IC failed. If this is repeated for
+ *		fatal_cnt times the core will call die() callback or shut
+ *		the system down as a last resort to protect the HW.
+ * @renable:	Optional callback to check status (if HW supports that) before
+ *		re-enabling IRQ. If implemented this should clear the error
+ *		flags so that errors fetched by regulator_get_error_flags()
+ *		are updated. If callback is not implemented then errors are
+ *		assumed to be cleared and IRQ is re-enabled.
+ *		REGULATOR_FAILED_RETRY can be returned to
+ *		indicate that the status reading from IC failed. If this is
+ *		repeated for 'fatal_cnt' times the core will call die() or
+ *		shut the system down as a last resort to protect the HW.
+ *		Returning zero indicates that the problem in HW has been solved
+ *		and IRQ will be re-enabled. Returning REGULATOR_ERROR_ON
+ *		indicates the error condition is still active and keeps IRQ
+ *		disabled. Please note that returning REGULATOR_ERROR_ON does
+ *		not retrigger evaluating what events are active or resending
+ *		notifications. If this is needed you probably want to return
+ *		zero and allow IRQ to retrigger causing events to be
+ *		re-evaluated and re-sent.
+ *
+ * This structure is used for registering regulator IRQ notification helper.
+ */
+struct regulator_irq_desc {
+	const char *name;
+	int irq_flags;
+	int fatal_cnt;
+	int reread_ms;
+	int irq_off_ms;
+	bool skip_off;
+	bool high_prio;
+	void *data;
+
+	int (*die)(struct regulator_irq_data *rid);
+	int (*map_event)(int irq, struct regulator_irq_data *rid,
+			  unsigned long *dev_mask);
+	int (*renable)(struct regulator_irq_data *rid);
+};
+
+/*
+ * Return values for regulator IRQ helpers.
+ */
+enum {
+	REGULATOR_ERROR_CLEARED,
+	REGULATOR_FAILED_RETRY,
+	REGULATOR_ERROR_ON,
+};
+
 /*
  * struct coupling_desc
  *
@@ -477,6 +599,9 @@ struct regulator_dev {
 
 	/* time when this regulator was disabled last time */
 	ktime_t last_off;
+	int cached_err;
+	bool use_cached_err;
+	spinlock_t err_lock;
 };
 
 struct regulator_dev *
@@ -491,6 +616,16 @@ void devm_regulator_unregister(struct device *dev, struct regulator_dev *rdev);
 
 int regulator_notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data);
+void *devm_regulator_irq_helper(struct device *dev,
+				const struct regulator_irq_desc *d, int irq,
+				int irq_flags, int common_errs,
+				int *per_rdev_errs, struct regulator_dev **rdev,
+				int rdev_amount);
+void *regulator_irq_helper(struct device *dev,
+			   const struct regulator_irq_desc *d, int irq,
+			   int irq_flags, int common_errs, int *per_rdev_errs,
+			   struct regulator_dev **rdev, int rdev_amount);
+void regulator_irq_helper_cancel(void **handle);
 
 void *rdev_get_drvdata(struct regulator_dev *rdev);
 struct device *rdev_get_dev(struct regulator_dev *rdev);
-- 
GitLab


From 89a6a5e56c8248a077d12424a1383a6b18ea840b Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:42:12 +0300
Subject: [PATCH 3415/3804] regulator: add property parsing and callbacks to
 set protection limits

Add DT property parsing code and setting callback for regulator over/under
voltage, over-current and temperature error limits.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/e7b8007ba9eae7076178bf3363fb942ccb1cc9a5.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c                  | 122 +++++++++++++++++++++-
 drivers/regulator/of_regulator.c          |  58 ++++++++++
 drivers/regulator/qcom-labibb-regulator.c |  10 +-
 drivers/regulator/qcom_spmi-regulator.c   |   6 +-
 drivers/regulator/stpmic1_regulator.c     |  20 +++-
 include/linux/regulator/driver.h          |  41 +++++++-
 include/linux/regulator/machine.h         |  26 +++++
 7 files changed, 274 insertions(+), 9 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 85b6d3960369c..92fe05178249b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1305,6 +1305,52 @@ static int machine_constraints_current(struct regulator_dev *rdev,
 
 static int _regulator_do_enable(struct regulator_dev *rdev);
 
+static int notif_set_limit(struct regulator_dev *rdev,
+			   int (*set)(struct regulator_dev *, int, int, bool),
+			   int limit, int severity)
+{
+	bool enable;
+
+	if (limit == REGULATOR_NOTIF_LIMIT_DISABLE) {
+		enable = false;
+		limit = 0;
+	} else {
+		enable = true;
+	}
+
+	if (limit == REGULATOR_NOTIF_LIMIT_ENABLE)
+		limit = 0;
+
+	return set(rdev, limit, severity, enable);
+}
+
+static int handle_notify_limits(struct regulator_dev *rdev,
+			int (*set)(struct regulator_dev *, int, int, bool),
+			struct notification_limit *limits)
+{
+	int ret = 0;
+
+	if (!set)
+		return -EOPNOTSUPP;
+
+	if (limits->prot)
+		ret = notif_set_limit(rdev, set, limits->prot,
+				      REGULATOR_SEVERITY_PROT);
+	if (ret)
+		return ret;
+
+	if (limits->err)
+		ret = notif_set_limit(rdev, set, limits->err,
+				      REGULATOR_SEVERITY_ERR);
+	if (ret)
+		return ret;
+
+	if (limits->warn)
+		ret = notif_set_limit(rdev, set, limits->warn,
+				      REGULATOR_SEVERITY_WARN);
+
+	return ret;
+}
 /**
  * set_machine_constraints - sets regulator constraints
  * @rdev: regulator source
@@ -1390,9 +1436,27 @@ static int set_machine_constraints(struct regulator_dev *rdev)
 		}
 	}
 
+	/*
+	 * Existing logic does not warn if over_current_protection is given as
+	 * a constraint but driver does not support that. I think we should
+	 * warn about this type of issues as it is possible someone changes
+	 * PMIC on board to another type - and the another PMIC's driver does
+	 * not support setting protection. Board composer may happily believe
+	 * the DT limits are respected - especially if the new PMIC HW also
+	 * supports protection but the driver does not. I won't change the logic
+	 * without hearing more experienced opinion on this though.
+	 *
+	 * If warning is seen as a good idea then we can merge handling the
+	 * over-current protection and detection and get rid of this special
+	 * handling.
+	 */
 	if (rdev->constraints->over_current_protection
 		&& ops->set_over_current_protection) {
-		ret = ops->set_over_current_protection(rdev);
+		int lim = rdev->constraints->over_curr_limits.prot;
+
+		ret = ops->set_over_current_protection(rdev, lim,
+						       REGULATOR_SEVERITY_PROT,
+						       true);
 		if (ret < 0) {
 			rdev_err(rdev, "failed to set over current protection: %pe\n",
 				 ERR_PTR(ret));
@@ -1400,6 +1464,62 @@ static int set_machine_constraints(struct regulator_dev *rdev)
 		}
 	}
 
+	if (rdev->constraints->over_current_detection)
+		ret = handle_notify_limits(rdev,
+					   ops->set_over_current_protection,
+					   &rdev->constraints->over_curr_limits);
+	if (ret) {
+		if (ret != -EOPNOTSUPP) {
+			rdev_err(rdev, "failed to set over current limits: %pe\n",
+				 ERR_PTR(ret));
+			return ret;
+		}
+		rdev_warn(rdev,
+			  "IC does not support requested over-current limits\n");
+	}
+
+	if (rdev->constraints->over_voltage_detection)
+		ret = handle_notify_limits(rdev,
+					   ops->set_over_voltage_protection,
+					   &rdev->constraints->over_voltage_limits);
+	if (ret) {
+		if (ret != -EOPNOTSUPP) {
+			rdev_err(rdev, "failed to set over voltage limits %pe\n",
+				 ERR_PTR(ret));
+			return ret;
+		}
+		rdev_warn(rdev,
+			  "IC does not support requested over voltage limits\n");
+	}
+
+	if (rdev->constraints->under_voltage_detection)
+		ret = handle_notify_limits(rdev,
+					   ops->set_under_voltage_protection,
+					   &rdev->constraints->under_voltage_limits);
+	if (ret) {
+		if (ret != -EOPNOTSUPP) {
+			rdev_err(rdev, "failed to set under voltage limits %pe\n",
+				 ERR_PTR(ret));
+			return ret;
+		}
+		rdev_warn(rdev,
+			  "IC does not support requested under voltage limits\n");
+	}
+
+	if (rdev->constraints->over_temp_detection)
+		ret = handle_notify_limits(rdev,
+					   ops->set_thermal_protection,
+					   &rdev->constraints->temp_limits);
+	if (ret) {
+		if (ret != -EOPNOTSUPP) {
+			rdev_err(rdev, "failed to set temperature limits %pe\n",
+				 ERR_PTR(ret));
+			return ret;
+		}
+		rdev_warn(rdev,
+			  "IC does not support requested temperature limits\n");
+	}
+
 	if (rdev->constraints->active_discharge && ops->set_active_discharge) {
 		bool ad_state = (rdev->constraints->active_discharge ==
 			      REGULATOR_ACTIVE_DISCHARGE_ENABLE) ? true : false;
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 49f6c05fee34a..f54d4f176882a 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -21,6 +21,62 @@ static const char *const regulator_states[PM_SUSPEND_MAX + 1] = {
 	[PM_SUSPEND_MAX]	= "regulator-state-disk",
 };
 
+static void fill_limit(int *limit, int val)
+{
+	/* DT value 0 maps to DISABLE, 1 to ENABLE, anything else is kept */
+	if (!val)
+		*limit = REGULATOR_NOTIF_LIMIT_DISABLE;
+	else if (val == 1)
+		*limit = REGULATOR_NOTIF_LIMIT_ENABLE;
+	else
+		*limit = val;
+}
+
+static void of_get_regulator_prot_limits(struct device_node *np,
+				struct regulation_constraints *constraints)
+{
+	u32 pval;
+	int i;
+	static const char *const props[] = {
+		"regulator-oc-%s-microamp",
+		"regulator-ov-%s-microvolt",
+		"regulator-temp-%s-kelvin",
+		"regulator-uv-%s-microvolt",
+	};
+	struct notification_limit *limits[] = {
+		&constraints->over_curr_limits,
+		&constraints->over_voltage_limits,
+		&constraints->temp_limits,
+		&constraints->under_voltage_limits,
+	};
+	bool set[4] = {0};
+
+	/* Protection limits: */
+	for (i = 0; i < ARRAY_SIZE(props); i++) {
+		char prop[255];
+		bool found;
+		int j;
+		static const char *const lvl[] = {
+			"protection", "error", "warn"
+		};
+		int *l[] = {
+			&limits[i]->prot, &limits[i]->err, &limits[i]->warn,
+		};
+
+		for (j = 0; j < ARRAY_SIZE(lvl); j++) {
+			snprintf(prop, 255, props[i], lvl[j]);
+			found = !of_property_read_u32(np, prop, &pval);
+			if (found)
+				fill_limit(l[j], pval);
+			set[i] |= found;
+		}
+	}
+	constraints->over_current_detection = set[0];
+	constraints->over_voltage_detection = set[1];
+	constraints->over_temp_detection = set[2];
+	constraints->under_voltage_detection = set[3];
+}
+
 static int of_get_regulation_constraints(struct device *dev,
 					struct device_node *np,
 					struct regulator_init_data **init_data,
@@ -188,6 +244,8 @@ static int of_get_regulation_constraints(struct device *dev,
 	constraints->over_current_protection = of_property_read_bool(np,
 					"regulator-over-current-protection");
 
+	of_get_regulator_prot_limits(np, constraints);
+
 	for (i = 0; i < ARRAY_SIZE(regulator_states); i++) {
 		switch (i) {
 		case PM_SUSPEND_MEM:
diff --git a/drivers/regulator/qcom-labibb-regulator.c b/drivers/regulator/qcom-labibb-regulator.c
index de25e3279b4b9..b3da0dc58782f 100644
--- a/drivers/regulator/qcom-labibb-regulator.c
+++ b/drivers/regulator/qcom-labibb-regulator.c
@@ -307,13 +307,21 @@ end:
 	return IRQ_HANDLED;
 }
 
-static int qcom_labibb_set_ocp(struct regulator_dev *rdev)
+static int qcom_labibb_set_ocp(struct regulator_dev *rdev, int lim,
+			       int severity, bool enable)
 {
 	struct labibb_regulator *vreg = rdev_get_drvdata(rdev);
 	char *ocp_irq_name;
 	u32 irq_flags = IRQF_ONESHOT;
 	int irq_trig_low, ret;
 
+	/*
+	 * labibb supports only protection - and does not support setting
+	 * limit. Furthermore, we don't support disabling protection.
+	 */
+	if (lim || severity != REGULATOR_SEVERITY_PROT || !enable)
+		return -EINVAL;
+
 	/* If there is no OCP interrupt, there's nothing to set */
 	if (vreg->ocp_irq <= 0)
 		return -EINVAL;
diff --git a/drivers/regulator/qcom_spmi-regulator.c b/drivers/regulator/qcom_spmi-regulator.c
index 95677c51c1fad..41424a3366d0e 100644
--- a/drivers/regulator/qcom_spmi-regulator.c
+++ b/drivers/regulator/qcom_spmi-regulator.c
@@ -595,11 +595,15 @@ static int spmi_regulator_vs_enable(struct regulator_dev *rdev)
 	return regulator_enable_regmap(rdev);
 }
 
-static int spmi_regulator_vs_ocp(struct regulator_dev *rdev)
+static int spmi_regulator_vs_ocp(struct regulator_dev *rdev, int lim_uA,
+				 int severity, bool enable)
 {
 	struct spmi_regulator *vreg = rdev_get_drvdata(rdev);
 	u8 reg = SPMI_VS_OCP_OVERRIDE;
 
+	if (lim_uA || !enable || severity != REGULATOR_SEVERITY_PROT)
+		return -EINVAL;
+
 	return spmi_vreg_write(vreg, SPMI_VS_REG_OCP, &reg, 1);
 }
 
diff --git a/drivers/regulator/stpmic1_regulator.c b/drivers/regulator/stpmic1_regulator.c
index cf10fdb72e320..2d7597c76e4a1 100644
--- a/drivers/regulator/stpmic1_regulator.c
+++ b/drivers/regulator/stpmic1_regulator.c
@@ -32,7 +32,8 @@ struct stpmic1_regulator_cfg {
 
 static int stpmic1_set_mode(struct regulator_dev *rdev, unsigned int mode);
 static unsigned int stpmic1_get_mode(struct regulator_dev *rdev);
-static int stpmic1_set_icc(struct regulator_dev *rdev);
+static int stpmic1_set_icc(struct regulator_dev *rdev, int lim, int severity,
+			   bool enable);
 static unsigned int stpmic1_map_mode(unsigned int mode);
 
 enum {
@@ -491,11 +492,26 @@ static int stpmic1_set_mode(struct regulator_dev *rdev, unsigned int mode)
 				  STPMIC1_BUCK_MODE_LP, value);
 }
 
-static int stpmic1_set_icc(struct regulator_dev *rdev)
+static int stpmic1_set_icc(struct regulator_dev *rdev, int lim, int severity,
+			   bool enable)
 {
 	struct stpmic1_regulator_cfg *cfg = rdev_get_drvdata(rdev);
 	struct regmap *regmap = rdev_get_regmap(rdev);
 
+	/*
+	 * The code seems like one bit in a register controls whether OCP is
+	 * enabled. So we might be able to turn it off here if that
+	 * was requested. I won't support this because I don't have the HW.
+	 * Feel free to try and implement if you have the HW and need kernel
+	 * to disable this.
+	 *
+	 * Also, I don't know if limit can be configured or if we support
+	 * error/warning instead of protect. So I just keep existing logic
+	 * and assume no.
+	 */
+	if (lim || severity != REGULATOR_SEVERITY_PROT || !enable)
+		return -EINVAL;
+
 	/* enable switch off in case of over current */
 	return regmap_update_bits(regmap, cfg->icc_reg, cfg->icc_mask,
 				  cfg->icc_mask);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 1d1a8951e7407..4ebfaacf42b7a 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -40,6 +40,15 @@ enum regulator_status {
 	REGULATOR_STATUS_UNDEFINED,
 };
 
+enum regulator_detection_severity {
+	/* Hardware shut down voltage outputs if condition is detected */
+	REGULATOR_SEVERITY_PROT,
+	/* Hardware is probably damaged/inoperable */
+	REGULATOR_SEVERITY_ERR,
+	/* Hardware is still recoverable but recovery action must be taken */
+	REGULATOR_SEVERITY_WARN,
+};
+
 /* Initialize struct linear_range for regulators */
 #define REGULATOR_LINEAR_RANGE(_min_uV, _min_sel, _max_sel, _step_uV)	\
 {									\
@@ -78,8 +87,25 @@ enum regulator_status {
  * @get_current_limit: Get the configured limit for a current-limited regulator.
  * @set_input_current_limit: Configure an input limit.
  *
- * @set_over_current_protection: Support capability of automatically shutting
- *                               down when detecting an over current event.
+ * @set_over_current_protection: Support enabling of and setting limits for over
+ *	current situation detection. Detection can be configured for three
+ *	levels of severity.
+ *	REGULATOR_SEVERITY_PROT should automatically shut down the regulator(s).
+ *	REGULATOR_SEVERITY_ERR should indicate that over-current situation is
+ *		caused by an unrecoverable error but HW does not perform
+ *		automatic shut down.
+ *	REGULATOR_SEVERITY_WARN should indicate situation where hardware is
+ *		still believed to not be damaged but that a board specific
+ *		recovery action is needed. If lim_uA is 0 the limit should not
+ *		be changed but the detection should just be enabled/disabled as
+ *		is requested.
+ * @set_over_voltage_protection: Support enabling of and setting limits for over
+ *	voltage situation detection. Detection can be configured for same
+ *	severities as over current protection.
+ * @set_under_voltage_protection: Support enabling of and setting limits for
+ *	under voltage situation detection.
+ * @set_thermal_protection: Support enabling of and setting limits for over
+ *	temperature situation detection.
  *
  * @set_active_discharge: Set active discharge enable/disable of regulators.
  *
@@ -143,8 +169,15 @@ struct regulator_ops {
 	int (*get_current_limit) (struct regulator_dev *);
 
 	int (*set_input_current_limit) (struct regulator_dev *, int lim_uA);
-	int (*set_over_current_protection) (struct regulator_dev *);
-	int (*set_active_discharge) (struct regulator_dev *, bool enable);
+	int (*set_over_current_protection)(struct regulator_dev *, int lim_uA,
+					   int severity, bool enable);
+	int (*set_over_voltage_protection)(struct regulator_dev *, int lim_uV,
+					   int severity, bool enable);
+	int (*set_under_voltage_protection)(struct regulator_dev *, int lim_uV,
+					    int severity, bool enable);
+	int (*set_thermal_protection)(struct regulator_dev *, int lim,
+				      int severity, bool enable);
+	int (*set_active_discharge)(struct regulator_dev *, bool enable);
 
 	/* enable/disable regulator */
 	int (*enable) (struct regulator_dev *);
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 8a56f033b6cde..68b4a514a410b 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -83,6 +83,14 @@ struct regulator_state {
 	bool changeable;
 };
 
+#define REGULATOR_NOTIF_LIMIT_DISABLE -1
+#define REGULATOR_NOTIF_LIMIT_ENABLE -2
+struct notification_limit {
+	int prot;
+	int err;
+	int warn;
+};
+
 /**
  * struct regulation_constraints - regulator operating constraints.
  *
@@ -100,6 +108,11 @@ struct regulator_state {
  * @ilim_uA: Maximum input current.
  * @system_load: Load that isn't captured by any consumer requests.
  *
+ * @over_curr_limits:		Limits for acting on over current.
+ * @over_voltage_limits:	Limits for acting on over voltage.
+ * @under_voltage_limits:	Limits for acting on under voltage.
+ * @temp_limits:		Limits for acting on over temperature.
+ *
  * @max_spread: Max possible spread between coupled regulators
  * @max_uV_step: Max possible step change in voltage
  * @valid_modes_mask: Mask of modes which may be configured by consumers.
@@ -116,6 +129,11 @@ struct regulator_state {
  * @pull_down: Enable pull down when regulator is disabled.
  * @over_current_protection: Auto disable on over current event.
  *
+ * @over_current_detection: Configure over current limits.
+ * @over_voltage_detection: Configure over voltage limits.
+ * @under_voltage_detection: Configure under voltage limits.
+ * @over_temp_detection: Configure over temperature limits.
+ *
  * @input_uV: Input voltage for regulator when supplied by another regulator.
  *
  * @state_disk: State for regulator when system is suspended in disk mode.
@@ -172,6 +190,10 @@ struct regulation_constraints {
 	struct regulator_state state_disk;
 	struct regulator_state state_mem;
 	struct regulator_state state_standby;
+	struct notification_limit over_curr_limits;
+	struct notification_limit over_voltage_limits;
+	struct notification_limit under_voltage_limits;
+	struct notification_limit temp_limits;
 	suspend_state_t initial_state; /* suspend state to set at init */
 
 	/* mode to set on startup */
@@ -193,6 +215,10 @@ struct regulation_constraints {
 	unsigned soft_start:1;	/* ramp voltage slowly */
 	unsigned pull_down:1;	/* pull down resistor when regulator off */
 	unsigned over_current_protection:1; /* auto disable on over current */
+	unsigned over_current_detection:1; /* notify on over current */
+	unsigned over_voltage_detection:1; /* notify on over voltage */
+	unsigned under_voltage_detection:1; /* notify on under voltage */
+	unsigned over_temp_detection:1; /* notify on over temperature */
 };
 
 /**
-- 
GitLab


From 627793e4ca4f511837de893545baf0e1b8174dc2 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:42:30 +0300
Subject: [PATCH 3416/3804] regulator: bd9576 add FET ON-resistance for OCW

BD9576MUF provides over-current protection and detection. Current is
measured as voltage loss over external FET. Allow specifying FET's on
resistance so current monitoring limits can be converted to voltages.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/e5feb160d7e09f33fff5b88f1928c66a15c6680f.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/rohm,bd9576-regulator.yaml           | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/regulator/rohm,bd9576-regulator.yaml b/Documentation/devicetree/bindings/regulator/rohm,bd9576-regulator.yaml
index b6515a0cee629..7cb74cc8c5d96 100644
--- a/Documentation/devicetree/bindings/regulator/rohm,bd9576-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/rohm,bd9576-regulator.yaml
@@ -27,6 +27,12 @@ patternProperties:
       Properties for single regulator.
     $ref: "regulator.yaml#"
 
+    properties:
+      rohm,ocw-fet-ron-micro-ohms:
+        description: |
+          External FET's ON-resistance. Required if VoutS1 OCP/OCW is
+          to be set.
+
     required:
       - regulator-name
 
-- 
GitLab


From e7bf1fa58c46db9f72220c4472272d6da0a54c91 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:42:46 +0300
Subject: [PATCH 3417/3804] regulator: bd9576: Support error reporting

BD9573 and BD9576 support set of "protection" interrupts for "fatal"
issues. Those lead to SOC reset as PMIC shuts the power outputs. Thus
there is no relevant IRQ handling for them.

Few "detection" interrupts were added to the BD9576 with the idea that
SOC could take some recovery-action before error gets unrecoverable.

Add support for over and under voltage detection for Vout1 ... Vout4
and VoutL1. Add over-current detection for VoutS1 and finally a
thermal warning (common for all regulators) which alerts 30 C
before temperature reaches the thermal shutdown point. This way
consumer drivers can build error-recovery mechanisms.

Unfortunately the BD9576 interrupt logic was not re-evaluated. IRQs
are not designed to be properly acknowledged - and IRQ line is kept
active for whole duration of error condition (in comparison to
informing only about state change).

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/05c4f7a8e30ef1d4d5f3ceab07da4ebe68f5b4ed.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd9576-regulator.c | 1050 +++++++++++++++++++++++---
 1 file changed, 926 insertions(+), 124 deletions(-)

diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c
index 204a2da054f53..6ba12af4c632f 100644
--- a/drivers/regulator/bd9576-regulator.c
+++ b/drivers/regulator/bd9576-regulator.c
@@ -2,10 +2,10 @@
 // Copyright (C) 2020 ROHM Semiconductors
 // ROHM BD9576MUF/BD9573MUF regulator driver
 
-#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/mfd/rohm-bd957x.h>
 #include <linux/mfd/rohm-generic.h>
@@ -16,11 +16,18 @@
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
 
 #define BD957X_VOUTS1_VOLT	3300000
 #define BD957X_VOUTS4_BASE_VOLT	1030000
 #define BD957X_VOUTS34_NUM_VOLT	32
 
+#define BD9576_THERM_IRQ_MASK_TW	BIT(5)
+#define BD9576_xVD_IRQ_MASK_VOUTL1	BIT(5)
+#define BD9576_UVD_IRQ_MASK_VOUTS1_OCW	BIT(6)
+#define BD9576_xVD_IRQ_MASK_VOUT1TO4	0x0F
+
 static int vout1_volt_table[] = {5000000, 4900000, 4800000, 4700000, 4600000,
 				 4500000, 4500000, 4500000, 5000000, 5100000,
 				 5200000, 5300000, 5400000, 5500000, 5500000,
@@ -36,9 +43,85 @@ static int voutl1_volt_table[] = {2500000, 2540000, 2580000, 2620000, 2660000,
 				  2420000, 2380000, 2340000, 2300000, 2260000,
 				  2220000};
 
+static const struct linear_range vout1_xvd_ranges[] = {
+	REGULATOR_LINEAR_RANGE(225000, 0x01, 0x2b, 0),
+	REGULATOR_LINEAR_RANGE(225000, 0x2c, 0x54, 5000),
+	REGULATOR_LINEAR_RANGE(425000, 0x55, 0x7f, 0),
+};
+
+static const struct linear_range vout234_xvd_ranges[] = {
+	REGULATOR_LINEAR_RANGE(17000, 0x01, 0x0f, 0),
+	REGULATOR_LINEAR_RANGE(17000, 0x10, 0x6d, 1000),
+	REGULATOR_LINEAR_RANGE(110000, 0x6e, 0x7f, 0),
+};
+
+static const struct linear_range voutL1_xvd_ranges[] = {
+	REGULATOR_LINEAR_RANGE(34000, 0x01, 0x0f, 0),
+	REGULATOR_LINEAR_RANGE(34000, 0x10, 0x6d, 2000),
+	REGULATOR_LINEAR_RANGE(220000, 0x6e, 0x7f, 0),
+};
+
+static struct linear_range voutS1_ocw_ranges_internal[] = {
+	REGULATOR_LINEAR_RANGE(200000, 0x01, 0x04, 0),
+	REGULATOR_LINEAR_RANGE(250000, 0x05, 0x18, 50000),
+	REGULATOR_LINEAR_RANGE(1200000, 0x19, 0x3f, 0),
+};
+
+static struct linear_range voutS1_ocw_ranges[] = {
+	REGULATOR_LINEAR_RANGE(50000, 0x01, 0x04, 0),
+	REGULATOR_LINEAR_RANGE(60000, 0x05, 0x18, 10000),
+	REGULATOR_LINEAR_RANGE(250000, 0x19, 0x3f, 0),
+};
+
+static struct linear_range voutS1_ocp_ranges_internal[] = {
+	REGULATOR_LINEAR_RANGE(300000, 0x01, 0x06, 0),
+	REGULATOR_LINEAR_RANGE(350000, 0x7, 0x1b, 50000),
+	REGULATOR_LINEAR_RANGE(1350000, 0x1c, 0x3f, 0),
+};
+
+static struct linear_range voutS1_ocp_ranges[] = {
+	REGULATOR_LINEAR_RANGE(70000, 0x01, 0x06, 0),
+	REGULATOR_LINEAR_RANGE(80000, 0x7, 0x1b, 10000),
+	REGULATOR_LINEAR_RANGE(280000, 0x1c, 0x3f, 0),
+};
+
 struct bd957x_regulator_data {
 	struct regulator_desc desc;
 	int base_voltage;
+	struct regulator_dev *rdev;
+	int ovd_notif;
+	int uvd_notif;
+	int temp_notif;
+	int ovd_err;
+	int uvd_err;
+	int temp_err;
+	const struct linear_range *xvd_ranges;
+	int num_xvd_ranges;
+	bool oc_supported;
+	unsigned int ovd_reg;
+	unsigned int uvd_reg;
+	unsigned int xvd_mask;
+	unsigned int ocp_reg;
+	unsigned int ocp_mask;
+	unsigned int ocw_reg;
+	unsigned int ocw_mask;
+	unsigned int ocw_rfet;
+};
+
+#define BD9576_NUM_REGULATORS 6
+#define BD9576_NUM_OVD_REGULATORS 5
+
+struct bd957x_data {
+	struct bd957x_regulator_data regulator_data[BD9576_NUM_REGULATORS];
+	struct regmap *regmap;
+	struct delayed_work therm_irq_suppress;
+	struct delayed_work ovd_irq_suppress;
+	struct delayed_work uvd_irq_suppress;
+	unsigned int therm_irq;
+	unsigned int ovd_irq;
+	unsigned int uvd_irq;
+	spinlock_t err_lock;
+	int regulator_global_err;
 };
 
 static int bd957x_vout34_list_voltage(struct regulator_dev *rdev,
@@ -72,151 +155,784 @@ static int bd957x_list_voltage(struct regulator_dev *rdev,
 	return desc->volt_table[index];
 }
 
-static const struct regulator_ops bd957x_vout34_ops = {
+static void bd9576_fill_ovd_flags(struct bd957x_regulator_data *data,
+				  bool warn)
+{
+	if (warn) {
+		data->ovd_notif = REGULATOR_EVENT_OVER_VOLTAGE_WARN;
+		data->ovd_err = REGULATOR_ERROR_OVER_VOLTAGE_WARN;
+	} else {
+		data->ovd_notif = REGULATOR_EVENT_REGULATION_OUT;
+		data->ovd_err = REGULATOR_ERROR_REGULATION_OUT;
+	}
+}
+
+static void bd9576_fill_ocp_flags(struct bd957x_regulator_data *data,
+				  bool warn)
+{
+	if (warn) {
+		data->uvd_notif = REGULATOR_EVENT_OVER_CURRENT_WARN;
+		data->uvd_err = REGULATOR_ERROR_OVER_CURRENT_WARN;
+	} else {
+		data->uvd_notif = REGULATOR_EVENT_OVER_CURRENT;
+		data->uvd_err = REGULATOR_ERROR_OVER_CURRENT;
+	}
+}
+
+static void bd9576_fill_uvd_flags(struct bd957x_regulator_data *data,
+				  bool warn)
+{
+	if (warn) {
+		data->uvd_notif = REGULATOR_EVENT_UNDER_VOLTAGE_WARN;
+		data->uvd_err = REGULATOR_ERROR_UNDER_VOLTAGE_WARN;
+	} else {
+		data->uvd_notif = REGULATOR_EVENT_UNDER_VOLTAGE;
+		data->uvd_err = REGULATOR_ERROR_UNDER_VOLTAGE;
+	}
+}
+
+static void bd9576_fill_temp_flags(struct bd957x_regulator_data *data,
+				   bool enable, bool warn)
+{
+	if (!enable) {
+		data->temp_notif = 0;
+		data->temp_err = 0;
+	} else if (warn) {
+		data->temp_notif = REGULATOR_EVENT_OVER_TEMP_WARN;
+		data->temp_err = REGULATOR_ERROR_OVER_TEMP_WARN;
+	} else {
+		data->temp_notif = REGULATOR_EVENT_OVER_TEMP;
+		data->temp_err = REGULATOR_ERROR_OVER_TEMP;
+	}
+}
+
+static int bd9576_set_limit(const struct linear_range *r, int num_ranges,
+			    struct regmap *regmap, int reg, int mask, int lim)
+{
+	int ret;
+	bool found;
+	int sel = 0;
+
+	if (lim) {
+
+		ret = linear_range_get_selector_low_array(r, num_ranges,
+							  lim, &sel, &found);
+		if (ret)
+			return ret;
+
+		if (!found)
+			dev_warn(regmap_get_device(regmap),
+				 "limit %d out of range. Setting lower\n",
+				 lim);
+	}
+
+	return regmap_update_bits(regmap, reg, mask, sel);
+}
+
+static bool check_ocp_flag_mismatch(struct regulator_dev *rdev, int severity,
+				    struct bd957x_regulator_data *r)
+{
+	if ((severity == REGULATOR_SEVERITY_ERR &&
+	    r->uvd_notif != REGULATOR_EVENT_OVER_CURRENT) ||
+	    (severity == REGULATOR_SEVERITY_WARN &&
+	    r->uvd_notif != REGULATOR_EVENT_OVER_CURRENT_WARN)) {
+		dev_warn(rdev_get_dev(rdev),
+			 "Can't support both OCP WARN and ERR\n");
+		/* Do not overwrite ERR config with WARN */
+		if (severity == REGULATOR_SEVERITY_WARN)
+			return true;
+
+		bd9576_fill_ocp_flags(r, 0);
+	}
+
+	return false;
+}
+
+static bool check_uvd_flag_mismatch(struct regulator_dev *rdev, int severity,
+				    struct bd957x_regulator_data *r)
+{
+	if ((severity == REGULATOR_SEVERITY_ERR &&
+	     r->uvd_notif != REGULATOR_EVENT_UNDER_VOLTAGE) ||
+	     (severity == REGULATOR_SEVERITY_WARN &&
+	     r->uvd_notif != REGULATOR_EVENT_UNDER_VOLTAGE_WARN)) {
+		dev_warn(rdev_get_dev(rdev),
+			 "Can't support both UVD WARN and ERR\n");
+		if (severity == REGULATOR_SEVERITY_WARN)
+			return true;
+
+		bd9576_fill_uvd_flags(r, 0);
+	}
+
+	return false;
+}
+
+static bool check_ovd_flag_mismatch(struct regulator_dev *rdev, int severity,
+				    struct bd957x_regulator_data *r)
+{
+	if ((severity == REGULATOR_SEVERITY_ERR &&
+	     r->ovd_notif != REGULATOR_EVENT_REGULATION_OUT) ||
+	     (severity == REGULATOR_SEVERITY_WARN &&
+	     r->ovd_notif != REGULATOR_EVENT_OVER_VOLTAGE_WARN)) {
+		dev_warn(rdev_get_dev(rdev),
+			 "Can't support both OVD WARN and ERR\n");
+		if (severity == REGULATOR_SEVERITY_WARN)
+			return true;
+
+		bd9576_fill_ovd_flags(r, 0);
+	}
+
+	return false;
+}
+
+static bool check_temp_flag_mismatch(struct regulator_dev *rdev, int severity,
+				    struct bd957x_regulator_data *r)
+{
+	if ((severity == REGULATOR_SEVERITY_ERR &&
+	     r->temp_notif != REGULATOR_EVENT_OVER_TEMP) ||
+	     (severity == REGULATOR_SEVERITY_WARN &&
+	     r->temp_notif != REGULATOR_EVENT_OVER_TEMP_WARN)) {
+		dev_warn(rdev_get_dev(rdev),
+			 "Can't support both thermal WARN and ERR\n");
+		/* Do not overwrite ERR config with WARN */
+		if (severity == REGULATOR_SEVERITY_WARN)
+			return true;
+	}
+	return false;
+}
+
+static int bd9576_set_ocp(struct regulator_dev *rdev, int lim_uA, int severity,
+			  bool enable)
+{
+	struct bd957x_data *d;
+	struct bd957x_regulator_data *r;
+	int reg, mask;
+	int Vfet, rfet;
+	const struct linear_range *range;
+	int num_ranges;
+
+	if ((lim_uA && !enable) || (!lim_uA && enable))
+		return -EINVAL;
+
+	r = container_of(rdev->desc, struct bd957x_regulator_data, desc);
+	if (!r->oc_supported)
+		return -EINVAL;
+
+	d = rdev_get_drvdata(rdev);
+
+	if (severity == REGULATOR_SEVERITY_PROT) {
+		reg = r->ocp_reg;
+		mask = r->ocp_mask;
+		if (r->ocw_rfet) {
+			range = voutS1_ocp_ranges;
+			num_ranges = ARRAY_SIZE(voutS1_ocp_ranges);
+			rfet = r->ocw_rfet / 1000;
+		} else {
+			range = voutS1_ocp_ranges_internal;
+			num_ranges = ARRAY_SIZE(voutS1_ocp_ranges_internal);
+			/* Internal values are already micro-amperes */
+			rfet = 1000;
+		}
+	} else {
+		reg = r->ocw_reg;
+		mask = r->ocw_mask;
+
+		if (r->ocw_rfet) {
+			range = voutS1_ocw_ranges;
+			num_ranges = ARRAY_SIZE(voutS1_ocw_ranges);
+			rfet = r->ocw_rfet / 1000;
+		} else {
+			range = voutS1_ocw_ranges_internal;
+			num_ranges = ARRAY_SIZE(voutS1_ocw_ranges_internal);
+			/* Internal values are already micro-amperes */
+			rfet = 1000;
+		}
+
+		/* We abuse uvd fields for OCW on VoutS1 */
+		if (r->uvd_notif) {
+			/*
+			 * If both warning and error are requested, prioritize
+			 * ERROR configuration
+			 */
+			if (check_ocp_flag_mismatch(rdev, severity, r))
+				return 0;
+		} else {
+			bool warn = severity == REGULATOR_SEVERITY_WARN;
+
+			bd9576_fill_ocp_flags(r, warn);
+		}
+	}
+
+	/*
+	 * limits are given in uA, rfet is mOhm
+	 * Divide lim_uA by 1000 to get Vfet in uV.
+	 * (We expect both Rfet and limit uA to be magnitude of hundreds of
+	 * milli Amperes & milli Ohms => we should still have decent accuracy)
+	 */
+	Vfet = lim_uA/1000 * rfet;
+
+	return bd9576_set_limit(range, num_ranges, d->regmap,
+				reg, mask, Vfet);
+}
+
+static int bd9576_set_uvp(struct regulator_dev *rdev, int lim_uV, int severity,
+			  bool enable)
+{
+	struct bd957x_data *d;
+	struct bd957x_regulator_data *r;
+	int mask, reg;
+
+	if (severity == REGULATOR_SEVERITY_PROT) {
+		if (!enable || lim_uV)
+			return -EINVAL;
+		return 0;
+	}
+
+	/*
+	 * BD9576 has enable control as a special value in limit reg. Can't
+	 * set limit but keep feature disabled or enable W/O given limit.
+	 */
+	if ((lim_uV && !enable) || (!lim_uV && enable))
+		return -EINVAL;
+
+	r = container_of(rdev->desc, struct bd957x_regulator_data, desc);
+	d = rdev_get_drvdata(rdev);
+
+	mask = r->xvd_mask;
+	reg = r->uvd_reg;
+	/*
+	 * Check that there is no mismatch for what the detection IRQs are to
+	 * be used.
+	 */
+	if (r->uvd_notif) {
+		if (check_uvd_flag_mismatch(rdev, severity, r))
+			return 0;
+	} else {
+		bd9576_fill_uvd_flags(r, severity == REGULATOR_SEVERITY_WARN);
+	}
+
+	return bd9576_set_limit(r->xvd_ranges, r->num_xvd_ranges, d->regmap,
+				reg, mask, lim_uV);
+}
+
+static int bd9576_set_ovp(struct regulator_dev *rdev, int lim_uV, int severity,
+			  bool enable)
+{
+	struct bd957x_data *d;
+	struct bd957x_regulator_data *r;
+	int mask, reg;
+
+	if (severity == REGULATOR_SEVERITY_PROT) {
+		if (!enable || lim_uV)
+			return -EINVAL;
+		return 0;
+	}
+
+	/*
+	 * BD9576 has enable control as a special value in limit reg. Can't
+	 * set limit but keep feature disabled or enable W/O given limit.
+	 */
+	if ((lim_uV && !enable) || (!lim_uV && enable))
+		return -EINVAL;
+
+	r = container_of(rdev->desc, struct bd957x_regulator_data, desc);
+	d = rdev_get_drvdata(rdev);
+
+	mask = r->xvd_mask;
+	reg = r->ovd_reg;
+	/*
+	 * Check that there is no mismatch for what the detection IRQs are to
+	 * be used.
+	 */
+	if (r->ovd_notif) {
+		if (check_ovd_flag_mismatch(rdev, severity, r))
+			return 0;
+	} else {
+		bd9576_fill_ovd_flags(r, severity == REGULATOR_SEVERITY_WARN);
+	}
+
+	return bd9576_set_limit(r->xvd_ranges, r->num_xvd_ranges, d->regmap,
+				reg, mask, lim_uV);
+}
+
+
+static int bd9576_set_tw(struct regulator_dev *rdev, int lim, int severity,
+			  bool enable)
+{
+	struct bd957x_data *d;
+	struct bd957x_regulator_data *r;
+	int i;
+
+	/*
+	 * BD9576MUF has fixed temperature limits
+	 * The detection can only be enabled/disabled
+	 */
+	if (lim)
+		return -EINVAL;
+
+	/* Protection can't be disabled */
+	if (severity == REGULATOR_SEVERITY_PROT) {
+		if (!enable)
+			return -EINVAL;
+		else
+			return 0;
+	}
+
+	r = container_of(rdev->desc, struct bd957x_regulator_data, desc);
+	d = rdev_get_drvdata(rdev);
+
+	/*
+	 * Check that there is no mismatch for what the detection IRQs are to
+	 * be used.
+	 */
+	if (r->temp_notif)
+		if (check_temp_flag_mismatch(rdev, severity, r))
+			return 0;
+
+	bd9576_fill_temp_flags(r, enable, severity == REGULATOR_SEVERITY_WARN);
+
+	if (enable)
+		return regmap_update_bits(d->regmap, BD957X_REG_INT_THERM_MASK,
+					 BD9576_THERM_IRQ_MASK_TW, 0);
+
+	/*
+	 * If any of the regulators is interested in thermal warning we keep IRQ
+	 * enabled.
+	 */
+	for (i = 0; i < BD9576_NUM_REGULATORS; i++)
+		if (d->regulator_data[i].temp_notif)
+			return 0;
+
+	return regmap_update_bits(d->regmap, BD957X_REG_INT_THERM_MASK,
+				  BD9576_THERM_IRQ_MASK_TW,
+				  BD9576_THERM_IRQ_MASK_TW);
+}
+
+static const struct regulator_ops bd9573_vout34_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = bd957x_vout34_list_voltage,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 };
 
-static const struct regulator_ops bd957X_vouts1_regulator_ops = {
+static const struct regulator_ops bd9576_vout34_ops = {
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = bd957x_vout34_list_voltage,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_over_voltage_protection = bd9576_set_ovp,
+	.set_under_voltage_protection = bd9576_set_uvp,
+	.set_thermal_protection = bd9576_set_tw,
+};
+
+static const struct regulator_ops bd9573_vouts1_regulator_ops = {
+	.is_enabled = regulator_is_enabled_regmap,
+};
+
+static const struct regulator_ops bd9576_vouts1_regulator_ops = {
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_over_current_protection = bd9576_set_ocp,
+};
+
+static const struct regulator_ops bd9573_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = bd957x_list_voltage,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 };
 
-static const struct regulator_ops bd957x_ops = {
+static const struct regulator_ops bd9576_ops = {
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = bd957x_list_voltage,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_over_voltage_protection = bd9576_set_ovp,
+	.set_under_voltage_protection = bd9576_set_uvp,
+	.set_thermal_protection = bd9576_set_tw,
+};
+
+static const struct regulator_ops  *bd9573_ops_arr[] = {
+	[BD957X_VD50]	= &bd9573_ops,
+	[BD957X_VD18]	= &bd9573_ops,
+	[BD957X_VDDDR]	= &bd9573_vout34_ops,
+	[BD957X_VD10]	= &bd9573_vout34_ops,
+	[BD957X_VOUTL1]	= &bd9573_ops,
+	[BD957X_VOUTS1]	= &bd9573_vouts1_regulator_ops,
 };
 
-static struct bd957x_regulator_data bd9576_regulators[] = {
-	{
-		.desc = {
-			.name = "VD50",
-			.of_match = of_match_ptr("regulator-vd50"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VD50,
-			.type = REGULATOR_VOLTAGE,
-			.ops = &bd957x_ops,
-			.volt_table = &vout1_volt_table[0],
-			.n_voltages = ARRAY_SIZE(vout1_volt_table),
-			.vsel_reg = BD957X_REG_VOUT1_TUNE,
-			.vsel_mask = BD957X_MASK_VOUT1_TUNE,
-			.enable_reg = BD957X_REG_POW_TRIGGER1,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+static const struct regulator_ops  *bd9576_ops_arr[] = {
+	[BD957X_VD50]	= &bd9576_ops,
+	[BD957X_VD18]	= &bd9576_ops,
+	[BD957X_VDDDR]	= &bd9576_vout34_ops,
+	[BD957X_VD10]	= &bd9576_vout34_ops,
+	[BD957X_VOUTL1]	= &bd9576_ops,
+	[BD957X_VOUTS1]	= &bd9576_vouts1_regulator_ops,
+};
+
+static int vouts1_get_fet_res(struct device_node *np,
+				const struct regulator_desc *desc,
+				struct regulator_config *cfg)
+{
+	struct bd957x_regulator_data *data;
+	int ret;
+	u32 uohms;
+
+	data = container_of(desc, struct bd957x_regulator_data, desc);
+
+	ret = of_property_read_u32(np, "rohm,ocw-fet-ron-micro-ohms", &uohms);
+	if (ret) {
+		if (ret != -EINVAL)
+			return ret;
+
+		return 0;
+	}
+	data->ocw_rfet = uohms;
+	return 0;
+}
+
+static struct bd957x_data bd957x_regulators = {
+	.regulator_data = {
+		{
+			.desc = {
+				.name = "VD50",
+				.of_match = of_match_ptr("regulator-vd50"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VD50,
+				.type = REGULATOR_VOLTAGE,
+				.volt_table = &vout1_volt_table[0],
+				.n_voltages = ARRAY_SIZE(vout1_volt_table),
+				.vsel_reg = BD957X_REG_VOUT1_TUNE,
+				.vsel_mask = BD957X_MASK_VOUT1_TUNE,
+				.enable_reg = BD957X_REG_POW_TRIGGER1,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+			},
+			.xvd_ranges = vout1_xvd_ranges,
+			.num_xvd_ranges = ARRAY_SIZE(vout1_xvd_ranges),
+			.ovd_reg = BD9576_REG_VOUT1_OVD,
+			.uvd_reg = BD9576_REG_VOUT1_UVD,
+			.xvd_mask = BD9576_MASK_XVD,
 		},
-	},
-	{
-		.desc = {
-			.name = "VD18",
-			.of_match = of_match_ptr("regulator-vd18"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VD18,
-			.type = REGULATOR_VOLTAGE,
-			.ops = &bd957x_ops,
-			.volt_table = &vout2_volt_table[0],
-			.n_voltages = ARRAY_SIZE(vout2_volt_table),
-			.vsel_reg = BD957X_REG_VOUT2_TUNE,
-			.vsel_mask = BD957X_MASK_VOUT2_TUNE,
-			.enable_reg = BD957X_REG_POW_TRIGGER2,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+		{
+			.desc = {
+				.name = "VD18",
+				.of_match = of_match_ptr("regulator-vd18"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VD18,
+				.type = REGULATOR_VOLTAGE,
+				.volt_table = &vout2_volt_table[0],
+				.n_voltages = ARRAY_SIZE(vout2_volt_table),
+				.vsel_reg = BD957X_REG_VOUT2_TUNE,
+				.vsel_mask = BD957X_MASK_VOUT2_TUNE,
+				.enable_reg = BD957X_REG_POW_TRIGGER2,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+			},
+			.xvd_ranges = vout234_xvd_ranges,
+			.num_xvd_ranges = ARRAY_SIZE(vout234_xvd_ranges),
+			.ovd_reg = BD9576_REG_VOUT2_OVD,
+			.uvd_reg = BD9576_REG_VOUT2_UVD,
+			.xvd_mask = BD9576_MASK_XVD,
 		},
-	},
-	{
-		.desc = {
-			.name = "VDDDR",
-			.of_match = of_match_ptr("regulator-vdddr"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VDDDR,
-			.ops = &bd957x_vout34_ops,
-			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD957X_VOUTS34_NUM_VOLT,
-			.vsel_reg = BD957X_REG_VOUT3_TUNE,
-			.vsel_mask = BD957X_MASK_VOUT3_TUNE,
-			.enable_reg = BD957X_REG_POW_TRIGGER3,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+		{
+			.desc = {
+				.name = "VDDDR",
+				.of_match = of_match_ptr("regulator-vdddr"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VDDDR,
+				.type = REGULATOR_VOLTAGE,
+				.n_voltages = BD957X_VOUTS34_NUM_VOLT,
+				.vsel_reg = BD957X_REG_VOUT3_TUNE,
+				.vsel_mask = BD957X_MASK_VOUT3_TUNE,
+				.enable_reg = BD957X_REG_POW_TRIGGER3,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+			},
+			.ovd_reg = BD9576_REG_VOUT3_OVD,
+			.uvd_reg = BD9576_REG_VOUT3_UVD,
+			.xvd_mask = BD9576_MASK_XVD,
+			.xvd_ranges = vout234_xvd_ranges,
+			.num_xvd_ranges = ARRAY_SIZE(vout234_xvd_ranges),
 		},
-	},
-	{
-		.desc = {
-			.name = "VD10",
-			.of_match = of_match_ptr("regulator-vd10"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VD10,
-			.ops = &bd957x_vout34_ops,
-			.type = REGULATOR_VOLTAGE,
-			.fixed_uV = BD957X_VOUTS4_BASE_VOLT,
-			.n_voltages = BD957X_VOUTS34_NUM_VOLT,
-			.vsel_reg = BD957X_REG_VOUT4_TUNE,
-			.vsel_mask = BD957X_MASK_VOUT4_TUNE,
-			.enable_reg = BD957X_REG_POW_TRIGGER4,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+		{
+			.desc = {
+				.name = "VD10",
+				.of_match = of_match_ptr("regulator-vd10"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VD10,
+				.type = REGULATOR_VOLTAGE,
+				.fixed_uV = BD957X_VOUTS4_BASE_VOLT,
+				.n_voltages = BD957X_VOUTS34_NUM_VOLT,
+				.vsel_reg = BD957X_REG_VOUT4_TUNE,
+				.vsel_mask = BD957X_MASK_VOUT4_TUNE,
+				.enable_reg = BD957X_REG_POW_TRIGGER4,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+			},
+			.xvd_ranges = vout234_xvd_ranges,
+			.num_xvd_ranges = ARRAY_SIZE(vout234_xvd_ranges),
+			.ovd_reg = BD9576_REG_VOUT4_OVD,
+			.uvd_reg = BD9576_REG_VOUT4_UVD,
+			.xvd_mask = BD9576_MASK_XVD,
 		},
-	},
-	{
-		.desc = {
-			.name = "VOUTL1",
-			.of_match = of_match_ptr("regulator-voutl1"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VOUTL1,
-			.ops = &bd957x_ops,
-			.type = REGULATOR_VOLTAGE,
-			.volt_table = &voutl1_volt_table[0],
-			.n_voltages = ARRAY_SIZE(voutl1_volt_table),
-			.vsel_reg = BD957X_REG_VOUTL1_TUNE,
-			.vsel_mask = BD957X_MASK_VOUTL1_TUNE,
-			.enable_reg = BD957X_REG_POW_TRIGGERL1,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+		{
+			.desc = {
+				.name = "VOUTL1",
+				.of_match = of_match_ptr("regulator-voutl1"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VOUTL1,
+				.type = REGULATOR_VOLTAGE,
+				.volt_table = &voutl1_volt_table[0],
+				.n_voltages = ARRAY_SIZE(voutl1_volt_table),
+				.vsel_reg = BD957X_REG_VOUTL1_TUNE,
+				.vsel_mask = BD957X_MASK_VOUTL1_TUNE,
+				.enable_reg = BD957X_REG_POW_TRIGGERL1,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+			},
+			.xvd_ranges = voutL1_xvd_ranges,
+			.num_xvd_ranges = ARRAY_SIZE(voutL1_xvd_ranges),
+			.ovd_reg = BD9576_REG_VOUTL1_OVD,
+			.uvd_reg = BD9576_REG_VOUTL1_UVD,
+			.xvd_mask = BD9576_MASK_XVD,
 		},
-	},
-	{
-		.desc = {
-			.name = "VOUTS1",
-			.of_match = of_match_ptr("regulator-vouts1"),
-			.regulators_node = of_match_ptr("regulators"),
-			.id = BD957X_VOUTS1,
-			.ops = &bd957X_vouts1_regulator_ops,
-			.type = REGULATOR_VOLTAGE,
-			.n_voltages = 1,
-			.fixed_uV = BD957X_VOUTS1_VOLT,
-			.enable_reg = BD957X_REG_POW_TRIGGERS1,
-			.enable_mask = BD957X_REGULATOR_EN_MASK,
-			.enable_val = BD957X_REGULATOR_DIS_VAL,
-			.enable_is_inverted = true,
-			.owner = THIS_MODULE,
+		{
+			.desc = {
+				.name = "VOUTS1",
+				.of_match = of_match_ptr("regulator-vouts1"),
+				.regulators_node = of_match_ptr("regulators"),
+				.id = BD957X_VOUTS1,
+				.type = REGULATOR_VOLTAGE,
+				.n_voltages = 1,
+				.fixed_uV = BD957X_VOUTS1_VOLT,
+				.enable_reg = BD957X_REG_POW_TRIGGERS1,
+				.enable_mask = BD957X_REGULATOR_EN_MASK,
+				.enable_val = BD957X_REGULATOR_DIS_VAL,
+				.enable_is_inverted = true,
+				.owner = THIS_MODULE,
+				.of_parse_cb = vouts1_get_fet_res,
+			},
+			.oc_supported = true,
+			.ocw_reg = BD9576_REG_VOUT1S_OCW,
+			.ocw_mask = BD9576_MASK_VOUT1S_OCW,
+			.ocp_reg = BD9576_REG_VOUT1S_OCP,
+			.ocp_mask = BD9576_MASK_VOUT1S_OCP,
 		},
 	},
 };
 
+static int bd9576_renable(struct regulator_irq_data *rid, int reg, int mask)
+{
+	int val, ret;
+	struct bd957x_data *d = (struct bd957x_data *)rid->data;
+
+	ret = regmap_read(d->regmap, reg, &val);
+	if (ret)
+		return REGULATOR_FAILED_RETRY;
+
+	if (rid->opaque && rid->opaque == (val & mask)) {
+		/*
+		 * It seems we still have same status. Ack and return
+		 * information that we are still out of limits and core
+		 * should not enable IRQ
+		 */
+		regmap_write(d->regmap, reg, mask & val);
+		return REGULATOR_ERROR_ON;
+	}
+	rid->opaque = 0;
+	/*
+	 * Status was changed. Either problem was solved or we have new issues.
+	 * Let's re-enable IRQs and be prepared to report problems again
+	 */
+	return REGULATOR_ERROR_CLEARED;
+}
+
+static int bd9576_uvd_renable(struct regulator_irq_data *rid)
+{
+	return bd9576_renable(rid, BD957X_REG_INT_UVD_STAT, UVD_IRQ_VALID_MASK);
+}
+
+static int bd9576_ovd_renable(struct regulator_irq_data *rid)
+{
+	return bd9576_renable(rid, BD957X_REG_INT_OVD_STAT, OVD_IRQ_VALID_MASK);
+}
+
+static int bd9576_temp_renable(struct regulator_irq_data *rid)
+{
+	return bd9576_renable(rid, BD957X_REG_INT_THERM_STAT,
+			      BD9576_THERM_IRQ_MASK_TW);
+}
+
+static int bd9576_uvd_handler(int irq, struct regulator_irq_data *rid,
+			      unsigned long *dev_mask)
+{
+	int val, ret, i;
+	struct bd957x_data *d = (struct bd957x_data *)rid->data;
+
+	ret = regmap_read(d->regmap, BD957X_REG_INT_UVD_STAT, &val);
+	if (ret)
+		return REGULATOR_FAILED_RETRY;
+
+	*dev_mask = 0;
+
+	rid->opaque = val & UVD_IRQ_VALID_MASK;
+
+	/*
+	 * Go through the set status bits and report either error or warning
+	 * to the notifier depending on what was flagged in DT
+	 */
+	*dev_mask = val & BD9576_xVD_IRQ_MASK_VOUT1TO4;
+	/* There is 1 bit gap in register after Vout1 .. Vout4 statuses */
+	*dev_mask |= ((val & BD9576_xVD_IRQ_MASK_VOUTL1) >> 1);
+	/*
+	 * We (ab)use the uvd for OCW notification. DT parsing should
+	 * have added correct OCW flag to uvd_notif and uvd_err for S1
+	 */
+	*dev_mask |= ((val & BD9576_UVD_IRQ_MASK_VOUTS1_OCW) >> 1);
+
+	for_each_set_bit(i, dev_mask, 6) {
+		struct bd957x_regulator_data *rdata;
+		struct regulator_err_state *stat;
+
+		rdata = &d->regulator_data[i];
+		stat  = &rid->states[i];
+
+		stat->notifs	= rdata->uvd_notif;
+		stat->errors	= rdata->uvd_err;
+	}
+
+	ret = regmap_write(d->regmap, BD957X_REG_INT_UVD_STAT,
+			   UVD_IRQ_VALID_MASK & val);
+
+	return 0;
+}
+
+static int bd9576_ovd_handler(int irq, struct regulator_irq_data *rid,
+			      unsigned long *dev_mask)
+{
+	int val, ret, i;
+	struct bd957x_data *d = (struct bd957x_data *)rid->data;
+
+	ret = regmap_read(d->regmap, BD957X_REG_INT_OVD_STAT, &val);
+	if (ret)
+		return REGULATOR_FAILED_RETRY;
+
+	rid->opaque = val & OVD_IRQ_VALID_MASK;
+	*dev_mask = 0;
+
+	if (!(val & OVD_IRQ_VALID_MASK))
+		return 0;
+
+	*dev_mask = val & BD9576_xVD_IRQ_MASK_VOUT1TO4;
+	/* There is 1 bit gap in register after Vout1 .. Vout4 statuses */
+	*dev_mask |= ((val & BD9576_xVD_IRQ_MASK_VOUTL1) >> 1);
+
+	for_each_set_bit(i, dev_mask, 5) {
+		struct bd957x_regulator_data *rdata;
+		struct regulator_err_state *stat;
+
+		rdata = &d->regulator_data[i];
+		stat  = &rid->states[i];
+
+		stat->notifs	= rdata->ovd_notif;
+		stat->errors	= rdata->ovd_err;
+	}
+
+	/* Clear the sub-IRQ status */
+	regmap_write(d->regmap, BD957X_REG_INT_OVD_STAT,
+		     OVD_IRQ_VALID_MASK & val);
+
+	return 0;
+}
+
+#define BD9576_DEV_MASK_ALL_REGULATORS 0x3F
+
+static int bd9576_thermal_handler(int irq, struct regulator_irq_data *rid,
+				  unsigned long *dev_mask)
+{
+	int val, ret, i;
+	struct bd957x_data *d = (struct bd957x_data *)rid->data;
+
+	ret = regmap_read(d->regmap, BD957X_REG_INT_THERM_STAT, &val);
+	if (ret)
+		return REGULATOR_FAILED_RETRY;
+
+	if (!(val & BD9576_THERM_IRQ_MASK_TW)) {
+		*dev_mask = 0;
+		return 0;
+	}
+
+	*dev_mask = BD9576_DEV_MASK_ALL_REGULATORS;
+
+	for (i = 0; i < BD9576_NUM_REGULATORS; i++) {
+		struct bd957x_regulator_data *rdata;
+		struct regulator_err_state *stat;
+
+		rdata = &d->regulator_data[i];
+		stat  = &rid->states[i];
+
+		stat->notifs	= rdata->temp_notif;
+		stat->errors	= rdata->temp_err;
+	}
+
+	/* Clear the sub-IRQ status */
+	regmap_write(d->regmap, BD957X_REG_INT_THERM_STAT,
+		     BD9576_THERM_IRQ_MASK_TW);
+
+	return 0;
+}
+
 static int bd957x_probe(struct platform_device *pdev)
 {
+	int i;
+	unsigned int num_reg_data;
+	bool vout_mode, ddr_sel, may_have_irqs;
 	struct regmap *regmap;
+	struct bd957x_data *ic_data;
 	struct regulator_config config = { 0 };
-	int i;
-	bool vout_mode, ddr_sel;
-	const struct bd957x_regulator_data *reg_data = &bd9576_regulators[0];
-	unsigned int num_reg_data = ARRAY_SIZE(bd9576_regulators);
+	/* All regulators are related to UVD and thermal IRQs... */
+	struct regulator_dev *rdevs[BD9576_NUM_REGULATORS];
+	/* ...But VoutS1 is not flagged by OVD IRQ */
+	struct regulator_dev *ovd_devs[BD9576_NUM_OVD_REGULATORS];
+	static const struct regulator_irq_desc bd9576_notif_uvd = {
+		.name = "bd9576-uvd",
+		.irq_off_ms = 1000,
+		.map_event = bd9576_uvd_handler,
+		.renable = bd9576_uvd_renable,
+		.data = &bd957x_regulators,
+	};
+	static const struct regulator_irq_desc bd9576_notif_ovd = {
+		.name = "bd9576-ovd",
+		.irq_off_ms = 1000,
+		.map_event = bd9576_ovd_handler,
+		.renable = bd9576_ovd_renable,
+		.data = &bd957x_regulators,
+	};
+	static const struct regulator_irq_desc bd9576_notif_temp = {
+		.name = "bd9576-temp",
+		.irq_off_ms = 1000,
+		.map_event = bd9576_thermal_handler,
+		.renable = bd9576_temp_renable,
+		.data = &bd957x_regulators,
+	};
 	enum rohm_chip_type chip = platform_get_device_id(pdev)->driver_data;
 
+	num_reg_data = ARRAY_SIZE(bd957x_regulators.regulator_data);
+
+	ic_data = &bd957x_regulators;
+
 	regmap = dev_get_regmap(pdev->dev.parent, NULL);
 	if (!regmap) {
 		dev_err(&pdev->dev, "No regmap\n");
 		return -EINVAL;
 	}
+
+	ic_data->regmap = regmap;
 	vout_mode = of_property_read_bool(pdev->dev.parent->of_node,
 					 "rohm,vout1-en-low");
 	if (vout_mode) {
@@ -263,15 +979,17 @@ static int bd957x_probe(struct platform_device *pdev)
 	 * bytes and use bd9576_regulators directly for non-constant configs
 	 * like DDR voltage selection.
 	 */
+	platform_set_drvdata(pdev, ic_data);
 	ddr_sel =  of_property_read_bool(pdev->dev.parent->of_node,
 					 "rohm,ddr-sel-low");
 	if (ddr_sel)
-		bd9576_regulators[2].desc.fixed_uV = 1350000;
+		ic_data->regulator_data[2].desc.fixed_uV = 1350000;
 	else
-		bd9576_regulators[2].desc.fixed_uV = 1500000;
+		ic_data->regulator_data[2].desc.fixed_uV = 1500000;
 
 	switch (chip) {
 	case ROHM_CHIP_TYPE_BD9576:
+		may_have_irqs = true;
 		dev_dbg(&pdev->dev, "Found BD9576MUF\n");
 		break;
 	case ROHM_CHIP_TYPE_BD9573:
@@ -282,32 +1000,116 @@ static int bd957x_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	for (i = 0; i < num_reg_data; i++) {
+		struct regulator_desc *d;
+
+		d = &ic_data->regulator_data[i].desc;
+
+
+		if (may_have_irqs) {
+			if (d->id >= ARRAY_SIZE(bd9576_ops_arr))
+				return -EINVAL;
+
+			d->ops = bd9576_ops_arr[d->id];
+		} else {
+			if (d->id >= ARRAY_SIZE(bd9573_ops_arr))
+				return -EINVAL;
+
+			d->ops = bd9573_ops_arr[d->id];
+		}
+	}
+
 	config.dev = pdev->dev.parent;
 	config.regmap = regmap;
+	config.driver_data = ic_data;
 
 	for (i = 0; i < num_reg_data; i++) {
 
-		const struct regulator_desc *desc;
-		struct regulator_dev *rdev;
-		const struct bd957x_regulator_data *r;
+		struct bd957x_regulator_data *r = &ic_data->regulator_data[i];
+		const struct regulator_desc *desc = &r->desc;
 
-		r = &reg_data[i];
-		desc = &r->desc;
-
-		rdev = devm_regulator_register(&pdev->dev, desc, &config);
-		if (IS_ERR(rdev)) {
+		r->rdev = devm_regulator_register(&pdev->dev, desc,
+							   &config);
+		if (IS_ERR(r->rdev)) {
 			dev_err(&pdev->dev,
 				"failed to register %s regulator\n",
 				desc->name);
-			return PTR_ERR(rdev);
+			return PTR_ERR(r->rdev);
 		}
 		/*
 		 * Clear the VOUT1 GPIO setting - rest of the regulators do not
 		 * support GPIO control
 		 */
 		config.ena_gpiod = NULL;
+
+		if (!may_have_irqs)
+			continue;
+
+		rdevs[i] = r->rdev;
+		if (i < BD957X_VOUTS1)
+			ovd_devs[i] = r->rdev;
 	}
+	if (may_have_irqs) {
+		void *ret;
+		/*
+		 * We can add both the possible error and warning flags here
+		 * because the core uses these only for status clearing and
+		 * if we use warnings - errors are always clear and the other
+		 * way around. We can also add CURRENT flag for all regulators
+		 * because it is never set if it is not supported. Same applies
+		 * to setting UVD for VoutS1 - it is not accidentally cleared
+		 * as it is never set.
+		 */
+		int uvd_errs = REGULATOR_ERROR_UNDER_VOLTAGE |
+			       REGULATOR_ERROR_UNDER_VOLTAGE_WARN |
+			       REGULATOR_ERROR_OVER_CURRENT |
+			       REGULATOR_ERROR_OVER_CURRENT_WARN;
+		int ovd_errs = REGULATOR_ERROR_OVER_VOLTAGE_WARN |
+			       REGULATOR_ERROR_REGULATION_OUT;
+		int temp_errs = REGULATOR_ERROR_OVER_TEMP |
+				REGULATOR_ERROR_OVER_TEMP_WARN;
+		int irq;
+
+		irq = platform_get_irq_byname(pdev, "bd9576-uvd");
+
+		/* Register notifiers - can fail if IRQ is not given */
+		ret = devm_regulator_irq_helper(&pdev->dev, &bd9576_notif_uvd,
+						irq, 0, uvd_errs, NULL,
+						&rdevs[0],
+						BD9576_NUM_REGULATORS);
+		if (IS_ERR(ret)) {
+			if (PTR_ERR(ret) == -EPROBE_DEFER)
+				return -EPROBE_DEFER;
+
+			dev_warn(&pdev->dev, "UVD disabled %pe\n", ret);
+		}
+
+		irq = platform_get_irq_byname(pdev, "bd9576-ovd");
+
+		ret = devm_regulator_irq_helper(&pdev->dev, &bd9576_notif_ovd,
+						irq, 0, ovd_errs, NULL,
+						&ovd_devs[0],
+						BD9576_NUM_OVD_REGULATORS);
+		if (IS_ERR(ret)) {
+			if (PTR_ERR(ret) == -EPROBE_DEFER)
+				return -EPROBE_DEFER;
+
+			dev_warn(&pdev->dev, "OVD disabled %pe\n", ret);
+		}
+		irq = platform_get_irq_byname(pdev, "bd9576-temp");
+
+		ret = devm_regulator_irq_helper(&pdev->dev, &bd9576_notif_temp,
+						irq, 0, temp_errs, NULL,
+						&rdevs[0],
+						BD9576_NUM_REGULATORS);
+		if (IS_ERR(ret)) {
+			if (PTR_ERR(ret) == -EPROBE_DEFER)
+				return -EPROBE_DEFER;
 
+			dev_warn(&pdev->dev, "Thermal warning disabled %pe\n",
+				 ret);
+		}
+	}
 	return 0;
 }
 
-- 
GitLab


From e71e7d3df7eb712fc29b609bd712a63d60b81b5f Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:43:04 +0300
Subject: [PATCH 3418/3804] regulator: bd9576: Fix the driver name in id table

Driver name was changed in MFD cell:
https://lore.kernel.org/lkml/560b9748094392493ebf7af11b6cc558776c4fd5.1613031055.git.matti.vaittinen@fi.rohmeurope.com/
Fix the ID table to match this.

Fixes: b1b3ced38979 ("mfd: Support ROHM BD9576MUF and BD9573MUF")
Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/e0483149333626b3bea298f305cf2809429d1822.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd9576-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c
index 6ba12af4c632f..d78d6127c5739 100644
--- a/drivers/regulator/bd9576-regulator.c
+++ b/drivers/regulator/bd9576-regulator.c
@@ -1114,8 +1114,8 @@ static int bd957x_probe(struct platform_device *pdev)
 }
 
 static const struct platform_device_id bd957x_pmic_id[] = {
-	{ "bd9573-pmic", ROHM_CHIP_TYPE_BD9573 },
-	{ "bd9576-pmic", ROHM_CHIP_TYPE_BD9576 },
+	{ "bd9573-regulator", ROHM_CHIP_TYPE_BD9573 },
+	{ "bd9576-regulator", ROHM_CHIP_TYPE_BD9576 },
 	{ },
 };
 MODULE_DEVICE_TABLE(platform, bd957x_pmic_id);
-- 
GitLab


From d55444adedaee5a3024c61637032057fcf38491b Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 3 Jun 2021 08:43:26 +0300
Subject: [PATCH 3419/3804] MAINTAINERS: Add reviewer for regulator irq_helpers

Add a reviewer entry for the regulator irq_helpers.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/2a4286ed98fd69b2539919e6a3e84d2e9804b4da.1622628334.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21901f10..68fa235a2d656 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19566,6 +19566,10 @@ F:	include/dt-bindings/regulator/
 F:	include/linux/regulator/
 K:	regulator_get_optional
 
+VOLTAGE AND CURRENT REGULATOR IRQ HELPERS
+R:	Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
+F:	drivers/regulator/irq_helpers.c
+
 VRF
 M:	David Ahern <dsahern@kernel.org>
 L:	netdev@vger.kernel.org
-- 
GitLab


From 907a399de7b0566236c480d0c01ff52220532fb1 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Mon, 21 Jun 2021 14:29:12 +0200
Subject: [PATCH 3420/3804] evm: Check xattr size discrepancy between kernel
 and user

The kernel and the user obtain an xattr value in two different ways:

kernel (EVM): uses vfs_getxattr_alloc() which obtains the xattr value from
              the filesystem handler (raw value);

user (ima-evm-utils): uses vfs_getxattr() which obtains the xattr value
                      from the LSMs (normalized value).

Normally, this does not have an impact unless security.selinux is set with
setfattr, with a value not terminated by '\0' (this is not the recommended
way, security.selinux should be set with the appropriate tools such as
chcon and restorecon).

In this case, the kernel and the user see two different xattr values: the
former sees the xattr value without '\0' (raw value), the latter sees the
value with '\0' (value normalized by SELinux).

This could result in two different verification outcomes from EVM and
ima-evm-utils, if a signature was calculated with a security.selinux value
terminated by '\0' and the value set in the filesystem is not terminated by
'\0'. The former would report verification failure due to the missing '\0',
while the latter would report verification success (because it gets the
normalized value with '\0').

This patch mitigates this issue by comparing in evm_calc_hmac_or_hash() the
size of the xattr returned by the two xattr functions and by warning the
user if there is a discrepancy.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 security/integrity/evm/evm_crypto.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index bebe160c57b9e..0450d79afdc8f 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -222,7 +222,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
 	size_t xattr_size = 0;
 	char *xattr_value = NULL;
 	int error;
-	int size;
+	int size, user_space_size;
 	bool ima_present = false;
 
 	if (!(inode->i_opflags & IOP_XATTR) ||
@@ -277,6 +277,12 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
 		if (size < 0)
 			continue;
 
+		user_space_size = vfs_getxattr(&init_user_ns, dentry,
+					       xattr->name, NULL, 0);
+		if (user_space_size != size)
+			pr_debug("file %s: xattr %s size mismatch (kernel: %d, user: %d)\n",
+				 dentry->d_name.name, xattr->name, size,
+				 user_space_size);
 		error = 0;
 		xattr_size = size;
 		crypto_shash_update(desc, (const u8 *)xattr_value, xattr_size);
-- 
GitLab


From f09216a190a4c2f62e1725f9d92e7c122b4ee423 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:19:58 +0530
Subject: [PATCH 3421/3804] KVM: PPC: Book3S HV: Fix comments of
 H_RPT_INVALIDATE arguments

The type values H_RPTI_TYPE_PRT and H_RPTI_TYPE_PAT indicate
invalidating the caching of process and partition scoped entries
respectively.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-2-bharata@linux.ibm.com
---
 arch/powerpc/include/asm/hvcall.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e3b29eda8074c..7e4b2cef40c29 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -413,9 +413,9 @@
 #define H_RPTI_TYPE_NESTED	0x0001	/* Invalidate nested guest partition-scope */
 #define H_RPTI_TYPE_TLB		0x0002	/* Invalidate TLB */
 #define H_RPTI_TYPE_PWC		0x0004	/* Invalidate Page Walk Cache */
-/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
 #define H_RPTI_TYPE_PRT		0x0008
-/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
 #define H_RPTI_TYPE_PAT		0x0008
 #define H_RPTI_TYPE_ALL		(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
 				 H_RPTI_TYPE_PRT)
-- 
GitLab


From d6265cb33b710789cbc390316eba50a883d6dcc8 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:19:59 +0530
Subject: [PATCH 3422/3804] powerpc/book3s64/radix: Add H_RPT_INVALIDATE pgsize
 encodings to mmu_psize_def

Add a field to mmu_psize_def to store the page size encodings
of H_RPT_INVALIDATE hcall. Initialize this while scanning the radix
AP encodings. This will be used when invalidating with required
page size encoding in the hcall.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-3-bharata@linux.ibm.com
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 1 +
 arch/powerpc/mm/book3s64/radix_pgtable.c | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a1..c02f42d1031ee 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -19,6 +19,7 @@ struct mmu_psize_def {
 	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
 	unsigned int	tlbiel;	/* tlbiel supported for that page size */
 	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
+	unsigned long   h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
 	union {
 		unsigned long	sllp;	/* SLB L||LP (exact mask to use in slbmte) */
 		unsigned long ap;	/* Ap encoding used by PowerISA 3.0 */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fe236c38ce00f..6e3495221ab77 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -475,6 +475,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 		def = &mmu_psize_defs[idx];
 		def->shift = shift;
 		def->ap  = ap;
+		def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
 	}
 
 	/* needed ? */
@@ -549,9 +550,13 @@ void __init radix__early_init_devtree(void)
 		 */
 		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
 		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+		mmu_psize_defs[MMU_PAGE_4K].h_rpt_pgsize =
+			psize_to_rpti_pgsize(MMU_PAGE_4K);
 
 		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
 		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+		mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
+			psize_to_rpti_pgsize(MMU_PAGE_64K);
 	}
 
 	/*
-- 
GitLab


From f0c6fbbb90504fb7e9dbf0865463d3c2b4de49e5 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:20:00 +0530
Subject: [PATCH 3423/3804] KVM: PPC: Book3S HV: Add support for
 H_RPT_INVALIDATE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

H_RPT_INVALIDATE does two types of TLB invalidations:

1. Process-scoped invalidations for guests when LPCR[GTSE]=0.
   This is currently not used in KVM as GTSE is not usually
   disabled in KVM.
2. Partition-scoped invalidations that an L1 hypervisor does on
   behalf of an L2 guest. This is currently handled
   by the H_TLB_INVALIDATE hcall, which this new hcall replaces.

This commit enables process-scoped invalidations for L1 guests.
Support for process-scoped and partition-scoped invalidations
from/for nested guests will be added separately.

Process scoped tlbie invalidations from L1 and nested guests
need RS register for TLBIE instruction to contain both PID and
LPID.  This patch introduces primitives that execute tlbie
instruction with both PID and LPID set in preparation for
H_RPT_INVALIDATE hcall.

A description of H_RPT_INVALIDATE follows:

int64   /* H_Success: Return code on successful completion */
        /* H_Busy - repeat the call with the same */
        /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid
	   parameters */
hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT
					translation
					lookaside information */
      uint64 id,        /* PID/LPID to invalidate */
      uint64 target,    /* Invalidation target */
      uint64 type,      /* Type of lookaside information */
      uint64 pg_sizes,  /* Page sizes */
      uint64 start,     /* Start of Effective Address (EA)
			   range (inclusive) */
      uint64 end)       /* End of EA range (exclusive) */

Invalidation targets (target)
-----------------------------
Core MMU        0x01 /* All virtual processors in the
			partition */
Core local MMU  0x02 /* Current virtual processor */
Nest MMU        0x04 /* All nest/accelerator agents
			in use by the partition */

A combination of the above can be specified,
except core and core local.

Type of translation to invalidate (type)
---------------------------------------
NESTED       0x0001  /* invalidate nested guest partition-scope */
TLB          0x0002  /* Invalidate TLB */
PWC          0x0004  /* Invalidate Page Walk Cache */
PRT          0x0008  /* Invalidate caching of Process Table
			Entries if NESTED is clear */
PAT          0x0008  /* Invalidate caching of Partition Table
			Entries if NESTED is set */

A combination of the above can be specified.

Page size mask (pages)
----------------------
4K              0x01
64K             0x02
2M              0x04
1G              0x08
All sizes       (-1UL)

A combination of the above can be specified.
All page sizes can be selected with -1.

Semantics: Invalidate radix tree lookaside information
           matching the parameters given.
* Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters
  are different from the defined values.
* Return H_PARAMETER if NESTED is set and pid is not a valid nested
  LPID allocated to this partition
* Return H_P5 if (start, end) doesn't form a valid range. Start and
  end should be a valid Quadrant address and end > start.
* Return H_NotSupported if the partition is not running in radix
  translation mode.
* May invalidate more translation information than requested.
* If start = 0 and end = -1, set the range to cover all valid
  addresses. Else start and end should be aligned to 4kB (lower 12
  bits clear).
* If NESTED is clear, then invalidate process scoped lookaside
  information. Else pid specifies a nested LPID, and the invalidation
  is performed   on nested guest partition table and nested guest
  partition scope real addresses.
* If pid = 0 and NESTED is clear, then valid addresses are quadrant 3
  and quadrant 0 spaces. Else valid addresses are quadrant 0.
* Pages which are fully covered by the range are to be invalidated.
  Those which are partially covered are considered outside
  invalidation range, which allows a caller to optimally invalidate
  ranges that may contain mixed page sizes.
* Return H_SUCCESS on success.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-4-bharata@linux.ibm.com
---
 arch/powerpc/include/asm/mmu_context.h |  12 ++
 arch/powerpc/kvm/book3s_hv.c           |  36 +++++
 arch/powerpc/mm/book3s64/radix_tlb.c   | 174 +++++++++++++++++++++++++
 3 files changed, 222 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 84e192aa54fdd..db186c539d37e 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -216,6 +216,18 @@ static inline void mm_context_add_copro(struct mm_struct *mm) { }
 static inline void mm_context_remove_copro(struct mm_struct *mm) { }
 #endif
 
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
+#else
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+					   unsigned long lpid,
+					   unsigned long type,
+					   unsigned long pg_sizes,
+					   unsigned long start,
+					   unsigned long end) { }
+#endif
 
 extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			       struct task_struct *tsk);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c0f1299736b95..b32b968ce56d8 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -76,6 +76,7 @@
 #include <asm/kvm_book3s_uvmem.h>
 #include <asm/ultravisor.h>
 #include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
 
 #include "book3s.h"
 
@@ -922,6 +923,32 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 	return yield_count;
 }
 
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+				    unsigned long id, unsigned long target,
+				    unsigned long type, unsigned long pg_sizes,
+				    unsigned long start, unsigned long end)
+{
+	if (!kvm_is_radix(vcpu->kvm))
+		return H_UNSUPPORTED;
+
+	if (end < start)
+		return H_P5;
+
+	/*
+	 * Partition-scoped invalidation for nested guests.
+	 * Not yet supported
+	 */
+	if (type & H_RPTI_TYPE_NESTED)
+		return H_P3;
+
+	/*
+	 * Process-scoped invalidation for L1 guests.
+	 */
+	do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+				type, pg_sizes, start, end);
+	return H_SUCCESS;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1105,6 +1132,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
 			ret = H_HARDWARE;
 		break;
+	case H_RPT_INVALIDATE:
+		ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+					      kvmppc_get_gpr(vcpu, 5),
+					      kvmppc_get_gpr(vcpu, 6),
+					      kvmppc_get_gpr(vcpu, 7),
+					      kvmppc_get_gpr(vcpu, 8),
+					      kvmppc_get_gpr(vcpu, 9));
+		break;
 
 	case H_SET_PARTITION_TABLE:
 		ret = H_FUNCTION;
@@ -1225,6 +1260,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
 	case H_XIRR_X:
 #endif
 	case H_PAGE_INIT:
+	case H_RPT_INVALIDATE:
 		return 1;
 	}
 
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 312236a6b0855..1815fe4c5ffa8 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -130,6 +130,21 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = PPC_BIT(53); /* IS = 1 */
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
 	unsigned long rb,rs,prs,r;
@@ -190,6 +205,23 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
 	trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+					    unsigned long lpid,
+					    unsigned long ap, unsigned long ric)
+{
+	unsigned long rb, rs, prs, r;
+
+	rb = va & ~(PPC_BITMASK(52, 63));
+	rb |= ap << PPC_BITLSHIFT(58);
+	rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+	prs = 1; /* process scoped */
+	r = 1;   /* radix format */
+
+	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+	trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
 					    unsigned long ap, unsigned long ric)
 {
@@ -235,6 +267,22 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
 	}
 }
 
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+					     unsigned long pid,
+					     unsigned long lpid,
+					     unsigned long ap)
+{
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+	}
+}
+
 static inline void fixup_tlbie_pid(unsigned long pid)
 {
 	/*
@@ -254,6 +302,25 @@ static inline void fixup_tlbie_pid(unsigned long pid)
 	}
 }
 
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+	/*
+	 * We can use any address for the invalidation, pick one which is
+	 * probably unused as an optimisation.
+	 */
+	unsigned long va = ((1UL << 52) - 1);
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+	}
+
+	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+		asm volatile("ptesync" : : : "memory");
+		__tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+				RIC_FLUSH_TLB);
+	}
+}
 
 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
 				       unsigned long ap)
@@ -344,6 +411,31 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+				   unsigned long ric)
+{
+	asm volatile("ptesync" : : : "memory");
+
+	/*
+	 * Work around the fact that the "ric" argument to __tlbie_pid_lpid
+	 * must be a compile-time constant to match the "i" constraint
+	 * in the asm statement.
+	 */
+	switch (ric) {
+	case RIC_FLUSH_TLB:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+		fixup_tlbie_pid_lpid(pid, lpid);
+		break;
+	case RIC_FLUSH_PWC:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+		break;
+	case RIC_FLUSH_ALL:
+	default:
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		fixup_tlbie_pid_lpid(pid, lpid);
+	}
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
 struct tlbiel_pid {
 	unsigned long pid;
 	unsigned long ric;
@@ -469,6 +561,20 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
 	fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					 unsigned long pid, unsigned long lpid,
+					 unsigned long page_size,
+					 unsigned long psize)
+{
+	unsigned long addr;
+	unsigned long ap = mmu_get_ap(psize);
+
+	for (addr = start; addr < end; addr += page_size)
+		__tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+	fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
 				      unsigned long psize, unsigned long ric)
 {
@@ -549,6 +655,18 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+					unsigned long pid, unsigned long lpid,
+					unsigned long page_size,
+					unsigned long psize, bool also_pwc)
+{
+	asm volatile("ptesync" : : : "memory");
+	if (also_pwc)
+		__tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+	__tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
 				unsigned long start, unsigned long end,
 				unsigned long pid, unsigned long page_size,
@@ -1336,3 +1454,59 @@ void radix__flush_tlb_all(void)
 		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
 	asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	unsigned long psize, nr_pages;
+	struct mmu_psize_def *def;
+	bool flush_pid;
+
+	/*
+	 * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+	 * do a single IS=1 based flush.
+	 */
+	if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+		return;
+	}
+
+	if (type & H_RPTI_TYPE_PWC)
+		_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+
+	/* Full PID flush */
+	if (start == 0 && end == -1)
+		return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+	/* Do range invalidation for all the valid page sizes */
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		nr_pages = (end - start) >> def->shift;
+		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+		/*
+		 * If the number of pages spanning the range is above
+		 * the ceiling, convert the request into a full PID flush.
+		 * And since PID flush takes out all the page sizes, there
+		 * is no need to consider remaining page sizes.
+		 */
+		if (flush_pid) {
+			_tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+			return;
+		}
+		_tlbie_va_range_lpid(start, end, pid, lpid,
+				     (1UL << def->shift), psize, false);
+	}
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
-- 
GitLab


From 5f89468e2f060031cd89fd4287298e0eaf246bf6 Mon Sep 17 00:00:00 2001
From: Bumyong Lee <bumyong.lee@samsung.com>
Date: Mon, 10 May 2021 18:10:04 +0900
Subject: [PATCH 3424/3804] swiotlb: manipulate orig_addr when tlb_addr has
 offset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case a driver wants to sync part of a range with an offset,
swiotlb_tbl_sync_single() copies from the orig_addr base to tlb_addr
with the offset and ends up with a data mismatch.

It was removed from
"swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single",
but said logic has to be added back in.

From Linus's email:
"That commit which the removed the offset calculation entirely, because the old

        (unsigned long)tlb_addr & (IO_TLB_SIZE - 1)

was wrong, but instead of removing it, I think it should have just
fixed it to be

        (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);

instead. That way the slot offset always matches the slot index calculation."

(Unfortunately that broke NVMe).

The use-case that drivers are hitting is as follows:

1. Get dma_addr_t from dma_map_single()

dma_addr_t tlb_addr = dma_map_single(dev, vaddr, vsize, DMA_TO_DEVICE);

    |<---------------vsize------------->|
    +-----------------------------------+
    |                                   | original buffer
    +-----------------------------------+
  vaddr

 swiotlb_align_offset
     |<----->|<---------------vsize------------->|
     +-------+-----------------------------------+
     |       |                                   | swiotlb buffer
     +-------+-----------------------------------+
          tlb_addr

2. Do something
3. Sync dma_addr_t through dma_sync_single_for_device(..)

dma_sync_single_for_device(dev, tlb_addr + offset, size, DMA_TO_DEVICE);

  Error case.
    Copy data to original buffer but it is from base addr (instead of
  base addr + offset) in original buffer:

 swiotlb_align_offset
     |<----->|<- offset ->|<- size ->|
     +-------+-----------------------------------+
     |       |            |##########|           | swiotlb buffer
     +-------+-----------------------------------+
          tlb_addr

    |<- size ->|
    +-----------------------------------+
    |##########|                        | original buffer
    +-----------------------------------+
  vaddr

The fix is to copy the data to the original buffer and take into
account the offset, like so:

 swiotlb_align_offset
     |<----->|<- offset ->|<- size ->|
     +-------+-----------------------------------+
     |       |            |##########|           | swiotlb buffer
     +-------+-----------------------------------+
          tlb_addr

    |<- offset ->|<- size ->|
    +-----------------------------------+
    |            |##########|           | original buffer
    +-----------------------------------+
  vaddr

[One fix, which was Linus's and made more sense as it created a
symmetry, would break NVMe. The reason for that is that:
 unsigned int offset = (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);

would come up with the proper offset, but it would lose the
alignment (which this patch contains).]

Fixes: 16fc3cef33a0 ("swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single")
Signed-off-by: Bumyong Lee <bumyong.lee@samsung.com>
Signed-off-by: Chanho Park <chanho61.park@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reported-by: Dominique MARTINET <dominique.martinet@atmark-techno.com>
Reported-by: Horia Geantă <horia.geanta@nxp.com>
Tested-by: Horia Geantă <horia.geanta@nxp.com>
CC: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 kernel/dma/swiotlb.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8ca7d505d61cf..e50df8d8f87e2 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -334,6 +334,14 @@ void __init swiotlb_exit(void)
 	io_tlb_default_mem = NULL;
 }
 
+/*
+ * Return the offset into a iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
+
 /*
  * Bounce: copy the swiotlb buffer from or back to the original dma location
  */
@@ -346,10 +354,17 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
 	unsigned char *vaddr = phys_to_virt(tlb_addr);
+	unsigned int tlb_offset;
 
 	if (orig_addr == INVALID_PHYS_ADDR)
 		return;
 
+	tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+		     swiotlb_align_offset(dev, orig_addr);
+
+	orig_addr += tlb_offset;
+	alloc_size -= tlb_offset;
+
 	if (size > alloc_size) {
 		dev_WARN_ONCE(dev, 1,
 			"Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n",
@@ -390,14 +405,6 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 
 #define slot_addr(start, idx)	((start) + ((idx) << IO_TLB_SHIFT))
 
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
- */
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
-{
-	return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
-}
-
 /*
  * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
  */
-- 
GitLab


From dc56219fe22e9d2f395f5c58ba3277f8df4cff84 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.de>
Date: Thu, 8 Apr 2021 07:40:25 -0500
Subject: [PATCH 3425/3804] btrfs: correct try_lock_extent() usage in
 read_extent_buffer_subpage()

try_lock_extent() returns 1 on success or 0 for failure and not an error
code. If try_lock_extent() fails, read_extent_buffer_subpage() returns
zero indicating subpage extent read success.

Return EAGAIN/EWOULDBLOCK if try_lock_extent() fails in locking the
extent.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index dee2dafbc872f..74ba2e1a39277 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6184,10 +6184,8 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 
 	if (wait == WAIT_NONE) {
-		ret = try_lock_extent(io_tree, eb->start,
-				      eb->start + eb->len - 1);
-		if (ret <= 0)
-			return ret;
+		if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1))
+			return -EAGAIN;
 	} else {
 		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1);
 		if (ret < 0)
-- 
GitLab


From 94358c35d80a8de5054c295d48332611d48222b4 Mon Sep 17 00:00:00 2001
From: Su Yue <l@damenly.su>
Date: Sun, 25 Apr 2021 16:35:04 +0800
Subject: [PATCH 3426/3804] btrfs: remove stale comment for argument seed of
 btrfs_find_device

Commit b2598edf8b36 ("btrfs: remove unused argument seed from
btrfs_find_device") removed the argument seed from btrfs_find_device
but forgot the comment, so remove it.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Su Yue <l@damenly.su>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 47d27059d0641..e020447b25a24 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6670,8 +6670,6 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
  *
  * If devid and uuid are both specified, the match must be exact, otherwise
  * only devid is used.
- *
- * If @seed is true, traverse through the seed devices.
  */
 struct btrfs_device *btrfs_find_device(struct btrfs_fs_devices *fs_devices,
 				       u64 devid, u8 *uuid, u8 *fsid)
-- 
GitLab


From ed738ba7f96170384f3e94a38be5536560eabc00 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:42 -0400
Subject: [PATCH 3427/3804] btrfs: check worker before need_preemptive_reclaim

need_preemptive_reclaim() does some calculations, which aren't heavy,
but if we're already running preemptive reclaim there's no reason to do
them at all, so re-order the checks so that we don't do the calculation
if we're already doing reclaim.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 2dc674b7c3b14..c9a5e003bcfa3 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1588,8 +1588,8 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 		 * the async reclaim as we will panic.
 		 */
 		if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
-		    need_preemptive_reclaim(fs_info, space_info) &&
-		    !work_busy(&fs_info->preempt_reclaim_work)) {
+		    !work_busy(&fs_info->preempt_reclaim_work) &&
+		    need_preemptive_reclaim(fs_info, space_info)) {
 			trace_btrfs_trigger_flush(fs_info, space_info->flags,
 						  orig_bytes, flush, "preempt");
 			queue_work(system_unbound_wq,
-- 
GitLab


From 0aae4ca9e952b83f71ce50af1290f0f5d9ab9df6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:43 -0400
Subject: [PATCH 3428/3804] btrfs: only clamp the first time we have to start
 flushing

We were clamping the threshold for preemptive reclaim any time we added
a ticket to wait on, which if we have a lot of threads means we'd
essentially max out the clamp the first time we start to flush.

Instead of doing this, simply do it every time we have to start
flushing, this will make us ramp up gradually instead of going to max
clamping as soon as we start needing to do flushing.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index c9a5e003bcfa3..33edab17af0d3 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1561,6 +1561,15 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 		    flush == BTRFS_RESERVE_FLUSH_DATA) {
 			list_add_tail(&ticket.list, &space_info->tickets);
 			if (!space_info->flush) {
+				/*
+				 * We were forced to add a reserve ticket, so
+				 * our preemptive flushing is unable to keep
+				 * up.  Clamp down on the threshold for the
+				 * preemptive flushing in order to keep up with
+				 * the workload.
+				 */
+				maybe_clamp_preempt(fs_info, space_info);
+
 				space_info->flush = 1;
 				trace_btrfs_trigger_flush(fs_info,
 							  space_info->flags,
@@ -1572,14 +1581,6 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
 			list_add_tail(&ticket.list,
 				      &space_info->priority_tickets);
 		}
-
-		/*
-		 * We were forced to add a reserve ticket, so our preemptive
-		 * flushing is unable to keep up.  Clamp down on the threshold
-		 * for the preemptive flushing in order to keep up with the
-		 * workload.
-		 */
-		maybe_clamp_preempt(fs_info, space_info);
 	} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
 		used += orig_bytes;
 		/*
-- 
GitLab


From 610a6ef44ea83ef1c1e10b8270bbd157fbde3181 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:44 -0400
Subject: [PATCH 3429/3804] btrfs: take into account global rsv in
 need_preemptive_reclaim

Global rsv can't be used for normal allocations, and for very full file
systems we can decide to try and async flush constantly even though
there's really not a lot of space to reclaim.  Deal with this by
including the global block rsv size in the "total used" calculation.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 33edab17af0d3..e341f995a7dd4 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -792,12 +792,14 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
 static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 				    struct btrfs_space_info *space_info)
 {
+	u64 global_rsv_size = fs_info->global_block_rsv.reserved;
 	u64 ordered, delalloc;
 	u64 thresh = div_factor_fine(space_info->total_bytes, 98);
 	u64 used;
 
 	/* If we're just plain full then async reclaim just slows us down. */
-	if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
+	if ((space_info->bytes_used + space_info->bytes_reserved +
+	     global_rsv_size) >= thresh)
 		return false;
 
 	/*
-- 
GitLab


From 1239e2da16bf85e13063de7d2e9638219efca984 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:45 -0400
Subject: [PATCH 3430/3804] btrfs: use the global rsv size in the preemptive
 thresh calculation

We calculate the amount of "free" space available for normal
reservations by taking the total space and subtracting out the hard used
space, which is readonly, used, and reserved space.

However we weren't taking into account the global block rsv, which is
essentially hard used space.  Handle this by subtracting it from the
available free space, so that our threshold more closely mirrors
reality.

We need to do the check because it's possible that the global_rsv_size +
used is > total_bytes, sometimes the global reserve can end up being
calculated as larger than the available size (think small filesystems
where we only have the original 8MiB chunk of metadata).  It doesn't
usually happen, but that can get us into trouble so this is safer.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index e341f995a7dd4..fedf3440145a2 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -840,8 +840,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 
 	thresh = calc_available_free_space(fs_info, space_info,
 					   BTRFS_RESERVE_FLUSH_ALL);
-	thresh += (space_info->total_bytes - space_info->bytes_used -
-		   space_info->bytes_reserved - space_info->bytes_readonly);
+	used = space_info->bytes_used + space_info->bytes_reserved +
+	       space_info->bytes_readonly + global_rsv_size;
+	if (used < space_info->total_bytes)
+		thresh += space_info->total_bytes - used;
 	thresh >>= space_info->clamp;
 
 	used = space_info->bytes_pinned;
-- 
GitLab


From 30acce4eb032251be4767ee393a7e6e9748259d6 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:46 -0400
Subject: [PATCH 3431/3804] btrfs: don't include the global rsv size in the
 preemptive used amount

When deciding if we should preemptively flush space, we will add in the
amount of space used by all block rsvs.  However this also includes the
global block rsv, which isn't flushable so shouldn't be accounted for in
this calculation.  If we decide to use ->bytes_may_use in our used
calculation we need to subtract the global rsv size from this amount so
it most closely matches the flushable space.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index fedf3440145a2..0e88a1482624e 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -871,7 +871,7 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 		used += fs_info->delayed_refs_rsv.reserved +
 			fs_info->delayed_block_rsv.reserved;
 	else
-		used += space_info->bytes_may_use;
+		used += space_info->bytes_may_use - global_rsv_size;
 
 	return (used >= thresh && !btrfs_fs_closing(fs_info) &&
 		!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
-- 
GitLab


From 3e101569973e8c95ba60b5501f8a3caf7754894c Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:47 -0400
Subject: [PATCH 3432/3804] btrfs: only ignore delalloc if delalloc is much
 smaller than ordered

While testing heavy delalloc workloads I noticed that sometimes we'd
just stop preemptively flushing when we had loads of delalloc available
to flush.  This is because we skip preemptive flushing if delalloc <=
ordered.  However if we start with say 4gib of delalloc, and we flush
2gib of that, we'll stop flushing there, when we still have 2gib of
delalloc to flush.

Instead adjust the ordered bytes down by half, this way if 2/3 of our
outstanding delalloc reservations are tied up by ordered extents we
don't bother preemptive flushing, as we're getting close to the state
where we need to wait on ordered extents.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 0e88a1482624e..639787beb57aa 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -864,8 +864,14 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 	 * clearly be heavy enough to warrant preemptive flushing.  In the case
 	 * of heavy DIO or ordered reservations, preemptive flushing will just
 	 * waste time and cause us to slow down.
+	 *
+	 * We want to make sure we truly are maxed out on ordered however, so
+	 * cut ordered in half, and if it's still higher than delalloc then we
+	 * can keep flushing.  This is to avoid the case where we start
+	 * flushing, and now delalloc == ordered and we stop preemptively
+	 * flushing when we could still have several gigs of delalloc to flush.
 	 */
-	ordered = percpu_counter_read_positive(&fs_info->ordered_bytes);
+	ordered = percpu_counter_read_positive(&fs_info->ordered_bytes) >> 1;
 	delalloc = percpu_counter_read_positive(&fs_info->delalloc_bytes);
 	if (ordered >= delalloc)
 		used += fs_info->delayed_refs_rsv.reserved +
-- 
GitLab


From 385f421f18be653d21ccfd6520fbddf206ad43eb Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 28 Apr 2021 13:38:48 -0400
Subject: [PATCH 3433/3804] btrfs: handle preemptive delalloc flushing slightly
 differently

If we decide to flush delalloc from the preemptive flusher, we really do
not want to wait on ordered extents, as it gains us nothing.  However
there was logic to go ahead and wait on ordered extents if there were
more ordered bytes than delalloc bytes.  We do not want this behavior,
so pass through whether this flushing is for preemption, and do not wait
for ordered extents if that's the case.  Also break out of the shrink
loop after the first flushing, as we just want to one shot shrink
delalloc.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 639787beb57aa..42d0fa2092d4b 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -495,7 +495,8 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info,
  */
 static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 			    struct btrfs_space_info *space_info,
-			    u64 to_reclaim, bool wait_ordered)
+			    u64 to_reclaim, bool wait_ordered,
+			    bool for_preempt)
 {
 	struct btrfs_trans_handle *trans;
 	u64 delalloc_bytes;
@@ -532,7 +533,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 	 * ordered extents, otherwise we'll waste time trying to flush delalloc
 	 * that likely won't give us the space back we need.
 	 */
-	if (ordered_bytes > delalloc_bytes)
+	if (ordered_bytes > delalloc_bytes && !for_preempt)
 		wait_ordered = true;
 
 	loops = 0;
@@ -551,6 +552,14 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 				break;
 		}
 
+		/*
+		 * If we are for preemption we just want a one-shot of delalloc
+		 * flushing so we can stop flushing if we decide we don't need
+		 * to anymore.
+		 */
+		if (for_preempt)
+			break;
+
 		spin_lock(&space_info->lock);
 		if (list_empty(&space_info->tickets) &&
 		    list_empty(&space_info->priority_tickets)) {
@@ -702,7 +711,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 	case FLUSH_DELALLOC:
 	case FLUSH_DELALLOC_WAIT:
 		shrink_delalloc(fs_info, space_info, num_bytes,
-				state == FLUSH_DELALLOC_WAIT);
+				state == FLUSH_DELALLOC_WAIT, for_preempt);
 		break;
 	case FLUSH_DELAYED_REFS_NR:
 	case FLUSH_DELAYED_REFS:
-- 
GitLab


From 47cdfb5e1dd60422ec2cbc53b667f73ff9a411dc Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Fri, 30 Apr 2021 15:34:17 +0200
Subject: [PATCH 3434/3804] btrfs: zoned: print message when zone sanity check
 type fails

This extends patch 784daf2b9628 ("btrfs: zoned: sanity check zone
type"), the message was supposed to be there but was lost during merge.
We want to make the error noticeable so add it.

Fixes: 784daf2b9628 ("btrfs: zoned: sanity check zone type")
CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index f1f3b10d1dbbe..4f3bbba5815ee 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1140,6 +1140,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		}
 
 		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+			btrfs_err_in_rcu(fs_info,
+	"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
+				zone.start << SECTOR_SHIFT,
+				rcu_str_deref(device->name), device->devid);
 			ret = -EIO;
 			goto out;
 		}
-- 
GitLab


From 06e1e7f4223c98965fb721b4b1e12083cfbe777e Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Fri, 30 Apr 2021 15:34:18 +0200
Subject: [PATCH 3435/3804] btrfs: zoned: bail out if we can't read a reliable
 write pointer

If we can't read a reliable write pointer from a sequential zone, fail
creating the block group with an I/O error.

Also if the read write pointer is beyond the end of the respective zone,
fail the creation of the block group on this zone with an I/O error.

While this could also happen in real world scenarios with misbehaving
drives, this patch addresses a problem uncovered by fstests' test case
generic/475.

CC: stable@vger.kernel.org # 5.12+
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/zoned.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 4f3bbba5815ee..c7243d392ca8e 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1204,6 +1204,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 
 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
 	case 0: /* single */
+		if (alloc_offsets[0] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				physical);
+			ret = -EIO;
+			goto out;
+		}
 		cache->alloc_offset = alloc_offsets[0];
 		break;
 	case BTRFS_BLOCK_GROUP_DUP:
@@ -1221,6 +1228,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	}
 
 out:
+	if (cache->alloc_offset > fs_info->zone_size) {
+		btrfs_err(fs_info,
+			"zoned: invalid write pointer %llu in block group %llu",
+			cache->alloc_offset, cache->start);
+		ret = -EIO;
+	}
+
 	/* An extent is allocated after the write pointer */
 	if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
 		btrfs_err(fs_info,
-- 
GitLab


From f4dcfb30452631f7f308c144e1fd4d8a6ad7111b Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Tue, 11 May 2021 00:17:26 +0900
Subject: [PATCH 3436/3804] btrfs: rename check_async_write and let it return
 bool

The 'check_async_write' function is a helper used in
'btrfs_submit_metadata_bio' and it checks if asynchronous writing can be
used for metadata.

Make the function return bool and get rid of the local variable async in
btrfs_submit_metadata_bio storing the result of check_async_write's
tests.

As this is touching all function call sites, also rename it to
should_async_write as this is more in line with the naming we use.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8d386a5587ee9..dd7dc39b7508b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -917,23 +917,22 @@ static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio,
 	return btree_csum_one_bio(bio);
 }
 
-static int check_async_write(struct btrfs_fs_info *fs_info,
+static bool should_async_write(struct btrfs_fs_info *fs_info,
 			     struct btrfs_inode *bi)
 {
 	if (btrfs_is_zoned(fs_info))
-		return 0;
+		return false;
 	if (atomic_read(&bi->sync_writers))
-		return 0;
+		return false;
 	if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
 blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
 				       int mirror_num, unsigned long bio_flags)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	int async = check_async_write(fs_info, BTRFS_I(inode));
 	blk_status_t ret;
 
 	if (btrfs_op(bio) != BTRFS_MAP_WRITE) {
@@ -946,7 +945,7 @@ blk_status_t btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio,
 		if (ret)
 			goto out_w_error;
 		ret = btrfs_map_bio(fs_info, bio, mirror_num);
-	} else if (!async) {
+	} else if (!should_async_write(fs_info, BTRFS_I(inode))) {
 		ret = btree_csum_one_bio(bio);
 		if (ret)
 			goto out_w_error;
-- 
GitLab


From 08508fea07cdf6f62e61bae85d3af55433a16f98 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 3 May 2021 10:08:54 +0800
Subject: [PATCH 3437/3804] btrfs: make btrfs_verify_data_csum() to return a
 bitmap

This will provide the basis for later per-sector repair for subpage,
while still keeping the existing code happy.

If all csums match, the return value will be 0, same as now.
Only when a csum mismatches is the return value different.

The new return value will be a bitmap. For 4K sectorsize and 4K page
size, it will be 1 on a csum mismatch instead of -EIO (which is not
used directly by the callers, so there is no effective change).

But for 4K sectorsize and 64K page size, aka subpage case, since the
bvec can contain multiple sectors, knowing which sectors are corrupted
will allow us to submit repair only for corrupted sectors.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |  4 ++--
 fs/btrfs/inode.c | 18 +++++++++++++-----
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9fb76829a281c..938d8ebf4cf39 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3100,8 +3100,8 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path);
 /* inode.c */
 blk_status_t btrfs_submit_data_bio(struct inode *inode, struct bio *bio,
 				   int mirror_num, unsigned long bio_flags);
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
-			   struct page *page, u64 start, u64 end);
+unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
+				    struct page *page, u64 start, u64 end);
 struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
 					   u64 start, u64 len);
 noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 46f392943f4d0..3211469eb90fc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3152,15 +3152,19 @@ zeroit:
  * @bio_offset:	offset to the beginning of the bio (in bytes)
  * @start:	file offset of the range start
  * @end:	file offset of the range end (inclusive)
+ *
+ * Return a bitmap where bit set means a csum mismatch, and bit not set means
+ * csum match.
  */
-int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
-			   struct page *page, u64 start, u64 end)
+unsigned int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
+				    struct page *page, u64 start, u64 end)
 {
 	struct inode *inode = page->mapping->host;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	const u32 sectorsize = root->fs_info->sectorsize;
 	u32 pg_off;
+	unsigned int result = 0;
 
 	if (PageChecked(page)) {
 		ClearPageChecked(page);
@@ -3188,10 +3192,14 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
 
 		ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
 				      page_offset(page) + pg_off);
-		if (ret < 0)
-			return -EIO;
+		if (ret < 0) {
+			const int nr_bit = (pg_off - offset_in_page(start)) >>
+				     root->fs_info->sectorsize_bits;
+
+			result |= (1U << nr_bit);
+		}
 	}
-	return 0;
+	return result;
 }
 
 /*
-- 
GitLab


From 150e4b0597a7988f44d13e5199f08749c8ff432d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 3 May 2021 10:08:55 +0800
Subject: [PATCH 3438/3804] btrfs: submit read time repair only for each
 corrupted sector

Currently btrfs_submit_read_repair() has some extra check on whether the
failed bio needs extra validation for repair.  But we can avoid all
these extra mechanisms if we submit the repair for each sector.

By this, each read repair can be easily handled without the need to
verify which sector is corrupted.

This will also benefit subpage, as one subpage bvec can contain several
sectors, making the extra verification more complex.

So this patch will:

- Introduce repair_one_sector()
  The main code submitting repair, which is more or less the same as old
  btrfs_submit_read_repair().
  But this time, it only repairs one sector.

- Make btrfs_submit_read_repair() to handle sectors differently
  There are 3 different cases:

  * Good sector
    We need to release the page and extent, set the range uptodate.

  * Bad sector and failed to submit repair bio
    We need to release the page and extent, but not set the range
    uptodate.

  * Bad sector but repair bio submitted
    The page and extent release will be handled by the submitted repair
    bio. Nothing needs to be done.

  Since btrfs_submit_read_repair() will handle the page and extent
  release now, we need to skip to the next bvec even if we hit some error.

- Change the lifespan of @uptodate in end_bio_extent_readpage()
  Since now btrfs_submit_read_repair() will handle the full bvec
  which contains any corruption, we don't need to bother updating
  @uptodate bit anymore.
  Just let @uptodate be a local variable inside the main loop,
  so that any error from one bvec won't affect later bvec.

- Only export btrfs_repair_one_sector(), unexport
  btrfs_submit_read_repair()
  The only outside caller for read repair is DIO, which already submits
  its repair for just one sector.
  Only export btrfs_repair_one_sector() for DIO.

This patch will focus on the change on the repair path, the extra
validation code is still kept as is, and will be cleaned up later.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 217 ++++++++++++++++++++++++++++++-------------
 fs/btrfs/extent_io.h |  11 +--
 fs/btrfs/inode.c     |  20 ++--
 3 files changed, 168 insertions(+), 80 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 74ba2e1a39277..848dd54535f6e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2449,7 +2449,7 @@ void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
 }
 
 static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
-							     u64 start, u64 end)
+							     u64 start)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct io_failure_record *failrec;
@@ -2457,6 +2457,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	const u32 sectorsize = fs_info->sectorsize;
 	int ret;
 	u64 logical;
 
@@ -2480,7 +2481,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 		return ERR_PTR(-ENOMEM);
 
 	failrec->start = start;
-	failrec->len = end - start + 1;
+	failrec->len = sectorsize;
 	failrec->this_mirror = 0;
 	failrec->bio_flags = 0;
 	failrec->in_validation = 0;
@@ -2519,12 +2520,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	free_extent_map(em);
 
 	/* Set the bits in the private failure tree */
-	ret = set_extent_bits(failure_tree, start, end,
+	ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
 			      EXTENT_LOCKED | EXTENT_DIRTY);
 	if (ret >= 0) {
 		ret = set_state_failrec(failure_tree, start, failrec);
 		/* Set the bits in the inode's tree */
-		ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
+		ret = set_extent_bits(tree, start, start + sectorsize - 1,
+				      EXTENT_DAMAGED);
 	} else if (ret < 0) {
 		kfree(failrec);
 		return ERR_PTR(ret);
@@ -2639,11 +2641,11 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
 	return false;
 }
 
-blk_status_t btrfs_submit_read_repair(struct inode *inode,
-				      struct bio *failed_bio, u32 bio_offset,
-				      struct page *page, unsigned int pgoff,
-				      u64 start, u64 end, int failed_mirror,
-				      submit_bio_hook_t *submit_bio_hook)
+int btrfs_repair_one_sector(struct inode *inode,
+			    struct bio *failed_bio, u32 bio_offset,
+			    struct page *page, unsigned int pgoff,
+			    u64 start, int failed_mirror,
+			    submit_bio_hook_t *submit_bio_hook)
 {
 	struct io_failure_record *failrec;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2661,16 +2663,22 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-	failrec = btrfs_get_io_failure_record(inode, start, end);
+	failrec = btrfs_get_io_failure_record(inode, start);
 	if (IS_ERR(failrec))
-		return errno_to_blk_status(PTR_ERR(failrec));
-
-	need_validation = btrfs_io_needs_validation(inode, failed_bio);
+		return PTR_ERR(failrec);
 
+	/*
+	 * We will only submit repair for one sector, thus we don't need
+	 * extra validation anymore.
+	 *
+	 * TODO: All those extra validation related code will be cleaned up
+	 * later.
+	 */
+	need_validation = false;
 	if (!btrfs_check_repairable(inode, need_validation, failrec,
 				    failed_mirror)) {
 		free_io_failure(failure_tree, tree, failrec);
-		return BLK_STS_IOERR;
+		return -EIO;
 	}
 
 	repair_bio = btrfs_io_bio_alloc(1);
@@ -2704,7 +2712,120 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode,
 		free_io_failure(failure_tree, tree, failrec);
 		bio_put(repair_bio);
 	}
-	return status;
+	return blk_status_to_errno(status);
+}
+
+static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+
+	ASSERT(page_offset(page) <= start &&
+	       start + len <= page_offset(page) + PAGE_SIZE);
+
+	/*
+	 * For subpage metadata case, all btrfs_page_* helpers need page to
+	 * have page::private populated.
+	 * But we can have rare case where the last eb in the page is only
+	 * referred by the IO, and it gets released immediately after it's
+	 * read and verified.
+	 *
+	 * This can detach the page private completely.
+	 * In that case, we can just skip the page status update completely,
+	 * as the page has no eb anymore.
+	 */
+	if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) {
+		ASSERT(!is_data_inode(page->mapping->host));
+		return;
+	}
+	if (uptodate) {
+		btrfs_page_set_uptodate(fs_info, page, start, len);
+	} else {
+		btrfs_page_clear_uptodate(fs_info, page, start, len);
+		btrfs_page_set_error(fs_info, page, start, len);
+	}
+
+	if (fs_info->sectorsize == PAGE_SIZE)
+		unlock_page(page);
+	else if (is_data_inode(page->mapping->host))
+		/*
+		 * For subpage data, unlock the page if we're the last reader.
+		 * For subpage metadata, page lock is not utilized for read.
+		 */
+		btrfs_subpage_end_reader(fs_info, page, start, len);
+}
+
+static blk_status_t submit_read_repair(struct inode *inode,
+				      struct bio *failed_bio, u32 bio_offset,
+				      struct page *page, unsigned int pgoff,
+				      u64 start, u64 end, int failed_mirror,
+				      unsigned int error_bitmap,
+				      submit_bio_hook_t *submit_bio_hook)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	const u32 sectorsize = fs_info->sectorsize;
+	const int nr_bits = (end + 1 - start) >> fs_info->sectorsize_bits;
+	int error = 0;
+	int i;
+
+	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
+
+	/* We're here because we had some read errors or csum mismatch */
+	ASSERT(error_bitmap);
+
+	/*
+	 * We only get called on buffered IO, thus page must be mapped and bio
+	 * must not be cloned.
+	 */
+	ASSERT(page->mapping && !bio_flagged(failed_bio, BIO_CLONED));
+
+	/* Iterate through all the sectors in the range */
+	for (i = 0; i < nr_bits; i++) {
+		const unsigned int offset = i * sectorsize;
+		struct extent_state *cached = NULL;
+		bool uptodate = false;
+		int ret;
+
+		if (!(error_bitmap & (1U << i))) {
+			/*
+			 * This sector has no error, just end the page read
+			 * and unlock the range.
+			 */
+			uptodate = true;
+			goto next;
+		}
+
+		ret = btrfs_repair_one_sector(inode, failed_bio,
+				bio_offset + offset,
+				page, pgoff + offset, start + offset,
+				failed_mirror, submit_bio_hook);
+		if (!ret) {
+			/*
+			 * We have submitted the read repair, the page release
+			 * will be handled by the endio function of the
+			 * submitted repair bio.
+			 * Thus we don't need to do anything here.
+			 */
+			continue;
+		}
+		/*
+		 * Repair failed, just record the error but still continue.
+		 * Or the remaining sectors will not be properly unlocked.
+		 */
+		if (!error)
+			error = ret;
+next:
+		end_page_read(page, uptodate, start + offset, sectorsize);
+		if (uptodate)
+			set_extent_uptodate(&BTRFS_I(inode)->io_tree,
+					start + offset,
+					start + offset + sectorsize - 1,
+					&cached, GFP_ATOMIC);
+		unlock_extent_cached_atomic(&BTRFS_I(inode)->io_tree,
+				start + offset,
+				start + offset + sectorsize - 1,
+				&cached);
+	}
+	return errno_to_blk_status(error);
 }
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
@@ -2862,30 +2983,6 @@ static void begin_page_read(struct btrfs_fs_info *fs_info, struct page *page)
 	btrfs_subpage_start_reader(fs_info, page, page_offset(page), PAGE_SIZE);
 }
 
-static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
-{
-	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
-
-	ASSERT(page_offset(page) <= start &&
-		start + len <= page_offset(page) + PAGE_SIZE);
-
-	if (uptodate) {
-		btrfs_page_set_uptodate(fs_info, page, start, len);
-	} else {
-		btrfs_page_clear_uptodate(fs_info, page, start, len);
-		btrfs_page_set_error(fs_info, page, start, len);
-	}
-
-	if (fs_info->sectorsize == PAGE_SIZE)
-		unlock_page(page);
-	else if (is_data_inode(page->mapping->host))
-		/*
-		 * For subpage data, unlock the page if we're the last reader.
-		 * For subpage metadata, page lock is not utilized for read.
-		 */
-		btrfs_subpage_end_reader(fs_info, page, start, len);
-}
-
 /*
  * Find extent buffer for a givne bytenr.
  *
@@ -2929,7 +3026,6 @@ static struct extent_buffer *find_extent_buffer_readpage(
 static void end_bio_extent_readpage(struct bio *bio)
 {
 	struct bio_vec *bvec;
-	int uptodate = !bio->bi_status;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree, *failure_tree;
 	struct processed_extent processed = { 0 };
@@ -2944,10 +3040,12 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
+		bool uptodate = !bio->bi_status;
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
 		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 		const u32 sectorsize = fs_info->sectorsize;
+		unsigned int error_bitmap = (unsigned int)-1;
 		u64 start;
 		u64 end;
 		u32 len;
@@ -2982,14 +3080,16 @@ static void end_bio_extent_readpage(struct bio *bio)
 
 		mirror = io_bio->mirror_num;
 		if (likely(uptodate)) {
-			if (is_data_inode(inode))
-				ret = btrfs_verify_data_csum(io_bio,
+			if (is_data_inode(inode)) {
+				error_bitmap = btrfs_verify_data_csum(io_bio,
 						bio_offset, page, start, end);
-			else
+				ret = error_bitmap;
+			} else {
 				ret = btrfs_validate_metadata_buffer(io_bio,
 					page, start, end, mirror);
+			}
 			if (ret)
-				uptodate = 0;
+				uptodate = false;
 			else
 				clean_io_failure(BTRFS_I(inode)->root->fs_info,
 						 failure_tree, tree, start,
@@ -3001,27 +3101,18 @@ static void end_bio_extent_readpage(struct bio *bio)
 			goto readpage_ok;
 
 		if (is_data_inode(inode)) {
-
 			/*
-			 * The generic bio_readpage_error handles errors the
-			 * following way: If possible, new read requests are
-			 * created and submitted and will end up in
-			 * end_bio_extent_readpage as well (if we're lucky,
-			 * not in the !uptodate case). In that case it returns
-			 * 0 and we just go on with the next page in our bio.
-			 * If it can't handle the error it will return -EIO and
-			 * we remain responsible for that page.
+			 * submit_read_repair() will handle all the good and
+			 * bad sectors, we just continue to the next bvec.
 			 */
-			if (!btrfs_submit_read_repair(inode, bio, bio_offset,
-						page,
-						start - page_offset(page),
-						start, end, mirror,
-						btrfs_submit_data_bio)) {
-				uptodate = !bio->bi_status;
-				ASSERT(bio_offset + len > bio_offset);
-				bio_offset += len;
-				continue;
-			}
+			submit_read_repair(inode, bio, bio_offset, page,
+					   start - page_offset(page), start,
+					   end, mirror, error_bitmap,
+					   btrfs_submit_data_bio);
+
+			ASSERT(bio_offset + len > bio_offset);
+			bio_offset += len;
+			continue;
 		} else {
 			struct extent_buffer *eb;
 
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 227215a5722cb..bbb4ef4d09348 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -296,12 +296,11 @@ struct io_failure_record {
 	int in_validation;
 };
 
-
-blk_status_t btrfs_submit_read_repair(struct inode *inode,
-				      struct bio *failed_bio, u32 bio_offset,
-				      struct page *page, unsigned int pgoff,
-				      u64 start, u64 end, int failed_mirror,
-				      submit_bio_hook_t *submit_bio_hook);
+int btrfs_repair_one_sector(struct inode *inode,
+			    struct bio *failed_bio, u32 bio_offset,
+			    struct page *page, unsigned int pgoff,
+			    u64 start, int failed_mirror,
+			    submit_bio_hook_t *submit_bio_hook);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3211469eb90fc..c70ae0a5df92b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7945,19 +7945,17 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
 						 btrfs_ino(BTRFS_I(inode)),
 						 pgoff);
 			} else {
-				blk_status_t status;
+				int ret;
 
 				ASSERT((start - io_bio->logical) < UINT_MAX);
-				status = btrfs_submit_read_repair(inode,
-							&io_bio->bio,
-							start - io_bio->logical,
-							bvec.bv_page, pgoff,
-							start,
-							start + sectorsize - 1,
-							io_bio->mirror_num,
-							submit_dio_repair_bio);
-				if (status)
-					err = status;
+				ret = btrfs_repair_one_sector(inode,
+						&io_bio->bio,
+						start - io_bio->logical,
+						bvec.bv_page, pgoff,
+						start, io_bio->mirror_num,
+						submit_dio_repair_bio);
+				if (ret)
+					err = errno_to_blk_status(ret);
 			}
 			start += sectorsize;
 			ASSERT(bio_offset + sectorsize > bio_offset);
-- 
GitLab


From 1245835d24f1ea989a0cbcdf93ddea3dcbc3814f Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 3 May 2021 10:08:56 +0800
Subject: [PATCH 3439/3804] btrfs: remove io_failure_record::in_validation

The io_failure_record::in_validation was introduced to handle failed bio
which cross several sectors.  In such case, we still need to verify
which sectors are corrupted.

But since we've changed the way we handle corrupted sectors, by only
submitting repair for each corrupted sector, there is no need for extra
validation any more.

This patch will clean up all io_failure_record::in_validation related
code.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 119 ++++++++-----------------------------------
 fs/btrfs/extent_io.h |   3 +-
 2 files changed, 21 insertions(+), 101 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 848dd54535f6e..ce6364dd1517c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2381,13 +2381,6 @@ int clean_io_failure(struct btrfs_fs_info *fs_info,
 
 	BUG_ON(!failrec->this_mirror);
 
-	if (failrec->in_validation) {
-		/* there was no real error, just free the record */
-		btrfs_debug(fs_info,
-			"clean_io_failure: freeing dummy error at %llu",
-			failrec->start);
-		goto out;
-	}
 	if (sb_rdonly(fs_info->sb))
 		goto out;
 
@@ -2464,9 +2457,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	failrec = get_state_failrec(failure_tree, start);
 	if (!IS_ERR(failrec)) {
 		btrfs_debug(fs_info,
-			"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d",
-			failrec->logical, failrec->start, failrec->len,
-			failrec->in_validation);
+	"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
+			failrec->logical, failrec->start, failrec->len);
 		/*
 		 * when data can be on disk more than twice, add to failrec here
 		 * (e.g. with a list for failed_mirror) to make
@@ -2484,7 +2476,6 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	failrec->len = sectorsize;
 	failrec->this_mirror = 0;
 	failrec->bio_flags = 0;
-	failrec->in_validation = 0;
 
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, start, failrec->len);
@@ -2535,7 +2526,7 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	return failrec;
 }
 
-static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
+static bool btrfs_check_repairable(struct inode *inode,
 				   struct io_failure_record *failrec,
 				   int failed_mirror)
 {
@@ -2555,39 +2546,22 @@ static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
 		return false;
 	}
 
+	/* The failure record should only contain one sector */
+	ASSERT(failrec->len == fs_info->sectorsize);
+
 	/*
-	 * there are two premises:
-	 *	a) deliver good data to the caller
-	 *	b) correct the bad sectors on disk
+	 * There are two premises:
+	 * a) deliver good data to the caller
+	 * b) correct the bad sectors on disk
+	 *
+	 * Since we're only doing repair for one sector, we only need to get
+	 * a good copy of the failed sector and if we succeed, we have setup
+	 * everything for repair_io_failure to do the rest for us.
 	 */
-	if (needs_validation) {
-		/*
-		 * to fulfill b), we need to know the exact failing sectors, as
-		 * we don't want to rewrite any more than the failed ones. thus,
-		 * we need separate read requests for the failed bio
-		 *
-		 * if the following BUG_ON triggers, our validation request got
-		 * merged. we need separate requests for our algorithm to work.
-		 */
-		BUG_ON(failrec->in_validation);
-		failrec->in_validation = 1;
-		failrec->this_mirror = failed_mirror;
-	} else {
-		/*
-		 * we're ready to fulfill a) and b) alongside. get a good copy
-		 * of the failed sector and if we succeed, we have setup
-		 * everything for repair_io_failure to do the rest for us.
-		 */
-		if (failrec->in_validation) {
-			BUG_ON(failrec->this_mirror != failed_mirror);
-			failrec->in_validation = 0;
-			failrec->this_mirror = 0;
-		}
-		failrec->failed_mirror = failed_mirror;
+	failrec->failed_mirror = failed_mirror;
+	failrec->this_mirror++;
+	if (failrec->this_mirror == failed_mirror)
 		failrec->this_mirror++;
-		if (failrec->this_mirror == failed_mirror)
-			failrec->this_mirror++;
-	}
 
 	if (failrec->this_mirror > num_copies) {
 		btrfs_debug(fs_info,
@@ -2599,48 +2573,6 @@ static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
 	return true;
 }
 
-static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
-{
-	u64 len = 0;
-	const u32 blocksize = inode->i_sb->s_blocksize;
-
-	/*
-	 * If bi_status is BLK_STS_OK, then this was a checksum error, not an
-	 * I/O error. In this case, we already know exactly which sector was
-	 * bad, so we don't need to validate.
-	 */
-	if (bio->bi_status == BLK_STS_OK)
-		return false;
-
-	/*
-	 * We need to validate each sector individually if the failed I/O was
-	 * for multiple sectors.
-	 *
-	 * There are a few possible bios that can end up here:
-	 * 1. A buffered read bio, which is not cloned.
-	 * 2. A direct I/O read bio, which is cloned.
-	 * 3. A (buffered or direct) repair bio, which is not cloned.
-	 *
-	 * For cloned bios (case 2), we can get the size from
-	 * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
-	 * it from the bvecs.
-	 */
-	if (bio_flagged(bio, BIO_CLONED)) {
-		if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
-			return true;
-	} else {
-		struct bio_vec *bvec;
-		int i;
-
-		bio_for_each_bvec_all(bvec, bio, i) {
-			len += bvec->bv_len;
-			if (len > blocksize)
-				return true;
-		}
-	}
-	return false;
-}
-
 int btrfs_repair_one_sector(struct inode *inode,
 			    struct bio *failed_bio, u32 bio_offset,
 			    struct page *page, unsigned int pgoff,
@@ -2653,7 +2585,6 @@ int btrfs_repair_one_sector(struct inode *inode,
 	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
 	struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
-	bool need_validation;
 	struct bio *repair_bio;
 	struct btrfs_io_bio *repair_io_bio;
 	blk_status_t status;
@@ -2667,16 +2598,8 @@ int btrfs_repair_one_sector(struct inode *inode,
 	if (IS_ERR(failrec))
 		return PTR_ERR(failrec);
 
-	/*
-	 * We will only submit repair for one sector, thus we don't need
-	 * extra validation anymore.
-	 *
-	 * TODO: All those extra validation related code will be cleaned up
-	 * later.
-	 */
-	need_validation = false;
-	if (!btrfs_check_repairable(inode, need_validation, failrec,
-				    failed_mirror)) {
+
+	if (!btrfs_check_repairable(inode, failrec, failed_mirror)) {
 		free_io_failure(failure_tree, tree, failrec);
 		return -EIO;
 	}
@@ -2684,8 +2607,6 @@ int btrfs_repair_one_sector(struct inode *inode,
 	repair_bio = btrfs_io_bio_alloc(1);
 	repair_io_bio = btrfs_io_bio(repair_bio);
 	repair_bio->bi_opf = REQ_OP_READ;
-	if (need_validation)
-		repair_bio->bi_opf |= REQ_FAILFAST_DEV;
 	repair_bio->bi_end_io = failed_bio->bi_end_io;
 	repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
 	repair_bio->bi_private = failed_bio->bi_private;
@@ -2703,8 +2624,8 @@ int btrfs_repair_one_sector(struct inode *inode,
 	repair_io_bio->iter = repair_bio->bi_iter;
 
 	btrfs_debug(btrfs_sb(inode->i_sb),
-"repair read error: submitting new read to mirror %d, in_validation=%d",
-		    failrec->this_mirror, failrec->in_validation);
+		    "repair read error: submitting new read to mirror %d",
+		    failrec->this_mirror);
 
 	status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
 				 failrec->bio_flags);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bbb4ef4d09348..fb9a9275fc41a 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -281,7 +281,7 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
  * When IO fails, either with EIO or csum verification fails, we
  * try other mirrors that might have a good copy of the data.  This
  * io_failure_record is used to record state as we go through all the
- * mirrors.  If another mirror has good data, the page is set up to date
+ * mirrors.  If another mirror has good data, the sector is set up to date
  * and things continue.  If a good mirror can't be found, the original
  * bio end_io callback is called to indicate things have failed.
  */
@@ -293,7 +293,6 @@ struct io_failure_record {
 	unsigned long bio_flags;
 	int this_mirror;
 	int failed_mirror;
-	int in_validation;
 };
 
 int btrfs_repair_one_sector(struct inode *inode,
-- 
GitLab


From 50535db8fbf67d44522de5b79ddf66fb6d0c14a8 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Wed, 5 May 2021 09:26:28 +0800
Subject: [PATCH 3440/3804] btrfs: return EAGAIN if defrag is canceled

When inode defrag is canceled, the error is set to EAGAIN but then
overwritten by number of defragmented bytes. As this would hide the
error, rather return EAGAIN. This does not harm 'btrfs fi defrag', it
will print the error and continue to the next file (as it does for any
other error).

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5dc2fd843ae37..a7739461533d3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1455,7 +1455,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		if (btrfs_defrag_cancelled(fs_info)) {
 			btrfs_debug(fs_info, "defrag_file cancelled");
 			ret = -EAGAIN;
-			break;
+			goto error;
 		}
 
 		if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
@@ -1533,6 +1533,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		}
 	}
 
+	ret = defrag_count;
+error:
 	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
 		filemap_flush(inode->i_mapping);
 		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
@@ -1546,8 +1548,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 		btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
 	}
 
-	ret = defrag_count;
-
 out_ra:
 	if (do_compress) {
 		btrfs_inode_lock(inode, 0);
-- 
GitLab


From e7ff9e6b8e7d89199119468ae61b29a56f81ad28 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Wed, 19 May 2021 00:40:29 +0900
Subject: [PATCH 3441/3804] btrfs: zoned: factor out zoned device lookup

To be able to construct a zone append bio we need to look up the
btrfs_device. The code doing the chunk map lookup to get the device is
present in btrfs_submit_compressed_write and submit_extent_page.

Factor out the lookup calls into a helper and use it in the submission
paths.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 16 ++++------------
 fs/btrfs/extent_io.c   | 16 +++++-----------
 fs/btrfs/zoned.c       | 21 +++++++++++++++++++++
 fs/btrfs/zoned.h       |  9 +++++++++
 4 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1346d698463a6..9a0c26e4e3891 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -427,24 +427,16 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 	bio->bi_end_io = end_compressed_bio_write;
 
 	if (use_append) {
-		struct extent_map *em;
-		struct map_lookup *map;
-		struct block_device *bdev;
+		struct btrfs_device *device;
 
-		em = btrfs_get_chunk_map(fs_info, disk_start, PAGE_SIZE);
-		if (IS_ERR(em)) {
+		device = btrfs_zoned_get_device(fs_info, disk_start, PAGE_SIZE);
+		if (IS_ERR(device)) {
 			kfree(cb);
 			bio_put(bio);
 			return BLK_STS_NOTSUPP;
 		}
 
-		map = em->map_lookup;
-		/* We only support single profile for now */
-		ASSERT(map->num_stripes == 1);
-		bdev = map->stripes[0].dev->bdev;
-
-		bio_set_dev(bio, bdev);
-		free_extent_map(em);
+		bio_set_dev(bio, device->bdev);
 	}
 
 	if (blkcg_css) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ce6364dd1517c..2b250c610562d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3266,19 +3266,13 @@ static int submit_extent_page(unsigned int opf,
 		wbc_account_cgroup_owner(wbc, page, io_size);
 	}
 	if (btrfs_is_zoned(fs_info) && bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct extent_map *em;
-		struct map_lookup *map;
+		struct btrfs_device *device;
 
-		em = btrfs_get_chunk_map(fs_info, disk_bytenr, io_size);
-		if (IS_ERR(em))
-			return PTR_ERR(em);
+		device = btrfs_zoned_get_device(fs_info, disk_bytenr, io_size);
+		if (IS_ERR(device))
+			return PTR_ERR(device);
 
-		map = em->map_lookup;
-		/* We only support single profile for now */
-		ASSERT(map->num_stripes == 1);
-		btrfs_io_bio(bio)->device = map->stripes[0].dev;
-
-		free_extent_map(em);
+		btrfs_io_bio(bio)->device = device;
 	}
 
 	*bio_ret = bio;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index c7243d392ca8e..549912120cfe4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1533,3 +1533,24 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 	length = wp - physical_pos;
 	return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length);
 }
+
+struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+					    u64 logical, u64 length)
+{
+	struct btrfs_device *device;
+	struct extent_map *em;
+	struct map_lookup *map;
+
+	em = btrfs_get_chunk_map(fs_info, logical, length);
+	if (IS_ERR(em))
+		return ERR_CAST(em);
+
+	map = em->map_lookup;
+	/* We only support single profile for now */
+	ASSERT(map->num_stripes == 1);
+	device = map->stripes[0].dev;
+
+	free_extent_map(em);
+
+	return device;
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index e55d32595c2c0..b0ae2608cb6bc 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -65,6 +65,8 @@ void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
 int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 				  u64 physical_start, u64 physical_pos);
+struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+					    u64 logical, u64 length);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -191,6 +193,13 @@ static inline int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev,
 	return -EOPNOTSUPP;
 }
 
+static inline struct btrfs_device *btrfs_zoned_get_device(
+						  struct btrfs_fs_info *fs_info,
+						  u64 logical, u64 length)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
-- 
GitLab


From eb3b50536642b6e1ba67e84dcacdd9ccef30d850 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Wed, 9 Oct 2019 13:58:13 +0200
Subject: [PATCH 3442/3804] btrfs: scrub: per-device bandwidth control

Add sysfs interface to limit io during scrub. We relied on the ionice
interface to do that, eg. the idle class left the system usable while
scrub was running. This has changed when mq-deadline got widespread and
did not implement the scheduling classes. That was a CFQ thing that got
deleted. We've got numerous complaints from users about degraded
performance.

Currently only BFQ supports that but it's not a common scheduler and we
can't ask everybody to switch to it.

Alternatively the cgroup io limiting can be used but that is also a
non-trivial setup (v2 required, the controller must be enabled on the
system). This can still be used if desired.

Other ideas that have been explored: piggy-back on ionice (that is set
per-process and is accessible) and interpret the class and classdata as
bandwidth limits, but this does not have enough flexibility as there are
only 8 allowed and we'd have to map fixed limits to each value. Also
adjusting the value would need to look up the process that currently runs
scrub on the given device, and the value is not sticky so would have to
be adjusted each time scrub runs.

Running out of options, sysfs does not look that bad:

- it's accessible from scripts, or udev rules
- the name is similar to what MD-RAID has
  (/proc/sys/dev/raid/speed_limit_max or /sys/block/mdX/md/sync_speed_max)
- the value is sticky at least for filesystem mount time
- adjusting the value has immediate effect
- sysfs is available in constrained environments (eg. system rescue)
- the limit also applies to device replace

Sysfs:

- raw value is in bytes
- values written to the file accept suffixes like K, M
- file is in the per-device directory /sys/fs/btrfs/FSID/devinfo/DEVID/scrub_speed_max
- 0 means use default priority of IO

The scheduler is a simple deadline one and the accuracy is up to nearest
128K.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c   | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/sysfs.c   | 28 ++++++++++++++++++++
 fs/btrfs/volumes.h |  3 +++
 3 files changed, 97 insertions(+)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 485cda3eb8d76..518415d0c1227 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -165,6 +165,10 @@ struct scrub_ctx {
 	int			readonly;
 	int			pages_per_rd_bio;
 
+	/* State of IO submission throttling affecting the associated device */
+	ktime_t			throttle_deadline;
+	u64			throttle_sent;
+
 	int			is_dev_replace;
 	u64			write_pointer;
 
@@ -605,6 +609,7 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
 	spin_lock_init(&sctx->list_lock);
 	spin_lock_init(&sctx->stat_lock);
 	init_waitqueue_head(&sctx->list_wait);
+	sctx->throttle_deadline = 0;
 
 	WARN_ON(sctx->wr_curr_bio != NULL);
 	mutex_init(&sctx->wr_lock);
@@ -1988,6 +1993,65 @@ static void scrub_page_put(struct scrub_page *spage)
 	}
 }
 
+/*
+ * Throttling of IO submission, bandwidth-limit based, the timeslice is 1
+ * second.  Limit can be set via /sys/fs/UUID/devinfo/devid/scrub_speed_max.
+ */
+static void scrub_throttle(struct scrub_ctx *sctx)
+{
+	const int time_slice = 1000;
+	struct scrub_bio *sbio;
+	struct btrfs_device *device;
+	s64 delta;
+	ktime_t now;
+	u32 div;
+	u64 bwlimit;
+
+	sbio = sctx->bios[sctx->curr];
+	device = sbio->dev;
+	bwlimit = READ_ONCE(device->scrub_speed_max);
+	if (bwlimit == 0)
+		return;
+
+	/*
+	 * Slice is divided into intervals when the IO is submitted, adjust by
+	 * bwlimit and maximum of 64 intervals.
+	 */
+	div = max_t(u32, 1, (u32)(bwlimit / (16 * 1024 * 1024)));
+	div = min_t(u32, 64, div);
+
+	/* Start new epoch, set deadline */
+	now = ktime_get();
+	if (sctx->throttle_deadline == 0) {
+		sctx->throttle_deadline = ktime_add_ms(now, time_slice / div);
+		sctx->throttle_sent = 0;
+	}
+
+	/* Still in the time to send? */
+	if (ktime_before(now, sctx->throttle_deadline)) {
+		/* If current bio is within the limit, send it */
+		sctx->throttle_sent += sbio->bio->bi_iter.bi_size;
+		if (sctx->throttle_sent <= div_u64(bwlimit, div))
+			return;
+
+		/* We're over the limit, sleep until the rest of the slice */
+		delta = ktime_ms_delta(sctx->throttle_deadline, now);
+	} else {
+		/* New request after deadline, start new epoch */
+		delta = 0;
+	}
+
+	if (delta) {
+		long timeout;
+
+		timeout = div_u64(delta * HZ, 1000);
+		schedule_timeout_interruptible(timeout);
+	}
+
+	/* Next call will start the deadline period */
+	sctx->throttle_deadline = 0;
+}
+
 static void scrub_submit(struct scrub_ctx *sctx)
 {
 	struct scrub_bio *sbio;
@@ -1995,6 +2059,8 @@ static void scrub_submit(struct scrub_ctx *sctx)
 	if (sctx->curr == -1)
 		return;
 
+	scrub_throttle(sctx);
+
 	sbio = sctx->bios[sctx->curr];
 	sctx->curr = -1;
 	scrub_pending_bio_inc(sctx);
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 436ac7b4b3346..c45d9b6dfdb53 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1455,6 +1455,33 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
 }
 BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
 
+static ssize_t btrfs_devinfo_scrub_speed_max_show(struct kobject *kobj,
+					     struct kobj_attribute *a,
+					     char *buf)
+{
+	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+						   devid_kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n",
+			 READ_ONCE(device->scrub_speed_max));
+}
+
+static ssize_t btrfs_devinfo_scrub_speed_max_store(struct kobject *kobj,
+					      struct kobj_attribute *a,
+					      const char *buf, size_t len)
+{
+	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+						   devid_kobj);
+	char *endptr;
+	unsigned long long limit;
+
+	limit = memparse(buf, &endptr);
+	WRITE_ONCE(device->scrub_speed_max, limit);
+	return len;
+}
+BTRFS_ATTR_RW(devid, scrub_speed_max, btrfs_devinfo_scrub_speed_max_show,
+	      btrfs_devinfo_scrub_speed_max_store);
+
 static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
 					    struct kobj_attribute *a, char *buf)
 {
@@ -1472,6 +1499,7 @@ static struct attribute *devid_attrs[] = {
 	BTRFS_ATTR_PTR(devid, in_fs_metadata),
 	BTRFS_ATTR_PTR(devid, missing),
 	BTRFS_ATTR_PTR(devid, replace_target),
+	BTRFS_ATTR_PTR(devid, scrub_speed_max),
 	BTRFS_ATTR_PTR(devid, writeable),
 	NULL
 };
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 9c0d84e5ec066..063ce999b9d3b 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -143,6 +143,9 @@ struct btrfs_device {
 	struct completion kobj_unregister;
 	/* For sysfs/FSID/devinfo/devid/ */
 	struct kobject devid_kobj;
+
+	/* Bandwidth limit for scrub, in bytes */
+	u64 scrub_speed_max;
 };
 
 /*
-- 
GitLab


From a4cb90dc015cf18aa31bf7b8c38bf6426d9aed6a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 21 May 2021 16:44:07 -0400
Subject: [PATCH 3443/3804] btrfs: make btrfs_release_delayed_iref handle the
 !iref case

Right now we only clean up the delayed iref if we have
BTRFS_DELAYED_NODE_DEL_IREF set on the node.  However we have some error
conditions that need to clean up the iref if it still exists, so to make
this code cleaner move the test_bit into btrfs_release_delayed_iref
itself and unconditionally call it in each of the cases instead.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1a88f6214ebc0..bbef8c8305377 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -974,14 +974,16 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
 
 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
 {
-	struct btrfs_delayed_root *delayed_root;
 
-	ASSERT(delayed_node->root);
-	clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
-	delayed_node->count--;
+	if (test_and_clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags)) {
+		struct btrfs_delayed_root *delayed_root;
 
-	delayed_root = delayed_node->root->fs_info->delayed_root;
-	finish_one_item(delayed_root);
+		ASSERT(delayed_node->root);
+		delayed_node->count--;
+
+		delayed_root = delayed_node->root->fs_info->delayed_root;
+		finish_one_item(delayed_root);
+	}
 }
 
 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ -1024,7 +1026,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 	btrfs_mark_buffer_dirty(leaf);
 
 	if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
-		goto no_iref;
+		goto out;
 
 	path->slots[0]++;
 	if (path->slots[0] >= btrfs_header_nritems(leaf))
@@ -1046,7 +1048,6 @@ again:
 	btrfs_del_item(trans, root, path);
 out:
 	btrfs_release_delayed_iref(node);
-no_iref:
 	btrfs_release_path(path);
 err_out:
 	btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
@@ -1898,8 +1899,7 @@ static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 		btrfs_release_delayed_item(prev_item);
 	}
 
-	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
-		btrfs_release_delayed_iref(delayed_node);
+	btrfs_release_delayed_iref(delayed_node);
 
 	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
 		btrfs_delayed_inode_release_metadata(fs_info, delayed_node, false);
-- 
GitLab


From bb385bedded3ccbd794559600de4a09448810f4a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 21 May 2021 16:44:08 -0400
Subject: [PATCH 3444/3804] btrfs: fix error handling in
 __btrfs_update_delayed_inode

If we get an error while looking up the inode item we'll simply bail
without cleaning up the delayed node.  This results in this style of
warning happening on commit:

  WARNING: CPU: 0 PID: 76403 at fs/btrfs/delayed-inode.c:1365 btrfs_assert_delayed_root_empty+0x5b/0x90
  CPU: 0 PID: 76403 Comm: fsstress Tainted: G        W         5.13.0-rc1+ #373
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
  RIP: 0010:btrfs_assert_delayed_root_empty+0x5b/0x90
  RSP: 0018:ffffb8bb815a7e50 EFLAGS: 00010286
  RAX: 0000000000000000 RBX: ffff95d6d07e1888 RCX: ffff95d6c0fa3000
  RDX: 0000000000000002 RSI: 000000000029e91c RDI: ffff95d6c0fc8060
  RBP: ffff95d6c0fc8060 R08: 00008d6d701a2c1d R09: 0000000000000000
  R10: ffff95d6d1760ea0 R11: 0000000000000001 R12: ffff95d6c15a4d00
  R13: ffff95d6c0fa3000 R14: 0000000000000000 R15: ffffb8bb815a7e90
  FS:  00007f490e8dbb80(0000) GS:ffff95d73bc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f6e75555cb0 CR3: 00000001101ce001 CR4: 0000000000370ef0
  Call Trace:
   btrfs_commit_transaction+0x43c/0xb00
   ? finish_wait+0x80/0x80
   ? vfs_fsync_range+0x90/0x90
   iterate_supers+0x8c/0x100
   ksys_sync+0x50/0x90
   __do_sys_sync+0xa/0x10
   do_syscall_64+0x3d/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

This happens because the iref isn't dropped, which leaves an elevated
node->count, so any release just re-queues it onto the delayed inodes
list.  Fix this by going to the out label to handle the proper cleanup of
the delayed node.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index bbef8c8305377..bce7bdd7fe03b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1011,12 +1011,10 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 	nofs_flag = memalloc_nofs_save();
 	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
 	memalloc_nofs_restore(nofs_flag);
-	if (ret > 0) {
-		btrfs_release_path(path);
-		return -ENOENT;
-	} else if (ret < 0) {
-		return ret;
-	}
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto out;
 
 	leaf = path->nodes[0];
 	inode_item = btrfs_item_ptr(leaf, path->slots[0],
-- 
GitLab


From 04587ad9bef6ce9d510325b4ba9852b6129eebdb Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 21 May 2021 16:44:09 -0400
Subject: [PATCH 3445/3804] btrfs: abort transaction if we fail to update the
 delayed inode

If we fail to update the delayed inode we need to abort the transaction,
because we could leave an inode with the improper counts or some other
such corruption behind.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index bce7bdd7fe03b..2c18ed23aa275 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1051,6 +1051,14 @@ err_out:
 	btrfs_delayed_inode_release_metadata(fs_info, node, (ret < 0));
 	btrfs_release_delayed_inode(node);
 
+	/*
+	 * If we fail to update the delayed inode we need to abort the
+	 * transaction, because we could leave the inode with the improper
+	 * counts behind.
+	 */
+	if (ret && ret != -ENOENT)
+		btrfs_abort_transaction(trans, ret);
+
 	return ret;
 
 search:
-- 
GitLab


From 4f7e67378e1bccd4d1d4de5d7f5aaf928cc07928 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 24 May 2021 11:35:54 +0100
Subject: [PATCH 3446/3804] btrfs: fix misleading and incomplete comment of
 btrfs_truncate()

The comment at the top of btrfs_truncate() mentions that csum items are
dropped or truncated to the new i_size, but this is wrong and nonsense,
as they are unrelated to the i_size and are located in the csums tree and
not on a tree with inode items (fs/subvolume tree or a log tree). Instead
that claim applies to file extent items, so fix the comment to refer to
them instead.

While at it make the whole comment for the function more descriptive and
follow the kernel doc style.

Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c70ae0a5df92b..30643ef973f57 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4472,15 +4472,25 @@ out:
 #define NEED_TRUNCATE_BLOCK 1
 
 /*
- * this can truncate away extent items, csum items and directory items.
- * It starts at a high offset and removes keys until it can't find
- * any higher than new_size
+ * Remove inode items from a given root.
  *
- * csum items that cross the new i_size are truncated to the new size
- * as well.
+ * @trans:		A transaction handle.
+ * @root:		The root from which to remove items.
+ * @inode:		The inode whose items we want to remove.
+ * @new_size:		The new i_size for the inode. This is only applicable when
+ *			@min_type is BTRFS_EXTENT_DATA_KEY, must be 0 otherwise.
+ * @min_type:		The minimum key type to remove. All keys with a type
+ *			greater than this value are removed and all keys with
+ *			this type are removed only if their offset is >= @new_size.
  *
- * min_type is the minimum key type to truncate down to.  If set to 0, this
- * will kill all the items on this inode, including the INODE_ITEM_KEY.
+ * Remove all keys associated with the inode from the given root that have a key
+ * with a type greater than or equals to @min_type. When @min_type has a value of
+ * BTRFS_EXTENT_DATA_KEY, only remove file extent items that have an offset value
+ * greater than or equals to @new_size. If a file extent item that starts before
+ * @new_size and ends after it is found, its length is adjusted.
+ *
+ * Returns: 0 on success, < 0 on error and NEED_TRUNCATE_BLOCK when @min_type is
+ * BTRFS_EXTENT_DATA_KEY and the caller must truncate the last block.
  */
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
-- 
GitLab


From 0d7d316597c00fbc13fffadaab27a448d5a6a60f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 24 May 2021 11:35:55 +0100
Subject: [PATCH 3447/3804] btrfs: don't set the full sync flag when truncation
 does not touch extents

At btrfs_truncate() where we truncate the inode either to the same size
or to a smaller size, we always set the full sync flag on the inode.

This is needed in case the truncation drops or trims any file extent items
that start beyond or cross the new inode size, so that the next fsync
drops all inode items from the log and scans again the fs/subvolume tree
to find all items that must be logged.

However if the truncation does not drop or trim any file extent items, we
do not need to set the full sync flag and force the next fsync to use the
slow code path. So do not set the full sync flag in such cases.

One use case where it is frequent to do truncations that do not change
the inode size and do not drop any extents (no prealloc extents beyond
i_size) is when running Microsoft's SQL Server inside a Docker container.
One example workload is the one Philipp Fent reported recently, in the
thread with a link below. In this workload a large number of fsyncs are
preceded by such truncate operations.

After this change I consistently see the runtime for that workload from
Philipp reduced by about 12%, for example from 184 seconds down
to 162 seconds.

Link: https://lore.kernel.org/linux-btrfs/93c4600e-5263-5cba-adf0-6f47526e7561@in.tum.de/
Tested-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h            |  2 +-
 fs/btrfs/free-space-cache.c |  2 +-
 fs/btrfs/inode.c            | 41 +++++++++++++++++++++++++++----------
 fs/btrfs/tree-log.c         |  5 +++--
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938d8ebf4cf39..d78cb2d89d7e1 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3125,7 +3125,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       struct btrfs_inode *inode, u64 new_size,
-			       u32 min_type);
+			       u32 min_type, u64 *extents_found);
 
 int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4806295116d88..2131ae5b9ed78 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -327,7 +327,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
 	 * need to check for -EAGAIN.
 	 */
 	ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
-					 0, BTRFS_EXTENT_DATA_KEY);
+					 0, BTRFS_EXTENT_DATA_KEY, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 30643ef973f57..3cd5286572d40 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4482,6 +4482,11 @@ out:
  * @min_type:		The minimum key type to remove. All keys with a type
  *			greater than this value are removed and all keys with
  *			this type are removed only if their offset is >= @new_size.
+ * @extents_found:	Output parameter that will contain the number of file
+ *			extent items that were removed or adjusted to the new
+ *			inode i_size. The caller is responsible for initializing
+ *			the counter. Also, it can be NULL if the caller does not
+ *			need this counter.
  *
  * Remove all keys associated with the inode from the given root that have a key
  * with a type greater than or equals to @min_type. When @min_type has a value of
@@ -4495,7 +4500,8 @@ out:
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       struct btrfs_inode *inode,
-			       u64 new_size, u32 min_type)
+			       u64 new_size, u32 min_type,
+			       u64 *extents_found)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_path *path;
@@ -4641,6 +4647,9 @@ search_again:
 		if (found_type != BTRFS_EXTENT_DATA_KEY)
 			goto delete;
 
+		if (extents_found != NULL)
+			(*extents_found)++;
+
 		if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
 			u64 num_dec;
 
@@ -5473,7 +5482,7 @@ void btrfs_evict_inode(struct inode *inode)
 		trans->block_rsv = rsv;
 
 		ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
-						 0, 0);
+						 0, 0, NULL);
 		trans->block_rsv = &fs_info->trans_block_rsv;
 		btrfs_end_transaction(trans);
 		btrfs_btree_balance_dirty(fs_info);
@@ -8677,6 +8686,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 	struct btrfs_trans_handle *trans;
 	u64 mask = fs_info->sectorsize - 1;
 	u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
+	u64 extents_found = 0;
 
 	if (!skip_writeback) {
 		ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask),
@@ -8734,20 +8744,13 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 				      min_size, false);
 	BUG_ON(ret);
 
-	/*
-	 * So if we truncate and then write and fsync we normally would just
-	 * write the extents that changed, which is a problem if we need to
-	 * first truncate that entire inode.  So set this flag so we write out
-	 * all of the extents in the inode to the sync log so we're completely
-	 * safe.
-	 */
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
 	trans->block_rsv = rsv;
 
 	while (1) {
 		ret = btrfs_truncate_inode_items(trans, root, BTRFS_I(inode),
 						 inode->i_size,
-						 BTRFS_EXTENT_DATA_KEY);
+						 BTRFS_EXTENT_DATA_KEY,
+						 &extents_found);
 		trans->block_rsv = &fs_info->trans_block_rsv;
 		if (ret != -ENOSPC && ret != -EAGAIN)
 			break;
@@ -8809,6 +8812,22 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 	}
 out:
 	btrfs_free_block_rsv(fs_info, rsv);
+	/*
+	 * So if we truncate and then write and fsync we normally would just
+	 * write the extents that changed, which is a problem if we need to
+	 * first truncate that entire inode.  So set this flag so we write out
+	 * all of the extents in the inode to the sync log so we're completely
+	 * safe.
+	 *
+	 * If no extents were dropped or trimmed we don't need to force the next
+	 * fsync to truncate all the inode's items from the log and re-log them
+	 * all. This means the truncate operation did not change the file size,
+	 * or changed it to a smaller size but there was only an implicit hole
+	 * between the old i_size and the new i_size, and there were no prealloc
+	 * extents beyond i_size to drop.
+	 */
+	if (extents_found > 0)
+		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
 
 	return ret;
 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index dbcf8bb2f3b9a..c6d4aeede159f 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4468,7 +4468,8 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 				ret = btrfs_truncate_inode_items(trans,
 							 root->log_root,
 							 inode, truncate_offset,
-							 BTRFS_EXTENT_DATA_KEY);
+							 BTRFS_EXTENT_DATA_KEY,
+							 NULL);
 			} while (ret == -EAGAIN);
 			if (ret)
 				goto out;
@@ -5416,7 +5417,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 					  &inode->runtime_flags);
 				while(1) {
 					ret = btrfs_truncate_inode_items(trans,
-						log, inode, 0, 0);
+						log, inode, 0, 0, NULL);
 					if (ret != -EAGAIN)
 						break;
 				}
-- 
GitLab


From 5963ffcaf383134985a5a2d8a4baa582d3999e0a Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 20 May 2021 11:21:31 -0400
Subject: [PATCH 3448/3804] btrfs: always abort the transaction if we abort a
 trans handle

While stress testing our error handling I noticed that sometimes we
would still commit the transaction even though we had aborted the
transaction.

Currently we track if a trans handle has dirtied any metadata, and if it
hasn't we mark the filesystem as having an error (so no new transactions
can be started), but we will allow the current transaction to complete
as we do not mark the transaction itself as having been aborted.

This sounds good in theory, but we were not properly tracking IO errors
in btrfs_finish_ordered_io, and thus committing the transaction with
bogus free space data.  This isn't necessarily a problem per-se with the
free space cache, as the other guards in place would have kept us from
accepting the free space cache as valid, but highlights a real world
case where we had a bug and could have corrupted the filesystem because
of it.

This "skip abort on empty trans handle" is nice in theory, but assumes
we have perfect error handling everywhere, which we clearly do not.
Also we do not allow further transactions to be started, so all this
does is save the last transaction that was happening, which doesn't
necessarily gain us anything other than the potential for real
corruption.

Remove this particular bit of code, if we decide we need to abort the
transaction then abort the current one and keep us from doing real harm
to the file system, regardless of whether this specific trans handle
dirtied anything or not.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.c       |  5 +----
 fs/btrfs/extent-tree.c |  1 -
 fs/btrfs/super.c       | 11 -----------
 fs/btrfs/transaction.c |  8 --------
 fs/btrfs/transaction.h |  1 -
 5 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index a484fb72a01f0..4bc3ca2cbd7d4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -596,7 +596,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		       trans->transid, fs_info->generation);
 
 	if (!should_cow_block(trans, root, buf)) {
-		trans->dirty = true;
 		*cow_ret = buf;
 		return 0;
 	}
@@ -1788,10 +1787,8 @@ again:
 			 * then we don't want to set the path blocking,
 			 * so we test it here
 			 */
-			if (!should_cow_block(trans, root, b)) {
-				trans->dirty = true;
+			if (!should_cow_block(trans, root, b))
 				goto cow_done;
-			}
 
 			/*
 			 * must have write locks on this node and the
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3d5c35e4cb76e..d2f39a122d89d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4784,7 +4784,6 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
 			 buf->start + buf->len - 1, GFP_NOFS);
 	}
-	trans->dirty = true;
 	/* this returns a buffer locked for blocking */
 	return buf;
 }
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4a396c1147f17..bc613218c8c5b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -299,17 +299,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 
 	WRITE_ONCE(trans->aborted, errno);
-	/* Nothing used. The other threads that have joined this
-	 * transaction may be able to continue. */
-	if (!trans->dirty && list_empty(&trans->new_bgs)) {
-		const char *errstr;
-
-		errstr = btrfs_decode_error(errno);
-		btrfs_warn(fs_info,
-		           "%s:%d: Aborting unused transaction(%s).",
-		           function, line, errstr);
-		return;
-	}
 	WRITE_ONCE(trans->transaction->aborted, errno);
 	/* Wake up anybody who may be waiting on this transaction */
 	wake_up(&fs_info->transaction_wait);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index f75de9f6c0ada..e0a82aa7da897 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2074,14 +2074,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 
 	ASSERT(refcount_read(&trans->use_count) == 1);
 
-	/*
-	 * Some places just start a transaction to commit it.  We need to make
-	 * sure that if this commit fails that the abort code actually marks the
-	 * transaction as failed, so set trans->dirty to make the abort code do
-	 * the right thing.
-	 */
-	trans->dirty = true;
-
 	/* Stop the commit early if ->aborted is set */
 	if (TRANS_ABORTED(cur_trans)) {
 		ret = cur_trans->aborted;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 364cfbb4c5c59..c49e2266b28ba 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -143,7 +143,6 @@ struct btrfs_trans_handle {
 	bool allocating_chunk;
 	bool can_flush_pending_bgs;
 	bool reloc_reserved;
-	bool dirty;
 	bool in_fsync;
 	struct btrfs_root *root;
 	struct btrfs_fs_info *fs_info;
-- 
GitLab


From 8c5ec995616f1202ab92e195fd75d6f60d86f85c Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 7 May 2021 20:00:14 +0200
Subject: [PATCH 3449/3804] btrfs: sysfs: fix format string for some discard
 stats

The type of discard_bitmap_bytes and discard_extent_bytes is u64 so the
format should be %llu, though the actual values would hardly ever
overflow to negative values.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c45d9b6dfdb53..4b508938e7285 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -429,7 +429,7 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
 
-	return scnprintf(buf, PAGE_SIZE, "%lld\n",
+	return scnprintf(buf, PAGE_SIZE, "%llu\n",
 			fs_info->discard_ctl.discard_bitmap_bytes);
 }
 BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
@@ -451,7 +451,7 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
 
-	return scnprintf(buf, PAGE_SIZE, "%lld\n",
+	return scnprintf(buf, PAGE_SIZE, "%llu\n",
 			fs_info->discard_ctl.discard_extent_bytes);
 }
 BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
-- 
GitLab


From 6819703f5a365c95488b07066a8744841bf14231 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 7 Jul 2020 18:30:06 +0200
Subject: [PATCH 3450/3804] btrfs: clear defrag status of a root if starting
 transaction fails

The defrag loop processes leaves in batches, starting a transaction for
each. The whole defragmentation on a given root is protected by a bit
but in case the transaction fails, the bit is not cleared.

In case the transaction fails the bit would prevent starting
defragmentation again, so make sure it's cleared.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e0a82aa7da897..22951621363f0 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1406,8 +1406,10 @@ int btrfs_defrag_root(struct btrfs_root *root)
 
 	while (1) {
 		trans = btrfs_start_transaction(root, 0);
-		if (IS_ERR(trans))
-			return PTR_ERR(trans);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			break;
+		}
 
 		ret = btrfs_defrag_leaves(trans, root);
 
-- 
GitLab


From 1aeb6b563aea18cd55c73cf666d1d3245a00f08c Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 7 Jul 2020 18:38:05 +0200
Subject: [PATCH 3451/3804] btrfs: clear log tree recovering status if starting
 transaction fails

When a log recovery is in progress, lots of operations have to take that
into account, so we keep this status per tree during the operation. Long
time ago error handling revamp patch 79787eaab461 ("btrfs: replace many
BUG_ONs with proper error handling") removed clearing of the status in
an error branch. Add it back as was intended in e02119d5a7b4 ("Btrfs:
Add a write ahead tree log to optimize synchronous operations").

There are probably no visible effects, log replay is done only during
mount and if it fails all structures are cleared so the stale status
won't be kept.

Fixes: 79787eaab461 ("btrfs: replace many BUG_ONs with proper error handling")
Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c6d4aeede159f..5c1d58706fa91 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6372,6 +6372,7 @@ next:
 error:
 	if (wc.trans)
 		btrfs_end_transaction(wc.trans);
+	clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
 	btrfs_free_path(path);
 	return ret;
 }
-- 
GitLab


From 7735cd755b590f34a2b019a0a980dd56493a4d65 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 28 Nov 2019 15:37:46 +0100
Subject: [PATCH 3452/3804] btrfs: scrub: factor out common scrub_stripe
 constraints

There are common values set for the stripe constraints, some of them
are already factored out. Do that for increment and mirror_num as well.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 518415d0c1227..5839ad1e25a21 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3204,28 +3204,23 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	physical = map->stripes[num].physical;
 	offset = 0;
 	nstripes = div64_u64(length, map->stripe_len);
+	mirror_num = 1;
+	increment = map->stripe_len;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
 		offset = map->stripe_len * num;
 		increment = map->stripe_len * map->num_stripes;
-		mirror_num = 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
 		int factor = map->num_stripes / map->sub_stripes;
 		offset = map->stripe_len * (num / map->sub_stripes);
 		increment = map->stripe_len * factor;
 		mirror_num = num % map->sub_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
-		increment = map->stripe_len;
 		mirror_num = num % map->num_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		increment = map->stripe_len;
 		mirror_num = num % map->num_stripes + 1;
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
 		get_raid56_logic_offset(physical, num, map, &offset, NULL);
 		increment = map->stripe_len * nr_data_stripes(map);
-		mirror_num = 1;
-	} else {
-		increment = map->stripe_len;
-		mirror_num = 1;
 	}
 
 	path = btrfs_alloc_path();
-- 
GitLab


From 49547068f6fdd148d62eaeb06163213422125d9b Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 15 Sep 2020 21:10:03 +0200
Subject: [PATCH 3453/3804] btrfs: document byte swap optimization of
 root_item::flags accessors

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d78cb2d89d7e1..a3b628ea4d64e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2216,11 +2216,13 @@ BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
 
 static inline bool btrfs_root_readonly(const struct btrfs_root *root)
 {
+	/* Byte-swap the constant at compile time, root_item::flags is LE */
 	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
 }
 
 static inline bool btrfs_root_dead(const struct btrfs_root *root)
 {
+	/* Byte-swap the constant at compile time, root_item::flags is LE */
 	return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
 }
 
-- 
GitLab


From 282ab3ff16120ec670fe3330e85f8ebf13092f21 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 14 Oct 2019 14:38:33 +0200
Subject: [PATCH 3454/3804] btrfs: reduce compressed_bio members' types

Several members of compressed_bio are of type that's unnecessarily big
for the values that they'd hold:

- the size of the uncompressed and compressed data is 128K now, we can
  keep it as int
- same for number of pages
- the compress type fits to a byte
- the errors is 0/1

The size of the unpatched structure is 80 bytes with several holes.
By reordering nr_pages next to the pages, the hole after pending_bios is
filled and the resulting size is 56 bytes. This keeps the csums array
aligned to 8 bytes, which is nice. Further size optimizations may be
possible but right now it looks good to me:

struct compressed_bio {
        refcount_t                 pending_bios;         /*     0     4 */
        unsigned int               nr_pages;             /*     4     4 */
        struct page * *            compressed_pages;     /*     8     8 */
        struct inode *             inode;                /*    16     8 */
        u64                        start;                /*    24     8 */
        unsigned int               len;                  /*    32     4 */
        unsigned int               compressed_len;       /*    36     4 */
        u8                         compress_type;        /*    40     1 */
        u8                         errors;               /*    41     1 */

        /* XXX 2 bytes hole, try to pack */

        int                        mirror_num;           /*    44     4 */
        struct bio *               orig_bio;             /*    48     8 */
        u8                         sums[];               /*    56     0 */

        /* size: 56, cachelines: 1, members: 12 */
        /* sum members: 54, holes: 1, sum holes: 2 */
        /* last cacheline: 56 bytes */
};

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c |  2 +-
 fs/btrfs/compression.h | 20 ++++++++++----------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9a0c26e4e3891..c006f5d81c2a5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -507,7 +507,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
 		}
 		if (bytes_left < PAGE_SIZE) {
 			btrfs_info(fs_info,
-					"bytes left %lu compress len %lu nr %lu",
+					"bytes left %lu compress len %u nr %u",
 			       bytes_left, cb->compressed_len, cb->nr_pages);
 		}
 		bytes_left -= PAGE_SIZE;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 8001b700ea3ae..00d8439048c9a 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -31,6 +31,9 @@ struct compressed_bio {
 	/* number of bios pending for this compressed extent */
 	refcount_t pending_bios;
 
+	/* Number of compressed pages in the array */
+	unsigned int nr_pages;
+
 	/* the pages with the compressed data on them */
 	struct page **compressed_pages;
 
@@ -40,20 +43,17 @@ struct compressed_bio {
 	/* starting offset in the inode for our pages */
 	u64 start;
 
-	/* number of bytes in the inode we're working on */
-	unsigned long len;
-
-	/* number of bytes on disk */
-	unsigned long compressed_len;
+	/* Number of bytes in the inode we're working on */
+	unsigned int len;
 
-	/* the compression algorithm for this bio */
-	int compress_type;
+	/* Number of bytes on disk */
+	unsigned int compressed_len;
 
-	/* number of compressed pages in the array */
-	unsigned long nr_pages;
+	/* The compression algorithm for this bio */
+	u8 compress_type;
 
 	/* IO errors */
-	int errors;
+	u8 errors;
 	int mirror_num;
 
 	/* for reads, this is the bio we are copying the data into */
-- 
GitLab


From ff14aa798756a6b98b6020e51e52168128ffa9d7 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 21 Sep 2020 21:58:14 +0200
Subject: [PATCH 3455/3804] btrfs: remove extra sb::s_id from message in
 btrfs_validate_metadata_buffer

The s_id is already printed by message helpers.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd7dc39b7508b..b28b314ff7a91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -614,8 +614,8 @@ static int validate_extent_buffer(struct extent_buffer *eb)
 
 		read_extent_buffer(eb, &val, 0, csum_size);
 		btrfs_warn_rl(fs_info,
-	"%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
-			      fs_info->sb->s_id, eb->start,
+	"checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
+			      eb->start,
 			      CSUM_FMT_VALUE(csum_size, val),
 			      CSUM_FMT_VALUE(csum_size, result),
 			      btrfs_header_level(eb));
-- 
GitLab


From dfd29eed4ab5881a1af9f07c3573c0be5593dc1f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 21 Sep 2020 22:07:14 +0200
Subject: [PATCH 3456/3804] btrfs: simplify eb checksum verification in
 btrfs_validate_metadata_buffer

The verification copies the calculated checksum bytes to a temporary
buffer but this is not necessary. We can map the eb header on the first
page and use the checksum bytes directly.

This saves at least one function call and boundary checks so it could
lead to a minor performance improvement.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b28b314ff7a91..34bcd986f738d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -584,6 +584,7 @@ static int validate_extent_buffer(struct extent_buffer *eb)
 	const u32 csum_size = fs_info->csum_size;
 	u8 found_level;
 	u8 result[BTRFS_CSUM_SIZE];
+	const u8 *header_csum;
 	int ret = 0;
 
 	found_start = btrfs_header_bytenr(eb);
@@ -608,15 +609,14 @@ static int validate_extent_buffer(struct extent_buffer *eb)
 	}
 
 	csum_tree_block(eb, result);
+	header_csum = page_address(eb->pages[0]) +
+		get_eb_offset_in_page(eb, offsetof(struct btrfs_header, csum));
 
-	if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
-		u8 val[BTRFS_CSUM_SIZE] = { 0 };
-
-		read_extent_buffer(eb, &val, 0, csum_size);
+	if (memcmp(result, header_csum, csum_size) != 0) {
 		btrfs_warn_rl(fs_info,
 	"checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
 			      eb->start,
-			      CSUM_FMT_VALUE(csum_size, val),
+			      CSUM_FMT_VALUE(csum_size, header_csum),
 			      CSUM_FMT_VALUE(csum_size, result),
 			      btrfs_header_level(eb));
 		ret = -EUCLEAN;
-- 
GitLab


From 24880be59c5abdb4f686e17fcf4414518d7fec31 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 21 Sep 2020 22:07:14 +0200
Subject: [PATCH 3457/3804] btrfs: clean up header members offsets in write
 helpers

Move header offsetof() to the expression that calculates the address so
it's part of get_eb_offset_in_page where the 2nd parameter is the member
offset.

Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2b250c610562d..2e924f60ea6f6 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6519,9 +6519,10 @@ void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
 	char *kaddr;
 
 	assert_eb_page_uptodate(eb, eb->pages[0]);
-	kaddr = page_address(eb->pages[0]) + get_eb_offset_in_page(eb, 0);
-	memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
-			BTRFS_FSID_SIZE);
+	kaddr = page_address(eb->pages[0]) +
+		get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
+						   chunk_tree_uuid));
+	memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
 }
 
 void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
@@ -6529,9 +6530,9 @@ void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
 	char *kaddr;
 
 	assert_eb_page_uptodate(eb, eb->pages[0]);
-	kaddr = page_address(eb->pages[0]) + get_eb_offset_in_page(eb, 0);
-	memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
-			BTRFS_FSID_SIZE);
+	kaddr = page_address(eb->pages[0]) +
+		get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
+	memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
 }
 
 void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
-- 
GitLab


From 0d7ed32c1eebfa34e28d24930ea598a4492d289e Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 14 May 2021 17:42:30 +0200
Subject: [PATCH 3458/3804] btrfs: protect exclusive_operation by super_lock

The exclusive operation is now atomically checked and set using bit
operations. Switch it to protection by spinlock. The super block lock is
not frequently used and adding a new lock seems like an overkill so it
should be safe to reuse it.

The reason to use a spinlock is to enhance the locking context so more
checks can be done, e.g. allowing the same exclusive operation to enter
the exclop section and cancel the running one. This will be used for
resize and device delete.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |  4 ++--
 fs/btrfs/ioctl.c | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a3b628ea4d64e..4049f533e35e4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -992,8 +992,8 @@ struct btrfs_fs_info {
 	 */
 	int send_in_progress;
 
-	/* Type of exclusive operation running */
-	unsigned long exclusive_operation;
+	/* Type of exclusive operation running, protected by super_lock */
+	enum btrfs_exclusive_operation exclusive_operation;
 
 	/*
 	 * Zone size > 0 when in ZONED mode, otherwise it's used for a check
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a7739461533d3..c4e710ea08baa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -353,15 +353,29 @@ update_flags:
 	return ret;
 }
 
+/*
+ * Start exclusive operation @type, return true on success
+ */
 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
 			enum btrfs_exclusive_operation type)
 {
-	return !cmpxchg(&fs_info->exclusive_operation, BTRFS_EXCLOP_NONE, type);
+	bool ret = false;
+
+	spin_lock(&fs_info->super_lock);
+	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) {
+		fs_info->exclusive_operation = type;
+		ret = true;
+	}
+	spin_unlock(&fs_info->super_lock);
+
+	return ret;
 }
 
 void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
 {
+	spin_lock(&fs_info->super_lock);
 	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE);
+	spin_unlock(&fs_info->super_lock);
 	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation");
 }
 
-- 
GitLab


From 907d2710d727541fffabdc52a025916d5109b3e5 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 18 May 2021 00:37:36 +0200
Subject: [PATCH 3459/3804] btrfs: add cancellable chunk relocation support

Add support code that will allow canceling relocation on the chunk
granularity. This is different and independent of balance, that also
uses relocation but is a higher level operation and manages its own
state and pause/cancellation requests.

Relocation is used for resize (shrink) and device deletion so this will
be a common point to implement cancellation for both. The context is
entirely in btrfs_relocate_block_group and btrfs_recover_relocation,
enclosing one chunk relocation. The status bit is set and unset between
the chunks. As relocation can take long, the effects may not be
immediate and the request and actual action can slightly race.

The fs_info::reloc_cancel_req is only supposed to be increased and does
not pair with decrement like fs_info::balance_cancel_req.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h      |  9 +++++++
 fs/btrfs/disk-io.c    |  1 +
 fs/btrfs/relocation.c | 62 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4049f533e35e4..b7c36aad45efa 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -565,6 +565,12 @@ enum {
 	 */
 	BTRFS_FS_BALANCE_RUNNING,
 
+	/*
+	 * Indicate that relocation of a chunk has started, it's set per chunk
+	 * and is toggled between chunks.
+	 */
+	BTRFS_FS_RELOC_RUNNING,
+
 	/* Indicate that the cleaner thread is awake and doing something. */
 	BTRFS_FS_CLEANER_RUNNING,
 
@@ -871,6 +877,9 @@ struct btrfs_fs_info {
 	struct btrfs_balance_control *balance_ctl;
 	wait_queue_head_t balance_wait_q;
 
+	/* Cancellation requests for chunk relocation */
+	atomic_t reloc_cancel_req;
+
 	u32 data_chunk_allocations;
 	u32 metadata_ratio;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34bcd986f738d..d1d5091a83857 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2251,6 +2251,7 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
 	atomic_set(&fs_info->balance_cancel_req, 0);
 	fs_info->balance_ctl = NULL;
 	init_waitqueue_head(&fs_info->balance_wait_q);
+	atomic_set(&fs_info->reloc_cancel_req, 0);
 }
 
 static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b70be2ac2e9e6..420a898698895 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2876,11 +2876,12 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
 }
 
 /*
- * Allow error injection to test balance cancellation
+ * Allow error injection to test balance/relocation cancellation
  */
 noinline int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info)
 {
 	return atomic_read(&fs_info->balance_cancel_req) ||
+		atomic_read(&fs_info->reloc_cancel_req) ||
 		fatal_signal_pending(current);
 }
 ALLOW_ERROR_INJECTION(btrfs_should_cancel_balance, TRUE);
@@ -3780,6 +3781,47 @@ out:
 	return inode;
 }
 
+/*
+ * Mark start of chunk relocation that is cancellable. Check if the cancellation
+ * has been requested meanwhile and don't start in that case.
+ *
+ * Return:
+ *   0             success
+ *   -EINPROGRESS  operation is already in progress, that's probably a bug
+ *   -ECANCELED    cancellation request was set before the operation started
+ */
+static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
+{
+	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+		/* This should not happen */
+		btrfs_err(fs_info, "reloc already running, cannot start");
+		return -EINPROGRESS;
+	}
+
+	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
+		btrfs_info(fs_info, "chunk relocation canceled on start");
+		/*
+		 * On cancel, clear all requests but let the caller mark
+		 * the end after cleanup operations.
+		 */
+		atomic_set(&fs_info->reloc_cancel_req, 0);
+		return -ECANCELED;
+	}
+	return 0;
+}
+
+/*
+ * Mark end of chunk relocation that is cancellable and wake any waiters.
+ */
+static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
+{
+	/* Requested after start, clear bit first so any waiters can continue */
+	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
+		btrfs_info(fs_info, "chunk relocation canceled during operation");
+	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
+	atomic_set(&fs_info->reloc_cancel_req, 0);
+}
+
 static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
 {
 	struct reloc_control *rc;
@@ -3862,6 +3904,12 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 		return -ENOMEM;
 	}
 
+	ret = reloc_chunk_start(fs_info);
+	if (ret < 0) {
+		err = ret;
+		goto out_put_bg;
+	}
+
 	rc->extent_root = extent_root;
 	rc->block_group = bg;
 
@@ -3952,7 +4000,9 @@ out:
 	if (err && rw)
 		btrfs_dec_block_group_ro(rc->block_group);
 	iput(rc->data_inode);
-	btrfs_put_block_group(rc->block_group);
+out_put_bg:
+	btrfs_put_block_group(bg);
+	reloc_chunk_end(fs_info);
 	free_reloc_control(rc);
 	return err;
 }
@@ -4073,6 +4123,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)
 		goto out;
 	}
 
+	ret = reloc_chunk_start(fs_info);
+	if (ret < 0) {
+		err = ret;
+		goto out_end;
+	}
+
 	rc->extent_root = fs_info->extent_root;
 
 	set_reloc_control(rc);
@@ -4137,6 +4193,8 @@ out_clean:
 		err = ret;
 out_unset:
 	unset_reloc_control(rc);
+out_end:
+	reloc_chunk_end(fs_info);
 	free_reloc_control(rc);
 out:
 	free_reloc_roots(&reloc_roots);
-- 
GitLab


From 578bda9e17fdb6b6eaab1980f87dd1819b123da0 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 18 May 2021 21:05:52 +0200
Subject: [PATCH 3460/3804] btrfs: introduce try-lock semantics for exclusive
 op start

Add try-lock for exclusive operation start to allow callers to do more
checks. The same operation must already be running. The try-lock and
unlock must pair and are a substitute for btrfs_exclop_start, thus it
must also pair with btrfs_exclop_finish to release the exclop context.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h |  3 +++
 fs/btrfs/ioctl.c | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b7c36aad45efa..1124fa87e2e9e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3233,6 +3233,9 @@ void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
 			enum btrfs_exclusive_operation type);
+bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
+				 enum btrfs_exclusive_operation type);
+void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
 void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
 
 /* file.c */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c4e710ea08baa..cacd6ee17d8ea 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -371,6 +371,32 @@ bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+/*
+ * Conditionally allow to enter the exclusive operation in case it's compatible
+ * with the running one.  This must be paired with btrfs_exclop_start_unlock and
+ * btrfs_exclop_finish.
+ *
+ * Compatibility:
+ * - the same type is already running
+ * - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller
+ *   must check the condition first that would allow none -> @type
+ */
+bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
+				 enum btrfs_exclusive_operation type)
+{
+	spin_lock(&fs_info->super_lock);
+	if (fs_info->exclusive_operation == type)
+		return true;
+
+	spin_unlock(&fs_info->super_lock);
+	return false;
+}
+
+void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info)
+{
+	spin_unlock(&fs_info->super_lock);
+}
+
 void btrfs_exclop_finish(struct btrfs_fs_info *fs_info)
 {
 	spin_lock(&fs_info->super_lock);
-- 
GitLab


From 17aaa434ed39cbad48824ef4bb9ec3707091ae5b Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 14 May 2021 21:32:44 +0200
Subject: [PATCH 3461/3804] btrfs: add wrapper for conditional start of
 exclusive operation

To support optional cancellation of some operations, add a helper that
will wrap all the combinations. In normal mode it's the same as
btrfs_exclop_start, in cancellation mode it checks if the operation is
already running, requests cancellation and waits until completion.

The error codes can be returned to user space and the semantics are not
changed, apart from adding ECANCELED. This should be evaluated as an
error meaning that the operation has not completed, and the operation
should be restarted or the filesystem status reviewed.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cacd6ee17d8ea..572f575619162 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1600,6 +1600,48 @@ out_ra:
 	return ret;
 }
 
+/*
+ * Try to start exclusive operation @type or cancel it if it's running.
+ *
+ * Return:
+ *   0        - normal mode, newly claimed op started
+ *  >0        - normal mode, something else is running,
+ *              return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS to user space
+ * ECANCELED  - cancel mode, successful cancel
+ * ENOTCONN   - cancel mode, operation not running anymore
+ */
+static int exclop_start_or_cancel_reloc(struct btrfs_fs_info *fs_info,
+			enum btrfs_exclusive_operation type, bool cancel)
+{
+	if (!cancel) {
+		/* Start normal op */
+		if (!btrfs_exclop_start(fs_info, type))
+			return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
+		/* Exclusive operation is now claimed */
+		return 0;
+	}
+
+	/* Cancel running op */
+	if (btrfs_exclop_start_try_lock(fs_info, type)) {
+		/*
+		 * This blocks any exclop finish from setting it to NONE, so we
+		 * request cancellation. Either it runs and we will wait for it,
+		 * or it has finished and no waiting will happen.
+		 */
+		atomic_inc(&fs_info->reloc_cancel_req);
+		btrfs_exclop_start_unlock(fs_info);
+
+		if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
+			wait_on_bit(&fs_info->flags, BTRFS_FS_RELOC_RUNNING,
+				    TASK_INTERRUPTIBLE);
+
+		return -ECANCELED;
+	}
+
+	/* Something else is running or none */
+	return -ENOTCONN;
+}
+
 static noinline int btrfs_ioctl_resize(struct file *file,
 					void __user *arg)
 {
-- 
GitLab


From bb059a37c9ff3e40c0348e82a7e3ebd3918cf418 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Tue, 18 May 2021 21:12:33 +0200
Subject: [PATCH 3462/3804] btrfs: add cancellation to resize

Accept literal string "cancel" as resize operation and interpret that
as a request to cancel the running operation. If it's running, wait
until it finishes current work and return ECANCELED.

Shrinking resize uses relocation to move the chunks away, use the
conditional exclusive operation start and cancellation helpers.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 572f575619162..157ead83d65c8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1659,6 +1659,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	char *devstr = NULL;
 	int ret = 0;
 	int mod = 0;
+	bool cancel;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -1667,20 +1668,23 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	if (ret)
 		return ret;
 
-	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_RESIZE)) {
-		mnt_drop_write_file(file);
-		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
-	}
-
+	/*
+	 * Read the arguments before checking exclusivity to be able to
+	 * distinguish regular resize and cancel
+	 */
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
-		goto out;
+		goto out_drop;
 	}
-
 	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-
 	sizestr = vol_args->name;
+	cancel = (strcmp("cancel", sizestr) == 0);
+	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_RESIZE, cancel);
+	if (ret)
+		goto out_free;
+	/* Exclusive operation is now claimed */
+
 	devstr = strchr(sizestr, ':');
 	if (devstr) {
 		sizestr = devstr + 1;
@@ -1688,10 +1692,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 		devstr = vol_args->name;
 		ret = kstrtoull(devstr, 10, &devid);
 		if (ret)
-			goto out_free;
+			goto out_finish;
 		if (!devid) {
 			ret = -EINVAL;
-			goto out_free;
+			goto out_finish;
 		}
 		btrfs_info(fs_info, "resizing devid %llu", devid);
 	}
@@ -1701,7 +1705,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 		btrfs_info(fs_info, "resizer unable to find device %llu",
 			   devid);
 		ret = -ENODEV;
-		goto out_free;
+		goto out_finish;
 	}
 
 	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
@@ -1709,7 +1713,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 			   "resizer unable to apply on readonly device %llu",
 		       devid);
 		ret = -EPERM;
-		goto out_free;
+		goto out_finish;
 	}
 
 	if (!strcmp(sizestr, "max"))
@@ -1725,13 +1729,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 		new_size = memparse(sizestr, &retptr);
 		if (*retptr != '\0' || new_size == 0) {
 			ret = -EINVAL;
-			goto out_free;
+			goto out_finish;
 		}
 	}
 
 	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
 		ret = -EPERM;
-		goto out_free;
+		goto out_finish;
 	}
 
 	old_size = btrfs_device_get_total_bytes(device);
@@ -1739,24 +1743,24 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 	if (mod < 0) {
 		if (new_size > old_size) {
 			ret = -EINVAL;
-			goto out_free;
+			goto out_finish;
 		}
 		new_size = old_size - new_size;
 	} else if (mod > 0) {
 		if (new_size > ULLONG_MAX - old_size) {
 			ret = -ERANGE;
-			goto out_free;
+			goto out_finish;
 		}
 		new_size = old_size + new_size;
 	}
 
 	if (new_size < SZ_256M) {
 		ret = -EINVAL;
-		goto out_free;
+		goto out_finish;
 	}
 	if (new_size > device->bdev->bd_inode->i_size) {
 		ret = -EFBIG;
-		goto out_free;
+		goto out_finish;
 	}
 
 	new_size = round_down(new_size, fs_info->sectorsize);
@@ -1765,7 +1769,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 		trans = btrfs_start_transaction(root, 0);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
-			goto out_free;
+			goto out_finish;
 		}
 		ret = btrfs_grow_device(trans, device, new_size);
 		btrfs_commit_transaction(trans);
@@ -1778,10 +1782,11 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 			"resize device %s (devid %llu) from %llu to %llu",
 			rcu_str_deref(device->name), device->devid,
 			old_size, new_size);
+out_finish:
+	btrfs_exclop_finish(fs_info);
 out_free:
 	kfree(vol_args);
-out:
-	btrfs_exclop_finish(fs_info);
+out_drop:
 	mnt_drop_write_file(file);
 	return ret;
 }
-- 
GitLab


From 67ae34b69c4146e40f3828ecb59ff00a840c01dc Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 14 May 2021 21:21:27 +0200
Subject: [PATCH 3463/3804] btrfs: add device delete cancel

Accept device name "cancel" as a request to cancel a running device
deletion operation. The string is literal; in case there's a real device
named "cancel", pass it as a full absolute path or as "./cancel".

This works for v1 and v2 ioctls when the device is specified by name.
Moving chunks from the device uses relocation, so use the conditional
exclusive operation start and cancellation helpers.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 43 ++++++++++++++++++++++++-------------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 157ead83d65c8..2bdaf20181977 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3206,6 +3206,7 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ioctl_vol_args_v2 *vol_args;
 	int ret;
+	bool cancel = false;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -3224,18 +3225,22 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
+	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+	if (!(vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) &&
+	    strcmp("cancel", vol_args->name) == 0)
+		cancel = true;
 
-	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
-		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
+	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
+					   cancel);
+	if (ret)
 		goto out;
-	}
+	/* Exclusive operation is now claimed */
 
-	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
+	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
 		ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
-	} else {
-		vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+	else
 		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
-	}
+
 	btrfs_exclop_finish(fs_info);
 
 	if (!ret) {
@@ -3259,6 +3264,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
+	bool cancel;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -3267,25 +3273,24 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
 	if (ret)
 		return ret;
 
-	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_REMOVE)) {
-		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
-		goto out_drop_write;
-	}
-
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args)) {
 		ret = PTR_ERR(vol_args);
-		goto out;
+		goto out_drop_write;
 	}
-
 	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+	cancel = (strcmp("cancel", vol_args->name) == 0);
+
+	ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
+					   cancel);
+	if (ret == 0) {
+		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+		if (!ret)
+			btrfs_info(fs_info, "disk deleted %s", vol_args->name);
+		btrfs_exclop_finish(fs_info);
+	}
 
-	if (!ret)
-		btrfs_info(fs_info, "disk deleted %s", vol_args->name);
 	kfree(vol_args);
-out:
-	btrfs_exclop_finish(fs_info);
 out_drop_write:
 	mnt_drop_write_file(file);
 
-- 
GitLab


From b590b839720cf4fa46798ee6e950ed7369f52a15 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 28 May 2021 11:37:32 +0100
Subject: [PATCH 3464/3804] btrfs: avoid unnecessary logging of xattrs during
 fast fsyncs

When logging an inode we always log all its xattrs, so that we are able
to figure out which ones should be deleted during log replay. However this
is unnecessary when we are doing a fast fsync and no xattrs were added,
changed or deleted since the last time we logged the inode in the current
transaction.

So skip the logging of xattrs when the inode was previously logged in the
current transaction and no xattrs were added, changed or deleted. If any
changes to xattrs happened, then the inode has BTRFS_INODE_COPY_EVERYTHING
set in its runtime flags and the xattrs get logged. This saves time on
scanning for xattrs, allocating memory, COWing log tree extent buffers and
adding more lock contention on the extent buffers when there are multiple
tasks logging in parallel.

The use of xattrs is common when using ACLs, some applications, or when
using security modules like SELinux where every inode gets a security
xattr added to it.

The following test script, using fio, was used on a box with 12 cores, 64G
of RAM, an NVMe device and the default non-debug kernel config from Debian.
It uses 8 concurrent jobs each writing in blocks of 64K to its own 4G file,
each file with a single xattr of 50 bytes (about the same size for an ACL
or SELinux xattr), doing random buffered writes with an fsync after each
write.

   $ cat test.sh
   #!/bin/bash

   DEV=/dev/nvme0n1
   MNT=/mnt/test
   MOUNT_OPTIONS="-o ssd"
   MKFS_OPTIONS="-d single -m single"

   NUM_JOBS=8
   FILE_SIZE=4G

   cat <<EOF > /tmp/fio-job.ini
   [writers]
   rw=randwrite
   fsync=1
   fallocate=none
   group_reporting=1
   direct=0
   bs=64K
   ioengine=sync
   size=$FILE_SIZE
   directory=$MNT
   numjobs=$NUM_JOBS
   EOF

   echo "performance" | \
       tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

   mkfs.btrfs -f $MKFS_OPTIONS $DEV > /dev/null
   mount $MOUNT_OPTIONS $DEV $MNT

   echo "Creating files before fio runs, each with 1 xattr of 50 bytes"
   for ((i = 0; i < $NUM_JOBS; i++)); do
       path="$MNT/writers.$i.0"
       truncate -s $FILE_SIZE $path
       setfattr -n user.xa1 -v $(printf '%0.sX' $(seq 50)) $path
   done

   fio /tmp/fio-job.ini
   umount $MNT

fio output before this change:

WRITE: bw=120MiB/s (126MB/s), 120MiB/s-120MiB/s (126MB/s-126MB/s), io=32.0GiB (34.4GB), run=272145-272145msec

fio output after this change:

WRITE: bw=142MiB/s (149MB/s), 142MiB/s-142MiB/s (149MB/s-149MB/s), io=32.0GiB (34.4GB), run=230408-230408msec

+16.8% throughput, -16.6% runtime

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 5c1d58706fa91..cab451d19547a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5467,13 +5467,23 @@ log_extents:
 	btrfs_release_path(dst_path);
 	if (need_log_inode_item) {
 		err = log_inode_item(trans, log, dst_path, inode);
-		if (!err && !xattrs_logged) {
+		if (err)
+			goto out_unlock;
+		/*
+		 * If we are doing a fast fsync and the inode was logged before
+		 * in this transaction, we don't need to log the xattrs because
+		 * they were logged before. If xattrs were added, changed or
+		 * deleted since the last time we logged the inode, then we have
+		 * already logged them because the inode had the runtime flag
+		 * BTRFS_INODE_COPY_EVERYTHING set.
+		 */
+		if (!xattrs_logged && inode->logged_trans < trans->transid) {
 			err = btrfs_log_all_xattrs(trans, root, inode, path,
 						   dst_path);
+			if (err)
+				goto out_unlock;
 			btrfs_release_path(path);
 		}
-		if (err)
-			goto out_unlock;
 	}
 	if (fast_search) {
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
-- 
GitLab


From 1d08ce58406d1cd6222fca72144146c7ee1450ec Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Sat, 29 May 2021 17:48:33 +0800
Subject: [PATCH 3465/3804] btrfs: reduce the variable size to fit nr_pages

Patch "btrfs: reduce compressed_bio member's types" reduced the
@nr_pages size to unsigned int; its cascading effects are updated here.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c006f5d81c2a5..2114cf2c684d1 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -149,7 +149,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
 	const u32 csum_size = fs_info->csum_size;
 	const u32 sectorsize = fs_info->sectorsize;
 	struct page *page;
-	unsigned long i;
+	unsigned int i;
 	char *kaddr;
 	u8 csum[BTRFS_CSUM_SIZE];
 	struct compressed_bio *cb = bio->bi_private;
@@ -208,7 +208,7 @@ static void end_compressed_bio_read(struct bio *bio)
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
-	unsigned long index;
+	unsigned int index;
 	unsigned int mirror = btrfs_io_bio(bio)->mirror_num;
 	int ret = 0;
 
@@ -334,7 +334,7 @@ static void end_compressed_bio_write(struct bio *bio)
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
-	unsigned long index;
+	unsigned int index;
 
 	if (bio->bi_status)
 		cb->errors = 1;
-- 
GitLab


From 356b4a2dc151c65e5abce07b7c0e4a146769892b Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Sat, 29 May 2021 17:48:34 +0800
Subject: [PATCH 3466/3804] btrfs: optimize variables size in
 btrfs_submit_compressed_read

Patch "btrfs: reduce compressed_bio member's types" reduced the size of
some members. Declare the variables @compressed_len, @nr_pages and
@pg_index as unsigned int in the function
btrfs_submit_compressed_read.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 2114cf2c684d1..c527ae858d37e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -669,9 +669,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct extent_map_tree *em_tree;
 	struct compressed_bio *cb;
-	unsigned long compressed_len;
-	unsigned long nr_pages;
-	unsigned long pg_index;
+	unsigned int compressed_len;
+	unsigned int nr_pages;
+	unsigned int pg_index;
 	struct page *page;
 	struct bio *comp_bio;
 	u64 cur_disk_byte = bio->bi_iter.bi_sector << 9;
-- 
GitLab


From 65b5355f77082804949390dc2629256c8c24f69d Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Sat, 29 May 2021 17:48:35 +0800
Subject: [PATCH 3467/3804] btrfs: optimize variables size in
 btrfs_submit_compressed_write

Patch "btrfs: reduce compressed_bio member's types" reduced the size of
some members. Function arguments @len, @compressed_len and @nr_pages
can be declared as unsigned int.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 6 +++---
 fs/btrfs/compression.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c527ae858d37e..36731b5987702 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -387,10 +387,10 @@ out:
  * the end io hooks.
  */
 blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
-				 unsigned long len, u64 disk_start,
-				 unsigned long compressed_len,
+				 unsigned int len, u64 disk_start,
+				 unsigned int compressed_len,
 				 struct page **compressed_pages,
-				 unsigned long nr_pages,
+				 unsigned int nr_pages,
 				 unsigned int write_flags,
 				 struct cgroup_subsys_state *blkcg_css)
 {
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 00d8439048c9a..c359f20920d0a 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -91,10 +91,10 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 			      struct bio *bio);
 
 blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
-				  unsigned long len, u64 disk_start,
-				  unsigned long compressed_len,
+				  unsigned int len, u64 disk_start,
+				  unsigned int compressed_len,
 				  struct page **compressed_pages,
-				  unsigned long nr_pages,
+				  unsigned int nr_pages,
 				  unsigned int write_flags,
 				  struct cgroup_subsys_state *blkcg_css);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
-- 
GitLab


From 4183abf6cbfd8e71c5e19df697d8e43f1a2a6908 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Sat, 29 May 2021 17:48:36 +0800
Subject: [PATCH 3468/3804] btrfs: fix comment about max_out in
 btrfs_compress_pages

Commit e5d74902362f ("btrfs: derive maximum output size in the
compression implementation") removed the @max_out argument of
btrfs_compress_pages() but its comment remained. Remove it.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 36731b5987702..fdead01568f68 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1194,9 +1194,6 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
  *
  * @total_out is an in/out parameter, must be set to the input length and will
  * be also used to return the total number of compressed bytes
- *
- * @max_out tells us the max number of bytes that we're allowed to
- * stuff into pages
  */
 int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
-- 
GitLab


From ec87b42f7095a92e484e34c2c9bb486ae79d6548 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 31 May 2021 10:37:03 +0300
Subject: [PATCH 3469/3804] btrfs: use list_last_entry in add_falloc_range

Instead of calling list_entry with head->prev simply call
list_last_entry which makes it obvious which member of the list is
being referred to. This allows removing the extra 'prev' pointer.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 55f68422061d1..a56a13999bd66 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3034,7 +3034,6 @@ struct falloc_range {
  */
 static int add_falloc_range(struct list_head *head, u64 start, u64 len)
 {
-	struct falloc_range *prev = NULL;
 	struct falloc_range *range = NULL;
 
 	if (list_empty(head))
@@ -3044,9 +3043,9 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
 	 * As fallocate iterate by bytenr order, we only need to check
 	 * the last range.
 	 */
-	prev = list_entry(head->prev, struct falloc_range, list);
-	if (prev->start + prev->len == start) {
-		prev->len += len;
+	range = list_last_entry(head, struct falloc_range, list);
+	if (range->start + range->len == start) {
+		range->len += len;
 		return 0;
 	}
 insert:
-- 
GitLab


From 8df507cbb5952719353c912a021b66c27641e90c Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 22 Apr 2021 19:02:46 +0800
Subject: [PATCH 3470/3804] btrfs: scrub: fix subpage repair error caused by
 hard coded PAGE_SIZE

[BUG]
For the following file layout, scrub will not be able to repair both of
these repairable errors, and in fact makes one corruption
unrepairable:

	  inode offset 0      4k     8K
Mirror 1               |XXXXXX|      |
Mirror 2               |      |XXXXXX|

[CAUSE]
The root cause is the hard coded PAGE_SIZE, which makes scrub repair
go crazy for subpage.

For the above case, when reading the first sector, we use PAGE_SIZE rather
than sectorsize to read, which makes us read the full range [0, 64K).
In fact, after 8K there may be no data at all, so we may just get some
garbage.

Then when doing the repair, we also write back a full page from mirror 2.
This means we also write the corrupted data in mirror 2 back to
mirror 1, leaving the range [4K, 8K) unrepairable.

[FIX]
This patch replaces the following uses of PAGE_SIZE with sectorsize:

- scrub_print_warning_inode()
  Remove the min() and replace PAGE_SIZE with sectorsize.
  The min() makes no sense, as csum is done for the full sector with
  padding.

  This fixes a bug where subpage reports an extra length like:
   checksum error at logical 298844160 on dev /dev/mapper/arm_nvme-test,
   physical 575668224, root 5, inode 257, offset 0, length 12288, links 1 (path: file)

  Where the error is only 1 sector.

- scrub_handle_errored_block()
  Comments with PAGE|page involved, all changed to sector.

- scrub_setup_recheck_block()
- scrub_repair_page_from_good_copy()
- scrub_add_page_to_wr_bio()
- scrub_wr_submit()
- scrub_add_page_to_rd_bio()
- scrub_block_complete()
  Replace PAGE_SIZE with sectorsize.
  This solves several problems where we read/write extra range for
  subpage case.

RAID56 code is excluded intentionally, as RAID56 has extra PAGE_SIZE
usage, and is not really safe enough.
Thus we will reject RAID56 for subpage in a later commit.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/scrub.c | 82 +++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 5839ad1e25a21..b60466db5654e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -631,7 +631,6 @@ nomem:
 static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 				     void *warn_ctx)
 {
-	u64 isize;
 	u32 nlink;
 	int ret;
 	int i;
@@ -667,7 +666,6 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	eb = swarn->path->nodes[0];
 	inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
 					struct btrfs_inode_item);
-	isize = btrfs_inode_size(eb, inode_item);
 	nlink = btrfs_inode_nlink(eb, inode_item);
 	btrfs_release_path(swarn->path);
 
@@ -696,12 +694,12 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
 	 */
 	for (i = 0; i < ipath->fspath->elem_cnt; ++i)
 		btrfs_warn_in_rcu(fs_info,
-"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)",
+"%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %u, links %u (path: %s)",
 				  swarn->errstr, swarn->logical,
 				  rcu_str_deref(swarn->dev->name),
 				  swarn->physical,
 				  root, inum, offset,
-				  min(isize - offset, (u64)PAGE_SIZE), nlink,
+				  fs_info->sectorsize, nlink,
 				  (char *)(unsigned long)ipath->fspath->val[i]);
 
 	btrfs_put_root(local_root);
@@ -890,25 +888,25 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 	 * read all mirrors one after the other. This includes to
 	 * re-read the extent or metadata block that failed (that was
 	 * the cause that this fixup code is called) another time,
-	 * page by page this time in order to know which pages
+	 * sector by sector this time in order to know which sectors
 	 * caused I/O errors and which ones are good (for all mirrors).
 	 * It is the goal to handle the situation when more than one
 	 * mirror contains I/O errors, but the errors do not
 	 * overlap, i.e. the data can be repaired by selecting the
-	 * pages from those mirrors without I/O error on the
-	 * particular pages. One example (with blocks >= 2 * PAGE_SIZE)
-	 * would be that mirror #1 has an I/O error on the first page,
-	 * the second page is good, and mirror #2 has an I/O error on
-	 * the second page, but the first page is good.
-	 * Then the first page of the first mirror can be repaired by
-	 * taking the first page of the second mirror, and the
-	 * second page of the second mirror can be repaired by
-	 * copying the contents of the 2nd page of the 1st mirror.
-	 * One more note: if the pages of one mirror contain I/O
+	 * sectors from those mirrors without I/O error on the
+	 * particular sectors. One example (with blocks >= 2 * sectorsize)
+	 * would be that mirror #1 has an I/O error on the first sector,
+	 * the second sector is good, and mirror #2 has an I/O error on
+	 * the second sector, but the first sector is good.
+	 * Then the first sector of the first mirror can be repaired by
+	 * taking the first sector of the second mirror, and the
+	 * second sector of the second mirror can be repaired by
+	 * copying the contents of the 2nd sector of the 1st mirror.
+	 * One more note: if the sectors of one mirror contain I/O
 	 * errors, the checksum cannot be verified. In order to get
 	 * the best data for repairing, the first attempt is to find
 	 * a mirror without I/O errors and with a validated checksum.
-	 * Only if this is not possible, the pages are picked from
+	 * Only if this is not possible, the sectors are picked from
 	 * mirrors with I/O errors without considering the checksum.
 	 * If the latter is the case, at the end, the checksum of the
 	 * repaired area is verified in order to correctly maintain
@@ -1065,26 +1063,26 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
 	/*
 	 * In case of I/O errors in the area that is supposed to be
-	 * repaired, continue by picking good copies of those pages.
-	 * Select the good pages from mirrors to rewrite bad pages from
+	 * repaired, continue by picking good copies of those sectors.
+	 * Select the good sectors from mirrors to rewrite bad sectors from
 	 * the area to fix. Afterwards verify the checksum of the block
 	 * that is supposed to be repaired. This verification step is
 	 * only done for the purpose of statistic counting and for the
 	 * final scrub report, whether errors remain.
 	 * A perfect algorithm could make use of the checksum and try
-	 * all possible combinations of pages from the different mirrors
+	 * all possible combinations of sectors from the different mirrors
 	 * until the checksum verification succeeds. For example, when
-	 * the 2nd page of mirror #1 faces I/O errors, and the 2nd page
+	 * the 2nd sector of mirror #1 faces I/O errors, and the 2nd sector
 	 * of mirror #2 is readable but the final checksum test fails,
-	 * then the 2nd page of mirror #3 could be tried, whether now
+	 * then the 2nd sector of mirror #3 could be tried, whether now
 	 * the final checksum succeeds. But this would be a rare
 	 * exception and is therefore not implemented. At least it is
 	 * avoided that the good copy is overwritten.
 	 * A more useful improvement would be to pick the sectors
 	 * without I/O error based on sector sizes (512 bytes on legacy
-	 * disks) instead of on PAGE_SIZE. Then maybe 512 byte of one
+	 * disks) instead of on sectorsize. Then maybe 512 byte of one
 	 * mirror could be repaired by taking 512 byte of a different
-	 * mirror, even if other 512 byte sectors in the same PAGE_SIZE
+	 * mirror, even if other 512 byte sectors in the same sectorsize
 	 * area are unreadable.
 	 */
 	success = 1;
@@ -1265,7 +1263,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 {
 	struct scrub_ctx *sctx = original_sblock->sctx;
 	struct btrfs_fs_info *fs_info = sctx->fs_info;
-	u64 length = original_sblock->page_count * PAGE_SIZE;
+	u64 length = original_sblock->page_count * fs_info->sectorsize;
 	u64 logical = original_sblock->pagev[0]->logical;
 	u64 generation = original_sblock->pagev[0]->generation;
 	u64 flags = original_sblock->pagev[0]->flags;
@@ -1288,13 +1286,13 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
 	 */
 
 	while (length > 0) {
-		sublen = min_t(u64, length, PAGE_SIZE);
+		sublen = min_t(u64, length, fs_info->sectorsize);
 		mapped_length = sublen;
 		bbio = NULL;
 
 		/*
-		 * with a length of PAGE_SIZE, each returned stripe
-		 * represents one mirror
+		 * With a length of sectorsize, each returned stripe represents
+		 * one mirror
 		 */
 		btrfs_bio_counter_inc_blocked(fs_info);
 		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
@@ -1485,7 +1483,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
 		bio = btrfs_io_bio_alloc(1);
 		bio_set_dev(bio, spage->dev->bdev);
 
-		bio_add_page(bio, spage->page, PAGE_SIZE, 0);
+		bio_add_page(bio, spage->page, fs_info->sectorsize, 0);
 		bio->bi_iter.bi_sector = spage->physical >> 9;
 		bio->bi_opf = REQ_OP_READ;
 
@@ -1549,6 +1547,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 	struct scrub_page *spage_bad = sblock_bad->pagev[page_num];
 	struct scrub_page *spage_good = sblock_good->pagev[page_num];
 	struct btrfs_fs_info *fs_info = sblock_bad->sctx->fs_info;
+	const u32 sectorsize = fs_info->sectorsize;
 
 	BUG_ON(spage_bad->page == NULL);
 	BUG_ON(spage_good->page == NULL);
@@ -1568,8 +1567,8 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
 		bio->bi_iter.bi_sector = spage_bad->physical >> 9;
 		bio->bi_opf = REQ_OP_WRITE;
 
-		ret = bio_add_page(bio, spage_good->page, PAGE_SIZE, 0);
-		if (PAGE_SIZE != ret) {
+		ret = bio_add_page(bio, spage_good->page, sectorsize, 0);
+		if (ret != sectorsize) {
 			bio_put(bio);
 			return -EIO;
 		}
@@ -1647,6 +1646,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
 {
 	struct scrub_bio *sbio;
 	int ret;
+	const u32 sectorsize = sctx->fs_info->sectorsize;
 
 	mutex_lock(&sctx->wr_lock);
 again:
@@ -1686,16 +1686,16 @@ again:
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio->bi_opf = REQ_OP_WRITE;
 		sbio->status = 0;
-	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
+	} else if (sbio->physical + sbio->page_count * sectorsize !=
 		   spage->physical_for_dev_replace ||
-		   sbio->logical + sbio->page_count * PAGE_SIZE !=
+		   sbio->logical + sbio->page_count * sectorsize !=
 		   spage->logical) {
 		scrub_wr_submit(sctx);
 		goto again;
 	}
 
-	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
-	if (ret != PAGE_SIZE) {
+	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
+	if (ret != sectorsize) {
 		if (sbio->page_count < 1) {
 			bio_put(sbio->bio);
 			sbio->bio = NULL;
@@ -1734,7 +1734,8 @@ static void scrub_wr_submit(struct scrub_ctx *sctx)
 	btrfsic_submit_bio(sbio->bio);
 
 	if (btrfs_is_zoned(sctx->fs_info))
-		sctx->write_pointer = sbio->physical + sbio->page_count * PAGE_SIZE;
+		sctx->write_pointer = sbio->physical + sbio->page_count *
+			sctx->fs_info->sectorsize;
 }
 
 static void scrub_wr_bio_end_io(struct bio *bio)
@@ -2072,6 +2073,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
 {
 	struct scrub_block *sblock = spage->sblock;
 	struct scrub_bio *sbio;
+	const u32 sectorsize = sctx->fs_info->sectorsize;
 	int ret;
 
 again:
@@ -2110,9 +2112,9 @@ again:
 		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		bio->bi_opf = REQ_OP_READ;
 		sbio->status = 0;
-	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
+	} else if (sbio->physical + sbio->page_count * sectorsize !=
 		   spage->physical ||
-		   sbio->logical + sbio->page_count * PAGE_SIZE !=
+		   sbio->logical + sbio->page_count * sectorsize !=
 		   spage->logical ||
 		   sbio->dev != spage->dev) {
 		scrub_submit(sctx);
@@ -2120,8 +2122,8 @@ again:
 	}
 
 	sbio->pagev[sbio->page_count] = spage;
-	ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0);
-	if (ret != PAGE_SIZE) {
+	ret = bio_add_page(sbio->bio, spage->page, sectorsize, 0);
+	if (ret != sectorsize) {
 		if (sbio->page_count < 1) {
 			bio_put(sbio->bio);
 			sbio->bio = NULL;
@@ -2464,7 +2466,7 @@ static void scrub_block_complete(struct scrub_block *sblock)
 	if (sblock->sparity && corrupted && !sblock->data_corrected) {
 		u64 start = sblock->pagev[0]->logical;
 		u64 end = sblock->pagev[sblock->page_count - 1]->logical +
-			  PAGE_SIZE;
+			  sblock->sctx->fs_info->sectorsize;
 
 		ASSERT(end - start <= U32_MAX);
 		scrub_parity_mark_sectors_error(sblock->sparity,
-- 
GitLab


From 0044ae11e8be86b5e39857d47017417d4cda00f2 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 13 Apr 2021 14:23:14 +0800
Subject: [PATCH 3471/3804] btrfs: make free space cache size consistent across
 different PAGE_SIZE

Currently free space cache inode size is determined by two factors:

- block group size
- PAGE_SIZE

This means, for the same sized block groups, with different PAGE_SIZE,
it will result in different inode sizes.

This will not be a good thing for subpage support, so change the
requirement for PAGE_SIZE to sectorsize.

Now for the same 4K sectorsize btrfs, it should result in the same inode
size no matter what the PAGE_SIZE is.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 6d5c4e45cfef0..c42b6528552ff 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -2505,7 +2505,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
 	struct extent_changeset *data_reserved = NULL;
 	u64 alloc_hint = 0;
 	int dcs = BTRFS_DC_ERROR;
-	u64 num_pages = 0;
+	u64 cache_size = 0;
 	int retries = 0;
 	int ret = 0;
 
@@ -2617,20 +2617,20 @@ again:
 	 * taking up quite a bit since it's not folded into the other space
 	 * cache.
 	 */
-	num_pages = div_u64(block_group->length, SZ_256M);
-	if (!num_pages)
-		num_pages = 1;
+	cache_size = div_u64(block_group->length, SZ_256M);
+	if (!cache_size)
+		cache_size = 1;
 
-	num_pages *= 16;
-	num_pages *= PAGE_SIZE;
+	cache_size *= 16;
+	cache_size *= fs_info->sectorsize;
 
 	ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
-					  num_pages);
+					  cache_size);
 	if (ret)
 		goto out_put;
 
-	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
-					      num_pages, num_pages,
+	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, cache_size,
+					      cache_size, cache_size,
 					      &alloc_hint);
 	/*
 	 * Our cache requires contiguous chunks so that we don't modify a bunch
-- 
GitLab


From 43c0d1a5e117954b8193912939eb01390b2f01f2 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 13 Apr 2021 17:58:48 +0800
Subject: [PATCH 3472/3804] btrfs: remove the unused parameter @len for
 btrfs_get_io_geometry()

The parameter @len is not really used in btrfs_get_io_geometry(),
just remove it.

Its last use got removed in 420343131970 ("btrfs: let callers of
btrfs_get_io_geometry pass the em"); before that, btrfs_get_chunk_map()
utilized it.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c   | 5 ++---
 fs/btrfs/volumes.c | 6 +++---
 fs/btrfs/volumes.h | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3cd5286572d40..bd09ad1fed07b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2207,8 +2207,7 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 	em = btrfs_get_chunk_map(fs_info, logical, map_length);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
-	ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical,
-				    map_length, &geom);
+	ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical, &geom);
 	if (ret < 0)
 		goto out;
 
@@ -8197,7 +8196,7 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
 			goto out_err_em;
 		}
 		ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(dio_bio),
-					    logical, submit_len, &geom);
+					    logical, &geom);
 		if (ret) {
 			status = errno_to_blk_status(ret);
 			goto out_err_em;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e020447b25a24..80e962788396a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6127,17 +6127,17 @@ static bool need_full_stripe(enum btrfs_map_op op)
  * @em:      mapping containing the logical extent
  * @op:      type of operation - write or read
  * @logical: address that we want to figure out the geometry of
- * @len:     the length of IO we are going to perform, starting at @logical
  * @io_geom: pointer used to return values
  *
  * Returns < 0 in case a chunk for the given logical address cannot be found,
  * usually shouldn't happen unless @logical is corrupted, 0 otherwise.
  */
 int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
-			  enum btrfs_map_op op, u64 logical, u64 len,
+			  enum btrfs_map_op op, u64 logical,
 			  struct btrfs_io_geometry *io_geom)
 {
 	struct map_lookup *map;
+	u64 len;
 	u64 offset;
 	u64 stripe_offset;
 	u64 stripe_nr;
@@ -6243,7 +6243,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
 	em = btrfs_get_chunk_map(fs_info, logical, *length);
 	ASSERT(!IS_ERR(em));
 
-	ret = btrfs_get_io_geometry(fs_info, em, op, logical, *length, &geom);
+	ret = btrfs_get_io_geometry(fs_info, em, op, logical, &geom);
 	if (ret < 0)
 		return ret;
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 063ce999b9d3b..c7fc7caf575c0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -446,7 +446,7 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
 		     u64 logical, u64 *length,
 		     struct btrfs_bio **bbio_ret);
 int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *map,
-			  enum btrfs_map_op op, u64 logical, u64 len,
+			  enum btrfs_map_op op, u64 logical,
 			  struct btrfs_io_geometry *io_geom);
 int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
 int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
-- 
GitLab


From 1a0b5c4d6445abcbdc95cff4aa4e1dc9e565607a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 13 Apr 2021 18:00:47 +0800
Subject: [PATCH 3473/3804] btrfs: allow btrfs_bio_fits_in_stripe() to accept
 bio without any page

Function btrfs_bio_fits_in_stripe() currently requires a bio with at least
one page added, otherwise btrfs_get_chunk_map() will fail with -ENOENT.

But in fact this requirement is not needed at all, as we can just pass
sectorsize for btrfs_get_chunk_map().

This tiny behavior change is important for later subpage refactoring on
submit_extent_page().

As for 64K page size, we can have a page range with pgoff=0 and size=64K.
If the logical bytenr is just 16K before the stripe boundary, we have to
split the page range into two bios.

This means we must check the page range against the stripe boundary, even
when adding the range to an empty bio.

This tiny refactoring is for the incoming changes, but on its own,
regular sectorsize == PAGE_SIZE is not affected anyway.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bd09ad1fed07b..3a87f928d9ce6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2193,25 +2193,22 @@ int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 	struct inode *inode = page->mapping->host;
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	u64 logical = bio->bi_iter.bi_sector << 9;
+	u32 bio_len = bio->bi_iter.bi_size;
 	struct extent_map *em;
-	u64 length = 0;
-	u64 map_length;
 	int ret = 0;
 	struct btrfs_io_geometry geom;
 
 	if (bio_flags & EXTENT_BIO_COMPRESSED)
 		return 0;
 
-	length = bio->bi_iter.bi_size;
-	map_length = length;
-	em = btrfs_get_chunk_map(fs_info, logical, map_length);
+	em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
 	if (IS_ERR(em))
 		return PTR_ERR(em);
 	ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), logical, &geom);
 	if (ret < 0)
 		goto out;
 
-	if (geom.len < length + size)
+	if (geom.len < bio_len + size)
 		ret = 1;
 out:
 	free_extent_map(em);
-- 
GitLab


From 390ed29b817e6de4e8a9dd1749659e7de8ed1c4c Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 14 Apr 2021 16:42:15 +0800
Subject: [PATCH 3474/3804] btrfs: refactor submit_extent_page() to make bio
 and its flag tracing easier

A lot of code inside extent_io.c needs both "struct bio **bio_ret" and
"unsigned long prev_bio_flags", along with some parameters like
"unsigned long bio_flags".

Such strange parameters are here for bio assembly.

For example, we have such inode page layout:

  0       4K      8K      12K
  |<-- Extent A-->|<- EB->|

Then what we do is:

- Page [0, 4K)
  *bio_ret = NULL
  So we allocate a new bio to bio_ret,
  Add page [0, 4K) to *bio_ret.

- Page [4K, 8K)
  *bio_ret != NULL
  We found this page is continuous to *bio_ret,
  and if we're not at stripe boundary, we
  add page [4K, 8K) to *bio_ret.

- Page [8K, 12K)
  *bio_ret != NULL
  But we found this page is not continuous, so
  we submit *bio_ret, then allocate a new bio,
  and add page [8K, 12K) to the new bio.

This means we need to record both the bio and its bio_flags, but we
record them manually using those strange parameter lists, rather than
encapsulating them into their own structure.

So this patch will introduce a new structure, btrfs_bio_ctrl, to record
both the bio, and its bio_flags.

Also, in above case, for all pages added to the bio, we need to check if
the new page crosses stripe boundary.  This check itself can be time
consuming, and we don't really need to do that for each page.

This patch also integrates the stripe boundary check into btrfs_bio_ctrl.
When a new bio is allocated, the stripe and ordered extent boundaries are
also calculated, so no matter how large the bio will be, we only
calculate the boundaries once, to save some CPU time.

The following functions/structures are affected:

- struct extent_page_data
  Replace its bio pointer with structure btrfs_bio_ctrl (embedded
  structure, not pointer)

- end_write_bio()
- flush_write_bio()
  Just change how bio is fetched

- btrfs_bio_add_page()
  Use pre-calculated boundaries instead of re-calculating them.
  And use @bio_ctrl to replace @bio and @prev_bio_flags.

- calc_bio_boundaries()
  New function

- submit_extent_page() callers
- btrfs_do_readpage() callers
- contiguous_readpages() callers
  Use @bio_ctrl to replace @bio and @prev_bio_flags, and change how the
  bio is grabbed.

- btrfs_bio_fits_in_ordered_extent()
  Removed, as now the ordered extent size limit is done at bio
  allocation time, no need to check for each page range.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h     |   2 -
 fs/btrfs/extent_io.c | 212 +++++++++++++++++++++++++++----------------
 fs/btrfs/extent_io.h |  13 ++-
 fs/btrfs/inode.c     |  36 +-------
 4 files changed, 152 insertions(+), 111 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1124fa87e2e9e..ed5bc25bbcecd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3157,8 +3157,6 @@ void btrfs_split_delalloc_extent(struct inode *inode,
 				 struct extent_state *orig, u64 split);
 int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 			     unsigned long bio_flags);
-bool btrfs_bio_fits_in_ordered_extent(struct page *page, struct bio *bio,
-				      unsigned int size);
 void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
 vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e924f60ea6f6..11b1d8f2ff23d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -136,7 +136,7 @@ struct tree_entry {
 };
 
 struct extent_page_data {
-	struct bio *bio;
+	struct btrfs_bio_ctrl bio_ctrl;
 	/* tells writepage not to lock the state bits for this range
 	 * it still does the unlocking
 	 */
@@ -185,10 +185,12 @@ int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 /* Cleanup unsubmitted bios */
 static void end_write_bio(struct extent_page_data *epd, int ret)
 {
-	if (epd->bio) {
-		epd->bio->bi_status = errno_to_blk_status(ret);
-		bio_endio(epd->bio);
-		epd->bio = NULL;
+	struct bio *bio = epd->bio_ctrl.bio;
+
+	if (bio) {
+		bio->bi_status = errno_to_blk_status(ret);
+		bio_endio(bio);
+		epd->bio_ctrl.bio = NULL;
 	}
 }
 
@@ -201,9 +203,10 @@ static void end_write_bio(struct extent_page_data *epd, int ret)
 static int __must_check flush_write_bio(struct extent_page_data *epd)
 {
 	int ret = 0;
+	struct bio *bio = epd->bio_ctrl.bio;
 
-	if (epd->bio) {
-		ret = submit_one_bio(epd->bio, 0, 0);
+	if (bio) {
+		ret = submit_one_bio(bio, 0, 0);
 		/*
 		 * Clean up of epd->bio is handled by its endio function.
 		 * And endio is either triggered by successful bio execution
@@ -211,7 +214,7 @@ static int __must_check flush_write_bio(struct extent_page_data *epd)
 		 * So at this point, no matter what happened, we don't need
 		 * to clean up epd->bio.
 		 */
-		epd->bio = NULL;
+		epd->bio_ctrl.bio = NULL;
 	}
 	return ret;
 }
@@ -3163,42 +3166,99 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
  *
  * Return true if successfully page added. Otherwise, return false.
  */
-static bool btrfs_bio_add_page(struct bio *bio, struct page *page,
+static bool btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
+			       struct page *page,
 			       u64 disk_bytenr, unsigned int size,
 			       unsigned int pg_offset,
-			       unsigned long prev_bio_flags,
 			       unsigned long bio_flags)
 {
+	struct bio *bio = bio_ctrl->bio;
+	u32 bio_size = bio->bi_iter.bi_size;
 	const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
 	bool contig;
 	int ret;
 
-	if (prev_bio_flags != bio_flags)
+	ASSERT(bio);
+	/* The limit should be calculated when bio_ctrl->bio is allocated */
+	ASSERT(bio_ctrl->len_to_oe_boundary && bio_ctrl->len_to_stripe_boundary);
+	if (bio_ctrl->bio_flags != bio_flags)
 		return false;
 
-	if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
+	if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED)
 		contig = bio->bi_iter.bi_sector == sector;
 	else
 		contig = bio_end_sector(bio) == sector;
 	if (!contig)
 		return false;
 
-	if (btrfs_bio_fits_in_stripe(page, size, bio, bio_flags))
+	if (bio_size + size > bio_ctrl->len_to_oe_boundary ||
+	    bio_size + size > bio_ctrl->len_to_stripe_boundary)
 		return false;
 
-	if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
-		struct page *first_page = bio_first_bvec_all(bio)->bv_page;
-
-		if (!btrfs_bio_fits_in_ordered_extent(first_page, bio, size))
-			return false;
+	if (bio_op(bio) == REQ_OP_ZONE_APPEND)
 		ret = bio_add_zone_append_page(bio, page, size, pg_offset);
-	} else {
+	else
 		ret = bio_add_page(bio, page, size, pg_offset);
-	}
 
 	return ret == size;
 }
 
+static int calc_bio_boundaries(struct btrfs_bio_ctrl *bio_ctrl,
+			       struct btrfs_inode *inode)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct btrfs_io_geometry geom;
+	struct btrfs_ordered_extent *ordered;
+	struct extent_map *em;
+	u64 logical = (bio_ctrl->bio->bi_iter.bi_sector << SECTOR_SHIFT);
+	int ret;
+
+	/*
+	 * Pages for compressed extent are never submitted to disk directly,
+	 * thus it has no real boundary, just set them to U32_MAX.
+	 *
+	 * The split happens for real compressed bio, which happens in
+	 * btrfs_submit_compressed_read/write().
+	 */
+	if (bio_ctrl->bio_flags & EXTENT_BIO_COMPRESSED) {
+		bio_ctrl->len_to_oe_boundary = U32_MAX;
+		bio_ctrl->len_to_stripe_boundary = U32_MAX;
+		return 0;
+	}
+	em = btrfs_get_chunk_map(fs_info, logical, fs_info->sectorsize);
+	if (IS_ERR(em))
+		return PTR_ERR(em);
+	ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio_ctrl->bio),
+				    logical, &geom);
+	free_extent_map(em);
+	if (ret < 0) {
+		return ret;
+	}
+	if (geom.len > U32_MAX)
+		bio_ctrl->len_to_stripe_boundary = U32_MAX;
+	else
+		bio_ctrl->len_to_stripe_boundary = (u32)geom.len;
+
+	if (!btrfs_is_zoned(fs_info) ||
+	    bio_op(bio_ctrl->bio) != REQ_OP_ZONE_APPEND) {
+		bio_ctrl->len_to_oe_boundary = U32_MAX;
+		return 0;
+	}
+
+	ASSERT(fs_info->max_zone_append_size > 0);
+	/* Ordered extent not yet created, so we're good */
+	ordered = btrfs_lookup_ordered_extent(inode, logical);
+	if (!ordered) {
+		bio_ctrl->len_to_oe_boundary = U32_MAX;
+		return 0;
+	}
+
+	bio_ctrl->len_to_oe_boundary = min_t(u32, U32_MAX,
+		ordered->disk_bytenr + ordered->disk_num_bytes - logical);
+	btrfs_put_ordered_extent(ordered);
+	return 0;
+}
+
 /*
  * @opf:	bio REQ_OP_* and REQ_* flags as one value
  * @wbc:	optional writeback control for io accounting
@@ -3215,12 +3275,11 @@ static bool btrfs_bio_add_page(struct bio *bio, struct page *page,
  */
 static int submit_extent_page(unsigned int opf,
 			      struct writeback_control *wbc,
+			      struct btrfs_bio_ctrl *bio_ctrl,
 			      struct page *page, u64 disk_bytenr,
 			      size_t size, unsigned long pg_offset,
-			      struct bio **bio_ret,
 			      bio_end_io_t end_io_func,
 			      int mirror_num,
-			      unsigned long prev_bio_flags,
 			      unsigned long bio_flags,
 			      bool force_bio_submit)
 {
@@ -3231,19 +3290,19 @@ static int submit_extent_page(unsigned int opf,
 	struct extent_io_tree *tree = &inode->io_tree;
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 
-	ASSERT(bio_ret);
+	ASSERT(bio_ctrl);
 
-	if (*bio_ret) {
-		bio = *bio_ret;
+	ASSERT(pg_offset < PAGE_SIZE && size <= PAGE_SIZE &&
+	       pg_offset + size <= PAGE_SIZE);
+	if (bio_ctrl->bio) {
+		bio = bio_ctrl->bio;
 		if (force_bio_submit ||
-		    !btrfs_bio_add_page(bio, page, disk_bytenr, io_size,
-					pg_offset, prev_bio_flags, bio_flags)) {
-			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
-			if (ret < 0) {
-				*bio_ret = NULL;
+		    !btrfs_bio_add_page(bio_ctrl, page, disk_bytenr, io_size,
+					pg_offset, bio_flags)) {
+			ret = submit_one_bio(bio, mirror_num, bio_ctrl->bio_flags);
+			bio_ctrl->bio = NULL;
+			if (ret < 0)
 				return ret;
-			}
-			bio = NULL;
 		} else {
 			if (wbc)
 				wbc_account_cgroup_owner(wbc, page, io_size);
@@ -3275,7 +3334,9 @@ static int submit_extent_page(unsigned int opf,
 		btrfs_io_bio(bio)->device = device;
 	}
 
-	*bio_ret = bio;
+	bio_ctrl->bio = bio;
+	bio_ctrl->bio_flags = bio_flags;
+	ret = calc_bio_boundaries(bio_ctrl, inode);
 
 	return ret;
 }
@@ -3388,7 +3449,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
  * return 0 on success, otherwise return error
  */
 int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
-		      struct bio **bio, unsigned long *bio_flags,
+		      struct btrfs_bio_ctrl *bio_ctrl,
 		      unsigned int read_flags, u64 *prev_em_start)
 {
 	struct inode *inode = page->mapping->host;
@@ -3564,15 +3625,13 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		}
 
 		ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
-					 page, disk_bytenr, iosize,
-					 pg_offset, bio,
+					 bio_ctrl, page, disk_bytenr, iosize,
+					 pg_offset,
 					 end_bio_extent_readpage, 0,
-					 *bio_flags,
 					 this_bio_flag,
 					 force_bio_submit);
 		if (!ret) {
 			nr++;
-			*bio_flags = this_bio_flag;
 		} else {
 			unlock_extent(tree, cur, cur + iosize - 1);
 			end_page_read(page, false, cur, iosize);
@@ -3586,11 +3645,10 @@ out:
 }
 
 static inline void contiguous_readpages(struct page *pages[], int nr_pages,
-					     u64 start, u64 end,
-					     struct extent_map **em_cached,
-					     struct bio **bio,
-					     unsigned long *bio_flags,
-					     u64 *prev_em_start)
+					u64 start, u64 end,
+					struct extent_map **em_cached,
+					struct btrfs_bio_ctrl *bio_ctrl,
+					u64 *prev_em_start)
 {
 	struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
 	int index;
@@ -3598,7 +3656,7 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 	btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
 
 	for (index = 0; index < nr_pages; index++) {
-		btrfs_do_readpage(pages[index], em_cached, bio, bio_flags,
+		btrfs_do_readpage(pages[index], em_cached, bio_ctrl,
 				  REQ_RAHEAD, prev_em_start);
 		put_page(pages[index]);
 	}
@@ -3787,11 +3845,12 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			       page->index, cur, end);
 		}
 
-		ret = submit_extent_page(opf | write_flags, wbc, page,
+		ret = submit_extent_page(opf | write_flags, wbc,
+					 &epd->bio_ctrl, page,
 					 disk_bytenr, iosize,
-					 cur - page_offset(page), &epd->bio,
+					 cur - page_offset(page),
 					 end_bio_extent_writepage,
-					 0, 0, 0, false);
+					 0, 0, false);
 		if (ret) {
 			SetPageError(page);
 			if (PageWriteback(page))
@@ -4222,10 +4281,10 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 	if (no_dirty_ebs)
 		clear_page_dirty_for_io(page);
 
-	ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc, page,
-			eb->start, eb->len, eb->start - page_offset(page),
-			&epd->bio, end_bio_extent_buffer_writepage, 0, 0, 0,
-			false);
+	ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
+			&epd->bio_ctrl, page, eb->start, eb->len,
+			eb->start - page_offset(page),
+			end_bio_extent_buffer_writepage, 0, 0, false);
 	if (ret) {
 		btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
 		set_btree_ioerr(page, eb);
@@ -4285,10 +4344,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 		clear_page_dirty_for_io(p);
 		set_page_writeback(p);
 		ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
-					 p, disk_bytenr, PAGE_SIZE, 0,
-					 &epd->bio,
+					 &epd->bio_ctrl, p, disk_bytenr,
+					 PAGE_SIZE, 0,
 					 end_bio_extent_buffer_writepage,
-					 0, 0, 0, false);
+					 0, 0, false);
 		if (ret) {
 			set_btree_ioerr(p, eb);
 			if (PageWriteback(p))
@@ -4504,7 +4563,7 @@ int btree_write_cache_pages(struct address_space *mapping,
 {
 	struct extent_buffer *eb_context = NULL;
 	struct extent_page_data epd = {
-		.bio = NULL,
+		.bio_ctrl = { 0 },
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
@@ -4786,7 +4845,7 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
 {
 	int ret;
 	struct extent_page_data epd = {
-		.bio = NULL,
+		.bio_ctrl = { 0 },
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
@@ -4813,7 +4872,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		PAGE_SHIFT;
 
 	struct extent_page_data epd = {
-		.bio = NULL,
+		.bio_ctrl = { 0 },
 		.extent_locked = 1,
 		.sync_io = mode == WB_SYNC_ALL,
 	};
@@ -4856,7 +4915,7 @@ int extent_writepages(struct address_space *mapping,
 {
 	int ret = 0;
 	struct extent_page_data epd = {
-		.bio = NULL,
+		.bio_ctrl = { 0 },
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
@@ -4873,8 +4932,7 @@ int extent_writepages(struct address_space *mapping,
 
 void extent_readahead(struct readahead_control *rac)
 {
-	struct bio *bio = NULL;
-	unsigned long bio_flags = 0;
+	struct btrfs_bio_ctrl bio_ctrl = { 0 };
 	struct page *pagepool[16];
 	struct extent_map *em_cached = NULL;
 	u64 prev_em_start = (u64)-1;
@@ -4885,14 +4943,14 @@ void extent_readahead(struct readahead_control *rac)
 		u64 contig_end = contig_start + readahead_batch_length(rac) - 1;
 
 		contiguous_readpages(pagepool, nr, contig_start, contig_end,
-				&em_cached, &bio, &bio_flags, &prev_em_start);
+				&em_cached, &bio_ctrl, &prev_em_start);
 	}
 
 	if (em_cached)
 		free_extent_map(em_cached);
 
-	if (bio) {
-		if (submit_one_bio(bio, 0, bio_flags))
+	if (bio_ctrl.bio) {
+		if (submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags))
 			return;
 	}
 }
@@ -6182,7 +6240,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct extent_io_tree *io_tree;
 	struct page *page = eb->pages[0];
-	struct bio *bio = NULL;
+	struct btrfs_bio_ctrl bio_ctrl = { 0 };
 	int ret = 0;
 
 	ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
@@ -6213,9 +6271,10 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	check_buffer_tree_ref(eb);
 	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
 
-	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, page, eb->start,
-				 eb->len, eb->start - page_offset(page), &bio,
-				 end_bio_extent_readpage, mirror_num, 0, 0,
+	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
+				 page, eb->start, eb->len,
+				 eb->start - page_offset(page),
+				 end_bio_extent_readpage, mirror_num, 0,
 				 true);
 	if (ret) {
 		/*
@@ -6225,10 +6284,11 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 		 */
 		atomic_dec(&eb->io_pages);
 	}
-	if (bio) {
+	if (bio_ctrl.bio) {
 		int tmp;
 
-		tmp = submit_one_bio(bio, mirror_num, 0);
+		tmp = submit_one_bio(bio_ctrl.bio, mirror_num, 0);
+		bio_ctrl.bio = NULL;
 		if (tmp < 0)
 			return tmp;
 	}
@@ -6251,8 +6311,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 	int all_uptodate = 1;
 	int num_pages;
 	unsigned long num_reads = 0;
-	struct bio *bio = NULL;
-	unsigned long bio_flags = 0;
+	struct btrfs_bio_ctrl bio_ctrl = { 0 };
 
 	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		return 0;
@@ -6316,9 +6375,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 
 			ClearPageError(page);
 			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
-					 page, page_offset(page), PAGE_SIZE, 0,
-					 &bio, end_bio_extent_readpage,
-					 mirror_num, 0, 0, false);
+					 &bio_ctrl, page, page_offset(page),
+					 PAGE_SIZE, 0, end_bio_extent_readpage,
+					 mirror_num, 0, false);
 			if (err) {
 				/*
 				 * We failed to submit the bio so it's the
@@ -6335,8 +6394,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 		}
 	}
 
-	if (bio) {
-		err = submit_one_bio(bio, mirror_num, bio_flags);
+	if (bio_ctrl.bio) {
+		err = submit_one_bio(bio_ctrl.bio, mirror_num, bio_ctrl.bio_flags);
+		bio_ctrl.bio = NULL;
 		if (err)
 			return err;
 	}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index fb9a9275fc41a..946d09caa5925 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -101,6 +101,17 @@ struct extent_buffer {
 #endif
 };
 
+/*
+ * Structure to record info about the bio being assembled, and other info like
+ * how many bytes are there before stripe/ordered extent boundary.
+ */
+struct btrfs_bio_ctrl {
+	struct bio *bio;
+	unsigned long bio_flags;
+	u32 len_to_stripe_boundary;
+	u32 len_to_oe_boundary;
+};
+
 /*
  * Structure to record how many bytes and which ranges are set/cleared
  */
@@ -169,7 +180,7 @@ int try_release_extent_buffer(struct page *page);
 int __must_check submit_one_bio(struct bio *bio, int mirror_num,
 				unsigned long bio_flags);
 int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
-		      struct bio **bio, unsigned long *bio_flags,
+		      struct btrfs_bio_ctrl *bio_ctrl,
 		      unsigned int read_flags, u64 *prev_em_start);
 int extent_write_full_page(struct page *page, struct writeback_control *wbc);
 int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3a87f928d9ce6..90fbae6a13633 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2229,33 +2229,6 @@ static blk_status_t btrfs_submit_bio_start(struct inode *inode, struct bio *bio,
 	return btrfs_csum_one_bio(BTRFS_I(inode), bio, 0, 0);
 }
 
-bool btrfs_bio_fits_in_ordered_extent(struct page *page, struct bio *bio,
-				      unsigned int size)
-{
-	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct btrfs_ordered_extent *ordered;
-	u64 len = bio->bi_iter.bi_size + size;
-	bool ret = true;
-
-	ASSERT(btrfs_is_zoned(fs_info));
-	ASSERT(fs_info->max_zone_append_size > 0);
-	ASSERT(bio_op(bio) == REQ_OP_ZONE_APPEND);
-
-	/* Ordered extent not yet created, so we're good */
-	ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
-	if (!ordered)
-		return ret;
-
-	if ((bio->bi_iter.bi_sector << SECTOR_SHIFT) + len >
-	    ordered->disk_bytenr + ordered->disk_num_bytes)
-		ret = false;
-
-	btrfs_put_ordered_extent(ordered);
-
-	return ret;
-}
-
 static blk_status_t extract_ordered_extent(struct btrfs_inode *inode,
 					   struct bio *bio, loff_t file_offset)
 {
@@ -8297,15 +8270,14 @@ int btrfs_readpage(struct file *file, struct page *page)
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_SIZE - 1;
-	unsigned long bio_flags = 0;
-	struct bio *bio = NULL;
+	struct btrfs_bio_ctrl bio_ctrl = { 0 };
 	int ret;
 
 	btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
 
-	ret = btrfs_do_readpage(page, NULL, &bio, &bio_flags, 0, NULL);
-	if (bio)
-		ret = submit_one_bio(bio, 0, bio_flags);
+	ret = btrfs_do_readpage(page, NULL, &bio_ctrl, 0, NULL);
+	if (bio_ctrl.bio)
+		ret = submit_one_bio(bio_ctrl.bio, 0, bio_ctrl.bio_flags);
 	return ret;
 }
 
-- 
GitLab


From fa04c16574c08ddea6885b5cd6a0ecb941bfa3c0 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 27 Apr 2021 12:53:35 +0800
Subject: [PATCH 3475/3804] btrfs: make subpage metadata write path call its
 own endio functions

For subpage metadata, we're reusing two functions for subpage metadata
write:

- end_bio_extent_buffer_writepage()
- write_one_eb()

But the truth is, for subpage we just call
end_bio_subpage_eb_writepage() without using any bit in
end_bio_extent_buffer_writepage().

For write_one_eb(), it's pretty similar, but with a small part of code
reused.

There is really no need to pollute the existing code path if we're not
really using most of them.

So this patch will do the following changes to separate the subpage
metadata write path from the regular write path:

- Use end_bio_subpage_eb_writepage() directly as endio in
  write_one_subpage_eb()
- Directly call write_one_subpage_eb() in submit_eb_subpage()

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 72 ++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 11b1d8f2ff23d..3e835fa43e17f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4163,12 +4163,15 @@ static struct extent_buffer *find_extent_buffer_nolock(
  * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
  * after all extent buffers in the page has finished their writeback.
  */
-static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
-					 struct bio *bio)
+static void end_bio_subpage_eb_writepage(struct bio *bio)
 {
+	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
 	struct bvec_iter_all iter_all;
 
+	fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
+	ASSERT(fs_info->sectorsize < PAGE_SIZE);
+
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
@@ -4219,16 +4222,11 @@ static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
 
 static void end_bio_extent_buffer_writepage(struct bio *bio)
 {
-	struct btrfs_fs_info *fs_info;
 	struct bio_vec *bvec;
 	struct extent_buffer *eb;
 	int done;
 	struct bvec_iter_all iter_all;
 
-	fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
-	if (fs_info->sectorsize < PAGE_SIZE)
-		return end_bio_subpage_eb_writepage(fs_info, bio);
-
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		struct page *page = bvec->bv_page;
@@ -4254,12 +4252,34 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 	bio_put(bio);
 }
 
+static void prepare_eb_write(struct extent_buffer *eb)
+{
+	u32 nritems;
+	unsigned long start;
+	unsigned long end;
+
+	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
+	atomic_set(&eb->io_pages, num_extent_pages(eb));
+
+	/* Set btree blocks beyond nritems with 0 to avoid stale content */
+	nritems = btrfs_header_nritems(eb);
+	if (btrfs_header_level(eb) > 0) {
+		end = btrfs_node_key_ptr_offset(nritems);
+		memzero_extent_buffer(eb, end, eb->len - end);
+	} else {
+		/*
+		 * Leaf:
+		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
+		 */
+		start = btrfs_item_nr_offset(nritems);
+		end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
+		memzero_extent_buffer(eb, start, end - start);
+	}
+}
+
 /*
  * Unlike the work in write_one_eb(), we rely completely on extent locking.
  * Page locking is only utilized at minimum to keep the VMM code happy.
- *
- * Caller should still call write_one_eb() other than this function directly.
- * As write_one_eb() has extra preparation before submitting the extent buffer.
  */
 static int write_one_subpage_eb(struct extent_buffer *eb,
 				struct writeback_control *wbc,
@@ -4271,6 +4291,8 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 	bool no_dirty_ebs = false;
 	int ret;
 
+	prepare_eb_write(eb);
+
 	/* clear_page_dirty_for_io() in subpage helper needs page locked */
 	lock_page(page);
 	btrfs_subpage_set_writeback(fs_info, page, eb->start, eb->len);
@@ -4284,7 +4306,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 	ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
 			&epd->bio_ctrl, page, eb->start, eb->len,
 			eb->start - page_offset(page),
-			end_bio_extent_buffer_writepage, 0, 0, false);
+			end_bio_subpage_eb_writepage, 0, 0, false);
 	if (ret) {
 		btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
 		set_btree_ioerr(page, eb);
@@ -4309,35 +4331,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 			struct extent_page_data *epd)
 {
 	u64 disk_bytenr = eb->start;
-	u32 nritems;
 	int i, num_pages;
-	unsigned long start, end;
 	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
 	int ret = 0;
 
-	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
-	num_pages = num_extent_pages(eb);
-	atomic_set(&eb->io_pages, num_pages);
-
-	/* set btree blocks beyond nritems with 0 to avoid stale content. */
-	nritems = btrfs_header_nritems(eb);
-	if (btrfs_header_level(eb) > 0) {
-		end = btrfs_node_key_ptr_offset(nritems);
-
-		memzero_extent_buffer(eb, end, eb->len - end);
-	} else {
-		/*
-		 * leaf:
-		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
-		 */
-		start = btrfs_item_nr_offset(nritems);
-		end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
-		memzero_extent_buffer(eb, start, end - start);
-	}
-
-	if (eb->fs_info->sectorsize < PAGE_SIZE)
-		return write_one_subpage_eb(eb, wbc, epd);
+	prepare_eb_write(eb);
 
+	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = eb->pages[i];
 
@@ -4451,7 +4451,7 @@ static int submit_eb_subpage(struct page *page,
 			free_extent_buffer(eb);
 			goto cleanup;
 		}
-		ret = write_one_eb(eb, wbc, epd);
+		ret = write_one_subpage_eb(eb, wbc, epd);
 		free_extent_buffer(eb);
 		if (ret < 0)
 			goto cleanup;
-- 
GitLab


From 38a39ac77e089515acbe85c6c70c3df1e728357d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 8 Apr 2021 20:32:27 +0800
Subject: [PATCH 3476/3804] btrfs: pass btrfs_inode to
 btrfs_writepage_endio_finish_ordered()

There is a pretty bad abuse of btrfs_writepage_endio_finish_ordered() in
end_compressed_bio_write().

It passes compressed pages to btrfs_writepage_endio_finish_ordered(),
which is only supposed to accept inode pages.

Thankfully the important info here is the inode, so let's pass
btrfs_inode directly into btrfs_writepage_endio_finish_ordered(), and
make @page parameter optional.

By this, end_compressed_bio_write() can happily pass page=NULL while
still getting everything done properly.

Also, to cooperate with such modification, replace @page parameter for
trace_btrfs_writepage_end_io_hook() with btrfs_inode.
Although this removes page_index info, the existing start/len should be
enough for most usage.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c       |  4 +---
 fs/btrfs/ctree.h             |  3 ++-
 fs/btrfs/extent_io.c         | 16 ++++++++++------
 fs/btrfs/inode.c             |  9 +++++----
 include/trace/events/btrfs.h | 20 ++++++++------------
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index fdead01568f68..35ca49893803f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -349,12 +349,10 @@ static void end_compressed_bio_write(struct bio *bio)
 	 * call back into the FS and do all the end_io operations
 	 */
 	inode = cb->inode;
-	cb->compressed_pages[0]->mapping = cb->inode->i_mapping;
 	btrfs_record_physical_zoned(inode, cb->start, bio);
-	btrfs_writepage_endio_finish_ordered(cb->compressed_pages[0],
+	btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
 			cb->start, cb->start + cb->len - 1,
 			bio->bi_status == BLK_STS_OK);
-	cb->compressed_pages[0]->mapping = NULL;
 
 	end_compressed_writeback(inode, cb);
 	/* note, our inode could be gone now */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ed5bc25bbcecd..70952c1a39d1c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3196,7 +3196,8 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
 		u64 start, u64 end, int *page_started, unsigned long *nr_written,
 		struct writeback_control *wbc);
 int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
-void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
+					  struct page *page, u64 start,
 					  u64 end, int uptodate);
 extern const struct dentry_operations btrfs_dentry_operations;
 extern const struct iomap_ops btrfs_dio_iomap_ops;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3e835fa43e17f..c0600dec62f8b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2756,10 +2756,13 @@ next:
 
 void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
+	struct btrfs_inode *inode;
 	int uptodate = (err == 0);
 	int ret = 0;
 
-	btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);
+	ASSERT(page && page->mapping);
+	inode = BTRFS_I(page->mapping->host);
+	btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
 
 	if (!uptodate) {
 		ClearPageUptodate(page);
@@ -3794,7 +3797,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		u32 iosize;
 
 		if (cur >= i_size) {
-			btrfs_writepage_endio_finish_ordered(page, cur, end, 1);
+			btrfs_writepage_endio_finish_ordered(inode, page, cur,
+							     end, 1);
 			break;
 		}
 		em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
@@ -3832,8 +3836,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			if (compressed)
 				nr++;
 			else
-				btrfs_writepage_endio_finish_ordered(page, cur,
-							cur + iosize - 1, 1);
+				btrfs_writepage_endio_finish_ordered(inode,
+						page, cur, cur + iosize - 1, 1);
 			cur += iosize;
 			continue;
 		}
@@ -4892,8 +4896,8 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 		if (clear_page_dirty_for_io(page))
 			ret = __extent_writepage(page, &wbc_writepages, &epd);
 		else {
-			btrfs_writepage_endio_finish_ordered(page, start,
-						    start + PAGE_SIZE - 1, 1);
+			btrfs_writepage_endio_finish_ordered(BTRFS_I(inode),
+					page, start, start + PAGE_SIZE - 1, 1);
 			unlock_page(page);
 		}
 		put_page(page);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 90fbae6a13633..4a481db15ec30 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -946,7 +946,8 @@ retry:
 			const u64 end = start + async_extent->ram_size - 1;
 
 			p->mapping = inode->vfs_inode.i_mapping;
-			btrfs_writepage_endio_finish_ordered(p, start, end, 0);
+			btrfs_writepage_endio_finish_ordered(inode, p, start,
+							     end, 0);
 
 			p->mapping = NULL;
 			extent_clear_unlock_delalloc(inode, start, end, NULL, 0,
@@ -3038,15 +3039,15 @@ static void finish_ordered_fn(struct btrfs_work *work)
 	btrfs_finish_ordered_io(ordered_extent);
 }
 
-void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
+void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
+					  struct page *page, u64 start,
 					  u64 end, int uptodate)
 {
-	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct btrfs_workqueue *wq;
 
-	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+	trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
 
 	ClearPagePrivate2(page);
 	if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a41dd8a0c7302..76e0be7e14d05 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -654,34 +654,30 @@ DEFINE_EVENT(btrfs__writepage, __extent_writepage,
 
 TRACE_EVENT(btrfs_writepage_end_io_hook,
 
-	TP_PROTO(const struct page *page, u64 start, u64 end, int uptodate),
+	TP_PROTO(const struct btrfs_inode *inode, u64 start, u64 end,
+		 int uptodate),
 
-	TP_ARGS(page, start, end, uptodate),
+	TP_ARGS(inode, start, end, uptodate),
 
 	TP_STRUCT__entry_btrfs(
 		__field(	u64,	 ino		)
-		__field(	unsigned long, index	)
 		__field(	u64,	 start		)
 		__field(	u64,	 end		)
 		__field(	int,	 uptodate	)
 		__field(	u64,    root_objectid	)
 	),
 
-	TP_fast_assign_btrfs(btrfs_sb(page->mapping->host->i_sb),
-		__entry->ino	= btrfs_ino(BTRFS_I(page->mapping->host));
-		__entry->index	= page->index;
+	TP_fast_assign_btrfs(inode->root->fs_info,
+		__entry->ino	= btrfs_ino(inode);
 		__entry->start	= start;
 		__entry->end	= end;
 		__entry->uptodate = uptodate;
-		__entry->root_objectid	=
-			 BTRFS_I(page->mapping->host)->root->root_key.objectid;
+		__entry->root_objectid = inode->root->root_key.objectid;
 	),
 
-	TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu start=%llu "
-		  "end=%llu uptodate=%d",
+	TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu end=%llu uptodate=%d",
 		  show_root_type(__entry->root_objectid),
-		  __entry->ino, __entry->index,
-		  __entry->start,
+		  __entry->ino, __entry->start,
 		  __entry->end, __entry->uptodate)
 );
 
-- 
GitLab


From 87b4d86baae219a9a79f6b0a1434b2a42fd40d09 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 22 Jan 2021 14:00:52 +0800
Subject: [PATCH 3477/3804] btrfs: make Private2 lifespan more consistent

Currently we use page Private2 bit to indicate that we have ordered
extent for the page range.

But the lifespan of it is not consistent, during regular writeback path,
there are two locations to clear the same PagePrivate2:

    T ----- Page marked Dirty
    |
    + ----- Page marked Private2, through btrfs_run_delalloc_range()
    |
    + ----- Page cleared Private2, through btrfs_writepage_cow_fixup()
    |       in __extent_writepage_io()
    |       ^^^ Private2 cleared for the first time
    |
    + ----- Page marked Writeback, through btrfs_set_range_writeback()
    |       in __extent_writepage_io().
    |
    + ----- Page cleared Private2, through
    |       btrfs_writepage_endio_finish_ordered()
    |       ^^^ Private2 cleared for the second time.
    |
    + ----- Page cleared Writeback, through
            btrfs_writepage_endio_finish_ordered()

Currently PagePrivate2 is mostly to prevent ordered extent accounting
being executed for both endio and invalidatepage.
Thus only the one who cleared page Private2 is responsible for ordered
extent accounting.

But the fact is, in btrfs_writepage_endio_finish_ordered(), page
Private2 is cleared and ordered extent accounting is executed
unconditionally.

The race prevention only happens through btrfs_invalidatepage(), where
we wait for the page writeback first, before checking the Private2 bit.

This means, Private2 is also protected by Writeback bit, and there is no
need for btrfs_writepage_cow_fixup() to clear Private2.

This patch will change btrfs_writepage_cow_fixup() to just check
PagePrivate2, not to clear it.
The clearing will happen in either btrfs_invalidatepage() or
btrfs_writepage_endio_finish_ordered().

This makes the Private2 bit easier to understand, just meaning the page
has unfinished ordered extent attached to it.

And this patch is a hard requirement for the incoming refactoring for
how we finished ordered IO for endio context, as the coming patch will
check Private2 to determine if we need to do the ordered extent
accounting.  Thus this patch is definitely needed or we will hang due to
unfinished ordered extent.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4a481db15ec30..9a9158b19205d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2647,7 +2647,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 	struct btrfs_writepage_fixup *fixup;
 
 	/* this page is properly in the ordered list */
-	if (TestClearPagePrivate2(page))
+	if (PagePrivate2(page))
 		return 0;
 
 	/*
-- 
GitLab


From e65f152e43484807b4caf7300e70d882e4652566 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Thu, 1 Apr 2021 15:15:06 +0800
Subject: [PATCH 3478/3804] btrfs: refactor how we finish ordered extent io for
 endio functions

Btrfs has two endio functions to mark certain io range finished for
ordered extents:

- __endio_write_update_ordered()
  This is for direct IO

- btrfs_writepage_endio_finish_ordered()
  This is for buffered IO.

However they take different approaches to handle ordered extent io:

- Whether to iterate through all ordered extents
  __endio_write_update_ordered() will but
  btrfs_writepage_endio_finish_ordered() will not.

  In fact, iterating through all ordered extents will benefit later
  subpage support, while for current PAGE_SIZE == sectorsize requirement
  this behavior makes no difference.

- Whether to update page Private2 flag
  __endio_write_update_ordered() will not update page Private2 flag as
  for iomap direct IO, the page can not be even mapped.
  While btrfs_writepage_endio_finish_ordered() will clear Private2 to
  prevent double accounting against btrfs_invalidatepage().

Those differences are pretty subtle, and the ordered extent iterations
code in callers makes code much harder to read.

So this patch will introduce a new function,
btrfs_mark_ordered_io_finished(), to do the heavy lifting:

- Iterate through all ordered extents in the range
- Do the ordered extent accounting
- Queue the work for finished ordered extent

This function has two new features:

- Proper underflow detection and recovery
  The old underflow detection will only detect the problem, then
  continue.
  No proper info like root/inode/ordered extent info, nor noisy enough
  to be caught by fstests.

  Furthermore when underflow happens, the ordered extent will never
  finish.

  New error detection will reset the bytes_left to 0, do proper
  kernel warning, and output extra info including root, ino, ordered
  extent range, the underflow value.

- Prevent double accounting based on Private2 flag
  Now if we find a range without Private2 flag, we will skip to next
  range.
  As that means someone else has already finished the accounting of
  ordered extent.

  This makes no difference for current code, but will be a critical part
  for incoming subpage support, as we can call
  btrfs_mark_ordered_io_finished() for multiple sectors if they are
  beyond inode size.
  Thus such double accounting prevention is a key feature for subpage.

Now both endio functions only need to call that new function.

And since the only caller of btrfs_dec_test_first_ordered_pending() is
removed, also remove btrfs_dec_test_first_ordered_pending() completely.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c        |  55 +------------
 fs/btrfs/ordered-data.c | 177 +++++++++++++++++++++++++++-------------
 fs/btrfs/ordered-data.h |   8 +-
 3 files changed, 127 insertions(+), 113 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9a9158b19205d..c6243d242bc91 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3043,24 +3043,10 @@ void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
 					  struct page *page, u64 start,
 					  u64 end, int uptodate)
 {
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct btrfs_ordered_extent *ordered_extent = NULL;
-	struct btrfs_workqueue *wq;
-
 	trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
 
-	ClearPagePrivate2(page);
-	if (!btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
-					    end - start + 1, uptodate))
-		return;
-
-	if (btrfs_is_free_space_inode(inode))
-		wq = fs_info->endio_freespace_worker;
-	else
-		wq = fs_info->endio_write_workers;
-
-	btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
-	btrfs_queue_work(wq, &ordered_extent->work);
+	btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start,
+				       finish_ordered_fn, uptodate);
 }
 
 /*
@@ -7959,41 +7945,8 @@ static void __endio_write_update_ordered(struct btrfs_inode *inode,
 					 const u64 offset, const u64 bytes,
 					 const bool uptodate)
 {
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct btrfs_ordered_extent *ordered = NULL;
-	struct btrfs_workqueue *wq;
-	u64 ordered_offset = offset;
-	u64 ordered_bytes = bytes;
-	u64 last_offset;
-
-	if (btrfs_is_free_space_inode(inode))
-		wq = fs_info->endio_freespace_worker;
-	else
-		wq = fs_info->endio_write_workers;
-
-	while (ordered_offset < offset + bytes) {
-		last_offset = ordered_offset;
-		if (btrfs_dec_test_first_ordered_pending(inode, &ordered,
-							 &ordered_offset,
-							 ordered_bytes,
-							 uptodate)) {
-			btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
-					NULL);
-			btrfs_queue_work(wq, &ordered->work);
-		}
-
-		/* No ordered extent found in the range, exit */
-		if (ordered_offset == last_offset)
-			return;
-		/*
-		 * Our bio might span multiple ordered extents. In this case
-		 * we keep going until we have accounted the whole dio.
-		 */
-		if (ordered_offset < offset + bytes) {
-			ordered_bytes = offset + bytes - ordered_offset;
-			ordered = NULL;
-		}
-	}
+	btrfs_mark_ordered_io_finished(inode, NULL, offset, bytes,
+				       finish_ordered_fn, uptodate);
 }
 
 static blk_status_t btrfs_submit_bio_start_direct_io(struct inode *inode,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6c413bb451a3d..e7ecce2c1bd8e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -300,81 +300,142 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
 }
 
 /*
- * Finish IO for one ordered extent across a given range.  The range can
- * contain several ordered extents.
+ * Mark all ordered extents io inside the specified range finished.
  *
- * @found_ret:	 Return the finished ordered extent
- * @file_offset: File offset for the finished IO
- * 		 Will also be updated to one byte past the range that is
- * 		 recordered as finished. This allows caller to walk forward.
- * @io_size:	 Length of the finish IO range
- * @uptodate:	 If the IO finished without problem
- *
- * Return true if any ordered extent is finished in the range, and update
- * @found_ret and @file_offset.
- * Return false otherwise.
+ * @page:	 The invovled page for the opeartion.
+ *		 For uncompressed buffered IO, the page status also needs to be
+ *		 updated to indicate whether the pending ordered io is finished.
+ *		 Can be NULL for direct IO and compressed write.
+ *		 For these cases, callers are ensured they won't execute the
+ *		 endio function twice.
+ * @finish_func: The function to be executed when all the IO of an ordered
+ *		 extent are finished.
  *
- * NOTE: Although The range can cross multiple ordered extents, only one
- * ordered extent will be updated during one call. The caller is responsible to
- * iterate all ordered extents in the range.
+ * This function is called for endio, thus the range must have ordered
+ * extent(s) coveri it.
  */
-bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
-				   struct btrfs_ordered_extent **finished_ret,
-				   u64 *file_offset, u64 io_size, int uptodate)
+void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
+				struct page *page, u64 file_offset,
+				u64 num_bytes, btrfs_func_t finish_func,
+				bool uptodate)
 {
-	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct btrfs_workqueue *wq;
 	struct rb_node *node;
 	struct btrfs_ordered_extent *entry = NULL;
-	bool finished = false;
 	unsigned long flags;
-	u64 dec_end;
-	u64 dec_start;
-	u64 to_dec;
+	u64 cur = file_offset;
+
+	if (btrfs_is_free_space_inode(inode))
+		wq = fs_info->endio_freespace_worker;
+	else
+		wq = fs_info->endio_write_workers;
+
+	if (page)
+		ASSERT(page->mapping && page_offset(page) <= file_offset &&
+		       file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
 
 	spin_lock_irqsave(&tree->lock, flags);
-	node = tree_search(tree, *file_offset);
-	if (!node)
-		goto out;
+	while (cur < file_offset + num_bytes) {
+		u64 entry_end;
+		u64 end;
+		u32 len;
 
-	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
-	if (!in_range(*file_offset, entry->file_offset, entry->num_bytes))
-		goto out;
+		node = tree_search(tree, cur);
+		/* No ordered extents at all */
+		if (!node)
+			break;
 
-	dec_start = max(*file_offset, entry->file_offset);
-	dec_end = min(*file_offset + io_size,
-		      entry->file_offset + entry->num_bytes);
-	*file_offset = dec_end;
-	if (dec_start > dec_end) {
-		btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
-			   dec_start, dec_end);
-	}
-	to_dec = dec_end - dec_start;
-	if (to_dec > entry->bytes_left) {
-		btrfs_crit(fs_info,
-			   "bad ordered accounting left %llu size %llu",
-			   entry->bytes_left, to_dec);
-	}
-	entry->bytes_left -= to_dec;
-	if (!uptodate)
-		set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
+		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+		entry_end = entry->file_offset + entry->num_bytes;
+		/*
+		 * |<-- OE --->|  |
+		 *		  cur
+		 * Go to next OE.
+		 */
+		if (cur >= entry_end) {
+			node = rb_next(node);
+			/* No more ordered extents, exit */
+			if (!node)
+				break;
+			entry = rb_entry(node, struct btrfs_ordered_extent,
+					 rb_node);
+
+			/* Go to next ordered extent and continue */
+			cur = entry->file_offset;
+			continue;
+		}
+		/*
+		 * |	|<--- OE --->|
+		 * cur
+		 * Go to the start of OE.
+		 */
+		if (cur < entry->file_offset) {
+			cur = entry->file_offset;
+			continue;
+		}
 
-	if (entry->bytes_left == 0) {
 		/*
-		 * Ensure only one caller can set the flag and finished_ret
-		 * accordingly
+		 * Now we are definitely inside one ordered extent.
+		 *
+		 * |<--- OE --->|
+		 *	|
+		 *	cur
 		 */
-		finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
-		/* test_and_set_bit implies a barrier */
-		cond_wake_up_nomb(&entry->wait);
-	}
-out:
-	if (finished && finished_ret && entry) {
-		*finished_ret = entry;
-		refcount_inc(&entry->refs);
+		end = min(entry->file_offset + entry->num_bytes,
+			  file_offset + num_bytes) - 1;
+		ASSERT(end + 1 - cur < U32_MAX);
+		len = end + 1 - cur;
+
+		if (page) {
+			/*
+			 * Private2 bit indicates whether we still have pending
+			 * io unfinished for the ordered extent.
+			 *
+			 * If there's no such bit, we need to skip to next range.
+			 */
+			if (!PagePrivate2(page)) {
+				cur += len;
+				continue;
+			}
+			ClearPagePrivate2(page);
+		}
+
+		/* Now we're fine to update the accounting */
+		if (unlikely(len > entry->bytes_left)) {
+			WARN_ON(1);
+			btrfs_crit(fs_info,
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
+				   inode->root->root_key.objectid,
+				   btrfs_ino(inode),
+				   entry->file_offset,
+				   entry->num_bytes,
+				   len, entry->bytes_left);
+			entry->bytes_left = 0;
+		} else {
+			entry->bytes_left -= len;
+		}
+
+		if (!uptodate)
+			set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
+
+		/*
+		 * All the IO of the ordered extent is finished, we need to queue
+		 * the finish_func to be executed.
+		 */
+		if (entry->bytes_left == 0) {
+			set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+			cond_wake_up(&entry->wait);
+			refcount_inc(&entry->refs);
+			spin_unlock_irqrestore(&tree->lock, flags);
+			btrfs_init_work(&entry->work, finish_func, NULL, NULL);
+			btrfs_queue_work(wq, &entry->work);
+			spin_lock_irqsave(&tree->lock, flags);
+		}
+		cur += len;
 	}
 	spin_unlock_irqrestore(&tree->lock, flags);
-	return finished;
 }
 
 /*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index e60c07f364276..72eb4b8cbb881 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -172,13 +172,13 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
 void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
 void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 				struct btrfs_ordered_extent *entry);
+void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
+				struct page *page, u64 file_offset,
+				u64 num_bytes, btrfs_func_t finish_func,
+				bool uptodate);
 bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
 				    struct btrfs_ordered_extent **cached,
 				    u64 file_offset, u64 io_size, int uptodate);
-bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
-				   struct btrfs_ordered_extent **finished_ret,
-				   u64 *file_offset, u64 io_size,
-				   int uptodate);
 int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
 			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
 			     int type);
-- 
GitLab


From 266a258678b9f254647f4297843cfbfbddde220a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 6 Apr 2021 08:27:18 +0800
Subject: [PATCH 3479/3804] btrfs: update comments in btrfs_invalidatepage()

The existing comments in btrfs_invalidatepage() don't really get to the
point, especially for what Private2 is really representing and how the
race avoidance is done.

The truth is, there are only three entrances to do ordered extent
accounting:

- btrfs_writepage_endio_finish_ordered()
- __endio_write_update_ordered()
  Those two entrances are just endio functions for dio and buffered
  write.

- btrfs_invalidatepage()

But there is a pitfall, in endio functions there is no check on whether
the ordered extent is already accounted.
They just blindly clear the Private2 bit and do the accounting.

So it's all btrfs_invalidatepage()'s responsibility to make sure we
won't do double account for the same sector.

That's why in btrfs_invalidatepage() we have to wait for page writeback,
this will ensure all submitted bios have finished, thus their endio
functions have finished the accounting on the ordered extent.

Then we also check page Private2 to ensure that we only run ordered
extent accounting on pages which have no bio submitted.

This patch will rework related comments to make it more clear on the
race and how we use wait_on_page_writeback() and Private2 to prevent
double accounting on ordered extent.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6243d242bc91..e86a6113e1494 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8329,11 +8329,16 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	bool completed_ordered = false;
 
 	/*
-	 * we have the page locked, so new writeback can't start,
-	 * and the dirty bit won't be cleared while we are here.
+	 * We have page locked so no new ordered extent can be created on this
+	 * page, nor bio can be submitted for this page.
 	 *
-	 * Wait for IO on this page so that we can safely clear
-	 * the PagePrivate2 bit and do ordered accounting
+	 * But already submitted bio can still be finished on this page.
+	 * Furthermore, endio function won't skip page which has Private2
+	 * already cleared, so it's possible for endio and invalidatepage to do
+	 * the same ordered extent accounting twice on one page.
+	 *
+	 * So here we wait for any submitted bios to finish, so that we won't
+	 * do double ordered extent accounting on the same page.
 	 */
 	wait_on_page_writeback(page);
 
@@ -8363,8 +8368,12 @@ again:
 					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
 					 EXTENT_DEFRAG, 1, 0, &cached_state);
 		/*
-		 * whoever cleared the private bit is responsible
-		 * for the finish_ordered_io
+		 * A page with Private2 bit means no bio has been submitted
+		 * covering the page, thus we have to manually do the ordered
+		 * extent accounting.
+		 *
+		 * For page without Private2, the ordered extent accounting is
+		 * done in its endio function of the submitted bio.
 		 */
 		if (TestClearPagePrivate2(page)) {
 			spin_lock_irq(&inode->ordered_tree.lock);
-- 
GitLab


From c095f3333fc4ae3e6881b9269962252ffd6b5de2 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 27 Apr 2021 15:03:40 +0800
Subject: [PATCH 3480/3804] btrfs: introduce btrfs_lookup_first_ordered_range()

Although we already have btrfs_lookup_first_ordered_extent() and
btrfs_lookup_ordered_extent(), they all have their own limitations:

- btrfs_lookup_ordered_extent() can't do extra range check

  It's only designed to lookup any ordered extent before certain bytenr.

- btrfs_lookup_first_ordered_extent() may not return the first ordered
  extent in the range

  It doesn't ensure the first ordered extent is returned.
  The existing callers are only interested in exhausting all ordered
  extents in a range, the order is not important.

For incoming btrfs_invalidatepage() refactoring, we need a way to
properly iterate all ordered extents in their bytenr order of a range.

So this patch will introduce a new function,
btrfs_lookup_first_ordered_range(), to do ordered extent lookup with bytenr
order awareness and extra range check.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ordered-data.c | 75 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/ordered-data.h |  2 ++
 2 files changed, 77 insertions(+)

diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e7ecce2c1bd8e..f3270396e547b 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -930,6 +930,81 @@ out:
 	return entry;
 }
 
+/*
+ * Lookup the first ordered extent that overlaps the range
+ * [@file_offset, @file_offset + @len).
+ *
+ * The difference between this and btrfs_lookup_first_ordered_extent() is
+ * that this one won't return any ordered extent that does not overlap the range.
+ * And the difference against btrfs_lookup_ordered_extent() is, this function
+ * ensures the first ordered extent gets returned.
+ */
+struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
+			struct btrfs_inode *inode, u64 file_offset, u64 len)
+{
+	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+	struct rb_node *node;
+	struct rb_node *cur;
+	struct rb_node *prev;
+	struct rb_node *next;
+	struct btrfs_ordered_extent *entry = NULL;
+
+	spin_lock_irq(&tree->lock);
+	node = tree->tree.rb_node;
+	/*
+	 * Here we don't want to use tree_search() which will use tree->last
+	 * and screw up the search order.
+	 * And __tree_search() can't return the adjacent ordered extents
+	 * either, thus here we do our own search.
+	 */
+	while (node) {
+		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+
+		if (file_offset < entry->file_offset) {
+			node = node->rb_left;
+		} else if (file_offset >= entry_end(entry)) {
+			node = node->rb_right;
+		} else {
+			/*
+			 * Direct hit, got an ordered extent that starts at
+			 * @file_offset
+			 */
+			goto out;
+		}
+	}
+	if (!entry) {
+		/* Empty tree */
+		goto out;
+	}
+
+	cur = &entry->rb_node;
+	/* We got an entry around @file_offset, check adjacent entries */
+	if (entry->file_offset < file_offset) {
+		prev = cur;
+		next = rb_next(cur);
+	} else {
+		prev = rb_prev(cur);
+		next = cur;
+	}
+	if (prev) {
+		entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
+		if (range_overlaps(entry, file_offset, len))
+			goto out;
+	}
+	if (next) {
+		entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
+		if (range_overlaps(entry, file_offset, len))
+			goto out;
+	}
+	/* No ordered extent in the range */
+	entry = NULL;
+out:
+	if (entry)
+		refcount_inc(&entry->refs);
+	spin_unlock_irq(&tree->lock);
+	return entry;
+}
+
 /*
  * btrfs_flush_ordered_range - Lock the passed range and ensures all pending
  * ordered extents in it are run to completion.
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 72eb4b8cbb881..566472004edd3 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -196,6 +196,8 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait);
 int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
 struct btrfs_ordered_extent *
 btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset);
+struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
+			struct btrfs_inode *inode, u64 file_offset, u64 len);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
 		struct btrfs_inode *inode,
 		u64 file_offset,
-- 
GitLab


From 3b8358407aac088564f7db35ea842376686d0c92 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 6 Apr 2021 19:54:53 +0800
Subject: [PATCH 3481/3804] btrfs: refactor btrfs_invalidatepage() for subpage
 support

This patch will refactor btrfs_invalidatepage() for the incoming subpage
support.

The involved modifications are:

- Use while() loop instead of "goto again;"
- Use single variable to determine whether to delete extent states
  Each branch will also have comments why we can or cannot delete the
  extent states
- Do qgroup free and extent states deletion per-loop
  Current code can only work for PAGE_SIZE == sectorsize case.

This refactor also makes it clear what we do for different sectors:

- Sectors without ordered extent
  We're completely safe to remove all extent states for the sector(s)

- Sectors with ordered extent, but no Private2 bit
  This means the endio has already been executed, we can't remove all
  extent states for the sector(s).

- Sectors with ordered extent, still has Private2 bit
  This means we need to decrease the ordered extent accounting.
  And then it comes to two different variants:

  * We have finished and removed the ordered extent
    Then it's the same as "sectors without ordered extent"
  * We didn't finish the ordered extent
    We can remove some extent states, but not all.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 171 +++++++++++++++++++++++++++--------------------
 1 file changed, 97 insertions(+), 74 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e86a6113e1494..f036b6e992dfe 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8318,15 +8318,11 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 {
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 	struct extent_io_tree *tree = &inode->io_tree;
-	struct btrfs_ordered_extent *ordered;
 	struct extent_state *cached_state = NULL;
 	u64 page_start = page_offset(page);
 	u64 page_end = page_start + PAGE_SIZE - 1;
-	u64 start;
-	u64 end;
+	u64 cur;
 	int inode_evicting = inode->vfs_inode.i_state & I_FREEING;
-	bool found_ordered = false;
-	bool completed_ordered = false;
 
 	/*
 	 * We have page locked so no new ordered extent can be created on this
@@ -8350,93 +8346,120 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	if (!inode_evicting)
 		lock_extent_bits(tree, page_start, page_end, &cached_state);
 
-	start = page_start;
-again:
-	ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1);
-	if (ordered) {
-		found_ordered = true;
-		end = min(page_end,
-			  ordered->file_offset + ordered->num_bytes - 1);
+	cur = page_start;
+	while (cur < page_end) {
+		struct btrfs_ordered_extent *ordered;
+		bool delete_states;
+		u64 range_end;
+
+		ordered = btrfs_lookup_first_ordered_range(inode, cur,
+							   page_end + 1 - cur);
+		if (!ordered) {
+			range_end = page_end;
+			/*
+			 * No ordered extent covering this range, we are safe
+			 * to delete all extent states in the range.
+			 */
+			delete_states = true;
+			goto next;
+		}
+		if (ordered->file_offset > cur) {
+			/*
+			 * There is a range between [cur, oe->file_offset) not
+			 * covered by any ordered extent.
+			 * We are safe to delete all extent states, and handle
+			 * the ordered extent in the next iteration.
+			 */
+			range_end = ordered->file_offset - 1;
+			delete_states = true;
+			goto next;
+		}
+
+		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
+				page_end);
+		if (!PagePrivate2(page)) {
+			/*
+			 * If Private2 is cleared, it means endio has already
+			 * been executed for the range.
+			 * We can't delete the extent states as
+			 * btrfs_finish_ordered_io() may still use some of them.
+			 */
+			delete_states = false;
+			goto next;
+		}
+		ClearPagePrivate2(page);
+
 		/*
 		 * IO on this page will never be started, so we need to account
 		 * for any ordered extents now. Don't clear EXTENT_DELALLOC_NEW
 		 * here, must leave that up for the ordered extent completion.
+		 *
+		 * This will also unlock the range for incoming
+		 * btrfs_finish_ordered_io().
 		 */
 		if (!inode_evicting)
-			clear_extent_bit(tree, start, end,
+			clear_extent_bit(tree, cur, range_end,
 					 EXTENT_DELALLOC |
 					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
 					 EXTENT_DEFRAG, 1, 0, &cached_state);
+
+		spin_lock_irq(&inode->ordered_tree.lock);
+		set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+		ordered->truncated_len = min(ordered->truncated_len,
+					     cur - ordered->file_offset);
+		spin_unlock_irq(&inode->ordered_tree.lock);
+
+		if (btrfs_dec_test_ordered_pending(inode, &ordered,
+					cur, range_end + 1 - cur, 1)) {
+			btrfs_finish_ordered_io(ordered);
+			/*
+			 * The ordered extent has finished, now we're again
+			 * safe to delete all extent states of the range.
+			 */
+			delete_states = true;
+		} else {
+			/*
+			 * btrfs_finish_ordered_io() will get executed by endio
+			 * of other pages, thus we can't delete extent states
+			 * anymore
+			 */
+			delete_states = false;
+		}
+next:
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
 		/*
-		 * A page with Private2 bit means no bio has been submitted
-		 * covering the page, thus we have to manually do the ordered
-		 * extent accounting.
+		 * Qgroup reserved space handler
+		 * Sector(s) here will be either:
 		 *
-		 * For page without Private2, the ordered extent accounting is
-		 * done in its endio function of the submitted bio.
+		 * 1) Already written to disk or bio already finished
+		 *    Then its QGROUP_RESERVED bit in io_tree is already cleared.
+		 *    Qgroup will be handled by its qgroup_record then.
+		 *    btrfs_qgroup_free_data() call will do nothing here.
+		 *
+		 * 2) Not written to disk yet
+		 *    Then btrfs_qgroup_free_data() call will clear the
+		 *    QGROUP_RESERVED bit of its io_tree, and free the qgroup
+		 *    reserved data space.
+		 *    Since the IO will never happen for this page.
 		 */
-		if (TestClearPagePrivate2(page)) {
-			spin_lock_irq(&inode->ordered_tree.lock);
-			set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
-			ordered->truncated_len = min(ordered->truncated_len,
-						     start - ordered->file_offset);
-			spin_unlock_irq(&inode->ordered_tree.lock);
-
-			if (btrfs_dec_test_ordered_pending(inode, &ordered,
-							   start,
-							   end - start + 1, 1)) {
-				btrfs_finish_ordered_io(ordered);
-				completed_ordered = true;
-			}
-		}
-		btrfs_put_ordered_extent(ordered);
+		btrfs_qgroup_free_data(inode, NULL, cur, range_end + 1 - cur);
 		if (!inode_evicting) {
-			cached_state = NULL;
-			lock_extent_bits(tree, start, end,
-					 &cached_state);
+			clear_extent_bit(tree, cur, range_end, EXTENT_LOCKED |
+				 EXTENT_DELALLOC | EXTENT_UPTODATE |
+				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
+				 delete_states, &cached_state);
 		}
-
-		start = end + 1;
-		if (start < page_end)
-			goto again;
+		cur = range_end + 1;
 	}
-
 	/*
-	 * Qgroup reserved space handler
-	 * Page here will be either
-	 * 1) Already written to disk or ordered extent already submitted
-	 *    Then its QGROUP_RESERVED bit in io_tree is already cleaned.
-	 *    Qgroup will be handled by its qgroup_record then.
-	 *    btrfs_qgroup_free_data() call will do nothing here.
-	 *
-	 * 2) Not written to disk yet
-	 *    Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
-	 *    bit of its io_tree, and free the qgroup reserved data space.
-	 *    Since the IO will never happen for this page.
+	 * We have iterated through all ordered extents of the page, the page
+	 * should not have Private2 anymore, or the above iteration does
+	 * something wrong.
 	 */
-	btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
-	if (!inode_evicting) {
-		bool delete = true;
-
-		/*
-		 * If there's an ordered extent for this range and we have not
-		 * finished it ourselves, we must leave EXTENT_DELALLOC_NEW set
-		 * in the range for the ordered extent completion. We must also
-		 * not delete the range, otherwise we would lose that bit (and
-		 * any other bits set in the range). Make sure EXTENT_UPTODATE
-		 * is cleared if we don't delete, otherwise it can lead to
-		 * corruptions if the i_size is extented later.
-		 */
-		if (found_ordered && !completed_ordered)
-			delete = false;
-		clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
-				 EXTENT_DELALLOC | EXTENT_UPTODATE |
-				 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1,
-				 delete, &cached_state);
-
+	ASSERT(!PagePrivate2(page));
+	if (!inode_evicting)
 		__btrfs_releasepage(page, GFP_NOFS);
-	}
-
 	ClearPageChecked(page);
 	clear_page_extent_mapped(page);
 }
-- 
GitLab


From f57ad93735fd66e5ce085f3818c85551abd0cbe8 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 7 Apr 2021 19:22:13 +0800
Subject: [PATCH 3482/3804] btrfs: rename PagePrivate2 to PageOrdered inside
 btrfs

Inside btrfs we use Private2 page status to indicate we have an ordered
extent with pending IO for the sector.

But the page status name, Private2, tells us nothing about the bit
itself, so this patch will rename it to Ordered.
An extra comment about the bit is also added, so readers who are still
uncertain about the page Ordered status can find the explanation pretty
easily.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h        | 10 +++++++++
 fs/btrfs/extent_io.c    |  4 ++--
 fs/btrfs/extent_io.h    |  2 +-
 fs/btrfs/inode.c        | 50 +++++++++++++++++++++--------------------
 fs/btrfs/ordered-data.c |  8 +++----
 5 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 70952c1a39d1c..c63980977fa4d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3799,4 +3799,14 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
 	return fs_info->zoned != 0;
 }
 
+/*
+ * We use page status Private2 to indicate there is an ordered extent with
+ * unfinished IO.
+ *
+ * Rename the Private2 accessors to Ordered, to improve readability.
+ */
+#define PageOrdered(page)		PagePrivate2(page)
+#define SetPageOrdered(page)		SetPagePrivate2(page)
+#define ClearPageOrdered(page)		ClearPagePrivate2(page)
+
 #endif
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c0600dec62f8b..13c5e880404da 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1975,8 +1975,8 @@ static int __process_pages_contig(struct address_space *mapping,
 		}
 
 		for (i = 0; i < ret; i++) {
-			if (page_ops & PAGE_SET_PRIVATE2)
-				SetPagePrivate2(pages[i]);
+			if (page_ops & PAGE_SET_ORDERED)
+				SetPageOrdered(pages[i]);
 
 			if (locked_page && pages[i] == locked_page) {
 				put_page(pages[i]);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 946d09caa5925..62027f551b445 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -39,7 +39,7 @@ enum {
 /* Page starts writeback, clear dirty bit and set writeback bit */
 #define PAGE_START_WRITEBACK	(1 << 1)
 #define PAGE_END_WRITEBACK	(1 << 2)
-#define PAGE_SET_PRIVATE2	(1 << 3)
+#define PAGE_SET_ORDERED	(1 << 3)
 #define PAGE_SET_ERROR		(1 << 4)
 #define PAGE_LOCK		(1 << 5)
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f036b6e992dfe..412abf9231365 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -170,7 +170,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 		index++;
 		if (!page)
 			continue;
-		ClearPagePrivate2(page);
+		ClearPageOrdered(page);
 		put_page(page);
 	}
 
@@ -1151,15 +1151,16 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 
 		btrfs_dec_block_group_reservations(fs_info, ins.objectid);
 
-		/* we're not doing compressed IO, don't unlock the first
-		 * page (which the caller expects to stay locked), don't
-		 * clear any dirty bits and don't set any writeback bits
+		/*
+		 * We're not doing compressed IO, don't unlock the first page
+		 * (which the caller expects to stay locked), don't clear any
+		 * dirty bits and don't set any writeback bits
 		 *
-		 * Do set the Private2 bit so we know this page was properly
-		 * setup for writepage
+		 * Do set the Ordered (Private2) bit so we know this page was
+		 * properly setup for writepage.
 		 */
 		page_ops = unlock ? PAGE_UNLOCK : 0;
-		page_ops |= PAGE_SET_PRIVATE2;
+		page_ops |= PAGE_SET_ORDERED;
 
 		extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
 					     locked_page,
@@ -1823,7 +1824,7 @@ out_check:
 					     locked_page, EXTENT_LOCKED |
 					     EXTENT_DELALLOC |
 					     EXTENT_CLEAR_DATA_RESV,
-					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+					     PAGE_UNLOCK | PAGE_SET_ORDERED);
 
 		cur_offset = extent_end;
 
@@ -2571,7 +2572,7 @@ again:
 	lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
 
 	/* already ordered? We're done */
-	if (PagePrivate2(page))
+	if (PageOrdered(page))
 		goto out_reserved;
 
 	ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
@@ -2646,8 +2647,8 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	struct btrfs_writepage_fixup *fixup;
 
-	/* this page is properly in the ordered list */
-	if (PagePrivate2(page))
+	/* This page has ordered extent covering it already */
+	if (PageOrdered(page))
 		return 0;
 
 	/*
@@ -8300,9 +8301,9 @@ static int btrfs_migratepage(struct address_space *mapping,
 	if (page_has_private(page))
 		attach_page_private(newpage, detach_page_private(page));
 
-	if (PagePrivate2(page)) {
-		ClearPagePrivate2(page);
-		SetPagePrivate2(newpage);
+	if (PageOrdered(page)) {
+		ClearPageOrdered(page);
+		SetPageOrdered(newpage);
 	}
 
 	if (mode != MIGRATE_SYNC_NO_COPY)
@@ -8329,9 +8330,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	 * page, nor bio can be submitted for this page.
 	 *
 	 * But already submitted bio can still be finished on this page.
-	 * Furthermore, endio function won't skip page which has Private2
-	 * already cleared, so it's possible for endio and invalidatepage to do
-	 * the same ordered extent accounting twice on one page.
+	 * Furthermore, endio function won't skip page which has Ordered
+	 * (Private2) already cleared, so it's possible for endio and
+	 * invalidatepage to do the same ordered extent accounting twice
+	 * on one page.
 	 *
 	 * So here we wait for any submitted bios to finish, so that we won't
 	 * do double ordered extent accounting on the same page.
@@ -8377,17 +8379,17 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 
 		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
 				page_end);
-		if (!PagePrivate2(page)) {
+		if (!PageOrdered(page)) {
 			/*
-			 * If Private2 is cleared, it means endio has already
-			 * been executed for the range.
+			 * If Ordered (Private2) is cleared, it means endio has
+			 * already been executed for the range.
 			 * We can't delete the extent states as
 			 * btrfs_finish_ordered_io() may still use some of them.
 			 */
 			delete_states = false;
 			goto next;
 		}
-		ClearPagePrivate2(page);
+		ClearPageOrdered(page);
 
 		/*
 		 * IO on this page will never be started, so we need to account
@@ -8454,10 +8456,10 @@ next:
 	}
 	/*
 	 * We have iterated through all ordered extents of the page, the page
-	 * should not have Private2 anymore, or the above iteration does
-	 * something wrong.
+	 * should not have Ordered (Private2) anymore, or the above iteration
+	 * did something wrong.
 	 */
-	ASSERT(!PagePrivate2(page));
+	ASSERT(!PageOrdered(page));
 	if (!inode_evicting)
 		__btrfs_releasepage(page, GFP_NOFS);
 	ClearPageChecked(page);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f3270396e547b..b1b377ad99a04 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -390,16 +390,16 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
 
 		if (page) {
 			/*
-			 * Private2 bit indicates whether we still have pending
-			 * io unfinished for the ordered extent.
+			 * Ordered (Private2) bit indicates whether we still
+			 * have pending io unfinished for the ordered extent.
 			 *
 			 * If there's no such bit, we need to skip to next range.
 			 */
-			if (!PagePrivate2(page)) {
+			if (!PageOrdered(page)) {
 				cur += len;
 				continue;
 			}
-			ClearPagePrivate2(page);
+			ClearPageOrdered(page);
 		}
 
 		/* Now we're fine to update the accounting */
-- 
GitLab


From 968f2566ad897d643af66df0d44c070128402941 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 18 May 2021 15:09:41 +0800
Subject: [PATCH 3483/3804] btrfs: fix hang when run_delalloc_range() failed

[BUG]
When running subpage preparation patches on x86, btrfs/125 will hang
forever with one ordered extent never finished.

[CAUSE]
The test case btrfs/125 itself will always fail as the fix is never merged.

When the test fails at balance, btrfs needs to cleanup the ordered
extent in btrfs_cleanup_ordered_extents() for data reloc inode.

The problem is in the sequence of how we clean up the page Ordered bit.

Currently it works like:

  btrfs_cleanup_ordered_extents()
  |- find_get_page();
  |- btrfs_page_clear_ordered(page);
  |  Now the page doesn't have Ordered bit anymore.
  |  !!! This also includes the first (locked) page !!!
  |
  |- offset += PAGE_SIZE
  |  This is to skip the first page
  |- __endio_write_update_ordered()
     |- btrfs_mark_ordered_io_finished(NULL)
        Except the first page, all ordered extents are finished.

Then the locked page is cleaned up in __extent_writepage():

  __extent_writepage()
  |- If (PageError(page))
  |- end_extent_writepage()
     |- btrfs_mark_ordered_io_finished(page)
        |- if (btrfs_test_page_ordered(page))
        |-  !!! The page gets skipped !!!
            The ordered extent is not decreased as the page doesn't
            have ordered bit anymore.

This leaves the ordered extent with bytes_left == sectorsize, thus it
never finishes.

[FIX]
The fix is to ensure we never clear page Ordered bit without running the
ordered extent accounting.

Here we choose to skip the locked page in
btrfs_cleanup_ordered_extents() so that later end_extent_writepage() can
properly finish the ordered extent.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 412abf9231365..2eedcf65b8aa9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -166,10 +166,31 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 	struct page *page;
 
 	while (index <= end_index) {
+		/*
+		 * For locked page, we will call end_extent_writepage() on it
+		 * in run_delalloc_range() for the error handling.  That
+		 * end_extent_writepage() function will call
+		 * btrfs_mark_ordered_io_finished() to clear page Ordered and
+		 * run the ordered extent accounting.
+		 *
+		 * Here we can't just clear the Ordered bit, or
+		 * btrfs_mark_ordered_io_finished() would skip the accounting
+		 * for the page range, and the ordered extent will never finish.
+		 */
+		if (index == (page_offset(locked_page) >> PAGE_SHIFT)) {
+			index++;
+			continue;
+		}
 		page = find_get_page(inode->vfs_inode.i_mapping, index);
 		index++;
 		if (!page)
 			continue;
+
+		/*
+		 * Here we just clear all Ordered bits for every page in the
+		 * range, then __endio_write_update_ordered() will handle
+		 * the ordered extent accounting for the range.
+		 */
 		ClearPageOrdered(page);
 		put_page(page);
 	}
-- 
GitLab


From 98af9ab12b49a5ae338b523e64b5a7dd637781d4 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:37 +0800
Subject: [PATCH 3484/3804] btrfs: pass bytenr directly to
 __process_pages_contig()

As a preparation for incoming subpage support, we need bytenr passed to
__process_pages_contig() directly, not the current page index.

So change the parameter and all callers to pass bytenr in.

With the modification, here we need to replace the old @index_ret with
@processed_end for __process_pages_contig(), but this brings a small
problem.

Normally we follow the inclusive return value, meaning @processed_end
should be the last byte we processed.

If parameter @start is 0, and we failed to lock any page, then we would
return @processed_end as -1, causing more problems for
__unlock_for_delalloc().

So here for @processed_end, we use two different return value patterns.
If we have locked any page, @processed_end will be the last byte of the
last locked page.
Otherwise it will be @start.

This change will impact lock_delalloc_pages(), so it needs to check
@processed_end to only unlock the range if we have locked any.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 57 ++++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 13c5e880404da..8bf13823641f7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1810,8 +1810,8 @@ out:
 
 static int __process_pages_contig(struct address_space *mapping,
 				  struct page *locked_page,
-				  pgoff_t start_index, pgoff_t end_index,
-				  unsigned long page_ops, pgoff_t *index_ret);
+				  u64 start, u64 end, unsigned long page_ops,
+				  u64 *processed_end);
 
 static noinline void __unlock_for_delalloc(struct inode *inode,
 					   struct page *locked_page,
@@ -1824,7 +1824,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
 	if (index == locked_page->index && end_index == index)
 		return;
 
-	__process_pages_contig(inode->i_mapping, locked_page, index, end_index,
+	__process_pages_contig(inode->i_mapping, locked_page, start, end,
 			       PAGE_UNLOCK, NULL);
 }
 
@@ -1834,19 +1834,19 @@ static noinline int lock_delalloc_pages(struct inode *inode,
 					u64 delalloc_end)
 {
 	unsigned long index = delalloc_start >> PAGE_SHIFT;
-	unsigned long index_ret = index;
 	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
+	u64 processed_end = delalloc_start;
 	int ret;
 
 	ASSERT(locked_page);
 	if (index == locked_page->index && index == end_index)
 		return 0;
 
-	ret = __process_pages_contig(inode->i_mapping, locked_page, index,
-				     end_index, PAGE_LOCK, &index_ret);
-	if (ret == -EAGAIN)
+	ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
+				     delalloc_end, PAGE_LOCK, &processed_end);
+	if (ret == -EAGAIN && processed_end > delalloc_start)
 		__unlock_for_delalloc(inode, locked_page, delalloc_start,
-				      (u64)index_ret << PAGE_SHIFT);
+				      processed_end);
 	return ret;
 }
 
@@ -1941,12 +1941,14 @@ out_failed:
 
 static int __process_pages_contig(struct address_space *mapping,
 				  struct page *locked_page,
-				  pgoff_t start_index, pgoff_t end_index,
-				  unsigned long page_ops, pgoff_t *index_ret)
+				  u64 start, u64 end, unsigned long page_ops,
+				  u64 *processed_end)
 {
+	pgoff_t start_index = start >> PAGE_SHIFT;
+	pgoff_t end_index = end >> PAGE_SHIFT;
+	pgoff_t index = start_index;
 	unsigned long nr_pages = end_index - start_index + 1;
 	unsigned long pages_processed = 0;
-	pgoff_t index = start_index;
 	struct page *pages[16];
 	unsigned ret;
 	int err = 0;
@@ -1954,17 +1956,19 @@ static int __process_pages_contig(struct address_space *mapping,
 
 	if (page_ops & PAGE_LOCK) {
 		ASSERT(page_ops == PAGE_LOCK);
-		ASSERT(index_ret && *index_ret == start_index);
+		ASSERT(processed_end && *processed_end == start);
 	}
 
 	if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
 		mapping_set_error(mapping, -EIO);
 
 	while (nr_pages > 0) {
-		ret = find_get_pages_contig(mapping, index,
+		int found_pages;
+
+		found_pages = find_get_pages_contig(mapping, index,
 				     min_t(unsigned long,
 				     nr_pages, ARRAY_SIZE(pages)), pages);
-		if (ret == 0) {
+		if (found_pages == 0) {
 			/*
 			 * Only if we're going to lock these pages,
 			 * can we find nothing at @index.
@@ -2007,13 +2011,27 @@ static int __process_pages_contig(struct address_space *mapping,
 			put_page(pages[i]);
 			pages_processed++;
 		}
-		nr_pages -= ret;
-		index += ret;
+		nr_pages -= found_pages;
+		index += found_pages;
 		cond_resched();
 	}
 out:
-	if (err && index_ret)
-		*index_ret = start_index + pages_processed - 1;
+	if (err && processed_end) {
+		/*
+		 * Update @processed_end. I know this is awful since it has
+		 * two different return value patterns (inclusive vs exclusive).
+		 *
+		 * But the exclusive pattern is necessary if @start is 0, or we
+		 * underflow and check against processed_end won't work as
+		 * expected.
+		 */
+		if (pages_processed)
+			*processed_end = min(end,
+			((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
+		else
+			*processed_end = start;
+
+	}
 	return err;
 }
 
@@ -2024,8 +2042,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 	clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL);
 
 	__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
-			       start >> PAGE_SHIFT, end >> PAGE_SHIFT,
-			       page_ops, NULL);
+			       start, end, page_ops, NULL);
 }
 
 /*
-- 
GitLab


From ed8f13bf4a2ccb6c90d3210421455c2ceae678de Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:38 +0800
Subject: [PATCH 3485/3804] btrfs: refactor page status update into
 process_one_page()

In __process_pages_contig() we update page status according to page_ops.

That update process is a bunch of 'if' branches, which lie inside
two loops, this makes it pretty hard to expand for later subpage
operations.

So this patch will extract these operations into their own function,
process_one_page().

Since we're refactoring __process_pages_contig(), also move the new
helper and __process_pages_contig() before their first caller, to
remove the forward declaration.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 206 +++++++++++++++++++++++--------------------
 1 file changed, 109 insertions(+), 97 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8bf13823641f7..2595f9ff0a577 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1808,10 +1808,118 @@ out:
 	return found;
 }
 
+/*
+ * Process one page for __process_pages_contig().
+ *
+ * Return >0 if we hit @page == @locked_page.
+ * Return 0 if we updated the page status.
+ * Return -EGAIN if the we need to try again.
+ * (For PAGE_LOCK case but got dirty page or page not belong to mapping)
+ */
+static int process_one_page(struct address_space *mapping,
+			    struct page *page, struct page *locked_page,
+			    unsigned long page_ops)
+{
+	if (page_ops & PAGE_SET_ORDERED)
+		SetPageOrdered(page);
+
+	if (page == locked_page)
+		return 1;
+
+	if (page_ops & PAGE_SET_ERROR)
+		SetPageError(page);
+	if (page_ops & PAGE_START_WRITEBACK) {
+		clear_page_dirty_for_io(page);
+		set_page_writeback(page);
+	}
+	if (page_ops & PAGE_END_WRITEBACK)
+		end_page_writeback(page);
+	if (page_ops & PAGE_LOCK) {
+		lock_page(page);
+		if (!PageDirty(page) || page->mapping != mapping) {
+			unlock_page(page);
+			return -EAGAIN;
+		}
+	}
+	if (page_ops & PAGE_UNLOCK)
+		unlock_page(page);
+	return 0;
+}
+
 static int __process_pages_contig(struct address_space *mapping,
 				  struct page *locked_page,
 				  u64 start, u64 end, unsigned long page_ops,
-				  u64 *processed_end);
+				  u64 *processed_end)
+{
+	pgoff_t start_index = start >> PAGE_SHIFT;
+	pgoff_t end_index = end >> PAGE_SHIFT;
+	pgoff_t index = start_index;
+	unsigned long nr_pages = end_index - start_index + 1;
+	unsigned long pages_processed = 0;
+	struct page *pages[16];
+	int err = 0;
+	int i;
+
+	if (page_ops & PAGE_LOCK) {
+		ASSERT(page_ops == PAGE_LOCK);
+		ASSERT(processed_end && *processed_end == start);
+	}
+
+	if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
+		mapping_set_error(mapping, -EIO);
+
+	while (nr_pages > 0) {
+		int found_pages;
+
+		found_pages = find_get_pages_contig(mapping, index,
+				     min_t(unsigned long,
+				     nr_pages, ARRAY_SIZE(pages)), pages);
+		if (found_pages == 0) {
+			/*
+			 * Only if we're going to lock these pages, we can find
+			 * nothing at @index.
+			 */
+			ASSERT(page_ops & PAGE_LOCK);
+			err = -EAGAIN;
+			goto out;
+		}
+
+		for (i = 0; i < found_pages; i++) {
+			int process_ret;
+
+			process_ret = process_one_page(mapping, pages[i],
+					locked_page, page_ops);
+			if (process_ret < 0) {
+				for (; i < found_pages; i++)
+					put_page(pages[i]);
+				err = -EAGAIN;
+				goto out;
+			}
+			put_page(pages[i]);
+			pages_processed++;
+		}
+		nr_pages -= found_pages;
+		index += found_pages;
+		cond_resched();
+	}
+out:
+	if (err && processed_end) {
+		/*
+		 * Update @processed_end. I know this is awful since it has
+		 * two different return value patterns (inclusive vs exclusive).
+		 *
+		 * But the exclusive pattern is necessary if @start is 0, or we
+		 * underflow and check against processed_end won't work as
+		 * expected.
+		 */
+		if (pages_processed)
+			*processed_end = min(end,
+			((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
+		else
+			*processed_end = start;
+	}
+	return err;
+}
 
 static noinline void __unlock_for_delalloc(struct inode *inode,
 					   struct page *locked_page,
@@ -1939,102 +2047,6 @@ out_failed:
 	return found;
 }
 
-static int __process_pages_contig(struct address_space *mapping,
-				  struct page *locked_page,
-				  u64 start, u64 end, unsigned long page_ops,
-				  u64 *processed_end)
-{
-	pgoff_t start_index = start >> PAGE_SHIFT;
-	pgoff_t end_index = end >> PAGE_SHIFT;
-	pgoff_t index = start_index;
-	unsigned long nr_pages = end_index - start_index + 1;
-	unsigned long pages_processed = 0;
-	struct page *pages[16];
-	unsigned ret;
-	int err = 0;
-	int i;
-
-	if (page_ops & PAGE_LOCK) {
-		ASSERT(page_ops == PAGE_LOCK);
-		ASSERT(processed_end && *processed_end == start);
-	}
-
-	if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
-		mapping_set_error(mapping, -EIO);
-
-	while (nr_pages > 0) {
-		int found_pages;
-
-		found_pages = find_get_pages_contig(mapping, index,
-				     min_t(unsigned long,
-				     nr_pages, ARRAY_SIZE(pages)), pages);
-		if (found_pages == 0) {
-			/*
-			 * Only if we're going to lock these pages,
-			 * can we find nothing at @index.
-			 */
-			ASSERT(page_ops & PAGE_LOCK);
-			err = -EAGAIN;
-			goto out;
-		}
-
-		for (i = 0; i < ret; i++) {
-			if (page_ops & PAGE_SET_ORDERED)
-				SetPageOrdered(pages[i]);
-
-			if (locked_page && pages[i] == locked_page) {
-				put_page(pages[i]);
-				pages_processed++;
-				continue;
-			}
-			if (page_ops & PAGE_START_WRITEBACK) {
-				clear_page_dirty_for_io(pages[i]);
-				set_page_writeback(pages[i]);
-			}
-			if (page_ops & PAGE_SET_ERROR)
-				SetPageError(pages[i]);
-			if (page_ops & PAGE_END_WRITEBACK)
-				end_page_writeback(pages[i]);
-			if (page_ops & PAGE_UNLOCK)
-				unlock_page(pages[i]);
-			if (page_ops & PAGE_LOCK) {
-				lock_page(pages[i]);
-				if (!PageDirty(pages[i]) ||
-				    pages[i]->mapping != mapping) {
-					unlock_page(pages[i]);
-					for (; i < ret; i++)
-						put_page(pages[i]);
-					err = -EAGAIN;
-					goto out;
-				}
-			}
-			put_page(pages[i]);
-			pages_processed++;
-		}
-		nr_pages -= found_pages;
-		index += found_pages;
-		cond_resched();
-	}
-out:
-	if (err && processed_end) {
-		/*
-		 * Update @processed_end. I know this is awful since it has
-		 * two different return value patterns (inclusive vs exclusive).
-		 *
-		 * But the exclusive pattern is necessary if @start is 0, or we
-		 * underflow and check against processed_end won't work as
-		 * expected.
-		 */
-		if (pages_processed)
-			*processed_end = min(end,
-			((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
-		else
-			*processed_end = start;
-
-	}
-	return err;
-}
-
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
 				  struct page *locked_page,
 				  u32 clear_bits, unsigned long page_ops)
-- 
GitLab


From 60e2d25500aa74388bd0a30a39bb84249f2c75d5 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:39 +0800
Subject: [PATCH 3486/3804] btrfs: provide btrfs_page_clamp_*() helpers

For the upcoming subpage RW support, there are a lot of page status
update calls which need to be converted to their subpage compatible
versions, which need @start and @len.

Some call sites already have such @start/@len and are already in
page range, like various endio functions.

But there are also call sites which need to clamp the range for subpage
case, like btrfs_dirty_pages() and __process_pages_contig().

Here we introduce new helpers, btrfs_page_clamp_*(), which clamp the
range to the page, and do so only for the subpage case.

Although in theory all existing btrfs_page_*() calls could be converted
to use btrfs_page_clamp_*() directly, that would make us do unnecessary
clamp operations.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/subpage.c | 38 ++++++++++++++++++++++++++++++++++++++
 fs/btrfs/subpage.h | 10 ++++++++++
 2 files changed, 48 insertions(+)

diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 2d19089ab6255..a6cf1776f3f9b 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -354,6 +354,16 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
 
+static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
+{
+	u64 orig_start = *start;
+	u32 orig_len = *len;
+
+	*start = max_t(u64, page_offset(page), orig_start);
+	*len = min_t(u64, page_offset(page) + PAGE_SIZE,
+		     orig_start + orig_len) - *start;
+}
+
 /*
  * Unlike set/clear which is dependent on each page status, for test all bits
  * are tested in the same way.
@@ -408,6 +418,34 @@ bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,	\
 	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)	\
 		return test_page_func(page);				\
 	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
+}									\
+void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
+		struct page *page, u64 start, u32 len)			\
+{									\
+	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {	\
+		set_page_func(page);					\
+		return;							\
+	}								\
+	btrfs_subpage_clamp_range(page, &start, &len);			\
+	btrfs_subpage_set_##name(fs_info, page, start, len);		\
+}									\
+void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
+		struct page *page, u64 start, u32 len)			\
+{									\
+	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {	\
+		clear_page_func(page);					\
+		return;							\
+	}								\
+	btrfs_subpage_clamp_range(page, &start, &len);			\
+	btrfs_subpage_clear_##name(fs_info, page, start, len);		\
+}									\
+bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
+		struct page *page, u64 start, u32 len)			\
+{									\
+	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)	\
+		return test_page_func(page);				\
+	btrfs_subpage_clamp_range(page, &start, &len);			\
+	return btrfs_subpage_test_##name(fs_info, page, start, len);	\
 }
 IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate,
 			 PageUptodate);
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index bfd626e955be3..291cb1932f273 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -72,6 +72,10 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
  * btrfs_page_*() are for call sites where the page can either be subpage
  * specific or regular page. The function will handle both cases.
  * But the range still needs to be inside the page.
+ *
+ * btrfs_page_clamp_*() are similar to btrfs_page_*(), except the range doesn't
+ * need to be inside the page. Those functions will truncate the range
+ * automatically.
  */
 #define DECLARE_BTRFS_SUBPAGE_OPS(name)					\
 void btrfs_subpage_set_##name(const struct btrfs_fs_info *fs_info,	\
@@ -85,6 +89,12 @@ void btrfs_page_set_##name(const struct btrfs_fs_info *fs_info,		\
 void btrfs_page_clear_##name(const struct btrfs_fs_info *fs_info,	\
 		struct page *page, u64 start, u32 len);			\
 bool btrfs_page_test_##name(const struct btrfs_fs_info *fs_info,	\
+		struct page *page, u64 start, u32 len);			\
+void btrfs_page_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
+		struct page *page, u64 start, u32 len);			\
+void btrfs_page_clamp_clear_##name(const struct btrfs_fs_info *fs_info,	\
+		struct page *page, u64 start, u32 len);			\
+bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info,	\
 		struct page *page, u64 start, u32 len);
 
 DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
-- 
GitLab


From 321a02db327a82aeaf9a114518705293cb8c2b31 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:40 +0800
Subject: [PATCH 3487/3804] btrfs: only require sector size alignment for
 end_bio_extent_writepage()

Just like read page, for subpage support we only require sector size
alignment.

So change the error message condition to only require sector alignment.

This should not affect existing code, as for regular sectorsize ==
PAGE_SIZE case, we are still requiring page alignment.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2595f9ff0a577..9edef629ace20 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2824,25 +2824,20 @@ static void end_bio_extent_writepage(struct bio *bio)
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
 		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+		const u32 sectorsize = fs_info->sectorsize;
 
-		/* We always issue full-page reads, but if some block
-		 * in a page fails to read, blk_update_request() will
-		 * advance bv_offset and adjust bv_len to compensate.
-		 * Print a warning for nonzero offsets, and an error
-		 * if they don't add up to a full page.  */
-		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
-			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
-				btrfs_err(fs_info,
-				   "partial page write in btrfs with offset %u and length %u",
-					bvec->bv_offset, bvec->bv_len);
-			else
-				btrfs_info(fs_info,
-				   "incomplete page write in btrfs with offset %u and length %u",
-					bvec->bv_offset, bvec->bv_len);
-		}
+		/* Our read/write should always be sector aligned. */
+		if (!IS_ALIGNED(bvec->bv_offset, sectorsize))
+			btrfs_err(fs_info,
+		"partial page write in btrfs with offset %u and length %u",
+				  bvec->bv_offset, bvec->bv_len);
+		else if (!IS_ALIGNED(bvec->bv_len, sectorsize))
+			btrfs_info(fs_info,
+		"incomplete page write with offset %u and length %u",
+				   bvec->bv_offset, bvec->bv_len);
 
-		start = page_offset(page);
-		end = start + bvec->bv_offset + bvec->bv_len - 1;
+		start = page_offset(page) + bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
 
 		if (first_bvec) {
 			btrfs_record_physical_zoned(inode, start, bio);
-- 
GitLab


From f02a85d2d551f1a34ac3a02b59d419767c97556b Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:41 +0800
Subject: [PATCH 3488/3804] btrfs: make btrfs_dirty_pages() to be subpage
 compatible

Since the extent io tree operations in btrfs_dirty_pages() are already
subpage compatible, we only need to make the page status update to use
subpage helpers.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a56a13999bd66..617af76e876e5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -28,6 +28,7 @@
 #include "compression.h"
 #include "delalloc-space.h"
 #include "reflink.h"
+#include "subpage.h"
 
 static struct kmem_cache *btrfs_inode_defrag_cachep;
 /*
@@ -482,6 +483,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
 	start_pos = round_down(pos, fs_info->sectorsize);
 	num_bytes = round_up(write_bytes + pos - start_pos,
 			     fs_info->sectorsize);
+	ASSERT(num_bytes <= U32_MAX);
 
 	end_of_last_block = start_pos + num_bytes - 1;
 
@@ -500,9 +502,10 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
 
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = pages[i];
-		SetPageUptodate(p);
+
+		btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
 		ClearPageChecked(p);
-		set_page_dirty(p);
+		btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
 	}
 
 	/*
-- 
GitLab


From e38992be1f6cf3ed88169347b7d92cec40cc44d3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:42 +0800
Subject: [PATCH 3489/3804] btrfs: make __process_pages_contig() to handle
 subpage dirty/error/writeback status

For __process_pages_contig() and process_one_page(), to handle subpage
we only need to pass bytenr in and call subpage helpers to handle
dirty/error/writeback status.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9edef629ace20..3ecd23c6b9559 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1816,10 +1816,16 @@ out:
  * Return -EGAIN if the we need to try again.
  * (For PAGE_LOCK case but got dirty page or page not belong to mapping)
  */
-static int process_one_page(struct address_space *mapping,
+static int process_one_page(struct btrfs_fs_info *fs_info,
+			    struct address_space *mapping,
 			    struct page *page, struct page *locked_page,
-			    unsigned long page_ops)
+			    unsigned long page_ops, u64 start, u64 end)
 {
+	u32 len;
+
+	ASSERT(end + 1 - start != 0 && end + 1 - start < U32_MAX);
+	len = end + 1 - start;
+
 	if (page_ops & PAGE_SET_ORDERED)
 		SetPageOrdered(page);
 
@@ -1827,13 +1833,13 @@ static int process_one_page(struct address_space *mapping,
 		return 1;
 
 	if (page_ops & PAGE_SET_ERROR)
-		SetPageError(page);
+		btrfs_page_clamp_set_error(fs_info, page, start, len);
 	if (page_ops & PAGE_START_WRITEBACK) {
-		clear_page_dirty_for_io(page);
-		set_page_writeback(page);
+		btrfs_page_clamp_clear_dirty(fs_info, page, start, len);
+		btrfs_page_clamp_set_writeback(fs_info, page, start, len);
 	}
 	if (page_ops & PAGE_END_WRITEBACK)
-		end_page_writeback(page);
+		btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
 	if (page_ops & PAGE_LOCK) {
 		lock_page(page);
 		if (!PageDirty(page) || page->mapping != mapping) {
@@ -1851,6 +1857,7 @@ static int __process_pages_contig(struct address_space *mapping,
 				  u64 start, u64 end, unsigned long page_ops,
 				  u64 *processed_end)
 {
+	struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
 	pgoff_t start_index = start >> PAGE_SHIFT;
 	pgoff_t end_index = end >> PAGE_SHIFT;
 	pgoff_t index = start_index;
@@ -1887,8 +1894,9 @@ static int __process_pages_contig(struct address_space *mapping,
 		for (i = 0; i < found_pages; i++) {
 			int process_ret;
 
-			process_ret = process_one_page(mapping, pages[i],
-					locked_page, page_ops);
+			process_ret = process_one_page(fs_info, mapping,
+					pages[i], locked_page, page_ops,
+					start, end);
 			if (process_ret < 0) {
 				for (; i < found_pages; i++)
 					put_page(pages[i]);
-- 
GitLab


From 9047e3170a06f60a96a1d4a2f7762000657c7bbb Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:43 +0800
Subject: [PATCH 3490/3804] btrfs: make end_bio_extent_writepage() to be
 subpage compatible

Now in end_bio_extent_writepage(), the only subpage incompatible code is
the end_page_writeback().

Just call the subpage helpers.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3ecd23c6b9559..4220ef4ff7d08 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2853,7 +2853,8 @@ static void end_bio_extent_writepage(struct bio *bio)
 		}
 
 		end_extent_writepage(page, error, start, end);
-		end_page_writeback(page);
+
+		btrfs_page_clear_writeback(fs_info, page, start, bvec->bv_len);
 	}
 
 	bio_put(bio);
-- 
GitLab


From 1e1de38792e0ae28ac4a07628f20e42536c9202b Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:44 +0800
Subject: [PATCH 3491/3804] btrfs: make process_one_page() to handle subpage
 locking

Introduce a new data inodes specific subpage member, writers, to record
how many sectors are under page lock for delalloc writing.

This member acts pretty much the same as readers, except it's only for
delalloc writes.

This is important for delalloc code to trace which page can really be
freed, as we have cases like run_delalloc_nocow() where we may exit
processing nocow range inside a page, but need to exit to do cow half
way.
In that case, we need a way to determine if we can really unlock a full
page.

With the new btrfs_subpage::writers, there is a new requirement:
- Page locked by process_one_page() must be unlocked by
  process_one_page()
  There are still tons of call sites that manually lock and unlock a
  page without updating btrfs_subpage::writers.
  So if we lock a page through process_one_page() then it must be
  unlocked by process_one_page() to keep btrfs_subpage::writers
  consistent.

  This will be handled in next patch.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 10 +++--
 fs/btrfs/subpage.c   | 89 ++++++++++++++++++++++++++++++++++++++------
 fs/btrfs/subpage.h   | 10 +++++
 3 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4220ef4ff7d08..8cb51da0e16dd 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1841,14 +1841,18 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
 	if (page_ops & PAGE_END_WRITEBACK)
 		btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
 	if (page_ops & PAGE_LOCK) {
-		lock_page(page);
+		int ret;
+
+		ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
+		if (ret)
+			return ret;
 		if (!PageDirty(page) || page->mapping != mapping) {
-			unlock_page(page);
+			btrfs_page_end_writer_lock(fs_info, page, start, len);
 			return -EAGAIN;
 		}
 	}
 	if (page_ops & PAGE_UNLOCK)
-		unlock_page(page);
+		btrfs_page_end_writer_lock(fs_info, page, start, len);
 	return 0;
 }
 
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index a6cf1776f3f9b..69a0dbf353626 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -110,10 +110,12 @@ int btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
 	if (!*ret)
 		return -ENOMEM;
 	spin_lock_init(&(*ret)->lock);
-	if (type == BTRFS_SUBPAGE_METADATA)
+	if (type == BTRFS_SUBPAGE_METADATA) {
 		atomic_set(&(*ret)->eb_refs, 0);
-	else
+	} else {
 		atomic_set(&(*ret)->readers, 0);
+		atomic_set(&(*ret)->writers, 0);
+	}
 	return 0;
 }
 
@@ -203,6 +205,79 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
 		unlock_page(page);
 }
 
+static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
+{
+	u64 orig_start = *start;
+	u32 orig_len = *len;
+
+	*start = max_t(u64, page_offset(page), orig_start);
+	*len = min_t(u64, page_offset(page) + PAGE_SIZE,
+		     orig_start + orig_len) - *start;
+}
+
+void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const int nbits = (len >> fs_info->sectorsize_bits);
+	int ret;
+
+	btrfs_subpage_assert(fs_info, page, start, len);
+
+	ASSERT(atomic_read(&subpage->readers) == 0);
+	ret = atomic_add_return(nbits, &subpage->writers);
+	ASSERT(ret == nbits);
+}
+
+bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const int nbits = (len >> fs_info->sectorsize_bits);
+
+	btrfs_subpage_assert(fs_info, page, start, len);
+
+	ASSERT(atomic_read(&subpage->writers) >= nbits);
+	return atomic_sub_and_test(nbits, &subpage->writers);
+}
+
+/*
+ * Lock a page for delalloc page writeback.
+ *
+ * Return -EAGAIN if the page is not properly initialized.
+ * Return 0 with the page locked, and writer counter updated.
+ *
+ * Even with 0 returned, the page still need extra check to make sure
+ * it's really the correct page, as the caller is using
+ * find_get_pages_contig(), which can race with page invalidating.
+ */
+int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE) {
+		lock_page(page);
+		return 0;
+	}
+	lock_page(page);
+	if (!PagePrivate(page) || !page->private) {
+		unlock_page(page);
+		return -EAGAIN;
+	}
+	btrfs_subpage_clamp_range(page, &start, &len);
+	btrfs_subpage_start_writer(fs_info, page, start, len);
+	return 0;
+}
+
+void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	if (unlikely(!fs_info) || fs_info->sectorsize == PAGE_SIZE)
+		return unlock_page(page);
+	btrfs_subpage_clamp_range(page, &start, &len);
+	if (btrfs_subpage_end_and_test_writer(fs_info, page, start, len))
+		unlock_page(page);
+}
+
 /*
  * Convert the [start, start + len) range into a u16 bitmap
  *
@@ -354,16 +429,6 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
 
-static void btrfs_subpage_clamp_range(struct page *page, u64 *start, u32 *len)
-{
-	u64 orig_start = *start;
-	u32 orig_len = *len;
-
-	*start = max_t(u64, page_offset(page), orig_start);
-	*len = min_t(u64, page_offset(page) + PAGE_SIZE,
-		     orig_start + orig_len) - *start;
-}
-
 /*
  * Unlike set/clear which is dependent on each page status, for test all bits
  * are tested in the same way.
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 291cb1932f273..9d087ab3244ec 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -33,6 +33,7 @@ struct btrfs_subpage {
 		/* Structures only used by data */
 		struct {
 			atomic_t readers;
+			atomic_t writers;
 		};
 	};
 };
@@ -63,6 +64,15 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
 void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len);
 
+void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len);
+bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len);
+int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len);
+void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len);
+
 /*
  * Template for subpage related operations.
  *
-- 
GitLab


From 6f17400bd92e82ad549ea5374ffc71e35e2e4ee5 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:45 +0800
Subject: [PATCH 3492/3804] btrfs: introduce helpers for subpage ordered status

This patch introduces the following functions to handle btrfs subpage
ordered (Private2) status:

- btrfs_subpage_set_ordered()
- btrfs_subpage_clear_ordered()
- btrfs_subpage_test_ordered()
  These helpers can only be called when the range is ensured to be
  inside the page.

- btrfs_page_set_ordered()
- btrfs_page_clear_ordered()
- btrfs_page_test_ordered()
  These helpers can handle both regular sector size and subpage without
  problem.

These functions are here to coordinate btrfs_invalidatepage() with
btrfs_writepage_endio_finish_ordered(), to make sure only one of those
functions can finish the ordered extent.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/subpage.c | 29 +++++++++++++++++++++++++++++
 fs/btrfs/subpage.h |  4 ++++
 2 files changed, 33 insertions(+)

diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 69a0dbf353626..7d72eaf5f972b 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -429,6 +429,32 @@ void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
 	spin_unlock_irqrestore(&subpage->lock, flags);
 }
 
+void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned long flags;
+
+	spin_lock_irqsave(&subpage->lock, flags);
+	subpage->ordered_bitmap |= tmp;
+	SetPageOrdered(page);
+	spin_unlock_irqrestore(&subpage->lock, flags);
+}
+
+void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
+		struct page *page, u64 start, u32 len)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	const u16 tmp = btrfs_subpage_calc_bitmap(fs_info, page, start, len);
+	unsigned long flags;
+
+	spin_lock_irqsave(&subpage->lock, flags);
+	subpage->ordered_bitmap &= ~tmp;
+	if (subpage->ordered_bitmap == 0)
+		ClearPageOrdered(page);
+	spin_unlock_irqrestore(&subpage->lock, flags);
+}
 /*
  * Unlike set/clear which is dependent on each page status, for test all bits
  * are tested in the same way.
@@ -451,6 +477,7 @@ IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
 IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
+IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
 
 /*
  * Note that, in selftests (extent-io-tests), we can have empty fs_info passed
@@ -519,3 +546,5 @@ IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io,
 			 PageDirty);
 IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
 			 PageWriteback);
+IMPLEMENT_BTRFS_PAGE_OPS(ordered, SetPageOrdered, ClearPageOrdered,
+			 PageOrdered);
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 9d087ab3244ec..65298a5efe7bb 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -34,6 +34,9 @@ struct btrfs_subpage {
 		struct {
 			atomic_t readers;
 			atomic_t writers;
+
+			/* Tracke pending ordered extent in this sector */
+			u16 ordered_bitmap;
 		};
 	};
 };
@@ -111,6 +114,7 @@ DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
 DECLARE_BTRFS_SUBPAGE_OPS(error);
 DECLARE_BTRFS_SUBPAGE_OPS(dirty);
 DECLARE_BTRFS_SUBPAGE_OPS(writeback);
+DECLARE_BTRFS_SUBPAGE_OPS(ordered);
 
 bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
 		struct page *page, u64 start, u32 len);
-- 
GitLab


From b945a4637ec72a8ed0e526580a136d24f11abde1 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:46 +0800
Subject: [PATCH 3493/3804] btrfs: make page Ordered bit to be subpage
 compatible

This involves the following modification:

- Ordered extent creation
  This is done in process_one_page(), now PAGE_SET_ORDERED will call
  subpage helper to do the work.

- endio functions
  This is done in btrfs_mark_ordered_io_finished().

- btrfs_invalidatepage()

- btrfs_cleanup_ordered_extents()
  Use the subpage page helper, and add an extra branch to exit if the
  locked page has covered the full range.

Now the usage of page Ordered flag for ordered extent accounting is fully
subpage compatible.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c    |  2 +-
 fs/btrfs/inode.c        | 19 ++++++++++++++-----
 fs/btrfs/ordered-data.c |  5 +++--
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8cb51da0e16dd..37a24b2e63ef1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1827,7 +1827,7 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
 	len = end + 1 - start;
 
 	if (page_ops & PAGE_SET_ORDERED)
-		SetPageOrdered(page);
+		btrfs_page_clamp_set_ordered(fs_info, page, start, len);
 
 	if (page == locked_page)
 		return 1;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2eedcf65b8aa9..52f31d59a38e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -51,6 +51,7 @@
 #include "block-group.h"
 #include "space-info.h"
 #include "zoned.h"
+#include "subpage.h"
 
 struct btrfs_iget_args {
 	u64 ino;
@@ -191,18 +192,22 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 		 * range, then __endio_write_update_ordered() will handle
 		 * the ordered extent accounting for the range.
 		 */
-		ClearPageOrdered(page);
+		btrfs_page_clamp_clear_ordered(inode->root->fs_info, page,
+					       offset, bytes);
 		put_page(page);
 	}
 
+	/* The locked page covers the full range, nothing needs to be done */
+	if (bytes + offset <= page_offset(locked_page) + PAGE_SIZE)
+		return;
 	/*
 	 * In case this page belongs to the delalloc range being instantiated
 	 * then skip it, since the first page of a range is going to be
 	 * properly cleaned up by the caller of run_delalloc_range
 	 */
 	if (page_start >= offset && page_end <= (offset + bytes - 1)) {
-		offset += PAGE_SIZE;
-		bytes -= PAGE_SIZE;
+		bytes = offset + bytes - page_offset(locked_page) - PAGE_SIZE;
+		offset = page_offset(locked_page) + PAGE_SIZE;
 	}
 
 	return __endio_write_update_ordered(inode, offset, bytes, false);
@@ -8339,6 +8344,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 				 unsigned int length)
 {
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct extent_io_tree *tree = &inode->io_tree;
 	struct extent_state *cached_state = NULL;
 	u64 page_start = page_offset(page);
@@ -8374,6 +8380,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 		struct btrfs_ordered_extent *ordered;
 		bool delete_states;
 		u64 range_end;
+		u32 range_len;
 
 		ordered = btrfs_lookup_first_ordered_range(inode, cur,
 							   page_end + 1 - cur);
@@ -8400,7 +8407,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 
 		range_end = min(ordered->file_offset + ordered->num_bytes - 1,
 				page_end);
-		if (!PageOrdered(page)) {
+		ASSERT(range_end + 1 - cur < U32_MAX);
+		range_len = range_end + 1 - cur;
+		if (!btrfs_page_test_ordered(fs_info, page, cur, range_len)) {
 			/*
 			 * If Ordered (Private2) is cleared, it means endio has
 			 * already been executed for the range.
@@ -8410,7 +8419,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 			delete_states = false;
 			goto next;
 		}
-		ClearPageOrdered(page);
+		btrfs_page_clear_ordered(fs_info, page, cur, range_len);
 
 		/*
 		 * IO on this page will never be started, so we need to account
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b1b377ad99a04..6eb41b7c0c843 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,6 +16,7 @@
 #include "compression.h"
 #include "delalloc-space.h"
 #include "qgroup.h"
+#include "subpage.h"
 
 static struct kmem_cache *btrfs_ordered_extent_cache;
 
@@ -395,11 +396,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
 			 *
 			 * If there's no such bit, we need to skip to next range.
 			 */
-			if (!PageOrdered(page)) {
+			if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
 				cur += len;
 				continue;
 			}
-			ClearPageOrdered(page);
+			btrfs_page_clear_ordered(fs_info, page, cur, len);
 		}
 
 		/* Now we're fine to update the accounting */
-- 
GitLab


From a33a8e9afcab270bfd8081ded8efb8c1e9eac7f3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:47 +0800
Subject: [PATCH 3494/3804] btrfs: update locked page dirty/writeback/error
 bits in __process_pages_contig

When __process_pages_contig() gets called for
extent_clear_unlock_delalloc(), if we hit the locked page, only Private2
bit is updated, but dirty/writeback/error bits are all skipped.

There are several call sites that call extent_clear_unlock_delalloc()
with locked_page and PAGE_CLEAR_DIRTY/PAGE_SET_WRITEBACK/PAGE_END_WRITEBACK:

- cow_file_range()
- run_delalloc_nocow()
- cow_file_range_async()
  All for their error handling branches.

For those call sites, since we skip the locked page for
dirty/error/writeback bit update, the locked page will still have its
subpage dirty bit remaining.

Normally it's the call sites which locked the page to handle the locked
page, but it won't hurt if we also do the update.

Especially there are already other call sites doing the same thing by
manually passing NULL as locked_page.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 37a24b2e63ef1..7dec08156ff0f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1828,10 +1828,6 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
 
 	if (page_ops & PAGE_SET_ORDERED)
 		btrfs_page_clamp_set_ordered(fs_info, page, start, len);
-
-	if (page == locked_page)
-		return 1;
-
 	if (page_ops & PAGE_SET_ERROR)
 		btrfs_page_clamp_set_error(fs_info, page, start, len);
 	if (page_ops & PAGE_START_WRITEBACK) {
@@ -1840,6 +1836,10 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
 	}
 	if (page_ops & PAGE_END_WRITEBACK)
 		btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
+
+	if (page == locked_page)
+		return 1;
+
 	if (page_ops & PAGE_LOCK) {
 		int ret;
 
-- 
GitLab


From 4750af3bbe5d975951b09afc61f18c7b29db7d44 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:48 +0800
Subject: [PATCH 3495/3804] btrfs: prevent extent_clear_unlock_delalloc() to
 unlock page not locked by __process_pages_contig()

In cow_file_range(), after we have succeeded creating an inline extent,
we unlock the page with extent_clear_unlock_delalloc() by passing
locked_page == NULL.

For sectorsize == PAGE_SIZE case, this is just making the page lock and
unlock harder to grab.

But for incoming subpage case, it can be a big problem.

For incoming subpage case, page locking has two entry points:

- __process_pages_contig()
  In that case, we know exactly the range we want to lock (which only
  requires sector alignment).
  To handle the subpage requirement, we introduce btrfs_subpage::writers
  to page::private, and will update it in __process_pages_contig().

- Other directly lock/unlock_page() call sites
  Those won't touch btrfs_subpage::writers at all.

This means, page locked by __process_pages_contig() can only be unlocked
by __process_pages_contig().
Thankfully we already have the existing infrastructure in the form of
@locked_page in various call sites.

Unfortunately, extent_clear_unlock_delalloc() in cow_file_range() after
creating an inline extent is the exception.
It intentionally calls extent_clear_unlock_delalloc() with locked_page ==
NULL, to also unlock the current page (and clear its dirty/writeback bits).

To co-operate with incoming subpage modifications, and make the page
lock/unlock pair easier to understand, this patch will still call
extent_clear_unlock_delalloc() with locked_page, and only unlock the
page in __extent_writepage().

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 52f31d59a38e3..9f7b7110f629e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1091,7 +1091,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 			 * our outstanding extent for clearing delalloc for this
 			 * range.
 			 */
-			extent_clear_unlock_delalloc(inode, start, end, NULL,
+			extent_clear_unlock_delalloc(inode, start, end,
+				     locked_page,
 				     EXTENT_LOCKED | EXTENT_DELALLOC |
 				     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
 				     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
@@ -1099,6 +1100,19 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
 			*nr_written = *nr_written +
 			     (end - start + PAGE_SIZE) / PAGE_SIZE;
 			*page_started = 1;
+			/*
+			 * locked_page is locked by the caller of
+			 * writepage_delalloc(), not locked by
+			 * __process_pages_contig().
+			 *
+			 * We can't let __process_pages_contig() to unlock it,
+			 * as it doesn't have any subpage::writers recorded.
+			 *
+			 * Here we manually unlock the page, since the caller
+			 * can't use page_started to determine if it's an
+			 * inline extent or a compressed extent.
+			 */
+			unlock_page(locked_page);
 			goto out;
 		} else if (ret < 0) {
 			goto out_unlock;
-- 
GitLab


From d2a9106448abad5646591795c8962ac043db4f89 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:49 +0800
Subject: [PATCH 3496/3804] btrfs: make btrfs_set_range_writeback() subpage
 compatible

Function btrfs_set_range_writeback() currently just sets the page
writeback unconditionally.

Change it to call the subpage helper so that we can handle both cases
well.

Since the subpage helpers needs btrfs_fs_info, also change the parameter
to accept btrfs_inode.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h     |  2 +-
 fs/btrfs/extent_io.c |  3 +--
 fs/btrfs/inode.c     | 12 ++++++++----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c63980977fa4d..82e58f2fbe0af 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3157,7 +3157,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
 				 struct extent_state *orig, u64 split);
 int btrfs_bio_fits_in_stripe(struct page *page, size_t size, struct bio *bio,
 			     unsigned long bio_flags);
-void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end);
+void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
 vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_evict_inode(struct inode *inode);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7dec08156ff0f..2fb26193b7957 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3800,7 +3800,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 				 int *nr_ret)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
-	struct extent_io_tree *tree = &inode->io_tree;
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_SIZE - 1;
 	u64 cur = start;
@@ -3879,7 +3878,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			continue;
 		}
 
-		btrfs_set_range_writeback(tree, cur, cur + iosize - 1);
+		btrfs_set_range_writeback(inode, cur, cur + iosize - 1);
 		if (!PageWriteback(page)) {
 			btrfs_err(inode->root->fs_info,
 				   "page %lu not writeback, cur %llu end %llu",
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9f7b7110f629e..6d905a7b87d03 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -10234,17 +10234,21 @@ out:
 	return ret;
 }
 
-void btrfs_set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
 {
-	struct inode *inode = tree->private_data;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	unsigned long index = start >> PAGE_SHIFT;
 	unsigned long end_index = end >> PAGE_SHIFT;
 	struct page *page;
+	u32 len;
 
+	ASSERT(end + 1 - start <= U32_MAX);
+	len = end + 1 - start;
 	while (index <= end_index) {
-		page = find_get_page(inode->i_mapping, index);
+		page = find_get_page(inode->vfs_inode.i_mapping, index);
 		ASSERT(page); /* Pages should be in the extent_io_tree */
-		set_page_writeback(page);
+
+		btrfs_page_set_writeback(fs_info, page, start, len);
 		put_page(page);
 		index++;
 	}
-- 
GitLab


From c5ef5c6c733a087fc3f8b298010d7e6911bff1e3 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:50 +0800
Subject: [PATCH 3497/3804] btrfs: make __extent_writepage_io() only submit
 dirty range for subpage

__extent_writepage_io() function originally just iterates through all
the extent maps of a page, and submits any regular extents.

This is fine for sectorsize == PAGE_SIZE case, as if a page is dirty, we
need to submit the only sector contained in the page.

But for subpage case, one dirty page can contain several clean sectors
with at least one dirty sector.

If __extent_writepage_io() still submits all regular extent maps, it can
submit data which is already written to disk.
And since such already written data won't have corresponding ordered
extents, it will trigger a BUG_ON() in btrfs_csum_one_bio().

Change the behavior of __extent_writepage_io() by finding the first
dirty byte in the page, and only submit the dirty range rather than the
full extent.

Since we're also here, also modify the following calls to be subpage
compatible:

- SetPageError()
- end_page_writeback()

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 80 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2fb26193b7957..1acbb7f1e6e3e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3783,6 +3783,54 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
 	return 0;
 }
 
+/*
+ * Find the first byte we need to write.
+ *
+ * For subpage, one page can contain several sectors, and
+ * __extent_writepage_io() will just grab all extent maps in the page
+ * range and try to submit all non-inline/non-compressed extents.
+ *
+ * This is a big problem for subpage, we shouldn't re-submit already written
+ * data at all.
+ * This function will lookup subpage dirty bit to find which range we really
+ * need to submit.
+ *
+ * Return the next dirty range in [@start, @end).
+ * If no dirty range is found, @start will be page_offset(page) + PAGE_SIZE.
+ */
+static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
+				 struct page *page, u64 *start, u64 *end)
+{
+	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
+	u64 orig_start = *start;
+	/* Declare as unsigned long so we can use bitmap ops */
+	unsigned long dirty_bitmap;
+	unsigned long flags;
+	int nbits = (orig_start - page_offset(page)) >> fs_info->sectorsize_bits;
+	int range_start_bit = nbits;
+	int range_end_bit;
+
+	/*
+	 * For regular sector size == page size case, since one page only
+	 * contains one sector, we return the page offset directly.
+	 */
+	if (fs_info->sectorsize == PAGE_SIZE) {
+		*start = page_offset(page);
+		*end = page_offset(page) + PAGE_SIZE;
+		return;
+	}
+
+	/* We should have the page locked, but just in case */
+	spin_lock_irqsave(&subpage->lock, flags);
+	dirty_bitmap = subpage->dirty_bitmap;
+	spin_unlock_irqrestore(&subpage->lock, flags);
+
+	bitmap_next_set_region(&dirty_bitmap, &range_start_bit, &range_end_bit,
+			       BTRFS_SUBPAGE_BITMAP_SIZE);
+	*start = page_offset(page) + range_start_bit * fs_info->sectorsize;
+	*end = page_offset(page) + range_end_bit * fs_info->sectorsize;
+}
+
 /*
  * helper for __extent_writepage.  This calls the writepage start hooks,
  * and does the loop to map the page into extents and bios.
@@ -3830,6 +3878,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 	while (cur <= end) {
 		u64 disk_bytenr;
 		u64 em_end;
+		u64 dirty_range_start = cur;
+		u64 dirty_range_end;
 		u32 iosize;
 
 		if (cur >= i_size) {
@@ -3837,9 +3887,17 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 							     end, 1);
 			break;
 		}
+
+		find_next_dirty_byte(fs_info, page, &dirty_range_start,
+				     &dirty_range_end);
+		if (cur < dirty_range_start) {
+			cur = dirty_range_start;
+			continue;
+		}
+
 		em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
 		if (IS_ERR_OR_NULL(em)) {
-			SetPageError(page);
+			btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
 			ret = PTR_ERR_OR_ZERO(em);
 			break;
 		}
@@ -3854,8 +3912,11 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		disk_bytenr = em->block_start + extent_offset;
 
-		/* Note that em_end from extent_map_end() is exclusive */
-		iosize = min(em_end, end + 1) - cur;
+		/*
+		 * Note that em_end from extent_map_end() and dirty_range_end from
+		 * find_next_dirty_byte() are all exclusive
+		 */
+		iosize = min(min(em_end, end + 1), dirty_range_end) - cur;
 
 		if (btrfs_use_zone_append(inode, em->block_start))
 			opf = REQ_OP_ZONE_APPEND;
@@ -3885,6 +3946,14 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 			       page->index, cur, end);
 		}
 
+		/*
+		 * Although the PageDirty bit is cleared before entering this
+		 * function, subpage dirty bit is not cleared.
+		 * So clear subpage dirty bit here so next time we won't submit
+		 * page for range already written to disk.
+		 */
+		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
+
 		ret = submit_extent_page(opf | write_flags, wbc,
 					 &epd->bio_ctrl, page,
 					 disk_bytenr, iosize,
@@ -3892,9 +3961,10 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 					 end_bio_extent_writepage,
 					 0, 0, false);
 		if (ret) {
-			SetPageError(page);
+			btrfs_page_set_error(fs_info, page, cur, iosize);
 			if (PageWriteback(page))
-				end_page_writeback(page);
+				btrfs_page_clear_writeback(fs_info, page, cur,
+							   iosize);
 		}
 
 		cur += iosize;
-- 
GitLab


From 6c9ac8be458152a6316cf28fcd52c7f38f7ec8ec Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:51 +0800
Subject: [PATCH 3498/3804] btrfs: make btrfs_truncate_block() to be subpage
 compatible

btrfs_truncate_block() itself is already mostly subpage compatible, the
only missing part is the page dirtying code.

Currently if we have a sector that needs to be truncated, we set the
sector aligned range delalloc, then set the full page dirty.

The problem is, current subpage code requires the subpage dirty bit to be
set, or __extent_writepage_io() won't submit the bio, which leads to the
ordered extent never finishing.

So this patch will make btrfs_truncate_block() to call
btrfs_page_set_dirty() helper to replace set_page_dirty() to fix the
problem.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d905a7b87d03..88838ef22413a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4965,7 +4965,7 @@ again:
 		flush_dcache_page(page);
 	}
 	ClearPageChecked(page);
-	set_page_dirty(page);
+	btrfs_page_set_dirty(fs_info, page, block_start, block_end + 1 - block_start);
 	unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
 
 	if (only_release_metadata)
-- 
GitLab


From 2d8ec40ee46d211fa8396678210faf19e013b093 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:52 +0800
Subject: [PATCH 3499/3804] btrfs: make btrfs_page_mkwrite() to be subpage
 compatible

Only set_page_dirty() and SetPageUptodate() are not subpage compatible.
Convert them to subpage helpers, so that __extent_writepage_io() can
submit page content correctly.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 88838ef22413a..0d67a14114e7d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8651,8 +8651,8 @@ again:
 		flush_dcache_page(page);
 	}
 	ClearPageChecked(page);
-	set_page_dirty(page);
-	SetPageUptodate(page);
+	btrfs_page_set_dirty(fs_info, page, page_start, end + 1 - page_start);
+	btrfs_page_set_uptodate(fs_info, page, page_start, end + 1 - page_start);
 
 	btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
 
-- 
GitLab


From 3115deb381e9242527017700cc7a946799d3af25 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:53 +0800
Subject: [PATCH 3500/3804] btrfs: reflink: make copy_inline_to_page() to be
 subpage compatible

The modifications are:

- Page copy destination
  For subpage case, one page can contain multiple sectors, thus we can
  no longer expect the memcpy_to_page()/btrfs_decompress() to copy
  data into page offset 0.
  The correct offset is offset_in_page(file_offset) now, which should
  handle both regular sectorsize and subpage cases well.

- Page status update
  Now we need to use subpage helper to handle the page status update.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/reflink.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 9178da07cc9c9..9b0814318e726 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -7,6 +7,7 @@
 #include "delalloc-space.h"
 #include "reflink.h"
 #include "transaction.h"
+#include "subpage.h"
 
 #define BTRFS_MAX_DEDUPE_LEN	SZ_16M
 
@@ -52,7 +53,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
 			       const u64 datal,
 			       const u8 comp_type)
 {
-	const u64 block_size = btrfs_inode_sectorsize(inode);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	const u32 block_size = fs_info->sectorsize;
 	const u64 range_end = file_offset + block_size - 1;
 	const size_t inline_size = size - btrfs_file_extent_calc_inline_size(0);
 	char *data_start = inline_data + btrfs_file_extent_calc_inline_size(0);
@@ -106,10 +108,12 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
 	set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
 
 	if (comp_type == BTRFS_COMPRESS_NONE) {
-		memcpy_to_page(page, 0, data_start, datal);
+		memcpy_to_page(page, offset_in_page(file_offset), data_start,
+			       datal);
 		flush_dcache_page(page);
 	} else {
-		ret = btrfs_decompress(comp_type, data_start, page, 0,
+		ret = btrfs_decompress(comp_type, data_start, page,
+				       offset_in_page(file_offset),
 				       inline_size, datal);
 		if (ret)
 			goto out_unlock;
@@ -133,9 +137,9 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
 		flush_dcache_page(page);
 	}
 
-	SetPageUptodate(page);
+	btrfs_page_set_uptodate(fs_info, page, file_offset, block_size);
 	ClearPageChecked(page);
-	set_page_dirty(page);
+	btrfs_page_set_dirty(fs_info, page, file_offset, block_size);
 out_unlock:
 	if (page) {
 		unlock_page(page);
-- 
GitLab


From 0528476b6ac7832f31e2ed740a57ae31316b124e Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:54 +0800
Subject: [PATCH 3501/3804] btrfs: fix the filemap_range_has_page() call in
 btrfs_punch_hole_lock_range()

[BUG]
With current subpage RW support, the following script can hang the fs
with 64K page size.

 # mkfs.btrfs -f -s 4k $dev
 # mount $dev -o nospace_cache $mnt
 # fsstress -w -n 50 -p 1 -s 1607749395 -d $mnt

The kernel will do an infinite loop in btrfs_punch_hole_lock_range().

[CAUSE]
In btrfs_punch_hole_lock_range() we:

- Truncate page cache range
- Lock extent io tree
- Wait any ordered extents in the range.

We only exit the loop once all of the following conditions are met:

- No ordered extent in the lock range
- No page is in the lock range

The latter condition has a pitfall: it only works for the sector size ==
PAGE_SIZE case.

It can't handle the following subpage case:

  0       32K     64K     96K     128K
  |       |///////||//////|       ||

lockstart=32K
lockend=96K - 1

In this case, although the range crosses 2 pages,
truncate_pagecache_range() will invalidate no page at all, but only zero
the [32K, 96K) range of the two pages.

Thus filemap_range_has_page(32K, 96K-1) will always return true, thus we
will never meet the loop exit condition.

[FIX]
Fix the problem by doing page alignment for the lock range.

Function filemap_range_has_page() already handles the lend < lstart
case, so we only need to round up @lockstart and round down @lockend for
the filemap_range_has_page() call.

This modification should not change any thing for sector size ==
PAGE_SIZE case, as in that case our range is already page aligned.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 617af76e876e5..d0081b2d47ab6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2486,6 +2486,17 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
 				       const u64 lockend,
 				       struct extent_state **cached_state)
 {
+	/*
+	 * For subpage case, if the range is not at page boundary, we could
+	 * have pages at the leading/tailing part of the range.
+	 * This could lead to dead loop since filemap_range_has_page()
+	 * will always return true.
+	 * So here we need to do extra page alignment for
+	 * filemap_range_has_page().
+	 */
+	const u64 page_lockstart = round_up(lockstart, PAGE_SIZE);
+	const u64 page_lockend = round_down(lockend + 1, PAGE_SIZE) - 1;
+
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
 		int ret;
@@ -2506,7 +2517,7 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
 		    (ordered->file_offset + ordered->num_bytes <= lockstart ||
 		     ordered->file_offset > lockend)) &&
 		     !filemap_range_has_page(inode->i_mapping,
-					     lockstart, lockend)) {
+					     page_lockstart, page_lockend)) {
 			if (ordered)
 				btrfs_put_ordered_extent(ordered);
 			break;
-- 
GitLab


From bcd77455d590eaa0422a5e84ae852007cfce574a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 31 May 2021 16:50:55 +0800
Subject: [PATCH 3502/3804] btrfs: don't clear page extent mapped if we're not
 invalidating the full page

[BUG]
With current btrfs subpage rw support, the following script can lead to
fs hang:

  $ mkfs.btrfs -f -s 4k $dev
  $ mount $dev -o nospace_cache $mnt
  $ fsstress -w -n 100 -p 1 -s 1608140256 -v -d $mnt

The fs will hang at btrfs_start_ordered_extent().

[CAUSE]
In the above test case, btrfs_invalidatepage() will be called with the
following parameters:

  offset = 0 length = 53248 page dirty = 1 subpage dirty bitmap = 0x2000

Since @offset is 0, btrfs_invalidatepage() will try to invalidate the full
page, and finally call clear_page_extent_mapped() which will detach the
subpage structure from the page.

And since the page no longer has subpage structure, the subpage dirty
bitmap will be cleared, preventing the dirty range from being written
back, thus no way to wake up the ordered extent.

[FIX]
Just follow other filesystems and only invalidate the page if the range
covers the full page.

There are cases like truncate_setsize() which can call
btrfs_invalidatepage() with offset == 0 and length != 0 for the last
page of an inode.

Although the old code will still try to invalidate the full page, we are
still safe to just wait for ordered extent to finish.
So it shouldn't cause extra problems.

Tested-by: Ritesh Harjani <riteshh@linux.ibm.com> # [ppc64]
Tested-by: Anand Jain <anand.jain@oracle.com> # [aarch64]
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0d67a14114e7d..794d906cba6c1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8381,7 +8381,19 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	 */
 	wait_on_page_writeback(page);
 
-	if (offset) {
+	/*
+	 * For subpage case, we have call sites like
+	 * btrfs_punch_hole_lock_range() which passes range not aligned to
+	 * sectorsize.
+	 * If the range doesn't cover the full page, we don't need to and
+	 * shouldn't clear page extent mapped, as page->private can still
+	 * record subpage dirty bits for other part of the range.
+	 *
+	 * For cases that can invalidate the full even the range doesn't
+	 * cover the full page, like invalidating the last page, we're
+	 * still safe to wait for ordered extent to finish.
+	 */
+	if (!(offset == 0 && length == PAGE_SIZE)) {
 		btrfs_releasepage(page, GFP_NOFS);
 		return;
 	}
-- 
GitLab


From 3d078efae6f3854eadf9def9cbb4f30389c0c504 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Mon, 7 Jun 2021 17:02:58 +0800
Subject: [PATCH 3503/3804] btrfs: subpage: fix a rare race between metadata
 endio and eb freeing

[BUG]
There is a very rare ASSERT() triggering during full fstests run for
subpage rw support.

No other reproducer so far.

The ASSERT() gets triggered for metadata read in
btrfs_page_set_uptodate() inside end_page_read().

[CAUSE]
There is still a small race window for metadata only, the race could
happen like this:

                T1                  |              T2
------------------------------------+-----------------------------
end_bio_extent_readpage()           |
|- btrfs_validate_metadata_buffer() |
|  |- free_extent_buffer()          |
|     Still have 2 refs             |
|- end_page_read()                  |
   |- if (unlikely(PagePrivate())   |
   |  The page still has Private    |
   |                                | free_extent_buffer()
   |                                | |  Only one ref 1, will be
   |                                | |  released
   |                                | |- detach_extent_buffer_page()
   |                                |    |- btrfs_detach_subpage()
   |- btrfs_page_set_uptodate()     |
      The page no longer has Private|
      >>> ASSERT() triggered <<<    |

This race window is super small, thus pretty hard to hit, even with so
many runs of fstests.

But the race window is still there, we have to go another way to solve
it other than relying on random PagePrivate() check.

Data path is not affected, as it will lock the page before reading,
while unlocking the page after the last read has finished, thus no race
window.

[FIX]
This patch will fix the bug by repurposing btrfs_subpage::readers.

Now btrfs_subpage::readers will be a member shared by both metadata and
data.

For metadata path, we don't do the page unlock as metadata only relies
on extent locking.

At the same time, teach page_range_has_eb() to take
btrfs_subpage::readers into consideration.

So that even if the last eb of a page gets freed, page::private won't be
detached as long as there still are pending end_page_read() calls.

This eliminates the race window. It will slightly increase the
metadata memory usage, as the page may not be released as frequently as
usual, but that should not be a big deal.

The code got introduced in ("btrfs: submit read time repair only for
each corrupted sector"), but the fix is in a separate patch to keep the
problem description and the crash is rare so it should not hurt
bisectability.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent_io.c | 30 +++++++++---------------------
 fs/btrfs/subpage.c   | 19 +++++++++++++++----
 fs/btrfs/subpage.h   |  9 ++++++++-
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1acbb7f1e6e3e..9e81d25dea70e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2687,21 +2687,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
 	ASSERT(page_offset(page) <= start &&
 	       start + len <= page_offset(page) + PAGE_SIZE);
 
-	/*
-	 * For subapge metadata case, all btrfs_page_* helpers need page to
-	 * have page::private populated.
-	 * But we can have rare case where the last eb in the page is only
-	 * referred by the IO, and it gets released immedately after it's
-	 * read and verified.
-	 *
-	 * This can detach the page private completely.
-	 * In that case, we can just skip the page status update completely,
-	 * as the page has no eb anymore.
-	 */
-	if (fs_info->sectorsize < PAGE_SIZE && unlikely(!PagePrivate(page))) {
-		ASSERT(!is_data_inode(page->mapping->host));
-		return;
-	}
 	if (uptodate) {
 		btrfs_page_set_uptodate(fs_info, page, start, len);
 	} else {
@@ -2711,11 +2696,7 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
 
 	if (fs_info->sectorsize == PAGE_SIZE)
 		unlock_page(page);
-	else if (is_data_inode(page->mapping->host))
-		/*
-		 * For subpage data, unlock the page if we're the last reader.
-		 * For subpage metadata, page lock is not utilized for read.
-		 */
+	else
 		btrfs_subpage_end_reader(fs_info, page, start, len);
 }
 
@@ -5603,6 +5584,12 @@ static bool page_range_has_eb(struct btrfs_fs_info *fs_info, struct page *page)
 		subpage = (struct btrfs_subpage *)page->private;
 		if (atomic_read(&subpage->eb_refs))
 			return true;
+		/*
+		 * Even there is no eb refs here, we may still have
+		 * end_page_read() call relying on page::private.
+		 */
+		if (atomic_read(&subpage->readers))
+			return true;
 	}
 	return false;
 }
@@ -5663,7 +5650,7 @@ static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *pag
 
 	/*
 	 * We can only detach the page private if there are no other ebs in the
-	 * page range.
+	 * page range and no unfinished IO.
 	 */
 	if (!page_range_has_eb(fs_info, page))
 		btrfs_detach_subpage(fs_info, page);
@@ -6381,6 +6368,7 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	check_buffer_tree_ref(eb);
 	btrfs_subpage_clear_error(fs_info, page, eb->start, eb->len);
 
+	btrfs_subpage_start_reader(fs_info, page, eb->start, eb->len);
 	ret = submit_extent_page(REQ_OP_READ | REQ_META, NULL, &bio_ctrl,
 				 page, eb->start, eb->len,
 				 eb->start - page_offset(page),
diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c
index 7d72eaf5f972b..640bcd21bf289 100644
--- a/fs/btrfs/subpage.c
+++ b/fs/btrfs/subpage.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 #include "ctree.h"
 #include "subpage.h"
+#include "btrfs_inode.h"
 
 /*
  * Subpage (sectorsize < PAGE_SIZE) support overview:
@@ -185,12 +186,10 @@ void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 	const int nbits = len >> fs_info->sectorsize_bits;
-	int ret;
 
 	btrfs_subpage_assert(fs_info, page, start, len);
 
-	ret = atomic_add_return(nbits, &subpage->readers);
-	ASSERT(ret == nbits);
+	atomic_add(nbits, &subpage->readers);
 }
 
 void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
@@ -198,10 +197,22 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
 {
 	struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
 	const int nbits = len >> fs_info->sectorsize_bits;
+	bool is_data;
+	bool last;
 
 	btrfs_subpage_assert(fs_info, page, start, len);
+	is_data = is_data_inode(page->mapping->host);
 	ASSERT(atomic_read(&subpage->readers) >= nbits);
-	if (atomic_sub_and_test(nbits, &subpage->readers))
+	last = atomic_sub_and_test(nbits, &subpage->readers);
+
+	/*
+	 * For data we need to unlock the page if the last read has finished.
+	 *
+	 * Please don't replace @last with an atomic_sub_and_test() call inside
+	 * the if () condition: short-circuit evaluation would skip it for
+	 * metadata, and we want atomic_sub_and_test() to always be executed.
+	 */
+	if (is_data && last)
 		unlock_page(page);
 }
 
diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h
index 65298a5efe7bb..4d7aca85d9158 100644
--- a/fs/btrfs/subpage.h
+++ b/fs/btrfs/subpage.h
@@ -22,6 +22,14 @@ struct btrfs_subpage {
 	u16 error_bitmap;
 	u16 dirty_bitmap;
 	u16 writeback_bitmap;
+	/*
+	 * Both data and metadata need to track how many readers there are for
+	 * the page.
+	 * Data relies on @readers to unlock the page when the last reader has
+	 * finished. Metadata doesn't need page unlock, but it needs to prevent
+	 * page::private from being cleared before the last end_page_read().
+	 */
+	atomic_t readers;
 	union {
 		/*
 		 * Structures only used by metadata
@@ -32,7 +40,6 @@ struct btrfs_subpage {
 		atomic_t eb_refs;
 		/* Structures only used by data */
 		struct {
-			atomic_t readers;
 			atomic_t writers;
 
 			/* Tracke pending ordered extent in this sector */
-- 
GitLab


From 77d255348bb2ce9a174cca020aa38f2ce82cb2bc Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 1 Jun 2021 09:08:15 +0300
Subject: [PATCH 3504/3804] btrfs: eliminate insert label in add_falloc_range

By way of inverting the list_empty conditional the insert label can be
eliminated, making the function's flow entirely linear.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d0081b2d47ab6..28a05ba47060e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3050,19 +3050,18 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
 {
 	struct falloc_range *range = NULL;
 
-	if (list_empty(head))
-		goto insert;
-
-	/*
-	 * As fallocate iterate by bytenr order, we only need to check
-	 * the last range.
-	 */
-	range = list_last_entry(head, struct falloc_range, list);
-	if (range->start + range->len == start) {
-		range->len += len;
-		return 0;
+	if (!list_empty(head)) {
+		/*
+		 * As fallocate iterates by bytenr order, we only need to check
+		 * the last range.
+		 */
+		range = list_last_entry(head, struct falloc_range, list);
+		if (range->start + range->len == start) {
+			range->len += len;
+			return 0;
+		}
 	}
-insert:
+
 	range = kmalloc(sizeof(*range), GFP_KERNEL);
 	if (!range)
 		return -ENOMEM;
-- 
GitLab


From bfaa324e9a8073f539e5cf2d4fe14fe55e317525 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Thu, 3 Jun 2021 10:43:11 -0700
Subject: [PATCH 3505/3804] btrfs: remove total_data_size variable in
 btrfs_batch_insert_items()

clang warns:

  fs/btrfs/delayed-inode.c:684:6: warning: variable 'total_data_size' set
  but not used [-Wunused-but-set-variable]
	  int total_data_size = 0, total_size = 0;
	      ^
  1 warning generated.

This variable's value has been unused since commit fc0d82e103c7 ("btrfs:
sink total_data parameter in setup_items_for_insert"). Eliminate it.

Link: https://github.com/ClangBuiltLinux/linux/issues/1391
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-inode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 2c18ed23aa275..257c1e18abd4f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -681,7 +681,7 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
 {
 	struct btrfs_delayed_item *curr, *next;
 	int free_space;
-	int total_data_size = 0, total_size = 0;
+	int total_size = 0;
 	struct extent_buffer *leaf;
 	char *data_ptr;
 	struct btrfs_key *keys;
@@ -706,7 +706,6 @@ static int btrfs_batch_insert_items(struct btrfs_root *root,
 	 */
 	while (total_size + next->data_len + sizeof(struct btrfs_item) <=
 	       free_space) {
-		total_data_size += next->data_len;
 		total_size += next->data_len + sizeof(struct btrfs_item);
 		list_add_tail(&next->tree_list, &head);
 		nitems++;
-- 
GitLab


From 32cc4f8759e19661e3a349419f0bcf6dcfddd323 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 3 Jun 2021 17:20:21 +0200
Subject: [PATCH 3506/3804] btrfs: sink wait_for_unblock parameter to async
 commit

There's only one caller left, btrfs_ioctl_start_sync, and it passes 0, so
we can remove the switch in btrfs_commit_transaction_async.

A cleanup 9babda9f33fd ("btrfs: Remove async_transid from
btrfs_mksubvol/create_subvol/create_snapshot") removed calls that passed
1, so this is a followup.

As this removes the last call of
wait_current_trans_commit_start_and_unblock, remove the function as well.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c       |  2 +-
 fs/btrfs/transaction.c | 24 ++----------------------
 fs/btrfs/transaction.h |  3 +--
 3 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2bdaf20181977..f83eb4a225cce 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3643,7 +3643,7 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
 		goto out;
 	}
 	transid = trans->transid;
-	ret = btrfs_commit_transaction_async(trans, 0);
+	ret = btrfs_commit_transaction_async(trans);
 	if (ret) {
 		btrfs_end_transaction(trans);
 		return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 22951621363f0..30347e6600276 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1882,19 +1882,6 @@ static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info,
 		   TRANS_ABORTED(trans));
 }
 
-/*
- * wait for the current transaction to start and then become unblocked.
- * caller holds ref.
- */
-static void wait_current_trans_commit_start_and_unblock(
-					struct btrfs_fs_info *fs_info,
-					struct btrfs_transaction *trans)
-{
-	wait_event(fs_info->transaction_wait,
-		   trans->state >= TRANS_STATE_UNBLOCKED ||
-		   TRANS_ABORTED(trans));
-}
-
 /*
  * commit transactions asynchronously. once btrfs_commit_transaction_async
  * returns, any subsequent transaction will not be allowed to join.
@@ -1922,8 +1909,7 @@ static void do_async_commit(struct work_struct *work)
 	kfree(ac);
 }
 
-int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
-				   int wait_for_unblock)
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
 	struct btrfs_async_commit *ac;
@@ -1955,13 +1941,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 		__sb_writers_release(fs_info->sb, SB_FREEZE_FS);
 
 	schedule_work(&ac->work);
-
-	/* wait for transaction to start and unblock */
-	if (wait_for_unblock)
-		wait_current_trans_commit_start_and_unblock(fs_info, cur_trans);
-	else
-		wait_current_trans_commit_start(fs_info, cur_trans);
-
+	wait_current_trans_commit_start(fs_info, cur_trans);
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c49e2266b28ba..0702e8d9b30eb 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -226,8 +226,7 @@ void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
-int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
-				   int wait_for_unblock);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
 bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
 void btrfs_throttle(struct btrfs_fs_info *fs_info);
-- 
GitLab


From ae5d29d4e70ac53d758032df870ca9012b44c69a Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Thu, 3 Jun 2021 17:20:24 +0200
Subject: [PATCH 3507/3804] btrfs: inline wait_current_trans_commit_start in
 its caller

Function wait_current_trans_commit_start is now fairly trivial so it can
be inlined in its only caller.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 30347e6600276..73df8b81496e5 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1870,18 +1870,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 	return ret;
 }
 
-/*
- * wait for the current transaction commit to start and block subsequent
- * transaction joins
- */
-static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info,
-					    struct btrfs_transaction *trans)
-{
-	wait_event(fs_info->transaction_blocked_wait,
-		   trans->state >= TRANS_STATE_COMMIT_START ||
-		   TRANS_ABORTED(trans));
-}
-
 /*
  * commit transactions asynchronously. once btrfs_commit_transaction_async
  * returns, any subsequent transaction will not be allowed to join.
@@ -1941,7 +1929,13 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
 		__sb_writers_release(fs_info->sb, SB_FREEZE_FS);
 
 	schedule_work(&ac->work);
-	wait_current_trans_commit_start(fs_info, cur_trans);
+	/*
+	 * Wait for the current transaction commit to start and block
+	 * subsequent transaction joins
+	 */
+	wait_event(fs_info->transaction_blocked_wait,
+		   cur_trans->state >= TRANS_STATE_COMMIT_START ||
+		   TRANS_ABORTED(cur_trans));
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
 
-- 
GitLab


From d187f217335dba2b49fc9002aab2004e04acddee Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 18 Jun 2021 13:54:08 +0200
Subject: [PATCH 3508/3804] x86/sev: Make sure IRQs are disabled while GHCB is
 active

The #VC handler only cares about IRQs being disabled while the GHCB is
active, as it must not be interrupted by something which could cause
another #VC while it holds the GHCB (NMI is the exception for which the
backup GHCB exists).

Make sure nothing interrupts the code path while the GHCB is active
by making sure that callers of __sev_{get,put}_ghcb() have disabled
interrupts upfront.

 [ bp: Massage commit message. ]

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210618115409.22735-2-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 8178db07a06a6..9f32cbb773d96 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -12,7 +12,6 @@
 #include <linux/sched/debug.h>	/* For show_regs() */
 #include <linux/percpu-defs.h>
 #include <linux/mem_encrypt.h>
-#include <linux/lockdep.h>
 #include <linux/printk.h>
 #include <linux/mm_types.h>
 #include <linux/set_memory.h>
@@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void)
 	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
 }
 
-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
 
+	WARN_ON(!irqs_disabled());
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;
 
@@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 			data->ghcb_active        = false;
 			data->backup_ghcb_active = false;
 
+			instrumentation_begin();
 			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+			instrumentation_end();
 		}
 
 		/* Mark backup_ghcb active before writing to it */
@@ -486,11 +495,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"
 
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+static noinstr void __sev_put_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;
 
+	WARN_ON(!irqs_disabled());
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;
 
@@ -514,7 +525,7 @@ void noinstr __sev_es_nmi_complete(void)
 	struct ghcb_state state;
 	struct ghcb *ghcb;
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
@@ -524,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void)
 	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
 	VMGEXIT();
 
-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 }
 
 static u64 get_jump_table_addr(void)
@@ -536,7 +547,7 @@ static u64 get_jump_table_addr(void)
 
 	local_irq_save(flags);
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
@@ -550,7 +561,7 @@ static u64 get_jump_table_addr(void)
 	    ghcb_sw_exit_info_2_is_valid(ghcb))
 		ret = ghcb->save.sw_exit_info_2;
 
-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 
 	local_irq_restore(flags);
 
@@ -675,7 +686,7 @@ static void sev_es_ap_hlt_loop(void)
 	struct ghcb_state state;
 	struct ghcb *ghcb;
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	while (true) {
 		vc_ghcb_invalidate(ghcb);
@@ -692,7 +703,7 @@ static void sev_es_ap_hlt_loop(void)
 			break;
 	}
 
-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 }
 
 /*
@@ -1351,7 +1362,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	}
 
 	irq_state = irqentry_nmi_enter(regs);
-	lockdep_assert_irqs_disabled();
 	instrumentation_begin();
 
 	/*
@@ -1360,7 +1370,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
 	 */
 
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);
 
 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
@@ -1368,7 +1378,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	if (result == ES_OK)
 		result = vc_handle_exitcode(&ctxt, ghcb, error_code);
 
-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 
 	/* Done - now check the result */
 	switch (result) {
-- 
GitLab


From be1a5408868af341f61f93c191b5e346ee88c82a Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 18 Jun 2021 13:54:09 +0200
Subject: [PATCH 3509/3804] x86/sev: Split up runtime #VC handler for correct
 state tracking

Split up the #VC handler code into a from-user and a from-kernel part.
This allows clean and correct state tracking, as the #VC handler needs
to enter NMI-state when raised from kernel mode and plain IRQ state when
raised from user-mode.

Fixes: 62441a1fb532 ("x86/sev-es: Correctly track IRQ states in runtime #VC handler")
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210618115409.22735-3-joro@8bytes.org
---
 arch/x86/entry/entry_64.S       |   4 +-
 arch/x86/include/asm/idtentry.h |  29 +++----
 arch/x86/kernel/sev.c           | 148 +++++++++++++++++---------------
 3 files changed, 91 insertions(+), 90 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a16a5294d55f6..1886aaf199143 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
 
 	movq	%rsp, %rdi		/* pt_regs pointer */
 
-	call	\cfunc
+	call	kernel_\cfunc
 
 	/*
 	 * No need to switch back to the IST stack. The current stack is either
@@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)
 
 	/* Switch to the regular task stack */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_body safe_stack_\cfunc, has_error_code=1
+	idtentry_body user_\cfunc, has_error_code=1
 
 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 73d45b0dfff2d..cd9f3e3049449 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
  */
 #define DECLARE_IDTENTRY_VC(vector, func)				\
 	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
-	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
-	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+	__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code);	\
+	__visible noinstr void   user_##func(struct pt_regs *regs, unsigned long error_code)
 
 /**
  * DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
 	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
 
 /**
- * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
-				   which runs on a safe stack.
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+			       when raised from kernel mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_SAFE_STACK(func)				\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+#define DEFINE_IDTENTRY_VC_KERNEL(func)				\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
 
 /**
- * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
-			    which runs on the VC fall-back stack
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+			     when raised from user mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_IST(func)				\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
-
-/**
- * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
- * @func:	Function name of the entry point
- *
- * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
- */
-#define DEFINE_IDTENTRY_VC(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+#define DEFINE_IDTENTRY_VC_USER(func)				\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
 
 #else	/* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 9f32cbb773d96..87a4b00f028e5 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -793,7 +793,7 @@ void __init sev_es_init_vc_handling(void)
 	sev_es_setup_play_dead();
 
 	/* Secondary CPUs use the runtime #VC handler */
-	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
 }
 
 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
@@ -1231,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
 	return ES_EXCEPTION;
 }
 
-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		noist_exc_debug(regs);
-	else
-		exc_debug(regs);
-}
-
 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 					 struct ghcb *ghcb,
 					 unsigned long exit_code)
@@ -1334,41 +1326,13 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
 	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
 }
 
-/*
- * Main #VC exception handler. It is called when the entry code was able to
- * switch off the IST to a safe kernel stack.
- *
- * With the current implementation it is always possible to switch to a safe
- * stack because #VC exceptions only happen at known places, like intercepted
- * instructions or accesses to MMIO areas/IO ports. They can also happen with
- * code instrumentation when the hypervisor intercepts #DB, but the critical
- * paths are forbidden to be instrumented, so #DB exceptions currently also
- * only happen in safe places.
- */
-DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
 {
-	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
 	enum es_result result;
 	struct ghcb *ghcb;
-
-	/*
-	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
-	 */
-	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
-		vc_handle_trap_db(regs);
-		return;
-	}
-
-	irq_state = irqentry_nmi_enter(regs);
-	instrumentation_begin();
-
-	/*
-	 * This is invoked through an interrupt gate, so IRQs are disabled. The
-	 * code below might walk page-tables for user or kernel addresses, so
-	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
-	 */
+	bool ret = true;
 
 	ghcb = __sev_get_ghcb(&state);
 
@@ -1388,15 +1352,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	case ES_UNSUPPORTED:
 		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_VMM_ERROR:
 		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_DECODE_FAILED:
 		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_EXCEPTION:
 		vc_forward_exception(&ctxt);
 		break;
@@ -1412,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		BUG();
 	}
 
-out:
-	instrumentation_end();
-	irqentry_nmi_exit(regs, irq_state);
+	return ret;
+}
 
-	return;
+static __always_inline bool vc_is_db(unsigned long error_code)
+{
+	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
+}
 
-fail:
-	if (user_mode(regs)) {
-		/*
-		 * Do not kill the machine if user-space triggered the
-		 * exception. Send SIGBUS instead and let user-space deal with
-		 * it.
-		 */
-		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
-	} else {
-		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
-			 result);
+/*
+ * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
+ * and will panic when an error happens.
+ */
+DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
+{
+	irqentry_state_t irq_state;
+
+	/*
+	 * With the current implementation it is always possible to switch to a
+	 * safe stack because #VC exceptions only happen at known places, like
+	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
+	 * also happen with code instrumentation when the hypervisor intercepts
+	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
+	 * exceptions currently also only happen in safe places.
+	 *
+	 * But keep this here in case the noinstr annotations are violated due
+	 * to a bug elsewhere.
+	 */
+	if (unlikely(on_vc_fallback_stack(regs))) {
+		instrumentation_begin();
+		panic("Can't handle #VC exception from unsupported context\n");
+		instrumentation_end();
+	}
+
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		exc_debug(regs);
+		return;
+	}
+
+	irq_state = irqentry_nmi_enter(regs);
 
+	instrumentation_begin();
+
+	if (!vc_raw_handle_exception(regs, error_code)) {
 		/* Show some debug info */
 		show_regs(regs);
 
@@ -1440,23 +1435,38 @@ fail:
 		panic("Returned from Terminate-Request to Hypervisor\n");
 	}
 
-	goto out;
+	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
 }
 
-/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
-DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
+/*
+ * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
+ * and will kill the current task with SIGBUS when an error happens.
+ */
+DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
 {
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		noist_exc_debug(regs);
+		return;
+	}
+
+	irqentry_enter_from_user_mode(regs);
 	instrumentation_begin();
-	panic("Can't handle #VC exception from unsupported context\n");
-	instrumentation_end();
-}
 
-DEFINE_IDTENTRY_VC(exc_vmm_communication)
-{
-	if (likely(!on_vc_fallback_stack(regs)))
-		safe_stack_exc_vmm_communication(regs, error_code);
-	else
-		ist_exc_vmm_communication(regs, error_code);
+	if (!vc_raw_handle_exception(regs, error_code)) {
+		/*
+		 * Do not kill the machine if user-space triggered the
+		 * exception. Send SIGBUS instead and let user-space deal with
+		 * it.
+		 */
+		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+	}
+
+	instrumentation_end();
+	irqentry_exit_to_user_mode(regs);
 }
 
 bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
-- 
GitLab


From 31197d3a0f1caeb60fb01f6755e28347e4f44037 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 16:13:55 +0200
Subject: [PATCH 3510/3804] objtool/x86: Ignore __x86_indirect_alt_* symbols

Because the __x86_indirect_alt* symbols are just that, objtool will
try and validate them as regular symbols, instead of the alternative
replacements that they are.

This goes sideways for FRAME_POINTER=y builds; which generate a fair
amount of warnings.

Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/YNCgxwLBiK9wclYJ@hirez.programming.kicks-ass.net
---
 arch/x86/lib/retpoline.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 4d32cb06ffd5b..ec9922cba30a4 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -58,12 +58,16 @@ SYM_FUNC_START_NOALIGN(__x86_indirect_alt_call_\reg)
 2:	.skip	5-(2b-1b), 0x90
 SYM_FUNC_END(__x86_indirect_alt_call_\reg)
 
+STACK_FRAME_NON_STANDARD(__x86_indirect_alt_call_\reg)
+
 SYM_FUNC_START_NOALIGN(__x86_indirect_alt_jmp_\reg)
 	ANNOTATE_RETPOLINE_SAFE
 1:	jmp	*%\reg
 2:	.skip	5-(2b-1b), 0x90
 SYM_FUNC_END(__x86_indirect_alt_jmp_\reg)
 
+STACK_FRAME_NON_STANDARD(__x86_indirect_alt_jmp_\reg)
+
 .endm
 
 /*
-- 
GitLab


From 1815d9c86e3090477fbde066ff314a7e9721ee0f Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Sun, 20 Jun 2021 19:03:26 +0800
Subject: [PATCH 3511/3804] drm: add a locked version of drm_is_current_master

While checking the master status of the DRM file in
drm_is_current_master(), the device's master mutex should be
held. Without the mutex, the pointer fpriv->master may be freed
concurrently by another process calling drm_setmaster_ioctl(). This
could lead to use-after-free errors when the pointer is subsequently
dereferenced in drm_lease_owner().

The callers of drm_is_current_master() from drm_auth.c hold the
device's master mutex, but external callers do not. Hence, we implement
drm_is_current_master_locked() to be used within drm_auth.c, and
modify drm_is_current_master() to grab the device's master mutex
before checking the master status.

Reported-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210620110327.4964-2-desmondcheongzx@gmail.com
---
 drivers/gpu/drm/drm_auth.c | 51 ++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 232abbba36868..86d4b72e95cbd 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -61,6 +61,35 @@
  * trusted clients.
  */
 
+static bool drm_is_current_master_locked(struct drm_file *fpriv)
+{
+	lockdep_assert_held_once(&fpriv->master->dev->master_mutex);
+
+	return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
+}
+
+/**
+ * drm_is_current_master - checks whether @fpriv is the current master
+ * @fpriv: DRM file private
+ *
+ * Checks whether @fpriv is current master on its device. This decides whether a
+ * client is allowed to run DRM_MASTER IOCTLs.
+ *
+ * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
+ * - the current master is assumed to own the non-shareable display hardware.
+ */
+bool drm_is_current_master(struct drm_file *fpriv)
+{
+	bool ret;
+
+	mutex_lock(&fpriv->master->dev->master_mutex);
+	ret = drm_is_current_master_locked(fpriv);
+	mutex_unlock(&fpriv->master->dev->master_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_is_current_master);
+
 int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_auth *auth = data;
@@ -223,7 +252,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_unlock;
 
-	if (drm_is_current_master(file_priv))
+	if (drm_is_current_master_locked(file_priv))
 		goto out_unlock;
 
 	if (dev->master) {
@@ -272,7 +301,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_unlock;
 
-	if (!drm_is_current_master(file_priv)) {
+	if (!drm_is_current_master_locked(file_priv)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -321,7 +350,7 @@ void drm_master_release(struct drm_file *file_priv)
 	if (file_priv->magic)
 		idr_remove(&file_priv->master->magic_map, file_priv->magic);
 
-	if (!drm_is_current_master(file_priv))
+	if (!drm_is_current_master_locked(file_priv))
 		goto out;
 
 	drm_legacy_lock_master_cleanup(dev, master);
@@ -342,22 +371,6 @@ out:
 	mutex_unlock(&dev->master_mutex);
 }
 
-/**
- * drm_is_current_master - checks whether @priv is the current master
- * @fpriv: DRM file private
- *
- * Checks whether @fpriv is current master on its device. This decides whether a
- * client is allowed to run DRM_MASTER IOCTLs.
- *
- * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
- * - the current master is assumed to own the non-shareable display hardware.
- */
-bool drm_is_current_master(struct drm_file *fpriv)
-{
-	return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
-}
-EXPORT_SYMBOL(drm_is_current_master);
-
 /**
  * drm_master_get - reference a master pointer
  * @master: &struct drm_master
-- 
GitLab


From 5140bc7d6bc8abad58b4f2a2c011607bfd922992 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 18 Jun 2021 16:59:41 -0700
Subject: [PATCH 3512/3804] KVM: VMX: Skip #PF(RSVD) intercepts when emulating
 smaller maxphyaddr

As part of smaller maxphyaddr emulation, kvm needs to intercept
present page faults to see if it needs to add the RSVD flag (bit 3) to
the error code. However, there is no need to intercept page faults
that already have the RSVD flag set. When setting up the page fault
intercept, add the RSVD flag into the #PF error code mask field (but
not the #PF error code match field) to skip the intercept when the
RSVD flag is already set.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210618235941.1041604-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 889e83f712352..ab6f682645d71 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -747,16 +747,21 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
         else {
-		/*
-		 * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
-		 * between guest and host.  In that case we only care about present
-		 * faults.  For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
-		 * prepare_vmcs02_rare.
-		 */
-		bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
-		int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+		int mask = 0, match = 0;
+
+		if (enable_ept && (eb & (1u << PF_VECTOR))) {
+			/*
+			 * If EPT is enabled, #PF is currently only intercepted
+			 * if MAXPHYADDR is smaller on the guest than on the
+			 * host.  In that case we only care about present,
+			 * non-reserved faults.  For vmcs02, however, PFEC_MASK
+			 * and PFEC_MATCH are set in prepare_vmcs02_rare.
+			 */
+			mask = PFERR_PRESENT_MASK | PFERR_RSVD_MASK;
+			match = PFERR_PRESENT_MASK;
+		}
 		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
-		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, match);
 	}
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
-- 
GitLab


From ba1f82456ba8438a8abc96274d57bfe76d34a4a8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 18 Jun 2021 14:46:58 -0700
Subject: [PATCH 3513/3804] KVM: nVMX: Dynamically compute max VMCS index for
 vmcs12

Calculate the max VMCS index for vmcs12 by walking the array to find the
actual max index.  Hardcoding the index is prone to bitrot, and the
calculation is only done on KVM bringup (albeit on every CPU, but there
aren't _that_ many null entries in the array).

Fixes: 3c0f99366e34 ("KVM: nVMX: Add a TSC multiplier field in VMCS12")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210618214658.2700765-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 37 +++++++++++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/vmcs.h   |  8 ++++++++
 arch/x86/kvm/vmx/vmcs12.h |  6 ------
 3 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b531e08a095bb..183fd9d62fc52 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6374,6 +6374,40 @@ void nested_vmx_set_vmcs_shadowing_bitmap(void)
 	}
 }
 
+/*
+ * Indexing into the vmcs12 uses the VMCS encoding rotated left by 6.  Undo
+ * that madness to get the encoding for comparison.
+ */
+#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
+
+static u64 nested_vmx_calc_vmcs_enum_msr(void)
+{
+	/*
+	 * Note these are the so called "index" of the VMCS field encoding, not
+	 * the index into vmcs12.
+	 */
+	unsigned int max_idx, idx;
+	int i;
+
+	/*
+	 * For better or worse, KVM allows VMREAD/VMWRITE to all fields in
+	 * vmcs12, regardless of whether or not the associated feature is
+	 * exposed to L1.  Simply find the field with the highest index.
+	 */
+	max_idx = 0;
+	for (i = 0; i < nr_vmcs12_fields; i++) {
+		/* The vmcs12 table is very, very sparsely populated. */
+		if (!vmcs_field_to_offset_table[i])
+			continue;
+
+		idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
+		if (idx > max_idx)
+			max_idx = idx;
+	}
+
+	return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
+}
+
 /*
  * nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
  * returned for the various VMX controls MSRs when nested VMX is enabled.
@@ -6619,8 +6653,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
 	rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
 	rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
 
-	/* highest index: VMX_PREEMPTION_TIMER_VALUE */
-	msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+	msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
 }
 
 void nested_vmx_hardware_unsetup(void)
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index 1472c6c376f74..de3b04d4b587a 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -164,4 +164,12 @@ static inline int vmcs_field_readonly(unsigned long field)
 	return (((field >> 10) & 0x3) == 1);
 }
 
+#define VMCS_FIELD_INDEX_SHIFT		(1)
+#define VMCS_FIELD_INDEX_MASK		GENMASK(9, 1)
+
+static inline unsigned int vmcs_field_index(unsigned long field)
+{
+	return (field & VMCS_FIELD_INDEX_MASK) >> VMCS_FIELD_INDEX_SHIFT;
+}
+
 #endif /* __KVM_X86_VMX_VMCS_H */
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index bb81a23afe898..5e0e1b39f4950 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -205,12 +205,6 @@ struct __packed vmcs12 {
  */
 #define VMCS12_SIZE		KVM_STATE_NESTED_VMX_VMCS_SIZE
 
-/*
- * VMCS12_MAX_FIELD_INDEX is the highest index value used in any
- * supported VMCS12 field encoding.
- */
-#define VMCS12_MAX_FIELD_INDEX 0x17
-
 /*
  * For save/restore compatibility, the vmcs12 field offsets must not change.
  */
-- 
GitLab


From 2062d44da3499eed3c7d005df8f0b54d300ac0b5 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 18 Jun 2021 10:17:02 +0530
Subject: [PATCH 3514/3804] arm64/mm: Rename ARM64_SWAPPER_USES_SECTION_MAPS

ARM64_SWAPPER_USES_SECTION_MAPS implies that PMD level huge page mappings
are used for swapper, idmap and vmemmap. Let's make it PMD explicit, removing
any possible confusion with generic memory sections, and also a bit more
generic as it's applicable for idmap and vmemmap mappings as well. Hence
rename it as ARM64_KERNEL_USES_PMD_MAPS instead.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/1623991622-24294-1-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/kernel-pgtable.h | 10 +++++-----
 arch/arm64/mm/mmu.c                     |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 1260187adb31f..3512184cfec17 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -18,9 +18,9 @@
  * 64K (section size = 512M).
  */
 #ifdef CONFIG_ARM64_4K_PAGES
-#define ARM64_SWAPPER_USES_SECTION_MAPS 1
+#define ARM64_KERNEL_USES_PMD_MAPS 1
 #else
-#define ARM64_SWAPPER_USES_SECTION_MAPS 0
+#define ARM64_KERNEL_USES_PMD_MAPS 0
 #endif
 
 /*
@@ -33,7 +33,7 @@
  * VA range, so pages required to map highest possible PA are reserved in all
  * cases.
  */
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - 1)
 #define IDMAP_PGTABLE_LEVELS	(ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
 #else
@@ -90,7 +90,7 @@
 #define IDMAP_DIR_SIZE		(IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
 
 /* Initial memory map size */
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_BLOCK_SHIFT	PMD_SHIFT
 #define SWAPPER_BLOCK_SIZE	PMD_SIZE
 #define SWAPPER_TABLE_SHIFT	PUD_SHIFT
@@ -106,7 +106,7 @@
 #define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#if ARM64_SWAPPER_USES_SECTION_MAPS
+#if ARM64_KERNEL_USES_PMD_MAPS
 #define SWAPPER_MM_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
 #else
 #define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5b75f7eefb726..e04e4b6bdf16b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1113,14 +1113,14 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 }
 #endif
 
-#if !ARM64_SWAPPER_USES_SECTION_MAPS
+#if !ARM64_KERNEL_USES_PMD_MAPS
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
 	return vmemmap_populate_basepages(start, end, node, altmap);
 }
-#else	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
+#else	/* !ARM64_KERNEL_USES_PMD_MAPS */
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
@@ -1165,7 +1165,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 
 	return 0;
 }
-#endif	/* !ARM64_SWAPPER_USES_SECTION_MAPS */
+#endif	/* !ARM64_KERNEL_USES_PMD_MAPS */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
-- 
GitLab


From 3093e6cca3ba7d47848068cb256c489675125181 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 17 Jun 2021 15:54:13 +0200
Subject: [PATCH 3515/3804] gpio: mxc: Fix disabled interrupt wake-up support

A disabled/masked interrupt marked as wakeup source must be re-enabled
and unmasked in order to be able to wake up the host. That can be done
by flagging the irqchip with IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND.

Note: It 'sometimes' works without that change, but only thanks to the
lazy generic interrupt disabling (keeping interrupt unmasked).

Reported-by: Michal Koziel <michal.koziel@emlogic.no>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-mxc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c
index 157106e1e4381..b9fdf05d76694 100644
--- a/drivers/gpio/gpio-mxc.c
+++ b/drivers/gpio/gpio-mxc.c
@@ -334,7 +334,7 @@ static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base)
 	ct->chip.irq_unmask = irq_gc_mask_set_bit;
 	ct->chip.irq_set_type = gpio_set_irq_type;
 	ct->chip.irq_set_wake = gpio_set_wake_irq;
-	ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND;
+	ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND;
 	ct->regs.ack = GPIO_ISR;
 	ct->regs.mask = GPIO_IMR;
 
-- 
GitLab


From 66e9c6a86b800f60b1e1ea1ff7271f9e6ed1fa96 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 14 Jun 2021 14:13:41 +0100
Subject: [PATCH 3516/3804] afs: Fix afs_write_end() to handle short writes

Fix afs_write_end() to correctly handle a short copy into the intended
write region of the page.  Two things are necessary:

 (1) If the page is not up to date, then we should just return 0
     (ie. indicating a zero-length copy).  The loop in
     generic_perform_write() will go around again, possibly breaking up the
     iterator into discrete chunks[1].

     This is analogous to commit b9de313cf05fe08fa59efaf19756ec5283af672a
     for ceph.

 (2) The page should not have been set uptodate if it wasn't completely set
     up by netfs_write_begin() (this will be fixed in the next patch), so
     we need to set uptodate here in such a case.

Also remove the assertion that was checking that the page was set uptodate
since it's now set uptodate if it wasn't already a few lines above.  The
assertion was from when uptodate was set elsewhere.

Changes:
v3: Remove the handling of len exceeding the end of the page.

Fixes: 3003bbd0697b ("afs: Use the netfs_write_begin() helper")
Reported-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/YMwVp268KTzTf8cN@zeniv-ca.linux.org.uk/ [1]
Link: https://lore.kernel.org/r/162367682522.460125.5652091227576721609.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/162391825688.1173366.3437507255136307904.stgit@warthog.procyon.org.uk/ # v2
---
 fs/afs/write.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index a523bb86915d0..641c54679399f 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -118,6 +118,15 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 	_enter("{%llx:%llu},{%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, page->index);
 
+	if (!PageUptodate(page)) {
+		if (copied < len) {
+			copied = 0;
+			goto out;
+		}
+
+		SetPageUptodate(page);
+	}
+
 	if (copied == 0)
 		goto out;
 
@@ -132,8 +141,6 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		write_sequnlock(&vnode->cb_lock);
 	}
 
-	ASSERT(PageUptodate(page));
-
 	if (PagePrivate(page)) {
 		priv = page_private(page);
 		f = afs_page_dirty_from(page, priv);
-- 
GitLab


From 827a746f405d25f79560c7868474aec5aee174e1 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Sun, 13 Jun 2021 19:33:45 -0400
Subject: [PATCH 3517/3804] netfs: fix test for whether we can skip read when
 writing beyond EOF

It's not sufficient to skip reading when the pos is beyond the EOF.
There may be data at the head of the page that we need to fill in
before the write.

Add a new helper function that corrects and clarifies the logic of
when we can skip reads, and have it only zero out the part of the page
that won't have data copied in for the write.

Finally, don't set the page Uptodate after zeroing. It's not up to date
since the write data won't have been copied in yet.

[DH made the following changes:

 - Prefixed the new function with "netfs_".

 - Don't call zero_user_segments() for a full-page write.

 - Altered the beyond-last-page check to avoid a DIV instruction and got
   rid of then-redundant zero-length file check.
]

Fixes: e1b1240c1ff5f ("netfs: Add write_begin helper")
Reported-by: Andrew W Elble <aweits@rit.edu>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: ceph-devel@vger.kernel.org
Link: https://lore.kernel.org/r/20210613233345.113565-1-jlayton@kernel.org/
Link: https://lore.kernel.org/r/162367683365.460125.4467036947364047314.stgit@warthog.procyon.org.uk/ # v1
Link: https://lore.kernel.org/r/162391826758.1173366.11794946719301590013.stgit@warthog.procyon.org.uk/ # v2
---
 fs/netfs/read_helper.c | 49 +++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 725614625ed48..0b6cd3b8734c6 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -1011,12 +1011,42 @@ out:
 }
 EXPORT_SYMBOL(netfs_readpage);
 
-static void netfs_clear_thp(struct page *page)
+/**
+ * netfs_skip_page_read - prep a page for writing without reading first
+ * @page: page being prepared
+ * @pos: starting position for the write
+ * @len: length of write
+ *
+ * In some cases, write_begin doesn't need to read at all:
+ * - full page write
+ * - write that lies in a page that is completely beyond EOF
+ * - write that covers the page from start to EOF or beyond it
+ *
+ * If any of these criteria are met, then zero out the unwritten parts
+ * of the page and return true. Otherwise, return false.
+ */
+static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
 {
-	unsigned int i;
+	struct inode *inode = page->mapping->host;
+	loff_t i_size = i_size_read(inode);
+	size_t offset = offset_in_thp(page, pos);
+
+	/* Full page write */
+	if (offset == 0 && len >= thp_size(page))
+		return true;
+
+	/* pos beyond last page in the file */
+	if (pos - offset >= i_size)
+		goto zero_out;
+
+	/* Write that covers from the start of the page to EOF or beyond */
+	if (offset == 0 && (pos + len) >= i_size)
+		goto zero_out;
 
-	for (i = 0; i < thp_nr_pages(page); i++)
-		clear_highpage(page + i);
+	return false;
+zero_out:
+	zero_user_segments(page, 0, offset, offset + len, thp_size(page));
+	return true;
 }
 
 /**
@@ -1024,7 +1054,7 @@ static void netfs_clear_thp(struct page *page)
  * @file: The file to read from
  * @mapping: The mapping to read from
  * @pos: File position at which the write will begin
- * @len: The length of the write in this page
+ * @len: The length of the write (may extend beyond the end of the page chosen)
  * @flags: AOP_* flags
  * @_page: Where to put the resultant page
  * @_fsdata: Place for the netfs to store a cookie
@@ -1061,8 +1091,6 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
 	struct inode *inode = file_inode(file);
 	unsigned int debug_index = 0;
 	pgoff_t index = pos >> PAGE_SHIFT;
-	int pos_in_page = pos & ~PAGE_MASK;
-	loff_t size;
 	int ret;
 
 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
@@ -1090,13 +1118,8 @@ retry:
 	 * within the cache granule containing the EOF, in which case we need
 	 * to preload the granule.
 	 */
-	size = i_size_read(inode);
 	if (!ops->is_cache_enabled(inode) &&
-	    ((pos_in_page == 0 && len == thp_size(page)) ||
-	     (pos >= size) ||
-	     (pos_in_page == 0 && (pos + len) >= size))) {
-		netfs_clear_thp(page);
-		SetPageUptodate(page);
+	    netfs_skip_page_read(page, pos, len)) {
 		netfs_stat(&netfs_n_rh_write_zskip);
 		goto have_page_no_wait;
 	}
-- 
GitLab


From 4c6a23188e26339fd3dbc78e6ce6fe0fc4009553 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <mdaenzer@redhat.com>
Date: Wed, 16 Jun 2021 12:46:51 +0200
Subject: [PATCH 3518/3804] drm/amdgpu: Call drm_framebuffer_init last for
 framebuffer init
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Once drm_framebuffer_init has returned 0, the framebuffer is hooked up
to the reference counting machinery and can no longer be destroyed with
a simple kfree. Therefore, it must be called last.

If drm_framebuffer_init returns 0 but its caller then returns non-0,
there will likely be memory corruption fireworks down the road.
The following led me to this fix:

[   12.891228] kernel BUG at lib/list_debug.c:25!
[...]
[   12.891263] RIP: 0010:__list_add_valid+0x4b/0x70
[...]
[   12.891324] Call Trace:
[   12.891330]  drm_framebuffer_init+0xb5/0x100 [drm]
[   12.891378]  amdgpu_display_gem_fb_verify_and_init+0x47/0x120 [amdgpu]
[   12.891592]  ? amdgpu_display_user_framebuffer_create+0x10d/0x1f0 [amdgpu]
[   12.891794]  amdgpu_display_user_framebuffer_create+0x126/0x1f0 [amdgpu]
[   12.891995]  drm_internal_framebuffer_create+0x378/0x3f0 [drm]
[   12.892036]  ? drm_internal_framebuffer_create+0x3f0/0x3f0 [drm]
[   12.892075]  drm_mode_addfb2+0x34/0xd0 [drm]
[   12.892115]  ? drm_internal_framebuffer_create+0x3f0/0x3f0 [drm]
[   12.892153]  drm_ioctl_kernel+0xe2/0x150 [drm]
[   12.892193]  drm_ioctl+0x3da/0x460 [drm]
[   12.892232]  ? drm_internal_framebuffer_create+0x3f0/0x3f0 [drm]
[   12.892274]  amdgpu_drm_ioctl+0x43/0x80 [amdgpu]
[   12.892475]  __se_sys_ioctl+0x72/0xc0
[   12.892483]  do_syscall_64+0x33/0x40
[   12.892491]  entry_SYSCALL_64_after_hwframe+0x44/0xae

Fixes: f258907fdd835e "drm/amdgpu: Verify bo size can fit framebuffer size on init."
Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index c13985fb35bed..2a4cd7d377bfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1047,11 +1047,12 @@ int amdgpu_display_gem_fb_init(struct drm_device *dev,
 
 	rfb->base.obj[0] = obj;
 	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+
+	ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
 	if (ret)
 		goto err;
 
-	ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
+	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
 	if (ret)
 		goto err;
 
@@ -1071,9 +1072,6 @@ int amdgpu_display_gem_fb_verify_and_init(
 
 	rfb->base.obj[0] = obj;
 	drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
-	if (ret)
-		goto err;
 	/* Verify that the modifier is supported. */
 	if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
 				      mode_cmd->modifier[0])) {
@@ -1092,6 +1090,10 @@ int amdgpu_display_gem_fb_verify_and_init(
 	if (ret)
 		goto err;
 
+	ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+	if (ret)
+		goto err;
+
 	return 0;
 err:
 	drm_dbg_kms(dev, "Failed to verify and init gem fb: %d\n", ret);
-- 
GitLab


From baacf52a473b24e10322b67757ddb92ab8d86717 Mon Sep 17 00:00:00 2001
From: Yifan Zhang <yifan1.zhang@amd.com>
Date: Sat, 19 Jun 2021 11:39:43 +0800
Subject: [PATCH 3519/3804] Revert "drm/amdgpu/gfx10: enlarge
 CP_MEC_DOORBELL_RANGE_UPPER to cover full doorbell."

This reverts commit 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3.

Reason for revert: Side effect of enlarging CP_MEC_DOORBELL_RANGE may
cause some APUs to fail to enter gfxoff in certain use cases.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 327b1f8213a8b..0597aeb5f0e89 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -6871,12 +6871,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 			(adev->doorbell_index.kiq * 2) << 2);
-		/* If GC has entered CGPG, ringing doorbell > first page doesn't
-		 * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
-		 * this issue.
-		 */
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-			(adev->doorbell.size - 4));
+			(adev->doorbell_index.userqueue_end * 2) << 2);
 	}
 
 	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-- 
GitLab


From ee5468b9f1d3bf48082eed351dace14598e8ca39 Mon Sep 17 00:00:00 2001
From: Yifan Zhang <yifan1.zhang@amd.com>
Date: Sat, 19 Jun 2021 11:40:54 +0800
Subject: [PATCH 3520/3804] Revert "drm/amdgpu/gfx9: fix the doorbell missing
 when in CGPG issue."

This reverts commit 4cbbe34807938e6e494e535a68d5ff64edac3f20.

Reason for revert: side effect of enlarging CP_MEC_DOORBELL_RANGE may
cause some APUs to fail to enter gfxoff in certain use cases.

Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c09225d065c27..516467e962b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3673,12 +3673,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
 					(adev->doorbell_index.kiq * 2) << 2);
-		/* If GC has entered CGPG, ringing doorbell > first page doesn't
-		 * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround
-		 * this issue.
-		 */
 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-					(adev->doorbell.size - 4));
+					(adev->doorbell_index.userqueue_end * 2) << 2);
 	}
 
 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-- 
GitLab


From 2f9ace5d4557f8ceea07969d6214c320f5e50c0c Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 31 May 2021 12:33:40 +0200
Subject: [PATCH 3521/3804] KVM: arm64: selftests: get-reg-list: Introduce vcpu
 configs

We already break register lists into sublists that get selected based
on vcpu config. However, since we only had two configs (vregs and sve),
we didn't structure the code very well to manage them. Restructure it
now to more cleanly handle register sublists that are dependent on the
vcpu config.

This patch has no intended functional change (except for the vcpu
config name now being prepended to all output).

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210531103344.29325-2-drjones@redhat.com
---
 .../selftests/kvm/aarch64/get-reg-list.c      | 265 ++++++++++++------
 1 file changed, 175 insertions(+), 90 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 486932164cf21..7bb09ce20ddef 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -37,7 +37,30 @@
 #define reg_list_sve() (false)
 #endif
 
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+struct reg_sublist {
+	const char *name;
+	long capability;
+	int feature;
+	bool finalize;
+	__u64 *regs;
+	__u64 regs_n;
+	__u64 *rejects_set;
+	__u64 rejects_set_n;
+};
+
+struct vcpu_config {
+	char *name;
+	struct reg_sublist sublists[];
+};
+
+static struct vcpu_config vregs_config;
+static struct vcpu_config sve_config;
+
+#define for_each_sublist(c, s)							\
+	for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
 
 #define for_each_reg(i)								\
 	for ((i) = 0; (i) < reg_list->n; ++(i))
@@ -54,12 +77,41 @@
 	for_each_reg_filtered(i)						\
 		if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
 
+static const char *config_name(struct vcpu_config *c)
+{
+	struct reg_sublist *s;
+	int len = 0;
 
-static struct kvm_reg_list *reg_list;
+	if (c->name)
+		return c->name;
 
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+	for_each_sublist(c, s)
+		len += strlen(s->name) + 1;
+
+	c->name = malloc(len);
+
+	len = 0;
+	for_each_sublist(c, s) {
+		if (!strcmp(s->name, "base"))
+			continue;
+		strcat(c->name + len, s->name);
+		len += strlen(s->name) + 1;
+		c->name[len - 1] = '+';
+	}
+	c->name[len - 1] = '\0';
+
+	return c->name;
+}
+
+static bool has_cap(struct vcpu_config *c, long capability)
+{
+	struct reg_sublist *s;
+
+	for_each_sublist(c, s)
+		if (s->capability == capability)
+			return true;
+	return false;
+}
 
 static bool filter_reg(__u64 reg)
 {
@@ -96,11 +148,13 @@ static const char *str_with_index(const char *template, __u64 index)
 	return (const char *)str;
 }
 
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
 #define CORE_REGS_XX_NR_WORDS	2
 #define CORE_SPSR_XX_NR_WORDS	2
 #define CORE_FPREGS_XX_NR_WORDS	4
 
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
 {
 	__u64 core_off = id & ~REG_MASK, idx;
 
@@ -111,7 +165,7 @@ static const char *core_id_to_str(__u64 id)
 	case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
 	     KVM_REG_ARM_CORE_REG(regs.regs[30]):
 		idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
-		TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+		TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
 		return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
 	case KVM_REG_ARM_CORE_REG(regs.sp):
 		return "KVM_REG_ARM_CORE_REG(regs.sp)";
@@ -126,12 +180,12 @@ static const char *core_id_to_str(__u64 id)
 	case KVM_REG_ARM_CORE_REG(spsr[0]) ...
 	     KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
 		idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
-		TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+		TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
 		return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
 	case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
 	     KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
 		idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
-		TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+		TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
 		return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
 	case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
 		return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
@@ -139,11 +193,11 @@ static const char *core_id_to_str(__u64 id)
 		return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
 	}
 
-	TEST_FAIL("Unknown core reg id: 0x%llx", id);
+	TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
 	return NULL;
 }
 
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
 {
 	__u64 sve_off, n, i;
 
@@ -153,37 +207,37 @@ static const char *sve_id_to_str(__u64 id)
 	sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
 	i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
 
-	TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+	TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
 
 	switch (sve_off) {
 	case KVM_REG_ARM64_SVE_ZREG_BASE ...
 	     KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
 		n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
 		TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
-			    "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+			    "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
 		return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
 	case KVM_REG_ARM64_SVE_PREG_BASE ...
 	     KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
 		n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
 		TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
-			    "Unexpected bits set in SVE PREG id: 0x%llx", id);
+			    "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
 		return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
 	case KVM_REG_ARM64_SVE_FFR_BASE:
 		TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
-			    "Unexpected bits set in SVE FFR id: 0x%llx", id);
+			    "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
 		return "KVM_REG_ARM64_SVE_FFR(0)";
 	}
 
 	return NULL;
 }
 
-static void print_reg(__u64 id)
+static void print_reg(struct vcpu_config *c, __u64 id)
 {
 	unsigned op0, op1, crn, crm, op2;
 	const char *reg_size = NULL;
 
 	TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
-		    "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+		    "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
 
 	switch (id & KVM_REG_SIZE_MASK) {
 	case KVM_REG_SIZE_U8:
@@ -214,17 +268,17 @@ static void print_reg(__u64 id)
 		reg_size = "KVM_REG_SIZE_U2048";
 		break;
 	default:
-		TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
-			  (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+		TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+			  config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
 	}
 
 	switch (id & KVM_REG_ARM_COPROC_MASK) {
 	case KVM_REG_ARM_CORE:
-		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
 		break;
 	case KVM_REG_ARM_DEMUX:
 		TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
-			    "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+			    "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
 		printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
 		       reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
 		break;
@@ -235,23 +289,23 @@ static void print_reg(__u64 id)
 		crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
 		op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
 		TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
-			    "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+			    "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
 		printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
 		break;
 	case KVM_REG_ARM_FW:
 		TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
-			    "Unexpected bits set in FW reg id: 0x%llx", id);
+			    "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
 		printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
 		break;
 	case KVM_REG_ARM64_SVE:
-		if (reg_list_sve())
-			printf("\t%s,\n", sve_id_to_str(id));
+		if (has_cap(c, KVM_CAP_ARM_SVE))
+			printf("\t%s,\n", sve_id_to_str(c, id));
 		else
-			TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+			TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
 		break;
 	default:
-		TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
-			  (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+		TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+			  config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
 	}
 }
 
@@ -312,40 +366,51 @@ static void core_reg_fixup(void)
 	reg_list = tmp;
 }
 
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
 {
-	if (reg_list_sve())
-		init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+	struct reg_sublist *s;
+
+	for_each_sublist(c, s)
+		if (s->capability)
+			init->features[s->feature / 32] |= 1 << (s->feature % 32);
 }
 
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
 {
+	struct reg_sublist *s;
 	int feature;
 
-	if (reg_list_sve()) {
-		feature = KVM_ARM_VCPU_SVE;
-		vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+	for_each_sublist(c, s) {
+		if (s->finalize) {
+			feature = s->feature;
+			vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+		}
 	}
 }
 
-static void check_supported(void)
+static void check_supported(struct vcpu_config *c)
 {
-	if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
-		fprintf(stderr, "SVE not available, skipping tests\n");
-		exit(KSFT_SKIP);
+	struct reg_sublist *s;
+
+	for_each_sublist(c, s) {
+		if (s->capability && !kvm_check_cap(s->capability)) {
+			fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
+			exit(KSFT_SKIP);
+		}
 	}
 }
 
 int main(int ac, char **av)
 {
+	struct vcpu_config *c = reg_list_sve() ? &sve_config : &vregs_config;
 	struct kvm_vcpu_init init = { .target = -1, };
-	int new_regs = 0, missing_regs = 0, i;
+	int new_regs = 0, missing_regs = 0, i, n;
 	int failed_get = 0, failed_set = 0, failed_reject = 0;
 	bool print_list = false, print_filtered = false, fixup_core_regs = false;
 	struct kvm_vm *vm;
-	__u64 *vec_regs;
+	struct reg_sublist *s;
 
-	check_supported();
+	check_supported(c);
 
 	for (i = 1; i < ac; ++i) {
 		if (strcmp(av[i], "--core-reg-fixup") == 0)
@@ -359,9 +424,9 @@ int main(int ac, char **av)
 	}
 
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
-	prepare_vcpu_init(&init);
+	prepare_vcpu_init(c, &init);
 	aarch64_vcpu_add_default(vm, 0, &init, NULL);
-	finalize_vcpu(vm, 0);
+	finalize_vcpu(vm, 0, c);
 
 	reg_list = vcpu_get_reg_list(vm, 0);
 
@@ -374,7 +439,7 @@ int main(int ac, char **av)
 			__u64 id = reg_list->reg[i];
 			if ((print_list && !filter_reg(id)) ||
 			    (print_filtered && filter_reg(id)))
-				print_reg(id);
+				print_reg(c, id);
 		}
 		putchar('\n');
 		return 0;
@@ -396,50 +461,52 @@ int main(int ac, char **av)
 			.id = reg_list->reg[i],
 			.addr = (__u64)&addr,
 		};
+		bool reject_reg = false;
 		int ret;
 
 		ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
 		if (ret) {
-			puts("Failed to get ");
-			print_reg(reg.id);
+			printf("%s: Failed to get ", config_name(c));
+			print_reg(c, reg.id);
 			putchar('\n');
 			++failed_get;
 		}
 
 		/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
-		if (find_reg(rejects_set, rejects_set_n, reg.id)) {
-			ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
-			if (ret != -1 || errno != EPERM) {
-				printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
-				print_reg(reg.id);
-				putchar('\n');
-				++failed_reject;
+		for_each_sublist(c, s) {
+			if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+				reject_reg = true;
+				ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+				if (ret != -1 || errno != EPERM) {
+					printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+					print_reg(c, reg.id);
+					putchar('\n');
+					++failed_reject;
+				}
+				break;
 			}
-			continue;
 		}
 
-		ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
-		if (ret) {
-			puts("Failed to set ");
-			print_reg(reg.id);
-			putchar('\n');
-			++failed_set;
+		if (!reject_reg) {
+			ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+			if (ret) {
+				printf("%s: Failed to set ", config_name(c));
+				print_reg(c, reg.id);
+				putchar('\n');
+				++failed_set;
+			}
 		}
 	}
 
-	if (reg_list_sve()) {
-		blessed_n = base_regs_n + sve_regs_n;
-		vec_regs = sve_regs;
-	} else {
-		blessed_n = base_regs_n + vregs_n;
-		vec_regs = vregs;
-	}
-
+	for_each_sublist(c, s)
+		blessed_n += s->regs_n;
 	blessed_reg = calloc(blessed_n, sizeof(__u64));
-	for (i = 0; i < base_regs_n; ++i)
-		blessed_reg[i] = base_regs[i];
-	for (i = 0; i < blessed_n - base_regs_n; ++i)
-		blessed_reg[base_regs_n + i] = vec_regs[i];
+
+	n = 0;
+	for_each_sublist(c, s) {
+		for (i = 0; i < s->regs_n; ++i)
+			blessed_reg[n++] = s->regs[i];
+	}
 
 	for_each_new_reg(i)
 		++new_regs;
@@ -448,31 +515,31 @@ int main(int ac, char **av)
 		++missing_regs;
 
 	if (new_regs || missing_regs) {
-		printf("Number blessed registers: %5lld\n", blessed_n);
-		printf("Number registers:         %5lld\n", reg_list->n);
+		printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+		printf("%s: Number registers:         %5lld\n", config_name(c), reg_list->n);
 	}
 
 	if (new_regs) {
-		printf("\nThere are %d new registers.\n"
+		printf("\n%s: There are %d new registers.\n"
 		       "Consider adding them to the blessed reg "
-		       "list with the following lines:\n\n", new_regs);
+		       "list with the following lines:\n\n", config_name(c), new_regs);
 		for_each_new_reg(i)
-			print_reg(reg_list->reg[i]);
+			print_reg(c, reg_list->reg[i]);
 		putchar('\n');
 	}
 
 	if (missing_regs) {
-		printf("\nThere are %d missing registers.\n"
-		       "The following lines are missing registers:\n\n", missing_regs);
+		printf("\n%s: There are %d missing registers.\n"
+		       "The following lines are missing registers:\n\n", config_name(c), missing_regs);
 		for_each_missing_reg(i)
-			print_reg(blessed_reg[i]);
+			print_reg(c, blessed_reg[i]);
 		putchar('\n');
 	}
 
 	TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
-		    "There are %d missing registers; "
+		    "%s: There are %d missing registers; "
 		    "%d registers failed get; %d registers failed set; %d registers failed reject",
-		    missing_regs, failed_get, failed_set, failed_reject);
+		    config_name(c), missing_regs, failed_get, failed_set, failed_reject);
 
 	return 0;
 }
@@ -761,7 +828,6 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
 	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
 };
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
 
 static __u64 vregs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +863,6 @@ static __u64 vregs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
 	KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
 };
-static __u64 vregs_n = ARRAY_SIZE(vregs);
 
 static __u64 sve_regs[] = {
 	KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +917,31 @@ static __u64 sve_regs[] = {
 	KVM_REG_ARM64_SVE_FFR(0),
 	ARM64_SYS_REG(3, 0, 1, 2, 0),   /* ZCR_EL1 */
 };
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
 
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
 	KVM_REG_ARM64_SVE_VLS,
-#endif
 };
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+#define BASE_SUBLIST \
+	{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+	{ "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define SVE_SUBLIST \
+	{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+	  .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+	  .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+
+static struct vcpu_config vregs_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	VREGS_SUBLIST,
+	{0},
+	},
+};
+static struct vcpu_config sve_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	SVE_SUBLIST,
+	{0},
+	},
+};
-- 
GitLab


From 94e9223c06bece9165a36f0f56bac3552a45cbfc Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 31 May 2021 12:33:41 +0200
Subject: [PATCH 3522/3804] KVM: arm64: selftests: get-reg-list: Prepare to run
 multiple configs at once

We don't want to have to create a new binary for each vcpu config, so
prepare to run the test for multiple vcpu configs in a single binary.
We do this by factoring out the test from main() and then looping over
configs. When given '--list' we still never print more than a single
reg-list for a single vcpu config though, because it would be confusing
otherwise.

No functional change intended.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210531103344.29325-3-drjones@redhat.com
---
 .../selftests/kvm/aarch64/get-reg-list.c      | 68 ++++++++++++++-----
 1 file changed, 51 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 7bb09ce20ddef..14fc8d82e30fe 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -56,8 +56,8 @@ struct vcpu_config {
 	struct reg_sublist sublists[];
 };
 
-static struct vcpu_config vregs_config;
-static struct vcpu_config sve_config;
+static struct vcpu_config *vcpu_configs[];
+static int vcpu_configs_n;
 
 #define for_each_sublist(c, s)							\
 	for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
@@ -400,29 +400,20 @@ static void check_supported(struct vcpu_config *c)
 	}
 }
 
-int main(int ac, char **av)
+static bool print_list;
+static bool print_filtered;
+static bool fixup_core_regs;
+
+static void run_test(struct vcpu_config *c)
 {
-	struct vcpu_config *c = reg_list_sve() ? &sve_config : &vregs_config;
 	struct kvm_vcpu_init init = { .target = -1, };
 	int new_regs = 0, missing_regs = 0, i, n;
 	int failed_get = 0, failed_set = 0, failed_reject = 0;
-	bool print_list = false, print_filtered = false, fixup_core_regs = false;
 	struct kvm_vm *vm;
 	struct reg_sublist *s;
 
 	check_supported(c);
 
-	for (i = 1; i < ac; ++i) {
-		if (strcmp(av[i], "--core-reg-fixup") == 0)
-			fixup_core_regs = true;
-		else if (strcmp(av[i], "--list") == 0)
-			print_list = true;
-		else if (strcmp(av[i], "--list-filtered") == 0)
-			print_filtered = true;
-		else
-			TEST_FAIL("Unknown option: %s\n", av[i]);
-	}
-
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 	prepare_vcpu_init(c, &init);
 	aarch64_vcpu_add_default(vm, 0, &init, NULL);
@@ -442,7 +433,7 @@ int main(int ac, char **av)
 				print_reg(c, id);
 		}
 		putchar('\n');
-		return 0;
+		return;
 	}
 
 	/*
@@ -541,6 +532,44 @@ int main(int ac, char **av)
 		    "%d registers failed get; %d registers failed set; %d registers failed reject",
 		    config_name(c), missing_regs, failed_get, failed_set, failed_reject);
 
+	pr_info("%s: PASS\n", config_name(c));
+	blessed_n = 0;
+	free(blessed_reg);
+	free(reg_list);
+	kvm_vm_free(vm);
+}
+
+int main(int ac, char **av)
+{
+	struct vcpu_config *c, *sel = NULL;
+	int i;
+
+	for (i = 1; i < ac; ++i) {
+		if (strcmp(av[i], "--core-reg-fixup") == 0)
+			fixup_core_regs = true;
+		else if (strcmp(av[i], "--list") == 0)
+			print_list = true;
+		else if (strcmp(av[i], "--list-filtered") == 0)
+			print_filtered = true;
+		else
+			TEST_FAIL("Unknown option: %s\n", av[i]);
+	}
+
+	if (print_list || print_filtered) {
+		/*
+		 * We only want to print the register list of a single config.
+		 * TODO: Add command line support to pick which config.
+		 */
+		sel = vcpu_configs[0];
+	}
+
+	for (i = 0; i < vcpu_configs_n; ++i) {
+		c = vcpu_configs[i];
+		if (sel && c != sel)
+			continue;
+		run_test(c);
+	}
+
 	return 0;
 }
 
@@ -945,3 +974,8 @@ static struct vcpu_config sve_config = {
 	{0},
 	},
 };
+
+static struct vcpu_config *vcpu_configs[] = {
+	reg_list_sve() ? &sve_config : &vregs_config,
+};
+static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
-- 
GitLab


From f3032fcc9cf065733ce9a50057aaeffd6c464e2e Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 31 May 2021 12:33:42 +0200
Subject: [PATCH 3523/3804] KVM: arm64: selftests: get-reg-list: Provide config
 selection option

Add a new command line option that allows the user to select a specific
configuration, e.g. --config=sve will give the sve config. Also provide
help text and the --help/-h options.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210531103344.29325-4-drjones@redhat.com
---
 .../selftests/kvm/aarch64/get-reg-list.c      | 56 ++++++++++++++++++-
 1 file changed, 53 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 14fc8d82e30fe..03e041d97a183 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -539,6 +539,52 @@ static void run_test(struct vcpu_config *c)
 	kvm_vm_free(vm);
 }
 
+static void help(void)
+{
+	struct vcpu_config *c;
+	int i;
+
+	printf(
+	"\n"
+	"usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
+	" --config=<selection>        Used to select a specific vcpu configuration for the test/listing\n"
+	"                             '<selection>' may be\n");
+
+	for (i = 0; i < vcpu_configs_n; ++i) {
+		c = vcpu_configs[i];
+		printf(
+	"                               '%s'\n", config_name(c));
+	}
+
+	printf(
+	"\n"
+	" --list                      Print the register list rather than test it (requires --config)\n"
+	" --list-filtered             Print registers that would normally be filtered out (requires --config)\n"
+	" --core-reg-fixup            Needed when running on old kernels with broken core reg listings\n"
+	"\n"
+	);
+}
+
+static struct vcpu_config *parse_config(const char *config)
+{
+	struct vcpu_config *c;
+	int i;
+
+	if (config[8] != '=')
+		help(), exit(1);
+
+	for (i = 0; i < vcpu_configs_n; ++i) {
+		c = vcpu_configs[i];
+		if (strcmp(config_name(c), &config[9]) == 0)
+			break;
+	}
+
+	if (i == vcpu_configs_n)
+		help(), exit(1);
+
+	return c;
+}
+
 int main(int ac, char **av)
 {
 	struct vcpu_config *c, *sel = NULL;
@@ -547,20 +593,24 @@ int main(int ac, char **av)
 	for (i = 1; i < ac; ++i) {
 		if (strcmp(av[i], "--core-reg-fixup") == 0)
 			fixup_core_regs = true;
+		else if (strncmp(av[i], "--config", 8) == 0)
+			sel = parse_config(av[i]);
 		else if (strcmp(av[i], "--list") == 0)
 			print_list = true;
 		else if (strcmp(av[i], "--list-filtered") == 0)
 			print_filtered = true;
+		else if (strcmp(av[i], "--help") == 0 || strcmp(av[i], "-h") == 0)
+			help(), exit(0);
 		else
-			TEST_FAIL("Unknown option: %s\n", av[i]);
+			help(), exit(1);
 	}
 
 	if (print_list || print_filtered) {
 		/*
 		 * We only want to print the register list of a single config.
-		 * TODO: Add command line support to pick which config.
 		 */
-		sel = vcpu_configs[0];
+		if (!sel)
+			help(), exit(1);
 	}
 
 	for (i = 0; i < vcpu_configs_n; ++i) {
-- 
GitLab


From 32edd2290889d0cd0751dd11853e5a368188066d Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 31 May 2021 12:33:43 +0200
Subject: [PATCH 3524/3804] KVM: arm64: selftests: get-reg-list: Remove
 get-reg-list-sve

Now that we can easily run the test for multiple vcpu configs, let's
merge get-reg-list and get-reg-list-sve into just get-reg-list. We
also add a final change that makes running multiple tests more
practical: fork each test rather than running it directly. That way
one test can fail while subsequent tests still run.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210531103344.29325-5-drjones@redhat.com
---
 tools/testing/selftests/kvm/.gitignore        |  1 -
 tools/testing/selftests/kvm/Makefile          |  1 -
 .../selftests/kvm/aarch64/get-reg-list-sve.c  |  3 --
 .../selftests/kvm/aarch64/get-reg-list.c      | 31 +++++++++++++------
 4 files changed, 21 insertions(+), 15 deletions(-)
 delete mode 100644 tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 524c857a049c3..dd36575b732a9 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /aarch64/get-reg-list
-/aarch64/get-reg-list-sve
 /aarch64/vgic_init
 /s390x/memop
 /s390x/resets
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index daaee1888b128..5c8f3725a7f07 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -79,7 +79,6 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644
index efba76682b4b8..0000000000000
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 03e041d97a183..b46b8a1fdc0c5 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -27,16 +27,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
 #include "kvm_util.h"
 #include "test_util.h"
 #include "processor.h"
 
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
-
 static struct kvm_reg_list *reg_list;
 static __u64 *blessed_reg, blessed_n;
 
@@ -588,7 +585,8 @@ static struct vcpu_config *parse_config(const char *config)
 int main(int ac, char **av)
 {
 	struct vcpu_config *c, *sel = NULL;
-	int i;
+	int i, ret = 0;
+	pid_t pid;
 
 	for (i = 1; i < ac; ++i) {
 		if (strcmp(av[i], "--core-reg-fixup") == 0)
@@ -617,10 +615,22 @@ int main(int ac, char **av)
 		c = vcpu_configs[i];
 		if (sel && c != sel)
 			continue;
-		run_test(c);
+
+		pid = fork();
+
+		if (!pid) {
+			run_test(c);
+			exit(0);
+		} else {
+			int wstatus;
+			pid_t wpid = wait(&wstatus);
+			TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+			if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+				ret = KSFT_FAIL;
+		}
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -1026,6 +1036,7 @@ static struct vcpu_config sve_config = {
 };
 
 static struct vcpu_config *vcpu_configs[] = {
-	reg_list_sve() ? &sve_config : &vregs_config,
+	&vregs_config,
+	&sve_config,
 };
 static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
-- 
GitLab


From 313673bad871750c0c829def53d037868af75b67 Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Mon, 31 May 2021 12:33:44 +0200
Subject: [PATCH 3525/3804] KVM: arm64: selftests: get-reg-list: Split base and
 pmu registers

Since KVM commit 11663111cd49 ("KVM: arm64: Hide PMU registers from
userspace when not available") the get-reg-list* tests have been
failing with

  ...
  ... There are 74 missing registers.
  The following lines are missing registers:
  ...

where the 74 missing registers are all PMU registers. This isn't a
bug in KVM that the selftest found, even though it's true that a
KVM userspace that wasn't setting the KVM_ARM_VCPU_PMU_V3 VCPU
flag, but still expecting the PMU registers to be in the reg-list,
would suddenly no longer have their expectations met. In that case,
the expectations were wrong, though, so that KVM userspace needs to
be fixed, and so does this selftest. The fix for this selftest is to
pull the PMU registers out of the base register sublist into their
own sublist and then create new, pmu-enabled vcpu configs which can
be tested.

Signed-off-by: Andrew Jones <drjones@redhat.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210531103344.29325-6-drjones@redhat.com
---
 .../selftests/kvm/aarch64/get-reg-list.c      | 39 +++++++++++++++----
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index b46b8a1fdc0c5..a16c8f05366c6 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -637,7 +637,7 @@ int main(int ac, char **av)
  * The current blessed list was primed with the output of kernel version
  * v4.15 with --core-reg-fixup and then later updated with new registers.
  *
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v5.13-rc3
  */
 static __u64 base_regs[] = {
 	KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -829,8 +829,6 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 0, 5, 2, 0),	/* ESR_EL1 */
 	ARM64_SYS_REG(3, 0, 6, 0, 0),	/* FAR_EL1 */
 	ARM64_SYS_REG(3, 0, 7, 4, 0),	/* PAR_EL1 */
-	ARM64_SYS_REG(3, 0, 9, 14, 1),	/* PMINTENSET_EL1 */
-	ARM64_SYS_REG(3, 0, 9, 14, 2),	/* PMINTENCLR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 2, 0),	/* MAIR_EL1 */
 	ARM64_SYS_REG(3, 0, 10, 3, 0),	/* AMAIR_EL1 */
 	ARM64_SYS_REG(3, 0, 12, 0, 0),	/* VBAR_EL1 */
@@ -839,6 +837,16 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 0, 13, 0, 4),	/* TPIDR_EL1 */
 	ARM64_SYS_REG(3, 0, 14, 1, 0),	/* CNTKCTL_EL1 */
 	ARM64_SYS_REG(3, 2, 0, 0, 0),	/* CSSELR_EL1 */
+	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
+	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
+	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
+	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+	ARM64_SYS_REG(3, 0, 9, 14, 1),	/* PMINTENSET_EL1 */
+	ARM64_SYS_REG(3, 0, 9, 14, 2),	/* PMINTENCLR_EL1 */
 	ARM64_SYS_REG(3, 3, 9, 12, 0),	/* PMCR_EL0 */
 	ARM64_SYS_REG(3, 3, 9, 12, 1),	/* PMCNTENSET_EL0 */
 	ARM64_SYS_REG(3, 3, 9, 12, 2),	/* PMCNTENCLR_EL0 */
@@ -848,8 +856,6 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 3, 9, 13, 0),	/* PMCCNTR_EL0 */
 	ARM64_SYS_REG(3, 3, 9, 14, 0),	/* PMUSERENR_EL0 */
 	ARM64_SYS_REG(3, 3, 9, 14, 3),	/* PMOVSSET_EL0 */
-	ARM64_SYS_REG(3, 3, 13, 0, 2),	/* TPIDR_EL0 */
-	ARM64_SYS_REG(3, 3, 13, 0, 3),	/* TPIDRRO_EL0 */
 	ARM64_SYS_REG(3, 3, 14, 8, 0),
 	ARM64_SYS_REG(3, 3, 14, 8, 1),
 	ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -913,9 +919,6 @@ static __u64 base_regs[] = {
 	ARM64_SYS_REG(3, 3, 14, 15, 5),
 	ARM64_SYS_REG(3, 3, 14, 15, 6),
 	ARM64_SYS_REG(3, 3, 14, 15, 7),	/* PMCCFILTR_EL0 */
-	ARM64_SYS_REG(3, 4, 3, 0, 0),	/* DACR32_EL2 */
-	ARM64_SYS_REG(3, 4, 5, 0, 1),	/* IFSR32_EL2 */
-	ARM64_SYS_REG(3, 4, 5, 3, 0),	/* FPEXC32_EL2 */
 };
 
 static __u64 vregs[] = {
@@ -1015,6 +1018,8 @@ static __u64 sve_rejects_set[] = {
 	{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
 #define VREGS_SUBLIST \
 	{ "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+	{ "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
 #define SVE_SUBLIST \
 	{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
 	  .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
@@ -1027,6 +1032,14 @@ static struct vcpu_config vregs_config = {
 	{0},
 	},
 };
+static struct vcpu_config vregs_pmu_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	VREGS_SUBLIST,
+	PMU_SUBLIST,
+	{0},
+	},
+};
 static struct vcpu_config sve_config = {
 	.sublists = {
 	BASE_SUBLIST,
@@ -1034,9 +1047,19 @@ static struct vcpu_config sve_config = {
 	{0},
 	},
 };
+static struct vcpu_config sve_pmu_config = {
+	.sublists = {
+	BASE_SUBLIST,
+	SVE_SUBLIST,
+	PMU_SUBLIST,
+	{0},
+	},
+};
 
 static struct vcpu_config *vcpu_configs[] = {
 	&vregs_config,
+	&vregs_pmu_config,
 	&sve_config,
+	&sve_pmu_config,
 };
 static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
-- 
GitLab


From b356a831088730a3ef36848cd9f2d62dcac392bf Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 22 Jun 2021 08:46:44 +0100
Subject: [PATCH 3526/3804] KVM: arm64: Update MAINTAINERS to include selftests

As the KVM/arm64 selftests are routed via the kvmarm tree,
add the relevant references to the MAINTAINERS file.

Suggested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210622070732.zod7gaqhqo344vg6@gator
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 503fd21901f10..b9d5999253c45 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9993,6 +9993,8 @@ F:	arch/arm64/include/asm/kvm*
 F:	arch/arm64/include/uapi/asm/kvm*
 F:	arch/arm64/kvm/
 F:	include/kvm/arm_*
+F:	tools/testing/selftests/kvm/*/aarch64/
+F:	tools/testing/selftests/kvm/aarch64/
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 M:	Huacai Chen <chenhuacai@kernel.org>
-- 
GitLab


From cb8f63b8cbf39845244f3ccae43bb7e63bd70543 Mon Sep 17 00:00:00 2001
From: Gabriel Knezek <gabeknez@linux.microsoft.com>
Date: Mon, 21 Jun 2021 15:28:59 -0700
Subject: [PATCH 3527/3804] gpiolib: cdev: zero padding during conversion to
 gpioline_info_changed

When userspace requests a GPIO v1 line info changed event,
lineinfo_watch_read() populates and returns the gpioline_info_changed
structure. It contains 5 words of padding at the end which are not
initialized before being returned to userspace.

Zero the structure in gpio_v2_line_info_changed_to_v1() before populating
its contents.

Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL")
Signed-off-by: Gabriel Knezek <gabeknez@linux.microsoft.com>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpiolib-cdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 1631727bf0da1..c7b5446d01fd2 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -1880,6 +1880,7 @@ static void gpio_v2_line_info_changed_to_v1(
 		struct gpio_v2_line_info_changed *lic_v2,
 		struct gpioline_info_changed *lic_v1)
 {
+	memset(lic_v1, 0, sizeof(*lic_v1));
 	gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info);
 	lic_v1->timestamp = lic_v2->timestamp_ns;
 	lic_v1->event_type = lic_v2->event_type;
-- 
GitLab


From 766c268bc6d39b8124e50d075a36b8a3305bc8e2 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 17 Jun 2021 11:56:50 +0206
Subject: [PATCH 3528/3804] lib/dump_stack: move cpu lock to printk.c

dump_stack() implements its own cpu-reentrant spinning lock to
best-effort serialize stack traces in the printk log. However,
there are other functions (such as show_regs()) that can also
benefit from this serialization.

Move the cpu-reentrant spinning lock (cpu lock) into new helper
functions printk_cpu_lock_irqsave()/printk_cpu_unlock_irqrestore()
so that it is available for others as well. For !CONFIG_SMP the
cpu lock is a NOP.

Note that having multiple cpu locks in the system can easily
lead to deadlock. Code needing a cpu lock should use the
printk cpu lock, since the printk cpu lock could be acquired
from any code and any context.

Also note that it is not necessary for a cpu lock to disable
interrupts. However, in upcoming work this cpu lock will be used
for emergency tasks (for example, atomic consoles during kernel
crashes) and any interruptions while holding the cpu lock should
be avoided if possible.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
[pmladek@suse.com: Backported on top of 5.13-rc1.]
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210617095051.4808-2-john.ogness@linutronix.de
---
 include/linux/printk.h | 41 +++++++++++++++++++++++++
 kernel/printk/printk.c | 69 ++++++++++++++++++++++++++++++++++++++++++
 lib/dump_stack.c       | 38 ++---------------------
 3 files changed, 112 insertions(+), 36 deletions(-)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index fe7eb2351610d..1790a5521fd9d 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -282,6 +282,47 @@ static inline void printk_safe_flush_on_panic(void)
 }
 #endif
 
+#ifdef CONFIG_SMP
+extern int __printk_cpu_trylock(void);
+extern void __printk_wait_on_cpu_lock(void);
+extern void __printk_cpu_unlock(void);
+
+/**
+ * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning
+ *                             lock and disable interrupts.
+ * @flags: Stack-allocated storage for saving local interrupt state,
+ *         to be passed to printk_cpu_unlock_irqrestore().
+ *
+ * If the lock is owned by another CPU, spin until it becomes available.
+ * Interrupts are restored while spinning.
+ */
+#define printk_cpu_lock_irqsave(flags)		\
+	for (;;) {				\
+		local_irq_save(flags);		\
+		if (__printk_cpu_trylock())	\
+			break;			\
+		local_irq_restore(flags);	\
+		__printk_wait_on_cpu_lock();	\
+	}
+
+/**
+ * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning
+ *                                  lock and restore interrupts.
+ * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave().
+ */
+#define printk_cpu_unlock_irqrestore(flags)	\
+	do {					\
+		__printk_cpu_unlock();		\
+		local_irq_restore(flags);	\
+	} while (0)				\
+
+#else
+
+#define printk_cpu_lock_irqsave(flags) ((void)flags)
+#define printk_cpu_unlock_irqrestore(flags) ((void)flags)
+
+#endif /* CONFIG_SMP */
+
 extern int kptr_restrict;
 
 /**
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 421c35571797e..9dfad0efb67fe 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3531,3 +3531,72 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter)
 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
 
 #endif
+
+#ifdef CONFIG_SMP
+static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1);
+static atomic_t printk_cpulock_nested = ATOMIC_INIT(0);
+
+/**
+ * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant
+ *                               spinning lock is not owned by any CPU.
+ *
+ * Context: Any context.
+ */
+void __printk_wait_on_cpu_lock(void)
+{
+	do {
+		cpu_relax();
+	} while (atomic_read(&printk_cpulock_owner) != -1);
+}
+EXPORT_SYMBOL(__printk_wait_on_cpu_lock);
+
+/**
+ * __printk_cpu_trylock() - Try to acquire the printk cpu-reentrant
+ *                          spinning lock.
+ *
+ * If no processor has the lock, the calling processor takes the lock and
+ * becomes the owner. If the calling processor is already the owner of the
+ * lock, this function succeeds immediately.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ * Return: 1 on success, otherwise 0.
+ */
+int __printk_cpu_trylock(void)
+{
+	int cpu;
+	int old;
+
+	cpu = smp_processor_id();
+
+	old = atomic_cmpxchg(&printk_cpulock_owner, -1, cpu);
+	if (old == -1) {
+		/* This CPU is now the owner. */
+		return 1;
+	} else if (old == cpu) {
+		/* This CPU is already the owner. */
+		atomic_inc(&printk_cpulock_nested);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__printk_cpu_trylock);
+
+/**
+ * __printk_cpu_unlock() - Release the printk cpu-reentrant spinning lock.
+ *
+ * The calling processor must be the owner of the lock.
+ *
+ * Context: Any context. Expects interrupts to be disabled.
+ */
+void __printk_cpu_unlock(void)
+{
+	if (atomic_read(&printk_cpulock_nested)) {
+		atomic_dec(&printk_cpulock_nested);
+		return;
+	}
+
+	atomic_set(&printk_cpulock_owner, -1);
+}
+EXPORT_SYMBOL(__printk_cpu_unlock);
+#endif /* CONFIG_SMP */
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index f5a33b6f773f7..5ebf4375fa8c9 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -84,50 +84,16 @@ static void __dump_stack(void)
  *
  * Architectures can override this implementation by implementing its own.
  */
-#ifdef CONFIG_SMP
-static atomic_t dump_lock = ATOMIC_INIT(-1);
-
 asmlinkage __visible void dump_stack(void)
 {
 	unsigned long flags;
-	int was_locked;
-	int old;
-	int cpu;
 
 	/*
 	 * Permit this cpu to perform nested stack dumps while serialising
 	 * against other CPUs
 	 */
-retry:
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	old = atomic_cmpxchg(&dump_lock, -1, cpu);
-	if (old == -1) {
-		was_locked = 0;
-	} else if (old == cpu) {
-		was_locked = 1;
-	} else {
-		local_irq_restore(flags);
-		/*
-		 * Wait for the lock to release before jumping to
-		 * atomic_cmpxchg() in order to mitigate the thundering herd
-		 * problem.
-		 */
-		do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
-		goto retry;
-	}
-
-	__dump_stack();
-
-	if (!was_locked)
-		atomic_set(&dump_lock, -1);
-
-	local_irq_restore(flags);
-}
-#else
-asmlinkage __visible void dump_stack(void)
-{
+	printk_cpu_lock_irqsave(flags);
 	__dump_stack();
+	printk_cpu_unlock_irqrestore(flags);
 }
-#endif
 EXPORT_SYMBOL(dump_stack);
-- 
GitLab


From 3342aa8e6b4f6e3f1521e9b4cf5cfe50dbc37774 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Thu, 17 Jun 2021 11:56:51 +0206
Subject: [PATCH 3529/3804] printk: fix cpu lock ordering

The cpu lock implementation uses a full memory barrier to take
the lock, but no memory barriers when releasing the lock. This
means that changes performed by a lock owner may not be seen by
the next lock owner. This may have been "good enough" for use
by dump_stack() as a serialization mechanism, but it is not
enough to provide proper protection for a critical section.

Correct this problem by using acquire/release memory barriers
for lock/unlock, respectively.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20210617095051.4808-3-john.ogness@linutronix.de
---
 kernel/printk/printk.c | 53 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9dfad0efb67fe..142a58d124d95 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3568,10 +3568,33 @@ int __printk_cpu_trylock(void)
 
 	cpu = smp_processor_id();
 
-	old = atomic_cmpxchg(&printk_cpulock_owner, -1, cpu);
+	/*
+	 * Guarantee loads and stores from this CPU when it is the lock owner
+	 * are _not_ visible to the previous lock owner. This pairs with
+	 * __printk_cpu_unlock:B.
+	 *
+	 * Memory barrier involvement:
+	 *
+	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, then
+	 * __printk_cpu_unlock:A can never read from __printk_cpu_trylock:B.
+	 *
+	 * Relies on:
+	 *
+	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
+	 * of the previous CPU
+	 *    matching
+	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+	 * of this CPU
+	 */
+	old = atomic_cmpxchg_acquire(&printk_cpulock_owner, -1,
+				     cpu); /* LMM(__printk_cpu_trylock:A) */
 	if (old == -1) {
-		/* This CPU is now the owner. */
+		/*
+		 * This CPU is now the owner and begins loading/storing
+		 * data: LMM(__printk_cpu_trylock:B)
+		 */
 		return 1;
+
 	} else if (old == cpu) {
 		/* This CPU is already the owner. */
 		atomic_inc(&printk_cpulock_nested);
@@ -3596,7 +3619,31 @@ void __printk_cpu_unlock(void)
 		return;
 	}
 
-	atomic_set(&printk_cpulock_owner, -1);
+	/*
+	 * This CPU is finished loading/storing data:
+	 * LMM(__printk_cpu_unlock:A)
+	 */
+
+	/*
+	 * Guarantee loads and stores from this CPU when it was the
+	 * lock owner are visible to the next lock owner. This pairs
+	 * with __printk_cpu_trylock:A.
+	 *
+	 * Memory barrier involvement:
+	 *
+	 * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B,
+	 * then __printk_cpu_trylock:B reads from __printk_cpu_unlock:A.
+	 *
+	 * Relies on:
+	 *
+	 * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B
+	 * of this CPU
+	 *    matching
+	 * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B
+	 * of the next CPU
+	 */
+	atomic_set_release(&printk_cpulock_owner,
+			   -1); /* LMM(__printk_cpu_unlock:B) */
 }
 EXPORT_SYMBOL(__printk_cpu_unlock);
 #endif /* CONFIG_SMP */
-- 
GitLab


From 69e3b846d8a753f9f279f29531ca56b0f7563ad0 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:11 +0100
Subject: [PATCH 3530/3804] arm64: mte: Sync tags for pages where PTE is
 untagged

A KVM guest could store tags in a page even if the VMM hasn't mapped
the page with PROT_MTE. So when restoring pages from swap we will
need to check to see if there are any saved tags even if !pte_tagged().

However don't check pages for which pte_access_permitted() returns false
as these will not have been swapped out.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-2-steven.price@arm.com
---
 arch/arm64/include/asm/mte.h     |  4 ++--
 arch/arm64/include/asm/pgtable.h | 22 +++++++++++++++++++---
 arch/arm64/kernel/mte.c          | 18 +++++++++++++-----
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7e..347ef38a35f79 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage);
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged	PG_arch_2
 
-void mte_sync_tags(pte_t *ptep, pte_t pte);
+void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
@@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged	0
 
-static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
 {
 }
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0b10204e72fcb..db5402168841c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
 		__sync_icache_dcache(pte);
 
-	if (system_supports_mte() &&
-	    pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
-		mte_sync_tags(ptep, pte);
+	/*
+	 * If the PTE would provide user space access to the tags associated
+	 * with it then ensure that the MTE tags are synchronised.  Although
+	 * pte_access_permitted() returns false for exec only mappings, they
+	 * don't expose tags (instruction fetches don't check tags).
+	 */
+	if (system_supports_mte() && pte_access_permitted(pte, false) &&
+	    !pte_special(pte)) {
+		pte_t old_pte = READ_ONCE(*ptep);
+		/*
+		 * We only need to synchronise if the new PTE has tags enabled
+		 * or if swapping in (in which case another mapping may have
+		 * set tags in the past even if this PTE isn't tagged).
+		 * (!pte_none() && !pte_present()) is an open coded version of
+		 * is_swap_pte()
+		 */
+		if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
+			mte_sync_tags(old_pte, pte);
+	}
 
 	__check_racy_pte_update(mm, ptep, pte);
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 125a10e413e9f..69b3fde8759e4 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
 EXPORT_SYMBOL_GPL(mte_async_mode);
 #endif
 
-static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
+static void mte_sync_page_tags(struct page *page, pte_t old_pte,
+			       bool check_swap, bool pte_is_tagged)
 {
-	pte_t old_pte = READ_ONCE(*ptep);
-
 	if (check_swap && is_swap_pte(old_pte)) {
 		swp_entry_t entry = pte_to_swp_entry(old_pte);
 
@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 			return;
 	}
 
+	if (!pte_is_tagged)
+		return;
+
 	page_kasan_tag_reset(page);
 	/*
 	 * We need smp_wmb() in between setting the flags and clearing the
@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 	mte_clear_page_tags(page_address(page));
 }
 
-void mte_sync_tags(pte_t *ptep, pte_t pte)
+void mte_sync_tags(pte_t old_pte, pte_t pte)
 {
 	struct page *page = pte_page(pte);
 	long i, nr_pages = compound_nr(page);
 	bool check_swap = nr_pages == 1;
+	bool pte_is_tagged = pte_tagged(pte);
+
+	/* Early out if there's nothing to do */
+	if (!check_swap && !pte_is_tagged)
+		return;
 
 	/* if PG_mte_tagged is set, tags have already been initialised */
 	for (i = 0; i < nr_pages; i++, page++) {
 		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
-			mte_sync_page_tags(page, ptep, check_swap);
+			mte_sync_page_tags(page, old_pte, check_swap,
+					   pte_is_tagged);
 	}
 }
 
-- 
GitLab


From f54b3ca7ea1e5e02f481cf4ca54568e57bd66086 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 22 Jun 2021 09:54:09 +0200
Subject: [PATCH 3531/3804] Revert "drm: add a locked version of
 drm_is_current_master"

This reverts commit 1815d9c86e3090477fbde066ff314a7e9721ee0f.

Unfortunately this inverts the locking hierarchy, so back to the
drawing board. Full lockdep splat below:

======================================================
WARNING: possible circular locking dependency detected
5.13.0-rc7-CI-CI_DRM_10254+ #1 Not tainted
------------------------------------------------------
kms_frontbuffer/1087 is trying to acquire lock:
ffff88810dcd01a8 (&dev->master_mutex){+.+.}-{3:3}, at: drm_is_current_master+0x1b/0x40
but task is already holding lock:
ffff88810dcd0488 (&dev->mode_config.mutex){+.+.}-{3:3}, at: drm_mode_getconnector+0x1c6/0x4a0
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&dev->mode_config.mutex){+.+.}-{3:3}:
       __mutex_lock+0xab/0x970
       drm_client_modeset_probe+0x22e/0xca0
       __drm_fb_helper_initial_config_and_unlock+0x42/0x540
       intel_fbdev_initial_config+0xf/0x20 [i915]
       async_run_entry_fn+0x28/0x130
       process_one_work+0x26d/0x5c0
       worker_thread+0x37/0x380
       kthread+0x144/0x170
       ret_from_fork+0x1f/0x30
-> #1 (&client->modeset_mutex){+.+.}-{3:3}:
       __mutex_lock+0xab/0x970
       drm_client_modeset_commit_locked+0x1c/0x180
       drm_client_modeset_commit+0x1c/0x40
       __drm_fb_helper_restore_fbdev_mode_unlocked+0x88/0xb0
       drm_fb_helper_set_par+0x34/0x40
       intel_fbdev_set_par+0x11/0x40 [i915]
       fbcon_init+0x270/0x4f0
       visual_init+0xc6/0x130
       do_bind_con_driver+0x1e5/0x2d0
       do_take_over_console+0x10e/0x180
       do_fbcon_takeover+0x53/0xb0
       register_framebuffer+0x22d/0x310
       __drm_fb_helper_initial_config_and_unlock+0x36c/0x540
       intel_fbdev_initial_config+0xf/0x20 [i915]
       async_run_entry_fn+0x28/0x130
       process_one_work+0x26d/0x5c0
       worker_thread+0x37/0x380
       kthread+0x144/0x170
       ret_from_fork+0x1f/0x30
-> #0 (&dev->master_mutex){+.+.}-{3:3}:
       __lock_acquire+0x151e/0x2590
       lock_acquire+0xd1/0x3d0
       __mutex_lock+0xab/0x970
       drm_is_current_master+0x1b/0x40
       drm_mode_getconnector+0x37e/0x4a0
       drm_ioctl_kernel+0xa8/0xf0
       drm_ioctl+0x1e8/0x390
       __x64_sys_ioctl+0x6a/0xa0
       do_syscall_64+0x39/0xb0
       entry_SYSCALL_64_after_hwframe+0x44/0xae
other info that might help us debug this:
Chain exists of: &dev->master_mutex --> &client->modeset_mutex --> &dev->mode_config.mutex
 Possible unsafe locking scenario:
       CPU0                    CPU1
       ----                    ----
  lock(&dev->mode_config.mutex);
                               lock(&client->modeset_mutex);
                               lock(&dev->mode_config.mutex);
  lock(&dev->master_mutex);
*** DEADLOCK ***
1 lock held by kms_frontbuffer/1087:
 #0: ffff88810dcd0488 (&dev->mode_config.mutex){+.+.}-{3:3}, at: drm_mode_getconnector+0x1c6/0x4a0
stack backtrace:
CPU: 7 PID: 1087 Comm: kms_frontbuffer Not tainted 5.13.0-rc7-CI-CI_DRM_10254+ #1
Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3234.A01.1906141750 06/14/2019
Call Trace:
 dump_stack+0x7f/0xad
 check_noncircular+0x12e/0x150
 __lock_acquire+0x151e/0x2590
 lock_acquire+0xd1/0x3d0
 __mutex_lock+0xab/0x970
 drm_is_current_master+0x1b/0x40
 drm_mode_getconnector+0x37e/0x4a0
 drm_ioctl_kernel+0xa8/0xf0
 drm_ioctl+0x1e8/0x390
 __x64_sys_ioctl+0x6a/0xa0
 do_syscall_64+0x39/0xb0
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Note that this broke the intel-gfx CI pretty much across the board
because it has to reboot machines after it hits a lockdep splat.

Testcase: igt/debugfs_test/read_all_entries
Acked-by: Petri Latvala <petri.latvala@intel.com>
Fixes: 1815d9c86e30 ("drm: add a locked version of drm_is_current_master")
Cc: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: Emil Velikov <emil.l.velikov@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210622075409.2673805-1-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/drm_auth.c | 51 ++++++++++++++------------------------
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 86d4b72e95cbd..232abbba36868 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -61,35 +61,6 @@
  * trusted clients.
  */
 
-static bool drm_is_current_master_locked(struct drm_file *fpriv)
-{
-	lockdep_assert_held_once(&fpriv->master->dev->master_mutex);
-
-	return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
-}
-
-/**
- * drm_is_current_master - checks whether @priv is the current master
- * @fpriv: DRM file private
- *
- * Checks whether @fpriv is current master on its device. This decides whether a
- * client is allowed to run DRM_MASTER IOCTLs.
- *
- * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
- * - the current master is assumed to own the non-shareable display hardware.
- */
-bool drm_is_current_master(struct drm_file *fpriv)
-{
-	bool ret;
-
-	mutex_lock(&fpriv->master->dev->master_mutex);
-	ret = drm_is_current_master_locked(fpriv);
-	mutex_unlock(&fpriv->master->dev->master_mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL(drm_is_current_master);
-
 int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_auth *auth = data;
@@ -252,7 +223,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_unlock;
 
-	if (drm_is_current_master_locked(file_priv))
+	if (drm_is_current_master(file_priv))
 		goto out_unlock;
 
 	if (dev->master) {
@@ -301,7 +272,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto out_unlock;
 
-	if (!drm_is_current_master_locked(file_priv)) {
+	if (!drm_is_current_master(file_priv)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -350,7 +321,7 @@ void drm_master_release(struct drm_file *file_priv)
 	if (file_priv->magic)
 		idr_remove(&file_priv->master->magic_map, file_priv->magic);
 
-	if (!drm_is_current_master_locked(file_priv))
+	if (!drm_is_current_master(file_priv))
 		goto out;
 
 	drm_legacy_lock_master_cleanup(dev, master);
@@ -371,6 +342,22 @@ out:
 	mutex_unlock(&dev->master_mutex);
 }
 
+/**
+ * drm_is_current_master - checks whether @fpriv is the current master
+ * @fpriv: DRM file private
+ *
+ * Checks whether @fpriv is current master on its device. This decides whether a
+ * client is allowed to run DRM_MASTER IOCTLs.
+ *
+ * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
+ * - the current master is assumed to own the non-shareable display hardware.
+ */
+bool drm_is_current_master(struct drm_file *fpriv)
+{
+	return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
+}
+EXPORT_SYMBOL(drm_is_current_master);
+
 /**
  * drm_master_get - reference a master pointer
  * @master: &struct drm_master
-- 
GitLab


From 9301982c424a003c0095bf157154a85bf5322bd0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Jun 2021 16:18:24 +0200
Subject: [PATCH 3532/3804] x86/fpu: Preserve supervisor states in
 sanitize_restored_user_xstate()

sanitize_restored_user_xstate() preserves the supervisor states only
when the fx_only argument is zero, which allows unprivileged user space
to put supervisor states back into init state.

Preserve them unconditionally.

 [ bp: Fix a typo or two in the text. ]

Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de
---
 arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index ec3ae30547920..b7b92cdf3add4 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state,
 
 	if (use_xsave()) {
 		/*
-		 * Note: we don't need to zero the reserved bits in the
-		 * xstate_header here because we either didn't copy them at all,
-		 * or we checked earlier that they aren't set.
+		 * Clear all feature bits which are not set in
+		 * user_xfeatures and clear all extended features
+		 * for fx_only mode.
 		 */
+		u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
 
 		/*
-		 * 'user_xfeatures' might have bits clear which are
-		 * set in header->xfeatures. This represents features that
-		 * were in init state prior to a signal delivery, and need
-		 * to be reset back to the init state.  Clear any user
-		 * feature bits which are set in the kernel buffer to get
-		 * them back to the init state.
-		 *
-		 * Supervisor state is unchanged by input from userspace.
-		 * Ensure supervisor state bits stay set and supervisor
-		 * state is not modified.
+		 * Supervisor state has to be preserved. The sigframe
+		 * restore can only modify user features, i.e. @mask
+		 * cannot contain them.
 		 */
-		if (fx_only)
-			header->xfeatures = XFEATURE_MASK_FPSSE;
-		else
-			header->xfeatures &= user_xfeatures |
-					     xfeatures_mask_supervisor();
+		header->xfeatures &= mask | xfeatures_mask_supervisor();
 	}
 
 	if (use_fxsr()) {
-- 
GitLab


From f9dfb5e390fab2df9f7944bb91e7705aba14cd26 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Jun 2021 16:18:25 +0200
Subject: [PATCH 3533/3804] x86/fpu: Make init_fpstate correct with optimized
 XSAVE

The XSAVE init code initializes all enabled and supported components with
XRSTOR(S) to init state. Then it XSAVEs the state of the components back
into init_fpstate which is used in several places to fill in the init state
of components.

This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because
those use the init optimization and skip writing state of components which
are in init state. So init_fpstate.xsave still contains all zeroes after
this operation.

There are two ways to solve that:

   1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when
      XSAVES is enabled because XSAVES uses compacted format.

   2) Save the components which are known to have a non-zero init state by other
      means.

Looking deeper, #2 is the right thing to do because all components the
kernel supports have all-zeroes init state except the legacy features (FP,
SSE). Those cannot be hard coded because the states are not identical on all
CPUs, but they can be saved with FXSAVE which avoids all conditionals.

Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with
a BUILD_BUG_ON() which reminds developers to validate that a newly added
component has all zeroes init state. As a bonus remove the now unused
copy_xregs_to_kernel_booting() crutch.

The XSAVE and reshuffle method can still be implemented in the unlikely
case that components are added which have a non-zero init state and no
other means to save them. For now, FXSAVE is just simple and good enough.

  [ bp: Fix a typo or two in the text. ]

Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de
---
 arch/x86/include/asm/fpu/internal.h | 30 ++++++---------------
 arch/x86/kernel/fpu/xstate.c        | 41 ++++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fdee23ea4e173..16bf4d4a8159e 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 		asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
 }
 
+static inline void fxsave(struct fxregs_state *fx)
+{
+	if (IS_ENABLED(CONFIG_X86_32))
+		asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx));
+	else
+		asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx));
+}
+
 /* These macros all use (%edi)/(%rdi) as the single memory argument. */
 #define XSAVE		".byte " REX_PREFIX "0x0f,0xae,0x27"
 #define XSAVEOPT	".byte " REX_PREFIX "0x0f,0xae,0x37"
@@ -268,28 +276,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
 
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
-{
-	u64 mask = xfeatures_mask_all;
-	u32 lmask = mask;
-	u32 hmask = mask >> 32;
-	int err;
-
-	WARN_ON(system_state != SYSTEM_BOOTING);
-
-	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
-	else
-		XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
-
-	/* We should never fault when copying to a kernel buffer: */
-	WARN_ON_FPU(err);
-}
-
 /*
  * This function is called only during boot time when x86 caps are not set
  * up and alternative can not be used yet.
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d0eef963aad13..1cadb2faf7405 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -440,6 +440,25 @@ static void __init print_xstate_offset_size(void)
 	}
 }
 
+/*
+ * All supported features have either init state all zeros or are
+ * handled in setup_init_fpu_buf() individually. This is an explicit
+ * feature list and does not use XFEATURE_MASK*SUPPORTED to catch
+ * newly added supported features at build time and make people
+ * actually look at the init state for the new feature.
+ */
+#define XFEATURES_INIT_FPSTATE_HANDLED		\
+	(XFEATURE_MASK_FP |			\
+	 XFEATURE_MASK_SSE |			\
+	 XFEATURE_MASK_YMM |			\
+	 XFEATURE_MASK_OPMASK |			\
+	 XFEATURE_MASK_ZMM_Hi256 |		\
+	 XFEATURE_MASK_Hi16_ZMM	 |		\
+	 XFEATURE_MASK_PKRU |			\
+	 XFEATURE_MASK_BNDREGS |		\
+	 XFEATURE_MASK_BNDCSR |			\
+	 XFEATURE_MASK_PASID)
+
 /*
  * setup the xstate image representing the init state
  */
@@ -447,6 +466,10 @@ static void __init setup_init_fpu_buf(void)
 {
 	static int on_boot_cpu __initdata = 1;
 
+	BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED |
+		      XFEATURE_MASK_SUPERVISOR_SUPPORTED) !=
+		     XFEATURES_INIT_FPSTATE_HANDLED);
+
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
 
@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void)
 	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 
 	/*
-	 * Dump the init state again. This is to identify the init state
-	 * of any feature which is not represented by all zero's.
+	 * All components are now in init state. Read the state back so
+	 * that init_fpstate contains all non-zero init state. This only
+	 * works with XSAVE, but not with XSAVEOPT and XSAVES because
+	 * those use the init optimization which skips writing data for
+	 * components in init state.
+	 *
+	 * XSAVE could be used, but that would require to reshuffle the
+	 * data when XSAVES is available because XSAVES uses xstate
+	 * compaction. But doing so is a pointless exercise because most
+	 * components have an all zeros init state except for the legacy
+	 * ones (FP and SSE). Those can be saved with FXSAVE into the
+	 * legacy area. Adding new features requires to ensure that init
+	 * state is all zeroes or if not to add the necessary handling
+	 * here.
 	 */
-	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
+	fxsave(&init_fpstate.fxsave);
 }
 
 static int xfeature_uncompacted_offset(int xfeature_nr)
-- 
GitLab


From c3d128581f64a9b3729e697a63760ff0a2c4a8fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Mon, 31 May 2021 13:50:35 -0300
Subject: [PATCH 3534/3804] selftests: futex: Add futex wait test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are three different strategies to uniquely identify a futex in the
kernel:

 - Private futexes: uses the pointer to mm_struct and the page address

 - Shared futexes: checks if the page containing the address is a PageAnon:
   - If it is, uses the same data as a private futexes
   - If it isn't, uses an inode sequence number from struct inode and
      the page's index

Create a selftest to check those three paths and basic wait/wake
mechanism.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Link: https://lore.kernel.org/r/20210531165036.41468-2-andrealmeid@collabora.com
---
 .../selftests/futex/functional/.gitignore     |   1 +
 .../selftests/futex/functional/Makefile       |   3 +-
 .../selftests/futex/functional/futex_wait.c   | 171 ++++++++++++++++++
 .../testing/selftests/futex/functional/run.sh |   3 +
 4 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/futex/functional/futex_wait.c

diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daabf..bd24699bacc97 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,4 @@ futex_wait_private_mapped_file
 futex_wait_timeout
 futex_wait_uninitialized_heap
 futex_wait_wouldblock
+futex_wait
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 1d2b3b2a5b86b..20a5b4a1bc879 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -15,7 +15,8 @@ TEST_GEN_FILES := \
 	futex_requeue_pi_signal_restart \
 	futex_requeue_pi_mismatched_ops \
 	futex_wait_uninitialized_heap \
-	futex_wait_private_mapped_file
+	futex_wait_private_mapped_file \
+	futex_wait
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 0000000000000..685140d9b93d2
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait"
+#define timeout_ns  30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+static void *waiterfn(void *arg)
+{
+	struct timespec to;
+	unsigned int flags = 0;
+
+	if (arg)
+		flags = *((unsigned int *) arg);
+
+	to.tv_sec = 0;
+	to.tv_nsec = timeout_ns;
+
+	if (futex_wait(futex, 0, &to, flags))
+		printf("waiter failed errno %d\n", errno);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	int res, ret = RET_PASS, fd, c, shm_id;
+	u_int32_t f_private = 0, *shared_data;
+	unsigned int flags = FUTEX_PRIVATE_FLAG;
+	pthread_t waiter;
+	void *shm;
+
+	futex = &f_private;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_set_plan(3);
+	ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
+
+	/* Testing a private futex */
+	info("Calling private futex_wait on futex: %p\n", futex);
+	if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Calling private futex_wake on futex: %p\n", futex);
+	res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
+				      errno, strerror(errno));
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake private succeeds\n");
+	}
+
+	/* Testing an anon page shared memory */
+	shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+	if (shm_id < 0) {
+		perror("shmget");
+		exit(1);
+	}
+
+	shared_data = shmat(shm_id, NULL, 0);
+
+	*shared_data = 0;
+	futex = shared_data;
+
+	info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+	res = futex_wake(futex, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+				      errno, strerror(errno));
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+	}
+
+
+	/* Testing a file backed shared memory */
+	fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+	if (fd < 0) {
+		perror("open");
+		exit(1);
+	}
+
+	if (ftruncate(fd, sizeof(f_private))) {
+		perror("ftruncate");
+		exit(1);
+	}
+
+	shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (shm == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memcpy(shm, &f_private, sizeof(f_private));
+
+	futex = shm;
+
+	info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+	res = futex_wake(shm, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+				      errno, strerror(errno));
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+	}
+
+	/* Freeing resources */
+	shmdt(shared_data);
+	munmap(shm, sizeof(f_private));
+	remove(SHM_PATH);
+	close(fd);
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680e..d5e1430bcdca0 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,6 @@ echo
 echo
 ./futex_wait_uninitialized_heap $COLOR
 ./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait $COLOR
-- 
GitLab


From 7cb5dd8e2c8ce2b8f778f37cfd8bb955d663d16d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Almeida?= <andrealmeid@collabora.com>
Date: Mon, 31 May 2021 13:50:36 -0300
Subject: [PATCH 3535/3804] selftests: futex: Add futex compare requeue test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add testing for futex_cmp_requeue(). The first test just requeues one
waiter from one futex to another, and wakes it. The second performs both
wake and requeue, and checks the return values to see if the operation
woke/requeued the expected number of waiters.

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Link: https://lore.kernel.org/r/20210531165036.41468-3-andrealmeid@collabora.com
---
 .../selftests/futex/functional/.gitignore     |   1 +
 .../selftests/futex/functional/Makefile       |   3 +-
 .../futex/functional/futex_requeue.c          | 136 ++++++++++++++++++
 .../testing/selftests/futex/functional/run.sh |   3 +
 4 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/futex/functional/futex_requeue.c

diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index bd24699bacc97..0e78b49d0f2f2 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -7,3 +7,4 @@ futex_wait_timeout
 futex_wait_uninitialized_heap
 futex_wait_wouldblock
 futex_wait
+futex_requeue
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 20a5b4a1bc879..bd1fec59e010d 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -16,7 +16,8 @@ TEST_GEN_FILES := \
 	futex_requeue_pi_mismatched_ops \
 	futex_wait_uninitialized_heap \
 	futex_wait_private_mapped_file \
-	futex_wait
+	futex_wait \
+	futex_requeue
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 0000000000000..51485be6eb2f1
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-requeue"
+#define timeout_ns  30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+	struct timespec to;
+
+	to.tv_sec = 0;
+	to.tv_nsec = timeout_ns;
+
+	if (futex_wait(f1, *f1, &to, 0))
+		printf("waiter failed errno %d\n", errno);
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t waiter[10];
+	int res, ret = RET_PASS;
+	int c, i;
+	volatile futex_t _f1 = 0;
+	volatile futex_t f2 = 0;
+
+	f1 = &_f1;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_set_plan(2);
+	ksft_print_msg("%s: Test futex_requeue\n",
+		       basename(argv[0]));
+
+	/*
+	 * Requeue a waiter from f1 to f2, and wake f2.
+	 */
+	if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	info("Requeuing 1 futex from f1 to f2\n");
+	res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+
+	info("Waking 1 futex at f2\n");
+	res = futex_wake(&f2, 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_requeue simple succeeds\n");
+	}
+
+
+	/*
+	 * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+	 * At futex_wake, wake INT_MAX (should be exactly 7).
+	 */
+	for (i = 0; i < 10; i++) {
+		if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
+			error("pthread_create failed\n", errno);
+	}
+
+	usleep(WAKE_WAIT_US);
+
+	info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+	res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+	if (res != 10) {
+		ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+	info("Waking INT_MAX futexes at f2\n");
+	res = futex_wake(&f2, INT_MAX, 0);
+	if (res != 7) {
+		ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_requeue many succeeds\n");
+	}
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index d5e1430bcdca0..11a9d62290f57 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -76,3 +76,6 @@ echo
 
 echo
 ./futex_wait $COLOR
+
+echo
+./futex_requeue $COLOR
-- 
GitLab


From cf292e93f423fdebdf751a22ea01249196806328 Mon Sep 17 00:00:00 2001
From: Raphael Gault <raphael.gault@arm.com>
Date: Mon, 17 May 2021 13:02:56 -0500
Subject: [PATCH 3536/3804] arm64: Restrict undef hook for cpufeature registers

This commit tightens the mask of the mrs_hook declared in
arch/arm64/kernel/cpufeature.c, which emulates only feature register
accesses. This is necessary because the hook's mask was too permissive
and thus matched any mrs instruction, even those not related to the
emulated registers, which made the pmu emulation inefficient.

Signed-off-by: Raphael Gault <raphael.gault@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210517180256.2881891-1-robh@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpufeature.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 52389018ff335..dbae006f625f3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -3018,8 +3018,8 @@ static int emulate_mrs(struct pt_regs *regs, u32 insn)
 }
 
 static struct undef_hook mrs_hook = {
-	.instr_mask = 0xfff00000,
-	.instr_val  = 0xd5300000,
+	.instr_mask = 0xffff0000,
+	.instr_val  = 0xd5380000,
 	.pstate_mask = PSR_AA32_MODE_MASK,
 	.pstate_val = PSR_MODE_EL0t,
 	.fn = emulate_mrs,
-- 
GitLab


From 9bc146acc33125cd9f365b92f1c02ec89f639977 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 22 Jun 2021 12:33:29 +0800
Subject: [PATCH 3537/3804] regulator: hi6421v600: Fix setting wrong
 driver_data

The current code sets "config.driver_data = sreg", but sreg only has its
mutex initialized; the other fields are just zero. Fix it by passing info
to config.driver_data so each regulator can get its corresponding data via
rdev_get_drvdata().

Separate enable_mutex from struct hi6421_spmi_reg_info since only one
mutex is needed for the driver.

Fixes: d2dfd50a0b57 ("staging: hikey9xx: hi6421v600-regulator: move LDO config from DT")
Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210622043329.392072-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/hi6421v600-regulator.c | 26 ++++++++++++++----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index cf14109d486f5..48922704f0e14 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -16,13 +16,15 @@
 #include <linux/regulator/driver.h>
 #include <linux/spmi.h>
 
+struct hi6421_spmi_reg_priv {
+	/* Serialize regulator enable logic */
+	struct mutex enable_mutex;
+};
+
 struct hi6421_spmi_reg_info {
 	struct regulator_desc	desc;
 	u8			eco_mode_mask;
 	u32			eco_uA;
-
-	/* Serialize regulator enable logic */
-	struct mutex enable_mutex;
 };
 
 static const unsigned int ldo3_voltages[] = {
@@ -96,11 +98,12 @@ static const unsigned int ldo34_voltages[] = {
 
 static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev)
 {
-	struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev);
+	struct hi6421_spmi_reg_priv *priv;
 	int ret;
 
+	priv = dev_get_drvdata(rdev->dev.parent);
 	/* cannot enable more than one regulator at one time */
-	mutex_lock(&sreg->enable_mutex);
+	mutex_lock(&priv->enable_mutex);
 
 	ret = regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
 				 rdev->desc->enable_mask,
@@ -109,7 +112,7 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev)
 	/* Avoid powering up multiple devices at the same time */
 	usleep_range(rdev->desc->off_on_delay, rdev->desc->off_on_delay + 60);
 
-	mutex_unlock(&sreg->enable_mutex);
+	mutex_unlock(&priv->enable_mutex);
 
 	return ret;
 }
@@ -228,7 +231,7 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 {
 	struct device *pmic_dev = pdev->dev.parent;
 	struct regulator_config config = { };
-	struct hi6421_spmi_reg_info *sreg;
+	struct hi6421_spmi_reg_priv *priv;
 	struct hi6421_spmi_reg_info *info;
 	struct device *dev = &pdev->dev;
 	struct hi6421_spmi_pmic *pmic;
@@ -244,17 +247,18 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 	if (WARN_ON(!pmic))
 		return -ENODEV;
 
-	sreg = devm_kzalloc(dev, sizeof(*sreg), GFP_KERNEL);
-	if (!sreg)
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
-	mutex_init(&sreg->enable_mutex);
+	mutex_init(&priv->enable_mutex);
+	platform_set_drvdata(pdev, priv);
 
 	for (i = 0; i < ARRAY_SIZE(regulator_info); i++) {
 		info = &regulator_info[i];
 
 		config.dev = pdev->dev.parent;
-		config.driver_data = sreg;
+		config.driver_data = info;
 		config.regmap = pmic->regmap;
 
 		rdev = devm_regulator_register(dev, &info->desc, &config);
-- 
GitLab


From 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Tue, 1 Jun 2021 09:40:25 -0400
Subject: [PATCH 3538/3804] ceph: must hold snap_rwsem when filling inode for
 async create

...and add a lockdep assertion for it to ceph_fill_inode().

Cc: stable@vger.kernel.org # v5.7+
Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c  | 3 +++
 fs/ceph/inode.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 77fc037d5bebe..4dc96885acd05 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
 	struct ceph_inode_info *ci = ceph_inode(dir);
 	struct inode *inode;
 	struct timespec64 now;
+	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_vino vino = { .ino = req->r_deleg_ino,
 				  .snap = CEPH_NOSNAP };
 
@@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
 
 	ceph_file_layout_to_legacy(lo, &in.layout);
 
+	down_read(&mdsc->snap_rwsem);
 	ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
 			      req->r_fmode, NULL);
+	up_read(&mdsc->snap_rwsem);
 	if (ret) {
 		dout("%s failed to fill inode: %d\n", __func__, ret);
 		ceph_dir_clear_complete(dir);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e1c63adb196dd..df0c8a724609d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -777,6 +777,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
 	umode_t mode = le32_to_cpu(info->mode);
 	dev_t rdev = le32_to_cpu(info->rdev);
 
+	lockdep_assert_held(&mdsc->snap_rwsem);
+
 	dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__,
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
 	     ci->i_version);
-- 
GitLab


From 7a971e2c0767b6fc9a77c4108eceff0509c61cdb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Wed, 2 Jun 2021 12:46:07 -0400
Subject: [PATCH 3539/3804] ceph: fix error handling in ceph_atomic_open and
 ceph_lookup

Commit aa60cfc3f7ee broke the error handling in these functions such
that they don't handle non-ENOENT errors from ceph_mdsc_do_request
properly.

Move the checking of -ENOENT out of ceph_handle_snapdir and into the
callers, and if we get a different error, return it immediately.

Fixes: aa60cfc3f7ee ("ceph: don't use d_add in ceph_handle_snapdir")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/dir.c   | 22 ++++++++++++----------
 fs/ceph/file.c  | 14 ++++++++------
 fs/ceph/super.h |  2 +-
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 5624fae7a603d..9ba79b6531fba 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -668,14 +668,13 @@ out:
  * Handle lookups for the hidden .snap directory.
  */
 struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
-				   struct dentry *dentry, int err)
+				   struct dentry *dentry)
 {
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
 	struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
 
 	/* .snap dir? */
-	if (err == -ENOENT &&
-	    ceph_snap(parent) == CEPH_NOSNAP &&
+	if (ceph_snap(parent) == CEPH_NOSNAP &&
 	    strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
 		struct dentry *res;
 		struct inode *inode = ceph_get_snapdir(parent);
@@ -742,7 +741,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
 	struct ceph_mds_request *req;
-	struct dentry *res;
 	int op;
 	int mask;
 	int err;
@@ -793,12 +791,16 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
 	req->r_parent = dir;
 	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
 	err = ceph_mdsc_do_request(mdsc, NULL, req);
-	res = ceph_handle_snapdir(req, dentry, err);
-	if (IS_ERR(res)) {
-		err = PTR_ERR(res);
-	} else {
-		dentry = res;
-		err = 0;
+	if (err == -ENOENT) {
+		struct dentry *res;
+
+		res = ceph_handle_snapdir(req, dentry);
+		if (IS_ERR(res)) {
+			err = PTR_ERR(res);
+		} else {
+			dentry = res;
+			err = 0;
+		}
 	}
 	dentry = ceph_finish_lookup(req, dentry, err);
 	ceph_mdsc_put_request(req);  /* will dput(dentry) */
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4dc96885acd05..d51af36980324 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -742,14 +742,16 @@ retry:
 	err = ceph_mdsc_do_request(mdsc,
 				   (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
 				   req);
-	dentry = ceph_handle_snapdir(req, dentry, err);
-	if (IS_ERR(dentry)) {
-		err = PTR_ERR(dentry);
-		goto out_req;
+	if (err == -ENOENT) {
+		dentry = ceph_handle_snapdir(req, dentry);
+		if (IS_ERR(dentry)) {
+			err = PTR_ERR(dentry);
+			goto out_req;
+		}
+		err = 0;
 	}
-	err = 0;
 
-	if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+	if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
 
 	if (d_in_lookup(dentry)) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index db80d89556b10..839e6b0239eeb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1218,7 +1218,7 @@ extern const struct dentry_operations ceph_dentry_ops;
 extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order);
 extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
 extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
-			       struct dentry *dentry, int err);
+			       struct dentry *dentry);
 extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 					 struct dentry *dentry, int err);
 
-- 
GitLab


From 0c79378c01999bd60057c475f163ec807c24891f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 21 Jun 2021 19:53:55 +0200
Subject: [PATCH 3540/3804] spi: add ancillary device support

Introduce support for ancillary devices, similar to the existing
implementation for I2C. This is useful for devices having
multiple chip-selects; for example, some microcontrollers
provide a normal SPI interface and a flashing SPI interface.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20210621175359.126729-2-sebastian.reichel@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 137 +++++++++++++++++++++++++++++++---------
 include/linux/spi/spi.h |   2 +
 2 files changed, 108 insertions(+), 31 deletions(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 8553e7d48f660..572ad95c1d4f5 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -564,49 +564,23 @@ static void spi_cleanup(struct spi_device *spi)
 		spi->controller->cleanup(spi);
 }
 
-/**
- * spi_add_device - Add spi_device allocated with spi_alloc_device
- * @spi: spi_device to register
- *
- * Companion function to spi_alloc_device.  Devices allocated with
- * spi_alloc_device can be added onto the spi bus with this function.
- *
- * Return: 0 on success; negative errno on failure
- */
-int spi_add_device(struct spi_device *spi)
+static int __spi_add_device(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct device *dev = ctlr->dev.parent;
 	int status;
 
-	/* Chipselects are numbered 0..max; validate. */
-	if (spi->chip_select >= ctlr->num_chipselect) {
-		dev_err(dev, "cs%d >= max %d\n", spi->chip_select,
-			ctlr->num_chipselect);
-		return -EINVAL;
-	}
-
-	/* Set the bus ID string */
-	spi_dev_set_name(spi);
-
-	/* We need to make sure there's no other device with this
-	 * chipselect **BEFORE** we call setup(), else we'll trash
-	 * its configuration.  Lock against concurrent add() calls.
-	 */
-	mutex_lock(&spi_add_lock);
-
 	status = bus_for_each_dev(&spi_bus_type, NULL, spi, spi_dev_check);
 	if (status) {
 		dev_err(dev, "chipselect %d already in use\n",
 				spi->chip_select);
-		goto done;
+		return status;
 	}
 
 	/* Controller may unregister concurrently */
 	if (IS_ENABLED(CONFIG_SPI_DYNAMIC) &&
 	    !device_is_registered(&ctlr->dev)) {
-		status = -ENODEV;
-		goto done;
+		return -ENODEV;
 	}
 
 	/* Descriptors take precedence */
@@ -623,7 +597,7 @@ int spi_add_device(struct spi_device *spi)
 	if (status < 0) {
 		dev_err(dev, "can't setup %s, status %d\n",
 				dev_name(&spi->dev), status);
-		goto done;
+		return status;
 	}
 
 	/* Device may be bound to an active driver when this returns */
@@ -636,12 +610,64 @@ int spi_add_device(struct spi_device *spi)
 		dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev));
 	}
 
-done:
+	return status;
+}
+
+/**
+ * spi_add_device - Add spi_device allocated with spi_alloc_device
+ * @spi: spi_device to register
+ *
+ * Companion function to spi_alloc_device.  Devices allocated with
+ * spi_alloc_device can be added onto the spi bus with this function.
+ *
+ * Return: 0 on success; negative errno on failure
+ */
+int spi_add_device(struct spi_device *spi)
+{
+	struct spi_controller *ctlr = spi->controller;
+	struct device *dev = ctlr->dev.parent;
+	int status;
+
+	/* Chipselects are numbered 0..max; validate. */
+	if (spi->chip_select >= ctlr->num_chipselect) {
+		dev_err(dev, "cs%d >= max %d\n", spi->chip_select,
+			ctlr->num_chipselect);
+		return -EINVAL;
+	}
+
+	/* Set the bus ID string */
+	spi_dev_set_name(spi);
+
+	/* We need to make sure there's no other device with this
+	 * chipselect **BEFORE** we call setup(), else we'll trash
+	 * its configuration.  Lock against concurrent add() calls.
+	 */
+	mutex_lock(&spi_add_lock);
+	status = __spi_add_device(spi);
 	mutex_unlock(&spi_add_lock);
 	return status;
 }
 EXPORT_SYMBOL_GPL(spi_add_device);
 
+static int spi_add_device_locked(struct spi_device *spi)
+{
+	struct spi_controller *ctlr = spi->controller;
+	struct device *dev = ctlr->dev.parent;
+
+	/* Chipselects are numbered 0..max; validate. */
+	if (spi->chip_select >= ctlr->num_chipselect) {
+		dev_err(dev, "cs%d >= max %d\n", spi->chip_select,
+			ctlr->num_chipselect);
+		return -EINVAL;
+	}
+
+	/* Set the bus ID string */
+	spi_dev_set_name(spi);
+
+	WARN_ON(!mutex_is_locked(&spi_add_lock));
+	return __spi_add_device(spi);
+}
+
 /**
  * spi_new_device - instantiate one new SPI device
  * @ctlr: Controller to which device is connected
@@ -2125,6 +2151,55 @@ static void of_register_spi_devices(struct spi_controller *ctlr)
 static void of_register_spi_devices(struct spi_controller *ctlr) { }
 #endif
 
+/**
+ * spi_new_ancillary_device() - Register ancillary SPI device
+ * @spi:         Pointer to the main SPI device registering the ancillary device
+ * @chip_select: Chip Select of the ancillary device
+ *
+ * Register an ancillary SPI device; for example some chips have a chip-select
+ * for normal device usage and another one for setup/firmware upload.
+ *
+ * This may only be called from main SPI device's probe routine.
+ *
+ * Return: 0 on success; negative errno on failure
+ */
+struct spi_device *spi_new_ancillary_device(struct spi_device *spi,
+					     u8 chip_select)
+{
+	struct spi_device *ancillary;
+	int rc = 0;
+
+	/* Alloc an spi_device */
+	ancillary = spi_alloc_device(spi->controller);
+	if (!ancillary) {
+		rc = -ENOMEM;
+		goto err_out;
+	}
+
+	strlcpy(ancillary->modalias, "dummy", sizeof(ancillary->modalias));
+
+	/* Use provided chip-select for ancillary device */
+	ancillary->chip_select = chip_select;
+
+	/* Take over SPI mode/speed from SPI main device */
+	ancillary->max_speed_hz = spi->max_speed_hz;
+	ancillary->mode = ancillary->mode;
+
+	/* Register the new device */
+	rc = spi_add_device_locked(ancillary);
+	if (rc) {
+		dev_err(&spi->dev, "failed to register ancillary device\n");
+		goto err_out;
+	}
+
+	return ancillary;
+
+err_out:
+	spi_dev_put(ancillary);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(spi_new_ancillary_device);
+
 #ifdef CONFIG_ACPI
 struct acpi_spi_lookup {
 	struct spi_controller 	*ctlr;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f924160e995f4..3ada36175e5f8 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -299,6 +299,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
 		driver_unregister(&sdrv->driver);
 }
 
+extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 chip_select);
+
 /* use a define to avoid include chaining to get THIS_MODULE */
 #define spi_register_driver(driver) \
 	__spi_register_driver(THIS_MODULE, driver)
-- 
GitLab


From d90609a4b72dbfe42da2a55f3078c35e669948e0 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 21 Jun 2021 19:53:56 +0200
Subject: [PATCH 3541/3804] spi: dt-bindings: support devices with multiple
 chipselects

Add binding support for devices that have more than one
chip select. Typical examples are SPI-connected microcontrollers
that can also be programmed over SPI, like NXP Kinetis, or
chips with a configuration and a data chip select, such as
Microchip's MRF89XA transceiver.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20210621175359.126729-3-sebastian.reichel@collabora.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-controller.yaml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 0477396e4945d..faef4f6f55b85 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -114,8 +114,11 @@ patternProperties:
           Compatible of the SPI device.
 
       reg:
-        minimum: 0
-        maximum: 256
+        minItems: 1
+        maxItems: 256
+        items:
+          minimum: 0
+          maximum: 256
         description:
           Chip select used by the device.
 
-- 
GitLab


From 240001d4e3041832e8a2654adc3ccf1683132b92 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 13:12:34 +0200
Subject: [PATCH 3542/3804] x86/entry: Fix noinstr fail in
 __do_fast_syscall_32()

Fix:

  vmlinux.o: warning: objtool: __do_fast_syscall_32()+0xf5: call to trace_hardirqs_off() leaves .noinstr.text section

Fixes: 5d5675df792f ("x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210621120120.467898710@infradead.org
---
 arch/x86/entry/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7b2542b13ebd9..a6bf516257ecb 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -130,8 +130,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
 		/* User code screwed up. */
 		regs->ax = -EFAULT;
 
-		instrumentation_end();
 		local_irq_disable();
+		instrumentation_end();
 		irqentry_exit_to_user_mode(regs);
 		return false;
 	}
-- 
GitLab


From 84e60065df9ef03759115a7e48c04bbc0d292165 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 13:12:35 +0200
Subject: [PATCH 3543/3804] x86/xen: Fix noinstr fail in
 xen_pv_evtchn_do_upcall()

Fix:

  vmlinux.o: warning: objtool: xen_pv_evtchn_do_upcall()+0x23: call to irq_enter_rcu() leaves .noinstr.text section

Fixes: 359f01d1816f ("x86/entry: Use run_sysvec_on_irqstack_cond() for XEN upcall")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210621120120.532960208@infradead.org
---
 arch/x86/entry/common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index a6bf516257ecb..04bce95bc7e3b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -269,15 +269,16 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
 	irqentry_state_t state = irqentry_enter(regs);
 	bool inhcall;
 
+	instrumentation_begin();
 	run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
 
 	inhcall = get_and_clear_inhcall();
 	if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
-		instrumentation_begin();
 		irqentry_exit_cond_resched();
 		instrumentation_end();
 		restore_inhcall(inhcall);
 	} else {
+		instrumentation_end();
 		irqentry_exit(regs, state);
 	}
 }
-- 
GitLab


From 4c9c26f1e67648f41f28f8c997c5c9467a3dbbe4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 13:12:36 +0200
Subject: [PATCH 3544/3804] x86/xen: Fix noinstr fail in exc_xen_unknown_trap()

Fix:

  vmlinux.o: warning: objtool: exc_xen_unknown_trap()+0x7: call to printk() leaves .noinstr.text section

Fixes: 2e92493637a0 ("x86/xen: avoid warning in Xen pv guest with CONFIG_AMD_MEM_ENCRYPT enabled")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210621120120.606560778@infradead.org
---
 arch/x86/xen/enlighten_pv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index e87699aa2dc82..03149422dce2b 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -592,8 +592,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug)
 DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap)
 {
 	/* This should never happen and there is no way to handle it. */
+	instrumentation_begin();
 	pr_err("Unknown trap in Xen PV mode.");
 	BUG();
+	instrumentation_end();
 }
 
 #ifdef CONFIG_X86_MCE
-- 
GitLab


From 1f008d46f1243899d27fd034ab5c41985bd16cee Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 13:12:37 +0200
Subject: [PATCH 3545/3804] x86: Always inline task_size_max()

Fix:

  vmlinux.o: warning: objtool: handle_bug()+0x10: call to task_size_max() leaves .noinstr.text section

When #UD isn't a BUG, we shouldn't violate noinstr (we'll still
probably die, but that's another story).

Fixes: 025768a966a3 ("x86/cpu: Use alternative to generate the TASK_SIZE_MAX constant")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210621120120.682468274@infradead.org
---
 arch/x86/include/asm/page_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index ca840fec77765..4bde0dc66100c 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -75,7 +75,7 @@ void copy_page(void *to, void *from);
  *
  * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
-static inline unsigned long task_size_max(void)
+static __always_inline unsigned long task_size_max(void)
 {
 	unsigned long ret;
 
-- 
GitLab


From 49faa77759b211fff344898edc23bb780707fff5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 21 Jun 2021 13:12:38 +0200
Subject: [PATCH 3546/3804] locking/lockdep: Improve noinstr vs errors

Better handle the failure paths.

  vmlinux.o: warning: objtool: debug_locks_off()+0x23: call to console_verbose() leaves .noinstr.text section
  vmlinux.o: warning: objtool: debug_locks_off()+0x19: call to __kasan_check_write() leaves .noinstr.text section

  debug_locks_off+0x19/0x40:
  instrument_atomic_write at include/linux/instrumented.h:86
  (inlined by) __debug_locks_off at include/linux/debug_locks.h:17
  (inlined by) debug_locks_off at lib/debug_locks.c:41

Fixes: 6eebad1ad303 ("lockdep: __always_inline more for noinstr")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20210621120120.784404944@infradead.org
---
 include/linux/debug_locks.h | 2 ++
 kernel/locking/lockdep.c    | 4 +++-
 lib/debug_locks.c           | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 2915f56ad4214..edb5c186b0b7a 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -27,8 +27,10 @@ extern int debug_locks_off(void);
 	int __ret = 0;							\
 									\
 	if (!oops_in_progress && unlikely(c)) {				\
+		instrumentation_begin();				\
 		if (debug_locks_off() && !debug_locks_silent)		\
 			WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c);		\
+		instrumentation_end();					\
 		__ret = 1;						\
 	}								\
 	__ret;								\
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7641bd4072390..e32313072506d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -843,7 +843,7 @@ static int count_matching_names(struct lock_class *new_class)
 }
 
 /* used from NMI context -- must be lockless */
-static __always_inline struct lock_class *
+static noinstr struct lock_class *
 look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
 {
 	struct lockdep_subclass_key *key;
@@ -851,12 +851,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass)
 	struct lock_class *class;
 
 	if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
+		instrumentation_begin();
 		debug_locks_off();
 		printk(KERN_ERR
 			"BUG: looking up invalid subclass: %u\n", subclass);
 		printk(KERN_ERR
 			"turning off the locking correctness validator.\n");
 		dump_stack();
+		instrumentation_end();
 		return NULL;
 	}
 
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 06d3135bd184c..a75ee30b77cb8 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
 /*
  * Generic 'turn off all lock debugging' function:
  */
-noinstr int debug_locks_off(void)
+int debug_locks_off(void)
 {
 	if (debug_locks && __debug_locks_off()) {
 		if (!debug_locks_silent) {
-- 
GitLab


From fdaba61ef8a268d4136d0a113d153f7a89eb9984 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Mon, 21 Jun 2021 19:43:30 +0200
Subject: [PATCH 3547/3804] sched/fair: Ensure that the CFS parent is added
 after unthrottling

Ensure that a CFS parent will be in the list whenever one of its children is also
in the list.

A warning on rq->tmp_alone_branch != &rq->leaf_cfs_rq_list has been
reported while running LTP test cfs_bandwidth01.

Odin Ugedal found the root cause:

	$ tree /sys/fs/cgroup/ltp/ -d --charset=ascii
	/sys/fs/cgroup/ltp/
	|-- drain
	`-- test-6851
	    `-- level2
		|-- level3a
		|   |-- worker1
		|   `-- worker2
		`-- level3b
		    `-- worker3

Timeline (ish):
- worker3 gets throttled
- level3b is decayed, since it has no more load
- level2 gets throttled
- worker3 gets unthrottled
- level2 gets unthrottled
  - worker3 is added to list
  - level3b is not added to list, since nr_running==0 and is decayed

 [ Vincent Guittot: Rebased and updated to fix for the reported warning. ]

Fixes: a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Acked-by: Odin Ugedal <odin@uged.al>
Link: https://lore.kernel.org/r/20210621174330.11258-1-vincent.guittot@linaro.org
---
 kernel/sched/fair.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bfaa6e1f6067d..23663318fb81a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3298,6 +3298,31 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Because list_add_leaf_cfs_rq always places a child cfs_rq on the list
+ * immediately before a parent cfs_rq, and cfs_rqs are removed from the list
+ * bottom-up, we only have to test whether the cfs_rq before us on the list
+ * is our child.
+ * If cfs_rq is not on the list, test whether a child needs its to be added to
+ * connect a branch to the tree  * (see list_add_leaf_cfs_rq() for details).
+ */
+static inline bool child_cfs_rq_on_list(struct cfs_rq *cfs_rq)
+{
+	struct cfs_rq *prev_cfs_rq;
+	struct list_head *prev;
+
+	if (cfs_rq->on_list) {
+		prev = cfs_rq->leaf_cfs_rq_list.prev;
+	} else {
+		struct rq *rq = rq_of(cfs_rq);
+
+		prev = rq->tmp_alone_branch;
+	}
+
+	prev_cfs_rq = container_of(prev, struct cfs_rq, leaf_cfs_rq_list);
+
+	return (prev_cfs_rq->tg->parent == cfs_rq->tg);
+}
 
 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 {
@@ -3313,6 +3338,9 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
 	if (cfs_rq->avg.runnable_sum)
 		return false;
 
+	if (child_cfs_rq_on_list(cfs_rq))
+		return false;
+
 	return true;
 }
 
-- 
GitLab


From d8ac76cdd1755b21e8c008c28d0b7251c0b14986 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 9 Jun 2021 11:25:03 +0100
Subject: [PATCH 3548/3804] btrfs: send: fix invalid path for unlink operations
 after parent orphanization

During an incremental send operation, when processing the new references
for the current inode, we might send an unlink operation for another inode
that has a conflicting path and has more than one hard link. However this
path was computed and cached before we processed previous new references
for the current inode. We may have orphanized a directory of that path
while processing a previous new reference, in which case the path will
be invalid and cause the receiver process to fail.

The following reproducer triggers the problem and explains how/why it
happens in its comments:

  $ cat test-send-unlink.sh
  #!/bin/bash

  DEV=/dev/sdi
  MNT=/mnt/sdi

  mkfs.btrfs -f $DEV >/dev/null
  mount $DEV $MNT

  # Create our test files and directory. Inode 259 (file3) has two hard
  # links.
  touch $MNT/file1
  touch $MNT/file2
  touch $MNT/file3

  mkdir $MNT/A
  ln $MNT/file3 $MNT/A/hard_link

  # Filesystem looks like:
  #
  # .                                     (ino 256)
  # |----- file1                          (ino 257)
  # |----- file2                          (ino 258)
  # |----- file3                          (ino 259)
  # |----- A/                             (ino 260)
  #        |---- hard_link                (ino 259)
  #

  # Now create the base snapshot, which is going to be the parent snapshot
  # for a later incremental send.
  btrfs subvolume snapshot -r $MNT $MNT/snap1
  btrfs send -f /tmp/snap1.send $MNT/snap1

  # Move inode 257 into directory inode 260. This results in computing the
  # path for inode 260 as "/A" and caching it.
  mv $MNT/file1 $MNT/A/file1

  # Move inode 258 (file2) into directory inode 260, with a name of
  # "hard_link", moving first inode 259 away since it currently has that
  # location and name.
  mv $MNT/A/hard_link $MNT/tmp
  mv $MNT/file2 $MNT/A/hard_link

  # Now rename inode 260 to something else (B for example) and then create
  # a hard link for inode 258 that has the old name and location of inode
  # 260 ("/A").
  mv $MNT/A $MNT/B
  ln $MNT/B/hard_link $MNT/A

  # Filesystem now looks like:
  #
  # .                                     (ino 256)
  # |----- tmp                            (ino 259)
  # |----- file3                          (ino 259)
  # |----- B/                             (ino 260)
  # |      |---- file1                    (ino 257)
  # |      |---- hard_link                (ino 258)
  # |
  # |----- A                              (ino 258)

  # Create another snapshot of our subvolume and use it for an incremental
  # send.
  btrfs subvolume snapshot -r $MNT $MNT/snap2
  btrfs send -f /tmp/snap2.send -p $MNT/snap1 $MNT/snap2

  # Now unmount the filesystem, create a new one, mount it and try to
  # apply both send streams to recreate both snapshots.
  umount $DEV

  mkfs.btrfs -f $DEV >/dev/null

  mount $DEV $MNT

  # First add the first snapshot to the new filesystem by applying the
  # first send stream.
  btrfs receive -f /tmp/snap1.send $MNT

  # The incremental receive operation below used to fail with the
  # following error:
  #
  #    ERROR: unlink A/hard_link failed: No such file or directory
  #
  # This is because when send is processing inode 257, it generates the
  # path for inode 260 as "/A", since that inode is its parent in the send
  # snapshot, and caches that path.
  #
  # Later when processing inode 258, it first processes its new reference
  # that has the path of "/A", which results in orphanizing inode 260
  # because there is a path collision. This results in issuing a rename
  # operation from "/A" to "/o260-6-0".
  #
  # Finally when processing the new reference "B/hard_link" for inode 258,
  # it notices that it collides with inode 259 (not yet processed, because
  # it has a higher inode number), since that inode has the name
  # "hard_link" under the directory inode 260. It also checks that inode
  # 259 has two hardlinks, so it decides to issue a unlink operation for
  # the name "hard_link" for inode 259. However the path passed to the
  # unlink operation is "/A/hard_link", which is incorrect since currently
  # "/A" does not exist, due to the orphanization of inode 260 mentioned
  # before. The path is incorrect because it was computed and cached
  # before the orphanization. This causes the receiver to fail with
  # the above error.
  btrfs receive -f /tmp/snap2.send $MNT

  umount $MNT

When running the test, it fails like this:

  $ ./test-send-unlink.sh
  Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap1'
  At subvol /mnt/sdi/snap1
  Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap2'
  At subvol /mnt/sdi/snap2
  At subvol snap1
  At snapshot snap2
  ERROR: unlink A/hard_link failed: No such file or directory

Fix this by recomputing a path before issuing an unlink operation when
processing the new references for the current inode if we previously
have orphanized a directory.

A test case for fstests will follow soon.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index bd69db72acc5e..a2b3c594379d6 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4064,6 +4064,17 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 				if (ret < 0)
 					goto out;
 			} else {
+				/*
+				 * If we previously orphanized a directory that
+				 * collided with a new reference that we already
+				 * processed, recompute the current path because
+				 * that directory may be part of the path.
+				 */
+				if (orphanized_dir) {
+					ret = refresh_ref_path(sctx, cur);
+					if (ret < 0)
+						goto out;
+				}
 				ret = send_unlink(sctx, cur->full_path);
 				if (ret < 0)
 					goto out;
-- 
GitLab


From b05fbcc36be1f8597a1febef4892053a0b2f3f60 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 10 Jun 2021 05:23:02 +0000
Subject: [PATCH 3549/3804] btrfs: disable build on platforms having page size
 256K

With a config having PAGE_SIZE set to 256K, the BTRFS build fails
with the following message:

  include/linux/compiler_types.h:326:38: error: call to
  '__compiletime_assert_791' declared with attribute error:
  BUILD_BUG_ON failed: (BTRFS_MAX_COMPRESSED % PAGE_SIZE) != 0

Since BTRFS_MAX_COMPRESSED is 128K, BTRFS cannot currently support
platforms with 256K pages.

There are two platforms that can select 256K pages:
 - hexagon
 - powerpc

Disable BTRFS when 256K page size is selected. Supporting this would
require changes to the subpage mode that's currently being developed.
Given that 256K is many times larger than page sizes commonly used and
for what the algorithms and structures have been tuned, it's out of
scope and disabling build is a reasonable option.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 68b95ad82126e..520a0f6a7d9e9 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -18,6 +18,8 @@ config BTRFS_FS
 	select RAID6_PQ
 	select XOR_BLOCKS
 	select SRCU
+	depends on !PPC_256K_PAGES	# powerpc
+	depends on !PAGE_SIZE_256KB	# hexagon
 
 	help
 	  Btrfs is a general purpose copy-on-write filesystem with extents,
-- 
GitLab


From bb930007c006c5d7b8ecba41bb5bafd2dcd1fa79 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Fri, 11 Jun 2021 14:51:15 +0800
Subject: [PATCH 3550/3804] btrfs: send: use list_move_tail instead of
 list_del/list_add_tail

Use list_move_tail() instead of list_del() + list_add_tail() as it's
doing the same thing and allows further cleanups.  Open code
name_cache_used() as there is only one user.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a2b3c594379d6..974274c7e26ec 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2077,16 +2077,6 @@ static struct name_cache_entry *name_cache_search(struct send_ctx *sctx,
 	return NULL;
 }
 
-/*
- * Removes the entry from the list and adds it back to the end. This marks the
- * entry as recently used so that name_cache_clean_unused does not remove it.
- */
-static void name_cache_used(struct send_ctx *sctx, struct name_cache_entry *nce)
-{
-	list_del(&nce->list);
-	list_add_tail(&nce->list, &sctx->name_cache_list);
-}
-
 /*
  * Remove some entries from the beginning of name_cache_list.
  */
@@ -2147,7 +2137,13 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx,
 			kfree(nce);
 			nce = NULL;
 		} else {
-			name_cache_used(sctx, nce);
+			/*
+			 * Removes the entry from the list and adds it back to
+			 * the end.  This marks the entry as recently used so
+			 * that name_cache_clean_unused does not remove it.
+			 */
+			list_move_tail(&nce->list, &sctx->name_cache_list);
+
 			*parent_ino = nce->parent_ino;
 			*parent_gen = nce->parent_gen;
 			ret = fs_path_add(dest, nce->name, nce->name_len);
-- 
GitLab


From c86bdc9b7c2c396ad476ecbb20738d2720bf0992 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 11 Jun 2021 09:31:06 +0800
Subject: [PATCH 3551/3804] btrfs: remove a stale comment for
 btrfs_decompress_bio()

Since commit 8140dc30a432 ("btrfs: btrfs_decompress_bio() could accept
compressed_bio instead"), btrfs_decompress_bio() accepts
"struct compressed_bio" rather than an open-coded parameter list.

Thus the comment for the parameter list is no longer needed.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 35ca49893803f..9a023ae0f98b4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1212,20 +1212,6 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 	return ret;
 }
 
-/*
- * pages_in is an array of pages with compressed data.
- *
- * disk_start is the starting logical offset of this array in the file
- *
- * orig_bio contains the pages from the file that we want to decompress into
- *
- * srclen is the number of bytes in pages_in
- *
- * The basic idea is that we have a bio that was created by readpages.
- * The pages in the bio are for the uncompressed data, and they may not
- * be contiguous.  They all correspond to the range of bytes covered by
- * the compressed extent.
- */
 static int btrfs_decompress_bio(struct compressed_bio *cb)
 {
 	struct list_head *workspace;
-- 
GitLab


From 1a9fd4172d5c8ba64735b3aef7eed643d398ce05 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 21 May 2021 17:42:23 +0200
Subject: [PATCH 3552/3804] btrfs: fix typos in comments

Fix typos that have snuck in since the last round. Found by codespell.

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/backref.c                | 2 +-
 fs/btrfs/ctree.h                  | 6 +++---
 fs/btrfs/delalloc-space.c         | 2 +-
 fs/btrfs/dev-replace.c            | 2 +-
 fs/btrfs/discard.c                | 2 +-
 fs/btrfs/disk-io.c                | 2 +-
 fs/btrfs/extent-tree.c            | 2 +-
 fs/btrfs/file-item.c              | 2 +-
 fs/btrfs/inode.c                  | 4 ++--
 fs/btrfs/ioctl.c                  | 2 +-
 fs/btrfs/locking.c                | 4 ++--
 fs/btrfs/props.c                  | 2 +-
 fs/btrfs/qgroup.c                 | 2 +-
 fs/btrfs/scrub.c                  | 2 +-
 fs/btrfs/send.c                   | 2 +-
 fs/btrfs/space-info.c             | 4 ++--
 fs/btrfs/tests/extent-map-tests.c | 2 +-
 fs/btrfs/volumes.c                | 8 ++++----
 fs/btrfs/zoned.c                  | 4 ++--
 include/uapi/linux/btrfs.h        | 4 ++--
 include/uapi/linux/btrfs_tree.h   | 4 ++--
 21 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 117d423fdb930..7a8a2fc195338 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2675,7 +2675,7 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache,
  *
  * @ref_key:	The same as @ref_key in  handle_direct_tree_backref()
  * @tree_key:	The first key of this tree block.
- * @path:	A clean (released) path, to avoid allocating path everytime
+ * @path:	A clean (released) path, to avoid allocating path every time
  *		the function get called.
  */
 static int handle_indirect_tree_backref(struct btrfs_backref_cache *cache,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 82e58f2fbe0af..cbdabecffb05c 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2757,9 +2757,9 @@ enum btrfs_reserve_flush_enum {
 	/*
 	 * Flush space by above mentioned methods and by:
 	 * - Running delayed iputs
-	 * - Commiting transaction
+	 * - Committing transaction
 	 *
-	 * Can be interruped by fatal signal.
+	 * Can be interrupted by a fatal signal.
 	 */
 	BTRFS_RESERVE_FLUSH_DATA,
 	BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE,
@@ -2769,7 +2769,7 @@ enum btrfs_reserve_flush_enum {
 	 * Pretty much the same as FLUSH_ALL, but can also steal space from
 	 * global rsv.
 	 *
-	 * Can be interruped by fatal signal.
+	 * Can be interrupted by a fatal signal.
 	 */
 	BTRFS_RESERVE_FLUSH_ALL_STEAL,
 };
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index 56642ca7af105..2059d1504149a 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -89,7 +89,7 @@
  *    ->outstanding_extents += 1 (current value is 1)
  *
  *  -> set_delalloc
- *    ->outstanding_extents += 1 (currrent value is 2)
+ *    ->outstanding_extents += 1 (current value is 2)
  *
  *  -> btrfs_delalloc_release_extents()
  *    ->outstanding_extents -= 1 (current value is 1)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index d05f73530af7a..d029be40ea6f0 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -37,7 +37,7 @@
  * - Write duplication
  *
  *   All new writes will be written to both target and source devices, so even
- *   if replace gets canceled, sources device still contans up-to-date data.
+ *   if replace gets canceled, sources device still contains up-to-date data.
  *
  *   Location:		handle_ops_on_dev_replace() from __btrfs_map_block()
  *   Start:		btrfs_dev_replace_start()
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 306ff20af70f0..e1b7bd927d691 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -624,7 +624,7 @@ void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
  * @fs_info: fs_info of interest
  *
  * The unused_bgs list needs to be punted to the discard lists because the
- * order of operations is changed.  In the normal sychronous discard path, the
+ * order of operations is changed.  In the normal synchronous discard path, the
  * block groups are trimmed via a single large trim in transaction commit.  This
  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
  * it must be done before going down the unused_bgs path.
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d1d5091a83857..544bb7a82e574 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3471,7 +3471,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	 * At this point we know all the devices that make this filesystem,
 	 * including the seed devices but we don't know yet if the replace
 	 * target is required. So free devices that are not part of this
-	 * filesystem but skip the replace traget device which is checked
+	 * filesystem but skip the replace target device which is checked
 	 * below in btrfs_init_dev_replace().
 	 */
 	btrfs_free_extra_devids(fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d2f39a122d89d..421120d6a14bd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1425,7 +1425,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
  *		    bytenr of the parent block. Since new extents are always
  *		    created with indirect references, this will only be the case
  *		    when relocating a shared extent. In that case, root_objectid
- *		    will be BTRFS_TREE_RELOC_OBJECTID. Otheriwse, parent must
+ *		    will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
  *		    be 0
  *
  * @root_objectid:  The id of the root where this modification has originated,
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 441cee7fbb629..df6631eefc652 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -618,7 +618,7 @@ fail:
  * @file_start:  offset in file this bio begins to describe
  * @contig:	 Boolean. If true/1 means all bio vecs in this bio are
  *		 contiguous and they begin at @file_start in the file. False/0
- *		 means this bio can contains potentially discontigous bio vecs
+ *		 means this bio can contain potentially discontiguous bio vecs
  *		 so the logical offset of each should be calculated separately.
  */
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 794d906cba6c1..a2494c645681d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2784,7 +2784,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 	/*
 	 * If we dropped an inline extent here, we know the range where it is
 	 * was not marked with the EXTENT_DELALLOC_NEW bit, so we update the
-	 * number of bytes only for that range contaning the inline extent.
+	 * number of bytes only for that range containing the inline extent.
 	 * The remaining of the range will be processed when clearning the
 	 * EXTENT_DELALLOC_BIT bit through the ordered extent completion.
 	 */
@@ -4114,7 +4114,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 	 * This is a placeholder inode for a subvolume we didn't have a
 	 * reference to at the time of the snapshot creation.  In the meantime
 	 * we could have renamed the real subvol link into our snapshot, so
-	 * depending on btrfs_del_root_ref to return -ENOENT here is incorret.
+	 * depending on btrfs_del_root_ref to return -ENOENT here is incorrect.
 	 * Instead simply lookup the dir_index_item for this entry so we can
 	 * remove it.  Otherwise we know we have a ref to the root and we can
 	 * call btrfs_del_root_ref, and it _shouldn't_ fail.
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f83eb4a225cce..0ba98e08a0290 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2984,7 +2984,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 				err = PTR_ERR(subvol_name_ptr);
 				goto free_parent;
 			}
-			/* subvol_name_ptr is already NULL termined */
+			/* subvol_name_ptr is already nul terminated */
 			subvol_name = (char *)kbasename(subvol_name_ptr);
 		}
 	} else {
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 5fafc5e89bb76..313d9d685adb7 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -57,7 +57,7 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
 /*
  * Try-lock for read.
  *
- * Retrun 1 if the rwlock has been taken, 0 otherwise
+ * Return 1 if the rwlock has been taken, 0 otherwise
  */
 int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 {
@@ -72,7 +72,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
 /*
  * Try-lock for write.
  *
- * Retrun 1 if the rwlock has been taken, 0 otherwise
+ * Return 1 if the rwlock has been taken, 0 otherwise
  */
 int btrfs_try_tree_write_lock(struct extent_buffer *eb)
 {
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 2dcb1cb216349..a17e53e700b1f 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -348,7 +348,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
 
 		/*
 		 * This is not strictly necessary as the property should be
-		 * valid, but in case it isn't, don't propagate it futher.
+		 * valid, but in case it isn't, don't propagate it further.
 		 */
 		ret = h->validate(value, strlen(value));
 		if (ret)
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 3ded812f522cc..d72885903b8c9 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2521,7 +2521,7 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 	int ret = 0;
 
 	/*
-	 * If quotas get disabled meanwhile, the resouces need to be freed and
+	 * If quotas get disabled meanwhile, the resources need to be freed and
 	 * we can't just exit here.
 	 */
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b60466db5654e..088641ba7a8e6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2486,7 +2486,7 @@ static void drop_csum_range(struct scrub_ctx *sctx, struct btrfs_ordered_sum *su
  * the csum into @csum.
  *
  * The search source is sctx->csum_list, which is a pre-populated list
- * storing bytenr ordered csum ranges.  We're reponsible to cleanup any range
+ * storing bytenr ordered csum ranges.  We're responsible to cleanup any range
  * that is before @logical.
  *
  * Return 0 if there is no csum for the range.
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 974274c7e26ec..6e69302828ef2 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6514,7 +6514,7 @@ static int changed_extent(struct send_ctx *sctx,
 	 * updates the inode item, but it only changes the iversion (sequence
 	 * field in the inode item) of the inode, so if a file is deduplicated
 	 * the same amount of times in both the parent and send snapshots, its
-	 * iversion becames the same in both snapshots, whence the inode item is
+	 * iversion becomes the same in both snapshots, whence the inode item is
 	 * the same on both snapshots.
 	 */
 	if (sctx->cur_ino != sctx->cmp_key->objectid)
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 42d0fa2092d4b..f26fdb7a17e87 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -389,7 +389,7 @@ again:
 
 		ticket = list_first_entry(head, struct reserve_ticket, list);
 
-		/* Check and see if our ticket can be satisified now. */
+		/* Check and see if our ticket can be satisfied now. */
 		if ((used + ticket->bytes <= space_info->total_bytes) ||
 		    btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
 					 flush)) {
@@ -961,7 +961,7 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 		 * if it doesn't feel like the space reclaimed by the commit
 		 * would result in the ticket succeeding.  However if we have a
 		 * smaller ticket in the queue it may be small enough to be
-		 * satisified by committing the transaction, so if any
+		 * satisfied by committing the transaction, so if any
 		 * subsequent ticket is smaller than the first ticket go ahead
 		 * and send us back for another loop through the enospc flushing
 		 * code.
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index c0aefe6dee0b6..319fed82d741f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -557,7 +557,7 @@ int btrfs_test_extent_map(void)
 		{
 			/*
 			 * Test a chunk with 2 data stripes one of which
-			 * interesects the physical address of the super block
+			 * intersects the physical address of the super block
 			 * is correctly recognised.
 			 */
 			.raid_type = BTRFS_BLOCK_GROUP_RAID1,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 80e962788396a..582695cee9d13 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -717,7 +717,7 @@ static struct btrfs_fs_devices *find_fsid_changed(
 
 	/*
 	 * Handles the case where scanned device is part of an fs that had
-	 * multiple successful changes of FSID but curently device didn't
+	 * multiple successful changes of FSID but currently device didn't
 	 * observe it. Meaning our fsid will be different than theirs. We need
 	 * to handle two subcases :
 	 *  1 - The fs still continues to have different METADATA/FSID uuids.
@@ -1550,7 +1550,7 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
  * check to ensure dev extents are not double allocated.
  * This makes the function safe to allocate dev extents but may not report
  * correct usable device space, as device extent freed in current transaction
- * is not reported as avaiable.
+ * is not reported as available.
  */
 static int find_free_dev_extent_start(struct btrfs_device *device,
 				u64 num_bytes, u64 search_start, u64 *start,
@@ -6152,7 +6152,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
 	offset = logical - em->start;
 	/* Len of a stripe in a chunk */
 	stripe_len = map->stripe_len;
-	/* Stripe wher this block falls in */
+	/* Stripe where this block falls in */
 	stripe_nr = div64_u64(offset, stripe_len);
 	/* Offset of stripe in the chunk */
 	stripe_offset = stripe_nr * stripe_len;
@@ -7863,7 +7863,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info,
 		ret = -EUCLEAN;
 	}
 
-	/* Make sure no dev extent is beyond device bondary */
+	/* Make sure no dev extent is beyond device boundary */
 	dev = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL);
 	if (!dev) {
 		btrfs_err(fs_info, "failed to find devid %llu", devid);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 549912120cfe4..297c0b1c0634d 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -81,7 +81,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
 	 *   *: Special case, no superblock is written
 	 *   0: Use write pointer of zones[0]
 	 *   1: Use write pointer of zones[1]
-	 *   C: Compare super blcoks from zones[0] and zones[1], use the latest
+	 *   C: Compare super blocks from zones[0] and zones[1], use the latest
 	 *      one determined by generation
 	 *   x: Invalid state
 	 */
@@ -433,7 +433,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
 		}
 
 		/*
-		 * If zones[0] is conventional, always use the beggining of the
+		 * If zones[0] is conventional, always use the beginning of the
 		 * zone to record superblock. No need to validate in that case.
 		 */
 		if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type ==
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 5df73001aad4e..22cd037123fad 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -154,7 +154,7 @@ struct btrfs_scrub_progress {
 	__u64 tree_bytes_scrubbed;	/* # of tree bytes scrubbed */
 	__u64 read_errors;		/* # of read errors encountered (EIO) */
 	__u64 csum_errors;		/* # of failed csum checks */
-	__u64 verify_errors;		/* # of occurences, where the metadata
+	__u64 verify_errors;		/* # of occurrences, where the metadata
 					 * of a tree block did not match the
 					 * expected values, like generation or
 					 * logical */
@@ -174,7 +174,7 @@ struct btrfs_scrub_progress {
 	__u64 last_physical;		/* last physical address scrubbed. In
 					 * case a scrub was aborted, this can
 					 * be used to restart the scrub */
-	__u64 unverified_errors;	/* # of occurences where a read for a
+	__u64 unverified_errors;	/* # of occurrences where a read for a
 					 * full (64k) bio failed, but the re-
 					 * check succeeded for each 4k piece.
 					 * Intermittent error. */
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 58d7cff9afb17..ccdb40fe40dc2 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -59,7 +59,7 @@
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
-/* orhpan objectid for tracking unlinked/truncated files */
+/* orphan objectid for tracking unlinked/truncated files */
 #define BTRFS_ORPHAN_OBJECTID -5ULL
 
 /* does write ahead logging to speed up fsyncs */
@@ -275,7 +275,7 @@
 #define BTRFS_PERSISTENT_ITEM_KEY	249
 
 /*
- * Persistantly stores the device replace state in the device tree.
+ * Persistently stores the device replace state in the device tree.
  * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
  */
 #define BTRFS_DEV_REPLACE_KEY	250
-- 
GitLab


From da658b5708c68b03b395b7c5c50bae47826db8cc Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 4 Jun 2021 15:00:05 +0200
Subject: [PATCH 3553/3804] btrfs: sysfs: export dev stats in devinfo directory

The device stats can be read by ioctl, wrapped by command 'btrfs device
stats'. Provide another source from which to read the information:
/sys/fs/btrfs/FSID/devinfo/DEVID/error_stats. The format is a list of
'key value' pairs, one per line, which is common in other stat files.
The names are the same as used in other device stat outputs.

The stats are all in one file as it's the snapshot of all available
stats. The 'one value per file' format is not very suitable here. The
stats should be valid right after the stats item is read from disk,
shortly after initializing the device.

In case the stats are not yet valid, print just 'invalid' as the file
contents.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 4b508938e7285..ebde1d09e686b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1495,7 +1495,36 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
 }
 BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
 
+static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
+		struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_device *device = container_of(kobj, struct btrfs_device,
+						   devid_kobj);
+
+	if (!device->dev_stats_valid)
+		return scnprintf(buf, PAGE_SIZE, "invalid\n");
+
+	/*
+	 * Print all at once so we get a snapshot of all values from the same
+	 * time. Keep them in sync and in order of definition of
+	 * btrfs_dev_stat_values.
+	 */
+	return scnprintf(buf, PAGE_SIZE,
+		"write_errs %d\n"
+		"read_errs %d\n"
+		"flush_errs %d\n"
+		"corruption_errs %d\n"
+		"generation_errs %d\n",
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_WRITE_ERRS),
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_READ_ERRS),
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_FLUSH_ERRS),
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_CORRUPTION_ERRS),
+		btrfs_dev_stat_read(device, BTRFS_DEV_STAT_GENERATION_ERRS));
+}
+BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);
+
 static struct attribute *devid_attrs[] = {
+	BTRFS_ATTR_PTR(devid, error_stats),
 	BTRFS_ATTR_PTR(devid, in_fs_metadata),
 	BTRFS_ATTR_PTR(devid, missing),
 	BTRFS_ATTR_PTR(devid, replace_target),
-- 
GitLab


From 44365827cccc1441d4187509257e5276af133a49 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Mon, 21 Jun 2021 10:21:14 +0900
Subject: [PATCH 3554/3804] btrfs: fix unbalanced unlock in
 qgroup_account_snapshot()

qgroup_account_snapshot() is trying to unlock the not taken
tree_log_mutex in an error path. Since ret != 0 in this case, we can
just return from here.

Fixes: 2a4d84c11a87 ("btrfs: move delayed ref flushing for qgroup into qgroup helper")
CC: stable@vger.kernel.org # 5.12+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/transaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 73df8b81496e5..57be211d6db6e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1478,7 +1478,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
 	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
 	if (ret) {
 		btrfs_abort_transaction(trans, ret);
-		goto out;
+		return ret;
 	}
 
 	/*
-- 
GitLab


From f2165627319ffd33a6217275e5690b1ab5c45763 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 14 Jun 2021 12:45:18 +0200
Subject: [PATCH 3555/3804] btrfs: compression: don't try to compress if we
 don't have enough pages

The early check if we should attempt compression does not take into
account the number of input pages. It can happen that there's only one
page, eg. a tail page after some ranges of the BTRFS_MAX_UNCOMPRESSED
have been processed, or an isolated page that won't be converted to an
inline extent.

The single page would be compressed but a later check would drop it
again because the result size must be at least one block shorter than
the input. That can never work with just one page.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a2494c645681d..e6eb20987351d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -629,7 +629,7 @@ again:
 	 * inode has not been flagged as nocompress.  This flag can
 	 * change at any time if we discover bad compression ratios.
 	 */
-	if (inode_need_compress(BTRFS_I(inode), start, end)) {
+	if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) {
 		WARN_ON(pages);
 		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
 		if (!pages) {
-- 
GitLab


From 5548c8c6f55bf0097075b3720e14857e3272429f Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 14 Jun 2021 18:10:04 +0200
Subject: [PATCH 3556/3804] btrfs: props: change how empty value is interpreted

Based on user feedback and actual problems with compression property,
there's no support to unset any compression options, or to force no
compression flag.

Note: This has changed recently in e2fsprogs 1.46.2, 'chattr +m'
(setting NOCOMPRESS).

In btrfs properties, the empty value should really mean reset to
defaults, for all properties in general. Right now there's only the
compression one, so this change should not cause too many problems.

Old behaviour:

  $ lsattr file
  ---------------------- file
  # the NOCOMPRESS bit is set
  $ btrfs prop set file compression ''
  $ lsattr file
  ---------------------m file

This is equivalent to 'btrfs prop set file compression no' in current
btrfs-progs as the 'no' or 'none' values are translated to an empty
string.

This is where the new behaviour is different: empty string drops the
compression flag (-c) and nocompress (-m):

  $ lsattr file
  ---------------------- file
  # No change
  $ btrfs prop set file compression ''
  $ lsattr file
  ---------------------- file
  $ btrfs prop set file compression lzo
  $ lsattr file
  --------c------------- file
  $ btrfs prop get file compression
  compression=lzo
  $ btrfs prop set file compression ''
  # Reset to the initial state
  $ lsattr file
  ---------------------- file
  # Set NOCOMPRESS bit
  $ btrfs prop set file compression no
  $ lsattr file
  ---------------------m file

This obviously brings problems with backward compatibility, so this
patch should not be backported without making sure the updated
btrfs-progs are also used and that scripts have been updated to use the
new semantics.

Summary:

- old kernel:
  no, none, "" - set NOCOMPRESS bit
- new kernel:
  no, none - set NOCOMPRESS bit
  "" - drop all compression flags, ie. COMPRESS and NOCOMPRESS

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/props.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index a17e53e700b1f..b1cb5a8c29997 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -260,6 +260,10 @@ static int prop_compression_validate(const char *value, size_t len)
 	if (btrfs_compress_is_valid_type(value, len))
 		return 0;
 
+	if ((len == 2 && strncmp("no", value, 2) == 0) ||
+	    (len == 4 && strncmp("none", value, 4) == 0))
+		return 0;
+
 	return -EINVAL;
 }
 
@@ -269,7 +273,17 @@ static int prop_compression_apply(struct inode *inode, const char *value,
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	int type;
 
+	/* Reset to defaults */
 	if (len == 0) {
+		BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+		BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
+		return 0;
+	}
+
+	/* Set NOCOMPRESS flag */
+	if ((len == 2 && strncmp("no", value, 2) == 0) ||
+	    (len == 4 && strncmp("none", value, 4) == 0)) {
 		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
 		BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
 		BTRFS_I(inode)->prop_compress = BTRFS_COMPRESS_NONE;
-- 
GitLab


From ccd9395b5241310f1ef518ad371f8de779f0b681 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 18 Jun 2021 14:57:05 +0200
Subject: [PATCH 3557/3804] btrfs: switch mount option bits to enums and use
 wider type

Switch defines of BTRFS_MOUNT_* to an enum (the symbolic names are
recorded in the debugging information for convenience).

There are two more things done but separating them would not make much
sense as it's touching the same lines:

- Renumber shifts 18..31 to 17..30 to get rid of the hole in the
  sequence.

- Use 1UL as the value that gets shifted because we're approaching the
  32bit limit and due to integer promotions the value of (1 << 31)
  becomes 0xffffffff80000000 when cast to unsigned long (eg. the option
  manipulating helpers).

  This is not causing any problems yet as the operations are in-memory
  and masking the 31st bit works, we don't have more than 31 bits so the
  ill effects of not masking higher bits don't happen. But once we have
  more, the problems will emerge.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h | 65 ++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index cbdabecffb05c..0941af26362db 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1384,38 +1384,39 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
  *
  * Note: don't forget to add new options to btrfs_show_options()
  */
-#define BTRFS_MOUNT_NODATASUM		(1 << 0)
-#define BTRFS_MOUNT_NODATACOW		(1 << 1)
-#define BTRFS_MOUNT_NOBARRIER		(1 << 2)
-#define BTRFS_MOUNT_SSD			(1 << 3)
-#define BTRFS_MOUNT_DEGRADED		(1 << 4)
-#define BTRFS_MOUNT_COMPRESS		(1 << 5)
-#define BTRFS_MOUNT_NOTREELOG           (1 << 6)
-#define BTRFS_MOUNT_FLUSHONCOMMIT       (1 << 7)
-#define BTRFS_MOUNT_SSD_SPREAD		(1 << 8)
-#define BTRFS_MOUNT_NOSSD		(1 << 9)
-#define BTRFS_MOUNT_DISCARD_SYNC	(1 << 10)
-#define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
-#define BTRFS_MOUNT_SPACE_CACHE		(1 << 12)
-#define BTRFS_MOUNT_CLEAR_CACHE		(1 << 13)
-#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
-#define BTRFS_MOUNT_ENOSPC_DEBUG	 (1 << 15)
-#define BTRFS_MOUNT_AUTO_DEFRAG		(1 << 16)
-/* bit 17 is free */
-#define BTRFS_MOUNT_USEBACKUPROOT	(1 << 18)
-#define BTRFS_MOUNT_SKIP_BALANCE	(1 << 19)
-#define BTRFS_MOUNT_CHECK_INTEGRITY	(1 << 20)
-#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
-#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
-#define BTRFS_MOUNT_RESCAN_UUID_TREE	(1 << 23)
-#define BTRFS_MOUNT_FRAGMENT_DATA	(1 << 24)
-#define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)
-#define BTRFS_MOUNT_FREE_SPACE_TREE	(1 << 26)
-#define BTRFS_MOUNT_NOLOGREPLAY		(1 << 27)
-#define BTRFS_MOUNT_REF_VERIFY		(1 << 28)
-#define BTRFS_MOUNT_DISCARD_ASYNC	(1 << 29)
-#define BTRFS_MOUNT_IGNOREBADROOTS	(1 << 30)
-#define BTRFS_MOUNT_IGNOREDATACSUMS	(1 << 31)
+enum {
+	BTRFS_MOUNT_NODATASUM			= (1UL << 0),
+	BTRFS_MOUNT_NODATACOW			= (1UL << 1),
+	BTRFS_MOUNT_NOBARRIER			= (1UL << 2),
+	BTRFS_MOUNT_SSD				= (1UL << 3),
+	BTRFS_MOUNT_DEGRADED			= (1UL << 4),
+	BTRFS_MOUNT_COMPRESS			= (1UL << 5),
+	BTRFS_MOUNT_NOTREELOG   		= (1UL << 6),
+	BTRFS_MOUNT_FLUSHONCOMMIT		= (1UL << 7),
+	BTRFS_MOUNT_SSD_SPREAD			= (1UL << 8),
+	BTRFS_MOUNT_NOSSD			= (1UL << 9),
+	BTRFS_MOUNT_DISCARD_SYNC		= (1UL << 10),
+	BTRFS_MOUNT_FORCE_COMPRESS      	= (1UL << 11),
+	BTRFS_MOUNT_SPACE_CACHE			= (1UL << 12),
+	BTRFS_MOUNT_CLEAR_CACHE			= (1UL << 13),
+	BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	= (1UL << 14),
+	BTRFS_MOUNT_ENOSPC_DEBUG		= (1UL << 15),
+	BTRFS_MOUNT_AUTO_DEFRAG			= (1UL << 16),
+	BTRFS_MOUNT_USEBACKUPROOT		= (1UL << 17),
+	BTRFS_MOUNT_SKIP_BALANCE		= (1UL << 18),
+	BTRFS_MOUNT_CHECK_INTEGRITY		= (1UL << 19),
+	BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA = (1UL << 20),
+	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= (1UL << 21),
+	BTRFS_MOUNT_RESCAN_UUID_TREE		= (1UL << 22),
+	BTRFS_MOUNT_FRAGMENT_DATA		= (1UL << 23),
+	BTRFS_MOUNT_FRAGMENT_METADATA		= (1UL << 24),
+	BTRFS_MOUNT_FREE_SPACE_TREE		= (1UL << 25),
+	BTRFS_MOUNT_NOLOGREPLAY			= (1UL << 26),
+	BTRFS_MOUNT_REF_VERIFY			= (1UL << 27),
+	BTRFS_MOUNT_DISCARD_ASYNC		= (1UL << 28),
+	BTRFS_MOUNT_IGNOREBADROOTS		= (1UL << 29),
+	BTRFS_MOUNT_IGNOREDATACSUMS		= (1UL << 30),
+};
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(2048)
-- 
GitLab


From cbeaae4f6f6e787b7dac6230a31d9ad93d594f95 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Fri, 18 Jun 2021 16:16:49 +0200
Subject: [PATCH 3558/3804] btrfs: shorten integrity checker extent data mount
 option

Subjectively, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA is quite long and
calling it CHECK_INTEGRITY_DATA still keeps the meaning and matches the
mount option name.

Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   | 2 +-
 fs/btrfs/disk-io.c | 3 +--
 fs/btrfs/super.c   | 5 ++---
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0941af26362db..c80d3be148a53 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1405,7 +1405,7 @@ enum {
 	BTRFS_MOUNT_USEBACKUPROOT		= (1UL << 17),
 	BTRFS_MOUNT_SKIP_BALANCE		= (1UL << 18),
 	BTRFS_MOUNT_CHECK_INTEGRITY		= (1UL << 19),
-	BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA = (1UL << 20),
+	BTRFS_MOUNT_CHECK_INTEGRITY_DATA	= (1UL << 20),
 	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= (1UL << 21),
 	BTRFS_MOUNT_RESCAN_UUID_TREE		= (1UL << 22),
 	BTRFS_MOUNT_FRAGMENT_DATA		= (1UL << 23),
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 544bb7a82e574..6eb0010f9c7e1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3598,8 +3598,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 	if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) {
 		ret = btrfsic_mount(fs_info, fs_devices,
 				    btrfs_test_opt(fs_info,
-					CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
-				    1 : 0,
+					CHECK_INTEGRITY_DATA) ? 1 : 0,
 				    fs_info->check_integrity_print_mask);
 		if (ret)
 			btrfs_warn(fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index bc613218c8c5b..d07b18b2b2505 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -934,8 +934,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 		case Opt_check_integrity_including_extent_data:
 			btrfs_info(info,
 				   "enabling check integrity including extent data");
-			btrfs_set_opt(info->mount_opt,
-				      CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
+			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
 			btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
 			break;
 		case Opt_check_integrity:
@@ -1516,7 +1515,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 	if (btrfs_test_opt(info, SKIP_BALANCE))
 		seq_puts(seq, ",skip_balance");
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
-	if (btrfs_test_opt(info, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
+	if (btrfs_test_opt(info, CHECK_INTEGRITY_DATA))
 		seq_puts(seq, ",check_int_data");
 	else if (btrfs_test_opt(info, CHECK_INTEGRITY))
 		seq_puts(seq, ",check_int");
-- 
GitLab


From 1cea5cf0e664290cc917da9a2c1f8df3716891cd Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 21 Jun 2021 11:10:38 +0100
Subject: [PATCH 3559/3804] btrfs: ensure relocation never runs while we have
 send operations running

Relocation and send do not play well together because while send is
running a block group can be relocated, a transaction committed and
the respective disk extents get re-allocated and written to or discarded
while send is about to do something with the extents.

This was explained in commit 9e967495e0e0ae ("Btrfs: prevent send failures
and crashes due to concurrent relocation"), which prevented balance and
send from running in parallel but it did not address one remaining case
where chunk relocation can happen: shrinking a device (and device deletion
which shrinks a device's size to 0 before deleting the device).

We also have now one more case where relocation is triggered: on zoned
filesystems partially used block groups get relocated by a background
thread, introduced in commit 18bb8bbf13c183 ("btrfs: zoned: automatically
reclaim zones").

So make sure that instead of preventing balance from running when there
are ongoing send operations, we prevent relocation from happening.
This uses the infrastructure recently added by a patch that has the
subject: "btrfs: add cancellable chunk relocation support".

It also adds a spinlock dedicated to the mutual exclusion between send
and relocation. Previously fs_info->balance_mutex was used, which would
make an attempt to run send block waiting for balance to finish, and
that can take a lot of time on large filesystems.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c | 10 ++++++++--
 fs/btrfs/ctree.h       |  5 +++--
 fs/btrfs/disk-io.c     |  1 +
 fs/btrfs/relocation.c  | 13 +++++++++++++
 fs/btrfs/send.c        | 14 +++++++-------
 fs/btrfs/volumes.c     |  8 --------
 6 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index c42b6528552ff..024a1c6e5b402 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1491,7 +1491,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 		container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
 	struct btrfs_block_group *bg;
 	struct btrfs_space_info *space_info;
-	int ret;
+	LIST_HEAD(again_list);
 
 	if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
 		return;
@@ -1502,6 +1502,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 	mutex_lock(&fs_info->reclaim_bgs_lock);
 	spin_lock(&fs_info->unused_bgs_lock);
 	while (!list_empty(&fs_info->reclaim_bgs)) {
+		int ret = 0;
+
 		bg = list_first_entry(&fs_info->reclaim_bgs,
 				      struct btrfs_block_group,
 				      bg_list);
@@ -1547,9 +1549,13 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
 				  bg->start);
 
 next:
-		btrfs_put_block_group(bg);
 		spin_lock(&fs_info->unused_bgs_lock);
+		if (ret == -EAGAIN && list_empty(&bg->bg_list))
+			list_add_tail(&bg->bg_list, &again_list);
+		else
+			btrfs_put_block_group(bg);
 	}
+	list_splice_tail(&again_list, &fs_info->reclaim_bgs);
 	spin_unlock(&fs_info->unused_bgs_lock);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
 	btrfs_exclop_finish(fs_info);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index c80d3be148a53..15d17e12c5de3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -561,13 +561,13 @@ enum {
 	/*
 	 * Indicate that balance has been set up from the ioctl and is in the
 	 * main phase. The fs_info::balance_ctl is initialized.
-	 * Set and cleared while holding fs_info::balance_mutex.
 	 */
 	BTRFS_FS_BALANCE_RUNNING,
 
 	/*
 	 * Indicate that relocation of a chunk has started, it's set per chunk
 	 * and is toggled between chunks.
+	 * Set, tested and cleared while holding fs_info::send_reloc_lock.
 	 */
 	BTRFS_FS_RELOC_RUNNING,
 
@@ -995,9 +995,10 @@ struct btrfs_fs_info {
 
 	struct crypto_shash *csum_shash;
 
+	spinlock_t send_reloc_lock;
 	/*
 	 * Number of send operations in progress.
-	 * Updated while holding fs_info::balance_mutex.
+	 * Updated while holding fs_info::send_reloc_lock.
 	 */
 	int send_in_progress;
 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6eb0010f9c7e1..4621120b6bc72 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2999,6 +2999,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	spin_lock_init(&fs_info->swapfile_pins_lock);
 	fs_info->swapfile_pins = RB_ROOT;
 
+	spin_lock_init(&fs_info->send_reloc_lock);
 	fs_info->send_in_progress = 0;
 
 	fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 420a898698895..fc831597cb22e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3789,14 +3789,25 @@ out:
  *   0             success
  *   -EINPROGRESS  operation is already in progress, that's probably a bug
  *   -ECANCELED    cancellation request was set before the operation started
+ *   -EAGAIN       can not start because there are ongoing send operations
  */
 static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
 {
+	spin_lock(&fs_info->send_reloc_lock);
+	if (fs_info->send_in_progress) {
+		btrfs_warn_rl(fs_info,
+"cannot run relocation while send operations are in progress (%d in progress)",
+			      fs_info->send_in_progress);
+		spin_unlock(&fs_info->send_reloc_lock);
+		return -EAGAIN;
+	}
 	if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
 		/* This should not happen */
+		spin_unlock(&fs_info->send_reloc_lock);
 		btrfs_err(fs_info, "reloc already running, cannot start");
 		return -EINPROGRESS;
 	}
+	spin_unlock(&fs_info->send_reloc_lock);
 
 	if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
 		btrfs_info(fs_info, "chunk relocation canceled on start");
@@ -3818,7 +3829,9 @@ static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
 	/* Requested after start, clear bit first so any waiters can continue */
 	if (atomic_read(&fs_info->reloc_cancel_req) > 0)
 		btrfs_info(fs_info, "chunk relocation canceled during operation");
+	spin_lock(&fs_info->send_reloc_lock);
 	clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
+	spin_unlock(&fs_info->send_reloc_lock);
 	atomic_set(&fs_info->reloc_cancel_req, 0);
 }
 
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 6e69302828ef2..37e502b09a80b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7416,23 +7416,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
 	if (ret)
 		goto out;
 
-	mutex_lock(&fs_info->balance_mutex);
-	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
-		mutex_unlock(&fs_info->balance_mutex);
+	spin_lock(&fs_info->send_reloc_lock);
+	if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
+		spin_unlock(&fs_info->send_reloc_lock);
 		btrfs_warn_rl(fs_info,
-		"cannot run send because a balance operation is in progress");
+		"cannot run send because a relocation operation is in progress");
 		ret = -EAGAIN;
 		goto out;
 	}
 	fs_info->send_in_progress++;
-	mutex_unlock(&fs_info->balance_mutex);
+	spin_unlock(&fs_info->send_reloc_lock);
 
 	current->journal_info = BTRFS_SEND_TRANS_STUB;
 	ret = send_subvol(sctx);
 	current->journal_info = NULL;
-	mutex_lock(&fs_info->balance_mutex);
+	spin_lock(&fs_info->send_reloc_lock);
 	fs_info->send_in_progress--;
-	mutex_unlock(&fs_info->balance_mutex);
+	spin_unlock(&fs_info->send_reloc_lock);
 	if (ret < 0)
 		goto out;
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 582695cee9d13..782e16795bc40 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4217,14 +4217,6 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
 				btrfs_bg_type_to_raid_name(data_target));
 	}
 
-	if (fs_info->send_in_progress) {
-		btrfs_warn_rl(fs_info,
-"cannot run balance while send operations are in progress (%d in progress)",
-			      fs_info->send_in_progress);
-		ret = -EAGAIN;
-		goto out;
-	}
-
 	ret = insert_balance_item(fs_info, bctl);
 	if (ret && ret != -EEXIST)
 		goto out;
-- 
GitLab


From 35b22c19afe71c37540c0e4b574a441d27b03853 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 21 Jun 2021 11:10:39 +0100
Subject: [PATCH 3560/3804] btrfs: send: fix crash when memory allocations
 trigger reclaim

When doing a send we don't expect the task to ever start a transaction
after the initial check that verifies if commit roots match the regular
roots. This is because after that we set current->journal_info with a
stub (special value) that signals we are in send context, so that we take
a read lock on an extent buffer when reading it from disk and verifying
it is valid (its generation matches the generation stored in the parent).
This stub was introduced in 2014 by commit a26e8c9f75b0bf ("Btrfs: don't
clear uptodate if the eb is under IO") in order to fix a concurrency issue
between send and balance.

However there is one particular exception where we end up needing to start
a transaction and when this happens it results in a crash with a stack
trace like the following:

[60015.902283] kernel: WARNING: CPU: 3 PID: 58159 at arch/x86/include/asm/kfence.h:44 kfence_protect_page+0x21/0x80
[60015.902292] kernel: Modules linked in: uinput rfcomm snd_seq_dummy (...)
[60015.902384] kernel: CPU: 3 PID: 58159 Comm: btrfs Not tainted 5.12.9-300.fc34.x86_64 #1
[60015.902387] kernel: Hardware name: Gigabyte Technology Co., Ltd. To be filled by O.E.M./F2A88XN-WIFI, BIOS F6 12/24/2015
[60015.902389] kernel: RIP: 0010:kfence_protect_page+0x21/0x80
[60015.902393] kernel: Code: ff 0f 1f 84 00 00 00 00 00 55 48 89 fd (...)
[60015.902396] kernel: RSP: 0018:ffff9fb583453220 EFLAGS: 00010246
[60015.902399] kernel: RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff9fb583453224
[60015.902401] kernel: RDX: ffff9fb583453224 RSI: 0000000000000000 RDI: 0000000000000000
[60015.902402] kernel: RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[60015.902404] kernel: R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000002
[60015.902406] kernel: R13: ffff9fb583453348 R14: 0000000000000000 R15: 0000000000000001
[60015.902408] kernel: FS:  00007f158e62d8c0(0000) GS:ffff93bd37580000(0000) knlGS:0000000000000000
[60015.902410] kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[60015.902412] kernel: CR2: 0000000000000039 CR3: 00000001256d2000 CR4: 00000000000506e0
[60015.902414] kernel: Call Trace:
[60015.902419] kernel:  kfence_unprotect+0x13/0x30
[60015.902423] kernel:  page_fault_oops+0x89/0x270
[60015.902427] kernel:  ? search_module_extables+0xf/0x40
[60015.902431] kernel:  ? search_bpf_extables+0x57/0x70
[60015.902435] kernel:  kernelmode_fixup_or_oops+0xd6/0xf0
[60015.902437] kernel:  __bad_area_nosemaphore+0x142/0x180
[60015.902440] kernel:  exc_page_fault+0x67/0x150
[60015.902445] kernel:  asm_exc_page_fault+0x1e/0x30
[60015.902450] kernel: RIP: 0010:start_transaction+0x71/0x580
[60015.902454] kernel: Code: d3 0f 84 92 00 00 00 80 e7 06 0f 85 63 (...)
[60015.902456] kernel: RSP: 0018:ffff9fb5834533f8 EFLAGS: 00010246
[60015.902458] kernel: RAX: 0000000000000001 RBX: 0000000000000001 RCX: 0000000000000000
[60015.902460] kernel: RDX: 0000000000000801 RSI: 0000000000000000 RDI: 0000000000000039
[60015.902462] kernel: RBP: ffff93bc0a7eb800 R08: 0000000000000001 R09: 0000000000000000
[60015.902463] kernel: R10: 0000000000098a00 R11: 0000000000000001 R12: 0000000000000001
[60015.902464] kernel: R13: 0000000000000000 R14: ffff93bc0c92b000 R15: ffff93bc0c92b000
[60015.902468] kernel:  btrfs_commit_inode_delayed_inode+0x5d/0x120
[60015.902473] kernel:  btrfs_evict_inode+0x2c5/0x3f0
[60015.902476] kernel:  evict+0xd1/0x180
[60015.902480] kernel:  inode_lru_isolate+0xe7/0x180
[60015.902483] kernel:  __list_lru_walk_one+0x77/0x150
[60015.902487] kernel:  ? iput+0x1a0/0x1a0
[60015.902489] kernel:  ? iput+0x1a0/0x1a0
[60015.902491] kernel:  list_lru_walk_one+0x47/0x70
[60015.902495] kernel:  prune_icache_sb+0x39/0x50
[60015.902497] kernel:  super_cache_scan+0x161/0x1f0
[60015.902501] kernel:  do_shrink_slab+0x142/0x240
[60015.902505] kernel:  shrink_slab+0x164/0x280
[60015.902509] kernel:  shrink_node+0x2c8/0x6e0
[60015.902512] kernel:  do_try_to_free_pages+0xcb/0x4b0
[60015.902514] kernel:  try_to_free_pages+0xda/0x190
[60015.902516] kernel:  __alloc_pages_slowpath.constprop.0+0x373/0xcc0
[60015.902521] kernel:  ? __memcg_kmem_charge_page+0xc2/0x1e0
[60015.902525] kernel:  __alloc_pages_nodemask+0x30a/0x340
[60015.902528] kernel:  pipe_write+0x30b/0x5c0
[60015.902531] kernel:  ? set_next_entity+0xad/0x1e0
[60015.902534] kernel:  ? switch_mm_irqs_off+0x58/0x440
[60015.902538] kernel:  __kernel_write+0x13a/0x2b0
[60015.902541] kernel:  kernel_write+0x73/0x150
[60015.902543] kernel:  send_cmd+0x7b/0xd0
[60015.902545] kernel:  send_extent_data+0x5a3/0x6b0
[60015.902549] kernel:  process_extent+0x19b/0xed0
[60015.902551] kernel:  btrfs_ioctl_send+0x1434/0x17e0
[60015.902554] kernel:  ? _btrfs_ioctl_send+0xe1/0x100
[60015.902557] kernel:  _btrfs_ioctl_send+0xbf/0x100
[60015.902559] kernel:  ? enqueue_entity+0x18c/0x7b0
[60015.902562] kernel:  btrfs_ioctl+0x185f/0x2f80
[60015.902564] kernel:  ? psi_task_change+0x84/0xc0
[60015.902569] kernel:  ? _flat_send_IPI_mask+0x21/0x40
[60015.902572] kernel:  ? check_preempt_curr+0x2f/0x70
[60015.902576] kernel:  ? selinux_file_ioctl+0x137/0x1e0
[60015.902579] kernel:  ? expand_files+0x1cb/0x1d0
[60015.902582] kernel:  ? __x64_sys_ioctl+0x82/0xb0
[60015.902585] kernel:  __x64_sys_ioctl+0x82/0xb0
[60015.902588] kernel:  do_syscall_64+0x33/0x40
[60015.902591] kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
[60015.902595] kernel: RIP: 0033:0x7f158e38f0ab
[60015.902599] kernel: Code: ff ff ff 85 c0 79 9b (...)
[60015.902602] kernel: RSP: 002b:00007ffcb2519bf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[60015.902605] kernel: RAX: ffffffffffffffda RBX: 00007ffcb251ae00 RCX: 00007f158e38f0ab
[60015.902607] kernel: RDX: 00007ffcb2519cf0 RSI: 0000000040489426 RDI: 0000000000000004
[60015.902608] kernel: RBP: 0000000000000004 R08: 00007f158e297640 R09: 00007f158e297640
[60015.902610] kernel: R10: 0000000000000008 R11: 0000000000000246 R12: 0000000000000000
[60015.902612] kernel: R13: 0000000000000002 R14: 00007ffcb251aee0 R15: 0000558c1a83e2a0
[60015.902615] kernel: ---[ end trace 7bbc33e23bb887ae ]---

This happens because when writing to the pipe, by calling kernel_write(),
we end up doing page allocations using GFP_HIGHUSER | __GFP_ACCOUNT as the
gfp flags, which allow reclaim to happen if there is memory pressure. This
allocation happens at fs/pipe.c:pipe_write().

If the reclaim is triggered, inode eviction can be triggered and that in
turn can result in starting a transaction if the inode has a link count
of 0. The transaction start happens early on during eviction, when we call
btrfs_commit_inode_delayed_inode() at btrfs_evict_inode(). This happens if
there is currently an open file descriptor for an inode with a link count
of 0 and the reclaim task gets a reference on the inode before that
descriptor is closed, in which case the reclaim task ends up doing the
final iput that triggers the inode eviction.

When we have assertions enabled (CONFIG_BTRFS_ASSERT=y), this triggers
the following assertion at transaction.c:start_transaction():

    /* Send isn't supposed to start transactions. */
    ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB);

And when assertions are not enabled, it triggers a crash since after that
assertion we cast current->journal_info into a transaction handle pointer
and then dereference it:

   if (current->journal_info) {
       WARN_ON(type & TRANS_EXTWRITERS);
       h = current->journal_info;
       refcount_inc(&h->use_count);
       (...)

Which obviously results in a crash due to an invalid memory access.

The same type of issue can happen during other memory allocations we
do directly in the send code with kmalloc (and friends) as they use
GFP_KERNEL and therefore may trigger reclaim too, which started to
happen since 2016 after commit e780b0d1c1523e ("btrfs: send: use
GFP_KERNEL everywhere").

The issue could be solved by setting up a NOFS context for the entire
send operation so that reclaim could not be triggered when allocating
memory or pages through kernel_write(). However that is not very friendly
and we can in fact get rid of the send stub because:

1) The stub was introduced way back in 2014 by commit a26e8c9f75b0bf
   ("Btrfs: don't clear uptodate if the eb is under IO") to solve an
   issue exclusive to when send and balance are running in parallel,
   however there were other problems between balance and send and we no
   longer allow balance and send to run concurrently since
   commit 9e967495e0e0ae ("Btrfs: prevent send failures and crashes due
   to concurrent relocation"). More generically the issues are between
   send and relocation, and that last commit eliminated only the
   possibility of having send and balance run concurrently, but shrinking
   a device also can trigger relocation, and on zoned filesystems we have
   relocation of partially used block groups triggered automatically as
   well. The previous patch that has a subject of:

   "btrfs: ensure relocation never runs while we have send operations running"

   Addresses all the remaining cases that can trigger relocation.

2) We can actually allow starting and even committing transactions while
   in a send context if needed because send is not holding any locks that
   would block the start or the commit of a transaction.

So get rid of all the logic added by commit a26e8c9f75b0bf ("Btrfs: don't
clear uptodate if the eb is under IO"). We can now always call
clear_extent_buffer_uptodate() at verify_parent_transid() since send is
the only case that uses commit roots without having a transaction open or
without holding the commit_root_sem.

Reported-by: Chris Murphy <lists@colorremedies.com>
Link: https://lore.kernel.org/linux-btrfs/CAJCQCtRQ57=qXo3kygwpwEBOU_CA_eKvdmjP52sU=eFvuVOEGw@mail.gmail.com/
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c     | 18 +-----------------
 fs/btrfs/qgroup.c      |  8 +-------
 fs/btrfs/send.c        |  2 --
 fs/btrfs/transaction.c |  3 ---
 fs/btrfs/transaction.h |  2 --
 5 files changed, 2 insertions(+), 31 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4621120b6bc72..7cc32e2813bea 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -241,7 +241,6 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 {
 	struct extent_state *cached_state = NULL;
 	int ret;
-	bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB);
 
 	if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
 		return 0;
@@ -249,9 +248,6 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 	if (atomic)
 		return -EAGAIN;
 
-	if (need_lock)
-		btrfs_tree_read_lock(eb);
-
 	lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
 			 &cached_state);
 	if (extent_buffer_uptodate(eb) &&
@@ -264,22 +260,10 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 			eb->start,
 			parent_transid, btrfs_header_generation(eb));
 	ret = 1;
-
-	/*
-	 * Things reading via commit roots that don't have normal protection,
-	 * like send, can have a really old block in cache that may point at a
-	 * block that has been freed and re-allocated.  So don't clear uptodate
-	 * if we find an eb that is under IO (dirty/writeback) because we could
-	 * end up reading in the stale data and then writing it back out and
-	 * making everybody very sad.
-	 */
-	if (!extent_buffer_under_io(eb))
-		clear_extent_buffer_uptodate(eb);
+	clear_extent_buffer_uptodate(eb);
 out:
 	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
 			     &cached_state);
-	if (need_lock)
-		btrfs_tree_read_unlock(eb);
 	return ret;
 }
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d72885903b8c9..07ec06d4e9726 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3545,13 +3545,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	/*
-	 * Can't hold an open transaction or we run the risk of deadlocking,
-	 * and can't either be under the context of a send operation (where
-	 * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
-	 * would result in a crash when starting a transaction and does not
-	 * make sense either (send is a read-only operation).
-	 */
+	/* Can't hold an open transaction or we run the risk of deadlocking. */
 	ASSERT(current->journal_info == NULL);
 	if (WARN_ON(current->journal_info))
 		return 0;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 37e502b09a80b..6ac37ae6c8117 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7427,9 +7427,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
 	fs_info->send_in_progress++;
 	spin_unlock(&fs_info->send_reloc_lock);
 
-	current->journal_info = BTRFS_SEND_TRANS_STUB;
 	ret = send_subvol(sctx);
-	current->journal_info = NULL;
 	spin_lock(&fs_info->send_reloc_lock);
 	fs_info->send_in_progress--;
 	spin_unlock(&fs_info->send_reloc_lock);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 57be211d6db6e..50318231c1a88 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -583,9 +583,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
 	bool do_chunk_alloc = false;
 	int ret;
 
-	/* Send isn't supposed to start transactions. */
-	ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB);
-
 	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
 		return ERR_PTR(-EROFS);
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0702e8d9b30eb..07d76029f598e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -122,8 +122,6 @@ struct btrfs_transaction {
 
 #define TRANS_EXTWRITERS	(__TRANS_START | __TRANS_ATTACH)
 
-#define BTRFS_SEND_TRANS_STUB	((void *)1)
-
 struct btrfs_trans_handle {
 	u64 transid;
 	u64 bytes_reserved;
-- 
GitLab


From c416a30cddec0840520e9ffb170aea6c6b6c64af Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 22 Jun 2021 15:51:58 +0300
Subject: [PATCH 3561/3804] btrfs: rip out may_commit_transaction

may_commit_transaction was introduced before the ticketing
infrastructure existed.  There was a problem where we'd legitimately be
out of space, but every reservation would trigger a transaction commit
and then fail.  Thus if you had 1000 things trying to make a
reservation, they'd all do the flushing loop and thus commit the
transaction 1000 times before they'd get their ENOSPC.

This helper was introduced to short circuit this, if there wasn't space
that could be reclaimed by committing the transaction then simply ENOSPC
out.  This made true ENOSPC tests much faster as we didn't waste a bunch
of time.

However many of our bugs over the years have been from cases where we
didn't account for some space that would be reclaimed by committing a
transaction.  The delayed refs rsv space, delayed rsv, many pinned bytes
miscalculations, etc.  And in the meantime the original problem has been
solved with ticketing.  We no longer will commit the transaction 1000
times.  Instead we'll get 1000 waiters, we will go through the flushing
mechanisms, and if there's no progress after 2 loops we ENOSPC everybody
out.  The ticketing infrastructure gives us a deterministic way to see
if we're making progress or not, thus we avoid a lot of extra work.

So simplify this step by simply unconditionally committing the
transaction.  This removes what is arguably our most common source of
early ENOSPC bugs and will allow us to drastically simplify many of the
things we track because we simply won't need them with this stuff gone.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h             |   1 -
 fs/btrfs/space-info.c        | 136 +++--------------------------------
 include/trace/events/btrfs.h |   3 +-
 3 files changed, 12 insertions(+), 128 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 15d17e12c5de3..d7ef4d7d2c1af 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2787,7 +2787,6 @@ enum btrfs_flush_state {
 	ALLOC_CHUNK_FORCE	=	8,
 	RUN_DELAYED_IPUTS	=	9,
 	COMMIT_TRANS		=	10,
-	FORCE_COMMIT_TRANS	=	11,
 };
 
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index f26fdb7a17e87..4c0d7290c5573 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -133,18 +133,13 @@
  *     operations, however they won't be usable until the transaction commits.
  *
  *   COMMIT_TRANS
- *     may_commit_transaction() is the ultimate arbiter on whether we commit the
- *     transaction or not.  In order to avoid constantly churning we do all the
- *     above flushing first and then commit the transaction as the last resort.
- *     However we need to take into account things like pinned space that would
- *     be freed, plus any delayed work we may not have gotten rid of in the case
- *     of metadata.
- *
- *   FORCE_COMMIT_TRANS
- *     For use by the preemptive flusher.  We use this to bypass the ticketing
- *     checks in may_commit_transaction, as we have more information about the
- *     overall state of the system and may want to commit the transaction ahead
- *     of actual ENOSPC conditions.
+ *     This will commit the transaction.  Historically we had a lot of logic
+ *     surrounding whether or not we'd commit the transaction, but this waits born
+ *     out of a pre-tickets era where we could end up committing the transaction
+ *     thousands of times in a row without making progress.  Now thanks to our
+ *     ticketing system we know if we're not making progress and can error
+ *     everybody out after a few commits rather than burning the disk hoping for
+ *     a different answer.
  *
  * OVERCOMMIT
  *
@@ -575,109 +570,6 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
 	}
 }
 
-/**
- * Possibly commit the transaction if its ok to
- *
- * @fs_info:    the filesystem
- * @space_info: space_info we are checking for commit, either data or metadata
- *
- * This will check to make sure that committing the transaction will actually
- * get us somewhere and then commit the transaction if it does.  Otherwise it
- * will return -ENOSPC.
- */
-static int may_commit_transaction(struct btrfs_fs_info *fs_info,
-				  struct btrfs_space_info *space_info)
-{
-	struct reserve_ticket *ticket = NULL;
-	struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv;
-	struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
-	struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv;
-	struct btrfs_trans_handle *trans;
-	u64 reclaim_bytes = 0;
-	u64 bytes_needed = 0;
-	u64 cur_free_bytes = 0;
-
-	trans = (struct btrfs_trans_handle *)current->journal_info;
-	if (trans)
-		return -EAGAIN;
-
-	spin_lock(&space_info->lock);
-	cur_free_bytes = btrfs_space_info_used(space_info, true);
-	if (cur_free_bytes < space_info->total_bytes)
-		cur_free_bytes = space_info->total_bytes - cur_free_bytes;
-	else
-		cur_free_bytes = 0;
-
-	if (!list_empty(&space_info->priority_tickets))
-		ticket = list_first_entry(&space_info->priority_tickets,
-					  struct reserve_ticket, list);
-	else if (!list_empty(&space_info->tickets))
-		ticket = list_first_entry(&space_info->tickets,
-					  struct reserve_ticket, list);
-	if (ticket)
-		bytes_needed = ticket->bytes;
-
-	if (bytes_needed > cur_free_bytes)
-		bytes_needed -= cur_free_bytes;
-	else
-		bytes_needed = 0;
-	spin_unlock(&space_info->lock);
-
-	if (!bytes_needed)
-		return 0;
-
-	trans = btrfs_join_transaction(fs_info->extent_root);
-	if (IS_ERR(trans))
-		return PTR_ERR(trans);
-
-	/*
-	 * See if there is enough pinned space to make this reservation, or if
-	 * we have block groups that are going to be freed, allowing us to
-	 * possibly do a chunk allocation the next loop through.
-	 */
-	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) ||
-	    __percpu_counter_compare(&space_info->total_bytes_pinned,
-				     bytes_needed,
-				     BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0)
-		goto commit;
-
-	/*
-	 * See if there is some space in the delayed insertion reserve for this
-	 * reservation.  If the space_info's don't match (like for DATA or
-	 * SYSTEM) then just go enospc, reclaiming this space won't recover any
-	 * space to satisfy those reservations.
-	 */
-	if (space_info != delayed_rsv->space_info)
-		goto enospc;
-
-	spin_lock(&delayed_rsv->lock);
-	reclaim_bytes += delayed_rsv->reserved;
-	spin_unlock(&delayed_rsv->lock);
-
-	spin_lock(&delayed_refs_rsv->lock);
-	reclaim_bytes += delayed_refs_rsv->reserved;
-	spin_unlock(&delayed_refs_rsv->lock);
-
-	spin_lock(&trans_rsv->lock);
-	reclaim_bytes += trans_rsv->reserved;
-	spin_unlock(&trans_rsv->lock);
-
-	if (reclaim_bytes >= bytes_needed)
-		goto commit;
-	bytes_needed -= reclaim_bytes;
-
-	if (__percpu_counter_compare(&space_info->total_bytes_pinned,
-				   bytes_needed,
-				   BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0)
-		goto enospc;
-
-commit:
-	return btrfs_commit_transaction(trans);
-enospc:
-	btrfs_end_transaction(trans);
-	return -ENOSPC;
-}
-
 /*
  * Try to flush some data based on policy set by @state. This is only advisory
  * and may fail for various reasons. The caller is supposed to examine the
@@ -752,9 +644,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		btrfs_wait_on_delayed_iputs(fs_info);
 		break;
 	case COMMIT_TRANS:
-		ret = may_commit_transaction(fs_info, space_info);
-		break;
-	case FORCE_COMMIT_TRANS:
+		ASSERT(current->journal_info == NULL);
 		trans = btrfs_join_transaction(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
@@ -1136,7 +1026,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 			   (delayed_block_rsv->reserved +
 			    delayed_refs_rsv->reserved)) {
 			to_reclaim = space_info->bytes_pinned;
-			flush = FORCE_COMMIT_TRANS;
+			flush = COMMIT_TRANS;
 		} else if (delayed_block_rsv->reserved >
 			   delayed_refs_rsv->reserved) {
 			to_reclaim = delayed_block_rsv->reserved;
@@ -1206,12 +1096,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
  *   the information it needs to make the right decision.
  *
  * COMMIT_TRANS
- *   This is where we reclaim all of the pinned space generated by the previous
- *   two stages.  We will not commit the transaction if we don't think we're
- *   likely to satisfy our request, which means if our current free space +
- *   total_bytes_pinned < reservation we will not commit.  This is why the
- *   previous states are actually important, to make sure we know for sure
- *   whether committing the transaction will allow us to make progress.
+ *   This is where we reclaim all of the pinned space generated by running the
+ *   iputs
  *
  * ALLOC_CHUNK_FORCE
  *   For data we start with alloc chunk force, however we could have been full
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 76e0be7e14d05..c7237317a8b94 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -99,8 +99,7 @@ struct btrfs_space_info;
 	EM( ALLOC_CHUNK,		"ALLOC_CHUNK")			\
 	EM( ALLOC_CHUNK_FORCE,		"ALLOC_CHUNK_FORCE")		\
 	EM( RUN_DELAYED_IPUTS,		"RUN_DELAYED_IPUTS")		\
-	EM( COMMIT_TRANS,		"COMMIT_TRANS")			\
-	EMe(FORCE_COMMIT_TRANS,		"FORCE_COMMIT_TRANS")
+	EMe(COMMIT_TRANS,		"COMMIT_TRANS")
 
 /*
  * First define the enums in the above macros to be exported to userspace via
-- 
GitLab


From 048085539243bfd43839fe3dc6cbc02b0c620fdc Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 22 Jun 2021 15:51:59 +0300
Subject: [PATCH 3562/3804] btrfs: remove FLUSH_DELAYED_REFS from data ENOSPC
 flushing

Since we now unconditionally commit the transaction, we no longer need to
run the delayed refs to make sure our total_bytes_pinned value is up to
date; we can simply commit the transaction.  Remove this stage from the
data flushing list.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 4c0d7290c5573..ec18fed8ec80d 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1080,21 +1080,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
  *   immediately re-usable, it comes in the form of a delayed ref, which must be
  *   run and then the transaction must be committed.
  *
- * FLUSH_DELAYED_REFS
- *   The above two cases generate delayed refs that will affect
- *   ->total_bytes_pinned.  However this counter can be inconsistent with
- *   reality if there are outstanding delayed refs.  This is because we adjust
- *   the counter based solely on the current set of delayed refs and disregard
- *   any on-disk state which might include more refs.  So for example, if we
- *   have an extent with 2 references, but we only drop 1, we'll see that there
- *   is a negative delayed ref count for the extent and assume that the space
- *   will be freed, and thus increase ->total_bytes_pinned.
- *
- *   Running the delayed refs gives us the actual real view of what will be
- *   freed at the transaction commit time.  This stage will not actually free
- *   space for us, it just makes sure that may_commit_transaction() has all of
- *   the information it needs to make the right decision.
- *
  * COMMIT_TRANS
  *   This is where we reclaim all of the pinned space generated by running the
  *   iputs
@@ -1107,7 +1092,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 static const enum btrfs_flush_state data_flush_states[] = {
 	FLUSH_DELALLOC_WAIT,
 	RUN_DELAYED_IPUTS,
-	FLUSH_DELAYED_REFS,
 	COMMIT_TRANS,
 	ALLOC_CHUNK_FORCE,
 };
-- 
GitLab


From 3ffad6961db6c44b324e4ee5a8025e5f63c657d7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 22 Jun 2021 15:52:00 +0300
Subject: [PATCH 3563/3804] btrfs: rip the first_ticket_bytes logic from
 fail_all_tickets

This was a trick implemented to handle the case where we had a giant
reservation in front of a bunch of little reservations in the ticket
queue.  If the giant reservation was too large for the transaction
commit to make a difference we'd ENOSPC everybody out instead of
committing the transaction.  This logic was put in to force us to go
back and re-try the transaction commit logic to see if we could make
progress.

Instead now we know we've committed the transaction, so any space that
would have been recovered is now available, and would be caught by the
btrfs_try_granting_tickets() in this loop, so we no longer need this
code and can simply delete it.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/space-info.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ec18fed8ec80d..c3e1d5e2ea0da 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -830,7 +830,6 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 {
 	struct reserve_ticket *ticket;
 	u64 tickets_id = space_info->tickets_id;
-	u64 first_ticket_bytes = 0;
 
 	if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
 		btrfs_info(fs_info, "cannot satisfy tickets, dumping space info");
@@ -846,21 +845,6 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
 		    steal_from_global_rsv(fs_info, space_info, ticket))
 			return true;
 
-		/*
-		 * may_commit_transaction will avoid committing the transaction
-		 * if it doesn't feel like the space reclaimed by the commit
-		 * would result in the ticket succeeding.  However if we have a
-		 * smaller ticket in the queue it may be small enough to be
-		 * satisfied by committing the transaction, so if any
-		 * subsequent ticket is smaller than the first ticket go ahead
-		 * and send us back for another loop through the enospc flushing
-		 * code.
-		 */
-		if (first_ticket_bytes == 0)
-			first_ticket_bytes = ticket->bytes;
-		else if (first_ticket_bytes > ticket->bytes)
-			return true;
-
 		if (btrfs_test_opt(fs_info, ENOSPC_DEBUG))
 			btrfs_info(fs_info, "failing ticket with %llu bytes",
 				   ticket->bytes);
-- 
GitLab


From 138a12d865749e28b39300b8a07337811253939b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 22 Jun 2021 15:52:01 +0300
Subject: [PATCH 3564/3804] btrfs: rip out btrfs_space_info::total_bytes_pinned

We used this in may_commit_transaction() in order to determine if we
needed to commit the transaction.  However we no longer have that logic
and thus have no use of this counter anymore, so delete it.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/block-group.c |  3 ---
 fs/btrfs/delayed-ref.c | 26 --------------------------
 fs/btrfs/disk-io.c     |  3 ---
 fs/btrfs/extent-tree.c | 15 ---------------
 fs/btrfs/space-info.c  |  7 -------
 fs/btrfs/space-info.h  | 30 ------------------------------
 fs/btrfs/sysfs.c       | 13 -------------
 7 files changed, 97 deletions(-)

diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 024a1c6e5b402..38b127b9edfc9 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1399,7 +1399,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 		btrfs_space_info_update_bytes_pinned(fs_info, space_info,
 						     -block_group->pinned);
 		space_info->bytes_readonly += block_group->pinned;
-		__btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned);
 		block_group->pinned = 0;
 
 		spin_unlock(&block_group->lock);
@@ -3068,8 +3067,6 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
 
-			__btrfs_mod_total_bytes_pinned(cache->space_info,
-						       num_bytes);
 			set_extent_dirty(&trans->transaction->pinned_extents,
 					 bytenr, bytenr + num_bytes - 1,
 					 GFP_NOFS | __GFP_NOFAIL);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index c92d9d4f5f46c..06bc842ecdb34 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -641,7 +641,6 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
 	struct btrfs_delayed_ref_root *delayed_refs =
 		&trans->transaction->delayed_refs;
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	u64 flags = btrfs_ref_head_to_space_flags(existing);
 	int old_ref_mod;
 
 	BUG_ON(existing->is_data != update->is_data);
@@ -711,26 +710,6 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans,
 		}
 	}
 
-	/*
-	 * This handles the following conditions:
-	 *
-	 * 1. We had a ref mod of 0 or more and went negative, indicating that
-	 *    we may be freeing space, so add our space to the
-	 *    total_bytes_pinned counter.
-	 * 2. We were negative and went to 0 or positive, so no longer can say
-	 *    that the space would be pinned, decrement our counter from the
-	 *    total_bytes_pinned counter.
-	 * 3. We are now at 0 and have ->must_insert_reserved set, which means
-	 *    this was a new allocation and then we dropped it, and thus must
-	 *    add our space to the total_bytes_pinned counter.
-	 */
-	if (existing->total_ref_mod < 0 && old_ref_mod >= 0)
-		btrfs_mod_total_bytes_pinned(fs_info, flags, existing->num_bytes);
-	else if (existing->total_ref_mod >= 0 && old_ref_mod < 0)
-		btrfs_mod_total_bytes_pinned(fs_info, flags, -existing->num_bytes);
-	else if (existing->total_ref_mod == 0 && existing->must_insert_reserved)
-		btrfs_mod_total_bytes_pinned(fs_info, flags, existing->num_bytes);
-
 	spin_unlock(&existing->lock);
 }
 
@@ -835,17 +814,12 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
 		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 		head_ref = existing;
 	} else {
-		u64 flags = btrfs_ref_head_to_space_flags(head_ref);
-
 		if (head_ref->is_data && head_ref->ref_mod < 0) {
 			delayed_refs->pending_csums += head_ref->num_bytes;
 			trans->delayed_ref_updates +=
 				btrfs_csum_bytes_to_leaves(trans->fs_info,
 							   head_ref->num_bytes);
 		}
-		if (head_ref->ref_mod < 0)
-			btrfs_mod_total_bytes_pinned(trans->fs_info, flags,
-						     head_ref->num_bytes);
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
 		atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7cc32e2813bea..b117dd3b81726 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4680,9 +4680,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 			cache->space_info->bytes_reserved -= head->num_bytes;
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
-			percpu_counter_add_batch(
-				&cache->space_info->total_bytes_pinned,
-				head->num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
 
 			btrfs_put_block_group(cache);
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 421120d6a14bd..d296483d148fd 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1804,19 +1804,6 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
 	}
 
-	/*
-	 * We were dropping refs, or had a new ref and dropped it, and thus must
-	 * adjust down our total_bytes_pinned, the space may or may not have
-	 * been pinned and so is accounted for properly in the pinned space by
-	 * now.
-	 */
-	if (head->total_ref_mod < 0 ||
-	    (head->total_ref_mod == 0 && head->must_insert_reserved)) {
-		u64 flags = btrfs_ref_head_to_space_flags(head);
-
-		btrfs_mod_total_bytes_pinned(fs_info, flags, -head->num_bytes);
-	}
-
 	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
 }
 
@@ -2551,7 +2538,6 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
 	spin_unlock(&cache->lock);
 	spin_unlock(&cache->space_info->lock);
 
-	__btrfs_mod_total_bytes_pinned(cache->space_info, num_bytes);
 	set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
 			 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
 	return 0;
@@ -2762,7 +2748,6 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
 		cache->pinned -= len;
 		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
 		space_info->max_extent_size = 0;
-		__btrfs_mod_total_bytes_pinned(space_info, -len);
 		if (cache->ro) {
 			space_info->bytes_readonly += len;
 			readonly = true;
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index c3e1d5e2ea0da..f79bf85f24399 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -192,13 +192,6 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
 	if (!space_info)
 		return -ENOMEM;
 
-	ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
-				 GFP_KERNEL);
-	if (ret) {
-		kfree(space_info);
-		return ret;
-	}
-
 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
 		INIT_LIST_HEAD(&space_info->block_groups[i]);
 	init_rwsem(&space_info->groups_sem);
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index b1a8ffb03b3eb..cb5056472e798 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -43,18 +43,6 @@ struct btrfs_space_info {
 
 	u64 flags;
 
-	/*
-	 * bytes_pinned is kept in line with what is actually pinned, as in
-	 * we've called update_block_group and dropped the bytes_used counter
-	 * and increased the bytes_pinned counter.  However this means that
-	 * bytes_pinned does not reflect the bytes that will be pinned once the
-	 * delayed refs are flushed, so this counter is inc'ed every time we
-	 * call btrfs_free_extent so it is a realtime count of what will be
-	 * freed once the transaction is committed.  It will be zeroed every
-	 * time the transaction commits.
-	 */
-	struct percpu_counter total_bytes_pinned;
-
 	struct list_head list;
 	/* Protected by the spinlock 'lock'. */
 	struct list_head ro_bgs;
@@ -157,22 +145,4 @@ static inline void btrfs_space_info_free_bytes_may_use(
 }
 int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
 			     enum btrfs_reserve_flush_enum flush);
-
-static inline void __btrfs_mod_total_bytes_pinned(
-					struct btrfs_space_info *space_info,
-					s64 mod)
-{
-	percpu_counter_add_batch(&space_info->total_bytes_pinned, mod,
-				 BTRFS_TOTAL_BYTES_PINNED_BATCH);
-}
-
-static inline void btrfs_mod_total_bytes_pinned(struct btrfs_fs_info *fs_info,
-						u64 flags, s64 mod)
-{
-	struct btrfs_space_info *space_info = btrfs_find_space_info(fs_info, flags);
-
-	ASSERT(space_info);
-	__btrfs_mod_total_bytes_pinned(space_info, mod);
-}
-
 #endif /* BTRFS_SPACE_INFO_H */
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index ebde1d09e686b..9d1d140118fff 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -665,15 +665,6 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj,	\
 }									\
 BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field)
 
-static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
-						       struct kobj_attribute *a,
-						       char *buf)
-{
-	struct btrfs_space_info *sinfo = to_space_info(kobj);
-	s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
-	return scnprintf(buf, PAGE_SIZE, "%lld\n", val);
-}
-
 SPACE_INFO_ATTR(flags);
 SPACE_INFO_ATTR(total_bytes);
 SPACE_INFO_ATTR(bytes_used);
@@ -684,8 +675,6 @@ SPACE_INFO_ATTR(bytes_readonly);
 SPACE_INFO_ATTR(bytes_zone_unusable);
 SPACE_INFO_ATTR(disk_used);
 SPACE_INFO_ATTR(disk_total);
-BTRFS_ATTR(space_info, total_bytes_pinned,
-	   btrfs_space_info_show_total_bytes_pinned);
 
 static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, flags),
@@ -698,7 +687,6 @@ static struct attribute *space_info_attrs[] = {
 	BTRFS_ATTR_PTR(space_info, bytes_zone_unusable),
 	BTRFS_ATTR_PTR(space_info, disk_used),
 	BTRFS_ATTR_PTR(space_info, disk_total),
-	BTRFS_ATTR_PTR(space_info, total_bytes_pinned),
 	NULL,
 };
 ATTRIBUTE_GROUPS(space_info);
@@ -706,7 +694,6 @@ ATTRIBUTE_GROUPS(space_info);
 static void space_info_release(struct kobject *kobj)
 {
 	struct btrfs_space_info *sinfo = to_space_info(kobj);
-	percpu_counter_destroy(&sinfo->total_bytes_pinned);
 	kfree(sinfo);
 }
 
-- 
GitLab


From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:12 +0100
Subject: [PATCH 3565/3804] KVM: arm64: Introduce MTE VM feature

Add a new VM feature 'KVM_CAP_ARM_MTE' which enables memory tagging
for a VM. This will expose the feature to the guest and automatically
tag memory pages touched by the VM as PG_mte_tagged (and clear the tag
storage) to ensure that the guest cannot see stale tags, and so that
the tags are correctly saved/restored across swap.

Actually exposing the new capability to user space happens in a later
patch.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
[maz: move VM_SHARED sampling into the critical section]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com
---
 arch/arm64/include/asm/kvm_emulate.h |  3 ++
 arch/arm64/include/asm/kvm_host.h    |  4 ++
 arch/arm64/kvm/hyp/exception.c       |  3 +-
 arch/arm64/kvm/mmu.c                 | 67 +++++++++++++++++++++++++++-
 arch/arm64/kvm/sys_regs.c            |  7 +++
 include/uapi/linux/kvm.h             |  1 +
 6 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 01b9857757f2a..fd418955e31e6 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
 	    vcpu_el1_is_32bit(vcpu))
 		vcpu->arch.hcr_el2 |= HCR_TID2;
+
+	if (kvm_has_mte(vcpu->kvm))
+		vcpu->arch.hcr_el2 |= HCR_ATA;
 }
 
 static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7cd7d5c8c4bc2..1c4293c46ef64 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -132,6 +132,9 @@ struct kvm_arch {
 
 	u8 pfr0_csv2;
 	u8 pfr0_csv3;
+
+	/* Memory Tagging Extension enabled for the guest */
+	bool mte_enabled;
 };
 
 struct kvm_vcpu_fault_info {
@@ -769,6 +772,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
 #define kvm_arm_vcpu_sve_finalized(vcpu) \
 	((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
 
+#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
 #define kvm_vcpu_has_pmu(vcpu)					\
 	(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
 
diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c
index 11541b94b328f..0418399e0a201 100644
--- a/arch/arm64/kvm/hyp/exception.c
+++ b/arch/arm64/kvm/hyp/exception.c
@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
 	new |= (old & PSR_C_BIT);
 	new |= (old & PSR_V_BIT);
 
-	// TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
+	if (kvm_has_mte(vcpu->kvm))
+		new |= PSR_TCO_BIT;
 
 	new |= (old & PSR_DIT_BIT);
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f36..c6a97d463892b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -822,6 +822,45 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot,
 	return PAGE_SIZE;
 }
 
+/*
+ * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
+ * able to see the page's tags and therefore they must be initialised first. If
+ * PG_mte_tagged is set, tags have already been initialised.
+ *
+ * The race in the test/set of the PG_mte_tagged flag is handled by:
+ * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
+ *   racing to sanitise the same page
+ * - mmap_lock protects between a VM faulting a page in and the VMM performing
+ *   an mprotect() to add VM_MTE
+ */
+static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+			     unsigned long size)
+{
+	unsigned long i, nr_pages = size >> PAGE_SHIFT;
+	struct page *page;
+
+	if (!kvm_has_mte(kvm))
+		return 0;
+
+	/*
+	 * pfn_to_online_page() is used to reject ZONE_DEVICE pages
+	 * that may not support tags.
+	 */
+	page = pfn_to_online_page(pfn);
+
+	if (!page)
+		return -EFAULT;
+
+	for (i = 0; i < nr_pages; i++, page++) {
+		if (!test_bit(PG_mte_tagged, &page->flags)) {
+			mte_clear_page_tags(page_address(page));
+			set_bit(PG_mte_tagged, &page->flags);
+		}
+	}
+
+	return 0;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -830,6 +869,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	bool write_fault, writable, force_pte = false;
 	bool exec_fault;
 	bool device = false;
+	bool shared;
 	unsigned long mmu_seq;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -873,6 +913,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		vma_shift = PAGE_SHIFT;
 	}
 
+	shared = (vma->vm_flags & VM_SHARED);
+
 	switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SHIFT:
@@ -971,8 +1013,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (writable)
 		prot |= KVM_PGTABLE_PROT_W;
 
-	if (fault_status != FSC_PERM && !device)
+	if (fault_status != FSC_PERM && !device) {
+		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
+		if (kvm_has_mte(kvm) && shared) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+		ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
+		if (ret)
+			goto out_unlock;
+
 		clean_dcache_guest_page(pfn, vma_pagesize);
+	}
 
 	if (exec_fault) {
 		prot |= KVM_PGTABLE_PROT_X;
@@ -1168,12 +1220,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	kvm_pfn_t pfn = pte_pfn(range->pte);
+	int ret;
 
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
 	WARN_ON(range->end - range->start != 1);
 
+	ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
+	if (ret)
+		return false;
+
 	/*
 	 * We've moved a page around, probably through CoW, so let's treat it
 	 * just like a translation fault and clean the cache to the PoC.
@@ -1381,6 +1438,16 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		if (!vma)
 			break;
 
+		/*
+		 * VM_SHARED mappings are not allowed with MTE to avoid races
+		 * when updating the PG_mte_tagged page flag, see
+		 * sanitise_mte_tags for more details.
+		 */
+		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+			ret = -EINVAL;
+			break;
+		}
+
 		/*
 		 * Take the intersection of this VMA with the memory region
 		 */
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a7968ad078c6..36f67f8deae17 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
 		break;
 	case SYS_ID_AA64PFR1_EL1:
 		val &= ~FEATURE(ID_AA64PFR1_MTE);
+		if (kvm_has_mte(vcpu->kvm)) {
+			u64 pfr, mte;
+
+			pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
+			mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
+			val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
+		}
 		break;
 	case SYS_ID_AA64ISAR1_EL1:
 		if (!vcpu_has_ptrauth(vcpu))
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 79d9c44d1ad73..d4da58ddcad7e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SGX_ATTRIBUTE 196
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_ARM_MTE 199
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From e1f358b5046479d2897f23b1d5b092687c6e7a67 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:13 +0100
Subject: [PATCH 3566/3804] KVM: arm64: Save/restore MTE registers

Define the new system registers that MTE introduces and context switch
them. The MTE feature is still hidden from the ID register as it isn't
supported in a VM yet.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-4-steven.price@arm.com
---
 arch/arm64/include/asm/kvm_arm.h           |  3 +-
 arch/arm64/include/asm/kvm_host.h          |  6 ++
 arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
 arch/arm64/include/asm/sysreg.h            |  3 +-
 arch/arm64/kernel/asm-offsets.c            |  2 +
 arch/arm64/kvm/hyp/entry.S                 |  7 +++
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
 arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
 8 files changed, 124 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_mte.h

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 692c9049befab..d436831dd7068 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -12,7 +12,8 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
-#define HCR_ATA		(UL(1) << 56)
+#define HCR_ATA_SHIFT	56
+#define HCR_ATA		(UL(1) << HCR_ATA_SHIFT)
 #define HCR_FWB		(UL(1) << 46)
 #define HCR_API		(UL(1) << 41)
 #define HCR_APK		(UL(1) << 40)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 1c4293c46ef64..74a7447a83a11 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -209,6 +209,12 @@ enum vcpu_sysreg {
 	CNTP_CVAL_EL0,
 	CNTP_CTL_EL0,
 
+	/* Memory Tagging Extension registers */
+	RGSR_EL1,	/* Random Allocation Tag Seed Register */
+	GCR_EL1,	/* Tag Control Register */
+	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
+	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
+
 	/* 32bit specific registers. Keep them at the end of the range */
 	DACR32_EL2,	/* Domain Access Control Register */
 	IFSR32_EL2,	/* Instruction Fault Status Register */
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 0000000000000..de002636eb1fb
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+	isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a1685..347ccac2341ee 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
 
 #define INIT_SCTLR_EL2_MMU_ON						\
 	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
-	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
+	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e733..6f0044cb233e7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,6 +111,8 @@ int main(void)
   DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
+  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
   DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
   DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
   DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d7..435346ea1504e 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
 #include <asm/kvm_ptrauth.h>
 
 	.text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
 
 	add	x29, x0, #VCPU_CONTEXT
 
+	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+	mte_switch_to_guest x29, x1, x2
+
 	// Macro ptrauth_switch_to_guest format:
 	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
 	// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
 	// when this feature is enabled for kernel code.
 	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
 
+	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+	mte_switch_to_hyp x1, x2, x3
+
 	// Restore hyp's sp_el0
 	restore_sp_el0 x2, x3
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158fa..de7e14c862e6c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
 }
 
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+	if (!vcpu)
+		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
 	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+	}
+
 	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1);
 	ctxt_sys_reg(ctxt, ELR_EL1)	= read_sysreg_el1(SYS_ELR);
 	ctxt_sys_reg(ctxt, SPSR_EL1)	= read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),	par_el1);
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),	tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+	}
+
 	if (!has_vhe() &&
 	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
 	    ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 36f67f8deae17..5c75b24eae219 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1309,6 +1309,20 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+				   const struct sys_reg_desc *rd)
+{
+	return REG_HIDDEN;
+}
+
+#define MTE_REG(name) {				\
+	SYS_DESC(SYS_##name),			\
+	.access = undef_access,			\
+	.reset = reset_unknown,			\
+	.reg = name,				\
+	.visibility = mte_visibility,		\
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {			\
 	SYS_DESC(SYS_##name),			\
@@ -1477,8 +1491,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
 
-	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
-	{ SYS_DESC(SYS_GCR_EL1), undef_access },
+	MTE_REG(RGSR_EL1),
+	MTE_REG(GCR_EL1),
 
 	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
 	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1505,8 +1519,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
 
-	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
-	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+	MTE_REG(TFSR_EL1),
+	MTE_REG(TFSRE0_EL1),
 
 	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
 	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
-- 
GitLab


From 673638f434ee4a00319e254ade338c57618d6f7e Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:14 +0100
Subject: [PATCH 3567/3804] KVM: arm64: Expose KVM_CAP_ARM_MTE

It's now safe for the VMM to enable MTE in a guest, so expose the
capability to user space.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-5-steven.price@arm.com
---
 arch/arm64/kvm/arm.c      | 9 +++++++++
 arch/arm64/kvm/reset.c    | 4 ++++
 arch/arm64/kvm/sys_regs.c | 3 +++
 3 files changed, 16 insertions(+)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a06..28ce26a68f09c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		kvm->arch.return_nisv_io_abort_to_user = true;
 		break;
+	case KVM_CAP_ARM_MTE:
+		if (!system_supports_mte() || kvm->created_vcpus)
+			return -EINVAL;
+		r = 0;
+		kvm->arch.mte_enabled = true;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 */
 		r = 1;
 		break;
+	case KVM_CAP_ARM_MTE:
+		r = system_supports_mte();
+		break;
 	case KVM_CAP_STEAL_TIME:
 		r = kvm_arm_pvtime_supported();
 		break;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cfe..cba7872d69a85 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
 	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
 		return false;
 
+	/* MTE is incompatible with AArch32 */
+	if (kvm_has_mte(vcpu->kvm) && is32bit)
+		return false;
+
 	/* Check that the vcpus are either all 32bit or all 64bit */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
 		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5c75b24eae219..f6f126eb6ac13 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1312,6 +1312,9 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
 				   const struct sys_reg_desc *rd)
 {
+	if (kvm_has_mte(vcpu->kvm))
+		return 0;
+
 	return REG_HIDDEN;
 }
 
-- 
GitLab


From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:15 +0100
Subject: [PATCH 3568/3804] KVM: arm64: Add ioctl to fetch/store tags in a
 guest

The VMM may not wish to have its own mapping of guest memory mapped
with PROT_MTE because this causes problems if the VMM has tag checking
enabled (the guest controls the tags in physical RAM and it's unlikely
the tags are correct for the VMM).

Instead add a new ioctl which allows the VMM to easily read/write the
tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE
while the VMM can still read/write the tags for the purpose of
migration.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com
---
 arch/arm64/include/asm/kvm_host.h |  3 ++
 arch/arm64/include/asm/mte-def.h  |  1 +
 arch/arm64/include/uapi/asm/kvm.h | 11 +++++
 arch/arm64/kvm/arm.c              |  7 +++
 arch/arm64/kvm/guest.c            | 82 +++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 6 files changed, 105 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 74a7447a83a11..c93a7198c2421 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -730,6 +730,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags);
+
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a425..626d359b396e5 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
 
 #define MTE_GRANULE_SIZE	UL(16)
 #define MTE_GRANULE_MASK	(~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE	(PAGE_SIZE / MTE_GRANULE_SIZE)
 #define MTE_TAG_SHIFT		56
 #define MTE_TAG_SIZE		4
 #define MTE_TAG_MASK		GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150c..b3edde68bc3e0 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
 	__u32 reserved[12];
 };
 
+struct kvm_arm_copy_mte_tags {
+	__u64 guest_ipa;
+	__u64 length;
+	void __user *addr;
+	__u64 flags;
+	__u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST		0
+#define KVM_ARM_TAGS_FROM_GUEST		1
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 28ce26a68f09c..511f3716fe334 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1359,6 +1359,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		return 0;
 	}
+	case KVM_ARM_MTE_COPY_TAGS: {
+		struct kvm_arm_copy_mte_tags copy_tags;
+
+		if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+			return -EFAULT;
+		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+	}
 	default:
 		return -EINVAL;
 	}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603a..4ddb20017b2f5 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 
 	return ret;
 }
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags)
+{
+	gpa_t guest_ipa = copy_tags->guest_ipa;
+	size_t length = copy_tags->length;
+	void __user *tags = copy_tags->addr;
+	gpa_t gfn;
+	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+	int ret = 0;
+
+	if (!kvm_has_mte(kvm))
+		return -EINVAL;
+
+	if (copy_tags->reserved[0] || copy_tags->reserved[1])
+		return -EINVAL;
+
+	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+		return -EINVAL;
+
+	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+		return -EINVAL;
+
+	gfn = gpa_to_gfn(guest_ipa);
+
+	mutex_lock(&kvm->slots_lock);
+
+	while (length > 0) {
+		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+		void *maddr;
+		unsigned long num_tags;
+		struct page *page;
+
+		if (is_error_noslot_pfn(pfn)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		page = pfn_to_online_page(pfn);
+		if (!page) {
+			/* Reject ZONE_DEVICE memory */
+			ret = -EFAULT;
+			goto out;
+		}
+		maddr = page_address(page);
+
+		if (!write) {
+			if (test_bit(PG_mte_tagged, &page->flags))
+				num_tags = mte_copy_tags_to_user(tags, maddr,
+							MTE_GRANULES_PER_PAGE);
+			else
+				/* No tags in memory, so write zeros */
+				num_tags = MTE_GRANULES_PER_PAGE -
+					clear_user(tags, MTE_GRANULES_PER_PAGE);
+			kvm_release_pfn_clean(pfn);
+		} else {
+			num_tags = mte_copy_tags_from_user(maddr, tags,
+							MTE_GRANULES_PER_PAGE);
+			kvm_release_pfn_dirty(pfn);
+		}
+
+		if (num_tags != MTE_GRANULES_PER_PAGE) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		/* Set the flag after checking the write completed fully */
+		if (write)
+			set_bit(PG_mte_tagged, &page->flags);
+
+		gfn++;
+		tags += num_tags;
+		length -= PAGE_SIZE;
+	}
+
+out:
+	mutex_unlock(&kvm->slots_lock);
+	/* If some data has been copied report the number of bytes copied */
+	if (length != copy_tags->length)
+		return copy_tags->length - length;
+	return ret;
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d4da58ddcad7e..da1edd2b40465 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PMU_EVENT_FILTER */
 #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter)
 #define KVM_PPC_SVM_OFF		  _IO(KVMIO,  0xb3)
+#define KVM_ARM_MTE_COPY_TAGS	  _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
-- 
GitLab


From 04c02c201d7e8149ae336ead69fb64e4e6f94bc9 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 21 Jun 2021 12:17:16 +0100
Subject: [PATCH 3569/3804] KVM: arm64: Document MTE capability and ioctl

A new capability (KVM_CAP_ARM_MTE) identifies that the kernel supports
granting a guest access to the tags, and provides a mechanism for the
VMM to enable it.

A new ioctl (KVM_ARM_MTE_COPY_TAGS) provides a simple way for a VMM to
access the tags of a guest without having to maintain a PROT_MTE mapping
in userspace. The above capability gates access to the ioctl.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210621111716.37157-7-steven.price@arm.com
---
 Documentation/virt/kvm/api.rst | 61 ++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 7fcb2fd38f42e..97661a97943fd 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5034,6 +5034,43 @@ see KVM_XEN_VCPU_SET_ATTR above.
 The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
 with the KVM_XEN_VCPU_GET_ATTR ioctl.
 
+4.130 KVM_ARM_MTE_COPY_TAGS
+---------------------------
+
+:Capability: KVM_CAP_ARM_MTE
+:Architectures: arm64
+:Type: vm ioctl
+:Parameters: struct kvm_arm_copy_mte_tags
+:Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect
+          arguments, -EFAULT if memory cannot be accessed).
+
+::
+
+  struct kvm_arm_copy_mte_tags {
+	__u64 guest_ipa;
+	__u64 length;
+	void __user *addr;
+	__u64 flags;
+	__u64 reserved[2];
+  };
+
+Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
+``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
+field must point to a buffer which the tags will be copied to or from.
+
+``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
+``KVM_ARM_TAGS_FROM_GUEST``.
+
+The size of the buffer to store the tags is ``(length / 16)`` bytes
+(granules in MTE are 16 bytes long). Each byte contains a single tag
+value. This matches the format of ``PTRACE_PEEKMTETAGS`` and
+``PTRACE_POKEMTETAGS``.
+
+If an error occurs before any data is copied then a negative error code is
+returned. If some tags have been copied before an error occurs then the number
+of bytes successfully copied is returned. If the call completes successfully
+then ``length`` is returned.
+
 5. The kvm_run structure
 ========================
 
@@ -6362,6 +6399,30 @@ default.
 
 See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
 
+7.26 KVM_CAP_ARM_MTE
+--------------------
+
+:Architectures: arm64
+:Parameters: none
+
+This capability indicates that KVM (and the hardware) supports exposing the
+Memory Tagging Extension (MTE) to the guest. It must also be enabled by the
+VMM before creating any VCPUs to allow the guest access. Note that MTE is only
+available to a guest running in AArch64 mode and enabling this capability will
+cause attempts to create AArch32 VCPUs to fail.
+
+When enabled the guest is able to access tags associated with any memory given
+to the guest. KVM will ensure that the tags are maintained during swap or
+hibernation of the host; however the VMM needs to manually save/restore the
+tags as appropriate if the VM is migrated.
+
+When this capability is enabled all memory in memslots must be mapped as
+not-shareable (no MAP_SHARED); attempts to create a memslot with a
+MAP_SHARED mmap will result in an -EINVAL return.
+
+When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
+perform a bulk copy of tags to/from the guest.
+
 8. Other capabilities.
 ======================
 
-- 
GitLab


From 17b11f71795abdce46f62a808f906857e525cea8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 21 Jun 2021 13:36:35 +0200
Subject: [PATCH 3570/3804] drm/nouveau: wait for moving fence after pinning v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We actually need to wait for the moving fence after pinning
the BO to make sure that the pin is completed.

v2: grab the lock while waiting

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffwll.ch/
CC: stable@kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-1-christian.koenig@amd.com
---
 drivers/gpu/drm/nouveau/nouveau_prime.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index 347488685f745..60019d0532fcf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -93,7 +93,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj)
 	if (ret)
 		return -EINVAL;
 
-	return 0;
+	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
+	if (ret)
+		goto error;
+
+	if (nvbo->bo.moving)
+		ret = dma_fence_wait(nvbo->bo.moving, true);
+
+	ttm_bo_unreserve(&nvbo->bo);
+	if (ret)
+		goto error;
+
+	return ret;
+
+error:
+	nouveau_bo_unpin(nvbo);
+	return ret;
 }
 
 void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
-- 
GitLab


From 4b41726aae563273bb4b4a9462ba51ce4d372f78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 21 Jun 2021 13:43:05 +0200
Subject: [PATCH 3571/3804] drm/radeon: wait for moving fence after pinning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We actually need to wait for the moving fence after pinning
the BO to make sure that the pin is completed.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffwll.ch/
CC: stable@kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-2-christian.koenig@amd.com
---
 drivers/gpu/drm/radeon/radeon_prime.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index 42a87948e28c5..4a90807351e72 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -77,9 +77,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
 
 	/* pin buffer into GTT */
 	ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
-	if (likely(ret == 0))
-		bo->prime_shared_count++;
-
+	if (unlikely(ret))
+		goto error;
+
+	if (bo->tbo.moving) {
+		ret = dma_fence_wait(bo->tbo.moving, false);
+		if (unlikely(ret)) {
+			radeon_bo_unpin(bo);
+			goto error;
+		}
+	}
+
+	bo->prime_shared_count++;
+error:
 	radeon_bo_unreserve(bo);
 	return ret;
 }
-- 
GitLab


From 8ddf5b9bb479570a3825d70fecfb9399bc15700c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 21 Jun 2021 14:29:14 +0200
Subject: [PATCH 3572/3804] drm/amdgpu: wait for moving fence after pinning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We actually need to wait for the moving fence after pinning
the BO to make sure that the pin is completed.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffwll.ch/
CC: stable@kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-3-christian.koenig@amd.com
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index baa980a477d94..37ec593650803 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -214,9 +214,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
 {
 	struct drm_gem_object *obj = attach->dmabuf->priv;
 	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	int r;
 
 	/* pin buffer into GTT */
-	return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+	r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
+	if (r)
+		return r;
+
+	if (bo->tbo.moving) {
+		r = dma_fence_wait(bo->tbo.moving, true);
+		if (r) {
+			amdgpu_bo_unpin(bo);
+			return r;
+		}
+	}
+	return 0;
 }
 
 /**
-- 
GitLab


From 53324b51c5eee22d420a2df68b1820d929fa90f3 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:20:01 +0530
Subject: [PATCH 3573/3804] KVM: PPC: Book3S HV: Nested support in
 H_RPT_INVALIDATE

Enable support for process-scoped invalidations from nested
guests and partition-scoped invalidations for nested guests.

Process-scoped invalidations for any level of nested guests
are handled by implementing H_RPT_INVALIDATE handler in the
nested guest exit path in L0.

Partition-scoped invalidation requests are forwarded to the
right nested guest, handled there and passed down to L0
for eventual handling.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
[aneesh: Nested guest partition-scoped invalidation changes]
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
[mpe: Squash in fixup patch]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-5-bharata@linux.ibm.com
---
 .../include/asm/book3s/64/tlbflush-radix.h    |   4 +
 arch/powerpc/include/asm/kvm_book3s.h         |   3 +
 arch/powerpc/kvm/book3s_hv.c                  |  59 +++++++++-
 arch/powerpc/kvm/book3s_hv_nested.c           | 107 ++++++++++++++++++
 arch/powerpc/mm/book3s64/radix_tlb.c          |   4 -
 5 files changed, 170 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 8b33601cdb9d7..a46fd37ad552e 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
 
 #include <asm/hvcall.h>
 
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index e6b53c6e21e32..caaa0f592d8e1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -307,6 +307,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
 			  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b32b968ce56d8..279eae8f9dbcf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -923,6 +923,34 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
 	return yield_count;
 }
 
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+	unsigned long type = kvmppc_get_gpr(vcpu, 6);
+	unsigned long pid, pg_sizes, start, end;
+
+	/*
+	 * The partition-scoped invalidations aren't handled here in L0.
+	 */
+	if (type & H_RPTI_TYPE_NESTED)
+		return RESUME_HOST;
+
+	pid = kvmppc_get_gpr(vcpu, 4);
+	pg_sizes = kvmppc_get_gpr(vcpu, 7);
+	start = kvmppc_get_gpr(vcpu, 8);
+	end = kvmppc_get_gpr(vcpu, 9);
+
+	do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+				type, pg_sizes, start, end);
+
+	kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+	return RESUME_GUEST;
+}
+
 static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
 				    unsigned long id, unsigned long target,
 				    unsigned long type, unsigned long pg_sizes,
@@ -936,10 +964,18 @@ static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
 
 	/*
 	 * Partition-scoped invalidation for nested guests.
-	 * Not yet supported
 	 */
-	if (type & H_RPTI_TYPE_NESTED)
-		return H_P3;
+	if (type & H_RPTI_TYPE_NESTED) {
+		if (!nesting_enabled(vcpu->kvm))
+			return H_FUNCTION;
+
+		/* Support only cores as target */
+		if (target != H_RPTI_TARGET_CMMU)
+			return H_P2;
+
+		return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+					       start, end);
+	}
 
 	/*
 	 * Process-scoped invalidation for L1 guests.
@@ -1784,6 +1820,23 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		if (!xics_on_xive())
 			kvmppc_xics_rm_complete(vcpu, 0);
 		break;
+	case BOOK3S_INTERRUPT_SYSCALL:
+	{
+		unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+		/*
+		 * The H_RPT_INVALIDATE hcalls issued by nested
+		 * guests for process-scoped invalidations when
+		 * GTSE=0, are handled here in L0.
+		 */
+		if (req == H_RPT_INVALIDATE) {
+			r = kvmppc_nested_h_rpt_invalidate(vcpu);
+			break;
+		}
+
+		r = RESUME_HOST;
+		break;
+	}
 	default:
 		r = RESUME_HOST;
 		break;
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 60724f6744219..2e9958e563fb1 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1214,6 +1214,113 @@ long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
 	return H_SUCCESS;
 }
 
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid, unsigned long ric)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (gp) {
+		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+		kvmhv_put_nested(gp);
+	}
+	return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long pg_sizes,
+					 unsigned long start,
+					 unsigned long end)
+{
+	int ret = H_P4;
+	unsigned long addr, nr_pages;
+	struct mmu_psize_def *def;
+	unsigned long psize, ap, page_size;
+	bool flush_lpid;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		nr_pages = (end - start) >> def->shift;
+		flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
+		if (flush_lpid)
+			return do_tlb_invalidate_nested_all(vcpu, lpid,
+							RIC_FLUSH_TLB);
+		addr = start;
+		ap = mmu_get_ap(psize);
+		page_size = 1UL << def->shift;
+		do {
+			ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+						   get_epn(addr));
+			if (ret)
+				return H_P4;
+			addr += page_size;
+		} while (addr < end);
+	}
+	return ret;
+}
+
+/*
+ * Performs partition-scoped invalidations for nested guests
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
+	 *
+	 * However, nested KVM issues a L2 lpid flush call when creating
+	 * partition table entries for L2. This happens even before the
+	 * corresponding shadow lpid is created in HV which happens in
+	 * H_ENTER_NESTED call. Since we can't differentiate this case from
+	 * the invalid case, we ignore such flush requests and return success.
+	 */
+	if (!kvmhv_find_nested(vcpu->kvm, lpid))
+		return H_SUCCESS;
+
+	/*
+	 * A flush all request can be handled by a full lpid flush only.
+	 */
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+	/*
+	 * We don't need to handle a PWC flush like process table here,
+	 * because intermediate partition scoped table in nested guest doesn't
+	 * really have PWC. Only level we have PWC is in L0 and for nested
+	 * invalidate at L0 we always do kvm_flush_lpid() which does
+	 * radix__flush_all_lpid(). For range invalidate at any level, we
+	 * are not removing the higher level page tables and hence there is
+	 * no PWC invalidate needed.
+	 *
+	 * if (type & H_RPTI_TYPE_PWC) {
+	 *	ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+	 *	if (ret)
+	 *		return H_P4;
+	 * }
+	 */
+
+	if (start == 0 && end == -1)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+	if (type & H_RPTI_TYPE_TLB)
+		return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
+						    start, end);
+	return H_SUCCESS;
+}
+
 /* Used to convert a nested guest real address to a L1 guest real address */
 static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
 				       struct kvm_nested_guest *gp,
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 1815fe4c5ffa8..318ec4f336611 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -20,10 +20,6 @@
 
 #include "internal.h"
 
-#define RIC_FLUSH_TLB 0
-#define RIC_FLUSH_PWC 1
-#define RIC_FLUSH_ALL 2
-
 /*
  * tlbiel instruction for radix, set invalidation
  * i.e., r=1 and is=01 or is=10 or is=11
-- 
GitLab


From b87cc116c7e1bc62a84d8c46acd401db179edb11 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:20:02 +0530
Subject: [PATCH 3574/3804] KVM: PPC: Book3S HV: Add KVM_CAP_PPC_RPT_INVALIDATE
 capability

Now that we have H_RPT_INVALIDATE fully implemented, enable
support for it via the KVM_CAP_PPC_RPT_INVALIDATE KVM capability.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-6-bharata@linux.ibm.com
---
 Documentation/virt/kvm/api.rst | 18 ++++++++++++++++++
 arch/powerpc/kvm/powerpc.c     |  3 +++
 include/uapi/linux/kvm.h       |  1 +
 3 files changed, 22 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 7fcb2fd38f42e..9977e845633f6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6362,6 +6362,24 @@ default.
 
 See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
 
+7.26 KVM_CAP_PPC_RPT_INVALIDATE
+-------------------------------
+
+:Capability: KVM_CAP_PPC_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling the
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is enabled for hypervisors on platforms like POWER9
+that support radix MMU.
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a2a68a958fa01..be33b5321a766 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -682,6 +682,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
 		       !kvmppc_hv_ops->enable_dawr1(NULL));
 		break;
+	case KVM_CAP_PPC_RPT_INVALIDATE:
+		r = 1;
+		break;
 #endif
 	default:
 		r = 0;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3fd9a7e9d90cd..613198a94c439 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1082,6 +1082,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SGX_ATTRIBUTE 196
 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
 #define KVM_CAP_PTP_KVM 198
+#define KVM_CAP_PPC_RPT_INVALIDATE 199
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From 81468083f3c76a08183813e3af63a7c3cea3f537 Mon Sep 17 00:00:00 2001
From: Bharata B Rao <bharata@linux.ibm.com>
Date: Mon, 21 Jun 2021 14:20:03 +0530
Subject: [PATCH 3575/3804] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested
 KVM

In the nested KVM case, replace H_TLB_INVALIDATE by the new hcall
H_RPT_INVALIDATE if available. The availability of this hcall
is determined from "hcall-rpt-invalidate" string in ibm,hypertas-functions
DT property.

Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210621085003.904767-7-bharata@linux.ibm.com
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 27 +++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv_nested.c    | 12 ++++++++++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d909c069363e0..b5905ae4377c2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
 #include <asm/pte-walk.h>
 #include <asm/ultravisor.h>
 #include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
 	}
 
 	psi = shift_to_mmu_psize(pshift);
-	rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-				lpid, rb);
+
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+					lpid, rb);
+	} else {
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB,
+					    psize_to_rpti_pgsize(psi),
+					    addr, addr + psize);
+	}
+
 	if (rc)
 		pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+					    0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 2e9958e563fb1..3a06ac0b53e2d 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pte-walk.h>
 #include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -467,8 +468,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
 		return;
 	}
 
-	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-				lpid, TLBIEL_INVAL_SET_LPID);
+	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
+					lpid, TLBIEL_INVAL_SET_LPID);
+	else
+		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					    H_RPTI_TYPE_NESTED |
+					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					    H_RPTI_TYPE_PAT,
+					    H_RPTI_PAGE_ALL, 0, -1UL);
 	if (rc)
 		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
-- 
GitLab


From 399f8dd9a866e107639eabd3c1979cd526ca3a98 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 22 Jun 2021 01:08:30 +0200
Subject: [PATCH 3576/3804] signal: Prevent sigqueue caching after task got
 released

syzbot reported a memory leak related to sigqueue caching.

The assumption that a task cannot cache a sigqueue after the signal handler
has been dropped and exit_task_sigqueue_cache() has been invoked turns out
to be wrong.

Such a task can still invoke release_task(other_task), which cleans up the
signals of 'other_task' and ends up in sigqueue_cache_or_free(), which in
turn will cache the signal because task->sigqueue_cache is NULL. That's
obviously bogus because nothing will free the cached signal of that task
anymore, so the cached item is leaked.

This happens when e.g. the last non-leader thread exits and reaps the
zombie leader.

Prevent this by setting tsk::sigqueue_cache to an error pointer value in
exit_task_sigqueue_cache() which forces any subsequent invocation of
sigqueue_cache_or_free() from that task to hand the sigqueue back to the
kmemcache.

Add comments to all relevant places.

Fixes: 4bad58ebc8bc ("signal: Allow tasks to cache one sigqueue struct")
Reported-by: syzbot+0bac5fec63d4f399ba98@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Link: https://lore.kernel.org/r/878s32g6j5.ffs@nanos.tec.linutronix.de
---
 kernel/signal.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/kernel/signal.c b/kernel/signal.c
index f7c6ffcbd0440..f1ecd8f0c11d9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -435,6 +435,12 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 		 * Preallocation does not hold sighand::siglock so it can't
 		 * use the cache. The lockless caching requires that only
 		 * one consumer and only one producer run at a time.
+		 *
+		 * For the regular allocation case it is sufficient to
+		 * check @q for NULL because this code can only be called
+		 * if the target task @t has not been reaped yet; which
+		 * means this code can never observe the error pointer which is
+		 * written to @t->sigqueue_cache in exit_task_sigqueue_cache().
 		 */
 		q = READ_ONCE(t->sigqueue_cache);
 		if (!q || sigqueue_flags)
@@ -463,13 +469,18 @@ void exit_task_sigqueue_cache(struct task_struct *tsk)
 	struct sigqueue *q = tsk->sigqueue_cache;
 
 	if (q) {
-		tsk->sigqueue_cache = NULL;
 		/*
 		 * Hand it back to the cache as the task might
 		 * be self reaping which would leak the object.
 		 */
 		 kmem_cache_free(sigqueue_cachep, q);
 	}
+
+	/*
+	 * Set an error pointer to ensure that @tsk will not cache a
+	 * sigqueue when it is reaping its child tasks
+	 */
+	tsk->sigqueue_cache = ERR_PTR(-1);
 }
 
 static void sigqueue_cache_or_free(struct sigqueue *q)
@@ -481,6 +492,10 @@ static void sigqueue_cache_or_free(struct sigqueue *q)
 	 * is intentional when run without holding current->sighand->siglock,
 	 * which is fine as current obviously cannot run __sigqueue_free()
 	 * concurrently.
+	 *
+	 * The NULL check is safe even if current has been reaped already,
+	 * in which case exit_task_sigqueue_cache() wrote an error pointer
+	 * into current->sigqueue_cache.
 	 */
 	if (!READ_ONCE(current->sigqueue_cache))
 		WRITE_ONCE(current->sigqueue_cache, q);
-- 
GitLab


From 51696f39cbee5bb684e7959c0c98b5f54548aa34 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 21 Jun 2021 11:24:40 -0700
Subject: [PATCH 3577/3804] KVM: PPC: Book3S HV: Workaround high stack usage
 with clang

LLVM does not emit optimal byteswap assembly, which results in high
stack usage in kvmhv_enter_nested_guest() due to the inlining of
byteswap_pt_regs(). With LLVM 12.0.0:

arch/powerpc/kvm/book3s_hv_nested.c:289:6: error: stack frame size of
2512 bytes in function 'kvmhv_enter_nested_guest' [-Werror,-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 error generated.

While this gets fixed in LLVM, mark byteswap_pt_regs() as
noinline_for_stack so that it does not get inlined and break the build
due to -Werror by default in arch/powerpc/. Not inlining saves
approximately 800 bytes with LLVM 12.0.0:

arch/powerpc/kvm/book3s_hv_nested.c:290:6: warning: stack frame size of
1728 bytes in function 'kvmhv_enter_nested_guest' [-Wframe-larger-than=]
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
     ^
1 warning generated.

Cc: stable@vger.kernel.org # v4.20+
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://github.com/ClangBuiltLinux/linux/issues/1292
Link: https://bugs.llvm.org/show_bug.cgi?id=49610
Link: https://lore.kernel.org/r/202104031853.vDT0Qjqj-lkp@intel.com/
Link: https://gist.github.com/ba710e3703bf45043a31e2806c843ffd
Link: https://lore.kernel.org/r/20210621182440.990242-1-nathan@kernel.org
---
 arch/powerpc/kvm/book3s_hv_nested.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 3a06ac0b53e2d..8543ad538b0c3 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -54,7 +54,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
 	hr->dawrx1 = vcpu->arch.dawrx1;
 }
 
-static void byteswap_pt_regs(struct pt_regs *regs)
+/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
 {
 	unsigned long *addr = (unsigned long *) regs;
 
-- 
GitLab


From 64ab7071254c178e81a6d0203354aad6521258ea Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Tue, 8 Jun 2021 10:43:05 +0800
Subject: [PATCH 3578/3804] clockevents: Add missing parameter documentation

Add the missing documentation for the @cpu parameter of
tick_cleanup_dead_cpu().

Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210608024305.2750999-1-libaokun1@huawei.com
---
 kernel/time/clockevents.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 0056d2bed53ed..bb9d2fe584804 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -629,6 +629,7 @@ void tick_offline_cpu(unsigned int cpu)
 
 /**
  * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
+ * @cpu:	The dead CPU
  */
 void tick_cleanup_dead_cpu(int cpu)
 {
-- 
GitLab


From fecfcbc288e9f4923f40fd23ca78a6acdc7fdf6c Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Mon, 21 Jun 2021 11:37:51 +0100
Subject: [PATCH 3579/3804] sched/rt: Fix RT utilization tracking during policy
 change

RT keeps track of the utilization on a per-rq basis with the structure
avg_rt. This utilization is updated during task_tick_rt(),
put_prev_task_rt() and set_next_task_rt(). However, when the current
running task changes its policy, set_next_task_rt() which would usually
take care of updating the utilization when the rq starts running RT tasks,
will not see such a change, leaving the avg_rt structure outdated. When
that very same task is dequeued later, put_prev_task_rt() will then
update the utilization, based on a wrong last_update_time, leading to a
huge spike in the RT utilization signal.

The signal would eventually recover from this issue after a few ms. Even if
no RT tasks are run, avg_rt is also updated in __update_blocked_others().
But as the CPU capacity depends partly on the avg_rt, this issue has
nonetheless a significant impact on the scheduler.

Fix this issue by ensuring a load update when a running task changes
its policy to RT.

Fixes: 371bf427 ("sched/rt: Add rt_rq utilization tracking")
Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/1624271872-211872-2-git-send-email-vincent.donnefort@arm.com
---
 kernel/sched/rt.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a5254471371c2..3daf42a0f4623 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2341,13 +2341,20 @@ void __init init_sched_rt_class(void)
 static void switched_to_rt(struct rq *rq, struct task_struct *p)
 {
 	/*
-	 * If we are already running, then there's nothing
-	 * that needs to be done. But if we are not running
-	 * we may need to preempt the current running task.
-	 * If that current running task is also an RT task
+	 * If we are running, update the avg_rt tracking, as the running time
+	 * will now on be accounted into the latter.
+	 */
+	if (task_current(rq, p)) {
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
+		return;
+	}
+
+	/*
+	 * If we are not running we may need to preempt the current
+	 * running task. If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (task_on_rq_queued(p) && rq->curr != p) {
+	if (task_on_rq_queued(p)) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
 			rt_queue_push_tasks(rq);
-- 
GitLab


From d7d607096ae6d378b4e92d49946d22739c047d4c Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Mon, 21 Jun 2021 11:37:52 +0100
Subject: [PATCH 3580/3804] sched/rt: Fix Deadline utilization tracking during
 policy change

DL keeps track of the utilization on a per-rq basis with the structure
avg_dl. This utilization is updated during task_tick_dl(),
put_prev_task_dl() and set_next_task_dl(). However, when the current
running task changes its policy, set_next_task_dl() which would usually
take care of updating the utilization when the rq starts running DL
tasks, will not see such a change, leaving the avg_dl structure outdated.
When that very same task is dequeued later, put_prev_task_dl() will
then update the utilization, based on a wrong last_update_time, leading to
a huge spike in the DL utilization signal.

The signal would eventually recover from this issue after a few ms. Even
if no DL tasks are run, avg_dl is also updated in
__update_blocked_others(). But as the CPU capacity depends partly on the
avg_dl, this issue has nonetheless a significant impact on the scheduler.

Fix this issue by ensuring a load update when a running task changes
its policy to DL.

Fixes: 3727e0e ("sched/dl: Add dl_rq utilization tracking")
Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/1624271872-211872-3-git-send-email-vincent.donnefort@arm.com
---
 kernel/sched/deadline.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 22878cd5bd706..aaacd6cfd42f0 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2497,6 +2497,8 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 			check_preempt_curr_dl(rq, p, 0);
 		else
 			resched_curr(rq);
+	} else {
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 	}
 }
 
-- 
GitLab


From 0213b7083e81f4acd69db32cb72eb4e5f220329a Mon Sep 17 00:00:00 2001
From: Qais Yousef <qais.yousef@arm.com>
Date: Thu, 17 Jun 2021 17:51:55 +0100
Subject: [PATCH 3581/3804] sched/uclamp: Fix uclamp_tg_restrict()

Now that cpu.uclamp.min acts as a protection, we need to make sure that
the uclamp request of the task is within the allowed range of the cgroup,
that is, it is clamp()'ed correctly by tg->uclamp[UCLAMP_MIN] and
tg->uclamp[UCLAMP_MAX].

As reported by Xuewen [1] we can have some corner cases where there's
an inversion between the uclamp requested by the task (p) and the uclamp
values of the taskgroup it's attached to (tg). The following table
demonstrates 2 corner cases:

	           |  p  |  tg  |  effective
	-----------+-----+------+-----------
	CASE 1
	-----------+-----+------+-----------
	uclamp_min | 60% | 0%   |  60%
	-----------+-----+------+-----------
	uclamp_max | 80% | 50%  |  50%
	-----------+-----+------+-----------
	CASE 2
	-----------+-----+------+-----------
	uclamp_min | 0%  | 30%  |  30%
	-----------+-----+------+-----------
	uclamp_max | 20% | 50%  |  20%
	-----------+-----+------+-----------

With this fix we get:

	           |  p  |  tg  |  effective
	-----------+-----+------+-----------
	CASE 1
	-----------+-----+------+-----------
	uclamp_min | 60% | 0%   |  50%
	-----------+-----+------+-----------
	uclamp_max | 80% | 50%  |  50%
	-----------+-----+------+-----------
	CASE 2
	-----------+-----+------+-----------
	uclamp_min | 0%  | 30%  |  30%
	-----------+-----+------+-----------
	uclamp_max | 20% | 50%  |  30%
	-----------+-----+------+-----------

Additionally uclamp_update_active_tasks() must now unconditionally
update both UCLAMP_MIN/MAX because changing the tg's UCLAMP_MAX for
instance could have an impact on the effective UCLAMP_MIN of the tasks.

	           |  p  |  tg  |  effective
	-----------+-----+------+-----------
	old
	-----------+-----+------+-----------
	uclamp_min | 60% | 0%   |  50%
	-----------+-----+------+-----------
	uclamp_max | 80% | 50%  |  50%
	-----------+-----+------+-----------
	*new*
	-----------+-----+------+-----------
	uclamp_min | 60% | 0%   | *60%*
	-----------+-----+------+-----------
	uclamp_max | 80% |*70%* | *70%*
	-----------+-----+------+-----------

[1] https://lore.kernel.org/lkml/CAB8ipk_a6VFNjiEnHRHkUMBKbA+qzPQvhtNjJ_YNzQhqV_o8Zw@mail.gmail.com/

Fixes: 0c18f2ecfcc2 ("sched/uclamp: Fix wrong implementation of cpu.uclamp.min")
Reported-by: Xuewen Yan <xuewen.yan94@gmail.com>
Signed-off-by: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210617165155.3774110-1-qais.yousef@arm.com
---
 kernel/sched/core.c | 49 +++++++++++++++++----------------------------
 1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 309745a7ec514..fc231d61bcda8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1403,8 +1403,10 @@ static void uclamp_sync_util_min_rt_default(void)
 static inline struct uclamp_se
 uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 {
+	/* Copy by value as we could modify it */
 	struct uclamp_se uc_req = p->uclamp_req[clamp_id];
 #ifdef CONFIG_UCLAMP_TASK_GROUP
+	unsigned int tg_min, tg_max, value;
 
 	/*
 	 * Tasks in autogroups or root task group will be
@@ -1415,23 +1417,11 @@ uclamp_tg_restrict(struct task_struct *p, enum uclamp_id clamp_id)
 	if (task_group(p) == &root_task_group)
 		return uc_req;
 
-	switch (clamp_id) {
-	case UCLAMP_MIN: {
-		struct uclamp_se uc_min = task_group(p)->uclamp[clamp_id];
-		if (uc_req.value < uc_min.value)
-			return uc_min;
-		break;
-	}
-	case UCLAMP_MAX: {
-		struct uclamp_se uc_max = task_group(p)->uclamp[clamp_id];
-		if (uc_req.value > uc_max.value)
-			return uc_max;
-		break;
-	}
-	default:
-		WARN_ON_ONCE(1);
-		break;
-	}
+	tg_min = task_group(p)->uclamp[UCLAMP_MIN].value;
+	tg_max = task_group(p)->uclamp[UCLAMP_MAX].value;
+	value = uc_req.value;
+	value = clamp(value, tg_min, tg_max);
+	uclamp_se_set(&uc_req, value, false);
 #endif
 
 	return uc_req;
@@ -1630,8 +1620,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
 }
 
 static inline void
-uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
+uclamp_update_active(struct task_struct *p)
 {
+	enum uclamp_id clamp_id;
 	struct rq_flags rf;
 	struct rq *rq;
 
@@ -1651,9 +1642,11 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
 	 * affecting a valid clamp bucket, the next time it's enqueued,
 	 * it will already see the updated clamp bucket value.
 	 */
-	if (p->uclamp[clamp_id].active) {
-		uclamp_rq_dec_id(rq, p, clamp_id);
-		uclamp_rq_inc_id(rq, p, clamp_id);
+	for_each_clamp_id(clamp_id) {
+		if (p->uclamp[clamp_id].active) {
+			uclamp_rq_dec_id(rq, p, clamp_id);
+			uclamp_rq_inc_id(rq, p, clamp_id);
+		}
 	}
 
 	task_rq_unlock(rq, p, &rf);
@@ -1661,20 +1654,14 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
 
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 static inline void
-uclamp_update_active_tasks(struct cgroup_subsys_state *css,
-			   unsigned int clamps)
+uclamp_update_active_tasks(struct cgroup_subsys_state *css)
 {
-	enum uclamp_id clamp_id;
 	struct css_task_iter it;
 	struct task_struct *p;
 
 	css_task_iter_start(css, 0, &it);
-	while ((p = css_task_iter_next(&it))) {
-		for_each_clamp_id(clamp_id) {
-			if ((0x1 << clamp_id) & clamps)
-				uclamp_update_active(p, clamp_id);
-		}
-	}
+	while ((p = css_task_iter_next(&it)))
+		uclamp_update_active(p);
 	css_task_iter_end(&it);
 }
 
@@ -9634,7 +9621,7 @@ static void cpu_util_update_eff(struct cgroup_subsys_state *css)
 		}
 
 		/* Immediately update descendants RUNNABLE tasks */
-		uclamp_update_active_tasks(css, clamps);
+		uclamp_update_active_tasks(css);
 	}
 }
 
-- 
GitLab


From 69c7a5fb2482636f525f016c8333fdb9111ecb9d Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 19 Jun 2021 01:01:07 +0800
Subject: [PATCH 3582/3804] locking/lockdep: Fix the dep path printing for
 backwards BFS

We use the same code to print backwards lock dependency path as the
forwards lock dependency path, and this could result in incorrect
printing because for a backwards lock_list ->trace is not the call trace
where the lock of ->class is acquired.

Fix this by introducing a separate function for printing the backwards
dependency path. Also add a few comments about the printing while we are
at it.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210618170110.3699115-2-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 108 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 2 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 48d736aa03b24..3b32cd9cdfd05 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2304,7 +2304,56 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 }
 
 /*
- * printk the shortest lock dependencies from @start to @end in reverse order:
+ * Dependency path printing:
+ *
+ * After BFS we get a lock dependency path (linked via ->parent of lock_list),
+ * printing out each lock in the dependency path will help on understanding how
+ * the deadlock could happen. Here are some details about dependency path
+ * printing:
+ *
+ * 1)	A lock_list can be either forwards or backwards for a lock dependency,
+ * 	for a lock dependency A -> B, there are two lock_lists:
+ *
+ * 	a)	lock_list in the ->locks_after list of A, whose ->class is B and
+ * 		->links_to is A. In this case, we can say the lock_list is
+ * 		"A -> B" (forwards case).
+ *
+ * 	b)	lock_list in the ->locks_before list of B, whose ->class is A
+ * 		and ->links_to is B. In this case, we can say the lock_list is
+ * 		"B <- A" (bacwards case).
+ *
+ * 	The ->trace of both a) and b) point to the call trace where B was
+ * 	acquired with A held.
+ *
+ * 2)	A "helper" lock_list is introduced during BFS, this lock_list doesn't
+ * 	represent a certain lock dependency, it only provides an initial entry
+ * 	for BFS. For example, BFS may introduce a "helper" lock_list whose
+ * 	->class is A, as a result BFS will search all dependencies starting with
+ * 	A, e.g. A -> B or A -> C.
+ *
+ * 	The notation of a forwards helper lock_list is like "-> A", which means
+ * 	we should search the forwards dependencies starting with "A", e.g A -> B
+ * 	or A -> C.
+ *
+ * 	The notation of a bacwards helper lock_list is like "<- B", which means
+ * 	we should search the backwards dependencies ending with "B", e.g.
+ * 	B <- A or B <- C.
+ */
+
+/*
+ * printk the shortest lock dependencies from @root to @leaf in reverse order.
+ *
+ * We have a lock dependency path as follow:
+ *
+ *    @root                                                                 @leaf
+ *      |                                                                     |
+ *      V                                                                     V
+ *	          ->parent                                   ->parent
+ * | lock_list | <--------- | lock_list | ... | lock_list  | <--------- | lock_list |
+ * |    -> L1  |            | L1 -> L2  | ... |Ln-2 -> Ln-1|            | Ln-1 -> Ln|
+ *
+ * , so it's natural that we start from @leaf and print every ->class and
+ * ->trace until we reach the @root.
  */
 static void __used
 print_shortest_lock_dependencies(struct lock_list *leaf,
@@ -2332,6 +2381,61 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 	} while (entry && (depth >= 0));
 }
 
+/*
+ * printk the shortest lock dependencies from @leaf to @root.
+ *
+ * We have a lock dependency path (from a backwards search) as follow:
+ *
+ *    @leaf                                                                 @root
+ *      |                                                                     |
+ *      V                                                                     V
+ *	          ->parent                                   ->parent
+ * | lock_list | ---------> | lock_list | ... | lock_list  | ---------> | lock_list |
+ * | L2 <- L1  |            | L3 <- L2  | ... | Ln <- Ln-1 |            |    <- Ln  |
+ *
+ * , so when we iterate from @leaf to @root, we actually print the lock
+ * dependency path L1 -> L2 -> .. -> Ln in the non-reverse order.
+ *
+ * Another thing to notice here is that ->class of L2 <- L1 is L1, while the
+ * ->trace of L2 <- L1 is the call trace of L2, in fact we don't have the call
+ * trace of L1 in the dependency path, which is alright, because most of the
+ * time we can figure out where L1 is held from the call trace of L2.
+ */
+static void __used
+print_shortest_lock_dependencies_backwards(struct lock_list *leaf,
+					   struct lock_list *root)
+{
+	struct lock_list *entry = leaf;
+	const struct lock_trace *trace = NULL;
+	int depth;
+
+	/*compute depth from generated tree by BFS*/
+	depth = get_lock_depth(leaf);
+
+	do {
+		print_lock_class_header(entry->class, depth);
+		if (trace) {
+			printk("%*s ... acquired at:\n", depth, "");
+			print_lock_trace(trace, 2);
+			printk("\n");
+		}
+
+		/*
+		 * Record the pointer to the trace for the next lock_list
+		 * entry, see the comments for the function.
+		 */
+		trace = entry->trace;
+
+		if (depth == 0 && (entry != root)) {
+			printk("lockdep:%s bad path found in chain graph\n", __func__);
+			break;
+		}
+
+		entry = get_lock_parent(entry);
+		depth--;
+	} while (entry && (depth >= 0));
+}
+
 static void
 print_irq_lock_scenario(struct lock_list *safe_entry,
 			struct lock_list *unsafe_entry,
@@ -2449,7 +2553,7 @@ print_bad_irq_dependency(struct task_struct *curr,
 	prev_root->trace = save_trace();
 	if (!prev_root->trace)
 		return;
-	print_shortest_lock_dependencies(backwards_entry, prev_root);
+	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
 	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
-- 
GitLab


From d4c157c7b1a67a0844a904baaca9a840c196c103 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 19 Jun 2021 01:01:08 +0800
Subject: [PATCH 3583/3804] locking/lockdep: Remove the unnecessary trace
 saving

In print_bad_irq_dependency(), save_trace() is called to set the ->trace
for @prev_root as the current call trace, however @prev_root corresponds
to the held lock, which may not be acquired in the current call trace,
therefore it's wrong to use save_trace() to set ->trace of @prev_root.
Moreover, with our adjustment of printing backwards dependency path, the
->trace of @prev_root is unnecessary, so remove it.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210618170110.3699115-3-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 3b32cd9cdfd05..74d084a398be9 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2550,9 +2550,6 @@ print_bad_irq_dependency(struct task_struct *curr,
 	lockdep_print_held_locks(curr);
 
 	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
-	prev_root->trace = save_trace();
-	if (!prev_root->trace)
-		return;
 	print_shortest_lock_dependencies_backwards(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
-- 
GitLab


From 7b1f8c6179769af6ffa055e1169610b51d71edd5 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 19 Jun 2021 01:01:09 +0800
Subject: [PATCH 3584/3804] lockding/lockdep: Avoid to find wrong lock dep path
 in check_irq_usage()

In the step #3 of check_irq_usage(), we search backwards to find a lock
whose usage conflicts with the usage of @target_entry1 on safe/unsafe.
However, we should only take the irq-unsafe usage of @target_entry1 into
consideration, because it could be a case where a lock is hardirq-unsafe
but soft-safe, and in check_irq_usage() we find it because its
hardirq-unsafe could result in a hardirq-safe-unsafe deadlock, but
currently since we don't filter out the other usage bits, we may find
a lock dependency path softirq-unsafe -> softirq-safe, which in fact
doesn't cause a deadlock. And this may cause misleading lockdep splats.

Fix this by only keeping LOCKF_ENABLED_IRQ_ALL bits when we try the
backwards search.

Reported-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210618170110.3699115-4-boqun.feng@gmail.com
---
 kernel/locking/lockdep.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 74d084a398be9..6ff1e8405a834 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2768,8 +2768,18 @@ static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
 	 * Step 3: we found a bad match! Now retrieve a lock from the backward
 	 * list whose usage mask matches the exclusive usage mask from the
 	 * lock found on the forward list.
+	 *
+	 * Note, we should only keep the LOCKF_ENABLED_IRQ_ALL bits, considering
+	 * the follow case:
+	 *
+	 * When trying to add A -> B to the graph, we find that there is a
+	 * hardirq-safe L, that L -> ... -> A, and another hardirq-unsafe M,
+	 * that B -> ... -> M. However M is **softirq-safe**, if we use exact
+	 * invert bits of M's usage_mask, we will find another lock N that is
+	 * **softirq-unsafe** and N -> ... -> A, however N -> .. -> M will not
+	 * cause a inversion deadlock.
 	 */
-	backward_mask = original_mask(target_entry1->class->usage_mask);
+	backward_mask = original_mask(target_entry1->class->usage_mask & LOCKF_ENABLED_IRQ_ALL);
 
 	ret = find_usage_backwards(&this, backward_mask, &target_entry);
 	if (bfs_error(ret)) {
-- 
GitLab


From 8946ccc25ed22d957ca7f0b6fac1dcf6d25eaf1f Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Sat, 19 Jun 2021 01:01:10 +0800
Subject: [PATCH 3585/3804] locking/selftests: Add a selftest for
 check_irq_usage()

Johannes Berg reported a lockdep problem which could be reproduced by
the special test case introduced in this patch, so add it.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210618170110.3699115-5-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 65 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 2d85abac17448..5c50b09103963 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -53,6 +53,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
 #define LOCKTYPE_WW	0x10
 #define LOCKTYPE_RTMUTEX 0x20
 #define LOCKTYPE_LL	0x40
+#define LOCKTYPE_SPECIAL 0x80
 
 static struct ww_acquire_ctx t, t2;
 static struct ww_mutex o, o2, o3;
@@ -2744,6 +2745,66 @@ static void local_lock_tests(void)
 	pr_cont("\n");
 }
 
+static void hardirq_deadlock_softirq_not_deadlock(void)
+{
+	/* mutex_A is hardirq-unsafe and softirq-unsafe */
+	/* mutex_A -> lock_C */
+	mutex_lock(&mutex_A);
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_C);
+	spin_unlock(&lock_C);
+	HARDIRQ_ENABLE();
+	mutex_unlock(&mutex_A);
+
+	/* lock_A is hardirq-safe */
+	HARDIRQ_ENTER();
+	spin_lock(&lock_A);
+	spin_unlock(&lock_A);
+	HARDIRQ_EXIT();
+
+	/* lock_A -> lock_B */
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_A);
+	spin_lock(&lock_B);
+	spin_unlock(&lock_B);
+	spin_unlock(&lock_A);
+	HARDIRQ_ENABLE();
+
+	/* lock_B -> lock_C */
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_B);
+	spin_lock(&lock_C);
+	spin_unlock(&lock_C);
+	spin_unlock(&lock_B);
+	HARDIRQ_ENABLE();
+
+	/* lock_D is softirq-safe */
+	SOFTIRQ_ENTER();
+	spin_lock(&lock_D);
+	spin_unlock(&lock_D);
+	SOFTIRQ_EXIT();
+
+	/* And lock_D is hardirq-unsafe */
+	SOFTIRQ_DISABLE();
+	spin_lock(&lock_D);
+	spin_unlock(&lock_D);
+	SOFTIRQ_ENABLE();
+
+	/*
+	 * mutex_A -> lock_C -> lock_D is softirq-unsafe -> softirq-safe, not
+	 * deadlock.
+	 *
+	 * lock_A -> lock_B -> lock_C -> lock_D is hardirq-safe ->
+	 * hardirq-unsafe, deadlock.
+	 */
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_C);
+	spin_lock(&lock_D);
+	spin_unlock(&lock_D);
+	spin_unlock(&lock_C);
+	HARDIRQ_ENABLE();
+}
+
 void locking_selftest(void)
 {
 	/*
@@ -2872,6 +2933,10 @@ void locking_selftest(void)
 
 	local_lock_tests();
 
+	print_testname("hardirq_unsafe_softirq_safe");
+	dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL);
+	pr_cont("\n");
+
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
 		debug_locks = 0;
-- 
GitLab


From f8b298cc39f0619544c607eaef09fd0b2afd10f3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 17 Jun 2021 20:57:18 +0200
Subject: [PATCH 3586/3804] lockdep: Fix wait-type for empty stack

Even the very first lock can violate the wait-context check, consider
the various IRQ contexts.

Fixes: de8f5e4f2dc1 ("lockdep: Introduce wait-type checks")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20210617190313.256987481@infradead.org
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 6ff1e8405a834..0584b2090084a 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4688,7 +4688,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
 	u8 curr_inner;
 	int depth;
 
-	if (!curr->lockdep_depth || !next_inner || next->trylock)
+	if (!next_inner || next->trylock)
 		return 0;
 
 	if (!next_outer)
-- 
GitLab


From c0c2c0dad6a06e0c05e9a52d65f932bd54364c97 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 17 Jun 2021 20:57:19 +0200
Subject: [PATCH 3587/3804] lockdep/selftests: Fix selftests vs
 PROVE_RAW_LOCK_NESTING

When PROVE_RAW_LOCK_NESTING=y many of the selftests FAILED because
HARDIRQ context is out-of-bounds for spinlocks. Instead make the
default hardware context the threaded hardirq context, which preserves
the old locking rules.

The wait-type specific locking selftests will have a non-threaded
HARDIRQ variant.

Fixes: de8f5e4f2dc1 ("lockdep: Introduce wait-type checks")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20210617190313.322096283@infradead.org
---
 lib/locking-selftest.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 5c50b09103963..af12e848a3edf 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -195,6 +195,7 @@ static void init_shared_classes(void)
 #define HARDIRQ_ENTER()				\
 	local_irq_disable();			\
 	__irq_enter();				\
+	lockdep_hardirq_threaded();		\
 	WARN_ON(!in_irq());
 
 #define HARDIRQ_EXIT()				\
-- 
GitLab


From 1a8122960484b19d8d887fb32e1cf42be5647533 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 17 Jun 2021 20:57:20 +0200
Subject: [PATCH 3588/3804] lockdep/selftest: Remove wait-type RCU_CALLBACK
 tests

The problem is that rcu_callback_map doesn't have wait_types defined,
and doing so would make it indistinguishable from SOFTIRQ in any case.
Remove it.

Fixes: 9271a40d2a14 ("lockdep/selftest: Add wait context selftests")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20210617190313.384290291@infradead.org
---
 lib/locking-selftest.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index af12e848a3edf..161108e5d2fe0 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -2494,16 +2494,6 @@ static void rcu_sched_exit(int *_)
 	int rcu_sched_guard_##name __guard(rcu_sched_exit);	\
 	rcu_read_lock_sched();
 
-static void rcu_callback_exit(int *_)
-{
-	rcu_lock_release(&rcu_callback_map);
-}
-
-#define RCU_CALLBACK_CONTEXT(name, ...)					\
-	int rcu_callback_guard_##name __guard(rcu_callback_exit);	\
-	rcu_lock_acquire(&rcu_callback_map);
-
-
 static void raw_spinlock_exit(raw_spinlock_t **lock)
 {
 	raw_spin_unlock(*lock);
@@ -2560,8 +2550,6 @@ static void __maybe_unused inner##_in_##outer(void)				\
  * ---------------+-------+----------+------+-------
  * RCU_BH         |   o   |    o     |  o   |  x
  * ---------------+-------+----------+------+-------
- * RCU_CALLBACK   |   o   |    o     |  o   |  x
- * ---------------+-------+----------+------+-------
  * RCU_SCHED      |   o   |    o     |  x   |  x
  * ---------------+-------+----------+------+-------
  * RAW_SPIN       |   o   |    o     |  x   |  x
@@ -2578,7 +2566,6 @@ GENERATE_2_CONTEXT_TESTCASE(NOTTHREADED_HARDIRQ, , inner, inner_lock)		\
 GENERATE_2_CONTEXT_TESTCASE(SOFTIRQ, , inner, inner_lock)			\
 GENERATE_2_CONTEXT_TESTCASE(RCU, , inner, inner_lock)				\
 GENERATE_2_CONTEXT_TESTCASE(RCU_BH, , inner, inner_lock)			\
-GENERATE_2_CONTEXT_TESTCASE(RCU_CALLBACK, , inner, inner_lock)			\
 GENERATE_2_CONTEXT_TESTCASE(RCU_SCHED, , inner, inner_lock)			\
 GENERATE_2_CONTEXT_TESTCASE(RAW_SPINLOCK, raw_lock_A, inner, inner_lock)	\
 GENERATE_2_CONTEXT_TESTCASE(SPINLOCK, lock_A, inner, inner_lock)		\
@@ -2640,10 +2627,6 @@ static void wait_context_tests(void)
 	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(RCU_BH);
 	pr_cont("\n");
 
-	print_testname("in RCU callback context");
-	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(RCU_CALLBACK);
-	pr_cont("\n");
-
 	print_testname("in RCU-sched context");
 	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(RCU_SCHED);
 	pr_cont("\n");
-- 
GitLab


From e112c41341c03d9224a9fc522bdb3539bc849b56 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:22 +0200
Subject: [PATCH 3589/3804] futex: Prepare futex_lock_pi() for runtime clock
 selection

futex_lock_pi() is the only futex operation which cannot select the clock
for timeouts (CLOCK_MONOTONIC/CLOCK_REALTIME). That's inconsistent and
there is no particular reason why this cannot be supported.

This was overlooked when CLOCK_REALTIME_FLAG was introduced and
unfortunately not reported when the inconsistency was discovered in glibc.

Prepare the function and enforce the CLOCK_REALTIME_FLAG on FUTEX_LOCK_PI
so that a new FUTEX_LOCK_PI2 can implement it correctly.

Reported-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210422194705.338657741@linutronix.de
---
 kernel/futex.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 08008c225bec0..f820439a8aae7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2783,7 +2783,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
 	if (refill_pi_state_cache())
 		return -ENOMEM;
 
-	to = futex_setup_timer(time, &timeout, FLAGS_CLOCKRT, 0);
+	to = futex_setup_timer(time, &timeout, flags, 0);
 
 retry:
 	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
@@ -3739,6 +3739,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 	case FUTEX_WAKE_OP:
 		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
 	case FUTEX_LOCK_PI:
+		flags |= FLAGS_CLOCKRT;
 		return futex_lock_pi(uaddr, flags, timeout, 0);
 	case FUTEX_UNLOCK_PI:
 		return futex_unlock_pi(uaddr, flags);
-- 
GitLab


From bf22a6976897977b0a3f1aeba6823c959fc4fdae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Apr 2021 21:44:23 +0200
Subject: [PATCH 3590/3804] futex: Provide FUTEX_LOCK_PI2 to support clock
 selection

The FUTEX_LOCK_PI futex operand uses a CLOCK_REALTIME based absolute
timeout since it was implemented, but it does not require that the
FUTEX_CLOCK_REALTIME flag is set, because that was introduced later.

In theory as none of the user space implementations can set the
FUTEX_CLOCK_REALTIME flag on this operand, it would be possible to
creatively abuse it and make the meaning inverse, i.e. select CLOCK_REALTIME
when not set and CLOCK_MONOTONIC when set. But that's a nasty hackery.

Another option would be to have a new FUTEX_CLOCK_MONOTONIC flag only for
FUTEX_LOCK_PI, but that's also awkward because it does not allow libraries
to handle the timeout clock selection consistently.

So provide a new FUTEX_LOCK_PI2 operand which implements the timeout
semantics which the other operands use and leave FUTEX_LOCK_PI alone.

Reported-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210422194705.440773992@linutronix.de
---
 include/uapi/linux/futex.h | 2 ++
 kernel/futex.c             | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index a89eb0accd5e2..235e5b2facaa4 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -21,6 +21,7 @@
 #define FUTEX_WAKE_BITSET	10
 #define FUTEX_WAIT_REQUEUE_PI	11
 #define FUTEX_CMP_REQUEUE_PI	12
+#define FUTEX_LOCK_PI2		13
 
 #define FUTEX_PRIVATE_FLAG	128
 #define FUTEX_CLOCK_REALTIME	256
@@ -32,6 +33,7 @@
 #define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_OP_PRIVATE	(FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG)
 #define FUTEX_LOCK_PI_PRIVATE	(FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_LOCK_PI2_PRIVATE	(FUTEX_LOCK_PI2 | FUTEX_PRIVATE_FLAG)
 #define FUTEX_UNLOCK_PI_PRIVATE	(FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE	(FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG)
diff --git a/kernel/futex.c b/kernel/futex.c
index f820439a8aae7..f832b64346256 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3707,12 +3707,14 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 
 	if (op & FUTEX_CLOCK_REALTIME) {
 		flags |= FLAGS_CLOCKRT;
-		if (cmd != FUTEX_WAIT_BITSET &&	cmd != FUTEX_WAIT_REQUEUE_PI)
+		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+		    cmd != FUTEX_LOCK_PI2)
 			return -ENOSYS;
 	}
 
 	switch (cmd) {
 	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
 	case FUTEX_UNLOCK_PI:
 	case FUTEX_TRYLOCK_PI:
 	case FUTEX_WAIT_REQUEUE_PI:
@@ -3740,6 +3742,8 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
 		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
 	case FUTEX_LOCK_PI:
 		flags |= FLAGS_CLOCKRT;
+		fallthrough;
+	case FUTEX_LOCK_PI2:
 		return futex_lock_pi(uaddr, flags, timeout, 0);
 	case FUTEX_UNLOCK_PI:
 		return futex_unlock_pi(uaddr, flags);
@@ -3760,6 +3764,7 @@ static __always_inline bool futex_cmd_has_timeout(u32 cmd)
 	switch (cmd) {
 	case FUTEX_WAIT:
 	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
 	case FUTEX_WAIT_BITSET:
 	case FUTEX_WAIT_REQUEUE_PI:
 		return true;
-- 
GitLab


From 0e8a89d49d45197770f2e57fb15f1bc9ded96eb0 Mon Sep 17 00:00:00 2001
From: Xiongwei Song <sxwjean@gmail.com>
Date: Fri, 18 Jun 2021 21:02:30 +0800
Subject: [PATCH 3591/3804] locking/lockdep: Correct the description error for
 check_redundant()

If there is no matched result, check_redundant() will return BFS_RNOMATCH.

Signed-off-by: Xiongwei Song <sxwjean@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lkml.kernel.org/r/20210618130230.123249-1-sxwjean@me.com
---
 kernel/locking/lockdep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 0584b2090084a..095c87f97a313 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2829,7 +2829,7 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
  * <target> or not. If it can, <src> -> <target> dependency is already
  * in the graph.
  *
- * Return BFS_RMATCH if it does, or BFS_RMATCH if it does not, return BFS_E* if
+ * Return BFS_RMATCH if it does, or BFS_RNOMATCH if it does not, return BFS_E* if
  * any error appears in the bfs search.
  */
 static noinline enum bfs_result
-- 
GitLab


From db3a34e17433de2390eb80d436970edcebd0ca3e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:19 -0700
Subject: [PATCH 3592/3804] clocksource: Retry clock read if long delays
 detected

When the clocksource watchdog marks a clock as unstable, this might be due
to that clock being unstable or it might be due to delays that happen to
occur between the reads of the two clocks.  Yes, interrupts are disabled
across those two reads, but there is no shortage of things that can delay
interrupts-disabled regions of code ranging from SMI handlers to vCPU
preemption.  It would be good to have some indication as to why the clock
was marked unstable.

Therefore, re-read the watchdog clock on either side of the read from the
clock under test.  If the watchdog clock shows an excessive time delta
between its pair of reads, the reads are retried.

The maximum number of retries is specified by a new kernel boot parameter
clocksource.max_cswd_read_retries, which defaults to three, that is, up to
four reads, one initial and up to three retries.  If more than one retry
was required, a message is printed on the console (the occasional single
retry is expected behavior, especially in guest OSes).  If the maximum
number of retries is exceeded, the clock under test will be marked
unstable.  However, the probability of this happening due to various sorts
of delays is quite small.  In addition, the reason (clock-read delays) for
the unstable marking will be apparent.

Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-1-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |  6 +++
 kernel/time/clocksource.c                     | 53 ++++++++++++++++---
 2 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index cb89dbdedc463..995deccc28bcd 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -581,6 +581,12 @@
 			loops can be debugged more effectively on production
 			systems.
 
+	clocksource.max_cswd_read_retries= [KNL]
+			Number of clocksource_watchdog() retries due to
+			external delays before the clock will be marked
+			unstable.  Defaults to three retries, that is,
+			four attempts to read the clock under test.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 2cd902592fc1f..43243f2be98e9 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating);
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
+ */
+#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
 static void clocksource_watchdog_work(struct work_struct *work)
 {
 	/*
@@ -184,12 +191,45 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
+static ulong max_cswd_read_retries = 3;
+module_param(max_cswd_read_retries, ulong, 0644);
+
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+	unsigned int nretries;
+	u64 wd_end, wd_delta;
+	int64_t wd_delay;
+
+	for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+		local_irq_disable();
+		*wdnow = watchdog->read(watchdog);
+		*csnow = cs->read(cs);
+		wd_end = watchdog->read(watchdog);
+		local_irq_enable();
+
+		wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+		wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+					      watchdog->shift);
+		if (wd_delay <= WATCHDOG_MAX_SKEW) {
+			if (nretries > 1 || nretries >= max_cswd_read_retries) {
+				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+					smp_processor_id(), watchdog->name, nretries);
+			}
+			return true;
+		}
+	}
+
+	pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
+		smp_processor_id(), watchdog->name, wd_delay, nretries);
+	return false;
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
-	struct clocksource *cs;
 	u64 csnow, wdnow, cslast, wdlast, delta;
-	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
+	int64_t wd_nsec, cs_nsec;
+	struct clocksource *cs;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -206,10 +246,11 @@ static void clocksource_watchdog(struct timer_list *unused)
 			continue;
 		}
 
-		local_irq_disable();
-		csnow = cs->read(cs);
-		wdnow = watchdog->read(watchdog);
-		local_irq_enable();
+		if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+			/* Clock readout unreliable, so give it up. */
+			__clocksource_unstable(cs);
+			continue;
+		}
 
 		/* Clocksource initialized ? */
 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
-- 
GitLab


From 7560c02bdffb7c52d1457fa551b9e745d4b9e754 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:20 -0700
Subject: [PATCH 3593/3804] clocksource: Check per-CPU clock synchronization
 when marked unstable

Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other.  However, this problem has purportedly been
solved in the past ten years.  Except that it is all too possible that the
problem has instead simply been made less likely, which might mean that
some of the occasional "Marking clocksource 'tsc' as unstable" messages
might be due to desynchronization.  How would anyone know?

Therefore apply CPU-to-CPU synchronization checking to newly unstable
clocksources that are marked with the new CLOCK_SOURCE_VERIFY_PERCPU flag.
Lists of desynchronized CPUs are printed, with the caveat that if it
is the reporting CPU that is itself desynchronized, it will appear that
all the other clocks are wrong.  Just like in real life.

Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-2-paulmck@kernel.org
---
 arch/x86/kernel/tsc.c       |  3 +-
 include/linux/clocksource.h |  2 +-
 kernel/time/clocksource.c   | 60 +++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57ec011921805..6eb1b097e97eb 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1152,7 +1152,8 @@ static struct clocksource clocksource_tsc = {
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_VALID_FOR_HRES |
-				  CLOCK_SOURCE_MUST_VERIFY,
+				  CLOCK_SOURCE_MUST_VERIFY |
+				  CLOCK_SOURCE_VERIFY_PERCPU,
 	.vdso_clock_mode	= VDSO_CLOCKMODE_TSC,
 	.enable			= tsc_cs_enable,
 	.resume			= tsc_resume,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d6ab416ee2d2c..7f83d51c0fd7b 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -137,7 +137,7 @@ struct clocksource {
 #define CLOCK_SOURCE_UNSTABLE			0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP		0x80
 #define CLOCK_SOURCE_RESELECT			0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU		0x200
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 43243f2be98e9..cb12225bf0502 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -224,6 +224,60 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 	return false;
 }
 
+static u64 csnow_mid;
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+	struct clocksource *cs = (struct clocksource *)csin;
+
+	csnow_mid = cs->read(cs);
+}
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
+	u64 csnow_begin, csnow_end;
+	int cpu, testcpu;
+	s64 delta;
+
+	cpumask_clear(&cpus_ahead);
+	cpumask_clear(&cpus_behind);
+	preempt_disable();
+	testcpu = smp_processor_id();
+	pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
+	for_each_online_cpu(cpu) {
+		if (cpu == testcpu)
+			continue;
+		csnow_begin = cs->read(cs);
+		smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+		csnow_end = cs->read(cs);
+		delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+		if (delta < 0)
+			cpumask_set_cpu(cpu, &cpus_behind);
+		delta = (csnow_end - csnow_mid) & cs->mask;
+		if (delta < 0)
+			cpumask_set_cpu(cpu, &cpus_ahead);
+		delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+		cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+		if (cs_nsec > cs_nsec_max)
+			cs_nsec_max = cs_nsec;
+		if (cs_nsec < cs_nsec_min)
+			cs_nsec_min = cs_nsec;
+	}
+	preempt_enable();
+	if (!cpumask_empty(&cpus_ahead))
+		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
+	if (!cpumask_empty(&cpus_behind))
+		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
+			cpumask_pr_args(&cpus_behind), testcpu, cs->name);
+	if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
+		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
 	u64 csnow, wdnow, cslast, wdlast, delta;
@@ -448,6 +502,12 @@ static int __clocksource_watchdog_kthread(void)
 	unsigned long flags;
 	int select = 0;
 
+	/* Do any required per-CPU skew verification. */
+	if (curr_clocksource &&
+	    curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
+	    curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
+		clocksource_verify_percpu(curr_clocksource);
+
 	spin_lock_irqsave(&watchdog_lock, flags);
 	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
 		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
-- 
GitLab


From fa218f1cce6ba40069c8daab8821de7e6be1cdd0 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:21 -0700
Subject: [PATCH 3594/3804] clocksource: Limit number of CPUs checked for clock
 synchronization

Currently, if skew is detected on a clock marked CLOCK_SOURCE_VERIFY_PERCPU,
that clock is checked on all CPUs.  This is thorough, but might not be
what you want on a system with a few tens of CPUs, let alone a few hundred
of them.

Therefore, by default check only up to eight randomly chosen CPUs.  Also
provide a new clocksource.verify_n_cpus kernel boot parameter.  A value of
-1 says to check all of the CPUs, and a non-negative value says to randomly
select that number of CPUs, without concern about selecting the same CPU
multiple times.  However, make use of a cpumask so that a given CPU will be
checked at most once.

Suggested-by: Thomas Gleixner <tglx@linutronix.de> # For verify_n_cpus=1.
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-3-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         | 10 +++
 kernel/time/clocksource.c                     | 74 ++++++++++++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 995deccc28bcd..9ec9ea1a51f29 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -587,6 +587,16 @@
 			unstable.  Defaults to three retries, that is,
 			four attempts to read the clock under test.
 
+	clocksource.verify_n_cpus= [KNL]
+			Limit the number of CPUs checked for clocksources
+			marked with CLOCK_SOURCE_VERIFY_PERCPU that
+			are marked unstable due to excessive skew.
+			A negative value says to check all CPUs, while
+			zero says not to check any.  Values larger than
+			nr_cpu_ids are silently truncated to nr_cpu_ids.
+			The actual CPUs are chosen randomly, with
+			no replacement if the same CPU is chosen twice.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index cb12225bf0502..e4beab21a1fa3 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -14,6 +14,8 @@
 #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
 #include <linux/tick.h>
 #include <linux/kthread.h>
+#include <linux/prandom.h>
+#include <linux/cpu.h>
 
 #include "tick-internal.h"
 #include "timekeeping_internal.h"
@@ -193,6 +195,8 @@ void clocksource_mark_unstable(struct clocksource *cs)
 
 static ulong max_cswd_read_retries = 3;
 module_param(max_cswd_read_retries, ulong, 0644);
+static int verify_n_cpus = 8;
+module_param(verify_n_cpus, int, 0644);
 
 static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 {
@@ -227,6 +231,55 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
 static u64 csnow_mid;
 static cpumask_t cpus_ahead;
 static cpumask_t cpus_behind;
+static cpumask_t cpus_chosen;
+
+static void clocksource_verify_choose_cpus(void)
+{
+	int cpu, i, n = verify_n_cpus;
+
+	if (n < 0) {
+		/* Check all of the CPUs. */
+		cpumask_copy(&cpus_chosen, cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+		return;
+	}
+
+	/* If no checking desired, or no other CPU to check, leave. */
+	cpumask_clear(&cpus_chosen);
+	if (n == 0 || num_online_cpus() <= 1)
+		return;
+
+	/* Make sure to select at least one CPU other than the current CPU. */
+	cpu = cpumask_next(-1, cpu_online_mask);
+	if (cpu == smp_processor_id())
+		cpu = cpumask_next(cpu, cpu_online_mask);
+	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+		return;
+	cpumask_set_cpu(cpu, &cpus_chosen);
+
+	/* Force a sane value for the boot parameter. */
+	if (n > nr_cpu_ids)
+		n = nr_cpu_ids;
+
+	/*
+	 * Randomly select the specified number of CPUs.  If the same
+	 * CPU is selected multiple times, that CPU is checked only once,
+	 * and no replacement CPU is selected.  This gracefully handles
+	 * situations where verify_n_cpus is greater than the number of
+	 * CPUs that are currently online.
+	 */
+	for (i = 1; i < n; i++) {
+		cpu = prandom_u32() % nr_cpu_ids;
+		cpu = cpumask_next(cpu - 1, cpu_online_mask);
+		if (cpu >= nr_cpu_ids)
+			cpu = cpumask_next(-1, cpu_online_mask);
+		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
+			cpumask_set_cpu(cpu, &cpus_chosen);
+	}
+
+	/* Don't verify ourselves. */
+	cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
+}
 
 static void clocksource_verify_one_cpu(void *csin)
 {
@@ -242,12 +295,22 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 	int cpu, testcpu;
 	s64 delta;
 
+	if (verify_n_cpus == 0)
+		return;
 	cpumask_clear(&cpus_ahead);
 	cpumask_clear(&cpus_behind);
+	get_online_cpus();
 	preempt_disable();
+	clocksource_verify_choose_cpus();
+	if (cpumask_weight(&cpus_chosen) == 0) {
+		preempt_enable();
+		put_online_cpus();
+		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
+		return;
+	}
 	testcpu = smp_processor_id();
-	pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
-	for_each_online_cpu(cpu) {
+	pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
+	for_each_cpu(cpu, &cpus_chosen) {
 		if (cpu == testcpu)
 			continue;
 		csnow_begin = cs->read(cs);
@@ -267,6 +330,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 			cs_nsec_min = cs_nsec;
 	}
 	preempt_enable();
+	put_online_cpus();
 	if (!cpumask_empty(&cpus_ahead))
 		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
 			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
@@ -337,6 +401,12 @@ static void clocksource_watchdog(struct timer_list *unused)
 				watchdog->name, wdnow, wdlast, watchdog->mask);
 			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
 				cs->name, csnow, cslast, cs->mask);
+			if (curr_clocksource == cs)
+				pr_warn("                      '%s' is current clocksource.\n", cs->name);
+			else if (curr_clocksource)
+				pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
+			else
+				pr_warn("                      No current clocksource.\n");
 			__clocksource_unstable(cs);
 			continue;
 		}
-- 
GitLab


From 2e27e793e280ff12cb5c202a1214c08b0d3a0f26 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:22 -0700
Subject: [PATCH 3595/3804] clocksource: Reduce clocksource-skew threshold

Currently, WATCHDOG_THRESHOLD is set to detect a 62.5-millisecond skew in
a 500-millisecond WATCHDOG_INTERVAL.  This requires that clocks be skewed
by more than 12.5% in order to be marked unstable.  Except that a clock
that is skewed by that much is probably destroying unsuspecting software
right and left.  And given that there are now checks for false-positive
skews due to delays between reading the two clocks, it should be possible
to greatly decrease WATCHDOG_THRESHOLD, at least for fine-grained clocks
such as TSC.

Therefore, add a new uncertainty_margin field to the clocksource structure
that contains the maximum uncertainty in nanoseconds for the corresponding
clock.  This field may be initialized manually, as it is for
clocksource_tsc_early and clocksource_jiffies, which is copied to
refined_jiffies.  If the field is not initialized manually, it will be
computed at clock-registry time as the period of the clock in question
based on the scale and freq parameters to __clocksource_update_freq_scale()
function.  If either of those two parameters are zero, the
tens-of-milliseconds WATCHDOG_THRESHOLD is used as a cowardly alternative
to dividing by zero.  No matter how the uncertainty_margin field is
calculated, it is bounded below by twice WATCHDOG_MAX_SKEW, that is, by 100
microseconds.

Note that manually initialized uncertainty_margin fields are not adjusted,
but there is a WARN_ON_ONCE() that triggers if any such field is less than
twice WATCHDOG_MAX_SKEW.  This WARN_ON_ONCE() is intended to discourage
production use of the one-nanosecond uncertainty_margin values that are
used to test the clock-skew code itself.

The actual clock-skew check uses the sum of the uncertainty_margin fields
of the two clocksource structures being compared.  Integer overflow is
avoided because the largest computed value of the uncertainty_margin
fields is one billion (10^9), and double that value fits into an
unsigned int.  However, if someone manually specifies (say) UINT_MAX,
they will get what they deserve.

Note that the refined_jiffies uncertainty_margin field is initialized to
TICK_NSEC, which means that skew checks involving this clocksource will
be sufficiently forgiving.  In a similar vein, the clocksource_tsc_early
uncertainty_margin field is initialized to 32*NSEC_PER_MSEC, which
replicates the current behavior and allows custom setting if needed
in order to address the rare skews detected for this clocksource in
current mainline.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-4-paulmck@kernel.org
---
 arch/x86/kernel/tsc.c       |  1 +
 include/linux/clocksource.h |  3 +++
 kernel/time/clocksource.c   | 48 +++++++++++++++++++++++++++++--------
 kernel/time/jiffies.c       | 15 ++++++------
 4 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6eb1b097e97eb..2e076a459a0c0 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1128,6 +1128,7 @@ static int tsc_cs_enable(struct clocksource *cs)
 static struct clocksource clocksource_tsc_early = {
 	.name			= "tsc-early",
 	.rating			= 299,
+	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
 	.read			= read_tsc,
 	.mask			= CLOCKSOURCE_MASK(64),
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS |
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 7f83d51c0fd7b..895203727cb54 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -43,6 +43,8 @@ struct module;
  * @shift:		Cycle to nanosecond divisor (power of two)
  * @max_idle_ns:	Maximum idle time permitted by the clocksource (nsecs)
  * @maxadj:		Maximum adjustment value to mult (~11%)
+ * @uncertainty_margin:	Maximum uncertainty in nanoseconds per half second.
+ *			Zero says to use default WATCHDOG_THRESHOLD.
  * @archdata:		Optional arch-specific data
  * @max_cycles:		Maximum safe cycle value which won't overflow on
  *			multiplication
@@ -98,6 +100,7 @@ struct clocksource {
 	u32			shift;
 	u64			max_idle_ns;
 	u32			maxadj;
+	u32			uncertainty_margin;
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
 	struct arch_clocksource_data archdata;
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index e4beab21a1fa3..9b27888a6e75e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -95,6 +95,20 @@ static char override_name[CS_NAME_LEN];
 static int finished_booting;
 static u64 suspend_start;
 
+/*
+ * Threshold: 0.0312s, when doubled: 0.0625s.
+ * Also a default for cs->uncertainty_margin when registering clocks.
+ */
+#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
+
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
+ * a lower bound for cs->uncertainty_margin values when registering clocks.
+ */
+#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC)
+
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 static void clocksource_watchdog_work(struct work_struct *work);
 static void clocksource_select(void);
@@ -121,17 +135,9 @@ static int clocksource_watchdog_kthread(void *data);
 static void __clocksource_change_rating(struct clocksource *cs, int rating);
 
 /*
- * Interval: 0.5sec Threshold: 0.0625s
+ * Interval: 0.5sec.
  */
 #define WATCHDOG_INTERVAL (HZ >> 1)
-#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
-
-/*
- * Maximum permissible delay between two readouts of the watchdog
- * clocksource surrounding a read of the clocksource being validated.
- * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
- */
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
 
 static void clocksource_watchdog_work(struct work_struct *work)
 {
@@ -348,6 +354,7 @@ static void clocksource_watchdog(struct timer_list *unused)
 	int next_cpu, reset_pending;
 	int64_t wd_nsec, cs_nsec;
 	struct clocksource *cs;
+	u32 md;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -394,7 +401,8 @@ static void clocksource_watchdog(struct timer_list *unused)
 			continue;
 
 		/* Check the deviation from the watchdog clocksource. */
-		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
+		if (abs(cs_nsec - wd_nsec) > md) {
 			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				smp_processor_id(), cs->name);
 			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
@@ -1047,6 +1055,26 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq
 		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
 				       NSEC_PER_SEC / scale, sec * scale);
 	}
+
+	/*
+	 * If the uncertainty margin is not specified, calculate it.
+	 * If both scale and freq are non-zero, calculate the clock
+	 * period, but bound below at 2*WATCHDOG_MAX_SKEW.  However,
+	 * if either of scale or freq is zero, be very conservative and
+	 * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
+	 * uncertainty margin.  Allow stupidly small uncertainty margins
+	 * to be specified by the caller for testing purposes, but warn
+	 * to discourage production use of this capability.
+	 */
+	if (scale && freq && !cs->uncertainty_margin) {
+		cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
+		if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
+			cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
+	} else if (!cs->uncertainty_margin) {
+		cs->uncertainty_margin = WATCHDOG_THRESHOLD;
+	}
+	WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
+
 	/*
 	 * Ensure clocksources that have large 'mult' values don't overflow
 	 * when adjusted.
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index a492e4da69ba2..01935aafdb460 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -49,13 +49,14 @@ static u64 jiffies_read(struct clocksource *cs)
  * for "tick-less" systems.
  */
 static struct clocksource clocksource_jiffies = {
-	.name		= "jiffies",
-	.rating		= 1, /* lowest valid rating*/
-	.read		= jiffies_read,
-	.mask		= CLOCKSOURCE_MASK(32),
-	.mult		= TICK_NSEC << JIFFIES_SHIFT, /* details above */
-	.shift		= JIFFIES_SHIFT,
-	.max_cycles	= 10,
+	.name			= "jiffies",
+	.rating			= 1, /* lowest valid rating*/
+	.uncertainty_margin	= 32 * NSEC_PER_MSEC,
+	.read			= jiffies_read,
+	.mask			= CLOCKSOURCE_MASK(32),
+	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
+	.shift			= JIFFIES_SHIFT,
+	.max_cycles		= 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock);
-- 
GitLab


From 1253b9b87e42ab6a3d5c2cb27af2bdd67d7e50ff Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 27 May 2021 12:01:23 -0700
Subject: [PATCH 3596/3804] clocksource: Provide kernel module to test
 clocksource watchdog

When the clocksource watchdog marks a clock as unstable, this might
be due to that clock being unstable or it might be due to delays that
happen to occur between the reads of the two clocks.  It would be good
to have a way of testing the clocksource watchdog's ability to
distinguish between these two causes of clock skew and instability.

Therefore, provide a new clocksource-wdtest module selected by a new
TEST_CLOCKSOURCE_WATCHDOG Kconfig option.  This module has a single module
parameter named "holdoff" that provides the number of seconds of delay
before testing should start, which defaults to zero when built as a module
and to 10 seconds when built directly into the kernel.  Very large systems
that boot slowly may need to increase the value of this module parameter.

This module uses hand-crafted clocksource structures to do its testing,
thus avoiding messing up timing for the rest of the kernel and for user
applications.  This module first verifies that the ->uncertainty_margin
fields of the clocksource structures are set sanely.  It then tests the
delay-detection capability of the clocksource watchdog, increasing the
number of consecutive delays injected, first provoking console messages
complaining about the delays and finally forcing a clock-skew event.
Unexpected test results cause at least one WARN_ON_ONCE() console splat.
If there are no splats, the test has passed.  Finally, it fuzzes the
value returned from a clocksource to test the clocksource watchdog's
ability to detect time skew.

This module checks the state of its clocksource after each test, and
uses WARN_ON_ONCE() to emit a console splat if there are any failures.
This should enable all types of test frameworks to detect any such
failures.

This facility is intended for diagnostic use only, and should be avoided
on production systems.

Reported-by: Chris Mason <clm@fb.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Feng Tang <feng.tang@intel.com>
Link: https://lore.kernel.org/r/20210527190124.440372-5-paulmck@kernel.org
---
 .../admin-guide/kernel-parameters.txt         |   6 +
 include/linux/clocksource.h                   |   3 +
 kernel/time/Makefile                          |   1 +
 kernel/time/clocksource-wdtest.c              | 202 ++++++++++++++++++
 kernel/time/clocksource.c                     |   6 +-
 lib/Kconfig.debug                             |  12 ++
 6 files changed, 228 insertions(+), 2 deletions(-)
 create mode 100644 kernel/time/clocksource-wdtest.c

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 9ec9ea1a51f29..591048ed13653 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -597,6 +597,12 @@
 			The actual CPUs are chosen randomly, with
 			no replacement if the same CPU is chosen twice.
 
+	clocksource-wdtest.holdoff= [KNL]
+			Set the time in seconds that the clocksource
+			watchdog test waits before commencing its tests.
+			Defaults to zero when built as a module and to
+			10 seconds when built into the kernel.
+
 	clearcpuid=BITNUM[,BITNUM...] [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 895203727cb54..1d42d4b173271 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -291,4 +291,7 @@ static inline void timer_probe(void) {}
 #define TIMER_ACPI_DECLARE(name, table_id, fn)		\
 	ACPI_DECLARE_PROBE_ENTRY(timer, name, table_id, 0, NULL, 0, fn)
 
+extern ulong max_cswd_read_retries;
+void clocksource_verify_percpu(struct clocksource *cs);
+
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 1fb1c1ef6a19b..1ed85b25b0968 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_HAVE_GENERIC_VDSO)			+= vsyscall.o
 obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
 obj-$(CONFIG_TIME_NS)				+= namespace.o
+obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
diff --git a/kernel/time/clocksource-wdtest.c b/kernel/time/clocksource-wdtest.c
new file mode 100644
index 0000000000000..01df12395c0ee
--- /dev/null
+++ b/kernel/time/clocksource-wdtest.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Unit test for the clocksource watchdog.
+ *
+ * Copyright (C) 2021 Facebook, Inc.
+ *
+ * Author: Paul E. McKenney <paulmck@kernel.org>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
+#include <linux/tick.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/prandom.h>
+#include <linux/cpu.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
+
+static int holdoff = IS_BUILTIN(CONFIG_TEST_CLOCKSOURCE_WATCHDOG) ? 10 : 0;
+module_param(holdoff, int, 0444);
+MODULE_PARM_DESC(holdoff, "Time to wait to start test (s).");
+
+/* Watchdog kthread's task_struct pointer for debug purposes. */
+static struct task_struct *wdtest_task;
+
+static u64 wdtest_jiffies_read(struct clocksource *cs)
+{
+	return (u64)jiffies;
+}
+
+/* Assume HZ > 100. */
+#define JIFFIES_SHIFT	8
+
+static struct clocksource clocksource_wdtest_jiffies = {
+	.name			= "wdtest-jiffies",
+	.rating			= 1, /* lowest valid rating*/
+	.uncertainty_margin	= TICK_NSEC,
+	.read			= wdtest_jiffies_read,
+	.mask			= CLOCKSOURCE_MASK(32),
+	.flags			= CLOCK_SOURCE_MUST_VERIFY,
+	.mult			= TICK_NSEC << JIFFIES_SHIFT, /* details above */
+	.shift			= JIFFIES_SHIFT,
+	.max_cycles		= 10,
+};
+
+static int wdtest_ktime_read_ndelays;
+static bool wdtest_ktime_read_fuzz;
+
+static u64 wdtest_ktime_read(struct clocksource *cs)
+{
+	int wkrn = READ_ONCE(wdtest_ktime_read_ndelays);
+	static int sign = 1;
+	u64 ret;
+
+	if (wkrn) {
+		udelay(cs->uncertainty_margin / 250);
+		WRITE_ONCE(wdtest_ktime_read_ndelays, wkrn - 1);
+	}
+	ret = ktime_get_real_fast_ns();
+	if (READ_ONCE(wdtest_ktime_read_fuzz)) {
+		sign = -sign;
+		ret = ret + sign * 100 * NSEC_PER_MSEC;
+	}
+	return ret;
+}
+
+static void wdtest_ktime_cs_mark_unstable(struct clocksource *cs)
+{
+	pr_info("--- Marking %s unstable due to clocksource watchdog.\n", cs->name);
+}
+
+#define KTIME_FLAGS (CLOCK_SOURCE_IS_CONTINUOUS | \
+		     CLOCK_SOURCE_VALID_FOR_HRES | \
+		     CLOCK_SOURCE_MUST_VERIFY | \
+		     CLOCK_SOURCE_VERIFY_PERCPU)
+
+static struct clocksource clocksource_wdtest_ktime = {
+	.name			= "wdtest-ktime",
+	.rating			= 300,
+	.read			= wdtest_ktime_read,
+	.mask			= CLOCKSOURCE_MASK(64),
+	.flags			= KTIME_FLAGS,
+	.mark_unstable		= wdtest_ktime_cs_mark_unstable,
+	.list			= LIST_HEAD_INIT(clocksource_wdtest_ktime.list),
+};
+
+/* Reset the clocksource if needed. */
+static void wdtest_ktime_clocksource_reset(void)
+{
+	if (clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE) {
+		clocksource_unregister(&clocksource_wdtest_ktime);
+		clocksource_wdtest_ktime.flags = KTIME_FLAGS;
+		schedule_timeout_uninterruptible(HZ / 10);
+		clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+	}
+}
+
+/* Run the specified series of watchdog tests. */
+static int wdtest_func(void *arg)
+{
+	unsigned long j1, j2;
+	char *s;
+	int i;
+
+	schedule_timeout_uninterruptible(holdoff * HZ);
+
+	/*
+	 * Verify that jiffies-like clocksources get the manually
+	 * specified uncertainty margin.
+	 */
+	pr_info("--- Verify jiffies-like uncertainty margin.\n");
+	__clocksource_register(&clocksource_wdtest_jiffies);
+	WARN_ON_ONCE(clocksource_wdtest_jiffies.uncertainty_margin != TICK_NSEC);
+
+	j1 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
+	schedule_timeout_uninterruptible(HZ);
+	j2 = clocksource_wdtest_jiffies.read(&clocksource_wdtest_jiffies);
+	WARN_ON_ONCE(j1 == j2);
+
+	clocksource_unregister(&clocksource_wdtest_jiffies);
+
+	/*
+	 * Verify that tsc-like clocksources are assigned a reasonable
+	 * uncertainty margin.
+	 */
+	pr_info("--- Verify tsc-like uncertainty margin.\n");
+	clocksource_register_khz(&clocksource_wdtest_ktime, 1000 * 1000);
+	WARN_ON_ONCE(clocksource_wdtest_ktime.uncertainty_margin < NSEC_PER_USEC);
+
+	j1 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
+	udelay(1);
+	j2 = clocksource_wdtest_ktime.read(&clocksource_wdtest_ktime);
+	pr_info("--- tsc-like times: %lu - %lu = %lu.\n", j2, j1, j2 - j1);
+	WARN_ON_ONCE(time_before(j2, j1 + NSEC_PER_USEC));
+
+	/* Verify tsc-like stability with various numbers of errors injected. */
+	for (i = 0; i <= max_cswd_read_retries + 1; i++) {
+		if (i <= 1 && i < max_cswd_read_retries)
+			s = "";
+		else if (i <= max_cswd_read_retries)
+			s = ", expect message";
+		else
+			s = ", expect clock skew";
+		pr_info("--- Watchdog with %dx error injection, %lu retries%s.\n", i, max_cswd_read_retries, s);
+		WRITE_ONCE(wdtest_ktime_read_ndelays, i);
+		schedule_timeout_uninterruptible(2 * HZ);
+		WARN_ON_ONCE(READ_ONCE(wdtest_ktime_read_ndelays));
+		WARN_ON_ONCE((i <= max_cswd_read_retries) !=
+			     !(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
+		wdtest_ktime_clocksource_reset();
+	}
+
+	/* Verify tsc-like stability with clock-value-fuzz error injection. */
+	pr_info("--- Watchdog clock-value-fuzz error injection, expect clock skew and per-CPU mismatches.\n");
+	WRITE_ONCE(wdtest_ktime_read_fuzz, true);
+	schedule_timeout_uninterruptible(2 * HZ);
+	WARN_ON_ONCE(!(clocksource_wdtest_ktime.flags & CLOCK_SOURCE_UNSTABLE));
+	clocksource_verify_percpu(&clocksource_wdtest_ktime);
+	WRITE_ONCE(wdtest_ktime_read_fuzz, false);
+
+	clocksource_unregister(&clocksource_wdtest_ktime);
+
+	pr_info("--- Done with test.\n");
+	return 0;
+}
+
+static void wdtest_print_module_parms(void)
+{
+	pr_alert("--- holdoff=%d\n", holdoff);
+}
+
+/* Cleanup function. */
+static void clocksource_wdtest_cleanup(void)
+{
+}
+
+static int __init clocksource_wdtest_init(void)
+{
+	int ret = 0;
+
+	wdtest_print_module_parms();
+
+	/* Create watchdog-test task. */
+	wdtest_task = kthread_run(wdtest_func, NULL, "wdtest");
+	if (IS_ERR(wdtest_task)) {
+		ret = PTR_ERR(wdtest_task);
+		pr_warn("%s: Failed to create wdtest kthread.\n", __func__);
+		wdtest_task = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+module_init(clocksource_wdtest_init);
+module_exit(clocksource_wdtest_cleanup);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 9b27888a6e75e..74d6a234fd14e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -199,8 +199,9 @@ void clocksource_mark_unstable(struct clocksource *cs)
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
-static ulong max_cswd_read_retries = 3;
+ulong max_cswd_read_retries = 3;
 module_param(max_cswd_read_retries, ulong, 0644);
+EXPORT_SYMBOL_GPL(max_cswd_read_retries);
 static int verify_n_cpus = 8;
 module_param(verify_n_cpus, int, 0644);
 
@@ -294,7 +295,7 @@ static void clocksource_verify_one_cpu(void *csin)
 	csnow_mid = cs->read(cs);
 }
 
-static void clocksource_verify_percpu(struct clocksource *cs)
+void clocksource_verify_percpu(struct clocksource *cs)
 {
 	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
 	u64 csnow_begin, csnow_end;
@@ -347,6 +348,7 @@ static void clocksource_verify_percpu(struct clocksource *cs)
 		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
 			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
 }
+EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
 
 static void clocksource_watchdog(struct timer_list *unused)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 678c13967580e..0a5a70c742e68 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2571,6 +2571,18 @@ config TEST_FPU
 
 	  If unsure, say N.
 
+config TEST_CLOCKSOURCE_WATCHDOG
+	tristate "Test clocksource watchdog in kernel space"
+	depends on CLOCKSOURCE_WATCHDOG
+	help
+	  Enable this option to create a kernel module that will trigger
+	  a test of the clocksource watchdog.  This module may be loaded
+	  via modprobe or insmod in which case it will run upon being
+	  loaded, or it may be built in, in which case it will run
+	  shortly after boot.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU
 
 config ARCH_USE_MEMTEST
-- 
GitLab


From 22a22383371667962b46bd90d534cc57669537ac Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Thu, 27 May 2021 12:01:24 -0700
Subject: [PATCH 3597/3804] clocksource: Print deviation in nanoseconds when a
 clocksource becomes unstable

Currently when an unstable clocksource is detected, the raw counters of
that clocksource and watchdog will be printed, which can only be understood
after some math calculation.

So print the delta in nanoseconds as well to make it easier for humans to
check the results.

[ paulmck: Fix typo. ]

Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210527190124.440372-6-paulmck@kernel.org
---
 kernel/time/clocksource.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 74d6a234fd14e..b89c76e1c02c4 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -407,10 +407,10 @@ static void clocksource_watchdog(struct timer_list *unused)
 		if (abs(cs_nsec - wd_nsec) > md) {
 			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
 				smp_processor_id(), cs->name);
-			pr_warn("                      '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
-				watchdog->name, wdnow, wdlast, watchdog->mask);
-			pr_warn("                      '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
-				cs->name, csnow, cslast, cs->mask);
+			pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
+				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
+			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
+				cs->name, cs_nsec, csnow, cslast, cs->mask);
 			if (curr_clocksource == cs)
 				pr_warn("                      '%s' is current clocksource.\n", cs->name);
 			else if (curr_clocksource)
-- 
GitLab


From 4e82d2e20f3b11f253bc5c6e92f05ed3694a1ae3 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1@huawei.com>
Date: Wed, 9 Jun 2021 15:02:42 +0800
Subject: [PATCH 3598/3804] clockevents: Use list_move() instead of
 list_del()/list_add()

Simplify the code.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210609070242.1322450-1-libaokun1@huawei.com
---
 kernel/time/clockevents.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index bb9d2fe584804..003ccf338d201 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -347,8 +347,7 @@ static void clockevents_notify_released(void)
 	while (!list_empty(&clockevents_released)) {
 		dev = list_entry(clockevents_released.next,
 				 struct clock_event_device, list);
-		list_del(&dev->list);
-		list_add(&dev->list, &clockevent_devices);
+		list_move(&dev->list, &clockevent_devices);
 		tick_check_new_device(dev);
 	}
 }
@@ -576,8 +575,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
 	if (old) {
 		module_put(old->owner);
 		clockevents_switch_state(old, CLOCK_EVT_STATE_DETACHED);
-		list_del(&old->list);
-		list_add(&old->list, &clockevents_released);
+		list_move(&old->list, &clockevents_released);
 	}
 
 	if (new) {
-- 
GitLab


From 4d6035f9bf4ea12776322746a216e856dfe46698 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Jun 2021 17:35:18 +0200
Subject: [PATCH 3599/3804] Revert "PCI: PM: Do not read power state in
 pci_enable_device_flags()"

Revert commit 4514d991d992 ("PCI: PM: Do not read power state in
pci_enable_device_flags()") that is reported to cause PCI device
initialization issues on some systems.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213481
Link: https://lore.kernel.org/linux-acpi/YNDoGICcg0V8HhpQ@eldamar.lan
Reported-by: Michael <phyre@rogers.com>
Reported-by: Salvatore Bonaccorso <carnil@debian.org>
Fixes: 4514d991d992 ("PCI: PM: Do not read power state in pci_enable_device_flags()")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/pci.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index b717680377a9e..8d4ebe095d0c8 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1900,11 +1900,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags)
 	int err;
 	int i, bars = 0;
 
-	if (atomic_inc_return(&dev->enable_cnt) > 1) {
-		pci_update_current_state(dev, dev->current_state);
-		return 0;		/* already enabled */
+	/*
+	 * Power state could be unknown at this point, either due to a fresh
+	 * boot or a device removal call.  So get the current power state
+	 * so that things like MSI message writing will behave as expected
+	 * (e.g. if the device really is in D0 at enable time).
+	 */
+	if (dev->pm_cap) {
+		u16 pmcsr;
+		pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
+		dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
 	}
 
+	if (atomic_inc_return(&dev->enable_cnt) > 1)
+		return 0;		/* already enabled */
+
 	bridge = pci_upstream_bridge(dev);
 	if (bridge)
 		pci_enable_bridge(bridge);
-- 
GitLab


From 6fd8f323b3e4e5290d02174559308669507c00dd Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 13 May 2021 21:46:38 +0800
Subject: [PATCH 3600/3804] drm/kmb: Fix error return code in kmb_hw_init()

When the call to platform_get_irq() to obtain the IRQ of the lcd fails, the
returned error code should be propagated. However, we currently do not
explicitly assign this error code to 'ret'. As a result, 0 was incorrectly
returned.

Fixes: 7f7b96a8a0a1 ("drm/kmb: Add support for KeemBay Display")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Anitha Chrisanthus <anitha.chrisanthus@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210513134639.6541-1-thunder.leizhen@huawei.com
---
 drivers/gpu/drm/kmb/kmb_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index f64e06e1067dd..96ea1a2c11dd6 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -137,6 +137,7 @@ static int kmb_hw_init(struct drm_device *drm, unsigned long flags)
 	/* Allocate LCD interrupt resources */
 	irq_lcd = platform_get_irq(pdev, 0);
 	if (irq_lcd < 0) {
+		ret = irq_lcd;
 		drm_err(&kmb->drm, "irq_lcd not found");
 		goto setup_fail;
 	}
-- 
GitLab


From 629e33a16809ae0274e1f5fc3d22b92b9bd0fdf1 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 22 Jun 2021 16:27:38 +0300
Subject: [PATCH 3601/3804] btrfs: remove unused btrfs_fs_info::total_pinned

This got added 14 years ago in 324ae4df00fd ("Btrfs: Add block group
pinned accounting back") but it was not ever used. Subsequently its
usage got gradually removed in 8790d502e440 ("Btrfs: Add support for
mirroring across drives") and 11833d66be94 ("Btrfs: improve async block
group caching"). Let's remove it for good!

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d7ef4d7d2c1af..e5e53e592d4f9 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -823,8 +823,6 @@ struct btrfs_fs_info {
 	struct kobject *space_info_kobj;
 	struct kobject *qgroups_kobj;
 
-	u64 total_pinned;
-
 	/* used to keep from writing metadata until there is a nice batch */
 	struct percpu_counter dirty_metadata_bytes;
 	struct percpu_counter delalloc_bytes;
-- 
GitLab


From 0c18f29aae7ce3dadd26d8ee3505d07cc982df75 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Tue, 22 Jun 2021 13:36:41 +0200
Subject: [PATCH 3602/3804] module: limit enabling module.sig_enforce

Irrespective of whether CONFIG_MODULE_SIG is configured, specifying
"module.sig_enforce=1" on the boot command line sets "sig_enforce".
Only allow "sig_enforce" to be set when CONFIG_MODULE_SIG is configured.

This patch makes the presence of /sys/module/module/parameters/sig_enforce
dependent on CONFIG_MODULE_SIG=y.

Fixes: fda784e50aac ("module: export module signature enforcement status")
Reported-by: Nayna Jain <nayna@linux.ibm.com>
Tested-by: Mimi Zohar <zohar@linux.ibm.com>
Tested-by: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/module.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/kernel/module.c b/kernel/module.c
index 7e78dfabca97f..927d46cb8eb93 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -266,9 +266,18 @@ static void module_assert_mutex_or_preempt(void)
 #endif
 }
 
+#ifdef CONFIG_MODULE_SIG
 static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE);
 module_param(sig_enforce, bool_enable_only, 0644);
 
+void set_module_sig_enforced(void)
+{
+	sig_enforce = true;
+}
+#else
+#define sig_enforce false
+#endif
+
 /*
  * Export sig_enforce kernel cmdline parameter to allow other subsystems rely
  * on that instead of directly to CONFIG_MODULE_SIG_FORCE config.
@@ -279,11 +288,6 @@ bool is_module_sig_enforced(void)
 }
 EXPORT_SYMBOL(is_module_sig_enforced);
 
-void set_module_sig_enforced(void)
-{
-	sig_enforce = true;
-}
-
 /* Block module loading/unloading? */
 int modules_disabled = 0;
 core_param(nomodule, modules_disabled, bint, 0);
-- 
GitLab


From d1b7f92035c6fb42529ada531e2cbf3534544c82 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 17 Jun 2021 13:55:04 +0200
Subject: [PATCH 3603/3804] scsi: sd: Call sd_revalidate_disk() for
 ioctl(BLKRRPART)

While the disk state has nothing to do with partitions, BLKRRPART is used
to force a full revalidate after things like a disk format for historical
reasons. Restore that behavior.

Link: https://lore.kernel.org/r/20210617115504.1732350-1-hch@lst.de
Fixes: 471bd0af544b ("sd: use bdev_check_media_change")
Reported-by: Xiang Chen <chenxiang66@hisilicon.com>
Tested-by: Xiang Chen <chenxiang66@hisilicon.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cb3c37d1e0091..a2c3d9ad9ee49 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1387,6 +1387,22 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
 	}
 }
 
+static bool sd_need_revalidate(struct block_device *bdev,
+		struct scsi_disk *sdkp)
+{
+	if (sdkp->device->removable || sdkp->write_prot) {
+		if (bdev_check_media_change(bdev))
+			return true;
+	}
+
+	/*
+	 * Force a full rescan after ioctl(BLKRRPART).  While the disk state has
+	 * nothing to do with partitions, BLKRRPART is used to force a full
+	 * revalidate after things like a format for historical reasons.
+	 */
+	return test_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
+}
+
 /**
  *	sd_open - open a scsi disk device
  *	@bdev: Block device of the scsi disk to open
@@ -1423,10 +1439,8 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
 	if (!scsi_block_when_processing_errors(sdev))
 		goto error_out;
 
-	if (sdev->removable || sdkp->write_prot) {
-		if (bdev_check_media_change(bdev))
-			sd_revalidate_disk(bdev->bd_disk);
-	}
+	if (sd_need_revalidate(bdev, sdkp))
+		sd_revalidate_disk(bdev->bd_disk);
 
 	/*
 	 * If the drive is empty, just let the open fail.
-- 
GitLab


From 4fa3b91bdee1b08348c82660668ca0ca34e271ad Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Mon, 21 Jun 2021 16:03:56 +0200
Subject: [PATCH 3604/3804] KVM: s390: get rid of register asm usage

Using register asm statements has been proven to be very error prone,
especially when using code instrumentation where gcc may add function
calls, which clobbers register contents in an unexpected way.

Therefore get rid of register asm statements in kvm code, even though
there is currently nothing wrong with them. This way we know for sure
that this bug class won't be introduced here.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20210621140356.1210771-1-hca@linux.ibm.com
[borntraeger@de.ibm.com: checkpatch strict fix]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1296fc10f80c8..876fc1f7282a0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -329,31 +329,31 @@ static void allow_cpu_feat(unsigned long nr)
 
 static inline int plo_test_bit(unsigned char nr)
 {
-	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+	unsigned long function = (unsigned long)nr | 0x100;
 	int cc;
 
 	asm volatile(
+		"	lgr	0,%[function]\n"
 		/* Parameter registers are ignored for "test bit" */
 		"	plo	0,0,0,0(0)\n"
 		"	ipm	%0\n"
 		"	srl	%0,28\n"
 		: "=d" (cc)
-		: "d" (r0)
-		: "cc");
+		: [function] "d" (function)
+		: "cc", "0");
 	return cc == 0;
 }
 
 static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
 {
-	register unsigned long r0 asm("0") = 0;	/* query function */
-	register unsigned long r1 asm("1") = (unsigned long) query;
-
 	asm volatile(
-		/* Parameter regs are ignored */
+		"	lghi	0,0\n"
+		"	lgr	1,%[query]\n"
+		/* Parameter registers are ignored */
 		"	.insn	rrf,%[opc] << 16,2,4,6,0\n"
 		:
-		: "d" (r0), "a" (r1), [opc] "i" (opcode)
-		: "cc", "memory");
+		: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
+		: "cc", "memory", "0", "1");
 }
 
 #define INSN_SORTL 0xb938
-- 
GitLab


From a3efa842926600b04cb1252e9211892c3bfc4d49 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Tue, 9 Mar 2021 16:24:19 +0100
Subject: [PATCH 3605/3804] KVM: s390: gen_facilities: allow facilities 165,
 193, 194 and 196

This enables the NNPA, BEAR enhancement, reset DAT protection and
processor activity counter facilities via the cpu model.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/tools/gen_facilities.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 61ce5b59b828e..606324e56e4ec 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -115,6 +115,10 @@ static struct facility_def facility_defs[] = {
 			12, /* AP Query Configuration Information */
 			15, /* AP Facilities Test */
 			156, /* etoken facility */
+			165, /* nnpa facility */
+			193, /* bear enhancement facility */
+			194, /* rdp enhancement facility */
+			196, /* processor activity instrumentation facility */
 			-1  /* END */
 		}
 	},
-- 
GitLab


From 1f703d2cf20464338c3d5279dddfb65ac79b8782 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 25 Jan 2021 13:39:45 +0100
Subject: [PATCH 3606/3804] KVM: s390: allow facility 192
 (vector-packed-decimal-enhancement facility 2)

Pass through newer vector instructions if vector support is enabled.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 876fc1f7282a0..f72f361d39ddb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -713,6 +713,10 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 				set_kvm_facility(kvm->arch.model.fac_mask, 152);
 				set_kvm_facility(kvm->arch.model.fac_list, 152);
 			}
+			if (test_facility(192)) {
+				set_kvm_facility(kvm->arch.model.fac_mask, 192);
+				set_kvm_facility(kvm->arch.model.fac_list, 192);
+			}
 			r = 0;
 		} else
 			r = -EINVAL;
-- 
GitLab


From 310f134ed41fcaa03eff302b1e69f1ce1ee21841 Mon Sep 17 00:00:00 2001
From: Brijesh Singh <brijesh.singh@amd.com>
Date: Wed, 23 Jun 2021 08:40:00 +0200
Subject: [PATCH 3607/3804] x86/sev: Add defines for GHCB version 2 MSR
 protocol requests

Add the necessary defines for supporting the GHCB version 2 protocol.
This includes defines for:

	- MSR-based AP hlt request/response
	- Hypervisor Feature request/response

This is the bare minimum of requests that need to be supported by a GHCB
version 2 implementation. There are more requests in the specification,
but those depend on Secure Nested Paging support being available.

These defines are shared between SEV host and guest support.

  [ bp: Fold in https://lkml.kernel.org/r/20210622144825.27588-2-joro@8bytes.org too.
        Simplify the brewing macro maze into readability. ]

Co-developed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/YNLXQIZ5e1wjkshG@8bytes.org
---
 arch/x86/include/asm/sev-common.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index 629c3df243f03..2cef6c5a52c2a 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -9,8 +9,13 @@
 #define __ASM_X86_SEV_COMMON_H
 
 #define GHCB_MSR_INFO_POS		0
-#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
+#define GHCB_DATA_LOW			12
+#define GHCB_MSR_INFO_MASK		(BIT_ULL(GHCB_DATA_LOW) - 1)
 
+#define GHCB_DATA(v)			\
+	(((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)
+
+/* SEV Information Request/Response */
 #define GHCB_MSR_SEV_INFO_RESP		0x001
 #define GHCB_MSR_SEV_INFO_REQ		0x002
 #define GHCB_MSR_VER_MAX_POS		48
@@ -28,6 +33,7 @@
 #define GHCB_MSR_PROTO_MAX(v)		(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
 #define GHCB_MSR_PROTO_MIN(v)		(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
 
+/* CPUID Request/Response */
 #define GHCB_MSR_CPUID_REQ		0x004
 #define GHCB_MSR_CPUID_RESP		0x005
 #define GHCB_MSR_CPUID_FUNC_POS		32
@@ -45,6 +51,14 @@
 		(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
 		(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))
 
+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ		0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP		0x007
+
+/* GHCB Hypervisor Feature Request/Response */
+#define GHCB_MSR_HV_FT_REQ			0x080
+#define GHCB_MSR_HV_FT_RESP			0x081
+
 #define GHCB_MSR_TERM_REQ		0x100
 #define GHCB_MSR_TERM_REASON_SET_POS	12
 #define GHCB_MSR_TERM_REASON_SET_MASK	0xf
-- 
GitLab


From 8d9d46bbf3b6b7ff8edcac33603ab45c29e0e07f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 22 Jun 2021 16:48:25 +0200
Subject: [PATCH 3608/3804] x86/sev: Use "SEV: " prefix for messages from sev.c

The source file has been renamed from sev-es.c to sev.c, but the
messages are still prefixed with "SEV-ES: ". Change that to "SEV: " to
make it consistent.

Fixes: e759959fe3b8 ("x86/sev-es: Rename sev-es.{ch} to sev.{ch}")
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210622144825.27588-4-joro@8bytes.org
---
 arch/x86/kernel/sev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 87a4b00f028e5..a6895e440bc35 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -7,7 +7,7 @@
  * Author: Joerg Roedel <jroedel@suse.de>
  */
 
-#define pr_fmt(fmt)	"SEV-ES: " fmt
+#define pr_fmt(fmt)	"SEV: " fmt
 
 #include <linux/sched/debug.h>	/* For show_regs() */
 #include <linux/percpu-defs.h>
-- 
GitLab


From 50c9462edcbf900f3d5097ca3ad60171346124de Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Wed, 23 Jun 2021 12:56:09 +0800
Subject: [PATCH 3609/3804] regulator: mt6358: Fix vdram2 .vsel_mask

The valid vsel values are 0 and 12, so the .vsel_mask should be 0xf.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Reviewed-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/1624424169-510-1-git-send-email-hsin-hsiung.wang@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/mt6358-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index 13cb6ac9a8929..1d4eb5dc4fac8 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -457,7 +457,7 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
 	MT6358_REG_FIXED("ldo_vaud28", VAUD28,
 			 MT6358_LDO_VAUD28_CON0, 0, 2800000),
 	MT6358_LDO("ldo_vdram2", VDRAM2, vdram2_voltages, vdram2_idx,
-		   MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0x10, 0),
+		   MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0xf, 0),
 	MT6358_LDO("ldo_vsim1", VSIM1, vsim_voltages, vsim_idx,
 		   MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00, 8),
 	MT6358_LDO("ldo_vibr", VIBR, vibr_voltages, vibr_idx,
-- 
GitLab


From 0d7993b234c9fad8cb6bec6adfaa74694ba85ecb Mon Sep 17 00:00:00 2001
From: Mirko Vogt <mirko-dev|linux@nanl.de>
Date: Mon, 14 Jun 2021 16:45:07 +0200
Subject: [PATCH 3610/3804] spi: spi-sun6i: Fix chipselect/clock bug

The current sun6i SPI implementation initializes the transfer too early,
resulting in SCK going high before the transfer. When using an additional
(gpio) chipselect with sun6i, the chipselect is asserted at a time when
clock is high, making the SPI transfer fail.

This is due to SUN6I_GBL_CTL_BUS_ENABLE being written into
SUN6I_GBL_CTL_REG at an early stage. Moving that to the transfer
function, hence, right before the transfer starts, mitigates that
problem.

Fixes: 3558fe900e8af (spi: sunxi: Add Allwinner A31 SPI controller driver)
Signed-off-by: Mirko Vogt <mirko-dev|linux@nanl.de>
Signed-off-by: Ralf Schlatterbeck <rsc@runtux.com>
Link: https://lore.kernel.org/r/20210614144507.y3udezjfbko7eavv@runtux.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sun6i.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index cc8401980125d..23ad052528dbe 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -379,6 +379,10 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	}
 
 	sun6i_spi_write(sspi, SUN6I_CLK_CTL_REG, reg);
+	/* Finally enable the bus - doing so before might raise SCK to HIGH */
+	reg = sun6i_spi_read(sspi, SUN6I_GBL_CTL_REG);
+	reg |= SUN6I_GBL_CTL_BUS_ENABLE;
+	sun6i_spi_write(sspi, SUN6I_GBL_CTL_REG, reg);
 
 	/* Setup the transfer now... */
 	if (sspi->tx_buf)
@@ -504,7 +508,7 @@ static int sun6i_spi_runtime_resume(struct device *dev)
 	}
 
 	sun6i_spi_write(sspi, SUN6I_GBL_CTL_REG,
-			SUN6I_GBL_CTL_BUS_ENABLE | SUN6I_GBL_CTL_MASTER | SUN6I_GBL_CTL_TP);
+			SUN6I_GBL_CTL_MASTER | SUN6I_GBL_CTL_TP);
 
 	return 0;
 
-- 
GitLab


From a336dc8f683e5be794186b5643cd34cb28dd2c53 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 18 Jun 2021 22:14:11 +0800
Subject: [PATCH 3611/3804] regulator: da9052: Ensure enough delay time for
 .set_voltage_time_sel

Use DIV_ROUND_UP to prevent truncation by integer division issue.
This ensures we return enough delay time.

Also fix returning negative value when new_sel < old_sel.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210618141412.4014912-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/da9052-regulator.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/da9052-regulator.c b/drivers/regulator/da9052-regulator.c
index e18d291c7f21c..23fa429ebe760 100644
--- a/drivers/regulator/da9052-regulator.c
+++ b/drivers/regulator/da9052-regulator.c
@@ -250,7 +250,8 @@ static int da9052_regulator_set_voltage_time_sel(struct regulator_dev *rdev,
 	case DA9052_ID_BUCK3:
 	case DA9052_ID_LDO2:
 	case DA9052_ID_LDO3:
-		ret = (new_sel - old_sel) * info->step_uV / 6250;
+		ret = DIV_ROUND_UP(abs(new_sel - old_sel) * info->step_uV,
+				   6250);
 		break;
 	}
 
-- 
GitLab


From 1aeb1a72f330a0fa21610fb44769cd0e68379418 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 22 Jun 2021 22:15:26 +0800
Subject: [PATCH 3612/3804] regulator: max8893: Select REGMAP_I2C to fix build
 error

Fix build error if REGMAP_I2C is not set.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Link: https://lore.kernel.org/r/20210622141526.472175-1-axel.lin@ingics.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 7c39570b99b09..3a71b020ea1c0 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -581,6 +581,7 @@ config REGULATOR_MAX8660
 config REGULATOR_MAX8893
 	tristate "Maxim 8893 voltage regulator"
 	depends on I2C
+	select REGMAP_I2C
 	help
 	  This driver controls a Maxim 8893 voltage output
 	  regulator via I2C bus.
-- 
GitLab


From ddf275b219ab22bc07c14ac88c290694089dced0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 22 Jun 2021 15:47:30 +0100
Subject: [PATCH 3613/3804] regulator: bd9576: Fix uninitializes variable
 may_have_irqs

The boolean variable may_have_irqs is not initialized and is
only being set to true in the case where chip is ROHM_CHIP_TYPE_BD9576.
Fix this by initializing may_have_irqs to false.

Addresses-Coverity: ("Uninitialized scalar variable")
Fixes: e7bf1fa58c46 ("regulator: bd9576: Support error reporting")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/20210622144730.22821-1-colin.king@canonical.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd9576-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/bd9576-regulator.c b/drivers/regulator/bd9576-regulator.c
index 8b54d88827bee..e16c3727db7ad 100644
--- a/drivers/regulator/bd9576-regulator.c
+++ b/drivers/regulator/bd9576-regulator.c
@@ -897,7 +897,7 @@ static int bd957x_probe(struct platform_device *pdev)
 {
 	int i;
 	unsigned int num_reg_data;
-	bool vout_mode, ddr_sel, may_have_irqs;
+	bool vout_mode, ddr_sel, may_have_irqs = false;
 	struct regmap *regmap;
 	struct bd957x_data *ic_data;
 	struct regulator_config config = { 0 };
-- 
GitLab


From 0f4f58b847b23d79185ad20ecf629c9f913f4f41 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:47:56 +0800
Subject: [PATCH 3614/3804] spi: rockchip: add compatible string for rv1126

Add compatible string for rv1126 for potential applications.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104800.19088-3-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 52d6259d96eda..bbeed3ae4ee1a 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -922,6 +922,7 @@ static const struct of_device_id rockchip_spi_dt_match[] = {
 	{ .compatible = "rockchip,rk3368-spi", },
 	{ .compatible = "rockchip,rk3399-spi", },
 	{ .compatible = "rockchip,rv1108-spi", },
+	{ .compatible = "rockchip,rv1126-spi", },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, rockchip_spi_dt_match);
-- 
GitLab


From 4a47fcdb5f8b220a396e896a4efed51c13e27d8b Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:47:57 +0800
Subject: [PATCH 3615/3804] spi: rockchip: Set rx_fifo interrupt waterline base
 on transfer item

The error here is to calculate the width as 8 bits. In fact, 16 bits
should be considered.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104800.19088-4-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index bbeed3ae4ee1a..0887b19ef3ad7 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -540,8 +540,8 @@ static int rockchip_spi_config(struct rockchip_spi *rs,
 	 * interrupt exactly when the fifo is full doesn't seem to work,
 	 * so we need the strict inequality here
 	 */
-	if (xfer->len < rs->fifo_len)
-		writel_relaxed(xfer->len - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
+	if ((xfer->len / rs->n_bytes) < rs->fifo_len)
+		writel_relaxed(xfer->len / rs->n_bytes - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 	else
 		writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 
-- 
GitLab


From 2758bd093ac35ca5b62dbecfd30dab60e8b59790 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:47:58 +0800
Subject: [PATCH 3616/3804] spi: rockchip: Wait for STB status in slave mode
 tx_xfer

After ROCKCHIP_SPI_VER2_TYPE2, SR->STB is a more accurate judgment
bit for spi slave transmission.

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104800.19088-5-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 0887b19ef3ad7..950d3bce443bf 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -116,13 +116,14 @@
 #define BAUDR_SCKDV_MIN				2
 #define BAUDR_SCKDV_MAX				65534
 
-/* Bit fields in SR, 5bit */
-#define SR_MASK						0x1f
+/* Bit fields in SR, 6bit */
+#define SR_MASK						0x3f
 #define SR_BUSY						(1 << 0)
 #define SR_TF_FULL					(1 << 1)
 #define SR_TF_EMPTY					(1 << 2)
 #define SR_RF_EMPTY					(1 << 3)
 #define SR_RF_FULL					(1 << 4)
+#define SR_SLAVE_TX_BUSY				(1 << 5)
 
 /* Bit fields in ISR, IMR, ISR, RISR, 5bit */
 #define INT_MASK					0x1f
@@ -197,13 +198,19 @@ static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
 	writel_relaxed((enable ? 1U : 0U), rs->regs + ROCKCHIP_SPI_SSIENR);
 }
 
-static inline void wait_for_idle(struct rockchip_spi *rs)
+static inline void wait_for_tx_idle(struct rockchip_spi *rs, bool slave_mode)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(5);
 
 	do {
-		if (!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY))
-			return;
+		if (slave_mode) {
+			if (!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_SLAVE_TX_BUSY) &&
+			    !((readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY)))
+				return;
+		} else {
+			if (!(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY))
+				return;
+		}
 	} while (!time_after(jiffies, timeout));
 
 	dev_warn(rs->dev, "spi controller is in busy state!\n");
@@ -383,7 +390,7 @@ static void rockchip_spi_dma_txcb(void *data)
 		return;
 
 	/* Wait until the FIFO data completely. */
-	wait_for_idle(rs);
+	wait_for_tx_idle(rs, ctlr->slave);
 
 	spi_enable_chip(rs, false);
 	spi_finalize_current_transfer(ctlr);
@@ -545,7 +552,7 @@ static int rockchip_spi_config(struct rockchip_spi *rs,
 	else
 		writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 
-	writel_relaxed(rs->fifo_len / 2, rs->regs + ROCKCHIP_SPI_DMATDLR);
+	writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_DMATDLR);
 	writel_relaxed(rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes) - 1,
 		       rs->regs + ROCKCHIP_SPI_DMARDLR);
 	writel_relaxed(dmacr, rs->regs + ROCKCHIP_SPI_DMACR);
-- 
GitLab


From b8d423711d1870c5e1280d5bbb0639fe6638a60e Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:48:47 +0800
Subject: [PATCH 3617/3804] spi: rockchip: Support cs-gpio

1.Add standard cs-gpio support
2.Refer to spi-controller.yaml for details

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104848.19539-1-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 950d3bce443bf..fbd750b1d28e2 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -157,7 +157,8 @@
  */
 #define ROCKCHIP_SPI_MAX_TRANLEN		0xffff
 
-#define ROCKCHIP_SPI_MAX_CS_NUM			2
+/* 2 for native cs, 2 for cs-gpio */
+#define ROCKCHIP_SPI_MAX_CS_NUM			4
 #define ROCKCHIP_SPI_VER2_TYPE1			0x05EC0002
 #define ROCKCHIP_SPI_VER2_TYPE2			0x00110002
 
@@ -245,11 +246,15 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 		/* Keep things powered as long as CS is asserted */
 		pm_runtime_get_sync(rs->dev);
 
-		ROCKCHIP_SPI_SET_BITS(rs->regs + ROCKCHIP_SPI_SER,
-				      BIT(spi->chip_select));
+		if (spi->cs_gpiod)
+			ROCKCHIP_SPI_SET_BITS(rs->regs + ROCKCHIP_SPI_SER, 1);
+		else
+			ROCKCHIP_SPI_SET_BITS(rs->regs + ROCKCHIP_SPI_SER, BIT(spi->chip_select));
 	} else {
-		ROCKCHIP_SPI_CLR_BITS(rs->regs + ROCKCHIP_SPI_SER,
-				      BIT(spi->chip_select));
+		if (spi->cs_gpiod)
+			ROCKCHIP_SPI_CLR_BITS(rs->regs + ROCKCHIP_SPI_SER, 1);
+		else
+			ROCKCHIP_SPI_CLR_BITS(rs->regs + ROCKCHIP_SPI_SER, BIT(spi->chip_select));
 
 		/* Drop reference from when we first asserted CS */
 		pm_runtime_put(rs->dev);
-- 
GitLab


From 736b81e075172f1e6cd7a8bc1a1374a2dee9e4dc Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:48:48 +0800
Subject: [PATCH 3618/3804] spi: rockchip: Support SPI_CS_HIGH

1.Add standard spi-cs-high support
2.Refer to spi-controller.yaml for details

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104848.19539-2-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rockchip.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index fbd750b1d28e2..540861ca2ba37 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -107,6 +107,8 @@
 #define CR0_OPM_MASTER				0x0
 #define CR0_OPM_SLAVE				0x1
 
+#define CR0_SOI_OFFSET				23
+
 #define CR0_MTM_OFFSET				0x21
 
 /* Bit fields in SER, 2bit */
@@ -236,7 +238,7 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
-	bool cs_asserted = !enable;
+	bool cs_asserted = spi->mode & SPI_CS_HIGH ? enable : !enable;
 
 	/* Return immediately for no-op */
 	if (cs_asserted == rs->cs_asserted[spi->chip_select])
@@ -507,6 +509,8 @@ static int rockchip_spi_config(struct rockchip_spi *rs,
 	cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
 	if (spi->mode & SPI_LSB_FIRST)
 		cr0 |= CR0_FBM_LSB << CR0_FBM_OFFSET;
+	if (spi->mode & SPI_CS_HIGH)
+		cr0 |= BIT(spi->chip_select) << CR0_SOI_OFFSET;
 
 	if (xfer->rx_buf && xfer->tx_buf)
 		cr0 |= CR0_XFM_TR << CR0_XFM_OFFSET;
@@ -795,6 +799,14 @@ static int rockchip_spi_probe(struct platform_device *pdev)
 		ctlr->can_dma = rockchip_spi_can_dma;
 	}
 
+	switch (readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION)) {
+	case ROCKCHIP_SPI_VER2_TYPE2:
+		ctlr->mode_bits |= SPI_CS_HIGH;
+		break;
+	default:
+		break;
+	}
+
 	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to register controller\n");
-- 
GitLab


From c58c7e9bf55ced301fdd9c8c1841361cc5fc8458 Mon Sep 17 00:00:00 2001
From: Jon Lin <jon.lin@rock-chips.com>
Date: Mon, 21 Jun 2021 18:47:55 +0800
Subject: [PATCH 3619/3804] spi: spi-rockchip: add description for rv1126

The description below will be used for rv1126.dtsi or compatible one in
the future

Signed-off-by: Jon Lin <jon.lin@rock-chips.com>
Link: https://lore.kernel.org/r/20210621104800.19088-2-jon.lin@rock-chips.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
index 1e6cf29e63881..7f987e79337c8 100644
--- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml
@@ -33,6 +33,7 @@ properties:
               - rockchip,rk3328-spi
               - rockchip,rk3368-spi
               - rockchip,rk3399-spi
+              - rockchip,rv1126-spi
           - const: rockchip,rk3066-spi
 
   reg:
-- 
GitLab


From 52218fcd61cb42bde0d301db4acb3ffdf3463cc7 Mon Sep 17 00:00:00 2001
From: Zhenyu Ye <yezhenyu2@huawei.com>
Date: Wed, 23 Jun 2021 15:05:22 +0800
Subject: [PATCH 3620/3804] arm64: tlb: fix the TTL value of tlb_get_level

The TTL field indicates the level of page table walk holding the *leaf*
entry for the address being invalidated. But currently, the TTL field
may be set to an incorrect value in the following stack:

pte_free_tlb
    __pte_free_tlb
        tlb_remove_table
            tlb_table_invalidate
                tlb_flush_mmu_tlbonly
                    tlb_flush

In this case, we just want to flush a PTE page, but the tlb->cleared_pmds
is set and we get tlb_level = 2 in the tlb_get_level() function. This may
cause some unexpected problems.

This patch sets the TTL field to 0 if tlb->freed_tables is set. The
tlb->freed_tables indicates page table pages are freed, not the leaf
entry.

Cc: <stable@vger.kernel.org> # 5.9.x
Fixes: c4ab2cbc1d87 ("arm64: tlb: Set the TTL field in flush_tlb_range")
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: ZhuRui <zhurui3@huawei.com>
Signed-off-by: Zhenyu Ye <yezhenyu2@huawei.com>
Link: https://lore.kernel.org/r/b80ead47-1f88-3a00-18e1-cacc22f54cc4@huawei.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/include/asm/tlb.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 61c97d3b58c70..c995d1f4594f6 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -28,6 +28,10 @@ static void tlb_flush(struct mmu_gather *tlb);
  */
 static inline int tlb_get_level(struct mmu_gather *tlb)
 {
+	/* The TTL field is only valid for the leaf entry. */
+	if (tlb->freed_tables)
+		return 0;
+
 	if (tlb->cleared_ptes && !(tlb->cleared_pmds ||
 				   tlb->cleared_puds ||
 				   tlb->cleared_p4ds))
-- 
GitLab


From 29176edd6e7ad7333d0bb19a309b2104fa4f4341 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 23 Jun 2021 11:58:42 +0200
Subject: [PATCH 3621/3804] spi: spi-rspi: : use proper DMAENGINE API for
 termination

dmaengine_terminate_all() is deprecated in favor of explicitly saying if
it should be sync or async. Here, we want dmaengine_terminate_sync()
because there is no other synchronization code in the driver to handle
an async case.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210623095843.3228-2-wsa+renesas@sang-engineering.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-rspi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index e39fd38f5180e..d16ed88802d36 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -618,9 +618,9 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx,
 			ret = -ETIMEDOUT;
 		}
 		if (tx)
-			dmaengine_terminate_all(rspi->ctlr->dma_tx);
+			dmaengine_terminate_sync(rspi->ctlr->dma_tx);
 		if (rx)
-			dmaengine_terminate_all(rspi->ctlr->dma_rx);
+			dmaengine_terminate_sync(rspi->ctlr->dma_rx);
 	}
 
 	rspi_disable_irq(rspi, irq_mask);
@@ -634,7 +634,7 @@ static int rspi_dma_transfer(struct rspi_data *rspi, struct sg_table *tx,
 
 no_dma_tx:
 	if (rx)
-		dmaengine_terminate_all(rspi->ctlr->dma_rx);
+		dmaengine_terminate_sync(rspi->ctlr->dma_rx);
 no_dma_rx:
 	if (ret == -EAGAIN) {
 		dev_warn_once(&rspi->ctlr->dev,
-- 
GitLab


From a26dee29ec04a3f6779684852c36a2a71fd68fd8 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 23 Jun 2021 11:58:43 +0200
Subject: [PATCH 3622/3804] spi: spi-sh-msiof: : use proper DMAENGINE API for
 termination

dmaengine_terminate_all() is deprecated in favor of explicitly saying if
it should be sync or async. Here, we want dmaengine_terminate_sync()
because there is no other synchronization code in the driver to handle
an async case.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Link: https://lore.kernel.org/r/20210623095843.3228-3-wsa+renesas@sang-engineering.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-sh-msiof.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 41ed9ff8fad0d..f88d9acd20d94 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -853,10 +853,10 @@ stop_reset:
 	sh_msiof_spi_stop(p, rx);
 stop_dma:
 	if (tx)
-		dmaengine_terminate_all(p->ctlr->dma_tx);
+		dmaengine_terminate_sync(p->ctlr->dma_tx);
 no_dma_tx:
 	if (rx)
-		dmaengine_terminate_all(p->ctlr->dma_rx);
+		dmaengine_terminate_sync(p->ctlr->dma_rx);
 	sh_msiof_write(p, SIIER, 0);
 	return ret;
 }
-- 
GitLab


From 4c58d922c0877e23cc7d3d7c6bff49b85faaca89 Mon Sep 17 00:00:00 2001
From: Like Xu <like.xu.linux@gmail.com>
Date: Mon, 21 Jun 2021 11:47:10 +0800
Subject: [PATCH 3623/3804] perf/x86/intel: Fix PEBS-via-PT reload base value
 for Extended PEBS

If we use the "PEBS-via-PT" feature on a platform that supports
extended PEBS, like this:

    perf record -c 10000 \
    -e '{intel_pt/branch=0/,branch-instructions/aux-output/p}' uname

we will encounter the following call trace:

[  250.906542] unchecked MSR access error: WRMSR to 0x14e1 (tried to write
0x0000000000000000) at rIP: 0xffffffff88073624 (native_write_msr+0x4/0x20)
[  250.920779] Call Trace:
[  250.923508]  intel_pmu_pebs_enable+0x12c/0x190
[  250.928359]  intel_pmu_enable_event+0x346/0x390
[  250.933300]  x86_pmu_start+0x64/0x80
[  250.937231]  x86_pmu_enable+0x16a/0x2f0
[  250.941434]  perf_event_exec+0x144/0x4c0
[  250.945731]  begin_new_exec+0x650/0xbf0
[  250.949933]  load_elf_binary+0x13e/0x1700
[  250.954321]  ? lock_acquire+0xc2/0x390
[  250.958430]  ? bprm_execve+0x34f/0x8a0
[  250.962544]  ? lock_is_held_type+0xa7/0x120
[  250.967118]  ? find_held_lock+0x32/0x90
[  250.971321]  ? sched_clock_cpu+0xc/0xb0
[  250.975527]  bprm_execve+0x33d/0x8a0
[  250.979452]  do_execveat_common.isra.0+0x161/0x1d0
[  250.984673]  __x64_sys_execve+0x33/0x40
[  250.988877]  do_syscall_64+0x3d/0x80
[  250.992806]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  250.998302] RIP: 0033:0x7fbc971d82fb
[  251.002235] Code: Unable to access opcode bytes at RIP 0x7fbc971d82d1.
[  251.009303] RSP: 002b:00007fffb8aed808 EFLAGS: 00000202 ORIG_RAX: 000000000000003b
[  251.017478] RAX: ffffffffffffffda RBX: 00007fffb8af2f00 RCX: 00007fbc971d82fb
[  251.025187] RDX: 00005574792aac50 RSI: 00007fffb8af2f00 RDI: 00007fffb8aed810
[  251.032901] RBP: 00007fffb8aed970 R08: 0000000000000020 R09: 00007fbc9725c8b0
[  251.040613] R10: 6d6c61632f6d6f63 R11: 0000000000000202 R12: 00005574792aac50
[  251.048327] R13: 00007fffb8af35f0 R14: 00005574792aafdf R15: 00005574792aafe7

This is because the target reload msr address is calculated
based on the wrong base msr and the target reload msr value
is accessed from ds->pebs_event_reset[] with the wrong offset.

According to Intel SDM Table 2-14, for the extended PEBS feature,
the reload msr for MSR_IA32_FIXED_CTRx should be based on
MSR_RELOAD_FIXED_CTRx.

For fixed counters, let's fix it by overriding the reload msr
address and its value, thus avoiding out-of-bounds access.

Fixes: 42880f726c66("perf/x86/intel: Support PEBS output to PT")
Signed-off-by: Like Xu <likexu@tencent.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210621034710.31107-1-likexu@tencent.com
---
 arch/x86/events/intel/ds.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 1ec8fd311f380..8647713276a73 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1187,6 +1187,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
+	u64 value = ds->pebs_event_reset[hwc->idx];
+	u32 base = MSR_RELOAD_PMC0;
+	unsigned int idx = hwc->idx;
 
 	if (!is_pebs_pt(event))
 		return;
@@ -1196,7 +1199,12 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
 
 	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
 
-	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
+	if (hwc->idx >= INTEL_PMC_IDX_FIXED) {
+		base = MSR_RELOAD_FIXED_CTR0;
+		idx = hwc->idx - INTEL_PMC_IDX_FIXED;
+		value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx];
+	}
+	wrmsrl(base + idx, value);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -1204,6 +1212,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
+	unsigned int idx = hwc->idx;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
@@ -1222,19 +1231,18 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		}
 	}
 
+	if (idx >= INTEL_PMC_IDX_FIXED)
+		idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
+
 	/*
 	 * Use auto-reload if possible to save a MSR write in the PMI.
 	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
-		unsigned int idx = hwc->idx;
-
-		if (idx >= INTEL_PMC_IDX_FIXED)
-			idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED);
 		ds->pebs_event_reset[idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	} else {
-		ds->pebs_event_reset[hwc->idx] = 0;
+		ds->pebs_event_reset[idx] = 0;
 	}
 
 	intel_pmu_pebs_via_pt_enable(event);
-- 
GitLab


From ee72a94ea4a6d8fa304a506859cd07ecdc0cf5c4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 18 Jun 2021 08:12:52 -0700
Subject: [PATCH 3624/3804] perf/x86/intel: Fix fixed counter check warning for
 some Alder Lake

On some Alder Lake machines, the fixed counter check warning below may be
triggered.

[    2.010766] hw perf events fixed 5 > max(4), clipping!

Current perf unconditionally increases the number of the GP counters and
the fixed counters for a big core PMU on an Alder Lake system, because
the number enumerated in the CPUID only reflects the common counters.
The big core may have more counters. However, Alder Lake may have an
alternative configuration. With that configuration,
the X86_FEATURE_HYBRID_CPU is not set. The number of the GP counters and
fixed counters enumerated in the CPUID is accurate. Perf mistakenly
increases the number of counters. The warning is triggered.

Directly use the enumerated value on the system with the alternative
configuration.

Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Reported-by: Jin Yao <yao.jin@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/1624029174-122219-2-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2521d03de5e02..d39991b93f4a3 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6157,8 +6157,13 @@ __init int intel_pmu_init(void)
 		pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
 		pmu->name = "cpu_core";
 		pmu->cpu_type = hybrid_big;
-		pmu->num_counters = x86_pmu.num_counters + 2;
-		pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+		if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+			pmu->num_counters = x86_pmu.num_counters + 2;
+			pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
+		} else {
+			pmu->num_counters = x86_pmu.num_counters;
+			pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
+		}
 		pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
 		pmu->unconstrained = (struct event_constraint)
 					__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
-- 
GitLab


From d18216fafecf2a3a7c2b97086892269d6ab3cd5e Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 18 Jun 2021 08:12:53 -0700
Subject: [PATCH 3625/3804] perf/x86/intel: Add more events requires FRONTEND
 MSR on Sapphire Rapids

On Sapphire Rapids, there are two more events 0x40ad and 0x04c2 which
rely on the FRONTEND MSR. If the FRONTEND MSR is not set correctly, the
count value is not correct.

Update intel_spr_extra_regs[] to support them.

Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids")
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/1624029174-122219-3-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d39991b93f4a3..e442b5542edd6 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -280,6 +280,8 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
 	INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
 	INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
 	INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+	INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
+	INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
 	EVENT_EXTRA_END
 };
 
-- 
GitLab


From 1d5c7880992a06679585e7e568cc679c0c5fd4f2 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 18 Jun 2021 08:12:54 -0700
Subject: [PATCH 3626/3804] perf/x86/intel: Fix instructions:ppp support in
 Sapphire Rapids

Perf errors out when sampling instructions:ppp.

$ perf record -e instructions:ppp -- true
Error:
The sys_perf_event_open() syscall returned with 22 (Invalid argument)
for event (instructions:ppp).

The instruction PDIR is only available on the fixed counter 0. The event
constraint has been updated to fixed0_constraint in
icl_get_event_constraints(). The Sapphire Rapids code unconditionally
errors out for any event which is not available on the GP counter 0.

Make the instructions:ppp an exception.

Fixes: 61b985e3e775 ("perf/x86/intel: Add perf core PMU support for Sapphire Rapids")
Reported-by: Yasin, Ahmad <ahmad.yasin@intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/1624029174-122219-4-git-send-email-kan.liang@linux.intel.com
---
 arch/x86/events/intel/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index e442b5542edd6..e355db5da0973 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4032,8 +4032,10 @@ spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 	 * The :ppp indicates the Precise Distribution (PDist) facility, which
 	 * is only supported on the GP counter 0. If a :ppp event which is not
 	 * available on the GP counter 0, error out.
+	 * Exception: Instruction PDIR is only available on the fixed counter 0.
 	 */
-	if (event->attr.precise_ip == 3) {
+	if ((event->attr.precise_ip == 3) &&
+	    !constraint_match(&fixed0_constraint, event->hw.config)) {
 		if (c->idxmsk64 & BIT_ULL(0))
 			return &counter0_constraint;
 
-- 
GitLab


From 012669c740e6e2afa8bdb95394d06676f933dd2d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 22 Jun 2021 16:21:01 +0200
Subject: [PATCH 3627/3804] perf: Fix task context PMU for Hetero

On HETEROGENEOUS hardware (ARM big.LITTLE, Intel Alder Lake etc.) each
CPU might have a different hardware PMU. Each such PMU is represented
by a different struct pmu, but we only have a single HW task context.

That means that the task context needs to switch PMU type when it
switches CPUs.

Not doing this means that ctx->pmu calls (pmu_{dis,en}able(),
{start,commit,cancel}_txn() etc.) are called against the wrong PMU and
things will go wobbly.

Fixes: f83d2f91d259 ("perf/x86/intel: Add Alder Lake Hybrid support")
Reported-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/YMsy7BuGT8nBTspT@hirez.programming.kicks-ass.net
---
 kernel/events/core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6c964dee2cd7e..0e125ae2fa92f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3822,9 +3822,16 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
 					struct task_struct *task)
 {
 	struct perf_cpu_context *cpuctx;
-	struct pmu *pmu = ctx->pmu;
+	struct pmu *pmu;
 
 	cpuctx = __get_cpu_context(ctx);
+
+	/*
+	 * HACK: for HETEROGENEOUS the task context might have switched to a
+	 * different PMU, force (re)set the context,
+	 */
+	pmu = ctx->pmu = cpuctx->ctx.pmu;
+
 	if (cpuctx->task_ctx == ctx) {
 		if (cpuctx->sched_cb_usage)
 			__perf_pmu_sched_task(cpuctx, true);
-- 
GitLab


From 5dca69e26fe97f17d4a6cbd6872103c868577b14 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 23 Jun 2021 16:14:21 +0300
Subject: [PATCH 3628/3804] software node: Handle software node injection to an
 existing device properly

The function software_node_notify() - the function that creates
and removes the symlinks between the node and the device - was
called unconditionally in device_add_software_node() and
device_remove_software_node(), but it needs to be called in
those functions only in the special case where the node is
added to a device that has already been registered.

This fixes a NULL pointer dereference that happens if
device_remove_software_node() is used with a device that was
never registered.

Fixes: b622b24519f5 ("software node: Allow node addition to already existing device")
Reported-and-tested-by: Dominik Brodowski <linux@dominikbrodowski.net>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/swnode.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index 3cc11b813f28c..d1f1a82401207 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -1045,7 +1045,15 @@ int device_add_software_node(struct device *dev, const struct software_node *nod
 	}
 
 	set_secondary_fwnode(dev, &swnode->fwnode);
-	software_node_notify(dev, KOBJ_ADD);
+
+	/*
+	 * If the device has been fully registered by the time this function is
+	 * called, software_node_notify() must be called separately so that the
+	 * symlinks get created and the reference count of the node is kept in
+	 * balance.
+	 */
+	if (device_is_registered(dev))
+		software_node_notify(dev, KOBJ_ADD);
 
 	return 0;
 }
@@ -1065,7 +1073,8 @@ void device_remove_software_node(struct device *dev)
 	if (!swnode)
 		return;
 
-	software_node_notify(dev, KOBJ_REMOVE);
+	if (device_is_registered(dev))
+		software_node_notify(dev, KOBJ_REMOVE);
 	set_secondary_fwnode(dev, NULL);
 	kobject_put(&swnode->kobj);
 }
@@ -1119,8 +1128,7 @@ int software_node_notify(struct device *dev, unsigned long action)
 
 	switch (action) {
 	case KOBJ_ADD:
-		ret = sysfs_create_link_nowarn(&dev->kobj, &swnode->kobj,
-					       "software_node");
+		ret = sysfs_create_link(&dev->kobj, &swnode->kobj, "software_node");
 		if (ret)
 			break;
 
-- 
GitLab


From 3b7180573c250eb6e2a7eec54ae91f27472332ea Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 22 Jun 2021 21:11:39 +0200
Subject: [PATCH 3629/3804] cpufreq: Make cpufreq_online() call
 driver->offline() on errors

In the CPU removal path the ->offline() callback provided by the
driver is always invoked before ->exit(), but in the cpufreq_online()
error path it is not, so ->exit() is expected to somehow know the
context in which it has been called and act accordingly.

That is less than straightforward, so make cpufreq_online() invoke
the driver's ->offline() callback, if present, on errors before
->exit() too.

This only potentially affects intel_pstate.

Fixes: 91a12e91dc39 ("cpufreq: Allow light-weight tear down and bring up of CPUs")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 802abc925b2ae..cbab834c37a03 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1367,9 +1367,14 @@ static int cpufreq_online(unsigned int cpu)
 			goto out_free_policy;
 		}
 
+		/*
+		 * The initialization has succeeded and the policy is online.
+		 * If there is a problem with its frequency table, take it
+		 * offline and drop it.
+		 */
 		ret = cpufreq_table_validate_and_sort(policy);
 		if (ret)
-			goto out_exit_policy;
+			goto out_offline_policy;
 
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_copy(policy->related_cpus, policy->cpus);
@@ -1515,6 +1520,10 @@ out_destroy_policy:
 
 	up_write(&policy->rwsem);
 
+out_offline_policy:
+	if (cpufreq_driver->offline)
+		cpufreq_driver->offline(policy);
+
 out_exit_policy:
 	if (cpufreq_driver->exit)
 		cpufreq_driver->exit(policy);
-- 
GitLab


From 8c37d01e1a86073d15ea7084390fba58d9a1665f Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 17 Jun 2021 15:05:43 +0900
Subject: [PATCH 3630/3804] PM / devfreq: passive: Fix get_target_freq when not
 using required-opp

Commit 86ad9a24f21e ("PM / devfreq: Add required OPPs support to passive
governor") added support for the required-opp property to the devfreq
passive governor. However, it broke the use case where required-opp is
not used, such as the exynos-bus.c devfreq driver. Fix get_target_freq
of the passive governor to support the case where required-opp is not
used.

Fixes: 86ad9a24f21e ("PM / devfreq: Add required OPPs support to passive governor")
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/governor_passive.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c
index b094132bd20b3..fc09324a03e03 100644
--- a/drivers/devfreq/governor_passive.c
+++ b/drivers/devfreq/governor_passive.c
@@ -65,7 +65,7 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
 		dev_pm_opp_put(p_opp);
 
 		if (IS_ERR(opp))
-			return PTR_ERR(opp);
+			goto no_required_opp;
 
 		*freq = dev_pm_opp_get_freq(opp);
 		dev_pm_opp_put(opp);
@@ -73,6 +73,7 @@ static int devfreq_passive_get_target_freq(struct devfreq *devfreq,
 		return 0;
 	}
 
+no_required_opp:
 	/*
 	 * Get the OPP table's index of decided frequency by governor
 	 * of parent device.
-- 
GitLab


From 7f049fbdd57f6ea71dc741d903c19c73b2f70950 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jun 2021 15:03:16 +0200
Subject: [PATCH 3631/3804] perf/x86/intel/lbr: Zero the xstate buffer on
 allocation

XRSTORS requires a valid xstate buffer to work correctly. XSAVES does not
guarantee to write a fully valid buffer according to the SDM:

  "XSAVES does not write to any parts of the XSAVE header other than the
   XSTATE_BV and XCOMP_BV fields."

XRSTORS triggers a #GP:

  "If bytes 63:16 of the XSAVE header are not all zero."

It's dubious at best how this can work at all when the buffer is not zeroed
before use.

Allocate the buffers with __GFP_ZERO to prevent XRSTORS failure.

Fixes: ce711ea3cab9 ("perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/87wnr0wo2z.ffs@nanos.tec.linutronix.de
---
 arch/x86/events/intel/lbr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 4409d2cccfda5..e8453de7a9648 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -731,7 +731,8 @@ void reserve_lbr_buffers(void)
 		if (!kmem_cache || cpuc->lbr_xsave)
 			continue;
 
-		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+		cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache,
+							GFP_KERNEL | __GFP_ZERO,
 							cpu_to_node(cpu));
 	}
 }
-- 
GitLab


From 533d87fbb82583d37e4af7bbab26d070523b48ee Mon Sep 17 00:00:00 2001
From: kernel test robot <rong.a.chen@intel.com>
Date: Tue, 15 Jun 2021 09:31:03 +0800
Subject: [PATCH 3632/3804] crypto: sl3516 - fix duplicated inclusion

drivers/crypto/gemini/sl3516-ce-cipher.c: linux/io.h is included more than once.

Generated by: scripts/checkincludes.pl

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: kernel test robot <lkp@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/gemini/sl3516-ce-cipher.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
index 0b34a4971e498..b41c2f5fc495a 100644
--- a/drivers/crypto/gemini/sl3516-ce-cipher.c
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -12,7 +12,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
 #include <linux/io.h>
-#include <linux/io.h>
 #include <linux/pm_runtime.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/skcipher.h>
-- 
GitLab


From d886d55f4c7345ea1628ecc49eaea3f496f8d3cb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Jun 2021 14:14:52 +0800
Subject: [PATCH 3633/3804] crypto: sa2ul - Remove unused auth_len variable

This patch removes the unused auth_len variable from
sa_aead_dma_in_callback.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/sa2ul.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 51bb69bc573c3..544d7040cfc58 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -1698,7 +1698,6 @@ static void sa_aead_dma_in_callback(void *data)
 	size_t pl, ml;
 	int i;
 	int err = 0;
-	u16 auth_len;
 	u32 *mdptr;
 
 	sa_sync_from_device(rxd);
@@ -1711,13 +1710,10 @@ static void sa_aead_dma_in_callback(void *data)
 	for (i = 0; i < (authsize / 4); i++)
 		mdptr[i + 4] = swab32(mdptr[i + 4]);
 
-	auth_len = req->assoclen + req->cryptlen;
-
 	if (rxd->enc) {
 		scatterwalk_map_and_copy(&mdptr[4], req->dst, start, authsize,
 					 1);
 	} else {
-		auth_len -= authsize;
 		start -= authsize;
 		scatterwalk_map_and_copy(auth_tag, req->src, start, authsize,
 					 0);
-- 
GitLab


From 84c2c729eabda52a2f6caa087d51f0d7420bca0c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 15 Jun 2021 11:11:53 +0100
Subject: [PATCH 3634/3804] crypto: hisilicon/sec - Fix spelling mistake
 "fallbcak" -> "fallback"

There is a spelling mistake in a dev_err message. Fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f23af61661dea..6a45bd23b3635 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2290,7 +2290,7 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx,
 
 	/* Kunpeng920 aead mode not support input 0 size */
 	if (!a_ctx->fallback_aead_tfm) {
-		dev_err(dev, "aead fallbcak tfm is NULL!\n");
+		dev_err(dev, "aead fallback tfm is NULL!\n");
 		return -EINVAL;
 	}
 
-- 
GitLab


From 74c66120fda6596ad57f41e1607b3a5d51ca143d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 16 Jun 2021 13:34:59 -0700
Subject: [PATCH 3635/3804] crypto: nx - Fix memcpy() over-reading in nonce

Fix typo in memcpy() where size should be CTR_RFC3686_NONCE_SIZE.

Fixes: 030f4e968741 ("crypto: nx - Fix reentrancy bugs")
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-aes-ctr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 13f518802343d..6120e350ff71d 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -118,7 +118,7 @@ static int ctr3686_aes_nx_crypt(struct skcipher_request *req)
 	struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
 	u8 iv[16];
 
-	memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE);
+	memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_NONCE_SIZE);
 	memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE);
 	iv[12] = iv[13] = iv[14] = 0;
 	iv[15] = 1;
-- 
GitLab


From 5163ab505e489400b4738b2a5547ec83d2dff7bb Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Jun 2021 15:28:10 +0800
Subject: [PATCH 3636/3804] crypto: api - Move crypto attr definitions out of
 crypto.h

The definitions for crypto_attr-related types and enums are not
needed by most Crypto API users.  This patch moves them out of
crypto.h and into algapi.h/internal.h depending on the extent of
their use.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/internal.h       | 12 ++++++++++++
 include/crypto/algapi.h |  9 +++++++++
 include/linux/crypto.h  | 21 ---------------------
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/crypto/internal.h b/crypto/internal.h
index 976ec9dfc76db..f00869af689f5 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -29,6 +29,18 @@ struct crypto_larval {
 	u32 mask;
 };
 
+enum {
+	CRYPTOA_UNSPEC,
+	CRYPTOA_ALG,
+	CRYPTOA_TYPE,
+	__CRYPTOA_MAX,
+};
+
+#define CRYPTOA_MAX (__CRYPTOA_MAX - 1)
+
+/* Maximum number of (rtattr) parameters for each template. */
+#define CRYPTO_MAX_ATTRS 32
+
 extern struct list_head crypto_alg_list;
 extern struct rw_semaphore crypto_alg_sem;
 extern struct blocking_notifier_head crypto_chain;
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 41d42e649da4f..5f6841c73e5a7 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -96,6 +96,15 @@ struct scatter_walk {
 	unsigned int offset;
 };
 
+struct crypto_attr_alg {
+	char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct crypto_attr_type {
+	u32 type;
+	u32 mask;
+};
+
 void crypto_mod_put(struct crypto_alg *alg);
 
 int crypto_register_template(struct crypto_template *tmpl);
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 3b9263d6122fd..855869e1fd327 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -643,27 +643,6 @@ struct crypto_comp {
 	struct crypto_tfm base;
 };
 
-enum {
-	CRYPTOA_UNSPEC,
-	CRYPTOA_ALG,
-	CRYPTOA_TYPE,
-	__CRYPTOA_MAX,
-};
-
-#define CRYPTOA_MAX (__CRYPTOA_MAX - 1)
-
-/* Maximum number of (rtattr) parameters for each template. */
-#define CRYPTO_MAX_ATTRS 32
-
-struct crypto_attr_alg {
-	char name[CRYPTO_MAX_ALG_NAME];
-};
-
-struct crypto_attr_type {
-	u32 type;
-	u32 mask;
-};
-
 /* 
  * Transform user interface.
  */
-- 
GitLab


From 2a96726bd0ccde4f12b9b9a9f61f7b1ac5af7e10 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Jun 2021 15:57:12 +0800
Subject: [PATCH 3637/3804] crypto: nx - Fix RCU warning in nx842_OF_upd_status

The function nx842_OF_upd_status triggers a sparse RCU warning when
it directly dereferences the RCU-protected devdata.  This appears
to be an accident as there was another variable of the same name
that was passed in from the caller.

After it was removed (because the main purpose of using it, to
update the status member, was itself removed) the global variable
unintentionally stood in as its replacement.

This patch restores the devdata parameter.

Fixes: 90fd73f912f0 ("crypto: nx - remove pSeries NX 'status' field")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-842-pseries.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 67caff73f058f..1491cbfbc071c 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -538,13 +538,15 @@ static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
  * The status field indicates if the device is enabled when the status
  * is 'okay'.  Otherwise the device driver will be disabled.
  *
+ * @devdata: struct nx842_devdata to use for dev_info
  * @prop: struct property point containing the maxsyncop for the update
  *
  * Returns:
  *  0 - Device is available
  *  -ENODEV - Device is not available
  */
-static int nx842_OF_upd_status(struct property *prop)
+static int nx842_OF_upd_status(struct nx842_devdata *devdata,
+			       struct property *prop)
 {
 	const char *status = (const char *)prop->value;
 
@@ -757,7 +759,7 @@ static int nx842_OF_upd(struct property *new_prop)
 		goto out;
 
 	/* Perform property updates */
-	ret = nx842_OF_upd_status(status);
+	ret = nx842_OF_upd_status(new_devdata, status);
 	if (ret)
 		goto error_out;
 
-- 
GitLab


From b20d9a73a3b2a859d32ae569588557bc47c87a1e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Jun 2021 16:00:12 +0800
Subject: [PATCH 3638/3804] crypto: nx - Fix numerous sparse byte-order
 warnings

The nx driver started out its life as a BE-only driver.  However,
somewhere along the way LE support was partially added.  This never
seems to have been extended all the way but it does trigger numerous
warnings during build.

This patch fixes all those warnings, but it doesn't mean that the
driver will work on LE.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/nx/nx-aes-cbc.c |  2 +-
 drivers/crypto/nx/nx-aes-ccm.c |  4 ++--
 drivers/crypto/nx/nx-aes-ctr.c |  2 +-
 drivers/crypto/nx/nx-aes-ecb.c |  2 +-
 drivers/crypto/nx/nx-aes-gcm.c |  2 +-
 drivers/crypto/nx/nx-sha256.c  | 19 ++++++++++++-------
 drivers/crypto/nx/nx-sha512.c  | 19 ++++++++++++-------
 drivers/crypto/nx/nx_csbcpb.h  |  4 ++--
 8 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index d6314ea9ae896..0e440f704a8f9 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -88,7 +88,7 @@ static int cbc_aes_nx_crypt(struct skcipher_request *req,
 
 		memcpy(req->iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index e7384d1075739..3793885f928dd 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -391,7 +391,7 @@ static int ccm_nx_decrypt(struct aead_request   *req,
 
 		/* update stats */
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
@@ -460,7 +460,7 @@ static int ccm_nx_encrypt(struct aead_request   *req,
 
 		/* update stats */
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 6120e350ff71d..dfa3ad1a12f28 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -102,7 +102,7 @@ static int ctr_aes_nx_crypt(struct skcipher_request *req, u8 *iv)
 		memcpy(iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
 
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index 7a729dc2bc17a..502a565074e98 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -86,7 +86,7 @@ static int ecb_aes_nx_crypt(struct skcipher_request *req,
 			goto out;
 
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index fc9baca13920c..4a796318b4306 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -382,7 +382,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
 		NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
 
 		atomic_inc(&(nx_ctx->stats->aes_ops));
-		atomic64_add(csbcpb->csb.processed_byte_count,
+		atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
 			     &(nx_ctx->stats->aes_bytes));
 
 		processed += to_process;
diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c
index b0ad665e4bda8..c3bebf0feabe1 100644
--- a/drivers/crypto/nx/nx-sha256.c
+++ b/drivers/crypto/nx/nx-sha256.c
@@ -16,6 +16,11 @@
 #include "nx_csbcpb.h"
 #include "nx.h"
 
+struct sha256_state_be {
+	__be32 state[SHA256_DIGEST_SIZE / 4];
+	u64 count;
+	u8 buf[SHA256_BLOCK_SIZE];
+};
 
 static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
 {
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
 }
 
 static int nx_sha256_init(struct shash_desc *desc) {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state_be *sctx = shash_desc_ctx(desc);
 
 	memset(sctx, 0, sizeof *sctx);
 
@@ -56,7 +61,7 @@ static int nx_sha256_init(struct shash_desc *desc) {
 static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state_be *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 	struct nx_sg *out_sg;
@@ -175,7 +180,7 @@ out:
 
 static int nx_sha256_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state_be *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 	struct nx_sg *in_sg, *out_sg;
@@ -245,7 +250,7 @@ out:
 
 static int nx_sha256_export(struct shash_desc *desc, void *out)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state_be *sctx = shash_desc_ctx(desc);
 
 	memcpy(out, sctx, sizeof(*sctx));
 
@@ -254,7 +259,7 @@ static int nx_sha256_export(struct shash_desc *desc, void *out)
 
 static int nx_sha256_import(struct shash_desc *desc, const void *in)
 {
-	struct sha256_state *sctx = shash_desc_ctx(desc);
+	struct sha256_state_be *sctx = shash_desc_ctx(desc);
 
 	memcpy(sctx, in, sizeof(*sctx));
 
@@ -268,8 +273,8 @@ struct shash_alg nx_shash_sha256_alg = {
 	.final      = nx_sha256_final,
 	.export     = nx_sha256_export,
 	.import     = nx_sha256_import,
-	.descsize   = sizeof(struct sha256_state),
-	.statesize  = sizeof(struct sha256_state),
+	.descsize   = sizeof(struct sha256_state_be),
+	.statesize  = sizeof(struct sha256_state_be),
 	.base       = {
 		.cra_name        = "sha256",
 		.cra_driver_name = "sha256-nx",
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index c29103a1a0b6c..1ffb40d2c3245 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -15,6 +15,11 @@
 #include "nx_csbcpb.h"
 #include "nx.h"
 
+struct sha512_state_be {
+	__be64 state[SHA512_DIGEST_SIZE / 8];
+	u64 count[2];
+	u8 buf[SHA512_BLOCK_SIZE];
+};
 
 static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
 {
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
 
 static int nx_sha512_init(struct shash_desc *desc)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct sha512_state_be *sctx = shash_desc_ctx(desc);
 
 	memset(sctx, 0, sizeof *sctx);
 
@@ -56,7 +61,7 @@ static int nx_sha512_init(struct shash_desc *desc)
 static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct sha512_state_be *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 	struct nx_sg *out_sg;
@@ -178,7 +183,7 @@ out:
 
 static int nx_sha512_final(struct shash_desc *desc, u8 *out)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct sha512_state_be *sctx = shash_desc_ctx(desc);
 	struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
 	struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
 	struct nx_sg *in_sg, *out_sg;
@@ -251,7 +256,7 @@ out:
 
 static int nx_sha512_export(struct shash_desc *desc, void *out)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct sha512_state_be *sctx = shash_desc_ctx(desc);
 
 	memcpy(out, sctx, sizeof(*sctx));
 
@@ -260,7 +265,7 @@ static int nx_sha512_export(struct shash_desc *desc, void *out)
 
 static int nx_sha512_import(struct shash_desc *desc, const void *in)
 {
-	struct sha512_state *sctx = shash_desc_ctx(desc);
+	struct sha512_state_be *sctx = shash_desc_ctx(desc);
 
 	memcpy(sctx, in, sizeof(*sctx));
 
@@ -274,8 +279,8 @@ struct shash_alg nx_shash_sha512_alg = {
 	.final      = nx_sha512_final,
 	.export     = nx_sha512_export,
 	.import     = nx_sha512_import,
-	.descsize   = sizeof(struct sha512_state),
-	.statesize  = sizeof(struct sha512_state),
+	.descsize   = sizeof(struct sha512_state_be),
+	.statesize  = sizeof(struct sha512_state_be),
 	.base       = {
 		.cra_name        = "sha512",
 		.cra_driver_name = "sha512-nx",
diff --git a/drivers/crypto/nx/nx_csbcpb.h b/drivers/crypto/nx/nx_csbcpb.h
index 493f8490ff942..e64f7e36fb929 100644
--- a/drivers/crypto/nx/nx_csbcpb.h
+++ b/drivers/crypto/nx/nx_csbcpb.h
@@ -140,8 +140,8 @@ struct cop_status_block {
 	u8 crb_seq_number;
 	u8 completion_code;
 	u8 completion_extension;
-	u32 processed_byte_count;
-	u64 address;
+	__be32 processed_byte_count;
+	__be64 address;
 } __packed;
 
 /* Nest accelerator workbook section 4.4 */
-- 
GitLab


From f873a4d650399ba5af20460f650fa7ea530cbf9e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com>
Date: Thu, 17 Jun 2021 12:19:26 +0300
Subject: [PATCH 3639/3804] MAINTAINERS: update caam crypto driver maintainers
 list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Aymen steps down as caam maintainer, being replaced by Pankaj.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 388924c2d23ac..690e54bf7e236 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7159,7 +7159,7 @@ F:	include/video/
 
 FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
 M:	Horia Geantă <horia.geanta@nxp.com>
-M:	Aymen Sghaier <aymen.sghaier@nxp.com>
+M:	Pankaj Gupta <pankaj.gupta@nxp.com>
 L:	linux-crypto@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/crypto/fsl-sec4.txt
-- 
GitLab


From cac6f1b87b1f7feafb7db349a2b1ca86634bc950 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 18 Jun 2021 17:35:51 +0800
Subject: [PATCH 3640/3804] crypto: sl3516 - Fix build warning without
 CONFIG_PM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

drivers/crypto/gemini/sl3516-ce-core.c:345:12:
 warning: ‘sl3516_ce_pm_resume’ defined but not used [-Wunused-function]
 static int sl3516_ce_pm_resume(struct device *dev)
            ^~~~~~~~~~~~~~~~~~~

The driver needs PM, otherwise clock and resets are never set.
So make it depend on PM to fix this warning.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Suggested-by: LABBE Corentin <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 99b0907901788..6f14f39d32e3e 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -273,6 +273,7 @@ config CRYPTO_DEV_SL3516
 	select CRYPTO_ECB
 	select CRYPTO_AES
 	select HW_RANDOM
+	depends on PM
 	help
 	  This option allows you to have support for SL3516 crypto offloader.
 
-- 
GitLab


From d18344c0d095df544bd7174b8fae2cba523dd4a4 Mon Sep 17 00:00:00 2001
From: Wenkai Lin <linwenkai6@hisilicon.com>
Date: Fri, 18 Jun 2021 17:36:06 +0800
Subject: [PATCH 3641/3804] crypto: hisilicon/qm - implement for querying
 hardware tasks status.

This patch adds a function hisi_qm_is_q_updated to
check if the task is ready in hardware queue when
user polls an UACCE queue. This prevents users from
repeatedly querying whether the accelerator has
completed tasks, which wastes CPU resources.

Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 580709408cfc7..1d67f94a1d568 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -2926,6 +2926,23 @@ static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
 	hisi_qm_stop_qp(q->priv);
 }
 
+static int hisi_qm_is_q_updated(struct uacce_queue *q)
+{
+	struct hisi_qp *qp = q->priv;
+	struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+	int updated = 0;
+
+	while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+		/* make sure to read data from memory */
+		dma_rmb();
+		qm_cq_head_update(qp);
+		cqe = qp->cqe + qp->qp_status.cq_head;
+		updated = 1;
+	}
+
+	return updated;
+}
+
 static void qm_set_sqctype(struct uacce_queue *q, u16 type)
 {
 	struct hisi_qm *qm = q->uacce->priv;
@@ -2971,6 +2988,7 @@ static const struct uacce_ops uacce_qm_ops = {
 	.stop_queue = hisi_qm_uacce_stop_queue,
 	.mmap = hisi_qm_uacce_mmap,
 	.ioctl = hisi_qm_uacce_ioctl,
+	.is_q_updated = hisi_qm_is_q_updated,
 };
 
 static int qm_alloc_uacce(struct hisi_qm *qm)
-- 
GitLab


From e31694e0a7a709293319475d8001e05e31f2178c Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 23 Jun 2021 10:42:28 -0500
Subject: [PATCH 3642/3804] objtool: Don't make .altinstructions writable

When objtool creates the .altinstructions section, it sets the SHF_WRITE
flag to make the section writable -- unless the section had already been
previously created by the kernel.  The mismatch between kernel-created
and objtool-created section flags can cause failures with external
tooling (kpatch-build).  And the section doesn't need to be writable
anyway.

Make the section flags consistent with the kernel's.

Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls")
Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/6c284ae89717889ea136f9f0064d914cd8329d31.1624462939.git.jpoimboe@redhat.com
---
 tools/objtool/arch/x86/decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 523aa4157f801..bc821056aba90 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -684,7 +684,7 @@ static int elf_add_alternative(struct elf *elf,
 	sec = find_section_by_name(elf, ".altinstructions");
 	if (!sec) {
 		sec = elf_create_section(elf, ".altinstructions",
-					 SHF_WRITE, size, 0);
+					 SHF_ALLOC, size, 0);
 
 		if (!sec) {
 			WARN_ELF("elf_create_section");
-- 
GitLab


From 9f38b678ffc4e2ccf167a1131c0403dc4f5e1bb7 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 21 Jun 2021 18:59:26 +0000
Subject: [PATCH 3643/3804] crypto: sl3516 - depends on HAS_IOMEM

The sl3516 driver needs to depend on HAS_IOMEM.
This fixes a build error:
ERROR: modpost: "devm_platform_ioremap_resource" [drivers/crypto/gemini/sl3516-ce.ko] undefined!

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 6f14f39d32e3e..ebcec460c0457 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -268,6 +268,7 @@ config CRYPTO_DEV_NIAGARA2
 
 config CRYPTO_DEV_SL3516
 	tristate "Stormlink SL3516 crypto offloader"
+	depends on HAS_IOMEM
 	select CRYPTO_SKCIPHER
 	select CRYPTO_ENGINE
 	select CRYPTO_ECB
-- 
GitLab


From f4183717b370ad28dd0c0d74760142b20e6e7931 Mon Sep 17 00:00:00 2001
From: Huaixin Chang <changhuaixin@linux.alibaba.com>
Date: Mon, 21 Jun 2021 17:27:58 +0800
Subject: [PATCH 3644/3804] sched/fair: Introduce the burstable CFS controller

The CFS bandwidth controller limits CPU requests of a task group to
quota during each period. However, parallel workloads might be bursty
so that they get throttled even when their average utilization is under
quota. And they are latency sensitive at the same time so that
throttling them is undesired.

We borrow time now against our future underrun, at the cost of increased
interference against the other system users. All nicely bounded.

Traditional (UP-EDF) bandwidth control is something like:

  (U = \Sum u_i) <= 1

This guarantees both that every deadline is met and that the system is
stable. After all, if U were > 1, then for every second of walltime,
we'd have to run more than a second of program time, and obviously miss
our deadline, but the next deadline will be further out still, there is
never time to catch up, unbounded fail.

This work observes that a workload doesn't always execute the full
quota; this enables one to describe u_i as a statistical distribution.

For example, have u_i = {x,e}_i, where x is the p(95) and x+e p(100)
(the traditional WCET). This effectively allows u to be smaller,
increasing the efficiency (we can pack more tasks in the system), but at
the cost of missing deadlines when all the odds line up. However, it
does maintain stability, since every overrun must be paired with an
underrun as long as our x is above the average.

That is, suppose we have 2 tasks, both specify a p(95) value, then we
have a p(95)*p(95) = 90.25% chance both tasks are within their quota and
everything is good. At the same time we have a p(5)p(5) = 0.25% chance
both tasks will exceed their quota at the same time (guaranteed deadline
fail). Somewhere in between there's a threshold where one exceeds and
the other doesn't underrun enough to compensate; this depends on the
specific CDFs.

At the same time, we can say that the worst case deadline miss, will be
\Sum e_i; that is, there is a bounded tardiness (under the assumption
that x+e is indeed WCET).

The benefit of burst is seen when testing with schbench. Default value of
kernel.sched_cfs_bandwidth_slice_us(5ms) and CONFIG_HZ(1000) is used.

	mkdir /sys/fs/cgroup/cpu/test
	echo $$ > /sys/fs/cgroup/cpu/test/cgroup.procs
	echo 100000 > /sys/fs/cgroup/cpu/test/cpu.cfs_quota_us
	echo 100000 > /sys/fs/cgroup/cpu/test/cpu.cfs_burst_us

	./schbench -m 1 -t 3 -r 20 -c 80000 -R 10

The average CPU usage is at 80%. I ran this 10 times, and got long tail
latency 6 times and got throttled 8 times.

Tail latencies are shown below, and it wasn't the worst case.

	Latency percentiles (usec)
		50.0000th: 19872
		75.0000th: 21344
		90.0000th: 22176
		95.0000th: 22496
		*99.0000th: 22752
		99.5000th: 22752
		99.9000th: 22752
		min=0, max=22727
	rps: 9.90 p95 (usec) 22496 p99 (usec) 22752 p95/cputime 28.12% p99/cputime 28.44%

The interference when using burst is valued by the possibilities for
missing the deadline and the average WCET. Test results showed that when
there are many cgroups or the CPU is underutilized, the interference is
limited. More details are shown in:
https://lore.kernel.org/lkml/5371BD36-55AE-4F71-B9D7-B86DC32E3D2B@linux.alibaba.com/

Co-developed-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Signed-off-by: Shanpei Chen <shanpeic@linux.alibaba.com>
Co-developed-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin@linux.alibaba.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ben Segall <bsegall@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20210621092800.23714-2-changhuaixin@linux.alibaba.com
---
 kernel/sched/core.c  | 68 ++++++++++++++++++++++++++++++++++++++++----
 kernel/sched/fair.c  | 14 ++++++---
 kernel/sched/sched.h |  1 +
 3 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fc231d61bcda8..2883c22eef102 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9780,7 +9780,8 @@ static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC;
 
 static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
 
-static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota,
+				u64 burst)
 {
 	int i, ret = 0, runtime_enabled, runtime_was_enabled;
 	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
@@ -9810,6 +9811,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	if (quota != RUNTIME_INF && quota > max_cfs_runtime)
 		return -EINVAL;
 
+	if (quota != RUNTIME_INF && (burst > quota ||
+				     burst + quota > max_cfs_runtime))
+		return -EINVAL;
+
 	/*
 	 * Prevent race between setting of cfs_rq->runtime_enabled and
 	 * unthrottle_offline_cfs_rqs().
@@ -9831,6 +9836,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
+	cfs_b->burst = burst;
 
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
@@ -9864,9 +9870,10 @@ out_unlock:
 
 static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 {
-	u64 quota, period;
+	u64 quota, period, burst;
 
 	period = ktime_to_ns(tg->cfs_bandwidth.period);
+	burst = tg->cfs_bandwidth.burst;
 	if (cfs_quota_us < 0)
 		quota = RUNTIME_INF;
 	else if ((u64)cfs_quota_us <= U64_MAX / NSEC_PER_USEC)
@@ -9874,7 +9881,7 @@ static int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
 	else
 		return -EINVAL;
 
-	return tg_set_cfs_bandwidth(tg, period, quota);
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
 }
 
 static long tg_get_cfs_quota(struct task_group *tg)
@@ -9892,15 +9899,16 @@ static long tg_get_cfs_quota(struct task_group *tg)
 
 static int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
 {
-	u64 quota, period;
+	u64 quota, period, burst;
 
 	if ((u64)cfs_period_us > U64_MAX / NSEC_PER_USEC)
 		return -EINVAL;
 
 	period = (u64)cfs_period_us * NSEC_PER_USEC;
 	quota = tg->cfs_bandwidth.quota;
+	burst = tg->cfs_bandwidth.burst;
 
-	return tg_set_cfs_bandwidth(tg, period, quota);
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
 }
 
 static long tg_get_cfs_period(struct task_group *tg)
@@ -9913,6 +9921,30 @@ static long tg_get_cfs_period(struct task_group *tg)
 	return cfs_period_us;
 }
 
+static int tg_set_cfs_burst(struct task_group *tg, long cfs_burst_us)
+{
+	u64 quota, period, burst;
+
+	if ((u64)cfs_burst_us > U64_MAX / NSEC_PER_USEC)
+		return -EINVAL;
+
+	burst = (u64)cfs_burst_us * NSEC_PER_USEC;
+	period = ktime_to_ns(tg->cfs_bandwidth.period);
+	quota = tg->cfs_bandwidth.quota;
+
+	return tg_set_cfs_bandwidth(tg, period, quota, burst);
+}
+
+static long tg_get_cfs_burst(struct task_group *tg)
+{
+	u64 burst_us;
+
+	burst_us = tg->cfs_bandwidth.burst;
+	do_div(burst_us, NSEC_PER_USEC);
+
+	return burst_us;
+}
+
 static s64 cpu_cfs_quota_read_s64(struct cgroup_subsys_state *css,
 				  struct cftype *cft)
 {
@@ -9937,6 +9969,18 @@ static int cpu_cfs_period_write_u64(struct cgroup_subsys_state *css,
 	return tg_set_cfs_period(css_tg(css), cfs_period_us);
 }
 
+static u64 cpu_cfs_burst_read_u64(struct cgroup_subsys_state *css,
+				  struct cftype *cft)
+{
+	return tg_get_cfs_burst(css_tg(css));
+}
+
+static int cpu_cfs_burst_write_u64(struct cgroup_subsys_state *css,
+				   struct cftype *cftype, u64 cfs_burst_us)
+{
+	return tg_set_cfs_burst(css_tg(css), cfs_burst_us);
+}
+
 struct cfs_schedulable_data {
 	struct task_group *tg;
 	u64 period, quota;
@@ -10089,6 +10133,11 @@ static struct cftype cpu_legacy_files[] = {
 		.read_u64 = cpu_cfs_period_read_u64,
 		.write_u64 = cpu_cfs_period_write_u64,
 	},
+	{
+		.name = "cfs_burst_us",
+		.read_u64 = cpu_cfs_burst_read_u64,
+		.write_u64 = cpu_cfs_burst_write_u64,
+	},
 	{
 		.name = "stat",
 		.seq_show = cpu_cfs_stat_show,
@@ -10254,12 +10303,13 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 {
 	struct task_group *tg = css_tg(of_css(of));
 	u64 period = tg_get_cfs_period(tg);
+	u64 burst = tg_get_cfs_burst(tg);
 	u64 quota;
 	int ret;
 
 	ret = cpu_period_quota_parse(buf, &period, &quota);
 	if (!ret)
-		ret = tg_set_cfs_bandwidth(tg, period, quota);
+		ret = tg_set_cfs_bandwidth(tg, period, quota, burst);
 	return ret ?: nbytes;
 }
 #endif
@@ -10286,6 +10336,12 @@ static struct cftype cpu_files[] = {
 		.seq_show = cpu_max_show,
 		.write = cpu_max_write,
 	},
+	{
+		.name = "max.burst",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = cpu_cfs_burst_read_u64,
+		.write_u64 = cpu_cfs_burst_write_u64,
+	},
 #endif
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 	{
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7b8990fd48962..4a3e61a88acce 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4626,8 +4626,11 @@ static inline u64 sched_cfs_bandwidth_slice(void)
  */
 void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
 {
-	if (cfs_b->quota != RUNTIME_INF)
-		cfs_b->runtime = cfs_b->quota;
+	if (unlikely(cfs_b->quota == RUNTIME_INF))
+		return;
+
+	cfs_b->runtime += cfs_b->quota;
+	cfs_b->runtime = min(cfs_b->runtime, cfs_b->quota + cfs_b->burst);
 }
 
 static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4988,6 +4991,9 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
 	cfs_b->nr_periods += overrun;
 
+	/* Refill extra burst quota even if cfs_b->idle */
+	__refill_cfs_bandwidth_runtime(cfs_b);
+
 	/*
 	 * idle depends on !throttled (for the case of a large deficit), and if
 	 * we're going inactive then everything else can be deferred
@@ -4995,8 +5001,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
 	if (cfs_b->idle && !throttled)
 		goto out_deactivate;
 
-	__refill_cfs_bandwidth_runtime(cfs_b);
-
 	if (!throttled) {
 		/* mark as potentially idle for the upcoming period */
 		cfs_b->idle = 1;
@@ -5246,6 +5250,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 			if (new < max_cfs_quota_period) {
 				cfs_b->period = ns_to_ktime(new);
 				cfs_b->quota *= 2;
+				cfs_b->burst *= 2;
 
 				pr_warn_ratelimited(
 	"cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
@@ -5277,6 +5282,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->runtime = 0;
 	cfs_b->quota = RUNTIME_INF;
 	cfs_b->period = ns_to_ktime(default_cfs_period());
+	cfs_b->burst = 0;
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
 	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 01e48f682d54e..c80d42e9589bf 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -366,6 +366,7 @@ struct cfs_bandwidth {
 	ktime_t			period;
 	u64			quota;
 	u64			runtime;
+	u64			burst;
 	s64			hierarchical_quota;
 
 	u8			idle;
-- 
GitLab


From 8f91efd870ea5d8bc10b0fcc9740db51cd4c0c83 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Date: Fri, 11 Jun 2021 08:29:34 +0800
Subject: [PATCH 3645/3804] psi: Fix race between psi_trigger_create/destroy

Race detected between psi_trigger_destroy/create as shown below, which
causes a panic by accessing invalid psi_system->poll_wait->wait_queue_entry
and psi_system->poll_timer->entry->next. Under this modification, the
race window is removed by initialising poll_wait and poll_timer in
group_init which are executed only once at beginning.

  psi_trigger_destroy()                   psi_trigger_create()

  mutex_lock(trigger_lock);
  rcu_assign_pointer(poll_task, NULL);
  mutex_unlock(trigger_lock);
					  mutex_lock(trigger_lock);
					  if (!rcu_access_pointer(group->poll_task)) {
					    timer_setup(poll_timer, poll_timer_fn, 0);
					    rcu_assign_pointer(poll_task, task);
					  }
					  mutex_unlock(trigger_lock);

  synchronize_rcu();
  del_timer_sync(poll_timer); <-- poll_timer has been reinitialized by
                                  psi_trigger_create()

So, trigger_lock/RCU correctly protects destruction of
group->poll_task but misses this race affecting poll_timer and
poll_wait.

Fixes: 461daba06bdc ("psi: eliminate kthread_worker from psi trigger scheduling mechanism")
Co-developed-by: ziwei.dai <ziwei.dai@unisoc.com>
Signed-off-by: ziwei.dai <ziwei.dai@unisoc.com>
Co-developed-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: ke.wang <ke.wang@unisoc.com>
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lkml.kernel.org/r/1623371374-15664-1-git-send-email-huangzhaoyang@gmail.com
---
 kernel/sched/psi.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index cc25a3cff41fb..58b36d17a09a0 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -182,6 +182,8 @@ struct psi_group psi_system = {
 
 static void psi_avgs_work(struct work_struct *work);
 
+static void poll_timer_fn(struct timer_list *t);
+
 static void group_init(struct psi_group *group)
 {
 	int cpu;
@@ -201,6 +203,8 @@ static void group_init(struct psi_group *group)
 	memset(group->polling_total, 0, sizeof(group->polling_total));
 	group->polling_next_update = ULLONG_MAX;
 	group->polling_until = 0;
+	init_waitqueue_head(&group->poll_wait);
+	timer_setup(&group->poll_timer, poll_timer_fn, 0);
 	rcu_assign_pointer(group->poll_task, NULL);
 }
 
@@ -1157,9 +1161,7 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 			return ERR_CAST(task);
 		}
 		atomic_set(&group->poll_wakeup, 0);
-		init_waitqueue_head(&group->poll_wait);
 		wake_up_process(task);
-		timer_setup(&group->poll_timer, poll_timer_fn, 0);
 		rcu_assign_pointer(group->poll_task, task);
 	}
 
@@ -1211,6 +1213,7 @@ static void psi_trigger_destroy(struct kref *ref)
 					group->poll_task,
 					lockdep_is_held(&group->trigger_lock));
 			rcu_assign_pointer(group->poll_task, NULL);
+			del_timer(&group->poll_timer);
 		}
 	}
 
@@ -1223,17 +1226,14 @@ static void psi_trigger_destroy(struct kref *ref)
 	 */
 	synchronize_rcu();
 	/*
-	 * Destroy the kworker after releasing trigger_lock to prevent a
+	 * Stop kthread 'psimon' after releasing trigger_lock to prevent a
 	 * deadlock while waiting for psi_poll_work to acquire trigger_lock
 	 */
 	if (task_to_destroy) {
 		/*
 		 * After the RCU grace period has expired, the worker
 		 * can no longer be found through group->poll_task.
-		 * But it might have been already scheduled before
-		 * that - deschedule it cleanly before destroying it.
 		 */
-		del_timer_sync(&group->poll_timer);
 		kthread_stop(task_to_destroy);
 	}
 	kfree(t);
-- 
GitLab


From 2309a05d2abe713f7debc951640b010370c8befb Mon Sep 17 00:00:00 2001
From: Beata Michalska <beata.michalska@arm.com>
Date: Thu, 3 Jun 2021 15:06:25 +0100
Subject: [PATCH 3646/3804] sched/core: Introduce SD_ASYM_CPUCAPACITY_FULL
 sched_domain flag

Introducing new, complementary to SD_ASYM_CPUCAPACITY, sched_domain
topology flag, to distinguish between sched_domains where any CPU
capacity asymmetry is detected (SD_ASYM_CPUCAPACITY) and ones where
a full set of CPU capacities is visible to all domain members
(SD_ASYM_CPUCAPACITY_FULL).

With the distinction between full and partial CPU capacity asymmetry,
brought in by the newly introduced flag, the scope of the original
SD_ASYM_CPUCAPACITY flag gets shifted, still maintaining the existing
behaviour when one is detected on a given sched domain, allowing
misfit migrations within sched domains that do not observe full range
of CPU capacities but still do have members with different capacity
values. It loses though its meaning when it comes to the lowest CPU
asymmetry sched_domain level per-cpu pointer, which is to be now
denoted by SD_ASYM_CPUCAPACITY_FULL flag.

Signed-off-by: Beata Michalska <beata.michalska@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20210603140627.8409-2-beata.michalska@arm.com
---
 include/linux/sched/sd_flags.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index 34b21e971d77b..57bde66d95f7a 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -90,6 +90,16 @@ SD_FLAG(SD_WAKE_AFFINE, SDF_SHARED_CHILD)
  */
 SD_FLAG(SD_ASYM_CPUCAPACITY, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
 
+/*
+ * Domain members have different CPU capacities spanning all unique CPU
+ * capacity values.
+ *
+ * SHARED_PARENT: Set from the topmost domain down to the first domain where
+ *		  all available CPU capacities are visible
+ * NEEDS_GROUPS: Per-CPU capacity is asymmetric between groups.
+ */
+SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
+
 /*
  * Domain members share CPU capacity (i.e. SMT)
  *
-- 
GitLab


From c744dc4ab58d1c09624ca3397cf15b142a0e0cb7 Mon Sep 17 00:00:00 2001
From: Beata Michalska <beata.michalska@arm.com>
Date: Thu, 3 Jun 2021 15:06:26 +0100
Subject: [PATCH 3647/3804] sched/topology: Rework CPU capacity asymmetry
 detection

Currently the CPU capacity asymmetry detection, performed through
asym_cpu_capacity_level, tries to identify the lowest topology level
at which the highest CPU capacity is being observed, not necessarily
finding the level at which all possible capacity values are visible
to all CPUs, which might be a bit problematic for some possible/valid
asymmetric topologies i.e.:

DIE      [                                ]
MC       [                       ][       ]

CPU       [0] [1] [2] [3] [4] [5]  [6] [7]
Capacity  |.....| |.....| |.....|  |.....|
	     L	     M       B        B

Where:
 arch_scale_cpu_capacity(L) = 512
 arch_scale_cpu_capacity(M) = 871
 arch_scale_cpu_capacity(B) = 1024

In this particular case, the asymmetric topology level will point
at MC, as all possible CPU masks for that level do cover the CPU
with the highest capacity. It will work just fine for the first
cluster, not so much for the second one though (consider the
find_energy_efficient_cpu which might end up attempting the energy
aware wake-up for a domain that does not see any asymmetry at all)

Rework the way the capacity asymmetry levels are being detected,
allowing to point to the lowest topology level (for a given CPU), where
full set of available CPU capacities is visible to all CPUs within given
domain. As a result, the per-cpu sd_asym_cpucapacity might differ across
the domains. This will have an impact on EAS wake-up placement in a way
that it might see different range of CPUs to be considered, depending on
the given current and target CPUs.

Additionally, those levels, where any range of asymmetry (not
necessarily full) is being detected will get identified as well.
The selected asymmetric topology level will be denoted by
SD_ASYM_CPUCAPACITY_FULL sched domain flag whereas the 'sub-levels'
would receive the already used SD_ASYM_CPUCAPACITY flag. This allows
maintaining the current behaviour for asymmetric topologies, with
misfit migration operating correctly on lower levels, if applicable,
as any asymmetry is enough to trigger the misfit migration.
The logic there relies on the SD_ASYM_CPUCAPACITY flag and does not
relate to the full asymmetry level denoted by the sd_asym_cpucapacity
pointer.

Detecting the CPU capacity asymmetry is being based on a set of
available CPU capacities for all possible CPUs. This data is being
generated upon init and updated once CPU topology changes are being
detected (through arch_update_cpu_topology). As such, any changes
to identified CPU capacities (like initializing cpufreq) need to be
explicitly advertised by corresponding archs to trigger rebuilding
the data.

Additional -dflags- parameter, used when building sched domains, has
been removed as well, as the asymmetry flags are now being set directly
in sd_init.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Suggested-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Beata Michalska <beata.michalska@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lore.kernel.org/r/20210603140627.8409-3-beata.michalska@arm.com
---
 kernel/sched/topology.c | 209 +++++++++++++++++++++++++---------------
 1 file changed, 131 insertions(+), 78 deletions(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 053115b55f89f..b77ad49dc14f6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -675,7 +675,7 @@ static void update_top_cache_domain(int cpu)
 	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
 	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
 
-	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
+	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY_FULL);
 	rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
 }
 
@@ -1266,6 +1266,116 @@ next:
 	update_group_capacity(sd, cpu);
 }
 
+/*
+ * Asymmetric CPU capacity bits
+ */
+struct asym_cap_data {
+	struct list_head link;
+	unsigned long capacity;
+	unsigned long cpus[];
+};
+
+/*
+ * Set of available CPUs grouped by their corresponding capacities
+ * Each list entry contains a CPU mask reflecting CPUs that share the same
+ * capacity.
+ * The lifespan of data is unlimited.
+ */
+static LIST_HEAD(asym_cap_list);
+
+#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
+
+/*
+ * Verify whether there is any CPU capacity asymmetry in a given sched domain.
+ * Provides sd_flags reflecting the asymmetry scope.
+ */
+static inline int
+asym_cpu_capacity_classify(const struct cpumask *sd_span,
+			   const struct cpumask *cpu_map)
+{
+	struct asym_cap_data *entry;
+	int count = 0, miss = 0;
+
+	/*
+	 * Count how many unique CPU capacities this domain spans across
+	 * (compare sched_domain CPUs mask with ones representing  available
+	 * CPUs capacities). Take into account CPUs that might be offline:
+	 * skip those.
+	 */
+	list_for_each_entry(entry, &asym_cap_list, link) {
+		if (cpumask_intersects(sd_span, cpu_capacity_span(entry)))
+			++count;
+		else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
+			++miss;
+	}
+
+	WARN_ON_ONCE(!count && !list_empty(&asym_cap_list));
+
+	/* No asymmetry detected */
+	if (count < 2)
+		return 0;
+	/* Some of the available CPU capacity values have not been detected */
+	if (miss)
+		return SD_ASYM_CPUCAPACITY;
+
+	/* Full asymmetry */
+	return SD_ASYM_CPUCAPACITY | SD_ASYM_CPUCAPACITY_FULL;
+
+}
+
+static inline void asym_cpu_capacity_update_data(int cpu)
+{
+	unsigned long capacity = arch_scale_cpu_capacity(cpu);
+	struct asym_cap_data *entry = NULL;
+
+	list_for_each_entry(entry, &asym_cap_list, link) {
+		if (capacity == entry->capacity)
+			goto done;
+	}
+
+	entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL);
+	if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n"))
+		return;
+	entry->capacity = capacity;
+	list_add(&entry->link, &asym_cap_list);
+done:
+	__cpumask_set_cpu(cpu, cpu_capacity_span(entry));
+}
+
+/*
+ * Build-up/update list of CPUs grouped by their capacities
+ * An update requires explicit request to rebuild sched domains
+ * with state indicating CPU topology changes.
+ */
+static void asym_cpu_capacity_scan(void)
+{
+	struct asym_cap_data *entry, *next;
+	int cpu;
+
+	list_for_each_entry(entry, &asym_cap_list, link)
+		cpumask_clear(cpu_capacity_span(entry));
+
+	for_each_cpu_and(cpu, cpu_possible_mask, housekeeping_cpumask(HK_FLAG_DOMAIN))
+		asym_cpu_capacity_update_data(cpu);
+
+	list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
+		if (cpumask_empty(cpu_capacity_span(entry))) {
+			list_del(&entry->link);
+			kfree(entry);
+		}
+	}
+
+	/*
+	 * Only one capacity value has been detected i.e. this system is symmetric.
+	 * No need to keep this data around.
+	 */
+	if (list_is_singular(&asym_cap_list)) {
+		entry = list_first_entry(&asym_cap_list, typeof(*entry), link);
+		list_del(&entry->link);
+		kfree(entry);
+	}
+}
+
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -1399,11 +1509,12 @@ int __read_mostly		node_reclaim_distance = RECLAIM_DISTANCE;
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
 	const struct cpumask *cpu_map,
-	struct sched_domain *child, int dflags, int cpu)
+	struct sched_domain *child, int cpu)
 {
 	struct sd_data *sdd = &tl->data;
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
 	int sd_id, sd_weight, sd_flags = 0;
+	struct cpumask *sd_span;
 
 #ifdef CONFIG_NUMA
 	/*
@@ -1420,9 +1531,6 @@ sd_init(struct sched_domain_topology_level *tl,
 			"wrong sd_flags in topology description\n"))
 		sd_flags &= TOPOLOGY_SD_FLAGS;
 
-	/* Apply detected topology flags */
-	sd_flags |= dflags;
-
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
@@ -1454,13 +1562,19 @@ sd_init(struct sched_domain_topology_level *tl,
 #endif
 	};
 
-	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
-	sd_id = cpumask_first(sched_domain_span(sd));
+	sd_span = sched_domain_span(sd);
+	cpumask_and(sd_span, cpu_map, tl->mask(cpu));
+	sd_id = cpumask_first(sd_span);
+
+	sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);
+
+	WARN_ONCE((sd->flags & (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY)) ==
+		  (SD_SHARE_CPUCAPACITY | SD_ASYM_CPUCAPACITY),
+		  "CPU capacity asymmetry not supported on SMT\n");
 
 	/*
 	 * Convert topological properties into behaviour.
 	 */
-
 	/* Don't attempt to spread across CPUs of different capacities. */
 	if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)
 		sd->child->flags &= ~SD_PREFER_SIBLING;
@@ -1926,9 +2040,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-		struct sched_domain *child, int dflags, int cpu)
+		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
 	if (child) {
 		sd->level = child->level + 1;
@@ -1990,65 +2104,6 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
 	return true;
 }
 
-/*
- * Find the sched_domain_topology_level where all CPU capacities are visible
- * for all CPUs.
- */
-static struct sched_domain_topology_level
-*asym_cpu_capacity_level(const struct cpumask *cpu_map)
-{
-	int i, j, asym_level = 0;
-	bool asym = false;
-	struct sched_domain_topology_level *tl, *asym_tl = NULL;
-	unsigned long cap;
-
-	/* Is there any asymmetry? */
-	cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
-
-	for_each_cpu(i, cpu_map) {
-		if (arch_scale_cpu_capacity(i) != cap) {
-			asym = true;
-			break;
-		}
-	}
-
-	if (!asym)
-		return NULL;
-
-	/*
-	 * Examine topology from all CPU's point of views to detect the lowest
-	 * sched_domain_topology_level where a highest capacity CPU is visible
-	 * to everyone.
-	 */
-	for_each_cpu(i, cpu_map) {
-		unsigned long max_capacity = arch_scale_cpu_capacity(i);
-		int tl_id = 0;
-
-		for_each_sd_topology(tl) {
-			if (tl_id < asym_level)
-				goto next_level;
-
-			for_each_cpu_and(j, tl->mask(i), cpu_map) {
-				unsigned long capacity;
-
-				capacity = arch_scale_cpu_capacity(j);
-
-				if (capacity <= max_capacity)
-					continue;
-
-				max_capacity = capacity;
-				asym_level = tl_id;
-				asym_tl = tl;
-			}
-next_level:
-			tl_id++;
-		}
-	}
-
-	return asym_tl;
-}
-
-
 /*
  * Build sched domains for a given set of CPUs and attach the sched domains
  * to the individual CPUs
@@ -2061,7 +2116,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	struct s_data d;
 	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
-	struct sched_domain_topology_level *tl_asym;
 	bool has_asym = false;
 
 	if (WARN_ON(cpumask_empty(cpu_map)))
@@ -2071,24 +2125,19 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	if (alloc_state != sa_rootdomain)
 		goto error;
 
-	tl_asym = asym_cpu_capacity_level(cpu_map);
-
 	/* Set up domains for CPUs specified by the cpu_map: */
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain_topology_level *tl;
-		int dflags = 0;
 
 		sd = NULL;
 		for_each_sd_topology(tl) {
-			if (tl == tl_asym) {
-				dflags |= SD_ASYM_CPUCAPACITY;
-				has_asym = true;
-			}
 
 			if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
 				goto error;
 
-			sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
+			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
+
+			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
 
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
@@ -2217,6 +2266,7 @@ int sched_init_domains(const struct cpumask *cpu_map)
 	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
 
 	arch_update_cpu_topology();
+	asym_cpu_capacity_scan();
 	ndoms_cur = 1;
 	doms_cur = alloc_sched_domains(ndoms_cur);
 	if (!doms_cur)
@@ -2299,6 +2349,9 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
 
 	/* Let the architecture update CPU core mappings: */
 	new_topology = arch_update_cpu_topology();
+	/* Trigger rebuilding CPU capacity asymmetry data */
+	if (new_topology)
+		asym_cpu_capacity_scan();
 
 	if (!doms_new) {
 		WARN_ON_ONCE(dattr_new);
-- 
GitLab


From adf3c31e18b765ea24eba7b0c1efc076b8ee3d55 Mon Sep 17 00:00:00 2001
From: Beata Michalska <beata.michalska@arm.com>
Date: Thu, 3 Jun 2021 15:06:27 +0100
Subject: [PATCH 3648/3804] sched/doc: Update the CPU capacity asymmetry bits

Update the documentation bits referring to capacity aware scheduling
with regard to the newly introduced SD_ASYM_CPUCAPACITY_FULL sched_domain
flag.

Signed-off-by: Beata Michalska <beata.michalska@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://lore.kernel.org/r/20210603140627.8409-4-beata.michalska@arm.com
---
 Documentation/scheduler/sched-capacity.rst | 6 ++++--
 Documentation/scheduler/sched-energy.rst   | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst
index 9b7cbe43b2d11..805f85f330b54 100644
--- a/Documentation/scheduler/sched-capacity.rst
+++ b/Documentation/scheduler/sched-capacity.rst
@@ -284,8 +284,10 @@ whether the system exhibits asymmetric CPU capacities. Should that be the
 case:
 
 - The sched_asym_cpucapacity static key will be enabled.
-- The SD_ASYM_CPUCAPACITY flag will be set at the lowest sched_domain level that
-  spans all unique CPU capacity values.
+- The SD_ASYM_CPUCAPACITY_FULL flag will be set at the lowest sched_domain
+  level that spans all unique CPU capacity values.
+- The SD_ASYM_CPUCAPACITY flag will be set for any sched_domain that spans
+  CPUs with any range of asymmetry.
 
 The sched_asym_cpucapacity static key is intended to guard sections of code that
 cater to asymmetric CPU capacity systems. Do note however that said key is
diff --git a/Documentation/scheduler/sched-energy.rst b/Documentation/scheduler/sched-energy.rst
index afe02d3944025..8fbce5e767d98 100644
--- a/Documentation/scheduler/sched-energy.rst
+++ b/Documentation/scheduler/sched-energy.rst
@@ -328,7 +328,7 @@ section lists these dependencies and provides hints as to how they can be met.
 
 As mentioned in the introduction, EAS is only supported on platforms with
 asymmetric CPU topologies for now. This requirement is checked at run-time by
-looking for the presence of the SD_ASYM_CPUCAPACITY flag when the scheduling
+looking for the presence of the SD_ASYM_CPUCAPACITY_FULL flag when the scheduling
 domains are built.
 
 See Documentation/scheduler/sched-capacity.rst for requirements to be met for this
-- 
GitLab


From 309505dd56854c1f9744c9a2b8aa40d897002bca Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Thu, 24 Jun 2021 15:09:31 +0800
Subject: [PATCH 3649/3804] KVM: selftests: Fix mapping length truncation in
 m{,un}map()

max_mem_slots is now declared as uint32_t. The result of (0x200000 * 32767)
is unexpectedly truncated to be 0xffe00000, whilst we actually need to
allocate about 63GB. Cast max_mem_slots to size_t in both mmap() and
munmap() to fix the length truncation.

We'll otherwise see the failure on arm64 thanks to the access_ok() checking
in __kvm_set_memory_region(), as the unmapped VA happens to go beyond the
task's allowed address space.

 # ./set_memory_region_test
Allowed number of memory slots: 32767
Adding slots 0..32766, each memory region with 2048K size
==== Test Assertion Failure ====
  set_memory_region_test.c:391: ret == 0
  pid=94861 tid=94861 errno=22 - Invalid argument
     1	0x00000000004015a7: test_add_max_memory_regions at set_memory_region_test.c:389
     2	 (inlined by) main at set_memory_region_test.c:426
     3	0x0000ffffb8e67bdf: ?? ??:0
     4	0x00000000004016db: _start at :?
  KVM_SET_USER_MEMORY_REGION IOCTL failed,
  rc: -1 errno: 22 slot: 2615

Fixes: 3bf0fcd75434 ("KVM: selftests: Speed up set_memory_region_test")
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Message-Id: <20210624070931.565-1-yuzenghui@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/set_memory_region_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 978f5b5f4dc02..d8812f27648ca 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -376,7 +376,7 @@ static void test_add_max_memory_regions(void)
 	pr_info("Adding slots 0..%i, each memory region with %dK size\n",
 		(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
 
-	mem = mmap(NULL, MEM_REGION_SIZE * max_mem_slots + alignment,
+	mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
 		   PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 	TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
 	mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
@@ -401,7 +401,7 @@ static void test_add_max_memory_regions(void)
 	TEST_ASSERT(ret == -1 && errno == EINVAL,
 		    "Adding one more memory slot should fail with EINVAL");
 
-	munmap(mem, MEM_REGION_SIZE * max_mem_slots + alignment);
+	munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
 	munmap(mem_extra, MEM_REGION_SIZE);
 	kvm_vm_free(vm);
 }
-- 
GitLab


From f9b871c89ae61d5a4c0b81659fa6819c50d4ced2 Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Date: Wed, 16 Jun 2021 20:15:30 +0200
Subject: [PATCH 3650/3804] x86/resctrl: Fix kernel-doc in pseudo_lock.c

Add undocumented parameters detected by scripts/kernel-doc.

Signed-off-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20210616181530.4094-1-fmdefrancesco@gmail.com
---
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 05a89e33fde28..2207916cae656 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -49,6 +49,7 @@ static struct class *pseudo_lock_class;
 
 /**
  * get_prefetch_disable_bits - prefetch disable bits of supported platforms
+ * @void: It takes no parameters.
  *
  * Capture the list of platforms that have been validated to support
  * pseudo-locking. This includes testing to ensure pseudo-locked regions
@@ -162,7 +163,7 @@ static struct rdtgroup *region_find_by_minor(unsigned int minor)
 }
 
 /**
- * pseudo_lock_pm_req - A power management QoS request list entry
+ * struct pseudo_lock_pm_req - A power management QoS request list entry
  * @list:	Entry within the @pm_reqs list for a pseudo-locked region
  * @req:	PM QoS request
  */
@@ -184,6 +185,7 @@ static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
 
 /**
  * pseudo_lock_cstates_constrain - Restrict cores from entering C6
+ * @plr: Pseudo-locked region
  *
  * To prevent the cache from being affected by power management entering
  * C6 has to be avoided. This is accomplished by requesting a latency
@@ -196,6 +198,8 @@ static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
  * the ACPI latencies need to be considered while keeping in mind that C2
  * may be set to map to deeper sleep states. In this case the latency
  * requirement needs to prevent entering C2 also.
+ *
+ * Return: 0 on success, <0 on failure
  */
 static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
 {
@@ -520,7 +524,7 @@ static int pseudo_lock_fn(void *_rdtgrp)
 
 /**
  * rdtgroup_monitor_in_progress - Test if monitoring in progress
- * @r: resource group being queried
+ * @rdtgrp: resource group being queried
  *
  * Return: 1 if monitor groups have been created for this resource
  * group, 0 otherwise.
@@ -1140,6 +1144,8 @@ out:
 
 /**
  * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
+ * @rdtgrp: Resource group to which the pseudo-locked region belongs.
+ * @sel: Selector of which measurement to perform on a pseudo-locked region.
  *
  * The measurement of latency to access a pseudo-locked region should be
  * done from a cpu that is associated with that pseudo-locked region.
-- 
GitLab


From fd2afa70eff057fab57c9e06708b68677b261a0c Mon Sep 17 00:00:00 2001
From: "Fabio M. De Francesco" <fmdefrancesco@gmail.com>
Date: Sat, 19 Jun 2021 00:32:06 +0200
Subject: [PATCH 3651/3804] x86/resctrl: Fix kernel-doc in internal.h

Add description of undocumented parameters. Issues detected by
scripts/kernel-doc.

Signed-off-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lkml.kernel.org/r/20210618223206.29539-1-fmdefrancesco@gmail.com
---
 arch/x86/kernel/cpu/resctrl/internal.h | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c4d320d02fd5b..6a5f60a372198 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -70,6 +70,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  * struct mon_evt - Entry in the event list of a resource
  * @evtid:		event id
  * @name:		name of the event
+ * @list:		entry in &rdt_resource->evt_list
  */
 struct mon_evt {
 	u32			evtid;
@@ -78,10 +79,13 @@ struct mon_evt {
 };
 
 /**
- * struct mon_data_bits - Monitoring details for each event file
- * @rid:               Resource id associated with the event file.
+ * union mon_data_bits - Monitoring details for each event file
+ * @priv:              Used to store monitoring event data in @u
+ *                     as kernfs private data
+ * @rid:               Resource id associated with the event file
  * @evtid:             Event id associated with the event file
  * @domid:             The domain to which the event file belongs
+ * @u:                 Name of the bit fields struct
  */
 union mon_data_bits {
 	void *priv;
@@ -119,6 +123,7 @@ enum rdt_group_type {
  * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group will be used for Pseudo-Locking
  * @RDT_MODE_PSEUDO_LOCKED: No sharing of this resource group's allocations
  *                          allowed AND the allocations are Cache Pseudo-Locked
+ * @RDT_NUM_MODES: Total number of modes
  *
  * The mode of a resource group enables control over the allowed overlap
  * between allocations associated with different resource groups (classes
@@ -142,7 +147,7 @@ enum rdtgrp_mode {
 
 /**
  * struct mongroup - store mon group's data in resctrl fs.
- * @mon_data_kn		kernlfs node for the mon_data directory
+ * @mon_data_kn:		kernfs node for the mon_data directory
  * @parent:			parent rdtgrp
  * @crdtgrp_list:		child rdtgroup node list
  * @rmid:			rmid for this rdtgroup
@@ -282,11 +287,11 @@ struct rftype {
 /**
  * struct mbm_state - status for each MBM counter in each domain
  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
- * @prev_msr	Value of IA32_QM_CTR for this RMID last time we read it
+ * @prev_msr:	Value of IA32_QM_CTR for this RMID last time we read it
  * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
- * @prev_bw	The most recent bandwidth in MBps
- * @delta_bw	Difference between the current and previous bandwidth
- * @delta_comp	Indicates whether to compute the delta_bw
+ * @prev_bw:	The most recent bandwidth in MBps
+ * @delta_bw:	Difference between the current and previous bandwidth
+ * @delta_comp:	Indicates whether to compute the delta_bw
  */
 struct mbm_state {
 	u64	chunks;
@@ -456,11 +461,13 @@ struct rdt_parse_data {
  * @data_width:		Character width of data when displaying
  * @domains:		All domains for this resource
  * @cache:		Cache allocation related data
+ * @membw:		If the component has bandwidth controls, their properties.
  * @format_str:		Per resource format string to show domain value
  * @parse_ctrlval:	Per resource function pointer to parse control values
  * @evt_list:		List of monitoring events
  * @num_rmid:		Number of RMIDs available
  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
+ * @mbm_width:		Monitor width, to detect and correct for overflow.
  * @fflags:		flags to choose base and info files
  */
 struct rdt_resource {
-- 
GitLab


From 18f63b15b0283d6f37be3174e2c7b6f2d6ed91cf Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Mon, 21 Jun 2021 15:16:48 -0700
Subject: [PATCH 3652/3804] KVM: x86: Print CPU of last attempted VM-entry when
 dumping VMCS/VMCB

Failed VM-entry is often due to a faulty core. To help identify bad
cores, print the id of the last logical processor that attempted
VM-entry whenever dumping a VMCS or VMCB.

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210621221648.1833148-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 2 ++
 arch/x86/kvm/vmx/vmx.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4cee285b01851..8834822c00cdc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3132,6 +3132,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
 		return;
 	}
 
+	pr_err("VMCB %p, last attempted VMRUN on CPU %d\n",
+	       svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu);
 	pr_err("VMCB Control Area:\n");
 	pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff);
 	pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ab6f682645d71..684daa3eefc26 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5724,6 +5724,8 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
 	if (cpu_has_secondary_exec_ctrls())
 		secondary_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
 
+	pr_err("VMCS %p, last attempted VM-entry on CPU %d\n",
+	       vmx->loaded_vmcs->vmcs, vcpu->arch.last_vmentry_cpu);
 	pr_err("*** Guest State ***\n");
 	pr_err("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
 	       vmcs_readl(GUEST_CR0), vmcs_readl(CR0_READ_SHADOW),
-- 
GitLab


From e5830fb13b8cad5e3bdf84f0f7a3dcb4f4d9bcbb Mon Sep 17 00:00:00 2001
From: Hou Wenlong <houwenlong93@linux.alibaba.com>
Date: Tue, 22 Jun 2021 21:55:32 +0800
Subject: [PATCH 3653/3804] KVM: selftests: fix triple fault if ept=0 in
 dirty_log_test

Commit 22f232d134e1 ("KVM: selftests: x86: Set supported CPUIDs on
default VM") moved vcpu_set_cpuid into vm_create_with_vcpus, but
dirty_log_test doesn't use it to create the VM. So the vCPU's CPUID is
not set, and the guest's pa_bits in KVM is smaller than the value
queried by userspace.

However, the dirty track memory slot is at the highest GPA, so the
reserved bits in the gpte would be set with the wrong pa_bits.
For shadow paging, the page fault would fail in permission_fault and
be injected into the guest. Since the guest doesn't have an IDT, it
finally leads to a vm_exit for triple fault.

Move vcpu_set_cpuid into vm_vcpu_add_default to set supported
CPUIDs on default vcpu, since almost all tests need it.

Fixes: 22f232d134e1 ("KVM: selftests: x86: Set supported CPUIDs on default VM")
Signed-off-by: Hou Wenlong <houwenlong93@linux.alibaba.com>
Message-Id: <411ea2173f89abce56fc1fca5af913ed9c5a89c9.1624351343.git.houwenlong93@linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c           | 4 ----
 tools/testing/selftests/kvm/lib/x86_64/processor.c   | 3 +++
 tools/testing/selftests/kvm/steal_time.c             | 2 --
 tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c | 2 --
 4 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a2b732cf96ea4..8ea854d7822d9 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -375,10 +375,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 		uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
 
 		vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-#ifdef __x86_64__
-		vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
 	}
 
 	return vm;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index efe2350444213..595322b24e4cb 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -600,6 +600,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	/* Setup the MP state */
 	mp_state.mp_state = 0;
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
+
+	/* Setup supported CPUIDs */
+	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c919..a6fe75cb9a6eb 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -73,8 +73,6 @@ static void steal_time_init(struct kvm_vm *vm)
 	for (i = 0; i < NR_VCPUS; ++i) {
 		int ret;
 
-		vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
-
 		/* ST_GPA_BASE is identity mapped */
 		st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
 		sync_global_to_guest(vm, st_gva[i]);
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 5f8dd74d415f8..fd309fb9e2c4e 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -102,8 +102,6 @@ static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
 		vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
 	else
 		vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
-
-	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
 }
 
 static void run_vm_bsp(uint32_t bsp_vcpu)
-- 
GitLab


From 31c656570065727028f96c811b5ea9fc61502a18 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 22 Jun 2021 16:09:12 +0100
Subject: [PATCH 3654/3804] KVM: x86/mmu: Fix uninitialized boolean variable
 flush

In the case where kvm_memslots_have_rmaps(kvm) is false the boolean
variable flush is not set and is uninitialized.  If is_tdp_mmu_enabled(kvm)
is true then the call to kvm_tdp_mmu_zap_collapsible_sptes passes the
uninitialized value of flush into the call. Fix this by initializing
flush to false.

Addresses-Coverity: ("Uninitialized scalar variable")
Fixes: e2209710ccc5 ("KVM: x86/mmu: Skip rmap operations if rmaps not allocated")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622150912.23429-1-colin.king@canonical.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 84d48a33e38b3..b3be690d081a9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5689,7 +5689,7 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
 {
 	/* FIXME: const-ify all uses of struct kvm_memory_slot.  */
 	struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
-	bool flush;
+	bool flush = false;
 
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
-- 
GitLab


From b33bb78a1fada6445c265c585ee0dd0fc6279102 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:22:44 -0700
Subject: [PATCH 3655/3804] KVM: nVMX: Handle split-lock #AC exceptions that
 happen in L2

Mark #ACs that won't be reinjected to the guest as wanted by L0 so that
KVM handles split-lock #AC from L2 instead of forwarding the exception to
L1.  Split-lock #AC isn't yet virtualized, i.e. L1 will treat it like a
regular #AC and do the wrong thing, e.g. reinject it into L2.

Fixes: e6f8b6c12f03 ("KVM: VMX: Extend VMXs #AC interceptor to handle split lock #AC in guest")
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622172244.3561540-1-seanjc@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 3 +++
 arch/x86/kvm/vmx/vmcs.h   | 5 +++++
 arch/x86/kvm/vmx/vmx.c    | 4 ++--
 arch/x86/kvm/vmx/vmx.h    | 1 +
 4 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 183fd9d62fc52..fa3f50f0a3fa5 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5833,6 +5833,9 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
 		else if (is_breakpoint(intr_info) &&
 			 vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
 			return true;
+		else if (is_alignment_check(intr_info) &&
+			 !vmx_guest_inject_ac(vcpu))
+			return true;
 		return false;
 	case EXIT_REASON_EXTERNAL_INTERRUPT:
 		return true;
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index de3b04d4b587a..4b9957e2bf5b7 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -117,6 +117,11 @@ static inline bool is_gp_fault(u32 intr_info)
 	return is_exception_n(intr_info, GP_VECTOR);
 }
 
+static inline bool is_alignment_check(u32 intr_info)
+{
+	return is_exception_n(intr_info, AC_VECTOR);
+}
+
 static inline bool is_machine_check(u32 intr_info)
 {
 	return is_exception_n(intr_info, MC_VECTOR);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 684daa3eefc26..5a1067c42f3a2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4743,7 +4743,7 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
  *  - Guest has #AC detection enabled in CR0
  *  - Guest EFLAGS has AC bit set
  */
-static inline bool guest_inject_ac(struct kvm_vcpu *vcpu)
+bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
 {
 	if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
 		return true;
@@ -4851,7 +4851,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 		kvm_run->debug.arch.exception = ex_no;
 		break;
 	case AC_VECTOR:
-		if (guest_inject_ac(vcpu)) {
+		if (vmx_guest_inject_ac(vcpu)) {
 			kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
 			return 1;
 		}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 5740f8e2aa231..3979a947933af 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -376,6 +376,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
 
+bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
 void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
 bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
-- 
GitLab


From ecc3a92c6f4953c134a9590c762755e6593f507c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:11 -0700
Subject: [PATCH 3656/3804] KVM: selftests: Remove errant asm/barrier.h include
 to fix arm64 build

Drop an unnecessary include of asm/barrier.h from dirty_log_test.c to
allow the test to build on arm64.  arm64, s390, and x86 all build cleanly
without the include (PPC and MIPS aren't supported in KVM's selftests).

arm64's barrier.h includes linux/kasan-checks.h, which is not copied
into tools/.

  In file included from ../../../../tools/include/asm/barrier.h:8,
                   from dirty_log_test.c:19:
     .../arm64/include/asm/barrier.h:12:10: fatal error: linux/kasan-checks.h: No such file or directory
     12 | #include <linux/kasan-checks.h>
        |          ^~~~~~~~~~~~~~~~~~~~~~
  compilation terminated.

Fixes: 84292e565951 ("KVM: selftests: Add dirty ring buffer test")
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 81edbd23d371c..b4d24f50aca62 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -16,7 +16,6 @@
 #include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
-#include <asm/barrier.h>
 #include <linux/atomic.h>
 
 #include "kvm_util.h"
-- 
GitLab


From 96d41cfd1bb9964602fabea9c7e72ca723f749db Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:12 -0700
Subject: [PATCH 3657/3804] KVM: selftests: Zero out the correct page in the
 Hyper-V features test

Fix an apparent copy-paste goof in hyperv_features where hcall_page
(which is two pages, so technically just the first page) gets zeroed
twice, and hcall_params gets zeroed none times.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 9947ef63dfa1a..030c9447cb905 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -630,7 +630,7 @@ int main(void)
 	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
 
 	hcall_params = vm_vaddr_alloc(vm, getpagesize(), 0x20000, 0, 0);
-	memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
 
 	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
 	vcpu_enable_cap(vm, VCPU_ID, &cap);
-- 
GitLab


From 7a4f1a75b78c10d0d0e90841f45a60e12f599eff Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:13 -0700
Subject: [PATCH 3658/3804] KVM: selftests: Unconditionally use memslot 0 when
 loading elf binary

Use memslot '0' for all vm_vaddr_alloc() calls when loading the test
binary.  This is the first step toward adding a helper to handle page
allocations with a default value for the target memslot.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c         | 2 +-
 tools/testing/selftests/kvm/hardware_disable_test.c  | 2 +-
 tools/testing/selftests/kvm/include/kvm_util.h       | 3 +--
 tools/testing/selftests/kvm/lib/elf.c                | 6 ++----
 tools/testing/selftests/kvm/lib/kvm_util.c           | 2 +-
 tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c | 2 +-
 6 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index b4d24f50aca62..9026fa4ea133c 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -680,7 +680,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
 	vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	kvm_vm_elf_load(vm, program_invocation_name);
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
 #endif
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 4b8db3bce6102..b21c69a56daa2 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -105,7 +105,7 @@ static void run_test(uint32_t run)
 		CPU_SET(i, &cpu_set);
 
 	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	kvm_vm_elf_load(vm, program_invocation_name);
 	vm_create_irqchip(vm);
 
 	pr_debug("%s: [%d] start vcpus\n", __func__, run);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 35739567189e0..59608b17707d5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -98,8 +98,7 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
 int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
 		       size_t len);
 
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-		     uint32_t data_memslot, uint32_t pgd_memslot);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
 
 void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
 
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a64..edeeaf73d3b15 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
  * by the image and it needs to have sufficient available physical pages, to
  * back the virtual pages used to load the image.
  */
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
-	uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
 {
 	off_t offset, offset_rv;
 	Elf64_Ehdr hdr;
@@ -164,8 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
 		seg_vend |= vm->page_size - 1;
 		size_t seg_size = seg_vend - seg_vstart + 1;
 
-		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
-			data_memslot, pgd_memslot);
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart, 0, 0);
 		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
 			"virtual memory for segment at requested min addr,\n"
 			"  segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 8ea854d7822d9..52b9639b5d6de 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -365,7 +365,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
 	pages = vm_adjust_num_guest_pages(mode, pages);
 	vm = vm_create(mode, pages, O_RDWR);
 
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	kvm_vm_elf_load(vm, program_invocation_name);
 
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index fd309fb9e2c4e..ae76436af0cc1 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -90,7 +90,7 @@ static struct kvm_vm *create_vm(void)
 	pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
 	vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
 
-	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+	kvm_vm_elf_load(vm, program_invocation_name);
 	vm_create_irqchip(vm);
 
 	return vm;
-- 
GitLab


From 1dcd1c58ae7dc42102d2976421aefb5362427b9e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:14 -0700
Subject: [PATCH 3659/3804] KVM: selftests: Unconditionally use memslot 0 for
 x86's GDT/TSS setup

Refactor x86's GDT/TSS allocations to force memslot '0' at its
vm_vaddr_alloc() call sites instead of passing in '0' from on high.  This
is a step toward using a common helper for allocating pages.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c       | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 595322b24e4cb..1fdcf91587322 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -518,24 +518,22 @@ unmapped_gva:
 	exit(EXIT_FAILURE);
 }
 
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
-			  int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
 {
 	if (!vm->gdt)
 		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
-			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+			KVM_UTIL_MIN_VADDR, 0, 0);
 
 	dt->base = vm->gdt;
 	dt->limit = getpagesize();
 }
 
 static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
-				int selector, int gdt_memslot,
-				int pgd_memslot)
+				int selector)
 {
 	if (!vm->tss)
 		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
-			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+			KVM_UTIL_MIN_VADDR, 0, 0);
 
 	memset(segp, 0, sizeof(*segp));
 	segp->base = vm->tss;
@@ -546,7 +544,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
 	kvm_seg_fill_gdt_64bit(vm, segp);
 }
 
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
 {
 	struct kvm_sregs sregs;
 
@@ -555,7 +553,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
 
 	sregs.idt.limit = 0;
 
-	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+	kvm_setup_gdt(vm, &sregs.gdt);
 
 	switch (vm->mode) {
 	case VM_MODE_PXXV48_4K:
@@ -567,7 +565,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
 		kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
 		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
 		kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
-		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
 		break;
 
 	default:
@@ -588,7 +586,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 
 	/* Create VCPU */
 	vm_vcpu_add(vm, vcpuid);
-	vcpu_setup(vm, vcpuid, 0, 0);
+	vcpu_setup(vm, vcpuid);
 
 	/* Setup guest general purpose registers */
 	vcpu_regs_get(vm, vcpuid, &regs);
-- 
GitLab


From 95be3709ff4e3af848c285ebddea9916a24d6d0f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:15 -0700
Subject: [PATCH 3660/3804] KVM: selftests: Use "standard" min virtual address
 for Hyper-V pages

Use the de facto standard minimum virtual address for Hyper-V's hcall
params page.  It's the allocator's job to not double-allocate memory,
i.e. there's no reason to force different regions for the params vs.
hcall page.  This will allow adding a page allocation helper with a
"standard" minimum address.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/hyperv_features.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 030c9447cb905..ad7ee06fa71e2 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -629,7 +629,7 @@ int main(void)
 	hcall_page = vm_vaddr_alloc(vm, 2 * getpagesize(), 0x10000, 0, 0);
 	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
 
-	hcall_params = vm_vaddr_alloc(vm, getpagesize(), 0x20000, 0, 0);
+	hcall_params = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
 	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
 
 	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-- 
GitLab


From a9db9609c0e41d8c06611678d45dff36ded563dc Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:16 -0700
Subject: [PATCH 3661/3804] KVM: selftests: Add helpers to allocate N pages of
 virtual memory

Add wrappers to allocate 1 and N pages of memory using de facto standard
values as the defaults for minimum virtual address, data memslot, and
page table memslot.  Convert all compatible users.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h  |  3 ++
 tools/testing/selftests/kvm/lib/kvm_util.c    | 38 +++++++++++++++++++
 tools/testing/selftests/kvm/lib/x86_64/svm.c  |  9 ++---
 tools/testing/selftests/kvm/lib/x86_64/vmx.c  | 25 ++++++------
 .../selftests/kvm/x86_64/hyperv_clock.c       |  2 +-
 .../selftests/kvm/x86_64/hyperv_features.c    |  6 +--
 6 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 59608b17707d5..70385bf25446f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -142,6 +142,9 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 			  uint32_t data_memslot, uint32_t pgd_memslot);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	      unsigned int npages, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 52b9639b5d6de..6401e04e22688 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1276,6 +1276,44 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 	return vaddr_start;
 }
 
+/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), 0x10000, 0, 0);
+}
+
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+	return vm_vaddr_alloc_pages(vm, 1);
+}
+
 /*
  * Map a range of VM virtual address to the VM's physical address
  *
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 827fe6028dd42..2ac98d70d02bd 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -30,17 +30,14 @@ u64 rflags;
 struct svm_test_data *
 vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
 {
-	vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
-					    0x10000, 0, 0);
+	vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
 	struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
 
-	svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
-					   0x10000, 0, 0);
+	svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
 	svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
 	svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
 
-	svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
-						0x10000, 0, 0);
+	svm->save_area = (void *)vm_vaddr_alloc_page(vm);
 	svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
 	svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 2448b30e8efae..d568d8cfd44d3 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -77,50 +77,48 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
 struct vmx_pages *
 vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 {
-	vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
 	struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
 
 	/* Setup of a region of guest memory for the vmxon region. */
-	vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
 	vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
 	vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
 
 	/* Setup of a region of guest memory for a vmcs. */
-	vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
 	vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
 	vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
 
 	/* Setup of a region of guest memory for the MSR bitmap. */
-	vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->msr = (void *)vm_vaddr_alloc_page(vm);
 	vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
 	vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
 	memset(vmx->msr_hva, 0, getpagesize());
 
 	/* Setup of a region of guest memory for the shadow VMCS. */
-	vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
 	vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
 	vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
 
 	/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
-	vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
 	vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
 	vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
 	memset(vmx->vmread_hva, 0, getpagesize());
 
-	vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
 	vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
 	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
 	memset(vmx->vmwrite_hva, 0, getpagesize());
 
 	/* Setup of a region of guest memory for the VP Assist page. */
-	vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
-						0x10000, 0, 0);
+	vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm);
 	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
 	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
 
 	/* Setup of a region of guest memory for the enlightened VMCS. */
-	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
-						       0x10000, 0, 0);
+	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
 	vmx->enlightened_vmcs_hva =
 		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
 	vmx->enlightened_vmcs_gpa =
@@ -538,7 +536,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot)
 {
-	vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
 	vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
 	vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
 }
@@ -546,8 +544,7 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
 				      uint32_t eptp_memslot)
 {
-	vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
-						  0x10000, 0, 0);
+	vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
 	vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
 	vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 489625acc9cfd..bab10ae787b61 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -214,7 +214,7 @@ int main(void)
 
 	vcpu_set_hv_cpuid(vm, VCPU_ID);
 
-	tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	tsc_page_gva = vm_vaddr_alloc_page(vm);
 	memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
 	TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
 		"TSC page has to be page aligned\n");
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index ad7ee06fa71e2..42bd658f52a82 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -604,7 +604,7 @@ int main(void)
 	/* Test MSRs */
 	vm = vm_create_default(VCPU_ID, 0, guest_msr);
 
-	msr_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	msr_gva = vm_vaddr_alloc_page(vm);
 	memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
 	vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
 	vcpu_enable_cap(vm, VCPU_ID, &cap);
@@ -626,10 +626,10 @@ int main(void)
 	vm = vm_create_default(VCPU_ID, 0, guest_hcall);
 
 	/* Hypercall input/output */
-	hcall_page = vm_vaddr_alloc(vm, 2 * getpagesize(), 0x10000, 0, 0);
+	hcall_page = vm_vaddr_alloc_pages(vm, 2);
 	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
 
-	hcall_params = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	hcall_params = vm_vaddr_alloc_page(vm);
 	memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
 
 	vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
-- 
GitLab


From 106a2e766eae7161a0500048004bbc2f75ea9a98 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:17 -0700
Subject: [PATCH 3662/3804] KVM: selftests: Lower the min virtual address for
 misc page allocations

Reduce the minimum virtual address of page allocations from 0x10000 to
KVM_UTIL_MIN_VADDR (0x2000).  Both values appear to be completely
arbitrary, and reducing the min to KVM_UTIL_MIN_VADDR will allow for
additional consolidation of code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6401e04e22688..6c02ff93e8078 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1292,7 +1292,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
  */
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
 {
-	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), 0x10000, 0, 0);
+	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR, 0, 0);
 }
 
 /*
-- 
GitLab


From 5ae4d8706f091278709cd8af410685dd17c1dca9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:18 -0700
Subject: [PATCH 3663/3804] KVM: selftests: Use alloc_page helper for x86-64's
 GDT/IDT/TSS allocations

Switch to the vm_vaddr_alloc_page() helper for x86-64's "kernel"
allocations now that the helper uses the same min virtual address as the
open coded versions.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 1fdcf91587322..0c51526c2824c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -521,8 +521,7 @@ unmapped_gva:
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
 {
 	if (!vm->gdt)
-		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
-			KVM_UTIL_MIN_VADDR, 0, 0);
+		vm->gdt = vm_vaddr_alloc_page(vm);
 
 	dt->base = vm->gdt;
 	dt->limit = getpagesize();
@@ -532,8 +531,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
 				int selector)
 {
 	if (!vm->tss)
-		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
-			KVM_UTIL_MIN_VADDR, 0, 0);
+		vm->tss = vm_vaddr_alloc_page(vm);
 
 	memset(segp, 0, sizeof(*segp));
 	segp->base = vm->tss;
@@ -1223,8 +1221,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
 	extern void *idt_handlers;
 	int i;
 
-	vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
-	vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+	vm->idt = vm_vaddr_alloc_page(vm);
+	vm->handlers = vm_vaddr_alloc_page(vm);
 	/* Handlers have the same address in both address spaces.*/
 	for (i = 0; i < NUM_INTERRUPTS; i++)
 		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
-- 
GitLab


From 233446c1e68f6086a7f6738318a5314b528fb642 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:19 -0700
Subject: [PATCH 3664/3804] KVM: selftests: Use alloc page helper for xAPIC IPI
 test

Use the common page allocation helper for the xAPIC IPI test, effectively
raising the minimum virtual address from 0x1000 to 0x2000.  Presumably
the test won't explode if it can't get a page at address 0x1000...

Cc: Peter Shier <pshier@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 21b22718a9dbd..5a79c8ed46119 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -427,7 +427,7 @@ int main(int argc, char *argv[])
 
 	vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
 
-	test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+	test_data_page_vaddr = vm_vaddr_alloc_page(vm);
 	data =
 	   (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
 	memset(data, 0, sizeof(*data));
-- 
GitLab


From 408633c326c487f4f32d02c7d891c9b0242d5c45 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:20 -0700
Subject: [PATCH 3665/3804] KVM: selftests: Use "standard" min virtual address
 for CPUID test alloc

Use KVM_UTIL_MIN_VADDR as the minimum for x86-64's CPUID array.  The
system page size was likely used as the minimum because _something_ had
to be provided.  Increasing the min from 0x1000 to 0x2000 should have no
meaningful impact on the test, and will allow changing vm_vaddr_alloc()
to use KVM_UTIL_MIN_VADDR as the default.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/get_cpuid_test.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 8c77537af5a1c..5e5682691f870 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -145,8 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
 struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
 {
 	int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
-	vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
-					getpagesize(), 0, 0);
+	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR, 0, 0);
 	struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
 
 	memcpy(guest_cpuids, cpuid, size);
-- 
GitLab


From 276010551664f73b6f1616dde471d6f0d63a73ba Mon Sep 17 00:00:00 2001
From: Cassio Neri <cassio.neri@gmail.com>
Date: Tue, 22 Jun 2021 22:36:16 +0100
Subject: [PATCH 3666/3804] time: Improve performance of time64_to_tm()

The current implementation of time64_to_tm() contains unnecessary loops,
branches and look-up tables. The new one uses an arithmetic-based algorithm
that appeared in [1] and is approximately 3x faster (YMMV).

The drawback is that the new code isn't intuitive and contains many 'magic
numbers' (not unusual for this type of algorithm). However, [1] justifies
all those numbers and, given this function's history, the code is unlikely
to need much maintenance, if any at all.

Add a KUnit test for it which checks every day in a 160,000-year interval
centered at 1970-01-01 against the expected result.

[1] Neri, Schneider, "Euclidean Affine Functions and Applications to
Calendar Algorithms". https://arxiv.org/abs/2102.06959

Signed-off-by: Cassio Neri <cassio.neri@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20210622213616.313046-1-cassio.neri@gmail.com
---
 kernel/time/Kconfig     |   9 +++
 kernel/time/Makefile    |   1 +
 kernel/time/time_test.c |  98 ++++++++++++++++++++++++++++++
 kernel/time/timeconv.c  | 128 ++++++++++++++++++++++------------------
 4 files changed, 178 insertions(+), 58 deletions(-)
 create mode 100644 kernel/time/time_test.c

diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 83e158d016bad..3610b1bef1421 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -64,6 +64,15 @@ config LEGACY_TIMER_TICK
 	  lack support for the generic clockevent framework.
 	  New platforms should use generic clockevents instead.
 
+config TIME_KUNIT_TEST
+	tristate "KUnit test for kernel/time functions" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Enable this option to test kernel/time functions.
+
+	  If unsure, say N.
+
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"
 
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 1ed85b25b0968..7e875e63ff3b6 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_DEBUG_FS)				+= timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)			+= test_udelay.o
 obj-$(CONFIG_TIME_NS)				+= namespace.o
 obj-$(CONFIG_TEST_CLOCKSOURCE_WATCHDOG)		+= clocksource-wdtest.o
+obj-$(CONFIG_TIME_KUNIT_TEST)			+= time_test.o
diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
new file mode 100644
index 0000000000000..341ebfad5e99d
--- /dev/null
+++ b/kernel/time/time_test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: LGPL-2.1+
+
+#include <kunit/test.h>
+#include <linux/time.h>
+
+/*
+ * Traditional implementation of leap year evaluation.
+ */
+static bool is_leap(long year)
+{
+	return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
+}
+
+/*
+ * Gets the last day of a month.
+ */
+static int last_day_of_month(long year, int month)
+{
+	if (month == 2)
+		return 28 + is_leap(year);
+	if (month == 4 || month == 6 || month == 9 || month == 11)
+		return 30;
+	return 31;
+}
+
+/*
+ * Advances a date by one day.
+ */
+static void advance_date(long *year, int *month, int *mday, int *yday)
+{
+	if (*mday != last_day_of_month(*year, *month)) {
+		++*mday;
+		++*yday;
+		return;
+	}
+
+	*mday = 1;
+	if (*month != 12) {
+		++*month;
+		++*yday;
+		return;
+	}
+
+	*month = 1;
+	*yday  = 0;
+	++*year;
+}
+
+/*
+ * Checks every day in a 160000 years interval centered at 1970-01-01
+ * against the expected result.
+ */
+static void time64_to_tm_test_date_range(struct kunit *test)
+{
+	/*
+	 * 80000 years	= (80000 / 400) * 400 years
+	 *		= (80000 / 400) * 146097 days
+	 *		= (80000 / 400) * 146097 * 86400 seconds
+	 */
+	time64_t total_secs = ((time64_t) 80000) / 400 * 146097 * 86400;
+	long year = 1970 - 80000;
+	int month = 1;
+	int mdday = 1;
+	int yday = 0;
+
+	struct tm result;
+	time64_t secs;
+	s64 days;
+
+	for (secs = -total_secs; secs <= total_secs; secs += 86400) {
+
+		time64_to_tm(secs, 0, &result);
+
+		days = div_s64(secs, 86400);
+
+		#define FAIL_MSG "%05ld/%02d/%02d (%2d) : %ld", \
+			year, month, mdday, yday, days
+
+		KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, mdday, result.tm_mday, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG);
+
+		advance_date(&year, &month, &mdday, &yday);
+	}
+}
+
+static struct kunit_case time_test_cases[] = {
+	KUNIT_CASE(time64_to_tm_test_date_range),
+	{}
+};
+
+static struct kunit_suite time_test_suite = {
+	.name = "time_test_cases",
+	.test_cases = time_test_cases,
+};
+
+kunit_test_suite(time_test_suite);
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
index 62e3b46717a67..59b922c826e77 100644
--- a/kernel/time/timeconv.c
+++ b/kernel/time/timeconv.c
@@ -22,47 +22,16 @@
 
 /*
  * Converts the calendar time to broken-down time representation
- * Based on code from glibc-2.6
  *
  * 2009-7-14:
  *   Moved from glibc-2.6 to kernel by Zhaolei<zhaolei@cn.fujitsu.com>
+ * 2021-06-02:
+ *   Reimplemented by Cassio Neri <cassio.neri@gmail.com>
  */
 
 #include <linux/time.h>
 #include <linux/module.h>
-
-/*
- * Nonzero if YEAR is a leap year (every 4 years,
- * except every 100th isn't, and every 400th is).
- */
-static int __isleap(long year)
-{
-	return (year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0);
-}
-
-/* do a mathdiv for long type */
-static long math_div(long a, long b)
-{
-	return a / b - (a % b < 0);
-}
-
-/* How many leap years between y1 and y2, y1 must less or equal to y2 */
-static long leaps_between(long y1, long y2)
-{
-	long leaps1 = math_div(y1 - 1, 4) - math_div(y1 - 1, 100)
-		+ math_div(y1 - 1, 400);
-	long leaps2 = math_div(y2 - 1, 4) - math_div(y2 - 1, 100)
-		+ math_div(y2 - 1, 400);
-	return leaps2 - leaps1;
-}
-
-/* How many days come before each month (0-12). */
-static const unsigned short __mon_yday[2][13] = {
-	/* Normal years. */
-	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
-	/* Leap years. */
-	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
-};
+#include <linux/kernel.h>
 
 #define SECS_PER_HOUR	(60 * 60)
 #define SECS_PER_DAY	(SECS_PER_HOUR * 24)
@@ -77,9 +46,11 @@ static const unsigned short __mon_yday[2][13] = {
  */
 void time64_to_tm(time64_t totalsecs, int offset, struct tm *result)
 {
-	long days, rem, y;
+	u32 u32tmp, day_of_century, year_of_century, day_of_year, month, day;
+	u64 u64tmp, udays, century, year;
+	bool is_Jan_or_Feb, is_leap_year;
+	long days, rem;
 	int remainder;
-	const unsigned short *ip;
 
 	days = div_s64_rem(totalsecs, SECS_PER_DAY, &remainder);
 	rem = remainder;
@@ -103,27 +74,68 @@ void time64_to_tm(time64_t totalsecs, int offset, struct tm *result)
 	if (result->tm_wday < 0)
 		result->tm_wday += 7;
 
-	y = 1970;
-
-	while (days < 0 || days >= (__isleap(y) ? 366 : 365)) {
-		/* Guess a corrected year, assuming 365 days per year. */
-		long yg = y + math_div(days, 365);
-
-		/* Adjust DAYS and Y to match the guessed year. */
-		days -= (yg - y) * 365 + leaps_between(y, yg);
-		y = yg;
-	}
-
-	result->tm_year = y - 1900;
-
-	result->tm_yday = days;
-
-	ip = __mon_yday[__isleap(y)];
-	for (y = 11; days < ip[y]; y--)
-		continue;
-	days -= ip[y];
-
-	result->tm_mon = y;
-	result->tm_mday = days + 1;
+	/*
+	 * The following algorithm is, basically, Proposition 6.3 of Neri
+	 * and Schneider [1]. In a few words: it works on the computational
+	 * (fictitious) calendar where the year starts in March, month = 2
+	 * (*), and finishes in February, month = 13. This calendar is
+	 * mathematically convenient because the day of the year does not
+	 * depend on whether the year is leap or not. For instance:
+	 *
+	 * March 1st		0-th day of the year;
+	 * ...
+	 * April 1st		31-st day of the year;
+	 * ...
+	 * January 1st		306-th day of the year; (Important!)
+	 * ...
+	 * February 28th	364-th day of the year;
+	 * February 29th	365-th day of the year (if it exists).
+	 *
+	 * After having worked out the date in the computational calendar
+	 * (using just arithmetics) it's easy to convert it to the
+	 * corresponding date in the Gregorian calendar.
+	 *
+	 * [1] "Euclidean Affine Functions and Applications to Calendar
+	 * Algorithms". https://arxiv.org/abs/2102.06959
+	 *
+	 * (*) The numbering of months follows tm more closely and thus,
+	 * is slightly different from [1].
+	 */
+
+	udays	= ((u64) days) + 2305843009213814918ULL;
+
+	u64tmp		= 4 * udays + 3;
+	century		= div64_u64_rem(u64tmp, 146097, &u64tmp);
+	day_of_century	= (u32) (u64tmp / 4);
+
+	u32tmp		= 4 * day_of_century + 3;
+	u64tmp		= 2939745ULL * u32tmp;
+	year_of_century	= upper_32_bits(u64tmp);
+	day_of_year	= lower_32_bits(u64tmp) / 2939745 / 4;
+
+	year		= 100 * century + year_of_century;
+	is_leap_year	= year_of_century ? !(year_of_century % 4) : !(century % 4);
+
+	u32tmp		= 2141 * day_of_year + 132377;
+	month		= u32tmp >> 16;
+	day		= ((u16) u32tmp) / 2141;
+
+	/*
+	 * Recall that January 1st is the 306-th day of the year in the
+	 * computational (not Gregorian) calendar.
+	 */
+	is_Jan_or_Feb	= day_of_year >= 306;
+
+	/* Convert to the Gregorian calendar and adjust to Unix time. */
+	year		= year + is_Jan_or_Feb - 6313183731940000ULL;
+	month		= is_Jan_or_Feb ? month - 12 : month;
+	day		= day + 1;
+	day_of_year	+= is_Jan_or_Feb ? -306 : 31 + 28 + is_leap_year;
+
+	/* Convert to tm's format. */
+	result->tm_year = (long) (year - 1900);
+	result->tm_mon  = (int) month;
+	result->tm_mday = (int) day;
+	result->tm_yday = (int) day_of_year;
 }
 EXPORT_SYMBOL(time64_to_tm);
-- 
GitLab


From 10043bb6af4230c57aeabaee02e6a42302f18d0f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 8 Jun 2021 17:07:13 +0100
Subject: [PATCH 3667/3804] ASoC: rt5645: Avoid upgrading static warnings to
 errors

One of the fixes reverted as part of the UMN fallout was actually fine,
however rather than undoing the revert the process that handled all this
stuff resulted in a patch which attempted to add extra error checks
instead.  Unfortunately this new change wasn't really based on a good
understanding of the subsystem APIs and bypassed the usual patch flow
without ensuring it was reviewed by people with subsystem knowledge and
was merged as a fix rather than during the merge window.

The effect of the new fix is to upgrade what were previously warnings on
static data in the code to hard errors on that data.  If this actually
happens then it would break existing systems, if it doesn't happen then
the change has no effect so this was not a safe change to apply as a fix
to the release candidates.  Since the new code has not been tested and
doesn't in practice improve error handling revert it instead, and also
drop the original revert since the original fix was fine.  This takes
the driver back to what it was in -rc1.

Fixes: 5e70b8e22b64e ("ASoC: rt5645: add error checking to rt5645_probe function")
Fixes: 1e0ce84215dbf ("Revert "ASoC: rt5645: fix a NULL pointer dereference"")
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Phillip Potter <phil@philpotter.co.uk>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20210608160713.21040-1-broonie@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
(cherry picked from commit 916cccb5078eee57fce131c5fe18e417545083e2)
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/soc/codecs/rt5645.c | 49 +++++++++------------------------------
 1 file changed, 11 insertions(+), 38 deletions(-)

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 438fa18bcb55d..9408ee63cb268 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3388,44 +3388,30 @@ static int rt5645_probe(struct snd_soc_component *component)
 {
 	struct snd_soc_dapm_context *dapm = snd_soc_component_get_dapm(component);
 	struct rt5645_priv *rt5645 = snd_soc_component_get_drvdata(component);
-	int ret = 0;
 
 	rt5645->component = component;
 
 	switch (rt5645->codec_type) {
 	case CODEC_TYPE_RT5645:
-		ret = snd_soc_dapm_new_controls(dapm,
+		snd_soc_dapm_new_controls(dapm,
 			rt5645_specific_dapm_widgets,
 			ARRAY_SIZE(rt5645_specific_dapm_widgets));
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_add_routes(dapm,
+		snd_soc_dapm_add_routes(dapm,
 			rt5645_specific_dapm_routes,
 			ARRAY_SIZE(rt5645_specific_dapm_routes));
-		if (ret < 0)
-			goto exit;
-
 		if (rt5645->v_id < 3) {
-			ret = snd_soc_dapm_add_routes(dapm,
+			snd_soc_dapm_add_routes(dapm,
 				rt5645_old_dapm_routes,
 				ARRAY_SIZE(rt5645_old_dapm_routes));
-			if (ret < 0)
-				goto exit;
 		}
 		break;
 	case CODEC_TYPE_RT5650:
-		ret = snd_soc_dapm_new_controls(dapm,
+		snd_soc_dapm_new_controls(dapm,
 			rt5650_specific_dapm_widgets,
 			ARRAY_SIZE(rt5650_specific_dapm_widgets));
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_add_routes(dapm,
+		snd_soc_dapm_add_routes(dapm,
 			rt5650_specific_dapm_routes,
 			ARRAY_SIZE(rt5650_specific_dapm_routes));
-		if (ret < 0)
-			goto exit;
 		break;
 	}
 
@@ -3433,17 +3419,9 @@ static int rt5645_probe(struct snd_soc_component *component)
 
 	/* for JD function */
 	if (rt5645->pdata.jd_mode) {
-		ret = snd_soc_dapm_force_enable_pin(dapm, "JD Power");
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_force_enable_pin(dapm, "LDO2");
-		if (ret < 0)
-			goto exit;
-
-		ret = snd_soc_dapm_sync(dapm);
-		if (ret < 0)
-			goto exit;
+		snd_soc_dapm_force_enable_pin(dapm, "JD Power");
+		snd_soc_dapm_force_enable_pin(dapm, "LDO2");
+		snd_soc_dapm_sync(dapm);
 	}
 
 	if (rt5645->pdata.long_name)
@@ -3454,14 +3432,9 @@ static int rt5645_probe(struct snd_soc_component *component)
 		GFP_KERNEL);
 
 	if (!rt5645->eq_param)
-		ret = -ENOMEM;
-exit:
-	/*
-	 * If there was an error above, everything will be cleaned up by the
-	 * caller if we return an error here.  This will be done with a later
-	 * call to rt5645_remove().
-	 */
-	return ret;
+		return -ENOMEM;
+
+	return 0;
 }
 
 static void rt5645_remove(struct snd_soc_component *component)
-- 
GitLab


From 5c6d4f97267f02f47acea8a652265348ec12de51 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@kernel.org>
Date: Sun, 20 Jun 2021 11:01:35 -0500
Subject: [PATCH 3668/3804] MAINTAINERS: remove Timur Tabi from Freescale SOC
 sound drivers

I haven't touched these drivers in seven years, and none of the
patches sent to me these days affect code that I wrote.  The
other maintainers are doing a very good job without me.

Signed-off-by: Timur Tabi <timur@kernel.org>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20210620160135.28651-1-timur@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
(cherry picked from commit 50b1ce617d66d04f1f9006e51793e6cffcdec6ea)
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8c5ee008301a6..b3b9a253316f9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7354,7 +7354,6 @@ F:	drivers/net/ethernet/freescale/fs_enet/
 F:	include/linux/fs_enet_pd.h
 
 FREESCALE SOC SOUND DRIVERS
-M:	Timur Tabi <timur@kernel.org>
 M:	Nicolin Chen <nicoleotsuka@gmail.com>
 M:	Xiubo Li <Xiubo.Lee@gmail.com>
 R:	Fabio Estevam <festevam@gmail.com>
-- 
GitLab


From 3de218ff39b9e3f0d453fe3154f12a174de44b25 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 23 Jun 2021 15:09:13 +0200
Subject: [PATCH 3669/3804] xen/events: reset active flag for lateeoi events
 later

In order to avoid a race condition for user events when changing
cpu affinity, reset the active flag only when EOI-ing the event.

This is working fine as all user events are lateeoi events. Note that
lateeoi_ack_mask_dynirq() is not modified as there is no explicit call
to xen_irq_lateeoi() expected later.

Cc: stable@vger.kernel.org
Reported-by: Julien Grall <julien@xen.org>
Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time")
Tested-by: Julien Grall <julien@xen.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/events/events_base.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 7bbfd58958bcc..d7e361fb05482 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -642,6 +642,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
 	}
 
 	info->eoi_time = 0;
+
+	/* is_active hasn't been reset yet, do it now. */
+	smp_store_release(&info->is_active, 0);
 	do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
@@ -811,6 +814,7 @@ static void xen_evtchn_close(evtchn_port_t port)
 		BUG();
 }
 
+/* Not called for lateeoi events. */
 static void event_handler_exit(struct irq_info *info)
 {
 	smp_store_release(&info->is_active, 0);
@@ -1883,7 +1887,12 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
 
 	if (VALID_EVTCHN(evtchn)) {
 		do_mask(info, EVT_MASK_REASON_EOI_PENDING);
-		event_handler_exit(info);
+		/*
+		 * Don't call event_handler_exit().
+		 * Need to keep is_active non-zero in order to ignore re-raised
+		 * events after cpu affinity changes while a lateeoi is pending.
+		 */
+		clear_evtchn(evtchn);
 	}
 }
 
-- 
GitLab


From 50e7a31d30e8221632675abed3be306382324ca2 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Wed, 16 Jun 2021 17:19:06 +0200
Subject: [PATCH 3670/3804] media: Fix Media Controller API config checks

Smatch static checker warns that "mdev" can be null:

sound/usb/media.c:287 snd_media_device_create()
    warn: 'mdev' can also be NULL

If CONFIG_MEDIA_CONTROLLER is disabled, this file should not be included
in the build.

The below conditions in the sound/usb/Makefile are in place to ensure that
media.c isn't included in the build.

sound/usb/Makefile:
snd-usb-audio-$(CONFIG_SND_USB_AUDIO_USE_MEDIA_CONTROLLER) += media.o

select SND_USB_AUDIO_USE_MEDIA_CONTROLLER if MEDIA_CONTROLLER &&
       (MEDIA_SUPPORT=y || MEDIA_SUPPORT=SND_USB_AUDIO)

The following config check in include/media/media-dev-allocator.h is
in place to enable the API only when CONFIG_MEDIA_CONTROLLER and
CONFIG_USB are enabled.

 #if defined(CONFIG_MEDIA_CONTROLLER) && defined(CONFIG_USB)

This check doesn't work as intended when CONFIG_USB=m. When CONFIG_USB=m,
CONFIG_USB_MODULE is defined and CONFIG_USB is not. The above config check
doesn't catch that CONFIG_USB is defined as a module and disables the API.
This results in sound/usb enabling Media Controller specific ALSA driver
code, while Media disables the Media Controller API.

Fixing the problem requires two changes:

1. Change the check to use IS_ENABLED to detect when CONFIG_USB is enabled
   as a module or static. Since CONFIG_MEDIA_CONTROLLER is a bool, leave
   the check unchanged to be consistent with drivers/media/Makefile.

2. Change the drivers/media/mc/Makefile to include mc-dev-allocator.o
   in mc-objs when CONFIG_USB is enabled.

Link: https://lore.kernel.org/alsa-devel/YLeAvT+R22FQ%2FEyw@mwanda/

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/mc/Makefile           | 2 +-
 include/media/media-dev-allocator.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/mc/Makefile b/drivers/media/mc/Makefile
index 119037f0e686d..2b7af42ba59c1 100644
--- a/drivers/media/mc/Makefile
+++ b/drivers/media/mc/Makefile
@@ -3,7 +3,7 @@
 mc-objs	:= mc-device.o mc-devnode.o mc-entity.o \
 	   mc-request.o
 
-ifeq ($(CONFIG_USB),y)
+ifneq ($(CONFIG_USB),)
 	mc-objs += mc-dev-allocator.o
 endif
 
diff --git a/include/media/media-dev-allocator.h b/include/media/media-dev-allocator.h
index b35ea6062596b..2ab54d426c644 100644
--- a/include/media/media-dev-allocator.h
+++ b/include/media/media-dev-allocator.h
@@ -19,7 +19,7 @@
 
 struct usb_device;
 
-#if defined(CONFIG_MEDIA_CONTROLLER) && defined(CONFIG_USB)
+#if defined(CONFIG_MEDIA_CONTROLLER) && IS_ENABLED(CONFIG_USB)
 /**
  * media_device_usb_allocate() - Allocate and return struct &media device
  *
-- 
GitLab


From d330099115597bbc238d6758a4930e72b49ea9ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 11 Jun 2021 14:34:50 +0200
Subject: [PATCH 3671/3804] drm/nouveau: fix dma_address check for CPU/GPU sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AGP for example doesn't have a dma_address array.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210614110517.1624-1-christian.koenig@amd.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 3e09df0472ce4..170aba99a1101 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -546,7 +546,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
 	struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm;
 	int i, j;
 
-	if (!ttm_dma)
+	if (!ttm_dma || !ttm_dma->dma_address)
 		return;
 	if (!ttm_dma->pages) {
 		NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
@@ -582,7 +582,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 	struct ttm_tt *ttm_dma = (struct ttm_tt *)nvbo->bo.ttm;
 	int i, j;
 
-	if (!ttm_dma)
+	if (!ttm_dma || !ttm_dma->dma_address)
 		return;
 	if (!ttm_dma->pages) {
 		NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
-- 
GitLab


From 98db7259fa7b963d80da49fd636744e28a78981e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Jun 2021 14:21:05 +0100
Subject: [PATCH 3672/3804] KVM: arm64: Set the MTE tag bit before releasing
 the page

Setting a page flag without holding a reference to the page
is living dangerously. In the tag-writing path, we drop the
reference to the page by calling kvm_release_pfn_dirty(),
and only then set the PG_mte_tagged bit.

It would be safer to do it the other way round.

Fixes: f0376edb1ddca ("KVM: arm64: Add ioctl to fetch/store tags in a guest")
Cc: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/87k0mjidwb.wl-maz@kernel.org
---
 arch/arm64/kvm/guest.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 4ddb20017b2f5..60815ae477cfb 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1053,6 +1053,14 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 		} else {
 			num_tags = mte_copy_tags_from_user(maddr, tags,
 							MTE_GRANULES_PER_PAGE);
+
+			/*
+			 * Set the flag after checking the write
+			 * completed fully
+			 */
+			if (num_tags == MTE_GRANULES_PER_PAGE)
+				set_bit(PG_mte_tagged, &page->flags);
+
 			kvm_release_pfn_dirty(pfn);
 		}
 
@@ -1061,10 +1069,6 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 			goto out;
 		}
 
-		/* Set the flag after checking the write completed fully */
-		if (write)
-			set_bit(PG_mte_tagged, &page->flags);
-
 		gfn++;
 		tags += num_tags;
 		length -= PAGE_SIZE;
-- 
GitLab


From b50aa49638c7e12abf4ecc483f4e928c5cccc1b0 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 23 Jun 2021 07:22:30 +0300
Subject: [PATCH 3673/3804] hwmon: (lm90) Prevent integer underflows of
 temperature calculations

The min/max/crit and all other temperature values that are passed to
the driver are unbounded, and a value that is close to INT_MIN results
in integer underflow of the temperature calculations made by the driver
for the LM99 sensor. Temperature hysteresis is among those values that
need to be limited, but limiting of hysteresis is independent of the
sensor version. Add the missing limits.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210623042231.16008-2-digetx@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/lm90.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index b53f17511b054..567b7c521f388 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -1029,8 +1029,11 @@ static int lm90_set_temp11(struct lm90_data *data, int index, long val)
 	int err;
 
 	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && index <= 2)
+	if (data->kind == lm99 && index <= 2) {
+		/* prevent integer underflow */
+		val = max(val, -128000l);
 		val -= 16000;
+	}
 
 	if (data->kind == adt7461 || data->kind == tmp451)
 		data->temp11[index] = temp_to_u16_adt7461(data, val);
@@ -1089,8 +1092,11 @@ static int lm90_set_temp8(struct lm90_data *data, int index, long val)
 	int err;
 
 	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && index == 3)
+	if (data->kind == lm99 && index == 3) {
+		/* prevent integer underflow */
+		val = max(val, -128000l);
 		val -= 16000;
+	}
 
 	if (data->kind == adt7461 || data->kind == tmp451)
 		data->temp8[index] = temp_to_u8_adt7461(data, val);
@@ -1137,6 +1143,9 @@ static int lm90_set_temphyst(struct lm90_data *data, long val)
 	else
 		temp = temp_from_s8(data->temp8[LOCAL_CRIT]);
 
+	/* prevent integer underflow */
+	val = max(val, -128000l);
+
 	data->temp_hyst = hyst_to_reg(temp - val);
 	err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST,
 					data->temp_hyst);
-- 
GitLab


From a5f6c0f85a09f46c88c0ac53f3d2f70eef105a65 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 23 Jun 2021 07:22:31 +0300
Subject: [PATCH 3674/3804] hwmon: Support set_trips() of thermal device ops

Support set_trips() callback of thermal device ops. This allows HWMON
device to operatively notify thermal core about temperature changes, which
is very handy to have in a case where HWMON sensor is used by CPU thermal
zone that performs passive cooling and emergency shutdown on overheat.
Thermal core will be able to react faster to temperature changes.

The set_trips() callback is entirely optional. If the HWMON sensor doesn't
support setting thermal trips, then the callback is a NO-OP. The dummy
callback has no effect on the thermal core. The temperature trips
either complement the temperature polling mechanism of the thermal core
or replace the polling if the sensor can set the trips and polling is
disabled by a particular device in a device-tree.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Link: https://lore.kernel.org/r/20210623042231.16008-3-digetx@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index fd47ab4e68922..8d3b1dae31df1 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -153,8 +153,44 @@ static int hwmon_thermal_get_temp(void *data, int *temp)
 	return 0;
 }
 
+static int hwmon_thermal_set_trips(void *data, int low, int high)
+{
+	struct hwmon_thermal_data *tdata = data;
+	struct hwmon_device *hwdev = to_hwmon_device(tdata->dev);
+	const struct hwmon_chip_info *chip = hwdev->chip;
+	const struct hwmon_channel_info **info = chip->info;
+	unsigned int i;
+	int err;
+
+	if (!chip->ops->write)
+		return 0;
+
+	for (i = 0; info[i] && info[i]->type != hwmon_temp; i++)
+		continue;
+
+	if (!info[i])
+		return 0;
+
+	if (info[i]->config[tdata->index] & HWMON_T_MIN) {
+		err = chip->ops->write(tdata->dev, hwmon_temp,
+				       hwmon_temp_min, tdata->index, low);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	if (info[i]->config[tdata->index] & HWMON_T_MAX) {
+		err = chip->ops->write(tdata->dev, hwmon_temp,
+				       hwmon_temp_max, tdata->index, high);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	return 0;
+}
+
 static const struct thermal_zone_of_device_ops hwmon_thermal_ops = {
 	.get_temp = hwmon_thermal_get_temp,
+	.set_trips = hwmon_thermal_set_trips,
 };
 
 static void hwmon_thermal_remove_sensor(void *data)
-- 
GitLab


From a75a895e6457784fdf2a0a20a024ae29ff8a7f28 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 22:05:21 +0200
Subject: [PATCH 3675/3804] KVM: selftests: Unconditionally use memslot 0 for
 vaddr allocations

Drop the memslot param(s) from vm_vaddr_alloc() now that all callers
directly specify '0' as the memslot.  Drop the memslot param from
virt_pgd_alloc() as well since vm_vaddr_alloc() is its only user.
I.e. shove the hardcoded '0' down to the vm_phy_pages_alloc() calls.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util.h      |  5 ++---
 tools/testing/selftests/kvm/lib/aarch64/processor.c |  6 +++---
 tools/testing/selftests/kvm/lib/elf.c               |  2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c          | 12 +++++-------
 tools/testing/selftests/kvm/lib/s390x/processor.c   |  6 +++---
 tools/testing/selftests/kvm/lib/x86_64/processor.c  |  6 +++---
 tools/testing/selftests/kvm/x86_64/get_cpuid_test.c |  2 +-
 7 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 70385bf25446f..72cdd4d0a6ee5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -140,8 +140,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-			  uint32_t data_memslot, uint32_t pgd_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
 
@@ -239,7 +238,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
 
 const char *exit_reason_str(unsigned int exit_reason);
 
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pgd_alloc(struct kvm_vm *vm);
 
 /*
  * VM Virtual Page Map
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0c..eb079d828b361 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -72,12 +72,12 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
 	return 1 << (vm->page_shift - 3);
 }
 
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
 {
 	if (!vm->pgd_created) {
 		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
 			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 		vm->pgd = paddr;
 		vm->pgd_created = true;
 	}
@@ -302,7 +302,7 @@ void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
 					DEFAULT_STACK_PGS * vm->page_size :
 					vm->page_size;
 	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-					DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+					      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
 
 	vm_vcpu_add(vm, vcpuid);
 	aarch64_vcpu_setup(vm, vcpuid, init);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index edeeaf73d3b15..eac44f5d0db03 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -163,7 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
 		seg_vend |= vm->page_size - 1;
 		size_t seg_size = seg_vend - seg_vstart + 1;
 
-		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart, 0, 0);
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
 		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
 			"virtual memory for segment at requested min addr,\n"
 			"  segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6c02ff93e8078..65369a42808bd 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1247,15 +1247,13 @@ va_found:
  * a unique set of pages, with the minimum real allocation being at least
  * a page.
  */
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
-			  uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
-	virt_pgd_alloc(vm, pgd_memslot);
+	virt_pgd_alloc(vm);
 	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-					      KVM_UTIL_MIN_PFN * vm->page_size,
-					      data_memslot);
+					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
 
 	/*
 	 * Find an unused range of virtual page addresses of at least
@@ -1267,7 +1265,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
 		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
 
-		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		virt_pg_map(vm, vaddr, paddr, 0);
 
 		sparsebit_set(vm->vpages_mapped,
 			vaddr >> vm->page_shift);
@@ -1292,7 +1290,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
  */
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
 {
-	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR, 0, 0);
+	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 0152f356c0994..b46e90b888202 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -13,7 +13,7 @@
 
 #define PAGES_PER_REGION 4
 
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
 {
 	vm_paddr_t paddr;
 
@@ -24,7 +24,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
 		return;
 
 	paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
-				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 	memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
 
 	vm->pgd = paddr;
@@ -170,7 +170,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 		    vm->page_size);
 
 	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+				     DEFAULT_GUEST_STACK_VADDR_MIN);
 
 	vm_vcpu_add(vm, vcpuid);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 0c51526c2824c..f4b90de00410b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -207,7 +207,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
 	}
 }
 
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
 {
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -215,7 +215,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
 	/* If needed, create page map l4 table. */
 	if (!vm->pgd_created) {
 		vm_paddr_t paddr = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 		vm->pgd = paddr;
 		vm->pgd_created = true;
 	}
@@ -580,7 +580,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	struct kvm_regs regs;
 	vm_vaddr_t stack_vaddr;
 	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
-				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+				     DEFAULT_GUEST_STACK_VADDR_MIN);
 
 	/* Create VCPU */
 	vm_vcpu_add(vm, vcpuid);
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 5e5682691f870..a711f83749eab 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -145,7 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
 struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
 {
 	int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
-	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR, 0, 0);
+	vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
 	struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
 
 	memcpy(guest_cpuids, cpuid, size);
-- 
GitLab


From 4307af730b8543714a76be9d77422a5762671435 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:22 -0700
Subject: [PATCH 3676/3804] KVM: selftests: Unconditionally use memslot '0' for
 page table allocations

Drop the memslot param from virt_pg_map() and virt_map() and shove the
hardcoded '0' down to the vm_phy_page_alloc() calls.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_test.c      |  2 +-
 tools/testing/selftests/kvm/include/kvm_util.h    |  5 ++---
 tools/testing/selftests/kvm/kvm_page_table_test.c |  2 +-
 .../testing/selftests/kvm/lib/aarch64/processor.c | 15 +++++++--------
 tools/testing/selftests/kvm/lib/aarch64/ucall.c   |  2 +-
 tools/testing/selftests/kvm/lib/kvm_util.c        |  6 +++---
 tools/testing/selftests/kvm/lib/perf_test_util.c  |  2 +-
 tools/testing/selftests/kvm/lib/s390x/processor.c |  9 ++++-----
 .../testing/selftests/kvm/lib/x86_64/processor.c  |  9 ++++-----
 tools/testing/selftests/kvm/memslot_perf_test.c   |  2 +-
 .../selftests/kvm/set_memory_region_test.c        |  2 +-
 tools/testing/selftests/kvm/steal_time.c          |  2 +-
 .../selftests/kvm/x86_64/vmx_dirty_log_test.c     |  2 +-
 .../testing/selftests/kvm/x86_64/xapic_ipi_test.c |  2 +-
 .../selftests/kvm/x86_64/xen_shinfo_test.c        |  2 +-
 .../selftests/kvm/x86_64/xen_vmcall_test.c        |  2 +-
 16 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 9026fa4ea133c..5fe0140e407e6 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -760,7 +760,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 				    KVM_MEM_LOG_DIRTY_PAGES);
 
 	/* Do mapping for the dirty track memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
 
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 72cdd4d0a6ee5..532541ac1e358 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -145,7 +145,7 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
 
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	      unsigned int npages, uint32_t pgd_memslot);
+	      unsigned int npages);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
@@ -256,8 +256,7 @@ void virt_pgd_alloc(struct kvm_vm *vm);
  * Within @vm, creates a virtual translation for the page starting
  * at @vaddr to the page starting at @paddr.
  */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		 uint32_t memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
 
 vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 			     uint32_t memslot);
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 82171f17c1d7f..0d04a7db7f249 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -303,7 +303,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 				    TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
 
 	/* Do mapping(GVA->GPA) for the testing memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
 
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index eb079d828b361..ba6f0cff78929 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -83,8 +83,8 @@ void virt_pgd_alloc(struct kvm_vm *vm)
 	}
 }
 
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		  uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+			 uint64_t flags)
 {
 	uint8_t attr_idx = flags & 7;
 	uint64_t *ptep;
@@ -105,7 +105,7 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
 	if (!*ptep) {
-		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 		*ptep |= 3;
 	}
 
@@ -113,14 +113,14 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	case 4:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
 		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 			*ptep |= 3;
 		}
 		/* fall through */
 	case 3:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
 		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 			*ptep |= 3;
 		}
 		/* fall through */
@@ -135,12 +135,11 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
 }
 
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-		 uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
 	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
 
-	_virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+	_virt_pg_map(vm, vaddr, paddr, attr_idx);
 }
 
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 2f37b90ee1a94..e0b0164e9af85 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,7 +14,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
 	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
 		return false;
 
-	virt_pg_map(vm, gpa, gpa, 0);
+	virt_pg_map(vm, gpa, gpa);
 
 	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
 	sync_global_to_guest(vm, ucall_exit_mmio_addr);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 65369a42808bd..06b8fa60840c2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1265,7 +1265,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
 	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
 		pages--, vaddr += vm->page_size, paddr += vm->page_size) {
 
-		virt_pg_map(vm, vaddr, paddr, 0);
+		virt_pg_map(vm, vaddr, paddr);
 
 		sparsebit_set(vm->vpages_mapped,
 			vaddr >> vm->page_shift);
@@ -1330,7 +1330,7 @@ vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
  * @npages starting at @vaddr to the page range starting at @paddr.
  */
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	      unsigned int npages, uint32_t pgd_memslot)
+	      unsigned int npages)
 {
 	size_t page_size = vm->page_size;
 	size_t size = npages * page_size;
@@ -1339,7 +1339,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		virt_pg_map(vm, vaddr, paddr);
 		vaddr += page_size;
 		paddr += page_size;
 	}
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 7397ca2998358..b488f4aefea82 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -101,7 +101,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 				    guest_num_pages, 0);
 
 	/* Do mapping for the demand paging memory slot */
-	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+	virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
 
 	ucall_init(vm, NULL);
 
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index b46e90b888202..fbc4ea2a0d643 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -36,12 +36,12 @@ void virt_pgd_alloc(struct kvm_vm *vm)
  * a page table (ri == 4). Returns a suitable region/segment table entry
  * which points to the freshly allocated pages.
  */
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
 {
 	uint64_t taddr;
 
 	taddr = vm_phy_pages_alloc(vm,  ri < 4 ? PAGES_PER_REGION : 1,
-				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
 	memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
 
 	return (taddr & REGION_ENTRY_ORIGIN)
@@ -49,8 +49,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
 		| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
 }
 
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
-		 uint32_t memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
 {
 	int ri, idx;
 	uint64_t *entry;
@@ -77,7 +76,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
 	for (ri = 1; ri <= 4; ri++) {
 		idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
 		if (entry[idx] & REGION_ENTRY_INVALID)
-			entry[idx] = virt_alloc_region(vm, ri, memslot);
+			entry[idx] = virt_alloc_region(vm, ri);
 		entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
 	}
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f4b90de00410b..92ec0b65c5460 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -221,8 +221,7 @@ void virt_pgd_alloc(struct kvm_vm *vm)
 	}
 }
 
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
-	uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
 	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
@@ -256,7 +255,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	pml4e = addr_gpa2hva(vm, vm->pgd);
 	if (!pml4e[index[3]].present) {
 		pml4e[index[3]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pml4e[index[3]].writable = true;
 		pml4e[index[3]].present = true;
@@ -267,7 +266,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
 	if (!pdpe[index[2]].present) {
 		pdpe[index[2]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pdpe[index[2]].writable = true;
 		pdpe[index[2]].present = true;
@@ -278,7 +277,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
 	if (!pde[index[1]].present) {
 		pde[index[1]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pde[index[1]].writable = true;
 		pde[index[1]].present = true;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 11239652d8057..d6e381e01db70 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -306,7 +306,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 		guest_addr += npages * 4096;
 	}
 
-	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);
+	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
 
 	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
 	atomic_init(&sync->start_flag, false);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 978f5b5f4dc02..d79d58eada9f8 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -132,7 +132,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
 	gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
 	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
 
-	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+	virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
 
 	/* Ditto for the host mapping so that both pages can be zeroed. */
 	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index a6fe75cb9a6eb..b0031f2d38fd0 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -293,7 +293,7 @@ int main(int ac, char **av)
 	vm = vm_create_default(0, 0, guest_code);
 	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
-	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
 	ucall_init(vm, NULL);
 
 	/* Add the rest of the VCPUs */
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 537de10685544..18f6361978275 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -97,7 +97,7 @@ int main(int argc, char *argv[])
 	 * Add an identity map for GVA range [0xc0000000, 0xc0002000).  This
 	 * affects both L1 and L2.  However...
 	 */
-	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+	virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
 
 	/*
 	 * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 5a79c8ed46119..1846117ad5840 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -423,7 +423,7 @@ int main(int argc, char *argv[])
 	vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
 	vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
 
-	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+	virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
 
 	vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
 
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 1f4a0599683c5..117bf49a3d795 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -146,7 +146,7 @@ int main(int argc, char *argv[])
 	/* Map a region for the shared_info page */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
-	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
+	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
 
 	struct kvm_xen_hvm_config hvmc = {
 		.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 8389e0bfd7114..adc94452b57c6 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
 	/* Map a region for the hypercall pages */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
 				    HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
-	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+	virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
 
 	for (;;) {
 		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-- 
GitLab


From 444d084b467ce0e99a8d709100ee7ebb0c493515 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:23 -0700
Subject: [PATCH 3677/3804] KVM: selftests: Unconditionally allocate EPT tables
 in memslot 0

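Drop the EPTP memslot param from the EPT mapping helpers (nested_pg_map(),
nested_map(), nested_map_memslot()) and prepare_virtualize_apic_accesses(),
and shove the hardcoded '0' down to the vm_phy_page_alloc() calls.
prepare_eptp() keeps its eptp_memslot param.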

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/vmx.h        | 10 ++++-----
 tools/testing/selftests/kvm/lib/x86_64/vmx.c  | 21 ++++++++-----------
 .../kvm/x86_64/vmx_apic_access_test.c         |  2 +-
 .../selftests/kvm/x86_64/vmx_dirty_log_test.c |  6 +++---
 4 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 516c81d863537..583ceb0d14574 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -608,15 +608,13 @@ bool nested_vmx_supported(void);
 void nested_vmx_check_supported(void);
 
 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		   uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+		   uint64_t nested_paddr, uint64_t paddr);
 void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		 uint64_t nested_paddr, uint64_t paddr, uint64_t size,
-		 uint32_t eptp_memslot);
+		 uint64_t nested_paddr, uint64_t paddr, uint64_t size);
 void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-			uint32_t memslot, uint32_t eptp_memslot);
+			uint32_t memslot);
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
-				      uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
 
 #endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index d568d8cfd44d3..1d26c3979eda1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -393,7 +393,7 @@ void nested_vmx_check_supported(void)
 }
 
 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-	 	   uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+		   uint64_t nested_paddr, uint64_t paddr)
 {
 	uint16_t index[4];
 	struct eptPageTableEntry *pml4e;
@@ -427,7 +427,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	pml4e = vmx->eptp_hva;
 	if (!pml4e[index[3]].readable) {
 		pml4e[index[3]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pml4e[index[3]].writable = true;
 		pml4e[index[3]].readable = true;
@@ -439,7 +439,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
 	if (!pdpe[index[2]].readable) {
 		pdpe[index[2]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pdpe[index[2]].writable = true;
 		pdpe[index[2]].readable = true;
@@ -451,7 +451,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
 	if (!pde[index[1]].readable) {
 		pde[index[1]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
+			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
 			>> vm->page_shift;
 		pde[index[1]].writable = true;
 		pde[index[1]].readable = true;
@@ -492,8 +492,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  * page range starting at nested_paddr to the page range starting at paddr.
  */
 void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
-		uint64_t nested_paddr, uint64_t paddr, uint64_t size,
-		uint32_t eptp_memslot)
+		uint64_t nested_paddr, uint64_t paddr, uint64_t size)
 {
 	size_t page_size = vm->page_size;
 	size_t npages = size / page_size;
@@ -502,7 +501,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
 
 	while (npages--) {
-		nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+		nested_pg_map(vmx, vm, nested_paddr, paddr);
 		nested_paddr += page_size;
 		paddr += page_size;
 	}
@@ -512,7 +511,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
  * physical pages in VM.
  */
 void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
-			uint32_t memslot, uint32_t eptp_memslot)
+			uint32_t memslot)
 {
 	sparsebit_idx_t i, last;
 	struct userspace_mem_region *region =
@@ -528,8 +527,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
 		nested_map(vmx, vm,
 			   (uint64_t)i << vm->page_shift,
 			   (uint64_t)i << vm->page_shift,
-			   1 << vm->page_shift,
-			   eptp_memslot);
+			   1 << vm->page_shift);
 	}
 }
 
@@ -541,8 +539,7 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 	vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
 }
 
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
-				      uint32_t eptp_memslot)
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
 {
 	vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
 	vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index d14888b34adb3..d438c4d3228a4 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -96,7 +96,7 @@ int main(int argc, char *argv[])
 	}
 
 	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	prepare_virtualize_apic_accesses(vmx, vm, 0);
+	prepare_virtualize_apic_accesses(vmx, vm);
 	vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
 
 	while (!done) {
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 18f6361978275..06a64980a5d25 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -107,9 +107,9 @@ int main(int argc, char *argv[])
 	 * meaning after the last call to virt_map.
 	 */
 	prepare_eptp(vmx, vm, 0);
-	nested_map_memslot(vmx, vm, 0, 0);
-	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
-	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+	nested_map_memslot(vmx, vm, 0);
+	nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+	nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
 
 	bmap = bitmap_alloc(TEST_MEM_PAGES);
 	host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
-- 
GitLab


From cce0c23dd944068d7f07a03938d5b3cbcdaf4148 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:24 -0700
Subject: [PATCH 3678/3804] KVM: selftests: Add wrapper to allocate page table
 page

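Add a helper to allocate a page for use in constructing the guest's page
tables.  All architectures have identical address and memslot
requirements (which appear to be arbitrary anyway).  The x86 EPT helpers
are converted as well, so EPT tables are now allocated using
KVM_GUEST_PAGE_TABLE_MIN_PADDR rather than KVM_EPT_PAGE_TABLE_MIN_PADDR.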

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util.h  |  2 ++
 .../selftests/kvm/lib/aarch64/processor.c     | 19 ++++++-------------
 tools/testing/selftests/kvm/lib/kvm_util.c    |  8 ++++++++
 .../selftests/kvm/lib/s390x/processor.c       |  2 --
 .../selftests/kvm/lib/x86_64/processor.c      | 19 ++++---------------
 tools/testing/selftests/kvm/lib/x86_64/vmx.c  | 12 +++---------
 6 files changed, 23 insertions(+), 39 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 532541ac1e358..62573918299cf 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -30,6 +30,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
 
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR		0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR	0x180000
 
 #define DEFAULT_GUEST_PHY_PAGES		512
 #define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
@@ -262,6 +263,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 			     uint32_t memslot);
 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 			      vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
 
 /*
  * Create a VM with reasonable defaults
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index ba6f0cff78929..ad465ca162378 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -11,7 +11,6 @@
 #include "../kvm_util_internal.h"
 #include "processor.h"
 
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
 
 static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
@@ -104,25 +103,19 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		paddr, vm->max_gfn, vm->page_size);
 
 	ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
-	if (!*ptep) {
-		*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-		*ptep |= 3;
-	}
+	if (!*ptep)
+		*ptep = vm_alloc_page_table(vm) | 3;
 
 	switch (vm->pgtable_levels) {
 	case 4:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
-		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-			*ptep |= 3;
-		}
+		if (!*ptep)
+			*ptep = vm_alloc_page_table(vm) | 3;
 		/* fall through */
 	case 3:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
-		if (!*ptep) {
-			*ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-			*ptep |= 3;
-		}
+		if (!*ptep)
+			*ptep = vm_alloc_page_table(vm) | 3;
 		/* fall through */
 	case 2:
 		ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 06b8fa60840c2..7a2b84e812928 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2209,6 +2209,14 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 	return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
 }
 
+/* Arbitrary minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+}
+
 /*
  * Address Guest Virtual to Host Virtual
  *
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index fbc4ea2a0d643..f87c7137598e9 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -9,8 +9,6 @@
 #include "kvm_util.h"
 #include "../kvm_util_internal.h"
 
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR		0x180000
-
 #define PAGES_PER_REGION 4
 
 void virt_pgd_alloc(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 92ec0b65c5460..f96a6221e798b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -17,9 +17,6 @@
 #define DEFAULT_CODE_SELECTOR 0x8
 #define DEFAULT_DATA_SELECTOR 0x10
 
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
 vm_vaddr_t exception_handlers;
 
 /* Virtual translation table structure declarations */
@@ -214,9 +211,7 @@ void virt_pgd_alloc(struct kvm_vm *vm)
 
 	/* If needed, create page map l4 table. */
 	if (!vm->pgd_created) {
-		vm_paddr_t paddr = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-		vm->pgd = paddr;
+		vm->pgd = vm_alloc_page_table(vm);
 		vm->pgd_created = true;
 	}
 }
@@ -254,9 +249,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	/* Allocate page directory pointer table if not present. */
 	pml4e = addr_gpa2hva(vm, vm->pgd);
 	if (!pml4e[index[3]].present) {
-		pml4e[index[3]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pml4e[index[3]].writable = true;
 		pml4e[index[3]].present = true;
 	}
@@ -265,9 +258,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	struct pageDirectoryPointerEntry *pdpe;
 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
 	if (!pdpe[index[2]].present) {
-		pdpe[index[2]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pdpe[index[2]].writable = true;
 		pdpe[index[2]].present = true;
 	}
@@ -276,9 +267,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	struct pageDirectoryEntry *pde;
 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
 	if (!pde[index[1]].present) {
-		pde[index[1]].address = vm_phy_page_alloc(vm,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pde[index[1]].writable = true;
 		pde[index[1]].present = true;
 	}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 1d26c3979eda1..d089d8b850b5c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -426,9 +426,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	/* Allocate page directory pointer table if not present. */
 	pml4e = vmx->eptp_hva;
 	if (!pml4e[index[3]].readable) {
-		pml4e[index[3]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pml4e[index[3]].writable = true;
 		pml4e[index[3]].readable = true;
 		pml4e[index[3]].executable = true;
@@ -438,9 +436,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	struct eptPageTableEntry *pdpe;
 	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
 	if (!pdpe[index[2]].readable) {
-		pdpe[index[2]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pdpe[index[2]].writable = true;
 		pdpe[index[2]].readable = true;
 		pdpe[index[2]].executable = true;
@@ -450,9 +446,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
 	struct eptPageTableEntry *pde;
 	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
 	if (!pde[index[1]].readable) {
-		pde[index[1]].address = vm_phy_page_alloc(vm,
-			  KVM_EPT_PAGE_TABLE_MIN_PADDR, 0)
-			>> vm->page_shift;
+		pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
 		pde[index[1]].writable = true;
 		pde[index[1]].readable = true;
 		pde[index[1]].executable = true;
-- 
GitLab


From 6d96ca6a602b24013c8be1160d40c667e133ddb9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:25 -0700
Subject: [PATCH 3679/3804] KVM: selftests: Rename x86's page table "address"
 to "pfn"

Rename the "address" field to "pfn" in x86's page table structs to match
reality.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c      | 47 +++++++++----------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f96a6221e798b..fa4ad136b34d9 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -30,7 +30,7 @@ struct pageMapL4Entry {
 	uint64_t ignored_06:1;
 	uint64_t page_size:1;
 	uint64_t ignored_11_08:4;
-	uint64_t address:40;
+	uint64_t pfn:40;
 	uint64_t ignored_62_52:11;
 	uint64_t execute_disable:1;
 };
@@ -45,7 +45,7 @@ struct pageDirectoryPointerEntry {
 	uint64_t ignored_06:1;
 	uint64_t page_size:1;
 	uint64_t ignored_11_08:4;
-	uint64_t address:40;
+	uint64_t pfn:40;
 	uint64_t ignored_62_52:11;
 	uint64_t execute_disable:1;
 };
@@ -60,7 +60,7 @@ struct pageDirectoryEntry {
 	uint64_t ignored_06:1;
 	uint64_t page_size:1;
 	uint64_t ignored_11_08:4;
-	uint64_t address:40;
+	uint64_t pfn:40;
 	uint64_t ignored_62_52:11;
 	uint64_t execute_disable:1;
 };
@@ -76,7 +76,7 @@ struct pageTableEntry {
 	uint64_t reserved_07:1;
 	uint64_t global:1;
 	uint64_t ignored_11_09:3;
-	uint64_t address:40;
+	uint64_t pfn:40;
 	uint64_t ignored_62_52:11;
 	uint64_t execute_disable:1;
 };
@@ -249,33 +249,33 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	/* Allocate page directory pointer table if not present. */
 	pml4e = addr_gpa2hva(vm, vm->pgd);
 	if (!pml4e[index[3]].present) {
-		pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
+		pml4e[index[3]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
 		pml4e[index[3]].writable = true;
 		pml4e[index[3]].present = true;
 	}
 
 	/* Allocate page directory table if not present. */
 	struct pageDirectoryPointerEntry *pdpe;
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
 	if (!pdpe[index[2]].present) {
-		pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
+		pdpe[index[2]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
 		pdpe[index[2]].writable = true;
 		pdpe[index[2]].present = true;
 	}
 
 	/* Allocate page table if not present. */
 	struct pageDirectoryEntry *pde;
-	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
 	if (!pde[index[1]].present) {
-		pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
+		pde[index[1]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
 		pde[index[1]].writable = true;
 		pde[index[1]].present = true;
 	}
 
 	/* Fill in page table entry. */
 	struct pageTableEntry *pte;
-	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
-	pte[index[0]].address = paddr >> vm->page_shift;
+	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+	pte[index[0]].pfn = paddr >> vm->page_shift;
 	pte[index[0]].writable = true;
 	pte[index[0]].present = 1;
 }
@@ -305,11 +305,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 			" %u\n",
 			indent, "",
 			pml4e - pml4e_start, pml4e,
-			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
 			pml4e->writable, pml4e->execute_disable);
 
-		pdpe_start = addr_gpa2hva(vm, pml4e->address
-			* vm->page_size);
+		pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
 		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
 			pdpe = &pdpe_start[n2];
 			if (!pdpe->present)
@@ -319,11 +318,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 				indent, "",
 				pdpe - pdpe_start, pdpe,
 				addr_hva2gpa(vm, pdpe),
-				(uint64_t) pdpe->address, pdpe->writable,
+				(uint64_t) pdpe->pfn, pdpe->writable,
 				pdpe->execute_disable);
 
-			pde_start = addr_gpa2hva(vm,
-				pdpe->address * vm->page_size);
+			pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
 			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
 				pde = &pde_start[n3];
 				if (!pde->present)
@@ -332,11 +330,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 					"0x%-12lx 0x%-10lx %u  %u\n",
 					indent, "", pde - pde_start, pde,
 					addr_hva2gpa(vm, pde),
-					(uint64_t) pde->address, pde->writable,
+					(uint64_t) pde->pfn, pde->writable,
 					pde->execute_disable);
 
-				pte_start = addr_gpa2hva(vm,
-					pde->address * vm->page_size);
+				pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
 				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
 					pte = &pte_start[n4];
 					if (!pte->present)
@@ -347,7 +344,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 						indent, "",
 						pte - pte_start, pte,
 						addr_hva2gpa(vm, pte),
-						(uint64_t) pte->address,
+						(uint64_t) pte->pfn,
 						pte->writable,
 						pte->execute_disable,
 						pte->dirty,
@@ -487,19 +484,19 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 	if (!pml4e[index[3]].present)
 		goto unmapped_gva;
 
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
 	if (!pdpe[index[2]].present)
 		goto unmapped_gva;
 
-	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
 	if (!pde[index[1]].present)
 		goto unmapped_gva;
 
-	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
 	if (!pte[index[0]].present)
 		goto unmapped_gva;
 
-	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+	return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-- 
GitLab


From f681d6861b0c7b28af1a339171602a6e82b1cbda Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:26 -0700
Subject: [PATCH 3680/3804] KVM: selftests: Add PTE helper for x86-64 in
 preparation for hugepages

Add a helper to retrieve a PTE pointer given a PFN, address, and level
in preparation for adding hugepage support.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-17-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c      | 59 ++++++++++---------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index fa4ad136b34d9..6796b65e181c6 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -216,10 +216,21 @@ void virt_pgd_alloc(struct kvm_vm *vm)
 	}
 }
 
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+			  int level)
+{
+	uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
+	int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+
+	return &page_table[index];
+}
+
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-	uint16_t index[4];
 	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -241,43 +252,35 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		paddr, vm->max_gfn, vm->page_size);
 
-	index[0] = (vaddr >> 12) & 0x1ffu;
-	index[1] = (vaddr >> 21) & 0x1ffu;
-	index[2] = (vaddr >> 30) & 0x1ffu;
-	index[3] = (vaddr >> 39) & 0x1ffu;
-
 	/* Allocate page directory pointer table if not present. */
-	pml4e = addr_gpa2hva(vm, vm->pgd);
-	if (!pml4e[index[3]].present) {
-		pml4e[index[3]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pml4e[index[3]].writable = true;
-		pml4e[index[3]].present = true;
+	pml4e = virt_get_pte(vm, vm->pgd >> vm->page_shift, vaddr, 3);
+	if (!pml4e->present) {
+		pml4e->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+		pml4e->writable = true;
+		pml4e->present = true;
 	}
 
 	/* Allocate page directory table if not present. */
-	struct pageDirectoryPointerEntry *pdpe;
-	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
-	if (!pdpe[index[2]].present) {
-		pdpe[index[2]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pdpe[index[2]].writable = true;
-		pdpe[index[2]].present = true;
+	pdpe = virt_get_pte(vm, pml4e->pfn, vaddr, 2);
+	if (!pdpe->present) {
+		pdpe->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+		pdpe->writable = true;
+		pdpe->present = true;
 	}
 
 	/* Allocate page table if not present. */
-	struct pageDirectoryEntry *pde;
-	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
-	if (!pde[index[1]].present) {
-		pde[index[1]].pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pde[index[1]].writable = true;
-		pde[index[1]].present = true;
+	pde = virt_get_pte(vm, pdpe->pfn, vaddr, 1);
+	if (!pde->present) {
+		pde->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+		pde->writable = true;
+		pde->present = true;
 	}
 
 	/* Fill in page table entry. */
-	struct pageTableEntry *pte;
-	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
-	pte[index[0]].pfn = paddr >> vm->page_shift;
-	pte[index[0]].writable = true;
-	pte[index[0]].present = 1;
+	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+	pte->pfn = paddr >> vm->page_shift;
+	pte->writable = true;
+	pte->present = 1;
 }
 
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-- 
GitLab


From b007e904b36a945d01a9080d754702ca5f9c68b4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:27 -0700
Subject: [PATCH 3681/3804] KVM: selftests: Genericize upper level page table
 entry struct

In preparation for adding hugepage support, replace "pageMapL4Entry",
"pageDirectoryPointerEntry", and "pageDirectoryEntry" with a common
"pageUpperEntry", and add a helper to create an upper level entry. All
upper level entries have the same layout, using unique structs provides
minimal value and requires a non-trivial amount of code duplication.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-18-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c      | 91 ++++++-------------
 1 file changed, 26 insertions(+), 65 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 6796b65e181c6..fc33acf819efc 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -20,37 +20,7 @@
 vm_vaddr_t exception_handlers;
 
 /* Virtual translation table structure declarations */
-struct pageMapL4Entry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t ignored_06:1;
-	uint64_t page_size:1;
-	uint64_t ignored_11_08:4;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
-	uint64_t present:1;
-	uint64_t writable:1;
-	uint64_t user:1;
-	uint64_t write_through:1;
-	uint64_t cache_disable:1;
-	uint64_t accessed:1;
-	uint64_t ignored_06:1;
-	uint64_t page_size:1;
-	uint64_t ignored_11_08:4;
-	uint64_t pfn:40;
-	uint64_t ignored_62_52:11;
-	uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
+struct pageUpperEntry {
 	uint64_t present:1;
 	uint64_t writable:1;
 	uint64_t user:1;
@@ -225,11 +195,24 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
 	return &page_table[index];
 }
 
+static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+						    uint64_t pt_pfn,
+						    uint64_t vaddr,
+						    int level)
+{
+	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+	if (!pte->present) {
+		pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+		pte->writable = true;
+		pte->present = true;
+	}
+	return pte;
+}
+
 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-	struct pageMapL4Entry *pml4e;
-	struct pageDirectoryPointerEntry *pdpe;
-	struct pageDirectoryEntry *pde;
+	struct pageUpperEntry *pml4e, *pdpe, *pde;
 	struct pageTableEntry *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -252,29 +235,10 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
 		paddr, vm->max_gfn, vm->page_size);
 
-	/* Allocate page directory pointer table if not present. */
-	pml4e = virt_get_pte(vm, vm->pgd >> vm->page_shift, vaddr, 3);
-	if (!pml4e->present) {
-		pml4e->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pml4e->writable = true;
-		pml4e->present = true;
-	}
-
-	/* Allocate page directory table if not present. */
-	pdpe = virt_get_pte(vm, pml4e->pfn, vaddr, 2);
-	if (!pdpe->present) {
-		pdpe->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pdpe->writable = true;
-		pdpe->present = true;
-	}
-
-	/* Allocate page table if not present. */
-	pde = virt_get_pte(vm, pdpe->pfn, vaddr, 1);
-	if (!pde->present) {
-		pde->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
-		pde->writable = true;
-		pde->present = true;
-	}
+	/* Allocate upper level page tables, if not already present. */
+	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, vaddr, 3);
+	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, 2);
+	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, 1);
 
 	/* Fill in page table entry. */
 	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
@@ -285,9 +249,9 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
-	struct pageMapL4Entry *pml4e, *pml4e_start;
-	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
-	struct pageDirectoryEntry *pde, *pde_start;
+	struct pageUpperEntry *pml4e, *pml4e_start;
+	struct pageUpperEntry *pdpe, *pdpe_start;
+	struct pageUpperEntry *pde, *pde_start;
 	struct pageTableEntry *pte, *pte_start;
 
 	if (!vm->pgd_created)
@@ -298,8 +262,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	fprintf(stream, "%*s      index hvaddr         gpaddr         "
 		"addr         w exec dirty\n",
 		indent, "");
-	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
-		vm->pgd);
+	pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
 	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
 		pml4e = &pml4e_start[n1];
 		if (!pml4e->present)
@@ -468,9 +431,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	uint16_t index[4];
-	struct pageMapL4Entry *pml4e;
-	struct pageDirectoryPointerEntry *pdpe;
-	struct pageDirectoryEntry *pde;
+	struct pageUpperEntry *pml4e, *pdpe, *pde;
 	struct pageTableEntry *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-- 
GitLab


From ad5f16e422258d51414e7d8aaf856000eec9dfce Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:28 -0700
Subject: [PATCH 3682/3804] KVM: selftests: Add hugepage support for x86-64

Add x86-64 hugepage support in the form of an x86-only variant of
virt_pg_map() that takes an explicit page size.  To keep things simple,
follow the existing logic for 4k pages and disallow creating a hugepage
if the upper-level entry is present, even if the desired pfn matches.

Opportunistically fix a double "beyond beyond" reported by checkpatch.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-19-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h  |  8 ++
 .../selftests/kvm/lib/x86_64/processor.c      | 85 +++++++++++++------
 2 files changed, 68 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 9a5b47d2d5d63..f21126941f19f 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -412,6 +412,14 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
 void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
 struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
 
+enum x86_page_size {
+	X86_PAGE_SIZE_4K = 0,
+	X86_PAGE_SIZE_2M,
+	X86_PAGE_SIZE_1G,
+};
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		   enum x86_page_size page_size);
+
 /*
  * Basic CPU control in CR0
  */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index fc33acf819efc..5e0e3a131dadd 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -198,55 +198,90 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
 static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
 						    uint64_t pt_pfn,
 						    uint64_t vaddr,
-						    int level)
+						    uint64_t paddr,
+						    int level,
+						    enum x86_page_size page_size)
 {
 	struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
 
 	if (!pte->present) {
-		pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
 		pte->writable = true;
 		pte->present = true;
+		pte->page_size = (level == page_size);
+		if (pte->page_size)
+			pte->pfn = paddr >> vm->page_shift;
+		else
+			pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+	} else {
+		/*
+		 * Entry already present.  Assert that the caller doesn't want
+		 * a hugepage at this level, and that there isn't a hugepage at
+		 * this level.
+		 */
+		TEST_ASSERT(level != page_size,
+			    "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+			    page_size, vaddr);
+		TEST_ASSERT(!pte->page_size,
+			    "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+			    level, vaddr);
 	}
 	return pte;
 }
 
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+		   enum x86_page_size page_size)
 {
+	const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
 	struct pageUpperEntry *pml4e, *pdpe, *pde;
 	struct pageTableEntry *pte;
 
-	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-	TEST_ASSERT((vaddr % vm->page_size) == 0,
-		"Virtual address not on page boundary,\n"
-		"  vaddr: 0x%lx vm->page_size: 0x%x",
-		vaddr, vm->page_size);
-	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
-		(vaddr >> vm->page_shift)),
-		"Invalid virtual address, vaddr: 0x%lx",
-		vaddr);
-	TEST_ASSERT((paddr % vm->page_size) == 0,
-		"Physical address not on page boundary,\n"
-		"  paddr: 0x%lx vm->page_size: 0x%x",
-		paddr, vm->page_size);
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+		    "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	TEST_ASSERT((vaddr % pg_size) == 0,
+		    "Virtual address not aligned,\n"
+		    "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+		    "Invalid virtual address, vaddr: 0x%lx", vaddr);
+	TEST_ASSERT((paddr % pg_size) == 0,
+		    "Physical address not aligned,\n"
+		    "  paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
 	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
-		"Physical address beyond beyond maximum supported,\n"
-		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
-		paddr, vm->max_gfn, vm->page_size);
+		    "Physical address beyond maximum supported,\n"
+		    "  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		    paddr, vm->max_gfn, vm->page_size);
+
+	/*
+	 * Allocate upper level page tables, if not already present.  Return
+	 * early if a hugepage was created.
+	 */
+	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+				      vaddr, paddr, 3, page_size);
+	if (pml4e->page_size)
+		return;
+
+	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+	if (pdpe->page_size)
+		return;
 
-	/* Allocate upper level page tables, if not already present. */
-	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, vaddr, 3);
-	pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, 2);
-	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, 1);
+	pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+	if (pde->page_size)
+		return;
 
 	/* Fill in page table entry. */
 	pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+	TEST_ASSERT(!pte->present,
+		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
 	pte->pfn = paddr >> vm->page_shift;
 	pte->writable = true;
 	pte->present = 1;
 }
 
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+}
+
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
 	struct pageUpperEntry *pml4e, *pml4e_start;
-- 
GitLab


From ef6a74b2e55e97daf4c7ba2d287878dc3f693b41 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 13:05:29 -0700
Subject: [PATCH 3683/3804] KVM: selftests: Add x86-64 test to verify MMU
 reacts to CPUID updates

Add an x86-only test to verify that x86's MMU reacts to CPUID updates
that impact the MMU.  KVM has had multiple bugs where it fails to
reconfigure the MMU after the guest's vCPU model changes.

Sadly, this test is effectively limited to shadow paging because the
hardware page walk handler doesn't support software disabling of GBPAGES
support, and KVM doesn't manually walk the GVA->GPA on faults for
performance reasons (doing so would largely defeat the benefits of TDP).

Don't require !TDP for the tests as there is still value in running the
tests with TDP, even though the tests will fail (barring KVM hacks).
E.g. KVM should not completely explode if MAXPHYADDR results in KVM using
4-level vs. 5-level paging for the guest.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622200529.3650424-20-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/include/x86_64/processor.h  |   3 +
 .../selftests/kvm/x86_64/mmu_role_test.c      | 147 ++++++++++++++++++
 4 files changed, 152 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/mmu_role_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index e0e14150744ec..6ead3403eca67 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -15,6 +15,7 @@
 /x86_64/hyperv_cpuid
 /x86_64/hyperv_features
 /x86_64/mmio_warning_test
+/x86_64/mmu_role_test
 /x86_64/platform_info_test
 /x86_64/set_boot_cpu_id
 /x86_64/set_sregs_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 61e2accd080dc..8dc007bac0fed 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -47,6 +47,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index f21126941f19f..914b0d16929cb 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -55,6 +55,9 @@
 #define CPUID_PKU		(1ul << 3)
 #define CPUID_LA57		(1ul << 16)
 
+/* CPUID.0x8000_0001.EDX */
+#define CPUID_GBPAGES		(1ul << 26)
+
 #define UNEXPECTED_VECTOR_PORT 0xfff0u
 
 /* General Registers in 64-Bit Mode */
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
new file mode 100644
index 0000000000000..523371cf8e8f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID			1
+
+#define MMIO_GPA	0x100000000ull
+
+static void guest_code(void)
+{
+	(void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+	(void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+
+	GUEST_ASSERT(0);
+}
+
+static void guest_pf_handler(struct ex_regs *regs)
+{
+	/* PFEC == RSVD | PRESENT (read, kernel). */
+	GUEST_ASSERT(regs->error_code == 0x9);
+	GUEST_DONE();
+}
+
+static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
+{
+	u32 good_cpuid_val = *cpuid_reg;
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	uint64_t cmd;
+	int r;
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Map 1gb page without a backing memslot. */
+	__virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+
+	r = _vcpu_run(vm, VCPU_ID);
+
+	/* Guest access to the 1gb page should trigger MMIO. */
+	TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO,
+		    "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n",
+		    run->exit_reason, exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len);
+
+	TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA,
+		    "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr);
+
+	/*
+	 * Effect the CPUID change for the guest and re-enter the guest.  Its
+	 * access should now #PF due to the PAGE_SIZE bit being reserved or
+	 * the resulting GPA being invalid.  Note, kvm_get_supported_cpuid()
+	 * returns the struct that contains the entry being modified.  Eww.
+	 */
+	*cpuid_reg = evil_cpuid_val;
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/*
+	 * Add a dummy memslot to coerce KVM into bumping the MMIO generation.
+	 * KVM does not "officially" support mucking with CPUID after KVM_RUN,
+	 * and will incorrectly reuse MMIO SPTEs.  Don't delete the memslot!
+	 * KVM x86 zaps all shadow pages on memslot deletion.
+	 */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    MMIO_GPA << 1, 10, 1, 0);
+
+	/* Set up a #PF handler to eat the RSVD #PF and signal all done! */
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+	vm_handle_exception(vm, PF_VECTOR, guest_pf_handler);
+
+	r = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+
+	cmd = get_ucall(vm, VCPU_ID, NULL);
+	TEST_ASSERT(cmd == UCALL_DONE,
+		    "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n",
+		    exit_reason_str(run->exit_reason), cmd);
+
+	/*
+	 * Restore the happy CPUID value for the next test.  Yes, changes are
+	 * indeed persistent across VM destruction.
+	 */
+	*cpuid_reg = good_cpuid_val;
+
+	kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_cpuid_entry2 *entry;
+	int opt;
+
+	/*
+	 * All tests are opt-in because TDP doesn't play nice with reserved #PF
+	 * in the GVA->GPA translation.  The hardware page walker doesn't let
+	 * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk
+	 * the GVA on fault for performance reasons.
+	 */
+	bool do_gbpages = false;
+	bool do_maxphyaddr = false;
+
+	setbuf(stdout, NULL);
+
+	while ((opt = getopt(argc, argv, "gm")) != -1) {
+		switch (opt) {
+		case 'g':
+			do_gbpages = true;
+			break;
+		case 'm':
+			do_maxphyaddr = true;
+			break;
+		case 'h':
+		default:
+			printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]);
+			break;
+		}
+	}
+
+	if (!do_gbpages && !do_maxphyaddr) {
+		print_skip("No sub-tests selected");
+		return 0;
+	}
+
+	entry = kvm_get_supported_cpuid_entry(0x80000001);
+	if (!(entry->edx & CPUID_GBPAGES)) {
+		print_skip("1gb hugepages not supported");
+		return 0;
+	}
+
+	if (do_gbpages) {
+		pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n");
+		mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES);
+	}
+
+	if (do_maxphyaddr) {
+		pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n");
+		entry = kvm_get_supported_cpuid_entry(0x80000008);
+		mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20);
+	}
+
+	return 0;
+}
-- 
GitLab


From 6c6e166b2c8513721d166c74060d26d3f4aecb48 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 00:24:54 -0700
Subject: [PATCH 3684/3804] KVM: x86/mmu: Don't WARN on a NULL shadow page in
 TDP MMU check

Treat a NULL shadow page in the "is a TDP MMU" check as a valid, non-TDP
root.  KVM uses a "direct" PAE paging MMU when TDP is disabled and the
guest is running with paging disabled.  In that case, root_hpa points at
the pae_root page (of which only 32 bytes are used), not a standard
shadow page, and the WARN fires (a lot).

Fixes: 0b873fd7fb53 ("KVM: x86/mmu: Remove redundant is_tdp_mmu_enabled check")
Cc: David Matlack <dmatlack@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622072454.3449146-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index b981a044ab55d..1cae4485b3bc5 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -94,11 +94,13 @@ static inline bool is_tdp_mmu(struct kvm_mmu *mmu)
 	if (WARN_ON(!VALID_PAGE(hpa)))
 		return false;
 
+	/*
+	 * A NULL shadow page is legal when shadowing a non-paging guest with
+	 * PAE paging, as the MMU will be direct with root_hpa pointing at the
+	 * pae_root page, not a shadow page.
+	 */
 	sp = to_shadow_page(hpa);
-	if (WARN_ON(!sp))
-		return false;
-
-	return is_tdp_mmu_page(sp) && sp->root_count;
+	return sp && is_tdp_mmu_page(sp) && sp->root_count;
 }
 #else
 static inline bool kvm_mmu_init_tdp_mmu(struct kvm *kvm) { return false; }
-- 
GitLab


From 0193cc908b5ae8aff2e2d2997ca5d4ae26ed24d4 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:03 +0000
Subject: [PATCH 3685/3804] KVM: stats: Separate generic stats from
 architecture specific ones

Generic KVM stats are those collected in architecture independent code
or those supported by all architectures; put all generic statistics in
a separate structure.  This ensures that they are defined the same way
in the statistics API which is being added, removing duplication among
different architectures in the declaration of the descriptors.

No functional change intended.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-2-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/include/asm/kvm_host.h   |  9 ++-------
 arch/arm64/kvm/guest.c              | 12 ++++++------
 arch/mips/include/asm/kvm_host.h    |  9 ++-------
 arch/mips/kvm/mips.c                | 12 ++++++------
 arch/powerpc/include/asm/kvm_host.h |  9 ++-------
 arch/powerpc/kvm/book3s.c           | 12 ++++++------
 arch/powerpc/kvm/book3s_hv.c        | 12 ++++++------
 arch/powerpc/kvm/book3s_pr.c        |  2 +-
 arch/powerpc/kvm/book3s_pr_papr.c   |  2 +-
 arch/powerpc/kvm/booke.c            | 14 +++++++-------
 arch/s390/include/asm/kvm_host.h    |  9 ++-------
 arch/s390/kvm/kvm-s390.c            | 12 ++++++------
 arch/x86/include/asm/kvm_host.h     |  9 ++-------
 arch/x86/kvm/x86.c                  | 14 +++++++-------
 include/linux/kvm_types.h           | 12 ++++++++++++
 virt/kvm/kvm_main.c                 | 14 +++++++-------
 16 files changed, 75 insertions(+), 88 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d56f365b38a83..5a2c82f63baaa 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -556,16 +556,11 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
 }
 
 struct kvm_vm_stat {
-	u64 remote_tlb_flush;
+	struct kvm_vm_stat_generic generic;
 };
 
 struct kvm_vcpu_stat {
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
+	struct kvm_vcpu_stat_generic generic;
 	u64 hvc_exit_stat;
 	u64 wfe_exit_stat;
 	u64 wfi_exit_stat;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603a..988ead309cbe0 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -29,18 +29,18 @@
 #include "trace.h"
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
 	VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
 	VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
 	VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
 	VCPU_STAT("mmio_exit_user", mmio_exit_user),
 	VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
 	VCPU_STAT("exits", exits),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
 	{ NULL }
 };
 
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 4245c082095f6..696f6b0093776 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -109,10 +109,11 @@ static inline bool kvm_is_error_hva(unsigned long addr)
 }
 
 struct kvm_vm_stat {
-	u64 remote_tlb_flush;
+	struct kvm_vm_stat_generic generic;
 };
 
 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 wait_exits;
 	u64 cache_exits;
 	u64 signal_exits;
@@ -142,12 +143,6 @@ struct kvm_vcpu_stat {
 #ifdef CONFIG_CPU_LOONGSON64
 	u64 vz_cpucfg_exits;
 #endif
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 };
 
 struct kvm_arch_memory_slot {
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 4d4af97dcc888..2f2969aef60c6 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -68,12 +68,12 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 #ifdef CONFIG_CPU_LOONGSON64
 	VCPU_STAT("vz_cpucfg", vz_cpucfg_exits),
 #endif
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
 	{NULL}
 };
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index dd8bd4706259d..9f52f282b1aa4 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -81,12 +81,13 @@ struct kvmppc_book3s_shadow_vcpu;
 struct kvm_nested_guest;
 
 struct kvm_vm_stat {
-	u64 remote_tlb_flush;
+	struct kvm_vm_stat_generic generic;
 	u64 num_2M_pages;
 	u64 num_1G_pages;
 };
 
 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 sum_exits;
 	u64 mmio_exits;
 	u64 signal_exits;
@@ -102,14 +103,8 @@ struct kvm_vcpu_stat {
 	u64 emulated_inst_exits;
 	u64 dec_exits;
 	u64 ext_intr_exits;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
 	u64 halt_wait_ns;
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
 	u64 halt_successful_wait;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 	u64 dbell_exits;
 	u64 gdbell_exits;
 	u64 ld;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 5e1e1cff0ee38..ae9f1b855ff9d 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -47,14 +47,14 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("dec", dec_exits),
 	VCPU_STAT("ext_intr", ext_intr_exits),
 	VCPU_STAT("queue_intr", queue_intr),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
 	VCPU_STAT("halt_wait_ns", halt_wait_ns),
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
 	VCPU_STAT("halt_successful_wait", halt_successful_wait),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
 	VCPU_STAT("pf_storage", pf_storage),
 	VCPU_STAT("sp_storage", sp_storage),
 	VCPU_STAT("pf_instruc", pf_instruc),
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7e73e5bfe4ba7..cd544a46183e1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -230,7 +230,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 
 	waitp = kvm_arch_vcpu_get_wait(vcpu);
 	if (rcuwait_wake_up(waitp))
-		++vcpu->stat.halt_wakeup;
+		++vcpu->stat.generic.halt_wakeup;
 
 	cpu = READ_ONCE(vcpu->arch.thread_cpu);
 	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -4092,7 +4092,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	cur = start_poll = ktime_get();
 	if (vc->halt_poll_ns) {
 		ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
-		++vc->runner->stat.halt_attempted_poll;
+		++vc->runner->stat.generic.halt_attempted_poll;
 
 		vc->vcore_state = VCORE_POLLING;
 		spin_unlock(&vc->lock);
@@ -4109,7 +4109,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		vc->vcore_state = VCORE_INACTIVE;
 
 		if (!do_sleep) {
-			++vc->runner->stat.halt_successful_poll;
+			++vc->runner->stat.generic.halt_successful_poll;
 			goto out;
 		}
 	}
@@ -4121,7 +4121,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		do_sleep = 0;
 		/* If we polled, count this as a successful poll */
 		if (vc->halt_poll_ns)
-			++vc->runner->stat.halt_successful_poll;
+			++vc->runner->stat.generic.halt_successful_poll;
 		goto out;
 	}
 
@@ -4148,13 +4148,13 @@ out:
 			ktime_to_ns(cur) - ktime_to_ns(start_wait);
 		/* Attribute failed poll time */
 		if (vc->halt_poll_ns)
-			vc->runner->stat.halt_poll_fail_ns +=
+			vc->runner->stat.generic.halt_poll_fail_ns +=
 				ktime_to_ns(start_wait) -
 				ktime_to_ns(start_poll);
 	} else {
 		/* Attribute successful poll time */
 		if (vc->halt_poll_ns)
-			vc->runner->stat.halt_poll_success_ns +=
+			vc->runner->stat.generic.halt_poll_success_ns +=
 				ktime_to_ns(cur) -
 				ktime_to_ns(start_poll);
 	}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d7733b07f4894..71bcb01404613 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -493,7 +493,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 		if (!vcpu->arch.pending_exceptions) {
 			kvm_vcpu_block(vcpu);
 			kvm_clear_request(KVM_REQ_UNHALT, vcpu);
-			vcpu->stat.halt_wakeup++;
+			vcpu->stat.generic.halt_wakeup++;
 
 			/* Unset POW bit after we woke up */
 			msr &= ~MSR_POW;
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 031c8015864a9..ac14239f3424a 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -378,7 +378,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
 		kvm_vcpu_block(vcpu);
 		kvm_clear_request(KVM_REQ_UNHALT, vcpu);
-		vcpu->stat.halt_wakeup++;
+		vcpu->stat.generic.halt_wakeup++;
 		return EMULATE_DONE;
 	case H_LOGICAL_CI_LOAD:
 		return kvmppc_h_pr_logical_ci_load(vcpu);
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7d5fe43f85c45..7a75559ab51d5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -49,15 +49,15 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("inst_emu", emulated_inst_exits),
 	VCPU_STAT("dec", dec_exits),
 	VCPU_STAT("ext_intr", ext_intr_exits),
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
 	VCPU_STAT("doorbell", dbell_exits),
 	VCPU_STAT("guest doorbell", gdbell_exits),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
-	VM_STAT("remote_tlb_flush", remote_tlb_flush),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
+	VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
 	{ NULL }
 };
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 8925f3969478f..9b4473f76e568 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -361,6 +361,7 @@ struct sie_page {
 };
 
 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 exit_userspace;
 	u64 exit_null;
 	u64 exit_external_request;
@@ -370,13 +371,7 @@ struct kvm_vcpu_stat {
 	u64 exit_validity;
 	u64 exit_instruction;
 	u64 exit_pei;
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_invalid;
 	u64 halt_no_poll_steal;
-	u64 halt_wakeup;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
 	u64 instruction_lctl;
 	u64 instruction_lctlg;
 	u64 instruction_stctl;
@@ -755,12 +750,12 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_vm_stat {
+	struct kvm_vm_stat_generic generic;
 	u64 inject_io;
 	u64 inject_float_mchk;
 	u64 inject_pfault_done;
 	u64 inject_service_signal;
 	u64 inject_virtio;
-	u64 remote_tlb_flush;
 };
 
 struct kvm_arch_memory_slot {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1296fc10f80c8..75ad44c447176 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -72,13 +72,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("exit_program_interruption", exit_program_interruption),
 	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
 	VCPU_STAT("exit_operation_exception", exit_operation_exception),
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
 	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
 	VCPU_STAT("instruction_lctlg", instruction_lctlg),
 	VCPU_STAT("instruction_lctl", instruction_lctl),
 	VCPU_STAT("instruction_stctl", instruction_stctl),
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e11d64aa0bcd1..408051552121a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1160,6 +1160,7 @@ struct kvm_arch {
 };
 
 struct kvm_vm_stat {
+	struct kvm_vm_stat_generic generic;
 	u64 mmu_shadow_zapped;
 	u64 mmu_pte_write;
 	u64 mmu_pde_zapped;
@@ -1167,13 +1168,13 @@ struct kvm_vm_stat {
 	u64 mmu_recycled;
 	u64 mmu_cache_miss;
 	u64 mmu_unsync;
-	u64 remote_tlb_flush;
 	u64 lpages;
 	u64 nx_lpage_splits;
 	u64 max_mmu_page_hash_collisions;
 };
 
 struct kvm_vcpu_stat {
+	struct kvm_vcpu_stat_generic generic;
 	u64 pf_fixed;
 	u64 pf_guest;
 	u64 tlb_flush;
@@ -1187,10 +1188,6 @@ struct kvm_vcpu_stat {
 	u64 nmi_window_exits;
 	u64 l1d_flush;
 	u64 halt_exits;
-	u64 halt_successful_poll;
-	u64 halt_attempted_poll;
-	u64 halt_poll_invalid;
-	u64 halt_wakeup;
 	u64 request_irq_exits;
 	u64 irq_exits;
 	u64 host_state_reload;
@@ -1201,8 +1198,6 @@ struct kvm_vcpu_stat {
 	u64 irq_injections;
 	u64 nmi_injections;
 	u64 req_event;
-	u64 halt_poll_success_ns;
-	u64 halt_poll_fail_ns;
 	u64 nested_run;
 	u64 directed_yield_attempted;
 	u64 directed_yield_successful;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 38c003b603398..71202330848ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -235,10 +235,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("irq_window", irq_window_exits),
 	VCPU_STAT("nmi_window", nmi_window_exits),
 	VCPU_STAT("halt_exits", halt_exits),
-	VCPU_STAT("halt_successful_poll", halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", halt_wakeup),
+	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
+	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
+	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
+	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
 	VCPU_STAT("hypercalls", hypercalls),
 	VCPU_STAT("request_irq", request_irq_exits),
 	VCPU_STAT("irq_exits", irq_exits),
@@ -250,8 +250,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("nmi_injections", nmi_injections),
 	VCPU_STAT("req_event", req_event),
 	VCPU_STAT("l1d_flush", l1d_flush),
-	VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
+	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
 	VCPU_STAT("nested_run", nested_run),
 	VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
 	VCPU_STAT("directed_yield_successful", directed_yield_successful),
@@ -263,7 +263,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VM_STAT("mmu_recycled", mmu_recycled),
 	VM_STAT("mmu_cache_miss", mmu_cache_miss),
 	VM_STAT("mmu_unsync", mmu_unsync),
-	VM_STAT("remote_tlb_flush", remote_tlb_flush),
+	VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
 	VM_STAT("largepages", lpages, .mode = 0444),
 	VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
 	VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a7580f69dda02..48db778291b7e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -76,5 +76,17 @@ struct kvm_mmu_memory_cache {
 };
 #endif
 
+struct kvm_vm_stat_generic {
+	u64 remote_tlb_flush;
+};
+
+struct kvm_vcpu_stat_generic {
+	u64 halt_successful_poll;
+	u64 halt_attempted_poll;
+	u64 halt_poll_invalid;
+	u64 halt_wakeup;
+	u64 halt_poll_success_ns;
+	u64 halt_poll_fail_ns;
+};
 
 #endif /* __KVM_TYPES_H__ */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ed4d1581d5029..cec986487b308 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -332,7 +332,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	 */
 	if (!kvm_arch_flush_remote_tlb(kvm)
 	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
-		++kvm->stat.remote_tlb_flush;
+		++kvm->stat.generic.remote_tlb_flush;
 	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
@@ -3029,9 +3029,9 @@ static inline void
 update_halt_poll_stats(struct kvm_vcpu *vcpu, u64 poll_ns, bool waited)
 {
 	if (waited)
-		vcpu->stat.halt_poll_fail_ns += poll_ns;
+		vcpu->stat.generic.halt_poll_fail_ns += poll_ns;
 	else
-		vcpu->stat.halt_poll_success_ns += poll_ns;
+		vcpu->stat.generic.halt_poll_success_ns += poll_ns;
 }
 
 /*
@@ -3049,16 +3049,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) {
 		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
 
-		++vcpu->stat.halt_attempted_poll;
+		++vcpu->stat.generic.halt_attempted_poll;
 		do {
 			/*
 			 * This sets KVM_REQ_UNHALT if an interrupt
 			 * arrives.
 			 */
 			if (kvm_vcpu_check_block(vcpu) < 0) {
-				++vcpu->stat.halt_successful_poll;
+				++vcpu->stat.generic.halt_successful_poll;
 				if (!vcpu_valid_wakeup(vcpu))
-					++vcpu->stat.halt_poll_invalid;
+					++vcpu->stat.generic.halt_poll_invalid;
 				goto out;
 			}
 			poll_end = cur = ktime_get();
@@ -3115,7 +3115,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 	waitp = kvm_arch_vcpu_get_wait(vcpu);
 	if (rcuwait_wake_up(waitp)) {
 		WRITE_ONCE(vcpu->ready, true);
-		++vcpu->stat.halt_wakeup;
+		++vcpu->stat.generic.halt_wakeup;
 		return true;
 	}
 
-- 
GitLab


From cb082bfab59a224a49ae803fed52cd03e8d6b5e0 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:04 +0000
Subject: [PATCH 3686/3804] KVM: stats: Add fd-based API to read binary stats
 data

This commit defines the API for userspace and prepares the common
functionality to support per VM/VCPU binary stats data readings.

KVM stats are currently only accessible through debugfs, which has some
shortcomings that this patch series is supposed to fix:
1. The current debugfs stats solution in KVM could be disabled
   when kernel Lockdown mode is enabled, which is a potential
   risk for production.
2. The current debugfs stats solution in KVM is organized as "one
   stats per file", it is good for debugging, but not efficient
   for production.
3. The stats read/clear in current debugfs solution in KVM are
   protected by the global kvm_lock.

Besides that, there are some other benefits with this change:
1. All KVM VM/VCPU stats can be read out in a bulk by one copy
   to userspace.
2. A schema is used to describe KVM statistics. From userspace's
   perspective, the KVM statistics are self-describing.
3. With the fd-based solution, a separate telemetry would be able
   to read KVM stats in a less privileged environment.
4. After the initial setup by reading in stats descriptors, a
   telemetry only needs to read the stats data itself, no more
   parsing or setup is needed.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com> #arm64
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-3-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/Makefile   |   2 +-
 arch/mips/kvm/Makefile    |   2 +-
 arch/powerpc/kvm/Makefile |   2 +-
 arch/s390/kvm/Makefile    |   3 +-
 arch/x86/kvm/Makefile     |   2 +-
 include/linux/kvm_host.h  |  82 ++++++++++++++++++++-
 include/linux/kvm_types.h |   2 +
 include/uapi/linux/kvm.h  |  73 +++++++++++++++++++
 virt/kvm/binary_stats.c   | 146 ++++++++++++++++++++++++++++++++++++++
 9 files changed, 307 insertions(+), 7 deletions(-)
 create mode 100644 virt/kvm/binary_stats.c

diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 589921392cb12..989bb5dad2c82 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_KVM) += kvm.o
 obj-$(CONFIG_KVM) += hyp/
 
 kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
-	 $(KVM)/vfio.o $(KVM)/irqchip.o \
+	 $(KVM)/vfio.o $(KVM)/irqchip.o $(KVM)/binary_stats.o \
 	 arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \
 	 inject_fault.o va_layout.o handle_exit.o \
 	 guest.o debug.o reset.o sys_regs.o \
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 30cc060857c7d..c67250a956b89 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o eventfd.o binary_stats.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ab241317481c0..583c14ef596ea 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,7 @@
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
 KVM := ../../../virt/kvm
 
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o
+common-objs-y = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/binary_stats.o
 common-objs-$(CONFIG_KVM_VFIO) += $(KVM)/vfio.o
 common-objs-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
 
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 12decca22e7c8..b3aaadc60ead1 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -4,7 +4,8 @@
 # Copyright IBM Corp. 2008
 
 KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o \
+	      $(KVM)/irqchip.o $(KVM)/vfio.o $(KVM)/binary_stats.o
 
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 83331376b779b..75dfd27b6e8a5 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -11,7 +11,7 @@ KVM := ../../../virt/kvm
 
 kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o \
-				$(KVM)/dirty_ring.o
+				$(KVM)/dirty_ring.o $(KVM)/binary_stats.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o emulate.o i8259.o irq.o lapic.o \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 37cbb56ccd09c..9ee7f350473bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1272,16 +1272,94 @@ struct kvm_stats_debugfs_item {
 	int mode;
 };
 
+struct _kvm_stats_desc {
+	struct kvm_stats_desc desc;
+	char name[KVM_STATS_NAME_SIZE];
+};
+
 #define KVM_DBGFS_GET_MODE(dbgfs_item)                                         \
 	((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
 
-#define VM_STAT(n, x, ...) 							\
+#define VM_STAT(n, x, ...)						       \
 	{ n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
-#define VCPU_STAT(n, x, ...)							\
+#define VCPU_STAT(n, x, ...)						       \
 	{ n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
 
+#define STATS_DESC_COMMON(type, unit, base, exp)			       \
+	.flags = type | unit | base |					       \
+		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |	       \
+		 BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) |	       \
+		 BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK),	       \
+	.exponent = exp,						       \
+	.size = 1
+
+#define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp)		       \
+	{								       \
+		{							       \
+			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			.offset = offsetof(struct kvm_vm_stat, generic.stat)   \
+		},							       \
+		.name = #stat,						       \
+	}
+#define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp)		       \
+	{								       \
+		{							       \
+			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			.offset = offsetof(struct kvm_vcpu_stat, generic.stat) \
+		},							       \
+		.name = #stat,						       \
+	}
+#define VM_STATS_DESC(stat, type, unit, base, exp)			       \
+	{								       \
+		{							       \
+			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			.offset = offsetof(struct kvm_vm_stat, stat)	       \
+		},							       \
+		.name = #stat,						       \
+	}
+#define VCPU_STATS_DESC(stat, type, unit, base, exp)			       \
+	{								       \
+		{							       \
+			STATS_DESC_COMMON(type, unit, base, exp),	       \
+			.offset = offsetof(struct kvm_vcpu_stat, stat)	       \
+		},							       \
+		.name = #stat,						       \
+	}
+/* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */
+#define STATS_DESC(SCOPE, stat, type, unit, base, exp)			       \
+	SCOPE##_STATS_DESC(stat, type, unit, base, exp)
+
+#define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent)	       \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, unit, base, exponent)
+#define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent)		       \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, unit, base, exponent)
+#define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent)		       \
+	STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, unit, base, exponent)
+
+/* Cumulative counter, read/write */
+#define STATS_DESC_COUNTER(SCOPE, name)					       \
+	STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE,		       \
+		KVM_STATS_BASE_POW10, 0)
+/* Instantaneous counter, read only */
+#define STATS_DESC_ICOUNTER(SCOPE, name)				       \
+	STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE,		       \
+		KVM_STATS_BASE_POW10, 0)
+/* Peak counter, read/write */
+#define STATS_DESC_PCOUNTER(SCOPE, name)				       \
+	STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE,		       \
+		KVM_STATS_BASE_POW10, 0)
+
+/* Cumulative time in nanosecond */
+#define STATS_DESC_TIME_NSEC(SCOPE, name)				       \
+	STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS,	       \
+		KVM_STATS_BASE_POW10, -9)
+
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+		       const struct _kvm_stats_desc *desc,
+		       void *stats, size_t size_stats,
+		       char __user *user_buffer, size_t size, loff_t *offset);
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 48db778291b7e..ed6a985c56807 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -89,4 +89,6 @@ struct kvm_vcpu_stat_generic {
 	u64 halt_poll_fail_ns;
 };
 
+#define KVM_STATS_NAME_SIZE	48
+
 #endif /* __KVM_TYPES_H__ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 330835f1005b6..f1ba602260f6e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1087,6 +1087,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_SREGS2 200
 #define KVM_CAP_EXIT_HYPERCALL 201
 #define KVM_CAP_PPC_RPT_INVALIDATE 202
+#define KVM_CAP_BINARY_STATS_FD 203
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1906,4 +1907,76 @@ struct kvm_dirty_gfn {
 #define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
 #define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
 
+/**
+ * struct kvm_stats_header - Header of per vm/vcpu binary statistics data.
+ * @flags: Some extra information for header, always 0 for now.
+ * @name_size: The size in bytes of the memory which contains statistics
+ *             name string including trailing '\0'. The memory is allocated
+ *             at the end of statistics descriptor.
+ * @num_desc: The number of statistics the vm or vcpu has.
+ * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed
+ *             by vm/vcpu stats fd.
+ * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file
+ *               pointed by vm/vcpu stats fd.
+ * @data_offset: The offset of the vm/vcpu stats' data block in the file
+ *               pointed by vm/vcpu stats fd.
+ *
+ * This is the header userspace needs to read from stats fd before any other
+ * readings. It is used by userspace to discover all the information about the
+ * vm/vcpu's binary statistics.
+ * Userspace reads this header from the start of the vm/vcpu's stats fd.
+ */
+struct kvm_stats_header {
+	__u32 flags;
+	__u32 name_size;
+	__u32 num_desc;
+	__u32 id_offset;
+	__u32 desc_offset;
+	__u32 data_offset;
+};
+
+#define KVM_STATS_TYPE_SHIFT		0
+#define KVM_STATS_TYPE_MASK		(0xF << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_PEAK
+
+#define KVM_STATS_UNIT_SHIFT		4
+#define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_NONE		(0x0 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_BYTES		(0x1 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_SECONDS		(0x2 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_CYCLES		(0x3 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX		KVM_STATS_UNIT_CYCLES
+
+#define KVM_STATS_BASE_SHIFT		8
+#define KVM_STATS_BASE_MASK		(0xF << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW10		(0x0 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_POW2		(0x1 << KVM_STATS_BASE_SHIFT)
+#define KVM_STATS_BASE_MAX		KVM_STATS_BASE_POW2
+
+/**
+ * struct kvm_stats_desc - Descriptor of a KVM statistics.
+ * @flags: Annotations of the stats, like type, unit, etc.
+ * @exponent: Used together with @flags to determine the unit.
+ * @size: The number of data items for this stats.
+ *        Every data item is of type __u64.
+ * @offset: The offset of the stats to the start of stat structure in
+ *          structure kvm or kvm_vcpu.
+ * @unused: Unused field for future usage. Always 0 for now.
+ * @name: The name string for the stats. Its size is indicated by the
+ *        &kvm_stats_header->name_size.
+ */
+struct kvm_stats_desc {
+	__u32 flags;
+	__s16 exponent;
+	__u16 size;
+	__u32 offset;
+	__u32 unused;
+	char name[];
+};
+
+#define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/binary_stats.c b/virt/kvm/binary_stats.c
new file mode 100644
index 0000000000000..e609d428811a9
--- /dev/null
+++ b/virt/kvm/binary_stats.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KVM binary statistics interface implementation
+ *
+ * Copyright 2021 Google LLC
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <linux/uaccess.h>
+
+/**
+ * kvm_stats_read() - Common function to read from the binary statistics
+ * file descriptor.
+ *
+ * @id: identification string of the stats
+ * @header: stats header for a vm or a vcpu
+ * @desc: start address of an array of stats descriptors for a vm or a vcpu
+ * @stats: start address of stats data block for a vm or a vcpu
+ * @size_stats: the size of stats data block pointed by @stats
+ * @user_buffer: start address of userspace buffer
+ * @size: requested read size from userspace
+ * @offset: the start position from which the content will be read for the
+ *          corresponding vm or vcpu file descriptor
+ *
+ * The file content of a vm/vcpu file descriptor is now defined as below:
+ * +-------------+
+ * |   Header    |
+ * +-------------+
+ * |  id string  |
+ * +-------------+
+ * | Descriptors |
+ * +-------------+
+ * | Stats Data  |
+ * +-------------+
+ * Although this function allows userspace to read any amount of data (as long
+ * as it is within the limit) from any position, the typical usage would follow
+ * the steps below:
+ * 1. Read header from offset 0. Get the offset of descriptors and stats data
+ *    and some other necessary information. This is a one-time work for the
+ *    lifecycle of the corresponding vm/vcpu stats fd.
+ * 2. Read id string from its offset. This is a one-time work for the lifecycle
+ *    of the corresponding vm/vcpu stats fd.
+ * 3. Read descriptors from its offset and discover all the stats by parsing
+ *    descriptors. This is a one-time work for the lifecycle of the
+ *    corresponding vm/vcpu stats fd.
+ * 4. Periodically read stats data from its offset using pread.
+ *
+ * Return: the number of bytes that has been successfully read
+ */
+ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
+		       const struct _kvm_stats_desc *desc,
+		       void *stats, size_t size_stats,
+		       char __user *user_buffer, size_t size, loff_t *offset)
+{
+	ssize_t len;
+	ssize_t copylen;
+	ssize_t remain = size;
+	size_t size_desc;
+	size_t size_header;
+	void *src;
+	loff_t pos = *offset;
+	char __user *dest = user_buffer;
+
+	size_header = sizeof(*header);
+	size_desc = header->num_desc * sizeof(*desc);
+
+	len = KVM_STATS_NAME_SIZE + size_header + size_desc + size_stats - pos;
+	len = min(len, remain);
+	if (len <= 0)
+		return 0;
+	remain = len;
+
+	/*
+	 * Copy kvm stats header.
+	 * The header is the first block of content userspace usually read out.
+	 * The pos is 0 and the copylen and remain would be the size of header.
+	 * The copy of the header would be skipped if offset is larger than the
+	 * size of header. That usually happens when userspace reads stats
+	 * descriptors and stats data.
+	 */
+	copylen = size_header - pos;
+	copylen = min(copylen, remain);
+	if (copylen > 0) {
+		src = (void *)header + pos;
+		if (copy_to_user(dest, src, copylen))
+			return -EFAULT;
+		remain -= copylen;
+		pos += copylen;
+		dest += copylen;
+	}
+
+	/*
+	 * Copy kvm stats header id string.
+	 * The id string is unique for every vm/vcpu, which is stored in kvm
+	 * and kvm_vcpu structure.
+	 * The id string is part of the stat header from the perspective of
+	 * userspace, it is usually read out together with previous constant
+	 * header part and could be skipped for later descriptors and stats
+	 * data readings.
+	 */
+	copylen = header->id_offset + KVM_STATS_NAME_SIZE - pos;
+	copylen = min(copylen, remain);
+	if (copylen > 0) {
+		src = id + pos - header->id_offset;
+		if (copy_to_user(dest, src, copylen))
+			return -EFAULT;
+		remain -= copylen;
+		pos += copylen;
+		dest += copylen;
+	}
+
+	/*
+	 * Copy kvm stats descriptors.
+	 * The descriptors copy would be skipped in the typical case that
+	 * userspace periodically read stats data, since the pos would be
+	 * greater than the end address of descriptors
+	 * (header->desc_offset + size_desc) causing copylen <= 0.
+	 */
+	copylen = header->desc_offset + size_desc - pos;
+	copylen = min(copylen, remain);
+	if (copylen > 0) {
+		src = (void *)desc + pos - header->desc_offset;
+		if (copy_to_user(dest, src, copylen))
+			return -EFAULT;
+		remain -= copylen;
+		pos += copylen;
+		dest += copylen;
+	}
+
+	/* Copy kvm stats values */
+	copylen = header->data_offset + size_stats - pos;
+	copylen = min(copylen, remain);
+	if (copylen > 0) {
+		src = stats + pos - header->data_offset;
+		if (copy_to_user(dest, src, copylen))
+			return -EFAULT;
+		remain -= copylen;
+		pos += copylen;
+		dest += copylen;
+	}
+
+	*offset = pos;
+	return len;
+}
-- 
GitLab


From f8be156be163a052a067306417cd0ff679068c97 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 24 Jun 2021 08:29:04 -0400
Subject: [PATCH 3687/3804] KVM: do not allow mapping valid but
 non-reference-counted pages

It's possible to create a region which maps valid but non-refcounted
pages (e.g., tail pages of non-compound higher order allocations). These
host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family
of APIs, which take a reference to the page, which takes it from 0 to 1.
When the reference is dropped, this will free the page incorrectly.

Fix this by only taking a reference on valid pages if it was non-zero,
which indicates it is participating in normal refcounting (and can be
released with put_page).

This addresses CVE-2021-22543.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a6bc7af0e28d..46fb042837d20 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2055,6 +2055,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
 	return true;
 }
 
+static int kvm_try_get_pfn(kvm_pfn_t pfn)
+{
+	if (kvm_is_reserved_pfn(pfn))
+		return 1;
+	return get_page_unless_zero(pfn_to_page(pfn));
+}
+
 static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 			       unsigned long addr, bool *async,
 			       bool write_fault, bool *writable,
@@ -2104,13 +2111,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
 	 * Whoever called remap_pfn_range is also going to call e.g.
 	 * unmap_mapping_range before the underlying pages are freed,
 	 * causing a call to our MMU notifier.
+	 *
+	 * Certain IO or PFNMAP mappings can be backed with valid
+	 * struct pages, but be allocated without refcounting e.g.,
+	 * tail pages of non-compound higher order allocations, which
+	 * would then underflow the refcount when the caller does the
+	 * required put_page. Don't allow those pages here.
 	 */ 
-	kvm_get_pfn(pfn);
+	if (!kvm_try_get_pfn(pfn))
+		r = -EFAULT;
 
 out:
 	pte_unmap_unlock(ptep, ptl);
 	*p_pfn = pfn;
-	return 0;
+
+	return r;
 }
 
 /*
-- 
GitLab


From b01d550663fa5fd40a1785b0f1211fb657892edf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 23 Jun 2021 18:23:00 +0100
Subject: [PATCH 3688/3804] spi: Fix self assignment issue with ancillary->mode

There is an assignment of ancillary->mode to itself which looks
dubious since the preceding comment states that the speed and
mode are taken over from the SPI main device, indicating that
ancillary->mode should be assigned using the value spi->mode.
Fix this.

Addresses-Coverity: ("Self assignment")
Fixes: 0c79378c0199 ("spi: add ancillary device support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Link: https://lore.kernel.org/r/20210623172300.161484-1-colin.king@canonical.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 572ad95c1d4f5..c296f08b36c18 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -2183,7 +2183,7 @@ struct spi_device *spi_new_ancillary_device(struct spi_device *spi,
 
 	/* Take over SPI mode/speed from SPI main device */
 	ancillary->max_speed_hz = spi->max_speed_hz;
-	ancillary->mode = ancillary->mode;
+	ancillary->mode = spi->mode;
 
 	/* Register the new device */
 	rc = spi_add_device_locked(ancillary);
-- 
GitLab


From 3c0d0894320cc517fda657c69939cd0313d0b4e2 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 21 Jun 2021 11:53:38 +0200
Subject: [PATCH 3689/3804] libceph: don't pass result into
 ac->ops->handle_reply()

There is no result to pass in msgr2 case because authentication
failures are reported through auth_bad_method frame and in MAuth
case an error is returned immediately.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/ceph/auth.h |  2 +-
 net/ceph/auth.c           | 15 ++++++++++-----
 net/ceph/auth_none.c      |  4 ++--
 net/ceph/auth_x.c         |  6 ++----
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 71b5d481c6530..39425e2f7cb21 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -50,7 +50,7 @@ struct ceph_auth_client_ops {
 	 * another request.
 	 */
 	int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
-	int (*handle_reply)(struct ceph_auth_client *ac, int result,
+	int (*handle_reply)(struct ceph_auth_client *ac,
 			    void *buf, void *end, u8 *session_key,
 			    int *session_key_len, u8 *con_secret,
 			    int *con_secret_len);
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index de407e8feb978..3a9d44eee941a 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -260,14 +260,19 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 		ac->negotiating = false;
 	}
 
-	ret = ac->ops->handle_reply(ac, result, payload, payload_end,
+	if (result) {
+		pr_err("auth protocol '%s' mauth authentication failed: %d\n",
+		       ceph_auth_proto_name(ac->protocol), result);
+		ret = result;
+		goto out;
+	}
+
+	ret = ac->ops->handle_reply(ac, payload, payload_end,
 				    NULL, NULL, NULL, NULL);
 	if (ret == -EAGAIN) {
 		ret = build_request(ac, true, reply_buf, reply_len);
 		goto out;
 	} else if (ret) {
-		pr_err("auth protocol '%s' mauth authentication failed: %d\n",
-		       ceph_auth_proto_name(ac->protocol), result);
 		goto out;
 	}
 
@@ -480,7 +485,7 @@ int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
 	int ret;
 
 	mutex_lock(&ac->mutex);
-	ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
+	ret = ac->ops->handle_reply(ac, reply, reply + reply_len,
 				    NULL, NULL, NULL, NULL);
 	if (ret == -EAGAIN)
 		ret = build_request(ac, false, buf, buf_len);
@@ -498,7 +503,7 @@ int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
 	int ret;
 
 	mutex_lock(&ac->mutex);
-	ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
+	ret = ac->ops->handle_reply(ac, reply, reply + reply_len,
 				    session_key, session_key_len,
 				    con_secret, con_secret_len);
 	if (!ret)
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 70e86e4622502..aab490a111eb3 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -69,7 +69,7 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
  * the generic auth code decode the global_id, and we carry no actual
  * authenticate state, so nothing happens here.
  */
-static int handle_reply(struct ceph_auth_client *ac, int result,
+static int handle_reply(struct ceph_auth_client *ac,
 			void *buf, void *end, u8 *session_key,
 			int *session_key_len, u8 *con_secret,
 			int *con_secret_len)
@@ -77,7 +77,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
 	struct ceph_auth_none_info *xi = ac->private;
 
 	xi->starting = false;
-	return result;
+	return 0;
 }
 
 static void ceph_auth_none_destroy_authorizer(struct ceph_authorizer *a)
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 79641c4afee93..cab99c5581b0a 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -661,7 +661,7 @@ e_inval:
 	return -EINVAL;
 }
 
-static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
+static int ceph_x_handle_reply(struct ceph_auth_client *ac,
 			       void *buf, void *end,
 			       u8 *session_key, int *session_key_len,
 			       u8 *con_secret, int *con_secret_len)
@@ -669,13 +669,11 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
 	struct ceph_x_info *xi = ac->private;
 	struct ceph_x_ticket_handler *th;
 	int len = end - buf;
+	int result;
 	void *p;
 	int op;
 	int ret;
 
-	if (result)
-		return result;  /* XXX hmm? */
-
 	if (xi->starting) {
 		/* it's a hello */
 		struct ceph_x_server_challenge *sc = buf;
-- 
GitLab


From 03af4c7bad8ca59143bca488b90b3775d10d7f94 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 21 Jun 2021 12:17:40 +0200
Subject: [PATCH 3690/3804] libceph: set global_id as soon as we get an auth
 ticket

Commit 61ca49a9105f ("libceph: don't set global_id until we get an
auth ticket") delayed the setting of global_id too much.  It is set
only after all tickets are received, but in pre-nautilus clusters an
auth ticket and the service tickets are obtained in separate steps
(for a total of three MAuth replies).  When the service tickets are
requested, global_id is used to build an authorizer; if global_id is
still 0 we never get them and fail to establish the session.

Move the setting of global_id into the protocol implementations.  This
way global_id can be set exactly when an auth ticket is received, not
sooner nor later.

Fixes: 61ca49a9105f ("libceph: don't set global_id until we get an auth ticket")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/ceph/auth.h |  4 +++-
 net/ceph/auth.c           | 13 +++++--------
 net/ceph/auth_none.c      |  3 ++-
 net/ceph/auth_x.c         | 11 ++++++-----
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index 39425e2f7cb21..6b138fa97db85 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -50,7 +50,7 @@ struct ceph_auth_client_ops {
 	 * another request.
 	 */
 	int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
-	int (*handle_reply)(struct ceph_auth_client *ac,
+	int (*handle_reply)(struct ceph_auth_client *ac, u64 global_id,
 			    void *buf, void *end, u8 *session_key,
 			    int *session_key_len, u8 *con_secret,
 			    int *con_secret_len);
@@ -104,6 +104,8 @@ struct ceph_auth_client {
 	struct mutex mutex;
 };
 
+void ceph_auth_set_global_id(struct ceph_auth_client *ac, u64 global_id);
+
 struct ceph_auth_client *ceph_auth_init(const char *name,
 					const struct ceph_crypto_key *key,
 					const int *con_modes);
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index 3a9d44eee941a..d2b268a1838e8 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -36,7 +36,7 @@ static int init_protocol(struct ceph_auth_client *ac, int proto)
 	}
 }
 
-static void set_global_id(struct ceph_auth_client *ac, u64 global_id)
+void ceph_auth_set_global_id(struct ceph_auth_client *ac, u64 global_id)
 {
 	dout("%s global_id %llu\n", __func__, global_id);
 
@@ -267,7 +267,7 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 		goto out;
 	}
 
-	ret = ac->ops->handle_reply(ac, payload, payload_end,
+	ret = ac->ops->handle_reply(ac, global_id, payload, payload_end,
 				    NULL, NULL, NULL, NULL);
 	if (ret == -EAGAIN) {
 		ret = build_request(ac, true, reply_buf, reply_len);
@@ -276,8 +276,6 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
 		goto out;
 	}
 
-	set_global_id(ac, global_id);
-
 out:
 	mutex_unlock(&ac->mutex);
 	return ret;
@@ -485,7 +483,7 @@ int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
 	int ret;
 
 	mutex_lock(&ac->mutex);
-	ret = ac->ops->handle_reply(ac, reply, reply + reply_len,
+	ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
 				    NULL, NULL, NULL, NULL);
 	if (ret == -EAGAIN)
 		ret = build_request(ac, false, buf, buf_len);
@@ -503,11 +501,10 @@ int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
 	int ret;
 
 	mutex_lock(&ac->mutex);
-	ret = ac->ops->handle_reply(ac, reply, reply + reply_len,
+	ret = ac->ops->handle_reply(ac, global_id, reply, reply + reply_len,
 				    session_key, session_key_len,
 				    con_secret, con_secret_len);
-	if (!ret)
-		set_global_id(ac, global_id);
+	WARN_ON(ret == -EAGAIN || ret > 0);
 	mutex_unlock(&ac->mutex);
 	return ret;
 }
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index aab490a111eb3..097e9f8d87a72 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -69,7 +69,7 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
  * the generic auth code decode the global_id, and we carry no actual
  * authenticate state, so nothing happens here.
  */
-static int handle_reply(struct ceph_auth_client *ac,
+static int handle_reply(struct ceph_auth_client *ac, u64 global_id,
 			void *buf, void *end, u8 *session_key,
 			int *session_key_len, u8 *con_secret,
 			int *con_secret_len)
@@ -77,6 +77,7 @@ static int handle_reply(struct ceph_auth_client *ac,
 	struct ceph_auth_none_info *xi = ac->private;
 
 	xi->starting = false;
+	ceph_auth_set_global_id(ac, global_id);
 	return 0;
 }
 
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index cab99c5581b0a..b71b1635916e1 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -597,7 +597,7 @@ bad:
 	return -EINVAL;
 }
 
-static int handle_auth_session_key(struct ceph_auth_client *ac,
+static int handle_auth_session_key(struct ceph_auth_client *ac, u64 global_id,
 				   void **p, void *end,
 				   u8 *session_key, int *session_key_len,
 				   u8 *con_secret, int *con_secret_len)
@@ -613,6 +613,7 @@ static int handle_auth_session_key(struct ceph_auth_client *ac,
 	if (ret)
 		return ret;
 
+	ceph_auth_set_global_id(ac, global_id);
 	if (*p == end) {
 		/* pre-nautilus (or didn't request service tickets!) */
 		WARN_ON(session_key || con_secret);
@@ -661,7 +662,7 @@ e_inval:
 	return -EINVAL;
 }
 
-static int ceph_x_handle_reply(struct ceph_auth_client *ac,
+static int ceph_x_handle_reply(struct ceph_auth_client *ac, u64 global_id,
 			       void *buf, void *end,
 			       u8 *session_key, int *session_key_len,
 			       u8 *con_secret, int *con_secret_len)
@@ -695,9 +696,9 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac,
 	switch (op) {
 	case CEPHX_GET_AUTH_SESSION_KEY:
 		/* AUTH ticket + [connection secret] + service tickets */
-		ret = handle_auth_session_key(ac, &p, end, session_key,
-					      session_key_len, con_secret,
-					      con_secret_len);
+		ret = handle_auth_session_key(ac, global_id, &p, end,
+					      session_key, session_key_len,
+					      con_secret, con_secret_len);
 		break;
 
 	case CEPHX_GET_PRINCIPAL_SESSION_KEY:
-- 
GitLab


From 3265a7e6b41bae8608e7e91ac6798de5e5564164 Mon Sep 17 00:00:00 2001
From: Andreas Hecht <andreas.e.hecht@gmail.com>
Date: Thu, 24 Jun 2021 17:25:35 +0200
Subject: [PATCH 3691/3804] i2c: dev: Add __user annotation

Fix Sparse warnings:
drivers/i2c/i2c-dev.c:546:19: warning: incorrect type in assignment (different address spaces)
drivers/i2c/i2c-dev.c:549:53: warning: incorrect type in argument 2 (different address spaces)

compat_ptr() returns a pointer tagged __user which gets assigned to a
pointer missing the __user annotation. The same pointer is passed to
copy_from_user() as an argument where it is expected to have the __user
annotation. Fix both by adding the __user annotation to the pointer.

Fixes: 7d5cb45655f2 ("i2c compat ioctls: move to ->compat_ioctl()")
Signed-off-by: Andreas Hecht <andreas.e.hecht@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/i2c-dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 6ef38a8ee95cb..cb64fe649390e 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -526,7 +526,7 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo
 		return put_user(funcs, (compat_ulong_t __user *)arg);
 	case I2C_RDWR: {
 		struct i2c_rdwr_ioctl_data32 rdwr_arg;
-		struct i2c_msg32 *p;
+		struct i2c_msg32 __user *p;
 		struct i2c_msg *rdwr_pa;
 		int i;
 
-- 
GitLab


From 4ca070ef0dd885616ef294d269a9bf8e3b258e1a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 24 May 2021 11:09:12 +0200
Subject: [PATCH 3692/3804] i2c: robotfuzz-osif: fix control-request directions

The direction of the pipe argument must match the request-type direction
bit or control requests may fail depending on the host-controller-driver
implementation.

Control transfers without a data stage are treated as OUT requests by
the USB stack and should be using usb_sndctrlpipe(). Failing to do so
will now trigger a warning.

Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously
used the osif_usb_read() helper and set the IN direction bit.

Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com
Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.")
Cc: stable@vger.kernel.org      # 3.14
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c
index a39f7d0927973..66dfa211e736b 100644
--- a/drivers/i2c/busses/i2c-robotfuzz-osif.c
+++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c
@@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
 			}
 		}
 
-		ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
+		ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0);
 		if (ret) {
 			dev_err(&adapter->dev, "failure sending STOP\n");
 			return -EREMOTEIO;
@@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface,
 	 * Set bus frequency. The frequency is:
 	 * 120,000,000 / ( 16 + 2 * div * 4^prescale).
 	 * Using dev = 52, prescale = 0 give 100KHz */
-	ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
+	ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0,
 			    NULL, 0);
 	if (ret) {
 		dev_err(&interface->dev, "failure sending bit rate");
-- 
GitLab


From fcfe1baeddbf1c7c448b44c82586d0cbc8abc9f5 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:05 +0000
Subject: [PATCH 3693/3804] KVM: stats: Support binary stats retrieval for a VM

Add a VM ioctl to get a statistics file descriptor by which a read
functionality is provided for userspace to read out VM stats header,
descriptors and data.
Define VM statistics descriptors and header for all architectures.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com> #arm64
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-4-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/guest.c    | 15 ++++++++++++++
 arch/mips/kvm/mips.c      | 15 ++++++++++++++
 arch/powerpc/kvm/book3s.c | 17 ++++++++++++++++
 arch/powerpc/kvm/booke.c  | 17 ++++++++++++++++
 arch/s390/kvm/kvm-s390.c  | 20 +++++++++++++++++++
 arch/x86/kvm/x86.c        | 25 +++++++++++++++++++++++
 include/linux/kvm_host.h  |  6 ++++++
 virt/kvm/kvm_main.c       | 42 +++++++++++++++++++++++++++++++++++++++
 8 files changed, 157 insertions(+)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 988ead309cbe0..d7606a3c449b0 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -28,6 +28,21 @@
 
 #include "trace.h"
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset =  sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
 	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2f2969aef60c6..9f8b203737df2 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -38,6 +38,21 @@
 #define VECTORSPACING 0x100	/* for EI/VI mode */
 #endif
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS()
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("wait", wait_exits),
 	VCPU_STAT("cache", cache_exits),
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index ae9f1b855ff9d..1f004837f9c57 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -38,6 +38,23 @@
 
 /* #define EXIT_DEBUG */
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_ICOUNTER(VM, num_2M_pages),
+	STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("exits", sum_exits),
 	VCPU_STAT("mmio", mmio_exits),
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7a75559ab51d5..a49ea4dcf9638 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,6 +36,23 @@
 
 unsigned long kvmppc_booke_handlers;
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_ICOUNTER(VM, num_2M_pages),
+	STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("mmio", mmio_exits),
 	VCPU_STAT("sig", signal_exits),
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 75ad44c447176..c7c7a28af41cb 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -58,6 +58,26 @@
 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_COUNTER(VM, inject_io),
+	STATS_DESC_COUNTER(VM, inject_float_mchk),
+	STATS_DESC_COUNTER(VM, inject_pfault_done),
+	STATS_DESC_COUNTER(VM, inject_service_signal),
+	STATS_DESC_COUNTER(VM, inject_virtio)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("userspace_handled", exit_userspace),
 	VCPU_STAT("exit_null", exit_null),
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 71202330848ad..570fd07048471 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -223,6 +223,31 @@ EXPORT_SYMBOL_GPL(host_xss);
 u64 __read_mostly supported_xss;
 EXPORT_SYMBOL_GPL(supported_xss);
 
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+	KVM_GENERIC_VM_STATS(),
+	STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
+	STATS_DESC_COUNTER(VM, mmu_pte_write),
+	STATS_DESC_COUNTER(VM, mmu_pde_zapped),
+	STATS_DESC_COUNTER(VM, mmu_flooded),
+	STATS_DESC_COUNTER(VM, mmu_recycled),
+	STATS_DESC_COUNTER(VM, mmu_cache_miss),
+	STATS_DESC_ICOUNTER(VM, mmu_unsync),
+	STATS_DESC_ICOUNTER(VM, lpages),
+	STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
+	STATS_DESC_ICOUNTER(VM, max_mmu_page_hash_collisions)
+};
+static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
+		sizeof(struct kvm_vm_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vm_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vm_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("pf_fixed", pf_fixed),
 	VCPU_STAT("pf_guest", pf_guest),
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9ee7f350473bf..e79ce64b9f6fe 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -599,6 +599,7 @@ struct kvm {
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 	struct notifier_block pm_notifier;
 #endif
+	char stats_id[KVM_STATS_NAME_SIZE];
 };
 
 #define kvm_err(fmt, ...) \
@@ -1354,12 +1355,17 @@ struct _kvm_stats_desc {
 	STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS,	       \
 		KVM_STATS_BASE_POW10, -9)
 
+#define KVM_GENERIC_VM_STATS()						       \
+	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
+
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       const struct _kvm_stats_desc *desc,
 		       void *stats, size_t size_stats,
 		       char __user *user_buffer, size_t size, loff_t *offset);
+extern const struct kvm_stats_header kvm_vm_stats_header;
+extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cec986487b308..33ec43a87d0f8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4055,6 +4055,42 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 	}
 }
 
+static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
+			      size_t size, loff_t *offset)
+{
+	struct kvm *kvm = file->private_data;
+
+	return kvm_stats_read(kvm->stats_id, &kvm_vm_stats_header,
+				&kvm_vm_stats_desc[0], &kvm->stat,
+				sizeof(kvm->stat), user_buffer, size, offset);
+}
+
+static const struct file_operations kvm_vm_stats_fops = {
+	.read = kvm_vm_stats_read,
+	.llseek = noop_llseek,
+};
+
+static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
+{
+	int fd;
+	struct file *file;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile("kvm-vm-stats",
+			&kvm_vm_stats_fops, kvm, O_RDONLY);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+	file->f_mode |= FMODE_PREAD;
+	fd_install(fd, file);
+
+	return fd;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -4237,6 +4273,9 @@ static long kvm_vm_ioctl(struct file *filp,
 	case KVM_RESET_DIRTY_RINGS:
 		r = kvm_vm_ioctl_reset_dirty_pages(kvm);
 		break;
+	case KVM_GET_STATS_FD:
+		r = kvm_vm_ioctl_get_stats_fd(kvm);
+		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
@@ -4316,6 +4355,9 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
 	if (r < 0)
 		goto put_kvm;
 
+	snprintf(kvm->stats_id, sizeof(kvm->stats_id),
+			"kvm-%d", task_pid_nr(current));
+
 	file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
 	if (IS_ERR(file)) {
 		put_unused_fd(r);
-- 
GitLab


From ce55c049459cff0034cc1bcfdce3bf343a2d6317 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:06 +0000
Subject: [PATCH 3694/3804] KVM: stats: Support binary stats retrieval for a
 VCPU

Add a VCPU ioctl to get a statistics file descriptor by which a read
functionality is provided for userspace to read out VCPU stats header,
descriptors and data.
Define VCPU statistics descriptors and header for all architectures.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com> #arm64
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-5-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/guest.c    |  21 ++++++++
 arch/mips/kvm/mips.c      |  44 ++++++++++++++++
 arch/powerpc/kvm/book3s.c |  45 ++++++++++++++++
 arch/powerpc/kvm/booke.c  |  38 ++++++++++++++
 arch/s390/kvm/kvm-s390.c  | 108 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c        |  41 +++++++++++++++
 include/linux/kvm_host.h  |  13 ++++-
 virt/kvm/kvm_main.c       |  51 +++++++++++++++++-
 8 files changed, 359 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index d7606a3c449b0..f1dc2092d3a0b 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -43,6 +43,27 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, hvc_exit_stat),
+	STATS_DESC_COUNTER(VCPU, wfe_exit_stat),
+	STATS_DESC_COUNTER(VCPU, wfi_exit_stat),
+	STATS_DESC_COUNTER(VCPU, mmio_exit_user),
+	STATS_DESC_COUNTER(VCPU, mmio_exit_kernel),
+	STATS_DESC_COUNTER(VCPU, exits)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
 	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 9f8b203737df2..2aba78c2266dc 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -53,6 +53,50 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, wait_exits),
+	STATS_DESC_COUNTER(VCPU, cache_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, int_exits),
+	STATS_DESC_COUNTER(VCPU, cop_unusable_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmod_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmiss_ld_exits),
+	STATS_DESC_COUNTER(VCPU, tlbmiss_st_exits),
+	STATS_DESC_COUNTER(VCPU, addrerr_st_exits),
+	STATS_DESC_COUNTER(VCPU, addrerr_ld_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, resvd_inst_exits),
+	STATS_DESC_COUNTER(VCPU, break_inst_exits),
+	STATS_DESC_COUNTER(VCPU, trap_inst_exits),
+	STATS_DESC_COUNTER(VCPU, msa_fpe_exits),
+	STATS_DESC_COUNTER(VCPU, fpe_exits),
+	STATS_DESC_COUNTER(VCPU, msa_disabled_exits),
+	STATS_DESC_COUNTER(VCPU, flush_dcache_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gpsi_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gsfc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_hc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_grr_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gva_exits),
+	STATS_DESC_COUNTER(VCPU, vz_ghfc_exits),
+	STATS_DESC_COUNTER(VCPU, vz_gpa_exits),
+	STATS_DESC_COUNTER(VCPU, vz_resvd_exits),
+#ifdef CONFIG_CPU_LOONGSON64
+	STATS_DESC_COUNTER(VCPU, vz_cpucfg_exits),
+#endif
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("wait", wait_exits),
 	VCPU_STAT("cache", cache_exits),
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 1f004837f9c57..61229302bce28 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -55,6 +55,51 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, sum_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, light_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, isi_exits),
+	STATS_DESC_COUNTER(VCPU, dsi_exits),
+	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+	STATS_DESC_COUNTER(VCPU, dec_exits),
+	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+	STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+	STATS_DESC_COUNTER(VCPU, dbell_exits),
+	STATS_DESC_COUNTER(VCPU, gdbell_exits),
+	STATS_DESC_COUNTER(VCPU, ld),
+	STATS_DESC_COUNTER(VCPU, st),
+	STATS_DESC_COUNTER(VCPU, pf_storage),
+	STATS_DESC_COUNTER(VCPU, pf_instruc),
+	STATS_DESC_COUNTER(VCPU, sp_storage),
+	STATS_DESC_COUNTER(VCPU, sp_instruc),
+	STATS_DESC_COUNTER(VCPU, queue_intr),
+	STATS_DESC_COUNTER(VCPU, ld_slow),
+	STATS_DESC_COUNTER(VCPU, st_slow),
+	STATS_DESC_COUNTER(VCPU, pthru_all),
+	STATS_DESC_COUNTER(VCPU, pthru_host),
+	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("exits", sum_exits),
 	VCPU_STAT("mmio", mmio_exits),
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a49ea4dcf9638..6e8de33bc1381 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -53,6 +53,44 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, sum_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, light_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+	STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+	STATS_DESC_COUNTER(VCPU, syscall_exits),
+	STATS_DESC_COUNTER(VCPU, isi_exits),
+	STATS_DESC_COUNTER(VCPU, dsi_exits),
+	STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+	STATS_DESC_COUNTER(VCPU, dec_exits),
+	STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+	STATS_DESC_TIME_NSEC(VCPU, halt_wait_ns),
+	STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+	STATS_DESC_COUNTER(VCPU, dbell_exits),
+	STATS_DESC_COUNTER(VCPU, gdbell_exits),
+	STATS_DESC_COUNTER(VCPU, ld),
+	STATS_DESC_COUNTER(VCPU, st),
+	STATS_DESC_COUNTER(VCPU, pthru_all),
+	STATS_DESC_COUNTER(VCPU, pthru_host),
+	STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("mmio", mmio_exits),
 	VCPU_STAT("sig", signal_exits),
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c7c7a28af41cb..8ac10bcaf8ba5 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -78,6 +78,114 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, exit_userspace),
+	STATS_DESC_COUNTER(VCPU, exit_null),
+	STATS_DESC_COUNTER(VCPU, exit_external_request),
+	STATS_DESC_COUNTER(VCPU, exit_io_request),
+	STATS_DESC_COUNTER(VCPU, exit_external_interrupt),
+	STATS_DESC_COUNTER(VCPU, exit_stop_request),
+	STATS_DESC_COUNTER(VCPU, exit_validity),
+	STATS_DESC_COUNTER(VCPU, exit_instruction),
+	STATS_DESC_COUNTER(VCPU, exit_pei),
+	STATS_DESC_COUNTER(VCPU, halt_no_poll_steal),
+	STATS_DESC_COUNTER(VCPU, instruction_lctl),
+	STATS_DESC_COUNTER(VCPU, instruction_lctlg),
+	STATS_DESC_COUNTER(VCPU, instruction_stctl),
+	STATS_DESC_COUNTER(VCPU, instruction_stctg),
+	STATS_DESC_COUNTER(VCPU, exit_program_interruption),
+	STATS_DESC_COUNTER(VCPU, exit_instr_and_program),
+	STATS_DESC_COUNTER(VCPU, exit_operation_exception),
+	STATS_DESC_COUNTER(VCPU, deliver_ckc),
+	STATS_DESC_COUNTER(VCPU, deliver_cputm),
+	STATS_DESC_COUNTER(VCPU, deliver_external_call),
+	STATS_DESC_COUNTER(VCPU, deliver_emergency_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_service_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_virtio),
+	STATS_DESC_COUNTER(VCPU, deliver_stop_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_prefix_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_restart_signal),
+	STATS_DESC_COUNTER(VCPU, deliver_program),
+	STATS_DESC_COUNTER(VCPU, deliver_io),
+	STATS_DESC_COUNTER(VCPU, deliver_machine_check),
+	STATS_DESC_COUNTER(VCPU, exit_wait_state),
+	STATS_DESC_COUNTER(VCPU, inject_ckc),
+	STATS_DESC_COUNTER(VCPU, inject_cputm),
+	STATS_DESC_COUNTER(VCPU, inject_external_call),
+	STATS_DESC_COUNTER(VCPU, inject_emergency_signal),
+	STATS_DESC_COUNTER(VCPU, inject_mchk),
+	STATS_DESC_COUNTER(VCPU, inject_pfault_init),
+	STATS_DESC_COUNTER(VCPU, inject_program),
+	STATS_DESC_COUNTER(VCPU, inject_restart),
+	STATS_DESC_COUNTER(VCPU, inject_set_prefix),
+	STATS_DESC_COUNTER(VCPU, inject_stop_signal),
+	STATS_DESC_COUNTER(VCPU, instruction_epsw),
+	STATS_DESC_COUNTER(VCPU, instruction_gs),
+	STATS_DESC_COUNTER(VCPU, instruction_io_other),
+	STATS_DESC_COUNTER(VCPU, instruction_lpsw),
+	STATS_DESC_COUNTER(VCPU, instruction_lpswe),
+	STATS_DESC_COUNTER(VCPU, instruction_pfmf),
+	STATS_DESC_COUNTER(VCPU, instruction_ptff),
+	STATS_DESC_COUNTER(VCPU, instruction_sck),
+	STATS_DESC_COUNTER(VCPU, instruction_sckpf),
+	STATS_DESC_COUNTER(VCPU, instruction_stidp),
+	STATS_DESC_COUNTER(VCPU, instruction_spx),
+	STATS_DESC_COUNTER(VCPU, instruction_stpx),
+	STATS_DESC_COUNTER(VCPU, instruction_stap),
+	STATS_DESC_COUNTER(VCPU, instruction_iske),
+	STATS_DESC_COUNTER(VCPU, instruction_ri),
+	STATS_DESC_COUNTER(VCPU, instruction_rrbe),
+	STATS_DESC_COUNTER(VCPU, instruction_sske),
+	STATS_DESC_COUNTER(VCPU, instruction_ipte_interlock),
+	STATS_DESC_COUNTER(VCPU, instruction_stsi),
+	STATS_DESC_COUNTER(VCPU, instruction_stfl),
+	STATS_DESC_COUNTER(VCPU, instruction_tb),
+	STATS_DESC_COUNTER(VCPU, instruction_tpi),
+	STATS_DESC_COUNTER(VCPU, instruction_tprot),
+	STATS_DESC_COUNTER(VCPU, instruction_tsch),
+	STATS_DESC_COUNTER(VCPU, instruction_sie),
+	STATS_DESC_COUNTER(VCPU, instruction_essa),
+	STATS_DESC_COUNTER(VCPU, instruction_sthyi),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_sense_running),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_external_call),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_emergency),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_cond_emergency),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_start),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_stop_store_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_store_adtl_status),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_arch),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_prefix),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_restart),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset),
+	STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown),
+	STATS_DESC_COUNTER(VCPU, diagnose_10),
+	STATS_DESC_COUNTER(VCPU, diagnose_44),
+	STATS_DESC_COUNTER(VCPU, diagnose_9c),
+	STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored),
+	STATS_DESC_COUNTER(VCPU, diagnose_9c_forward),
+	STATS_DESC_COUNTER(VCPU, diagnose_258),
+	STATS_DESC_COUNTER(VCPU, diagnose_308),
+	STATS_DESC_COUNTER(VCPU, diagnose_500),
+	STATS_DESC_COUNTER(VCPU, diagnose_other),
+	STATS_DESC_COUNTER(VCPU, pfault_sync)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("userspace_handled", exit_userspace),
 	VCPU_STAT("exit_null", exit_null),
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 570fd07048471..53b7c25d6ebc5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -248,6 +248,47 @@ const struct kvm_stats_header kvm_vm_stats_header = {
 		       sizeof(kvm_vm_stats_desc),
 };
 
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+	KVM_GENERIC_VCPU_STATS(),
+	STATS_DESC_COUNTER(VCPU, pf_fixed),
+	STATS_DESC_COUNTER(VCPU, pf_guest),
+	STATS_DESC_COUNTER(VCPU, tlb_flush),
+	STATS_DESC_COUNTER(VCPU, invlpg),
+	STATS_DESC_COUNTER(VCPU, exits),
+	STATS_DESC_COUNTER(VCPU, io_exits),
+	STATS_DESC_COUNTER(VCPU, mmio_exits),
+	STATS_DESC_COUNTER(VCPU, signal_exits),
+	STATS_DESC_COUNTER(VCPU, irq_window_exits),
+	STATS_DESC_COUNTER(VCPU, nmi_window_exits),
+	STATS_DESC_COUNTER(VCPU, l1d_flush),
+	STATS_DESC_COUNTER(VCPU, halt_exits),
+	STATS_DESC_COUNTER(VCPU, request_irq_exits),
+	STATS_DESC_COUNTER(VCPU, irq_exits),
+	STATS_DESC_COUNTER(VCPU, host_state_reload),
+	STATS_DESC_COUNTER(VCPU, fpu_reload),
+	STATS_DESC_COUNTER(VCPU, insn_emulation),
+	STATS_DESC_COUNTER(VCPU, insn_emulation_fail),
+	STATS_DESC_COUNTER(VCPU, hypercalls),
+	STATS_DESC_COUNTER(VCPU, irq_injections),
+	STATS_DESC_COUNTER(VCPU, nmi_injections),
+	STATS_DESC_COUNTER(VCPU, req_event),
+	STATS_DESC_COUNTER(VCPU, nested_run),
+	STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
+	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
+	STATS_DESC_ICOUNTER(VCPU, guest_mode)
+};
+static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
+		sizeof(struct kvm_vcpu_stat) / sizeof(u64));
+
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+	.name_size = KVM_STATS_NAME_SIZE,
+	.num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+	.id_offset = sizeof(struct kvm_stats_header),
+	.desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+	.data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+		       sizeof(kvm_vcpu_stats_desc),
+};
+
 struct kvm_stats_debugfs_item debugfs_entries[] = {
 	VCPU_STAT("pf_fixed", pf_fixed),
 	VCPU_STAT("pf_guest", pf_guest),
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e79ce64b9f6fe..9e75afef16b05 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -305,7 +305,6 @@ struct kvm_vcpu {
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
-	struct kvm_vcpu_stat stat;
 	unsigned int halt_poll_ns;
 	bool valid_wakeup;
 
@@ -342,6 +341,8 @@ struct kvm_vcpu {
 	bool preempted;
 	bool ready;
 	struct kvm_vcpu_arch arch;
+	struct kvm_vcpu_stat stat;
+	char stats_id[KVM_STATS_NAME_SIZE];
 	struct kvm_dirty_ring dirty_ring;
 };
 
@@ -1358,6 +1359,14 @@ struct _kvm_stats_desc {
 #define KVM_GENERIC_VM_STATS()						       \
 	STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush)
 
+#define KVM_GENERIC_VCPU_STATS()					       \
+	STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll),		       \
+	STATS_DESC_COUNTER(VCPU_GENERIC, halt_attempted_poll),		       \
+	STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid),		       \
+	STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup),			       \
+	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns),	       \
+	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns)
+
 extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
@@ -1366,6 +1375,8 @@ ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       char __user *user_buffer, size_t size, loff_t *offset);
 extern const struct kvm_stats_header kvm_vm_stats_header;
 extern const struct _kvm_stats_desc kvm_vm_stats_desc[];
+extern const struct kvm_stats_header kvm_vcpu_stats_header;
+extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
 
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 33ec43a87d0f8..c8d0028df4ac3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3448,6 +3448,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus);
 	BUG_ON(kvm->vcpus[vcpu->vcpu_idx]);
 
+	/* Fill the stats id string for the vcpu */
+	snprintf(vcpu->stats_id, sizeof(vcpu->stats_id), "kvm-%d/vcpu-%d",
+		 task_pid_nr(current), id);
+
 	/* Now it's all set up, let userspace reach it */
 	kvm_get_kvm(kvm);
 	r = create_vcpu_fd(vcpu);
@@ -3497,6 +3501,44 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset)
 	return 0;
 }
 
+static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
+			      size_t size, loff_t *offset)
+{
+	struct kvm_vcpu *vcpu = file->private_data;
+
+	return kvm_stats_read(vcpu->stats_id, &kvm_vcpu_stats_header,
+			&kvm_vcpu_stats_desc[0], &vcpu->stat,
+			sizeof(vcpu->stat), user_buffer, size, offset);
+}
+
+static const struct file_operations kvm_vcpu_stats_fops = {
+	.read = kvm_vcpu_stats_read,
+	.llseek = noop_llseek,
+};
+
+static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+	int fd;
+	struct file *file;
+	char name[15 + ITOA_MAX_LEN + 1];
+
+	snprintf(name, sizeof(name), "kvm-vcpu-stats:%d", vcpu->vcpu_id);
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile(name, &kvm_vcpu_stats_fops, vcpu, O_RDONLY);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+	file->f_mode |= FMODE_PREAD;
+	fd_install(fd, file);
+
+	return fd;
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -3694,6 +3736,10 @@ out_free1:
 		r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
 		break;
 	}
+	case KVM_GET_STATS_FD: {
+		r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
+		break;
+	}
 	default:
 		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 	}
@@ -3952,6 +3998,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #else
 		return 0;
 #endif
+	case KVM_CAP_BINARY_STATS_FD:
+		return 1;
 	default:
 		break;
 	}
@@ -5254,7 +5302,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 		kmem_cache_create_usercopy("kvm_vcpu", vcpu_size, vcpu_align,
 					   SLAB_ACCOUNT,
 					   offsetof(struct kvm_vcpu, arch),
-					   sizeof_field(struct kvm_vcpu, arch),
+					   offsetofend(struct kvm_vcpu, stats_id)
+					   - offsetof(struct kvm_vcpu, arch),
 					   NULL);
 	if (!kvm_vcpu_cache) {
 		r = -ENOMEM;
-- 
GitLab


From fdc09ddd40645b0e3f245e4512fd4b4c34cde5e5 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:07 +0000
Subject: [PATCH 3695/3804] KVM: stats: Add documentation for binary statistics
 interface

This new API provides a file descriptor for every VM and VCPU to read
KVM statistics data in binary format.
It is meant to provide a lightweight, flexible, scalable and efficient
lock-free solution for user space telemetry applications to pull the
statistics data periodically for large scale systems. The pulling
frequency could be as high as a few times per second.
The statistics descriptors are defined by KVM in kernel and can be used
by userspace to discover VM/VCPU statistics during the one-time setup
stage.
The statistics data itself could be read out by userspace telemetry
periodically without any extra parsing or setup effort.
There are a few existing interface protocols and definitions, but none
of them fulfils all the requirements this interface implements, listed
below:
1. During high frequency periodic stats reading, there should be no
   extra efforts except the stats data read itself.
2. Support stats annotation, like type (cumulative, instantaneous,
   peak, histogram, etc) and unit (counter, time, size, cycles, etc).
3. The stats data reading should be free of lock/synchronization. We
   don't care about the consistency between all the stats data. All
   stats data can not be read out at exactly the same time. We really
   care about the change or trend of the stats data. The lock-free
   solution is not just for efficiency and scalability, also for the
   stats data accuracy and usability. For example, in the situation
   that all the stats data readings are protected by a global lock,
   if one VCPU died somehow with that lock held, then all stats data
   reading would be blocked, and we would have no way to tell from the
   stats data which VCPU has died.
4. The stats data reading workload can be handed over to other
   unprivileged process.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-6-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 170 +++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index dd3fe231e4352..b87fa32835f28 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5081,6 +5081,176 @@ Writes special registers into the vcpu.
 See KVM_GET_SREGS2 for the data structures.
 This ioctl (when supported) replaces the KVM_SET_SREGS.
 
+4.133 KVM_GET_STATS_FD
+----------------------
+
+:Capability: KVM_CAP_BINARY_STATS_FD
+:Architectures: all
+:Type: vm ioctl, vcpu ioctl
+:Parameters: none
+:Returns: statistics file descriptor on success, < 0 on error
+
+Errors:
+
+  ======     ======================================================
+  ENOMEM     if the fd could not be created due to lack of memory
+  EMFILE     if the number of opened files exceeds the limit
+  ======     ======================================================
+
+The returned file descriptor can be used to read VM/vCPU statistics data in
+binary format. The data in the file descriptor consists of four blocks
+organized as follows:
+
++-------------+
+|   Header    |
++-------------+
+|  id string  |
++-------------+
+| Descriptors |
++-------------+
+| Stats Data  |
++-------------+
+
+Apart from the header starting at offset 0, please be aware that it is
+not guaranteed that the four blocks are adjacent or in the above order;
+the offsets of the id, descriptors and data blocks are found in the
+header.  However, all four blocks are aligned to 64 bit offsets in the
+file and they do not overlap.
+
+All blocks except the data block are immutable.  Userspace needs to read them
+only once after retrieving the file descriptor, and can then use ``pread`` or
+``lseek`` to read the statistics repeatedly.
+
+All data is in system endianness.
+
+The format of the header is as follows::
+
+	struct kvm_stats_header {
+		__u32 flags;
+		__u32 name_size;
+		__u32 num_desc;
+		__u32 id_offset;
+		__u32 desc_offset;
+		__u32 data_offset;
+	};
+
+The ``flags`` field is not used at the moment. It is always read as 0.
+
+The ``name_size`` field is the size (in bytes) of the statistics name string
+(including trailing '\0') which is contained in the "id string" block and
+appended at the end of every descriptor.
+
+The ``num_desc`` field is the number of descriptors that are included in the
+descriptor block.  (The actual number of values in the data block may be
+larger, since each descriptor may comprise more than one value).
+
+The ``id_offset`` field is the offset of the id string from the start of the
+file indicated by the file descriptor. It is a multiple of 8.
+
+The ``desc_offset`` field is the offset of the Descriptors block from the start
+of the file indicated by the file descriptor. It is a multiple of 8.
+
+The ``data_offset`` field is the offset of the Stats Data block from the start
+of the file indicated by the file descriptor. It is a multiple of 8.
+
+The id string block contains a string which identifies the file descriptor on
+which KVM_GET_STATS_FD was invoked.  The size of the block, including the
+trailing ``'\0'``, is indicated by the ``name_size`` field in the header.
+
+The descriptors block only needs to be read once for the lifetime of the
+file descriptor. It contains a sequence of ``struct kvm_stats_desc``, each
+followed by a string of size ``name_size``::
+
+	#define KVM_STATS_TYPE_SHIFT		0
+	#define KVM_STATS_TYPE_MASK		(0xF << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
+	#define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
+
+	#define KVM_STATS_UNIT_SHIFT		4
+	#define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
+	#define KVM_STATS_UNIT_NONE		(0x0 << KVM_STATS_UNIT_SHIFT)
+	#define KVM_STATS_UNIT_BYTES		(0x1 << KVM_STATS_UNIT_SHIFT)
+	#define KVM_STATS_UNIT_SECONDS		(0x2 << KVM_STATS_UNIT_SHIFT)
+	#define KVM_STATS_UNIT_CYCLES		(0x3 << KVM_STATS_UNIT_SHIFT)
+
+	#define KVM_STATS_BASE_SHIFT		8
+	#define KVM_STATS_BASE_MASK		(0xF << KVM_STATS_BASE_SHIFT)
+	#define KVM_STATS_BASE_POW10		(0x0 << KVM_STATS_BASE_SHIFT)
+	#define KVM_STATS_BASE_POW2		(0x1 << KVM_STATS_BASE_SHIFT)
+
+	struct kvm_stats_desc {
+		__u32 flags;
+		__s16 exponent;
+		__u16 size;
+		__u32 offset;
+		__u32 unused;
+		char name[];
+	};
+
+The ``flags`` field contains the type and unit of the statistics data described
+by this descriptor. Its endianness is CPU native.
+The following flags are supported:
+
+Bits 0-3 of ``flags`` encode the type:
+  * ``KVM_STATS_TYPE_CUMULATIVE``
+    The statistics data is cumulative. The value of data can only be increased.
+    Most of the counters used in KVM are of this type.
+    The corresponding ``size`` field for this type is always 1.
+    All cumulative statistics data are read/write.
+  * ``KVM_STATS_TYPE_INSTANT``
+    The statistics data is instantaneous. Its value can be increased or
+    decreased. This type is usually used as a measurement of some resources,
+    like the number of dirty pages, the number of large pages, etc.
+    All instant statistics are read only.
+    The corresponding ``size`` field for this type is always 1.
+  * ``KVM_STATS_TYPE_PEAK``
+    The statistics data is peak. The value of data can only be increased, and
+    represents a peak value for a measurement, for example the maximum number
+    of items in a hash table bucket, the longest time waited and so on.
+    The corresponding ``size`` field for this type is always 1.
+
+Bits 4-7 of ``flags`` encode the unit:
+  * ``KVM_STATS_UNIT_NONE``
+    There is no unit for the value of statistics data. This usually means that
+    the value is a simple counter of an event.
+  * ``KVM_STATS_UNIT_BYTES``
+    It indicates that the statistics data is used to measure memory size, in the
+    unit of Byte, KiByte, MiByte, GiByte, etc. The unit of the data is
+    determined by the ``exponent`` field in the descriptor.
+  * ``KVM_STATS_UNIT_SECONDS``
+    It indicates that the statistics data is used to measure time or latency.
+  * ``KVM_STATS_UNIT_CYCLES``
+    It indicates that the statistics data is used to measure CPU clock cycles.
+
+Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
+unit:
+  * ``KVM_STATS_BASE_POW10``
+    The scale is based on power of 10. It is used for measurement of time and
+    CPU clock cycles.  For example, an exponent of -9 can be used with
+    ``KVM_STATS_UNIT_SECONDS`` to express that the unit is nanoseconds.
+  * ``KVM_STATS_BASE_POW2``
+    The scale is based on power of 2. It is used for measurement of memory size.
+    For example, an exponent of 20 can be used with ``KVM_STATS_UNIT_BYTES`` to
+    express that the unit is MiB.
+
+The ``size`` field is the number of values of this statistics data. Its
+value is 1 for most simple statistics, meaning the data consists of a
+single unsigned 64-bit value.
+
+The ``offset`` field is the offset from the start of Data Block to the start of
+the corresponding statistics data.
+
+The ``unused`` field is reserved for future support for other types of
+statistics data, like log/linear histogram. Its value is always 0 for the types
+defined above.
+
+The ``name`` field is the name string of the statistics data. The name string
+starts at the end of ``struct kvm_stats_desc``.  The maximum length including
+the trailing ``'\0'``, is indicated by ``name_size`` in the header.
+
+The Stats Data block contains an array of 64-bit values in the same order
+as the descriptors in Descriptors block.
 
 5. The kvm_run structure
 ========================
-- 
GitLab


From 0b45d58738cd67d8b63bf093bd56f2f57a00f642 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Fri, 18 Jun 2021 22:27:08 +0000
Subject: [PATCH 3696/3804] KVM: selftests: Add selftest for KVM statistics
 data binary interface

Add selftest to check KVM stats descriptors validity.

Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Tested-by: Fuad Tabba <tabba@google.com> #arm64
Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-7-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   3 +
 .../testing/selftests/kvm/include/kvm_util.h  |   3 +
 .../selftests/kvm/kvm_binary_stats_test.c     | 237 ++++++++++++++++++
 tools/testing/selftests/kvm/lib/kvm_util.c    |  12 +
 5 files changed, 256 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/kvm_binary_stats_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 6ead3403eca67..14c550d64d3c5 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -47,3 +47,4 @@
 /memslot_perf_test
 /set_memory_region_test
 /steal_time
+/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 8dc007bac0fed..279051fb901ce 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -80,6 +80,7 @@ TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += memslot_perf_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
+TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
 
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
@@ -91,6 +92,7 @@ TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
+TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
 
 TEST_GEN_PROGS_s390x = s390x/memop
 TEST_GEN_PROGS_s390x += s390x/resets
@@ -100,6 +102,7 @@ TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 TEST_GEN_PROGS_s390x += kvm_page_table_test
 TEST_GEN_PROGS_s390x += set_memory_region_test
+TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 62573918299cf..45678a2566dd6 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -393,4 +393,7 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 #define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
 	__GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
 
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 0000000000000..5906bbc08483c
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+static void stats_test(int stats_fd)
+{
+	ssize_t ret;
+	int i;
+	size_t size_desc;
+	size_t size_data = 0;
+	struct kvm_stats_header *header;
+	char *id;
+	struct kvm_stats_desc *stats_desc;
+	u64 *stats_data;
+	struct kvm_stats_desc *pdesc;
+
+	/* Read kvm stats header */
+	header = malloc(sizeof(*header));
+	TEST_ASSERT(header, "Allocate memory for stats header");
+
+	ret = read(stats_fd, header, sizeof(*header));
+	TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+	size_desc = sizeof(*stats_desc) + header->name_size;
+
+	/* Read kvm stats id string */
+	id = malloc(header->name_size);
+	TEST_ASSERT(id, "Allocate memory for id string");
+	ret = read(stats_fd, id, header->name_size);
+	TEST_ASSERT(ret == header->name_size, "Read id string");
+
+	/* Check id string, that should start with "kvm" */
+	TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size,
+				"Invalid KVM stats type, id: %s", id);
+
+	/* Sanity check for other fields in header */
+	if (header->num_desc == 0) {
+		printf("No KVM stats defined!");
+		return;
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
+			&& header->desc_offset >= sizeof(*header)
+			&& header->data_offset >= sizeof(*header),
+			"Invalid offset fields in header");
+	TEST_ASSERT(header->desc_offset > header->data_offset ||
+			(header->desc_offset + size_desc * header->num_desc <=
+							header->data_offset),
+			"Descriptor block is overlapped with data block");
+
+	/* Allocate memory for stats descriptors */
+	stats_desc = calloc(header->num_desc, size_desc);
+	TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+	/* Read kvm stats descriptors */
+	ret = pread(stats_fd, stats_desc,
+			size_desc * header->num_desc, header->desc_offset);
+	TEST_ASSERT(ret == size_desc * header->num_desc,
+			"Read KVM stats descriptors");
+
+	/* Sanity check for fields in descriptors */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		/* Check type,unit,base boundaries */
+		TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
+				<= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
+				<= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
+		TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
+				<= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
+		/* Check exponent for stats unit
+		 * Exponent for counter should be greater than or equal to 0
+		 * Exponent for unit bytes should be greater than or equal to 0
+		 * Exponent for unit seconds should be less than or equal to 0
+		 * Exponent for unit clock cycles should be greater than or
+		 * equal to 0
+		 */
+		switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+		case KVM_STATS_UNIT_NONE:
+		case KVM_STATS_UNIT_BYTES:
+		case KVM_STATS_UNIT_CYCLES:
+			TEST_ASSERT(pdesc->exponent >= 0,
+					"Unsupported KVM stats unit");
+			break;
+		case KVM_STATS_UNIT_SECONDS:
+			TEST_ASSERT(pdesc->exponent <= 0,
+					"Unsupported KVM stats unit");
+			break;
+		}
+		/* Check name string */
+		TEST_ASSERT(strlen(pdesc->name) < header->name_size,
+				"KVM stats name(%s) too long", pdesc->name);
+		/* Check size field, which should not be zero */
+		TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
+				pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+	/* Check overlap */
+	TEST_ASSERT(header->data_offset >= header->desc_offset
+		|| header->data_offset + size_data <= header->desc_offset,
+		"Data block is overlapped with Descriptor block");
+	/* Check validity of all stats data size */
+	TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
+			"Data size is not correct");
+	/* Check stats offset */
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		TEST_ASSERT(pdesc->offset < size_data,
+			"Invalid offset (%u) for stats: %s",
+			pdesc->offset, pdesc->name);
+	}
+
+	/* Allocate memory for stats data */
+	stats_data = malloc(size_data);
+	TEST_ASSERT(stats_data, "Allocate memory for stats data");
+	/* Read kvm stats data as a bulk */
+	ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+	TEST_ASSERT(ret == size_data, "Read KVM stats data");
+	/* Read kvm stats data one by one */
+	size_data = 0;
+	for (i = 0; i < header->num_desc; ++i) {
+		pdesc = (void *)stats_desc + i * size_desc;
+		ret = pread(stats_fd, stats_data,
+				pdesc->size * sizeof(*stats_data),
+				header->data_offset + size_data);
+		TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
+				"Read data of KVM stats: %s", pdesc->name);
+		size_data += pdesc->size * sizeof(*stats_data);
+	}
+
+	free(stats_data);
+	free(stats_desc);
+	free(id);
+	free(header);
+}
+
+
+static void vm_stats_test(struct kvm_vm *vm)
+{
+	int stats_fd;
+
+	/* Get fd for VM stats */
+	stats_fd = vm_get_stats_fd(vm);
+	TEST_ASSERT(stats_fd >= 0, "Get VM stats fd");
+
+	stats_test(stats_fd);
+	close(stats_fd);
+	TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+{
+	int stats_fd;
+
+	/* Get fd for VCPU stats */
+	stats_fd = vcpu_get_stats_fd(vm, vcpu_id);
+	TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd");
+
+	stats_test(stats_fd);
+	close(stats_fd);
+	TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM		4
+#define DEFAULT_NUM_VCPU	4
+
+/*
+ * Usage: kvm_binary_stats_test [#vm] [#vcpu]
+ * The first parameter #vm sets the number of VMs to create.
+ * The second parameter #vcpu sets the number of VCPUs to create per VM.
+ * By default, DEFAULT_NUM_VM VMs are created, each with DEFAULT_NUM_VCPU
+ * VCPUs.
+ */
+
+int main(int argc, char *argv[])
+{
+	int i, j;
+	struct kvm_vm **vms;
+	int max_vm = DEFAULT_NUM_VM;
+	int max_vcpu = DEFAULT_NUM_VCPU;
+
+	/* Get the number of VMs and VCPUs that would be created for testing. */
+	if (argc > 1) {
+		max_vm = strtol(argv[1], NULL, 0);
+		if (max_vm <= 0)
+			max_vm = DEFAULT_NUM_VM;
+	}
+	if (argc > 2) {
+		max_vcpu = strtol(argv[2], NULL, 0);
+		if (max_vcpu <= 0)
+			max_vcpu = DEFAULT_NUM_VCPU;
+	}
+
+	/* Check the extension for binary stats */
+	if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
+		print_skip("Binary form statistics interface is not supported");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VMs and VCPUs */
+	vms = malloc(sizeof(vms[0]) * max_vm);
+	TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+	for (i = 0; i < max_vm; ++i) {
+		vms[i] = vm_create(VM_MODE_DEFAULT,
+				DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+		for (j = 0; j < max_vcpu; ++j)
+			vm_vcpu_add(vms[i], j);
+	}
+
+	/* Check stats read for every VM and VCPU */
+	for (i = 0; i < max_vm; ++i) {
+		vm_stats_test(vms[i]);
+		for (j = 0; j < max_vcpu; ++j)
+			vcpu_stats_test(vms[i], j);
+	}
+
+	for (i = 0; i < max_vm; ++i)
+		kvm_vm_free(vms[i]);
+	free(vms);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 7a2b84e812928..5b56b57b3c207 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2326,3 +2326,15 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
 	n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
 	return vm_adjust_num_guest_pages(mode, n);
 }
+
+int vm_get_stats_fd(struct kvm_vm *vm)
+{
+	return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+}
+
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+}
-- 
GitLab


From bc9e9e672df9f16f3825320c53ec01b3d44add28 Mon Sep 17 00:00:00 2001
From: Jing Zhang <jingzhangos@google.com>
Date: Wed, 23 Jun 2021 17:28:46 -0400
Subject: [PATCH 3697/3804] KVM: debugfs: Reuse binary stats descriptors

To remove code duplication, use the binary stats descriptors in the
implementation of the debugfs interface for statistics. This unifies
the definition of statistics for the binary and debugfs interfaces.

Signed-off-by: Jing Zhang <jingzhangos@google.com>
Message-Id: <20210618222709.1858088-8-jingzhangos@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/guest.c    |  16 ------
 arch/mips/kvm/mips.c      |  39 --------------
 arch/powerpc/kvm/book3s.c |  33 ------------
 arch/powerpc/kvm/booke.c  |  25 ---------
 arch/s390/kvm/kvm-s390.c  | 108 --------------------------------------
 arch/x86/kvm/x86.c        |  49 +----------------
 include/linux/kvm_host.h  |  17 +-----
 virt/kvm/kvm_main.c       | 104 ++++++++++++++++++++++++++----------
 8 files changed, 78 insertions(+), 313 deletions(-)

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index f1dc2092d3a0b..1512a8007a786 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -64,22 +64,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("hvc_exit_stat", hvc_exit_stat),
-	VCPU_STAT("wfe_exit_stat", wfe_exit_stat),
-	VCPU_STAT("wfi_exit_stat", wfi_exit_stat),
-	VCPU_STAT("mmio_exit_user", mmio_exit_user),
-	VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel),
-	VCPU_STAT("exits", exits),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	{ NULL }
-};
-
 static bool core_reg_offset_is_vreg(u64 off)
 {
 	return off >= KVM_REG_ARM_CORE_REG(fp_regs.vregs) &&
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2aba78c2266dc..af9dd029a4e12 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -97,45 +97,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("wait", wait_exits),
-	VCPU_STAT("cache", cache_exits),
-	VCPU_STAT("signal", signal_exits),
-	VCPU_STAT("interrupt", int_exits),
-	VCPU_STAT("cop_unusable", cop_unusable_exits),
-	VCPU_STAT("tlbmod", tlbmod_exits),
-	VCPU_STAT("tlbmiss_ld", tlbmiss_ld_exits),
-	VCPU_STAT("tlbmiss_st", tlbmiss_st_exits),
-	VCPU_STAT("addrerr_st", addrerr_st_exits),
-	VCPU_STAT("addrerr_ld", addrerr_ld_exits),
-	VCPU_STAT("syscall", syscall_exits),
-	VCPU_STAT("resvd_inst", resvd_inst_exits),
-	VCPU_STAT("break_inst", break_inst_exits),
-	VCPU_STAT("trap_inst", trap_inst_exits),
-	VCPU_STAT("msa_fpe", msa_fpe_exits),
-	VCPU_STAT("fpe", fpe_exits),
-	VCPU_STAT("msa_disabled", msa_disabled_exits),
-	VCPU_STAT("flush_dcache", flush_dcache_exits),
-	VCPU_STAT("vz_gpsi", vz_gpsi_exits),
-	VCPU_STAT("vz_gsfc", vz_gsfc_exits),
-	VCPU_STAT("vz_hc", vz_hc_exits),
-	VCPU_STAT("vz_grr", vz_grr_exits),
-	VCPU_STAT("vz_gva", vz_gva_exits),
-	VCPU_STAT("vz_ghfc", vz_ghfc_exits),
-	VCPU_STAT("vz_gpa", vz_gpa_exits),
-	VCPU_STAT("vz_resvd", vz_resvd_exits),
-#ifdef CONFIG_CPU_LOONGSON64
-	VCPU_STAT("vz_cpucfg", vz_cpucfg_exits),
-#endif
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	{NULL}
-};
-
 bool kvm_trace_guest_mode_change;
 
 int kvm_guest_mode_change_trace_reg(void)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 61229302bce28..79833f78d1da7 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -100,39 +100,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("exits", sum_exits),
-	VCPU_STAT("mmio", mmio_exits),
-	VCPU_STAT("sig", signal_exits),
-	VCPU_STAT("sysc", syscall_exits),
-	VCPU_STAT("inst_emu", emulated_inst_exits),
-	VCPU_STAT("dec", dec_exits),
-	VCPU_STAT("ext_intr", ext_intr_exits),
-	VCPU_STAT("queue_intr", queue_intr),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	VCPU_STAT("halt_wait_ns", halt_wait_ns),
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_successful_wait", halt_successful_wait),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("pf_storage", pf_storage),
-	VCPU_STAT("sp_storage", sp_storage),
-	VCPU_STAT("pf_instruc", pf_instruc),
-	VCPU_STAT("sp_instruc", sp_instruc),
-	VCPU_STAT("ld", ld),
-	VCPU_STAT("ld_slow", ld_slow),
-	VCPU_STAT("st", st),
-	VCPU_STAT("st_slow", st_slow),
-	VCPU_STAT("pthru_all", pthru_all),
-	VCPU_STAT("pthru_host", pthru_host),
-	VCPU_STAT("pthru_bad_aff", pthru_bad_aff),
-	VM_STAT("largepages_2M", num_2M_pages, .mode = 0444),
-	VM_STAT("largepages_1G", num_1G_pages, .mode = 0444),
-	{ NULL }
-};
-
 static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 			unsigned long pending_now, unsigned long old_pending)
 {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 6e8de33bc1381..551b30d84aeeb 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -91,31 +91,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("mmio", mmio_exits),
-	VCPU_STAT("sig", signal_exits),
-	VCPU_STAT("itlb_r", itlb_real_miss_exits),
-	VCPU_STAT("itlb_v", itlb_virt_miss_exits),
-	VCPU_STAT("dtlb_r", dtlb_real_miss_exits),
-	VCPU_STAT("dtlb_v", dtlb_virt_miss_exits),
-	VCPU_STAT("sysc", syscall_exits),
-	VCPU_STAT("isi", isi_exits),
-	VCPU_STAT("dsi", dsi_exits),
-	VCPU_STAT("inst_emu", emulated_inst_exits),
-	VCPU_STAT("dec", dec_exits),
-	VCPU_STAT("ext_intr", ext_intr_exits),
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("doorbell", dbell_exits),
-	VCPU_STAT("guest doorbell", gdbell_exits),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
-	{ NULL }
-};
-
 /* TODO: use vcpu_printf() */
 void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8ac10bcaf8ba5..1695f0ced5baa 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -186,114 +186,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("userspace_handled", exit_userspace),
-	VCPU_STAT("exit_null", exit_null),
-	VCPU_STAT("pfault_sync", pfault_sync),
-	VCPU_STAT("exit_validity", exit_validity),
-	VCPU_STAT("exit_stop_request", exit_stop_request),
-	VCPU_STAT("exit_external_request", exit_external_request),
-	VCPU_STAT("exit_io_request", exit_io_request),
-	VCPU_STAT("exit_external_interrupt", exit_external_interrupt),
-	VCPU_STAT("exit_instruction", exit_instruction),
-	VCPU_STAT("exit_pei", exit_pei),
-	VCPU_STAT("exit_program_interruption", exit_program_interruption),
-	VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program),
-	VCPU_STAT("exit_operation_exception", exit_operation_exception),
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	VCPU_STAT("instruction_lctlg", instruction_lctlg),
-	VCPU_STAT("instruction_lctl", instruction_lctl),
-	VCPU_STAT("instruction_stctl", instruction_stctl),
-	VCPU_STAT("instruction_stctg", instruction_stctg),
-	VCPU_STAT("deliver_ckc", deliver_ckc),
-	VCPU_STAT("deliver_cputm", deliver_cputm),
-	VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal),
-	VCPU_STAT("deliver_external_call", deliver_external_call),
-	VCPU_STAT("deliver_service_signal", deliver_service_signal),
-	VCPU_STAT("deliver_virtio", deliver_virtio),
-	VCPU_STAT("deliver_stop_signal", deliver_stop_signal),
-	VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal),
-	VCPU_STAT("deliver_restart_signal", deliver_restart_signal),
-	VCPU_STAT("deliver_program", deliver_program),
-	VCPU_STAT("deliver_io", deliver_io),
-	VCPU_STAT("deliver_machine_check", deliver_machine_check),
-	VCPU_STAT("exit_wait_state", exit_wait_state),
-	VCPU_STAT("inject_ckc", inject_ckc),
-	VCPU_STAT("inject_cputm", inject_cputm),
-	VCPU_STAT("inject_external_call", inject_external_call),
-	VM_STAT("inject_float_mchk", inject_float_mchk),
-	VCPU_STAT("inject_emergency_signal", inject_emergency_signal),
-	VM_STAT("inject_io", inject_io),
-	VCPU_STAT("inject_mchk", inject_mchk),
-	VM_STAT("inject_pfault_done", inject_pfault_done),
-	VCPU_STAT("inject_program", inject_program),
-	VCPU_STAT("inject_restart", inject_restart),
-	VM_STAT("inject_service_signal", inject_service_signal),
-	VCPU_STAT("inject_set_prefix", inject_set_prefix),
-	VCPU_STAT("inject_stop_signal", inject_stop_signal),
-	VCPU_STAT("inject_pfault_init", inject_pfault_init),
-	VM_STAT("inject_virtio", inject_virtio),
-	VCPU_STAT("instruction_epsw", instruction_epsw),
-	VCPU_STAT("instruction_gs", instruction_gs),
-	VCPU_STAT("instruction_io_other", instruction_io_other),
-	VCPU_STAT("instruction_lpsw", instruction_lpsw),
-	VCPU_STAT("instruction_lpswe", instruction_lpswe),
-	VCPU_STAT("instruction_pfmf", instruction_pfmf),
-	VCPU_STAT("instruction_ptff", instruction_ptff),
-	VCPU_STAT("instruction_stidp", instruction_stidp),
-	VCPU_STAT("instruction_sck", instruction_sck),
-	VCPU_STAT("instruction_sckpf", instruction_sckpf),
-	VCPU_STAT("instruction_spx", instruction_spx),
-	VCPU_STAT("instruction_stpx", instruction_stpx),
-	VCPU_STAT("instruction_stap", instruction_stap),
-	VCPU_STAT("instruction_iske", instruction_iske),
-	VCPU_STAT("instruction_ri", instruction_ri),
-	VCPU_STAT("instruction_rrbe", instruction_rrbe),
-	VCPU_STAT("instruction_sske", instruction_sske),
-	VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock),
-	VCPU_STAT("instruction_essa", instruction_essa),
-	VCPU_STAT("instruction_stsi", instruction_stsi),
-	VCPU_STAT("instruction_stfl", instruction_stfl),
-	VCPU_STAT("instruction_tb", instruction_tb),
-	VCPU_STAT("instruction_tpi", instruction_tpi),
-	VCPU_STAT("instruction_tprot", instruction_tprot),
-	VCPU_STAT("instruction_tsch", instruction_tsch),
-	VCPU_STAT("instruction_sthyi", instruction_sthyi),
-	VCPU_STAT("instruction_sie", instruction_sie),
-	VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense),
-	VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running),
-	VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call),
-	VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency),
-	VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency),
-	VCPU_STAT("instruction_sigp_start", instruction_sigp_start),
-	VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop),
-	VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status),
-	VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status),
-	VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status),
-	VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch),
-	VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix),
-	VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart),
-	VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset),
-	VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset),
-	VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown),
-	VCPU_STAT("instruction_diag_10", diagnose_10),
-	VCPU_STAT("instruction_diag_44", diagnose_44),
-	VCPU_STAT("instruction_diag_9c", diagnose_9c),
-	VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored),
-	VCPU_STAT("diag_9c_forward", diagnose_9c_forward),
-	VCPU_STAT("instruction_diag_258", diagnose_258),
-	VCPU_STAT("instruction_diag_308", diagnose_308),
-	VCPU_STAT("instruction_diag_500", diagnose_500),
-	VCPU_STAT("instruction_diag_other", diagnose_other),
-	{ NULL }
-};
-
 /* allow nested virtualization in KVM (if enabled by user space) */
 static int nested;
 module_param(nested, int, S_IRUGO);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 53b7c25d6ebc5..5833b8780808f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -234,7 +234,7 @@ const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
 	STATS_DESC_ICOUNTER(VM, mmu_unsync),
 	STATS_DESC_ICOUNTER(VM, lpages),
 	STATS_DESC_ICOUNTER(VM, nx_lpage_splits),
-	STATS_DESC_ICOUNTER(VM, max_mmu_page_hash_collisions)
+	STATS_DESC_PCOUNTER(VM, max_mmu_page_hash_collisions)
 };
 static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
 		sizeof(struct kvm_vm_stat) / sizeof(u64));
@@ -289,53 +289,6 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	VCPU_STAT("pf_fixed", pf_fixed),
-	VCPU_STAT("pf_guest", pf_guest),
-	VCPU_STAT("tlb_flush", tlb_flush),
-	VCPU_STAT("invlpg", invlpg),
-	VCPU_STAT("exits", exits),
-	VCPU_STAT("io_exits", io_exits),
-	VCPU_STAT("mmio_exits", mmio_exits),
-	VCPU_STAT("signal_exits", signal_exits),
-	VCPU_STAT("irq_window", irq_window_exits),
-	VCPU_STAT("nmi_window", nmi_window_exits),
-	VCPU_STAT("halt_exits", halt_exits),
-	VCPU_STAT("halt_successful_poll", generic.halt_successful_poll),
-	VCPU_STAT("halt_attempted_poll", generic.halt_attempted_poll),
-	VCPU_STAT("halt_poll_invalid", generic.halt_poll_invalid),
-	VCPU_STAT("halt_wakeup", generic.halt_wakeup),
-	VCPU_STAT("hypercalls", hypercalls),
-	VCPU_STAT("request_irq", request_irq_exits),
-	VCPU_STAT("irq_exits", irq_exits),
-	VCPU_STAT("host_state_reload", host_state_reload),
-	VCPU_STAT("fpu_reload", fpu_reload),
-	VCPU_STAT("insn_emulation", insn_emulation),
-	VCPU_STAT("insn_emulation_fail", insn_emulation_fail),
-	VCPU_STAT("irq_injections", irq_injections),
-	VCPU_STAT("nmi_injections", nmi_injections),
-	VCPU_STAT("req_event", req_event),
-	VCPU_STAT("l1d_flush", l1d_flush),
-	VCPU_STAT("halt_poll_success_ns", generic.halt_poll_success_ns),
-	VCPU_STAT("halt_poll_fail_ns", generic.halt_poll_fail_ns),
-	VCPU_STAT("nested_run", nested_run),
-	VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
-	VCPU_STAT("directed_yield_successful", directed_yield_successful),
-	VCPU_STAT("guest_mode", guest_mode),
-	VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
-	VM_STAT("mmu_pte_write", mmu_pte_write),
-	VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
-	VM_STAT("mmu_flooded", mmu_flooded),
-	VM_STAT("mmu_recycled", mmu_recycled),
-	VM_STAT("mmu_cache_miss", mmu_cache_miss),
-	VM_STAT("mmu_unsync", mmu_unsync),
-	VM_STAT("remote_tlb_flush", generic.remote_tlb_flush),
-	VM_STAT("largepages", lpages, .mode = 0444),
-	VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444),
-	VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions),
-	{ NULL }
-};
-
 u64 __read_mostly host_xcr0;
 u64 __read_mostly supported_xcr0;
 EXPORT_SYMBOL_GPL(supported_xcr0);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9e75afef16b05..ae7735b490b45 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1264,14 +1264,8 @@ enum kvm_stat_kind {
 
 struct kvm_stat_data {
 	struct kvm *kvm;
-	struct kvm_stats_debugfs_item *dbgfs_item;
-};
-
-struct kvm_stats_debugfs_item {
-	const char *name;
-	int offset;
+	const struct _kvm_stats_desc *desc;
 	enum kvm_stat_kind kind;
-	int mode;
 };
 
 struct _kvm_stats_desc {
@@ -1279,14 +1273,6 @@ struct _kvm_stats_desc {
 	char name[KVM_STATS_NAME_SIZE];
 };
 
-#define KVM_DBGFS_GET_MODE(dbgfs_item)                                         \
-	((dbgfs_item)->mode ? (dbgfs_item)->mode : 0644)
-
-#define VM_STAT(n, x, ...)						       \
-	{ n, offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ }
-#define VCPU_STAT(n, x, ...)						       \
-	{ n, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ }
-
 #define STATS_DESC_COMMON(type, unit, base, exp)			       \
 	.flags = type | unit | base |					       \
 		 BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) |	       \
@@ -1367,7 +1353,6 @@ struct _kvm_stats_desc {
 	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns),	       \
 	STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns)
 
-extern struct kvm_stats_debugfs_item debugfs_entries[];
 extern struct dentry *kvm_debugfs_dir;
 ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header,
 		       const struct _kvm_stats_desc *desc,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index c8d0028df4ac3..3dcc2abbfc609 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -115,7 +115,6 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu);
 struct dentry *kvm_debugfs_dir;
 EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
 
-static int kvm_debugfs_num_entries;
 static const struct file_operations stat_fops_per_vm;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
@@ -860,9 +859,24 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
 	kvfree(slots);
 }
 
+static umode_t kvm_stats_debugfs_mode(const struct _kvm_stats_desc *pdesc)
+{
+	switch (pdesc->desc.flags & KVM_STATS_TYPE_MASK) {
+	case KVM_STATS_TYPE_INSTANT:
+		return 0444;
+	case KVM_STATS_TYPE_CUMULATIVE:
+	case KVM_STATS_TYPE_PEAK:
+	default:
+		return 0644;
+	}
+}
+
+
 static void kvm_destroy_vm_debugfs(struct kvm *kvm)
 {
 	int i;
+	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+				      kvm_vcpu_stats_header.num_desc;
 
 	if (!kvm->debugfs_dentry)
 		return;
@@ -880,7 +894,10 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 {
 	char dir_name[ITOA_MAX_LEN * 2];
 	struct kvm_stat_data *stat_data;
-	struct kvm_stats_debugfs_item *p;
+	const struct _kvm_stats_desc *pdesc;
+	int i;
+	int kvm_debugfs_num_entries = kvm_vm_stats_header.num_desc +
+				      kvm_vcpu_stats_header.num_desc;
 
 	if (!debugfs_initialized())
 		return 0;
@@ -894,15 +911,32 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
 	if (!kvm->debugfs_stat_data)
 		return -ENOMEM;
 
-	for (p = debugfs_entries; p->name; p++) {
+	for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+		pdesc = &kvm_vm_stats_desc[i];
 		stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
 		if (!stat_data)
 			return -ENOMEM;
 
 		stat_data->kvm = kvm;
-		stat_data->dbgfs_item = p;
-		kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
-		debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
+		stat_data->desc = pdesc;
+		stat_data->kind = KVM_STAT_VM;
+		kvm->debugfs_stat_data[i] = stat_data;
+		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+				    kvm->debugfs_dentry, stat_data,
+				    &stat_fops_per_vm);
+	}
+
+	for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+		pdesc = &kvm_vcpu_stats_desc[i];
+		stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL_ACCOUNT);
+		if (!stat_data)
+			return -ENOMEM;
+
+		stat_data->kvm = kvm;
+		stat_data->desc = pdesc;
+		stat_data->kind = KVM_STAT_VCPU;
+		kvm->debugfs_stat_data[i + kvm_vm_stats_header.num_desc] = stat_data;
+		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
 				    kvm->debugfs_dentry, stat_data,
 				    &stat_fops_per_vm);
 	}
@@ -4900,7 +4934,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
 		return -ENOENT;
 
 	if (simple_attr_open(inode, file, get,
-		    KVM_DBGFS_GET_MODE(stat_data->dbgfs_item) & 0222
+		    kvm_stats_debugfs_mode(stat_data->desc) & 0222
 		    ? set : NULL,
 		    fmt)) {
 		kvm_put_kvm(stat_data->kvm);
@@ -4923,14 +4957,14 @@ static int kvm_debugfs_release(struct inode *inode, struct file *file)
 
 static int kvm_get_stat_per_vm(struct kvm *kvm, size_t offset, u64 *val)
 {
-	*val = *(u64 *)((void *)kvm + offset);
+	*val = *(u64 *)((void *)(&kvm->stat) + offset);
 
 	return 0;
 }
 
 static int kvm_clear_stat_per_vm(struct kvm *kvm, size_t offset)
 {
-	*(u64 *)((void *)kvm + offset) = 0;
+	*(u64 *)((void *)(&kvm->stat) + offset) = 0;
 
 	return 0;
 }
@@ -4943,7 +4977,7 @@ static int kvm_get_stat_per_vcpu(struct kvm *kvm, size_t offset, u64 *val)
 	*val = 0;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		*val += *(u64 *)((void *)vcpu + offset);
+		*val += *(u64 *)((void *)(&vcpu->stat) + offset);
 
 	return 0;
 }
@@ -4954,7 +4988,7 @@ static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset)
 	struct kvm_vcpu *vcpu;
 
 	kvm_for_each_vcpu(i, vcpu, kvm)
-		*(u64 *)((void *)vcpu + offset) = 0;
+		*(u64 *)((void *)(&vcpu->stat) + offset) = 0;
 
 	return 0;
 }
@@ -4964,14 +4998,14 @@ static int kvm_stat_data_get(void *data, u64 *val)
 	int r = -EFAULT;
 	struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
 
-	switch (stat_data->dbgfs_item->kind) {
+	switch (stat_data->kind) {
 	case KVM_STAT_VM:
 		r = kvm_get_stat_per_vm(stat_data->kvm,
-					stat_data->dbgfs_item->offset, val);
+					stat_data->desc->desc.offset, val);
 		break;
 	case KVM_STAT_VCPU:
 		r = kvm_get_stat_per_vcpu(stat_data->kvm,
-					  stat_data->dbgfs_item->offset, val);
+					  stat_data->desc->desc.offset, val);
 		break;
 	}
 
@@ -4986,14 +5020,14 @@ static int kvm_stat_data_clear(void *data, u64 val)
 	if (val)
 		return -EINVAL;
 
-	switch (stat_data->dbgfs_item->kind) {
+	switch (stat_data->kind) {
 	case KVM_STAT_VM:
 		r = kvm_clear_stat_per_vm(stat_data->kvm,
-					  stat_data->dbgfs_item->offset);
+					  stat_data->desc->desc.offset);
 		break;
 	case KVM_STAT_VCPU:
 		r = kvm_clear_stat_per_vcpu(stat_data->kvm,
-					    stat_data->dbgfs_item->offset);
+					    stat_data->desc->desc.offset);
 		break;
 	}
 
@@ -5050,6 +5084,7 @@ static int vm_stat_clear(void *_offset, u64 val)
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_readonly_fops, vm_stat_get, NULL, "%llu\n");
 
 static int vcpu_stat_get(void *_offset, u64 *val)
 {
@@ -5086,11 +5121,7 @@ static int vcpu_stat_clear(void *_offset, u64 val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
 			"%llu\n");
-
-static const struct file_operations *stat_fops[] = {
-	[KVM_STAT_VCPU] = &vcpu_stat_fops,
-	[KVM_STAT_VM]   = &vm_stat_fops,
-};
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_readonly_fops, vcpu_stat_get, NULL, "%llu\n");
 
 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 {
@@ -5144,15 +5175,32 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 
 static void kvm_init_debug(void)
 {
-	struct kvm_stats_debugfs_item *p;
+	const struct file_operations *fops;
+	const struct _kvm_stats_desc *pdesc;
+	int i;
 
 	kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
 
-	kvm_debugfs_num_entries = 0;
-	for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
-		debugfs_create_file(p->name, KVM_DBGFS_GET_MODE(p),
-				    kvm_debugfs_dir, (void *)(long)p->offset,
-				    stat_fops[p->kind]);
+	for (i = 0; i < kvm_vm_stats_header.num_desc; ++i) {
+		pdesc = &kvm_vm_stats_desc[i];
+		if (kvm_stats_debugfs_mode(pdesc) & 0222)
+			fops = &vm_stat_fops;
+		else
+			fops = &vm_stat_readonly_fops;
+		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+				kvm_debugfs_dir,
+				(void *)(long)pdesc->desc.offset, fops);
+	}
+
+	for (i = 0; i < kvm_vcpu_stats_header.num_desc; ++i) {
+		pdesc = &kvm_vcpu_stats_desc[i];
+		if (kvm_stats_debugfs_mode(pdesc) & 0222)
+			fops = &vcpu_stat_fops;
+		else
+			fops = &vcpu_stat_readonly_fops;
+		debugfs_create_file(pdesc->name, kvm_stats_debugfs_mode(pdesc),
+				kvm_debugfs_dir,
+				(void *)(long)pdesc->desc.offset, fops);
 	}
 }
 
-- 
GitLab


From f0d4379087d8a83f478b371ff7786e8df0cc2314 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:46 -0700
Subject: [PATCH 3698/3804] KVM: x86/mmu: Remove broken WARN that fires on
 32-bit KVM w/ nested EPT

Remove a misguided WARN that attempts to detect the scenario where using
a special A/D tracking flag will set reserved bits on a non-MMIO spte.
The WARN triggers false positives when using EPT with 32-bit KVM because
of the !64-bit clause, which is just flat out wrong.  The whole A/D
tracking goo is specific to EPT, and one of the big selling points of EPT
is that EPT is decoupled from the host's native paging mode.

Drop the WARN instead of trying to salvage the check.  Keeping a check
specific to A/D tracking bits would essentially regurgitate the same code
that led to KVM needing the tracking bits in the first place.

A better approach would be to add a generic WARN on reserved bits being
set, which would naturally cover the A/D tracking bits, work for all
flavors of paging, and be self-documenting to some extent.

Fixes: 8a406c89532c ("KVM: x86/mmu: Rename and document A/D scheme for TDP SPTEs")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/spte.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 66d43cec0c31a..8e8e8da740a07 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -102,13 +102,6 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
 	else if (kvm_vcpu_ad_need_write_protect(vcpu))
 		spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
 
-	/*
-	 * Bits 62:52 of PAE SPTEs are reserved.  WARN if said bits are set
-	 * if PAE paging may be employed (shadow paging or any 32-bit KVM).
-	 */
-	WARN_ON_ONCE((!tdp_enabled || !IS_ENABLED(CONFIG_X86_64)) &&
-		     (spte & SPTE_TDP_AD_MASK));
-
 	/*
 	 * For the EPT case, shadow_present_mask is 0 if hardware
 	 * supports exec-only page table entries.  In that case,
-- 
GitLab


From 112022bdb5bc372e00e6e43cb88ee38ea67b97bd Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:47 -0700
Subject: [PATCH 3699/3804] KVM: x86/mmu: Treat NX as used (not reserved) for
 all !TDP shadow MMUs

Mark NX as being used for all non-nested shadow MMUs, as KVM will set the
NX bit for huge SPTEs if the iTLB multi-hit mitigation is enabled.
Checking the mitigation itself is not sufficient as it can be toggled on
at any time and KVM doesn't reset MMU contexts when that happens.  KVM
could reset the contexts, but that would require purging all SPTEs in all
MMUs, for no real benefit.  And, KVM already forces EFER.NX=1 when TDP is
disabled (for WP=0, SMEP=1, NX=0), so technically NX is never reserved
for shadow MMUs.

Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b3be690d081a9..444e068e6ad99 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4221,7 +4221,15 @@ static inline u64 reserved_hpa_bits(void)
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 {
-	bool uses_nx = context->nx ||
+	/*
+	 * KVM uses NX when TDP is disabled to handle a variety of scenarios,
+	 * notably for huge SPTEs if iTLB multi-hit mitigation is enabled and
+	 * to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0.
+	 * The iTLB multi-hit workaround can be toggled at any time, so assume
+	 * NX can be used by any non-nested shadow MMU to avoid having to reset
+	 * MMU contexts.  Note, KVM forces EFER.NX=1 when TDP is disabled.
+	 */
+	bool uses_nx = context->nx || !tdp_enabled ||
 		context->mmu_role.base.smep_andnot_wp;
 	struct rsvd_bits_validate *shadow_zero_check;
 	int i;
-- 
GitLab


From 0aa1837533e5f4be8cc21bbc06314c23ba2c5447 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:48 -0700
Subject: [PATCH 3700/3804] KVM: x86: Properly reset MMU context at vCPU
 RESET/INIT

Reset the MMU context at vCPU INIT (and RESET for good measure) if CR0.PG
was set prior to INIT.  Simply re-initializing the current MMU is not
sufficient as the current root HPA may not be usable in the new context.
E.g. if TDP is disabled and INIT arrives while the vCPU is in long mode,
KVM will fail to switch to the 32-bit pae_root and bomb on the next
VM-Enter due to running with a 64-bit CR3 in 32-bit mode.

This bug was papered over in both VMX and SVM, but still managed to rear
its head in the MMU role on VMX.  Because EFER.LMA=1 requires CR0.PG=1,
kvm_calc_shadow_mmu_root_page_role() checks for EFER.LMA without first
checking CR0.PG.  VMX's RESET/INIT flow writes CR0 before EFER, and so
an INIT with the vCPU in 64-bit mode will cause the hack-a-fix to
generate the wrong MMU role.

In VMX, the INIT issue is specific to running without unrestricted guest
since unrestricted guest is available if and only if EPT is enabled.
Commit 8668a3c468ed ("KVM: VMX: Reset mmu context when entering real
mode") resolved the issue by forcing a reset when entering emulated real
mode.

In SVM, commit ebae871a509d ("kvm: svm: reset mmu on VCPU reset") forced
an MMU reset on every INIT to work around the flaw in common x86.  Note, at
the time the bug was fixed, the SVM problem was exacerbated by a complete
lack of a CR4 update.

The vendor resets will be reverted in future patches, primarily to aid
bisection in case there are non-INIT flows that rely on the existing VMX
logic.

Because CR0.PG is unconditionally cleared on INIT, and because CR0.WP and
all CR4/EFER paging bits are ignored if CR0.PG=0, simply checking that
CR0.PG was '1' prior to INIT/RESET is sufficient to detect a required MMU
context reset.

Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5833b8780808f..4bd10fb1dfd6b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10754,6 +10754,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
+	unsigned long old_cr0 = kvm_read_cr0(vcpu);
+
 	kvm_lapic_reset(vcpu, init_event);
 
 	vcpu->arch.hflags = 0;
@@ -10822,6 +10824,17 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vcpu->arch.ia32_xss = 0;
 
 	static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
+
+	/*
+	 * Reset the MMU context if paging was enabled prior to INIT (which is
+	 * implied if CR0.PG=1 as CR0 will be '0' prior to RESET).  Unlike the
+	 * standard CR0/CR4/EFER modification paths, only CR0.PG needs to be
+	 * checked because it is unconditionally cleared on INIT and all other
+	 * paging related bits are ignored if paging is disabled, i.e. CR0.WP,
+	 * CR4, and EFER changes are all irrelevant if CR0.PG was '0'.
+	 */
+	if (old_cr0 & X86_CR0_PG)
+		kvm_mmu_reset_context(vcpu);
 }
 
 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
-- 
GitLab


From ef318b9edf66a082f23d00d79b70c17b4c055a26 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:49 -0700
Subject: [PATCH 3701/3804] KVM: x86/mmu: Use MMU's role to detect CR4.SMEP
 value in nested NPT walk

Use the MMU's role to get its effective SMEP value when injecting a fault
into the guest.  When walking L1's (nested) NPT while L2 is active, vCPU
state will reflect L2, whereas NPT uses the host's (L1 in this case) CR0,
CR4, EFER, etc...  If L1 and L2 have different settings for SMEP and
L1 does not have EFER.NX=1, this can result in an incorrect PFEC.FETCH
when injecting #NPF.

Fixes: e57d4a356ad3 ("KVM: Add instruction fetch checking when walking guest page table")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 823a5919f9fa0..52fffd68b5229 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -471,8 +471,7 @@ retry_walk:
 
 error:
 	errcode |= write_fault | user_fault;
-	if (fetch_fault && (mmu->nx ||
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
+	if (fetch_fault && (mmu->nx || mmu->mmu_role.ext.cr4_smep))
 		errcode |= PFERR_FETCH_MASK;
 
 	walker->fault.vector = PF_VECTOR;
-- 
GitLab


From f71a53d1180d5ecc346f0c6a23191d837fe2871b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:50 -0700
Subject: [PATCH 3702/3804] Revert "KVM: x86/mmu: Drop
 kvm_mmu_extended_role.cr4_la57 hack"

Restore CR4.LA57 to the mmu_role to fix an amusing edge case with nested
virtualization.  When KVM (L0) is using TDP, CR4.LA57 is not reflected in
mmu_role.base.level because that tracks the shadow root level, i.e. TDP
level.  Normally, this is not an issue because LA57 can't be toggled
while long mode is active, i.e. the guest has to first disable paging,
then toggle LA57, then re-enable paging, thus ensuring an MMU
reinitialization.

But if L1 is crafty, it can load a new CR4 on VM-Exit and toggle LA57
without having to bounce through an unpaged section.  L1 can also load a
new CR3 on exit, i.e. it doesn't even need to play crazy paging games, a
single entry PML5 is sufficient.  Such shenanigans are only problematic
if L0 and L1 use TDP, otherwise L1 and L2 share an MMU that gets
reinitialized on nested VM-Enter/VM-Exit due to mmu_role.base.guest_mode.

Note, in the L2 case with nested TDP, even though L1 can switch between
L2s with different LA57 settings, thus bypassing the paging requirement,
in that case KVM's nested_mmu will track LA57 in base.level.

This reverts commit 8053f924cad30bf9f9a24e02b6c8ddfabf5202ea.

Fixes: 8053f924cad3 ("KVM: x86/mmu: Drop kvm_mmu_extended_role.cr4_la57 hack")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/mmu/mmu.c          | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 408051552121a..a474cd13b0c84 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -320,6 +320,7 @@ union kvm_mmu_extended_role {
 		unsigned int cr4_pke:1;
 		unsigned int cr4_smap:1;
 		unsigned int cr4_smep:1;
+		unsigned int cr4_la57:1;
 		unsigned int maxphyaddr:6;
 	};
 };
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 444e068e6ad99..fa35762f325c4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4537,6 +4537,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
 	ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
 	ext.cr4_pse = !!is_pse(vcpu);
 	ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
+	ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
 	ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	ext.valid = 1;
-- 
GitLab


From 49c6f8756cdffeb9af1fbcb86bacacced26465d7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:51 -0700
Subject: [PATCH 3703/3804] KVM: x86: Force all MMUs to reinitialize if guest
 CPUID is modified

Invalidate all MMUs' roles after a CPUID update to force reinitialization
of the MMU context/helpers.  Despite the efforts of commit de3ccd26fafc
("KVM: MMU: record maximum physical address width in kvm_mmu_extended_role"),
there are still a handful of CPUID-based properties that affect MMU
behavior but are not incorporated into mmu_role.  E.g. 1gb hugepage
support, AMD vs. Intel handling of bit 8, and SEV's C-Bit location all
factor into the guest's reserved PTE bits.

The obvious alternative would be to add all such properties to mmu_role,
but doing so provides no benefit over simply forcing a reinitialization
on every CPUID update, as setting guest CPUID is a rare operation.

Note, reinitializing all MMUs after a CPUID update does not fix all of
KVM's woes.  Specifically, kvm_mmu_page_role doesn't track the CPUID
properties, which means that a vCPU can reuse shadow pages that should
not exist for the new vCPU model, e.g. that map GPAs that are now illegal
(due to MAXPHYADDR changes) or that set bits that are now reserved
(PAGE_SIZE for 1gb pages), etc...

Tracking the relevant CPUID properties in kvm_mmu_page_role would address
the majority of problems, but fully tracking that much state in the
shadow page role comes with an unpalatable cost as it would require a
non-trivial increase in KVM's memory footprint.  The GBPAGES case is even
worse, as neither Intel nor AMD provides a way to disable 1gb hugepage
support in the hardware page walker, i.e. it's a virtualization hole that
can't be closed when using TDP.

In other words, resetting the MMU after a CPUID update is largely a
superficial fix.  But, it will allow reverting the tracking of MAXPHYADDR
in the mmu_role, and that case in particular needs to mostly work because
KVM's shadow_root_level depends on guest MAXPHYADDR when 5-level paging
is supported.  For cases where KVM botches guest behavior, the damage is
limited to that guest.  But for the shadow_root_level, a misconfigured
MMU can cause KVM to incorrectly access memory, e.g. due to walking off
the end of its shadow page tables.

Fixes: 7dcd57552008 ("x86/kvm/mmu: check if tdp/shadow MMU reconfiguration is needed")
Cc: Yu Zhang <yu.c.zhang@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/cpuid.c            |  6 +++---
 arch/x86/kvm/mmu/mmu.c          | 12 ++++++++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a474cd13b0c84..f1e4d5f2bf8dd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1496,6 +1496,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      struct kvm_memory_slot *memslot,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b4da665bb8923..c42613cfb5ba6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -202,10 +202,10 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
 
 	/*
-	 * Except for the MMU, which needs to be reset after any vendor
-	 * specific adjustments to the reserved GPA bits.
+	 * Except for the MMU, which needs to do its thing any vendor specific
+	 * adjustments to the reserved GPA bits.
 	 */
-	kvm_mmu_reset_context(vcpu);
+	kvm_mmu_after_set_cpuid(vcpu);
 }
 
 static int is_efer_nx(void)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index fa35762f325c4..1ab3fdb1f2e43 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4903,6 +4903,18 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
 	return role.base;
 }
 
+void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Invalidate all MMU roles to force them to reinitialize as CPUID
+	 * information is factored into reserved bit calculations.
+	 */
+	vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
+	vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
+	vcpu->arch.nested_mmu.mmu_role.ext.valid = 0;
+	kvm_mmu_reset_context(vcpu);
+}
+
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
 {
 	kvm_mmu_unload(vcpu);
-- 
GitLab


From 63f5a1909f9e465eb446274969f65471794deafb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:52 -0700
Subject: [PATCH 3704/3804] KVM: x86: Alert userspace that KVM_SET_CPUID{,2}
 after KVM_RUN is broken

Warn userspace that KVM_SET_CPUID{,2} after KVM_RUN "may" cause guest
instability.  Initialize last_vmentry_cpu to -1 and use it to detect if
the vCPU has been run at least once when its CPUID model is changed.

KVM does not correctly handle changes to paging related settings in the
guest's vCPU model after KVM_RUN, e.g. MAXPHYADDR, GBPAGES, etc...  KVM
could theoretically zap all shadow pages, but actually making that happen
is a mess due to lock inversion (vcpu->mutex is held).  And even then,
updating paging settings on the fly would only work if all vCPUs are
stopped, updated in concert with identical settings, then restarted.

To support running vCPUs with different vCPU models (that affect paging),
KVM would need to track all relevant information in kvm_mmu_page_role.
Note, that's the _page_ role, not the full mmu_role.  Updating mmu_role
isn't sufficient as a vCPU can reuse a shadow page translation that was
created by a vCPU with different settings and thus completely skip the
reserved bit checks (that are tied to CPUID).

Tracking CPUID state in kvm_mmu_page_role is _extremely_ undesirable as
it would require doubling gfn_track from a u16 to a u32, i.e. would
increase KVM's memory footprint by 2 bytes for every 4kb of guest memory.
E.g. MAXPHYADDR (6 bits), GBPAGES, AMD vs. INTEL = 1 bit, and SEV C-BIT
would all need to be tracked.

In practice, there is no remotely sane use case for changing any paging
related CPUID entries on the fly, so just sweep it under the rug (after
yelling at userspace).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  | 11 ++++++++---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 20 ++++++++++++++++++++
 arch/x86/kvm/x86.c              |  2 ++
 4 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index b87fa32835f28..5d8db4922df6a 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -688,9 +688,14 @@ MSRs that have been set successfully.
 Defines the vcpu responses to the cpuid instruction.  Applications
 should use the KVM_SET_CPUID2 ioctl if available.
 
-Note, when this IOCTL fails, KVM gives no guarantees that previous valid CPUID
-configuration (if there is) is not corrupted. Userspace can get a copy of the
-resulting CPUID configuration through KVM_GET_CPUID2 in case.
+Caveat emptor:
+  - If this IOCTL fails, KVM gives no guarantees that previous valid CPUID
+    configuration (if there is) is not corrupted. Userspace can get a copy
+    of the resulting CPUID configuration through KVM_GET_CPUID2 in case.
+  - Using KVM_SET_CPUID{,2} after KVM_RUN, i.e. changing the guest vCPU model
+    after running the guest, may cause guest instability.
+  - Using heterogeneous CPUID configurations, modulo APIC IDs, topology, etc...
+    may cause guest instability.
 
 ::
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f1e4d5f2bf8dd..f8faf3efc08d7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -840,7 +840,7 @@ struct kvm_vcpu_arch {
 	bool l1tf_flush_l1d;
 
 	/* Host CPU on which VM-entry was most recently attempted */
-	unsigned int last_vmentry_cpu;
+	int last_vmentry_cpu;
 
 	/* AMD MSRC001_0015 Hardware Configuration */
 	u64 msr_hwcr;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1ab3fdb1f2e43..36201c02a4729 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4913,6 +4913,26 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
 	vcpu->arch.nested_mmu.mmu_role.ext.valid = 0;
 	kvm_mmu_reset_context(vcpu);
+
+	/*
+	 * KVM does not correctly handle changing guest CPUID after KVM_RUN, as
+	 * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't
+	 * tracked in kvm_mmu_page_role.  As a result, KVM may miss guest page
+	 * faults due to reusing SPs/SPTEs.  Alert userspace, but otherwise
+	 * sweep the problem under the rug.
+	 *
+	 * KVM's horrific CPUID ABI makes the problem all but impossible to
+	 * solve, as correctly handling multiple vCPU models (with respect to
+	 * paging and physical address properties) in a single VM would require
+	 * tracking all relevant CPUID information in kvm_mmu_page_role.  That
+	 * is very undesirable as it would double the memory requirements for
+	 * gfn_track (see struct kvm_mmu_page_role comments), and in practice
+	 * no sane VMM mucks with the core vCPU model on the fly.
+	 */
+	if (vcpu->arch.last_vmentry_cpu != -1) {
+		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} after KVM_RUN may cause guest instability\n");
+		pr_warn_ratelimited("KVM: KVM_SET_CPUID{,2} will fail after KVM_RUN starting with Linux 5.16\n");
+	}
 }
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4bd10fb1dfd6b..c862783035b8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10602,6 +10602,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	struct page *page;
 	int r;
 
+	vcpu->arch.last_vmentry_cpu = -1;
+
 	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
-- 
GitLab


From 6c032f12dd1e80a9dcd4847feab134d14e5551f8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:53 -0700
Subject: [PATCH 3705/3804] Revert "KVM: MMU: record maximum physical address
 width in kvm_mmu_extended_role"

Drop MAXPHYADDR from mmu_role now that all MMUs have their role
invalidated after a CPUID update.  Invalidating the role forces all MMUs
to re-evaluate the guest's MAXPHYADDR, and the guest's MAXPHYADDR can
only be changed through a CPUID update.

This reverts commit de3ccd26fafc707b09792d9b633c8b5b48865315.

Cc: Yu Zhang <yu.c.zhang@linux.intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 1 -
 arch/x86/kvm/mmu/mmu.c          | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8faf3efc08d7..250915da1681d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -321,7 +321,6 @@ union kvm_mmu_extended_role {
 		unsigned int cr4_smap:1;
 		unsigned int cr4_smep:1;
 		unsigned int cr4_la57:1;
-		unsigned int maxphyaddr:6;
 	};
 };
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 36201c02a4729..54514f06714ad 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4538,7 +4538,6 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
 	ext.cr4_pse = !!is_pse(vcpu);
 	ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
 	ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
-	ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
 
 	ext.valid = 1;
 
-- 
GitLab


From ddc16abbbae9cd21705323d47158fb9c334438ba Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:54 -0700
Subject: [PATCH 3706/3804] KVM: x86/mmu: Unconditionally zap unsync SPs when
 creating >4k SP at GFN

When creating a new upper-level shadow page, zap unsync shadow pages at
the same target gfn instead of attempting to sync the pages.  This fixes
a bug where an unsync shadow page could be sync'd with an incompatible
context, e.g. wrong smm, is_guest, etc... flags.  In practice, the bug is
relatively benign as sync_page() is all but guaranteed to fail its check
that the guest's desired gfn (for the to-be-sync'd page) matches the
current gfn associated with the shadow page.  I.e. kvm_sync_page() would
end up zapping the page anyways.

Alternatively, __kvm_sync_page() could be modified to explicitly verify
the mmu_role of the unsync shadow page is compatible with the current MMU
context.  But, except for this specific case, __kvm_sync_page() is called
iff the page is compatible, e.g. the transient sync in kvm_mmu_get_page()
requires an exact role match, and the call from kvm_sync_mmu_roots() is
only synchronizing shadow pages from the current MMU (which better be
compatible or KVM has problems).  And as described above, attempting to
sync shadow pages when creating an upper-level shadow page is unlikely
to succeed, e.g. zero successful syncs were observed when running Linux
guests despite over a million attempts.

Fixes: 9f1a122f970d ("KVM: MMU: allow more page become unsync at getting sp time")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-10-seanjc@google.com>
[Remove WARN_ON after __kvm_sync_page. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 50 ++++++++++++++----------------------------
 1 file changed, 16 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 54514f06714ad..4af466f0ec6d6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1843,24 +1843,6 @@ static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	return __kvm_sync_page(vcpu, sp, invalid_list);
 }
 
-/* @gfn should be write-protected at the call site */
-static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn,
-			   struct list_head *invalid_list)
-{
-	struct kvm_mmu_page *s;
-	bool ret = false;
-
-	for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) {
-		if (!s->unsync)
-			continue;
-
-		WARN_ON(s->role.level != PG_LEVEL_4K);
-		ret |= kvm_sync_page(vcpu, s, invalid_list);
-	}
-
-	return ret;
-}
-
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL];
 	unsigned int idx[PT64_ROOT_MAX_LEVEL];
@@ -1990,8 +1972,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	struct hlist_head *sp_list;
 	unsigned quadrant;
 	struct kvm_mmu_page *sp;
-	bool need_sync = false;
-	bool flush = false;
 	int collisions = 0;
 	LIST_HEAD(invalid_list);
 
@@ -2014,11 +1994,21 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			continue;
 		}
 
-		if (!need_sync && sp->unsync)
-			need_sync = true;
-
-		if (sp->role.word != role.word)
+		if (sp->role.word != role.word) {
+			/*
+			 * If the guest is creating an upper-level page, zap
+			 * unsync pages for the same gfn.  While it's possible
+			 * the guest is using recursive page tables, in all
+			 * likelihood the guest has stopped using the unsync
+			 * page and is installing a completely unrelated page.
+			 * Unsync pages must not be left as is, because the new
+			 * upper-level page will be write-protected.
+			 */
+			if (level > PG_LEVEL_4K && sp->unsync)
+				kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
+							 &invalid_list);
 			continue;
+		}
 
 		if (direct_mmu)
 			goto trace_get_page;
@@ -2052,22 +2042,14 @@ trace_get_page:
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, sp_list);
 	if (!direct) {
-		/*
-		 * we should do write protection before syncing pages
-		 * otherwise the content of the synced shadow page may
-		 * be inconsistent with guest page table.
-		 */
 		account_shadowed(vcpu->kvm, sp);
 		if (level == PG_LEVEL_4K && rmap_write_protect(vcpu, gfn))
 			kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1);
-
-		if (level > PG_LEVEL_4K && need_sync)
-			flush |= kvm_sync_pages(vcpu, gfn, &invalid_list);
 	}
 	trace_kvm_mmu_get_page(sp, true);
-
-	kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
 out:
+	kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
+
 	if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions)
 		vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions;
 	return sp;
-- 
GitLab


From 00a669780ffa8c4b5f3e37346b5bf45508dd15bb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:55 -0700
Subject: [PATCH 3707/3804] KVM: x86/mmu: Use MMU role to check for matching
 guest page sizes

Originally, __kvm_sync_page used to check the cr4_pae bit in the role
to avoid zapping 4-byte kvm_mmu_pages when guest page sizes are 8-byte
or the other way round.  However, in commit 47c42e6b4192 ("KVM: x86: fix
handling of role.cr4_pae and rename it to 'gpte_size'", 2019-03-28) it
was observed that this did not work for nested EPT, where the page table
size would be 8 bytes even if CR4.PAE=0.  (Note that the check still
has to be done for nested *NPT*, so it is not possible to use tdp_enabled
or similar).

Therefore, a hack was introduced to identify nested EPT shadow pages
and unconditionally call __kvm_sync_page() on them.  However, it is
possible to do without the hack to identify nested EPT shadow pages:
if EPT is active, there will be no shadow pages in non-EPT format,
and all of them will have gpte_is_8_bytes set to true; we can just
check the MMU role directly, and the test will always be true.

Even for non-EPT shadow MMUs, this test should really always be true
now that __kvm_sync_page() is called if and only if the role is an
exact match (kvm_mmu_get_page()) or is part of the current MMU context
(kvm_mmu_sync_roots()).  A future commit will convert the likely-pointless
check into a meaningful WARN to enforce that the mmu_roles of the current
context and the shadow page are compatible.

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-11-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/mmu.rst |  3 ---
 arch/x86/kvm/mmu/mmu.c         | 16 +++-------------
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index 20d85daed395e..ddbb23998742c 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -192,9 +192,6 @@ Shadow pages contain the following information:
     Contains the value of cr4.smap && !cr0.wp for which the page is valid
     (pages for which this is true are different from other pages; see the
     treatment of cr0.wp=0 below).
-  role.ept_sp:
-    This is a virtual flag to denote a shadowed nested EPT page.  ept_sp
-    is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
   role.smm:
     Is 1 if the page is valid in system management mode.  This field
     determines which of the kvm_memslots array was used to build this
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4af466f0ec6d6..71a2ee7552248 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1780,16 +1780,13 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])	\
 		if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
-static inline bool is_ept_sp(struct kvm_mmu_page *sp)
-{
-	return sp->role.cr0_wp && sp->role.smap_andnot_wp;
-}
-
 /* @sp->gfn should be write-protected at the call site */
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
-	if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) ||
+	union kvm_mmu_page_role mmu_role = vcpu->arch.mmu->mmu_role.base;
+
+	if (sp->role.gpte_is_8_bytes != mmu_role.gpte_is_8_bytes ||
 	    vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
@@ -4721,13 +4718,6 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
 	role.base.guest_mode = true;
 	role.base.access = ACC_ALL;
 
-	/*
-	 * WP=1 and NOT_WP=1 is an impossible combination, use WP and the
-	 * SMAP variation to denote shadow EPT entries.
-	 */
-	role.base.cr0_wp = true;
-	role.base.smap_andnot_wp = true;
-
 	role.ext = kvm_calc_mmu_role_ext(vcpu);
 	role.ext.execonly = execonly;
 
-- 
GitLab


From 2640b0865395b6a31f76d6eca9937dec3e876ca3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:56 -0700
Subject: [PATCH 3708/3804] KVM: x86/mmu: WARN and zap SP when sync'ing if MMU
 role mismatches

When synchronizing a shadow page, WARN and zap the page if its mmu role
isn't compatible with the current MMU context, where "compatible" is an
exact match sans the bits that have no meaning in the overall MMU context
or will be explicitly overwritten during the sync.  Many of the helpers
used by sync_page() are specific to the current context, updating a SMM
vs. non-SMM shadow page would use the wrong memslots, updating L1 vs. L2
PTEs might work but would be extremely bizarre, and so on and so forth.

Drop the guard with respect to 8-byte vs. 4-byte PTEs in
__kvm_sync_page(), it was made useless when kvm_mmu_get_page() stopped
trying to sync shadow pages irrespective of the current MMU context.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-12-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c         |  5 +----
 arch/x86/kvm/mmu/paging_tmpl.h | 27 +++++++++++++++++++++++++--
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 71a2ee7552248..e4415e739807f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1784,10 +1784,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			    struct list_head *invalid_list)
 {
-	union kvm_mmu_page_role mmu_role = vcpu->arch.mmu->mmu_role.base;
-
-	if (sp->role.gpte_is_8_bytes != mmu_role.gpte_is_8_bytes ||
-	    vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
+	if (vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
 		return false;
 	}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 52fffd68b5229..b632606a87d60 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -1030,13 +1030,36 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gpa_t vaddr,
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
+	union kvm_mmu_page_role mmu_role = vcpu->arch.mmu->mmu_role.base;
 	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
 	int set_spte_ret = 0;
 
-	/* direct kvm_mmu_page can not be unsync. */
-	BUG_ON(sp->role.direct);
+	/*
+	 * Ignore various flags when verifying that it's safe to sync a shadow
+	 * page using the current MMU context.
+	 *
+	 *  - level: not part of the overall MMU role and will never match as the MMU's
+	 *           level tracks the root level
+	 *  - access: updated based on the new guest PTE
+	 *  - quadrant: not part of the overall MMU role (similar to level)
+	 */
+	const union kvm_mmu_page_role sync_role_ign = {
+		.level = 0xf,
+		.access = 0x7,
+		.quadrant = 0x3,
+	};
+
+	/*
+	 * Direct pages can never be unsync, and KVM should never attempt to
+	 * sync a shadow page for a different MMU context, e.g. if the role
+	 * differs then the memslot lookup (SMM vs. non-SMM) will be bogus, the
+	 * reserved bits checks will be wrong, etc...
+	 */
+	if (WARN_ON_ONCE(sp->role.direct ||
+			 (sp->role.word ^ mmu_role.word) & ~sync_role_ign.word))
+		return 0;
 
 	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
 
-- 
GitLab


From 07dc4f35a44c8f85ba7262b56b70c3fcbc3b74fd Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 23 Jun 2021 12:49:19 -0400
Subject: [PATCH 3709/3804] KVM: x86/mmu: comment on kvm_mmu_get_page's syncing
 of pages

Explain the usage of sync_page() in kvm_mmu_get_page(), which is
subtle in how and why it differs from mmu_sync_children().

Signed-off-by: Sean Christopherson <seanjc@google.com>
[Split out of a different patch by Sean. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e4415e739807f..726e5b1715432 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2008,8 +2008,17 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			goto trace_get_page;
 
 		if (sp->unsync) {
-			/* The page is good, but __kvm_sync_page might still end
-			 * up zapping it.  If so, break in order to rebuild it.
+			/*
+			 * The page is good, but is stale.  __kvm_sync_page does
+			 * get the latest guest state, but (unlike mmu_unsync_children)
+			 * it doesn't write-protect the page or mark it synchronized!
+			 * This way the validity of the mapping is ensured, but the
+			 * overhead of write protection is not incurred until the
+			 * guest invalidates the TLB mapping.  This allows multiple
+			 * SPs for a single gfn to be unsync.
+			 *
+			 * If the sync fails, the page is zapped.  If so, break
+			 * in order to rebuild it.
 			 */
 			if (!__kvm_sync_page(vcpu, sp, &invalid_list))
 				break;
-- 
GitLab


From 479a1efc8119d8699cca73d00625b28003d0a1f8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:57 -0700
Subject: [PATCH 3710/3804] KVM: x86/mmu: Drop the intermediate "transient"
 __kvm_sync_page()

Move the kvm_unlink_unsync_page() call out of kvm_sync_page() and into
its sole caller, and fold __kvm_sync_page() into kvm_sync_page() since
the latter becomes a pure pass-through.  There really should be no reason
for code to do a complete sync of a shadow page outside of the full
kvm_mmu_sync_roots(), e.g. the one use case that crept in turned out to
be flawed and counter-productive.

Drop the stale comment about @sp->gfn needing to be write-protected, as
it directly contradicts the kvm_mmu_get_page() usage.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 726e5b1715432..92b7ab1a0a776 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1780,9 +1780,8 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 	  &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)])	\
 		if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
-/* @sp->gfn should be write-protected at the call site */
-static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			    struct list_head *invalid_list)
+static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+			 struct list_head *invalid_list)
 {
 	if (vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
 		kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
@@ -1830,13 +1829,6 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 	       unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
 
-static bool kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
-			 struct list_head *invalid_list)
-{
-	kvm_unlink_unsync_page(vcpu->kvm, sp);
-	return __kvm_sync_page(vcpu, sp, invalid_list);
-}
-
 struct mmu_page_path {
 	struct kvm_mmu_page *parent[PT64_ROOT_MAX_LEVEL];
 	unsigned int idx[PT64_ROOT_MAX_LEVEL];
@@ -1931,6 +1923,7 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 		}
 
 		for_each_sp(pages, sp, parents, i) {
+			kvm_unlink_unsync_page(vcpu->kvm, sp);
 			flush |= kvm_sync_page(vcpu, sp, &invalid_list);
 			mmu_pages_clear_parents(&parents);
 		}
@@ -2009,7 +2002,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 
 		if (sp->unsync) {
 			/*
-			 * The page is good, but is stale.  __kvm_sync_page does
+			 * The page is good, but is stale.  kvm_sync_page does
 			 * get the latest guest state, but (unlike mmu_unsync_children)
 			 * it doesn't write-protect the page or mark it synchronized!
 			 * This way the validity of the mapping is ensured, but the
@@ -2020,7 +2013,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			 * If the sync fails, the page is zapped.  If so, break
 			 * in order to rebuild it.
 			 */
-			if (!__kvm_sync_page(vcpu, sp, &invalid_list))
+			if (!kvm_sync_page(vcpu, sp, &invalid_list))
 				break;
 
 			WARN_ON(!list_empty(&invalid_list));
-- 
GitLab


From 0337f585f57fc80a50e0645ca709512687185c72 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:58 -0700
Subject: [PATCH 3711/3804] KVM: x86/mmu: Rename unsync helper and update
 related comments

Rename mmu_need_write_protect() to mmu_try_to_unsync_pages() and update
a variety of related, stale comments.  Add several new comments to call
out subtle details, e.g. that upper-level shadow pages are write-tracked,
and that can_unsync is false iff KVM is in the process of synchronizing
pages.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-14-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          | 34 ++++++++++++++++++++++++---------
 arch/x86/kvm/mmu/mmu_internal.h |  3 +--
 arch/x86/kvm/mmu/spte.c         | 10 ++++++++--
 3 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 92b7ab1a0a776..dffa9486e642a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2458,17 +2458,33 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	kvm_mmu_mark_parents_unsync(sp);
 }
 
-bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
-			    bool can_unsync)
+/*
+ * Attempt to unsync any shadow pages that can be reached by the specified gfn,
+ * KVM is creating a writable mapping for said gfn.  Returns 0 if all pages
+ * were marked unsync (or if there is no shadow page), -EPERM if the SPTE must
+ * be write-protected.
+ */
+int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
 {
 	struct kvm_mmu_page *sp;
 
+	/*
+	 * Force write-protection if the page is being tracked.  Note, the page
+	 * track machinery is used to write-protect upper-level shadow pages,
+	 * i.e. this guards the role.level == 4K assertion below!
+	 */
 	if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
-		return true;
+		return -EPERM;
 
+	/*
+	 * The page is not write-tracked, mark existing shadow pages unsync
+	 * unless KVM is synchronizing an unsync SP (can_unsync = false).  In
+	 * that case, KVM must complete emulation of the guest TLB flush before
+	 * allowing shadow pages to become unsync (writable by the guest).
+	 */
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
 		if (!can_unsync)
-			return true;
+			return -EPERM;
 
 		if (sp->unsync)
 			continue;
@@ -2499,8 +2515,8 @@ bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	 *                      2.2 Guest issues TLB flush.
 	 *                          That causes a VM Exit.
 	 *
-	 *                      2.3 kvm_mmu_sync_pages() reads sp->unsync.
-	 *                          Since it is false, so it just returns.
+	 *                      2.3 Walking of unsync pages sees sp->unsync is
+	 *                          false and skips the page.
 	 *
 	 *                      2.4 Guest accesses GVA X.
 	 *                          Since the mapping in the SP was not updated,
@@ -2516,7 +2532,7 @@ bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
 	 */
 	smp_wmb();
 
-	return false;
+	return 0;
 }
 
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
@@ -3461,8 +3477,8 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		 * flush strictly after those changes are made. We only need to
 		 * ensure that the other CPU sets these flags before any actual
 		 * changes to the page tables are made. The comments in
-		 * mmu_need_write_protect() describe what could go wrong if this
-		 * requirement isn't satisfied.
+		 * mmu_try_to_unsync_pages() describe what could go wrong if
+		 * this requirement isn't satisfied.
 		 */
 		if (!smp_load_acquire(&sp->unsync) &&
 		    !smp_load_acquire(&sp->unsync_children))
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 18be103df9d59..35567293c1fdc 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -122,8 +122,7 @@ static inline bool is_nx_huge_page_enabled(void)
 	return READ_ONCE(nx_huge_pages);
 }
 
-bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
-			    bool can_unsync);
+int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync);
 
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 8e8e8da740a07..246e61e0771e4 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -147,13 +147,19 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
 		/*
 		 * Optimization: for pte sync, if spte was writable the hash
 		 * lookup is unnecessary (and expensive). Write protection
-		 * is responsibility of mmu_get_page / kvm_sync_page.
+		 * is responsibility of kvm_mmu_get_page / kvm_mmu_sync_roots.
 		 * Same reasoning can be applied to dirty page accounting.
 		 */
 		if (!can_unsync && is_writable_pte(old_spte))
 			goto out;
 
-		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
+		/*
+		 * Unsync shadow pages that are reachable by the new, writable
+		 * SPTE.  Write-protect the SPTE if the page can't be unsync'd,
+		 * e.g. it's write-tracked (upper-level SPs) or has one or more
+		 * shadow pages and unsync'ing pages is not allowed.
+		 */
+		if (mmu_try_to_unsync_pages(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %llx, marking ro\n",
 				 __func__, gfn);
 			ret |= SET_SPTE_WRITE_PROTECTED_PT;
-- 
GitLab


From dbc4739b6b3ed478531155c832573a3fb1ab32d9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:56:59 -0700
Subject: [PATCH 3712/3804] KVM: x86: Fix sizes used to pass around CR0, CR4,
 and EFER

When configuring KVM's MMU, pass CR0 and CR4 as unsigned longs, and EFER
as a u64 in various flows (mostly MMU).  Passing the params as u32s is
functionally ok since all of the affected registers reserve bits 63:32 to
zero (enforced by KVM), but it's technically wrong.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-15-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h        |  4 ++--
 arch/x86/kvm/mmu/mmu.c    | 11 ++++++-----
 arch/x86/kvm/svm/nested.c |  2 +-
 arch/x86/kvm/x86.c        |  2 +-
 4 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index bc11402df83bb..47131b92b9901 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -66,8 +66,8 @@ void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 
 void kvm_init_mmu(struct kvm_vcpu *vcpu);
-void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
-			     gpa_t nested_cr3);
+void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
+			     unsigned long cr4, u64 efer, gpa_t nested_cr3);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 			     bool accessed_dirty, gpa_t new_eptp);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index dffa9486e642a..f3c4c6349ddcc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4659,8 +4659,8 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 }
 
 static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
-				    u32 cr0, u32 cr4, u32 efer,
-				    union kvm_mmu_role new_role)
+				    unsigned long cr0, unsigned long cr4,
+				    u64 efer, union kvm_mmu_role new_role)
 {
 	if (!(cr0 & X86_CR0_PG))
 		nonpaging_init_context(vcpu, context);
@@ -4675,7 +4675,8 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 
-static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer)
+static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
+				unsigned long cr4, u64 efer)
 {
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
 	union kvm_mmu_role new_role =
@@ -4697,8 +4698,8 @@ kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu)
 	return role;
 }
 
-void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer,
-			     gpa_t nested_cr3)
+void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
+			     unsigned long cr4, u64 efer, gpa_t nested_cr3)
 {
 	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
 	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index dca20f949b637..9f0e7ed672b2c 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1244,8 +1244,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 		&user_kvm_nested_state->data.svm[0];
 	struct vmcb_control_area *ctl;
 	struct vmcb_save_area *save;
+	unsigned long cr0;
 	int ret;
-	u32 cr0;
 
 	BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
 		     KVM_STATE_NESTED_SVM_VMCB_SIZE);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c862783035b8f..0b059698cd5c0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9095,8 +9095,8 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 {
 	struct kvm_segment cs, ds;
 	struct desc_ptr dt;
+	unsigned long cr0;
 	char buf[512];
-	u32 cr0;
 
 	memset(buf, 0, 512);
 #ifdef CONFIG_X86_64
-- 
GitLab


From 31e96bc63655ba643e31d83d8652b43f01e43f5b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:00 -0700
Subject: [PATCH 3713/3804] KVM: nSVM: Add a comment to document why nNPT uses
 vmcb01, not vCPU state

Add a comment in the nested NPT initialization flow to call out that it
intentionally uses vmcb01 instead of current vCPU state to get the effective
hCR4 and hEFER for L1's NPT context.

Note, despite nSVM's efforts to handle the case where vCPU state doesn't
reflect L1 state, the MMU may still do the wrong thing due to pulling
state from the vCPU instead of the passed in CR0/CR4/EFER values.  This
will be addressed in future commits.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-16-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/nested.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 9f0e7ed672b2c..f17d8c9050c01 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -98,6 +98,12 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	WARN_ON(mmu_is_nested(vcpu));
 
 	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
+
+	/*
+	 * The NPT format depends on L1's CR4 and EFER, which is in vmcb01.  Note,
+	 * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
+	 * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
+	 */
 	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
 				svm->vmcb01.ptr->save.efer,
 				svm->nested.ctl.nested_cr3);
-- 
GitLab


From 18feaad3c6556192b0d28f0777b021d137076917 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:01 -0700
Subject: [PATCH 3714/3804] KVM: x86/mmu: Drop smep_andnot_wp check from "uses
 NX" for shadow MMUs

Drop the smep_andnot_wp role check from the "uses NX" calculation now
that all non-nested shadow MMUs treat NX as used via the !TDP check.

The shadow MMU for nested NPT, which shares the helper, does not need to
deal with SMEP (or WP) as NPT walks are always "user" accesses and WP is
explicitly noted as being ignored:

  Table walks for guest page tables are always treated as user writes at
  the nested page table level.

  A table walk for the guest page itself is always treated as a user
  access at the nested page table level

  The host hCR0.WP bit is ignored under nested paging.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-17-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f3c4c6349ddcc..588d789cc79f2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4223,8 +4223,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	 * NX can be used by any non-nested shadow MMU to avoid having to reset
 	 * MMU contexts.  Note, KVM forces EFER.NX=1 when TDP is disabled.
 	 */
-	bool uses_nx = context->nx || !tdp_enabled ||
-		context->mmu_role.base.smep_andnot_wp;
+	bool uses_nx = context->nx || !tdp_enabled;
 	struct rsvd_bits_validate *shadow_zero_check;
 	int i;
 
-- 
GitLab


From 20f632bd0060e12fca083adc44b097231e2f4649 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:02 -0700
Subject: [PATCH 3715/3804] KVM: x86: Read and pass all CR0/CR4 role bits to
 shadow MMU helper

Grab all CR0/CR4 MMU role bits from current vCPU state when initializing
a non-nested shadow MMU.  Extract the masks from kvm_post_set_cr{0,4}(),
as the CR0/CR4 update masks must exactly match the mmu_role bits, with
one exception (see below).  The "full" CR0/CR4 will be used by future
commits to initialize the MMU and its role, as opposed to the current
approach of pulling everything from vCPU, which is incorrect for certain
flows, e.g. nested NPT.

CR4.LA57 is an exception, as it can be toggled on VM-Exit (for L1's MMU)
but can't be toggled via MOV CR4 while long mode is active.  I.e. LA57
needs to be in the mmu_role, but technically doesn't need to be checked
by kvm_post_set_cr4().  However, the extra check is completely benign as
the hardware restrictions simply mean LA57 will never be _the_ cause of
a MMU reset during MOV CR4.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-18-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h     | 6 ++++++
 arch/x86/kvm/mmu/mmu.c | 4 ++--
 arch/x86/kvm/x86.c     | 9 ++-------
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 47131b92b9901..4e926f4935b07 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,6 +44,12 @@
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+#define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | \
+			       X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE | \
+			       X86_CR4_LA57)
+
+#define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP)
+
 static __always_inline u64 rsvd_bits(int s, int e)
 {
 	BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 588d789cc79f2..51a48f17c80ed 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4778,8 +4778,8 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
 
 	kvm_init_shadow_mmu(vcpu,
-			    kvm_read_cr0_bits(vcpu, X86_CR0_PG),
-			    kvm_read_cr4_bits(vcpu, X86_CR4_PAE),
+			    kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS),
+			    kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS),
 			    vcpu->arch.efer);
 
 	context->get_guest_pgd     = get_cr3;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b059698cd5c0..a7c7b2b28de78 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -851,14 +851,12 @@ EXPORT_SYMBOL_GPL(load_pdptrs);
 
 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
 {
-	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP;
-
 	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
 		kvm_clear_async_pf_completion_queue(vcpu);
 		kvm_async_pf_hash_reset(vcpu);
 	}
 
-	if ((cr0 ^ old_cr0) & update_bits)
+	if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
 		kvm_mmu_reset_context(vcpu);
 
 	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
@@ -1037,10 +1035,7 @@ EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
 
 void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
 {
-	unsigned long mmu_role_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
-				      X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
-
-	if (((cr4 ^ old_cr4) & mmu_role_bits) ||
+	if (((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 }
-- 
GitLab


From 16be1d12925305d4d20fd897632d9a6836a865c8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:03 -0700
Subject: [PATCH 3716/3804] KVM: x86/mmu: Move nested NPT reserved bit
 calculation into MMU proper

Move nested NPT's invocation of reset_shadow_zero_bits_mask() into the
MMU proper and unexport said function.  Aside from dropping an export,
this is a baby step toward eliminating the call entirely by fixing the
shadow_root_level confusion.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-19-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h        |  3 ---
 arch/x86/kvm/mmu/mmu.c    | 11 ++++++++---
 arch/x86/kvm/svm/nested.c |  1 -
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 4e926f4935b07..62844bacd13f4 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -68,9 +68,6 @@ static __always_inline u64 rsvd_bits(int s, int e)
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
 void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
 
-void
-reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
-
 void kvm_init_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 			     unsigned long cr4, u64 efer, gpa_t nested_cr3);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 51a48f17c80ed..0c23a6d5722d0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4212,8 +4212,8 @@ static inline u64 reserved_hpa_bits(void)
  * table in guest or amd nested guest, its mmu features completely
  * follow the features in guest.
  */
-void
-reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
+static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
+					struct kvm_mmu *context)
 {
 	/*
 	 * KVM uses NX when TDP is disabled to handle a variety of scenarios,
@@ -4247,7 +4247,6 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
 	}
 
 }
-EXPORT_SYMBOL_GPL(reset_shadow_zero_bits_mask);
 
 static inline bool boot_cpu_is_amd(void)
 {
@@ -4714,6 +4713,12 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 		 */
 		context->shadow_root_level = new_role.base.level;
 	}
+
+	/*
+	 * Redo the shadow bits, the reset done by shadow_mmu_init_context()
+	 * (above) may use the wrong shadow_root_level.
+	 */
+	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu);
 
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index f17d8c9050c01..a9e3b0736c206 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -110,7 +110,6 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
 	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
 	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
 	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
-	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
 }
 
-- 
GitLab


From d555f7057ebe34aae42fe2f592a3047e9b151326 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:04 -0700
Subject: [PATCH 3717/3804] KVM: x86/mmu: Grab shadow root level from mmu_role
 for shadow MMUs

Use the mmu_role to initialize shadow root level instead of assuming the
level of KVM's shadow root (host) is the same as that of the guest root,
or in the case of 32-bit non-PAE paging where KVM forces PAE paging.
For nested NPT, the shadow root level cannot be adapted to L1's NPT root
level and is instead always the TDP root level because NPT uses the
current host CR0/CR4/EFER, e.g. 64-bit KVM can't drop into 32-bit PAE to
shadow L1's NPT.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-20-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0c23a6d5722d0..466cb93eb3b56 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3898,7 +3898,6 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = NULL;
 	context->root_level = 0;
-	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->direct_map = true;
 	context->nx = false;
 }
@@ -4466,10 +4465,10 @@ static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 
 static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 					 struct kvm_mmu *context,
-					 int level)
+					 int root_level)
 {
 	context->nx = is_nx(vcpu);
-	context->root_level = level;
+	context->root_level = root_level;
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context, false);
@@ -4481,7 +4480,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
 	context->invlpg = paging64_invlpg;
-	context->shadow_root_level = level;
 	context->direct_map = false;
 }
 
@@ -4509,7 +4507,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->sync_page = paging32_sync_page;
 	context->invlpg = paging32_invlpg;
-	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->direct_map = false;
 }
 
@@ -4669,6 +4666,8 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	else
 		paging32_init_context(vcpu, context);
 
+	context->shadow_root_level = new_role.base.level;
+
 	context->mmu_role.as_u64 = new_role.as_u64;
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
@@ -4704,16 +4703,9 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 
 	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
-	if (new_role.as_u64 != context->mmu_role.as_u64) {
+	if (new_role.as_u64 != context->mmu_role.as_u64)
 		shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
 
-		/*
-		 * Override the level set by the common init helper, nested TDP
-		 * always uses the host's TDP configuration.
-		 */
-		context->shadow_root_level = new_role.base.level;
-	}
-
 	/*
 	 * Redo the shadow bits, the reset done by shadow_mmu_init_context()
 	 * (above) may use the wrong shadow_root_level.
-- 
GitLab


From 594e91a100ccab334675c4fc9145e6ef3c788449 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:05 -0700
Subject: [PATCH 3718/3804] KVM: x86/mmu: Add struct and helpers to retrieve
 MMU role bits from regs

Introduce "struct kvm_mmu_role_regs" to hold the register state that is
incorporated into the mmu_role.  For nested TDP, the register state that
is factored into the MMU isn't vCPU state; the dedicated struct will be
used to propagate the correct state throughout the flows without having
to pass multiple params, and also provides helpers for the various flag
accessors.

Intentionally make the new helpers cumbersome/ugly by prepending four
underscores.  In the not-too-distant future, it will be preferable to use
the mmu_role to query bits as the mmu_role can drop irrelevant bits
without creating contradictions, e.g. clearing CR4 bits when CR0.PG=0.
Reserve the clean helper names (no underscores) for the mmu_role.

Add a helper for vCPU conversion, which is the common case.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-21-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 66 +++++++++++++++++++++++++++++++++---------
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 466cb93eb3b56..a9c968bb3eae6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -176,9 +176,46 @@ static void mmu_spte_set(u64 *sptep, u64 spte);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
 
+struct kvm_mmu_role_regs {
+	const unsigned long cr0;
+	const unsigned long cr4;
+	const u64 efer;
+};
+
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
+/*
+ * Yes, lot's of underscores.  They're a hint that you probably shouldn't be
+ * reading from the role_regs.  Once the mmu_role is constructed, it becomes
+ * the single source of truth for the MMU's state.
+ */
+#define BUILD_MMU_ROLE_REGS_ACCESSOR(reg, name, flag)			\
+static inline bool ____is_##reg##_##name(struct kvm_mmu_role_regs *regs)\
+{									\
+	return !!(regs->reg & flag);					\
+}
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, pg, X86_CR0_PG);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr0, wp, X86_CR0_WP);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pse, X86_CR4_PSE);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pae, X86_CR4_PAE);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smep, X86_CR4_SMEP);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, smap, X86_CR4_SMAP);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, pke, X86_CR4_PKE);
+BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, la57, X86_CR4_LA57);
+BUILD_MMU_ROLE_REGS_ACCESSOR(efer, nx, EFER_NX);
+BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA);
+
+static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu_role_regs regs = {
+		.cr0 = kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS),
+		.cr4 = kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS),
+		.efer = vcpu->arch.efer,
+	};
+
+	return regs;
+}
 
 static inline bool kvm_available_flush_tlb_with_range(void)
 {
@@ -4654,14 +4691,14 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 }
 
 static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
-				    unsigned long cr0, unsigned long cr4,
-				    u64 efer, union kvm_mmu_role new_role)
+				    struct kvm_mmu_role_regs *regs,
+				    union kvm_mmu_role new_role)
 {
-	if (!(cr0 & X86_CR0_PG))
+	if (!____is_cr0_pg(regs))
 		nonpaging_init_context(vcpu, context);
-	else if (efer & EFER_LMA)
+	else if (____is_efer_lma(regs))
 		paging64_init_context(vcpu, context);
-	else if (cr4 & X86_CR4_PAE)
+	else if (____is_cr4_pae(regs))
 		paging32E_init_context(vcpu, context);
 	else
 		paging32_init_context(vcpu, context);
@@ -4672,15 +4709,15 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 
-static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
-				unsigned long cr4, u64 efer)
+static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu,
+				struct kvm_mmu_role_regs *regs)
 {
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
 	union kvm_mmu_role new_role =
 		kvm_calc_shadow_mmu_root_page_role(vcpu, false);
 
 	if (new_role.as_u64 != context->mmu_role.as_u64)
-		shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
+		shadow_mmu_init_context(vcpu, context, regs, new_role);
 }
 
 static union kvm_mmu_role
@@ -4699,12 +4736,17 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 			     unsigned long cr4, u64 efer, gpa_t nested_cr3)
 {
 	struct kvm_mmu *context = &vcpu->arch.guest_mmu;
+	struct kvm_mmu_role_regs regs = {
+		.cr0 = cr0,
+		.cr4 = cr4,
+		.efer = efer,
+	};
 	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
 
 	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
 	if (new_role.as_u64 != context->mmu_role.as_u64)
-		shadow_mmu_init_context(vcpu, context, cr0, cr4, efer, new_role);
+		shadow_mmu_init_context(vcpu, context, &regs, new_role);
 
 	/*
 	 * Redo the shadow bits, the reset done by shadow_mmu_init_context()
@@ -4773,11 +4815,9 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_ept_mmu);
 static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
+	struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
 
-	kvm_init_shadow_mmu(vcpu,
-			    kvm_read_cr0_bits(vcpu, KVM_MMU_CR0_ROLE_BITS),
-			    kvm_read_cr4_bits(vcpu, KVM_MMU_CR4_ROLE_BITS),
-			    vcpu->arch.efer);
+	kvm_init_shadow_mmu(vcpu, &regs);
 
 	context->get_guest_pgd     = get_cr3;
 	context->get_pdptr         = kvm_pdptr_read;
-- 
GitLab


From af098972295aab280b362090aef964d4eb89f63f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:06 -0700
Subject: [PATCH 3719/3804] KVM: x86/mmu: Consolidate misc updates into
 shadow_mmu_init_context()

Consolidate the MMU metadata update calls to deduplicate code, and to
prep for future cleanup.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-22-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a9c968bb3eae6..28bfe18eb4165 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4507,11 +4507,6 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->nx = is_nx(vcpu);
 	context->root_level = root_level;
 
-	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context, false);
-	update_pkru_bitmask(vcpu, context, false);
-	update_last_nonleaf_level(vcpu, context);
-
 	MMU_WARN_ON(!is_pae(vcpu));
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
@@ -4534,12 +4529,6 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 {
 	context->nx = false;
 	context->root_level = PT32_ROOT_LEVEL;
-
-	reset_rsvds_bits_mask(vcpu, context);
-	update_permission_bitmask(vcpu, context, false);
-	update_pkru_bitmask(vcpu, context, false);
-	update_last_nonleaf_level(vcpu, context);
-
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->sync_page = paging32_sync_page;
@@ -4703,6 +4692,12 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	else
 		paging32_init_context(vcpu, context);
 
+	if (____is_cr0_pg(regs)) {
+		reset_rsvds_bits_mask(vcpu, context);
+		update_permission_bitmask(vcpu, context, false);
+		update_pkru_bitmask(vcpu, context, false);
+		update_last_nonleaf_level(vcpu, context);
+	}
 	context->shadow_root_level = new_role.base.level;
 
 	context->mmu_role.as_u64 = new_role.as_u64;
-- 
GitLab


From cd6767c334b628cf566db56c778e67f7e6ae2845 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:07 -0700
Subject: [PATCH 3720/3804] KVM: x86/mmu: Ignore CR0 and CR4 bits in nested EPT
 MMU role

Do not incorporate CR0/CR4 bits into the role for the nested EPT MMU, as
EPT behavior is not influenced by CR0/CR4.  Note, this is the guest_mmu
(L1's EPT), not nested_mmu (L2's IA32 paging); the nested_mmu does need
CR0/CR4, and is initialized in a separate flow.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-23-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 28bfe18eb4165..52e405555cd68 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4767,8 +4767,10 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
 	role.base.guest_mode = true;
 	role.base.access = ACC_ALL;
 
-	role.ext = kvm_calc_mmu_role_ext(vcpu);
+	/* EPT, and thus nested EPT, does not consume CR0, CR4, nor EFER. */
+	role.ext.word = 0;
 	role.ext.execonly = execonly;
+	role.ext.valid = 1;
 
 	return role;
 }
-- 
GitLab


From 8626c120baefe68d22a22d6af9a7eed0b50bee90 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:08 -0700
Subject: [PATCH 3721/3804] KVM: x86/mmu: Use MMU's role_regs, not vCPU state,
 to compute mmu_role

Use the provided role_regs to calculate the mmu_role instead of pulling
bits from current vCPU state.  For some flows, e.g. nested TDP, the vCPU
state may not be correct (or relevant).

Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-24-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 92 ++++++++++++++++++++++++------------------
 1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 52e405555cd68..81992ba2899fb 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4542,17 +4542,18 @@ static void paging32E_init_context(struct kvm_vcpu *vcpu,
 	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
 }
 
-static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
+static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
+							 struct kvm_mmu_role_regs *regs)
 {
 	union kvm_mmu_extended_role ext = {0};
 
-	ext.cr0_pg = !!is_paging(vcpu);
-	ext.cr4_pae = !!is_pae(vcpu);
-	ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
-	ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
-	ext.cr4_pse = !!is_pse(vcpu);
-	ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
-	ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
+	ext.cr0_pg = ____is_cr0_pg(regs);
+	ext.cr4_pae = ____is_cr4_pae(regs);
+	ext.cr4_smep = ____is_cr4_smep(regs);
+	ext.cr4_smap = ____is_cr4_smap(regs);
+	ext.cr4_pse = ____is_cr4_pse(regs);
+	ext.cr4_pke = ____is_cr4_pke(regs);
+	ext.cr4_la57 = ____is_cr4_la57(regs);
 
 	ext.valid = 1;
 
@@ -4560,20 +4561,21 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
 }
 
 static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
+						   struct kvm_mmu_role_regs *regs,
 						   bool base_only)
 {
 	union kvm_mmu_role role = {0};
 
 	role.base.access = ACC_ALL;
-	role.base.nxe = !!is_nx(vcpu);
-	role.base.cr0_wp = is_write_protection(vcpu);
+	role.base.nxe = ____is_efer_nx(regs);
+	role.base.cr0_wp = ____is_cr0_wp(regs);
 	role.base.smm = is_smm(vcpu);
 	role.base.guest_mode = is_guest_mode(vcpu);
 
 	if (base_only)
 		return role;
 
-	role.ext = kvm_calc_mmu_role_ext(vcpu);
+	role.ext = kvm_calc_mmu_role_ext(vcpu, regs);
 
 	return role;
 }
@@ -4588,9 +4590,10 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
 }
 
 static union kvm_mmu_role
-kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
+				struct kvm_mmu_role_regs *regs, bool base_only)
 {
-	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
+	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, regs, base_only);
 
 	role.base.ad_disabled = (shadow_accessed_mask == 0);
 	role.base.level = kvm_mmu_get_tdp_level(vcpu);
@@ -4603,8 +4606,9 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
 static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
+	struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
 	union kvm_mmu_role new_role =
-		kvm_calc_tdp_mmu_root_page_role(vcpu, false);
+		kvm_calc_tdp_mmu_root_page_role(vcpu, &regs, false);
 
 	if (new_role.as_u64 == context->mmu_role.as_u64)
 		return;
@@ -4648,30 +4652,30 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 }
 
 static union kvm_mmu_role
-kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu, bool base_only)
+kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
+				      struct kvm_mmu_role_regs *regs, bool base_only)
 {
-	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only);
+	union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, regs, base_only);
 
-	role.base.smep_andnot_wp = role.ext.cr4_smep &&
-		!is_write_protection(vcpu);
-	role.base.smap_andnot_wp = role.ext.cr4_smap &&
-		!is_write_protection(vcpu);
-	role.base.gpte_is_8_bytes = !!is_pae(vcpu);
+	role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
+	role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
+	role.base.gpte_is_8_bytes = ____is_cr4_pae(regs);
 
 	return role;
 }
 
 static union kvm_mmu_role
-kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only)
+kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu_role_regs *regs, bool base_only)
 {
 	union kvm_mmu_role role =
-		kvm_calc_shadow_root_page_role_common(vcpu, base_only);
+		kvm_calc_shadow_root_page_role_common(vcpu, regs, base_only);
 
-	role.base.direct = !is_paging(vcpu);
+	role.base.direct = !____is_cr0_pg(regs);
 
-	if (!is_long_mode(vcpu))
+	if (!____is_efer_lma(regs))
 		role.base.level = PT32E_ROOT_LEVEL;
-	else if (is_la57_mode(vcpu))
+	else if (____is_cr4_la57(regs))
 		role.base.level = PT64_ROOT_5LEVEL;
 	else
 		role.base.level = PT64_ROOT_4LEVEL;
@@ -4709,17 +4713,18 @@ static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu,
 {
 	struct kvm_mmu *context = &vcpu->arch.root_mmu;
 	union kvm_mmu_role new_role =
-		kvm_calc_shadow_mmu_root_page_role(vcpu, false);
+		kvm_calc_shadow_mmu_root_page_role(vcpu, regs, false);
 
 	if (new_role.as_u64 != context->mmu_role.as_u64)
 		shadow_mmu_init_context(vcpu, context, regs, new_role);
 }
 
 static union kvm_mmu_role
-kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu)
+kvm_calc_shadow_npt_root_page_role(struct kvm_vcpu *vcpu,
+				   struct kvm_mmu_role_regs *regs)
 {
 	union kvm_mmu_role role =
-		kvm_calc_shadow_root_page_role_common(vcpu, false);
+		kvm_calc_shadow_root_page_role_common(vcpu, regs, false);
 
 	role.base.direct = false;
 	role.base.level = kvm_mmu_get_tdp_level(vcpu);
@@ -4736,7 +4741,9 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 		.cr4 = cr4,
 		.efer = efer,
 	};
-	union kvm_mmu_role new_role = kvm_calc_shadow_npt_root_page_role(vcpu);
+	union kvm_mmu_role new_role;
+
+	new_role = kvm_calc_shadow_npt_root_page_role(vcpu, &regs);
 
 	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
@@ -4821,9 +4828,12 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
 	context->inject_page_fault = kvm_inject_page_fault;
 }
 
-static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+static union kvm_mmu_role
+kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu, struct kvm_mmu_role_regs *regs)
 {
-	union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+	union kvm_mmu_role role;
+
+	role = kvm_calc_shadow_root_page_role_common(vcpu, regs, false);
 
 	/*
 	 * Nested MMUs are used only for walking L2's gva->gpa, they never have
@@ -4832,12 +4842,12 @@ static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
 	 */
 	role.base.direct = true;
 
-	if (!is_paging(vcpu))
+	if (!____is_cr0_pg(regs))
 		role.base.level = 0;
-	else if (is_long_mode(vcpu))
-		role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
-						       PT64_ROOT_4LEVEL;
-	else if (is_pae(vcpu))
+	else if (____is_efer_lma(regs))
+		role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL :
+							  PT64_ROOT_4LEVEL;
+	else if (____is_cr4_pae(regs))
 		role.base.level = PT32E_ROOT_LEVEL;
 	else
 		role.base.level = PT32_ROOT_LEVEL;
@@ -4847,7 +4857,8 @@ static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
 
 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 {
-	union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
+	struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
+	union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu, &regs);
 	struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
 
 	if (new_role.as_u64 == g_context->mmu_role.as_u64)
@@ -4913,12 +4924,13 @@ EXPORT_SYMBOL_GPL(kvm_init_mmu);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu_role_regs regs = vcpu_to_role_regs(vcpu);
 	union kvm_mmu_role role;
 
 	if (tdp_enabled)
-		role = kvm_calc_tdp_mmu_root_page_role(vcpu, true);
+		role = kvm_calc_tdp_mmu_root_page_role(vcpu, &regs, true);
 	else
-		role = kvm_calc_shadow_mmu_root_page_role(vcpu, true);
+		role = kvm_calc_shadow_mmu_root_page_role(vcpu, &regs, true);
 
 	return role.base;
 }
-- 
GitLab


From 167f8a5cae99fb2050d3d674ca84457a526e23dd Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:09 -0700
Subject: [PATCH 3722/3804] KVM: x86/mmu: Rename "nxe" role bit to "efer_nx"
 for macro shenanigans

Rename "nxe" to "efer_nx" so that future macro magic can use the pattern
<reg>_<bit> for all CR0, CR4, and EFER bits that are included in the role.
Using "efer_nx" also makes it clear that the role bit reflects EFER.NX,
not the NX bit in the corresponding PTE.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-25-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/mmu.rst            | 4 ++--
 arch/x86/include/asm/kvm_host.h           | 4 ++--
 arch/x86/kvm/mmu/mmu.c                    | 2 +-
 arch/x86/kvm/mmu/mmutrace.h               | 2 +-
 tools/lib/traceevent/plugins/plugin_kvm.c | 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/virt/kvm/mmu.rst b/Documentation/virt/kvm/mmu.rst
index ddbb23998742c..f60f5488e1219 100644
--- a/Documentation/virt/kvm/mmu.rst
+++ b/Documentation/virt/kvm/mmu.rst
@@ -180,8 +180,8 @@ Shadow pages contain the following information:
   role.gpte_is_8_bytes:
     Reflects the size of the guest PTE for which the page is valid, i.e. '1'
     if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
-  role.nxe:
-    Contains the value of efer.nxe for which the page is valid.
+  role.efer_nx:
+    Contains the value of efer.nx for which the page is valid.
   role.cr0_wp:
     Contains the value of cr0.wp for which the page is valid.
   role.smep_andnot_wp:
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 250915da1681d..520140eed4239 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -274,7 +274,7 @@ struct kvm_kernel_irq_routing_entry;
  * by indirect shadow page can not be more than 15 bits.
  *
  * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
- * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * @efer_nx, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
  */
 union kvm_mmu_page_role {
 	u32 word;
@@ -285,7 +285,7 @@ union kvm_mmu_page_role {
 		unsigned direct:1;
 		unsigned access:3;
 		unsigned invalid:1;
-		unsigned nxe:1;
+		unsigned efer_nx:1;
 		unsigned cr0_wp:1;
 		unsigned smep_andnot_wp:1;
 		unsigned smap_andnot_wp:1;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 81992ba2899fb..25f23de89cdff 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4567,7 +4567,7 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 	union kvm_mmu_role role = {0};
 
 	role.base.access = ACC_ALL;
-	role.base.nxe = ____is_efer_nx(regs);
+	role.base.efer_nx = ____is_efer_nx(regs);
 	role.base.cr0_wp = ____is_cr0_wp(regs);
 	role.base.smm = is_smm(vcpu);
 	role.base.guest_mode = is_guest_mode(vcpu);
diff --git a/arch/x86/kvm/mmu/mmutrace.h b/arch/x86/kvm/mmu/mmutrace.h
index e798489b56b55..efbad33a06457 100644
--- a/arch/x86/kvm/mmu/mmutrace.h
+++ b/arch/x86/kvm/mmu/mmutrace.h
@@ -40,7 +40,7 @@
 			 role.direct ? " direct" : "",			\
 			 access_str[role.access],			\
 			 role.invalid ? " invalid" : "",		\
-			 role.nxe ? "" : "!",				\
+			 role.efer_nx ? "" : "!",			\
 			 role.ad_disabled ? "!" : "",			\
 			 __entry->root_count,				\
 			 __entry->unsync ? "unsync" : "sync", 0);	\
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index 51ceeb9147eb6..9ce7b4b68e3fe 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -366,7 +366,7 @@ union kvm_mmu_page_role {
 		unsigned direct:1;
 		unsigned access:3;
 		unsigned invalid:1;
-		unsigned nxe:1;
+		unsigned efer_nx:1;
 		unsigned cr0_wp:1;
 		unsigned smep_and_not_wp:1;
 		unsigned smap_and_not_wp:1;
@@ -403,7 +403,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
 				 access_str[role.access],
 				 role.invalid ? " invalid" : "",
 				 role.cr4_pae ? "" : "!",
-				 role.nxe ? "" : "!",
+				 role.efer_nx ? "" : "!",
 				 role.cr0_wp ? "" : "!",
 				 role.smep_and_not_wp ? " smep" : "",
 				 role.smap_and_not_wp ? " smap" : "",
-- 
GitLab


From 6066772455f21ce1e90f003243c9864091621773 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:10 -0700
Subject: [PATCH 3723/3804] KVM: x86/mmu: Add accessors to query mmu_role bits

Add accessors via a builder macro for all mmu_role bits that track a CR0,
CR4, or EFER bit, abstracting whether the bits are in the base or the
extended role.

Future commits will switch to using mmu_role instead of vCPU state to
configure the MMU, i.e. there are about to be a large number of users.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-26-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c         | 21 +++++++++++++++++++++
 arch/x86/kvm/mmu/paging_tmpl.h |  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 25f23de89cdff..1e5beac6920f9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -206,6 +206,27 @@ BUILD_MMU_ROLE_REGS_ACCESSOR(cr4, la57, X86_CR4_LA57);
 BUILD_MMU_ROLE_REGS_ACCESSOR(efer, nx, EFER_NX);
 BUILD_MMU_ROLE_REGS_ACCESSOR(efer, lma, EFER_LMA);
 
+/*
+ * The MMU itself (with a valid role) is the single source of truth for the
+ * MMU.  Do not use the regs used to build the MMU/role, nor the vCPU.  The
+ * regs don't account for dependencies, e.g. clearing CR4 bits if CR0.PG=1,
+ * and the vCPU may be incorrect/irrelevant.
+ */
+#define BUILD_MMU_ROLE_ACCESSOR(base_or_ext, reg, name)		\
+static inline bool is_##reg##_##name(struct kvm_mmu *mmu)	\
+{								\
+	return !!(mmu->mmu_role. base_or_ext . reg##_##name);	\
+}
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr0, pg);
+BUILD_MMU_ROLE_ACCESSOR(base, cr0, wp);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, pse);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, pae);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, smep);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, smap);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, pke);
+BUILD_MMU_ROLE_ACCESSOR(ext,  cr4, la57);
+BUILD_MMU_ROLE_ACCESSOR(base, efer, nx);
+
 static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_role_regs regs = {
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index b632606a87d60..5cf36eb96ee24 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -471,7 +471,7 @@ retry_walk:
 
 error:
 	errcode |= write_fault | user_fault;
-	if (fetch_fault && (mmu->nx || mmu->mmu_role.ext.cr4_smep))
+	if (fetch_fault && (mmu->nx || is_cr4_smep(mmu)))
 		errcode |= PFERR_FETCH_MASK;
 
 	walker->fault.vector = PF_VECTOR;
-- 
GitLab


From ca8d664f509932eb316a4ae3926176be745e3b3d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:11 -0700
Subject: [PATCH 3724/3804] KVM: x86/mmu: Do not set paging-related bits in MMU
 role if CR0.PG=0

Don't set CR0/CR4/EFER bits in the MMU role if paging is disabled; paging
modifiers are irrelevant if there is no paging in the first place.
Somewhat arbitrarily clear gpte_is_8_bytes for shadow paging if paging is
disabled in the guest.  Again, there are no guest PTEs to process, so the
size is meaningless.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-27-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 1e5beac6920f9..b109ea16d39eb 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4568,13 +4568,15 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
 {
 	union kvm_mmu_extended_role ext = {0};
 
-	ext.cr0_pg = ____is_cr0_pg(regs);
-	ext.cr4_pae = ____is_cr4_pae(regs);
-	ext.cr4_smep = ____is_cr4_smep(regs);
-	ext.cr4_smap = ____is_cr4_smap(regs);
-	ext.cr4_pse = ____is_cr4_pse(regs);
-	ext.cr4_pke = ____is_cr4_pke(regs);
-	ext.cr4_la57 = ____is_cr4_la57(regs);
+	if (____is_cr0_pg(regs)) {
+		ext.cr0_pg = 1;
+		ext.cr4_pae = ____is_cr4_pae(regs);
+		ext.cr4_smep = ____is_cr4_smep(regs);
+		ext.cr4_smap = ____is_cr4_smap(regs);
+		ext.cr4_pse = ____is_cr4_pse(regs);
+		ext.cr4_pke = ____is_cr4_pke(regs);
+		ext.cr4_la57 = ____is_cr4_la57(regs);
+	}
 
 	ext.valid = 1;
 
@@ -4588,8 +4590,10 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu,
 	union kvm_mmu_role role = {0};
 
 	role.base.access = ACC_ALL;
-	role.base.efer_nx = ____is_efer_nx(regs);
-	role.base.cr0_wp = ____is_cr0_wp(regs);
+	if (____is_cr0_pg(regs)) {
+		role.base.efer_nx = ____is_efer_nx(regs);
+		role.base.cr0_wp = ____is_cr0_wp(regs);
+	}
 	role.base.smm = is_smm(vcpu);
 	role.base.guest_mode = is_guest_mode(vcpu);
 
@@ -4680,7 +4684,7 @@ kvm_calc_shadow_root_page_role_common(struct kvm_vcpu *vcpu,
 
 	role.base.smep_andnot_wp = role.ext.cr4_smep && !____is_cr0_wp(regs);
 	role.base.smap_andnot_wp = role.ext.cr4_smap && !____is_cr0_wp(regs);
-	role.base.gpte_is_8_bytes = ____is_cr4_pae(regs);
+	role.base.gpte_is_8_bytes = ____is_cr0_pg(regs) && ____is_cr4_pae(regs);
 
 	return role;
 }
-- 
GitLab


From 84c679f5f52c7a98c9f0986ff89d50dc073b97f3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:12 -0700
Subject: [PATCH 3725/3804] KVM: x86/mmu: Set CR4.PKE/LA57 in MMU role iff long
 mode is active

Don't set cr4_pke or cr4_la57 in the MMU role if long mode isn't active,
which is required for protection keys and 5-level paging to be fully
enabled.  Ignoring the bit avoids unnecessary reconfiguration on reuse,
and also means consumers of mmu_role don't need to manually check for
long mode.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-28-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b109ea16d39eb..eb80d8a4beade 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4574,8 +4574,10 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
 		ext.cr4_smep = ____is_cr4_smep(regs);
 		ext.cr4_smap = ____is_cr4_smap(regs);
 		ext.cr4_pse = ____is_cr4_pse(regs);
-		ext.cr4_pke = ____is_cr4_pke(regs);
-		ext.cr4_la57 = ____is_cr4_la57(regs);
+
+		/* PKEY and LA57 are active iff long mode is active. */
+		ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
+		ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
 	}
 
 	ext.valid = 1;
-- 
GitLab


From 18db1b1790a899880dc4afdb9ac6c82c91080d66 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:13 -0700
Subject: [PATCH 3726/3804] KVM: x86/mmu: Always set new mmu_role immediately
 after checking old role

Refactor shadow MMU initialization to immediately set its new mmu_role
after verifying it differs from the old role, and so that all flavors
of MMU initialization share the same check-and-set pattern.  Immediately
setting the role will allow future commits to use mmu_role to configure
the MMU without consuming stale state.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-29-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index eb80d8a4beade..f5a55c97284c7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4714,6 +4714,11 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 				    struct kvm_mmu_role_regs *regs,
 				    union kvm_mmu_role new_role)
 {
+	if (new_role.as_u64 == context->mmu_role.as_u64)
+		return;
+
+	context->mmu_role.as_u64 = new_role.as_u64;
+
 	if (!____is_cr0_pg(regs))
 		nonpaging_init_context(vcpu, context);
 	else if (____is_efer_lma(regs))
@@ -4731,7 +4736,6 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	}
 	context->shadow_root_level = new_role.base.level;
 
-	context->mmu_role.as_u64 = new_role.as_u64;
 	reset_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -4742,8 +4746,7 @@ static void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu,
 	union kvm_mmu_role new_role =
 		kvm_calc_shadow_mmu_root_page_role(vcpu, regs, false);
 
-	if (new_role.as_u64 != context->mmu_role.as_u64)
-		shadow_mmu_init_context(vcpu, context, regs, new_role);
+	shadow_mmu_init_context(vcpu, context, regs, new_role);
 }
 
 static union kvm_mmu_role
@@ -4774,8 +4777,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 
 	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
-	if (new_role.as_u64 != context->mmu_role.as_u64)
-		shadow_mmu_init_context(vcpu, context, &regs, new_role);
+	shadow_mmu_init_context(vcpu, context, &regs, new_role);
 
 	/*
 	 * Redo the shadow bits, the reset done by shadow_mmu_init_context()
@@ -4823,6 +4825,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	if (new_role.as_u64 == context->mmu_role.as_u64)
 		return;
 
+	context->mmu_role.as_u64 = new_role.as_u64;
+
 	context->shadow_root_level = level;
 
 	context->nx = true;
@@ -4833,7 +4837,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->invlpg = ept_invlpg;
 	context->root_level = level;
 	context->direct_map = false;
-	context->mmu_role.as_u64 = new_role.as_u64;
 
 	update_permission_bitmask(vcpu, context, true);
 	update_pkru_bitmask(vcpu, context, true);
-- 
GitLab


From 8c985b2d8e682edac84bde63cef660cc574f795e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:14 -0700
Subject: [PATCH 3727/3804] KVM: x86/mmu: Don't grab CR4.PSE for calculating
 shadow reserved bits

Unconditionally pass pse=false when calculating reserved bits for shadow
PTEs.  CR4.PSE is only relevant for 32-bit non-PAE paging, which KVM does
not use for shadow paging (including nested NPT).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-30-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f5a55c97284c7..d017352d76c8d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4281,19 +4281,22 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	 * MMU contexts.  Note, KVM forces EFER.NX=1 when TDP is disabled.
 	 */
 	bool uses_nx = context->nx || !tdp_enabled;
+
+	/* @amd adds a check on bit of SPTEs, which KVM shouldn't use anyways. */
+	bool is_amd = true;
+	/* KVM doesn't use 2-level page tables for the shadow MMU. */
+	bool is_pse = false;
 	struct rsvd_bits_validate *shadow_zero_check;
 	int i;
 
-	/*
-	 * Passing "true" to the last argument is okay; it adds a check
-	 * on bit 8 of the SPTEs which KVM doesn't use anyway.
-	 */
+	WARN_ON_ONCE(context->shadow_root_level < PT32E_ROOT_LEVEL);
+
 	shadow_zero_check = &context->shadow_zero_check;
 	__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
 				reserved_hpa_bits(),
 				context->shadow_root_level, uses_nx,
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
-				is_pse(vcpu), true);
+				is_pse, is_amd);
 
 	if (!shadow_me_mask)
 		return;
@@ -4329,7 +4332,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 					reserved_hpa_bits(),
 					context->shadow_root_level, false,
 					boot_cpu_has(X86_FEATURE_GBPAGES),
-					true, true);
+					false, true);
 	else
 		__reset_rsvds_bits_mask_ept(shadow_zero_check,
 					    reserved_hpa_bits(), false);
-- 
GitLab


From 4e9c0d80dbbd2dd411d726ed10eccaaba6d63a08 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:15 -0700
Subject: [PATCH 3728/3804] KVM: x86/mmu: Use MMU's role to get CR4.PSE for
 computing rsvd bits

Use the MMU's role to get CR4.PSE when calculating reserved bits for the
guest's PTEs.  Practically speaking, this is a glorified nop as the role
always comes from vCPU state for the relevant flows, but converting to
the roles will provide consistency once everything else is converted, and
will Just Work if the "always comes from vCPU" behavior were ever to
change (unlikely).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-31-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d017352d76c8d..3a8af50e15100 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4216,7 +4216,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 				vcpu->arch.reserved_gpa_bits,
 				context->root_level, context->nx,
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
-				is_pse(vcpu),
+				is_cr4_pse(context),
 				guest_cpuid_is_amd_or_hygon(vcpu));
 }
 
-- 
GitLab


From b705a277b7059673c93e7ada01cc446dfae3e85a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:16 -0700
Subject: [PATCH 3729/3804] KVM: x86/mmu: Drop vCPU param from reserved bits
 calculator

Drop the vCPU param from __reset_rsvds_bits_mask() as it's now unused,
and ideally will remain unused in the future.  Any information that's
needed by the low level helper should be explicitly provided as it's used
for both shadow/host MMUs and guest MMUs, i.e. vCPU state may be
meaningless or simply wrong.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-32-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3a8af50e15100..7651f9cbd12e2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4119,8 +4119,7 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
 #undef PTTYPE
 
 static void
-__reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
-			struct rsvd_bits_validate *rsvd_check,
+__reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
 			u64 pa_bits_rsvd, int level, bool nx, bool gbpages,
 			bool pse, bool amd)
 {
@@ -4212,7 +4211,7 @@ __reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu *context)
 {
-	__reset_rsvds_bits_mask(vcpu, &context->guest_rsvd_check,
+	__reset_rsvds_bits_mask(&context->guest_rsvd_check,
 				vcpu->arch.reserved_gpa_bits,
 				context->root_level, context->nx,
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
@@ -4292,8 +4291,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	WARN_ON_ONCE(context->shadow_root_level < PT32E_ROOT_LEVEL);
 
 	shadow_zero_check = &context->shadow_zero_check;
-	__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
-				reserved_hpa_bits(),
+	__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
 				context->shadow_root_level, uses_nx,
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
 				is_pse, is_amd);
@@ -4328,8 +4326,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	shadow_zero_check = &context->shadow_zero_check;
 
 	if (boot_cpu_is_amd())
-		__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
-					reserved_hpa_bits(),
+		__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
 					context->shadow_root_level, false,
 					boot_cpu_has(X86_FEATURE_GBPAGES),
 					false, true);
-- 
GitLab


From c596f1470ab7adb9ba6edf301b1f8f29dcefb55f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:17 -0700
Subject: [PATCH 3730/3804] KVM: x86/mmu: Use MMU's role to compute permission
 bitmask

Use the MMU's role to generate the permission bitmasks for the MMU.
For some flows, the vCPU state may not be correct (or relevant), e.g.
the nested NPT MMU can be initialized with incoherent vCPU state.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-33-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 7651f9cbd12e2..d1d25dd9ca911 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4365,8 +4365,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	 (7 & (access) ? 128 : 0))
 
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
-				      struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 {
 	unsigned byte;
 
@@ -4374,9 +4373,9 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 	const u8 w = BYTE_MASK(ACC_WRITE_MASK);
 	const u8 u = BYTE_MASK(ACC_USER_MASK);
 
-	bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
-	bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
-	bool cr0_wp = is_write_protection(vcpu);
+	bool cr4_smep = is_cr4_smep(mmu);
+	bool cr4_smap = is_cr4_smap(mmu);
+	bool cr0_wp = is_cr0_wp(mmu);
 
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
 		unsigned pfec = byte << 1;
@@ -4672,7 +4671,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
-	update_permission_bitmask(vcpu, context, false);
+	update_permission_bitmask(context, false);
 	update_pkru_bitmask(vcpu, context, false);
 	update_last_nonleaf_level(vcpu, context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
@@ -4730,7 +4729,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 
 	if (____is_cr0_pg(regs)) {
 		reset_rsvds_bits_mask(vcpu, context);
-		update_permission_bitmask(vcpu, context, false);
+		update_permission_bitmask(context, false);
 		update_pkru_bitmask(vcpu, context, false);
 		update_last_nonleaf_level(vcpu, context);
 	}
@@ -4838,7 +4837,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->root_level = level;
 	context->direct_map = false;
 
-	update_permission_bitmask(vcpu, context, true);
+	update_permission_bitmask(context, true);
 	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
@@ -4935,7 +4934,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
-	update_permission_bitmask(vcpu, g_context, false);
+	update_permission_bitmask(g_context, false);
 	update_pkru_bitmask(vcpu, g_context, false);
 	update_last_nonleaf_level(vcpu, g_context);
 }
-- 
GitLab


From 2e4c06618d4024f760ba6dfab0978533bd00d03e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:18 -0700
Subject: [PATCH 3731/3804] KVM: x86/mmu: Use MMU's role to compute PKRU
 bitmask

Use the MMU's role to calculate the Protection Keys (Restrict Userspace)
bitmask instead of pulling bits from current vCPU state.  For some flows,
the vCPU state may not be correct (or relevant), e.g. EPT doesn't
interact with PKRU.  Case in point, the "ept" param simply disappears.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-34-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d1d25dd9ca911..4a6c1848d39f4 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4460,24 +4460,17 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 * away both AD and WD.  For all reads or if the last condition holds, WD
 * only will be masked away.
 */
-static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				bool ept)
+static void update_pkru_bitmask(struct kvm_mmu *mmu)
 {
 	unsigned bit;
 	bool wp;
 
-	if (ept) {
+	if (!is_cr4_pke(mmu)) {
 		mmu->pkru_mask = 0;
 		return;
 	}
 
-	/* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
-	if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
-		mmu->pkru_mask = 0;
-		return;
-	}
-
-	wp = is_write_protection(vcpu);
+	wp = is_cr0_wp(mmu);
 
 	for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
 		unsigned pfec, pkey_bits;
@@ -4672,7 +4665,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(context, false);
-	update_pkru_bitmask(vcpu, context, false);
+	update_pkru_bitmask(context);
 	update_last_nonleaf_level(vcpu, context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
@@ -4730,7 +4723,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	if (____is_cr0_pg(regs)) {
 		reset_rsvds_bits_mask(vcpu, context);
 		update_permission_bitmask(context, false);
-		update_pkru_bitmask(vcpu, context, false);
+		update_pkru_bitmask(context);
 		update_last_nonleaf_level(vcpu, context);
 	}
 	context->shadow_root_level = new_role.base.level;
@@ -4838,8 +4831,8 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->direct_map = false;
 
 	update_permission_bitmask(context, true);
-	update_pkru_bitmask(vcpu, context, true);
 	update_last_nonleaf_level(vcpu, context);
+	update_pkru_bitmask(context);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
 }
@@ -4935,7 +4928,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(g_context, false);
-	update_pkru_bitmask(vcpu, g_context, false);
+	update_pkru_bitmask(g_context);
 	update_last_nonleaf_level(vcpu, g_context);
 }
 
-- 
GitLab


From b67a93a87e1f9281a1d9f4a28052fed49b4591f1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:19 -0700
Subject: [PATCH 3732/3804] KVM: x86/mmu: Use MMU's roles to compute last
 non-leaf level

Use the MMU's role to get CR4.PSE when determining the last level at
which the guest _cannot_ create a non-leaf PTE, i.e. cannot create a
huge page.

Note, the existing logic is arguably wrong when considering 5-level
paging and the case where 1gb pages aren't supported.  In practice, the
logic is confusing but not broken, because except for 32-bit non-PAE
paging, bit 7 (_PAGE_PSE) is reserved when a huge page isn't supported at
that level.  I.e. setting bit 7 will terminate the guest walk one way or
another.  Furthermore, last_nonleaf_level is only consulted after KVM has
verified there are no reserved bits set.

All that confusion will be addressed in a future patch by dropping
last_nonleaf_level entirely.  For now, massage the code to continue the
march toward using mmu_role for (almost) all MMU computations.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-35-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4a6c1848d39f4..0ca2f9bd82846 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4504,12 +4504,12 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
 	}
 }
 
-static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+static void update_last_nonleaf_level(struct kvm_mmu *mmu)
 {
 	unsigned root_level = mmu->root_level;
 
 	mmu->last_nonleaf_level = root_level;
-	if (root_level == PT32_ROOT_LEVEL && is_pse(vcpu))
+	if (root_level == PT32_ROOT_LEVEL && is_cr4_pse(mmu))
 		mmu->last_nonleaf_level++;
 }
 
@@ -4666,7 +4666,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(context, false);
 	update_pkru_bitmask(context);
-	update_last_nonleaf_level(vcpu, context);
+	update_last_nonleaf_level(context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -4724,7 +4724,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 		reset_rsvds_bits_mask(vcpu, context);
 		update_permission_bitmask(context, false);
 		update_pkru_bitmask(context);
-		update_last_nonleaf_level(vcpu, context);
+		update_last_nonleaf_level(context);
 	}
 	context->shadow_root_level = new_role.base.level;
 
@@ -4831,7 +4831,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->direct_map = false;
 
 	update_permission_bitmask(context, true);
-	update_last_nonleaf_level(vcpu, context);
+	update_last_nonleaf_level(context);
 	update_pkru_bitmask(context);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
@@ -4929,7 +4929,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 
 	update_permission_bitmask(g_context, false);
 	update_pkru_bitmask(g_context);
-	update_last_nonleaf_level(vcpu, g_context);
+	update_last_nonleaf_level(g_context);
 }
 
 void kvm_init_mmu(struct kvm_vcpu *vcpu)
-- 
GitLab


From cd628f0f1e1ce0709c2c6bc852b1a3abf9638b26 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:20 -0700
Subject: [PATCH 3733/3804] KVM: x86/mmu: Use MMU's role to detect EFER.NX in
 guest page walk

Use the NX bit from the MMU's role instead of the MMU itself so that the
redundant, dedicated "nx" flag can be dropped.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-36-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 5cf36eb96ee24..c92e712607b60 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -471,7 +471,7 @@ retry_walk:
 
 error:
 	errcode |= write_fault | user_fault;
-	if (fetch_fault && (mmu->nx || is_cr4_smep(mmu)))
+	if (fetch_fault && (is_efer_nx(mmu) || is_cr4_smep(mmu)))
 		errcode |= PFERR_FETCH_MASK;
 
 	walker->fault.vector = PF_VECTOR;
-- 
GitLab


From 84a16226046d1c9339a9be3f2b76ea2dc5677f02 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:21 -0700
Subject: [PATCH 3734/3804] KVM: x86/mmu: Use MMU's role/role_regs to compute
 context's metadata

Use the MMU's role and role_regs to calculate the MMU's guest root level
and NX bit.  For some flows, the vCPU state may not be correct (or
relevant), e.g. EPT doesn't interact with EFER.NX and nested NPT will
configure the guest_mmu with possibly-stale vCPU state.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-37-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0ca2f9bd82846..9c3bfc5cb5272 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3948,8 +3948,7 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
 				 max_level, true);
 }
 
-static void nonpaging_init_context(struct kvm_vcpu *vcpu,
-				   struct kvm_mmu *context)
+static void nonpaging_init_context(struct kvm_mmu *context)
 {
 	context->page_fault = nonpaging_page_fault;
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
@@ -4513,14 +4512,13 @@ static void update_last_nonleaf_level(struct kvm_mmu *mmu)
 		mmu->last_nonleaf_level++;
 }
 
-static void paging64_init_context_common(struct kvm_vcpu *vcpu,
-					 struct kvm_mmu *context,
+static void paging64_init_context_common(struct kvm_mmu *context,
 					 int root_level)
 {
-	context->nx = is_nx(vcpu);
+	context->nx = is_efer_nx(context);
 	context->root_level = root_level;
 
-	MMU_WARN_ON(!is_pae(vcpu));
+	WARN_ON_ONCE(!is_cr4_pae(context));
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
@@ -4528,17 +4526,16 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->direct_map = false;
 }
 
-static void paging64_init_context(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_mmu *context,
+				  struct kvm_mmu_role_regs *regs)
 {
-	int root_level = is_la57_mode(vcpu) ?
-			 PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
+	int root_level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL :
+						 PT64_ROOT_4LEVEL;
 
-	paging64_init_context_common(vcpu, context, root_level);
+	paging64_init_context_common(context, root_level);
 }
 
-static void paging32_init_context(struct kvm_vcpu *vcpu,
-				  struct kvm_mmu *context)
+static void paging32_init_context(struct kvm_mmu *context)
 {
 	context->nx = false;
 	context->root_level = PT32_ROOT_LEVEL;
@@ -4549,10 +4546,9 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->direct_map = false;
 }
 
-static void paging32E_init_context(struct kvm_vcpu *vcpu,
-				   struct kvm_mmu *context)
+static void paging32E_init_context(struct kvm_mmu *context)
 {
-	paging64_init_context_common(vcpu, context, PT32E_ROOT_LEVEL);
+	paging64_init_context_common(context, PT32E_ROOT_LEVEL);
 }
 
 static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
@@ -4712,13 +4708,13 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	context->mmu_role.as_u64 = new_role.as_u64;
 
 	if (!____is_cr0_pg(regs))
-		nonpaging_init_context(vcpu, context);
+		nonpaging_init_context(context);
 	else if (____is_efer_lma(regs))
-		paging64_init_context(vcpu, context);
+		paging64_init_context(context, regs);
 	else if (____is_cr4_pae(regs))
-		paging32E_init_context(vcpu, context);
+		paging32E_init_context(context);
 	else
-		paging32_init_context(vcpu, context);
+		paging32_init_context(context);
 
 	if (____is_cr0_pg(regs)) {
 		reset_rsvds_bits_mask(vcpu, context);
-- 
GitLab


From 90599c280123618049af5cf375aae5b4e73bec03 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:22 -0700
Subject: [PATCH 3735/3804] KVM: x86/mmu: Use MMU's role to get EFER.NX during
 MMU configuration

Get the MMU's effective EFER.NX from its role instead of using the
one-off, dedicated flag.  This will allow dropping said flag in a
future commit.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-38-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9c3bfc5cb5272..5eaab1b732049 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4212,7 +4212,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 {
 	__reset_rsvds_bits_mask(&context->guest_rsvd_check,
 				vcpu->arch.reserved_gpa_bits,
-				context->root_level, context->nx,
+				context->root_level, is_efer_nx(context),
 				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
 				is_cr4_pse(context),
 				guest_cpuid_is_amd_or_hygon(vcpu));
@@ -4278,7 +4278,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	 * NX can be used by any non-nested shadow MMU to avoid having to reset
 	 * MMU contexts.  Note, KVM forces EFER.NX=1 when TDP is disabled.
 	 */
-	bool uses_nx = context->nx || !tdp_enabled;
+	bool uses_nx = is_efer_nx(context) || !tdp_enabled;
 
 	/* @amd adds a check on bit of SPTEs, which KVM shouldn't use anyways. */
 	bool is_amd = true;
@@ -4375,6 +4375,7 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 	bool cr4_smep = is_cr4_smep(mmu);
 	bool cr4_smap = is_cr4_smap(mmu);
 	bool cr0_wp = is_cr0_wp(mmu);
+	bool efer_nx = is_efer_nx(mmu);
 
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
 		unsigned pfec = byte << 1;
@@ -4400,7 +4401,7 @@ static void update_permission_bitmask(struct kvm_mmu *mmu, bool ept)
 			u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
 
 			/* Not really needed: !nx will cause pte.nx to fault */
-			if (!mmu->nx)
+			if (!efer_nx)
 				ff = 0;
 
 			/* Allow supervisor writes if !cr0.wp */
-- 
GitLab


From a4c93252fed1517362d2ce43c6a5fd50a1152ed6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:23 -0700
Subject: [PATCH 3736/3804] KVM: x86/mmu: Drop "nx" from MMU context now that
 there are no readers

Drop kvm_mmu.nx as there are no consumers left.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-39-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 --
 arch/x86/kvm/mmu/mmu.c          | 17 -----------------
 2 files changed, 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 520140eed4239..3f4f6ad7405bf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -423,8 +423,6 @@ struct kvm_mmu {
 	/* Can have large pages at levels 2..last_nonleaf_level-1. */
 	u8 last_nonleaf_level;
 
-	bool nx;
-
 	u64 pdptrs[4]; /* pae */
 };
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5eaab1b732049..91b27538328fe 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -322,11 +322,6 @@ static int is_cpuid_PSE36(void)
 	return 1;
 }
 
-static int is_nx(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.efer & EFER_NX;
-}
-
 static gfn_t pse36_gfn_delta(u32 gpte)
 {
 	int shift = 32 - PT32_DIR_PSE36_SHIFT - PAGE_SHIFT;
@@ -3956,7 +3951,6 @@ static void nonpaging_init_context(struct kvm_mmu *context)
 	context->invlpg = NULL;
 	context->root_level = 0;
 	context->direct_map = true;
-	context->nx = false;
 }
 
 static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
@@ -4516,7 +4510,6 @@ static void update_last_nonleaf_level(struct kvm_mmu *mmu)
 static void paging64_init_context_common(struct kvm_mmu *context,
 					 int root_level)
 {
-	context->nx = is_efer_nx(context);
 	context->root_level = root_level;
 
 	WARN_ON_ONCE(!is_cr4_pae(context));
@@ -4538,7 +4531,6 @@ static void paging64_init_context(struct kvm_mmu *context,
 
 static void paging32_init_context(struct kvm_mmu *context)
 {
-	context->nx = false;
 	context->root_level = PT32_ROOT_LEVEL;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -4640,22 +4632,18 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->inject_page_fault = kvm_inject_page_fault;
 
 	if (!is_paging(vcpu)) {
-		context->nx = false;
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
-		context->nx = is_nx(vcpu);
 		context->root_level = is_la57_mode(vcpu) ?
 				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	} else if (is_pae(vcpu)) {
-		context->nx = is_nx(vcpu);
 		context->root_level = PT32E_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	} else {
-		context->nx = false;
 		context->root_level = PT32_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging32_gva_to_gpa;
@@ -4818,7 +4806,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 
 	context->shadow_root_level = level;
 
-	context->nx = true;
 	context->ept_ad = accessed_dirty;
 	context->page_fault = ept_page_fault;
 	context->gva_to_gpa = ept_gva_to_gpa;
@@ -4903,22 +4890,18 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	 * the gva_to_gpa functions between mmu and nested_mmu are swapped.
 	 */
 	if (!is_paging(vcpu)) {
-		g_context->nx = false;
 		g_context->root_level = 0;
 		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
 	} else if (is_long_mode(vcpu)) {
-		g_context->nx = is_nx(vcpu);
 		g_context->root_level = is_la57_mode(vcpu) ?
 					PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
 	} else if (is_pae(vcpu)) {
-		g_context->nx = is_nx(vcpu);
 		g_context->root_level = PT32E_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
 	} else {
-		g_context->nx = false;
 		g_context->root_level = PT32_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
-- 
GitLab


From 5472fcd4c6c8026565644f31490cfddfdafb9519 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:24 -0700
Subject: [PATCH 3737/3804] KVM: x86/mmu: Get nested MMU's root level from the
 MMU's role

Initialize the MMU's (guest) root_level using its mmu_role instead of
redoing the calculations.  The role_regs used to calculate the mmu_role
are initialized from the vCPU, i.e. this should be a complete nop.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-40-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 91b27538328fe..3d87b7fcf6b37 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4874,6 +4874,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	g_context->get_guest_pgd     = get_cr3;
 	g_context->get_pdptr         = kvm_pdptr_read;
 	g_context->inject_page_fault = kvm_inject_page_fault;
+	g_context->root_level        = new_role.base.level;
 
 	/*
 	 * L2 page tables are never shadowed, so there is no need to sync
@@ -4890,19 +4891,14 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	 * the gva_to_gpa functions between mmu and nested_mmu are swapped.
 	 */
 	if (!is_paging(vcpu)) {
-		g_context->root_level = 0;
 		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
 	} else if (is_long_mode(vcpu)) {
-		g_context->root_level = is_la57_mode(vcpu) ?
-					PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
 	} else if (is_pae(vcpu)) {
-		g_context->root_level = PT32E_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
 	} else {
-		g_context->root_level = PT32_ROOT_LEVEL;
 		reset_rsvds_bits_mask(vcpu, g_context);
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
-- 
GitLab


From 87e99d7d7054f6a861f18b0e2f30280d2f526f23 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:25 -0700
Subject: [PATCH 3738/3804] KVM: x86/mmu: Use MMU role_regs to get LA57, and
 drop vCPU LA57 helper

Get LA57 from the role_regs, which are initialized from the vCPU even
though TDP is enabled, instead of pulling the value directly from the
vCPU when computing the guest's root_level for TDP MMUs.  Note, the check
is inside an is_long_mode() statement, so that requirement is not lost.

Use role_regs even though the MMU's role is available and arguably
"better".  A future commit will consolidate the guest root level logic,
and it needs access to EFER.LMA, which is not tracked in the role (it
can't be toggled on VM-Exit, unlike LA57).

Drop is_la57_mode() as there are no remaining users, and to discourage
pulling MMU state from the vCPU (in the future).

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-41-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c |  2 +-
 arch/x86/kvm/x86.h     | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 3d87b7fcf6b37..b5e63c4ed7d1a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4635,7 +4635,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
 		context->root_level = 0;
 	} else if (is_long_mode(vcpu)) {
-		context->root_level = is_la57_mode(vcpu) ?
+		context->root_level = ____is_cr4_la57(&regs) ?
 				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
 		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 521f74e5bbf21..44ae103127400 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -157,16 +157,6 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
 	return cs_l;
 }
 
-static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
-	return (vcpu->arch.efer & EFER_LMA) &&
-		 kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
-#else
-	return 0;
-#endif
-}
-
 static inline bool x86_exception_has_error_code(unsigned int vector)
 {
 	static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
-- 
GitLab


From fa4b558802c0ed4ef8132c1b2d1e993c519eb0ae Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:26 -0700
Subject: [PATCH 3739/3804] KVM: x86/mmu: Consolidate reset_rsvds_bits_mask()
 calls

Move calls to reset_rsvds_bits_mask() out of the various mode statements
and under a more generic CR0.PG=1 check.  This will allow for additional
code consolidation in the future.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-42-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b5e63c4ed7d1a..9a06003811fde 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4637,18 +4637,18 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	} else if (is_long_mode(vcpu)) {
 		context->root_level = ____is_cr4_la57(&regs) ?
 				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
-		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	} else if (is_pae(vcpu)) {
 		context->root_level = PT32E_ROOT_LEVEL;
-		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	} else {
 		context->root_level = PT32_ROOT_LEVEL;
-		reset_rsvds_bits_mask(vcpu, context);
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
+	if (is_cr0_pg(context))
+		reset_rsvds_bits_mask(vcpu, context);
+
 	update_permission_bitmask(context, false);
 	update_pkru_bitmask(context);
 	update_last_nonleaf_level(context);
@@ -4890,18 +4890,17 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	 * nested page tables as the second level of translation. Basically
 	 * the gva_to_gpa functions between mmu and nested_mmu are swapped.
 	 */
-	if (!is_paging(vcpu)) {
+	if (!is_paging(vcpu))
 		g_context->gva_to_gpa = nonpaging_gva_to_gpa_nested;
-	} else if (is_long_mode(vcpu)) {
-		reset_rsvds_bits_mask(vcpu, g_context);
+	else if (is_long_mode(vcpu))
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
-	} else if (is_pae(vcpu)) {
-		reset_rsvds_bits_mask(vcpu, g_context);
+	else if (is_pae(vcpu))
 		g_context->gva_to_gpa = paging64_gva_to_gpa_nested;
-	} else {
-		reset_rsvds_bits_mask(vcpu, g_context);
+	else
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
-	}
+
+	if (is_cr0_pg(g_context))
+		reset_rsvds_bits_mask(vcpu, g_context);
 
 	update_permission_bitmask(g_context, false);
 	update_pkru_bitmask(g_context);
-- 
GitLab


From af0eb17e99e5df76380404881e3e5042d582a6b3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:27 -0700
Subject: [PATCH 3740/3804] KVM: x86/mmu: Don't update nested guest's paging
 bitmasks if CR0.PG=0

Don't bother updating the bitmasks and last-leaf information if paging is
disabled as the metadata will never be used.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-43-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 9a06003811fde..6447d9fe16727 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4646,12 +4646,12 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
-	if (is_cr0_pg(context))
+	if (is_cr0_pg(context)) {
 		reset_rsvds_bits_mask(vcpu, context);
-
-	update_permission_bitmask(context, false);
-	update_pkru_bitmask(context);
-	update_last_nonleaf_level(context);
+		update_permission_bitmask(context, false);
+		update_pkru_bitmask(context);
+		update_last_nonleaf_level(context);
+	}
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -4899,12 +4899,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	else
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 
-	if (is_cr0_pg(g_context))
+	if (is_cr0_pg(g_context)) {
 		reset_rsvds_bits_mask(vcpu, g_context);
-
-	update_permission_bitmask(g_context, false);
-	update_pkru_bitmask(g_context);
-	update_last_nonleaf_level(g_context);
+		update_permission_bitmask(g_context, false);
+		update_pkru_bitmask(g_context);
+		update_last_nonleaf_level(g_context);
+	}
 }
 
 void kvm_init_mmu(struct kvm_vcpu *vcpu)
-- 
GitLab


From 533f9a4b387bf79c722faf0a760a09129d9627f9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:28 -0700
Subject: [PATCH 3741/3804] KVM: x86/mmu: Add helper to update paging metadata

Consolidate MMU guest metadata updates into a common helper for TDP,
shadow, and nested MMUs.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-44-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6447d9fe16727..01ab309f8f311 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4507,6 +4507,18 @@ static void update_last_nonleaf_level(struct kvm_mmu *mmu)
 		mmu->last_nonleaf_level++;
 }
 
+static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
+					struct kvm_mmu *mmu)
+{
+	if (!is_cr0_pg(mmu))
+		return;
+
+	reset_rsvds_bits_mask(vcpu, mmu);
+	update_permission_bitmask(mmu, false);
+	update_pkru_bitmask(mmu);
+	update_last_nonleaf_level(mmu);
+}
+
 static void paging64_init_context_common(struct kvm_mmu *context,
 					 int root_level)
 {
@@ -4646,12 +4658,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
-	if (is_cr0_pg(context)) {
-		reset_rsvds_bits_mask(vcpu, context);
-		update_permission_bitmask(context, false);
-		update_pkru_bitmask(context);
-		update_last_nonleaf_level(context);
-	}
+	reset_guest_paging_metadata(vcpu, context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
 }
 
@@ -4705,12 +4712,7 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	else
 		paging32_init_context(context);
 
-	if (____is_cr0_pg(regs)) {
-		reset_rsvds_bits_mask(vcpu, context);
-		update_permission_bitmask(context, false);
-		update_pkru_bitmask(context);
-		update_last_nonleaf_level(context);
-	}
+	reset_guest_paging_metadata(vcpu, context);
 	context->shadow_root_level = new_role.base.level;
 
 	reset_shadow_zero_bits_mask(vcpu, context);
@@ -4899,12 +4901,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	else
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 
-	if (is_cr0_pg(g_context)) {
-		reset_rsvds_bits_mask(vcpu, g_context);
-		update_permission_bitmask(g_context, false);
-		update_pkru_bitmask(g_context);
-		update_last_nonleaf_level(g_context);
-	}
+	reset_guest_paging_metadata(vcpu, g_context);
 }
 
 void kvm_init_mmu(struct kvm_vcpu *vcpu)
-- 
GitLab


From f4bd6f73763a91a0c6fc39974d57034e19f25494 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:29 -0700
Subject: [PATCH 3742/3804] KVM: x86/mmu: Add a helper to calculate root from
 role_regs

Add a helper to calculate the level for non-EPT page tables from the
MMU's role_regs.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-45-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 60 ++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 01ab309f8f311..8cf0c1a837161 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -238,6 +238,19 @@ static struct kvm_mmu_role_regs vcpu_to_role_regs(struct kvm_vcpu *vcpu)
 	return regs;
 }
 
+static int role_regs_to_root_level(struct kvm_mmu_role_regs *regs)
+{
+	if (!____is_cr0_pg(regs))
+		return 0;
+	else if (____is_efer_lma(regs))
+		return ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL :
+					       PT64_ROOT_4LEVEL;
+	else if (____is_cr4_pae(regs))
+		return PT32E_ROOT_LEVEL;
+	else
+		return PT32_ROOT_LEVEL;
+}
+
 static inline bool kvm_available_flush_tlb_with_range(void)
 {
 	return kvm_x86_ops.tlb_remote_flush_with_range;
@@ -3949,7 +3962,6 @@ static void nonpaging_init_context(struct kvm_mmu *context)
 	context->gva_to_gpa = nonpaging_gva_to_gpa;
 	context->sync_page = nonpaging_sync_page;
 	context->invlpg = NULL;
-	context->root_level = 0;
 	context->direct_map = true;
 }
 
@@ -4519,11 +4531,8 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
 	update_last_nonleaf_level(mmu);
 }
 
-static void paging64_init_context_common(struct kvm_mmu *context,
-					 int root_level)
+static void paging64_init_context_common(struct kvm_mmu *context)
 {
-	context->root_level = root_level;
-
 	WARN_ON_ONCE(!is_cr4_pae(context));
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
@@ -4532,18 +4541,13 @@ static void paging64_init_context_common(struct kvm_mmu *context,
 	context->direct_map = false;
 }
 
-static void paging64_init_context(struct kvm_mmu *context,
-				  struct kvm_mmu_role_regs *regs)
+static void paging64_init_context(struct kvm_mmu *context)
 {
-	int root_level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL :
-						 PT64_ROOT_4LEVEL;
-
-	paging64_init_context_common(context, root_level);
+	paging64_init_context_common(context);
 }
 
 static void paging32_init_context(struct kvm_mmu *context)
 {
-	context->root_level = PT32_ROOT_LEVEL;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
 	context->sync_page = paging32_sync_page;
@@ -4553,7 +4557,7 @@ static void paging32_init_context(struct kvm_mmu *context)
 
 static void paging32E_init_context(struct kvm_mmu *context)
 {
-	paging64_init_context_common(context, PT32E_ROOT_LEVEL);
+	paging64_init_context_common(context);
 }
 
 static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
@@ -4642,21 +4646,16 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->get_guest_pgd = get_cr3;
 	context->get_pdptr = kvm_pdptr_read;
 	context->inject_page_fault = kvm_inject_page_fault;
+	context->root_level = role_regs_to_root_level(&regs);
 
-	if (!is_paging(vcpu)) {
+	if (!is_paging(vcpu))
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
-		context->root_level = 0;
-	} else if (is_long_mode(vcpu)) {
-		context->root_level = ____is_cr4_la57(&regs) ?
-				PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
+	else if (is_long_mode(vcpu))
 		context->gva_to_gpa = paging64_gva_to_gpa;
-	} else if (is_pae(vcpu)) {
-		context->root_level = PT32E_ROOT_LEVEL;
+	else if (is_pae(vcpu))
 		context->gva_to_gpa = paging64_gva_to_gpa;
-	} else {
-		context->root_level = PT32_ROOT_LEVEL;
+	else
 		context->gva_to_gpa = paging32_gva_to_gpa;
-	}
 
 	reset_guest_paging_metadata(vcpu, context);
 	reset_tdp_shadow_zero_bits_mask(vcpu, context);
@@ -4706,11 +4705,12 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 	if (!____is_cr0_pg(regs))
 		nonpaging_init_context(context);
 	else if (____is_efer_lma(regs))
-		paging64_init_context(context, regs);
+		paging64_init_context(context);
 	else if (____is_cr4_pae(regs))
 		paging32E_init_context(context);
 	else
 		paging32_init_context(context);
+	context->root_level = role_regs_to_root_level(regs);
 
 	reset_guest_paging_metadata(vcpu, context);
 	context->shadow_root_level = new_role.base.level;
@@ -4849,17 +4849,7 @@ kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu, struct kvm_mmu_role_regs *regs)
 	 * to "true" to try to detect bogus usage of the nested MMU.
 	 */
 	role.base.direct = true;
-
-	if (!____is_cr0_pg(regs))
-		role.base.level = 0;
-	else if (____is_efer_lma(regs))
-		role.base.level = ____is_cr4_la57(regs) ? PT64_ROOT_5LEVEL :
-							  PT64_ROOT_4LEVEL;
-	else if (____is_cr4_pae(regs))
-		role.base.level = PT32E_ROOT_LEVEL;
-	else
-		role.base.level = PT32_ROOT_LEVEL;
-
+	role.base.level = role_regs_to_root_level(regs);
 	return role;
 }
 
-- 
GitLab


From fe660f7244d7e237ab7726813dc9aec8e94900d6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:30 -0700
Subject: [PATCH 3743/3804] KVM: x86/mmu: Collapse 32-bit PAE and 64-bit
 statements for helpers

Skip paging32E_init_context() and paging64_init_context_common() and go
directly to paging64_init_context() (was the common version) now that
the relevant flows don't need to distinguish between 64-bit PAE and
32-bit PAE for other reasons.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-46-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8cf0c1a837161..08ac4e451b951 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4531,9 +4531,8 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
 	update_last_nonleaf_level(mmu);
 }
 
-static void paging64_init_context_common(struct kvm_mmu *context)
+static void paging64_init_context(struct kvm_mmu *context)
 {
-	WARN_ON_ONCE(!is_cr4_pae(context));
 	context->page_fault = paging64_page_fault;
 	context->gva_to_gpa = paging64_gva_to_gpa;
 	context->sync_page = paging64_sync_page;
@@ -4541,11 +4540,6 @@ static void paging64_init_context_common(struct kvm_mmu *context)
 	context->direct_map = false;
 }
 
-static void paging64_init_context(struct kvm_mmu *context)
-{
-	paging64_init_context_common(context);
-}
-
 static void paging32_init_context(struct kvm_mmu *context)
 {
 	context->page_fault = paging32_page_fault;
@@ -4555,11 +4549,6 @@ static void paging32_init_context(struct kvm_mmu *context)
 	context->direct_map = false;
 }
 
-static void paging32E_init_context(struct kvm_mmu *context)
-{
-	paging64_init_context_common(context);
-}
-
 static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
 							 struct kvm_mmu_role_regs *regs)
 {
@@ -4650,8 +4639,6 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 
 	if (!is_paging(vcpu))
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
-	else if (is_long_mode(vcpu))
-		context->gva_to_gpa = paging64_gva_to_gpa;
 	else if (is_pae(vcpu))
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	else
@@ -4704,10 +4691,8 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 
 	if (!____is_cr0_pg(regs))
 		nonpaging_init_context(context);
-	else if (____is_efer_lma(regs))
-		paging64_init_context(context);
 	else if (____is_cr4_pae(regs))
-		paging32E_init_context(context);
+		paging64_init_context(context);
 	else
 		paging32_init_context(context);
 	context->root_level = role_regs_to_root_level(regs);
-- 
GitLab


From 36f267871edceafbfbbc5d570c34c089a2afa1c1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:31 -0700
Subject: [PATCH 3744/3804] KVM: x86/mmu: Use MMU's role to determine PTTYPE

Use the MMU's role instead of vCPU state or role_regs to determine the
PTTYPE, i.e. which helpers to wire up.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-47-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 08ac4e451b951..4676d696b909a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4637,9 +4637,9 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->inject_page_fault = kvm_inject_page_fault;
 	context->root_level = role_regs_to_root_level(&regs);
 
-	if (!is_paging(vcpu))
+	if (!is_cr0_pg(context))
 		context->gva_to_gpa = nonpaging_gva_to_gpa;
-	else if (is_pae(vcpu))
+	else if (is_cr4_pae(context))
 		context->gva_to_gpa = paging64_gva_to_gpa;
 	else
 		context->gva_to_gpa = paging32_gva_to_gpa;
@@ -4689,9 +4689,9 @@ static void shadow_mmu_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *conte
 
 	context->mmu_role.as_u64 = new_role.as_u64;
 
-	if (!____is_cr0_pg(regs))
+	if (!is_cr0_pg(context))
 		nonpaging_init_context(context);
-	else if (____is_cr4_pae(regs))
+	else if (is_cr4_pae(context))
 		paging64_init_context(context);
 	else
 		paging32_init_context(context);
-- 
GitLab


From 961f84457cd4e2fc479e59d015f1d292ec30373b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:32 -0700
Subject: [PATCH 3745/3804] KVM: x86/mmu: Add helpers to do full reserved SPTE
 checks w/ generic MMU

Extract the reserved SPTE check and print helpers in get_mmio_spte() to
new helpers so that KVM can also WARN on reserved badness when making a
SPTE.

Tag the checking helper with __always_inline to improve the probability
of the compiler generating optimal code for the checking loop, e.g. gcc
appears to avoid using %rbp when the helper is tagged with a vanilla
"inline".

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-48-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c  | 23 ++---------------------
 arch/x86/kvm/mmu/spte.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4676d696b909a..ad025059a041f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3594,19 +3594,6 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gpa_t vaddr,
 	return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception);
 }
 
-static bool
-__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level)
-{
-	int bit7 = (pte >> 7) & 1;
-
-	return pte & rsvd_check->rsvd_bits_mask[bit7][level-1];
-}
-
-static bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check, u64 pte)
-{
-	return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f);
-}
-
 static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	/*
@@ -3684,13 +3671,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 	rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
 
 	for (level = root; level >= leaf; level--)
-		/*
-		 * Use a bitwise-OR instead of a logical-OR to aggregate the
-		 * reserved bit and EPT's invalid memtype/XWR checks to avoid
-		 * adding a Jcc in the loop.
-		 */
-		reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
-			    __is_rsvd_bits_set(rsvd_check, sptes[level], level);
+		reserved |= is_rsvd_spte(rsvd_check, sptes[level], level);
 
 	if (reserved) {
 		pr_err("%s: reserved bits set on MMU-present spte, addr 0x%llx, hierarchy:\n",
@@ -3698,7 +3679,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 		for (level = root; level >= leaf; level--)
 			pr_err("------ spte = 0x%llx level = %d, rsvd bits = 0x%llx",
 			       sptes[level], level,
-			       rsvd_check->rsvd_bits_mask[(sptes[level] >> 7) & 1][level-1]);
+			       get_rsvd_bits(rsvd_check, sptes[level], level));
 	}
 
 	return reserved;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index bca0ba11cccf3..7a5ce93141075 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -293,6 +293,38 @@ static inline bool is_dirty_spte(u64 spte)
 	return dirty_mask ? spte & dirty_mask : spte & PT_WRITABLE_MASK;
 }
 
+static inline u64 get_rsvd_bits(struct rsvd_bits_validate *rsvd_check, u64 pte,
+				int level)
+{
+	int bit7 = (pte >> 7) & 1;
+
+	return rsvd_check->rsvd_bits_mask[bit7][level-1];
+}
+
+static inline bool __is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check,
+				      u64 pte, int level)
+{
+	return pte & get_rsvd_bits(rsvd_check, pte, level);
+}
+
+static inline bool __is_bad_mt_xwr(struct rsvd_bits_validate *rsvd_check,
+				   u64 pte)
+{
+	return rsvd_check->bad_mt_xwr & BIT_ULL(pte & 0x3f);
+}
+
+static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
+					 u64 spte, int level)
+{
+	/*
+	 * Use a bitwise-OR instead of a logical-OR to aggregate the reserved
+	 * bits and EPT's invalid memtype/XWR checks to avoid an extra Jcc
+	 * (this is extremely unlikely to be short-circuited as true).
+	 */
+	return __is_bad_mt_xwr(rsvd_check, spte) |
+	       __is_rsvd_bits_set(rsvd_check, spte, level);
+}
+
 static inline bool spte_can_locklessly_be_made_writable(u64 spte)
 {
 	return (spte & shadow_host_writable_mask) &&
-- 
GitLab


From 3b77daa5efe1cb343ee498ade6ee58c8ada58074 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:33 -0700
Subject: [PATCH 3746/3804] KVM: x86/mmu: WARN on any reserved SPTE value when
 making a valid SPTE

Replace make_spte()'s WARN on a collision with the magic MMIO value with
a generic WARN on reserved bits being set (including EPT's reserved WX
combination).  Warning on any reserved bits covers MMIO, A/D tracking
bits with PAE paging, and in theory any future goofs that are introduced.

Opportunistically convert to ONCE behavior to avoid spamming the kernel
log; odds are very good that if KVM screws up one SPTE, it will botch all
SPTEs for the same MMU.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-49-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/spte.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 246e61e0771e4..3e97cdb13eb7e 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -175,7 +175,10 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
 		spte = mark_spte_for_access_track(spte);
 
 out:
-	WARN_ON(is_mmio_spte(spte));
+	WARN_ONCE(is_rsvd_spte(&vcpu->arch.mmu->shadow_zero_check, spte, level),
+		  "spte = 0x%llx, level = %d, rsvd bits = 0x%llx", spte, level,
+		  get_rsvd_bits(&vcpu->arch.mmu->shadow_zero_check, spte, level));
+
 	*new_spte = spte;
 	return ret;
 }
-- 
GitLab


From 616007c866a250143e95ea7a696bd924df251f8a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:34 -0700
Subject: [PATCH 3747/3804] KVM: x86: Enhance comments for MMU roles and nested
 transition trickiness

Expand the comments for the MMU roles.  The interactions with gfn_track
PGD reuse in particular are hairy.

Regarding PGD reuse, add comments in the nested virtualization flows to
call out why kvm_init_mmu() is unconditionally called even when nested
TDP is used.

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-50-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 57 +++++++++++++++++++++++++++------
 arch/x86/kvm/svm/nested.c       |  1 +
 arch/x86/kvm/vmx/nested.c       |  1 +
 3 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3f4f6ad7405bf..f033ecf43d4c8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -269,12 +269,36 @@ enum x86_intercept_stage;
 struct kvm_kernel_irq_routing_entry;
 
 /*
- * the pages used as guest page table on soft mmu are tracked by
- * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used
- * by indirect shadow page can not be more than 15 bits.
+ * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
+ * also includes TDP pages) to determine whether or not a page can be used in
+ * the given MMU context.  This is a subset of the overall kvm_mmu_role to
+ * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
+ * 2 bytes per gfn instead of 4 bytes per gfn.
  *
- * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access,
- * @efer_nx, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp.
+ * Indirect upper-level shadow pages are tracked for write-protection via
+ * gfn_track.  As above, gfn_track is a 16 bit counter, so KVM must not create
+ * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
+ * gfn_track will overflow and explosions will ensue.
+ *
+ * A unique shadow page (SP) for a gfn is created if and only if an existing SP
+ * cannot be reused.  The ability to reuse a SP is tracked by its role, which
+ * incorporates various mode bits and properties of the SP.  Roughly speaking,
+ * the number of unique SPs that can theoretically be created is 2^n, where n
+ * is the number of bits that are used to compute the role.
+ *
+ * But, even though there are 18 bits in the mask below, not all combinations
+ * of modes and flags are possible.  The maximum number of possible upper-level
+ * shadow pages for a single gfn is in the neighborhood of 2^13.
+ *
+ *   - invalid shadow pages are not accounted.
+ *   - level is effectively limited to four combinations, not 16 as the number
+ *     of bits would imply, as 4k SPs are not tracked (allowed to go unsync).
+ *   - level is effectively unused for non-PAE paging because there is exactly
+ *     one upper level (see 4k SP exception above).
+ *   - quadrant is used only for non-PAE paging and is exclusive with
+ *     gpte_is_8_bytes.
+ *   - execonly and ad_disabled are used only for nested EPT, which makes it
+ *     exclusive with quadrant.
  */
 union kvm_mmu_page_role {
 	u32 word;
@@ -303,13 +327,26 @@ union kvm_mmu_page_role {
 	};
 };
 
-union kvm_mmu_extended_role {
 /*
- * This structure complements kvm_mmu_page_role caching everything needed for
- * MMU configuration. If nothing in both these structures changed, MMU
- * re-configuration can be skipped. @valid bit is set on first usage so we don't
- * treat all-zero structure as valid data.
+ * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
+ * relevant to the current MMU configuration.   When loading CR0, CR4, or EFER,
+ * including on nested transitions, if nothing in the full role changes then
+ * MMU re-configuration can be skipped. @valid bit is set on first usage so we
+ * don't treat all-zero structure as valid data.
+ *
+ * The properties that are tracked in the extended role but not the page role
+ * are for things that either (a) do not affect the validity of the shadow page
+ * or (b) are indirectly reflected in the shadow page's role.  For example,
+ * CR4.PKE only affects permission checks for software walks of the guest page
+ * tables (because KVM doesn't support Protection Keys with shadow paging), and
+ * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
+ *
+ * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
+ * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
+ * SMAP, but the MMU's permission checks for software walks need to be SMEP and
+ * SMAP aware regardless of CR0.WP.
  */
+union kvm_mmu_extended_role {
 	u32 word;
 	struct {
 		unsigned int valid:1;
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a9e3b0736c206..21d03e3a5dfd5 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -424,6 +424,7 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 	kvm_init_mmu(vcpu);
 
 	return 0;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index fa3f50f0a3fa5..1a52134b0c42a 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1098,6 +1098,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
 	vcpu->arch.cr3 = cr3;
 	kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
 
+	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
 	kvm_init_mmu(vcpu);
 
 	return 0;
-- 
GitLab


From 7cd138db5cae0dac295714b4412a9b44fb4f4e65 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:35 -0700
Subject: [PATCH 3748/3804] KVM: x86/mmu: Optimize and clean up so called "last
 nonleaf level" logic

Drop the pre-computed last_nonleaf_level, which is arguably wrong and at
best confusing.  Per the comment:

  Can have large pages at levels 2..last_nonleaf_level-1.

the intent of the variable would appear to be to track what levels can
_legally_ have large pages, but that intent doesn't align with reality.
The computed value will be wrong for 5-level paging, or if 1gb pages are
not supported.

The flawed code is not a problem in practice, because except for 32-bit
PSE paging, bit 7 is reserved if large pages aren't supported at the
level.  Take advantage of this invariant and simply omit the level magic
math for 64-bit page tables (including PAE).

For 32-bit paging (non-PAE), the adjustments are needed purely because
bit 7 is ignored if PSE=0.  Retain that logic as is, but make
is_last_gpte() unique per PTTYPE so that the PSE check is avoided for
PAE and EPT paging.  In the spirit of avoiding branches, bump the "last
nonleaf level" for 32-bit PSE paging by adding the PSE bit itself.

Note, bit 7 is ignored or has other meaning in CR3/EPTP, but despite
FNAME(walk_addr_generic) briefly grabbing CR3/EPTP in "pte", they are
not PTEs and will blow up all the other gpte helpers.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-51-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ---
 arch/x86/kvm/mmu/mmu.c          | 31 -------------------------------
 arch/x86/kvm/mmu/paging_tmpl.h  | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f033ecf43d4c8..3cd496c8acb83 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -457,9 +457,6 @@ struct kvm_mmu {
 
 	struct rsvd_bits_validate guest_rsvd_check;
 
-	/* Can have large pages at levels 2..last_nonleaf_level-1. */
-	u8 last_nonleaf_level;
-
 	u64 pdptrs[4]; /* pae */
 };
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ad025059a041f..417f81c004da0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4071,26 +4071,6 @@ static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 	return false;
 }
 
-static inline bool is_last_gpte(struct kvm_mmu *mmu,
-				unsigned level, unsigned gpte)
-{
-	/*
-	 * The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
-	 * If it is clear, there are no large pages at this level, so clear
-	 * PT_PAGE_SIZE_MASK in gpte if that is the case.
-	 */
-	gpte &= level - mmu->last_nonleaf_level;
-
-	/*
-	 * PG_LEVEL_4K always terminates.  The RHS has bit 7 set
-	 * iff level <= PG_LEVEL_4K, which for our purpose means
-	 * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
-	 */
-	gpte |= level - PG_LEVEL_4K - 1;
-
-	return gpte & PT_PAGE_SIZE_MASK;
-}
-
 #define PTTYPE_EPT 18 /* arbitrary */
 #define PTTYPE PTTYPE_EPT
 #include "paging_tmpl.h"
@@ -4491,15 +4471,6 @@ static void update_pkru_bitmask(struct kvm_mmu *mmu)
 	}
 }
 
-static void update_last_nonleaf_level(struct kvm_mmu *mmu)
-{
-	unsigned root_level = mmu->root_level;
-
-	mmu->last_nonleaf_level = root_level;
-	if (root_level == PT32_ROOT_LEVEL && is_cr4_pse(mmu))
-		mmu->last_nonleaf_level++;
-}
-
 static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *mmu)
 {
@@ -4509,7 +4480,6 @@ static void reset_guest_paging_metadata(struct kvm_vcpu *vcpu,
 	reset_rsvds_bits_mask(vcpu, mmu);
 	update_permission_bitmask(mmu, false);
 	update_pkru_bitmask(mmu);
-	update_last_nonleaf_level(mmu);
 }
 
 static void paging64_init_context(struct kvm_mmu *context)
@@ -4783,7 +4753,6 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->direct_map = false;
 
 	update_permission_bitmask(context, true);
-	update_last_nonleaf_level(context);
 	update_pkru_bitmask(context);
 	reset_rsvds_bits_mask_ept(vcpu, context, execonly);
 	reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c92e712607b60..75c3fe966e81a 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -305,6 +305,35 @@ static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
 	return pkeys;
 }
 
+static inline bool FNAME(is_last_gpte)(struct kvm_mmu *mmu,
+				       unsigned int level, unsigned int gpte)
+{
+	/*
+	 * For EPT and PAE paging (both variants), bit 7 is either reserved at
+	 * all levels or indicates a huge page (ignoring CR3/EPTP).  In either
+	 * case, bit 7 being set terminates the walk.
+	 */
+#if PTTYPE == 32
+	/*
+	 * 32-bit paging requires special handling because bit 7 is ignored if
+	 * CR4.PSE=0, not reserved.  Clear bit 7 in the gpte if the level is
+	 * greater than the last level for which bit 7 is the PAGE_SIZE bit.
+	 *
+	 * The RHS has bit 7 set iff level < (2 + PSE).  If it is clear, bit 7
+	 * is not reserved and does not indicate a large page at this level,
+	 * so clear PT_PAGE_SIZE_MASK in gpte if that is the case.
+	 */
+	gpte &= level - (PT32_ROOT_LEVEL + mmu->mmu_role.ext.cr4_pse);
+#endif
+	/*
+	 * PG_LEVEL_4K always terminates.  The RHS has bit 7 set
+	 * iff level <= PG_LEVEL_4K, which for our purpose means
+	 * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then.
+	 */
+	gpte |= level - PG_LEVEL_4K - 1;
+
+	return gpte & PT_PAGE_SIZE_MASK;
+}
 /*
  * Fetch a guest pte for a guest virtual address, or for an L2's GPA.
  */
@@ -421,7 +450,7 @@ retry_walk:
 
 		/* Convert to ACC_*_MASK flags for struct guest_walker.  */
 		walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
-	} while (!is_last_gpte(mmu, walker->level, pte));
+	} while (!FNAME(is_last_gpte)(mmu, walker->level, pte));
 
 	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
 	accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
-- 
GitLab


From f82fdaf536ee6de36e3a7b4764f17b81afb8ef93 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:36 -0700
Subject: [PATCH 3749/3804] KVM: x86/mmu: Drop redundant rsvd bits reset for
 nested NPT

Drop the extra reset of shadow_zero_bits in the nested NPT flow now
that shadow_mmu_init_context computes the correct level for nested NPT.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-52-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 417f81c004da0..690f560341a2a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4693,12 +4693,6 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
 	__kvm_mmu_new_pgd(vcpu, nested_cr3, new_role.base);
 
 	shadow_mmu_init_context(vcpu, context, &regs, new_role);
-
-	/*
-	 * Redo the shadow bits, the reset done by shadow_mmu_init_context()
-	 * (above) may use the wrong shadow_root_level.
-	 */
-	reset_shadow_zero_bits_mask(vcpu, context);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_npt_mmu);
 
-- 
GitLab


From fdaa293598f908adb945001dabb305225144e183 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:37 -0700
Subject: [PATCH 3750/3804] KVM: x86/mmu: Get CR0.WP from MMU, not vCPU, in
 shadow page fault

Use the current MMU instead of vCPU state to query CR0.WP when handling
a page fault.  In the nested NPT case, the current CR0.WP reflects L2,
whereas the page fault is shadowing L1's NPT.  Practically speaking, this
is a nop as NPT walks are always user faults, but fix it up for
consistency.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-53-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h             | 5 -----
 arch/x86/kvm/mmu/paging_tmpl.h | 5 ++---
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 62844bacd13f4..83e6c6965f1e5 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -165,11 +165,6 @@ static inline bool is_writable_pte(unsigned long pte)
 	return pte & PT_WRITABLE_MASK;
 }
 
-static inline bool is_write_protection(struct kvm_vcpu *vcpu)
-{
-	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
-}
-
 /*
  * Check if a given access (described through the I/D, W/R and U/S bits of a
  * page fault error code pfec) causes a permission fault with the given PTE
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 75c3fe966e81a..2f5a0e8d05ed8 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -795,7 +795,7 @@ FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu,
 	bool self_changed = false;
 
 	if (!(walker->pte_access & ACC_WRITE_MASK ||
-	      (!is_write_protection(vcpu) && !user_fault)))
+	    (!is_cr0_wp(vcpu->arch.mmu) && !user_fault)))
 		return false;
 
 	for (level = walker->level; level <= walker->max_level; level++) {
@@ -893,8 +893,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 	 * we will cache the incorrect access into mmio spte.
 	 */
 	if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) &&
-	     !is_write_protection(vcpu) && !user_fault &&
-	      !is_noslot_pfn(pfn)) {
+	    !is_cr0_wp(vcpu->arch.mmu) && !user_fault && !is_noslot_pfn(pfn)) {
 		walker.pte_access |= ACC_WRITE_MASK;
 		walker.pte_access &= ~ACC_USER_MASK;
 
-- 
GitLab


From 9a65d0b70fa06ae46b9f8ab7dc8e6b3c6f4661ba Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:38 -0700
Subject: [PATCH 3751/3804] KVM: x86/mmu: Get CR4.SMEP from MMU, not vCPU, in
 shadow page fault

Use the current MMU instead of vCPU state to query CR4.SMEP when handling
a page fault.  In the nested NPT case, the current CR4.SMEP reflects L2,
whereas the page fault is shadowing L1's NPT, which uses L1's hCR4.
Practically speaking, this is a nop as NPT walks are always user faults,
i.e. this code will never be reached, but fix it up for consistency.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-54-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 2f5a0e8d05ed8..490a028ddabe9 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -903,7 +903,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 		 * then we should prevent the kernel from executing it
 		 * if SMEP is enabled.
 		 */
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
+		if (is_cr4_smep(vcpu->arch.mmu))
 			walker.pte_access &= ~ACC_EXEC_MASK;
 	}
 
-- 
GitLab


From 27de925044e18eb056d6157305c841b1408621b5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Jun 2021 10:57:39 -0700
Subject: [PATCH 3752/3804] KVM: x86/mmu: Let guest use GBPAGES if supported in
 hardware and TDP is on

Let the guest use 1g hugepages if TDP is enabled and the host supports
GBPAGES, KVM can't actively prevent the guest from using 1g pages in this
case since they can't be disabled in the hardware page walker.  While
injecting a page fault if a bogus 1g page is encountered during a
software page walk is perfectly reasonable since KVM is simply honoring
userspace's vCPU model, doing so arguably doesn't provide any meaningful
value, and at worst will be horribly confusing as the guest will see
inconsistent behavior and seemingly spurious page faults.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210622175739.3610207-55-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 690f560341a2a..00732757cc609 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4174,13 +4174,28 @@ __reset_rsvds_bits_mask(struct rsvd_bits_validate *rsvd_check,
 	}
 }
 
+static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * If TDP is enabled, let the guest use GBPAGES if they're supported in
+	 * hardware.  The hardware page walker doesn't let KVM disable GBPAGES,
+	 * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+	 * walk for performance and complexity reasons.  Not to mention KVM
+	 * _can't_ solve the problem because GVA->GPA walks aren't visible to
+	 * KVM once a TDP translation is installed.  Mimic hardware behavior so
+	 * that KVM is at least consistent, i.e. doesn't randomly inject #PF.
+	 */
+	return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+			     guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+}
+
 static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu *context)
 {
 	__reset_rsvds_bits_mask(&context->guest_rsvd_check,
 				vcpu->arch.reserved_gpa_bits,
 				context->root_level, is_efer_nx(context),
-				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
+				guest_can_use_gbpages(vcpu),
 				is_cr4_pse(context),
 				guest_cpuid_is_amd_or_hygon(vcpu));
 }
@@ -4259,8 +4274,7 @@ static void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
 	shadow_zero_check = &context->shadow_zero_check;
 	__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
 				context->shadow_root_level, uses_nx,
-				guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
-				is_pse, is_amd);
+				guest_can_use_gbpages(vcpu), is_pse, is_amd);
 
 	if (!shadow_me_mask)
 		return;
-- 
GitLab


From 19238e75bd8ed8ffe784bf5b37586e77b2093742 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 10 May 2021 07:48:33 -0700
Subject: [PATCH 3753/3804] kvm: x86: Allow userspace to handle emulation
 errors

Add a fallback mechanism to the in-kernel instruction emulator that
allows userspace the opportunity to process an instruction the emulator
was unable to.  When the in-kernel instruction emulator fails to process
an instruction it will either inject a #UD into the guest or exit to
userspace with exit reason KVM_INTERNAL_ERROR.  This is because it does
not know how to proceed in an appropriate manner.  This feature lets
userspace get involved to see if it can figure out a better path
forward.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: David Edmondson <david.edmondson@oracle.com>
Message-Id: <20210510144834.658457-2-aaronlewis@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst  | 20 +++++++++++++++++
 arch/x86/include/asm/kvm_host.h |  6 +++++
 arch/x86/kvm/x86.c              | 40 +++++++++++++++++++++++++++++----
 include/uapi/linux/kvm.h        | 23 +++++++++++++++++++
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 5d8db4922df6a..3b6e3b1628b4f 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6546,6 +6546,7 @@ KVM_RUN_BUS_LOCK flag is used to distinguish between them.
 This capability can be used to check / enable 2nd DAWR feature provided
 by POWER10 processor.
 
+
 7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
 -------------------------------------
 
@@ -6603,6 +6604,25 @@ present in the "ibm,hypertas-functions" device-tree property.
 This capability is enabled for hypervisors on platforms like POWER9
 that support radix MMU.
 
+7.27 KVM_CAP_EXIT_ON_EMULATION_FAILURE
+--------------------------------------
+
+:Architectures: x86
+:Parameters: args[0] whether the feature should be enabled or not
+
+When this capability is enabled, an emulation failure will result in an exit
+to userspace with KVM_INTERNAL_ERROR (except when the emulator was invoked
+to handle a VMware backdoor instruction). Furthermore, KVM will now provide up
+to 15 instruction bytes for any exit to userspace resulting from an emulation
+failure.  When these exits to userspace occur use the emulation_failure struct
+instead of the internal struct.  They both have the same layout, but the
+emulation_failure struct matches the content better.  It also explicitly
+defines the 'flags' field which is used to describe the fields in the struct
+that are valid (ie: if KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES is
+set in the 'flags' field then both 'insn_size' and 'insn_bytes' have valid data
+in them.)
+
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3cd496c8acb83..c9ec5c76c4381 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1114,6 +1114,12 @@ struct kvm_arch {
 	bool exception_payload_enabled;
 
 	bool bus_lock_detection_enabled;
+	/*
+	 * If exit_on_emulation_error is set, and the in-kernel instruction
+	 * emulator fails to emulate an instruction, allow userspace
+	 * the opportunity to look at it.
+	 */
+	bool exit_on_emulation_error;
 
 	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
 	u32 user_space_msr_mask;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a7c7b2b28de78..17468d983fbd5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4010,6 +4010,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #endif
 	case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
 	case KVM_CAP_SREGS2:
+	case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
 		r = 1;
 		break;
 	case KVM_CAP_EXIT_HYPERCALL:
@@ -5649,6 +5650,13 @@ split_irqchip_unlock:
 		kvm->arch.hypercall_exit_enabled = cap->args[0];
 		r = 0;
 		break;
+	case KVM_CAP_EXIT_ON_EMULATION_FAILURE:
+		r = -EINVAL;
+		if (cap->args[0] & ~1)
+			break;
+		kvm->arch.exit_on_emulation_error = cap->args[0];
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -7444,8 +7452,33 @@ void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
+static void prepare_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+	struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+	u32 insn_size = ctxt->fetch.end - ctxt->fetch.data;
+	struct kvm_run *run = vcpu->run;
+
+	run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	run->emulation_failure.suberror = KVM_INTERNAL_ERROR_EMULATION;
+	run->emulation_failure.ndata = 0;
+	run->emulation_failure.flags = 0;
+
+	if (insn_size) {
+		run->emulation_failure.ndata = 3;
+		run->emulation_failure.flags |=
+			KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES;
+		run->emulation_failure.insn_size = insn_size;
+		memset(run->emulation_failure.insn_bytes, 0x90,
+		       sizeof(run->emulation_failure.insn_bytes));
+		memcpy(run->emulation_failure.insn_bytes,
+		       ctxt->fetch.data, insn_size);
+	}
+}
+
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
+	struct kvm *kvm = vcpu->kvm;
+
 	++vcpu->stat.insn_emulation_fail;
 	trace_kvm_emulate_insn_failed(vcpu);
 
@@ -7454,10 +7487,9 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 		return 1;
 	}
 
-	if (emulation_type & EMULTYPE_SKIP) {
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
-		vcpu->run->internal.ndata = 0;
+	if (kvm->arch.exit_on_emulation_error ||
+	    (emulation_type & EMULTYPE_SKIP)) {
+		prepare_emulation_failure_exit(vcpu);
 		return 0;
 	}
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f1ba602260f6e..68c9e6d8bbda2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -280,6 +280,9 @@ struct kvm_xen_exit {
 /* Encounter unexpected vm-exit reason */
 #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON	4
 
+/* Flags that describe what fields in emulation_failure hold valid data. */
+#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
 	/* in */
@@ -383,6 +386,25 @@ struct kvm_run {
 			__u32 ndata;
 			__u64 data[16];
 		} internal;
+		/*
+		 * KVM_INTERNAL_ERROR_EMULATION
+		 *
+		 * "struct emulation_failure" is an overlay of "struct internal"
+		 * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of
+		 * KVM_EXIT_INTERNAL_ERROR.  Note, unlike other internal error
+		 * sub-types, this struct is ABI!  It also needs to be backwards
+		 * compatible with "struct internal".  Take special care that
+		 * "ndata" is correct, that new fields are enumerated in "flags",
+		 * and that each flag enumerates fields that are 64-bit aligned
+		 * and sized (so that ndata+internal.data[] is valid/accurate).
+		 */
+		struct {
+			__u32 suberror;
+			__u32 ndata;
+			__u64 flags;
+			__u8  insn_size;
+			__u8  insn_bytes[15];
+		} emulation_failure;
 		/* KVM_EXIT_OSI */
 		struct {
 			__u64 gprs[32];
@@ -1088,6 +1110,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_EXIT_HYPERCALL 201
 #define KVM_CAP_PPC_RPT_INVALIDATE 202
 #define KVM_CAP_BINARY_STATS_FD 203
+#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From 39bbcc3a4e39a41a494ea245858db581bf83e752 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Mon, 10 May 2021 07:48:34 -0700
Subject: [PATCH 3754/3804] selftests: kvm: Allows userspace to handle
 emulation errors.

This test exercises the feature KVM_CAP_EXIT_ON_EMULATION_FAILURE.  When
enabled, errors in the in-kernel instruction emulator are forwarded to
userspace with the instruction bytes stored in the exit struct for
KVM_EXIT_INTERNAL_ERROR.  So, when the guest attempts to emulate an
'flds' instruction, which isn't able to be emulated in KVM, instead
of failing, KVM sends the instruction to userspace to handle.

For this test to work properly the module parameter
'allow_smaller_maxphyaddr' has to be set.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20210510144834.658457-3-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/include/x86_64/processor.h  |   4 +
 .../selftests/kvm/lib/x86_64/processor.c      |  92 ++++++++
 .../kvm/x86_64/emulator_error_test.c          | 219 ++++++++++++++++++
 5 files changed, 317 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/emulator_error_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 14c550d64d3c5..d5bc9bf3b528e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -8,6 +8,7 @@
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
+/x86_64/emulator_error_test
 /x86_64/get_cpuid_test
 /x86_64/get_msr_index_features
 /x86_64/kvm_pv_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 279051fb901ce..bc65c57ae40da 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -41,6 +41,7 @@ LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_ha
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 914b0d16929cb..6d27a54359719 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -399,6 +399,10 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
 void vm_handle_exception(struct kvm_vm *vm, int vector,
 			void (*handler)(struct ex_regs *));
 
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr);
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+			     uint64_t pte);
+
 /*
  * set_cpuid() - overwrites a matching cpuid entry with the provided value.
  *		 matches based on ent->function && ent->index. returns true
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 5e0e3a131dadd..3114b18454d5f 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -282,6 +282,98 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
 }
 
+static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+						       uint64_t vaddr)
+{
+	uint16_t index[4];
+	struct pageUpperEntry *pml4e, *pdpe, *pde;
+	struct pageTableEntry *pte;
+	struct kvm_cpuid_entry2 *entry;
+	struct kvm_sregs sregs;
+	int max_phy_addr;
+	/* Set the bottom 52 bits. */
+	uint64_t rsvd_mask = 0x000fffffffffffff;
+
+	entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+	max_phy_addr = entry->eax & 0x000000ff;
+	/* Clear the bottom bits of the reserved mask. */
+	rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+
+	/*
+	 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
+	 * with 4-Level Paging and 5-Level Paging".
+	 * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
+	 * the XD flag (bit 63) is reserved.
+	 */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+	if ((sregs.efer & EFER_NX) == 0) {
+		rsvd_mask |= (1ull << 63);
+	}
+
+	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		vaddr);
+	/*
+	 * Based on the mode check above there are 48 bits in the vaddr, so
+	 * shift 16 to sign extend the last bit (bit-47).
+	 */
+	TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+		"Canonical check failed.  The virtual address is invalid.");
+
+	index[0] = (vaddr >> 12) & 0x1ffu;
+	index[1] = (vaddr >> 21) & 0x1ffu;
+	index[2] = (vaddr >> 30) & 0x1ffu;
+	index[3] = (vaddr >> 39) & 0x1ffu;
+
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	TEST_ASSERT(pml4e[index[3]].present,
+		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
+	TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+		(rsvd_mask | (1ull << 7))) == 0,
+		"Unexpected reserved bits set.");
+
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+	TEST_ASSERT(pdpe[index[2]].present,
+		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
+	TEST_ASSERT(pdpe[index[2]].page_size == 0,
+		"Expected pdpe to map a pde not a 1-GByte page.");
+	TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+		"Unexpected reserved bits set.");
+
+	pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+	TEST_ASSERT(pde[index[1]].present,
+		"Expected pde to be present for gva: 0x%08lx", vaddr);
+	TEST_ASSERT(pde[index[1]].page_size == 0,
+		"Expected pde to map a pte not a 2-MByte page.");
+	TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+		"Unexpected reserved bits set.");
+
+	pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+	TEST_ASSERT(pte[index[0]].present,
+		"Expected pte to be present for gva: 0x%08lx", vaddr);
+
+	return &pte[index[0]];
+}
+
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+{
+	struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+
+	return *(uint64_t *)pte;
+}
+
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+			     uint64_t pte)
+{
+	struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+								  vaddr);
+
+	*(uint64_t *)new_pte = pte;
+}
+
 void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
 	struct pageUpperEntry *pml4e, *pml4e_start;
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
new file mode 100644
index 0000000000000..f070ff0224fa3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID	   1
+#define PAGE_SIZE  4096
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA	0x0000123456789000
+#define MEM_REGION_GPA	0x0000000700000000
+#define MEM_REGION_SLOT	10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(void)
+{
+	__asm__ __volatile__("flds (%[addr])"
+			     :: [addr]"r"(MEM_REGION_GVA));
+
+	GUEST_DONE();
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+	int rc;
+
+	rc = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+/*
+ * Accessors for the R/M (bits 2:0), REG (bits 5:3) and Mod (bits 7:6) fields
+ * of the ModR/M byte; see SDM vol 2, figure 2-2 (ModR/M byte interpretation).
+ */
+#define GET_RM(insn_byte) ((insn_byte) & 0x7)
+#define GET_REG(insn_byte) (((insn_byte) & 0x38) >> 3)
+#define GET_MOD(insn_byte) (((insn_byte) & 0xc0) >> 6)
+
+/* Ensure we are dealing with a simple 2-byte flds instruction. */
+static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
+{
+	return insn_size >= 2 &&
+	       insn_bytes[0] == 0xd9 &&
+	       GET_REG(insn_bytes[1]) == 0x0 &&
+	       GET_MOD(insn_bytes[1]) == 0x0 &&
+	       /* Ensure there is no SIB byte. */
+	       GET_RM(insn_bytes[1]) != 0x4 &&
+	       /* Ensure there is no displacement byte. */
+	       GET_RM(insn_bytes[1]) != 0x5;
+}
+
+static void process_exit_on_emulation_error(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct kvm_regs regs;
+	uint8_t *insn_bytes;
+	uint8_t insn_size;
+	uint64_t flags;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+		    "Unexpected suberror: %u",
+		    run->emulation_failure.suberror);
+
+	if (run->emulation_failure.ndata >= 1) {
+		flags = run->emulation_failure.flags;
+		if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) &&
+		    run->emulation_failure.ndata >= 3) {
+			insn_size = run->emulation_failure.insn_size;
+			insn_bytes = run->emulation_failure.insn_bytes;
+
+			TEST_ASSERT(insn_size <= 15 && insn_size > 0,
+				    "Unexpected instruction size: %u",
+				    insn_size);
+
+			TEST_ASSERT(is_flds(insn_bytes, insn_size),
+				    "Unexpected instruction.  Expected 'flds' (0xd9 /0)");
+
+			/*
+			 * If is_flds() succeeded then the instruction bytes
+			 * contained an flds instruction that is 2-bytes in
+			 * length (ie: no prefix, no SIB, no displacement).
+			 */
+			vcpu_regs_get(vm, VCPU_ID, &regs);
+			regs.rip += 2;
+			vcpu_regs_set(vm, VCPU_ID, &regs);
+		}
+	}
+}
+
+static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc)
+{
+	TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__,
+		  uc->args[1]);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	if (run->exit_reason == KVM_EXIT_IO &&
+	    get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+		do_guest_assert(vm, &uc);
+	}
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	check_for_guest_assert(vm);
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+		    uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_SYNC:
+		break;
+	case UCALL_ABORT:
+		do_guest_assert(vm, &uc);
+		break;
+	case UCALL_DONE:
+		process_ucall_done(vm);
+		break;
+	default:
+		TEST_ASSERT(false, "Unexpected ucall");
+	}
+
+	return uc.cmd;
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_enable_cap emul_failure_cap = {
+		.cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
+		.args[0] = 1,
+	};
+	struct kvm_cpuid_entry2 *entry;
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_vm *vm;
+	uint64_t gpa, pte;
+	uint64_t *hva;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) {
+		printf("module parameter 'allow_smaller_maxphyaddr' is not set.  Skipping test.\n");
+		return 0;
+	}
+
+	cpuid = kvm_get_supported_cpuid();
+
+	entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+	entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR;
+	set_cpuid(cpuid, entry);
+
+	vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+
+	rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+	TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+	vm_enable_cap(vm, &emul_failure_cap);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    MEM_REGION_GPA, MEM_REGION_SLOT,
+				    MEM_REGION_SIZE / PAGE_SIZE, 0);
+	gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+				 MEM_REGION_GPA, MEM_REGION_SLOT);
+	TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+	virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+	memset(hva, 0, PAGE_SIZE);
+	pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA);
+	vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36));
+
+	run_guest(vm);
+	process_exit_on_emulation_error(vm);
+	run_guest(vm);
+
+	TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE");
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
-- 
GitLab


From 88213da2351479c529c368a9b763c4d52f02255b Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 23 Jun 2021 20:34:27 +0000
Subject: [PATCH 3755/3804] kvm: x86: disable the narrow guest module parameter
 on unload

When the kvm_intel module unloads, the module parameter
'allow_smaller_maxphyaddr' is not cleared because the backing variable is
defined in the kvm module.  As a result, if the module parameter's state
was set before kvm_intel unloads, it will also be set when it reloads.
Explicitly clear the state in vmx_exit() to prevent this from happening.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20210623203426.1891402-1-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5a1067c42f3a2..104bbbe2dfd06 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7925,6 +7925,8 @@ static void vmx_exit(void)
 	}
 #endif
 	vmx_cleanup_l1d_flush();
+
+	allow_smaller_maxphyaddr = false;
 }
 module_exit(vmx_exit);
 
-- 
GitLab


From a01b45e9d34d278129296daf91c4771143fa9dd9 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Wed, 23 Jun 2021 14:29:55 +0300
Subject: [PATCH 3756/3804] KVM: x86: rename apic_access_page_done to
 apic_access_memslot_enabled

This better reflects the purpose of this variable on AMD, since
on AMD the AVIC's memory slot can be enabled and disabled dynamically.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20210623113002.111448-4-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/svm/avic.c         | 4 ++--
 arch/x86/kvm/vmx/vmx.c          | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c9ec5c76c4381..974cbfb1eefe3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1054,7 +1054,7 @@ struct kvm_arch {
 	struct kvm_apic_map __rcu *apic_map;
 	atomic_t apic_map_dirty;
 
-	bool apic_access_page_done;
+	bool apic_access_memslot_enabled;
 	unsigned long apicv_inhibit_reasons;
 
 	gpa_t wall_clock;
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index a9abed054cd5c..1d01da64c333d 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -236,7 +236,7 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
 	 * APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
 	 * memory region. So, we need to ensure that kvm->mm == current->mm.
 	 */
-	if ((kvm->arch.apic_access_page_done == activate) ||
+	if ((kvm->arch.apic_access_memslot_enabled == activate) ||
 	    (kvm->mm != current->mm))
 		goto out;
 
@@ -249,7 +249,7 @@ static int avic_update_access_page(struct kvm *kvm, bool activate)
 		goto out;
 	}
 
-	kvm->arch.apic_access_page_done = activate;
+	kvm->arch.apic_access_memslot_enabled = activate;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 104bbbe2dfd06..927a552393b96 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3621,7 +3621,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	int ret = 0;
 
 	mutex_lock(&kvm->slots_lock);
-	if (kvm->arch.apic_access_page_done)
+	if (kvm->arch.apic_access_memslot_enabled)
 		goto out;
 	hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
 				      APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
@@ -3641,7 +3641,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	 * is able to migrate it.
 	 */
 	put_page(page);
-	kvm->arch.apic_access_page_done = true;
+	kvm->arch.apic_access_memslot_enabled = true;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return ret;
-- 
GitLab


From 1af11d098db18bfda5168dc407513726e1b1bdb3 Mon Sep 17 00:00:00 2001
From: gushengxian <gushengxian@yulong.com>
Date: Tue, 22 Jun 2021 04:55:07 -0700
Subject: [PATCH 3757/3804] ata: rb532_cf: remove redundant codes

The call "dev_err(&pdev->dev, "no IRQ resource found\n");" is
redundant because platform_get_irq() already prints an error.

Signed-off-by: gushengxian <gushengxian@yulong.com>
Link: https://lore.kernel.org/r/20210622115507.359017-1-13145886936@163.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_rb532_cf.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 303f8c375b3af..2e110aefe59b7 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -115,10 +115,8 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no IRQ resource found\n");
+	if (irq < 0)
 		return irq;
-	}
 	if (!irq)
 		return -EINVAL;
 
-- 
GitLab


From f003c03bd29e6f46fef1b9a8e8d636ac732286d5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:01 -0700
Subject: [PATCH 3758/3804] mm: page_vma_mapped_walk(): use page for pvmw->page

Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes".

I've marked all of these for stable: many are merely cleanups, but I
think they are much better before the main fix than after.

This patch (of 11):

page_vma_mapped_walk() cleanup: sometimes the local copy of pvmw->page
was used, sometimes pvmw->page itself: use the local copy "page"
throughout.

Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com
Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Will Deacon <will@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index e37bd43904af7..a6dbf714ca152 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -156,7 +156,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pte)
 		goto next_pte;
 
-	if (unlikely(PageHuge(pvmw->page))) {
+	if (unlikely(PageHuge(page))) {
 		/* when pud is not present, pte will be NULL */
 		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
 		if (!pvmw->pte)
@@ -217,8 +217,7 @@ restart:
 		 * cannot return prematurely, while zap_huge_pmd() has
 		 * cleared *pmd but not decremented compound_mapcount().
 		 */
-		if ((pvmw->flags & PVMW_SYNC) &&
-		    PageTransCompound(pvmw->page)) {
+		if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
 			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
 
 			spin_unlock(ptl);
@@ -234,9 +233,9 @@ restart:
 			return true;
 next_pte:
 		/* Seek to next pte only makes sense for THP */
-		if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
+		if (!PageTransHuge(page) || PageHuge(page))
 			return not_found(pvmw);
-		end = vma_address_end(pvmw->page, pvmw->vma);
+		end = vma_address_end(page, pvmw->vma);
 		do {
 			pvmw->address += PAGE_SIZE;
 			if (pvmw->address >= end)
-- 
GitLab


From 6d0fd5987657cb0c9756ce684e3a74c0f6351728 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:04 -0700
Subject: [PATCH 3759/3804] mm: page_vma_mapped_walk(): settle PageHuge on
 entry

page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of
the way at the start, so no need to worry about it later.

Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index a6dbf714ca152..7c0504641fb8b 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -153,10 +153,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pmd && !pvmw->pte)
 		return not_found(pvmw);
 
-	if (pvmw->pte)
-		goto next_pte;
-
 	if (unlikely(PageHuge(page))) {
+		/* The only possible mapping was handled on last iteration */
+		if (pvmw->pte)
+			return not_found(pvmw);
+
 		/* when pud is not present, pte will be NULL */
 		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
 		if (!pvmw->pte)
@@ -168,6 +169,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 			return not_found(pvmw);
 		return true;
 	}
+
+	if (pvmw->pte)
+		goto next_pte;
 restart:
 	pgd = pgd_offset(mm, pvmw->address);
 	if (!pgd_present(*pgd))
@@ -233,7 +237,7 @@ restart:
 			return true;
 next_pte:
 		/* Seek to next pte only makes sense for THP */
-		if (!PageTransHuge(page) || PageHuge(page))
+		if (!PageTransHuge(page))
 			return not_found(pvmw);
 		end = vma_address_end(page, pvmw->vma);
 		do {
-- 
GitLab


From 3306d3119ceacc43ea8b141a73e21fea68eec30c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:07 -0700
Subject: [PATCH 3760/3804] mm: page_vma_mapped_walk(): use pmde for *pvmw->pmd

page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then
use it in subsequent tests, instead of repeatedly dereferencing pointer.

Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 7c0504641fb8b..8f972b05a0de2 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -191,18 +191,19 @@ restart:
 	pmde = READ_ONCE(*pvmw->pmd);
 	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
 		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-		if (likely(pmd_trans_huge(*pvmw->pmd))) {
+		pmde = *pvmw->pmd;
+		if (likely(pmd_trans_huge(pmde))) {
 			if (pvmw->flags & PVMW_MIGRATION)
 				return not_found(pvmw);
-			if (pmd_page(*pvmw->pmd) != page)
+			if (pmd_page(pmde) != page)
 				return not_found(pvmw);
 			return true;
-		} else if (!pmd_present(*pvmw->pmd)) {
+		} else if (!pmd_present(pmde)) {
 			if (thp_migration_supported()) {
 				if (!(pvmw->flags & PVMW_MIGRATION))
 					return not_found(pvmw);
-				if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
-					swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+				if (is_migration_entry(pmd_to_swp_entry(pmde))) {
+					swp_entry_t entry = pmd_to_swp_entry(pmde);
 
 					if (migration_entry_to_page(entry) != page)
 						return not_found(pvmw);
-- 
GitLab


From e2e1d4076c77b3671cf8ce702535ae7dee3acf89 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:10 -0700
Subject: [PATCH 3761/3804] mm: page_vma_mapped_walk(): prettify PVMW_MIGRATION
 block

page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to
follow the same "return not_found, return not_found, return true"
pattern as the block above it (note: returning not_found there is never
premature, since existence or prior existence of huge pmd guarantees
good alignment).

Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 8f972b05a0de2..261fc929b08d6 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -198,24 +198,22 @@ restart:
 			if (pmd_page(pmde) != page)
 				return not_found(pvmw);
 			return true;
-		} else if (!pmd_present(pmde)) {
-			if (thp_migration_supported()) {
-				if (!(pvmw->flags & PVMW_MIGRATION))
-					return not_found(pvmw);
-				if (is_migration_entry(pmd_to_swp_entry(pmde))) {
-					swp_entry_t entry = pmd_to_swp_entry(pmde);
+		}
+		if (!pmd_present(pmde)) {
+			swp_entry_t entry;
 
-					if (migration_entry_to_page(entry) != page)
-						return not_found(pvmw);
-					return true;
-				}
-			}
-			return not_found(pvmw);
-		} else {
-			/* THP pmd was split under us: handle on pte level */
-			spin_unlock(pvmw->ptl);
-			pvmw->ptl = NULL;
+			if (!thp_migration_supported() ||
+			    !(pvmw->flags & PVMW_MIGRATION))
+				return not_found(pvmw);
+			entry = pmd_to_swp_entry(pmde);
+			if (!is_migration_entry(entry) ||
+			    migration_entry_to_page(entry) != page)
+				return not_found(pvmw);
+			return true;
 		}
+		/* THP pmd was split under us: handle on pte level */
+		spin_unlock(pvmw->ptl);
+		pvmw->ptl = NULL;
 	} else if (!pmd_present(pmde)) {
 		/*
 		 * If PVMW_SYNC, take and drop THP pmd lock so that we
-- 
GitLab


From 448282487483d6fa5b2eeeafaa0acc681e544a9c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:14 -0700
Subject: [PATCH 3762/3804] mm: page_vma_mapped_walk(): crossing page table
 boundary

page_vma_mapped_walk() cleanup: adjust the test for crossing page table
boundary - I believe pvmw->address is always page-aligned, but nothing
else here assumed that; and remember to reset pvmw->pte to NULL after
unmapping the page table, though I never saw any bug from that.

Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 261fc929b08d6..9c87b3090a1ba 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -244,16 +244,16 @@ next_pte:
 			if (pvmw->address >= end)
 				return not_found(pvmw);
 			/* Did we cross page table boundary? */
-			if (pvmw->address % PMD_SIZE == 0) {
-				pte_unmap(pvmw->pte);
+			if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
 				if (pvmw->ptl) {
 					spin_unlock(pvmw->ptl);
 					pvmw->ptl = NULL;
 				}
+				pte_unmap(pvmw->pte);
+				pvmw->pte = NULL;
 				goto restart;
-			} else {
-				pvmw->pte++;
 			}
+			pvmw->pte++;
 		} while (pte_none(*pvmw->pte));
 
 		if (!pvmw->ptl) {
-- 
GitLab


From b3807a91aca7d21c05d5790612e49969117a72b9 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:17 -0700
Subject: [PATCH 3763/3804] mm: page_vma_mapped_walk(): add a level of
 indentation

page_vma_mapped_walk() cleanup: add a level of indentation to much of
the body, making no functional change in this commit, but reducing the
later diff when this is all converted to a loop.

[hughd@google.com: page_vma_mapped_walk(): add a level of indentation fix]
  Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com

Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 105 ++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 50 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 9c87b3090a1ba..5b5832d063382 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -173,62 +173,67 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pte)
 		goto next_pte;
 restart:
-	pgd = pgd_offset(mm, pvmw->address);
-	if (!pgd_present(*pgd))
-		return false;
-	p4d = p4d_offset(pgd, pvmw->address);
-	if (!p4d_present(*p4d))
-		return false;
-	pud = pud_offset(p4d, pvmw->address);
-	if (!pud_present(*pud))
-		return false;
-	pvmw->pmd = pmd_offset(pud, pvmw->address);
-	/*
-	 * Make sure the pmd value isn't cached in a register by the
-	 * compiler and used as a stale value after we've observed a
-	 * subsequent update.
-	 */
-	pmde = READ_ONCE(*pvmw->pmd);
-	if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
-		pvmw->ptl = pmd_lock(mm, pvmw->pmd);
-		pmde = *pvmw->pmd;
-		if (likely(pmd_trans_huge(pmde))) {
-			if (pvmw->flags & PVMW_MIGRATION)
-				return not_found(pvmw);
-			if (pmd_page(pmde) != page)
-				return not_found(pvmw);
-			return true;
-		}
-		if (!pmd_present(pmde)) {
-			swp_entry_t entry;
+	{
+		pgd = pgd_offset(mm, pvmw->address);
+		if (!pgd_present(*pgd))
+			return false;
+		p4d = p4d_offset(pgd, pvmw->address);
+		if (!p4d_present(*p4d))
+			return false;
+		pud = pud_offset(p4d, pvmw->address);
+		if (!pud_present(*pud))
+			return false;
 
-			if (!thp_migration_supported() ||
-			    !(pvmw->flags & PVMW_MIGRATION))
-				return not_found(pvmw);
-			entry = pmd_to_swp_entry(pmde);
-			if (!is_migration_entry(entry) ||
-			    migration_entry_to_page(entry) != page)
-				return not_found(pvmw);
-			return true;
-		}
-		/* THP pmd was split under us: handle on pte level */
-		spin_unlock(pvmw->ptl);
-		pvmw->ptl = NULL;
-	} else if (!pmd_present(pmde)) {
+		pvmw->pmd = pmd_offset(pud, pvmw->address);
 		/*
-		 * If PVMW_SYNC, take and drop THP pmd lock so that we
-		 * cannot return prematurely, while zap_huge_pmd() has
-		 * cleared *pmd but not decremented compound_mapcount().
+		 * Make sure the pmd value isn't cached in a register by the
+		 * compiler and used as a stale value after we've observed a
+		 * subsequent update.
 		 */
-		if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) {
-			spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+		pmde = READ_ONCE(*pvmw->pmd);
 
-			spin_unlock(ptl);
+		if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+			pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+			pmde = *pvmw->pmd;
+			if (likely(pmd_trans_huge(pmde))) {
+				if (pvmw->flags & PVMW_MIGRATION)
+					return not_found(pvmw);
+				if (pmd_page(pmde) != page)
+					return not_found(pvmw);
+				return true;
+			}
+			if (!pmd_present(pmde)) {
+				swp_entry_t entry;
+
+				if (!thp_migration_supported() ||
+				    !(pvmw->flags & PVMW_MIGRATION))
+					return not_found(pvmw);
+				entry = pmd_to_swp_entry(pmde);
+				if (!is_migration_entry(entry) ||
+				    migration_entry_to_page(entry) != page)
+					return not_found(pvmw);
+				return true;
+			}
+			/* THP pmd was split under us: handle on pte level */
+			spin_unlock(pvmw->ptl);
+			pvmw->ptl = NULL;
+		} else if (!pmd_present(pmde)) {
+			/*
+			 * If PVMW_SYNC, take and drop THP pmd lock so that we
+			 * cannot return prematurely, while zap_huge_pmd() has
+			 * cleared *pmd but not decremented compound_mapcount().
+			 */
+			if ((pvmw->flags & PVMW_SYNC) &&
+			    PageTransCompound(page)) {
+				spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+				spin_unlock(ptl);
+			}
+			return false;
 		}
-		return false;
+		if (!map_pte(pvmw))
+			goto next_pte;
 	}
-	if (!map_pte(pvmw))
-		goto next_pte;
 	while (1) {
 		unsigned long end;
 
-- 
GitLab


From 474466301dfd8b39a10c01db740645f3f7ae9a28 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:20 -0700
Subject: [PATCH 3764/3804] mm: page_vma_mapped_walk(): use goto instead of
 while (1)

page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte,
and use "goto this_pte", in place of the "while (1)" loop at the end.

Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 5b5832d063382..6b9320340f1b4 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -144,6 +144,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 {
 	struct mm_struct *mm = pvmw->vma->vm_mm;
 	struct page *page = pvmw->page;
+	unsigned long end;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
@@ -233,10 +234,7 @@ restart:
 		}
 		if (!map_pte(pvmw))
 			goto next_pte;
-	}
-	while (1) {
-		unsigned long end;
-
+this_pte:
 		if (check_pte(pvmw))
 			return true;
 next_pte:
@@ -265,6 +263,7 @@ next_pte:
 			pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
 			spin_lock(pvmw->ptl);
 		}
+		goto this_pte;
 	}
 }
 
-- 
GitLab


From a765c417d876cc635f628365ec9aa6f09470069a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:23 -0700
Subject: [PATCH 3765/3804] mm: page_vma_mapped_walk(): get vma_address_end()
 earlier

page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the
start, rather than later at next_pte.

It's a little unnecessary overhead on the first call, but makes for a
simpler loop in the following commit.

Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 6b9320340f1b4..df89ea3df4f42 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -171,6 +171,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 		return true;
 	}
 
+	/*
+	 * Seek to next pte only makes sense for THP.
+	 * But more important than that optimization, is to filter out
+	 * any PageKsm page: whose page->index misleads vma_address()
+	 * and vma_address_end() to disaster.
+	 */
+	end = PageTransCompound(page) ?
+		vma_address_end(page, pvmw->vma) :
+		pvmw->address + PAGE_SIZE;
 	if (pvmw->pte)
 		goto next_pte;
 restart:
@@ -238,10 +247,6 @@ this_pte:
 		if (check_pte(pvmw))
 			return true;
 next_pte:
-		/* Seek to next pte only makes sense for THP */
-		if (!PageTransHuge(page))
-			return not_found(pvmw);
-		end = vma_address_end(page, pvmw->vma);
 		do {
 			pvmw->address += PAGE_SIZE;
 			if (pvmw->address >= end)
-- 
GitLab


From a9a7504d9beaf395481faa91e70e2fd08f7a3dde Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:26 -0700
Subject: [PATCH 3766/3804] mm/thp: fix page_vma_mapped_walk() if THP mapped by
 ptes

Running certain tests with a DEBUG_VM kernel would crash within hours,
on the total_mapcount BUG() in split_huge_page_to_list(), while trying
to free up some memory by punching a hole in a shmem huge page: split's
try_to_unmap() was unable to find all the mappings of the page (which,
on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).

Crash dumps showed two tail pages of a shmem huge page remained mapped
by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end
of a long unmapped range; and no page table had yet been allocated for
the head of the huge page to be mapped into.

Although designed to handle these odd misaligned huge-page-mapped-by-pte
cases, page_vma_mapped_walk() falls short by returning false prematurely
when !pmd_present or !pud_present or !p4d_present or !pgd_present: there
are cases when a huge page may span the boundary, with ptes present in
the next.

Restructure page_vma_mapped_walk() as a loop to continue in these cases,
while keeping its layout much as before.  Add a step_forward() helper to
advance pvmw->address across those boundaries: originally I tried to use
mm's standard p?d_addr_end() macros, but hit the same crash 512 times
less often: because of the way redundant levels are folded together, but
folded differently in different configurations, it was just too
difficult to use them correctly; and step_forward() is simpler anyway.

Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com
Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Wang Yugui <wangyugui@e16-tech.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index df89ea3df4f42..a4e962b510c7a 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -116,6 +116,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
 	return pfn_is_match(pvmw->page, pfn);
 }
 
+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+	pvmw->address = (pvmw->address + size) & ~(size - 1);
+	if (!pvmw->address)
+		pvmw->address = ULONG_MAX;
+}
+
 /**
  * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
  * @pvmw->address
@@ -183,16 +190,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	if (pvmw->pte)
 		goto next_pte;
 restart:
-	{
+	do {
 		pgd = pgd_offset(mm, pvmw->address);
-		if (!pgd_present(*pgd))
-			return false;
+		if (!pgd_present(*pgd)) {
+			step_forward(pvmw, PGDIR_SIZE);
+			continue;
+		}
 		p4d = p4d_offset(pgd, pvmw->address);
-		if (!p4d_present(*p4d))
-			return false;
+		if (!p4d_present(*p4d)) {
+			step_forward(pvmw, P4D_SIZE);
+			continue;
+		}
 		pud = pud_offset(p4d, pvmw->address);
-		if (!pud_present(*pud))
-			return false;
+		if (!pud_present(*pud)) {
+			step_forward(pvmw, PUD_SIZE);
+			continue;
+		}
 
 		pvmw->pmd = pmd_offset(pud, pvmw->address);
 		/*
@@ -239,7 +252,8 @@ restart:
 
 				spin_unlock(ptl);
 			}
-			return false;
+			step_forward(pvmw, PMD_SIZE);
+			continue;
 		}
 		if (!map_pte(pvmw))
 			goto next_pte;
@@ -269,7 +283,9 @@ next_pte:
 			spin_lock(pvmw->ptl);
 		}
 		goto this_pte;
-	}
+	} while (pvmw->address < end);
+
+	return false;
 }
 
 /**
-- 
GitLab


From a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:30 -0700
Subject: [PATCH 3767/3804] mm/thp: another PVMW_SYNC fix in
 page_vma_mapped_walk()

Aha! Shouldn't that quick scan over pte_none()s make sure that it holds
ptlock in the PVMW_SYNC case? That too might have been responsible for
BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though
I've never seen any.

Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com
Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Wang Yugui <wangyugui@e16-tech.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_vma_mapped.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index a4e962b510c7a..a4435311754b0 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -276,6 +276,10 @@ next_pte:
 				goto restart;
 			}
 			pvmw->pte++;
+			if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+				pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+				spin_lock(pvmw->ptl);
+			}
 		} while (pte_none(*pvmw->pte));
 
 		if (!pvmw->ptl) {
-- 
GitLab


From 8fd0c1b0647a6bda4067ee0cd61e8395954b6f28 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Thu, 24 Jun 2021 18:39:33 -0700
Subject: [PATCH 3768/3804] nilfs2: fix memory leak in
 nilfs_sysfs_delete_device_group

My local syzbot instance hit memory leak in nilfs2.  The problem was in
missing kobject_put() in nilfs_sysfs_delete_device_group().

kobject_del() does not call kobject_cleanup() for passed kobject and it
leads to leaking duped kobject name if kobject_put() was not called.

Fail log:

  BUG: memory leak
  unreferenced object 0xffff8880596171e0 (size 8):
  comm "syz-executor379", pid 8381, jiffies 4294980258 (age 21.100s)
  hex dump (first 8 bytes):
    6c 6f 6f 70 30 00 00 00                          loop0...
  backtrace:
     kstrdup+0x36/0x70 mm/util.c:60
     kstrdup_const+0x53/0x80 mm/util.c:83
     kvasprintf_const+0x108/0x190 lib/kasprintf.c:48
     kobject_set_name_vargs+0x56/0x150 lib/kobject.c:289
     kobject_add_varg lib/kobject.c:384 [inline]
     kobject_init_and_add+0xc9/0x160 lib/kobject.c:473
     nilfs_sysfs_create_device_group+0x150/0x800 fs/nilfs2/sysfs.c:999
     init_nilfs+0xe26/0x12b0 fs/nilfs2/the_nilfs.c:637

Link: https://lkml.kernel.org/r/20210612140559.20022-1-paskripkin@gmail.com
Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/<device> group")
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Acked-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Cc: Michael L. Semon <mlsemon35@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/sysfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 303d71430bdd1..9c6c0e2e5880a 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs)
 	nilfs_sysfs_delete_superblock_group(nilfs);
 	nilfs_sysfs_delete_segctor_group(nilfs);
 	kobject_del(&nilfs->ns_dev_kobj);
+	kobject_put(&nilfs->ns_dev_kobj);
 	kfree(nilfs->ns_dev_subgroups);
 }
 
-- 
GitLab


From 15a64f5a8870b5610b616a4aa753262dfaa5d76e Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Thu, 24 Jun 2021 18:39:36 -0700
Subject: [PATCH 3769/3804] mm/vmalloc: add vmalloc_no_huge

Patch series "mm: add vmalloc_no_huge and use it", v4.

Add vmalloc_no_huge() and export it, so modules can allocate memory with
small pages.

Use the newly added vmalloc_no_huge() in KVM on s390 to get around a
hardware limitation.

This patch (of 2):

Commit 121e6f3258fe3 ("mm/vmalloc: hugepage vmalloc mappings") added
support for hugepage vmalloc mappings; it also added the flag
VM_NO_HUGE_VMAP for __vmalloc_node_range to request the allocation to be
performed with 0-order non-huge pages.

This flag is not accessible when calling vmalloc, the only option is to
call directly __vmalloc_node_range, which is not exported.

This means that a module can't vmalloc memory with small pages.

Case in point: KVM on s390x needs to vmalloc a large area, and it needs
to be mapped with non-huge pages, because of a hardware limitation.

This patch adds the function vmalloc_no_huge, which works like vmalloc,
but it is guaranteed to always back the mapping using small pages.  This
new function is exported, therefore it is usable by modules.

[akpm@linux-foundation.org: whitespace fixes, per Christoph]

Link: https://lkml.kernel.org/r/20210614132357.10202-1-imbrenda@linux.ibm.com
Link: https://lkml.kernel.org/r/20210614132357.10202-2-imbrenda@linux.ibm.com
Fixes: 121e6f3258fe3 ("mm/vmalloc: hugepage vmalloc mappings")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h |  1 +
 mm/vmalloc.c            | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 4d668abb63917..bfaaf0b6fa766 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -135,6 +135,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			const void *caller);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
 		int node, const void *caller);
+void *vmalloc_no_huge(unsigned long size);
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a13ac524f6ff8..fada19e17814c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2998,6 +2998,23 @@ void *vmalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc);
 
+/**
+ * vmalloc_no_huge - allocate virtually contiguous memory using small pages
+ * @size:    allocation size
+ *
+ * Allocate enough non-huge pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_no_huge(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_no_huge);
+
 /**
  * vzalloc - allocate virtually contiguous memory with zero fill
  * @size:    allocation size
-- 
GitLab


From 185cca24e977411495d57ec71e43350b69c08e63 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Thu, 24 Jun 2021 18:39:39 -0700
Subject: [PATCH 3770/3804] KVM: s390: prepare for hugepage vmalloc

The Create Secure Configuration Ultravisor Call does not support using
large pages for the virtual memory area.  This is a hardware limitation.

This patch replaces the vzalloc call with an almost equivalent call to
the newly introduced vmalloc_no_huge function, which guarantees that
only small pages will be used for the backing.

The new call will not clear the allocated memory, but that has never
been an actual requirement.

Link: https://lkml.kernel.org/r/20210614132357.10202-3-imbrenda@linux.ibm.com
Fixes: 121e6f3258fe3 ("mm/vmalloc: hugepage vmalloc mappings")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/kvm/pv.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 813b6e93dc836..c8841f476e913 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -140,7 +140,12 @@ static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
 	/* Allocate variable storage */
 	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
 	vlen += uv_info.guest_virt_base_stor_len;
-	kvm->arch.pv.stor_var = vzalloc(vlen);
+	/*
+	 * The Create Secure Configuration Ultravisor Call does not support
+	 * using large pages for the virtual memory area.
+	 * This is a hardware limitation.
+	 */
+	kvm->arch.pv.stor_var = vmalloc_no_huge(vlen);
 	if (!kvm->arch.pv.stor_var)
 		goto out_err;
 	return 0;
-- 
GitLab


From 7ca3027b726be681c8e6292b5a81ebcde7581710 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Thu, 24 Jun 2021 18:39:42 -0700
Subject: [PATCH 3771/3804] mm/vmalloc: unbreak kasan vmalloc support

In commit 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings"),
__vmalloc_node_range was changed such that __get_vm_area_node was no
longer called with the requested/real size of the vmalloc allocation,
but rather with a rounded-up size.

This means that __get_vm_area_node called kasan_unpoison_vmalloc() with
a rounded up size rather than the real size.  This led to it allowing
access to too much memory and so missing vmalloc OOBs and failing the
kasan kunit tests.

Pass the real size and the desired shift into __get_vm_area_node.  This
allows it to round up the size for the underlying allocators while still
unpoisoning the correct quantity of shadow memory.

Adjust the other call-sites to pass in PAGE_SHIFT for the shift value.

Link: https://lkml.kernel.org/r/20210617081330.98629-1-dja@axtens.net
Link: https://bugzilla.kernel.org/show_bug.cgi?id=213335
Fixes: 121e6f3258fe ("mm/vmalloc: hugepage vmalloc mappings")
Signed-off-by: Daniel Axtens <dja@axtens.net>
Tested-by: David Gow <davidgow@google.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Tested-by: Andrey Konovalov <andreyknvl@gmail.com>
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fada19e17814c..d0a7d89be091e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2344,15 +2344,16 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm)
 }
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
-		unsigned long align, unsigned long flags, unsigned long start,
-		unsigned long end, int node, gfp_t gfp_mask, const void *caller)
+		unsigned long align, unsigned long shift, unsigned long flags,
+		unsigned long start, unsigned long end, int node,
+		gfp_t gfp_mask, const void *caller)
 {
 	struct vmap_area *va;
 	struct vm_struct *area;
 	unsigned long requested_size = size;
 
 	BUG_ON(in_interrupt());
-	size = PAGE_ALIGN(size);
+	size = ALIGN(size, 1ul << shift);
 	if (unlikely(!size))
 		return NULL;
 
@@ -2384,8 +2385,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
 				       unsigned long start, unsigned long end,
 				       const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE,
-				  GFP_KERNEL, caller);
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
+				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
 /**
@@ -2401,7 +2402,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
  */
 struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL,
 				  __builtin_return_address(0));
 }
@@ -2409,7 +2411,8 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
 				const void *caller)
 {
-	return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
+	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags,
+				  VMALLOC_START, VMALLOC_END,
 				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
 
@@ -2902,9 +2905,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	}
 
 again:
-	size = PAGE_ALIGN(size);
-	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
-				vm_flags, start, end, node, gfp_mask, caller);
+	area = __get_vm_area_node(real_size, align, shift, VM_ALLOC |
+				  VM_UNINITIALIZED | vm_flags, start, end, node,
+				  gfp_mask, caller);
 	if (!area) {
 		warn_alloc(gfp_mask, NULL,
 			   "vmalloc size %lu allocation failure: "
@@ -2923,6 +2926,7 @@ again:
 	 */
 	clear_vm_uninitialized_flag(area);
 
+	size = PAGE_ALIGN(size);
 	kmemleak_vmalloc(area, size, gfp_mask);
 
 	return addr;
-- 
GitLab


From 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Thu, 24 Jun 2021 18:39:45 -0700
Subject: [PATCH 3772/3804] kthread_worker: split code for canceling the
 delayed work timer

Patch series "kthread_worker: Fix race between kthread_mod_delayed_work()
and kthread_cancel_delayed_work_sync()".

This patchset fixes the race between kthread_mod_delayed_work() and
kthread_cancel_delayed_work_sync() including proper return value
handling.

This patch (of 2):

Simple code refactoring as a preparation step for fixing a race between
kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync().

It does not modify the existing behavior.

Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com
Signed-off-by: Petr Mladek <pmladek@suse.com>
Cc: <jenhaochen@google.com>
Cc: Martin Liu <liumartin@google.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kthread.c | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index fe3f2a40d61e8..121a0e1fc6595 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1092,6 +1092,33 @@ void kthread_flush_work(struct kthread_work *work)
 }
 EXPORT_SYMBOL_GPL(kthread_flush_work);
 
+/*
+ * Make sure that the timer is neither set nor running and could
+ * not manipulate the work list_head any longer.
+ *
+ * The function is called under worker->lock. The lock is temporary
+ * released but the timer can't be set again in the meantime.
+ */
+static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
+					      unsigned long *flags)
+{
+	struct kthread_delayed_work *dwork =
+		container_of(work, struct kthread_delayed_work, work);
+	struct kthread_worker *worker = work->worker;
+
+	/*
+	 * del_timer_sync() must be called to make sure that the timer
+	 * callback is not running. The lock must be temporary released
+	 * to avoid a deadlock with the callback. In the meantime,
+	 * any queuing is blocked by setting the canceling counter.
+	 */
+	work->canceling++;
+	raw_spin_unlock_irqrestore(&worker->lock, *flags);
+	del_timer_sync(&dwork->timer);
+	raw_spin_lock_irqsave(&worker->lock, *flags);
+	work->canceling--;
+}
+
 /*
  * This function removes the work from the worker queue. Also it makes sure
  * that it won't get queued later via the delayed work's timer.
@@ -1106,23 +1133,8 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
 				  unsigned long *flags)
 {
 	/* Try to cancel the timer if exists. */
-	if (is_dwork) {
-		struct kthread_delayed_work *dwork =
-			container_of(work, struct kthread_delayed_work, work);
-		struct kthread_worker *worker = work->worker;
-
-		/*
-		 * del_timer_sync() must be called to make sure that the timer
-		 * callback is not running. The lock must be temporary released
-		 * to avoid a deadlock with the callback. In the meantime,
-		 * any queuing is blocked by setting the canceling counter.
-		 */
-		work->canceling++;
-		raw_spin_unlock_irqrestore(&worker->lock, *flags);
-		del_timer_sync(&dwork->timer);
-		raw_spin_lock_irqsave(&worker->lock, *flags);
-		work->canceling--;
-	}
+	if (is_dwork)
+		kthread_cancel_delayed_work_timer(work, flags);
 
 	/*
 	 * Try to remove the work from a worker list. It might either
-- 
GitLab


From 5fa54346caf67b4b1b10b1f390316ae466da4d53 Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Thu, 24 Jun 2021 18:39:48 -0700
Subject: [PATCH 3773/3804] kthread: prevent deadlock when
 kthread_mod_delayed_work() races with kthread_cancel_delayed_work_sync()

The system might hang with the following backtrace:

	schedule+0x80/0x100
	schedule_timeout+0x48/0x138
	wait_for_common+0xa4/0x134
	wait_for_completion+0x1c/0x2c
	kthread_flush_work+0x114/0x1cc
	kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144
	kthread_cancel_delayed_work_sync+0x18/0x2c
	xxxx_pm_notify+0xb0/0xd8
	blocking_notifier_call_chain_robust+0x80/0x194
	pm_notifier_call_chain_robust+0x28/0x4c
	suspend_prepare+0x40/0x260
	enter_state+0x80/0x3f4
	pm_suspend+0x60/0xdc
	state_store+0x108/0x144
	kobj_attr_store+0x38/0x88
	sysfs_kf_write+0x64/0xc0
	kernfs_fop_write_iter+0x108/0x1d0
	vfs_write+0x2f4/0x368
	ksys_write+0x7c/0xec

It is caused by the following race between kthread_mod_delayed_work()
and kthread_cancel_delayed_work_sync():

CPU0				CPU1

Context: Thread A		Context: Thread B

kthread_mod_delayed_work()
  spin_lock()
  __kthread_cancel_work()
     spin_unlock()
     del_timer_sync()
				kthread_cancel_delayed_work_sync()
				  spin_lock()
				  __kthread_cancel_work()
				    spin_unlock()
				    del_timer_sync()
				    spin_lock()

				  work->canceling++
				  spin_unlock
     spin_lock()
   queue_delayed_work()
     // dwork is put into the worker->delayed_work_list

   spin_unlock()

				  kthread_flush_work()
     // flush_work is put at the tail of the dwork

				    wait_for_completion()

Context: IRQ

  kthread_delayed_work_timer_fn()
    spin_lock()
    list_del_init(&work->node);
    spin_unlock()

BANG: flush_work is no longer linked and will never get processed.

The problem is that kthread_mod_delayed_work() checks work->canceling
flag before canceling the timer.

A simple solution is to (re)check work->canceling after
__kthread_cancel_work().  But then it is not clear what should be
returned when __kthread_cancel_work() removed the work from the queue
(list) and it can't queue it again with the new @delay.

The return value might be used for reference counting.  The caller has
to know whether a new work has been queued or an existing one was
replaced.

The proper solution is that kthread_mod_delayed_work() will remove the
work from the queue (list) _only_ when work->canceling is not set.  The
flag must be checked after the timer is stopped and the remaining
operations can be done under worker->lock.

Note that kthread_mod_delayed_work() could remove the timer and then
bail out.  It is fine.  The other canceling caller needs to cancel the
timer as well.  The important thing is that the queue (list)
manipulation is done atomically under worker->lock.

Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com
Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work")
Signed-off-by: Petr Mladek <pmladek@suse.com>
Reported-by: Martin Liu <liumartin@google.com>
Cc: <jenhaochen@google.com>
Cc: Minchan Kim <minchan@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kthread.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 121a0e1fc6595..0fccf7d0c6a16 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1120,8 +1120,11 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
 }
 
 /*
- * This function removes the work from the worker queue. Also it makes sure
- * that it won't get queued later via the delayed work's timer.
+ * This function removes the work from the worker queue.
+ *
+ * It is called under worker->lock. The caller must make sure that
+ * the timer used by delayed work is not running, e.g. by calling
+ * kthread_cancel_delayed_work_timer().
  *
  * The work might still be in use when this function finishes. See the
  * current_work proceed by the worker.
@@ -1129,13 +1132,8 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
  * Return: %true if @work was pending and successfully canceled,
  *	%false if @work was not pending
  */
-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
-				  unsigned long *flags)
+static bool __kthread_cancel_work(struct kthread_work *work)
 {
-	/* Try to cancel the timer if exists. */
-	if (is_dwork)
-		kthread_cancel_delayed_work_timer(work, flags);
-
 	/*
 	 * Try to remove the work from a worker list. It might either
 	 * be from worker->work_list or from worker->delayed_work_list.
@@ -1188,11 +1186,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 	/* Work must not be used with >1 worker, see kthread_queue_work() */
 	WARN_ON_ONCE(work->worker != worker);
 
-	/* Do not fight with another command that is canceling this work. */
+	/*
+	 * Temporary cancel the work but do not fight with another command
+	 * that is canceling the work as well.
+	 *
+	 * It is a bit tricky because of possible races with another
+	 * mod_delayed_work() and cancel_delayed_work() callers.
+	 *
+	 * The timer must be canceled first because worker->lock is released
+	 * when doing so. But the work can be removed from the queue (list)
+	 * only when it can be queued again so that the return value can
+	 * be used for reference counting.
+	 */
+	kthread_cancel_delayed_work_timer(work, &flags);
 	if (work->canceling)
 		goto out;
+	ret = __kthread_cancel_work(work);
 
-	ret = __kthread_cancel_work(work, true, &flags);
 fast_queue:
 	__kthread_queue_delayed_work(worker, dwork, delay);
 out:
@@ -1214,7 +1224,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
-	ret = __kthread_cancel_work(work, is_dwork, &flags);
+	if (is_dwork)
+		kthread_cancel_delayed_work_timer(work, &flags);
+
+	ret = __kthread_cancel_work(work);
 
 	if (worker->current_work != work)
 		goto out_fast;
-- 
GitLab


From fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 24 Jun 2021 18:39:52 -0700
Subject: [PATCH 3774/3804] mm, futex: fix shared futex pgoff on shmem huge
 page

If more than one futex is placed on a shmem huge page, it can happen
that waking the second wakes the first instead, and leaves the second
waiting: the key's shared.pgoff is wrong.

When 3.11 commit 13d60f4b6ab5 ("futex: Take hugepages into account when
generating futex_key") went in, the only shared huge pages came from
hugetlbfs, and the code added to deal with its exceptional page->index
was put into hugetlb source.  Then that was missed when 4.8 added shmem
huge pages.

page_to_pgoff() is what others use for this nowadays: except that, as
currently written, it gives the right answer on hugetlbfs head, but
nonsense on hugetlbfs tails.  Fix that by calling hugetlbfs-specific
hugetlb_basepage_index() on PageHuge tails as well as on head.

Yes, it's unconventional to declare hugetlb_basepage_index() there in
pagemap.h, rather than in hugetlb.h; but I do not expect anything but
page_to_pgoff() ever to need it.

[akpm@linux-foundation.org: give hugetlb_basepage_index() prototype the correct scope]

Link: https://lkml.kernel.org/r/b17d946b-d09-326e-b42a-52884c36df32@google.com
Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
Reported-by: Neel Natu <neelnatu@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Zhang Yi <wetpzy@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 16 ----------------
 include/linux/pagemap.h | 13 +++++++------
 kernel/futex.c          |  3 +--
 mm/hugetlb.c            |  5 +----
 4 files changed, 9 insertions(+), 28 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6504346a19473..3c0117656745a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -741,17 +741,6 @@ static inline int hstate_index(struct hstate *h)
 	return h - hstates;
 }
 
-pgoff_t __basepage_index(struct page *page);
-
-/* Return page->index in PAGE_SIZE units */
-static inline pgoff_t basepage_index(struct page *page)
-{
-	if (!PageCompound(page))
-		return page->index;
-
-	return __basepage_index(page);
-}
-
 extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
@@ -988,11 +977,6 @@ static inline int hstate_index(struct hstate *h)
 	return 0;
 }
 
-static inline pgoff_t basepage_index(struct page *page)
-{
-	return page->index;
-}
-
 static inline int dissolve_free_huge_page(struct page *page)
 {
 	return 0;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e89df447fae32..0f1b34dbf3a2e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -516,7 +516,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
 }
 
 /*
- * Get index of the page with in radix-tree
+ * Get index of the page within radix-tree (but not for hugetlb pages).
  * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
  */
 static inline pgoff_t page_to_index(struct page *page)
@@ -535,15 +535,16 @@ static inline pgoff_t page_to_index(struct page *page)
 	return pgoff;
 }
 
+extern pgoff_t hugetlb_basepage_index(struct page *page);
+
 /*
- * Get the offset in PAGE_SIZE.
- * (TODO: hugepage should have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
  */
 static inline pgoff_t page_to_pgoff(struct page *page)
 {
-	if (unlikely(PageHeadHuge(page)))
-		return page->index << compound_order(page);
-
+	if (unlikely(PageHuge(page)))
+		return hugetlb_basepage_index(page);
 	return page_to_index(page);
 }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 4938a00bc7857..408cad5e89680 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -35,7 +35,6 @@
 #include <linux/jhash.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
-#include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
@@ -650,7 +649,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.i_seq = get_inode_sequence_number(inode);
-		key->shared.pgoff = basepage_index(tail);
+		key->shared.pgoff = page_to_pgoff(tail);
 		rcu_read_unlock();
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e0a5f9cbbece9..5ba5a0da6d572 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1588,15 +1588,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage)
 	return NULL;
 }
 
-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
 {
 	struct page *page_head = compound_head(page);
 	pgoff_t index = page_index(page_head);
 	unsigned long compound_idx;
 
-	if (!PageHuge(page_head))
-		return page_index(page);
-
 	if (compound_order(page_head) >= MAX_ORDER)
 		compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
 	else
-- 
GitLab


From 171936ddaf97e6f4e1264f4128bb5cf15691339c Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 24 Jun 2021 18:39:55 -0700
Subject: [PATCH 3775/3804] mm/memory-failure: use a mutex to avoid
 memory_failure() races

Patch series "mm,hwpoison: fix sending SIGBUS for Action Required MCE", v5.

I wrote this patchset to materialize what I think is the currently
acceptable solution mentioned in the previous discussion [1].  I simply
borrowed Tony's mutex patch and Aili's return code patch, then I queued
another one to find the error virtual address in a best-effort manner.  I
know that this is not a perfect solution, but it should work for some
typical cases.

[1]: https://lore.kernel.org/linux-mm/20210331192540.2141052f@alex-virtual-machine/

This patch (of 2):

There can be races when multiple CPUs consume poison from the same page.
The first into memory_failure() atomically sets the HWPoison page flag
and begins hunting for tasks that map this page.  Eventually it
invalidates those mappings and may send a SIGBUS to the affected tasks.

But while all that work is going on, other CPUs see a "success" return
code from memory_failure() and so they believe the error has been
handled and continue executing.

Fix by wrapping most of the internal parts of memory_failure() in a
mutex.

[akpm@linux-foundation.org: make mf_mutex local to memory_failure()]

Link: https://lkml.kernel.org/r/20210521030156.2612074-1-nao.horiguchi@gmail.com
Link: https://lkml.kernel.org/r/20210521030156.2612074-2-nao.horiguchi@gmail.com
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Aili Yao <yaoaili@kingsoft.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jue Wang <juew@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0143d32bc6663..a7fc1cd6765a1 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1429,9 +1429,10 @@ int memory_failure(unsigned long pfn, int flags)
 	struct page *hpage;
 	struct page *orig_head;
 	struct dev_pagemap *pgmap;
-	int res;
+	int res = 0;
 	unsigned long page_flags;
 	bool retry = true;
+	static DEFINE_MUTEX(mf_mutex);
 
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure on page %lx", pfn);
@@ -1449,13 +1450,18 @@ int memory_failure(unsigned long pfn, int flags)
 		return -ENXIO;
 	}
 
+	mutex_lock(&mf_mutex);
+
 try_again:
-	if (PageHuge(p))
-		return memory_failure_hugetlb(pfn, flags);
+	if (PageHuge(p)) {
+		res = memory_failure_hugetlb(pfn, flags);
+		goto unlock_mutex;
+	}
+
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	orig_head = hpage = compound_head(p);
@@ -1488,17 +1494,19 @@ try_again:
 				res = MF_FAILED;
 			}
 			action_result(pfn, MF_MSG_BUDDY, res);
-			return res == MF_RECOVERED ? 0 : -EBUSY;
+			res = res == MF_RECOVERED ? 0 : -EBUSY;
 		} else {
 			action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
-			return -EBUSY;
+			res = -EBUSY;
 		}
+		goto unlock_mutex;
 	}
 
 	if (PageTransHuge(hpage)) {
 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-			return -EBUSY;
+			res = -EBUSY;
+			goto unlock_mutex;
 		}
 		VM_BUG_ON_PAGE(!page_count(p), p);
 	}
@@ -1522,7 +1530,7 @@ try_again:
 	if (PageCompound(p) && compound_head(p) != orig_head) {
 		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1542,14 +1550,14 @@ try_again:
 		num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	/*
@@ -1573,7 +1581,7 @@ try_again:
 	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 	/*
@@ -1582,13 +1590,15 @@ try_again:
 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
 		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
 		res = -EBUSY;
-		goto out;
+		goto unlock_page;
 	}
 
 identify_page_state:
 	res = identify_page_state(pfn, p, page_flags);
-out:
+unlock_page:
 	unlock_page(p);
+unlock_mutex:
+	mutex_unlock(&mf_mutex);
 	return res;
 }
 EXPORT_SYMBOL_GPL(memory_failure);
-- 
GitLab


From 47af12bae17f99b5e77f8651cb7f3e1877610acf Mon Sep 17 00:00:00 2001
From: Aili Yao <yaoaili@kingsoft.com>
Date: Thu, 24 Jun 2021 18:39:58 -0700
Subject: [PATCH 3776/3804] mm,hwpoison: return -EHWPOISON to denote that the
 page has already been poisoned

When memory_failure() is called with MF_ACTION_REQUIRED on a page that
has already been hwpoisoned, memory_failure() could fail to send SIGBUS
to the affected process, which results in an infinite loop of MCEs.

Currently memory_failure() returns 0 if it's called for an already
hwpoisoned page, so the caller, kill_me_maybe(), could return without
sending SIGBUS to the current process.  An action required MCE is raised
when the current process accesses the broken memory, so no SIGBUS
means that the current process continues to run and soon accesses the
error page again, running into an MCE loop.

This issue can arise for example in the following scenarios:

 - Two or more threads access the poisoned page concurrently. If
   local MCE is enabled, the MCE handler handles each MCE event
   independently. So there's a race among MCE events, and the second or
   later threads fall into the situation in question.

 - If there was a preceding memory error event and memory_failure() for
   that event failed to unmap the error page for some reason, a
   subsequent memory access to the error page triggers the MCE loop
   situation.

To fix the issue, make memory_failure() return an error code when the
error page has already been hwpoisoned.  This allows the memory error
handler to control how it sends signals to userspace.  Also make sure
that any process touching a hwpoisoned page gets a SIGBUS even in the
"already hwpoisoned" path of memory_failure(), as is done in the page
fault path.

Link: https://lkml.kernel.org/r/20210521030156.2612074-3-nao.horiguchi@gmail.com
Signed-off-by: Aili Yao <yaoaili@kingsoft.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jue Wang <juew@google.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a7fc1cd6765a1..f24105db7081f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1253,7 +1253,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	if (TestSetPageHWPoison(head)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 		       pfn);
-		return 0;
+		return -EHWPOISON;
 	}
 
 	num_poisoned_pages_inc();
@@ -1461,6 +1461,7 @@ try_again:
 	if (TestSetPageHWPoison(p)) {
 		pr_err("Memory failure: %#lx: already hardware poisoned\n",
 			pfn);
+		res = -EHWPOISON;
 		goto unlock_mutex;
 	}
 
-- 
GitLab


From ea6d0630100b285f059d0a8d8e86f38a46407536 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Thu, 24 Jun 2021 18:40:01 -0700
Subject: [PATCH 3777/3804] mm/hwpoison: do not lock page again when
 me_huge_page() successfully recovers

Currently me_huge_page() temporarily unlocks the page to perform some
actions, then locks it again later.  My testcase (which calls
hard-offline on some tail page in a hugetlb, then accesses the address of
the hugetlb range) showed that the page allocation code detects this page
lock on a buddy page and prints out a "BUG: Bad page state" message.

check_new_page_bad() does not consider a page with __PG_HWPOISON as a bad
page, so this flag works as a kind of filter, but this filtering doesn't
work in this case because the "bad page" is not the actual hwpoisoned
page.  So stop locking the page again.  Actions to be taken depend on the
page type of the error, so page unlocking should be done in ->action()
callbacks.  So let's make that the convention and change all existing
callbacks accordingly.

Link: https://lkml.kernel.org/r/20210609072029.74645-1-nao.horiguchi@gmail.com
Fixes: 78bb920344b8 ("mm: hwpoison: dissolve in-use hugepage in unrecoverable memory error")
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory-failure.c | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f24105db7081f..6f5f78885ab42 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -658,6 +658,7 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
  */
 static int me_kernel(struct page *p, unsigned long pfn)
 {
+	unlock_page(p);
 	return MF_IGNORED;
 }
 
@@ -667,6 +668,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
 static int me_unknown(struct page *p, unsigned long pfn)
 {
 	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	unlock_page(p);
 	return MF_FAILED;
 }
 
@@ -675,6 +677,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
  */
 static int me_pagecache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
 	struct address_space *mapping;
 
 	delete_from_lru_cache(p);
@@ -683,8 +686,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 * For anonymous pages we're done the only reference left
 	 * should be the one m_f() holds.
 	 */
-	if (PageAnon(p))
-		return MF_RECOVERED;
+	if (PageAnon(p)) {
+		ret = MF_RECOVERED;
+		goto out;
+	}
 
 	/*
 	 * Now truncate the page in the page cache. This is really
@@ -698,7 +703,8 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 		/*
 		 * Page has been teared down in the meanwhile
 		 */
-		return MF_FAILED;
+		ret = MF_FAILED;
+		goto out;
 	}
 
 	/*
@@ -706,7 +712,10 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_mutex or not for this? Right now we don't.
 	 */
-	return truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, pfn, mapping);
+out:
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -782,24 +791,26 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  */
 static int me_swapcache_dirty(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
 	ClearPageUptodate(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_DELAYED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
+	unlock_page(p);
+	return ret;
 }
 
 static int me_swapcache_clean(struct page *p, unsigned long pfn)
 {
+	int ret;
+
 	delete_from_swap_cache(p);
 
-	if (!delete_from_lru_cache(p))
-		return MF_RECOVERED;
-	else
-		return MF_FAILED;
+	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
+	unlock_page(p);
+	return ret;
 }
 
 /*
@@ -820,6 +831,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	mapping = page_mapping(hpage);
 	if (mapping) {
 		res = truncate_error_page(hpage, pfn, mapping);
+		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
 		unlock_page(hpage);
@@ -834,7 +846,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 			page_ref_inc(p);
 			res = MF_RECOVERED;
 		}
-		lock_page(hpage);
 	}
 
 	return res;
@@ -866,6 +877,8 @@ static struct page_state {
 	unsigned long mask;
 	unsigned long res;
 	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
 	int (*action)(struct page *p, unsigned long pfn);
 } error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
@@ -929,6 +942,7 @@ static int page_action(struct page_state *ps, struct page *p,
 	int result;
 	int count;
 
+	/* page p should be unlocked after returning from ps->action().  */
 	result = ps->action(p, pfn);
 
 	count = page_count(p) - 1;
@@ -1313,7 +1327,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		goto out;
 	}
 
-	res = identify_page_state(pfn, p, page_flags);
+	return identify_page_state(pfn, p, page_flags);
 out:
 	unlock_page(head);
 	return res;
@@ -1596,6 +1610,8 @@ try_again:
 
 identify_page_state:
 	res = identify_page_state(pfn, p, page_flags);
+	mutex_unlock(&mf_mutex);
+	return res;
 unlock_page:
 	unlock_page(p);
 unlock_mutex:
-- 
GitLab


From b08e50dd64489e3997029d204f761cb57a3762d2 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 24 Jun 2021 18:40:04 -0700
Subject: [PATCH 3778/3804] mm/page_alloc: __alloc_pages_bulk(): do bounds
 check before accessing array

In the event that somebody would call this with an already fully
populated page_array, the last loop iteration would do an access beyond
the end of page_array.

It's of course extremely unlikely that would ever be done, but this
triggers my internal static analyzer.  Also, if it really is not
supposed to be invoked this way (i.e., with no NULL entries in
page_array), the nr_populated<nr_pages check could simply be removed
instead.

Link: https://lkml.kernel.org/r/20210507064504.1712559-1-linux@rasmusvillemoes.dk
Fixes: 0f87d9d30f21 ("mm/page_alloc: add an array-based interface to the bulk page allocator")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d1f5de1c1283b..7124bb00219de 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5053,7 +5053,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	 * Skip populated array elements to determine if any pages need
 	 * to be allocated before disabling IRQs.
 	 */
-	while (page_array && page_array[nr_populated] && nr_populated < nr_pages)
+	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
 		nr_populated++;
 
 	/* Use the single page allocator for one page. */
-- 
GitLab


From b3b64ebd38225d8032b5db42938d969b602040c2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Thu, 24 Jun 2021 18:40:07 -0700
Subject: [PATCH 3779/3804] mm/page_alloc: do bulk array bounds check after
 checking populated elements

Dan Carpenter reported the following:

  The patch 0f87d9d30f21: "mm/page_alloc: add an array-based interface
  to the bulk page allocator" from Apr 29, 2021, leads to the following
  static checker warning:

        mm/page_alloc.c:5338 __alloc_pages_bulk()
        warn: potentially one past the end of array 'page_array[nr_populated]'

The problem can occur if an array is passed in that is fully populated.
That potentially ends up allocating a single page and storing it past
the end of the array.  This patch returns 0 if the array is fully
populated.

Link: https://lkml.kernel.org/r/20210618125102.GU30378@techsingularity.net
Fixes: 0f87d9d30f21 ("mm/page_alloc: add an array-based interface to the bulk page allocator")
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7124bb00219de..ef2265f86b913 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5056,6 +5056,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
 		nr_populated++;
 
+	/* Already populated array? */
+	if (unlikely(page_array && nr_pages - nr_populated == 0))
+		return 0;
+
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
 		goto failed;
-- 
GitLab


From ee924d3ddd4561b7e6671bd431ff55bb9a24c47c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Thu, 24 Jun 2021 18:40:10 -0700
Subject: [PATCH 3780/3804] MAINTAINERS: fix Marek's identity again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix my name to use diacritics, since MAINTAINERS supports it.

Fix my e-mail address in MAINTAINERS' marvell10g PHY driver description,
I accidentally put my other e-mail address here.

Link: https://lkml.kernel.org/r/20210616113624.19351-1-kabel@kernel.org
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8c5ee008301a6..1d959fcbcbe12 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1816,7 +1816,7 @@ F:	drivers/pinctrl/pinctrl-gemini.c
 F:	drivers/rtc/rtc-ftrtc010.c
 
 ARM/CZ.NIC TURRIS SUPPORT
-M:	Marek Behun <kabel@kernel.org>
+M:	Marek Behún <kabel@kernel.org>
 S:	Maintained
 W:	https://www.turris.cz/
 F:	Documentation/ABI/testing/debugfs-moxtet
@@ -10946,7 +10946,7 @@ F:	include/linux/mv643xx.h
 
 MARVELL MV88X3310 PHY DRIVER
 M:	Russell King <linux@armlinux.org.uk>
-M:	Marek Behun <marek.behun@nic.cz>
+M:	Marek Behún <kabel@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/phy/marvell10g.c
-- 
GitLab


From 72a461adbe88acf6a8cc5dba7720cf94d7056154 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <kabel@kernel.org>
Date: Thu, 24 Jun 2021 18:40:13 -0700
Subject: [PATCH 3781/3804] mailmap: add Marek's other e-mail address and
 identity without diacritics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some of my commits were sent with identities
  Marek Behun <marek.behun@nic.cz>
  Marek Behún <marek.behun@nic.cz>
while the correct one is
  Marek Behún <kabel@kernel.org>

Put this into mailmap so that git shortlog prints all my commits under
one identity.

Link: https://lkml.kernel.org/r/20210616113624.19351-2-kabel@kernel.org
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.mailmap b/.mailmap
index c79a78766c07f..db58eedb44f1d 100644
--- a/.mailmap
+++ b/.mailmap
@@ -212,6 +212,8 @@ Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
 Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
+Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
+Marek Behún <kabel@kernel.org> Marek Behun <marek.behun@nic.cz>
 Mark Brown <broonie@sirena.org.uk>
 Mark Starovoytov <mstarovo@pm.me> <mstarovoitov@marvell.com>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
-- 
GitLab


From c6414e1a2bd26b0071e2b9d6034621f705dfd4c0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 25 Jun 2021 10:37:34 +0200
Subject: [PATCH 3782/3804] gpio: AMD8111 and TQMX86 require HAS_IOPORT_MAP

Both of these drivers use ioport_map(), so they need to
depend on HAS_IOPORT_MAP. Otherwise, they cannot be built
even with COMPILE_TEST on architectures without an ioport
implementation, such as ARCH=um.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 1dd0ec6727fde..3c69b785cb79d 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1383,6 +1383,7 @@ config GPIO_TPS68470
 config GPIO_TQMX86
 	tristate "TQ-Systems QTMX86 GPIO"
 	depends on MFD_TQMX86 || COMPILE_TEST
+	depends on HAS_IOPORT_MAP
 	select GPIOLIB_IRQCHIP
 	help
 	  This driver supports GPIO on the TQMX86 IO controller.
@@ -1450,6 +1451,7 @@ menu "PCI GPIO expanders"
 config GPIO_AMD8111
 	tristate "AMD 8111 GPIO driver"
 	depends on X86 || COMPILE_TEST
+	depends on HAS_IOPORT_MAP
 	help
 	  The AMD 8111 south bridge contains 32 GPIO pins which can be used.
 
-- 
GitLab


From c58db2abb19fd2bf23fb25bb3630a9f540df6042 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Date: Mon, 14 Jun 2021 06:43:17 +0900
Subject: [PATCH 3783/3804] spi: convert Xilinx Zynq UltraScale+ MPSoC GQSPI
 bindings to YAML

Convert spi for Xilinx Zynq UltraScale+ MPSoC GQSPI bindings
documentation to YAML.

Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@nigauri.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20210613214317.296667-1-iwamatsu@nigauri.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/spi/spi-zynqmp-qspi.txt          | 25 ---------
 .../bindings/spi/spi-zynqmp-qspi.yaml         | 51 +++++++++++++++++++
 2 files changed, 51 insertions(+), 25 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.txt
 create mode 100644 Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml

diff --git a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.txt b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.txt
deleted file mode 100644
index 0f6d37ff541c4..0000000000000
--- a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-Xilinx Zynq UltraScale+ MPSoC GQSPI controller Device Tree Bindings
--------------------------------------------------------------------
-
-Required properties:
-- compatible		: Should be "xlnx,zynqmp-qspi-1.0".
-- reg			: Physical base address and size of GQSPI registers map.
-- interrupts		: Property with a value describing the interrupt
-			  number.
-- clock-names		: List of input clock names - "ref_clk", "pclk"
-			  (See clock bindings for details).
-- clocks		: Clock phandles (see clock bindings for details).
-
-Optional properties:
-- num-cs		: Number of chip selects used.
-
-Example:
-	qspi: spi@ff0f0000 {
-		compatible = "xlnx,zynqmp-qspi-1.0";
-		clock-names = "ref_clk", "pclk";
-		clocks = <&misc_clk &misc_clk>;
-		interrupts = <0 15 4>;
-		interrupt-parent = <&gic>;
-		num-cs = <1>;
-		reg = <0x0 0xff0f0000 0x1000>,<0x0 0xc0000000 0x8000000>;
-	};
diff --git a/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
new file mode 100644
index 0000000000000..ea72c8001256f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-zynqmp-qspi.yaml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/spi/spi-zynqmp-qspi.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xilinx Zynq UltraScale+ MPSoC GQSPI controller Device Tree Bindings
+
+maintainers:
+  - Michal Simek <michal.simek@xilinx.com>
+
+allOf:
+  - $ref: "spi-controller.yaml#"
+
+properties:
+  compatible:
+    const: xlnx,zynqmp-qspi-1.0
+
+  reg:
+    maxItems: 2
+
+  interrupts:
+    maxItems: 1
+
+  clock-names:
+    items:
+      - const: ref_clk
+      - const: pclk
+
+  clocks:
+    maxItems: 2
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/xlnx-zynqmp-clk.h>
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+
+      qspi: spi@ff0f0000 {
+        compatible = "xlnx,zynqmp-qspi-1.0";
+        clocks = <&zynqmp_clk QSPI_REF>, <&zynqmp_clk LPD_LSBUS>;
+        clock-names = "ref_clk", "pclk";
+        interrupts = <0 15 4>;
+        interrupt-parent = <&gic>;
+        reg = <0x0 0xff0f0000 0x0 0x1000>,
+              <0x0 0xc0000000 0x0 0x8000000>;
+      };
+    };
-- 
GitLab


From b470e10eb43f19e08245cd87dd3192a8141cfbb5 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Fri, 25 Jun 2021 10:52:11 +0530
Subject: [PATCH 3784/3804] spi: core: add dma_map_dev for dma device

Some controllers like qcom geni need the parent device to be used for
dma mapping, so add a dma_map_dev field and let drivers fill this to be
used as the mapping device.

Signed-off-by: Vinod Koul <vkoul@kernel.org>
Link: https://lore.kernel.org/r/20210625052213.32260-4-vkoul@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 4 ++++
 include/linux/spi/spi.h | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index c296f08b36c18..35928d0843d93 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -993,11 +993,15 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg)
 
 	if (ctlr->dma_tx)
 		tx_dev = ctlr->dma_tx->device->dev;
+	else if (ctlr->dma_map_dev)
+		tx_dev = ctlr->dma_map_dev;
 	else
 		tx_dev = ctlr->dev.parent;
 
 	if (ctlr->dma_rx)
 		rx_dev = ctlr->dma_rx->device->dev;
+	else if (ctlr->dma_map_dev)
+		rx_dev = ctlr->dma_map_dev;
 	else
 		rx_dev = ctlr->dev.parent;
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 3ada36175e5f8..97b8d12b5f2bb 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -588,6 +588,7 @@ struct spi_controller {
 	bool			(*can_dma)(struct spi_controller *ctlr,
 					   struct spi_device *spi,
 					   struct spi_transfer *xfer);
+	struct device *dma_map_dev;
 
 	/*
 	 * These hooks are for drivers that want to use the generic
-- 
GitLab


From 808e9df477757955a9644ca323010339be0c40ee Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Fri, 25 Jun 2021 20:36:55 +0300
Subject: [PATCH 3785/3804] userfaultfd: uapi: fix UFFDIO_CONTINUE ioctl
 request definition

This ioctl request reads from the uffdio_continue structure written by
userspace, which justifies the _IOC_WRITE flag.  It also writes back to
that structure, which justifies the _IOC_READ flag.

See NOTEs in include/uapi/asm-generic/ioctl.h for more information.

Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/userfaultfd.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index bafbeb1a26245..650480f41f1d5 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -80,8 +80,8 @@
 				      struct uffdio_zeropage)
 #define UFFDIO_WRITEPROTECT	_IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
 				      struct uffdio_writeprotect)
-#define UFFDIO_CONTINUE		_IOR(UFFDIO, _UFFDIO_CONTINUE,	\
-				     struct uffdio_continue)
+#define UFFDIO_CONTINUE		_IOWR(UFFDIO, _UFFDIO_CONTINUE,	\
+				      struct uffdio_continue)
 
 /* read() structure */
 struct uffd_msg {
-- 
GitLab


From b4b27b9eed8ebdbf9f3046197d29d733c8c944f3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Jun 2021 13:32:54 -0700
Subject: [PATCH 3786/3804] Revert "signal: Allow tasks to cache one sigqueue
 struct"

This reverts commits 4bad58ebc8bc4f20d89cff95417c9b4674769709 (and
399f8dd9a866e107639eabd3c1979cd526ca3a98, which tried to fix it).

I do not believe these are correct, and I'm about to release 5.13, so am
reverting them out of an abundance of caution.

The locking is odd, and appears broken.

On the allocation side (in __sigqueue_alloc()), the locking is somewhat
straightforward: it depends on sighand->siglock.  Since one caller
doesn't hold that lock, it further then tests 'sigqueue_flags' to avoid
the case with no locks held.

On the freeing side (in sigqueue_cache_or_free()), there is no locking
at all, and the logic instead depends on 'current' being a single
thread, and not able to race with itself.

To make things more exciting, there's also the data race between freeing
a signal and allocating one, which is handled by using WRITE_ONCE() and
READ_ONCE(), and being mutually exclusive wrt the initial state (ie
freeing will only free if the old state was NULL, while allocating will
obviously only use the value if it was non-NULL, so only one or the
other will actually act on the value).

However, while the free->alloc paths do seem mutually exclusive thanks
to just the data value dependency, it's not clear what the memory
ordering constraints are on it.  Could writes from the previous
allocation possibly be delayed and seen by the new allocation later,
causing logical inconsistencies?

So it's all very exciting and unusual.

And in particular, it seems that the freeing side is incorrect in
depending on "current" being single-threaded.  Yes, 'current' is a
single thread, but in the presence of asynchronous events even a single
thread can have data races.

And such asynchronous events can and do happen, with interrupts causing
signals to be flushed and thus free'd (for example - sending a
SIGCONT/SIGSTOP can happen from interrupt context, and can flush
previously queued process control signals).

So regardless of all the other questions about the memory ordering and
locking for this new cached allocation, the sigqueue_cache_or_free()
assumptions seem to be fundamentally incorrect.

It may be that people will show me the errors of my ways, and tell me
why this is all safe after all.  We can reinstate it if so.  But my
current belief is that the WRITE_ONCE() that sets the cached entry needs
to be a smp_store_release(), and the READ_ONCE() that finds a cached
entry needs to be a smp_load_acquire() to handle memory ordering
correctly.

And the sequence in sigqueue_cache_or_free() would need to either use a
lock or at least be interrupt-safe some way (perhaps by using something
like the percpu 'cmpxchg': it doesn't need to be SMP-safe, but like the
percpu operations it needs to be interrupt-safe).

Fixes: 399f8dd9a866 ("signal: Prevent sigqueue caching after task got released")
Fixes: 4bad58ebc8bc ("signal: Allow tasks to cache one sigqueue struct")
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h  |  1 -
 include/linux/signal.h |  1 -
 kernel/exit.c          |  1 -
 kernel/fork.c          |  1 -
 kernel/signal.c        | 59 ++----------------------------------------
 5 files changed, 2 insertions(+), 61 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 28a98fc4ded4f..32813c345115f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -997,7 +997,6 @@ struct task_struct {
 	/* Signal handlers: */
 	struct signal_struct		*signal;
 	struct sighand_struct __rcu		*sighand;
-	struct sigqueue			*sigqueue_cache;
 	sigset_t			blocked;
 	sigset_t			real_blocked;
 	/* Restored if set_restore_sigmask() was used: */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 201f88e3738b2..5160fd45e5cab 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -267,7 +267,6 @@ static inline void init_sigpending(struct sigpending *sig)
 }
 
 extern void flush_sigqueue(struct sigpending *queue);
-extern void exit_task_sigqueue_cache(struct task_struct *tsk);
 
 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
 static inline int valid_signal(unsigned long sig)
diff --git a/kernel/exit.c b/kernel/exit.c
index fd1c04193e18b..65809fac30387 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -162,7 +162,6 @@ static void __exit_signal(struct task_struct *tsk)
 		flush_sigqueue(&sig->shared_pending);
 		tty_kref_put(tty);
 	}
-	exit_task_sigqueue_cache(tsk);
 }
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
diff --git a/kernel/fork.c b/kernel/fork.c
index dc06afd725cbd..a070caed5c8ed 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2008,7 +2008,6 @@ static __latent_entropy struct task_struct *copy_process(
 	spin_lock_init(&p->alloc_lock);
 
 	init_sigpending(&p->pending);
-	p->sigqueue_cache = NULL;
 
 	p->utime = p->stime = p->gtime = 0;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
diff --git a/kernel/signal.c b/kernel/signal.c
index f1ecd8f0c11d9..30a0bee5ff9bb 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -431,22 +431,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	rcu_read_unlock();
 
 	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
-		/*
-		 * Preallocation does not hold sighand::siglock so it can't
-		 * use the cache. The lockless caching requires that only
-		 * one consumer and only one producer run at a time.
-		 *
-		 * For the regular allocation case it is sufficient to
-		 * check @q for NULL because this code can only be called
-		 * if the target task @t has not been reaped yet; which
-		 * means this code can never observe the error pointer which is
-		 * written to @t->sigqueue_cache in exit_task_sigqueue_cache().
-		 */
-		q = READ_ONCE(t->sigqueue_cache);
-		if (!q || sigqueue_flags)
-			q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
-		else
-			WRITE_ONCE(t->sigqueue_cache, NULL);
+		q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
 	} else {
 		print_dropped_signal(sig);
 	}
@@ -463,53 +448,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	return q;
 }
 
-void exit_task_sigqueue_cache(struct task_struct *tsk)
-{
-	/* Race free because @tsk is mopped up */
-	struct sigqueue *q = tsk->sigqueue_cache;
-
-	if (q) {
-		/*
-		 * Hand it back to the cache as the task might
-		 * be self reaping which would leak the object.
-		 */
-		 kmem_cache_free(sigqueue_cachep, q);
-	}
-
-	/*
-	 * Set an error pointer to ensure that @tsk will not cache a
-	 * sigqueue when it is reaping it's child tasks
-	 */
-	tsk->sigqueue_cache = ERR_PTR(-1);
-}
-
-static void sigqueue_cache_or_free(struct sigqueue *q)
-{
-	/*
-	 * Cache one sigqueue per task. This pairs with the consumer side
-	 * in __sigqueue_alloc() and needs READ/WRITE_ONCE() to prevent the
-	 * compiler from store tearing and to tell KCSAN that the data race
-	 * is intentional when run without holding current->sighand->siglock,
-	 * which is fine as current obviously cannot run __sigqueue_free()
-	 * concurrently.
-	 *
-	 * The NULL check is safe even if current has been reaped already,
-	 * in which case exit_task_sigqueue_cache() wrote an error pointer
-	 * into current->sigqueue_cache.
-	 */
-	if (!READ_ONCE(current->sigqueue_cache))
-		WRITE_ONCE(current->sigqueue_cache, q);
-	else
-		kmem_cache_free(sigqueue_cachep, q);
-}
-
 static void __sigqueue_free(struct sigqueue *q)
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
 	if (atomic_dec_and_test(&q->user->sigpending))
 		free_uid(q->user);
-	sigqueue_cache_or_free(q);
+	kmem_cache_free(sigqueue_cachep, q);
 }
 
 void flush_sigqueue(struct sigpending *queue)
-- 
GitLab


From 8215d5b7f15f8643bf12fe005b2bc0cc322aff62 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Sun, 27 Jun 2021 21:32:29 +0800
Subject: [PATCH 3787/3804] MAINTAINERS: erofs: update my email address

Old email address will be invalid after a few days, update it
to kernel.org one.

Link: https://lore.kernel.org/r/20210627133229.8025-1-chao@kernel.org
Signed-off-by: Chao Yu <chao@kernel.org>
Acked-by: Gao Xiang <xiang@kernel.org>
Signed-off-by: Gao Xiang <xiang@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b706dd20ff2bf..efcae0848559d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6771,7 +6771,7 @@ F:	include/video/s1d13xxxfb.h
 
 EROFS FILE SYSTEM
 M:	Gao Xiang <xiang@kernel.org>
-M:	Chao Yu <yuchao0@huawei.com>
+M:	Chao Yu <chao@kernel.org>
 L:	linux-erofs@lists.ozlabs.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs.git
-- 
GitLab


From 62fb9874f5da54fdb243003b386128037319b219 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Jun 2021 15:21:11 -0700
Subject: [PATCH 3788/3804] Linux 5.13

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 3e8dbe68eac8d..0565caea0362a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 13
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION =
 NAME = Opossums on Parade
 
 # *DOCUMENTATION*
-- 
GitLab


From 2d0a9eb23ccfdf11308bec6db0bc007585d919d2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 26 Jun 2021 22:44:11 +0200
Subject: [PATCH 3789/3804] time/kunit: Add missing MODULE_LICENSE()

[ mingo: MODULE_LICENSE() takes a string. ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/time/time_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/time_test.c b/kernel/time/time_test.c
index 341ebfad5e99d..831e8e779acef 100644
--- a/kernel/time/time_test.c
+++ b/kernel/time/time_test.c
@@ -96,3 +96,4 @@ static struct kunit_suite time_test_suite = {
 };
 
 kunit_test_suite(time_test_suite);
+MODULE_LICENSE("GPL");
-- 
GitLab


From 95778c2d0979618e3349b1d2324ec282a5a6adbf Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Mon, 22 Mar 2021 15:44:08 +0100
Subject: [PATCH 3790/3804] media: video-mux: Skip dangling endpoints

i.MX6 device tree include files contain dangling endpoints for the
board device tree writers' convenience. These are still included in
many existing device trees.
Treat dangling endpoints as non-existent to support them.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Fixes: 612b385efb1e ("media: video-mux: Create media links in bound notifier")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/video-mux.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/video-mux.c b/drivers/media/platform/video-mux.c
index f7e2a5e48ccf0..905005e271ca9 100644
--- a/drivers/media/platform/video-mux.c
+++ b/drivers/media/platform/video-mux.c
@@ -364,7 +364,7 @@ static int video_mux_async_register(struct video_mux *vmux,
 
 	for (i = 0; i < num_input_pads; i++) {
 		struct v4l2_async_subdev *asd;
-		struct fwnode_handle *ep;
+		struct fwnode_handle *ep, *remote_ep;
 
 		ep = fwnode_graph_get_endpoint_by_id(
 			dev_fwnode(vmux->subdev.dev), i, 0,
@@ -372,6 +372,14 @@ static int video_mux_async_register(struct video_mux *vmux,
 		if (!ep)
 			continue;
 
+		/* Skip dangling endpoints for backwards compatibility */
+		remote_ep = fwnode_graph_get_remote_endpoint(ep);
+		if (!remote_ep) {
+			fwnode_handle_put(ep);
+			continue;
+		}
+		fwnode_handle_put(remote_ep);
+
 		asd = v4l2_async_notifier_add_fwnode_remote_subdev(
 			&vmux->notifier, ep, struct v4l2_async_subdev);
 
-- 
GitLab


From 11420749c6b4b237361750de3d5b5579175f8622 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Date: Fri, 23 Apr 2021 19:27:45 +0200
Subject: [PATCH 3791/3804] media: mtk-vpu: on suspend, read/write regs only if
 vpu is running

If the vpu is not running, we should not rely on VPU_IDLE_REG
value. In this case, the suspend cb should only unprepare the
clock. This fixes a system-wide suspend to ram failure:

[  273.073363] PM: suspend entry (deep)
[  273.410502] mtk-msdc 11230000.mmc: phase: [map:ffffffff] [maxlen:32] [final:10]
[  273.455926] Filesystems sync: 0.378 seconds
[  273.589707] Freezing user space processes ... (elapsed 0.003 seconds) done.
[  273.600104] OOM killer disabled.
[  273.603409] Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done.
[  273.613361] mwifiex_sdio mmc2:0001:1: None of the WOWLAN triggers enabled
[  274.784952] mtk_vpu 10020000.vpu: vpu idle timeout
[  274.789764] PM: dpm_run_callback(): platform_pm_suspend+0x0/0x70 returns -5
[  274.796740] mtk_vpu 10020000.vpu: PM: failed to suspend: error -5
[  274.802842] PM: Some devices failed to suspend, or early wake event detected
[  275.426489] OOM killer enabled.
[  275.429718] Restarting tasks ...
[  275.435765] done.
[  275.447510] PM: suspend exit

Fixes: 1f565e263c3e ("media: mtk-vpu: VPU should be in idle state before system is suspended")
Signed-off-by: Dafna Hirschfeld <dafna.hirschfeld@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/mtk-vpu/mtk_vpu.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c
index ef458b417fa73..ec290dde59cfd 100644
--- a/drivers/media/platform/mtk-vpu/mtk_vpu.c
+++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c
@@ -985,6 +985,12 @@ static int mtk_vpu_suspend(struct device *dev)
 		return ret;
 	}
 
+	if (!vpu_running(vpu)) {
+		vpu_clock_disable(vpu);
+		clk_unprepare(vpu->clk);
+		return 0;
+	}
+
 	mutex_lock(&vpu->vpu_mutex);
 	/* disable vpu timer interrupt */
 	vpu_cfg_writel(vpu, vpu_cfg_readl(vpu, VPU_INT_STATUS) | VPU_IDLE_STATE,
-- 
GitLab


From 61c6f04a988e420a1fc5e8e81cf9aebf142a7bd6 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 23 Apr 2021 22:44:57 +0200
Subject: [PATCH 3792/3804] media: s5p-mfc: Fix display delay control creation

v4l2_ctrl_new_std() fails if the caller provides no 'step' parameter for
integer control, so define it to fix following error:

s5p_mfc_dec_ctrls_setup:1166: Adding control (1) failed

Fixes: c3042bff918a ("media: s5p-mfc: Use display delay and display enable std controls")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/platform/s5p-mfc/s5p_mfc_dec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
index a92a9ca6e87eb..c1d3bda8385b1 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
@@ -172,6 +172,7 @@ static struct mfc_control controls[] = {
 		.type = V4L2_CTRL_TYPE_INTEGER,
 		.minimum = 0,
 		.maximum = 16383,
+		.step = 1,
 		.default_value = 0,
 	},
 	{
-- 
GitLab


From 66d9282523b3228183b14d9f812872dd2620704d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 28 Jun 2021 16:02:19 +0100
Subject: [PATCH 3793/3804] mm/page_alloc: Correct return value of populated
 elements if bulk array is populated

Dave Jones reported the following

	This made it into 5.13 final, and completely breaks NFSD for me
	(Serving tcp v3 mounts).  Existing mounts on clients hang, as do
	new mounts from new clients.  Rebooting the server back to rc7
	everything recovers.

The commit b3b64ebd3822 ("mm/page_alloc: do bulk array bounds check after
checking populated elements") returns the wrong value if the array is
already populated which is interpreted as an allocation failure. Dave
reported this fixes his problem and it also passed a test running dbench
over NFS.

Fixes: b3b64ebd3822 ("mm/page_alloc: do bulk array bounds check after checking populated elements")
Reported-and-tested-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org> [5.13+]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef2265f86b913..04220581579cd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5058,7 +5058,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 
 	/* Already populated array? */
 	if (unlikely(page_array && nr_pages - nr_populated == 0))
-		return 0;
+		return nr_populated;
 
 	/* Use the single page allocator for one page. */
 	if (nr_pages - nr_populated == 1)
-- 
GitLab


From 0ae71c7720e3ae3aabd2e8a072d27f7bd173d25c Mon Sep 17 00:00:00 2001
From: Rodrigo Campos <rodrigo@kinvolk.io>
Date: Mon, 17 May 2021 12:39:07 -0700
Subject: [PATCH 3794/3804] seccomp: Support atomic "addfd + send reply"

Alban Crequy reported a race condition userspace faces when we want to
add some fds and make the syscall return them[1] using seccomp notify.

The problem is that currently two different ioctl() calls are needed by
the process handling the syscalls (agent) for another userspace process
(target): SECCOMP_IOCTL_NOTIF_ADDFD to allocate the fd and
SECCOMP_IOCTL_NOTIF_SEND to return that value. Therefore, it is possible
for the agent to do the first ioctl to add a file descriptor but the
target is interrupted (EINTR) before the agent does the second ioctl()
call.

This patch adds a flag to the ADDFD ioctl() so it adds the fd and
returns that value atomically to the target program, as suggested by
Kees Cook[2]. This is done by simply allowing
seccomp_do_user_notification() to add the fd and return it in this case.
Therefore, in this case the target wakes up from the wait in
seccomp_do_user_notification() either to interrupt the syscall or to add
the fd and return it.

This "allocate an fd and return" functionality is useful for syscalls
that return a file descriptor only, like connect(2). Other syscalls that
return a file descriptor but not as return value (or return more than
one fd), like socketpair(), pipe(), recvmsg with SCM_RIGHTs, will not
work with this flag.

This effectively combines SECCOMP_IOCTL_NOTIF_ADDFD and
SECCOMP_IOCTL_NOTIF_SEND into an atomic operation. Neither the notification's
return value nor its error can be set by the user. Upon successful invocation
of the SECCOMP_IOCTL_NOTIF_ADDFD ioctl with the SECCOMP_ADDFD_FLAG_SEND
flag, the notifying process's errno will be 0, and the return value will
be the file descriptor number that was installed.

[1]: https://lore.kernel.org/lkml/CADZs7q4sw71iNHmV8EOOXhUKJMORPzF7thraxZYddTZsxta-KQ@mail.gmail.com/
[2]: https://lore.kernel.org/lkml/202012011322.26DCBC64F2@keescook/

Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210517193908.3113-4-sargun@sargun.me
---
 .../userspace-api/seccomp_filter.rst          | 12 +++++
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 51 ++++++++++++++++---
 3 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 6efb41cc80725..d61219889e494 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
 returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
 be the same ``id`` as in ``struct seccomp_notif``.
 
+Userspace can also add file descriptors to the notifying process via
+``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
+``struct seccomp_notif_addfd`` should be the same ``id`` as in
+``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
+like O_CLOEXEC on the file descriptor in the notifying process. If the supervisor
+wants to inject the file descriptor with a specific number, the
+``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
+the specific number to use. If that file descriptor is already open in the
+notifying process it will be replaced. The supervisor can also add an FD, and
+respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
+value will be the injected file descriptor number.
+
 It is worth noting that ``struct seccomp_data`` contains the values of register
 arguments to the syscall, but does not contain pointers to memory. The task's
 memory is accessible to suitably privileged traces via ``ptrace()`` or
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 6ba18b82a02e4..78074254ab98a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -115,6 +115,7 @@ struct seccomp_notif_resp {
 
 /* valid flags for seccomp_notif_addfd */
 #define SECCOMP_ADDFD_FLAG_SETFD	(1UL << 0) /* Specify remote fd */
+#define SECCOMP_ADDFD_FLAG_SEND		(1UL << 1) /* Addfd and return it, atomically */
 
 /**
  * struct seccomp_notif_addfd
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9f58049ac16d9..057e17f3215d5 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -107,6 +107,7 @@ struct seccomp_knotif {
  *      installing process should allocate the fd as normal.
  * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
  *         is allowed.
+ * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
  * @ret: The return value of the installing process. It is set to the fd num
  *       upon success (>= 0).
  * @completion: Indicates that the installing process has completed fd
@@ -118,6 +119,7 @@ struct seccomp_kaddfd {
 	struct file *file;
 	int fd;
 	unsigned int flags;
+	__u32 ioctl_flags;
 
 	union {
 		bool setfd;
@@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter)
 	return filter->notif->next_id++;
 }
 
-static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd)
+static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n)
 {
+	int fd;
+
 	/*
 	 * Remove the notification, and reset the list pointers, indicating
 	 * that it has been handled.
 	 */
 	list_del_init(&addfd->list);
 	if (!addfd->setfd)
-		addfd->ret = receive_fd(addfd->file, addfd->flags);
+		fd = receive_fd(addfd->file, addfd->flags);
 	else
-		addfd->ret = receive_fd_replace(addfd->fd, addfd->file,
-						addfd->flags);
+		fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags);
+	addfd->ret = fd;
+
+	if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) {
+		/* If we fail reset and return an error to the notifier */
+		if (fd < 0) {
+			n->state = SECCOMP_NOTIFY_SENT;
+		} else {
+			/* Return the FD we just added */
+			n->flags = 0;
+			n->error = 0;
+			n->val = fd;
+		}
+	}
+
+	/*
+	 * Mark the notification as completed. From this point, addfd mem
+	 * might be invalidated and we can't safely read it anymore.
+	 */
 	complete(&addfd->completion);
 }
 
@@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall,
 						 struct seccomp_kaddfd, list);
 		/* Check if we were woken up by a addfd message */
 		if (addfd)
-			seccomp_handle_addfd(addfd);
+			seccomp_handle_addfd(addfd, &n);
 
 	}  while (n.state != SECCOMP_NOTIFY_REPLIED);
 
@@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
 	if (addfd.newfd_flags & ~O_CLOEXEC)
 		return -EINVAL;
 
-	if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD)
+	if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND))
 		return -EINVAL;
 
 	if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD))
@@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
 	if (!kaddfd.file)
 		return -EBADF;
 
+	kaddfd.ioctl_flags = addfd.flags;
 	kaddfd.flags = addfd.newfd_flags;
 	kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD;
 	kaddfd.fd = addfd.newfd;
@@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter,
 		goto out_unlock;
 	}
 
+	if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) {
+		/*
+		 * Disallow queuing an atomic addfd + send reply while there are
+		 * some addfd requests still to process.
+		 *
+		 * There is no clear reason to support it and allows us to keep
+		 * the loop on the other side straight-forward.
+		 */
+		if (!list_empty(&knotif->addfd)) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+
+		/* Allow exactly only one reply */
+		knotif->state = SECCOMP_NOTIFY_REPLIED;
+	}
+
 	list_add(&kaddfd.list, &knotif->addfd);
 	complete(&knotif->ready);
 	mutex_unlock(&filter->notify_lock);
-- 
GitLab


From e540ad97e73cefb41e93d0c06d0fe6a8620a77e0 Mon Sep 17 00:00:00 2001
From: Rodrigo Campos <rodrigo@kinvolk.io>
Date: Mon, 17 May 2021 12:39:08 -0700
Subject: [PATCH 3795/3804] selftests/seccomp: Add test for atomic addfd+send

This just adds a test to verify that when using the newly introduced flag
to ADDFD, a valid fd is added and returned as the syscall result.

Signed-off-by: Rodrigo Campos <rodrigo@kinvolk.io>
Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Acked-by: Tycho Andersen <tycho@tycho.pizza>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20210517193908.3113-5-sargun@sargun.me
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 98c3b647f54dc..e2ba7adc26941 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
 };
 #endif
 
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND	(1UL << 1) /* Addfd and return it, atomically */
+#endif
+
 struct seccomp_notif_addfd_small {
 	__u64 id;
 	char weird[4];
@@ -3976,8 +3980,14 @@ TEST(user_notification_addfd)
 	ASSERT_GE(pid, 0);
 
 	if (pid == 0) {
+		/* fds will be added and this value is expected */
 		if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
 			exit(1);
+
+		/* Atomic addfd+send is received here. Check it is a valid fd */
+		if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+			exit(1);
+
 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
 	}
 
@@ -4056,6 +4066,30 @@ TEST(user_notification_addfd)
 	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
 	ASSERT_EQ(addfd.id, req.id);
 
+	/* Verify we can do an atomic addfd and send */
+	addfd.newfd = 0;
+	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+
+	/* Child has fds 0-6 and 42 used, we expect the lower fd available: 7 */
+	EXPECT_EQ(fd, 7);
+	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+	/*
+	 * This sets the ID of the ADD FD to the last request plus 1. The
+	 * notification ID increments 1 per notification.
+	 */
+	addfd.id = req.id + 1;
+
+	/* This spins until the underlying notification is generated */
+	while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+	       errno != -EINPROGRESS)
+		nanosleep(&delay, NULL);
+
+	memset(&req, 0, sizeof(req));
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+	ASSERT_EQ(addfd.id, req.id);
+
 	resp.id = req.id;
 	resp.error = 0;
 	resp.val = USER_NOTIF_MAGIC;
@@ -4116,6 +4150,10 @@ TEST(user_notification_addfd_rlimit)
 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
 	EXPECT_EQ(errno, EMFILE);
 
+	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+	EXPECT_EQ(errno, EMFILE);
+
 	addfd.newfd = 100;
 	addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
-- 
GitLab


From 93e720d710dfe689099c23bb91414303cf715d27 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 26 May 2021 19:49:15 -0700
Subject: [PATCH 3796/3804] selftests/seccomp: More closely track fds being
 assigned

Since the open fds might not always start at "4" (especially when
running under kselftest, etc), start counting from the first assigned
fd, rather than using the more permissive EXPECT_GE(fd, 0).

Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/lkml/20210527032948.3730953-1-keescook@chromium.org
Reviewed-by: Rodrigo Campos <rodrigo@kinvolk.io>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e2ba7adc26941..03b37e6609652 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3954,7 +3954,7 @@ TEST(user_notification_addfd)
 {
 	pid_t pid;
 	long ret;
-	int status, listener, memfd, fd;
+	int status, listener, memfd, fd, nextfd;
 	struct seccomp_notif_addfd addfd = {};
 	struct seccomp_notif_addfd_small small = {};
 	struct seccomp_notif_addfd_big big = {};
@@ -3963,18 +3963,21 @@ TEST(user_notification_addfd)
 	/* 100 ms */
 	struct timespec delay = { .tv_nsec = 100000000 };
 
+	/* There may be arbitrary already-open fds at test start. */
 	memfd = memfd_create("test", 0);
 	ASSERT_GE(memfd, 0);
+	nextfd = memfd + 1;
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	/* fd: 4 */
 	/* Check that the basic notification machinery works */
 	listener = user_notif_syscall(__NR_getppid,
 				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
-	ASSERT_GE(listener, 0);
+	ASSERT_EQ(listener, nextfd++);
 
 	pid = fork();
 	ASSERT_GE(pid, 0);
@@ -4029,14 +4032,14 @@ TEST(user_notification_addfd)
 
 	/* Verify we can set an arbitrary remote fd */
 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
-	EXPECT_GE(fd, 0);
+	EXPECT_EQ(fd, nextfd++);
 	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
 
 	/* Verify we can set an arbitrary remote fd with large size */
 	memset(&big, 0x0, sizeof(big));
 	big.addfd = addfd;
 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
-	EXPECT_GE(fd, 0);
+	EXPECT_EQ(fd, nextfd++);
 
 	/* Verify we can set a specific remote fd */
 	addfd.newfd = 42;
@@ -4070,9 +4073,11 @@ TEST(user_notification_addfd)
 	addfd.newfd = 0;
 	addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
 	fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
-
-	/* Child has fds 0-6 and 42 used, we expect the lower fd available: 7 */
-	EXPECT_EQ(fd, 7);
+	/*
+	 * Child has earlier "low" fds and now 42, so we expect the next
+	 * lowest available fd to be assigned here.
+	 */
+	EXPECT_EQ(fd, nextfd++);
 	EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
 
 	/*
-- 
GitLab


From 62ddb91b7771626658c382c2b849a058f1586123 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 26 May 2021 19:46:30 -0700
Subject: [PATCH 3797/3804] selftests/seccomp: Flush benchmark output

When running the seccomp benchmark under a test runner, it wouldn't
provide any feedback on progress. Set stdout unbuffered.

Suggested-by: Will Drewry <wad@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_benchmark.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index fcc8065852665..363cad7550420 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -143,6 +143,8 @@ int main(int argc, char *argv[])
 	unsigned long long native, filter1, filter2, bitmap1, bitmap2;
 	unsigned long long entry, per_filter1, per_filter2;
 
+	setbuf(stdout, NULL);
+
 	printf("Current BPF sysctl settings:\n");
 	system("sysctl net.core.bpf_jit_enable");
 	system("sysctl net.core.bpf_jit_harden");
-- 
GitLab


From 9a03abc16c77062c73972df08206f1031862d9b4 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 16:18:34 -0700
Subject: [PATCH 3798/3804] selftests/seccomp: Avoid using "sysctl" for report

Instead of depending on "sysctl" being installed, just use "grep -H" for
sysctl status reporting. Additionally report kernel version for easier
comparisons.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 tools/testing/selftests/seccomp/seccomp_benchmark.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 363cad7550420..6e5102a7d7c9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -145,9 +145,13 @@ int main(int argc, char *argv[])
 
 	setbuf(stdout, NULL);
 
+	printf("Running on:\n");
+	system("uname -a");
+
 	printf("Current BPF sysctl settings:\n");
-	system("sysctl net.core.bpf_jit_enable");
-	system("sysctl net.core.bpf_jit_harden");
+	/* Avoid using "sysctl" which may not be installed. */
+	system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+	system("grep -H . /proc/sys/net/core/bpf_jit_harden");
 
 	if (argc > 1)
 		samples = strtoull(argv[1], NULL, 0);
-- 
GitLab


From 49d6feef94c9f47ac4030563058f8a36267597b0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 30 Jun 2021 18:44:46 +0200
Subject: [PATCH 3799/3804] cpufreq: intel_pstate: Combine ->stop_cpu() and
 ->offline()

Combine the ->stop_cpu() and ->offline() callback routines for
intel_pstate in the active mode so as to avoid setting the
->stop_cpu callback pointer which is going to be dropped from
the framework.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/intel_pstate.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 6012964df51ba..bb4549959b113 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2532,7 +2532,7 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
 	return 0;
 }
 
-static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
+static int intel_cpufreq_cpu_offline(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu = all_cpu_data[policy->cpu];
 
@@ -2577,11 +2577,11 @@ static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
 	return 0;
 }
 
-static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
 {
-	pr_debug("CPU %d stopping\n", policy->cpu);
-
 	intel_pstate_clear_update_util_hook(policy->cpu);
+
+	return intel_cpufreq_cpu_offline(policy);
 }
 
 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
@@ -2654,7 +2654,6 @@ static struct cpufreq_driver intel_pstate = {
 	.resume		= intel_pstate_resume,
 	.init		= intel_pstate_cpu_init,
 	.exit		= intel_pstate_cpu_exit,
-	.stop_cpu	= intel_pstate_stop_cpu,
 	.offline	= intel_pstate_cpu_offline,
 	.online		= intel_pstate_cpu_online,
 	.update_limits	= intel_pstate_update_limits,
@@ -2956,7 +2955,7 @@ static struct cpufreq_driver intel_cpufreq = {
 	.fast_switch	= intel_cpufreq_fast_switch,
 	.init		= intel_cpufreq_cpu_init,
 	.exit		= intel_cpufreq_cpu_exit,
-	.offline	= intel_pstate_cpu_offline,
+	.offline	= intel_cpufreq_cpu_offline,
 	.online		= intel_pstate_cpu_online,
 	.suspend	= intel_pstate_suspend,
 	.resume		= intel_pstate_resume,
-- 
GitLab


From 9357a380f90a89a168d505561d11f68272e0e768 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 23 Jun 2021 09:54:39 +0530
Subject: [PATCH 3800/3804] cpufreq: CPPC: Migrate to ->exit() callback instead
 of ->stop_cpu()

Commit 367dc4aa932b ("cpufreq: Add stop CPU callback to cpufreq_driver
interface") added the ->stop_cpu() callback to allow the drivers to do
clean up before the CPU is completely down and its state can't be
modified.

At that time the CPU hotplug framework used to call the cpufreq core's
registered notifier for different events like CPU_DOWN_PREPARE and
CPU_POST_DEAD. The ->stop_cpu() callback was called during the
CPU_DOWN_PREPARE event.

This is no longer the case, cpuhp_cpufreq_offline() is called only
once by the CPU hotplug core now and we don't really need two
separate callbacks for cpufreq drivers, i.e. ->stop_cpu() and
->exit(), as everything can be done from the ->exit() callback
itself.

Migrate to using the ->exit() callback instead of ->stop_cpu().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Minor edits in the changelog and subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cppc_cpufreq.c | 46 ++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 2f769b1630c57..be4f62e2c5f14 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -182,27 +182,6 @@ static int cppc_verify_policy(struct cpufreq_policy_data *policy)
 	return 0;
 }
 
-static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
-{
-	struct cppc_cpudata *cpu_data = policy->driver_data;
-	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
-	unsigned int cpu = policy->cpu;
-	int ret;
-
-	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;
-
-	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
-	if (ret)
-		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
-			 caps->lowest_perf, cpu, ret);
-
-	/* Remove CPU node from list and free driver data for policy */
-	free_cpumask_var(cpu_data->shared_cpu_map);
-	list_del(&cpu_data->node);
-	kfree(policy->driver_data);
-	policy->driver_data = NULL;
-}
-
 /*
  * The PCC subspace describes the rate at which platform can accept commands
  * on the shared PCC channel (including READs which do not count towards freq
@@ -352,6 +331,29 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	return ret;
 }
 
+static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct cppc_cpudata *cpu_data = policy->driver_data;
+	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
+	unsigned int cpu = policy->cpu;
+	int ret;
+
+	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;
+
+	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
+	if (ret)
+		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
+			 caps->lowest_perf, cpu, ret);
+
+	/* Remove CPU node from list and free driver data for policy */
+	free_cpumask_var(cpu_data->shared_cpu_map);
+	list_del(&cpu_data->node);
+	kfree(policy->driver_data);
+	policy->driver_data = NULL;
+
+	return 0;
+}
+
 static inline u64 get_delta(u64 t1, u64 t0)
 {
 	if (t1 > t0 || t0 > ~(u32)0)
@@ -451,7 +453,7 @@ static struct cpufreq_driver cppc_cpufreq_driver = {
 	.target = cppc_cpufreq_set_target,
 	.get = cppc_cpufreq_get_rate,
 	.init = cppc_cpufreq_cpu_init,
-	.stop_cpu = cppc_cpufreq_stop_cpu,
+	.exit = cppc_cpufreq_cpu_exit,
 	.set_boost = cppc_cpufreq_set_boost,
 	.attr = cppc_cpufreq_attr,
 	.name = "cppc_cpufreq",
-- 
GitLab


From fe2535a44904a77615a3af8e8fd7dafb98fb0e1b Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 18 Jun 2021 13:31:27 +0530
Subject: [PATCH 3801/3804] cpufreq: CPPC: Fix potential memleak in
 cppc_cpufreq_cpu_init

It's a classic example of a memory leak: we allocate something, fail,
and never free the resources.

Make sure we free all resources on policy ->init() failures.

Fixes: a28b2bfc099c ("cppc_cpufreq: replace per-cpu data array with a list")
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
Tested-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index be4f62e2c5f14..945ab4942c1c3 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -256,6 +256,16 @@ out:
 	return NULL;
 }
 
+static void cppc_cpufreq_put_cpu_data(struct cpufreq_policy *policy)
+{
+	struct cppc_cpudata *cpu_data = policy->driver_data;
+
+	list_del(&cpu_data->node);
+	free_cpumask_var(cpu_data->shared_cpu_map);
+	kfree(cpu_data);
+	policy->driver_data = NULL;
+}
+
 static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
 	unsigned int cpu = policy->cpu;
@@ -309,7 +319,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	default:
 		pr_debug("Unsupported CPU co-ord type: %d\n",
 			 policy->shared_type);
-		return -EFAULT;
+		ret = -EFAULT;
+		goto out;
 	}
 
 	/*
@@ -324,10 +335,16 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	cpu_data->perf_ctrls.desired_perf =  caps->highest_perf;
 
 	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
-	if (ret)
+	if (ret) {
 		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
 			 caps->highest_perf, cpu, ret);
+		goto out;
+	}
+
+	return 0;
 
+out:
+	cppc_cpufreq_put_cpu_data(policy);
 	return ret;
 }
 
@@ -345,12 +362,7 @@ static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 		pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n",
 			 caps->lowest_perf, cpu, ret);
 
-	/* Remove CPU node from list and free driver data for policy */
-	free_cpumask_var(cpu_data->shared_cpu_map);
-	list_del(&cpu_data->node);
-	kfree(policy->driver_data);
-	policy->driver_data = NULL;
-
+	cppc_cpufreq_put_cpu_data(policy);
 	return 0;
 }
 
-- 
GitLab


From eead1840cbd31e553bf8ccdefbd5b065bf596b71 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 18 Jun 2021 13:42:23 +0530
Subject: [PATCH 3802/3804] cpufreq: CPPC: Pass structure instance by reference

Don't pass structure instance by value, pass it by reference instead.

Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
Tested-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cppc_cpufreq.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 945ab4942c1c3..4a7f0f9b8c60d 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -375,18 +375,18 @@ static inline u64 get_delta(u64 t1, u64 t0)
 }
 
 static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t0,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t1)
+				     struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+				     struct cppc_perf_fb_ctrs *fb_ctrs_t1)
 {
 	u64 delta_reference, delta_delivered;
 	u64 reference_perf, delivered_perf;
 
-	reference_perf = fb_ctrs_t0.reference_perf;
+	reference_perf = fb_ctrs_t0->reference_perf;
 
-	delta_reference = get_delta(fb_ctrs_t1.reference,
-				    fb_ctrs_t0.reference);
-	delta_delivered = get_delta(fb_ctrs_t1.delivered,
-				    fb_ctrs_t0.delivered);
+	delta_reference = get_delta(fb_ctrs_t1->reference,
+				    fb_ctrs_t0->reference);
+	delta_delivered = get_delta(fb_ctrs_t1->delivered,
+				    fb_ctrs_t0->delivered);
 
 	/* Check to avoid divide-by zero */
 	if (delta_reference || delta_delivered)
@@ -417,7 +417,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 	if (ret)
 		return ret;
 
-	return cppc_get_rate_from_fbctrs(cpu_data, fb_ctrs_t0, fb_ctrs_t1);
+	return cppc_get_rate_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1);
 }
 
 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
-- 
GitLab


From 83150f5d05f065fb5c12c612f119015cabdcc124 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 15 Jun 2021 14:27:50 +0530
Subject: [PATCH 3803/3804] arch_topology: Avoid use-after-free for
 scale_freq_data

Currently topology_scale_freq_tick() (which gets called from
scheduler_tick()) may end up using a pointer to "struct
scale_freq_data", which was previously cleared by
topology_clear_scale_freq_source(), as there is no protection in place
here. The users of topology_clear_scale_freq_source(), though, need a
guarantee that the previously cleared scale_freq_data isn't used
anymore, so they can free the related resources.

Since topology_scale_freq_tick() is called from scheduler tick, we don't
want to add locking in there. Use the RCU update mechanism instead
(which is already used by the scheduler's utilization update path) to
guarantee race free updates here.

synchronize_rcu() makes sure that all RCU critical sections that started
before it is called will finish before it returns, and so the callers
of topology_clear_scale_freq_source() don't need to worry about their
callback getting called anymore.

Cc: Paul E. McKenney <paulmck@kernel.org>
Fixes: 01e055c120a4 ("arch_topology: Allow multiple entities to provide sched_freq_tick() callback")
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
Tested-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/base/arch_topology.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index c1179edc0f3b8..921312a8d9576 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -18,10 +18,11 @@
 #include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
+#include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 
-static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
+static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
 static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
 
@@ -66,16 +67,20 @@ void topology_set_scale_freq_source(struct scale_freq_data *data,
 	if (cpumask_empty(&scale_freq_counters_mask))
 		scale_freq_invariant = topology_scale_freq_invariant();
 
+	rcu_read_lock();
+
 	for_each_cpu(cpu, cpus) {
-		sfd = per_cpu(sft_data, cpu);
+		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
 
 		/* Use ARCH provided counters whenever possible */
 		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
-			per_cpu(sft_data, cpu) = data;
+			rcu_assign_pointer(per_cpu(sft_data, cpu), data);
 			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
 		}
 	}
 
+	rcu_read_unlock();
+
 	update_scale_freq_invariant(true);
 }
 EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);
@@ -86,22 +91,32 @@ void topology_clear_scale_freq_source(enum scale_freq_source source,
 	struct scale_freq_data *sfd;
 	int cpu;
 
+	rcu_read_lock();
+
 	for_each_cpu(cpu, cpus) {
-		sfd = per_cpu(sft_data, cpu);
+		sfd = rcu_dereference(*per_cpu_ptr(&sft_data, cpu));
 
 		if (sfd && sfd->source == source) {
-			per_cpu(sft_data, cpu) = NULL;
+			rcu_assign_pointer(per_cpu(sft_data, cpu), NULL);
 			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
 		}
 	}
 
+	rcu_read_unlock();
+
+	/*
+	 * Make sure all references to previous sft_data are dropped to avoid
+	 * use-after-free races.
+	 */
+	synchronize_rcu();
+
 	update_scale_freq_invariant(false);
 }
 EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);
 
 void topology_scale_freq_tick(void)
 {
-	struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);
+	struct scale_freq_data *sfd = rcu_dereference_sched(*this_cpu_ptr(&sft_data));
 
 	if (sfd)
 		sfd->set_freq_scale();
-- 
GitLab


From 1eb5dde674f57b1a1918dab33f09e35cdd64eb07 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 23 Jun 2020 15:49:40 +0530
Subject: [PATCH 3804/3804] cpufreq: CPPC: Add support for frequency invariance

The Frequency Invariance Engine (FIE) provides a frequency scaling
correction factor that helps achieve more accurate load-tracking.

Normally, this scaling factor can be obtained directly with the help of
the cpufreq drivers as they know the exact frequency the hardware is
running at. But that isn't the case for CPPC cpufreq driver.

Another way of obtaining that is using the arch specific counter
support, which is already present in kernel, but that hardware is
optional for platforms.

This patch updates the CPPC driver to register itself with the topology
core to provide its own implementation (cppc_scale_freq_tick()) of
topology_scale_freq_tick() which gets called by the scheduler on every
tick. Note that the arch specific counters have higher priority than
CPPC counters, if available, though the CPPC driver doesn't need to have
any special handling for that.

On an invocation of cppc_scale_freq_tick(), we schedule an irq work
(since we reach here from hard-irq context), which then schedules a
normal work item and cppc_scale_freq_workfn() updates the per_cpu
arch_freq_scale variable based on the counter updates since the last
tick.

To allow platforms to disable this CPPC counter-based frequency
invariance support, this is all done under CONFIG_ACPI_CPPC_CPUFREQ_FIE,
which is enabled by default.

This also exports sched_setattr_nocheck() as the CPPC driver can be
built as a module.

Cc: linux-acpi@vger.kernel.org
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
Tested-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/Kconfig.arm    |  10 ++
 drivers/cpufreq/cppc_cpufreq.c | 252 +++++++++++++++++++++++++++++++--
 include/linux/arch_topology.h  |   1 +
 kernel/sched/core.c            |   1 +
 4 files changed, 251 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index e65e0a43be644..a5c5f70acfc9e 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -19,6 +19,16 @@ config ACPI_CPPC_CPUFREQ
 
 	  If in doubt, say N.
 
+config ACPI_CPPC_CPUFREQ_FIE
+	bool "Frequency Invariance support for CPPC cpufreq driver"
+	depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
+	default y
+	help
+	  This extends frequency invariance support in the CPPC cpufreq driver,
+	  by using CPPC delivered and reference performance counters.
+
+	  If in doubt, say N.
+
 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
 	tristate "Allwinner nvmem based SUN50I CPUFreq driver"
 	depends on ARCH_SUNXI
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 4a7f0f9b8c60d..d4c27022b9c9b 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -10,14 +10,18 @@
 
 #define pr_fmt(fmt)	"CPPC Cpufreq:"	fmt
 
+#include <linux/arch_topology.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
 #include <linux/dmi.h>
+#include <linux/irq_work.h>
+#include <linux/kthread.h>
 #include <linux/time.h>
 #include <linux/vmalloc.h>
+#include <uapi/linux/sched/types.h>
 
 #include <asm/unaligned.h>
 
@@ -57,6 +61,216 @@ static struct cppc_workaround_oem_info wa_info[] = {
 	}
 };
 
+#ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE
+
+/* Frequency invariance support */
+struct cppc_freq_invariance {
+	int cpu;
+	struct irq_work irq_work;
+	struct kthread_work work;
+	struct cppc_perf_fb_ctrs prev_perf_fb_ctrs;
+	struct cppc_cpudata *cpu_data;
+};
+
+static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
+static struct kthread_worker *kworker_fie;
+
+static struct cpufreq_driver cppc_cpufreq_driver;
+static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t1);
+
+/**
+ * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance
+ * @work: The work item.
+ *
+ * The CPPC driver registers itself with the topology core to provide its own
+ * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which
+ * gets called by the scheduler on every tick.
+ *
+ * Note that the arch specific counters have higher priority than CPPC counters,
+ * if available, though the CPPC driver doesn't need to have any special
+ * handling for that.
+ *
+ * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we
+ * reach here from hard-irq context), which then schedules a normal work item
+ * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable
+ * based on the counter updates since the last tick.
+ */
+static void cppc_scale_freq_workfn(struct kthread_work *work)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+	struct cppc_cpudata *cpu_data;
+	unsigned long local_freq_scale;
+	u64 perf;
+
+	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
+	cpu_data = cppc_fi->cpu_data;
+
+	if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) {
+		pr_warn("%s: failed to read perf counters\n", __func__);
+		return;
+	}
+
+	perf = cppc_perf_from_fbctrs(cpu_data, &cppc_fi->prev_perf_fb_ctrs,
+				     &fb_ctrs);
+	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
+
+	perf <<= SCHED_CAPACITY_SHIFT;
+	local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf);
+
+	/* This can happen due to counter's overflow */
+	if (unlikely(local_freq_scale > 1024))
+		local_freq_scale = 1024;
+
+	per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale;
+}
+
+static void cppc_irq_work(struct irq_work *irq_work)
+{
+	struct cppc_freq_invariance *cppc_fi;
+
+	cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work);
+	kthread_queue_work(kworker_fie, &cppc_fi->work);
+}
+
+static void cppc_scale_freq_tick(void)
+{
+	struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id());
+
+	/*
+	 * cppc_get_perf_ctrs() can potentially sleep, call that from the right
+	 * context.
+	 */
+	irq_work_queue(&cppc_fi->irq_work);
+}
+
+static struct scale_freq_data cppc_sftd = {
+	.source = SCALE_FREQ_SOURCE_CPPC,
+	.set_freq_scale = cppc_scale_freq_tick,
+};
+
+static void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	int cpu, ret;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	for_each_cpu(cpu, policy->cpus) {
+		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
+		cppc_fi->cpu = cpu;
+		cppc_fi->cpu_data = policy->driver_data;
+		kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn);
+		init_irq_work(&cppc_fi->irq_work, cppc_irq_work);
+
+		ret = cppc_get_perf_ctrs(cpu, &cppc_fi->prev_perf_fb_ctrs);
+		if (ret) {
+			pr_warn("%s: failed to read perf counters for cpu:%d: %d\n",
+				__func__, cpu, ret);
+
+			/*
+			 * Don't abort if the CPU was offline while the driver
+			 * was getting registered.
+			 */
+			if (cpu_online(cpu))
+				return;
+		}
+	}
+
+	/* Register for freq-invariance */
+	topology_set_scale_freq_source(&cppc_sftd, policy->cpus);
+}
+
+/*
+ * We free all the resources on policy's removal and not on CPU removal as the
+ * irq-work are per-cpu and the hotplug core takes care of flushing the pending
+ * irq-works (hint: smpcfd_dying_cpu()) on CPU hotplug. Even if the kthread-work
+ * fires on another CPU after the concerned CPU is removed, it won't harm.
+ *
+ * We just need to make sure to remove them all on policy->exit().
+ */
+static void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
+{
+	struct cppc_freq_invariance *cppc_fi;
+	int cpu;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	/* policy->cpus will be empty here, use related_cpus instead */
+	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, policy->related_cpus);
+
+	for_each_cpu(cpu, policy->related_cpus) {
+		cppc_fi = &per_cpu(cppc_freq_inv, cpu);
+		irq_work_sync(&cppc_fi->irq_work);
+		kthread_cancel_work_sync(&cppc_fi->work);
+	}
+}
+
+static void __init cppc_freq_invariance_init(void)
+{
+	struct sched_attr attr = {
+		.size		= sizeof(struct sched_attr),
+		.sched_policy	= SCHED_DEADLINE,
+		.sched_nice	= 0,
+		.sched_priority	= 0,
+		/*
+		 * Fake (unused) bandwidth; workaround to "fix"
+		 * priority inheritance.
+		 */
+		.sched_runtime	= 1000000,
+		.sched_deadline = 10000000,
+		.sched_period	= 10000000,
+	};
+	int ret;
+
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	kworker_fie = kthread_create_worker(0, "cppc_fie");
+	if (IS_ERR(kworker_fie))
+		return;
+
+	ret = sched_setattr_nocheck(kworker_fie->task, &attr);
+	if (ret) {
+		pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
+			ret);
+		kthread_destroy_worker(kworker_fie);
+		return;
+	}
+}
+
+static void cppc_freq_invariance_exit(void)
+{
+	if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate)
+		return;
+
+	kthread_destroy_worker(kworker_fie);
+	kworker_fie = NULL;
+}
+
+#else
+static inline void cppc_cpufreq_cpu_fie_init(struct cpufreq_policy *policy)
+{
+}
+
+static inline void cppc_cpufreq_cpu_fie_exit(struct cpufreq_policy *policy)
+{
+}
+
+static inline void cppc_freq_invariance_init(void)
+{
+}
+
+static inline void cppc_freq_invariance_exit(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */
+
 /* Callback function used to retrieve the max frequency from DMI */
 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
 {
@@ -341,6 +555,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto out;
 	}
 
+	cppc_cpufreq_cpu_fie_init(policy);
 	return 0;
 
 out:
@@ -355,6 +570,8 @@ static int cppc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 	unsigned int cpu = policy->cpu;
 	int ret;
 
+	cppc_cpufreq_cpu_fie_exit(policy);
+
 	cpu_data->perf_ctrls.desired_perf = caps->lowest_perf;
 
 	ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls);
@@ -374,12 +591,12 @@ static inline u64 get_delta(u64 t1, u64 t0)
 	return (u32)t1 - (u32)t0;
 }
 
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-				     struct cppc_perf_fb_ctrs *fb_ctrs_t0,
-				     struct cppc_perf_fb_ctrs *fb_ctrs_t1)
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t0,
+				 struct cppc_perf_fb_ctrs *fb_ctrs_t1)
 {
 	u64 delta_reference, delta_delivered;
-	u64 reference_perf, delivered_perf;
+	u64 reference_perf;
 
 	reference_perf = fb_ctrs_t0->reference_perf;
 
@@ -388,14 +605,11 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
 	delta_delivered = get_delta(fb_ctrs_t1->delivered,
 				    fb_ctrs_t0->delivered);
 
-	/* Check to avoid divide-by zero */
-	if (delta_reference || delta_delivered)
-		delivered_perf = (reference_perf * delta_delivered) /
-					delta_reference;
-	else
-		delivered_perf = cpu_data->perf_ctrls.desired_perf;
+	/* Check to avoid divide-by zero and invalid delivered_perf */
+	if (!delta_reference || !delta_delivered)
+		return cpu_data->perf_ctrls.desired_perf;
 
-	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
+	return (reference_perf * delta_delivered) / delta_reference;
 }
 
 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
@@ -403,6 +617,7 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 	struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
 	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
 	struct cppc_cpudata *cpu_data = policy->driver_data;
+	u64 delivered_perf;
 	int ret;
 
 	cpufreq_cpu_put(policy);
@@ -417,7 +632,10 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 	if (ret)
 		return ret;
 
-	return cppc_get_rate_from_fbctrs(cpu_data, &fb_ctrs_t0, &fb_ctrs_t1);
+	delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
+					       &fb_ctrs_t1);
+
+	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
 
 static int cppc_cpufreq_set_boost(struct cpufreq_policy *policy, int state)
@@ -518,14 +736,21 @@ static void cppc_check_hisi_workaround(void)
 
 static int __init cppc_cpufreq_init(void)
 {
+	int ret;
+
 	if ((acpi_disabled) || !acpi_cpc_valid())
 		return -ENODEV;
 
 	INIT_LIST_HEAD(&cpu_data_list);
 
 	cppc_check_hisi_workaround();
+	cppc_freq_invariance_init();
 
-	return cpufreq_register_driver(&cppc_cpufreq_driver);
+	ret = cpufreq_register_driver(&cppc_cpufreq_driver);
+	if (ret)
+		cppc_freq_invariance_exit();
+
+	return ret;
 }
 
 static inline void free_cpu_data(void)
@@ -543,6 +768,7 @@ static inline void free_cpu_data(void)
 static void __exit cppc_cpufreq_exit(void)
 {
 	cpufreq_unregister_driver(&cppc_cpufreq_driver);
+	cppc_freq_invariance_exit();
 
 	free_cpu_data();
 }
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 11e555cfaecb4..f180240dc95f4 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -37,6 +37,7 @@ bool topology_scale_freq_invariant(void);
 enum scale_freq_source {
 	SCALE_FREQ_SOURCE_CPUFREQ = 0,
 	SCALE_FREQ_SOURCE_ARCH,
+	SCALE_FREQ_SOURCE_CPPC,
 };
 
 struct scale_freq_data {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cf16f8fda9a6b..2d9ff40f46619 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7182,6 +7182,7 @@ int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
 {
 	return __sched_setscheduler(p, attr, false, true);
 }
+EXPORT_SYMBOL_GPL(sched_setattr_nocheck);
 
 /**
  * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
-- 
GitLab